Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.am | 103
-rw-r--r-- src/configure.ac | 347
-rw-r--r-- src/examples/sample-plugin/Makefile.am | 59
-rw-r--r-- src/examples/sample-plugin/configure.ac | 11
-rw-r--r-- src/examples/sample-plugin/sample.am | 31
-rw-r--r-- src/examples/sample-plugin/sample/node.c | 295
-rw-r--r-- src/examples/sample-plugin/sample/sample.api | 31
-rw-r--r-- src/examples/sample-plugin/sample/sample.c | 238
-rw-r--r-- src/examples/sample-plugin/sample/sample.h | 40
-rw-r--r-- src/examples/sample-plugin/sample/sample_all_api_h.h | 16
-rw-r--r-- src/examples/sample-plugin/sample/sample_msg_enum.h | 28
-rw-r--r-- src/examples/sample-plugin/sample/sample_test.c | 180
-rw-r--r-- src/examples/sample-plugin/sample_plugin_doc.md | 66
-rw-r--r-- src/examples/srv6-sample-localsid/node.c | 261
-rwxr-xr-x src/examples/srv6-sample-localsid/srv6_localsid_sample.c | 179
-rw-r--r-- src/examples/srv6-sample-localsid/srv6_localsid_sample.h | 61
-rw-r--r-- src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md | 30
-rw-r--r-- src/examples/vlib/dir.dox | 22
-rw-r--r-- src/examples/vlib/main_stub.c | 417
-rw-r--r-- src/examples/vlib/mc_test.c | 384
-rw-r--r-- src/examples/vlib/plex_test.c | 527
-rw-r--r-- src/g2.am | 32
-rw-r--r-- src/m4/ax_vpp_find_jdk8.m4 | 43
-rw-r--r-- src/perftool.am | 44
-rw-r--r-- src/plugins/Makefile.am | 101
-rw-r--r-- src/plugins/acl.am | 35
-rw-r--r-- src/plugins/acl/acl.api | 477
-rw-r--r-- src/plugins/acl/acl.c | 2709
-rw-r--r-- src/plugins/acl/acl.h | 316
-rw-r--r-- src/plugins/acl/acl_all_api_h.h | 21
-rw-r--r-- src/plugins/acl/acl_hash_lookup_doc.md | 241
-rw-r--r-- src/plugins/acl/acl_msg_enum.h | 28
-rw-r--r-- src/plugins/acl/acl_multicore_doc.md | 349
-rw-r--r-- src/plugins/acl/acl_test.c | 1219
-rw-r--r-- src/plugins/acl/fa_node.c | 1874
-rw-r--r-- src/plugins/acl/fa_node.h | 174
-rw-r--r-- src/plugins/acl/hash_lookup.c | 894
-rw-r--r-- src/plugins/acl/hash_lookup.h | 64
-rw-r--r-- src/plugins/acl/hash_lookup_private.h | 33
-rw-r--r-- src/plugins/acl/hash_lookup_types.h | 107
-rw-r--r-- src/plugins/acl/manual_fns.h | 408
-rw-r--r-- src/plugins/dpdk.am | 73
-rw-r--r-- src/plugins/dpdk/api/dpdk.api | 76
-rw-r--r-- src/plugins/dpdk/api/dpdk_all_api_h.h | 19
-rwxr-xr-x src/plugins/dpdk/api/dpdk_api.c | 332
-rw-r--r-- src/plugins/dpdk/api/dpdk_msg_enum.h | 31
-rw-r--r-- src/plugins/dpdk/api/dpdk_test.c | 397
-rw-r--r-- src/plugins/dpdk/buffer.c | 575
-rw-r--r-- src/plugins/dpdk/device/cli.c | 1955
-rw-r--r-- src/plugins/dpdk/device/common.c | 315
-rw-r--r-- src/plugins/dpdk/device/device.c | 856
-rw-r--r-- src/plugins/dpdk/device/dir.dox | 27
-rw-r--r-- src/plugins/dpdk/device/dpdk.h | 483
-rw-r--r-- src/plugins/dpdk/device/dpdk_priv.h | 135
-rw-r--r-- src/plugins/dpdk/device/format.c | 804
-rwxr-xr-x src/plugins/dpdk/device/init.c | 1589
-rw-r--r-- src/plugins/dpdk/device/node.c | 704
-rw-r--r-- src/plugins/dpdk/hqos/hqos.c | 772
-rw-r--r-- src/plugins/dpdk/hqos/qos_doc.md | 411
-rw-r--r-- src/plugins/dpdk/ipsec/cli.c | 229
-rw-r--r-- src/plugins/dpdk/ipsec/crypto_node.c | 215
-rw-r--r-- src/plugins/dpdk/ipsec/dir.dox | 27
-rw-r--r-- src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 87
-rw-r--r-- src/plugins/dpdk/ipsec/esp.h | 399
-rw-r--r-- src/plugins/dpdk/ipsec/esp_decrypt.c | 569
-rw-r--r-- src/plugins/dpdk/ipsec/esp_encrypt.c | 592
-rw-r--r-- src/plugins/dpdk/ipsec/ipsec.c | 501
-rw-r--r-- src/plugins/dpdk/ipsec/ipsec.h | 242
-rw-r--r-- src/plugins/dpdk/main.c | 96
-rw-r--r-- src/plugins/dpdk/thread.c | 85
-rw-r--r-- src/plugins/flowprobe.am | 37
-rw-r--r-- src/plugins/flowprobe/flowprobe.api | 40
-rw-r--r-- src/plugins/flowprobe/flowprobe.c | 1137
-rw-r--r-- src/plugins/flowprobe/flowprobe.h | 174
-rw-r--r-- src/plugins/flowprobe/flowprobe_all_api_h.h | 18
-rw-r--r-- src/plugins/flowprobe/flowprobe_msg_enum.h | 31
-rw-r--r-- src/plugins/flowprobe/flowprobe_plugin_doc.md | 13
-rw-r--r-- src/plugins/flowprobe/flowprobe_test.c | 263
-rw-r--r-- src/plugins/flowprobe/node.c | 1053
-rw-r--r-- src/plugins/gtpu.am | 38
-rw-r--r-- src/plugins/gtpu/gtpu.api | 120
-rwxr-xr-x src/plugins/gtpu/gtpu.c | 1151
-rw-r--r-- src/plugins/gtpu/gtpu.h | 264
-rw-r--r-- src/plugins/gtpu/gtpu_all_api_h.h | 18
-rw-r--r-- src/plugins/gtpu/gtpu_api.c | 256
-rw-r--r-- src/plugins/gtpu/gtpu_decap.c | 1305
-rw-r--r-- src/plugins/gtpu/gtpu_encap.c | 705
-rw-r--r-- src/plugins/gtpu/gtpu_error.def | 18
-rw-r--r-- src/plugins/gtpu/gtpu_msg_enum.h | 31
-rw-r--r-- src/plugins/gtpu/gtpu_test.c | 498
-rw-r--r-- src/plugins/ila.am | 20
-rw-r--r-- src/plugins/ila/ila.c | 1079
-rw-r--r-- src/plugins/ila/ila.h | 116
-rw-r--r-- src/plugins/ioam.am | 247
-rw-r--r-- src/plugins/ioam/analyse/ioam_analyse.h | 526
-rw-r--r-- src/plugins/ioam/analyse/ioam_summary_export.c | 444
-rwxr-xr-x src/plugins/ioam/analyse/ioam_summary_export.h | 86
-rw-r--r-- src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c | 163
-rw-r--r-- src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h | 127
-rw-r--r-- src/plugins/ioam/analyse/ip6/node.c | 523
-rw-r--r-- src/plugins/ioam/dir.dox | 18
-rw-r--r-- src/plugins/ioam/encap/ip6_ioam_e2e.c | 216
-rw-r--r-- src/plugins/ioam/encap/ip6_ioam_e2e.h | 64
-rw-r--r-- src/plugins/ioam/encap/ip6_ioam_pot.c | 265
-rw-r--r-- src/plugins/ioam/encap/ip6_ioam_pot.h | 40
-rw-r--r-- src/plugins/ioam/encap/ip6_ioam_seqno.c | 76
-rw-r--r-- src/plugins/ioam/encap/ip6_ioam_seqno.h | 30
-rw-r--r-- src/plugins/ioam/encap/ip6_ioam_trace.c | 502
-rw-r--r-- src/plugins/ioam/encap/ip6_ioam_trace.h | 51
-rw-r--r-- src/plugins/ioam/export-common/ioam_export.h | 634
-rw-r--r-- src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api | 34
-rw-r--r-- src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c | 271
-rw-r--r-- src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h | 16
-rw-r--r-- src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h | 28
-rw-r--r-- src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c | 179
-rw-r--r-- src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c | 50
-rw-r--r-- src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c | 171
-rw-r--r-- src/plugins/ioam/export/ioam_export.api | 34
-rw-r--r-- src/plugins/ioam/export/ioam_export.c | 249
-rw-r--r-- src/plugins/ioam/export/ioam_export_all_api_h.h | 16
-rw-r--r-- src/plugins/ioam/export/ioam_export_msg_enum.h | 28
-rw-r--r-- src/plugins/ioam/export/ioam_export_test.c | 173
-rw-r--r-- src/plugins/ioam/export/ioam_export_thread.c | 39
-rw-r--r-- src/plugins/ioam/export/node.c | 168
-rw-r--r-- src/plugins/ioam/ioam_plugin_doc.md | 464
-rw-r--r-- src/plugins/ioam/ip6/ioam_cache.api | 29
-rw-r--r-- src/plugins/ioam/ip6/ioam_cache.c | 417
-rw-r--r-- src/plugins/ioam/ip6/ioam_cache.h | 903
-rw-r--r-- src/plugins/ioam/ip6/ioam_cache_all_api_h.h | 16
-rw-r--r-- src/plugins/ioam/ip6/ioam_cache_msg_enum.h | 28
-rw-r--r-- src/plugins/ioam/ip6/ioam_cache_node.c | 421
-rw-r--r-- src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c | 768
-rw-r--r-- src/plugins/ioam/ipfixcollector/ipfixcollector.c | 105
-rw-r--r-- src/plugins/ioam/ipfixcollector/ipfixcollector.h | 124
-rw-r--r-- src/plugins/ioam/ipfixcollector/node.c | 301
-rw-r--r-- src/plugins/ioam/lib-e2e/e2e_util.h | 37
-rw-r--r-- src/plugins/ioam/lib-e2e/ioam_seqno_lib.c | 84
-rw-r--r-- src/plugins/ioam/lib-e2e/ioam_seqno_lib.h | 201
-rw-r--r-- src/plugins/ioam/lib-pot/math64.h | 159
-rw-r--r-- src/plugins/ioam/lib-pot/pot.api | 105
-rw-r--r-- src/plugins/ioam/lib-pot/pot_all_api_h.h | 16
-rw-r--r-- src/plugins/ioam/lib-pot/pot_api.c | 239
-rw-r--r-- src/plugins/ioam/lib-pot/pot_msg_enum.h | 28
-rw-r--r-- src/plugins/ioam/lib-pot/pot_test.c | 333
-rw-r--r-- src/plugins/ioam/lib-pot/pot_util.c | 445
-rw-r--r-- src/plugins/ioam/lib-pot/pot_util.h | 195
-rw-r--r-- src/plugins/ioam/lib-trace/trace.api | 70
-rw-r--r-- src/plugins/ioam/lib-trace/trace_all_api_h.h | 16
-rw-r--r-- src/plugins/ioam/lib-trace/trace_api.c | 238
-rw-r--r-- src/plugins/ioam/lib-trace/trace_config.h | 41
-rw-r--r-- src/plugins/ioam/lib-trace/trace_msg_enum.h | 28
-rw-r--r-- src/plugins/ioam/lib-trace/trace_test.c | 253
-rw-r--r-- src/plugins/ioam/lib-trace/trace_util.c | 207
-rw-r--r-- src/plugins/ioam/lib-trace/trace_util.h | 256
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c | 223
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c | 194
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c | 353
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c | 187
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api | 111
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h | 16
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c | 377
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c | 770
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h | 183
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h | 61
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c | 551
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h | 172
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h | 28
-rw-r--r-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c | 548
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping.api | 73
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping.h | 138
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_all_api_h.h | 24
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_api.c | 169
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_export.c | 306
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_msg_enum.h | 37
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_node.c | 814
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_packet.h | 154
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_test.c | 269
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_util.c | 330
-rw-r--r-- src/plugins/ioam/udp-ping/udp_ping_util.h | 83
-rw-r--r-- src/plugins/ixge.am | 20
-rw-r--r-- src/plugins/ixge/ixge.c | 2958
-rw-r--r-- src/plugins/ixge/ixge.h | 1295
-rw-r--r-- src/plugins/lb.am | 42
-rw-r--r-- src/plugins/lb/api.c | 209
-rw-r--r-- src/plugins/lb/cli.c | 275
-rw-r--r-- src/plugins/lb/lb.api | 56
-rw-r--r-- src/plugins/lb/lb.c | 841
-rw-r--r-- src/plugins/lb/lb.h | 333
-rw-r--r-- src/plugins/lb/lb_plugin_doc.md | 141
-rw-r--r-- src/plugins/lb/lb_test.c | 270
-rw-r--r-- src/plugins/lb/lbhash.h | 216
-rw-r--r-- src/plugins/lb/node.c | 421
-rw-r--r-- src/plugins/lb/refcount.c | 41
-rw-r--r-- src/plugins/lb/refcount.h | 67
-rw-r--r-- src/plugins/lb/util.c | 72
-rw-r--r-- src/plugins/lb/util.h | 40
-rw-r--r-- src/plugins/memif.am | 37
-rw-r--r-- src/plugins/memif/cli.c | 365
-rw-r--r-- src/plugins/memif/device.c | 380
-rw-r--r-- src/plugins/memif/memif.api | 124
-rw-r--r-- src/plugins/memif/memif.c | 819
-rw-r--r-- src/plugins/memif/memif.h | 185
-rw-r--r-- src/plugins/memif/memif_all_api_h.h | 18
-rw-r--r-- src/plugins/memif/memif_api.c | 350
-rw-r--r-- src/plugins/memif/memif_msg_enum.h | 31
-rw-r--r-- src/plugins/memif/memif_test.c | 372
-rw-r--r-- src/plugins/memif/node.c | 533
-rw-r--r-- src/plugins/memif/private.h | 261
-rw-r--r-- src/plugins/memif/socket.c | 740
-rw-r--r-- src/plugins/nat.am | 41
-rwxr-xr-x src/plugins/nat/in2out.c | 3683
-rw-r--r-- src/plugins/nat/nat.api | 1546
-rw-r--r-- src/plugins/nat/nat.c | 3229
-rw-r--r-- src/plugins/nat/nat.h | 555
-rw-r--r-- src/plugins/nat/nat64.c | 861
-rw-r--r-- src/plugins/nat/nat64.h | 322
-rw-r--r-- src/plugins/nat/nat64_cli.c | 984
-rw-r--r-- src/plugins/nat/nat64_db.c | 603
-rw-r--r-- src/plugins/nat/nat64_db.h | 307
-rw-r--r-- src/plugins/nat/nat64_doc.md | 73
-rw-r--r-- src/plugins/nat/nat64_in2out.c | 1118
-rw-r--r-- src/plugins/nat/nat64_out2in.c | 494
-rw-r--r-- src/plugins/nat/nat_all_api_h.h | 19
-rw-r--r-- src/plugins/nat/nat_api.c | 3396
-rw-r--r-- src/plugins/nat/nat_det.c | 158
-rw-r--r-- src/plugins/nat/nat_det.h | 196
-rw-r--r-- src/plugins/nat/nat_ipfix_logging.c | 848
-rw-r--r-- src/plugins/nat/nat_ipfix_logging.h | 79
-rw-r--r-- src/plugins/nat/nat_msg_enum.h | 31
-rw-r--r-- src/plugins/nat/nat_test.c | 1167
-rwxr-xr-x src/plugins/nat/out2in.c | 2514
-rw-r--r-- src/plugins/pppoe.am | 39
-rw-r--r-- src/plugins/pppoe/pppoe.api | 90
-rw-r--r-- src/plugins/pppoe/pppoe.c | 739
-rw-r--r-- src/plugins/pppoe/pppoe.h | 289
-rw-r--r-- src/plugins/pppoe/pppoe_all_api_h.h | 18
-rw-r--r-- src/plugins/pppoe/pppoe_api.c | 224
-rw-r--r-- src/plugins/pppoe/pppoe_decap.c | 422
-rw-r--r-- src/plugins/pppoe/pppoe_error.def | 18
-rw-r--r-- src/plugins/pppoe/pppoe_msg_enum.h | 31
-rw-r--r-- src/plugins/pppoe/pppoe_tap.c | 89
-rw-r--r-- src/plugins/pppoe/pppoe_tap_node.c | 297
-rw-r--r-- src/plugins/pppoe/pppoe_test.c | 330
-rw-r--r-- src/plugins/sixrd.am | 26
-rw-r--r-- src/plugins/sixrd/ip4_sixrd.c | 127
-rw-r--r-- src/plugins/sixrd/ip6_sixrd.c | 129
-rw-r--r-- src/plugins/sixrd/sixrd.c | 377
-rw-r--r-- src/plugins/sixrd/sixrd.h | 141
-rw-r--r-- src/plugins/sixrd/sixrd_dpo.c | 132
-rw-r--r-- src/plugins/sixrd/sixrd_dpo.h | 61
-rwxr-xr-x src/scripts/version | 50
-rw-r--r-- src/scripts/vnet/arp4 | 21
-rw-r--r-- src/scripts/vnet/arp4-mpls | 24
-rw-r--r-- src/scripts/vnet/arp6 | 21
-rw-r--r-- src/scripts/vnet/bvi | 76
-rw-r--r-- src/scripts/vnet/dhcp/dhcpd.conf | 8
-rw-r--r-- src/scripts/vnet/dhcp/left-ping-target.sh | 4
-rw-r--r-- src/scripts/vnet/dhcp/leftpeer.conf | 17
-rw-r--r-- src/scripts/vnet/dhcp/proxy | 22
-rw-r--r-- src/scripts/vnet/icmp | 16
-rw-r--r-- src/scripts/vnet/icmp6 | 16
-rw-r--r-- src/scripts/vnet/ige | 19
-rw-r--r-- src/scripts/vnet/ip6 | 29
-rw-r--r-- src/scripts/vnet/ip6-hbh | 84
-rw-r--r-- src/scripts/vnet/ixge | 15
-rw-r--r-- src/scripts/vnet/l2efpfilter | 83
-rw-r--r-- src/scripts/vnet/l2efpfilter_perf | 58
-rw-r--r-- src/scripts/vnet/l2fib | 46
-rw-r--r-- src/scripts/vnet/l2fib_perf | 29
-rw-r--r-- src/scripts/vnet/l2fib_xc | 31
-rw-r--r-- src/scripts/vnet/l2flood | 42
-rw-r--r-- src/scripts/vnet/l2tp | 134
-rwxr-xr-x src/scripts/vnet/leftpeer/leftpeer-classify | 8
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-classify6 | 5
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-classifyl2 | 8
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-dhcp | 23
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-ioam.conf | 15
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-l3vxlan.conf | 12
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-lisp.conf | 18
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-mpls.conf | 17
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-sr.conf | 24
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer-vxlan.conf | 17
-rw-r--r-- src/scripts/vnet/leftpeer/leftpeer.script | 9
-rw-r--r-- src/scripts/vnet/lfib/ip4-to-mpls | 26
-rw-r--r-- src/scripts/vnet/lfib/mpls-pop-to-mpls | 28
-rw-r--r-- src/scripts/vnet/lfib/mpls-to-ip4 | 27
-rw-r--r-- src/scripts/vnet/lfib/mpls-to-mpls | 26
-rw-r--r-- src/scripts/vnet/mcast/ip4 | 25
-rw-r--r-- src/scripts/vnet/mpls-o-ethernet/leftpeer.conf | 17
-rw-r--r-- src/scripts/vnet/mpls-o-ethernet/pg | 10
-rw-r--r-- src/scripts/vnet/mpls-o-ethernet/rightpeer.conf | 15
-rw-r--r-- src/scripts/vnet/mpls-o-ethernet/single.conf | 17
-rw-r--r-- src/scripts/vnet/mpls-o-gre/dhcpd.conf | 116
-rw-r--r-- src/scripts/vnet/mpls-o-gre/leftpeer.conf | 14
-rw-r--r-- src/scripts/vnet/mpls-o-gre/rightpeer.conf | 14
-rw-r--r-- src/scripts/vnet/mpls-tunnel | 87
-rw-r--r-- src/scripts/vnet/nat44 | 41
-rw-r--r-- src/scripts/vnet/nat44_det | 108
-rw-r--r-- src/scripts/vnet/nat44_static | 44
-rw-r--r-- src/scripts/vnet/nat44_static_with_port | 44
-rw-r--r-- src/scripts/vnet/pcap | 18
-rw-r--r-- src/scripts/vnet/probe4 | 11
-rw-r--r-- src/scripts/vnet/probe6 | 7
-rw-r--r-- src/scripts/vnet/rewrite | 62
-rw-r--r-- src/scripts/vnet/rightpeer/rightpeer-ioam.conf | 14
-rw-r--r-- src/scripts/vnet/rightpeer/rightpeer-l3vxlan.conf | 9
-rw-r--r-- src/scripts/vnet/rightpeer/rightpeer-lisp.conf | 16
-rw-r--r-- src/scripts/vnet/rightpeer/rightpeer-mpls-l2.conf | 24
-rw-r--r-- src/scripts/vnet/rightpeer/rightpeer-mpls.conf | 17
-rw-r--r-- src/scripts/vnet/rightpeer/rightpeer-sr.conf | 28
-rw-r--r-- src/scripts/vnet/rightpeer/rightpeer-vxlan.conf | 16
-rw-r--r-- src/scripts/vnet/rightpeer/rightpeer.script | 9
-rw-r--r-- src/scripts/vnet/rpf | 18
-rw-r--r-- src/scripts/vnet/rtt-test | 31
-rw-r--r-- src/scripts/vnet/source_and_port_range_check | 63
-rw-r--r-- src/scripts/vnet/speed | 14
-rw-r--r-- src/scripts/vnet/sr/sr_mpls | 11
-rw-r--r-- src/scripts/vnet/srp | 27
-rw-r--r-- src/scripts/vnet/tcp | 18
-rw-r--r-- src/scripts/vnet/tcp-test | 6
-rw-r--r-- src/scripts/vnet/tf-ucs-1 | 16
-rw-r--r-- src/scripts/vnet/udp | 19
-rw-r--r-- src/scripts/vnet/uri/afp_setup.cli | 5
-rw-r--r-- src/scripts/vnet/uri/dpdk_setup.cli | 4
-rw-r--r-- src/scripts/vnet/uri/dummy_app.py | 100
-rw-r--r-- src/scripts/vnet/uri/tap_setup.cli | 5
-rwxr-xr-x src/scripts/vnet/uri/tcp-setup.sh | 39
-rw-r--r-- src/scripts/vnet/uri/udp | 20
-rw-r--r-- src/scripts/vnet/urpf | 86
-rw-r--r-- src/scripts/vnet/virl/ip6sr.virl | 874
-rw-r--r-- src/scripts/vnet/virl/ip6sr_notes.txt | 38
-rw-r--r-- src/scripts/vnet/virl/mplsogre.virl | 319
-rw-r--r-- src/scripts/vnet/virl/simple.virl | 389
-rw-r--r-- src/scripts/vnet/vlan | 23
-rwxr-xr-x src/scripts/vppctl | 134
-rw-r--r-- src/scripts/vppctl_completion | 30
-rw-r--r-- src/suffix-rules.mk | 27
-rw-r--r-- src/svm.am | 40
-rw-r--r-- src/svm/dir.dox | 21
-rw-r--r-- src/svm/persist.c | 258
-rw-r--r-- src/svm/ssvm.c | 212
-rw-r--r-- src/svm/ssvm.h | 176
-rw-r--r-- src/svm/svm.c | 1268
-rw-r--r-- src/svm/svm.h | 107
-rw-r--r-- src/svm/svm_common.h | 135
-rw-r--r-- src/svm/svm_fifo.c | 838
-rw-r--r-- src/svm/svm_fifo.h | 228
-rw-r--r-- src/svm/svm_fifo_segment.c | 643
-rw-r--r-- src/svm/svm_fifo_segment.h | 136
-rw-r--r-- src/svm/svm_test.c | 79
-rw-r--r-- src/svm/svmdb.c | 676
-rw-r--r-- src/svm/svmdb.h | 135
-rw-r--r-- src/svm/svmdbtool.c | 537
-rw-r--r-- src/svm/svmtool.c | 528
-rw-r--r-- src/svm/test_svm_fifo1.c | 357
-rw-r--r-- src/tests/vnet/README | 10
-rw-r--r-- src/tests/vnet/lisp-cp/test_cp_serdes.c | 683
-rw-r--r-- src/tests/vnet/lisp-cp/test_lisp_types.c | 657
-rw-r--r-- src/tools/elftool/dir.dox | 19
-rw-r--r-- src/tools/elftool/elftool.c | 464
-rw-r--r-- src/tools/g2/clib.c | 157
-rw-r--r-- src/tools/g2/cpel.c | 470
-rw-r--r-- src/tools/g2/cpel.h | 83
-rw-r--r-- src/tools/g2/events.c | 475
-rw-r--r-- src/tools/g2/g2.h | 196
-rw-r--r-- src/tools/g2/g2version.c | 19
-rw-r--r-- src/tools/g2/main.c | 199
-rw-r--r-- src/tools/g2/menu1.c | 565
-rw-r--r-- src/tools/g2/mkversion.c | 77
-rw-r--r-- src/tools/g2/pointsel.c | 854
-rw-r--r-- src/tools/g2/props.c | 279
-rw-r--r-- src/tools/g2/props.h | 21
-rw-r--r-- src/tools/g2/view1.c | 3237
-rw-r--r-- src/tools/perftool/c2cpel.c | 251
-rw-r--r-- src/tools/perftool/cpel.h | 83
-rw-r--r-- src/tools/perftool/cpel_util.c | 456
-rw-r--r-- src/tools/perftool/cpel_util.h | 68
-rw-r--r-- src/tools/perftool/cpelatency.c | 927
-rw-r--r-- src/tools/perftool/cpeldump.c | 641
-rw-r--r-- src/tools/perftool/cpelinreg.c | 892
-rw-r--r-- src/tools/perftool/cpelstate.c | 822
-rw-r--r-- src/tools/perftool/delsvec.c | 315
-rw-r--r-- src/tools/perftool/elog_merge.c | 181
-rw-r--r-- src/tools/perftool/linreg.c | 78
-rw-r--r-- src/tools/perftool/new.cpel | bin 0 -> 1672 bytes
-rw-r--r-- src/tools/perftool/new.elog | bin 0 -> 4525 bytes
-rw-r--r-- src/tools/perftool/props.c | 280
-rw-r--r-- src/tools/vppapigen/gram.y | 91
-rw-r--r-- src/tools/vppapigen/lex.c | 1120
-rw-r--r-- src/tools/vppapigen/lex.h | 51
-rw-r--r-- src/tools/vppapigen/node.c | 1547
-rw-r--r-- src/tools/vppapigen/node.h | 96
-rw-r--r-- src/uri.am | 67
-rw-r--r-- src/uri/sock_test.h | 415
-rw-r--r-- src/uri/sock_test_client.c | 1076
-rw-r--r-- src/uri/sock_test_server.c | 596
-rw-r--r-- src/uri/uri_socket_server.c | 231
-rw-r--r-- src/uri/uri_socket_test.c | 181
-rwxr-xr-x src/uri/uri_tcp_test.c | 1274
-rw-r--r-- src/uri/uri_udp_test.c | 1040
-rw-r--r-- src/uri/vcl_test_client.c | 27
-rw-r--r-- src/uri/vcl_test_server.c | 27
-rw-r--r-- src/uri/vppcom.c | 2440
-rw-r--r-- src/uri/vppcom.h | 152
-rw-r--r-- src/uri/vppcom_test.conf | 25
-rw-r--r-- src/vat/api_format.c | 21027
-rw-r--r-- src/vat/json_format.c | 304
-rw-r--r-- src/vat/json_format.h | 254
-rw-r--r-- src/vat/json_test.c | 75
-rw-r--r-- src/vat/main.c | 434
-rw-r--r-- src/vat/plugin.c | 200
-rw-r--r-- src/vat/plugin.h | 61
-rw-r--r-- src/vat/plugin_api.c | 292
-rw-r--r-- src/vat/restart.c | 246
-rw-r--r-- src/vat/vat.h | 252
-rw-r--r-- src/vlib-api.am | 84
-rw-r--r-- src/vlib.am | 100
-rw-r--r-- src/vlib/buffer.c | 1134
-rw-r--r-- src/vlib/buffer.h | 540
-rw-r--r-- src/vlib/buffer_funcs.h | 946
-rw-r--r-- src/vlib/buffer_node.h | 337
-rw-r--r-- src/vlib/buffer_serialize.c | 248
-rw-r--r-- src/vlib/cli.c | 1345
-rw-r--r-- src/vlib/cli.h | 196
-rw-r--r-- src/vlib/cli_funcs.h | 58
-rw-r--r-- src/vlib/counter.c | 143
-rw-r--r-- src/vlib/counter.h | 330
-rw-r--r-- src/vlib/defs.h | 82
-rw-r--r-- src/vlib/dir.dox | 23
-rw-r--r-- src/vlib/elog_samples.c | 122
-rw-r--r-- src/vlib/error.c | 338
-rw-r--r-- src/vlib/error.h | 101
-rw-r--r-- src/vlib/error_funcs.h | 90
-rw-r--r-- src/vlib/format.c | 196
-rw-r--r-- src/vlib/format_funcs.h | 75
-rw-r--r-- src/vlib/global_funcs.h | 45
-rw-r--r-- src/vlib/i2c.c | 231
-rw-r--r-- src/vlib/i2c.h | 67
-rw-r--r-- src/vlib/init.c | 168
-rw-r--r-- src/vlib/init.h | 239
-rw-r--r-- src/vlib/lex.c | 271
-rw-r--r-- src/vlib/lex.h | 145
-rw-r--r-- src/vlib/linux/pci.c | 666
-rw-r--r-- src/vlib/linux/physmem.c | 277
-rw-r--r-- src/vlib/main.c | 1816
-rw-r--r-- src/vlib/main.h | 375
-rw-r--r-- src/vlib/mc.c | 2609
-rw-r--r-- src/vlib/mc.h | 687
-rw-r--r-- src/vlib/node.c | 679
-rw-r--r-- src/vlib/node.h | 736
-rw-r--r-- src/vlib/node_cli.c | 456
-rw-r--r-- src/vlib/node_format.c | 187
-rw-r--r-- src/vlib/node_funcs.h | 1175
-rw-r--r-- src/vlib/parse.c | 1007
-rw-r--r-- src/vlib/parse.h | 221
-rw-r--r-- src/vlib/parse_builtin.c | 150
-rw-r--r-- src/vlib/pci/pci.c | 264
-rw-r--r-- src/vlib/pci/pci.h | 251
-rw-r--r-- src/vlib/pci/pci_config.h | 731
-rw-r--r-- src/vlib/physmem.h | 81
-rw-r--r-- src/vlib/physmem_funcs.h | 161
-rw-r--r-- src/vlib/threads.c | 1820
-rw-r--r-- src/vlib/threads.h | 537
-rw-r--r-- src/vlib/threads_cli.c | 584
-rw-r--r-- src/vlib/trace.c | 552
-rw-r--r-- src/vlib/trace.h | 100
-rw-r--r-- src/vlib/trace_funcs.h | 185
-rw-r--r-- src/vlib/unix/cj.c | 272
-rw-r--r-- src/vlib/unix/cj.h | 79
-rw-r--r-- src/vlib/unix/cli.c | 3468
-rw-r--r-- src/vlib/unix/dir.dox | 28
-rw-r--r-- src/vlib/unix/input.c | 272
-rw-r--r-- src/vlib/unix/main.c | 642
-rw-r--r-- src/vlib/unix/mc_socket.c | 1050
-rw-r--r-- src/vlib/unix/mc_socket.h | 137
-rw-r--r-- src/vlib/unix/plugin.c | 553
-rw-r--r-- src/vlib/unix/plugin.h | 126
-rw-r--r-- src/vlib/unix/unix.h | 168
-rw-r--r-- src/vlib/unix/util.c | 191
-rw-r--r-- src/vlib/vlib.h | 87
-rw-r--r-- src/vlib/vlib_process_doc.h | 147
-rw-r--r-- src/vlibapi/api.h | 131
-rw-r--r-- src/vlibapi/api_common.h | 330
-rw-r--r-- src/vlibapi/api_doc.md | 352
-rw-r--r-- src/vlibapi/api_helper_macros.h | 261
-rw-r--r-- src/vlibapi/api_shared.c | 925
-rw-r--r-- src/vlibapi/node_serialize.c | 394
-rw-r--r-- src/vlibapi/node_unserialize.c | 227
-rw-r--r-- src/vlibapi/vat_helper_macros.h | 76
-rw-r--r-- src/vlibmemory/api.h | 61
-rw-r--r-- src/vlibmemory/api_common.h | 139
-rw-r--r-- src/vlibmemory/memclnt.api | 114
-rw-r--r-- src/vlibmemory/memory_client.c | 536
-rw-r--r-- src/vlibmemory/memory_shared.c | 666
-rw-r--r-- src/vlibmemory/memory_vlib.c | 2405
-rw-r--r-- src/vlibmemory/unix_shared_memory_queue.c | 385
-rw-r--r-- src/vlibmemory/unix_shared_memory_queue.h | 70
-rw-r--r-- src/vlibmemory/vl_memory_api_h.h | 32
-rw-r--r-- src/vlibmemory/vl_memory_msg_enum.h | 42
-rw-r--r-- src/vlibsocket/api.h | 87
-rw-r--r-- src/vlibsocket/sock_test.c | 155
-rw-r--r-- src/vlibsocket/sockclnt.api | 50
-rw-r--r-- src/vlibsocket/sockclnt_vlib.c | 209
-rw-r--r-- src/vlibsocket/socksvr_vlib.c | 706
-rw-r--r-- src/vlibsocket/vl_socket_api_h.h | 33
-rw-r--r-- src/vlibsocket/vl_socket_msg_enum.h | 42
-rw-r--r-- src/vnet.am | 1074
-rw-r--r-- src/vnet/adj/adj.c | 607
-rw-r--r-- src/vnet/adj/adj.h | 379
-rw-r--r-- src/vnet/adj/adj_bfd.c | 184
-rw-r--r-- src/vnet/adj/adj_delegate.c | 144
-rw-r--r-- src/vnet/adj/adj_delegate.h | 104
-rw-r--r-- src/vnet/adj/adj_glean.c | 285
-rw-r--r-- src/vnet/adj/adj_glean.h | 61
-rw-r--r-- src/vnet/adj/adj_internal.h | 112
-rw-r--r-- src/vnet/adj/adj_l2.c | 192
-rw-r--r-- src/vnet/adj/adj_l2.h | 24
-rw-r--r-- src/vnet/adj/adj_mcast.c | 483
-rw-r--r-- src/vnet/adj/adj_mcast.h | 114
-rw-r--r-- src/vnet/adj/adj_midchain.c | 666
-rw-r--r-- src/vnet/adj/adj_midchain.h | 82
-rw-r--r-- src/vnet/adj/adj_nbr.c | 1124
-rw-r--r-- src/vnet/adj/adj_nbr.h | 176
-rw-r--r-- src/vnet/adj/adj_nsh.c | 211
-rw-r--r-- src/vnet/adj/adj_nsh.h | 31
-rw-r--r-- src/vnet/adj/adj_types.h | 53
-rw-r--r-- src/vnet/adj/rewrite.c | 234
-rw-r--r-- src/vnet/adj/rewrite.h | 350
-rw-r--r-- src/vnet/api_errno.h | 137
-rw-r--r-- src/vnet/bfd/bfd.api | 288
-rw-r--r-- src/vnet/bfd/bfd_api.c | 411
-rw-r--r-- src/vnet/bfd/bfd_api.h | 117
-rw-r--r-- src/vnet/bfd/bfd_cli.c | 950
-rw-r--r-- src/vnet/bfd/bfd_debug.h | 86
-rw-r--r-- src/vnet/bfd/bfd_doc.md | 374
-rw-r--r-- src/vnet/bfd/bfd_main.c | 2058
-rw-r--r-- src/vnet/bfd/bfd_main.h | 400
-rw-r--r-- src/vnet/bfd/bfd_protocol.c | 195
-rw-r--r-- src/vnet/bfd/bfd_protocol.h | 212
-rw-r--r-- src/vnet/bfd/bfd_udp.c | 1516
-rw-r--r-- src/vnet/bfd/bfd_udp.h | 124
-rw-r--r-- src/vnet/bfd/dir.dox | 18
-rw-r--r-- src/vnet/buffer.h | 326
-rw-r--r-- src/vnet/cdp/cdp.pg | 7
-rw-r--r-- src/vnet/cdp/cdp_input.c | 506
-rw-r--r-- src/vnet/cdp/cdp_node.c | 208
-rw-r--r-- src/vnet/cdp/cdp_node.h | 147
-rw-r--r-- src/vnet/cdp/cdp_periodic.c | 515
-rw-r--r-- src/vnet/cdp/cdp_protocol.h | 186
-rw-r--r-- src/vnet/classify/README | 180
-rw-r--r-- src/vnet/classify/classify.api | 327
-rw-r--r-- src/vnet/classify/classify_api.c | 547
-rw-r--r-- src/vnet/classify/flow_classify.c | 212
-rw-r--r-- src/vnet/classify/flow_classify.h | 51
-rw-r--r-- src/vnet/classify/flow_classify_node.c | 338
-rw-r--r-- src/vnet/classify/input_acl.c | 283
-rw-r--r-- src/vnet/classify/input_acl.h | 54
-rw-r--r-- src/vnet/classify/ip_classify.c | 365
-rw-r--r-- src/vnet/classify/policer_classify.c | 227
-rw-r--r-- src/vnet/classify/policer_classify.h | 55
-rw-r--r-- src/vnet/classify/vnet_classify.c | 2642
-rw-r--r-- src/vnet/classify/vnet_classify.h | 538
-rw-r--r-- src/vnet/config.c | 361
-rw-r--r-- src/vnet/config.h | 176
-rw-r--r-- src/vnet/cop/cop.api | 66
-rw-r--r-- src/vnet/cop/cop.c | 387
-rw-r--r-- src/vnet/cop/cop.h | 89
-rw-r--r-- src/vnet/cop/cop_api.c | 141
-rw-r--r-- src/vnet/cop/ip4_whitelist.c | 338
-rw-r--r-- src/vnet/cop/ip6_whitelist.c | 298
-rw-r--r-- src/vnet/cop/node1.c | 319
-rw-r--r-- src/vnet/devices/af_packet/af_packet.api | 61
-rw-r--r-- src/vnet/devices/af_packet/af_packet.c | 433
-rw-r--r-- src/vnet/devices/af_packet/af_packet.h | 73
-rw-r--r-- src/vnet/devices/af_packet/af_packet_api.c | 143
-rw-r--r-- src/vnet/devices/af_packet/cli.c | 211
-rw-r--r-- src/vnet/devices/af_packet/device.c | 354
-rw-r--r-- src/vnet/devices/af_packet/dir.dox | 29
-rw-r--r-- src/vnet/devices/af_packet/node.c | 310
-rw-r--r-- src/vnet/devices/devices.c | 365
-rw-r--r-- src/vnet/devices/devices.h | 168
-rw-r--r-- src/vnet/devices/netmap/cli.c | 236
-rw-r--r-- src/vnet/devices/netmap/device.c | 256
-rw-r--r-- src/vnet/devices/netmap/dir.dox | 27
-rw-r--r-- src/vnet/devices/netmap/net_netmap.h | 650
-rw-r--r-- src/vnet/devices/netmap/netmap.api | 54
-rw-r--r-- src/vnet/devices/netmap/netmap.c | 312
-rw-r--r-- src/vnet/devices/netmap/netmap.h | 166
-rw-r--r-- src/vnet/devices/netmap/netmap_api.c | 137
-rw-r--r-- src/vnet/devices/netmap/node.c | 302
-rw-r--r-- src/vnet/devices/ssvm/node.c | 345
-rw-r--r-- src/vnet/devices/ssvm/ssvm_eth.c | 491
-rw-r--r-- src/vnet/devices/ssvm/ssvm_eth.h | 141
-rw-r--r-- src/vnet/devices/virtio/dir.dox | 27
-rw-r--r-- src/vnet/devices/virtio/vhost-user.c | 3671
-rw-r--r-- src/vnet/devices/virtio/vhost-user.h | 342
-rw-r--r-- src/vnet/devices/virtio/vhost_user.api | 105
-rw-r--r-- src/vnet/devices/virtio/vhost_user_api.c | 254
-rw-r--r-- src/vnet/dhcp/client.c | 1135
-rw-r--r-- src/vnet/dhcp/client.h | 122
-rw-r--r-- src/vnet/dhcp/dhcp.api | 139
-rw-r--r-- src/vnet/dhcp/dhcp4_packet.h | 66
-rw-r--r-- src/vnet/dhcp/dhcp4_proxy_error.def | 32
-rw-r--r-- src/vnet/dhcp/dhcp4_proxy_node.c | 1068
-rw-r--r-- src/vnet/dhcp/dhcp6_packet.h | 183
-rw-r--r-- src/vnet/dhcp/dhcp6_proxy_error.def | 29
-rw-r--r-- src/vnet/dhcp/dhcp6_proxy_node.c | 1147
-rw-r--r-- src/vnet/dhcp/dhcp_api.c | 290
-rw-r--r-- src/vnet/dhcp/dhcp_proxy.c | 351
-rw-r--r-- src/vnet/dhcp/dhcp_proxy.h | 286
-rw-r--r-- src/vnet/dir.dox | 29
-rw-r--r-- src/vnet/dpo/classify_dpo.c | 131
-rw-r--r-- src/vnet/dpo/classify_dpo.h | 56
-rw-r--r-- src/vnet/dpo/dpo.c | 574
-rw-r--r-- src/vnet/dpo/dpo.h | 411
-rw-r--r-- src/vnet/dpo/drop_dpo.c | 112
-rw-r--r-- src/vnet/dpo/drop_dpo.h | 31
-rw-r--r-- src/vnet/dpo/interface_rx_dpo.c | 445
-rw-r--r-- src/vnet/dpo/interface_rx_dpo.h | 69
-rw-r--r-- src/vnet/dpo/interface_tx_dpo.c | 92
-rw-r--r-- src/vnet/dpo/interface_tx_dpo.h | 33
-rw-r--r-- src/vnet/dpo/ip_null_dpo.c | 408
-rw-r--r-- src/vnet/dpo/ip_null_dpo.h | 56
-rw-r--r-- src/vnet/dpo/load_balance.c | 1115
-rw-r--r-- src/vnet/dpo/load_balance.h | 219
-rw-r--r-- src/vnet/dpo/load_balance_map.c | 582
-rw-r--r-- src/vnet/dpo/load_balance_map.h | 110
-rw-r--r-- src/vnet/dpo/lookup_dpo.c | 1423
-rw-r--r-- src/vnet/dpo/lookup_dpo.h | 128
-rw-r--r-- src/vnet/dpo/mpls_disposition.c | 364
-rw-r--r-- src/vnet/dpo/mpls_disposition.h | 85
-rw-r--r-- src/vnet/dpo/mpls_label_dpo.c | 703
-rw-r--r-- src/vnet/dpo/mpls_label_dpo.h | 101
-rw-r--r-- src/vnet/dpo/punt_dpo.c | 100
-rw-r--r-- src/vnet/dpo/punt_dpo.h | 30
-rw-r--r-- src/vnet/dpo/receive_dpo.c | 170
-rw-r--r-- src/vnet/dpo/receive_dpo.h | 62
-rw-r--r-- src/vnet/dpo/replicate_dpo.c | 821
-rw-r--r-- src/vnet/dpo/replicate_dpo.h | 148
-rw-r--r-- src/vnet/ethernet/arp.c | 2536
-rw-r--r-- src/vnet/ethernet/arp_packet.h | 180
-rw-r--r-- src/vnet/ethernet/dir.dox | 24
-rw-r--r-- src/vnet/ethernet/error.def | 46
-rw-r--r-- src/vnet/ethernet/ethernet.h | 577
-rw-r--r-- src/vnet/ethernet/format.c | 348
-rw-r--r-- src/vnet/ethernet/init.c | 128
-rw-r--r-- src/vnet/ethernet/interface.c | 880
-rw-r--r-- src/vnet/ethernet/mac_swap.c | 397
-rwxr-xr-x src/vnet/ethernet/node.c | 1419
-rw-r--r-- src/vnet/ethernet/p2p_ethernet.api | 50
-rw-r--r-- src/vnet/ethernet/p2p_ethernet.c | 276
-rw-r--r-- src/vnet/ethernet/p2p_ethernet.h | 63
-rw-r--r-- src/vnet/ethernet/p2p_ethernet_api.c | 137
-rw-r--r-- src/vnet/ethernet/p2p_ethernet_input.c | 262
-rw-r--r-- src/vnet/ethernet/packet.h | 152
-rw-r--r-- src/vnet/ethernet/pg.c | 183
-rw-r--r-- src/vnet/ethernet/sfp.c | 117
-rw-r--r-- src/vnet/ethernet/sfp.h | 117
-rw-r--r-- src/vnet/ethernet/types.def | 113
-rw-r--r-- src/vnet/feature/feature.c | 456
-rw-r--r-- src/vnet/feature/feature.h | 387
-rw-r--r-- src/vnet/feature/registration.c | 301
-rw-r--r-- src/vnet/fib/fib.c | 43
-rw-r--r-- src/vnet/fib/fib.h | 650
-rw-r--r-- src/vnet/fib/fib_api.h | 58
-rw-r--r-- src/vnet/fib/fib_attached_export.c | 565
-rw-r--r-- src/vnet/fib/fib_attached_export.h | 57
-rw-r--r-- src/vnet/fib/fib_bfd.c | 196
-rw-r--r-- src/vnet/fib/fib_entry.c | 1618
-rw-r--r-- src/vnet/fib/fib_entry.h | 548
-rw-r--r-- src/vnet/fib/fib_entry_cover.c | 180
-rw-r--r-- src/vnet/fib/fib_entry_cover.h | 42
-rw-r--r-- src/vnet/fib/fib_entry_delegate.c | 256
-rw-r--r-- src/vnet/fib/fib_entry_delegate.h | 158
-rw-r--r-- src/vnet/fib/fib_entry_src.c | 1423
-rw-r--r-- src/vnet/fib/fib_entry_src.h | 298
-rw-r--r-- src/vnet/fib/fib_entry_src_adj.c | 381
-rw-r--r-- src/vnet/fib/fib_entry_src_api.c | 171
-rw-r--r-- src/vnet/fib/fib_entry_src_default.c | 121
-rw-r--r-- src/vnet/fib/fib_entry_src_default_route.c | 58
-rw-r--r-- src/vnet/fib/fib_entry_src_interface.c | 216
-rw-r--r-- src/vnet/fib/fib_entry_src_lisp.c | 133
-rw-r--r-- src/vnet/fib/fib_entry_src_mpls.c | 199
-rw-r--r-- src/vnet/fib/fib_entry_src_rr.c | 300
-rw-r--r-- src/vnet/fib/fib_entry_src_special.c | 70
-rw-r--r-- src/vnet/fib/fib_internal.h | 70
-rw-r--r-- src/vnet/fib/fib_node.c | 277
-rw-r--r-- src/vnet/fib/fib_node.h | 377
-rw-r--r-- src/vnet/fib/fib_node_list.c | 390
-rw-r--r-- src/vnet/fib/fib_node_list.h | 64
-rw-r--r-- src/vnet/fib/fib_path.c | 2242
-rw-r--r-- src/vnet/fib/fib_path.h | 185
-rw-r--r-- src/vnet/fib/fib_path_ext.c | 438
-rw-r--r-- src/vnet/fib/fib_path_ext.h | 144
-rw-r--r-- src/vnet/fib/fib_path_list.c | 1380
-rw-r--r-- src/vnet/fib/fib_path_list.h | 183
-rw-r--r-- src/vnet/fib/fib_table.c | 1295
-rw-r--r-- src/vnet/fib/fib_table.h | 811
-rw-r--r-- src/vnet/fib/fib_test.c | 8768
-rw-r--r-- src/vnet/fib/fib_test.h | 111
-rw-r--r-- src/vnet/fib/fib_types.c | 327
-rw-r--r-- src/vnet/fib/fib_types.h | 426
-rw-r--r-- src/vnet/fib/fib_urpf_list.c | 268
-rw-r--r-- src/vnet/fib/fib_urpf_list.h | 146
-rw-r--r-- src/vnet/fib/fib_walk.c | 1205
-rw-r--r-- src/vnet/fib/fib_walk.h | 61
-rw-r--r-- src/vnet/fib/ip4_fib.c | 740
-rw-r--r-- src/vnet/fib/ip4_fib.h | 168
-rw-r--r-- src/vnet/fib/ip6_fib.c | 757
-rw-r--r-- src/vnet/fib/ip6_fib.h | 174
-rw-r--r-- src/vnet/fib/mpls_fib.c | 456
-rw-r--r-- src/vnet/fib/mpls_fib.h | 139
-rw-r--r-- src/vnet/flow/flow.api | 147
-rw-r--r-- src/vnet/flow/flow_api.c | 397
-rw-r--r-- src/vnet/flow/flow_report.c | 507
-rw-r--r-- src/vnet/flow/flow_report.h | 146
-rw-r--r-- src/vnet/flow/flow_report_classify.c | 530
-rw-r--r-- src/vnet/flow/flow_report_classify.h | 122
-rw-r--r-- src/vnet/flow/ipfix_info_elements.h | 430
-rw-r--r-- src/vnet/flow/ipfix_packet.h | 188
-rw-r--r-- src/vnet/global_funcs.h | 32
-rw-r--r-- src/vnet/gre/error.def | 23
-rw-r--r-- src/vnet/gre/gre.api | 57
-rw-r--r-- src/vnet/gre/gre.c | 522
-rw-r--r-- src/vnet/gre/gre.h | 253
-rw-r--r-- src/vnet/gre/gre_api.c | 226
-rw-r--r-- src/vnet/gre/interface.c | 706
-rw-r--r-- src/vnet/gre/node.c | 703
-rw-r--r-- src/vnet/gre/packet.h | 55
-rw-r--r-- src/vnet/gre/pg.c | 77
-rw-r--r-- src/vnet/handoff.c | 594
-rw-r--r-- src/vnet/handoff.h | 257
-rw-r--r-- src/vnet/hdlc/error.def | 42
-rw-r--r-- src/vnet/hdlc/hdlc.c | 249
-rw-r--r-- src/vnet/hdlc/hdlc.h | 123
-rw-r--r-- src/vnet/hdlc/node.c | 358
-rw-r--r-- src/vnet/hdlc/packet.h | 72
-rw-r--r-- src/vnet/hdlc/pg.c | 105
-rw-r--r-- src/vnet/interface.api | 396
-rw-r--r-- src/vnet/interface.c | 1464
-rw-r--r-- src/vnet/interface.h | 711
-rw-r--r-- src/vnet/interface_api.c | 958
-rw-r--r-- src/vnet/interface_cli.c | 1660
-rw-r--r-- src/vnet/interface_format.c | 430
-rw-r--r-- src/vnet/interface_funcs.h | 347
-rw-r--r-- src/vnet/interface_output.c | 1260
-rw-r--r-- src/vnet/ip/dir.dox | 26
-rw-r--r-- src/vnet/ip/format.c | 121
-rw-r--r-- src/vnet/ip/format.h | 114
-rw-r--r-- src/vnet/ip/icmp4.c | 784
-rw-r--r-- src/vnet/ip/icmp4.h | 60
-rw-r--r-- src/vnet/ip/icmp46_packet.h | 398
-rw-r--r-- src/vnet/ip/icmp6.c | 882
-rw-r--r-- src/vnet/ip/icmp6.h | 82
-rw-r--r-- src/vnet/ip/igmp_packet.h | 155
-rw-r--r-- src/vnet/ip/ip.api | 551
-rw-r--r-- src/vnet/ip/ip.h | 203
-rw-r--r-- src/vnet/ip/ip4.h | 387
-rw-r--r-- src/vnet/ip/ip46_cli.c | 236
-rw-r--r-- src/vnet/ip/ip4_error.h | 95
-rw-r--r-- src/vnet/ip/ip4_format.c | 256
-rwxr-xr-x src/vnet/ip/ip4_forward.c | 3197
-rw-r--r-- src/vnet/ip/ip4_input.c | 507
-rw-r--r-- src/vnet/ip/ip4_mtrie.c | 811
-rw-r--r-- src/vnet/ip/ip4_mtrie.h | 237
-rw-r--r-- src/vnet/ip/ip4_packet.h | 385
-rw-r--r-- src/vnet/ip/ip4_pg.c | 387
-rw-r--r-- src/vnet/ip/ip4_source_and_port_range_check.c | 1424
-rw-r--r-- src/vnet/ip/ip4_source_check.c | 562
-rw-r--r-- src/vnet/ip/ip4_test.c | 347
-rw-r--r-- src/vnet/ip/ip4_to_ip6.h | 659
-rw-r--r-- src/vnet/ip/ip6.h | 605
-rw-r--r-- src/vnet/ip/ip6_error.h | 92
-rw-r--r-- src/vnet/ip/ip6_format.c | 383
-rw-r--r-- src/vnet/ip/ip6_forward.c | 3558
-rw-r--r-- src/vnet/ip/ip6_hop_by_hop.c | 1166
-rw-r--r-- src/vnet/ip/ip6_hop_by_hop.h | 277
-rw-r--r-- src/vnet/ip/ip6_hop_by_hop_packet.h | 56
-rw-r--r-- src/vnet/ip/ip6_input.c | 378
-rw-r--r-- src/vnet/ip/ip6_neighbor.c | 4332
-rw-r--r-- src/vnet/ip/ip6_neighbor.h | 109
-rw-r--r-- src/vnet/ip/ip6_packet.h | 536
-rw-r--r-- src/vnet/ip/ip6_pg.c | 231
-rw-r--r-- src/vnet/ip/ip6_to_ip4.h | 634
-rw-r--r-- src/vnet/ip/ip_api.c | 1825
-rw-r--r-- src/vnet/ip/ip_checksum.c | 228
-rw-r--r-- src/vnet/ip/ip_frag.c | 581
-rw-r--r-- src/vnet/ip/ip_frag.h | 96
-rw-r--r-- src/vnet/ip/ip_init.c | 152
-rw-r--r-- src/vnet/ip/ip_input_acl.c | 450
-rw-r--r-- src/vnet/ip/ip_packet.h | 180
-rw-r--r-- src/vnet/ip/ip_source_and_port_range_check.h | 148
-rw-r--r-- src/vnet/ip/lookup.c | 1442
-rw-r--r-- src/vnet/ip/lookup.h | 224
-rwxr-xr-x src/vnet/ip/ping.c | 928
-rw-r--r-- src/vnet/ip/ping.h | 115
-rw-r--r-- src/vnet/ip/ports.def | 757
-rw-r--r-- src/vnet/ip/protocols.def | 162
-rw-r--r-- src/vnet/ip/punt.c | 830
-rw-r--r-- src/vnet/ip/punt.h | 91
-rw-r--r-- src/vnet/ip/punt_error.def | 27
-rw-r--r-- src/vnet/ipsec-gre/dir.dox | 18
-rw-r--r-- src/vnet/ipsec-gre/error.def | 26
-rw-r--r-- src/vnet/ipsec-gre/interface.c | 319
-rw-r--r-- src/vnet/ipsec-gre/ipsec_gre.api | 79
-rw-r--r-- src/vnet/ipsec-gre/ipsec_gre.c | 407
-rw-r--r-- src/vnet/ipsec-gre/ipsec_gre.h | 114
-rw-r--r-- src/vnet/ipsec-gre/ipsec_gre_api.c | 190
-rw-r--r-- src/vnet/ipsec-gre/ipsec_gre_doc.md | 74
-rw-r--r-- src/vnet/ipsec-gre/node.c | 433
-rw-r--r-- src/vnet/ipsec/esp.h | 320
-rw-r--r-- src/vnet/ipsec/esp_decrypt.c | 435
-rw-r--r-- src/vnet/ipsec/esp_encrypt.c | 428
-rw-r--r-- src/vnet/ipsec/ikev2.c | 3450
-rw-r--r-- src/vnet/ipsec/ikev2.h | 435
-rw-r--r-- src/vnet/ipsec/ikev2_cli.c | 602
-rw-r--r-- src/vnet/ipsec/ikev2_crypto.c | 874
-rw-r--r-- src/vnet/ipsec/ikev2_format.c | 155
-rw-r--r-- src/vnet/ipsec/ikev2_payload.c | 550
-rw-r--r-- src/vnet/ipsec/ikev2_priv.h | 362
-rw-r--r-- src/vnet/ipsec/ipsec.api | 552
-rw-r--r-- src/vnet/ipsec/ipsec.c | 586
-rw-r--r-- src/vnet/ipsec/ipsec.h | 368
-rw-r--r-- src/vnet/ipsec/ipsec_api.c | 757
-rw-r--r-- src/vnet/ipsec/ipsec_cli.c | 863
-rw-r--r-- src/vnet/ipsec/ipsec_format.c | 141
-rw-r--r-- src/vnet/ipsec/ipsec_if.c | 413
-rw-r--r-- src/vnet/ipsec/ipsec_if_in.c | 221
-rw-r--r-- src/vnet/ipsec/ipsec_if_out.c | 172
-rw-r--r-- src/vnet/ipsec/ipsec_input.c | 430
-rw-r--r-- src/vnet/ipsec/ipsec_output.c | 458
-rw-r--r-- src/vnet/l2/dir.dox | 24
-rw-r--r-- src/vnet/l2/feat_bitmap.c | 185
-rw-r--r-- src/vnet/l2/feat_bitmap.h | 110
-rw-r--r-- src/vnet/l2/l2.api | 385
-rw-r--r-- src/vnet/l2/l2_api.c | 679
-rw-r--r-- src/vnet/l2/l2_bd.c | 1359
-rw-r--r-- src/vnet/l2/l2_bd.h | 190
-rw-r--r-- src/vnet/l2/l2_bvi.c | 40
-rw-r--r-- src/vnet/l2/l2_bvi.h | 117
-rw-r--r-- src/vnet/l2/l2_classify.h | 114
-rw-r--r-- src/vnet/l2/l2_efp_filter.c | 575
-rw-r--r-- src/vnet/l2/l2_efp_filter.h | 33
-rw-r--r-- src/vnet/l2/l2_fib.c | 1250
-rw-r--r-- src/vnet/l2/l2_fib.h | 432
-rw-r--r-- src/vnet/l2/l2_flood.c | 568
-rw-r--r-- src/vnet/l2/l2_flood.h | 35
-rw-r--r-- src/vnet/l2/l2_fwd.c | 577
-rw-r--r-- src/vnet/l2/l2_fwd.h | 36
-rw-r--r-- src/vnet/l2/l2_input.c | 1187
-rw-r--r-- src/vnet/l2/l2_input.h | 289
-rw-r--r-- src/vnet/l2/l2_input_acl.c | 431
-rw-r--r-- src/vnet/l2/l2_input_classify.c | 662
-rw-r--r-- src/vnet/l2/l2_input_vtr.c | 369
-rw-r--r-- src/vnet/l2/l2_input_vtr.h | 54
-rw-r--r-- src/vnet/l2/l2_learn.c | 596
-rw-r--r-- src/vnet/l2/l2_learn.h | 70
-rw-r--r-- src/vnet/l2/l2_output.c | 710
-rw-r--r-- src/vnet/l2/l2_output.h | 174
-rw-r--r-- src/vnet/l2/l2_output_acl.c | 341
-rw-r--r-- src/vnet/l2/l2_output_classify.c | 654
-rw-r--r-- src/vnet/l2/l2_patch.c | 466
-rw-r--r-- src/vnet/l2/l2_rw.c | 710
-rw-r--r-- src/vnet/l2/l2_rw.h | 95
-rw-r--r-- src/vnet/l2/l2_vtr.c | 831
-rw-r--r-- src/vnet/l2/l2_vtr.h | 281
-rw-r--r-- src/vnet/l2/l2_xcrw.c | 607
-rw-r--r-- src/vnet/l2/l2_xcrw.h | 91
-rw-r--r-- src/vnet/l2tp/decap.c | 309
-rw-r--r-- src/vnet/l2tp/encap.c | 238
-rw-r--r-- src/vnet/l2tp/l2tp.api | 104
-rw-r--r-- src/vnet/l2tp/l2tp.c | 766
-rw-r--r-- src/vnet/l2tp/l2tp.h | 147
-rw-r--r-- src/vnet/l2tp/l2tp_api.c | 267
-rw-r--r-- src/vnet/l2tp/packet.h | 44
-rw-r--r-- src/vnet/l2tp/pg.c | 106
-rw-r--r-- src/vnet/l3_types.h | 59
-rw-r--r-- src/vnet/lawful-intercept/lawful_intercept.c | 112
-rw-r--r-- src/vnet/lawful-intercept/lawful_intercept.h | 45
-rw-r--r-- src/vnet/lawful-intercept/node.c | 285
-rw-r--r-- src/vnet/lisp-cp/control.c | 4756
-rw-r--r-- src/vnet/lisp-cp/control.h | 435
-rw-r--r-- src/vnet/lisp-cp/gid_dictionary.c | 1055
-rw-r--r-- src/vnet/lisp-cp/gid_dictionary.h | 161
-rw-r--r-- src/vnet/lisp-cp/lisp.api | 750
-rw-r--r-- src/vnet/lisp-cp/lisp_api.c | 1342
-rw-r--r-- src/vnet/lisp-cp/lisp_cli.c | 1606
-rw-r--r-- src/vnet/lisp-cp/lisp_cp_dpo.c | 122
-rw-r--r-- src/vnet/lisp-cp/lisp_cp_dpo.h | 45
-rw-r--r-- src/vnet/lisp-cp/lisp_cp_messages.h | 644
-rw-r--r-- src/vnet/lisp-cp/lisp_msg_serdes.c | 380
-rw-r--r-- src/vnet/lisp-cp/lisp_msg_serdes.h | 58
-rw-r--r-- src/vnet/lisp-cp/lisp_types.c | 1779
-rw-r--r-- src/vnet/lisp-cp/lisp_types.h | 407
-rw-r--r-- src/vnet/lisp-cp/one.api | 1110
-rw-r--r-- src/vnet/lisp-cp/one_api.c | 1809
-rw-r--r-- src/vnet/lisp-cp/one_cli.c | 2158
-rw-r--r-- src/vnet/lisp-cp/packets.c | 226
-rw-r--r-- src/vnet/lisp-cp/packets.h | 38
-rw-r--r-- src/vnet/lisp-gpe/decap.c | 605
-rw-r--r-- src/vnet/lisp-gpe/dir.dox | 26
-rw-r--r-- src/vnet/lisp-gpe/interface.c | 941
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe.api | 248
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe.c | 726
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe.h | 338
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_adjacency.c | 634
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_adjacency.h | 136
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_api.c | 597
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_error.def | 18
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 1588
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h | 240
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_packet.h | 149
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_sub_interface.c | 285
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_sub_interface.h | 157
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_tenant.c | 330
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_tenant.h | 88
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_tunnel.c | 293
-rw-r--r-- src/vnet/lisp-gpe/lisp_gpe_tunnel.h | 89
-rw-r--r-- src/vnet/lisp-gpe/rfc.txt | 826
-rw-r--r-- src/vnet/llc/llc.c | 241
-rw-r--r-- src/vnet/llc/llc.h | 191
-rw-r--r-- src/vnet/llc/node.c | 331
-rw-r--r-- src/vnet/llc/pg.c | 113
-rw-r--r-- src/vnet/lldp/dir.dox | 18
-rw-r--r-- src/vnet/lldp/lldp.api | 47
-rw-r--r-- src/vnet/lldp/lldp.h | 32
-rw-r--r-- src/vnet/lldp/lldp_api.c | 144
-rw-r--r-- src/vnet/lldp/lldp_cli.c | 660
-rw-r--r-- src/vnet/lldp/lldp_doc.md | 86
-rw-r--r-- src/vnet/lldp/lldp_input.c | 302
-rw-r--r-- src/vnet/lldp/lldp_node.c | 341
-rw-r--r-- src/vnet/lldp/lldp_node.h | 148
-rw-r--r-- src/vnet/lldp/lldp_output.c | 235
-rw-r--r-- src/vnet/lldp/lldp_protocol.h | 142
-rwxr-xr-x src/vnet/map/examples/gen-rules.py | 186
-rw-r--r-- src/vnet/map/examples/health_check.c | 109
-rwxr-xr-x src/vnet/map/examples/test_map.py | 141
-rwxr-xr-x src/vnet/map/gen-rules.py | 107
-rw-r--r-- src/vnet/map/ip4_map.c | 754
-rw-r--r-- src/vnet/map/ip4_map_t.c | 812
-rw-r--r-- src/vnet/map/ip6_map.c | 1265
-rw-r--r-- src/vnet/map/ip6_map_t.c | 1039
-rw-r--r-- src/vnet/map/map.api | 160
-rw-r--r-- src/vnet/map/map.c | 2287
-rw-r--r-- src/vnet/map/map.h | 580
-rw-r--r-- src/vnet/map/map_api.c | 302
-rw-r--r-- src/vnet/map/map_doc.md | 69
-rw-r--r-- src/vnet/map/map_dpo.c | 130
-rw-r--r-- src/vnet/map/map_dpo.h | 43
-rw-r--r-- src/vnet/map/test.c | 205
-rw-r--r-- src/vnet/mfib/ip4_mfib.c | 494
-rw-r--r-- src/vnet/mfib/ip4_mfib.h | 105
-rw-r--r-- src/vnet/mfib/ip6_mfib.c | 699
-rw-r--r-- src/vnet/mfib/ip6_mfib.h | 121
-rw-r--r-- src/vnet/mfib/mfib_entry.c | 1313
-rw-r--r-- src/vnet/mfib/mfib_entry.h | 184
-rw-r--r-- src/vnet/mfib/mfib_forward.c | 525
-rw-r--r-- src/vnet/mfib/mfib_itf.c | 123
-rw-r--r-- src/vnet/mfib/mfib_itf.h | 63
-rw-r--r-- src/vnet/mfib/mfib_signal.c | 214
-rw-r--r-- src/vnet/mfib/mfib_signal.h | 59
-rw-r--r-- src/vnet/mfib/mfib_table.c | 647
-rw-r--r-- src/vnet/mfib/mfib_table.h | 424
-rw-r--r-- src/vnet/mfib/mfib_test.c | 1405
-rw-r--r-- src/vnet/mfib/mfib_types.c | 213
-rw-r--r-- src/vnet/mfib/mfib_types.h | 205
-rw-r--r-- src/vnet/misc.c | 127
-rw-r--r-- src/vnet/mpls/error.def | 31
-rw-r--r-- src/vnet/mpls/interface.c | 132
-rw-r--r-- src/vnet/mpls/mpls.api | 246
-rw-r--r-- src/vnet/mpls/mpls.c | 627
-rw-r--r-- src/vnet/mpls/mpls.h | 110
-rw-r--r-- src/vnet/mpls/mpls_api.c | 582
-rw-r--r-- src/vnet/mpls/mpls_features.c | 154
-rw-r--r-- src/vnet/mpls/mpls_input.c | 324
-rw-r--r-- src/vnet/mpls/mpls_lookup.c | 723
-rw-r--r-- src/vnet/mpls/mpls_lookup.h | 102
-rw-r--r-- src/vnet/mpls/mpls_output.c | 498
-rw-r--r-- src/vnet/mpls/mpls_tunnel.c | 1070
-rw-r--r-- src/vnet/mpls/mpls_tunnel.h | 137
-rw-r--r-- src/vnet/mpls/mpls_types.h | 60
-rw-r--r-- src/vnet/mpls/packet.h | 125
-rw-r--r-- src/vnet/mpls/pg.c | 71
-rw-r--r-- src/vnet/osi/node.c | 326
-rw-r--r-- src/vnet/osi/osi.c | 201
-rw-r--r-- src/vnet/osi/osi.h | 168
-rw-r--r-- src/vnet/osi/pg.c | 106
-rw-r--r-- src/vnet/pg/cli.c | 655
-rw-r--r-- src/vnet/pg/edit.c | 186
-rw-r--r-- src/vnet/pg/edit.h | 210
-rw-r--r-- src/vnet/pg/example.script | 6
-rw-r--r-- src/vnet/pg/init.c | 72
-rw-r--r-- src/vnet/pg/input.c | 1674
-rw-r--r-- src/vnet/pg/output.c | 95
-rw-r--r-- src/vnet/pg/pg.h | 390
-rw-r--r-- src/vnet/pg/stream.c | 512
-rw-r--r-- src/vnet/pipeline.h | 456
-rw-r--r-- src/vnet/plugin/plugin.h | 25
-rw-r--r-- src/vnet/policer/node_funcs.c | 942
-rw-r--r-- src/vnet/policer/police.h | 214
-rw-r--r-- src/vnet/policer/policer.api | 147
-rw-r--r-- src/vnet/policer/policer.c | 569
-rw-r--r-- src/vnet/policer/policer.h | 107
-rw-r--r-- src/vnet/policer/policer_api.c | 232
-rw-r--r-- src/vnet/policer/xlate.c | 1501
-rw-r--r-- src/vnet/policer/xlate.h | 186
-rw-r--r-- src/vnet/ppp/error.def | 42
-rw-r--r-- src/vnet/ppp/node.c | 377
-rw-r--r-- src/vnet/ppp/packet.h | 199
-rw-r--r-- src/vnet/ppp/pg.c | 114
-rw-r--r-- src/vnet/ppp/ppp.c | 261
-rw-r--r-- src/vnet/ppp/ppp.h | 132
-rw-r--r-- src/vnet/replication.c | 285
-rw-r--r-- src/vnet/replication.h | 137
-rw-r--r-- src/vnet/session/application.c | 657
-rw-r--r-- src/vnet/session/application.h | 136
-rw-r--r-- src/vnet/session/application_interface.c | 406
-rw-r--r-- src/vnet/session/application_interface.h | 183
-rw-r--r-- src/vnet/session/segment_manager.c | 636
-rw-r--r-- src/vnet/session/segment_manager.h | 131
-rw-r--r-- src/vnet/session/session.api | 331
-rw-r--r-- src/vnet/session/session.c | 1036
-rw-r--r-- src/vnet/session/session.h | 436
-rwxr-xr-x src/vnet/session/session_api.c | 763
-rwxr-xr-x src/vnet/session/session_cli.c | 494
-rw-r--r-- src/vnet/session/session_debug.h | 142
-rw-r--r-- src/vnet/session/session_lookup.c | 619
-rw-r--r-- src/vnet/session/session_lookup.h | 100
-rw-r--r-- src/vnet/session/session_node.c | 707
-rw-r--r-- src/vnet/session/stream_session.h | 92
-rw-r--r-- src/vnet/session/transport.h | 94
-rw-r--r-- src/vnet/session/transport_interface.c | 109
-rw-r--r-- src/vnet/session/transport_interface.h | 82
-rw-r--r-- src/vnet/snap/node.c | 353
-rw-r--r-- src/vnet/snap/pg.c | 116
-rw-r--r-- src/vnet/snap/snap.c | 204
-rw-r--r-- src/vnet/snap/snap.h | 206
-rw-r--r-- src/vnet/span/node.c | 366
-rw-r--r-- src/vnet/span/span.api | 56
-rw-r--r-- src/vnet/span/span.c | 249
-rw-r--r-- src/vnet/span/span.h | 77
-rw-r--r-- src/vnet/span/span_api.c | 159
-rw-r--r-- src/vnet/span/span_doc.md | 65
-rwxr-xr-x src/vnet/srmpls/dir.dox | 22
-rwxr-xr-x src/vnet/srmpls/sr.h | 152
-rw-r--r-- src/vnet/srmpls/sr_doc.md | 87
-rwxr-xr-x src/vnet/srmpls/sr_mpls_policy.c | 569
-rwxr-xr-x src/vnet/srmpls/sr_mpls_steering.c | 453
-rw-r--r-- src/vnet/srp/format.c | 147
-rw-r--r-- src/vnet/srp/interface.c | 458
-rw-r--r-- src/vnet/srp/node.c | 932
-rw-r--r-- src/vnet/srp/packet.h | 204
-rw-r--r-- src/vnet/srp/pg.c | 157
-rw-r--r-- src/vnet/srp/srp.h | 222
-rwxr-xr-x src/vnet/srv6/dir.dox | 25
-rwxr-xr-x src/vnet/srv6/ietf_draft_05.txt | 1564
-rw-r--r-- src/vnet/srv6/sr.api | 168
-rwxr-xr-x src/vnet/srv6/sr.c | 57
-rwxr-xr-x src/vnet/srv6/sr.h | 326
-rw-r--r-- src/vnet/srv6/sr_api.c | 254
-rw-r--r-- src/vnet/srv6/sr_doc.md | 55
-rwxr-xr-x src/vnet/srv6/sr_localsid.c | 1654
-rw-r--r-- src/vnet/srv6/sr_localsid.md | 58
-rwxr-xr-x src/vnet/srv6/sr_packet.h | 159
-rw-r--r-- src/vnet/srv6/sr_policy.md | 56
-rwxr-xr-x src/vnet/srv6/sr_policy_rewrite.c | 3231
-rwxr-xr-x src/vnet/srv6/sr_steering.c | 575
-rw-r--r-- src/vnet/srv6/sr_steering.md | 11
-rw-r--r-- src/vnet/tcp/builtin_client.c | 770
-rw-r--r-- src/vnet/tcp/builtin_client.h | 121
-rw-r--r-- src/vnet/tcp/builtin_http_server.c | 564
-rw-r--r-- src/vnet/tcp/builtin_proxy.c | 601
-rw-r--r-- src/vnet/tcp/builtin_proxy.h | 100
-rw-r--r-- src/vnet/tcp/builtin_server.c | 455
-rw-r--r-- src/vnet/tcp/tcp.api | 42
-rw-r--r-- src/vnet/tcp/tcp.c | 1943
-rw-r--r-- src/vnet/tcp/tcp.h | 985
-rw-r--r-- src/vnet/tcp/tcp_api.c | 119
-rwxr-xr-x src/vnet/tcp/tcp_debug.h | 761
-rw-r--r-- src/vnet/tcp/tcp_error.def | 43
-rw-r--r-- src/vnet/tcp/tcp_format.c | 137
-rw-r--r-- src/vnet/tcp/tcp_input.c | 3215
-rw-r--r-- src/vnet/tcp/tcp_newreno.c | 107
-rw-r--r-- src/vnet/tcp/tcp_output.c | 2113
-rw-r--r-- src/vnet/tcp/tcp_packet.h | 184
-rw-r--r-- src/vnet/tcp/tcp_pg.c | 244
-rw-r--r-- src/vnet/tcp/tcp_syn_filter4.c | 545
-rw-r--r-- src/vnet/tcp/tcp_test.c | 1764
-rw-r--r-- src/vnet/tcp/tcp_timer.h | 29
-rw-r--r-- src/vnet/udp/builtin_server.c | 261
-rw-r--r-- src/vnet/udp/udp.c | 342
-rw-r--r-- src/vnet/udp/udp.h | 371
-rw-r--r-- src/vnet/udp/udp_error.def | 21
-rw-r--r-- src/vnet/udp/udp_format.c | 91
-rw-r--r-- src/vnet/udp/udp_input.c | 322
-rw-r--r-- src/vnet/udp/udp_local.c | 656
-rw-r--r-- src/vnet/udp/udp_packet.h | 65
-rw-r--r-- src/vnet/udp/udp_pg.c | 237
-rw-r--r-- src/vnet/unix/gdb_funcs.c | 224
-rw-r--r-- src/vnet/unix/pcap.c | 241
-rw-r--r-- src/vnet/unix/pcap.h | 230
-rw-r--r-- src/vnet/unix/pcap2pg.c | 182
-rw-r--r-- src/vnet/unix/tap.api | 119
-rw-r--r-- src/vnet/unix/tap_api.c | 284
-rw-r--r-- src/vnet/unix/tapcli.c | 1491
-rw-r--r-- src/vnet/unix/tapcli.h | 52
-rw-r--r-- src/vnet/unix/tuntap.c | 1037
-rw-r--r-- src/vnet/unix/tuntap.h | 69
-rw-r--r-- src/vnet/util/radix.c | 1104
-rw-r--r-- src/vnet/util/radix.h | 147
-rw-r--r-- src/vnet/vnet.h | 95
-rw-r--r-- src/vnet/vnet_all_api_h.h | 69
-rw-r--r-- src/vnet/vnet_msg_enum.h | 37
-rw-r--r-- src/vnet/vxlan-gpe/decap.c | 1178
-rw-r--r-- src/vnet/vxlan-gpe/dir.dox | 32
-rw-r--r-- src/vnet/vxlan-gpe/encap.c | 388
-rw-r--r-- src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt | 868
-rw-r--r-- src/vnet/vxlan-gpe/vxlan_gpe.api | 79
-rw-r--r-- src/vnet/vxlan-gpe/vxlan_gpe.c | 1264
-rw-r--r-- src/vnet/vxlan-gpe/vxlan_gpe.h | 259
-rw-r--r-- src/vnet/vxlan-gpe/vxlan_gpe_api.c | 272
-rw-r--r-- src/vnet/vxlan-gpe/vxlan_gpe_error.def | 16
-rw-r--r-- src/vnet/vxlan-gpe/vxlan_gpe_packet.h | 110
-rw-r--r-- src/vnet/vxlan/decap.c | 1239
-rw-r--r-- src/vnet/vxlan/dir.dox | 24
-rw-r--r-- src/vnet/vxlan/encap.c | 622
-rw-r--r-- src/vnet/vxlan/vxlan.api | 71
-rw-r--r-- src/vnet/vxlan/vxlan.c | 1113
-rw-r--r-- src/vnet/vxlan/vxlan.h | 188
-rw-r--r-- src/vnet/vxlan/vxlan_api.c | 240
-rw-r--r-- src/vnet/vxlan/vxlan_error.def | 17
-rw-r--r-- src/vnet/vxlan/vxlan_packet.h | 69
-rw-r--r-- src/vpp-api-test.am | 62
-rw-r--r-- src/vpp-api.am | 46
-rw-r--r-- src/vpp-api/client/client.c | 489
-rw-r--r-- src/vpp-api/client/libvppapiclient.map | 19
-rw-r--r-- src/vpp-api/client/test.c | 140
-rw-r--r-- src/vpp-api/client/vppapiclient.h | 36
-rw-r--r-- src/vpp-api/java/Makefile.am | 263
-rw-r--r-- src/vpp-api/java/Readme.txt | 236
-rw-r--r-- src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclExpectedDumpData.java | 135
-rw-r--r-- src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclTestData.java | 101
-rw-r--r-- src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclTestRequests.java | 159
-rw-r--r-- src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/FutureApiExample.java | 68
-rw-r--r-- src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/CallbackApiTest.java | 33
-rw-r--r-- src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/FutureApiTest.java | 62
-rw-r--r-- src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-acl/jvpp_acl.c | 107
-rw-r--r-- src/vpp-api/java/jvpp-acl/jvpp_acl.h | 42
-rw-r--r-- src/vpp-api/java/jvpp-common/jvpp_common.c | 96
-rw-r--r-- src/vpp-api/java/jvpp-common/jvpp_common.h | 74
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackApiExample.java | 100
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeExample.java | 110
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java | 87
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java | 88
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CreateSubInterfaceExample.java | 121
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiExample.java | 127
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java | 55
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/L2AclExample.java | 195
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/LispAdjacencyExample.java | 125
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java | 52
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/Readme.txt | 17
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/CallbackApiTest.java | 33
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/FutureApiTest.java | 79
-rw-r--r-- src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/Readme.txt | 18
-rw-r--r-- src/vpp-api/java/jvpp-core/jvpp_core.c | 111
-rw-r--r-- src/vpp-api/java/jvpp-gtpu/jvpp_gtpu.c | 107
-rw-r--r-- src/vpp-api/java/jvpp-gtpu/jvpp_gtpu.h | 42
-rw-r--r-- src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/examples/IoamExportApiExample.java | 56
-rw-r--r-- src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/examples/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/CallbackApiTest.java | 33
-rw-r--r-- src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/FutureApiTest.java | 60
-rw-r--r-- src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-ioamexport/jvpp_ioam_export.c | 107
-rw-r--r-- src/vpp-api/java/jvpp-ioamexport/jvpp_ioam_export.h | 42
-rw-r--r-- src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/examples/IoamPotApiExample.java | 76
-rw-r--r-- src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/examples/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/CallbackApiTest.java | 33
-rw-r--r-- src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/FutureApiTest.java | 66
-rw-r--r-- src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-ioampot/jvpp_ioam_pot.c | 107
-rw-r--r-- src/vpp-api/java/jvpp-ioampot/jvpp_ioam_pot.h | 42
-rw-r--r-- src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/examples/IoamTraceApiExample.java | 77
-rw-r--r-- src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/examples/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/CallbackApiTest.java | 33
-rw-r--r-- src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/FutureApiTest.java | 60
-rw-r--r-- src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-ioamtrace/jvpp_ioam_trace.c | 107
-rw-r--r-- src/vpp-api/java/jvpp-ioamtrace/jvpp_ioam_trace.h | 42
-rw-r--r-- src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/examples/CallbackApiExample.java | 68
-rw-r--r-- src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/examples/Readme.txt | 1
-rw-r--r-- src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/CallbackApiTest.java | 33
-rw-r--r-- src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/FutureApiTest.java | 66
-rw-r--r-- src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/Readme.txt | 4
-rw-r--r-- src/vpp-api/java/jvpp-nat/jvpp_nat.c | 107
-rw-r--r-- src/vpp-api/java/jvpp-nat/jvpp_nat.h | 42
-rw-r--r-- src/vpp-api/java/jvpp-pppoe/jvpp_pppoe.c | 107
-rw-r--r-- src/vpp-api/java/jvpp-pppoe/jvpp_pppoe.h | 42
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/AbstractCallbackApiTest.java | 63
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/Assertions.java | 32
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVpp.java | 56
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistry.java | 76
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistryImpl.java | 154
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/NativeLibraryLoader.java | 73
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppBaseCallException.java | 79
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppCallbackException.java | 48
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppConnection.java | 45
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppInvocationException.java | 33
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppJNIConnection.java | 152
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/ControlPingCallback.java | 29
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/JVppCallback.java | 29
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/JVppNotificationCallback.java | 24
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/coverity/SuppressFBWarnings.java | 40
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/ControlPing.java | 34
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/ControlPingReply.java | 58
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppDump.java | 24
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppNotification.java | 23
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppReply.java | 24
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppReplyDump.java | 25
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppRequest.java | 34
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/future/AbstractFutureJVppInvoker.java | 141
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/future/FutureJVppInvoker.java | 49
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/notification/NotificationRegistry.java | 25
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/notification/NotificationRegistryProvider.java | 28
-rw-r--r-- src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/test/ConnectionTest.java | 44
-rw-r--r-- src/vpp-api/java/jvpp-registry/jvpp_registry.c | 402
-rwxr-xr-x src/vpp-api/java/jvpp/gen/jvpp_gen.py | 189
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/__init__.py | 0
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/callback_gen.py | 105
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/dto_gen.py | 310
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/jni_gen.py | 303
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/jvpp_c_gen.py | 392
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/jvpp_callback_facade_gen.py | 326
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/jvpp_future_facade_gen.py | 331
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/jvpp_impl_gen.py | 219
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/notification_gen.py | 199
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/types_gen.py | 232
-rw-r--r-- src/vpp-api/java/jvpp/gen/jvppgen/util.py | 212
-rw-r--r-- src/vpp-api/lua/README.md | 50
-rw-r--r-- src/vpp-api/lua/bench.lua | 70
-rw-r--r-- src/vpp-api/lua/examples/cli/README.md | 5
-rw-r--r-- src/vpp-api/lua/examples/cli/lua-cli.lua | 747
-rw-r--r-- src/vpp-api/lua/examples/example-acl-plugin.lua | 110
-rw-r--r-- src/vpp-api/lua/examples/example-classifier.lua | 51
-rw-r--r-- src/vpp-api/lua/examples/example-cli.lua | 44
-rw-r--r-- src/vpp-api/lua/examples/lute/README.md | 66
-rw-r--r-- src/vpp-api/lua/examples/lute/lute.lua | 777
-rw-r--r-- src/vpp-api/lua/examples/lute/script-inout-acl-noacl.lute | 329
-rw-r--r-- src/vpp-api/lua/examples/lute/script-inout-acl-old.lute | 329
-rw-r--r-- src/vpp-api/lua/examples/lute/script-inout-acl.lute | 329
-rw-r--r-- src/vpp-api/lua/examples/lute/script.lute | 7
-rw-r--r-- src/vpp-api/lua/examples/lute/sessions-acl.lute | 308
-rw-r--r-- src/vpp-api/lua/vpp-lapi.lua | 989
-rw-r--r-- src/vpp-api/python/LICENSE.txt | 202
-rw-r--r-- src/vpp-api/python/Makefile.am | 23
-rw-r--r-- src/vpp-api/python/README.rst | 0
-rw-r--r-- src/vpp-api/python/setup.cfg | 5
-rw-r--r-- src/vpp-api/python/setup.py | 33
-rwxr-xr-x src/vpp-api/python/tests/test_cli.py | 52
-rwxr-xr-x src/vpp-api/python/tests/test_modules.py | 18
-rwxr-xr-x src/vpp-api/python/tests/test_papi.py | 119
-rwxr-xr-x src/vpp-api/python/tests/test_version.py | 35
-rwxr-xr-x src/vpp-api/python/tests/test_vpp_papi2.py | 487
-rw-r--r-- src/vpp-api/python/vpp_papi.py | 706
-rw-r--r-- src/vpp-api/vapi/Makefile.am | 74
-rw-r--r-- src/vpp-api/vapi/libvapiclient.map | 44
-rw-r--r-- src/vpp-api/vapi/vapi.c | 933
-rw-r--r-- src/vpp-api/vapi/vapi.h | 263
-rw-r--r-- src/vpp-api/vapi/vapi.hpp | 905
-rwxr-xr-x src/vpp-api/vapi/vapi_c_gen.py | 693
-rw-r--r-- src/vpp-api/vapi/vapi_common.h | 61
-rwxr-xr-x src/vpp-api/vapi/vapi_cpp_gen.py | 263
-rw-r--r-- src/vpp-api/vapi/vapi_dbg.h | 77
-rw-r--r-- src/vpp-api/vapi/vapi_doc.md | 155
-rw-r--r-- src/vpp-api/vapi/vapi_internal.h | 138
-rw-r--r-- src/vpp-api/vapi/vapi_json_parser.py | 305
-rw-r--r-- src/vpp.am | 142
-rw-r--r-- src/vpp/api/api.c | 2468
l--------- src/vpp/api/api_format.c | 1
-rw-r--r-- src/vpp/api/api_main.c | 250
-rw-r--r-- src/vpp/api/custom_dump.c | 3231
-rw-r--r-- src/vpp/api/gmon.c | 306
-rw-r--r-- src/vpp/api/json_format.c | 304
-rw-r--r-- src/vpp/api/json_format.h | 254
-rw-r--r-- src/vpp/api/plugin.c | 201
-rw-r--r-- src/vpp/api/plugin.h | 61
-rw-r--r-- src/vpp/api/summary_stats_client.c | 302
-rw-r--r-- src/vpp/api/test_client.c | 1526
-rw-r--r-- src/vpp/api/test_ha.c | 249
l--------- src/vpp/api/vat.h | 1
-rw-r--r-- src/vpp/api/vpe.api | 936
-rw-r--r-- src/vpp/api/vpe_all_api_h.h | 40
-rw-r--r-- src/vpp/api/vpe_msg_enum.h | 37
-rw-r--r-- src/vpp/api/vpp_get_metrics.c | 253
-rw-r--r-- src/vpp/app/sticky_hash.c | 581
-rw-r--r-- src/vpp/app/version.c | 92
-rw-r--r-- src/vpp/app/vpe_cli.c | 136
-rw-r--r-- src/vpp/app/vppctl.c | 384
-rw-r--r-- src/vpp/conf/80-vpp.conf | 15
-rw-r--r-- src/vpp/conf/startup.conf | 133
-rw-r--r-- src/vpp/oam/oam.c | 644
-rw-r--r-- src/vpp/oam/oam.h | 96
-rw-r--r-- src/vpp/stats/stats.api | 265
-rw-r--r-- src/vpp/stats/stats.c | 2410
-rw-r--r-- src/vpp/stats/stats.h | 200
-rw-r--r-- src/vpp/stats/stats.reg | 42
-rw-r--r-- src/vpp/vnet/main.c | 367
-rw-r--r-- src/vppapigen.am | 29
-rw-r--r-- src/vppinfra.am | 308
-rw-r--r-- src/vppinfra/README | 43
-rw-r--r-- src/vppinfra/anneal.c | 172
-rw-r--r-- src/vppinfra/anneal.h | 89
-rw-r--r-- src/vppinfra/asm_mips.h | 351
-rw-r--r-- src/vppinfra/asm_x86.c | 1947
-rw-r--r-- src/vppinfra/asm_x86.h | 125
-rw-r--r-- src/vppinfra/backtrace.c | 267
-rw-r--r-- src/vppinfra/bihash_16_8.h | 84
-rw-r--r-- src/vppinfra/bihash_24_8.h | 85
-rw-r--r-- src/vppinfra/bihash_40_8.h | 87
-rw-r--r-- src/vppinfra/bihash_48_8.h | 89
-rw-r--r-- src/vppinfra/bihash_8_8.h | 99
-rw-r--r-- src/vppinfra/bihash_doc.h | 149
-rw-r--r-- src/vppinfra/bihash_template.c | 624
-rw-r--r-- src/vppinfra/bihash_template.h | 419
-rw-r--r-- src/vppinfra/bitmap.h | 774
-rw-r--r-- src/vppinfra/bitops.h | 179
-rw-r--r-- src/vppinfra/byte_order.h | 202
-rw-r--r-- src/vppinfra/cache.h | 104
-rw-r--r-- src/vppinfra/clib.h | 365
-rw-r--r-- src/vppinfra/clib_error.h | 35
-rw-r--r-- src/vppinfra/cpu.c | 133
-rw-r--r-- src/vppinfra/cpu.h | 112
-rw-r--r-- src/vppinfra/crc32.h | 84
-rw-r--r-- src/vppinfra/dir.dox | 19
-rw-r--r-- src/vppinfra/dlist.h | 156
-rw-r--r-- src/vppinfra/elf.c | 2040
-rw-r--r-- src/vppinfra/elf.h | 1062
-rw-r--r-- src/vppinfra/elf_clib.c | 377
-rw-r--r-- src/vppinfra/elf_clib.h | 144
-rw-r--r-- src/vppinfra/elog.c | 1113
-rw-r--r-- src/vppinfra/elog.h | 567
-rw-r--r-- src/vppinfra/error.c | 292
-rw-r--r-- src/vppinfra/error.h | 199
-rw-r--r-- src/vppinfra/error_bootstrap.h | 106
-rw-r--r-- src/vppinfra/fheap.c | 473
-rw-r--r-- src/vppinfra/fheap.h | 140
-rw-r--r-- src/vppinfra/fifo.c | 137
-rw-r--r-- src/vppinfra/fifo.h | 304
-rw-r--r-- src/vppinfra/file.h | 134
-rw-r--r-- src/vppinfra/format.c | 819
-rw-r--r-- src/vppinfra/format.h | 334
-rw-r--r-- src/vppinfra/graph.c | 182
-rw-r--r-- src/vppinfra/graph.h | 127
-rw-r--r-- src/vppinfra/hash.c | 1095
-rw-r--r-- src/vppinfra/hash.h | 694
-rw-r--r-- src/vppinfra/heap.c | 828
-rw-r--r-- src/vppinfra/heap.h | 357
-rw-r--r-- src/vppinfra/linux/mem.c | 266
-rw-r--r-- src/vppinfra/linux/syscall.h | 56
-rw-r--r-- src/vppinfra/linux/sysfs.c | 250
-rw-r--r-- src/vppinfra/linux/sysfs.h | 46
-rw-r--r-- src/vppinfra/lock.h | 99
-rw-r--r-- src/vppinfra/longjmp.S | 690
-rw-r--r-- src/vppinfra/longjmp.h | 124
-rw-r--r-- src/vppinfra/macros.c | 266
-rw-r--r-- src/vppinfra/macros.h | 54
-rw-r--r-- src/vppinfra/math.h | 71
-rw-r--r-- src/vppinfra/md5.c | 317
-rw-r--r-- src/vppinfra/md5.h | 57
-rw-r--r-- src/vppinfra/mem.h | 365
-rw-r--r-- src/vppinfra/mem_mheap.c | 165
-rw-r--r-- src/vppinfra/memcheck.h | 317
-rw-r--r-- src/vppinfra/memcpy_avx.h | 296
-rw-r--r-- src/vppinfra/memcpy_sse3.h | 356
-rw-r--r-- src/vppinfra/mhash.c | 408
-rw-r--r-- src/vppinfra/mhash.h | 179
-rw-r--r-- src/vppinfra/mheap.c | 1643
-rw-r--r-- src/vppinfra/mheap.h | 94
-rw-r--r-- src/vppinfra/mheap_bootstrap.h | 374
-rw-r--r-- src/vppinfra/mod_test_hash.c | 27
-rw-r--r-- src/vppinfra/os.h | 88
-rw-r--r-- src/vppinfra/pfhash.c | 689
-rw-r--r-- src/vppinfra/pfhash.h | 276
-rw-r--r-- src/vppinfra/phash.c | 1017
-rw-r--r-- src/vppinfra/phash.h | 194
-rw-r--r-- src/vppinfra/pipeline.h | 176
-rw-r--r--src/vppinfra/pool.c131
-rw-r--r--src/vppinfra/pool.h519
-rw-r--r--src/vppinfra/ptclosure.c125
-rw-r--r--src/vppinfra/ptclosure.h40
-rw-r--r--src/vppinfra/qhash.c858
-rw-r--r--src/vppinfra/qhash.h169
-rw-r--r--src/vppinfra/qsort.c269
-rw-r--r--src/vppinfra/random.c51
-rw-r--r--src/vppinfra/random.h178
-rw-r--r--src/vppinfra/random_buffer.c86
-rw-r--r--src/vppinfra/random_buffer.h118
-rw-r--r--src/vppinfra/random_isaac.c434
-rw-r--r--src/vppinfra/random_isaac.h81
-rw-r--r--src/vppinfra/serialize.c1254
-rw-r--r--src/vppinfra/serialize.h443
-rw-r--r--src/vppinfra/slist.c336
-rw-r--r--src/vppinfra/slist.h145
-rw-r--r--src/vppinfra/smp.c325
-rw-r--r--src/vppinfra/smp.h81
-rw-r--r--src/vppinfra/smp_fifo.c91
-rw-r--r--src/vppinfra/smp_fifo.h313
-rw-r--r--src/vppinfra/socket.c559
-rw-r--r--src/vppinfra/socket.h192
-rw-r--r--src/vppinfra/sparse_vec.h244
-rw-r--r--src/vppinfra/std-formats.c330
-rw-r--r--src/vppinfra/string.c94
-rw-r--r--src/vppinfra/string.h83
-rw-r--r--src/vppinfra/test_bihash_template.c369
-rw-r--r--src/vppinfra/test_dlist.c193
-rw-r--r--src/vppinfra/test_elf.c217
-rw-r--r--src/vppinfra/test_elog.c315
-rw-r--r--src/vppinfra/test_fifo.c144
-rw-r--r--src/vppinfra/test_format.c199
-rw-r--r--src/vppinfra/test_fpool.c69
-rw-r--r--src/vppinfra/test_hash.c458
-rw-r--r--src/vppinfra/test_heap.c198
-rw-r--r--src/vppinfra/test_longjmp.c129
-rw-r--r--src/vppinfra/test_macros.c64
-rw-r--r--src/vppinfra/test_md5.c141
-rw-r--r--src/vppinfra/test_mheap.c242
-rw-r--r--src/vppinfra/test_pfhash.c322
-rw-r--r--src/vppinfra/test_phash.c149
-rw-r--r--src/vppinfra/test_pool.c86
-rw-r--r--src/vppinfra/test_pool_iterate.c59
-rw-r--r--src/vppinfra/test_ptclosure.c212
-rw-r--r--src/vppinfra/test_qhash.c333
-rw-r--r--src/vppinfra/test_random.c148
-rw-r--r--src/vppinfra/test_random_isaac.c142
-rw-r--r--src/vppinfra/test_serialize.c274
-rw-r--r--src/vppinfra/test_slist.c228
-rw-r--r--src/vppinfra/test_socket.c134
-rw-r--r--src/vppinfra/test_time.c104
-rw-r--r--src/vppinfra/test_timing_wheel.c389
-rw-r--r--src/vppinfra/test_tw_timer.c1275
-rw-r--r--src/vppinfra/test_vec.c1159
-rw-r--r--src/vppinfra/test_vec.h243
-rw-r--r--src/vppinfra/test_vhash.c757
-rw-r--r--src/vppinfra/test_zvec.c117
-rw-r--r--src/vppinfra/time.c232
-rw-r--r--src/vppinfra/time.h312
-rw-r--r--src/vppinfra/timer.c322
-rw-r--r--src/vppinfra/timer.h46
-rw-r--r--src/vppinfra/timing_wheel.c759
-rw-r--r--src/vppinfra/timing_wheel.h155
-rw-r--r--src/vppinfra/tw_timer_16t_1w_2048sl.c26
-rw-r--r--src/vppinfra/tw_timer_16t_1w_2048sl.h52
-rw-r--r--src/vppinfra/tw_timer_16t_2w_512sl.c26
-rw-r--r--src/vppinfra/tw_timer_16t_2w_512sl.h52
-rw-r--r--src/vppinfra/tw_timer_1t_3w_1024sl_ov.c26
-rw-r--r--src/vppinfra/tw_timer_1t_3w_1024sl_ov.h53
-rw-r--r--src/vppinfra/tw_timer_2t_1w_2048sl.c26
-rw-r--r--src/vppinfra/tw_timer_2t_1w_2048sl.h52
-rw-r--r--src/vppinfra/tw_timer_4t_3w_256sl.c26
-rw-r--r--src/vppinfra/tw_timer_4t_3w_256sl.h52
-rw-r--r--src/vppinfra/tw_timer_4t_3w_4sl_ov.c32
-rw-r--r--src/vppinfra/tw_timer_4t_3w_4sl_ov.h53
-rw-r--r--src/vppinfra/tw_timer_template.c832
-rw-r--r--src/vppinfra/tw_timer_template.h267
-rw-r--r--src/vppinfra/types.h174
-rw-r--r--src/vppinfra/unformat.c1083
-rw-r--r--src/vppinfra/unix-formats.c956
-rw-r--r--src/vppinfra/unix-kelog.c415
-rw-r--r--src/vppinfra/unix-misc.c237
-rw-r--r--src/vppinfra/unix.h64
-rw-r--r--src/vppinfra/unix_error.def145
-rw-r--r--src/vppinfra/valgrind.h4030
-rw-r--r--src/vppinfra/vec.c171
-rw-r--r--src/vppinfra/vec.h1009
-rw-r--r--src/vppinfra/vec_bootstrap.h201
-rw-r--r--src/vppinfra/vector.c54
-rw-r--r--src/vppinfra/vector.h268
-rw-r--r--src/vppinfra/vector_altivec.h178
-rw-r--r--src/vppinfra/vector_funcs.h334
-rw-r--r--src/vppinfra/vector_iwmmxt.h149
-rw-r--r--src/vppinfra/vector_neon.h71
-rw-r--r--src/vppinfra/vector_sse2.h705
-rw-r--r--src/vppinfra/vhash.c772
-rw-r--r--src/vppinfra/vhash.h850
-rw-r--r--src/vppinfra/xxhash.h86
-rw-r--r--src/vppinfra/xy.h56
-rw-r--r--src/vppinfra/zvec.c442
-rw-r--r--src/vppinfra/zvec.h166
1489 files changed, 558542 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 00000000..7b35e50c
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,103 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+# Global Defines
+###############################################################################
+
+AUTOMAKE_OPTIONS = foreign subdir-objects
+ACLOCAL_AMFLAGS = -I m4
+AM_LIBTOOLFLAGS = --quiet
+
+AM_CFLAGS = -Wall
+
+SUBDIRS = .
+SUFFIXES = .api.h .api .api.json
+API_FILES =
+noinst_HEADERS =
+dist_bin_SCRIPTS =
+lib_LTLIBRARIES =
+BUILT_SOURCES =
+CLEANFILES =
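+
+# Collect every registered CLI command path (".path = ") from the sources,
+# excluding examples, into scripts/vppctl-cmd-list (used by vppctl for
+# command completion).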
+install-data-local:
+ @echo "Building vppctl command list..."
+ @DIR_SEARCH="$(srcdir)" ; \
+ DIR_EXCLUDE="examples" ; \
+ GREP_TIME=`time (grep -wIr "\.path = " $$DIR_SEARCH --exclude-dir=$$DIR_EXCLUDE \
+ | cut -d '"' -f2 | sort -u > $(srcdir)/scripts/vppctl-cmd-list) 2>&1` ; \
+ GREP_TIME=`echo $$GREP_TIME | awk '{print $$2}'` ; \
+ echo "Command list built, Time taken: $$GREP_TIME"
+
+###############################################################################
+# Components
+###############################################################################
+
+include vppinfra.am
+include vppapigen.am
+
+if ENABLE_PERFTOOL
+include perftool.am
+endif
+
+if ENABLE_G2
+include g2.am
+endif
+
+if ENABLE_SVM
+include svm.am
+endif
+
+if ENABLE_VLIB
+include vlib.am
+endif
+
+if ENABLE_SVM
+if ENABLE_VLIB
+include vlib-api.am
+include vnet.am
+include vpp.am
+include vpp-api-test.am
+include uri.am
+
+SUBDIRS += plugins
+
+if ENABLE_PAPI
+include vpp-api.am
+SUBDIRS += vpp-api/python
+endif
+
+if ENABLE_JAPI
+SUBDIRS += vpp-api/java
+endif
+
+SUBDIRS += vpp-api/vapi
+
+###############################################################################
+# API
+###############################################################################
+
+include suffix-rules.mk
+
+# Install JSON representations of the core API definitions
+apidir = $(prefix)/share/vpp/api/core
+
+api_DATA = \
+ $(patsubst %.api,%.api.json,$(API_FILES))
+
+BUILT_SOURCES += \
+ $(patsubst %.api,%.api.h,$(API_FILES))
+
+endif # if ENABLE_VLIB
+endif # if ENABLE_SVM
+
+CLEANFILES += $(BUILT_SOURCES) $(api_DATA)
diff --git a/src/configure.ac b/src/configure.ac
new file mode 100644
index 00000000..f5ce3be2
--- /dev/null
+++ b/src/configure.ac
@@ -0,0 +1,347 @@
+AC_INIT([vpp], [17.10], [vpp-dev@fd.io])
+LT_INIT
+AC_CONFIG_AUX_DIR([.])
+AM_INIT_AUTOMAKE([subdir-objects])
+AM_SILENT_RULES([yes])
+AC_CONFIG_FILES([Makefile plugins/Makefile vpp-api/python/Makefile vpp-api/java/Makefile vpp-api/vapi/Makefile])
+AC_CONFIG_MACRO_DIR([m4])
+
+AC_PROG_CC
+AC_PROG_CXX
+AM_PROG_AS
+AM_PROG_LIBTOOL
+AC_PROG_YACC
+AM_PATH_PYTHON
+
+AM_CONDITIONAL([CROSSCOMPILE], [test "$cross_compiling" = "yes"])
+
+###############################################################################
+# Macros
+###############################################################################
+
+AC_DEFUN([ENABLE_ARG],
+[
+ AC_ARG_ENABLE($1,
+ AC_HELP_STRING(patsubst([--enable-$1],[_],[-]), $2),
+ [enable_$1=yes n_enable_$1=1],
+ [enable_$1=no n_enable_$1=0])
+ AM_CONDITIONAL(m4_toupper(ENABLE_$1), test "$enable_$1" = "yes")
+ m4_append([list_of_enabled], [$1], [, ])
+])
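+
+# Example: ENABLE_ARG(g2, [Enable g2]) adds --enable-g2, sets $enable_g2
+# and $n_enable_g2, and defines the ENABLE_G2 automake conditional.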
+
+AC_DEFUN([DISABLE_ARG],
+[
+ AC_ARG_ENABLE($1,
+ AC_HELP_STRING(patsubst([--disable-$1],[_],[-]), $2),
+ [enable_$1=no n_enable_$1=0],
+ [enable_$1=yes n_enable_$1=1])
+ AM_CONDITIONAL(m4_toupper(ENABLE_$1), test "$enable_$1" = "yes")
+ m4_append([list_of_enabled], [$1], [, ])
+])
+
+AC_DEFUN([WITH_ARG],
+[
+ AC_ARG_WITH($1,
+ AC_HELP_STRING(patsubst([--with-$1],[_],[-]), $2),
+ [with_$1=yes n_with_$1=1],
+ [with_$1=no n_with_$1=0])
+ AM_CONDITIONAL(m4_toupper(WITH_$1), test "$with_$1" = "yes")
+ m4_append([list_of_with], [$1], [, ])
+])
+
+AC_DEFUN([WITHOUT_ARG],
+[
+ AC_ARG_WITH($1,
+ AC_HELP_STRING(patsubst([--without-$1],[_],[-]), $2),
+ [with_$1=no n_with_$1=0],
+ [with_$1=yes n_with_$1=1])
+ AM_CONDITIONAL(m4_toupper(WITH_$1), test "$with_$1" = "yes")
+ m4_append([list_of_with], [$1], [, ])
+])
+
+AC_DEFUN([PLUGIN_ENABLED],
+[
+ AC_ARG_ENABLE($1_plugin,
+ AC_HELP_STRING([--disable-$1-plugin], [Do not build $1 plugin]),
+ [enable_$1_plugin=no],
+ [enable_$1_plugin=yes ])
+ AM_CONDITIONAL(m4_toupper(ENABLE_$1_PLUGIN), test "$enable_$1_plugin" = "yes")
+ m4_append([list_of_plugins], [$1], [, ])
+])
+
+AC_DEFUN([PLUGIN_DISABLED],
+[
+ AC_ARG_ENABLE($1_plugin,
+ AC_HELP_STRING([--enable-$1-plugin], [Build $1 plugin]),
+ [enable_$1_plugin=yes ],
+ [enable_$1_plugin=no])
+ AM_CONDITIONAL(m4_toupper(ENABLE_$1_PLUGIN), test "$enable_$1_plugin" = "yes")
+ m4_append([list_of_plugins], [$1], [, ])
+])
+
+AC_DEFUN([PRINT_VAL], [ AC_MSG_RESULT(AC_HELP_STRING($1,$2)) ])
+
+AC_DEFUN([DPDK_IS_PMD_ENABLED],
+[
+ AC_MSG_CHECKING([for $1 in rte_config.h])
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+ [[#include <rte_config.h>]],
+ [[return RTE_$1;]],
+ )],
+ [with_$2=yes]
+ [AC_MSG_RESULT([yes])],
+ [with_$2=no]
+ [AC_MSG_RESULT([no])]
+ )
+ AM_CONDITIONAL(m4_toupper(WITH_$2), test "$with_$2" = "yes")
+ m4_append_uniq([list_of_with], [$2], [, ])
+])
+
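+# AC_TRY_RUN takes the success branch only when the test program exits 0,
+# i.e. when RTE_VERSION is exactly 17.02 or 17.05.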
+AC_DEFUN([DETECT_DPDK_IS_1702_OR_1705],
+[
+ AC_MSG_CHECKING([for RTE_VERSION 17.02/17.05 in rte_version.h])
+ AC_TRY_RUN(
+ [
+ #include <rte_version.h>
+ int main()
+ {
+ return ((RTE_VER_YEAR != 17) ||
+ (RTE_VER_MONTH != 2 && RTE_VER_MONTH != 5));
+ }
+ ],
+ [dpdk_is_1702_or_1705=yes]
+ [AC_MSG_RESULT([yes])],
+ [dpdk_is_1702_or_1705=no]
+ [AC_MSG_RESULT([no])]
+ )
+ AM_CONDITIONAL(DPDK_IS_1702_OR_1705, test "$dpdk_is_1702_or_1705" = "yes")
+])
+
+###############################################################################
+# configure arguments
+###############################################################################
+
+# --enable-X
+ENABLE_ARG(tests, [Enable unit tests])
+ENABLE_ARG(dpdk_shared, [Enable use of DPDK shared libraries])
+ENABLE_ARG(perftool, [Enable perftool])
+ENABLE_ARG(g2, [Enable g2])
+
+# --disable-X
+DISABLE_ARG(vlib, [Disable vlib and dependant libs and binaries])
+DISABLE_ARG(svm, [Disable svm and dependant libs and binaries])
+DISABLE_ARG(papi, [Disable Python API bindings])
+DISABLE_ARG(japi, [Disable Java API bindings])
+
+# --with-X
+
+# --without-X
+WITHOUT_ARG(libssl, [Disable libssl])
+WITHOUT_ARG(apicli, [Disable binary api CLI])
+
+AC_ARG_WITH(unix,
+ AC_HELP_STRING([--with-unix],[Compile unix version of clib]),
+ [],
+ [case $host_os in
+ darwin* | linux*) with_unix=yes;;
+ *) with_unix=no;;
+ esac])
+
+AM_CONDITIONAL(WITH_UNIX, test "$with_unix" = "yes")
+
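+# Buffer rewrite headroom: only 128 or 256 are accepted; any other value
+# is replaced with the sentinel "pre-data-not-set".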
+AC_ARG_WITH(pre-data,
+ AC_HELP_STRING([--with-pre-data],[Set buffer rewrite space]),
+ [case $with_pre_data in
+ 128) ;;
+ 256) ;;
+ *) with_pre_data="pre-data-not-set" ;;
+ esac], [with_pre_data=128])
+
+###############################################################################
+# Substitutions and defines
+###############################################################################
+
+AC_SUBST(PRE_DATA_SIZE, [$with_pre_data])
+AC_SUBST(APICLI, [-DVPP_API_TEST_BUILTIN=${n_with_apicli}])
+
+AC_DEFINE_UNQUOTED(DPDK_SHARED_LIB, [${n_enable_dpdk_shared}])
+AC_DEFINE_UNQUOTED(WITH_LIBSSL, [${n_with_libssl}])
+
+
+# Silence following noise:
+# ar: `u' modifier ignored since `D' is the default (see `U')
+AR_FLAGS=cr
+AC_SUBST(AR_FLAGS)
+
+###############################################################################
+# Plugins
+###############################################################################
+
+# Please keep alphabetical order
+PLUGIN_ENABLED(acl)
+PLUGIN_ENABLED(dpdk)
+PLUGIN_ENABLED(flowprobe)
+PLUGIN_ENABLED(gtpu)
+PLUGIN_ENABLED(ila)
+PLUGIN_ENABLED(ioam)
+PLUGIN_ENABLED(ixge)
+PLUGIN_ENABLED(lb)
+PLUGIN_ENABLED(memif)
+PLUGIN_ENABLED(pppoe)
+PLUGIN_ENABLED(sixrd)
+PLUGIN_ENABLED(nat)
+
+###############################################################################
+# Dependency checks
+###############################################################################
+
+AM_COND_IF([ENABLE_DPDK_SHARED],
+[
+ AC_CHECK_HEADERS([rte_config.h],
+ [],
+ [AC_MSG_ERROR([DPDK header files not found])],)
+ AC_CHECK_LIB( [dpdk], [rte_eal_init],
+ [],
+ [AC_MSG_ERROR([DPDK shared library not found])],)
+])
+
+with_aesni_mb_lib=no
+with_isa_l_crypto_lib=no
+
+DPDK_IS_PMD_ENABLED(LIBRTE_PMD_AESNI_MB, dpdk_aesni_mb_pmd)
+DPDK_IS_PMD_ENABLED(LIBRTE_PMD_AESNI_GCM, dpdk_aesni_gcm_pmd)
+
+DETECT_DPDK_IS_1702_OR_1705()
+
+AM_COND_IF([WITH_DPDK_AESNI_MB_PMD],
+[
+ AC_CHECK_LIB([IPSec_MB], [submit_job_sse],
+ [with_aesni_mb_lib=yes],
+ [AC_MSG_ERROR([IPSec_MB library not found])])
+])
+
+AM_COND_IF([WITH_DPDK_AESNI_GCM_PMD],
+[
+ AM_COND_IF([DPDK_IS_1702_OR_1705],
+ [
+ AC_CHECK_LIB([isal_crypto], [aesni_gcm128_init],
+ [with_isa_l_crypto_lib=yes],
+ [AC_MSG_ERROR([isal_crypto library not found])])
+ ],
+ [
+ AC_CHECK_LIB([IPSec_MB], [submit_job_sse],
+ [with_aesni_mb_lib=yes],
+ [AC_MSG_ERROR([IPSec_MB library not found])])
+ ])
+])
+
+m4_append([list_of_with], [aesni_mb_lib], [, ])
+AM_CONDITIONAL(WITH_AESNI_MB_LIB, test "$with_aesni_mb_lib" = "yes")
+
+m4_append([list_of_with], [isa_l_crypto_lib], [, ])
+AM_CONDITIONAL(WITH_ISA_L_CRYPTO_LIB, test "$with_isa_l_crypto_lib" = "yes")
+
+
+with_ibverbs_lib=no
+DPDK_IS_PMD_ENABLED(LIBRTE_MLX4_PMD, dpdk_mlx4_pmd)
+AM_COND_IF([WITH_DPDK_MLX4_PMD],
+[
+ AC_CHECK_LIB([ibverbs], [ibv_fork_init],
+ [with_ibverbs_lib=yes],
+ [AC_MSG_ERROR([ibverbs library not found])])
+])
+
+DPDK_IS_PMD_ENABLED(LIBRTE_MLX5_PMD, dpdk_mlx5_pmd)
+AM_COND_IF([WITH_DPDK_MLX5_PMD],
+[
+ AC_CHECK_LIB([ibverbs], [ibv_fork_init],
+ [with_ibverbs_lib=yes],
+ [AC_MSG_ERROR([ibverbs library not found])])
+])
+
+m4_append([list_of_with], [ibverbs_lib], [, ])
+AM_CONDITIONAL(WITH_IBVERBS_LIB, test "$with_ibverbs_lib" = "yes")
+
+
+AM_COND_IF([ENABLE_G2],
+[
+ PKG_CHECK_MODULES(g2, gtk+-2.0)
+])
+
+# If cross-compiling, we need external vppapigen and we cannot continue without it
+# For native builds, we just set a dependency on the vppapigen binary in top_builddir
+AM_COND_IF([CROSSCOMPILE],
+[
+ AC_PATH_PROG([VPPAPIGEN], [vppapigen], [no])
+ if test "$VPPAPIGEN" = "no"; then
+    AC_MSG_ERROR([Externally built vppapigen is needed when cross-compiling...])
+ fi
+],[
+ VPPAPIGEN=\$\(top_builddir\)/vppapigen
+])
+AC_SUBST([VPPAPIGEN])
+
+
+###############################################################################
+# JAVA
+###############################################################################
+
+AM_COND_IF([ENABLE_JAPI],
+[
+ AX_VPP_FIND_JDK8
+ AC_SUBST(JAVA_HOME)
+ AC_SUBST(JAVAC)
+ AC_SUBST(JAVAH)
+ AC_SUBST(JAR)
+])
+
+###############################################################################
+# PYTHON
+###############################################################################
+
+AM_COND_IF([ENABLE_PAPI],
+[
+ AM_PATH_PYTHON
+])
+
+###############################################################################
+# Output
+###############################################################################
+
+AC_OUTPUT
+
+AC_MSG_RESULT([==============================================================================])
+PRINT_VAL([version], $PACKAGE $VERSION)
+PRINT_VAL([prefix], ${prefix})
+PRINT_VAL([libdir], ${libdir})
+PRINT_VAL([includedir], ${includedir})
+PRINT_VAL([CFLAGS], ${CFLAGS})
+PRINT_VAL([CPPFLAGS], ${CPPFLAGS})
+PRINT_VAL([LDFLAGS], ${LDFLAGS})
+AM_COND_IF([ENABLE_JAPI],
+[
+ PRINT_VAL([JAVA_VERSION], ${JAVA_VERSION})
+ PRINT_VAL([JAVA_HOME], ${JAVA_HOME})
+])
+
+AC_MSG_RESULT([])
+AC_MSG_RESULT([with:])
+m4_foreach([x], m4_dquote(list_of_with), [
+ AC_MSG_RESULT(AC_HELP_STRING(x, m4_join([], [${with_], x, [}])))
+])
+
+AC_MSG_RESULT([])
+AC_MSG_RESULT([enabled:])
+m4_foreach([x], m4_dquote(list_of_enabled), [
+ AC_MSG_RESULT(AC_HELP_STRING(x, m4_join([], [${enable_], x, [}])))
+])
+
+AC_MSG_RESULT([])
+AC_MSG_RESULT([plugins:])
+m4_foreach([x], m4_dquote(list_of_plugins), [
+ AC_MSG_RESULT(AC_HELP_STRING(x, m4_join([], [${enable_], x, [_plugin}])))
+])
+AC_MSG_RESULT([==============================================================================])
+
+
diff --git a/src/examples/sample-plugin/Makefile.am b/src/examples/sample-plugin/Makefile.am
new file mode 100644
index 00000000..a3a9a8d6
--- /dev/null
+++ b/src/examples/sample-plugin/Makefile.am
@@ -0,0 +1,59 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+AUTOMAKE_OPTIONS = foreign subdir-objects
+
+AM_CFLAGS = -Wall -I${top_srcdir} -I${top_builddir}
+AM_LDFLAGS = -module -shared -avoid-version
+AM_LIBTOOLFLAGS = --quiet
+SUFFIXES = .api.h .api .api.json
+API_FILES =
+BUILT_SOURCES =
+vppplugins_LTLIBRARIES =
+vppapitestplugins_LTLIBRARIES =
+noinst_HEADERS =
+nobase_apiinclude_HEADERS =
+ACLOCAL_AMFLAGS = -I m4
+
+vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins
+vpppluginsdir = ${libdir}/vpp_plugins
+
+include sample.am
+
+%.api.h: %.api
+ mkdir -p `dirname $@` ; \
+ $(CC) $(CPPFLAGS) -E -P -C -x c $^ \
+ | vppapigen --input - --output $@ --show-name $@
+
+%.api.json: %.api
+ @echo " JSON APIGEN " $@ ; \
+ mkdir -p `dirname $@` ; \
+ $(CC) $(CPPFLAGS) -E -P -C -x c $^ \
+ | vppapigen --input - --json $@
+
+apidir = $(prefix)/api/plugins
+apiincludedir = ${includedir}/vpp_plugins
+
+api_DATA = \
+ $(patsubst %.api,%.api.json,$(API_FILES))
+
+BUILT_SOURCES += \
+ $(patsubst %.api,%.api.h,$(API_FILES))
+
+
+# Remove *.la files
+install-data-hook:
+ @(cd $(vpppluginsdir) && $(RM) $(vppplugins_LTLIBRARIES))
+ @(cd $(vppapitestpluginsdir) && $(RM) $(vppapitestplugins_LTLIBRARIES))
+
+CLEANFILES = $(BUILT_SOURCES)
diff --git a/src/examples/sample-plugin/configure.ac b/src/examples/sample-plugin/configure.ac
new file mode 100644
index 00000000..204da2fe
--- /dev/null
+++ b/src/examples/sample-plugin/configure.ac
@@ -0,0 +1,11 @@
+AC_INIT(vpp_plugins, 1.0)
+LT_INIT
+AM_INIT_AUTOMAKE
+AM_SILENT_RULES([yes])
+AC_PREFIX_DEFAULT([/usr])
+
+AC_PROG_CC
+
+AC_OUTPUT([Makefile])
+
+AC_CONFIG_MACRO_DIR([m4])
diff --git a/src/examples/sample-plugin/sample.am b/src/examples/sample-plugin/sample.am
new file mode 100644
index 00000000..871b610a
--- /dev/null
+++ b/src/examples/sample-plugin/sample.am
@@ -0,0 +1,31 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += sample_test_plugin.la
+vppplugins_LTLIBRARIES += sample_plugin.la
+
+sample_plugin_la_SOURCES = \
+ sample/sample.c \
+ sample/node.c \
+ sample/sample_plugin.api.h
+
+API_FILES += sample/sample.api
+
+nobase_apiinclude_HEADERS += \
+ sample/sample_all_api_h.h \
+ sample/sample_msg_enum.h \
+ sample/sample.api.h
+
+sample_test_plugin_la_SOURCES = sample/sample_test.c sample/sample_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/examples/sample-plugin/sample/node.c b/src/examples/sample-plugin/sample/node.c
new file mode 100644
index 00000000..94c1706b
--- /dev/null
+++ b/src/examples/sample-plugin/sample/node.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <sample/sample.h>
+
+typedef struct {
+ u32 next_index;
+ u32 sw_if_index;
+ u8 new_src_mac[6];
+ u8 new_dst_mac[6];
+} sample_trace_t;
+
+static u8 *
+format_mac_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
+
+/* packet trace format function */
+static u8 * format_sample_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sample_trace_t * t = va_arg (*args, sample_trace_t *);
+
+ s = format (s, "SAMPLE: sw_if_index %d, next index %d\n",
+ t->sw_if_index, t->next_index);
+ s = format (s, " new src %U -> new dst %U",
+ format_mac_address, t->new_src_mac,
+ format_mac_address, t->new_dst_mac);
+
+ return s;
+}
+
+vlib_node_registration_t sample_node;
+
+#define foreach_sample_error \
+_(SWAPPED, "Mac swap packets processed")
+
+typedef enum {
+#define _(sym,str) SAMPLE_ERROR_##sym,
+ foreach_sample_error
+#undef _
+ SAMPLE_N_ERROR,
+} sample_error_t;
+
+static char * sample_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sample_error
+#undef _
+};
+
+typedef enum {
+ SAMPLE_NEXT_INTERFACE_OUTPUT,
+ SAMPLE_N_NEXT,
+} sample_next_t;
+
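+/* One expansion per MAC address byte: the _(a) definitions below use this
+ * to unroll the 6-byte address copies without a runtime loop. */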
+#define foreach_mac_address_offset \
+_(0) \
+_(1) \
+_(2) \
+_(3) \
+_(4) \
+_(5)
+
+static uword
+sample_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ sample_next_t next_index;
+ u32 pkts_swapped = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 next0 = SAMPLE_NEXT_INTERFACE_OUTPUT;
+ u32 next1 = SAMPLE_NEXT_INTERFACE_OUTPUT;
+ u32 sw_if_index0, sw_if_index1;
+ u8 tmp0[6], tmp1[6];
+ ethernet_header_t *en0, *en1;
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ ASSERT (b0->current_data == 0);
+ ASSERT (b1->current_data == 0);
+
+ en0 = vlib_buffer_get_current (b0);
+ en1 = vlib_buffer_get_current (b1);
+
+ /* This is not the fastest way to swap src + dst mac addresses */
+#define _(a) tmp0[a] = en0->src_address[a];
+ foreach_mac_address_offset;
+#undef _
+#define _(a) en0->src_address[a] = en0->dst_address[a];
+ foreach_mac_address_offset;
+#undef _
+#define _(a) en0->dst_address[a] = tmp0[a];
+ foreach_mac_address_offset;
+#undef _
+
+#define _(a) tmp1[a] = en1->src_address[a];
+ foreach_mac_address_offset;
+#undef _
+#define _(a) en1->src_address[a] = en1->dst_address[a];
+ foreach_mac_address_offset;
+#undef _
+#define _(a) en1->dst_address[a] = tmp1[a];
+ foreach_mac_address_offset;
+#undef _
+
+
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ /* Send pkt back out the RX interface */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = sw_if_index1;
+
+ pkts_swapped += 2;
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ sample_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ clib_memcpy (t->new_src_mac, en0->src_address,
+ sizeof (t->new_src_mac));
+ clib_memcpy (t->new_dst_mac, en0->dst_address,
+ sizeof (t->new_dst_mac));
+
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ sample_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ clib_memcpy (t->new_src_mac, en1->src_address,
+ sizeof (t->new_src_mac));
+ clib_memcpy (t->new_dst_mac, en1->dst_address,
+ sizeof (t->new_dst_mac));
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = SAMPLE_NEXT_INTERFACE_OUTPUT;
+ u32 sw_if_index0;
+ u8 tmp0[6];
+ ethernet_header_t *en0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ /*
+ * Direct from the driver, we should be at offset 0
+ * aka at &b0->data[0]
+ */
+ ASSERT (b0->current_data == 0);
+
+ en0 = vlib_buffer_get_current (b0);
+
+ /* This is not the fastest way to swap src + dst mac addresses */
+#define _(a) tmp0[a] = en0->src_address[a];
+ foreach_mac_address_offset;
+#undef _
+#define _(a) en0->src_address[a] = en0->dst_address[a];
+ foreach_mac_address_offset;
+#undef _
+#define _(a) en0->dst_address[a] = tmp0[a];
+ foreach_mac_address_offset;
+#undef _
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ /* Send pkt back out the RX interface */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ sample_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ clib_memcpy (t->new_src_mac, en0->src_address,
+ sizeof (t->new_src_mac));
+ clib_memcpy (t->new_dst_mac, en0->dst_address,
+ sizeof (t->new_dst_mac));
+ }
+
+ pkts_swapped += 1;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, sample_node.index,
+ SAMPLE_ERROR_SWAPPED, pkts_swapped);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (sample_node) = {
+ .function = sample_node_fn,
+ .name = "sample",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sample_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(sample_error_strings),
+ .error_strings = sample_error_strings,
+
+ .n_next_nodes = SAMPLE_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SAMPLE_NEXT_INTERFACE_OUTPUT] = "interface-output",
+ },
+};
diff --git a/src/examples/sample-plugin/sample/sample.api b/src/examples/sample-plugin/sample/sample.api
new file mode 100644
index 00000000..d565c0b1
--- /dev/null
+++ b/src/examples/sample-plugin/sample/sample.api
@@ -0,0 +1,31 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Define a simple binary API to control the feature */
+
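+/* "autoreply" makes vppapigen generate the matching
+ * sample_macswap_enable_disable_reply message (an i32 retval plus the
+ * shared context field). */
+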
+autoreply define sample_macswap_enable_disable {
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ /* Enable / disable the feature */
+ u8 enable_disable;
+
+ /* Interface handle */
+ u32 sw_if_index;
+};
diff --git a/src/examples/sample-plugin/sample/sample.c b/src/examples/sample-plugin/sample/sample.c
new file mode 100644
index 00000000..3929ac23
--- /dev/null
+++ b/src/examples/sample-plugin/sample/sample.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Sample Plugin, plugin API / trace / CLI handling.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <sample/sample.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <sample/sample_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <sample/sample_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <sample/sample_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <sample/sample_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <sample/sample_all_api_h.h>
+#undef vl_api_version
+
+#define REPLY_MSG_ID_BASE sm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
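+
+/* REPLY_MACRO adds REPLY_MSG_ID_BASE to each reply's message id, since a
+ * plugin's message ids are assigned dynamically at init time. */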
+
+/* List of message types that this plugin understands */
+
+#define foreach_sample_plugin_api_msg \
+_(SAMPLE_MACSWAP_ENABLE_DISABLE, sample_macswap_enable_disable)
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = SAMPLE_PLUGIN_BUILD_VER,
+ .description = "Sample of VPP Plugin",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Enable/disable the macswap plugin.
+ *
+ * Action function shared between message handler and debug CLI.
+ */
+
+int sample_macswap_enable_disable (sample_main_t * sm, u32 sw_if_index,
+ int enable_disable)
+{
+ vnet_sw_interface_t * sw;
+ int rv = 0;
+
+ /* Utterly wrong? */
+ if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ /* Not a physical port? */
+ sw = vnet_get_sw_interface (sm->vnet_main, sw_if_index);
+ if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ vnet_feature_enable_disable ("device-input", "sample",
+ sw_if_index, enable_disable, 0, 0);
+
+ return rv;
+}
+
+static clib_error_t *
+macswap_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ sample_main_t * sm = &sample_main;
+ u32 sw_if_index = ~0;
+ int enable_disable = 1;
+
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "disable"))
+ enable_disable = 0;
+ else if (unformat (input, "%U", unformat_vnet_sw_interface,
+ sm->vnet_main, &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Please specify an interface...");
+
+ rv = sample_macswap_enable_disable (sm, sw_if_index, enable_disable);
+
+ switch(rv) {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return
+ (0, "Invalid interface, only works on physical ports");
+ break;
+
+ case VNET_API_ERROR_UNIMPLEMENTED:
+ return clib_error_return (0, "Device driver doesn't support redirection");
+ break;
+
+ default:
+ return clib_error_return (0, "sample_macswap_enable_disable returned %d",
+ rv);
+ }
+ return 0;
+}
+
+/**
+ * @brief CLI command to enable/disable the sample macswap plugin.
+ */
+VLIB_CLI_COMMAND (sr_content_command, static) = {
+ .path = "sample macswap",
+ .short_help =
+ "sample macswap <interface-name> [disable]",
+ .function = macswap_enable_disable_command_fn,
+};
+
+/**
+ * @brief Plugin API message handler.
+ */
+static void vl_api_sample_macswap_enable_disable_t_handler
+(vl_api_sample_macswap_enable_disable_t * mp)
+{
+ vl_api_sample_macswap_enable_disable_reply_t * rmp;
+ sample_main_t * sm = &sample_main;
+ int rv;
+
+ rv = sample_macswap_enable_disable (sm, ntohl(mp->sw_if_index),
+ (int) (mp->enable_disable));
+
+ REPLY_MACRO(VL_API_SAMPLE_MACSWAP_ENABLE_DISABLE_REPLY);
+}
+
+/**
+ * @brief Set up the API message handling tables.
+ */
+static clib_error_t *
+sample_plugin_api_hookup (vlib_main_t *vm)
+{
+ sample_main_t * sm = &sample_main;
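+  /* For each message: register the handler, endian-swap and print
+   * functions, offset by this plugin's dynamically assigned id base. */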
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_sample_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <sample/sample_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (sample_main_t * sm, api_main_t *am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_sample;
+#undef _
+}
+
+/**
+ * @brief Initialize the sample plugin.
+ */
+static clib_error_t * sample_init (vlib_main_t * vm)
+{
+ sample_main_t * sm = &sample_main;
+ clib_error_t * error = 0;
+ u8 * name;
+
+ sm->vnet_main = vnet_get_main ();
+
+ name = format (0, "sample_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = sample_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, &api_main);
+
+ vec_free(name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (sample_init);
+
+/**
+ * @brief Hook the sample plugin into the VPP graph hierarchy.
+ */
+VNET_FEATURE_INIT (sample, static) =
+{
+ .arc_name = "device-input",
+ .node_name = "sample",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
diff --git a/src/examples/sample-plugin/sample/sample.h b/src/examples/sample-plugin/sample/sample.h
new file mode 100644
index 00000000..c9778f74
--- /dev/null
+++ b/src/examples/sample-plugin/sample/sample.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_sample_h__
+#define __included_sample_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vnet_main_t * vnet_main;
+} sample_main_t;
+
+sample_main_t sample_main;
+
+extern vlib_node_registration_t sample_node;
+
+#define SAMPLE_PLUGIN_BUILD_VER "1.0"
+
+#endif /* __included_sample_h__ */
diff --git a/src/examples/sample-plugin/sample/sample_all_api_h.h b/src/examples/sample-plugin/sample/sample_all_api_h.h
new file mode 100644
index 00000000..774d782f
--- /dev/null
+++ b/src/examples/sample-plugin/sample/sample_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <sample/sample.api.h>
diff --git a/src/examples/sample-plugin/sample/sample_msg_enum.h b/src/examples/sample-plugin/sample/sample_msg_enum.h
new file mode 100644
index 00000000..af4172f7
--- /dev/null
+++ b/src/examples/sample-plugin/sample/sample_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_sample_msg_enum_h
+#define included_sample_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
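+/* Each vl_msg_id(id, handler) entry in the generated API header expands
+ * to a bare enum member, producing one message id per API message. */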
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <sample/sample_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_sample_msg_enum_h */
diff --git a/src/examples/sample-plugin/sample/sample_test.c b/src/examples/sample-plugin/sample/sample_test.c
new file mode 100644
index 00000000..2298675b
--- /dev/null
+++ b/src/examples/sample-plugin/sample/sample_test.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * sample_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+#define __plugin_msg_base sample_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
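+
+/* vat_helper_macros.h provides M() to allocate and initialize an API
+ * message, S() to send it, and W() to wait for the reply, each relative
+ * to __plugin_msg_base. */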
+
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+
+/* Declare message IDs */
+#include <sample/sample_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <sample/sample_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <sample/sample_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <sample/sample_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <sample/sample_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} sample_test_main_t;
+
+sample_test_main_t sample_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(sample_macswap_enable_disable_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = sample_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(SAMPLE_MACSWAP_ENABLE_DISABLE_REPLY, sample_macswap_enable_disable_reply)
+
+
+static int api_sample_macswap_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ int enable_disable = 1;
+ u32 sw_if_index = ~0;
+ vl_api_sample_macswap_enable_disable_t * mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "disable"))
+ enable_disable = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0) {
+ errmsg ("missing interface name / explicit sw_if_index number \n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M(SAMPLE_MACSWAP_ENABLE_DISABLE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable_disable = enable_disable;
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(sample_macswap_enable_disable, "<intfc> [disable]")
+
+static void sample_api_hookup (vat_main_t *vam)
+{
+ sample_test_main_t * sm = &sample_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ sample_test_main_t * sm = &sample_test_main;
+ u8 * name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "sample_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~0)
+ sample_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/examples/sample-plugin/sample_plugin_doc.md b/src/examples/sample-plugin/sample_plugin_doc.md
new file mode 100644
index 00000000..501a8dca
--- /dev/null
+++ b/src/examples/sample-plugin/sample_plugin_doc.md
@@ -0,0 +1,66 @@
+# Sample plugin for VPP {#sample_plugin_doc}
+
+## Overview
+
+The VPP sample plugin demonstrates how to create a new plugin that
+integrates with VPP. The sample code implements a trivial macswap
+algorithm and demonstrates plugin runtime integration with the VPP graph
+hierarchy, API and CLI.
+
+For a deeper dive, see the annotations in the sample code itself: [sample.c](@ref sample.c)
+
+## How to build and run the sample plugin.
+
+First wipe any previous build.
+
+    $ make wipe
+
+Define the environment variable 'VPP_WITH_SAMPLE_PLUGIN=yes' with a process scope
+
+    $ VPP_WITH_SAMPLE_PLUGIN=yes make build
+
+or a session scope, and (re)build VPP.
+
+    $ export VPP_WITH_SAMPLE_PLUGIN=yes
+    $ make build
+
+Now run VPP and make sure the plugin is loaded.
+
+ $ make run
+ ...
+ load_one_plugin:184: Loaded plugin: memif_plugin.so (Packet Memory Interface (experimetal))
+ load_one_plugin:184: Loaded plugin: sample_plugin.so (Sample of VPP Plugin)
+ load_one_plugin:184: Loaded plugin: nat_plugin.so (Network Address Translation)
+ ...
+ DBGvpp#
+
+## How to create a new plugin
+
+To create a new plugin based on the sample plugin, copy and rename the sample plugin directory and automake config.
+
+ cp -r src/examples/sample-plugin/sample src/plugins/newplugin
+ cp src/examples/sample-plugin/sample.am src/plugins/newplugin.am
+
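+Then rename the files and plugin symbols in the copied sources, for
+example (a rough sketch; adjust the file list as needed):
+
+    cd src/plugins
+    sed -i -e 's/sample/newplugin/g' -e 's/SAMPLE/NEWPLUGIN/g' \
+        newplugin/*.c newplugin/*.h newplugin/*.api newplugin.am
+    for f in newplugin/sample*; do mv "$f" "${f/sample/newplugin}"; done
+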
+Add the following entry to the plugins section of `src/configure.ac`.
+
+ PLUGIN_ENABLED(newplugin)
+
+Add the following entry to the plugins section of `src/plugins/Makefile.am`.
+
+ if ENABLE_NEWPLUGIN
+ include newplugin.am
+ endif
+
+Now (re)build VPP.
+
+ $ make wipe
+ $ make build
+
+## Configuration
+
+To enable the sample plugin
+
+ sample macswap <interface name>
+
+To disable the sample plugin
+
+ sample macswap <interface name> disable
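+
+For example, assuming an interface named GigabitEthernet0/8/0 (interface
+names vary by system):
+
+    DBGvpp# sample macswap GigabitEthernet0/8/0
+    DBGvpp# sample macswap GigabitEthernet0/8/0 disable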
diff --git a/src/examples/srv6-sample-localsid/node.c b/src/examples/srv6-sample-localsid/node.c
new file mode 100644
index 00000000..3ac7108b
--- /dev/null
+++ b/src/examples/srv6-sample-localsid/node.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <srv6-localsid/srv6_localsid_sample.h>
+
+typedef struct {
+ u32 localsid_index;
+} srv6_localsid_sample_trace_t;
+
+/* packet trace format function */
+static u8 * format_srv6_localsid_sample_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ srv6_localsid_sample_trace_t * t = va_arg (*args, srv6_localsid_sample_trace_t *);
+ s = format (s, "SRv6-sample-localsid: localsid_index %d\n",
+ t->localsid_index);
+ return s;
+}
+
+vlib_node_registration_t srv6_localsid_sample_node;
+
+#define foreach_srv6_localsid_counter \
+_(PROCESSED, "srv6-sample-localsid processed packets") \
+_(NO_SRH, "(Error) No SRH.")
+
+typedef enum {
+#define _(sym,str) SRV6_LOCALSID_COUNTER_##sym,
+ foreach_srv6_localsid_counter
+#undef _
+ SRV6_LOCALSID_N_COUNTERS,
+} srv6_localsid_sample_counters;
+
+static char * srv6_localsid_counter_strings[] = {
+#define _(sym,string) string,
+ foreach_srv6_localsid_counter
+#undef _
+};
+
+typedef enum {
+ SRV6_SAMPLE_LOCALSID_NEXT_ERROR,
+ SRV6_SAMPLE_LOCALSID_NEXT_IP6LOOKUP,
+ SRV6_SAMPLE_LOCALSID_N_NEXT,
+} srv6_localsid_sample_next_t;
+
+/**
+ * @brief Function doing End processing.
+ */
+static_always_inline void
+end_srh_processing (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_sr_header_t * sr0,
+ ip6_sr_localsid_t * ls0,
+ u32 * next0,
+ u8 psp,
+ ip6_ext_header_t * prev0)
+{
+ ip6_address_t *new_dst0;
+
+ if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
+ {
+ if (sr0->segments_left == 1 && psp)
+ {
+ u32 new_l0, sr_len;
+ u64 *copy_dst0, *copy_src0;
+ u32 copy_len_u64s0 = 0;
+
+ ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0];
+ ip0->dst_address.as_u64[1] = sr0->segments->as_u64[1];
+
+ /* Remove the SRH taking care of the rest of IPv6 ext header */
+ if (prev0)
+ prev0->next_hdr = sr0->protocol;
+ else
+ ip0->protocol = sr0->protocol;
+
+ sr_len = ip6_ext_header_len (sr0);
+ vlib_buffer_advance (b0, sr_len);
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ copy_src0 = (u64 *) ip0;
+ copy_dst0 = copy_src0 + (sr0->length + 1);
+	  /* Number of 8-octet units to copy:
+	   * in the absence of extension headers this equals the length of
+	   * the ip6 header; with extension headers it grows by the number
+	   * of 8-octet units of extension headers preceding the SR header.
+	   */
+ copy_len_u64s0 =
+ (((u8 *) sr0 - (u8 *) ip0) - sizeof (ip6_header_t)) >> 3;
+ copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+ copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+ copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+ copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+ copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
+
+ int i;
+ for (i = copy_len_u64s0 - 1; i >= 0; i--)
+ {
+ copy_dst0[i] = copy_src0[i];
+ }
+
+ if (ls0->behavior == SR_BEHAVIOR_X)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+ }
+ else if(ls0->behavior == SR_BEHAVIOR_T)
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->vrf_index;
+ }
+ }
+ else if (PREDICT_TRUE(sr0->segments_left > 0))
+ {
+ sr0->segments_left -= 1;
+ new_dst0 = (ip6_address_t *) (sr0->segments);
+ new_dst0 += sr0->segments_left;
+ ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+ ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+
+ if (ls0->behavior == SR_BEHAVIOR_X)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+ }
+ else if(ls0->behavior == SR_BEHAVIOR_T)
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->vrf_index;
+ }
+ }
+ else
+ {
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS];
+ }
+ }
+ else
+ {
+ /* Error. Routing header of type != SR */
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH];
+ }
+}
+
+/**
+ * @brief SRv6 Sample Localsid graph node
+ * WARNING: a production node should process packets in a dual loop for
+ * performance (see the sample plugin's node.c); this example uses a
+ * single loop for clarity.
+ */
+static uword
+srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ u32 next_index;
+ u32 pkts_swapped = 0;
+
+ ip6_sr_main_t * sm = &sr_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ ip6_header_t * ip0 = 0;
+ ip6_sr_header_t * sr0;
+	  ip6_ext_header_t *prev0;
+ u32 next0 = SRV6_SAMPLE_LOCALSID_NEXT_IP6LOOKUP;
+ ip6_sr_localsid_t *ls0;
+ srv6_localsid_sample_per_sid_memory_t *ls0_mem;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ sr0 = (ip6_sr_header_t *)(ip0+1);
+
+ /* Lookup the SR End behavior based on IP DA (adj) */
+ ls0 = pool_elt_at_index (sm->localsids, vnet_buffer(b0)->ip.adj_index[VLIB_TX]);
+ ls0_mem = ls0->plugin_mem;
+
+ /* SRH processing */
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+	  end_srh_processing (node, b0, ip0, sr0, ls0, &next0,
+			      ls0->end_psp, prev0);
+
+ /* ==================================================================== */
+ /* INSERT CODE HERE */
+ /* Example starts here */
+	  //In this example we replace the packet's VRF table with the one configured via the CLI
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = ls0_mem->fib_table;
+ /* Example finishes here */
+ /* ==================================================================== */
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ srv6_localsid_sample_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->localsid_index = ls0 - sm->localsids;
+ }
+
+ /* This increments the SRv6 per LocalSID counters.*/
+ vlib_increment_combined_counter
+ (((next0 == SRV6_SAMPLE_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : &(sm->sr_ls_valid_counters)),
+ thread_index,
+ ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+
+ pkts_swapped ++;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (srv6_localsid_sample_node) = {
+ .function = srv6_localsid_sample_fn,
+ .name = "srv6-localsid-sample",
+ .vector_size = sizeof (u32),
+ .format_trace = format_srv6_localsid_sample_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SRV6_LOCALSID_N_COUNTERS,
+ .error_strings = srv6_localsid_counter_strings,
+ .n_next_nodes = SRV6_SAMPLE_LOCALSID_N_NEXT,
+ .next_nodes = {
+ [SRV6_SAMPLE_LOCALSID_NEXT_IP6LOOKUP] = "ip6-lookup",
+ [SRV6_SAMPLE_LOCALSID_NEXT_ERROR] = "error-drop",
+ },
+};
diff --git a/src/examples/srv6-sample-localsid/srv6_localsid_sample.c b/src/examples/srv6-sample-localsid/srv6_localsid_sample.c
new file mode 100755
index 00000000..ec16547e
--- /dev/null
+++ b/src/examples/srv6-sample-localsid/srv6_localsid_sample.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * srv6_localsid_sample.c - Simple SRv6 LocalSID
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <srv6-localsid/srv6_localsid_sample.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+unsigned char srv6_localsid_name[32] = "Sample-SRv6-LocalSID-plugin";
+unsigned char keyword_str[32] = "new_srv6_localsid";
+unsigned char def_str[64] = "This is a definition of a sample new_srv6_localsid";
+unsigned char params_str[32] = "<fib_table>";
+
+/*****************************************/
+/* SRv6 LocalSID instantiation and removal functions */
+static int
+srv6_localsid_creation_fn (ip6_sr_localsid_t *localsid)
+{
+ /*
+ * Do you want to do anything fancy upon localsid instantiation?
+ * You can do it here
+ * (If return != 0 the localsid creation will be cancelled.)
+ */
+ /* As an example, add 1 to the fib table index supplied by the user */
+ srv6_localsid_sample_per_sid_memory_t *ls_mem = localsid->plugin_mem;
+ ls_mem->fib_table += 1;
+ return 0;
+}
+
+static int
+srv6_localsid_removal_fn (ip6_sr_localsid_t *localsid)
+{
+ /* Do you want to do anything fancy upon localsid removal?
+ * You can do it here
+ * (If return != 0 the localsid removal will be cancelled.)
+ */
+ /*
+ * BTW if you stored something in localsid->plugin_mem you should clean it now
+ */
+
+ //In this example we only free the memory allocated per localsid
+ clib_mem_free(localsid->plugin_mem);
+ return 0;
+}
+
+/**********************************/
+/* SRv6 LocalSID format functions */
+/*
+ * Nicely prints the parameters of a localsid
+ * Example output: "Table: 5"
+ */
+u8 *
+format_srv6_localsid_sample (u8 * s, va_list * args)
+{
+ srv6_localsid_sample_per_sid_memory_t *ls_mem = va_arg (*args, void *);
+ return (format (s, "Table: %u", ls_mem->fib_table));
+}
+
+/*
+ * Process the parameters of a localsid
+ * Example: process from:
+ * sr localsid address cafe::1 behavior new_srv6_localsid 5
+ * everything from behavior on... so in this case 'new_srv6_localsid 5'
+ * Notice that it MUST match the keyword_str and params_str defined above.
+ */
+uword
+unformat_srv6_localsid_sample (unformat_input_t * input, va_list * args)
+{
+ void **plugin_mem = va_arg (*args, void **);
+ srv6_localsid_sample_per_sid_memory_t *ls_mem;
+ u32 table_id;
+ if (unformat (input, "new_srv6_localsid %u", &table_id))
+ {
+ /* Allocate a portion of memory */
+ ls_mem = clib_mem_alloc_aligned_at_offset (
+ sizeof(srv6_localsid_sample_per_sid_memory_t), 0, 0, 1);
+
+ /* Set to zero the memory */
+ memset (ls_mem, 0, sizeof(srv6_localsid_sample_per_sid_memory_t));
+
+ /* Save the parsed fib table in the per-localsid memory */
+ ls_mem->fib_table = table_id;
+
+ /* Don't forget to add it to the localsid */
+ *plugin_mem = ls_mem;
+ return 1;
+ }
+ return 0;
+}
+
+/*************************/
+/* SRv6 LocalSID FIB DPO */
+static u8 *
+format_srv6_localsid_sample_dpo (u8 * s, va_list * args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+
+ return (format (s, "SR: localsid_sample_index:[%u]", index));
+}
+
+void
+srv6_localsid_sample_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+void
+srv6_localsid_sample_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+const static dpo_vft_t srv6_localsid_sample_vft = {
+ .dv_lock = srv6_localsid_sample_dpo_lock,
+ .dv_unlock = srv6_localsid_sample_dpo_unlock,
+ .dv_format = format_srv6_localsid_sample_dpo,
+};
+
+const static char *const srv6_localsid_sample_ip6_nodes[] = {
+ "srv6-localsid-sample",
+ NULL,
+};
+
+const static char *const *const srv6_localsid_sample_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = srv6_localsid_sample_ip6_nodes,
+};
+
+/**********************/
+static clib_error_t * srv6_localsid_sample_init (vlib_main_t * vm)
+{
+ srv6_localsid_sample_main_t * sm = &srv6_localsid_sample_main;
+ int rv = 0;
+ /* Create DPO */
+ sm->srv6_localsid_sample_dpo_type = dpo_register_new_type (
+ &srv6_localsid_sample_vft, srv6_localsid_sample_nodes);
+
+ /* Register SRv6 LocalSID */
+ rv = sr_localsid_register_function (vm,
+ srv6_localsid_name,
+ keyword_str,
+ def_str,
+ params_str,
+ &sm->srv6_localsid_sample_dpo_type,
+ format_srv6_localsid_sample,
+ unformat_srv6_localsid_sample,
+ srv6_localsid_creation_fn,
+ srv6_localsid_removal_fn);
+ if (rv < 0)
+ return clib_error_return (0, "SRv6 LocalSID function could not be registered.");
+ else
+ sm->srv6_localsid_behavior_id = rv;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (srv6_localsid_sample_init);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = "1.0",
+};
diff --git a/src/examples/srv6-sample-localsid/srv6_localsid_sample.h b/src/examples/srv6-sample-localsid/srv6_localsid_sample.h
new file mode 100644
index 00000000..ef74ea3e
--- /dev/null
+++ b/src/examples/srv6-sample-localsid/srv6_localsid_sample.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_srv6_localsid_sample_h__
+#define __included_srv6_localsid_sample_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/srv6/sr_packet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+
+ /* DPO type */
+ dpo_type_t srv6_localsid_sample_dpo_type;
+
+ /* SRv6 LocalSID behavior number */
+ u32 srv6_localsid_behavior_id;
+
+} srv6_localsid_sample_main_t;
+
+/*
+ * This is the memory that will be stored per each localsid
+ * the user instantiates
+ */
+typedef struct {
+ u32 fib_table; /* A simple fib table index, used as an example. */
+} srv6_localsid_sample_per_sid_memory_t ;
+
+srv6_localsid_sample_main_t srv6_localsid_sample_main;
+
+format_function_t format_srv6_localsid_sample;
+unformat_function_t unformat_srv6_localsid_sample;
+
+void srv6_localsid_sample_dpo_lock (dpo_id_t * dpo);
+void srv6_localsid_sample_dpo_unlock (dpo_id_t * dpo);
+
+extern vlib_node_registration_t srv6_localsid_sample_node;
+
+#endif /* __included_srv6_localsid_sample_h__ */
diff --git a/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md
new file mode 100644
index 00000000..cd717db8
--- /dev/null
+++ b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md
@@ -0,0 +1,30 @@
+# Sample SRv6 LocalSID documentation {#srv6_plugin_doc}
+
+## Introduction
+
+This plugin is an example of how a user can create a new SRv6 LocalSID behavior by using VPP plugins with the appropriate API calls to the existing SR code.
+
+This **example** plugin registers a new localsid behavior, with CLI keyword 'new_srv6_localsid', which takes only one parameter, a fib-table. Upon receipt of a packet, this plugin will enforce the next IP6 lookup in the fib-table specified by the user. (In fact it will do the lookup in fib-table n+1, since for the sake of the example the creation callback increments the fib-table index.)
+
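+For example, using the same command that is parsed by unformat_srv6_localsid_sample in the source, the behavior is attached to a LocalSID like this:
+
+```
+sr localsid address cafe::1 behavior new_srv6_localsid 5
+```
+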
+Notice that the plugin only 'defines' a new SRv6 LocalSID behavior; the existing SR code in VNET is what actually instantiates new LocalSIDs. There are also callback functions so that when you create or remove a LocalSID you can set up specific parameters through the functions in this plugin.
+
+## Variables to watch for
+
+* srv6_localsid_name: This variable is the name (used as a unique key) identifying this SR LocalSID plugin.
+* keyword_str: This is the CLI keyword to be used for the plugin. In this example 'new_srv6_localsid'. (e.g. sr localsid address cafe::1 behavior new_srv6_localsid <parameters>)
+* def_str: This is a definition of this SR behavior. This is printed when you do 'show sr localsid behaviors'.
+* params_str: This is a definition of the parameters of this localsid. This is printed when you do 'show sr localsid behaviors'.
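+
+For reference, these are declared at the top of srv6_localsid_sample.c:
+
+```
+unsigned char srv6_localsid_name[32] = "Sample-SRv6-LocalSID-plugin";
+unsigned char keyword_str[32] = "new_srv6_localsid";
+unsigned char def_str[64] = "This is a definition of a sample new_srv6_localsid";
+unsigned char params_str[32] = "<fib_table>";
+```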
+
+## Functions to watch for
+
+* srv6_localsid_creation_fn: This function will be called every time a new SR LocalSID is instantiated with the behavior defined in this plugin.
+* srv6_localsid_removal_fn: This function will be called every time an SR LocalSID with the behavior defined in this plugin is removed. It is typically used to free all the memory allocated in the creation function.
+* format_srv6_localsid_sample: This function prints nicely the parameters of every SR LocalSID using this behavior.
+* unformat_srv6_localsid_sample: This function parses the CLI command when initialising a new SR LocalSID using this behavior. It parses the parameters and ensures that they are correct.
+* format_srv6_localsid_sample_dpo: This function formats the 'show ip6 fib' message for the SR LocalSIDs created with this plugin behavior.
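+
+All of these are wired together in the plugin's init function through sr_localsid_register_function; a condensed view of the call in srv6_localsid_sample.c:
+
+```
+rv = sr_localsid_register_function (vm, srv6_localsid_name, keyword_str,
+                                    def_str, params_str,
+                                    &sm->srv6_localsid_sample_dpo_type,
+                                    format_srv6_localsid_sample,
+                                    unformat_srv6_localsid_sample,
+                                    srv6_localsid_creation_fn,
+                                    srv6_localsid_removal_fn);
+```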
+
+## Graph node
+
+The current graph node uses the function 'end_decaps_srh_processing' to do the Segment Routing Endpoint behavior. Notice that it does not allow the cleanup of a Segment Routing header (as per the SRv6 behavior specs).
+This function is identical to the one found in /src/vnet/srv6/sr_localsid.c.
+If for some other reason you want plain endpoint processing or SRH cleanup (PSP), you can use the functions 'end_srh_processing' or 'end_psp_srh_processing' respectively.
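+
+For completeness, the node registration in node.c declares the next nodes of this graph node (condensed):
+
+```
+VLIB_REGISTER_NODE (srv6_localsid_sample_node) = {
+  .function = srv6_localsid_sample_fn,
+  .name = "srv6-localsid-sample",
+  /* ... */
+  .next_nodes = {
+    [SRV6_SAMPLE_LOCALSID_NEXT_IP6LOOKUP] = "ip6-lookup",
+    [SRV6_SAMPLE_LOCALSID_NEXT_ERROR] = "error-drop",
+  },
+};
+```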
diff --git a/src/examples/vlib/dir.dox b/src/examples/vlib/dir.dox
new file mode 100644
index 00000000..d3ac0ee4
--- /dev/null
+++ b/src/examples/vlib/dir.dox
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016 Comcast Cable Communications Management, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Doxygen directory documentation */
+/**
+@dir
+@brief VLIB example code: a main() stub plus mc and parser test applications.
+*/
diff --git a/src/examples/vlib/main_stub.c b/src/examples/vlib/main_stub.c
new file mode 100644
index 00000000..3b19c53f
--- /dev/null
+++ b/src/examples/vlib/main_stub.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <math.h>
+
+int
+main (int argc, char *argv[])
+{
+ return vlib_unix_main (argc, argv);
+}
+
+static clib_error_t *
+main_stub_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+ if ((error = unix_physmem_init (vm)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, unix_cli_init)))
+ return error;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (main_stub_init);
+
+#if 0
+/* Node test code. */
+typedef struct
+{
+ int scalar;
+ int vector[0];
+} my_frame_t;
+
+static u8 *
+format_my_node_frame (u8 * s, va_list * va)
+{
+ vlib_frame_t *f = va_arg (*va, vlib_frame_t *);
+ my_frame_t *g = vlib_frame_args (f);
+ int i;
+
+ s = format (s, "scalar %d, vector { ", g->scalar);
+ for (i = 0; i < f->n_vectors; i++)
+ s = format (s, "%d, ", g->vector[i]);
+ s = format (s, " }");
+
+ return s;
+}
+
+static uword
+my_func (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vlib_node_t *node;
+ my_frame_t *y;
+ u32 i, n_left = 0;
+ static int serial;
+ int verbose;
+
+ node = vlib_get_node (vm, rt->node_index);
+
+ verbose = 0;
+
+ if (verbose && f)
+ vlib_cli_output (vm, "%v: call frame %p %U", node->name,
+ f, format_my_node_frame, f);
+
+ if (rt->n_next_nodes > 0)
+ {
+ vlib_frame_t *next = vlib_get_next_frame (vm, rt, /* next index */ 0);
+ n_left = VLIB_FRAME_SIZE - next->n_vectors;
+ y = vlib_frame_args (next);
+ y->scalar = serial++;
+ }
+ else
+ y = 0;
+
+ for (i = 0; i < 5; i++)
+ {
+ if (y)
+ {
+ ASSERT (n_left > 0);
+ n_left--;
+ y->vector[i] = y->scalar + i;
+ }
+ }
+ if (y)
+ vlib_put_next_frame (vm, rt, /* next index */ 0, n_left);
+
+ if (verbose)
+ vlib_cli_output (vm, "%v: return frame %p", node->name, f);
+
+ return i;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (my_node1,static) = {
+ .function = my_func,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .name = "my-node1",
+ .scalar_size = sizeof (my_frame_t),
+ .vector_size = STRUCT_SIZE_OF (my_frame_t, vector[0]),
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "my-node2",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (my_node2,static) = {
+ .function = my_func,
+ .name = "my-node2",
+ .scalar_size = sizeof (my_frame_t),
+ .vector_size = STRUCT_SIZE_OF (my_frame_t, vector[0]),
+};
+/* *INDENT-ON* */
+
+#endif
+
+#if 0
+
+typedef enum
+{
+ MY_EVENT_TYPE1,
+ MY_EVENT_TYPE2,
+} my_process_completion_type_t;
+
+typedef struct
+{
+ int a;
+ f64 b;
+} my_process_event_data_t;
+
+static u8 *
+format_my_process_event_data (u8 * s, va_list * va)
+{
+ my_process_event_data_t *d = va_arg (*va, my_process_event_data_t *);
+ return format (s, "{ a %d b %.6f}", d->a, d->b);
+}
+
+static uword
+my_proc (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vlib_node_t *node;
+ u32 i;
+
+ node = vlib_get_node (vm, rt->node_index);
+
+ vlib_cli_output (vm, "%v: call frame %p", node->name, f);
+
+ for (i = 0; i < 5; i++)
+ {
+ vlib_cli_output (vm, "%v: %d", node->name, i);
+ vlib_process_suspend (vm, 1e0 /* secs */ );
+ }
+
+ vlib_cli_output (vm, "%v: return frame %p", node->name, f);
+
+ if (0)
+ {
+ uword n_events_seen, type, *data = 0;
+
+ for (n_events_seen = 0; n_events_seen < 2;)
+ {
+ vlib_process_wait_for_event (vm);
+ type = vlib_process_get_events (vm, &data);
+ n_events_seen += vec_len (data);
+ vlib_cli_output (vm, "%U %v: completion #%d type %d data 0x%wx",
+ format_time_interval, "h:m:s:u",
+ vlib_time_now (vm), node->name, i, type, data[0]);
+ _vec_len (data) = 0;
+ }
+
+ vec_free (data);
+ }
+ else
+ {
+ uword n_events_seen, i, type;
+ my_process_event_data_t *data;
+ for (n_events_seen = 0; n_events_seen < 2;)
+ {
+ vlib_process_wait_for_event (vm);
+ data = vlib_process_get_event_data (vm, &type);
+ vec_foreach_index (i, data)
+ {
+ vlib_cli_output (vm, "%U event type %d data %U",
+ format_time_interval, "h:m:s:u",
+ vlib_time_now (vm), type,
+ format_my_process_event_data, data);
+ }
+ n_events_seen += vec_len (data);
+ vlib_process_put_event_data (vm, data);
+ }
+ }
+
+ return i;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (my_proc_node,static) = {
+ .function = my_proc,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "my-proc",
+};
+/* *INDENT-ON* */
+
+static uword
+my_proc_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ static int i;
+
+ if (i++ < 2)
+ {
+ if (0)
+ vlib_process_signal_event (vm, my_proc_node.index,
+ i == 1 ? MY_EVENT_TYPE1 : MY_EVENT_TYPE2,
+ 0x12340000 + i);
+ else
+ {
+ my_process_event_data_t *d;
+ f64 dt = 5;
+ d = vlib_process_signal_event_at_time (vm,
+ i * dt,
+ my_proc_node.index,
+ i ==
+ 1 ? MY_EVENT_TYPE1 :
+ MY_EVENT_TYPE2,
+ 1 /* elts */ ,
+ sizeof (d[0]));
+ d->a = i;
+ d->b = vlib_time_now (vm);
+ }
+ }
+ else
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (my_proc_input_node,static) = {
+ .function = my_proc_input,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .name = "my-proc-input",
+};
+/* *INDENT-ON* */
+
+static uword
+_unformat_farith (unformat_input_t * i, va_list * args)
+{
+ u32 prec = va_arg (*args, u32);
+ f64 *result = va_arg (*args, f64 *);
+ f64 tmp[2];
+
+ /* Binary operations, from lowest to highest precedence. */
+ char *binops[] = {
+ "+%U", "-%U", "/%U", "*%U", "^%U",
+ };
+
+ if (prec <= ARRAY_LEN (binops) - 1
+ && unformat_user (i, _unformat_farith, prec + 1, &tmp[0]))
+ {
+ int p;
+ for (p = prec; p < ARRAY_LEN (binops); p++)
+ {
+ if (unformat (i, binops[p], _unformat_farith, prec + 0, &tmp[1]))
+ {
+ switch (binops[p][0])
+ {
+ case '+':
+ result[0] = tmp[0] + tmp[1];
+ break;
+ case '-':
+ result[0] = tmp[0] - tmp[1];
+ break;
+ case '/':
+ result[0] = tmp[0] / tmp[1];
+ break;
+ case '*':
+ result[0] = tmp[0] * tmp[1];
+ break;
+ case '^':
+ result[0] = pow (tmp[0], tmp[1]);
+ break;
+ default:
+ abort ();
+ }
+ return 1;
+ }
+ }
+ result[0] = tmp[0];
+ return 1;
+ }
+
+ else if (unformat (i, "-%U", _unformat_farith, prec + 0, &tmp[0]))
+ {
+ result[0] = -tmp[0];
+ return 1;
+ }
+
+ else if (unformat (i, "(%U)", _unformat_farith, 0, &tmp[0]))
+ {
+ result[0] = tmp[0];
+ return 1;
+ }
+
+ else if (unformat (i, "%f", result))
+ return 1;
+
+ else
+ return 0;
+}
+
+static uword
+unformat_farith (unformat_input_t * i, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ f64 *result = va_arg (*args, f64 *);
+ return unformat_user (i, _unformat_farith, 0, result);
+}
+
+static uword
+unformat_integer (unformat_input_t * i, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ u32 *data = va_arg (*args, u32 *);
+ return unformat (i, "%d", data);
+}
+
+static VLIB_CLI_PARSE_RULE (my_parse_rule1) =
+{
+.name = "decimal_integer",.short_help =
+ "a decimal integer",.unformat_function = unformat_integer,.data_size =
+ sizeof (u32),};
+
+static VLIB_CLI_PARSE_RULE (my_parse_rule2) =
+{
+.name = "float_expression",.short_help =
+ "floating point expression",.unformat_function =
+ unformat_farith,.data_size = sizeof (f64),};
+
+static clib_error_t *
+bar_command (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ switch (cmd->function_arg)
+ {
+ case 2:
+ {
+ u32 *d, *e;
+ d = vlib_cli_get_parse_rule_result (vm, 0);
+ e = vlib_cli_get_parse_rule_result (vm, 1);
+ vlib_cli_output (vm, "bar2 %d %d", d[0], e[0]);
+ break;
+ }
+
+ case 1:
+ {
+ u32 *d = vlib_cli_get_parse_rule_result (vm, 0);
+ vlib_cli_output (vm, "bar1 %d", d[0]);
+ break;
+ }
+
+ case 3:
+ {
+ f64 *d = vlib_cli_get_parse_rule_result (vm, 0);
+ vlib_cli_output (vm, "expr %.6f", d[0]);
+ }
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bar_command2, static) = {
+ .path = "bar %decimal_integer",
+ .short_help = "bar1 command",
+ .function = bar_command,
+ .function_arg = 1,
+};
+VLIB_CLI_COMMAND (bar_command1, static) = {
+ .path = "bar %decimal_integer %decimal_integer",
+ .short_help = "bar2 command",
+ .function = bar_command,
+ .function_arg = 2,
+};
+VLIB_CLI_COMMAND (bar_command3, static) = {
+ .path = "zap %float_expression",
+ .short_help = "bar3 command",
+ .function = bar_command,
+ .function_arg = 3,
+};
+/* *INDENT-ON* */
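+
+/*
+ * Illustrative usage (note this block is compiled out by the "#if 0"):
+ * the CLI command "zap (1+2)*3" would print "expr 9.000000".
+ */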
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/examples/vlib/mc_test.c b/src/examples/vlib/mc_test.c
new file mode 100644
index 00000000..e84a713c
--- /dev/null
+++ b/src/examples/vlib/mc_test.c
@@ -0,0 +1,384 @@
+/*
+ * mc_test.c: test program for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/mc_socket.h>
+#include <vppinfra/random.h>
+
+typedef struct
+{
+ u32 min_n_msg_bytes;
+ u32 max_n_msg_bytes;
+ u32 tx_serial;
+ u32 rx_serial;
+ u32 seed;
+ u32 verbose;
+ u32 validate;
+ u32 window_size;
+ f64 min_delay, max_delay;
+ f64 n_packets_to_send;
+} mc_test_main_t;
+
+always_inline u32
+choose_msg_size (mc_test_main_t * tm)
+{
+ u32 r = tm->min_n_msg_bytes;
+ if (tm->max_n_msg_bytes > tm->min_n_msg_bytes)
+ r +=
+ random_u32 (&tm->seed) % (1 + tm->max_n_msg_bytes -
+ tm->min_n_msg_bytes);
+ return r;
+}
+
+static mc_test_main_t mc_test_main;
+
+static void
+serialize_test_msg (serialize_main_t * m, va_list * va)
+{
+ mc_test_main_t *tm = &mc_test_main;
+ u32 n_bytes = choose_msg_size (tm);
+ u8 *msg;
+ int i;
+ serialize_integer (m, n_bytes, sizeof (n_bytes));
+ msg = serialize_get (m, n_bytes);
+ for (i = 0; i < n_bytes; i++)
+ msg[i] = i + tm->tx_serial;
+ tm->tx_serial += n_bytes;
+}
+
+static void
+unserialize_test_msg (serialize_main_t * m, va_list * va)
+{
+ mc_test_main_t *tm = &mc_test_main;
+ u32 i, n_bytes, dump_msg = tm->verbose;
+ u8 *p;
+ unserialize_integer (m, &n_bytes, sizeof (n_bytes));
+ p = unserialize_get (m, n_bytes);
+ if (tm->validate)
+ for (i = 0; i < n_bytes; i++)
+ if (p[i] != ((tm->rx_serial + i) & 0xff))
+ {
+ clib_warning ("corrupt msg at offset %d", i);
+ dump_msg = 1;
+ break;
+ }
+ if (dump_msg)
+ clib_warning ("got %d bytes, %U", n_bytes, format_hex_bytes, p, n_bytes);
+ tm->rx_serial += n_bytes;
+}
+
+MC_SERIALIZE_MSG (test_msg, static) =
+{
+.name = "test_msg",.serialize = serialize_test_msg,.unserialize =
+ unserialize_test_msg,};
+
+#define SERIALIZE 1
+
+#define EVENT_JOIN_STREAM 10
+#define EVENT_SEND_DATA 11
+
+static void
+test_rx_callback (mc_main_t * mcm,
+ mc_stream_t * stream,
+ mc_peer_id_t peer_id, u32 buffer_index)
+{
+ if (SERIALIZE)
+ {
+ mc_unserialize (mcm, stream, buffer_index);
+ }
+ else
+ {
+#if DEBUG > 1
+ vlib_main_t *vm = mcm->vlib_main;
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+ u8 *dp = vlib_buffer_get_current (b);
+
+ fformat (stdout, "RX from %U %U\n",
+ stream->transport->format_peer_id, peer_id,
+ format_hex_bytes, dp, tm->n_msg_bytes);
+
+#endif
+ }
+}
+
+static u8 *
+test_snapshot_callback (mc_main_t * mcm,
+ u8 * data_vector, u32 last_global_sequence_processed)
+{
+ if (SERIALIZE)
+ {
+ serialize_main_t m;
+
+ /* Append serialized data to data vector. */
+ serialize_open_vector (&m, data_vector);
+ m.stream.current_buffer_index = vec_len (data_vector);
+
+ return serialize_close_vector (&m);
+ }
+ else
+ return format (data_vector,
+ "snapshot, last global seq 0x%x",
+ last_global_sequence_processed);
+}
+
+static void
+test_handle_snapshot_callback (mc_main_t * mcm, u8 * data, u32 n_data_bytes)
+{
+ if (SERIALIZE)
+ {
+ serialize_main_t s;
+ unserialize_open_data (&s, data, n_data_bytes);
+ }
+ else
+ clib_warning ("snapshot `%*s'", n_data_bytes, data);
+}
+
+static mc_socket_main_t mc_socket_main;
+
+static uword
+mc_test_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ mc_test_main_t *tm = &mc_test_main;
+ mc_socket_main_t *msm = &mc_socket_main;
+ mc_main_t *mcm = &msm->mc_main;
+ uword event_type, *event_data = 0;
+ u32 data_serial = 0, stream_index;
+ f64 delay;
+ mc_stream_config_t config;
+ clib_error_t *error;
+ int i;
+ char *intfcs[] = { "eth1", "eth0", "ce" };
+
+ memset (&config, 0, sizeof (config));
+ config.name = "test";
+ config.window_size = tm->window_size;
+ config.rx_buffer = test_rx_callback;
+ config.catchup_snapshot = test_snapshot_callback;
+ config.catchup = test_handle_snapshot_callback;
+ stream_index = ~0;
+
+ msm->multicast_tx_ip4_address_host_byte_order = 0xefff0100;
+ msm->base_multicast_udp_port_host_byte_order = 0xffab;
+
+ error = mc_socket_main_init (&mc_socket_main, intfcs, ARRAY_LEN (intfcs));
+ if (error)
+ {
+ clib_error_report (error);
+ exit (1);
+ }
+
+ mcm->we_can_be_relay_master = 1;
+
+ while (1)
+ {
+ vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &event_data);
+
+ switch (event_type)
+ {
+ case EVENT_JOIN_STREAM:
+ stream_index = mc_stream_join (mcm, &config);
+ break;
+
+ case EVENT_SEND_DATA:
+ {
+ f64 times[2];
+
+ if (stream_index == ~0)
+ stream_index = mc_stream_join (mcm, &config);
+
+ times[0] = vlib_time_now (vm);
+ for (i = 0; i < event_data[0]; i++)
+ {
+ u32 bi;
+ if (SERIALIZE)
+ {
+ mc_serialize_stream (mcm, stream_index, &test_msg,
+ data_serial);
+ }
+ else
+ {
+ u8 *mp;
+ mp = mc_get_vlib_buffer (vm, sizeof (mp[0]), &bi);
+ mp[0] = data_serial;
+ mc_stream_send (mcm, stream_index, bi);
+ }
+ if (tm->min_delay > 0)
+ {
+ delay =
+ tm->min_delay +
+ random_f64 (&tm->seed) * (tm->max_delay -
+ tm->min_delay);
+ vlib_process_suspend (vm, delay);
+ }
+ data_serial++;
+ }
+ times[1] = vlib_time_now (vm);
+ clib_warning ("done sending %d; %.4e per sec",
+ event_data[0],
+ (f64) event_data[0] / (times[1] - times[0]));
+ break;
+ }
+
+ default:
+ clib_warning ("bug");
+ break;
+ }
+
+ if (event_data)
+ _vec_len (event_data) = 0;
+ }
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (mc_test_process_node, static) =
+{
+.function = mc_test_process,.type = VLIB_NODE_TYPE_PROCESS,.name =
+ "mc-test-process",};
+/* *INDENT-ON* */
+
+static clib_error_t *
+mc_test_command (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ f64 npkts = 10;
+
+ if (unformat (input, "join"))
+ {
+ vlib_cli_output (vm, "Join stream...\n");
+ vlib_process_signal_event (vm, mc_test_process_node.index,
+ EVENT_JOIN_STREAM, 0);
+ return 0;
+ }
+ else if (unformat (input, "send %f", &npkts) || unformat (input, "send"))
+ {
+ vlib_process_signal_event (vm, mc_test_process_node.index,
+ EVENT_SEND_DATA, (uword) npkts);
+ vlib_cli_output (vm, "Send %.0f pkts...\n", npkts);
+
+ return 0;
+ }
+ else
+ return unformat_parse_error (input);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_mc_command, static) =
+{
+.path = "test mc",.short_help = "Test mc command",.function =
+ mc_test_command,};
+/* *INDENT-ON* */
+
+static clib_error_t *
+mc_show_command (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ mc_main_t *mcm = &mc_socket_main.mc_main;
+ vlib_cli_output (vm, "%U", format_mc_main, mcm);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_mc_command, static) =
+{
+.path = "show mc",.short_help = "Show mc command",.function =
+ mc_show_command,};
+/* *INDENT-ON* */
+
+static clib_error_t *
+mc_clear_command (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ mc_main_t *mcm = &mc_socket_main.mc_main;
+ mc_clear_stream_stats (mcm);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_mc_command, static) =
+{
+.path = "clear mc",.short_help = "Clear mc command",.function =
+ mc_clear_command,};
+/* *INDENT-ON* */
+
+static clib_error_t *
+mc_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ mc_test_main_t *tm = &mc_test_main;
+ mc_socket_main_t *msm = &mc_socket_main;
+ clib_error_t *error = 0;
+
+ tm->min_n_msg_bytes = 4;
+ tm->max_n_msg_bytes = 4;
+ tm->window_size = 8;
+ tm->seed = getpid ();
+ tm->verbose = 0;
+ tm->validate = 1;
+ tm->min_delay = 10e-6;
+ tm->max_delay = 10e-3;
+ tm->n_packets_to_send = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "interface %s", &msm->multicast_interface_name))
+ ;
+
+ else if (unformat (input, "n-bytes %d", &tm->max_n_msg_bytes))
+ tm->min_n_msg_bytes = tm->max_n_msg_bytes;
+ else if (unformat (input, "max-n-bytes %d", &tm->max_n_msg_bytes))
+ ;
+ else if (unformat (input, "min-n-bytes %d", &tm->min_n_msg_bytes))
+ ;
+ else if (unformat (input, "seed %d", &tm->seed))
+ ;
+ else if (unformat (input, "window %d", &tm->window_size))
+ ;
+ else if (unformat (input, "verbose"))
+ tm->verbose = 1;
+ else if (unformat (input, "no-validate"))
+ tm->validate = 0;
+ else if (unformat (input, "min-delay %f", &tm->min_delay))
+ ;
+ else if (unformat (input, "max-delay %f", &tm->max_delay))
+ ;
+ else if (unformat (input, "no-delay"))
+ tm->min_delay = tm->max_delay = 0;
+ else if (unformat (input, "n-packets %f", &tm->n_packets_to_send))
+ ;
+
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (tm->n_packets_to_send > 0)
+ vlib_process_signal_event (vm, mc_test_process_node.index,
+ EVENT_SEND_DATA,
+ (uword) tm->n_packets_to_send);
+
+ return error;
+}
+
+VLIB_CONFIG_FUNCTION (mc_config, "mc");
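+
+/*
+ * Illustrative startup.conf stanza consumed by mc_config above:
+ *   mc { interface eth0 n-bytes 8 window 16 no-delay n-packets 100 }
+ */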
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/examples/vlib/plex_test.c b/src/examples/vlib/plex_test.c
new file mode 100644
index 00000000..ce0c8ef1
--- /dev/null
+++ b/src/examples/vlib/plex_test.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/parse.h>
+#include <vlib/unix/unix.h>
+
+static u8 *
+format_value_v4_address (u8 * s, va_list * args)
+{
+ vlib_parse_value_t *v = va_arg (*args, vlib_parse_value_t *);
+ u32 a = v->value.as_uword;
+
+ s = format (s, "%d.%d.%d.%d",
+ (a >> 24) & 0xFF,
+ (a >> 16) & 0xFF, (a >> 8) & 0xFF, (a >> 0) & 0xFF);
+
+ return s;
+}
+
+static vlib_parse_match_t
+v4_address_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+ vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+ u32 digit;
+ u32 value = 0;
+ int i;
+
+ if (vec_len (pm->tokens) - (t - pm->tokens) < 7)
+ return VLIB_PARSE_MATCH_FAIL;
+
+ /* NUMBER DOT NUMBER DOT NUMBER DOT NUMBER */
+
+ for (i = 0; i < 7; i++)
+ {
+ if ((i & 1) == 0)
+ {
+ if (t[i].token != VLIB_LEX_number)
+ return VLIB_PARSE_MATCH_FAIL;
+ if (t[i].value.as_uword > 0xff)
+ return VLIB_PARSE_MATCH_FAIL;
+ digit = t[i].value.as_uword;
+ value = (value << 8) | digit;
+ }
+ else
+ {
+ if (t[i].token != VLIB_LEX_dot)
+ return VLIB_PARSE_MATCH_FAIL;
+ }
+ }
+ /* note: caller advances by 1 */
+ pm->current_token_index += 6;
+ valuep->value.as_uword = value;
+ return VLIB_PARSE_MATCH_VALUE;
+}
+
+PARSE_TYPE_INIT (v4_address, v4_address_match, 0, format_value_v4_address)
+
+static u8 *
+format_value_v4_address_and_mask (u8 * s, va_list * args)
+{
+ vlib_parse_value_t *v = va_arg (*args, vlib_parse_value_t *);
+ u32 *a = v->value.as_pointer;
+
+ s = format (s, "%d.%d.%d.%d",
+ (a[0] >> 24) & 0xFF,
+ (a[0] >> 16) & 0xFF, (a[0] >> 8) & 0xFF, (a[0] >> 0) & 0xFF);
+ s = format (s, "/%d", a[1]);
+
+ return s;
+}
+
+static vlib_parse_match_t
+v4_address_and_mask_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+ vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+ u32 digit;
+ u32 address = 0;
+ u32 *rv = 0;
+ int i;
+
+ if (vec_len (pm->tokens) - (t - pm->tokens) < 9)
+ return VLIB_PARSE_MATCH_FAIL;
+
+ /* NUMBER DOT NUMBER DOT NUMBER DOT NUMBER */
+
+ for (i = 0; i < 7; i++)
+ {
+ if ((i & 1) == 0)
+ {
+ if (t[i].token != VLIB_LEX_number)
+ return VLIB_PARSE_MATCH_FAIL;
+ if (t[i].value.as_uword > 0xff)
+ return VLIB_PARSE_MATCH_FAIL;
+ digit = t[i].value.as_uword;
+ address = (address << 8) | digit;
+ }
+ else
+ {
+ if (t[i].token != VLIB_LEX_dot)
+ return VLIB_PARSE_MATCH_FAIL;
+ }
+ }
+
+ if (t[7].token != VLIB_LEX_slash || t[8].token != VLIB_LEX_number)
+ return VLIB_PARSE_MATCH_FAIL;
+
+ vec_add1 (rv, address);
+ vec_add1 (rv, t[8].value.as_uword);
+
+ /* note: caller advances by 1 */
+ pm->current_token_index += 8;
+ valuep->value.as_pointer = rv;
+ return VLIB_PARSE_MATCH_VALUE;
+}
+
+void
+v4_address_and_mask_cleanup (vlib_parse_value_t * valuep)
+{
+ u32 *trash = valuep->value.as_pointer;
+ vec_free (trash);
+}
+
+PARSE_TYPE_INIT (v4_address_and_mask, v4_address_and_mask_match,
+ v4_address_and_mask_cleanup, format_value_v4_address_and_mask)
+
+vlib_lex_main_t vlib_lex_main;
+
+vlib_parse_match_t
+eval_factor0 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ clib_warning ("%U", format_vlib_parse_value, pm);
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_factor1 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ clib_warning ("%U", format_vlib_parse_value, pm);
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_factor2 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ word a;
+ int index = vec_len (pm->parse_value) - 1;
+
+ a = pm->parse_value[index].value.as_word;
+
+ pm->parse_value[index].value.as_word = -a;
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_term0 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ clib_warning ("%U", format_vlib_parse_value, pm);
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_term1 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ uword a, b;
+ int index = vec_len (pm->parse_value) - 2;
+
+ a = pm->parse_value[index].value.as_uword;
+ b = pm->parse_value[index + 1].value.as_uword;
+
+ pm->parse_value[index].value.as_uword = a * b;
+ _vec_len (pm->parse_value) -= 1;
+ clib_warning ("%U", format_vlib_parse_value, pm);
+
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_term2 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ uword a, b;
+ int index = vec_len (pm->parse_value) - 2;
+
+ a = pm->parse_value[index].value.as_uword;
+ b = pm->parse_value[index + 1].value.as_uword;
+
+ pm->parse_value[index].value.as_uword = a / b;
+ _vec_len (pm->parse_value) -= 1;
+ clib_warning ("%U", format_vlib_parse_value, pm);
+
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_exp0 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_exp1 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ uword a, b;
+ int index = vec_len (pm->parse_value) - 2;
+
+ a = pm->parse_value[index].value.as_uword;
+ b = pm->parse_value[index + 1].value.as_uword;
+
+ pm->parse_value[index].value.as_uword = a + b;
+ _vec_len (pm->parse_value) -= 1;
+ clib_warning ("%U", format_vlib_parse_value, pm);
+
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_exp2 (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ uword a, b;
+ int index = vec_len (pm->parse_value) - 2;
+
+ a = pm->parse_value[index].value.as_uword;
+ b = pm->parse_value[index + 1].value.as_uword;
+
+ pm->parse_value[index].value.as_uword = a - b;
+ _vec_len (pm->parse_value) -= 1;
+ clib_warning ("%U", format_vlib_parse_value, pm);
+
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+vlib_parse_match_t
+eval_result (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ clib_warning ("%U", format_vlib_parse_value, pm);
+ return VLIB_PARSE_MATCH_DONE;
+}
+
+vlib_parse_match_t
+noop_match_rule (vlib_parse_main_t * pm,
+ vlib_parse_item_t * item, vlib_parse_value_t * value)
+{
+ clib_warning ("%U", format_vlib_parse_value, pm);
+ return VLIB_PARSE_MATCH_RULE;
+}
+
+#if 0
+PARSE_INIT (t1, "moo", eval0);
+PARSE_INIT (t2, "moo cow mumble", eval1);
+PARSE_INIT (t3, "moo cow", eval2);
+PARSE_INIT (t4, "moo cow mumble grunch", eval3);
+#endif
+
+#if 0
+PARSE_INIT (r1, "eval <exp>", eval_result);
+
+PARSE_INIT (r2, "<exp> = <term><exp2>", eval_exp0);
+PARSE_INIT (r3, "<exp2> = <plus> <exp>", eval_exp1);
+PARSE_INIT (r4, "<exp2> = <minus> <exp>", eval_exp2);
+PARSE_INIT (r5, "<exp2> = ", noop_match_rule);
+PARSE_TYPE_INIT (exp, rule_match, 0, 0);
+PARSE_TYPE_INIT (exp2, rule_match, 0, 0);
+
+PARSE_INIT (r6, "<term> = <factor><term2>", eval_term0);
+PARSE_INIT (r7, "<term2> = <star> <term>", eval_term1);
+PARSE_INIT (r8, "<term2> = <slash> <term>", eval_term2);
+PARSE_INIT (r9, "<term2> = ", noop_match_rule);
+PARSE_TYPE_INIT (term, rule_match, 0, 0);
+PARSE_TYPE_INIT (term2, rule_match, 0, 0);
+
+PARSE_INIT (r11, "<factor> = <lpar> <exp> <rpar>", eval_factor1);
+PARSE_INIT (r10, "<factor> = <number>", eval_factor0);
+PARSE_INIT (r12, "<factor> = <minus> <factor>", eval_factor2);
+
+PARSE_TYPE_INIT (factor, rule_match, 0, 0);
+#endif
+
+PARSE_INIT (r1, "eval <exp>", eval_result);
+
+#if 1
+PARSE_INIT (r2, "<exp> = <term><exp2>", eval_exp0);
+PARSE_INIT (r3, "<exp2> = <plus> <exp>", eval_exp1);
+PARSE_INIT (r4, "<exp2> = <minus> <exp>", eval_exp2);
+PARSE_INIT (r5, "<exp2> = ", noop_match_rule);
+PARSE_TYPE_INIT (exp, rule_match, 0, 0);
+PARSE_TYPE_INIT (exp2, rule_match, 0, 0);
+
+PARSE_INIT (r6, "<term> = <factor><term2>", eval_term0);
+PARSE_INIT (r7, "<term2> = <star> <term>", eval_term1);
+PARSE_INIT (r8, "<term2> = <slash> <term>", eval_term2);
+PARSE_INIT (r9, "<term2> = ", noop_match_rule);
+PARSE_TYPE_INIT (term, rule_match, 0, 0);
+PARSE_TYPE_INIT (term2, rule_match, 0, 0);
+
+PARSE_INIT (r11, "<factor> = <lpar> <exp> <rpar>", eval_factor1);
+PARSE_INIT (r10, "<factor> = <number>", eval_factor0);
+PARSE_INIT (r12, "<factor> = <minus> <factor>", eval_factor2);
+
+PARSE_TYPE_INIT (factor, rule_match, 0, 0);
+#endif
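+
+/*
+ * With the rules above, "eval 2*3+4" leaves 10 on the parse value stack.
+ * The grammar is right-recursive, so "eval 10-2-3" evaluates as
+ * 10-(2-3) = 11.
+ */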
+
+#if 0
+PARSE_TYPE_INIT (exp, rule_match, 0, 0);
+PARSE_INIT (r6, "<exp> = a b", eval_term0);
+PARSE_INIT (r7, "<exp> = c d", eval_term1);
+PARSE_INIT (r9, "<exp> = ", noop_match_rule);
+#endif
+
+#if 0
+#define foreach_rule_evaluator \
+_(0) \
+_(1) \
+_(2) \
+_(3)
+
+#define _(n) \
+vlib_parse_match_t eval##n (vlib_parse_main_t *pm, \
+ vlib_parse_item_t *item, \
+ vlib_parse_value_t *value) \
+{ \
+ clib_warning ("%U", format_vlib_parse_value, pm); \
+ return VLIB_PARSE_MATCH_DONE; \
+}
+foreach_rule_evaluator
+#undef _
+PARSE_INIT (r1, "eval <moo>", eval_result);
+
+PARSE_INIT (r2, "<moo> = cow", eval0);
+PARSE_INIT (r4, "<moo> = ", eval1);
+PARSE_TYPE_INIT (moo, rule_match, 0, 0);
+#endif
+
+
+clib_error_t *
+test_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+ if ((error = vlib_call_init_function (vm, parse_init)))
+ return error;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (test_init);
+
+clib_error_t *
+vlib_stdlex_init (vlib_main_t * vm)
+{
+ vlib_lex_main_t *lm = &vlib_lex_main;
+ u16 top_index;
+ u16 slash_index, slash_star_index, slash_slash_index, slash_star_star_index;
+ u16 slash_token;
+ u16 word_index;
+ u16 zero_index, octal_index, decimal_index, hex_index, binary_index;
+
+ top_index = vlib_lex_add_table ("top");
+
+#define foreach_top_level_single_character_token \
+ _('(', lpar) \
+ _(')', rpar) \
+ _(';', semi) \
+ _('[', lbrack) \
+ _(']', rbrack) \
+ _('{', lcurly) \
+ _('}', rcurly) \
+ _('+', plus) \
+ _('-', minus) \
+ _('*', star) \
+ _('%', percent) \
+ _('@', atsign) \
+ _(',', comma) \
+ _('.', dot) \
+ _('?', qmark)
+
+#define _(c,t) \
+ vlib_lex_set_action_range(top_index,c,c,VLIB_LEX_RETURN,vlib_lex_add_token(lm, #t), top_index);
+ foreach_top_level_single_character_token;
+#undef _
+
+ /* Numbers */
+ zero_index = vlib_lex_add_table ("zero");
+ octal_index = vlib_lex_add_table ("octal");
+ decimal_index = vlib_lex_add_table ("decimal");
+ hex_index = vlib_lex_add_table ("hex");
+ binary_index = vlib_lex_add_table ("binary");
+
+ /* Support 0x 0b 0t and 0123 [octal] */
+ vlib_lex_set_action_range (top_index, '0', '0', VLIB_LEX_START_NUMBER, 10,
+ zero_index);
+ vlib_lex_set_action_range (top_index, '1', '9', VLIB_LEX_START_NUMBER, 10,
+ decimal_index);
+
+ vlib_lex_set_action_range (zero_index, 0, 0x7F, VLIB_LEX_RETURN_AND_RESCAN,
+ VLIB_LEX_number, top_index);
+
+ vlib_lex_set_action_range (zero_index, 'x', 'x', VLIB_LEX_IGNORE, ~0,
+ hex_index);
+ vlib_lex_set_action_range (zero_index, 'b', 'b', VLIB_LEX_IGNORE, ~0,
+ binary_index);
+ vlib_lex_set_action_range (zero_index, 't', 't', VLIB_LEX_IGNORE, ~0,
+ decimal_index);
+ vlib_lex_set_action_range (zero_index, '0', '7', VLIB_LEX_START_NUMBER, 8,
+ octal_index);
+
+ /* Octal */
+ vlib_lex_set_action_range (octal_index, 0, 0x7f, VLIB_LEX_RETURN_AND_RESCAN,
+ VLIB_LEX_number, top_index);
+ vlib_lex_set_action_range (octal_index, '0', '7', VLIB_LEX_ADD_TO_NUMBER, 8,
+ octal_index);
+
+ /* Decimal */
+ vlib_lex_set_action_range (decimal_index, 0, 0x7f,
+ VLIB_LEX_RETURN_AND_RESCAN, VLIB_LEX_number,
+ top_index);
+ vlib_lex_set_action_range (decimal_index, '0', '9', VLIB_LEX_ADD_TO_NUMBER,
+ 10, decimal_index);
+
+ /* Hex */
+ vlib_lex_set_action_range (hex_index, 0, 0x7f, VLIB_LEX_RETURN_AND_RESCAN,
+ VLIB_LEX_number, top_index);
+ vlib_lex_set_action_range (hex_index, '0', '9', VLIB_LEX_ADD_TO_NUMBER, 16,
+ hex_index);
+ vlib_lex_set_action_range (hex_index, 'a', 'f', VLIB_LEX_ADD_TO_NUMBER, 16,
+ hex_index);
+ vlib_lex_set_action_range (hex_index, 'A', 'F', VLIB_LEX_ADD_TO_NUMBER, 16,
+ hex_index);
+
+ /* Binary */
+ vlib_lex_set_action_range (binary_index, 0, 0x7f,
+ VLIB_LEX_RETURN_AND_RESCAN, VLIB_LEX_number,
+ top_index);
+ vlib_lex_set_action_range (binary_index, '0', '1', VLIB_LEX_ADD_TO_NUMBER,
+ 2, binary_index);
+
+ /* c/c++ comment syntax is the worst... */
+
+ slash_index = vlib_lex_add_table ("slash");
+ slash_star_index = vlib_lex_add_table ("slash_star");
+ slash_star_star_index = vlib_lex_add_table ("slash_star_star");
+ slash_slash_index = vlib_lex_add_table ("slash_slash");
+ slash_token = vlib_lex_add_token (lm, "slash");
+
+ /* Top level: see a slash, ignore, go to slash table */
+ vlib_lex_set_action_range (top_index, '/', '/', VLIB_LEX_IGNORE, ~0,
+ slash_index);
+
+ /* default for slash table: return SLASH, go to top table */
+ vlib_lex_set_action_range (slash_index, 1, 0x7F, VLIB_LEX_RETURN_AND_RESCAN,
+ slash_token, top_index);
+ /* see slash-slash, go to s-s table */
+ vlib_lex_set_action_range (slash_index, '/', '/', VLIB_LEX_IGNORE, ~0,
+ slash_slash_index);
+ /* see slash-star, go to s-* table */
+ vlib_lex_set_action_range (slash_index, '*', '*', VLIB_LEX_IGNORE, ~0,
+ slash_star_index);
+
+ /* EOL in s-s table, ignore, go to top table */
+ vlib_lex_set_action_range (slash_slash_index, '\n', '\n', VLIB_LEX_IGNORE,
+ ~0, top_index);
+
+ /* slash-star blah blah star */
+ vlib_lex_set_action_range (slash_star_index, '*', '*', VLIB_LEX_IGNORE, ~0,
+ slash_star_star_index);
+
+ /* slash star blah blah star slash */
+ vlib_lex_set_action_range (slash_star_star_index, '/', '/', VLIB_LEX_IGNORE,
+ ~0, top_index);
+
+ /* LT, =, GT */
+ vlib_lex_set_action_range (top_index, '<', '<', VLIB_LEX_RETURN,
+ VLIB_LEX_lt, top_index);
+ vlib_lex_set_action_range (top_index, '=', '=', VLIB_LEX_RETURN,
+ VLIB_LEX_equals, top_index);
+ vlib_lex_set_action_range (top_index, '>', '>', VLIB_LEX_RETURN,
+ VLIB_LEX_gt, top_index);
+
+ /* words, key and otherwise */
+ word_index = vlib_lex_add_table ("word");
+
+ vlib_lex_set_action_range (top_index, 'a', 'z', VLIB_LEX_ADD_TO_TOKEN, ~0,
+ word_index);
+ vlib_lex_set_action_range (top_index, 'A', 'Z', VLIB_LEX_ADD_TO_TOKEN, ~0,
+ word_index);
+
+ vlib_lex_set_action_range (word_index, 0, 0x7f, VLIB_LEX_KEYWORD_CHECK, ~0,
+ top_index);
+
+ vlib_lex_set_action_range (word_index, 'a', 'z', VLIB_LEX_ADD_TO_TOKEN, ~0,
+ word_index);
+ vlib_lex_set_action_range (word_index, 'A', 'Z', VLIB_LEX_ADD_TO_TOKEN, ~0,
+ word_index);
+ vlib_lex_set_action_range (word_index, '_', '_', VLIB_LEX_ADD_TO_TOKEN, ~0,
+ word_index);
+ vlib_lex_set_action_range (word_index, '0', '9', VLIB_LEX_ADD_TO_TOKEN, ~0,
+ word_index);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/g2.am b/src/g2.am
new file mode 100644
index 00000000..e7965733
--- /dev/null
+++ b/src/g2.am
@@ -0,0 +1,32 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+bin_PROGRAMS += g2
+
+g2_SOURCES = \
+ tools/g2/clib.c \
+ tools/g2/cpel.c \
+ tools/g2/cpel.h \
+ tools/g2/events.c \
+ tools/g2/g2.h \
+ tools/g2/main.c \
+ tools/g2/menu1.c \
+ tools/g2/pointsel.c \
+ tools/g2/props.c \
+ tools/g2/props.h \
+ tools/g2/g2version.c \
+ tools/g2/view1.c
+
+g2_LDADD = $(g2_LIBS) libvppinfra.la -lpthread -lm
+
+# vi:syntax=automake
diff --git a/src/m4/ax_vpp_find_jdk8.m4 b/src/m4/ax_vpp_find_jdk8.m4
new file mode 100644
index 00000000..382a6c80
--- /dev/null
+++ b/src/m4/ax_vpp_find_jdk8.m4
@@ -0,0 +1,43 @@
+
+
+AC_DEFUN([AX_VPP_FIND_JDK8],
+[
+while true
+do
+ if test "${JAVA_HOME+set}" = set ; then
+ AC_MSG_CHECKING([${JAVA_HOME} for Java 8 compiler])
+ JAVAC=${JAVA_HOME}/bin/javac
+ JAVAH=${JAVA_HOME}/bin/javah
+ JAR=${JAVA_HOME}/bin/jar
+ JAVA_VERSION=$(${JAVA_HOME}/bin/javac -source 8 -version 2>&1)
+ if test 0 -eq "$?"; then
+ JAVA_VERSION=$(echo "${JAVA_VERSION}" | cut -d\ -f2)
+ AC_MSG_RESULT([ok])
+ else
+ AC_MSG_RESULT([no])
+ AC_MSG_ERROR([Java in ${JAVA_HOME} (path specified in JAVA_HOME) cannot compile Java 8 code])
+ fi
+ break
+ fi
+
+ for dir in $(find /usr/lib/jvm/* -maxdepth 0 -type d); do
+ AC_MSG_CHECKING([${dir} for Java 8 compiler])
+ JAVA_VERSION=$(${dir}/bin/javac -source 8 -version 2>&1)
+ if test 0 -eq "$?"; then
+ JAVA_VERSION=$(echo "${JAVA_VERSION}" | cut -d\ -f2)
+ JAVA_HOME=${dir}
+ JAVAC=${dir}/bin/javac
+ JAVAH=${dir}/bin/javah
+ JAR=${dir}/bin/jar
+ AC_MSG_RESULT([found version $JAVA_VERSION])
+ break
+ else
+ JAVA_VERSION=""
+ AC_MSG_RESULT([no])
+ fi
+ done
+
+ test "${JAVA_HOME}set" = set && AC_MSG_ERROR([Could not find Java 8 compiler])
+ break
+done
+])
diff --git a/src/perftool.am b/src/perftool.am
new file mode 100644
index 00000000..23e45033
--- /dev/null
+++ b/src/perftool.am
@@ -0,0 +1,44 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+bin_PROGRAMS += c2cpel cpelatency cpeldump cpelinreg cpelstate elog_merge
+
+lib_LTLIBRARIES += libcperf.la
+
+libcperf_la_SOURCES = \
+ tools/perftool/delsvec.c \
+ tools/perftool/linreg.c \
+ tools/perftool/props.c \
+ tools/perftool/cpel_util.c
+
+PERFTOOL_LIBS = libcperf.la libvppinfra.la -lm
+
+c2cpel_SOURCES = tools/perftool/c2cpel.c
+c2cpel_LDADD = $(PERFTOOL_LIBS)
+
+cpelatency_SOURCES = tools/perftool/cpelatency.c
+cpelatency_LDADD = $(PERFTOOL_LIBS)
+
+cpeldump_SOURCES = tools/perftool/cpeldump.c
+cpeldump_LDADD = $(PERFTOOL_LIBS)
+
+cpelinreg_SOURCES = tools/perftool/cpelinreg.c
+cpelinreg_LDADD = $(PERFTOOL_LIBS)
+
+cpelstate_SOURCES = tools/perftool/cpelstate.c
+cpelstate_LDADD = $(PERFTOOL_LIBS)
+
+elog_merge_SOURCES = tools/perftool/elog_merge.c
+elog_merge_LDADD = $(PERFTOOL_LIBS)
+
+# vi:syntax=automake
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
new file mode 100644
index 00000000..205bfe6d
--- /dev/null
+++ b/src/plugins/Makefile.am
@@ -0,0 +1,101 @@
+
+# Copyright (c) <current-year> <your-organization>
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+AUTOMAKE_OPTIONS = foreign subdir-objects
+
+AM_CFLAGS = -Wall -I${top_srcdir} -I${top_builddir}
+AM_LDFLAGS = -module -shared -avoid-version
+AM_LIBTOOLFLAGS = --quiet
+SUFFIXES = .api.h .api .api.json
+API_FILES =
+BUILT_SOURCES =
+vppplugins_LTLIBRARIES =
+vppapitestplugins_LTLIBRARIES =
+noinst_HEADERS =
+nobase_apiinclude_HEADERS =
+nobase_include_HEADERS =
+
+vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins
+vpppluginsdir = ${libdir}/vpp_plugins
+
+if ENABLE_ACL_PLUGIN
+include acl.am
+endif
+
+if ENABLE_DPDK_PLUGIN
+include dpdk.am
+endif
+
+if ENABLE_FLOWPROBE_PLUGIN
+include flowprobe.am
+endif
+
+
+if ENABLE_GTPU_PLUGIN
+include gtpu.am
+endif
+
+if ENABLE_ILA_PLUGIN
+include ila.am
+endif
+
+if ENABLE_IOAM_PLUGIN
+include ioam.am
+endif
+
+if ENABLE_IXGE_PLUGIN
+include ixge.am
+endif
+
+if ENABLE_LB_PLUGIN
+include lb.am
+endif
+
+if ENABLE_MEMIF_PLUGIN
+include memif.am
+endif
+
+if ENABLE_PPPOE_PLUGIN
+include pppoe.am
+endif
+
+if ENABLE_SIXRD_PLUGIN
+include sixrd.am
+endif
+
+if ENABLE_NAT_PLUGIN
+include nat.am
+endif
+
+include ../suffix-rules.mk
+
+# Remove *.la files
+install-data-hook:
+ @-(cd $(vpppluginsdir) && $(RM) $(vppplugins_LTLIBRARIES))
+ @-(cd $(vppapitestpluginsdir) && $(RM) $(vppapitestplugins_LTLIBRARIES))
+
+###############################################################################
+# API
+###############################################################################
+
+apidir = $(prefix)/share/vpp/api/plugins
+apiincludedir = ${includedir}/vpp_plugins
+
+api_DATA = \
+ $(patsubst %.api,%.api.json,$(API_FILES))
+
+BUILT_SOURCES += \
+ $(patsubst %.api,%.api.h,$(API_FILES))
+
+CLEANFILES = $(BUILT_SOURCES) $(api_DATA)
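+
+# For example, acl/acl.am adds acl/acl.api to API_FILES; the rules above then
+# generate acl/acl.api.h as a built source and install acl/acl.api.json under
+# $(apidir).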
diff --git a/src/plugins/acl.am b/src/plugins/acl.am
new file mode 100644
index 00000000..0a414481
--- /dev/null
+++ b/src/plugins/acl.am
@@ -0,0 +1,35 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += acl_test_plugin.la
+vppplugins_LTLIBRARIES += acl_plugin.la
+
+acl_plugin_la_SOURCES = \
+ acl/acl.c \
+ acl/hash_lookup.c \
+ acl/fa_node.c \
+ acl/l2sess.h \
+ acl/manual_fns.h \
+ acl/acl_plugin.api.h
+
+API_FILES += acl/acl.api
+
+nobase_apiinclude_HEADERS += \
+ acl/acl_all_api_h.h \
+ acl/acl_msg_enum.h \
+ acl/manual_fns.h \
+ acl/acl.api.h
+
+acl_test_plugin_la_SOURCES = acl/acl_test.c acl/acl_plugin.api.h acl/acl_all_api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api
new file mode 100644
index 00000000..a0de24a2
--- /dev/null
+++ b/src/plugins/acl/acl.api
@@ -0,0 +1,477 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+ This file defines the vpp control-plane API messages
+ used to control the ACL plugin
+*/
+
+
+/** \brief Get the plugin version
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+
+define acl_plugin_get_version
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply to get the plugin version
+ @param context - returned sender context, to match reply w/ request
+ @param major - Incremented every time a known breaking behavior change is introduced
+ @param minor - Incremented with small changes, may be used to avoid buggy versions
+*/
+
+define acl_plugin_get_version_reply
+{
+ u32 context;
+ u32 major;
+ u32 minor;
+};
+
+/** \brief Control ping from client to api server request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define acl_plugin_control_ping
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Control ping from the client to the server response
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param vpe_pid - the pid of the vpe, returned by the server
+*/
+define acl_plugin_control_ping_reply
+{
+ u32 context;
+ i32 retval;
+ u32 client_index;
+ u32 vpe_pid;
+};
+
+/** \brief Access List Rule entry
+ @param is_permit - deny (0), permit (1), or permit+reflect(2) action on this rule.
+ @param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0)
+ @param src_ip_addr - Source prefix value
+ @param src_ip_prefix_len - Source prefix length
+ @param dst_ip_addr - Destination prefix value
+ @param dst_ip_prefix_len - Destination prefix length
+ @param proto - L4 protocol (http://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml)
+ @param srcport_or_icmptype_first - beginning of source port or ICMP4/6 type range
+ @param srcport_or_icmptype_last - end of source port or ICMP4/6 type range
+ @param dstport_or_icmpcode_first - beginning of destination port or ICMP4/6 code range
+ @param dstport_or_icmpcode_last - end of destination port or ICMP4/6 code range
+ @param tcp_flags_mask - if proto==6, mask to AND the TCP flags in the packet with
+ @param tcp_flags_value - if proto==6, value to match the masked TCP flags against
+*/
+
+typeonly manual_print define acl_rule
+{
+ u8 is_permit;
+ u8 is_ipv6;
+ u8 src_ip_addr[16];
+ u8 src_ip_prefix_len;
+ u8 dst_ip_addr[16];
+ u8 dst_ip_prefix_len;
+/*
+ * L4 protocol. IANA number. 1 = ICMP, 58 = ICMPv6, 6 = TCP, 17 = UDP.
+ * 0 => ignore L4 and ignore the ports/tcpflags when matching.
+ */
+ u8 proto;
+/*
+ * If the L4 protocol is TCP or UDP, the below
+ * hold ranges of ports, else if the L4 is ICMP/ICMPv6
+ * they hold ranges of ICMP(v6) types/codes.
+ *
+ * Ranges are inclusive, i.e. to match "any" TCP/UDP port,
+ * use first=0,last=65535. For ICMP(v6),
+ * use first=0,last=255.
+ */
+ u16 srcport_or_icmptype_first;
+ u16 srcport_or_icmptype_last;
+ u16 dstport_or_icmpcode_first;
+ u16 dstport_or_icmpcode_last;
+/*
+ * for proto = 6, this matches if the
+ * TCP flags in the packet, ANDed with tcp_flags_mask,
+ * is equal to tcp_flags_value.
+ */
+ u8 tcp_flags_mask;
+ u8 tcp_flags_value;
+};
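+
+/*
+ * Illustrative sketch (client-side C, not part of the API definition):
+ * filling a vl_api_acl_rule_t to permit established TCP sessions from
+ * 192.0.2.0/24 to any destination and any port. Field names come from
+ * the generated struct for the type defined above; the addresses and
+ * flag values are made up for the example.
+ *
+ *   vl_api_acl_rule_t r = { 0 };
+ *   r.is_permit = 1;                      // permit
+ *   r.is_ipv6 = 0;
+ *   r.src_ip_addr[0] = 192; r.src_ip_addr[1] = 0;
+ *   r.src_ip_addr[2] = 2;   r.src_ip_addr[3] = 0;
+ *   r.src_ip_prefix_len = 24;             // dst stays 0/0 = any
+ *   r.proto = 6;                          // TCP
+ *   // ranges are inclusive: 0..65535 matches any port
+ *   r.srcport_or_icmptype_first = htons (0);
+ *   r.srcport_or_icmptype_last = htons (65535);
+ *   r.dstport_or_icmpcode_first = htons (0);
+ *   r.dstport_or_icmpcode_last = htons (65535);
+ *   // match packets that have the ACK bit set, whatever else is set:
+ *   r.tcp_flags_mask = 0x10;              // AND the packet flags with this
+ *   r.tcp_flags_value = 0x10;             // ... and compare against this
+ */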
+
+/** \brief MACIP Access List Rule entry
+ @param is_permit - deny (0), permit (1) action on this rule.
+ @param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0)
+ @param src_mac - match masked source MAC address against this value
+ @param src_mac_mask - AND source MAC address with this value before matching
+ @param src_ip_addr - Source prefix value
+ @param src_ip_prefix_len - Source prefix length
+*/
+
+typeonly manual_print define macip_acl_rule
+{
+ u8 is_permit;
+ u8 is_ipv6;
+/*
+ * The source mac of the packet ANDed with src_mac_mask.
+ * The source ip[46] address in the packet is matched
+ * against src_ip_addr, with src_ip_prefix_len set to 0.
+ *
+ * For better performance, minimize the number of
+ * (src_mac_mask, src_ip_prefix_len) combinations
+ * in a MACIP ACL.
+ */
+ u8 src_mac[6];
+ u8 src_mac_mask[6];
+ u8 src_ip_addr[16];
+ u8 src_ip_prefix_len;
+};
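+
+/*
+ * Illustrative sketch (client-side C): a MACIP rule permitting one
+ * specific source MAC sending from 192.0.2.0/24. The MAC value is
+ * made up for the example; an all-zero src_mac_mask would wildcard
+ * the MAC entirely.
+ *
+ *   vl_api_macip_acl_rule_t r = { 0 };
+ *   u8 mac[6] = { 0x02, 0xfe, 0x11, 0x22, 0x33, 0x44 };
+ *   r.is_permit = 1;
+ *   r.is_ipv6 = 0;
+ *   memcpy (r.src_mac, mac, 6);
+ *   memset (r.src_mac_mask, 0xff, 6);     // require an exact MAC match
+ *   r.src_ip_addr[0] = 192; r.src_ip_addr[1] = 0; r.src_ip_addr[2] = 2;
+ *   r.src_ip_prefix_len = 24;
+ */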
+
+/** \brief Replace an existing ACL in-place or create a new ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - an existing ACL entry (0..0xfffffffe) to replace, or 0xffffffff to make new ACL
+ @param tag - a string value stored along with the ACL, for descriptive purposes
+ @param count - number of ACL rules
+ @param r - Rules for this access-list
+*/
+
+manual_print manual_endian define acl_add_replace
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index; /* ~0 to add, existing ACL# to replace */
+ u8 tag[64]; /* What gets in here gets out in the corresponding tag field when dumping the ACLs. */
+ u32 count;
+ vl_api_acl_rule_t r[count];
+};
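+
+/*
+ * Note on sizing (a sketch of the client-side pattern, not generated
+ * code): acl_add_replace is a variable-length message, so the client
+ * must allocate room for the rule vector as well as the fixed part:
+ *
+ *   u32 msg_size = sizeof (vl_api_acl_add_replace_t)
+ *                  + n_rules * sizeof (vl_api_acl_rule_t);
+ *
+ * The handler in acl.c cross-checks this formula against the actual
+ * message length (see verify_message_len) before touching mp->r.
+ */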
+
+/** \brief Reply to add/replace ACL
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - index of the updated or newly created ACL
+ @param retval 0 - no error
+*/
+
+define acl_add_replace_reply
+{
+ u32 context;
+ u32 acl_index;
+ i32 retval;
+};
+
+/** \brief Delete an ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - ACL index to delete
+*/
+
+autoreply manual_print define acl_del
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index;
+};
+
+/* acl_interface_add_del(_reply) is to be deprecated in favor of acl_interface_set_acl_list */
+/** \brief Use acl_interface_set_acl_list instead
+    Append an ACL index to, or remove it from, the list of ACLs checked on an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add or delete the ACL index from the list
+ @param is_input - check the ACL on input (1) or output (0)
+ @param sw_if_index - the interface to alter the list of ACLs on
+ @param acl_index - index of ACL for the operation
+*/
+
+autoreply manual_print define acl_interface_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+/*
+ * is_input = 0 => ACL applied on interface egress
+ * is_input = 1 => ACL applied on interface ingress
+ */
+ u8 is_input;
+ u32 sw_if_index;
+ u32 acl_index;
+};
+
+/** \brief Set the vector of input/output ACLs checked for an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface to alter the list of ACLs on
+ @param count - total number of ACL indices in the vector
+ @param n_input - the first n_input elements of acls are input ACLs, the rest are output ACLs
+ @param acls - vector of ACL indices
+*/
+
+autoreply manual_print define acl_interface_set_acl_list
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 count;
+ u8 n_input; /* First n_input ACLs are set as a list of input ACLs, the rest are applied as output */
+ u32 acls[count];
+};
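+
+/*
+ * Worked example: count = 3, n_input = 2, acls = { 5, 7, 4 } applies
+ * ACLs 5 and 7 on input and ACL 4 on output, replacing whatever ACL
+ * lists were previously set on the interface.
+ */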
+
+/** \brief Reply to set the ACL list on an interface
+    (auto-generated, since the request above is declared autoreply)
+ @param context - returned sender context, to match reply w/ request
+ @param retval 0 - no error
+*/
+
+/** \brief Dump the specific ACL contents or all of the ACLs' contents
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - ACL index to dump, ~0 to dump all ACLs
+*/
+
+define acl_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index; /* ~0 for all ACLs */
+};
+
+/** \brief Details about a single ACL contents
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - ACL index whose contents are being sent in this message
+ @param tag - Descriptive tag value which was supplied at ACL creation
+ @param count - Number of rules in this ACL
+ @param r - Array of rules within this ACL
+*/
+
+manual_endian manual_print define acl_details
+{
+ u32 context;
+ u32 acl_index;
+ u8 tag[64]; /* Same blob that was supplied to us when creating the ACL, one hopes. */
+ u32 count;
+ vl_api_acl_rule_t r[count];
+};
+
+/** \brief Dump the list(s) of ACLs applied to a specific interface or to all interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to dump the ACL list for
+*/
+
+define acl_interface_list_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index; /* ~0 for all interfaces */
+};
+
+/** \brief Details about the ACLs applied to a single interface
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - interface for which the list of ACLs is applied
+ @param count - total length of acl indices vector
+ @param n_input - the first n_input indices are input ACLs, the rest are output ACLs
+ @param acls - the vector of ACL indices
+*/
+
+define acl_interface_list_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 count;
+ u8 n_input;
+ u32 acls[count];
+};
+
+/** \brief Add a MACIP ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param tag - descriptive value for this MACIP ACL
+ @param count - number of rules in this MACIP ACL
+ @param r - vector of MACIP ACL rules
+*/
+
+manual_endian manual_print define macip_acl_add
+{
+ u32 client_index;
+ u32 context;
+ u8 tag[64];
+ u32 count;
+ vl_api_macip_acl_rule_t r[count];
+};
+
+/** \brief Reply to add MACIP ACL
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - index of the newly created MACIP ACL
+ @param retval 0 - no error
+*/
+
+define macip_acl_add_reply
+{
+ u32 context;
+ u32 acl_index;
+ i32 retval;
+};
+
+/** \brief Add/Replace a MACIP ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - an existing MACIP ACL entry (0..0xfffffffe) to replace, or 0xffffffff to make new MACIP ACL
+ @param tag - descriptive value for this MACIP ACL
+ @param count - number of rules in this MACIP ACL
+ @param r - vector of MACIP ACL rules
+*/
+
+manual_endian manual_print define macip_acl_add_replace
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index; /* ~0 to add, existing MACIP ACL# to replace */
+ u8 tag[64];
+ u32 count;
+ vl_api_macip_acl_rule_t r[count];
+};
+
+/** \brief Reply to add/replace MACIP ACL
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - index of the newly created MACIP ACL
+ @param retval 0 - no error
+*/
+
+define macip_acl_add_replace_reply
+{
+ u32 context;
+ u32 acl_index;
+ i32 retval;
+};
+
+/** \brief Delete a MACIP ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - MACIP ACL index to delete
+*/
+
+autoreply manual_print define macip_acl_del
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index;
+};
+
+/** \brief Add or delete a MACIP ACL to/from interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - apply (1) or remove (0) the MACIP ACL on the interface
+ @param sw_if_index - interface to apply the action to
+ @param acl_index - MACIP ACL index
+*/
+
+autoreply manual_print define macip_acl_interface_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ /* MACIP ACLs are always input */
+ u32 sw_if_index;
+ u32 acl_index;
+};
+
+/** \brief Dump one or all defined MACIP ACLs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - MACIP ACL index or ~0 to dump all MACIP ACLs
+*/
+
+define macip_acl_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index; /* ~0 for all ACLs */
+};
+
+/** \brief Details about one MACIP ACL
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - index of this MACIP ACL
+ @param tag - descriptive tag which was supplied during the creation
+ @param count - length of the vector of MACIP ACL rules
+ @param r - rules comprising this MACIP ACL
+*/
+
+manual_endian manual_print define macip_acl_details
+{
+ u32 context;
+ u32 acl_index;
+ u8 tag[64];
+ u32 count;
+ vl_api_macip_acl_rule_t r[count];
+};
+
+/** \brief Get the vector of MACIP ACL IDs applied to the interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+
+define macip_acl_interface_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply with the vector of MACIP ACLs by sw_if_index
+ @param context - returned sender context, to match reply w/ request
+ @param count - total number of elements in the vector
+ @param acls - the vector of active MACIP ACL indices per sw_if_index
+*/
+
+define macip_acl_interface_get_reply
+{
+ u32 context;
+ u32 count;
+ u32 acls[count];
+};
+
+/** \brief Dump the list(s) of MACIP ACLs applied to a specific interface or to all interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to dump the MACIP ACL list for
+*/
+
+define macip_acl_interface_list_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index; /* ~0 for all interfaces */
+};
+
+/** \brief Details about the MACIP ACLs applied to a single interface
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - interface for which the list of MACIP ACLs is applied
+ @param count - total length of acl indices vector
+ @param acls - the vector of MACIP ACL indices
+*/
+
+define macip_acl_interface_list_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 count;
+ u32 acls[count];
+};
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c
new file mode 100644
index 00000000..efd506de
--- /dev/null
+++ b/src/plugins/acl/acl.c
@@ -0,0 +1,2709 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stddef.h>
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <acl/acl.h>
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/classify/input_acl.h>
+#include <vpp/app/version.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <acl/acl_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <acl/acl_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <acl/acl_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <acl/acl_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <acl/acl_all_api_h.h>
+#undef vl_api_version
+
+#include "fa_node.h"
+#include "hash_lookup.h"
+
+acl_main_t acl_main;
+
+#define REPLY_MSG_ID_BASE am->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* List of message types that this plugin understands */
+
+#define foreach_acl_plugin_api_msg \
+_(ACL_PLUGIN_GET_VERSION, acl_plugin_get_version) \
+_(ACL_PLUGIN_CONTROL_PING, acl_plugin_control_ping) \
+_(ACL_ADD_REPLACE, acl_add_replace) \
+_(ACL_DEL, acl_del) \
+_(ACL_INTERFACE_ADD_DEL, acl_interface_add_del) \
+_(ACL_INTERFACE_SET_ACL_LIST, acl_interface_set_acl_list) \
+_(ACL_DUMP, acl_dump) \
+_(ACL_INTERFACE_LIST_DUMP, acl_interface_list_dump) \
+_(MACIP_ACL_ADD, macip_acl_add) \
+_(MACIP_ACL_ADD_REPLACE, macip_acl_add_replace) \
+_(MACIP_ACL_DEL, macip_acl_del) \
+_(MACIP_ACL_INTERFACE_ADD_DEL, macip_acl_interface_add_del) \
+_(MACIP_ACL_DUMP, macip_acl_dump) \
+_(MACIP_ACL_INTERFACE_GET, macip_acl_interface_get) \
+_(MACIP_ACL_INTERFACE_LIST_DUMP, macip_acl_interface_list_dump)
+
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Access Control Lists",
+};
+/* *INDENT-ON* */
+
+
+static void *
+acl_set_heap(acl_main_t *am)
+{
+ if (0 == am->acl_mheap) {
+ am->acl_mheap = mheap_alloc (0 /* use VM */ , am->acl_mheap_size);
+ mheap_t *h = mheap_header (am->acl_mheap);
+ h->flags |= MHEAP_FLAG_THREAD_SAFE;
+ }
+ void *oldheap = clib_mem_set_heap(am->acl_mheap);
+ return oldheap;
+}
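+
+/*
+ * Usage pattern throughout this file (a sketch): switch to the
+ * ACL-private heap for the duration of the state manipulation and
+ * restore the previous heap before returning.
+ *
+ *   void *oldheap = acl_set_heap (am);
+ *   // ... vector and pool operations on acl_main state ...
+ *   clib_mem_set_heap (oldheap);
+ */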
+
+void
+acl_plugin_acl_set_validate_heap(acl_main_t *am, int on)
+{
+ clib_mem_set_heap(acl_set_heap(am));
+ mheap_t *h = mheap_header (am->acl_mheap);
+ if (on) {
+ h->flags |= MHEAP_FLAG_VALIDATE;
+ h->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ mheap_validate(h);
+ } else {
+ h->flags &= ~MHEAP_FLAG_VALIDATE;
+ h->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ }
+}
+
+void
+acl_plugin_acl_set_trace_heap(acl_main_t *am, int on)
+{
+ clib_mem_set_heap(acl_set_heap(am));
+ mheap_t *h = mheap_header (am->acl_mheap);
+ if (on) {
+ h->flags |= MHEAP_FLAG_TRACE;
+ } else {
+ h->flags &= ~MHEAP_FLAG_TRACE;
+ }
+}
+
+static void
+vl_api_acl_plugin_get_version_t_handler (vl_api_acl_plugin_get_version_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_acl_plugin_get_version_reply_t *rmp;
+ int msg_size = sizeof (*rmp);
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (msg_size);
+ memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_ACL_PLUGIN_GET_VERSION_REPLY + am->msg_id_base);
+ rmp->context = mp->context;
+ rmp->major = htonl (ACL_PLUGIN_VERSION_MAJOR);
+ rmp->minor = htonl (ACL_PLUGIN_VERSION_MINOR);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_acl_plugin_control_ping_t_handler (vl_api_acl_plugin_control_ping_t * mp)
+{
+ vl_api_acl_plugin_control_ping_reply_t *rmp;
+ acl_main_t *am = &acl_main;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_ACL_PLUGIN_CONTROL_PING_REPLY,
+ ({
+ rmp->vpe_pid = ntohl (getpid ());
+ }));
+ /* *INDENT-ON* */
+}
+
+static int
+acl_add_list (u32 count, vl_api_acl_rule_t rules[],
+ u32 * acl_list_index, u8 * tag)
+{
+ acl_main_t *am = &acl_main;
+ acl_list_t *a;
+ acl_rule_t *r;
+ acl_rule_t *acl_new_rules = 0;
+ int i;
+
+ if (*acl_list_index != ~0)
+ {
+ /* They supplied some number, let's see if this ACL exists */
+ if (pool_is_free_index (am->acls, *acl_list_index))
+ {
+ /* tried to replace a non-existent ACL, no point doing anything */
+ clib_warning("acl-plugin-error: Trying to replace nonexistent ACL %d (tag %s)", *acl_list_index, tag);
+ return -1;
+ }
+ }
+ if (0 == count) {
+ clib_warning("acl-plugin-warning: supplied no rules for ACL %d (tag %s)", *acl_list_index, tag);
+ }
+
+ void *oldheap = acl_set_heap(am);
+
+ /* Create and populate the rules */
+ if (count > 0)
+ vec_validate(acl_new_rules, count-1);
+
+ for (i = 0; i < count; i++)
+ {
+ r = vec_elt_at_index(acl_new_rules, i);
+ memset(r, 0, sizeof(*r));
+ r->is_permit = rules[i].is_permit;
+ r->is_ipv6 = rules[i].is_ipv6;
+ if (r->is_ipv6)
+ {
+ memcpy (&r->src, rules[i].src_ip_addr, sizeof (r->src));
+ memcpy (&r->dst, rules[i].dst_ip_addr, sizeof (r->dst));
+ }
+ else
+ {
+ memcpy (&r->src.ip4, rules[i].src_ip_addr, sizeof (r->src.ip4));
+ memcpy (&r->dst.ip4, rules[i].dst_ip_addr, sizeof (r->dst.ip4));
+ }
+ r->src_prefixlen = rules[i].src_ip_prefix_len;
+ r->dst_prefixlen = rules[i].dst_ip_prefix_len;
+ r->proto = rules[i].proto;
+ r->src_port_or_type_first = ntohs ( rules[i].srcport_or_icmptype_first );
+ r->src_port_or_type_last = ntohs ( rules[i].srcport_or_icmptype_last );
+ r->dst_port_or_code_first = ntohs ( rules[i].dstport_or_icmpcode_first );
+ r->dst_port_or_code_last = ntohs ( rules[i].dstport_or_icmpcode_last );
+ r->tcp_flags_value = rules[i].tcp_flags_value;
+ r->tcp_flags_mask = rules[i].tcp_flags_mask;
+ }
+
+ if (~0 == *acl_list_index)
+ {
+ /* Get ACL index */
+ pool_get_aligned (am->acls, a, CLIB_CACHE_LINE_BYTES);
+ memset (a, 0, sizeof (*a));
+ /* Will return the newly allocated ACL index */
+ *acl_list_index = a - am->acls;
+ }
+ else
+ {
+ a = am->acls + *acl_list_index;
+ hash_acl_delete(am, *acl_list_index);
+ /* Get rid of the old rules */
+ if (a->rules)
+ vec_free (a->rules);
+ }
+ a->rules = acl_new_rules;
+ a->count = count;
+ memcpy (a->tag, tag, sizeof (a->tag));
+ hash_acl_add(am, *acl_list_index);
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+static int
+acl_del_list (u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ acl_list_t *a;
+ int i, ii;
+ if (pool_is_free_index (am->acls, acl_list_index))
+ {
+ return -1;
+ }
+
+ if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) {
+ if (vec_len(vec_elt(am->input_sw_if_index_vec_by_acl, acl_list_index)) > 0) {
+ /* ACL is applied somewhere inbound. Refuse to delete */
+ return -1;
+ }
+ }
+ if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) {
+ if (vec_len(vec_elt(am->output_sw_if_index_vec_by_acl, acl_list_index)) > 0) {
+ /* ACL is applied somewhere outbound. Refuse to delete */
+ return -1;
+ }
+ }
+
+ void *oldheap = acl_set_heap(am);
+ /* delete any references to the ACL */
+ for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index); i++)
+ {
+ for (ii = 0; ii < vec_len (am->output_acl_vec_by_sw_if_index[i]);
+ /* see body */ )
+ {
+ if (acl_list_index == am->output_acl_vec_by_sw_if_index[i][ii])
+ {
+ vec_del1 (am->output_acl_vec_by_sw_if_index[i], ii);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+ }
+ for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index); i++)
+ {
+ for (ii = 0; ii < vec_len (am->input_acl_vec_by_sw_if_index[i]);
+ /* see body */ )
+ {
+ if (acl_list_index == am->input_acl_vec_by_sw_if_index[i][ii])
+ {
+ vec_del1 (am->input_acl_vec_by_sw_if_index[i], ii);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+ }
+ /* delete the hash table data */
+
+ hash_acl_delete(am, acl_list_index);
+ /* now we can delete the ACL itself */
+ a = pool_elt_at_index (am->acls, acl_list_index);
+ if (a->rules)
+ vec_free (a->rules);
+
+ pool_put (am->acls, a);
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+/* Some aids in ASCII graphing the content */
+#define XX "\377"
+#define __ "\000"
+#define _(x)
+#define v
+
+u8 ip4_5tuple_mask[] =
+_(" dmac smac etype ")
+_(ether) __ __ __ __ __ __ v __ __ __ __ __ __ v __ __ v
+ _(" v ihl totlen ")
+ _(0x0000)
+ __ __ __ __
+ _(" ident fl+fo ")
+ _(0x0004)
+ __ __ __ __
+ _(" ttl pr checksum ")
+ _(0x0008)
+ __ XX __ __
+ _(" src address ")
+ _(0x000C)
+ XX XX XX XX
+ _(" dst address ")
+ _(0x0010)
+ XX XX XX XX
+ _("L4 T/U sport dport ")
+ _(tcpudp)
+ XX XX XX XX
+ _(padpad)
+ __ __ __ __
+ _(padpad)
+ __ __ __ __
+ _(padeth)
+ __ __;
+
+u8 ip6_5tuple_mask[] =
+ _(" dmac smac etype ")
+ _(ether) __ __ __ __ __ __ v __ __ __ __ __ __ v __ __ v
+ _(" v tc + flow ")
+ _(0x0000) __ __ __ __
+ _(" plen nh hl ")
+ _(0x0004) __ __ XX __
+ _(" src address ")
+ _(0x0008) XX XX XX XX
+ _(0x000C) XX XX XX XX
+ _(0x0010) XX XX XX XX
+ _(0x0014) XX XX XX XX
+ _(" dst address ")
+ _(0x0018) XX XX XX XX
+ _(0x001C) XX XX XX XX
+ _(0x0020) XX XX XX XX
+ _(0x0024) XX XX XX XX
+ _("L4T/U sport dport ")
+ _(tcpudp) XX XX XX XX _(padpad) __ __ __ __ _(padeth) __ __;
+
+#undef XX
+#undef __
+#undef _
+#undef v
+
+static int
+count_skip (u8 * p, u32 size)
+{
+ u64 *p64 = (u64 *) p;
+ /* Be tolerant of a null pointer */
+ if (0 == p)
+ return 0;
+
+ while ((0ULL == *p64) && ((u8 *) p64 - p) < size)
+ {
+ p64++;
+ }
+ return (p64 - (u64 *) p) / 2;
+}
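+
+/*
+ * Worked example: for a mask whose first 32 bytes are zero, the loop
+ * above advances p64 by four u64 steps (4 * 8 = 32 bytes) and the
+ * function returns 4 / 2 = 2, i.e. the number of leading 16-byte
+ * u32x4 vectors the classifier may skip.
+ */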
+
+static int
+acl_classify_add_del_table_tiny (vnet_classify_main_t * cm, u8 * mask,
+ u32 mask_len, u32 next_table_index,
+ u32 miss_next_index, u32 * table_index,
+ int is_add)
+{
+ u32 nbuckets = 1;
+ u32 memory_size = 2 << 13;
+ u32 skip = count_skip (mask, mask_len);
+ u32 match = (mask_len / 16) - skip;
+ u8 *skip_mask_ptr = mask + 16 * skip;
+ u32 current_data_flag = 0;
+ int current_data_offset = 0;
+
+ if (0 == match)
+ match = 1;
+ void *oldheap = clib_mem_set_heap (cm->vlib_main->heap_base);
+ int ret = vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets,
+ memory_size, skip, match,
+ next_table_index, miss_next_index,
+ table_index, current_data_flag,
+ current_data_offset, is_add,
+ 1 /* delete_chain */);
+ clib_mem_set_heap (oldheap);
+ return ret;
+}
+
+static int
+acl_classify_add_del_table_small (vnet_classify_main_t * cm, u8 * mask,
+ u32 mask_len, u32 next_table_index,
+ u32 miss_next_index, u32 * table_index,
+ int is_add)
+{
+ u32 nbuckets = 32;
+ u32 memory_size = 2 << 20;
+ u32 skip = count_skip (mask, mask_len);
+ u32 match = (mask_len / 16) - skip;
+ u8 *skip_mask_ptr = mask + 16 * skip;
+ u32 current_data_flag = 0;
+ int current_data_offset = 0;
+
+ if (0 == match)
+ match = 1;
+
+ void *oldheap = clib_mem_set_heap (cm->vlib_main->heap_base);
+ int ret = vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets,
+ memory_size, skip, match,
+ next_table_index, miss_next_index,
+ table_index, current_data_flag,
+ current_data_offset, is_add,
+ 1 /* delete_chain */);
+ clib_mem_set_heap (oldheap);
+ return ret;
+}
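+
+/*
+ * The two helpers above differ only in sizing: "tiny" tables
+ * (1 bucket, 2 << 13 = 16 KiB) back the per-interface classifier
+ * hook tables, while "small" tables (32 buckets, 2 << 20 = 2 MiB)
+ * hold the per-rule sessions of the MACIP ACLs.
+ */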
+
+
+static int
+acl_unhook_l2_input_classify (acl_main_t * am, u32 sw_if_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ void *oldheap = acl_set_heap(am);
+
+ vec_validate_init_empty (am->acl_ip4_input_classify_table_by_sw_if_index,
+ sw_if_index, ~0);
+ vec_validate_init_empty (am->acl_ip6_input_classify_table_by_sw_if_index,
+ sw_if_index, ~0);
+
+ /* switch to global heap while calling vnet_* functions */
+ clib_mem_set_heap (cm->vlib_main->heap_base);
+ vnet_l2_input_classify_enable_disable (sw_if_index, 0);
+
+ if (am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] != ~0)
+ {
+ ip4_table_index =
+ am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index];
+ am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] = ~0;
+ acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl_ip4,
+ &ip4_table_index, 0);
+ }
+ if (am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] != ~0)
+ {
+ ip6_table_index =
+ am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index];
+ am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] = ~0;
+ acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl_ip6,
+ &ip6_table_index, 0);
+ }
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+static int
+acl_unhook_l2_output_classify (acl_main_t * am, u32 sw_if_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ void *oldheap = acl_set_heap(am);
+
+ vec_validate_init_empty (am->acl_ip4_output_classify_table_by_sw_if_index,
+ sw_if_index, ~0);
+ vec_validate_init_empty (am->acl_ip6_output_classify_table_by_sw_if_index,
+ sw_if_index, ~0);
+
+ /* switch to global heap while calling vnet_* functions */
+ clib_mem_set_heap (cm->vlib_main->heap_base);
+
+ vnet_l2_output_classify_enable_disable (sw_if_index, 0);
+
+ if (am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] != ~0)
+ {
+ ip4_table_index =
+ am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index];
+ am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] = ~0;
+ acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl_ip4,
+ &ip4_table_index, 0);
+ }
+ if (am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] != ~0)
+ {
+ ip6_table_index =
+ am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index];
+ am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] = ~0;
+ acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl_ip6,
+ &ip6_table_index, 0);
+ }
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+static int
+acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ int rv;
+
+ void *prevheap = clib_mem_set_heap (cm->vlib_main->heap_base);
+
+ /* in case there were previous tables attached */
+ acl_unhook_l2_input_classify (am, sw_if_index);
+ rv =
+ acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl_ip4,
+ &ip4_table_index, 1);
+ if (rv)
+ goto done;
+ rv =
+ acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl_ip6,
+ &ip6_table_index, 1);
+ if (rv)
+ {
+ acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl_ip4,
+ &ip4_table_index, 0);
+ goto done;
+ }
+ rv =
+ vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index, ~0);
+ if (rv)
+ {
+ acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl_ip6,
+ &ip6_table_index, 0);
+ acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl_ip4,
+ &ip4_table_index, 0);
+ goto done;
+ }
+
+ am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] =
+ ip4_table_index;
+ am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] =
+ ip6_table_index;
+
+ vnet_l2_input_classify_enable_disable (sw_if_index, 1);
+done:
+ clib_mem_set_heap (prevheap);
+ return rv;
+}
+
+static int
+acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ int rv;
+
+ void *prevheap = clib_mem_set_heap (cm->vlib_main->heap_base);
+
+ /* in case there were previous tables attached */
+ acl_unhook_l2_output_classify (am, sw_if_index);
+ rv =
+ acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl_ip4,
+ &ip4_table_index, 1);
+ if (rv)
+ goto done;
+ rv =
+ acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl_ip6,
+ &ip6_table_index, 1);
+ if (rv)
+ {
+ acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl_ip4,
+ &ip4_table_index, 0);
+ goto done;
+ }
+ rv =
+ vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index, ~0);
+ clib_warning
+ ("ACL enabling on interface sw_if_index %d, setting tables to the following: ip4: %d ip6: %d\n",
+ sw_if_index, ip4_table_index, ip6_table_index);
+ if (rv)
+ {
+ acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl_ip6,
+ &ip6_table_index, 0);
+ acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl_ip4,
+ &ip4_table_index, 0);
+ goto done;
+ }
+
+ am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] =
+ ip4_table_index;
+ am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] =
+ ip6_table_index;
+
+ vnet_l2_output_classify_enable_disable (sw_if_index, 1);
+done:
+ clib_mem_set_heap (prevheap);
+ return rv;
+}
+
+
+
+int
+acl_interface_in_enable_disable (acl_main_t * am, u32 sw_if_index,
+ int enable_disable)
+{
+ int rv;
+
+ /* Utterly wrong? */
+ if (pool_is_free_index (am->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ acl_fa_enable_disable(sw_if_index, 1, enable_disable);
+
+ if (enable_disable)
+ {
+ rv = acl_hook_l2_input_classify (am, sw_if_index);
+ }
+ else
+ {
+ rv = acl_unhook_l2_input_classify (am, sw_if_index);
+ }
+
+ return rv;
+}
+
+int
+acl_interface_out_enable_disable (acl_main_t * am, u32 sw_if_index,
+ int enable_disable)
+{
+ int rv;
+
+ /* Utterly wrong? */
+ if (pool_is_free_index (am->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ acl_fa_enable_disable(sw_if_index, 0, enable_disable);
+
+ if (enable_disable)
+ {
+ rv = acl_hook_l2_output_classify (am, sw_if_index);
+ }
+ else
+ {
+ rv = acl_unhook_l2_output_classify (am, sw_if_index);
+ }
+
+ return rv;
+}
+
+static int
+acl_is_not_defined(acl_main_t *am, u32 acl_list_index)
+{
+ return (pool_is_free_index (am->acls, acl_list_index));
+}
+
+
+static int
+acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ if (acl_is_not_defined(am, acl_list_index)) {
+ /* ACL is not defined. Cannot apply */
+ return -1;
+ }
+ void *oldheap = acl_set_heap(am);
+
+ if (is_input)
+ {
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+
+ u32 index = vec_search(am->input_acl_vec_by_sw_if_index[sw_if_index], acl_list_index);
+ if (index < vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index])) {
+ clib_warning("ACL %d is already applied inbound on sw_if_index %d (index %d)",
+ acl_list_index, sw_if_index, index);
+ /* the entry is already there */
+ clib_mem_set_heap (oldheap);
+ return -1;
+ }
+ /* if there was no ACL applied before, enable the ACL processing */
+ if (vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index]) == 0) {
+ acl_interface_in_enable_disable (am, sw_if_index, 1);
+ }
+ vec_add (am->input_acl_vec_by_sw_if_index[sw_if_index], &acl_list_index,
+ 1);
+ vec_validate (am->input_sw_if_index_vec_by_acl, acl_list_index);
+ vec_add (am->input_sw_if_index_vec_by_acl[acl_list_index], &sw_if_index,
+ 1);
+ }
+ else
+ {
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+
+ u32 index = vec_search(am->output_acl_vec_by_sw_if_index[sw_if_index], acl_list_index);
+ if (index < vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index])) {
+ clib_warning("ACL %d is already applied outbound on sw_if_index %d (index %d)",
+ acl_list_index, sw_if_index, index);
+ /* the entry is already there */
+ clib_mem_set_heap (oldheap);
+ return -1;
+ }
+ /* if there was no ACL applied before, enable the ACL processing */
+ if (vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index]) == 0) {
+ acl_interface_out_enable_disable (am, sw_if_index, 1);
+ }
+ vec_add (am->output_acl_vec_by_sw_if_index[sw_if_index],
+ &acl_list_index, 1);
+ vec_validate (am->output_sw_if_index_vec_by_acl, acl_list_index);
+ vec_add (am->output_sw_if_index_vec_by_acl[acl_list_index], &sw_if_index,
+ 1);
+ }
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+
+static int
+acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ int i;
+ int rv = -1;
+ void *oldheap = acl_set_heap(am);
+ if (is_input)
+ {
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+ i++)
+ {
+ if (acl_list_index ==
+ am->input_acl_vec_by_sw_if_index[sw_if_index][i])
+ {
+ vec_del1 (am->input_acl_vec_by_sw_if_index[sw_if_index], i);
+ rv = 0;
+ break;
+ }
+ }
+
+ if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) {
+ u32 index = vec_search(am->input_sw_if_index_vec_by_acl[acl_list_index], sw_if_index);
+ if (index < vec_len(am->input_sw_if_index_vec_by_acl[acl_list_index])) {
+ hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+ vec_del1 (am->input_sw_if_index_vec_by_acl[acl_list_index], index);
+ }
+ }
+
+ /* If there are no more ACLs applied on the interface, disable ACL processing */
+ if (0 == vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]))
+ {
+ acl_interface_in_enable_disable (am, sw_if_index, 0);
+ }
+ }
+ else
+ {
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+ for (i = 0;
+ i < vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]); i++)
+ {
+ if (acl_list_index ==
+ am->output_acl_vec_by_sw_if_index[sw_if_index][i])
+ {
+ vec_del1 (am->output_acl_vec_by_sw_if_index[sw_if_index], i);
+ rv = 0;
+ break;
+ }
+ }
+
+ if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) {
+ u32 index = vec_search(am->output_sw_if_index_vec_by_acl[acl_list_index], sw_if_index);
+ if (index < vec_len(am->output_sw_if_index_vec_by_acl[acl_list_index])) {
+ hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+ vec_del1 (am->output_sw_if_index_vec_by_acl[acl_list_index], index);
+ }
+ }
+
+ /* If there are no more ACLs applied on the interface, disable ACL processing */
+ if (0 == vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]))
+ {
+ acl_interface_out_enable_disable (am, sw_if_index, 0);
+ }
+ }
+ clib_mem_set_heap (oldheap);
+ return rv;
+}
+
+static void
+acl_interface_reset_inout_acls (u32 sw_if_index, u8 is_input)
+{
+ acl_main_t *am = &acl_main;
+ int i;
+ void *oldheap = acl_set_heap(am);
+ if (is_input)
+ {
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ if (vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index]) > 0) {
+ acl_interface_in_enable_disable (am, sw_if_index, 0);
+ }
+
+ for(i = vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index])-1; i>=0; i--) {
+ u32 acl_list_index = am->input_acl_vec_by_sw_if_index[sw_if_index][i];
+ hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+ if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) {
+ u32 index = vec_search(am->input_sw_if_index_vec_by_acl[acl_list_index], sw_if_index);
+ if (index < vec_len(am->input_sw_if_index_vec_by_acl[acl_list_index])) {
+ vec_del1 (am->input_sw_if_index_vec_by_acl[acl_list_index], index);
+ }
+ }
+ }
+
+ vec_reset_length (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+ }
+ else
+ {
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+ if (vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index]) > 0) {
+ acl_interface_out_enable_disable (am, sw_if_index, 0);
+ }
+
+ for(i = vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index])-1; i>=0; i--) {
+ u32 acl_list_index = am->output_acl_vec_by_sw_if_index[sw_if_index][i];
+ hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+ if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) {
+ u32 index = vec_search(am->output_sw_if_index_vec_by_acl[acl_list_index], sw_if_index);
+ if (index < vec_len(am->output_sw_if_index_vec_by_acl[acl_list_index])) {
+ vec_del1 (am->output_sw_if_index_vec_by_acl[acl_list_index], index);
+ }
+ }
+ }
+
+ vec_reset_length (am->output_acl_vec_by_sw_if_index[sw_if_index]);
+ }
+ clib_mem_set_heap (oldheap);
+}
+
+static int
+acl_interface_add_del_inout_acl (u32 sw_if_index, u8 is_add, u8 is_input,
+ u32 acl_list_index)
+{
+ int rv = -1;
+ acl_main_t *am = &acl_main;
+ if (is_add)
+ {
+ rv =
+ acl_interface_add_inout_acl (sw_if_index, is_input, acl_list_index);
+ if (rv == 0)
+ {
+ hash_acl_apply(am, sw_if_index, is_input, acl_list_index);
+ }
+ }
+ else
+ {
+ hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+ rv =
+ acl_interface_del_inout_acl (sw_if_index, is_input, acl_list_index);
+ }
+ return rv;
+}
+
+
+typedef struct
+{
+ u8 is_ipv6;
+ u8 mac_mask[6];
+ u8 prefix_len;
+ u32 count;
+ u32 table_index;
+ u32 arp_table_index;
+} macip_match_type_t;
+
+static u32
+macip_find_match_type (macip_match_type_t * mv, u8 * mac_mask, u8 prefix_len,
+ u8 is_ipv6)
+{
+ u32 i;
+ if (mv)
+ {
+ for (i = 0; i < vec_len (mv); i++)
+ {
+ if ((mv[i].prefix_len == prefix_len) && (mv[i].is_ipv6 == is_ipv6)
+ && (0 == memcmp (mv[i].mac_mask, mac_mask, 6)))
+ {
+ return i;
+ }
+ }
+ }
+ return ~0;
+}
+
+
+/* Get the metric used to sort match types.
+ The more specific and the more often seen, the bigger the metric. */
+static int
+match_type_metric (macip_match_type_t * m)
+{
+ unsigned int mac_bits_set = 0;
+ unsigned int mac_byte;
+ int i;
+ for (i=0; i<6; i++)
+ {
+ mac_byte = m->mac_mask[i];
+ for (; mac_byte; mac_byte >>= 1)
+ mac_bits_set += mac_byte & 1;
+ }
+ /*
+ * Attempt to place the more specific and the more used rules on top.
+ * There are obvious corner cases where this heuristic misfires, but
+ * they do not seem sensible in the real world (e.g. a specific IPv4
+ * with a wildcard MAC alongside a wildcard IPv4 with a specific MAC).
+ */
+ return m->prefix_len + mac_bits_set + m->is_ipv6 + 10 * m->count;
+}
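+
+/*
+ * Worked example: an IPv4 match type with a full 48-bit MAC mask, a
+ * /24 prefix and 3 rules using it scores 24 + 48 + 0 + 10 * 3 = 102,
+ * so it sorts after (and thus gets its table created later than) a
+ * wildcard-MAC /0 type seen once, which scores only 10.
+ */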
+
+static int
+match_type_compare (macip_match_type_t * m1, macip_match_type_t * m2)
+{
+ /* Ascending sort based on the metric values */
+ return match_type_metric (m1) - match_type_metric (m2);
+}
+
+/* Get the offset of the L3 source address within the ethernet packet */
+static int
+get_l3_src_offset(int is6)
+{
+ if(is6)
+ return (sizeof(ethernet_header_t) + offsetof(ip6_header_t, src_address));
+ else
+ return (sizeof(ethernet_header_t) + offsetof(ip4_header_t, src_address));
+}
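+
+/*
+ * For reference: with the 14-byte ethernet header this works out to
+ * 14 + 12 = 26 for IPv4 (src_address at offset 12) and 14 + 8 = 22
+ * for IPv6 (src_address at offset 8).
+ */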
+
+static int
+macip_create_classify_tables (acl_main_t * am, u32 macip_acl_index)
+{
+ macip_match_type_t *mvec = NULL;
+ macip_match_type_t *mt;
+ macip_acl_list_t *a = pool_elt_at_index (am->macip_acls, macip_acl_index);
+ int i;
+ u32 match_type_index;
+ u32 last_table;
+ u8 mask[5 * 16];
+ vnet_classify_main_t *cm = &vnet_classify_main;
+
+ /* Count the number of different types of rules */
+ for (i = 0; i < a->count; i++)
+ {
+ if (~0 ==
+ (match_type_index =
+ macip_find_match_type (mvec, a->rules[i].src_mac_mask,
+ a->rules[i].src_prefixlen,
+ a->rules[i].is_ipv6)))
+ {
+ match_type_index = vec_len (mvec);
+ vec_validate (mvec, match_type_index);
+ memcpy (mvec[match_type_index].mac_mask,
+ a->rules[i].src_mac_mask, 6);
+ mvec[match_type_index].prefix_len = a->rules[i].src_prefixlen;
+ mvec[match_type_index].is_ipv6 = a->rules[i].is_ipv6;
+ mvec[match_type_index].table_index = ~0;
+ }
+ mvec[match_type_index].count++;
+ }
+ /* Put the most frequently used tables last in the list so we can create classifier tables in reverse order */
+ vec_sort_with_function (mvec, match_type_compare);
+ /* Create the classifier tables */
+ last_table = ~0;
+ /* First add ARP tables */
+ vec_foreach (mt, mvec)
+ {
+ int mask_len;
+ int is6 = mt->is_ipv6;
+
+ mt->arp_table_index = ~0;
+ if (!is6)
+ {
+ memset (mask, 0, sizeof (mask));
+ memcpy (&mask[6], mt->mac_mask, 6);
+ memset (&mask[12], 0xff, 2); /* ethernet protocol */
+ memcpy (&mask[14 + 8], mt->mac_mask, 6);
+
+ for (i = 0; i < (mt->prefix_len / 8); i++)
+ mask[14 + 14 + i] = 0xff;
+ if (mt->prefix_len % 8)
+ mask[14 + 14 + (mt->prefix_len / 8)] = 0xff - ((1 << (8 - mt->prefix_len % 8)) - 1);
+
+ mask_len = ((14 + 14 + ((mt->prefix_len+7) / 8) +
+ (sizeof (u32x4)-1))/sizeof(u32x4)) * sizeof (u32x4);
+ acl_classify_add_del_table_small (cm, mask, mask_len, last_table,
+ (~0 == last_table) ? 0 : ~0, &mt->arp_table_index,
+ 1);
+ last_table = mt->arp_table_index;
+ }
+ }
+ /* Now add IP[46] tables */
+ vec_foreach (mt, mvec)
+ {
+ int mask_len;
+ int is6 = mt->is_ipv6;
+ int l3_src_offs = get_l3_src_offset(is6);
+ memset (mask, 0, sizeof (mask));
+ memcpy (&mask[6], mt->mac_mask, 6);
+ for (i = 0; i < (mt->prefix_len / 8); i++)
+ {
+ mask[l3_src_offs + i] = 0xff;
+ }
+ if (mt->prefix_len % 8)
+ {
+ mask[l3_src_offs + (mt->prefix_len / 8)] =
+ 0xff - ((1 << (8 - mt->prefix_len % 8)) - 1);
+ }
+ /*
+ * Round-up the number of bytes needed to store the prefix,
+ * and round up the number of vectors too
+ */
+ mask_len = ((l3_src_offs + ((mt->prefix_len+7) / 8) +
+ (sizeof (u32x4)-1))/sizeof(u32x4)) * sizeof (u32x4);
+ acl_classify_add_del_table_small (cm, mask, mask_len, last_table,
+ (~0 == last_table) ? 0 : ~0, &mt->table_index,
+ 1);
+ last_table = mt->table_index;
+ }
+ a->ip4_table_index = last_table;
+ a->ip6_table_index = last_table;
+ a->l2_table_index = last_table;
+
+ /* Populate the classifier tables with rules from the MACIP ACL */
+ for (i = 0; i < a->count; i++)
+ {
+ u32 action = 0;
+ u32 metadata = 0;
+ int is6 = a->rules[i].is_ipv6;
+ int l3_src_offs = get_l3_src_offset(is6);
+ memset (mask, 0, sizeof (mask));
+ memcpy (&mask[6], a->rules[i].src_mac, 6);
+ memset (&mask[12], 0xff, 2); /* ethernet protocol */
+ if (is6)
+ {
+ memcpy (&mask[l3_src_offs], &a->rules[i].src_ip_addr.ip6, 16);
+ mask[12] = 0x86;
+ mask[13] = 0xdd;
+ }
+ else
+ {
+ memcpy (&mask[l3_src_offs], &a->rules[i].src_ip_addr.ip4, 4);
+ mask[12] = 0x08;
+ mask[13] = 0x00;
+ }
+ match_type_index =
+ macip_find_match_type (mvec, a->rules[i].src_mac_mask,
+ a->rules[i].src_prefixlen,
+ a->rules[i].is_ipv6);
+ ASSERT(match_type_index != ~0);
+ /* add session to table mvec[match_type_index].table_index; */
+ vnet_classify_add_del_session (cm, mvec[match_type_index].table_index,
+ mask, a->rules[i].is_permit ? ~0 : 0, i,
+ 0, action, metadata, 1);
+ /* add ARP table entry too */
+ if (!is6 && (mvec[match_type_index].arp_table_index != ~0))
+ {
+ memset (mask, 0, sizeof (mask));
+ memcpy (&mask[6], a->rules[i].src_mac, 6);
+ mask[12] = 0x08;
+ mask[13] = 0x06;
+ memcpy (&mask[14 + 8], a->rules[i].src_mac, 6);
+ memcpy (&mask[14 + 14], &a->rules[i].src_ip_addr.ip4, 4);
+ vnet_classify_add_del_session (cm, mvec[match_type_index].arp_table_index,
+ mask, a->rules[i].is_permit ? ~0 : 0, i,
+ 0, action, metadata, 1);
+ }
+ }
+ return 0;
+}
+
+static void
+macip_destroy_classify_tables (acl_main_t * am, u32 macip_acl_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ macip_acl_list_t *a = pool_elt_at_index (am->macip_acls, macip_acl_index);
+
+ if (a->ip4_table_index != ~0)
+ {
+ acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->ip4_table_index, 0);
+ a->ip4_table_index = ~0;
+ }
+ if (a->ip6_table_index != ~0)
+ {
+ acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->ip6_table_index, 0);
+ a->ip6_table_index = ~0;
+ }
+ if (a->l2_table_index != ~0)
+ {
+ acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->l2_table_index, 0);
+ a->l2_table_index = ~0;
+ }
+}
+
+static int
+macip_acl_add_list (u32 count, vl_api_macip_acl_rule_t rules[],
+ u32 * acl_list_index, u8 * tag)
+{
+ acl_main_t *am = &acl_main;
+ macip_acl_list_t *a;
+ macip_acl_rule_t *r;
+ macip_acl_rule_t *acl_new_rules = 0;
+ int i;
+
+ if (*acl_list_index != ~0)
+ {
+ /* They supplied some number, let's see if this MACIP ACL exists */
+ if (pool_is_free_index (am->macip_acls, *acl_list_index))
+ {
+ /* tried to replace a non-existent ACL, no point doing anything */
+ clib_warning("acl-plugin-error: Trying to replace nonexistent MACIP ACL %d (tag %s)", *acl_list_index, tag);
+ return -1;
+ }
+ }
+
+ if (0 == count) {
+ clib_warning("acl-plugin-warning: Trying to create empty MACIP ACL (tag %s)", tag);
+ }
+ void *oldheap = acl_set_heap(am);
+ /* Create and populate the rules */
+ if (count > 0)
+ vec_validate(acl_new_rules, count-1);
+
+ for (i = 0; i < count; i++)
+ {
+ r = &acl_new_rules[i];
+ r->is_permit = rules[i].is_permit;
+ r->is_ipv6 = rules[i].is_ipv6;
+ memcpy (&r->src_mac, rules[i].src_mac, 6);
+ memcpy (&r->src_mac_mask, rules[i].src_mac_mask, 6);
+ if(rules[i].is_ipv6)
+ memcpy (&r->src_ip_addr.ip6, rules[i].src_ip_addr, 16);
+ else
+ memcpy (&r->src_ip_addr.ip4, rules[i].src_ip_addr, 4);
+ r->src_prefixlen = rules[i].src_ip_prefix_len;
+ }
+
+ if (~0 == *acl_list_index)
+ {
+ /* Get ACL index */
+ pool_get_aligned (am->macip_acls, a, CLIB_CACHE_LINE_BYTES);
+ memset (a, 0, sizeof (*a));
+ /* Will return the newly allocated ACL index */
+ *acl_list_index = a - am->macip_acls;
+ }
+ else
+ {
+ a = pool_elt_at_index (am->macip_acls, *acl_list_index);
+ if (a->rules)
+ {
+ vec_free (a->rules);
+ }
+ macip_destroy_classify_tables (am, *acl_list_index);
+ }
+
+ a->rules = acl_new_rules;
+ a->count = count;
+ memcpy (a->tag, tag, sizeof (a->tag));
+
+ /* Create and populate the classifier tables */
+ macip_create_classify_tables (am, *acl_list_index);
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+
+/* No check for validity of sw_if_index - the callers are expected to validate it */
+
+static int
+macip_acl_interface_del_acl (acl_main_t * am, u32 sw_if_index)
+{
+ int rv;
+ u32 macip_acl_index;
+ macip_acl_list_t *a;
+ void *oldheap = acl_set_heap(am);
+ vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0);
+ clib_mem_set_heap (oldheap);
+ macip_acl_index = am->macip_acl_by_sw_if_index[sw_if_index];
+ /* No point in deleting a MACIP ACL which is not applied */
+ if (~0 == macip_acl_index)
+ return -1;
+ a = pool_elt_at_index (am->macip_acls, macip_acl_index);
+ /* remove the classifier tables off the interface L2 ACL */
+ rv =
+ vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index,
+ a->ip6_table_index, a->l2_table_index, 0);
+ /* Unset the MACIP ACL index */
+ am->macip_acl_by_sw_if_index[sw_if_index] = ~0;
+ return rv;
+}
+
+/* No check for validity of sw_if_index - the callers are expected to validate it */
+
+static int
+macip_acl_interface_add_acl (acl_main_t * am, u32 sw_if_index,
+ u32 macip_acl_index)
+{
+ macip_acl_list_t *a;
+ int rv;
+ if (pool_is_free_index (am->macip_acls, macip_acl_index))
+ {
+ return -1;
+ }
+ void *oldheap = acl_set_heap(am);
+ a = pool_elt_at_index (am->macip_acls, macip_acl_index);
+ vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0);
+ clib_mem_set_heap (oldheap);
+ /* If there is already a MACIP ACL applied, unapply it */
+ if (~0 != am->macip_acl_by_sw_if_index[sw_if_index])
+ macip_acl_interface_del_acl(am, sw_if_index);
+ am->macip_acl_by_sw_if_index[sw_if_index] = macip_acl_index;
+
+ /* Apply the classifier tables for L2 ACLs */
+ rv =
+ vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index,
+ a->ip6_table_index, a->l2_table_index, 1);
+ return rv;
+}
+
+static int
+macip_acl_del_list (u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ macip_acl_list_t *a;
+ int i;
+ if (pool_is_free_index (am->macip_acls, acl_list_index))
+ {
+ return -1;
+ }
+
+ /* delete any references to the ACL */
+ for (i = 0; i < vec_len (am->macip_acl_by_sw_if_index); i++)
+ {
+ if (am->macip_acl_by_sw_if_index[i] == acl_list_index)
+ {
+ macip_acl_interface_del_acl (am, i);
+ }
+ }
+
+ void *oldheap = acl_set_heap(am);
+ /* Now that classifier tables are detached, clean them up */
+ macip_destroy_classify_tables (am, acl_list_index);
+
+ /* now we can delete the ACL itself */
+ a = pool_elt_at_index (am->macip_acls, acl_list_index);
+ if (a->rules)
+ {
+ vec_free (a->rules);
+ }
+ pool_put (am->macip_acls, a);
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+
+static int
+macip_acl_interface_add_del_acl (u32 sw_if_index, u8 is_add,
+ u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ int rv = -1;
+ if (is_add)
+ {
+ rv = macip_acl_interface_add_acl (am, sw_if_index, acl_list_index);
+ }
+ else
+ {
+ rv = macip_acl_interface_del_acl (am, sw_if_index);
+ }
+ return rv;
+}
+
+/*
+ * If the client does not allocate enough memory for a variable-length
+ * message, and then proceeds to use it as if the full memory were
+ * allocated, then absent this check we would happily consume that
+ * data on the VPP side and carry on as if nothing had happened.
+ * However, the resulting effects range from mere garbage in the API
+ * decode (because the decoder snoops too far) to potential memory
+ * corruption.
+ *
+ * This verifies that the actual length of the message is at least
+ * expected_len, and complains loudly if it is not.
+ *
+ * A failing check here is 100% a software bug on the API user side,
+ * so we might as well yell.
+ */
+static int verify_message_len(void *mp, u32 expected_len, char *where)
+{
+ u32 supplied_len = vl_msg_api_get_msg_length (mp);
+ if (supplied_len < expected_len) {
+ clib_warning("%s: Supplied message length %d is less than expected %d",
+ where, supplied_len, expected_len);
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+/* API message handler */
+static void
+vl_api_acl_add_replace_t_handler (vl_api_acl_add_replace_t * mp)
+{
+ vl_api_acl_add_replace_reply_t *rmp;
+ acl_main_t *am = &acl_main;
+ int rv;
+ u32 acl_list_index = ntohl (mp->acl_index);
+ u32 acl_count = ntohl (mp->count);
+ u32 expected_len = sizeof(*mp) + acl_count*sizeof(mp->r[0]);
+
+ if (verify_message_len(mp, expected_len, "acl_add_replace")) {
+ rv = acl_add_list (acl_count, mp->r, &acl_list_index, mp->tag);
+ } else {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_ACL_ADD_REPLACE_REPLY,
+ ({
+ rmp->acl_index = htonl(acl_list_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_acl_del_t_handler (vl_api_acl_del_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_acl_del_reply_t *rmp;
+ int rv;
+
+ rv = acl_del_list (ntohl (mp->acl_index));
+
+ REPLY_MACRO (VL_API_ACL_DEL_REPLY);
+}
+
+static void
+vl_api_acl_interface_add_del_t_handler (vl_api_acl_interface_add_del_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ vnet_interface_main_t *im = &am->vnet_main->interface_main;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ vl_api_acl_interface_add_del_reply_t *rmp;
+ int rv = -1;
+
+ if (pool_is_free_index(im->sw_interfaces, sw_if_index))
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ else
+ rv =
+ acl_interface_add_del_inout_acl (sw_if_index, mp->is_add,
+ mp->is_input, ntohl (mp->acl_index));
+
+ REPLY_MACRO (VL_API_ACL_INTERFACE_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_acl_interface_set_acl_list_t_handler
+ (vl_api_acl_interface_set_acl_list_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_acl_interface_set_acl_list_reply_t *rmp;
+ int rv = 0;
+ int i;
+ vnet_interface_main_t *im = &am->vnet_main->interface_main;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ if (pool_is_free_index(im->sw_interfaces, sw_if_index))
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ else
+ {
+ acl_interface_reset_inout_acls (sw_if_index, 0);
+ acl_interface_reset_inout_acls (sw_if_index, 1);
+
+ for (i = 0; i < mp->count; i++)
+ {
+ if(acl_is_not_defined(am, ntohl (mp->acls[i]))) {
+ /* ACL does not exist, so we cannot apply it */
+ rv = -1;
+ }
+ }
+ if (0 == rv) {
+ for (i = 0; i < mp->count; i++)
+ {
+ acl_interface_add_del_inout_acl (sw_if_index, 1, (i < mp->n_input),
+ ntohl (mp->acls[i]));
+ }
+ }
+ }
+
+ REPLY_MACRO (VL_API_ACL_INTERFACE_SET_ACL_LIST_REPLY);
+}
+
+static void
+copy_acl_rule_to_api_rule (vl_api_acl_rule_t * api_rule, acl_rule_t * r)
+{
+ api_rule->is_permit = r->is_permit;
+ api_rule->is_ipv6 = r->is_ipv6;
+ if(r->is_ipv6)
+ {
+ memcpy (api_rule->src_ip_addr, &r->src, sizeof (r->src));
+ memcpy (api_rule->dst_ip_addr, &r->dst, sizeof (r->dst));
+ }
+ else
+ {
+ memcpy (api_rule->src_ip_addr, &r->src.ip4, sizeof (r->src.ip4));
+ memcpy (api_rule->dst_ip_addr, &r->dst.ip4, sizeof (r->dst.ip4));
+ }
+ api_rule->src_ip_prefix_len = r->src_prefixlen;
+ api_rule->dst_ip_prefix_len = r->dst_prefixlen;
+ api_rule->proto = r->proto;
+ api_rule->srcport_or_icmptype_first = htons (r->src_port_or_type_first);
+ api_rule->srcport_or_icmptype_last = htons (r->src_port_or_type_last);
+ api_rule->dstport_or_icmpcode_first = htons (r->dst_port_or_code_first);
+ api_rule->dstport_or_icmpcode_last = htons (r->dst_port_or_code_last);
+ api_rule->tcp_flags_mask = r->tcp_flags_mask;
+ api_rule->tcp_flags_value = r->tcp_flags_value;
+}
+
+static void
+send_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q,
+ acl_list_t * acl, u32 context)
+{
+ vl_api_acl_details_t *mp;
+ vl_api_acl_rule_t *rules;
+ int i;
+ int msg_size = sizeof (*mp) + sizeof (mp->r[0]) * acl->count;
+ void *oldheap = acl_set_heap(am);
+
+ mp = vl_msg_api_alloc (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_ACL_DETAILS + am->msg_id_base);
+
+ /* fill in the message */
+ mp->context = context;
+ mp->count = htonl (acl->count);
+ mp->acl_index = htonl (acl - am->acls);
+ memcpy (mp->tag, acl->tag, sizeof (mp->tag));
+ // clib_memcpy (mp->r, acl->rules, acl->count * sizeof(acl->rules[0]));
+ rules = mp->r;
+ for (i = 0; i < acl->count; i++)
+ {
+ copy_acl_rule_to_api_rule (&rules[i], &acl->rules[i]);
+ }
+
+ clib_mem_set_heap (oldheap);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+
+static void
+vl_api_acl_dump_t_handler (vl_api_acl_dump_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ u32 acl_index;
+ acl_list_t *acl;
+
+ int rv = -1;
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->acl_index == ~0)
+ {
+ /* *INDENT-OFF* */
+ /* Just dump all ACLs */
+ pool_foreach (acl, am->acls,
+ ({
+ send_acl_details(am, q, acl, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ acl_index = ntohl (mp->acl_index);
+ if (!pool_is_free_index (am->acls, acl_index))
+ {
+ acl = pool_elt_at_index (am->acls, acl_index);
+ send_acl_details (am, q, acl, mp->context);
+ }
+ }
+
+ if (rv == -1)
+ {
+ /* FIXME API: should we signal an error here at all ? */
+ return;
+ }
+}
+
+static void
+send_acl_interface_list_details (acl_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 sw_if_index, u32 context)
+{
+ vl_api_acl_interface_list_details_t *mp;
+ int msg_size;
+ int n_input;
+ int n_output;
+ int count;
+ int i = 0;
+ void *oldheap = acl_set_heap(am);
+
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+
+ n_input = vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+ n_output = vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]);
+ count = n_input + n_output;
+
+ msg_size = sizeof (*mp);
+ msg_size += sizeof (mp->acls[0]) * count;
+
+ mp = vl_msg_api_alloc (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id =
+ ntohs (VL_API_ACL_INTERFACE_LIST_DETAILS + am->msg_id_base);
+
+ /* fill in the message */
+ mp->context = context;
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->count = count;
+ mp->n_input = n_input;
+ for (i = 0; i < n_input; i++)
+ {
+ mp->acls[i] = htonl (am->input_acl_vec_by_sw_if_index[sw_if_index][i]);
+ }
+ for (i = 0; i < n_output; i++)
+ {
+ mp->acls[n_input + i] =
+ htonl (am->output_acl_vec_by_sw_if_index[sw_if_index][i]);
+ }
+ clib_mem_set_heap (oldheap);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_acl_interface_list_dump_t_handler (vl_api_acl_interface_list_dump_t *
+ mp)
+{
+ acl_main_t *am = &acl_main;
+ vnet_sw_interface_t *swif;
+ vnet_interface_main_t *im = &am->vnet_main->interface_main;
+
+ u32 sw_if_index;
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->sw_if_index == ~0)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (swif, im->sw_interfaces,
+ ({
+ send_acl_interface_list_details(am, q, swif->sw_if_index, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ sw_if_index = ntohl (mp->sw_if_index);
+ if (!pool_is_free_index(im->sw_interfaces, sw_if_index))
+ send_acl_interface_list_details (am, q, sw_if_index, mp->context);
+ }
+}
+
+/* MACIP ACL API handlers */
+
+static void
+vl_api_macip_acl_add_t_handler (vl_api_macip_acl_add_t * mp)
+{
+ vl_api_macip_acl_add_reply_t *rmp;
+ acl_main_t *am = &acl_main;
+ int rv;
+ u32 acl_list_index = ~0;
+ u32 acl_count = ntohl (mp->count);
+ u32 expected_len = sizeof(*mp) + acl_count*sizeof(mp->r[0]);
+
+ if (verify_message_len(mp, expected_len, "macip_acl_add")) {
+ rv = macip_acl_add_list (acl_count, mp->r, &acl_list_index, mp->tag);
+ } else {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLY,
+ ({
+ rmp->acl_index = htonl(acl_list_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_macip_acl_add_replace_t_handler (vl_api_macip_acl_add_replace_t * mp)
+{
+ vl_api_macip_acl_add_replace_reply_t *rmp;
+ acl_main_t *am = &acl_main;
+ int rv;
+ u32 acl_list_index = ntohl (mp->acl_index);
+ u32 acl_count = ntohl (mp->count);
+ u32 expected_len = sizeof(*mp) + acl_count*sizeof(mp->r[0]);
+
+ if (verify_message_len(mp, expected_len, "macip_acl_add_replace")) {
+ rv = macip_acl_add_list (acl_count, mp->r, &acl_list_index, mp->tag);
+ } else {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLACE_REPLY,
+ ({
+ rmp->acl_index = htonl(acl_list_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_macip_acl_del_t_handler (vl_api_macip_acl_del_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_macip_acl_del_reply_t *rmp;
+ int rv;
+
+ rv = macip_acl_del_list (ntohl (mp->acl_index));
+
+ REPLY_MACRO (VL_API_MACIP_ACL_DEL_REPLY);
+}
+
+static void
+vl_api_macip_acl_interface_add_del_t_handler
+ (vl_api_macip_acl_interface_add_del_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_macip_acl_interface_add_del_reply_t *rmp;
+ int rv = -1;
+ vnet_interface_main_t *im = &am->vnet_main->interface_main;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ if (pool_is_free_index(im->sw_interfaces, sw_if_index))
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ else
+    rv = macip_acl_interface_add_del_acl (sw_if_index, mp->is_add,
+					  ntohl (mp->acl_index));
+
+ REPLY_MACRO (VL_API_MACIP_ACL_INTERFACE_ADD_DEL_REPLY);
+}
+
+static void
+send_macip_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q,
+ macip_acl_list_t * acl, u32 context)
+{
+ vl_api_macip_acl_details_t *mp;
+ vl_api_macip_acl_rule_t *rules;
+ macip_acl_rule_t *r;
+ int i;
+ int msg_size = sizeof (*mp) + (acl ? sizeof (mp->r[0]) * acl->count : 0);
+
+ mp = vl_msg_api_alloc (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_DETAILS + am->msg_id_base);
+
+ /* fill in the message */
+ mp->context = context;
+ if (acl)
+ {
+ memcpy (mp->tag, acl->tag, sizeof (mp->tag));
+ mp->count = htonl (acl->count);
+ mp->acl_index = htonl (acl - am->macip_acls);
+ rules = mp->r;
+ for (i = 0; i < acl->count; i++)
+ {
+ r = &acl->rules[i];
+ rules[i].is_permit = r->is_permit;
+ rules[i].is_ipv6 = r->is_ipv6;
+ memcpy (rules[i].src_mac, &r->src_mac, sizeof (r->src_mac));
+ memcpy (rules[i].src_mac_mask, &r->src_mac_mask,
+ sizeof (r->src_mac_mask));
+ if (r->is_ipv6)
+ memcpy (rules[i].src_ip_addr, &r->src_ip_addr.ip6,
+ sizeof (r->src_ip_addr.ip6));
+ else
+ memcpy (rules[i].src_ip_addr, &r->src_ip_addr.ip4,
+ sizeof (r->src_ip_addr.ip4));
+ rules[i].src_ip_prefix_len = r->src_prefixlen;
+ }
+ }
+ else
+ {
+ /* No martini, no party - no ACL applied to this interface. */
+ mp->acl_index = ~0;
+ mp->count = 0;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+
+static void
+vl_api_macip_acl_dump_t_handler (vl_api_macip_acl_dump_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ macip_acl_list_t *acl;
+
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->acl_index == ~0)
+ {
+ /* Just dump all ACLs for now, with sw_if_index = ~0 */
+      /* *INDENT-OFF* */
+      pool_foreach (acl, am->macip_acls,
+        ({
+          send_macip_acl_details (am, q, acl, mp->context);
+        }));
+      /* *INDENT-ON* */
+ }
+ else
+ {
+ u32 acl_index = ntohl (mp->acl_index);
+ if (!pool_is_free_index (am->macip_acls, acl_index))
+ {
+ acl = pool_elt_at_index (am->macip_acls, acl_index);
+ send_macip_acl_details (am, q, acl, mp->context);
+ }
+ }
+}
+
+static void
+vl_api_macip_acl_interface_get_t_handler (vl_api_macip_acl_interface_get_t *
+ mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_macip_acl_interface_get_reply_t *rmp;
+ u32 count = vec_len (am->macip_acl_by_sw_if_index);
+ int msg_size = sizeof (*rmp) + sizeof (rmp->acls[0]) * count;
+ unix_shared_memory_queue_t *q;
+ int i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (msg_size);
+ memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_MACIP_ACL_INTERFACE_GET_REPLY + am->msg_id_base);
+ rmp->context = mp->context;
+ rmp->count = htonl (count);
+ for (i = 0; i < count; i++)
+ {
+ rmp->acls[i] = htonl (am->macip_acl_by_sw_if_index[i]);
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+send_macip_acl_interface_list_details (acl_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 sw_if_index,
+ u32 acl_index,
+ u32 context)
+{
+ vl_api_macip_acl_interface_list_details_t *rmp;
+ /* at this time there is only ever 1 mac ip acl per interface */
+ int msg_size = sizeof (*rmp) + sizeof (rmp->acls[0]);
+
+ rmp = vl_msg_api_alloc (msg_size);
+ memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_INTERFACE_LIST_DETAILS + am->msg_id_base);
+
+ /* fill in the message */
+ rmp->context = context;
+ rmp->count = 1;
+ rmp->sw_if_index = htonl (sw_if_index);
+ rmp->acls[0] = htonl (acl_index);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_macip_acl_interface_list_dump_t_handler (vl_api_macip_acl_interface_list_dump_t *mp)
+{
+ unix_shared_memory_queue_t *q;
+ acl_main_t *am = &acl_main;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ vec_foreach_index(sw_if_index, am->macip_acl_by_sw_if_index)
+ {
+ if (~0 != am->macip_acl_by_sw_if_index[sw_if_index])
+ {
+ send_macip_acl_interface_list_details(am, q, sw_if_index,
+ am->macip_acl_by_sw_if_index[sw_if_index],
+ mp->context);
+ }
+ }
+ }
+ else
+ {
+ if (vec_len(am->macip_acl_by_sw_if_index) > sw_if_index)
+ {
+ send_macip_acl_interface_list_details(am, q, sw_if_index,
+ am->macip_acl_by_sw_if_index[sw_if_index],
+ mp->context);
+ }
+ }
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+acl_plugin_api_hookup (vlib_main_t * vm)
+{
+ acl_main_t *am = &acl_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + am->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_acl_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <acl/acl_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (acl_main_t * am, api_main_t * apim)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (apim, #n "_" #crc, id + am->msg_id_base);
+ foreach_vl_msg_name_crc_acl;
+#undef _
+}
+
+static void
+acl_setup_fa_nodes (void)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ acl_main_t *am = &acl_main;
+ vlib_node_t *n, *n4, *n6;
+
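+  /* Graft the ACL feature nodes onto the L2 input classifier's next-node arcs. */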
+ n = vlib_get_node_by_name (vm, (u8 *) "l2-input-classify");
+ n4 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-in-ip4-l2");
+ n6 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-in-ip6-l2");
+
+
+ am->l2_input_classify_next_acl_ip4 =
+ vlib_node_add_next_with_slot (vm, n->index, n4->index, ~0);
+ am->l2_input_classify_next_acl_ip6 =
+ vlib_node_add_next_with_slot (vm, n->index, n6->index, ~0);
+
+ feat_bitmap_init_next_nodes (vm, n4->index, L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ am->fa_acl_in_ip4_l2_node_feat_next_node_index);
+
+ feat_bitmap_init_next_nodes (vm, n6->index, L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ am->fa_acl_in_ip6_l2_node_feat_next_node_index);
+
+
+ n = vlib_get_node_by_name (vm, (u8 *) "l2-output-classify");
+ n4 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-out-ip4-l2");
+ n6 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-out-ip6-l2");
+
+ am->l2_output_classify_next_acl_ip4 =
+ vlib_node_add_next_with_slot (vm, n->index, n4->index, ~0);
+ am->l2_output_classify_next_acl_ip6 =
+ vlib_node_add_next_with_slot (vm, n->index, n6->index, ~0);
+
+ feat_bitmap_init_next_nodes (vm, n4->index, L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ am->fa_acl_out_ip4_l2_node_feat_next_node_index);
+
+ feat_bitmap_init_next_nodes (vm, n6->index, L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ am->fa_acl_out_ip6_l2_node_feat_next_node_index);
+}
+
+static void
+acl_set_timeout_sec(int timeout_type, u32 value)
+{
+ acl_main_t *am = &acl_main;
+ clib_time_t *ct = &am->vlib_main->clib_time;
+
+ if (timeout_type < ACL_N_TIMEOUTS) {
+ am->session_timeout_sec[timeout_type] = value;
+ } else {
+ clib_warning("Unknown timeout type %d", timeout_type);
+ return;
+ }
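+  /* Pre-convert the timeout from seconds into CPU clock ticks for the datapath. */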
+ am->session_timeout[timeout_type] = (u64)(((f64)value)/ct->seconds_per_clock);
+}
+
+static void
+acl_set_session_max_entries(u32 value)
+{
+ acl_main_t *am = &acl_main;
+ am->fa_conn_table_max_entries = value;
+}
+
+static int
+acl_set_skip_ipv6_eh(u32 eh, u32 value)
+{
+ acl_main_t *am = &acl_main;
+
+ if ((eh < 256) && (value < 2))
+ {
+ am->fa_ipv6_known_eh_bitmap = clib_bitmap_set(am->fa_ipv6_known_eh_bitmap, eh, value);
+ return 1;
+ }
+ else
+ return 0;
+}
+
+
+static clib_error_t *
+acl_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+ acl_main_t *am = &acl_main;
+ if (0 == am->acl_mheap) {
+ /* ACL heap is not initialized, so definitely nothing to do. */
+ return 0;
+ }
+ if (0 == is_add) {
+ vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
+ ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, sw_if_index);
+ /* also unapply any ACLs in case the users did not do so. */
+ macip_acl_interface_del_acl(am, sw_if_index);
+ acl_interface_reset_inout_acls (sw_if_index, 0);
+ acl_interface_reset_inout_acls (sw_if_index, 1);
+ }
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (acl_sw_interface_add_del);
+
+
+
+static clib_error_t *
+acl_set_aclplugin_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ u32 timeout = 0;
+ u32 val = 0;
+ u32 eh_val = 0;
+ uword memory_size = 0;
+ acl_main_t *am = &acl_main;
+
+ if (unformat (input, "skip-ipv6-extension-header %u %u", &eh_val, &val)) {
+ if(!acl_set_skip_ipv6_eh(eh_val, val)) {
+ error = clib_error_return(0, "expecting eh=0..255, value=0..1");
+ }
+ goto done;
+ }
+ if (unformat (input, "use-hash-acl-matching %u", &val))
+ {
+ am->use_hash_acl_matching = (val !=0);
+ goto done;
+ }
+ if (unformat (input, "l4-match-nonfirst-fragment %u", &val))
+ {
+ am->l4_match_nonfirst_fragment = (val != 0);
+ goto done;
+ }
+ if (unformat (input, "heap"))
+ {
+ if (unformat(input, "main"))
+ {
+ if (unformat(input, "validate %u", &val))
+ acl_plugin_acl_set_validate_heap(am, val);
+ else if (unformat(input, "trace %u", &val))
+ acl_plugin_acl_set_trace_heap(am, val);
+ goto done;
+ }
+ else if (unformat(input, "hash"))
+ {
+ if (unformat(input, "validate %u", &val))
+ acl_plugin_hash_acl_set_validate_heap(am, val);
+ else if (unformat(input, "trace %u", &val))
+ acl_plugin_hash_acl_set_trace_heap(am, val);
+ goto done;
+ }
+ goto done;
+ }
+ if (unformat (input, "session")) {
+ if (unformat (input, "table")) {
+ /* The commands here are for tuning/testing. No user-serviceable parts inside */
+ if (unformat (input, "max-entries")) {
+ if (!unformat(input, "%u", &val)) {
+ error = clib_error_return(0,
+ "expecting maximum number of entries, got `%U`",
+ format_unformat_error, input);
+ goto done;
+ } else {
+ acl_set_session_max_entries(val);
+ goto done;
+ }
+ }
+ if (unformat (input, "hash-table-buckets")) {
+ if (!unformat(input, "%u", &val)) {
+ error = clib_error_return(0,
+ "expecting maximum number of hash table buckets, got `%U`",
+ format_unformat_error, input);
+ goto done;
+ } else {
+ am->fa_conn_table_hash_num_buckets = val;
+ goto done;
+ }
+ }
+ if (unformat (input, "hash-table-memory")) {
+ if (!unformat(input, "%U", unformat_memory_size, &memory_size)) {
+ error = clib_error_return(0,
+ "expecting maximum amount of hash table memory, got `%U`",
+ format_unformat_error, input);
+ goto done;
+ } else {
+ am->fa_conn_table_hash_memory_size = memory_size;
+ goto done;
+ }
+ }
+ goto done;
+ }
+ if (unformat (input, "timeout")) {
+ if (unformat(input, "udp")) {
+ if(unformat(input, "idle")) {
+ if (!unformat(input, "%u", &timeout)) {
+ error = clib_error_return(0,
+ "expecting timeout value in seconds, got `%U`",
+ format_unformat_error, input);
+ goto done;
+ } else {
+ acl_set_timeout_sec(ACL_TIMEOUT_UDP_IDLE, timeout);
+ goto done;
+ }
+ }
+ }
+ if (unformat(input, "tcp")) {
+ if(unformat(input, "idle")) {
+ if (!unformat(input, "%u", &timeout)) {
+ error = clib_error_return(0,
+ "expecting timeout value in seconds, got `%U`",
+ format_unformat_error, input);
+ goto done;
+ } else {
+ acl_set_timeout_sec(ACL_TIMEOUT_TCP_IDLE, timeout);
+ goto done;
+ }
+ }
+ if(unformat(input, "transient")) {
+ if (!unformat(input, "%u", &timeout)) {
+ error = clib_error_return(0,
+ "expecting timeout value in seconds, got `%U`",
+ format_unformat_error, input);
+ goto done;
+ } else {
+ acl_set_timeout_sec(ACL_TIMEOUT_TCP_TRANSIENT, timeout);
+ goto done;
+ }
+ }
+ }
+ goto done;
+ }
+ }
+done:
+ return error;
+}
+
+static u8 *
+my_format_mac_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
+
+static inline u8 *
+my_macip_acl_rule_t_pretty_format (u8 *out, va_list *args)
+{
+ macip_acl_rule_t *a = va_arg (*args, macip_acl_rule_t *);
+
+ out = format(out, "%s action %d ip %U/%d mac %U mask %U",
+ a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit,
+ format_ip46_address, &a->src_ip_addr, IP46_TYPE_ANY,
+ a->src_prefixlen,
+ my_format_mac_address, a->src_mac,
+ my_format_mac_address, a->src_mac_mask);
+ return(out);
+}
+
+static void
+macip_acl_print(acl_main_t *am, u32 macip_acl_index)
+{
+ vlib_main_t * vm = am->vlib_main;
+ int i;
+
+ /* Don't try to print someone else's memory */
+  if (macip_acl_index >= vec_len(am->macip_acls))
+ return;
+
+ macip_acl_list_t *a = vec_elt_at_index(am->macip_acls, macip_acl_index);
+ int free_pool_slot = pool_is_free_index(am->macip_acls, macip_acl_index);
+
+ vlib_cli_output(vm, "MACIP acl_index: %d, count: %d (true len %d) tag {%s} is free pool slot: %d\n",
+ macip_acl_index, a->count, vec_len(a->rules), a->tag, free_pool_slot);
+ vlib_cli_output(vm, " ip4_table_index %d, ip6_table_index %d, l2_table_index %d\n",
+ a->ip4_table_index, a->ip6_table_index, a->l2_table_index);
+ for(i=0; i<vec_len(a->rules); i++)
+ vlib_cli_output(vm, " rule %d: %U\n", i, my_macip_acl_rule_t_pretty_format,
+ vec_elt_at_index(a->rules, i));
+
+}
+
+static clib_error_t *
+acl_show_aclplugin_macip_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ acl_main_t *am = &acl_main;
+ int i;
+ if (unformat (input, "interface"))
+ {
+ for(i=0; i < vec_len(am->macip_acl_by_sw_if_index); i++)
+ {
+ vlib_cli_output(vm, " sw_if_index %d: %d\n", i, vec_elt(am->macip_acl_by_sw_if_index, i));
+ }
+ }
+ else if (unformat (input, "acl"))
+ {
+ for(i=0; i < vec_len(am->macip_acls); i++)
+ macip_acl_print(am, i);
+ }
+ return error;
+}
+
+
+static clib_error_t *
+acl_show_aclplugin_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ acl_main_t *am = &acl_main;
+ vnet_interface_main_t *im = &am->vnet_main->interface_main;
+ u32 *pj;
+
+ vnet_sw_interface_t *swif;
+
+ if (unformat (input, "sessions"))
+ {
+ u8 * out0 = format(0, "");
+ u16 wk;
+ u32 show_bihash_verbose = 0;
+ u32 show_session_thread_id = ~0;
+ u32 show_session_session_index = ~0;
+ unformat (input, "thread %u index %u", &show_session_thread_id, &show_session_session_index);
+ unformat (input, "verbose %u", &show_bihash_verbose);
+ {
+ u64 n_adds = am->fa_session_total_adds;
+ u64 n_dels = am->fa_session_total_dels;
+ out0 = format(out0, "Sessions total: add %lu - del %lu = %lu\n", n_adds, n_dels, n_adds - n_dels);
+ }
+ out0 = format(out0, "\n\nPer-thread data:\n");
+ for (wk = 0; wk < vec_len (am->per_worker_data); wk++) {
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
+ out0 = format(out0, "Thread #%d:\n", wk);
+ if (show_session_thread_id == wk && show_session_session_index < pool_len(pw->fa_sessions_pool)) {
+ out0 = format(out0, " session index %u:\n", show_session_session_index);
+ fa_session_t *sess = pw->fa_sessions_pool + show_session_session_index;
+ u64 *m = (u64 *)&sess->info;
+ out0 = format(out0, " info: %016llx %016llx %016llx %016llx %016llx %016llx\n", m[0], m[1], m[2], m[3], m[4], m[5]);
+ out0 = format(out0, " sw_if_index: %u\n", sess->sw_if_index);
+ out0 = format(out0, " tcp_flags_seen: %x\n", sess->tcp_flags_seen.as_u16);
+ out0 = format(out0, " last active time: %lu\n", sess->last_active_time);
+ out0 = format(out0, " thread index: %u\n", sess->thread_index);
+ out0 = format(out0, " link enqueue time: %lu\n", sess->link_enqueue_time);
+ out0 = format(out0, " link next index: %u\n", sess->link_next_idx);
+ out0 = format(out0, " link prev index: %u\n", sess->link_prev_idx);
+ out0 = format(out0, " link list id: %u\n", sess->link_list_id);
+ }
+ out0 = format(out0, " connection add/del stats:\n", wk);
+ pool_foreach (swif, im->sw_interfaces,
+ ({
+ u32 sw_if_index = swif->sw_if_index;
+ u64 n_adds = sw_if_index < vec_len(pw->fa_session_adds_by_sw_if_index) ? pw->fa_session_adds_by_sw_if_index[sw_if_index] : 0;
+ u64 n_dels = sw_if_index < vec_len(pw->fa_session_dels_by_sw_if_index) ? pw->fa_session_dels_by_sw_if_index[sw_if_index] : 0;
+ out0 = format(out0, " sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels);
+ }));
+
+ out0 = format(out0, " connection timeout type lists:\n", wk);
+ u8 tt = 0;
+ for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) {
+ u32 head_session_index = pw->fa_conn_list_head[tt];
+ out0 = format(out0, " fa_conn_list_head[%d]: %d\n", tt, head_session_index);
+ if (~0 != head_session_index) {
+ fa_session_t *sess = pw->fa_sessions_pool + head_session_index;
+ out0 = format(out0, " last active time: %lu\n", sess->last_active_time);
+ out0 = format(out0, " link enqueue time: %lu\n", sess->link_enqueue_time);
+ }
+ }
+
+ out0 = format(out0, " Next expiry time: %lu\n", pw->next_expiry_time);
+ out0 = format(out0, " Requeue until time: %lu\n", pw->requeue_until_time);
+ out0 = format(out0, " Current time wait interval: %lu\n", pw->current_time_wait_interval);
+ out0 = format(out0, " Count of deleted sessions: %lu\n", pw->cnt_deleted_sessions);
+ out0 = format(out0, " Delete already deleted: %lu\n", pw->cnt_already_deleted_sessions);
+ out0 = format(out0, " Session timers restarted: %lu\n", pw->cnt_session_timer_restarted);
+ out0 = format(out0, " Swipe until this time: %lu\n", pw->swipe_end_time);
+ out0 = format(out0, " sw_if_index serviced bitmap: %U\n", format_bitmap_hex, pw->serviced_sw_if_index_bitmap);
+ out0 = format(out0, " pending clear intfc bitmap : %U\n", format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap);
+ out0 = format(out0, " clear in progress: %u\n", pw->clear_in_process);
+ out0 = format(out0, " interrupt is pending: %d\n", pw->interrupt_is_pending);
+ out0 = format(out0, " interrupt is needed: %d\n", pw->interrupt_is_needed);
+ out0 = format(out0, " interrupt is unwanted: %d\n", pw->interrupt_is_unwanted);
+ out0 = format(out0, " interrupt generation: %d\n", pw->interrupt_generation);
+ }
+ out0 = format(out0, "\n\nConn cleaner thread counters:\n");
+#define _(cnt, desc) out0 = format(out0, " %20lu: %s\n", am->cnt, desc);
+ foreach_fa_cleaner_counter;
+#undef _
+ vec_terminate_c_string(out0);
+ vlib_cli_output(vm, "\n\n%s\n\n", out0);
+ vlib_cli_output(vm, "Interrupt generation: %d\n", am->fa_interrupt_generation);
+ vlib_cli_output(vm, "Sessions per interval: min %lu max %lu increment: %f ms current: %f ms",
+ am->fa_min_deleted_sessions_per_interval, am->fa_max_deleted_sessions_per_interval,
+ am->fa_cleaner_wait_time_increment * 1000.0, ((f64)am->fa_current_cleaner_timer_wait_interval) * 1000.0/(f64)vm->clib_time.clocks_per_second);
+
+ vec_free(out0);
+ show_fa_sessions_hash(vm, show_bihash_verbose);
+ }
+ else if (unformat (input, "interface"))
+ {
+ u32 sw_if_index = ~0;
+ u32 swi;
+ u8 * out0 = format(0, "");
+ unformat (input, "sw_if_index %u", &sw_if_index);
+ for(swi = 0; (swi < vec_len(am->input_acl_vec_by_sw_if_index)) ||
+ (swi < vec_len(am->output_acl_vec_by_sw_if_index)); swi++) {
+ out0 = format(out0, "sw_if_index %d:\n", swi);
+
+ if ((swi < vec_len(am->input_acl_vec_by_sw_if_index)) &&
+ (vec_len(am->input_acl_vec_by_sw_if_index[swi]) > 0)) {
+ out0 = format(out0, " input acl(s): ");
+ vec_foreach(pj, am->input_acl_vec_by_sw_if_index[swi]) {
+ out0 = format(out0, "%d ", *pj);
+ }
+ out0 = format(out0, "\n");
+ }
+
+ if ((swi < vec_len(am->output_acl_vec_by_sw_if_index)) &&
+ (vec_len(am->output_acl_vec_by_sw_if_index[swi]) > 0)) {
+ out0 = format(out0, " output acl(s): ");
+ vec_foreach(pj, am->output_acl_vec_by_sw_if_index[swi]) {
+ out0 = format(out0, "%d ", *pj);
+ }
+ out0 = format(out0, "\n");
+ }
+
+ }
+ vec_terminate_c_string(out0);
+ vlib_cli_output(vm, "\n%s\n", out0);
+ vec_free(out0);
+ }
+ else if (unformat (input, "acl"))
+ {
+ u32 acl_index = ~0;
+ u32 i;
+ u8 * out0 = format(0, "");
+ unformat (input, "index %u", &acl_index);
+ for(i=0; i<vec_len(am->acls); i++) {
+ if (acl_is_not_defined(am, i)) {
+ /* don't attempt to show the ACLs that do not exist */
+ continue;
+ }
+ if ((acl_index != ~0) && (acl_index != i)) {
+ continue;
+ }
+ out0 = format(out0, "acl-index %u count %u tag {%s}\n", i, am->acls[i].count, am->acls[i].tag);
+ acl_rule_t *r;
+ int j;
+ for(j=0; j<am->acls[i].count; j++) {
+ r = &am->acls[i].rules[j];
+ out0 = format(out0, " %4d: %s ", j, r->is_ipv6 ? "ipv6" : "ipv4");
+ out0 = format_acl_action(out0, r->is_permit);
+ out0 = format(out0, " src %U/%d", format_ip46_address, &r->src, IP46_TYPE_ANY, r->src_prefixlen);
+ out0 = format(out0, " dst %U/%d", format_ip46_address, &r->dst, IP46_TYPE_ANY, r->dst_prefixlen);
+ out0 = format(out0, " proto %d", r->proto);
+ out0 = format(out0, " sport %d", r->src_port_or_type_first);
+ if (r->src_port_or_type_first != r->src_port_or_type_last) {
+ out0 = format(out0, "-%d", r->src_port_or_type_last);
+ }
+ out0 = format(out0, " dport %d", r->dst_port_or_code_first);
+ if (r->dst_port_or_code_first != r->dst_port_or_code_last) {
+ out0 = format(out0, "-%d", r->dst_port_or_code_last);
+ }
+ if (r->tcp_flags_mask || r->tcp_flags_value) {
+ out0 = format(out0, " tcpflags %d mask %d", r->tcp_flags_value, r->tcp_flags_mask);
+ }
+ out0 = format(out0, "\n");
+ }
+
+ if (i<vec_len(am->input_sw_if_index_vec_by_acl)) {
+ out0 = format(out0, " applied inbound on sw_if_index: ");
+ vec_foreach(pj, am->input_sw_if_index_vec_by_acl[i]) {
+ out0 = format(out0, "%d ", *pj);
+ }
+ out0 = format(out0, "\n");
+ }
+ if (i<vec_len(am->output_sw_if_index_vec_by_acl)) {
+ out0 = format(out0, " applied outbound on sw_if_index: ");
+ vec_foreach(pj, am->output_sw_if_index_vec_by_acl[i]) {
+ out0 = format(out0, "%d ", *pj);
+ }
+ out0 = format(out0, "\n");
+ }
+ }
+ vec_terminate_c_string(out0);
+ vlib_cli_output(vm, "\n%s\n", out0);
+ vec_free(out0);
+ }
+ else if (unformat (input, "memory"))
+ {
+ vlib_cli_output (vm, "ACL plugin main heap statistics:\n");
+ if (am->acl_mheap) {
+ vlib_cli_output (vm, " %U\n", format_mheap, am->acl_mheap, 1);
+ } else {
+ vlib_cli_output (vm, " Not initialized\n");
+ }
+ vlib_cli_output (vm, "ACL hash lookup support heap statistics:\n");
+ if (am->hash_lookup_mheap) {
+ vlib_cli_output (vm, " %U\n", format_mheap, am->hash_lookup_mheap, 1);
+ } else {
+ vlib_cli_output (vm, " Not initialized\n");
+ }
+ }
+ else if (unformat (input, "tables"))
+ {
+ ace_mask_type_entry_t *mte;
+ u32 acl_index = ~0;
+ u32 sw_if_index = ~0;
+ int show_acl_hash_info = 0;
+ int show_applied_info = 0;
+ int show_mask_type = 0;
+ int show_bihash = 0;
+ u32 show_bihash_verbose = 0;
+
+ if (unformat (input, "acl")) {
+ show_acl_hash_info = 1;
+ /* mask-type is handy to see as well right there */
+ show_mask_type = 1;
+ unformat (input, "index %u", &acl_index);
+ } else if (unformat (input, "applied")) {
+ show_applied_info = 1;
+ unformat (input, "sw_if_index %u", &sw_if_index);
+ } else if (unformat (input, "mask")) {
+ show_mask_type = 1;
+ } else if (unformat (input, "hash")) {
+ show_bihash = 1;
+ unformat (input, "verbose %u", &show_bihash_verbose);
+ }
+
+ if ( ! (show_mask_type || show_acl_hash_info || show_applied_info || show_bihash) ) {
+ /* if no qualifiers specified, show all */
+ show_mask_type = 1;
+ show_acl_hash_info = 1;
+ show_applied_info = 1;
+ show_bihash = 1;
+ }
+
+ if (show_mask_type) {
+ vlib_cli_output(vm, "Mask-type entries:");
+ /* *INDENT-OFF* */
+ pool_foreach(mte, am->ace_mask_type_pool,
+ ({
+ vlib_cli_output(vm, " %3d: %016llx %016llx %016llx %016llx %016llx %016llx refcount %d",
+ mte - am->ace_mask_type_pool,
+ mte->mask.kv.key[0], mte->mask.kv.key[1], mte->mask.kv.key[2],
+ mte->mask.kv.key[3], mte->mask.kv.key[4], mte->mask.kv.value, mte->refcount);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ if (show_acl_hash_info) {
+ u32 i,j;
+ u8 * out0 = format(0, "");
+ u64 *m;
+ out0 = format(out0, "Mask-ready ACL representations\n");
+ for (i=0; i< vec_len(am->hash_acl_infos); i++) {
+ if ((acl_index != ~0) && (acl_index != i)) {
+ continue;
+ }
+ hash_acl_info_t *ha = &am->hash_acl_infos[i];
+ out0 = format(out0, "acl-index %u bitmask-ready layout\n", i);
+ out0 = format(out0, " applied inbound on sw_if_index list: %U\n", format_vec32, ha->inbound_sw_if_index_list, "%d");
+ out0 = format(out0, " applied outbound on sw_if_index list: %U\n", format_vec32, ha->outbound_sw_if_index_list, "%d");
+ out0 = format(out0, " mask type index bitmap: %U\n", format_bitmap_hex, ha->mask_type_index_bitmap);
+ for(j=0; j<vec_len(ha->rules); j++) {
+ hash_ace_info_t *pa = &ha->rules[j];
+ m = (u64 *)&pa->match;
+ out0 = format(out0, " %4d: %016llx %016llx %016llx %016llx %016llx %016llx mask index %d acl %d rule %d action %d src/dst portrange not ^2: %d,%d\n",
+ j, m[0], m[1], m[2], m[3], m[4], m[5], pa->mask_type_index,
+ pa->acl_index, pa->ace_index, pa->action,
+ pa->src_portrange_not_powerof2, pa->dst_portrange_not_powerof2);
+ }
+ }
+ vec_terminate_c_string(out0);
+ vlib_cli_output(vm, "\n%s\n", out0);
+ vec_free(out0);
+ }
+
+ if (show_applied_info) {
+ u32 swi, j;
+ u8 * out0 = format(0, "");
+ out0 = format(out0, "Applied lookup entries for interfaces\n");
+
+ for(swi = 0; (swi < vec_len(am->input_applied_hash_acl_info_by_sw_if_index)) ||
+ (swi < vec_len(am->output_applied_hash_acl_info_by_sw_if_index)) ||
+ (swi < vec_len(am->input_hash_entry_vec_by_sw_if_index)) ||
+ (swi < vec_len(am->output_hash_entry_vec_by_sw_if_index)); swi++) {
+ if ((sw_if_index != ~0) && (sw_if_index != swi)) {
+ continue;
+ }
+ out0 = format(out0, "sw_if_index %d:\n", swi);
+ if (swi < vec_len(am->input_applied_hash_acl_info_by_sw_if_index)) {
+ applied_hash_acl_info_t *pal = &am->input_applied_hash_acl_info_by_sw_if_index[swi];
+ out0 = format(out0, " input lookup mask_type_index_bitmap: %U\n", format_bitmap_hex, pal->mask_type_index_bitmap);
+ out0 = format(out0, " input applied acls: %U\n", format_vec32, pal->applied_acls, "%d");
+ }
+ if (swi < vec_len(am->input_hash_entry_vec_by_sw_if_index)) {
+ out0 = format(out0, " input lookup applied entries:\n");
+ for(j=0; j<vec_len(am->input_hash_entry_vec_by_sw_if_index[swi]); j++) {
+ applied_hash_ace_entry_t *pae = &am->input_hash_entry_vec_by_sw_if_index[swi][j];
+ out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d tail %d hitcount %lld\n",
+ j, pae->acl_index, pae->ace_index, pae->action, pae->hash_ace_info_index,
+ pae->next_applied_entry_index, pae->prev_applied_entry_index, pae->tail_applied_entry_index, pae->hitcount);
+ }
+ }
+
+ if (swi < vec_len(am->output_applied_hash_acl_info_by_sw_if_index)) {
+ applied_hash_acl_info_t *pal = &am->output_applied_hash_acl_info_by_sw_if_index[swi];
+ out0 = format(out0, " output lookup mask_type_index_bitmap: %U\n", format_bitmap_hex, pal->mask_type_index_bitmap);
+ out0 = format(out0, " output applied acls: %U\n", format_vec32, pal->applied_acls, "%d");
+ }
+ if (swi < vec_len(am->output_hash_entry_vec_by_sw_if_index)) {
+ out0 = format(out0, " output lookup applied entries:\n");
+ for(j=0; j<vec_len(am->output_hash_entry_vec_by_sw_if_index[swi]); j++) {
+ applied_hash_ace_entry_t *pae = &am->output_hash_entry_vec_by_sw_if_index[swi][j];
+ out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d tail %d hitcount %lld\n",
+ j, pae->acl_index, pae->ace_index, pae->action, pae->hash_ace_info_index,
+ pae->next_applied_entry_index, pae->prev_applied_entry_index, pae->tail_applied_entry_index, pae->hitcount);
+ }
+ }
+
+ }
+ vec_terminate_c_string(out0);
+ vlib_cli_output(vm, "\n%s\n", out0);
+ vec_free(out0);
+ }
+
+ if (show_bihash) {
+ show_hash_acl_hash(vm, am, show_bihash_verbose);
+ }
+ }
+ return error;
+}
+
+static clib_error_t *
+acl_clear_aclplugin_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ acl_main_t *am = &acl_main;
+ vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
+ ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, ~0);
+ return error;
+}
+
+ /* *INDENT-OFF* */
+VLIB_CLI_COMMAND (aclplugin_set_command, static) = {
+ .path = "set acl-plugin",
+ .short_help = "set acl-plugin session timeout {{udp idle}|tcp {idle|transient}} <seconds>",
+ .function = acl_set_aclplugin_fn,
+};
+
+VLIB_CLI_COMMAND (aclplugin_show_command, static) = {
+ .path = "show acl-plugin",
+ .short_help = "show acl-plugin {sessions|acl|interface|tables}",
+ .function = acl_show_aclplugin_fn,
+};
+
+VLIB_CLI_COMMAND (aclplugin_show_macip_command, static) = {
+ .path = "show acl-plugin macip",
+ .short_help = "show acl-plugin macip {acl|interface}",
+ .function = acl_show_aclplugin_macip_fn,
+};
+
+
+VLIB_CLI_COMMAND (aclplugin_clear_command, static) = {
+ .path = "clear acl-plugin sessions",
+ .short_help = "clear acl-plugin sessions",
+ .function = acl_clear_aclplugin_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+acl_plugin_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ acl_main_t *am = &acl_main;
+ u32 conn_table_hash_buckets;
+ u32 conn_table_hash_memory_size;
+ u32 conn_table_max_entries;
+ u32 main_heap_size;
+ u32 hash_heap_size;
+ u32 hash_lookup_hash_buckets;
+ u32 hash_lookup_hash_memory;
+
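+  /* Example startup.conf stanza (illustrative values only):
+   *   acl-plugin {
+   *     connection hash buckets 65536
+   *     connection hash memory 1073741824
+   *     connection count max 500000
+   *   }
+   */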
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "connection hash buckets %d", &conn_table_hash_buckets))
+ am->fa_conn_table_hash_num_buckets = conn_table_hash_buckets;
+ else if (unformat (input, "connection hash memory %d",
+ &conn_table_hash_memory_size))
+ am->fa_conn_table_hash_memory_size = conn_table_hash_memory_size;
+ else if (unformat (input, "connection count max %d",
+ &conn_table_max_entries))
+ am->fa_conn_table_max_entries = conn_table_max_entries;
+ else if (unformat (input, "main heap size %d",
+ &main_heap_size))
+ am->acl_mheap_size = main_heap_size;
+ else if (unformat (input, "hash lookup heap size %d",
+ &hash_heap_size))
+ am->hash_lookup_mheap_size = hash_heap_size;
+ else if (unformat (input, "hash lookup hash buckets %d",
+ &hash_lookup_hash_buckets))
+ am->hash_lookup_hash_buckets = hash_lookup_hash_buckets;
+ else if (unformat (input, "hash lookup hash memory %d",
+ &hash_lookup_hash_memory))
+ am->hash_lookup_hash_memory = hash_lookup_hash_memory;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+VLIB_CONFIG_FUNCTION (acl_plugin_config, "acl-plugin");
+
+static clib_error_t *
+acl_init (vlib_main_t * vm)
+{
+ acl_main_t *am = &acl_main;
+ clib_error_t *error = 0;
+ memset (am, 0, sizeof (*am));
+ am->vlib_main = vm;
+ am->vnet_main = vnet_get_main ();
+
+ u8 *name = format (0, "acl_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ am->msg_id_base = vl_msg_api_get_msg_ids ((char *) name,
+ VL_MSG_FIRST_AVAILABLE);
+
+ error = acl_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (am, &api_main);
+
+ vec_free (name);
+
+ acl_setup_fa_nodes();
+
+ am->acl_mheap_size = ACL_FA_DEFAULT_HEAP_SIZE;
+ am->hash_lookup_mheap_size = ACL_PLUGIN_HASH_LOOKUP_HEAP_SIZE;
+
+ am->hash_lookup_hash_buckets = ACL_PLUGIN_HASH_LOOKUP_HASH_BUCKETS;
+ am->hash_lookup_hash_memory = ACL_PLUGIN_HASH_LOOKUP_HASH_MEMORY;
+
+ am->session_timeout_sec[ACL_TIMEOUT_TCP_TRANSIENT] = TCP_SESSION_TRANSIENT_TIMEOUT_SEC;
+ am->session_timeout_sec[ACL_TIMEOUT_TCP_IDLE] = TCP_SESSION_IDLE_TIMEOUT_SEC;
+ am->session_timeout_sec[ACL_TIMEOUT_UDP_IDLE] = UDP_SESSION_IDLE_TIMEOUT_SEC;
+
+ am->fa_conn_table_hash_num_buckets = ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS;
+ am->fa_conn_table_hash_memory_size = ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE;
+ am->fa_conn_table_max_entries = ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vec_validate(am->per_worker_data, tm->n_vlib_mains-1);
+ {
+ u16 wk;
+ u8 tt;
+ for (wk = 0; wk < vec_len (am->per_worker_data); wk++) {
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
+ vec_validate(pw->fa_conn_list_head, ACL_N_TIMEOUTS-1);
+ vec_validate(pw->fa_conn_list_tail, ACL_N_TIMEOUTS-1);
+ for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) {
+ pw->fa_conn_list_head[tt] = ~0;
+ pw->fa_conn_list_tail[tt] = ~0;
+ }
+ }
+ }
+
+ am->fa_min_deleted_sessions_per_interval = ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL;
+ am->fa_max_deleted_sessions_per_interval = ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL;
+ am->fa_cleaner_wait_time_increment = ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT;
+
+ am->fa_cleaner_cnt_delete_by_sw_index = 0;
+ am->fa_cleaner_cnt_delete_by_sw_index_ok = 0;
+ am->fa_cleaner_cnt_unknown_event = 0;
+ am->fa_cleaner_cnt_timer_restarted = 0;
+ am->fa_cleaner_cnt_wait_with_timeout = 0;
+
+
+#define _(N, v, s) am->fa_ipv6_known_eh_bitmap = clib_bitmap_set(am->fa_ipv6_known_eh_bitmap, v, 1);
+ foreach_acl_eh
+#undef _
+
+ am->l4_match_nonfirst_fragment = 1;
+
+ /* use the new fancy hash-based matching */
+ am->use_hash_acl_matching = 1;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (acl_init);
diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h
new file mode 100644
index 00000000..bed22e5f
--- /dev/null
+++ b/src/plugins/acl/acl.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_acl_h
+#define included_acl_h
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_output.h>
+
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vppinfra/bihash_40_8.h>
+
+#include "fa_node.h"
+#include "hash_lookup_types.h"
+
+#define ACL_PLUGIN_VERSION_MAJOR 1
+#define ACL_PLUGIN_VERSION_MINOR 3
+
+#define UDP_SESSION_IDLE_TIMEOUT_SEC 600
+#define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24)
+#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 120
+
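+/* 2 << 29 == 1 GiB main heap; 2 << 25 == 64 MiB hash-lookup heap/hash memory. */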
+#define ACL_FA_DEFAULT_HEAP_SIZE (2 << 29)
+
+#define ACL_PLUGIN_HASH_LOOKUP_HEAP_SIZE (2 << 25)
+#define ACL_PLUGIN_HASH_LOOKUP_HASH_BUCKETS 65536
+#define ACL_PLUGIN_HASH_LOOKUP_HASH_MEMORY (2 << 25)
+
+extern vlib_node_registration_t acl_in_node;
+extern vlib_node_registration_t acl_out_node;
+
+void input_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap);
+void output_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap);
+
+enum acl_timeout_e {
+ ACL_TIMEOUT_UDP_IDLE = 0,
+ ACL_TIMEOUT_TCP_IDLE,
+ ACL_TIMEOUT_TCP_TRANSIENT,
+ ACL_N_TIMEOUTS
+};
+
+
+enum address_e { IP4, IP6 };
+typedef struct
+{
+ enum address_e type;
+ union {
+ ip6_address_t ip6;
+ ip4_address_t ip4;
+ } addr;
+} address_t;
+
+/*
+ * ACL rules
+ */
+typedef struct
+{
+ u8 is_permit;
+ u8 is_ipv6;
+ ip46_address_t src;
+ u8 src_prefixlen;
+ ip46_address_t dst;
+ u8 dst_prefixlen;
+ u8 proto;
+ u16 src_port_or_type_first;
+ u16 src_port_or_type_last;
+ u16 dst_port_or_code_first;
+ u16 dst_port_or_code_last;
+ u8 tcp_flags_value;
+ u8 tcp_flags_mask;
+} acl_rule_t;
+
+typedef struct
+{
+ u8 is_permit;
+ u8 is_ipv6;
+ u8 src_mac[6];
+ u8 src_mac_mask[6];
+ ip46_address_t src_ip_addr;
+ u8 src_prefixlen;
+} macip_acl_rule_t;
+
+/*
+ * ACL
+ */
+typedef struct
+{
+ u8 tag[64];
+ u32 count;
+ acl_rule_t *rules;
+} acl_list_t;
+
+typedef struct
+{
+ u8 tag[64];
+ u32 count;
+ macip_acl_rule_t *rules;
+ /* References to the classifier tables that will enforce the rules */
+ u32 ip4_table_index;
+ u32 ip6_table_index;
+ u32 l2_table_index;
+} macip_acl_list_t;
+
+/*
+ * An element describing a particular configuration of the mask,
+ * and how many times it has been used.
+ */
+typedef struct
+{
+ fa_5tuple_t mask;
+ u32 refcount;
+} ace_mask_type_entry_t;
+
+typedef struct {
+ /* mheap to hold all the ACL module related allocations, other than hash */
+ void *acl_mheap;
+ u32 acl_mheap_size;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ acl_list_t *acls; /* Pool of ACLs */
+ hash_acl_info_t *hash_acl_infos; /* corresponding hash matching housekeeping info */
+ clib_bihash_48_8_t acl_lookup_hash; /* ACL lookup hash table. */
+ u32 hash_lookup_hash_buckets;
+ u32 hash_lookup_hash_memory;
+
+ /* mheap to hold all the miscellaneous allocations related to hash-based lookups */
+ void *hash_lookup_mheap;
+ u32 hash_lookup_mheap_size;
+ int acl_lookup_hash_initialized;
+ applied_hash_ace_entry_t **input_hash_entry_vec_by_sw_if_index;
+ applied_hash_ace_entry_t **output_hash_entry_vec_by_sw_if_index;
+ applied_hash_acl_info_t *input_applied_hash_acl_info_by_sw_if_index;
+ applied_hash_acl_info_t *output_applied_hash_acl_info_by_sw_if_index;
+
+ macip_acl_list_t *macip_acls; /* Pool of MAC-IP ACLs */
+
+ /* ACLs associated with interfaces */
+ u32 **input_acl_vec_by_sw_if_index;
+ u32 **output_acl_vec_by_sw_if_index;
+
+ /* interfaces on which given ACLs are applied */
+ u32 **input_sw_if_index_vec_by_acl;
+ u32 **output_sw_if_index_vec_by_acl;
+
+ /* Total count of interface+direction pairs enabled */
+ u32 fa_total_enabled_count;
+
+ /* Do we use hash-based ACL matching or linear */
+ int use_hash_acl_matching;
+
+ /* a pool of all mask types present in all ACEs */
+ ace_mask_type_entry_t *ace_mask_type_pool;
+
+ /*
+ * Classify tables used to grab the packets for the ACL check,
+ * and serving as the 5-tuple session tables at the same time
+ */
+ u32 *acl_ip4_input_classify_table_by_sw_if_index;
+ u32 *acl_ip6_input_classify_table_by_sw_if_index;
+ u32 *acl_ip4_output_classify_table_by_sw_if_index;
+ u32 *acl_ip6_output_classify_table_by_sw_if_index;
+
+ /* MACIP (input) ACLs associated with the interfaces */
+ u32 *macip_acl_by_sw_if_index;
+
+  /* bitmaps: when a bit is set, processing is enabled on that interface */
+ uword *fa_in_acl_on_sw_if_index;
+ uword *fa_out_acl_on_sw_if_index;
+ /* bihash holding all of the sessions */
+ int fa_sessions_hash_is_initialized;
+ clib_bihash_40_8_t fa_sessions_hash;
+  /* The process node which orchestrates the cleanup */
+ u32 fa_cleaner_node_index;
+ /* FA session timeouts, in seconds */
+ u32 session_timeout_sec[ACL_N_TIMEOUTS];
+ /* total session adds/dels */
+ u64 fa_session_total_adds;
+ u64 fa_session_total_dels;
+
+ /* L2 datapath glue */
+
+ /* next indices within L2 classifiers for ip4/ip6 fa L2 nodes */
+ u32 l2_input_classify_next_acl_ip4;
+ u32 l2_input_classify_next_acl_ip6;
+ u32 l2_output_classify_next_acl_ip4;
+ u32 l2_output_classify_next_acl_ip6;
+ /* next node indices for L2 dispatch */
+ u32 fa_acl_in_ip4_l2_node_feat_next_node_index[32];
+ u32 fa_acl_in_ip6_l2_node_feat_next_node_index[32];
+ u32 fa_acl_out_ip4_l2_node_feat_next_node_index[32];
+ u32 fa_acl_out_ip6_l2_node_feat_next_node_index[32];
+
+ /* EH values that we can skip over */
+ uword *fa_ipv6_known_eh_bitmap;
+
+ /* whether to match L4 ACEs with ports on the non-initial fragment */
+ int l4_match_nonfirst_fragment;
+
+ /* conn table per-interface conn table parameters */
+ u32 fa_conn_table_hash_num_buckets;
+ uword fa_conn_table_hash_memory_size;
+ u64 fa_conn_table_max_entries;
+
+ /*
+ * If the cleaner has to delete more than this number
+ * of connections, it halves the sleep time.
+ */
+
+#define ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL 100
+ u64 fa_max_deleted_sessions_per_interval;
+
+ /*
+   * If the cleaner deletes fewer than this number of connections,
+   * it increases the wait time by the "increment".
+ */
+
+#define ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL 1
+ u64 fa_min_deleted_sessions_per_interval;
+
+#define ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT 0.1
+ f64 fa_cleaner_wait_time_increment;
+
+ u64 fa_current_cleaner_timer_wait_interval;
+
+ int fa_interrupt_generation;
+
+  /* per-worker data related to conn management */
+ acl_fa_per_worker_data_t *per_worker_data;
+
+ /* Configured session timeout */
+ u64 session_timeout[ACL_N_TIMEOUTS];
+
+
+ /* Counters for the cleaner thread */
+
+#define foreach_fa_cleaner_counter \
+ _(fa_cleaner_cnt_delete_by_sw_index, "delete_by_sw_index events") \
+ _(fa_cleaner_cnt_delete_by_sw_index_ok, "delete_by_sw_index handled ok") \
+ _(fa_cleaner_cnt_unknown_event, "unknown events received") \
+ _(fa_cleaner_cnt_timer_restarted, "session idle timers restarted") \
+ _(fa_cleaner_cnt_wait_with_timeout, "event wait with timeout called") \
+ _(fa_cleaner_cnt_wait_without_timeout, "event wait w/o timeout called") \
+ _(fa_cleaner_cnt_event_cycles, "total event cycles") \
+/* end of counters */
+#define _(id, desc) u32 id;
+ foreach_fa_cleaner_counter
+#undef _
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} acl_main_t;
+
+#define foreach_acl_eh \
+ _(HOPBYHOP , 0 , "IPv6ExtHdrHopByHop") \
+ _(ROUTING , 43 , "IPv6ExtHdrRouting") \
+ _(DESTOPT , 60 , "IPv6ExtHdrDestOpt") \
+ _(FRAGMENT , 44 , "IPv6ExtHdrFragment") \
+ _(MOBILITY , 135, "Mobility Header") \
+ _(HIP , 139, "Experimental use Host Identity Protocol") \
+ _(SHIM6 , 140, "Shim6 Protocol") \
+ _(EXP1 , 253, "Use for experimentation and testing") \
+ _(EXP2 , 254, "Use for experimentation and testing")
+
+/*
+
+ "No Next Header" is not a header.
+ Also, Fragment header needs special processing.
+
+ _(NONEXT , 59 , "NoNextHdr") \
+
+
+ESP is hiding its internal format, so no point in trying to go past it.
+
+ _(ESP , 50 , "EncapsulatingSecurityPayload") \
+
+
+AH has a special treatment of its length: it is in 32-bit words, not 64-bit words like the rest.
+
+ _(AUTH , 51 , "Authentication Header") \
+
+
+*/
+
+
+typedef enum {
+#define _(N, v, s) ACL_EH_##N = v,
+  foreach_acl_eh
+#undef _
+} acl_eh_t;
+
+
+
+extern acl_main_t acl_main;
+
+
+#endif
diff --git a/src/plugins/acl/acl_all_api_h.h b/src/plugins/acl/acl_all_api_h.h
new file mode 100644
index 00000000..cb781cfd
--- /dev/null
+++ b/src/plugins/acl/acl_all_api_h.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <acl/acl.api.h>
+
+#ifdef vl_printfun
+#include <acl/manual_fns.h>
+#endif
+
diff --git a/src/plugins/acl/acl_hash_lookup_doc.md b/src/plugins/acl/acl_hash_lookup_doc.md
new file mode 100644
index 00000000..cb93df04
--- /dev/null
+++ b/src/plugins/acl/acl_hash_lookup_doc.md
@@ -0,0 +1,241 @@
+ACL plugin constant-time lookup design {#acl_hash_lookup}
+======================================
+
+The initial implementation of the ACL plugin performs a trivial for() cycle,
+going through the assigned ACLs on a per-packet basis. This is not very
+efficient, even though for very short ACLs its simplicity can beat
+more advanced methods.
+
+However, to cover the case of longer ACLs with acceptable performance,
+we need a better way of matching. This write-up proposes a mechanism
+to reduce the lookup from O(M), where M is the number of entries, to O(N),
+where N is the number of different mask combinations - e.g. an ACL with
+1000 ACEs that use only four distinct mask layouts needs at most four
+hash lookups per packet.
+
+Preparation of ACL(s)
+---------------------
+
+The ACL plugin will maintain a global list of "mask types", i.e. the specific
+configurations of "do not care" bits within the ACEs.
+Upon the creation of a new ACL, a pass will be made through all the
+ACEs, to assign and possibly allocate the "mask type number".
+
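+As a minimal illustration of what a "mask type" is (a sketch; the helper
+below is not part of the plugin), an ACE's prefix length widens into a
+per-byte address mask, and ACEs that produce identical masks share one
+mask type entry:
+
+```
+/* Sketch only - not the plugin's code. */
+static void
+ip6_prefix_to_mask (int prefixlen, unsigned char mask[16])
+{
+  int i;
+  for (i = 0; i < 16; i++, prefixlen -= 8)
+    mask[i] = prefixlen >= 8 ? 0xff
+            : prefixlen > 0 ? (unsigned char) (0xff << (8 - prefixlen))
+            : 0x00;
+}
+```
+
+For example, a /48 source yields six 0xff bytes followed by zeros; two
+ACEs with the same prefix lengths and port semantics reuse the same mask
+type number (and bump its refcount) rather than allocating a new one.
+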
+Each ACL has a structure *hash_acl_info_t* representing the "hash-based"
+parts of information related to that ACL, primarily the array of
+*hash_ace_info_t* structures. Each of the members of that array
+corresponds to one of the rules (ACEs) in the original ACL; for this
+they carry a pair of *(acl_index, ace_index)* to keep track,
+predominantly for debugging.
+
+Why do we need a whole separate structure, and why are we not adding new
+fields to the existing rule structure? First, encapsulation, to minimize
+the pollution of the main ACL code with the hash-based lookup artifacts.
+
+Second, one rule may correspond to more than one "hash-based" ACE.
+In fact, most of the rules do correspond to two of those. Why?
+
+Consider that the current ACL lookup logic is that if a packet
+is not the initial fragment, and there is an L4 entry acting on the packet,
+the comparison will be made only on the L4 protocol field value rather
+than on the protocol and port values. This behaviour is governed by
+*l4_match_nonfirst_fragment* flag in the *acl_main*, and was needed to
+maintain the compatibility with the existing software switch implementation.
+
+While for the sequential check in *single_acl_match_5tuple()*
+it is very easy to implement by just breaking out at the right moment,
+in case of hash-based matching this costs us two checks:
+one on the full 5-tuple with the flag *pkt.is_nonfirst_fragment* being zero,
+the second on the 3-tuple with the flag *pkt.is_nonfirst_fragment* being one,
+with the second check triggered by the *acl_main.l4_match_nonfirst_fragment*
+setting being the default 1. This dictates the necessity of having a "match"
+field in a given *hash_ace_info_t* element, which would reflect the value
+we are supposed to match after applying the mask.
+
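+To illustrate (a sketch with made-up minimal types, not the plugin's
+definitions), one L4 rule thus yields two hash-ready entries that differ
+both in the mask and in the expected value of the fragment flag:
+
+```
+typedef struct
+{
+  int ports_are_in_mask;	/* do we compare the L4 ports?        */
+  int match_nonfirst_fragment;	/* expected pkt.is_nonfirst_fragment  */
+} sketch_ace_t;
+
+/* Sketch only - not the plugin's code. */
+static void
+expand_l4_rule (sketch_ace_t out[2])
+{
+  out[0].ports_are_in_mask = 1;	       /* full 5-tuple...           */
+  out[0].match_nonfirst_fragment = 0;  /* ...for initial fragments  */
+  out[1].ports_are_in_mask = 0;	       /* 3-tuple...                */
+  out[1].match_nonfirst_fragment = 1;  /* ...for non-initial ones   */
+}
+```
+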
+There can be other circumstances when it might be beneficial to expand
+the given rule in the original ACL into multiple - for example, as an
+optimization within the port range handling for small port ranges
+(this is not done as of the time of writing).
+
+Assigning ACLs to an interface
+------------------------------
+
+Once the ACL list is assigned to an interface, or, rather, a new ACL
+is added to the list of the existing ACLs applied to the interface,
+we need to update the bihash accelerating the lookup.
+
+All the entries for the lookups are stored within a single *48_8* bihash,
+which captures the 5-tuple from the packet as well as the miscellaneous
+per-packet information flags, e.g. *l4_valid*, *is_non_first_fragment*,
+and so on. To facilitate the use of the single bihash by all the interfaces,
+the *is_ip6*, *is_input*, *sw_if_index* are part of the key,
+as well as *mask_type_index* - the latter being necessary because
+there can be entries with the same value but different masks, e.g.:
+`permit ::/0, permit ::/128`.
+
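+To make the scoping concrete, here is a sketch of how such a lookup key
+could be assembled (the field placement and names here are assumptions
+of this illustration, not the plugin's actual key layout):
+
+```
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef struct { u64 w[6]; } key_48_t;	/* 48-byte key, as in the 48_8 bihash */
+
+static key_48_t
+make_key (const key_48_t * pkt, const key_48_t * mask,
+	  u32 sw_if_index, int is_ip6, int is_input, u32 mask_type_index)
+{
+  key_48_t k;
+  int i;
+  for (i = 0; i < 6; i++)
+    k.w[i] = pkt->w[i] & mask->w[i];	/* 5-tuple AND mask */
+  /* In this sketch the scoping fields live in the last word. */
+  k.w[5] = ((u64) sw_if_index << 32) | ((u64) mask_type_index << 2)
+	   | ((u64) (is_ip6 != 0) << 1) | (u64) (is_input != 0);
+  return k;
+}
+```
+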
+At the moment of an ACL being applied to an interface, we need to
+walk the list of *hash_ace_info_t* entries corresponding to that ACL,
+and update the bihash with the keys corresponding to the match
+values in these entries.
+
+The value of the hash match contains the index into a per-*sw_if_index* vector
+of *applied_ace_hash_entry_t* elements, as well as a couple of flags:
+*shadowed* (optimization: if this flag on a matched entry is zero, it means
+we can stop the lookup early and declare a match - see below),
+and *need_portrange_check* - meaning that what matched was a superset
+of the actual match, and we need to perform an extra check.
+
+Also, upon insertion, we must keep in mind there can be
+multiple *applied_ace_hash_entry_t* for the same key, and we must keep
+a list of those. This is necessary to incrementally apply/unapply
+the ACLs as part of the ACL vector: say, two ACLs have
+"permit 2001:db8::1/128 any" - we should be able to retain the entry
+for the second ACL even if we have deleted the first one.
+Also, in case there are two entries with the same key but
+different port ranges, say 0..42 and 142..65535 - we need
+to be able to sequentially match on those if we decide not
+to expand them into individual port-specific entries.
+
+Per-packet lookup
+-----------------
+
+The simple single-packet lookup is defined in
+*multi_acl_match_get_applied_ace_index*, which returns the index
+of the applied hash ACE if there was a match, or ~0 if there wasn't.
+
+The future optimized per-packet lookup may be batched in three phases:
+
+1. Prepare the keys in the per-worker vector by doing a logical AND of
+   the original 5-tuple record with the elements of the mask vector.
+2. Look up the keys in the bihash in a batch manner, collecting the
+   result with the lowest u64 (ACL index within vector, ACE index) from
+   the hash lookup value, and performing the list walk if necessary
+   (for port ranges).
+3. Take the action from the ACL record as defined by (ACL#, ACE#) from the
+   resulting lookup winner or, if no match is found, perform the default deny.
+
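+A compilable sketch of these three phases (a single u64 stands in for
+the real 48-byte key, and a callback stands in for the bihash search;
+neither is the plugin's actual API):
+
+```
+typedef unsigned long long u64;
+
+/* hash_search() returns the packed (ACL#, ACE#) value, or ~0ULL on miss. */
+static u64
+lookup_sketch (u64 pkt_5tuple, const u64 * masks, int n_masks,
+	       u64 (*hash_search) (u64 key))
+{
+  u64 winner = ~0ULL;
+  int i;
+  for (i = 0; i < n_masks; i++)
+    {
+      u64 key = pkt_5tuple & masks[i];	/* phase 1: AND with each mask  */
+      u64 res = hash_search (key);	/* phase 2: look up in the hash */
+      if (res < winner)			/* lowest (ACL#, ACE#) wins     */
+	winner = res;
+    }
+  return winner;	/* phase 3: caller acts; ~0ULL means default deny */
+}
+```
+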
+Shadowed/independent/redundant ACEs
+------------------------------------
+
+During the phase of combining multiple ACLs into one rulebase, when they
+are applied to an interface, we can also perform several optimizations.
+
+If a given ACE is a strict subset of another ACE located up in the linear
+search order, we can ignore this ACE completely - because by definition
+it will never match. We will call such an ACE *redundant*. Here is an example:
+
+```
+permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+deny 2001:db8:1:1::/64 2001:db8:2:1::/64 (A)
+```
+
+A bit more formally, we can define this relationship of an ACE A to ACE B as:
+
+```
+redundant(aceA, aceB) := (contains(protoB, protoA) && contains(srcB, srcA)
+ && contains(dstB, dstA) && is_after(A, B))
+```
+
+Here as "contains" we define an operation operating on the sets defined by
+the protocol, (srcIP, srcPortDefinition) and (dstIP, dstPortDefinition)
+respectively, and returning true if all the elements represented by
+the second argument are represented by the first argument. The "is_after"
+is true if A is located below B in the ruleset.
+
+If a given ACE does not intersect at all with any other ACE
+in front of it, we can mark it as *independent*.
+
+Then, during the sequence of lookups, a successful hit on this ACE means
+we do not need to look up other mask combinations - thus potentially
+speeding up the match process significantly. Here is an example,
+assuming we have the following ACL:
+
+```
+permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+deny 2001:db8:3::/48 2001:db8:2:1::/64 (A)
+```
+
+In this case if we match the second entry, we do not need to check whether
+we have matched the first one - the source addresses are completely
+different. We call such an ACE *independent* from another.
+
+We can define this as
+
+```
+independent(aceA, aceB) := (!intersect(protoA, protoB) ||
+ !intersect(srcA, srcB) ||
+ !intersect(dstA, dstB))
+```
+
+where intersect is defined as operation returning true if there are
+elements belonging to the sets of both arguments.
+
+If the entry A is neither redundant nor independent from B, and is below
+B in the ruleset, we call such an entry *shadowed* by B, here is an example:
+
+```
+deny tcp 2001:db8:1::/48 2001:db8:2::/48 (B)
+permit 2001:db8:1:1::/64 2001:db8:2:1::/64 (A)
+```
+
+This means the earlier rule "carves out" a subset of A, thus leaving
+a "shadow". (Evidently, the action needs to be different for the shadow
+to have an effect, but for the sake of terminology we do not care.)
+
+The more formal definition:
+
+```
+shadowed(aceA, aceB) := !redundant(aceA, aceB) &&
+ !independent(aceA, aceB) &&
+ is_after(aceA, aceB)
+```
+
+Using this terminology, any ruleset can be represented as
+a DAG (Directed Acyclic Graph), with the bottom being the implicit
+"deny any", pointing to the set of rules shadowing it or the ones
+it is redundant for.
+
+These rules may in turn be shadowing each other. There are no cycles in
+this graph because of the natural order of the rules - the rule located
+closer to the end of the ruleset can never shadow or make redundant a rule
+higher up.
+
+The optimization this enables is to skip matching certain
+masks on a per-lookup basis - if a given rule has matched,
+the only adjustment that can happen is a match with one of
+the shadowing rules.
+
+Another avenue for optimization is to start the lookup process
+with the mask type that maximizes the chances of an independent ACE match,
+thus resulting in an ACE lookup being a single hash table hit.
+
+
+Plumbing
+--------
+
+All the new routines are located in a separate file,
+so we can cleanly experiment with a different approach if this
+does not fit all of the use cases.
+
+The constant-time lookup within the data path exposes an API with
+the same signature as the linear lookup:
+
+```
+u8
+multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap)
+```
+
+There should be a new upper-level function with the same signature, which
+will make a decision whether to use a linear lookup, or to use the
+constant-time lookup implemented by this work, or to add some other
+optimizations (e.g. by keeping a cache of the last N lookups).
+
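+One possible shape for that upper-level function, keying off the existing
+*use_hash_acl_matching* knob (the callee names below are assumptions of
+this sketch, not confirmed function names):
+
+```
+u8
+multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+			int is_ip6, int is_input, u32 * acl_match_p,
+			u32 * rule_match_p, u32 * trace_bitmap)
+{
+  acl_main_t *am = &acl_main;
+  if (am->use_hash_acl_matching)	/* hypothetical dispatch */
+    return hash_multi_acl_match_5tuple (sw_if_index, pkt_5tuple, is_l2,
+					is_ip6, is_input, acl_match_p,
+					rule_match_p, trace_bitmap);
+  return linear_multi_acl_match_5tuple (sw_if_index, pkt_5tuple, is_l2,
+					is_ip6, is_input, acl_match_p,
+					rule_match_p, trace_bitmap);
+}
+```
+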
+The calls to the routine doing the preparatory work should happen
+in `acl_add_list()` after creating the linear-lookup structures,
+and the routine populating the hashtable
+should be called from `acl_interface_add_del_inout_acl()` or its callees.
+
+The initial implementation will be geared towards looking up a single
+match at a time, with the subsequent optimizations possible to make
+the lookup for more than one packet.
+
diff --git a/src/plugins/acl/acl_msg_enum.h b/src/plugins/acl/acl_msg_enum.h
new file mode 100644
index 00000000..14d8b48c
--- /dev/null
+++ b/src/plugins/acl/acl_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_acl_msg_enum_h
+#define included_acl_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <acl/acl_all_api_h.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif
diff --git a/src/plugins/acl/acl_multicore_doc.md b/src/plugins/acl/acl_multicore_doc.md
new file mode 100644
index 00000000..b2cf7b9c
--- /dev/null
+++ b/src/plugins/acl/acl_multicore_doc.md
@@ -0,0 +1,349 @@
+Multicore support for ACL plugin {#acl_multicore}
+================================
+
+This captures some considerations and design decisions that I have made,
+both for my own memory later on ("what the hell was I thinking?!?"),
+and for anyone interested to criticize/improve/hack on this code.
+
+One of the factors taken into account while making these decisions
+was the relative emphasis on the multi-thread vs. single-thread
+use cases: the latter is vastly more prevalent. But
+one cannot optimize the single-thread performance without
+having functioning code for multi-thread.
+
+stateless ACLs
+==============
+
+The stateless ACLs parallelize trivially, and the only potential for a
+race between the different threads is during reconfiguration,
+at the time of replacing the old ACL being checked with
+the new ACL.
+
+In case acl_add_replace is being used to replace the rules
+within an existing entry, the `am->acls[X].rules`
+vector may be reallocated, and the count may potentially change.
+
+acl_match_5tuple() has the following code:
+
+```{.c}
+ a = am->acls + acl_index;
+ for (i = 0; i < a->count; i++)
+ {
+ r = a->rules + i;
+ . . .
+```
+
+Ideally we should be immune from a->rules changing,
+but the problem arises if the count changes in flight
+and the new ruleset is smaller - then we will attempt
+to "match" against freed memory.
+
+This can(?) be solved by replacing the for() with a while(),
+re-checking the live count at each iteration.
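+
+A sketch of that shape (whether the compiler is allowed to cache
+`a->count` across iterations is a separate question - a volatile read
+would pin that down):
+
+```{.c}
+  i = 0;
+  while (i < a->count)    /* re-read the live count every iteration */
+    {
+      r = a->rules + i;   /* re-derive the rule pointer as well */
+      . . .
+      i++;
+    }
+```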
+
+full_acl_match_5tuple(), which iterates over the list
+of ACLs, is a bit less immune, since it takes the pointer
+to the vector to iterate over and keeps a local copy of
+that pointer.
+
+This race can be solved by comparing the
+current pointer to the vector with the saved pointer,
+and seeing if there is an (unlikely) change; if
+there is, return the "deny" action, or, better,
+restart the check.
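+
+Were one to implement the "restart" variant, it could be sketched as
+follows (hypothetical names, this is not in the code):
+
+```{.c}
+again:
+  acls = *p_acl_vector;                    /* snapshot the vector pointer */
+  /* ... walk acls, matching the 5-tuple ... */
+  if (PREDICT_FALSE (acls != *p_acl_vector))
+    goto again;                            /* vector moved mid-walk, redo */
+```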
+
+Since the check reloads the ACL list on a per-packet basis,
+there is only a one-packet window of opportunity to
+"match" a packet against an incorrect rule set.
+The workers also do not change anything, they only read.
+Therefore, it looks like building special structures
+to ensure that this does not happen at all might not be
+worth it.
+
+At least not until we have a unit-test able to
+reliably catch this condition and test that
+the measures applied are effective. Adding code
+that is impossible to exercise is worse than
+not adding any code at all.
+
+So, I opt for "do-nothing" here for the moment.
+
+reflexive ACLs: single-thread
+=============================
+
+Before we talk multi-thread, it is worth revisiting the
+design of the reflexive ACLs in the plugin, and
+the history of their evolution.
+
+The very first version of the ACL plugin, shipped in
+1701, mostly did the job using the existing components
+and gluing them together. Because it needed to work
+in the bridged forwarding path only, using the L2 classifier
+as an insertion point appeared natural; also, the L2 classifier,
+being a table with sessions, seemed like a good place
+to hold the sessions.
+
+So, the original design had two conceptual nodes:
+one, pointed to by the next_miss from the L2 classifier table,
+was checking the actual ACL, and inserting session into
+the L2 classifier table, and the other one, pointed
+to by the next_match within the specific session rule,
+was checking the existing session. The timing out
+of the existing connections was done in the datapath,
+by periodically calling the aging function.
+
+This decision to use the existing components,
+attractive as it was, did bring a few limitations as well:
+
+* L2 classifier is a simple mask-and-value match, with
+a fixed mask across the table. So, sanely supporting IPv6
+packets with extension headers in that framework was impossible.
+
+* There is no way to get backpressure from the L2 classifier
+depending on memory usage. When it runs out of memory,
+it simply crashes the box. When does it run out of memory?
+We don't really know. It depends on how it allocates it.
+
+* Since we need to match the *reflected* traffic,
+we had to create *two* full session entries
+in two different directions, which is quite wasteful memory-wise.
+
+* (showstopper): the L2 classifier runs only in
+the bridged data path, so supporting routed data path
+would require creating something else entirely different,
+which would mean much more headaches support-wise going forward.
+
+Because of that, I have moved to a different model of
+creating a session-5-tuple from the packet data - once,
+and then doing all the matching just on that 5-tuple.
+
+This has allowed adding support for skipping IPv6 extension headers.
+
+Also, this new version started to store the sessions in a dedicated
+bihash-per-interface, with the session key data being
+aligned for the ingress packets, and being mirrored for the
+egress packets. This allows for significant savings in memory,
+because now we need to keep only one copy of the session table per
+interface instead of two, and also to have only ONE node for all the lookups
+(L2/L3 path, in/out, IPv4/IPv6) - significantly reducing the code complexity.
+
+Unfortunately, bihash still has the "lack of backpressure" problem,
+in a sense that if you try to insert too many entries and run out
+of memory in the heap you supplied, you get a crash.
+
+To somewhat work around that, there is a "maximum tested number of sessions"
+value, which tracks the currently inserted sessions in the bihash,
+and if this number is being approached, a more aggressive cleanup
+can happen. If this number is reached, two behaviors are possible:
+
+* attempt to do the stateless ACL matching and permit the packet
+ if it succeeds
+
+* deny the packet
+
+Currently I have opted for the second one, since it allows for
+better-defined behavior, and if you have to permit
+the traffic in both directions, why use stateful anyway?
+
+In order to be able to do the cleanup, we need to discriminate between
+the session types, with each session type having its own idle timeout.
+In order to do that, we keep three lists, defined in enum acl_timeout_e:
+ACL_TIMEOUT_UDP_IDLE, ACL_TIMEOUT_TCP_IDLE, ACL_TIMEOUT_TCP_TRANSIENT.
+
+The first one is hopefully obvious - it is just all UDP connections.
+They have an idle timeout of 600 seconds.
+
+The second and third are a bit more subtle. TCP is a complicated protocol,
+and we need to tread the fine line between doing too little and doing
+too much, without triggering potential compatibility issues from
+being a "middlebox".
+
+I decided to split the TCP connections into two classes:
+established, and everything else. "Established" means we have seen
+the SYN and ACK from both sides (with PUSH obviously masked out).
+This is the "active" state of any TCP connection and we would like
+to ensure we do not screw it up. So, the connections in this state
+have the default idle timer of 24 hours.
+
+All the rest of the connections have the idle timeout of 2 minutes,
+(inspired by an old value of MSL) and based on the observation
+that the states this class represent are usually very short lived.
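+
+In code form (matching the enum acl_timeout_e mentioned above; the
+ACL_N_TIMEOUTS sentinel is an assumption, and the values in seconds
+are the defaults described above):
+
+```{.c}
+typedef enum {
+  ACL_TIMEOUT_UDP_IDLE = 0,    /* 600 s (10 min) */
+  ACL_TIMEOUT_TCP_IDLE,        /* 86400 s (24 h), established TCP */
+  ACL_TIMEOUT_TCP_TRANSIENT,   /* 120 s (2 min), all other TCP */
+  ACL_N_TIMEOUTS
+} acl_timeout_e;
+```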
+
+Once we have these three baskets of connections, it is trivial to
+imagine a simple cleanup mechanism to deal with this: take the
+TCP transient connection that has been hanging around the longest and recycle it.
+
+It is debatable whether we want to discriminate between the
+different TCP transient connections. Assuming we do FIFO (and
+the lists allow us to do just that), it means the connection
+at the head of the list has been hanging around the longest.
+Thus, if we are short on resources, we might just go ahead and
+reuse it within the datapath.
+
+This is where we are slowly approaching the question
+"Why in the world have not you used timer wheel or such ?"
+
+The answer is simple: within the above constraints, it does
+not buy me much.
+
+Also, a timer wheel creates a leaky abstraction with a
+difficult-to-manage corner case. Which corner case?
+
+We have a set of objects (sessions) with an event that may
+or may not happen (idle timeout timer firing), and a
+necessity to reset the idle timeout when there is
+activity on the session.
+
+In the worst case, where we had 10000 one-packet
+UDP sessions all created 10 minutes ago, we would need
+to deal with a spike of 10000 expired timers.
+
+Of course, if we have active traffic on all
+of these 10000 connections, then we will not have
+to deal with that? Right, but we will still have to deal
+with canceling and requeueing the timers.
+
+In the best possible case, requeueing a timer is
+going to be something along the lines of a linked-list
+removal and reinsertion.
+
+However, keep in mind we already need to classify the
+connections for reuse, so we already have
+the linked lists!
+
+And if we just check these linked lists periodically in
+a FIFO fashion, we can get away with a very simple per-packet operation:
+writing back the timestamp of "now" into the connection structure.
+
+Then rather than requeueing the list on a per-packet or per-frame
+basis, we can defer this action until the time this session
+appears on the head of the FIFO list, and the cleaning
+routine makes the decision about whether to discard
+the session (because the interval since last activity is bigger
+than the idle timeout), or to requeue the session back to
+the end of the list (because the last activity was less
+than idle timeout ago).
+
+So, rather than using the timers, we can simply reuse our classification
+FIFOs, with the following heuristic: do not look at the session that was
+enqueued at time X until X+session_timeout. If we enqueue the sessions
+in the order of their initial activity, then we can simply use enqueue
+timestamp of the head session as a decision criterion for when we need
+to come back and look at it for timeout purposes.
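+
+In pseudo-C, the cleaner's decision for the session at the head of a
+FIFO looks roughly like this (hypothetical helper names, not the
+actual routine):
+
+```{.c}
+  now = clib_cpu_time_now ();
+  sess = fifo_head (list);
+  if (now - sess->last_active_time > fifo_idle_timeout)
+    delete_session (sess);           /* idle longer than the timeout */
+  else
+    requeue_to_tail (list, sess);    /* active recently - revisit later */
+```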
+
+Since the number of FIFOs is small, we get a slightly worse check
+performance than with timers, but still O(1).
+
+We seemingly do quite a few "useless" operations of requeueing the items
+back to the tail of the list - but, these are the operations we do not
+have to do in the active data path, so overall it is a win.
+
+(Diversion: I believe this problem is congruent to poll vs. epoll or
+events vs. threads, some reading on this subject:
+http://web.archive.org/web/20120225022154/http://sheddingbikes.com/posts/1280829388.html)
+
+We can also run a TCP-like scheme for adaptively changing
+the wait period in the routine that deals with the connection timeouts:
+we can attempt to check the connections a couple of times per second
+(the same rate at which we would advance a timer wheel), and then if we have requeued
+close to a max-per-quantum number of connections, we can halve the waiting
+interval, and if we did not requeue any, we can slowly increase the waiting
+interval - which at a steady state should stabilize, similar to what the TCP rate
+does.
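+
+Sketched out (max_per_quantum and the adjustment step are illustrative):
+
+```{.c}
+  n = check_and_requeue_sessions (max_per_quantum);
+  if (n >= max_per_quantum)
+    wait_interval /= 2;              /* falling behind - check sooner */
+  else if (n == 0)
+    wait_interval += increment;      /* nothing requeued - back off slowly */
+```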
+
+reflexive ACLs: multi-thread
+=============================
+
+The single-threaded implementation in 1704 used a separate "cleaner" process
+to deal with the timing out of the connections.
+It is all good and great when you know that there is only a single core
+to run everything on, but the existence of the lists proves to be
+a massive difficulty when it comes to operating from multiple threads.
+
+Initial study shows that with a few assumptions (e.g. that the cleaner running in main thread
+and the worker have a demarcation point in time where either one or the other one touches
+the session in the list) it might be possible to make it work, but the resulting
+trickiness of doing it neatly with all the corner cases is quite large.
+
+So, for the multi-threaded scenario, we need to move the connection
+aging back to the same CPU as its creation.
+
+Luckily we can do this with the help of the interrupts.
+
+So, the design is as follows: the aging thread (acl_fa_session_cleaner_process)
+periodically fires interrupts at the workers' interrupt nodes (acl_fa_worker_session_cleaner_process_node.index)
+using vlib_node_set_interrupt_pending(), and
+the interrupt node acl_fa_worker_conn_cleaner_process() calls acl_fa_check_idle_sessions(),
+which does the actual job of advancing the lists. Within the actual datapath, the only things we
+do are putting the items onto the FIFO and updating the last-active time on the existing connection.
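+
+The firing side is essentially one call per worker (a sketch; vlib_mains
+is the per-thread vector of vlib_main_t pointers):
+
+```{.c}
+  int i;
+  for (i = 0; i < vec_len (vlib_mains); i++)
+    vlib_node_set_interrupt_pending (vlib_mains[i],
+                acl_fa_worker_session_cleaner_process_node.index);
+```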
+
+The one "delicate" part is that the worker for one leg of the connection might be different from
+the worker of another leg of the connection - but, even if the "owner" tries to free the connection,
+nothing terrible can happen - worst case the element of the pool (which is nominally free for a short period)
+will get the timestamp updated - same thing about the TCP flags seen.
+
+A slightly trickier issue arises when the packet is initially seen by one worker (thus owned by that worker),
+and the return packet is processed by another worker, as a result changing
+the class of the connection (e.g. it becomes TCP_ESTABLISHED from TCP_TRANSIENT or vice versa).
+If the class changes from one with the shorter idle time to the one with the longer idle time,
+then unless we are in the starvation mode where the transient connections are recycled,
+we can simply do nothing and let the normal requeue mechanism kick in. If the class changes from the longer idle
+timer to the shorter idle timer, then we risk keeping the connection around for longer than needed, which
+will affect the resource usage.
+
+One solution to that is to have NxN ring buffers (where N is the number of workers), such that the non-owner
+can signal to the owner the connection# that needs to be requeued out of order.
+
+A simpler solution, though, is to ensure that each FIFO's period is equal to that of the shortest timer.
+This way the resource starvation problem is taken care of, at the expense of some additional work.
+
+This all looks sufficiently nice and simple until a skeleton falls out of the closet:
+sometimes we want to clean the connections en masse before they expire.
+
+There are a few potential scenarios:
+1) removal of an ACL from the interface
+2) removal of an interface
+3) manual action of an operator (in the future).
+
+In order to tackle this, we need to modify the logic which decides whether to requeue the
+connection at the end of the list, or to delete it due to idle timeout:
+
+We define a point in time, and have each worker thread fast-forward through its FIFO,
+in the process looking for sessions that satisfy the criteria, and either keeping them or requeueing them.
+
+To keep the appearance to the outside world simple, we still process this as an event
+within the connection cleaner thread, and this event handler does as follows:
+1) it creates the bitmap of the sw_if_index values requested to be cleared
+2) for each worker, it ensures there is no cleanup operation already in progress (waiting
+if there is one), then makes a copy of the bitmap, sets the per-worker cleanup-operation flag, and sends an interrupt.
+3) it waits until all cleanup operations have completed.
+
+Within the worker interrupt node, we check if the "cleanup in progress" flag is set,
+and if it is, we check the "fast forward time" value. If unset, we initialize it to the current time, and compare the
+requested bitmap of sw_if_index values (pending_clear_sw_if_index_bitmap) with the bitmap of sw_if_index values that this worker deals with.
+
+(we set the bit in the bitmap every time we enqueue the packet onto a FIFO - serviced_sw_if_index_bitmap in acl_fa_conn_list_add_session).
+
+If the result of this AND operation is zero, then we can clear the "cleanup in progress" flag and return.
+Else we kick off a quantum of cleanup, and make sure we get another interrupt ASAP if that cleanup operation returns non-zero,
+meaning there is more work to do.
+When that operation returns zero, everything has been processed: we can clear the "cleanup in progress" flag and
+zero out the bitmap of the sw_if_index values requested to be cleaned.
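+
+Schematically, the worker-side check (the bitmap names are the real
+ones mentioned above; the flag name and the arguments of the cleanup
+call are simplified):
+
+```{.c}
+  if (pw->cleanup_in_progress)
+    {
+      uword *todo = clib_bitmap_dup_and (pw->pending_clear_sw_if_index_bitmap,
+                                         pw->serviced_sw_if_index_bitmap);
+      if (clib_bitmap_is_zero (todo))
+        pw->cleanup_in_progress = 0;    /* nothing of ours to clean */
+      else if (acl_fa_check_idle_sessions (...) != 0)
+        pw->interrupt_is_needed = 1;    /* more work - request another kick */
+      clib_bitmap_free (todo);
+    }
+```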
+
+The interrupt node signals its wish to receive an interrupt ASAP by setting the interrupt_is_needed
+flag within the per-worker structure. The main thread, while waiting for the
+cleanup operation to complete, checks if there is a request for interrupt,
+and if there is - it sends one.
+
+This approach gives us a way to mass-clean the connections that reuses the code of the regular idle
+connection cleanup.
+
+One potential inefficiency is the bitmap values set by the session insertion
+in the data path - there is nothing to clear them.
+
+So, if one rearranges the interface placement among the workers, then the cleanups will cause some unnecessary work.
+For now, we consider this an acceptable limitation. It can be resolved by having another per-worker bitmap which, when set,
+would trigger the cleanup of the bits in the serviced_sw_if_index_bitmap.
+
+=== the end ===
+
diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c
new file mode 100644
index 00000000..abb9643e
--- /dev/null
+++ b/src/plugins/acl/acl_test.c
@@ -0,0 +1,1219 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * acl_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <arpa/inet.h>
+
+#define __plugin_msg_base acl_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+
+/* Declare message IDs */
+#include <acl/acl_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <acl/acl_all_api_h.h>
+#undef vl_typedefs
+
+/* define message structures */
+#define vl_endianfun
+#include <acl/acl_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <acl/acl_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <acl/acl_all_api_h.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} acl_test_main_t;
+
+acl_test_main_t acl_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(acl_del_reply) \
+_(acl_interface_add_del_reply) \
+_(macip_acl_interface_add_del_reply) \
+_(acl_interface_set_acl_list_reply) \
+_(macip_acl_del_reply)
+
+#define foreach_reply_retval_aclindex_handler \
+_(acl_add_replace_reply) \
+_(macip_acl_add_reply) \
+_(macip_acl_add_replace_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = acl_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = acl_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ clib_warning("ACL index: %d", ntohl(mp->acl_index)); \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_reply_retval_aclindex_handler;
+#undef _
+
+/* These two ought to be in a library somewhere but they aren't */
+static uword
+my_unformat_mac_address (unformat_input_t * input, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return unformat (input, "%x:%x:%x:%x:%x:%x", &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5]);
+}
+
+static u8 *
+my_format_mac_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
+
+
+
+static void vl_api_acl_plugin_get_version_reply_t_handler
+ (vl_api_acl_plugin_get_version_reply_t * mp)
+ {
+ vat_main_t * vam = acl_test_main.vat_main;
+ clib_warning("ACL plugin version: %d.%d", ntohl(mp->major), ntohl(mp->minor));
+ vam->result_ready = 1;
+ }
+
+static void vl_api_acl_interface_list_details_t_handler
+ (vl_api_acl_interface_list_details_t * mp)
+ {
+ int i;
+ vat_main_t * vam = acl_test_main.vat_main;
+ u8 *out = 0;
+ vl_api_acl_interface_list_details_t_endian(mp);
+ out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input);
+ out = format(out, " input ");
+ for(i=0; i<mp->count; i++) {
+ if (i == mp->n_input)
+ out = format(out, "\n output ");
+ out = format(out, "%d ", ntohl (mp->acls[i]));
+ }
+ out = format(out, "\n");
+ clib_warning("%s", out);
+ vec_free(out);
+ vam->result_ready = 1;
+ }
+
+
+static inline u8 *
+vl_api_acl_rule_t_pretty_format (u8 *out, vl_api_acl_rule_t * a)
+{
+ int af = a->is_ipv6 ? AF_INET6 : AF_INET;
+ u8 src[INET6_ADDRSTRLEN];
+ u8 dst[INET6_ADDRSTRLEN];
+ inet_ntop(af, a->src_ip_addr, (void *)src, sizeof(src));
+ inet_ntop(af, a->dst_ip_addr, (void *)dst, sizeof(dst));
+
+ out = format(out, "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport %d-%d tcpflags %d mask %d",
+ a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit,
+ src, a->src_ip_prefix_len,
+ dst, a->dst_ip_prefix_len,
+ a->proto,
+ a->srcport_or_icmptype_first, a->srcport_or_icmptype_last,
+ a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last,
+ a->tcp_flags_value, a->tcp_flags_mask);
+ return(out);
+}
+
+
+
+static void vl_api_acl_details_t_handler
+ (vl_api_acl_details_t * mp)
+ {
+ int i;
+ vat_main_t * vam = acl_test_main.vat_main;
+ vl_api_acl_details_t_endian(mp);
+ u8 *out = 0;
+ out = format(0, "acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag);
+ for(i=0; i<mp->count; i++) {
+ out = format(out, " ");
+ out = vl_api_acl_rule_t_pretty_format(out, &mp->r[i]);
+ out = format(out, "%s\n", i<mp->count-1 ? "," : "");
+ }
+ clib_warning("%s", out);
+ vec_free(out);
+ vam->result_ready = 1;
+ }
+
+static inline u8 *
+vl_api_macip_acl_rule_t_pretty_format (u8 *out, vl_api_macip_acl_rule_t * a)
+{
+ int af = a->is_ipv6 ? AF_INET6 : AF_INET;
+ u8 src[INET6_ADDRSTRLEN];
+ inet_ntop(af, a->src_ip_addr, (void *)src, sizeof(src));
+
+ out = format(out, "%s action %d ip %s/%d mac %U mask %U",
+ a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit,
+ src, a->src_ip_prefix_len,
+ my_format_mac_address, a->src_mac,
+ my_format_mac_address, a->src_mac_mask);
+ return(out);
+}
+
+
+static void vl_api_macip_acl_details_t_handler
+ (vl_api_macip_acl_details_t * mp)
+ {
+ int i;
+ vat_main_t * vam = acl_test_main.vat_main;
+ vl_api_macip_acl_details_t_endian(mp);
+ u8 *out = format(0,"MACIP acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag);
+ for(i=0; i<mp->count; i++) {
+ out = format(out, " ");
+ out = vl_api_macip_acl_rule_t_pretty_format(out, &mp->r[i]);
+ out = format(out, "%s\n", i<mp->count-1 ? "," : "");
+ }
+ clib_warning("%s", out);
+ vec_free(out);
+ vam->result_ready = 1;
+ }
+
+static void vl_api_macip_acl_interface_get_reply_t_handler
+ (vl_api_macip_acl_interface_get_reply_t * mp)
+ {
+ int i;
+ vat_main_t * vam = acl_test_main.vat_main;
+ u8 *out = format(0, "sw_if_index with MACIP ACL count: %d\n", ntohl(mp->count));
+ for(i=0; i<ntohl(mp->count); i++) {
+ out = format(out, " macip_acl_interface_add_del sw_if_index %d add acl %d\n", i, ntohl(mp->acls[i]));
+ }
+ out = format(out, "\n");
+ clib_warning("%s", out);
+ vec_free(out);
+ vam->result_ready = 1;
+ }
+
+static void vl_api_acl_plugin_control_ping_reply_t_handler
+ (vl_api_acl_plugin_control_ping_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(ACL_ADD_REPLACE_REPLY, acl_add_replace_reply) \
+_(ACL_DEL_REPLY, acl_del_reply) \
+_(ACL_INTERFACE_ADD_DEL_REPLY, acl_interface_add_del_reply) \
+_(ACL_INTERFACE_SET_ACL_LIST_REPLY, acl_interface_set_acl_list_reply) \
+_(ACL_INTERFACE_LIST_DETAILS, acl_interface_list_details) \
+_(ACL_DETAILS, acl_details) \
+_(MACIP_ACL_ADD_REPLY, macip_acl_add_reply) \
+_(MACIP_ACL_ADD_REPLACE_REPLY, macip_acl_add_replace_reply) \
+_(MACIP_ACL_DEL_REPLY, macip_acl_del_reply) \
+_(MACIP_ACL_DETAILS, macip_acl_details) \
+_(MACIP_ACL_INTERFACE_ADD_DEL_REPLY, macip_acl_interface_add_del_reply) \
+_(MACIP_ACL_INTERFACE_GET_REPLY, macip_acl_interface_get_reply) \
+_(ACL_PLUGIN_CONTROL_PING_REPLY, acl_plugin_control_ping_reply) \
+_(ACL_PLUGIN_GET_VERSION_REPLY, acl_plugin_get_version_reply)
+
+static int api_acl_plugin_get_version (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ vl_api_acl_plugin_get_version_t * mp;
+ u32 msg_size = sizeof(*mp);
+ int ret;
+
+ vam->result_ready = 0;
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_ACL_PLUGIN_GET_VERSION + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_macip_acl_interface_get (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ vl_api_macip_acl_interface_get_t * mp;
+ u32 msg_size = sizeof(*mp);
+ int ret;
+
+ vam->result_ready = 0;
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_INTERFACE_GET + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define vec_validate_acl_rules(v, idx) \
+ do { \
+ if (vec_len(v) < idx+1) { \
+ vec_validate(v, idx); \
+ v[idx].is_permit = 0x1; \
+ v[idx].srcport_or_icmptype_last = 0xffff; \
+ v[idx].dstport_or_icmpcode_last = 0xffff; \
+ } \
+ } while (0)
+
+
+static int api_acl_add_replace (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ vl_api_acl_add_replace_t * mp;
+ u32 acl_index = ~0;
+ u32 msg_size = sizeof (*mp); /* without the rules */
+
+ vl_api_acl_rule_t *rules = 0;
+ int rule_idx = 0;
+ int n_rules = 0;
+ int n_rules_override = -1;
+ u32 proto = 0;
+ u32 port1 = 0;
+ u32 port2 = 0;
+ u32 action = 0;
+ u32 tcpflags, tcpmask;
+ u32 src_prefix_length = 0, dst_prefix_length = 0;
+ ip4_address_t src_v4address, dst_v4address;
+ ip6_address_t src_v6address, dst_v6address;
+ u8 *tag = 0;
+ int ret;
+
+ if (!unformat (i, "%d", &acl_index)) {
+ /* Just assume -1 */
+ }
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "ipv4"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "permit+reflect"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 2;
+ }
+ else if (unformat (i, "permit"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 1;
+ }
+ else if (unformat (i, "deny"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 0;
+ }
+ else if (unformat (i, "count %d", &n_rules_override))
+ {
+ /* we will use this later */
+ }
+ else if (unformat (i, "action %d", &action))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = action;
+ }
+ else if (unformat (i, "src %U/%d",
+ unformat_ip4_address, &src_v4address, &src_prefix_length))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "src %U/%d",
+ unformat_ip6_address, &src_v6address, &src_prefix_length))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "dst %U/%d",
+ unformat_ip4_address, &dst_v4address, &dst_prefix_length))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].dst_ip_addr, &dst_v4address, 4);
+ rules[rule_idx].dst_ip_prefix_len = dst_prefix_length;
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "dst %U/%d",
+ unformat_ip6_address, &dst_v6address, &dst_prefix_length))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].dst_ip_addr, &dst_v6address, 16);
+ rules[rule_idx].dst_ip_prefix_len = dst_prefix_length;
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "sport %d-%d", &port1, &port2))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].srcport_or_icmptype_first = htons(port1);
+ rules[rule_idx].srcport_or_icmptype_last = htons(port2);
+ }
+ else if (unformat (i, "sport %d", &port1))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].srcport_or_icmptype_first = htons(port1);
+ rules[rule_idx].srcport_or_icmptype_last = htons(port1);
+ }
+ else if (unformat (i, "dport %d-%d", &port1, &port2))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].dstport_or_icmpcode_first = htons(port1);
+ rules[rule_idx].dstport_or_icmpcode_last = htons(port2);
+ }
+ else if (unformat (i, "dport %d", &port1))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].dstport_or_icmpcode_first = htons(port1);
+ rules[rule_idx].dstport_or_icmpcode_last = htons(port1);
+ }
+ else if (unformat (i, "tcpflags %d %d", &tcpflags, &tcpmask))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].tcp_flags_value = tcpflags;
+ rules[rule_idx].tcp_flags_mask = tcpmask;
+ }
+ else if (unformat (i, "tcpflags %d mask %d", &tcpflags, &tcpmask))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].tcp_flags_value = tcpflags;
+ rules[rule_idx].tcp_flags_mask = tcpmask;
+ }
+ else if (unformat (i, "proto %d", &proto))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].proto = proto;
+ }
+ else if (unformat (i, "tag %s", &tag))
+ {
+ }
+ else if (unformat (i, ","))
+ {
+ rule_idx++;
+ vec_validate_acl_rules(rules, rule_idx);
+ }
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ vam->result_ready = 0;
+
+ if(rules)
+ n_rules = vec_len(rules);
+ else
+ n_rules = 0;
+
+ if (n_rules_override >= 0)
+ n_rules = n_rules_override;
+
+ msg_size += n_rules*sizeof(rules[0]);
+
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_ACL_ADD_REPLACE + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+ if ((n_rules > 0) && rules)
+ clib_memcpy(mp->r, rules, n_rules*sizeof (vl_api_acl_rule_t));
+ if (tag)
+ {
+ if (vec_len(tag) >= sizeof(mp->tag))
+ {
+ tag[sizeof(mp->tag)-1] = 0;
+ _vec_len(tag) = sizeof(mp->tag);
+ }
+ clib_memcpy(mp->tag, tag, vec_len(tag));
+ vec_free(tag);
+ }
+ mp->acl_index = ntohl(acl_index);
+ mp->count = htonl(n_rules);
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_acl_del (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_acl_del_t * mp;
+ u32 acl_index = ~0;
+ int ret;
+
+ if (!unformat (i, "%d", &acl_index)) {
+ errmsg ("missing acl index\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M(ACL_DEL, mp);
+ mp->acl_index = ntohl(acl_index);
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_macip_acl_del (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_macip_acl_del_t * mp;
+ u32 acl_index = ~0;
+ int ret;
+
+ if (!unformat (i, "%d", &acl_index)) {
+ errmsg ("missing acl index\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M(MACIP_ACL_DEL, mp);
+ mp->acl_index = ntohl(acl_index);
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_acl_interface_add_del (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_acl_interface_add_del_t * mp;
+ u32 sw_if_index = ~0;
+ u32 acl_index = ~0;
+ u8 is_input = 0;
+ u8 is_add = 0;
+ int ret;
+
+// acl_interface_add_del <intfc> | sw_if_index <if-idx> [add|del] [input|output] acl <acl-idx>
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%d", &acl_index))
+ ;
+ else
+ break;
+ }
+
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "acl %d", &acl_index))
+ ;
+ else if (unformat (i, "input"))
+ is_input = 1;
+ else if (unformat (i, "output"))
+ is_input = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0) {
+ errmsg ("missing interface name / explicit sw_if_index number \n");
+ return -99;
+ }
+
+ if (acl_index == ~0) {
+ errmsg ("missing ACL index\n");
+ return -99;
+ }
+
+
+
+ /* Construct the API message */
+ M(ACL_INTERFACE_ADD_DEL, mp);
+ mp->acl_index = ntohl(acl_index);
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->is_add = is_add;
+ mp->is_input = is_input;
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_macip_acl_interface_add_del (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_macip_acl_interface_add_del_t * mp;
+ u32 sw_if_index = ~0;
+ u32 acl_index = ~0;
+ u8 is_add = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "acl %d", &acl_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0) {
+ errmsg ("missing interface name / explicit sw_if_index number \n");
+ return -99;
+ }
+
+ if (acl_index == ~0) {
+ errmsg ("missing ACL index\n");
+ return -99;
+ }
+
+
+
+ /* Construct the API message */
+ M(MACIP_ACL_INTERFACE_ADD_DEL, mp);
+ mp->acl_index = ntohl(acl_index);
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->is_add = is_add;
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_acl_interface_set_acl_list (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_acl_interface_set_acl_list_t * mp;
+ u32 sw_if_index = ~0;
+ u32 acl_index = ~0;
+ u32 *inacls = 0;
+ u32 *outacls = 0;
+ u8 is_input = 0;
+ int ret;
+
+// acl_interface_set_acl_list <intfc> | sw_if_index <if-idx> input [acl-idx list] output [acl-idx list]
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "%d", &acl_index))
+ {
+ if(is_input)
+ vec_add1(inacls, htonl(acl_index));
+ else
+ vec_add1(outacls, htonl(acl_index));
+ }
+ else if (unformat (i, "acl %d", &acl_index))
+ ;
+ else if (unformat (i, "input"))
+ is_input = 1;
+ else if (unformat (i, "output"))
+ is_input = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0) {
+ errmsg ("missing interface name / explicit sw_if_index number \n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M2(ACL_INTERFACE_SET_ACL_LIST, mp, sizeof(u32) * (vec_len(inacls) + vec_len(outacls)));
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->n_input = vec_len(inacls);
+ mp->count = vec_len(inacls) + vec_len(outacls);
+ vec_append(inacls, outacls);
+ if (vec_len(inacls) > 0)
+ clib_memcpy(mp->acls, inacls, vec_len(inacls)*sizeof(u32));
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+api_acl_send_control_ping(vat_main_t *vam)
+{
+ vl_api_acl_plugin_control_ping_t *mp_ping;
+
+ M(ACL_PLUGIN_CONTROL_PING, mp_ping);
+ S(mp_ping);
+}
+
+
+static int api_acl_interface_list_dump (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ u32 sw_if_index = ~0;
+ vl_api_acl_interface_list_dump_t * mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M(ACL_INTERFACE_LIST_DUMP, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ /* send it... */
+ S(mp);
+
+ /* Use control ping for synchronization */
+ api_acl_send_control_ping(vam);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_acl_dump (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ u32 acl_index = ~0;
+ vl_api_acl_dump_t * mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%d", &acl_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M(ACL_DUMP, mp);
+ mp->acl_index = ntohl (acl_index);
+
+ /* send it... */
+ S(mp);
+
+ /* Use control ping for synchronization */
+ api_acl_send_control_ping(vam);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_macip_acl_dump (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ u32 acl_index = ~0;
+ vl_api_macip_acl_dump_t * mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%d", &acl_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M(MACIP_ACL_DUMP, mp);
+ mp->acl_index = ntohl (acl_index);
+
+ /* send it... */
+ S(mp);
+
+ /* Use control ping for synchronization */
+ api_acl_send_control_ping(vam);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define vec_validate_macip_acl_rules(v, idx) \
+ do { \
+ if (vec_len(v) < idx+1) { \
+ vec_validate(v, idx); \
+ v[idx].is_permit = 0x1; \
+ } \
+ } while (0)
+
+
+static int api_macip_acl_add (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ vl_api_macip_acl_add_t * mp;
+ u32 msg_size = sizeof (*mp); /* without the rules */
+
+ vl_api_macip_acl_rule_t *rules = 0;
+ int rule_idx = 0;
+ int n_rules = 0;
+ int n_rules_override = -1;
+ u32 src_prefix_length = 0;
+ u32 action = 0;
+ ip4_address_t src_v4address;
+ ip6_address_t src_v6address;
+ u8 src_mac[6];
+ u8 *tag = 0;
+ u8 mac_mask_all_1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "ipv4"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "permit"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 1;
+ }
+ else if (unformat (i, "deny"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 0;
+ }
+ else if (unformat (i, "count %d", &n_rules_override))
+ {
+ /* we will use this later */
+ }
+ else if (unformat (i, "action %d", &action))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = action;
+ }
+ else if (unformat (i, "ip %U/%d",
+ unformat_ip4_address, &src_v4address, &src_prefix_length) ||
+ unformat (i, "ip %U",
+ unformat_ip4_address, &src_v4address))
+ {
+ if (src_prefix_length == 0)
+ src_prefix_length = 32;
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "src"))
+ {
+ /* Everything in MACIP is "source" but allow this verbosity */
+ }
+ else if (unformat (i, "ip %U/%d",
+ unformat_ip6_address, &src_v6address, &src_prefix_length) ||
+ unformat (i, "ip %U",
+ unformat_ip6_address, &src_v6address))
+ {
+ if (src_prefix_length == 0)
+ src_prefix_length = 128;
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "mac %U",
+ my_unformat_mac_address, &src_mac))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_mac, &src_mac, 6);
+ memcpy (rules[rule_idx].src_mac_mask, &mac_mask_all_1, 6);
+ }
+ else if (unformat (i, "mask %U",
+ my_unformat_mac_address, &src_mac))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_mac_mask, &src_mac, 6);
+ }
+ else if (unformat (i, "tag %s", &tag))
+ {
+ }
+ else if (unformat (i, ","))
+ {
+ rule_idx++;
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ }
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ vam->result_ready = 0;
+
+ if(rules)
+ n_rules = vec_len(rules);
+
+ if (n_rules_override >= 0)
+ n_rules = n_rules_override;
+
+ msg_size += n_rules*sizeof(rules[0]);
+
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_ADD + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+ if ((n_rules > 0) && rules)
+ clib_memcpy(mp->r, rules, n_rules*sizeof (mp->r[0]));
+ if (tag)
+ {
+ if (vec_len(tag) >= sizeof(mp->tag))
+ {
+ tag[sizeof(mp->tag)-1] = 0;
+ _vec_len(tag) = sizeof(mp->tag);
+ }
+ clib_memcpy(mp->tag, tag, vec_len(tag));
+ vec_free(tag);
+ }
+
+ mp->count = htonl(n_rules);
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int api_macip_acl_add_replace (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ vl_api_macip_acl_add_replace_t * mp;
+ u32 acl_index = ~0;
+ u32 msg_size = sizeof (*mp); /* without the rules */
+
+ vl_api_macip_acl_rule_t *rules = 0;
+ int rule_idx = 0;
+ int n_rules = 0;
+ int n_rules_override = -1;
+ u32 src_prefix_length = 0;
+ u32 action = 0;
+ ip4_address_t src_v4address;
+ ip6_address_t src_v6address;
+ u8 src_mac[6];
+ u8 *tag = 0;
+ u8 mac_mask_all_1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ int ret;
+
+ if (!unformat (i, "%d", &acl_index)) {
+ /* Just assume -1 */
+ }
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "ipv4"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "permit"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 1;
+ }
+ else if (unformat (i, "deny"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 0;
+ }
+ else if (unformat (i, "count %d", &n_rules_override))
+ {
+ /* we will use this later */
+ }
+ else if (unformat (i, "action %d", &action))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = action;
+ }
+ else if (unformat (i, "ip %U/%d",
+ unformat_ip4_address, &src_v4address, &src_prefix_length) ||
+ unformat (i, "ip %U",
+ unformat_ip4_address, &src_v4address))
+ {
+ if (src_prefix_length == 0)
+ src_prefix_length = 32;
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "src"))
+ {
+ /* Everything in MACIP is "source" but allow this verbosity */
+ }
+ else if (unformat (i, "ip %U/%d",
+ unformat_ip6_address, &src_v6address, &src_prefix_length) ||
+ unformat (i, "ip %U",
+ unformat_ip6_address, &src_v6address))
+ {
+ if (src_prefix_length == 0)
+ src_prefix_length = 128;
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "mac %U",
+ my_unformat_mac_address, &src_mac))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_mac, &src_mac, 6);
+ memcpy (rules[rule_idx].src_mac_mask, &mac_mask_all_1, 6);
+ }
+ else if (unformat (i, "mask %U",
+ my_unformat_mac_address, &src_mac))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_mac_mask, &src_mac, 6);
+ }
+ else if (unformat (i, "tag %s", &tag))
+ {
+ }
+ else if (unformat (i, ","))
+ {
+ rule_idx++;
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ }
+ else
+ break;
+ }
+
+ if (!rules)
+ {
+ errmsg ("rule/s required\n");
+ return -99;
+ }
+ /* Construct the API message */
+ vam->result_ready = 0;
+
+ if(rules)
+ n_rules = vec_len(rules);
+
+ if (n_rules_override >= 0)
+ n_rules = n_rules_override;
+
+ msg_size += n_rules*sizeof(rules[0]);
+
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_ADD_REPLACE + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+ if ((n_rules > 0) && rules)
+ clib_memcpy(mp->r, rules, n_rules*sizeof (mp->r[0]));
+ if (tag)
+ {
+ if (vec_len(tag) >= sizeof(mp->tag))
+ {
+ tag[sizeof(mp->tag)-1] = 0;
+ _vec_len(tag) = sizeof(mp->tag);
+ }
+ clib_memcpy(mp->tag, tag, vec_len(tag));
+ vec_free(tag);
+ }
+
+ mp->acl_index = ntohl(acl_index);
+ mp->count = htonl(n_rules);
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(acl_plugin_get_version, "") \
+_(acl_add_replace, "<acl-idx> [<ipv4|ipv6> <permit|permit+reflect|deny|action N> [src IP/plen] [dst IP/plen] [sport X-Y] [dport X-Y] [proto P] [tcpflags FL MASK], ... , ...") \
+_(acl_del, "<acl-idx>") \
+_(acl_dump, "[<acl-idx>]") \
+_(acl_interface_add_del, "<intfc> | sw_if_index <if-idx> [add|del] [input|output] acl <acl-idx>") \
+_(acl_interface_set_acl_list, "<intfc> | sw_if_index <if-idx> input [acl-idx list] output [acl-idx list]") \
+_(acl_interface_list_dump, "[<intfc> | sw_if_index <if-idx>]") \
+_(macip_acl_add, "...") \
+_(macip_acl_add_replace, "<acl-idx> [<ipv4|ipv6> <permit|deny|action N> [count <count>] [src] ip <ipaddress/[plen]> mac <mac> mask <mac_mask>, ... , ...") \
+_(macip_acl_del, "<acl-idx>")\
+_(macip_acl_dump, "[<acl-idx>]") \
+_(macip_acl_interface_add_del, "<intfc> | sw_if_index <if-idx> [add|del] acl <acl-idx>") \
+_(macip_acl_interface_get, "")
+
+
+static
+void acl_vat_api_hookup (vat_main_t *vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ u8 * name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "acl_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~0)
+ acl_vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c
new file mode 100644
index 00000000..a4ba967d
--- /dev/null
+++ b/src/plugins/acl/fa_node.c
@@ -0,0 +1,1874 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stddef.h>
+#include <netinet/in.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <acl/acl.h>
+#include <vppinfra/bihash_40_8.h>
+
+#include <vppinfra/bihash_template.h>
+#include <vppinfra/bihash_template.c>
+
+#include "fa_node.h"
+#include "hash_lookup.h"
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ u32 match_acl_in_index;
+ u32 match_rule_index;
+ u64 packet_info[6];
+ u32 trace_bitmap;
+ u8 action;
+} acl_fa_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_acl_fa_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ acl_fa_trace_t *t = va_arg (*args, acl_fa_trace_t *);
+
+ s =
+ format (s,
+ "acl-plugin: sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n"
+ " pkt info %016llx %016llx %016llx %016llx %016llx %016llx",
+ t->sw_if_index, t->next_index, t->action, t->match_acl_in_index,
+ t->match_rule_index, t->trace_bitmap,
+ t->packet_info[0], t->packet_info[1], t->packet_info[2],
+ t->packet_info[3], t->packet_info[4], t->packet_info[5]);
+ return s;
+}
+
+/* *INDENT-OFF* */
+#define foreach_acl_fa_error \
+_(ACL_DROP, "ACL deny packets") \
+_(ACL_PERMIT, "ACL permit packets") \
+_(ACL_NEW_SESSION, "new sessions added") \
+_(ACL_EXIST_SESSION, "existing session packets") \
+_(ACL_CHECK, "checked packets") \
+_(ACL_RESTART_SESSION_TIMER, "restart session timer") \
+_(ACL_TOO_MANY_SESSIONS, "too many sessions to add new") \
+/* end of errors */
+
+typedef enum
+{
+#define _(sym,str) ACL_FA_ERROR_##sym,
+ foreach_acl_fa_error
+#undef _
+ ACL_FA_N_ERROR,
+} acl_fa_error_t;
+
+static char *acl_fa_error_strings[] = {
+#define _(sym,string) string,
+ foreach_acl_fa_error
+#undef _
+};
+/* *INDENT-ON* */
+
+static void *
+get_ptr_to_offset (vlib_buffer_t * b0, int offset)
+{
+ u8 *p = vlib_buffer_get_current (b0) + offset;
+ return p;
+}
+
+
+static int
+fa_acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2,
+ int prefixlen, int is_ip6)
+{
+ if (prefixlen == 0)
+ {
+ /* match any always succeeds */
+ return 1;
+ }
+ if (is_ip6)
+ {
+ if (memcmp (addr1, addr2, prefixlen / 8))
+ {
+ /* If the leading full bytes do not match, no point in bit-twiddling further */
+ return 0;
+ }
+ if (prefixlen % 8)
+ {
+ u8 b1 = *((u8 *) addr1 + prefixlen / 8);
+ u8 b2 = *((u8 *) addr2 + prefixlen / 8);
+ u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
+ /* mask both bytes so stray host bits in the rule address cannot defeat the match */
+ return (b1 & mask0) == (b2 & mask0);
+ }
+ else
+ {
+ /* The prefix fits into integer number of bytes, so nothing left to do */
+ return 1;
+ }
+ }
+ else
+ {
+ uint32_t a1 = ntohl (addr1->ip4.as_u32);
+ uint32_t a2 = ntohl (addr2->ip4.as_u32);
+ uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1);
+ return (a1 & mask0) == a2;
+ }
+}
+
+static int
+fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6)
+{
+ return ((port >= port_first) && (port <= port_last));
+}
+
+int
+single_acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple,
+ int is_ip6, u8 * r_action, u32 * r_acl_match_p,
+ u32 * r_rule_match_p, u32 * trace_bitmap)
+{
+ int i;
+ acl_list_t *a;
+ acl_rule_t *r;
+
+ if (pool_is_free_index (am->acls, acl_index))
+ {
+ if (r_acl_match_p)
+ *r_acl_match_p = acl_index;
+ if (r_rule_match_p)
+ *r_rule_match_p = -1;
+ /* the ACL does not exist but is used for policy. Block traffic. */
+ return 0;
+ }
+ a = am->acls + acl_index;
+ for (i = 0; i < a->count; i++)
+ {
+ r = a->rules + i;
+ if (is_ip6 != r->is_ipv6)
+ {
+ continue;
+ }
+ if (!fa_acl_match_addr
+ (&pkt_5tuple->addr[1], &r->dst, r->dst_prefixlen, is_ip6))
+ continue;
+
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning
+ ("ACL_FA_NODE_DBG acl %d rule %d pkt dst addr %U match rule addr %U/%d",
+ acl_index, i, format_ip46_address, &pkt_5tuple->addr[1],
+ IP46_TYPE_ANY, format_ip46_address, &r->dst, IP46_TYPE_ANY,
+ r->dst_prefixlen);
+#endif
+
+ if (!fa_acl_match_addr
+ (&pkt_5tuple->addr[0], &r->src, r->src_prefixlen, is_ip6))
+ continue;
+
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning
+ ("ACL_FA_NODE_DBG acl %d rule %d pkt src addr %U match rule addr %U/%d",
+ acl_index, i, format_ip46_address, &pkt_5tuple->addr[0],
+ IP46_TYPE_ANY, format_ip46_address, &r->src, IP46_TYPE_ANY,
+ r->src_prefixlen);
+ clib_warning
+ ("ACL_FA_NODE_DBG acl %d rule %d trying to match pkt proto %d with rule %d",
+ acl_index, i, pkt_5tuple->l4.proto, r->proto);
+#endif
+ if (r->proto)
+ {
+ if (pkt_5tuple->l4.proto != r->proto)
+ continue;
+
+ if (PREDICT_FALSE (pkt_5tuple->pkt.is_nonfirst_fragment &&
+ am->l4_match_nonfirst_fragment))
+ {
+ /* non-initial fragment with frag match configured - match this rule */
+ *trace_bitmap |= 0x80000000;
+ *r_action = r->is_permit;
+ if (r_acl_match_p)
+ *r_acl_match_p = acl_index;
+ if (r_rule_match_p)
+ *r_rule_match_p = i;
+ return 1;
+ }
+
+ /* A sanity check just to ensure we are about to match the ports extracted from the packet */
+ if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid))
+ continue;
+
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning
+ ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d",
+ acl_index, i, pkt_5tuple->l4.proto, r->proto);
+#endif
+
+ if (!fa_acl_match_port
+ (pkt_5tuple->l4.port[0], r->src_port_or_type_first,
+ r->src_port_or_type_last, is_ip6))
+ continue;
+
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning
+ ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]",
+ acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first,
+ r->src_port_or_type_last);
+#endif
+
+ if (!fa_acl_match_port
+ (pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
+ r->dst_port_or_code_last, is_ip6))
+ continue;
+
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning
+ ("ACL_FA_NODE_DBG acl %d rule %d pkt dport %d match rule [%d..%d]",
+ acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
+ r->dst_port_or_code_last);
+#endif
+ if (pkt_5tuple->pkt.tcp_flags_valid
+ && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) !=
+ r->tcp_flags_value))
+ continue;
+ }
+ /* everything matches! */
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d",
+ acl_index, i, r->is_permit);
+#endif
+ *r_action = r->is_permit;
+ if (r_acl_match_p)
+ *r_acl_match_p = acl_index;
+ if (r_rule_match_p)
+ *r_rule_match_p = i;
+ return 1;
+ }
+ return 0;
+}
+
+static u8
+linear_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap)
+{
+ acl_main_t *am = &acl_main;
+ int i;
+ u32 *acl_vector;
+ u8 action = 0;
+
+ if (is_input)
+ {
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ acl_vector = am->input_acl_vec_by_sw_if_index[sw_if_index];
+ }
+ else
+ {
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+ acl_vector = am->output_acl_vec_by_sw_if_index[sw_if_index];
+ }
+ for (i = 0; i < vec_len (acl_vector); i++)
+ {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d",
+ acl_vector[i]);
+#endif
+ if (single_acl_match_5tuple
+ (am, acl_vector[i], pkt_5tuple, is_ip6, &action,
+ acl_match_p, rule_match_p, trace_bitmap))
+ {
+ return action;
+ }
+ }
+ if (vec_len (acl_vector) > 0)
+ {
+ /* If there are ACLs and none matched, deny by default */
+ return 0;
+ }
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning ("ACL_FA_NODE_DBG: No ACL on sw_if_index %d", sw_if_index);
+#endif
+ /* Deny by default. If there are no ACLs defined we should not be here. */
+ return 0;
+}
+
+static u8
+multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap)
+{
+ acl_main_t *am = &acl_main;
+ if (am->use_hash_acl_matching) {
+ return hash_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6,
+ is_input, acl_match_p, rule_match_p, trace_bitmap);
+ } else {
+ return linear_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6,
+ is_input, acl_match_p, rule_match_p, trace_bitmap);
+ }
+}
+
+static int
+offset_within_packet (vlib_buffer_t * b0, int offset)
+{
+ /* For the purposes of this code, "within" means we have at least 8 bytes after it */
+ return (offset <= (b0->current_length - 8));
+}
+
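+/*
+ * Fill the 5-tuple from the packet: the L3 addresses and protocol
+ * (walking the known IPv6 extension headers when present), and the
+ * L4 ports or ICMP type/code, provided the L4 header lies within
+ * the packet data.
+ */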
+static void
+acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
+ int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt)
+{
+ int l3_offset = ethernet_buffer_header_size(b0);
+ int l4_offset;
+ u16 ports[2];
+ u16 proto;
+ /* IP4 and IP6 protocol numbers of ICMP */
+ static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
+
+ if (is_input && !(is_l2_path))
+ {
+ l3_offset = 0;
+ }
+
+ /* key[0..3] contains src/dst address and is cleared/set below */
+ /* Remainder of the key and per-packet non-key data */
+ p5tuple_pkt->kv.key[4] = 0;
+ p5tuple_pkt->kv.value = 0;
+
+ if (is_ip6)
+ {
+ clib_memcpy (&p5tuple_pkt->addr,
+ get_ptr_to_offset (b0,
+ offsetof (ip6_header_t,
+ src_address) + l3_offset),
+ sizeof (p5tuple_pkt->addr));
+ proto =
+ *(u8 *) get_ptr_to_offset (b0,
+ offsetof (ip6_header_t,
+ protocol) + l3_offset);
+ l4_offset = l3_offset + sizeof (ip6_header_t);
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto,
+ l4_offset);
+#endif
+ /* IPv6 extension header handling: advance l4_offset past the known EHs and update proto as we go */
+ int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
+ if (PREDICT_FALSE (need_skip_eh))
+ {
+ while (need_skip_eh && offset_within_packet (b0, l4_offset))
+ {
+ /* Fragment header needs special handling */
+ if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto))
+ {
+ proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
+ u16 frag_offset;
+ clib_memcpy (&frag_offset, get_ptr_to_offset (b0, 2 + l4_offset), sizeof(frag_offset));
+ frag_offset = ntohs(frag_offset) >> 3;
+ if (frag_offset)
+ {
+ p5tuple_pkt->pkt.is_nonfirst_fragment = 1;
+ /* invalidate L4 offset so we don't try to find L4 info */
+ l4_offset += b0->current_length;
+ }
+ else
+ {
+ /* First fragment: skip the frag header and move on. */
+ l4_offset += 8;
+ }
+ }
+ else
+ {
+ u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset);
+ proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
+ l4_offset += 8 * (1 + (u16) nwords);
+ }
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d",
+ proto, l4_offset);
+#endif
+ need_skip_eh =
+ clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
+ }
+ }
+ }
+ else
+ {
+ p5tuple_pkt->kv.key[0] = 0;
+ p5tuple_pkt->kv.key[1] = 0;
+ p5tuple_pkt->kv.key[2] = 0;
+ p5tuple_pkt->kv.key[3] = 0;
+ clib_memcpy (&p5tuple_pkt->addr[0].ip4,
+ get_ptr_to_offset (b0,
+ offsetof (ip4_header_t,
+ src_address) + l3_offset),
+ sizeof (p5tuple_pkt->addr[0].ip4));
+ clib_memcpy (&p5tuple_pkt->addr[1].ip4,
+ get_ptr_to_offset (b0,
+ offsetof (ip4_header_t,
+ dst_address) + l3_offset),
+ sizeof (p5tuple_pkt->addr[1].ip4));
+ proto =
+ *(u8 *) get_ptr_to_offset (b0,
+ offsetof (ip4_header_t,
+ protocol) + l3_offset);
+ l4_offset = l3_offset + sizeof (ip4_header_t);
+ u16 flags_and_fragment_offset;
+ clib_memcpy (&flags_and_fragment_offset,
+ get_ptr_to_offset (b0,
+ offsetof (ip4_header_t,
+ flags_and_fragment_offset) + l3_offset),
+ sizeof (flags_and_fragment_offset));
+ flags_and_fragment_offset = ntohs (flags_and_fragment_offset);
+
+ /* non-initial fragments have a non-zero 13-bit fragment offset */
+ if (PREDICT_FALSE (0x1fff & flags_and_fragment_offset))
+ {
+ p5tuple_pkt->pkt.is_nonfirst_fragment = 1;
+ /* invalidate L4 offset so we don't try to find L4 info */
+ l4_offset += b0->current_length;
+ }
+
+ }
+ p5tuple_pkt->l4.proto = proto;
+ if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
+ {
+ p5tuple_pkt->pkt.l4_valid = 1;
+ if (icmp_protos[is_ip6] == proto)
+ {
+ /* type */
+ p5tuple_pkt->l4.port[0] =
+ *(u8 *) get_ptr_to_offset (b0,
+ l4_offset + offsetof (icmp46_header_t,
+ type));
+ /* code */
+ p5tuple_pkt->l4.port[1] =
+ *(u8 *) get_ptr_to_offset (b0,
+ l4_offset + offsetof (icmp46_header_t,
+ code));
+ }
+ else if ((IPPROTO_TCP == proto) || (IPPROTO_UDP == proto))
+ {
+ clib_memcpy (&ports,
+ get_ptr_to_offset (b0,
+ l4_offset + offsetof (tcp_header_t,
+ src_port)),
+ sizeof (ports));
+ p5tuple_pkt->l4.port[0] = ntohs (ports[0]);
+ p5tuple_pkt->l4.port[1] = ntohs (ports[1]);
+
+ p5tuple_pkt->pkt.tcp_flags =
+ *(u8 *) get_ptr_to_offset (b0,
+ l4_offset + offsetof (tcp_header_t,
+ flags));
+ p5tuple_pkt->pkt.tcp_flags_valid = (proto == IPPROTO_TCP);
+ }
+ /*
+ * FIXME: rather than the above conditional, there could
+ * be a nice generic mechanism to extract two L4 values:
+ *
+ * have a per-protocol array of 4 elements like this:
+ * u8 offset; to take the byte from, off the L4 header
+ * u8 mask; to mask it with, before storing
+ *
+ * this way we can describe UDP, TCP and ICMP[46] semantics,
+ * and add a sort of FPM-type behavior for other protocols.
+ *
+ * Of course: is it faster? And is it needed?
+ */
+ }
+}
+
+
+/* Session keys match the packets received, and mirror the packets sent */
+static void
+acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt,
+ fa_5tuple_t * p5tuple_sess)
+{
+ int src_index = is_input ? 0 : 1;
+ int dst_index = is_input ? 1 : 0;
+ p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0];
+ p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1];
+ p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64;
+ p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0];
+ p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1];
+}
+
+
+static int
+acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0)
+{
+ return am->fa_sessions_hash_is_initialized;
+}
+
+static int
+acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0)
+{
+ int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0);
+ return it_has;
+}
+
+static int
+acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
+{
+ int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0);
+ return it_has;
+}
+
+
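+/*
+ * Classify a session into a timeout bucket: TCP sessions which have
+ * seen SYN and ACK in both directions (and no FIN or RST) get the
+ * established idle timeout, other TCP sessions the transient timeout,
+ * and UDP along with everything else the UDP idle timeout.
+ */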
+static int
+fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
+{
+ /* seen both SYNs and ACKs but no FIN or RST means we are in established state */
+ u16 masked_flags =
+ sess->tcp_flags_seen.as_u16 & ((TCP_FLAGS_RSTFINACKSYN << 8) +
+ TCP_FLAGS_RSTFINACKSYN);
+ switch (sess->info.l4.proto)
+ {
+ case IPPROTO_TCP:
+ if (((TCP_FLAGS_ACKSYN << 8) + TCP_FLAGS_ACKSYN) == masked_flags)
+ {
+ return ACL_TIMEOUT_TCP_IDLE;
+ }
+ else
+ {
+ return ACL_TIMEOUT_TCP_TRANSIENT;
+ }
+ break;
+ case IPPROTO_UDP:
+ return ACL_TIMEOUT_UDP_IDLE;
+ break;
+ default:
+ return ACL_TIMEOUT_UDP_IDLE;
+ }
+}
+
+
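+/*
+ * The shortest configured timeout across all the timeout types; this
+ * is how long a connection waits on a list before being re-examined.
+ */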
+static u64
+fa_session_get_shortest_timeout(acl_main_t * am)
+{
+ int timeout_type;
+ u64 timeout = ~0LL;
+ for(timeout_type = 0; timeout_type < ACL_N_TIMEOUTS; timeout_type++) {
+ if (timeout > am->session_timeout_sec[timeout_type]) {
+ timeout = am->session_timeout_sec[timeout_type];
+ }
+ }
+ return timeout;
+}
+
+/*
+ * Get the list-wait timeout of a session, counted from its enqueue time.
+ */
+
+static u64
+fa_session_get_list_timeout (acl_main_t * am, fa_session_t * sess)
+{
+ u64 timeout = am->vlib_main->clib_time.clocks_per_second;
+ /*
+ * we have the shortest possible timeout type in all the lists
+ * (see README-multicore for the rationale)
+ */
+ timeout *= fa_session_get_shortest_timeout(am);
+ return timeout;
+}
+
+/*
+ * Get the idle timeout of a session.
+ */
+
+static u64
+fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
+{
+ u64 timeout = am->vlib_main->clib_time.clocks_per_second;
+ int timeout_type = fa_session_get_timeout_type (am, sess);
+ timeout *= am->session_timeout_sec[timeout_type];
+ return timeout;
+}
+
+static void
+acl_fa_verify_init_sessions (acl_main_t * am)
+{
+ if (!am->fa_sessions_hash_is_initialized) {
+ u16 wk;
+ /* Allocate the per-worker sessions pools */
+ for (wk = 0; wk < vec_len (am->per_worker_data); wk++) {
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
+ pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES);
+ }
+
+ /* ... and the interface session hash table */
+ BV (clib_bihash_init) (&am->fa_sessions_hash,
+ "ACL plugin FA session bihash",
+ am->fa_conn_table_hash_num_buckets,
+ am->fa_conn_table_hash_memory_size);
+ am->fa_sessions_hash_is_initialized = 1;
+ }
+}
+
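+/*
+ * Resolve a (thread index, session index) pair to a session pointer,
+ * or NULL if that slot is not allocated in the worker's pool.
+ */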
+static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index)
+{
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+ fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool, session_index) ? 0 : pool_elt_at_index(pw->fa_sessions_pool, session_index);
+ return sess;
+}
+
+static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess)
+{
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+ return ((sess != 0) && ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool)));
+}
+
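+/*
+ * Connections are kept on per-worker doubly-linked lists, one list per
+ * timeout type. Sessions are always enqueued at the tail, so each list
+ * stays ordered by enqueue time and the head is the oldest entry.
+ */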
+static void
+acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 now)
+{
+ fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
+ u8 list_id = fa_session_get_timeout_type(am, sess);
+ uword thread_index = os_get_thread_index ();
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+ /* the retrieved session's thread index must necessarily match the one in the key */
+ ASSERT (sess->thread_index == sess_id.thread_index);
+ /* ...and it must also match the current thread */
+ ASSERT (sess->thread_index == thread_index);
+ sess->link_enqueue_time = now;
+ sess->link_list_id = list_id;
+ sess->link_next_idx = ~0;
+ sess->link_prev_idx = pw->fa_conn_list_tail[list_id];
+ if (~0 != pw->fa_conn_list_tail[list_id]) {
+ fa_session_t *prev_sess = get_session_ptr(am, thread_index, pw->fa_conn_list_tail[list_id]);
+ prev_sess->link_next_idx = sess_id.session_index;
+ /* We should never try to link with a session on another thread */
+ ASSERT(prev_sess->thread_index == sess->thread_index);
+ }
+ pw->fa_conn_list_tail[list_id] = sess_id.session_index;
+ pw->serviced_sw_if_index_bitmap = clib_bitmap_set(pw->serviced_sw_if_index_bitmap, sess->sw_if_index, 1);
+
+ if (~0 == pw->fa_conn_list_head[list_id]) {
+ pw->fa_conn_list_head[list_id] = sess_id.session_index;
+ }
+}
+
+static int
+acl_fa_conn_list_delete_session (acl_main_t *am, fa_full_session_id_t sess_id)
+{
+ uword thread_index = os_get_thread_index ();
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+ if (thread_index != sess_id.thread_index) {
+ /* If another thread attempts to delete the session, fail it. */
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("thread id in key %d != curr thread index, not deleting");
+#endif
+ return 0;
+ }
+ fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
+ /* we should never try to delete the session with another thread index */
+ ASSERT(sess->thread_index == thread_index);
+ if (~0 != sess->link_prev_idx) {
+ fa_session_t *prev_sess = get_session_ptr(am, thread_index, sess->link_prev_idx);
+ /* the previous session must be in the same list as this one */
+ ASSERT(prev_sess->link_list_id == sess->link_list_id);
+ prev_sess->link_next_idx = sess->link_next_idx;
+ }
+ if (~0 != sess->link_next_idx) {
+ fa_session_t *next_sess = get_session_ptr(am, thread_index, sess->link_next_idx);
+ /* The next session must be in the same list as the one we are deleting */
+ ASSERT(next_sess->link_list_id == sess->link_list_id);
+ next_sess->link_prev_idx = sess->link_prev_idx;
+ }
+ if (pw->fa_conn_list_head[sess->link_list_id] == sess_id.session_index) {
+ pw->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx;
+ }
+ if (pw->fa_conn_list_tail[sess->link_list_id] == sess_id.session_index) {
+ pw->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx;
+ }
+ return 1;
+}
+
+static int
+acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, fa_full_session_id_t sess_id)
+{
+ if (acl_fa_conn_list_delete_session(am, sess_id)) {
+ acl_fa_conn_list_add_session(am, sess_id, now);
+ return 1;
+ } else {
+ /*
+ * Our thread does not own this connection, so we cannot delete
+ * the session. To avoid complicated signaling, we simply
+ * pick the list waiting time to be the shortest of the timeouts.
+ * This way we do not have to do anything special, and let
+ * the regular requeue check take care of everything.
+ */
+ return 0;
+ }
+}
+
+
+static u8
+acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
+ fa_session_t * sess, fa_5tuple_t * pkt_5tuple)
+{
+ sess->last_active_time = now;
+ if (pkt_5tuple->pkt.tcp_flags_valid)
+ {
+ sess->tcp_flags_seen.as_u8[is_input] |= pkt_5tuple->pkt.tcp_flags;
+ }
+ return 3;
+}
+
+
+static void
+acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id)
+{
+ void *oldheap = clib_mem_set_heap(am->acl_mheap);
+ fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
+ ASSERT(sess->thread_index == os_get_thread_index ());
+ BV (clib_bihash_add_del) (&am->fa_sessions_hash,
+ &sess->info.kv, 0);
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
+ pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
+ /* Deleting from timer structures not needed,
+ as the caller must have dealt with the timers. */
+ vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index);
+ clib_mem_set_heap (oldheap);
+ pw->fa_session_dels_by_sw_if_index[sw_if_index]++;
+ clib_smp_atomic_add(&am->fa_session_total_dels, 1);
+}
+
+static int
+acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
+{
+ u64 curr_sess_count;
+ curr_sess_count = am->fa_session_total_adds - am->fa_session_total_dels;
+ return (curr_sess_count < am->fa_conn_table_max_entries);
+}
+
+static u64
+acl_fa_get_list_head_expiry_time(acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, int timeout_type)
+{
+ fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]);
+ /*
+ * We cannot check just the index here, because in between the worker thread might
+ * dequeue the connection from the head just as we are about to check it.
+ */
+ if (!is_valid_session_ptr(am, thread_index, sess)) {
+ return ~0LL; // infinity.
+ } else {
+ u64 timeout_time =
+ sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
+ return timeout_time;
+ }
+}
+
+static int
+acl_fa_conn_time_to_check (acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, u32 session_index)
+{
+ fa_session_t *sess = get_session_ptr(am, thread_index, session_index);
+ u64 timeout_time =
+ sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
+ return (timeout_time < now) || (sess->link_enqueue_time <= pw->swipe_end_time);
+}
+
+/*
+ * see if there are sessions ready to be checked,
+ * do the maintenance (requeue or delete), and
+ * return the total number of sessions reclaimed.
+ */
+static int
+acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now)
+{
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+ fa_full_session_id_t fsid;
+ fsid.thread_index = thread_index;
+ int total_expired = 0;
+
+ {
+ u8 tt = 0;
+ for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) {
+ while((vec_len(pw->expired) < am->fa_max_deleted_sessions_per_interval)
+ && (~0 != pw->fa_conn_list_head[tt])
+ && (acl_fa_conn_time_to_check(am, pw, now, thread_index,
+ pw->fa_conn_list_head[tt]))) {
+ fsid.session_index = pw->fa_conn_list_head[tt];
+ vec_add1(pw->expired, fsid.session_index);
+ acl_fa_conn_list_delete_session(am, fsid);
+ }
+ }
+ }
+
+ u32 *psid = NULL;
+ vec_foreach (psid, pw->expired)
+ {
+ fsid.session_index = *psid;
+ if (!pool_is_free_index (pw->fa_sessions_pool, fsid.session_index))
+ {
+ fa_session_t *sess = get_session_ptr(am, thread_index, fsid.session_index);
+ u32 sw_if_index = sess->sw_if_index;
+ u64 sess_timeout_time =
+ sess->last_active_time + fa_session_get_timeout (am, sess);
+ if ((now < sess_timeout_time) && (0 == clib_bitmap_get(pw->pending_clear_sw_if_index_bitmap, sw_if_index)))
+ {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d",
+ (int) fsid.session_index);
+#endif
+ /* There was activity on the session, so the idle timeout
+ has not passed. Enqueue for another time period. */
+
+ acl_fa_conn_list_add_session(am, fsid, now);
+ pw->cnt_session_timer_restarted++;
+ }
+ else
+ {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d",
+ (int) fsid.session_index);
+#endif
+ acl_fa_delete_session (am, sw_if_index, fsid);
+ pw->cnt_deleted_sessions++;
+ }
+ }
+ else
+ {
+ pw->cnt_already_deleted_sessions++;
+ }
+ }
+ total_expired = vec_len(pw->expired);
+ /* zero out the vector which we have acted on */
+ if (pw->expired)
+ _vec_len (pw->expired) = 0;
+ /* if we were advancing and reached the end
+ * (no more sessions to recycle), reset the fast-forward timestamp */
+
+ if (pw->swipe_end_time && 0 == total_expired)
+ pw->swipe_end_time = 0;
+ return (total_expired);
+}
+
+always_inline void
+acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, u32 sw_if_index)
+{
+ /* try to recycle a TCP transient session */
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+ u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT;
+ fa_full_session_id_t sess_id;
+ sess_id.session_index = pw->fa_conn_list_head[timeout_type];
+ if (~0 != sess_id.session_index) {
+ sess_id.thread_index = thread_index;
+ acl_fa_conn_list_delete_session(am, sess_id);
+ acl_fa_delete_session(am, sw_if_index, sess_id);
+ }
+}
+
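+/*
+ * Create a new session: allocate it from the worker's pool, copy the
+ * session key from the packet, insert the key into the session bihash
+ * with the (thread index, session index) pair as the value, and
+ * enqueue the session onto the appropriate timeout list.
+ */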
+static fa_session_t *
+acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
+ fa_5tuple_t * p5tuple)
+{
+ clib_bihash_kv_40_8_t *pkv = &p5tuple->kv;
+ clib_bihash_kv_40_8_t kv;
+ fa_full_session_id_t f_sess_id;
+ uword thread_index = os_get_thread_index();
+ void *oldheap = clib_mem_set_heap(am->acl_mheap);
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+
+ f_sess_id.thread_index = thread_index;
+ fa_session_t *sess;
+
+ pool_get_aligned (pw->fa_sessions_pool, sess, CLIB_CACHE_LINE_BYTES);
+ f_sess_id.session_index = sess - pw->fa_sessions_pool;
+
+ kv.key[0] = pkv->key[0];
+ kv.key[1] = pkv->key[1];
+ kv.key[2] = pkv->key[2];
+ kv.key[3] = pkv->key[3];
+ kv.key[4] = pkv->key[4];
+ kv.value = f_sess_id.as_u64;
+
+ memcpy (sess, pkv, sizeof (pkv->key));
+ sess->last_active_time = now;
+ sess->sw_if_index = sw_if_index;
+ sess->tcp_flags_seen.as_u16 = 0;
+ sess->thread_index = thread_index;
+ sess->link_list_id = ~0;
+ sess->link_prev_idx = ~0;
+ sess->link_next_idx = ~0;
+
+
+
+ ASSERT(am->fa_sessions_hash_is_initialized == 1);
+ BV (clib_bihash_add_del) (&am->fa_sessions_hash,
+ &kv, 1);
+ acl_fa_conn_list_add_session(am, f_sess_id, now);
+
+ vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);
+ clib_mem_set_heap (oldheap);
+ pw->fa_session_adds_by_sw_if_index[sw_if_index]++;
+ clib_smp_atomic_add(&am->fa_session_total_adds, 1);
+ return sess;
+}
+
+static int
+acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple,
+ clib_bihash_kv_40_8_t * pvalue_sess)
+{
+ return (BV (clib_bihash_search)
+ (&am->fa_sessions_hash, &p5tuple->kv,
+ pvalue_sess) == 0);
+}
+
+
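+/*
+ * The main per-packet work: build the 5-tuple, try to match an
+ * existing session first (tracking the TCP flags and restarting the
+ * timer if the timeout type changed); otherwise evaluate the applied
+ * ACLs. Action 0 drops the packet, action 1 permits it, and action 2
+ * permits it and creates a reflexive session.
+ */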
+always_inline uword
+acl_fa_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip6,
+ int is_input, int is_l2_path, u32 * l2_feat_next_node_index,
+ vlib_node_registration_t * acl_fa_node)
+{
+ u32 n_left_from, *from, *to_next;
+ acl_fa_next_t next_index;
+ u32 pkts_acl_checked = 0;
+ u32 pkts_new_session = 0;
+ u32 pkts_exist_session = 0;
+ u32 pkts_acl_permit = 0;
+ u32 pkts_restart_session_timer = 0;
+ u32 trace_bitmap = 0;
+ acl_main_t *am = &acl_main;
+ fa_5tuple_t fa_5tuple, kv_sess;
+ clib_bihash_kv_40_8_t value_sess;
+ vlib_node_runtime_t *error_node;
+ u64 now = clib_cpu_time_now ();
+ uword thread_index = os_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ error_node = vlib_node_get_runtime (vm, acl_fa_node->index);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = 0;
+ u8 action = 0;
+ u32 sw_if_index0;
+ int acl_check_needed = 1;
+ u32 match_acl_in_index = ~0;
+ u32 match_rule_index = ~0;
+ u8 error0 = 0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (is_input)
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ else
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ /*
+ * Extract the L3/L4 matching info into a 5-tuple structure,
+ * then create a session key whose layout is independent of the
+ * forward or reverse direction of the packet.
+ */
+
+ acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple);
+ fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff;
+ acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess);
+ fa_5tuple.pkt.sw_if_index = sw_if_index0;
+ fa_5tuple.pkt.is_ip6 = is_ip6;
+ fa_5tuple.pkt.is_input = is_input;
+ fa_5tuple.pkt.mask_type_index_lsb = ~0;
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning
+ ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx",
+ kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2],
+ kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value);
+ clib_warning
+ ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx",
+ fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2],
+ fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value);
+#endif
+
+ /* Try to match an existing session first */
+
+ if (acl_fa_ifc_has_sessions (am, sw_if_index0))
+ {
+ if (acl_fa_find_session
+ (am, sw_if_index0, &kv_sess, &value_sess))
+ {
+ trace_bitmap |= 0x80000000;
+ error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
+ fa_full_session_id_t f_sess_id;
+
+ f_sess_id.as_u64 = value_sess.value;
+ ASSERT(f_sess_id.thread_index < vec_len(vlib_mains));
+
+ fa_session_t *sess = get_session_ptr(am, f_sess_id.thread_index, f_sess_id.session_index);
+ int old_timeout_type =
+ fa_session_get_timeout_type (am, sess);
+ action =
+ acl_fa_track_session (am, is_input, sw_if_index0, now,
+ sess, &fa_5tuple);
+ /* expose the session id to the tracer */
+ match_rule_index = f_sess_id.session_index;
+ int new_timeout_type =
+ fa_session_get_timeout_type (am, sess);
+ acl_check_needed = 0;
+ pkts_exist_session += 1;
+ /* Tracking might have changed the session timeout type, e.g. from transient to established */
+ if (PREDICT_FALSE (old_timeout_type != new_timeout_type))
+ {
+ acl_fa_restart_timer_for_session (am, now, f_sess_id);
+ pkts_restart_session_timer++;
+ trace_bitmap |=
+ 0x00010000 + ((0xff & old_timeout_type) << 8) +
+ (0xff & new_timeout_type);
+ }
+ /*
+ * I estimate the likelihood to be very low: VPP would need to
+ * have >64K interfaces to start with, and then two interfaces
+ * exactly 64K indices apart would have to carry exactly the
+ * same 5-tuple. Anyway, since this probability is nonzero -
+ * print an error and drop the unlucky packet.
+ * If this shows up in the real world, we would need to bump
+ * the hash key length.
+ */
+ if (PREDICT_FALSE(sess->sw_if_index != sw_if_index0)) {
+ clib_warning("BUG: session LSB16(sw_if_index) and 5-tuple collision!");
+ acl_check_needed = 0;
+ action = 0;
+ }
+ }
+ }
+
+ if (acl_check_needed)
+ {
+ action =
+ multi_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path,
+ is_ip6, is_input, &match_acl_in_index,
+ &match_rule_index, &trace_bitmap);
+ error0 = action;
+ if (1 == action)
+ pkts_acl_permit += 1;
+ if (2 == action)
+ {
+ if (!acl_fa_can_add_session (am, is_input, sw_if_index0))
+ acl_fa_try_recycle_session (am, is_input, thread_index, sw_if_index0);
+
+ if (acl_fa_can_add_session (am, is_input, sw_if_index0))
+ {
+ fa_session_t *sess = acl_fa_add_session (am, is_input, sw_if_index0, now,
+ &kv_sess);
+ acl_fa_track_session (am, is_input, sw_if_index0, now,
+ sess, &fa_5tuple);
+ pkts_new_session += 1;
+ }
+ else
+ {
+ action = 0;
+ error0 = ACL_FA_ERROR_ACL_TOO_MANY_SESSIONS;
+ }
+ }
+ }
+
+
+
+ if (action > 0)
+ {
+ if (is_l2_path)
+ next0 = vnet_l2_feature_next (b0, l2_feat_next_node_index, 0);
+ else
+ vnet_feature_next (sw_if_index0, &next0, b0);
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ acl_fa_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->match_acl_in_index = match_acl_in_index;
+ t->match_rule_index = match_rule_index;
+ t->packet_info[0] = fa_5tuple.kv.key[0];
+ t->packet_info[1] = fa_5tuple.kv.key[1];
+ t->packet_info[2] = fa_5tuple.kv.key[2];
+ t->packet_info[3] = fa_5tuple.kv.key[3];
+ t->packet_info[4] = fa_5tuple.kv.key[4];
+ t->packet_info[5] = fa_5tuple.kv.value;
+ t->action = action;
+ t->trace_bitmap = trace_bitmap;
+ }
+
+ next0 = next0 < node->n_next_nodes ? next0 : 0;
+ if (0 == next0)
+ b0->error = error_node->errors[error0];
+
+ pkts_acl_checked += 1;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, bi0,
+ next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, acl_fa_node->index,
+ ACL_FA_ERROR_ACL_CHECK, pkts_acl_checked);
+ vlib_node_increment_counter (vm, acl_fa_node->index,
+ ACL_FA_ERROR_ACL_PERMIT, pkts_acl_permit);
+ vlib_node_increment_counter (vm, acl_fa_node->index,
+ ACL_FA_ERROR_ACL_NEW_SESSION,
+ pkts_new_session);
+ vlib_node_increment_counter (vm, acl_fa_node->index,
+ ACL_FA_ERROR_ACL_EXIST_SESSION,
+ pkts_exist_session);
+ vlib_node_increment_counter (vm, acl_fa_node->index,
+ ACL_FA_ERROR_ACL_RESTART_SESSION_TIMER,
+ pkts_restart_session_timer);
+ return frame->n_vectors;
+}
+
+
+vlib_node_registration_t acl_in_l2_ip6_node;
+static uword
+acl_in_ip6_l2_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ acl_main_t *am = &acl_main;
+ return acl_fa_node_fn (vm, node, frame, 1, 1, 1,
+ am->fa_acl_in_ip6_l2_node_feat_next_node_index,
+ &acl_in_l2_ip6_node);
+}
+
+vlib_node_registration_t acl_in_l2_ip4_node;
+static uword
+acl_in_ip4_l2_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ acl_main_t *am = &acl_main;
+ return acl_fa_node_fn (vm, node, frame, 0, 1, 1,
+ am->fa_acl_in_ip4_l2_node_feat_next_node_index,
+ &acl_in_l2_ip4_node);
+}
+
+vlib_node_registration_t acl_out_l2_ip6_node;
+static uword
+acl_out_ip6_l2_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ acl_main_t *am = &acl_main;
+ return acl_fa_node_fn (vm, node, frame, 1, 0, 1,
+ am->fa_acl_out_ip6_l2_node_feat_next_node_index,
+ &acl_out_l2_ip6_node);
+}
+
+vlib_node_registration_t acl_out_l2_ip4_node;
+static uword
+acl_out_ip4_l2_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ acl_main_t *am = &acl_main;
+ return acl_fa_node_fn (vm, node, frame, 0, 0, 1,
+ am->fa_acl_out_ip4_l2_node_feat_next_node_index,
+ &acl_out_l2_ip4_node);
+}
+
+
+/**** L3 processing path nodes ****/
+
+
+vlib_node_registration_t acl_in_fa_ip6_node;
+static uword
+acl_in_ip6_fa_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return acl_fa_node_fn (vm, node, frame, 1, 1, 0, 0, &acl_in_fa_ip6_node);
+}
+
+vlib_node_registration_t acl_in_fa_ip4_node;
+static uword
+acl_in_ip4_fa_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return acl_fa_node_fn (vm, node, frame, 0, 1, 0, 0, &acl_in_fa_ip4_node);
+}
+
+vlib_node_registration_t acl_out_fa_ip6_node;
+static uword
+acl_out_ip6_fa_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return acl_fa_node_fn (vm, node, frame, 1, 0, 0, 0, &acl_out_fa_ip6_node);
+}
+
+vlib_node_registration_t acl_out_fa_ip4_node;
+static uword
+acl_out_ip4_fa_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return acl_fa_node_fn (vm, node, frame, 0, 0, 0, 0, &acl_out_fa_ip4_node);
+}
+
+/*
+ * This process ensures that the connection cleanup happens every so often
+ * even in the absence of traffic, and also provides general orchestration
+ * for requests like connection deletion on a given sw_if_index.
+ */
+
+
+/* *INDENT-OFF* */
+#define foreach_acl_fa_cleaner_error \
+_(UNKNOWN_EVENT, "unknown event received") \
+/* end of errors */
+
+typedef enum
+{
+#define _(sym,str) ACL_FA_CLEANER_ERROR_##sym,
+ foreach_acl_fa_cleaner_error
+#undef _
+ ACL_FA_CLEANER_N_ERROR,
+} acl_fa_cleaner_error_t;
+
+static char *acl_fa_cleaner_error_strings[] = {
+#define _(sym,string) string,
+ foreach_acl_fa_cleaner_error
+#undef _
+};
+
+/* *INDENT-ON* */
+
+static vlib_node_registration_t acl_fa_session_cleaner_process_node;
+static vlib_node_registration_t acl_fa_worker_session_cleaner_process_node;
+
+/*
+ * Per-worker thread interrupt-driven cleaner thread
+ * to clean idle connections if there are no packets
+ */
+static uword
+acl_fa_worker_conn_cleaner_process(vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ acl_main_t *am = &acl_main;
+ u64 now = clib_cpu_time_now ();
+ u16 thread_index = os_get_thread_index ();
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+ int num_expired;
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("\nacl_fa_worker_conn_cleaner: thread index %d now %lu\n\n", thread_index, now);
+#endif
+ /* allow another interrupt to be queued */
+ pw->interrupt_is_pending = 0;
+ if (pw->clear_in_process) {
+ if (0 == pw->swipe_end_time) {
+ /*
+ * Someone has just set the flag to start clearing.
+ * We do this by combing through the connections up to a "time T"
+ * which is now, and requeueing everything except the expired
+ * connections and those matching the interface(s) being cleared.
+ */
+
+ /*
+ * first filter the sw_if_index bitmap that they want from us, by
+ * a bitmap of sw_if_index for which we actually have connections.
+ */
+ if ((pw->pending_clear_sw_if_index_bitmap == 0)
+ || (pw->serviced_sw_if_index_bitmap == 0)) {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("WORKER-CLEAR: someone tried to call clear, but one of the bitmaps are empty");
+#endif
+ clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap);
+ } else {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("WORKER-CLEAR: (before and) swiping sw-if-index bitmap: %U, my serviced bitmap %U",
+ format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap,
+ format_bitmap_hex, pw->serviced_sw_if_index_bitmap);
+#endif
+ pw->pending_clear_sw_if_index_bitmap = clib_bitmap_and(pw->pending_clear_sw_if_index_bitmap,
+ pw->serviced_sw_if_index_bitmap);
+ }
+
+ if (clib_bitmap_is_zero(pw->pending_clear_sw_if_index_bitmap)) {
+ /* if the intersection is an empty bitmap, there is nothing to do. */
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("WORKER: clearing done - nothing to do");
+#endif
+ pw->clear_in_process = 0;
+ } else {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("WORKER-CLEAR: swiping sw-if-index bitmap: %U, my serviced bitmap %U",
+ format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap,
+ format_bitmap_hex, pw->serviced_sw_if_index_bitmap);
+#endif
+ /* swipe through the connection lists until enqueue timestamps become above "now" */
+ pw->swipe_end_time = now;
+ }
+ }
+ }
+ num_expired = acl_fa_check_idle_sessions(am, thread_index, now);
+ // clib_warning("WORKER-CLEAR: checked %d sessions (clear_in_progress: %d)", num_expired, pw->clear_in_process);
+ if (pw->clear_in_process) {
+ if (0 == num_expired) {
+ /* we were clearing but we could not process any more connections. time to stop. */
+ clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap);
+ pw->clear_in_process = 0;
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("WORKER: clearing done, all done");
+#endif
+ } else {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("WORKER-CLEAR: more work to do, raising interrupt");
+#endif
+ /* we should continue clearing, so request that another interrupt be sent */
+ pw->interrupt_is_needed = 1;
+ }
+ } else {
+ if (num_expired >= am->fa_max_deleted_sessions_per_interval) {
+ /* there was too much work, we should get an interrupt ASAP */
+ pw->interrupt_is_needed = 1;
+ pw->interrupt_is_unwanted = 0;
+ } else if (num_expired <= am->fa_min_deleted_sessions_per_interval) {
+ /* signal that they should trigger us less */
+ pw->interrupt_is_needed = 0;
+ pw->interrupt_is_unwanted = 1;
+ } else {
+ /* the current rate of interrupts is ok */
+ pw->interrupt_is_needed = 0;
+ pw->interrupt_is_unwanted = 0;
+ }
+ }
+ pw->interrupt_generation = am->fa_interrupt_generation;
+ return 0;
+}
+
+static void
+send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index)
+{
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+ if (!pw->interrupt_is_pending) {
+ pw->interrupt_is_pending = 1;
+ vlib_node_set_interrupt_pending (vlib_mains[thread_index],
+ acl_fa_worker_session_cleaner_process_node.index);
+ /* if the interrupt was requested, mark that done. */
+ /* pw->interrupt_is_needed = 0; */
+ }
+}
+
+static void
+send_interrupts_to_workers (vlib_main_t * vm, acl_main_t *am)
+{
+ int i;
+ /* Can't use vec_len(am->per_worker_data) since the threads might not have come up yet. */
+ int n_threads = vec_len(vlib_mains);
+ for (i = n_threads > 1 ? 1 : 0; i < n_threads; i++) {
+ send_one_worker_interrupt(vm, am, i);
+ }
+}
+
+/* centralized process to drive per-worker cleaners */
+static uword
+acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ acl_main_t *am = &acl_main;
+ u64 now;
+ f64 cpu_cps = vm->clib_time.clocks_per_second;
+ u64 next_expire;
+ /* We should check if there are connections to clean up - at least twice a second */
+ u64 max_timer_wait_interval = cpu_cps / 2;
+ uword event_type, *event_data = 0;
+ acl_fa_per_worker_data_t *pw0;
+
+ am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval;
+ am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index;
+ am->fa_interrupt_generation = 1;
+ while (1)
+ {
+ now = clib_cpu_time_now ();
+ next_expire = now + am->fa_current_cleaner_timer_wait_interval;
+ int has_pending_conns = 0;
+ u16 ti;
+ u8 tt;
+
+ /*
+ * walk over all per-thread list heads of different timeouts,
+ * and see if there are any connections pending.
+ * If there aren't - we do not need to wake up until the
+ * worker code signals that it has added a connection.
+ *
+ * Also, while we are at it, calculate the earliest we need to wake up.
+ */
+ for(ti = 0; ti < vec_len(vlib_mains); ti++) {
+ if (ti >= vec_len(am->per_worker_data)) {
+ continue;
+ }
+ acl_fa_per_worker_data_t *pw = &am->per_worker_data[ti];
+ for(tt = 0; tt < vec_len(pw->fa_conn_list_head); tt++) {
+ u64 head_expiry = acl_fa_get_list_head_expiry_time(am, pw, now, ti, tt);
+ if ((head_expiry < next_expire) && !pw->interrupt_is_pending) {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("Head expiry: %lu, now: %lu, next_expire: %lu (worker: %d, tt: %d)", head_expiry, now, next_expire, ti, tt);
+#endif
+ next_expire = head_expiry;
+ }
+ if (~0 != pw->fa_conn_list_head[tt]) {
+ has_pending_conns = 1;
+ }
+ }
+ }
+
+ /* If no pending connections and no ACL applied then no point in timing out */
+ if (!has_pending_conns && (0 == am->fa_total_enabled_count))
+ {
+ am->fa_cleaner_cnt_wait_without_timeout++;
+ (void) vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &event_data);
+ }
+ else
+ {
+ f64 timeout = ((i64) next_expire - (i64) now) / cpu_cps;
+ if (timeout <= 0)
+ {
+ /* skip waiting altogether */
+ event_type = ~0;
+ }
+ else
+ {
+ am->fa_cleaner_cnt_wait_with_timeout++;
+ (void) vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ }
+ }
+
+ switch (event_type)
+ {
+ case ~0:
+ /* nothing to do */
+ break;
+ case ACL_FA_CLEANER_RESCHEDULE:
+ /* Nothing to do. */
+ break;
+ case ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX:
+ {
+ uword *clear_sw_if_index_bitmap = 0;
+ uword *sw_if_index0;
+ int clear_all = 0;
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX received");
+#endif
+ vec_foreach (sw_if_index0, event_data)
+ {
+ am->fa_cleaner_cnt_delete_by_sw_index++;
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning
+ ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d",
+ *sw_if_index0);
+#endif
+ if (*sw_if_index0 == ~0)
+ {
+ clear_all = 1;
+ }
+ else
+ {
+ if (!pool_is_free_index (am->vnet_main->interface_main.sw_interfaces, *sw_if_index0))
+ {
+ clear_sw_if_index_bitmap = clib_bitmap_set(clear_sw_if_index_bitmap, *sw_if_index0, 1);
+ }
+ }
+ }
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap);
+#endif
+ vec_foreach(pw0, am->per_worker_data) {
+ CLIB_MEMORY_BARRIER ();
+ while (pw0->clear_in_process) {
+ CLIB_MEMORY_BARRIER ();
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d...", pw0 - am->per_worker_data);
+#endif
+ vlib_process_suspend(vm, 0.0001);
+ if (pw0->interrupt_is_needed) {
+ send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data));
+ }
+ }
+ if (pw0->clear_in_process) {
+ clib_warning("ERROR-BUG! Could not initiate cleaning on worker because another cleanup in progress");
+ } else {
+ if (clear_all)
+ {
+ /* if we need to clear all, then just clear the interfaces that we are servicing */
+ pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(pw0->serviced_sw_if_index_bitmap);
+ }
+ else
+ {
+ pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(clear_sw_if_index_bitmap);
+ }
+ pw0->clear_in_process = 1;
+ }
+ }
+ /* send some interrupts so they can start working */
+ send_interrupts_to_workers(vm, am);
+
+ /* now wait till they all complete */
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("CLEANER mains len: %d per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data));
+#endif
+ vec_foreach(pw0, am->per_worker_data) {
+ CLIB_MEMORY_BARRIER ();
+ while (pw0->clear_in_process) {
+ CLIB_MEMORY_BARRIER ();
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d...", pw0 - am->per_worker_data);
+#endif
+ vlib_process_suspend(vm, 0.0001);
+ if (pw0->interrupt_is_needed) {
+ send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data));
+ }
+ }
+ }
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("ACL_FA_NODE_CLEAN: cleaning done");
+#endif
+ clib_bitmap_free(clear_sw_if_index_bitmap);
+ }
+ break;
+ default:
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning ("ACL plugin connection cleaner: unknown event %u",
+ event_type);
+#endif
+ vlib_node_increment_counter (vm,
+ acl_fa_session_cleaner_process_node.
+ index,
+ ACL_FA_CLEANER_ERROR_UNKNOWN_EVENT, 1);
+ am->fa_cleaner_cnt_unknown_event++;
+ break;
+ }
+
+ send_interrupts_to_workers(vm, am);
+
+ if (event_data)
+ _vec_len (event_data) = 0;
+
+ /*
+ * If the interrupts were not processed yet, ensure we wait a bit,
+ * but up to a point.
+ */
+ int need_more_wait = 0;
+ int max_wait_cycles = 100;
+ do {
+ need_more_wait = 0;
+ vec_foreach(pw0, am->per_worker_data) {
+ if (pw0->interrupt_generation != am->fa_interrupt_generation) {
+ need_more_wait = 1;
+ }
+ }
+ if (need_more_wait) {
+ vlib_process_suspend(vm, 0.0001);
+ }
+ } while (need_more_wait && (--max_wait_cycles > 0));
+
+ int interrupts_needed = 0;
+ int interrupts_unwanted = 0;
+
+ vec_foreach(pw0, am->per_worker_data) {
+ if (pw0->interrupt_is_needed) {
+ interrupts_needed++;
+ /* the per-worker value is reset when sending the interrupt */
+ }
+ if (pw0->interrupt_is_unwanted) {
+ interrupts_unwanted++;
+ pw0->interrupt_is_unwanted = 0;
+ }
+ }
+ if (interrupts_needed) {
+ /* they need more interrupts, do less waiting around next time */
+ am->fa_current_cleaner_timer_wait_interval /= 2;
+ /* never go into zero-wait either though - we need to give the space to others */
+ am->fa_current_cleaner_timer_wait_interval += 1;
+ } else if (interrupts_unwanted) {
+ /* slowly increase the amount of sleep up to a limit */
+ if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval)
+ am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment;
+ }
+ am->fa_cleaner_cnt_event_cycles++;
+ am->fa_interrupt_generation++;
+ }
+ /* NOT REACHED */
+ return 0;
+}
+
+
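+/*
+ * Enable or disable the ACL feature nodes on the interface's ip4/ip6
+ * feature arcs; once the last input/output ACL is removed from an
+ * interface, signal the cleaner process to delete its connections.
+ */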
+void
+acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
+{
+ acl_main_t *am = &acl_main;
+ if (enable_disable) {
+ acl_fa_verify_init_sessions(am);
+ am->fa_total_enabled_count++;
+ void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
+ vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
+ ACL_FA_CLEANER_RESCHEDULE, 0);
+ clib_mem_set_heap (oldheap);
+ } else {
+ am->fa_total_enabled_count--;
+ }
+
+ if (is_input)
+ {
+ ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable);
+ void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
+ vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa",
+ sw_if_index, enable_disable, 0, 0);
+ vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa",
+ sw_if_index, enable_disable, 0, 0);
+ clib_mem_set_heap (oldheap);
+ am->fa_in_acl_on_sw_if_index =
+ clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index,
+ enable_disable);
+ }
+ else
+ {
+ ASSERT(clib_bitmap_get(am->fa_out_acl_on_sw_if_index, sw_if_index) != enable_disable);
+ void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
+ vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa",
+ sw_if_index, enable_disable, 0, 0);
+ vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa",
+ sw_if_index, enable_disable, 0, 0);
+ clib_mem_set_heap (oldheap);
+ am->fa_out_acl_on_sw_if_index =
+ clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index,
+ enable_disable);
+ }
+ if ((!enable_disable) && (!acl_fa_ifc_has_in_acl (am, sw_if_index))
+ && (!acl_fa_ifc_has_out_acl (am, sw_if_index)))
+ {
+#ifdef FA_NODE_VERBOSE_DEBUG
+ clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index);
+#endif
+ void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
+ vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
+ ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
+ sw_if_index);
+ clib_mem_set_heap (oldheap);
+ }
+}
+
+void
+show_fa_sessions_hash(vlib_main_t * vm, u32 verbose)
+{
+ acl_main_t *am = &acl_main;
+ if (am->fa_sessions_hash_is_initialized) {
+ vlib_cli_output(vm, "\nSession lookup hash table:\n%U\n\n",
+ BV (format_bihash), &am->fa_sessions_hash, verbose);
+ } else {
+ vlib_cli_output(vm, "\nSession lookup hash table is not allocated.\n\n");
+ }
+}
+
+
+/* *INDENT-OFF* */
+
+VLIB_REGISTER_NODE (acl_fa_worker_session_cleaner_process_node, static) = {
+ .function = acl_fa_worker_conn_cleaner_process,
+ .name = "acl-plugin-fa-worker-cleaner-process",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+
+VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = {
+ .function = acl_fa_session_cleaner_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "acl-plugin-fa-cleaner-process",
+ .n_errors = ARRAY_LEN (acl_fa_cleaner_error_strings),
+ .error_strings = acl_fa_cleaner_error_strings,
+ .n_next_nodes = 0,
+ .next_nodes = {},
+};
+
+
+VLIB_REGISTER_NODE (acl_in_l2_ip6_node) =
+{
+ .function = acl_in_ip6_l2_node_fn,
+ .name = "acl-plugin-in-ip6-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_acl_fa_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (acl_fa_error_strings),
+ .error_strings = acl_fa_error_strings,
+ .n_next_nodes = ACL_FA_N_NEXT,
+ .next_nodes =
+ {
+ [ACL_FA_ERROR_DROP] = "error-drop",
+ }
+};
+
+VLIB_REGISTER_NODE (acl_in_l2_ip4_node) =
+{
+ .function = acl_in_ip4_l2_node_fn,
+ .name = "acl-plugin-in-ip4-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_acl_fa_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (acl_fa_error_strings),
+ .error_strings = acl_fa_error_strings,
+ .n_next_nodes = ACL_FA_N_NEXT,
+ .next_nodes =
+ {
+ [ACL_FA_ERROR_DROP] = "error-drop",
+ }
+};
+
+VLIB_REGISTER_NODE (acl_out_l2_ip6_node) =
+{
+ .function = acl_out_ip6_l2_node_fn,
+ .name = "acl-plugin-out-ip6-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_acl_fa_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (acl_fa_error_strings),
+ .error_strings = acl_fa_error_strings,
+ .n_next_nodes = ACL_FA_N_NEXT,
+ .next_nodes =
+ {
+ [ACL_FA_ERROR_DROP] = "error-drop",
+ }
+};
+
+VLIB_REGISTER_NODE (acl_out_l2_ip4_node) =
+{
+ .function = acl_out_ip4_l2_node_fn,
+ .name = "acl-plugin-out-ip4-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_acl_fa_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (acl_fa_error_strings),
+ .error_strings = acl_fa_error_strings,
+ .n_next_nodes = ACL_FA_N_NEXT,
+ .next_nodes =
+ {
+ [ACL_FA_ERROR_DROP] = "error-drop",
+ }
+};
+
+
+VLIB_REGISTER_NODE (acl_in_fa_ip6_node) =
+{
+ .function = acl_in_ip6_fa_node_fn,
+ .name = "acl-plugin-in-ip6-fa",
+ .vector_size = sizeof (u32),
+ .format_trace = format_acl_fa_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (acl_fa_error_strings),
+ .error_strings = acl_fa_error_strings,
+ .n_next_nodes = ACL_FA_N_NEXT,
+ .next_nodes =
+ {
+ [ACL_FA_ERROR_DROP] = "error-drop",
+ }
+};
+
+VNET_FEATURE_INIT (acl_in_ip6_fa_feature, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "acl-plugin-in-ip6-fa",
+ .runs_before = VNET_FEATURES ("ip6-flow-classify"),
+};
+
+VLIB_REGISTER_NODE (acl_in_fa_ip4_node) =
+{
+ .function = acl_in_ip4_fa_node_fn,
+ .name = "acl-plugin-in-ip4-fa",
+ .vector_size = sizeof (u32),
+ .format_trace = format_acl_fa_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (acl_fa_error_strings),
+ .error_strings = acl_fa_error_strings,
+ .n_next_nodes = ACL_FA_N_NEXT,
+ .next_nodes =
+ {
+ [ACL_FA_ERROR_DROP] = "error-drop",
+ }
+};
+
+VNET_FEATURE_INIT (acl_in_ip4_fa_feature, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "acl-plugin-in-ip4-fa",
+ .runs_before = VNET_FEATURES ("ip4-flow-classify"),
+};
+
+
+VLIB_REGISTER_NODE (acl_out_fa_ip6_node) =
+{
+ .function = acl_out_ip6_fa_node_fn,
+ .name = "acl-plugin-out-ip6-fa",
+ .vector_size = sizeof (u32),
+ .format_trace = format_acl_fa_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (acl_fa_error_strings),
+ .error_strings = acl_fa_error_strings,
+ .n_next_nodes = ACL_FA_N_NEXT,
+ .next_nodes =
+ {
+ [ACL_FA_ERROR_DROP] = "error-drop",
+ }
+};
+
+VNET_FEATURE_INIT (acl_out_ip6_fa_feature, static) =
+{
+ .arc_name = "ip6-output",
+ .node_name = "acl-plugin-out-ip6-fa",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VLIB_REGISTER_NODE (acl_out_fa_ip4_node) =
+{
+ .function = acl_out_ip4_fa_node_fn,
+ .name = "acl-plugin-out-ip4-fa",
+ .vector_size = sizeof (u32),
+ .format_trace = format_acl_fa_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (acl_fa_error_strings),
+ .error_strings = acl_fa_error_strings,
+ .n_next_nodes = ACL_FA_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {
+ [ACL_FA_ERROR_DROP] = "error-drop",
+ }
+};
+
+VNET_FEATURE_INIT (acl_out_ip4_fa_feature, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "acl-plugin-out-ip4-fa",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+
+/* *INDENT-ON* */
diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h
new file mode 100644
index 00000000..fa9a2303
--- /dev/null
+++ b/src/plugins/acl/fa_node.h
@@ -0,0 +1,174 @@
+#ifndef _FA_NODE_H_
+#define _FA_NODE_H_
+
+#include <stddef.h>
+#include <vppinfra/bihash_40_8.h>
+
+#define TCP_FLAG_FIN 0x01
+#define TCP_FLAG_SYN 0x02
+#define TCP_FLAG_RST 0x04
+#define TCP_FLAG_PUSH 0x08
+#define TCP_FLAG_ACK 0x10
+#define TCP_FLAG_URG 0x20
+#define TCP_FLAG_ECE 0x40
+#define TCP_FLAG_CWR 0x80
+#define TCP_FLAGS_RSTFINACKSYN (TCP_FLAG_RST + TCP_FLAG_FIN + TCP_FLAG_SYN + TCP_FLAG_ACK)
+#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN + TCP_FLAG_ACK)
+
+#define ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1<<30)
+#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES 1000000
+
+typedef union {
+ u64 as_u64;
+ struct {
+ u32 sw_if_index;
+ u16 mask_type_index_lsb;
+ u8 tcp_flags;
+ u8 tcp_flags_valid:1;
+ u8 is_input:1;
+ u8 l4_valid:1;
+ u8 is_nonfirst_fragment:1;
+ u8 is_ip6:1;
+ u8 flags_reserved:3;
+ };
+} fa_packet_info_t;
+
+typedef union {
+ u64 as_u64;
+ struct {
+ u16 port[2];
+ u16 proto;
+ u16 lsb_of_sw_if_index;
+ };
+} fa_session_l4_key_t;
+
+typedef union {
+ struct {
+ ip46_address_t addr[2];
+ fa_session_l4_key_t l4;
+ /* This field should align with u64 value in bihash_40_8 keyvalue struct */
+ fa_packet_info_t pkt;
+ };
+ clib_bihash_kv_40_8_t kv;
+} fa_5tuple_t;
+
+
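+/*
+ * Per-session state, laid out to fit in exactly two cache lines (see
+ * the compile-time asserts below): the first holds the lookup data,
+ * the second the timeout-list linkage.
+ */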
+typedef struct {
+ fa_5tuple_t info; /* (5+1)*8 = 48 bytes */
+ u64 last_active_time; /* +8 bytes = 56 */
+ u32 sw_if_index; /* +4 bytes = 60 */
+ union {
+ u8 as_u8[2];
+ u16 as_u16;
+ } tcp_flags_seen; /* +2 bytes = 62 */
+ u16 thread_index; /* +2 bytes = 64 */
+ u64 link_enqueue_time; /* 8 bytes = 8 */
+ u32 link_prev_idx; /* +4 bytes = 12 */
+ u32 link_next_idx; /* +4 bytes = 16 */
+ u8 link_list_id; /* +1 bytes = 17 */
+ u8 reserved1[7]; /* +7 bytes = 24 */
+ u64 reserved2[5]; /* +5*8 bytes = 64 */
+} fa_session_t;
+
+
+/* This structure is used to fill in the u64 value
+ in the per-sw-if-index hash table */
+typedef struct {
+ union {
+ u64 as_u64;
+ struct {
+ u32 session_index;
+ u16 thread_index;
+ u16 reserved0;
+ };
+ };
+} fa_full_session_id_t;
+
+/*
+ * A few compile-time constraints on the size and the layout of the union, to ensure
+ * it makes sense both for bihash and for us.
+ */
+
+#define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y))-1]
+CT_ASSERT_EQUAL(fa_l3_key_size_is_40, offsetof(fa_5tuple_t, pkt), offsetof(clib_bihash_kv_40_8_t, value));
+CT_ASSERT_EQUAL(fa_l4_key_t_is_8, sizeof(fa_session_l4_key_t), sizeof(u64));
+CT_ASSERT_EQUAL(fa_packet_info_t_is_8, sizeof(fa_packet_info_t), sizeof(u64));
+CT_ASSERT_EQUAL(fa_l3_kv_size_is_48, sizeof(fa_5tuple_t), sizeof(clib_bihash_kv_40_8_t));
+
+/* Let's try to fit within two cachelines */
+CT_ASSERT_EQUAL(fa_session_t_size_is_128, sizeof(fa_session_t), 128);
+
+/* Session ID MUST be the same as u64 */
+CT_ASSERT_EQUAL(fa_full_session_id_size_is_64, sizeof(fa_full_session_id_t), sizeof(u64));
+#undef CT_ASSERT_EQUAL
+
+typedef struct {
+ /* The pool of sessions managed by this worker */
+ fa_session_t *fa_sessions_pool;
+ /* per-worker ACL_N_TIMEOUTS of conn lists */
+ u32 *fa_conn_list_head;
+ u32 *fa_conn_list_tail;
+ /* adds and deletes per-worker-per-interface */
+ u64 *fa_session_dels_by_sw_if_index;
+ u64 *fa_session_adds_by_sw_if_index;
+ /* Vector of expired connections retrieved from lists */
+ u32 *expired;
+ /* the earliest next expiry time */
+ u64 next_expiry_time;
+ /* if not zero, look at all the elements until their enqueue timestamp is after this value */
+ u64 requeue_until_time;
+ /* Current time between the checks */
+ u64 current_time_wait_interval;
+ /* Counter of how many sessions we did delete */
+ u64 cnt_deleted_sessions;
+ /* Counter of already-deleted sessions being deleted again - should never increment unless there is a bug */
+ u64 cnt_already_deleted_sessions;
+ /* Number of times we requeued a session to a head of the list */
+ u64 cnt_session_timer_restarted;
+ /* swipe up to this enqueue time, rather than following the timeouts */
+ u64 swipe_end_time;
+ /* bitmap of sw_if_index serviced by this worker */
+ uword *serviced_sw_if_index_bitmap;
+ /* bitmap of sw_if_indices to clear. set by main thread, cleared by worker */
+ uword *pending_clear_sw_if_index_bitmap;
+ /* atomic, indicates that the swipe-deletion of connections is in progress */
+ u32 clear_in_process;
+ /* Interrupt is pending from main thread */
+ int interrupt_is_pending;
+ /*
+ * Interrupt node on the worker thread sets this if it knows there is
+ * more work to do, but it has to finish to avoid hogging the
+ * core for too long.
+ */
+ int interrupt_is_needed;
+ /*
+ * Set to indicate that the interrupt node wants to get less interrupts
+ * because there is not enough work for the current rate.
+ */
+ int interrupt_is_unwanted;
+ /*
+ * Set to copy of a "generation" counter in main thread so we can sync the interrupts.
+ */
+ int interrupt_generation;
+} acl_fa_per_worker_data_t;
+
+
+typedef enum {
+ ACL_FA_ERROR_DROP,
+ ACL_FA_N_NEXT,
+} acl_fa_next_t;
+
+
+enum
+{
+ ACL_FA_CLEANER_RESCHEDULE = 1,
+ ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
+} acl_fa_cleaner_process_event_e;
+
+void acl_fa_enable_disable(u32 sw_if_index, int is_input, int enable_disable);
+
+void show_fa_sessions_hash(vlib_main_t * vm, u32 verbose);
+
+
+#endif
diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c
new file mode 100644
index 00000000..7869027b
--- /dev/null
+++ b/src/plugins/acl/hash_lookup.c
@@ -0,0 +1,894 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stddef.h>
+#include <netinet/in.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/plugin/plugin.h>
+#include <acl/acl.h>
+#include <vppinfra/bihash_48_8.h>
+
+#include "hash_lookup.h"
+#include "hash_lookup_private.h"
+
+
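+/*
+ * Return the address of the per-interface vector of applied hash ACE
+ * entries for the given direction (input or output).
+ */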
+static inline applied_hash_ace_entry_t **get_applied_hash_aces(acl_main_t *am, int is_input, u32 sw_if_index)
+{
+ applied_hash_ace_entry_t **applied_hash_aces = is_input ? vec_elt_at_index(am->input_hash_entry_vec_by_sw_if_index, sw_if_index)
+ : vec_elt_at_index(am->output_hash_entry_vec_by_sw_if_index, sw_if_index);
+ return applied_hash_aces;
+}
+
+
+
+/*
+ * This returns true if there is indeed a match on the portranges.
+ * With all these levels of indirection, this is not going to be very fast,
+ * so it is best to use individual ports or wildcard port ranges for performance.
+ */
+static int
+match_portranges(acl_main_t *am, fa_5tuple_t *match, u32 index)
+{
+
+ applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, match->pkt.is_input, match->pkt.sw_if_index);
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), index);
+
+ acl_rule_t *r = &(am->acls[pae->acl_index].rules[pae->ace_index]);
+ DBG("PORTMATCH: %d <= %d <= %d && %d <= %d <= %d ?",
+ r->src_port_or_type_first, match->l4.port[0], r->src_port_or_type_last,
+ r->dst_port_or_code_first, match->l4.port[1], r->dst_port_or_code_last);
+
+ return ( ((r->src_port_or_type_first <= match->l4.port[0]) && r->src_port_or_type_last >= match->l4.port[0]) &&
+ ((r->dst_port_or_code_first <= match->l4.port[1]) && r->dst_port_or_code_last >= match->l4.port[1]) );
+}
+
+static u32
+multi_acl_match_get_applied_ace_index(acl_main_t *am, fa_5tuple_t *match)
+{
+ clib_bihash_kv_48_8_t kv;
+ clib_bihash_kv_48_8_t result;
+ fa_5tuple_t *kv_key = (fa_5tuple_t *)kv.key;
+ hash_acl_lookup_value_t *result_val = (hash_acl_lookup_value_t *)&result.value;
+ u64 *pmatch = (u64 *)match;
+ u64 *pmask;
+ u64 *pkey;
+ int mask_type_index;
+ u32 curr_match_index = ~0;
+
+ u32 sw_if_index = match->pkt.sw_if_index;
+ u8 is_input = match->pkt.is_input;
+ applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index);
+ applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index :
+ &am->output_applied_hash_acl_info_by_sw_if_index;
+
+ DBG("TRYING TO MATCH: %016llx %016llx %016llx %016llx %016llx %016llx",
+ pmatch[0], pmatch[1], pmatch[2], pmatch[3], pmatch[4], pmatch[5]);
+
+ for(mask_type_index=0; mask_type_index < pool_len(am->ace_mask_type_pool); mask_type_index++) {
+ if (!clib_bitmap_get(vec_elt_at_index((*applied_hash_acls), sw_if_index)->mask_type_index_bitmap, mask_type_index)) {
+ /* This bit is not set. Avoid trying to match */
+ continue;
+ }
+ ace_mask_type_entry_t *mte = vec_elt_at_index(am->ace_mask_type_pool, mask_type_index);
+ pmatch = (u64 *)match;
+ pmask = (u64 *)&mte->mask;
+ pkey = (u64 *)kv.key;
+ /*
+     * unrolling the loop below results in a noticeable performance increase.
+ int i;
+ for(i=0; i<6; i++) {
+ kv.key[i] = pmatch[i] & pmask[i];
+ }
+ */
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+
+ kv_key->pkt.mask_type_index_lsb = mask_type_index;
+ DBG(" KEY %3d: %016llx %016llx %016llx %016llx %016llx %016llx", mask_type_index,
+ kv.key[0], kv.key[1], kv.key[2], kv.key[3], kv.key[4], kv.key[5]);
+ int res = BV (clib_bihash_search) (&am->acl_lookup_hash, &kv, &result);
+ if (res == 0) {
+ DBG("ACL-MATCH! result_val: %016llx", result_val->as_u64);
+ if (result_val->applied_entry_index < curr_match_index) {
+ if (PREDICT_FALSE(result_val->need_portrange_check)) {
+ /*
+ * This is going to be slow, since we can have multiple superset
+ * entries for narrow-ish portranges, e.g.:
+           * 0..42, 100..400, 230..60000,
+ * so we need to walk linearly and check if they match.
+ */
+
+ u32 curr_index = result_val->applied_entry_index;
+ while ((curr_index != ~0) && !match_portranges(am, match, curr_index)) {
+ /* while no match and there are more entries, walk... */
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces),curr_index);
+ DBG("entry %d did not portmatch, advancing to %d", curr_index, pae->next_applied_entry_index);
+ curr_index = pae->next_applied_entry_index;
+ }
+ if (curr_index < curr_match_index) {
+ DBG("The index %d is the new candidate in portrange matches.", curr_index);
+ curr_match_index = curr_index;
+ } else {
+ DBG("Curr portmatch index %d is too big vs. current matched one %d", curr_index, curr_match_index);
+ }
+ } else {
+          /* The usual path is here. Found an entry in front of the current candidate - so it's a new one */
+ DBG("This match is the new candidate");
+ curr_match_index = result_val->applied_entry_index;
+ if (!result_val->shadowed) {
+ /* new result is known to not be shadowed, so no point to look up further */
+ break;
+ }
+ }
+ }
+ }
+ }
+ DBG("MATCH-RESULT: %d", curr_match_index);
+ return curr_match_index;
+}
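
The core of the lookup above is that each candidate mask type turns the packet 5-tuple into an exact-match key by bitwise AND, so all rules that share a mask collapse into exact-match hash entries. A self-contained sketch of that key construction, assuming the 6 x u64 layout of fa_5tuple_t:

    #include <stdint.h>

    /* One AND pass plus one bihash search replaces a linear scan over
     * all the rules that share this mask type. */
    static void
    make_masked_key (const uint64_t match[6], const uint64_t mask[6],
                     uint64_t key[6])
    {
      int i;
      for (i = 0; i < 6; i++)
        key[i] = match[i] & mask[i];
    }
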
+
+static void
+hashtable_add_del(acl_main_t *am, clib_bihash_kv_48_8_t *kv, int is_add)
+{
+ DBG("HASH ADD/DEL: %016llx %016llx %016llx %016llx %016llx %016llx %016llx add %d",
+ kv->key[0], kv->key[1], kv->key[2],
+ kv->key[3], kv->key[4], kv->key[5], kv->value, is_add);
+ BV (clib_bihash_add_del) (&am->acl_lookup_hash, kv, is_add);
+}
+
+static void
+fill_applied_hash_ace_kv(acl_main_t *am,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 sw_if_index, u8 is_input,
+ u32 new_index, clib_bihash_kv_48_8_t *kv)
+{
+ fa_5tuple_t *kv_key = (fa_5tuple_t *)kv->key;
+ hash_acl_lookup_value_t *kv_val = (hash_acl_lookup_value_t *)&kv->value;
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index);
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, pae->acl_index);
+
+ memcpy(kv_key, &(vec_elt_at_index(ha->rules, pae->hash_ace_info_index)->match), sizeof(*kv_key));
+ /* initialize the sw_if_index and direction */
+ kv_key->pkt.sw_if_index = sw_if_index;
+ kv_key->pkt.is_input = is_input;
+ kv_val->as_u64 = 0;
+ kv_val->applied_entry_index = new_index;
+ kv_val->need_portrange_check = vec_elt_at_index(ha->rules, pae->hash_ace_info_index)->src_portrange_not_powerof2 ||
+ vec_elt_at_index(ha->rules, pae->hash_ace_info_index)->dst_portrange_not_powerof2;
+ /* by default assume all values are shadowed -> check all mask types */
+ kv_val->shadowed = 1;
+}
+
+static void
+add_del_hashtable_entry(acl_main_t *am,
+ u32 sw_if_index, u8 is_input,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 index, int is_add)
+{
+ clib_bihash_kv_48_8_t kv;
+
+ fill_applied_hash_ace_kv(am, applied_hash_aces, sw_if_index, is_input, index, &kv);
+ hashtable_add_del(am, &kv, is_add);
+}
+
+
+
+static void
+activate_applied_ace_hash_entry(acl_main_t *am,
+ u32 sw_if_index, u8 is_input,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 new_index)
+{
+ clib_bihash_kv_48_8_t kv;
+ ASSERT(new_index != ~0);
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index);
+ DBG("activate_applied_ace_hash_entry sw_if_index %d is_input %d new_index %d", sw_if_index, is_input, new_index);
+
+ fill_applied_hash_ace_kv(am, applied_hash_aces, sw_if_index, is_input, new_index, &kv);
+
+ DBG("APPLY ADD KY: %016llx %016llx %016llx %016llx %016llx %016llx",
+ kv.key[0], kv.key[1], kv.key[2],
+ kv.key[3], kv.key[4], kv.key[5]);
+
+ clib_bihash_kv_48_8_t result;
+ hash_acl_lookup_value_t *result_val = (hash_acl_lookup_value_t *)&result.value;
+ int res = BV (clib_bihash_search) (&am->acl_lookup_hash, &kv, &result);
+ ASSERT(new_index != ~0);
+ ASSERT(new_index < vec_len((*applied_hash_aces)));
+ if (res == 0) {
+    /* One or more entries already exist for this key. Append at the end. */
+ u32 first_index = result_val->applied_entry_index;
+ ASSERT(first_index != ~0);
+ DBG("A key already exists, with applied entry index: %d", first_index);
+ applied_hash_ace_entry_t *first_pae = vec_elt_at_index((*applied_hash_aces), first_index);
+ u32 last_index = first_pae->tail_applied_entry_index;
+ ASSERT(last_index != ~0);
+ applied_hash_ace_entry_t *last_pae = vec_elt_at_index((*applied_hash_aces), last_index);
+ DBG("...advance to chained entry index: %d", last_index);
+    /* link ourselves in */
+ last_pae->next_applied_entry_index = new_index;
+ pae->prev_applied_entry_index = last_index;
+ /* adjust the pointer to the new tail */
+ first_pae->tail_applied_entry_index = new_index;
+ } else {
+ /* It's the very first entry */
+ hashtable_add_del(am, &kv, 1);
+ ASSERT(new_index != ~0);
+ pae->tail_applied_entry_index = new_index;
+ }
+}
+
+static void
+applied_hash_entries_analyze(acl_main_t *am, applied_hash_ace_entry_t **applied_hash_aces)
+{
+ /*
+ * Go over the rules and check which ones are shadowed and which aren't.
+ * Naive approach: try to match the match value from every ACE as if it
+ * was a live packet, and see if the resulting match happens earlier in the list.
+   * If it does not match, or it matches later in the ACL, then the entry is not shadowed.
+   *
+   * This approach fails; an example:
+   *   deny tcp 2001:db8::/32 2001:db8::/32
+   *   permit ip 2001:db8::1/128 2001:db8::2/128
+   * Replaying the permit rule's own match value (which is not a TCP packet)
+   * misses the deny rule, so the naive check declares it unshadowed - yet a
+   * real TCP packet from 2001:db8::1 to 2001:db8::2 hits the deny rule first.
+ */
+}
+
+static void *
+hash_acl_set_heap(acl_main_t *am)
+{
+ if (0 == am->hash_lookup_mheap) {
+ am->hash_lookup_mheap = mheap_alloc (0 /* use VM */ , am->hash_lookup_mheap_size);
+ mheap_t *h = mheap_header (am->hash_lookup_mheap);
+ h->flags |= MHEAP_FLAG_THREAD_SAFE;
+ }
+ void *oldheap = clib_mem_set_heap(am->hash_lookup_mheap);
+ return oldheap;
+}
+
+void
+acl_plugin_hash_acl_set_validate_heap(acl_main_t *am, int on)
+{
+ clib_mem_set_heap(hash_acl_set_heap(am));
+ mheap_t *h = mheap_header (am->hash_lookup_mheap);
+ if (on) {
+ h->flags |= MHEAP_FLAG_VALIDATE;
+ h->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ mheap_validate(h);
+ } else {
+ h->flags &= ~MHEAP_FLAG_VALIDATE;
+ h->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ }
+}
+
+void
+acl_plugin_hash_acl_set_trace_heap(acl_main_t *am, int on)
+{
+ clib_mem_set_heap(hash_acl_set_heap(am));
+ mheap_t *h = mheap_header (am->hash_lookup_mheap);
+ if (on) {
+ h->flags |= MHEAP_FLAG_TRACE;
+ } else {
+ h->flags &= ~MHEAP_FLAG_TRACE;
+ }
+}
+
+void
+hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index)
+{
+ int i;
+
+ DBG0("HASH ACL apply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index);
+ if (!am->acl_lookup_hash_initialized) {
+ BV (clib_bihash_init) (&am->acl_lookup_hash, "ACL plugin rule lookup bihash",
+ am->hash_lookup_hash_buckets, am->hash_lookup_hash_memory);
+ am->acl_lookup_hash_initialized = 1;
+ }
+
+ void *oldheap = hash_acl_set_heap(am);
+ if (is_input) {
+ vec_validate(am->input_hash_entry_vec_by_sw_if_index, sw_if_index);
+ } else {
+ vec_validate(am->output_hash_entry_vec_by_sw_if_index, sw_if_index);
+ }
+ vec_validate(am->hash_acl_infos, acl_index);
+ applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index);
+
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
+ u32 **hash_acl_applied_sw_if_index = is_input ? &ha->inbound_sw_if_index_list
+ : &ha->outbound_sw_if_index_list;
+
+ int base_offset = vec_len(*applied_hash_aces);
+
+ /* Update the bitmap of the mask types with which the lookup
+ needs to happen for the ACLs applied to this sw_if_index */
+ applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index :
+ &am->output_applied_hash_acl_info_by_sw_if_index;
+ vec_validate((*applied_hash_acls), sw_if_index);
+ applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), sw_if_index);
+
+ /* ensure the list of applied hash acls is initialized and add this acl# to it */
+ u32 index = vec_search(pal->applied_acls, acl_index);
+ if (index != ~0) {
+ clib_warning("BUG: trying to apply twice acl_index %d on sw_if_index %d is_input %d",
+ acl_index, sw_if_index, is_input);
+ goto done;
+ }
+ vec_add1(pal->applied_acls, acl_index);
+ u32 index2 = vec_search((*hash_acl_applied_sw_if_index), sw_if_index);
+ if (index2 != ~0) {
+ clib_warning("BUG: trying to apply twice acl_index %d on (sw_if_index %d) is_input %d",
+ acl_index, sw_if_index, is_input);
+ goto done;
+ }
+ vec_add1((*hash_acl_applied_sw_if_index), sw_if_index);
+
+ pal->mask_type_index_bitmap = clib_bitmap_or(pal->mask_type_index_bitmap,
+ ha->mask_type_index_bitmap);
+ /*
+   * If the applied ACL is empty, the current code will behave differently
+   * from the current linear search: an empty ACL will simply fall through
+   * to the next ACL, or to the default deny at the end.
+   *
+   * This is not a problem, because after the vpp-dev discussion,
+   * the consensus was that it should not be possible to apply a non-existent
+   * ACL, so the change adding this code also takes care of that.
+ */
+
+ /* expand the applied aces vector by the necessary amount */
+ vec_resize((*applied_hash_aces), vec_len(ha->rules));
+
+ /* add the rules from the ACL to the hash table for lookup and append to the vector*/
+ for(i=0; i < vec_len(ha->rules); i++) {
+ u32 new_index = base_offset + i;
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index);
+ pae->acl_index = acl_index;
+ pae->ace_index = ha->rules[i].ace_index;
+ pae->action = ha->rules[i].action;
+ pae->hitcount = 0;
+ pae->hash_ace_info_index = i;
+ /* we might link it in later */
+ pae->next_applied_entry_index = ~0;
+ pae->prev_applied_entry_index = ~0;
+ pae->tail_applied_entry_index = ~0;
+ activate_applied_ace_hash_entry(am, sw_if_index, is_input, applied_hash_aces, new_index);
+ }
+ applied_hash_entries_analyze(am, applied_hash_aces);
+done:
+ clib_mem_set_heap (oldheap);
+}
+
+static u32
+find_head_applied_ace_index(applied_hash_ace_entry_t **applied_hash_aces, u32 curr_index)
+{
+ /*
+   * Walk back to find the first entry. This is inefficient, so it might need
+   * to be a bit cleverer if it proves to be a problem.
+ */
+ u32 an_index = curr_index;
+ ASSERT(an_index != ~0);
+ applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), an_index);
+ while(head_pae->prev_applied_entry_index != ~0) {
+ an_index = head_pae->prev_applied_entry_index;
+ ASSERT(an_index != ~0);
+ head_pae = vec_elt_at_index((*applied_hash_aces), an_index);
+ }
+ return an_index;
+}
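
For orientation: entries that share a masked key form a doubly-linked chain inside the applied-ACE vector. Only the head is stored in the bihash, and the head also caches the tail index so appends stay O(1); non-head entries carry ~0 there. Illustratively (indices hypothetical):

    /* bihash value -> head                                            */
    /*   head: prev=~0,   next=B,  tail=C  (tail kept only on the head) */
    /*   B:    prev=head, next=C,  tail=~0                              */
    /*   C:    prev=B,    next=~0, tail=~0                              */
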
+
+static void
+move_applied_ace_hash_entry(acl_main_t *am,
+ u32 sw_if_index, u8 is_input,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 old_index, u32 new_index)
+{
+ ASSERT(old_index != ~0);
+ ASSERT(new_index != ~0);
+ /* move the entry */
+ *vec_elt_at_index((*applied_hash_aces), new_index) = *vec_elt_at_index((*applied_hash_aces), old_index);
+
+ /* update the linkage and hash table if necessary */
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), old_index);
+
+ if (pae->prev_applied_entry_index != ~0) {
+ applied_hash_ace_entry_t *prev_pae = vec_elt_at_index((*applied_hash_aces), pae->prev_applied_entry_index);
+ ASSERT(prev_pae->next_applied_entry_index == old_index);
+ prev_pae->next_applied_entry_index = new_index;
+ } else {
+ /* first entry - so the hash points to it, update */
+ add_del_hashtable_entry(am, sw_if_index, is_input,
+ applied_hash_aces, new_index, 1);
+ ASSERT(pae->tail_applied_entry_index != ~0);
+ }
+ if (pae->next_applied_entry_index != ~0) {
+ applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index);
+ ASSERT(next_pae->prev_applied_entry_index == old_index);
+ next_pae->prev_applied_entry_index = new_index;
+ } else {
+ /*
+ * Moving the very last entry, so we need to update the tail pointer in the first one.
+ */
+ u32 head_index = find_head_applied_ace_index(applied_hash_aces, old_index);
+ ASSERT(head_index != ~0);
+ applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), head_index);
+
+ ASSERT(head_pae->tail_applied_entry_index == old_index);
+ head_pae->tail_applied_entry_index = new_index;
+ }
+ /* invalidate the old entry */
+ pae->prev_applied_entry_index = ~0;
+ pae->next_applied_entry_index = ~0;
+ pae->tail_applied_entry_index = ~0;
+}
+
+static void
+deactivate_applied_ace_hash_entry(acl_main_t *am,
+ u32 sw_if_index, u8 is_input,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 old_index)
+{
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), old_index);
+ DBG("UNAPPLY DEACTIVATE: sw_if_index %d is_input %d, applied index %d", sw_if_index, is_input, old_index);
+
+ if (pae->prev_applied_entry_index != ~0) {
+ DBG("UNAPPLY = index %d has prev_applied_entry_index %d", old_index, pae->prev_applied_entry_index);
+ applied_hash_ace_entry_t *prev_pae = vec_elt_at_index((*applied_hash_aces), pae->prev_applied_entry_index);
+ ASSERT(prev_pae->next_applied_entry_index == old_index);
+ prev_pae->next_applied_entry_index = pae->next_applied_entry_index;
+ if (pae->next_applied_entry_index == ~0) {
+ /* it was a last entry we removed, update the pointer on the first one */
+ u32 head_index = find_head_applied_ace_index(applied_hash_aces, old_index);
+ DBG("UNAPPLY = index %d head index to update %d", old_index, head_index);
+ ASSERT(head_index != ~0);
+ applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), head_index);
+
+ ASSERT(head_pae->tail_applied_entry_index == old_index);
+ head_pae->tail_applied_entry_index = pae->prev_applied_entry_index;
+ } else {
+ applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index);
+ next_pae->prev_applied_entry_index = pae->prev_applied_entry_index;
+ }
+ } else {
+    /* It was the first entry. We need to either reset the hash entry or delete it */
+ if (pae->next_applied_entry_index != ~0) {
+ /* the next element becomes the new first one, so needs the tail pointer to be set */
+ applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index);
+ ASSERT(pae->tail_applied_entry_index != ~0);
+ next_pae->tail_applied_entry_index = pae->tail_applied_entry_index;
+ DBG("Resetting the hash table entry from %d to %d, setting tail index to %d", old_index, pae->next_applied_entry_index, pae->tail_applied_entry_index);
+ /* unlink from the next element */
+ next_pae->prev_applied_entry_index = ~0;
+ add_del_hashtable_entry(am, sw_if_index, is_input,
+ applied_hash_aces, pae->next_applied_entry_index, 1);
+ } else {
+ /* no next entry, so just delete the entry in the hash table */
+ add_del_hashtable_entry(am, sw_if_index, is_input,
+ applied_hash_aces, old_index, 0);
+ }
+ }
+ /* invalidate the old entry */
+ pae->prev_applied_entry_index = ~0;
+ pae->next_applied_entry_index = ~0;
+ pae->tail_applied_entry_index = ~0;
+}
+
+
+static void
+hash_acl_build_applied_lookup_bitmap(acl_main_t *am, u32 sw_if_index, u8 is_input)
+{
+ int i;
+ uword *new_lookup_bitmap = 0;
+ applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index
+ : &am->output_applied_hash_acl_info_by_sw_if_index;
+ applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), sw_if_index);
+ for(i=0; i < vec_len(pal->applied_acls); i++) {
+ u32 a_acl_index = *vec_elt_at_index((pal->applied_acls), i);
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, a_acl_index);
+ DBG("Update bitmask = %U or %U (acl_index %d)\n", format_bitmap_hex, new_lookup_bitmap,
+ format_bitmap_hex, ha->mask_type_index_bitmap, a_acl_index);
+ new_lookup_bitmap = clib_bitmap_or(new_lookup_bitmap,
+ ha->mask_type_index_bitmap);
+ }
+ uword *old_lookup_bitmap = pal->mask_type_index_bitmap;
+ pal->mask_type_index_bitmap = new_lookup_bitmap;
+ clib_bitmap_free(old_lookup_bitmap);
+}
+
+void
+hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index)
+{
+ int i;
+
+ DBG0("HASH ACL unapply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index);
+ applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index
+ : &am->output_applied_hash_acl_info_by_sw_if_index;
+ applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), sw_if_index);
+
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
+ u32 **hash_acl_applied_sw_if_index = is_input ? &ha->inbound_sw_if_index_list
+ : &ha->outbound_sw_if_index_list;
+
+ /* remove this acl# from the list of applied hash acls */
+ u32 index = vec_search(pal->applied_acls, acl_index);
+ if (index == ~0) {
+ clib_warning("BUG: trying to unapply unapplied acl_index %d on sw_if_index %d is_input %d",
+ acl_index, sw_if_index, is_input);
+ return;
+ }
+ vec_del1(pal->applied_acls, index);
+
+ u32 index2 = vec_search((*hash_acl_applied_sw_if_index), sw_if_index);
+ if (index2 == ~0) {
+ clib_warning("BUG: trying to unapply twice acl_index %d on (sw_if_index %d) is_input %d",
+ acl_index, sw_if_index, is_input);
+ return;
+ }
+ vec_del1((*hash_acl_applied_sw_if_index), index2);
+
+ applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index);
+
+ for(i=0; i < vec_len((*applied_hash_aces)); i++) {
+ if (vec_elt_at_index(*applied_hash_aces,i)->acl_index == acl_index) {
+ DBG("Found applied ACL#%d at applied index %d", acl_index, i);
+ break;
+ }
+ }
+ if (vec_len((*applied_hash_aces)) <= i) {
+ DBG("Did not find applied ACL#%d at sw_if_index %d", acl_index, sw_if_index);
+    /* we went all the way without finding any entries. Probably the list was empty. */
+ return;
+ }
+
+ void *oldheap = hash_acl_set_heap(am);
+ int base_offset = i;
+ int tail_offset = base_offset + vec_len(ha->rules);
+ int tail_len = vec_len((*applied_hash_aces)) - tail_offset;
+ DBG("base_offset: %d, tail_offset: %d, tail_len: %d", base_offset, tail_offset, tail_len);
+
+ for(i=0; i < vec_len(ha->rules); i ++) {
+ deactivate_applied_ace_hash_entry(am, sw_if_index, is_input,
+ applied_hash_aces, base_offset + i);
+ }
+ for(i=0; i < tail_len; i ++) {
+ /* move the entry at tail offset to base offset */
+ /* that is, from (tail_offset+i) -> (base_offset+i) */
+    DBG("UNAPPLY MOVE: sw_if_index %d is_input %d, applied index %d -> %d", sw_if_index, is_input, tail_offset+i, base_offset + i);
+ move_applied_ace_hash_entry(am, sw_if_index, is_input, applied_hash_aces, tail_offset + i, base_offset + i);
+ }
+ /* trim the end of the vector */
+ _vec_len((*applied_hash_aces)) -= vec_len(ha->rules);
+
+ applied_hash_entries_analyze(am, applied_hash_aces);
+
+ /* After deletion we might not need some of the mask-types anymore... */
+ hash_acl_build_applied_lookup_bitmap(am, sw_if_index, is_input);
+ clib_mem_set_heap (oldheap);
+}
+
+/*
+ * Create the applied ACEs and update the hash table,
+ * taking into account that the ACL may not be the last
+ * in the vector of applied ACLs.
+ *
+ * For now, walk from the end of the vector and unapply the ACLs,
+ * then apply the one in question and reapply the rest.
+ */
+
+void
+hash_acl_reapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index)
+{
+ u32 **applied_acls = is_input ? vec_elt_at_index(am->input_acl_vec_by_sw_if_index, sw_if_index)
+ : vec_elt_at_index(am->output_acl_vec_by_sw_if_index, sw_if_index);
+ int i;
+ int start_index = vec_search((*applied_acls), acl_index);
+ /*
+ * This function is called after we find out the sw_if_index where ACL is applied.
+ * If the by-sw_if_index vector does not have the ACL#, then it's a bug.
+ */
+ ASSERT(start_index < vec_len(*applied_acls));
+
+ /* unapply all the ACLs till the current one */
+ for(i = vec_len(*applied_acls) - 1; i > start_index; i--) {
+ hash_acl_unapply(am, sw_if_index, is_input, *vec_elt_at_index(*applied_acls, i));
+ }
+ for(i = start_index; i < vec_len(*applied_acls); i++) {
+ hash_acl_apply(am, sw_if_index, is_input, *vec_elt_at_index(*applied_acls, i));
+ }
+}
+
+static void
+make_address_mask(ip46_address_t *addr, u8 is_ipv6, u8 prefix_len)
+{
+ if (is_ipv6) {
+ ip6_address_mask_from_width(&addr->ip6, prefix_len);
+ } else {
+    /* FIXME: this may not be the correct way */
+ ip6_address_mask_from_width(&addr->ip6, prefix_len + 3*32);
+ ip46_address_mask_ip4(addr);
+ }
+}
+
+static u8
+make_port_mask(u16 *portmask, u16 port_first, u16 port_last)
+{
+ if (port_first == port_last) {
+ *portmask = 0xffff;
+ /* single port is representable by masked value */
+ return 0;
+ }
+ if ((port_first == 0) && (port_last == 65535)) {
+ *portmask = 0;
+ /* wildcard port is representable by a masked value */
+ return 0;
+ }
+
+ /*
+   * For now, match all the ports; later a better optimization
+   * might be added here to pick out bitmaskable port ranges.
+ *
+ * However, adding a new mask type potentially
+ * adds a per-packet extra lookup, so the benefit is not clear.
+ */
+ *portmask = 0;
+ /* This port range can't be represented via bitmask exactly. */
+ return 1;
+}
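
The "better optimization" mentioned in the comment would detect ranges that a single mask can represent exactly. A hedged sketch of such a check (not part of this change; helper name hypothetical):

    /* A range [first, last] is exactly representable by one mask iff its
     * size is a power of two and first is aligned to that size: 16..31
     * (mask 0xfff0) qualifies, 0..42 does not. */
    static int
    port_range_is_maskable (u16 first, u16 last)
    {
      u32 size = (u32) last - (u32) first + 1;
      return ((size & (size - 1)) == 0) && ((first & (size - 1)) == 0);
    }
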
+
+static void
+make_mask_and_match_from_rule(fa_5tuple_t *mask, acl_rule_t *r, hash_ace_info_t *hi, int match_nonfirst_fragment)
+{
+ memset(mask, 0, sizeof(*mask));
+ memset(&hi->match, 0, sizeof(hi->match));
+ hi->action = r->is_permit;
+
+ /* we will need to be matching based on sw_if_index, direction, and mask_type_index when applied */
+ mask->pkt.sw_if_index = ~0;
+ mask->pkt.is_input = 1;
+ /* we will assign the match of mask_type_index later when we find it*/
+ mask->pkt.mask_type_index_lsb = ~0;
+
+ mask->pkt.is_ip6 = 1;
+ hi->match.pkt.is_ip6 = r->is_ipv6;
+
+ make_address_mask(&mask->addr[0], r->is_ipv6, r->src_prefixlen);
+ hi->match.addr[0] = r->src;
+ make_address_mask(&mask->addr[1], r->is_ipv6, r->dst_prefixlen);
+ hi->match.addr[1] = r->dst;
+
+ if (r->proto != 0) {
+ mask->l4.proto = ~0; /* L4 proto needs to be matched */
+ hi->match.l4.proto = r->proto;
+ if (match_nonfirst_fragment) {
+ /* match the non-first fragments only */
+ mask->pkt.is_nonfirst_fragment = 1;
+ hi->match.pkt.is_nonfirst_fragment = 1;
+ } else {
+ /* Calculate the src/dst port masks and make the src/dst port matches accordingly */
+ hi->src_portrange_not_powerof2 = make_port_mask(&mask->l4.port[0], r->src_port_or_type_first, r->src_port_or_type_last);
+ hi->match.l4.port[0] = r->src_port_or_type_first & mask->l4.port[0];
+ hi->dst_portrange_not_powerof2 = make_port_mask(&mask->l4.port[1], r->dst_port_or_code_first, r->dst_port_or_code_last);
+ hi->match.l4.port[1] = r->dst_port_or_code_first & mask->l4.port[1];
+ /* L4 info must be valid in order to match */
+ mask->pkt.l4_valid = 1;
+ hi->match.pkt.l4_valid = 1;
+ /* And we must set the mask to check that it is an initial fragment */
+ mask->pkt.is_nonfirst_fragment = 1;
+ hi->match.pkt.is_nonfirst_fragment = 0;
+ if ((r->proto == IPPROTO_TCP) && (r->tcp_flags_mask != 0)) {
+ /* if we want to match on TCP flags, they must be masked off as well */
+ mask->pkt.tcp_flags = r->tcp_flags_mask;
+ hi->match.pkt.tcp_flags = r->tcp_flags_value;
+ /* and the flags need to be present within the packet being matched */
+ mask->pkt.tcp_flags_valid = 1;
+ hi->match.pkt.tcp_flags_valid = 1;
+ }
+ }
+ }
+ /* Sanitize the mask and the match */
+ u64 *pmask = (u64 *)mask;
+ u64 *pmatch = (u64 *)&hi->match;
+ int j;
+ for(j=0; j<6; j++) {
+ pmatch[j] = pmatch[j] & pmask[j];
+ }
+}
+
+static u32
+find_mask_type_index(acl_main_t *am, fa_5tuple_t *mask)
+{
+ ace_mask_type_entry_t *mte;
+ /* *INDENT-OFF* */
+ pool_foreach(mte, am->ace_mask_type_pool,
+ ({
+ if(memcmp(&mte->mask, mask, sizeof(*mask)) == 0)
+ return (mte - am->ace_mask_type_pool);
+ }));
+ /* *INDENT-ON* */
+ return ~0;
+}
+
+static u32
+assign_mask_type_index(acl_main_t *am, fa_5tuple_t *mask)
+{
+ u32 mask_type_index = find_mask_type_index(am, mask);
+ ace_mask_type_entry_t *mte;
+ if(~0 == mask_type_index) {
+ pool_get_aligned (am->ace_mask_type_pool, mte, CLIB_CACHE_LINE_BYTES);
+ mask_type_index = mte - am->ace_mask_type_pool;
+ clib_memcpy(&mte->mask, mask, sizeof(mte->mask));
+ mte->refcount = 0;
+ /*
+     * We can use only 16 bits, since the match has only a u16 field.
+     * Realistically, once you get to 64K mask types, it is a huge
+     * problem anyway, so we might as well stop halfway.
+ */
+ ASSERT(mask_type_index < 32768);
+ }
+ mte = am->ace_mask_type_pool + mask_type_index;
+ mte->refcount++;
+ return mask_type_index;
+}
+
+static void
+release_mask_type_index(acl_main_t *am, u32 mask_type_index)
+{
+ ace_mask_type_entry_t *mte = pool_elt_at_index(am->ace_mask_type_pool, mask_type_index);
+ mte->refcount--;
+ if (mte->refcount == 0) {
+ /* we are not using this entry anymore */
+ pool_put(am->ace_mask_type_pool, mte);
+ }
+}
+
+void hash_acl_add(acl_main_t *am, int acl_index)
+{
+ void *oldheap = hash_acl_set_heap(am);
+ DBG("HASH ACL add : %d", acl_index);
+ int i;
+ acl_list_t *a = &am->acls[acl_index];
+ vec_validate(am->hash_acl_infos, acl_index);
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
+ memset(ha, 0, sizeof(*ha));
+
+ /* walk the newly added ACL entries and ensure that for each of them there
+ is a mask type, increment a reference count for that mask type */
+ for(i=0; i < a->count; i++) {
+ hash_ace_info_t ace_info;
+ fa_5tuple_t mask;
+ memset(&ace_info, 0, sizeof(ace_info));
+ ace_info.acl_index = acl_index;
+ ace_info.ace_index = i;
+
+ make_mask_and_match_from_rule(&mask, &a->rules[i], &ace_info, 0);
+ ace_info.mask_type_index = assign_mask_type_index(am, &mask);
+ /* assign the mask type index for matching itself */
+ ace_info.match.pkt.mask_type_index_lsb = ace_info.mask_type_index;
+ DBG("ACE: %d mask_type_index: %d", i, ace_info.mask_type_index);
+ /* Ensure a given index is set in the mask type index bitmap for this ACL */
+ ha->mask_type_index_bitmap = clib_bitmap_set(ha->mask_type_index_bitmap, ace_info.mask_type_index, 1);
+ vec_add1(ha->rules, ace_info);
+ if (am->l4_match_nonfirst_fragment) {
+ /* add the second rule which matches the noninitial fragments with the respective mask */
+ make_mask_and_match_from_rule(&mask, &a->rules[i], &ace_info, 1);
+ ace_info.mask_type_index = assign_mask_type_index(am, &mask);
+ ace_info.match.pkt.mask_type_index_lsb = ace_info.mask_type_index;
+ DBG("ACE: %d (non-initial frags) mask_type_index: %d", i, ace_info.mask_type_index);
+ /* Ensure a given index is set in the mask type index bitmap for this ACL */
+ ha->mask_type_index_bitmap = clib_bitmap_set(ha->mask_type_index_bitmap, ace_info.mask_type_index, 1);
+ vec_add1(ha->rules, ace_info);
+ }
+ }
+ /*
+ * if an ACL is applied somewhere, fill the corresponding lookup data structures.
+ * We need to take care if the ACL is not the last one in the vector of ACLs applied to the interface.
+ */
+ if (acl_index < vec_len(am->input_sw_if_index_vec_by_acl)) {
+ u32 *sw_if_index;
+ vec_foreach(sw_if_index, am->input_sw_if_index_vec_by_acl[acl_index]) {
+ hash_acl_reapply(am, *sw_if_index, 1, acl_index);
+ }
+ }
+ if (acl_index < vec_len(am->output_sw_if_index_vec_by_acl)) {
+ u32 *sw_if_index;
+ vec_foreach(sw_if_index, am->output_sw_if_index_vec_by_acl[acl_index]) {
+ hash_acl_reapply(am, *sw_if_index, 0, acl_index);
+ }
+ }
+ clib_mem_set_heap (oldheap);
+}
+
+void hash_acl_delete(acl_main_t *am, int acl_index)
+{
+ void *oldheap = hash_acl_set_heap(am);
+ DBG0("HASH ACL delete : %d", acl_index);
+ /*
+   * If the ACL is applied somewhere, remove the references to it (call hash_acl_unapply).
+   * This is different behavior from the linear lookup, where an empty ACL is "deny all".
+   *
+   * However, following the vpp-dev discussion, it should not be possible to
+   * delete an ACL that is referenced elsewhere, and the change adding this
+   * code also adds the corresponding safeguards, so this is not a problem.
+   *
+   * The part to remember is that this routine is called during the reapplication
+   * within the acl_add_replace() API call - the old ACL ruleset is deleted, then
+   * the new one is added, without any change in the applied ACLs - so this case
+   * has to be handled.
+ */
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
+ u32 *interface_list_copy = 0;
+ {
+ u32 *sw_if_index;
+ interface_list_copy = vec_dup(ha->inbound_sw_if_index_list);
+ vec_foreach(sw_if_index, interface_list_copy) {
+ hash_acl_unapply(am, *sw_if_index, 1, acl_index);
+ }
+ vec_free(interface_list_copy);
+ interface_list_copy = vec_dup(ha->outbound_sw_if_index_list);
+ vec_foreach(sw_if_index, interface_list_copy) {
+ hash_acl_unapply(am, *sw_if_index, 0, acl_index);
+ }
+ }
+
+ /* walk the mask types for the ACL about-to-be-deleted, and decrease
+ * the reference count, possibly freeing up some of them */
+ int i;
+ for(i=0; i < vec_len(ha->rules); i++) {
+ release_mask_type_index(am, ha->rules[i].mask_type_index);
+ }
+ clib_bitmap_free(ha->mask_type_index_bitmap);
+ vec_free(ha->rules);
+ clib_mem_set_heap (oldheap);
+}
+
+u8
+hash_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap)
+{
+ acl_main_t *am = &acl_main;
+ applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index);
+ u32 match_index = multi_acl_match_get_applied_ace_index(am, pkt_5tuple);
+ if (match_index < vec_len((*applied_hash_aces))) {
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), match_index);
+ pae->hitcount++;
+ *acl_match_p = pae->acl_index;
+ *rule_match_p = pae->ace_index;
+ return pae->action;
+ }
+ return 0;
+}
+
+
+void
+show_hash_acl_hash (vlib_main_t * vm, acl_main_t *am, u32 verbose)
+{
+ vlib_cli_output(vm, "\nACL lookup hash table:\n%U\n",
+ BV (format_bihash), &am->acl_lookup_hash, verbose);
+}
diff --git a/src/plugins/acl/hash_lookup.h b/src/plugins/acl/hash_lookup.h
new file mode 100644
index 00000000..2d7058e8
--- /dev/null
+++ b/src/plugins/acl/hash_lookup.h
@@ -0,0 +1,64 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _ACL_HASH_LOOKUP_H_
+#define _ACL_HASH_LOOKUP_H_
+
+#include <stddef.h>
+#include "acl.h"
+
+/*
+ * Do what is necessary to logically apply the ACL to the existing vector of ACLs
+ * looked up during packet processing.
+ */
+
+void hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index);
+
+/* Remove the ACL from the packet processing lookups on a given interface */
+
+void hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index);
+
+/*
+ * Add or delete an ACL. The ACL may already be referenced elsewhere,
+ * so potentially we also need to do the work to enable the lookups.
+ */
+
+void hash_acl_add(acl_main_t *am, int acl_index);
+void hash_acl_delete(acl_main_t *am, int acl_index);
+
+/*
+ * Do the work required to match a given 5-tuple from the packet,
+ * and return the action as well as populate the values pointed
+ * to by the *_match_p pointers and maybe trace_bitmap.
+ */
+
+u8
+hash_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap);
+
+
+/*
+ * The debug function to show the contents of the ACL lookup hash
+ */
+void show_hash_acl_hash(vlib_main_t * vm, acl_main_t *am, u32 verbose);
+
+/* Debug functions to turn validate/trace on and off */
+void acl_plugin_hash_acl_set_validate_heap(acl_main_t *am, int on);
+void acl_plugin_hash_acl_set_trace_heap(acl_main_t *am, int on);
+
+#endif
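
Putting the declarations together, a hypothetical control-plane sequence using this API (indices made up for illustration; error handling omitted) would be:

    /* Sketch, assuming ACL #5 already exists in acl_main and
     * sw_if_index 1 is a valid interface. */
    void
    example_hash_acl_lifecycle (acl_main_t * am)
    {
      hash_acl_add (am, 5);                 /* build masks and rule entries */
      hash_acl_apply (am, 1 /* sw_if_index */, 1 /* is_input */, 5);
      /* the datapath now resolves packets via hash_multi_acl_match_5tuple() */
      hash_acl_unapply (am, 1, 1, 5);
      hash_acl_delete (am, 5);
    }
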
diff --git a/src/plugins/acl/hash_lookup_private.h b/src/plugins/acl/hash_lookup_private.h
new file mode 100644
index 00000000..bc621416
--- /dev/null
+++ b/src/plugins/acl/hash_lookup_private.h
@@ -0,0 +1,33 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define ACL_HASH_LOOKUP_DEBUG 0
+
+#if ACL_HASH_LOOKUP_DEBUG == 1
+#define DBG0(...) clib_warning(__VA_ARGS__)
+#define DBG(...)
+#define DBG_UNIX_LOG(...)
+#elif ACL_HASH_LOOKUP_DEBUG == 2
+#define DBG0(...) clib_warning(__VA_ARGS__)
+#define DBG(...) clib_warning(__VA_ARGS__)
+#define DBG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__)
+#else
+#define DBG0(...)
+#define DBG(...)
+#define DBG_UNIX_LOG(...)
+#endif
+
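
These macros are compile-time only, so getting the verbose output means changing the level and rebuilding the plugin, for example:

    /* In hash_lookup_private.h, before rebuilding:
     * 0 = off, 1 = DBG0 only, 2 = DBG0 + DBG + DBG_UNIX_LOG */
    #define ACL_HASH_LOOKUP_DEBUG 2
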
diff --git a/src/plugins/acl/hash_lookup_types.h b/src/plugins/acl/hash_lookup_types.h
new file mode 100644
index 00000000..1fa197ec
--- /dev/null
+++ b/src/plugins/acl/hash_lookup_types.h
@@ -0,0 +1,107 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _ACL_HASH_LOOKUP_TYPES_H_
+#define _ACL_HASH_LOOKUP_TYPES_H_
+
+/* The structure representing the single entry with hash representation */
+typedef struct {
+ /* these two entries refer to the original ACL# and rule# within that ACL */
+ u32 acl_index;
+ u32 ace_index;
+
+ u32 mask_type_index;
+ u8 src_portrange_not_powerof2;
+ u8 dst_portrange_not_powerof2;
+
+ fa_5tuple_t match;
+ u8 action;
+} hash_ace_info_t;
+
+/*
+ * The structure holding the information necessary for the hash-based ACL operation
+ */
+typedef struct {
+ /* The mask types present in this ACL */
+ uword *mask_type_index_bitmap;
+ /* hash ACL applied on these interfaces */
+ u32 *inbound_sw_if_index_list;
+ u32 *outbound_sw_if_index_list;
+ hash_ace_info_t *rules;
+} hash_acl_info_t;
+
+typedef struct {
+ /* original non-compiled ACL */
+ u32 acl_index;
+ u32 ace_index;
+ /* the index of the hash_ace_info_t */
+ u32 hash_ace_info_index;
+ /*
+ * in case of the same key having multiple entries,
+ * this holds the index of the next entry.
+ */
+ u32 next_applied_entry_index;
+ /*
+ * previous entry in the list of the chained ones,
+   * previous entry in the list of chained entries;
+   * if ~0, then this entry is the head, pointed to from the hash.
+ u32 prev_applied_entry_index;
+ /*
+ * chain tail, if this is the first entry
+ */
+ u32 tail_applied_entry_index;
+ /*
+ * number of hits on this entry
+ */
+ u64 hitcount;
+ /*
+ * Action of this applied ACE
+ */
+ u8 action;
+} applied_hash_ace_entry_t;
+
+typedef struct {
+ /*
+ * A logical OR of all the applied_ace_hash_entry_t=>
+ * hash_ace_info_t=>mask_type_index bits set
+ */
+ uword *mask_type_index_bitmap;
+ /* applied ACLs so we can track them independently from main ACL module */
+ u32 *applied_acls;
+} applied_hash_acl_info_t;
+
+
+typedef union {
+ u64 as_u64;
+ struct {
+ u32 applied_entry_index;
+ u16 reserved_u16;
+ u8 reserved_u8;
+ /* means there is some other entry in front intersecting with this one */
+ u8 shadowed:1;
+ u8 need_portrange_check:1;
+ u8 reserved_flags:6;
+ };
+} hash_acl_lookup_value_t;
+
+#define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y))-1]
+
+CT_ASSERT_EQUAL(hash_acl_lookup_value_t_is_u64, sizeof(hash_acl_lookup_value_t), sizeof(u64));
+
+#undef CT_ASSERT_EQUAL
+
+#endif
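
CT_ASSERT_EQUAL is the classic negative-array-size compile-time assertion: a false condition produces a typedef of an array with size -1, which fails to compile, while a true condition yields size 0 and compiles. A passing example of the same pattern (it would have to sit before the #undef in this header):

    /* Expands to:
     *   typedef int assert_demo_u64_is_8_bytes_compile_time_assertion_failed[(sizeof (u64) == 8) - 1];
     * sizeof (u64) == 8 holds, so the array size is 0 and the build succeeds;
     * had it been false, the size would be -1 and compilation would break. */
    CT_ASSERT_EQUAL (demo_u64_is_8_bytes, sizeof (u64), 8);
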
diff --git a/src/plugins/acl/manual_fns.h b/src/plugins/acl/manual_fns.h
new file mode 100644
index 00000000..e00f1abc
--- /dev/null
+++ b/src/plugins/acl/manual_fns.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_manual_fns_h
+#define included_manual_fns_h
+
+#include <vnet/ip/format.h>
+#include <vnet/ethernet/ethernet.h>
+
+/* Macro to finish up custom dump fns */
+#define PRINT_S \
+ vec_add1 (s, 0); \
+ vl_print (handle, (char *)s); \
+ vec_free (s);
+
+static inline void
+vl_api_acl_rule_t_array_endian(vl_api_acl_rule_t *rules, u32 count)
+{
+ u32 i;
+ for(i=0; i<count; i++) {
+ vl_api_acl_rule_t_endian (&rules[i]);
+ }
+}
+
+static inline void
+vl_api_macip_acl_rule_t_array_endian(vl_api_macip_acl_rule_t *rules, u32 count)
+{
+ u32 i;
+ for(i=0; i<count; i++) {
+ vl_api_macip_acl_rule_t_endian (&rules[i]);
+ }
+}
+
+static inline void
+vl_api_acl_details_t_endian (vl_api_acl_details_t * a)
+{
+ a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id);
+ a->context = clib_net_to_host_u32 (a->context);
+ a->acl_index = clib_net_to_host_u32 (a->acl_index);
+ /* a->tag[0..63] = a->tag[0..63] (no-op) */
+ a->count = clib_net_to_host_u32 (a->count);
+ vl_api_acl_rule_t_array_endian (a->r, a->count);
+}
+
+static inline void
+vl_api_macip_acl_details_t_endian (vl_api_macip_acl_details_t * a)
+{
+ a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id);
+ a->context = clib_net_to_host_u32 (a->context);
+ a->acl_index = clib_net_to_host_u32 (a->acl_index);
+ /* a->tag[0..63] = a->tag[0..63] (no-op) */
+ a->count = clib_net_to_host_u32 (a->count);
+ vl_api_macip_acl_rule_t_array_endian (a->r, a->count);
+}
+
+
+static inline void
+vl_api_acl_add_replace_t_endian (vl_api_acl_add_replace_t * a)
+{
+ a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id);
+ a->client_index = clib_net_to_host_u32 (a->client_index);
+ a->context = clib_net_to_host_u32 (a->context);
+ a->acl_index = clib_net_to_host_u32 (a->acl_index);
+ /* a->tag[0..63] = a->tag[0..63] (no-op) */
+ a->count = clib_net_to_host_u32 (a->count);
+ vl_api_acl_rule_t_array_endian (a->r, a->count);
+}
+
+static inline void
+vl_api_macip_acl_add_t_endian (vl_api_macip_acl_add_t * a)
+{
+ a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id);
+ a->client_index = clib_net_to_host_u32 (a->client_index);
+ a->context = clib_net_to_host_u32 (a->context);
+ /* a->tag[0..63] = a->tag[0..63] (no-op) */
+ a->count = clib_net_to_host_u32 (a->count);
+ vl_api_macip_acl_rule_t_array_endian (a->r, a->count);
+}
+
+static inline void
+vl_api_macip_acl_add_replace_t_endian (vl_api_macip_acl_add_replace_t * a)
+{
+ a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id);
+ a->client_index = clib_net_to_host_u32 (a->client_index);
+ a->context = clib_net_to_host_u32 (a->context);
+ a->acl_index = clib_net_to_host_u32 (a->acl_index);
+ /* a->tag[0..63] = a->tag[0..63] (no-op) */
+ a->count = clib_net_to_host_u32 (a->count);
+ vl_api_macip_acl_rule_t_array_endian (a->r, a->count);
+}
+
+static inline u8 *
+format_acl_action(u8 *s, u8 action)
+{
+ switch(action) {
+ case 0:
+ s = format (s, "deny");
+ break;
+ case 1:
+ s = format (s, "permit");
+ break;
+ case 2:
+ s = format (s, "permit+reflect");
+ break;
+ default:
+ s = format (s, "action %d", action);
+ }
+ return(s);
+}
+
+static inline void *
+vl_api_acl_rule_t_print (vl_api_acl_rule_t * a, void *handle)
+{
+ u8 *s;
+
+ s = format (0, " %s ", a->is_ipv6 ? "ipv6" : "ipv4");
+ s = format_acl_action (s, a->is_permit);
+ s = format (s, " \\\n");
+
+ if (a->is_ipv6)
+ s = format (s, " src %U/%d dst %U/%d \\\n",
+ format_ip6_address, a->src_ip_addr, a->src_ip_prefix_len,
+ format_ip6_address, a->dst_ip_addr, a->dst_ip_prefix_len);
+ else
+ s = format (s, " src %U/%d dst %U/%d \\\n",
+ format_ip4_address, a->src_ip_addr, a->src_ip_prefix_len,
+ format_ip4_address, a->dst_ip_addr, a->dst_ip_prefix_len);
+ s = format (s, " proto %d \\\n", a->proto);
+ s = format (s, " sport %d-%d dport %d-%d \\\n",
+ clib_net_to_host_u16 (a->srcport_or_icmptype_first),
+ clib_net_to_host_u16 (a->srcport_or_icmptype_last),
+ clib_net_to_host_u16 (a->dstport_or_icmpcode_first),
+ clib_net_to_host_u16 (a->dstport_or_icmpcode_last));
+
+ s = format (s, " tcpflags %u mask %u, \\",
+ a->tcp_flags_value, a->tcp_flags_mask);
+ PRINT_S;
+ return handle;
+}
+
+
+
+static inline void *
+vl_api_macip_acl_rule_t_print (vl_api_macip_acl_rule_t * a, void *handle)
+{
+ u8 *s;
+
+ s = format (0, " %s %s \\\n", a->is_ipv6 ? "ipv6" : "ipv4",
+ a->is_permit ? "permit" : "deny");
+
+ s = format (s, " src mac %U mask %U \\\n",
+ format_ethernet_address, a->src_mac,
+ format_ethernet_address, a->src_mac_mask);
+
+ if (a->is_ipv6)
+ s = format (s, " src ip %U/%d, \\",
+ format_ip6_address, a->src_ip_addr, a->src_ip_prefix_len);
+ else
+ s = format (s, " src ip %U/%d, \\",
+ format_ip4_address, a->src_ip_addr, a->src_ip_prefix_len);
+
+ PRINT_S;
+ return handle;
+}
+
+static inline void *
+vl_api_acl_add_replace_t_print (vl_api_acl_add_replace_t * a, void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 acl_index = clib_net_to_host_u32 (a->acl_index);
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+ s = format (s, "WARN: acl_add_replace count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "SCRIPT: acl_add_replace %d count %d ",
+ acl_index, count);
+
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format(s, "\\\n");
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_acl_rule_t_print (&a->r[i], handle);
+
+ s = format(s, "\n");
+ PRINT_S;
+ return handle;
+}
+
+static inline void *
+vl_api_acl_del_t_print (vl_api_acl_del_t * a, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: acl_del %d ",
+	      clib_net_to_host_u32 (a->acl_index));
+
+ PRINT_S;
+ return handle;
+}
+
+
+static inline void *
+vl_api_acl_details_t_print (vl_api_acl_details_t * a, void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 acl_index = clib_net_to_host_u32 (a->acl_index);
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+      s = format (s, "WARN: acl_details count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "acl_details index %d count %d ",
+ acl_index, count);
+
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format(s, "\n");
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_acl_rule_t_print (&a->r[i], handle);
+
+ return handle;
+}
+
+static inline void *
+vl_api_macip_acl_details_t_print (vl_api_macip_acl_details_t * a,
+ void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 acl_index = clib_net_to_host_u32 (a->acl_index);
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+      s = format (s, "WARN: macip_acl_details count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "macip_acl_details index %d count %d ",
+ acl_index, count);
+
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format(s, "\n");
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_macip_acl_rule_t_print (&a->r[i], handle);
+
+ return handle;
+}
+
+static inline void *
+vl_api_macip_acl_add_t_print (vl_api_macip_acl_add_t * a, void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+ s = format (s, "WARN: macip_acl_add count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "SCRIPT: macip_acl_add ");
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format (s, "count %d \\\n", count);
+
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_macip_acl_rule_t_print (&a->r[i], handle);
+
+ s = format (0, "\n");
+ PRINT_S;
+
+ return handle;
+}
+
+static inline void *
+vl_api_macip_acl_add_replace_t_print (vl_api_macip_acl_add_replace_t * a, void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 acl_index = clib_net_to_host_u32 (a->acl_index);
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+ s = format (s, "WARN: macip_acl_add_replace count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+  s = format (s, "SCRIPT: macip_acl_add_replace %d count %d ",
+	      acl_index, count);
+  if (a->tag[0])
+    s = format (s, "tag %s ", a->tag);
+
+  s = format (s, "\\\n");
+
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_macip_acl_rule_t_print (&a->r[i], handle);
+
+ s = format (0, "\n");
+ PRINT_S;
+
+ return handle;
+}
+
+static inline void *
+vl_api_acl_interface_set_acl_list_t_print (vl_api_acl_interface_set_acl_list_t
+ * a, void *handle)
+{
+ u8 *s;
+ int i;
+
+ s = format
+ (0, "SCRIPT: acl_interface_set_acl_list sw_if_index %d count %d\n",
+ clib_net_to_host_u32 (a->sw_if_index), (u32) a->count);
+
+ s = format (s, " input ");
+
+ for (i = 0; i < a->count; i++)
+ {
+ if (i == a->n_input)
+ s = format (s, "output ");
+ s = format (s, "%d ", clib_net_to_host_u32 (a->acls[i]));
+ }
+
+ PRINT_S;
+ return handle;
+}
+
+static inline void *
+vl_api_acl_interface_add_del_t_print (vl_api_acl_interface_add_del_t * a,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: acl_interface_add_del sw_if_index %d acl %d ",
+ clib_net_to_host_u32 (a->sw_if_index),
+ clib_net_to_host_u32 (a->acl_index));
+ s = format (s, "%s %s",
+ a->is_input ? "input" : "output", a->is_add ? "add" : "del");
+
+ PRINT_S;
+ return handle;
+}
+
+static inline void *vl_api_macip_acl_interface_add_del_t_print
+ (vl_api_macip_acl_interface_add_del_t * a, void *handle)
+{
+ u8 *s;
+
+ s = format
+ (0,
+ "SCRIPT: macip_acl_interface_add_del sw_if_index %d acl_index %d ",
+ clib_net_to_host_u32 (a->sw_if_index),
+ clib_net_to_host_u32 (a->acl_index));
+ s = format (s, "%s", a->is_add ? "add" : "del");
+
+ PRINT_S;
+ return handle;
+}
+
+
+static inline void *
+vl_api_macip_acl_del_t_print (vl_api_macip_acl_del_t * a, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: macip_acl_del %d ",
+	      clib_net_to_host_u32 (a->acl_index));
+
+ PRINT_S;
+ return handle;
+}
+
+
+#endif /* included_manual_fns_h */
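
One ordering detail worth noting in the endian helpers above: a->count is converted with clib_net_to_host_u32() before it is used as the array length for the rule conversion - swapping the rule array first, with the count still in network byte order, would walk the wrong number of elements. The count > 0x100000 guards in the print functions exist for a related reason: a count whose endianness was not converted would look enormous on a little-endian host and cause a very long loop.
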
diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am
new file mode 100644
index 00000000..15195a21
--- /dev/null
+++ b/src/plugins/dpdk.am
@@ -0,0 +1,73 @@
+# Copyright (c) 2016 Cisco Systems, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += dpdk_test_plugin.la
+vppplugins_LTLIBRARIES += dpdk_plugin.la
+
+if ENABLE_DPDK_SHARED
+dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -ldpdk
+else
+dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive
+endif
+if WITH_AESNI_MB_LIB
+dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libIPSec_MB.a,-l:libIPSec_MB.a
+endif
+if WITH_ISA_L_CRYPTO_LIB
+dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libisal_crypto.a,-l:libisal_crypto.a
+endif
+if WITH_IBVERBS_LIB
+dpdk_plugin_la_LDFLAGS += -Wl,-libverbs
+endif
+if DPDK_IS_1702_OR_1705
+dpdk_plugin_la_CFLAGS = $(AM_CFLAGS) -DDPDK_VOID_CALLBACK=1 -DDPDK_NO_AEAD=1
+else
+dpdk_plugin_la_CFLAGS = $(AM_CFLAGS) -DDPDK_VOID_CALLBACK=0 -DDPDK_NO_AEAD=0
+dpdk_plugin_la_LDFLAGS += -Wl,-lnuma
+endif
+
+dpdk_plugin_la_LDFLAGS += -Wl,-lm,-ldl
+
+dpdk_plugin_la_SOURCES = \
+ dpdk/main.c \
+ dpdk/buffer.c \
+ dpdk/thread.c \
+ dpdk/api/dpdk_api.c \
+ dpdk/device/cli.c \
+ dpdk/device/common.c \
+ dpdk/device/dpdk_priv.h \
+ dpdk/device/device.c \
+ dpdk/device/format.c \
+ dpdk/device/init.c \
+ dpdk/device/node.c \
+ dpdk/hqos/hqos.c \
+ dpdk/ipsec/esp_encrypt.c \
+ dpdk/ipsec/esp_decrypt.c \
+ dpdk/ipsec/crypto_node.c \
+ dpdk/ipsec/cli.c \
+ dpdk/ipsec/ipsec.c \
+ dpdk/api/dpdk_plugin.api.h
+
+API_FILES += dpdk/api/dpdk.api
+
+nobase_include_HEADERS += \
+ dpdk/device/dpdk.h \
+ dpdk/api/dpdk_all_api_h.h
+
+nobase_include_HEADERS += \
+ dpdk/ipsec/ipsec.h \
+ dpdk/ipsec/esp.h
+
+dpdk_test_plugin_la_SOURCES = \
+ dpdk/api/dpdk_test.c dpdk/api/dpdk_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api
new file mode 100644
index 00000000..d43f8a36
--- /dev/null
+++ b/src/plugins/dpdk/api/dpdk.api
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief DPDK interface HQoS pipe profile set request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface
+ @param subport - subport ID
+ @param pipe - pipe ID within its subport
+ @param profile - pipe profile ID
+*/
+autoreply define sw_interface_set_dpdk_hqos_pipe {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 subport;
+ u32 pipe;
+ u32 profile;
+};
+
+/** \brief DPDK interface HQoS subport parameters set request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface
+ @param subport - subport ID
+ @param tb_rate - subport token bucket rate (measured in bytes/second)
+ @param tb_size - subport token bucket size (measured in credits)
+ @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second)
+ @param tc_period - enforcement period for rates (measured in milliseconds)
+*/
+autoreply define sw_interface_set_dpdk_hqos_subport {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 subport;
+ u32 tb_rate;
+ u32 tb_size;
+ u32 tc_rate[4];
+ u32 tc_period;
+};
+
+/** \brief DPDK interface HQoS tctbl entry set request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface
+ @param entry - entry index ID
+ @param tc - traffic class (0 .. 3)
+ @param queue - traffic class queue (0 .. 3)
+*/
+autoreply define sw_interface_set_dpdk_hqos_tctbl {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 entry;
+ u32 tc;
+ u32 queue;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
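
These three messages map onto rte_sched configuration calls in dpdk_api.c below. A hypothetical client-side sketch of filling in the pipe request (message-id setup and transport omitted; all values invented):

    #include <arpa/inet.h>
    #include <string.h>

    /* Fields travel in network byte order - the handler ntohl()s them back. */
    static void
    build_hqos_pipe_request (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp)
    {
      memset (mp, 0, sizeof (*mp));
      mp->sw_if_index = htonl (5);
      mp->subport = htonl (0);
      mp->pipe = htonl (2);
      mp->profile = htonl (1);
    }
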
diff --git a/src/plugins/dpdk/api/dpdk_all_api_h.h b/src/plugins/dpdk/api/dpdk_all_api_h.h
new file mode 100644
index 00000000..15eb98d6
--- /dev/null
+++ b/src/plugins/dpdk/api/dpdk_all_api_h.h
@@ -0,0 +1,19 @@
+
+/*
+ * dpdk_all_api_h.h - skeleton vpp engine plug-in api #include file
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <dpdk/api/dpdk.api.h>
diff --git a/src/plugins/dpdk/api/dpdk_api.c b/src/plugins/dpdk/api/dpdk_api.c
new file mode 100755
index 00000000..97c4bc75
--- /dev/null
+++ b/src/plugins/dpdk/api/dpdk_api.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/bitmap.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <dpdk/device/dpdk.h>
+#include <vlib/pci/pci.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include <dpdk/device/dpdk_priv.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <dpdk/api/dpdk_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <dpdk/api/dpdk_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun		/* define endian-swap functions */
+#include <dpdk/api/dpdk_all_api_h.h>
+#undef vl_endianfun
+
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <dpdk/api/dpdk_all_api_h.h>
+#undef vl_api_version
+
+/* Macro to finish up custom dump fns */
+#define FINISH \
+ vec_add1 (s, 0); \
+ vl_print (handle, (char *)s); \
+ vec_free (s); \
+ return handle;
+
+#include <vlibapi/api_helper_macros.h>
+
+static void
+ vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler
+ (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp)
+{
+ vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp;
+ int rv = 0;
+
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd;
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ u32 subport = ntohl (mp->subport);
+ u32 pipe = ntohl (mp->pipe);
+ u32 profile = ntohl (mp->profile);
+ vnet_hw_interface_t *hw;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ /* hw_if & dpdk device */
+ hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index);
+
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY);
+}
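+
+/* Pattern note: VALIDATE_SW_IF_INDEX branches to BAD_SW_IF_INDEX_LABEL
+ * (setting rv) when the index is unknown, and REPLY_MACRO sends the
+ * autoreply carrying rv back to the client (see api_helper_macros.h). */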
+
+static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print
+ (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe ");
+
+ s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index));
+
+ s = format (s, "subport %u pipe %u profile %u ",
+ ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile));
+
+ FINISH;
+}
+
+static void
+ vl_api_sw_interface_set_dpdk_hqos_subport_t_handler
+ (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp)
+{
+ vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp;
+ int rv = 0;
+
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd;
+ struct rte_sched_subport_params p;
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ u32 subport = ntohl (mp->subport);
+ p.tb_rate = ntohl (mp->tb_rate);
+ p.tb_size = ntohl (mp->tb_size);
+ p.tc_rate[0] = ntohl (mp->tc_rate[0]);
+ p.tc_rate[1] = ntohl (mp->tc_rate[1]);
+ p.tc_rate[2] = ntohl (mp->tc_rate[2]);
+ p.tc_rate[3] = ntohl (mp->tc_rate[3]);
+ p.tc_period = ntohl (mp->tc_period);
+
+ vnet_hw_interface_t *hw;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ /* hw_if & dpdk device */
+ hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index);
+
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY);
+}
+
+static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print
+ (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport ");
+
+ s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index));
+
+ s =
+ format (s,
+ "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u",
+ ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size),
+ ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]),
+ ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]),
+ ntohl (mp->tc_period));
+
+ FINISH;
+}
+
+static void
+ vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler
+ (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp)
+{
+ vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp;
+ int rv = 0;
+
+ dpdk_main_t *dm = &dpdk_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_device_t *xd;
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ u32 entry = ntohl (mp->entry);
+ u32 tc = ntohl (mp->tc);
+ u32 queue = ntohl (mp->queue);
+ u32 val, i;
+
+ vnet_hw_interface_t *hw;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ /* hw_if & dpdk device */
+ hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index);
+
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
+ {
+ clib_warning ("invalid traffic class !!");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto done;
+ }
+ if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+ {
+ clib_warning ("invalid queue !!");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto done;
+ }
+
+ /* Detect the set of worker threads */
+ uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+
+ if (p == 0)
+ {
+ clib_warning ("worker thread registration AWOL !!");
+ rv = VNET_API_ERROR_INVALID_VALUE_2;
+ goto done;
+ }
+
+ vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0];
+ int worker_thread_first = tr->first_index;
+ int worker_thread_count = tr->count;
+
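+  /* Flatten (tc, queue) into a single table value; with 4 queues per
+     traffic class, e.g. tc 1, queue 2 is stored as 1 * 4 + 2 = 6 */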
+ val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+ for (i = 0; i < worker_thread_count; i++)
+ xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val;
+
+ BAD_SW_IF_INDEX_LABEL;
+done:
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY);
+}
+
+static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print
+ (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl ");
+
+ s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index));
+
+ s = format (s, "entry %u tc %u queue %u",
+ ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue));
+
+ FINISH;
+}
+
+#define foreach_dpdk_plugin_api_msg \
+_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \
+_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \
+_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl)
+
+/* Set up the API message handling tables */
+static clib_error_t *
+dpdk_plugin_api_hookup (vlib_main_t * vm)
+{
+ dpdk_main_t *dm __attribute__ ((unused)) = &dpdk_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_dpdk_plugin_api_msg;
+#undef _
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <dpdk/api/dpdk_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (dpdk_main_t * dm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + dm->msg_id_base);
+ foreach_vl_msg_name_crc_dpdk;
+#undef _
+}
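+
+/* Plugin message IDs are offsets from msg_id_base, assigned by vpp at
+ * runtime. With a hypothetical base of 400, the first message above
+ * (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE, enum value 0) is registered
+ * as global message ID 400, its autoreply as 401, and so on. */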
+
+// TODO
+/*
+static void plugin_custom_dump_configure (dpdk_main_t * dm)
+{
+#define _(n,f) dm->api_main->msg_print_handlers \
+ [VL_API_##n + dm->msg_id_base] \
+ = (void *) vl_api_##f##_t_print;
+ foreach_dpdk_plugin_api_msg;
+#undef _
+}
+*/
+/* force linker to link functions used by vlib and declared weak */
+
+static clib_error_t *
+dpdk_api_init (vlib_main_t * vm)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ clib_error_t *error = 0;
+
+ /* init CLI */
+ if ((error = vlib_call_init_function (vm, dpdk_init)))
+ return error;
+
+ u8 *name;
+ name = format (0, "dpdk_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ dm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+ vec_free (name);
+
+ error = dpdk_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (dm, &api_main);
+
+// TODO
+// plugin_custom_dump_configure (dm);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (dpdk_api_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/api/dpdk_msg_enum.h b/src/plugins/dpdk/api/dpdk_msg_enum.h
new file mode 100644
index 00000000..952ce6ad
--- /dev/null
+++ b/src/plugins/dpdk/api/dpdk_msg_enum.h
@@ -0,0 +1,31 @@
+
+/*
+ * dpdk_msg_enum.h - skeleton vpp engine plug-in message enumeration
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_dpdk_msg_enum_h
+#define included_dpdk_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <dpdk/api/dpdk_all_api_h.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_dpdk_msg_enum_h */
diff --git a/src/plugins/dpdk/api/dpdk_test.c b/src/plugins/dpdk/api/dpdk_test.c
new file mode 100644
index 00000000..ea17e5d0
--- /dev/null
+++ b/src/plugins/dpdk/api/dpdk_test.c
@@ -0,0 +1,397 @@
+
+/*
+ * dpdk_test.c - skeleton vpp-api-test plug-in
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+
+/* Declare message IDs */
+#include <dpdk/api/dpdk_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <dpdk/api/dpdk.api.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun		/* define endian-swap functions */
+#include <dpdk/api/dpdk.api.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <dpdk/api/dpdk.api.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <dpdk/api/dpdk.api.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} dpdk_test_main_t;
+
+dpdk_test_main_t dpdk_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(sw_interface_set_dpdk_hqos_pipe_reply) \
+_(sw_interface_set_dpdk_hqos_subport_reply) \
+_(sw_interface_set_dpdk_hqos_tctbl_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = dpdk_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \
+ sw_interface_set_dpdk_hqos_pipe_reply) \
+_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \
+ sw_interface_set_dpdk_hqos_subport_reply) \
+_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \
+ sw_interface_set_dpdk_hqos_tctbl_reply)
+
+/* M: construct, but don't yet send a message */
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + dm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+#define M2(T,t,n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + dm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
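+
+/* M vs M2 (sketch): M allocates exactly sizeof(*mp), while M2 reserves n
+ * extra bytes for a variable-length tail, e.g. (hypothetical message)
+ * M2 (FOO_BAR, foo_bar, vec_len (data)) before appending the data.
+ * The three HQoS requests below are fixed-size, so only M is used. */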
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
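+
+/* Standard VAT request pattern built from these macros, sketched with
+ * this plugin's pipe message (field fill-in elided):
+ *
+ *   M (SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe);
+ *   mp->sw_if_index = ntohl (sw_if_index);
+ *   S;  -- send to vpp
+ *   W;  -- spin until the reply handler sets vam->result_ready, or time out
+ */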
+
+static int
+api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam)
+{
+ dpdk_test_main_t * dm = &dpdk_test_main;
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_dpdk_hqos_pipe_t *mp;
+ f64 timeout;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 subport;
+ u8 subport_set = 0;
+ u32 pipe;
+ u8 pipe_set = 0;
+ u32 profile;
+ u8 profile_set = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "rx sw_if_index %u", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "subport %u", &subport))
+ subport_set = 1;
+ else if (unformat (i, "pipe %u", &pipe))
+ pipe_set = 1;
+ else if (unformat (i, "profile %u", &profile))
+ profile_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (subport_set == 0)
+ {
+ errmsg ("missing subport ");
+ return -99;
+ }
+
+ if (pipe_set == 0)
+ {
+ errmsg ("missing pipe");
+ return -99;
+ }
+
+ if (profile_set == 0)
+ {
+ errmsg ("missing profile");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->subport = ntohl (subport);
+ mp->pipe = ntohl (pipe);
+ mp->profile = ntohl (profile);
+
+
+ S;
+ W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static int
+api_sw_interface_set_dpdk_hqos_subport (vat_main_t * vam)
+{
+ dpdk_test_main_t * dm = &dpdk_test_main;
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_dpdk_hqos_subport_t *mp;
+ f64 timeout;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 subport;
+ u8 subport_set = 0;
+  u32 tb_rate = 1250000000;	/* 10GbE, in bytes/second */
+ u32 tb_size = 1000000;
+ u32 tc_rate[] = { 1250000000, 1250000000, 1250000000, 1250000000 };
+ u32 tc_period = 10;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "rx sw_if_index %u", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "subport %u", &subport))
+ subport_set = 1;
+ else if (unformat (i, "rate %u", &tb_rate))
+ {
+ u32 tc_id;
+
+ for (tc_id = 0; tc_id < (sizeof (tc_rate) / sizeof (tc_rate[0]));
+ tc_id++)
+ tc_rate[tc_id] = tb_rate;
+ }
+ else if (unformat (i, "bktsize %u", &tb_size))
+ ;
+ else if (unformat (i, "tc0 %u", &tc_rate[0]))
+ ;
+ else if (unformat (i, "tc1 %u", &tc_rate[1]))
+ ;
+ else if (unformat (i, "tc2 %u", &tc_rate[2]))
+ ;
+ else if (unformat (i, "tc3 %u", &tc_rate[3]))
+ ;
+ else if (unformat (i, "period %u", &tc_period))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (subport_set == 0)
+ {
+ errmsg ("missing subport ");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->subport = ntohl (subport);
+ mp->tb_rate = ntohl (tb_rate);
+ mp->tb_size = ntohl (tb_size);
+ mp->tc_rate[0] = ntohl (tc_rate[0]);
+ mp->tc_rate[1] = ntohl (tc_rate[1]);
+ mp->tc_rate[2] = ntohl (tc_rate[2]);
+ mp->tc_rate[3] = ntohl (tc_rate[3]);
+ mp->tc_period = ntohl (tc_period);
+
+ S;
+ W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static int
+api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam)
+{
+ dpdk_test_main_t * dm = &dpdk_test_main;
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_dpdk_hqos_tctbl_t *mp;
+ f64 timeout;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 entry_set = 0;
+ u8 tc_set = 0;
+ u8 queue_set = 0;
+ u32 entry, tc, queue;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "rx sw_if_index %u", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "entry %d", &entry))
+ entry_set = 1;
+ else if (unformat (i, "tc %d", &tc))
+ tc_set = 1;
+ else if (unformat (i, "queue %d", &queue))
+ queue_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (entry_set == 0)
+ {
+ errmsg ("missing entry ");
+ return -99;
+ }
+
+ if (tc_set == 0)
+ {
+ errmsg ("missing traffic class ");
+ return -99;
+ }
+
+ if (queue_set == 0)
+ {
+ errmsg ("missing queue ");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->entry = ntohl (entry);
+ mp->tc = ntohl (tc);
+ mp->queue = ntohl (queue);
+
+ S;
+ W;
+ /* NOTREACHED */
+ return 0;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(sw_interface_set_dpdk_hqos_pipe, \
+ "rx sw_if_index <id> subport <subport-id> pipe <pipe-id>\n" \
+ "profile <profile-id>\n") \
+_(sw_interface_set_dpdk_hqos_subport, \
+ "rx sw_if_index <id> subport <subport-id> [rate <n>]\n" \
+ "[bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]\n") \
+_(sw_interface_set_dpdk_hqos_tctbl, \
+ "rx sw_if_index <id> entry <n> tc <n> queue <n>\n")
+
+static void dpdk_api_hookup (vat_main_t *vam)
+{
+ dpdk_test_main_t * dm __attribute__((unused)) = &dpdk_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ dpdk_test_main_t * dm = &dpdk_test_main;
+ u8 * name;
+
+ dm->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "dpdk_%08x%c", api_version, 0);
+ dm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (dm->msg_id_base != (u16) ~0)
+ dpdk_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
new file mode 100644
index 00000000..02a11b83
--- /dev/null
+++ b/src/plugins/dpdk/buffer.c
@@ -0,0 +1,575 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.c: allocate/free network buffers.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ *
+ * Allocate/free network buffers.
+ */
+
+#include <rte_config.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_version.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <dpdk/device/dpdk.h>
+#include <dpdk/device/dpdk_priv.h>
+
+
+STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM,
+ "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM");
+
+static_always_inline void
+dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b)
+{
+  struct rte_mbuf *mb;
+  u32 next, flags;
+
+next:
+ flags = b->flags;
+ next = b->next_buffer;
+ mb = rte_mbuf_from_vlib_buffer (b);
+
+ if (PREDICT_FALSE (b->n_add_refs))
+ {
+ rte_mbuf_refcnt_update (mb, b->n_add_refs);
+ b->n_add_refs = 0;
+ }
+
+ rte_pktmbuf_free_seg (mb);
+
+ if (flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, next);
+ goto next;
+ }
+}
+
+static void
+del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
+{
+ u32 i;
+ vlib_buffer_t *b;
+
+ for (i = 0; i < vec_len (f->buffers); i++)
+ {
+ b = vlib_get_buffer (vm, f->buffers[i]);
+ dpdk_rte_pktmbuf_free (vm, b);
+ }
+
+ vec_free (f->name);
+ vec_free (f->buffers);
+}
+
+/* Delete buffer free list. */
+static void
+dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *f;
+ u32 merge_index;
+ int i;
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ f = vlib_buffer_get_free_list (vm, free_list_index);
+
+ merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes);
+ if (merge_index != ~0 && merge_index != free_list_index)
+ {
+ vlib_buffer_merge_free_lists (pool_elt_at_index
+ (bm->buffer_free_list_pool, merge_index),
+ f);
+ }
+
+ del_free_list (vm, f);
+
+ /* Poison it. */
+ memset (f, 0xab, sizeof (f[0]));
+
+ pool_put (bm->buffer_free_list_pool, f);
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ bm = vlib_mains[i]->buffer_main;
+      f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);
+ memset (f, 0xab, sizeof (f[0]));
+ pool_put (bm->buffer_free_list_pool, f);
+ }
+}
+
+/* Make sure free list has at least given number of free buffers. */
+static uword
+fill_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl, uword min_free_buffers)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ int n, i;
+ u32 bi0, bi1, bi2, bi3;
+ unsigned socket_id = rte_socket_id ();
+ struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id];
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+
+ /* Too early? */
+ if (PREDICT_FALSE (rmp == 0))
+ return 0;
+
+ /* Already have enough free buffers on free list? */
+ n = min_free_buffers - vec_len (fl->buffers);
+ if (n <= 0)
+ return min_free_buffers;
+
+  /* Always allocate a round number of buffers. */
+ n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32));
+
+ /* Always allocate new buffers in reasonably large sized chunks. */
+ n = clib_max (n, fl->min_n_buffers_each_physmem_alloc);
+
+ vec_validate_aligned (vm->mbuf_alloc_list, n - 1, CLIB_CACHE_LINE_BYTES);
+
+ if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0)
+ return 0;
+
+ _vec_len (vm->mbuf_alloc_list) = n;
+
+ i = 0;
+
+ while (i < (n - 7))
+ {
+ vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf
+ (vm->mbuf_alloc_list[i + 4]), STORE);
+ vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf
+ (vm->mbuf_alloc_list[i + 5]), STORE);
+ vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf
+ (vm->mbuf_alloc_list[i + 6]), STORE);
+ vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf
+ (vm->mbuf_alloc_list[i + 7]), STORE);
+
+ mb0 = vm->mbuf_alloc_list[i];
+ mb1 = vm->mbuf_alloc_list[i + 1];
+ mb2 = vm->mbuf_alloc_list[i + 2];
+ mb3 = vm->mbuf_alloc_list[i + 3];
+
+ b0 = vlib_buffer_from_rte_mbuf (mb0);
+ b1 = vlib_buffer_from_rte_mbuf (mb1);
+ b2 = vlib_buffer_from_rte_mbuf (mb2);
+ b3 = vlib_buffer_from_rte_mbuf (mb3);
+
+ bi0 = vlib_get_buffer_index (vm, b0);
+ bi1 = vlib_get_buffer_index (vm, b1);
+ bi2 = vlib_get_buffer_index (vm, b2);
+ bi3 = vlib_get_buffer_index (vm, b3);
+
+ vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES);
+ vec_add1_aligned (fl->buffers, bi1, CLIB_CACHE_LINE_BYTES);
+ vec_add1_aligned (fl->buffers, bi2, CLIB_CACHE_LINE_BYTES);
+ vec_add1_aligned (fl->buffers, bi3, CLIB_CACHE_LINE_BYTES);
+
+ vlib_buffer_init_for_free_list (b0, fl);
+ vlib_buffer_init_for_free_list (b1, fl);
+ vlib_buffer_init_for_free_list (b2, fl);
+ vlib_buffer_init_for_free_list (b3, fl);
+
+ if (fl->buffer_init_function)
+ {
+ fl->buffer_init_function (vm, fl, &bi0, 1);
+ fl->buffer_init_function (vm, fl, &bi1, 1);
+ fl->buffer_init_function (vm, fl, &bi2, 1);
+ fl->buffer_init_function (vm, fl, &bi3, 1);
+ }
+ i += 4;
+ }
+
+ while (i < n)
+ {
+ mb0 = vm->mbuf_alloc_list[i];
+
+ b0 = vlib_buffer_from_rte_mbuf (mb0);
+ bi0 = vlib_get_buffer_index (vm, b0);
+
+ vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES);
+
+ vlib_buffer_init_for_free_list (b0, fl);
+
+ if (fl->buffer_init_function)
+ fl->buffer_init_function (vm, fl, &bi0, 1);
+ i++;
+ }
+
+ fl->n_alloc += n;
+
+ return n;
+}
+
+static u32
+alloc_from_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * free_list,
+ u32 * alloc_buffers, u32 n_alloc_buffers)
+{
+ u32 *dst, *src;
+ uword len, n_filled;
+
+ dst = alloc_buffers;
+
+ n_filled = fill_free_list (vm, free_list, n_alloc_buffers);
+ if (n_filled == 0)
+ return 0;
+
+ len = vec_len (free_list->buffers);
+ ASSERT (len >= n_alloc_buffers);
+
+ src = free_list->buffers + len - n_alloc_buffers;
+ clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32));
+
+ _vec_len (free_list->buffers) -= n_alloc_buffers;
+
+ return n_alloc_buffers;
+}
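+
+/* Note: allocation is LIFO from the vector tail; e.g. with 10 buffer
+ * indices on the free list and a request for 4, entries 6..9 are copied
+ * out and the vector shrinks to 6, so recently freed (likely cache-warm)
+ * buffers are handed out first. */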
+
+/* Allocate a given number of buffers into given array.
+ Returns number actually allocated which will be either zero or
+ number requested. */
+u32
+dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ return alloc_from_free_list
+ (vm,
+ pool_elt_at_index (bm->buffer_free_list_pool,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX),
+ buffers, n_buffers);
+}
+
+
+u32
+dpdk_buffer_alloc_from_free_list (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers, u32 free_list_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *f;
+ f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
+ return alloc_from_free_list (vm, f, buffers, n_buffers);
+}
+
+static_always_inline void
+vlib_buffer_free_inline (vlib_main_t * vm,
+ u32 * buffers, u32 n_buffers, u32 follow_buffer_next)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *fl;
+ u32 fi;
+ int i;
+ u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
+ u32 follow_buffer_next);
+
+ cb = bm->buffer_free_callback;
+
+ if (PREDICT_FALSE (cb != 0))
+ n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next);
+
+ if (!n_buffers)
+ return;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b;
+
+ b = vlib_get_buffer (vm, buffers[i]);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+ fl = vlib_buffer_get_buffer_free_list (vm, b, &fi);
+
+ /* The only current use of this callback: multicast recycle */
+ if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0))
+ {
+ int j;
+
+ vlib_buffer_add_to_free_list
+ (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0);
+
+ for (j = 0; j < vec_len (bm->announce_list); j++)
+ {
+ if (fl == bm->announce_list[j])
+ goto already_announced;
+ }
+ vec_add1 (bm->announce_list, fl);
+ already_announced:
+ ;
+ }
+ else
+ {
+ if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0))
+ dpdk_rte_pktmbuf_free (vm, b);
+ }
+ }
+ if (vec_len (bm->announce_list))
+ {
+ vlib_buffer_free_list_t *fl;
+ for (i = 0; i < vec_len (bm->announce_list); i++)
+ {
+ fl = bm->announce_list[i];
+ fl->buffers_added_to_freelist_function (vm, fl);
+ }
+ _vec_len (bm->announce_list) = 0;
+ }
+}
+
+static void
+dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+ vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */
+ 1);
+}
+
+static void
+dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+ vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */
+ 0);
+}
+
+static void
+dpdk_packet_template_init (vlib_main_t * vm,
+ void *vt,
+ void *packet_data,
+ uword n_packet_data_bytes,
+ uword min_n_buffers_each_physmem_alloc, u8 * name)
+{
+ vlib_packet_template_t *t = (vlib_packet_template_t *) vt;
+
+ vlib_worker_thread_barrier_sync (vm);
+ memset (t, 0, sizeof (t[0]));
+
+ vec_add (t->packet_data, packet_data, n_packet_data_bytes);
+
+ vlib_worker_thread_barrier_release (vm);
+}
+
+clib_error_t *
+dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
+ unsigned socket_id)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ struct rte_mempool *rmp;
+ int i;
+
+ vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES);
+
+ /* pool already exists, nothing to do */
+ if (dm->pktmbuf_pools[socket_id])
+ return 0;
+
+ u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0);
+
+ rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */
+ num_mbufs, /* number of mbufs */
+ 512, /* cache size */
+ VLIB_BUFFER_HDR_SIZE, /* priv size */
+ VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */
+ socket_id); /* cpu socket */
+
+  if (rmp)
+    {
+      struct rte_mempool_memhdr *memhdr;
+
+      STAILQ_FOREACH (memhdr, &rmp->mem_list, next)
+	vlib_buffer_add_mem_range (vm, (uword) memhdr->addr, memhdr->len);
+
+      dm->pktmbuf_pools[socket_id] = rmp;
+      vec_free (pool_name);
+      return 0;
+    }
+
+ vec_free (pool_name);
+
+ /* no usable pool for this socket, try to use pool from another one */
+ for (i = 0; i < vec_len (dm->pktmbuf_pools); i++)
+ {
+ if (dm->pktmbuf_pools[i])
+ {
+ clib_warning
+ ("WARNING: Failed to allocate mempool for CPU socket %u. "
+ "Threads running on socket %u will use socket %u mempool.",
+ socket_id, socket_id, i);
+ dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i];
+ return 0;
+ }
+ }
+
+ return clib_error_return (0, "failed to allocate mempool on socket %u",
+ socket_id);
+}
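+
+/* Usage sketch (hypothetical values; the real caller is the DPDK init
+ * code elsewhere in this patch):
+ *
+ *   clib_error_t *err;
+ *   err = dpdk_buffer_pool_create (vm, 16384, rte_socket_id ());
+ *   if (err)
+ *     clib_error_report (err);
+ */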
+
+#if CLIB_DEBUG > 0
+
+u32 *vlib_buffer_state_validation_lock;
+uword *vlib_buffer_state_validation_hash;
+void *vlib_buffer_state_heap;
+
+static clib_error_t *
+buffer_state_validation_init (vlib_main_t * vm)
+{
+ void *oldheap;
+
+ vlib_buffer_state_heap = mheap_alloc (0, 10 << 20);
+
+ oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
+
+ vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword));
+ vec_validate_aligned (vlib_buffer_state_validation_lock, 0,
+ CLIB_CACHE_LINE_BYTES);
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (buffer_state_validation_init);
+#endif
+
+#if CLI_DEBUG
+struct dpdk_validate_buf_result
+{
+ u32 invalid;
+ u32 uninitialized;
+};
+
+#define DPDK_TRAJECTORY_POISON 31
+
+static void
+dpdk_buffer_validate_trajectory (struct rte_mempool *mp, void *opaque,
+ void *obj, unsigned obj_idx)
+{
+ vlib_buffer_t *b;
+ struct dpdk_validate_buf_result *counter = opaque;
+ b = vlib_buffer_from_rte_mbuf ((struct rte_mbuf *) obj);
+ if (b->pre_data[0] != 0)
+ {
+ if (b->pre_data[0] == DPDK_TRAJECTORY_POISON)
+ counter->uninitialized++;
+ else
+ counter->invalid++;
+ }
+}
+
+int
+dpdk_buffer_validate_trajectory_all (u32 * uninitialized)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ struct dpdk_validate_buf_result counter = { 0 };
+ int i;
+
+ for (i = 0; i < vec_len (dm->pktmbuf_pools); i++)
+ rte_mempool_obj_iter (dm->pktmbuf_pools[i],
+ dpdk_buffer_validate_trajectory, &counter);
+ if (uninitialized)
+ *uninitialized = counter.uninitialized;
+ return counter.invalid;
+}
+
+static void
+dpdk_buffer_poison_trajectory (struct rte_mempool *mp, void *opaque,
+ void *obj, unsigned obj_idx)
+{
+ vlib_buffer_t *b;
+ b = vlib_buffer_from_rte_mbuf ((struct rte_mbuf *) obj);
+ b->pre_data[0] = DPDK_TRAJECTORY_POISON;
+}
+
+void
+dpdk_buffer_poison_trajectory_all (void)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ int i;
+
+ for (i = 0; i < vec_len (dm->pktmbuf_pools); i++)
+ rte_mempool_obj_iter (dm->pktmbuf_pools[i], dpdk_buffer_poison_trajectory,
+ 0);
+}
+#endif
+
+/* *INDENT-OFF* */
+VLIB_BUFFER_REGISTER_CALLBACKS (dpdk, static) = {
+ .vlib_buffer_alloc_cb = &dpdk_buffer_alloc,
+ .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list,
+ .vlib_buffer_free_cb = &dpdk_buffer_free,
+ .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next,
+ .vlib_packet_template_init_cb = &dpdk_packet_template_init,
+ .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list,
+};
+/* *INDENT-ON* */
+
+/** @endcond */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c
new file mode 100644
index 00000000..c9fcea5c
--- /dev/null
+++ b/src/plugins/dpdk/device/cli.c
@@ -0,0 +1,1955 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/xxhash.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <dpdk/device/dpdk.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/mpls/packet.h>
+
+#include <dpdk/device/dpdk_priv.h>
+
+/**
+ * @file
+ * @brief CLI for DPDK Abstraction Layer and pcap Tx Trace.
+ *
+ * This file contains the source code for CLI for DPDK
+ * Abstraction Layer and pcap Tx Trace.
+ */
+
+
+static clib_error_t *
+get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd,
+ dpdk_device_config_t ** devconf)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ struct rte_eth_dev_info dev_info;
+ uword *p = 0;
+ clib_error_t *error = NULL;
+
+
+ if (hw_if_index == (u32) ~ 0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto done;
+ }
+
+ if (subport_id != 0)
+ {
+ error = clib_error_return (0, "Invalid subport");
+ goto done;
+ }
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ *xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rte_eth_dev_info_get ((*xd)->device_index, &dev_info);
+ if (dev_info.pci_dev)
+ { /* bonded interface has no pci info */
+ vlib_pci_addr_t pci_addr;
+
+ pci_addr.domain = dev_info.pci_dev->addr.domain;
+ pci_addr.bus = dev_info.pci_dev->addr.bus;
+ pci_addr.slot = dev_info.pci_dev->addr.devid;
+ pci_addr.function = dev_info.pci_dev->addr.function;
+
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+ if (p)
+ (*devconf) = pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ else
+ (*devconf) = &dm->conf->default_devconf;
+
+done:
+ return error;
+}
+
+static clib_error_t *
+pcap_trace_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+#define PCAP_DEF_PKT_TO_CAPTURE (100)
+
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ u8 *filename;
+ u8 *chroot_filename = 0;
+ u32 max = 0;
+ int enabled = 0;
+ int errorFlag = 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ {
+ if (dm->tx_pcap_enable == 0)
+ {
+ enabled = 1;
+ }
+ else
+ {
+ vlib_cli_output (vm, "pcap tx capture already on...");
+ errorFlag = 1;
+ break;
+ }
+ }
+ else if (unformat (line_input, "off"))
+ {
+ if (dm->tx_pcap_enable)
+ {
+ vlib_cli_output (vm, "captured %d pkts...",
+ dm->pcap_main.n_packets_captured + 1);
+ if (dm->pcap_main.n_packets_captured)
+ {
+ dm->pcap_main.n_packets_to_capture =
+ dm->pcap_main.n_packets_captured;
+ error = pcap_write (&dm->pcap_main);
+ if (error)
+ clib_error_report (error);
+ else
+ vlib_cli_output (vm, "saved to %s...", dm->pcap_filename);
+ }
+
+ dm->tx_pcap_enable = 0;
+ }
+ else
+ {
+ vlib_cli_output (vm, "pcap tx capture already off...");
+ errorFlag = 1;
+ break;
+ }
+ }
+ else if (unformat (line_input, "max %d", &max))
+ {
+ if (dm->tx_pcap_enable)
+ {
+ vlib_cli_output (vm,
+ "can't change max value while pcap tx capture active...");
+ errorFlag = 1;
+ break;
+ }
+ }
+ else if (unformat (line_input, "intfc %U",
+ unformat_vnet_sw_interface, dm->vnet_main,
+ &dm->pcap_sw_if_index))
+ ;
+
+ else if (unformat (line_input, "intfc any"))
+ {
+ dm->pcap_sw_if_index = 0;
+ }
+ else if (unformat (line_input, "file %s", &filename))
+ {
+ if (dm->tx_pcap_enable)
+ {
+ vlib_cli_output (vm,
+ "can't change file while pcap tx capture active...");
+ errorFlag = 1;
+ break;
+ }
+
+ /* Brain-police user path input */
+ if (strstr ((char *) filename, "..")
+ || index ((char *) filename, '/'))
+ {
+ vlib_cli_output (vm, "illegal characters in filename '%s'",
+ filename);
+ vlib_cli_output (vm,
+ "Hint: Only filename, do not enter directory structure.");
+ vec_free (filename);
+ errorFlag = 1;
+ break;
+ }
+
+ chroot_filename = format (0, "/tmp/%s%c", filename, 0);
+ vec_free (filename);
+ }
+ else if (unformat (line_input, "status"))
+ {
+ if (dm->pcap_sw_if_index == 0)
+ {
+ vlib_cli_output (vm, "max is %d for any interface to file %s",
+ dm->
+ pcap_pkts_to_capture ? dm->pcap_pkts_to_capture
+ : PCAP_DEF_PKT_TO_CAPTURE,
+ dm->
+ pcap_filename ? dm->pcap_filename : (u8 *)
+ "/tmp/vpe.pcap");
+ }
+ else
+ {
+ vlib_cli_output (vm, "max is %d for interface %U to file %s",
+ dm->
+ pcap_pkts_to_capture ? dm->pcap_pkts_to_capture
+ : PCAP_DEF_PKT_TO_CAPTURE,
+ format_vnet_sw_if_index_name, dm->vnet_main,
+ dm->pcap_sw_if_index,
+ dm->
+ pcap_filename ? dm->pcap_filename : (u8 *)
+ "/tmp/vpe.pcap");
+ }
+
+ if (dm->tx_pcap_enable == 0)
+ {
+ vlib_cli_output (vm, "pcap tx capture is off...");
+ }
+ else
+ {
+ vlib_cli_output (vm, "pcap tx capture is on: %d of %d pkts...",
+ dm->pcap_main.n_packets_captured,
+ dm->pcap_main.n_packets_to_capture);
+ }
+ break;
+ }
+
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ errorFlag = 1;
+ break;
+ }
+ }
+ unformat_free (line_input);
+
+
+ if (errorFlag == 0)
+ {
+ /* Since no error, save configured values. */
+ if (chroot_filename)
+ {
+ if (dm->pcap_filename)
+ vec_free (dm->pcap_filename);
+ vec_add1 (chroot_filename, 0);
+ dm->pcap_filename = chroot_filename;
+ }
+
+ if (max)
+ dm->pcap_pkts_to_capture = max;
+
+
+ if (enabled)
+ {
+ if (dm->pcap_filename == 0)
+ dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0);
+
+ memset (&dm->pcap_main, 0, sizeof (dm->pcap_main));
+ dm->pcap_main.file_name = (char *) dm->pcap_filename;
+ dm->pcap_main.n_packets_to_capture = PCAP_DEF_PKT_TO_CAPTURE;
+ if (dm->pcap_pkts_to_capture)
+ dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture;
+
+ dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet;
+ dm->tx_pcap_enable = 1;
+ vlib_cli_output (vm, "pcap tx capture on...");
+ }
+ }
+ else if (chroot_filename)
+ vec_free (chroot_filename);
+
+
+ return error;
+}
+
+/*?
+ * This command is used to start or stop a packet capture, or show
+ * the status of packet capture.
+ *
+ * This command has the following optional parameters:
+ *
+ * - <b>on|off</b> - Used to start or stop a packet capture.
+ *
+ * - <b>max <nn></b> - Depth of local buffer. Once '<em>nn</em>' packets
+ * have been received, the buffer is flushed to file. Each subsequent
+ * '<em>nn</em>' packets are flushed to file, overwriting the previous
+ * write. If not entered, the value defaults to 100. Can only be updated
+ * while packet capture is off.
+ *
+ * - <b>intfc <interface>|any</b> - Used to specify a given interface,
+ * or use '<em>any</em>' to run packet capture on all interfaces.
+ * '<em>any</em>' is the default if not provided. Settings from a previous
+ * packet capture are preserved, so '<em>any</em>' can be used to reset
+ * the interface setting.
+ *
+ * - <b>file <name></b> - Used to specify the output filename. The file is
+ * placed in the '<em>/tmp</em>' directory, so enter only a filename, not a
+ * directory path. If the file already exists, it is overwritten. If no
+ * filename is provided, '<em>/tmp/vpe.pcap</em>' is used. Can only be
+ * updated while packet capture is off.
+ *
+ * - <b>status</b> - Displays the current status and configured attributes
+ * associated with a packet capture. If packet capture is in progress,
+ * '<em>status</em>' also will return the number of packets currently in
+ * the local buffer. Any additional attributes entered on the command line
+ * with '<em>status</em>' are ignored and not applied.
+ *
+ * @cliexpar
+ * Example of how to display the status of a tx packet capture when off:
+ * @cliexstart{pcap tx trace status}
+ * max is 100, for any interface to file /tmp/vpe.pcap
+ * pcap tx capture is off...
+ * @cliexend
+ * Example of how to start a tx packet capture:
+ * @cliexstart{pcap tx trace on max 35 intfc GigabitEthernet0/8/0 file vppTest.pcap}
+ * pcap tx capture on...
+ * @cliexend
+ * Example of how to display the status of a tx packet capture in progress:
+ * @cliexstart{pcap tx trace status}
+ * max is 35, for interface GigabitEthernet0/8/0 to file /tmp/vppTest.pcap
+ * pcap tx capture is on: 20 of 35 pkts...
+ * @cliexend
+ * Example of how to stop a tx packet capture:
+ * @cliexstart{pcap tx trace off}
+ * captured 21 pkts...
+ * saved to /tmp/vppTest.pcap...
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (pcap_trace_command, static) = {
+ .path = "pcap tx trace",
+ .short_help =
+ "pcap tx trace [on|off] [max <nn>] [intfc <interface>|any] [file <name>] [status]",
+ .function = pcap_trace_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ struct rte_mempool *rmp;
+ int i;
+
+ for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++)
+ {
+ rmp = dpdk_main.pktmbuf_pools[i];
+ if (rmp)
+ {
+	  unsigned available = rte_mempool_avail_count (rmp);
+	  unsigned in_use = rte_mempool_in_use_count (rmp);
+
+	  vlib_cli_output (vm,
+			   "name=\"%s\" available = %7d allocated = %7d total = %7d\n",
+			   rmp->name, (u32) available, (u32) in_use,
+			   (u32) (available + in_use));
+ }
+ else
+ {
+ vlib_cli_output (vm, "rte_mempool is NULL (!)\n");
+ }
+ }
+ return 0;
+}
+
+/*?
+ * This command displays statistics of each DPDK mempool.
+ *
+ * @cliexpar
+ * Example of how to display DPDK buffer data:
+ * @cliexstart{show dpdk buffer}
+ * name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_dpdk_buffer,static) = {
+ .path = "show dpdk buffer",
+ .short_help = "show dpdk buffer",
+ .function = show_dpdk_buffer,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ static u32 *allocated_buffers;
+ u32 n_alloc = 0;
+ u32 n_free = 0;
+ u32 first, actual_alloc;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "allocate %d", &n_alloc))
+ ;
+ else if (unformat (input, "free %d", &n_free))
+ ;
+ else
+ break;
+ }
+
+ if (n_free)
+ {
+ if (vec_len (allocated_buffers) < n_free)
+ return clib_error_return (0, "Can't free %d, only %d allocated",
+ n_free, vec_len (allocated_buffers));
+
+ first = vec_len (allocated_buffers) - n_free;
+ vlib_buffer_free (vm, allocated_buffers + first, n_free);
+ _vec_len (allocated_buffers) = first;
+ }
+ if (n_alloc)
+ {
+ first = vec_len (allocated_buffers);
+ vec_validate (allocated_buffers,
+ vec_len (allocated_buffers) + n_alloc - 1);
+
+ actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first,
+ n_alloc);
+ _vec_len (allocated_buffers) = first + actual_alloc;
+
+ if (actual_alloc < n_alloc)
+ vlib_cli_output (vm, "WARNING: only allocated %d buffers",
+ actual_alloc);
+ }
+
+ vlib_cli_output (vm, "Currently %d buffers allocated",
+ vec_len (allocated_buffers));
+
+ if (allocated_buffers && vec_len (allocated_buffers) == 0)
+ vec_free (allocated_buffers);
+
+ return 0;
+}
+
+/*?
+ * This command tests the allocation and freeing of DPDK buffers.
+ * If both '<em>allocate</em>' and '<em>free</em>' are entered on the
+ * same command, the '<em>free</em>' is executed first. If no
+ * parameters are provided, this command displays how many DPDK buffers
+ * the test command has allocated.
+ *
+ * @cliexpar
+ * @parblock
+ *
+ * Example of how to display how many DPDK buffers the test command has allocated:
+ * @cliexstart{test dpdk buffer}
+ * Currently 0 buffers allocated
+ * @cliexend
+ *
+ * Example of how to allocate DPDK buffers using the test command:
+ * @cliexstart{test dpdk buffer allocate 10}
+ * Currently 10 buffers allocated
+ * @cliexend
+ *
+ * Example of how to free DPDK buffers allocated by the test command:
+ * @cliexstart{test dpdk buffer free 10}
+ * Currently 0 buffers allocated
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = {
+ .path = "test dpdk buffer",
+ .short_help = "test dpdk buffer [allocate <nn>] [free <nn>]",
+ .function = test_dpdk_buffer,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 nb_rx_desc = (u32) ~ 0;
+ u32 nb_tx_desc = (u32) ~ 0;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "tx %d", &nb_tx_desc))
+ ;
+ else if (unformat (line_input, "rx %d", &nb_rx_desc))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (hw_if_index == (u32) ~ 0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto done;
+ }
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
+ {
+ error =
+ clib_error_return (0,
+ "number of descriptors can be set only for "
+ "physical devices");
+ goto done;
+ }
+
+ if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) &&
+ (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc))
+ {
+ error = clib_error_return (0, "nothing changed");
+ goto done;
+ }
+
+ if (nb_rx_desc != (u32) ~ 0)
+ xd->nb_rx_desc = nb_rx_desc;
+
+ if (nb_tx_desc != (u32) ~ 0)
+ xd->nb_tx_desc = nb_tx_desc;
+
+ dpdk_device_setup (xd);
+
+  if (vec_len (xd->errors))
+    {
+      error = clib_error_return (0, "%U", format_dpdk_device_errors, xd);
+      goto done;
+    }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command sets the number of DPDK '<em>rx</em>' and
+ * '<em>tx</em>' descriptors for the given physical interface. Use
+ * the command '<em>show hardware-interfaces</em>' to display the
+ * current descriptor allocation.
+ *
+ * @cliexpar
+ * Example of how to set the DPDK interface descriptors:
+ * @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = {
+ .path = "set dpdk interface descriptors",
+ .short_help = "set dpdk interface descriptors <interface> [rx <nn>] [tx <nn>]",
+ .function = set_dpdk_if_desc,
+};
+/* *INDENT-ON* */
+
+static int
+dpdk_device_queue_sort (void *a1, void *a2)
+{
+ dpdk_device_and_queue_t *dq1 = a1;
+ dpdk_device_and_queue_t *dq2 = a2;
+
+ if (dq1->device > dq2->device)
+ return 1;
+ else if (dq1->device < dq2->device)
+ return -1;
+ else if (dq1->queue_id > dq2->queue_id)
+ return 1;
+ else if (dq1->queue_id < dq2->queue_id)
+ return -1;
+ else
+ return 0;
+}
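+
+/* The comparator orders by device index, then queue id; e.g. (dev 1, q 0)
+ * sorts before (dev 1, q 1), and both before (dev 2, q 0), keeping each
+ * HQoS thread's device/queue list in a stable, deterministic order. */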
+
+
+static clib_error_t *
+show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_and_queue_t *dq;
+ int cpu;
+
+ if (tm->n_vlib_mains == 1)
+ vlib_cli_output (vm, "All interfaces are handled by main thread");
+
+ for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++)
+ {
+ if (cpu >= dm->hqos_cpu_first_index &&
+ cpu < (dm->hqos_cpu_first_index + dm->hqos_cpu_count))
+ vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
+ vlib_worker_threads[cpu].name,
+ vlib_worker_threads[cpu].lcore_id);
+
+ vec_foreach (dq, dm->devices_by_hqos_cpu[cpu])
+ {
+ u32 hw_if_index = dm->devices[dq->device].hw_if_index;
+ vnet_hw_interface_t *hi =
+ vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id);
+ }
+ }
+ return 0;
+}
+
+/*?
+ * This command is used to display the thread and core each
+ * DPDK output interface and HQoS queue is assigned to.
+ *
+ * @cliexpar
+ * Example of how to display the DPDK output interface and HQoS queue placement:
+ * @cliexstart{show dpdk interface hqos placement}
+ * Thread 1 (vpp_hqos-threads_0 at lcore 3):
+ * GigabitEthernet0/8/0 queue 0
+ * Thread 2 (vpp_hqos-threads_1 at lcore 4):
+ * GigabitEthernet0/9/0 queue 0
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = {
+ .path = "show dpdk interface hqos placement",
+ .short_help = "show dpdk interface hqos placement",
+ .function = show_dpdk_if_hqos_placement,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_and_queue_t *dq;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 cpu = (u32) ~ 0;
+ int i;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "thread %d", &cpu))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+  if (hw_if_index == (u32) ~ 0)
+    {
+      error = clib_error_return (0, "please specify valid interface name");
+      goto done;
+    }
+
+ if (cpu < dm->hqos_cpu_first_index ||
+ cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count))
+ {
+ error = clib_error_return (0, "please specify valid thread id");
+ goto done;
+ }
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++)
+ {
+ vec_foreach (dq, dm->devices_by_hqos_cpu[i])
+ {
+ if (hw_if_index == dm->devices[dq->device].hw_if_index)
+ {
+ if (cpu == i) /* nothing to do */
+ goto done;
+
+ vec_del1 (dm->devices_by_hqos_cpu[i],
+ dq - dm->devices_by_hqos_cpu[i]);
+ vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
+ dq->queue_id = 0;
+ dq->device = xd->device_index;
+
+ vec_sort_with_function (dm->devices_by_hqos_cpu[i],
+ dpdk_device_queue_sort);
+
+ vec_sort_with_function (dm->devices_by_hqos_cpu[cpu],
+ dpdk_device_queue_sort);
+
+ goto done;
+ }
+ }
+ }
+
+ error = clib_error_return (0, "not found");
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to assign a given DPDK output interface and
+ * HQoS queue to a different thread. This will not create a thread,
+ * so the thread must already exist. Use '<em>/etc/vpp/startup.conf</em>'
+ * for the initial thread creation. See @ref qos_doc for more details.
+ *
+ * @cliexpar
+ * Example of how to display the DPDK output interface and HQoS queue placement:
+ * @cliexstart{show dpdk interface hqos placement}
+ * Thread 1 (vpp_hqos-threads_0 at lcore 3):
+ * GigabitEthernet0/8/0 queue 0
+ * Thread 2 (vpp_hqos-threads_1 at lcore 4):
+ * GigabitEthernet0/9/0 queue 0
+ * @cliexend
+ * Example of how to assign a DPDK output interface and HQoS queue to a thread:
+ * @cliexcmd{set dpdk interface hqos placement GigabitEthernet0/8/0 thread 2}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = {
+ .path = "set dpdk interface hqos placement",
+ .short_help = "set dpdk interface hqos placement <interface> thread <n>",
+ .function = set_dpdk_if_hqos_placement,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 subport_id = (u32) ~ 0;
+ u32 pipe_id = (u32) ~ 0;
+ u32 profile_id = (u32) ~ 0;
+ int rv;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "subport %d", &subport_id))
+ ;
+ else if (unformat (line_input, "pipe %d", &pipe_id))
+ ;
+ else if (unformat (line_input, "profile %d", &profile_id))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (hw_if_index == (u32) ~ 0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto done;
+ }
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rv =
+ rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id,
+ profile_id);
+ if (rv)
+ {
+ error = clib_error_return (0, "pipe configuration failed");
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to change the profile associated with an HQoS pipe.
+ * The '<em><profile_id></em>' is zero based. Use the command
+ * '<em>show dpdk interface hqos</em>' to display the content of each profile.
+ * See @ref qos_doc for more details.
+ *
+ * @note
+ * Currently there is no API to create a new HQoS pipe profile. One is
+ * created by default in the code (search for '<em>hqos_pipe_params_default</em>').
+ * Additional profiles can be added in the code and the code recompiled; this
+ * command can then be used to assign one of them.
+ *
+ * @cliexpar
+ * Example of how to assign a new profile to a HQoS pipe:
+ * @cliexcmd{set dpdk interface hqos pipe GigabitEthernet0/8/0 subport 0 pipe 2 profile 1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) =
+{
+ .path = "set dpdk interface hqos pipe",
+ .short_help = "set dpdk interface hqos pipe <interface> subport <subport_id> pipe <pipe_id> "
+ "profile <profile_id>",
+ .function = set_dpdk_if_hqos_pipe,
+};
+/* *INDENT-ON* */
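+
+/*
+ * Per the note above, additional pipe profiles are created in code. A
+ * hedged sketch of what one could look like (field names follow DPDK's
+ * struct rte_sched_pipe_params; the values are illustrative only and
+ * mirror the defaults shown by "show dpdk interface hqos"):
+ *
+ *   static struct rte_sched_pipe_params my_pipe_profile = {
+ *     .tb_rate = 305175, .tb_size = 1000000,
+ *     .tc_rate = { 305175, 305175, 305175, 305175 }, .tc_period = 40,
+ *     .wrr_weights = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ *   };
+ */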
+
+static clib_error_t *
+set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = NULL;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 subport_id = (u32) ~ 0;
+ struct rte_sched_subport_params p;
+ int rv;
+ clib_error_t *error = NULL;
+ u32 tb_rate = (u32) ~ 0;
+ u32 tb_size = (u32) ~ 0;
+ u32 tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] =
+ { (u32) ~ 0, (u32) ~ 0, (u32) ~ 0, (u32) ~ 0 };
+ u32 tc_period = (u32) ~ 0;
+ dpdk_device_config_t *devconf = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "subport %d", &subport_id))
+ ;
+ else if (unformat (line_input, "rate %d", &tb_rate))
+ ;
+ else if (unformat (line_input, "bktsize %d", &tb_size))
+ ;
+ else if (unformat (line_input, "tc0 %d", &tc_rate[0]))
+ ;
+ else if (unformat (line_input, "tc1 %d", &tc_rate[1]))
+ ;
+ else if (unformat (line_input, "tc2 %d", &tc_rate[2]))
+ ;
+ else if (unformat (line_input, "tc3 %d", &tc_rate[3]))
+ ;
+ else if (unformat (line_input, "period %d", &tc_period))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ error = get_hqos (hw_if_index, subport_id, &xd, &devconf);
+
+ if (error == NULL)
+ {
+ /* Copy the current values over to local structure. */
+ memcpy (&p, &devconf->hqos.subport[subport_id], sizeof (p));
+
+ /* Update local structure with input values. */
+ if (tb_rate != (u32) ~ 0)
+ {
+ p.tb_rate = tb_rate;
+ p.tc_rate[0] = tb_rate;
+ p.tc_rate[1] = tb_rate;
+ p.tc_rate[2] = tb_rate;
+ p.tc_rate[3] = tb_rate;
+ }
+ if (tb_size != (u32) ~ 0)
+ {
+ p.tb_size = tb_size;
+ }
+ if (tc_rate[0] != (u32) ~ 0)
+ {
+ p.tc_rate[0] = tc_rate[0];
+ }
+ if (tc_rate[1] != (u32) ~ 0)
+ {
+ p.tc_rate[1] = tc_rate[1];
+ }
+ if (tc_rate[2] != (u32) ~ 0)
+ {
+ p.tc_rate[2] = tc_rate[2];
+ }
+ if (tc_rate[3] != (u32) ~ 0)
+ {
+ p.tc_rate[3] = tc_rate[3];
+ }
+ if (tc_period != (u32) ~ 0)
+ {
+ p.tc_period = tc_period;
+ }
+
+ /* Apply changes. */
+ rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p);
+ if (rv)
+ {
+ error = clib_error_return (0, "subport configuration failed");
+ goto done;
+ }
+ else
+ {
+ /* Successfully applied, so save of the input values. */
+ memcpy (&devconf->hqos.subport[subport_id], &p, sizeof (p));
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to set the subport level parameters such as token
+ * bucket rate (bytes per second), token bucket size (bytes), traffic class
+ * rates (bytes per second) and token update period (milliseconds).
+ *
+ * By default, the '<em>rate</em>' is set to 1250000000 bytes/second (the 10GbE
+ * line rate: 10 Gbit/s / 8 bits per byte) and each of the four traffic classes
+ * is set to 100% of the port rate.
+ * If the '<em>rate</em>' is updated by this command, all four traffic classes
+ * are assigned the same value. Each of the four traffic classes can be updated
+ * individually.
+ *
+ * @cliexpar
+ * Example of how to modify the subport attributes for a 1GbE link:
+ * @cliexcmd{set dpdk interface hqos subport GigabitEthernet0/8/0 subport 0 rate 125000000}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = {
+ .path = "set dpdk interface hqos subport",
+ .short_help = "set dpdk interface hqos subport <interface> subport <subport_id> "
+ "[rate <n>] [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] "
+ "[period <n>]",
+ .function = set_dpdk_if_hqos_subport,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 tc = (u32) ~ 0;
+ u32 queue = (u32) ~ 0;
+ u32 entry = (u32) ~ 0;
+ u32 val, i;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "entry %d", &entry))
+ ;
+ else if (unformat (line_input, "tc %d", &tc))
+ ;
+ else if (unformat (line_input, "queue %d", &queue))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (hw_if_index == (u32) ~ 0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto done;
+ }
+ if (entry >= 64)
+ {
+ error = clib_error_return (0, "invalid entry");
+ goto done;
+ }
+ if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
+ {
+ error = clib_error_return (0, "invalid traffic class");
+ goto done;
+ }
+ if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+ {
+ error = clib_error_return (0, "invalid traffic class queue");
+ goto done;
+ }
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ /* Detect the set of worker threads */
+ uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ /* Should never happen, shut up Coverity warning */
+ if (p == 0)
+ {
+ error = clib_error_return (0, "no worker registrations?");
+ goto done;
+ }
+
+ vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0];
+ int worker_thread_first = tr->first_index;
+ int worker_thread_count = tr->count;
+
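+  /* Pack tc/queue into a single table entry; e.g. tc 2, queue 2 with 4
+     queues per traffic class encodes as 2 * 4 + 2 = 10. "show dpdk
+     interface hqos" decodes entries with / and % on the same constant. */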
+ val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+ for (i = 0; i < worker_thread_count; i++)
+ xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val;
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to set the traffic class translation table. The
+ * traffic class translation table is used to map 64 values (0-63) to one of
+ * four traffic classes and one of four HQoS input queues. Use the '<em>show
+ * dpdk interface hqos</em>' command to display the traffic class translation
+ * table. See @ref qos_doc for more details.
+ *
+ * This command has the following parameters:
+ *
+ * - <b><interface></b> - Used to specify the output interface.
+ *
+ * - <b>entry <map_val></b> - Mapped value (0-63) to assign traffic class and queue to.
+ *
+ * - <b>tc <tc_id></b> - Traffic class (0-3) to be used by the provided mapped value.
+ *
+ * - <b>queue <queue_id></b> - HQoS input queue (0-3) to be used by the provided mapped value.
+ *
+ * @cliexpar
+ * Example of how to modify the traffic class translation table:
+ * @cliexcmd{set dpdk interface hqos tctbl GigabitEthernet0/8/0 entry 16 tc 2 queue 2}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = {
+ .path = "set dpdk interface hqos tctbl",
+ .short_help = "set dpdk interface hqos tctbl <interface> entry <map_val> tc <tc_id> queue <queue_id>",
+ .function = set_dpdk_if_hqos_tctbl,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+ clib_error_t *error = NULL;
+
+ /* Device specific data */
+ struct rte_eth_dev_info dev_info;
+ dpdk_device_config_t *devconf = 0;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+
+ /* Detect the set of worker threads */
+ uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ /* Should never happen, shut up Coverity warning */
+ if (p == 0)
+ return clib_error_return (0, "no worker registrations?");
+
+ vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0];
+ int worker_thread_first = tr->first_index;
+ int worker_thread_count = tr->count;
+
+ /* Packet field configuration */
+ u64 mask = (u64) ~ 0;
+ u32 id = (u32) ~ 0;
+ u32 offset = (u32) ~ 0;
+
+ /* HQoS params */
+ u32 n_subports_per_port, n_pipes_per_subport, tctbl_size;
+
+ u32 i;
+
+ /* Parse input arguments */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "id subport"))
+ id = 0;
+ else if (unformat (line_input, "id pipe"))
+ id = 1;
+ else if (unformat (line_input, "id tc"))
+ id = 2;
+ else if (unformat (line_input, "id %d", &id))
+ ;
+ else if (unformat (line_input, "offset %d", &offset))
+ ;
+ else if (unformat (line_input, "mask %llx", &mask))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ /* Get interface */
+ if (hw_if_index == (u32) ~ 0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto done;
+ }
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rte_eth_dev_info_get (xd->device_index, &dev_info);
+ if (dev_info.pci_dev)
+ { /* bonded interface has no pci info */
+ vlib_pci_addr_t pci_addr;
+
+ pci_addr.domain = dev_info.pci_dev->addr.domain;
+ pci_addr.bus = dev_info.pci_dev->addr.bus;
+ pci_addr.slot = dev_info.pci_dev->addr.devid;
+ pci_addr.function = dev_info.pci_dev->addr.function;
+
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+ if (p)
+ devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ else
+ devconf = &dm->conf->default_devconf;
+
+ if (devconf->hqos_enabled == 0)
+ {
+ vlib_cli_output (vm, "HQoS disabled for this interface");
+ goto done;
+ }
+
+ n_subports_per_port = devconf->hqos.port.n_subports_per_port;
+ n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport;
+ tctbl_size = RTE_DIM (devconf->hqos.tc_table);
+
+ /* Validate packet field configuration: id, offset and mask */
+ if (id >= 3)
+ {
+ error = clib_error_return (0, "invalid packet field id");
+ goto done;
+ }
+
+ switch (id)
+ {
+ case 0:
+ if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0)
+ {
+ error = clib_error_return (0, "invalid subport ID mask "
+ "(n_subports_per_port = %u)",
+ n_subports_per_port);
+ goto done;
+ }
+ break;
+ case 1:
+ if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0)
+ {
+ error = clib_error_return (0, "invalid pipe ID mask "
+ "(n_pipes_per_subport = %u)",
+ n_pipes_per_subport);
+ goto done;
+ }
+ break;
+ case 2:
+ default:
+ if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0)
+ {
+ error = clib_error_return (0, "invalid TC table index mask "
+ "(TC table size = %u)", tctbl_size);
+ goto done;
+ }
+ }
+
+ /* Propagate packet field configuration to all workers */
+ for (i = 0; i < worker_thread_count; i++)
+ switch (id)
+ {
+ case 0:
+ xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset;
+ xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask;
+ xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr =
+ __builtin_ctzll (mask);
+ break;
+ case 1:
+ xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset;
+ xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask;
+ xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr =
+ __builtin_ctzll (mask);
+ break;
+ case 2:
+ default:
+ xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset;
+ xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask;
+ xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr =
+ __builtin_ctzll (mask);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to set the packet fields required for classifying the
+ * incoming packet. As a result of the classification process, packet field
+ * information will be mapped to a 5-tuple (subport, pipe, traffic class,
+ * queue, color) and stored in the packet mbuf.
+ *
+ * This command has the following parameters:
+ *
+ * - <b><interface></b> - Used to specify the output interface.
+ *
+ * - <b>id subport|pipe|tc</b> - Classification occurs across three fields.
+ * This parameter indicates which of the three masks is being configured. Legacy
+ * code used 0-2 to represent these three fields, so 0-2 is still accepted.
+ * - <b>subport|0</b> - Currently only one subport is supported, so only
+ * an empty mask is supported for the subport classification.
+ * - <b>pipe|1</b> - Currently, 4096 pipes per subport are supported, so a
+ * 12-bit mask should be configured to map to the 0-4095 pipes.
+ * - <b>tc|2</b> - The translation table (see '<em>set dpdk interface hqos
+ * tctbl</em>' command) maps each value (0-63) into one of the 4 traffic classes
+ * per pipe. A 6-bit mask should be configured to map this field to a traffic class.
+ *
+ * - <b>offset <n></b> - Offset in the packet to apply the 64-bit mask for classification.
+ * The offset should be on an 8-byte boundary (0,8,16,24..).
+ *
+ * - <b>mask <hex-mask></b> - 64-bit mask to apply to packet at the given '<em>offset</em>'.
+ * Bits must be contiguous, and the mask is written without the '<em>0x</em>' prefix.
+ *
+ * The default values for the '<em>pktfield</em>' masks assume Ethernet/IPv4/UDP packets with
+ * no VLAN. Adjust based on expected packet format and desired classification field.
+ * - '<em>subport</em>' is always empty (offset 0 mask 0000000000000000)
+ * - By default, '<em>pipe</em>' maps to the UDP payload bits 12 .. 23 (offset 40
+ * mask 0000000fff000000)
+ * - By default, '<em>tc</em>' maps to the DSCP field in IP header (offset 48 mask
+ * 00000000000000fc)
+ *
+ * @cliexpar
+ * Example of how to modify the '<em>pipe</em>' classification filter to match VLAN:
+ * @cliexcmd{set dpdk interface hqos pktfield GigabitEthernet0/8/0 id pipe offset 8 mask 0000000000000FFF}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = {
+ .path = "set dpdk interface hqos pktfield",
+ .short_help = "set dpdk interface hqos pktfield <interface> id subport|pipe|tc offset <n> "
+ "mask <hex-mask>",
+ .function = set_dpdk_if_hqos_pktfield,
+};
+/* *INDENT-ON* */
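+
+/*
+ * Minimal sketch (illustration only, not part of the CLI) of how a worker
+ * applies one configured packet field at runtime: load the 8-byte slab at
+ * the configured offset, mask it, then shift right by the number of
+ * trailing zero bits in the mask. The function name is hypothetical; the
+ * production logic lives in dpdk_hqos_metadata_set ().
+ */
+static_always_inline u64
+hqos_slab_extract_sketch (u8 * pkt, u32 slabpos, u64 slabmask)
+{
+  u64 slab = *(u64 *) (pkt + slabpos);	/* offset is on an 8-byte boundary */
+  return (slab & slabmask) >> __builtin_ctzll (slabmask);
+}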
+
+static clib_error_t *
+show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ dpdk_device_config_hqos_t *cfg;
+ dpdk_device_hqos_per_hqos_thread_t *ht;
+ dpdk_device_hqos_per_worker_thread_t *wk;
+ u32 *tctbl;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 profile_id, subport_id, i;
+ struct rte_eth_dev_info dev_info;
+ dpdk_device_config_t *devconf = 0;
+ vlib_thread_registration_t *tr;
+ uword *p = 0;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (hw_if_index == (u32) ~ 0)
+ {
+      error = clib_error_return (0, "please specify valid interface name");
+ goto done;
+ }
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rte_eth_dev_info_get (xd->device_index, &dev_info);
+ if (dev_info.pci_dev)
+ { /* bonded interface has no pci info */
+ vlib_pci_addr_t pci_addr;
+
+ pci_addr.domain = dev_info.pci_dev->addr.domain;
+ pci_addr.bus = dev_info.pci_dev->addr.bus;
+ pci_addr.slot = dev_info.pci_dev->addr.devid;
+ pci_addr.function = dev_info.pci_dev->addr.function;
+
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+ if (p)
+ devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ else
+ devconf = &dm->conf->default_devconf;
+
+ if (devconf->hqos_enabled == 0)
+ {
+ vlib_cli_output (vm, "HQoS disabled for this interface");
+ goto done;
+ }
+
+ /* Detect the set of worker threads */
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+
+ /* Should never happen, shut up Coverity warning */
+ if (p == 0)
+ {
+ error = clib_error_return (0, "no worker registrations?");
+ goto done;
+ }
+
+ tr = (vlib_thread_registration_t *) p[0];
+
+ cfg = &devconf->hqos;
+ ht = xd->hqos_ht;
+ wk = &xd->hqos_wt[tr->first_index];
+ tctbl = wk->hqos_tc_table;
+
+ vlib_cli_output (vm, " Thread:");
+ vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size);
+ vlib_cli_output (vm, " Enqueue burst size = %u packets",
+ ht->hqos_burst_enq);
+ vlib_cli_output (vm, " Dequeue burst size = %u packets",
+ ht->hqos_burst_deq);
+
+ vlib_cli_output (vm,
+ " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx (subport)",
+ wk->hqos_field0_slabpos, wk->hqos_field0_slabmask);
+ vlib_cli_output (vm,
+ " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx (pipe)",
+ wk->hqos_field1_slabpos, wk->hqos_field1_slabmask);
+ vlib_cli_output (vm,
+ " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx (tc)",
+ wk->hqos_field2_slabpos, wk->hqos_field2_slabmask);
+ vlib_cli_output (vm,
+ " Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)");
+  /* 64 entries, displayed 16 per row as tc/queue pairs */
+  for (i = 0; i < 64; i += 16)
+    {
+      u32 j;
+      u8 *s = format (0, "   [%2u .. %2u]:", i, i + 15);
+      for (j = i; j < i + 16; j++)
+	s = format (s, " %u/%u",
+		    tctbl[j] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS,
+		    tctbl[j] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
+      vlib_cli_output (vm, "%v", s);
+      vec_free (s);
+    }
+ vlib_cli_output (vm, " Port:");
+ vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate);
+ vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu);
+ vlib_cli_output (vm, " Frame overhead = %u bytes",
+ cfg->port.frame_overhead);
+ vlib_cli_output (vm, " Number of subports = %u",
+ cfg->port.n_subports_per_port);
+ vlib_cli_output (vm, " Number of pipes per subport = %u",
+ cfg->port.n_pipes_per_subport);
+ vlib_cli_output (vm,
+ " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets",
+ cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2],
+ cfg->port.qsize[3]);
+ vlib_cli_output (vm, " Number of pipe profiles = %u",
+ cfg->port.n_pipe_profiles);
+
+ for (subport_id = 0; subport_id < vec_len (cfg->subport); subport_id++)
+ {
+ vlib_cli_output (vm, " Subport %u:", subport_id);
+ vlib_cli_output (vm, " Rate = %u bytes/second",
+ cfg->subport[subport_id].tb_rate);
+ vlib_cli_output (vm, " Token bucket size = %u bytes",
+ cfg->subport[subport_id].tb_size);
+ vlib_cli_output (vm,
+ " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second",
+ cfg->subport[subport_id].tc_rate[0],
+ cfg->subport[subport_id].tc_rate[1],
+ cfg->subport[subport_id].tc_rate[2],
+ cfg->subport[subport_id].tc_rate[3]);
+ vlib_cli_output (vm, " TC period = %u milliseconds",
+ cfg->subport[subport_id].tc_period);
+ }
+
+ for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++)
+ {
+ vlib_cli_output (vm, " Pipe profile %u:", profile_id);
+ vlib_cli_output (vm, " Rate = %u bytes/second",
+ cfg->pipe[profile_id].tb_rate);
+ vlib_cli_output (vm, " Token bucket size = %u bytes",
+ cfg->pipe[profile_id].tb_size);
+ vlib_cli_output (vm,
+ " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second",
+ cfg->pipe[profile_id].tc_rate[0],
+ cfg->pipe[profile_id].tc_rate[1],
+ cfg->pipe[profile_id].tc_rate[2],
+ cfg->pipe[profile_id].tc_rate[3]);
+ vlib_cli_output (vm, " TC period = %u milliseconds",
+ cfg->pipe[profile_id].tc_period);
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ vlib_cli_output (vm, " TC3 oversubscription_weight = %u",
+ cfg->pipe[profile_id].tc_ov_weight);
+#endif
+
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+ {
+ vlib_cli_output (vm,
+ " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u",
+ i, cfg->pipe[profile_id].wrr_weights[i * 4],
+ cfg->pipe[profile_id].wrr_weights[i * 4 + 1],
+ cfg->pipe[profile_id].wrr_weights[i * 4 + 2],
+ cfg->pipe[profile_id].wrr_weights[i * 4 + 3]);
+ }
+ }
+
+#ifdef RTE_SCHED_RED
+ vlib_cli_output (vm, " Weighted Random Early Detection (WRED):");
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+ {
+ vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i,
+ cfg->port.red_params[i][e_RTE_METER_GREEN].min_th,
+ cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th,
+ cfg->port.red_params[i][e_RTE_METER_RED].min_th);
+
+ vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i,
+ cfg->port.red_params[i][e_RTE_METER_GREEN].max_th,
+ cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th,
+ cfg->port.red_params[i][e_RTE_METER_RED].max_th);
+
+ vlib_cli_output (vm,
+ " TC%u inverted probability: G = %u, Y = %u, R = %u",
+ i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv,
+ cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv,
+ cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv);
+
+      vlib_cli_output (vm, "      TC%u weight: G = %u, Y = %u, R = %u", i,
+ cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2,
+ cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2,
+ cfg->port.red_params[i][e_RTE_METER_RED].wq_log2);
+ }
+#endif
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to display details of an output interface's HQoS
+ * settings.
+ *
+ * @cliexpar
+ * Example of how to display HQoS settings for an interface:
+ * @cliexstart{show dpdk interface hqos GigabitEthernet0/8/0}
+ * Thread:
+ * Input SWQ size = 4096 packets
+ * Enqueue burst size = 256 packets
+ * Dequeue burst size = 220 packets
+ * Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport)
+ * Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe)
+ * Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc)
+ * Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)
+ * [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ * [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ * [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ * [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ * Port:
+ * Rate = 1250000000 bytes/second
+ * MTU = 1514 bytes
+ * Frame overhead = 24 bytes
+ * Number of subports = 1
+ * Number of pipes per subport = 4096
+ * Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets
+ * Number of pipe profiles = 2
+ * Subport 0:
+ * Rate = 1250000000 bytes/second
+ * Token bucket size = 1000000 bytes
+ * Traffic class rate: TC0 = 1250000000, TC1 = 1250000000, TC2 = 1250000000, TC3 = 1250000000 bytes/second
+ * TC period = 10 milliseconds
+ * Pipe profile 0:
+ * Rate = 305175 bytes/second
+ * Token bucket size = 1000000 bytes
+ * Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second
+ * TC period = 40 milliseconds
+ * TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+ * TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+ * TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+ * TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = {
+ .path = "show dpdk interface hqos",
+ .short_help = "show dpdk interface hqos <interface>",
+ .function = show_dpdk_if_hqos,
+};
+
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+#ifdef RTE_SCHED_COLLECT_STATS
+ dpdk_main_t *dm = &dpdk_main;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 subport = (u32) ~ 0;
+ u32 pipe = (u32) ~ 0;
+ u32 tc = (u32) ~ 0;
+ u32 tc_q = (u32) ~ 0;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ uword *p = 0;
+ struct rte_eth_dev_info dev_info;
+ dpdk_device_config_t *devconf = 0;
+ u32 qindex;
+ struct rte_sched_queue_stats stats;
+ u16 qlen;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+
+ else if (unformat (line_input, "subport %d", &subport))
+ ;
+
+ else if (unformat (line_input, "pipe %d", &pipe))
+ ;
+
+ else if (unformat (line_input, "tc %d", &tc))
+ ;
+
+ else if (unformat (line_input, "tc_q %d", &tc_q))
+ ;
+
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (hw_if_index == (u32) ~ 0)
+ {
+      error = clib_error_return (0, "please specify valid interface name");
+ goto done;
+ }
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rte_eth_dev_info_get (xd->device_index, &dev_info);
+ if (dev_info.pci_dev)
+ { /* bonded interface has no pci info */
+ vlib_pci_addr_t pci_addr;
+
+ pci_addr.domain = dev_info.pci_dev->addr.domain;
+ pci_addr.bus = dev_info.pci_dev->addr.bus;
+ pci_addr.slot = dev_info.pci_dev->addr.devid;
+ pci_addr.function = dev_info.pci_dev->addr.function;
+
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+ if (p)
+ devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ else
+ devconf = &dm->conf->default_devconf;
+
+ if (devconf->hqos_enabled == 0)
+ {
+ vlib_cli_output (vm, "HQoS disabled for this interface");
+ goto done;
+ }
+
+ /*
+ * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why
+ * that method isn't made public by DPDK - how _should_ we get the queue ID?)
+ */
+ qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe;
+ qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc;
+ qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q;
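+
+  /*
+   * Worked example with the defaults shown by "show dpdk interface hqos"
+   * (4096 pipes per subport, 4 TCs, 4 queues per TC): subport 0, pipe 3181,
+   * tc 0, tc_q 0 gives qindex = ((0 * 4096 + 3181) * 4 + 0) * 4 + 0 = 50896.
+   */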
+
+ if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) !=
+ 0)
+ {
+ error = clib_error_return (0, "failed to read stats");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value");
+ vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts);
+ vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped);
+#ifdef RTE_SCHED_RED
+ vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)",
+ stats.n_pkts_red_dropped);
+#endif
+ vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes);
+ vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped);
+
+#else
+
+ /* Get a line of input */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ vlib_cli_output (vm, "RTE_SCHED_COLLECT_STATS disabled in DPDK");
+ goto done;
+
+#endif
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to display statistics associated with a HQoS traffic class
+ * queue.
+ *
+ * @note
+ * Statistics collection by the scheduler is disabled by default in DPDK. In order to
+ * turn it on, add the following line to '<em>../vpp/dpdk/Makefile</em>':
+ * - <b>$(call set,RTE_SCHED_COLLECT_STATS,y)</b>
+ *
+ * @cliexpar
+ * Example of how to display statistics for an HQoS traffic class queue:
+ * @cliexstart{show dpdk hqos queue GigabitEthernet0/9/0 subport 0 pipe 3181 tc 0 tc_q 0}
+ * Stats Parameter Value
+ * Packets 140
+ * Packets dropped 0
+ * Bytes 8400
+ * Bytes dropped 0
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = {
+ .path = "show dpdk hqos queue",
+ .short_help = "show dpdk hqos queue <interface> subport <subport_id> pipe <pipe_id> tc <tc_id> tc_q <queue_id>",
+ .function = show_dpdk_hqos_queue_stats,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_dpdk_version_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c);
+ _("DPDK Version", "%s", rte_version ());
+ _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str);
+#undef _
+ return 0;
+}
+
+/*?
+ * This command is used to display the current DPDK version and
+ * the list of arguments passed to DPDK when started.
+ *
+ * @cliexpar
+ * Example of how to display the DPDK version and EAL init arguments:
+ * @cliexstart{show dpdk version}
+ * DPDK Version: DPDK 16.11.0
+ * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
+ .path = "show dpdk version",
+ .short_help = "show dpdk version",
+ .function = show_dpdk_version_command_fn,
+};
+/* *INDENT-ON* */
+
+#if CLI_DEBUG
+
+static clib_error_t *
+dpdk_validate_buffers_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg)
+{
+ u32 n_invalid_bufs = 0, uninitialized = 0;
+ u32 is_poison = 0, is_test = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "poison"))
+ is_poison = 1;
+ else if (unformat (input, "trajectory"))
+ is_test = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (VLIB_BUFFER_TRACE_TRAJECTORY == 0)
+ {
+ vlib_cli_output (vm, "Trajectory not enabled. Recompile with "
+ "VLIB_BUFFER_TRACE_TRAJECTORY 1");
+ return 0;
+ }
+ if (is_poison)
+ {
+ dpdk_buffer_poison_trajectory_all ();
+ }
+ if (is_test)
+ {
+ n_invalid_bufs = dpdk_buffer_validate_trajectory_all (&uninitialized);
+ if (!n_invalid_bufs)
+ vlib_cli_output (vm, "All buffers are valid %d uninitialized",
+ uninitialized);
+ else
+ vlib_cli_output (vm, "Found %d invalid buffers and %d uninitialized",
+ n_invalid_bufs, uninitialized);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_dpdk_buffers_command, static) =
+{
+ .path = "test dpdk buffers",
+ .short_help = "test dpdk buffers [poison] [trajectory]",
+ .function = dpdk_validate_buffers_fn,
+};
+/* *INDENT-ON* */
+
+#endif
+
+clib_error_t *
+dpdk_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (dpdk_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
new file mode 100644
index 00000000..aedc3f52
--- /dev/null
+++ b/src/plugins/dpdk/device/common.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/format.h>
+#include <vlib/unix/cj.h>
+#include <assert.h>
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <dpdk/device/dpdk.h>
+
+#include <dpdk/device/dpdk_priv.h>
+#include <vppinfra/error.h>
+
+void
+dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
+{
+ xd->errors = clib_error_return (xd->errors, "%s[port:%d, errno:%d]: %s",
+ str, xd->device_index, rv,
+ rte_strerror (rv));
+}
+
+void
+dpdk_device_setup (dpdk_device_t * xd)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
+ int rv;
+ int j;
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ clib_error_free (xd->errors);
+ sw->flags &= ~VNET_SW_INTERFACE_FLAG_ERROR;
+
+ if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
+ {
+ vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0);
+ dpdk_device_stop (xd);
+ }
+
+ rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used,
+ xd->tx_q_used, &xd->port_conf);
+
+ if (rv < 0)
+ {
+ dpdk_device_error (xd, "rte_eth_dev_configure", rv);
+ goto error;
+ }
+
+ /* Set up one TX-queue per worker thread */
+ for (j = 0; j < xd->tx_q_used; j++)
+ {
+ rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc,
+ xd->cpu_socket, &xd->tx_conf);
+
+ /* retry with any other CPU socket */
+ if (rv < 0)
+ rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc,
+ SOCKET_ID_ANY, &xd->tx_conf);
+ if (rv < 0)
+ dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv);
+ }
+
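+  /* Set up RX queues, drawing each queue's mbufs from the pool on the
+     CPU socket of the thread that will poll it */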
+ for (j = 0; j < xd->rx_q_used; j++)
+ {
+ uword tidx = vnet_get_device_input_thread_index (dm->vnet_main,
+ xd->hw_if_index, j);
+ unsigned lcore = vlib_worker_threads[tidx].lcore_id;
+ u16 socket_id = rte_lcore_to_socket_id (lcore);
+
+ rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc,
+ xd->cpu_socket, 0,
+ dm->pktmbuf_pools[socket_id]);
+
+ /* retry with any other CPU socket */
+ if (rv < 0)
+ rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc,
+ SOCKET_ID_ANY, 0,
+ dm->pktmbuf_pools[socket_id]);
+
+ if (rv < 0)
+ dpdk_device_error (xd, "rte_eth_rx_queue_setup", rv);
+ }
+
+ if (vec_len (xd->errors))
+ goto error;
+
+ if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
+ dpdk_device_start (xd);
+
+ if (vec_len (xd->errors))
+ goto error;
+
+ return;
+
+error:
+ xd->flags |= DPDK_DEVICE_FLAG_PMD_INIT_FAIL;
+ sw->flags |= VNET_SW_INTERFACE_FLAG_ERROR;
+}
+
+void
+dpdk_device_start (dpdk_device_t * xd)
+{
+ int rv;
+
+ if (xd->flags & DPDK_DEVICE_FLAG_PMD_INIT_FAIL)
+ return;
+
+ rv = rte_eth_dev_start (xd->device_index);
+
+ if (rv)
+ {
+ dpdk_device_error (xd, "rte_eth_dev_start", rv);
+ return;
+ }
+
+ if (xd->default_mac_address)
+ rv =
+ rte_eth_dev_default_mac_addr_set (xd->device_index,
+ (struct ether_addr *)
+ xd->default_mac_address);
+
+ if (rv)
+ dpdk_device_error (xd, "rte_eth_dev_default_mac_addr_set", rv);
+
+ if (xd->flags & DPDK_DEVICE_FLAG_PROMISC)
+ rte_eth_promiscuous_enable (xd->device_index);
+ else
+ rte_eth_promiscuous_disable (xd->device_index);
+
+ rte_eth_allmulticast_enable (xd->device_index);
+
+ if (xd->pmd == VNET_DPDK_PMD_BOND)
+ {
+ u8 slink[16];
+ int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16);
+ while (nlink >= 1)
+ {
+ u8 dpdk_port = slink[--nlink];
+ rte_eth_allmulticast_enable (dpdk_port);
+ }
+ }
+}
+
+void
+dpdk_device_stop (dpdk_device_t * xd)
+{
+ if (xd->flags & DPDK_DEVICE_FLAG_PMD_INIT_FAIL)
+ return;
+
+ rte_eth_allmulticast_disable (xd->device_index);
+ rte_eth_dev_stop (xd->device_index);
+
+ /* For bonded interface, stop slave links */
+ if (xd->pmd == VNET_DPDK_PMD_BOND)
+ {
+ u8 slink[16];
+ int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16);
+ while (nlink >= 1)
+ {
+ u8 dpdk_port = slink[--nlink];
+ rte_eth_dev_stop (dpdk_port);
+ }
+ }
+}
+
+/* Event type for send_garp_na_process */
+typedef enum
+{
+  SEND_GARP_NA = 1,
+} dpdk_send_garp_na_process_event_t;
+
+static vlib_node_registration_t send_garp_na_proc_node;
+
+static uword
+send_garp_na_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ uword event_type, *event_data = 0;
+
+ while (1)
+ {
+ u32 i;
+ uword dpdk_port;
+ vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &event_data);
+ ASSERT (event_type == SEND_GARP_NA);
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ dpdk_port = event_data[i];
+ if (i < 5) /* wait 0.2 sec for link to settle, max total 1 sec */
+ vlib_process_suspend (vm, 0.2);
+ dpdk_device_t *xd = &dpdk_main.devices[dpdk_port];
+ u32 hw_if_index = xd->hw_if_index;
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ dpdk_update_link_state (xd, vlib_time_now (vm));
+ send_ip4_garp (vm, hi);
+ send_ip6_na (vm, hi);
+ }
+ vec_reset_length (event_data);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (send_garp_na_proc_node, static) = {
+ .function = send_garp_na_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "send-garp-na-process",
+};
+/* *INDENT-ON* */
+
+void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
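+/*
+ * RPC callback executed on the main thread: signal the
+ * send-garp-na-process node with the given DPDK port so GARP / IPv6 NA
+ * packets are emitted on that interface.
+ */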
+static void
+garp_na_proc_callback (uword * dpdk_port)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ASSERT (vlib_get_thread_index () == 0);
+ vlib_process_signal_event
+ (vm, send_garp_na_proc_node.index, SEND_GARP_NA, *dpdk_port);
+}
+
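+/*
+ * Handler for DPDK link-state-change (LSC) interrupts. For a bond slave in
+ * active-backup mode, request gratuitous ARP / IPv6 NA on the bond
+ * interface via an RPC to the main thread; otherwise just log the event.
+ */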
+always_inline int
+dpdk_port_state_callback_inline (uint8_t port_id,
+ enum rte_eth_event_type type, void *param)
+{
+ struct rte_eth_link link;
+ dpdk_device_t *xd = &dpdk_main.devices[port_id];
+
+ RTE_SET_USED (param);
+ if (type != RTE_ETH_EVENT_INTR_LSC)
+ {
+ clib_warning ("Unknown event %d received for port %d", type, port_id);
+ return -1;
+ }
+
+ rte_eth_link_get_nowait (port_id, &link);
+ u8 link_up = link.link_status;
+
+ if (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE)
+ {
+ uword bd_port = xd->bond_port;
+ int bd_mode = rte_eth_bond_mode_get (bd_port);
+#if 0
+ clib_warning ("Port %d state to %s, "
+ "slave of port %d BondEthernet%d in mode %d",
+ port_id, (link_up) ? "UP" : "DOWN",
+ bd_port, xd->port_id, bd_mode);
+#endif
+ if (bd_mode == BONDING_MODE_ACTIVE_BACKUP)
+ {
+ vl_api_force_rpc_call_main_thread
+ (garp_na_proc_callback, (u8 *) & bd_port, sizeof (uword));
+ }
+      /* set or clear the slave-up flag explicitly; ORing with the
+         complement of the flag would corrupt every other flag bit */
+      if (link_up)
+	xd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE_UP;
+      else
+	xd->flags &= ~DPDK_DEVICE_FLAG_BOND_SLAVE_UP;
+ }
+ else /* Should not happen as callback not setup for "normal" links */
+ {
+ if (link_up)
+ clib_warning ("Port %d Link Up - speed %u Mbps - %s",
+ port_id, (unsigned) link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ "full-duplex" : "half-duplex");
+ else
+ clib_warning ("Port %d Link Down\n\n", port_id);
+ }
+
+ return 0;
+}
+
+#if DPDK_VOID_CALLBACK
+void
+dpdk_port_state_callback (uint8_t port_id,
+ enum rte_eth_event_type type, void *param)
+{
+ dpdk_port_state_callback_inline (port_id, type, param);
+}
+
+#else
+int
+dpdk_port_state_callback (uint8_t port_id,
+ enum rte_eth_event_type type,
+ void *param,
+ void *ret_param __attribute__ ((unused)))
+{
+ return dpdk_port_state_callback_inline (port_id, type, param);
+}
+#endif
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
new file mode 100644
index 00000000..aa134327
--- /dev/null
+++ b/src/plugins/dpdk/device/device.c
@@ -0,0 +1,856 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/format.h>
+#include <vlib/unix/cj.h>
+#include <assert.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <dpdk/device/dpdk.h>
+
+#include <dpdk/device/dpdk_priv.h>
+#include <vppinfra/error.h>
+
+#define foreach_dpdk_tx_func_error \
+ _(BAD_RETVAL, "DPDK tx function returned an error") \
+ _(RING_FULL, "Tx packet drops (ring full)") \
+ _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \
+ _(REPL_FAIL, "Tx packet drops (replication failure)")
+
+typedef enum
+{
+#define _(f,s) DPDK_TX_FUNC_ERROR_##f,
+ foreach_dpdk_tx_func_error
+#undef _
+ DPDK_TX_FUNC_N_ERROR,
+} dpdk_tx_func_error_t;
+
+static char *dpdk_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_dpdk_tx_func_error
+#undef _
+};
+
+static clib_error_t *
+dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address)
+{
+ int error;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
+
+ error = rte_eth_dev_default_mac_addr_set (xd->device_index,
+ (struct ether_addr *) address);
+
+ if (error)
+ {
+ return clib_error_return (0, "mac address set failed: %d", error);
+ }
+ else
+ {
+ vec_reset_length (xd->default_mac_address);
+      /* an Ethernet MAC address is 6 bytes; sizeof (address) would be the
+         size of the pointer, not of the address */
+      vec_add (xd->default_mac_address, address, 6);
+ return NULL;
+ }
+}
+
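+/*
+ * Clone the rte_mbuf chain behind a vlib buffer into freshly allocated
+ * mbufs from the current socket's pool, copying headroom and data for each
+ * segment. Returns the head of the new chain, or 0 on allocation failure.
+ */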
+struct rte_mbuf *
+dpdk_replicate_packet_mb (vlib_buffer_t * b)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ struct rte_mbuf **mbufs = 0, *s, *d;
+ u8 nb_segs;
+ unsigned socket_id = rte_socket_id ();
+ int i;
+
+ ASSERT (dm->pktmbuf_pools[socket_id]);
+ s = rte_mbuf_from_vlib_buffer (b);
+ nb_segs = s->nb_segs;
+ vec_validate (mbufs, nb_segs - 1);
+
+ if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs))
+ {
+ vec_free (mbufs);
+ return 0;
+ }
+
+ d = mbufs[0];
+ d->nb_segs = s->nb_segs;
+ d->data_len = s->data_len;
+ d->pkt_len = s->pkt_len;
+ d->data_off = s->data_off;
+ clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len);
+
+ for (i = 1; i < nb_segs; i++)
+ {
+ d->next = mbufs[i];
+ d = mbufs[i];
+ s = s->next;
+ d->data_len = s->data_len;
+ clib_memcpy (d->buf_addr, s->buf_addr,
+ RTE_PKTMBUF_HEADROOM + s->data_len);
+ }
+
+ d = mbufs[0];
+ vec_free (mbufs);
+ return d;
+}
+
+static void
+dpdk_tx_trace_buffer (dpdk_main_t * dm,
+ vlib_node_runtime_t * node,
+ dpdk_device_t * xd,
+ u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ dpdk_tx_dma_trace_t *t0;
+ struct rte_mbuf *mb;
+
+ mb = rte_mbuf_from_vlib_buffer (buffer);
+
+ t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0]));
+ t0->queue_index = queue_id;
+ t0->device_index = xd->device_index;
+ t0->buffer_index = buffer_index;
+ clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
+ clib_memcpy (&t0->buffer, buffer,
+ sizeof (buffer[0]) - sizeof (buffer->pre_data));
+ clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
+ sizeof (t0->buffer.pre_data));
+}
+
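+/*
+ * Bring the rte_mbuf metadata in line with the vlib buffer (chain): reset
+ * mbufs that came from a non-DPDK source, set data_len/pkt_len/data_off
+ * for each segment, chain the segments and count them in the head mbuf's
+ * nb_segs.
+ */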
+static_always_inline void
+dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
+ int maybe_multiseg)
+{
+ struct rte_mbuf *mb, *first_mb, *last_mb;
+
+ /* buffer is coming from non-dpdk source so we need to init
+ rte_mbuf header */
+ if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_EXT_HDR_VALID) == 0))
+ {
+ vlib_buffer_t *b2 = b;
+ last_mb = mb = rte_mbuf_from_vlib_buffer (b2);
+ rte_pktmbuf_reset (mb);
+ while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ b2 = vlib_get_buffer (vm, b2->next_buffer);
+ mb = rte_mbuf_from_vlib_buffer (b2);
+ rte_pktmbuf_reset (mb);
+ }
+ }
+
+ last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b);
+ first_mb->nb_segs = 1;
+ mb->data_len = b->current_length;
+ mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) :
+ b->current_length;
+ mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data;
+
+ while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ mb = rte_mbuf_from_vlib_buffer (b);
+ last_mb->next = mb;
+ last_mb = mb;
+ mb->data_len = b->current_length;
+ mb->pkt_len = b->current_length;
+ mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data;
+ first_mb->nb_segs++;
+ if (PREDICT_FALSE (b->n_add_refs))
+ {
+ rte_mbuf_refcnt_update (mb, b->n_add_refs);
+ b->n_add_refs = 0;
+ }
+ }
+}
+
+/*
+ * This function calls the dpdk's tx_burst function to transmit the packets
+ * on the tx_vector. It manages a lock per-device if the device does not
+ * support multiple queues. It returns the number of packets untransmitted
+ * on the tx_vector. If all packets are transmitted (the normal case), the
+ * function returns 0.
+ *
+ * The function assumes there is at least one packet on the tx_vector.
+ */
+static_always_inline
+ u32 tx_burst_vector_internal (vlib_main_t * vm,
+ dpdk_device_t * xd,
+ struct rte_mbuf **tx_vector)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ u32 n_packets;
+ u32 tx_head;
+ u32 tx_tail;
+ u32 n_retry;
+ int rv;
+ int queue_id;
+ tx_ring_hdr_t *ring;
+
+ ring = vec_header (tx_vector, sizeof (*ring));
+
+ n_packets = ring->tx_head - ring->tx_tail;
+
+ tx_head = ring->tx_head % xd->nb_tx_desc;
+
+ /*
+ * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to
+ * unpredictable results.
+ */
+ ASSERT (n_packets > 0);
+
+ /*
+ * Check for tx_vector overflow. If this fails it is a system configuration
+ * error. The ring should be sized big enough to handle the largest un-flowed
+ * off burst from a traffic manager. A larger size also helps performance
+ * a bit because it decreases the probability of having to issue two tx_burst
+ * calls due to a ring wrap.
+ */
+ ASSERT (n_packets < xd->nb_tx_desc);
+ ASSERT (ring->tx_tail == 0);
+
+ n_retry = 16;
+ queue_id = vm->thread_index;
+
+ do
+ {
+ /* start the burst at the tail */
+ tx_tail = ring->tx_tail % xd->nb_tx_desc;
+
+ /*
+ * This device only supports one TX queue,
+ * and we're running multi-threaded...
+ */
+ if (PREDICT_FALSE (xd->lockp != 0))
+ {
+ queue_id = queue_id % xd->tx_q_used;
+ while (__sync_lock_test_and_set (xd->lockp[queue_id], 1))
+ /* zzzz */
+ queue_id = (queue_id + 1) % xd->tx_q_used;
+ }
+
+ if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */
+ {
+ /* no wrap, transmit in one burst */
+ dpdk_device_hqos_per_worker_thread_t *hqos =
+ &xd->hqos_wt[vm->thread_index];
+
+ ASSERT (hqos->swq != NULL);
+
+ dpdk_hqos_metadata_set (hqos,
+ &tx_vector[tx_tail], tx_head - tx_tail);
+ rv = rte_ring_sp_enqueue_burst (hqos->swq,
+ (void **) &tx_vector[tx_tail],
+ (uint16_t) (tx_head - tx_tail), 0);
+ }
+ else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
+ {
+ /* no wrap, transmit in one burst */
+ rv = rte_eth_tx_burst (xd->device_index,
+ (uint16_t) queue_id,
+ &tx_vector[tx_tail],
+ (uint16_t) (tx_head - tx_tail));
+ }
+ else
+ {
+ ASSERT (0);
+ rv = 0;
+ }
+
+ if (PREDICT_FALSE (xd->lockp != 0))
+ *xd->lockp[queue_id] = 0;
+
+ if (PREDICT_FALSE (rv < 0))
+ {
+ // emit non-fatal message, bump counter
+ vnet_main_t *vnm = dm->vnet_main;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u32 node_index;
+
+ node_index = vec_elt_at_index (im->hw_interfaces,
+ xd->hw_if_index)->tx_node_index;
+
+ vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1);
+ clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index,
+ rv);
+ return n_packets; // untransmitted packets
+ }
+ ring->tx_tail += (u16) rv;
+ n_packets -= (uint16_t) rv;
+ }
+ while (rv && n_packets && (n_retry > 0));
+
+ return n_packets;
+}
+
+static_always_inline void
+dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi)
+{
+ vlib_buffer_t *b;
+ struct rte_mbuf *mb;
+ b = vlib_get_buffer (vm, bi);
+ mb = rte_mbuf_from_vlib_buffer (b);
+ CLIB_PREFETCH (mb, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+static_always_inline void
+dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ u32 my_cpu = vm->thread_index;
+ struct rte_mbuf *mb_new;
+
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0)
+ return;
+
+ mb_new = dpdk_replicate_packet_mb (b);
+ if (PREDICT_FALSE (mb_new == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
+ b->flags |= VLIB_BUFFER_REPL_FAIL;
+ }
+ else
+ *mbp = mb_new;
+
+ vec_add1 (dm->recycle[my_cpu], bi);
+}
+
+static_always_inline void
+dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b,
+ struct rte_mbuf *mb)
+{
+ u32 ip_cksum = b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+ u32 tcp_cksum = b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ u32 udp_cksum = b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
+ u64 ol_flags;
+
+ /* Is there any work for us? */
+ if (PREDICT_TRUE ((ip_cksum | tcp_cksum | udp_cksum) == 0))
+ return;
+
+ mb->l2_len = vnet_buffer (b)->l3_hdr_offset - b->current_data;
+ mb->l3_len = vnet_buffer (b)->l4_hdr_offset -
+ vnet_buffer (b)->l3_hdr_offset;
+ mb->outer_l3_len = 0;
+ mb->outer_l2_len = 0;
+ ol_flags = is_ip4 ? PKT_TX_IPV4 : PKT_TX_IPV6;
+ ol_flags |= ip_cksum ? PKT_TX_IP_CKSUM : 0;
+ ol_flags |= tcp_cksum ? PKT_TX_TCP_CKSUM : 0;
+ ol_flags |= udp_cksum ? PKT_TX_UDP_CKSUM : 0;
+ mb->ol_flags |= ol_flags;
+
+ /* Use the local ol_flags, whose complete state is known here, to help
+ the compiler generate better code */
+ if (xd->flags & DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM)
+ rte_net_intel_cksum_flags_prepare (mb, ol_flags);
+}
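+
+/*
+ * Example of the length arithmetic above (assumed offsets for a plain
+ * Ethernet/IPv4/TCP packet): with current_data = 0, l3_hdr_offset = 14
+ * and l4_hdr_offset = 34, the code sets mb->l2_len = 14 (Ethernet
+ * header) and mb->l3_len = 20 (IPv4 header), which is exactly what the
+ * NIC needs to locate the checksum fields.
+ */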
+
+/*
+ * Transmit the packets in the frame to the interface associated with the
+ * node. The rte_mbuf pointers for the frame's buffers are first copied
+ * into a per-queue tx_vector, which is then passed to
+ * tx_burst_vector_internal, which in turn calls the DPDK tx burst
+ * function.
+ */
+static uword
+dpdk_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance);
+ u32 n_packets = f->n_vectors;
+ u32 n_left;
+ u32 *from;
+ struct rte_mbuf **tx_vector;
+ u16 i;
+ u16 nb_tx_desc = xd->nb_tx_desc;
+ int queue_id;
+ u32 my_cpu;
+ u32 tx_pkts = 0;
+ tx_ring_hdr_t *ring;
+ u32 n_on_ring;
+
+ my_cpu = vm->thread_index;
+
+ queue_id = my_cpu;
+
+ tx_vector = xd->tx_vectors[queue_id];
+ ring = vec_header (tx_vector, sizeof (*ring));
+
+ n_on_ring = ring->tx_head - ring->tx_tail;
+ from = vlib_frame_vector_args (f);
+
+ ASSERT (n_packets <= VLIB_FRAME_SIZE);
+
+ if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc))
+ {
+ /*
+ * Overflowing the ring should never happen.
+ * If it does then drop the whole frame.
+ */
+ vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL,
+ n_packets);
+
+ while (n_packets--)
+ {
+ u32 bi0 = from[n_packets];
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0);
+ rte_pktmbuf_free (mb0);
+ }
+ return n_on_ring;
+ }
+
+ if (PREDICT_FALSE (dm->tx_pcap_enable))
+ {
+ n_left = n_packets;
+ while (n_left > 0)
+ {
+ u32 bi0 = from[0];
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ if (dm->pcap_sw_if_index == 0 ||
+ dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX])
+ pcap_add_buffer (&dm->pcap_main, vm, bi0, 512);
+ from++;
+ n_left--;
+ }
+ }
+
+ from = vlib_frame_vector_args (f);
+ n_left = n_packets;
+ i = ring->tx_head % nb_tx_desc;
+
+ while (n_left >= 8)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 or_flags;
+
+ dpdk_prefetch_buffer_by_index (vm, from[4]);
+ dpdk_prefetch_buffer_by_index (vm, from[5]);
+ dpdk_prefetch_buffer_by_index (vm, from[6]);
+ dpdk_prefetch_buffer_by_index (vm, from[7]);
+
+ bi0 = from[0];
+ bi1 = from[1];
+ bi2 = from[2];
+ bi3 = from[3];
+ from += 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ or_flags = b0->flags | b1->flags | b2->flags | b3->flags;
+
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
+
+ if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ dpdk_validate_rte_mbuf (vm, b0, 1);
+ dpdk_validate_rte_mbuf (vm, b1, 1);
+ dpdk_validate_rte_mbuf (vm, b2, 1);
+ dpdk_validate_rte_mbuf (vm, b3, 1);
+ }
+ else
+ {
+ dpdk_validate_rte_mbuf (vm, b0, 0);
+ dpdk_validate_rte_mbuf (vm, b1, 0);
+ dpdk_validate_rte_mbuf (vm, b2, 0);
+ dpdk_validate_rte_mbuf (vm, b3, 0);
+ }
+
+ mb0 = rte_mbuf_from_vlib_buffer (b0);
+ mb1 = rte_mbuf_from_vlib_buffer (b1);
+ mb2 = rte_mbuf_from_vlib_buffer (b2);
+ mb3 = rte_mbuf_from_vlib_buffer (b3);
+
+ if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) &&
+ (or_flags &
+ (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
+ | VNET_BUFFER_F_OFFLOAD_IP_CKSUM
+ | VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))))
+ {
+ dpdk_buffer_tx_offload (xd, b0, mb0);
+ dpdk_buffer_tx_offload (xd, b1, mb1);
+ dpdk_buffer_tx_offload (xd, b2, mb2);
+ dpdk_buffer_tx_offload (xd, b3, mb3);
+ }
+
+ if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE))
+ {
+ dpdk_buffer_recycle (vm, node, b0, bi0, &mb0);
+ dpdk_buffer_recycle (vm, node, b1, bi1, &mb1);
+ dpdk_buffer_recycle (vm, node, b2, bi2, &mb2);
+ dpdk_buffer_recycle (vm, node, b3, bi3, &mb3);
+
+ /* don't enqueue packets if replication failed, as they must
+ be sent back for recycling */
+ if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ tx_vector[i++ % nb_tx_desc] = mb0;
+ if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ tx_vector[i++ % nb_tx_desc] = mb1;
+ if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ tx_vector[i++ % nb_tx_desc] = mb2;
+ if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ tx_vector[i++ % nb_tx_desc] = mb3;
+ }
+ else
+ {
+ if (PREDICT_FALSE (i + 3 >= nb_tx_desc))
+ {
+ tx_vector[i++ % nb_tx_desc] = mb0;
+ tx_vector[i++ % nb_tx_desc] = mb1;
+ tx_vector[i++ % nb_tx_desc] = mb2;
+ tx_vector[i++ % nb_tx_desc] = mb3;
+ i %= nb_tx_desc;
+ }
+ else
+ {
+ tx_vector[i++] = mb0;
+ tx_vector[i++] = mb1;
+ tx_vector[i++] = mb2;
+ tx_vector[i++] = mb3;
+ }
+ }
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2);
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3);
+ }
+
+ n_left -= 4;
+ }
+ while (n_left > 0)
+ {
+ u32 bi0;
+ struct rte_mbuf *mb0;
+ vlib_buffer_t *b0;
+
+ bi0 = from[0];
+ from++;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+ dpdk_validate_rte_mbuf (vm, b0, 1);
+
+ mb0 = rte_mbuf_from_vlib_buffer (b0);
+ dpdk_buffer_tx_offload (xd, b0, mb0);
+ dpdk_buffer_recycle (vm, node, b0, bi0, &mb0);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
+
+ if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ {
+ tx_vector[i % nb_tx_desc] = mb0;
+ i++;
+ }
+ n_left--;
+ }
+
+ /* account for additional packets in the ring */
+ ring->tx_head += n_packets;
+ n_on_ring = ring->tx_head - ring->tx_tail;
+
+ /* transmit as many packets as possible */
+ n_packets = tx_burst_vector_internal (vm, xd, tx_vector);
+
+ /*
+ * tx_pkts is the number of packets successfully transmitted
+ * This is the number originally on ring minus the number remaining on ring
+ */
+ tx_pkts = n_on_ring - n_packets;
+
+ {
+ /* Drop any packets that were not transmitted */
+ if (PREDICT_FALSE (n_packets))
+ {
+ vlib_simple_counter_main_t *cm;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_TX_ERROR);
+
+ vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+ n_packets);
+
+ vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
+ n_packets);
+
+ while (n_packets--)
+ rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
+ }
+
+ /* Reset head/tail to avoid unnecessary wrap */
+ ring->tx_head = 0;
+ ring->tx_tail = 0;
+ }
+
+ /* Recycle replicated buffers */
+ if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu])))
+ {
+ vlib_buffer_free (vm, dm->recycle[my_cpu],
+ vec_len (dm->recycle[my_cpu]));
+ _vec_len (dm->recycle[my_cpu]) = 0;
+ }
+
+ ASSERT (ring->tx_head >= ring->tx_tail);
+
+ return tx_pkts;
+}
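+
+/*
+ * Design note on the head/tail reset above: every call either transmits
+ * or frees all packets it placed on the ring, so the ring is empty when
+ * the function returns. Resetting head and tail to zero therefore loses
+ * nothing and keeps the next frame's descriptors contiguous, letting
+ * tx_burst_vector_internal transmit without wrapping.
+ */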
+
+static void
+dpdk_clear_hw_interface_counters (u32 instance)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance);
+
+ /*
+ * Set the "last_cleared_stats" to the current stats, so that
+ * things appear to clear from a display perspective.
+ */
+ dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));
+
+ clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats));
+ clib_memcpy (xd->last_cleared_xstats, xd->xstats,
+ vec_len (xd->last_cleared_xstats) *
+ sizeof (xd->last_cleared_xstats[0]));
+}
+
+static clib_error_t *
+dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance);
+
+ if (xd->flags & DPDK_DEVICE_FLAG_PMD_INIT_FAIL)
+ return clib_error_return (0, "Interface not initialized");
+
+ if (is_up)
+ {
+ vnet_hw_interface_set_flags (vnm, xd->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
+ dpdk_device_start (xd);
+ xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP;
+ f64 now = vlib_time_now (dm->vlib_main);
+ dpdk_update_counters (xd, now);
+ dpdk_update_link_state (xd, now);
+ }
+ else
+ {
+ vnet_hw_interface_set_flags (vnm, xd->hw_if_index, 0);
+ if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0)
+ dpdk_device_stop (xd);
+ xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP;
+ }
+
+ return /* no error */ 0;
+}
+
+/*
+ * Dynamically redirect all packets from a specific interface
+ * to the specified node
+ */
+static void
+dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ dpdk_main_t *xm = &dpdk_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ xd->per_interface_next_index = node_index;
+ return;
+ }
+
+ xd->per_interface_next_index =
+ vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index);
+}
+
+
+static clib_error_t *
+dpdk_subif_add_del_function (vnet_main_t * vnm,
+ u32 hw_if_index,
+ struct vnet_sw_interface_t *st, int is_add)
+{
+ dpdk_main_t *xm = &dpdk_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance);
+ vnet_sw_interface_t *t = (vnet_sw_interface_t *) st;
+ int r, vlan_offload;
+ u32 prev_subifs = xd->num_subifs;
+ clib_error_t *err = 0;
+
+ if (is_add)
+ xd->num_subifs++;
+ else if (xd->num_subifs)
+ xd->num_subifs--;
+
+ if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
+ goto done;
+
+ /* currently we program VLANs only for IXGBE VF and I40E VF */
+ if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF))
+ goto done;
+
+ if (t->sub.eth.flags.no_tags == 1)
+ goto done;
+
+ if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1))
+ {
+ xd->num_subifs = prev_subifs;
+ err = clib_error_return (0, "unsupported VLAN setup");
+ goto done;
+ }
+
+ vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index);
+ vlan_offload |= ETH_VLAN_FILTER_OFFLOAD;
+
+ if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload)))
+ {
+ xd->num_subifs = prev_subifs;
+ err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d",
+ xd->device_index, r);
+ goto done;
+ }
+
+ if ((r =
+ rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id,
+ is_add)))
+ {
+ xd->num_subifs = prev_subifs;
+ err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d",
+ xd->device_index, r);
+ goto done;
+ }
+
+done:
+ if (xd->num_subifs)
+ xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF;
+ else
+ xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF;
+
+ return err;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (dpdk_device_class) = {
+ .name = "dpdk",
+ .tx_function = dpdk_interface_tx,
+ .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
+ .tx_function_error_strings = dpdk_tx_func_error_strings,
+ .format_device_name = format_dpdk_device_name,
+ .format_device = format_dpdk_device,
+ .format_tx_trace = format_dpdk_tx_dma_trace,
+ .clear_counters = dpdk_clear_hw_interface_counters,
+ .admin_up_down_function = dpdk_interface_admin_up_down,
+ .subif_add_del_function = dpdk_subif_add_del_function,
+ .rx_redirect_to_node = dpdk_set_interface_next_node,
+ .mac_addr_change_function = dpdk_set_mac_address,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx)
+/* *INDENT-ON* */
+
+#define UP_DOWN_FLAG_EVENT 1
+
+uword
+admin_up_down_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ clib_error_t *error = 0;
+ uword event_type;
+ uword *event_data = 0;
+ u32 sw_if_index;
+ u32 flags;
+
+ while (1)
+ {
+ vlib_process_wait_for_event (vm);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+
+ dpdk_main.admin_up_down_in_progress = 1;
+
+ switch (event_type)
+ {
+ case UP_DOWN_FLAG_EVENT:
+ {
+ if (vec_len (event_data) == 2)
+ {
+ sw_if_index = event_data[0];
+ flags = event_data[1];
+ error =
+ vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index,
+ flags);
+ clib_error_report (error);
+ }
+ }
+ break;
+ }
+
+ vec_reset_length (event_data);
+
+ dpdk_main.admin_up_down_in_progress = 0;
+
+ }
+ return 0; /* not reached */
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (admin_up_down_process_node,static) = {
+ .function = admin_up_down_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "admin-up-down-process",
+ .process_log2_n_stack_bytes = 17, /* 128KB */
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/dir.dox b/src/plugins/dpdk/device/dir.dox
new file mode 100644
index 00000000..43e36753
--- /dev/null
+++ b/src/plugins/dpdk/device/dir.dox
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief DPDK Abstraction Layer.
+
+This directory contains the source code for the DPDK abstraction layer.
+
+*/
+/*? %%clicmd:group_label DPDK and pcap tx %% ?*/
+/*? %%syscfg:group_label DPDK and pcap tx %% ?*/
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
new file mode 100644
index 00000000..9762c713
--- /dev/null
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -0,0 +1,483 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_dpdk_h__
+#define __included_dpdk_h__
+
+/* $$$$ We should rename always_inline -> clib_always_inline */
+#undef always_inline
+
+#include <rte_config.h>
+
+#include <rte_common.h>
+#include <rte_dev.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_version.h>
+#include <rte_eth_bond.h>
+#include <rte_sched.h>
+#include <rte_net.h>
+
+#include <vnet/unix/pcap.h>
+#include <vnet/devices/devices.h>
+
+#if CLIB_DEBUG > 0
+#define always_inline static inline
+#else
+#define always_inline static inline __attribute__ ((__always_inline__))
+#endif
+
+#include <vlib/pci/pci.h>
+
+#define NB_MBUF (16<<10)
+
+extern vnet_device_class_t dpdk_device_class;
+extern vlib_node_registration_t dpdk_input_node;
+
+#define foreach_dpdk_pmd \
+ _ ("net_thunderx", THUNDERX) \
+ _ ("net_e1000_em", E1000EM) \
+ _ ("net_e1000_igb", IGB) \
+ _ ("net_e1000_igb_vf", IGBVF) \
+ _ ("net_ixgbe", IXGBE) \
+ _ ("net_ixgbe_vf", IXGBEVF) \
+ _ ("net_i40e", I40E) \
+ _ ("net_i40e_vf", I40EVF) \
+ _ ("net_virtio", VIRTIO) \
+ _ ("net_enic", ENIC) \
+ _ ("net_vmxnet3", VMXNET3) \
+ _ ("AF_PACKET PMD", AF_PACKET) \
+ _ ("net_bonding", BOND) \
+ _ ("net_fm10k", FM10K) \
+ _ ("net_cxgbe", CXGBE) \
+ _ ("net_mlx4", MLX4) \
+ _ ("net_mlx5", MLX5) \
+ _ ("net_dpaa2", DPAA2) \
+ _ ("net_virtio_user", VIRTIO_USER) \
+ _ ("net_vhost", VHOST_ETHER)
+
+typedef enum
+{
+ VNET_DPDK_PMD_NONE,
+#define _(s,f) VNET_DPDK_PMD_##f,
+ foreach_dpdk_pmd
+#undef _
+ VNET_DPDK_PMD_UNKNOWN, /* must be last */
+} dpdk_pmd_t;
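+
+/*
+ * The foreach_dpdk_pmd X-macro above expands once per supported PMD; for
+ * example the first entry becomes the enum member VNET_DPDK_PMD_THUNDERX,
+ * and the same list can be re-expanded elsewhere with a different _()
+ * definition to map driver-name strings to these enum values.
+ */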
+
+typedef enum
+{
+ VNET_DPDK_PORT_TYPE_ETH_1G,
+ VNET_DPDK_PORT_TYPE_ETH_10G,
+ VNET_DPDK_PORT_TYPE_ETH_25G,
+ VNET_DPDK_PORT_TYPE_ETH_40G,
+ VNET_DPDK_PORT_TYPE_ETH_50G,
+ VNET_DPDK_PORT_TYPE_ETH_100G,
+ VNET_DPDK_PORT_TYPE_ETH_BOND,
+ VNET_DPDK_PORT_TYPE_ETH_SWITCH,
+ VNET_DPDK_PORT_TYPE_AF_PACKET,
+ VNET_DPDK_PORT_TYPE_ETH_VF,
+ VNET_DPDK_PORT_TYPE_VIRTIO_USER,
+ VNET_DPDK_PORT_TYPE_VHOST_ETHER,
+ VNET_DPDK_PORT_TYPE_UNKNOWN,
+} dpdk_port_type_t;
+
+/*
+ * The header for the tx_vector in dpdk_device_t.
+ * Head and tail are free-running counters; a slot index is obtained by
+ * taking them modulo the vector size, and as u64 values they never
+ * overflow in practice.
+ */
+typedef struct
+{
+ u64 tx_head;
+ u64 tx_tail;
+} tx_ring_hdr_t;
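+
+/*
+ * Example of the free-running counter scheme (assumed values): with
+ * nb_tx_desc = 1024, tx_head = 1030 and tx_tail = 1020, the ring holds
+ * tx_head - tx_tail = 10 packets, draining from slot tx_tail % 1024 and
+ * wrapping through slot tx_head % 1024 = 6. At one increment per packet,
+ * a u64 counter cannot wrap within the lifetime of the process.
+ */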
+
+typedef struct
+{
+ struct rte_ring *swq;
+
+ u64 hqos_field0_slabmask;
+ u32 hqos_field0_slabpos;
+ u32 hqos_field0_slabshr;
+ u64 hqos_field1_slabmask;
+ u32 hqos_field1_slabpos;
+ u32 hqos_field1_slabshr;
+ u64 hqos_field2_slabmask;
+ u32 hqos_field2_slabpos;
+ u32 hqos_field2_slabshr;
+ u32 hqos_tc_table[64];
+} dpdk_device_hqos_per_worker_thread_t;
+
+typedef struct
+{
+ struct rte_ring **swq;
+ struct rte_mbuf **pkts_enq;
+ struct rte_mbuf **pkts_deq;
+ struct rte_sched_port *hqos;
+ u32 hqos_burst_enq;
+ u32 hqos_burst_deq;
+ u32 pkts_enq_len;
+ u32 swq_pos;
+ u32 flush_count;
+} dpdk_device_hqos_per_hqos_thread_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 **lockp;
+
+ /* Instance ID */
+ u32 device_index;
+
+ u32 hw_if_index;
+ u32 vlib_sw_if_index;
+
+ /* next node index if we decide to steal the rx graph arc */
+ u32 per_interface_next_index;
+
+ /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */
+ struct rte_mbuf ***tx_vectors; /* one per worker thread */
+ struct rte_mbuf ***rx_vectors;
+
+ /* vector of traced contexts, per device */
+ u32 **d_trace_buffers;
+
+ dpdk_pmd_t pmd:8;
+ i8 cpu_socket;
+
+ u16 flags;
+#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0)
+#define DPDK_DEVICE_FLAG_PROMISC (1 << 1)
+#define DPDK_DEVICE_FLAG_PMD (1 << 2)
+#define DPDK_DEVICE_FLAG_PMD_INIT_FAIL (1 << 3)
+#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4)
+#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5)
+#define DPDK_DEVICE_FLAG_HQOS (1 << 6)
+#define DPDK_DEVICE_FLAG_BOND_SLAVE (1 << 7)
+#define DPDK_DEVICE_FLAG_BOND_SLAVE_UP (1 << 8)
+#define DPDK_DEVICE_FLAG_TX_OFFLOAD (1 << 9)
+#define DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM (1 << 10)
+
+ u16 nb_tx_desc;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+
+ u8 *interface_name_suffix;
+
+ /* number of sub-interfaces */
+ u16 num_subifs;
+
+ /* PMD related */
+ u16 tx_q_used;
+ u16 rx_q_used;
+ u16 nb_rx_desc;
+ u16 *cpu_socket_id_by_queue;
+ struct rte_eth_conf port_conf;
+ struct rte_eth_txconf tx_conf;
+
+ /* HQoS related */
+ dpdk_device_hqos_per_worker_thread_t *hqos_wt;
+ dpdk_device_hqos_per_hqos_thread_t *hqos_ht;
+
+ /* af_packet or BondEthernet instance number */
+ u8 port_id;
+
+ /* Bonded interface port# of a slave -
+ only valid if DPDK_DEVICE_FLAG_BOND_SLAVE bit is set */
+ u8 bond_port;
+
+ struct rte_eth_link link;
+ f64 time_last_link_update;
+
+ struct rte_eth_stats stats;
+ struct rte_eth_stats last_stats;
+ struct rte_eth_stats last_cleared_stats;
+ struct rte_eth_xstat *xstats;
+ struct rte_eth_xstat *last_cleared_xstats;
+ f64 time_last_stats_update;
+ dpdk_port_type_t port_type;
+
+ /* mac address */
+ u8 *default_mac_address;
+
+ /* error string */
+ clib_error_t *errors;
+} dpdk_device_t;
+
+#define DPDK_STATS_POLL_INTERVAL (10.0)
+#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */
+
+#define DPDK_LINK_POLL_INTERVAL (3.0)
+#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */
+
+typedef struct
+{
+ u32 device;
+ u16 queue_id;
+} dpdk_device_and_queue_t;
+
+#ifndef DPDK_HQOS_DBG_BYPASS
+#define DPDK_HQOS_DBG_BYPASS 0
+#endif
+
+#ifndef HQOS_FLUSH_COUNT_THRESHOLD
+#define HQOS_FLUSH_COUNT_THRESHOLD 100000
+#endif
+
+typedef struct dpdk_device_config_hqos_t
+{
+ u32 hqos_thread;
+ u32 hqos_thread_valid;
+
+ u32 swq_size;
+ u32 burst_enq;
+ u32 burst_deq;
+
+ u32 pktfield0_slabpos;
+ u32 pktfield1_slabpos;
+ u32 pktfield2_slabpos;
+ u64 pktfield0_slabmask;
+ u64 pktfield1_slabmask;
+ u64 pktfield2_slabmask;
+ u32 tc_table[64];
+
+ struct rte_sched_port_params port;
+ struct rte_sched_subport_params *subport;
+ struct rte_sched_pipe_params *pipe;
+ uint32_t *pipe_map;
+} dpdk_device_config_hqos_t;
+
+int dpdk_hqos_validate_mask (u64 mask, u32 n);
+void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t *
+ hqos, u32 pipe_profile_id);
+void dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos);
+clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd,
+ dpdk_device_config_hqos_t * hqos);
+void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos,
+ struct rte_mbuf **pkts, u32 n_pkts);
+
+#define foreach_dpdk_device_config_item \
+ _ (num_rx_queues) \
+ _ (num_tx_queues) \
+ _ (num_rx_desc) \
+ _ (num_tx_desc) \
+ _ (rss_fn)
+
+typedef struct
+{
+ vlib_pci_addr_t pci_addr;
+ u8 is_blacklisted;
+ u8 vlan_strip_offload;
+#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0
+#define DPDK_DEVICE_VLAN_STRIP_OFF 1
+#define DPDK_DEVICE_VLAN_STRIP_ON 2
+
+#define _(x) uword x;
+ foreach_dpdk_device_config_item
+#undef _
+ clib_bitmap_t * workers;
+ u32 hqos_enabled;
+ dpdk_device_config_hqos_t hqos;
+} dpdk_device_config_t;
+
+typedef struct
+{
+
+ /* Config stuff */
+ u8 **eal_init_args;
+ u8 *eal_init_args_str;
+ u8 *uio_driver_name;
+ u8 no_multi_seg;
+ u8 enable_tcp_udp_checksum;
+
+ /* Required config parameters */
+ u8 coremask_set_manually;
+ u8 nchannels_set_manually;
+ u32 coremask;
+ u32 nchannels;
+ u32 num_mbufs;
+
+ /*
+ * format interface names a la xxxEthernet%d/%d/%d instead of
+ * xxxEthernet%x/%x/%x.
+ */
+ u8 interface_name_format_decimal;
+
+ /* per-device config */
+ dpdk_device_config_t default_devconf;
+ dpdk_device_config_t *dev_confs;
+ uword *device_config_index_by_pci_addr;
+
+} dpdk_config_main_t;
+
+dpdk_config_main_t dpdk_config_main;
+
+typedef struct
+{
+
+ /* Devices */
+ dpdk_device_t *devices;
+ dpdk_device_and_queue_t **devices_by_hqos_cpu;
+
+ /* per-thread recycle lists */
+ u32 **recycle;
+
+ /* per-thread buffer templates */
+ vlib_buffer_t *buffer_templates;
+
+ /* buffer flags template, configurable to enable/disable tcp / udp cksum */
+ u32 buffer_flags_template;
+
+ /* vlib buffer free list, must be same size as an rte_mbuf */
+ u32 vlib_buffer_free_list_index;
+
+ /* Ethernet input node index */
+ u32 ethernet_input_node_index;
+
+ /* pcap tracing [only works if (CLIB_DEBUG > 0)] */
+ int tx_pcap_enable;
+ pcap_main_t pcap_main;
+ u8 *pcap_filename;
+ u32 pcap_sw_if_index;
+ u32 pcap_pkts_to_capture;
+
+ /*
+ * flag indicating that a posted admin up/down
+ * (via post_sw_interface_set_flags) is in progress
+ */
+ u8 admin_up_down_in_progress;
+
+ u8 use_rss;
+
+ /* which cpus are running I/O TX */
+ int hqos_cpu_first_index;
+ int hqos_cpu_count;
+
+ /* control interval of dpdk link state and stat polling */
+ f64 link_state_poll_interval;
+ f64 stat_poll_interval;
+
+ /* Sleep for this many usec after each device poll */
+ u32 poll_sleep_usec;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ dpdk_config_main_t *conf;
+
+ /* mempool */
+ struct rte_mempool **pktmbuf_pools;
+
+ /* API message ID base */
+ u16 msg_id_base;
+} dpdk_main_t;
+
+extern dpdk_main_t dpdk_main;
+
+typedef struct
+{
+ u32 buffer_index;
+ u16 device_index;
+ u8 queue_index;
+ struct rte_mbuf mb;
+ /* Copy of VLIB buffer; packet data stored in pre_data. */
+ vlib_buffer_t buffer;
+} dpdk_tx_dma_trace_t;
+
+typedef struct
+{
+ u32 buffer_index;
+ u16 device_index;
+ u16 queue_index;
+ struct rte_mbuf mb;
+ vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */
+ u8 data[256]; /* First 256 data bytes, used for hexdump */
+} dpdk_rx_dma_trace_t;
+
+void dpdk_device_setup (dpdk_device_t * xd);
+void dpdk_device_start (dpdk_device_t * xd);
+void dpdk_device_stop (dpdk_device_t * xd);
+
+#if DPDK_VOID_CALLBACK
+void dpdk_port_state_callback (uint8_t port_id,
+ enum rte_eth_event_type type, void *param);
+#else
+int dpdk_port_state_callback (uint8_t port_id,
+ enum rte_eth_event_type type,
+ void *param, void *ret_param);
+#endif
+
+#define foreach_dpdk_error \
+ _(NONE, "no error") \
+ _(RX_PACKET_ERROR, "Rx packet errors") \
+ _(RX_BAD_FCS, "Rx bad fcs") \
+ _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \
+ _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \
+ _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \
+ _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error")
+
+typedef enum
+{
+#define _(f,s) DPDK_ERROR_##f,
+ foreach_dpdk_error
+#undef _
+ DPDK_N_ERROR,
+} dpdk_error_t;
+
+void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
+
+format_function_t format_dpdk_device_name;
+format_function_t format_dpdk_device;
+format_function_t format_dpdk_device_errors;
+format_function_t format_dpdk_tx_dma_trace;
+format_function_t format_dpdk_rx_dma_trace;
+format_function_t format_dpdk_rte_mbuf;
+format_function_t format_dpdk_rx_rte_mbuf;
+unformat_function_t unformat_dpdk_log_level;
+clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn);
+clib_error_t *unformat_hqos (unformat_input_t * input,
+ dpdk_device_config_hqos_t * hqos);
+
+uword
+admin_up_down_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f);
+
+clib_error_t *dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
+ unsigned socket_id);
+
+#if CLI_DEBUG
+int dpdk_buffer_validate_trajectory_all (u32 * uninitialized);
+void dpdk_buffer_poison_trajectory_all (void);
+#endif
+
+#endif /* __included_dpdk_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
new file mode 100644
index 00000000..52b4ca4b
--- /dev/null
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1)
+#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1))
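+
+/*
+ * These two macros encode the buffer memory layout: each rte_mbuf is
+ * immediately followed in memory by its vlib_buffer_t, so converting
+ * between the two is plain pointer arithmetic with no lookup. E.g. for
+ * a vlib_buffer_t at address b, the owning mbuf starts
+ * sizeof (struct rte_mbuf) bytes before b.
+ */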
+
+#define DPDK_NB_RX_DESC_DEFAULT 1024
+#define DPDK_NB_TX_DESC_DEFAULT 1024
+#define DPDK_NB_RX_DESC_VIRTIO 256
+#define DPDK_NB_TX_DESC_VIRTIO 256
+
+#define I40E_DEV_ID_SFP_XL710 0x1572
+#define I40E_DEV_ID_QSFP_A 0x1583
+#define I40E_DEV_ID_QSFP_B 0x1584
+#define I40E_DEV_ID_QSFP_C 0x1585
+#define I40E_DEV_ID_10G_BASE_T 0x1586
+#define I40E_DEV_ID_VF 0x154C
+
+/* These args appear by themselves */
+#define foreach_eal_double_hyphen_predicate_arg \
+_(no-shconf) \
+_(no-hpet) \
+_(no-huge) \
+_(vmware-tsc-map)
+
+#define foreach_eal_single_hyphen_mandatory_arg \
+_(coremask, c) \
+_(nchannels, n)
+
+#define foreach_eal_single_hyphen_arg \
+_(blacklist, b) \
+_(mem-alloc-request, m) \
+_(force-ranks, r)
+
+/* These args are preceded by "--" and followed by a single string */
+#define foreach_eal_double_hyphen_arg \
+_(huge-dir) \
+_(proc-type) \
+_(file-prefix) \
+_(vdev)
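+
+/*
+ * Illustration (hypothetical startup-config fragment): "huge-dir
+ * /run/hugepages" in the dpdk stanza is turned into the EAL arguments
+ * "--huge-dir /run/hugepages" via foreach_eal_double_hyphen_arg, while
+ * "coremask" and "nchannels" become the single-hyphen "-c" and "-n"
+ * EAL options through the mandatory-arg list above.
+ */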
+
+static inline void
+dpdk_get_xstats (dpdk_device_t * xd)
+{
+ int len;
+ if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0)
+ {
+ vec_validate (xd->xstats, len - 1);
+ vec_validate (xd->last_cleared_xstats, len - 1);
+
+ len =
+ rte_eth_xstats_get (xd->device_index, xd->xstats,
+ vec_len (xd->xstats));
+
+ ASSERT (vec_len (xd->xstats) == len);
+ ASSERT (vec_len (xd->last_cleared_xstats) == len);
+
+ _vec_len (xd->xstats) = len;
+ _vec_len (xd->last_cleared_xstats) = len;
+
+ }
+}
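+
+/*
+ * The function above uses the standard two-call DPDK pattern: a first
+ * rte_eth_xstats_get() with a NULL array returns only the required
+ * count, the vectors are sized to match, and a second call fills them.
+ * Keeping xstats and last_cleared_xstats the same length is what allows
+ * the per-entry deltas computed in format.c.
+ */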
+
+
+static inline void
+dpdk_update_counters (dpdk_device_t * xd, f64 now)
+{
+ vlib_simple_counter_main_t *cm;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 thread_index = vlib_get_thread_index ();
+ u64 rxerrors, last_rxerrors;
+
+ /* only update counters for PMD interfaces */
+ if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
+ return;
+
+ xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
+ clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
+ rte_eth_stats_get (xd->device_index, &xd->stats);
+
+ /* maybe bump interface rx no buffer counter */
+ if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf))
+ {
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_RX_NO_BUF);
+
+ vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
+ xd->stats.rx_nombuf -
+ xd->last_stats.rx_nombuf);
+ }
+
+ /* missed pkt counter */
+ if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed))
+ {
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_RX_MISS);
+
+ vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
+ xd->stats.imissed -
+ xd->last_stats.imissed);
+ }
+ rxerrors = xd->stats.ierrors;
+ last_rxerrors = xd->last_stats.ierrors;
+
+ if (PREDICT_FALSE (rxerrors != last_rxerrors))
+ {
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_RX_ERROR);
+
+ vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
+ rxerrors - last_rxerrors);
+ }
+
+ dpdk_get_xstats (xd);
+}
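+
+/*
+ * Counter-update pattern used above: DPDK reports cumulative totals, so
+ * each poll snapshots the previous totals into last_stats and adds only
+ * the difference (e.g. stats.imissed - last_stats.imissed) to the vnet
+ * simple counters, keeping them monotonic without double counting.
+ */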
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c
new file mode 100644
index 00000000..697bdbe5
--- /dev/null
+++ b/src/plugins/dpdk/device/format.c
@@ -0,0 +1,804 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/format.h>
+#include <vlib/unix/cj.h>
+#include <assert.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <dpdk/device/dpdk.h>
+
+#include <dpdk/device/dpdk_priv.h>
+#include <vppinfra/error.h>
+
+#define foreach_dpdk_counter \
+ _ (tx_frames_ok, opackets) \
+ _ (tx_bytes_ok, obytes) \
+ _ (tx_errors, oerrors) \
+ _ (rx_frames_ok, ipackets) \
+ _ (rx_bytes_ok, ibytes) \
+ _ (rx_errors, ierrors) \
+ _ (rx_missed, imissed) \
+ _ (rx_no_bufs, rx_nombuf)
+
+#define foreach_dpdk_q_counter \
+ _ (rx_frames_ok, q_ipackets) \
+ _ (tx_frames_ok, q_opackets) \
+ _ (rx_bytes_ok, q_ibytes) \
+ _ (tx_bytes_ok, q_obytes) \
+ _ (rx_errors, q_errors)
+
+#define foreach_dpdk_rss_hf \
+ _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \
+ _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
+ _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
+ _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
+ _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
+ _(ETH_RSS_IPV4, "ipv4") \
+ _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
+ _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
+ _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \
+ _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
+ _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
+ _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
+ _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
+ _(ETH_RSS_L2_PAYLOAD, "l2-payload") \
+ _(ETH_RSS_IPV6_EX, "ipv6-ex") \
+ _(ETH_RSS_IPV6, "ipv6")
+
+
+#define foreach_dpdk_rx_offload_caps \
+ _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \
+ _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \
+ _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \
+ _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \
+ _(DEV_RX_OFFLOAD_TCP_LRO , "tcp-lro") \
+ _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip")
+
+#define foreach_dpdk_tx_offload_caps \
+ _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \
+ _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \
+ _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \
+ _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \
+ _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \
+ _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \
+ _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \
+ _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \
+ _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert")
+
+#define foreach_dpdk_pkt_rx_offload_flag \
+ _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \
+ _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
+ _ (PKT_RX_FDIR, "RX packet with FDIR infos") \
+ _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
+ _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
+ _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \
+ _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
+ _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
+ _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
+ _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
+ _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
+
+#define foreach_dpdk_pkt_type \
+ _ (L2, ETHER, "Ethernet packet") \
+ _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \
+ _ (L2, ETHER_ARP, "ARP packet") \
+ _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \
+ _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \
+ _ (L2, ETHER_VLAN, "VLAN packet") \
+ _ (L2, ETHER_QINQ, "QinQ packet") \
+ _ (L3, IPV4, "IPv4 packet without extension headers") \
+ _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \
+ _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \
+ _ (L3, IPV6, "IPv6 packet without extension headers") \
+ _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \
+ _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \
+ _ (L4, TCP, "TCP packet") \
+ _ (L4, UDP, "UDP packet") \
+ _ (L4, FRAG, "Fragmented IP packet") \
+ _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \
+ _ (L4, ICMP, "ICMP packet") \
+ _ (L4, NONFRAG, "Non-fragmented IP packet") \
+ _ (TUNNEL, GRE, "GRE tunneling packet") \
+ _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \
+ _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \
+ _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \
+ _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \
+ _ (INNER_L2, ETHER, "Inner Ethernet packet") \
+ _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \
+ _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \
+ _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \
+ _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \
+ _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \
+ _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \
+ _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \
+ _ (INNER_L4, TCP, "Inner TCP packet") \
+ _ (INNER_L4, UDP, "Inner UDP packet") \
+ _ (INNER_L4, FRAG, "Inner fragmented IP packet") \
+ _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \
+ _ (INNER_L4, ICMP, "Inner ICMP packet") \
+ _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet")
+
+#define foreach_dpdk_pkt_tx_offload_flag \
+ _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \
+ _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
+ _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
+ _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
+ _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp")
+
+#define foreach_dpdk_pkt_offload_flag \
+ foreach_dpdk_pkt_rx_offload_flag \
+ foreach_dpdk_pkt_tx_offload_flag
+
+#define foreach_dpdk_log_level \
+ _ (EMERG, "emergency") \
+ _ (ALERT, "alert") \
+ _ (CRIT, "critical") \
+ _ (ERR, "error") \
+ _ (WARNING, "warning") \
+ _ (NOTICE, "notice") \
+ _ (INFO, "info") \
+ _ (DEBUG, "debug")
+
+u8 *
+format_dpdk_device_name (u8 * s, va_list * args)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ char *devname_format;
+ char *device_name;
+ u32 i = va_arg (*args, u32);
+ struct rte_eth_dev_info dev_info;
+ u8 *ret;
+
+ if (dm->conf->interface_name_format_decimal)
+ devname_format = "%s%d/%d/%d";
+ else
+ devname_format = "%s%x/%x/%x";
+
+ switch (dm->devices[i].port_type)
+ {
+ case VNET_DPDK_PORT_TYPE_ETH_1G:
+ device_name = "GigabitEthernet";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_ETH_10G:
+ device_name = "TenGigabitEthernet";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_ETH_25G:
+ device_name = "TwentyFiveGigabitEthernet";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_ETH_40G:
+ device_name = "FortyGigabitEthernet";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_ETH_50G:
+ device_name = "FiftyGigabitEthernet";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_ETH_100G:
+ device_name = "HundredGigabitEthernet";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_ETH_BOND:
+ return format (s, "BondEthernet%d", dm->devices[i].port_id);
+
+ case VNET_DPDK_PORT_TYPE_ETH_SWITCH:
+ device_name = "EthernetSwitch";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_ETH_VF:
+ device_name = "VirtualFunctionEthernet";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_AF_PACKET:
+ rte_eth_dev_info_get (i, &dev_info);
+ return format (s, "af_packet%d", dm->devices[i].port_id);
+
+ case VNET_DPDK_PORT_TYPE_VIRTIO_USER:
+ device_name = "VirtioUser";
+ break;
+
+ case VNET_DPDK_PORT_TYPE_VHOST_ETHER:
+ device_name = "VhostEthernet";
+ break;
+
+ default:
+ case VNET_DPDK_PORT_TYPE_UNKNOWN:
+ device_name = "UnknownEthernet";
+ break;
+ }
+
+ rte_eth_dev_info_get (i, &dev_info);
+
+ if (dev_info.pci_dev)
+ ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus,
+ dev_info.pci_dev->addr.devid,
+ dev_info.pci_dev->addr.function);
+ else
+ ret = format (s, "%s%d", device_name, dm->devices[i].device_index);
+
+ if (dm->devices[i].interface_name_suffix)
+ return format (ret, "/%s", dm->devices[i].interface_name_suffix);
+ return ret;
+}
+
+static u8 *
+format_dpdk_device_type (u8 * s, va_list * args)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ char *dev_type;
+ u32 i = va_arg (*args, u32);
+
+ switch (dm->devices[i].pmd)
+ {
+ case VNET_DPDK_PMD_E1000EM:
+ dev_type = "Intel 82540EM (e1000)";
+ break;
+
+ case VNET_DPDK_PMD_IGB:
+ dev_type = "Intel e1000";
+ break;
+
+ case VNET_DPDK_PMD_I40E:
+ dev_type = "Intel X710/XL710 Family";
+ break;
+
+ case VNET_DPDK_PMD_I40EVF:
+ dev_type = "Intel X710/XL710 Family VF";
+ break;
+
+ case VNET_DPDK_PMD_FM10K:
+ dev_type = "Intel FM10000 Family Ethernet Switch";
+ break;
+
+ case VNET_DPDK_PMD_IGBVF:
+ dev_type = "Intel e1000 VF";
+ break;
+
+ case VNET_DPDK_PMD_VIRTIO:
+ dev_type = "Red Hat Virtio";
+ break;
+
+ case VNET_DPDK_PMD_IXGBEVF:
+ dev_type = "Intel 82599 VF";
+ break;
+
+ case VNET_DPDK_PMD_IXGBE:
+ dev_type = "Intel 82599";
+ break;
+
+ case VNET_DPDK_PMD_ENIC:
+ dev_type = "Cisco VIC";
+ break;
+
+ case VNET_DPDK_PMD_CXGBE:
+ dev_type = "Chelsio T4/T5";
+ break;
+
+ case VNET_DPDK_PMD_MLX4:
+ dev_type = "Mellanox ConnectX-3 Family";
+ break;
+
+ case VNET_DPDK_PMD_MLX5:
+ dev_type = "Mellanox ConnectX-4 Family";
+ break;
+
+ case VNET_DPDK_PMD_VMXNET3:
+ dev_type = "VMware VMXNET3";
+ break;
+
+ case VNET_DPDK_PMD_AF_PACKET:
+ dev_type = "af_packet";
+ break;
+
+ case VNET_DPDK_PMD_BOND:
+ dev_type = "Ethernet Bonding";
+ break;
+
+ case VNET_DPDK_PMD_DPAA2:
+ dev_type = "NXP DPAA2 Mac";
+ break;
+
+ case VNET_DPDK_PMD_VIRTIO_USER:
+ dev_type = "Virtio User";
+ break;
+
+ case VNET_DPDK_PMD_THUNDERX:
+ dev_type = "Cavium ThunderX";
+ break;
+
+ case VNET_DPDK_PMD_VHOST_ETHER:
+ dev_type = "VhostEthernet";
+ break;
+
+ default:
+ case VNET_DPDK_PMD_UNKNOWN:
+ dev_type = "### UNKNOWN ###";
+ break;
+ }
+
+ return format (s, "%s", dev_type);
+}
+
+static u8 *
+format_dpdk_link_status (u8 * s, va_list * args)
+{
+ dpdk_device_t *xd = va_arg (*args, dpdk_device_t *);
+ struct rte_eth_link *l = &xd->link;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+
+ s = format (s, "%s ", l->link_status ? "up" : "down");
+ if (l->link_status)
+ {
+ u32 promisc = rte_eth_promiscuous_get (xd->device_index);
+
+ s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ "full" : "half");
+ s = format (s, "speed %u mtu %d %s\n", l->link_speed,
+ hi->max_packet_bytes, promisc ? " promisc" : "");
+ }
+ else
+ s = format (s, "\n");
+
+ return s;
+}
+
+#define _line_len 72
+#define _(v, str) \
+if (bitmap & v) { \
+ if (format_get_indent (s) > next_split ) { \
+ next_split += _line_len; \
+ s = format(s,"\n%U", format_white_space, indent); \
+ } \
+ s = format(s, "%s ", str); \
+}
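+
+/*
+ * The helper above is shared by the three flag formatters below: each
+ * matching flag name is appended to the output, and whenever the current
+ * indent passes the next 72-column boundary a newline plus leading
+ * whitespace is emitted, so long capability lists wrap cleanly in CLI
+ * output.
+ */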
+
+static u8 *
+format_dpdk_rss_hf_name (u8 * s, va_list * args)
+{
+ u64 bitmap = va_arg (*args, u64);
+ int next_split = _line_len;
+ int indent = format_get_indent (s);
+
+ if (!bitmap)
+ return format (s, "none");
+
+ foreach_dpdk_rss_hf return s;
+}
+
+static u8 *
+format_dpdk_rx_offload_caps (u8 * s, va_list * args)
+{
+ u32 bitmap = va_arg (*args, u32);
+ int next_split = _line_len;
+ int indent = format_get_indent (s);
+
+ if (!bitmap)
+ return format (s, "none");
+
+ foreach_dpdk_rx_offload_caps return s;
+}
+
+static u8 *
+format_dpdk_tx_offload_caps (u8 * s, va_list * args)
+{
+ u32 bitmap = va_arg (*args, u32);
+ int next_split = _line_len;
+ int indent = format_get_indent (s);
+ if (!bitmap)
+ return format (s, "none");
+
+ foreach_dpdk_tx_offload_caps return s;
+}
+
+#undef _line_len
+#undef _
+
+u8 *
+format_dpdk_device_errors (u8 * s, va_list * args)
+{
+ dpdk_device_t *xd = va_arg (*args, dpdk_device_t *);
+ clib_error_t *e;
+ uword indent = format_get_indent (s);
+
+ vec_foreach (e, xd->errors)
+ {
+ s = format (s, "%U%v\n", format_white_space, indent, e->what);
+ }
+ return s;
+}
+
+u8 *
+format_dpdk_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ int verbose = va_arg (*args, int);
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
+ uword indent = format_get_indent (s);
+ f64 now = vlib_time_now (dm->vlib_main);
+ struct rte_eth_dev_info di;
+
+ dpdk_update_counters (xd, now);
+ dpdk_update_link_state (xd, now);
+
+ s = format (s, "%U\n%Ucarrier %U",
+ format_dpdk_device_type, xd->device_index,
+ format_white_space, indent + 2, format_dpdk_link_status, xd);
+
+ rte_eth_dev_info_get (xd->device_index, &di);
+
+ if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD)
+ {
+ struct rte_pci_device *pci;
+ struct rte_eth_rss_conf rss_conf;
+ int vlan_off;
+ int retval;
+
+ rss_conf.rss_key = 0;
+ retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf);
+ if (retval < 0)
+ clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
+ pci = di.pci_dev;
+
+ if (pci)
+ s =
+ format (s,
+ "%Upci id: device %04x:%04x subsystem %04x:%04x\n"
+ "%Upci address: %04x:%02x:%02x.%02x\n",
+ format_white_space, indent + 2, pci->id.vendor_id,
+ pci->id.device_id, pci->id.subsystem_vendor_id,
+ pci->id.subsystem_device_id, format_white_space, indent + 2,
+ pci->addr.domain, pci->addr.bus, pci->addr.devid,
+ pci->addr.function);
+ s =
+ format (s, "%Umax rx packet len: %d\n", format_white_space,
+ indent + 2, di.max_rx_pktlen);
+ s =
+ format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space,
+ indent + 2, di.max_rx_queues, di.max_tx_queues);
+ s =
+ format (s, "%Upromiscuous: unicast %s all-multicast %s\n",
+ format_white_space, indent + 2,
+ rte_eth_promiscuous_get (xd->device_index) ? "on" : "off",
+ rte_eth_allmulticast_get (xd->device_index) ? "on" : "off");
+ vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index);
+ s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n",
+ format_white_space, indent + 2,
+ vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
+ vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
+ vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
+ s = format (s, "%Urx offload caps: %U\n",
+ format_white_space, indent + 2,
+ format_dpdk_rx_offload_caps, di.rx_offload_capa);
+ s = format (s, "%Utx offload caps: %U\n",
+ format_white_space, indent + 2,
+ format_dpdk_tx_offload_caps, di.tx_offload_capa);
+ s = format (s, "%Urss active: %U\n"
+ "%Urss supported: %U\n",
+ format_white_space, indent + 2,
+ format_dpdk_rss_hf_name, rss_conf.rss_hf,
+ format_white_space, indent + 2,
+ format_dpdk_rss_hf_name, di.flow_type_rss_offloads);
+ }
+
+ s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n",
+ format_white_space, indent + 2,
+ xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc);
+
+ if (xd->cpu_socket > -1)
+ s = format (s, "%Ucpu socket %d\n",
+ format_white_space, indent + 2, xd->cpu_socket);
+
+ /* $$$ MIB counters */
+ {
+#define _(N, V) \
+ if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \
+ s = format (s, "\n%U%-40U%16Ld", \
+ format_white_space, indent + 2, \
+ format_c_identifier, #N, \
+ xd->stats.V - xd->last_cleared_stats.V); \
+ } \
+
+ foreach_dpdk_counter
+#undef _
+ }
+
+ u8 *xs = 0;
+ u32 i = 0;
+ struct rte_eth_xstat *xstat, *last_xstat;
+ struct rte_eth_xstat_name *xstat_names = 0;
+ int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0);
+ vec_validate (xstat_names, len - 1);
+ rte_eth_xstats_get_names (xd->device_index, xstat_names, len);
+
+ ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats));
+
+ /* *INDENT-OFF* */
+ vec_foreach_index(i, xd->xstats)
+ {
+ u64 delta = 0;
+ xstat = vec_elt_at_index(xd->xstats, i);
+ last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i);
+
+ delta = xstat->value - last_xstat->value;
+ if (verbose == 2 || (verbose && delta))
+ {
+ /* format_c_identifier expects a vector, so copy the C string into one */
+ u8 * name = format(0,"%s", xstat_names[i].name);
+ xs = format(xs, "\n%U%-38U%16Ld",
+ format_white_space, indent + 4,
+ format_c_identifier, name, delta);
+ vec_free(name);
+ }
+ }
+ /* *INDENT-ON* */
+
+ vec_free (xstat_names);
+
+ if (xs)
+ {
+ s = format (s, "\n%Uextended stats:%v",
+ format_white_space, indent + 2, xs);
+ vec_free (xs);
+ }
+
+ if (vec_len (xd->errors))
+ {
+ s = format (s, "%UErrors:\n %U", format_white_space, indent,
+ format_dpdk_device_errors, xd);
+ }
+
+ return s;
+}
+
+u8 *
+format_dpdk_tx_dma_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
+ dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *);
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
+ uword indent = format_get_indent (s);
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
+
+ s = format (s, "%U tx queue %d",
+ format_vnet_sw_interface_name, vnm, sw, t->queue_index);
+
+ s = format (s, "\n%Ubuffer 0x%x: %U",
+ format_white_space, indent,
+ t->buffer_index, format_vlib_buffer, &t->buffer);
+
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_ethernet_header_with_length, t->buffer.pre_data,
+ sizeof (t->buffer.pre_data));
+
+ return s;
+}
+
+u8 *
+format_dpdk_rx_dma_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
+ dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *);
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
+ format_function_t *f;
+ uword indent = format_get_indent (s);
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
+
+ s = format (s, "%U rx queue %d",
+ format_vnet_sw_interface_name, vnm, sw, t->queue_index);
+
+ s = format (s, "\n%Ubuffer 0x%x: %U",
+ format_white_space, indent,
+ t->buffer_index, format_vlib_buffer, &t->buffer);
+
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_dpdk_rte_mbuf, &t->mb, &t->data);
+
+ if (vm->trace_main.verbose)
+ {
+ s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2,
+ t->mb.data_len > sizeof (t->data) ? " (truncated)" : "");
+ s = format (s, "\n%U%U", format_white_space, indent + 4,
+ format_hexdump, &t->data,
+ t->mb.data_len >
+ sizeof (t->data) ? sizeof (t->data) : t->mb.data_len);
+ }
+ f = node->format_buffer;
+ if (!f)
+ f = format_hex_bytes;
+ s = format (s, "\n%U%U", format_white_space, indent,
+ f, t->buffer.pre_data, sizeof (t->buffer.pre_data));
+
+ return s;
+}
+
+
+static inline u8 *
+format_dpdk_pkt_types (u8 * s, va_list * va)
+{
+ u32 *pkt_types = va_arg (*va, u32 *);
+ uword indent __attribute__ ((unused)) = format_get_indent (s) + 2;
+
+ if (!*pkt_types)
+ return s;
+
+ s = format (s, "Packet Types");
+
+#define _(L, F, S) \
+ if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \
+ { \
+ s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \
+ "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \
+ }
+
+ foreach_dpdk_pkt_type
+#undef _
+ return s;
+}
+
+static inline u8 *
+format_dpdk_pkt_offload_flags (u8 * s, va_list * va)
+{
+ u64 *ol_flags = va_arg (*va, u64 *);
+ uword indent = format_get_indent (s) + 2;
+
+ if (!*ol_flags)
+ return s;
+
+ s = format (s, "Packet Offload Flags");
+
+#define _(F, S) \
+ if (*ol_flags & F) \
+ { \
+ s = format (s, "\n%U%s (0x%04x) %s", \
+ format_white_space, indent, #F, F, S); \
+ }
+
+ foreach_dpdk_pkt_offload_flag
+#undef _
+ return s;
+}
+
+u8 *
+format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va)
+{
+ ethernet_vlan_header_tv_t *vlan_hdr =
+ va_arg (*va, ethernet_vlan_header_tv_t *);
+
+ if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD)
+ {
+ s = format (s, "%U 802.1q vlan ",
+ format_ethernet_vlan_tci,
+ clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id));
+ vlan_hdr++;
+ }
+
+ s = format (s, "%U",
+ format_ethernet_vlan_tci,
+ clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id));
+
+ return s;
+}
+
+u8 *
+format_dpdk_rte_mbuf (u8 * s, va_list * va)
+{
+ struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *);
+ ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *);
+ uword indent = format_get_indent (s) + 2;
+
+ s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d"
+ "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x"
+ "\n%Upacket_type 0x%x",
+ mb->port, mb->nb_segs, mb->pkt_len,
+ format_white_space, indent,
+ mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off,
+ mb->buf_physaddr, format_white_space, indent, mb->packet_type);
+
+ if (mb->ol_flags)
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_dpdk_pkt_offload_flags, &mb->ol_flags);
+
+ if ((mb->ol_flags & PKT_RX_VLAN_PKT) &&
+ ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0))
+ {
+ ethernet_vlan_header_tv_t *vlan_hdr =
+ ((ethernet_vlan_header_tv_t *) & (eth_hdr->type));
+ s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr);
+ }
+
+ if (mb->packet_type)
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_dpdk_pkt_types, &mb->packet_type);
+
+ return s;
+}
+
+clib_error_t *
+unformat_rss_fn (unformat_input_t * input, uword * rss_fn)
+{
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0)
+ ;
+#undef _
+#define _(f, s) \
+ else if (unformat (input, s)) \
+ *rss_fn |= f;
+
+ foreach_dpdk_rss_hf
+#undef _
+ else
+ {
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ }
+ return 0;
+}
+
+uword
+unformat_dpdk_log_level (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,s) else if (unformat (input, s)) *r = RTE_LOG_##v;
+ foreach_dpdk_log_level
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+clib_error_t *
+unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos)
+{
+ clib_error_t *error = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "hqos-thread %u", &hqos->hqos_thread))
+ hqos->hqos_thread_valid = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+
+ return error;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
new file mode 100755
index 00000000..acf712ff
--- /dev/null
+++ b/src/plugins/dpdk/device/init.c
@@ -0,0 +1,1589 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/linux/sysfs.h>
+#include <vlib/unix/unix.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <dpdk/device/dpdk.h>
+#include <vlib/pci/pci.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include <dpdk/device/dpdk_priv.h>
+
+dpdk_main_t dpdk_main;
+
+#define LINK_STATE_ELOGS 0
+
+/* Port configuration, mildly modified Intel app values */
+
+static struct rte_eth_conf port_conf_template = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static dpdk_port_type_t
+port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
+{
+
+ if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
+ return VNET_DPDK_PORT_TYPE_ETH_100G;
+ else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
+ return VNET_DPDK_PORT_TYPE_ETH_50G;
+ else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
+ return VNET_DPDK_PORT_TYPE_ETH_40G;
+ else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
+ return VNET_DPDK_PORT_TYPE_ETH_25G;
+ else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
+ return VNET_DPDK_PORT_TYPE_ETH_10G;
+ else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
+ return VNET_DPDK_PORT_TYPE_ETH_1G;
+
+ return VNET_DPDK_PORT_TYPE_UNKNOWN;
+}
+
+
+static u32
+dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
+ u32 old = 0;
+
+ if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags))
+ {
+ old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0;
+
+ if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
+ xd->flags |= DPDK_DEVICE_FLAG_PROMISC;
+ else
+ xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC;
+
+ if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
+ {
+ if (xd->flags & DPDK_DEVICE_FLAG_PROMISC)
+ rte_eth_promiscuous_enable (xd->device_index);
+ else
+ rte_eth_promiscuous_disable (xd->device_index);
+ }
+ }
+ else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags))
+ {
+ int rv;
+
+ xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
+
+ if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
+ dpdk_device_stop (xd);
+
+ rv = rte_eth_dev_configure
+ (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf);
+
+ if (rv < 0)
+ vlib_cli_output (vlib_get_main (),
+ "rte_eth_dev_configure[%d]: err %d",
+ xd->device_index, rv);
+
+ rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
+
+ if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
+ dpdk_device_start (xd);
+
+ }
+ return old;
+}
+
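+/*
+ * Allocate one cache-line-aligned lock per TX queue, used to serialize
+ * transmit when TX queues are shared between threads.
+ */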
+static void
+dpdk_device_lock_init (dpdk_device_t * xd)
+{
+ int q;
+ vec_validate (xd->lockp, xd->tx_q_used - 1);
+ for (q = 0; q < xd->tx_q_used; q++)
+ {
+ xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
+ }
+}
+
+static clib_error_t *
+dpdk_lib_init (dpdk_main_t * dm)
+{
+ u32 nports;
+ u32 nb_desc = 0;
+ int i;
+ clib_error_t *error;
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vnet_device_main_t *vdm = &vnet_device_main;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hi;
+ dpdk_device_t *xd;
+ vlib_pci_addr_t last_pci_addr;
+ u32 last_pci_addr_port = 0;
+ vlib_thread_registration_t *tr_hqos;
+ uword *p_hqos;
+
+ u32 next_hqos_cpu = 0;
+ u8 af_packet_port_id = 0;
+ u8 bond_ether_port_id = 0;
+ last_pci_addr.as_u32 = ~0;
+
+ dm->hqos_cpu_first_index = 0;
+ dm->hqos_cpu_count = 0;
+
+ /* find out which cpus will be used for I/O TX */
+ p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads");
+ tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0;
+
+ if (tr_hqos && tr_hqos->count > 0)
+ {
+ dm->hqos_cpu_first_index = tr_hqos->first_index;
+ dm->hqos_cpu_count = tr_hqos->count;
+ }
+
+ vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ nports = rte_eth_dev_count ();
+ if (nports < 1)
+ {
+ clib_warning ("DPDK drivers found no ports...");
+ }
+
+ if (CLIB_DEBUG > 0)
+ clib_warning ("DPDK drivers found %d ports...", nports);
+
+ /*
+ * All buffers are allocated from the same rte_mempool.
+ * Thus they all have the same number of data bytes.
+ */
+ dm->vlib_buffer_free_list_index =
+ vlib_buffer_get_or_create_free_list (vm,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
+ "dpdk rx");
+
+ if (dm->conf->enable_tcp_udp_checksum)
+ dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT
+ | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
+
+ /* vlib_buffer_t template */
+ vec_validate_aligned (dm->buffer_templates, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ for (i = 0; i < tm->n_vlib_mains; i++)
+ {
+ vlib_buffer_free_list_t *fl;
+ vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, i);
+ fl = vlib_buffer_get_free_list (vm,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (bt, fl);
+ bt->flags = dm->buffer_flags_template;
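+ /* template points at the start of the mbuf headroom; the per-packet
+ data_off is added back in the rx path */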
+ bt->current_data = -RTE_PKTMBUF_HEADROOM;
+ vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ }
+
+ for (i = 0; i < nports; i++)
+ {
+ u8 addr[6];
+ u8 vlan_strip = 0;
+ int j;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_link l;
+ dpdk_device_config_t *devconf = 0;
+ vlib_pci_addr_t pci_addr;
+ uword *p = 0;
+
+ rte_eth_dev_info_get (i, &dev_info);
+ if (dev_info.pci_dev) /* bonded interface has no pci info */
+ {
+ pci_addr.domain = dev_info.pci_dev->addr.domain;
+ pci_addr.bus = dev_info.pci_dev->addr.bus;
+ pci_addr.slot = dev_info.pci_dev->addr.devid;
+ pci_addr.function = dev_info.pci_dev->addr.function;
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr,
+ pci_addr.as_u32);
+ }
+
+ if (p)
+ devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ else
+ devconf = &dm->conf->default_devconf;
+
+ /* Create vnet interface */
+ vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
+ xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
+ xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
+ xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
+
+ /* Handle interface naming for devices with multiple ports sharing same PCI ID */
+ if (dev_info.pci_dev)
+ {
+ struct rte_eth_dev_info di = { 0 };
+ rte_eth_dev_info_get (i + 1, &di);
+ if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 &&
+ memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr,
+ sizeof (struct rte_pci_addr)) == 0)
+ {
+ xd->interface_name_suffix = format (0, "0");
+ last_pci_addr.as_u32 = pci_addr.as_u32;
+ last_pci_addr_port = i;
+ }
+ else if (pci_addr.as_u32 == last_pci_addr.as_u32)
+ {
+ xd->interface_name_suffix =
+ format (0, "%u", i - last_pci_addr_port);
+ }
+ else
+ {
+ last_pci_addr.as_u32 = ~0;
+ }
+ }
+ else
+ last_pci_addr.as_u32 = ~0;
+
+ clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
+ sizeof (struct rte_eth_txconf));
+ if (dm->conf->no_multi_seg)
+ {
+ xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
+ port_conf_template.rxmode.jumbo_frame = 0;
+ port_conf_template.rxmode.enable_scatter = 0;
+ }
+ else
+ {
+ xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
+ port_conf_template.rxmode.jumbo_frame = 1;
+ port_conf_template.rxmode.enable_scatter = 1;
+ xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG;
+ }
+
+ clib_memcpy (&xd->port_conf, &port_conf_template,
+ sizeof (struct rte_eth_conf));
+
+ xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
+
+ if (devconf->num_tx_queues > 0
+ && devconf->num_tx_queues < xd->tx_q_used)
+ xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
+
+ if (devconf->num_rx_queues > 1 && dm->use_rss == 0)
+ {
+ dm->use_rss = 1;
+ }
+
+ if (devconf->num_rx_queues > 1
+ && dev_info.max_rx_queues >= devconf->num_rx_queues)
+ {
+ xd->rx_q_used = devconf->num_rx_queues;
+ xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+ if (devconf->rss_fn == 0)
+ xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
+ ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
+ else
+ xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
+ }
+ else
+ xd->rx_q_used = 1;
+
+ xd->flags |= DPDK_DEVICE_FLAG_PMD;
+
+ /* workaround for drivers not setting driver_name */
+ if ((!dev_info.driver_name) && (dev_info.pci_dev))
+ dev_info.driver_name = dev_info.pci_dev->driver->driver.name;
+
+ ASSERT (dev_info.driver_name);
+
+ if (!xd->pmd)
+ {
+
+
+#define _(s,f) else if (dev_info.driver_name && \
+ !strcmp(dev_info.driver_name, s)) \
+ xd->pmd = VNET_DPDK_PMD_##f;
+ if (0)
+ ;
+ foreach_dpdk_pmd
+#undef _
+ else
+ xd->pmd = VNET_DPDK_PMD_UNKNOWN;
+
+ xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
+ xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
+ xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
+
+ switch (xd->pmd)
+ {
+ /* Drivers with valid speed_capa set */
+ case VNET_DPDK_PMD_E1000EM:
+ case VNET_DPDK_PMD_IGB:
+ case VNET_DPDK_PMD_IXGBE:
+ case VNET_DPDK_PMD_I40E:
+ xd->port_type = port_type_from_speed_capa (&dev_info);
+ xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD |
+ DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
+
+ break;
+ case VNET_DPDK_PMD_CXGBE:
+ case VNET_DPDK_PMD_MLX4:
+ case VNET_DPDK_PMD_MLX5:
+ xd->port_type = port_type_from_speed_capa (&dev_info);
+ break;
+
+ /* SR-IOV VFs */
+ case VNET_DPDK_PMD_IGBVF:
+ case VNET_DPDK_PMD_IXGBEVF:
+ case VNET_DPDK_PMD_I40EVF:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
+ xd->port_conf.rxmode.hw_strip_crc = 1;
+ break;
+
+ case VNET_DPDK_PMD_THUNDERX:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
+ xd->port_conf.rxmode.hw_strip_crc = 1;
+ break;
+
+ case VNET_DPDK_PMD_DPAA2:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
+ break;
+
+ /* Cisco VIC */
+ case VNET_DPDK_PMD_ENIC:
+ rte_eth_link_get_nowait (i, &l);
+ if (l.link_speed == 40000)
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
+ else
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
+ break;
+
+ /* Intel Red Rock Canyon */
+ case VNET_DPDK_PMD_FM10K:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH;
+ xd->port_conf.rxmode.hw_strip_crc = 1;
+ break;
+
+ /* virtio */
+ case VNET_DPDK_PMD_VIRTIO:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
+ xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO;
+ xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO;
+ break;
+
+ /* vmxnet3 */
+ case VNET_DPDK_PMD_VMXNET3:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
+ xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
+ break;
+
+ case VNET_DPDK_PMD_AF_PACKET:
+ xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET;
+ xd->port_id = af_packet_port_id++;
+ break;
+
+ case VNET_DPDK_PMD_BOND:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND;
+ xd->port_id = bond_ether_port_id++;
+ break;
+
+ case VNET_DPDK_PMD_VIRTIO_USER:
+ xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER;
+ break;
+
+ case VNET_DPDK_PMD_VHOST_ETHER:
+ xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER;
+ break;
+
+ default:
+ xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
+ }
+
+ if (devconf->num_rx_desc)
+ xd->nb_rx_desc = devconf->num_rx_desc;
+
+ if (devconf->num_tx_desc)
+ xd->nb_tx_desc = devconf->num_tx_desc;
+ }
+
+ /*
+ * Ensure default mtu is not > the mtu read from the hardware.
+ * Otherwise rte_eth_dev_configure() will fail and the port will
+ * not be available.
+ */
+ if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
+ {
+ /*
+ * This device does not support the platform's max frame
+ * size. Use its advertised MRU instead.
+ */
+ xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
+ }
+ else
+ {
+ xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES;
+
+ /*
+ * Some platforms do not account for Ethernet FCS (4 bytes) in
+ * MTU calculations. To interoperate with them, increase the MRU, but only
+ * if the device's settings can support it.
+ */
+ if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) &&
+ xd->port_conf.rxmode.hw_strip_crc)
+ {
+ /*
+ * Allow additional 4 bytes (for Ethernet FCS). These bytes are
+ * stripped by h/w and so will not consume any buffer memory.
+ */
+ xd->port_conf.rxmode.max_rx_pkt_len += 4;
+ }
+ }
+
+ if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
+ {
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+ clib_memcpy (addr + 2, &rnd, sizeof (rnd));
+ addr[0] = 2;
+ addr[1] = 0xfe;
+ }
+ else
+ rte_eth_macaddr_get (i, (struct ether_addr *) addr);
+
+ if (xd->tx_q_used < tm->n_vlib_mains)
+ dpdk_device_lock_init (xd);
+
+ xd->device_index = xd - dm->devices;
+ ASSERT (i == xd->device_index);
+ xd->per_interface_next_index = ~0;
+
+ /* assign interface to input thread */
+ dpdk_device_and_queue_t *dq;
+ int q;
+
+ if (devconf->hqos_enabled)
+ {
+ xd->flags |= DPDK_DEVICE_FLAG_HQOS;
+
+ if (devconf->hqos.hqos_thread_valid)
+ {
+ int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread;
+
+ if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count)
+ return clib_error_return (0, "invalid HQoS thread index");
+
+ vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
+ dq->device = xd->device_index;
+ dq->queue_id = 0;
+ }
+ else
+ {
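+ /* no thread explicitly configured: round-robin this port across
+ the available HQoS threads */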
+ int cpu = dm->hqos_cpu_first_index + next_hqos_cpu;
+
+ if (dm->hqos_cpu_count == 0)
+ return clib_error_return (0, "no HQoS threads available");
+
+ vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
+ dq->device = xd->device_index;
+ dq->queue_id = 0;
+
+ next_hqos_cpu++;
+ if (next_hqos_cpu == dm->hqos_cpu_count)
+ next_hqos_cpu = 0;
+
+ devconf->hqos.hqos_thread_valid = 1;
+ devconf->hqos.hqos_thread = cpu;
+ }
+ }
+
+ vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains,
+ CLIB_CACHE_LINE_BYTES);
+ for (j = 0; j < tm->n_vlib_mains; j++)
+ {
+ vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc,
+ sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (xd->tx_vectors[j]);
+ }
+
+ vec_validate_aligned (xd->rx_vectors, xd->rx_q_used,
+ CLIB_CACHE_LINE_BYTES);
+ for (j = 0; j < xd->rx_q_used; j++)
+ {
+ vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (xd->rx_vectors[j]);
+ }
+
+ vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains,
+ CLIB_CACHE_LINE_BYTES);
+
+
+ /* count the number of descriptors used for this device */
+ nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
+
+ error = ethernet_register_interface
+ (dm->vnet_main, dpdk_device_class.index, xd->device_index,
+ /* ethernet address */ addr,
+ &xd->hw_if_index, dpdk_flag_change);
+ if (error)
+ return error;
+
+ sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index);
+ xd->vlib_sw_if_index = sw->sw_if_index;
+ vnet_hw_interface_set_input_node (dm->vnet_main, xd->hw_if_index,
+ dpdk_input_node.index);
+
+ if (devconf->workers)
+ {
+ int i;
+ q = 0;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, devconf->workers, ({
+ vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q++,
+ vdm->first_worker_thread_index + i);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ for (q = 0; q < xd->rx_q_used; q++)
+ {
+ vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q, /* any */
+ ~1);
+ }
+
+ hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index);
+
+ if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD)
+ hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD;
+
+ dpdk_device_setup (xd);
+
+ if (vec_len (xd->errors))
+ clib_warning ("setup failed for device %U. Errors:\n %U",
+ format_dpdk_device_name, i,
+ format_dpdk_device_errors, xd);
+
+ if (devconf->hqos_enabled)
+ {
+ clib_error_t *rv;
+ rv = dpdk_port_setup_hqos (xd, &devconf->hqos);
+ if (rv)
+ return rv;
+ }
+
+ /*
+ * For Cisco VIC vNICs, default to VLAN strip enabled, unless
+ * specified otherwise in the startup config.
+ * For other NICs, default to VLAN strip disabled, unless specified
+ * otherwise in the startup config.
+ */
+ if (xd->pmd == VNET_DPDK_PMD_ENIC)
+ {
+ if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF)
+ vlan_strip = 1; /* remove vlan tag from VIC port by default */
+ else
+ clib_warning ("VLAN strip disabled for interface\n");
+ }
+ else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
+ vlan_strip = 1;
+
+ if (vlan_strip)
+ {
+ int vlan_off;
+ vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index);
+ vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
+ xd->port_conf.rxmode.hw_vlan_strip = vlan_off;
+ if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0)
+ clib_warning ("VLAN strip enabled for interface\n");
+ else
+ clib_warning ("VLAN strip cannot be supported by interface\n");
+ }
+
+ hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
+ xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
+
+ rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
+ }
+
+ if (nb_desc > dm->conf->num_mbufs)
+ clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
+ dm->conf->num_mbufs, nb_desc);
+
+ return 0;
+}
+
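+/*
+ * Walk the PCI device pool and bind supported NICs to the configured UIO
+ * driver, honoring any whitelist; devices that fail to bind are marked
+ * blacklisted.
+ */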
+static void
+dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ clib_error_t *error;
+ vlib_pci_device_t *d;
+ u8 *pci_addr = 0;
+ int num_whitelisted = vec_len (conf->dev_confs);
+
+ /* *INDENT-OFF* */
+ pool_foreach (d, pm->pci_devs, ({
+ dpdk_device_config_t * devconf = 0;
+ vec_reset_length (pci_addr);
+ pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0);
+
+ if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO)
+ continue;
+
+ if (num_whitelisted)
+ {
+ uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32);
+
+ if (!p)
+ continue;
+
+ devconf = pool_elt_at_index (conf->dev_confs, p[0]);
+ }
+
+ /* virtio */
+ if (d->vendor_id == 0x1af4 && d->device_id == 0x1000)
+ ;
+ /* vmxnet3 */
+ else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0)
+ ;
+ /* all Intel network devices */
+ else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
+ ;
+ /* all Intel QAT devices VFs */
+ else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
+ (d->device_id == 0x0443 || d->device_id == 0x37c9 || d->device_id == 0x19e3))
+ ;
+ /* Cisco VIC */
+ else if (d->vendor_id == 0x1137 && d->device_id == 0x0043)
+ ;
+ /* Chelsio T4/T5 */
+ else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000)
+ ;
+ /* Mellanox */
+ else if (d->vendor_id == 0x15b3 && d->device_id >= 0x1013 && d->device_id <= 0x101a)
+ {
+ continue;
+ }
+ else
+ {
+ clib_warning ("Unsupported PCI device 0x%04x:0x%04x found "
+ "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id,
+ pci_addr);
+ continue;
+ }
+
+ error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name);
+
+ if (error)
+ {
+ if (devconf == 0)
+ {
+ pool_get (conf->dev_confs, devconf);
+ hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32,
+ devconf - conf->dev_confs);
+ devconf->pci_addr.as_u32 = d->bus_address.as_u32;
+ }
+ devconf->is_blacklisted = 1;
+ clib_error_report (error);
+ }
+ }));
+ /* *INDENT-ON* */
+ vec_free (pci_addr);
+}
+
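+/*
+ * Parse a per-device "dev <pci-addr> { ... }" (or "dev default { ... }")
+ * section of the dpdk startup config into a dpdk_device_config_t.
+ */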
+static clib_error_t *
+dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
+ unformat_input_t * input, u8 is_default)
+{
+ clib_error_t *error = 0;
+ uword *p;
+ dpdk_device_config_t *devconf;
+ unformat_input_t sub_input;
+
+ if (is_default)
+ {
+ devconf = &conf->default_devconf;
+ }
+ else
+ {
+ p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+
+ if (!p)
+ {
+ pool_get (conf->dev_confs, devconf);
+ hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32,
+ devconf - conf->dev_confs);
+ }
+ else
+ return clib_error_return (0,
+ "duplicate configuration for PCI address %U",
+ format_vlib_pci_addr, &pci_addr);
+ }
+
+ devconf->pci_addr.as_u32 = pci_addr.as_u32;
+ devconf->hqos_enabled = 0;
+ dpdk_device_config_hqos_default (&devconf->hqos);
+
+ if (!input)
+ return 0;
+
+ unformat_skip_white_space (input);
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues))
+ ;
+ else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues))
+ ;
+ else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc))
+ ;
+ else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
+ ;
+ else if (unformat (input, "workers %U", unformat_bitmap_list,
+ &devconf->workers))
+ ;
+ else
+ if (unformat
+ (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
+ {
+ error = unformat_rss_fn (&sub_input, &devconf->rss_fn);
+ if (error)
+ break;
+ }
+ else if (unformat (input, "vlan-strip-offload off"))
+ devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
+ else if (unformat (input, "vlan-strip-offload on"))
+ devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
+ else
+ if (unformat
+ (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input))
+ {
+ devconf->hqos_enabled = 1;
+ error = unformat_hqos (&sub_input, &devconf->hqos);
+ if (error)
+ break;
+ }
+ else if (unformat (input, "hqos"))
+ {
+ devconf->hqos_enabled = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+
+ if (error)
+ return error;
+
+ if (devconf->workers && devconf->num_rx_queues == 0)
+ devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers);
+ else if (devconf->workers &&
+ clib_bitmap_count_set_bits (devconf->workers) !=
+ devconf->num_rx_queues)
+ error =
+ clib_error_return (0,
+ "%U: number of worker threadds must be "
+ "equal to number of rx queues", format_vlib_pci_addr,
+ &pci_addr);
+
+ return error;
+}
+
+static clib_error_t *
+dpdk_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_config_main_t *conf = &dpdk_config_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_device_config_t *devconf;
+ vlib_pci_addr_t pci_addr;
+ unformat_input_t sub_input;
+ uword x;
+ u8 *s, *tmp = 0;
+ u8 *rte_cmd = 0, *ethname = 0;
+ u32 log_level;
+ int ret, i;
+ int num_whitelisted = 0;
+ u8 no_pci = 0;
+ u8 no_huge = 0;
+ u8 huge_dir = 0;
+ u8 file_prefix = 0;
+ u8 *socket_mem = 0;
+ u8 *huge_dir_path = 0;
+
+ huge_dir_path =
+ format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
+
+ conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
+ log_level = RTE_LOG_NOTICE;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ /* Prime the pump */
+ if (unformat (input, "no-hugetlb"))
+ {
+ vec_add1 (conf->eal_init_args, (u8 *) "no-huge");
+ no_huge = 1;
+ }
+
+ else if (unformat (input, "enable-tcp-udp-checksum"))
+ conf->enable_tcp_udp_checksum = 1;
+
+ else if (unformat (input, "decimal-interface-names"))
+ conf->interface_name_format_decimal = 1;
+
+ else if (unformat (input, "log-level %U", unformat_dpdk_log_level, &x))
+ log_level = x;
+
+ else if (unformat (input, "no-multi-seg"))
+ conf->no_multi_seg = 1;
+
+ else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
+ &sub_input))
+ {
+ error =
+ dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input,
+ 1);
+
+ if (error)
+ return error;
+ }
+ else
+ if (unformat
+ (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr,
+ unformat_vlib_cli_sub_input, &sub_input))
+ {
+ error = dpdk_device_config (conf, pci_addr, &sub_input, 0);
+
+ if (error)
+ return error;
+
+ num_whitelisted++;
+ }
+ else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr))
+ {
+ error = dpdk_device_config (conf, pci_addr, 0, 0);
+
+ if (error)
+ return error;
+
+ num_whitelisted++;
+ }
+ else if (unformat (input, "num-mbufs %d", &conf->num_mbufs))
+ ;
+ else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
+ ;
+ else if (unformat (input, "socket-mem %s", &socket_mem))
+ ;
+ else if (unformat (input, "no-pci"))
+ {
+ no_pci = 1;
+ tmp = format (0, "--no-pci%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ }
+ else if (unformat (input, "poll-sleep %d", &dm->poll_sleep_usec))
+ ;
+
+#define _(a) \
+ else if (unformat(input, #a)) \
+ { \
+ tmp = format (0, "--%s%c", #a, 0); \
+ vec_add1 (conf->eal_init_args, tmp); \
+ }
+ foreach_eal_double_hyphen_predicate_arg
+#undef _
+#define _(a) \
+ else if (unformat(input, #a " %s", &s)) \
+ { \
+ if (!strncmp(#a, "huge-dir", 8)) \
+ huge_dir = 1; \
+ else if (!strncmp(#a, "file-prefix", 11)) \
+ file_prefix = 1; \
+ tmp = format (0, "--%s%c", #a, 0); \
+ vec_add1 (conf->eal_init_args, tmp); \
+ vec_add1 (s, 0); \
+ if (!strncmp(#a, "vdev", 4)) \
+ if (strstr((char*)s, "af_packet")) \
+ clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \
+ vec_add1 (conf->eal_init_args, s); \
+ }
+ foreach_eal_double_hyphen_arg
+#undef _
+#define _(a,b) \
+ else if (unformat(input, #a " %s", &s)) \
+ { \
+ tmp = format (0, "-%s%c", #b, 0); \
+ vec_add1 (conf->eal_init_args, tmp); \
+ vec_add1 (s, 0); \
+ vec_add1 (conf->eal_init_args, s); \
+ }
+ foreach_eal_single_hyphen_arg
+#undef _
+#define _(a,b) \
+ else if (unformat(input, #a " %s", &s)) \
+ { \
+ tmp = format (0, "-%s%c", #b, 0); \
+ vec_add1 (conf->eal_init_args, tmp); \
+ vec_add1 (s, 0); \
+ vec_add1 (conf->eal_init_args, s); \
+ conf->a##_set_manually = 1; \
+ }
+ foreach_eal_single_hyphen_mandatory_arg
+#undef _
+ else if (unformat (input, "default"))
+ ;
+
+ else if (unformat_skip_white_space (input))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (!conf->uio_driver_name)
+ conf->uio_driver_name = format (0, "uio_pci_generic%c", 0);
+
+ /*
+ * Use 1G huge pages if available.
+ */
+ if (!no_huge && !huge_dir)
+ {
+ u32 x, *mem_by_socket = 0;
+ uword c = 0;
+ u8 use_1g = 1;
+ u8 use_2m = 1;
+ u8 less_than_1g = 1;
+ int rv;
+
+ umount ((char *) huge_dir_path);
+
+ /* Process "socket-mem" parameter value */
+ if (vec_len (socket_mem))
+ {
+ unformat_input_t in;
+ unformat_init_vector (&in, socket_mem);
+ while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&in, "%u,", &x))
+ ;
+ else if (unformat (&in, "%u", &x))
+ ;
+ else if (unformat (&in, ","))
+ x = 0;
+ else
+ break;
+
+ vec_add1 (mem_by_socket, x);
+
+ if (x > 1023)
+ less_than_1g = 0;
+ }
+ /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */
+ unformat_free (&in);
+ socket_mem = 0;
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
+ {
+ vec_validate(mem_by_socket, c);
+ mem_by_socket[c] = 256; /* default per-socket mem */
+ }
+ ));
+ /* *INDENT-ON* */
+ }
+
+ /* check if available enough 1GB pages for each socket */
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
+ {
+ int pages_avail, page_size, mem;
+ clib_error_t *e = 0;
+
+ vec_validate(mem_by_socket, c);
+ mem = mem_by_socket[c];
+
+ page_size = 1024;
+ e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail);
+
+ if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem)
+ use_1g = 0;
+
+ if (e)
+ clib_error_free (e);
+
+ page_size = 2;
+ e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail);
+
+ if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem)
+ use_2m = 0;
+
+ if (e)
+ clib_error_free (e);
+ }));
+ /* *INDENT-ON* */
+
+ if (mem_by_socket == 0)
+ {
+ error = clib_error_return (0, "mem_by_socket NULL");
+ goto done;
+ }
+ _vec_len (mem_by_socket) = c + 1;
+
+ /* regenerate socket_mem string */
+ vec_foreach_index (x, mem_by_socket)
+ socket_mem = format (socket_mem, "%s%u",
+ socket_mem ? "," : "", mem_by_socket[x]);
+ socket_mem = format (socket_mem, "%c", 0);
+
+ vec_free (mem_by_socket);
+
+ error = vlib_unix_recursive_mkdir ((char *) huge_dir_path);
+ if (error)
+ {
+ goto done;
+ }
+
+ if (use_1g && !(less_than_1g && use_2m))
+ {
+ rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0,
+ "pagesize=1G");
+ }
+ else if (use_2m)
+ {
+ rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL);
+ }
+ else
+ {
+ return clib_error_return (0, "not enough free huge pages");
+ }
+
+ if (rv)
+ {
+ error = clib_error_return (0, "mount failed %d", errno);
+ goto done;
+ }
+
+ tmp = format (0, "--huge-dir%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%s%c", huge_dir_path, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ if (!file_prefix)
+ {
+ tmp = format (0, "--file-prefix%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "vpp%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ }
+ }
+
+ vec_free (rte_cmd);
+ vec_free (ethname);
+
+ if (error)
+ return error;
+
+ /* I'll bet that -c and -n must be the first and second args... */
+ if (!conf->coremask_set_manually)
+ {
+ vlib_thread_registration_t *tr;
+ uword *coremask = 0;
+ int i;
+
+ /* main thread core */
+ coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ tr = tm->registrations[i];
+ coremask = clib_bitmap_or (coremask, tr->coremask);
+ }
+
+ vec_insert (conf->eal_init_args, 2, 1);
+ conf->eal_init_args[1] = (u8 *) "-c";
+ tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
+ conf->eal_init_args[2] = tmp;
+ clib_bitmap_free (coremask);
+ }
+
+ if (!conf->nchannels_set_manually)
+ {
+ vec_insert (conf->eal_init_args, 2, 3);
+ conf->eal_init_args[3] = (u8 *) "-n";
+ tmp = format (0, "%d", conf->nchannels);
+ conf->eal_init_args[4] = tmp;
+ }
+
+ if (no_pci == 0 && geteuid () == 0)
+ dpdk_bind_devices_to_uio (conf);
+
+#define _(x) \
+ if (devconf->x == 0 && conf->default_devconf.x > 0) \
+ devconf->x = conf->default_devconf.x ;
+
+ /* *INDENT-OFF* */
+ pool_foreach (devconf, conf->dev_confs, ({
+
+ /* default per-device config items */
+ foreach_dpdk_device_config_item
+
+ /* add DPDK EAL whitelist/blacklist entry */
+ if (num_whitelisted > 0 && devconf->is_blacklisted == 0)
+ {
+ tmp = format (0, "-w%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ }
+ else if (num_whitelisted == 0 && devconf->is_blacklisted != 0)
+ {
+ tmp = format (0, "-b%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ }
+ }));
+ /* *INDENT-ON* */
+
+#undef _
+
+ /* set master-lcore */
+ tmp = format (0, "--master-lcore%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%u%c", tm->main_lcore, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+
+ /* set socket-mem */
+ tmp = format (0, "--socket-mem%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%s%c", socket_mem, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+
+ /* NULL terminate the "argv" vector, in case of stupidity */
+ vec_add1 (conf->eal_init_args, 0);
+ _vec_len (conf->eal_init_args) -= 1;
+
+ /* Set up DPDK eal and packet mbuf pool early. */
+
+ rte_log_set_global_level (log_level);
+
+ vm = vlib_get_main ();
+
+ /* make copy of args as rte_eal_init tends to mess up with arg array */
+ for (i = 1; i < vec_len (conf->eal_init_args); i++)
+ conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ",
+ conf->eal_init_args[i]);
+
+ clib_warning ("EAL init args: %s", conf->eal_init_args_str);
+ ret =
+ rte_eal_init (vec_len (conf->eal_init_args),
+ (char **) conf->eal_init_args);
+
+ /* lazy umount hugepages */
+ umount2 ((char *) huge_dir_path, MNT_DETACH);
+ rmdir ((char *) huge_dir_path);
+ vec_free (huge_dir_path);
+
+ if (ret < 0)
+ return clib_error_return (0, "rte_eal_init returned %d", ret);
+
+ /* Dump the physical memory layout prior to creating the mbuf_pool */
+ fprintf (stdout, "DPDK physical memory layout:\n");
+ rte_dump_physmem_layout (stdout);
+
+ /* main thread 1st */
+ error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ());
+ if (error)
+ return error;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ {
+ error = dpdk_buffer_pool_create (vm, conf->num_mbufs,
+ rte_lcore_to_socket_id (i));
+ if (error)
+ return error;
+ }
+
+done:
+ return error;
+}
+
+VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk");
+
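+/*
+ * Poll the PMD for current link state and translate status/duplex/speed
+ * changes into VPP hw interface flags.
+ */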
+void
+dpdk_update_link_state (dpdk_device_t * xd, f64 now)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ struct rte_eth_link prev_link = xd->link;
+ u32 hw_flags = 0;
+ u8 hw_flags_chg = 0;
+
+ /* only update link state for PMD interfaces */
+ if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
+ return;
+
+ xd->time_last_link_update = now ? now : xd->time_last_link_update;
+ memset (&xd->link, 0, sizeof (xd->link));
+ rte_eth_link_get_nowait (xd->device_index, &xd->link);
+
+ if (LINK_STATE_ELOGS)
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format =
+ "update-link-state: sw_if_index %d, admin_up %d,"
+ "old link_state %d new link_state %d",.format_args = "i4i1i1i1",};
+
+ struct
+ {
+ u32 sw_if_index;
+ u8 admin_up;
+ u8 old_link_state;
+ u8 new_link_state;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->sw_if_index = xd->vlib_sw_if_index;
+ ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0;
+ ed->old_link_state = (u8)
+ vnet_hw_interface_is_link_up (vnm, xd->hw_if_index);
+ ed->new_link_state = (u8) xd->link.link_status;
+ }
+
+ if ((xd->flags & (DPDK_DEVICE_FLAG_ADMIN_UP | DPDK_DEVICE_FLAG_BOND_SLAVE))
+ && ((xd->link.link_status != 0) ^
+ vnet_hw_interface_is_link_up (vnm, xd->hw_if_index)))
+ {
+ hw_flags_chg = 1;
+ hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+ }
+
+ if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex))
+ {
+ hw_flags_chg = 1;
+ switch (xd->link.link_duplex)
+ {
+ case ETH_LINK_HALF_DUPLEX:
+ hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX;
+ break;
+ case ETH_LINK_FULL_DUPLEX:
+ hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX;
+ break;
+ default:
+ break;
+ }
+ }
+ if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
+ {
+ hw_flags_chg = 1;
+ switch (xd->link.link_speed)
+ {
+ case ETH_SPEED_NUM_10M:
+ hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M;
+ break;
+ case ETH_SPEED_NUM_100M:
+ hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M;
+ break;
+ case ETH_SPEED_NUM_1G:
+ hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
+ break;
+ case ETH_SPEED_NUM_10G:
+ hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G;
+ break;
+ case ETH_SPEED_NUM_40G:
+ hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G;
+ break;
+ case 0:
+ break;
+ default:
+ clib_warning ("unknown link speed %d", xd->link.link_speed);
+ break;
+ }
+ }
+ if (hw_flags_chg)
+ {
+ if (LINK_STATE_ELOGS)
+ {
+ vlib_main_t *vm = vlib_get_main ();
+
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format =
+ "update-link-state: sw_if_index %d, new flags %d",.format_args
+ = "i4i4",};
+
+ struct
+ {
+ u32 sw_if_index;
+ u32 flags;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->sw_if_index = xd->vlib_sw_if_index;
+ ed->flags = hw_flags;
+ }
+ vnet_hw_interface_set_flags (vnm, xd->hw_if_index, hw_flags);
+ }
+}
+
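+/*
+ * VPP process node: runs one-time device initialization (including bond
+ * interface MAC fixup), then periodically polls per-device counters and
+ * link state.
+ */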
+static uword
+dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ clib_error_t *error;
+ vnet_main_t *vnm = vnet_get_main ();
+ dpdk_main_t *dm = &dpdk_main;
+ ethernet_main_t *em = &ethernet_main;
+ dpdk_device_t *xd;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ int i;
+
+ error = dpdk_lib_init (dm);
+
+ if (error)
+ clib_error_report (error);
+
+ tm->worker_thread_release = 1;
+
+ f64 now = vlib_time_now (vm);
+ vec_foreach (xd, dm->devices)
+ {
+ dpdk_update_link_state (xd, now);
+ }
+
+ {
+ /*
+ * Extra set up for bond interfaces:
+ * 1. Set up MACs for bond interfaces and their slave links; these were
+ * set in dpdk_device_setup() but need to be done again here to take
+ * effect.
+ * 2. Set up info and register slave link state change callback handling.
+ * 3. Set up info for bond interface related CLI support.
+ */
+ int nports = rte_eth_dev_count ();
+ if (nports > 0)
+ {
+ for (i = 0; i < nports; i++)
+ {
+ xd = &dm->devices[i];
+ ASSERT (i == xd->device_index);
+ if (xd->pmd == VNET_DPDK_PMD_BOND)
+ {
+ u8 addr[6];
+ u8 slink[16];
+ int nlink = rte_eth_bond_slaves_get (i, slink, 16);
+ if (nlink > 0)
+ {
+ vnet_hw_interface_t *bhi;
+ ethernet_interface_t *bei;
+ int rv;
+
+ /* Get MAC of 1st slave link */
+ rte_eth_macaddr_get
+ (slink[0], (struct ether_addr *) addr);
+
+ /* Set MAC of bonded interface to that of 1st slave link */
+ clib_warning ("Set MAC for bond port %d BondEthernet%d",
+ i, xd->port_id);
+ rv = rte_eth_bond_mac_address_set
+ (i, (struct ether_addr *) addr);
+ if (rv)
+ clib_warning ("Set MAC addr failure rv=%d", rv);
+
+ /* Populate MAC of bonded interface in VPP hw tables */
+ bhi = vnet_get_hw_interface
+ (vnm, dm->devices[i].hw_if_index);
+ bei = pool_elt_at_index
+ (em->interfaces, bhi->hw_instance);
+ clib_memcpy (bhi->hw_address, addr, 6);
+ clib_memcpy (bei->address, addr, 6);
+
+ /* Init l3 packet size allowed on bonded interface */
+ bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES;
+ bhi->max_l3_packet_bytes[VLIB_RX] =
+ bhi->max_l3_packet_bytes[VLIB_TX] =
+ ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t);
+ while (nlink >= 1)
+ { /* for all slave links */
+ int slave = slink[--nlink];
+ dpdk_device_t *sdev = &dm->devices[slave];
+ vnet_hw_interface_t *shi;
+ vnet_sw_interface_t *ssi;
+ ethernet_interface_t *sei;
+ /* Add MAC to all slave links except the first one */
+ if (nlink)
+ {
+ clib_warning ("Add MAC for slave port %d", slave);
+ rv = rte_eth_dev_mac_addr_add
+ (slave, (struct ether_addr *) addr, 0);
+ if (rv)
+ clib_warning ("Add MAC addr failure rv=%d", rv);
+ }
+ /* Setup slave link state change callback handling */
+ rte_eth_dev_callback_register
+ (slave, RTE_ETH_EVENT_INTR_LSC,
+ dpdk_port_state_callback, NULL);
+ dpdk_device_t *sxd = &dm->devices[slave];
+ sxd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE;
+ sxd->bond_port = i;
+ /* Set slaves bitmap for bonded interface */
+ bhi->bond_info = clib_bitmap_set
+ (bhi->bond_info, sdev->hw_if_index, 1);
+ /* Set MACs and slave link flags on slave interface */
+ shi = vnet_get_hw_interface (vnm, sdev->hw_if_index);
+ ssi = vnet_get_sw_interface
+ (vnm, sdev->vlib_sw_if_index);
+ sei = pool_elt_at_index
+ (em->interfaces, shi->hw_instance);
+ shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE;
+ ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE;
+ clib_memcpy (shi->hw_address, addr, 6);
+ clib_memcpy (sei->address, addr, 6);
+ /* Set l3 packet size allowed to the lowest of the slaves */
+ if (bhi->max_l3_packet_bytes[VLIB_RX] >
+ shi->max_l3_packet_bytes[VLIB_RX])
+ bhi->max_l3_packet_bytes[VLIB_RX] =
+ bhi->max_l3_packet_bytes[VLIB_TX] =
+ shi->max_l3_packet_bytes[VLIB_RX];
+ /* Set max packet size allowed to the lowest of the slaves */
+ if (bhi->max_packet_bytes > shi->max_packet_bytes)
+ bhi->max_packet_bytes = shi->max_packet_bytes;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ while (1)
+ {
+ /*
+ * check each time through the loop in case intervals are changed
+ */
+ f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
+ dm->link_state_poll_interval : dm->stat_poll_interval;
+
+ vlib_process_wait_for_event_or_clock (vm, min_wait);
+
+ if (dm->admin_up_down_in_progress)
+ /* skip the poll if an admin up down is in progress (on any interface) */
+ continue;
+
+ vec_foreach (xd, dm->devices)
+ {
+ f64 now = vlib_time_now (vm);
+ if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval)
+ dpdk_update_counters (xd, now);
+ if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
+ dpdk_update_link_state (xd, now);
+
+ }
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_process_node,static) = {
+ .function = dpdk_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "dpdk-process",
+ .process_log2_n_stack_bytes = 17,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+dpdk_init (vlib_main_t * vm)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ vlib_node_t *ei;
+ clib_error_t *error = 0;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ /* verify that structs are cacheline aligned */
+ STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0,
+ "Cache line marker must be 1st element in dpdk_device_t");
+ STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) ==
+ CLIB_CACHE_LINE_BYTES,
+ "Data in cache line 0 is bigger than cache line size");
+ STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
+ "Cache line marker must be 1st element in frame_queue_trace_t");
+
+ dm->vlib_main = vm;
+ dm->vnet_main = vnet_get_main ();
+ dm->conf = &dpdk_config_main;
+
+ ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
+ if (ei == 0)
+ return clib_error_return (0, "ethernet-input node AWOL");
+
+ dm->ethernet_input_node_index = ei->index;
+
+ dm->conf->nchannels = 4;
+ dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF;
+ vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
+
+ vec_validate (dm->recycle, tm->n_thread_stacks - 1);
+
+ /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
+ dm->buffer_flags_template =
+ (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID
+ | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
+ VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+
+ dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL;
+ dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;
+
+ /* init CLI */
+ if ((error = vlib_call_init_function (vm, dpdk_cli_init)))
+ return error;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (dpdk_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
new file mode 100644
index 00000000..cf8b9699
--- /dev/null
+++ b/src/plugins/dpdk/device/node.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/xxhash.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <dpdk/device/dpdk.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/handoff.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+
+#include <dpdk/device/dpdk_priv.h>
+
+static char *dpdk_error_strings[] = {
+#define _(n,s) s,
+ foreach_dpdk_error
+#undef _
+};
+
+always_inline int
+vlib_buffer_is_ip4 (vlib_buffer_t * b)
+{
+ ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
+ return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4));
+}
+
+always_inline int
+vlib_buffer_is_ip6 (vlib_buffer_t * b)
+{
+ ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
+ return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6));
+}
+
+always_inline int
+vlib_buffer_is_mpls (vlib_buffer_t * b)
+{
+ ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
+ return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS));
+}
+
+always_inline u32
+dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0)
+{
+ if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
+ {
+ if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
+ return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
+ else
+ return VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ }
+ else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
+ return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+ return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+ else
+ return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+}
+
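+/*
+ * Same ethertype-based dispatch as above, but first rewinds current_data
+ * to the start of the packet data so the ethernet header is examined even
+ * if the buffer has already been advanced.
+ */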
+always_inline u32
+dpdk_rx_next_from_packet_start (struct rte_mbuf * mb, vlib_buffer_t * b0)
+{
+ word start_delta;
+ int rv;
+
+ start_delta = b0->current_data -
+ ((mb->buf_addr + mb->data_off) - (void *) b0->data);
+
+ vlib_buffer_advance (b0, -start_delta);
+
+ if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
+ {
+ if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
+ rv = VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
+ else
+ rv = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ }
+ else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
+ rv = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+ rv = VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+ else
+ rv = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+ vlib_buffer_advance (b0, start_delta);
+ return rv;
+}
+
+always_inline void
+dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error)
+{
+ if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD)
+ {
+ *error = DPDK_ERROR_IP_CHECKSUM_ERROR;
+ *next = VNET_DEVICE_INPUT_NEXT_DROP;
+ }
+ else
+ *error = DPDK_ERROR_NONE;
+}
+
+static void
+dpdk_rx_trace (dpdk_main_t * dm,
+ vlib_node_runtime_t * node,
+ dpdk_device_t * xd,
+ u16 queue_id, u32 * buffers, uword n_buffers)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 *b, n_left;
+ u32 next0;
+
+ n_left = n_buffers;
+ b = buffers;
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ dpdk_rx_dma_trace_t *t0;
+ struct rte_mbuf *mb;
+ u8 error0;
+
+ bi0 = b[0];
+ n_left -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ mb = rte_mbuf_from_vlib_buffer (b0);
+
+ if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
+ next0 = xd->per_interface_next_index;
+ else
+ next0 = dpdk_rx_next_from_packet_start (mb, b0);
+
+ dpdk_rx_error_from_mb (mb, &next0, &error0);
+
+ vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->queue_index = queue_id;
+ t0->device_index = xd->device_index;
+ t0->buffer_index = bi0;
+
+ clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
+ clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
+ clib_memcpy (t0->buffer.pre_data, b0->data,
+ sizeof (t0->buffer.pre_data));
+ clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data));
+
+ b += 1;
+ }
+}
+
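+/*
+ * Fill the per-queue rx vector with up to VLIB_FRAME_SIZE mbufs;
+ * rte_eth_rx_burst may return fewer than requested, so poll repeatedly
+ * until the frame is full or the device returns a short burst.
+ */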
+static inline u32
+dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id)
+{
+ u32 n_buffers;
+ u32 n_left;
+ u32 n_this_chunk;
+
+ n_left = VLIB_FRAME_SIZE;
+ n_buffers = 0;
+
+ if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
+ {
+ while (n_left)
+ {
+ n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id,
+ xd->rx_vectors[queue_id] +
+ n_buffers, n_left);
+ n_buffers += n_this_chunk;
+ n_left -= n_this_chunk;
+
+ /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */
+ if (n_this_chunk < 32)
+ break;
+ }
+ }
+ else
+ {
+ ASSERT (0);
+ }
+
+ return n_buffers;
+}
+
+
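+/*
+ * Chain the remaining mbuf segments of a multi-segment packet onto the
+ * head vlib buffer, accumulating total_length_not_including_first_buffer.
+ */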
+static_always_inline void
+dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
+ struct rte_mbuf *mb, vlib_buffer_free_list_t * fl)
+{
+ u8 nb_seg = 1;
+ struct rte_mbuf *mb_seg = 0;
+ vlib_buffer_t *b_seg, *b_chain = 0;
+ mb_seg = mb->next;
+ b_chain = b;
+
+ if (mb->nb_segs < 2)
+ return;
+
+ b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b->total_length_not_including_first_buffer = 0;
+
+ while (nb_seg < mb->nb_segs)
+ {
+ ASSERT (mb_seg != 0);
+
+ b_seg = vlib_buffer_from_rte_mbuf (mb_seg);
+ vlib_buffer_init_for_free_list (b_seg, fl);
+
+ ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
+ ASSERT (b_seg->current_data == 0);
+
+ /*
+ * The driver (e.g. virtio) may not put the packet data at the start
+ * of the segment, so don't assume b_seg->current_data == 0 is correct.
+ */
+ b_seg->current_data =
+ (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data;
+
+ b_seg->current_length = mb_seg->data_len;
+ b->total_length_not_including_first_buffer += mb_seg->data_len;
+
+ b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg);
+
+ b_chain = b_seg;
+ mb_seg = mb_seg->next;
+ nb_seg++;
+ }
+}
+
+static_always_inline void
+dpdk_prefetch_buffer (struct rte_mbuf *mb)
+{
+ vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
+ CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+static_always_inline void
+dpdk_prefetch_ethertype (struct rte_mbuf *mb)
+{
+ CLIB_PREFETCH (mb->buf_addr + mb->data_off +
+ STRUCT_OFFSET_OF (ethernet_header_t, type),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+
+/*
+ This function fills the 1st cacheline of vlib_buffer_t metadata with data
+ from the buffer template. Instead of filling field by field, we construct
+ the template and then use 128/256-bit vector instructions to copy the data.
+ The code first loads the whole cacheline into 4 128-bit registers (xmm)
+ or 2 256-bit registers (ymm) and then stores the data into all 4 buffers,
+ effectively saving on register load operations.
+*/
+
+static_always_inline void
+dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3,
+ void *s)
+{
+#if defined(CLIB_HAVE_VEC128)
+ int i;
+ for (i = 0; i < 2; i++)
+ {
+ *(u8x32 *) (((u8 *) d0) + i * 32) =
+ *(u8x32 *) (((u8 *) d1) + i * 32) =
+ *(u8x32 *) (((u8 *) d2) + i * 32) =
+ *(u8x32 *) (((u8 *) d3) + i * 32) = *(u8x32 *) (((u8 *) s) + i * 32);
+ }
+#elif defined(CLIB_HAVE_VEC64)
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ *(u8x16 *) (((u8 *) d0) + i * 16) =
+ *(u8x16 *) (((u8 *) d1) + i * 16) =
+ *(u8x16 *) (((u8 *) d2) + i * 16) =
+ *(u8x16 *) (((u8 *) d3) + i * 16) = *(u8x16 *) (((u8 *) s) + i * 16);
+ }
+#else
+#error "Either CLIB_HAVE_VEC128 or CLIB_HAVE_VEC64 has to be defined"
+#endif
+}
+
+/*
+ * This function is used when there are no worker threads.
+ * The main thread performs IO and forwards the packets.
+ */
+static_always_inline u32
+dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
+ vlib_node_runtime_t * node, u32 thread_index, u16 queue_id,
+ int maybe_multiseg)
+{
+ u32 n_buffers;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ u32 n_left_to_next, *to_next;
+ u32 mb_index;
+ vlib_main_t *vm = vlib_get_main ();
+ uword n_rx_bytes = 0;
+ u32 n_trace, trace_cnt __attribute__ ((unused));
+ vlib_buffer_free_list_t *fl;
+ vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, thread_index);
+
+ if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
+ return 0;
+
+ n_buffers = dpdk_rx_burst (dm, xd, queue_id);
+
+ if (n_buffers == 0)
+ {
+ return 0;
+ }
+
+ vec_reset_length (xd->d_trace_buffers[thread_index]);
+ trace_cnt = n_trace = vlib_get_trace_count (vm, node);
+
+ if (n_trace > 0)
+ {
+ u32 n = clib_min (n_trace, n_buffers);
+ mb_index = 0;
+
+ while (n--)
+ {
+ struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++];
+ vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
+ vec_add1 (xd->d_trace_buffers[thread_index],
+ vlib_get_buffer_index (vm, b));
+ }
+ }
+
+ fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ /* Update buffer template */
+ vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
+ bt->error = node->errors[DPDK_ERROR_NONE];
+
+ mb_index = 0;
+
+ while (n_buffers > 0)
+ {
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 bi0, next0;
+ u32 bi1, next1;
+ u32 bi2, next2;
+ u32 bi3, next3;
+ u8 error0, error1, error2, error3;
+ u64 or_ol_flags;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
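+ /* quad loop: require at least 12 buffers so the prefetches issued
+ 4 and 8 packets ahead stay within the rx vector */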
+ while (n_buffers >= 12 && n_left_to_next >= 4)
+ {
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+
+ /* prefetches are interleaved with the rest of the code to reduce
+ pressure on L1 cache */
+ dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 8]);
+ dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 4]);
+
+ mb0 = xd->rx_vectors[queue_id][mb_index];
+ mb1 = xd->rx_vectors[queue_id][mb_index + 1];
+ mb2 = xd->rx_vectors[queue_id][mb_index + 2];
+ mb3 = xd->rx_vectors[queue_id][mb_index + 3];
+
+ ASSERT (mb0);
+ ASSERT (mb1);
+ ASSERT (mb2);
+ ASSERT (mb3);
+
+ if (maybe_multiseg)
+ {
+ if (PREDICT_FALSE (mb0->nb_segs > 1))
+ dpdk_prefetch_buffer (mb0->next);
+ if (PREDICT_FALSE (mb1->nb_segs > 1))
+ dpdk_prefetch_buffer (mb1->next);
+ if (PREDICT_FALSE (mb2->nb_segs > 1))
+ dpdk_prefetch_buffer (mb2->next);
+ if (PREDICT_FALSE (mb3->nb_segs > 1))
+ dpdk_prefetch_buffer (mb3->next);
+ }
+
+ b0 = vlib_buffer_from_rte_mbuf (mb0);
+ b1 = vlib_buffer_from_rte_mbuf (mb1);
+ b2 = vlib_buffer_from_rte_mbuf (mb2);
+ b3 = vlib_buffer_from_rte_mbuf (mb3);
+
+ dpdk_buffer_init_from_template (b0, b1, b2, b3, bt);
+
+ dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 9]);
+ dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 5]);
+
+ /* current_data must be set to -RTE_PKTMBUF_HEADROOM in template */
+ b0->current_data += mb0->data_off;
+ b1->current_data += mb1->data_off;
+ b2->current_data += mb2->data_off;
+ b3->current_data += mb3->data_off;
+
+ b0->current_length = mb0->data_len;
+ b1->current_length = mb1->data_len;
+ b2->current_length = mb2->data_len;
+ b3->current_length = mb3->data_len;
+
+ dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 10]);
+ dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 7]);
+
+ bi0 = vlib_get_buffer_index (vm, b0);
+ bi1 = vlib_get_buffer_index (vm, b1);
+ bi2 = vlib_get_buffer_index (vm, b2);
+ bi3 = vlib_get_buffer_index (vm, b3);
+
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ to_next[2] = bi2;
+ to_next[3] = bi3;
+ to_next += 4;
+ n_left_to_next -= 4;
+
+ if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
+ {
+ next0 = next1 = next2 = next3 = xd->per_interface_next_index;
+ }
+ else
+ {
+ next0 = dpdk_rx_next_from_etype (mb0, b0);
+ next1 = dpdk_rx_next_from_etype (mb1, b1);
+ next2 = dpdk_rx_next_from_etype (mb2, b2);
+ next3 = dpdk_rx_next_from_etype (mb3, b3);
+ }
+
+ dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 11]);
+ dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 6]);
+
+ or_ol_flags = (mb0->ol_flags | mb1->ol_flags |
+ mb2->ol_flags | mb3->ol_flags);
+ if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD))
+ {
+ dpdk_rx_error_from_mb (mb0, &next0, &error0);
+ dpdk_rx_error_from_mb (mb1, &next1, &error1);
+ dpdk_rx_error_from_mb (mb2, &next2, &error2);
+ dpdk_rx_error_from_mb (mb3, &next3, &error3);
+ b0->error = node->errors[error0];
+ b1->error = node->errors[error1];
+ b2->error = node->errors[error2];
+ b3->error = node->errors[error3];
+ }
+
+ vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
+ vlib_buffer_advance (b1, device_input_next_node_advance[next1]);
+ vlib_buffer_advance (b2, device_input_next_node_advance[next2]);
+ vlib_buffer_advance (b3, device_input_next_node_advance[next3]);
+
+ n_rx_bytes += mb0->pkt_len;
+ n_rx_bytes += mb1->pkt_len;
+ n_rx_bytes += mb2->pkt_len;
+ n_rx_bytes += mb3->pkt_len;
+
+ /* Process subsequent segments of multi-segment packets */
+ if (maybe_multiseg)
+ {
+ dpdk_process_subseq_segs (vm, b0, mb0, fl);
+ dpdk_process_subseq_segs (vm, b1, mb1, fl);
+ dpdk_process_subseq_segs (vm, b2, mb2, fl);
+ dpdk_process_subseq_segs (vm, b3, mb3, fl);
+ }
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited... See main.c...
+ */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
+
+ /* Do we have any driver RX features configured on the interface? */
+ vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index,
+ &next0, &next1, &next2, &next3,
+ b0, b1, b2, b3);
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ n_buffers -= 4;
+ mb_index += 4;
+ }
+ while (n_buffers > 0 && n_left_to_next > 0)
+ {
+ struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index];
+
+ if (PREDICT_TRUE (n_buffers > 3))
+ {
+ dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 2]);
+ dpdk_prefetch_ethertype (xd->rx_vectors[queue_id]
+ [mb_index + 1]);
+ }
+
+ ASSERT (mb0);
+
+ b0 = vlib_buffer_from_rte_mbuf (mb0);
+
+ /* Prefetch one next segment if it exists. */
+ if (PREDICT_FALSE (mb0->nb_segs > 1))
+ dpdk_prefetch_buffer (mb0->next);
+
+ clib_memcpy (b0, bt, CLIB_CACHE_LINE_BYTES);
+
+ ASSERT (b0->current_data == -RTE_PKTMBUF_HEADROOM);
+ b0->current_data += mb0->data_off;
+ b0->current_length = mb0->data_len;
+
+ bi0 = vlib_get_buffer_index (vm, b0);
+
+ to_next[0] = bi0;
+ to_next++;
+ n_left_to_next--;
+
+ if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
+ next0 = xd->per_interface_next_index;
+ else
+ next0 = dpdk_rx_next_from_etype (mb0, b0);
+
+ dpdk_rx_error_from_mb (mb0, &next0, &error0);
+ b0->error = node->errors[error0];
+
+ vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
+
+ n_rx_bytes += mb0->pkt_len;
+
+ /* Process subsequent segments of multi-segment packets */
+ dpdk_process_subseq_segs (vm, b0, mb0, fl);
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited... See main.c...
+ */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+ /* Do we have any driver RX features configured on the interface? */
+ vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0,
+ b0);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ n_buffers--;
+ mb_index++;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[thread_index]) > 0))
+ {
+ dpdk_rx_trace (dm, node, xd, queue_id,
+ xd->d_trace_buffers[thread_index],
+ vec_len (xd->d_trace_buffers[thread_index]));
+ vlib_set_trace_count (vm, node,
+ n_trace -
+ vec_len (xd->d_trace_buffers[thread_index]));
+ }
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, mb_index);
+
+ return mb_index;
+}
+
+static inline void
+poll_rate_limit (dpdk_main_t * dm)
+{
+ /* Limit the poll rate by sleeping for N msec between polls */
+ if (PREDICT_FALSE (dm->poll_sleep_usec != 0))
+ {
+ struct timespec ts, tsrem;
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1000 * dm->poll_sleep_usec;
+
+ while (nanosleep (&ts, &tsrem) < 0)
+ {
+ ts = tsrem;
+ }
+ }
+}
+
+/** \brief Main DPDK input node
+ @node dpdk-input
+
+ This is the main DPDK input node: across each assigned interface,
+ call rte_eth_rx_burst(...) or similar to obtain a vector of
+ packets to process. Handle early packet discard. Derive @c
+ vlib_buffer_t metadata from <code>struct rte_mbuf</code> metadata.
+ Depending on the resulting metadata, adjust <code>b->current_data,
+ b->current_length</code> and dispatch directly to
+ ip4-input-no-checksum or ip6-input. Trace the packet if required.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param f vlib_frame_t input-node, not used.
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>struct rte_mbuf mb->ol_flags</code>
+ - PKT_RX_IP_CKSUM_BAD
+ - <code> RTE_ETH_IS_xxx_HDR(mb->packet_type) </code>
+ - packet classification result
+
+ @em Sets:
+ - <code>b->error</code> if the packet is to be dropped immediately
+ - <code>b->current_data, b->current_length</code>
+ - adjusted as needed to skip the L2 header in direct-dispatch cases
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ - rx interface sw_if_index
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
+ - required by ipX-lookup
+ - <code>b->flags</code>
+ - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
+
+ <em>Next Nodes:</em>
+ - Static arcs to: error-drop, ethernet-input,
+ ip4-input-no-checksum, ip6-input, mpls-input
+ - per-interface redirection, controlled by
+ <code>xd->per_interface_next_index</code>
+*/
+
+static uword
+dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd;
+ uword n_rx_packets = 0;
+ vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+ vnet_device_and_queue_t *dq;
+ u32 thread_index = node->thread_index;
+
+ /*
+ * Poll all devices on this cpu for input/interrupts.
+ */
+ /* *INDENT-OFF* */
+ foreach_device_and_queue (dq, rt->devices_and_queues)
+ {
+ xd = vec_elt_at_index(dm->devices, dq->dev_instance);
+ if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE))
+ continue; /* Do not poll slave to a bonded interface */
+ if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
+ n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 1);
+ else
+ n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 0);
+ }
+ /* *INDENT-ON* */
+
+ poll_rate_limit (dm);
+
+ return n_rx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_input_node) = {
+ .function = dpdk_input,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .name = "dpdk-input",
+ .sibling_of = "device-input",
+
+ /* Will be enabled if/when hardware is detected. */
+ .state = VLIB_NODE_STATE_DISABLED,
+
+ .format_buffer = format_ethernet_header_with_length,
+ .format_trace = format_dpdk_rx_dma_trace,
+
+ .n_errors = DPDK_N_ERROR,
+ .error_strings = dpdk_error_strings,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input);
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/hqos/hqos.c b/src/plugins/dpdk/hqos/hqos.c
new file mode 100644
index 00000000..c9b85652
--- /dev/null
+++ b/src/plugins/dpdk/hqos/hqos.c
@@ -0,0 +1,772 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/bitmap.h>
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dpdk/device/dpdk.h>
+
+#include <vlib/pci/pci.h>
+#include <vlibmemory/api.h>
+#include <vlibmemory/vl_memory_msg_enum.h> /* enumerate all vlib messages */
+
+#define vl_typedefs /* define message structures */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_typedefs
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_printfun
+
+#include <dpdk/device/dpdk_priv.h>
+
+/***
+ *
+ * HQoS default configuration values
+ *
+ ***/
+
+static dpdk_device_config_hqos_t hqos_params_default = {
+ .hqos_thread_valid = 0,
+
+ .swq_size = 4096,
+ .burst_enq = 256,
+ .burst_deq = 220,
+
+ /*
+ * Packet field to identify the subport.
+ *
+ * Default value: Since only one subport is defined by default (see below:
+ * n_subports_per_port = 1), the subport ID is hardcoded to 0.
+ */
+ .pktfield0_slabpos = 0,
+ .pktfield0_slabmask = 0,
+
+ /*
+ * Packet field to identify the pipe.
+ *
+ * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 23
+ */
+ .pktfield1_slabpos = 40,
+ .pktfield1_slabmask = 0x0000000FFF000000LLU,
+
+ /* Packet field used as index into TC translation table to identify the traffic
+ * class and queue.
+ *
+ * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field
+ */
+ .pktfield2_slabpos = 8,
+ .pktfield2_slabmask = 0x00000000000000FCLLU,
+ .tc_table = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ },
+
+ /* port */
+ .port = {
+ .name = NULL, /* Set at init */
+ .socket = 0, /* Set at init */
+ .rate = 1250000000, /* Assuming 10GbE port */
+ .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */
+ .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+ .n_subports_per_port = 1,
+ .n_pipes_per_subport = 4096,
+ .qsize = {64, 64, 64, 64},
+ .pipe_profiles = NULL, /* Set at config */
+ .n_pipe_profiles = 1,
+
+#ifdef RTE_SCHED_RED
+ .red_params = {
+ /* Traffic Class 0 Colors Green / Yellow / Red */
+ [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+ [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+ [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+
+ /* Traffic Class 1 - Colors Green / Yellow / Red */
+ [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+ [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+ [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+
+ /* Traffic Class 2 - Colors Green / Yellow / Red */
+ [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+ [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+ [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+
+ /* Traffic Class 3 - Colors Green / Yellow / Red */
+ [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+ [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9},
+ [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv =
+ 10,.wq_log2 = 9}
+ },
+#endif /* RTE_SCHED_RED */
+ },
+};
+
+static struct rte_sched_subport_params hqos_subport_params_default = {
+ .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */
+ .tb_size = 1000000,
+ .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+ .tc_period = 10,
+};
+
+static struct rte_sched_pipe_params hqos_pipe_params_default = {
+ .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */
+ .tb_size = 1000000,
+ .tc_rate = {305175, 305175, 305175, 305175},
+ .tc_period = 40,
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ .tc_ov_weight = 1,
+#endif
+ .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+/***
+ *
+ * HQoS configuration
+ *
+ ***/
+
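+/*
+ * Validate that a packet-field slab mask can address all n values of the
+ * corresponding scheduler dimension: the mask must be a single contiguous
+ * run of exactly log2(n) set bits. Illustrative examples: mask
+ * 0x0000000FFF000000 with n = 4096 returns 0 (OK), while a non-contiguous
+ * mask such as 0x0F0F (with n = 256) fails the contiguity check (-3).
+ */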
+int
+dpdk_hqos_validate_mask (u64 mask, u32 n)
+{
+ int count = __builtin_popcountll (mask);
+ int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask);
+ int pos_trail = __builtin_ctzll (mask);
+ int count_expected = __builtin_popcount (n - 1);
+
+ /* Handle the exceptions */
+ if (n == 0)
+ return -1; /* Error */
+
+ if ((mask == 0) && (n == 1))
+ return 0; /* OK */
+
+ if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1)))
+ return -2; /* Error */
+
+ /* Check that mask is contiguous */
+ if ((pos_lead - pos_trail) != count)
+ return -3; /* Error */
+
+ /* Check that mask contains the expected number of bits set */
+ if (count != count_expected)
+ return -4; /* Error */
+
+ return 0; /* OK */
+}
+
+void
+dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t *
+ hqos, u32 pipe_profile_id)
+{
+ memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default,
+ sizeof (hqos_pipe_params_default));
+}
+
+void
+dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos)
+{
+ struct rte_sched_subport_params *subport_params;
+ struct rte_sched_pipe_params *pipe_params;
+ u32 *pipe_map;
+ u32 i;
+
+ memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default));
+
+ /* pipe */
+ vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles);
+
+ for (i = 0; i < vec_len (hqos->pipe); i++)
+ memcpy (&pipe_params[i],
+ &hqos_pipe_params_default, sizeof (hqos_pipe_params_default));
+
+ hqos->port.pipe_profiles = hqos->pipe;
+
+ /* subport */
+ vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port);
+
+ for (i = 0; i < vec_len (hqos->subport); i++)
+ memcpy (&subport_params[i],
+ &hqos_subport_params_default,
+ sizeof (hqos_subport_params_default));
+
+ /* pipe profile */
+ vec_add2 (hqos->pipe_map,
+ pipe_map,
+ hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport);
+
+ for (i = 0; i < vec_len (hqos->pipe_map); i++)
+ pipe_map[i] = 0;
+}
+
+/***
+ *
+ * HQoS init
+ *
+ ***/
+
+clib_error_t *
+dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ char name[32];
+ u32 subport_id, i;
+ int rv;
+
+ /* Detect the set of worker threads */
+ int worker_thread_first = 0;
+ int worker_thread_count = 0;
+
+ uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ vlib_thread_registration_t *tr =
+ p ? (vlib_thread_registration_t *) p[0] : 0;
+
+ if (tr && tr->count > 0)
+ {
+ worker_thread_first = tr->first_index;
+ worker_thread_count = tr->count;
+ }
+
+ /* Allocate the per-thread device data array */
+ vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0]));
+
+ vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES);
+ memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0]));
+
+ /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */
+ vec_validate (xd->hqos_ht->swq, worker_thread_count);
+
+ /* SWQ */
+ for (i = 0; i < worker_thread_count + 1; i++)
+ {
+ u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ;
+
+ snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i,
+ xd->device_index);
+ xd->hqos_ht->swq[i] =
+ rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags);
+ if (xd->hqos_ht->swq[i] == NULL)
+ return clib_error_return (0,
+ "SWQ-worker%u-to-device%u: rte_ring_create err",
+ i, xd->device_index);
+ }
+
+ /*
+ * HQoS
+ */
+
+ /* HQoS port */
+ snprintf (name, sizeof (name), "HQoS%u", xd->device_index);
+ hqos->port.name = strdup (name);
+ if (hqos->port.name == NULL)
+ return clib_error_return (0, "HQoS%u: strdup err", xd->device_index);
+
+ hqos->port.socket = rte_eth_dev_socket_id (xd->device_index);
+ if (hqos->port.socket == SOCKET_ID_ANY)
+ hqos->port.socket = 0;
+
+ xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port);
+ if (xd->hqos_ht->hqos == NULL)
+ return clib_error_return (0, "HQoS%u: rte_sched_port_config err",
+ xd->device_index);
+
+ /* HQoS subport */
+ for (subport_id = 0; subport_id < hqos->port.n_subports_per_port;
+ subport_id++)
+ {
+ u32 pipe_id;
+
+ rv =
+ rte_sched_subport_config (xd->hqos_ht->hqos, subport_id,
+ &hqos->subport[subport_id]);
+ if (rv)
+ return clib_error_return (0,
+ "HQoS%u subport %u: rte_sched_subport_config err (%d)",
+ xd->device_index, subport_id, rv);
+
+ /* HQoS pipe */
+ for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++)
+ {
+ u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id;
+ u32 profile_id = hqos->pipe_map[pos];
+
+ rv =
+ rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id,
+ profile_id);
+ if (rv)
+ return clib_error_return (0,
+ "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)",
+ xd->device_index, subport_id, pipe_id,
+ rv);
+ }
+ }
+
+ /* Set up per-thread device data for the I/O TX thread */
+ xd->hqos_ht->hqos_burst_enq = hqos->burst_enq;
+ xd->hqos_ht->hqos_burst_deq = hqos->burst_deq;
+ vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1);
+ vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1);
+ xd->hqos_ht->pkts_enq_len = 0;
+ xd->hqos_ht->swq_pos = 0;
+ xd->hqos_ht->flush_count = 0;
+
+ /* Set up per-thread device data for each worker thread */
+ for (i = 0; i < worker_thread_count + 1; i++)
+ {
+ u32 tid;
+ if (i)
+ tid = worker_thread_first + (i - 1);
+ else
+ tid = i;
+
+ xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i];
+ xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos;
+ xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask;
+ xd->hqos_wt[tid].hqos_field0_slabshr =
+ __builtin_ctzll (hqos->pktfield0_slabmask);
+ xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos;
+ xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask;
+ xd->hqos_wt[tid].hqos_field1_slabshr =
+ __builtin_ctzll (hqos->pktfield1_slabmask);
+ xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos;
+ xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask;
+ xd->hqos_wt[tid].hqos_field2_slabshr =
+ __builtin_ctzll (hqos->pktfield2_slabmask);
+ memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table,
+ sizeof (hqos->tc_table));
+ }
+
+ return 0;
+}
+
+/***
+ *
+ * HQoS run-time
+ *
+ ***/
+/*
+ * dpdk_hqos_thread - Contains the main loop of an HQoS thread.
+ *
+ * w - Information for the current thread
+ */
+static_always_inline void
+dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ u32 thread_index = vm->thread_index;
+ u32 dev_pos;
+
+ dev_pos = 0;
+ while (1)
+ {
+ vlib_worker_thread_barrier_check ();
+
+ u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]);
+ if (dev_pos >= n_devs)
+ dev_pos = 0;
+
+ dpdk_device_and_queue_t *dq =
+ vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos);
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
+
+ dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
+ u32 device_index = xd->device_index;
+ u16 queue_id = dq->queue_id;
+
+ struct rte_mbuf **pkts_enq = hqos->pkts_enq;
+ u32 pkts_enq_len = hqos->pkts_enq_len;
+ u32 swq_pos = hqos->swq_pos;
+ u32 n_swq = vec_len (hqos->swq), i;
+ u32 flush_count = hqos->flush_count;
+
+ for (i = 0; i < n_swq; i++)
+ {
+ /* Get current SWQ for this device */
+ struct rte_ring *swq = hqos->swq[swq_pos];
+
+ /* Read SWQ burst to packet buffer of this device */
+ pkts_enq_len += rte_ring_sc_dequeue_burst (swq,
+ (void **)
+ &pkts_enq[pkts_enq_len],
+ hqos->hqos_burst_enq, 0);
+
+ /* Get next SWQ for this device */
+ swq_pos++;
+ if (swq_pos >= n_swq)
+ swq_pos = 0;
+ hqos->swq_pos = swq_pos;
+
+ /* HWQ TX enqueue when burst available */
+ if (pkts_enq_len >= hqos->hqos_burst_enq)
+ {
+ u32 n_pkts = rte_eth_tx_burst (device_index,
+ (uint16_t) queue_id,
+ pkts_enq,
+ (uint16_t) pkts_enq_len);
+
+ for (; n_pkts < pkts_enq_len; n_pkts++)
+ rte_pktmbuf_free (pkts_enq[n_pkts]);
+
+ pkts_enq_len = 0;
+ flush_count = 0;
+ break;
+ }
+ }
+ if (pkts_enq_len)
+ {
+ flush_count++;
+ if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD))
+ {
+ rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len);
+
+ pkts_enq_len = 0;
+ flush_count = 0;
+ }
+ }
+ hqos->pkts_enq_len = pkts_enq_len;
+ hqos->flush_count = flush_count;
+
+ /* Advance to next device */
+ dev_pos++;
+ }
+}
+
+static_always_inline void
+dpdk_hqos_thread_internal (vlib_main_t * vm)
+{
+ dpdk_main_t *dm = &dpdk_main;
+ u32 thread_index = vm->thread_index;
+ u32 dev_pos;
+
+ dev_pos = 0;
+ while (1)
+ {
+ vlib_worker_thread_barrier_check ();
+
+ u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]);
+ if (PREDICT_FALSE (n_devs == 0))
+ {
+ dev_pos = 0;
+ continue;
+ }
+ if (dev_pos >= n_devs)
+ dev_pos = 0;
+
+ dpdk_device_and_queue_t *dq =
+ vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos);
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
+
+ dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
+ u32 device_index = xd->device_index;
+ u16 queue_id = dq->queue_id;
+
+ struct rte_mbuf **pkts_enq = hqos->pkts_enq;
+ struct rte_mbuf **pkts_deq = hqos->pkts_deq;
+ u32 pkts_enq_len = hqos->pkts_enq_len;
+ u32 swq_pos = hqos->swq_pos;
+ u32 n_swq = vec_len (hqos->swq), i;
+ u32 flush_count = hqos->flush_count;
+
+ /*
+ * SWQ dequeue and HQoS enqueue for current device
+ */
+ for (i = 0; i < n_swq; i++)
+ {
+ /* Get current SWQ for this device */
+ struct rte_ring *swq = hqos->swq[swq_pos];
+
+ /* Read SWQ burst to packet buffer of this device */
+ pkts_enq_len += rte_ring_sc_dequeue_burst (swq,
+ (void **)
+ &pkts_enq[pkts_enq_len],
+ hqos->hqos_burst_enq, 0);
+
+ /* Get next SWQ for this device */
+ swq_pos++;
+ if (swq_pos >= n_swq)
+ swq_pos = 0;
+ hqos->swq_pos = swq_pos;
+
+ /* HQoS enqueue when burst available */
+ if (pkts_enq_len >= hqos->hqos_burst_enq)
+ {
+ rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len);
+
+ pkts_enq_len = 0;
+ flush_count = 0;
+ break;
+ }
+ }
+ if (pkts_enq_len)
+ {
+ flush_count++;
+ if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD))
+ {
+ rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len);
+
+ pkts_enq_len = 0;
+ flush_count = 0;
+ }
+ }
+ hqos->pkts_enq_len = pkts_enq_len;
+ hqos->flush_count = flush_count;
+
+ /*
+ * HQoS dequeue and HWQ TX enqueue for current device
+ */
+ {
+ u32 pkts_deq_len, n_pkts;
+
+ pkts_deq_len = rte_sched_port_dequeue (hqos->hqos,
+ pkts_deq,
+ hqos->hqos_burst_deq);
+
+ for (n_pkts = 0; n_pkts < pkts_deq_len;)
+ n_pkts += rte_eth_tx_burst (device_index,
+ (uint16_t) queue_id,
+ &pkts_deq[n_pkts],
+ (uint16_t) (pkts_deq_len - n_pkts));
+ }
+
+ /* Advance to next device */
+ dev_pos++;
+ }
+}
+
+void
+dpdk_hqos_thread (vlib_worker_thread_t * w)
+{
+ vlib_main_t *vm;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+
+ vm = vlib_get_main ();
+
+ ASSERT (vm->thread_index == vlib_get_thread_index ());
+
+ clib_time_init (&vm->clib_time);
+ clib_mem_set_heap (w->thread_mheap);
+
+ /* Wait until the dpdk init sequence is complete */
+ while (tm->worker_thread_release == 0)
+ vlib_worker_thread_barrier_check ();
+
+ if (vec_len (dm->devices_by_hqos_cpu[vm->thread_index]) == 0)
+ return
+ clib_error
+ ("current I/O TX thread does not have any devices assigned to it");
+
+ if (DPDK_HQOS_DBG_BYPASS)
+ dpdk_hqos_thread_internal_hqos_dbg_bypass (vm);
+ else
+ dpdk_hqos_thread_internal (vm);
+}
+
+void
+dpdk_hqos_thread_fn (void *arg)
+{
+ vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
+ vlib_worker_thread_init (w);
+ dpdk_hqos_thread (w);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_THREAD (hqos_thread_reg, static) =
+{
+ .name = "hqos-threads",
+ .short_name = "hqos-threads",
+ .function = dpdk_hqos_thread_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * HQoS run-time code to be called by the worker threads
+ */
+#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \
+({ \
+ u64 slab = *((u64 *) &byte_array[slab_pos]); \
+ u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \
+ val; \
+})
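+
+/*
+ * Illustrative example: with the default pipe field configuration
+ * (slab position 40, slab mask 0x0000000FFF000000, shift 24), BITFIELD
+ * loads the 8-byte slab at packet byte offset 40, converts it to host
+ * byte order, and extracts slab bits 24 .. 35 as the pipe ID.
+ */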
+
+#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \
+ ((((u64) (queue)) & 0x3) | \
+ ((((u64) (traffic_class)) & 0x3) << 2) | \
+ ((((u64) (color)) & 0x3) << 4) | \
+ ((((u64) (subport)) & 0xFFFF) << 16) | \
+ ((((u64) (pipe)) & 0xFFFFFFFF) << 32))
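+
+/*
+ * Illustrative example: subport 0, pipe 5, traffic class 2, queue 1 and
+ * color 0 (green) pack to (1 & 0x3) | ((2 & 0x3) << 2) | (5ULL << 32),
+ * i.e. 0x500000009.
+ */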
+
+void
+dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos,
+ struct rte_mbuf **pkts, u32 n_pkts)
+{
+ u32 i;
+
+ for (i = 0; i < (n_pkts & (~0x3)); i += 4)
+ {
+ struct rte_mbuf *pkt0 = pkts[i];
+ struct rte_mbuf *pkt1 = pkts[i + 1];
+ struct rte_mbuf *pkt2 = pkts[i + 2];
+ struct rte_mbuf *pkt3 = pkts[i + 3];
+
+ u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *);
+ u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *);
+ u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *);
+ u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *);
+
+ u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos,
+ hqos->hqos_field0_slabmask,
+ hqos->hqos_field0_slabshr);
+ u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos,
+ hqos->hqos_field1_slabmask,
+ hqos->hqos_field1_slabshr);
+ u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos,
+ hqos->hqos_field2_slabmask,
+ hqos->hqos_field2_slabshr);
+ u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2;
+ u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3;
+
+ u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos,
+ hqos->hqos_field0_slabmask,
+ hqos->hqos_field0_slabshr);
+ u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos,
+ hqos->hqos_field1_slabmask,
+ hqos->hqos_field1_slabshr);
+ u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos,
+ hqos->hqos_field2_slabmask,
+ hqos->hqos_field2_slabshr);
+ u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2;
+ u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3;
+
+ u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos,
+ hqos->hqos_field0_slabmask,
+ hqos->hqos_field0_slabshr);
+ u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos,
+ hqos->hqos_field1_slabmask,
+ hqos->hqos_field1_slabshr);
+ u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos,
+ hqos->hqos_field2_slabmask,
+ hqos->hqos_field2_slabshr);
+ u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2;
+ u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3;
+
+ u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos,
+ hqos->hqos_field0_slabmask,
+ hqos->hqos_field0_slabshr);
+ u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos,
+ hqos->hqos_field1_slabmask,
+ hqos->hqos_field1_slabshr);
+ u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos,
+ hqos->hqos_field2_slabmask,
+ hqos->hqos_field2_slabshr);
+ u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2;
+ u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3;
+
+ u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport,
+ pkt0_pipe,
+ pkt0_tc,
+ pkt0_tc_q,
+ 0);
+ u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport,
+ pkt1_pipe,
+ pkt1_tc,
+ pkt1_tc_q,
+ 0);
+ u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport,
+ pkt2_pipe,
+ pkt2_tc,
+ pkt2_tc_q,
+ 0);
+ u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport,
+ pkt3_pipe,
+ pkt3_tc,
+ pkt3_tc_q,
+ 0);
+
+ pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF;
+ pkt0->hash.sched.hi = pkt0_sched >> 32;
+ pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF;
+ pkt1->hash.sched.hi = pkt1_sched >> 32;
+ pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF;
+ pkt2->hash.sched.hi = pkt2_sched >> 32;
+ pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF;
+ pkt3->hash.sched.hi = pkt3_sched >> 32;
+ }
+
+ for (; i < n_pkts; i++)
+ {
+ struct rte_mbuf *pkt = pkts[i];
+
+ u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *);
+
+ u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos,
+ hqos->hqos_field0_slabmask,
+ hqos->hqos_field0_slabshr);
+ u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos,
+ hqos->hqos_field1_slabmask,
+ hqos->hqos_field1_slabshr);
+ u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos,
+ hqos->hqos_field2_slabmask,
+ hqos->hqos_field2_slabshr);
+ u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2;
+ u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3;
+
+ u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport,
+ pkt_pipe,
+ pkt_tc,
+ pkt_tc_q,
+ 0);
+
+ pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF;
+ pkt->hash.sched.hi = pkt_sched >> 32;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/hqos/qos_doc.md b/src/plugins/dpdk/hqos/qos_doc.md
new file mode 100644
index 00000000..7c064246
--- /dev/null
+++ b/src/plugins/dpdk/hqos/qos_doc.md
@@ -0,0 +1,411 @@
+# QoS Hierarchical Scheduler {#qos_doc}
+
+The Quality-of-Service (QoS) scheduler performs egress-traffic management by
+prioritizing the transmission of packets belonging to different service types
+and subscribers, based on their Service Level Agreements (SLAs). The QoS
+scheduler can be enabled on one or more NIC output interfaces, depending upon
+the requirement.
+
+
+## Overview
+
+The QoS scheduler supports a number of scheduling and shaping levels which
+construct a hierarchical tree. The first level in the hierarchy is the port
+(i.e. the physical interface), which constitutes the root node of the tree.
+The next level is the subport, which represents a group of users/subscribers.
+An individual user/subscriber is represented by a pipe at the following
+level. Each user can carry different traffic types with specific loss-rate,
+jitter, and latency requirements; these are represented at the next level as
+traffic classes. The last level contains the queues, which are grouped
+together to hold the packets of a specific traffic class.
+
+The QoS scheduler implementation requires flow classification, enqueue and
+dequeue operations. Flow classification is a mandatory stage for HQoS where
+incoming packets are classified by mapping packet-field information to a
+5-tuple (HQoS subport, pipe, traffic class, queue within traffic class, and
+color) and storing that information in the mbuf sched field. The enqueue
+operation uses this information to determine the queue for storing the
+packet; at this stage, if the specific queue is full, QoS drops the packet.
+The dequeue operation consists of scheduling the packet based on its length
+and available credits, and handing the scheduled packet over to the output
+interface.
+
+For more information on the QoS scheduler, please refer to the DPDK
+Programmer's Guide:
+http://dpdk.org/doc/guides/prog_guide/qos_framework.html
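+
+As an illustration, the following minimal sketch (mirroring the
+RTE_SCHED_PORT_HIERARCHY macro in the VPP HQoS code; variable names are
+illustrative) shows how a classification result is packed into the 64-bit
+mbuf sched word:
+
+```
+u64 sched = ((u64) queue & 0x3) |
+            (((u64) traffic_class & 0x3) << 2) |
+            (((u64) color & 0x3) << 4) |
+            (((u64) subport & 0xFFFF) << 16) |
+            (((u64) pipe & 0xFFFFFFFF) << 32);
+
+mbuf->hash.sched.lo = sched & 0xFFFFFFFF;
+mbuf->hash.sched.hi = sched >> 32;
+```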
+
+
+### QoS Scheduler Parameters
+
+The following illustrates the default HQoS configuration for each 10GbE output
+port:
+
+Single subport (subport 0):
+ - Subport rate set to 100% of port rate
+ - Each of the 4 traffic classes has rate set to 100% of port rate
+
+4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+ - Pipe rate set to 1/4K of port rate
+ - Each of the 4 traffic classes has rate set to 100% of pipe rate
+ - Within each traffic class, the byte-level WRR weights for the 4 queues are set to 1:1:1:1
+
+
+#### Port configuration
+
+```
+port {
+ rate 1250000000 /* Assuming 10GbE port */
+ frame_overhead 24 /* Overhead fields per Ethernet frame:
+ * 7B (Preamble) +
+ * 1B (Start of Frame Delimiter (SFD)) +
+ * 4B (Frame Check Sequence (FCS)) +
+ * 12B (Inter Frame Gap (IFG))
+ */
+ mtu 1522 /* Assuming Ethernet/IPv4 pkt (FCS not included) */
+ n_subports_per_port 1 /* Number of subports per output interface */
+ n_pipes_per_subport 4096 /* Number of pipes (users/subscribers) */
+ queue_sizes 64 64 64 64 /* Packet queue size for each traffic class.
+ * All queues within the same pipe traffic class
+ * have the same size. Queues from different
+ * pipes serving the same traffic class have
+ * the same size. */
+}
+```
+
+
+#### Subport configuration
+
+```
+subport 0 {
+ tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */
+ tb_size 1000000 /* Subport level token bucket size (bytes) */
+ tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */
+ tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */
+ tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */
+ tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */
+ tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */
+ pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */
+}
+```
+
+
+#### Pipe configuration
+
+```
+pipe_profile 0 {
+ tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */
+ tb_size 1000000 /* Pipe level token bucket size (bytes) */
+ tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */
+ tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */
+ tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */
+ tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */
+ tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */
+ tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */
+ tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */
+ tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */
+ tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */
+ tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */
+}
+```
+
+
+#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red)
+
+```
+red {
+ tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */
+ tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */
+ tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */
+ tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */
+ tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */
+ tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */
+ tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */
+ tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */
+ tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */
+ tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */
+ tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */
+ tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */
+ tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */
+ tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */
+ tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */
+ tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */
+}
+```
+
+
+### DPDK QoS Scheduler Integration in VPP
+
+The Hierarchical Quality-of-Service (HQoS) scheduler object can be seen as
+part of the logical NIC output interface. To enable HQoS on a specific output
+interface, the VPP startup.conf file has to be configured accordingly. The
+output interface that requires HQoS should have the "hqos" parameter
+specified in the dpdk section. Another optional parameter, "hqos-thread", can
+be used to associate the output interface with a specific HQoS thread. In the
+cpu section of the config file, "corelist-hqos-threads" is introduced to
+assign logical cpu cores to run the HQoS threads. An HQoS thread can run
+multiple HQoS objects, each associated with a different output interface.
+Instead of writing packets to the NIC TX queue directly, the worker threads
+write them to software queues. The HQoS threads read the software queues,
+enqueue the packets to HQoS objects, and dequeue packets from HQoS objects
+and write them to the NIC output interfaces. The worker threads need to be
+able to send packets to any output interface; therefore, each HQoS object
+associated with a NIC output interface should have as many software queues
+as there are worker threads.
+
+The following sample startup configuration file illustrates 4 worker threads
+feeding 2 HQoS threads, each handling the QoS scheduler for one output
+interface.
+
+```
+dpdk {
+ socket-mem 16384,16384
+
+ dev 0000:02:00.0 {
+ num-rx-queues 2
+ hqos
+ }
+ dev 0000:06:00.0 {
+ num-rx-queues 2
+ hqos
+ }
+
+ num-mbufs 1000000
+}
+
+cpu {
+ main-core 0
+ corelist-workers 1, 2, 3, 4
+ corelist-hqos-threads 5, 6
+}
+```
+
+
+### QoS scheduler CLI Commands
+
+Each QoS scheduler instance is initialised with the default parameters
+required to configure the HQoS port, subport, pipes and queues. Some of the
+parameters can be reconfigured at run time through CLI commands.
+
+
+#### Configuration
+
+The following commands can be used to configure QoS scheduler parameters.
+
+The command below sets the subport-level parameters such as the token bucket
+rate (bytes per second), token bucket size (bytes), traffic class rates
+(bytes per second) and token update period (milliseconds).
+
+```
+set dpdk interface hqos subport <interface> subport <subport_id> [rate <n>]
+ [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
+```
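+
+For example, a hypothetical invocation matching the default subport
+configuration:
+
+```
+set dpdk interface hqos subport TenGigabitEthernet2/0/0 subport 0
+    rate 1250000000 bktsize 1000000 period 10
+```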
+
+For setting the pipe profile, the following command can be used.
+
+```
+set dpdk interface hqos pipe <interface> subport <subport_id> pipe <pipe_id>
+ profile <profile_id>
+```
+
+To assign a QoS scheduler instance to a specific thread, the following
+command can be used.
+
+```
+set dpdk interface hqos placement <interface> thread <n>
+```
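+
+For example (interface and thread values are illustrative, matching the
+placement shown further below):
+
+```
+set dpdk interface hqos placement TenGigabitEthernet2/0/0 thread 5
+```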
+
+The command below is used to set the packet fields required for classifying
+the incoming packet. As a result of the classification process, the packet
+field information is mapped to a 5-tuple (subport, pipe, traffic class,
+queue within traffic class, color) and stored in the packet mbuf.
+
+```
+set dpdk interface hqos pktfield <interface> id subport|pipe|tc offset <n>
+ mask <hex-mask>
+```
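+
+For example, a hypothetical invocation configuring the pipe field to match
+the defaults (byte offset 40, 12-bit mask):
+
+```
+set dpdk interface hqos pktfield TenGigabitEthernet2/0/0 id pipe offset 40
+    mask 0x0000000fff000000
+```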
+
+The DSCP table entries used for identifying the traffic class and queue can be set using the command below:
+
+```
+set dpdk interface hqos tctbl <interface> entry <map_val> tc <tc_id> queue <queue_id>
+```
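+
+For example, mapping DSCP value 9 to traffic class 2, queue 1 (consistent
+with the default translation table displayed by the show command in the
+next section):
+
+```
+set dpdk interface hqos tctbl TenGigabitEthernet2/0/0 entry 9 tc 2 queue 1
+```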
+
+
+#### Show Command
+
+The QoS scheduler configuration can be displayed using the command below.
+
+```
+ vpp# show dpdk interface hqos TenGigabitEthernet2/0/0
+ Thread:
+ Input SWQ size = 4096 packets
+ Enqueue burst size = 256 packets
+ Dequeue burst size = 220 packets
+ Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport)
+ Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe)
+ Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc)
+ Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)
+ [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ Port:
+ Rate = 1250000000 bytes/second
+ MTU = 1514 bytes
+ Frame overhead = 24 bytes
+ Number of subports = 1
+ Number of pipes per subport = 4096
+ Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets
+ Number of pipe profiles = 1
+ Subport 0:
+ Rate = 120000000 bytes/second
+ Token bucket size = 1000000 bytes
+ Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second
+ TC period = 10 milliseconds
+ Pipe profile 0:
+ Rate = 305175 bytes/second
+ Token bucket size = 1000000 bytes
+ Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second
+ TC period = 40 milliseconds
+ TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+ TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+ TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+ TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+```
+
+The QoS scheduler placement over the logical cpu cores can be displayed
+using the command below.
+
+```
+ vpp# show dpdk interface hqos placement
+ Thread 5 (vpp_hqos-threads_0 at lcore 5):
+ TenGigabitEthernet2/0/0 queue 0
+ Thread 6 (vpp_hqos-threads_1 at lcore 6):
+ TenGigabitEthernet4/0/1 queue 0
+```
+
+
+### QoS Scheduler Binary APIs
+
+This section explains the available binary APIs for configuring QoS
+scheduler parameters at run time.
+
+The following API can be used to set the pipe profile of a pipe that belongs
+to a given subport:
+
+```
+sw_interface_set_dpdk_hqos_pipe rx <intfc> | sw_if_index <id>
+ subport <subport-id> pipe <pipe-id> profile <profile-id>
+```
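+
+For example, a hypothetical invocation assigning pipe profile 0 to pipe 2 of
+subport 0:
+
+```
+sw_interface_set_dpdk_hqos_pipe rx TenGigabitEthernet2/0/0 subport 0
+    pipe 2 profile 0
+```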
+
+The data structures used to set the pipe profile parameters are as follows:
+
+```
+ /** \\brief DPDK interface HQoS pipe profile set request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface
+ @param subport - subport ID
+ @param pipe - pipe ID within its subport
+ @param profile - pipe profile ID
+ */
+ define sw_interface_set_dpdk_hqos_pipe {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 subport;
+ u32 pipe;
+ u32 profile;
+ };
+
+ /** \\brief DPDK interface HQoS pipe profile set reply
+ @param context - sender context, to match reply w/ request
+ @param retval - request return code
+ */
+ define sw_interface_set_dpdk_hqos_pipe_reply {
+ u32 context;
+ i32 retval;
+ };
+```
+
+The following API can be used to set the subport-level parameters, for
+example: token bucket rate (bytes per second), token bucket size (bytes),
+traffic class rates (bytes per second) and token update period.
+
+```
+sw_interface_set_dpdk_hqos_subport rx <intfc> | sw_if_index <id>
+ subport <subport-id> [rate <n>] [bktsize <n>]
+ [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
+```
+
+The data structures used to set the subport-level parameters are as follows:
+
+```
+ /** \\brief DPDK interface HQoS subport parameters set request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface
+ @param subport - subport ID
+ @param tb_rate - subport token bucket rate (measured in bytes/second)
+ @param tb_size - subport token bucket size (measured in credits)
+ @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second)
+ @param tc_period - enforcement period for rates (measured in milliseconds)
+ */
+ define sw_interface_set_dpdk_hqos_subport {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 subport;
+ u32 tb_rate;
+ u32 tb_size;
+ u32 tc_rate[4];
+ u32 tc_period;
+ };
+
+ /** \\brief DPDK interface HQoS subport parameters set reply
+ @param context - sender context, to match reply w/ request
+ @param retval - request return code
+ */
+ define sw_interface_set_dpdk_hqos_subport_reply {
+ u32 context;
+ i32 retval;
+ };
+```
+
+The following API can be used to set a DSCP table entry. The DSCP table has
+64 entries that map the packet DSCP field onto a traffic class and HQoS
+input queue.
+
+```
+sw_interface_set_dpdk_hqos_tctbl rx <intfc> | sw_if_index <id>
+ entry <n> tc <n> queue <n>
+```
+
+The data structures used for setting DSCP table entries are given below.
+
+```
+ /** \\brief DPDK interface HQoS tctbl entry set request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface
+ @param entry - entry index ID
+ @param tc - traffic class (0 .. 3)
+ @param queue - traffic class queue (0 .. 3)
+ */
+ define sw_interface_set_dpdk_hqos_tctbl {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 entry;
+ u32 tc;
+ u32 queue;
+ };
+
+ /** \\brief DPDK interface HQoS tctbl entry set reply
+ @param context - sender context, to match reply w/ request
+ @param retval - request return code
+ */
+ define sw_interface_set_dpdk_hqos_tctbl_reply {
+ u32 context;
+ i32 retval;
+ };
+```
diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c
new file mode 100644
index 00000000..a9cf2502
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/cli.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <dpdk/device/dpdk.h>
+#include <dpdk/ipsec/ipsec.h>
+
+static void
+dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display)
+{
+ dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 i, skip_master;
+
+ if (!dcm->enabled)
+ {
+ vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n");
+ return;
+ }
+
+ if (detail_display)
+ vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n",
+ "cipher", "auth");
+ else
+ vlib_cli_output (vm, "worker\tcrypto device id(type)\n");
+
+ skip_master = vlib_num_workers () > 0;
+
+ for (i = 0; i < tm->n_vlib_mains; i++)
+ {
+ uword key, data;
+ u32 thread_index = vlib_mains[i]->thread_index;
+ crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
+ u8 *s = 0;
+
+ if (skip_master)
+ {
+ skip_master = 0;
+ continue;
+ }
+
+ if (!detail_display)
+ {
+ i32 last_cdev = -1;
+ crypto_qp_data_t *qpd;
+
+ s = format (s, "%u\t", thread_index);
+
+ /* *INDENT-OFF* */
+ vec_foreach (qpd, cwm->qp_data)
+ {
+ u32 dev_id = qpd->dev_id;
+
+ if ((u16) last_cdev != dev_id)
+ {
+ struct rte_cryptodev_info cdev_info;
+
+ rte_cryptodev_info_get (dev_id, &cdev_info);
+
+ s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags &
+ RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW");
+ }
+ last_cdev = dev_id;
+ }
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "%s", s);
+ }
+ else
+ {
+ char cipher_str[15], auth_str[15];
+ struct rte_cryptodev_capabilities cap;
+ crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key;
+ /* *INDENT-OFF* */
+ hash_foreach (key, data, cwm->algo_qp_map,
+ ({
+ cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+#if DPDK_NO_AEAD
+ cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ cap.sym.cipher.algo = p_key->cipher_algo;
+#else
+ if (p_key->is_aead)
+ {
+ cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AEAD;
+ cap.sym.aead.algo = p_key->cipher_algo;
+ }
+ else
+ {
+ cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ cap.sym.cipher.algo = p_key->cipher_algo;
+ }
+#endif
+ check_algo_is_supported (&cap, cipher_str);
+
+ cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+ cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH;
+ cap.sym.auth.algo = p_key->auth_algo;
+ check_algo_is_supported (&cap, auth_str);
+
+ vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n",
+ vlib_mains[i]->thread_index, cipher_str, auth_str,
+ p_key->is_outbound ? "out" : "in",
+ cwm->qp_data[data].dev_id,
+ cwm->qp_data[data].qp_id);
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+}
+
+static clib_error_t *
+lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u16 detail = 0;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "verbose"))
+ detail = 1;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ dpdk_ipsec_show_mapping (vm, detail);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to display the DPDK Crypto device data. See
+ * @ref dpdk_crypto_ipsec_doc for more details on initializing the
+ * DPDK Crypto device.
+ *
+ * @cliexpar
+ * Example of displaying the DPDK Crypto device data when disabled:
+ * @cliexstart{show crypto device mapping}
+ * DPDK Cryptodev support is disabled
+ * @cliexend
+ * Example of displaying the DPDK Crypto device data when enabled:
+ * @cliexstart{show crypto device mapping}
+ * worker crypto device id(type)
+ * 1 1(SW)
+ * 2 1(SW)
+ * @cliexend
+ * Example of displaying the DPDK Crypto device data when enabled with verbose:
+ * @cliexstart{show crypto device mapping verbose}
+ * worker cipher auth dir dev qp
+ * 1 AES_CTR AES-XCBC-MAC in 1 0
+ * 1 AES_CTR HMAC-SHA384 in 1 0
+ * 1 AES_CTR HMAC-SHA384 out 1 1
+ * 1 AES_CBC HMAC-SHA512 in 1 0
+ * 1 AES_CBC HMAC-SHA256 in 1 0
+ * 1 AES_CBC AES-XCBC-MAC out 1 1
+ * 1 AES_CTR AES-XCBC-MAC out 1 1
+ * 1 AES_CBC HMAC-SHA256 out 1 1
+ * 1 AES_CTR HMAC-SHA512 out 1 1
+ * 1 AES_CTR HMAC-SHA256 in 1 0
+ * 1 AES_CTR HMAC-SHA1 in 1 0
+ * 1 AES_CBC HMAC-SHA512 out 1 1
+ * 1 AES_CBC HMAC-SHA384 out 1 1
+ * 1 AES_CTR HMAC-SHA1 out 1 1
+ * 1 AES_CTR HMAC-SHA256 out 1 1
+ * 1 AES_CBC HMAC-SHA1 in 1 0
+ * 1 AES_CBC AES-XCBC-MAC in 1 0
+ * 1 AES_CTR HMAC-SHA512 in 1 0
+ * 1 AES_CBC HMAC-SHA1 out 1 1
+ * 1 AES_CBC HMAC-SHA384 in 1 0
+ * 2 AES_CTR AES-XCBC-MAC in 1 2
+ * 2 AES_CTR HMAC-SHA384 in 1 2
+ * 2 AES_CTR HMAC-SHA384 out 1 3
+ * 2 AES_CBC HMAC-SHA512 in 1 2
+ * 2 AES_CBC HMAC-SHA256 in 1 2
+ * 2 AES_CBC AES-XCBC-MAC out 1 3
+ * 2 AES_CTR AES-XCBC-MAC out 1 3
+ * 2 AES_CBC HMAC-SHA256 out 1 3
+ * 2 AES_CTR HMAC-SHA512 out 1 3
+ * 2 AES_CTR HMAC-SHA256 in 1 2
+ * 2 AES_CTR HMAC-SHA1 in 1 2
+ * 2 AES_CBC HMAC-SHA512 out 1 3
+ * 2 AES_CBC HMAC-SHA384 out 1 3
+ * 2 AES_CTR HMAC-SHA1 out 1 3
+ * 2 AES_CTR HMAC-SHA256 out 1 3
+ * 2 AES_CBC HMAC-SHA1 in 1 2
+ * 2 AES_CBC AES-XCBC-MAC in 1 2
+ * 2 AES_CTR HMAC-SHA512 in 1 2
+ * 2 AES_CBC HMAC-SHA1 out 1 3
+ * 2 AES_CBC HMAC-SHA384 in 1 2
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = {
+ .path = "show crypto device mapping",
+  .short_help =
+    "show crypto device mapping [verbose]",
+ .function = lcore_cryptodev_map_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c
new file mode 100644
index 00000000..a3c45902
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/crypto_node.c
@@ -0,0 +1,215 @@
+/*
+ *------------------------------------------------------------------
+ * crypto_node.c - DPDK Cryptodev input node
+ *
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ipsec/ipsec.h>
+
+#include <dpdk/device/dpdk.h>
+#include <dpdk/device/dpdk_priv.h>
+#include <dpdk/ipsec/ipsec.h>
+
+#define foreach_dpdk_crypto_input_next \
+ _(DROP, "error-drop") \
+ _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \
+ _(DECRYPT_POST, "dpdk-esp-decrypt-post")
+
+typedef enum
+{
+#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f,
+ foreach_dpdk_crypto_input_next
+#undef _
+ DPDK_CRYPTO_INPUT_N_NEXT,
+} dpdk_crypto_input_next_t;
+
+#define foreach_dpdk_crypto_input_error \
+ _(DQ_COPS, "Crypto ops dequeued") \
+ _(COP_FAILED, "Crypto op failed")
+
+typedef enum
+{
+#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f,
+ foreach_dpdk_crypto_input_error
+#undef _
+ DPDK_CRYPTO_INPUT_N_ERROR,
+} dpdk_crypto_input_error_t;
+
+static char *dpdk_crypto_input_error_strings[] = {
+#define _(n, s) s,
+ foreach_dpdk_crypto_input_error
+#undef _
+};
+
+vlib_node_registration_t dpdk_crypto_input_node;
+
+typedef struct
+{
+ u32 cdev;
+ u32 qp;
+ u32 status;
+ u32 sa_idx;
+ u32 next_index;
+} dpdk_crypto_input_trace_t;
+
+static u8 *
+format_dpdk_crypto_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *);
+
+ s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d",
+ t->cdev, t->qp, t->next_index);
+
+ s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx);
+
+ return s;
+}
+
+static_always_inline u32
+dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node,
+ crypto_qp_data_t * qpd)
+{
+ u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index;
+ struct rte_crypto_op **cops = qpd->cops;
+
+ if (qpd->inflights == 0)
+ return 0;
+
+ if (qpd->is_outbound)
+ def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST;
+ else
+ def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST;
+
+ n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id,
+ cops, VLIB_FRAME_SIZE);
+ n_deq = n_cops;
+ next_index = def_next_index;
+
+ qpd->inflights -= n_cops;
+ ASSERT (qpd->inflights >= 0);
+
+ while (n_cops > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_cops > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0;
+ vlib_buffer_t *b0 = 0;
+ struct rte_crypto_op *cop;
+ struct rte_crypto_sym_op *sym_cop;
+
+ cop = cops[0];
+ cops += 1;
+ n_cops -= 1;
+ n_left_to_next -= 1;
+
+ next0 = def_next_index;
+
+ if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
+ {
+ next0 = DPDK_CRYPTO_INPUT_NEXT_DROP;
+ vlib_node_increment_counter (vm, dpdk_crypto_input_node.index,
+ DPDK_CRYPTO_INPUT_ERROR_COP_FAILED,
+ 1);
+ }
+ cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+
+ sym_cop = (struct rte_crypto_sym_op *) (cop + 1);
+ b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src);
+ bi0 = vlib_get_buffer_index (vm, b0);
+
+ to_next[0] = bi0;
+ to_next += 1;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vlib_trace_next_frame (vm, node, next0);
+ dpdk_crypto_input_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->cdev = qpd->dev_id;
+ tr->qp = qpd->qp_id;
+ tr->status = cop->status;
+ tr->next_index = next0;
+ tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ crypto_free_cop (qpd, qpd->cops, n_deq);
+
+ vlib_node_increment_counter (vm, dpdk_crypto_input_node.index,
+ DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq);
+ return n_deq;
+}
+
+static uword
+dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 thread_index = vlib_get_thread_index ();
+ dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+ crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
+ crypto_qp_data_t *qpd;
+ u32 n_deq = 0;
+
+ /* *INDENT-OFF* */
+ vec_foreach (qpd, cwm->qp_data)
+ n_deq += dpdk_crypto_dequeue(vm, node, qpd);
+ /* *INDENT-ON* */
+
+ return n_deq;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_crypto_input_node) =
+{
+ .function = dpdk_crypto_input_fn,
+ .name = "dpdk-crypto-input",
+ .format_trace = format_dpdk_crypto_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .n_errors = DPDK_CRYPTO_INPUT_N_ERROR,
+ .error_strings = dpdk_crypto_input_error_strings,
+ .n_next_nodes = DPDK_CRYPTO_INPUT_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n,
+ foreach_dpdk_crypto_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/ipsec/dir.dox b/src/plugins/dpdk/ipsec/dir.dox
new file mode 100644
index 00000000..05504541
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/dir.dox
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir src/plugins/dpdk/ipsec
+@brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API.
+
+This directory contains the source code for the DPDK Crypto abstraction layer.
+
+*/
+/*? %%clicmd:group_label DPDK Crypto %% ?*/
+/*? %%syscfg:group_label DPDK Crypto %% ?*/
diff --git a/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md
new file mode 100644
index 00000000..8ea77a6c
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md
@@ -0,0 +1,87 @@
+# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc}
+
+This document is meant to contain all related information about implementation and usability.
+
+
+## VPP IPsec with DPDK Cryptodev
+
+DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)).
+
+When there are enough Cryptodev resources for all workers, the node graph is reconfigured by adding and changing the default next nodes.
+
+The following nodes are added:
+* dpdk-crypto-input : polling input node, dequeuing from crypto devices.
+* dpdk-esp-encrypt : internal node.
+* dpdk-esp-decrypt : internal node.
+* dpdk-esp-encrypt-post : internal node.
+* dpdk-esp-decrypt-post : internal node.
+
+Set new default next nodes:
+* for esp encryption: esp-encrypt -> dpdk-esp-encrypt
+* for esp decryption: esp-decrypt -> dpdk-esp-decrypt
+
+
+### How to enable VPP IPSec with DPDK Cryptodev support
+
+When building DPDK with VPP, Cryptodev support is always enabled.
+
+Additionally, on x86_64 platforms, DPDK is built with SW crypto support.
+
+
+### Crypto Resources allocation
+
+VPP allocates crypto resources based on a best-effort approach:
+* first allocate Hardware crypto resources, then Software.
+* if there are not enough crypto resources for all workers, the graph node is not modified and the default VPP IPsec implementation based on OpenSSL is used. The following message is displayed:
+
+ 0: dpdk_ipsec_init: not enough Cryptodevs, default to OpenSSL IPsec
+
+
+### Configuration example
+
+To enable DPDK Cryptodev, the user just needs to provide cryptodevs in the startup.conf.
+
+Below is an example startup.conf; it is not meant to be a default configuration:
+
+```
+dpdk {
+ dev 0000:81:00.0
+ dev 0000:81:00.1
+ dev 0000:85:01.0
+ dev 0000:85:01.1
+ vdev crypto_aesni_mb0,socket_id=1
+ vdev crypto_aesni_mb1,socket_id=1
+}
+```
+
+In the above configuration:
+* 0000:81:00.0 and 0000:81:00.1 are Ethernet device BDFs.
+* 0000:85:01.0 and 0000:85:01.1 are Crypto device BDFs and they require the same driver binding as DPDK Ethernet devices but they do not support any extra configuration options.
+* Two AESNI-MB Software (Virtual) Cryptodev PMDs are created in NUMA node 1.
+
+For further details refer to [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html)
+
+### Operational data
+
+The following CLI command displays the Cryptodev/Worker mapping:
+
+ show crypto device mapping [verbose]
+
+
+### nasm
+
+Building the DPDK Crypto Libraries requires the open source project nasm (The Netwide
+Assembler) to be installed. The recommended version of nasm is 2.12.02; the minimum
+supported version is 2.11.06. Use the following command to determine the current nasm version:
+
+ nasm -v
+
+CentOS 7.3 and earlier and Fedora 21 and earlier use unsupported versions
+of nasm. Use the following set of commands to build a supported version:
+
+ wget http://www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2
+ tar -xjvf nasm-2.12.02.tar.bz2
+ cd nasm-2.12.02/
+ ./configure
+ make
+ sudo make install
diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h
new file mode 100644
index 00000000..51224d6c
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/esp.h
@@ -0,0 +1,399 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __DPDK_ESP_H__
+#define __DPDK_ESP_H__
+
+#include <dpdk/ipsec/ipsec.h>
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
+
+typedef struct
+{
+ enum rte_crypto_cipher_algorithm algo;
+#if ! DPDK_NO_AEAD
+ enum rte_crypto_aead_algorithm aead_algo;
+#endif
+ u8 key_len;
+ u8 iv_len;
+} dpdk_esp_crypto_alg_t;
+
+typedef struct
+{
+ enum rte_crypto_auth_algorithm algo;
+ u8 trunc_size;
+} dpdk_esp_integ_alg_t;
+
+typedef struct
+{
+ dpdk_esp_crypto_alg_t *esp_crypto_algs;
+ dpdk_esp_integ_alg_t *esp_integ_algs;
+} dpdk_esp_main_t;
+
+dpdk_esp_main_t dpdk_esp_main;
+
+static_always_inline void
+dpdk_esp_init ()
+{
+ dpdk_esp_main_t *em = &dpdk_esp_main;
+ dpdk_esp_integ_alg_t *i;
+ dpdk_esp_crypto_alg_t *c;
+
+ vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1);
+
+ c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128];
+ c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
+ c->key_len = 16;
+ c->iv_len = 16;
+
+ c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192];
+ c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
+ c->key_len = 24;
+ c->iv_len = 16;
+
+ c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256];
+ c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
+ c->key_len = 32;
+ c->iv_len = 16;
+
+ c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128];
+#if DPDK_NO_AEAD
+ c->algo = RTE_CRYPTO_CIPHER_AES_GCM;
+#else
+ c->aead_algo = RTE_CRYPTO_AEAD_AES_GCM;
+#endif
+ c->key_len = 16;
+ c->iv_len = 8;
+
+ vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1);
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96];
+ i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC;
+ i->trunc_size = 12;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96];
+ i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
+ i->trunc_size = 12;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128];
+ i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
+ i->trunc_size = 16;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192];
+ i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC;
+ i->trunc_size = 24;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256];
+ i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC;
+ i->trunc_size = 32;
+#if DPDK_NO_AEAD
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128];
+ i->algo = RTE_CRYPTO_AUTH_AES_GCM;
+ i->trunc_size = 16;
+#endif
+}
+
+static_always_inline int
+translate_crypto_algo (ipsec_crypto_alg_t crypto_algo,
+ struct rte_crypto_sym_xform *xform, u8 use_esn)
+{
+#if ! DPDK_NO_AEAD
+ const u16 iv_off =
+ sizeof (struct rte_crypto_op) + sizeof (struct rte_crypto_sym_op) +
+ offsetof (dpdk_cop_priv_t, cb);
+#endif
+
+ xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+
+ switch (crypto_algo)
+ {
+ case IPSEC_CRYPTO_ALG_NONE:
+#if ! DPDK_NO_AEAD
+ xform->cipher.iv.offset = iv_off;
+ xform->cipher.iv.length = 0;
+#endif
+ xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL;
+ break;
+ case IPSEC_CRYPTO_ALG_AES_CBC_128:
+ case IPSEC_CRYPTO_ALG_AES_CBC_192:
+ case IPSEC_CRYPTO_ALG_AES_CBC_256:
+#if ! DPDK_NO_AEAD
+ xform->cipher.iv.offset = iv_off;
+ xform->cipher.iv.length = 16;
+#endif
+ xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
+ break;
+ case IPSEC_CRYPTO_ALG_AES_GCM_128:
+#if DPDK_NO_AEAD
+ xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM;
+#else
+ xform->type = RTE_CRYPTO_SYM_XFORM_AEAD;
+ xform->aead.algo = RTE_CRYPTO_AEAD_AES_GCM;
+ xform->aead.iv.offset = iv_off;
+ xform->aead.iv.length = 12; /* GCM IV, not ESP IV */
+ xform->aead.digest_length = 16;
+ xform->aead.aad_length = use_esn ? 12 : 8;
+#endif
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+static_always_inline int
+translate_integ_algo (ipsec_integ_alg_t integ_alg,
+ struct rte_crypto_sym_xform *auth_xform, u8 use_esn)
+{
+ auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+
+ switch (integ_alg)
+ {
+ case IPSEC_INTEG_ALG_NONE:
+ auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL;
+ auth_xform->auth.digest_length = 0;
+ break;
+ case IPSEC_INTEG_ALG_SHA1_96:
+ auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC;
+ auth_xform->auth.digest_length = 12;
+ break;
+ case IPSEC_INTEG_ALG_SHA_256_96:
+ auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
+ auth_xform->auth.digest_length = 12;
+ break;
+ case IPSEC_INTEG_ALG_SHA_256_128:
+ auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
+ auth_xform->auth.digest_length = 16;
+ break;
+ case IPSEC_INTEG_ALG_SHA_384_192:
+ auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC;
+ auth_xform->auth.digest_length = 24;
+ break;
+ case IPSEC_INTEG_ALG_SHA_512_256:
+ auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC;
+ auth_xform->auth.digest_length = 32;
+ break;
+#if DPDK_NO_AEAD
+ case IPSEC_INTEG_ALG_AES_GCM_128:
+ auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM;
+ auth_xform->auth.digest_length = 16;
+ auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8;
+ break;
+#endif
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+static_always_inline i32
+create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess,
+ u8 is_outbound)
+{
+ u32 thread_index = vlib_get_thread_index ();
+ dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+ crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
+ struct rte_crypto_sym_xform cipher_xform = { 0 };
+ struct rte_crypto_sym_xform auth_xform = { 0 };
+ struct rte_crypto_sym_xform *xfs;
+ uword key = 0, *data;
+ crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key;
+#if ! DPDK_NO_AEAD
+ i32 socket_id = rte_socket_id ();
+ i32 ret;
+#endif
+
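+  /* AES-GCM: the last 4 bytes of the configured key are the implicit salt (RFC 4106) */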
+ if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
+ {
+ sa->crypto_key_len -= 4;
+ clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4);
+ }
+ else
+ {
+ u32 seed = (u32) clib_cpu_time_now ();
+ sa->salt = random_u32 (&seed);
+ }
+
+ if (translate_crypto_algo (sa->crypto_alg, &cipher_xform, sa->use_esn) < 0)
+ return -1;
+ p_key->cipher_algo = cipher_xform.cipher.algo;
+
+ if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0)
+ return -1;
+ p_key->auth_algo = auth_xform.auth.algo;
+
+#if ! DPDK_NO_AEAD
+ if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
+ {
+ cipher_xform.aead.key.data = sa->crypto_key;
+ cipher_xform.aead.key.length = sa->crypto_key_len;
+
+ if (is_outbound)
+ cipher_xform.cipher.op =
+ (enum rte_crypto_cipher_operation) RTE_CRYPTO_AEAD_OP_ENCRYPT;
+ else
+ cipher_xform.cipher.op =
+ (enum rte_crypto_cipher_operation) RTE_CRYPTO_AEAD_OP_DECRYPT;
+ cipher_xform.next = NULL;
+ xfs = &cipher_xform;
+ p_key->is_aead = 1;
+ }
+ else /* Cipher + Auth */
+#endif
+ {
+ cipher_xform.cipher.key.data = sa->crypto_key;
+ cipher_xform.cipher.key.length = sa->crypto_key_len;
+
+ auth_xform.auth.key.data = sa->integ_key;
+ auth_xform.auth.key.length = sa->integ_key_len;
+
+ if (is_outbound)
+ {
+ cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
+ auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE;
+ cipher_xform.next = &auth_xform;
+ xfs = &cipher_xform;
+ }
+ else
+ {
+ cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
+ auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY;
+ auth_xform.next = &cipher_xform;
+ xfs = &auth_xform;
+ }
+ p_key->is_aead = 0;
+ }
+
+ p_key->is_outbound = is_outbound;
+
+ data = hash_get (cwm->algo_qp_map, key);
+ if (!data)
+ return -1;
+
+#if DPDK_NO_AEAD
+ sa_sess->sess =
+ rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs);
+ if (!sa_sess->sess)
+ return -1;
+#else
+ sa_sess->sess =
+ rte_cryptodev_sym_session_create (dcm->sess_h_pools[socket_id]);
+ if (!sa_sess->sess)
+ return -1;
+
+ ret =
+ rte_cryptodev_sym_session_init (cwm->qp_data[*data].dev_id, sa_sess->sess,
+ xfs, dcm->sess_pools[socket_id]);
+ if (ret)
+ return -1;
+#endif
+
+ sa_sess->qp_index = (u8) * data;
+
+ return 0;
+}
+
+static_always_inline void
+crypto_set_icb (dpdk_gcm_cnt_blk * icb, u32 salt, u32 seq, u32 seq_hi)
+{
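+  /* build the GCM initial counter block: salt, IV (seq, seq_hi), block counter */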
+ icb->salt = salt;
+ icb->iv[0] = seq;
+ icb->iv[1] = seq_hi;
+#if DPDK_NO_AEAD
+ icb->cnt = clib_host_to_net_u32 (1);
+#endif
+}
+
+#define __unused __attribute__((unused))
+static_always_inline void
+crypto_op_setup (u8 is_aead, struct rte_mbuf *mb0,
+ struct rte_crypto_op *cop, void *session,
+ u32 cipher_off, u32 cipher_len,
+ u8 * icb __unused, u32 iv_size __unused,
+ u32 auth_off, u32 auth_len,
+ u8 * aad __unused, u32 aad_size __unused,
+ u8 * digest, u64 digest_paddr, u32 digest_size __unused)
+{
+ struct rte_crypto_sym_op *sym_cop;
+
+ sym_cop = (struct rte_crypto_sym_op *) (cop + 1);
+
+ sym_cop->m_src = mb0;
+ rte_crypto_op_attach_sym_session (cop, session);
+
+#if DPDK_NO_AEAD
+ sym_cop->cipher.data.offset = cipher_off;
+ sym_cop->cipher.data.length = cipher_len;
+
+ sym_cop->cipher.iv.data = icb;
+ sym_cop->cipher.iv.phys_addr =
+ cop->phys_addr + (uintptr_t) icb - (uintptr_t) cop;
+ sym_cop->cipher.iv.length = iv_size;
+
+ if (is_aead)
+ {
+ sym_cop->auth.aad.data = aad;
+ sym_cop->auth.aad.phys_addr =
+ cop->phys_addr + (uintptr_t) aad - (uintptr_t) cop;
+ sym_cop->auth.aad.length = aad_size;
+ }
+ else
+ {
+ sym_cop->auth.data.offset = auth_off;
+ sym_cop->auth.data.length = auth_len;
+ }
+
+ sym_cop->auth.digest.data = digest;
+ sym_cop->auth.digest.phys_addr = digest_paddr;
+ sym_cop->auth.digest.length = digest_size;
+#else /* ! DPDK_NO_AEAD */
+ if (is_aead)
+ {
+ sym_cop->aead.data.offset = cipher_off;
+ sym_cop->aead.data.length = cipher_len;
+
+ sym_cop->aead.aad.data = aad;
+ sym_cop->aead.aad.phys_addr =
+ cop->phys_addr + (uintptr_t) aad - (uintptr_t) cop;
+
+ sym_cop->aead.digest.data = digest;
+ sym_cop->aead.digest.phys_addr = digest_paddr;
+ }
+ else
+ {
+ sym_cop->cipher.data.offset = cipher_off;
+ sym_cop->cipher.data.length = cipher_len;
+
+ sym_cop->auth.data.offset = auth_off;
+ sym_cop->auth.data.length = auth_len;
+
+ sym_cop->auth.digest.data = digest;
+ sym_cop->auth.digest.phys_addr = digest_paddr;
+ }
+#endif /* DPDK_NO_AEAD */
+}
+
+#undef __unused
+
+#endif /* __DPDK_ESP_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c
new file mode 100644
index 00000000..20936b36
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/esp_decrypt.c
@@ -0,0 +1,569 @@
+/*
+ * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev
+ *
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <dpdk/ipsec/ipsec.h>
+#include <dpdk/ipsec/esp.h>
+#include <dpdk/device/dpdk.h>
+#include <dpdk/device/dpdk_priv.h>
+
+#define foreach_esp_decrypt_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input")
+
+#define _(v, s) ESP_DECRYPT_NEXT_##v,
+typedef enum {
+ foreach_esp_decrypt_next
+#undef _
+ ESP_DECRYPT_N_NEXT,
+} esp_decrypt_next_t;
+
+#define foreach_esp_decrypt_error \
+ _(RX_PKTS, "ESP pkts received") \
+ _(DECRYPTION_FAILED, "ESP decryption failed") \
+ _(REPLAY, "SA replayed packet") \
+ _(NOT_IP, "Not IP packet (dropped)") \
+ _(ENQ_FAIL, "Enqueue failed (buffer full)") \
+ _(NO_CRYPTODEV, "Cryptodev not configured") \
+ _(BAD_LEN, "Invalid ciphertext length")
+
+
+typedef enum {
+#define _(sym,str) ESP_DECRYPT_ERROR_##sym,
+ foreach_esp_decrypt_error
+#undef _
+ ESP_DECRYPT_N_ERROR,
+} esp_decrypt_error_t;
+
+static char * esp_decrypt_error_strings[] = {
+#define _(sym,string) string,
+ foreach_esp_decrypt_error
+#undef _
+};
+
+vlib_node_registration_t dpdk_esp_decrypt_node;
+
+typedef struct {
+ ipsec_crypto_alg_t crypto_alg;
+ ipsec_integ_alg_t integ_alg;
+} esp_decrypt_trace_t;
+
+/* packet trace format function */
+static u8 * format_esp_decrypt_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *);
+
+ s = format (s, "esp: crypto %U integrity %U",
+ format_ipsec_crypto_alg, t->crypto_alg,
+ format_ipsec_integ_alg, t->integ_alg);
+ return s;
+}
+
+static uword
+dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, *to_next, next_index;
+ ipsec_main_t *im = &ipsec_main;
+ u32 thread_index = vlib_get_thread_index();
+ dpdk_crypto_main_t * dcm = &dpdk_crypto_main;
+ dpdk_esp_main_t * em = &dpdk_esp_main;
+ u32 i;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ crypto_worker_main_t *cwm =
+ vec_elt_at_index(dcm->workers_main, thread_index);
+ u32 n_qps = vec_len(cwm->qp_data);
+ struct rte_crypto_op ** cops_to_enq[n_qps];
+ u32 n_cop_qp[n_qps], * bi_to_enq[n_qps];
+
+ for (i = 0; i < n_qps; i++)
+ {
+ bi_to_enq[i] = cwm->qp_data[i].bi;
+ cops_to_enq[i] = cwm->qp_data[i].cops;
+ }
+
+ memset(n_cop_qp, 0, n_qps * sizeof(u32));
+
+ crypto_alloc_cops();
+
+ next_index = ESP_DECRYPT_NEXT_DROP;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, sa_index0 = ~0, seq, trunc_size, iv_size;
+ vlib_buffer_t * b0;
+ esp_header_t * esp0;
+ ipsec_sa_t * sa0;
+ struct rte_mbuf * mb0 = 0;
+ const int BLOCK_SIZE = 16;
+ crypto_sa_session_t * sa_sess;
+ void * sess;
+ u16 qp_index;
+ struct rte_crypto_op * cop = 0;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ esp0 = vlib_buffer_get_current (b0);
+
+ sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
+ sa0 = pool_elt_at_index (im->sad, sa_index0);
+
+ seq = clib_host_to_net_u32(esp0->seq);
+
+ /* anti-replay check */
+ if (sa0->use_anti_replay)
+ {
+ int rv = 0;
+
+ if (PREDICT_TRUE(sa0->use_esn))
+ rv = esp_replay_check_esn(sa0, seq);
+ else
+ rv = esp_replay_check(sa0, seq);
+
+ if (PREDICT_FALSE(rv))
+ {
+ clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq);
+ vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_REPLAY, 1);
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ goto trace;
+ }
+ }
+
+ sa0->total_data_size += b0->current_length;
+
+ sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0);
+
+ if (PREDICT_FALSE(!sa_sess->sess))
+ {
+ int ret = create_sym_sess(sa0, sa_sess, 0);
+
+ if (PREDICT_FALSE (ret))
+ {
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ goto trace;
+ }
+ }
+
+ sess = sa_sess->sess;
+ qp_index = sa_sess->qp_index;
+
+ ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0);
+ cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops);
+ ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
+
+ cops_to_enq[qp_index][0] = cop;
+ cops_to_enq[qp_index] += 1;
+ n_cop_qp[qp_index] += 1;
+ bi_to_enq[qp_index][0] = bi0;
+ bi_to_enq[qp_index] += 1;
+
+ rte_crypto_op_attach_sym_session(cop, sess);
+
+ if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
+ trunc_size = 16;
+ else
+ trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
+ iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
+
+ /* Convert vlib buffer to mbuf */
+ mb0 = rte_mbuf_from_vlib_buffer(b0);
+ mb0->data_len = b0->current_length;
+ mb0->pkt_len = b0->current_length;
+ mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data;
+
+ /* Outer IP header has already been stripped */
+ u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) -
+ iv_size - trunc_size;
+
+ if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0))
+ {
+ clib_warning ("payload %u not multiple of %d\n",
+ payload_len, BLOCK_SIZE);
+ vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_BAD_LEN, 1);
+ vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1);
+ bi_to_enq[qp_index] -= 1;
+ cops_to_enq[qp_index] -= 1;
+ n_cop_qp[qp_index] -= 1;
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ goto trace;
+ }
+
+ struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1);
+
+ u8 is_aead = sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128;
+ u32 cipher_off, cipher_len;
+ u32 auth_off = 0, auth_len = 0, aad_size = 0;
+ u8 *aad = NULL, *digest = NULL;
+ u64 digest_paddr;
+
+ u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t));
+ dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *)(sym_cop + 1);
+ dpdk_gcm_cnt_blk *icb = &priv->cb;
+
+ cipher_off = sizeof (esp_header_t) + iv_size;
+ cipher_len = payload_len;
+
+ digest =
+ vlib_buffer_get_current (b0) + sizeof(esp_header_t) +
+ iv_size + payload_len;
+
+ digest_paddr = mb0->buf_physaddr + (digest - (u8 *) mb0->buf_addr);
+
+ if (is_aead)
+ {
+ u32 *_iv = (u32 *) iv;
+
+ crypto_set_icb (icb, sa0->salt, _iv[0], _iv[1]);
+ iv_size = 16;
+
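+	      /* AAD is the start of the ESP header: SPI + seq (8 bytes), plus seq_hi when ESN is enabled (12 bytes) */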
+ aad = priv->aad;
+ clib_memcpy(aad, esp0, 8);
+ aad_size = 8;
+ if (sa0->use_esn)
+ {
+ *((u32*)&aad[8]) = sa0->seq_hi;
+ aad_size = 12;
+ }
+ }
+ else
+ {
+ clib_memcpy(icb, iv, 16);
+
+ auth_off = 0;
+ auth_len = sizeof(esp_header_t) + iv_size + payload_len;
+
+ if (sa0->use_esn)
+ {
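+		  /* ESN: save the received ICV, then write seq_hi in its place so it is covered by the auth check */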
+ dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1);
+
+ clib_memcpy (priv->icv, digest, trunc_size);
+ *((u32*) digest) = sa0->seq_hi;
+ auth_len += sizeof(sa0->seq_hi);
+
+ digest = priv->icv;
+ digest_paddr =
+ cop->phys_addr + (uintptr_t) priv->icv - (uintptr_t) cop;
+ }
+ }
+
+ crypto_op_setup (is_aead, mb0, cop, sess,
+ cipher_off, cipher_len, (u8 *) icb, iv_size,
+ auth_off, auth_len, aad, aad_size,
+ digest, digest_paddr, trunc_size);
+trace:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->crypto_alg = sa0->crypto_alg;
+ tr->integ_alg = sa0->integ_alg;
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_RX_PKTS,
+ from_frame->n_vectors);
+ crypto_qp_data_t *qpd;
+ /* *INDENT-OFF* */
+ vec_foreach_index (i, cwm->qp_data)
+ {
+ u32 enq;
+
+ if (!n_cop_qp[i])
+ continue;
+
+ qpd = vec_elt_at_index(cwm->qp_data, i);
+ enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id,
+ qpd->cops, n_cop_qp[i]);
+ qpd->inflights += enq;
+
+ if (PREDICT_FALSE(enq < n_cop_qp[i]))
+ {
+ crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq);
+ vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq);
+
+ vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_ENQ_FAIL,
+ n_cop_qp[i] - enq);
+ }
+ }
+ /* *INDENT-ON* */
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = {
+ .function = dpdk_esp_decrypt_node_fn,
+ .name = "dpdk-esp-decrypt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_esp_decrypt_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
+ .error_strings = esp_decrypt_error_strings,
+
+ .n_next_nodes = ESP_DECRYPT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
+ foreach_esp_decrypt_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn)
+
+/*
+ * Decrypt Post Node
+ */
+
+#define foreach_esp_decrypt_post_error \
+ _(PKTS, "ESP post pkts")
+
+typedef enum {
+#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym,
+ foreach_esp_decrypt_post_error
+#undef _
+ ESP_DECRYPT_POST_N_ERROR,
+} esp_decrypt_post_error_t;
+
+static char * esp_decrypt_post_error_strings[] = {
+#define _(sym,string) string,
+ foreach_esp_decrypt_post_error
+#undef _
+};
+
+vlib_node_registration_t dpdk_esp_decrypt_post_node;
+
+static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args)
+{
+ return s;
+}
+
+static uword
+dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, *to_next = 0, next_index;
+ ipsec_sa_t * sa0;
+ u32 sa_index0 = ~0;
+ ipsec_main_t *im = &ipsec_main;
+ dpdk_esp_main_t *em = &dpdk_esp_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ esp_footer_t * f0;
+ u32 bi0, next0, trunc_size, iv_size;
+ vlib_buffer_t * b0 = 0;
+ ip4_header_t *ih4 = 0, *oh4 = 0;
+ ip6_header_t *ih6 = 0, *oh6 = 0;
+ u8 tunnel_mode = 1;
+ u8 transport_ip6 = 0;
+
+ next0 = ESP_DECRYPT_NEXT_DROP;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
+ sa0 = pool_elt_at_index (im->sad, sa_index0);
+
+ to_next[0] = bi0;
+ to_next += 1;
+
+ if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
+ trunc_size = 16;
+ else
+ trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
+ iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
+
+ if (sa0->use_anti_replay)
+ {
+ esp_header_t * esp0 = vlib_buffer_get_current (b0);
+ u32 seq;
+ seq = clib_host_to_net_u32(esp0->seq);
+ if (PREDICT_TRUE(sa0->use_esn))
+ esp_replay_advance_esn(sa0, seq);
+ else
+ esp_replay_advance(sa0, seq);
+ }
+
+ ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t));
+ vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size);
+
+ b0->current_length -= (trunc_size + 2);
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) +
+ b0->current_length);
+ b0->current_length -= f0->pad_length;
+
+ /* transport mode */
+ if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6))
+ {
+ tunnel_mode = 0;
+
+ if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40))
+ {
+ if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60))
+ transport_ip6 = 1;
+ else
+ {
+ clib_warning("next header: 0x%x", f0->next_header);
+ vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_NOT_IP, 1);
+ goto trace;
+ }
+ }
+ }
+
+ if (PREDICT_TRUE (tunnel_mode))
+ {
+ if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP))
+ next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
+ else if (f0->next_header == IP_PROTOCOL_IPV6)
+ next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
+ else
+ {
+ clib_warning("next header: 0x%x", f0->next_header);
+ vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+ 1);
+ goto trace;
+ }
+ }
+ /* transport mode */
+ else
+ {
+ if (PREDICT_FALSE(transport_ip6))
+ {
+ ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t));
+ vlib_buffer_advance (b0, -sizeof(ip6_header_t));
+ oh6 = vlib_buffer_get_current (b0);
+ memmove(oh6, ih6, sizeof(ip6_header_t));
+
+ next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
+ oh6->protocol = f0->next_header;
+ oh6->payload_length =
+ clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain(vm, b0) -
+ sizeof (ip6_header_t));
+ }
+ else
+ {
+ vlib_buffer_advance (b0, -sizeof(ip4_header_t));
+ oh4 = vlib_buffer_get_current (b0);
+ memmove(oh4, ih4, sizeof(ip4_header_t));
+
+ next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
+ oh4->ip_version_and_header_length = 0x45;
+ oh4->fragment_id = 0;
+ oh4->flags_and_fragment_offset = 0;
+ oh4->protocol = f0->next_header;
+ oh4->length = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0));
+ oh4->checksum = ip4_header_checksum (oh4);
+ }
+ }
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0;
+
+trace:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->crypto_alg = sa0->crypto_alg;
+ tr->integ_alg = sa0->integ_alg;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index,
+ ESP_DECRYPT_POST_ERROR_PKTS,
+ from_frame->n_vectors);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = {
+ .function = dpdk_esp_decrypt_post_node_fn,
+ .name = "dpdk-esp-decrypt-post",
+ .vector_size = sizeof (u32),
+ .format_trace = format_esp_decrypt_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings),
+ .error_strings = esp_decrypt_post_error_strings,
+
+ .n_next_nodes = ESP_DECRYPT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
+ foreach_esp_decrypt_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn)
diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c
new file mode 100644
index 00000000..b4e29e91
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/esp_encrypt.c
@@ -0,0 +1,592 @@
+/*
+ * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev
+ *
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <dpdk/ipsec/ipsec.h>
+#include <dpdk/ipsec/esp.h>
+#include <dpdk/device/dpdk.h>
+#include <dpdk/device/dpdk_priv.h>
+
+#define foreach_esp_encrypt_next \
+_(DROP, "error-drop") \
+_(IP4_LOOKUP, "ip4-lookup") \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(INTERFACE_OUTPUT, "interface-output")
+
+#define _(v, s) ESP_ENCRYPT_NEXT_##v,
+typedef enum
+{
+ foreach_esp_encrypt_next
+#undef _
+ ESP_ENCRYPT_N_NEXT,
+} esp_encrypt_next_t;
+
+#define foreach_esp_encrypt_error \
+ _(RX_PKTS, "ESP pkts received") \
+ _(SEQ_CYCLED, "sequence number cycled") \
+ _(ENQ_FAIL, "Enqueue failed (buffer full)") \
+ _(NO_CRYPTODEV, "Cryptodev not configured")
+
+
+typedef enum
+{
+#define _(sym,str) ESP_ENCRYPT_ERROR_##sym,
+ foreach_esp_encrypt_error
+#undef _
+ ESP_ENCRYPT_N_ERROR,
+} esp_encrypt_error_t;
+
+static char *esp_encrypt_error_strings[] = {
+#define _(sym,string) string,
+ foreach_esp_encrypt_error
+#undef _
+};
+
+vlib_node_registration_t dpdk_esp_encrypt_node;
+
+typedef struct
+{
+ u32 spi;
+ u32 seq;
+ ipsec_crypto_alg_t crypto_alg;
+ ipsec_integ_alg_t integ_alg;
+} esp_encrypt_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_esp_encrypt_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *);
+
+ s = format (s, "esp: spi %u seq %u crypto %U integrity %U",
+ t->spi, t->seq,
+ format_ipsec_crypto_alg, t->crypto_alg,
+ format_ipsec_integ_alg, t->integ_alg);
+ return s;
+}
+
+static uword
+dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, *to_next, next_index;
+ ipsec_main_t *im = &ipsec_main;
+ u32 thread_index = vlib_get_thread_index ();
+ dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+ dpdk_esp_main_t *em = &dpdk_esp_main;
+ u32 i;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ crypto_worker_main_t *cwm =
+ vec_elt_at_index (dcm->workers_main, thread_index);
+ u32 n_qps = vec_len (cwm->qp_data);
+ struct rte_crypto_op **cops_to_enq[n_qps];
+ u32 n_cop_qp[n_qps], *bi_to_enq[n_qps];
+
+ for (i = 0; i < n_qps; i++)
+ {
+ bi_to_enq[i] = cwm->qp_data[i].bi;
+ cops_to_enq[i] = cwm->qp_data[i].cops;
+ }
+
+ memset (n_cop_qp, 0, n_qps * sizeof (u32));
+
+ crypto_alloc_cops ();
+
+ next_index = ESP_ENCRYPT_NEXT_DROP;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0;
+ vlib_buffer_t *b0 = 0;
+ u32 sa_index0;
+ ipsec_sa_t *sa0;
+ ip4_and_esp_header_t *ih0, *oh0 = 0;
+ ip6_and_esp_header_t *ih6_0, *oh6_0 = 0;
+ struct rte_mbuf *mb0 = 0;
+ esp_footer_t *f0;
+ u8 is_ipv6;
+ u8 ip_hdr_size;
+ u8 next_hdr_type;
+ u8 transport_mode = 0;
+ const int BLOCK_SIZE = 16;
+ u32 iv_size;
+ u16 orig_sz;
+ u8 trunc_size;
+ crypto_sa_session_t *sa_sess;
+ void *sess;
+ struct rte_crypto_op *cop = 0;
+ u16 qp_index;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sa_index0 = vnet_buffer (b0)->ipsec.sad_index;
+ sa0 = pool_elt_at_index (im->sad, sa_index0);
+
+ if (PREDICT_FALSE (esp_seq_advance (sa0)))
+ {
+ clib_warning ("sequence number counter has cycled SPI %u",
+ sa0->spi);
+ vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
+ ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1);
+ //TODO: rekey SA
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ goto trace;
+ }
+
+ sa0->total_data_size += b0->current_length;
+
+ sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0);
+ if (PREDICT_FALSE (!sa_sess->sess))
+ {
+ int ret = create_sym_sess (sa0, sa_sess, 1);
+
+ if (PREDICT_FALSE (ret))
+ {
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ goto trace;
+ }
+ }
+
+ qp_index = sa_sess->qp_index;
+ sess = sa_sess->sess;
+
+ ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0);
+ cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops);
+ ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
+
+ cops_to_enq[qp_index][0] = cop;
+ cops_to_enq[qp_index] += 1;
+ n_cop_qp[qp_index] += 1;
+ bi_to_enq[qp_index][0] = bi0;
+ bi_to_enq[qp_index] += 1;
+
+ ssize_t adv;
+ iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
+ if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
+ trunc_size = 16;
+ else
+ trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
+
+ ih0 = vlib_buffer_get_current (b0);
+ orig_sz = b0->current_length;
+ is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60;
+ if (PREDICT_TRUE (sa0->is_tunnel))
+ {
+ if (PREDICT_TRUE (!is_ipv6))
+ adv = -sizeof (ip4_and_esp_header_t);
+ else
+ adv = -sizeof (ip6_and_esp_header_t);
+ }
+ else
+ {
+ adv = -sizeof (esp_header_t);
+ if (PREDICT_TRUE (!is_ipv6))
+ orig_sz -= sizeof (ip4_header_t);
+ else
+ orig_sz -= sizeof (ip6_header_t);
+ }
+
+	  /* transport mode: save the ethernet header before it is overwritten */
+ if (PREDICT_FALSE (!sa0->is_tunnel))
+ {
+ ethernet_header_t *ieh0 = (ethernet_header_t *)
+ ((u8 *) vlib_buffer_get_current (b0) -
+ sizeof (ethernet_header_t));
+ ethernet_header_t *oeh0 =
+ (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size));
+ clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t));
+ }
+
+ vlib_buffer_advance (b0, adv - iv_size);
+
+ /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */
+
+ /* is ipv6 */
+ if (PREDICT_FALSE (is_ipv6))
+ {
+ ih6_0 = (ip6_and_esp_header_t *) ih0;
+ ip_hdr_size = sizeof (ip6_header_t);
+ oh6_0 = vlib_buffer_get_current (b0);
+
+ if (PREDICT_TRUE (sa0->is_tunnel))
+ {
+ next_hdr_type = IP_PROTOCOL_IPV6;
+ oh6_0->ip6.ip_version_traffic_class_and_flow_label =
+ ih6_0->ip6.ip_version_traffic_class_and_flow_label;
+ }
+ else
+ {
+ next_hdr_type = ih6_0->ip6.protocol;
+ memmove (oh6_0, ih6_0, sizeof (ip6_header_t));
+ }
+
+ oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
+ oh6_0->ip6.hop_limit = 254;
+ oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi);
+ oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq);
+ }
+ else
+ {
+ ip_hdr_size = sizeof (ip4_header_t);
+ oh0 = vlib_buffer_get_current (b0);
+
+ if (PREDICT_TRUE (sa0->is_tunnel))
+ {
+ next_hdr_type = IP_PROTOCOL_IP_IN_IP;
+ oh0->ip4.tos = ih0->ip4.tos;
+ }
+ else
+ {
+ next_hdr_type = ih0->ip4.protocol;
+ memmove (oh0, ih0, sizeof (ip4_header_t));
+ }
+
+ oh0->ip4.ip_version_and_header_length = 0x45;
+ oh0->ip4.fragment_id = 0;
+ oh0->ip4.flags_and_fragment_offset = 0;
+ oh0->ip4.ttl = 254;
+ oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
+ oh0->esp.spi = clib_net_to_host_u32 (sa0->spi);
+ oh0->esp.seq = clib_net_to_host_u32 (sa0->seq);
+ }
+
+ if (PREDICT_TRUE
+ (!is_ipv6 && sa0->is_tunnel && !sa0->is_tunnel_ip6))
+ {
+ oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32;
+ oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32;
+
+ /* in tunnel mode send it back to FIB */
+ next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ }
+ else if (is_ipv6 && sa0->is_tunnel && sa0->is_tunnel_ip6)
+ {
+ oh6_0->ip6.src_address.as_u64[0] =
+ sa0->tunnel_src_addr.ip6.as_u64[0];
+ oh6_0->ip6.src_address.as_u64[1] =
+ sa0->tunnel_src_addr.ip6.as_u64[1];
+ oh6_0->ip6.dst_address.as_u64[0] =
+ sa0->tunnel_dst_addr.ip6.as_u64[0];
+ oh6_0->ip6.dst_address.as_u64[1] =
+ sa0->tunnel_dst_addr.ip6.as_u64[1];
+
+ /* in tunnel mode send it back to FIB */
+ next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ }
+ else
+ {
+ next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT;
+ transport_mode = 1;
+ }
+
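+	  /* number of 16-byte cipher blocks needed for the payload plus the 2-byte ESP footer */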
+ int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE;
+
+	  /* pad the payload in place with RFC 4303 monotonic pad bytes 1, 2, 3, ... */
+ u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz;
+ u8 i;
+ u8 *padding = vlib_buffer_get_current (b0) + b0->current_length;
+
+ for (i = 0; i < pad_bytes; ++i)
+ padding[i] = i + 1;
+
+ f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes;
+ f0->pad_length = pad_bytes;
+ f0->next_header = next_hdr_type;
+ b0->current_length += pad_bytes + 2 + trunc_size;
+
+	  /* sw_if_index[VLIB_RX] is already set by the input path; leave it as-is */
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ struct rte_crypto_sym_op *sym_cop;
+ sym_cop = (struct rte_crypto_sym_op *) (cop + 1);
+
+ dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1);
+
+ vnet_buffer (b0)->unused[0] = next0;
+
+ mb0 = rte_mbuf_from_vlib_buffer (b0);
+ mb0->data_len = b0->current_length;
+ mb0->pkt_len = b0->current_length;
+ mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data;
+
+ dpdk_gcm_cnt_blk *icb = &priv->cb;
+
+ crypto_set_icb (icb, sa0->salt, sa0->seq, sa0->seq_hi);
+
+ u8 is_aead = sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128;
+ u32 cipher_off, cipher_len;
+ u32 auth_off = 0, auth_len = 0, aad_size = 0;
+ u8 *aad = NULL, *digest = NULL;
+ u64 digest_paddr;
+
+ digest =
+ vlib_buffer_get_current (b0) + b0->current_length - trunc_size;
+
+ digest_paddr = mb0->buf_physaddr + (digest - (u8 *) mb0->buf_addr);
+
+ if (is_aead)
+ {
+ u32 *esp_iv =
+ (u32 *) (b0->data + b0->current_data + ip_hdr_size +
+ sizeof (esp_header_t));
+ esp_iv[0] = sa0->seq;
+ esp_iv[1] = sa0->seq_hi;
+
+ cipher_off = ip_hdr_size + sizeof (esp_header_t) + iv_size;
+ cipher_len = BLOCK_SIZE * blocks;
+ iv_size = 16; /* GCM IV size, not ESP IV size */
+
+ aad = priv->aad;
+ clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size,
+ 8);
+ aad_size = 8;
+ if (PREDICT_FALSE (sa0->use_esn))
+ {
+ *((u32 *) & aad[8]) = sa0->seq_hi;
+ aad_size = 12;
+ }
+ }
+ else
+ {
+ cipher_off = ip_hdr_size + sizeof (esp_header_t);
+ cipher_len = BLOCK_SIZE * blocks + iv_size;
+
+ auth_off = ip_hdr_size;
+ auth_len = b0->current_length - ip_hdr_size - trunc_size;
+
+ if (PREDICT_FALSE (sa0->use_esn))
+ {
+ *((u32 *) digest) = sa0->seq_hi;
+ auth_len += sizeof (sa0->seq_hi);
+ }
+ }
+
+ crypto_op_setup (is_aead, mb0, cop, sess,
+ cipher_off, cipher_len, (u8 *) icb, iv_size,
+ auth_off, auth_len, aad, aad_size,
+ digest, digest_paddr, trunc_size);
+
+ if (PREDICT_FALSE (is_ipv6))
+ {
+ oh6_0->ip6.payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
+ sizeof (ip6_header_t));
+ }
+ else
+ {
+ oh0->ip4.length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4);
+ }
+
+ if (transport_mode)
+ vlib_buffer_advance (b0, -sizeof (ethernet_header_t));
+
+ trace:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ esp_encrypt_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->spi = sa0->spi;
+ tr->seq = sa0->seq - 1;
+ tr->crypto_alg = sa0->crypto_alg;
+ tr->integ_alg = sa0->integ_alg;
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
+ ESP_ENCRYPT_ERROR_RX_PKTS,
+ from_frame->n_vectors);
+ crypto_qp_data_t *qpd;
+ /* *INDENT-OFF* */
+ vec_foreach_index (i, cwm->qp_data)
+ {
+ u32 enq;
+
+ if (!n_cop_qp[i])
+ continue;
+
+ qpd = vec_elt_at_index(cwm->qp_data, i);
+ enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id,
+ qpd->cops, n_cop_qp[i]);
+ qpd->inflights += enq;
+
+ if (PREDICT_FALSE(enq < n_cop_qp[i]))
+ {
+ crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq);
+ vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq);
+
+ vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
+ ESP_ENCRYPT_ERROR_ENQ_FAIL,
+ n_cop_qp[i] - enq);
+ }
+ }
+ /* *INDENT-ON* */
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = {
+ .function = dpdk_esp_encrypt_node_fn,
+ .name = "dpdk-esp-encrypt",
+ .flags = VLIB_NODE_FLAG_IS_OUTPUT,
+ .vector_size = sizeof (u32),
+ .format_trace = format_esp_encrypt_trace,
+ .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
+ .error_strings = esp_encrypt_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [ESP_ENCRYPT_NEXT_DROP] = "error-drop",
+ }
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn)
+/*
+ * ESP Encrypt Post Node
+ */
+#define foreach_esp_encrypt_post_error \
+ _(PKTS, "ESP post pkts")
+ typedef enum
+ {
+#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym,
+ foreach_esp_encrypt_post_error
+#undef _
+ ESP_ENCRYPT_POST_N_ERROR,
+ } esp_encrypt_post_error_t;
+
+ static char *esp_encrypt_post_error_strings[] = {
+#define _(sym,string) string,
+ foreach_esp_encrypt_post_error
+#undef _
+ };
+
+vlib_node_registration_t dpdk_esp_encrypt_post_node;
+
+static u8 *
+format_esp_encrypt_post_trace (u8 * s, va_list * args)
+{
+ return s;
+}
+
+static uword
+dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, *to_next = 0, next_index;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0;
+ vlib_buffer_t *b0 = 0;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ to_next[0] = bi0;
+ to_next += 1;
+
+ next0 = vnet_buffer (b0)->unused[0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, bi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index,
+ ESP_ENCRYPT_POST_ERROR_PKTS,
+ from_frame->n_vectors);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = {
+ .function = dpdk_esp_encrypt_post_node_fn,
+ .name = "dpdk-esp-encrypt-post",
+ .vector_size = sizeof (u32),
+ .format_trace = format_esp_encrypt_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (esp_encrypt_post_error_strings),
+ .error_strings = esp_encrypt_post_error_strings,
+ .n_next_nodes = ESP_ENCRYPT_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n,
+ foreach_esp_encrypt_next
+#undef _
+ }
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node,
+ dpdk_esp_encrypt_post_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c
new file mode 100644
index 00000000..7783171f
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/ipsec.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h>
+#include <vnet/ipsec/ipsec.h>
+#include <vlib/node_funcs.h>
+
+#include <dpdk/device/dpdk.h>
+#include <dpdk/ipsec/ipsec.h>
+#include <dpdk/ipsec/esp.h>
+
+#define DPDK_CRYPTO_NB_SESS_OBJS 20000
+#define DPDK_CRYPTO_CACHE_SIZE 512
+#define DPDK_CRYPTO_PRIV_SIZE 128
+#define DPDK_CRYPTO_N_QUEUE_DESC 1024
+#define DPDK_CRYPTO_NB_COPS (1024 * 4)
+
+static int
+add_del_sa_sess (u32 sa_index, u8 is_add)
+{
+ dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+ crypto_worker_main_t *cwm;
+ u8 skip_master = vlib_num_workers () > 0;
+
+ /* *INDENT-OFF* */
+ vec_foreach (cwm, dcm->workers_main)
+ {
+ crypto_sa_session_t *sa_sess;
+ u8 is_outbound;
+
+ if (skip_master)
+ {
+ skip_master = 0;
+ continue;
+ }
+
+ for (is_outbound = 0; is_outbound < 2; is_outbound++)
+ {
+ if (is_add)
+ {
+ pool_get (cwm->sa_sess_d[is_outbound], sa_sess);
+ }
+ else
+ {
+ u8 dev_id;
+ i32 ret;
+
+ sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index);
+ dev_id = cwm->qp_data[sa_sess->qp_index].dev_id;
+
+ if (!sa_sess->sess)
+ continue;
+#if DPDK_NO_AEAD
+ ret = (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess) == NULL);
+ ASSERT (ret);
+#else
+ ret = rte_cryptodev_sym_session_clear(dev_id, sa_sess->sess);
+ ASSERT (!ret);
+
+ ret = rte_cryptodev_sym_session_free(sa_sess->sess);
+ ASSERT (!ret);
+#endif
+ memset(sa_sess, 0, sizeof(sa_sess[0]));
+ }
+ }
+ }
+  /* *INDENT-ON* */
+
+ return 0;
+}
+
+static void
+update_qp_data (crypto_worker_main_t * cwm,
+ u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx)
+{
+ crypto_qp_data_t *qpd;
+
+ /* *INDENT-OFF* */
+ vec_foreach_index (*idx, cwm->qp_data)
+ {
+ qpd = vec_elt_at_index(cwm->qp_data, *idx);
+
+ if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id &&
+ qpd->is_outbound == is_outbound)
+ return;
+ }
+ /* *INDENT-ON* */
+
+ vec_add2_aligned (cwm->qp_data, qpd, 1, CLIB_CACHE_LINE_BYTES);
+
+ qpd->dev_id = cdev_id;
+ qpd->qp_id = qp_id;
+ qpd->is_outbound = is_outbound;
+}
+
+/*
+ * return:
+ * 0: already exist
+ * 1: mapped
+ */
+static int
+add_mapping (crypto_worker_main_t * cwm,
+ u8 cdev_id, u16 qp, u8 is_outbound,
+ const struct rte_cryptodev_capabilities *cipher_cap,
+ const struct rte_cryptodev_capabilities *auth_cap)
+{
+ u16 qp_index;
+ uword key = 0, data, *ret;
+ crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key;
+
+ p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo;
+ p_key->auth_algo = (u8) auth_cap->sym.auth.algo;
+ p_key->is_outbound = is_outbound;
+#if ! DPDK_NO_AEAD
+ p_key->is_aead = cipher_cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD;
+#endif
+
+ ret = hash_get (cwm->algo_qp_map, key);
+ if (ret)
+ return 0;
+
+ update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index);
+
+ data = (uword) qp_index;
+ hash_set (cwm->algo_qp_map, key, data);
+
+ return 1;
+}
+
+/*
+ * return:
+ * 0: already exist
+ * 1: mapped
+ */
+static int
+add_cdev_mapping (crypto_worker_main_t * cwm,
+ struct rte_cryptodev_info *dev_info, u8 cdev_id,
+ u16 qp, u8 is_outbound)
+{
+ const struct rte_cryptodev_capabilities *i, *j;
+ u32 mapped = 0;
+
+ for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++)
+ {
+#if ! DPDK_NO_AEAD
+ if (i->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD)
+ {
+ struct rte_cryptodev_capabilities none = { 0 };
+
+ if (check_algo_is_supported (i, NULL) != 0)
+ continue;
+
+ none.sym.auth.algo = RTE_CRYPTO_AUTH_NULL;
+
+ mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, &none);
+ continue;
+ }
+#endif
+ if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER)
+ continue;
+
+ if (check_algo_is_supported (i, NULL) != 0)
+ continue;
+
+ for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED;
+ j++)
+ {
+ if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH)
+ continue;
+
+ if (check_algo_is_supported (j, NULL) != 0)
+ continue;
+
+ mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j);
+ }
+ }
+
+ return mapped;
+}
+
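+/* Check that the cryptodevs provide at least two queue pairs
+ * (one inbound, one outbound) per worker */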
+static int
+check_cryptodev_queues ()
+{
+ u32 n_qs = 0;
+ u8 cdev_id;
+ u32 n_req_qs = 2;
+
+ if (vlib_num_workers () > 0)
+ n_req_qs = vlib_num_workers () * 2;
+
+ for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++)
+ {
+ struct rte_cryptodev_info cdev_info;
+
+ rte_cryptodev_info_get (cdev_id, &cdev_info);
+
+ if (!
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING))
+ continue;
+
+ n_qs += cdev_info.max_nb_queue_pairs;
+ }
+
+ if (n_qs >= n_req_qs)
+ return 0;
+ else
+ return -1;
+}
+
+static clib_error_t *
+dpdk_ipsec_check_support (ipsec_sa_t * sa)
+{
+ if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
+ {
+ if (sa->integ_alg != IPSEC_INTEG_ALG_NONE)
+ return clib_error_return (0, "unsupported integ-alg %U with "
+ "crypto-alg aes-gcm-128",
+ format_ipsec_integ_alg, sa->integ_alg);
+#if DPDK_NO_AEAD
+ sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128;
+#endif
+ }
+#if DPDK_NO_AEAD
+ else if (sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE ||
+ sa->integ_alg == IPSEC_INTEG_ALG_NONE ||
+ sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)
+#else
+ else if (sa->integ_alg == IPSEC_INTEG_ALG_NONE)
+#endif
+ return clib_error_return (0,
+ "unsupported integ-alg %U with crypto-alg %U",
+ format_ipsec_integ_alg, sa->integ_alg,
+ format_ipsec_crypto_alg, sa->crypto_alg);
+
+ return 0;
+}
+
+static uword
+dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ ipsec_main_t *im = &ipsec_main;
+ dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ struct rte_cryptodev_config dev_conf;
+ struct rte_cryptodev_qp_conf qp_conf;
+ struct rte_cryptodev_info cdev_info;
+ struct rte_mempool *rmp;
+ i32 dev_id, ret;
+ u32 i, skip_master;
+#if ! DPDK_NO_AEAD
+ u32 max_sess_size = 0, sess_size;
+ i8 socket_id;
+#endif
+
+ if (check_cryptodev_queues () < 0)
+ {
+ clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec");
+ return 0;
+ }
+ dcm->enabled = 1;
+
+ vec_alloc (dcm->workers_main, tm->n_vlib_mains);
+ _vec_len (dcm->workers_main) = tm->n_vlib_mains;
+
+ skip_master = vlib_num_workers () > 0;
+
+ fprintf (stdout, "DPDK Cryptodevs info:\n");
+ fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n");
+ /* HW cryptodevs have higher dev_id, use HW first */
+ for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--)
+ {
+ u16 max_nb_qp, qp = 0;
+
+ rte_cryptodev_info_get (dev_id, &cdev_info);
+
+ if (!
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING))
+ continue;
+
+ max_nb_qp = cdev_info.max_nb_queue_pairs;
+
+ for (i = skip_master; i < tm->n_vlib_mains; i++)
+ {
+ u8 is_outbound;
+ crypto_worker_main_t *cwm;
+ uword *map;
+
+ cwm = vec_elt_at_index (dcm->workers_main, i);
+ map = cwm->algo_qp_map;
+
+ if (!map)
+ {
+ map = hash_create (0, sizeof (crypto_worker_qp_key_t));
+ if (!map)
+ {
+ clib_warning ("unable to create hash table for worker %u",
+ vlib_mains[i]->thread_index);
+ goto error;
+ }
+ cwm->algo_qp_map = map;
+ }
+
+ for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp;
+ is_outbound++)
+ qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound);
+ }
+
+ if (qp == 0)
+ continue;
+
+ dev_conf.socket_id = rte_cryptodev_socket_id (dev_id);
+ dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs;
+#if DPDK_NO_AEAD
+ dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS;
+ dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE;
+#endif
+ ret = rte_cryptodev_configure (dev_id, &dev_conf);
+ if (ret < 0)
+ {
+ clib_warning ("cryptodev %u config error", dev_id);
+ goto error;
+ }
+
+ qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC;
+ for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++)
+ {
+#if DPDK_NO_AEAD
+ ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf,
+ dev_conf.socket_id);
+#else
+ ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf,
+ dev_conf.socket_id, NULL);
+#endif
+ if (ret < 0)
+ {
+ clib_warning ("cryptodev %u qp %u setup error", dev_id, qp);
+ goto error;
+ }
+ }
+ vec_validate (dcm->cop_pools, dev_conf.socket_id);
+
+#if ! DPDK_NO_AEAD
+ sess_size = rte_cryptodev_get_private_session_size (dev_id);
+ if (sess_size > max_sess_size)
+ max_sess_size = sess_size;
+#endif
+
+ if (!vec_elt (dcm->cop_pools, dev_conf.socket_id))
+ {
+ u8 *pool_name = format (0, "crypto_op_pool_socket%u%c",
+ dev_conf.socket_id, 0);
+
+ rmp = rte_crypto_op_pool_create ((char *) pool_name,
+ RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+ DPDK_CRYPTO_NB_COPS *
+ (1 + vlib_num_workers ()),
+ DPDK_CRYPTO_CACHE_SIZE,
+ DPDK_CRYPTO_PRIV_SIZE,
+ dev_conf.socket_id);
+
+ if (!rmp)
+ {
+ clib_warning ("failed to allocate %s", pool_name);
+ vec_free (pool_name);
+ goto error;
+ }
+ vec_free (pool_name);
+ vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp;
+ }
+
+ fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs,
+ DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE);
+ }
+
+#if ! DPDK_NO_AEAD
+ /* *INDENT-OFF* */
+ vec_foreach_index (socket_id, dcm->cop_pools)
+ {
+ u8 *pool_name;
+
+ if (!vec_elt (dcm->cop_pools, socket_id))
+ continue;
+
+ vec_validate (dcm->sess_h_pools, socket_id);
+ pool_name = format (0, "crypto_sess_h_socket%u%c",
+ socket_id, 0);
+ rmp =
+ rte_mempool_create((i8 *)pool_name, DPDK_CRYPTO_NB_SESS_OBJS,
+ rte_cryptodev_get_header_session_size (),
+ 512, 0, NULL, NULL, NULL, NULL,
+ socket_id, 0);
+ if (!rmp)
+ {
+ clib_warning ("failed to allocate %s", pool_name);
+ vec_free (pool_name);
+ goto error;
+ }
+ vec_free (pool_name);
+ vec_elt (dcm->sess_h_pools, socket_id) = rmp;
+
+ vec_validate (dcm->sess_pools, socket_id);
+ pool_name = format (0, "crypto_sess_socket%u%c",
+ socket_id, 0);
+ rmp =
+ rte_mempool_create((i8 *)pool_name, DPDK_CRYPTO_NB_SESS_OBJS,
+ max_sess_size, 512, 0, NULL, NULL, NULL, NULL,
+ socket_id, 0);
+ if (!rmp)
+ {
+ clib_warning ("failed to allocate %s", pool_name);
+ vec_free (pool_name);
+ goto error;
+ }
+ vec_free (pool_name);
+ vec_elt (dcm->sess_pools, socket_id) = rmp;
+ }
+ /* *INDENT-ON* */
+#endif
+
+ dpdk_esp_init ();
+
+ /* Add new next node and set as default */
+ vlib_node_t *node, *next_node;
+
+ next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt");
+ ASSERT (next_node);
+ node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4");
+ ASSERT (node);
+ im->esp_encrypt_node_index = next_node->index;
+ im->esp_encrypt_next_index =
+ vlib_node_add_next (vm, node->index, next_node->index);
+
+ next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-decrypt");
+ ASSERT (next_node);
+ node = vlib_get_node_by_name (vm, (u8 *) "ipsec-input-ip4");
+ ASSERT (node);
+ im->esp_decrypt_node_index = next_node->index;
+ im->esp_decrypt_next_index =
+ vlib_node_add_next (vm, node->index, next_node->index);
+
+ im->cb.check_support_cb = dpdk_ipsec_check_support;
+ im->cb.add_del_sa_sess_cb = add_del_sa_sess;
+
+ for (i = skip_master; i < tm->n_vlib_mains; i++)
+ vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+
+ /* TODO cryptodev counters */
+
+ return 0;
+
+error:
+ ;
+ crypto_worker_main_t *cwm;
+ struct rte_mempool **mp;
+ /* *INDENT-OFF* */
+ vec_foreach (cwm, dcm->workers_main)
+ hash_free (cwm->algo_qp_map);
+
+ vec_foreach (mp, dcm->cop_pools)
+ {
+ if (mp)
+ rte_mempool_free (mp[0]);
+ }
+ /* *INDENT-ON* */
+ vec_free (dcm->workers_main);
+ vec_free (dcm->cop_pools);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_ipsec_process_node,static) = {
+ .function = dpdk_ipsec_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "dpdk-ipsec-process",
+ .process_log2_n_stack_bytes = 17,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h
new file mode 100644
index 00000000..a94dd682
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/ipsec.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __DPDK_IPSEC_H__
+#define __DPDK_IPSEC_H__
+
+#include <vnet/vnet.h>
+
+#undef always_inline
+#include <rte_config.h>
+#include <rte_crypto.h>
+#include <rte_cryptodev.h>
+
+#if CLIB_DEBUG > 0
+#define always_inline static inline
+#else
+#define always_inline static inline __attribute__ ((__always_inline__))
+#endif
+
+
+#define MAX_QP_PER_LCORE 16
+
+typedef struct
+{
+ u32 salt;
+ u32 iv[2];
+ u32 cnt;
+} dpdk_gcm_cnt_blk;
+
+typedef struct
+{
+ dpdk_gcm_cnt_blk cb;
+ union
+ {
+ u8 aad[12];
+ u8 icv[64];
+ };
+} dpdk_cop_priv_t;
+
+typedef struct
+{
+ u8 cipher_algo;
+ u8 auth_algo;
+ u8 is_outbound;
+ u8 is_aead;
+} crypto_worker_qp_key_t;
+
+typedef struct
+{
+ u16 dev_id;
+ u16 qp_id;
+ u16 is_outbound;
+ i16 inflights;
+ u32 bi[VLIB_FRAME_SIZE];
+ struct rte_crypto_op *cops[VLIB_FRAME_SIZE];
+ struct rte_crypto_op **free_cops;
+} crypto_qp_data_t;
+
+typedef struct
+{
+ u8 qp_index;
+ void *sess;
+} crypto_sa_session_t;
+
+typedef struct
+{
+ crypto_sa_session_t *sa_sess_d[2];
+ crypto_qp_data_t *qp_data;
+ uword *algo_qp_map;
+} crypto_worker_main_t;
+
+typedef struct
+{
+ struct rte_mempool **sess_h_pools;
+ struct rte_mempool **sess_pools;
+ struct rte_mempool **cop_pools;
+ crypto_worker_main_t *workers_main;
+ u8 enabled;
+} dpdk_crypto_main_t;
+
+dpdk_crypto_main_t dpdk_crypto_main;
+
+extern vlib_node_registration_t dpdk_crypto_input_node;
+
+#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3)
+
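+/* Top up each queue pair's free crypto-op vector from the per-socket
+ * mempool when it falls below one frame's worth of operations */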
+static_always_inline void
+crypto_alloc_cops ()
+{
+ dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+ u32 thread_index = vlib_get_thread_index ();
+ crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
+ unsigned socket_id = rte_socket_id ();
+ crypto_qp_data_t *qpd;
+
+ /* *INDENT-OFF* */
+ vec_foreach (qpd, cwm->qp_data)
+ {
+ u32 l = vec_len (qpd->free_cops);
+
+ if (PREDICT_FALSE (l < VLIB_FRAME_SIZE))
+ {
+ u32 n_alloc;
+
+ if (PREDICT_FALSE (!qpd->free_cops))
+ vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS);
+
+ n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id],
+ RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+ &qpd->free_cops[l],
+ CRYPTO_N_FREE_COPS - l - 1);
+
+ _vec_len (qpd->free_cops) = l + n_alloc;
+ }
+ }
+ /* *INDENT-ON* */
+}
+
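+/*
+ * Return n crypto ops to the queue-pair's free-op cache. If the cache
+ * would overflow CRYPTO_N_FREE_COPS, hand a VLIB_FRAME_SIZE chunk back
+ * to the originating mempool first.
+ */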
+static_always_inline void
+crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n)
+{
+ u32 l = vec_len (qpd->free_cops);
+
+ if (l + n >= CRYPTO_N_FREE_COPS)
+ {
+ l -= VLIB_FRAME_SIZE;
+ rte_mempool_put_bulk (cops[0]->mempool,
+ (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE);
+ }
+ clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n);
+
+ _vec_len (qpd->free_cops) = l + n;
+}
+
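+/*
+ * Match one cryptodev capability entry against the cipher/auth/AEAD
+ * algorithms this plugin can drive. Returns 0 on a match (optionally
+ * copying the printable algorithm name), -1 otherwise.
+ */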
+static_always_inline int
+check_algo_is_supported (const struct rte_cryptodev_capabilities *cap,
+ char *name)
+{
+ struct
+ {
+ enum rte_crypto_sym_xform_type type;
+ union
+ {
+ enum rte_crypto_auth_algorithm auth;
+ enum rte_crypto_cipher_algorithm cipher;
+#if ! DPDK_NO_AEAD
+ enum rte_crypto_aead_algorithm aead;
+#endif
+ };
+ char *name;
+ } supported_algo[] =
+ {
+ {.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+  .cipher = RTE_CRYPTO_CIPHER_NULL, .name = "NULL"},
+ {.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+  .cipher = RTE_CRYPTO_CIPHER_AES_CBC, .name = "AES_CBC"},
+#if DPDK_NO_AEAD
+ {.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+  .cipher = RTE_CRYPTO_CIPHER_AES_GCM, .name = "AES-GCM"},
+#else
+ {.type = RTE_CRYPTO_SYM_XFORM_AEAD,
+  .aead = RTE_CRYPTO_AEAD_AES_GCM, .name = "AES-GCM"},
+#endif
+ {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+  .auth = RTE_CRYPTO_AUTH_NULL, .name = "NULL"},
+ {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+  .auth = RTE_CRYPTO_AUTH_SHA1_HMAC, .name = "HMAC-SHA1"},
+ {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+  .auth = RTE_CRYPTO_AUTH_SHA256_HMAC, .name = "HMAC-SHA256"},
+ {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+  .auth = RTE_CRYPTO_AUTH_SHA384_HMAC, .name = "HMAC-SHA384"},
+ {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+  .auth = RTE_CRYPTO_AUTH_SHA512_HMAC, .name = "HMAC-SHA512"},
+#if DPDK_NO_AEAD
+ {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+  .auth = RTE_CRYPTO_AUTH_AES_GCM, .name = "AES-GCM"},
+#endif
+ /* tail */
+ {.type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED}
+ };
+
+ uint32_t i = 0;
+
+ if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+ return -1;
+
+ while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED)
+ {
+ if (cap->sym.xform_type == supported_algo[i].type)
+ {
+ if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
+ cap->sym.cipher.algo == supported_algo[i].cipher) ||
+#if ! DPDK_NO_AEAD
+ (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD &&
+ cap->sym.aead.algo == supported_algo[i].aead) ||
+#endif
+ (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH &&
+ cap->sym.auth.algo == supported_algo[i].auth))
+ {
+ if (name)
+ strcpy (name, supported_algo[i].name);
+ return 0;
+ }
+ }
+
+ i++;
+ }
+
+ return -1;
+}
+
+#endif /* __DPDK_IPSEC_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
new file mode 100644
index 00000000..f2f1ba22
--- /dev/null
+++ b/src/plugins/dpdk/main.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <dpdk/device/dpdk.h>
+#include <vpp/app/version.h>
+
+/*
+ * Called by the dpdk driver's rte_delay_us() function.
+ * Return 0 to have DPDK do a regular delay loop.
+ * Return 1 to skip the delay loop because we are suspending
+ * the calling vlib process instead.
+ */
+static int
+rte_delay_us_override (unsigned us)
+{
+ vlib_main_t *vm;
+
+ /* Don't bother intercepting for short delays */
+ if (us < 10)
+ return 0;
+
+ /*
+ * Only intercept if we are in a vlib process.
+ * If we are called from a vlib worker thread or the vlib main
+ * thread then do not intercept. (Must not be called from an
+ * independent pthread).
+ */
+ if (vlib_get_thread_index () == 0)
+ {
+ /*
+ * We're in the vlib main thread or a vlib process. Make sure
+ * the process is running and we're not still initializing.
+ */
+ vm = vlib_get_main ();
+ if (vlib_in_process_context (vm))
+ {
+ /* Only suspend for the admin_up_down_process */
+ vlib_process_t *proc = vlib_get_current_process (vm);
+ if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) ||
+ (proc->node_runtime.function != admin_up_down_process))
+ return 0;
+
+ f64 delay = 1e-6 * us;
+ vlib_process_suspend (vm, delay);
+ return 1;
+ }
+ }
+ return 0; // no override
+}
+
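+/*
+ * Trampoline registered with rte_delay_us_callback_register(): fall
+ * back to DPDK's blocking delay whenever the override declines to
+ * suspend the calling process.
+ */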
+static void
+rte_delay_us_override_cb (unsigned us)
+{
+ if (rte_delay_us_override (us) == 0)
+ rte_delay_us_block (us);
+}
+
+static clib_error_t * dpdk_main_init (vlib_main_t * vm)
+{
+ dpdk_main_t * dm = &dpdk_main;
+ clib_error_t * error = 0;
+
+ dm->vlib_main = vm;
+ dm->vnet_main = vnet_get_main ();
+
+ if ((error = vlib_call_init_function (vm, dpdk_init)))
+ return error;
+
+ /* register custom delay function */
+ rte_delay_us_callback_register (rte_delay_us_override_cb);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (dpdk_main_init);
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Data Plane Development Kit (DPDK)",
+};
+/* *INDENT-ON* */
diff --git a/src/plugins/dpdk/thread.c b/src/plugins/dpdk/thread.c
new file mode 100644
index 00000000..3a3fcc6c
--- /dev/null
+++ b/src/plugins/dpdk/thread.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rte_config.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_version.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <dpdk/device/dpdk.h>
+#include <dpdk/device/dpdk_priv.h>
+
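+/*
+ * Launch a vlib worker thread on the given DPDK lcore via the EAL
+ * launcher, rather than letting vlib spawn its own pthread.
+ */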
+static clib_error_t *
+dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
+{
+ int r;
+ r = rte_eal_remote_launch (fp, (void *) w, lcore_id);
+ if (r)
+ return clib_error_return (0, "Failed to launch thread %u", lcore_id);
+ return 0;
+}
+
+static clib_error_t *
+dpdk_thread_set_lcore (u32 thread, u16 lcore)
+{
+ return 0;
+}
+
+static vlib_thread_callbacks_t callbacks = {
+ .vlib_launch_thread_cb = &dpdk_launch_thread,
+ .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore,
+};
+
+static clib_error_t *
+dpdk_thread_init (vlib_main_t * vm)
+{
+ vlib_thread_cb_register (vm, &callbacks);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (dpdk_thread_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/flowprobe.am b/src/plugins/flowprobe.am
new file mode 100644
index 00000000..c56e246d
--- /dev/null
+++ b/src/plugins/flowprobe.am
@@ -0,0 +1,37 @@
+
+# Copyright (c) <current-year> <your-organization>
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppplugins_LTLIBRARIES += flowprobe_plugin.la
+vppapitestplugins_LTLIBRARIES += flowprobe_test_plugin.la
+
+flowprobe_plugin_la_SOURCES = flowprobe/flowprobe.c \
+ flowprobe/node.c \
+ flowprobe/flowprobe_plugin.api.h
+
+BUILT_SOURCES += \
+ flowprobe/flowprobe.api.h \
+ flowprobe/flowprobe.api.json
+
+noinst_HEADERS += \
+ flowprobe/flowprobe_all_api_h.h \
+ flowprobe/flowprobe_msg_enum.h \
+ flowprobe/flowprobe.api.h
+
+flowprobe_test_plugin_la_SOURCES = \
+ flowprobe/flowprobe_test.c \
+ flowprobe/flowprobe_plugin.api.h
+
+API_FILES += flowprobe/flowprobe.api
+
+# vi:syntax=automake
diff --git a/src/plugins/flowprobe/flowprobe.api b/src/plugins/flowprobe/flowprobe.api
new file mode 100644
index 00000000..3f8c583b
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe.api
@@ -0,0 +1,40 @@
+/* Define a simple enable-disable binary API to control the feature */
+
+/** \file
+ This file defines the vpp control-plane API messages
+ used to control the flowprobe plugin
+*/
+
+/** \brief Enable / disable per-packet IPFIX recording on an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - enable the feature if non-zero, else disable
+ @param which - datapath variant: 0 = ipv4, 1 = ipv6, 2 = l2
+ @param sw_if_index - index of the interface
+*/
+autoreply manual_print define flowprobe_tx_interface_add_del
+{
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ /* Enable / disable the feature */
+ u8 is_add;
+ u8 which; /* 0 = ipv4, 1 = ipv6, 2 = l2 */
+
+ /* Interface handle */
+ u32 sw_if_index;
+};
+
+autoreply define flowprobe_params
+{
+ u32 client_index;
+ u32 context;
+ u8 record_l2;
+ u8 record_l3;
+ u8 record_l4;
+ u32 active_timer; /* ~0 is default, 0 is off */
+ u32 passive_timer; /* ~0 is default, 0 is off */
+};
diff --git a/src/plugins/flowprobe/flowprobe.c b/src/plugins/flowprobe/flowprobe.c
new file mode 100644
index 00000000..884b5a2e
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe.c
@@ -0,0 +1,1137 @@
+/*
+ * flowprobe.c - ipfix probe plugin
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Per-packet IPFIX flow record generator plugin
+ *
+ * This file implements vpp plugin registration mechanics,
+ * debug CLI, and binary API handling.
+ */
+
+#include <vnet/vnet.h>
+#include <vpp/app/version.h>
+#include <vnet/plugin/plugin.h>
+#include <flowprobe/flowprobe.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <flowprobe/flowprobe_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_printfun
+
+flowprobe_main_t flowprobe_main;
+vlib_node_registration_t flowprobe_walker_node;
+static vlib_node_registration_t flowprobe_timer_node;
+uword flowprobe_walker_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f);
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_api_version
+
+#define REPLY_MSG_ID_BASE fm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* Define the per-interface configurable features */
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (flow_perpacket_ip4, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "flowprobe-ip4",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VNET_FEATURE_INIT (flow_perpacket_ip6, static) =
+{
+ .arc_name = "ip6-output",
+ .node_name = "flowprobe-ip6",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VNET_FEATURE_INIT (flow_perpacket_l2, static) =
+{
+ .arc_name = "interface-output",
+ .node_name = "flowprobe-l2",
+ .runs_before = VNET_FEATURES ("interface-tx"),
+};
+/* *INDENT-ON* */
+
+/* Macro to finish up custom dump fns */
+#define FINISH \
+ vec_add1 (s, 0); \
+ vl_print (handle, (char *)s); \
+ vec_free (s); \
+ return handle;
+
+static inline ipfix_field_specifier_t *
+flowprobe_template_ip4_fields (ipfix_field_specifier_t * f)
+{
+#define flowprobe_template_ip4_field_count() 4
+ /* sourceIPv4Address, TLV type 8, u32 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ sourceIPv4Address, 4);
+ f++;
+ /* destinationIPv4Address, TLV type 12, u32 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ destinationIPv4Address, 4);
+ f++;
+ /* protocolIdentifier, TLV type 4, u8 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ protocolIdentifier, 1);
+ f++;
+ /* octetDeltaCount, TLV type 1, u64 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ octetDeltaCount, 8);
+ f++;
+ return f;
+}
+
+static inline ipfix_field_specifier_t *
+flowprobe_template_ip6_fields (ipfix_field_specifier_t * f)
+{
+#define flowprobe_template_ip6_field_count() 4
+ /* sourceIPv6Address, TLV type 27, 16 octets */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ sourceIPv6Address, 16);
+ f++;
+ /* destinationIPv6Address, TLV type 28, 16 octets */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ destinationIPv6Address, 16);
+ f++;
+ /* protocolIdentifier, TLV type 4, u8 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ protocolIdentifier, 1);
+ f++;
+ /* octetDeltaCount, TLV type 1, u64 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ octetDeltaCount, 8);
+ f++;
+ return f;
+}
+
+static inline ipfix_field_specifier_t *
+flowprobe_template_l2_fields (ipfix_field_specifier_t * f)
+{
+#define flowprobe_template_l2_field_count() 3
+ /* sourceMacAddress, TLV type 56, u8[6] we hope */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ sourceMacAddress, 6);
+ f++;
+ /* destinationMacAddress, TLV type 80, u8[6] we hope */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ destinationMacAddress, 6);
+ f++;
+ /* ethernetType, TLV type 256, u16 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ ethernetType, 2);
+ f++;
+ return f;
+}
+
+static inline ipfix_field_specifier_t *
+flowprobe_template_common_fields (ipfix_field_specifier_t * f)
+{
+#define flowprobe_template_common_field_count() 5
+ /* ingressInterface, TLV type 10, u32 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ ingressInterface, 4);
+ f++;
+
+ /* egressInterface, TLV type 14, u32 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ egressInterface, 4);
+ f++;
+
+ /* packetDeltaCount, TLV type 2, u64 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ packetDeltaCount, 8);
+ f++;
+
+ /* flowStartNanoseconds, TLV type 156, u64 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ flowStartNanoseconds, 8);
+ f++;
+
+ /* flowEndNanoseconds, TLV type 157, u64 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ flowEndNanoseconds, 8);
+ f++;
+
+ return f;
+}
+
+static inline ipfix_field_specifier_t *
+flowprobe_template_l4_fields (ipfix_field_specifier_t * f)
+{
+#define flowprobe_template_l4_field_count() 3
+ /* sourceTransportPort, TLV type 7, u16 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ sourceTransportPort, 2);
+ f++;
+ /* destinationTransportPort, TLV type 11, u16 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ destinationTransportPort, 2);
+ f++;
+ /* tcpControlBits, TLV type 6, u16 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ tcpControlBits, 2);
+ f++;
+
+ return f;
+}
+
+/**
+ * @brief Create an IPFIX template packet rewrite string
+ * @param frm flow_report_main_t *
+ * @param fr flow_report_t *
+ * @param collector_address ip4_address_t * the IPFIX collector address
+ * @param src_address ip4_address_t * the source address we should use
+ * @param collector_port u16 the collector port we should use, host byte order
+ * @returns u8 * vector containing the indicated IPFIX template packet
+ */
+static inline u8 *
+flowprobe_template_rewrite_inline (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port,
+ flowprobe_variant_t which)
+{
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s;
+ ipfix_template_header_t *t;
+ ipfix_field_specifier_t *f;
+ ipfix_field_specifier_t *first_field;
+ u8 *rewrite = 0;
+ ip4_ipfix_template_packet_t *tp;
+ u32 field_count = 0;
+ flow_report_stream_t *stream;
+ flowprobe_main_t *fm = &flowprobe_main;
+ flowprobe_record_t flags = fr->opaque.as_uword;
+ bool collect_ip4 = false, collect_ip6 = false;
+
+ stream = &frm->streams[fr->stream_index];
+
+ if (flags & FLOW_RECORD_L3)
+ {
+ collect_ip4 = which == FLOW_VARIANT_L2_IP4 || which == FLOW_VARIANT_IP4;
+ collect_ip6 = which == FLOW_VARIANT_L2_IP6 || which == FLOW_VARIANT_IP6;
+ if (which == FLOW_VARIANT_L2_IP4)
+ flags |= FLOW_RECORD_L2_IP4;
+ if (which == FLOW_VARIANT_L2_IP6)
+ flags |= FLOW_RECORD_L2_IP6;
+ }
+
+ field_count += flowprobe_template_common_field_count ();
+ if (flags & FLOW_RECORD_L2)
+ field_count += flowprobe_template_l2_field_count ();
+ if (collect_ip4)
+ field_count += flowprobe_template_ip4_field_count ();
+ if (collect_ip6)
+ field_count += flowprobe_template_ip6_field_count ();
+ if (flags & FLOW_RECORD_L4)
+ field_count += flowprobe_template_l4_field_count ();
+
+ /* allocate rewrite space */
+ vec_validate_aligned
+ (rewrite, sizeof (ip4_ipfix_template_packet_t)
+ + field_count * sizeof (ipfix_field_specifier_t) - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ tp = (ip4_ipfix_template_packet_t *) rewrite;
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+ t = (ipfix_template_header_t *) (s + 1);
+ first_field = f = (ipfix_field_specifier_t *) (t + 1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->src_address.as_u32 = src_address->as_u32;
+ ip->dst_address.as_u32 = collector_address->as_u32;
+ udp->src_port = clib_host_to_net_u16 (stream->src_port);
+ udp->dst_port = clib_host_to_net_u16 (collector_port);
+ udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
+
+ /* FIXUP: message header export_time */
+ /* FIXUP: message header sequence_number */
+ h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+ /* Add TLVs to the template */
+ f = flowprobe_template_common_fields (f);
+
+ if (flags & FLOW_RECORD_L2)
+ f = flowprobe_template_l2_fields (f);
+ if (collect_ip4)
+ f = flowprobe_template_ip4_fields (f);
+ if (collect_ip6)
+ f = flowprobe_template_ip6_fields (f);
+ if (flags & FLOW_RECORD_L4)
+ f = flowprobe_template_l4_fields (f);
+
+ /* Back to the template packet... */
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+
+ ASSERT (f - first_field);
+ /* Field count in this template */
+ t->id_count = ipfix_id_count (fr->template_id, f - first_field);
+
+ fm->template_size[flags] = (u8 *) f - (u8 *) s;
+
+ /* set length in octets */
+ s->set_id_length =
+ ipfix_set_id_length (2 /* set_id */ , (u8 *) f - (u8 *) s);
+
+ /* message length in octets */
+ h->version_length = version_length ((u8 *) f - (u8 *) h);
+
+ ip->length = clib_host_to_net_u16 ((u8 *) f - (u8 *) ip);
+ ip->checksum = ip4_header_checksum (ip);
+
+ return rewrite;
+}
+
+static u8 *
+flowprobe_template_rewrite_ip6 (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ return flowprobe_template_rewrite_inline
+ (frm, fr, collector_address, src_address, collector_port,
+ FLOW_VARIANT_IP6);
+}
+
+static u8 *
+flowprobe_template_rewrite_ip4 (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ return flowprobe_template_rewrite_inline
+ (frm, fr, collector_address, src_address, collector_port,
+ FLOW_VARIANT_IP4);
+}
+
+static u8 *
+flowprobe_template_rewrite_l2 (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ return flowprobe_template_rewrite_inline
+ (frm, fr, collector_address, src_address, collector_port,
+ FLOW_VARIANT_L2);
+}
+
+static u8 *
+flowprobe_template_rewrite_l2_ip4 (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ return flowprobe_template_rewrite_inline
+ (frm, fr, collector_address, src_address, collector_port,
+ FLOW_VARIANT_L2_IP4);
+}
+
+static u8 *
+flowprobe_template_rewrite_l2_ip6 (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ return flowprobe_template_rewrite_inline
+ (frm, fr, collector_address, src_address, collector_port,
+ FLOW_VARIANT_L2_IP6);
+}
+
+/**
+ * @brief Flush accumulated data
+ * @param frm flow_report_main_t *
+ * @param fr flow_report_t *
+ * @param f vlib_frame_t *
+ *
+ * <em>Notes:</em>
+ * This function must simply return the incoming frame, or no template packets
+ * will be sent.
+ */
+vlib_frame_t *
+flowprobe_data_callback_ip4 (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f, u32 * to_next, u32 node_index)
+{
+ flowprobe_flush_callback_ip4 ();
+ return f;
+}
+
+vlib_frame_t *
+flowprobe_data_callback_ip6 (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f, u32 * to_next, u32 node_index)
+{
+ flowprobe_flush_callback_ip6 ();
+ return f;
+}
+
+vlib_frame_t *
+flowprobe_data_callback_l2 (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f, u32 * to_next, u32 node_index)
+{
+ flowprobe_flush_callback_l2 ();
+ return f;
+}
+
+static int
+flowprobe_template_add_del (u32 domain_id, u16 src_port,
+ flowprobe_record_t flags,
+ vnet_flow_data_callback_t * flow_data_callback,
+ vnet_flow_rewrite_callback_t * rewrite_callback,
+ bool is_add, u16 * template_id)
+{
+ flow_report_main_t *frm = &flow_report_main;
+ vnet_flow_report_add_del_args_t a = {
+ .rewrite_callback = rewrite_callback,
+ .flow_data_callback = flow_data_callback,
+ .is_add = is_add,
+ .domain_id = domain_id,
+ .src_port = src_port,
+ .opaque.as_uword = flags,
+ };
+ return vnet_flow_report_add_del (frm, &a, template_id);
+}
+
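+/*
+ * Per-worker timer-wheel expiry callback: stash the pool indices of
+ * expired flow entries so the walker node can export and recycle them.
+ */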
+static void
+flowprobe_expired_timer_callback (u32 * expired_timers)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ flowprobe_main_t *fm = &flowprobe_main;
+ u32 my_cpu_number = vm->thread_index;
+ int i;
+ u32 poolindex;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ poolindex = expired_timers[i] & 0x7FFFFFFF;
+ vec_add1 (fm->expired_passive_per_worker[my_cpu_number], poolindex);
+ }
+}
+
+static clib_error_t *
+flowprobe_create_state_tables (u32 active_timer)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error = 0;
+ u32 num_threads;
+ int i;
+
+ /* Decide how many worker threads we have */
+ num_threads = 1 /* main thread */ + tm->n_threads;
+
+ /* Hash table per worker */
+ fm->ht_log2len = FLOWPROBE_LOG2_HASHSIZE;
+
+ /* Init per worker flow state and timer wheels */
+ if (active_timer)
+ {
+ vec_validate (fm->timers_per_worker, num_threads - 1);
+ vec_validate (fm->expired_passive_per_worker, num_threads - 1);
+ vec_validate (fm->hash_per_worker, num_threads - 1);
+ vec_validate (fm->pool_per_worker, num_threads - 1);
+
+ for (i = 0; i < num_threads; i++)
+ {
+ int j;
+ pool_alloc (fm->pool_per_worker[i], 1 << fm->ht_log2len);
+ vec_resize (fm->hash_per_worker[i], 1 << fm->ht_log2len);
+ for (j = 0; j < (1 << fm->ht_log2len); j++)
+ fm->hash_per_worker[i][j] = ~0;
+ fm->timers_per_worker[i] =
+ clib_mem_alloc (sizeof (TWT (tw_timer_wheel)));
+ tw_timer_wheel_init_2t_1w_2048sl (fm->timers_per_worker[i],
+ flowprobe_expired_timer_callback,
+ 1.0, 1024);
+ }
+ fm->disabled = true;
+ }
+ else
+ {
+ f64 now = vlib_time_now (vm);
+ vec_validate (fm->stateless_entry, num_threads - 1);
+ for (i = 0; i < num_threads; i++)
+ fm->stateless_entry[i].last_exported = now;
+ fm->disabled = false;
+ }
+ fm->initialized = true;
+ return error;
+}
+
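+/*
+ * Check what is configured on the interface: returns -1 if no datapath
+ * is enabled, 0 if a different variant is enabled, 1 if the requested
+ * variant is already enabled.
+ */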
+static int
+validate_feature_on_interface (flowprobe_main_t * fm, u32 sw_if_index,
+ u8 which)
+{
+ vec_validate_init_empty (fm->flow_per_interface, sw_if_index, ~0);
+
+ if (fm->flow_per_interface[sw_if_index] == (u8) ~ 0)
+ return -1;
+ else if (fm->flow_per_interface[sw_if_index] != which)
+ return 0;
+ else
+ return 1;
+}
+
+/**
+ * @brief Configure / deconfigure the IPFIX flow-per-packet feature
+ * @param fm flowprobe_main_t * fm
+ * @param sw_if_index u32 the desired interface
+ * @param which u8 the datapath variant (FLOW_VARIANT_IP4, _IP6 or _L2)
+ * @param is_add int 1 to enable the feature, 0 to disable it
+ * @returns 0 if successful, non-zero otherwise
+ */
+
+static int
+flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
+ u32 sw_if_index, u8 which, int is_add)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+ u16 template_id = 0;
+ flowprobe_record_t flags = fm->record;
+
+ fm->flow_per_interface[sw_if_index] = (is_add) ? which : (u8) ~ 0;
+ fm->template_per_flow[which] += (is_add) ? 1 : -1;
+ if (is_add && fm->template_per_flow[which] > 1)
+ template_id = fm->template_reports[flags];
+
+ if ((is_add && fm->template_per_flow[which] == 1) ||
+ (!is_add && fm->template_per_flow[which] == 0))
+ {
+ if (which == FLOW_VARIANT_L2)
+ {
+ if (fm->record & FLOW_RECORD_L2)
+ {
+ rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
+ flowprobe_data_callback_l2,
+ flowprobe_template_rewrite_l2,
+ is_add, &template_id);
+ }
+ if (fm->record & FLOW_RECORD_L3 || fm->record & FLOW_RECORD_L4)
+ {
+ rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
+ flowprobe_data_callback_l2,
+ flowprobe_template_rewrite_l2_ip4,
+ is_add, &template_id);
+ fm->template_reports[flags | FLOW_RECORD_L2_IP4] =
+ (is_add) ? template_id : 0;
+ rv =
+ flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
+ flowprobe_data_callback_l2,
+ flowprobe_template_rewrite_l2_ip6,
+ is_add, &template_id);
+ fm->template_reports[flags | FLOW_RECORD_L2_IP6] =
+ (is_add) ? template_id : 0;
+
+ /* Special case L2 */
+ fm->context[FLOW_VARIANT_L2_IP4].flags =
+ flags | FLOW_RECORD_L2_IP4;
+ fm->context[FLOW_VARIANT_L2_IP6].flags =
+ flags | FLOW_RECORD_L2_IP6;
+
+ fm->template_reports[flags] = template_id;
+ }
+ }
+ else if (which == FLOW_VARIANT_IP4)
+ rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
+ flowprobe_data_callback_ip4,
+ flowprobe_template_rewrite_ip4,
+ is_add, &template_id);
+ else if (which == FLOW_VARIANT_IP6)
+ rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
+ flowprobe_data_callback_ip6,
+ flowprobe_template_rewrite_ip6,
+ is_add, &template_id);
+ }
+ if (rv && rv != VNET_API_ERROR_VALUE_EXIST)
+ {
+ clib_warning ("vnet_flow_report_add_del returned %d", rv);
+ return -1;
+ }
+
+ if (which != (u8) ~ 0)
+ {
+ fm->context[which].flags = fm->record;
+ fm->template_reports[flags] = (is_add) ? template_id : 0;
+ }
+
+ if (which == FLOW_VARIANT_IP4)
+ vnet_feature_enable_disable ("ip4-output", "flowprobe-ip4",
+ sw_if_index, is_add, 0, 0);
+ else if (which == FLOW_VARIANT_IP6)
+ vnet_feature_enable_disable ("ip6-output", "flowprobe-ip6",
+ sw_if_index, is_add, 0, 0);
+ else if (which == FLOW_VARIANT_L2)
+ vnet_feature_enable_disable ("interface-output", "flowprobe-l2",
+ sw_if_index, is_add, 0, 0);
+
+ /* Stateful flow collection */
+ if (is_add && !fm->initialized)
+ {
+ flowprobe_create_state_tables (fm->active_timer);
+ if (fm->active_timer)
+ vlib_process_signal_event (vm, flowprobe_timer_node.index, 1, 0);
+ }
+
+ return 0;
+}
+
+/**
+ * @brief API message handler
+ * @param mp vl_api_flowprobe_tx_interface_add_del_t * mp the api message
+ */
+void vl_api_flowprobe_tx_interface_add_del_t_handler
+ (vl_api_flowprobe_tx_interface_add_del_t * mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_tx_interface_add_del_reply_t *rmp;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ if (mp->which != FLOW_VARIANT_IP4 && mp->which != FLOW_VARIANT_L2
+ && mp->which != FLOW_VARIANT_IP6)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+
+ if (fm->record == 0)
+ {
+ clib_warning ("Please specify flowprobe params record first...");
+ rv = VNET_API_ERROR_CANNOT_ENABLE_DISABLE_FEATURE;
+ goto out;
+ }
+
+ rv = validate_feature_on_interface (fm, sw_if_index, mp->which);
+ if ((rv == 1 && mp->is_add == 1) || rv == 0)
+ {
+ rv = VNET_API_ERROR_CANNOT_ENABLE_DISABLE_FEATURE;
+ goto out;
+ }
+
+ rv = flowprobe_tx_interface_add_del_feature
+ (fm, sw_if_index, mp->which, mp->is_add);
+
+out:
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_FLOWPROBE_TX_INTERFACE_ADD_DEL_REPLY);
+}
+
+/**
+ * @brief API message custom-dump function
+ * @param mp vl_api_flowprobe_tx_interface_add_del_t * mp the api message
+ * @param handle void * print function handle
+ * @returns the print handle (void *)
+ */
+static void *vl_api_flowprobe_tx_interface_add_del_t_print
+ (vl_api_flowprobe_tx_interface_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: flowprobe_tx_interface_add_del ");
+ s = format (s, "sw_if_index %d is_add %d which %d ",
+ clib_host_to_net_u32 (mp->sw_if_index),
+ (int) mp->is_add, (int) mp->which);
+ FINISH;
+}
+
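+/* Return the index of the first vector element NOT equal to E,
+   or ~0 if every element equals E. */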
+#define vec_neg_search(v,E) \
+({ \
+ word _v(i) = 0; \
+ while (_v(i) < vec_len(v) && v[_v(i)] == E) \
+ { \
+ _v(i)++; \
+ } \
+ if (_v(i) == vec_len(v)) \
+ _v(i) = ~0; \
+ _v(i); \
+})
+
+static int
+flowprobe_params (flowprobe_main_t * fm, u8 record_l2,
+ u8 record_l3, u8 record_l4,
+ u32 active_timer, u32 passive_timer)
+{
+ flowprobe_record_t flags = 0;
+
+ if (vec_neg_search (fm->flow_per_interface, (u8) ~ 0) != ~0)
+ return ~0;
+
+ if (record_l2)
+ flags |= FLOW_RECORD_L2;
+ if (record_l3)
+ flags |= FLOW_RECORD_L3;
+ if (record_l4)
+ flags |= FLOW_RECORD_L4;
+
+ fm->record = flags;
+
+ /*
+ * Timers: ~0 is default, 0 is off
+ */
+ fm->active_timer =
+ (active_timer == (u32) ~ 0 ? FLOWPROBE_TIMER_ACTIVE : active_timer);
+ fm->passive_timer =
+ (passive_timer == (u32) ~ 0 ? FLOWPROBE_TIMER_PASSIVE : passive_timer);
+
+ return 0;
+}
+
+void
+vl_api_flowprobe_params_t_handler (vl_api_flowprobe_params_t * mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_params_reply_t *rmp;
+ int rv = 0;
+
+ rv = flowprobe_params
+ (fm, mp->record_l2, mp->record_l3, mp->record_l4,
+ clib_net_to_host_u32 (mp->active_timer),
+ clib_net_to_host_u32 (mp->passive_timer));
+
+ REPLY_MACRO (VL_API_FLOWPROBE_PARAMS_REPLY);
+}
+
+/* List of message types that this plugin understands */
+#define foreach_flowprobe_plugin_api_msg \
+_(FLOWPROBE_TX_INTERFACE_ADD_DEL, flowprobe_tx_interface_add_del) \
+_(FLOWPROBE_PARAMS, flowprobe_params)
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Flow per Packet",
+};
+/* *INDENT-ON* */
+
+u8 *
+format_flowprobe_entry (u8 * s, va_list * args)
+{
+ flowprobe_entry_t *e = va_arg (*args, flowprobe_entry_t *);
+ s = format (s, " %d/%d", e->key.rx_sw_if_index, e->key.tx_sw_if_index);
+
+ s = format (s, " %U %U", format_ethernet_address, &e->key.src_mac,
+ format_ethernet_address, &e->key.dst_mac);
+ s = format (s, " %U -> %U",
+ format_ip46_address, &e->key.src_address, IP46_TYPE_ANY,
+ format_ip46_address, &e->key.dst_address, IP46_TYPE_ANY);
+ s = format (s, " %d", e->key.protocol);
+ s = format (s, " %d %d\n", clib_net_to_host_u16 (e->key.src_port),
+ clib_net_to_host_u16 (e->key.dst_port));
+
+ return s;
+}
+
+static clib_error_t *
+flowprobe_show_table_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cm)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ int i;
+ flowprobe_entry_t *e;
+
+ vlib_cli_output (vm, "Dumping IPFIX table");
+
+ for (i = 0; i < vec_len (fm->pool_per_worker); i++)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (e, fm->pool_per_worker[i], (
+ {
+ vlib_cli_output (vm, "%U",
+ format_flowprobe_entry,
+ e);
+ }));
+ /* *INDENT-ON* */
+
+ }
+ return 0;
+}
+
+static clib_error_t *
+flowprobe_show_stats_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cm)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ int i;
+
+ vlib_cli_output (vm, "IPFIX table statistics");
+ vlib_cli_output (vm, "Flow entry size: %d\n", sizeof (flowprobe_entry_t));
+ vlib_cli_output (vm, "Flow pool size per thread: %d\n",
+ 0x1 << FLOWPROBE_LOG2_HASHSIZE);
+
+ for (i = 0; i < vec_len (fm->pool_per_worker); i++)
+ vlib_cli_output (vm, "Pool utilisation thread %d is %d%%\n", i,
+ (100 * pool_elts (fm->pool_per_worker[i])) /
+ (0x1 << FLOWPROBE_LOG2_HASHSIZE));
+ return 0;
+}
+
+static clib_error_t *
+flowprobe_tx_interface_add_del_feature_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ u32 sw_if_index = ~0;
+ int is_add = 1;
+ u8 which = FLOW_VARIANT_IP4;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "disable"))
+ is_add = 0;
+ else if (unformat (input, "%U", unformat_vnet_sw_interface,
+ fm->vnet_main, &sw_if_index));
+ else if (unformat (input, "ip4"))
+ which = FLOW_VARIANT_IP4;
+ else if (unformat (input, "ip6"))
+ which = FLOW_VARIANT_IP6;
+ else if (unformat (input, "l2"))
+ which = FLOW_VARIANT_L2;
+ else
+ break;
+ }
+
+ if (fm->record == 0)
+ return clib_error_return (0,
+ "Please specify flowprobe params record first...");
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Please specify an interface...");
+
+ rv = validate_feature_on_interface (fm, sw_if_index, which);
+ if (rv == 1)
+ {
+ if (is_add)
+ return clib_error_return (0,
+ "Datapath is already enabled for given interface...");
+ }
+ else if (rv == 0)
+ return clib_error_return (0,
+ "Interface has enable different datapath ...");
+
+ rv =
+ flowprobe_tx_interface_add_del_feature (fm, sw_if_index, which, is_add);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return
+ (0, "Invalid interface, only works on physical ports");
+ break;
+
+ case VNET_API_ERROR_UNIMPLEMENTED:
+ return clib_error_return (0, "ip6 not supported");
+ break;
+
+ default:
+ return clib_error_return (0, "flowprobe_enable_disable returned %d",
+ rv);
+ }
+ return 0;
+}
+
+static clib_error_t *
+flowprobe_params_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ bool record_l2 = false, record_l3 = false, record_l4 = false;
+ u32 active_timer = ~0;
+ u32 passive_timer = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "active %d", &active_timer))
+ ;
+ else if (unformat (input, "passive %d", &passive_timer))
+ ;
+ else if (unformat (input, "record"))
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "l2"))
+ record_l2 = true;
+ else if (unformat (input, "l3"))
+ record_l3 = true;
+ else if (unformat (input, "l4"))
+ record_l4 = true;
+ else
+ break;
+ }
+ else
+ break;
+ }
+
+ if (passive_timer > 0 && active_timer > passive_timer)
+ return clib_error_return (0,
+ "Passive timer has to be greater than active one...");
+
+ if (flowprobe_params (fm, record_l2, record_l3, record_l4,
+ active_timer, passive_timer))
+ return clib_error_return (0,
+ "Couldn't change flowperpacket params when feature is enabled on some interface ...");
+ return 0;
+}
+
+/*?
+ * '<em>flowprobe feature add-del</em>' commands to enable/disable
+ * per-packet IPFIX flow record generation on an interface
+ *
+ * @cliexpar
+ * @parblock
+ * To enable per-packet IPFIX flow-record generation on an interface:
+ * @cliexcmd{flowprobe feature add-del GigabitEthernet2/0/0}
+ *
+ * To disable per-packet IPFIX flow-record generation on an interface:
+ * @cliexcmd{flowprobe feature add-del GigabitEthernet2/0/0 disable}
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (flowprobe_enable_disable_command, static) = {
+ .path = "flowprobe feature add-del",
+ .short_help =
+ "flowprobe feature add-del <interface-name> <l2|ip4|ip6> disable",
+ .function = flowprobe_tx_interface_add_del_feature_command_fn,
+};
+VLIB_CLI_COMMAND (flowprobe_params_command, static) = {
+ .path = "flowprobe params",
+ .short_help =
+ "flowprobe params record <[l2] [l3] [l4]> [active <timer> passive <timer>]",
+ .function = flowprobe_params_command_fn,
+};
+VLIB_CLI_COMMAND (flowprobe_show_table_command, static) = {
+ .path = "show flowprobe table",
+ .short_help = "show flowprobe table",
+ .function = flowprobe_show_table_fn,
+};
+VLIB_CLI_COMMAND (flowprobe_show_stats_command, static) = {
+ .path = "show flowprobe statistics",
+ .short_help = "show flowprobe statistics",
+ .function = flowprobe_show_stats_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Set up the API message handling tables
+ * @param vm vlib_main_t * vlib main data structure pointer
+ * @returns 0 to indicate all is well
+ */
+static clib_error_t *
+flowprobe_plugin_api_hookup (vlib_main_t * vm)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + fm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_flowprobe_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (flowprobe_main_t * fm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + fm->msg_id_base);
+ foreach_vl_msg_name_crc_flowprobe;
+#undef _
+}
+
+/*
+ * Main-core process, sending an interrupt to the per worker input
+ * process that spins the per worker timer wheel.
+ */
+static uword
+timer_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ uword *event_data = 0;
+ vlib_main_t **worker_vms = 0, *worker_vm;
+ flowprobe_main_t *fm = &flowprobe_main;
+
+ /* Wait for Godot... */
+ vlib_process_wait_for_event_or_clock (vm, 1e9);
+ uword event_type = vlib_process_get_events (vm, &event_data);
+ if (event_type != 1)
+ clib_warning ("bogus kickoff event received, %d", event_type);
+ vec_reset_length (event_data);
+
+ int i;
+ if (vec_len (vlib_mains) == 0)
+ vec_add1 (worker_vms, vm);
+ else
+ {
+ for (i = 0; i < vec_len (vlib_mains); i++)
+ {
+ worker_vm = vlib_mains[i];
+ if (worker_vm)
+ vec_add1 (worker_vms, worker_vm);
+ }
+ }
+ f64 sleep_duration = 0.1;
+
+ while (1)
+ {
+ /* Send an interrupt to each timer input node */
+ sleep_duration = 0.1;
+ for (i = 0; i < vec_len (worker_vms); i++)
+ {
+ worker_vm = worker_vms[i];
+ if (worker_vm)
+ {
+ vlib_node_set_interrupt_pending (worker_vm,
+ flowprobe_walker_node.index);
+ sleep_duration =
+ (fm->expired_passive_per_worker[i] > 0) ? 1e-4 : 0.1;
+ }
+ }
+ vlib_process_suspend (vm, sleep_duration);
+ }
+ return 0; /* or not */
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (flowprobe_timer_node, static) = {
+ .function = timer_process,
+ .name = "flowprobe-timer-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Set up the API message handling tables
+ * @param vm vlib_main_t * vlib main data structure pointer
+ * @returns 0 to indicate all is well, or a clib_error_t
+ */
+static clib_error_t *
+flowprobe_init (vlib_main_t * vm)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ clib_error_t *error = 0;
+ u8 *name;
+ u32 num_threads;
+ int i;
+
+ fm->vnet_main = vnet_get_main ();
+
+ /* Construct the API name */
+ name = format (0, "flowprobe_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ fm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ /* Hook up message handlers */
+ error = flowprobe_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (fm, &api_main);
+
+ vec_free (name);
+
+ /* Set up time reference pair */
+ fm->vlib_time_0 = vlib_time_now (vm);
+ fm->nanosecond_time_0 = unix_time_now_nsec ();
+
+ memset (fm->template_reports, 0, sizeof (fm->template_reports));
+ memset (fm->template_size, 0, sizeof (fm->template_size));
+ memset (fm->template_per_flow, 0, sizeof (fm->template_per_flow));
+
+ /* Decide how many worker threads we have */
+ num_threads = 1 /* main thread */ + tm->n_threads;
+
+ /* Allocate per worker thread vectors per flavour */
+ for (i = 0; i < FLOW_N_VARIANTS; i++)
+ {
+ vec_validate (fm->context[i].buffers_per_worker, num_threads - 1);
+ vec_validate (fm->context[i].frames_per_worker, num_threads - 1);
+ vec_validate (fm->context[i].next_record_offset_per_worker,
+ num_threads - 1);
+ }
+
+ fm->active_timer = FLOWPROBE_TIMER_ACTIVE;
+ fm->passive_timer = FLOWPROBE_TIMER_PASSIVE;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (flowprobe_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/flowprobe/flowprobe.h b/src/plugins/flowprobe/flowprobe.h
new file mode 100644
index 00000000..02ee053c
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe.h
@@ -0,0 +1,174 @@
+/*
+ * flowprobe.h - ipfix probe plug-in header file
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_flowprobe_h__
+#define __included_flowprobe_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vnet/flow/flow_report.h>
+#include <vnet/flow/flow_report_classify.h>
+#include <vppinfra/tw_timer_2t_1w_2048sl.h>
+
+/* Default timers in seconds */
+#define FLOWPROBE_TIMER_ACTIVE (15)
+#define FLOWPROBE_TIMER_PASSIVE 120 // XXXX: FOR TESTING (30*60)
+#define FLOWPROBE_LOG2_HASHSIZE (18)
+
+typedef enum
+{
+ FLOW_RECORD_L2 = 1 << 0,
+ FLOW_RECORD_L3 = 1 << 1,
+ FLOW_RECORD_L4 = 1 << 2,
+ FLOW_RECORD_L2_IP4 = 1 << 3,
+ FLOW_RECORD_L2_IP6 = 1 << 4,
+ FLOW_N_RECORDS = 1 << 5,
+} flowprobe_record_t;
+
+/* *INDENT-OFF* */
+typedef enum __attribute__ ((__packed__))
+{
+ FLOW_VARIANT_IP4,
+ FLOW_VARIANT_IP6,
+ FLOW_VARIANT_L2,
+ FLOW_VARIANT_L2_IP4,
+ FLOW_VARIANT_L2_IP6,
+ FLOW_N_VARIANTS,
+} flowprobe_variant_t;
+/* *INDENT-ON* */
+
+STATIC_ASSERT (sizeof (flowprobe_variant_t) == 1,
+ "flowprobe_variant_t is expected to be 1 byte, "
+ "revisit padding in flowprobe_key_t");
+
+#define FLOW_MAXIMUM_EXPORT_ENTRIES (1024)
+
+typedef struct
+{
+ /* what to collect per variant */
+ flowprobe_record_t flags;
+ /** ipfix buffers under construction, per-worker thread */
+ vlib_buffer_t **buffers_per_worker;
+ /** frames containing ipfix buffers, per-worker thread */
+ vlib_frame_t **frames_per_worker;
+ /** next record offset, per worker thread */
+ u16 *next_record_offset_per_worker;
+} flowprobe_protocol_context_t;
+
+/* *INDENT-OFF* */
+typedef struct __attribute__ ((aligned (8))) {
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+ u8 src_mac[6];
+ u8 dst_mac[6];
+ u16 ethertype;
+ ip46_address_t src_address;
+ ip46_address_t dst_address;
+ u8 protocol;
+ u16 src_port;
+ u16 dst_port;
+ flowprobe_variant_t which;
+} flowprobe_key_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ u32 sec;
+ u32 nsec;
+} timestamp_nsec_t;
+
+typedef struct
+{
+ flowprobe_key_t key;
+ u64 packetcount;
+ u64 octetcount;
+ timestamp_nsec_t flow_start;
+ timestamp_nsec_t flow_end;
+ f64 last_updated;
+ f64 last_exported;
+ u32 passive_timer_handle;
+ union
+ {
+ struct
+ {
+ u16 flags;
+ } tcp;
+ } prot;
+} flowprobe_entry_t;
+
+/**
+ * @file
+ * @brief flow-per-packet plugin header file
+ */
+typedef struct
+{
+ /** API message ID base */
+ u16 msg_id_base;
+
+ flowprobe_protocol_context_t context[FLOW_N_VARIANTS];
+ u16 template_reports[FLOW_N_RECORDS];
+ u16 template_size[FLOW_N_RECORDS];
+
+ /** Time reference pair */
+ u64 nanosecond_time_0;
+ f64 vlib_time_0;
+
+ /** Per CPU flow-state */
+ u8 ht_log2len; /* Hash table size is 2^log2len */
+ u32 **hash_per_worker;
+ flowprobe_entry_t **pool_per_worker;
+ /* *INDENT-OFF* */
+ TWT (tw_timer_wheel) ** timers_per_worker;
+ /* *INDENT-ON* */
+ u32 **expired_passive_per_worker;
+
+ flowprobe_record_t record;
+ u32 active_timer;
+ u32 passive_timer;
+ flowprobe_entry_t *stateless_entry;
+
+ bool initialized;
+ bool disabled;
+
+ u16 template_per_flow[FLOW_N_VARIANTS];
+ u8 *flow_per_interface;
+
+ /** convenience vlib_main_t pointer */
+ vlib_main_t *vlib_main;
+ /** convenience vnet_main_t pointer */
+ vnet_main_t *vnet_main;
+} flowprobe_main_t;
+
+extern flowprobe_main_t flowprobe_main;
+
+void flowprobe_flush_callback_ip4 (void);
+void flowprobe_flush_callback_ip6 (void);
+void flowprobe_flush_callback_l2 (void);
+u8 *format_flowprobe_entry (u8 * s, va_list * args);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/flowprobe/flowprobe_all_api_h.h b/src/plugins/flowprobe/flowprobe_all_api_h.h
new file mode 100644
index 00000000..1f30eccc
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe_all_api_h.h
@@ -0,0 +1,18 @@
+/*
+ * flowprobe_all_api_h.h - plug-in api #include file
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <flowprobe/flowprobe.api.h>
diff --git a/src/plugins/flowprobe/flowprobe_msg_enum.h b/src/plugins/flowprobe/flowprobe_msg_enum.h
new file mode 100644
index 00000000..bc0b21c9
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe_msg_enum.h
@@ -0,0 +1,31 @@
+/*
+ * flowprobe_msg_enum.h - vpp engine plug-in message enumeration
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_flowprobe_msg_enum_h
+#define included_flowprobe_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+#include <flowprobe/flowprobe_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_flowprobe_msg_enum_h */
diff --git a/src/plugins/flowprobe/flowprobe_plugin_doc.md b/src/plugins/flowprobe/flowprobe_plugin_doc.md
new file mode 100644
index 00000000..4c9b2342
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe_plugin_doc.md
@@ -0,0 +1,13 @@
+IPFIX flow record plugin {#flowprobe_plugin_doc}
+========================
+
+## Introduction
+
+This plugin generates IPFIX flow records for traffic on interfaces on which the feature is enabled.
+
+## Sample configuration
+
+    set ipfix exporter collector 192.168.6.2 src 192.168.6.1 template-interval 20 port 4739 path-mtu 1500
+
+    flowprobe params record l3 active 20 passive 120
+    flowprobe feature add-del GigabitEthernet2/3/0 l2
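+
+Once enabled, the flow state can be inspected with the plugin's debug CLI:
+
+    show flowprobe table
+    show flowprobe statistics
\ No newline at end of file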
diff --git a/src/plugins/flowprobe/flowprobe_test.c b/src/plugins/flowprobe/flowprobe_test.c
new file mode 100644
index 00000000..91793f55
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe_test.c
@@ -0,0 +1,263 @@
+/*
+ * flowprobe_test.c - flowprobe vpp-api-test plug-in
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <flowprobe/flowprobe.h>
+
+#define __plugin_msg_base flowprobe_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/**
+ * @file vpp_api_test plugin
+ */
+
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+
+/* Declare message IDs */
+#include <flowprobe/flowprobe_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define generated endian-swappers */
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <flowprobe/flowprobe_all_api_h.h>
+#undef vl_api_version
+
+typedef struct
+{
+ /** API message ID base */
+ u16 msg_id_base;
+ /** vat_main_t pointer */
+ vat_main_t *vat_main;
+} flowprobe_test_main_t;
+
+flowprobe_test_main_t flowprobe_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(flowprobe_tx_interface_add_del_reply) \
+_(flowprobe_params_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = flowprobe_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(FLOWPROBE_TX_INTERFACE_ADD_DEL_REPLY, \
+ flowprobe_tx_interface_add_del_reply) \
+_(FLOWPROBE_PARAMS_REPLY, flowprobe_params_reply)
+
+static int
+api_flowprobe_tx_interface_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ int enable_disable = 1;
+ u8 which = FLOW_VARIANT_IP4;
+ u32 sw_if_index = ~0;
+ vl_api_flowprobe_tx_interface_add_del_t *mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "disable"))
+ enable_disable = 0;
+ else if (unformat (i, "ip4"))
+ which = FLOW_VARIANT_IP4;
+ else if (unformat (i, "ip6"))
+ which = FLOW_VARIANT_IP6;
+ else if (unformat (i, "l2"))
+ which = FLOW_VARIANT_L2;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name / explicit sw_if_index number \n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (FLOWPROBE_TX_INTERFACE_ADD_DEL, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = enable_disable;
+ mp->which = which;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_flowprobe_params (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ u8 record_l2 = 0, record_l3 = 0, record_l4 = 0;
+ u32 active_timer = ~0;
+ u32 passive_timer = ~0;
+ vl_api_flowprobe_params_t *mp;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "active %d", &active_timer))
+ ;
+ else if (unformat (i, "passive %d", &passive_timer))
+ ;
+ else if (unformat (i, "record"))
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "l2"))
+ record_l2 = 1;
+ else if (unformat (i, "l3"))
+ record_l3 = 1;
+ else if (unformat (i, "l4"))
+ record_l4 = 1;
+ else
+ break;
+ }
+ else
+ break;
+ }
+
+ if (passive_timer > 0 && active_timer > passive_timer)
+ {
+ errmsg ("Passive timer has to be greater than active one...\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (FLOWPROBE_PARAMS, mp);
+ mp->record_l2 = record_l2;
+ mp->record_l3 = record_l3;
+ mp->record_l4 = record_l4;
+ mp->active_timer = ntohl (active_timer);
+ mp->passive_timer = ntohl (passive_timer);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(flowprobe_tx_interface_add_del, "<intfc> [disable]") \
+_(flowprobe_params, "record <[l2] [l3] [l4]> [active <timer> passive <timer>]")
+
+static void
+flowprobe_vat_api_hookup (vat_main_t * vam)
+{
+ flowprobe_test_main_t *sm = &flowprobe_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ flowprobe_test_main_t *sm = &flowprobe_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "flowprobe_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ /* Don't attempt to hook up API messages if the data plane plugin is AWOL */
+ if (sm->msg_id_base != (u16) ~ 0)
+ flowprobe_vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/flowprobe/node.c b/src/plugins/flowprobe/node.c
new file mode 100644
index 00000000..2f7d0025
--- /dev/null
+++ b/src/plugins/flowprobe/node.c
@@ -0,0 +1,1053 @@
+/*
+ * node.c - ipfix probe graph node
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/crc32.h>
+#include <vppinfra/error.h>
+#include <flowprobe/flowprobe.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlibmemory/api.h>
+
+static void flowprobe_export_entry (vlib_main_t * vm, flowprobe_entry_t * e);
+
+/**
+ * @file flow record generator graph node
+ */
+
+typedef struct
+{
+ /** interface handle */
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+ /** packet timestamp */
+ u64 timestamp;
+ /** size of the buffer */
+ u16 buffer_size;
+
+ /** L2 information */
+ u8 src_mac[6];
+ u8 dst_mac[6];
+ /** Ethertype */
+ u16 ethertype;
+
+ /** L3 information */
+ ip46_address_t src_address;
+ ip46_address_t dst_address;
+ u8 protocol;
+ u8 tos;
+
+ /** L4 information */
+ u16 src_port;
+ u16 dst_port;
+
+ flowprobe_variant_t which;
+} flowprobe_trace_t;
+
+static char *flowprobe_variant_strings[] = {
+ [FLOW_VARIANT_IP4] = "IP4",
+ [FLOW_VARIANT_IP6] = "IP6",
+ [FLOW_VARIANT_L2] = "L2",
+ [FLOW_VARIANT_L2_IP4] = "L2-IP4",
+ [FLOW_VARIANT_L2_IP6] = "L2-IP6",
+};
+
+/* packet trace format function */
+static u8 *
+format_flowprobe_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ flowprobe_trace_t *t = va_arg (*args, flowprobe_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s,
+ "FLOWPROBE[%s]: rx_sw_if_index %d, tx_sw_if_index %d, "
+ "timestamp %lld, size %d", flowprobe_variant_strings[t->which],
+ t->rx_sw_if_index, t->tx_sw_if_index,
+ t->timestamp, t->buffer_size);
+
+ if (t->which == FLOW_VARIANT_L2)
+ s = format (s, "\n%U -> %U", format_white_space, indent,
+ format_ethernet_address, &t->src_mac,
+ format_ethernet_address, &t->dst_mac);
+
+ if (t->protocol > 0
+ && (t->which == FLOW_VARIANT_L2_IP4 || t->which == FLOW_VARIANT_IP4
+ || t->which == FLOW_VARIANT_L2_IP6 || t->which == FLOW_VARIANT_IP6))
+ s =
+ format (s, "\n%U%U: %U -> %U", format_white_space, indent,
+ format_ip_protocol, t->protocol, format_ip46_address,
+ &t->src_address, IP46_TYPE_ANY, format_ip46_address,
+ &t->dst_address, IP46_TYPE_ANY);
+ return s;
+}
+
+vlib_node_registration_t flowprobe_ip4_node;
+vlib_node_registration_t flowprobe_ip6_node;
+vlib_node_registration_t flowprobe_l2_node;
+
+/* Node error counters */
+#define foreach_flowprobe_error \
+_(COLLISION, "Hash table collisions") \
+_(BUFFER, "Buffer allocation error") \
+_(EXPORTED_PACKETS, "Exported packets") \
+_(INPATH, "Exported packets in path")
+
+typedef enum
+{
+#define _(sym,str) FLOWPROBE_ERROR_##sym,
+ foreach_flowprobe_error
+#undef _
+ FLOWPROBE_N_ERROR,
+} flowprobe_error_t;
+
+static char *flowprobe_error_strings[] = {
+#define _(sym,string) string,
+ foreach_flowprobe_error
+#undef _
+};
+
+typedef enum
+{
+ FLOWPROBE_NEXT_DROP,
+ FLOWPROBE_NEXT_IP4_LOOKUP,
+ FLOWPROBE_N_NEXT,
+} flowprobe_next_t;
+
+#define FLOWPROBE_NEXT_NODES { \
+ [FLOWPROBE_NEXT_DROP] = "error-drop", \
+ [FLOWPROBE_NEXT_IP4_LOOKUP] = "ip4-lookup", \
+}
+
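+/*
+ * When the L2 path also records L3/L4 fields, refine the variant using
+ * the inner ethertype so the matching IPFIX template is used; otherwise
+ * keep the caller-supplied variant as-is.
+ */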
+static inline flowprobe_variant_t
+flowprobe_get_variant (flowprobe_variant_t which,
+ flowprobe_record_t flags, u16 ethertype)
+{
+ if (which == FLOW_VARIANT_L2
+ && (flags & FLOW_RECORD_L3 || flags & FLOW_RECORD_L4))
+ return ethertype == ETHERNET_TYPE_IP6 ? FLOW_VARIANT_L2_IP6
+ : ethertype == ETHERNET_TYPE_IP4 ? FLOW_VARIANT_L2_IP4
+ : FLOW_VARIANT_L2;
+ return which;
+}
+
+/*
+ * Offset between the NTP epoch (1 Jan 1900) and the UNIX epoch
+ * (1 Jan 1970): 2208988800 seconds (see RFC 868).
+ */
+#define NTP_TIMESTAMP 2208988800L
+
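+/*
+ * Append the fields common to all templates (ingress/egress interface,
+ * packet delta count, flow start/end timestamps in NTP format) to the
+ * export buffer; returns the number of bytes written.
+ */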
+static inline u32
+flowprobe_common_add (vlib_buffer_t * to_b, flowprobe_entry_t * e, u16 offset)
+{
+ u16 start = offset;
+
+ /* Ingress interface */
+ u32 rx_if = clib_host_to_net_u32 (e->key.rx_sw_if_index);
+ clib_memcpy (to_b->data + offset, &rx_if, sizeof (rx_if));
+ offset += sizeof (rx_if);
+
+ /* Egress interface */
+ u32 tx_if = clib_host_to_net_u32 (e->key.tx_sw_if_index);
+ clib_memcpy (to_b->data + offset, &tx_if, sizeof (tx_if));
+ offset += sizeof (tx_if);
+
+ /* packet delta count */
+ u64 packetdelta = clib_host_to_net_u64 (e->packetcount);
+ clib_memcpy (to_b->data + offset, &packetdelta, sizeof (u64));
+ offset += sizeof (u64);
+
+ /* flowStartNanoseconds */
+ u32 t = clib_host_to_net_u32 (e->flow_start.sec + NTP_TIMESTAMP);
+ clib_memcpy (to_b->data + offset, &t, sizeof (u32));
+ offset += sizeof (u32);
+ t = clib_host_to_net_u32 (e->flow_start.nsec);
+ clib_memcpy (to_b->data + offset, &t, sizeof (u32));
+ offset += sizeof (u32);
+
+ /* flowEndNanoseconds */
+ t = clib_host_to_net_u32 (e->flow_end.sec + NTP_TIMESTAMP);
+ clib_memcpy (to_b->data + offset, &t, sizeof (u32));
+ offset += sizeof (u32);
+ t = clib_host_to_net_u32 (e->flow_end.nsec);
+ clib_memcpy (to_b->data + offset, &t, sizeof (u32));
+ offset += sizeof (u32);
+
+ return offset - start;
+}
+
+static inline u32
+flowprobe_l2_add (vlib_buffer_t * to_b, flowprobe_entry_t * e, u16 offset)
+{
+ u16 start = offset;
+
+ /* src mac address */
+ clib_memcpy (to_b->data + offset, &e->key.src_mac, 6);
+ offset += 6;
+
+ /* dst mac address */
+ clib_memcpy (to_b->data + offset, &e->key.dst_mac, 6);
+ offset += 6;
+
+ /* ethertype */
+ clib_memcpy (to_b->data + offset, &e->key.ethertype, 2);
+ offset += 2;
+
+ return offset - start;
+}
+
+static inline u32
+flowprobe_l3_ip6_add (vlib_buffer_t * to_b, flowprobe_entry_t * e, u16 offset)
+{
+ u16 start = offset;
+
+ /* ip6 src address */
+ clib_memcpy (to_b->data + offset, &e->key.src_address,
+ sizeof (ip6_address_t));
+ offset += sizeof (ip6_address_t);
+
+ /* ip6 dst address */
+ clib_memcpy (to_b->data + offset, &e->key.dst_address,
+ sizeof (ip6_address_t));
+ offset += sizeof (ip6_address_t);
+
+ /* Protocol */
+ to_b->data[offset++] = e->key.protocol;
+
+ /* octetDeltaCount */
+ u64 octetdelta = clib_host_to_net_u64 (e->octetcount);
+ clib_memcpy (to_b->data + offset, &octetdelta, sizeof (u64));
+ offset += sizeof (u64);
+
+ return offset - start;
+}
+
+static inline u32
+flowprobe_l3_ip4_add (vlib_buffer_t * to_b, flowprobe_entry_t * e, u16 offset)
+{
+ u16 start = offset;
+
+ /* ip4 src address */
+ clib_memcpy (to_b->data + offset, &e->key.src_address.ip4,
+ sizeof (ip4_address_t));
+ offset += sizeof (ip4_address_t);
+
+ /* ip4 dst address */
+ clib_memcpy (to_b->data + offset, &e->key.dst_address.ip4,
+ sizeof (ip4_address_t));
+ offset += sizeof (ip4_address_t);
+
+ /* Protocol */
+ to_b->data[offset++] = e->key.protocol;
+
+ /* octetDeltaCount */
+ u64 octetdelta = clib_host_to_net_u64 (e->octetcount);
+ clib_memcpy (to_b->data + offset, &octetdelta, sizeof (u64));
+ offset += sizeof (u64);
+
+ return offset - start;
+}
+
+static inline u32
+flowprobe_l4_add (vlib_buffer_t * to_b, flowprobe_entry_t * e, u16 offset)
+{
+ u16 start = offset;
+
+ /* src port */
+ clib_memcpy (to_b->data + offset, &e->key.src_port, 2);
+ offset += 2;
+
+ /* dst port */
+ clib_memcpy (to_b->data + offset, &e->key.dst_port, 2);
+ offset += 2;
+
+ /* tcp control bits */
+ u16 control_bits = htons (e->prot.tcp.flags);
+ clib_memcpy (to_b->data + offset, &control_bits, 2);
+ offset += 2;
+
+ return offset - start;
+}
+
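+/*
+ * Hash a flow key into the per-worker table: CRC32-C when the CPU
+ * provides the intrinsic, otherwise xxhash over the XOR-folded key.
+ * The top ht_log2len bits of the 32-bit hash select the bucket.
+ */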
+static inline u32
+flowprobe_hash (flowprobe_key_t * k)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ u32 h = 0;
+
+#ifdef clib_crc32c_uses_intrinsics
+ h = clib_crc32c ((u8 *) k, sizeof (*k));
+#else
+ int i;
+ u64 tmp = 0;
+ for (i = 0; i < sizeof (*k) / 8; i++)
+ tmp ^= ((u64 *) k)[i];
+
+ h = clib_xxhash (tmp);
+#endif
+
+ return h >> (32 - fm->ht_log2len);
+}
+
+flowprobe_entry_t *
+flowprobe_lookup (u32 my_cpu_number, flowprobe_key_t * k, u32 * poolindex,
+ bool * collision)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ flowprobe_entry_t *e;
+ u32 h;
+
+ h = (fm->active_timer) ? flowprobe_hash (k) : 0;
+
+ /* Lookup in the flow state pool */
+ *poolindex = fm->hash_per_worker[my_cpu_number][h];
+ if (*poolindex != ~0)
+ {
+ e = pool_elt_at_index (fm->pool_per_worker[my_cpu_number], *poolindex);
+ if (e)
+ {
+ /* Verify key or report collision */
+ if (memcmp (k, &e->key, sizeof (flowprobe_key_t)))
+ *collision = true;
+ return e;
+ }
+ }
+
+ return 0;
+}
+
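+/*
+ * Allocate a flow entry from the per-worker pool, install it in the
+ * hash table and, when a passive timeout is configured, arm its
+ * passive timer.
+ */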
+flowprobe_entry_t *
+flowprobe_create (u32 my_cpu_number, flowprobe_key_t * k, u32 * poolindex)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ u32 h;
+
+ flowprobe_entry_t *e;
+
+ /* Compute the hash bucket (bucket 0 when no active timer) */
+ h = (fm->active_timer) ? flowprobe_hash (k) : 0;
+
+ pool_get (fm->pool_per_worker[my_cpu_number], e);
+ *poolindex = e - fm->pool_per_worker[my_cpu_number];
+ fm->hash_per_worker[my_cpu_number][h] = *poolindex;
+
+ e->key = *k;
+
+ if (fm->passive_timer > 0)
+ {
+ e->passive_timer_handle = tw_timer_start_2t_1w_2048sl
+ (fm->timers_per_worker[my_cpu_number], *poolindex, 0,
+ fm->passive_timer);
+ }
+ return e;
+}
+
+static inline void
+add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
+ flowprobe_main_t * fm, vlib_buffer_t * b,
+ timestamp_nsec_t timestamp, u16 length,
+ flowprobe_variant_t which, flowprobe_trace_t * t)
+{
+ if (fm->disabled)
+ return;
+
+ u32 my_cpu_number = vm->thread_index;
+ u16 octets = 0;
+
+ flowprobe_record_t flags = fm->context[which].flags;
+ bool collect_ip4 = false, collect_ip6 = false;
+ ASSERT (b);
+ ethernet_header_t *eth = vlib_buffer_get_current (b);
+ u16 ethertype = clib_net_to_host_u16 (eth->type);
+ /* *INDENT-OFF* */
+ flowprobe_key_t k = {};
+ /* *INDENT-ON* */
+ ip4_header_t *ip4 = 0;
+ ip6_header_t *ip6 = 0;
+ udp_header_t *udp = 0;
+ tcp_header_t *tcp = 0;
+ u8 tcp_flags = 0;
+
+ if (flags & FLOW_RECORD_L3 || flags & FLOW_RECORD_L4)
+ {
+ collect_ip4 = which == FLOW_VARIANT_L2_IP4 || which == FLOW_VARIANT_IP4;
+ collect_ip6 = which == FLOW_VARIANT_L2_IP6 || which == FLOW_VARIANT_IP6;
+ }
+
+ k.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ k.tx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+
+ k.which = which;
+
+ if (flags & FLOW_RECORD_L2)
+ {
+ clib_memcpy (k.src_mac, eth->src_address, 6);
+ clib_memcpy (k.dst_mac, eth->dst_address, 6);
+ k.ethertype = ethertype;
+ }
+ if (collect_ip6 && ethertype == ETHERNET_TYPE_IP6)
+ {
+ ip6 = (ip6_header_t *) (eth + 1);
+ if (flags & FLOW_RECORD_L3)
+ {
+ k.src_address.as_u64[0] = ip6->src_address.as_u64[0];
+ k.src_address.as_u64[1] = ip6->src_address.as_u64[1];
+ k.dst_address.as_u64[0] = ip6->dst_address.as_u64[0];
+ k.dst_address.as_u64[1] = ip6->dst_address.as_u64[1];
+ }
+ k.protocol = ip6->protocol;
+ if (k.protocol == IP_PROTOCOL_UDP)
+ udp = (udp_header_t *) (ip6 + 1);
+ else if (k.protocol == IP_PROTOCOL_TCP)
+ tcp = (tcp_header_t *) (ip6 + 1);
+
+ octets = clib_net_to_host_u16 (ip6->payload_length)
+ + sizeof (ip6_header_t);
+ }
+ if (collect_ip4 && ethertype == ETHERNET_TYPE_IP4)
+ {
+ ip4 = (ip4_header_t *) (eth + 1);
+ if (flags & FLOW_RECORD_L3)
+ {
+ k.src_address.ip4.as_u32 = ip4->src_address.as_u32;
+ k.dst_address.ip4.as_u32 = ip4->dst_address.as_u32;
+ }
+ k.protocol = ip4->protocol;
+ if ((flags & FLOW_RECORD_L4) && k.protocol == IP_PROTOCOL_UDP)
+ udp = (udp_header_t *) (ip4 + 1);
+ else if ((flags & FLOW_RECORD_L4) && k.protocol == IP_PROTOCOL_TCP)
+ tcp = (tcp_header_t *) (ip4 + 1);
+
+ octets = clib_net_to_host_u16 (ip4->length);
+ }
+
+ if (udp)
+ {
+ k.src_port = udp->src_port;
+ k.dst_port = udp->dst_port;
+ }
+ else if (tcp)
+ {
+ k.src_port = tcp->src_port;
+ k.dst_port = tcp->dst_port;
+ tcp_flags = tcp->flags;
+ }
+
+ if (t)
+ {
+ t->rx_sw_if_index = k.rx_sw_if_index;
+ t->tx_sw_if_index = k.tx_sw_if_index;
+ clib_memcpy (t->src_mac, k.src_mac, 6);
+ clib_memcpy (t->dst_mac, k.dst_mac, 6);
+ t->ethertype = k.ethertype;
+ t->src_address.ip4.as_u32 = k.src_address.ip4.as_u32;
+ t->dst_address.ip4.as_u32 = k.dst_address.ip4.as_u32;
+ t->protocol = k.protocol;
+ t->src_port = k.src_port;
+ t->dst_port = k.dst_port;
+ t->which = k.which;
+ }
+
+ flowprobe_entry_t *e = 0;
+ f64 now = vlib_time_now (vm);
+ if (fm->active_timer > 0)
+ {
+ u32 poolindex = ~0;
+ bool collision = false;
+
+ e = flowprobe_lookup (my_cpu_number, &k, &poolindex, &collision);
+ if (collision)
+ {
+ /* Flush data and clean up entry for reuse. */
+ if (e->packetcount)
+ flowprobe_export_entry (vm, e);
+ e->key = k;
+ e->flow_start = timestamp;
+ vlib_node_increment_counter (vm, node->node_index,
+ FLOWPROBE_ERROR_COLLISION, 1);
+ }
+ if (!e) /* Create new entry */
+ {
+ e = flowprobe_create (my_cpu_number, &k, &poolindex);
+ e->last_exported = now;
+ e->flow_start = timestamp;
+ }
+ }
+ else
+ {
+ e = &fm->stateless_entry[my_cpu_number];
+ e->key = k;
+ }
+
+ if (e)
+ {
+ /* Updating entry */
+ e->packetcount++;
+ e->octetcount += octets;
+ e->last_updated = now;
+ e->flow_end = timestamp;
+ e->prot.tcp.flags |= tcp_flags;
+ if (fm->active_timer == 0
+ || (now > e->last_exported + fm->active_timer))
+ flowprobe_export_entry (vm, e);
+ }
+}
+
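+/* Fixed encapsulation overhead of every export packet:
+   outer IPv4 + UDP headers plus the IPFIX message and set headers. */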
+static u16
+flowprobe_get_headersize (void)
+{
+ return sizeof (ip4_header_t) + sizeof (udp_header_t) +
+ sizeof (ipfix_message_header_t) + sizeof (ipfix_set_header_t);
+}
+
+static void
+flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
+ flowprobe_variant_t which)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_frame_t *f;
+ ip4_ipfix_template_packet_t *tp;
+ ipfix_set_header_t *s;
+ ipfix_message_header_t *h;
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ flowprobe_record_t flags = fm->context[which].flags;
+ u32 my_cpu_number = vm->thread_index;
+
+ /* Fill in header */
+ flow_report_stream_t *stream;
+
+ /* Nothing to send */
+ if (fm->context[which].next_record_offset_per_worker[my_cpu_number] <=
+ flowprobe_get_headersize ())
+ return;
+
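+ /* Find the stream for observation domain 1, creating it on first use */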
+ u32 i, index = vec_len (frm->streams);
+ for (i = 0; i < index; i++)
+ if (frm->streams[i].domain_id == 1)
+ {
+ index = i;
+ break;
+ }
+ if (i == vec_len (frm->streams))
+ {
+ vec_validate (frm->streams, index);
+ frm->streams[index].domain_id = 1;
+ }
+ stream = &frm->streams[index];
+
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->flags_and_fragment_offset = 0;
+ ip->src_address.as_u32 = frm->src_address.as_u32;
+ ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
+ udp->src_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
+ udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
+ udp->checksum = 0;
+
+ /* FIXUP: message header export_time */
+ h->export_time = (u32)
+ (((f64) frm->unix_time_0) +
+ (vlib_time_now (frm->vlib_main) - frm->vlib_time_0));
+ h->export_time = clib_host_to_net_u32 (h->export_time);
+ h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+ /* FIXUP: message header sequence_number */
+ h->sequence_number = stream->sequence_number++;
+ h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
+
+ s->set_id_length = ipfix_set_id_length (fm->template_reports[flags],
+ b0->current_length -
+ (sizeof (*ip) + sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length = version_length (b0->current_length -
+ (sizeof (*ip) + sizeof (*udp)));
+
+ ip->length = clib_host_to_net_u16 (b0->current_length);
+
+ ip->checksum = ip4_header_checksum (ip);
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ if (frm->udp_checksum)
+ {
+ /* RFC 7011 section 10.3.2. */
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+
+ /* Find or allocate a frame */
+ f = fm->context[which].frames_per_worker[my_cpu_number];
+ if (PREDICT_FALSE (f == 0))
+ {
+ u32 *to_next;
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ fm->context[which].frames_per_worker[my_cpu_number] = f;
+ u32 bi0 = vlib_get_buffer_index (vm, b0);
+
+ /* Enqueue the buffer */
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ }
+
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+ vlib_node_increment_counter (vm, flowprobe_l2_node.index,
+ FLOWPROBE_ERROR_EXPORTED_PACKETS, 1);
+
+ fm->context[which].frames_per_worker[my_cpu_number] = 0;
+ fm->context[which].buffers_per_worker[my_cpu_number] = 0;
+ fm->context[which].next_record_offset_per_worker[my_cpu_number] =
+ flowprobe_get_headersize ();
+}
+
+static vlib_buffer_t *
+flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_buffer_t *b0;
+ u32 bi0;
+ vlib_buffer_free_list_t *fl;
+ u32 my_cpu_number = vm->thread_index;
+
+ /* Find or allocate a buffer */
+ b0 = fm->context[which].buffers_per_worker[my_cpu_number];
+
+ /* Need to allocate a buffer? */
+ if (PREDICT_FALSE (b0 == 0))
+ {
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ {
+ vlib_node_increment_counter (vm, flowprobe_l2_node.index,
+ FLOWPROBE_ERROR_BUFFER, 1);
+ return 0;
+ }
+
+ /* Initialize the buffer */
+ b0 = fm->context[which].buffers_per_worker[my_cpu_number] =
+ vlib_get_buffer (vm, bi0);
+ fl =
+ vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (b0, fl);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+ b0->current_data = 0;
+ b0->current_length = flowprobe_get_headersize ();
+ b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_FLOW_REPORT);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ fm->context[which].next_record_offset_per_worker[my_cpu_number] =
+ b0->current_length;
+ }
+
+ return b0;
+}
+
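+/*
+ * Serialize one flow entry into the per-worker export buffer and reset
+ * its delta counters; the buffer is flushed to the exporter once the
+ * next record of this template could exceed the path MTU.
+ */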
+static void
+flowprobe_export_entry (vlib_main_t * vm, flowprobe_entry_t * e)
+{
+ u32 my_cpu_number = vm->thread_index;
+ flowprobe_main_t *fm = &flowprobe_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_buffer_t *b0;
+ bool collect_ip4 = false, collect_ip6 = false;
+ flowprobe_variant_t which = e->key.which;
+ flowprobe_record_t flags = fm->context[which].flags;
+ u16 offset =
+ fm->context[which].next_record_offset_per_worker[my_cpu_number];
+
+ if (offset < flowprobe_get_headersize ())
+ offset = flowprobe_get_headersize ();
+
+ b0 = flowprobe_get_buffer (vm, which);
+ /* No buffer available; keep the entry's counters and retry at the next export */
+ if (b0 == 0)
+ return;
+
+ if (flags & FLOW_RECORD_L3)
+ {
+ collect_ip4 = which == FLOW_VARIANT_L2_IP4 || which == FLOW_VARIANT_IP4;
+ collect_ip6 = which == FLOW_VARIANT_L2_IP6 || which == FLOW_VARIANT_IP6;
+ }
+
+ offset += flowprobe_common_add (b0, e, offset);
+
+ if (flags & FLOW_RECORD_L2)
+ offset += flowprobe_l2_add (b0, e, offset);
+ if (collect_ip6)
+ offset += flowprobe_l3_ip6_add (b0, e, offset);
+ if (collect_ip4)
+ offset += flowprobe_l3_ip4_add (b0, e, offset);
+ if (flags & FLOW_RECORD_L4)
+ offset += flowprobe_l4_add (b0, e, offset);
+
+ /* Reset per flow-export counters */
+ e->packetcount = 0;
+ e->octetcount = 0;
+ e->last_exported = vlib_time_now (vm);
+
+ b0->current_length = offset;
+
+ fm->context[which].next_record_offset_per_worker[my_cpu_number] = offset;
+ /* Time to flush the buffer? */
+ if (offset + fm->template_size[flags] > frm->path_mtu)
+ flowprobe_export_send (vm, b0, which);
+}
+
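+/*
+ * Per-packet worker: update flow state for each buffer using the
+ * standard dual/single loop pattern. Buffers flagged as flow reports
+ * are skipped so the probe does not record its own export packets.
+ */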
+uword
+flowprobe_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame,
+ flowprobe_variant_t which)
+{
+ u32 n_left_from, *from, *to_next;
+ flowprobe_next_t next_index;
+ flowprobe_main_t *fm = &flowprobe_main;
+ timestamp_nsec_t timestamp;
+
+ unix_time_now_nsec_fraction (&timestamp.sec, &timestamp.nsec);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 next0 = FLOWPROBE_NEXT_DROP;
+ u32 next1 = FLOWPROBE_NEXT_DROP;
+ u16 len0, len1;
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_TX],
+ &next0, b0);
+ vnet_feature_next (vnet_buffer (b1)->sw_if_index[VLIB_TX],
+ &next1, b1);
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ ethernet_header_t *eh0 = vlib_buffer_get_current (b0);
+ u16 ethertype0 = clib_net_to_host_u16 (eh0->type);
+
+ if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_FLOW_REPORT) == 0))
+ add_to_flow_record_state (vm, node, fm, b0, timestamp, len0,
+ flowprobe_get_variant
+ (which, fm->context[which].flags,
+ ethertype0), 0);
+
+ len1 = vlib_buffer_length_in_chain (vm, b1);
+ ethernet_header_t *eh1 = vlib_buffer_get_current (b1);
+ u16 ethertype1 = clib_net_to_host_u16 (eh1->type);
+
+ if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_FLOW_REPORT) == 0))
+ add_to_flow_record_state (vm, node, fm, b1, timestamp, len1,
+ flowprobe_get_variant
+ (which, fm->context[which].flags,
+ ethertype1), 0);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = FLOWPROBE_NEXT_DROP;
+ u16 len0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_TX],
+ &next0, b0);
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ ethernet_header_t *eh0 = vlib_buffer_get_current (b0);
+ u16 ethertype0 = clib_net_to_host_u16 (eh0->type);
+
+ if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_FLOW_REPORT) == 0))
+ {
+ flowprobe_trace_t *t = 0;
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ t = vlib_add_trace (vm, node, b0, sizeof (*t));
+
+ add_to_flow_record_state (vm, node, fm, b0, timestamp, len0,
+ flowprobe_get_variant
+ (which, fm->context[which].flags,
+ ethertype0), t);
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static uword
+flowprobe_ip4_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP4);
+}
+
+static uword
+flowprobe_ip6_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP6);
+}
+
+static uword
+flowprobe_l2_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_L2);
+}
+
+static inline void
+flush_record (flowprobe_variant_t which)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_buffer_t *b = flowprobe_get_buffer (vm, which);
+ if (b)
+ flowprobe_export_send (vm, b, which);
+}
+
+void
+flowprobe_flush_callback_ip4 (void)
+{
+ flush_record (FLOW_VARIANT_IP4);
+}
+
+void
+flowprobe_flush_callback_ip6 (void)
+{
+ flush_record (FLOW_VARIANT_IP6);
+}
+
+void
+flowprobe_flush_callback_l2 (void)
+{
+ flush_record (FLOW_VARIANT_L2);
+ flush_record (FLOW_VARIANT_L2_IP4);
+ flush_record (FLOW_VARIANT_L2_IP6);
+}
+
+
+static void
+flowprobe_delete_by_index (u32 my_cpu_number, u32 poolindex)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ flowprobe_entry_t *e;
+ u32 h;
+
+ e = pool_elt_at_index (fm->pool_per_worker[my_cpu_number], poolindex);
+
+ /* Compute the hash bucket for this entry's key */
+ h = flowprobe_hash (&e->key);
+
+ /* Reset hash */
+ fm->hash_per_worker[my_cpu_number][h] = ~0;
+
+ pool_put_index (fm->pool_per_worker[my_cpu_number], poolindex);
+}
+
+
+/* Per worker process processing the active/passive expired entries */
+static uword
+flowprobe_walker_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ flow_report_main_t *frm = &flow_report_main;
+ flowprobe_entry_t *e;
+
+ /*
+ * $$$$ Remove this check from here and track FRM status and disable
+ * this process if required.
+ */
+ if (frm->ipfix_collector.as_u32 == 0 || frm->src_address.as_u32 == 0)
+ {
+ fm->disabled = true;
+ return 0;
+ }
+ fm->disabled = false;
+
+ u32 cpu_index = os_get_thread_index ();
+ u32 *to_be_removed = 0, *i;
+ u32 exported = 0;
+
+ /*
+ * Tick the timer when required and process the vector of expired
+ * timers
+ */
+ f64 start_time = vlib_time_now (vm);
+ u32 count = 0;
+
+ tw_timer_expire_timers_2t_1w_2048sl (fm->timers_per_worker[cpu_index],
+ start_time);
+
+ vec_foreach (i, fm->expired_passive_per_worker[cpu_index])
+ {
+ f64 now = vlib_time_now (vm);
+ if (now > start_time + 100e-6
+ || exported > FLOW_MAXIMUM_EXPORT_ENTRIES - 1)
+ break;
+
+ if (pool_is_free_index (fm->pool_per_worker[cpu_index], *i))
+ {
+ clib_warning ("Element is %d is freed already\n", *i);
+ continue;
+ }
+ else
+ e = pool_elt_at_index (fm->pool_per_worker[cpu_index], *i);
+
+ /* Check the last-updated timestamp. If the entry has been idle longer
+ * than the passive timeout, nuke it; otherwise restart the timer with
+ * the time remaining. The timer wheel may fire up to 10% early, which
+ * is tolerated here.
+ */
+ if ((now - e->last_updated) < (u64) (fm->passive_timer * 0.9))
+ {
+ u64 delta = fm->passive_timer - (now - e->last_updated);
+ e->passive_timer_handle = tw_timer_start_2t_1w_2048sl
+ (fm->timers_per_worker[cpu_index], *i, 0, delta);
+ }
+ else /* Nuke entry */
+ {
+ vec_add1 (to_be_removed, *i);
+ }
+ /* If anything to report send it to the exporter */
+ if (e->packetcount && now > e->last_exported + fm->active_timer)
+ {
+ exported++;
+ flowprobe_export_entry (vm, e);
+ }
+ count++;
+ }
+ if (count)
+ vec_delete (fm->expired_passive_per_worker[cpu_index], count, 0);
+
+ vec_foreach (i, to_be_removed) flowprobe_delete_by_index (cpu_index, *i);
+ vec_free (to_be_removed);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (flowprobe_ip4_node) = {
+ .function = flowprobe_ip4_node_fn,
+ .name = "flowprobe-ip4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_ip6_node) = {
+ .function = flowprobe_ip6_node_fn,
+ .name = "flowprobe-ip6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_l2_node) = {
+ .function = flowprobe_l2_node_fn,
+ .name = "flowprobe-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_walker_node) = {
+ .function = flowprobe_walker_process,
+ .name = "flowprobe-walker",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gtpu.am b/src/plugins/gtpu.am
new file mode 100644
index 00000000..f4cca094
--- /dev/null
+++ b/src/plugins/gtpu.am
@@ -0,0 +1,38 @@
+# Copyright (c) 2016 Intel and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += gtpu_test_plugin.la
+vppplugins_LTLIBRARIES += gtpu_plugin.la
+
+gtpu_plugin_la_SOURCES = \
+ gtpu/gtpu_decap.c \
+ gtpu/gtpu_encap.c \
+ gtpu/gtpu.c \
+ gtpu/gtpu_api.c
+
+BUILT_SOURCES += \
+ gtpu/gtpu.api.h \
+ gtpu/gtpu.api.json
+
+API_FILES += gtpu/gtpu.api
+
+nobase_apiinclude_HEADERS += \
+ gtpu/gtpu_all_api_h.h \
+ gtpu/gtpu_msg_enum.h \
+ gtpu/gtpu.api.h
+
+gtpu_test_plugin_la_SOURCES = \
+ gtpu/gtpu_test.c \
+ gtpu/gtpu_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/gtpu/gtpu.api b/src/plugins/gtpu/gtpu.api
new file mode 100644
index 00000000..55ba0390
--- /dev/null
+++ b/src/plugins/gtpu/gtpu.api
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Set or delete a GTPU tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add tunnel if non-zero, else delete
+ @param is_ipv6 - src_address and dst_address are ipv6 if non-zero, else ipv4
+ @param src_address - GTPU tunnel's source address.
+ @param dst_address - GTPU tunnel's destination address.
+ @param mcast_sw_if_index - interface for multicast reachability, if dst_address is multicast
+ @param encap_vrf_id - fib identifier used for outgoing encapsulated packets
+ @param decap_next_index - the index of the next node after decapsulation
+ @param teid - Local Tunnel Endpoint Identifier
+*/
+define gtpu_add_del_tunnel
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 src_address[16];
+ u8 dst_address[16];
+ u32 mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 teid;
+};
+
+/** \brief reply for set or delete a GTPU tunnel
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index of the interface
+*/
+define gtpu_add_del_tunnel_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Dump GTPU tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+*/
+define gtpu_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief dump details of a GTPU tunnel
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+ @param is_ipv6 - src_address and dst_address are ipv6 if non-zero, else ipv4
+ @param src_address - GTPU tunnel's source address.
+ @param dst_address - GTPU tunnel's destination address.
+ @param mcast_sw_if_index - interface for multicast reachability, if dst_address is multicast
+ @param encap_vrf_id - fib identifier used for outgoing encapsulated packets
+ @param decap_next_index - the index of the next node after decapsulation
+ @param teid - Local Tunnel Endpoint Identifier
+*/
+define gtpu_tunnel_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u8 src_address[16];
+ u8 dst_address[16];
+ u32 mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 teid;
+};
+
+/** \brief Interface set gtpu-bypass request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+ @param is_ipv6 - if non-zero, enable ipv6-gtpu-bypass, else ipv4-gtpu-bypass
+ @param enable - if non-zero enable, else disable
+*/
+define sw_interface_set_gtpu_bypass
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u8 enable;
+};
+
+/** \brief Interface set gtpu-bypass response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+*/
+define sw_interface_set_gtpu_bypass_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gtpu/gtpu.c b/src/plugins/gtpu/gtpu.c
new file mode 100755
index 00000000..3dfb4210
--- /dev/null
+++ b/src/plugins/gtpu/gtpu.c
@@ -0,0 +1,1151 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <gtpu/gtpu.h>
+
+
+gtpu_main_t gtpu_main;
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (ip4_gtpu_bypass, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-gtpu-bypass",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_gtpu_bypass, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-gtpu-bypass",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_decap_next (u8 * s, va_list * args)
+{
+ u32 next_index = va_arg (*args, u32);
+
+ switch (next_index)
+ {
+ case GTPU_INPUT_NEXT_DROP:
+ return format (s, "drop");
+ case GTPU_INPUT_NEXT_L2_INPUT:
+ return format (s, "l2");
+ case GTPU_INPUT_NEXT_IP4_INPUT:
+ return format (s, "ip4");
+ case GTPU_INPUT_NEXT_IP6_INPUT:
+ return format (s, "ip6");
+ default:
+ return format (s, "index %d", next_index);
+ }
+ return s;
+}
+
+u8 *
+format_gtpu_tunnel (u8 * s, va_list * args)
+{
+ gtpu_tunnel_t *t = va_arg (*args, gtpu_tunnel_t *);
+ gtpu_main_t *ngm = &gtpu_main;
+
+ s = format (s, "[%d] src %U dst %U teid %d sw_if_index %d ",
+ t - ngm->tunnels,
+ format_ip46_address, &t->src, IP46_TYPE_ANY,
+ format_ip46_address, &t->dst, IP46_TYPE_ANY,
+ t->teid, t->sw_if_index);
+
+ if (ip46_address_is_multicast (&t->dst))
+ s = format (s, "mcast_sw_if_index %d ", t->mcast_sw_if_index);
+
+ s = format (s, "encap_fib_index %d fib_entry_index %d decap_next %U\n",
+ t->encap_fib_index, t->fib_entry_index,
+ format_decap_next, t->decap_next_index);
+ return s;
+}
+
+static u8 *
+format_gtpu_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "gtpu_tunnel%d", dev_instance);
+}
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static clib_error_t *
+gtpu_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return /* no error */ 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (gtpu_device_class,static) = {
+ .name = "GTPU",
+ .format_device_name = format_gtpu_name,
+ .format_tx_trace = format_gtpu_encap_trace,
+ .tx_function = dummy_interface_tx,
+ .admin_up_down_function = gtpu_interface_admin_up_down,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_gtpu_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (gtpu_hw_class) =
+{
+ .name = "GTPU",
+ .format_header = format_gtpu_header_with_length,
+ .build_rewrite = default_build_rewrite,
+};
+/* *INDENT-ON* */
+
+static void
+gtpu_tunnel_restack_dpo (gtpu_tunnel_t * t)
+{
+ dpo_id_t dpo = DPO_INVALID;
+ u32 encap_index = ip46_address_is_ip4 (&t->dst) ?
+ gtpu4_encap_node.index : gtpu6_encap_node.index;
+ fib_forward_chain_type_t forw_type = ip46_address_is_ip4 (&t->dst) ?
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
+
+ fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset (&dpo);
+}
+
+static gtpu_tunnel_t *
+gtpu_tunnel_from_fib_node (fib_node_t * node)
+{
+ return ((gtpu_tunnel_t *) (((char *) node) -
+ STRUCT_OFFSET_OF (gtpu_tunnel_t, node)));
+}
+
+/**
+ * Function definition to backwalk a FIB node -
+ * Here we will restack the new dpo of GTPU DIP to encap node.
+ */
+static fib_node_back_walk_rc_t
+gtpu_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+{
+ gtpu_tunnel_restack_dpo (gtpu_tunnel_from_fib_node (node));
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t *
+gtpu_tunnel_fib_node_get (fib_node_index_t index)
+{
+ gtpu_tunnel_t *t;
+ gtpu_main_t *gtm = &gtpu_main;
+
+ t = pool_elt_at_index (gtm->tunnels, index);
+
+ return (&t->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+gtpu_tunnel_last_lock_gone (fib_node_t * node)
+{
+ /*
+ * The GTPU tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT (0);
+}
+
+/*
+ * Virtual function table registered by GTPU tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t gtpu_vft = {
+ .fnv_get = gtpu_tunnel_fib_node_get,
+ .fnv_last_lock = gtpu_tunnel_last_lock_gone,
+ .fnv_back_walk = gtpu_tunnel_back_walk,
+};
+
+
+#define foreach_copy_field \
+_(teid) \
+_(mcast_sw_if_index) \
+_(encap_fib_index) \
+_(decap_next_index) \
+_(src) \
+_(dst)
+
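+/*
+ * Precompute the encapsulation rewrite (outer IP, UDP and GTP-U
+ * headers) that the encap node prepends to every packet; the IP
+ * length and checksum are fixed up per packet.
+ */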
+static void
+ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
+{
+ union
+ {
+ ip4_gtpu_header_t *h4;
+ ip6_gtpu_header_t *h6;
+ u8 *rw;
+ } r =
+ {
+ .rw = 0};
+ int len = is_ip6 ? sizeof *r.h6 : sizeof *r.h4;
+
+ vec_validate_aligned (r.rw, len - 1, CLIB_CACHE_LINE_BYTES);
+
+ udp_header_t *udp;
+ gtpu_header_t *gtpu;
+ /* Fixed portion of the (outer) ip header */
+ if (!is_ip6)
+ {
+ ip4_header_t *ip = &r.h4->ip4;
+ udp = &r.h4->udp;
+ gtpu = &r.h4->gtpu;
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+
+ ip->src_address = t->src.ip4;
+ ip->dst_address = t->dst.ip4;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip->checksum = ip4_header_checksum (ip);
+ }
+ else
+ {
+ ip6_header_t *ip = &r.h6->ip6;
+ udp = &r.h6->udp;
+ gtpu = &r.h6->gtpu;
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (6 << 28);
+ ip->hop_limit = 255;
+ ip->protocol = IP_PROTOCOL_UDP;
+
+ ip->src_address = t->src.ip6;
+ ip->dst_address = t->dst.ip6;
+ }
+
+ /* UDP header, randomize src port on something, maybe? */
+ udp->src_port = clib_host_to_net_u16 (2152);
+ udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_GTPU);
+
+ /* GTPU header */
+ gtpu->ver_flags = GTPU_V1_VER | GTPU_PT_GTP;
+ gtpu->type = GTPU_TYPE_GTPU;
+ gtpu->teid = clib_host_to_net_u32 (t->teid);
+
+ t->rewrite = r.rw;
+ /* Only the 8-byte mandatory GTP-U header is supported for now, so trim
+ the 4 optional bytes from the precomputed rewrite. TBD */
+ _vec_len (t->rewrite) = len - 4;
+
+ return;
+}
+
+static bool
+gtpu_decap_next_is_valid (gtpu_main_t * gtm, u32 is_ip6, u32 decap_next_index)
+{
+ vlib_main_t *vm = gtm->vlib_main;
+ u32 input_idx = (!is_ip6) ? gtpu4_input_node.index : gtpu6_input_node.index;
+ vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx);
+
+ return decap_next_index < r->n_next_nodes;
+}
+
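+/*
+ * The mem-keyed hashes store a pointer to the key, so keys must be
+ * copied on insert and freed again on delete.
+ */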
+static void
+hash_set_key_copy (uword ** h, void *key, uword v)
+{
+ size_t ksz = hash_header (*h)->user;
+ void *copy = clib_mem_alloc (ksz);
+ clib_memcpy (copy, key, ksz);
+ hash_set_mem (*h, copy, v);
+}
+
+static void
+hash_unset_key_free (uword ** h, void *key)
+{
+ hash_pair_t *hp = hash_get_pair_mem (*h, key);
+ ASSERT (hp);
+ key = uword_to_pointer (hp->key, void *);
+ hash_unset_mem (*h, key);
+ clib_mem_free (key);
+}
+
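+/*
+ * Reference-count local tunnel endpoint (VTEP) addresses; an address
+ * is removed from the table only when its last tunnel is deleted.
+ */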
+static uword
+vtep_addr_ref (ip46_address_t * ip)
+{
+ uword *vtep = ip46_address_is_ip4 (ip) ?
+ hash_get (gtpu_main.vtep4, ip->ip4.as_u32) :
+ hash_get_mem (gtpu_main.vtep6, &ip->ip6);
+ if (vtep)
+ return ++(*vtep);
+ ip46_address_is_ip4 (ip) ?
+ hash_set (gtpu_main.vtep4, ip->ip4.as_u32, 1) :
+ hash_set_key_copy (&gtpu_main.vtep6, &ip->ip6, 1);
+ return 1;
+}
+
+static uword
+vtep_addr_unref (ip46_address_t * ip)
+{
+ uword *vtep = ip46_address_is_ip4 (ip) ?
+ hash_get (gtpu_main.vtep4, ip->ip4.as_u32) :
+ hash_get_mem (gtpu_main.vtep6, &ip->ip6);
+ ASSERT (vtep);
+ if (--(*vtep) != 0)
+ return *vtep;
+ ip46_address_is_ip4 (ip) ?
+ hash_unset (gtpu_main.vtep4, ip->ip4.as_u32) :
+ hash_unset_key_free (&gtpu_main.vtep6, &ip->ip6);
+ return 0;
+}
+
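+/*
+ * State shared by all tunnels using the same multicast group: the
+ * (*,G) mfib entry and the mcast adjacency, packed into a single
+ * u64 hash value.
+ */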
+typedef CLIB_PACKED (union
+ {
+ struct
+ {
+ fib_node_index_t mfib_entry_index;
+ adj_index_t mcast_adj_index;
+ }; u64 as_u64;
+ }) mcast_shared_t;
+
+static inline mcast_shared_t
+mcast_shared_get (ip46_address_t * ip)
+{
+ ASSERT (ip46_address_is_multicast (ip));
+ uword *p = hash_get_mem (gtpu_main.mcast_shared, ip);
+ ASSERT (p);
+ return (mcast_shared_t)
+ {
+ .as_u64 = *p};
+}
+
+static inline void
+mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai)
+{
+ mcast_shared_t new_ep = {
+ .mcast_adj_index = ai,
+ .mfib_entry_index = mfei,
+ };
+
+ hash_set_key_copy (&gtpu_main.mcast_shared, dst, new_ep.as_u64);
+}
+
+static inline void
+mcast_shared_remove (ip46_address_t * dst)
+{
+ mcast_shared_t ep = mcast_shared_get (dst);
+
+ adj_unlock (ep.mcast_adj_index);
+ mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_GTPU);
+
+ hash_unset_key_free (&gtpu_main.mcast_shared, dst);
+}
+
+static inline fib_protocol_t
+fib_ip_proto (bool is_ip6)
+{
+ return (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
+}
+
+int vnet_gtpu_add_del_tunnel
+ (vnet_gtpu_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+ gtpu_tunnel_t *t = 0;
+ vnet_main_t *vnm = gtm->vnet_main;
+ uword *p;
+ u32 hw_if_index = ~0;
+ u32 sw_if_index = ~0;
+ gtpu4_tunnel_key_t key4;
+ gtpu6_tunnel_key_t key6;
+ u32 is_ip6 = a->is_ip6;
+
+ if (!is_ip6)
+ {
+ key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */
+ key4.teid = clib_host_to_net_u32 (a->teid);
+ p = hash_get (gtm->gtpu4_tunnel_by_key, key4.as_u64);
+ }
+ else
+ {
+ key6.src = a->dst.ip6;
+ key6.teid = clib_host_to_net_u32 (a->teid);
+ p = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6);
+ }
+
+ if (a->is_add)
+ {
+ l2input_main_t *l2im = &l2input_main;
+
+ /* adding a tunnel: tunnel must not already exist */
+ if (p)
+ return VNET_API_ERROR_TUNNEL_EXIST;
+
+ /*if not set explicitly, default to l2 */
+ if (a->decap_next_index == ~0)
+ a->decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
+ if (!gtpu_decap_next_is_valid (gtm, is_ip6, a->decap_next_index))
+ return VNET_API_ERROR_INVALID_DECAP_NEXT;
+
+ pool_get_aligned (gtm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ /* copy from arg structure */
+#define _(x) t->x = a->x;
+ foreach_copy_field;
+#undef _
+
+ ip_udp_gtpu_rewrite (t, is_ip6);
+
+ /* copy the key */
+ if (is_ip6)
+ hash_set_key_copy (&gtm->gtpu6_tunnel_by_key, &key6,
+ t - gtm->tunnels);
+ else
+ hash_set (gtm->gtpu4_tunnel_by_key, key4.as_u64, t - gtm->tunnels);
+
+ vnet_hw_interface_t *hi;
+ if (vec_len (gtm->free_gtpu_tunnel_hw_if_indices) > 0)
+ {
+ vnet_interface_main_t *im = &vnm->interface_main;
+ hw_if_index = gtm->free_gtpu_tunnel_hw_if_indices
+ [vec_len (gtm->free_gtpu_tunnel_hw_if_indices) - 1];
+ _vec_len (gtm->free_gtpu_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - gtm->tunnels;
+ hi->hw_instance = hi->dev_instance;
+
+ /* clear old stats of freed tunnel before reuse */
+ sw_if_index = hi->sw_if_index;
+ vnet_interface_counter_lock (im);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX],
+ sw_if_index);
+ vlib_zero_combined_counter (&im->combined_sw_if_counters
+ [VNET_INTERFACE_COUNTER_RX],
+ sw_if_index);
+ vlib_zero_simple_counter (&im->sw_if_counters
+ [VNET_INTERFACE_COUNTER_DROP],
+ sw_if_index);
+ vnet_interface_counter_unlock (im);
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, gtpu_device_class.index, t - gtm->tunnels,
+ gtpu_hw_class.index, t - gtm->tunnels);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->sw_if_index = sw_if_index = hi->sw_if_index;
+
+ vec_validate_init_empty (gtm->tunnel_index_by_sw_if_index, sw_if_index,
+ ~0);
+ gtm->tunnel_index_by_sw_if_index[sw_if_index] = t - gtm->tunnels;
+
+ /* setup l2 input config with l2 feature and bd 0 to drop packet */
+ vec_validate (l2im->configs, sw_if_index);
+ l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
+ l2im->configs[sw_if_index].bd_index = 0;
+
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
+ vnet_sw_interface_set_flags (vnm, sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ fib_node_init (&t->node, gtm->fib_node_type);
+ fib_prefix_t tun_dst_pfx;
+ u32 encap_index = !is_ip6 ?
+ gtpu4_encap_node.index : gtpu6_encap_node.index;
+ vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
+
+ fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx);
+ if (!ip46_address_is_multicast (&t->dst))
+ {
+ /* Unicast tunnel -
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+ vtep_addr_ref (&t->src);
+ t->fib_entry_index = fib_table_entry_special_add
+ (t->encap_fib_index, &tun_dst_pfx, FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+ t->sibling_index = fib_entry_child_add
+ (t->fib_entry_index, gtm->fib_node_type, t - gtm->tunnels);
+ gtpu_tunnel_restack_dpo (t);
+ }
+ else
+ {
+ /* Multicast tunnel -
+ * as the same mcast group can be used for multiple mcast tunnels
+ * with different TEIDs, create the output fib adjacency only if
+ * it does not already exist
+ */
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+
+ if (vtep_addr_ref (&t->dst) == 1)
+ {
+ fib_node_index_t mfei;
+ adj_index_t ai;
+ fib_route_path_t path = {
+ .frp_proto = fib_proto_to_dpo (fp),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ const mfib_prefix_t mpfx = {
+ .fp_proto = fp,
+ .fp_len = (is_ip6 ? 128 : 32),
+ .fp_grp_addr = tun_dst_pfx.fp_addr,
+ };
+
+ /*
+ * Setup the (*,G) to receive traffic on the mcast group
+ * - the forwarding interface is for-us
+ * - the accepting interface is that from the API
+ */
+ mfib_table_entry_path_update (t->encap_fib_index,
+ &mpfx,
+ MFIB_SOURCE_GTPU,
+ &path, MFIB_ITF_FLAG_FORWARD);
+
+ path.frp_sw_if_index = a->mcast_sw_if_index;
+ path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
+ mfei = mfib_table_entry_path_update (t->encap_fib_index,
+ &mpfx,
+ MFIB_SOURCE_GTPU,
+ &path,
+ MFIB_ITF_FLAG_ACCEPT);
+
+ /*
+ * Create the mcast adjacency to send traffic to the group
+ */
+ ai = adj_mcast_add_or_lock (fp,
+ fib_proto_to_link (fp),
+ a->mcast_sw_if_index);
+
+ /*
+ * create a new end-point
+ */
+ mcast_shared_add (&t->dst, mfei, ai);
+ }
+
+ dpo_id_t dpo = DPO_INVALID;
+ mcast_shared_t ep = mcast_shared_get (&t->dst);
+
+ /* Stack shared mcast dst mac addr rewrite on encap */
+ dpo_set (&dpo, DPO_ADJACENCY_MCAST,
+ fib_proto_to_dpo (fp), ep.mcast_adj_index);
+
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+
+ dpo_reset (&dpo);
+ flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
+ }
+
+ /* Set gtpu tunnel output node */
+ hi->output_node_index = encap_index;
+
+ vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
+ flood_class;
+ }
+ else
+ {
+ /* deleting a tunnel: tunnel must exist */
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ t = pool_elt_at_index (gtm->tunnels, p[0]);
+ sw_if_index = t->sw_if_index;
+
+ vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */ );
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, t->sw_if_index);
+ si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;
+
+ /* make sure tunnel is removed from l2 bd or xconnect */
+ set_int_l2_mode (gtm->vlib_main, vnm, MODE_L3, t->sw_if_index, 0, 0, 0,
+ 0);
+ vec_add1 (gtm->free_gtpu_tunnel_hw_if_indices, t->hw_if_index);
+
+ gtm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
+
+ if (!is_ip6)
+ hash_unset (gtm->gtpu4_tunnel_by_key, key4.as_u64);
+ else
+ hash_unset_key_free (&gtm->gtpu6_tunnel_by_key, &key6);
+
+ if (!ip46_address_is_multicast (&t->dst))
+ {
+ vtep_addr_unref (&t->src);
+ fib_entry_child_remove (t->fib_entry_index, t->sibling_index);
+ fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR);
+ }
+ else if (vtep_addr_unref (&t->dst) == 0)
+ {
+ mcast_shared_remove (&t->dst);
+ }
+
+ fib_node_deinit (&t->node);
+ vec_free (t->rewrite);
+ pool_put (gtm->tunnels, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
+
+static uword
+get_decap_next_for_node (u32 node_index, u32 ipv4_set)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+ vlib_main_t *vm = gtm->vlib_main;
+ uword input_node = (ipv4_set) ? gtpu4_input_node.index :
+ gtpu6_input_node.index;
+
+ return vlib_node_add_next (vm, input_node, node_index);
+}
+
+static uword
+unformat_decap_next (unformat_input_t * input, va_list * args)
+{
+ u32 *result = va_arg (*args, u32 *);
+ u32 ipv4_set = va_arg (*args, int);
+ gtpu_main_t *gtm = &gtpu_main;
+ vlib_main_t *vm = gtm->vlib_main;
+ u32 node_index;
+ u32 tmp;
+
+ if (unformat (input, "l2"))
+ *result = GTPU_INPUT_NEXT_L2_INPUT;
+ else if (unformat (input, "ip4"))
+ *result = GTPU_INPUT_NEXT_IP4_INPUT;
+ else if (unformat (input, "ip6"))
+ *result = GTPU_INPUT_NEXT_IP6_INPUT;
+ else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
+ *result = get_decap_next_for_node (node_index, ipv4_set);
+ else if (unformat (input, "%d", &tmp))
+ *result = tmp;
+ else
+ return 0;
+
+ return 1;
+}
+
+static clib_error_t *
+gtpu_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip46_address_t src, dst;
+ u8 is_add = 1;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u8 grp_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u32 encap_fib_index = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
+ u32 teid = 0;
+ u32 tmp;
+ int rv;
+ vnet_gtpu_add_del_tunnel_args_t _a, *a = &_a;
+ u32 tunnel_sw_if_index;
+ clib_error_t *error = NULL;
+
+ /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ memset (&src, 0, sizeof src);
+ memset (&dst, 0, sizeof dst);
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (line_input, "src %U",
+ unformat_ip4_address, &src.ip4))
+ {
+ src_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst %U",
+ unformat_ip4_address, &dst.ip4))
+ {
+ dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "src %U",
+ unformat_ip6_address, &src.ip6))
+ {
+ src_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "dst %U",
+ unformat_ip6_address, &dst.ip6))
+ {
+ dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip4_address, &dst.ip4,
+ unformat_vnet_sw_interface,
+ vnet_get_main (), &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip6_address, &dst.ip6,
+ unformat_vnet_sw_interface,
+ vnet_get_main (), &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "encap-vrf-id %d", &tmp))
+ {
+ encap_fib_index = fib_table_find (fib_ip_proto (ipv6_set), tmp);
+ if (encap_fib_index == ~0)
+ {
+ error =
+ clib_error_return (0, "nonexistent encap-vrf-id %d", tmp);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "decap-next %U", unformat_decap_next,
+ &decap_next_index, ipv4_set))
+ ;
+ else if (unformat (line_input, "teid %d", &teid))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (src_set == 0)
+ {
+ error = clib_error_return (0, "tunnel src address not specified");
+ goto done;
+ }
+
+ if (dst_set == 0)
+ {
+ error = clib_error_return (0, "tunnel dst address not specified");
+ goto done;
+ }
+
+ if (grp_set && !ip46_address_is_multicast (&dst))
+ {
+ error = clib_error_return (0, "tunnel group address not multicast");
+ goto done;
+ }
+
+ if (grp_set == 0 && ip46_address_is_multicast (&dst))
+ {
+ error = clib_error_return (0, "dst address must be unicast");
+ goto done;
+ }
+
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ error = clib_error_return (0, "tunnel nonexistent multicast device");
+ goto done;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ error = clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ goto done;
+ }
+
+ if (ip46_address_cmp (&src, &dst) == 0)
+ {
+ error = clib_error_return (0, "src and dst addresses are identical");
+ goto done;
+ }
+
+ if (decap_next_index == ~0)
+ {
+ error = clib_error_return (0, "next node not found");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = is_add;
+ a->is_ip6 = ipv6_set;
+
+#define _(x) a->x = x;
+ foreach_copy_field;
+#undef _
+
+ rv = vnet_gtpu_add_del_tunnel (a, &tunnel_sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ if (is_add)
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), tunnel_sw_if_index);
+ break;
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ error = clib_error_return (0, "tunnel already exists...");
+ goto done;
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "tunnel does not exist...");
+ goto done;
+
+ default:
+ error = clib_error_return
+ (0, "vnet_gtpu_add_del_tunnel returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Add or delete a GTPU Tunnel.
+ *
+ * GTPU provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using GTPU tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * You can refer to this kind of L2 overlay bridge domain as a GTPU
+ * (GPRS Tunnelling Protocol, User plane) segment.
+ *
+ * @cliexpar
+ * Example of how to create a GTPU Tunnel:
+ * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 encap-vrf-id 7}
+ * Example of how to delete a GTPU Tunnel:
+ * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 del}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_gtpu_tunnel_command, static) = {
+ .path = "create gtpu tunnel",
+ .short_help =
+ "create gtpu tunnel src <local-vtep-addr>"
+ " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} teid <nn>"
+ " [encap-vrf-id <nn>] [decap-next [l2|ip4|ip6|node <name>]] [del]",
+ .function = gtpu_add_del_tunnel_command_fn,
+};
+/* *INDENT-ON* */
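+/*
+ * Illustrative sketch (not part of the plugin): the programmatic
+ * equivalent of the CLI example above, calling vnet_gtpu_add_del_tunnel()
+ * directly. Addresses and ids are the hypothetical values from the example.
+ *
+ *   vnet_gtpu_add_del_tunnel_args_t a = {
+ *     .is_add = 1,
+ *     .is_ip6 = 0,
+ *     .teid = 13,
+ *     .mcast_sw_if_index = ~0,
+ *     .decap_next_index = GTPU_INPUT_NEXT_L2_INPUT,
+ *     .encap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, 7),
+ *   };
+ *   a.src.ip4.as_u32 = clib_host_to_net_u32 (0x0a000301);  // 10.0.3.1
+ *   a.dst.ip4.as_u32 = clib_host_to_net_u32 (0x0a000303);  // 10.0.3.3
+ *   u32 sw_if_index = ~0;
+ *   int rv = vnet_gtpu_add_del_tunnel (&a, &sw_if_index);
+ */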
+
+static clib_error_t *
+show_gtpu_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+ gtpu_tunnel_t *t;
+
+ if (pool_elts (gtm->tunnels) == 0)
+ vlib_cli_output (vm, "No gtpu tunnels configured...");
+
+ pool_foreach (t, gtm->tunnels, (
+ {
+ vlib_cli_output (vm, "%U",
+ format_gtpu_tunnel, t);
+ }
+ ));
+
+ return 0;
+}
+
+/*?
+ * Display all the GTPU Tunnel entries.
+ *
+ * @cliexpar
+ * Example of how to display the GTPU Tunnel entries:
+ * @cliexstart{show gtpu tunnel}
+ * [0] src 10.0.3.1 dst 10.0.3.3 teid 13 encap_fib_index 0 sw_if_index 5 decap_next l2
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_gtpu_tunnel_command, static) = {
+ .path = "show gtpu tunnel",
+ .short_help = "show gtpu tunnel",
+ .function = show_gtpu_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+void
+vnet_int_gtpu_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
+{
+ if (is_ip6)
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-gtpu-bypass",
+ sw_if_index, is_enable, 0, 0);
+ else
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-gtpu-bypass",
+ sw_if_index, is_enable, 0, 0);
+}
+
+static clib_error_t *
+set_ip_gtpu_bypass (u32 is_ip6,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index, is_enable;
+
+ sw_if_index = ~0;
+ is_enable = 1;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user
+ (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_enable = 0;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == sw_if_index)
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ vnet_int_gtpu_bypass_mode (sw_if_index, is_ip6, is_enable);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+set_ip4_gtpu_bypass (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ return set_ip_gtpu_bypass (0, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip4-gtpu-bypass' graph node for a given interface.
+ * By adding the IPv4 gtpu-bypass graph node to an interface, the node checks
+ * for and validates incoming gtpu packets and bypasses the ip4-lookup,
+ * ip4-local, and ip4-udp-lookup nodes to speed up gtpu packet forwarding.
+ * The extra overhead this node adds for non-gtpu packets is kept to a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip4-gtpu-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-gtpu-bypass}
+ * Name Next Previous
+ * ip4-gtpu-bypass error-drop [0]
+ * gtpu4-input [1]
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip4-gtpu-bypass on an interface:
+ * @cliexcmd{set interface ip gtpu-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip4-gtpu-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-gtpu-bypass}
+ * Name Next Previous
+ * ip4-gtpu-bypass error-drop [0] ip4-input
+ * gtpu4-input [1] ip4-input-no-checksum
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv4 unicast:
+ * ip4-gtpu-bypass
+ * ip4-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip4-gtpu-bypass on an interface:
+ * @cliexcmd{set interface ip gtpu-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_gtpu_bypass_command, static) = {
+ .path = "set interface ip gtpu-bypass",
+ .function = set_ip4_gtpu_bypass,
+ .short_help = "set interface ip gtpu-bypass <interface> [del]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_ip6_gtpu_bypass (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ return set_ip_gtpu_bypass (1, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip6-gtpu-bypass' graph node for a given interface.
+ * By adding the IPv6 gtpu-bypass graph node to an interface, the node checks
+ * for and validates incoming gtpu packets and bypasses the ip6-lookup,
+ * ip6-local, and ip6-udp-lookup nodes to speed up gtpu packet forwarding.
+ * The extra overhead this node adds for non-gtpu packets is kept to a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip6-gtpu-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-gtpu-bypass}
+ * Name Next Previous
+ * ip6-gtpu-bypass error-drop [0]
+ * gtpu6-input [1]
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip6-gtpu-bypass on an interface:
+ * @cliexcmd{set interface ip6 gtpu-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip6-gtpu-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-gtpu-bypass}
+ * Name Next Previous
+ * ip6-gtpu-bypass error-drop [0] ip6-input
+ * gtpu6-input [1]
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv6 unicast:
+ * ip6-gtpu-bypass
+ * ip6-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip6-gtpu-bypass on an interface:
+ * @cliexcmd{set interface ip6 gtpu-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_gtpu_bypass_command, static) = {
+ .path = "set interface ip6 gtpu-bypass",
+ .function = set_ip6_gtpu_bypass,
+ .short_help = "set interface ip gtpu-bypass <interface> [del]",
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+gtpu_init (vlib_main_t * vm)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+
+ gtm->vnet_main = vnet_get_main ();
+ gtm->vlib_main = vm;
+
+ /* initialize the ip6 hash */
+ gtm->gtpu6_tunnel_by_key = hash_create_mem (0,
+ sizeof (gtpu6_tunnel_key_t),
+ sizeof (uword));
+ gtm->vtep6 = hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword));
+ gtm->mcast_shared = hash_create_mem (0,
+ sizeof (ip46_address_t),
+ sizeof (mcast_shared_t));
+
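+ /* GTP-U traffic arrives on the 3GPP-assigned UDP destination port 2152;
+ * the same port number is registered for both IPv4 and IPv6. */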
+ udp_register_dst_port (vm, UDP_DST_PORT_GTPU,
+ gtpu4_input_node.index, /* is_ip4 */ 1);
+ udp_register_dst_port (vm, UDP_DST_PORT_GTPU6,
+ gtpu6_input_node.index, /* is_ip4 */ 0);
+
+ gtm->fib_node_type = fib_node_register_new_type (&gtpu_vft);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (gtpu_init);
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "GTPv1-U",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gtpu/gtpu.h b/src/plugins/gtpu/gtpu.h
new file mode 100644
index 00000000..744d21d4
--- /dev/null
+++ b/src/plugins/gtpu/gtpu.h
@@ -0,0 +1,264 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef included_vnet_gtpu_h
+#define included_vnet_gtpu_h
+
+#include <vppinfra/lock.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/fib/fib_table.h>
+
+/**
+ * GTP-U v1 header layout (3GPP TS 29.281):
+ *
+ *        Bits
+ * Octets 8 7 6 5 4 3 2 1
+ * 1      Version PT (*) E S PN
+ * 2      Message Type
+ * 3      Length (1st Octet)
+ * 4      Length (2nd Octet)
+ * 5      Tunnel Endpoint Identifier (1st Octet)
+ * 6      Tunnel Endpoint Identifier (2nd Octet)
+ * 7      Tunnel Endpoint Identifier (3rd Octet)
+ * 8      Tunnel Endpoint Identifier (4th Octet)
+ * 9      Sequence Number (1st Octet)     - optional
+ * 10     Sequence Number (2nd Octet)     - optional
+ * 11     N-PDU Number                    - optional
+ * 12     Next Extension Header Type      - optional
+ *
+ * Octets 9-12 are present only when at least one of the E, S or PN
+ * flags is set.
+**/
+
+typedef struct
+{
+ u8 ver_flags;
+ u8 type;
+ u16 length; /* length in octets of everything after the mandatory 8-byte header */
+ u32 teid;
+ u16 sequence;
+ u8 pdu_number;
+ u8 next_ext_type;
+} gtpu_header_t;
+
+#define GTPU_VER_MASK (7<<5)
+#define GTPU_PT_BIT (1<<4)
+#define GTPU_E_BIT (1<<2)
+#define GTPU_S_BIT (1<<1)
+#define GTPU_PN_BIT (1<<0)
+#define GTPU_E_S_PN_BIT (7<<0)
+
+#define GTPU_V1_VER (1<<5)
+
+#define GTPU_PT_GTP (1<<4)
+#define GTPU_TYPE_GTPU 255
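+
+/* Illustrative sketch of how the masks above compose the first octet of a
+ * plain GTPUv1 G-PDU (version 1, PT=1, no optional fields):
+ *
+ *   u8 ver_flags = GTPU_V1_VER | GTPU_PT_GTP;      // 0x30
+ *   ASSERT ((ver_flags & GTPU_VER_MASK) == GTPU_V1_VER);
+ *   ASSERT ((ver_flags & GTPU_E_S_PN_BIT) == 0);   // 8-byte header only
+ */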
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct
+{
+ ip4_header_t ip4; /* 20 bytes */
+ udp_header_t udp; /* 8 bytes */
+ gtpu_header_t gtpu; /* 8 bytes */
+}) ip4_gtpu_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct
+{
+ ip6_header_t ip6; /* 40 bytes */
+ udp_header_t udp; /* 8 bytes */
+ gtpu_header_t gtpu; /* 8 bytes */
+}) ip6_gtpu_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED
+(struct {
+ /*
+ * Key fields: ip src and gtpu teid on incoming gtpu packet
+ * all fields in NET byte order
+ */
+ union {
+ struct {
+ u32 src;
+ u32 teid;
+ };
+ u64 as_u64;
+ };
+}) gtpu4_tunnel_key_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED
+(struct {
+ /*
+ * Key fields: ip src and gtpu teid on incoming gtpu packet
+ * all fields in NET byte order
+ */
+ ip6_address_t src;
+ u32 teid;
+}) gtpu6_tunnel_key_t;
+/* *INDENT-ON* */
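+
+/* Illustrative sketch (assumes headers already parsed, as in gtpu_decap.c):
+ * building the IPv4 decap key and probing the tunnel hash with it:
+ *
+ *   gtpu4_tunnel_key_t key;
+ *   key.src = ip4_hdr->src_address.as_u32;   // net byte order
+ *   key.teid = gtpu_hdr->teid;               // net byte order
+ *   uword *p = hash_get (gtm->gtpu4_tunnel_by_key, key.as_u64);
+ *   if (p)  // p[0] is the index of the tunnel in gtm->tunnels
+ *     t = pool_elt_at_index (gtm->tunnels, p[0]);
+ */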
+
+typedef struct
+{
+ /* Rewrite string */
+ u8 *rewrite;
+
+ /* FIB DPO for IP forwarding of gtpu encap packet */
+ dpo_id_t next_dpo;
+
+ /* gtpu teid in HOST byte order */
+ u32 teid;
+
+ /* tunnel src and dst addresses */
+ ip46_address_t src;
+ ip46_address_t dst;
+
+ /* mcast packet output intf index (used only if dst is mcast) */
+ u32 mcast_sw_if_index;
+
+ /* decap next index */
+ u32 decap_next_index;
+
+ /* The FIB index for src/dst addresses */
+ u32 encap_fib_index;
+
+ /* vnet intfc index */
+ u32 sw_if_index;
+ u32 hw_if_index;
+
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /*
+ * The FIB entry for (depending on gtpu tunnel is unicast or mcast)
+ * sending unicast gtpu encap packets or receiving mcast gtpu packets
+ */
+ fib_node_index_t fib_entry_index;
+ adj_index_t mcast_adj_index;
+
+ /**
+ * The tunnel is a child of the FIB entry for its destination. This is
+ * so it receives updates when the forwarding information for that entry
+ * changes.
+ * The tunnels sibling index on the FIB entry's dependency list.
+ */
+ u32 sibling_index;
+} gtpu_tunnel_t;
+
+#define foreach_gtpu_input_next \
+_(DROP, "error-drop") \
+_(L2_INPUT, "l2-input") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input" )
+
+typedef enum
+{
+#define _(s,n) GTPU_INPUT_NEXT_##s,
+ foreach_gtpu_input_next
+#undef _
+ GTPU_INPUT_N_NEXT,
+} gtpu_input_next_t;
+
+typedef enum
+{
+#define gtpu_error(n,s) GTPU_ERROR_##n,
+#include <gtpu/gtpu_error.def>
+#undef gtpu_error
+ GTPU_N_ERROR,
+} gtpu_input_error_t;
+
+typedef struct
+{
+ /* vector of encap tunnel instances */
+ gtpu_tunnel_t *tunnels;
+
+ /* lookup tunnel by key */
+ uword *gtpu4_tunnel_by_key; /* keyed on ipv4.dst + teid */
+ uword *gtpu6_tunnel_by_key; /* keyed on ipv6.dst + teid */
+
+ /* local VTEP IP ref counts, used by the gtpu-bypass nodes to check whether
+ a received gtpu packet's DIP matches any local VTEP address */
+ uword *vtep4; /* local ip4 VTEPs keyed on their ip4 addr */
+ uword *vtep6; /* local ip6 VTEPs keyed on their ip6 addr */
+
+ /* mcast shared info */
+ uword *mcast_shared; /* keyed on mcast ip46 addr */
+
+ /* Free vlib hw_if_indices */
+ u32 *free_gtpu_tunnel_hw_if_indices;
+
+ /* Mapping from sw_if_index to tunnel index */
+ u32 *tunnel_index_by_sw_if_index;
+
+ /**
+ * Node type for registering to fib changes.
+ */
+ fib_node_type_t fib_node_type;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} gtpu_main_t;
+
+gtpu_main_t gtpu_main;
+
+extern vlib_node_registration_t gtpu4_input_node;
+extern vlib_node_registration_t gtpu6_input_node;
+extern vlib_node_registration_t gtpu4_encap_node;
+extern vlib_node_registration_t gtpu6_encap_node;
+
+u8 *format_gtpu_encap_trace (u8 * s, va_list * args);
+
+typedef struct
+{
+ u8 is_add;
+ u8 is_ip6;
+ ip46_address_t src, dst;
+ u32 mcast_sw_if_index;
+ u32 encap_fib_index;
+ u32 decap_next_index;
+ u32 teid;
+} vnet_gtpu_add_del_tunnel_args_t;
+
+int vnet_gtpu_add_del_tunnel
+ (vnet_gtpu_add_del_tunnel_args_t * a, u32 * sw_if_indexp);
+
+void vnet_int_gtpu_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable);
+#endif /* included_vnet_gtpu_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gtpu/gtpu_all_api_h.h b/src/plugins/gtpu/gtpu_all_api_h.h
new file mode 100644
index 00000000..dbfe0397
--- /dev/null
+++ b/src/plugins/gtpu/gtpu_all_api_h.h
@@ -0,0 +1,18 @@
+/*
+ * gtpu_all_api_h.h - plug-in api #include file
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <gtpu/gtpu.api.h>
diff --git a/src/plugins/gtpu/gtpu_api.c b/src/plugins/gtpu/gtpu_api.c
new file mode 100644
index 00000000..49a5053d
--- /dev/null
+++ b/src/plugins/gtpu/gtpu_api.c
@@ -0,0 +1,256 @@
+/*
+ *------------------------------------------------------------------
+ * gtpu_api.c - gtpu api
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vppinfra/byte_order.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+#include <gtpu/gtpu.h>
+
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+#include <gtpu/gtpu.api.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+/* define message structures */
+#define vl_typedefs
+#include <gtpu/gtpu.api.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <gtpu/gtpu.api.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <gtpu/gtpu.api.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <gtpu/gtpu.api.h>
+#undef vl_api_version
+
+#define vl_msg_name_crc_list
+#include <gtpu/gtpu.api.h>
+#undef vl_msg_name_crc_list
+
+#define REPLY_MSG_ID_BASE gtm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+setup_message_id_table (gtpu_main_t * gtm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + gtm->msg_id_base);
+ foreach_vl_msg_name_crc_gtpu;
+#undef _
+}
+
+#define foreach_gtpu_plugin_api_msg \
+_(SW_INTERFACE_SET_GTPU_BYPASS, sw_interface_set_gtpu_bypass) \
+_(GTPU_ADD_DEL_TUNNEL, gtpu_add_del_tunnel) \
+_(GTPU_TUNNEL_DUMP, gtpu_tunnel_dump)
+
+static void
+ vl_api_sw_interface_set_gtpu_bypass_t_handler
+ (vl_api_sw_interface_set_gtpu_bypass_t * mp)
+{
+ vl_api_sw_interface_set_gtpu_bypass_reply_t *rmp;
+ int rv = 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ gtpu_main_t *gtm = &gtpu_main;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vnet_int_gtpu_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_GTPU_BYPASS_REPLY);
+}
+
+static void vl_api_gtpu_add_del_tunnel_t_handler
+ (vl_api_gtpu_add_del_tunnel_t * mp)
+{
+ vl_api_gtpu_add_del_tunnel_reply_t *rmp;
+ int rv = 0;
+ ip4_main_t *im = &ip4_main;
+ gtpu_main_t *gtm = &gtpu_main;
+
+ uword *p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+
+ vnet_gtpu_add_del_tunnel_args_t a = {
+ .is_add = mp->is_add,
+ .is_ip6 = mp->is_ipv6,
+ .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
+ .encap_fib_index = p[0],
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .teid = ntohl (mp->teid),
+ .dst = to_ip46 (mp->is_ipv6, mp->dst_address),
+ .src = to_ip46 (mp->is_ipv6, mp->src_address),
+ };
+
+ /* Check src & dst are different */
+ if (ip46_address_cmp (&a.dst, &a.src) == 0)
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+ if (ip46_address_is_multicast (&a.dst) &&
+ !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index))
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto out;
+ }
+
+ u32 sw_if_index = ~0;
+ rv = vnet_gtpu_add_del_tunnel (&a, &sw_if_index);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_GTPU_ADD_DEL_TUNNEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void send_gtpu_tunnel_details
+ (gtpu_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_gtpu_tunnel_details_t *rmp;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ u8 is_ipv6 = !ip46_address_is_ip4 (&t->dst);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_GTPU_TUNNEL_DETAILS + gtpu_main.msg_id_base);
+ if (is_ipv6)
+ {
+ memcpy (rmp->src_address, t->src.ip6.as_u8, 16);
+ memcpy (rmp->dst_address, t->dst.ip6.as_u8, 16);
+ rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
+ }
+ else
+ {
+ memcpy (rmp->src_address, t->src.ip4.as_u8, 4);
+ memcpy (rmp->dst_address, t->dst.ip4.as_u8, 4);
+ rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ }
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->teid = htonl (t->teid);
+ rmp->decap_next_index = htonl (t->decap_next_index);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->is_ipv6 = is_ipv6;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
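+/* Dump handler: a sw_if_index of ~0 requests details for every tunnel;
+   any other value returns the single tunnel on that interface, if one
+   exists. */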
+static void
+vl_api_gtpu_tunnel_dump_t_handler (vl_api_gtpu_tunnel_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ gtpu_main_t *gtm = &gtpu_main;
+ gtpu_tunnel_t *t;
+ u32 sw_if_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (t, gtm->tunnels,
+ ({
+ send_gtpu_tunnel_details(t, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (gtm->tunnel_index_by_sw_if_index)) ||
+ (~0 == gtm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &gtm->tunnels[gtm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_gtpu_tunnel_details (t, q, mp->context);
+ }
+}
+
+
+static clib_error_t *
+gtpu_api_hookup (vlib_main_t * vm)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+
+ u8 *name = format (0, "gtpu_%08x%c", api_version, 0);
+ gtm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + gtm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_gtpu_plugin_api_msg;
+#undef _
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (gtm, &api_main);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (gtpu_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gtpu/gtpu_decap.c b/src/plugins/gtpu/gtpu_decap.c
new file mode 100644
index 00000000..de235889
--- /dev/null
+++ b/src/plugins/gtpu/gtpu_decap.c
@@ -0,0 +1,1305 @@
+/*
+ * decap.c: gtpu tunnel decap packet processing
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <gtpu/gtpu.h>
+
+vlib_node_registration_t gtpu4_input_node;
+vlib_node_registration_t gtpu6_input_node;
+
+typedef struct {
+ u32 next_index;
+ u32 tunnel_index;
+ u32 error;
+ u32 teid;
+} gtpu_rx_trace_t;
+
+static u8 * format_gtpu_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ gtpu_rx_trace_t * t = va_arg (*args, gtpu_rx_trace_t *);
+
+ if (t->tunnel_index != ~0)
+ {
+ s = format (s, "GTPU decap from gtpu_tunnel%d teid %d next %d error %d",
+ t->tunnel_index, t->teid, t->next_index, t->error);
+ }
+ else
+ {
+ s = format (s, "GTPU decap error - tunnel for teid %d does not exist",
+ t->teid);
+ }
+ return s;
+}
+
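+/* Returns 1 when the FIB the packet was received in (or the FIB forced via
+ * sw_if_index[VLIB_TX], when set) matches the tunnel's encap FIB, so that
+ * decap never crosses VRFs. */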
+always_inline u32
+validate_gtpu_fib (vlib_buffer_t *b, gtpu_tunnel_t *t, u32 is_ip4)
+{
+ u32 fib_index, sw_if_index;
+
+ sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+
+ if (is_ip4)
+ fib_index = (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ vec_elt (ip4_main.fib_index_by_sw_if_index, sw_if_index) :
+ vnet_buffer (b)->sw_if_index[VLIB_TX];
+ else
+ fib_index = (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ vec_elt (ip6_main.fib_index_by_sw_if_index, sw_if_index) :
+ vnet_buffer (b)->sw_if_index[VLIB_TX];
+
+ return (fib_index == t->encap_fib_index);
+}
+
+always_inline uword
+gtpu_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u32 is_ip4)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ gtpu_main_t * gtm = &gtpu_main;
+ vnet_main_t * vnm = gtm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 last_tunnel_index = ~0;
+ gtpu4_tunnel_key_t last_key4;
+ gtpu6_tunnel_key_t last_key6;
+ u32 pkts_decapsulated = 0;
+ u32 thread_index = vlib_get_thread_index();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+
+ if (is_ip4)
+ last_key4.as_u64 = ~0;
+ else
+ memset (&last_key6, 0xff, sizeof (last_key6));
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ ip4_header_t * ip4_0, * ip4_1;
+ ip6_header_t * ip6_0, * ip6_1;
+ gtpu_header_t * gtpu0, * gtpu1;
+ u32 gtpu_hdr_len0 = 0, gtpu_hdr_len1 = 0;
+ uword * p0, * p1;
+ u32 tunnel_index0, tunnel_index1;
+ gtpu_tunnel_t * t0, * t1, * mt0 = NULL, * mt1 = NULL;
+ gtpu4_tunnel_key_t key4_0, key4_1;
+ gtpu6_tunnel_key_t key6_0, key6_1;
+ u32 error0, error1;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* udp leaves current_data pointing at the gtpu header */
+ gtpu0 = vlib_buffer_get_current (b0);
+ gtpu1 = vlib_buffer_get_current (b1);
+ if (is_ip4) {
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+ vlib_buffer_advance
+ (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+ ip4_0 = vlib_buffer_get_current (b0);
+ ip4_1 = vlib_buffer_get_current (b1);
+ } else {
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip6_header_t)));
+ vlib_buffer_advance
+ (b1, -(word)(sizeof(udp_header_t)+sizeof(ip6_header_t)));
+ ip6_0 = vlib_buffer_get_current (b0);
+ ip6_1 = vlib_buffer_get_current (b1);
+ }
+
+ /* pop (ip, udp, gtpu) */
+ if (is_ip4) {
+ vlib_buffer_advance
+ (b0, sizeof(*ip4_0)+sizeof(udp_header_t));
+ vlib_buffer_advance
+ (b1, sizeof(*ip4_1)+sizeof(udp_header_t));
+ } else {
+ vlib_buffer_advance
+ (b0, sizeof(*ip6_0)+sizeof(udp_header_t));
+ vlib_buffer_advance
+ (b1, sizeof(*ip6_1)+sizeof(udp_header_t));
+ }
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+
+ tunnel_index1 = ~0;
+ error1 = 0;
+
+ if (PREDICT_FALSE ((gtpu0->ver_flags & GTPU_VER_MASK) != GTPU_V1_VER))
+ {
+ error0 = GTPU_ERROR_BAD_VER;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ /* Manipulate packet 0 */
+ if (is_ip4) {
+ key4_0.src = ip4_0->src_address.as_u32;
+ key4_0.teid = gtpu0->teid;
+
+ /* Make sure a GTPU tunnel exists for the packet's SIP and teid;
+ * the SIP identifies a GTPU path, and the teid identifies a tunnel within that path */
+ if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
+ {
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ last_key4.as_u64 = key4_0.as_u64;
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
+ goto next0; /* valid packet */
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_0->dst_address)))
+ {
+ key4_0.src = ip4_0->dst_address.as_u32;
+ key4_0.teid = gtpu0->teid;
+ /* Make sure a mcast GTPU tunnel exists for the packet's DIP and teid */
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (gtm->tunnels, p0[0]);
+ goto next0; /* valid packet */
+ }
+ }
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+
+ } else /* !is_ip4 */ {
+ key6_0.src.as_u64[0] = ip6_0->src_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->src_address.as_u64[1];
+ key6_0.teid = gtpu0->teid;
+
+ /* Make sure a GTPU tunnel exists for the packet's SIP and teid;
+ * the SIP identifies a GTPU path, and the teid identifies a tunnel within that path */
+ if (PREDICT_FALSE (memcmp(&key6_0, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p0 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_0);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ clib_memcpy (&last_key6, &key6_0, sizeof(key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address,
+ &t0->src.ip6)))
+ goto next0; /* valid packet */
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip6_0->dst_address)))
+ {
+ key6_0.src.as_u64[0] = ip6_0->dst_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->dst_address.as_u64[1];
+ key6_0.teid = gtpu0->teid;
+ p0 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_0);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (gtm->tunnels, p0[0]);
+ goto next0; /* valid packet */
+ }
+ }
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ next0:
+ /* Manipulate gtpu header */
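+ /* Per 3GPP TS 29.281, the sequence number, N-PDU number and
+ next-extension-header octets (the last 4 bytes of gtpu_header_t)
+ are present only when E, S or PN is set; otherwise the mandatory
+ header is sizeof (gtpu_header_t) - 4 = 8 bytes. */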
+ if (PREDICT_FALSE((gtpu0->ver_flags & GTPU_E_S_PN_BIT) != 0))
+ {
+ gtpu_hdr_len0 = sizeof(gtpu_header_t);
+
+ /* Manipulate Sequence Number and N-PDU Number */
+ /* TBD */
+
+ /* Manipulate Next Extension Header */
+ /* TBD */
+ }
+ else
+ {
+ gtpu_hdr_len0 = sizeof(gtpu_header_t) - 4;
+ }
+
+ /* Pop gtpu header */
+ vlib_buffer_advance (b0, gtpu_hdr_len0);
+
+ next0 = t0->decap_next_index;
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ if (PREDICT_TRUE(next0 == GTPU_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b0);
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ sw_if_index0 = (mt0) ? mt0->sw_if_index : sw_if_index0;
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace0:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gtpu_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ tr->teid = clib_net_to_host_u32(gtpu0->teid);
+ }
+
+ if (PREDICT_FALSE ((gtpu1->ver_flags & GTPU_VER_MASK) != GTPU_V1_VER))
+ {
+ error1 = GTPU_ERROR_BAD_VER;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ /* Manipulate packet 1 */
+ if (is_ip4) {
+ key4_1.src = ip4_1->src_address.as_u32;
+ key4_1.teid = gtpu1->teid;
+
+ /* Make sure a GTPU tunnel exists for the packet's SIP and teid;
+ * the SIP identifies a GTPU path, and the teid identifies a tunnel within that path */
+ if (PREDICT_FALSE (key4_1.as_u64 != last_key4.as_u64))
+ {
+ p1 = hash_get (gtm->gtpu4_tunnel_by_key, key4_1.as_u64);
+ if (PREDICT_FALSE (p1 == NULL))
+ {
+ error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+ last_key4.as_u64 = key4_1.as_u64;
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b1, t1, is_ip4) == 0))
+ {
+ error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip4_1->dst_address.as_u32 == t1->src.ip4.as_u32))
+ goto next1; /* valid packet */
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_1->dst_address)))
+ {
+ key4_1.src = ip4_1->dst_address.as_u32;
+ key4_1.teid = gtpu1->teid;
+ /* Make sure a mcast GTPU tunnel exists for the packet's DIP and teid */
+ p1 = hash_get (gtm->gtpu4_tunnel_by_key, key4_1.as_u64);
+ if (PREDICT_TRUE (p1 != NULL))
+ {
+ mt1 = pool_elt_at_index (gtm->tunnels, p1[0]);
+ goto next1; /* valid packet */
+ }
+ }
+ error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+
+ } else /* !is_ip4 */ {
+ key6_1.src.as_u64[0] = ip6_1->src_address.as_u64[0];
+ key6_1.src.as_u64[1] = ip6_1->src_address.as_u64[1];
+ key6_1.teid = gtpu1->teid;
+
+ /* Make sure a GTPU tunnel exists for the packet's SIP and teid;
+ * the SIP identifies a GTPU path, and the teid identifies a tunnel within that path */
+ if (PREDICT_FALSE (memcmp(&key6_1, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p1 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_1);
+
+ if (PREDICT_FALSE (p1 == NULL))
+ {
+ error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ clib_memcpy (&last_key6, &key6_1, sizeof(key6_1));
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b1, t1, is_ip4) == 0))
+ {
+ error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip6_address_is_equal (&ip6_1->dst_address,
+ &t1->src.ip6)))
+ goto next1; /* valid packet */
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip6_1->dst_address)))
+ {
+ key6_1.src.as_u64[0] = ip6_1->dst_address.as_u64[0];
+ key6_1.src.as_u64[1] = ip6_1->dst_address.as_u64[1];
+ key6_1.teid = gtpu1->teid;
+ p1 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_1);
+ if (PREDICT_TRUE (p1 != NULL))
+ {
+ mt1 = pool_elt_at_index (gtm->tunnels, p1[0]);
+ goto next1; /* valid packet */
+ }
+ }
+ error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ next1:
+ /* Manipulate gtpu header */
+ if (PREDICT_FALSE((gtpu1->ver_flags & GTPU_E_S_PN_BIT) != 0))
+ {
+ gtpu_hdr_len1 = sizeof(gtpu_header_t);
+
+ /* Manipulate Sequence Number and N-PDU Number */
+ /* TBD */
+
+ /* Manipulate Next Extension Header */
+ /* TBD */
+ }
+ else
+ {
+ gtpu_hdr_len1 = sizeof(gtpu_header_t) - 4;
+ }
+
+ /* Pop gtpu header */
+ vlib_buffer_advance (b1, gtpu_hdr_len1);
+
+ next1 = t1->decap_next_index;
+ sw_if_index1 = t1->sw_if_index;
+ len1 = vlib_buffer_length_in_chain (vm, b1);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ if (PREDICT_TRUE(next1 == GTPU_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b1);
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+ sw_if_index1 = (mt1) ? mt1->sw_if_index : sw_if_index1;
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len1;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len1;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len1;
+ stats_sw_if_index = sw_if_index1;
+ }
+
+ trace1:
+ b1->error = error1 ? node->errors[error1] : 0;
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gtpu_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->tunnel_index = tunnel_index1;
+ tr->teid = clib_net_to_host_u32(gtpu1->teid);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ ip4_header_t * ip4_0;
+ ip6_header_t * ip6_0;
+ gtpu_header_t * gtpu0;
+ u32 gtpu_hdr_len0 = 0;
+ uword * p0;
+ u32 tunnel_index0;
+ gtpu_tunnel_t * t0, * mt0 = NULL;
+ gtpu4_tunnel_key_t key4_0;
+ gtpu6_tunnel_key_t key6_0;
+ u32 error0;
+ u32 sw_if_index0, len0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* udp leaves current_data pointing at the gtpu header */
+ gtpu0 = vlib_buffer_get_current (b0);
+ if (is_ip4) {
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+ ip4_0 = vlib_buffer_get_current (b0);
+ } else {
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip6_header_t)));
+ ip6_0 = vlib_buffer_get_current (b0);
+ }
+
+ /* pop (ip, udp) */
+ if (is_ip4) {
+ vlib_buffer_advance
+ (b0, sizeof(*ip4_0)+sizeof(udp_header_t));
+ } else {
+ vlib_buffer_advance
+ (b0, sizeof(*ip6_0)+sizeof(udp_header_t));
+ }
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+ if (PREDICT_FALSE ((gtpu0->ver_flags & GTPU_VER_MASK) != GTPU_V1_VER))
+ {
+ error0 = GTPU_ERROR_BAD_VER;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ if (is_ip4) {
+ key4_0.src = ip4_0->src_address.as_u32;
+ key4_0.teid = gtpu0->teid;
+
+ /* Make sure a GTPU tunnel exists for the packet's SIP and teid;
+ * the SIP identifies a GTPU path, and the teid identifies a tunnel within that path */
+ if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
+ {
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ last_key4.as_u64 = key4_0.as_u64;
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
+ goto next00; /* valid packet */
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_0->dst_address)))
+ {
+ key4_0.src = ip4_0->dst_address.as_u32;
+ key4_0.teid = gtpu0->teid;
+ /* Make sure a mcast GTPU tunnel exists for the packet's DIP and teid */
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (gtm->tunnels, p0[0]);
+ goto next00; /* valid packet */
+ }
+ }
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+
+ } else /* !is_ip4 */ {
+ key6_0.src.as_u64[0] = ip6_0->src_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->src_address.as_u64[1];
+ key6_0.teid = gtpu0->teid;
+
+ /* Make sure a GTPU tunnel exists for the packet's SIP and teid;
+ * the SIP identifies a GTPU path, and the teid identifies a tunnel within that path */
+ if (PREDICT_FALSE (memcmp(&key6_0, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p0 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_0);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ clib_memcpy (&last_key6, &key6_0, sizeof(key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address,
+ &t0->src.ip6)))
+ goto next00; /* valid packet */
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip6_0->dst_address)))
+ {
+ key6_0.src.as_u64[0] = ip6_0->dst_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->dst_address.as_u64[1];
+ key6_0.teid = gtpu0->teid;
+ p0 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_0);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (gtm->tunnels, p0[0]);
+ goto next00; /* valid packet */
+ }
+ }
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ next00:
+ /* Manipulate gtpu header */
+ if (PREDICT_FALSE((gtpu0->ver_flags & GTPU_E_S_PN_BIT) != 0))
+ {
+ gtpu_hdr_len0 = sizeof(gtpu_header_t);
+
+ /* Manipulate Sequence Number and N-PDU Number */
+ /* TBD */
+
+ /* Manipulate Next Extension Header */
+ /* TBD */
+ }
+ else
+ {
+ gtpu_hdr_len0 = sizeof(gtpu_header_t) - 4;
+ }
+
+ /* Pop gtpu header */
+ vlib_buffer_advance (b0, gtpu_hdr_len0);
+
+ next0 = t0->decap_next_index;
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ if (PREDICT_TRUE(next0 == GTPU_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b0);
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ sw_if_index0 = (mt0) ? mt0->sw_if_index : sw_if_index0;
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace00:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gtpu_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ tr->teid = clib_net_to_host_u32(gtpu0->teid);
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ /* Do we still need this now that tunnel tx stats is kept? */
+ vlib_node_increment_counter (vm, is_ip4?
+ gtpu4_input_node.index:gtpu6_input_node.index,
+ GTPU_ERROR_DECAPSULATED,
+ pkts_decapsulated);
+
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+gtpu4_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return gtpu_input(vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+static uword
+gtpu6_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return gtpu_input(vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+static char * gtpu_error_strings[] = {
+#define gtpu_error(n,s) s,
+#include <gtpu/gtpu_error.def>
+#undef gtpu_error
+};
+
+VLIB_REGISTER_NODE (gtpu4_input_node) = {
+ .function = gtpu4_input,
+ .name = "gtpu4-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = GTPU_N_ERROR,
+ .error_strings = gtpu_error_strings,
+
+ .n_next_nodes = GTPU_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [GTPU_INPUT_NEXT_##s] = n,
+ foreach_gtpu_input_next
+#undef _
+ },
+
+//temp .format_buffer = format_gtpu_header,
+ .format_trace = format_gtpu_rx_trace,
+ // $$$$ .unformat_buffer = unformat_gtpu_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (gtpu4_input_node, gtpu4_input)
+
+VLIB_REGISTER_NODE (gtpu6_input_node) = {
+ .function = gtpu6_input,
+ .name = "gtpu6-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = GTPU_N_ERROR,
+ .error_strings = gtpu_error_strings,
+
+ .n_next_nodes = GTPU_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [GTPU_INPUT_NEXT_##s] = n,
+ foreach_gtpu_input_next
+#undef _
+ },
+
+//temp .format_buffer = format_gtpu_header,
+ .format_trace = format_gtpu_rx_trace,
+ // $$$$ .unformat_buffer = unformat_gtpu_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (gtpu6_input_node, gtpu6_input)
+
+
+typedef enum {
+ IP_GTPU_BYPASS_NEXT_DROP,
+ IP_GTPU_BYPASS_NEXT_GTPU,
+ IP_GTPU_BYPASS_N_NEXT,
+} ip_gtpu_bypass_next_t;
+
+always_inline uword
+ip_gtpu_bypass_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ u32 is_ip4)
+{
+ gtpu_main_t * gtm = &gtpu_main;
+ u32 * from, * to_next, n_left_from, n_left_to_next, next_index;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+ ip4_address_t addr4; /* last IPv4 address matching a local VTEP address */
+ ip6_address_t addr6; /* last IPv6 address matching a local VTEP address */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ if (is_ip4) addr4.data_u32 = ~0;
+ else ip6_address_set_zero (&addr6);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ ip4_header_t * ip40, * ip41;
+ ip6_header_t * ip60, * ip61;
+ udp_header_t * udp0, * udp1;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ u32 bi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, good_udp0, proto0;
+ u8 error1, good_udp1, proto1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ if (is_ip4)
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ ip41 = vlib_buffer_get_current (b1);
+ }
+ else
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ ip61 = vlib_buffer_get_current (b1);
+ }
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
+ vnet_feature_next(vnet_buffer(b1)->sw_if_index[VLIB_RX], &next1, b1);
+
+ if (is_ip4)
+ {
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment(ip40) ? 0xfe : ip40->protocol;
+ proto1 = ip4_is_fragment(ip41) ? 0xfe : ip41->protocol;
+ }
+ else
+ {
+ proto0 = ip60->protocol;
+ proto1 = ip61->protocol;
+ }
+
+ /* Process packet 0 */
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit0; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_GTPU))
+ goto exit0; /* not GTPU packet */
+
+ /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip40->dst_address.as_u32)
+ {
+ if (!hash_get (gtm->vtep4, ip40->dst_address.as_u32))
+ goto exit0; /* no local VTEP for GTPU packet */
+ addr4 = ip40->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
+ {
+ if (!hash_get_mem (gtm->vtep6, &ip60->dst_address))
+ goto exit0; /* no local VTEP for GTPU packet */
+ addr6 = ip60->dst_address;
+ }
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_GTPU_BYPASS_NEXT_DROP : IP_GTPU_BYPASS_NEXT_GTPU;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+ /* gtpu-input node expects current_data at the GTPU header */
+ if (is_ip4)
+ vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit0:
+ /* Process packet 1 */
+ if (proto1 != IP_PROTOCOL_UDP)
+ goto exit1; /* not UDP packet */
+
+ if (is_ip4)
+ udp1 = ip4_next_header (ip41);
+ else
+ udp1 = ip6_next_header (ip61);
+
+ if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_GTPU))
+ goto exit1; /* not GTPU packet */
+
+ /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip41->dst_address.as_u32)
+ {
+ if (!hash_get (gtm->vtep4, ip41->dst_address.as_u32))
+ goto exit1; /* no local VTEP for GTPU packet */
+ addr4 = ip41->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip61->dst_address))
+ {
+ if (!hash_get_mem (gtm->vtep6, &ip61->dst_address))
+ goto exit1; /* no local VTEP for GTPU packet */
+ addr6 = ip61->dst_address;
+ }
+ }
+
+ flags1 = b1->flags;
+ good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp1 |= udp1->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len1 = clib_net_to_host_u16 (ip41->length);
+ else
+ ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp1))
+ {
+ if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
+ else
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
+ good_udp1 =
+ (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next1 = error1 ?
+ IP_GTPU_BYPASS_NEXT_DROP : IP_GTPU_BYPASS_NEXT_GTPU;
+ b1->error = error1 ? error_node->errors[error1] : 0;
+
+ /* gtpu-input node expects current_data at the GTPU header */
+ if (is_ip4)
+ vlib_buffer_advance (b1, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b1, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit1:
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * b0;
+ ip4_header_t * ip40;
+ ip6_header_t * ip60;
+ udp_header_t * udp0;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, good_udp0, proto0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ if (is_ip4)
+ ip40 = vlib_buffer_get_current (b0);
+ else
+ ip60 = vlib_buffer_get_current (b0);
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
+
+ if (is_ip4)
+ /* Treat IP4 frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment(ip40) ? 0xfe : ip40->protocol;
+ else
+ proto0 = ip60->protocol;
+
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_GTPU))
+ goto exit; /* not GTPU packet */
+
+ /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip40->dst_address.as_u32)
+ {
+ if (!hash_get (gtm->vtep4, ip40->dst_address.as_u32))
+ goto exit; /* no local VTEP for GTPU packet */
+ addr4 = ip40->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
+ {
+ if (!hash_get_mem (gtm->vtep6, &ip60->dst_address))
+ goto exit; /* no local VTEP for GTPU packet */
+ addr6 = ip60->dst_address;
+ }
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_GTPU_BYPASS_NEXT_DROP : IP_GTPU_BYPASS_NEXT_GTPU;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+ /* gtpu-input node expects current_data at the GTPU header */
+ if (is_ip4)
+ vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit:
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_gtpu_bypass (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_gtpu_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
+}
+
+VLIB_REGISTER_NODE (ip4_gtpu_bypass_node) = {
+ .function = ip4_gtpu_bypass,
+ .name = "ip4-gtpu-bypass",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_GTPU_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_GTPU_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_GTPU_BYPASS_NEXT_GTPU] = "gtpu4-input",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_forward_next_trace,
+};
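+
+/* ip4-gtpu-bypass is intended to run as an IP input feature (the
+   ip4-unicast arc in a typical setup): non-GTPU traffic is handed to the
+   next feature via vnet_feature_next above, while locally terminated GTPU
+   is steered directly to gtpu4-input. */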
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_gtpu_bypass_node,ip4_gtpu_bypass)
+
+/* Dummy init function to get us linked in. */
+clib_error_t * ip4_gtpu_bypass_init (vlib_main_t * vm)
+{ return 0; }
+
+VLIB_INIT_FUNCTION (ip4_gtpu_bypass_init);
+
+static uword
+ip6_gtpu_bypass (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_gtpu_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (ip6_gtpu_bypass_node) = {
+ .function = ip6_gtpu_bypass,
+ .name = "ip6-gtpu-bypass",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_GTPU_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_GTPU_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_GTPU_BYPASS_NEXT_GTPU] = "gtpu6-input",
+ },
+
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_gtpu_bypass_node,ip6_gtpu_bypass)
+
+/* Dummy init function to get us linked in. */
+clib_error_t * ip6_gtpu_bypass_init (vlib_main_t * vm)
+{ return 0; }
+
+VLIB_INIT_FUNCTION (ip6_gtpu_bypass_init);
diff --git a/src/plugins/gtpu/gtpu_encap.c b/src/plugins/gtpu/gtpu_encap.c
new file mode 100644
index 00000000..8ad53c53
--- /dev/null
+++ b/src/plugins/gtpu/gtpu_encap.c
@@ -0,0 +1,705 @@
+/*
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <gtpu/gtpu.h>
+
+/* Statistics (not all errors) */
+#define foreach_gtpu_encap_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char * gtpu_encap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_gtpu_encap_error
+#undef _
+};
+
+typedef enum {
+#define _(sym,str) GTPU_ENCAP_ERROR_##sym,
+ foreach_gtpu_encap_error
+#undef _
+ GTPU_ENCAP_N_ERROR,
+} gtpu_encap_error_t;
+
+#define foreach_gtpu_encap_next \
+_(DROP, "error-drop") \
+_(IP4_LOOKUP, "ip4-lookup") \
+_(IP6_LOOKUP, "ip6-lookup")
+
+typedef enum {
+ GTPU_ENCAP_NEXT_DROP,
+ GTPU_ENCAP_NEXT_IP4_LOOKUP,
+ GTPU_ENCAP_NEXT_IP6_LOOKUP,
+ GTPU_ENCAP_N_NEXT,
+} gtpu_encap_next_t;
+
+typedef struct {
+ u32 tunnel_index;
+ u32 teid;
+} gtpu_encap_trace_t;
+
+u8 * format_gtpu_encap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ gtpu_encap_trace_t * t
+ = va_arg (*args, gtpu_encap_trace_t *);
+
+ s = format (s, "GTPU encap to gtpu_tunnel%d teid %d",
+ t->tunnel_index, t->teid);
+ return s;
+}
+
+
+#define foreach_fixed_header4_offset \
+ _(0) _(1) _(2) _(3)
+
+#define foreach_fixed_header6_offset \
+ _(0) _(1) _(2) _(3) _(4) _(5) _(6)
+
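+
+/* The encap copy loops below rely on the fixed-size rewrite built at
+   tunnel-creation time: 36 octets for IPv4 (20 IPv4 + 8 UDP + 8 GTPU),
+   copied as four u64 stores plus one trailing u32, and 56 octets for IPv6
+   (40 IPv6 + 8 UDP + 8 GTPU), copied as seven u64 stores. */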
+always_inline uword
+gtpu_encap_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u32 is_ip4)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ gtpu_main_t * gtm = &gtpu_main;
+ vnet_main_t * vnm = gtm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 pkts_encapsulated = 0;
+ u16 old_l0 = 0, old_l1 = 0, old_l2 = 0, old_l3 = 0;
+ u32 thread_index = vlib_get_thread_index();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+ u32 sw_if_index0 = 0, sw_if_index1 = 0, sw_if_index2 = 0, sw_if_index3 = 0;
+ u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0;
+ vnet_hw_interface_t * hi0, * hi1, * hi2, * hi3;
+ gtpu_tunnel_t * t0 = NULL, * t1 = NULL, * t2 = NULL, * t3 = NULL;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t * b0, * b1, * b2, * b3;
+ u32 flow_hash0, flow_hash1, flow_hash2, flow_hash3;
+ u32 len0, len1, len2, len3;
+ ip4_header_t * ip4_0, * ip4_1, * ip4_2, * ip4_3;
+ ip6_header_t * ip6_0, * ip6_1, * ip6_2, * ip6_3;
+ udp_header_t * udp0, * udp1, * udp2, * udp3;
+ gtpu_header_t * gtpu0, * gtpu1, * gtpu2, * gtpu3;
+ u64 * copy_src0, * copy_dst0;
+ u64 * copy_src1, * copy_dst1;
+ u64 * copy_src2, * copy_dst2;
+ u64 * copy_src3, * copy_dst3;
+ u32 * copy_src_last0, * copy_dst_last0;
+ u32 * copy_src_last1, * copy_dst_last1;
+ u32 * copy_src_last2, * copy_dst_last2;
+ u32 * copy_src_last3, * copy_dst_last3;
+ u16 new_l0, new_l1, new_l2, new_l3;
+ ip_csum_t sum0, sum1, sum2, sum3;
+
+ /* Prefetch next iteration. */
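+	  /* (safe: the enclosing loop condition requires n_left_from >= 8,
+	     so from[4..7] are valid) */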
+ {
+ vlib_buffer_t * p4, * p5, * p6, * p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p5->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p6->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p7->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ bi2 = from[2];
+ bi3 = from[3];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ to_next[2] = bi2;
+ to_next[3] = bi3;
+ from += 4;
+ to_next += 4;
+ n_left_to_next -= 4;
+ n_left_from -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ flow_hash0 = vnet_l2_compute_flow_hash (b0);
+ flow_hash1 = vnet_l2_compute_flow_hash (b1);
+ flow_hash2 = vnet_l2_compute_flow_hash (b2);
+ flow_hash3 = vnet_l2_compute_flow_hash (b3);
+
+ /* Get next node index and adj index from tunnel next_dpo */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+ sw_if_index2 = vnet_buffer(b2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer(b3)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1);
+ hi2 = vnet_get_sup_hw_interface (vnm, sw_if_index2);
+ hi3 = vnet_get_sup_hw_interface (vnm, sw_if_index3);
+ t0 = &gtm->tunnels[hi0->dev_instance];
+ t1 = &gtm->tunnels[hi1->dev_instance];
+ t2 = &gtm->tunnels[hi2->dev_instance];
+ t3 = &gtm->tunnels[hi3->dev_instance];
+
+ /* Note: change to always set next0 if it may be set to drop */
+ next0 = t0->next_dpo.dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
+ next1 = t1->next_dpo.dpoi_next_node;
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = t1->next_dpo.dpoi_index;
+ next2 = t2->next_dpo.dpoi_next_node;
+ vnet_buffer(b2)->ip.adj_index[VLIB_TX] = t2->next_dpo.dpoi_index;
+ next3 = t3->next_dpo.dpoi_next_node;
+ vnet_buffer(b3)->ip.adj_index[VLIB_TX] = t3->next_dpo.dpoi_index;
+
+ /* Apply the rewrite string. $$$$ vnet_rewrite? */
+ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+ vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite));
+ vlib_buffer_advance (b2, -(word)_vec_len(t2->rewrite));
+ vlib_buffer_advance (b3, -(word)_vec_len(t3->rewrite));
+
+ if (is_ip4)
+ {
+ ip4_0 = vlib_buffer_get_current(b0);
+ ip4_1 = vlib_buffer_get_current(b1);
+ ip4_2 = vlib_buffer_get_current(b2);
+ ip4_3 = vlib_buffer_get_current(b3);
+
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip4_0;
+ copy_src0 = (u64 *) t0->rewrite;
+ copy_dst1 = (u64 *) ip4_1;
+ copy_src1 = (u64 *) t1->rewrite;
+ copy_dst2 = (u64 *) ip4_2;
+ copy_src2 = (u64 *) t2->rewrite;
+ copy_dst3 = (u64 *) ip4_3;
+ copy_src3 = (u64 *) t3->rewrite;
+
+ /* Copy first 32 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header4_offset;
+#undef _
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ foreach_fixed_header4_offset;
+#undef _
+#define _(offs) copy_dst2[offs] = copy_src2[offs];
+ foreach_fixed_header4_offset;
+#undef _
+#define _(offs) copy_dst3[offs] = copy_src3[offs];
+ foreach_fixed_header4_offset;
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[4]);
+ copy_src_last0 = (u32 *)(&copy_src0[4]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ copy_dst_last1 = (u32 *)(&copy_dst1[4]);
+ copy_src_last1 = (u32 *)(&copy_src1[4]);
+ copy_dst_last1[0] = copy_src_last1[0];
+ copy_dst_last2 = (u32 *)(&copy_dst2[4]);
+ copy_src_last2 = (u32 *)(&copy_src2[4]);
+ copy_dst_last2[0] = copy_src_last2[0];
+ copy_dst_last3 = (u32 *)(&copy_dst3[4]);
+ copy_src_last3 = (u32 *)(&copy_src3[4]);
+ copy_dst_last3[0] = copy_src_last3[0];
+
+ /* Fix the IP4 checksum and length */
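+	  /* (incremental update in the style of RFC 1624: ip_csum_update
+	     folds the old and new length halfwords into the existing sum
+	     instead of recomputing the whole header checksum) */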
+ sum0 = ip4_0->checksum;
+ new_l0 = /* old_l0 always 0, see the rewrite setup */
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip4_0->checksum = ip_csum_fold (sum0);
+ ip4_0->length = new_l0;
+ sum1 = ip4_1->checksum;
+ new_l1 = /* old_l1 always 0, see the rewrite setup */
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+ sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
+ length /* changed member */);
+ ip4_1->checksum = ip_csum_fold (sum1);
+ ip4_1->length = new_l1;
+ sum2 = ip4_2->checksum;
+	  new_l2 = /* old_l2 always 0, see the rewrite setup */
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b2));
+ sum2 = ip_csum_update (sum2, old_l2, new_l2, ip4_header_t,
+ length /* changed member */);
+ ip4_2->checksum = ip_csum_fold (sum2);
+ ip4_2->length = new_l2;
+ sum3 = ip4_3->checksum;
+	  new_l3 = /* old_l3 always 0, see the rewrite setup */
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b3));
+ sum3 = ip_csum_update (sum3, old_l3, new_l3, ip4_header_t,
+ length /* changed member */);
+ ip4_3->checksum = ip_csum_fold (sum3);
+ ip4_3->length = new_l3;
+
+ /* Fix UDP length and set source port */
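+	  /* (the low 16 bits of the L2 flow hash become the UDP source
+	     port, giving downstream ECMP/RSS per-tunnel-flow entropy) */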
+ udp0 = (udp_header_t *)(ip4_0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+ - sizeof (*ip4_0));
+ udp0->length = new_l0;
+ udp0->src_port = flow_hash0;
+ udp1 = (udp_header_t *)(ip4_1+1);
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
+ - sizeof (*ip4_1));
+ udp1->length = new_l1;
+ udp1->src_port = flow_hash1;
+ udp2 = (udp_header_t *)(ip4_2+1);
+ new_l2 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b2)
+ - sizeof (*ip4_2));
+ udp2->length = new_l2;
+ udp2->src_port = flow_hash2;
+ udp3 = (udp_header_t *)(ip4_3+1);
+ new_l3 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b3)
+ - sizeof (*ip4_3));
+ udp3->length = new_l3;
+ udp3->src_port = flow_hash3;
+
+ /* Fix GTPU length */
+ gtpu0 = (gtpu_header_t *)(udp0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+ - sizeof (*ip4_0) - sizeof(*udp0));
+ gtpu0->length = new_l0;
+ gtpu1 = (gtpu_header_t *)(udp1+1);
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
+ - sizeof (*ip4_1) - sizeof(*udp1));
+ gtpu1->length = new_l1;
+ gtpu2 = (gtpu_header_t *)(udp2+1);
+ new_l2 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b2)
+ - sizeof (*ip4_2) - sizeof(*udp2));
+ gtpu2->length = new_l2;
+	  gtpu3 = (gtpu_header_t *)(udp3+1);
+ new_l3 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b3)
+ - sizeof (*ip4_3) - sizeof(*udp3));
+ gtpu3->length = new_l3;
+ }
+ else /* ipv6 */
+ {
+ int bogus = 0;
+
+ ip6_0 = vlib_buffer_get_current(b0);
+ ip6_1 = vlib_buffer_get_current(b1);
+ ip6_2 = vlib_buffer_get_current(b2);
+ ip6_3 = vlib_buffer_get_current(b3);
+
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip6_0;
+ copy_src0 = (u64 *) t0->rewrite;
+ copy_dst1 = (u64 *) ip6_1;
+ copy_src1 = (u64 *) t1->rewrite;
+ copy_dst2 = (u64 *) ip6_2;
+ copy_src2 = (u64 *) t2->rewrite;
+ copy_dst3 = (u64 *) ip6_3;
+ copy_src3 = (u64 *) t3->rewrite;
+ /* Copy first 56 (ip6) octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header6_offset;
+#undef _
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ foreach_fixed_header6_offset;
+#undef _
+#define _(offs) copy_dst2[offs] = copy_src2[offs];
+ foreach_fixed_header6_offset;
+#undef _
+#define _(offs) copy_dst3[offs] = copy_src3[offs];
+ foreach_fixed_header6_offset;
+#undef _
+ /* Fix IP6 payload length */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof(*ip6_0));
+ ip6_0->payload_length = new_l0;
+ new_l1 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
+ - sizeof(*ip6_1));
+ ip6_1->payload_length = new_l1;
+ new_l2 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b2)
+ - sizeof(*ip6_2));
+ ip6_2->payload_length = new_l2;
+ new_l3 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b3)
+ - sizeof(*ip6_3));
+ ip6_3->payload_length = new_l3;
+
+ /* Fix UDP length and set source port */
+ udp0 = (udp_header_t *)(ip6_0+1);
+ udp0->length = new_l0;
+ udp0->src_port = flow_hash0;
+ udp1 = (udp_header_t *)(ip6_1+1);
+ udp1->length = new_l1;
+ udp1->src_port = flow_hash1;
+ udp2 = (udp_header_t *)(ip6_2+1);
+ udp2->length = new_l2;
+ udp2->src_port = flow_hash2;
+ udp3 = (udp_header_t *)(ip6_3+1);
+ udp3->length = new_l3;
+ udp3->src_port = flow_hash3;
+
+ /* IPv6 UDP checksum is mandatory */
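+	  /* (a computed value of 0 is transmitted as 0xffff, since a zero
+	     checksum means "no checksum", which UDP over IPv6 forbids) */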
+ udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
+ ip6_0, &bogus);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ udp1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b1,
+ ip6_1, &bogus);
+ if (udp1->checksum == 0)
+ udp1->checksum = 0xffff;
+ udp2->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b2,
+ ip6_2, &bogus);
+ if (udp2->checksum == 0)
+ udp2->checksum = 0xffff;
+ udp3->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b3,
+ ip6_3, &bogus);
+ if (udp3->checksum == 0)
+ udp3->checksum = 0xffff;
+
+ /* Fix GTPU length */
+ gtpu0 = (gtpu_header_t *)(udp0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+					 - sizeof (*ip6_0) - sizeof(*udp0));
+ gtpu0->length = new_l0;
+ gtpu1 = (gtpu_header_t *)(udp1+1);
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
+					 - sizeof (*ip6_1) - sizeof(*udp1));
+ gtpu1->length = new_l1;
+ gtpu2 = (gtpu_header_t *)(udp2+1);
+ new_l2 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b2)
+					 - sizeof (*ip6_2) - sizeof(*udp2));
+ gtpu2->length = new_l2;
+ gtpu3 = (gtpu_header_t *)(udp3+1);
+ new_l3 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b3)
+					 - sizeof (*ip6_3) - sizeof(*udp3));
+ gtpu3->length = new_l3;
+ }
+
+ pkts_encapsulated += 4;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ len1 = vlib_buffer_length_in_chain (vm, b1);
+ len2 = vlib_buffer_length_in_chain (vm, b2);
+ len3 = vlib_buffer_length_in_chain (vm, b3);
+ stats_n_packets += 4;
+ stats_n_bytes += len0 + len1 + len2 + len3;
+
+	  /* Batch the stats increment for runs of packets on the same gtpu
+	     tunnel so the counter is not bumped once per packet. Note that
+	     stats are still incremented for a deleted or admin-down tunnel
+	     whose packets are dropped; it is not worth checking for that
+	     rare case at the expense of normal-path performance. */
+ if (PREDICT_FALSE ((sw_if_index0 != stats_sw_if_index) ||
+ (sw_if_index1 != stats_sw_if_index) ||
+ (sw_if_index2 != stats_sw_if_index) ||
+ (sw_if_index3 != stats_sw_if_index) ))
+ {
+ stats_n_packets -= 4;
+ stats_n_bytes -= len0 + len1 + len2 + len3;
+ if ( (sw_if_index0 == sw_if_index1 ) &&
+ (sw_if_index1 == sw_if_index2 ) &&
+ (sw_if_index2 == sw_if_index3 ) )
+ {
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_sw_if_index = sw_if_index0;
+ stats_n_packets = 4;
+ stats_n_bytes = len0 + len1 + len2 + len3;
+ }
+ else
+ {
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index0, 1, len0);
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index1, 1, len1);
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index2, 1, len2);
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index3, 1, len3);
+ }
+ }
+
+	  if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      gtpu_encap_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      tr->tunnel_index = t0 - gtm->tunnels;
+	      tr->teid = t0->teid;
+	    }
+
+	  if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      gtpu_encap_trace_t *tr =
+		vlib_add_trace (vm, node, b1, sizeof (*tr));
+	      tr->tunnel_index = t1 - gtm->tunnels;
+	      tr->teid = t1->teid;
+	    }
+
+	  if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      gtpu_encap_trace_t *tr =
+		vlib_add_trace (vm, node, b2, sizeof (*tr));
+	      tr->tunnel_index = t2 - gtm->tunnels;
+	      tr->teid = t2->teid;
+	    }
+
+	  if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      gtpu_encap_trace_t *tr =
+		vlib_add_trace (vm, node, b3, sizeof (*tr));
+	      tr->tunnel_index = t3 - gtm->tunnels;
+	      tr->teid = t3->teid;
+	    }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 flow_hash0;
+ u32 len0;
+ ip4_header_t * ip4_0;
+ ip6_header_t * ip6_0;
+ udp_header_t * udp0;
+ gtpu_header_t * gtpu0;
+ u64 * copy_src0, * copy_dst0;
+ u32 * copy_src_last0, * copy_dst_last0;
+ u16 new_l0;
+ ip_csum_t sum0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ flow_hash0 = vnet_l2_compute_flow_hash(b0);
+
+ /* Get next node index and adj index from tunnel next_dpo */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ t0 = &gtm->tunnels[hi0->dev_instance];
+ /* Note: change to always set next0 if it may be set to drop */
+ next0 = t0->next_dpo.dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
+
+ /* Apply the rewrite string. $$$$ vnet_rewrite? */
+ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+
+ if (is_ip4)
+ {
+ ip4_0 = vlib_buffer_get_current(b0);
+
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip4_0;
+ copy_src0 = (u64 *) t0->rewrite;
+ /* Copy first 32 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header4_offset;
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[4]);
+ copy_src_last0 = (u32 *)(&copy_src0[4]);
+ copy_dst_last0[0] = copy_src_last0[0];
+
+ /* Fix the IP4 checksum and length */
+ sum0 = ip4_0->checksum;
+ new_l0 = /* old_l0 always 0, see the rewrite setup */
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip4_0->checksum = ip_csum_fold (sum0);
+ ip4_0->length = new_l0;
+
+ /* Fix UDP length and set source port */
+ udp0 = (udp_header_t *)(ip4_0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+ - sizeof (*ip4_0));
+ udp0->length = new_l0;
+ udp0->src_port = flow_hash0;
+
+ /* Fix GTPU length */
+ gtpu0 = (gtpu_header_t *)(udp0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+ - sizeof (*ip4_0) - sizeof(*udp0));
+ gtpu0->length = new_l0;
+ }
+
+ else /* ip6 path */
+ {
+ int bogus = 0;
+
+ ip6_0 = vlib_buffer_get_current(b0);
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip6_0;
+ copy_src0 = (u64 *) t0->rewrite;
+ /* Copy first 56 (ip6) octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header6_offset;
+#undef _
+ /* Fix IP6 payload length */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof(*ip6_0));
+ ip6_0->payload_length = new_l0;
+
+ /* Fix UDP length and set source port */
+ udp0 = (udp_header_t *)(ip6_0+1);
+ udp0->length = new_l0;
+ udp0->src_port = flow_hash0;
+
+ /* IPv6 UDP checksum is mandatory */
+ udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
+ ip6_0, &bogus);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+
+ /* Fix GTPU length */
+ gtpu0 = (gtpu_header_t *)(udp0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+					 - sizeof (*ip6_0) - sizeof(*udp0));
+ gtpu0->length = new_l0;
+ }
+
+ pkts_encapsulated ++;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+	  /* Batch the stats increment for runs of packets on the same gtpu
+	     tunnel so the counter is not bumped once per packet. Note that
+	     stats are still incremented for a deleted or admin-down tunnel
+	     whose packets are dropped; it is not worth checking for that
+	     rare case at the expense of normal-path performance. */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gtpu_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - gtm->tunnels;
+ tr->teid = t0->teid;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+  /* Do we still need this now that tunnel tx stats are kept? */
+ vlib_node_increment_counter (vm, node->node_index,
+ GTPU_ENCAP_ERROR_ENCAPSULATED,
+ pkts_encapsulated);
+
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+gtpu4_encap (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return gtpu_encap_inline (vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+static uword
+gtpu6_encap (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return gtpu_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (gtpu4_encap_node) = {
+ .function = gtpu4_encap,
+ .name = "gtpu4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_gtpu_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(gtpu_encap_error_strings),
+ .error_strings = gtpu_encap_error_strings,
+ .n_next_nodes = GTPU_ENCAP_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [GTPU_ENCAP_NEXT_##s] = n,
+ foreach_gtpu_encap_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (gtpu4_encap_node, gtpu4_encap)
+
+VLIB_REGISTER_NODE (gtpu6_encap_node) = {
+ .function = gtpu6_encap,
+ .name = "gtpu6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_gtpu_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(gtpu_encap_error_strings),
+ .error_strings = gtpu_encap_error_strings,
+ .n_next_nodes = GTPU_ENCAP_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [GTPU_ENCAP_NEXT_##s] = n,
+ foreach_gtpu_encap_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (gtpu6_encap_node, gtpu6_encap)
+
diff --git a/src/plugins/gtpu/gtpu_error.def b/src/plugins/gtpu/gtpu_error.def
new file mode 100644
index 00000000..093a886f
--- /dev/null
+++ b/src/plugins/gtpu/gtpu_error.def
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+gtpu_error (DECAPSULATED, "good packets decapsulated")
+gtpu_error (NO_SUCH_TUNNEL, "no such tunnel packets")
+gtpu_error (BAD_VER, "packets with bad version in gtpu header")
+gtpu_error (BAD_FLAGS, "packets with bad flags field in gtpu header")
diff --git a/src/plugins/gtpu/gtpu_msg_enum.h b/src/plugins/gtpu/gtpu_msg_enum.h
new file mode 100644
index 00000000..358a220a
--- /dev/null
+++ b/src/plugins/gtpu/gtpu_msg_enum.h
@@ -0,0 +1,31 @@
+/*
+ * gtpu_msg_enum.h - vpp engine plug-in message enumeration
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_gtpu_msg_enum_h
+#define included_gtpu_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+#include <gtpu/gtpu_all_api_h.h>
+  /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_gtpu_msg_enum_h */
diff --git a/src/plugins/gtpu/gtpu_test.c b/src/plugins/gtpu/gtpu_test.c
new file mode 100644
index 00000000..e7fd0d54
--- /dev/null
+++ b/src/plugins/gtpu/gtpu_test.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <gtpu/gtpu.h>
+
+#define __plugin_msg_base gtpu_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+
+uword unformat_ip46_address (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ if ((type != IP46_TYPE_IP6) &&
+ unformat(input, "%U", unformat_ip4_address, &ip46->ip4)) {
+ ip46_address_mask_ip4(ip46);
+ return 1;
+ } else if ((type != IP46_TYPE_IP4) &&
+ unformat(input, "%U", unformat_ip6_address, &ip46->ip6)) {
+ return 1;
+ }
+ return 0;
+}
+uword unformat_ip46_prefix (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ u8 *len = va_arg (*args, u8 *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+
+ u32 l;
+ if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) {
+ if (l > 32)
+ return 0;
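+    /* ip46_address_t keeps an IPv4 address in its low 32 bits, so an
+       IPv4 prefix length is offset by the 96 leading padding bits */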
+ *len = l + 96;
+ ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0;
+ } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) {
+ if (l > 128)
+ return 0;
+ *len = l;
+ } else {
+ return 0;
+ }
+ return 1;
+}
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <gtpu/gtpu.api.h>
+  /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+/* define message structures */
+#define vl_typedefs
+#include <gtpu/gtpu.api.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <gtpu/gtpu.api.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <gtpu/gtpu.api.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <gtpu/gtpu.api.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} gtpu_test_main_t;
+
+gtpu_test_main_t gtpu_test_main;
+
+static void vl_api_gtpu_add_del_tunnel_reply_t_handler
+ (vl_api_gtpu_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+
+#define foreach_standard_reply_retval_handler \
+ _(sw_interface_set_gtpu_bypass_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = gtpu_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+ foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+ _(SW_INTERFACE_SET_GTPU_BYPASS_REPLY, sw_interface_set_gtpu_bypass_reply) \
+ _(GTPU_ADD_DEL_TUNNEL_REPLY, gtpu_add_del_tunnel_reply) \
+ _(GTPU_TUNNEL_DETAILS, gtpu_tunnel_details)
+
+
+static uword
+api_unformat_sw_if_index (unformat_input_t * input, va_list * args)
+{
+ vat_main_t *vam = va_arg (*args, vat_main_t *);
+ u32 *result = va_arg (*args, u32 *);
+ u8 *if_name;
+ uword *p;
+
+ if (!unformat (input, "%s", &if_name))
+ return 0;
+
+ p = hash_get_mem (vam->sw_if_index_by_interface_name, if_name);
+ if (p == 0)
+ return 0;
+ *result = p[0];
+ return 1;
+}
+
+static int
+api_sw_interface_set_gtpu_bypass (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_gtpu_bypass_t *mp;
+ u32 sw_if_index = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_enable = 1;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ is_enable = 1;
+ else if (unformat (i, "disable"))
+ is_enable = 0;
+ else if (unformat (i, "ip4"))
+ is_ipv6 = 0;
+ else if (unformat (i, "ip6"))
+ is_ipv6 = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_GTPU_BYPASS, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = is_enable;
+ mp->is_ipv6 = is_ipv6;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static uword unformat_gtpu_decap_next
+ (unformat_input_t * input, va_list * args)
+{
+ u32 *result = va_arg (*args, u32 *);
+ u32 tmp;
+
+ if (unformat (input, "l2"))
+ *result = GTPU_INPUT_NEXT_L2_INPUT;
+ else if (unformat (input, "%d", &tmp))
+ *result = tmp;
+ else
+ return 0;
+ return 1;
+}
+
+static int
+api_gtpu_add_del_tunnel (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_gtpu_add_del_tunnel_t *mp;
+ ip46_address_t src, dst;
+ u8 is_add = 1;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u8 grp_set = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 encap_vrf_id = 0;
+ u32 decap_next_index = ~0;
+ u32 teid = 0;
+ int ret;
+
+ /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ memset (&src, 0, sizeof src);
+ memset (&dst, 0, sizeof dst);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4))
+ {
+ ipv4_set = 1;
+ src_set = 1;
+ }
+ else
+ if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4))
+ {
+ ipv4_set = 1;
+ dst_set = 1;
+ }
+ else
+ if (unformat (line_input, "src %U", unformat_ip6_address, &src.ip6))
+ {
+ ipv6_set = 1;
+ src_set = 1;
+ }
+ else
+ if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6))
+ {
+ ipv6_set = 1;
+ dst_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip4_address, &dst.ip4,
+ api_unformat_sw_if_index, vam, &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U",
+ unformat_ip4_address, &dst.ip4))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip6_address, &dst.ip6,
+ api_unformat_sw_if_index, vam, &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U",
+ unformat_ip6_address, &dst.ip6))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else
+ if (unformat (line_input, "mcast_sw_if_index %u", &mcast_sw_if_index))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &encap_vrf_id))
+ ;
+ else if (unformat (line_input, "decap-next %U",
+ unformat_gtpu_decap_next, &decap_next_index))
+ ;
+ else if (unformat (line_input, "teid %d", &teid))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (src_set == 0)
+ {
+ errmsg ("tunnel src address not specified");
+ return -99;
+ }
+ if (dst_set == 0)
+ {
+ errmsg ("tunnel dst address not specified");
+ return -99;
+ }
+
+ if (grp_set && !ip46_address_is_multicast (&dst))
+ {
+ errmsg ("tunnel group address not multicast");
+ return -99;
+ }
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ errmsg ("tunnel nonexistent multicast device");
+ return -99;
+ }
+ if (grp_set == 0 && ip46_address_is_multicast (&dst))
+ {
+ errmsg ("tunnel dst address must be unicast");
+ return -99;
+ }
+
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ M (GTPU_ADD_DEL_TUNNEL, mp);
+
+ if (ipv6_set)
+ {
+ clib_memcpy (mp->src_address, &src.ip6, sizeof (src.ip6));
+ clib_memcpy (mp->dst_address, &dst.ip6, sizeof (dst.ip6));
+ }
+ else
+ {
+ clib_memcpy (mp->src_address, &src.ip4, sizeof (src.ip4));
+ clib_memcpy (mp->dst_address, &dst.ip4, sizeof (dst.ip4));
+ }
+ mp->encap_vrf_id = ntohl (encap_vrf_id);
+ mp->decap_next_index = ntohl (decap_next_index);
+ mp->mcast_sw_if_index = ntohl (mcast_sw_if_index);
+ mp->teid = ntohl (teid);
+ mp->is_add = is_add;
+ mp->is_ipv6 = ipv6_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_gtpu_tunnel_details_t_handler
+ (vl_api_gtpu_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+  ip46_address_t src = to_ip46 (mp->is_ipv6, mp->src_address);
+  ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->dst_address);
+
+ print (vam->ofp, "%11d%24U%24U%14d%18d%13d%19d",
+ ntohl (mp->sw_if_index),
+ format_ip46_address, &src, IP46_TYPE_ANY,
+ format_ip46_address, &dst, IP46_TYPE_ANY,
+ ntohl (mp->encap_vrf_id),
+ ntohl (mp->decap_next_index), ntohl (mp->teid),
+ ntohl (mp->mcast_sw_if_index));
+}
+
+static int
+api_gtpu_tunnel_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_gtpu_tunnel_dump_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ sw_if_index = ~0;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%11s%24s%24s%14s%18s%13s%19s",
+ "sw_if_index", "src_address", "dst_address",
+ "encap_vrf_id", "decap_next_index", "teid", "mcast_sw_if_index");
+ }
+
+ /* Get list of gtpu-tunnel interfaces */
+ M (GTPU_TUNNEL_DUMP, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+
+ S (mp);
+
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(sw_interface_set_gtpu_bypass, \
+ "<intfc> | sw_if_index <id> [ip4 | ip6] [enable | disable]") \
+_(gtpu_add_del_tunnel, \
+ "src <ip-addr> { dst <ip-addr> | group <mcast-ip-addr>\n" \
+ "{ <intfc> | mcast_sw_if_index <nn> } }\n" \
+ "teid <teid> [encap-vrf-id <nn>] [decap-next <l2|nn>] [del]") \
+_(gtpu_tunnel_dump, "[<intfc> | sw_if_index <nn>]") \
+
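+/*
+ * Illustrative vpp_api_test session for the commands above (addresses,
+ * indices and teid values are examples only):
+ *
+ *   vat# gtpu_add_del_tunnel src 10.0.0.1 dst 10.0.0.2 teid 100
+ *   vat# sw_interface_set_gtpu_bypass sw_if_index 1 ip4 enable
+ *   vat# gtpu_tunnel_dump
+ */
+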
+static void
+gtpu_vat_api_hookup (vat_main_t *vam)
+{
+ gtpu_test_main_t * gtm = &gtpu_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + gtm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ gtpu_test_main_t * gtm = &gtpu_test_main;
+
+ u8 * name;
+
+ gtm->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "gtpu_%08x%c", api_version, 0);
+ gtm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
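+  /* a base of (u16)~0 means the gtpu plugin is not loaded in vpp, in
+     which case the test handlers are not hooked up */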
+ if (gtm->msg_id_base != (u16) ~0)
+ gtpu_vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/ila.am b/src/plugins/ila.am
new file mode 100644
index 00000000..d900f3eb
--- /dev/null
+++ b/src/plugins/ila.am
@@ -0,0 +1,20 @@
+# Copyright (c) 2016 Cisco Systems, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppplugins_LTLIBRARIES += ila_plugin.la
+
+ila_plugin_la_SOURCES = ila/ila.c
+
+noinst_HEADERS += ila/ila.h
+
+# vi:syntax=automake
diff --git a/src/plugins/ila/ila.c b/src/plugins/ila/ila.c
new file mode 100644
index 00000000..fd56043e
--- /dev/null
+++ b/src/plugins/ila/ila.c
@@ -0,0 +1,1079 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <ila/ila.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/fib/fib_table.h>
+#include <vpp/app/version.h>
+
+static ila_main_t ila_main;
+
+#define ILA_TABLE_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define ILA_TABLE_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+#define foreach_ila_error \
+ _(NONE, "valid ILA packets")
+
+typedef enum {
+#define _(sym,str) ILA_ERROR_##sym,
+ foreach_ila_error
+#undef _
+ ILA_N_ERROR,
+} ila_error_t;
+
+static char *ila_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ila_error
+#undef _
+};
+
+typedef enum {
+ ILA_ILA2SIR_NEXT_DROP,
+ ILA_ILA2SIR_N_NEXT,
+} ila_ila2sir_next_t;
+
+typedef struct {
+ u32 ila_index;
+ ip6_address_t initial_dst;
+ u32 adj_index;
+} ila_ila2sir_trace_t;
+
+static ila_entry_t ila_sir2ila_default_entry = {
+ .csum_mode = ILA_CSUM_MODE_NO_ACTION,
+ .type = ILA_TYPE_IID,
+  .dir = ILA_DIR_ILA2SIR, //Will pass the packet with no modification
+};
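+
+/* Returned on a sir-to-ila lookup miss: because dir is ILA_DIR_ILA2SIR,
+   the sir-to-ila node leaves the destination address unchanged. */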
+
+/**
+ * @brief Dynamically registered DPO Type for ILA
+ */
+static dpo_type_t ila_dpo_type;
+
+/**
+ * @brief Dynamically registered FIB node type for ILA
+ */
+static fib_node_type_t ila_fib_node_type;
+
+u8 *
+format_half_ip6_address (u8 * s, va_list * va)
+{
+ u64 v = clib_net_to_host_u64 (va_arg (*va, u64));
+
+ return format (s, "%04x:%04x:%04x:%04x",
+ v >> 48, (v >> 32) & 0xffff, (v >> 16) & 0xffff, v & 0xffff);
+
+}
+
+u8 *
+format_ila_direction (u8 * s, va_list * args)
+{
+ ila_direction_t t = va_arg (*args, ila_direction_t);
+#define _(i,n,st) \
+ if (t == ILA_DIR_##i) \
+ return format(s, st);
+ ila_foreach_direction
+#undef _
+ return format (s, "invalid_ila_direction");
+}
+
+static u8 *
+format_csum_mode (u8 * s, va_list * va)
+{
+ ila_csum_mode_t csum_mode = va_arg (*va, ila_csum_mode_t);
+ char *txt;
+
+ switch (csum_mode)
+ {
+#define _(i,n,st) \
+ case ILA_CSUM_MODE_##i: \
+ txt = st; \
+ break;
+ ila_csum_foreach_type
+#undef _
+ default:
+ txt = "invalid_ila_csum_mode";
+ break;
+ }
+  return format (s, "%s", txt);
+}
+
+u8 *
+format_ila_type (u8 * s, va_list * args)
+{
+ ila_type_t t = va_arg (*args, ila_type_t);
+#define _(i,n,st) \
+ if (t == ILA_TYPE_##i) \
+ return format(s, st);
+ ila_foreach_type
+#undef _
+ return format (s, "invalid_ila_type");
+}
+
+static u8 *
+format_ila_entry (u8 * s, va_list * va)
+{
+ vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
+ ila_entry_t *e = va_arg (*va, ila_entry_t *);
+
+ if (!e)
+ {
+ return format (s, "%-15s%=40s%=40s%+16s%+18s%+11s", "Type", "SIR Address",
+ "ILA Address", "Checksum Mode", "Direction", "Next DPO");
+ }
+ else if (vnm)
+ {
+ if (ip6_address_is_zero(&e->next_hop))
+ {
+ return format (s, "%-15U%=40U%=40U%18U%11U%s",
+ format_ila_type, e->type,
+ format_ip6_address, &e->sir_address,
+ format_ip6_address, &e->ila_address,
+ format_csum_mode, e->csum_mode,
+ format_ila_direction, e->dir,
+ "n/a");
+ }
+ else
+ {
+ return format (s, "%-15U%=40U%=40U%18U%11U%U",
+ format_ila_type, e->type,
+ format_ip6_address, &e->sir_address,
+ format_ip6_address, &e->ila_address,
+ format_csum_mode, e->csum_mode,
+ format_ila_direction, e->dir,
+ format_dpo_id, &e->ila_dpo, 0);
+ }
+ }
+
+ return NULL;
+}
+
+u8 *
+format_ila_ila2sir_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ila_ila2sir_trace_t *t = va_arg (*args, ila_ila2sir_trace_t *);
+ return format (s,
+ "ILA -> SIR adj index: %d entry index: %d initial_dst: %U",
+ t->adj_index, t->ila_index, format_ip6_address,
+ &t->initial_dst);
+}
+
+static uword
+unformat_ila_direction (unformat_input_t * input, va_list * args)
+{
+ ila_direction_t *result = va_arg (*args, ila_direction_t *);
+#define _(i,n,s) \
+ if (unformat(input, s)) \
+ { \
+ *result = ILA_DIR_##i; \
+ return 1;\
+ }
+
+ ila_foreach_direction
+#undef _
+ return 0;
+}
+
+static uword
+unformat_ila_type (unformat_input_t * input, va_list * args)
+{
+ ila_type_t *result = va_arg (*args, ila_type_t *);
+#define _(i,n,s) \
+ if (unformat(input, s)) \
+ { \
+ *result = ILA_TYPE_##i; \
+ return 1;\
+ }
+
+ ila_foreach_type
+#undef _
+ return 0;
+}
+
+static uword
+unformat_ila_csum_mode (unformat_input_t * input, va_list * args)
+{
+ ila_csum_mode_t *result = va_arg (*args, ila_csum_mode_t *);
+ if (unformat (input, "none") || unformat (input, "no-action"))
+ {
+ *result = ILA_CSUM_MODE_NO_ACTION;
+ return 1;
+ }
+ if (unformat (input, "neutral-map"))
+ {
+ *result = ILA_CSUM_MODE_NEUTRAL_MAP;
+ return 1;
+ }
+ if (unformat (input, "adjust-transport"))
+ {
+ *result = ILA_CSUM_MODE_ADJUST_TRANSPORT;
+ return 1;
+ }
+ return 0;
+}
+
+static uword
+unformat_half_ip6_address (unformat_input_t * input, va_list * args)
+{
+ u64 *result = va_arg (*args, u64 *);
+ u32 a[4];
+
+ if (!unformat (input, "%x:%x:%x:%x", &a[0], &a[1], &a[2], &a[3]))
+ return 0;
+
+ if (a[0] > 0xFFFF || a[1] > 0xFFFF || a[2] > 0xFFFF || a[3] > 0xFFFF)
+ return 0;
+
+ *result = clib_host_to_net_u64 ((((u64) a[0]) << 48) |
+ (((u64) a[1]) << 32) |
+ (((u64) a[2]) << 16) | (((u64) a[3])));
+
+ return 1;
+}
+
+static vlib_node_registration_t ila_ila2sir_node;
+
+static uword
+ila_ila2sir (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ ila_main_t *ilm = &ila_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ila_entry_t *ie0, *ie1;
+ ip6_header_t *ip60, *ip61;
+ ip6_address_t *sir_address0, *sir_address1;
+
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ CLIB_PREFETCH (p2->data, sizeof (ip6_header_t), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip6_header_t), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip60 = vlib_buffer_get_current (p0);
+ ip61 = vlib_buffer_get_current (p1);
+ sir_address0 = &ip60->dst_address;
+ sir_address1 = &ip61->dst_address;
+ ie0 = pool_elt_at_index (ilm->entries,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
+ ie1 = pool_elt_at_index (ilm->entries,
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX]);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ila_ila2sir_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->ila_index = ie0 - ilm->entries;
+ tr->initial_dst = ip60->dst_address;
+ tr->adj_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ }
+
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ila_ila2sir_trace_t *tr =
+ vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->ila_index = ie1 - ilm->entries;
+ tr->initial_dst = ip61->dst_address;
+ tr->adj_index = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+ }
+
+ sir_address0 = (ie0->dir != ILA_DIR_SIR2ILA) ? &ie0->sir_address : sir_address0;
+ sir_address1 = (ie1->dir != ILA_DIR_SIR2ILA) ? &ie1->sir_address : sir_address1;
+ ip60->dst_address.as_u64[0] = sir_address0->as_u64[0];
+ ip60->dst_address.as_u64[1] = sir_address0->as_u64[1];
+ ip61->dst_address.as_u64[0] = sir_address1->as_u64[0];
+ ip61->dst_address.as_u64[1] = sir_address1->as_u64[1];
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = ie0->ila_dpo.dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = ie1->ila_dpo.dpoi_index;
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1,
+ ie0->ila_dpo.dpoi_next_node,
+ ie1->ila_dpo.dpoi_next_node);
+ }
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ila_entry_t *ie0;
+ ip6_header_t *ip60;
+ ip6_address_t *sir_address0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ sir_address0 = &ip60->dst_address;
+ ie0 = pool_elt_at_index (ilm->entries,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ila_ila2sir_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->ila_index = ie0 ? (ie0 - ilm->entries) : ~0;
+ tr->initial_dst = ip60->dst_address;
+ tr->adj_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ }
+
+ sir_address0 = (ie0->dir != ILA_DIR_SIR2ILA) ? &ie0->sir_address : sir_address0;
+ ip60->dst_address.as_u64[0] = sir_address0->as_u64[0];
+ ip60->dst_address.as_u64[1] = sir_address0->as_u64[1];
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = ie0->ila_dpo.dpoi_index;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0,
+ ie0->ila_dpo.dpoi_next_node);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/** *INDENT-OFF* */
+VLIB_REGISTER_NODE (ila_ila2sir_node, static) =
+{
+ .function = ila_ila2sir,
+ .name = "ila-to-sir",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ila_ila2sir_trace,
+ .n_errors = ILA_N_ERROR,
+ .error_strings = ila_error_strings,
+ .n_next_nodes = ILA_ILA2SIR_N_NEXT,
+ .next_nodes =
+ {
+ [ILA_ILA2SIR_NEXT_DROP] = "error-drop"
+ },
+};
+/** *INDENT-ON* */
+
+typedef enum
+{
+ ILA_SIR2ILA_NEXT_DROP,
+ ILA_SIR2ILA_N_NEXT,
+} ila_sir2ila_next_t;
+
+typedef struct
+{
+ u32 ila_index;
+ ip6_address_t initial_dst;
+} ila_sir2ila_trace_t;
+
+u8 *
+format_ila_sir2ila_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ila_sir2ila_trace_t *t = va_arg (*args, ila_sir2ila_trace_t *);
+
+ return format (s, "SIR -> ILA entry index: %d initial_dst: %U",
+ t->ila_index, format_ip6_address, &t->initial_dst);
+}
+
+static vlib_node_registration_t ila_sir2ila_node;
+
+static uword
+ila_sir2ila (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ ila_main_t *ilm = &ila_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip60, *ip61;
+ u32 next0 = ILA_SIR2ILA_NEXT_DROP;
+ u32 next1 = ILA_SIR2ILA_NEXT_DROP;
+ BVT (clib_bihash_kv) kv0, value0;
+ BVT (clib_bihash_kv) kv1, value1;
+ ila_entry_t *ie0 = &ila_sir2ila_default_entry;
+ ila_entry_t *ie1 = &ila_sir2ila_default_entry;
+ ip6_address_t *ila_address0, *ila_address1;
+
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ CLIB_PREFETCH (p2->data, sizeof (ip6_header_t), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip6_header_t), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip60 = vlib_buffer_get_current (p0);
+ ip61 = vlib_buffer_get_current (p1);
+ ila_address0 = &ip60->dst_address;
+ ila_address1 = &ip61->dst_address;
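+	  /* the 24-byte bihash key carries the full 128-bit destination
+	     address; the third u64 is zero padding */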
+ kv0.key[0] = ip60->dst_address.as_u64[0];
+ kv0.key[1] = ip60->dst_address.as_u64[1];
+ kv0.key[2] = 0;
+ kv1.key[0] = ip61->dst_address.as_u64[0];
+ kv1.key[1] = ip61->dst_address.as_u64[1];
+ kv1.key[2] = 0;
+
+ if (PREDICT_TRUE((BV (clib_bihash_search)
+ (&ilm->id_to_entry_table, &kv0, &value0)) == 0)) {
+ ie0 = &ilm->entries[value0.value];
+ ila_address0 = (ie0->dir != ILA_DIR_ILA2SIR) ? &ie0->ila_address : ila_address0;
+ }
+
+ if ((BV (clib_bihash_search)
+ (&ilm->id_to_entry_table, &kv1, &value1)) == 0) {
+ ie1 = &ilm->entries[value1.value];
+ ila_address1 = (ie1->dir != ILA_DIR_ILA2SIR) ? &ie1->ila_address : ila_address1;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ila_sir2ila_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->ila_index =
+ (ie0 != &ila_sir2ila_default_entry) ? (ie0 - ilm->entries) : ~0;
+ tr->initial_dst = ip60->dst_address;
+ }
+
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ila_sir2ila_trace_t *tr =
+ vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->ila_index =
+ (ie1 != &ila_sir2ila_default_entry) ? (ie1 - ilm->entries) : ~0;
+ tr->initial_dst = ip61->dst_address;
+ }
+
+ ip60->dst_address.as_u64[0] = ila_address0->as_u64[0];
+ ip60->dst_address.as_u64[1] = ila_address0->as_u64[1];
+ ip61->dst_address.as_u64[0] = ila_address1->as_u64[0];
+ ip61->dst_address.as_u64[1] = ila_address1->as_u64[1];
+
+ vnet_feature_next (vnet_buffer (p0)->sw_if_index[VLIB_RX], &next0, p0);
+ vnet_feature_next (vnet_buffer (p1)->sw_if_index[VLIB_RX], &next1, p1);
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip60;
+ u32 next0 = ILA_SIR2ILA_NEXT_DROP;
+ BVT (clib_bihash_kv) kv0, value0;
+ ila_entry_t *ie0 = &ila_sir2ila_default_entry;
+ ip6_address_t *ila_address0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ ila_address0 = &ip60->dst_address;
+
+ kv0.key[0] = ip60->dst_address.as_u64[0];
+ kv0.key[1] = ip60->dst_address.as_u64[1];
+ kv0.key[2] = 0;
+
+ if (PREDICT_TRUE((BV (clib_bihash_search)
+ (&ilm->id_to_entry_table, &kv0, &value0)) == 0)) {
+ ie0 = &ilm->entries[value0.value];
+ ila_address0 = (ie0->dir != ILA_DIR_ILA2SIR) ? &ie0->ila_address : ila_address0;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ila_sir2ila_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->ila_index =
+ (ie0 != &ila_sir2ila_default_entry) ? (ie0 - ilm->entries) : ~0;
+ tr->initial_dst = ip60->dst_address;
+ }
+
+ //This operation should do everything for any type (except vnid4 obviously)
+ ip60->dst_address.as_u64[0] = ila_address0->as_u64[0];
+ ip60->dst_address.as_u64[1] = ila_address0->as_u64[1];
+
+ vnet_feature_next (vnet_buffer (p0)->sw_if_index[VLIB_RX], &next0, p0);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/** *INDENT-OFF* */
+VLIB_REGISTER_NODE (ila_sir2ila_node, static) =
+{
+  .function = ila_sir2ila,
+  .name = "sir-to-ila",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ila_sir2ila_trace,
+ .n_errors = ILA_N_ERROR,
+ .error_strings = ila_error_strings,
+ .n_next_nodes = ILA_SIR2ILA_N_NEXT,
+ .next_nodes =
+ {
+ [ILA_SIR2ILA_NEXT_DROP] = "error-drop"
+ },
+};
+/** *INDENT-ON* */
+
+/** *INDENT-OFF* */
+VNET_FEATURE_INIT (ila_sir2ila, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "sir-to-ila",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+/** *INDENT-ON* */
+
+static void
+ila_entry_stack (ila_entry_t *ie)
+{
+ /*
+ * restack on the next-hop's FIB entry
+ */
+ dpo_stack(ila_dpo_type,
+ DPO_PROTO_IP6,
+ &ie->ila_dpo,
+ fib_entry_contribute_ip_forwarding(
+ ie->next_hop_fib_entry_index));
+}
+
+int
+ila_add_del_entry (ila_add_del_entry_args_t * args)
+{
+ ila_main_t *ilm = &ila_main;
+ BVT (clib_bihash_kv) kv, value;
+
+ //Sanity check
+ if (args->type == ILA_TYPE_IID || args->type == ILA_TYPE_LUID)
+ {
+ if ((args->sir_address.as_u8[8] >> 5) != args->type)
+ {
+	  clib_warning ("Incorrect SIR address (ILA type mismatch %d %d)",
+			args->sir_address.as_u8[8] >> 5, args->type);
+ return -1;
+ }
+ if (args->sir_address.as_u8[8] & 0x10)
+ {
+ clib_warning ("Checksum bit should not be set in SIR address");
+ return -1;
+ }
+ }
+ else if (args->type == ILA_TYPE_VNIDM)
+ {
+ if (args->sir_address.as_u8[0] != 0xff ||
+ (args->sir_address.as_u8[1] & 0xf0) != 0xf0)
+ {
+ clib_warning ("SIR multicast address must start with fff");
+ return -1;
+ }
+ if (args->sir_address.as_u16[1] || args->sir_address.as_u16[2] ||
+ args->sir_address.as_u16[3] || args->sir_address.as_u16[4] ||
+ args->sir_address.as_u16[5] || (args->sir_address.as_u8[12] & 0xf0))
+ {
+	  clib_warning ("SIR multicast address must have its intermediate bytes set to zero");
+ return -1;
+ }
+ }
+
+ if (!args->is_del)
+ {
+ ila_entry_t *e;
+ pool_get (ilm->entries, e);
+ e->type = args->type;
+ e->sir_address = args->sir_address;
+ e->next_hop = args->next_hop_address;
+ e->csum_mode = args->csum_mode;
+ e->dir = args->dir;
+
+ //Construct ILA address
+ switch (e->type)
+ {
+ case ILA_TYPE_IID:
+ e->ila_address = e->sir_address;
+ break;
+ case ILA_TYPE_LUID:
+ e->ila_address.as_u64[0] = args->locator;
+ e->ila_address.as_u64[1] = args->sir_address.as_u64[1];
+ break;
+ case ILA_TYPE_VNID6:
+ e->ila_address.as_u64[0] = args->locator;
+ e->ila_address.as_u8[8] = (ILA_TYPE_VNID6 << 1);
+ e->ila_address.as_u32[2] |= args->vnid;
+ e->ila_address.as_u32[3] = args->sir_address.as_u32[3];
+ break;
+ case ILA_TYPE_VNIDM:
+ e->ila_address.as_u64[0] = args->locator;
+ e->ila_address.as_u8[8] = (ILA_TYPE_VNIDM << 1);
+ e->ila_address.as_u32[2] |= args->vnid;
+ e->ila_address.as_u32[3] = args->sir_address.as_u32[3];
+ e->ila_address.as_u8[12] |= args->sir_address.as_u8[2] << 4;
+ break;
+	case ILA_TYPE_VNID4:
+	  clib_warning ("ILA type '%U' is not supported", format_ila_type,
+			e->type);
+	  pool_put (ilm->entries, e);
+	  return -1;
+ }
+
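+      /*
+       * Checksum-neutral mapping: adjust the last 16 bits of the
+       * identifier (as_u16[7]) by the one's-complement difference between
+       * the SIR and ILA addresses, so transport checksums computed over
+       * the IPv6 pseudo-header stay valid after the address rewrite. The
+       * extra 0x1000 term accounts for the C-bit (0x10 in byte 8) that is
+       * set at the end of this block.
+       */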
+ //Modify ILA checksum if necessary
+ if (e->csum_mode == ILA_CSUM_MODE_NEUTRAL_MAP)
+ {
+ ip_csum_t csum = e->ila_address.as_u16[7];
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ csum = ip_csum_sub_even (csum, e->sir_address.as_u32[i]);
+ csum = ip_csum_add_even (csum, e->ila_address.as_u32[i]);
+ }
+ csum = ip_csum_add_even (csum, clib_host_to_net_u16 (0x1000));
+ e->ila_address.as_u16[7] = ip_csum_fold (csum);
+ e->ila_address.as_u8[8] |= 0x10;
+ }
+
+ //Create entry with the sir address
+ kv.key[0] = e->sir_address.as_u64[0];
+ kv.key[1] = e->sir_address.as_u64[1];
+ kv.key[2] = 0;
+ kv.value = e - ilm->entries;
+ BV (clib_bihash_add_del) (&ilm->id_to_entry_table, &kv,
+ 1 /* is_add */ );
+
+ if (!ip6_address_is_zero(&e->next_hop))
+ {
+ /*
+	   * become a child of the FIB entry for the next-hop
+ * so we are informed when its forwarding changes
+ */
+ fib_prefix_t next_hop = {
+ .fp_addr = {
+ .ip6 = e->next_hop,
+ },
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+
+ e->next_hop_fib_entry_index =
+ fib_table_entry_special_add(0,
+ &next_hop,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+ e->next_hop_child_index =
+ fib_entry_child_add(e->next_hop_fib_entry_index,
+ ila_fib_node_type,
+ e - ilm->entries);
+
+ /*
+ * Create a route that results in the ILA entry
+ */
+ dpo_id_t dpo = DPO_INVALID;
+ fib_prefix_t pfx = {
+ .fp_addr = {
+ .ip6 = e->ila_address,
+ },
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+
+ dpo_set(&dpo, ila_dpo_type, DPO_PROTO_IP6, e - ilm->entries);
+
+ fib_table_entry_special_dpo_add(0,
+ &pfx,
+ FIB_SOURCE_PLUGIN_HI,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+ dpo_reset(&dpo);
+
+ /*
+ * finally stack the ILA entry so it will forward to the next-hop
+ */
+ ila_entry_stack (e);
+ }
+ }
+ else
+ {
+ ila_entry_t *e;
+ kv.key[0] = args->sir_address.as_u64[0];
+ kv.key[1] = args->sir_address.as_u64[1];
+ kv.key[2] = 0;
+
+ if ((BV (clib_bihash_search) (&ilm->id_to_entry_table, &kv, &value) <
+ 0))
+ {
+ return -1;
+ }
+
+ e = &ilm->entries[value.value];
+
+ if (!ip6_address_is_zero(&e->next_hop))
+ {
+ fib_prefix_t pfx = {
+ .fp_addr = {
+ .ip6 = e->ila_address,
+ },
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+
+ fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI);
+ /*
+	       * remove this ILA entry as child of the FIB entry for the next-hop
+ */
+ fib_entry_child_remove(e->next_hop_fib_entry_index,
+ e->next_hop_child_index);
+ fib_table_entry_delete_index(e->next_hop_fib_entry_index,
+ FIB_SOURCE_RR);
+ e->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
+ }
+ dpo_reset (&e->ila_dpo);
+
+ BV (clib_bihash_add_del) (&ilm->id_to_entry_table, &kv,
+ 0 /* is_add */ );
+ pool_put (ilm->entries, e);
+ }
+ return 0;
+}
+
+int
+ila_interface (u32 sw_if_index, u8 disable)
+{
+  vnet_feature_enable_disable ("ip6-unicast", "sir-to-ila", sw_if_index,
+ !disable, 0, 0);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Identifier-locator addressing for IPv6",
+};
+/* *INDENT-ON* */
+
+u8 *
+format_ila_dpo (u8 * s, va_list * va)
+{
+ index_t index = va_arg (*va, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
+ ila_main_t *ilm = &ila_main;
+ ila_entry_t *ie = pool_elt_at_index (ilm->entries, index);
+ return format(s, "ILA: idx:%d sir:%U",
+ index,
+ format_ip6_address, &ie->sir_address);
+}
+
+/**
+ * @brief no-op lock function.
+ * The lifetime of the ILA entry is managed by the control plane
+ */
+static void
+ila_dpo_lock (dpo_id_t *dpo)
+{
+}
+
+/**
+ * @brief no-op unlock function.
+ * The lifetime of the ILA entry is managed by the control plane
+ */
+static void
+ila_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+static const dpo_vft_t ila_vft = {
+  .dv_lock = ila_dpo_lock,
+  .dv_unlock = ila_dpo_unlock,
+  .dv_format = format_ila_dpo,
+};
+
+static const char *const ila_ip6_nodes[] = {
+  "ila-to-sir",
+  NULL,
+};
+
+static const char *const *const ila_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP6] = ila_ip6_nodes,
+};
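+
+/*
+ * ila_vft and ila_nodes are registered as a new DPO type in ila_init().
+ * "ila-to-sir" is the IPv6 graph node that handles packets whose lookup
+ * resolves to an ILA DPO; the index stored in the DPO (via dpo_set() in
+ * ila_add_del_entry()) is the ILA entry's pool index.
+ */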
+
+static fib_node_t *
+ila_fib_node_get_node (fib_node_index_t index)
+{
+ ila_main_t *ilm = &ila_main;
+ ila_entry_t *ie = pool_elt_at_index (ilm->entries, index);
+
+ return (&ie->ila_fib_node);
+}
+
+/**
+ * @brief no-op callback invoked when the last lock on the FIB node is gone.
+ * The lifetime of the ILA entry is managed by the control plane
+ */
+static void
+ila_fib_node_last_lock_gone (fib_node_t *node)
+{
+}
+
+static ila_entry_t *
+ila_entry_from_fib_node (fib_node_t *node)
+{
+ return ((ila_entry_t*)(((char*)node) -
+ STRUCT_OFFSET_OF(ila_entry_t, ila_fib_node)));
+}
+
+/**
+ * @brief
+ * Callback function invoked when the forwarding changes for the ILA next-hop
+ */
+static fib_node_back_walk_rc_t
+ila_fib_node_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ ila_entry_stack(ila_entry_from_fib_node(node));
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * ILA's FIB graph node virtual function table
+ */
+static const fib_node_vft_t ila_fib_node_vft = {
+ .fnv_get = ila_fib_node_get_node,
+ .fnv_last_lock = ila_fib_node_last_lock_gone,
+ .fnv_back_walk = ila_fib_node_back_walk_notify,
+};
+
+clib_error_t *
+ila_init (vlib_main_t * vm)
+{
+ ila_main_t *ilm = &ila_main;
+ ilm->entries = NULL;
+
+ ilm->lookup_table_nbuckets = ILA_TABLE_DEFAULT_HASH_NUM_BUCKETS;
+ ilm->lookup_table_nbuckets = 1 << max_log2 (ilm->lookup_table_nbuckets);
+ ilm->lookup_table_size = ILA_TABLE_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&ilm->id_to_entry_table,
+ "ila id to entry index table",
+ ilm->lookup_table_nbuckets, ilm->lookup_table_size);
+
+ ila_dpo_type = dpo_register_new_type(&ila_vft, ila_nodes);
+ ila_fib_node_type = fib_node_register_new_type(&ila_fib_node_vft);
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (ila_init);
+
+static clib_error_t *
+ila_entry_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ila_add_del_entry_args_t args = { 0 };
+ u8 next_hop_set = 0;
+ int ret;
+ clib_error_t *error = 0;
+
+ args.type = ILA_TYPE_IID;
+ args.csum_mode = ILA_CSUM_MODE_NO_ACTION;
+ args.local_adj_index = ~0;
+ args.dir = ILA_DIR_BIDIR;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "type %U", unformat_ila_type, &args.type))
+ ;
+ else if (unformat
+ (line_input, "sir-address %U", unformat_ip6_address,
+ &args.sir_address))
+ ;
+ else if (unformat
+ (line_input, "locator %U", unformat_half_ip6_address,
+ &args.locator))
+ ;
+ else if (unformat
+ (line_input, "csum-mode %U", unformat_ila_csum_mode,
+ &args.csum_mode))
+ ;
+ else if (unformat (line_input, "vnid %x", &args.vnid))
+ ;
+      else if (unformat
+	       (line_input, "next-hop %U", unformat_ip6_address,
+		&args.next_hop_address))
+	next_hop_set = 1;
+      else if (unformat
+	       (line_input, "direction %U", unformat_ila_direction, &args.dir))
+	;
+ else if (unformat (line_input, "del"))
+ args.is_del = 1;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!next_hop_set)
+ {
+      error = clib_error_return (0, "next-hop must be specified");
+ goto done;
+ }
+
+ if ((ret = ila_add_del_entry (&args)))
+ {
+ error = clib_error_return (0, "ila_add_del_entry returned error %d", ret);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (ila_entry_command, static) =
+{
+ .path = "ila entry",
+  .short_help = "ila entry [type <type>] [sir-address <address>] [locator <locator>] [vnid <hex-vnid>]"
+  " [next-hop <next-hop>] [direction (bidir|sir2ila|ila2sir)]"
+  " [csum-mode (no-action|neutral-map|transport-adjust)] [del]",
+ .function = ila_entry_command_fn,
+};
+
+static clib_error_t *
+ila_interface_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ u8 disable = 0;
+
+ if (!unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ return clib_error_return (0, "Invalid interface name");
+ }
+
+ if (unformat (input, "disable"))
+ {
+ disable = 1;
+ }
+
+ int ret;
+ if ((ret = ila_interface (sw_if_index, disable)))
+ return clib_error_return (0, "ila_interface returned error %d", ret);
+
+ return NULL;
+}
+
+VLIB_CLI_COMMAND (ila_interface_command, static) =
+{
+ .path = "ila interface",
+ .short_help = "ila interface <interface-name> [disable]",
+ .function = ila_interface_command_fn,
+};
+
+static clib_error_t *
+ila_show_entries_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ila_main_t *ilm = &ila_main;
+ ila_entry_t *e;
+
+ vlib_cli_output (vm, " %U\n", format_ila_entry, vnm, NULL);
+ pool_foreach (e, ilm->entries,
+ ({
+ vlib_cli_output (vm, " %U\n", format_ila_entry, vnm, e);
+ }));
+
+ return NULL;
+}
+
+VLIB_CLI_COMMAND (ila_show_entries_command, static) =
+{
+ .path = "show ila entries",
+ .short_help = "show ila entries",
+ .function = ila_show_entries_command_fn,
+};
diff --git a/src/plugins/ila/ila.h b/src/plugins/ila/ila.h
new file mode 100644
index 00000000..26620983
--- /dev/null
+++ b/src/plugins/ila/ila.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ILA_H
+#define ILA_H
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_node.h>
+
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_template.h>
+
+#define ila_foreach_type \
+ _(IID, 0, "iid") \
+ _(LUID, 1, "luid") \
+ _(VNID4, 2, "vnid-ip4") \
+ _(VNID6, 3, "vnid-ip6") \
+ _(VNIDM, 4, "vnid-multicast")
+
+typedef enum {
+#define _(i,n,s) ILA_TYPE_##i = n,
+ ila_foreach_type
+#undef _
+} ila_type_t;
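+
+/*
+ * Byte 8 of an ILA address (the first byte of the identifier) carries the
+ * 3-bit type in its most-significant bits and the checksum-neutral (C)
+ * flag in bit 0x10; see the sanity checks in ila_add_del_entry().
+ */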
+
+#define ila_csum_foreach_type \
+_(NO_ACTION, 0, "no-action") \
+_(NEUTRAL_MAP, 1, "neutral-map") \
+_(ADJUST_TRANSPORT, 2, "adjust-transport")
+
+typedef enum {
+#define _(i,n,s) ILA_CSUM_MODE_##i = n,
+ ila_csum_foreach_type
+#undef _
+ ILA_CSUM_N_TYPES
+} ila_csum_mode_t;
+
+#define ila_foreach_direction \
+_(BIDIR, 0, "bidir") \
+_(SIR2ILA, 1, "sir2ila") \
+_(ILA2SIR, 2, "ila2sir")
+
+typedef enum {
+#define _(i,n,s) ILA_DIR_##i = n,
+ ila_foreach_direction
+#undef _
+} ila_direction_t;
+
+typedef struct {
+ /**
+ * Fib Node base class
+ */
+ fib_node_t ila_fib_node;
+ ila_type_t type;
+ ip6_address_t sir_address;
+ ip6_address_t ila_address;
+ ip6_address_t next_hop;
+ ila_csum_mode_t csum_mode;
+ ila_direction_t dir;
+
+ /**
+ * The FIB entry index for the next-hop
+ */
+ fib_node_index_t next_hop_fib_entry_index;
+
+ /**
+ * The child index on the FIB entry
+ */
+ u32 next_hop_child_index;
+
+ /**
+   * The next DPO in the graph to follow
+ */
+ dpo_id_t ila_dpo;
+} ila_entry_t;
+
+typedef struct {
+ ila_entry_t *entries; //Pool of ILA entries
+
+ u64 lookup_table_nbuckets;
+ u64 lookup_table_size;
+ clib_bihash_24_8_t id_to_entry_table;
+
+ u32 ip6_lookup_next_index;
+} ila_main_t;
+
+
+typedef struct {
+ ila_type_t type;
+ ip6_address_t sir_address;
+ ip6_address_t next_hop_address;
+ u64 locator;
+ u32 vnid;
+ u32 local_adj_index;
+ ila_csum_mode_t csum_mode;
+ ila_direction_t dir;
+ u8 is_del;
+} ila_add_del_entry_args_t;
+
+int ila_add_del_entry (ila_add_del_entry_args_t * args);
+int ila_interface (u32 sw_if_index, u8 disable);
+
+#endif //ILA_H
diff --git a/src/plugins/ioam.am b/src/plugins/ioam.am
new file mode 100644
index 00000000..4ac69aac
--- /dev/null
+++ b/src/plugins/ioam.am
@@ -0,0 +1,247 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+########################################
+# iOAM Proof of Transit
+########################################
+
+IOAM_POT_SRC = \
+ ioam/lib-pot/pot_util.c \
+ ioam/encap/ip6_ioam_pot.c \
+ ioam/lib-pot/pot_util.h \
+ ioam/lib-pot/math64.h \
+ ioam/lib-pot/pot_api.c
+
+IOAM_POT_NOINST_HDR = \
+ ioam/lib-pot/pot_all_api_h.h \
+ ioam/lib-pot/pot_msg_enum.h \
+ ioam/lib-pot/pot.api.h \
+ ioam/lib-pot/pot_util.h \
+ ioam/lib-pot/math64.h
+
+IOAM_POT_API = ioam/lib-pot/pot.api
+
+ioam_pot_test_plugin_la_SOURCES = \
+ ioam/lib-pot/pot_test.c \
+ ioam/lib-pot/pot_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_pot_test_plugin.la
+
+########################################
+# iOAM trace export for IPv6
+########################################
+
+IOAM_EXPORT_SRC = \
+  ioam/export/ioam_export.c \
+  ioam/export/node.c \
+  ioam/export/ioam_export.api.h \
+  ioam/export/ioam_export_thread.c
+
+IOAM_EXPORT_NOINST_HDR = \
+ ioam/export/ioam_export_all_api_h.h \
+ ioam/export/ioam_export_msg_enum.h \
+ ioam/export/ioam_export.api.h
+
+IOAM_EXPORT_API = ioam/export/ioam_export.api
+
+ioam_export_test_plugin_la_SOURCES = \
+ ioam/export/ioam_export_test.c \
+ ioam/export/ioam_export_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_export_test_plugin.la
+
+########################################
+# iOAM Trace
+########################################
+IOAM_TRACE_SRC = \
+ ioam/lib-trace/trace_util.c \
+ ioam/encap/ip6_ioam_trace.c \
+ ioam/lib-trace/trace_api.c
+
+IOAM_TRACE_NOINST_HDR = \
+ ioam/export/ioam_export_all_api_h.h \
+ ioam/lib-trace/trace_all_api_h.h \
+ ioam/lib-trace/trace_msg_enum.h \
+ ioam/lib-trace/trace.api.h \
+ ioam/lib-trace/trace_util.h \
+ ioam/encap/ip6_ioam_trace.h \
+ ioam/lib-trace/trace_config.h
+
+IOAM_TRACE_API = ioam/lib-trace/trace.api
+
+ioam_trace_test_plugin_la_SOURCES = \
+ ioam/lib-trace/trace_test.c \
+ ioam/lib-trace/trace_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_trace_test_plugin.la
+
+nobase_include_HEADERS += \
+ ioam/lib-trace/trace_util.h \
+ ioam/export-common/ioam_export.h
+
+########################################
+# VxLAN-GPE
+########################################
+IOAM_VXLAN_GPE_SRC = \
+ ioam/lib-vxlan-gpe/ioam_encap.c \
+ ioam/lib-vxlan-gpe/ioam_decap.c \
+ ioam/lib-vxlan-gpe/ioam_transit.c \
+ ioam/lib-vxlan-gpe/ioam_pop.c \
+ ioam/lib-vxlan-gpe/vxlan_gpe_api.c \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c \
+ ioam/export-vxlan-gpe/vxlan_gpe_node.c \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h\
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
+
+IOAM_VXLAN_GPE_NOINST_HDR = \
+ ioam/export/ioam_export_all_api_h.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h \
+ ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h
+
+IOAM_VXLAN_GPE_API = ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api
+IOAM_VXLAN_GPE_API += ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api
+
+ioam_vxlan_gpe_test_plugin_la_SOURCES = \
+ ioam/lib-vxlan-gpe/vxlan_gpe_test.c \
+ ioam/lib-vxlan-gpe/vxlan_gpe_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_vxlan_gpe_test_plugin.la
+
+vxlan_gpe_ioam_export_test_plugin_la_SOURCES = \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += vxlan_gpe_ioam_export_test_plugin.la
+
+########################################
+# iOAM E2E
+########################################
+
+IOAM_E2E_SRC = \
+ ioam/encap/ip6_ioam_e2e.c \
+ ioam/encap/ip6_ioam_seqno.c \
+ ioam/lib-e2e/ioam_seqno_lib.c
+
+IOAM_E2E_NOINST_HDR = \
+ ioam/encap/ip6_ioam_e2e.h \
+ ioam/encap/ip6_ioam_seqno.h \
+ ioam/lib-e2e/ioam_seqno_lib.h
+
+########################################
+# ipfix collector
+########################################
+
+IPFIX_COLLECTOR_SRC = \
+ ioam/ipfixcollector/ipfixcollector.c \
+ ioam/ipfixcollector/node.c \
+ ioam/ipfixcollector/ipfixcollector.h
+
+########################################
+# iOAM Analyse
+########################################
+
+IOAM_ANALYSE_SRC = \
+ ioam/analyse/ip6/ip6_ioam_analyse.c \
+ ioam/analyse/ip6/node.c \
+ ioam/analyse/ip6/ip6_ioam_analyse.h \
+ ioam/analyse/ioam_summary_export.c \
+ ioam/analyse/ioam_analyse.h \
+ ioam/analyse/ioam_summary_export.h
+
+########################################
+# iOAM record cache and rewrite
+########################################
+
+IOAM_IP6_MANYCAST_SRC = \
+  ioam/ip6/ioam_cache.c \
+  ioam/ip6/ioam_cache_node.c \
+  ioam/ip6/ioam_cache_tunnel_select_node.c \
+  ioam/ip6/ioam_cache.api.h
+
+IOAM_IP6_MANYCAST_API = ioam/ip6/ioam_cache.api
+
+IOAM_IP6_MANYCAST_NOINST_HDR = \
+ ioam/ip6/ioam_cache_all_api_h.h \
+ ioam/ip6/ioam_cache_msg_enum.h \
+ ioam/ip6/ioam_cache.api.h
+
+########################################
+# udp ping
+########################################
+
+UDP_PING_SRC = \
+ ioam/udp-ping/udp_ping_node.c \
+ ioam/udp-ping/udp_ping_util.c \
+ ioam/udp-ping/udp_ping_export.c \
+ ioam/udp-ping/udp_ping_api.c
+
+UDP_PING_NOINST_HDR = \
+ ioam/udp-ping/udp_ping_packet.h \
+ ioam/udp-ping/udp_ping.h \
+ ioam/udp-ping/udp_ping_util.h \
+ ioam/udp-ping/udp_ping_all_api_h.h \
+ ioam/udp-ping/udp_ping_msg_enum.h \
+ ioam/udp-ping/udp_ping.api.h
+
+UDP_PING_API = ioam/udp-ping/udp_ping.api
+
+udp_ping_test_plugin_la_SOURCES = \
+ ioam/udp-ping/udp_ping_test.c \
+ ioam/udp-ping/udp_ping_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += udp_ping_test_plugin.la
+
+########################################
+# iOAM plugins
+########################################
+
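+# Each feature above contributes *_SRC, *_API and *_NOINST_HDR lists that
+# are aggregated into the single ioam plugin below; the *_test_plugin
+# libraries registered along the way are built separately as
+# vppapitestplugins.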
+ioam_plugin_la_SOURCES = \
+ $(IOAM_POT_SRC) \
+ $(IOAM_EXPORT_SRC) \
+ $(IOAM_TRACE_SRC) \
+ $(IOAM_VXLAN_GPE_SRC) \
+ $(IOAM_E2E_SRC) \
+ $(IPFIX_COLLECTOR_SRC) \
+ $(IOAM_ANALYSE_SRC) \
+ $(IOAM_IP6_MANYCAST_SRC) \
+ $(UDP_PING_SRC)
+
+API_FILES += \
+ $(IOAM_POT_API) \
+ $(IOAM_EXPORT_API) \
+ $(IOAM_TRACE_API) \
+ $(IOAM_VXLAN_GPE_API) \
+ $(IOAM_IP6_MANYCAST_API) \
+ $(UDP_PING_API)
+
+noinst_HEADERS += \
+ $(IOAM_POT_NOINST_HDR) \
+ $(IOAM_EXPORT_NOINST_HDR) \
+ $(IOAM_TRACE_NOINST_HDR) \
+ $(IOAM_VXLAN_GPE_NOINST_HDR) \
+ $(IOAM_E2E_NOINST_HDR) \
+ $(IOAM_IP6_MANYCAST_NOINST_HDR) \
+ $(UDP_PING_NOINST_HDR)
+
+vppplugins_LTLIBRARIES += ioam_plugin.la
+
+# vi:syntax=automake
diff --git a/src/plugins/ioam/analyse/ioam_analyse.h b/src/plugins/ioam/analyse/ioam_analyse.h
new file mode 100644
index 00000000..ef2865da
--- /dev/null
+++ b/src/plugins/ioam/analyse/ioam_analyse.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_ANALYSE_IOAM_ANALYSE_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_ANALYSE_IOAM_ANALYSE_H_
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vppinfra/types.h>
+#include <ioam/lib-e2e/e2e_util.h>
+#include <ioam/lib-trace/trace_util.h>
+#include <ioam/lib-trace/trace_config.h>
+
+#define IOAM_FLOW_TEMPLATE_ID 260
+#define IOAM_TRACE_MAX_NODES 10
+#define IOAM_MAX_PATHS_PER_FLOW 10
+
+typedef struct
+{
+ u16 ingress_if;
+ u16 egress_if;
+ u32 node_id;
+ u32 state_up;
+} ioam_path_map_t;
+
+/** @brief Analysed iOAM trace data.
+ @note cache aligned.
+*/
+typedef struct
+{
+  /** Number of nodes in the path. */
+ u8 num_nodes;
+
+ /** Data contained in trace - NodeId, TTL, Ingress & Egress Link, Timestamp. */
+ u8 trace_type;
+
+  /** Flag to indicate whether this record is free. */
+ u8 is_free;
+
+ u8 pad[5];
+
+ /** Actual PATH flow has taken. */
+ ioam_path_map_t path[IOAM_TRACE_MAX_NODES];
+
+ /** Num of pkts in the flow going over path. */
+ u32 pkt_counter;
+
+ /** Num of bytes in the flow going over path. */
+ u32 bytes_counter;
+
+  /** Minimum delay for the flow. */
+  u32 min_delay;
+
+  /** Maximum delay for the flow. */
+  u32 max_delay;
+
+  /** Average delay for the flow. */
+  u32 mean_delay;
+
+ u32 reserve;
+} ioam_analyse_trace_record;
+
+typedef struct
+{
+ ioam_analyse_trace_record path_data[IOAM_MAX_PATHS_PER_FLOW];
+} ioam_analyse_trace_data;
+
+/** @brief Analysed iOAM pot data.
+ @note cache aligned.
+*/
+typedef struct
+{
+ /** Number of packets validated (passes through the service chain)
+ within the timestamps. */
+ u32 sfc_validated_count;
+
+ /** Number of packets invalidated (failed through the service chain)
+ within the timestamps. */
+ u32 sfc_invalidated_count;
+} ioam_analyse_pot_data;
+
+/** @brief Analysed iOAM data.
+ @note cache aligned.
+*/
+typedef struct ioam_analyser_data_t_
+{
+ u8 is_free;
+ u8 pad[3];
+
+ /** Num of pkts sent for this flow. */
+ u32 pkt_sent;
+
+ /** Num of pkts matching this flow. */
+ u32 pkt_counter;
+
+ /** Num of bytes matching this flow. */
+ u32 bytes_counter;
+
+ /** Analysed iOAM trace data. */
+ ioam_analyse_trace_data trace_data;
+
+ /** Analysed iOAM pot data. */
+ ioam_analyse_pot_data pot_data;
+
+ /** Analysed iOAM seqno data. */
+ seqno_rx_info seqno_data;
+
+ /** Cache of previously analysed data, useful for export. */
+ struct ioam_analyser_data_t_ *chached_data_list;
+
+  /** Lock, since this data is also read when exporting from another thread. */
+ volatile u32 *writer_lock;
+} ioam_analyser_data_t;
+
+always_inline f64
+ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len,
+ u8 oneway)
+{
+ u16 size_of_all_traceopts;
+ u8 size_of_traceopt_per_node;
+ u8 num_nodes;
+  u32 *start_elt, *end_elt, *uturn_elt;
+ u32 start_time, end_time;
+ u8 done = 0;
+
+ size_of_traceopt_per_node = fetch_trace_data_size (trace->ioam_trace_type);
+ // Unknown trace type
+ if (size_of_traceopt_per_node == 0)
+ return 0;
+  size_of_all_traceopts = trace_len;	/* excludes ioam_trace_type and data_list_elts_left */
+
+ num_nodes = (u8) (size_of_all_traceopts / size_of_traceopt_per_node);
+ if ((num_nodes == 0) || (num_nodes <= trace->data_list_elts_left))
+ return 0;
+
+ num_nodes -= trace->data_list_elts_left;
+
+ start_elt = trace->elts;
+ end_elt =
+ trace->elts +
+ (u32) ((size_of_traceopt_per_node / sizeof (u32)) * (num_nodes - 1));
+
+ if (oneway && (trace->ioam_trace_type & BIT_TTL_NODEID))
+ {
+ done = 0;
+ do
+ {
+ uturn_elt = start_elt - size_of_traceopt_per_node / sizeof (u32);
+
+ if ((clib_net_to_host_u32 (*start_elt) >> 24) <=
+ (clib_net_to_host_u32 (*uturn_elt) >> 24))
+ done = 1;
+ }
+ while (!done && (start_elt = uturn_elt) != end_elt);
+ }
+ if (trace->ioam_trace_type & BIT_TTL_NODEID)
+ {
+ start_elt++;
+ end_elt++;
+ }
+ if (trace->ioam_trace_type & BIT_ING_INTERFACE)
+ {
+ start_elt++;
+ end_elt++;
+ }
+ start_time = clib_net_to_host_u32 (*start_elt);
+ end_time = clib_net_to_host_u32 (*end_elt);
+
+ return (f64) (end_time - start_time);
+}
+
+always_inline void
+ip6_ioam_analyse_set_paths_down (ioam_analyser_data_t * data)
+{
+ ioam_analyse_trace_data *trace_data;
+ ioam_analyse_trace_record *trace_record;
+ ioam_path_map_t *path;
+ u8 k, i;
+
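+  /* Simple spinlock: busy-wait until the writer lock is acquired; it is
+   * released below by storing 0. The same pattern guards every reader
+   * and writer of the analyser data. */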
+ while (__sync_lock_test_and_set (data->writer_lock, 1))
+ ;
+
+ trace_data = &data->trace_data;
+
+ for (i = 0; i < IOAM_MAX_PATHS_PER_FLOW; i++)
+ {
+ trace_record = trace_data->path_data + i;
+
+ if (trace_record->is_free)
+ continue;
+
+ path = trace_record->path;
+
+ for (k = 0; k < trace_record->num_nodes; k++)
+ path[k].state_up = 0;
+ }
+ *(data->writer_lock) = 0;
+}
+
+always_inline void
+ip6_ioam_analyse_hbh_trace_loopback (ioam_analyser_data_t * data,
+ ioam_trace_hdr_t * trace, u16 trace_len)
+{
+ ioam_analyse_trace_data *trace_data;
+ ioam_analyse_trace_record *trace_record;
+ ioam_path_map_t *path;
+ u8 i, j, k, num_nodes, max_nodes;
+ u8 *ptr;
+ u32 nodeid;
+ u16 ingress_if, egress_if;
+ u16 size_of_traceopt_per_node;
+ u16 size_of_all_traceopts;
+
+ while (__sync_lock_test_and_set (data->writer_lock, 1))
+ ;
+
+ trace_data = &data->trace_data;
+
+ size_of_traceopt_per_node = fetch_trace_data_size (trace->ioam_trace_type);
+ if (0 == size_of_traceopt_per_node)
+ goto end;
+
+ size_of_all_traceopts = trace_len;
+
+ ptr = (u8 *) trace->elts;
+ max_nodes = (u8) (size_of_all_traceopts / size_of_traceopt_per_node);
+ num_nodes = max_nodes - trace->data_list_elts_left;
+
+ for (i = 0; i < IOAM_MAX_PATHS_PER_FLOW; i++)
+ {
+ trace_record = trace_data->path_data + i;
+ path = trace_record->path;
+
+ if (trace_record->is_free)
+ continue;
+
+ for (j = max_nodes, k = 0; k < num_nodes; j--, k++)
+ {
+ ptr =
+ (u8 *) ((u8 *) trace->elts +
+ (size_of_traceopt_per_node * (j - 1)));
+
+ nodeid = clib_net_to_host_u32 (*((u32 *) ptr)) & 0x00ffffff;
+ ptr += 4;
+
+ if (nodeid != path[k].node_id)
+ goto end;
+
+ if ((trace->ioam_trace_type == TRACE_TYPE_IF_TS_APP) ||
+ (trace->ioam_trace_type == TRACE_TYPE_IF))
+ {
+ ingress_if = clib_net_to_host_u16 (*((u16 *) ptr));
+ ptr += 2;
+ egress_if = clib_net_to_host_u16 (*((u16 *) ptr));
+ if ((ingress_if != path[k].ingress_if) ||
+ (egress_if != path[k].egress_if))
+ {
+ goto end;
+ }
+ }
+ /* Found Match - set path hop state to up */
+ path[k].state_up = 1;
+ }
+ }
+end:
+ *(data->writer_lock) = 0;
+}
+
+always_inline int
+ip6_ioam_analyse_hbh_trace (ioam_analyser_data_t * data,
+ ioam_trace_hdr_t * trace, u16 pak_len,
+ u16 trace_len)
+{
+ ioam_analyse_trace_data *trace_data;
+ u16 size_of_traceopt_per_node;
+ u16 size_of_all_traceopts;
+ u8 i, j, k, num_nodes, max_nodes;
+ u8 *ptr;
+ u32 nodeid;
+ u16 ingress_if, egress_if;
+ ioam_path_map_t *path = NULL;
+ ioam_analyse_trace_record *trace_record;
+
+ while (__sync_lock_test_and_set (data->writer_lock, 1))
+ ;
+
+ trace_data = &data->trace_data;
+
+ size_of_traceopt_per_node = fetch_trace_data_size (trace->ioam_trace_type);
+ // Unknown trace type
+ if (size_of_traceopt_per_node == 0)
+ goto DONE;
+ size_of_all_traceopts = trace_len;
+
+ ptr = (u8 *) trace->elts;
+ max_nodes = (u8) (size_of_all_traceopts / size_of_traceopt_per_node);
+ num_nodes = max_nodes - trace->data_list_elts_left;
+
+ for (i = 0; i < IOAM_MAX_PATHS_PER_FLOW; i++)
+ {
+ trace_record = trace_data->path_data + i;
+
+ if (trace_record->is_free ||
+ (num_nodes != trace_record->num_nodes) ||
+ (trace->ioam_trace_type != trace_record->trace_type))
+ continue;
+
+ path = trace_record->path;
+
+ for (j = max_nodes, k = 0; k < num_nodes; j--, k++)
+ {
+ ptr =
+ (u8 *) ((u8 *) trace->elts +
+ (size_of_traceopt_per_node * (j - 1)));
+
+ nodeid = clib_net_to_host_u32 (*((u32 *) ptr)) & 0x00ffffff;
+ ptr += 4;
+
+ if (nodeid != path[k].node_id)
+ break;
+
+ if ((trace->ioam_trace_type == TRACE_TYPE_IF_TS_APP) ||
+ (trace->ioam_trace_type == TRACE_TYPE_IF))
+ {
+ ingress_if = clib_net_to_host_u16 (*((u16 *) ptr));
+ ptr += 2;
+ egress_if = clib_net_to_host_u16 (*((u16 *) ptr));
+ if ((ingress_if != path[k].ingress_if) ||
+ (egress_if != path[k].egress_if))
+ {
+ break;
+ }
+ }
+ }
+
+ if (k == num_nodes)
+ {
+ goto found_match;
+ }
+ }
+
+ for (i = 0; i < IOAM_MAX_PATHS_PER_FLOW; i++)
+ {
+ trace_record = trace_data->path_data + i;
+ if (trace_record->is_free)
+ {
+ trace_record->is_free = 0;
+ trace_record->num_nodes = num_nodes;
+ trace_record->trace_type = trace->ioam_trace_type;
+ path = trace_data->path_data[i].path;
+ trace_record->pkt_counter = 0;
+ trace_record->bytes_counter = 0;
+ trace_record->min_delay = 0xFFFFFFFF;
+ trace_record->max_delay = 0;
+ trace_record->mean_delay = 0;
+ break;
+ }
+ }
+    }
+
+  /* All path slots are in use and none matched: drop this trace */
+  if (PREDICT_FALSE (i == IOAM_MAX_PATHS_PER_FLOW))
+    goto DONE;
+ for (j = max_nodes, k = 0; k < num_nodes; j--, k++)
+ {
+ ptr =
+ (u8 *) ((u8 *) trace->elts + (size_of_traceopt_per_node * (j - 1)));
+
+ path[k].node_id = clib_net_to_host_u32 (*((u32 *) ptr)) & 0x00ffffff;
+ ptr += 4;
+
+ if ((trace->ioam_trace_type == TRACE_TYPE_IF_TS_APP) ||
+ (trace->ioam_trace_type == TRACE_TYPE_IF))
+ {
+ path[k].ingress_if = clib_net_to_host_u16 (*((u16 *) ptr));
+ ptr += 2;
+ path[k].egress_if = clib_net_to_host_u16 (*((u16 *) ptr));
+ }
+ }
+
+found_match:
+ /* Set path state to UP */
+ for (k = 0; k < num_nodes; k++)
+ path[k].state_up = 1;
+
+ trace_record->pkt_counter++;
+ trace_record->bytes_counter += pak_len;
+ if (trace->ioam_trace_type & BIT_TIMESTAMP)
+ {
+ /* Calculate time delay */
+ u32 delay = (u32) ip6_ioam_analyse_calc_delay (trace, trace_len, 0);
+      if (delay < trace_record->min_delay)
+	trace_record->min_delay = delay;
+      if (delay > trace_record->max_delay)
+	trace_record->max_delay = delay;
+
+ u64 sum = (trace_record->mean_delay * data->seqno_data.rx_packets);
+ trace_record->mean_delay =
+ (u32) ((sum + delay) / (data->seqno_data.rx_packets + 1));
+ }
+DONE:
+ *(data->writer_lock) = 0;
+ return 0;
+}
+
+always_inline int
+ip6_ioam_analyse_hbh_e2e (ioam_analyser_data_t * data,
+ ioam_e2e_packet_t * e2e, u16 len)
+{
+ while (__sync_lock_test_and_set (data->writer_lock, 1))
+ ;
+
+ ioam_analyze_seqno (&data->seqno_data,
+ (u64) clib_net_to_host_u32 (e2e->e2e_data));
+
+ *(data->writer_lock) = 0;
+ return 0;
+}
+
+always_inline u8 *
+format_path_map (u8 * s, va_list * args)
+{
+ ioam_path_map_t *pm = va_arg (*args, ioam_path_map_t *);
+ u32 num_of_elts = va_arg (*args, u32);
+ u32 i;
+
+ for (i = 0; i < num_of_elts; i++)
+ {
+ s =
+ format (s,
+ "node_id: 0x%x, ingress_if: 0x%x, egress_if:0x%x, state:%s\n",
+ pm->node_id, pm->ingress_if, pm->egress_if,
+ pm->state_up ? "UP" : "DOWN");
+ pm++;
+ }
+
+ return (s);
+}
+
+always_inline u8 *
+print_analyse_flow (u8 * s, ioam_analyser_data_t * record)
+{
+ int j;
+ ioam_analyse_trace_record *trace_record;
+
+ s = format (s, "pkt_sent : %u\n", record->pkt_sent);
+ s = format (s, "pkt_counter : %u\n", record->pkt_counter);
+ s = format (s, "bytes_counter : %u\n", record->bytes_counter);
+
+ s = format (s, "Trace data: \n");
+
+ for (j = 0; j < IOAM_MAX_PATHS_PER_FLOW; j++)
+ {
+ trace_record = record->trace_data.path_data + j;
+ if (trace_record->is_free)
+ continue;
+
+ s = format (s, "path_map:\n%U", format_path_map,
+ trace_record->path, trace_record->num_nodes);
+ s = format (s, "pkt_counter: %u\n", trace_record->pkt_counter);
+ s = format (s, "bytes_counter: %u\n", trace_record->bytes_counter);
+
+ s = format (s, "min_delay: %u\n", trace_record->min_delay);
+ s = format (s, "max_delay: %u\n", trace_record->max_delay);
+ s = format (s, "mean_delay: %u\n", trace_record->mean_delay);
+ }
+
+ s = format (s, "\nPOT data: \n");
+ s = format (s, "sfc_validated_count : %u\n",
+ record->pot_data.sfc_validated_count);
+ s = format (s, "sfc_invalidated_count : %u\n",
+ record->pot_data.sfc_invalidated_count);
+
+ s = format (s, "\nSeqno Data:\n");
+ s = format (s,
+ "RX Packets : %lu\n"
+ "Lost Packets : %lu\n"
+ "Duplicate Packets : %lu\n"
+ "Reordered Packets : %lu\n",
+ record->seqno_data.rx_packets,
+ record->seqno_data.lost_packets,
+ record->seqno_data.dup_packets,
+ record->seqno_data.reordered_packets);
+
+ s = format (s, "\n");
+ return s;
+}
+
+always_inline void
+ioam_analyse_init_data (ioam_analyser_data_t * data)
+{
+ u16 j;
+ ioam_analyse_trace_data *trace_data;
+
+ data->is_free = 1;
+
+  /* We maintain data corresponding to the last IPFIX export; this may be
+   * extended in the future to keep a history of data */
+ vec_validate_aligned (data->chached_data_list, 0, CLIB_CACHE_LINE_BYTES);
+
+ data->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ *(data->writer_lock) = 0;
+
+ trace_data = &(data->trace_data);
+ for (j = 0; j < IOAM_MAX_PATHS_PER_FLOW; j++)
+ trace_data->path_data[j].is_free = 1;
+}
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_ANALYSE_IOAM_ANALYSE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/analyse/ioam_summary_export.c b/src/plugins/ioam/analyse/ioam_summary_export.c
new file mode 100644
index 00000000..af2d39ab
--- /dev/null
+++ b/src/plugins/ioam/analyse/ioam_summary_export.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip6_packet.h>
+#include <ioam/analyse/ioam_summary_export.h>
+#include <ioam/analyse/ip6/ip6_ioam_analyse.h>
+
+u8 *
+ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address, u16 collector_port)
+{
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s;
+ ipfix_template_header_t *t;
+ ipfix_field_specifier_t *f;
+ ipfix_field_specifier_t *first_field;
+ u8 *rewrite = 0;
+ ip4_ipfix_template_packet_t *tp;
+ u32 field_count = 0;
+ u32 field_index = 0;
+ flow_report_stream_t *stream;
+
+ stream = &frm->streams[fr->stream_index];
+
+ /* Determine field count */
+#define _(field,mask,item,length) \
+ { \
+ field_count++; \
+ fr->fields_to_send = clib_bitmap_set (fr->fields_to_send, \
+ field_index, 1); \
+ } \
+ field_index++;
+
+ foreach_ioam_ipfix_field;
+#undef _
+
+  /* Add src address, dest address, src port, dest port,
+   * path map and number of paths manually */
+ field_count += 6;
+
+ /* allocate rewrite space */
+ vec_validate_aligned (rewrite,
+ sizeof (ip4_ipfix_template_packet_t)
+ + field_count * sizeof (ipfix_field_specifier_t) - 1,
+ CLIB_CACHE_LINE_BYTES);
+
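+  /*
+   * Template packet layout: IPv4 header, UDP header, IPFIX message
+   * header, set header, template header, then the field specifiers.
+   * The length and checksum fields are fixed up at the end, once the
+   * number of fields is known.
+   */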
+ tp = (ip4_ipfix_template_packet_t *) rewrite;
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+ t = (ipfix_template_header_t *) (s + 1);
+ first_field = f = (ipfix_field_specifier_t *) (t + 1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->src_address.as_u32 = src_address->as_u32;
+ ip->dst_address.as_u32 = collector_address->as_u32;
+ udp->src_port = clib_host_to_net_u16 (collector_port);
+ udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
+ udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
+
+  h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+  /* Add src address, dest address, src port and dest port manually;
+   * the path map and number of paths are appended further below */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ sourceIPv6Address,
+ sizeof (ip6_address_t));
+ f++;
+
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ destinationIPv6Address,
+ sizeof (ip6_address_t));
+ f++;
+
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ sourceTransportPort, 2);
+ f++;
+
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ destinationTransportPort, 2);
+ f++;
+
+#define _(field,mask,item,length) \
+ { \
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */, \
+ item, length); \
+ f++; \
+ }
+ foreach_ioam_ipfix_field;
+#undef _
+
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ ioamNumberOfPaths, 2);
+ f++;
+
+ /* Add ioamPathMap manually */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
+ ioamPathMap,
+ (sizeof (ioam_path) +
+ (sizeof (ioam_path_map_t) *
+ IOAM_TRACE_MAX_NODES)));
+ f++;
+
+ /* Back to the template packet... */
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+
+ ASSERT (f - first_field);
+ /* Field count in this template */
+ t->id_count = ipfix_id_count (IOAM_FLOW_TEMPLATE_ID, f - first_field);
+
+ /* set length in octets */
+ s->set_id_length =
+ ipfix_set_id_length (2 /* set_id */ , (u8 *) f - (u8 *) s);
+
+ /* message length in octets */
+ h->version_length = version_length ((u8 *) f - (u8 *) h);
+
+ ip->length = clib_host_to_net_u16 ((u8 *) f - (u8 *) ip);
+ ip->checksum = ip4_header_checksum (ip);
+
+ return rewrite;
+}
+
+u16
+ioam_analyse_add_ipfix_record (flow_report_t * fr,
+ ioam_analyser_data_t * record,
+ vlib_buffer_t * b0, u16 offset,
+ ip6_address_t * src, ip6_address_t * dst,
+ u16 src_port, u16 dst_port)
+{
+ while (__sync_lock_test_and_set (record->writer_lock, 1))
+ ;
+
+ int field_index = 0;
+ u16 tmp;
+ int i, j;
+ u16 num_paths = 0;
+ u16 num_paths_offset;
+
+
+ /* Add IPv6 source address manually */
+ memcpy (b0->data + offset, &src->as_u64[0], sizeof (u64));
+ offset += sizeof (u64);
+ memcpy (b0->data + offset, &src->as_u64[1], sizeof (u64));
+ offset += sizeof (u64);
+
+ /* Add IPv6 destination address manually */
+ memcpy (b0->data + offset, &dst->as_u64[0], sizeof (u64));
+ offset += sizeof (u64);
+ memcpy (b0->data + offset, &dst->as_u64[1], sizeof (u64));
+ offset += sizeof (u64);
+
+ /* Add source port manually */
+ tmp = clib_host_to_net_u16 (src_port);
+ memcpy (b0->data + offset, &tmp, sizeof (u16));
+ offset += sizeof (u16);
+
+ /* Add dest port manually */
+ tmp = clib_host_to_net_u16 (dst_port);
+ memcpy (b0->data + offset, &tmp, sizeof (u16));
+ offset += sizeof (u16);
+
+#define _(field,mask,item,length) \
+ if (clib_bitmap_get (fr->fields_to_send, field_index)) \
+ { \
+ /* Expect only 4 bytes */ \
+ u32 tmp; \
+ tmp = clib_host_to_net_u32((u32)record->field - (u32)record->chached_data_list->field);\
+ memcpy (b0->data + offset, &tmp, length); \
+ offset += length; \
+ }
+ field_index++;
+ foreach_ioam_ipfix_field;
+#undef _
+
+ /* Store num_paths_offset here and update later */
+ num_paths_offset = offset;
+ offset += sizeof (u16);
+
+ /* Add ioamPathMap manually */
+ for (i = 0; i < IOAM_MAX_PATHS_PER_FLOW; i++)
+ {
+ ioam_analyse_trace_record *trace = record->trace_data.path_data + i;
+ ioam_analyse_trace_record *trace_cached =
+ record->chached_data_list->trace_data.path_data + i;
+ ioam_path *path = (ioam_path *) (b0->data + offset);
+
+ if (!trace->is_free)
+ {
+ num_paths++;
+
+ path->num_nodes = trace->num_nodes;
+
+ path->trace_type = trace->trace_type;
+ if (0 < (trace->pkt_counter - trace_cached->pkt_counter))
+ {
+ u64 new_sum = trace->mean_delay * record->seqno_data.rx_packets;
+ u64 old_sum =
+ trace_cached->mean_delay *
+ record->chached_data_list->seqno_data.rx_packets;
+ path->mean_delay =
+ (u32) ((new_sum - old_sum) / (trace->pkt_counter -
+ trace_cached->pkt_counter));
+ path->mean_delay = clib_host_to_net_u32 (path->mean_delay);
+ }
+ else
+ path->mean_delay = 0;
+
+ path->bytes_counter =
+ trace->bytes_counter - trace_cached->bytes_counter;
+ path->bytes_counter = clib_host_to_net_u32 (path->bytes_counter);
+
+ path->pkt_counter = trace->pkt_counter - trace_cached->pkt_counter;
+ path->pkt_counter = clib_host_to_net_u32 (path->pkt_counter);
+ offset += sizeof (ioam_path);
+
+ for (j = 0; j < trace->num_nodes; j++)
+ {
+ path->path[j].node_id =
+ clib_host_to_net_u32 (trace->path[j].node_id);
+ path->path[j].ingress_if =
+ clib_host_to_net_u16 (trace->path[j].ingress_if);
+ path->path[j].egress_if =
+ clib_host_to_net_u16 (trace->path[j].egress_if);
+ path->path[j].state_up = trace->path[j].state_up;
+ }
+
+ //offset += (sizeof(ioam_path_map_t) * trace->num_nodes);
+ offset += (sizeof (ioam_path_map_t) * IOAM_TRACE_MAX_NODES); //FIXME
+ }
+ }
+
+ num_paths = clib_host_to_net_u16 (num_paths);
+ memcpy (b0->data + num_paths_offset, &num_paths, sizeof (u16));
+
+ /* Update cache */
+ *(record->chached_data_list) = *record;
+ record->chached_data_list->chached_data_list = NULL;
+
+ *(record->writer_lock) = 0;
+ return offset;
+}
+
+vlib_frame_t *
+ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
+ vlib_frame_t * f, u32 * to_next, u32 node_index)
+{
+ vlib_buffer_t *b0 = NULL;
+ u32 next_offset = 0;
+ u32 bi0 = ~0;
+ int i;
+ ip4_ipfix_template_packet_t *tp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s = NULL;
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ u32 records_this_buffer;
+ u16 new_l0, old_l0;
+ ip_csum_t sum0;
+ vlib_main_t *vm = frm->vlib_main;
+ ip6_address_t temp;
+ ioam_analyser_data_t *record = NULL;
+ flow_report_stream_t *stream;
+ ioam_analyser_data_t *aggregated_data;
+ u16 data_len;
+
+ stream = &frm->streams[fr->stream_index];
+
+ memset (&temp, 0, sizeof (ip6_address_t));
+
+ aggregated_data = ioam_analyser_main.aggregated_data;
+ data_len = vec_len (aggregated_data);
+
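+  /*
+   * Append one IPFIX record per in-use analyser entry. The current buffer
+   * is flushed (set/message lengths and checksums fixed up, then queued)
+   * when the payload comes within 250 bytes of the path MTU or when the
+   * last entry has been added.
+   */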
+ vec_foreach_index (i, aggregated_data)
+ {
+ u8 flush = 0;
+ record = aggregated_data + i;
+
+ /* Flush if last entry */
+ if (i == (data_len - 1))
+ flush = 1;
+
+ if (!record->is_free)
+ {
+
+ if (PREDICT_FALSE (b0 == NULL))
+ {
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ break;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ memcpy (b0->data, fr->rewrite, vec_len (fr->rewrite));
+ b0->current_data = 0;
+ b0->current_length = vec_len (fr->rewrite);
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+
+ tp = vlib_buffer_get_current (b0);
+ ip = &tp->ip4;
+ h = &tp->ipfix.h;
+ s = &tp->ipfix.s;
+
+ /* FIXUP: message header export_time */
+ h->export_time = clib_host_to_net_u32 (((u32) time (NULL)));
+
+ /* FIXUP: message header sequence_number */
+ h->sequence_number = stream->sequence_number++;
+ h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
+ next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
+ records_this_buffer = 0;
+ }
+
+ next_offset = ioam_analyse_add_ipfix_record (fr, record,
+ b0, next_offset,
+ &temp, &temp, 0, 0);
+ records_this_buffer++;
+
+ /* Flush data if packet len is about to reach path mtu */
+ if (next_offset > (frm->path_mtu - 250))
+ flush = 1;
+ }
+
+ if (PREDICT_FALSE (flush && b0))
+ {
+ s->set_id_length = ipfix_set_id_length (IOAM_FLOW_TEMPLATE_ID,
+ next_offset - (sizeof (*ip) +
+ sizeof (*udp) +
+ sizeof (*h)));
+ b0->current_length = next_offset;
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+
+ sum0 = ip->checksum;
+ old_l0 = ip->length;
+ new_l0 = clib_host_to_net_u16 ((u16) next_offset);
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+
+ ip->checksum = ip_csum_fold (sum0);
+ ip->length = new_l0;
+ udp->length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ if (frm->udp_checksum)
+ {
+ /* RFC 7011 section 10.3.2. */
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ to_next[0] = bi0;
+ f->n_vectors++;
+ to_next++;
+
+ if (f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, node_index, f);
+ f = vlib_get_frame_to_node (vm, node_index);
+ f->n_vectors = 0;
+ to_next = vlib_frame_vector_args (f);
+ }
+ b0 = 0;
+ bi0 = ~0;
+ }
+ }
+
+ return f;
+}
+
+clib_error_t *
+ioam_flow_create (u8 del)
+{
+ vnet_flow_report_add_del_args_t args;
+ int rv;
+ u32 domain_id = 0;
+ flow_report_main_t *frm = &flow_report_main;
+ u16 template_id;
+
+ memset (&args, 0, sizeof (args));
+ args.rewrite_callback = ioam_template_rewrite;
+ args.flow_data_callback = ioam_send_flows;
+  args.is_add = del ? 0 : 1;
+ args.domain_id = domain_id;
+
+ rv = vnet_flow_report_add_del (frm, &args, &template_id);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "registration not found...");
+ default:
+ return clib_error_return (0, "vnet_flow_report_add_del returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+ioam_flow_report_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+ if ((error = vlib_call_init_function (vm, flow_report_init)))
+ return error;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ioam_flow_report_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/analyse/ioam_summary_export.h b/src/plugins/ioam/analyse/ioam_summary_export.h
new file mode 100755
index 00000000..b4355061
--- /dev/null
+++ b/src/plugins/ioam/analyse/ioam_summary_export.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_ioam_flow_report_h__
+#define __included_ip6_ioam_flow_report_h__
+
+#include <ioam/analyse/ioam_analyse.h>
+#include <vnet/flow/flow_report.h>
+
+#define foreach_ioam_ipfix_info_element \
+_(ioamPacketSent, 5239, u32) \
+_(ioamPacketCount, 5237, u32) \
+_(ioamByteCount, 5238, u32) \
+_(ioamPathMap, 5262, u32) \
+_(ioamNumberOfPaths, 5264, u16) \
+_(ioamSfcValidatedCount, 5278, u32) \
+_(ioamSfcInValidatedCount, 5279, u32) \
+_(ioamSeqnoRxCount, 5280, u32) \
+_(ioamSeqnoLostCount, 5281, u32) \
+_(ioamSeqnoReorderedCount, 5282, u32) \
+_(ioamSeqnoDupCount, 5283, u32)
+
+
+typedef enum
+{
+#define _(n,v,t) n = v,
+ foreach_ioam_ipfix_info_element
+#undef _
+} ioam_ipfix_info_element_id_t;
+
+#define foreach_ioam_ipfix_field \
+_(pkt_sent, 0xffffffff, ioamPacketSent, 4) \
+_(pkt_counter, 0xffffffff, ioamPacketCount, 4) \
+_(bytes_counter, 0xffffffff, ioamByteCount, 4) \
+_(pot_data.sfc_validated_count, 0xffffffff, ioamSfcValidatedCount, 4) \
+_(pot_data.sfc_invalidated_count, 0xffffffff, ioamSfcInValidatedCount, 4) \
+_(seqno_data.rx_packets, 0xffffffff, ioamSeqnoRxCount, 4) \
+_(seqno_data.lost_packets, 0xffffffff, ioamSeqnoLostCount, 4) \
+_(seqno_data.reordered_packets, 0xffffffff, ioamSeqnoReorderedCount, 4) \
+_(seqno_data.dup_packets, 0xffffffff, ioamSeqnoDupCount, 4)
+
+clib_error_t *ioam_flow_report_init (vlib_main_t * vm);
+
+typedef struct
+{
+ u8 num_nodes;
+ u8 trace_type;
+ u16 reserve;
+ u32 mean_delay;
+ u32 pkt_counter;
+ u32 bytes_counter;
+ ioam_path_map_t path[0];
+} ioam_path;
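+
+/*
+ * 'path' is a flexible array member: the template and the exported records
+ * reserve space for IOAM_TRACE_MAX_NODES entries per path (see
+ * ioam_template_rewrite() and ioam_analyse_add_ipfix_record()).
+ */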
+
+clib_error_t *ioam_flow_create (u8 del);
+
+u8 *ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address, u16 collector_port);
+
+u16 ioam_analyse_add_ipfix_record (flow_report_t * fr,
+ ioam_analyser_data_t * record,
+ vlib_buffer_t * b0, u16 offset,
+ ip6_address_t * src, ip6_address_t * dst,
+ u16 src_port, u16 dst_port);
+
+#endif /* __included_ip6_ioam_flow_report_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c
new file mode 100644
index 00000000..39442b62
--- /dev/null
+++ b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <ioam/analyse/ioam_analyse.h>
+#include <ioam/export-common/ioam_export.h>
+#include <ioam/analyse/ip6/ip6_ioam_analyse.h>
+#include <ioam/analyse/ioam_summary_export.h>
+#include <vnet/ip/ip.h>
+#include <ioam/ipfixcollector/ipfixcollector.h>
+
+extern ioam_export_main_t ioam_export_main;
+
+static clib_error_t *
+ioam_analyse_enable_disable (vlib_main_t * vm,
+ int is_add, int is_export, int remote_listen)
+{
+ ipfix_client_add_del_t ipfix_reg;
+ clib_error_t *rv = 0;
+
+ ipfix_reg.client_name = format (0, "ip6-hbh-analyse-remote");
+ ipfix_reg.client_node = analyse_node_remote.index;
+ ipfix_reg.ipfix_setid = IPFIX_IOAM_EXPORT_ID;
+
+ if (is_export)
+ {
+ rv = ioam_flow_create (!is_add);
+ if (rv)
+ goto ret;
+ }
+
+ if (is_add)
+ {
+ ip6_ioam_analyse_register_handlers ();
+ if (remote_listen)
+ {
+ ipfix_reg.del = 0;
+ ipfix_collector_reg_setid (vm, &ipfix_reg);
+ }
+ else
+ {
+ ioam_export_set_next_node (&ioam_export_main,
+ (u8 *) "ip6-hbh-analyse-local");
+ }
+ }
+ else
+ {
+ ip6_ioam_analyse_unregister_handlers ();
+ if (remote_listen)
+ {
+ ipfix_reg.del = 1;
+ ipfix_collector_reg_setid (vm, &ipfix_reg);
+ }
+ else
+ ioam_export_reset_next_node (&ioam_export_main);
+ }
+
+ret:
+ vec_free (ipfix_reg.client_name);
+ return rv;
+}
+
+static clib_error_t *
+set_ioam_analyse_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_export = 0;
+ int is_add = 1;
+ int remote_listen = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "export-ipfix-collector"))
+ is_export = 1;
+ else if (unformat (input, "disable"))
+ is_add = 0;
+ else if (unformat (input, "listen-ipfix"))
+ remote_listen = 1;
+ else
+ break;
+ }
+
+ return (ioam_analyse_enable_disable (vm, is_add, is_export, remote_listen));
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ioam_analyse_command, static) = {
+ .path = "set ioam analyse",
+ .short_help = "set ioam analyse [export-ipfix-collector] [disable] [listen-ipfix]",
+ .function = set_ioam_analyse_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_ioam_analyse_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_ioam_analyser_main_t *am = &ioam_analyser_main;
+ ioam_analyser_data_t *record = NULL;
+ u8 i;
+ u8 *s = 0;
+
+  s = format (0, "iOAM Analyse Information:\n");
+ vec_foreach_index (i, am->aggregated_data)
+ {
+ record = am->aggregated_data + i;
+ if (record->is_free)
+ continue;
+
+ s = format (s, "Flow Number: %u\n", i);
+ s = print_analyse_flow (s, record);
+ s = format (s, "\n");
+ }
+ vlib_cli_output (vm, "%v", s);
+
+ vec_free (s);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_ioam_ipfix_cmd, static) = {
+  .path = "show ioam analyse",
+ .short_help = "show ioam analyser information",
+ .function = show_ioam_analyse_cmd_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ioam_analyse_init (vlib_main_t * vm)
+{
+ ip6_ioam_analyser_main_t *am = &ioam_analyser_main;
+ u16 i;
+
+ vec_validate_aligned (am->aggregated_data, 50, CLIB_CACHE_LINE_BYTES);
+ vec_foreach_index (i, am->aggregated_data)
+ {
+ ioam_analyse_init_data (am->aggregated_data + i);
+ }
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ioam_analyse_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h
new file mode 100644
index 00000000..5a2a2d70
--- /dev/null
+++ b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_ANALYSE_IP6_IOAM_ANALYSE_NODE_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_ANALYSE_IP6_IOAM_ANALYSE_NODE_H_
+
+#include <ioam/analyse/ioam_analyse.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <ioam/encap/ip6_ioam_trace.h>
+
+/** @brief IP6-iOAM analyser main structure.
+ @note cache aligned.
+*/
+typedef struct
+{
+ /** Array of function pointer to analyse each hop-by-hop option. */
+ int (*analyse_hbh_handler[MAX_IP6_HBH_OPTION]) (u32 flow_id,
+ ip6_hop_by_hop_option_t *
+ opt, u16 len);
+
+ /** This contains the aggregated data from the time VPP started analysing. */
+ ioam_analyser_data_t *aggregated_data;
+
+} ip6_ioam_analyser_main_t;
+
+extern ip6_ioam_analyser_main_t ioam_analyser_main;
+
+extern vlib_node_registration_t analyse_node_local;
+extern vlib_node_registration_t analyse_node_remote;
+
+void ip6_ioam_analyse_register_handlers (void);
+
+void ip6_ioam_analyse_unregister_handlers (void);
+
+clib_error_t *ip6_ioam_analyse_init (vlib_main_t * vm);
+
+inline static ioam_analyser_data_t *
+ioam_analyse_get_data_from_flow_id (u32 flow_id)
+{
+ if (flow_id >= vec_len (ioam_analyser_main.aggregated_data))
+ return NULL;
+
+ if (ioam_analyser_main.aggregated_data[flow_id].is_free)
+ ioam_analyser_main.aggregated_data[flow_id].is_free = 0;
+
+ return (ioam_analyser_main.aggregated_data + flow_id);
+}
+
+always_inline void *
+ip6_ioam_find_hbh_option (ip6_hop_by_hop_header_t * hbh0, u8 option)
+{
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ u8 type0;
+
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+
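+  /* Walk the hop-by-hop TLV options: Pad1 (type 0) is a single byte with
+   * no length field; every other option advances by its length plus the
+   * 2-byte type/length header. */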
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ if (type0 == option)
+ return ((void *) opt0);
+
+ if (0 == type0)
+ {
+	  opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);	/* Pad1: advance one octet */
+ continue;
+ }
+ opt0 = (ip6_hop_by_hop_option_t *)
+ (((u8 *) opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
+ }
+
+ return NULL;
+}
+
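+/* Usage sketch (illustrative only, not part of the API): locating the
+ * iOAM trace option inside a validated hop-by-hop header. The walker
+ * above returns NULL when the option is absent, so callers must check.
+ */
+#if 0 /* example */
+static ioam_trace_option_t *
+example_get_trace_option (ip6_hop_by_hop_header_t * hbh0)
+{
+  return (ioam_trace_option_t *)
+    ip6_ioam_find_hbh_option (hbh0, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+}
+#endif
+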
+always_inline int
+ip6_ioam_analyse_compare_path_delay (ip6_hop_by_hop_header_t * hbh0,
+ ip6_hop_by_hop_header_t * hbh1,
+ bool oneway)
+{
+ ioam_trace_option_t *trace0 = NULL, *trace1 = NULL;
+ f64 delay0, delay1;
+
+ trace0 =
+ ip6_ioam_find_hbh_option (hbh0, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+ trace1 =
+ ip6_ioam_find_hbh_option (hbh1, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+
+ if (PREDICT_FALSE ((trace0 == NULL) && (trace1 == NULL)))
+ return 0;
+
+ if (PREDICT_FALSE (trace1 == NULL))
+ return 1;
+
+ if (PREDICT_FALSE (trace0 == NULL))
+ return -1;
+
+ delay0 = ip6_ioam_analyse_calc_delay (&trace0->trace_hdr,
+ trace0->hdr.length - 2, oneway);
+ delay1 = ip6_ioam_analyse_calc_delay (&trace1->trace_hdr,
+ trace1->hdr.length - 2, oneway);
+
+  /* >0: path0 slower, <0: path1 slower, 0: equal delay */
+  return (delay0 > delay1) ? 1 : ((delay0 < delay1) ? -1 : 0);
+}
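+
+/* Usage sketch (illustrative only): choosing the slower of two sampled
+ * paths with the comparator above; hbh_a/hbh_b are assumed to carry
+ * trace options with timestamp data.
+ */
+#if 0 /* example */
+static ip6_hop_by_hop_header_t *
+example_slower_path (ip6_hop_by_hop_header_t * hbh_a,
+		     ip6_hop_by_hop_header_t * hbh_b)
+{
+  return (ip6_ioam_analyse_compare_path_delay (hbh_a, hbh_b,
+					       1 /* oneway */ ) >= 0)
+    ? hbh_a : hbh_b;
+}
+#endif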
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_ANALYSE_IP6_IOAM_ANALYSE_NODE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/analyse/ip6/node.c b/src/plugins/ioam/analyse/ip6/node.c
new file mode 100644
index 00000000..6db6355e
--- /dev/null
+++ b/src/plugins/ioam/analyse/ip6/node.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <ioam/export-common/ioam_export.h>
+#include <ioam/encap/ip6_ioam_trace.h>
+#include <ioam/encap/ip6_ioam_pot.h>
+#include <ioam/lib-pot/pot_util.h>
+#include <ioam/encap/ip6_ioam_e2e.h>
+#include <ioam/analyse/ioam_analyse.h>
+#include <ioam/analyse/ip6/ip6_ioam_analyse.h>
+#include <vnet/plugin/plugin.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 flow_id;
+} analyse_trace_t;
+
+vlib_node_registration_t analyse_node_local;
+vlib_node_registration_t analyse_node_remote;
+
+#define foreach_analyse_error \
+_(ANALYSED, "Packets analysed for summarization") \
+_(FAILED, "Packets whose analysis failed")
+
+typedef enum
+{
+#define _(sym,str) ANALYSE_ERROR_##sym,
+ foreach_analyse_error
+#undef _
+ ANALYSE_N_ERROR,
+} analyse_error_t;
+
+static char *analyse_error_strings[] = {
+#define _(sym,string) string,
+ foreach_analyse_error
+#undef _
+};
+
+typedef enum
+{
+ ANALYSE_NEXT_IP4_LOOKUP,
+ ANALYSE_NEXT_IP4_DROP,
+ ANALYSE_N_NEXT,
+} analyse_next_t;
+
+ip6_ioam_analyser_main_t ioam_analyser_main;
+
+/* packet trace format function */
+static u8 *
+format_analyse_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ analyse_trace_t *t = va_arg (*args, analyse_trace_t *);
+
+ s = format (s, "IP6-ioam-analyse: flow_id %d, next index %d",
+ t->flow_id, t->next_index);
+ return s;
+}
+
+always_inline u8
+ioam_analyse_hbh (u32 flow_id,
+ ip6_hop_by_hop_header_t * hbh0,
+ ip6_hop_by_hop_option_t * opt0,
+ ip6_hop_by_hop_option_t * limit0, u16 len)
+{
+ ip6_ioam_analyser_main_t *am = &ioam_analyser_main;
+ u8 type0;
+ u8 error0 = 0;
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+	  opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);	/* Pad1: advance one octet */
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (am->analyse_hbh_handler[type0])
+ {
+	      if (PREDICT_FALSE
+ ((*am->analyse_hbh_handler[type0]) (flow_id, opt0,
+ len) < 0))
+ {
+ error0 = ANALYSE_ERROR_FAILED;
+ return (error0);
+ }
+ }
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ }
+ return (error0);
+}
+
+/**
+ * @brief IPv6 InBandOAM Analyse node.
+ * @node ip6-hbh-analyse-local, ip6-hbh-analyse-remote
+ *
+ * This function receives IP-FIX packets containing IPv6-iOAM records, analyses
+ * them and collects/aggregates the statistics.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer, next index usage
+ *
+ * <em>Uses:</em>
+ * - <code>vlib_buffer_get_current(p0)</code>
+ * - Walks over each iOAM record present in the IP-FIX record, analyses it
+ *   and stores the statistics.
+ *
+ * <em>Next Index:</em>
+ * - Dispatches the packet to ip4-lookup when running as the
+ *   ip6-hbh-analyse-local node, and to ip4-drop when running as the
+ *   ip6-hbh-analyse-remote node.
+ */
+static uword
+ip6_ioam_analyse_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ analyse_next_t next_index;
+ u32 pkts_analysed = 0;
+ u32 pkts_failed = 0;
+ u8 remote = 0;
+ u32 next0 = ANALYSE_NEXT_IP4_LOOKUP;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (PREDICT_FALSE (analyse_node_remote.index == node->node_index))
+ {
+ remote = 1;
+ next0 = ANALYSE_NEXT_IP4_DROP;
+ }
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip40;
+ u8 *data, *limit;
+ u16 num_ioam_records;
+
+ /* speculatively enqueue p0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ if (PREDICT_FALSE (remote))
+ {
+ vlib_buffer_advance (p0, -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof
+ (ipfix_message_header_t) +
+ sizeof (ipfix_set_header_t)));
+ }
+ data = (u8 *) vlib_buffer_get_current (p0);
+ ip40 = (ip4_header_t *) vlib_buffer_get_current (p0);
+ limit = data + clib_net_to_host_u16 (ip40->length);
+ data += sizeof (ip4_header_t) + sizeof (udp_header_t)
+ + sizeof (ipfix_message_header_t) + sizeof (ipfix_set_header_t);
+
+ num_ioam_records = (limit - data) / DEFAULT_EXPORT_SIZE;
+
+ while (num_ioam_records >= 4)
+ {
+ /* Prefetch next 2 ioam records */
+ {
+ CLIB_PREFETCH (data + (2 * DEFAULT_EXPORT_SIZE),
+ (DEFAULT_EXPORT_SIZE), LOAD);
+ CLIB_PREFETCH (data + (3 * DEFAULT_EXPORT_SIZE),
+ (DEFAULT_EXPORT_SIZE), LOAD);
+ }
+
+ num_ioam_records -= 2;
+
+ ip6_header_t *ip60, *ip61;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
+ u32 flow_id0, flow_id1;
+ u8 error0, error1;
+ ioam_analyser_data_t *data0, *data1;
+ u16 p_len0, p_len1;
+
+ ip60 = (ip6_header_t *) data;
+ ip61 = (ip6_header_t *) (data + DEFAULT_EXPORT_SIZE);
+
+ data += (2 * DEFAULT_EXPORT_SIZE);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip60 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip61 + 1);
+
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
+
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
+ ((hbh0->length + 1) << 3));
+ limit1 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
+ ((hbh1->length + 1) << 3));
+
+ flow_id0 =
+ clib_net_to_host_u32
+ (ip60->ip_version_traffic_class_and_flow_label) & 0xFFFFF;
+ flow_id1 =
+ clib_net_to_host_u32
+ (ip61->ip_version_traffic_class_and_flow_label) & 0xFFFFF;
+
+ p_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ p_len1 = clib_net_to_host_u16 (ip61->payload_length);
+
+ error0 =
+ ioam_analyse_hbh (flow_id0, hbh0, opt0, limit0, p_len0);
+	      error1 =
+		ioam_analyse_hbh (flow_id1, hbh1, opt1, limit1, p_len1);
+
+ if (PREDICT_TRUE ((error0 == 0) && (error1 == 0)))
+ {
+ pkts_analysed += 2;
+ data0 = ioam_analyse_get_data_from_flow_id (flow_id0);
+ data1 = ioam_analyse_get_data_from_flow_id (flow_id1);
+
+ while (__sync_lock_test_and_set (data0->writer_lock, 1))
+ ;
+ data0->pkt_counter++;
+ data0->bytes_counter += p_len0;
+ *(data0->writer_lock) = 0;
+
+ while (__sync_lock_test_and_set (data1->writer_lock, 1))
+ ;
+ data1->pkt_counter++;
+ data1->bytes_counter += p_len1;
+ *(data1->writer_lock) = 0;
+ }
+ else if (error0 == 0)
+ {
+ pkts_analysed++;
+ pkts_failed++;
+
+ data0 = ioam_analyse_get_data_from_flow_id (flow_id0);
+ while (__sync_lock_test_and_set (data0->writer_lock, 1))
+ ;
+ data0->pkt_counter++;
+ data0->bytes_counter += p_len0;
+ *(data0->writer_lock) = 0;
+ }
+ else if (error1 == 0)
+ {
+ pkts_analysed++;
+ pkts_failed++;
+
+ data1 = ioam_analyse_get_data_from_flow_id (flow_id1);
+ while (__sync_lock_test_and_set (data1->writer_lock, 1))
+ ;
+ data1->pkt_counter++;
+ data1->bytes_counter += p_len1;
+ *(data1->writer_lock) = 0;
+ }
+ else
+ pkts_failed += 2;
+ }
+
+ while (num_ioam_records > 0)
+ {
+ num_ioam_records--;
+
+ ip6_header_t *ip60;
+ ip6_hop_by_hop_header_t *hbh0;
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ u32 flow_id0;
+ u8 error0;
+ ioam_analyser_data_t *data0;
+ u16 p_len0;
+
+ ip60 = (ip6_header_t *) data;
+ data += (1 * DEFAULT_EXPORT_SIZE);
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip60 + 1);
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
+ ((hbh0->length + 1) << 3));
+
+ flow_id0 =
+ clib_net_to_host_u32
+ (ip60->ip_version_traffic_class_and_flow_label) & 0xFFFFF;
+ p_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ error0 =
+ ioam_analyse_hbh (flow_id0, hbh0, opt0, limit0, p_len0);
+
+ if (PREDICT_TRUE (error0 == 0))
+ {
+ pkts_analysed++;
+ data0 = ioam_analyse_get_data_from_flow_id (flow_id0);
+ while (__sync_lock_test_and_set (data0->writer_lock, 1))
+ ;
+ data0->pkt_counter++;
+		  data0->bytes_counter += p_len0;
+ *(data0->writer_lock) = 0;
+ }
+ else
+ pkts_failed++;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index, ANALYSE_ERROR_ANALYSED,
+ pkts_analysed);
+
+ if (PREDICT_FALSE (pkts_failed))
+ vlib_node_increment_counter (vm, node->node_index, ANALYSE_ERROR_FAILED,
+ pkts_failed);
+
+ return frame->n_vectors;
+}
+
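+/* Packet layout assumed by ip6_ioam_analyse_node_fn above (sketch):
+ *
+ *   ip4_header_t | udp_header_t | ipfix_message_header_t |
+ *   ipfix_set_header_t | N x DEFAULT_EXPORT_SIZE iOAM records
+ *
+ * so the record count is derived as
+ *   N = (ip4 total length - size of the four headers) / DEFAULT_EXPORT_SIZE
+ * and each record begins with the exported probe's ip6_header_t.
+ */
+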
+int
+ip6_ioam_analyse_hbh_trace_internal (u32 flow_id,
+ ip6_hop_by_hop_option_t * opt, u16 len)
+{
+ ioam_analyser_data_t *data;
+ ioam_trace_option_t *trace = (ioam_trace_option_t *) opt;
+
+ data = ioam_analyse_get_data_from_flow_id (flow_id);
+ ASSERT (data != NULL);
+
+ (void) ip6_ioam_analyse_hbh_trace (data, &trace->trace_hdr, len,
+ (trace->hdr.length - 2)
+ /*ioam_trace_type,data_list_elts_left */
+ );
+ return 0;
+}
+
+int
+ip6_ioam_analyse_hbh_pot (u32 flow_id, ip6_hop_by_hop_option_t * opt0,
+ u16 len)
+{
+
+ ioam_pot_option_t *pot0;
+ u64 random = 0;
+ u64 cumulative = 0;
+ pot_profile *pot_profile = 0;
+ int ret;
+ ioam_analyser_data_t *data;
+
+ data = ioam_analyse_get_data_from_flow_id (flow_id);
+
+ pot0 = (ioam_pot_option_t *) opt0;
+ random = clib_net_to_host_u64 (pot0->random);
+ cumulative = clib_net_to_host_u64 (pot0->cumulative);
+ pot_profile = pot_profile_get_active ();
+ ret = pot_validate (pot_profile, cumulative, random);
+
+ while (__sync_lock_test_and_set (data->writer_lock, 1))
+ ;
+
+  if (0 == ret)
+    data->pot_data.sfc_validated_count++;
+  else
+    data->pot_data.sfc_invalidated_count++;
+
+ *(data->writer_lock) = 0;
+ return 0;
+}
+
+int
+ip6_ioam_analyse_hbh_e2e_internal (u32 flow_id, ip6_hop_by_hop_option_t * opt,
+ u16 len)
+{
+ ioam_analyser_data_t *data;
+ ioam_e2e_option_t *e2e;
+
+ data = ioam_analyse_get_data_from_flow_id (flow_id);
+ e2e = (ioam_e2e_option_t *) opt;
+ ip6_ioam_analyse_hbh_e2e (data, &e2e->e2e_hdr, len);
+ return 0;
+}
+
+int
+ip6_ioam_analyse_register_hbh_handler (u8 option,
+ int options (u32 flow_id,
+ ip6_hop_by_hop_option_t *
+ opt, u16 len))
+{
+ ip6_ioam_analyser_main_t *am = &ioam_analyser_main;
+
+ ASSERT (option < ARRAY_LEN (am->analyse_hbh_handler));
+
+ /* Already registered */
+ if (am->analyse_hbh_handler[option])
+ return (-1);
+
+ am->analyse_hbh_handler[option] = options;
+
+ return (0);
+}
+
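+/* Example (illustrative sketch): registering an analyser for a
+ * hypothetical hop-by-hop option type MY_HBH_OPTION_TYPE; the handler
+ * must match the analyse_hbh_handler signature, and a negative return
+ * makes the packet count as analysis-failed.
+ */
+#if 0 /* example */
+#define MY_HBH_OPTION_TYPE 0x3d	/* hypothetical option type */
+
+static int
+my_option_analyse (u32 flow_id, ip6_hop_by_hop_option_t * opt, u16 len)
+{
+  /* ... update per-flow statistics here ... */
+  return 0;
+}
+
+static void
+my_option_analyse_enable (void)
+{
+  (void) ip6_ioam_analyse_register_hbh_handler (MY_HBH_OPTION_TYPE,
+						my_option_analyse);
+}
+#endif
+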
+int
+ip6_ioam_analyse_unregister_hbh_handler (u8 option)
+{
+ ip6_ioam_analyser_main_t *am = &ioam_analyser_main;
+
+ ASSERT (option < ARRAY_LEN (am->analyse_hbh_handler));
+
+ /* Not registered */
+ if (!am->analyse_hbh_handler[option])
+ return (-1);
+
+ am->analyse_hbh_handler[option] = NULL;
+ return (0);
+}
+
+void
+ip6_ioam_analyse_register_handlers (void)
+{
+ ip6_ioam_analyse_register_hbh_handler (HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST,
+ ip6_ioam_analyse_hbh_trace_internal);
+ ip6_ioam_analyse_register_hbh_handler
+ (HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT, ip6_ioam_analyse_hbh_pot);
+ ip6_ioam_analyse_register_hbh_handler (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ip6_ioam_analyse_hbh_e2e_internal);
+}
+
+void
+ip6_ioam_analyse_unregister_handlers (void)
+{
+ ip6_ioam_analyse_unregister_hbh_handler
+ (HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+ ip6_ioam_analyse_unregister_hbh_handler
+ (HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT);
+ ip6_ioam_analyse_unregister_hbh_handler (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
+}
+
+/* *INDENT-OFF* */
+
+/*
+ * Node to analyse iOAM records exported by the local node
+ */
+VLIB_REGISTER_NODE (analyse_node_local) = {
+ .function = ip6_ioam_analyse_node_fn,
+ .name = "ip6-hbh-analyse-local",
+ .vector_size = sizeof (u32),
+ .format_trace = format_analyse_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (analyse_error_strings),
+ .error_strings = analyse_error_strings,
+ .n_next_nodes = ANALYSE_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [ANALYSE_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [ANALYSE_NEXT_IP4_DROP] = "ip4-drop",
+ },
+};
+
+/*
+ * Node to analyse iOAM records exported by remote nodes
+ */
+VLIB_REGISTER_NODE (analyse_node_remote) = {
+ .function = ip6_ioam_analyse_node_fn,
+ .name = "ip6-hbh-analyse-remote",
+ .vector_size = sizeof (u32),
+ .format_trace = format_analyse_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (analyse_error_strings),
+ .error_strings = analyse_error_strings,
+ .n_next_nodes = ANALYSE_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [ANALYSE_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [ANALYSE_NEXT_IP4_DROP] = "ip4-drop",
+ },
+};
+
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/dir.dox b/src/plugins/ioam/dir.dox
new file mode 100644
index 00000000..f3389b52
--- /dev/null
+++ b/src/plugins/ioam/dir.dox
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Inband OAM (iOAM) implementation
+*/
diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.c b/src/plugins/ioam/encap/ip6_ioam_e2e.c
new file mode 100644
index 00000000..cdaf740d
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_e2e.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include "ip6_ioam_e2e.h"
+
+ioam_e2e_main_t ioam_e2e_main;
+
+static u8 * ioam_e2e_trace_handler (u8 * s,
+ ip6_hop_by_hop_option_t *opt)
+{
+ ioam_e2e_option_t * e2e = (ioam_e2e_option_t *)opt;
+ u32 seqno = 0;
+
+ if (e2e)
+ {
+ seqno = clib_net_to_host_u32 (e2e->e2e_hdr.e2e_data);
+ }
+
+  s = format (s, "SeqNo = 0x%Lx", (u64) seqno);
+ return s;
+}
+
+int
+ioam_e2e_config_handler (void *data, u8 disable)
+{
+ int *analyse = data;
+
+  /* Register handlers if enabled */
+ if (!disable)
+ {
+ /* If encap node register for encap handler */
+ if (0 == *analyse)
+ {
+ if (ip6_hbh_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ioam_seqno_encap_handler,
+ ioam_e2e_trace_handler) < 0)
+ {
+ return (-1);
+ }
+ }
+ /* If analyze node then register for decap handler */
+ else
+ {
+ if (ip6_hbh_pop_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ioam_seqno_decap_handler) < 0)
+ {
+ return (-1);
+ }
+ }
+ return 0;
+ }
+
+ /* UnRegister handlers */
+ (void) ip6_hbh_unregister_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
+ (void) ip6_hbh_pop_unregister_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
+ return 0;
+}
+
+int
+ioam_e2e_rewrite_handler (u8 *rewrite_string,
+ u8 *rewrite_size)
+{
+ ioam_e2e_option_t *e2e_option;
+
+ if (rewrite_string && *rewrite_size == sizeof(ioam_e2e_option_t))
+ {
+ e2e_option = (ioam_e2e_option_t *)rewrite_string;
+ e2e_option->hdr.type = HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE
+ | HBH_OPTION_TYPE_SKIP_UNKNOWN;
+ e2e_option->hdr.length = sizeof (ioam_e2e_option_t) -
+ sizeof (ip6_hop_by_hop_option_t);
+ return(0);
+ }
+ return(-1);
+}
+
+u32
+ioam_e2e_flow_handler (u32 ctx, u8 add)
+{
+ ioam_e2e_data_t *data;
+ u16 i;
+
+ if (add)
+ {
+ pool_get(ioam_e2e_main.e2e_data, data);
+ data->flow_ctx = ctx;
+ ioam_seqno_init_data(&data->seqno_data);
+ return ((u32) (data - ioam_e2e_main.e2e_data));
+ }
+
+ /* Delete case */
+ for (i = 0; i < vec_len(ioam_e2e_main.e2e_data); i++)
+ {
+ if (pool_is_free_index(ioam_e2e_main.e2e_data, i))
+ continue;
+
+ data = pool_elt_at_index(ioam_e2e_main.e2e_data, i);
+ if (data && (data->flow_ctx == ctx))
+ {
+ pool_put_index(ioam_e2e_main.e2e_data, i);
+ return (0);
+ }
+ }
+ return 0;
+}
+
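+/* Add/delete contract of the flow handler above (illustrative sketch):
+ * on add it returns the pool index that later serves as the flow
+ * context; on delete it looks the context up and releases the pool entry.
+ */
+#if 0 /* example */
+static void
+example_flow_lifecycle (u32 flow_ctx)
+{
+  u32 index = ioam_e2e_flow_handler (flow_ctx, 1 /* add */ );
+  (void) index;
+  (void) ioam_e2e_flow_handler (flow_ctx, 0 /* delete */ );
+}
+#endif
+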
+static clib_error_t *
+ioam_show_e2e_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ioam_e2e_data_t *e2e_data;
+ u8 *s = 0;
+ int i;
+
+  s = format(0, "IOAM E2E information:\n");
+ for (i = 0; i < vec_len(ioam_e2e_main.e2e_data); i++)
+ {
+ if (pool_is_free_index(ioam_e2e_main.e2e_data, i))
+ continue;
+
+ e2e_data = pool_elt_at_index(ioam_e2e_main.e2e_data, i);
+ s = format(s, "Flow name: %s\n", get_flow_name_from_flow_ctx(e2e_data->flow_ctx));
+
+ s = show_ioam_seqno_cmd_fn(s,
+ &e2e_data->seqno_data,
+ !IOAM_DEAP_ENABLED(e2e_data->flow_ctx));
+ }
+
+  vlib_cli_output(vm, "%v", s);
+  vec_free(s);
+  return 0;
+}
+
+
+VLIB_CLI_COMMAND (ioam_show_e2e_cmd, static) = {
+  .path = "show ioam e2e",
+ .short_help = "show ioam e2e information",
+ .function = ioam_show_e2e_cmd_fn,
+};
+
+/*
+ * Init handler for E2E header handling.
+ * The init handler registers the encap, decap, trace and rewrite handlers.
+ */
+static clib_error_t *
+ioam_e2e_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init)))
+ {
+ return(error);
+ }
+
+  /*
+   * As of now, only PPC is carried under the E2E header.
+   */
+ if (ip6_hbh_config_handler_register(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ioam_e2e_config_handler) < 0)
+ {
+      return (clib_error_create("Registration of "
+                                "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE config handler failed"));
+ }
+
+ if (ip6_hbh_add_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ sizeof(ioam_e2e_option_t),
+ ioam_e2e_rewrite_handler) < 0)
+ {
+ return (clib_error_create("Registration of "
+ "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE for rewrite failed"));
+ }
+
+ if (ip6_hbh_flow_handler_register(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ioam_e2e_flow_handler) < 0)
+ {
+ return (clib_error_create("Registration of "
+ "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE Flow handler failed"));
+ }
+
+ ioam_e2e_main.vlib_main = vm;
+ ioam_e2e_main.vnet_main = vnet_get_main();
+
+ return (0);
+}
+
+/*
+ * Init function for the E2E lib.
+ * ioam_e2e_init gets called during init.
+ */
+VLIB_INIT_FUNCTION (ioam_e2e_init);
diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.h b/src/plugins/ioam/encap/ip6_ioam_e2e.h
new file mode 100644
index 00000000..fb83403d
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_e2e.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_ip6_ioam_e2e_h__
+#define __included_ip6_ioam_e2e_h__
+
+#include <ioam/lib-e2e/e2e_util.h>
+#include "ip6_ioam_seqno.h"
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct {
+ ip6_hop_by_hop_option_t hdr;
+ ioam_e2e_packet_t e2e_hdr;
+}) ioam_e2e_option_t;
+/* *INDENT-ON* */
+
+typedef struct ioam_e2e_data_t_ {
+ u32 flow_ctx;
+ u32 pad;
+ ioam_seqno_data seqno_data;
+} ioam_e2e_data_t;
+
+typedef struct {
+ ioam_e2e_data_t *e2e_data;
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ioam_e2e_main_t;
+
+extern ioam_e2e_main_t ioam_e2e_main;
+
+static inline ioam_seqno_data *
+ioam_e2ec_get_seqno_data_from_flow_ctx (u32 flow_ctx)
+{
+ ioam_e2e_data_t *data = NULL;
+ u32 index;
+
+ index = get_flow_data_from_flow_ctx(flow_ctx,
+ HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
+ data = &ioam_e2e_main.e2e_data[index];
+ return &(data->seqno_data);
+}
+
+static inline u32
+ioam_e2e_get_cur_seqno_from_flow_ctx (u32 flow_ctx)
+{
+ ioam_seqno_data *data = NULL;
+
+ data = ioam_e2ec_get_seqno_data_from_flow_ctx(flow_ctx);
+ return data->seq_num;
+}
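+
+/* Usage sketch (illustrative only): reading the current transmit
+ * sequence number for a flow, e.g. for display; flow_ctx is assumed to
+ * be a context handed out by the E2E flow handler.
+ */
+#if 0 /* example */
+static u32
+example_read_seqno (u32 flow_ctx)
+{
+  return ioam_e2e_get_cur_seqno_from_flow_ctx (flow_ctx);
+}
+#endif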
+
+#endif /* __included_ip6_ioam_e2e_h__ */
diff --git a/src/plugins/ioam/encap/ip6_ioam_pot.c b/src/plugins/ioam/encap/ip6_ioam_pot.c
new file mode 100644
index 00000000..9a761233
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_pot.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip6.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <ioam/encap/ip6_ioam_pot.h>
+#include <ioam/lib-pot/pot_util.h>
+
+#define foreach_ip6_hop_by_hop_ioam_pot_stats \
+ _(PROCESSED, "Pkts with ip6 hop-by-hop pot options") \
+ _(PROFILE_MISS, "Pkts with ip6 hop-by-hop pot options but no profile set") \
+ _(PASSED, "Pkts with POT in Policy") \
+ _(FAILED, "Pkts with POT out of Policy")
+
+static char * ip6_hop_by_hop_ioam_pot_stats_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_hop_by_hop_ioam_pot_stats
+#undef _
+};
+
+typedef enum {
+#define _(sym,str) IP6_IOAM_POT_##sym,
+ foreach_ip6_hop_by_hop_ioam_pot_stats
+#undef _
+ IP6_IOAM_POT_N_STATS,
+} ip6_ioam_pot_stats_t;
+
+typedef struct {
+ /* stats */
+ u64 counters[ARRAY_LEN(ip6_hop_by_hop_ioam_pot_stats_strings)];
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} ip6_hop_by_hop_ioam_pot_main_t;
+
+ip6_hop_by_hop_ioam_pot_main_t ip6_hop_by_hop_ioam_pot_main;
+
+always_inline void
+ip6_ioam_stats_increment_counter (u32 counter_index, u64 increment)
+{
+ ip6_hop_by_hop_ioam_pot_main_t *hm = &ip6_hop_by_hop_ioam_pot_main;
+
+ hm->counters[counter_index] += increment;
+}
+
+
+static u8 * format_ioam_pot (u8 * s, va_list * args)
+{
+ ioam_pot_option_t * pot0 = va_arg (*args, ioam_pot_option_t *);
+ u64 random, cumulative;
+ random = cumulative = 0;
+ if (pot0)
+ {
+ random = clib_net_to_host_u64 (pot0->random);
+ cumulative = clib_net_to_host_u64 (pot0->cumulative);
+ }
+
+  s = format (s, "Random = 0x%Lx, Cumulative = 0x%Lx, Index = 0x%x",
+ random, cumulative, pot0 ? pot0->reserved_profile_id : ~0);
+ return s;
+}
+
+u8 *
+ip6_hbh_ioam_proof_of_transit_trace_handler (u8 *s, ip6_hop_by_hop_option_t *opt)
+{
+ ioam_pot_option_t *pot;
+
+ s = format (s, " POT opt present\n");
+ pot = (ioam_pot_option_t *) opt;
+ s = format (s, " %U\n", format_ioam_pot, pot);
+ return (s);
+}
+
+int
+ip6_hbh_ioam_proof_of_transit_handler (vlib_buffer_t *b,
+ ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt0)
+{
+ ioam_pot_option_t * pot0;
+ u64 random = 0, cumulative = 0;
+ int rv = 0;
+ u8 pot_profile_index;
+ pot_profile *pot_profile = 0, *new_profile = 0;
+ u8 pot_encap = 0;
+
+ pot0 = (ioam_pot_option_t *) opt0;
+ pot_encap = (pot0->random == 0);
+ pot_profile_index = pot_profile_get_active_id();
+ pot_profile = pot_profile_get_active();
+ if (pot_encap && PREDICT_FALSE(!pot_profile))
+ {
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROFILE_MISS, 1);
+ return(-1);
+ }
+ if (pot_encap)
+ {
+ pot0->reserved_profile_id =
+ pot_profile_index & PROFILE_ID_MASK;
+ pot_profile_incr_usage_stats(pot_profile);
+ }
+ else
+ { /* Non encap node */
+ if (PREDICT_FALSE(pot0->reserved_profile_id !=
+ pot_profile_index || pot_profile == 0))
+ {
+ /* New profile announced by encap node. */
+ new_profile =
+ pot_profile_find(pot0->reserved_profile_id);
+ if (PREDICT_FALSE(new_profile == 0 ||
+ new_profile->valid == 0))
+ {
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROFILE_MISS, 1);
+ return(-1);
+ }
+ else
+ {
+ pot_profile_index = pot0->reserved_profile_id;
+ pot_profile = new_profile;
+ pot_profile_set_active(pot_profile_index);
+ pot_profile_reset_usage_stats(pot_profile);
+ }
+ }
+ pot_profile_incr_usage_stats(pot_profile);
+ }
+
+ if (pot0->random == 0)
+ {
+ pot0->random = clib_host_to_net_u64(pot_generate_random(pot_profile));
+ pot0->cumulative = 0;
+ }
+ random = clib_net_to_host_u64(pot0->random);
+ cumulative = clib_net_to_host_u64(pot0->cumulative);
+ pot0->cumulative = clib_host_to_net_u64(
+ pot_update_cumulative(pot_profile,
+ cumulative,
+ random));
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROCESSED, 1);
+
+ return (rv);
+}
+
+int
+ip6_hbh_ioam_proof_of_transit_pop_handler (vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt0)
+{
+ ioam_pot_option_t * pot0;
+ u64 random = 0;
+ u64 cumulative = 0;
+ int rv = 0;
+ pot_profile *pot_profile = 0;
+ u8 result = 0;
+
+ pot0 = (ioam_pot_option_t *) opt0;
+ random = clib_net_to_host_u64(pot0->random);
+ cumulative = clib_net_to_host_u64(pot0->cumulative);
+ pot_profile = pot_profile_get_active();
+ result = pot_validate (pot_profile,
+ cumulative, random);
+
+ if (result == 1)
+ {
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PASSED, 1);
+ }
+ else
+ {
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_FAILED, 1);
+ }
+ return (rv);
+}
+
+int ip6_hop_by_hop_ioam_pot_rewrite_handler (u8 *rewrite_string, u8 *rewrite_size)
+{
+ ioam_pot_option_t * pot_option;
+ if (rewrite_string && *rewrite_size == sizeof(ioam_pot_option_t))
+ {
+ pot_option = (ioam_pot_option_t *)rewrite_string;
+ pot_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT
+ | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
+ pot_option->hdr.length = sizeof (ioam_pot_option_t) -
+ sizeof (ip6_hop_by_hop_option_t);
+ return(0);
+ }
+ return(-1);
+}
+
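+/* Worked sizing note (sketch, assuming the packed layout in
+ * ip6_ioam_pot.h): hdr.length excludes the two option TLV bytes, i.e.
+ *   sizeof (ioam_pot_option_t) - sizeof (ip6_hop_by_hop_option_t)
+ *   = (2 + 1 + 1 + 8 + 8) - 2 = 18 octets
+ * covering pot_type, reserved/profile-id, random and cumulative.
+ */
+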
+static clib_error_t *
+ip6_show_ioam_pot_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_hop_by_hop_ioam_pot_main_t *hm = &ip6_hop_by_hop_ioam_pot_main;
+ u8 *s = 0;
+ int i = 0;
+
+ for ( i = 0; i < IP6_IOAM_POT_N_STATS; i++)
+ {
+ s = format(s, " %s - %lu\n", ip6_hop_by_hop_ioam_pot_stats_strings[i],
+ hm->counters[i]);
+ }
+
+ vlib_cli_output(vm, "%v", s);
+ vec_free(s);
+ return 0;
+}
+
+
+VLIB_CLI_COMMAND (ip6_show_ioam_pot_cmd, static) = {
+ .path = "show ioam pot",
+ .short_help = "iOAM pot statistics",
+ .function = ip6_show_ioam_pot_cmd_fn,
+};
+
+
+static clib_error_t *
+ip6_hop_by_hop_ioam_pot_init (vlib_main_t * vm)
+{
+ ip6_hop_by_hop_ioam_pot_main_t * hm = &ip6_hop_by_hop_ioam_pot_main;
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init)))
+ return(error);
+
+ hm->vlib_main = vm;
+ hm->vnet_main = vnet_get_main();
+ memset(hm->counters, 0, sizeof(hm->counters));
+
+ if (ip6_hbh_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT, ip6_hbh_ioam_proof_of_transit_handler,
+ ip6_hbh_ioam_proof_of_transit_trace_handler) < 0)
+ return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT failed"));
+
+ if (ip6_hbh_add_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT,
+ sizeof(ioam_pot_option_t),
+ ip6_hop_by_hop_ioam_pot_rewrite_handler) < 0)
+ return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT for rewrite failed"));
+
+ if (ip6_hbh_pop_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT,
+ ip6_hbh_ioam_proof_of_transit_pop_handler) < 0)
+ return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT POP failed"));
+
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_pot_init);
+
+
diff --git a/src/plugins/ioam/encap/ip6_ioam_pot.h b/src/plugins/ioam/encap/ip6_ioam_pot.h
new file mode 100644
index 00000000..01ce4ac5
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_pot.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_POT_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_POT_H_
+
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ ip6_hop_by_hop_option_t hdr;
+ u8 pot_type;
+ #define PROFILE_ID_MASK 0xF
+ u8 reserved_profile_id; /* 4 bits reserved, 4 bits to carry profile id */
+ u64 random;
+ u64 cumulative;
+}) ioam_pot_option_t;
+/* *INDENT-ON* */
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_POT_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/encap/ip6_ioam_seqno.c b/src/plugins/ioam/encap/ip6_ioam_seqno.c
new file mode 100644
index 00000000..08bf554b
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_seqno.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include "ip6_ioam_seqno.h"
+#include "ip6_ioam_e2e.h"
+
+
+/*
+ * This routine gets called from IPv6 hop-by-hop option handling.
+ * PPC data is added only on the encap node.
+ * On a transit (MID) node we do nothing with E2E headers.
+ * On the decap node, decap is handled by a separate function.
+ */
+int
+ioam_seqno_encap_handler (vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt)
+{
+ u32 opaque_index = vnet_buffer(b)->l2_classify.opaque_index;
+ ioam_e2e_option_t * e2e;
+ int rv = 0;
+ ioam_seqno_data *data;
+
+ /* Bypass seqno processing */
+ if (PREDICT_FALSE(opaque_index == 0x7FFFFFFF))
+ return rv;
+
+ data = ioam_e2ec_get_seqno_data_from_flow_ctx(opaque_index);
+ e2e = (ioam_e2e_option_t *) opt;
+ e2e->e2e_hdr.e2e_data = clib_host_to_net_u32(++data->seq_num);
+
+ return (rv);
+}
+
+/*
+ * This Routine gets called on POP/Decap node.
+ */
+int
+ioam_seqno_decap_handler (vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt)
+{
+ u32 opaque_index = vnet_buffer(b)->l2_classify.opaque_index;
+ ioam_e2e_option_t * e2e;
+ int rv = 0;
+ ioam_seqno_data *data;
+
+ data = ioam_e2ec_get_seqno_data_from_flow_ctx(opaque_index);
+ e2e = (ioam_e2e_option_t *) opt;
+ ioam_analyze_seqno(&data->seqno_rx,
+ (u64) clib_net_to_host_u32(e2e->e2e_hdr.e2e_data));
+
+ return (rv);
+}
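+
+/* End-to-end flow of the sequence number (sketch): the encap handler
+ * above stamps ++seq_num into the E2E option in network byte order; the
+ * decap handler reads it back and feeds ioam_analyze_seqno(), which is
+ * where loss/reorder/duplicate accounting happens.
+ */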
diff --git a/src/plugins/ioam/encap/ip6_ioam_seqno.h b/src/plugins/ioam/encap/ip6_ioam_seqno.h
new file mode 100644
index 00000000..5c140246
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_seqno.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_ip6_ioam_seqno_h__
+#define __included_ip6_ioam_seqno_h__
+
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <ioam/lib-e2e/e2e_util.h>
+
+int ioam_seqno_encap_handler(vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt);
+
+int ioam_seqno_decap_handler(vlib_buffer_t *b, ip6_header_t *ip,
+                             ip6_hop_by_hop_option_t *opt);
+
+#endif
diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.c b/src/plugins/ioam/encap/ip6_ioam_trace.c
new file mode 100644
index 00000000..3ec3ea82
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_trace.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vpp/app/version.h>
+
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+#include <vnet/plugin/plugin.h>
+
+#include <ioam/lib-trace/trace_util.h>
+#include <ioam/lib-trace/trace_config.h>
+#include <ioam/encap/ip6_ioam_trace.h>
+#include <ioam/udp-ping/udp_ping.h>
+#include <ioam/udp-ping/udp_ping_packet.h>
+#include <ioam/udp-ping/udp_ping_util.h>
+
+/* Timestamp precision multipliers for seconds, milliseconds, microseconds
+ * and nanoseconds respectively.
+ */
+static f64 trace_tsp_mul[4] = { 1, 1e3, 1e6, 1e9 };
+
+typedef union
+{
+ u64 as_u64;
+ u32 as_u32[2];
+} time_u64_t;
+
+extern ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
+extern ip6_main_t ip6_main;
+
+#define foreach_ip6_hop_by_hop_ioam_trace_stats \
+ _(PROCESSED, "Pkts with ip6 hop-by-hop trace options") \
+ _(PROFILE_MISS, "Pkts with ip6 hop-by-hop trace options but no profile set") \
+ _(UPDATED, "Pkts with trace updated") \
+ _(FULL, "Pkts with trace options but no space") \
+ _(LOOPBACK, "Pkts with trace options Loopback") \
+ _(LOOPBACK_REPLY, "Pkts with trace options Loopback Reply")
+
+static char *ip6_hop_by_hop_ioam_trace_stats_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_hop_by_hop_ioam_trace_stats
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) IP6_IOAM_TRACE_##sym,
+ foreach_ip6_hop_by_hop_ioam_trace_stats
+#undef _
+ IP6_IOAM_TRACE_N_STATS,
+} ip6_ioam_trace_stats_t;
+
+
+typedef struct
+{
+ /* stats */
+ u64 counters[ARRAY_LEN (ip6_hop_by_hop_ioam_trace_stats_strings)];
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ip6_hop_by_hop_ioam_trace_main_t;
+
+ip6_hop_by_hop_ioam_trace_main_t ip6_hop_by_hop_ioam_trace_main;
+
+always_inline void
+ip6_ioam_trace_stats_increment_counter (u32 counter_index, u64 increment)
+{
+ ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main;
+
+ hm->counters[counter_index] += increment;
+}
+
+
+static u8 *
+format_ioam_data_list_element (u8 * s, va_list * args)
+{
+ u32 *elt = va_arg (*args, u32 *);
+ u8 *trace_type_p = va_arg (*args, u8 *);
+ u8 trace_type = *trace_type_p;
+
+
+ if (trace_type & BIT_TTL_NODEID)
+ {
+ u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ttl 0x%x node id 0x%x ",
+ ttl_node_id_host_byte_order >> 24,
+ ttl_node_id_host_byte_order & 0x00FFFFFF);
+
+ elt++;
+ }
+
+  if (trace_type & BIT_ING_INTERFACE)	/* one elt carries ingress and egress */
+ {
+ u32 ingress_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ingress 0x%x egress 0x%x ",
+ ingress_host_byte_order >> 16,
+ ingress_host_byte_order & 0xFFFF);
+ elt++;
+ }
+
+ if (trace_type & BIT_TIMESTAMP)
+ {
+ u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt);
+      s = format (s, "ts 0x%x\n", ts_in_host_byte_order);
+ elt++;
+ }
+
+ if (trace_type & BIT_APPDATA)
+ {
+ u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "app 0x%x ", appdata_in_host_byte_order);
+ elt++;
+ }
+
+ return s;
+}
+
+
+int
+ip6_ioam_trace_get_sizeof_handler (u32 * result)
+{
+ u16 size = 0;
+ u8 trace_data_size = 0;
+ trace_profile *profile = NULL;
+
+ *result = 0;
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1);
+ return (-1);
+ }
+
+ trace_data_size = fetch_trace_data_size (profile->trace_type);
+ if (PREDICT_FALSE (trace_data_size == 0))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (PREDICT_FALSE (profile->num_elts * trace_data_size > 254))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ size +=
+ sizeof (ioam_trace_option_t) + (profile->num_elts * trace_data_size);
+ *result = size;
+
+ return 0;
+}
+
+
+
+int
+ip6_hop_by_hop_ioam_trace_rewrite_handler (u8 * rewrite_string,
+ u8 * rewrite_size)
+{
+ ioam_trace_option_t *trace_option = NULL;
+ u8 trace_data_size = 0;
+ u8 trace_option_elts = 0;
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1);
+ return (-1);
+ }
+
+ if (PREDICT_FALSE (!rewrite_string))
+ return -1;
+
+ trace_option_elts = profile->num_elts;
+ trace_data_size = fetch_trace_data_size (profile->trace_type);
+ trace_option = (ioam_trace_option_t *) rewrite_string;
+ trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST |
+ HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
+ trace_option->hdr.length = 2 /*ioam_trace_type,data_list_elts_left */ +
+ trace_option_elts * trace_data_size;
+ trace_option->trace_hdr.ioam_trace_type =
+ profile->trace_type & TRACE_TYPE_MASK;
+ trace_option->trace_hdr.data_list_elts_left = trace_option_elts;
+ *rewrite_size =
+ sizeof (ioam_trace_option_t) + (trace_option_elts * trace_data_size);
+
+ return 0;
+}
+
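+/* Worked sizing example (sketch): for a profile with num_elts = 3 and a
+ * trace type whose per-node data is 8 octets, the rewrite above yields
+ *   hdr.length    = 2 + 3 * 8 = 26 octets (excludes the 2 TLV bytes)
+ *   *rewrite_size = sizeof (ioam_trace_option_t) + 3 * 8
+ * matching what ip6_ioam_trace_get_sizeof_handler computes.
+ */
+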
+always_inline void
+ip6_hbh_ioam_loopback_handler (vlib_buffer_t * b, ip6_header_t * ip,
+ ioam_trace_option_t * trace)
+{
+ u32 buf_index;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ vlib_buffer_t *b0;
+ vlib_frame_t *nf = 0;
+ u32 *to_next;
+ vlib_node_t *next_node;
+ ip6_header_t *ip6;
+ ip6_hop_by_hop_header_t *hbh;
+ ioam_trace_option_t *opt;
+ udp_ping_t *udp;
+
+ next_node = vlib_get_node_by_name (hm->vlib_main, (u8 *) "ip6-lookup");
+ nf = vlib_get_frame_to_node (hm->vlib_main, next_node->index);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+
+ b0 = vlib_buffer_copy (hm->vlib_main, b);
+ buf_index = vlib_get_buffer_index (hm->vlib_main, b0);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+
+ ip6 = vlib_buffer_get_current (b0);
+ hbh = (ip6_hop_by_hop_header_t *) (ip6 + 1);
+ opt = (ioam_trace_option_t *)
+ ip6_hbh_get_option (hbh, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+
+ udp = (udp_ping_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+ udp_ping_create_reply_from_probe_ip6 (ip6, hbh, udp);
+ ip6_hbh_ioam_trace_set_bit (opt, BIT_LOOPBACK_REPLY);
+
+ *to_next = buf_index;
+ nf->n_vectors++;
+ to_next++;
+
+ vlib_put_frame_to_node (hm->vlib_main, next_node->index, nf);
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_LOOPBACK, 1);
+}
+
+int
+ip6_hbh_ioam_trace_data_list_handler (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u8 elt_index = 0;
+ ioam_trace_option_t *trace = (ioam_trace_option_t *) opt;
+ u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj = adj_get (adj_index);
+ time_u64_t time_u64;
+ u32 *elt;
+ int rv = 0;
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1);
+ return (-1);
+ }
+
+ /* Don't trace loopback reply packets */
+ if (trace->trace_hdr.ioam_trace_type & BIT_LOOPBACK_REPLY)
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_LOOPBACK_REPLY,
+ 1);
+ return rv;
+ }
+
+ time_u64.as_u64 = 0;
+
+ if (PREDICT_TRUE (trace->trace_hdr.data_list_elts_left))
+ {
+ trace->trace_hdr.data_list_elts_left--;
+	  /* fetch_trace_data_size returns a size in bytes; convert it to
+	   * 4-byte words to index this node's slot in the data list.
+	   */
+ elt_index =
+ trace->trace_hdr.data_list_elts_left *
+ fetch_trace_data_size (trace->trace_hdr.ioam_trace_type) / 4;
+ elt = &trace->trace_hdr.elts[elt_index];
+ if (trace->trace_hdr.ioam_trace_type & BIT_TTL_NODEID)
+ {
+ *elt =
+ clib_host_to_net_u32 ((ip->hop_limit << 24) | profile->node_id);
+ elt++;
+ }
+
+ if (trace->trace_hdr.ioam_trace_type & BIT_ING_INTERFACE)
+ {
+ *elt =
+ (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 |
+ (adj->rewrite_header.sw_if_index & 0xFFFF);
+ *elt = clib_host_to_net_u32 (*elt);
+ elt++;
+ }
+
+ if (trace->trace_hdr.ioam_trace_type & BIT_TIMESTAMP)
+ {
+ /* Send least significant 32 bits */
+ f64 time_f64 =
+ (f64) (((f64) hm->unix_time_0) +
+ (vlib_time_now (hm->vlib_main) - hm->vlib_time_0));
+
+ time_u64.as_u64 = time_f64 * trace_tsp_mul[profile->trace_tsp];
+ *elt = clib_host_to_net_u32 (time_u64.as_u32[0]);
+ elt++;
+ }
+
+ if (trace->trace_hdr.ioam_trace_type & BIT_APPDATA)
+ {
+ /* $$$ set elt0->app_data */
+ *elt = clib_host_to_net_u32 (profile->app_data);
+ elt++;
+ }
+
+
+ if (PREDICT_FALSE (trace->trace_hdr.ioam_trace_type & BIT_LOOPBACK))
+ {
+ /* if loopback flag set then copy the packet
+ * and send it back to source */
+ ip6_hbh_ioam_loopback_handler (b, ip, trace);
+ }
+
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_UPDATED, 1);
+ }
+ else
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_FULL, 1);
+ }
+ return (rv);
+}
+
+u8 *
+ip6_hbh_ioam_trace_data_list_trace_handler (u8 * s,
+ ip6_hop_by_hop_option_t * opt)
+{
+ ioam_trace_option_t *trace;
+ u8 trace_data_size_in_words = 0;
+ u32 *elt;
+ int elt_index = 0;
+
+ trace = (ioam_trace_option_t *) opt;
+ s =
+ format (s, " Trace Type 0x%x , %d elts left\n",
+ trace->trace_hdr.ioam_trace_type,
+ trace->trace_hdr.data_list_elts_left);
+ trace_data_size_in_words =
+ fetch_trace_data_size (trace->trace_hdr.ioam_trace_type) / 4;
+ elt = &trace->trace_hdr.elts[0];
+ while ((u8 *) elt <
+ ((u8 *) (&trace->trace_hdr.elts[0]) + trace->hdr.length - 2
+ /* -2 accounts for ioam_trace_type,elts_left */ ))
+ {
+ s = format (s, " [%d] %U\n", elt_index,
+ format_ioam_data_list_element,
+ elt, &trace->trace_hdr.ioam_trace_type);
+ elt_index++;
+ elt += trace_data_size_in_words;
+ }
+ return (s);
+}
+
+
+static clib_error_t *
+ip6_show_ioam_trace_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main;
+ u8 *s = 0;
+ int i = 0;
+
+ for (i = 0; i < IP6_IOAM_TRACE_N_STATS; i++)
+ {
+ s =
+ format (s, " %s - %lu\n", ip6_hop_by_hop_ioam_trace_stats_strings[i],
+ hm->counters[i]);
+ }
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_ioam_trace_cmd, static) = {
+ .path = "show ioam trace",
+ .short_help = "iOAM trace statistics",
+ .function = ip6_show_ioam_trace_cmd_fn,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+    .description = "Inband OAM (iOAM)",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip6_hop_by_hop_ioam_trace_init (vlib_main_t * vm)
+{
+ ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main;
+ clib_error_t *error;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return (error);
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init)))
+ return (error);
+
+ hm->vlib_main = vm;
+ hm->vnet_main = vnet_get_main ();
+ memset (hm->counters, 0, sizeof (hm->counters));
+
+
+ if (ip6_hbh_register_option
+ (HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST,
+ ip6_hbh_ioam_trace_data_list_handler,
+ ip6_hbh_ioam_trace_data_list_trace_handler) < 0)
+ return (clib_error_create
+ ("registration of HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST failed"));
+
+
+ if (ip6_hbh_add_register_option (HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST,
+ sizeof (ioam_trace_option_t),
+ ip6_hop_by_hop_ioam_trace_rewrite_handler)
+ < 0)
+ return (clib_error_create
+ ("registration of HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST for rewrite failed"));
+
+
+ return (0);
+}
+
+int
+ip6_trace_profile_cleanup (void)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] = 0;
+
+ return 0;
+
+}
+
+
+int
+ip6_trace_profile_setup (void)
+{
+ u32 trace_size = 0;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1);
+ return (-1);
+ }
+
+
+ if (ip6_ioam_trace_get_sizeof_handler (&trace_size) < 0)
+ return (-1);
+
+ hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] = trace_size;
+
+ return (0);
+}
+
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_trace_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.h b/src/plugins/ioam/encap/ip6_ioam_trace.h
new file mode 100644
index 00000000..4eda6110
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_trace.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * ip6_ioam_trace.h -- iOAM trace option handling header
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_TRACE_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_TRACE_H_
+
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <ioam/lib-trace/trace_util.h>
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct {
+ ip6_hop_by_hop_option_t hdr;
+ ioam_trace_hdr_t trace_hdr;
+}) ioam_trace_option_t;
+/* *INDENT-ON* */
+
+always_inline void
+ip6_hbh_ioam_trace_set_bit (ioam_trace_option_t * trace, u8 trace_bit)
+{
+ ioam_trace_set_bit (&trace->trace_hdr, trace_bit);
+}
+
+always_inline void
+ip6_hbh_ioam_trace_reset_bit (ioam_trace_option_t * trace, u8 trace_bit)
+{
+ ioam_trace_reset_bit (&trace->trace_hdr, trace_bit);
+}
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_TRACE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h
new file mode 100644
index 00000000..9de0d13b
--- /dev/null
+++ b/src/plugins/ioam/export-common/ioam_export.h
@@ -0,0 +1,634 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ioam_export_h__
+#define __included_ioam_export_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/udp/udp.h>
+#include <vnet/flow/ipfix_packet.h>
+
+#include <vppinfra/pool.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vlib/threads.h>
+
+typedef struct ioam_export_buffer
+{
+ /* Allocated buffer */
+ u32 buffer_index;
+ u64 touched_at;
+ u8 records_in_this_buffer;
+} ioam_export_buffer_t;
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u16 set_id;
+
+  /* TODO: to support multiple collectors, group all of this and create a vector here */
+ u8 *record_header;
+ u32 sequence_number;
+ u32 domain_id;
+
+ /* ipfix collector, our ip address */
+ ip4_address_t ipfix_collector;
+ ip4_address_t src_address;
+
+ /* Pool of ioam_export_buffer_t */
+ ioam_export_buffer_t *buffer_pool;
+ /* Vector of per thread ioam_export_buffer_t to buffer pool index */
+ u32 *buffer_per_thread;
+ /* Lock per thread to swap buffers between worker and timer process */
+ volatile u32 **lockp;
+
+ /* time scale transform */
+ u32 unix_time_0;
+ f64 vlib_time_0;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ ethernet_main_t *ethernet_main;
+ u32 next_node_index;
+
+ uword my_hbh_slot;
+ u32 export_process_node_index;
+} ioam_export_main_t;
+
+
+#define DEFAULT_EXPORT_SIZE (3 * CLIB_CACHE_LINE_BYTES)
+/*
+ * Number of records in a buffer
+ * ~(MTU (1500) - [ip hdr(40) + UDP(8) + ipfix (24)]) / DEFAULT_EXPORT_SIZE
+ */
+#define DEFAULT_EXPORT_RECORDS 7
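+
+/* Worked example (assuming 64-byte cache lines): DEFAULT_EXPORT_SIZE is
+ * 3 * 64 = 192 octets per record, so (1500 - 72) / 192 = 7.4 -> 7 records.
+ */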
+
+inline static void
+ioam_export_set_next_node (ioam_export_main_t * em, u8 * next_node_name)
+{
+ vlib_node_t *next_node;
+
+ next_node = vlib_get_node_by_name (em->vlib_main, next_node_name);
+ em->next_node_index = next_node->index;
+}
+
+inline static void
+ioam_export_reset_next_node (ioam_export_main_t * em)
+{
+ vlib_node_t *next_node;
+
+ next_node = vlib_get_node_by_name (em->vlib_main, (u8 *) "ip4-lookup");
+ em->next_node_index = next_node->index;
+}
+
+always_inline ioam_export_buffer_t *
+ioam_export_get_my_buffer (ioam_export_main_t * em, u32 thread_id)
+{
+
+ if (vec_len (em->buffer_per_thread) > thread_id)
+ return (pool_elt_at_index
+ (em->buffer_pool, em->buffer_per_thread[thread_id]));
+ return (0);
+}
+
+inline static int
+ioam_export_buffer_add_header (ioam_export_main_t * em, vlib_buffer_t * b0)
+{
+ clib_memcpy (b0->data, em->record_header, vec_len (em->record_header));
+ b0->current_data = 0;
+ b0->current_length = vec_len (em->record_header);
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ return (1);
+}
+
+inline static int
+ioam_export_init_buffer (ioam_export_main_t * em, vlib_main_t * vm,
+ ioam_export_buffer_t * eb)
+{
+ vlib_buffer_t *b = 0;
+
+ if (!eb)
+ return (-1);
+ /* TODO: Perhaps buffer init from template here */
+ if (vlib_buffer_alloc (vm, &(eb->buffer_index), 1) != 1)
+ return (-2);
+ eb->records_in_this_buffer = 0;
+ eb->touched_at = vlib_time_now (vm);
+ b = vlib_get_buffer (vm, eb->buffer_index);
+ (void) ioam_export_buffer_add_header (em, b);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+ return (1);
+}
+
+inline static void
+ioam_export_thread_buffer_free (ioam_export_main_t * em)
+{
+ vlib_main_t *vm = em->vlib_main;
+ ioam_export_buffer_t *eb = 0;
+ int i;
+ for (i = 0; i < vec_len (em->buffer_per_thread); i++)
+ {
+ eb = pool_elt_at_index (em->buffer_pool, em->buffer_per_thread[i]);
+ if (eb)
+ vlib_buffer_free (vm, &(eb->buffer_index), 1);
+ }
+ for (i = 0; i < vec_len (em->lockp); i++)
+ clib_mem_free ((void *) em->lockp[i]);
+ vec_free (em->buffer_per_thread);
+ pool_free (em->buffer_pool);
+ vec_free (em->lockp);
+ em->buffer_per_thread = 0;
+ em->buffer_pool = 0;
+ em->lockp = 0;
+}
+
+inline static int
+ioam_export_thread_buffer_init (ioam_export_main_t * em, vlib_main_t * vm)
+{
+ int no_of_threads = vec_len (vlib_worker_threads);
+ int i;
+ ioam_export_buffer_t *eb = 0;
+
+ pool_alloc_aligned (em->buffer_pool,
+ no_of_threads - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (em->buffer_per_thread,
+ no_of_threads - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (em->lockp, no_of_threads - 1, CLIB_CACHE_LINE_BYTES);
+
+ if (!em->buffer_per_thread || !em->buffer_pool || !em->lockp)
+ {
+ return (-1);
+ }
+ for (i = 0; i < no_of_threads; i++)
+ {
+ eb = 0;
+ pool_get_aligned (em->buffer_pool, eb, CLIB_CACHE_LINE_BYTES);
+ memset (eb, 0, sizeof (*eb));
+ em->buffer_per_thread[i] = eb - em->buffer_pool;
+ if (ioam_export_init_buffer (em, vm, eb) != 1)
+ {
+ ioam_export_thread_buffer_free (em);
+ return (-2);
+ }
+ em->lockp[i] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ memset ((void *) em->lockp[i], 0, CLIB_CACHE_LINE_BYTES);
+ }
+ return (1);
+}
+
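+/* Usage sketch (illustrative only): the per-thread locks initialised
+ * above are simple test-and-set spinlocks used to swap a worker's buffer
+ * with the export timer process.
+ */
+#if 0 /* example */
+static void
+example_with_thread_buffer (ioam_export_main_t * em, u32 thread_id)
+{
+  ioam_export_buffer_t *eb;
+
+  while (__sync_lock_test_and_set (em->lockp[thread_id], 1))
+    ;				/* spin until acquired */
+  eb = ioam_export_get_my_buffer (em, thread_id);
+  /* ... append a record to eb's vlib buffer ... */
+  *em->lockp[thread_id] = 0;	/* release */
+}
+#endif
+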
+#define IPFIX_IOAM_EXPORT_ID 272
+#define IPFIX_VXLAN_IOAM_EXPORT_ID 273
+
+/* Used to build the rewrite */
+/* data set packet */
+typedef struct
+{
+ ipfix_message_header_t h;
+ ipfix_set_header_t s;
+} ipfix_data_packet_t;
+
+typedef struct
+{
+ ip4_header_t ip4;
+ udp_header_t udp;
+ ipfix_data_packet_t ipfix;
+} ip4_ipfix_data_packet_t;
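+
+/*
+ * On the wire the export packet is laid out as:
+ *   [ ip4 | udp | ipfix message header | ipfix set header | records... ]
+ * ioam_export_header_create() below pre-builds everything up to the
+ * records, sized for DEFAULT_EXPORT_RECORDS full-size records.
+ */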
+
+
+inline static void
+ioam_export_header_cleanup (ioam_export_main_t * em,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address)
+{
+ vec_free (em->record_header);
+ em->record_header = 0;
+}
+
+inline static int
+ioam_export_header_create (ioam_export_main_t * em,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address)
+{
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s;
+ u8 *rewrite = 0;
+ ip4_ipfix_data_packet_t *tp;
+
+
+ /* allocate rewrite space */
+ vec_validate_aligned (rewrite,
+ sizeof (ip4_ipfix_data_packet_t) - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ tp = (ip4_ipfix_data_packet_t *) rewrite;
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->src_address.as_u32 = src_address->as_u32;
+ ip->dst_address.as_u32 = collector_address->as_u32;
+ udp->src_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
+ udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
+ /* FIXUP: UDP length */
+ udp->length = clib_host_to_net_u16 (vec_len (rewrite) +
+ (DEFAULT_EXPORT_RECORDS *
+ DEFAULT_EXPORT_SIZE) - sizeof (*ip));
+
+ /* FIXUP: message header export_time */
+ /* FIXUP: message header sequence_number */
+ h->domain_id = clib_host_to_net_u32 (em->domain_id);
+
+  /* FIXUP: set id & length in octets if records exported are not default */
+ s->set_id_length = ipfix_set_id_length (em->set_id,
+ (sizeof (*s) +
+ (DEFAULT_EXPORT_RECORDS *
+ DEFAULT_EXPORT_SIZE)));
+
+  /* FIXUP: message header version/length (in octets) if records exported are not default */
+ h->version_length = version_length (sizeof (*h) +
+ (sizeof (*s) +
+ (DEFAULT_EXPORT_RECORDS *
+ DEFAULT_EXPORT_SIZE)));
+
+ /* FIXUP: ip length if records exported are not default */
+ /* FIXUP: ip checksum if records exported are not default */
+ ip->length = clib_host_to_net_u16 (vec_len (rewrite) +
+ (DEFAULT_EXPORT_RECORDS *
+ DEFAULT_EXPORT_SIZE));
+ ip->checksum = ip4_header_checksum (ip);
+ _vec_len (rewrite) = sizeof (ip4_ipfix_data_packet_t);
+ em->record_header = rewrite;
+ return (1);
+}
+
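+/*
+ * Patch the pre-built header (export time, sequence number and, when
+ * the buffer is not full, the lengths and IP checksum), then hand the
+ * buffer directly to em->next_node_index ("ip4-lookup" by default).
+ */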
+inline static int
+ioam_export_send_buffer (ioam_export_main_t * em, vlib_main_t * vm,
+ ioam_export_buffer_t * eb)
+{
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s;
+ ip4_ipfix_data_packet_t *tp;
+ vlib_buffer_t *b0;
+ u16 new_l0, old_l0;
+ ip_csum_t sum0;
+ vlib_frame_t *nf = 0;
+ u32 *to_next;
+
+ b0 = vlib_get_buffer (vm, eb->buffer_index);
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ /* FIXUP: message header export_time */
+ h->export_time = clib_host_to_net_u32 ((u32)
+ (((f64) em->unix_time_0) +
+ (vlib_time_now (em->vlib_main) -
+ em->vlib_time_0)));
+
+ /* FIXUP: message header sequence_number */
+ h->sequence_number = clib_host_to_net_u32 (em->sequence_number++);
+
+ /* FIXUP: lengths if different from default */
+ if (PREDICT_FALSE (eb->records_in_this_buffer != DEFAULT_EXPORT_RECORDS))
+ {
+ s->set_id_length = ipfix_set_id_length (em->set_id /* set_id */ ,
+ b0->current_length -
+ (sizeof (*ip) + sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length =
+ version_length (b0->current_length - (sizeof (*ip) + sizeof (*udp)));
+ sum0 = ip->checksum;
+ old_l0 = ip->length;
+ new_l0 = clib_host_to_net_u16 ((u16) b0->current_length);
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+ ip->checksum = ip_csum_fold (sum0);
+ ip->length = new_l0;
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+ }
+
+ /* Enqueue pkts to ip4-lookup */
+
+ nf = vlib_get_frame_to_node (vm, em->next_node_index);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+ nf->n_vectors = 1;
+ to_next[0] = eb->buffer_index;
+ vlib_put_frame_to_node (vm, em->next_node_index, nf);
+ return (1);
+
+}
+
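+/*
+ * The export process below wakes up every THREAD_PERIOD seconds and
+ * flushes any thread's buffer that holds records but has been idle
+ * for more than EXPORT_TIMEOUT seconds.
+ */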
+#define EXPORT_TIMEOUT (20.0)
+#define THREAD_PERIOD (30.0)
+inline static uword
+ioam_export_process_common (ioam_export_main_t * em, vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f,
+ u32 index)
+{
+ f64 now;
+ f64 timeout = 30.0;
+ uword event_type;
+ uword *event_data = 0;
+ int i;
+ ioam_export_buffer_t *eb = 0, *new_eb = 0;
+ u32 *vec_buffer_indices = 0;
+ u32 *vec_buffer_to_be_sent = 0;
+ u32 *thread_index = 0;
+ u32 new_pool_index = 0;
+
+ em->export_process_node_index = index;
+ /* Wait for Godot... */
+ vlib_process_wait_for_event_or_clock (vm, 1e9);
+ event_type = vlib_process_get_events (vm, &event_data);
+ if (event_type != 1)
+ clib_warning ("bogus kickoff event received, %d", event_type);
+ vec_reset_length (event_data);
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case 2: /* Stop and Wait for kickoff again */
+ timeout = 1e9;
+ break;
+ case 1: /* kickoff : Check for unsent buffers */
+ timeout = THREAD_PERIOD;
+ break;
+ case ~0: /* timeout */
+ break;
+ }
+ vec_reset_length (event_data);
+ now = vlib_time_now (vm);
+ /*
+ * Create buffers for threads that are not active enough
+ * to send out the export records
+ */
+ for (i = 0; i < vec_len (em->buffer_per_thread); i++)
+ {
+	  /* If the worker thread is processing export records, skip further checks */
+ if (*em->lockp[i] == 1)
+ continue;
+ eb = pool_elt_at_index (em->buffer_pool, em->buffer_per_thread[i]);
+ if (eb->records_in_this_buffer > 0
+ && now > (eb->touched_at + EXPORT_TIMEOUT))
+ {
+ pool_get_aligned (em->buffer_pool, new_eb,
+ CLIB_CACHE_LINE_BYTES);
+ memset (new_eb, 0, sizeof (*new_eb));
+ if (ioam_export_init_buffer (em, vm, new_eb) == 1)
+ {
+ new_pool_index = new_eb - em->buffer_pool;
+ vec_add (vec_buffer_indices, &new_pool_index, 1);
+ vec_add (vec_buffer_to_be_sent, &em->buffer_per_thread[i],
+ 1);
+ vec_add (thread_index, &i, 1);
+ }
+ else
+ {
+ pool_put (em->buffer_pool, new_eb);
+		  /* Give up */
+ goto CLEANUP;
+ }
+ }
+ }
+ if (vec_len (thread_index) != 0)
+ {
+ /*
+ * Now swap the buffers out
+ */
+ for (i = 0; i < vec_len (thread_index); i++)
+ {
+ while (__sync_lock_test_and_set (em->lockp[thread_index[i]], 1))
+ ;
+ em->buffer_per_thread[thread_index[i]] =
+ vec_pop (vec_buffer_indices);
+ *em->lockp[thread_index[i]] = 0;
+ }
+
+ /* Send the buffers */
+ for (i = 0; i < vec_len (vec_buffer_to_be_sent); i++)
+ {
+ eb =
+ pool_elt_at_index (em->buffer_pool, vec_buffer_to_be_sent[i]);
+ ioam_export_send_buffer (em, vm, eb);
+ pool_put (em->buffer_pool, eb);
+ }
+ }
+
+ CLEANUP:
+ /* Free any leftover/unused buffers and everything that was allocated */
+ for (i = 0; i < vec_len (vec_buffer_indices); i++)
+ {
+ new_eb = pool_elt_at_index (em->buffer_pool, vec_buffer_indices[i]);
+ vlib_buffer_free (vm, &new_eb->buffer_index, 1);
+ pool_put (em->buffer_pool, new_eb);
+ }
+ vec_free (vec_buffer_indices);
+ vec_free (vec_buffer_to_be_sent);
+ vec_free (thread_index);
+ }
+ return 0; /* not so much */
+}
+
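+/*
+ * Common data-plane node body shared by the ip6 and vxlan-gpe export
+ * nodes.  Under the per-thread spinlock, it copies up to
+ * DEFAULT_EXPORT_SIZE bytes of each packet into the thread's export
+ * buffer, applies FIXUP_FUNC to the copied record, and sends and
+ * re-initializes the buffer once DEFAULT_EXPORT_RECORDS records have
+ * accumulated.
+ */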
+#define ioam_export_node_common(EM, VM, N, F, HTYPE, L, V, NEXT, FIXUP_FUNC) \
+do { \
+ u32 n_left_from, *from, *to_next; \
+ export_next_t next_index; \
+ u32 pkts_recorded = 0; \
+ ioam_export_buffer_t *my_buf = 0; \
+ vlib_buffer_t *eb0 = 0; \
+ u32 ebi0 = 0; \
+ from = vlib_frame_vector_args (F); \
+ n_left_from = (F)->n_vectors; \
+ next_index = (N)->cached_next_index; \
+ while (__sync_lock_test_and_set ((EM)->lockp[(VM)->thread_index], 1)); \
+ my_buf = ioam_export_get_my_buffer (EM, (VM)->thread_index); \
+ my_buf->touched_at = vlib_time_now (VM); \
+ while (n_left_from > 0) \
+ { \
+ u32 n_left_to_next; \
+ vlib_get_next_frame (VM, N, next_index, to_next, n_left_to_next); \
+ while (n_left_from >= 4 && n_left_to_next >= 2) \
+ { \
+ u32 next0 = NEXT; \
+ u32 next1 = NEXT; \
+ u32 bi0, bi1; \
+ HTYPE *ip0, *ip1; \
+ vlib_buffer_t *p0, *p1; \
+ u32 ip_len0, ip_len1; \
+ { \
+ vlib_buffer_t *p2, *p3; \
+ p2 = vlib_get_buffer (VM, from[2]); \
+ p3 = vlib_get_buffer (VM, from[3]); \
+ vlib_prefetch_buffer_header (p2, LOAD); \
+ vlib_prefetch_buffer_header (p3, LOAD); \
+ CLIB_PREFETCH (p2->data, 3 * CLIB_CACHE_LINE_BYTES, LOAD); \
+ CLIB_PREFETCH (p3->data, 3 * CLIB_CACHE_LINE_BYTES, LOAD); \
+ } \
+ to_next[0] = bi0 = from[0]; \
+ to_next[1] = bi1 = from[1]; \
+ from += 2; \
+ to_next += 2; \
+ n_left_from -= 2; \
+ n_left_to_next -= 2; \
+ p0 = vlib_get_buffer (VM, bi0); \
+ p1 = vlib_get_buffer (VM, bi1); \
+ ip0 = vlib_buffer_get_current (p0); \
+ ip1 = vlib_buffer_get_current (p1); \
+ ip_len0 = \
+ clib_net_to_host_u16 (ip0->L) + sizeof (HTYPE); \
+ ip_len1 = \
+ clib_net_to_host_u16 (ip1->L) + sizeof (HTYPE); \
+ ebi0 = my_buf->buffer_index; \
+ eb0 = vlib_get_buffer (VM, ebi0); \
+ if (PREDICT_FALSE (eb0 == 0)) \
+ goto NO_BUFFER1; \
+ ip_len0 = \
+ ip_len0 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len0; \
+ ip_len1 = \
+ ip_len1 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len1; \
+ copy3cachelines (eb0->data + eb0->current_length, ip0, ip_len0); \
+ FIXUP_FUNC(eb0, p0); \
+ eb0->current_length += DEFAULT_EXPORT_SIZE; \
+ my_buf->records_in_this_buffer++; \
+ if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \
+ { \
+ ioam_export_send_buffer (EM, VM, my_buf); \
+ ioam_export_init_buffer (EM, VM, my_buf); \
+ } \
+ ebi0 = my_buf->buffer_index; \
+ eb0 = vlib_get_buffer (VM, ebi0); \
+ if (PREDICT_FALSE (eb0 == 0)) \
+ goto NO_BUFFER1; \
+ copy3cachelines (eb0->data + eb0->current_length, ip1, ip_len1); \
+ FIXUP_FUNC(eb0, p1); \
+ eb0->current_length += DEFAULT_EXPORT_SIZE; \
+ my_buf->records_in_this_buffer++; \
+ if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \
+ { \
+ ioam_export_send_buffer (EM, VM, my_buf); \
+ ioam_export_init_buffer (EM, VM, my_buf); \
+ } \
+ pkts_recorded += 2; \
+	  if (PREDICT_FALSE (((N)->flags & VLIB_NODE_FLAG_TRACE)))        \
+	    {                                                              \
+	      if (p0->flags & VLIB_BUFFER_IS_TRACED)                       \
+		{                                                          \
+		  export_trace_t *t =                                      \
+		    vlib_add_trace (VM, (N), p0, sizeof (*t));             \
+ t->flow_label = \
+ clib_net_to_host_u32 (ip0->V); \
+ t->next_index = next0; \
+ } \
+ if (p1->flags & VLIB_BUFFER_IS_TRACED) \
+ { \
+ export_trace_t *t = \
+ vlib_add_trace (VM, N, p1, sizeof (*t)); \
+ t->flow_label = \
+ clib_net_to_host_u32 (ip1->V); \
+ t->next_index = next1; \
+ } \
+ } \
+ NO_BUFFER1: \
+ vlib_validate_buffer_enqueue_x2 (VM, N, next_index, \
+ to_next, n_left_to_next, \
+ bi0, bi1, next0, next1); \
+ } \
+ while (n_left_from > 0 && n_left_to_next > 0) \
+ { \
+ u32 bi0; \
+ vlib_buffer_t *p0; \
+ u32 next0 = NEXT; \
+ HTYPE *ip0; \
+ u32 ip_len0; \
+ bi0 = from[0]; \
+ to_next[0] = bi0; \
+ from += 1; \
+ to_next += 1; \
+ n_left_from -= 1; \
+ n_left_to_next -= 1; \
+ p0 = vlib_get_buffer (VM, bi0); \
+ ip0 = vlib_buffer_get_current (p0); \
+ ip_len0 = \
+ clib_net_to_host_u16 (ip0->L) + sizeof (HTYPE); \
+ ebi0 = my_buf->buffer_index; \
+ eb0 = vlib_get_buffer (VM, ebi0); \
+ if (PREDICT_FALSE (eb0 == 0)) \
+ goto NO_BUFFER; \
+ ip_len0 = \
+ ip_len0 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len0; \
+ copy3cachelines (eb0->data + eb0->current_length, ip0, ip_len0); \
+ FIXUP_FUNC(eb0, p0); \
+ eb0->current_length += DEFAULT_EXPORT_SIZE; \
+ my_buf->records_in_this_buffer++; \
+ if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \
+ { \
+ ioam_export_send_buffer (EM, VM, my_buf); \
+ ioam_export_init_buffer (EM, VM, my_buf); \
+ } \
+ if (PREDICT_FALSE (((N)->flags & VLIB_NODE_FLAG_TRACE) \
+ && (p0->flags & VLIB_BUFFER_IS_TRACED))) \
+ { \
+ export_trace_t *t = vlib_add_trace (VM, (N), p0, sizeof (*t)); \
+ t->flow_label = \
+ clib_net_to_host_u32 (ip0->V); \
+ t->next_index = next0; \
+ } \
+ pkts_recorded += 1; \
+ NO_BUFFER: \
+ vlib_validate_buffer_enqueue_x1 (VM, N, next_index, \
+ to_next, n_left_to_next, \
+ bi0, next0); \
+ } \
+ vlib_put_next_frame (VM, N, next_index, n_left_to_next); \
+ } \
+  vlib_node_increment_counter (VM, (N)->node_index,                     \
+                               EXPORT_ERROR_RECORDED, pkts_recorded);   \
+ *(EM)->lockp[(VM)->thread_index] = 0; \
+} while(0)
+
+#endif /* __included_ioam_export_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api
new file mode 100644
index 00000000..caa97e6e
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api
@@ -0,0 +1,34 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Define a simple binary API to control the feature */
+
+autoreply define vxlan_gpe_ioam_export_enable_disable {
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ /* Enable / disable the feature */
+ u8 is_disable;
+
+  /* Collector ip address */
+  u8 collector_address[4];
+
+  /* Src ip address */
+  u8 src_address[4];
+};
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
new file mode 100644
index 00000000..ec43e484
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_ioam_export.c - ioam export API / debug CLI handling
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/export-common/ioam_export.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+/* define message IDs */
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_api_version
+
+#define REPLY_MSG_ID_BASE sm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* List of message types that this plugin understands */
+#define foreach_vxlan_gpe_ioam_export_plugin_api_msg \
+_(VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE, vxlan_gpe_ioam_export_enable_disable)
+
+ioam_export_main_t vxlan_gpe_ioam_export_main;
+extern vlib_node_registration_t vxlan_export_node;
+
+extern void vxlan_gpe_set_next_override (uword next);
+/* Action function shared between message handler and debug CLI */
+int
+vxlan_gpe_ioam_export_enable_disable (ioam_export_main_t * em,
+ u8 is_disable,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address)
+{
+ vlib_main_t *vm = em->vlib_main;
+ u32 node_index = vxlan_export_node.index;
+ vlib_node_t *vxlan_gpe_decap_ioam_node = NULL;
+
+ if (is_disable == 0)
+ {
+ if (em->my_hbh_slot == ~0)
+ {
+ /* Hook this export node to vxlan-gpe-decap-ioam-v4 */
+ vxlan_gpe_decap_ioam_node =
+ vlib_get_node_by_name (vm, (u8 *) "vxlan-gpe-decap-ioam-v4");
+ if (!vxlan_gpe_decap_ioam_node)
+ {
+ /* node does not exist give up */
+ return (-1);
+ }
+ em->my_hbh_slot =
+ vlib_node_add_next (vm, vxlan_gpe_decap_ioam_node->index,
+ node_index);
+ }
+ if (1 == ioam_export_header_create (em, collector_address, src_address))
+ {
+ ioam_export_thread_buffer_init (em, vm);
+ vxlan_gpe_set_next_override (em->my_hbh_slot);
+ /* Turn on the export buffer check process */
+ vlib_process_signal_event (vm, em->export_process_node_index, 1, 0);
+
+ }
+ else
+ {
+ return (-2);
+ }
+ }
+ else
+ {
+ vxlan_gpe_set_next_override (VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP);
+ ioam_export_header_cleanup (em, collector_address, src_address);
+ ioam_export_thread_buffer_free (em);
+ /* Turn off the export buffer check process */
+ vlib_process_signal_event (vm, em->export_process_node_index, 2, 0);
+
+ }
+
+ return 0;
+}
+
+/* API message handler */
+static void vl_api_vxlan_gpe_ioam_export_enable_disable_t_handler
+ (vl_api_vxlan_gpe_ioam_export_enable_disable_t * mp)
+{
+ vl_api_vxlan_gpe_ioam_export_enable_disable_reply_t *rmp;
+ ioam_export_main_t *sm = &vxlan_gpe_ioam_export_main;
+ int rv;
+
+ rv = vxlan_gpe_ioam_export_enable_disable (sm, (int) (mp->is_disable),
+ (ip4_address_t *)
+ mp->collector_address,
+ (ip4_address_t *)
+ mp->src_address);
+
+ REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE_REPLY);
+} /* API message handler */
+
+
+
+/* Set up the API message handling tables */
+static clib_error_t *
+vxlan_gpe_ioam_export_plugin_api_hookup (vlib_main_t * vm)
+{
+ ioam_export_main_t *sm = &vxlan_gpe_ioam_export_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vxlan_gpe_ioam_export_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (ioam_export_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_vxlan_gpe_ioam_export;
+#undef _
+}
+
+
+static clib_error_t *
+set_vxlan_gpe_ioam_export_ipfix_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ioam_export_main_t *em = &vxlan_gpe_ioam_export_main;
+ ip4_address_t collector, src;
+ u8 is_disable = 0;
+
+ collector.as_u32 = 0;
+ src.as_u32 = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+ ;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src))
+ ;
+ else if (unformat (input, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ if (collector.as_u32 == 0)
+ return clib_error_return (0, "collector address required");
+
+ if (src.as_u32 == 0)
+ return clib_error_return (0, "src address required");
+
+ em->ipfix_collector.as_u32 = collector.as_u32;
+ em->src_address.as_u32 = src.as_u32;
+
+ vlib_cli_output (vm, "Collector %U, src address %U",
+ format_ip4_address, &em->ipfix_collector,
+ format_ip4_address, &em->src_address);
+
+ /* Turn on the export timer process */
+ // vlib_process_signal_event (vm, flow_report_process_node.index,
+ //1, 0);
+ if (0 !=
+ vxlan_gpe_ioam_export_enable_disable (em, is_disable, &collector, &src))
+ {
+ return clib_error_return (0, "Unable to set ioam vxlan-gpe export");
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_vxlan_gpe_ioam_ipfix_command, static) =
+{
+.path = "set vxlan-gpe-ioam export ipfix",
+.short_help = "set vxlan-gpe-ioam export ipfix collector <ip4-address> src <ip4-address>",
+.function = set_vxlan_gpe_ioam_export_ipfix_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+vxlan_gpe_ioam_export_init (vlib_main_t * vm)
+{
+ ioam_export_main_t *em = &vxlan_gpe_ioam_export_main;
+ clib_error_t *error = 0;
+ u8 *name;
+
+ em->set_id = IPFIX_VXLAN_IOAM_EXPORT_ID;
+
+ name = format (0, "vxlan_gpe_ioam_export_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ em->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+ em->unix_time_0 = (u32) time (0); /* Store starting time */
+ em->vlib_time_0 = vlib_time_now (vm);
+
+ error = vxlan_gpe_ioam_export_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (em, &api_main);
+
+ em->my_hbh_slot = ~0;
+ em->vlib_main = vm;
+ em->vnet_main = vnet_get_main ();
+ ioam_export_reset_next_node (em);
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (vxlan_gpe_ioam_export_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h
new file mode 100644
index 00000000..6d93f093
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h>
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h
new file mode 100644
index 00000000..cc5698de
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vxlan_gpe_ioam_export_msg_enum_h
+#define included_vxlan_gpe_ioam_export_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+  /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_vxlan_gpe_ioam_export_msg_enum_h */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c
new file mode 100644
index 00000000..17d31c95
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_ioam_export_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+#define __plugin_msg_base export_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} export_test_main_t;
+
+export_test_main_t export_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(vxlan_gpe_ioam_export_enable_disable_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = export_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE_REPLY, vxlan_gpe_ioam_export_enable_disable_reply)
+
+static int
+api_vxlan_gpe_ioam_export_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ int is_disable = 0;
+ vl_api_vxlan_gpe_ioam_export_enable_disable_t *mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE, mp);
+ mp->is_disable = is_disable;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(vxlan_gpe_ioam_export_enable_disable, "[disable]")
+
+static void
+vxlan_gpe_ioam_vat_api_hookup (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "vxlan_gpe_ioam_export_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ vxlan_gpe_ioam_vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
new file mode 100644
index 00000000..618278c6
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vxlan_gpe_ioam_export_thread.c
+ */
+#include <vnet/api_errno.h>
+#include <vppinfra/pool.h>
+#include <ioam/export-common/ioam_export.h>
+
+static vlib_node_registration_t vxlan_gpe_ioam_export_process_node;
+extern ioam_export_main_t vxlan_gpe_ioam_export_main;
+
+static uword
+vxlan_gpe_ioam_export_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ return (ioam_export_process_common (&vxlan_gpe_ioam_export_main,
+ vm, rt, f,
+ vxlan_gpe_ioam_export_process_node.index));
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_ioam_export_process_node, static) =
+{
+ .function = vxlan_gpe_ioam_export_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vxlan-gpe-ioam-export-process",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
new file mode 100644
index 00000000..1395413a
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <ioam/export-common/ioam_export.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 flow_label;
+} export_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_export_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ export_trace_t *t = va_arg (*args, export_trace_t *);
+
+ s = format (s, "EXPORT: flow_label %d, next index %d",
+ t->flow_label, t->next_index);
+ return s;
+}
+
+vlib_node_registration_t vxlan_export_node;
+extern vlib_node_registration_t export_node;
+extern ioam_export_main_t vxlan_gpe_ioam_export_main;
+
+#define foreach_export_error \
+_(RECORDED, "Packets recorded for export")
+
+typedef enum
+{
+#define _(sym,str) EXPORT_ERROR_##sym,
+ foreach_export_error
+#undef _
+ EXPORT_N_ERROR,
+} export_error_t;
+
+static char *export_error_strings[] = {
+#define _(sym,string) string,
+ foreach_export_error
+#undef _
+};
+
+typedef enum
+{
+ EXPORT_NEXT_VXLAN_GPE_INPUT,
+ EXPORT_N_NEXT,
+} export_next_t;
+
+always_inline void
+copy3cachelines (void *dst, const void *src, size_t n)
+{
+  /*
+   * Hand-unrolled 64-bit copy: each loop iteration moves one
+   * 64-byte cache line (8 x u64).
+   */
+  u64 *copy_dst, *copy_src;
+  int i;
+  copy_dst = (u64 *) dst;
+  copy_src = (u64 *) src;
+  if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE))
+    {
+      /* Short record: copy only the whole cache lines available */
+      for (i = 0; i < n / 64; i++)
+	{
+	  copy_dst[0] = copy_src[0];
+	  copy_dst[1] = copy_src[1];
+	  copy_dst[2] = copy_src[2];
+	  copy_dst[3] = copy_src[3];
+	  copy_dst[4] = copy_src[4];
+	  copy_dst[5] = copy_src[5];
+	  copy_dst[6] = copy_src[6];
+	  copy_dst[7] = copy_src[7];
+	  copy_dst += 8;
+	  copy_src += 8;
+	}
+      return;
+    }
+  /* Common case: copy all three cache lines */
+  for (i = 0; i < 3; i++)
+    {
+      copy_dst[0] = copy_src[0];
+      copy_dst[1] = copy_src[1];
+      copy_dst[2] = copy_src[2];
+      copy_dst[3] = copy_src[3];
+      copy_dst[4] = copy_src[4];
+      copy_dst[5] = copy_src[5];
+      copy_dst[6] = copy_src[6];
+      copy_dst[7] = copy_src[7];
+      copy_dst += 8;
+      copy_src += 8;
+    }
+}
+
+static void
+vxlan_gpe_export_fixup_func (vlib_buffer_t * export_buf,
+ vlib_buffer_t * pak_buf)
+{
+  /* TODO: fix up the copied record once VXLAN-GPE analyse support is implemented */
+}
+
+static uword
+vxlan_gpe_export_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ioam_export_main_t *em = &vxlan_gpe_ioam_export_main;
+ ioam_export_node_common (em, vm, node, frame, ip4_header_t, length,
+ ip_version_and_header_length,
+ EXPORT_NEXT_VXLAN_GPE_INPUT,
+ vxlan_gpe_export_fixup_func);
+ return frame->n_vectors;
+}
+
+/*
+ * Node for VXLAN-GPE export
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_export_node) =
+{
+ .function = vxlan_gpe_export_node_fn,
+ .name = "vxlan-gpe-ioam-export",
+ .vector_size = sizeof (u32),
+ .format_trace = format_export_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (export_error_strings),
+ .error_strings = export_error_strings,
+ .n_next_nodes = EXPORT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {[EXPORT_NEXT_VXLAN_GPE_INPUT] = "vxlan-gpe-pop-ioam-v4"},
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export/ioam_export.api b/src/plugins/ioam/export/ioam_export.api
new file mode 100644
index 00000000..bb830561
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export.api
@@ -0,0 +1,34 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Define a simple binary API to control the feature */
+
+autoreply define ioam_export_ip6_enable_disable {
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ /* Enable / disable the feature */
+ u8 is_disable;
+
+  /* Collector ip address */
+  u8 collector_address[4];
+
+  /* Src ip address */
+  u8 src_address[4];
+};
diff --git a/src/plugins/ioam/export/ioam_export.c b/src/plugins/ioam/export/ioam_export.c
new file mode 100644
index 00000000..46ac3d4a
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * ioam_export.c - ioam export API / debug CLI handling
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/export-common/ioam_export.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+
+
+/* define message IDs */
+#include <ioam/export/ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_api_version
+
+#define REPLY_MSG_ID_BASE sm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* List of message types that this plugin understands */
+#define foreach_ioam_export_plugin_api_msg \
+_(IOAM_EXPORT_IP6_ENABLE_DISABLE, ioam_export_ip6_enable_disable)
+
+ioam_export_main_t ioam_export_main;
+
+extern vlib_node_registration_t export_node;
+
+/* Action function shared between message handler and debug CLI */
+
+int
+ioam_export_ip6_enable_disable (ioam_export_main_t * em,
+ u8 is_disable,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address)
+{
+ vlib_main_t *vm = em->vlib_main;
+
+ if (is_disable == 0)
+ {
+ if (1 == ioam_export_header_create (em, collector_address, src_address))
+ {
+ ioam_export_thread_buffer_init (em, vm);
+ ip6_hbh_set_next_override (em->my_hbh_slot);
+ /* Turn on the export buffer check process */
+ vlib_process_signal_event (vm, em->export_process_node_index, 1, 0);
+
+ }
+ else
+ {
+ return (-2);
+ }
+ }
+ else
+ {
+ ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP);
+ ioam_export_header_cleanup (em, collector_address, src_address);
+ ioam_export_thread_buffer_free (em);
+ /* Turn off the export buffer check process */
+ vlib_process_signal_event (vm, em->export_process_node_index, 2, 0);
+
+ }
+
+ return 0;
+}
+
+/* API message handler */
+static void vl_api_ioam_export_ip6_enable_disable_t_handler
+ (vl_api_ioam_export_ip6_enable_disable_t * mp)
+{
+ vl_api_ioam_export_ip6_enable_disable_reply_t *rmp;
+ ioam_export_main_t *sm = &ioam_export_main;
+ int rv;
+
+ rv = ioam_export_ip6_enable_disable (sm, (int) (mp->is_disable),
+ (ip4_address_t *)
+ mp->collector_address,
+ (ip4_address_t *) mp->src_address);
+
+ REPLY_MACRO (VL_API_IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY);
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+ioam_export_plugin_api_hookup (vlib_main_t * vm)
+{
+ ioam_export_main_t *sm = &ioam_export_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_ioam_export_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (ioam_export_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_ioam_export;
+#undef _
+}
+
+static clib_error_t *
+set_ioam_export_ipfix_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ioam_export_main_t *em = &ioam_export_main;
+ ip4_address_t collector, src;
+ u8 is_disable = 0;
+
+ collector.as_u32 = 0;
+ src.as_u32 = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+ ;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src))
+ ;
+ else if (unformat (input, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ if (collector.as_u32 == 0)
+ return clib_error_return (0, "collector address required");
+
+ if (src.as_u32 == 0)
+ return clib_error_return (0, "src address required");
+
+ em->ipfix_collector.as_u32 = collector.as_u32;
+ em->src_address.as_u32 = src.as_u32;
+
+ vlib_cli_output (vm, "Collector %U, src address %U",
+ format_ip4_address, &em->ipfix_collector,
+ format_ip4_address, &em->src_address);
+
+ /* Turn on the export timer process */
+ // vlib_process_signal_event (vm, flow_report_process_node.index,
+ //1, 0);
+ ioam_export_ip6_enable_disable (em, is_disable, &collector, &src);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ipfix_command, static) =
+{
+.path = "set ioam export ipfix",
+.short_help =
+  "set ioam export ipfix collector <ip4-address> src <ip4-address>",
+.function = set_ioam_export_ipfix_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+ioam_export_init (vlib_main_t * vm)
+{
+ ioam_export_main_t *em = &ioam_export_main;
+ clib_error_t *error = 0;
+ u8 *name;
+ u32 node_index = export_node.index;
+ vlib_node_t *ip6_hbyh_node = NULL;
+
+ em->vlib_main = vm;
+ em->vnet_main = vnet_get_main ();
+ em->set_id = IPFIX_IOAM_EXPORT_ID;
+ ioam_export_reset_next_node (em);
+
+ name = format (0, "ioam_export_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ em->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+ em->unix_time_0 = (u32) time (0); /* Store starting time */
+ em->vlib_time_0 = vlib_time_now (vm);
+
+ error = ioam_export_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (em, &api_main);
+
+ /* Hook this export node to ip6-hop-by-hop */
+ ip6_hbyh_node = vlib_get_node_by_name (vm, (u8 *) "ip6-hop-by-hop");
+ em->my_hbh_slot = vlib_node_add_next (vm, ip6_hbyh_node->index, node_index);
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ioam_export_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export/ioam_export_all_api_h.h b/src/plugins/ioam/export/ioam_export_all_api_h.h
new file mode 100644
index 00000000..bc4368f2
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/export/ioam_export.api.h>
diff --git a/src/plugins/ioam/export/ioam_export_msg_enum.h b/src/plugins/ioam/export/ioam_export_msg_enum.h
new file mode 100644
index 00000000..c2de7988
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_ioam_export_msg_enum_h
+#define included_ioam_export_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/export/ioam_export_all_api_h.h>
+  /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_ioam_export_msg_enum_h */
diff --git a/src/plugins/ioam/export/ioam_export_test.c b/src/plugins/ioam/export/ioam_export_test.c
new file mode 100644
index 00000000..5023afd7
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export_test.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * ioam_export_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+#define __plugin_msg_base export_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+
+/* Declare message IDs */
+#include <ioam/export/ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} export_test_main_t;
+
+export_test_main_t export_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(ioam_export_ip6_enable_disable_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = export_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY, ioam_export_ip6_enable_disable_reply)
+
+
+static int
+api_ioam_export_ip6_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ int is_disable = 0;
+ vl_api_ioam_export_ip6_enable_disable_t *mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M(IOAM_EXPORT_IP6_ENABLE_DISABLE, mp);
+ mp->is_disable = is_disable;
+
+ /* send it... */
+ S(mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(ioam_export_ip6_enable_disable, "[disable]")
+
+static void
+ioam_export_vat_api_hookup (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "ioam_export_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ ioam_export_vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
diff --git a/src/plugins/ioam/export/ioam_export_thread.c b/src/plugins/ioam/export/ioam_export_thread.c
new file mode 100644
index 00000000..5f1d9643
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export_thread.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ioam_export_thread.c
+ */
+#include <vnet/api_errno.h>
+#include <vppinfra/pool.h>
+#include <ioam/export-common/ioam_export.h>
+
+static vlib_node_registration_t ioam_export_process_node;
+extern ioam_export_main_t ioam_export_main;
+
+static uword
+ioam_export_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ return (ioam_export_process_common(&ioam_export_main,
+ vm, rt, f,
+ ioam_export_process_node.index));
+}
+
+VLIB_REGISTER_NODE (ioam_export_process_node, static) =
+{
+ .function = ioam_export_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ioam-export-process",
+};
diff --git a/src/plugins/ioam/export/node.c b/src/plugins/ioam/export/node.c
new file mode 100644
index 00000000..9b61c902
--- /dev/null
+++ b/src/plugins/ioam/export/node.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <ioam/export-common/ioam_export.h>
+
+
+typedef struct
+{
+ u32 next_index;
+ u32 flow_label;
+} export_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_export_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ export_trace_t *t = va_arg (*args, export_trace_t *);
+
+ s = format (s, "EXPORT: flow_label %d, next index %d",
+ t->flow_label, t->next_index);
+ return s;
+}
+
+vlib_node_registration_t export_node;
+extern ioam_export_main_t ioam_export_main;
+
+#define foreach_export_error \
+_(RECORDED, "Packets recorded for export")
+
+typedef enum
+{
+#define _(sym,str) EXPORT_ERROR_##sym,
+ foreach_export_error
+#undef _
+ EXPORT_N_ERROR,
+} export_error_t;
+
+static char *export_error_strings[] = {
+#define _(sym,string) string,
+ foreach_export_error
+#undef _
+};
+
+typedef enum
+{
+ EXPORT_NEXT_POP_HBYH,
+ EXPORT_N_NEXT,
+} export_next_t;
+
+always_inline void
+copy3cachelines (void *dst, const void *src, size_t n)
+{
+  /*
+   * Hand-unrolled 64-bit copy: each loop iteration moves one
+   * 64-byte cache line (8 x u64).
+   */
+  u64 *copy_dst, *copy_src;
+  int i;
+  copy_dst = (u64 *) dst;
+  copy_src = (u64 *) src;
+  if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE))
+    {
+      /* Short record: copy only the whole cache lines available */
+      for (i = 0; i < n / 64; i++)
+	{
+	  copy_dst[0] = copy_src[0];
+	  copy_dst[1] = copy_src[1];
+	  copy_dst[2] = copy_src[2];
+	  copy_dst[3] = copy_src[3];
+	  copy_dst[4] = copy_src[4];
+	  copy_dst[5] = copy_src[5];
+	  copy_dst[6] = copy_src[6];
+	  copy_dst[7] = copy_src[7];
+	  copy_dst += 8;
+	  copy_src += 8;
+	}
+      return;
+    }
+  /* Common case: copy all three cache lines */
+  for (i = 0; i < 3; i++)
+    {
+      copy_dst[0] = copy_src[0];
+      copy_dst[1] = copy_src[1];
+      copy_dst[2] = copy_src[2];
+      copy_dst[3] = copy_src[3];
+      copy_dst[4] = copy_src[4];
+      copy_dst[5] = copy_src[5];
+      copy_dst[6] = copy_src[6];
+      copy_dst[7] = copy_src[7];
+      copy_dst += 8;
+      copy_src += 8;
+    }
+}
+
+static void
+ip6_export_fixup_func (vlib_buffer_t * export_buf, vlib_buffer_t * pak_buf)
+{
+ ip6_header_t *ip6_temp =
+ (ip6_header_t *) (export_buf->data + export_buf->current_length);
+ u32 flow_label_temp =
+ clib_net_to_host_u32(ip6_temp->ip_version_traffic_class_and_flow_label)
+ & 0xFFF00000;
+ flow_label_temp |=
+ IOAM_MASK_DECAP_BIT((vnet_buffer(pak_buf)->l2_classify.opaque_index));
+ ip6_temp->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32(flow_label_temp);
+}
+
+static uword
+ip6_export_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ioam_export_main_t *em = &ioam_export_main;
+ ioam_export_node_common(em, vm, node, frame, ip6_header_t, payload_length,
+ ip_version_traffic_class_and_flow_label,
+ EXPORT_NEXT_POP_HBYH, ip6_export_fixup_func);
+ return frame->n_vectors;
+}
+
+/*
+ * Node for IP6 export
+ */
+VLIB_REGISTER_NODE (export_node) =
+{
+ .function = ip6_export_node_fn,
+ .name = "ip6-export",
+ .vector_size = sizeof (u32),
+ .format_trace = format_export_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (export_error_strings),
+ .error_strings = export_error_strings,
+ .n_next_nodes = EXPORT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {
+ [EXPORT_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop"
+ },
+};
diff --git a/src/plugins/ioam/ioam_plugin_doc.md b/src/plugins/ioam/ioam_plugin_doc.md
new file mode 100644
index 00000000..343abcf7
--- /dev/null
+++ b/src/plugins/ioam/ioam_plugin_doc.md
@@ -0,0 +1,464 @@
+## VPP Inband OAM (iOAM) {#ioam_plugin_doc}
+
+In-band OAM (iOAM) is an implementation study to record operational
+information in the packet while the packet traverses a path between
+two points in the network.
+
+An overview of iOAM can be found on the [iOAM-Devnet] page.
+The following IETF drafts detail the motivation and mechanism for
+recording operational information:
+ - [iOAM-ietf-requirements] - Describes motivation and use cases for iOAM
+ - [iOAM-ietf-data] - Describes data records that can be collected using iOAM
+ - [iOAM-ietf-transport] - Lists the transport protocols
+ and mechanisms to carry iOAM data records
+ - [iOAM-ietf-proof-of-transit] - Describes the idea of Proof of Transit (POT)
+ and mechanisms to operationalize the idea
+
+## Terminology
+In-band OAM is expected to be deployed in a specific domain rather
+than on the overall Internet. The part of the network which employs in-band OAM
+is referred to as **"in-band OAM-domain"**.
+
+In-band OAM data is added to a packet on entering the in-band OAM-domain
+and is removed from the packet when exiting the domain.
+Within the in-band OAM-domain, network nodes that the packet traverses
+may update the in-band OAM data records.
+
+- The node which adds in-band OAM data to the packet is called the
+**"in-band OAM encapsulating node"**.
+
+- The node which removes the in-band OAM data is referred to as the
+**"in-band OAM decapsulating node"**.
+
+- Nodes within the domain which are aware of in-band OAM data and read
+and/or write or process the in-band OAM data are called
+**"in-band OAM transit nodes"**.
+
+## Features supported in the current release
+VPP can function as an in-band OAM encapsulating, transit and decapsulating node.
+In this version of VPP in-band OAM data is transported as options in an
+IPv6 hop-by-hop extension header. Hence in-band OAM can be enabled
+for IPv6 traffic.
+
+The following iOAM features are supported:
+
+- **In-band OAM Tracing** : In-band OAM allows multiple data records to be
+recorded in the packet as the packet traverses the network.
+These data records offer insights into the operational behavior of the network.
+The following information can be collected in the tracing
+data from the nodes a packet traverses:
+ - Node ID
+ - Ingress interface ID
+ - Egress interface ID
+ - Timestamp
+ - Pre-configured application data
+
+- **In-band OAM Proof of Transit (POT)**: Proof of transit iOAM data is
+added to every packet for verifying that a packet traverses a specific
+set of nodes.
+In-band OAM data is updated at every node that is enabled with iOAM
+proof of transit and is used to verify whether a packet traversed
+all the specified nodes. When the verifier receives each packet,
+it can validate whether the packet traversed the specified nodes.
+
+
+## Configuration
+Configuring iOAM involves:
+- Selecting the packets for which iOAM data must be inserted, updated or removed
+ - Selection of packets for iOAM data insertion on the iOAM encapsulating
+ node is done by 5-tuple based classification
+ - Selection of packets for updating iOAM data is done implicitly, based
+ on the presence of iOAM options in the packet
+ - Selection of packets for removing the iOAM data is done by 5-tuple
+ based classification
+- The kind of data to be collected
+ - Tracing data
+ - Proof of transit
+- Additional details for processing iOAM data to be collected
+ - For trace data - trace type, number of nodes to be recorded in the trace,
+ time stamp precision, etc.
+ - For POT data - configuration of POT profile required to process the POT data
+
+The CLI for configuring iOAM is explained here, followed in the
+subsequent sub-sections by detailed steps and examples to deploy iOAM on
+VPP as an encapsulating, transit or decapsulating iOAM node.
+
+VPP iOAM configuration for enabling trace and POT is as follows:
+
+ set ioam rewrite trace-type <0x1f|0x7|0x9|0x11|0x19>
+ trace-elts <number of trace elements> trace-tsp <0|1|2|3>
+ node-id <node ID in hex> app-data <application data in hex> [pot]
+
+A description of each of the options of the CLI follows:
+- trace-type : An entry in the "Node data List" array of the trace option
+can have different formats, following the needs of a deployment.
+For example: Some deployments might only be interested
+in recording the node identifiers, whereas others might be interested
+in recording node identifier and timestamp.
+The following types are currently supported:
+ - 0x1f : Node data to include hop limit (8 bits), node ID (24 bits),
+ ingress and egress interface IDs (16 bits each), timestamp (32 bits),
+ application data (32 bits)
+ - 0x7 : Node data to include hop limit (8 bits), node ID (24 bits),
+ ingress and egress interface IDs (16 bits each)
+ - 0x9 : Node data to include hop limit (8 bits), node ID (24 bits),
+ timestamp (32 bits)
+ - 0x11: Node data to include hop limit (8 bits), node ID (24 bits),
+ application data (32 bits)
+ - 0x19: Node data to include hop limit (8 bits), node ID (24 bits),
+ timestamp (32 bits), application data (32 bits)
+- trace-elts : Defines the length of the node data array in the trace option.
+- trace-tsp : Defines the timestamp precision to use with the enumerated value
+ for precision as follows:
+ - 0 : 32-bit timestamp in seconds
+ - 1 : 32-bit timestamp in milliseconds
+ - 2 : 32-bit timestamp in microseconds
+ - 3 : 32-bit timestamp in nanoseconds
+- node-id : Unique identifier for the node, included in the node ID
+ field of the node data in trace option.
+- app-data : The value configured here is included as is in
+application data field of node data in trace option.
+- pot : Enables POT option to be included in the iOAM options.
+
+### Trace configuration
+
+#### On in-band OAM encapsulating node
+ - **Configure classifier and apply ACL** to select packets for
+ iOAM data insertion
+ - Example to enable iOAM data insertion for all the packets
+ towards IPv6 address db06::06:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-add-hop-by-hop
+ table-index 0 match l3 ip6 dst db06::06
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+ - **Enable tracing** : Specify node ID, maximum number of nodes for which
+ trace data should be recorded, type of data to be included for recording,
+ optionally application data to be included
+ - Example to enable tracing with a maximum of 4 nodes recorded
+ and the data to be recorded to include - hop limit, node id,
+ ingress and egress interface IDs, timestamp (millisecond precision),
+ application data (0x1234):
+
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
+ node-id 0x1 app-data 0x1234
+
+
+
+#### On in-band OAM transit node
+- The transit node requires trace type, timestamp precision, node ID and
+optionally application data to be configured
+in order to update its node data in the trace option.
+
+Example:
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
+ node-id 0x2 app-data 0x1234
+
+#### On in-band OAM decapsulating node
+- The decapsulating node, like the encapsulating node, requires
+**classification** of the packets to remove iOAM data from.
+ - Example: to decapsulate iOAM data for packets towards
+ db06::06, configure a classifier and enable it as an ACL as follows:
+
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-lookup table-index 0
+ match l3 ip6 dst db06::06 opaque-index 100
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+
+- The decapsulating node requires trace type, timestamp precision,
+node ID and optionally application data to be configured
+in order to update its node data in the trace option before it is
+decapsulated.
+
+Example:
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4
+ trace-tsp 1 node-id 0x3 app-data 0x1234
+
+
+### Proof of Transit configuration
+
+For details on proof-of-transit,
+see the IETF draft [iOAM-ietf-proof-of-transit].
+To enable Proof of Transit, all the nodes that participate
+(and hence are verified for transit) need a proof of transit profile.
+A script to generate a proof of transit profile as per the mechanism
+described in [iOAM-ietf-proof-of-transit] will be available at [iOAM-Devnet].
+
+The Proof of Transit mechanism implemented here is based on
+Shamir's Secret Sharing algorithm.
+The overall algorithm uses two polynomials,
+POLY-1 and POLY-2. The degree of the polynomials depends on the number
+of nodes to be verified for transit.
+POLY-1 is secret and constant. Each node gets a point on POLY-1
+at setup-time and keeps it secret.
+POLY-2 is public, random and per packet.
+Each node is assigned a point on POLY-1 and POLY-2 with the same x index.
+Each node derives its point on POLY-2 each time a packet arrives at it.
+A node then contributes its points on POLY-1 and POLY-2 to construct
+POLY-3 (POLY-3 = POLY-1 + POLY-2) using Lagrange extrapolation and
+forwards it towards the verifier by updating POT data in the packet.
+The verifier constructs POLY-3 from the accumulated value from all the nodes
+and its own points on POLY-1 and POLY-2 and verifies whether
+POLY-3 = POLY-1 + POLY-2. Only the verifier knows POLY-1.
+The solution leverages finite field arithmetic in a field of prime size,
+for reasons explained in the description of Shamir's secret sharing
+algorithm.
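+
+To make the arithmetic concrete, here is a minimal standalone C sketch
+using toy numbers (prime 31, two transit nodes). It is illustrative
+only: the field sizes and the exact update formula differ in the real
+implementation.
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    #define P 31ULL /* toy prime; real profiles use 64-bit primes */
+
+    int main (void)
+    {
+      /* POLY-1(x) = 7 + 3x is the secret; nodes sit at x = 1 and x = 2 */
+      uint64_t share[2] = { (7 + 3 * 1) % P, (7 + 3 * 2) % P };
+      /* LPCs to interpolate at x = 0 from x = {1, 2}: l1 = 2, l2 = -1 = 30 */
+      uint64_t lpc[2] = { 2, P - 1 };
+      uint64_t rnd = 5; /* per-packet random; POLY-2(x) = 5 + 4x */
+      uint64_t cumulative = 0;
+      int i;
+
+      for (i = 0; i < 2; i++)
+        {
+          /* each node evaluates its POLY-2 point and adds its weighted
+             POLY-3 point (POLY-1 + POLY-2) to the cumulative value */
+          uint64_t poly2 = (rnd + 4 * (i + 1)) % P;
+          cumulative = (cumulative + lpc[i] * ((share[i] + poly2) % P)) % P;
+        }
+      /* verifier: POLY-3(0) must equal POLY-1(0) + RND = 7 + 5 = 12 */
+      printf ("cumulative %llu, expected %llu\n",
+              (unsigned long long) cumulative,
+              (unsigned long long) ((7 + rnd) % P));
+      return 0;
+    }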
+
+Here is an explanation of the POT profile list and the profile
+configuration CLI used to realize the above mechanism.
+It is best to use the script provided at [iOAM-Devnet] to generate
+this configuration.
+- **Create POT profile** : set pot profile name <string> id [0-1]
+[validator-key 0xu64] prime-number 0xu64 secret_share 0xu64
+lpc 0xu64 polynomial2 0xu64 bits-in-random [0-64]
+ - name : Profile list name.
+ - id : Profile id, it can be 0 or 1.
+ A maximum of two profiles can be configured per profile list.
+ - validator-key : Secret key configured only on the
+ verifier/decapsulating node used to compare and verify proof of transit.
+ - prime-number : Prime number for finite field arithmetic as required by the
+ proof of transit mechanism.
+ - secret_share : Unique point for each node on the secret polynomial POLY-1.
+ - lpc : Lagrange Polynomial Constant (LPC) calculated per node based on
+ its point (the x value used for evaluating the points on the polynomial)
+ on the polynomial used in Lagrange extrapolation
+ for reconstructing the polynomial (POLY-3).
+ - polynomial2 : The pre-evaluated value of the node's point on the
+ 2nd polynomial (POLY-2). This is unique for each node.
+ It is pre-evaluated for all the coefficients of POLY-2 except
+ for the constant part of the polynomial, which changes per packet
+ and is received as part of the POT data in the packet.
+ - bits-in-random : Controls the size of the random number to be
+ generated. This number has to match the other numbers generated and used
+ in the profile as per the algorithm.
+
+- **Set a configured profile as active/in-use** :
+set pot profile-active name <string> ID [0-1]
+ - name : Name of the profile list to be used for computing
+ POT data per packet.
+ - ID : Identifier of the profile within the list to be used.
+
+#### On in-band OAM encapsulating node
+ - Configure the classifier and apply ACL to select packets for iOAM data insertion.
+ - Example to enable iOAM data insertion for all the packets towards
+ IPv6 address db06::06:
+
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node
+ ip6-add-hop-by-hop table-index 0 match l3 ip6 dst db06::06
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+
+ - Configure the proof of transit profile list with profiles.
+Each profile list, referred to by a name, can contain 2 profiles;
+only one is in use for updating proof of transit data at any time.
+ - Example: a profile list named "example" with a profile generated from
+ the script to verify transit through 3 nodes:
+
+
+ vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
+ secret_share 0x6c22eff0f45ec56d lpc 0x7fff0000fa884682
+ polynomial2 0xffb543d4a9c bits-in-random 63
+
+ - Enable one of the profiles from the configured profile list as active
+ so that it will be used for calculating proof of transit
+
+Example: enable profile ID 0 from the profile list "example" configured above:
+
+
+ vpp# set pot profile-active name example ID 0
+
+
+ - Enable POT option to be inserted
+
+
+ vpp# set ioam rewrite pot
+
+
+#### On in-band OAM transit node
+ - Configure the proof of transit profile list with profiles for transit node.
+Example:
+
+
+ vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
+ secret_share 0x564cdbdec4eb625d lpc 0x1
+ polynomial2 0x23f3a227186a bits-in-random 63
+
+#### On in-band OAM decapsulating node / verifier
+- The decapsulating node, like the encapsulating node, requires
+classification of the packets to remove iOAM data from.
+ - Example: to decapsulate iOAM data for packets towards db06::06,
+ configure a classifier and enable it as an ACL as follows:
+
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-lookup table-index 0
+ match l3 ip6 dst db06::06 opaque-index 100
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- To update and verify the proof of transit, a POT profile list should be
+configured.
+ - Example POT profile list configuration:
+
+ vpp# set pot profile name example id 0 validator-key 0x7fff0000fa88465d
+ prime-number 0x7fff0000fa884685 secret_share 0x7a08fbfc5b93116d lpc 0x3
+ polynomial2 0x3ff738597ce bits-in-random 63
+
+## Operational data
+
+The following CLIs are available to check iOAM operation:
+- To check the iOAM configuration in effect, use "show ioam summary"
+
+Example:
+
+ vpp# show ioam summary
+ REWRITE FLOW CONFIGS - Not configured
+ HOP BY HOP OPTIONS - TRACE CONFIG -
+ Trace Type : 0x1f (31)
+ Trace timestamp precision : 1 (Milliseconds)
+ Num of trace nodes : 4
+ Node-id : 0x2 (2)
+ App Data : 0x1234 (4660)
+ POT OPTION - 1 (Enabled)
+ Try 'show ioam pot and show pot profile' for more information
+
+- To find statistics about packets for which iOAM options were
+added (encapsulating node) and removed (decapsulating node), execute
+"show errors"
+
+Example on encapsulating node:
+
+
+ vpp# show error
+ Count Node Reason
+ 1208804706 ip6-inacl input ACL hits
+ 1208804706 ip6-add-hop-by-hop Pkts w/ added ip6 hop-by-hop options
+
+Example on decapsulating node:
+
+ vpp# show error
+ Count Node Reason
+ 69508569 ip6-inacl input ACL hits
+ 69508569 ip6-pop-hop-by-hop Pkts w/ removed ip6 hop-by-hop options
+
+- To check the POT profiles, use "show pot profile"
+
+Example:
+
+ vpp# show pot profile
+ Profile list in use : example
+ POT Profile at index: 0
+ ID : 0
+ Validator : False (0)
+ Secret share : 0x564cdbdec4eb625d (6218586935324795485)
+ Prime number : 0x7fff0000fa884685 (9223090566081300101)
+ 2nd polynomial(eval) : 0x23f3a227186a (39529304496234)
+ LPC : 0x1 (1)
+ Bit mask : 0x7fffffffffffffff (9223372036854775807)
+ Profile index in use: 0
+ Pkts passed : 0x36 (54)
+
+- To get POT statistics for packets, use "show ioam pot"
+
+Example at encapsulating or transit node:
+
+ vpp# show ioam pot
+ Pkts with ip6 hop-by-hop POT options - 54
+ Pkts with ip6 hop-by-hop POT options but no profile set - 0
+ Pkts with POT in Policy - 0
+ Pkts with POT out of Policy - 0
+
+
+Example at decapsulating/verification node:
+
+
+ vpp# show ioam pot
+ Pkts with ip6 hop-by-hop POT options - 54
+ Pkts with ip6 hop-by-hop POT options but no profile set - 0
+ Pkts with POT in Policy - 54
+ Pkts with POT out of Policy - 0
+
+- Tracing - enable tracing of IPv6 packets to view the data inserted and
+collected.
+
+Example when the nodes are receiving data over a DPDK interface:
+Enable tracing using "trace add dpdk-input 20" and
+execute "show trace" to view the iOAM data collected:
+
+
+ vpp# trace add dpdk-input 20
+
+ vpp# show trace
+
+ ------------------- Start of thread 0 vpp_main -------------------
+
+ Packet 1
+
+ 00:00:19:294697: dpdk-input
+ GigabitEthernetb/0/0 rx queue 0
+ buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
+ PKT MBUF: port 0, nb_segs 1, pkt_len 214
+ buf_len 2176, data_len 214, ol_flags 0x0, data_off 128, phys_addr 0xe9a35a00
+ packet_type 0x0
+ IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294737: ethernet-input
+ IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
+ 00:00:19:294753: ip6-input
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294757: ip6-lookup
+ fib 0 adj-idx 15 : indirect via db05::2 flow hash: 0x00000000
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294802: ip6-hop-by-hop
+ IP6_HOP_BY_HOP: next index 5 len 96 traced 96 Trace Type 0x1f , 1 elts left
+ [0] ttl 0x0 node ID 0x0 ingress 0x0 egress 0x0 ts 0x0
+ app 0x0
+ [1] ttl 0x3e node ID 0x3 ingress 0x1 egress 0x2 ts 0xb68c2213
+ app 0x1234
+ [2] ttl 0x3f node ID 0x2 ingress 0x1 egress 0x2 ts 0xb68c2204
+ app 0x1234
+ [3] ttl 0x40 node ID 0x1 ingress 0x5 egress 0x6 ts 0xb68c2200
+ app 0x1234
+ POT opt present
+ random = 0x577a916946071950, Cumulative = 0x10b46e78a35a392d, Index = 0x0
+ 00:00:19:294810: ip6-rewrite
+ tx_sw_if_index 1 adj-idx 14 : GigabitEthernetb/0/0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 flow hash: 0x00000000
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+ 00:00:19:294814: GigabitEthernetb/0/0-output
+ GigabitEthernetb/0/0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+ 00:00:19:294820: GigabitEthernetb/0/0-tx
+ GigabitEthernetb/0/0 tx queue 0
+ buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+
+
+[iOAM-Devnet]: <https://github.com/ciscodevnet/iOAM>
+[iOAM-ietf-requirements]: <https://tools.ietf.org/html/draft-brockners-inband-oam-requirements-01>
+[iOAM-ietf-transport]: <https://tools.ietf.org/html/draft-brockners-inband-oam-transport-01>
+[iOAM-ietf-data]: <https://tools.ietf.org/html/draft-brockners-inband-oam-data-01>
+[iOAM-ietf-proof-of-transit]: <https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>
diff --git a/src/plugins/ioam/ip6/ioam_cache.api b/src/plugins/ioam/ip6/ioam_cache.api
new file mode 100644
index 00000000..dd9c0186
--- /dev/null
+++ b/src/plugins/ioam/ip6/ioam_cache.api
@@ -0,0 +1,29 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* API to control ioam caching */
+
+autoreply define ioam_cache_ip6_enable_disable {
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ /* Enable / disable the feature */
+ u8 is_disable;
+};
diff --git a/src/plugins/ioam/ip6/ioam_cache.c b/src/plugins/ioam/ip6/ioam_cache.c
new file mode 100644
index 00000000..4c9997f4
--- /dev/null
+++ b/src/plugins/ioam/ip6/ioam_cache.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * ioam_cache.c - ioam ip6 API / debug CLI handling
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/ip6/ioam_cache.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+
+#include "ioam_cache.h"
+
+/* define message IDs */
+#include <ioam/ip6/ioam_cache_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_api_version
+
+#define REPLY_MSG_ID_BASE cm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* List of message types that this plugin understands */
+#define foreach_ioam_cache_plugin_api_msg \
+_(IOAM_CACHE_IP6_ENABLE_DISABLE, ioam_cache_ip6_enable_disable)
+
+static u8 *
+ioam_e2e_id_trace_handler (u8 * s, ip6_hop_by_hop_option_t * opt)
+{
+ ioam_e2e_id_option_t *e2e = (ioam_e2e_id_option_t *) opt;
+
+ if (e2e)
+ {
+ s =
+ format (s, "IP6_HOP_BY_HOP E2E ID = %U\n", format_ip6_address,
+ &(e2e->id));
+ }
+
+ return s;
+}
+
+static u8 *
+ioam_e2e_cache_trace_handler (u8 * s, ip6_hop_by_hop_option_t * opt)
+{
+ ioam_e2e_cache_option_t *e2e = (ioam_e2e_cache_option_t *) opt;
+
+ if (e2e)
+ {
+ s =
+ format (s, "IP6_HOP_BY_HOP E2E CACHE = pool:%d idx:%d\n",
+ e2e->pool_id, e2e->pool_index);
+ }
+
+ return s;
+}
+
+/* Action function shared between message handler and debug CLI */
+int
+ioam_cache_ip6_enable_disable (ioam_cache_main_t * em,
+ ip6_address_t * sr_localsid, u8 is_disable)
+{
+ vlib_main_t *vm = em->vlib_main;
+
+ if (is_disable == 0)
+ {
+ ioam_cache_table_init (vm);
+ em->sr_localsid_cache.as_u64[0] = sr_localsid->as_u64[0];
+ em->sr_localsid_cache.as_u64[1] = sr_localsid->as_u64[1];
+ ip6_hbh_set_next_override (em->cache_hbh_slot);
+ ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID,
+ 0, ioam_e2e_id_trace_handler);
+ ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID,
+ 0, ioam_e2e_cache_trace_handler);
+
+ }
+ else
+ {
+ ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP);
+ ioam_cache_table_destroy (vm);
+ em->sr_localsid_cache.as_u64[0] = 0;
+ em->sr_localsid_cache.as_u64[1] = 0;
+ ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
+ ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID);
+ }
+
+ return 0;
+}
+
+/* Action function shared between message handler and debug CLI */
+int
+ioam_tunnel_select_ip6_enable_disable (ioam_cache_main_t * em,
+ u8 criteria,
+ u8 no_of_responses,
+ ip6_address_t * sr_localsid,
+ u8 is_disable)
+{
+ vlib_main_t *vm = em->vlib_main;
+
+ if (is_disable == 0)
+ {
+ ioam_cache_ts_table_init (vm);
+ em->criteria_oneway = criteria;
+ em->wait_for_responses = no_of_responses;
+ em->sr_localsid_ts.as_u64[0] = sr_localsid->as_u64[0];
+ em->sr_localsid_ts.as_u64[1] = sr_localsid->as_u64[1];
+ ip6_hbh_set_next_override (em->ts_hbh_slot);
+ ip6_ioam_ts_cache_set_rewrite ();
+ ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID,
+ 0, ioam_e2e_id_trace_handler);
+ ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID,
+ 0, ioam_e2e_cache_trace_handler);
+
+ /* Turn on the cleanup process */
+ // vlib_process_signal_event (vm, em->cleanup_process_node_index, 1, 0);
+ }
+ else
+ {
+ ioam_cache_ts_timer_node_enable (vm, 0);
+ ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP);
+ em->sr_localsid_ts.as_u64[0] = 0;
+ em->sr_localsid_ts.as_u64[1] = 0;
+ ioam_cache_ts_table_destroy (vm);
+ ip6_ioam_ts_cache_cleanup_rewrite ();
+ ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
+ ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID);
+ }
+
+ return 0;
+}
+
+/* API message handler */
+static void vl_api_ioam_cache_ip6_enable_disable_t_handler
+ (vl_api_ioam_cache_ip6_enable_disable_t * mp)
+{
+ vl_api_ioam_cache_ip6_enable_disable_reply_t *rmp;
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ip6_address_t sr_localsid;
+ int rv;
+
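+ /* The binary API message carries no SR localsid, so a zeroed address
+ is passed here; use the debug CLI to configure one if needed */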
+ sr_localsid.as_u64[0] = 0;
+ sr_localsid.as_u64[1] = 0;
+ rv =
+ ioam_cache_ip6_enable_disable (cm, &sr_localsid, (int) (mp->is_disable));
+ REPLY_MACRO (VL_API_IOAM_CACHE_IP6_ENABLE_DISABLE_REPLY);
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+ioam_cache_plugin_api_hookup (vlib_main_t * vm)
+{
+ ioam_cache_main_t *sm = &ioam_cache_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_ioam_cache_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (ioam_cache_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_ioam_cache;
+#undef _
+}
+
+static clib_error_t *
+set_ioam_cache_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ioam_cache_main_t *em = &ioam_cache_main;
+ u8 is_disable = 0;
+ ip6_address_t sr_localsid;
+ u8 address_set = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "disable"))
+ is_disable = 1;
+ else if (!address_set
+ && unformat (input, "sr_localsid %U", unformat_ip6_address,
+ &sr_localsid))
+ address_set = 1;
+ else
+ break;
+ }
+
+ if (is_disable == 0 && !address_set)
+ return clib_error_return (0, "Error: SRv6 LocalSID address is mandatory");
+
+ ioam_cache_ip6_enable_disable (em, &sr_localsid, is_disable);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ioam_cache_command, static) =
+{
+ .path = "set ioam ip6 cache",
+ .short_help = "set ioam ip6 cache sr_localsid <ip6 address> [disable]",
+ .function = set_ioam_cache_command_fn,
+};
+/* *INDENT-ON* */
+
+#define IOAM_TS_WAIT_FOR_RESPONSES 3
+static clib_error_t *
+set_ioam_tunnel_select_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ioam_cache_main_t *em = &ioam_cache_main;
+ u8 is_disable = 0;
+ u8 one_way = 0;
+ ip6_address_t sr_localsid;
+ u8 address_set = 0;
+ u32 no_of_responses = IOAM_TS_WAIT_FOR_RESPONSES;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "disable"))
+ is_disable = 1;
+ else if (unformat (input, "rtt"))
+ one_way = 0;
+ else if (unformat (input, "oneway"))
+ one_way = 1;
+ else if (unformat (input, "wait_for_responses %d", &no_of_responses))
+ ;
+ else if (!address_set
+ && unformat (input, "sr_localsid %U", unformat_ip6_address,
+ &sr_localsid))
+ address_set = 1;
+ else
+ break;
+ }
+ if (is_disable == 0 && !address_set)
+ return clib_error_return (0,
+ "Error: SRv6 LocalSID address is mandatory to receive response.");
+
+ ioam_tunnel_select_ip6_enable_disable (em, one_way, no_of_responses,
+ &sr_localsid, is_disable);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ioam_cache_ts_command, static) =
+{
+ .path = "set ioam ip6 sr-tunnel-select",
+ .short_help = "set ioam ip6 sr-tunnel-select [disable] [oneway|rtt] "
+ "[wait_for_responses <n|default 3>] [sr_localsid <ip6 address>]",
+ .function = set_ioam_tunnel_select_command_fn,
+};
+/* *INDENT-ON* */
+
+static void
+ioam_cache_table_print (vlib_main_t * vm, u8 verbose)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_entry_t *entry = 0;
+ ioam_cache_ts_entry_t *ts_entry = 0;
+ int no_of_threads = vec_len (vlib_worker_threads);
+ int i;
+
+ pool_foreach (entry, cm->ioam_rewrite_pool, (
+ {
+ vlib_cli_output (vm, "%U",
+ format_ioam_cache_entry,
+ entry);
+ }));
+
+ if (cm->ts_stats)
+ for (i = 0; i < no_of_threads; i++)
+ {
+ vlib_cli_output (vm, "Number of entries in thread-%d selection pool: %lu\n \
+ (pool found to be full: %lu times)", i,
+ cm->ts_stats[i].inuse, cm->ts_stats[i].add_failed);
+
+ if (verbose == 1)
+ vlib_worker_thread_barrier_sync (vm);
+ pool_foreach (ts_entry, cm->ioam_ts_pool[i], (
+ {
+ vlib_cli_output (vm, "%U",
+ format_ioam_cache_ts_entry,
+ ts_entry, (u32) i);
+ }));
+ /* only release the barrier if it was actually taken */
+ if (verbose == 1)
+ vlib_worker_thread_barrier_release (vm);
+ }
+
+}
+
+static clib_error_t *
+show_ioam_cache_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 verbose = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ break;
+ }
+ ioam_cache_table_print (vm, verbose);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ioam_cache_command, static) =
+{
+ .path = "show ioam ip6 cache",
+ .short_help = "show ioam ip6 cache [verbose]",
+ .function = show_ioam_cache_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ioam_cache_init (vlib_main_t * vm)
+{
+ ioam_cache_main_t *em = &ioam_cache_main;
+ clib_error_t *error = 0;
+ u8 *name;
+ u32 cache_node_index = ioam_cache_node.index;
+ u32 ts_node_index = ioam_cache_ts_node.index;
+ vlib_node_t *ip6_hbyh_node = NULL, *ip6_hbh_pop_node = NULL, *error_node =
+ NULL;
+
+ name = format (0, "ioam_cache_%08x%c", api_version, 0);
+
+ memset (&ioam_cache_main, 0, sizeof (ioam_cache_main));
+ /* Ask for a correctly-sized block of API message decode slots */
+ em->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = ioam_cache_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (em, &api_main);
+
+ /* Hook this node to ip6-hop-by-hop */
+ ip6_hbyh_node = vlib_get_node_by_name (vm, (u8 *) "ip6-hop-by-hop");
+ em->cache_hbh_slot =
+ vlib_node_add_next (vm, ip6_hbyh_node->index, cache_node_index);
+ em->ts_hbh_slot =
+ vlib_node_add_next (vm, ip6_hbyh_node->index, ts_node_index);
+
+ ip6_hbh_pop_node = vlib_get_node_by_name (vm, (u8 *) "ip6-pop-hop-by-hop");
+ em->ip6_hbh_pop_node_index = ip6_hbh_pop_node->index;
+
+ error_node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
+ em->error_node_index = error_node->index;
+ em->vlib_main = vm;
+
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ioam_cache_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ip6/ioam_cache.h b/src/plugins/ioam/ip6/ioam_cache.h
new file mode 100644
index 00000000..25a8fb65
--- /dev/null
+++ b/src/plugins/ioam/ip6/ioam_cache.h
@@ -0,0 +1,903 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ioam_cache_h__
+#define __included_ioam_cache_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/srv6/sr.h>
+
+#include <vppinfra/pool.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/bihash_8_8.h>
+#include <ioam/analyse/ip6/ip6_ioam_analyse.h>
+#include <vppinfra/tw_timer_16t_2w_512sl.h>
+/*
+ * ioam_cache.h
+ * This header contains routines for caching of ioam header and
+ * buffer:
+ * 1 - On application facing node: to cache the ioam header received
+ * in a request and reattach it in the response to provide round
+ * trip path visibility. Since request-response matching
+ * is needed, this works with TCP and relies on (5 tuple, seq no)
+ * 2 - On M-Anycast server node: This node replicates requests
+ * towards multiple anycast service nodes serving an anycast
+ * IP6 address. It evaluates the responses and forwards the best
+ * response towards the client requesting the service.
+ * Again, since request-response matching is needed, this works
+ * with TCP and relies on (5 tuple, seq no) for matching.
+ * To do this it caches SYN-ACK responses for a short time to
+ * evaluate multiple responses received before the selected
+ * SYN-ACK response is forwarded and others dropped.
+ *
+ * M-Anycast server cache:
+ * - There is a pool of cache entries per worker thread.
+ * - A cache entry is created when a SYN is received; the expected
+ * number of responses is set based on the number of
+ * SR tunnels for the anycast destination address
+ * - The pool/thread id and pool index are attached in the
+ * message as an ioam option for quick look up.
+ * - When a SYN-ACK is received, the ioam option containing the
+ * thread id + pool index of the cache entry is used to
+ * look up the cache entry.
+ * - Cache synchronization:
+ * - This is achieved by cache entry add/del/update all handled
+ * by the same worker/main thread
+ * - Packets from client to threads - syn packets - can be distributed
+ * based on incoming interface affinity to the cpu core pinned to
+ * the thread, or by a simple sequence number based distribution
+ * if thread per interface does not scale
+ * - Response packets from server towards clients - syn-acks - are
+ * forced to the same thread that created the cache entry
+ * using SR, with an SR v6 destination address assigned
+ * to the core/thread. This address is sent as an ioam option
+ * in the syn so that it can be used on the other side to
+ * populate the v6 dst address in the response
+ * - Timeout: a timer wheel per thread is used to track the syn-ack wait
+ * time. The timer wheel tick is updated via an input node per thread.
+ *
+ * Application facing node/Service side cache:
+ * - Single pool of cache entries.
+ * - A cache entry is created when a SYN is received. It caches the ioam
+ * header. A hash table entry maps (5 tuple, TCP seq no) to the
+ * pool index.
+ * - A response SYN-ACK is processed by looking up the pool index in the
+ * hash table; the cache entry in the pool is used to get the ioam
+ * header rewrite string. The entry is freed from the pool and hash
+ * table after use.
+ * - Locking/Synchronization: Currently this functionality is deployed
+ * with main/single thread only. Hence no locking is used.
+ * - Deployment: A VPP node per application server servicing the anycast
+ * address is expected. Locking/synchronization is needed when the
+ * server/application facing node is started with multiple worker threads.
+ *
+ */
+
+/*
+ * Application facing server side caching:
+ * Cache entry for the ioam header.
+ * Currently caters to TCP and relies on
+ * (5 tuple + seq no) to cache and reinsert
+ * the ioam header between TCP request and response
+ */
+typedef struct
+{
+ ip6_address_t src_address;
+ ip6_address_t dst_address;
+ u16 src_port;
+ u16 dst_port;
+ u8 protocol;
+ u32 seq_no;
+ ip6_address_t next_hop;
+ u16 my_address_offset;
+ u8 *ioam_rewrite_string;
+} ioam_cache_entry_t;
+
+/*
+ * Cache entry for anycast server selection
+ * Works for TCP as 5 tuple + sequence number
+ * is required for request response matching
+ * max_responses expected is set based on number
+ * of SR tunnels for the dst_address
+ * Timeout or all response_received = max_responses
+ * will clear the entry
+ * buffer_index index of the response msg vlib buffer
+ * that is currently the best response
+ */
+typedef struct
+{
+ u32 pool_id;
+ u32 pool_index;
+ ip6_address_t src_address;
+ ip6_address_t dst_address;
+ u16 src_port;
+ u16 dst_port;
+ u8 protocol;
+ u32 seq_no;
+ u32 buffer_index;
+ ip6_hop_by_hop_header_t *hbh; /* pointer to hbh header in the buffer */
+ u64 created_at;
+ u8 response_received;
+ u8 max_responses;
+ u32 stop_timer_handle;
+ /** Handle returned from tw_start_timer */
+ u32 timer_handle;
+ /** entry should expire at this clock tick */
+ u32 expected_to_expire;
+} ioam_cache_ts_entry_t;
+
+/*
+ * Per thread tunnel selection cache stats
+ */
+typedef struct
+{
+ u64 inuse;
+ u64 add_failed;
+} ioam_cache_ts_pool_stats_t;
+
+/* Server side: iOAM header caching */
+#define MAX_CACHE_ENTRIES 4096
+/* M-Anycast: Cache for SR tunnel selection */
+#define MAX_CACHE_TS_ENTRIES 1048576
+
+#define IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS (4 * 1024)
+#define IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE (2<<20)
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* Pool of ioam_cache_buffer_t */
+ ioam_cache_entry_t *ioam_rewrite_pool;
+
+ /* For steering packets ioam cache entry is followed by
+ * SR header. This is the SR rewrite template */
+ u8 *sr_rewrite_template;
+ /* The current rewrite string being used */
+ u8 *rewrite;
+ u8 rewrite_pool_index_offset;
+ ip6_address_t sr_localsid_cache;
+
+ u64 lookup_table_nbuckets;
+ u64 lookup_table_size;
+ clib_bihash_8_8_t ioam_rewrite_cache_table;
+
+ /* M-Anycast: Pool of ioam_cache_ts_entry_t per thread */
+ ioam_cache_ts_entry_t **ioam_ts_pool;
+ ioam_cache_ts_pool_stats_t *ts_stats;
+ /** per thread single-wheel */
+ tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
+
+ /*
+ * Selection criteria: oneway delay: Server to M-Anycast
+ * or RTT
+ */
+ bool criteria_oneway;
+ u8 wait_for_responses;
+ ip6_address_t sr_localsid_ts;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+
+ uword cache_hbh_slot;
+ uword ts_hbh_slot;
+ u32 ip6_hbh_pop_node_index;
+ u32 error_node_index;
+ u32 cleanup_process_node_index;
+} ioam_cache_main_t;
+
+ioam_cache_main_t ioam_cache_main;
+
+extern vlib_node_registration_t ioam_cache_node;
+extern vlib_node_registration_t ioam_cache_ts_node;
+
+/* Compute flow hash. We'll use it to select which Sponge to use for this
+ * flow. And other things.
+ * ip6_compute_flow_hash in ip6.h doesn't locate tcp/udp ports when
+ * ext headers are present. While it could be made to do so, that would
+ * be a performance hit for ECMP flows.
+ * Hence this function here, which takes the L4 information directly as
+ * input. Useful when tcp/udp headers have already been located in the
+ * presence of ext headers.
+ */
+always_inline u32
+ip6_compute_flow_hash_ext (const ip6_header_t * ip,
+ u8 protocol,
+ u16 src_port,
+ u16 dst_port, flow_hash_config_t flow_hash_config)
+{
+ u64 a, b, c;
+ u64 t1, t2;
+
+ t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? t1 : 0;
+
+ t2 = (ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1]);
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) ? t2 : 0;
+
+ a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1;
+ b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2;
+ b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? protocol : 0;
+
+ t1 = src_port;
+ t2 = dst_port;
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0;
+
+ c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
+ ((t1 << 16) | t2) : ((t2 << 16) | t1);
+
+ hash_mix64 (a, b, c);
+ return (u32) c;
+}
+
+
+/* 2 new ioam E2E options :
+ * 1. HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID: IP6 address
+ * of ioam node that inserted ioam header
+ * 2. HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID: Pool id and index
+ * to look up tunnel select cache entry
+ */
+#define HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID 30
+#define HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID 31
+
+typedef CLIB_PACKED (struct
+ {
+ ip6_hop_by_hop_option_t hdr; u8 e2e_type; u8 reserved[5];
+ ip6_address_t id;
+ }) ioam_e2e_id_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ ip6_hop_by_hop_option_t hdr; u8 e2e_type; u8 pool_id;
+ u32 pool_index;
+ }) ioam_e2e_cache_option_t;
+
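+/* Round each option up to a multiple of 8 bytes and express the result in
+ * the 8-octet units used by the hop-by-hop header length field */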
+#define IOAM_E2E_ID_OPTION_RND ((sizeof(ioam_e2e_id_option_t) + 7) & ~7)
+#define IOAM_E2E_ID_HBH_EXT_LEN (IOAM_E2E_ID_OPTION_RND >> 3)
+#define IOAM_E2E_CACHE_OPTION_RND ((sizeof(ioam_e2e_cache_option_t) + 7) & ~7)
+#define IOAM_E2E_CACHE_HBH_EXT_LEN (IOAM_E2E_CACHE_OPTION_RND >> 3)
+
+static inline void
+ioam_e2e_id_rewrite_handler (ioam_e2e_id_option_t * e2e_option,
+ ip6_address_t * address)
+{
+ e2e_option->id.as_u64[0] = address->as_u64[0];
+ e2e_option->id.as_u64[1] = address->as_u64[1];
+
+}
+
+/* Following functions are for the caching of ioam header
+ * to enable reattaching it for a complete request-response
+ * message exchange */
+inline static void
+ioam_cache_entry_free (ioam_cache_entry_t * entry)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ if (entry)
+ {
+ vec_free (entry->ioam_rewrite_string);
+ memset (entry, 0, sizeof (*entry));
+ pool_put (cm->ioam_rewrite_pool, entry);
+ }
+}
+
+inline static ioam_cache_entry_t *
+ioam_cache_entry_cleanup (u32 pool_index)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_entry_t *entry = 0;
+
+ entry = pool_elt_at_index (cm->ioam_rewrite_pool, pool_index);
+ ioam_cache_entry_free (entry);
+ return (0);
+}
+
+inline static ioam_cache_entry_t *
+ioam_cache_lookup (ip6_header_t * ip0, u16 src_port, u16 dst_port, u32 seq_no)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ u32 flow_hash = ip6_compute_flow_hash_ext (ip0, ip0->protocol,
+ src_port, dst_port,
+ IP_FLOW_HASH_DEFAULT |
+ IP_FLOW_HASH_REVERSE_SRC_DST);
+ clib_bihash_kv_8_8_t kv, value;
+
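+ /* 64-bit bihash key: reverse-direction flow hash in the upper 32 bits,
+ TCP sequence number in the lower 32 bits */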
+ kv.key = (u64) flow_hash << 32 | seq_no;
+ kv.value = 0;
+ value.key = 0;
+ value.value = 0;
+
+ if (clib_bihash_search_8_8 (&cm->ioam_rewrite_cache_table, &kv, &value) >=
+ 0)
+ {
+ ioam_cache_entry_t *entry = 0;
+
+ entry = pool_elt_at_index (cm->ioam_rewrite_pool, value.value);
+ /* match */
+ if (ip6_address_compare (&ip0->src_address, &entry->dst_address) == 0 &&
+ ip6_address_compare (&ip0->dst_address, &entry->src_address) == 0 &&
+ entry->src_port == dst_port &&
+ entry->dst_port == src_port && entry->seq_no == seq_no)
+ {
+ /* If lookup is successful remove it from the hash */
+ clib_bihash_add_del_8_8 (&cm->ioam_rewrite_cache_table, &kv, 0);
+ return (entry);
+ }
+ else
+ return (0);
+
+ }
+ return (0);
+}
+
+/*
+ * Caches ioam hbh header
+ * Extends the hbh header with option to contain IP6 address of the node
+ * that caches it
+ */
+inline static int
+ioam_cache_add (vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ u16 src_port,
+ u16 dst_port, ip6_hop_by_hop_header_t * hbh0, u32 seq_no)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_entry_t *entry = 0;
+ u32 rewrite_len = 0, e2e_id_offset = 0;
+ u32 pool_index = 0;
+ ioam_e2e_id_option_t *e2e = 0;
+
+ pool_get_aligned (cm->ioam_rewrite_pool, entry, CLIB_CACHE_LINE_BYTES);
+ memset (entry, 0, sizeof (*entry));
+ pool_index = entry - cm->ioam_rewrite_pool;
+
+ clib_memcpy (entry->dst_address.as_u64, ip0->dst_address.as_u64,
+ sizeof (ip6_address_t));
+ clib_memcpy (entry->src_address.as_u64, ip0->src_address.as_u64,
+ sizeof (ip6_address_t));
+ entry->src_port = src_port;
+ entry->dst_port = dst_port;
+ entry->seq_no = seq_no;
+ rewrite_len = ((hbh0->length + 1) << 3);
+ vec_validate (entry->ioam_rewrite_string, rewrite_len - 1);
+ e2e = ip6_ioam_find_hbh_option (hbh0, HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
+ if (e2e)
+ {
+ entry->next_hop.as_u64[0] = e2e->id.as_u64[0];
+ entry->next_hop.as_u64[1] = e2e->id.as_u64[1];
+ }
+ else
+ {
+ return (-1);
+ }
+ e2e_id_offset = (u8 *) e2e - (u8 *) hbh0;
+ /* setup e2e id option to insert v6 address of the node caching it */
+ clib_memcpy (entry->ioam_rewrite_string, hbh0, rewrite_len);
+ hbh0 = (ip6_hop_by_hop_header_t *) entry->ioam_rewrite_string;
+
+ /* suffix rewrite string with e2e ID option */
+ e2e = (ioam_e2e_id_option_t *) (entry->ioam_rewrite_string + e2e_id_offset);
+ ioam_e2e_id_rewrite_handler (e2e, &cm->sr_localsid_cache);
+ entry->my_address_offset = (u8 *) (&e2e->id) - (u8 *) hbh0;
+
+ /* add it to hash, replacing and freeing any collision for now */
+ u32 flow_hash =
+ ip6_compute_flow_hash_ext (ip0, hbh0->protocol, src_port, dst_port,
+ IP_FLOW_HASH_DEFAULT);
+ clib_bihash_kv_8_8_t kv, value;
+ kv.key = (u64) flow_hash << 32 | seq_no;
+ kv.value = 0;
+ if (clib_bihash_search_8_8 (&cm->ioam_rewrite_cache_table, &kv, &value) >=
+ 0)
+ {
+ /* replace */
+ ioam_cache_entry_cleanup (value.value);
+ }
+ kv.value = pool_index;
+ clib_bihash_add_del_8_8 (&cm->ioam_rewrite_cache_table, &kv, 1);
+ return (0);
+}
+
+/* Creates SR rewrite string
+ * This is appended with ioam header on the server facing
+ * node.
+ * This SR header is necessary to attract packets towards
+ * selected Anycast server.
+ */
+inline static void
+ioam_cache_sr_rewrite_template_create (void)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ip6_address_t *segments = 0;
+ ip6_address_t *this_seg = 0;
+
+ /* This nodes address and the original dest will be
+ * filled when the packet is processed */
+ vec_add2 (segments, this_seg, 1);
+ memset (this_seg, 0xfe, sizeof (ip6_address_t));
+ cm->sr_rewrite_template = ip6_sr_compute_rewrite_string_insert (segments);
+ vec_free (segments);
+}
+
+inline static int
+ioam_cache_table_init (vlib_main_t * vm)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+
+ pool_alloc_aligned (cm->ioam_rewrite_pool,
+ MAX_CACHE_ENTRIES, CLIB_CACHE_LINE_BYTES);
+ cm->lookup_table_nbuckets = IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS;
+ cm->lookup_table_nbuckets = 1 << max_log2 (cm->lookup_table_nbuckets);
+ cm->lookup_table_size = IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE;
+
+ clib_bihash_init_8_8 (&cm->ioam_rewrite_cache_table,
+ "ioam rewrite cache table",
+ cm->lookup_table_nbuckets, cm->lookup_table_size);
+ /* Create SR rewrite template */
+ ioam_cache_sr_rewrite_template_create ();
+ return (1);
+}
+
+inline static int
+ioam_cache_table_destroy (vlib_main_t * vm)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_entry_t *entry = 0;
+ /* free pool and hash table */
+ clib_bihash_free_8_8 (&cm->ioam_rewrite_cache_table);
+ pool_foreach (entry, cm->ioam_rewrite_pool, (
+ {
+ ioam_cache_entry_free (entry);
+ }));
+ pool_free (cm->ioam_rewrite_pool);
+ cm->ioam_rewrite_pool = 0;
+ vec_free (cm->sr_rewrite_template);
+ cm->sr_rewrite_template = 0;
+ return (0);
+}
+
+inline static u8 *
+format_ioam_cache_entry (u8 * s, va_list * args)
+{
+ ioam_cache_entry_t *e = va_arg (*args, ioam_cache_entry_t *);
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ int rewrite_len = vec_len (e->ioam_rewrite_string);
+
+ s = format (s, "%d: %U:%d to %U:%d seq_no %lu\n",
+ (e - cm->ioam_rewrite_pool),
+ format_ip6_address, &e->src_address,
+ e->src_port,
+ format_ip6_address, &e->dst_address, e->dst_port, e->seq_no);
+
+ if (rewrite_len)
+ {
+ s = format (s, " %U",
+ format_ip6_hop_by_hop_ext_hdr,
+ (ip6_hop_by_hop_header_t *) e->ioam_rewrite_string,
+ rewrite_len - 1);
+ }
+ return s;
+}
+
+void ioam_cache_ts_timer_node_enable (vlib_main_t * vm, u8 enable);
+
+#define IOAM_CACHE_TS_TIMEOUT 1.0 /* SYN-ACK wait timeout, 1 sec */
+#define IOAM_CACHE_TS_TICK 100e-3 /* timer wheel tick, 100 ms */
+/* Timer delays as multiples of 100ms */
+#define IOAM_CACHE_TS_TIMEOUT_TICKS (IOAM_CACHE_TS_TICK*9)
+#define TIMER_HANDLE_INVALID ((u32) ~0)
+
+
+void expired_cache_ts_timer_callback (u32 * expired_timers);
+
+/*
+ * Following functions are to manage the M-Anycast server selection
+ * cache.
+ * There is a per worker thread pool to create a cache entry
+ * for each TCP SYN received. The TCP SYN-ACK contains an ioam header
+ * with the HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID option pointing to the
+ * entry.
+ */
+inline static int
+ioam_cache_ts_table_init (vlib_main_t * vm)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ int no_of_threads = vec_len (vlib_worker_threads);
+ int i;
+
+ vec_validate_aligned (cm->ioam_ts_pool, no_of_threads - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (cm->ts_stats, no_of_threads - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (cm->timer_wheels, no_of_threads - 1,
+ CLIB_CACHE_LINE_BYTES);
+ cm->lookup_table_nbuckets = IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS;
+ cm->lookup_table_nbuckets = 1 << max_log2 (cm->lookup_table_nbuckets);
+ cm->lookup_table_size = IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE;
+ for (i = 0; i < no_of_threads; i++)
+ {
+ pool_alloc_aligned (cm->ioam_ts_pool[i],
+ MAX_CACHE_TS_ENTRIES, CLIB_CACHE_LINE_BYTES);
+ memset (&cm->ts_stats[i], 0, sizeof (ioam_cache_ts_pool_stats_t));
+ tw_timer_wheel_init_16t_2w_512sl (&cm->timer_wheels[i],
+ expired_cache_ts_timer_callback,
+ IOAM_CACHE_TS_TICK
+ /* timer period 100ms */ ,
+ 10e4);
+ cm->timer_wheels[i].last_run_time = vlib_time_now (vm);
+ }
+ ioam_cache_ts_timer_node_enable (vm, 1);
+ return (1);
+}
+
+always_inline void
+ioam_cache_ts_timer_set (ioam_cache_main_t * cm,
+ ioam_cache_ts_entry_t * entry, u32 interval)
+{
+ entry->timer_handle
+ = tw_timer_start_16t_2w_512sl (&cm->timer_wheels[entry->pool_id],
+ entry->pool_index, 1, interval);
+}
+
+always_inline void
+ioam_cache_ts_timer_reset (ioam_cache_main_t * cm,
+ ioam_cache_ts_entry_t * entry)
+{
+ tw_timer_stop_16t_2w_512sl (&cm->timer_wheels[entry->pool_id],
+ entry->timer_handle);
+ entry->timer_handle = TIMER_HANDLE_INVALID;
+}
+
+inline static void
+ioam_cache_ts_entry_free (u32 thread_id,
+ ioam_cache_ts_entry_t * entry, u32 node_index)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ vlib_main_t *vm = cm->vlib_main;
+ vlib_frame_t *nf = 0;
+ u32 *to_next;
+
+ if (entry)
+ {
+ if (entry->hbh != 0)
+ {
+ nf = vlib_get_frame_to_node (vm, node_index);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+ nf->n_vectors = 1;
+ to_next[0] = entry->buffer_index;
+ vlib_put_frame_to_node (vm, node_index, nf);
+ }
+ pool_put (cm->ioam_ts_pool[thread_id], entry);
+ cm->ts_stats[thread_id].inuse--;
+ memset (entry, 0, sizeof (*entry));
+ }
+}
+
+inline static int
+ioam_cache_ts_table_destroy (vlib_main_t * vm)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_ts_entry_t *entry = 0;
+ int no_of_threads = vec_len (vlib_worker_threads);
+ int i;
+
+ /* free pool and hash table */
+ for (i = 0; i < no_of_threads; i++)
+ {
+ pool_foreach (entry, cm->ioam_ts_pool[i], (
+ {
+ ioam_cache_ts_entry_free (i,
+ entry,
+ cm->error_node_index);
+ }
+ ));
+ pool_free (cm->ioam_ts_pool[i]);
+ cm->ioam_ts_pool = 0;
+ tw_timer_wheel_free_16t_2w_512sl (&cm->timer_wheels[i]);
+ }
+ vec_free (cm->ioam_ts_pool);
+ return (0);
+}
+
+inline static int
+ioam_cache_ts_entry_cleanup (u32 thread_id, u32 pool_index)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_ts_entry_t *entry = 0;
+
+ entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
+ ioam_cache_ts_entry_free (thread_id, entry, cm->error_node_index);
+ return (0);
+}
+
+/*
+ * Caches buffer for ioam SR tunnel select for Anycast service
+ */
+inline static int
+ioam_cache_ts_add (ip6_header_t * ip0,
+ u16 src_port,
+ u16 dst_port,
+ u32 seq_no,
+ u8 max_responses, u64 now, u32 thread_id, u32 * pool_index)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_ts_entry_t *entry = 0;
+
+ if (cm->ts_stats[thread_id].inuse == MAX_CACHE_TS_ENTRIES)
+ {
+ cm->ts_stats[thread_id].add_failed++;
+ return (-1);
+ }
+
+ pool_get_aligned (cm->ioam_ts_pool[thread_id], entry,
+ CLIB_CACHE_LINE_BYTES);
+ memset (entry, 0, sizeof (*entry));
+ *pool_index = entry - cm->ioam_ts_pool[thread_id];
+
+ clib_memcpy (entry->dst_address.as_u64, ip0->dst_address.as_u64,
+ sizeof (ip6_address_t));
+ clib_memcpy (entry->src_address.as_u64, ip0->src_address.as_u64,
+ sizeof (ip6_address_t));
+ entry->src_port = src_port;
+ entry->dst_port = dst_port;
+ entry->seq_no = seq_no;
+ entry->response_received = 0;
+ entry->max_responses = max_responses;
+ entry->created_at = now;
+ entry->hbh = 0;
+ entry->buffer_index = 0;
+ entry->pool_id = thread_id;
+ entry->pool_index = *pool_index;
+ ioam_cache_ts_timer_set (cm, entry, IOAM_CACHE_TS_TIMEOUT);
+ cm->ts_stats[thread_id].inuse++;
+ return (0);
+}
+
+inline static void
+ioam_cache_ts_send (u32 thread_id, i32 pool_index)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_ts_entry_t *entry = 0;
+
+ entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
+ if (!pool_is_free (cm->ioam_ts_pool[thread_id], entry) && entry)
+ {
+ /* send and free pool entry */
+ ioam_cache_ts_entry_free (thread_id, entry, cm->ip6_hbh_pop_node_index);
+ }
+}
+
+inline static void
+ioam_cache_ts_check_and_send (u32 thread_id, i32 pool_index)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_ts_entry_t *entry = 0;
+ entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
+ if (entry && entry->hbh)
+ {
+ if (entry->response_received == entry->max_responses ||
+ entry->created_at + IOAM_CACHE_TS_TIMEOUT <=
+ vlib_time_now (cm->vlib_main))
+ {
+ ioam_cache_ts_timer_reset (cm, entry);
+ ioam_cache_ts_send (thread_id, pool_index);
+ }
+ }
+}
+
+inline static int
+ioam_cache_ts_update (u32 thread_id,
+ i32 pool_index,
+ u32 buffer_index, ip6_hop_by_hop_header_t * hbh)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_cache_ts_entry_t *entry = 0;
+ vlib_main_t *vm = cm->vlib_main;
+ vlib_frame_t *nf = 0;
+ u32 *to_next;
+
+ entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
+ if (!pool_is_free (cm->ioam_ts_pool[thread_id], entry) && entry)
+ {
+ /* drop existing buffer */
+ if (entry->hbh != 0)
+ {
+ nf = vlib_get_frame_to_node (vm, cm->error_node_index);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+ nf->n_vectors = 1;
+ to_next[0] = entry->buffer_index;
+ vlib_put_frame_to_node (vm, cm->error_node_index, nf);
+ }
+ /* update */
+ entry->buffer_index = buffer_index;
+ entry->hbh = hbh;
+ /* check and send */
+ ioam_cache_ts_check_and_send (thread_id, pool_index);
+ return (0);
+ }
+ return (-1);
+}
+
+/*
+ * Looks up the entry based on the e2e option pool index.
+ * result == 0: found the entry
+ * result < 0: failed to find an entry
+ */
+inline static int
+ioam_cache_ts_lookup (ip6_header_t * ip0,
+ u8 protocol,
+ u16 src_port,
+ u16 dst_port,
+ u32 seq_no,
+ ip6_hop_by_hop_header_t ** hbh,
+ u32 * pool_index, u8 * thread_id, u8 response_seen)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ip6_hop_by_hop_header_t *hbh0 = 0;
+ ioam_e2e_cache_option_t *e2e = 0;
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ e2e =
+ (ioam_e2e_cache_option_t *) ((u8 *) hbh0 + cm->rewrite_pool_index_offset);
+ if ((u8 *) e2e < ((u8 *) hbh0 + ((hbh0->length + 1) << 3))
+ && e2e->hdr.type == HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID)
+ {
+ ioam_cache_ts_entry_t *entry = 0;
+ *pool_index = e2e->pool_index;
+ *thread_id = e2e->pool_id;
+ entry = pool_elt_at_index (cm->ioam_ts_pool[*thread_id], *pool_index);
+ /* match */
+ if (entry &&
+ ip6_address_compare (&ip0->src_address, &entry->dst_address) == 0 &&
+ ip6_address_compare (&ip0->dst_address, &entry->src_address) == 0 &&
+ entry->src_port == dst_port &&
+ entry->dst_port == src_port && entry->seq_no == seq_no)
+ {
+ *hbh = entry->hbh;
+ entry->response_received += response_seen;
+ return (0);
+ }
+ else if (entry)
+ {
+ return (-1);
+ }
+ }
+ return (-1);
+}
+
+inline static u8 *
+format_ioam_cache_ts_entry (u8 * s, va_list * args)
+{
+ ioam_cache_ts_entry_t *e = va_arg (*args, ioam_cache_ts_entry_t *);
+ u32 thread_id = va_arg (*args, u32);
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ioam_e2e_id_option_t *e2e = 0;
+ vlib_main_t *vm = cm->vlib_main;
+ clib_time_t *ct = &vm->clib_time;
+
+ if (!e)
+ goto end;
+
+ if (e->hbh)
+ {
+ e2e =
+ ip6_ioam_find_hbh_option (e->hbh,
+ HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
+
+ s =
+ format (s,
+ "%d: %U:%d to %U:%d seq_no %u buffer %u %U \n\t\tCreated at %U Received %d\n",
+ (e - cm->ioam_ts_pool[thread_id]), format_ip6_address,
+ &e->src_address, e->src_port, format_ip6_address,
+ &e->dst_address, e->dst_port, e->seq_no, e->buffer_index,
+ format_ip6_address, e2e ? &e2e->id : 0, format_time_interval,
+ "h:m:s:u",
+ (e->created_at -
+ vm->cpu_time_main_loop_start) * ct->seconds_per_clock,
+ e->response_received);
+ }
+ else
+ {
+ s =
+ format (s,
+ "%d: %U:%d to %U:%d seq_no %u Buffer %u \n\t\tCreated at %U Received %d\n",
+ (e - cm->ioam_ts_pool[thread_id]), format_ip6_address,
+ &e->src_address, e->src_port, format_ip6_address,
+ &e->dst_address, e->dst_port, e->seq_no, e->buffer_index,
+ format_time_interval, "h:m:s:u",
+ (e->created_at -
+ vm->cpu_time_main_loop_start) * ct->seconds_per_clock,
+ e->response_received);
+ }
+
+end:
+ return s;
+}
+
+/*
+ * Get the extended rewrite string for iOAM data in IPv6.
+ * This makes space for e2e options to carry cache pool info
+ * and the anycast server address.
+ * It sets the rewrite string per the ioam ip6 configs, plus a new
+ * option for the cache, along with the offset to the option used to
+ * populate the cache pool id and index.
+ */
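+/*
+ * Resulting rewrite layout (a sketch; exact offsets depend on which
+ * trace/pot/seqno options are enabled in the configs):
+ *
+ *   cm->rewrite
+ *   |
+ *   v
+ *   +--------------------------+--------------------+------------------+
+ *   | hbh hdr + ioam options   | e2e cache option   | e2e id option    |
+ *   | (trace/pot/seqno)        | (pool id + index)  | (node id)        |
+ *   +--------------------------+--------------------+------------------+
+ *                              ^
+ *                              cm->rewrite_pool_index_offset
+ */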
+static inline int
+ip6_ioam_ts_cache_set_rewrite (void)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ ip6_hop_by_hop_header_t *hbh;
+ u32 rewrite_len = 0;
+ ioam_e2e_cache_option_t *e2e = 0;
+ ioam_e2e_id_option_t *e2e_id = 0;
+
+ vec_free (cm->rewrite);
+ ip6_ioam_set_rewrite (&(cm->rewrite), hm->has_trace_option,
+ hm->has_pot_option, hm->has_seqno_option);
+ hbh = (ip6_hop_by_hop_header_t *) cm->rewrite;
+ rewrite_len = ((hbh->length + 1) << 3);
+ vec_validate (cm->rewrite,
+ rewrite_len - 1 + IOAM_E2E_CACHE_OPTION_RND +
+ IOAM_E2E_ID_OPTION_RND);
+ hbh = (ip6_hop_by_hop_header_t *) cm->rewrite;
+ /* setup e2e id option to insert pool id and index of the node caching it */
+ hbh->length += IOAM_E2E_CACHE_HBH_EXT_LEN + IOAM_E2E_ID_HBH_EXT_LEN;
+ cm->rewrite_pool_index_offset = rewrite_len;
+ e2e = (ioam_e2e_cache_option_t *) (cm->rewrite + rewrite_len);
+ e2e->hdr.type = HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID
+ | HBH_OPTION_TYPE_SKIP_UNKNOWN;
+ e2e->hdr.length = sizeof (ioam_e2e_cache_option_t) -
+ sizeof (ip6_hop_by_hop_option_t);
+ e2e->e2e_type = 2;
+ e2e_id =
+ (ioam_e2e_id_option_t *) ((u8 *) e2e + sizeof (ioam_e2e_cache_option_t));
+ e2e_id->hdr.type =
+ HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID | HBH_OPTION_TYPE_SKIP_UNKNOWN;
+ e2e_id->hdr.length =
+ sizeof (ioam_e2e_id_option_t) - sizeof (ip6_hop_by_hop_option_t);
+ e2e_id->e2e_type = 1;
+
+ return (0);
+}
+
+static inline int
+ip6_ioam_ts_cache_cleanup_rewrite (void)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+
+ vec_free (cm->rewrite);
+ cm->rewrite = 0;
+ cm->rewrite_pool_index_offset = 0;
+ return (0);
+}
+#endif /* __included_ioam_cache_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ip6/ioam_cache_all_api_h.h b/src/plugins/ioam/ip6/ioam_cache_all_api_h.h
new file mode 100644
index 00000000..61272a51
--- /dev/null
+++ b/src/plugins/ioam/ip6/ioam_cache_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/ip6/ioam_cache.api.h>
diff --git a/src/plugins/ioam/ip6/ioam_cache_msg_enum.h b/src/plugins/ioam/ip6/ioam_cache_msg_enum.h
new file mode 100644
index 00000000..8afd067b
--- /dev/null
+++ b/src/plugins/ioam/ip6/ioam_cache_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_ioam_cache_msg_enum_h
+#define included_ioam_cache_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_ioam_cache_msg_enum_h */
diff --git a/src/plugins/ioam/ip6/ioam_cache_node.c b/src/plugins/ioam/ip6/ioam_cache_node.c
new file mode 100644
index 00000000..dd27e127
--- /dev/null
+++ b/src/plugins/ioam/ip6/ioam_cache_node.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * This file implements caching of the iOAM header and reattaching
+ * it to the response message by performing request-response matching.
+ * Works for TCP SYN/SYN-ACK.
+ * This feature is used for anycast server selection.
+ * The cached iOAM data is used to measure the complete round-trip
+ * network path to help in server selection.
+ * Two graph nodes are defined to:
+ * 1. process packets that contain an iOAM header and cache it
+ * 2. process TCP SYN-ACKs and reattach the iOAM header from the
+ * cache entry corresponding to the TCP SYN
+ * These graph nodes are attached to the vnet graph based on
+ * ioam cache and classifier configs.
+ * e.g.
+ * If db06::06 is the anycast service IP6 address:
+ *
+ * set ioam ip6 cache
+ *
+ * Apply this classifier on the interface where requests for the anycast service are received:
+ * classify session acl-hit-next ip6-node ip6-lookup table-index 0 match l3 ip6 dst db06::06
+ * ioam-decap anycast <<< ioam-decap is hooked to the cache when set ioam ip6 cache is enabled
+ *
+ * Apply this classifier on the interface where responses from the anycast service are received:
+ * classify session acl-hit-next ip6-node ip6-add-from-cache-hop-by-hop table-index 0 match l3
+ * ip6 src db06::06 ioam-encap anycast-response
+ *
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <ioam/ip6/ioam_cache.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 flow_label;
+} cache_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_cache_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ cache_trace_t *t = va_arg (*args, cache_trace_t *);
+
+ s = format (s, "CACHE: flow_label %d, next index %d",
+ t->flow_label, t->next_index);
+ return s;
+}
+
+#define foreach_cache_error \
+_(RECORDED, "ip6 iOAM headers cached")
+
+typedef enum
+{
+#define _(sym,str) CACHE_ERROR_##sym,
+ foreach_cache_error
+#undef _
+ CACHE_N_ERROR,
+} cache_error_t;
+
+static char *cache_error_strings[] = {
+#define _(sym,string) string,
+ foreach_cache_error
+#undef _
+};
+
+typedef enum
+{
+ IOAM_CACHE_NEXT_POP_HBYH,
+ IOAM_CACHE_N_NEXT,
+} cache_next_t;
+
+static uword
+ip6_ioam_cache_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ cache_next_t next_index;
+ u32 recorded = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ // TODO: Dual loop
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *p0;
+ u32 next0 = IOAM_CACHE_NEXT_POP_HBYH;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ tcp_header_t *tcp0;
+ u32 tcp_offset0;
+
+ /* speculatively enqueue p0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ if (IP_PROTOCOL_TCP ==
+ ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+ {
+ tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+ if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+ (tcp0->flags & TCP_FLAG_ACK) == 0)
+ {
+ /* Cache the ioam hbh header */
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ if (0 == ioam_cache_add (p0,
+ ip0,
+ clib_net_to_host_u16
+ (tcp0->src_port),
+ clib_net_to_host_u16
+ (tcp0->dst_port), hbh0,
+ clib_net_to_host_u32
+ (tcp0->seq_number) + 1))
+ {
+ recorded++;
+ }
+ }
+ }
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (p0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ cache_trace_t *t =
+ vlib_add_trace (vm, node, p0, sizeof (*t));
+ t->flow_label =
+ clib_net_to_host_u32
+ (ip0->ip_version_traffic_class_and_flow_label);
+ t->next_index = next0;
+ }
+ }
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ioam_cache_node.index,
+ CACHE_ERROR_RECORDED, recorded);
+ return frame->n_vectors;
+}
+
+/*
+ * Node for IP6 iOAM header cache
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ioam_cache_node) =
+{
+ .function = ip6_ioam_cache_node_fn,
+ .name = "ip6-ioam-cache",
+ .vector_size = sizeof (u32),
+ .format_trace = format_cache_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (cache_error_strings),
+ .error_strings = cache_error_strings,
+ .n_next_nodes = IOAM_CACHE_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {
+ [IOAM_CACHE_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop"
+ },
+};
+/* *INDENT-ON* */
+
+typedef struct
+{
+ u32 next_index;
+} ip6_add_from_cache_hbh_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_add_from_cache_hbh_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_add_from_cache_hbh_trace_t *t = va_arg (*args,
+ ip6_add_from_cache_hbh_trace_t
+ *);
+
+ s = format (s, "IP6_ADD_FROM_CACHE_HBH: next index %d", t->next_index);
+ return s;
+}
+
+vlib_node_registration_t ip6_add_from_cache_hbh_node;
+
+#define foreach_ip6_add_from_cache_hbh_error \
+_(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
+
+typedef enum
+{
+#define _(sym,str) IP6_ADD_FROM_CACHE_HBH_ERROR_##sym,
+ foreach_ip6_add_from_cache_hbh_error
+#undef _
+ IP6_ADD_FROM_CACHE_HBH_N_ERROR,
+} ip6_add_from_cache_hbh_error_t;
+
+static char *ip6_add_from_cache_hbh_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_add_from_cache_hbh_error
+#undef _
+};
+
+#define foreach_ip6_ioam_cache_input_next \
+ _(IP6_LOOKUP, "ip6-lookup") \
+ _(DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) IP6_IOAM_CACHE_INPUT_NEXT_##s,
+ foreach_ip6_ioam_cache_input_next
+#undef _
+ IP6_IOAM_CACHE_INPUT_N_NEXT,
+} ip6_ioam_cache_input_next_t;
+
+
+static uword
+ip6_add_from_cache_hbh_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ u32 processed = 0;
+ u8 *rewrite = 0;
+ u32 rewrite_len = 0;
+ u32 sr_rewrite_len = vec_len (cm->sr_rewrite_template);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ // TODO: Dual loop
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ ip6_sr_header_t *srh0 = 0;
+ u64 *copy_src0, *copy_dst0;
+ u16 new_l0;
+ tcp_header_t *tcp0;
+ u32 tcp_offset0;
+ ioam_cache_entry_t *entry = 0;
+
+ next0 = IP6_IOAM_CACHE_INPUT_NEXT_IP6_LOOKUP;
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ if (IP_PROTOCOL_TCP !=
+ ip6_locate_header (b0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+ {
+ goto TRACE0;
+ }
+ tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+ if (((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+ (tcp0->flags & TCP_FLAG_ACK) == TCP_FLAG_ACK) ||
+ (tcp0->flags & TCP_FLAG_RST) == TCP_FLAG_RST)
+ {
+ if (0 != (entry = ioam_cache_lookup (ip0,
+ clib_net_to_host_u16
+ (tcp0->src_port),
+ clib_net_to_host_u16
+ (tcp0->dst_port),
+ clib_net_to_host_u32
+ (tcp0->ack_number))))
+ {
+ rewrite = entry->ioam_rewrite_string;
+ rewrite_len = vec_len (rewrite);
+ }
+ else
+ {
+ next0 = IP6_IOAM_CACHE_INPUT_NEXT_DROP;
+ goto TRACE0;
+ }
+ }
+ else
+ goto TRACE0;
+
+
+ /* Copy the 40-byte ip6 header left by the required amount (5 x u64) */
+ copy_dst0 = (u64 *) (((u8 *) ip0) - (rewrite_len + sr_rewrite_len));
+ copy_src0 = (u64 *) ip0;
+
+ copy_dst0[0] = copy_src0[0];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+ vlib_buffer_advance (b0, -(word) (rewrite_len + sr_rewrite_len));
+ ip0 = vlib_buffer_get_current (b0);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ srh0 = (ip6_sr_header_t *) ((u8 *) hbh0 + rewrite_len);
+ /* $$$ tune, rewrite_len is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_len);
+ clib_memcpy (srh0, cm->sr_rewrite_template, sr_rewrite_len);
+ /* Copy dst address into the DA slot in the segment list */
+ clib_memcpy (srh0->segments, ip0->dst_address.as_u64,
+ sizeof (ip6_address_t));
+ /* Rewrite the ip6 dst address with the first hop */
+ clib_memcpy (ip0->dst_address.as_u64, entry->next_hop.as_u64,
+ sizeof (ip6_address_t));
+ clib_memcpy (&srh0->segments[1],
+ (u8 *) hbh0 + entry->my_address_offset,
+ sizeof (ip6_address_t));
+ ioam_cache_entry_free (entry);
+
+ /* Patch the protocol chain: ip6 (proto 0) -> hop-by-hop -> SR routing header -> original transport */
+ srh0->protocol = ip0->protocol;
+ hbh0->protocol = IPPROTO_IPV6_ROUTE;
+ ip0->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_len +
+ sr_rewrite_len;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ processed++;
+ TRACE0:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_add_from_cache_hbh_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ip6_add_from_cache_hbh_node.index,
+ IP6_ADD_FROM_CACHE_HBH_ERROR_PROCESSED,
+ processed);
+ return frame->n_vectors;
+}
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_from_cache_hbh_node) =
+{
+ .function = ip6_add_from_cache_hbh_node_fn,
+ .name = "ip6-add-from-cache-hop-by-hop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_add_from_cache_hbh_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (ip6_add_from_cache_hbh_error_strings),
+ .error_strings = ip6_add_from_cache_hbh_error_strings,
+ /* See ip/lookup.h */
+ .n_next_nodes = IP6_IOAM_CACHE_INPUT_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [IP6_IOAM_CACHE_INPUT_NEXT_##s] = n,
+ foreach_ip6_ioam_cache_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_from_cache_hbh_node,
+ ip6_add_from_cache_hbh_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
new file mode 100644
index 00000000..79ee58ec
--- /dev/null
+++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
@@ -0,0 +1,768 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ioam_cache_tunnel_select_node.c
+ * This file implements anycast server selection using iOAM data
+ * attached to anycast service traffic.
+ * The anycast service is reachable via multiple servers over
+ * SR tunnels.
+ * Works with a TCP anycast application.
+ * A cache entry is created when a TCP SYN is received for the anycast
+ * destination. Response TCP SYN-ACKs for the anycast service are
+ * compared and the selected response is forwarded.
+ * The functionality is introduced via graph nodes that are hooked into
+ * the vnet graph via classifier configs like below:
+ *
+ * Enable anycast service selection:
+ * set ioam ip6 sr-tunnel-select oneway
+ *
+ * Enable the following classifier on the client-facing interface of the
+ * anycast service, e.g. if the anycast service is db06::06:
+ * classify session acl-hit-next ip6-node ip6-add-syn-hop-by-hop table-index 0 match l3
+ * ip6 dst db06::06 ioam-encap anycast
+ *
+ * Enable the following classifier on the interfaces facing the servers of the
+ * anycast service:
+ * classify session acl-hit-next ip6-node ip6-lookup table-index 0 match l3
+ * ip6 src db06::06 ioam-decap anycast
+ *
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr.h>
+#include <ioam/ip6/ioam_cache.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 flow_label;
+} cache_ts_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_cache_ts_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ cache_ts_trace_t *t = va_arg (*args, cache_ts_trace_t *);
+
+ s = format (s, "CACHE: flow_label %d, next index %d",
+ t->flow_label, t->next_index);
+ return s;
+}
+
+#define foreach_cache_ts_error \
+_(RECORDED, "ip6 iOAM headers cached")
+
+typedef enum
+{
+#define _(sym,str) CACHE_TS_ERROR_##sym,
+ foreach_cache_ts_error
+#undef _
+ CACHE_TS_N_ERROR,
+} cache_ts_error_t;
+
+static char *cache_ts_error_strings[] = {
+#define _(sym,string) string,
+ foreach_cache_ts_error
+#undef _
+};
+
+typedef enum
+{
+ IOAM_CACHE_TS_NEXT_POP_HBYH,
+ IOAM_CACHE_TS_ERROR_NEXT_DROP,
+ IOAM_CACHE_TS_N_NEXT,
+} cache_ts_next_t;
+
+static uword
+ip6_ioam_cache_ts_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ u32 n_left_from, *from, *to_next;
+ cache_ts_next_t next_index;
+ u32 recorded = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ // TODO: dual loop
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *p0;
+ u32 next0 = IOAM_CACHE_TS_NEXT_POP_HBYH;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0, *hbh_cmp;
+ tcp_header_t *tcp0;
+ u32 tcp_offset0;
+ u32 cache_ts_index = 0;
+ u8 cache_thread_id = 0;
+ int result = 0;
+ int skip = 0;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ if (IP_PROTOCOL_TCP ==
+ ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+ {
+ tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+ if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+ (tcp0->flags & TCP_FLAG_ACK) == TCP_FLAG_ACK)
+ {
+ /* Look up and compare */
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+
+ if (0 == ioam_cache_ts_lookup (ip0,
+ hbh0->protocol,
+ clib_net_to_host_u16
+ (tcp0->src_port),
+ clib_net_to_host_u16
+ (tcp0->dst_port),
+ clib_net_to_host_u32
+ (tcp0->ack_number), &hbh_cmp,
+ &cache_ts_index,
+ &cache_thread_id, 1))
+ {
+ /* response seen */
+ result = -1;
+ if (hbh_cmp)
+ result =
+ ip6_ioam_analyse_compare_path_delay (hbh0, hbh_cmp,
+ cm->criteria_oneway);
+ if (result >= 0)
+ {
+ /* current syn/ack is worse than the earlier: Drop */
+ next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP;
+ /* Check if all responses are received or time has exceeded
+ send cached response if yes */
+ ioam_cache_ts_check_and_send (cache_thread_id,
+ cache_ts_index);
+ }
+ else
+ {
+ /* Update cache with this buffer */
+ /* If successfully updated then skip sending it */
+ if (0 ==
+ (result =
+ ioam_cache_ts_update (cache_thread_id,
+ cache_ts_index, bi0,
+ hbh0)))
+ {
+ skip = 1;
+ }
+ else
+ next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP;
+ }
+ }
+ else
+ {
+ next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP;
+ }
+ }
+ else if ((tcp0->flags & TCP_FLAG_RST) == TCP_FLAG_RST)
+ {
+ /* Look up and compare */
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ /* response seen */
+ if (0 == ioam_cache_ts_lookup (ip0,
+ hbh0->protocol,
+ clib_net_to_host_u16
+ (tcp0->src_port),
+ clib_net_to_host_u16
+ (tcp0->dst_port),
+ clib_net_to_host_u32
+ (tcp0->ack_number), &hbh_cmp,
+ &cache_ts_index,
+ &cache_thread_id, 1))
+ {
+ next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP;
+ if (hbh_cmp)
+ ioam_cache_ts_check_and_send (cache_thread_id,
+ cache_ts_index);
+ }
+
+ }
+ }
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (p0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ cache_ts_trace_t *t =
+ vlib_add_trace (vm, node, p0, sizeof (*t));
+ t->flow_label =
+ clib_net_to_host_u32
+ (ip0->ip_version_traffic_class_and_flow_label);
+ t->next_index = next0;
+ }
+ }
+ /* verify speculative enqueue, maybe switch current next frame */
+ if (!skip)
+ {
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, ioam_cache_ts_node.index,
+ CACHE_TS_ERROR_RECORDED, recorded);
+ return frame->n_vectors;
+}
+
+/*
+ * Node for IP6 iOAM tunnel selection cache
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ioam_cache_ts_node) =
+{
+ .function = ip6_ioam_cache_ts_node_fn,
+ .name = "ip6-ioam-tunnel-select",
+ .vector_size = sizeof (u32),
+ .format_trace = format_cache_ts_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (cache_ts_error_strings),
+ .error_strings = cache_ts_error_strings,
+ .n_next_nodes = IOAM_CACHE_TS_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {
+ [IOAM_CACHE_TS_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop",
+ [IOAM_CACHE_TS_ERROR_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+typedef struct
+{
+ u32 next_index;
+} ip6_reset_ts_hbh_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_reset_ts_hbh_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_reset_ts_hbh_trace_t *t = va_arg (*args,
+ ip6_reset_ts_hbh_trace_t *);
+
+ s =
+ format (s, "IP6_IOAM_RESET_TUNNEL_SELECT_HBH: next index %d",
+ t->next_index);
+ return s;
+}
+
+vlib_node_registration_t ip6_reset_ts_hbh_node;
+
+#define foreach_ip6_reset_ts_hbh_error \
+_(PROCESSED, "iOAM Syn/Ack Pkts processed") \
+_(SAVED, "iOAM Syn Pkts state saved") \
+_(REMOVED, "iOAM Syn/Ack Pkts state removed")
+
+typedef enum
+{
+#define _(sym,str) IP6_RESET_TS_HBH_ERROR_##sym,
+ foreach_ip6_reset_ts_hbh_error
+#undef _
+ IP6_RESET_TS_HBH_N_ERROR,
+} ip6_reset_ts_hbh_error_t;
+
+static char *ip6_reset_ts_hbh_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_reset_ts_hbh_error
+#undef _
+};
+
+#define foreach_ip6_ioam_cache_ts_input_next \
+ _(IP6_LOOKUP, "ip6-lookup") \
+ _(DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) IP6_IOAM_CACHE_TS_INPUT_NEXT_##s,
+ foreach_ip6_ioam_cache_ts_input_next
+#undef _
+ IP6_IOAM_CACHE_TS_INPUT_N_NEXT,
+} ip6_ioam_cache_ts_input_next_t;
+
+
+static uword
+ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ u32 processed = 0, cache_ts_added = 0;
+ u64 now;
+ u8 *rewrite = cm->rewrite;
+ u32 rewrite_length = vec_len (rewrite);
+ ioam_e2e_cache_option_t *e2e = 0;
+ u8 no_of_responses = cm->wait_for_responses;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ now = vlib_time_now (vm);
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ ip6_header_t *ip0, *ip1;
+ tcp_header_t *tcp0, *tcp1;
+ u32 tcp_offset0, tcp_offset1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ u64 *copy_src0, *copy_dst0, *copy_src1, *copy_dst1;
+ u16 new_l0, new_l1;
+ u32 pool_index0 = 0, pool_index1 = 0;
+
+ next0 = next1 = IP6_IOAM_CACHE_TS_INPUT_NEXT_IP6_LOOKUP;
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+
+ /* speculatively enqueue b0 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ if (IP_PROTOCOL_TCP !=
+ ip6_locate_header (b0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+ {
+ goto NEXT00;
+ }
+ tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+ if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+ (tcp0->flags & TCP_FLAG_ACK) == 0)
+ {
+ if (no_of_responses > 0)
+ {
+ /* Create TS select entry */
+ if (0 == ioam_cache_ts_add (ip0,
+ clib_net_to_host_u16
+ (tcp0->src_port),
+ clib_net_to_host_u16
+ (tcp0->dst_port),
+ clib_net_to_host_u32
+ (tcp0->seq_number) + 1,
+ no_of_responses, now,
+ vm->thread_index, &pool_index0))
+ {
+ cache_ts_added++;
+ }
+ }
+ copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+ copy_src0 = (u64 *) ip0;
+
+ copy_dst0[0] = copy_src0[0];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+
+ vlib_buffer_advance (b0, -(word) rewrite_length);
+ ip0 = vlib_buffer_get_current (b0);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_length);
+ e2e =
+ (ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
+ cm->rewrite_pool_index_offset);
+ e2e->pool_id = (u8) vm->thread_index;
+ e2e->pool_index = pool_index0;
+ ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
+ ((u8 *) e2e +
+ sizeof (ioam_e2e_cache_option_t)),
+ &cm->sr_localsid_ts);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh0->protocol = ip0->protocol;
+ ip0->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ processed++;
+ }
+
+ NEXT00:
+ if (IP_PROTOCOL_TCP !=
+ ip6_locate_header (b1, ip1, IP_PROTOCOL_TCP, &tcp_offset1))
+ {
+ goto TRACE00;
+ }
+ tcp1 = (tcp_header_t *) ((u8 *) ip1 + tcp_offset1);
+ if ((tcp1->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+ (tcp1->flags & TCP_FLAG_ACK) == 0)
+ {
+ if (no_of_responses > 0)
+ {
+ /* Create TS select entry */
+ if (0 == ioam_cache_ts_add (ip1,
+ clib_net_to_host_u16
+ (tcp1->src_port),
+ clib_net_to_host_u16
+ (tcp1->dst_port),
+ clib_net_to_host_u32
+ (tcp1->seq_number) + 1,
+ no_of_responses, now,
+ vm->thread_index, &pool_index1))
+ {
+ cache_ts_added++;
+ }
+ }
+
+ copy_dst1 = (u64 *) (((u8 *) ip1) - rewrite_length);
+ copy_src1 = (u64 *) ip1;
+
+ copy_dst1[0] = copy_src1[0];
+ copy_dst1[1] = copy_src1[1];
+ copy_dst1[2] = copy_src1[2];
+ copy_dst1[3] = copy_src1[3];
+ copy_dst1[4] = copy_src1[4];
+
+ vlib_buffer_advance (b1, -(word) rewrite_length);
+ ip1 = vlib_buffer_get_current (b1);
+
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh1, rewrite, rewrite_length);
+ e2e =
+ (ioam_e2e_cache_option_t *) ((u8 *) hbh1 +
+ cm->rewrite_pool_index_offset);
+ e2e->pool_id = (u8) vm->thread_index;
+ e2e->pool_index = pool_index1;
+ ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
+ ((u8 *) e2e +
+ sizeof (ioam_e2e_cache_option_t)),
+ &cm->sr_localsid_ts);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh1->protocol = ip1->protocol;
+ ip1->protocol = 0;
+ new_l1 =
+ clib_net_to_host_u16 (ip1->payload_length) + rewrite_length;
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+ processed++;
+ }
+
+ TRACE00:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_reset_ts_hbh_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_reset_ts_hbh_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ }
+
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip0;
+ tcp_header_t *tcp0;
+ u32 tcp_offset0;
+ ip6_hop_by_hop_header_t *hbh0;
+ u64 *copy_src0, *copy_dst0;
+ u16 new_l0;
+ u32 pool_index0 = 0;
+
+ next0 = IP6_IOAM_CACHE_TS_INPUT_NEXT_IP6_LOOKUP;
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ if (IP_PROTOCOL_TCP !=
+ ip6_locate_header (b0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+ {
+ goto TRACE0;
+ }
+ tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+ if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+ (tcp0->flags & TCP_FLAG_ACK) == 0)
+ {
+ if (no_of_responses > 0)
+ {
+ /* Create TS select entry */
+ if (0 == ioam_cache_ts_add (ip0,
+ clib_net_to_host_u16
+ (tcp0->src_port),
+ clib_net_to_host_u16
+ (tcp0->dst_port),
+ clib_net_to_host_u32
+ (tcp0->seq_number) + 1,
+ no_of_responses, now,
+ vm->thread_index, &pool_index0))
+ {
+ cache_ts_added++;
+ }
+ }
+ copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+ copy_src0 = (u64 *) ip0;
+
+ copy_dst0[0] = copy_src0[0];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+
+ vlib_buffer_advance (b0, -(word) rewrite_length);
+ ip0 = vlib_buffer_get_current (b0);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_length);
+ e2e =
+ (ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
+ cm->rewrite_pool_index_offset);
+ e2e->pool_id = (u8) vm->thread_index;
+ e2e->pool_index = pool_index0;
+ ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
+ ((u8 *) e2e +
+ sizeof (ioam_e2e_cache_option_t)),
+ &cm->sr_localsid_ts);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh0->protocol = ip0->protocol;
+ ip0->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ processed++;
+ }
+ TRACE0:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_reset_ts_hbh_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ip6_reset_ts_hbh_node.index,
+ IP6_RESET_TS_HBH_ERROR_PROCESSED, processed);
+ vlib_node_increment_counter (vm, ip6_reset_ts_hbh_node.index,
+ IP6_RESET_TS_HBH_ERROR_SAVED, cache_ts_added);
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_reset_ts_hbh_node) =
+{
+ .function = ip6_reset_ts_hbh_node_fn,
+ .name = "ip6-add-syn-hop-by-hop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_reset_ts_hbh_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (ip6_reset_ts_hbh_error_strings),
+ .error_strings = ip6_reset_ts_hbh_error_strings,
+ /* See ip/lookup.h */
+ .n_next_nodes = IP6_IOAM_CACHE_TS_INPUT_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [IP6_IOAM_CACHE_TS_INPUT_NEXT_##s] = n,
+ foreach_ip6_ioam_cache_ts_input_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_reset_ts_hbh_node, ip6_reset_ts_hbh_node_fn)
+/* *INDENT-ON* */
+
+vlib_node_registration_t ioam_cache_ts_timer_tick_node;
+
+typedef struct
+{
+ u32 thread_index;
+} ioam_cache_ts_timer_tick_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ioam_cache_ts_timer_tick_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ioam_cache_ts_timer_tick_trace_t *t =
+ va_arg (*args, ioam_cache_ts_timer_tick_trace_t *);
+
+ s = format (s, "IOAM_CACHE_TS_TIMER_TICK: thread index %d",
+ t->thread_index);
+ return s;
+}
+
+#define foreach_ioam_cache_ts_timer_tick_error \
+ _(TIMER, "Timer events")
+
+typedef enum
+{
+#define _(sym,str) IOAM_CACHE_TS_TIMER_TICK_ERROR_##sym,
+ foreach_ioam_cache_ts_timer_tick_error
+#undef _
+ IOAM_CACHE_TS_TIMER_TICK_N_ERROR,
+} ioam_cache_ts_timer_tick_error_t;
+
+static char *ioam_cache_ts_timer_tick_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ioam_cache_ts_timer_tick_error
+#undef _
+};
+
+void
+ioam_cache_ts_timer_node_enable (vlib_main_t * vm, u8 enable)
+{
+ vlib_node_set_state (vm, ioam_cache_ts_timer_tick_node.index,
+ enable ==
+ 0 ? VLIB_NODE_STATE_DISABLED :
+ VLIB_NODE_STATE_POLLING);
+}
+
+void
+expired_cache_ts_timer_callback (u32 * expired_timers)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ int i;
+ u32 pool_index;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 count = 0;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ /* Low 28 bits of the timer handle carry the pool index; the high
+ bits hold the timer id assigned by the timer wheel */
+ pool_index = expired_timers[i] & 0x0FFFFFFF;
+
+ /* Handle expiration */
+ ioam_cache_ts_send (thread_index, pool_index);
+ count++;
+ }
+ vlib_node_increment_counter (cm->vlib_main,
+ ioam_cache_ts_timer_tick_node.index,
+ IOAM_CACHE_TS_TIMER_TICK_ERROR_TIMER, count);
+}
+
+static uword
+ioam_cache_ts_timer_tick_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ ioam_cache_main_t *cm = &ioam_cache_main;
+ u32 my_thread_index = vlib_get_thread_index ();
+ struct timespec ts, tsrem;
+
+ tw_timer_expire_timers_16t_2w_512sl (&cm->timer_wheels[my_thread_index],
+ vlib_time_now (vm));
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1000 * 1000 * IOAM_CACHE_TS_TICK;
+ while (nanosleep (&ts, &tsrem) < 0)
+ {
+ ts = tsrem;
+ }
+
+ return 0;
+}
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ioam_cache_ts_timer_tick_node) = {
+ .function = ioam_cache_ts_timer_tick_node_fn,
+ .name = "ioam-cache-ts-timer-tick",
+ .format_trace = format_ioam_cache_ts_timer_tick_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+
+ .n_errors = ARRAY_LEN(ioam_cache_ts_timer_tick_error_strings),
+ .error_strings = ioam_cache_ts_timer_tick_error_strings,
+
+ .n_next_nodes = 1,
+
+ .state = VLIB_NODE_STATE_DISABLED,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ipfixcollector/ipfixcollector.c b/src/plugins/ioam/ipfixcollector/ipfixcollector.c
new file mode 100644
index 00000000..71b934ec
--- /dev/null
+++ b/src/plugins/ioam/ipfixcollector/ipfixcollector.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/udp/udp.h>
+#include <ioam/ipfixcollector/ipfixcollector.h>
+
+ipfix_collector_main_t ipfix_collector_main;
+
+/**
+ * @brief IP-FIX SetID registration function.
+ *
+ * This function can be used by other VPP graph nodes to receive IP-FIX packets
+ * with a particular setid.
+ *
+ * @param vlib_main_t Vlib main of the graph node which is interested in
+ *                    getting IP-FIX packets.
+ * @param ipfix_client_add_del_t Structure describing the client node which
+ * is interested in getting the IP-Fix packets for
+ * a SetID.
+ *
+ * @returns 0 on success.
+ * @returns Error codes(<0) otherwise.
+ */
+int
+ipfix_collector_reg_setid (vlib_main_t * vm, ipfix_client_add_del_t * info)
+{
+ ipfix_collector_main_t *cm = &ipfix_collector_main;
+ uword *p = NULL;
+ int i;
+ ipfix_client *client = 0;
+
+ if ((!info) || (!info->client_name))
+ return IPFIX_COLLECTOR_ERR_INVALID_PARAM;
+
+ p = hash_get (cm->client_reg_table, info->ipfix_setid);
+ client = p ? pool_elt_at_index (cm->client_reg_pool, (*p)) : NULL;
+
+ if (info->del)
+ {
+ if (!client)
+ return 0; // There is no registered handler, so report success
+
+ hash_unset (cm->client_reg_table, info->ipfix_setid);
+ vec_free (client->client_name);
+ pool_put (cm->client_reg_pool, client);
+ return 0;
+ }
+
+ if (client)
+ return IPFIX_COLLECTOR_ERR_REG_EXISTS;
+
+ pool_get (cm->client_reg_pool, client);
+ i = client - cm->client_reg_pool;
+ client->client_name = vec_dup (info->client_name);
+ client->client_node = info->client_node;
+ client->client_next_node = vlib_node_add_next (vm,
+ ipfix_collector_node.index,
+ client->client_node);
+ client->set_id = info->ipfix_setid;
+
+ hash_set (cm->client_reg_table, info->ipfix_setid, i);
+ return 0;
+}
+
+static clib_error_t *
+ipfix_collector_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+ ipfix_collector_main_t *cm = &ipfix_collector_main;
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main ();
+
+ cm->client_reg_pool = NULL;
+ cm->client_reg_table = hash_create (0, sizeof (uword));
+
+ udp_register_dst_port (vm,
+ UDP_DST_PORT_ipfix,
+ ipfix_collector_node.index, 1 /* is_ip4 */ );
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ipfix_collector_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ipfixcollector/ipfixcollector.h b/src/plugins/ioam/ipfixcollector/ipfixcollector.h
new file mode 100644
index 00000000..ee570316
--- /dev/null
+++ b/src/plugins/ioam/ipfixcollector/ipfixcollector.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IPFIXCOLLECTOR_PLUGIN_IPFIXCOLLECTOR_IPFIXCOLLECTOR_H_
+#define PLUGINS_IPFIXCOLLECTOR_PLUGIN_IPFIXCOLLECTOR_IPFIXCOLLECTOR_H_
+
+#include <vppinfra/pool.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+
+#define IPFIX_COLLECTOR_CLIENT_NAME_MAX 64
+
+#define IPFIX_COLLECTOR_ERR_INVALID_PARAM -1
+#define IPFIX_COLLECTOR_ERR_REG_EXISTS -2
+
+/** @brief Structure for other nodes to use when registering with the IP-FIX collector.
+*/
+typedef struct
+{
+ /** String containing name of the client interested in getting
+ ip-fix packets. */
+ u8 *client_name;
+
+ /** Node index where packets have to be redirected. */
+ u32 client_node;
+
+ /** SetID of IP-FIX for which the client is interested in getting packets. */
+ u16 ipfix_setid;
+
+ /** Add(0) or del(1) operation. */
+ u16 del;
+} ipfix_client_add_del_t;
+
+/** @brief IP-FIX collector internal client structure to store SetID to
+ client node ID.
+*/
+typedef struct
+{
+ /** String containing name of the client interested in getting
+ ip-fix packets. */
+ u8 *client_name;
+
+ /** Node index where packets have to be redirected. */
+ u32 client_node;
+
+ /** ipfix-collector next index where packets have to be redirected. */
+ u32 client_next_node;
+
+ /** SetID of IP-FIX for which the client is interested in getting packets. */
+ u16 set_id;
+} ipfix_client;
+
+/** @brief IP-FIX collector main structure for SetID to client node ID mapping.
+ @note cache aligned.
+*/
+typedef struct
+{
+ /** Hash table to map IP-FIX setid to a client registration pool. SetId is
+ key to hash map. */
+ uword *client_reg_table;
+
+ /** Pool of Client node information for the IP-FIX SetID. */
+ ipfix_client *client_reg_pool;
+
+ /** Pointer to VLib main for the node - ipfix-collector. */
+ vlib_main_t *vlib_main;
+
+ /** Pointer to vnet main for convenience. */
+ vnet_main_t *vnet_main;
+} ipfix_collector_main_t;
+
+extern vlib_node_registration_t ipfix_collector_node;
+
+extern ipfix_collector_main_t ipfix_collector_main;
+
+/**
+ * @brief IP-FIX SetID registration function.
+ *
+ * This function can be used by other VPP graph nodes to receive IP-FIX packets
+ * with a particular setid.
+ *
+ * @param vlib_main_t Vlib main of the graph node which is interested in
+ *                    getting IP-FIX packets.
+ * @param ipfix_client_add_del_t Structure describing the client node which
+ * is interested in getting the IP-Fix packets for
+ * a SetID.
+ *
+ * @returns 0 on success.
+ * @returns Error codes(<0) otherwise.
+ */
+int
+ipfix_collector_reg_setid (vlib_main_t * vm, ipfix_client_add_del_t * info);
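+
+/*
+ * Usage sketch (hypothetical client node; the node index and SetID
+ * below are illustrative only, not part of this API):
+ *
+ *   ipfix_client_add_del_t info = { 0 };
+ *   info.client_name = format (0, "my-setid-handler");
+ *   info.client_node = my_node.index;   // graph node to receive matching sets
+ *   info.ipfix_setid = 272;             // SetID this client wants
+ *   info.del = 0;                       // 0 = register, 1 = deregister
+ *   if (ipfix_collector_reg_setid (vm, &info) < 0)
+ *     ;  // e.g. IPFIX_COLLECTOR_ERR_REG_EXISTS if already registered
+ */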
+
+always_inline ipfix_client *
+ipfix_collector_get_client (u16 set_id)
+{
+ ipfix_collector_main_t *cm = &ipfix_collector_main;
+ uword *p;
+
+ p = hash_get (cm->client_reg_table, set_id);
+ return (p ? pool_elt_at_index (cm->client_reg_pool, (*p)) : NULL);
+}
+
+#endif /* PLUGINS_IPFIXCOLLECTOR_PLUGIN_IPFIXCOLLECTOR_IPFIXCOLLECTOR_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ipfixcollector/node.c b/src/plugins/ioam/ipfixcollector/node.c
new file mode 100644
index 00000000..fce997ae
--- /dev/null
+++ b/src/plugins/ioam/ipfixcollector/node.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <ioam/ipfixcollector/ipfixcollector.h>
+#include <vnet/flow/ipfix_packet.h>
+
+#define foreach_ipfix_collector_error \
+_(PROCESSED, "Number of IP-Fix packets processed") \
+_(NO_LISTENER, "Number of IP-Fix packets with no listener")
+
+typedef enum
+{
+#define _(sym,str) IPFIX_COLLECTOR_ERROR_##sym,
+ foreach_ipfix_collector_error
+#undef _
+ IPFIX_COLLECTOR_N_ERROR,
+} ipfix_collector_error_t;
+
+static char *ipfix_collector_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ipfix_collector_error
+#undef _
+};
+
+typedef enum
+{
+ IPFIX_COLLECTOR_NEXT_DROP,
+ IPFIX_COLLECTOR_N_NEXT,
+} ipfix_collector_next_t;
+
+typedef struct
+{
+ u32 next_node;
+ u16 set_id;
+ u16 pad;
+} ipfix_collector_trace_t;
+
+vlib_node_registration_t ipfix_collector_node;
+
+/* packet trace format function */
+static u8 *
+format_ipfix_collector_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ipfix_collector_trace_t *t = va_arg (*args, ipfix_collector_trace_t *);
+
+ s = format (s,
+ "IPFIX_COLLECTOR: set_id %u, next_node %u", t->set_id,
+ t->next_node);
+ return s;
+}
+
+/**
+ * @brief Node to receive IP-Fix packets.
+ * @node ipfix-collector
+ *
+ * This function receives IP-FIX packets and forwards them to other graph nodes
+ * based on the SetID field in the IP-FIX header.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer, next index usage
+ *
+ * <em>Uses:</em>
+ * - <code>vlib_buffer_get_current(p0)</code>
+ * - Parses the IP-FIX packet to extract the SetId, which is used to decide
+ * the next node where the packet should be enqueued.
+ *
+ * <em>Next Index:</em>
+ * - Dispatches the packet to other VPP graph nodes based on their registration
+ * for the IP-FIX SetId using the API ipfix_collector_reg_setid().
+ */
+uword
+ipfix_collector_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ word n_no_listener = 0;
+ word n_listener = 0;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ ipfix_message_header_t *ipfix0, *ipfix1;
+ ipfix_set_header_t *set0, *set1;
+ u16 set_id0, set_id1;
+ ipfix_client *client0, *client1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data,
+ (sizeof (ipfix_message_header_t) +
+ sizeof (ipfix_set_header_t)), LOAD);
+ CLIB_PREFETCH (p3->data,
+ (sizeof (ipfix_message_header_t) +
+ sizeof (ipfix_set_header_t)), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ ipfix0 = vlib_buffer_get_current (b0);
+ ipfix1 = vlib_buffer_get_current (b1);
+
+ set0 = (ipfix_set_header_t *) (ipfix0 + 1);
+ set1 = (ipfix_set_header_t *) (ipfix1 + 1);
+
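+ /* Per RFC 7011 the set header packs Set ID in the high 16 bits
+ and Length in the low 16 bits of the 32-bit set_id_length field */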
+ set_id0 = (u16) (clib_net_to_host_u32 (set0->set_id_length) >> 16);
+ set_id1 = (u16) (clib_net_to_host_u32 (set1->set_id_length) >> 16);
+
+ client0 = ipfix_collector_get_client (set_id0);
+ client1 = ipfix_collector_get_client (set_id1);
+
+ if (PREDICT_TRUE (NULL != client0))
+ {
+ next0 = client0->client_next_node;
+ n_listener++;
+ }
+ else
+ {
+ next0 = IPFIX_COLLECTOR_NEXT_DROP;
+ n_no_listener++;
+ }
+
+ if (PREDICT_TRUE (NULL != client1))
+ {
+ next1 = client1->client_next_node;
+ n_listener++;
+ }
+ else
+ {
+ next1 = IPFIX_COLLECTOR_NEXT_DROP;
+ n_no_listener++;
+ }
+
+ vlib_buffer_advance (b0,
+ (sizeof (ipfix_message_header_t)
+ + sizeof (ipfix_set_header_t)));
+ vlib_buffer_advance (b1,
+ (sizeof (ipfix_message_header_t)
+ + sizeof (ipfix_set_header_t)));
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipfix_collector_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_node = (client0 ? client0->client_node : 0xFFFFFFFF);
+ tr->set_id = set_id0;
+ }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipfix_collector_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->next_node = (client1 ? client1->client_node : 0xFFFFFFFF);
+ tr->set_id = set_id1;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ipfix_message_header_t *ipfix0;
+ ipfix_set_header_t *set0;
+ u16 set_id0;
+ ipfix_client *client0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ipfix0 = vlib_buffer_get_current (b0);
+
+ set0 = (ipfix_set_header_t *) (ipfix0 + 1);
+
+ set_id0 = (u16) (clib_net_to_host_u32 (set0->set_id_length) >> 16);
+
+ client0 = ipfix_collector_get_client (set_id0);
+
+ if (PREDICT_TRUE (NULL != client0))
+ {
+ next0 = client0->client_next_node;
+ n_listener++;
+ }
+ else
+ {
+ next0 = IPFIX_COLLECTOR_NEXT_DROP;
+ n_no_listener++;
+ }
+
+ vlib_buffer_advance (b0,
+ (sizeof (ipfix_message_header_t)
+ + sizeof (ipfix_set_header_t)));
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipfix_collector_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_node = (client0 ? client0->client_node : 0xFFFFFFFF);
+ tr->set_id = set_id0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_error_count (vm, node->node_index,
+ IPFIX_COLLECTOR_ERROR_NO_LISTENER, n_no_listener);
+ vlib_error_count (vm, node->node_index,
+ IPFIX_COLLECTOR_ERROR_PROCESSED, n_listener);
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ipfix_collector_node) = {
+ .function = ipfix_collector_node_fn,
+ .name = "ipfix-collector",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ipfix_collector_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ipfix_collector_error_strings),
+ .error_strings = ipfix_collector_error_strings,
+
+ .n_next_nodes = IPFIX_COLLECTOR_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [IPFIX_COLLECTOR_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-e2e/e2e_util.h b/src/plugins/ioam/lib-e2e/e2e_util.h
new file mode 100644
index 00000000..f8a4ebd4
--- /dev/null
+++ b/src/plugins/ioam/lib-e2e/e2e_util.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_LIB_E2E_E2E_UTIL_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_LIB_E2E_E2E_UTIL_H_
+
+#include <ioam/lib-e2e/ioam_seqno_lib.h>
+
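+/* Wire layout: 1-byte e2e_type, 1-byte reserved, 4-byte e2e_data;
+ CLIB_PACKED keeps the 6-byte on-wire layout free of padding */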
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct {
+ u8 e2e_type;
+ u8 reserved;
+ u32 e2e_data;
+}) ioam_e2e_packet_t;
+/* *INDENT-ON* */
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_LIB_E2E_E2E_UTIL_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-e2e/ioam_seqno_lib.c b/src/plugins/ioam/lib-e2e/ioam_seqno_lib.c
new file mode 100644
index 00000000..bf78c1e3
--- /dev/null
+++ b/src/plugins/ioam/lib-e2e/ioam_seqno_lib.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <ioam/lib-e2e/ioam_seqno_lib.h>
+
+u8 *
+show_ioam_seqno_cmd_fn (u8 * s, ioam_seqno_data * seqno_data, u8 enc)
+{
+ seqno_rx_info *rx;
+
+ s = format (s, "SeqNo Data:\n");
+ if (enc)
+ {
+ s = format (s, " Current Seq. Number : %llu\n", seqno_data->seq_num);
+ }
+ else
+ {
+ rx = &seqno_data->seqno_rx;
+ s = show_ioam_seqno_analyse_data_fn (s, rx);
+ }
+
+ s = format (s, "\n");
+ return s;
+}
+
+u8 *
+show_ioam_seqno_analyse_data_fn (u8 * s, seqno_rx_info * rx)
+{
+ s = format (s, " Highest Seq. Number : %llu\n", rx->bitmap.highest);
+ s = format (s, " Packets received : %llu\n", rx->rx_packets);
+ s = format (s, " Lost packets : %llu\n", rx->lost_packets);
+ s = format (s, " Reordered packets : %llu\n", rx->reordered_packets);
+ s = format (s, " Duplicate packets : %llu\n", rx->dup_packets);
+
+ s = format (s, "\n");
+ return s;
+}
+
+void
+ioam_seqno_init_data (ioam_seqno_data * data)
+{
+ data->seq_num = 0;
+ ioam_seqno_init_rx_info (&data->seqno_rx);
+ return;
+}
+
+void
+ioam_seqno_init_rx_info (seqno_rx_info * data)
+{
+ seqno_bitmap *bitmap = &data->bitmap;
+ bitmap->window_size = SEQNO_WINDOW_SIZE;
+ bitmap->array_size = SEQNO_WINDOW_ARRAY_SIZE;
+ bitmap->mask = 32 * SEQNO_WINDOW_ARRAY_SIZE - 1;
+ bitmap->array[0] = 0x00000000; /* pretend we haven't seen sequence number 0 */
+ bitmap->highest = 0;
+
+ data->dup_packets = 0;
+ data->lost_packets = 0;
+ data->reordered_packets = 0;
+ data->rx_packets = 0;
+ return;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-e2e/ioam_seqno_lib.h b/src/plugins/ioam/lib-e2e/ioam_seqno_lib.h
new file mode 100644
index 00000000..6bd38ff2
--- /dev/null
+++ b/src/plugins/ioam/lib-e2e/ioam_seqno_lib.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_LIB_E2E_IOAM_SEQNO_LIB_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_LIB_E2E_IOAM_SEQNO_LIB_H_
+
+#include <vppinfra/types.h>
+
+#define SEQ_CHECK_VALUE 0x80000000 /* for seq number wraparound detection */
+
+#define SEQNO_WINDOW_SIZE 2048
+#define SEQNO_WINDOW_ARRAY_SIZE 64
+
+typedef struct seqno_bitmap_
+{
+ u32 window_size;
+ u32 array_size;
+ u32 mask;
+ u32 pad;
+ u64 highest;
+ u64 array[SEQNO_WINDOW_ARRAY_SIZE]; /* Will be alloc to array_size */
+} seqno_bitmap;
+
+typedef struct seqno_rx_info_
+{
+ u64 rx_packets;
+ u64 lost_packets;
+ u64 reordered_packets;
+ u64 dup_packets;
+ seqno_bitmap bitmap;
+} seqno_rx_info;
+
+/* This structure is 64-byte aligned */
+typedef struct ioam_seqno_data_
+{
+ union
+ {
+ u32 seq_num; /* Useful only for encap node */
+ seqno_rx_info seqno_rx;
+ };
+} ioam_seqno_data;
+
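+/*
+ * Note: the bitmap helpers below index the u64 array in 32-bit strides
+ * (n >> 5) and only ever touch bits 0..31 of each word; the upper half
+ * of every u64 is left unused by this scheme.
+ */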
+static inline void
+BIT_SET (u64 * p, u32 n)
+{
+ p[n >> 5] |= (1 << (n & 31));
+}
+
+static inline int
+BIT_TEST (u64 * p, u32 n)
+{
+ return p[n >> 5] & (1 << (n & 31));
+}
+
+static inline void
+BIT_CLEAR (u64 * p, u64 start, int num_bits, u32 mask)
+{
+ int n, t;
+ int start_index = (start >> 5);
+ int mask_index = (mask >> 5);
+
+ start_index &= mask_index;
+ if (start & 0x1f)
+ {
+ int start_bit = (start & 0x1f);
+
+ n = (1 << start_bit) - 1;
+ t = start_bit + num_bits;
+ if (t < 32)
+ {
+ n |= ~((1 << t) - 1);
+ p[start_index] &= n;
+ return;
+ }
+ p[start_index] &= n;
+ start_index = (start_index + 1) & mask_index;
+ num_bits -= (32 - start_bit);
+ }
+ while (num_bits >= 32)
+ {
+ p[start_index] = 0;
+ start_index = (start_index + 1) & mask_index;
+ num_bits -= 32;
+ }
+ n = ~((1 << num_bits) - 1);
+ p[start_index] &= n;
+}
+
+static inline u8
+seqno_check_wraparound (u32 a, u32 b)
+{
+ if ((a != b) && (a > b) && ((a - b) > SEQ_CHECK_VALUE))
+ {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Function to analyze the PPC value received.
+ * - Updates the bitmap with the received sequence number
+ * - Counts the received/lost/duplicate/reordered packets
+ */
+inline static void
+ioam_analyze_seqno (seqno_rx_info * seqno_rx, u64 seqno)
+{
+ int diff;
+ static int peer_dead_count;
+ seqno_bitmap *bitmap = &seqno_rx->bitmap;
+
+ seqno_rx->rx_packets++;
+
+ if (seqno > bitmap->highest)
+ { /* new larger sequence number */
+ peer_dead_count = 0;
+ diff = seqno - bitmap->highest;
+ if (diff < bitmap->window_size)
+ {
+ if (diff > 1)
+ { /* diff==1 is *such* a common case it's a win to optimize it */
+ BIT_CLEAR (bitmap->array, bitmap->highest + 1, diff - 1,
+ bitmap->mask);
+ seqno_rx->lost_packets += diff - 1;
+ }
+ }
+ else
+ {
+ seqno_rx->lost_packets += diff - 1;
+ memset (bitmap->array, 0, bitmap->array_size * sizeof (u64));
+ }
+ BIT_SET (bitmap->array, seqno & bitmap->mask);
+ bitmap->highest = seqno;
+ return;
+ }
+
+ /* we've seen a bigger seq number before */
+ diff = bitmap->highest - seqno;
+ if (diff >= bitmap->window_size)
+ {
+ if (seqno_check_wraparound (bitmap->highest, seqno))
+ {
+ memset (bitmap->array, 0, bitmap->array_size * sizeof (u64));
+ BIT_SET (bitmap->array, seqno & bitmap->mask);
+ bitmap->highest = seqno;
+ return;
+ }
+ else
+ {
+ peer_dead_count++;
+ if (peer_dead_count > 25)
+ {
+ peer_dead_count = 0;
+ memset (bitmap->array, 0, bitmap->array_size * sizeof (u64));
+ BIT_SET (bitmap->array, seqno & bitmap->mask);
+ bitmap->highest = seqno;
+ }
+ //ppc_rx->reordered_packets++;
+ }
+ return;
+ }
+
+ if (BIT_TEST (bitmap->array, seqno & bitmap->mask))
+ {
+ seqno_rx->dup_packets++;
+ return; /* Already seen */
+ }
+ seqno_rx->reordered_packets++;
+ seqno_rx->lost_packets--;
+ BIT_SET (bitmap->array, seqno & bitmap->mask);
+ return;
+}
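+
+/*
+ * Illustrative usage sketch (not part of the library API):
+ *
+ *   seqno_rx_info rx;
+ *   ioam_seqno_init_rx_info (&rx);
+ *   ioam_analyze_seqno (&rx, 1);
+ *   ioam_analyze_seqno (&rx, 3);   // 2 is provisionally counted as lost
+ *   ioam_analyze_seqno (&rx, 2);   // reordered; the lost count drops back
+ *   // now: rx.rx_packets == 3, rx.reordered_packets == 1, rx.lost_packets == 0
+ */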
+
+u8 *show_ioam_seqno_analyse_data_fn (u8 * s, seqno_rx_info * rx);
+
+u8 *show_ioam_seqno_cmd_fn (u8 * s, ioam_seqno_data * seqno_data, u8 enc);
+
+void ioam_seqno_init_data (ioam_seqno_data * data);
+
+void ioam_seqno_init_rx_info (seqno_rx_info * data);
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_LIB_E2E_IOAM_SEQNO_LIB_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-pot/math64.h b/src/plugins/ioam/lib-pot/math64.h
new file mode 100644
index 00000000..4c608a37
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/math64.h
@@ -0,0 +1,159 @@
+/*
+ * math64.h provides 64-bit unsigned integer add and multiply followed by a
+ * modulo operation. linux/math64.h provides divide and multiply for 64-bit
+ * integers, but:
+ * 1. multiply: mul_u64_u64_shr returns only 64 bits of the result and has to
+ * be called twice to obtain the complete 128 bits of the result.
+ * 2. A modulo operation on the result of adding or multiplying u64 values,
+ * which may produce integers wider than 64 bits, is not supported.
+ * Hence this header combines add/multiply followed by modulo of u64
+ * integers, always resulting in a u64.
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef include_vnet_math64_h
+#define include_vnet_math64_h
+#include <stdint.h>
+
+/*
+ * multiplies and returns result in hi and lo
+ */
+static inline void mul64by64(u64 a, u64 b, u64 * hi, u64 * lo)
+{
+ u64 a_lo = (u64) (u32) a;
+ u64 a_hi = a >> 32;
+ u64 b_lo = (u64) (u32) b;
+ u64 b_hi = b >> 32;
+
+ u64 p0 = a_lo * b_lo;
+ u64 p1 = a_lo * b_hi;
+ u64 p2 = a_hi * b_lo;
+ u64 p3 = a_hi * b_hi;
+
+ u32 cy = (u32) (((p0 >> 32) + (u32) p1 + (u32) p2) >> 32);
+
+ *lo = p0 + (p1 << 32) + (p2 << 32);
+ *hi = p3 + (p1 >> 32) + (p2 >> 32) + cy;
+ return;
+}
+
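+/* 2**64 as a double: used to fold the high 64-bit word into the
+   floating-point quotient estimate below */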
+#define TWO64 18446744073709551616.0
+
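+/*
+ * Computes (x * 2**64 + y) mod m, where di is the precomputed 1.0 / m.
+ * A floating-point estimate of the quotient is multiplied back against m
+ * and then refined with at most one correction pass in each direction.
+ */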
+static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
+{
+ u64 q1, q2, q;
+ u64 p1, p0;
+ double dq;
+
+ /* calculate quotient first pass 53 bits */
+ dq = (TWO64 * (double)x + (double)y) * di;
+
+ if (dq >= TWO64)
+ q1 = 0xfffffffffffff800L;
+ else
+ q1 = dq;
+
+ /* q1 * m to compare the product to the dividend. */
+ mul64by64(q1, m, &p1, &p0);
+
+ /* Adjust the quotient: is it greater than the actual result? */
+ if (x < p1 || (x == p1 && y < p0))
+ {
+ /* q1 > quotient. calculate abs remainder */
+ x = p1 - (x + (p0 < y));
+ y = p0 - y;
+
+ /* use the remainder as new dividend to adjust quotient */
+ q2 = (u64) ((TWO64 * (double)x + (double)y) * di);
+ mul64by64(q2, m, &p1, &p0);
+
+ q = q1 - q2;
+ if (x < p1 || (x == p1 && y <= p0))
+ {
+ y = p0 - y;
+ }
+ else
+ {
+ y = p0 - y;
+ y += m;
+ q--;
+ }
+ }
+ else
+ {
+ x = x - (p1 + (y < p0));
+ y = y - p0;
+
+ q2 = (u64) ((TWO64 * (double)x + (double)y) * di);
+ mul64by64(q2, m, &p1, &p0);
+
+ q = q1 + q2;
+ if (x < p1 || (x == p1 && y < p0))
+ {
+ y = y - p0;
+ y += m;
+ q--;
+ }
+ else
+ {
+ y = y - p0;
+ if (y >= m)
+ {
+ y -= m;
+ q++;
+ }
+ }
+ }
+
+ return y;
+}
+
+/*
+ * returns a % p
+ */
+static inline u64 mod64by64(u64 a, u64 p, double primeinv)
+{
+ return (mod128by64(0, a, p, primeinv));
+}
+
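+/*
+ * 128-bit add: sets *whi only when the low-word addition carries, so the
+ * caller must zero-initialize *whi (as add64_mod below does).
+ */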
+static inline void add64(u64 a, u64 b, u64 * whi, u64 * wlo)
+{
+ *wlo = a + b;
+ if (*wlo < a)
+ *whi = 1;
+
+}
+
+/*
+ * returns (a + b)%p
+ */
+static inline u64 add64_mod(u64 a, u64 b, u64 p, double pi)
+{
+ u64 shi = 0, slo = 0;
+
+ add64(a, b, &shi, &slo);
+ return (mod128by64(shi, slo, p, pi));
+}
+
+/*
+ * returns (ab) % p
+ */
+static inline u64 mul64_mod(u64 a, u64 b, u64 p, double pi)
+{
+ u64 phi = 0, plo = 0;
+
+ mul64by64(a, b, &phi, &plo);
+ return (mod128by64(phi, plo, p, pi));
+}
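+
+/*
+ * Illustrative usage sketch (assumed values, not part of this header):
+ * the caller precomputes the reciprocal of the modulus once and reuses it.
+ *
+ *   u64 a = 5, b = 7;                  // example operands
+ *   u64 p = 0xffffffffffffffc5ULL;     // any u64 modulus, e.g. a prime
+ *   double pi = 1.0 / p;               // precomputed reciprocal
+ *   u64 r1 = add64_mod (a, b, p, pi);  // (a + b) % p
+ *   u64 r2 = mul64_mod (a, b, p, pi);  // (a * b) % p
+ */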
+
+#endif
diff --git a/src/plugins/ioam/lib-pot/pot.api b/src/plugins/ioam/lib-pot/pot.api
new file mode 100644
index 00000000..c377cde0
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot.api
@@ -0,0 +1,105 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief Proof of Transit (POT): Set POT profile
+ @param id - id of the profile
+ @param validator - True/False to indicate if this is a verifier
+ @param secret_key - Verification key
+ @param secret_share - Share of the 1st polynomial
+ @param prime - Prime number used for modulo operation
+ @param max_bits - Max bits to be used for random number generation
+ @param lpc - Lagrange basis polynomial
+ @param polynomial_public - pre-evaluated public polynomial
+ @param list_name_len - length of the name of this profile list
+ @param list_name - name of this profile list
+*/
+autoreply define pot_profile_add {
+ u32 client_index;
+ u32 context;
+ u8 id;
+ u8 validator;
+ u64 secret_key;
+ u64 secret_share;
+ u64 prime;
+ u8 max_bits;
+ u64 lpc;
+ u64 polynomial_public;
+ u8 list_name_len;
+ u8 list_name[0];
+};
+
+/** \brief Proof of Transit (POT): Activate POT profile in the list
+ @param id - id of the profile
+ @param list_name_len - length of the name of this profile list
+ @param list_name - name of this profile list
+*/
+autoreply define pot_profile_activate {
+ u32 client_index;
+ u32 context;
+ u8 id;
+ u8 list_name_len;
+ u8 list_name[0];
+};
+
+/** \brief Delete POT Profile
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param list_name_len - length of the name of the profile list
+ @param list_name - name of profile list to delete
+*/
+autoreply define pot_profile_del {
+ u32 client_index;
+ u32 context;
+ u8 list_name_len;
+ u8 list_name[0];
+};
+
+/** \brief Show POT Profiles
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - id of the profile
+*/
+define pot_profile_show_config_dump {
+ u32 client_index;
+ u32 context;
+ u8 id;
+};
+
+/** \brief Show POT profile reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param id - id of the profile
+ @param validator - True/False to indicate if this is a verifier
+ @param secret_key - Verification key
+ @param secret_share - Share of the 1st polynomial
+ @param prime - Prime number used for modulo operation
+ @param bit_mask - Bit mask derived from max_bits, used to bound random numbers
+ @param lpc - Lagrange basis polynomial
+ @param polynomial_public - pre-evaluated public polynomial
+*/
+define pot_profile_show_config_details {
+ u32 context;
+ i32 retval;
+ u8 id;
+ u8 validator;
+ u64 secret_key;
+ u64 secret_share;
+ u64 prime;
+ u64 bit_mask;
+ u64 lpc;
+ u64 polynomial_public;
+};
diff --git a/src/plugins/ioam/lib-pot/pot_all_api_h.h b/src/plugins/ioam/lib-pot/pot_all_api_h.h
new file mode 100644
index 00000000..63967c45
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/lib-pot/pot.api.h>
diff --git a/src/plugins/ioam/lib-pot/pot_api.c b/src/plugins/ioam/lib-pot/pot_api.c
new file mode 100644
index 00000000..cc1b7b76
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_api.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * pot_api.c - Proof of Transit related APIs to create
+ * and maintain profiles
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/lib-pot/pot_util.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <ioam/lib-pot/pot_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_api_version
+
+#define REPLY_MSG_ID_BASE sm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* List of message types that this plugin understands */
+#define foreach_pot_plugin_api_msg \
+_(POT_PROFILE_ADD, pot_profile_add) \
+_(POT_PROFILE_ACTIVATE, pot_profile_activate) \
+_(POT_PROFILE_DEL, pot_profile_del) \
+_(POT_PROFILE_SHOW_CONFIG_DUMP, pot_profile_show_config_dump)
+
+static void vl_api_pot_profile_add_t_handler
+(vl_api_pot_profile_add_t *mp)
+{
+ pot_main_t * sm = &pot_main;
+ int rv = 0;
+ vl_api_pot_profile_add_reply_t * rmp;
+ u8 id;
+ pot_profile *profile = NULL;
+ u8 *name = 0;
+
+ if (mp->list_name_len)
+ name = format(0, "%s", mp->list_name);
+
+ pot_profile_list_init(name);
+ id = mp->id;
+ profile = pot_profile_find(id);
+ if (profile) {
+ rv = pot_profile_create(profile,
+ clib_net_to_host_u64(mp->prime),
+ clib_net_to_host_u64(mp->polynomial_public),
+ clib_net_to_host_u64(mp->lpc),
+ clib_net_to_host_u64(mp->secret_share));
+ if (rv != 0)
+ goto ERROROUT;
+ if (1 == mp->validator)
+ (void)pot_set_validator(profile, clib_net_to_host_u64(mp->secret_key));
+ (void)pot_profile_set_bit_mask(profile, mp->max_bits);
+ } else {
+ rv = -3;
+ }
+ ERROROUT:
+ vec_free(name);
+ REPLY_MACRO(VL_API_POT_PROFILE_ADD_REPLY);
+}
+
+static void send_pot_profile_details(vl_api_pot_profile_show_config_dump_t *mp, u8 id)
+{
+ vl_api_pot_profile_show_config_details_t * rmp;
+ pot_main_t * sm = &pot_main;
+ pot_profile *profile = pot_profile_find(id);
+ int rv = 0;
+ if(profile){
+ REPLY_MACRO2(VL_API_POT_PROFILE_SHOW_CONFIG_DETAILS,
+ rmp->id=id;
+ rmp->validator=profile->validator;
+ rmp->secret_key=clib_host_to_net_u64(profile->secret_key);
+ rmp->secret_share=clib_host_to_net_u64(profile->secret_share);
+ rmp->prime=clib_host_to_net_u64(profile->prime);
+ rmp->bit_mask=clib_host_to_net_u64(profile->bit_mask);
+ rmp->lpc=clib_host_to_net_u64(profile->lpc);
+ rmp->polynomial_public=clib_host_to_net_u64(profile->poly_pre_eval);
+ );
+ }
+ else{
+ REPLY_MACRO2(VL_API_POT_PROFILE_SHOW_CONFIG_DETAILS,
+ rmp->id=id;
+ rmp->validator=0;
+ rmp->secret_key=0;
+ rmp->secret_share=0;
+ rmp->prime=0;
+ rmp->bit_mask=0;
+ rmp->lpc=0;
+ rmp->polynomial_public=0;
+ );
+ }
+}
+
+static void vl_api_pot_profile_show_config_dump_t_handler
+(vl_api_pot_profile_show_config_dump_t *mp)
+{
+ u8 id = mp->id;
+ u8 dump_call_id = ~0;
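+ /* an id of ~0 requests a dump of every profile */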
+ if(dump_call_id==id){
+ for(id=0;id<MAX_POT_PROFILES;id++)
+ send_pot_profile_details(mp,id);
+ }
+ else
+ send_pot_profile_details(mp,id);
+}
+
+static void vl_api_pot_profile_activate_t_handler
+(vl_api_pot_profile_activate_t *mp)
+{
+ pot_main_t * sm = &pot_main;
+ int rv = 0;
+ vl_api_pot_profile_activate_reply_t * rmp;
+ u8 id;
+ u8 *name = NULL;
+
+ if (mp->list_name_len)
+ name = format(0, "%s", mp->list_name);
+ if (!pot_profile_list_is_enabled(name)) {
+ rv = -1;
+ } else {
+ id = mp->id;
+ rv = pot_profile_set_active(id);
+ }
+
+ vec_free(name);
+ REPLY_MACRO(VL_API_POT_PROFILE_ACTIVATE_REPLY);
+}
+
+
+static void vl_api_pot_profile_del_t_handler
+(vl_api_pot_profile_del_t *mp)
+{
+ pot_main_t * sm = &pot_main;
+ int rv = 0;
+ vl_api_pot_profile_del_reply_t * rmp;
+
+ clear_pot_profiles();
+
+ REPLY_MACRO(VL_API_POT_PROFILE_DEL_REPLY);
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+pot_plugin_api_hookup (vlib_main_t *vm)
+{
+ pot_main_t * sm = &pot_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_pot_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (pot_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_pot;
+#undef _
+}
+
+static clib_error_t * pot_init (vlib_main_t * vm)
+{
+ pot_main_t * sm = &pot_main;
+ clib_error_t * error = 0;
+ u8 * name;
+
+ bzero(sm, sizeof(pot_main));
+ (void)pot_util_init();
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main();
+
+ name = format (0, "ioam_pot_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = pot_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, &api_main);
+
+ vec_free(name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (pot_init);
diff --git a/src/plugins/ioam/lib-pot/pot_msg_enum.h b/src/plugins/ioam/lib-pot/pot_msg_enum.h
new file mode 100644
index 00000000..a4a88bed
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_pot_msg_enum_h
+#define included_pot_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/lib-pot/pot_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_pot_msg_enum_h */
diff --git a/src/plugins/ioam/lib-pot/pot_test.c b/src/plugins/ioam/lib-pot/pot_test.c
new file mode 100644
index 00000000..1c6dd02d
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_test.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * pot_test.c - test harness for pot plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+#define __plugin_msg_base pot_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <ioam/lib-pot/pot_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} pot_test_main_t;
+
+pot_test_main_t pot_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(pot_profile_add_reply) \
+_(pot_profile_activate_reply) \
+_(pot_profile_del_reply)
+
+#define foreach_custom_reply_retval_handler \
+_(pot_profile_show_config_details, \
+ errmsg(" ID:%d\n",mp->id); \
+ errmsg(" Validator:%d\n",mp->validator); \
+ errmsg(" secret_key:%Lx\n",clib_net_to_host_u64(mp->secret_key)); \
+ errmsg(" secret_share:%Lx\n",clib_net_to_host_u64(mp->secret_share)); \
+ errmsg(" prime:%Lx\n",clib_net_to_host_u64(mp->prime)); \
+ errmsg(" bitmask:%Lx\n",clib_net_to_host_u64(mp->bit_mask)); \
+ errmsg(" lpc:%Lx\n",clib_net_to_host_u64(mp->lpc)); \
+ errmsg(" public poly:%Lx\n",clib_net_to_host_u64(mp->polynomial_public)); \
+ )
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = pot_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+#define _(n,body) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = pot_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ do{body;}while(0); \
+ }
+foreach_custom_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(POT_PROFILE_ADD_REPLY, pot_profile_add_reply) \
+_(POT_PROFILE_ACTIVATE_REPLY, pot_profile_activate_reply) \
+_(POT_PROFILE_DEL_REPLY, pot_profile_del_reply) \
+_(POT_PROFILE_SHOW_CONFIG_DETAILS, pot_profile_show_config_details)
+
+static int api_pot_profile_add (vat_main_t *vam)
+{
+#define MAX_BITS 64
+ unformat_input_t *input = vam->input;
+ vl_api_pot_profile_add_t *mp;
+ u8 *name = NULL;
+ u64 prime = 0;
+ u64 secret_share = 0;
+ u64 secret_key = 0;
+ u32 bits = MAX_BITS;
+ u64 lpc = 0, poly2 = 0;
+ u32 id = 0; /* u32: unformat %d writes a full 32-bit word */
+ int rv = 0;
+ int ret;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(input, "name %s", &name))
+ ;
+ else if(unformat(input, "id %d", &id))
+ ;
+ else if (unformat(input, "validator-key 0x%Lx", &secret_key))
+ ;
+ else if (unformat(input, "prime-number 0x%Lx", &prime))
+ ;
+ else if (unformat(input, "secret-share 0x%Lx", &secret_share))
+ ;
+ else if (unformat(input, "polynomial-public 0x%Lx", &poly2))
+ ;
+ else if (unformat(input, "lpc 0x%Lx", &lpc))
+ ;
+ else if (unformat(input, "bits-in-random %u", &bits))
+ {
+ if (bits > MAX_BITS)
+ bits = MAX_BITS;
+ }
+ else
+ break;
+ }
+
+ if (!name)
+ {
+ errmsg ("name required\n");
+ rv = -99;
+ goto OUT;
+ }
+
+ M2(POT_PROFILE_ADD, mp, vec_len(name));
+
+ mp->list_name_len = vec_len(name);
+ clib_memcpy(mp->list_name, name, mp->list_name_len);
+ mp->secret_share = clib_host_to_net_u64(secret_share);
+ mp->polynomial_public = clib_host_to_net_u64(poly2);
+ mp->lpc = clib_host_to_net_u64(lpc);
+ mp->prime = clib_host_to_net_u64(prime);
+ if (secret_key != 0)
+ {
+ mp->secret_key = clib_host_to_net_u64(secret_key);
+ mp->validator = 1;
+ }
+ else
+ {
+ mp->validator = 0;
+ }
+ mp->id = (u8) id;
+ mp->max_bits = bits;
+
+ vec_free(name);
+ S(mp);
+ W (ret);
+ return ret;
+
+OUT:
+ vec_free(name);
+ return(rv);
+}
+
+static int api_pot_profile_activate (vat_main_t *vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_pot_profile_activate_t *mp;
+ u8 *name = NULL;
+ u32 id = 0; /* u32: unformat %d writes a full 32-bit word */
+ int rv = 0;
+ int ret;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(input, "name %s", &name))
+ ;
+ else if(unformat(input, "id %d", &id))
+ ;
+ else
+ break;
+ }
+
+ if (!name)
+ {
+ errmsg ("name required\n");
+ rv = -99;
+ goto OUT;
+ }
+
+ M2(POT_PROFILE_ACTIVATE, mp, vec_len(name));
+
+ mp->list_name_len = vec_len(name);
+ clib_memcpy(mp->list_name, name, mp->list_name_len);
+ mp->id = (u8) id;
+
+ vec_free(name);
+ S(mp);
+ W (ret);
+ return ret;
+
+OUT:
+ vec_free(name);
+ return(rv);
+}
+
+
+static int api_pot_profile_del (vat_main_t *vam)
+{
+ vl_api_pot_profile_del_t *mp;
+ int ret;
+
+ M(POT_PROFILE_DEL, mp);
+ mp->list_name_len = 0;
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static int api_pot_profile_show_config_dump (vat_main_t *vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_pot_profile_show_config_dump_t *mp;
+ u32 id = 0; /* u32: unformat %d writes a full 32-bit word */
+ int ret;
+
+ while(unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if(unformat(input,"id %d",&id));
+ else
+ break;
+ }
+ M(POT_PROFILE_SHOW_CONFIG_DUMP, mp);
+
+ mp->id = (u8) id;
+
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(pot_profile_add, "name <name> id [0-1] " \
+ "prime-number <0xu64> bits-in-random [0-64] " \
+ "secret-share <0xu64> lpc <0xu64> polynomial-public <0xu64> " \
+ "[validator-key <0xu64>] [validity <0xu64>]") \
+_(pot_profile_activate, "name <name> id [0-1] ") \
+_(pot_profile_del, "[id <nn>]") \
+_(pot_profile_show_config_dump, "id [0-1]")
+
+static void
+pot_vat_api_hookup (vat_main_t *vam)
+{
+ pot_test_main_t * sm = &pot_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ pot_test_main_t * sm = &pot_test_main;
+ u8 * name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "ioam_pot_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~0)
+ pot_vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/ioam/lib-pot/pot_util.c b/src/plugins/ioam/lib-pot/pot_util.c
new file mode 100644
index 00000000..a253ad41
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_util.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <vppinfra/mem.h>
+#include "math64.h"
+#include "pot_util.h"
+
+pot_main_t pot_main;
+
+static void pot_profile_cleanup(pot_profile *profile);
+
+static void pot_main_profiles_reset (void)
+{
+ pot_main_t *sm = &pot_main;
+ int i = 0;
+
+ for (i = 0; i < MAX_POT_PROFILES; i++)
+ {
+ pot_profile_cleanup(&(sm->profile_list[i]));
+ }
+ sm->active_profile_id = 0;
+ if (sm->profile_list_name)
+ vec_free(sm->profile_list_name);
+ sm->profile_list_name = NULL;
+}
+
+int pot_util_init (void)
+{
+ pot_main_profiles_reset();
+
+ return(0);
+}
+
+static void pot_profile_init(pot_profile * new, u8 id)
+{
+ if (new)
+ {
+ memset(new, 0, sizeof(pot_profile));
+ new->id = id;
+ }
+}
+
+pot_profile *pot_profile_find(u8 id)
+{
+ pot_main_t *sm = &pot_main;
+
+ if (id < MAX_POT_PROFILES)
+ {
+ return (&(sm->profile_list[id]));
+ }
+ return (NULL);
+}
+static int pot_profile_name_equal (u8 *name0, u8 *name1)
+{
+ int len0, len1;
+
+ len0 = vec_len (name0);
+ len1 = vec_len (name1);
+ if (len0 != len1)
+ return(0);
+ return (0==strncmp ((char *) name0, (char *)name1, len0));
+}
+
+int pot_profile_list_is_enabled (u8 *name)
+{
+ pot_main_t *sm = &pot_main;
+ return (pot_profile_name_equal(sm->profile_list_name, name));
+}
+
+void pot_profile_list_init(u8 * profile_list_name)
+{
+ pot_main_t *sm = &pot_main;
+ int i = 0;
+
+ /* If it is the same profile list skip reset */
+ if (pot_profile_name_equal(sm->profile_list_name, profile_list_name))
+ {
+ return;
+ }
+
+ pot_main_profiles_reset();
+ if (vec_len(profile_list_name))
+ sm->profile_list_name = (u8 *)vec_dup(profile_list_name);
+ else
+ sm->profile_list_name = 0;
+ sm->active_profile_id = 0;
+
+ for (i = 0; i < MAX_POT_PROFILES; i++)
+ {
+ pot_profile_init(&(sm->profile_list[i]), i);
+ }
+}
+
+static void pot_profile_cleanup(pot_profile * profile)
+{
+ u16 id = profile->id;
+
+ memset(profile, 0, sizeof(pot_profile));
+ profile->id = id; /* Restore id alone */
+}
+
+int pot_profile_create(pot_profile * profile, u64 prime,
+ u64 poly2, u64 lpc, u64 secret_share)
+{
+ if (profile && !profile->in_use)
+ {
+ pot_profile_cleanup(profile);
+ profile->prime = prime;
+ profile->primeinv = 1.0 / prime;
+ profile->lpc = lpc;
+ profile->poly_pre_eval = poly2;
+ profile->secret_share = secret_share;
+ profile->total_pkts_using_this_profile = 0;
+ profile->valid = 1;
+ return(0);
+ }
+
+ return(-1);
+}
+
+int pot_set_validator(pot_profile * profile, u64 key)
+{
+ if (profile && !profile->in_use)
+ {
+ profile->validator = 1;
+ profile->secret_key = key;
+ return(0);
+ }
+ return(-1);
+}
+
+always_inline u64 pot_update_cumulative_inline(u64 cumulative, u64 random,
+ u64 secret_share, u64 prime, u64 lpc, u64 pre_split, double prime_inv)
+{
+ u64 share_random = 0;
+ u64 cumulative_new = 0;
+
+ /*
+ * calculate split share for random
+ */
+ share_random = add64_mod(pre_split, random, prime, prime_inv);
+
+ /*
+ * lpc * (share_secret + share_random)
+ */
+ share_random = add64_mod(share_random, secret_share, prime, prime_inv);
+ share_random = mul64_mod(share_random, lpc, prime, prime_inv);
+
+ cumulative_new = add64_mod(cumulative, share_random, prime, prime_inv);
+
+ return (cumulative_new);
+}
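+
+/*
+ * Equivalently, with all arithmetic done mod prime:
+ *   cumulative' = cumulative + lpc * (pre_split + random + secret_share)
+ */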
+
+u64 pot_update_cumulative(pot_profile * profile, u64 cumulative, u64 random)
+{
+ if (profile && profile->valid != 0)
+ {
+ return (pot_update_cumulative_inline(cumulative, random, profile->secret_share,
+ profile->prime, profile->lpc, profile->poly_pre_eval,
+ profile->primeinv));
+ }
+ return (0);
+}
+
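+/*
+ * The first comparison is a fast path for the common case where
+ * random + secret does not wrap past the prime; the second applies
+ * the full modular reduction.
+ */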
+always_inline u8 pot_validate_inline(u64 secret, u64 prime, double prime_inv,
+ u64 cumulative, u64 random)
+{
+ if (cumulative == (random + secret))
+ {
+ return (1);
+ }
+ else if (cumulative == add64_mod(random, secret, prime, prime_inv))
+ {
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * return True if the cumulative matches secret from a profile
+ */
+u8 pot_validate(pot_profile * profile, u64 cumulative, u64 random)
+{
+ if (profile && profile->validator)
+ {
+ return (pot_validate_inline(profile->secret_key, profile->prime,
+ profile->primeinv, cumulative, random));
+ }
+ return (0);
+}
+
+/*
+ * Utility function to get a random number per packet
+ */
+u64 pot_generate_random(pot_profile * profile)
+{
+ u64 random = 0;
+ int32_t second_half;
+ static u32 seed = 0;
+
+ if (PREDICT_FALSE(!seed))
+ seed = random_default_seed();
+
+ /*
+ * Upper 4 bytes seconds
+ */
+ random = (u64) time(NULL);
+
+ random &= 0xffffffff;
+ random = random << 32;
+ /*
+ * Lower 4 bytes random number
+ */
+ second_half = random_u32(&seed);
+
+ random |= second_half;
+
+ if (PREDICT_TRUE(profile != NULL))
+ {
+ random &= profile->bit_mask;
+ }
+ return (random);
+}
+
+int pot_profile_set_bit_mask(pot_profile * profile, u16 bits)
+{
+ int sizeInBits;
+
+ if (profile && !profile->in_use)
+ {
+ sizeInBits = sizeof(profile->bit_mask) * 8;
+ profile->bit_mask = (bits >= sizeInBits) ?
+ (u64) ~0 : (((u64) 1 << bits) - 1);
+ return(0);
+ }
+ return(-1);
+}
+
+clib_error_t *clear_pot_profile_command_fn(vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+
+ pot_main_profiles_reset();
+
+ return 0;
+}
+
+void clear_pot_profiles()
+{
+ clear_pot_profile_command_fn(0, 0, 0);
+}
+
+VLIB_CLI_COMMAND(clear_pot_profile_command) =
+{
+.path = "clear pot profile",
+.short_help = "clear pot profile [<index>|all]",
+.function = clear_pot_profile_command_fn,
+};
+
+static clib_error_t *set_pot_profile_command_fn(vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u64 prime = 0;
+ u64 secret_share = 0;
+ u64 secret_key = 0;
+ u8 validator = 0;
+ u32 profile_id = ~0;
+ u32 bits;
+ u64 lpc = 0, poly2 = 0;
+ pot_profile *profile = NULL;
+ u8 *profile_list_name = NULL;
+
+ bits = MAX_BITS;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(input, "name %s",
+ &profile_list_name));
+ else if (unformat(input, "id %d", &profile_id))
+ ;
+ else if (unformat(input, "validate-key 0x%Lx", &secret_key))
+ validator = 1;
+ else if (unformat(input, "prime-number 0x%Lx", &prime))
+ ;
+ else if (unformat(input, "secret_share 0x%Lx", &secret_share))
+ ;
+ else if (unformat(input, "polynomial2 0x%Lx", &poly2))
+ ;
+ else if (unformat(input, "lpc 0x%Lx", &lpc))
+ ;
+ else if (unformat(input, "bits-in-random %d", &bits))
+ {
+ if (bits > MAX_BITS)
+ bits = MAX_BITS;
+ }
+ else
+ break;
+ }
+ if (profile_list_name == 0)
+ {
+ return clib_error_return(0, "Name cannot be null");
+ }
+ pot_profile_list_init(profile_list_name);
+ profile = pot_profile_find(profile_id);
+
+ if (profile)
+ {
+ pot_profile_create(profile, prime, poly2, lpc, secret_share);
+ if (validator)
+ pot_set_validator(profile, secret_key);
+ pot_profile_set_bit_mask(profile, bits);
+ }
+ vec_free(profile_list_name);
+ return 0;
+}
+
+VLIB_CLI_COMMAND(set_pot_profile_command) =
+{
+.path = "set pot profile",
+.short_help = "set pot profile name <string> id [0-1] [validator-key 0xu64] \
+ prime-number 0xu64 secret_share 0xu64 lpc 0xu64 \
+ polynomial2 0xu64 bits-in-random [0-64] ",
+.function = set_pot_profile_command_fn,
+};
+
+static clib_error_t *set_pot_profile_activate_command_fn(vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pot_main_t *sm = &pot_main;
+ u8 *profile_list_name = NULL;
+ u32 id = 0;
+ clib_error_t *result = NULL;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(input, "name %s",
+ &profile_list_name));
+ else if (unformat(input, "id %d", &id))
+ ;
+ else
+ return clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ if (profile_list_name == 0)
+ {
+ return clib_error_return(0, "Name cannot be null");
+ }
+
+ if (!pot_profile_list_is_enabled(profile_list_name)) {
+ result = clib_error_return(0, "%s list is not enabled, profile in use %s",
+ profile_list_name, sm->profile_list_name);
+ } else if (0 != pot_profile_set_active((u8)id)) {
+ result = clib_error_return(0, "Profile %d not defined in %s",
+ id, sm->profile_list_name);
+ }
+ vec_free(profile_list_name);
+ return result;
+}
+
+VLIB_CLI_COMMAND(set_pot_profile_activate_command) =
+{
+.path = "set pot profile-active",
+.short_help = "set pot profile-active name <string> id [0-1]",
+.function = set_pot_profile_activate_command_fn,
+};
+
+static clib_error_t *show_pot_profile_command_fn(vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pot_main_t *sm = &pot_main;
+ pot_profile *p = NULL;
+ u16 i;
+ u8 *s = 0;
+
+ if (vec_len(sm->profile_list_name) == 0)
+ {
+ s = format(s, "POT Profiles not configured\n");
+ vlib_cli_output(vm, "%v", s);
+ return 0;
+ }
+ s = format(s, "Profile list in use : %s\n",sm->profile_list_name);
+ for (i = 0; i < MAX_POT_PROFILES; i++)
+ {
+ p = pot_profile_find(i);
+ if (p->valid == 0)
+ continue;
+ s = format(s, "POT Profile at index: %d\n", i);
+ s = format(s, " Id : %d\n", p->id);
+ s = format(s, " Validator : %s (%d)\n",
+ (p->validator) ? "True" : "False", p->validator);
+ if (p->validator == 1)
+ s = format(s, " Secret key : 0x%Lx (%Ld)\n",
+ p->secret_key, p->secret_key);
+ s = format(s, " Secret share : 0x%Lx (%Ld)\n",
+ p->secret_share, p->secret_share);
+ s = format(s, " Prime number : 0x%Lx (%Ld)\n",
+ p->prime, p->prime);
+ s = format(s, "2nd polynomial(eval) : 0x%Lx (%Ld)\n",
+ p->poly_pre_eval, p->poly_pre_eval);
+ s = format(s, " LPC : 0x%Lx (%Ld)\n", p->lpc, p->lpc);
+
+ s = format(s, " Bit mask : 0x%Lx (%Ld)\n",
+ p->bit_mask, p->bit_mask);
+ }
+
+ p = pot_profile_find(sm->active_profile_id);
+
+ if (p && p->valid && p->in_use) {
+ s = format(s, "\nProfile index in use: %d\n", sm->active_profile_id);
+ s = format(s, "Pkts passed : 0x%Lx (%Ld)\n",
+ p->total_pkts_using_this_profile,
+ p->total_pkts_using_this_profile);
+ if (pot_is_decap(p))
+ s = format(s, " This is Decap node. \n");
+ } else {
+ s = format(s, "\nProfile index in use: None\n");
+ }
+ vlib_cli_output(vm, "%v", s);
+ vec_free(s);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND(show_pot_profile_command) =
+{
+.path = "show pot profile",
+.short_help = "show pot profile",
+.function = show_pot_profile_command_fn,
+};
diff --git a/src/plugins/ioam/lib-pot/pot_util.h b/src/plugins/ioam/lib-pot/pot_util.h
new file mode 100644
index 00000000..9df31fae
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_util.h
@@ -0,0 +1,195 @@
+/*
+ * pot_util.h -- Proof Of Transit Utility Header
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef include_vnet_pot_util_h
+#define include_vnet_pot_util_h
+
+#include <vnet/ip/ip6_hop_by_hop.h>
+#define debug_ioam debug_ioam_fn
+/* Don't change this size (256); it is shared across multiple components */
+#define PATH_NAME_SIZE 256
+
+/* Ring size. This must match the value used in ODL; do not change it
+ without a corresponding change in ODL. */
+#define MAX_POT_PROFILES 2
+
+/**
+ * Usage:
+ *
+ * On any node that participates in Proof of Transit:
+ *
+ * Step 1: Initialize this library by calling pot_util_init()
+ * Step 2: Set up a proof of transit profile that contains all the parameters
+ * needed to compute the cumulative. Call these functions:
+ * pot_profile_find
+ * pot_profile_create
+ * pot_profile_set_bit_mask - to bound the numbers used in the computation
+ * and the random number to <= 64 bits
+ * Step 2a: On the validator, additionally call:
+ * pot_set_validator
+ * Step 2b: On the initial node, enable the profile to be used:
+ * pot_profile_set_active (pot_profile_get_active returns the active profile)
+ * Step 3a: At the initial node, generate the random number that will be read
+ * by all other nodes:
+ * pot_generate_random
+ * Step 3b: At all nodes, including the initial node and the verifier, call
+ * this to compute the cumulative:
+ * pot_update_cumulative
+ * Step 4: At the verifier:
+ * pot_validate
+ *
+ */
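+
+/*
+ * Illustrative sketch of the steps above (prime, poly2, lpc, secret_share
+ * and secret_key are assumed parameter values):
+ *
+ *   pot_profile *p = pot_profile_find (0);
+ *   pot_profile_create (p, prime, poly2, lpc, secret_share);
+ *   pot_profile_set_bit_mask (p, 56);
+ *   pot_set_validator (p, secret_key);            // verifier only
+ *   u64 rnd = pot_generate_random (p);            // initial node
+ *   u64 cum = pot_update_cumulative (p, 0, rnd);  // every node
+ *   u8 ok = pot_validate (p, cum, rnd);           // verifier
+ */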
+
+typedef struct pot_profile_
+{
+ u8 id : 1;
+ u8 valid : 1;
+ u8 in_use : 1;
+ u64 random;
+ u8 validator;
+ u64 secret_key;
+ u64 secret_share;
+ u64 prime;
+ u64 lpc;
+ u64 poly_pre_eval;
+ u64 bit_mask;
+ u64 limit;
+ double primeinv;
+ u64 total_pkts_using_this_profile;
+} pot_profile;
+
+typedef struct {
+ /* Name of the default profile list in use*/
+ u8 *profile_list_name;
+ pot_profile profile_list[MAX_POT_PROFILES];
+ /* id of the currently active profile in the list */
+ u8 active_profile_id : 1;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} pot_main_t;
+
+extern pot_main_t pot_main;
+
+/*
+ * Initialize proof of transit
+ */
+int pot_util_init(void);
+void pot_profile_list_init(u8 * name);
+
+
+/*
+ * Find a pot profile by ID
+ */
+pot_profile *pot_profile_find(u8 id);
+
+static inline u16 pot_profile_get_id(pot_profile * profile)
+{
+ if (profile)
+ {
+ return (profile->id);
+ }
+ return (0);
+}
+
+/* setup and clean up profile */
+int pot_profile_create(pot_profile * profile, u64 prime,
+ u64 poly2, u64 lpc, u64 secret_share);
+/*
+ * Setup profile as a validator
+ */
+int pot_set_validator(pot_profile * profile, u64 key);
+
+/*
+ * Setup max bits to be used for random number generation
+ */
+#define MAX_BITS 64
+int pot_profile_set_bit_mask(pot_profile * profile, u16 bits);
+
+/*
+ * Given a random and cumulative compute the new cumulative for a given profile
+ */
+u64 pot_update_cumulative(pot_profile * profile, u64 cumulative, u64 random);
+
+/*
+ * return True if the cumulative matches secret from a profile
+ */
+u8 pot_validate(pot_profile * profile, u64 cumulative, u64 random);
+
+/*
+ * Utility function to get a random number per packet
+ */
+u64 pot_generate_random(pot_profile * profile);
+
+
+extern void clear_pot_profiles();
+extern int pot_profile_list_is_enabled(u8 *name);
+
+static inline u8 pot_is_decap(pot_profile * p)
+{
+ return (p->validator == 1);
+}
+
+static inline int pot_profile_set_active (u8 id)
+{
+ pot_main_t *sm = &pot_main;
+ pot_profile *profile = NULL;
+ pot_profile *current_active_prof = NULL;
+
+ current_active_prof = pot_profile_find(sm->active_profile_id);
+ profile = pot_profile_find(id);
+ if (profile && profile->valid) {
+ sm->active_profile_id = id;
+ current_active_prof->in_use = 0;
+ profile->in_use = 1;
+ return(0);
+ }
+ return(-1);
+}
+static inline u8 pot_profile_get_active_id (void)
+{
+ pot_main_t *sm = &pot_main;
+ return (sm->active_profile_id);
+}
+
+static inline pot_profile * pot_profile_get_active (void)
+{
+ pot_main_t *sm = &pot_main;
+ pot_profile *profile = NULL;
+ profile = pot_profile_find(sm->active_profile_id);
+ if (profile && profile->in_use)
+ return(profile);
+ return (NULL);
+}
+
+static inline void pot_profile_reset_usage_stats (pot_profile *pow)
+{
+ if (pow) {
+ pow->total_pkts_using_this_profile = 0;
+ }
+}
+
+static inline void pot_profile_incr_usage_stats (pot_profile *pow)
+{
+ if (pow) {
+ pow->total_pkts_using_this_profile++;
+ }
+}
+
+
+#endif
diff --git a/src/plugins/ioam/lib-trace/trace.api b/src/plugins/ioam/lib-trace/trace.api
new file mode 100644
index 00000000..2f45c6e2
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace.api
@@ -0,0 +1,70 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief iOAM6 Trace - Set the iOAM6 trace profile
+ @param trace_type - Type of trace requested
+ @param num_elts - Number of trace elements to be inserted
+ @param node_id - Trace Node ID
+ @param trace_tsp - Timestamp resolution
+ @param app_data - Application-specific opaque data
+*/
+autoreply define trace_profile_add {
+ u32 client_index;
+ u32 context;
+ u8 trace_type;
+ u8 num_elts;
+ u8 trace_tsp;
+ u32 node_id;
+ u32 app_data;
+};
+
+/** \brief Delete trace Profile
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoreply define trace_profile_del {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Show trace Profile
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define trace_profile_show_config {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Show trace config response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param trace_type - Type of trace requested
+ @param num_elts - Number of trace elements to be inserted
+ @param node_id - Trace Node ID
+ @param trace_tsp - Timestamp resolution
+ @param app_data - Application-specific opaque data
+*/
+define trace_profile_show_config_reply {
+ u32 context;
+ i32 retval;
+ u8 trace_type;
+ u8 num_elts;
+ u8 trace_tsp;
+ u32 node_id;
+ u32 app_data;
+};
diff --git a/src/plugins/ioam/lib-trace/trace_all_api_h.h b/src/plugins/ioam/lib-trace/trace_all_api_h.h
new file mode 100644
index 00000000..223f9545
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/lib-trace/trace.api.h>
diff --git a/src/plugins/ioam/lib-trace/trace_api.c b/src/plugins/ioam/lib-trace/trace_api.c
new file mode 100644
index 00000000..6889859b
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_api.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * trace_api.c - iOAM Trace related APIs to create
+ * and maintain profiles
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/lib-trace/trace_util.h>
+#include <ioam/lib-trace/trace_config.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <ioam/lib-trace/trace_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ * sm - the trace_main_t, for the message-id base
+ */
+
+#define TRACE_REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
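+/*
+ * Variant of the macro above that also runs "body" to fill in
+ * reply-specific fields before the message is sent.
+ */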
+/* *INDENT-OFF* */
+#define TRACE_REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+/* *INDENT-ON* */
+
+/* List of message types that this plugin understands */
+
+#define foreach_trace_plugin_api_msg \
+_(TRACE_PROFILE_ADD, trace_profile_add) \
+_(TRACE_PROFILE_DEL, trace_profile_del) \
+_(TRACE_PROFILE_SHOW_CONFIG, trace_profile_show_config)
+
+static void vl_api_trace_profile_add_t_handler
+ (vl_api_trace_profile_add_t * mp)
+{
+ trace_main_t *sm = &trace_main;
+ int rv = 0;
+ vl_api_trace_profile_add_reply_t *rmp;
+ trace_profile *profile = NULL;
+
+ profile = trace_profile_find ();
+ if (profile)
+ {
+ rv =
+ trace_profile_create (profile, mp->trace_type, mp->num_elts,
+ mp->trace_tsp, ntohl (mp->node_id),
+ ntohl (mp->app_data));
+ if (rv != 0)
+ goto ERROROUT;
+ }
+ else
+ {
+ rv = -3;
+ }
+ERROROUT:
+ TRACE_REPLY_MACRO (VL_API_TRACE_PROFILE_ADD_REPLY);
+}
+
+
+static void vl_api_trace_profile_del_t_handler
+ (vl_api_trace_profile_del_t * mp)
+{
+ trace_main_t *sm = &trace_main;
+ int rv = 0;
+ vl_api_trace_profile_del_reply_t *rmp;
+
+ clear_trace_profiles ();
+
+ TRACE_REPLY_MACRO (VL_API_TRACE_PROFILE_DEL_REPLY);
+}
+
+static void vl_api_trace_profile_show_config_t_handler
+ (vl_api_trace_profile_show_config_t * mp)
+{
+ trace_main_t *sm = &trace_main;
+ vl_api_trace_profile_show_config_reply_t *rmp;
+ int rv = 0;
+ trace_profile *profile = trace_profile_find ();
+ if (profile->valid)
+ {
+ TRACE_REPLY_MACRO2 (VL_API_TRACE_PROFILE_SHOW_CONFIG_REPLY,
+ rmp->trace_type = profile->trace_type;
+ rmp->num_elts = profile->num_elts;
+ rmp->trace_tsp = profile->trace_tsp;
+ rmp->node_id = htonl (profile->node_id);
+ rmp->app_data = htonl (profile->app_data);
+ );
+ }
+ else
+ {
+ TRACE_REPLY_MACRO2 (VL_API_TRACE_PROFILE_SHOW_CONFIG_REPLY,
+ rmp->trace_type = 0;
+ rmp->num_elts = 0; rmp->trace_tsp = 0;
+ rmp->node_id = 0; rmp->app_data = 0;
+ );
+ }
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+trace_plugin_api_hookup (vlib_main_t * vm)
+{
+ trace_main_t *sm = &trace_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_trace_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (trace_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_trace;
+#undef _
+}
+
+static clib_error_t *
+trace_init (vlib_main_t * vm)
+{
+ trace_main_t *sm = &trace_main;
+ clib_error_t *error = 0;
+ u8 *name;
+
+ bzero (sm, sizeof (trace_main));
+ (void) trace_util_init ();
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main ();
+
+ name = format (0, "ioam_trace_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = trace_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, &api_main);
+
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (trace_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-trace/trace_config.h b/src/plugins/ioam/lib-trace/trace_config.h
new file mode 100644
index 00000000..d9fa9ff2
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_config.h
@@ -0,0 +1,41 @@
+/*
+ * trace_config.h -- iOAM trace configuration utility routines
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef include_vnet_trace_config_h
+#define include_vnet_trace_config_h
+
+extern trace_main_t trace_main;
+
+always_inline trace_profile *
+trace_profile_find (void)
+{
+ trace_main_t *sm = &trace_main;
+
+ return (&(sm->profile));
+}
+
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-trace/trace_msg_enum.h b/src/plugins/ioam/lib-trace/trace_msg_enum.h
new file mode 100644
index 00000000..78c35665
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_trace_msg_enum_h
+#define included_trace_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
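+/* Each vl_msg_id(VL_API_X, ...) entry pulled in from the generated API
+ header expands to one enum member below, so the plugin's message IDs are
+ contiguous and VL_MSG_FIRST_AVAILABLE counts how many were defined. */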
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/lib-trace/trace_all_api_h.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_trace_msg_enum_h */
diff --git a/src/plugins/ioam/lib-trace/trace_test.c b/src/plugins/ioam/lib-trace/trace_test.c
new file mode 100644
index 00000000..1e287dee
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_test.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * trace_test.c - test harness for trace plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+#define __plugin_msg_base trace_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <ioam/lib-trace/trace_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define generated endian-swappers */
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} trace_test_main_t;
+
+trace_test_main_t trace_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(trace_profile_add_reply) \
+_(trace_profile_del_reply)
+
+#define foreach_custom_reply_handler \
+_(trace_profile_show_config_reply, \
+ if(mp->trace_type) \
+ { \
+ errmsg(" Trace Type : 0x%x (%d)\n",mp->trace_type, mp->trace_type); \
+ errmsg(" Trace timestamp precision : %d \n",mp->trace_tsp); \
+ errmsg(" Node Id : 0x%x (%d)\n",htonl(mp->node_id), htonl(mp->node_id)); \
+ errmsg(" App Data : 0x%x (%d)\n",htonl(mp->app_data), htonl(mp->app_data)); \
+ } \
+ else errmsg("No valid trace profile configuration found\n");)
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = trace_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+#define _(n,body) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = trace_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ if(retval>=0)do{body;} while(0); \
+ else errmsg("Error, retval: %d",retval); \
+ }
+foreach_custom_reply_handler;
+#undef _
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(TRACE_PROFILE_ADD_REPLY, trace_profile_add_reply) \
+_(TRACE_PROFILE_DEL_REPLY, trace_profile_del_reply) \
+_(TRACE_PROFILE_SHOW_CONFIG_REPLY, trace_profile_show_config_reply)
+
+static int
+api_trace_profile_add (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_trace_profile_add_t *mp;
+ /* unformat's %x and %d conversions store 32 bits; use u32 temporaries
+ so the narrower message fields are not overrun */
+ u32 trace_type = 0;
+ u32 num_elts = 0;
+ u32 node_id = 0;
+ u32 app_data = 0;
+ u32 trace_tsp = 0;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace-type 0x%x", &trace_type))
+ ;
+ else if (unformat (input, "trace-elts %d", &num_elts))
+ ;
+ else if (unformat (input, "trace-tsp %d", &trace_tsp))
+ ;
+ else if (unformat (input, "node-id 0x%x", &node_id))
+ ;
+ else if (unformat (input, "app-data 0x%x", &app_data))
+ ;
+
+ else
+ break;
+ }
+
+
+ M (TRACE_PROFILE_ADD, mp);
+
+ mp->trace_type = trace_type;
+ mp->trace_tsp = trace_tsp;
+ mp->node_id = htonl (node_id);
+ mp->app_data = htonl (app_data);
+ mp->num_elts = num_elts;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+
+
+static int
+api_trace_profile_del (vat_main_t * vam)
+{
+ vl_api_trace_profile_del_t *mp;
+ int ret;
+
+ M (TRACE_PROFILE_DEL, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_trace_profile_show_config (vat_main_t * vam)
+{
+ vl_api_trace_profile_show_config_t *mp;
+ int ret;
+
+ M (TRACE_PROFILE_SHOW_CONFIG, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(trace_profile_add, ""\
+ "trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> node-id <node id in hex> app-data <app_data in hex>") \
+_(trace_profile_del, "[id <nn>]") \
+_(trace_profile_show_config, "[id <nn>]")
+
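+/*
+ * Example VAT invocation assembled from the help strings above (all of
+ * the values are hypothetical):
+ *
+ * trace_profile_add trace-type 0x1f trace-elts 3 trace-tsp 1
+ * node-id 0xcafe app-data 0x1234
+ */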
+
+static void
+ioam_trace_vat_api_hookup (vat_main_t * vam)
+{
+ trace_test_main_t *sm = &trace_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ trace_test_main_t *sm = &trace_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "ioam_trace_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ ioam_trace_vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-trace/trace_util.c b/src/plugins/ioam/lib-trace/trace_util.c
new file mode 100644
index 00000000..b316a236
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_util.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <vppinfra/mem.h>
+#include "trace_util.h"
+#include "trace_config.h"
+
+trace_main_t trace_main;
+
+static int
+trace_profile_cleanup (trace_profile * profile)
+{
+
+ memset (profile, 0, sizeof (trace_profile));
+ profile->trace_tsp = TSP_MICROSECONDS; /* default to microseconds */
+ ip6_trace_profile_cleanup (); /* lib-trace_TODO: Remove this once IOAM-IPv6 transport is a plugin */
+ return 0;
+
+}
+
+static int
+trace_main_profiles_reset (void)
+{
+ int rv;
+
+ trace_main_t *sm = &trace_main;
+ rv = trace_profile_cleanup (&(sm->profile));
+ return (rv);
+}
+
+int
+trace_util_init (void)
+{
+ int rv;
+
+ rv = trace_main_profiles_reset ();
+ return (rv);
+}
+
+
+int
+trace_profile_create (trace_profile * profile, u8 trace_type, u8 num_elts,
+ u32 trace_tsp, u32 node_id, u32 app_data)
+{
+
+ if (!trace_type || !num_elts || !(node_id))
+ {
+ return (-1);
+ }
+ if (profile && !profile->valid)
+ {
+ profile->trace_type = trace_type;
+ profile->num_elts = num_elts;
+ profile->trace_tsp = trace_tsp;
+ profile->node_id = node_id;
+ profile->app_data = app_data;
+ profile->valid = 1;
+
+ /* lib-trace_TODO: Remove this once IOAM-IPv6 transport is a plugin */
+ ip6_trace_profile_setup ();
+ return (0);
+ }
+
+ return (-1);
+}
+
+
+
+clib_error_t *
+clear_trace_profile_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+
+ trace_main_profiles_reset ();
+ return 0;
+}
+
+void
+clear_trace_profiles (void)
+{
+ clear_trace_profile_command_fn (0, 0, 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND(clear_trace_profile_command) =
+{
+.path = "clear ioam-trace profile",
+.short_help = "clear ioam-trace profile [<index>|all]",
+.function = clear_trace_profile_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_trace_profile_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ /* unformat %x/%d store 32 bits; use u32 temporaries (narrowed when
+ passed to trace_profile_create) */
+ u32 trace_type = 0;
+ u32 num_elts = 0;
+ u32 node_id = 0;
+ u32 app_data = 0;
+ u32 trace_tsp = 0;
+ trace_profile *profile = NULL;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace-type 0x%x", &trace_type));
+ else if (unformat (input, "trace-elts %d", &num_elts));
+ else if (unformat (input, "trace-tsp %d", &trace_tsp));
+ else if (unformat (input, "node-id 0x%x", &node_id));
+ else if (unformat (input, "app-data 0x%x", &app_data));
+ else
+ break;
+ }
+ profile = trace_profile_find ();
+ if (profile)
+ {
+ trace_profile_create (profile, trace_type, num_elts, trace_tsp,
+ node_id, app_data);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_trace_profile_command, static) =
+{
+.path = "set ioam-trace profile",
+.short_help = "set ioam-trace \
+ trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> \
+ node-id <node id in hex> app-data <app_data in hex>",
+.function = set_trace_profile_command_fn,
+};
+/* *INDENT-ON* */
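+/*
+ * Example (hypothetical values):
+ * vpp# set ioam-trace profile trace-type 0x1f trace-elts 3 trace-tsp 1
+ * node-id 0xcafe app-data 0x1234
+ */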
+
+static clib_error_t *
+show_trace_profile_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ trace_profile *p = NULL;
+ u8 *s = 0;
+ p = trace_profile_find ();
+ if (!(p && p->valid))
+ {
+ s = format (s, "\nTrace configuration not valid\n");
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+ }
+ s = format (s, " HOP BY HOP OPTIONS - TRACE CONFIG - \n");
+ s = format (s, " Trace Type : 0x%x (%d)\n",
+ p->trace_type, p->trace_type);
+ s = format (s, " Trace timestamp precision : %d (%s)\n", p->trace_tsp,
+ (p->trace_tsp == TSP_SECONDS) ? "Seconds" :
+ (p->trace_tsp == TSP_MILLISECONDS) ? "Milliseconds" :
+ (p->trace_tsp == TSP_MICROSECONDS) ? "Microseconds" : "Nanoseconds");
+ s = format (s, " Num of trace nodes : %d\n", p->num_elts);
+ s = format (s, " Node-id : 0x%x (%d)\n", p->node_id, p->node_id);
+ s = format (s, " App Data : 0x%x (%d)\n", p->app_data, p->app_data);
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_trace_profile_command, static) =
+{
+.path = "show ioam-trace profile",
+.short_help = "show ioam-trace profile",
+.function = show_trace_profile_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-trace/trace_util.h b/src/plugins/ioam/lib-trace/trace_util.h
new file mode 100644
index 00000000..61f18d91
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_util.h
@@ -0,0 +1,256 @@
+/*
+ * trace_util.h -- Trace Profile Utility header
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef include_vnet_trace_util_h
+#define include_vnet_trace_util_h
+
+#define debug_ioam debug_ioam_fn
+
+
+/**
+ * Usage:
+ *
+ * On any node that participates in iOAM Trace:
+ *
+ * Step 1: Initialize this library by calling trace_util_init()
+ * Step 2: Set up the single trace profile with all the parameters needed
+ * to insert trace records, using:
+ * trace_profile_find
+ * trace_profile_create
+ * Step 3: TBD
+ * trace_validate
+ *
+ */
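+/*
+ * A minimal sketch of the sequence above (illustrative only; the profile
+ * values are hypothetical and trace_profile_find() comes from
+ * trace_config.h):
+ *
+ * trace_profile *p;
+ * (void) trace_util_init ();
+ * p = trace_profile_find ();
+ * if (p && !p->valid)
+ * trace_profile_create (p, TRACE_TYPE_IF_TS_APP, 3,
+ * TSP_MICROSECONDS, 0xcafe, 0x1234);
+ */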
+
+typedef struct trace_profile_
+{
+ u8 valid:1;
+ u8 trace_type;
+ u8 num_elts;
+ /* Configured node-id */
+ u32 node_id;
+ u32 app_data;
+ u32 trace_tsp;
+} trace_profile;
+
+typedef struct
+{
+ /* Name of the default profile list in use */
+ trace_profile profile;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} trace_main_t;
+
+
+/*
+ * Initialize Trace profile
+ */
+int trace_util_init (void);
+
+
+/* setup and clean up profile */
+int trace_profile_create (trace_profile * profile, u8 trace_type, u8 num_elts,
+ u32 trace_tsp, u32 node_id, u32 app_data);
+
+void clear_trace_profiles (void);
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+{
+ u8 ioam_trace_type;
+ u8 data_list_elts_left;
+ u32 elts[0]; /* Variable type. So keep it generic */
+}) ioam_trace_hdr_t;
+/* *INDENT-ON* */
+
+
+
+#define BIT_TTL_NODEID (1<<0)
+#define BIT_ING_INTERFACE (1<<1)
+#define BIT_EGR_INTERFACE (1<<2)
+#define BIT_TIMESTAMP (1<<3)
+#define BIT_APPDATA (1<<4)
+#define BIT_LOOPBACK (1<<5)
+#define BIT_LOOPBACK_REPLY (1<<6)
+#define TRACE_TYPE_MASK 0x7F /* Mask of all above bits */
+
+#define TRACE_TYPE_IF_TS_APP_LOOP 0x3F
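+/*
+ * The composite trace types in this header are ORs of the bits above,
+ * e.g. TRACE_TYPE_IF_TS_APP (0x1f) == BIT_TTL_NODEID | BIT_ING_INTERFACE |
+ * BIT_EGR_INTERFACE | BIT_TIMESTAMP | BIT_APPDATA, and
+ * TRACE_TYPE_IF_TS_APP_LOOP (0x3f) additionally sets BIT_LOOPBACK.
+ */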
+
+/*
+ If the iOAM-trace-type is 0x1f (binary 11111), the format of node
+ data is:
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ingress_if_id | egress_if_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ + timestamp +
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | app_data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+#define TRACE_TYPE_IF_TS_APP 0x1f
+typedef struct
+{
+ u32 ttl_node_id;
+ u16 ingress_if;
+ u16 egress_if;
+ u32 timestamp;
+ u32 app_data;
+} ioam_trace_if_ts_app_t;
+
+/*
+ If the iOAM-trace-type is 0x03 (binary 00011), the format is:
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ingress_if_id | egress_if_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+
+#define TRACE_TYPE_IF 0x03
+typedef struct
+{
+ u32 ttl_node_id;
+ u16 ingress_if;
+ u16 egress_if;
+} ioam_trace_if_t;
+
+/*
+ If the iOAM-trace-type is 0x09 (binary 01001), the format is:
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ + timestamp +
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+
+#define TRACE_TYPE_TS 0x09
+typedef struct
+{
+ u32 ttl_node_id;
+ u32 timestamp;
+} ioam_trace_ts_t;
+
+/*
+ If the iOAM-trace-type is 0x11 (binary 10001), the format is:
+
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | app_data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+
+
+#define TRACE_TYPE_APP 0x11
+typedef struct
+{
+ u32 ttl_node_id;
+ u32 app_data;
+} ioam_trace_app_t;
+
+/*
+
+ If the iOAM-trace-type is 0x19 (binary 11001), the format is:
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ + timestamp +
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | app_data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+#define TRACE_TYPE_TS_APP 0x19
+typedef struct
+{
+ u32 ttl_node_id;
+ u32 timestamp;
+ u32 app_data;
+} ioam_trace_ts_app_t;
+
+static inline u8
+fetch_trace_data_size (u16 trace_type)
+{
+ u8 trace_data_size = 0;
+
+ /* Check the most specific trace-type combinations first so that e.g.
+ TRACE_TYPE_TS_APP (0x19) is not matched by TRACE_TYPE_TS (0x09). */
+ if ((trace_type & TRACE_TYPE_IF_TS_APP) == TRACE_TYPE_IF_TS_APP)
+ trace_data_size = sizeof (ioam_trace_if_ts_app_t);
+ else if ((trace_type & TRACE_TYPE_TS_APP) == TRACE_TYPE_TS_APP)
+ trace_data_size = sizeof (ioam_trace_ts_app_t);
+ else if ((trace_type & TRACE_TYPE_IF) == TRACE_TYPE_IF)
+ trace_data_size = sizeof (ioam_trace_if_t);
+ else if ((trace_type & TRACE_TYPE_TS) == TRACE_TYPE_TS)
+ trace_data_size = sizeof (ioam_trace_ts_t);
+ else if ((trace_type & TRACE_TYPE_APP) == TRACE_TYPE_APP)
+ trace_data_size = sizeof (ioam_trace_app_t);
+
+ return trace_data_size;
+}
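+/*
+ * For example, fetch_trace_data_size (TRACE_TYPE_IF_TS_APP) returns
+ * sizeof (ioam_trace_if_ts_app_t), i.e. 16 bytes of node data per hop;
+ * an unrecognized type yields 0.
+ */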
+
+always_inline void
+ioam_trace_set_bit (ioam_trace_hdr_t * trace_hdr, u8 trace_bit)
+{
+ trace_hdr->ioam_trace_type |= trace_bit;
+}
+
+always_inline void
+ioam_trace_reset_bit (ioam_trace_hdr_t * trace_hdr, u8 trace_bit)
+{
+ trace_hdr->ioam_trace_type &= (~trace_bit);
+}
+
+int ioam_trace_get_sizeof_handler (u32 * result);
+int ip6_trace_profile_setup (void);
+int ip6_trace_profile_cleanup (void);
+
+#define TSP_SECONDS 0
+#define TSP_MILLISECONDS 1
+#define TSP_MICROSECONDS 2
+#define TSP_NANOSECONDS 3
+
+#endif /* include_vnet_trace_util_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
new file mode 100644
index 00000000..87e57d36
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_decap_ioam_v4_error \
+_(DECAPSULATED, "good packets decapsulated")
+
+static char *vxlan_gpe_decap_ioam_v4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_decap_ioam_v4_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_DECAP_IOAM_V4_ERROR_##sym,
+ foreach_vxlan_gpe_decap_ioam_v4_error
+#undef _
+ VXLAN_GPE_DECAP_IOAM_V4_N_ERROR,
+} vxlan_gpe_decap_ioam_v4_error_t;
+
+
+always_inline void
+vxlan_gpe_decap_ioam_v4_two_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vxlan_gpe_main_t * ngm,
+ vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u32 * next0, u32 * next1)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ *next0 = *next1 = hm->decap_v4_next_override;
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, next0,
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b1, next1,
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+}
+
+
+
+static uword
+vxlan_gpe_decap_ioam (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, u8 is_ipv6)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+
+ next0 = next1 = hm->decap_v4_next_override;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+
+ vlib_buffer_advance (b0,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof (vxlan_gpe_header_t)));
+ vlib_buffer_advance (b1,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof (vxlan_gpe_header_t)));
+
+ vxlan_gpe_decap_ioam_v4_two_inline (vm, node, ngm, b0, b1,
+ &next0, &next1);
+
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b1,
+ sizeof (*tr));
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = hm->decap_v4_next_override;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+
+ vlib_buffer_advance (b0,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof (vxlan_gpe_header_t)));
+
+ next0 = hm->decap_v4_next_override;
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0,
+ &next0,
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+
+static uword
+vxlan_gpe_decap_ioam_v4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_gpe_decap_ioam (vm, node, from_frame, 0);
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_decap_ioam_v4_node) = {
+ .function = vxlan_gpe_decap_ioam_v4,
+ .name = "vxlan-gpe-decap-ioam-v4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_ioam_v4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_decap_ioam_v4_error_strings),
+ .error_strings = vxlan_gpe_decap_ioam_v4_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_DECAP_IOAM_V4_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP] = "vxlan-gpe-pop-ioam-v4",
+ [VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
new file mode 100644
index 00000000..1d156544
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_encap_ioam_v4_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char *vxlan_gpe_encap_ioam_v4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_encap_ioam_v4_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_ENCAP_IOAM_V4_ERROR_##sym,
+ foreach_vxlan_gpe_encap_ioam_v4_error
+#undef _
+ VXLAN_GPE_ENCAP_IOAM_V4_N_ERROR,
+} vxlan_gpe_encap_ioam_v4_error_t;
+
+typedef enum
+{
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP,
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP,
+ VXLAN_GPE_ENCAP_IOAM_V4_N_NEXT
+} vxlan_gpe_encap_ioam_v4_next_t;
+
+
+always_inline void
+vxlan_gpe_encap_ioam_v4_two_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vxlan_gpe_main_t * ngm,
+ vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u32 * next0, u32 * next1)
+{
+ *next0 = *next1 = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP;
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, next0,
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b1, next1,
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+}
+
+
+static uword
+vxlan_gpe_encap_ioam_v4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+
+ next0 = next1 = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ vxlan_gpe_encap_ioam_v4_two_inline (vm, node, ngm, b0, b1,
+ &next0, &next1);
+
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0,
+ &next0,
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_encap_ioam_v4_node) = {
+ .function = vxlan_gpe_encap_ioam_v4,
+ .name = "vxlan-gpe-encap-ioam-v4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_ioam_v4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_encap_ioam_v4_error_strings),
+ .error_strings = vxlan_gpe_encap_ioam_v4_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_ENCAP_IOAM_V4_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
new file mode 100644
index 00000000..7a4580d8
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_pop_ioam_v4_error \
+_(POPPED, "good packets popped")
+
+static char *vxlan_gpe_pop_ioam_v4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_pop_ioam_v4_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_POP_IOAM_V4_ERROR_##sym,
+ foreach_vxlan_gpe_pop_ioam_v4_error
+#undef _
+ VXLAN_GPE_POP_IOAM_V4_N_ERROR,
+} vxlan_gpe_pop_ioam_v4_error_t;
+
+typedef struct
+{
+ ioam_trace_t fmt_trace;
+} vxlan_gpe_pop_ioam_v4_trace_t;
+
+
+u8 *
+format_vxlan_gpe_pop_ioam_v4_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_pop_ioam_v4_trace_t *t1
+ = va_arg (*args, vxlan_gpe_pop_ioam_v4_trace_t *);
+ ioam_trace_t *t = &(t1->fmt_trace);
+ vxlan_gpe_ioam_option_t *fmt_trace0;
+ vxlan_gpe_ioam_option_t *opt0, *limit0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ u8 type0;
+
+ fmt_trace0 = (vxlan_gpe_ioam_option_t *) t->option_data;
+
+ s = format (s, "VXLAN_GPE_IOAM_POP: next_index %d len %d traced %d",
+ t->next_index, fmt_trace0->length, t->trace_len);
+
+ opt0 = (vxlan_gpe_ioam_option_t *) (fmt_trace0 + 1);
+ limit0 = (vxlan_gpe_ioam_option_t *) (((u8 *) fmt_trace0) + t->trace_len);
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1: skip a single byte */
+ opt0 = (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + 1);
+ break;
+
+ default:
+ if (hm->trace[type0])
+ {
+ s = (*hm->trace[type0]) (s, opt0);
+ }
+ else
+ {
+ s =
+ format (s, "\n unrecognized option %d length %d", type0,
+ opt0->length);
+ }
+ opt0 =
+ (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (vxlan_gpe_ioam_option_t));
+ break;
+ }
+ }
+
+ return s;
+}
+
+always_inline void
+vxlan_gpe_ioam_pop_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_buffer_t * b0)
+{
+ ip4_header_t *ip0;
+ udp_header_t *udp_hdr0;
+ vxlan_gpe_header_t *gpe_hdr0;
+ vxlan_gpe_ioam_hdr_t *gpe_ioam0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ udp_hdr0 = (udp_header_t *) (ip0 + 1);
+ gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1);
+ gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1);
+
+ /* Pop the iOAM data */
+ vlib_buffer_advance (b0,
+ (word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof (vxlan_gpe_header_t) +
+ gpe_ioam0->length));
+
+ return;
+}
+
+
+
+always_inline void
+vxlan_gpe_pop_ioam_v4_one_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vxlan_gpe_main_t * ngm,
+ vlib_buffer_t * b0, u32 * next0)
+{
+ CLIB_UNUSED (ip4_header_t * ip0);
+ CLIB_UNUSED (udp_header_t * udp_hdr0);
+ CLIB_UNUSED (vxlan_gpe_header_t * gpe_hdr0);
+ CLIB_UNUSED (vxlan_gpe_ioam_hdr_t * gpe_ioam0);
+ CLIB_UNUSED (vxlan_gpe_ioam_option_t * opt0);
+ CLIB_UNUSED (vxlan_gpe_ioam_option_t * limit0);
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+
+ /* Pop the iOAM header */
+ ip0 = vlib_buffer_get_current (b0);
+ udp_hdr0 = (udp_header_t *) (ip0 + 1);
+ gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1);
+ gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1);
+ opt0 = (vxlan_gpe_ioam_option_t *) (gpe_ioam0 + 1);
+ limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) gpe_ioam0 + gpe_ioam0->length);
+
+ /*
+ * Basic validity checks
+ */
+ if (gpe_ioam0->length > clib_net_to_host_u16 (ip0->length))
+ {
+ *next0 = VXLAN_GPE_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ while (opt0 < limit0)
+ {
+ u8 type0;
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+ opt0 = (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + 1);
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (hm->pop_options[type0])
+ {
+ if ((*hm->pop_options[type0]) (ip0, opt0) < 0)
+ {
+ *next0 = VXLAN_GPE_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ }
+ break;
+ }
+ opt0 =
+ (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (vxlan_gpe_ioam_hdr_t));
+ }
+
+
+ *next0 = (gpe_ioam0->protocol < VXLAN_GPE_PROTOCOL_MAX) ?
+ ngm->decap_next_node_list[gpe_ioam0->protocol] :
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+trace00:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_pop_ioam_v4_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = gpe_ioam0->length;
+ t->fmt_trace.next_index = *next0;
+ /* Capture the h-b-h option verbatim */
+ trace_len = (trace_len < ARRAY_LEN (t->fmt_trace.option_data)) ?
+ trace_len : ARRAY_LEN (t->fmt_trace.option_data);
+ t->fmt_trace.trace_len = trace_len;
+ clib_memcpy (&(t->fmt_trace.option_data), gpe_ioam0, trace_len);
+ }
+
+ /* Remove the iOAM header inside the VxLAN-GPE header */
+ vxlan_gpe_ioam_pop_v4 (vm, node, b0);
+ return;
+}
+
+always_inline void
+vxlan_gpe_pop_ioam_v4_two_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vxlan_gpe_main_t * ngm,
+ vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u32 * next0, u32 * next1)
+{
+
+ vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b0, next0);
+ vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b1, next1);
+}
+
+
+
+static uword
+vxlan_gpe_pop_ioam (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, u8 is_ipv6)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ vxlan_gpe_pop_ioam_v4_two_inline (vm, node, ngm, b0, b1, &next0,
+ &next1);
+
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b0, &next0);
+
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+
+static uword
+vxlan_gpe_pop_ioam_v4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return vxlan_gpe_pop_ioam (vm, node, from_frame, 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_pop_ioam_v4_node) = {
+ .function = vxlan_gpe_pop_ioam_v4,
+ .name = "vxlan-gpe-pop-ioam-v4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_pop_ioam_v4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_pop_ioam_v4_error_strings),
+ .error_strings = vxlan_gpe_pop_ioam_v4_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
+
+ .next_nodes = {
+#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
+ foreach_vxlan_gpe_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
new file mode 100644
index 00000000..60eabc22
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_transit_ioam_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char *vxlan_gpe_transit_ioam_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_transit_ioam_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_TRANSIT_IOAM_ERROR_##sym,
+ foreach_vxlan_gpe_transit_ioam_error
+#undef _
+ VXLAN_GPE_TRANSIT_IOAM_N_ERROR,
+} vxlan_gpe_transit_ioam_error_t;
+
+typedef enum
+{
+ VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT,
+ VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP,
+ VXLAN_GPE_TRANSIT_IOAM_N_NEXT
+} vxlan_gpe_transit_ioam_next_t;
+
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (vxlan_gpe_transit_ioam, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "vxlan-gpe-transit-ioam",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+/* *INDENT-ON* */
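+/*
+ * Sketch of switching the feature on for an interface (illustrative only;
+ * sw_if_index is a hypothetical value, and the real enable path is
+ * vxlan_gpe_enable_disable_ioam_for_dest()):
+ *
+ * vnet_feature_enable_disable ("ip4-output", "vxlan-gpe-transit-ioam",
+ * sw_if_index, 1, 0, 0); // 1 => enable
+ */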
+
+static uword
+vxlan_gpe_transit_ioam (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ ip4_header_t *ip0;
+ u32 iph_offset = 0;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ iph_offset = vnet_buffer (b0)->ip.save_rewrite_length;
+ ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0)
+ + iph_offset);
+
+ /* Only IPv4 packets are inspected here; anything else is simply
+ forwarded */
+ if (PREDICT_FALSE
+ ((ip0->ip_version_and_header_length & 0xF0) == 0x40))
+ {
+ udp_header_t *udp_hdr0 = (udp_header_t *) (ip0 + 1);
+ if (PREDICT_FALSE
+ ((ip0->protocol == IP_PROTOCOL_UDP) &&
+ (clib_net_to_host_u16 (udp_hdr0->dst_port) ==
+ UDP_DST_PORT_VXLAN_GPE)))
+ {
+
+ /* Check the iOAM header */
+ vxlan_gpe_header_t *gpe_hdr0 =
+ (vxlan_gpe_header_t *) (udp_hdr0 + 1);
+
+ if (PREDICT_FALSE
+ (gpe_hdr0->protocol == VXLAN_GPE_PROTOCOL_IOAM))
+ {
+ uword *t = NULL;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ fib_prefix_t key4;
+ memset (&key4, 0, sizeof (key4));
+ key4.fp_proto = FIB_PROTOCOL_IP4;
+ key4.fp_addr.ip4.as_u32 = ip0->dst_address.as_u32;
+ t = hash_get_mem (hm->dst_by_ip4, &key4);
+ if (t)
+ {
+ vlib_buffer_advance (b0, (word) sizeof (ethernet_header_t));
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, &next0,
+ VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP,
+ 1 /* use_adj */ );
+ vlib_buffer_advance (b0, -(word) sizeof (ethernet_header_t));
+ }
+ }
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_transit_ioam_node) = {
+ .function = vxlan_gpe_transit_ioam,
+ .name = "vxlan-gpe-transit-ioam",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_ioam_v4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_transit_ioam_error_strings),
+ .error_strings = vxlan_gpe_transit_ioam_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_TRANSIT_IOAM_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT] = "interface-output",
+ [VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP] = "error-drop",
+ },
+
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api
new file mode 100644
index 00000000..a6761f07
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api
@@ -0,0 +1,111 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief iOAM Over VxLAN-GPE - Set iOAM transport for VxLAN-GPE
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - profile id
+ @param trace_ppc - Trace PPC (none/encap/decap)
+ @param pow_enable - Proof of Work enabled or not flag
+ @param trace_enable - iOAM Trace enabled or not flag
+
+*/
+autoreply define vxlan_gpe_ioam_enable {
+ u32 client_index;
+ u32 context;
+ u16 id;
+ u8 trace_ppc;
+ u8 pow_enable;
+ u8 trace_enable;
+};
+
+/** \brief iOAM for VxLAN-GPE disable
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - profile id
+*/
+autoreply define vxlan_gpe_ioam_disable
+{
+ u32 client_index;
+ u32 context;
+ u16 id;
+};
+
+/** \brief Enable iOAM for a VNI (VXLAN-GPE)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - VXLAN-GPE VNI
+ @param local - IPv4/6 Address of the local VTEP
+ @param remote - IPv4/6 Address of the remote VTEP
+ @param is_ipv6 - set when local/remote carry IPv6 addresses
+
+*/
+autoreply define vxlan_gpe_ioam_vni_enable {
+ u32 client_index;
+ u32 context;
+ u32 vni;
+ u8 local[16];
+ u8 remote[16];
+ u8 is_ipv6;
+};
+
+/** \brief Disable iOAM for a VNI (VXLAN-GPE)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - VXLAN-GPE VNI
+ @param local - IPv4/6 Address of the local VTEP
+ @param remote - IPv4/6 Address of the remote VTEP
+ @param is_ipv6 - set when local/remote carry IPv6 addresses
+
+*/
+autoreply define vxlan_gpe_ioam_vni_disable {
+ u32 client_index;
+ u32 context;
+ u32 vni;
+ u8 local[16];
+ u8 remote[16];
+ u8 is_ipv6;
+};
+
+/** \brief Enable iOAM for a VXLAN-GPE transit
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param dst_addr - IPv4/6 destination address of the tunnel to monitor
+ @param outer_fib_index - FIB index of the outer header
+ @param is_ipv6 - set when dst_addr is an IPv6 address
+
+*/
+autoreply define vxlan_gpe_ioam_transit_enable {
+ u32 client_index;
+ u32 context;
+ u32 outer_fib_index;
+ u8 dst_addr[16];
+ u8 is_ipv6;
+};
+
+/** \brief Disable iOAM for VXLAN-GPE transit
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param dst_addr - IPv4/6 destination address of the monitored tunnel
+ @param outer_fib_index - FIB index of the outer header
+ @param is_ipv6 - set when dst_addr is an IPv6 address
+
+*/
+autoreply define vxlan_gpe_ioam_transit_disable {
+ u32 client_index;
+ u32 context;
+ u32 outer_fib_index;
+ u8 dst_addr[16];
+ u8 is_ipv6;
+};
+
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h
new file mode 100644
index 00000000..06fa0d2c
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api.h>
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
new file mode 100644
index 00000000..634133a4
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_api.c - iOAM VxLAN-GPE related APIs to create
+ * and maintain profiles
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define VXLAN_GPE_REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+/* *INDENT-OFF* */
+#define VXLAN_GPE_REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+/* *INDENT-ON* */
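+/*
+ * Sketch of VXLAN_GPE_REPLY_MACRO2 usage (no handler in this file uses it
+ * yet; the message and field names are hypothetical). The body runs after
+ * the common reply fields are filled in:
+ *
+ * VXLAN_GPE_REPLY_MACRO2 (VL_API_EXAMPLE_REPLY,
+ * rmp->some_field = htonl (some_value));
+ */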
+
+/* List of message types that this plugin understands */
+
+#define foreach_vxlan_gpe_plugin_api_msg \
+_(VXLAN_GPE_IOAM_ENABLE, vxlan_gpe_ioam_enable) \
+_(VXLAN_GPE_IOAM_DISABLE, vxlan_gpe_ioam_disable) \
+_(VXLAN_GPE_IOAM_VNI_ENABLE, vxlan_gpe_ioam_vni_enable) \
+_(VXLAN_GPE_IOAM_VNI_DISABLE, vxlan_gpe_ioam_vni_disable) \
+_(VXLAN_GPE_IOAM_TRANSIT_ENABLE, vxlan_gpe_ioam_transit_enable) \
+_(VXLAN_GPE_IOAM_TRANSIT_DISABLE, vxlan_gpe_ioam_transit_disable) \
+
+
+static void vl_api_vxlan_gpe_ioam_enable_t_handler
+ (vl_api_vxlan_gpe_ioam_enable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_enable_reply_t *rmp;
+ clib_error_t *error;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+
+ /* Ignoring the profile id as currently a single profile
+ * is supported */
+ error =
+ vxlan_gpe_ioam_enable (mp->trace_enable, mp->pow_enable, mp->trace_ppc);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_ENABLE_REPLY);
+}
+
+static void vl_api_vxlan_gpe_ioam_disable_t_handler
+ (vl_api_vxlan_gpe_ioam_disable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_disable_reply_t *rmp;
+ clib_error_t *error;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+
+ /* Ignoring the profile id as currently a single profile
+ * is supported */
+ error = vxlan_gpe_ioam_disable (0, 0, 0);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_DISABLE_REPLY);
+}
+
+static void vl_api_vxlan_gpe_ioam_vni_enable_t_handler
+ (vl_api_vxlan_gpe_ioam_vni_enable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_vni_enable_reply_t *rmp;
+ clib_error_t *error;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ vxlan4_gpe_tunnel_key_t key4;
+ uword *p = NULL;
+ vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ u32 vni;
+
+
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&key4.local, &mp->local, sizeof (key4.local));
+ clib_memcpy (&key4.remote, &mp->remote, sizeof (key4.remote));
+ vni = clib_net_to_host_u32 (mp->vni);
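+ /* The 24-bit VNI occupies the upper three bytes of the on-wire key,
+ hence the shift by 8 before byte-swapping */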
+ key4.vni = clib_host_to_net_u32 (vni << 8);
+ key4.pad = 0;
+
+ p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
+ }
+ else
+ {
+ return;
+ }
+
+ if (!p)
+ return;
+
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+
+ error = vxlan_gpe_ioam_set (t, hm->has_trace_option,
+ hm->has_pot_option,
+ hm->has_ppc_option, mp->is_ipv6);
+
+
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_VNI_ENABLE_REPLY);
+}
+
+
+static void vl_api_vxlan_gpe_ioam_vni_disable_t_handler
+ (vl_api_vxlan_gpe_ioam_vni_disable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_vni_enable_reply_t *rmp;
+ clib_error_t *error;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ vxlan4_gpe_tunnel_key_t key4;
+ uword *p = NULL;
+ vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ u32 vni;
+
+
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&key4.local, &mp->local, sizeof (key4.local));
+ clib_memcpy (&key4.remote, &mp->remote, sizeof (key4.remote));
+ vni = clib_net_to_host_u32 (mp->vni);
+ key4.vni = clib_host_to_net_u32 (vni << 8);
+ key4.pad = 0;
+
+ p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
+ }
+ else
+ {
+ return;
+ }
+
+ if (!p)
+ return;
+
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+
+ error = vxlan_gpe_ioam_clear (t, 0, 0, 0, 0);
+
+
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_VNI_DISABLE_REPLY);
+}
+
+static void vl_api_vxlan_gpe_ioam_transit_enable_t_handler
+ (vl_api_vxlan_gpe_ioam_transit_enable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_transit_enable_reply_t *rmp;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ ip46_address_t dst_addr;
+
+ memset (&dst_addr.ip4, 0, sizeof (dst_addr.ip4));
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&dst_addr.ip4, &mp->dst_addr, sizeof (dst_addr.ip4));
+ }
+ rv = vxlan_gpe_enable_disable_ioam_for_dest (sm->vlib_main,
+ dst_addr,
+ ntohl (mp->outer_fib_index),
+ mp->is_ipv6 ? 0 : 1,
+ 1 /* is_add */ );
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_TRANSIT_ENABLE_REPLY);
+}
+
+static void vl_api_vxlan_gpe_ioam_transit_disable_t_handler
+ (vl_api_vxlan_gpe_ioam_transit_disable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_transit_disable_reply_t *rmp;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ ip46_address_t dst_addr;
+
+ memset (&dst_addr.ip4, 0, sizeof (dst_addr.ip4));
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&dst_addr.ip4, &mp->dst_addr, sizeof (dst_addr.ip4));
+ }
+
+ rv = vxlan_gpe_ioam_disable_for_dest (sm->vlib_main,
+ dst_addr,
+ ntohl (mp->outer_fib_index),
+ mp->is_ipv6 ? 0 : 1);
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_TRANSIT_DISABLE_REPLY);
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+vxlan_gpe_plugin_api_hookup (vlib_main_t * vm)
+{
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vxlan_gpe_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (vxlan_gpe_ioam_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_ioam_vxlan_gpe;
+#undef _
+}
+
+static clib_error_t *
+vxlan_gpe_init (vlib_main_t * vm)
+{
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ clib_error_t *error = 0;
+ u8 *name;
+ u32 encap_node_index = vxlan_gpe_encap_ioam_v4_node.index;
+ u32 decap_node_index = vxlan_gpe_decap_ioam_v4_node.index;
+ vlib_node_t *vxlan_gpe_encap_node = NULL;
+ vlib_node_t *vxlan_gpe_decap_node = NULL;
+ uword next_node = 0;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main ();
+ sm->unix_time_0 = (u32) time (0); /* Store starting time */
+ sm->vlib_time_0 = vlib_time_now (vm);
+
+ name = format (0, "ioam_vxlan_gpe_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = vxlan_gpe_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, &api_main);
+
+ /* Hook the ioam-encap node to vxlan-gpe-encap */
+ vxlan_gpe_encap_node = vlib_get_node_by_name (vm, (u8 *) "vxlan-gpe-encap");
+ sm->encap_v4_next_node =
+ vlib_node_add_next (vm, vxlan_gpe_encap_node->index, encap_node_index);
+
+ vxlan_gpe_decap_node =
+ vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input");
+ next_node =
+ vlib_node_add_next (vm, vxlan_gpe_decap_node->index, decap_node_index);
+ vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node);
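+  /* Packets arriving on vxlan4-gpe-input with the iOAM next-protocol
+   * are now steered into the iOAM decap node via this registration */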
+
+ vec_new (vxlan_gpe_ioam_sw_interface_t, pool_elts (sm->sw_interfaces));
+ sm->dst_by_ip4 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword));
+
+ sm->dst_by_ip6 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword));
+
+ vxlan_gpe_ioam_interface_init ();
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (vxlan_gpe_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
new file mode 100644
index 00000000..8558c505
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
@@ -0,0 +1,770 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/format.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
+
+vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main;
+
+int
+vxlan_gpe_ioam_set_rewrite (vxlan_gpe_tunnel_t * t, int has_trace_option,
+ int has_pot_option, int has_ppc_option,
+ u8 ipv6_set)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ u32 size;
+ vxlan_gpe_ioam_hdr_t *vxlan_gpe_ioam_hdr;
+ u8 *current;
+ u8 trace_data_size = 0;
+ u8 pot_data_size = 0;
+
+ if (has_trace_option == 0 && has_pot_option == 0)
+ return -1;
+
+ /* Work out how much space we need */
+ size = sizeof (vxlan_gpe_ioam_hdr_t);
+
+ if (has_trace_option
+ && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] != 0)
+ {
+ size += sizeof (vxlan_gpe_ioam_option_t);
+ size += hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE];
+ }
+ if (has_pot_option
+ && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+ {
+ size += sizeof (vxlan_gpe_ioam_option_t);
+ size += hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+ }
+
+ t->rewrite_size = size;
+
+ if (!ipv6_set)
+ {
+ vxlan4_gpe_rewrite (t, size, VXLAN_GPE_PROTOCOL_IOAM,
+ hm->encap_v4_next_node);
+ vxlan_gpe_ioam_hdr =
+ (vxlan_gpe_ioam_hdr_t *) (t->rewrite +
+ sizeof (ip4_vxlan_gpe_header_t));
+ }
+ else
+ {
+ vxlan6_gpe_rewrite (t, size, VXLAN_GPE_PROTOCOL_IOAM,
+ VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP);
+ vxlan_gpe_ioam_hdr =
+ (vxlan_gpe_ioam_hdr_t *) (t->rewrite +
+ sizeof (ip6_vxlan_gpe_header_t));
+ }
+
+
+ vxlan_gpe_ioam_hdr->type = VXLAN_GPE_PROTOCOL_IOAM;
+ /* Length of the header in octets */
+ vxlan_gpe_ioam_hdr->length = size;
+ vxlan_gpe_ioam_hdr->protocol = t->protocol;
+ current = (u8 *) vxlan_gpe_ioam_hdr + sizeof (vxlan_gpe_ioam_hdr_t);
+
+ if (has_trace_option
+ && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] != 0)
+ {
+ if (0 != hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] (current,
+ &trace_data_size))
+ return -1;
+ current += trace_data_size;
+ }
+ if (has_pot_option
+ && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+ {
+ pot_data_size =
+ hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+ if (0 ==
+ hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]
+ (current, &pot_data_size))
+ current += pot_data_size;
+ }
+
+ return 0;
+}
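+
+/*
+ * Resulting IPv4 rewrite layout, sketched for illustration:
+ *
+ *   [ip4_vxlan_gpe_header_t][vxlan_gpe_ioam_hdr_t][trace opt][pot opt]
+ *
+ * Each option is a vxlan_gpe_ioam_option_t header followed by the data
+ * written by the registered add_options[] handler.
+ */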
+
+int
+vxlan_gpe_ioam_clear_rewrite (vxlan_gpe_tunnel_t * t, int has_trace_option,
+ int has_pot_option, int has_ppc_option,
+ u8 ipv6_set)
+{
+
+ t->rewrite_size = 0;
+
+ if (!ipv6_set)
+ {
+ vxlan4_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP);
+ }
+ else
+ {
+ vxlan6_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP);
+ }
+
+
+ return 0;
+}
+
+clib_error_t *
+vxlan_gpe_ioam_clear (vxlan_gpe_tunnel_t * t,
+ int has_trace_option, int has_pot_option,
+ int has_ppc_option, u8 ipv6_set)
+{
+ int rv;
+ rv = vxlan_gpe_ioam_clear_rewrite (t, 0, 0, 0, 0);
+
+ if (rv == 0)
+ {
+ return (0);
+ }
+ else
+ {
+ return clib_error_return_code (0, rv, 0,
+ "vxlan_gpe_ioam_clear_rewrite returned %d",
+ rv);
+ }
+
+}
+
+
+clib_error_t *
+vxlan_gpe_ioam_set (vxlan_gpe_tunnel_t * t,
+ int has_trace_option, int has_pot_option,
+ int has_ppc_option, u8 ipv6_set)
+{
+ int rv;
+ rv = vxlan_gpe_ioam_set_rewrite (t, has_trace_option,
+ has_pot_option, has_ppc_option, ipv6_set);
+
+ if (rv == 0)
+ {
+ return (0);
+ }
+ else
+ {
+ return clib_error_return_code (0, rv, 0,
+ "vxlan_gpe_ioam_set_rewrite returned %d",
+ rv);
+ }
+
+}
+
+static void
+vxlan_gpe_set_clear_output_feature_on_intf (vlib_main_t * vm,
+ u32 sw_if_index0, u8 is_add)
+{
+ vnet_feature_enable_disable ("ip4-output", "vxlan-gpe-transit-ioam",
+ sw_if_index0, is_add,
+ 0 /* void *feature_config */ ,
+ 0 /* u32 n_feature_config_bytes */ );
+ return;
+}
+
+void
+vxlan_gpe_clear_output_feature_on_all_intfs (vlib_main_t * vm)
+{
+ vnet_sw_interface_t *si = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ pool_foreach (si, im->sw_interfaces, (
+ {
+ vxlan_gpe_set_clear_output_feature_on_intf
+ (vm, si->sw_if_index, 0);
+ }));
+ return;
+}
+
+
+extern fib_forward_chain_type_t
+fib_entry_get_default_chain_type (const fib_entry_t * fib_entry);
+
+int
+vxlan_gpe_enable_disable_ioam_for_dest (vlib_main_t * vm,
+ ip46_address_t dst_addr,
+ u32 outer_fib_index,
+ u8 is_ipv4, u8 is_add)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ u32 fib_index0 = 0;
+ u32 sw_if_index0 = ~0;
+
+ fib_node_index_t fei = ~0;
+ fib_entry_t *fib_entry;
+ u32 adj_index0;
+ ip_adjacency_t *adj0;
+ fib_prefix_t fib_prefix;
+ //fib_forward_chain_type_t fct;
+ load_balance_t *lb_m, *lb_b;
+ const dpo_id_t *dpo0, *dpo1;
+ u32 i, j;
+ //vnet_hw_interface_t *hw;
+
+ if (is_ipv4)
+ {
+ memset (&fib_prefix, 0, sizeof (fib_prefix_t));
+ fib_prefix.fp_len = 32;
+ fib_prefix.fp_proto = FIB_PROTOCOL_IP4;
+ fib_prefix.fp_addr = dst_addr;
+ }
+ else
+ {
+ return 0;
+ }
+
+ fei = fib_table_lookup (fib_index0, &fib_prefix);
+ fib_entry = fib_entry_get (fei);
+
+ //fct = fib_entry_get_default_chain_type (fib_entry);
+
+ if (!dpo_id_is_valid (&fib_entry->fe_lb /*[fct] */ ))
+ {
+ return (-1);
+ }
+
+ lb_m = load_balance_get (fib_entry->fe_lb /*[fct] */ .dpoi_index);
+
+ for (i = 0; i < lb_m->lb_n_buckets; i++)
+ {
+ dpo0 = load_balance_get_bucket_i (lb_m, i);
+
+ if (dpo0->dpoi_type == DPO_LOAD_BALANCE)
+ {
+ lb_b = load_balance_get (dpo0->dpoi_index);
+
+ for (j = 0; j < lb_b->lb_n_buckets; j++)
+ {
+ dpo1 = load_balance_get_bucket_i (lb_b, j);
+
+ if (dpo1->dpoi_type == DPO_ADJACENCY)
+ {
+ adj_index0 = dpo1->dpoi_index;
+
+ if (ADJ_INDEX_INVALID == adj_index0)
+ {
+ continue;
+ }
+
+ adj0 = adj_get (adj_index0);
+ sw_if_index0 = adj0->rewrite_header.sw_if_index;
+
+ if (~0 == sw_if_index0)
+ {
+ continue;
+ }
+
+
+ if (is_add)
+ {
+ vnet_feature_enable_disable ("ip4-output",
+ "vxlan-gpe-transit-ioam",
+ sw_if_index0, is_add, 0
+ /* void *feature_config */
+ , 0 /* u32 n_feature_config_bytes */
+ );
+
+ vec_validate_init_empty (hm->bool_ref_by_sw_if_index,
+ sw_if_index0, ~0);
+ hm->bool_ref_by_sw_if_index[sw_if_index0] = 1;
+ }
+ else
+ {
+ hm->bool_ref_by_sw_if_index[sw_if_index0] = ~0;
+ }
+ }
+ }
+ }
+ }
+
+ if (is_ipv4)
+ {
+
+ uword *t = NULL;
+ vxlan_gpe_ioam_dest_tunnels_t *t1;
+ fib_prefix_t key4, *key4_copy;
+ hash_pair_t *hp;
+ memset (&key4, 0, sizeof (key4));
+ key4.fp_proto = FIB_PROTOCOL_IP4;
+ key4.fp_addr.ip4.as_u32 = fib_prefix.fp_addr.ip4.as_u32;
+ t = hash_get_mem (hm->dst_by_ip4, &key4);
+ if (is_add)
+ {
+ if (t)
+ {
+ return 0;
+ }
+ pool_get_aligned (hm->dst_tunnels, t1, CLIB_CACHE_LINE_BYTES);
+ memset (t1, 0, sizeof (*t1));
+ t1->fp_proto = FIB_PROTOCOL_IP4;
+ t1->dst_addr.ip4.as_u32 = fib_prefix.fp_addr.ip4.as_u32;
+ key4_copy = clib_mem_alloc (sizeof (*key4_copy));
+ clib_memcpy (key4_copy, &key4, sizeof (*key4_copy));
+ hash_set_mem (hm->dst_by_ip4, key4_copy, t1 - hm->dst_tunnels);
+ /*
+ * Attach to the FIB entry for the VxLAN-GPE destination
+ * and become its child. The dest route will invoke a callback
+ * when the fib entry changes, it can be used to
+ * re-program the output feature on the egress interface.
+ */
+
+ const fib_prefix_t tun_dst_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {.ip4 = t1->dst_addr.ip4,}
+ };
+
+ t1->fib_entry_index =
+ fib_table_entry_special_add (outer_fib_index,
+ &tun_dst_pfx,
+ FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE);
+ t1->sibling_index =
+ fib_entry_child_add (t1->fib_entry_index,
+ hm->fib_entry_type, t1 - hm->dst_tunnels);
+ t1->outer_fib_index = outer_fib_index;
+
+ }
+ else
+ {
+ if (!t)
+ {
+ return 0;
+ }
+ t1 = pool_elt_at_index (hm->dst_tunnels, t[0]);
+ hp = hash_get_pair (hm->dst_by_ip4, &key4);
+ key4_copy = (void *) (hp->key);
+ hash_unset_mem (hm->dst_by_ip4, &key4);
+ clib_mem_free (key4_copy);
+ pool_put (hm->dst_tunnels, t1);
+ }
+ }
+ else
+ {
+ // TBD for IPv6
+ }
+
+ return 0;
+}
+
+void
+vxlan_gpe_refresh_output_feature_on_all_dest (void)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ vxlan_gpe_ioam_dest_tunnels_t *t;
+ u32 i;
+ if (pool_elts (hm->dst_tunnels) == 0)
+ return;
+ vxlan_gpe_clear_output_feature_on_all_intfs (hm->vlib_main);
+ i = vec_len (hm->bool_ref_by_sw_if_index);
+ vec_free (hm->bool_ref_by_sw_if_index);
+ vec_validate_init_empty (hm->bool_ref_by_sw_if_index, i, ~0);
+ pool_foreach (t, hm->dst_tunnels, (
+ {
+ vxlan_gpe_enable_disable_ioam_for_dest
+ (hm->vlib_main,
+ t->dst_addr,
+ t->outer_fib_index,
+ (t->fp_proto == FIB_PROTOCOL_IP4), 1
+ /* is_add */
+ );
+ }
+ ));
+ return;
+}
+
+void
+vxlan_gpe_clear_output_feature_on_select_intfs (void)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ u32 sw_if_index0 = 0;
+ for (sw_if_index0 = 0;
+ sw_if_index0 < vec_len (hm->bool_ref_by_sw_if_index); sw_if_index0++)
+ {
+ if (hm->bool_ref_by_sw_if_index[sw_if_index0] == 0xFF)
+ {
+ vxlan_gpe_set_clear_output_feature_on_intf
+ (hm->vlib_main, sw_if_index0, 0);
+ }
+ }
+
+ return;
+}
+
+static clib_error_t *
+vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t * vm,
+                                       unformat_input_t * input,
+                                       vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ ip46_address_t local, remote;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u32 vni;
+ u8 vni_set = 0;
+ u8 disable = 0;
+ clib_error_t *rv = 0;
+ vxlan4_gpe_tunnel_key_t key4;
+ vxlan6_gpe_tunnel_key_t key6;
+ uword *p;
+ vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "local %U", unformat_ip4_address, &local.ip4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else
+ if (unformat (input, "remote %U", unformat_ip4_address, &remote.ip4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (input, "local %U", unformat_ip6_address, &local.ip6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else
+ if (unformat (input, "remote %U", unformat_ip6_address, &remote.ip6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (input, "vni %d", &vni))
+ vni_set = 1;
+ else if (unformat (input, "disable"))
+ disable = 1;
+ else
+ break;
+ }
+
+ if (local_set == 0)
+ return clib_error_return (0, "tunnel local address not specified");
+ if (remote_set == 0)
+ return clib_error_return (0, "tunnel remote address not specified");
+ if (ipv4_set && ipv6_set)
+ return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+  if ((ipv4_set
+       && memcmp (&local.ip4, &remote.ip4, sizeof (local.ip4)) == 0)
+      || (ipv6_set
+          && memcmp (&local.ip6, &remote.ip6, sizeof (local.ip6)) == 0))
+ return clib_error_return (0, "src and dst addresses are identical");
+ if (vni_set == 0)
+ return clib_error_return (0, "vni not specified");
+ if (!ipv6_set)
+ {
+ key4.local = local.ip4.as_u32;
+ key4.remote = remote.ip4.as_u32;
+ key4.vni = clib_host_to_net_u32 (vni << 8);
+ key4.pad = 0;
+ p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
+ }
+ else
+ {
+ key6.local.as_u64[0] = local.ip6.as_u64[0];
+ key6.local.as_u64[1] = local.ip6.as_u64[1];
+ key6.remote.as_u64[0] = remote.ip6.as_u64[0];
+ key6.remote.as_u64[1] = remote.ip6.as_u64[1];
+ key6.vni = clib_host_to_net_u32 (vni << 8);
+ p = hash_get_mem (gm->vxlan6_gpe_tunnel_by_key, &key6);
+ }
+
+ if (!p)
+ return clib_error_return (0, "VxLAN Tunnel not found");
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+ if (!disable)
+ {
+ rv =
+ vxlan_gpe_ioam_set (t, hm->has_trace_option,
+ hm->has_pot_option, hm->has_ppc_option, ipv6_set);
+ }
+ else
+ {
+ rv = vxlan_gpe_ioam_clear (t, 0, 0, 0, 0);
+ }
+ return rv;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_rewrite_cmd, static) = {
+ .path = "set vxlan-gpe-ioam",
+  .short_help = "set vxlan-gpe-ioam local <local-ip> remote <remote-ip> vni <vni> [disable]",
+ .function = vxlan_gpe_set_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
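+
+/*
+ * Example usage (addresses and vni are illustrative):
+ *   set vxlan-gpe-ioam local 10.1.1.1 remote 10.1.1.2 vni 13
+ *   set vxlan-gpe-ioam local 10.1.1.1 remote 10.1.1.2 vni 13 disable
+ */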
+
+
+
+clib_error_t *
+vxlan_gpe_ioam_enable (int has_trace_option,
+ int has_pot_option, int has_ppc_option)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ hm->has_trace_option = has_trace_option;
+ hm->has_pot_option = has_pot_option;
+ hm->has_ppc_option = has_ppc_option;
+ if (hm->has_trace_option)
+ {
+ vxlan_gpe_trace_profile_setup ();
+ }
+
+ return 0;
+}
+
+clib_error_t *
+vxlan_gpe_ioam_disable (int has_trace_option,
+                        int has_pot_option, int has_ppc_option)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ hm->has_trace_option = has_trace_option;
+ hm->has_pot_option = has_pot_option;
+ hm->has_ppc_option = has_ppc_option;
+ if (!hm->has_trace_option)
+ {
+ vxlan_gpe_trace_profile_cleanup ();
+ }
+
+ return 0;
+}
+
+void
+vxlan_gpe_set_next_override (uword next)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ hm->decap_v4_next_override = next;
+ return;
+}
+
+static clib_error_t *
+vxlan_gpe_set_ioam_flags_command_fn (vlib_main_t * vm,
+                                     unformat_input_t * input,
+                                     vlib_cli_command_t * cmd)
+{
+ int has_trace_option = 0;
+ int has_pot_option = 0;
+ int has_ppc_option = 0;
+ clib_error_t *rv = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace"))
+ has_trace_option = 1;
+ else if (unformat (input, "pot"))
+ has_pot_option = 1;
+ else if (unformat (input, "ppc encap"))
+ has_ppc_option = PPC_ENCAP;
+ else if (unformat (input, "ppc decap"))
+ has_ppc_option = PPC_DECAP;
+ else if (unformat (input, "ppc none"))
+ has_ppc_option = PPC_NONE;
+ else
+ break;
+ }
+
+
+ rv =
+ vxlan_gpe_ioam_enable (has_trace_option, has_pot_option, has_ppc_option);
+ return rv;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_flags_cmd, static) = {
+  .path = "set vxlan-gpe-ioam rewrite",
+  .short_help = "set vxlan-gpe-ioam rewrite [trace] [pot] [ppc <encap|decap|none>]",
+  .function = vxlan_gpe_set_ioam_flags_command_fn,
+};
+/* *INDENT-ON* */
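+
+/*
+ * Example usage (illustrative):
+ *   set vxlan-gpe-ioam rewrite trace
+ *   set vxlan-gpe-ioam rewrite trace pot ppc encap
+ */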
+
+
+int vxlan_gpe_ioam_disable_for_dest
+ (vlib_main_t * vm, ip46_address_t dst_addr, u32 outer_fib_index,
+ u8 ipv4_set)
+{
+ vxlan_gpe_ioam_dest_tunnels_t *t;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ vxlan_gpe_enable_disable_ioam_for_dest (hm->vlib_main,
+ dst_addr, outer_fib_index, ipv4_set,
+ 0);
+ if (pool_elts (hm->dst_tunnels) == 0)
+ {
+ vxlan_gpe_clear_output_feature_on_select_intfs ();
+ return 0;
+ }
+
+ pool_foreach (t, hm->dst_tunnels, (
+ {
+ vxlan_gpe_enable_disable_ioam_for_dest
+ (hm->vlib_main,
+ t->dst_addr,
+ t->outer_fib_index,
+ (t->fp_proto ==
+ FIB_PROTOCOL_IP4), 1 /* is_add */ );
+ }
+ ));
+ vxlan_gpe_clear_output_feature_on_select_intfs ();
+ return (0);
+
+}
+
+static clib_error_t *vxlan_gpe_set_ioam_transit_rewrite_command_fn
+ (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ ip46_address_t dst_addr;
+ u8 dst_addr_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u8 disable = 0;
+ clib_error_t *rv = 0;
+ u32 outer_fib_index = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "dst-ip %U", unformat_ip4_address, &dst_addr.ip4))
+ {
+ dst_addr_set = 1;
+ ipv4_set = 1;
+ }
+ else
+ if (unformat
+ (input, "dst-ip %U", unformat_ip6_address, &dst_addr.ip6))
+ {
+ dst_addr_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (input, "outer-fib-index %d", &outer_fib_index))
+ {
+ }
+
+ else if (unformat (input, "disable"))
+ disable = 1;
+ else
+ break;
+ }
+
+ if (dst_addr_set == 0)
+ return clib_error_return (0, "tunnel destination address not specified");
+ if (ipv4_set && ipv6_set)
+ return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ if (!disable)
+ {
+ vxlan_gpe_enable_disable_ioam_for_dest (hm->vlib_main,
+ dst_addr, outer_fib_index,
+ ipv4_set, 1);
+ }
+ else
+ {
+ vxlan_gpe_ioam_disable_for_dest
+ (vm, dst_addr, outer_fib_index, ipv4_set);
+ }
+ return rv;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_transit_rewrite_cmd, static) = {
+ .path = "set vxlan-gpe-ioam-transit",
+ .short_help = "set vxlan-gpe-ioam-transit dst-ip <dst_ip> [outer-fib-index <outer_fib_index>] [disable]",
+ .function = vxlan_gpe_set_ioam_transit_rewrite_command_fn,
+};
+/* *INDENT-ON* */
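+
+/*
+ * Example usage (address is illustrative):
+ *   set vxlan-gpe-ioam-transit dst-ip 10.1.1.2
+ *   set vxlan-gpe-ioam-transit dst-ip 10.1.1.2 disable
+ */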
+
+clib_error_t *clear_vxlan_gpe_ioam_rewrite_command_fn
+ (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ return (vxlan_gpe_ioam_disable (0, 0, 0));
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_clear_ioam_flags_cmd, static) = {
+  .path = "clear vxlan-gpe-ioam rewrite",
+  .short_help = "clear vxlan-gpe-ioam rewrite",
+  .function = clear_vxlan_gpe_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+vxlan_gpe_ioam_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+{
+ vxlan_gpe_refresh_output_feature_on_all_dest ();
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t *
+vxlan_gpe_ioam_fib_node_get (fib_node_index_t index)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ return (&hm->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+vxlan_gpe_ioam_last_lock_gone (fib_node_t * node)
+{
+ ASSERT (0);
+}
+
+
+/*
+ * Virtual function table registered by the VxLAN-GPE iOAM module
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t vxlan_gpe_ioam_vft = {
+ .fnv_get = vxlan_gpe_ioam_fib_node_get,
+ .fnv_last_lock = vxlan_gpe_ioam_last_lock_gone,
+ .fnv_back_walk = vxlan_gpe_ioam_back_walk,
+};
+
+void
+vxlan_gpe_ioam_interface_init (void)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ hm->fib_entry_type = fib_node_register_new_type (&vxlan_gpe_ioam_vft);
+ return;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h
new file mode 100644
index 00000000..0711b87a
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vxlan_gpe_ioam_h__
+#define __included_vxlan_gpe_ioam_h__
+
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <vnet/ip/ip.h>
+
+
+typedef struct vxlan_gpe_sw_interface_
+{
+ u32 sw_if_index;
+} vxlan_gpe_ioam_sw_interface_t;
+
+typedef struct vxlan_gpe_dest_tunnels_
+{
+ ip46_address_t dst_addr;
+ u32 fp_proto;
+ u32 sibling_index;
+ fib_node_index_t fib_entry_index;
+ u32 outer_fib_index;
+} vxlan_gpe_ioam_dest_tunnels_t;
+
+typedef struct vxlan_gpe_ioam_main_
+{
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /* time scale transform. Joy. */
+ u32 unix_time_0;
+ f64 vlib_time_0;
+
+
+ /* Trace option */
+ u8 has_trace_option;
+
+ /* Pot option */
+ u8 has_pot_option;
+
+#define PPC_NONE 0
+#define PPC_ENCAP 1
+#define PPC_DECAP 2
+ u8 has_ppc_option;
+
+#define TSP_SECONDS 0
+#define TSP_MILLISECONDS 1
+#define TSP_MICROSECONDS 2
+#define TSP_NANOSECONDS 3
+
+ /* Array of function pointers to ADD and POP VxLAN-GPE iOAM option handling routines */
+ u8 options_size[256];
+ int (*add_options[256]) (u8 * rewrite_string, u8 * rewrite_size);
+ int (*pop_options[256]) (ip4_header_t * ip, vxlan_gpe_ioam_option_t * opt);
+
+ /* Array of function pointers to iOAM option handling routines */
+ int (*options[256]) (vlib_buffer_t * b, vxlan_gpe_ioam_option_t * opt,
+ u8 is_ipv4, u8 use_adj);
+ u8 *(*trace[256]) (u8 * s, vxlan_gpe_ioam_option_t * opt);
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* Override to export for iOAM */
+ uword decap_v4_next_override;
+ uword decap_v6_next_override;
+
+ /* sequence of node graph for encap */
+ uword encap_v4_next_node;
+ uword encap_v6_next_node;
+
+ /* Software interfaces. */
+ vxlan_gpe_ioam_sw_interface_t *sw_interfaces;
+
+ /* hash ip4/ip6 -> list of destinations for doing transit iOAM operation */
+ vxlan_gpe_ioam_dest_tunnels_t *dst_tunnels;
+ uword *dst_by_ip4;
+ uword *dst_by_ip6;
+
+ /** per sw_if_index, to maintain bitmap */
+ u8 *bool_ref_by_sw_if_index;
+ fib_node_type_t fib_entry_type;
+
+ /** State convenience vlib_main_t */
+ vlib_main_t *vlib_main;
+ /** State convenience vnet_main_t */
+ vnet_main_t *vnet_main;
+
+
+} vxlan_gpe_ioam_main_t;
+extern vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main;
+
+/*
+ * Primary h-b-h handler trace support
+ */
+typedef struct
+{
+ u32 next_index;
+ u32 trace_len;
+ u8 option_data[256];
+} ioam_trace_t;
+
+
+extern vlib_node_registration_t vxlan_gpe_encap_ioam_v4_node;
+extern vlib_node_registration_t vxlan_gpe_decap_ioam_v4_node;
+extern vlib_node_registration_t vxlan_gpe_transit_ioam_v4_node;
+
+clib_error_t *vxlan_gpe_ioam_enable (int has_trace_option, int has_pot_option,
+ int has_ppc_option);
+
+clib_error_t *vxlan_gpe_ioam_disable (int has_trace_option,
+ int has_pot_option, int has_ppc_option);
+
+clib_error_t *vxlan_gpe_ioam_set (vxlan_gpe_tunnel_t * t,
+ int has_trace_option,
+ int has_pot_option,
+ int has_ppc_option, u8 ipv6_set);
+clib_error_t *vxlan_gpe_ioam_clear (vxlan_gpe_tunnel_t * t,
+ int has_trace_option, int has_pot_option,
+ int has_ppc_option, u8 ipv6_set);
+
+int vxlan_gpe_ioam_add_register_option (u8 option,
+ u8 size,
+					int rewrite_options (u8 * rewrite_string,
+							     u8 * rewrite_size));
+
+int vxlan_gpe_add_unregister_option (u8 option);
+
+int vxlan_gpe_ioam_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ vxlan_gpe_ioam_option_t *
+ opt, u8 is_ipv4, u8 use_adj),
+ u8 * trace (u8 * s,
+ vxlan_gpe_ioam_option_t *
+ opt));
+int vxlan_gpe_ioam_unregister_option (u8 option);
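+
+/*
+ * Typical option registration flow (vxlan_gpe_ioam_trace.c registers the
+ * trace option exactly this way); the handler names below are
+ * illustrative:
+ *
+ *   vxlan_gpe_ioam_register_option (OPT_TYPE, my_data_handler,
+ *                                   my_trace_formatter);
+ *   vxlan_gpe_ioam_add_register_option (OPT_TYPE, sizeof (my_option_t),
+ *                                       my_rewrite_handler);
+ */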
+
+int vxlan_gpe_trace_profile_setup (void);
+
+int vxlan_gpe_trace_profile_cleanup (void);
+extern void vxlan_gpe_ioam_interface_init (void);
+int
+vxlan_gpe_enable_disable_ioam_for_dest (vlib_main_t * vm,
+ ip46_address_t dst_addr,
+ u32 outer_fib_index,
+ u8 is_ipv4, u8 is_add);
+int vxlan_gpe_ioam_disable_for_dest
+ (vlib_main_t * vm, ip46_address_t dst_addr, u32 outer_fib_index,
+ u8 ipv4_set);
+
+typedef enum
+{
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP,
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP,
+ VXLAN_GPE_DECAP_IOAM_V4_N_NEXT
+} vxlan_gpe_decap_ioam_v4_next_t;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h
new file mode 100644
index 00000000..a7ef859e
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vxlan_gpe_ioam_packet_h__
+#define __included_vxlan_gpe_ioam_packet_h__
+
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/ip.h>
+
+#define VXLAN_GPE_OPTION_TYPE_IOAM_TRACE 59
+#define VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT 60
+
+/**
+ * @brief VXLAN GPE Extension (iOAM) Header definition
+ */
+typedef struct
+{
+ u8 type;
+ u8 length;
+ /** Reserved */
+ u8 reserved;
+ /** see vxlan_gpe_protocol_t */
+ u8 protocol;
+} vxlan_gpe_ioam_hdr_t;
+
+/**
+ * @brief VxLAN GPE iOAM Option definition
+ */
+typedef struct
+{
+ /* Option Type */
+ u8 type;
+ /* Length in octets of the option data field */
+ u8 length;
+} vxlan_gpe_ioam_option_t;
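+
+/*
+ * On the wire each option is this 2-octet header followed by 'length'
+ * octets of option data; a type octet of 0 is treated as padding by
+ * the option scan loops.
+ */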
+
+
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
new file mode 100644
index 00000000..f3d03b67
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <ioam/lib-trace/trace_util.h>
+#include <ioam/lib-trace/trace_config.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+/* Timestamp precision multipliers for seconds, milliseconds, microseconds
+ * and nanoseconds respectively.
+ */
+static f64 trace_tsp_mul[4] = { 1, 1e3, 1e6, 1e9 };
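+/* e.g. a profile with trace_tsp == TSP_MICROSECONDS scales the wall
+ * clock by 1e6 before the low 32 bits are recorded in the trace */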
+
+typedef union
+{
+ u64 as_u64;
+ u32 as_u32[2];
+} time_u64_t;
+
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct {
+ vxlan_gpe_ioam_option_t hdr;
+ u8 ioam_trace_type;
+ u8 data_list_elts_left;
+  u32 elts[0];			/* Variable-length array of per-hop trace records */
+}) vxlan_gpe_ioam_trace_option_t;
+/* *INDENT-ON* */
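+
+/*
+ * Per-hop records are written from the tail of elts[] towards the head:
+ * each handler invocation decrements data_list_elts_left and fills one
+ * record of fetch_trace_data_size (trace_type) octets at the freed slot.
+ */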
+
+
+#define foreach_vxlan_gpe_ioam_trace_stats \
+ _(SUCCESS, "Pkts updated with TRACE records") \
+ _(FAILED, "Errors in TRACE due to lack of TRACE records")
+
+static char *vxlan_gpe_ioam_trace_stats_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_ioam_trace_stats
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_IOAM_TRACE_##sym,
+ foreach_vxlan_gpe_ioam_trace_stats
+#undef _
+ VXLAN_GPE_IOAM_TRACE_N_STATS,
+} vxlan_gpe_ioam_trace_stats_t;
+
+
+typedef struct
+{
+ /* stats */
+ u64 counters[ARRAY_LEN (vxlan_gpe_ioam_trace_stats_strings)];
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} vxlan_gpe_ioam_trace_main_t;
+
+vxlan_gpe_ioam_trace_main_t vxlan_gpe_ioam_trace_main;
+
+int
+vxlan_gpe_ioam_add_register_option (u8 option,
+ u8 size,
+ int rewrite_options (u8 * rewrite_string,
+ u8 * rewrite_size))
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->add_options));
+
+ /* Already registered */
+ if (hm->add_options[option])
+ return (-1);
+
+ hm->add_options[option] = rewrite_options;
+ hm->options_size[option] = size;
+
+ return (0);
+}
+
+int
+vxlan_gpe_add_unregister_option (u8 option)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->add_options));
+
+ /* Not registered */
+ if (!hm->add_options[option])
+ return (-1);
+
+ hm->add_options[option] = NULL;
+ hm->options_size[option] = 0;
+ return (0);
+}
+
+
+int
+vxlan_gpe_ioam_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ vxlan_gpe_ioam_option_t * opt,
+ u8 is_ipv4, u8 use_adj),
+ u8 * trace (u8 * s,
+ vxlan_gpe_ioam_option_t * opt))
+{
+ vxlan_gpe_ioam_main_t *im = &vxlan_gpe_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (im->options));
+
+ /* Already registered */
+ if (im->options[option])
+ return (-1);
+
+ im->options[option] = options;
+ im->trace[option] = trace;
+
+ return (0);
+}
+
+int
+vxlan_gpe_ioam_unregister_option (u8 option)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->options));
+
+ /* Not registered */
+ if (!hm->options[option])
+ return (-1);
+
+ hm->options[option] = NULL;
+ hm->trace[option] = NULL;
+
+ return (0);
+}
+
+
+always_inline void
+vxlan_gpe_ioam_trace_stats_increment_counter (u32 counter_index,
+ u64 increment)
+{
+ vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main;
+
+ hm->counters[counter_index] += increment;
+}
+
+
+static u8 *
+format_ioam_data_list_element (u8 * s, va_list * args)
+{
+ u32 *elt = va_arg (*args, u32 *);
+ u8 *trace_type_p = va_arg (*args, u8 *);
+ u8 trace_type = *trace_type_p;
+
+
+ if (trace_type & BIT_TTL_NODEID)
+ {
+ u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ttl 0x%x node id 0x%x ",
+ ttl_node_id_host_byte_order >> 24,
+ ttl_node_id_host_byte_order & 0x00FFFFFF);
+
+ elt++;
+ }
+
+  if (trace_type & BIT_ING_INTERFACE)
+ {
+ u32 ingress_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ingress 0x%x egress 0x%x ",
+ ingress_host_byte_order >> 16,
+ ingress_host_byte_order & 0xFFFF);
+ elt++;
+ }
+
+ if (trace_type & BIT_TIMESTAMP)
+ {
+ u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt);
+      s = format (s, "ts 0x%x\n", ts_in_host_byte_order);
+ elt++;
+ }
+
+ if (trace_type & BIT_APPDATA)
+ {
+ u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "app 0x%x ", appdata_in_host_byte_order);
+ elt++;
+ }
+
+ return s;
+}
+
+
+
+int
+vxlan_gpe_ioam_trace_rewrite_handler (u8 * rewrite_string, u8 * rewrite_size)
+{
+ vxlan_gpe_ioam_trace_option_t *trace_option = NULL;
+ u8 trace_data_size = 0;
+ u8 trace_option_elts = 0;
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ return (-1);
+ }
+
+ if (PREDICT_FALSE (!rewrite_string))
+ return -1;
+
+ trace_option_elts = profile->num_elts;
+ trace_data_size = fetch_trace_data_size (profile->trace_type);
+ trace_option = (vxlan_gpe_ioam_trace_option_t *) rewrite_string;
+ trace_option->hdr.type = VXLAN_GPE_OPTION_TYPE_IOAM_TRACE;
+ trace_option->hdr.length = 2 /*ioam_trace_type,data_list_elts_left */ +
+ trace_option_elts * trace_data_size;
+ trace_option->ioam_trace_type = profile->trace_type & TRACE_TYPE_MASK;
+ trace_option->data_list_elts_left = trace_option_elts;
+ *rewrite_size =
+ sizeof (vxlan_gpe_ioam_trace_option_t) +
+ (trace_option_elts * trace_data_size);
+
+ return 0;
+}
+
+
+int
+vxlan_gpe_ioam_trace_data_list_handler (vlib_buffer_t * b,
+ vxlan_gpe_ioam_option_t * opt,
+ u8 is_ipv4, u8 use_adj)
+{
+ u8 elt_index = 0;
+ vxlan_gpe_ioam_trace_option_t *trace =
+ (vxlan_gpe_ioam_trace_option_t *) opt;
+ time_u64_t time_u64;
+ u32 *elt;
+ int rv = 0;
+ trace_profile *profile = NULL;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ return (-1);
+ }
+
+
+ time_u64.as_u64 = 0;
+
+ if (PREDICT_TRUE (trace->data_list_elts_left))
+ {
+ trace->data_list_elts_left--;
+ /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes
+ * to skip to this node's location.
+ */
+ elt_index =
+ trace->data_list_elts_left *
+ fetch_trace_data_size (trace->ioam_trace_type) / 4;
+ elt = &trace->elts[elt_index];
+ if (is_ipv4)
+ {
+ if (trace->ioam_trace_type & BIT_TTL_NODEID)
+ {
+ ip4_header_t *ip0 = vlib_buffer_get_current (b);
+ /* The transit case is the only case where the TTL decrement happens
+ * before iOAM processing. For now, use the use_adj flag as an overload.
+ * We can probably use a separate flag instead of overloading the use_adj flag.
+ */
+ *elt = clib_host_to_net_u32 (((ip0->ttl - 1 + use_adj) << 24) |
+ profile->node_id);
+ elt++;
+ }
+
+ if (trace->ioam_trace_type & BIT_ING_INTERFACE)
+ {
+ u16 tx_if = 0;
+ u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX];
+
+ if (use_adj)
+ {
+ ip_adjacency_t *adj = adj_get (adj_index);
+ tx_if = adj->rewrite_header.sw_if_index & 0xFFFF;
+ }
+
+ *elt =
+ (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 |
+ tx_if;
+ *elt = clib_host_to_net_u32 (*elt);
+ elt++;
+ }
+ }
+ else
+ {
+ if (trace->ioam_trace_type & BIT_TTL_NODEID)
+ {
+ ip6_header_t *ip0 = vlib_buffer_get_current (b);
+ *elt = clib_host_to_net_u32 ((ip0->hop_limit << 24) |
+ profile->node_id);
+ elt++;
+ }
+ if (trace->ioam_trace_type & BIT_ING_INTERFACE)
+ {
+ u16 tx_if = 0;
+ u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX];
+
+ if (use_adj)
+ {
+ ip_adjacency_t *adj = adj_get (adj_index);
+ tx_if = adj->rewrite_header.sw_if_index & 0xFFFF;
+ }
+
+ *elt =
+ (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 |
+ tx_if;
+ *elt = clib_host_to_net_u32 (*elt);
+ elt++;
+ }
+ }
+
+ if (trace->ioam_trace_type & BIT_TIMESTAMP)
+ {
+ /* Send least significant 32 bits */
+ f64 time_f64 =
+ (f64) (((f64) hm->unix_time_0) +
+ (vlib_time_now (hm->vlib_main) - hm->vlib_time_0));
+
+ time_u64.as_u64 = time_f64 * trace_tsp_mul[profile->trace_tsp];
+ *elt = clib_host_to_net_u32 (time_u64.as_u32[0]);
+ elt++;
+ }
+
+ if (trace->ioam_trace_type & BIT_APPDATA)
+ {
+ /* $$$ set elt0->app_data */
+ *elt = clib_host_to_net_u32 (profile->app_data);
+ elt++;
+ }
+ vxlan_gpe_ioam_trace_stats_increment_counter
+ (VXLAN_GPE_IOAM_TRACE_SUCCESS, 1);
+ }
+ else
+ {
+ vxlan_gpe_ioam_trace_stats_increment_counter
+ (VXLAN_GPE_IOAM_TRACE_FAILED, 1);
+ }
+ return (rv);
+}
+
+u8 *
+vxlan_gpe_ioam_trace_data_list_trace_handler (u8 * s,
+ vxlan_gpe_ioam_option_t * opt)
+{
+ vxlan_gpe_ioam_trace_option_t *trace;
+ u8 trace_data_size_in_words = 0;
+ u32 *elt;
+ int elt_index = 0;
+
+ trace = (vxlan_gpe_ioam_trace_option_t *) opt;
+ s =
+    format (s, " Trace Type 0x%x, %d elts left\n", trace->ioam_trace_type,
+ trace->data_list_elts_left);
+ trace_data_size_in_words =
+ fetch_trace_data_size (trace->ioam_trace_type) / 4;
+ elt = &trace->elts[0];
+ while ((u8 *) elt < ((u8 *) (&trace->elts[0]) + trace->hdr.length - 2
+ /* -2 accounts for ioam_trace_type,elts_left */ ))
+ {
+ s = format (s, " [%d] %U\n", elt_index,
+ format_ioam_data_list_element,
+ elt, &trace->ioam_trace_type);
+ elt_index++;
+ elt += trace_data_size_in_words;
+ }
+ return (s);
+}
+
+
+static clib_error_t *
+vxlan_gpe_show_ioam_trace_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main;
+ u8 *s = 0;
+ int i = 0;
+
+ for (i = 0; i < VXLAN_GPE_IOAM_TRACE_N_STATS; i++)
+ {
+ s = format (s, " %s - %lu\n", vxlan_gpe_ioam_trace_stats_strings[i],
+ hm->counters[i]);
+ }
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_show_ioam_trace_cmd, static) = {
+ .path = "show ioam vxlan-gpe trace",
+ .short_help = "iOAM trace statistics",
+ .function = vxlan_gpe_show_ioam_trace_cmd_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+vxlan_gpe_ioam_trace_init (vlib_main_t * vm)
+{
+ vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main;
+ clib_error_t *error;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return (error);
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, vxlan_gpe_init)))
+ return (error);
+
+ hm->vlib_main = vm;
+ hm->vnet_main = vnet_get_main ();
+ memset (hm->counters, 0, sizeof (hm->counters));
+
+ if (vxlan_gpe_ioam_register_option
+ (VXLAN_GPE_OPTION_TYPE_IOAM_TRACE,
+ vxlan_gpe_ioam_trace_data_list_handler,
+ vxlan_gpe_ioam_trace_data_list_trace_handler) < 0)
+ return (clib_error_create
+ ("registration of VXLAN_GPE_OPTION_TYPE_IOAM_TRACE failed"));
+
+
+ if (vxlan_gpe_ioam_add_register_option
+ (VXLAN_GPE_OPTION_TYPE_IOAM_TRACE,
+ sizeof (vxlan_gpe_ioam_trace_option_t),
+ vxlan_gpe_ioam_trace_rewrite_handler) < 0)
+ return (clib_error_create
+ ("registration of VXLAN_GPE_OPTION_TYPE_IOAM_TRACE for rewrite failed"));
+
+
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (vxlan_gpe_ioam_trace_init);
+
+int
+vxlan_gpe_trace_profile_cleanup (void)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] = 0;
+
+ return 0;
+
+}
+
+static int
+vxlan_gpe_ioam_trace_get_sizeof_handler (u32 * result)
+{
+ u16 size = 0;
+ u8 trace_data_size = 0;
+ trace_profile *profile = NULL;
+
+ *result = 0;
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ return (-1);
+ }
+
+ trace_data_size = fetch_trace_data_size (profile->trace_type);
+ if (PREDICT_FALSE (trace_data_size == 0))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (PREDICT_FALSE (profile->num_elts * trace_data_size > 254))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ size +=
+ sizeof (vxlan_gpe_ioam_trace_option_t) +
+ profile->num_elts * trace_data_size;
+ *result = size;
+
+ return 0;
+}
+
+
+int
+vxlan_gpe_trace_profile_setup (void)
+{
+ u32 trace_size = 0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ return (-1);
+ }
+
+
+ if (vxlan_gpe_ioam_trace_get_sizeof_handler (&trace_size) < 0)
+ return (-1);
+
+ hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] = trace_size;
+
+ return (0);
+}
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h
new file mode 100644
index 00000000..c0ad8d9d
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vxlan_gpe_ioam_util_h__
+#define __included_vxlan_gpe_ioam_util_h__
+
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/ip.h>
+
+
+typedef struct
+{
+ u32 tunnel_index;
+ ioam_trace_t fmt_trace;
+} vxlan_gpe_ioam_v4_trace_t;
+
+
+static u8 *
+format_vxlan_gpe_ioam_v4_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_ioam_v4_trace_t *t1 = va_arg (*args, vxlan_gpe_ioam_v4_trace_t *);
+ ioam_trace_t *t = &(t1->fmt_trace);
+ vxlan_gpe_ioam_option_t *fmt_trace0;
+ vxlan_gpe_ioam_option_t *opt0, *limit0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ u8 type0;
+
+ fmt_trace0 = (vxlan_gpe_ioam_option_t *) t->option_data;
+
+ s = format (s, "VXLAN-GPE-IOAM: next_index %d len %d traced %d",
+ t->next_index, fmt_trace0->length, t->trace_len);
+
+ opt0 = (vxlan_gpe_ioam_option_t *) (fmt_trace0 + 1);
+  limit0 = (vxlan_gpe_ioam_option_t *) (((u8 *) fmt_trace0) + t->trace_len);
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+	case 0:		/* Pad1: skip a single octet */
+	  opt0 = (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + 1);
+	  break;
+
+ default:
+ if (hm->trace[type0])
+ {
+ s = (*hm->trace[type0]) (s, opt0);
+ }
+ else
+ {
+ s =
+ format (s, "\n unrecognized option %d length %d", type0,
+ opt0->length);
+ }
+ opt0 =
+ (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (vxlan_gpe_ioam_option_t));
+ break;
+ }
+ }
+
+ s = format (s, "VXLAN-GPE-IOAM: tunnel %d", t1->tunnel_index);
+ return s;
+}
+
+
+always_inline void
+vxlan_gpe_encap_decap_ioam_v4_one_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ u32 * next0, u32 drop_node_val,
+ u8 use_adj)
+{
+ ip4_header_t *ip0;
+ udp_header_t *udp_hdr0;
+ vxlan_gpe_header_t *gpe_hdr0;
+ vxlan_gpe_ioam_hdr_t *gpe_ioam0;
+ vxlan_gpe_ioam_option_t *opt0;
+ vxlan_gpe_ioam_option_t *limit0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ /* Populate the iOAM header */
+ ip0 = vlib_buffer_get_current (b0);
+ udp_hdr0 = (udp_header_t *) (ip0 + 1);
+ gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1);
+ gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1);
+ opt0 = (vxlan_gpe_ioam_option_t *) (gpe_ioam0 + 1);
+ limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) gpe_ioam0 + gpe_ioam0->length);
+
+ /*
+ * Basic validity checks
+ */
+ if (gpe_ioam0->length > clib_net_to_host_u16 (ip0->length))
+ {
+ *next0 = drop_node_val;
+ return;
+ }
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ while (opt0 < limit0)
+ {
+ u8 type0;
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+	  opt0 = (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + 1);
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (hm->options[type0])
+ {
+ if ((*hm->options[type0]) (b0, opt0, 1 /* is_ipv4 */ ,
+ use_adj) < 0)
+ {
+ *next0 = drop_node_val;
+ return;
+ }
+ }
+ break;
+ }
+      opt0 =
+	(vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length +
+				     sizeof (vxlan_gpe_ioam_option_t));
+ }
+
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = gpe_ioam0->length;
+ t->fmt_trace.next_index = *next0;
+ /* Capture the ioam option verbatim */
+      trace_len = trace_len < ARRAY_LEN (t->fmt_trace.option_data) ?
+	trace_len : ARRAY_LEN (t->fmt_trace.option_data);
+      t->fmt_trace.trace_len = trace_len;
+      clib_memcpy (&(t->fmt_trace.option_data), gpe_ioam0, trace_len);
+ }
+ return;
+}
+
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h
new file mode 100644
index 00000000..cc0a10a3
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vxlan_gpe_msg_enum_h
+#define included_vxlan_gpe_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+  /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_vxlan_gpe_msg_enum_h */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c
new file mode 100644
index 00000000..80e65644
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_test.c - test harness for vxlan_gpe plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+#define __plugin_msg_base vxlan_gpe_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_api_version
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} vxlan_gpe_test_main_t;
+
+vxlan_gpe_test_main_t vxlan_gpe_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(vxlan_gpe_ioam_enable_reply) \
+_(vxlan_gpe_ioam_disable_reply) \
+_(vxlan_gpe_ioam_vni_enable_reply) \
+_(vxlan_gpe_ioam_vni_disable_reply) \
+_(vxlan_gpe_ioam_transit_enable_reply) \
+_(vxlan_gpe_ioam_transit_disable_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = vxlan_gpe_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(VXLAN_GPE_IOAM_ENABLE_REPLY, vxlan_gpe_ioam_enable_reply) \
+_(VXLAN_GPE_IOAM_DISABLE_REPLY, vxlan_gpe_ioam_disable_reply) \
+_(VXLAN_GPE_IOAM_VNI_ENABLE_REPLY, vxlan_gpe_ioam_vni_enable_reply) \
+_(VXLAN_GPE_IOAM_VNI_DISABLE_REPLY, vxlan_gpe_ioam_vni_disable_reply) \
+_(VXLAN_GPE_IOAM_TRANSIT_ENABLE_REPLY, vxlan_gpe_ioam_transit_enable_reply) \
+_(VXLAN_GPE_IOAM_TRANSIT_DISABLE_REPLY, vxlan_gpe_ioam_transit_disable_reply) \
+
+static int
+api_vxlan_gpe_ioam_enable (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_vxlan_gpe_ioam_enable_t *mp;
+ u32 id = 0;
+ int has_trace_option = 0;
+ int has_pow_option = 0;
+ int has_ppc_option = 0;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace"))
+ has_trace_option = 1;
+ else if (unformat (input, "pow"))
+ has_pow_option = 1;
+ else if (unformat (input, "ppc encap"))
+ has_ppc_option = PPC_ENCAP;
+ else if (unformat (input, "ppc decap"))
+ has_ppc_option = PPC_DECAP;
+ else if (unformat (input, "ppc none"))
+ has_ppc_option = PPC_NONE;
+ else
+ break;
+ }
+ M (VXLAN_GPE_IOAM_ENABLE, mp);
+ mp->id = htons (id);
+ mp->trace_ppc = has_ppc_option;
+ mp->pow_enable = has_pow_option;
+ mp->trace_enable = has_trace_option;
+
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+
+static int
+api_vxlan_gpe_ioam_disable (vat_main_t * vam)
+{
+ vl_api_vxlan_gpe_ioam_disable_t *mp;
+ int ret;
+
+ M (VXLAN_GPE_IOAM_DISABLE, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_vxlan_gpe_ioam_vni_enable (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_ioam_vni_enable_t *mp;
+ ip4_address_t local4, remote4;
+ ip6_address_t local6, remote6;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u32 vni;
+ u8 vni_set = 0;
+ int ret;
+
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "local %U", unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip4_address, &remote4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "local %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip6_address, &remote6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("tunnel local address not specified\n");
+ return -99;
+ }
+ if (remote_set == 0)
+ {
+ errmsg ("tunnel remote address not specified\n");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ if (vni_set == 0)
+ {
+ errmsg ("vni not specified\n");
+ return -99;
+ }
+
+ M (VXLAN_GPE_IOAM_VNI_ENABLE, mp);
+
+
+ if (ipv6_set)
+ {
+ clib_memcpy (&mp->local, &local6, sizeof (local6));
+ clib_memcpy (&mp->remote, &remote6, sizeof (remote6));
+ }
+ else
+ {
+ clib_memcpy (&mp->local, &local4, sizeof (local4));
+ clib_memcpy (&mp->remote, &remote4, sizeof (remote4));
+ }
+
+ mp->vni = ntohl (vni);
+ mp->is_ipv6 = ipv6_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_vxlan_gpe_ioam_vni_disable (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_ioam_vni_disable_t *mp;
+ ip4_address_t local4, remote4;
+ ip6_address_t local6, remote6;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u32 vni;
+ u8 vni_set = 0;
+ int ret;
+
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "local %U", unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip4_address, &remote4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "local %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip6_address, &remote6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("tunnel local address not specified\n");
+ return -99;
+ }
+ if (remote_set == 0)
+ {
+ errmsg ("tunnel remote address not specified\n");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ if (vni_set == 0)
+ {
+ errmsg ("vni not specified\n");
+ return -99;
+ }
+
+ M (VXLAN_GPE_IOAM_VNI_DISABLE, mp);
+
+
+ if (ipv6_set)
+ {
+ clib_memcpy (&mp->local, &local6, sizeof (local6));
+ clib_memcpy (&mp->remote, &remote6, sizeof (remote6));
+ }
+ else
+ {
+ clib_memcpy (&mp->local, &local4, sizeof (local4));
+ clib_memcpy (&mp->remote, &remote4, sizeof (remote4));
+ }
+
+ mp->vni = ntohl (vni);
+ mp->is_ipv6 = ipv6_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_vxlan_gpe_ioam_transit_enable (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_ioam_transit_enable_t *mp;
+ ip4_address_t local4;
+ ip6_address_t local6;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u32 outer_fib_index = 0;
+ int ret;
+
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "dst-ip %U", unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst-ip %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "outer-fib-index %d", &outer_fib_index))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("destination address not specified\n");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+  M (VXLAN_GPE_IOAM_TRANSIT_ENABLE, mp);
+
+ if (ipv6_set)
+ {
+ errmsg ("IPv6 currently unsupported");
+ return -1;
+ }
+ else
+ {
+ clib_memcpy (&mp->dst_addr, &local4, sizeof (local4));
+ }
+
+ mp->outer_fib_index = htonl (outer_fib_index);
+ mp->is_ipv6 = ipv6_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_vxlan_gpe_ioam_transit_disable (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_ioam_transit_disable_t *mp;
+ ip4_address_t local4;
+ ip6_address_t local6;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u32 outer_fib_index = 0;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "dst-ip %U", unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst-ip %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "outer-fib-index %d", &outer_fib_index))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("destination address not specified\n");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+  M (VXLAN_GPE_IOAM_TRANSIT_DISABLE, mp);
+
+  if (ipv6_set)
+    {
+      errmsg ("IPv6 currently unsupported");
+      return -1;
+    }
+ else
+ {
+ clib_memcpy (&mp->dst_addr, &local4, sizeof (local4));
+ }
+
+ mp->outer_fib_index = htonl (outer_fib_index);
+ mp->is_ipv6 = ipv6_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(vxlan_gpe_ioam_enable, ""\
+  "[trace] [pow] [ppc <encap|decap>]") \
+_(vxlan_gpe_ioam_disable, "") \
+_(vxlan_gpe_ioam_vni_enable, ""\
+ "local <local_vtep_ip> remote <remote_vtep_ip> vni <vnid>") \
+_(vxlan_gpe_ioam_vni_disable, ""\
+ "local <local_vtep_ip> remote <remote_vtep_ip> vni <vnid>") \
+_(vxlan_gpe_ioam_transit_enable, ""\
+ "dst-ip <dst_ip> [outer-fib-index <outer_fib_index>]") \
+_(vxlan_gpe_ioam_transit_disable, ""\
+ "dst-ip <dst_ip> [outer-fib-index <outer_fib_index>]") \
+
+
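+/* Example VAT invocations (values are illustrative), matching the help
+ * strings above:
+ *   vxlan_gpe_ioam_vni_enable local 10.0.0.1 remote 10.0.0.2 vni 100
+ *   vxlan_gpe_ioam_transit_enable dst-ip 10.0.0.2 outer-fib-index 0
+ */
+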
+static void
+vxlan_gpe_vat_api_hookup (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "ioam_vxlan_gpe_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ vxlan_gpe_vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping.api b/src/plugins/ioam/udp-ping/udp_ping.api
new file mode 100644
index 00000000..87945816
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping.api
@@ -0,0 +1,73 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief UDP-Probe Add/Delete request
+ @param src_ip_address - Source ipv4/v6 address for the udp-ping flow
+ @param dst_ip_address - Destination ipv4/v6 address for the udp-ping flow
+ @param start_src_port - Starting source port of port range for udp-ping
+ @param end_src_port - End source port of port range for udp-ping
+ @param start_dst_port - Starting destination port of port range for udp-ping
+ @param end_dst_port - End destination port of port range for udp-ping
+    @param interval - Time interval in seconds at which udp-probe needs to be sent
+    @param is_ipv4 - To determine whether IPv4 or IPv6 address is used
+    @param dis - TRUE if delete, FALSE if add
+    @param fault_det - TRUE to enable fault detection/isolation
+*/
+define udp_ping_add_del_req {
+ u32 client_index;
+ u32 context;
+ u8 src_ip_address[16];
+ u8 dst_ip_address[16];
+ u16 start_src_port;
+ u16 end_src_port;
+ u16 start_dst_port;
+ u16 end_dst_port;
+ u16 interval;
+ u8 is_ipv4;
+ u8 dis;
+ u8 fault_det;
+ u8 reserve[3];
+};
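+
+/* Note: a request covers the full cross-product of the source and
+ * destination port ranges; e.g. src ports 5000-5002 with dst ports
+ * 6000-6002 (illustrative values) describe 9 probe flows. */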
+
+/** \brief Udp-probe add/del response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define udp_ping_add_del_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Udp-probe export add/del request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param enable - If TRUE then enable export else disable
+*/
+define udp_ping_export_req {
+ u32 client_index;
+ u32 context;
+ u32 enable;
+};
+
+/** \brief Udp-probe export add/del response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define udp_ping_export_reply {
+ u32 context;
+ i32 retval;
+};
+
diff --git a/src/plugins/ioam/udp-ping/udp_ping.h b/src/plugins/ioam/udp-ping/udp_ping.h
new file mode 100644
index 00000000..26c42019
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_H_
+
+#include <ioam/analyse/ioam_analyse.h>
+
+#define MAX_PING_RETRIES 5
+
+#define EVENT_SIG_RECHECK 2
+
+/** @brief udp-ping session data.
+ @note cache aligned.
+*/
+typedef struct
+{
+ /** UDP ping packet */
+ u8 *ping_rewrite;
+
+ /** Ping packet rewrite string len. */
+ u16 rewrite_len;
+
+ /** Number of times ping response was dropped.
+ * If retry > MAX_PING_RETRIES then declare connectivity as down.
+ */
+ u16 retry;
+
+ u16 reserve[2];
+
+ /** Analysed data. */
+ ioam_analyser_data_t analyse_data;
+
+ /** This is used by ioam e2e for identifying flow and add seq number. */
+ u32 flow_ctx;
+
+ /** No of packets sent for this flow. */
+ u32 pak_sent;
+} udp_ping_flow_data;
+
+/** @brief udp-ping flow data.
+ @note cache aligned.
+*/
+typedef struct
+{
+ /** Time at which next udp-ping probe has to be sent out. */
+ f64 next_send_time;
+
+  /** Interval (in seconds) at which ping packets are sent. */
+  u16 interval;
+
+ u16 reserve[3];
+
+ /** Defines start port of the src port range. */
+ u16 start_src_port;
+
+ /** Defines end port of the src port range. */
+ u16 end_src_port;
+
+ /** Defines start port of the dest port range. */
+ u16 start_dst_port;
+
+ /** Defines end port of the dest port range. */
+ u16 end_dst_port;
+
+ /** Ping statistics. */
+ udp_ping_flow_data *stats;
+
+} udp_ping_flow;
+
+/** @brief udp-ping data.
+*/
+typedef struct
+{
+ /** Local source IPv4/6 address to be used. */
+ ip46_address_t src;
+
+ /** Remote destination IPv4/6 address to be used. */
+ ip46_address_t dst;
+
+ /** Per flow data. */
+ udp_ping_flow udp_data;
+
+ /** To enable fault detection/isolation in network. */
+ u8 fault_det;
+} ip46_udp_ping_flow;
+
+/** @brief udp-ping main data-structure.
+*/
+typedef struct
+{
+  /** Pool of udp-ping flow data. */
+  ip46_udp_ping_flow *ip46_flow;
+
+  /** Time interval at which the process node has to wake up. */
+  u64 timer_interval;
+
+  /** Pointer to vlib main. */
+  vlib_main_t *vlib_main;
+
+ /** Pointer to vnet main for convenience. */
+ vnet_main_t *vnet_main;
+
+ /** API message ID base */
+ u16 msg_id_base;
+} udp_ping_main_t;
+
+extern udp_ping_main_t udp_ping_main;
+
+void
+ip46_udp_ping_set_flow (ip46_address_t src, ip46_address_t dst,
+ u16 start_src_port, u16 end_src_port,
+ u16 start_dst_port, u16 end_dst_port,
+ u16 interval, u8 fault_det, u8 is_disable);
+
+clib_error_t *udp_ping_flow_create (u8 del);
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_all_api_h.h b/src/plugins/ioam/udp-ping/udp_ping_all_api_h.h
new file mode 100644
index 00000000..1ed2e10b
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_all_api_h.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/udp-ping/udp_ping.api.h>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_api.c b/src/plugins/ioam/udp-ping/udp_ping_api.c
new file mode 100644
index 00000000..75938731
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_api.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * udp_ping_api.c - UDP Ping related APIs to create
+ * and maintain ping flows
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <ioam/udp-ping/udp_ping.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <ioam/udp-ping/udp_ping_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_api_version
+
+#define REPLY_MSG_ID_BASE sm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* List of message types that this module understands */
+#define foreach_udp_ping_api_msg \
+ _(UDP_PING_ADD_DEL_REQ, udp_ping_add_del_req) \
+ _(UDP_PING_EXPORT_REQ, udp_ping_export_req) \
+
+static void vl_api_udp_ping_add_del_req_t_handler
+ (vl_api_udp_ping_add_del_req_t * mp)
+{
+ ip46_address_t dst, src;
+ int rv = 0;
+ udp_ping_main_t *sm = &udp_ping_main;
+ vl_api_udp_ping_add_del_reply_t *rmp;
+
+ if (mp->is_ipv4)
+ {
+      rv = -1;	/* IPv4 is not supported yet */
+ goto ERROROUT;
+ }
+
+ clib_memcpy ((void *) &src.ip6, (void *) mp->src_ip_address,
+ sizeof (ip6_address_t));
+ clib_memcpy ((void *) &dst.ip6, (void *) mp->dst_ip_address,
+ sizeof (ip6_address_t));
+
+ ip46_udp_ping_set_flow (src, dst,
+ ntohs (mp->start_src_port),
+ ntohs (mp->end_src_port),
+ ntohs (mp->start_dst_port),
+ ntohs (mp->end_dst_port),
+ ntohs (mp->interval), mp->fault_det, mp->dis);
+ rv = 0; //FIXME
+
+ERROROUT:
+ REPLY_MACRO (VL_API_UDP_PING_ADD_DEL_REPLY);
+}
+
+static void vl_api_udp_ping_export_req_t_handler
+ (vl_api_udp_ping_export_req_t * mp)
+{
+ udp_ping_main_t *sm = &udp_ping_main;
+ int rv = 0;
+ vl_api_udp_ping_export_reply_t *rmp;
+
+ (void) udp_ping_flow_create (!mp->enable);
+ rv = 0; //FIXME
+
+ REPLY_MACRO (VL_API_UDP_PING_EXPORT_REPLY);
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+udp_ping_api_hookup (vlib_main_t * vm)
+{
+ udp_ping_main_t *sm = &udp_ping_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_udp_ping_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (udp_ping_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_udp_ping;
+#undef _
+}
+
+static clib_error_t *
+udp_ping_api_init (vlib_main_t * vm)
+{
+ udp_ping_main_t *sm = &udp_ping_main;
+ clib_error_t *error = 0;
+ u8 *name;
+
+ name = format (0, "udp_ping_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = udp_ping_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, &api_main);
+
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (udp_ping_api_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_export.c b/src/plugins/ioam/udp-ping/udp_ping_export.c
new file mode 100644
index 00000000..91cbb4bd
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_export.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/flow/flow_report.h>
+#include <ioam/analyse/ioam_summary_export.h>
+#include <vnet/api_errno.h>
+#include <ioam/udp-ping/udp_ping.h>
+
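+/* Conservative upper bound on the bytes one record can add; the send
+ * loop flushes the current buffer once fewer than this many bytes
+ * remain below the path MTU. */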
+#define UDP_PING_EXPORT_RECORD_SIZE 400
+
+static u8 *
+udp_ping_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address, u16 collector_port)
+{
+ return ioam_template_rewrite (frm, fr, collector_address,
+ src_address, collector_port);
+}
+
+static vlib_frame_t *
+udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
+ vlib_frame_t * f, u32 * to_next, u32 node_index)
+{
+ vlib_buffer_t *b0 = NULL;
+ u32 next_offset = 0;
+ u32 bi0 = ~0;
+ int i, j;
+ ip4_ipfix_template_packet_t *tp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s = NULL;
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ u32 records_this_buffer;
+ u16 new_l0, old_l0;
+ ip_csum_t sum0;
+ vlib_main_t *vm = frm->vlib_main;
+ flow_report_stream_t *stream;
+ udp_ping_flow_data *stats;
+ ip46_udp_ping_flow *ip46_flow;
+ u16 src_port, dst_port;
+ u16 data_len;
+
+ stream = &frm->streams[fr->stream_index];
+ data_len = vec_len (udp_ping_main.ip46_flow);
+
+ for (i = 0; i < data_len; i++)
+ {
+ if (pool_is_free_index (udp_ping_main.ip46_flow, i))
+ continue;
+
+ ip46_flow = pool_elt_at_index (udp_ping_main.ip46_flow, i);
+ j = 0;
+ for (src_port = ip46_flow->udp_data.start_src_port;
+ src_port <= ip46_flow->udp_data.end_src_port; src_port++)
+ {
+ for (dst_port = ip46_flow->udp_data.start_dst_port;
+ dst_port <= ip46_flow->udp_data.end_dst_port; dst_port++, j++)
+ {
+ stats = ip46_flow->udp_data.stats + j;
+ if (PREDICT_FALSE (b0 == NULL))
+ {
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ break;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ memcpy (b0->data, fr->rewrite, vec_len (fr->rewrite));
+ b0->current_data = 0;
+ b0->current_length = vec_len (fr->rewrite);
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+
+ tp = vlib_buffer_get_current (b0);
+ ip = &tp->ip4;
+ h = &tp->ipfix.h;
+ s = &tp->ipfix.s;
+
+ /* FIXUP: message header export_time */
+ h->export_time = clib_host_to_net_u32 (((u32) time (NULL)));
+
+ /* FIXUP: message header sequence_number */
+ h->sequence_number = stream->sequence_number++;
+ h->sequence_number =
+ clib_host_to_net_u32 (h->sequence_number);
+ next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
+ records_this_buffer = 0;
+ }
+
+ next_offset = ioam_analyse_add_ipfix_record (fr,
+ &stats->analyse_data,
+ b0, next_offset,
+ &ip46_flow->
+ src.ip6,
+ &ip46_flow->
+ dst.ip6, src_port,
+ dst_port);
+
+ //u32 pak_sent = clib_host_to_net_u32(stats->pak_sent);
+ //memcpy (b0->data + next_offset, &pak_sent, sizeof(u32));
+ //next_offset += sizeof(u32);
+
+ records_this_buffer++;
+
+ /* Flush data if packet len is about to reach path mtu */
+ if (next_offset > (frm->path_mtu - UDP_PING_EXPORT_RECORD_SIZE))
+ {
+ b0->current_length = next_offset;
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = &tp->ipfix.h;
+ s = &tp->ipfix.s;
+
+ s->set_id_length =
+ ipfix_set_id_length (IOAM_FLOW_TEMPLATE_ID,
+ next_offset - (sizeof (*ip) +
+ sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length =
+ version_length (next_offset -
+ (sizeof (*ip) + sizeof (*udp)));
+
+ sum0 = ip->checksum;
+ old_l0 = ip->length;
+ new_l0 = clib_host_to_net_u16 ((u16) next_offset);
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+
+ ip->checksum = ip_csum_fold (sum0);
+ ip->length = new_l0;
+ udp->length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+
+ to_next[0] = bi0;
+ f->n_vectors++;
+ to_next++;
+
+ if (f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, node_index, f);
+ f = vlib_get_frame_to_node (vm, node_index);
+ f->n_vectors = 0;
+ to_next = vlib_frame_vector_args (f);
+ }
+ b0 = 0;
+ bi0 = ~0;
+ }
+ }
+ }
+ }
+
+ if (b0)
+ {
+ b0->current_length = next_offset;
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = &tp->ipfix.h;
+ s = &tp->ipfix.s;
+
+ s->set_id_length = ipfix_set_id_length (IOAM_FLOW_TEMPLATE_ID,
+ next_offset - (sizeof (*ip) +
+ sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length =
+ version_length (next_offset - (sizeof (*ip) + sizeof (*udp)));
+
+ sum0 = ip->checksum;
+ old_l0 = ip->length;
+ new_l0 = clib_host_to_net_u16 ((u16) next_offset);
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+
+ ip->checksum = ip_csum_fold (sum0);
+ ip->length = new_l0;
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+
+ to_next[0] = bi0;
+ f->n_vectors++;
+ to_next++;
+
+ if (f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, node_index, f);
+ f = vlib_get_frame_to_node (vm, node_index);
+ f->n_vectors = 0;
+ to_next = vlib_frame_vector_args (f);
+ }
+ b0 = 0;
+ bi0 = ~0;
+ }
+ return f;
+}
+
+clib_error_t *
+udp_ping_flow_create (u8 del)
+{
+ vnet_flow_report_add_del_args_t args;
+ int rv;
+ u32 domain_id = 0;
+ flow_report_main_t *frm = &flow_report_main;
+ u16 template_id;
+
+ memset (&args, 0, sizeof (args));
+ args.rewrite_callback = udp_ping_template_rewrite;
+ args.flow_data_callback = udp_ping_send_flows;
+  args.is_add = del ? 0 : 1;
+ args.domain_id = domain_id;
+ args.src_port = UDP_DST_PORT_ipfix;
+
+ rv = vnet_flow_report_add_del (frm, &args, &template_id);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "registration not found...");
+ default:
+ return clib_error_return (0, "vnet_flow_report_add_del returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_udp_ping_export_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_add = 1;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "export-ipfix"))
+ is_add = 1;
+ else if (unformat (input, "disable"))
+ is_add = 0;
+ else
+ break;
+ }
+
+  (void) udp_ping_flow_create (!is_add);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_udp_ping_export_command, static) = {
+ .path = "set udp-ping export-ipfix",
+ .short_help = "set udp-ping export-ipfix [disable]",
+ .function = set_udp_ping_export_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+udp_ping_flow_report_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+ if ((error = vlib_call_init_function (vm, flow_report_init)))
+ return error;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (udp_ping_flow_report_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_msg_enum.h b/src/plugins/ioam/udp-ping/udp_ping_msg_enum.h
new file mode 100644
index 00000000..dded1884
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_msg_enum.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_udp_ping_msg_enum_h
+#define included_udp_ping_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_udp_ping_msg_enum_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_node.c b/src/plugins/ioam/udp-ping/udp_ping_node.c
new file mode 100644
index 00000000..e1a57955
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_node.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <ioam/encap/ip6_ioam_trace.h>
+#include <ioam/encap/ip6_ioam_e2e.h>
+#include <ioam/udp-ping/udp_ping_packet.h>
+#include <ioam/udp-ping/udp_ping.h>
+#include <ioam/udp-ping/udp_ping_util.h>
+#include <vnet/srv6/sr_packet.h>
+
+typedef enum
+{
+ UDP_PING_NEXT_DROP,
+ UDP_PING_NEXT_PUNT,
+ UDP_PING_NEXT_UDP_LOOKUP,
+ UDP_PING_NEXT_ICMP,
+ UDP_PING_NEXT_IP6_LOOKUP,
+ UDP_PING_NEXT_IP6_DROP,
+ UDP_PING_N_NEXT,
+} udp_ping_next_t;
+
+udp_ping_main_t udp_ping_main;
+
+uword
+udp_ping_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f);
+
+extern int
+ip6_hbh_ioam_trace_data_list_handler (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt);
+
+typedef struct
+{
+ ip6_address_t src;
+ ip6_address_t dst;
+ u16 src_port;
+ u16 dst_port;
+ u16 handle;
+ u16 next_index;
+ u8 msg_type;
+} udp_ping_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_udp_ping_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ udp_ping_trace_t *t = va_arg (*args, udp_ping_trace_t *);
+
+ s = format (s, "udp-ping-local: src %U, dst %U, src_port %u, dst_port %u "
+ "handle %u, next_index %u, msg_type %u",
+ format_ip6_address, &t->src,
+ format_ip6_address, &t->dst,
+ t->src_port, t->dst_port,
+ t->handle, t->next_index, t->msg_type);
+ return s;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp_ping_node, static) =
+{
+ .function = udp_ping_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "udp-ping-process",
+};
+/* *INDENT-ON* */
+
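+/**
+ * @brief Recompute the udp-ping process wake-up interval.
+ *
+ * Scans all configured flows for the smallest probe interval; if it
+ * differs from the current timer, signals the process node so it
+ * re-arms its timer.
+ */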
+void
+udp_ping_calculate_timer_interval (void)
+{
+ int i;
+ ip46_udp_ping_flow *flow = NULL;
+  u16 min_interval = 0xFFFF;	/* max u16 */
+
+ for (i = 0; i < vec_len (udp_ping_main.ip46_flow); i++)
+ {
+ if (pool_is_free_index (udp_ping_main.ip46_flow, i))
+ continue;
+
+ flow = pool_elt_at_index (udp_ping_main.ip46_flow, i);
+
+ if (min_interval > flow->udp_data.interval)
+ min_interval = flow->udp_data.interval;
+ }
+
+ if (udp_ping_main.timer_interval != min_interval)
+ {
+ udp_ping_main.timer_interval = min_interval;
+ vlib_process_signal_event (udp_ping_main.vlib_main,
+ udp_ping_node.index, EVENT_SIG_RECHECK, 0);
+ }
+}
+
+void
+ip46_udp_ping_set_flow (ip46_address_t src, ip46_address_t dst,
+ u16 start_src_port, u16 end_src_port,
+ u16 start_dst_port, u16 end_dst_port,
+ u16 interval, u8 fault_det, u8 is_disable)
+{
+ u8 found = 0;
+ ip46_udp_ping_flow *flow = NULL;
+ int i;
+
+ for (i = 0; i < vec_len (udp_ping_main.ip46_flow); i++)
+ {
+ if (pool_is_free_index (udp_ping_main.ip46_flow, i))
+ continue;
+
+ flow = pool_elt_at_index (udp_ping_main.ip46_flow, i);
+ if ((0 == udp_ping_compare_flow (src, dst,
+ start_src_port, end_src_port,
+ start_dst_port, end_dst_port, flow)))
+ {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found)
+ {
+ u16 cur_interval;
+ if (is_disable)
+ {
+ cur_interval = flow->udp_data.interval;
+ udp_ping_free_flow_data (flow);
+ pool_put_index (udp_ping_main.ip46_flow, i);
+ if (udp_ping_main.timer_interval == interval)
+ udp_ping_calculate_timer_interval ();
+ return;
+ }
+
+ cur_interval = flow->udp_data.interval;
+ flow->udp_data.interval = interval;
+ if (udp_ping_main.timer_interval > interval)
+ {
+ udp_ping_main.timer_interval = interval;
+ vlib_process_signal_event (udp_ping_main.vlib_main,
+ udp_ping_node.index,
+ EVENT_SIG_RECHECK, 0);
+ }
+ else if (udp_ping_main.timer_interval == cur_interval)
+ udp_ping_calculate_timer_interval ();
+
+ return;
+ }
+
+ /* Delete operation and item not found */
+ if (is_disable)
+ return;
+
+ /* Alloc new session */
+ pool_get_aligned (udp_ping_main.ip46_flow, flow, CLIB_CACHE_LINE_BYTES);
+ udp_ping_populate_flow (src, dst,
+ start_src_port, end_src_port,
+ start_dst_port, end_dst_port,
+ interval, fault_det, flow);
+
+ udp_ping_create_rewrite (flow, (flow - udp_ping_main.ip46_flow));
+
+ if (udp_ping_main.timer_interval > interval)
+ {
+ udp_ping_main.timer_interval = interval;
+ vlib_process_signal_event (udp_ping_main.vlib_main,
+ udp_ping_node.index, EVENT_SIG_RECHECK, 0);
+ }
+ return;
+}
+
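+/**
+ * @brief Parse a port range of the form "<start>:<end>", e.g. 100:200.
+ *
+ * Fills the two u16 pointers passed via varargs. Returns 1 on success,
+ * 0 if the colon is missing, a non-digit precedes it, or end < start.
+ */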
+uword
+unformat_port_range (unformat_input_t * input, va_list * args)
+{
+ u16 *start_port, *end_port;
+ uword c;
+ u8 colon_present = 0;
+
+ start_port = va_arg (*args, u16 *);
+ end_port = va_arg (*args, u16 *);
+
+ *start_port = *end_port = 0;
+ /* Get start port */
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ switch (c)
+ {
+ case '0' ... '9':
+ *start_port = ((*start_port) * 10) + (c - '0');
+ break;
+
+ case ':':
+ colon_present = 1;
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (colon_present)
+ break;
+ }
+
+ if (!colon_present)
+ return 0;
+
+ /* Get end port */
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ switch (c)
+ {
+ case '0' ... '9':
+ *end_port = ((*end_port) * 10) + (c - '0');
+ break;
+
+ default:
+ return 1;
+ }
+ }
+
+  if (*end_port < *start_port)
+ return 0;
+
+ return 1;
+}
+
+static clib_error_t *
+set_udp_ping_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip46_address_t dst, src;
+ u16 start_src_port, end_src_port;
+ u16 start_dst_port, end_dst_port;
+ u32 interval;
+ u8 is_disable = 0;
+ u8 fault_det = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "src %U", unformat_ip46_address, &src, IP46_TYPE_ANY))
+ ;
+ else if (unformat (input, "src-port-range %U",
+ unformat_port_range, &start_src_port, &end_src_port))
+ ;
+ else
+ if (unformat
+ (input, "dst %U", unformat_ip46_address, &dst, IP46_TYPE_ANY))
+ ;
+ else if (unformat (input, "dst-port-range %U",
+ unformat_port_range, &start_dst_port, &end_dst_port))
+ ;
+ else if (unformat (input, "interval %d", &interval))
+ ;
+ else if (unformat (input, "fault-detect"))
+ fault_det = 1;
+ else if (unformat (input, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ ip46_udp_ping_set_flow (src, dst, start_src_port, end_src_port,
+ start_dst_port, end_dst_port, (u16) interval,
+ fault_det, is_disable);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_udp_ping_command, static) =
+{
+ .path = "set udp-ping",
+ .short_help =
+ "set udp-ping src <local IPv6 address> src-port-range <local port range> \
+ dst <remote IPv6 address> dst-port-range <destination port range> \
+ interval <time interval in sec for which ping packet will be sent> \
+ [disable]",
+ .function = set_udp_ping_command_fn,
+};
+/* *INDENT-ON* */
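+
+/* Example (illustrative values):
+ *   set udp-ping src db00::1 src-port-range 5000:5002
+ *       dst db00::2 dst-port-range 6000:6002 interval 1
+ */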
+
+static clib_error_t *
+show_udp_ping_summary_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *s = 0;
+ int i, j;
+ ip46_udp_ping_flow *ip46_flow;
+ u16 src_port, dst_port;
+ udp_ping_flow_data *stats;
+
+ s = format (s, "UDP-Ping data:\n");
+
+ for (i = 0; i < vec_len (udp_ping_main.ip46_flow); i++)
+ {
+ if (pool_is_free_index (udp_ping_main.ip46_flow, i))
+ continue;
+
+ ip46_flow = pool_elt_at_index (udp_ping_main.ip46_flow, i);
+ s = format (s, "Src: %U, Dst: %U\n",
+ format_ip46_address, &ip46_flow->src, IP46_TYPE_ANY,
+ format_ip46_address, &ip46_flow->dst, IP46_TYPE_ANY);
+
+ s = format (s, "Start src port: %u, End src port: %u\n",
+ ip46_flow->udp_data.start_src_port,
+ ip46_flow->udp_data.end_src_port);
+ s = format (s, "Start dst port: %u, End dst port: %u\n",
+ ip46_flow->udp_data.start_dst_port,
+ ip46_flow->udp_data.end_dst_port);
+ s = format (s, "Interval: %u\n", ip46_flow->udp_data.interval);
+
+ j = 0;
+ for (src_port = ip46_flow->udp_data.start_src_port;
+ src_port <= ip46_flow->udp_data.end_src_port; src_port++)
+ {
+ for (dst_port = ip46_flow->udp_data.start_dst_port;
+ dst_port <= ip46_flow->udp_data.end_dst_port; dst_port++)
+ {
+ stats = ip46_flow->udp_data.stats + j;
+ s =
+ format (s, "\nSrc Port - %u, Dst Port - %u, Flow CTX - %u\n",
+ src_port, dst_port, stats->flow_ctx);
+ s =
+ format (s, "Path State - %s\n",
+ (stats->retry > MAX_PING_RETRIES) ? "Down" : "Up");
+ s = format (s, "Path Data:\n");
+ s = print_analyse_flow (s,
+ &ip46_flow->udp_data.
+ stats[j].analyse_data);
+ j++;
+ }
+ }
+ s = format (s, "\n\n");
+ }
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_udp_ping_cmd, static) =
+{
+ .path = "show udp-ping summary",
+ .short_help = "Summary of udp-ping",
+ .function = show_udp_ping_summary_cmd_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief UDP-Ping Process node.
+ * @node udp-ping-process
+ *
+ * This is a process node which wakes up periodically to send
+ * out udp probe packets for all configured sessions.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ */
+uword
+udp_ping_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ f64 now;
+ uword *event_data = 0;
+ int i;
+ ip46_udp_ping_flow *ip46_flow;
+
+ while (1)
+ {
+ vec_reset_length (event_data);
+ vlib_process_wait_for_event_or_clock (vm, udp_ping_main.timer_interval);
+ (void) vlib_process_get_events (vm, &event_data);
+ now = vlib_time_now (vm);
+
+ for (i = 0; i < vec_len (udp_ping_main.ip46_flow); i++)
+ {
+ if (pool_is_free_index (udp_ping_main.ip46_flow, i))
+ continue;
+
+ ip46_flow = pool_elt_at_index (udp_ping_main.ip46_flow, i);
+ if (ip46_flow->udp_data.next_send_time < now)
+ udp_ping_send_ip6_pak (udp_ping_main.vlib_main, ip46_flow);
+ }
+ }
+ return 0;
+}
+
+/**
+ * @brief HopByHop analyse function for udp-ping response.
+ *
+ * Walks through all hbh options present in udp-ping response
+ * and uses analyser library for the analysis.
+ *
+ */
+void
+udp_ping_analyse_hbh (vlib_buffer_t * b0,
+ u32 flow_id,
+ u16 src_port,
+ u16 dst_port,
+ ip6_hop_by_hop_option_t * opt0,
+ ip6_hop_by_hop_option_t * limit0, u16 len)
+{
+ u8 type0;
+ ip46_udp_ping_flow *ip46_flow;
+ u16 flow_index;
+ ioam_analyser_data_t *data;
+ ioam_e2e_option_t *e2e;
+ ioam_trace_option_t *trace;
+
+  /* If the packet doesn't match a UDP session then return */
+ if (PREDICT_FALSE (pool_is_free_index (udp_ping_main.ip46_flow, flow_id)))
+ return;
+
+ ip46_flow = udp_ping_main.ip46_flow + flow_id;
+ /* Check port is within range */
+ if (PREDICT_FALSE ((src_port < ip46_flow->udp_data.start_src_port) ||
+ (src_port > ip46_flow->udp_data.end_src_port) ||
+ (dst_port < ip46_flow->udp_data.start_dst_port) ||
+ (dst_port > ip46_flow->udp_data.end_dst_port)))
+ return;
+
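+  /* Stats are a flattened 2-D array in row-major order: rows are source
+   * ports, columns are destination ports. E.g. with src ports 100-101
+   * and dst ports 200-202 (illustrative values), the probe (101, 201)
+   * maps to index 1 * 3 + 1 = 4. */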
+ flow_index = (src_port - ip46_flow->udp_data.start_src_port) *
+ (ip46_flow->udp_data.end_dst_port - ip46_flow->udp_data.start_dst_port +
+ 1);
+ flow_index += (dst_port - ip46_flow->udp_data.start_dst_port);
+ data = &(ip46_flow->udp_data.stats[flow_index].analyse_data);
+
+ data->pkt_counter++;
+ data->bytes_counter += len;
+
+ vnet_buffer (b0)->l2_classify.opaque_index =
+ ip46_flow->udp_data.stats[flow_index].flow_ctx;
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
+	  /* Add trace here as it hasn't been done yet */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+ trace = (ioam_trace_option_t *) opt0;
+ if (PREDICT_FALSE
+ (trace->trace_hdr.ioam_trace_type & BIT_LOOPBACK_REPLY))
+ {
+ ip6_ioam_analyse_hbh_trace_loopback (data, &trace->trace_hdr,
+ (trace->hdr.length - 2));
+ return;
+ }
+ ip6_hbh_ioam_trace_data_list_handler (b0,
+ vlib_buffer_get_current (b0),
+ opt0);
+ (void) ip6_ioam_analyse_hbh_trace (data, &trace->trace_hdr, len,
+ (trace->hdr.length - 2));
+ break;
+ case HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE:
+ e2e = (ioam_e2e_option_t *) opt0;
+ (void) ip6_ioam_analyse_hbh_e2e (data, &e2e->e2e_hdr, len);
+ break;
+ case 0: /* Pad1 */
+ opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ break;
+ }
+ opt0 = (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ }
+ ip46_flow->udp_data.stats[flow_index].retry = 0;
+}
+
+/**
+ * @brief UDP-Ping request/response handler function.
+ *
+ * Checks the udp-ping packet type (request/response) and handles it.
+ * If the packet is not a udp-ping packet, strips off the hbh options
+ * and enqueues the packet to the node registered for the next protocol,
+ * so that protocol processing can continue.
+ *
+ */
+void
+udp_ping_local_analyse (vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_hop_by_hop_header_t * hbh0, u16 * next0)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ *next0 = UDP_PING_NEXT_IP6_DROP;
+
+ if (PREDICT_TRUE (hbh0->protocol == IP_PROTOCOL_UDP))
+ {
+ ip6_hop_by_hop_option_t *opt0;
+ ip6_hop_by_hop_option_t *limit0;
+ u16 p_len0;
+ udp_ping_t *udp0;
+
+ /* Check for udp ping packet */
+ udp0 = (udp_ping_t *) ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ if ((udp0->ping_data.probe_marker1 ==
+ clib_host_to_net_u32 (UDP_PING_PROBE_MARKER1)) &&
+ (udp0->ping_data.probe_marker2 ==
+ clib_host_to_net_u32 (UDP_PING_PROBE_MARKER2)))
+ {
+ if (udp0->ping_data.msg_type == UDP_PING_PROBE)
+ {
+ udp_ping_create_reply_from_probe_ip6 (ip0, hbh0, udp0);
+ /* Skip e2e processing */
+ vnet_buffer (b0)->l2_classify.opaque_index = 0x7FFFFFFF;
+ *next0 = UDP_PING_NEXT_IP6_LOOKUP;
+ return;
+ }
+
+ /* Reply */
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 = (ip6_hop_by_hop_option_t *)
+ ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+ p_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_ping_analyse_hbh (b0,
+ clib_net_to_host_u16 (udp0->
+ ping_data.sender_handle),
+ clib_net_to_host_u16 (udp0->udp.dst_port),
+ clib_net_to_host_u16 (udp0->udp.src_port),
+ opt0, limit0, p_len0);
+
+ /* UDP Ping packet, so return */
+ return;
+ }
+ }
+
+ /* If next header is SR, then destination may get overwritten to
+ * remote address. So pass it to SR processing as it may be local packet
+ * afterall
+ */
+ if (PREDICT_FALSE (hbh0->protocol == IPPROTO_IPV6_ROUTE))
+ goto end;
+
+  /* Not a udp-ping packet: remove the hbh-ioam header by advancing the
+   * buffer and sliding the 40-byte IPv6 header forward over it. */
+ u64 *copy_dst0, *copy_src0;
+ u16 new_l0;
+
+ vlib_buffer_advance (b0, (hbh0->length + 1) << 3);
+
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+ ((hbh0->length + 1) << 3);
+
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ ip0->protocol = hbh0->protocol;
+
+ copy_src0 = (u64 *) ip0;
+ copy_dst0 = copy_src0 + (hbh0->length + 1);
+ copy_dst0[4] = copy_src0[4];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[0] = copy_src0[0];
+
+end:
+ *next0 = lm->local_next_by_ip_protocol[hbh0->protocol];
+ return;
+}
+
+/**
+ * @brief udp ping request/response packet receive node.
+ * @node udp-ping-local
+ *
+ * This function receives udp ping request/response packets and process them.
+ * For request packets, response is created and sent.
+ * For response packets, they are analysed and results stored.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer, next index usage
+ *
+ * <em>Uses:</em>
+ * - <code>udp_ping_local_analyse(p0, ip0, hbh0, &next0)</code>
+ *   - Checks the packet type (request/response) and processes it.
+ *
+ * <em>Next Index:</em>
+ * - Dispatches the packet to ip6-lookup/ip6-drop depending on type of packet.
+ */
+static uword
+udp_ping_local_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ udp_ping_next_t next_index;
+ u32 *from, *to_next, n_left_from, n_left_to_next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ u16 next0, next1;
+ u32 pi0, pi1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ /* Prefetch 3 cache lines as we need to look deep into packet */
+ CLIB_PREFETCH (p2->data, 3 * CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, 3 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+
+ udp_ping_local_analyse (p0, ip0, hbh0, &next0);
+ udp_ping_local_analyse (p1, ip1, hbh1, &next1);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (p0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ udp_ping_trace_t *t0 =
+ vlib_add_trace (vm, node, p0, sizeof (*t0));
+ udp_ping_t *udp0;
+
+ /* Check for udp ping packet */
+ udp0 =
+ (udp_ping_t *) ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+ t0->src = ip0->src_address;
+ t0->dst = ip0->dst_address;
+ t0->src_port = clib_net_to_host_u16 (udp0->udp.src_port);
+ t0->dst_port = clib_net_to_host_u16 (udp0->udp.dst_port);
+ t0->handle =
+ clib_net_to_host_u16 (udp0->ping_data.sender_handle);
+ t0->msg_type = udp0->ping_data.msg_type;
+ t0->next_index = next0;
+ }
+ if (p1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ udp_ping_trace_t *t1 =
+ vlib_add_trace (vm, node, p1, sizeof (*t1));
+ udp_ping_t *udp1;
+
+ /* Check for udp ping packet */
+ udp1 =
+ (udp_ping_t *) ((u8 *) hbh1 + ((hbh1->length + 1) << 3));
+ t1->src = ip1->src_address;
+ t1->dst = ip1->dst_address;
+ t1->src_port = clib_net_to_host_u16 (udp1->udp.src_port);
+ t1->dst_port = clib_net_to_host_u16 (udp1->udp.dst_port);
+ t1->handle =
+ clib_net_to_host_u16 (udp1->ping_data.sender_handle);
+ t1->msg_type = udp1->ping_data.msg_type;
+ t1->next_index = next1;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ u16 next0;
+ u32 pi0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+
+ udp_ping_local_analyse (p0, ip0, hbh0, &next0);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (p0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ udp_ping_trace_t *t0 =
+ vlib_add_trace (vm, node, p0, sizeof (*t0));
+ udp_ping_t *udp0;
+
+ /* Check for udp ping packet */
+ udp0 =
+ (udp_ping_t *) ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+ t0->src = ip0->src_address;
+ t0->dst = ip0->dst_address;
+ t0->src_port = clib_net_to_host_u16 (udp0->udp.src_port);
+ t0->dst_port = clib_net_to_host_u16 (udp0->udp.dst_port);
+ t0->handle =
+ clib_net_to_host_u16 (udp0->ping_data.sender_handle);
+ t0->msg_type = udp0->ping_data.msg_type;
+ t0->next_index = next0;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Node for udp-ping-local
+ */
+VLIB_REGISTER_NODE (udp_ping_local, static) =
+{
+ .function = udp_ping_local_node_fn,
+ .name = "udp-ping-local",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp_ping_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = UDP_PING_N_NEXT,
+ .next_nodes =
+ {
+ [UDP_PING_NEXT_DROP] = "error-drop",
+ [UDP_PING_NEXT_PUNT] = "error-punt",
+ [UDP_PING_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
+ [UDP_PING_NEXT_ICMP] = "ip6-icmp-input",
+ [UDP_PING_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [UDP_PING_NEXT_IP6_DROP] = "ip6-drop",
+ },
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+udp_ping_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+
+ udp_ping_main.vlib_main = vm;
+ udp_ping_main.vnet_main = vnet_get_main ();
+ udp_ping_main.timer_interval = 1e9;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return (error);
+
+ ip6_register_protocol (IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS,
+ udp_ping_local.index);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (udp_ping_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_packet.h b/src/plugins/ioam/udp-ping/udp_ping_packet.h
new file mode 100644
index 00000000..09dcb1c2
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_packet.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_PACKET_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_PACKET_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/udp/udp_packet.h>
+
+#define UDP_PING_PROBE 1
+#define UDP_PING_REPLY 2
+
+#define UDP_PING_PROBE_MARKER1 0xDEAD
+#define UDP_PING_PROBE_MARKER2 0xBEEF
+
+/*
+ * Refer to:
+ * https://tools.ietf.org/html/draft-lapukhov-dataplane-probe-01
+ * 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Probe Marker (1) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Probe Marker (2) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Version | Message Type | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Telemetry Request Vector |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop Limit | Hop Count | Must Be Zero |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Maximum Length | Current Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Sender's Handle | Sequence Number |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * (1) The "Probe Marker" fields are arbitrary 32-bit values generally
+ used by the network elements to identify the packet as a probe
+ packet. These fields should be interpreted as unsigned integer
+ values, stored in network byte order. For example, a network
+ element may be configured to recognize a UDP packet destined to
+ port 31337 and having 0xDEAD 0xBEEF as the values in "Probe
+ Marker" field as an active probe, and treat it respectively.
+
+ (2) "Version Number" is currently set to 1.
+
+ (3) The "Message Type" field value could be either "1" - "Probe" or
+ "2" - "Probe Reply"
+
+ (4) The "Flags" field is 8 bits, and defines the following flags:
+
+ (5)
+ (1) "Overflow" (O-bit) (least significant bit). This bit is
+ set by the network element if the number of records on the
+ packet is at the maximum limit as specified by the packet:
+ i.e. the packet is already "full" of telemetry
+ information.
+
+ (6) "Telemetry Request Vector" is a 32-bit long field that requests
+ well-known inband telemetry information from the network
+ elements on the path. A bit set in this vector translates to a
+ request of a particular type of information. The following
+ types/bits are currently defined, starting with the least
+ significant bit first:
+
+ (1) Bit 0: Device identifier.
+
+ (2) Bit 1: Timestamp.
+
+ (3) Bit 2: Queueing delay.
+
+ (4) Bit 3: Ingress/Egress port identifiers.
+
+ (5) Bit 31: Opaque state snapshot request.
+
+ (7) "Hop Limit" is defined only for "Message Type" of "1"
+ ("Probe"). For "Probe Reply" the "Hop Limit" field must be set
+ to zero. This field is treated as an integer value
+ representing the number of network elements. See the Section 4
+ section on the intended use of the field.
+
+ (8) The "Hop Count" field specifies the current number of hops of
+      capable network elements the packet has transited through.  It
+ begins with zero and must be incremented by one for every
+ network element that adds a telemetry record. Combined with a
+ push mechanism, this simplifies the work for the subsequent
+ network element and the packet receiver. The subsequent
+ network element just needs to parse the template and then
+ insert new record(s) immediately after the template.
+
+ (9) The "Max Length" field specifies the maximum length of the
+ telemetry payload in bytes. Given that the sender knows the
+ minimum path MTU, the sender can set the maximum of payload
+ bytes allowed before exceeding the MTU. Thus, a simple
+ comparison between "Current Length" and "Max Length" allows to
+ decide whether or not data could be added.
+
+ (10) The "Current Length" field specifies the current length of data
+      stored in the probe. This field is incremented by each network
+ element by the number of bytes it has added with the telemetry
+ data frame.
+
+ (11) The "Sender's Handle" field is set by the sender to allow the
+ receiver to identify a particular originator of probe packets.
+ Along with "Sequence Number" it allows for tracking of packet
+ order and loss within the network.
+
+ *
+ */
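+/* All multi-byte fields are carried in network byte order; 'reserve'
+ * maps to the "Must Be Zero" field in the layout above. */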
+typedef struct
+{
+ u32 probe_marker1;
+ u32 probe_marker2;
+ u8 version;
+ u8 msg_type;
+ u16 flags;
+ u32 tel_req_vec;
+ u8 hop_limit;
+ u8 hop_count;
+ u16 reserve;
+ u16 max_len;
+ u16 cur_len;
+ u16 sender_handle;
+ u16 seq_no;
+} udp_ping_data;
+
+typedef struct
+{
+ udp_header_t udp;
+ udp_ping_data ping_data;
+} udp_ping_t;
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_PACKET_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_test.c b/src/plugins/ioam/udp-ping/udp_ping_test.c
new file mode 100644
index 00000000..4ec11351
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_test.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * udp_ping_test.c - test harness for udp ping plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+
+/* Declare message IDs */
+#include <ioam/udp-ping/udp_ping_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/udp-ping/udp_ping_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} udp_ping_test_main_t;
+
+udp_ping_test_main_t udp_ping_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(udp_ping_add_del_reply) \
+_(udp_ping_export_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = udp_ping_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(UDP_PING_ADD_DEL_REPLY, udp_ping_add_del_reply) \
+_(UDP_PING_EXPORT_REPLY, udp_ping_export_reply) \
+
+
+/* M: construct, but don't yet send a message */
+
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 5.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+static int
+api_udp_ping_add_del_req (vat_main_t * vam)
+{
+ udp_ping_test_main_t *sm = &udp_ping_test_main;
+ unformat_input_t *input = vam->input;
+ vl_api_udp_ping_add_del_req_t *mp;
+ int rv = 0;
+ ip6_address_t dst, src;
+ u32 start_src_port, end_src_port;
+ u32 start_dst_port, end_dst_port;
+ u32 interval;
+ u8 is_disable = 0;
+ f64 timeout;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "src %U", unformat_ip6_address, &src))
+ ;
+ else if (unformat (input, "start-src-port %d", &start_src_port))
+ ;
+ else if (unformat (input, "end-src-port %d", &end_src_port))
+ ;
+ else if (unformat (input, "start-dst-port %d", &start_dst_port))
+ ;
+ else if (unformat (input, "end-dst-port %d", &end_dst_port))
+ ;
+ else if (unformat (input, "dst %U", unformat_ip6_address, &dst))
+ ;
+ else if (unformat (input, "interval %d", &interval))
+ ;
+ else if (unformat (input, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ M (UDP_PING_ADD_DEL_REQ, udp_ping_add);
+
+ clib_memcpy (mp->src_ip_address, &src, 16);
+ clib_memcpy (mp->dst_ip_address, &dst, 16);
+ mp->start_src_port = (u16) start_src_port;
+ mp->end_src_port = (u16) end_src_port;
+ mp->start_dst_port = (u16) start_dst_port;
+ mp->end_dst_port = (u16) end_dst_port;
+ mp->interval = (u16) interval;
+ mp->is_ipv4 = 0;
+ mp->dis = is_disable;
+
+ S;
+ W;
+
+ return (rv);
+}
+
+static int
+api_udp_ping_export_req (vat_main_t * vam)
+{
+ udp_ping_test_main_t *sm = &udp_ping_test_main;
+ unformat_input_t *input = vam->input;
+ vl_api_udp_ping_export_req_t *mp;
+ int rv = 0;
+ int is_add = 1;
+ f64 timeout;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "export"))
+ is_add = 1;
+ else if (unformat (input, "disable"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ M (UDP_PING_EXPORT_REQ, udp_ping_export);
+
+ mp->enable = is_add;
+
+ S;
+ W;
+
+ return (rv);
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(udp_ping_add_del_req, "src <local IPv6 address> start-src-port <first local port> "\
+ "end-src-port <last local port> " \
+ "dst <remote IPv6 address> start-dst-port <first destination port> "\
+ "end-dst-port <last destination port> "\
+ "interval <time interval in seconds between successive probes> "\
+ "[disable]") \
+_(udp_ping_export_req, "export [disable]") \
+
+
+static void
+udp_ping_test_api_hookup (vat_main_t * vam)
+{
+ udp_ping_test_main_t *sm = &udp_ping_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ udp_ping_test_main_t *sm = &udp_ping_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "udp_ping_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ udp_ping_test_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_util.c b/src/plugins/ioam/udp-ping/udp_ping_util.c
new file mode 100644
index 00000000..55f48ea4
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_util.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <ioam/encap/ip6_ioam_e2e.h>
+#include <ioam/encap/ip6_ioam_trace.h>
+#include <ioam/udp-ping/udp_ping_packet.h>
+#include <ioam/udp-ping/udp_ping.h>
+
+#define UDP_PING_REWRITE_LEN 1000
+
+u16
+udp_ping_fill_udp_data (udp_ping_t * udp_ping,
+ u16 src_port, u16 dst_port, u8 msg_type, u16 ctx)
+{
+ /* Populate udp ping header */
+ udp_ping->udp.src_port = clib_host_to_net_u16 (src_port);
+ udp_ping->udp.dst_port = clib_host_to_net_u16 (dst_port);
+ udp_ping->udp.length = clib_host_to_net_u16 (sizeof (udp_ping_t));
+ udp_ping->udp.checksum = 0;
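+  /* Left as zero here; the real checksum is computed at transmit time
+   * in udp_ping_send_ip6_pak(). */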
+ udp_ping->ping_data.probe_marker1 =
+ clib_host_to_net_u32 (UDP_PING_PROBE_MARKER1);
+ udp_ping->ping_data.probe_marker2 =
+ clib_host_to_net_u32 (UDP_PING_PROBE_MARKER2);
+ udp_ping->ping_data.version = 1;
+ udp_ping->ping_data.msg_type = msg_type;
+ udp_ping->ping_data.flags = clib_host_to_net_u16 (0);
+ udp_ping->ping_data.tel_req_vec = clib_host_to_net_u16 (0);
+ udp_ping->ping_data.hop_limit = 254;
+ udp_ping->ping_data.hop_count = 0;
+ udp_ping->ping_data.reserve = clib_host_to_net_u16 (0);
+ udp_ping->ping_data.max_len =
+ udp_ping->ping_data.cur_len = clib_host_to_net_u16 (0);
+ udp_ping->ping_data.sender_handle = clib_host_to_net_u16 (ctx);
+ udp_ping->ping_data.seq_no = clib_host_to_net_u16 (0);
+
+ return (sizeof (udp_ping_t));
+}
+
+/**
+ * @brief Frame IPv6 udp-ping probe packet.
+ *
+ * Creates an IPv6 UDP-Ping probe packet along with its iOAM headers.
+ *
+ */
+int
+udp_ping_create_ip6_pak (u8 * buf, /*u16 len, */
+ ip6_address_t src, ip6_address_t dst,
+ u16 src_port, u16 dst_port, u8 msg_type, u16 ctx)
+{
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ //trace_profile *profile = NULL;
+ u16 hbh_len = 0, rnd_size = 0, ip0_len = 0, udp_len = 0;
+ u16 trace_len = 0, trace_data_size = 0;
+ u16 e2e_len = sizeof (ioam_e2e_option_t) - sizeof (ip6_hop_by_hop_option_t);
+ u8 *current = NULL;
+ ioam_trace_option_t *trace_option;
+ ioam_e2e_option_t *e2e;
+
+ ip0 = (ip6_header_t *) buf;
+
+ ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ ip0->protocol = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
+ ip0->hop_limit = 255;
+
+ ip0->src_address = src;
+ ip0->dst_address = dst;
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+
+ /* Calculate hbh header len */
+ //profile = trace_profile_find();
+ trace_data_size = fetch_trace_data_size (TRACE_TYPE_IF_TS_APP);
+  /* We need twice the trace data space, as the packet traverses back to the source */
+ trace_len = sizeof (ioam_trace_option_t) +
+ (5 * trace_data_size * 2) - sizeof (ip6_hop_by_hop_option_t);
+ //(profile->num_elts * trace_data_size * 2);
+ hbh_len = e2e_len + trace_len + sizeof (ip6_hop_by_hop_header_t);
+ rnd_size = (hbh_len + 7) & ~7;
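+  /* Round the option area up to a multiple of 8 so the header ends on
+   * an 8-octet boundary, as required for IPv6 extension headers
+   * (RFC 8200). */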
+
+ /* Length of header in 8 octet units, not incl first 8 octets */
+ hbh0->length = (rnd_size >> 3) - 1;
+ hbh0->protocol = IP_PROTOCOL_UDP;
+
+ /* Populate hbh header */
+ current = (u8 *) (hbh0 + 1);
+
+ /* Populate trace */
+ trace_option = (ioam_trace_option_t *) current;
+ trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST |
+ HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
+ trace_option->hdr.length = trace_len;
+ trace_option->trace_hdr.ioam_trace_type =
+ TRACE_TYPE_IF_TS_APP & TRACE_TYPE_MASK;
+
+ trace_option->trace_hdr.data_list_elts_left = 5 * 2;
+ //profile->num_elts * 2;
+
+ current += trace_option->hdr.length + sizeof (ip6_hop_by_hop_option_t);
+
+ /* Populate e2e */
+ e2e = (ioam_e2e_option_t *) current;
+ e2e->hdr.type = HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE;
+ e2e->hdr.length = e2e_len;
+
+ /* Move past hbh header */
+ current = ((u8 *) hbh0) + ((hbh0->length + 1) << 3);
+
+ /* Populate udp ping header */
+ udp_len = udp_ping_fill_udp_data ((udp_ping_t *) current,
+ src_port, dst_port, msg_type, ctx);
+
+ /* Calculate total length and set it in ip6 header */
+ ip0_len = ((hbh0->length + 1) << 3) + udp_len;
+ //ip0_len = (len > ip0_len) ? len : ip0_len;
+ ip0->payload_length = clib_host_to_net_u16 (ip0_len);
+
+ return (ip0_len + sizeof (ip6_header_t));
+}
+
+int
+udp_ping_compare_flow (ip46_address_t src, ip46_address_t dst,
+ u16 start_src_port, u16 end_src_port,
+ u16 start_dst_port, u16 end_dst_port,
+ ip46_udp_ping_flow * flow)
+{
+ if ((0 == ip46_address_cmp (&flow->src, &src)) &&
+ (0 == ip46_address_cmp (&flow->dst, &dst)) &&
+ (flow->udp_data.start_src_port == start_src_port) &&
+ (flow->udp_data.end_src_port == end_src_port) &&
+ (flow->udp_data.start_dst_port == start_dst_port) &&
+ (flow->udp_data.end_dst_port == end_dst_port))
+ {
+ return 0;
+ }
+
+ return -1;
+}
+
+void
+udp_ping_populate_flow (ip46_address_t src, ip46_address_t dst,
+ u16 start_src_port, u16 end_src_port,
+ u16 start_dst_port, u16 end_dst_port,
+ u16 interval, u8 fault_det, ip46_udp_ping_flow * flow)
+{
+ flow->src = src;
+ flow->dst = dst;
+ flow->udp_data.start_src_port = start_src_port;
+ flow->udp_data.end_src_port = end_src_port;
+ flow->udp_data.start_dst_port = start_dst_port;
+ flow->udp_data.end_dst_port = end_dst_port;
+ flow->udp_data.interval = interval;
+ flow->udp_data.next_send_time = 0;
+ flow->fault_det = fault_det;
+}
+
+void
+udp_ping_create_rewrite (ip46_udp_ping_flow * flow, u16 ctx)
+{
+ u16 src_port;
+ u16 dst_port;
+ u16 no_flows;
+ int i;
+ udp_ping_flow_data *stats;
+
+ no_flows =
+ (flow->udp_data.end_dst_port - flow->udp_data.start_dst_port) + 1;
+ no_flows *=
+ ((flow->udp_data.end_src_port - flow->udp_data.start_src_port) + 1);
+
+ vec_validate_aligned (flow->udp_data.stats,
+ no_flows - 1, CLIB_CACHE_LINE_BYTES);
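+  /* One stats entry (and one prebuilt rewrite) is kept per
+   * (src-port, dst-port) combination in the configured ranges. */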
+
+ i = 0;
+ for (src_port = flow->udp_data.start_src_port;
+ src_port <= flow->udp_data.end_src_port; src_port++)
+ {
+ for (dst_port = flow->udp_data.start_dst_port;
+ dst_port <= flow->udp_data.end_dst_port; dst_port++)
+ {
+ u8 *rewrite = NULL;
+
+ stats = flow->udp_data.stats + i;
+ ioam_analyse_init_data (&stats->analyse_data);
+ stats->analyse_data.is_free = 0;
+
+ vec_validate (rewrite, UDP_PING_REWRITE_LEN - 1);
+ stats->ping_rewrite = rewrite;
+ stats->rewrite_len =
+ udp_ping_create_ip6_pak (rewrite,
+ flow->src.ip6, flow->dst.ip6,
+ src_port, dst_port, UDP_PING_PROBE, ctx);
+	  /* For each flow we need to create an ioam e2e flow record */
+ stats->flow_ctx = ioam_flow_add (1, (u8 *) "udp_ping"); //FIXME
+ i++;
+ }
+ }
+}
+
+void
+udp_ping_free_flow_data (ip46_udp_ping_flow * flow)
+{
+ int i;
+ udp_ping_flow_data *stats;
+
+ for (i = 0; i < vec_len (flow->udp_data.stats); i++)
+ {
+ stats = flow->udp_data.stats + i;
+ vec_free (stats->ping_rewrite);
+ stats->ping_rewrite = NULL;
+ stats->rewrite_len = 0;
+ }
+
+ vec_free (flow->udp_data.stats);
+ flow->udp_data.stats = NULL;
+}
+
+/**
+ * @brief Create and send ipv6 udp-ping probe packet.
+ *
+ */
+void
+udp_ping_send_ip6_pak (vlib_main_t * vm, ip46_udp_ping_flow * flow)
+{
+ u16 no_pak;
+ u32 *buffers = NULL;
+ int i;
+ vlib_buffer_t *b0;
+ udp_ping_flow_data *stats;
+ vlib_frame_t *nf = 0;
+ u32 *to_next;
+ vlib_node_t *next_node;
+
+ next_node = vlib_get_node_by_name (vm, (u8 *) "ip6-lookup");
+ nf = vlib_get_frame_to_node (vm, next_node->index);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+
+ no_pak = vec_len (flow->udp_data.stats);
+ vec_validate (buffers, (no_pak - 1));
+ if (vlib_buffer_alloc (vm, buffers, vec_len (buffers)) != no_pak)
+ {
+      /* Buffer allocation failed; skip this probe cycle. */
+ return;
+ }
+
+ for (i = 0; i < no_pak; i++)
+ {
+ int bogus;
+ b0 = vlib_get_buffer (vm, buffers[i]);
+ stats = flow->udp_data.stats + i;
+ clib_memcpy (b0->data, stats->ping_rewrite, stats->rewrite_len);
+ b0->current_data = 0;
+ b0->current_length = stats->rewrite_len;
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ /* If session is going down, then set path down */
+ if ((stats->retry != 0) && ((stats->retry % MAX_PING_RETRIES) == 0))
+ ip6_ioam_analyse_set_paths_down (&stats->analyse_data);
+
+ stats->retry++;
+ stats->analyse_data.pkt_sent++;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+ vnet_buffer (b0)->l2_classify.opaque_index = stats->flow_ctx;
+
+ ip6_header_t *ip6 = vlib_buffer_get_current (b0);
+ ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip6 + 1);
+ udp_header_t *udp =
+ (udp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+
+ /* If session is down, then set loopback flag in probe.
+ * This is for fault isolation.
+ */
+ if (flow->fault_det && (stats->retry > MAX_PING_RETRIES))
+ {
+ ioam_trace_option_t *opt = (ioam_trace_option_t *)
+ ip6_hbh_get_option (hbh, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+ ip6_hbh_ioam_trace_set_bit (opt, BIT_LOOPBACK);
+ }
+
+      /* The checksum is not pre-computed, as we intend to vary the packet
+       * length for every probe. That isn't done yet, but is to be taken
+       * up later.
+       */
+ udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6, &bogus);
+ ASSERT (bogus == 0);
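+      /* Per RFC 768, a computed checksum of zero is transmitted as
+       * 0xffff, since an all-zero checksum field means "no checksum". */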
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+
+ if (nf->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, next_node->index, nf);
+ nf = vlib_get_frame_to_node (vm, next_node->index);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+ }
+ *to_next = buffers[i];
+ nf->n_vectors++;
+ to_next++;
+ }
+ vlib_put_frame_to_node (vm, next_node->index, nf);
+
+ flow->udp_data.next_send_time =
+ vlib_time_now (vm) + flow->udp_data.interval;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/udp-ping/udp_ping_util.h b/src/plugins/ioam/udp-ping/udp_ping_util.h
new file mode 100644
index 00000000..fcaf27bd
--- /dev/null
+++ b/src/plugins/ioam/udp-ping/udp_ping_util.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_UTIL_H_
+#define PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_UTIL_H_
+
+int udp_ping_create_ip6_pak (u8 * buf, /*u16 len, */
+ ip6_address_t src, ip6_address_t dst,
+ u16 src_port, u16 dst_port,
+ u8 msg_type, u16 ctx);
+
+int
+udp_ping_compare_flow (ip46_address_t src, ip46_address_t dst,
+ u16 start_src_port, u16 end_src_port,
+ u16 start_dst_port, u16 end_dst_port,
+ ip46_udp_ping_flow * flow);
+
+void
+udp_ping_populate_flow (ip46_address_t src, ip46_address_t dst,
+ u16 start_src_port, u16 end_src_port,
+ u16 start_dst_port, u16 end_dst_port,
+ u16 interval, u8 fault_det,
+ ip46_udp_ping_flow * flow);
+
+void udp_ping_free_flow_data (ip46_udp_ping_flow * flow);
+
+void udp_ping_create_rewrite (ip46_udp_ping_flow * flow, u16 ctx);
+
+void udp_ping_send_ip6_pak (vlib_main_t * vm, ip46_udp_ping_flow * flow);
+
+/**
+ * @brief Create and send ipv6 udp-ping response packet.
+ *
+ */
+always_inline void
+udp_ping_create_reply_from_probe_ip6 (ip6_header_t * ip,
+ ip6_hop_by_hop_header_t * hbh,
+ udp_ping_t * udp)
+{
+ ip6_address_t src;
+ u16 src_port;
+ ioam_trace_option_t *trace;
+
+ src = ip->src_address;
+
+ ip->src_address = ip->dst_address;
+ ip->dst_address = src;
+
+ trace = (ioam_trace_option_t *)
+ ip6_hbh_get_option (hbh, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+ ip6_hbh_ioam_trace_reset_bit (trace, BIT_LOOPBACK);
+
+ /* No need of endian transform */
+ src_port = udp->udp.src_port;
+
+ udp->udp.src_port = udp->udp.dst_port;
+ udp->udp.dst_port = src_port;
+ udp->udp.checksum = 0; //FIXME
+
+ udp->ping_data.msg_type = UDP_PING_REPLY;
+}
+
+#endif /* PLUGINS_IOAM_PLUGIN_IOAM_UDP_PING_UDP_PING_UTIL_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ixge.am b/src/plugins/ixge.am
new file mode 100644
index 00000000..7e61344b
--- /dev/null
+++ b/src/plugins/ixge.am
@@ -0,0 +1,20 @@
+# Copyright (c) 2016 Cisco Systems, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppplugins_LTLIBRARIES += ixge_plugin.la
+
+ixge_plugin_la_SOURCES = ixge/ixge.c
+
+noinst_HEADERS += ixge/ixge.h
+
+# vi:syntax=automake
diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c
new file mode 100644
index 00000000..222c148c
--- /dev/null
+++ b/src/plugins/ixge/ixge.c
@@ -0,0 +1,2958 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * WARNING!
+ * This driver is not intended for production use and it is unsupported.
+ * It is provided for educational use only.
+ * Please use the supported DPDK driver instead.
+ */
+
+#if __x86_64__ || __i386__
+#include <vppinfra/vector.h>
+
+#ifndef CLIB_HAVE_VEC128
+#warning HACK: ixge driver won't really work, missing u32x4
+typedef unsigned long long u32x4;
+#endif
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/vnet.h>
+#include <ixge/ixge.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#define IXGE_ALWAYS_POLL 0
+
+#define EVENT_SET_FLAGS 0
+#define IXGE_HWBP_RACE_ELOG 0
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+
+/* 10 GIG E (XGE) PHY IEEE 802.3 clause 45 definitions. */
+#define XGE_PHY_DEV_TYPE_PMA_PMD 1
+#define XGE_PHY_DEV_TYPE_PHY_XS 4
+#define XGE_PHY_ID1 0x2
+#define XGE_PHY_ID2 0x3
+#define XGE_PHY_CONTROL 0x0
+#define XGE_PHY_CONTROL_RESET (1 << 15)
+
+ixge_main_t ixge_main;
+static vlib_node_registration_t ixge_input_node;
+static vlib_node_registration_t ixge_process_node;
+
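+/*
+ * Semaphore handshake: spin until bit 0 of the software semaphore
+ * register reads as set, then set bit 1 and re-read until it sticks,
+ * confirming software ownership of firmware-shared resources.
+ */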
+static void
+ixge_semaphore_get (ixge_device_t * xd)
+{
+ ixge_main_t *xm = &ixge_main;
+ vlib_main_t *vm = xm->vlib_main;
+ ixge_regs_t *r = xd->regs;
+ u32 i;
+
+ i = 0;
+ while (!(r->software_semaphore & (1 << 0)))
+ {
+ if (i > 0)
+ vlib_process_suspend (vm, 100e-6);
+ i++;
+ }
+ do
+ {
+ r->software_semaphore |= 1 << 1;
+ }
+ while (!(r->software_semaphore & (1 << 1)));
+}
+
+static void
+ixge_semaphore_release (ixge_device_t * xd)
+{
+ ixge_regs_t *r = xd->regs;
+ r->software_semaphore &= ~3;
+}
+
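+/*
+ * Claim a software/firmware shared resource: firmware's claim bits
+ * mirror the software mask shifted left by 5, so retry every 10 ms
+ * until firmware has released the resource before setting our bit.
+ */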
+static void
+ixge_software_firmware_sync (ixge_device_t * xd, u32 sw_mask)
+{
+ ixge_main_t *xm = &ixge_main;
+ vlib_main_t *vm = xm->vlib_main;
+ ixge_regs_t *r = xd->regs;
+ u32 fw_mask = sw_mask << 5;
+ u32 m, done = 0;
+
+ while (!done)
+ {
+ ixge_semaphore_get (xd);
+ m = r->software_firmware_sync;
+ done = (m & fw_mask) == 0;
+ if (done)
+ r->software_firmware_sync = m | sw_mask;
+ ixge_semaphore_release (xd);
+ if (!done)
+ vlib_process_suspend (vm, 10e-3);
+ }
+}
+
+static void
+ixge_software_firmware_sync_release (ixge_device_t * xd, u32 sw_mask)
+{
+ ixge_regs_t *r = xd->regs;
+ ixge_semaphore_get (xd);
+ r->software_firmware_sync &= ~sw_mask;
+ ixge_semaphore_release (xd);
+}
+
+u32
+ixge_read_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index,
+ u32 v, u32 is_read)
+{
+ ixge_regs_t *r = xd->regs;
+ const u32 busy_bit = 1 << 30;
+ u32 x;
+
+ ASSERT (xd->phy_index < 2);
+ ixge_software_firmware_sync (xd, 1 << (1 + xd->phy_index));
+
+ ASSERT (reg_index < (1 << 16));
+ ASSERT (dev_type < (1 << 5));
+ if (!is_read)
+ r->xge_mac.phy_data = v;
+
+ /* Address cycle. */
+  x = reg_index | (dev_type << 16)
+    | (xd->phys[xd->phy_index].mdio_address << 21);
+ r->xge_mac.phy_command = x | busy_bit;
+ /* Busy wait timed to take 28e-6 secs. No suspend. */
+ while (r->xge_mac.phy_command & busy_bit)
+ ;
+
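+  /* Command cycle: the op-code field selects read (2) or write (1). */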
+ r->xge_mac.phy_command = x | ((is_read ? 2 : 1) << 26) | busy_bit;
+ while (r->xge_mac.phy_command & busy_bit)
+ ;
+
+ if (is_read)
+ v = r->xge_mac.phy_data >> 16;
+
+ ixge_software_firmware_sync_release (xd, 1 << (1 + xd->phy_index));
+
+ return v;
+}
+
+static u32
+ixge_read_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index)
+{
+ return ixge_read_write_phy_reg (xd, dev_type, reg_index, 0, /* is_read */
+ 1);
+}
+
+static void
+ixge_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v)
+{
+ (void) ixge_read_write_phy_reg (xd, dev_type, reg_index, v, /* is_read */
+ 0);
+}
+
+static void
+ixge_i2c_put_bits (i2c_bus_t * b, int scl, int sda)
+{
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data);
+ u32 v;
+
+ v = 0;
+ v |= (sda != 0) << 3;
+ v |= (scl != 0) << 1;
+ xd->regs->i2c_control = v;
+}
+
+static void
+ixge_i2c_get_bits (i2c_bus_t * b, int *scl, int *sda)
+{
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data);
+ u32 v;
+
+ v = xd->regs->i2c_control;
+ *sda = (v & (1 << 2)) != 0;
+ *scl = (v & (1 << 0)) != 0;
+}
+
+static u16
+ixge_read_eeprom (ixge_device_t * xd, u32 address)
+{
+ ixge_regs_t *r = xd->regs;
+ u32 v;
+ r->eeprom_read = (( /* start bit */ (1 << 0)) | (address << 2));
+ /* Wait for done bit. */
+ while (!((v = r->eeprom_read) & (1 << 1)))
+ ;
+ return v >> 16;
+}
+
+static void
+ixge_sfp_enable_disable_laser (ixge_device_t * xd, uword enable)
+{
+ u32 tx_disable_bit = 1 << 3;
+ if (enable)
+ xd->regs->sdp_control &= ~tx_disable_bit;
+ else
+ xd->regs->sdp_control |= tx_disable_bit;
+}
+
+static void
+ixge_sfp_enable_disable_10g (ixge_device_t * xd, uword enable)
+{
+ u32 is_10g_bit = 1 << 5;
+ if (enable)
+ xd->regs->sdp_control |= is_10g_bit;
+ else
+ xd->regs->sdp_control &= ~is_10g_bit;
+}
+
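+/*
+ * Walk the (id, address) init-sequence table pointed to by EEPROM word
+ * 0x2b and, on a matching SFP type, replay the 0xffff-terminated list of
+ * core_analog_config register values it references.
+ */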
+static clib_error_t *
+ixge_sfp_phy_init_from_eeprom (ixge_device_t * xd, u16 sfp_type)
+{
+ u16 a, id, reg_values_addr = 0;
+
+ a = ixge_read_eeprom (xd, 0x2b);
+ if (a == 0 || a == 0xffff)
+ return clib_error_create ("no init sequence in eeprom");
+
+ while (1)
+ {
+ id = ixge_read_eeprom (xd, ++a);
+ if (id == 0xffff)
+ break;
+ reg_values_addr = ixge_read_eeprom (xd, ++a);
+ if (id == sfp_type)
+ break;
+ }
+ if (id != sfp_type)
+ return clib_error_create ("failed to find id 0x%x", sfp_type);
+
+ ixge_software_firmware_sync (xd, 1 << 3);
+ while (1)
+ {
+ u16 v = ixge_read_eeprom (xd, ++reg_values_addr);
+ if (v == 0xffff)
+ break;
+ xd->regs->core_analog_config = v;
+ }
+ ixge_software_firmware_sync_release (xd, 1 << 3);
+
+ /* Make sure laser is off. We'll turn on the laser when
+ the interface is brought up. */
+ ixge_sfp_enable_disable_laser (xd, /* enable */ 0);
+ ixge_sfp_enable_disable_10g (xd, /* is_10g */ 1);
+
+ return 0;
+}
+
+static void
+ixge_sfp_device_up_down (ixge_device_t * xd, uword is_up)
+{
+ u32 v;
+
+ if (is_up)
+ {
+ /* pma/pmd 10g serial SFI. */
+ xd->regs->xge_mac.auto_negotiation_control2 &= ~(3 << 16);
+ xd->regs->xge_mac.auto_negotiation_control2 |= 2 << 16;
+
+ v = xd->regs->xge_mac.auto_negotiation_control;
+ v &= ~(7 << 13);
+ v |= (0 << 13);
+ /* Restart autoneg. */
+ v |= (1 << 12);
+ xd->regs->xge_mac.auto_negotiation_control = v;
+
+ while (!(xd->regs->xge_mac.link_partner_ability[0] & 0xf0000))
+ ;
+
+ v = xd->regs->xge_mac.auto_negotiation_control;
+
+ /* link mode 10g sfi serdes */
+ v &= ~(7 << 13);
+ v |= (3 << 13);
+
+ /* Restart autoneg. */
+ v |= (1 << 12);
+ xd->regs->xge_mac.auto_negotiation_control = v;
+
+      /* Dummy read; the link status register latches on read. */
+      (void) xd->regs->xge_mac.link_status;
+ }
+
+ ixge_sfp_enable_disable_laser (xd, /* enable */ is_up);
+
+ /* Give time for link partner to notice that we're up. */
+ if (is_up && vlib_in_process_context (vlib_get_main ()))
+ {
+ vlib_process_suspend (vlib_get_main (), 300e-3);
+ }
+}
+
+always_inline ixge_dma_regs_t *
+get_dma_regs (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 qi)
+{
+ ixge_regs_t *r = xd->regs;
+ ASSERT (qi < 128);
+ if (rt == VLIB_RX)
+ return qi < 64 ? &r->rx_dma0[qi] : &r->rx_dma1[qi - 64];
+ else
+ return &r->tx_dma[qi];
+}
+
+static clib_error_t *
+ixge_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, hif->dev_instance);
+ ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0);
+
+ if (is_up)
+ {
+ xd->regs->rx_enable |= 1;
+ xd->regs->tx_dma_control |= 1;
+ dr->control |= 1 << 25;
+ while (!(dr->control & (1 << 25)))
+ ;
+ }
+ else
+ {
+ xd->regs->rx_enable &= ~1;
+ xd->regs->tx_dma_control &= ~1;
+ }
+
+ ixge_sfp_device_up_down (xd, is_up);
+
+ return /* no error */ 0;
+}
+
+static void
+ixge_sfp_phy_init (ixge_device_t * xd)
+{
+ ixge_phy_t *phy = xd->phys + xd->phy_index;
+ i2c_bus_t *ib = &xd->i2c_bus;
+
+ ib->private_data = xd->device_index;
+ ib->put_bits = ixge_i2c_put_bits;
+ ib->get_bits = ixge_i2c_get_bits;
+ vlib_i2c_init (ib);
+
+ vlib_i2c_read_eeprom (ib, 0x50, 0, 128, (u8 *) & xd->sfp_eeprom);
+
+ if (vlib_i2c_bus_timed_out (ib) || !sfp_eeprom_is_valid (&xd->sfp_eeprom))
+ xd->sfp_eeprom.id = SFP_ID_unknown;
+ else
+ {
+ /* FIXME 5 => SR/LR eeprom ID. */
+ clib_error_t *e =
+ ixge_sfp_phy_init_from_eeprom (xd, 5 + xd->pci_function);
+ if (e)
+ clib_error_report (e);
+ }
+
+ phy->mdio_address = ~0;
+}
+
+static void
+ixge_phy_init (ixge_device_t * xd)
+{
+ ixge_main_t *xm = &ixge_main;
+ vlib_main_t *vm = xm->vlib_main;
+ ixge_phy_t *phy = xd->phys + xd->phy_index;
+
+ switch (xd->device_id)
+ {
+ case IXGE_82599_sfp:
+ case IXGE_82599_sfp_em:
+ case IXGE_82599_sfp_fcoe:
+ /* others? */
+ return ixge_sfp_phy_init (xd);
+
+ default:
+ break;
+ }
+
+ /* Probe address of phy. */
+ {
+ u32 i, v;
+
+ phy->mdio_address = ~0;
+ for (i = 0; i < 32; i++)
+ {
+ phy->mdio_address = i;
+ v = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1);
+ if (v != 0xffff && v != 0)
+ break;
+ }
+
+ /* No PHY found? */
+ if (i >= 32)
+ return;
+ }
+
+ phy->id =
+ ((ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16) |
+ ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2));
+
+ {
+    ELOG_TYPE_DECLARE (e) = {
+      .function = (char *) __FUNCTION__,
+      .format = "ixge %d, phy id 0x%x, mdio address %d",
+      .format_args = "i4i4i4",
+    };
+ struct
+ {
+ u32 instance, id, address;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->instance = xd->device_index;
+ ed->id = phy->id;
+ ed->address = phy->mdio_address;
+ }
+
+ /* Reset phy. */
+ ixge_write_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL,
+ XGE_PHY_CONTROL_RESET);
+
+  /* Wait for self-clearing reset bit to clear. */
+ do
+ {
+ vlib_process_suspend (vm, 1e-3);
+ }
+ while (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL) &
+ XGE_PHY_CONTROL_RESET);
+}
+
+static u8 *
+format_ixge_rx_from_hw_descriptor (u8 * s, va_list * va)
+{
+ ixge_rx_from_hw_descriptor_t *d =
+ va_arg (*va, ixge_rx_from_hw_descriptor_t *);
+ u32 s0 = d->status[0], s2 = d->status[2];
+ u32 is_ip4, is_ip6, is_ip, is_tcp, is_udp;
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%s-owned",
+ (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE) ? "sw" :
+ "hw");
+ s =
+ format (s, ", length this descriptor %d, l3 offset %d",
+ d->n_packet_bytes_this_descriptor,
+ IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s0));
+ if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET)
+ s = format (s, ", end-of-packet");
+
+ s = format (s, "\n%U", format_white_space, indent);
+
+ if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR)
+ s = format (s, "layer2 error");
+
+ if (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2)
+ {
+ s = format (s, "layer 2 type %d", (s0 & 0x1f));
+ return s;
+ }
+
+ if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN)
+ s = format (s, "vlan header 0x%x\n%U", d->vlan_tag,
+ format_white_space, indent);
+
+ if ((is_ip4 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4)))
+ {
+ s = format (s, "ip4%s",
+ (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT) ? " options" :
+ "");
+ if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED)
+ s = format (s, " checksum %s",
+ (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) ?
+ "bad" : "ok");
+ }
+ if ((is_ip6 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6)))
+ s = format (s, "ip6%s",
+ (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT) ? " extended" :
+ "");
+ is_tcp = is_udp = 0;
+ if ((is_ip = (is_ip4 | is_ip6)))
+ {
+ is_tcp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP) != 0;
+ is_udp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP) != 0;
+ if (is_tcp)
+ s = format (s, ", tcp");
+ if (is_udp)
+ s = format (s, ", udp");
+ }
+
+ if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED)
+ s = format (s, ", tcp checksum %s",
+ (s2 & IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR) ? "bad" :
+ "ok");
+ if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)
+ s = format (s, ", udp checksum %s",
+ (s2 & IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR) ? "bad" :
+ "ok");
+
+ return s;
+}
+
+static u8 *
+format_ixge_tx_descriptor (u8 * s, va_list * va)
+{
+ ixge_tx_descriptor_t *d = va_arg (*va, ixge_tx_descriptor_t *);
+ u32 s0 = d->status0, s1 = d->status1;
+ uword indent = format_get_indent (s);
+ u32 v;
+
+ s = format (s, "buffer 0x%Lx, %d packet bytes, %d bytes this buffer",
+ d->buffer_address, s1 >> 14, d->n_bytes_this_buffer);
+
+ s = format (s, "\n%U", format_white_space, indent);
+
+ if ((v = (s0 >> 0) & 3))
+ s = format (s, "reserved 0x%x, ", v);
+
+ if ((v = (s0 >> 2) & 3))
+ s = format (s, "mac 0x%x, ", v);
+
+ if ((v = (s0 >> 4) & 0xf) != 3)
+ s = format (s, "type 0x%x, ", v);
+
+ s = format (s, "%s%s%s%s%s%s%s%s",
+ (s0 & (1 << 8)) ? "eop, " : "",
+ (s0 & (1 << 9)) ? "insert-fcs, " : "",
+ (s0 & (1 << 10)) ? "reserved26, " : "",
+ (s0 & (1 << 11)) ? "report-status, " : "",
+ (s0 & (1 << 12)) ? "reserved28, " : "",
+ (s0 & (1 << 13)) ? "is-advanced, " : "",
+ (s0 & (1 << 14)) ? "vlan-enable, " : "",
+ (s0 & (1 << 15)) ? "tx-segmentation, " : "");
+
+ if ((v = s1 & 0xf) != 0)
+ s = format (s, "status 0x%x, ", v);
+
+ if ((v = (s1 >> 4) & 0xf))
+ s = format (s, "context 0x%x, ", v);
+
+ if ((v = (s1 >> 8) & 0x3f))
+ s = format (s, "options 0x%x, ", v);
+
+ return s;
+}
+
+typedef struct
+{
+ ixge_descriptor_t before, after;
+
+ u32 buffer_index;
+
+ u16 device_index;
+
+ u8 queue_index;
+
+ u8 is_start_of_packet;
+
+ /* Copy of VLIB buffer; packet data stored in pre_data. */
+ vlib_buffer_t buffer;
+} ixge_rx_dma_trace_t;
+
+static u8 *
+format_ixge_rx_dma_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ vlib_node_t *node = va_arg (*va, vlib_node_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ ixge_rx_dma_trace_t *t = va_arg (*va, ixge_rx_dma_trace_t *);
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index);
+ format_function_t *f;
+ uword indent = format_get_indent (s);
+
+ {
+ vnet_sw_interface_t *sw =
+ vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
+ s =
+ format (s, "%U rx queue %d", format_vnet_sw_interface_name, vnm, sw,
+ t->queue_index);
+ }
+
+ s = format (s, "\n%Ubefore: %U",
+ format_white_space, indent,
+ format_ixge_rx_from_hw_descriptor, &t->before);
+ s = format (s, "\n%Uafter : head/tail address 0x%Lx/0x%Lx",
+ format_white_space, indent,
+ t->after.rx_to_hw.head_address, t->after.rx_to_hw.tail_address);
+
+ s = format (s, "\n%Ubuffer 0x%x: %U",
+ format_white_space, indent,
+ t->buffer_index, format_vlib_buffer, &t->buffer);
+
+ s = format (s, "\n%U", format_white_space, indent);
+
+ f = node->format_buffer;
+ if (!f || !t->is_start_of_packet)
+ f = format_hex_bytes;
+ s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data));
+
+ return s;
+}
+
+#define foreach_ixge_error \
+ _ (none, "no error") \
+ _ (tx_full_drops, "tx ring full drops") \
+ _ (ip4_checksum_error, "ip4 checksum errors") \
+ _ (rx_alloc_fail, "rx buf alloc from free list failed") \
+ _ (rx_alloc_no_physmem, "rx buf alloc failed no physmem")
+
+typedef enum
+{
+#define _(f,s) IXGE_ERROR_##f,
+ foreach_ixge_error
+#undef _
+ IXGE_N_ERROR,
+} ixge_error_t;
+
+always_inline void
+ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd,
+ u32 s00, u32 s02,
+ u8 * next0, u8 * error0, u32 * flags0)
+{
+ u8 is0_ip4, is0_ip6, n0, e0;
+ u32 f0;
+
+ e0 = IXGE_ERROR_none;
+ n0 = IXGE_RX_NEXT_ETHERNET_INPUT;
+
+ is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;
+ n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0;
+
+ e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
+ ? IXGE_ERROR_ip4_checksum_error : e0);
+
+ is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;
+ n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0;
+
+ n0 = (xd->per_interface_next_index != ~0) ?
+ xd->per_interface_next_index : n0;
+
+ /* Check for error. */
+ n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0;
+
+ f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
+ | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
+ ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0);
+
+ f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
+ | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
+ ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+
+ *error0 = e0;
+ *next0 = n0;
+ *flags0 = f0;
+}
+
+always_inline void
+ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd,
+ u32 s00, u32 s02,
+ u32 s10, u32 s12,
+ u8 * next0, u8 * error0, u32 * flags0,
+ u8 * next1, u8 * error1, u32 * flags1)
+{
+ u8 is0_ip4, is0_ip6, n0, e0;
+ u8 is1_ip4, is1_ip6, n1, e1;
+ u32 f0, f1;
+
+ e0 = e1 = IXGE_ERROR_none;
+  /* Default to ethernet-input, matching the single-descriptor variant. */
+  n0 = n1 = IXGE_RX_NEXT_ETHERNET_INPUT;
+
+ is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;
+ is1_ip4 = s12 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;
+
+ n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0;
+ n1 = is1_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n1;
+
+ e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
+ ? IXGE_ERROR_ip4_checksum_error : e0);
+ e1 = (is1_ip4 && (s12 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
+ ? IXGE_ERROR_ip4_checksum_error : e1);
+
+ is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;
+ is1_ip6 = s10 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;
+
+ n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0;
+ n1 = is1_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n1;
+
+ n0 = (xd->per_interface_next_index != ~0) ?
+ xd->per_interface_next_index : n0;
+ n1 = (xd->per_interface_next_index != ~0) ?
+ xd->per_interface_next_index : n1;
+
+ /* Check for error. */
+ n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0;
+ n1 = e1 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n1;
+
+ *error0 = e0;
+ *error1 = e1;
+
+ *next0 = n0;
+ *next1 = n1;
+
+ f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
+ | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
+ ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0);
+ f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
+ | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
+ ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0);
+
+ f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
+ | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
+ ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+ f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
+ | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
+ ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+
+ *flags0 = f0;
+ *flags1 = f1;
+}
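+
+/*
+ * Both status-decode variants above are deliberately branch-free: next,
+ * error and checksum flags are computed with conditional selects from
+ * the descriptor status bits, keeping the RX hot path predictable.
+ */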
+
+static void
+ixge_rx_trace (ixge_main_t * xm,
+ ixge_device_t * xd,
+ ixge_dma_queue_t * dq,
+ ixge_descriptor_t * before_descriptors,
+ u32 * before_buffers,
+ ixge_descriptor_t * after_descriptors, uword n_descriptors)
+{
+ vlib_main_t *vm = xm->vlib_main;
+ vlib_node_runtime_t *node = dq->rx.node;
+ ixge_rx_from_hw_descriptor_t *bd;
+ ixge_rx_to_hw_descriptor_t *ad;
+ u32 *b, n_left, is_sop, next_index_sop;
+
+ n_left = n_descriptors;
+ b = before_buffers;
+ bd = &before_descriptors->rx_from_hw;
+ ad = &after_descriptors->rx_to_hw;
+ is_sop = dq->rx.is_start_of_packet;
+ next_index_sop = dq->rx.saved_start_of_packet_next_index;
+
+ while (n_left >= 2)
+ {
+ u32 bi0, bi1, flags0, flags1;
+ vlib_buffer_t *b0, *b1;
+ ixge_rx_dma_trace_t *t0, *t1;
+ u8 next0, error0, next1, error1;
+
+ bi0 = b[0];
+ bi1 = b[1];
+ n_left -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ ixge_rx_next_and_error_from_status_x2 (xd,
+ bd[0].status[0], bd[0].status[2],
+ bd[1].status[0], bd[1].status[2],
+ &next0, &error0, &flags0,
+ &next1, &error1, &flags1);
+
+ next_index_sop = is_sop ? next0 : next_index_sop;
+ vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->is_start_of_packet = is_sop;
+ is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+
+ next_index_sop = is_sop ? next1 : next_index_sop;
+ vlib_trace_buffer (vm, node, next_index_sop, b1, /* follow_chain */ 0);
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1->is_start_of_packet = is_sop;
+ is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+
+ t0->queue_index = dq->queue_index;
+ t1->queue_index = dq->queue_index;
+ t0->device_index = xd->device_index;
+ t1->device_index = xd->device_index;
+ t0->before.rx_from_hw = bd[0];
+ t1->before.rx_from_hw = bd[1];
+ t0->after.rx_to_hw = ad[0];
+ t1->after.rx_to_hw = ad[1];
+ t0->buffer_index = bi0;
+ t1->buffer_index = bi1;
+ memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
+ memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data));
+ memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
+ sizeof (t0->buffer.pre_data));
+ memcpy (t1->buffer.pre_data, b1->data + b1->current_data,
+ sizeof (t1->buffer.pre_data));
+
+ b += 2;
+ bd += 2;
+ ad += 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0, flags0;
+ vlib_buffer_t *b0;
+ ixge_rx_dma_trace_t *t0;
+ u8 next0, error0;
+
+ bi0 = b[0];
+ n_left -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ixge_rx_next_and_error_from_status_x1 (xd,
+ bd[0].status[0], bd[0].status[2],
+ &next0, &error0, &flags0);
+
+ next_index_sop = is_sop ? next0 : next_index_sop;
+ vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->is_start_of_packet = is_sop;
+ is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+
+ t0->queue_index = dq->queue_index;
+ t0->device_index = xd->device_index;
+ t0->before.rx_from_hw = bd[0];
+ t0->after.rx_to_hw = ad[0];
+ t0->buffer_index = bi0;
+ memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
+ memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
+ sizeof (t0->buffer.pre_data));
+
+ b += 1;
+ bd += 1;
+ ad += 1;
+ }
+}
+
+typedef struct
+{
+ ixge_tx_descriptor_t descriptor;
+
+ u32 buffer_index;
+
+ u16 device_index;
+
+ u8 queue_index;
+
+ u8 is_start_of_packet;
+
+ /* Copy of VLIB buffer; packet data stored in pre_data. */
+ vlib_buffer_t buffer;
+} ixge_tx_dma_trace_t;
+
+static u8 *
+format_ixge_tx_dma_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ixge_tx_dma_trace_t *t = va_arg (*va, ixge_tx_dma_trace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index);
+ format_function_t *f;
+ uword indent = format_get_indent (s);
+
+ {
+ vnet_sw_interface_t *sw =
+ vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
+ s =
+ format (s, "%U tx queue %d", format_vnet_sw_interface_name, vnm, sw,
+ t->queue_index);
+ }
+
+ s = format (s, "\n%Udescriptor: %U",
+ format_white_space, indent,
+ format_ixge_tx_descriptor, &t->descriptor);
+
+ s = format (s, "\n%Ubuffer 0x%x: %U",
+ format_white_space, indent,
+ t->buffer_index, format_vlib_buffer, &t->buffer);
+
+ s = format (s, "\n%U", format_white_space, indent);
+
+ f = format_ethernet_header_with_length;
+ if (!f || !t->is_start_of_packet)
+ f = format_hex_bytes;
+ s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data));
+
+ return s;
+}
+
+typedef struct
+{
+ vlib_node_runtime_t *node;
+
+ u32 is_start_of_packet;
+
+ u32 n_bytes_in_packet;
+
+ ixge_tx_descriptor_t *start_of_packet_descriptor;
+} ixge_tx_state_t;
+
+static void
+ixge_tx_trace (ixge_main_t * xm,
+ ixge_device_t * xd,
+ ixge_dma_queue_t * dq,
+ ixge_tx_state_t * tx_state,
+ ixge_tx_descriptor_t * descriptors,
+ u32 * buffers, uword n_descriptors)
+{
+ vlib_main_t *vm = xm->vlib_main;
+ vlib_node_runtime_t *node = tx_state->node;
+ ixge_tx_descriptor_t *d;
+ u32 *b, n_left, is_sop;
+
+ n_left = n_descriptors;
+ b = buffers;
+ d = descriptors;
+ is_sop = tx_state->is_start_of_packet;
+
+ while (n_left >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ ixge_tx_dma_trace_t *t0, *t1;
+
+ bi0 = b[0];
+ bi1 = b[1];
+ n_left -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->is_start_of_packet = is_sop;
+ is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1->is_start_of_packet = is_sop;
+ is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+
+ t0->queue_index = dq->queue_index;
+ t1->queue_index = dq->queue_index;
+ t0->device_index = xd->device_index;
+ t1->device_index = xd->device_index;
+ t0->descriptor = d[0];
+ t1->descriptor = d[1];
+ t0->buffer_index = bi0;
+ t1->buffer_index = bi1;
+ memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
+ memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data));
+ memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
+ sizeof (t0->buffer.pre_data));
+ memcpy (t1->buffer.pre_data, b1->data + b1->current_data,
+ sizeof (t1->buffer.pre_data));
+
+ b += 2;
+ d += 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ixge_tx_dma_trace_t *t0;
+
+ bi0 = b[0];
+ n_left -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->is_start_of_packet = is_sop;
+ is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+
+ t0->queue_index = dq->queue_index;
+ t0->device_index = xd->device_index;
+ t0->descriptor = d[0];
+ t0->buffer_index = bi0;
+ memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
+ memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
+ sizeof (t0->buffer.pre_data));
+
+ b += 1;
+ d += 1;
+ }
+}
+
+always_inline uword
+ixge_ring_sub (ixge_dma_queue_t * q, u32 i0, u32 i1)
+{
+ i32 d = i1 - i0;
+ ASSERT (i0 < q->n_descriptors);
+ ASSERT (i1 < q->n_descriptors);
+ return d < 0 ? q->n_descriptors + d : d;
+}
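+
+/* Example: with a 512-entry ring, ixge_ring_sub (q, 510, 2) == 4, the
+ * forward (wrapping) distance from index 510 to index 2. */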
+
+always_inline uword
+ixge_ring_add (ixge_dma_queue_t * q, u32 i0, u32 i1)
+{
+ u32 d = i0 + i1;
+ ASSERT (i0 < q->n_descriptors);
+ ASSERT (i1 < q->n_descriptors);
+ d -= d >= q->n_descriptors ? q->n_descriptors : 0;
+ return d;
+}
+
+always_inline uword
+ixge_tx_descriptor_matches_template (ixge_main_t * xm,
+ ixge_tx_descriptor_t * d)
+{
+ u32 cmp;
+
+ cmp = ((d->status0 & xm->tx_descriptor_template_mask.status0)
+ ^ xm->tx_descriptor_template.status0);
+ if (cmp)
+ return 0;
+ cmp = ((d->status1 & xm->tx_descriptor_template_mask.status1)
+ ^ xm->tx_descriptor_template.status1);
+ if (cmp)
+ return 0;
+
+ return 1;
+}
+
+static uword
+ixge_tx_no_wrap (ixge_main_t * xm,
+ ixge_device_t * xd,
+ ixge_dma_queue_t * dq,
+ u32 * buffers,
+ u32 start_descriptor_index,
+ u32 n_descriptors, ixge_tx_state_t * tx_state)
+{
+ vlib_main_t *vm = xm->vlib_main;
+ ixge_tx_descriptor_t *d, *d_sop;
+ u32 n_left = n_descriptors;
+ u32 *to_free = vec_end (xm->tx_buffers_pending_free);
+ u32 *to_tx =
+ vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index);
+ u32 is_sop = tx_state->is_start_of_packet;
+ u32 len_sop = tx_state->n_bytes_in_packet;
+ u16 template_status = xm->tx_descriptor_template.status0;
+ u32 descriptor_prefetch_rotor = 0;
+
+ ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors);
+ d = &dq->descriptors[start_descriptor_index].tx;
+ d_sop = is_sop ? d : tx_state->start_of_packet_descriptor;
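+  /* d_sop tracks the packet's first descriptor so that status1 (total
+   * bytes in packet) can be rewritten as each chained buffer adds
+   * length. */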
+
+ while (n_left >= 4)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, fi0, len0;
+ u32 bi1, fi1, len1;
+ u8 is_eop0, is_eop1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD);
+
+ if ((descriptor_prefetch_rotor & 0x3) == 0)
+ CLIB_PREFETCH (d + 4, CLIB_CACHE_LINE_BYTES, STORE);
+
+ descriptor_prefetch_rotor += 2;
+
+ bi0 = buffers[0];
+ bi1 = buffers[1];
+
+ to_free[0] = fi0 = to_tx[0];
+ to_tx[0] = bi0;
+ to_free += fi0 != 0;
+
+ to_free[0] = fi1 = to_tx[1];
+ to_tx[1] = bi1;
+ to_free += fi1 != 0;
+
+ buffers += 2;
+ n_left -= 2;
+ to_tx += 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+ is_eop1 = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+
+ len0 = b0->current_length;
+ len1 = b1->current_length;
+
+ ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0));
+ ASSERT (ixge_tx_descriptor_matches_template (xm, d + 1));
+
+ d[0].buffer_address =
+ vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data;
+ d[1].buffer_address =
+ vlib_get_buffer_data_physical_address (vm, bi1) + b1->current_data;
+
+ d[0].n_bytes_this_buffer = len0;
+ d[1].n_bytes_this_buffer = len1;
+
+ d[0].status0 =
+ template_status | (is_eop0 <<
+ IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);
+ d[1].status0 =
+ template_status | (is_eop1 <<
+ IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);
+
+ len_sop = (is_sop ? 0 : len_sop) + len0;
+ d_sop[0].status1 =
+ IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
+ d += 1;
+ d_sop = is_eop0 ? d : d_sop;
+
+ is_sop = is_eop0;
+
+ len_sop = (is_sop ? 0 : len_sop) + len1;
+ d_sop[0].status1 =
+ IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
+ d += 1;
+ d_sop = is_eop1 ? d : d_sop;
+
+ is_sop = is_eop1;
+ }
+
+ while (n_left > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0, fi0, len0;
+ u8 is_eop0;
+
+ bi0 = buffers[0];
+
+ to_free[0] = fi0 = to_tx[0];
+ to_tx[0] = bi0;
+ to_free += fi0 != 0;
+
+ buffers += 1;
+ n_left -= 1;
+ to_tx += 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
+
+ len0 = b0->current_length;
+
+ ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0));
+
+ d[0].buffer_address =
+ vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data;
+
+ d[0].n_bytes_this_buffer = len0;
+
+ d[0].status0 =
+ template_status | (is_eop0 <<
+ IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);
+
+ len_sop = (is_sop ? 0 : len_sop) + len0;
+ d_sop[0].status1 =
+ IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
+ d += 1;
+ d_sop = is_eop0 ? d : d_sop;
+
+ is_sop = is_eop0;
+ }
+
+ if (tx_state->node->flags & VLIB_NODE_FLAG_TRACE)
+ {
+ to_tx =
+ vec_elt_at_index (dq->descriptor_buffer_indices,
+ start_descriptor_index);
+ ixge_tx_trace (xm, xd, dq, tx_state,
+ &dq->descriptors[start_descriptor_index].tx, to_tx,
+ n_descriptors);
+ }
+
+ _vec_len (xm->tx_buffers_pending_free) =
+ to_free - xm->tx_buffers_pending_free;
+
+ /* When we are done d_sop can point to end of ring. Wrap it if so. */
+ {
+ ixge_tx_descriptor_t *d_start = &dq->descriptors[0].tx;
+
+ ASSERT (d_sop - d_start <= dq->n_descriptors);
+ d_sop = d_sop - d_start == dq->n_descriptors ? d_start : d_sop;
+ }
+
+ tx_state->is_start_of_packet = is_sop;
+ tx_state->start_of_packet_descriptor = d_sop;
+ tx_state->n_bytes_in_packet = len_sop;
+
+ return n_descriptors;
+}
+
+static uword
+ixge_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ ixge_main_t *xm = &ixge_main;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, rd->dev_instance);
+ ixge_dma_queue_t *dq;
+ u32 *from, n_left_tx, n_descriptors_to_tx, n_tail_drop;
+ u32 queue_index = 0; /* fixme parameter */
+ ixge_tx_state_t tx_state;
+
+ tx_state.node = node;
+ tx_state.is_start_of_packet = 1;
+ tx_state.start_of_packet_descriptor = 0;
+ tx_state.n_bytes_in_packet = 0;
+
+ from = vlib_frame_vector_args (f);
+
+ dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index);
+
+ dq->head_index = dq->tx.head_index_write_back[0];
+
+ /* Since head == tail means ring is empty we can send up to dq->n_descriptors - 1. */
+ n_left_tx = dq->n_descriptors - 1;
+ n_left_tx -= ixge_ring_sub (dq, dq->head_index, dq->tail_index);
+
+ _vec_len (xm->tx_buffers_pending_free) = 0;
+
+ n_descriptors_to_tx = f->n_vectors;
+ n_tail_drop = 0;
+ if (PREDICT_FALSE (n_descriptors_to_tx > n_left_tx))
+ {
+ i32 i, n_ok, i_eop, i_sop;
+
+ i_sop = i_eop = ~0;
+ for (i = n_left_tx - 1; i >= 0; i--)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, from[i]);
+ if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ if (i_sop != ~0 && i_eop != ~0)
+ break;
+ i_eop = i;
+ i_sop = i + 1;
+ }
+ }
+ if (i == 0)
+ n_ok = 0;
+ else
+ n_ok = i_eop + 1;
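+      /* The backward scan above finds the last complete end-of-packet
+       * boundary that still fits in the ring, so a partially-described
+       * chained packet is never handed to hardware. */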
+
+ {
+	ELOG_TYPE_DECLARE (e) = {
+	  .function = (char *) __FUNCTION__,
+	  .format = "ixge %d, ring full to tx %d head %d tail %d",
+	  .format_args = "i2i2i2i2",
+	};
+ struct
+ {
+ u16 instance, to_tx, head, tail;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->instance = xd->device_index;
+ ed->to_tx = n_descriptors_to_tx;
+ ed->head = dq->head_index;
+ ed->tail = dq->tail_index;
+ }
+
+ if (n_ok < n_descriptors_to_tx)
+ {
+ n_tail_drop = n_descriptors_to_tx - n_ok;
+ vec_add (xm->tx_buffers_pending_free, from + n_ok, n_tail_drop);
+ vlib_error_count (vm, ixge_input_node.index,
+ IXGE_ERROR_tx_full_drops, n_tail_drop);
+ }
+
+ n_descriptors_to_tx = n_ok;
+ }
+
+ dq->tx.n_buffers_on_ring += n_descriptors_to_tx;
+
+ /* Process from tail to end of descriptor ring. */
+ if (n_descriptors_to_tx > 0 && dq->tail_index < dq->n_descriptors)
+ {
+ u32 n =
+ clib_min (dq->n_descriptors - dq->tail_index, n_descriptors_to_tx);
+ n = ixge_tx_no_wrap (xm, xd, dq, from, dq->tail_index, n, &tx_state);
+ from += n;
+ n_descriptors_to_tx -= n;
+ dq->tail_index += n;
+ ASSERT (dq->tail_index <= dq->n_descriptors);
+ if (dq->tail_index == dq->n_descriptors)
+ dq->tail_index = 0;
+ }
+
+ if (n_descriptors_to_tx > 0)
+ {
+ u32 n =
+ ixge_tx_no_wrap (xm, xd, dq, from, 0, n_descriptors_to_tx, &tx_state);
+ from += n;
+ ASSERT (n == n_descriptors_to_tx);
+ dq->tail_index += n;
+ ASSERT (dq->tail_index <= dq->n_descriptors);
+ if (dq->tail_index == dq->n_descriptors)
+ dq->tail_index = 0;
+ }
+
+ /* We should only get full packets. */
+ ASSERT (tx_state.is_start_of_packet);
+
+ /* Report status when last descriptor is done. */
+ {
+ u32 i = dq->tail_index == 0 ? dq->n_descriptors - 1 : dq->tail_index - 1;
+ ixge_tx_descriptor_t *d = &dq->descriptors[i].tx;
+ d->status0 |= IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS;
+ }
+
+ /* Give new descriptors to hardware. */
+ {
+ ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_TX, queue_index);
+
+ CLIB_MEMORY_BARRIER ();
+
+ dr->tail_index = dq->tail_index;
+ }
+
+ /* Free any buffers that are done. */
+ {
+ u32 n = _vec_len (xm->tx_buffers_pending_free);
+ if (n > 0)
+ {
+ vlib_buffer_free_no_next (vm, xm->tx_buffers_pending_free, n);
+ _vec_len (xm->tx_buffers_pending_free) = 0;
+ ASSERT (dq->tx.n_buffers_on_ring >= n);
+ dq->tx.n_buffers_on_ring -= (n - n_tail_drop);
+ }
+ }
+
+ return f->n_vectors;
+}
+
+static uword
+ixge_rx_queue_no_wrap (ixge_main_t * xm,
+ ixge_device_t * xd,
+ ixge_dma_queue_t * dq,
+ u32 start_descriptor_index, u32 n_descriptors)
+{
+ vlib_main_t *vm = xm->vlib_main;
+ vlib_node_runtime_t *node = dq->rx.node;
+ ixge_descriptor_t *d;
+ static ixge_descriptor_t *d_trace_save;
+ static u32 *d_trace_buffers;
+ u32 n_descriptors_left = n_descriptors;
+ u32 *to_rx =
+ vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index);
+ u32 *to_add;
+ u32 bi_sop = dq->rx.saved_start_of_packet_buffer_index;
+ u32 bi_last = dq->rx.saved_last_buffer_index;
+ u32 next_index_sop = dq->rx.saved_start_of_packet_next_index;
+ u32 is_sop = dq->rx.is_start_of_packet;
+ u32 next_index, n_left_to_next, *to_next;
+ u32 n_packets = 0;
+ u32 n_bytes = 0;
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ vlib_buffer_t *b_last, b_dummy;
+
+ ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors);
+ d = &dq->descriptors[start_descriptor_index];
+
+ b_last = bi_last != ~0 ? vlib_get_buffer (vm, bi_last) : &b_dummy;
+ next_index = dq->rx.next_index;
+
+ if (n_trace > 0)
+ {
+ u32 n = clib_min (n_trace, n_descriptors);
+ if (d_trace_save)
+ {
+ _vec_len (d_trace_save) = 0;
+ _vec_len (d_trace_buffers) = 0;
+ }
+ vec_add (d_trace_save, (ixge_descriptor_t *) d, n);
+ vec_add (d_trace_buffers, to_rx, n);
+ }
+
+ {
+ uword l = vec_len (xm->rx_buffers_to_add);
+
+ if (l < n_descriptors_left)
+ {
+ u32 n_to_alloc = 2 * dq->n_descriptors - l;
+ u32 n_allocated;
+
+ vec_resize (xm->rx_buffers_to_add, n_to_alloc);
+
+ _vec_len (xm->rx_buffers_to_add) = l;
+ n_allocated = vlib_buffer_alloc_from_free_list
+ (vm, xm->rx_buffers_to_add + l, n_to_alloc,
+ xm->vlib_buffer_free_list_index);
+ _vec_len (xm->rx_buffers_to_add) += n_allocated;
+
+ /* Handle transient allocation failure */
+ if (PREDICT_FALSE (l + n_allocated <= n_descriptors_left))
+ {
+ if (n_allocated == 0)
+ vlib_error_count (vm, ixge_input_node.index,
+ IXGE_ERROR_rx_alloc_no_physmem, 1);
+ else
+ vlib_error_count (vm, ixge_input_node.index,
+ IXGE_ERROR_rx_alloc_fail, 1);
+
+ n_descriptors_left = l + n_allocated;
+ }
+ n_descriptors = n_descriptors_left;
+ }
+
+ /* Add buffers from end of vector going backwards. */
+ to_add = vec_end (xm->rx_buffers_to_add) - 1;
+ }
+
+ while (n_descriptors_left > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_descriptors_left >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0;
+ u32 bi1, fi1, len1, l3_offset1, s21, s01, flags1;
+ u8 is_eop0, error0, next0;
+ u8 is_eop1, error1, next1;
+ ixge_descriptor_t d0, d1;
+
+ vlib_prefetch_buffer_with_index (vm, to_rx[2], STORE);
+ vlib_prefetch_buffer_with_index (vm, to_rx[3], STORE);
+
+ CLIB_PREFETCH (d + 2, 32, STORE);
+
+ d0.as_u32x4 = d[0].as_u32x4;
+ d1.as_u32x4 = d[1].as_u32x4;
+
+ s20 = d0.rx_from_hw.status[2];
+ s21 = d1.rx_from_hw.status[2];
+
+ s00 = d0.rx_from_hw.status[0];
+ s01 = d1.rx_from_hw.status[0];
+
+	  if (!((s20 & s21) & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE))
+ goto found_hw_owned_descriptor_x2;
+
+ bi0 = to_rx[0];
+ bi1 = to_rx[1];
+
+ ASSERT (to_add - 1 >= xm->rx_buffers_to_add);
+ fi0 = to_add[0];
+ fi1 = to_add[-1];
+
+ to_rx[0] = fi0;
+ to_rx[1] = fi1;
+ to_rx += 2;
+ to_add -= 2;
+
+ ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
+ vlib_buffer_is_known (vm, bi0));
+ ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
+ vlib_buffer_is_known (vm, bi1));
+ ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
+ vlib_buffer_is_known (vm, fi0));
+ ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
+ vlib_buffer_is_known (vm, fi1));
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited... See main.c...
+ */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
+
+ CLIB_PREFETCH (b0->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b1->data, CLIB_CACHE_LINE_BYTES, LOAD);
+
+ is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;
+ is_eop1 = (s21 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;
+
+ ixge_rx_next_and_error_from_status_x2 (xd, s00, s20, s01, s21,
+ &next0, &error0, &flags0,
+ &next1, &error1, &flags1);
+
+ next0 = is_sop ? next0 : next_index_sop;
+ next1 = is_eop0 ? next1 : next0;
+ next_index_sop = next1;
+
+ b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT);
+ b1->flags |= flags1 | (!is_eop1 << VLIB_BUFFER_LOG2_NEXT_PRESENT);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ b0->error = node->errors[error0];
+ b1->error = node->errors[error1];
+
+ len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor;
+ len1 = d1.rx_from_hw.n_packet_bytes_this_descriptor;
+ n_bytes += len0 + len1;
+ n_packets += is_eop0 + is_eop1;
+
+ /* Give new buffers to hardware. */
+ d0.rx_to_hw.tail_address =
+ vlib_get_buffer_data_physical_address (vm, fi0);
+ d1.rx_to_hw.tail_address =
+ vlib_get_buffer_data_physical_address (vm, fi1);
+ d0.rx_to_hw.head_address = d[0].rx_to_hw.tail_address;
+ d1.rx_to_hw.head_address = d[1].rx_to_hw.tail_address;
+ d[0].as_u32x4 = d0.as_u32x4;
+ d[1].as_u32x4 = d1.as_u32x4;
+
+ d += 2;
+ n_descriptors_left -= 2;
+
+ /* Point to either l2 or l3 header depending on next. */
+ l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT))
+ ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0;
+ l3_offset1 = (is_eop0 && (next1 != IXGE_RX_NEXT_ETHERNET_INPUT))
+ ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s01) : 0;
+
+ b0->current_length = len0 - l3_offset0;
+ b1->current_length = len1 - l3_offset1;
+ b0->current_data = l3_offset0;
+ b1->current_data = l3_offset1;
+
+ b_last->next_buffer = is_sop ? ~0 : bi0;
+ b0->next_buffer = is_eop0 ? ~0 : bi1;
+ bi_last = bi1;
+ b_last = b1;
+
+ if (CLIB_DEBUG > 0)
+ {
+ u32 bi_sop0 = is_sop ? bi0 : bi_sop;
+ u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0;
+
+ if (is_eop0)
+ {
+ u8 *msg = vlib_validate_buffer (vm, bi_sop0,
+ /* follow_buffer_next */ 1);
+ ASSERT (!msg);
+ }
+ if (is_eop1)
+ {
+ u8 *msg = vlib_validate_buffer (vm, bi_sop1,
+ /* follow_buffer_next */ 1);
+ ASSERT (!msg);
+ }
+ }
+ if (0) /* "Dave" version */
+ {
+ u32 bi_sop0 = is_sop ? bi0 : bi_sop;
+ u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0;
+
+ if (is_eop0)
+ {
+ to_next[0] = bi_sop0;
+ to_next++;
+ n_left_to_next--;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi_sop0, next0);
+ }
+ if (is_eop1)
+ {
+ to_next[0] = bi_sop1;
+ to_next++;
+ n_left_to_next--;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi_sop1, next1);
+ }
+ is_sop = is_eop1;
+ bi_sop = bi_sop1;
+ }
+ if (1) /* "Eliot" version */
+ {
+ /* Speculatively enqueue to cached next. */
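+ /* to_next is only advanced on end-of-packet descriptors, so the
+ buffers of a multi-descriptor packet stay chained via next_buffer
+ and are enqueued once, under their packet's SOP buffer index. */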
+ u8 saved_is_sop = is_sop;
+ u32 bi_sop_save = bi_sop;
+
+ bi_sop = saved_is_sop ? bi0 : bi_sop;
+ to_next[0] = bi_sop;
+ to_next += is_eop0;
+ n_left_to_next -= is_eop0;
+
+ bi_sop = is_eop0 ? bi1 : bi_sop;
+ to_next[0] = bi_sop;
+ to_next += is_eop1;
+ n_left_to_next -= is_eop1;
+
+ is_sop = is_eop1;
+
+ if (PREDICT_FALSE
+ (!(next0 == next_index && next1 == next_index)))
+ {
+ /* Undo speculation. */
+ to_next -= is_eop0 + is_eop1;
+ n_left_to_next += is_eop0 + is_eop1;
+
+ /* Re-do both descriptors being careful about where we enqueue. */
+ bi_sop = saved_is_sop ? bi0 : bi_sop_save;
+ if (is_eop0)
+ {
+ if (next0 != next_index)
+ vlib_set_next_frame_buffer (vm, node, next0, bi_sop);
+ else
+ {
+ to_next[0] = bi_sop;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ bi_sop = is_eop0 ? bi1 : bi_sop;
+ if (is_eop1)
+ {
+ if (next1 != next_index)
+ vlib_set_next_frame_buffer (vm, node, next1, bi_sop);
+ else
+ {
+ to_next[0] = bi_sop;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ /* Switch cached next index when next for both packets is the same. */
+ if (is_eop0 && is_eop1 && next0 == next1)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ next_index = next0;
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+ }
+ }
+ }
+ }
+
+ /* Bail out of dual loop and proceed with single loop. */
+ found_hw_owned_descriptor_x2:
+
+ while (n_descriptors_left > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0;
+ u8 is_eop0, error0, next0;
+ ixge_descriptor_t d0;
+
+ d0.as_u32x4 = d[0].as_u32x4;
+
+ s20 = d0.rx_from_hw.status[2];
+ s00 = d0.rx_from_hw.status[0];
+
+ if (!(s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE))
+ goto found_hw_owned_descriptor_x1;
+
+ bi0 = to_rx[0];
+ ASSERT (to_add >= xm->rx_buffers_to_add);
+ fi0 = to_add[0];
+
+ to_rx[0] = fi0;
+ to_rx += 1;
+ to_add -= 1;
+
+ ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
+ vlib_buffer_is_known (vm, bi0));
+ ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
+ vlib_buffer_is_known (vm, fi0));
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited...
+ */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+ is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;
+ ixge_rx_next_and_error_from_status_x1
+ (xd, s00, s20, &next0, &error0, &flags0);
+
+ next0 = is_sop ? next0 : next_index_sop;
+ next_index_sop = next0;
+
+ b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ b0->error = node->errors[error0];
+
+ len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor;
+ n_bytes += len0;
+ n_packets += is_eop0;
+
+ /* Give new buffer to hardware. */
+ d0.rx_to_hw.tail_address =
+ vlib_get_buffer_data_physical_address (vm, fi0);
+ d0.rx_to_hw.head_address = d0.rx_to_hw.tail_address;
+ d[0].as_u32x4 = d0.as_u32x4;
+
+ d += 1;
+ n_descriptors_left -= 1;
+
+ /* Point to either l2 or l3 header depending on next. */
+ l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT))
+ ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0;
+ b0->current_length = len0 - l3_offset0;
+ b0->current_data = l3_offset0;
+
+ b_last->next_buffer = is_sop ? ~0 : bi0;
+ bi_last = bi0;
+ b_last = b0;
+
+ bi_sop = is_sop ? bi0 : bi_sop;
+
+ if (CLIB_DEBUG > 0 && is_eop0)
+ {
+ u8 *msg =
+ vlib_validate_buffer (vm, bi_sop, /* follow_buffer_next */ 1);
+ ASSERT (!msg);
+ }
+
+ if (0) /* "Dave" version */
+ {
+ if (is_eop0)
+ {
+ to_next[0] = bi_sop;
+ to_next++;
+ n_left_to_next--;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi_sop, next0);
+ }
+ }
+ if (1) /* "Eliot" version */
+ {
+ if (PREDICT_TRUE (next0 == next_index))
+ {
+ to_next[0] = bi_sop;
+ to_next += is_eop0;
+ n_left_to_next -= is_eop0;
+ }
+ else
+ {
+ if (next0 != next_index && is_eop0)
+ vlib_set_next_frame_buffer (vm, node, next0, bi_sop);
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ next_index = next0;
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+ }
+ }
+ is_sop = is_eop0;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+found_hw_owned_descriptor_x1:
+ if (n_descriptors_left > 0)
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
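+ /* Shrink the rx_buffers_to_add cache to just the unused entries:
+ to_add points at the last buffer not handed to hardware, so
+ to_add + 1 marks the new end of the vector. */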
+ _vec_len (xm->rx_buffers_to_add) = (to_add + 1) - xm->rx_buffers_to_add;
+
+ {
+ u32 n_done = n_descriptors - n_descriptors_left;
+
+ if (n_trace > 0 && n_done > 0)
+ {
+ u32 n = clib_min (n_trace, n_done);
+ ixge_rx_trace (xm, xd, dq,
+ d_trace_save,
+ d_trace_buffers,
+ &dq->descriptors[start_descriptor_index], n);
+ vlib_set_trace_count (vm, node, n_trace - n);
+ }
+ if (d_trace_save)
+ {
+ _vec_len (d_trace_save) = 0;
+ _vec_len (d_trace_buffers) = 0;
+ }
+
+ /* Don't keep a reference to b_last if we don't have to.
+ Otherwise we can overwrite a next_buffer pointer after having
+ already enqueued a packet. */
+ if (is_sop)
+ {
+ b_last->next_buffer = ~0;
+ bi_last = ~0;
+ }
+
+ dq->rx.n_descriptors_done_this_call = n_done;
+ dq->rx.n_descriptors_done_total += n_done;
+ dq->rx.is_start_of_packet = is_sop;
+ dq->rx.saved_start_of_packet_buffer_index = bi_sop;
+ dq->rx.saved_last_buffer_index = bi_last;
+ dq->rx.saved_start_of_packet_next_index = next_index_sop;
+ dq->rx.next_index = next_index;
+ dq->rx.n_bytes += n_bytes;
+
+ return n_packets;
+ }
+}
+
+static uword
+ixge_rx_queue (ixge_main_t * xm,
+ ixge_device_t * xd,
+ vlib_node_runtime_t * node, u32 queue_index)
+{
+ ixge_dma_queue_t *dq =
+ vec_elt_at_index (xd->dma_queues[VLIB_RX], queue_index);
+ ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, dq->queue_index);
+ uword n_packets = 0;
+ u32 hw_head_index, sw_head_index;
+
+ /* One time initialization. */
+ if (!dq->rx.node)
+ {
+ dq->rx.node = node;
+ dq->rx.is_start_of_packet = 1;
+ dq->rx.saved_start_of_packet_buffer_index = ~0;
+ dq->rx.saved_last_buffer_index = ~0;
+ }
+
+ dq->rx.next_index = node->cached_next_index;
+
+ dq->rx.n_descriptors_done_total = 0;
+ dq->rx.n_descriptors_done_this_call = 0;
+ dq->rx.n_bytes = 0;
+
+ /* Fetch head from hardware and compare to where we think we are. */
+ hw_head_index = dr->head_index;
+ sw_head_index = dq->head_index;
+
+ if (hw_head_index == sw_head_index)
+ goto done;
+
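+ /* If the hardware head is behind the software head, the descriptor
+ ring has wrapped: drain from the software head to the end of the
+ ring first; the next branch then picks up from slot 0 onwards. */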
+ if (hw_head_index < sw_head_index)
+ {
+ u32 n_tried = dq->n_descriptors - sw_head_index;
+ n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried);
+ sw_head_index =
+ ixge_ring_add (dq, sw_head_index,
+ dq->rx.n_descriptors_done_this_call);
+
+ if (dq->rx.n_descriptors_done_this_call != n_tried)
+ goto done;
+ }
+ if (hw_head_index >= sw_head_index)
+ {
+ u32 n_tried = hw_head_index - sw_head_index;
+ n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried);
+ sw_head_index =
+ ixge_ring_add (dq, sw_head_index,
+ dq->rx.n_descriptors_done_this_call);
+ }
+
+done:
+ dq->head_index = sw_head_index;
+ dq->tail_index =
+ ixge_ring_add (dq, dq->tail_index, dq->rx.n_descriptors_done_total);
+
+ /* Give tail back to hardware. */
+ CLIB_MEMORY_BARRIER ();
+
+ dr->tail_index = dq->tail_index;
+
+ vlib_increment_combined_counter
+ (vnet_main.interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ 0 /* thread_index */ ,
+ xd->vlib_sw_if_index, n_packets, dq->rx.n_bytes);
+
+ return n_packets;
+}
+
+static void
+ixge_interrupt (ixge_main_t * xm, ixge_device_t * xd, u32 i)
+{
+ vlib_main_t *vm = xm->vlib_main;
+ ixge_regs_t *r = xd->regs;
+
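+ /* Interrupt bits 16..31 are non-queue causes; bit 20 is the link
+ status change interrupt and is handled separately below. */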
+ if (i != 20)
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .function = (char *) __FUNCTION__,
+ .format = "ixge %d, %s",
+ .format_args = "i1t1",
+ .n_enum_strings = 16,
+ .enum_strings = {
+ "flow director",
+ "rx miss",
+ "pci exception",
+ "mailbox",
+ "link status change",
+ "linksec key exchange",
+ "manageability event",
+ "reserved23",
+ "sdp0",
+ "sdp1",
+ "sdp2",
+ "sdp3",
+ "ecc",
+ "descriptor handler error",
+ "tcp timer",
+ "other",
+ },
+ };
+ struct
+ {
+ u8 instance;
+ u8 index;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->instance = xd->device_index;
+ ed->index = i - 16;
+ }
+ else
+ {
+ u32 v = r->xge_mac.link_status;
+ uword is_up = (v & (1 << 30)) != 0;
+
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .function = (char *) __FUNCTION__,
+ .format = "ixge %d, link status change 0x%x",
+ .format_args = "i4i4",
+ };
+ struct
+ {
+ u32 instance, link_status;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->instance = xd->device_index;
+ ed->link_status = v;
+ xd->link_status_at_last_link_change = v;
+
+ vlib_process_signal_event (vm, ixge_process_node.index,
+ EVENT_SET_FLAGS,
+ ((is_up << 31) | xd->vlib_hw_if_index));
+ }
+}
+
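+/* Compact a block of buffer indices: copy the non-zero entries of
+   b[0..n_left) to t, zeroing b as we go, and return the number copied.
+   Example (hypothetical values): b = {5, 0, 7, 0}, n_left = 4 yields
+   t = {5, 7}, b all zero, return value 2. */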
+always_inline u32
+clean_block (u32 * b, u32 * t, u32 n_left)
+{
+ u32 *t0 = t;
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+
+ t[0] = bi0 = b[0];
+ b[0] = 0;
+ t += bi0 != 0;
+
+ t[0] = bi1 = b[1];
+ b[1] = 0;
+ t += bi1 != 0;
+
+ t[0] = bi2 = b[2];
+ b[2] = 0;
+ t += bi2 != 0;
+
+ t[0] = bi3 = b[3];
+ b[3] = 0;
+ t += bi3 != 0;
+
+ b += 4;
+ n_left -= 4;
+ }
+
+ while (n_left > 0)
+ {
+ u32 bi0;
+
+ t[0] = bi0 = b[0];
+ b[0] = 0;
+ t += bi0 != 0;
+ b += 1;
+ n_left -= 1;
+ }
+
+ return t - t0;
+}
+
+static void
+ixge_tx_queue (ixge_main_t * xm, ixge_device_t * xd, u32 queue_index)
+{
+ vlib_main_t *vm = xm->vlib_main;
+ ixge_dma_queue_t *dq =
+ vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index);
+ u32 n_clean, *b, *t, *t0;
+ i32 n_hw_owned_descriptors;
+ i32 first_to_clean, last_to_clean;
+ u64 hwbp_race = 0;
+
+ /* Handle case where head write back pointer update
+ * arrives after the interrupt during high PCI bus loads.
+ */
+ while ((dq->head_index == dq->tx.head_index_write_back[0]) &&
+ dq->tx.n_buffers_on_ring && (dq->head_index != dq->tail_index))
+ {
+ hwbp_race++;
+ if (IXGE_HWBP_RACE_ELOG && (hwbp_race == 1))
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .function = (char *) __FUNCTION__,
+ .format = "ixge %d tx head index race: head %4d, tail %4d, buffs %4d",
+ .format_args = "i4i4i4i4",
+ };
+ struct
+ {
+ u32 instance, head_index, tail_index, n_buffers_on_ring;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->instance = xd->device_index;
+ ed->head_index = dq->head_index;
+ ed->tail_index = dq->tail_index;
+ ed->n_buffers_on_ring = dq->tx.n_buffers_on_ring;
+ }
+ }
+
+ dq->head_index = dq->tx.head_index_write_back[0];
+ n_hw_owned_descriptors = ixge_ring_sub (dq, dq->head_index, dq->tail_index);
+ ASSERT (dq->tx.n_buffers_on_ring >= n_hw_owned_descriptors);
+ n_clean = dq->tx.n_buffers_on_ring - n_hw_owned_descriptors;
+
+ if (IXGE_HWBP_RACE_ELOG && hwbp_race)
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .function = (char *) __FUNCTION__,
+ .format = "ixge %d tx head index race: head %4d, hw_owned %4d, n_clean %4d, retries %d",
+ .format_args = "i4i4i4i4i4",
+ };
+ struct
+ {
+ u32 instance, head_index, n_hw_owned_descriptors, n_clean, retries;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->instance = xd->device_index;
+ ed->head_index = dq->head_index;
+ ed->n_hw_owned_descriptors = n_hw_owned_descriptors;
+ ed->n_clean = n_clean;
+ ed->retries = hwbp_race;
+ }
+
+ /*
+ * This function used to wait until hardware owned zero descriptors.
+ * At high PPS rates, that doesn't happen until the TX ring is
+ * completely full of descriptors which need to be cleaned up.
+ * That, in turn, causes TX ring-full drops and/or long RX service
+ * interruptions.
+ */
+ if (n_clean == 0)
+ return;
+
+ /* Clean the n_clean descriptors prior to the reported hardware head */
+ last_to_clean = dq->head_index - 1;
+ last_to_clean = (last_to_clean < 0) ? last_to_clean + dq->n_descriptors :
+ last_to_clean;
+
+ first_to_clean = (last_to_clean) - (n_clean - 1);
+ first_to_clean = (first_to_clean < 0) ? first_to_clean + dq->n_descriptors :
+ first_to_clean;
+
+ vec_resize (xm->tx_buffers_pending_free, dq->n_descriptors - 1);
+ t0 = t = xm->tx_buffers_pending_free;
+ b = dq->descriptor_buffer_indices + first_to_clean;
+
+ /* Wrap case: clean from first to end, then start to last */
+ if (first_to_clean > last_to_clean)
+ {
+ t += clean_block (b, t, (dq->n_descriptors - 1) - first_to_clean);
+ first_to_clean = 0;
+ b = dq->descriptor_buffer_indices;
+ }
+
+ /* Typical case: clean from first to last */
+ if (first_to_clean <= last_to_clean)
+ t += clean_block (b, t, (last_to_clean - first_to_clean) + 1);
+
+ if (t > t0)
+ {
+ u32 n = t - t0;
+ vlib_buffer_free_no_next (vm, t0, n);
+ ASSERT (dq->tx.n_buffers_on_ring >= n);
+ dq->tx.n_buffers_on_ring -= n;
+ _vec_len (xm->tx_buffers_pending_free) = 0;
+ }
+}
+
+/* RX queue interrupts 0 thru 7; TX 8 thru 15. */
+always_inline uword
+ixge_interrupt_is_rx_queue (uword i)
+{
+ return i < 8;
+}
+
+always_inline uword
+ixge_interrupt_is_tx_queue (uword i)
+{
+ return i >= 8 && i < 16;
+}
+
+always_inline uword
+ixge_tx_queue_to_interrupt (uword i)
+{
+ return 8 + i;
+}
+
+always_inline uword
+ixge_rx_queue_to_interrupt (uword i)
+{
+ return 0 + i;
+}
+
+always_inline uword
+ixge_interrupt_rx_queue (uword i)
+{
+ ASSERT (ixge_interrupt_is_rx_queue (i));
+ return i - 0;
+}
+
+always_inline uword
+ixge_interrupt_tx_queue (uword i)
+{
+ ASSERT (ixge_interrupt_is_tx_queue (i));
+ return i - 8;
+}
+
+static uword
+ixge_device_input (ixge_main_t * xm,
+ ixge_device_t * xd, vlib_node_runtime_t * node)
+{
+ ixge_regs_t *r = xd->regs;
+ u32 i, s;
+ uword n_rx_packets = 0;
+
+ s = r->interrupt.status_write_1_to_set;
+ if (s)
+ r->interrupt.status_write_1_to_clear = s;
+
+ /* *INDENT-OFF* */
+ foreach_set_bit (i, s, ({
+ if (ixge_interrupt_is_rx_queue (i))
+ n_rx_packets += ixge_rx_queue (xm, xd, node, ixge_interrupt_rx_queue (i));
+
+ else if (ixge_interrupt_is_tx_queue (i))
+ ixge_tx_queue (xm, xd, ixge_interrupt_tx_queue (i));
+
+ else
+ ixge_interrupt (xm, xd, i);
+ }));
+ /* *INDENT-ON* */
+
+ return n_rx_packets;
+}
+
+static uword
+ixge_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd;
+ uword n_rx_packets = 0;
+
+ if (node->state == VLIB_NODE_STATE_INTERRUPT)
+ {
+ uword i;
+
+ /* Loop over devices with interrupts. */
+ /* *INDENT-OFF* */
+ foreach_set_bit (i, node->runtime_data[0], ({
+ xd = vec_elt_at_index (xm->devices, i);
+ n_rx_packets += ixge_device_input (xm, xd, node);
+
+ /* Re-enable interrupts since we're going to stay in interrupt mode. */
+ if (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
+ xd->regs->interrupt.enable_write_1_to_set = ~0;
+ }));
+ /* *INDENT-ON* */
+
+ /* Clear mask of devices with pending interrupts. */
+ node->runtime_data[0] = 0;
+ }
+ else
+ {
+ /* Poll all devices for input/interrupts. */
+ vec_foreach (xd, xm->devices)
+ {
+ n_rx_packets += ixge_device_input (xm, xd, node);
+
+ /* Re-enable interrupts when switching out of polling mode. */
+ if (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)
+ xd->regs->interrupt.enable_write_1_to_set = ~0;
+ }
+ }
+
+ return n_rx_packets;
+}
+
+static char *ixge_error_strings[] = {
+#define _(n,s) s,
+ foreach_ixge_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ixge_input_node, static) = {
+ .function = ixge_input,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .name = "ixge-input",
+
+ /* Will be enabled if/when hardware is detected. */
+ .state = VLIB_NODE_STATE_DISABLED,
+
+ .format_buffer = format_ethernet_header_with_length,
+ .format_trace = format_ixge_rx_dma_trace,
+
+ .n_errors = IXGE_N_ERROR,
+ .error_strings = ixge_error_strings,
+
+ .n_next_nodes = IXGE_RX_N_NEXT,
+ .next_nodes = {
+ [IXGE_RX_NEXT_DROP] = "error-drop",
+ [IXGE_RX_NEXT_ETHERNET_INPUT] = "ethernet-input",
+ [IXGE_RX_NEXT_IP4_INPUT] = "ip4-input",
+ [IXGE_RX_NEXT_IP6_INPUT] = "ip6-input",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH_CLONE (ixge_input)
+CLIB_MULTIARCH_SELECT_FN (ixge_input)
+/* *INDENT-ON* */
+
+static u8 *
+format_ixge_device_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, i);
+ return format (s, "TenGigabitEthernet%U",
+ format_vlib_pci_handle, &xd->pci_device.bus_address);
+}
+
+#define IXGE_COUNTER_IS_64_BIT (1 << 0)
+#define IXGE_COUNTER_NOT_CLEAR_ON_READ (1 << 1)
+
+static u8 ixge_counter_flags[] = {
+#define _(a,f) 0,
+#define _64(a,f) IXGE_COUNTER_IS_64_BIT,
+ foreach_ixge_counter
+#undef _
+#undef _64
+};
+
+static void
+ixge_update_counters (ixge_device_t * xd)
+{
+ /* Byte offset for counter registers. */
+ static u32 reg_offsets[] = {
+#define _(a,f) (a) / sizeof (u32),
+#define _64(a,f) _(a,f)
+ foreach_ixge_counter
+#undef _
+#undef _64
+ };
+ volatile u32 *r = (volatile u32 *) xd->regs;
+ int i;
+
+ for (i = 0; i < ARRAY_LEN (xd->counters); i++)
+ {
+ u32 o = reg_offsets[i];
+ xd->counters[i] += r[o];
+ if (ixge_counter_flags[i] & IXGE_COUNTER_NOT_CLEAR_ON_READ)
+ r[o] = 0;
+ if (ixge_counter_flags[i] & IXGE_COUNTER_IS_64_BIT)
+ xd->counters[i] += (u64) r[o + 1] << (u64) 32;
+ }
+}
+
+static u8 *
+format_ixge_device_id (u8 * s, va_list * args)
+{
+ u32 device_id = va_arg (*args, u32);
+ char *t = 0;
+ switch (device_id)
+ {
+#define _(f,n) case n: t = #f; break;
+ foreach_ixge_pci_device_id;
+#undef _
+ default:
+ t = 0;
+ break;
+ }
+ if (t == 0)
+ s = format (s, "unknown 0x%x", device_id);
+ else
+ s = format (s, "%s", t);
+ return s;
+}
+
+static u8 *
+format_ixge_link_status (u8 * s, va_list * args)
+{
+ ixge_device_t *xd = va_arg (*args, ixge_device_t *);
+ u32 v = xd->link_status_at_last_link_change;
+
+ s = format (s, "%s", (v & (1 << 30)) ? "up" : "down");
+
+ {
+ char *modes[] = {
+ "1g", "10g parallel", "10g serial", "autoneg",
+ };
+ char *speeds[] = {
+ "unknown", "100m", "1g", "10g",
+ };
+ s = format (s, ", mode %s, speed %s",
+ modes[(v >> 26) & 3], speeds[(v >> 28) & 3]);
+ }
+
+ return s;
+}
+
+static u8 *
+format_ixge_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ CLIB_UNUSED (int verbose) = va_arg (*args, int);
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, dev_instance);
+ ixge_phy_t *phy = xd->phys + xd->phy_index;
+ uword indent = format_get_indent (s);
+
+ ixge_update_counters (xd);
+ xd->link_status_at_last_link_change = xd->regs->xge_mac.link_status;
+
+ s = format (s, "Intel 8259X: id %U\n%Ulink %U",
+ format_ixge_device_id, xd->device_id,
+ format_white_space, indent + 2, format_ixge_link_status, xd);
+
+ s = format (s, "\n%UPCIe %U", format_white_space, indent + 2,
+ format_vlib_pci_link_speed, &xd->pci_device);
+
+ s = format (s, "\n%U", format_white_space, indent + 2);
+ if (phy->mdio_address != ~0)
+ s = format (s, "PHY address %d, id 0x%x", phy->mdio_address, phy->id);
+ else if (xd->sfp_eeprom.id == SFP_ID_sfp)
+ s = format (s, "SFP %U", format_sfp_eeprom, &xd->sfp_eeprom);
+ else
+ s = format (s, "PHY not found");
+
+ /* FIXME */
+ {
+ ixge_dma_queue_t *dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], 0);
+ ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0);
+ u32 hw_head_index = dr->head_index;
+ u32 sw_head_index = dq->head_index;
+ u32 nitems;
+
+ nitems = ixge_ring_sub (dq, hw_head_index, sw_head_index);
+ s = format (s, "\n%U%d unprocessed, %d total buffers on rx queue 0 ring",
+ format_white_space, indent + 2, nitems, dq->n_descriptors);
+
+ s = format (s, "\n%U%d buffers in driver rx cache",
+ format_white_space, indent + 2,
+ vec_len (xm->rx_buffers_to_add));
+
+ s = format (s, "\n%U%d buffers on tx queue 0 ring",
+ format_white_space, indent + 2,
+ xd->dma_queues[VLIB_TX][0].tx.n_buffers_on_ring);
+ }
+ {
+ u32 i;
+ u64 v;
+ static char *names[] = {
+#define _(a,f) #f,
+#define _64(a,f) _(a,f)
+ foreach_ixge_counter
+#undef _
+#undef _64
+ };
+
+ for (i = 0; i < ARRAY_LEN (names); i++)
+ {
+ v = xd->counters[i] - xd->counters_last_clear[i];
+ if (v != 0)
+ s = format (s, "\n%U%-40U%16Ld",
+ format_white_space, indent + 2,
+ format_c_identifier, names[i], v);
+ }
+ }
+
+ return s;
+}
+
+static void
+ixge_clear_hw_interface_counters (u32 instance)
+{
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, instance);
+ ixge_update_counters (xd);
+ memcpy (xd->counters_last_clear, xd->counters, sizeof (xd->counters));
+}
+
+/*
+ * Dynamically redirect all packets from a specific interface
+ * to the specified node.
+ */
+static void
+ixge_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ ixge_main_t *xm = &ixge_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ ixge_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ xd->per_interface_next_index = node_index;
+ return;
+ }
+
+ xd->per_interface_next_index =
+ vlib_node_add_next (xm->vlib_main, ixge_input_node.index, node_index);
+}
+
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (ixge_device_class) = {
+ .name = "ixge",
+ .tx_function = ixge_interface_tx,
+ .format_device_name = format_ixge_device_name,
+ .format_device = format_ixge_device,
+ .format_tx_trace = format_ixge_tx_dma_trace,
+ .clear_counters = ixge_clear_hw_interface_counters,
+ .admin_up_down_function = ixge_interface_admin_up_down,
+ .rx_redirect_to_node = ixge_set_interface_next_node,
+};
+/* *INDENT-ON* */
+
+/* DAW-HACK: Set Rx buffer size so all packets < ETH_MTU_SIZE fit in a
+   single buffer (i.e. sop & eop set in every descriptor). */
+#define IXGE_N_BYTES_IN_RX_BUFFER (2048)
+
+static clib_error_t *
+ixge_dma_init (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 queue_index)
+{
+ ixge_main_t *xm = &ixge_main;
+ vlib_main_t *vm = xm->vlib_main;
+ ixge_dma_queue_t *dq;
+ clib_error_t *error = 0;
+
+ vec_validate (xd->dma_queues[rt], queue_index);
+ dq = vec_elt_at_index (xd->dma_queues[rt], queue_index);
+
+ if (!xm->n_descriptors_per_cache_line)
+ xm->n_descriptors_per_cache_line =
+ CLIB_CACHE_LINE_BYTES / sizeof (dq->descriptors[0]);
+
+ if (!xm->n_bytes_in_rx_buffer)
+ xm->n_bytes_in_rx_buffer = IXGE_N_BYTES_IN_RX_BUFFER;
+ xm->n_bytes_in_rx_buffer = round_pow2 (xm->n_bytes_in_rx_buffer, 1024);
+ if (!xm->vlib_buffer_free_list_index)
+ {
+ xm->vlib_buffer_free_list_index =
+ vlib_buffer_get_or_create_free_list (vm, xm->n_bytes_in_rx_buffer,
+ "ixge rx");
+ ASSERT (xm->vlib_buffer_free_list_index != 0);
+ }
+
+ if (!xm->n_descriptors[rt])
+ xm->n_descriptors[rt] = 4 * VLIB_FRAME_SIZE;
+
+ dq->queue_index = queue_index;
+ dq->n_descriptors =
+ round_pow2 (xm->n_descriptors[rt], xm->n_descriptors_per_cache_line);
+ dq->head_index = dq->tail_index = 0;
+
+ dq->descriptors =
+ vlib_physmem_alloc_aligned (vm, xm->physmem_region, &error,
+ dq->n_descriptors *
+ sizeof (dq->descriptors[0]),
+ 128 /* per chip spec */ );
+ if (error)
+ return error;
+
+ memset (dq->descriptors, 0,
+ dq->n_descriptors * sizeof (dq->descriptors[0]));
+ vec_resize (dq->descriptor_buffer_indices, dq->n_descriptors);
+
+ if (rt == VLIB_RX)
+ {
+ u32 n_alloc, i;
+
+ n_alloc = vlib_buffer_alloc_from_free_list
+ (vm, dq->descriptor_buffer_indices,
+ vec_len (dq->descriptor_buffer_indices),
+ xm->vlib_buffer_free_list_index);
+ ASSERT (n_alloc == vec_len (dq->descriptor_buffer_indices));
+ for (i = 0; i < n_alloc; i++)
+ {
+ vlib_buffer_t *b =
+ vlib_get_buffer (vm, dq->descriptor_buffer_indices[i]);
+ dq->descriptors[i].rx_to_hw.tail_address =
+ vlib_physmem_virtual_to_physical (vm, xm->physmem_region,
+ b->data);
+ }
+ }
+ else
+ {
+ u32 i;
+
+ dq->tx.head_index_write_back =
+ vlib_physmem_alloc (vm, vm->buffer_main->physmem_region, &error,
+ CLIB_CACHE_LINE_BYTES);
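+ /* Hardware DMA-writes its TX head index into this cache line (see
+ head_index_write_back_address below), so TX completions can be
+ detected from memory instead of a device register read. */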
+
+ for (i = 0; i < dq->n_descriptors; i++)
+ dq->descriptors[i].tx = xm->tx_descriptor_template;
+
+ vec_validate (xm->tx_buffers_pending_free, dq->n_descriptors - 1);
+ }
+
+ {
+ ixge_dma_regs_t *dr = get_dma_regs (xd, rt, queue_index);
+ u64 a;
+
+ a =
+ vlib_physmem_virtual_to_physical (vm, vm->buffer_main->physmem_region,
+ dq->descriptors);
+ dr->descriptor_address[0] = a & 0xFFFFFFFF;
+ dr->descriptor_address[1] = a >> (u64) 32;
+ dr->n_descriptor_bytes = dq->n_descriptors * sizeof (dq->descriptors[0]);
+ dq->head_index = dq->tail_index = 0;
+
+ if (rt == VLIB_RX)
+ {
+ ASSERT ((xm->n_bytes_in_rx_buffer / 1024) < 32);
+ dr->rx_split_control =
+ ( /* buffer size */ ((xm->n_bytes_in_rx_buffer / 1024) << 0)
+ | ( /* lo free descriptor threshold (units of 64 descriptors) */
+ (1 << 22)) | ( /* descriptor type: advanced one buffer */
+ (1 << 25)) | ( /* drop if no descriptors available */
+ (1 << 28)));
+
+ /* Give hardware all but last 16 cache lines' worth of descriptors. */
+ dq->tail_index = dq->n_descriptors -
+ 16 * xm->n_descriptors_per_cache_line;
+ }
+ else
+ {
+ /* Make sure it's initialized before hardware can get to it. */
+ dq->tx.head_index_write_back[0] = dq->head_index;
+
+ a =
+ vlib_physmem_virtual_to_physical (vm,
+ vm->buffer_main->physmem_region,
+ dq->tx.head_index_write_back);
+ dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a;
+ dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32;
+ }
+
+ /* DMA on the 82599 does not work with bit [13] (rx data write relaxed
+ ordering) or the undocumented bit [12] set. */
+ if (rt == VLIB_RX)
+ dr->dca_control &= ~((1 << 13) | (1 << 12));
+
+ CLIB_MEMORY_BARRIER ();
+
+ if (rt == VLIB_TX)
+ {
+ xd->regs->tx_dma_control |= (1 << 0);
+ dr->control |= ((32 << 0) /* prefetch threshold */
+ | (64 << 8) /* host threshold */
+ | (0 << 16) /* writeback threshold */ );
+ }
+
+ /* Enable this queue and wait for hardware to initialize
+ before adding to tail. */
+ if (rt == VLIB_TX)
+ {
+ dr->control |= 1 << 25;
+ while (!(dr->control & (1 << 25)))
+ ;
+ }
+
+ /* Set head/tail indices and enable DMA. */
+ dr->head_index = dq->head_index;
+ dr->tail_index = dq->tail_index;
+ }
+
+ return error;
+}
+
+static u32
+ixge_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
+{
+ ixge_device_t *xd;
+ ixge_regs_t *r;
+ u32 old;
+ ixge_main_t *xm = &ixge_main;
+
+ xd = vec_elt_at_index (xm->devices, hw->dev_instance);
+ r = xd->regs;
+
+ old = r->filter_control;
+
+ if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
+ r->filter_control = old | (1 << 9) /* unicast promiscuous */ ;
+ else
+ r->filter_control = old & ~(1 << 9);
+
+ return old;
+}
+
+static void
+ixge_device_init (ixge_main_t * xm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ixge_device_t *xd;
+
+ /* Reset chip(s). */
+ vec_foreach (xd, xm->devices)
+ {
+ ixge_regs_t *r = xd->regs;
+ const u32 reset_bit = (1 << 26) | (1 << 3);
+
+ r->control |= reset_bit;
+
+ /* No need to suspend. Timed to take ~1e-6 secs */
+ while (r->control & reset_bit)
+ ;
+
+ /* Software loaded. */
+ r->extended_control |= (1 << 28);
+
+ ixge_phy_init (xd);
+
+ /* Register ethernet interface. */
+ {
+ u8 addr8[6];
+ u32 i, addr32[2];
+ clib_error_t *error;
+
+ addr32[0] = r->rx_ethernet_address0[0][0];
+ addr32[1] = r->rx_ethernet_address0[0][1];
+ for (i = 0; i < 6; i++)
+ addr8[i] = addr32[i / 4] >> ((i % 4) * 8);
+
+ error = ethernet_register_interface
+ (vnm, ixge_device_class.index, xd->device_index,
+ /* ethernet address */ addr8,
+ &xd->vlib_hw_if_index, ixge_flag_change);
+ if (error)
+ clib_error_report (error);
+ }
+
+ {
+ vnet_sw_interface_t *sw =
+ vnet_get_hw_sw_interface (vnm, xd->vlib_hw_if_index);
+ xd->vlib_sw_if_index = sw->sw_if_index;
+ }
+
+ ixge_dma_init (xd, VLIB_RX, /* queue_index */ 0);
+
+ xm->n_descriptors[VLIB_TX] = 20 * VLIB_FRAME_SIZE;
+
+ ixge_dma_init (xd, VLIB_TX, /* queue_index */ 0);
+
+ /* RX/TX queue 0 gets mapped to interrupt bits 0 & 8. */
+ r->interrupt.queue_mapping[0] = (( /* valid bit */ (1 << 7) |
+ ixge_rx_queue_to_interrupt (0)) << 0);
+
+ r->interrupt.queue_mapping[0] |= (( /* valid bit */ (1 << 7) |
+ ixge_tx_queue_to_interrupt (0)) << 8);
+
+ /* No use in taking too many interrupts: limit them to at most one
+ per 3/4 of a ring's worth of minimum-sized packets at line rate.
+ Disabled because the kernel/vlib main loop already provides an
+ adequate interrupt-limiting scheme. */
+ if (0)
+ {
+ f64 line_rate_max_pps =
+ 10e9 / (8 * (64 + /* interframe padding */ 20));
+ ixge_throttle_queue_interrupt (r, 0,
+ .75 * xm->n_descriptors[VLIB_RX] /
+ line_rate_max_pps);
+ }
+
+ /* Accept all multicast and broadcast packets. Should really add them
+ to the dst_ethernet_address register array. */
+ r->filter_control |= (1 << 10) | (1 << 8);
+
+ /* Enable frames up to size in mac frame size register. */
+ r->xge_mac.control |= 1 << 2;
+ r->xge_mac.rx_max_frame_size = (9216 + 14) << 16;
+
+ /* Enable all interrupts. */
+ if (!IXGE_ALWAYS_POLL)
+ r->interrupt.enable_write_1_to_set = ~0;
+ }
+}
+
+static uword
+ixge_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ixge_main_t *xm = &ixge_main;
+ ixge_device_t *xd;
+ uword event_type, *event_data = 0;
+ f64 timeout, link_debounce_deadline;
+
+ ixge_device_init (xm);
+
+ /* Clear all counters. */
+ vec_foreach (xd, xm->devices)
+ {
+ ixge_update_counters (xd);
+ memset (xd->counters, 0, sizeof (xd->counters));
+ }
+
+ timeout = 30.0;
+ link_debounce_deadline = 1e70;
+
+ while (1)
+ {
+ /* 36 bit stat counters could overflow in ~50 secs.
+ We poll every 30 secs to be conservative. */
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+
+ switch (event_type)
+ {
+ case EVENT_SET_FLAGS:
+ /* 1 ms */
+ link_debounce_deadline = vlib_time_now (vm) + 1e-3;
+ timeout = 1e-3;
+ break;
+
+ case ~0:
+ /* No events found: timer expired. */
+ if (vlib_time_now (vm) > link_debounce_deadline)
+ {
+ vec_foreach (xd, xm->devices)
+ {
+ ixge_regs_t *r = xd->regs;
+ u32 v = r->xge_mac.link_status;
+ uword is_up = (v & (1 << 30)) != 0;
+
+ vnet_hw_interface_set_flags
+ (vnm, xd->vlib_hw_if_index,
+ is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+ }
+ link_debounce_deadline = 1e70;
+ timeout = 30.0;
+ }
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ if (event_data)
+ _vec_len (event_data) = 0;
+
+ /* Query stats every 30 secs. */
+ {
+ f64 now = vlib_time_now (vm);
+ if (now - xm->time_last_stats_update > 30)
+ {
+ xm->time_last_stats_update = now;
+ vec_foreach (xd, xm->devices) ixge_update_counters (xd);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static vlib_node_registration_t ixge_process_node = {
+ .function = ixge_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ixge-process",
+};
+
+clib_error_t *
+ixge_init (vlib_main_t * vm)
+{
+ ixge_main_t *xm = &ixge_main;
+ clib_error_t *error;
+
+ xm->vlib_main = vm;
+ memset (&xm->tx_descriptor_template, 0,
+ sizeof (xm->tx_descriptor_template));
+ memset (&xm->tx_descriptor_template_mask, 0,
+ sizeof (xm->tx_descriptor_template_mask));
+ xm->tx_descriptor_template.status0 =
+ (IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED |
+ IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED |
+ IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS);
+ xm->tx_descriptor_template_mask.status0 = 0xffff;
+ xm->tx_descriptor_template_mask.status1 = 0x00003fff;
+
+ xm->tx_descriptor_template_mask.status0 &=
+ ~(IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET
+ | IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS);
+ xm->tx_descriptor_template_mask.status1 &=
+ ~(IXGE_TX_DESCRIPTOR_STATUS1_DONE);
+
+ error = vlib_call_init_function (vm, pci_bus_init);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ixge_init);
+
+
+static void
+ixge_pci_intr_handler (vlib_pci_device_t * dev)
+{
+ ixge_main_t *xm = &ixge_main;
+ vlib_main_t *vm = xm->vlib_main;
+
+ vlib_node_set_interrupt_pending (vm, ixge_input_node.index);
+
+ /* Let node know which device is interrupting. */
+ {
+ vlib_node_runtime_t *rt =
+ vlib_node_get_runtime (vm, ixge_input_node.index);
+ rt->runtime_data[0] |= 1 << dev->private_data;
+ }
+}
+
+static clib_error_t *
+ixge_pci_init (vlib_main_t * vm, vlib_pci_device_t * dev)
+{
+ ixge_main_t *xm = &ixge_main;
+ clib_error_t *error;
+ void *r;
+ ixge_device_t *xd;
+
+ /* Allocate physmem region for DMA buffers */
+ error = vlib_physmem_region_alloc (vm, "ixge descriptors", 2 << 20, 0,
+ VLIB_PHYSMEM_F_INIT_MHEAP,
+ &xm->physmem_region);
+ if (error)
+ return error;
+
+ error = vlib_pci_map_resource (dev, 0, &r);
+ if (error)
+ return error;
+
+ vec_add2 (xm->devices, xd, 1);
+
+ if (vec_len (xm->devices) == 1)
+ {
+ ixge_input_node.function = ixge_input_multiarch_select ();
+ }
+
+ xd->pci_device = dev[0];
+ xd->device_id = xd->pci_device.config0.header.device_id;
+ xd->regs = r;
+ xd->device_index = xd - xm->devices;
+ xd->pci_function = dev->bus_address.function;
+ xd->per_interface_next_index = ~0;
+
+
+ /* Chip found so enable node. */
+ {
+ vlib_node_set_state (vm, ixge_input_node.index,
+ (IXGE_ALWAYS_POLL
+ ? VLIB_NODE_STATE_POLLING
+ : VLIB_NODE_STATE_INTERRUPT));
+
+ dev->private_data = xd->device_index;
+ }
+
+ if (vec_len (xm->devices) == 1)
+ {
+ vlib_register_node (vm, &ixge_process_node);
+ xm->process_node_index = ixge_process_node.index;
+ }
+
+ error = vlib_pci_bus_master_enable (dev);
+
+ if (error)
+ return error;
+
+ return vlib_pci_intr_enable (dev);
+}
+
+/* *INDENT-OFF* */
+PCI_REGISTER_DEVICE (ixge_pci_device_registration,static) = {
+ .init_function = ixge_pci_init,
+ .interrupt_handler = ixge_pci_intr_handler,
+ .supported_devices = {
+#define _(t,i) { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = i, },
+ foreach_ixge_pci_device_id
+#undef _
+ { 0 },
+ },
+};
+/* *INDENT-ON* */
+
+void
+ixge_set_next_node (ixge_rx_next_t next, char *name)
+{
+ vlib_node_registration_t *r = &ixge_input_node;
+
+ switch (next)
+ {
+ case IXGE_RX_NEXT_IP4_INPUT:
+ case IXGE_RX_NEXT_IP6_INPUT:
+ case IXGE_RX_NEXT_ETHERNET_INPUT:
+ r->next_nodes[next] = name;
+ break;
+
+ default:
+ clib_warning ("%s: illegal next %d\n", __FUNCTION__, next);
+ break;
+ }
+}
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .default_disabled = 1,
+ .description = "Intel 82599 Family Native Driver (experimental)",
+};
+#endif
+
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ixge/ixge.h b/src/plugins/ixge/ixge.h
new file mode 100644
index 00000000..42c1bfa5
--- /dev/null
+++ b/src/plugins/ixge/ixge.h
@@ -0,0 +1,1295 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ixge_h
+#define included_ixge_h
+
+#include <vnet/vnet.h>
+#include <vlib/pci/pci.h>
+#include <vlib/i2c.h>
+#include <vnet/ethernet/sfp.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+typedef volatile struct
+{
+ /* [31:7] 128 byte aligned. */
+ u32 descriptor_address[2];
+ u32 n_descriptor_bytes;
+
+ /* [5] rx/tx descriptor dca enable
+ [6] rx packet head dca enable
+ [7] rx packet tail dca enable
+ [9] rx/tx descriptor relaxed order
+ [11] rx/tx descriptor write back relaxed order
+ [13] rx/tx data write/read relaxed order
+ [15] rx head data write relaxed order
+ [31:24] apic id for cpu's cache. */
+ u32 dca_control;
+
+ u32 head_index;
+
+ /* [4:0] tail buffer size (in 1k byte units)
+ [13:8] head buffer size (in 64 byte units)
+ [24:22] lo free descriptors threshold (units of 64 descriptors)
+ [27:25] descriptor type 0 = legacy, 1 = advanced one buffer (e.g. tail),
+ 2 = advanced header splitting (head + tail), 5 = advanced header
+ splitting (head only).
+ [28] drop if no descriptors available. */
+ u32 rx_split_control;
+
+ u32 tail_index;
+ CLIB_PAD_FROM_TO (0x1c, 0x28);
+
+ /* [7:0] rx/tx prefetch threshold
+ [15:8] rx/tx host threshold
+ [24:16] rx/tx write back threshold
+ [25] rx/tx enable
+ [26] tx descriptor writeback flush
+ [30] rx strip vlan enable */
+ u32 control;
+
+ u32 rx_coallesce_control;
+
+ union
+ {
+ struct
+ {
+ /* packets bytes lo hi */
+ u32 stats[3];
+
+ u32 unused;
+ } rx;
+
+ struct
+ {
+ u32 unused[2];
+
+ /* [0] enables head write back. */
+ u32 head_index_write_back_address[2];
+ } tx;
+ };
+} ixge_dma_regs_t;
+
+/* Only advanced descriptors are supported. */
+typedef struct
+{
+ u64 tail_address;
+ u64 head_address;
+} ixge_rx_to_hw_descriptor_t;
+
+typedef struct
+{
+ u32 status[3];
+ u16 n_packet_bytes_this_descriptor;
+ u16 vlan_tag;
+} ixge_rx_from_hw_descriptor_t;
+
+#define IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2 (1 << (4 + 11))
+/* Valid if not layer2. */
+#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4 (1 << (4 + 0))
+#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT (1 << (4 + 1))
+#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6 (1 << (4 + 2))
+#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT (1 << (4 + 3))
+#define IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP (1 << (4 + 4))
+#define IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP (1 << (4 + 5))
+#define IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET(s) (((s) >> 21) & 0x3ff)
+
+#define IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE (1 << (0 + 0))
+#define IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET (1 << (0 + 1))
+#define IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN (1 << (0 + 3))
+#define IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED (1 << (0 + 4))
+#define IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED (1 << (0 + 5))
+#define IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED (1 << (0 + 6))
+#define IXGE_RX_DESCRIPTOR_STATUS2_NOT_UNICAST (1 << (0 + 7))
+#define IXGE_RX_DESCRIPTOR_STATUS2_IS_DOUBLE_VLAN (1 << (0 + 9))
+#define IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR (1 << (0 + 10))
+#define IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR (1 << (20 + 9))
+#define IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR (1 << (20 + 10))
+#define IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR (1 << (20 + 11))
+
+/* For layer2 packets, status0 bottom 3 bits give ether type index from filter. */
+#define IXGE_RX_DESCRIPTOR_STATUS0_LAYER2_ETHERNET_TYPE(s) ((s) & 7)
+
+typedef struct
+{
+ u64 buffer_address;
+ u16 n_bytes_this_buffer;
+ u16 status0;
+ u32 status1;
+#define IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED (3 << 4)
+#define IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED (1 << (8 + 5))
+#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS (8 + 3)
+#define IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS)
+#define IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS (1 << (8 + 1))
+#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET (8 + 0)
+#define IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET)
+#define IXGE_TX_DESCRIPTOR_STATUS1_DONE (1 << 0)
+#define IXGE_TX_DESCRIPTOR_STATUS1_CONTEXT(i) (/* valid */ (1 << 7) | ((i) << 4))
+#define IXGE_TX_DESCRIPTOR_STATUS1_IPSEC_OFFLOAD (1 << (8 + 2))
+#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_TCP_UDP_CHECKSUM (1 << (8 + 1))
+#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_IP4_CHECKSUM (1 << (8 + 0))
+#define IXGE_TX_DESCRIPTOR_STATUS0_N_BYTES_THIS_BUFFER(l) ((l) << 0)
+#define IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET(l) ((l) << 14)
+} ixge_tx_descriptor_t;
+
+typedef struct
+{
+ struct
+ {
+ u8 checksum_start_offset;
+ u8 checksum_insert_offset;
+ u16 checksum_end_offset;
+ } ip, tcp;
+ u32 status0;
+
+ u8 status1;
+
+ /* Byte offset after UDP/TCP header. */
+ u8 payload_offset;
+
+ u16 max_tcp_segment_size;
+} __attribute__ ((packed)) ixge_tx_context_descriptor_t;
+
+typedef union
+{
+ ixge_rx_to_hw_descriptor_t rx_to_hw;
+ ixge_rx_from_hw_descriptor_t rx_from_hw;
+ ixge_tx_descriptor_t tx;
+ u32x4 as_u32x4;
+} ixge_descriptor_t;
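+/* The u32x4 view allows a whole 16-byte descriptor to be moved with a
+   single vector load/store, as the RX path does with d0.as_u32x4. */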
+
+typedef volatile struct
+{
+ /* [2] pcie master disable
+ [3] mac reset
+ [26] global device reset */
+ u32 control;
+ u32 control_alias;
+ /* [3:2] device id (0 or 1 for dual port chips)
+ [7] link is up
+ [17:10] num vfs
+ [18] io active
+ [19] pcie master enable status */
+ u32 status_read_only;
+ CLIB_PAD_FROM_TO (0xc, 0x18);
+ /* [14] pf reset done
+ [17] relaxed ordering disable
+ [26] extended vlan enable
+ [28] driver loaded */
+ u32 extended_control;
+ CLIB_PAD_FROM_TO (0x1c, 0x20);
+
+ /* software definable pins.
+ sdp_data [7:0]
+ sdp_is_output [15:8]
+ sdp_is_native [23:16]
+ sdp_function [31:24].
+ */
+ u32 sdp_control;
+ CLIB_PAD_FROM_TO (0x24, 0x28);
+
+ /* [0] i2c clock in
+ [1] i2c clock out
+ [2] i2c data in
+ [3] i2c data out */
+ u32 i2c_control;
+ CLIB_PAD_FROM_TO (0x2c, 0x4c);
+ u32 tcp_timer;
+
+ CLIB_PAD_FROM_TO (0x50, 0x200);
+
+ u32 led_control;
+
+ CLIB_PAD_FROM_TO (0x204, 0x600);
+ u32 core_spare;
+ CLIB_PAD_FROM_TO (0x604, 0x700);
+
+ struct
+ {
+ u32 vflr_events_clear[4];
+ u32 mailbox_interrupt_status[4];
+ u32 mailbox_interrupt_enable[4];
+ CLIB_PAD_FROM_TO (0x730, 0x800);
+ } pf_foo;
+
+ struct
+ {
+ u32 status_write_1_to_clear;
+ CLIB_PAD_FROM_TO (0x804, 0x808);
+ u32 status_write_1_to_set;
+ CLIB_PAD_FROM_TO (0x80c, 0x810);
+ u32 status_auto_clear_enable;
+ CLIB_PAD_FROM_TO (0x814, 0x820);
+
+ /* [11:3] minimum inter-interrupt interval
+ (2e-6 units; 20e-6 units for fast ethernet).
+ [15] low-latency interrupt moderation enable
+ [20:16] low-latency interrupt credit
+ [27:21] interval counter
+ [31] write disable for credit and counter (write only). */
+ u32 throttle0[24];
+
+ u32 enable_write_1_to_set;
+ CLIB_PAD_FROM_TO (0x884, 0x888);
+ u32 enable_write_1_to_clear;
+ CLIB_PAD_FROM_TO (0x88c, 0x890);
+ u32 enable_auto_clear;
+ u32 msi_to_eitr_select;
+ /* [3:0] spd 0-3 interrupt detection enable
+ [4] msi-x enable
+ [5] other clear disable (makes other bits in status not clear on read)
+ etc. */
+ u32 control;
+ CLIB_PAD_FROM_TO (0x89c, 0x900);
+
+ /* Defines interrupt mapping for 128 rx + 128 tx queues.
+ 64 x 4 8 bit entries.
+ For register [i]:
+ [5:0] bit in interrupt status for rx queue 2*i + 0
+ [7] valid bit
+ [13:8] bit for tx queue 2*i + 0
+ [15] valid bit
+ similar for rx 2*i + 1 and tx 2*i + 1. */
+ u32 queue_mapping[64];
+
+ /* tcp timer [7:0] and other interrupts [15:8] */
+ u32 misc_mapping;
+ CLIB_PAD_FROM_TO (0xa04, 0xa90);
+
+ /* 64 interrupts determined by mappings. */
+ u32 status1_write_1_to_clear[4];
+ u32 enable1_write_1_to_set[4];
+ u32 enable1_write_1_to_clear[4];
+ CLIB_PAD_FROM_TO (0xac0, 0xad0);
+ u32 status1_enable_auto_clear[4];
+ CLIB_PAD_FROM_TO (0xae0, 0x1000);
+ } interrupt;
+
+ ixge_dma_regs_t rx_dma0[64];
+
+ CLIB_PAD_FROM_TO (0x2000, 0x2140);
+ u32 dcb_rx_packet_plane_t4_config[8];
+ u32 dcb_rx_packet_plane_t4_status[8];
+ CLIB_PAD_FROM_TO (0x2180, 0x2300);
+
+ /* reg i defines mapping for 4 rx queues starting at 4*i + 0. */
+ u32 rx_queue_stats_mapping[32];
+ u32 rx_queue_stats_control;
+
+ CLIB_PAD_FROM_TO (0x2384, 0x2410);
+ u32 fc_user_descriptor_ptr[2];
+ u32 fc_buffer_control;
+ CLIB_PAD_FROM_TO (0x241c, 0x2420);
+ u32 fc_rx_dma;
+ CLIB_PAD_FROM_TO (0x2424, 0x2430);
+ u32 dcb_packet_plane_control;
+ CLIB_PAD_FROM_TO (0x2434, 0x2f00);
+
+ u32 rx_dma_control;
+ u32 pf_queue_drop_enable;
+ CLIB_PAD_FROM_TO (0x2f08, 0x2f20);
+ u32 rx_dma_descriptor_cache_config;
+ CLIB_PAD_FROM_TO (0x2f24, 0x3000);
+
+ /* 1 bit. */
+ u32 rx_enable;
+ CLIB_PAD_FROM_TO (0x3004, 0x3008);
+ /* [15:0] ether type (little endian)
+ [31:16] opcode (big endian) */
+ u32 flow_control_control;
+ CLIB_PAD_FROM_TO (0x300c, 0x3020);
+ /* 3 bit traffic class for each of 8 priorities. */
+ u32 rx_priority_to_traffic_class;
+ CLIB_PAD_FROM_TO (0x3024, 0x3028);
+ u32 rx_coallesce_data_buffer_control;
+ CLIB_PAD_FROM_TO (0x302c, 0x3190);
+ u32 rx_packet_buffer_flush_detect;
+ CLIB_PAD_FROM_TO (0x3194, 0x3200);
+ u32 flow_control_tx_timers[4]; /* 2 timer values */
+ CLIB_PAD_FROM_TO (0x3210, 0x3220);
+ u32 flow_control_rx_threshold_lo[8];
+ CLIB_PAD_FROM_TO (0x3240, 0x3260);
+ u32 flow_control_rx_threshold_hi[8];
+ CLIB_PAD_FROM_TO (0x3280, 0x32a0);
+ u32 flow_control_refresh_threshold;
+ CLIB_PAD_FROM_TO (0x32a4, 0x3c00);
+ /* For each of 8 traffic classes (units of bytes). */
+ u32 rx_packet_buffer_size[8];
+ CLIB_PAD_FROM_TO (0x3c20, 0x3d00);
+ u32 flow_control_config;
+ CLIB_PAD_FROM_TO (0x3d04, 0x4200);
+
+ struct
+ {
+ u32 pcs_config;
+ CLIB_PAD_FROM_TO (0x4204, 0x4208);
+ u32 link_control;
+ u32 link_status;
+ u32 pcs_debug[2];
+ u32 auto_negotiation;
+ u32 link_partner_ability;
+ u32 auto_negotiation_tx_next_page;
+ u32 auto_negotiation_link_partner_next_page;
+ CLIB_PAD_FROM_TO (0x4228, 0x4240);
+ } gige_mac;
+
+ struct
+ {
+ /* [0] tx crc enable
+ [2] enable frames up to max frame size register [31:16]
+ [10] pad frames < 64 bytes if specified by user
+ [15] loopback enable
+ [16] mdc hi speed
+ [17] turn off mdc between mdio packets */
+ u32 control;
+
+ /* [5] rx symbol error (all bits clear on read)
+ [6] rx illegal symbol
+ [7] rx idle error
+ [8] rx local fault
+ [9] rx remote fault */
+ u32 status;
+
+ u32 pause_and_pace_control;
+ CLIB_PAD_FROM_TO (0x424c, 0x425c);
+ u32 phy_command;
+ u32 phy_data;
+ CLIB_PAD_FROM_TO (0x4264, 0x4268);
+
+ /* [31:16] max frame size in bytes. */
+ u32 rx_max_frame_size;
+ CLIB_PAD_FROM_TO (0x426c, 0x4288);
+
+ /* [0]
+ [2] pcs receive link up? (latch lo)
+ [7] local fault
+ [1]
+ [0] pcs 10g base r capable
+ [1] pcs 10g base x capable
+ [2] pcs 10g base w capable
+ [10] rx local fault
+ [11] tx local fault
+ [15:14] 2 => device present at this address (else not present) */
+ u32 xgxs_status[2];
+
+ u32 base_x_pcs_status;
+
+ /* [0] pass unrecognized flow control frames
+ [1] discard pause frames
+ [2] rx priority flow control enable (only in dcb mode)
+ [3] rx flow control enable. */
+ u32 flow_control;
+
+ /* [3:0] tx lanes change polarity
+ [7:4] rx lanes change polarity
+ [11:8] swizzle tx lanes
+ [15:12] swizzle rx lanes
+ 4 x 2 bit tx lane swap
+ 4 x 2 bit rx lane swap. */
+ u32 serdes_control;
+
+ u32 fifo_control;
+
+ /* [0] force link up
+ [1] autoneg ack2 bit to transmit
+ [6:2] autoneg selector field to transmit
+ [8:7] 10g pma/pmd type 0 => xaui, 1 kx4, 2 cx4
+ [9] 1g pma/pmd type 0 => sfi, 1 => kx/bx
+ [10] disable 10g on without main power
+ [11] restart autoneg on transition to dx power state
+ [12] restart autoneg
+ [15:13] link mode:
+ 0 => 1g no autoneg
+ 1 => 10g kx4 parallel link no autoneg
+ 2 => 1g bx autoneg
+ 3 => 10g sfi serdes
+ 4 => kx4/kx/kr
+ 5 => xgmii 1g/100m
+ 6 => kx4/kx/kr 1g an
+ 7 kx4/kx/kr sgmii.
+ [16] kr support
+ [17] fec requested
+ [18] fec ability
+ etc. */
+ u32 auto_negotiation_control;
+
+ /* [0] signal detect 1g/100m
+ [1] fec signal detect
+ [2] 10g serial pcs fec block lock
+ [3] 10g serial high error rate
+ [4] 10g serial pcs block lock
+ [5] kx/kx4/kr autoneg next page received
+ [6] kx/kx4/kr backplane autoneg next page received
+ [7] link status clear to read
+ [11:8] 10g signal detect (4 lanes) (for serial just lane 0)
+ [12] 10g serial signal detect
+ [16:13] 10g parallel lane sync status
+ [17] 10g parallel align status
+ [18] 1g sync status
+ [19] kx/kx4/kr backplane autoneg is idle
+ [20] 1g autoneg enabled
+ [21] 1g pcs enabled for sgmii
+ [22] 10g xgxs enabled
+ [23] 10g serial fec enabled (forward error detection)
+ [24] 10g kr pcs enabled
+ [25] sgmii enabled
+ [27:26] mac link mode
+ 0 => 1g
+ 1 => 10g parallel
+ 2 => 10g serial
+ 3 => autoneg
+ [29:28] link speed
+ 1 => 100m
+ 2 => 1g
+ 3 => 10g
+ [30] link is up
+ [31] kx/kx4/kr backplane autoneg completed successfully. */
+ u32 link_status;
+
+ /* [17:16] pma/pmd for 10g serial
+ 0 => kr, 2 => sfi
+ [18] disable dme pages */
+ u32 auto_negotiation_control2;
+
+ CLIB_PAD_FROM_TO (0x42ac, 0x42b0);
+ u32 link_partner_ability[2];
+ CLIB_PAD_FROM_TO (0x42b8, 0x42d0);
+ u32 manageability_control;
+ u32 link_partner_next_page[2];
+ CLIB_PAD_FROM_TO (0x42dc, 0x42e0);
+ u32 kr_pcs_control;
+ u32 kr_pcs_status;
+ u32 fec_status[2];
+ CLIB_PAD_FROM_TO (0x42f0, 0x4314);
+ u32 sgmii_control;
+ CLIB_PAD_FROM_TO (0x4318, 0x4324);
+ u32 link_status2;
+ CLIB_PAD_FROM_TO (0x4328, 0x4900);
+ } xge_mac;
+
+ u32 tx_dcb_control;
+ u32 tx_dcb_descriptor_plane_queue_select;
+ u32 tx_dcb_descriptor_plane_t1_config;
+ u32 tx_dcb_descriptor_plane_t1_status;
+ CLIB_PAD_FROM_TO (0x4910, 0x4950);
+
+ /* For each TC in units of 1k bytes. */
+ u32 tx_packet_buffer_thresholds[8];
+ CLIB_PAD_FROM_TO (0x4970, 0x4980);
+ struct
+ {
+ u32 mmw;
+ u32 config;
+ u32 status;
+ u32 rate_drift;
+ } dcb_tx_rate_scheduler;
+ CLIB_PAD_FROM_TO (0x4990, 0x4a80);
+ u32 tx_dma_control;
+ CLIB_PAD_FROM_TO (0x4a84, 0x4a88);
+ u32 tx_dma_tcp_flags_control[2];
+ CLIB_PAD_FROM_TO (0x4a90, 0x4b00);
+ u32 pf_mailbox[64];
+ CLIB_PAD_FROM_TO (0x4c00, 0x5000);
+
+ /* RX */
+ u32 checksum_control;
+ CLIB_PAD_FROM_TO (0x5004, 0x5008);
+ u32 rx_filter_control;
+ CLIB_PAD_FROM_TO (0x500c, 0x5010);
+ u32 management_vlan_tag[8];
+ u32 management_udp_tcp_ports[8];
+ CLIB_PAD_FROM_TO (0x5050, 0x5078);
+ /* little endian. */
+ u32 extended_vlan_ether_type;
+ CLIB_PAD_FROM_TO (0x507c, 0x5080);
+ /* [1] store/dma bad packets
+ [8] accept all multicast
+ [9] accept all unicast
+ [10] accept all broadcast. */
+ u32 filter_control;
+ CLIB_PAD_FROM_TO (0x5084, 0x5088);
+ /* [15:0] vlan ethernet type (0x8100) little endian
+ [28] cfi bit expected
+ [29] drop packets with unexpected cfi bit
+ [30] vlan filter enable. */
+ u32 vlan_control;
+ CLIB_PAD_FROM_TO (0x508c, 0x5090);
+ /* [1:0] hi bit of ethernet address for 12 bit index into multicast table
+ 0 => 47, 1 => 46, 2 => 45, 3 => 43.
+ [2] enable multicast filter
+ */
+ u32 multicast_control;
+ CLIB_PAD_FROM_TO (0x5094, 0x5100);
+ u32 fcoe_rx_control;
+ CLIB_PAD_FROM_TO (0x5104, 0x5108);
+ u32 fc_flt_context;
+ CLIB_PAD_FROM_TO (0x510c, 0x5110);
+ u32 fc_filter_control;
+ CLIB_PAD_FROM_TO (0x5114, 0x5120);
+ u32 rx_message_type_lo;
+ CLIB_PAD_FROM_TO (0x5124, 0x5128);
+ /* [15:0] ethernet type (little endian)
+ [18:16] match pri in vlan tag
+ [19] priority match enable
+ [25:20] virtualization pool
+ [26] pool enable
+ [27] is fcoe
+ [30] ieee 1588 timestamp enable
+ [31] filter enable.
+ (See ethernet_type_queue_select.) */
+ u32 ethernet_type_queue_filter[8];
+ CLIB_PAD_FROM_TO (0x5148, 0x5160);
+ /* [7:0] l2 ethernet type and
+ [15:8] l2 ethernet type or */
+ u32 management_decision_filters1[8];
+ u32 vf_vm_tx_switch_loopback_enable[2];
+ u32 rx_time_sync_control;
+ CLIB_PAD_FROM_TO (0x518c, 0x5190);
+ u32 management_ethernet_type_filters[4];
+ u32 rx_timestamp_attributes_lo;
+ u32 rx_timestamp_hi;
+ u32 rx_timestamp_attributes_hi;
+ CLIB_PAD_FROM_TO (0x51ac, 0x51b0);
+ u32 pf_virtual_control;
+ CLIB_PAD_FROM_TO (0x51b4, 0x51d8);
+ u32 fc_offset_parameter;
+ CLIB_PAD_FROM_TO (0x51dc, 0x51e0);
+ u32 vf_rx_enable[2];
+ u32 rx_timestamp_lo;
+ CLIB_PAD_FROM_TO (0x51ec, 0x5200);
+ /* 12 bits determined by multicast_control
+ lookup bits in this vector. */
+ u32 multicast_enable[128];
+
+ /* [0] ethernet address [31:0]
+ [1] [15:0] ethernet address [47:32]
+ [31] valid bit.
+ Index 0 is read from eeprom after reset. */
+ u32 rx_ethernet_address0[16][2];
+
+ CLIB_PAD_FROM_TO (0x5480, 0x5800);
+ u32 wake_up_control;
+ CLIB_PAD_FROM_TO (0x5804, 0x5808);
+ u32 wake_up_filter_control;
+ CLIB_PAD_FROM_TO (0x580c, 0x5818);
+ u32 multiple_rx_queue_command_82598;
+ CLIB_PAD_FROM_TO (0x581c, 0x5820);
+ u32 management_control;
+ u32 management_filter_control;
+ CLIB_PAD_FROM_TO (0x5828, 0x5838);
+ u32 wake_up_ip4_address_valid;
+ CLIB_PAD_FROM_TO (0x583c, 0x5840);
+ u32 wake_up_ip4_address_table[4];
+ u32 management_control_to_host;
+ CLIB_PAD_FROM_TO (0x5854, 0x5880);
+ u32 wake_up_ip6_address_table[4];
+
+ /* unicast_and broadcast_and vlan_and ip_address_and
+ etc. */
+ u32 management_decision_filters[8];
+
+ u32 management_ip4_or_ip6_address_filters[4][4];
+ CLIB_PAD_FROM_TO (0x58f0, 0x5900);
+ u32 wake_up_packet_length;
+ CLIB_PAD_FROM_TO (0x5904, 0x5910);
+ u32 management_ethernet_address_filters[4][2];
+ CLIB_PAD_FROM_TO (0x5930, 0x5a00);
+ u32 wake_up_packet_memory[32];
+ CLIB_PAD_FROM_TO (0x5a80, 0x5c00);
+ u32 redirection_table_82598[32];
+ u32 rss_random_keys_82598[10];
+ CLIB_PAD_FROM_TO (0x5ca8, 0x6000);
+
+ ixge_dma_regs_t tx_dma[128];
+
+ u32 pf_vm_vlan_insert[64];
+ u32 tx_dma_tcp_max_alloc_size_requests;
+ CLIB_PAD_FROM_TO (0x8104, 0x8110);
+ u32 vf_tx_enable[2];
+ CLIB_PAD_FROM_TO (0x8118, 0x8120);
+ /* [0] dcb mode enable
+ [1] virtualization mode enable
+ [3:2] number of tcs/qs per pool. */
+ u32 multiple_tx_queues_command;
+ CLIB_PAD_FROM_TO (0x8124, 0x8200);
+ u32 pf_vf_anti_spoof[8];
+ u32 pf_dma_tx_switch_control;
+ CLIB_PAD_FROM_TO (0x8224, 0x82e0);
+ u32 tx_strict_low_latency_queues[4];
+ CLIB_PAD_FROM_TO (0x82f0, 0x8600);
+ u32 tx_queue_stats_mapping_82599[32];
+ u32 tx_queue_packet_counts[32];
+ u32 tx_queue_byte_counts[32][2];
+
+ struct
+ {
+ u32 control;
+ u32 status;
+ u32 buffer_almost_full;
+ CLIB_PAD_FROM_TO (0x880c, 0x8810);
+ u32 buffer_min_ifg;
+ CLIB_PAD_FROM_TO (0x8814, 0x8900);
+ } tx_security;
+
+ struct
+ {
+ u32 index;
+ u32 salt;
+ u32 key[4];
+ CLIB_PAD_FROM_TO (0x8918, 0x8a00);
+ } tx_ipsec;
+
+ struct
+ {
+ u32 capabilities;
+ u32 control;
+ u32 tx_sci[2];
+ u32 sa;
+ u32 sa_pn[2];
+ u32 key[2][4];
+ /* untagged packets, encrypted packets, protected packets,
+ encrypted bytes, protected bytes */
+ u32 stats[5];
+ CLIB_PAD_FROM_TO (0x8a50, 0x8c00);
+ } tx_link_security;
+
+ struct
+ {
+ u32 control;
+ u32 timestamp_value[2];
+ u32 system_time[2];
+ u32 increment_attributes;
+ u32 time_adjustment_offset[2];
+ u32 aux_control;
+ u32 target_time[2][2];
+ CLIB_PAD_FROM_TO (0x8c34, 0x8c3c);
+ u32 aux_time_stamp[2][2];
+ CLIB_PAD_FROM_TO (0x8c4c, 0x8d00);
+ } tx_timesync;
+
+ struct
+ {
+ u32 control;
+ u32 status;
+ CLIB_PAD_FROM_TO (0x8d08, 0x8e00);
+ } rx_security;
+
+ struct
+ {
+ u32 index;
+ u32 ip_address[4];
+ u32 spi;
+ u32 ip_index;
+ u32 key[4];
+ u32 salt;
+ u32 mode;
+ CLIB_PAD_FROM_TO (0x8e34, 0x8f00);
+ } rx_ipsec;
+
+ struct
+ {
+ u32 capabilities;
+ u32 control;
+ u32 sci[2];
+ u32 sa[2];
+ u32 sa_pn[2];
+ u32 key[2][4];
+ /* see datasheet */
+ u32 stats[17];
+ CLIB_PAD_FROM_TO (0x8f84, 0x9000);
+ } rx_link_security;
+
+ /* 4 wake up, 2 management, 2 wake up. */
+ u32 flexible_filters[8][16][4];
+ CLIB_PAD_FROM_TO (0x9800, 0xa000);
+
+ /* 4096 bits. */
+ u32 vlan_filter[128];
+
+ /* [0] ethernet address [31:0]
+ [1] [15:0] ethernet address [47:32]
+ [31] valid bit.
+ Index 0 is read from eeprom after reset. */
+ u32 rx_ethernet_address1[128][2];
+
+ /* select one of 64 pools for each rx address. */
+ u32 rx_ethernet_address_pool_select[128][2];
+ CLIB_PAD_FROM_TO (0xaa00, 0xc800);
+ u32 tx_priority_to_traffic_class;
+ CLIB_PAD_FROM_TO (0xc804, 0xcc00);
+
+ /* In units of 1k bytes. Total packet buffer is 160k. */
+ u32 tx_packet_buffer_size[8];
+
+ CLIB_PAD_FROM_TO (0xcc20, 0xcd10);
+ u32 tx_manageability_tc_mapping;
+ CLIB_PAD_FROM_TO (0xcd14, 0xcd20);
+ u32 dcb_tx_packet_plane_t2_config[8];
+ u32 dcb_tx_packet_plane_t2_status[8];
+ CLIB_PAD_FROM_TO (0xcd60, 0xce00);
+
+ u32 tx_flow_control_status;
+ CLIB_PAD_FROM_TO (0xce04, 0xd000);
+
+ ixge_dma_regs_t rx_dma1[64];
+
+ struct
+ {
+ /* Bigendian ip4 src/dst address. */
+ u32 src_address[128];
+ u32 dst_address[128];
+
+ /* TCP/UDP ports [15:0] src [31:16] dst; bigendian. */
+ u32 tcp_udp_port[128];
+
+ /* [1:0] protocol tcp, udp, sctp, other
+ [4:2] match priority (highest wins)
+ [13:8] pool
+ [25] src address match disable
+ [26] dst address match disable
+ [27] src port match disable
+ [28] dst port match disable
+ [29] protocol match disable
+ [30] pool match disable
+ [31] enable. */
+ u32 control[128];
+
+ /* [12] size bypass
+ [19:13] must be 0x80
+ [20] low-latency interrupt
+ [27:21] rx queue. */
+ u32 interrupt[128];
+ } ip4_filters;
+
+ CLIB_PAD_FROM_TO (0xea00, 0xeb00);
+ /* 4 bit rss output index indexed by 7 bit hash.
+ 128 8 bit fields = 32 registers. */
+ u32 redirection_table_82599[32];
+
+ u32 rss_random_key_82599[10];
+ CLIB_PAD_FROM_TO (0xeba8, 0xec00);
+ /* [15:0] reserved
+ [22:16] rx queue index
+ [29] low-latency interrupt on match
+ [31] enable */
+ u32 ethernet_type_queue_select[8];
+ CLIB_PAD_FROM_TO (0xec20, 0xec30);
+ u32 syn_packet_queue_filter;
+ CLIB_PAD_FROM_TO (0xec34, 0xec60);
+ u32 immediate_interrupt_rx_vlan_priority;
+ CLIB_PAD_FROM_TO (0xec64, 0xec70);
+ u32 rss_queues_per_traffic_class;
+ CLIB_PAD_FROM_TO (0xec74, 0xec90);
+ u32 lli_size_threshold;
+ CLIB_PAD_FROM_TO (0xec94, 0xed00);
+
+ struct
+ {
+ u32 control;
+ CLIB_PAD_FROM_TO (0xed04, 0xed10);
+ u32 table[8];
+ CLIB_PAD_FROM_TO (0xed30, 0xee00);
+ } fcoe_redirection;
+
+ struct
+ {
+ /* [1:0] packet buffer allocation 0 => disabled, else 64k*2^(f-1)
+ [3] packet buffer initialization done
+       [4] prefetch match mode
+ [5] report status in rss field of rx descriptors
+ [7] report status always
+ [14:8] drop queue
+ [20:16] flex 2 byte packet offset (units of 2 bytes)
+ [27:24] max linked list length
+ [31:28] full threshold. */
+ u32 control;
+ CLIB_PAD_FROM_TO (0xee04, 0xee0c);
+
+ u32 data[8];
+
+ /* [1:0] 0 => no action, 1 => add, 2 => remove, 3 => query.
+ [2] valid filter found by query command
+ [3] filter update override
+       [4] ip6 address table
+ [6:5] l4 protocol reserved, udp, tcp, sctp
+ [7] is ip6
+ [8] clear head/tail
+ [9] packet drop action
+ [10] matched packet generates low-latency interrupt
+ [11] last in linked list
+ [12] collision
+ [15] rx queue enable
+ [22:16] rx queue
+ [29:24] pool. */
+ u32 command;
+
+ CLIB_PAD_FROM_TO (0xee30, 0xee3c);
+ /* ip4 dst/src address, tcp ports, udp ports.
+ set bits mean bit is ignored. */
+ u32 ip4_masks[4];
+ u32 filter_length;
+ u32 usage_stats;
+ u32 failed_usage_stats;
+ u32 filters_match_stats;
+ u32 filters_miss_stats;
+ CLIB_PAD_FROM_TO (0xee60, 0xee68);
+ /* Lookup, signature. */
+ u32 hash_keys[2];
+ /* [15:0] ip6 src address 1 bit per byte
+ [31:16] ip6 dst address. */
+ u32 ip6_mask;
+ /* [0] vlan id
+ [1] vlan priority
+ [2] pool
+ [3] ip protocol
+ [4] flex
+ [5] dst ip6. */
+ u32 other_mask;
+ CLIB_PAD_FROM_TO (0xee78, 0xf000);
+ } flow_director;
+
+ struct
+ {
+ u32 l2_control[64];
+ u32 vlan_pool_filter[64];
+ u32 vlan_pool_filter_bitmap[128];
+ u32 dst_ethernet_address[128];
+ u32 mirror_rule[4];
+ u32 mirror_rule_vlan[8];
+ u32 mirror_rule_pool[8];
+ CLIB_PAD_FROM_TO (0xf650, 0x10010);
+ } pf_bar;
+
+ u32 eeprom_flash_control;
+ /* [0] start
+ [1] done
+ [15:2] address
+ [31:16] read data. */
+ u32 eeprom_read;
+ CLIB_PAD_FROM_TO (0x10018, 0x1001c);
+ u32 flash_access;
+ CLIB_PAD_FROM_TO (0x10020, 0x10114);
+ u32 flash_data;
+ u32 flash_control;
+ u32 flash_read_data;
+ CLIB_PAD_FROM_TO (0x10120, 0x1013c);
+ u32 flash_opcode;
+ u32 software_semaphore;
+ CLIB_PAD_FROM_TO (0x10144, 0x10148);
+ u32 firmware_semaphore;
+ CLIB_PAD_FROM_TO (0x1014c, 0x10160);
+ u32 software_firmware_sync;
+ CLIB_PAD_FROM_TO (0x10164, 0x10200);
+ u32 general_rx_control;
+ CLIB_PAD_FROM_TO (0x10204, 0x11000);
+
+ struct
+ {
+ u32 control;
+ CLIB_PAD_FROM_TO (0x11004, 0x11010);
+ /* [3:0] enable counters
+ [7:4] leaky bucket counter mode
+ [29] reset
+ [30] stop
+ [31] start. */
+ u32 counter_control;
+ /* [7:0],[15:8],[23:16],[31:24] event for counters 0-3.
+ event codes:
+ 0x0 bad tlp
+ 0x10 reqs that reached timeout
+ etc. */
+ u32 counter_event;
+ CLIB_PAD_FROM_TO (0x11018, 0x11020);
+ u32 counters_clear_on_read[4];
+ u32 counter_config[4];
+ struct
+ {
+ u32 address;
+ u32 data;
+ } indirect_access;
+ CLIB_PAD_FROM_TO (0x11048, 0x11050);
+ u32 extended_control;
+ CLIB_PAD_FROM_TO (0x11054, 0x11064);
+ u32 mirrored_revision_id;
+ CLIB_PAD_FROM_TO (0x11068, 0x11070);
+ u32 dca_requester_id_information;
+
+ /* [0] global disable
+ [4:1] mode: 0 => legacy, 1 => dca 1.0. */
+ u32 dca_control;
+ CLIB_PAD_FROM_TO (0x11078, 0x110b0);
+ /* [0] pci completion abort
+ [1] unsupported i/o address
+ [2] wrong byte enable
+ [3] pci timeout */
+ u32 pcie_interrupt_status;
+ CLIB_PAD_FROM_TO (0x110b4, 0x110b8);
+ u32 pcie_interrupt_enable;
+ CLIB_PAD_FROM_TO (0x110bc, 0x110c0);
+ u32 msi_x_pba_clear[8];
+ CLIB_PAD_FROM_TO (0x110e0, 0x12300);
+ } pcie;
+
+ u32 interrupt_throttle1[128 - 24];
+ CLIB_PAD_FROM_TO (0x124a0, 0x14f00);
+
+ u32 core_analog_config;
+ CLIB_PAD_FROM_TO (0x14f04, 0x14f10);
+ u32 core_common_config;
+ CLIB_PAD_FROM_TO (0x14f14, 0x15f14);
+
+ u32 link_sec_software_firmware_interface;
+} ixge_regs_t;
+
+typedef union
+{
+ struct
+ {
+ /* Addresses bigendian. */
+ union
+ {
+ struct
+ {
+ ip6_address_t src_address;
+ u32 unused[1];
+ } ip6;
+ struct
+ {
+ u32 unused[3];
+ ip4_address_t src_address, dst_address;
+ } ip4;
+ };
+
+ /* [15:0] src port (little endian).
+ [31:16] dst port. */
+ u32 tcp_udp_ports;
+
+ /* [15:0] vlan (cfi bit set to 0).
+ [31:16] flex bytes. bigendian. */
+ u32 vlan_and_flex_word;
+
+ /* [14:0] hash
+ [15] bucket valid
+ [31:16] signature (signature filers)/sw-index (perfect match). */
+ u32 hash;
+ };
+
+ u32 as_u32[8];
+} ixge_flow_director_key_t;
+
+always_inline void
+ixge_throttle_queue_interrupt (ixge_regs_t * r,
+ u32 queue_interrupt_index,
+ f64 inter_interrupt_interval_in_secs)
+{
+ volatile u32 *tr =
+ (queue_interrupt_index < ARRAY_LEN (r->interrupt.throttle0)
+ ? &r->interrupt.throttle0[queue_interrupt_index]
+     : &r->interrupt_throttle1[queue_interrupt_index - 24]);
+ ASSERT (queue_interrupt_index < 128);
+ u32 v;
+ i32 i, mask = (1 << 9) - 1;
+
+ i = flt_round_nearest (inter_interrupt_interval_in_secs / 2e-6);
+ i = i < 1 ? 1 : i;
+ i = i >= mask ? mask : i;
+
+ v = tr[0];
+ v &= ~(mask << 3);
+ v |= i << 3;
+ tr[0] = v;
+}
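+
+/* Worked example (illustrative): requesting an inter-interrupt interval
+   of 10us stores round (10e-6 / 2e-6) = 5 into the 9-bit field at bits
+   [11:3], i.e. the register granularity is assumed to be 2us per unit
+   and values are clamped to [1, 511]:
+
+     ixge_throttle_queue_interrupt (regs, 0, 10e-6);
+*/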
+
+#define foreach_ixge_counter \
+ _ (0x40d0, rx_total_packets) \
+ _64 (0x40c0, rx_total_bytes) \
+ _ (0x41b0, rx_good_packets_before_filtering) \
+ _64 (0x41b4, rx_good_bytes_before_filtering) \
+ _ (0x2f50, rx_dma_good_packets) \
+ _64 (0x2f54, rx_dma_good_bytes) \
+ _ (0x2f5c, rx_dma_duplicated_good_packets) \
+ _64 (0x2f60, rx_dma_duplicated_good_bytes) \
+ _ (0x2f68, rx_dma_good_loopback_packets) \
+ _64 (0x2f6c, rx_dma_good_loopback_bytes) \
+ _ (0x2f74, rx_dma_good_duplicated_loopback_packets) \
+ _64 (0x2f78, rx_dma_good_duplicated_loopback_bytes) \
+ _ (0x4074, rx_good_packets) \
+ _64 (0x4088, rx_good_bytes) \
+ _ (0x407c, rx_multicast_packets) \
+ _ (0x4078, rx_broadcast_packets) \
+ _ (0x405c, rx_64_byte_packets) \
+ _ (0x4060, rx_65_127_byte_packets) \
+ _ (0x4064, rx_128_255_byte_packets) \
+ _ (0x4068, rx_256_511_byte_packets) \
+ _ (0x406c, rx_512_1023_byte_packets) \
+ _ (0x4070, rx_gt_1023_byte_packets) \
+ _ (0x4000, rx_crc_errors) \
+ _ (0x4120, rx_ip_checksum_errors) \
+ _ (0x4004, rx_illegal_symbol_errors) \
+ _ (0x4008, rx_error_symbol_errors) \
+ _ (0x4034, rx_mac_local_faults) \
+ _ (0x4038, rx_mac_remote_faults) \
+ _ (0x4040, rx_length_errors) \
+ _ (0x41a4, rx_xons) \
+ _ (0x41a8, rx_xoffs) \
+ _ (0x40a4, rx_undersize_packets) \
+ _ (0x40a8, rx_fragments) \
+ _ (0x40ac, rx_oversize_packets) \
+ _ (0x40b0, rx_jabbers) \
+ _ (0x40b4, rx_management_packets) \
+ _ (0x40b8, rx_management_drops) \
+ _ (0x3fa0, rx_missed_packets_pool_0) \
+ _ (0x40d4, tx_total_packets) \
+ _ (0x4080, tx_good_packets) \
+ _64 (0x4090, tx_good_bytes) \
+ _ (0x40f0, tx_multicast_packets) \
+ _ (0x40f4, tx_broadcast_packets) \
+ _ (0x87a0, tx_dma_good_packets) \
+ _64 (0x87a4, tx_dma_good_bytes) \
+ _ (0x40d8, tx_64_byte_packets) \
+ _ (0x40dc, tx_65_127_byte_packets) \
+ _ (0x40e0, tx_128_255_byte_packets) \
+ _ (0x40e4, tx_256_511_byte_packets) \
+ _ (0x40e8, tx_512_1023_byte_packets) \
+ _ (0x40ec, tx_gt_1023_byte_packets) \
+ _ (0x4010, tx_undersize_drops) \
+ _ (0x8780, switch_security_violation_packets) \
+ _ (0x5118, fc_crc_errors) \
+ _ (0x241c, fc_rx_drops) \
+ _ (0x2424, fc_last_error_count) \
+ _ (0x2428, fcoe_rx_packets) \
+ _ (0x242c, fcoe_rx_dwords) \
+ _ (0x8784, fcoe_tx_packets) \
+ _ (0x8788, fcoe_tx_dwords) \
+ _ (0x1030, queue_0_rx_count) \
+ _ (0x1430, queue_0_drop_count) \
+ _ (0x1070, queue_1_rx_count) \
+ _ (0x1470, queue_1_drop_count) \
+ _ (0x10b0, queue_2_rx_count) \
+ _ (0x14b0, queue_2_drop_count) \
+ _ (0x10f0, queue_3_rx_count) \
+ _ (0x14f0, queue_3_drop_count) \
+ _ (0x1130, queue_4_rx_count) \
+ _ (0x1530, queue_4_drop_count) \
+ _ (0x1170, queue_5_rx_count) \
+ _ (0x1570, queue_5_drop_count) \
+ _ (0x11b0, queue_6_rx_count) \
+ _ (0x15b0, queue_6_drop_count) \
+ _ (0x11f0, queue_7_rx_count) \
+ _ (0x15f0, queue_7_drop_count) \
+ _ (0x1230, queue_8_rx_count) \
+ _ (0x1630, queue_8_drop_count) \
+ _ (0x1270, queue_9_rx_count) \
+  _ (0x1670, queue_9_drop_count)
+
+typedef enum
+{
+#define _(a,f) IXGE_COUNTER_##f,
+#define _64(a,f) _(a,f)
+ foreach_ixge_counter
+#undef _
+#undef _64
+ IXGE_N_COUNTER,
+} ixge_counter_type_t;
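+
+/* The foreach_ixge_counter X-macro is expanded in several ways; here each
+   (offset, name) pair becomes an enum member, e.g. _ (0x40d0,
+   rx_total_packets) yields IXGE_COUNTER_rx_total_packets. The _64 variant
+   marks counters that presumably occupy two consecutive 32-bit registers,
+   with the low word at the given offset. */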
+
+typedef struct
+{
+ u32 mdio_address;
+
+ /* 32 bit ID read from ID registers. */
+ u32 id;
+} ixge_phy_t;
+
+typedef struct
+{
+ /* Cache aligned descriptors. */
+ ixge_descriptor_t *descriptors;
+
+ /* Number of descriptors in table. */
+ u32 n_descriptors;
+
+ /* Software head and tail pointers into descriptor ring. */
+ u32 head_index, tail_index;
+
+ /* Index into dma_queues vector. */
+ u32 queue_index;
+
+ /* Buffer indices corresponding to each active descriptor. */
+ u32 *descriptor_buffer_indices;
+
+ union
+ {
+ struct
+ {
+ u32 *volatile head_index_write_back;
+
+ u32 n_buffers_on_ring;
+ } tx;
+
+ struct
+ {
+ /* Buffer indices to use to replenish each descriptor. */
+ u32 *replenish_buffer_indices;
+
+ vlib_node_runtime_t *node;
+ u32 next_index;
+
+ u32 saved_start_of_packet_buffer_index;
+
+ u32 saved_start_of_packet_next_index;
+ u32 saved_last_buffer_index;
+
+ u32 is_start_of_packet;
+
+ u32 n_descriptors_done_total;
+
+ u32 n_descriptors_done_this_call;
+
+ u32 n_bytes;
+ } rx;
+ };
+} ixge_dma_queue_t;
+
+#define foreach_ixge_pci_device_id \
+ _ (82598, 0x10b6) \
+ _ (82598_bx, 0x1508) \
+ _ (82598af_dual_port, 0x10c6) \
+ _ (82598af_single_port, 0x10c7) \
+ _ (82598at, 0x10c8) \
+ _ (82598at2, 0x150b) \
+ _ (82598eb_sfp_lom, 0x10db) \
+ _ (82598eb_cx4, 0x10dd) \
+ _ (82598_cx4_dual_port, 0x10ec) \
+ _ (82598_da_dual_port, 0x10f1) \
+ _ (82598_sr_dual_port_em, 0x10e1) \
+ _ (82598eb_xf_lr, 0x10f4) \
+ _ (82599_kx4, 0x10f7) \
+ _ (82599_kx4_mezz, 0x1514) \
+ _ (82599_kr, 0x1517) \
+ _ (82599_combo_backplane, 0x10f8) \
+ _ (82599_cx4, 0x10f9) \
+ _ (82599_sfp, 0x10fb) \
+ _ (82599_backplane_fcoe, 0x152a) \
+ _ (82599_sfp_fcoe, 0x1529) \
+ _ (82599_sfp_em, 0x1507) \
+ _ (82599_xaui_lom, 0x10fc) \
+ _ (82599_t3_lom, 0x151c) \
+ _ (x540t, 0x1528)
+
+typedef enum
+{
+#define _(f,n) IXGE_##f = n,
+ foreach_ixge_pci_device_id
+#undef _
+} ixge_pci_device_id_t;
+
+typedef struct
+{
+ /* registers */
+ ixge_regs_t *regs;
+
+ /* Specific next index when using dynamic redirection */
+ u32 per_interface_next_index;
+
+ /* PCI bus info. */
+ vlib_pci_device_t pci_device;
+
+ /* From PCI config space header. */
+ ixge_pci_device_id_t device_id;
+
+ u16 device_index;
+
+ /* 0 or 1. */
+ u16 pci_function;
+
+ /* VLIB interface for this instance. */
+ u32 vlib_hw_if_index, vlib_sw_if_index;
+
+ ixge_dma_queue_t *dma_queues[VLIB_N_RX_TX];
+
+ /* Phy index (0 or 1) and address on MDI bus. */
+ u32 phy_index;
+ ixge_phy_t phys[2];
+
+ /* Value of link_status register at last link change. */
+ u32 link_status_at_last_link_change;
+
+ i2c_bus_t i2c_bus;
+ sfp_eeprom_t sfp_eeprom;
+
+ /* Counters. */
+ u64 counters[IXGE_N_COUNTER], counters_last_clear[IXGE_N_COUNTER];
+} ixge_device_t;
+
+typedef struct
+{
+ vlib_main_t *vlib_main;
+
+ /* Vector of devices. */
+ ixge_device_t *devices;
+
+ /* Descriptor ring sizes. */
+ u32 n_descriptors[VLIB_N_RX_TX];
+
+ /* RX buffer size. Must be at least 1k; will be rounded to
+ next largest 1k size. */
+ u32 n_bytes_in_rx_buffer;
+
+ u32 n_descriptors_per_cache_line;
+
+ u32 vlib_buffer_free_list_index;
+
+ u32 process_node_index;
+
+ /* Template and mask for initializing/validating TX descriptors. */
+ ixge_tx_descriptor_t tx_descriptor_template, tx_descriptor_template_mask;
+
+ /* Vector of buffers for which TX is done and can be freed. */
+ u32 *tx_buffers_pending_free;
+
+ u32 *rx_buffers_to_add;
+
+ f64 time_last_stats_update;
+
+ vlib_physmem_region_index_t physmem_region;
+} ixge_main_t;
+
+ixge_main_t ixge_main;
+vnet_device_class_t ixge_device_class;
+
+typedef enum
+{
+ IXGE_RX_NEXT_IP4_INPUT,
+ IXGE_RX_NEXT_IP6_INPUT,
+ IXGE_RX_NEXT_ETHERNET_INPUT,
+ IXGE_RX_NEXT_DROP,
+ IXGE_RX_N_NEXT,
+} ixge_rx_next_t;
+
+void ixge_set_next_node (ixge_rx_next_t, char *);
+
+#endif /* included_ixge_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lb.am b/src/plugins/lb.am
new file mode 100644
index 00000000..352358fa
--- /dev/null
+++ b/src/plugins/lb.am
@@ -0,0 +1,42 @@
+# Copyright (c) 2016 Cisco Systems, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += lb_test_plugin.la
+vppplugins_LTLIBRARIES += lb_plugin.la
+
+lb_plugin_la_SOURCES = \
+ lb/lb.c \
+ lb/node.c \
+ lb/cli.c \
+ lb/util.c \
+ lb/refcount.c \
+ lb/api.c
+
+BUILT_SOURCES += \
+ lb/lb.api.h \
+ lb/lb.api.json
+
+API_FILES += lb/lb.api
+
+noinst_HEADERS += \
+ lb/lb.h \
+ lb/util.h \
+ lb/refcount.h \
+ lb/lbhash.h \
+ lb/lb.api.h
+
+lb_test_plugin_la_SOURCES = \
+ lb/lb_test.c \
+ lb/lb_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c
new file mode 100644
index 00000000..9e3bcd65
--- /dev/null
+++ b/src/plugins/lb/api.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/lb.h>
+
+#include <vppinfra/byte_order.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <lb/lb.api.h>
+  /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+
+/* define message structures */
+#define vl_typedefs
+#include <lb/lb.api.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <lb/lb.api.h>
+#undef vl_endianfun
+
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <lb/lb.api.h>
+#undef vl_api_version
+
+#define vl_msg_name_crc_list
+#include <lb/lb.api.h>
+#undef vl_msg_name_crc_list
+
+
+#define REPLY_MSG_ID_BASE lbm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+setup_message_id_table (lb_main_t * lbm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + lbm->msg_id_base);
+ foreach_vl_msg_name_crc_lb;
+#undef _
+}
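+
+/* Note: plugin message IDs are allocated dynamically at init time
+   (see vl_msg_api_get_msg_ids in lb_api_init below), so every message
+   ID, including the reply IDs REPLY_MACRO emits via REPLY_MSG_ID_BASE,
+   must be offset by lbm->msg_id_base. */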
+
+/* Macro to finish up custom dump fns */
+#define FINISH \
+ vec_add1 (s, 0); \
+ vl_print (handle, (char *)s); \
+ vec_free (s); \
+ return handle;
+
+static void
+vl_api_lb_conf_t_handler
+(vl_api_lb_conf_t * mp)
+{
+ lb_main_t *lbm = &lb_main;
+ vl_api_lb_conf_reply_t * rmp;
+ int rv = 0;
+
+ rv = lb_conf((ip4_address_t *)&mp->ip4_src_address,
+ (ip6_address_t *)mp->ip6_src_address,
+ mp->sticky_buckets_per_core,
+ mp->flow_timeout);
+
+ REPLY_MACRO (VL_API_LB_CONF_REPLY);
+}
+
+static void *vl_api_lb_conf_t_print
+(vl_api_lb_conf_t *mp, void * handle)
+{
+ u8 * s;
+ s = format (0, "SCRIPT: lb_conf ");
+ s = format (s, "%U ", format_ip4_address, (ip4_address_t *)&mp->ip4_src_address);
+ s = format (s, "%U ", format_ip6_address, (ip6_address_t *)mp->ip6_src_address);
+ s = format (s, "%u ", mp->sticky_buckets_per_core);
+ s = format (s, "%u ", mp->flow_timeout);
+ FINISH;
+}
+
+
+static void
+vl_api_lb_add_del_vip_t_handler
+(vl_api_lb_add_del_vip_t * mp)
+{
+ lb_main_t *lbm = &lb_main;
+  vl_api_lb_add_del_vip_reply_t * rmp;
+ int rv = 0;
+ ip46_address_t prefix;
+ memcpy(&prefix.ip6, mp->ip_prefix, sizeof(prefix.ip6));
+
+ if (mp->is_del) {
+ u32 vip_index;
+ if (!(rv = lb_vip_find_index(&prefix, mp->prefix_length, &vip_index)))
+ rv = lb_vip_del(vip_index);
+ } else {
+ u32 vip_index;
+ lb_vip_type_t type;
+ if (ip46_prefix_is_ip4(&prefix, mp->prefix_length)) {
+ type = mp->is_gre4?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6;
+ } else {
+ type = mp->is_gre4?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6;
+ }
+
+ rv = lb_vip_add(&prefix, mp->prefix_length, type,
+ mp->new_flows_table_length, &vip_index);
+ }
+  REPLY_MACRO (VL_API_LB_ADD_DEL_VIP_REPLY);
+}
+
+static void *vl_api_lb_add_del_vip_t_print
+(vl_api_lb_add_del_vip_t *mp, void * handle)
+{
+ u8 * s;
+ s = format (0, "SCRIPT: lb_add_del_vip ");
+ s = format (s, "%U ", format_ip46_prefix,
+ (ip46_address_t *)mp->ip_prefix, mp->prefix_length, IP46_TYPE_ANY);
+ s = format (s, "%s ", mp->is_gre4?"gre4":"gre6");
+ s = format (s, "%u ", mp->new_flows_table_length);
+ s = format (s, "%s ", mp->is_del?"del":"add");
+ FINISH;
+}
+
+static void
+vl_api_lb_add_del_as_t_handler
+(vl_api_lb_add_del_as_t * mp)
+{
+ lb_main_t *lbm = &lb_main;
+  vl_api_lb_add_del_as_reply_t * rmp;
+ int rv = 0;
+ u32 vip_index;
+ if ((rv = lb_vip_find_index((ip46_address_t *)mp->vip_ip_prefix,
+ mp->vip_prefix_length, &vip_index)))
+ goto done;
+
+ if (mp->is_del)
+ rv = lb_vip_del_ass(vip_index, (ip46_address_t *)mp->as_address, 1);
+ else
+ rv = lb_vip_add_ass(vip_index, (ip46_address_t *)mp->as_address, 1);
+
+done:
+  REPLY_MACRO (VL_API_LB_ADD_DEL_AS_REPLY);
+}
+
+static void *vl_api_lb_add_del_as_t_print
+(vl_api_lb_add_del_as_t *mp, void * handle)
+{
+ u8 * s;
+ s = format (0, "SCRIPT: lb_add_del_as ");
+ s = format (s, "%U ", format_ip46_prefix,
+ (ip46_address_t *)mp->vip_ip_prefix, mp->vip_prefix_length, IP46_TYPE_ANY);
+ s = format (s, "%U ", format_ip46_address,
+ (ip46_address_t *)mp->as_address, IP46_TYPE_ANY);
+ s = format (s, "%s ", mp->is_del?"del":"add");
+ FINISH;
+}
+
+/* List of message types that this plugin understands */
+#define foreach_lb_plugin_api_msg \
+_(LB_CONF, lb_conf) \
+_(LB_ADD_DEL_VIP, lb_add_del_vip) \
+_(LB_ADD_DEL_AS, lb_add_del_as)
+
+static clib_error_t * lb_api_init (vlib_main_t * vm)
+{
+ lb_main_t *lbm = &lb_main;
+ u8 *name = format (0, "lb_%08x%c", api_version, 0);
+ lbm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + lbm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_lb_plugin_api_msg;
+#undef _
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (lbm, &api_main);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (lb_api_init);
diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c
new file mode 100644
index 00000000..f6d65201
--- /dev/null
+++ b/src/plugins/lb/cli.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/lb.h>
+#include <lb/util.h>
+
+static clib_error_t *
+lb_vip_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip46_address_t prefix;
+ u8 plen;
+ u32 new_len = 1024;
+ u8 del = 0;
+ int ret;
+ u32 gre4 = 0;
+ lb_vip_type_t type;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY)) {
+ error = clib_error_return (0, "invalid vip prefix: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(line_input, "new_len %d", &new_len))
+ ;
+ else if (unformat(line_input, "del"))
+ del = 1;
+ else if (unformat(line_input, "encap gre4"))
+ gre4 = 1;
+ else if (unformat(line_input, "encap gre6"))
+ gre4 = 0;
+ else {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+
+ if (ip46_prefix_is_ip4(&prefix, plen)) {
+ type = (gre4)?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6;
+ } else {
+ type = (gre4)?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6;
+ }
+
+ lb_garbage_collection();
+
+ u32 index;
+ if (!del) {
+ if ((ret = lb_vip_add(&prefix, plen, type, new_len, &index))) {
+ error = clib_error_return (0, "lb_vip_add error %d", ret);
+ goto done;
+ } else {
+ vlib_cli_output(vm, "lb_vip_add ok %d", index);
+ }
+ } else {
+ if ((ret = lb_vip_find_index(&prefix, plen, &index))) {
+ error = clib_error_return (0, "lb_vip_find_index error %d", ret);
+ goto done;
+ } else if ((ret = lb_vip_del(index))) {
+ error = clib_error_return (0, "lb_vip_del error %d", ret);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (lb_vip_command, static) =
+{
+ .path = "lb vip",
+ .short_help = "lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del]",
+ .function = lb_vip_command_fn,
+};
+
+static clib_error_t *
+lb_as_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip46_address_t vip_prefix, as_addr;
+ u8 vip_plen;
+ ip46_address_t *as_array = 0;
+ u32 vip_index;
+ u8 del = 0;
+ int ret;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) {
+ error = clib_error_return (0, "invalid as address: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) {
+ error = clib_error_return (0, "lb_vip_find_index error %d", ret);
+ goto done;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(line_input, "%U", unformat_ip46_address, &as_addr, IP46_TYPE_ANY)) {
+ vec_add1(as_array, as_addr);
+ } else if (unformat(line_input, "del")) {
+ del = 1;
+ } else {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!vec_len(as_array)) {
+ error = clib_error_return (0, "No AS address provided");
+ goto done;
+ }
+
+ lb_garbage_collection();
+ clib_warning("vip index is %d", vip_index);
+
+ if (del) {
+ if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array)))) {
+ error = clib_error_return (0, "lb_vip_del_ass error %d", ret);
+ goto done;
+ }
+ } else {
+ if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array)))) {
+ error = clib_error_return (0, "lb_vip_add_ass error %d", ret);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+ vec_free(as_array);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (lb_as_command, static) =
+{
+ .path = "lb as",
+ .short_help = "lb as <vip-prefix> [<address> [<address> [...]]] [del]",
+ .function = lb_as_command_fn,
+};
+
+static clib_error_t *
+lb_conf_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lb_main_t *lbm = &lb_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4 = lbm->ip4_src_address;
+ ip6_address_t ip6 = lbm->ip6_src_address;
+ u32 per_cpu_sticky_buckets = lbm->per_cpu_sticky_buckets;
+ u32 per_cpu_sticky_buckets_log2 = 0;
+ u32 flow_timeout = lbm->flow_timeout;
+ int ret;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(line_input, "ip4-src-address %U", unformat_ip4_address, &ip4))
+ ;
+ else if (unformat(line_input, "ip6-src-address %U", unformat_ip6_address, &ip6))
+ ;
+ else if (unformat(line_input, "buckets %d", &per_cpu_sticky_buckets))
+ ;
+ else if (unformat(line_input, "buckets-log2 %d", &per_cpu_sticky_buckets_log2)) {
+      if (per_cpu_sticky_buckets_log2 >= 32) {
+        error = clib_error_return (0, "buckets-log2 value is too high");
+        goto done;
+      }
+      per_cpu_sticky_buckets = 1 << per_cpu_sticky_buckets_log2;
+ } else if (unformat(line_input, "timeout %d", &flow_timeout))
+ ;
+ else {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ lb_garbage_collection();
+
+ if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout))) {
+ error = clib_error_return (0, "lb_conf error %d", ret);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (lb_conf_command, static) =
+{
+ .path = "lb conf",
+ .short_help = "lb conf [ip4-src-address <addr>] [ip6-src-address <addr>] [buckets <n>] [timeout <s>]",
+ .function = lb_conf_command_fn,
+};
+
+static clib_error_t *
+lb_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_cli_output(vm, "%U", format_lb_main);
+ return NULL;
+}
+
+
+VLIB_CLI_COMMAND (lb_show_command, static) =
+{
+ .path = "show lb",
+ .short_help = "show lb",
+ .function = lb_show_command_fn,
+};
+
+static clib_error_t *
+lb_show_vips_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t line_input;
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip;
+ u8 verbose = 0;
+
+ if (!unformat_user (input, unformat_line_input, &line_input))
+ return 0;
+
+ if (unformat(&line_input, "verbose"))
+ verbose = 1;
+
+ pool_foreach(vip, lbm->vips, {
+ vlib_cli_output(vm, "%U\n", verbose?format_lb_vip_detailed:format_lb_vip, vip);
+ });
+
+ unformat_free (&line_input);
+ return NULL;
+}
+
+VLIB_CLI_COMMAND (lb_show_vips_command, static) =
+{
+ .path = "show lb vips",
+ .short_help = "show lb vips [verbose]",
+ .function = lb_show_vips_command_fn,
+};
diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api
new file mode 100644
index 00000000..32cc669b
--- /dev/null
+++ b/src/plugins/lb/lb.api
@@ -0,0 +1,56 @@
+/** \brief Configure Load-Balancer global parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip4_src_address - IPv4 address to be used as source for IPv4 GRE traffic.
+ @param ip6_src_address - IPv6 address to be used as source for IPv6 GRE traffic.
+    @param sticky_buckets_per_core - Number of buckets *per worker thread* in the
+ established flow table (must be power of 2).
+ @param flow_timeout - Time in seconds after which, if no packet is received
+ for a given flow, the flow is removed from the established flow table.
+*/
+autoreply define lb_conf
+{
+ u32 client_index;
+ u32 context;
+ u32 ip4_src_address;
+ u8 ip6_src_address[16];
+ u32 sticky_buckets_per_core;
+ u32 flow_timeout;
+};
+
+/** \brief Add a virtual address (or prefix)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip_prefix - IP address (IPv4 in lower order 32 bits).
+ @param prefix_length - IP prefix length (96 + 'IPv4 prefix length' for IPv4).
+ @param is_gre4 - Encap is ip4 GRE (ip6 GRE otherwise).
+ @param new_flows_table_length - Size of the new connections flow table used
+ for this VIP (must be power of 2).
+ @param is_del - The VIP should be removed.
+*/
+autoreply define lb_add_del_vip {
+ u32 client_index;
+ u32 context;
+ u8 ip_prefix[16];
+ u8 prefix_length;
+ u8 is_gre4;
+ u32 new_flows_table_length;
+ u8 is_del;
+};
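+
+/* Example (illustrative): the IPv4 VIP prefix 1.2.3.0/24 is encoded with
+   1.2.3.0 in the last four bytes of ip_prefix (i.e. ::0102:0300) and
+   prefix_length = 96 + 24 = 120. */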
+
+/** \brief Add an application server for a given VIP
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vip_ip_prefix - VIP IP address (IPv4 in lower order 32 bits).
+    @param vip_prefix_length - VIP IP prefix length (96 + 'IPv4 prefix length' for IPv4).
+ @param as_address - The application server address (IPv4 in lower order 32 bits).
+ @param is_del - The AS should be removed.
+*/
+autoreply define lb_add_del_as {
+ u32 client_index;
+ u32 context;
+ u8 vip_ip_prefix[16];
+ u8 vip_prefix_length;
+ u8 as_address[16];
+ u8 is_del;
+};
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
new file mode 100644
index 00000000..cc3f8532
--- /dev/null
+++ b/src/plugins/lb/lb.c
@@ -0,0 +1,841 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/lb.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <vnet/api_errno.h>
+
+//GC runs at most once every LB_GARBAGE_RUN seconds
+#define LB_GARBAGE_RUN 60
+
+//After this many seconds, it is assumed that inter-core race conditions
+//can no longer occur, so an unused AS may safely be freed.
+#define LB_CONCURRENCY_TIMEOUT 10
+
+lb_main_t lb_main;
+
+#define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1))
+#define lb_put_writer_lock() lb_main.writer_lock[0] = 0
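+
+/* The writer lock is a minimal test-and-set spinlock: configuration
+   paths busy-wait on __sync_lock_test_and_set until the previous holder
+   writes 0 back. Data-path cores are expected to rely on reference
+   counts and the LB_CONCURRENCY_TIMEOUT grace period above rather than
+   taking this lock. */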
+
+static void lb_as_stack (lb_as_t *as);
+
+
+const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
+const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
+const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_gre4_ip4,
+ [DPO_PROTO_IP6] = lb_dpo_gre4_ip6,
+ };
+
+const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
+const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
+const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_gre6_ip4,
+ [DPO_PROTO_IP6] = lb_dpo_gre6_ip6,
+ };
+
+u32 lb_hash_time_now(vlib_main_t * vm)
+{
+ return (u32) (vlib_time_now(vm) + 10000);
+}
+
+u8 *format_lb_main (u8 * s, va_list * args)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main();
+ lb_main_t *lbm = &lb_main;
+ s = format(s, "lb_main");
+ s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address);
+ s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address);
+ s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
+ s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
+
+ u32 thread_index;
+ for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
+ lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
+ if (h) {
+ s = format(s, "core %d\n", thread_index);
+ s = format(s, " timeout: %ds\n", h->timeout);
+ s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h));
+ }
+ }
+
+ return s;
+}
+
+static char *lb_vip_type_strings[] = {
+ [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6",
+ [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4",
+ [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6",
+ [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4",
+};
+
+u8 *format_lb_vip_type (u8 * s, va_list * args)
+{
+ lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t);
+ u32 i;
+ for (i=0; i<LB_VIP_N_TYPES; i++)
+ if (vipt == i)
+ return format(s, lb_vip_type_strings[i]);
+ return format(s, "_WRONG_TYPE_");
+}
+
+uword unformat_lb_vip_type (unformat_input_t * input, va_list * args)
+{
+ lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *);
+ u32 i;
+ for (i=0; i<LB_VIP_N_TYPES; i++)
+ if (unformat(input, lb_vip_type_strings[i])) {
+ *vipt = i;
+ return 1;
+ }
+ return 0;
+}
+
+u8 *format_lb_vip (u8 * s, va_list * args)
+{
+ lb_vip_t *vip = va_arg (*args, lb_vip_t *);
+ return format(s, "%U %U new_size:%u #as:%u%s",
+ format_lb_vip_type, vip->type,
+ format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
+ vip->new_flow_table_mask + 1,
+ pool_elts(vip->as_indexes),
+ (vip->flags & LB_VIP_FLAGS_USED)?"":" removed");
+}
+
+u8 *format_lb_as (u8 * s, va_list * args)
+{
+ lb_as_t *as = va_arg (*args, lb_as_t *);
+ return format(s, "%U %s", format_ip46_address,
+ &as->address, IP46_TYPE_ANY,
+ (as->flags & LB_AS_FLAGS_USED)?"used":"removed");
+}
+
+u8 *format_lb_vip_detailed (u8 * s, va_list * args)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip = va_arg (*args, lb_vip_t *);
+ uword indent = format_get_indent (s);
+
+ s = format(s, "%U %U [%u] %U%s\n"
+ "%U new_size:%u\n",
+ format_white_space, indent,
+ format_lb_vip_type, vip->type,
+ vip - lbm->vips, format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
+ (vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
+ format_white_space, indent,
+ vip->new_flow_table_mask + 1);
+
+ //Print counters
+ s = format(s, "%U counters:\n",
+ format_white_space, indent);
+ u32 i;
+ for (i=0; i<LB_N_VIP_COUNTERS; i++)
+ s = format(s, "%U %s: %d\n",
+ format_white_space, indent,
+ lbm->vip_counters[i].name,
+ vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips));
+
+
+ s = format(s, "%U #as:%u\n",
+ format_white_space, indent,
+ pool_elts(vip->as_indexes));
+
+ //Let's count the buckets for each AS
+ u32 *count = 0;
+ vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much...
+ lb_new_flow_entry_t *nfe;
+ vec_foreach(nfe, vip->new_flow_table)
+ count[nfe->as_index]++;
+
+ lb_as_t *as;
+ u32 *as_index;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ s = format(s, "%U %U %d buckets %d flows dpo:%u %s\n",
+ format_white_space, indent,
+ format_ip46_address, &as->address, IP46_TYPE_ANY,
+ count[as - lbm->ass],
+ vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
+ as->dpo.dpoi_index,
+ (as->flags & LB_AS_FLAGS_USED)?"used":" removed");
+ });
+
+ vec_free(count);
+
+ /*
+ s = format(s, "%U new flows table:\n", format_white_space, indent);
+ lb_new_flow_entry_t *nfe;
+ vec_foreach(nfe, vip->new_flow_table) {
+ s = format(s, "%U %d: %d\n", format_white_space, indent, nfe - vip->new_flow_table, nfe->as_index);
+ }
+ */
+ return s;
+}
+
+typedef struct {
+ u32 as_index;
+ u32 last;
+ u32 skip;
+} lb_pseudorand_t;
+
+static int lb_pseudorand_compare(void *a, void *b)
+{
+ lb_as_t *asa, *asb;
+ lb_main_t *lbm = &lb_main;
+ asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index];
+ asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index];
+ return memcmp(&asa->address, &asb->address, sizeof(asb->address));
+}
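+
+/* Sorting ASs by address rather than by pool index makes the
+   pseudo-random permutation below independent of configuration order;
+   two instances configured with the same AS set should therefore
+   compute the same new-flow table. */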
+
+static void lb_vip_garbage_collection(lb_vip_t *vip)
+{
+ lb_main_t *lbm = &lb_main;
+ ASSERT (lbm->writer_lock[0]);
+
+ u32 now = (u32) vlib_time_now(vlib_get_main());
+ if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN))
+ return;
+
+ vip->last_garbage_collection = now;
+ lb_as_t *as;
+ u32 *as_index;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
+ clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used
+ (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
+ { //Not referenced
+ fib_entry_child_remove(as->next_hop_fib_entry_index,
+ as->next_hop_child_index);
+ fib_table_entry_delete_index(as->next_hop_fib_entry_index,
+ FIB_SOURCE_RR);
+ as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
+
+ pool_put(vip->as_indexes, as_index);
+ pool_put(lbm->ass, as);
+ }
+ });
+}
+
+void lb_garbage_collection()
+{
+ lb_main_t *lbm = &lb_main;
+ lb_get_writer_lock();
+ lb_vip_t *vip;
+ u32 *to_be_removed_vips = 0, *i;
+ pool_foreach(vip, lbm->vips, {
+ lb_vip_garbage_collection(vip);
+
+ if (!(vip->flags & LB_VIP_FLAGS_USED) &&
+ (pool_elts(vip->as_indexes) == 0)) {
+ vec_add1(to_be_removed_vips, vip - lbm->vips);
+ }
+ });
+
+  vec_foreach(i, to_be_removed_vips) {
+    vip = &lbm->vips[*i];
+    pool_free(vip->as_indexes);
+    pool_put(lbm->vips, vip);
+  }
+
+ vec_free(to_be_removed_vips);
+ lb_put_writer_lock();
+}
+
+static void lb_vip_update_new_flow_table(lb_vip_t *vip)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_new_flow_entry_t *old_table;
+ u32 i, *as_index;
+ lb_new_flow_entry_t *new_flow_table = 0;
+ lb_as_t *as;
+ lb_pseudorand_t *pr, *sort_arr = 0;
+ u32 count;
+
+ ASSERT (lbm->writer_lock[0]); //We must have the lock
+
+ //Check if some AS is configured or not
+ i = 0;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+    if (as->flags & LB_AS_FLAGS_USED) { //Found an AS that is still used
+ i = 1;
+ goto out; //Not sure 'break' works in this macro-loop
+ }
+ });
+
+out:
+ if (i == 0) {
+ //Only the default. i.e. no AS
+ vec_validate(new_flow_table, vip->new_flow_table_mask);
+ for (i=0; i<vec_len(new_flow_table); i++)
+ new_flow_table[i].as_index = 0;
+
+ goto finished;
+ }
+
+ //First, let's sort the ASs
+ sort_arr = 0;
+ vec_alloc(sort_arr, pool_elts(vip->as_indexes));
+
+ i = 0;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
+ continue;
+
+ sort_arr[i].as_index = as - lbm->ass;
+ i++;
+ });
+ _vec_len(sort_arr) = i;
+
+ vec_sort_with_function(sort_arr, lb_pseudorand_compare);
+
+ //Now let's pseudo-randomly generate permutations
+ vec_foreach(pr, sort_arr) {
+ lb_as_t *as = &lbm->ass[pr->as_index];
+
+ u64 seed = clib_xxhash(as->address.as_u64[0] ^
+ as->address.as_u64[1]);
+    /* We have 2^n buckets.
+     * skip must be coprime with 2^n,
+     * so skip must be odd.
+     * MagLev actually states that M should be prime,
+     * but that has a significant computation cost (% operation).
+     * Using 2^n is cheaper (& operation).
+     */
+ pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
+ pr->last = (seed >> 32) & vip->new_flow_table_mask;
+ }
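+
+  /* Illustrative example: with new_flow_table_mask = 7 (8 buckets), the
+     '| 1' above forces skip to be odd, e.g. skip = 5. An odd skip is
+     coprime with 8, so the sequence last, last+5, last+10, ... (mod 8)
+     visits all 8 buckets exactly once, which the filling loop below
+     relies on. */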
+
+ //Let's create a new flow table
+ vec_validate(new_flow_table, vip->new_flow_table_mask);
+ for (i=0; i<vec_len(new_flow_table); i++)
+ new_flow_table[i].as_index = ~0;
+
+ u32 done = 0;
+ while (1) {
+ vec_foreach(pr, sort_arr) {
+ while (1) {
+ u32 last = pr->last;
+ pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
+ if (new_flow_table[last].as_index == ~0) {
+ new_flow_table[last].as_index = pr->as_index;
+ break;
+ }
+ }
+ done++;
+ if (done == vec_len(new_flow_table))
+ goto finished;
+ }
+ }
+
+finished:
+  vec_free(sort_arr);
+
+  //Count number of changed entries
+  count = 0;
+ for (i=0; i<vec_len(new_flow_table); i++)
+ if (vip->new_flow_table == 0 ||
+ new_flow_table[i].as_index != vip->new_flow_table[i].as_index)
+ count++;
+
+ old_table = vip->new_flow_table;
+ vip->new_flow_table = new_flow_table;
+ vec_free(old_table);
+}
+
+int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
+ u32 per_cpu_sticky_buckets, u32 flow_timeout)
+{
+ lb_main_t *lbm = &lb_main;
+
+ if (!is_pow2(per_cpu_sticky_buckets))
+ return VNET_API_ERROR_INVALID_MEMORY_SIZE;
+
+ lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self
+ lbm->ip4_src_address = *ip4_address;
+ lbm->ip6_src_address = *ip6_address;
+ lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets;
+ lbm->flow_timeout = flow_timeout;
+ lb_put_writer_lock();
+ return 0;
+}
+
+static
+int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip;
+ ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
+ ip46_prefix_normalize(prefix, plen);
+ pool_foreach(vip, lbm->vips, {
+    if ((vip->flags & LB_VIP_FLAGS_USED) &&
+ vip->plen == plen &&
+ vip->prefix.as_u64[0] == prefix->as_u64[0] &&
+ vip->prefix.as_u64[1] == prefix->as_u64[1]) {
+ *vip_index = vip - lbm->vips;
+ return 0;
+ }
+ });
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+}
+
+int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index)
+{
+ int ret;
+ lb_get_writer_lock();
+ ret = lb_vip_find_index_with_lock(prefix, plen, vip_index);
+ lb_put_writer_lock();
+ return ret;
+}
+
+static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
+{
+ lb_main_t *lbm = &lb_main;
+ ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
+ lb_as_t *as;
+ u32 *asi;
+ pool_foreach(asi, vip->as_indexes, {
+ as = &lbm->ass[*asi];
+ if (as->vip_index == (vip - lbm->vips) &&
+ as->address.as_u64[0] == address->as_u64[0] &&
+ as->address.as_u64[1] == address->as_u64[1]) {
+ *as_index = as - lbm->ass;
+ return 0;
+ }
+ });
+ return -1;
+}
+
+int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_get_writer_lock();
+ lb_vip_t *vip;
+ if (!(vip = lb_vip_get_by_index(vip_index))) {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ ip46_type_t type = lb_vip_is_gre4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6;
+ u32 *to_be_added = 0;
+ u32 *to_be_updated = 0;
+ u32 i;
+ u32 *ip;
+
+ //Sanity check
+ while (n--) {
+
+ if (!lb_as_find_index_vip(vip, &addresses[n], &i)) {
+ if (lbm->ass[i].flags & LB_AS_FLAGS_USED) {
+ vec_free(to_be_added);
+ vec_free(to_be_updated);
+ lb_put_writer_lock();
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ vec_add1(to_be_updated, i);
+ goto next;
+ }
+
+ if (ip46_address_type(&addresses[n]) != type) {
+ vec_free(to_be_added);
+ vec_free(to_be_updated);
+ lb_put_writer_lock();
+ return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
+ }
+
+ if (n) {
+ u32 n2 = n;
+ while(n2--) //Check for duplicates
+ if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
+ addresses[n2].as_u64[1] == addresses[n].as_u64[1])
+ goto next;
+ }
+
+ vec_add1(to_be_added, n);
+
+next:
+ continue;
+ }
+
+ //Update reused ASs
+ vec_foreach(ip, to_be_updated) {
+ lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
+ }
+ vec_free(to_be_updated);
+
+ //Create those who have to be created
+ vec_foreach(ip, to_be_added) {
+ lb_as_t *as;
+ u32 *as_index;
+ pool_get(lbm->ass, as);
+ as->address = addresses[*ip];
+ as->flags = LB_AS_FLAGS_USED;
+ as->vip_index = vip_index;
+ pool_get(vip->as_indexes, as_index);
+ *as_index = as - lbm->ass;
+
+ /*
+ * become a child of the FIB entry
+ * so we are informed when its forwarding changes
+ */
+ fib_prefix_t nh = {};
+ if (lb_vip_is_gre4(vip)) {
+ nh.fp_addr.ip4 = as->address.ip4;
+ nh.fp_len = 32;
+ nh.fp_proto = FIB_PROTOCOL_IP4;
+ } else {
+ nh.fp_addr.ip6 = as->address.ip6;
+ nh.fp_len = 128;
+ nh.fp_proto = FIB_PROTOCOL_IP6;
+ }
+
+ as->next_hop_fib_entry_index =
+ fib_table_entry_special_add(0,
+ &nh,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+ as->next_hop_child_index =
+ fib_entry_child_add(as->next_hop_fib_entry_index,
+ lbm->fib_node_type,
+ as - lbm->ass);
+
+ lb_as_stack(as);
+ }
+ vec_free(to_be_added);
+
+ //Recompute flows
+ lb_vip_update_new_flow_table(vip);
+
+ //Garbage collection maybe
+ lb_vip_garbage_collection(vip);
+
+ lb_put_writer_lock();
+ return 0;
+}
+
+int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
+{
+ lb_main_t *lbm = &lb_main;
+ u32 now = (u32) vlib_time_now(vlib_get_main());
+ u32 *ip = 0;
+
+ lb_vip_t *vip;
+ if (!(vip = lb_vip_get_by_index(vip_index))) {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ u32 *indexes = NULL;
+ while (n--) {
+ u32 i;
+ if (lb_as_find_index_vip(vip, &addresses[n], &i)) {
+ vec_free(indexes);
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ if (n) { //Check for duplicates
+      u32 n2 = n;
+ while(n2--) {
+ if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
+ addresses[n2].as_u64[1] == addresses[n].as_u64[1])
+ goto next;
+ }
+ }
+
+ vec_add1(indexes, i);
+next:
+ continue;
+ }
+
+ //Garbage collection maybe
+ lb_vip_garbage_collection(vip);
+
+ if (indexes != NULL) {
+ vec_foreach(ip, indexes) {
+ lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
+ lbm->ass[*ip].last_used = now;
+ }
+
+ //Recompute flows
+ lb_vip_update_new_flow_table(vip);
+ }
+
+ vec_free(indexes);
+ return 0;
+}
+
+int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
+{
+ lb_get_writer_lock();
+ int ret = lb_vip_del_ass_withlock(vip_index, addresses, n);
+ lb_put_writer_lock();
+ return ret;
+}
+
+/**
+ * Add the VIP adjacency to the ip4 or ip6 fib
+ */
+static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
+{
+ dpo_proto_t proto = 0;
+ dpo_id_t dpo = DPO_INVALID;
+ fib_prefix_t pfx = {};
+ if (lb_vip_is_ip4(vip)) {
+ pfx.fp_addr.ip4 = vip->prefix.ip4;
+ pfx.fp_len = vip->plen - 96;
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ proto = DPO_PROTO_IP4;
+ } else {
+ pfx.fp_addr.ip6 = vip->prefix.ip6;
+ pfx.fp_len = vip->plen;
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ proto = DPO_PROTO_IP6;
+ }
+ dpo_set(&dpo, lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type,
+ proto, vip - lbm->vips);
+ fib_table_entry_special_dpo_add(0,
+ &pfx,
+ FIB_SOURCE_PLUGIN_HI,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+ dpo_reset(&dpo);
+}
+
+/**
+ * Deletes the adjacency associated with the VIP
+ */
+static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
+{
+ fib_prefix_t pfx = {};
+ if (lb_vip_is_ip4(vip)) {
+ pfx.fp_addr.ip4 = vip->prefix.ip4;
+ pfx.fp_len = vip->plen - 96;
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ } else {
+ pfx.fp_addr.ip6 = vip->prefix.ip6;
+ pfx.fp_len = vip->plen;
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ }
+ fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI);
+}
+
+int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u32 new_length, u32 *vip_index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip;
+ lb_get_writer_lock();
+ ip46_prefix_normalize(prefix, plen);
+
+ if (!lb_vip_find_index_with_lock(prefix, plen, vip_index)) {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ if (!is_pow2(new_length)) {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_INVALID_MEMORY_SIZE;
+ }
+
+  if (ip46_prefix_is_ip4(prefix, plen) &&
+      (type != LB_VIP_TYPE_IP4_GRE4) &&
+      (type != LB_VIP_TYPE_IP4_GRE6)) {
+    lb_put_writer_lock();
+    return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
+  }
+
+ //Allocate
+ pool_get(lbm->vips, vip);
+
+ //Init
+ vip->prefix = *prefix;
+ vip->plen = plen;
+ vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main());
+ vip->type = type;
+ vip->flags = LB_VIP_FLAGS_USED;
+ vip->as_indexes = 0;
+
+ //Validate counters
+ u32 i;
+ for (i = 0; i < LB_N_VIP_COUNTERS; i++) {
+ vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
+ vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
+ }
+
+ //Configure new flow table
+ vip->new_flow_table_mask = new_length - 1;
+ vip->new_flow_table = 0;
+
+ //Create a new flow hash table full of the default entry
+ lb_vip_update_new_flow_table(vip);
+
+ //Create adjacency to direct traffic
+ lb_vip_add_adjacency(lbm, vip);
+
+ //Return result
+ *vip_index = vip - lbm->vips;
+
+ lb_put_writer_lock();
+ return 0;
+}
+
+int lb_vip_del(u32 vip_index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip;
+ lb_get_writer_lock();
+ if (!(vip = lb_vip_get_by_index(vip_index))) {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ //FIXME: This operation is actually not working
+ //We will need to remove state before performing this.
+
+ {
+ //Remove all ASs
+ ip46_address_t *ass = 0;
+ lb_as_t *as;
+ u32 *as_index;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ vec_add1(ass, as->address);
+ });
+ if (vec_len(ass))
+ lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass));
+ vec_free(ass);
+ }
+
+ //Delete adjacency
+ lb_vip_del_adjacency(lbm, vip);
+
+ //Set the VIP as unused
+ vip->flags &= ~LB_VIP_FLAGS_USED;
+
+ lb_put_writer_lock();
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Load Balancer",
+};
+/* *INDENT-ON* */
+
+u8 *format_lb_dpo (u8 * s, va_list * va)
+{
+ index_t index = va_arg (*va, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip = pool_elt_at_index (lbm->vips, index);
+ return format (s, "%U", format_lb_vip, vip);
+}
+
+static void lb_dpo_lock (dpo_id_t *dpo) {}
+static void lb_dpo_unlock (dpo_id_t *dpo) {}
+
+static fib_node_t *
+lb_fib_node_get_node (fib_node_index_t index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_as_t *as = pool_elt_at_index (lbm->ass, index);
+ return (&as->fib_node);
+}
+
+static void
+lb_fib_node_last_lock_gone (fib_node_t *node)
+{
+}
+
+static lb_as_t *
+lb_as_from_fib_node (fib_node_t *node)
+{
+ return ((lb_as_t*)(((char*)node) -
+ STRUCT_OFFSET_OF(lb_as_t, fib_node)));
+}
+
+static void
+lb_as_stack (lb_as_t *as)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip = &lbm->vips[as->vip_index];
+ dpo_stack(lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type,
+ lb_vip_is_ip4(vip)?DPO_PROTO_IP4:DPO_PROTO_IP6,
+ &as->dpo,
+ fib_entry_contribute_ip_forwarding(
+ as->next_hop_fib_entry_index));
+}
+
+static fib_node_back_walk_rc_t
+lb_fib_node_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ lb_as_stack(lb_as_from_fib_node(node));
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+clib_error_t *
+lb_init (vlib_main_t * vm)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ lb_main_t *lbm = &lb_main;
+ lb_as_t *default_as;
+ fib_node_vft_t lb_fib_node_vft = {
+ .fnv_get = lb_fib_node_get_node,
+ .fnv_last_lock = lb_fib_node_last_lock_gone,
+ .fnv_back_walk = lb_fib_node_back_walk_notify,
+ };
+ dpo_vft_t lb_vft = {
+ .dv_lock = lb_dpo_lock,
+ .dv_unlock = lb_dpo_unlock,
+ .dv_format = format_lb_dpo,
+ };
+
+ lbm->vips = 0;
+ lbm->per_cpu = 0;
+ vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
+ lbm->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ lbm->writer_lock[0] = 0;
+ lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS;
+ lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT;
+ lbm->ip4_src_address.as_u32 = 0xffffffff;
+ lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
+ lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
+ lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
+ lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
+ lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);
+
+ //Init AS reference counters
+ vlib_refcount_init(&lbm->as_refcount);
+
+ //Allocate and init default AS.
+ lbm->ass = 0;
+ pool_get(lbm->ass, default_as);
+ default_as->flags = 0;
+ default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
+ default_as->vip_index = ~0;
+ default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
+ default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;
+
+#define _(a,b,c) lbm->vip_counters[c].name = b;
+ lb_foreach_vip_counter
+#undef _
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lb_init);
diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h
new file mode 100644
index 00000000..882b9b30
--- /dev/null
+++ b/src/plugins/lb/lb.h
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * lb-plugin implements a MagLev-like load balancer.
+ * http://research.google.com/pubs/pub44824.html
+ *
+ * It hasn't been tested for interoperability with the original MagLev
+ * but intends to provide similar functionality.
+ * The load-balancer receives traffic destined to VIP (Virtual IP)
+ * addresses from one or multiple (ECMP) routers.
+ * The load-balancer tunnels the traffic toward many application servers
+ * ensuring session stickiness (i.e. that a single session is tunneled
+ * towards a single application server).
+ *
+ */
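+
+/*
+ * Example configuration (illustrative addresses; see cli.c for the
+ * exact grammar):
+ *
+ *   lb conf ip4-src-address 10.0.0.1 buckets 1024 timeout 40
+ *   lb vip 2002::/16 encap gre6 new_len 1024
+ *   lb as 2002::/16 2001::2 2001::3
+ */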
+
+#ifndef LB_PLUGIN_LB_LB_H_
+#define LB_PLUGIN_LB_LB_H_
+
+#include <lb/util.h>
+#include <lb/refcount.h>
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/fib/fib_table.h>
+
+#include <lb/lbhash.h>
+
+#define LB_DEFAULT_PER_CPU_STICKY_BUCKETS (1 << 10)
+#define LB_DEFAULT_FLOW_TIMEOUT 40
+
+typedef enum {
+ LB_NEXT_DROP,
+ LB_N_NEXT,
+} lb_next_t;
+
+/**
+ * Each VIP is configured with a set of
+ * application server.
+ */
+typedef struct {
+ /**
+ * Registration to FIB event.
+ */
+ fib_node_t fib_node;
+
+ /**
+ * Destination address used to tunnel traffic towards
+ * that application server.
+ * The address is also used as ID and pseudo-random
+ * seed for the load-balancing process.
+ */
+ ip46_address_t address;
+
+ /**
+ * ASs are indexed by address and VIP Index.
+   * This means there will be duplicates if the same server
+ * address is used for multiple VIPs.
+ */
+ u32 vip_index;
+
+ /**
+ * Some per-AS flags.
+ * For now only LB_AS_FLAGS_USED is defined.
+ */
+ u8 flags;
+
+#define LB_AS_FLAGS_USED 0x1
+
+ /**
+ * Rotating timestamp of when LB_AS_FLAGS_USED flag was last set.
+ *
+ * AS removal is based on garbage collection and reference counting.
+   * When an AS is removed, there is a race between the configuration core
+   * and worker cores, which may still add a reference while the AS should
+   * no longer be used. This timestamp is used to avoid removing the AS
+   * while such a race condition may still occur.
+ */
+ u32 last_used;
+
+ /**
+ * The FIB entry index for the next-hop
+ */
+ fib_node_index_t next_hop_fib_entry_index;
+
+ /**
+ * The child index on the FIB entry
+ */
+ u32 next_hop_child_index;
+
+ /**
+ * The next DPO in the graph to follow.
+ */
+ dpo_id_t dpo;
+
+} lb_as_t;
+
+format_function_t format_lb_as;
+
+typedef struct {
+ u32 as_index;
+} lb_new_flow_entry_t;
+
+#define lb_foreach_vip_counter \
+ _(NEXT_PACKET, "packet from existing sessions", 0) \
+ _(FIRST_PACKET, "first session packet", 1) \
+ _(UNTRACKED_PACKET, "untracked packet", 2) \
+ _(NO_SERVER, "no server configured", 3)
+
+typedef enum {
+#define _(a,b,c) LB_VIP_COUNTER_##a = c,
+ lb_foreach_vip_counter
+#undef _
+ LB_N_VIP_COUNTERS
+} lb_vip_counter_t;
+
+/**
+ * The load balancer supports IPv4 and IPv6 traffic
+ * and GRE4 and GRE6 encap.
+ */
+typedef enum {
+ LB_VIP_TYPE_IP6_GRE6,
+ LB_VIP_TYPE_IP6_GRE4,
+ LB_VIP_TYPE_IP4_GRE6,
+ LB_VIP_TYPE_IP4_GRE4,
+ LB_VIP_N_TYPES,
+} lb_vip_type_t;
+
+format_function_t format_lb_vip_type;
+unformat_function_t unformat_lb_vip_type;
+
+/**
+ * Load balancing service is provided per VIP.
+ * In this data model, a VIP can be a whole prefix,
+ * but load balancing only occurs on a
+ * per-source-address/port basis. This means that if a given source
+ * reuses the same port for multiple destinations within the same VIP,
+ * they will be considered as a single flow.
+ */
+typedef struct {
+
+ //Runtime
+
+ /**
+ * Vector mapping (flow-hash & new_connect_table_mask) to AS index.
+ * This is used for new flows.
+ */
+ lb_new_flow_entry_t *new_flow_table;
+
+ /**
+ * New flows table length - 1
+ * (length MUST be a power of 2)
+ */
+ u32 new_flow_table_mask;
+
+ /**
+ * Last time garbage collection was run to free the ASs.
+ */
+ u32 last_garbage_collection;
+
+ //Not runtime
+
+ /**
+ * A Virtual IP represents a given service delivered
+ * by a set of application servers. It can be a single
+ * address or a prefix.
+ * IPv4 prefixes are encoded using IPv4-in-IPv6 embedded address
+ * (i.e. ::/96 prefix).
+ */
+ ip46_address_t prefix;
+
+ /**
+ * The VIP prefix length.
+ * In case of IPv4, plen = 96 + ip4_plen.
+ */
+ u8 plen;
+
+  /**
+   * The type of traffic for this VIP
+   * (address family of the VIP and of the GRE encap).
+   */
+ lb_vip_type_t type;
+
+  /**
+   * Flags related to this VIP.
+   * LB_VIP_FLAGS_USED means the VIP is active.
+   * When it is not set, the VIP is in the process of being removed.
+   * We cannot immediately remove a VIP because the VIP index may still be
+   * stored in the adjacency index.
+   */
+ u8 flags;
+#define LB_VIP_FLAGS_USED 0x1
+
+ /**
+ * Pool of AS indexes used for this VIP.
+ * This also includes ASs that have been removed (but are still referenced).
+ */
+ u32 *as_indexes;
+} lb_vip_t;
+
+#define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 || (vip)->type == LB_VIP_TYPE_IP4_GRE4)
+#define lb_vip_is_gre4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 || (vip)->type == LB_VIP_TYPE_IP4_GRE4)
+format_function_t format_lb_vip;
+format_function_t format_lb_vip_detailed;
+
+typedef struct {
+ /**
+ * Each CPU has its own sticky flow hash table.
+ * One single table is used for all VIPs.
+ */
+ lb_hash_t *sticky_ht;
+} lb_per_cpu_t;
+
+typedef struct {
+ /**
+ * Pool of all Virtual IPs
+ */
+ lb_vip_t *vips;
+
+ /**
+ * Pool of ASs.
+ * ASs are referenced by address and vip index.
+ * The first element (index 0) is special and used only to fill
+ * new_flow_tables when no AS has been configured.
+ */
+ lb_as_t *ass;
+
+  /**
+   * Each AS has an associated reference counter.
+   * As ass[0] has a special meaning, its associated counter
+   * starts at 0 and is only ever decremented; i.e. do not rely on its value.
+   */
+ vlib_refcount_t as_refcount;
+
+ /**
+ * Some global data is per-cpu
+ */
+ lb_per_cpu_t *per_cpu;
+
+ /**
+ * Node next index for IP adjacencies, for each of the traffic types.
+ */
+ u32 ip_lookup_next_index[LB_VIP_N_TYPES];
+
+ /**
+ * Source address used in IPv6 encapsulated traffic
+ */
+ ip6_address_t ip6_src_address;
+
+ /**
+ * Source address used for IPv4 encapsulated traffic
+ */
+ ip4_address_t ip4_src_address;
+
+ /**
+ * Number of buckets in the per-cpu sticky hash table.
+ */
+ u32 per_cpu_sticky_buckets;
+
+ /**
+ * Flow timeout in seconds.
+ */
+ u32 flow_timeout;
+
+ /**
+ * Per VIP counter
+ */
+ vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS];
+
+ /**
+ * DPO used to send packet from IP4/6 lookup to LB node.
+ */
+ dpo_type_t dpo_gre4_type;
+ dpo_type_t dpo_gre6_type;
+
+ /**
+ * Node type for registering to fib changes.
+ */
+ fib_node_type_t fib_node_type;
+
+ /**
+ * API dynamically registered base ID.
+ */
+ u16 msg_id_base;
+
+ volatile u32 *writer_lock;
+} lb_main_t;
+
+extern lb_main_t lb_main;
+extern vlib_node_registration_t lb6_node;
+extern vlib_node_registration_t lb4_node;
+
+/**
+ * Set global load-balancer parameters.
+ * @param ip4_address IPv4 source address used for encapsulated traffic
+ * @param ip6_address IPv6 source address used for encapsulated traffic
+ * @param sticky_buckets Number of buckets in each per-thread sticky flow table
+ * @param flow_timeout Flow entry timeout in seconds
+ * @return 0 on success. VNET_LB_ERR_XXX on error
+ */
+int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
+ u32 sticky_buckets, u32 flow_timeout);
+
+int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type,
+ u32 new_length, u32 *vip_index);
+int lb_vip_del(u32 vip_index);
+
+int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index);
+
+#define lb_vip_get_by_index(index) (pool_is_free_index(lb_main.vips, index)?NULL:pool_elt_at_index(lb_main.vips, index))
+
+int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
+int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
+
+u32 lb_hash_time_now(vlib_main_t * vm);
+
+void lb_garbage_collection(void);
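+
+/*
+ * Typical control-plane call sequence (illustrative sketch only: values are
+ * arbitrary and error handling is omitted):
+ *
+ *   u32 vip_index;
+ *   lb_conf (&ip4_src, &ip6_src, 1 << 10, 40);
+ *   lb_vip_add (&prefix, plen, LB_VIP_TYPE_IP6_GRE6, 1024, &vip_index);
+ *   lb_vip_add_ass (vip_index, as_addresses, n_as_addresses);
+ *   ...
+ *   lb_vip_del_ass (vip_index, as_addresses, n_as_addresses);
+ *   lb_vip_del (vip_index);
+ */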
+
+format_function_t format_lb_main;
+
+#endif /* LB_PLUGIN_LB_LB_H_ */
diff --git a/src/plugins/lb/lb_plugin_doc.md b/src/plugins/lb/lb_plugin_doc.md
new file mode 100644
index 00000000..c7885ffb
--- /dev/null
+++ b/src/plugins/lb/lb_plugin_doc.md
@@ -0,0 +1,141 @@
+# Load Balancer plugin for VPP {#lb_plugin_doc}
+
+## Version
+
+The load balancer plugin is currently in *beta*.
+Both CLIs and APIs are subject to *heavy* changes,
+which also means feedback is very welcome regarding features, APIs, etc.
+
+## Overview
+
+This plugin provides load balancing for VPP in a way that is largely inspired
+by Google's MagLev: http://research.google.com/pubs/pub44824.html
+
+The load balancer is configured with a set of Virtual IPs (VIP, which can be
+prefixes), and for each VIP, with a set of Application Server addresses (ASs).
+
+Traffic received for a given VIP (or VIP prefix) is tunneled using GRE towards
+the different ASs in a way that (tries to) ensure that a given session will
+always be tunneled to the same AS.
+
+Both VIPs and ASs can be IPv4 or IPv6, but for a given VIP, all ASs must use
+the same encap type (i.e. IPv4+GRE or IPv6+GRE), meaning that for a given VIP,
+all AS addresses must be of the same family.
+
+## Performance
+
+The load balancer has been tested with up to 1 million flows and still forwards
+more than 3Mpps per core in such circumstances.
+Although 3Mpps is already good, performance is likely to improve
+in future versions.
+
+## Configuration
+
+### Global LB parameters
+
+The load balancer needs to be configured with some parameters:
+
+ lb conf [ip4-src-address <addr>] [ip6-src-address <addr>]
+ [buckets <n>] [timeout <s>]
+
+ip4-src-address: the source address used to send encap. packets using IPv4.
+
+ip6-src-address: the source address used to send encap. packets using IPv6.
+
+buckets: the *per-thread* established-connections-table number of buckets.
+
+timeout: the number of seconds a connection will remain in the
+         established-connections-table while no packet for this flow
+         is received.
+
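+Example (illustrative values only):
+
+    lb conf ip4-src-address 10.0.0.2 ip6-src-address 2004::1 buckets 1024 timeout 5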
+
+### Configure the VIPs
+
+ lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del]
+
+new_len is the size of the new-connection table. It should be 1 or 2 orders of
+magnitude bigger than the number of ASs for the VIP in order to ensure
+good load balancing.
+
+Examples:
+
+ lb vip 2002::/16 encap gre6 new_len 1024
+ lb vip 2003::/16 encap gre4 new_len 2048
+ lb vip 80.0.0.0/8 encap gre6 new_len 16
+ lb vip 90.0.0.0/8 encap gre4 new_len 1024
+
+### Configure the ASs (for each VIP)
+
+ lb as <vip-prefix> [<address> [<address> [...]]] [del]
+
+You can add (or delete) multiple ASs at a time (for a single VIP).
+Note that the AS address family must match the VIP encap IP family.
+
+Examples:
+
+ lb as 2002::/16 2001::2 2001::3 2001::4
+ lb as 2003::/16 10.0.0.1 10.0.0.2
+ lb as 80.0.0.0/8 2001::2
+ lb as 90.0.0.0/8 10.0.0.1
+
+
+
+## Monitoring
+
+The plugin provides a number of counters and per-VIP information.
+These are still subject to significant change.
+
+ show lb
+ show lb vip
+ show lb vip verbose
+
+ show node counters
+
+
+## Design notes
+
+### Multi-Threading
+
+MagLev is a distributed system which pseudo-randomly generates a
+new-connections table based on AS names, such that each server configured with
+the same set of ASs ends up with the same table. Connection stickiness is then
+ensured with an established-connections table. Using ECMP, it is assumed (but
+not relied upon) that servers will mostly receive traffic for different flows.
+
+This implementation pushes the parallelism a little bit further by using
+one established-connections table per thread. This is equivalent to assuming
+that RSS will do a job similar to ECMP, and is quite useful as threads don't
+need to take a lock in order to write to the table.
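+
+Concretely, each worker thread only ever reads and writes its own table, so
+no lock is needed (a minimal sketch of what lb_get_sticky_table() in node.c
+does):
+
+    /* per-worker sticky table: no lock required */
+    lb_hash_t *sticky_ht = lb_main.per_cpu[thread_index].sticky_ht;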
+
+### Hash Table
+
+A load balancer requires a hash table that is efficient for both reads and
+writes. The hash table used by ip6-forward is very read-efficient, but not so
+much for writing. In addition, it is not a big deal if writing into the hash
+table fails (again, MagLev uses a flow table but does not heavily rely on it).
+
+The plugin therefore uses a very specific (and deliberately simple) hash table:
+ - Fixed (and power of 2) number of buckets (configured at runtime)
+ - Fixed (and power of 2) number of entries per bucket (configured at compile time)
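+
+As a sketch, each bucket fits in a single 64-byte cache line and holds a
+fixed number of entries (simplified from lbhash.h, where
+LBHASH_ENTRY_PER_BUCKET is 4):
+
+    typedef struct {
+      u32 hash[4];    /* flow hash stored in this slot */
+      u32 timeout[4]; /* per-entry expiry time */
+      u32 vip[4];     /* owning VIP index */
+      u32 value[4];   /* AS index for this flow */
+    } lb_hash_bucket_t;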
+
+### Reference counting
+
+When an AS is removed, there are two possible ways to react:
+ - Keep using the AS for established connections
+ - Change the AS for established connections (likely to cause errors for TCP)
+
+In the first case, although an AS is removed from the configuration, its
+associated state needs to stay around as long as it is used by at least one
+thread.
+
+In order to avoid locks, a specific reference counter is used. The design is
+quite similar to clib counters, but:
+ - It is possible to decrease the value
+ - Summing will not zero the per-thread counters
+ - Only the owning thread can reallocate its own counters vector (to avoid concurrency issues)
+
+This reference counter is lock-free, but reading a count of 0 does not mean
+the value can be freed, unless it is ensured by *other* means that no other
+thread is concurrently referencing the object. In the case of this plugin, it
+is assumed that no concurrent access will take place after a few seconds.
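+
+As an illustrative sketch (using the API from refcount.h), a worker takes and
+drops references like this, and a count of 0 is only a *necessary* condition
+for freeing:
+
+    vlib_refcount_add(&lbm->as_refcount, thread_index, as_index, 1);  /* take */
+    vlib_refcount_add(&lbm->as_refcount, thread_index, as_index, -1); /* drop */
+    if (vlib_refcount_get(&lbm->as_refcount, as_index) == 0)
+      ; /* may be freed, but only after the grace period has also elapsed */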
+
diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c
new file mode 100644
index 00000000..9b30c18d
--- /dev/null
+++ b/src/plugins/lb/lb_test.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <lb/lb.h>
+
+#define __plugin_msg_base lb_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+//TODO: Move that to vat/plugin_api.c
+//////////////////////////
+uword unformat_ip46_address (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ if ((type != IP46_TYPE_IP6) &&
+ unformat(input, "%U", unformat_ip4_address, &ip46->ip4)) {
+ ip46_address_mask_ip4(ip46);
+ return 1;
+ } else if ((type != IP46_TYPE_IP4) &&
+ unformat(input, "%U", unformat_ip6_address, &ip46->ip6)) {
+ return 1;
+ }
+ return 0;
+}
+uword unformat_ip46_prefix (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ u8 *len = va_arg (*args, u8 *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+
+ u32 l;
+ if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) {
+ if (l > 32)
+ return 0;
+ *len = l + 96;
+ ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0;
+ } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) {
+ if (l > 128)
+ return 0;
+ *len = l;
+ } else {
+ return 0;
+ }
+ return 1;
+}
+/////////////////////////
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <lb/lb.api.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+/* define message structures */
+#define vl_typedefs
+#include <lb/lb.api.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <lb/lb.api.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <lb/lb.api.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <lb/lb.api.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} lb_test_main_t;
+
+lb_test_main_t lb_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(lb_conf_reply) \
+_(lb_add_del_vip_reply) \
+_(lb_add_del_as_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = lb_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+ _(LB_CONF_REPLY, lb_conf_reply) \
+ _(LB_ADD_DEL_VIP_REPLY, lb_add_del_vip_reply) \
+ _(LB_ADD_DEL_AS_REPLY, lb_add_del_as_reply)
+
+static int api_lb_conf (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_lb_conf_t mps, *mp;
+ int ret;
+
+ if (!unformat(i, "%U %U %u %u",
+ unformat_ip4_address, &mps.ip4_src_address,
+ unformat_ip6_address, mps.ip6_src_address,
+ &mps.sticky_buckets_per_core,
+ &mps.flow_timeout)) {
+ errmsg ("invalid arguments\n");
+ return -99;
+ }
+
+  M(LB_CONF, mp);
+  /* Copy the parsed values into the freshly allocated message.
+   * Network byte order for the u32 fields is assumed to be what the
+   * message handler expects. */
+  clib_memcpy (&mp->ip4_src_address, &mps.ip4_src_address,
+               sizeof (mp->ip4_src_address));
+  clib_memcpy (&mp->ip6_src_address, &mps.ip6_src_address,
+               sizeof (mp->ip6_src_address));
+  mp->sticky_buckets_per_core = htonl (mps.sticky_buckets_per_core);
+  mp->flow_timeout = htonl (mps.flow_timeout);
+  S(mp);
+ W (ret);
+ return ret;
+}
+
+static int api_lb_add_del_vip (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_lb_add_del_vip_t mps, *mp;
+ int ret;
+ mps.is_del = 0;
+ mps.is_gre4 = 0;
+
+ if (!unformat(i, "%U",
+ unformat_ip46_prefix, mps.ip_prefix, &mps.prefix_length, IP46_TYPE_ANY)) {
+ errmsg ("invalid prefix\n");
+ return -99;
+ }
+
+ if (unformat(i, "gre4")) {
+ mps.is_gre4 = 1;
+ } else if (unformat(i, "gre6")) {
+ mps.is_gre4 = 0;
+ } else {
+ errmsg ("no encap\n");
+ return -99;
+ }
+
+ if (!unformat(i, "%d", &mps.new_flows_table_length)) {
+    errmsg ("no table length\n");
+ return -99;
+ }
+
+ if (unformat(i, "del")) {
+ mps.is_del = 1;
+ }
+
+  M(LB_ADD_DEL_VIP, mp);
+  /* Copy the parsed values into the freshly allocated message (network
+   * byte order for the table length is assumed here). */
+  clib_memcpy (mp->ip_prefix, mps.ip_prefix, sizeof (mp->ip_prefix));
+  mp->prefix_length = mps.prefix_length;
+  mp->is_gre4 = mps.is_gre4;
+  mp->new_flows_table_length = htonl (mps.new_flows_table_length);
+  mp->is_del = mps.is_del;
+  S(mp);
+ W (ret);
+ return ret;
+}
+
+static int api_lb_add_del_as (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_lb_add_del_as_t mps, *mp;
+ int ret;
+ mps.is_del = 0;
+
+ if (!unformat(i, "%U %U",
+ unformat_ip46_prefix, mps.vip_ip_prefix, &mps.vip_prefix_length, IP46_TYPE_ANY,
+ unformat_ip46_address, mps.as_address)) {
+ errmsg ("invalid prefix or address\n");
+ return -99;
+ }
+
+ if (unformat(i, "del")) {
+ mps.is_del = 1;
+ }
+
+  M(LB_ADD_DEL_AS, mp);
+  /* Copy the parsed values into the freshly allocated message. */
+  clib_memcpy (mp->vip_ip_prefix, mps.vip_ip_prefix,
+               sizeof (mp->vip_ip_prefix));
+  mp->vip_prefix_length = mps.vip_prefix_length;
+  clib_memcpy (mp->as_address, mps.as_address, sizeof (mp->as_address));
+  mp->is_del = mps.is_del;
+  S(mp);
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(lb_conf, "<ip4-src-addr> <ip6-src-address> <sticky_buckets_per_core> <flow_timeout>") \
+_(lb_add_del_vip, "<ip-prefix> [gre4|gre6] <new_table_len> [del]") \
+_(lb_add_del_as, "<vip-ip-prefix> <address> [del]")
+
+static void
+lb_vat_api_hookup (vat_main_t *vam)
+{
+ lb_test_main_t * lbtm = &lb_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + lbtm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ lb_test_main_t * lbtm = &lb_test_main;
+
+ u8 * name;
+
+ lbtm->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "lb_%08x%c", api_version, 0);
+ lbtm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (lbtm->msg_id_base != (u16) ~0)
+ lb_vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/lb/lbhash.h b/src/plugins/lb/lbhash.h
new file mode 100644
index 00000000..c514fb57
--- /dev/null
+++ b/src/plugins/lb/lbhash.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * vppinfra already includes tons of different hash tables.
+ * The MagLev flow table is a bit different: it has to be very efficient
+ * for both write and read operations, but it does not need to
+ * be 100% reliable (writes may fail). It also needs to recycle
+ * old entries in a lazy way.
+ *
+ * This hash table is about the simplest hash table one can write:
+ * fixed total size, fixed bucket size.
+ * The advantage is that it can be very efficient (hopefully).
+ *
+ */
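+
+/*
+ * Typical usage sketch (mirrors what lb/node.c does; variable names are
+ * illustrative):
+ *
+ *   u32 available_index, found_value;
+ *   u32 hash = lb_hash_hash (k0, k1, k2, k3, k4);
+ *   lb_hash_get (h, hash, vip_index, time_now, &available_index, &found_value);
+ *   if (found_value == ~0 && available_index != ~0)
+ *     lb_hash_put (h, hash, new_value, vip_index, available_index, time_now);
+ */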
+
+#ifndef LB_PLUGIN_LB_LBHASH_H_
+#define LB_PLUGIN_LB_LBHASH_H_
+
+#include <vnet/vnet.h>
+
+#if defined (__SSE4_2__)
+#include <immintrin.h>
+#endif
+
+/**
+ * @brief Number of entries per bucket.
+ */
+#define LBHASH_ENTRY_PER_BUCKET 4
+
+#define LB_HASH_DO_NOT_USE_SSE_BUCKETS 0
+
+/**
+ * @brief One bucket contains 4 entries.
+ * Each bucket takes one 64B cache line in memory.
+ */
+typedef struct {
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 hash[LBHASH_ENTRY_PER_BUCKET];
+ u32 timeout[LBHASH_ENTRY_PER_BUCKET];
+ u32 vip[LBHASH_ENTRY_PER_BUCKET];
+ u32 value[LBHASH_ENTRY_PER_BUCKET];
+} lb_hash_bucket_t;
+
+typedef struct {
+ u32 buckets_mask;
+ u32 timeout;
+ lb_hash_bucket_t buckets[];
+} lb_hash_t;
+
+#define lb_hash_nbuckets(h) (((h)->buckets_mask) + 1)
+#define lb_hash_size(h) (lb_hash_nbuckets(h) * LBHASH_ENTRY_PER_BUCKET)
+
+#define lb_hash_foreach_bucket(h, bucket) \
+ for (bucket = (h)->buckets; \
+ bucket < (h)->buckets + lb_hash_nbuckets(h); \
+ bucket++)
+
+#define lb_hash_foreach_entry(h, bucket, i) \
+ lb_hash_foreach_bucket(h, bucket) \
+ for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++)
+
+#define lb_hash_foreach_valid_entry(h, bucket, i, now) \
+ lb_hash_foreach_entry(h, bucket, i) \
+ if (!clib_u32_loop_gt((now), bucket->timeout[i]))
+
+static_always_inline
+lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout)
+{
+ if (!is_pow2(buckets))
+ return NULL;
+
+ // Allocate 1 more bucket for prefetch
+ u32 size = ((u64)&((lb_hash_t *)(0))->buckets[0]) +
+ sizeof(lb_hash_bucket_t) * (buckets + 1);
+ u8 *mem = 0;
+ lb_hash_t *h;
+ vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES);
+ h = (lb_hash_t *)mem;
+ h->buckets_mask = (buckets - 1);
+ h->timeout = timeout;
+ return h;
+}
+
+static_always_inline
+void lb_hash_free(lb_hash_t *h)
+{
+ u8 *mem = (u8 *)h;
+ vec_free(mem);
+}
+
+#if __SSE4_2__ && !defined (__i386__)
+static_always_inline
+u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
+{
+ u64 val = 0;
+ val = _mm_crc32_u64(val, k0);
+ val = _mm_crc32_u64(val, k1);
+ val = _mm_crc32_u64(val, k2);
+ val = _mm_crc32_u64(val, k3);
+ val = _mm_crc32_u64(val, k4);
+ return (u32) val;
+}
+#else
+static_always_inline
+u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
+{
+ u64 tmp = k0 ^ k1 ^ k2 ^ k3 ^ k4;
+ return (u32)clib_xxhash (tmp);
+}
+#endif
+
+static_always_inline
+void lb_hash_prefetch_bucket(lb_hash_t *ht, u32 hash)
+{
+ lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask];
+ CLIB_PREFETCH(bucket, sizeof(*bucket), READ);
+}
+
+static_always_inline
+void lb_hash_get(lb_hash_t *ht, u32 hash, u32 vip, u32 time_now,
+ u32 *available_index, u32 *found_value)
+{
+ lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask];
+ *found_value = ~0;
+ *available_index = ~0;
+#if __SSE4_2__ && LB_HASH_DO_NOT_USE_SSE_BUCKETS == 0
+ u32 bitmask, found_index;
+ __m128i mask;
+
+ // mask[*] = timeout[*] > now
+ mask = _mm_cmpgt_epi32(_mm_loadu_si128 ((__m128i *) bucket->timeout),
+ _mm_set1_epi32 (time_now));
+  // bitmask: 4 bits per entry, set when timeout[*] <= now (i.e. expired)
+  bitmask = (~_mm_movemask_epi8(mask)) & 0xffff;
+  // Get first expired index (timeout[*] <= now), if any.
+  *available_index = (bitmask)?__builtin_ctz(bitmask)/4:*available_index;
+
+ // mask[*] = (timeout[*] > now) && (hash[*] == hash)
+ mask = _mm_and_si128(mask,
+ _mm_cmpeq_epi32(
+ _mm_loadu_si128 ((__m128i *) bucket->hash),
+ _mm_set1_epi32 (hash)));
+
+ // Load the array of vip values
+ // mask[*] = (timeout[*] > now) && (hash[*] == hash) && (vip[*] == vip)
+ mask = _mm_and_si128(mask,
+ _mm_cmpeq_epi32(
+ _mm_loadu_si128 ((__m128i *) bucket->vip),
+ _mm_set1_epi32 (vip)));
+
+ // mask[*] = (timeout[*x4] > now) && (hash[*x4] == hash) && (vip[*x4] == vip)
+ bitmask = _mm_movemask_epi8(mask);
+ // Get first index, if any
+ found_index = (bitmask)?__builtin_ctzll(bitmask)/4:0;
+ ASSERT(found_index < 4);
+ *found_value = (bitmask)?bucket->value[found_index]:*found_value;
+ bucket->timeout[found_index] =
+ (bitmask)?time_now + ht->timeout:bucket->timeout[found_index];
+#else
+  u32 i;
+  for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++) {
+      u8 cmp = (bucket->hash[i] == hash && bucket->vip[i] == vip);
+      u8 timeouted = clib_u32_loop_gt(time_now, bucket->timeout[i]);
+      if (cmp && !timeouted) {
+          //Matching live entry: refresh its timeout and return its value
+          *found_value = bucket->value[i];
+          bucket->timeout[i] = time_now + ht->timeout;
+          return;
+      }
+      //Remember the first expired slot as an insertion candidate
+      *available_index = (timeouted && (*available_index == ~0))?i:*available_index;
+  }
+#endif
+}
+
+static_always_inline
+u32 lb_hash_available_value(lb_hash_t *h, u32 hash, u32 available_index)
+{
+ return h->buckets[hash & h->buckets_mask].value[available_index];
+}
+
+static_always_inline
+void lb_hash_put(lb_hash_t *h, u32 hash, u32 value, u32 vip,
+ u32 available_index, u32 time_now)
+{
+ lb_hash_bucket_t *bucket = &h->buckets[hash & h->buckets_mask];
+ bucket->hash[available_index] = hash;
+ bucket->value[available_index] = value;
+ bucket->timeout[available_index] = time_now + h->timeout;
+ bucket->vip[available_index] = vip;
+}
+
+static_always_inline
+u32 lb_hash_elts(lb_hash_t *h, u32 time_now)
+{
+ u32 tot = 0;
+ lb_hash_bucket_t *bucket;
+ u32 i;
+ lb_hash_foreach_valid_entry(h, bucket, i, time_now) {
+ tot++;
+ }
+ return tot;
+}
+
+#endif /* LB_PLUGIN_LB_LBHASH_H_ */
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
new file mode 100644
index 00000000..4a7485eb
--- /dev/null
+++ b/src/plugins/lb/node.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/lb.h>
+
+#include <vnet/gre/packet.h>
+#include <lb/lbhash.h>
+
+#define foreach_lb_error \
+ _(NONE, "no error") \
+ _(PROTO_NOT_SUPPORTED, "protocol not supported")
+
+typedef enum {
+#define _(sym,str) LB_ERROR_##sym,
+ foreach_lb_error
+#undef _
+ LB_N_ERROR,
+} lb_error_t;
+
+static char *lb_error_strings[] = {
+#define _(sym,string) string,
+ foreach_lb_error
+#undef _
+};
+
+typedef struct {
+ u32 vip_index;
+ u32 as_index;
+} lb_trace_t;
+
+u8 *
+format_lb_trace (u8 * s, va_list * args)
+{
+ lb_main_t *lbm = &lb_main;
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lb_trace_t *t = va_arg (*args, lb_trace_t *);
+  if (pool_is_free_index(lbm->vips, t->vip_index)) {
+    s = format(s, "lb vip[%d]: This VIP was freed since capture\n", t->vip_index);
+  } else {
+    s = format(s, "lb vip[%d]: %U\n", t->vip_index, format_lb_vip, &lbm->vips[t->vip_index]);
+  }
+  if (pool_is_free_index(lbm->ass, t->as_index)) {
+    s = format(s, "lb as[%d]: This AS was freed since capture\n", t->as_index);
+  } else {
+    s = format(s, "lb as[%d]: %U\n", t->as_index, format_lb_as, &lbm->ass[t->as_index]);
+  }
+ return s;
+}
+
+lb_hash_t *lb_get_sticky_table(u32 thread_index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_hash_t *sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
+ //Check if size changed
+ if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht))))
+ {
+ //Dereference everything in there
+ lb_hash_bucket_t *b;
+ u32 i;
+ lb_hash_foreach_entry(sticky_ht, b, i) {
+ vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
+ vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
+ }
+
+ lb_hash_free(sticky_ht);
+ sticky_ht = NULL;
+ }
+
+ //Create if necessary
+ if (PREDICT_FALSE(sticky_ht == NULL)) {
+ lbm->per_cpu[thread_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
+ sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
+ clib_warning("Regenerated sticky table %p", sticky_ht);
+ }
+
+ ASSERT(sticky_ht);
+
+ //Update timeout
+ sticky_ht->timeout = lbm->flow_timeout;
+ return sticky_ht;
+}
+
+u64
+lb_node_get_other_ports4(ip4_header_t *ip40)
+{
+ return 0;
+}
+
+u64
+lb_node_get_other_ports6(ip6_header_t *ip60)
+{
+ return 0;
+}
+
+static_always_inline u32
+lb_node_get_hash(vlib_buffer_t *p, u8 is_input_v4)
+{
+ u32 hash;
+ if (is_input_v4)
+ {
+ ip4_header_t *ip40;
+ u64 ports;
+ ip40 = vlib_buffer_get_current (p);
+ if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP ||
+ ip40->protocol == IP_PROTOCOL_UDP))
+ ports = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 16) |
+ ((u64)((udp_header_t *)(ip40 + 1))->dst_port);
+ else
+ ports = lb_node_get_other_ports4(ip40);
+
+ hash = lb_hash_hash(*((u64 *)&ip40->address_pair), ports,
+ 0, 0, 0);
+ }
+ else
+ {
+ ip6_header_t *ip60;
+ ip60 = vlib_buffer_get_current (p);
+ u64 ports;
+ if (PREDICT_TRUE (ip60->protocol == IP_PROTOCOL_TCP ||
+ ip60->protocol == IP_PROTOCOL_UDP))
+ ports = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 16) |
+ ((u64)((udp_header_t *)(ip60 + 1))->dst_port);
+ else
+ ports = lb_node_get_other_ports6(ip60);
+
+ hash = lb_hash_hash(ip60->src_address.as_u64[0],
+ ip60->src_address.as_u64[1],
+ ip60->dst_address.as_u64[0],
+ ip60->dst_address.as_u64[1],
+ ports);
+ }
+ return hash;
+}
+
+static_always_inline uword
+lb_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame,
+            u8 is_input_v4, //Compile-time parameter: whether the input is IPv4 (else IPv6)
+            u8 is_encap_v4) //Compile-time parameter: whether the GRE encap is IPv4 (else IPv6)
+{
+ lb_main_t *lbm = &lb_main;
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ u32 thread_index = vlib_get_thread_index();
+ u32 lb_time = lb_hash_time_now(vm);
+
+ lb_hash_t *sticky_ht = lb_get_sticky_table(thread_index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ u32 nexthash0 = 0;
+ if (PREDICT_TRUE(n_left_from > 0))
+ nexthash0 = lb_node_get_hash(vlib_get_buffer (vm, from[0]), is_input_v4);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ lb_vip_t *vip0;
+ u32 asindex0;
+ u16 len0;
+ u32 available_index0;
+ u8 counter = 0;
+ u32 hash0 = nexthash0;
+
+ if (PREDICT_TRUE(n_left_from > 1))
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[1]);
+ //Compute next hash and prefetch bucket
+ nexthash0 = lb_node_get_hash(p1, is_input_v4);
+ lb_hash_prefetch_bucket(sticky_ht, nexthash0);
+ //Prefetch for encap, next
+ CLIB_PREFETCH (vlib_buffer_get_current(p1) - 64, 64, STORE);
+ }
+
+ if (PREDICT_TRUE(n_left_from > 2))
+ {
+ vlib_buffer_t *p2;
+ p2 = vlib_get_buffer(vm, from[2]);
+ /* prefetch packet header and data */
+ vlib_prefetch_buffer_header(p2, STORE);
+ CLIB_PREFETCH (vlib_buffer_get_current(p2), 64, STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ vip0 = pool_elt_at_index (lbm->vips,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
+
+ if (is_input_v4)
+ {
+ ip4_header_t *ip40;
+ ip40 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip40->length);
+ }
+ else
+ {
+ ip6_header_t *ip60;
+ ip60 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
+ }
+
+ lb_hash_get(sticky_ht, hash0, vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ lb_time, &available_index0, &asindex0);
+
+ if (PREDICT_TRUE(asindex0 != ~0))
+ {
+ //Found an existing entry
+ counter = LB_VIP_COUNTER_NEXT_PACKET;
+ }
+ else if (PREDICT_TRUE(available_index0 != ~0))
+ {
+ //There is an available slot for a new flow
+ asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
+ counter = LB_VIP_COUNTER_FIRST_PACKET;
+ counter = (asindex0 == 0)?LB_VIP_COUNTER_NO_SERVER:counter;
+
+ //TODO: There are race conditions with as0 and vip0 manipulation.
+ //Configuration may be changed, vectors resized, etc...
+
+ //Dereference previously used
+ vlib_refcount_add(&lbm->as_refcount, thread_index,
+ lb_hash_available_value(sticky_ht, hash0, available_index0), -1);
+ vlib_refcount_add(&lbm->as_refcount, thread_index,
+ asindex0, 1);
+
+              //Add sticky entry
+              //Note that when no AS is configured, an entry is still added.
+              //But having no configured AS is not something that should happen.
+ lb_hash_put(sticky_ht, hash0, asindex0,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ available_index0, lb_time);
+ }
+ else
+ {
+ //Could not store new entry in the table
+ asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
+ counter = LB_VIP_COUNTER_UNTRACKED_PACKET;
+ }
+
+ vlib_increment_simple_counter(&lbm->vip_counters[counter],
+ thread_index,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ 1);
+
+ //Now let's encap
+ {
+ gre_header_t *gre0;
+ if (is_encap_v4)
+ {
+ ip4_header_t *ip40;
+ vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
+ ip40 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip40 + 1);
+ ip40->src_address = lbm->ip4_src_address;
+ ip40->dst_address = lbm->ass[asindex0].address.ip4;
+ ip40->ip_version_and_header_length = 0x45;
+ ip40->ttl = 128;
+ ip40->fragment_id = 0;
+ ip40->flags_and_fragment_offset = 0;
+ ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
+ ip40->protocol = IP_PROTOCOL_GRE;
+ ip40->checksum = ip4_header_checksum (ip40);
+ }
+ else
+ {
+ ip6_header_t *ip60;
+ vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
+ ip60 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip60 + 1);
+ ip60->dst_address = lbm->ass[asindex0].address.ip6;
+ ip60->src_address = lbm->ip6_src_address;
+ ip60->hop_limit = 128;
+ ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
+ ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
+ ip60->protocol = IP_PROTOCOL_GRE;
+ }
+
+ gre0->flags_and_version = 0;
+ gre0->protocol = (is_input_v4)?
+ clib_host_to_net_u16(0x0800):
+ clib_host_to_net_u16(0x86DD);
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->as_index = asindex0;
+ tr->vip_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ }
+
+ //Enqueue to next
+ //Note that this is going to error if asindex0 == 0
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbm->ass[asindex0].dpo.dpoi_index;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0,
+ lbm->ass[asindex0].dpo.dpoi_next_node);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+lb6_gre6_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return lb_node_fn(vm, node, frame, 0, 0);
+}
+
+static uword
+lb6_gre4_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return lb_node_fn(vm, node, frame, 0, 1);
+}
+
+static uword
+lb4_gre6_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return lb_node_fn(vm, node, frame, 1, 0);
+}
+
+static uword
+lb4_gre4_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return lb_node_fn(vm, node, frame, 1, 1);
+}
+
+VLIB_REGISTER_NODE (lb6_gre6_node) =
+{
+ .function = lb6_gre6_node_fn,
+ .name = "lb6-gre6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ {
+ [LB_NEXT_DROP] = "error-drop"
+ },
+};
+
+VLIB_REGISTER_NODE (lb6_gre4_node) =
+{
+ .function = lb6_gre4_node_fn,
+ .name = "lb6-gre4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ {
+ [LB_NEXT_DROP] = "error-drop"
+ },
+};
+
+VLIB_REGISTER_NODE (lb4_gre6_node) =
+{
+ .function = lb4_gre6_node_fn,
+ .name = "lb4-gre6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ {
+ [LB_NEXT_DROP] = "error-drop"
+ },
+};
+
+VLIB_REGISTER_NODE (lb4_gre4_node) =
+{
+ .function = lb4_gre4_node_fn,
+ .name = "lb4-gre4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ {
+ [LB_NEXT_DROP] = "error-drop"
+ },
+};
+
diff --git a/src/plugins/lb/refcount.c b/src/plugins/lb/refcount.c
new file mode 100644
index 00000000..6f01ab5a
--- /dev/null
+++ b/src/plugins/lb/refcount.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/refcount.h>
+
+void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size)
+{
+ u32 *new_counter = 0, *old_counter;
+ vec_validate(new_counter, size);
+  if (per_cpu->counters)
+    memcpy(new_counter, per_cpu->counters,
+           per_cpu->length * sizeof(*new_counter));
+ old_counter = per_cpu->counters;
+ per_cpu->counters = new_counter;
+ CLIB_MEMORY_BARRIER();
+ per_cpu->length = vec_len(new_counter);
+ vec_free(old_counter);
+}
+
+u64 vlib_refcount_get(vlib_refcount_t *r, u32 index)
+{
+ u64 count = 0;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 thread_index;
+ for (thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++) {
+ if (r->per_cpu[thread_index].length > index)
+ count += r->per_cpu[thread_index].counters[index];
+ }
+ return count;
+}
+
diff --git a/src/plugins/lb/refcount.h b/src/plugins/lb/refcount.h
new file mode 100644
index 00000000..dcfcb3fe
--- /dev/null
+++ b/src/plugins/lb/refcount.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * vlib provides lock-free counters but those:
+ * - have 16-bit per-CPU counters, which may overflow;
+ * - can only be incremented.
+ *
+ * This is very similar to vlib counters, but may be used to count references.
+ * Such a counter includes an arbitrary number of counters. Each counter
+ * is identified by its index. This is used to aggregate per-cpu memory.
+ *
+ * Warning:
+ * This reference counter is lock-free but is not race-condition free.
+ * The counting result is approximate, and another mechanism needs to be used
+ * in order to ensure that an object may safely be freed.
+ *
+ */
+
+#ifndef LB_PLUGIN_LB_REFCOUNT_H_
+#define LB_PLUGIN_LB_REFCOUNT_H_
+
+#include <vnet/vnet.h>
+
+typedef struct {
+ u32 *counters;
+ u32 length;
+ u32 *reader_lengths;
+ CLIB_CACHE_LINE_ALIGN_MARK(o);
+} vlib_refcount_per_cpu_t;
+
+typedef struct {
+ vlib_refcount_per_cpu_t *per_cpu;
+} vlib_refcount_t;
+
+void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size);
+
+static_always_inline
+void vlib_refcount_add(vlib_refcount_t *r, u32 thread_index, u32 counter_index, i32 v)
+{
+ vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[thread_index];
+ if (PREDICT_FALSE(counter_index >= per_cpu->length))
+ __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2));
+
+ per_cpu->counters[counter_index] += v;
+}
+
+u64 vlib_refcount_get(vlib_refcount_t *r, u32 index);
+
+static_always_inline
+void vlib_refcount_init(vlib_refcount_t *r)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ r->per_cpu = 0;
+ vec_validate (r->per_cpu, tm->n_vlib_mains - 1);
+}
+
+#endif /* LB_PLUGIN_LB_REFCOUNT_H_ */
diff --git a/src/plugins/lb/util.c b/src/plugins/lb/util.c
new file mode 100644
index 00000000..d969d168
--- /dev/null
+++ b/src/plugins/lb/util.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/util.h>
+
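+/**
+ * Clear the host bits of a prefix so that (prefix, plen) is in canonical
+ * form. For example (illustrative): 2001:db8::1 with plen 32 becomes
+ * 2001:db8::, and with plen 0 the address becomes ::.
+ */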
+void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen)
+{
+ if (plen == 0) {
+ prefix->as_u64[0] = 0;
+ prefix->as_u64[1] = 0;
+ } else if (plen <= 64) {
+ prefix->as_u64[0] &= clib_host_to_net_u64(0xffffffffffffffffL << (64 - plen));
+ prefix->as_u64[1] = 0;
+ } else {
+ prefix->as_u64[1] &= clib_host_to_net_u64(0xffffffffffffffffL << (128 - plen));
+  }
+}
+
+uword unformat_ip46_prefix (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ u8 *len = va_arg (*args, u8 *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+
+ u32 l;
+ if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) {
+ if (l > 32)
+ return 0;
+ *len = l + 96;
+ ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0;
+ } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) {
+ if (l > 128)
+ return 0;
+ *len = l;
+ } else {
+ return 0;
+ }
+ return 1;
+}
+
+u8 *format_ip46_prefix (u8 * s, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ u32 len = va_arg (*args, u32); //va_arg cannot use u8 or u16
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+
+ int is_ip4 = 0;
+ if (type == IP46_TYPE_IP4)
+ is_ip4 = 1;
+ else if (type == IP46_TYPE_IP6)
+ is_ip4 = 0;
+ else
+ is_ip4 = (len >= 96) && ip46_address_is_ip4(ip46);
+
+ return is_ip4 ?
+ format(s, "%U/%d", format_ip4_address, &ip46->ip4, len - 96):
+ format(s, "%U/%d", format_ip6_address, &ip46->ip6, len);
+}
+
diff --git a/src/plugins/lb/util.h b/src/plugins/lb/util.h
new file mode 100644
index 00000000..3f082310
--- /dev/null
+++ b/src/plugins/lb/util.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Non-LB specific stuff comes here
+ */
+
+#ifndef LB_PLUGIN_LB_UTIL_H_
+#define LB_PLUGIN_LB_UTIL_H_
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+
+#define ip46_address_type(ip46) (ip46_address_is_ip4(ip46)?IP46_TYPE_IP4:IP46_TYPE_IP6)
+#define ip46_prefix_is_ip4(ip46, len) ((len) >= 96 && ip46_address_is_ip4(ip46))
+#define ip46_prefix_type(ip46, len) (ip46_prefix_is_ip4(ip46, len)?IP46_TYPE_IP4:IP46_TYPE_IP6)
+
+void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen);
+uword unformat_ip46_prefix (unformat_input_t * input, va_list * args);
+u8 *format_ip46_prefix (u8 * s, va_list * args);
+
+/**
+ * 32-bit integer comparison for running (wrapping) values:
+ * a is considered greater than b when (u32)(a - b) is small.
+ * 1 > 0 is true. But 1 > 0xffffffff also is.
+ */
+#define clib_u32_loop_gt(a, b) (((u32)(a)) - ((u32)(b)) < 0x7fffffff)
+
+#endif /* LB_PLUGIN_LB_UTIL_H_ */
diff --git a/src/plugins/memif.am b/src/plugins/memif.am
new file mode 100644
index 00000000..15147e77
--- /dev/null
+++ b/src/plugins/memif.am
@@ -0,0 +1,37 @@
+# Copyright (c) 2017 Cisco Systems, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppplugins_LTLIBRARIES += memif_plugin.la
+vppapitestplugins_LTLIBRARIES += memif_test_plugin.la
+
+memif_plugin_la_SOURCES = memif/memif.c \
+ memif/memif_api.c \
+ memif/cli.c \
+ memif/node.c \
+ memif/device.c \
+ memif/socket.c \
+ memif/memif_plugin.api.h
+
+memif_test_plugin_la_SOURCES = \
+ memif/memif_test.c memif/memif_plugin.api.h
+
+noinst_HEADERS += memif/memif.h
+
+nobase_apiinclude_HEADERS += \
+ memif/memif_all_api_h.h \
+ memif/memif_msg_enum.h \
+ memif/memif.api.h
+
+API_FILES += memif/memif.api
+
+# vi:syntax=automake
diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c
new file mode 100644
index 00000000..e1bd0444
--- /dev/null
+++ b/src/plugins/memif/cli.c
@@ -0,0 +1,365 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <memif/memif.h>
+#include <memif/private.h>
+
+static clib_error_t *
+memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int r;
+ u32 ring_size = MEMIF_DEFAULT_RING_SIZE;
+ memif_create_if_args_t args = { 0 };
+ args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE;
+ u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+ u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "id %u", &args.id))
+ ;
+ else if (unformat (line_input, "socket %s", &args.socket_filename))
+ ;
+ else if (unformat (line_input, "secret %s", &args.secret))
+ ;
+ else if (unformat (line_input, "ring-size %u", &ring_size))
+ ;
+ else if (unformat (line_input, "rx-queues %u", &rx_queues))
+ ;
+ else if (unformat (line_input, "tx-queues %u", &tx_queues))
+ ;
+ else if (unformat (line_input, "buffer-size %u", &args.buffer_size))
+ ;
+ else if (unformat (line_input, "master"))
+ args.is_master = 1;
+ else if (unformat (line_input, "slave"))
+ args.is_master = 0;
+ else if (unformat (line_input, "mode ip"))
+ args.mode = MEMIF_INTERFACE_MODE_IP;
+ else if (unformat (line_input, "hw-addr %U",
+ unformat_ethernet_address, args.hw_addr))
+ args.hw_addr_set = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (!is_pow2 (ring_size))
+ return clib_error_return (0, "ring size must be power of 2");
+
+ args.log2_ring_size = min_log2 (ring_size);
+
+  if (rx_queues > 255 || rx_queues < 1)
+    return clib_error_return (0, "rx-queues must be between 1 and 255");
+  if (tx_queues > 255 || tx_queues < 1)
+    return clib_error_return (0, "tx-queues must be between 1 and 255");
+
+ args.rx_queues = rx_queues;
+ args.tx_queues = tx_queues;
+
+ r = memif_create_if (vm, &args);
+
+ vec_free (args.socket_filename);
+ vec_free (args.secret);
+
+ if (r <= VNET_API_ERROR_SYSCALL_ERROR_1
+ && r >= VNET_API_ERROR_SYSCALL_ERROR_10)
+ return clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
+
+ if (r == VNET_API_ERROR_INVALID_INTERFACE)
+ return clib_error_return (0, "Invalid interface name");
+
+ if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
+ return clib_error_return (0, "Interface with same id already exists");
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (memif_create_command, static) = {
+ .path = "create memif",
+ .short_help = "create memif [id <id>] [socket <path>] "
+ "[ring-size <size>] [buffer-size <size>] [hw-addr <mac-address>] "
+ "<master|slave> [rx-queues <number>] [tx-queues <number>] "
+ "[mode ip] [secret <string>]",
+ .function = memif_create_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 sw_if_index = ~0;
+ vnet_hw_interface_t *hw;
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0,
+ "please specify interface name or sw_if_index");
+
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ if (hw == NULL || memif_device_class.index != hw->dev_class_index)
+ return clib_error_return (0, "not a memif interface");
+
+ mif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+ memif_delete_if (vm, mif);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (memif_delete_command, static) = {
+ .path = "delete memif",
+ .short_help = "delete memif {<interface> | sw_if_index <sw_idx>}",
+ .function = memif_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_memif_if_flags (u8 * s, va_list * args)
+{
+ u32 flags = va_arg (*args, u32);
+#define _(a,b,c) if ( flags & (1 << a)) s = format (s, " %s", c);
+ foreach_memif_if_flag
+#undef _
+ return s;
+}
+
+static u8 *
+format_memif_if_mode (u8 * s, va_list * args)
+{
+ memif_if_t *mif = va_arg (*args, memif_if_t *);
+ if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ return format (s, "ethernet");
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ return format (s, "ip");
+ if (mif->mode == MEMIF_INTERFACE_MODE_PUNT_INJECT)
+ return format (s, "punt-inject");
+  return format (s, "unknown mode (%u)", mif->mode);
+}
+
+static u8 *
+format_memif_queue (u8 * s, va_list * args)
+{
+ memif_if_t *mif = va_arg (*args, memif_if_t *);
+ memif_queue_t *mq = va_arg (*args, memif_queue_t *);
+ uword i = va_arg (*args, uword);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%U%s ring %u:\n",
+ format_white_space, indent,
+ (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ?
+ "slave-to-master" : "master-to-slave", i);
+ s = format (s, "%Uregion %u offset %u ring-size %u int-fd %d\n",
+ format_white_space, indent + 4,
+ mq->region, mq->offset, (1 << mq->log2_ring_size), mq->int_fd);
+
+ if (mq->ring)
+ s = format (s, "%Uhead %u tail %u flags 0x%04x interrupts %u\n",
+ format_white_space, indent + 4,
+ mq->ring->head, mq->ring->tail, mq->ring->flags,
+ mq->int_count);
+
+ return s;
+}
+
+static u8 *
+format_memif_descriptor (u8 * s, va_list * args)
+{
+ memif_if_t *mif = va_arg (*args, memif_if_t *);
+ memif_queue_t *mq = va_arg (*args, memif_queue_t *);
+ uword indent = format_get_indent (s);
+ memif_ring_t *ring;
+ u16 ring_size;
+ u16 slot;
+
+ ring_size = 1 << mq->log2_ring_size;
+ ring = mq->ring;
+ if (ring)
+ {
+ s = format (s, "%Udescriptor table:\n", format_white_space, indent);
+ s =
+ format (s,
+ "%Uid flags buf len desc len address offset user address\n",
+ format_white_space, indent);
+ s =
+ format (s,
+ "%U===== ===== ======= ======== ================== ====== ==================\n",
+ format_white_space, indent);
+ for (slot = 0; slot < ring_size; slot++)
+ {
+ s = format (s, "%U%-5d %-5d %-7d %-7d 0x%016lx %-6d 0x%016lx\n",
+ format_white_space, indent, slot,
+ ring->desc[slot].flags, ring->desc[slot].buffer_length,
+ ring->desc[slot].length,
+ mif->regions[ring->desc[slot].region].shm,
+ ring->desc[slot].offset, memif_get_buffer (mif, ring,
+ slot));
+ }
+ s = format (s, "\n");
+ }
+
+ return s;
+}
+
+static clib_error_t *
+memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif;
+ vnet_main_t *vnm = vnet_get_main ();
+ memif_queue_t *mq;
+ uword i;
+ int show_descr = 0;
+ clib_error_t *error = 0;
+ u32 hw_if_index, *hw_if_indices = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ vec_add1 (hw_if_indices, hw_if_index);
+ else if (unformat (input, "descriptors"))
+ show_descr = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (vec_len (hw_if_indices) == 0)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (mif, mm->interfaces,
+ vec_add1 (hw_if_indices, mif->hw_if_index);
+ );
+ /* *INDENT-ON* */
+ }
+
+ for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++)
+ {
+ vnet_hw_interface_t *hi =
+ vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]);
+ mif = pool_elt_at_index (mm->interfaces, hi->dev_instance);
+ memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files,
+ mif->socket_file_index);
+ vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name,
+ vnm, mif->sw_if_index);
+ if (mif->remote_name)
+ vlib_cli_output (vm, " remote-name \"%s\"", mif->remote_name);
+ if (mif->remote_if_name)
+ vlib_cli_output (vm, " remote-interface \"%s\"",
+ mif->remote_if_name);
+ vlib_cli_output (vm, " id %d mode %U file %s", mif->id,
+ format_memif_if_mode, mif, msf->filename);
+ vlib_cli_output (vm, " flags%U", format_memif_if_flags, mif->flags);
+ vlib_cli_output (vm, " listener-fd %d conn-fd %d", msf->fd,
+ mif->conn_fd);
+ vlib_cli_output (vm,
+ " num-s2m-rings %u num-m2s-rings %u buffer-size %u",
+ mif->run.num_s2m_rings, mif->run.num_m2s_rings,
+ mif->run.buffer_size);
+
+ if (mif->local_disc_string)
+ vlib_cli_output (vm, " local-disc-reason \"%s\"",
+ mif->local_disc_string);
+ if (mif->remote_disc_string)
+ vlib_cli_output (vm, " remote-disc-reason \"%s\"",
+ mif->remote_disc_string);
+
+ vec_foreach_index (i, mif->tx_queues)
+ {
+ mq = vec_elt_at_index (mif->tx_queues, i);
+ vlib_cli_output (vm, " %U", format_memif_queue, mif, mq, i);
+ if (show_descr)
+ vlib_cli_output (vm, " %U", format_memif_descriptor, mif, mq);
+ }
+ vec_foreach_index (i, mif->rx_queues)
+ {
+ mq = vec_elt_at_index (mif->rx_queues, i);
+ vlib_cli_output (vm, " %U", format_memif_queue, mif, mq, i);
+ if (show_descr)
+ vlib_cli_output (vm, " %U", format_memif_descriptor, mif, mq);
+ }
+ }
+done:
+ vec_free (hw_if_indices);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (memif_show_command, static) = {
+ .path = "show memif",
+  .short_help = "show memif [<interface>] [descriptors]",
+ .function = memif_show_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+memif_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (memif_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c
new file mode 100644
index 00000000..aff18f2d
--- /dev/null
+++ b/src/plugins/memif/device.c
@@ -0,0 +1,380 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <memif/memif.h>
+#include <memif/private.h>
+
+#define foreach_memif_tx_func_error \
+_(NO_FREE_SLOTS, "no free tx slots") \
+_(TRUNC_PACKET, "packet > buffer size -- truncated in tx ring") \
+_(PENDING_MSGS, "pending msgs in tx ring") \
+_(NO_TX_QUEUES, "no tx queues")
+
+typedef enum
+{
+#define _(f,s) MEMIF_TX_ERROR_##f,
+ foreach_memif_tx_func_error
+#undef _
+ MEMIF_TX_N_ERROR,
+} memif_tx_func_error_t;
+
+static char *memif_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_memif_tx_func_error
+#undef _
+};
+
+u8 *
+format_memif_device_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif = pool_elt_at_index (mm->interfaces, dev_instance);
+
+ s = format (s, "memif%lu/%lu", mif->socket_file_index, mif->id);
+ return s;
+}
+
+static u8 *
+format_memif_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ int verbose = va_arg (*args, int);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "MEMIF interface");
+ if (verbose)
+ {
+ s = format (s, "\n%U instance %u", format_white_space, indent + 2,
+ dev_instance);
+ }
+ return s;
+}
+
+static u8 *
+format_memif_tx_trace (u8 * s, va_list * args)
+{
+ s = format (s, "Unimplemented...");
+ return s;
+}
+
+static_always_inline void
+memif_prefetch_buffer_and_data (vlib_main_t * vm, u32 bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_prefetch_buffer_header (b, LOAD);
+ CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+/**
+ * @brief Copy buffer to tx ring
+ *
+ * @param * vm (in)
+ * @param * node (in)
+ * @param * mif (in) pointer to memif interface
+ * @param bi (in) vlib buffer index
+ * @param * ring (in) pointer to memif ring
+ * @param * head (in/out) ring head
+ * @param mask (in) ring size - 1
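+ *
+ * Worked example (assuming the default 2048-byte ring buffers): a
+ * 3000-byte packet arriving as a 2048 + 952 byte vlib buffer chain
+ * fills descriptor *head completely (2048 bytes, flagged
+ * MEMIF_DESC_FLAG_NEXT) and spills the remaining 952 bytes into the
+ * next descriptor; *head advances by two slots in total.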
+ */
+static_always_inline void
+memif_copy_buffer_to_tx_ring (vlib_main_t * vm, vlib_node_runtime_t * node,
+ memif_if_t * mif, u32 bi, memif_ring_t * ring,
+ u16 * head, u16 mask)
+{
+ vlib_buffer_t *b0;
+ void *mb0;
+ u32 total = 0, len;
+
+ mb0 = memif_get_buffer (mif, ring, *head);
+ ring->desc[*head].flags = 0;
+ do
+ {
+ b0 = vlib_get_buffer (vm, bi);
+ len = b0->current_length;
+ if (PREDICT_FALSE (ring->desc[*head].buffer_length < (total + len)))
+ {
+ if (PREDICT_TRUE (total))
+ {
+ ring->desc[*head].length = total;
+ total = 0;
+ ring->desc[*head].flags |= MEMIF_DESC_FLAG_NEXT;
+ *head = (*head + 1) & mask;
+ mb0 = memif_get_buffer (mif, ring, *head);
+ ring->desc[*head].flags = 0;
+ }
+ }
+ if (PREDICT_TRUE (ring->desc[*head].buffer_length >= (total + len)))
+ {
+ clib_memcpy (mb0 + total, vlib_buffer_get_current (b0),
+ CLIB_CACHE_LINE_BYTES);
+ if (len > CLIB_CACHE_LINE_BYTES)
+ clib_memcpy (mb0 + CLIB_CACHE_LINE_BYTES + total,
+ vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES,
+ len - CLIB_CACHE_LINE_BYTES);
+ total += len;
+ }
+ else
+ {
+ vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_TRUNC_PACKET,
+ 1);
+ break;
+ }
+ }
+ while ((bi = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) ? b0->next_buffer : 0));
+
+ if (PREDICT_TRUE (total))
+ {
+ ring->desc[*head].length = total;
+ *head = (*head + 1) & mask;
+ }
+}
+
+static_always_inline uword
+memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, memif_if_t * mif,
+ memif_ring_type_t type)
+{
+ u8 qid;
+ memif_ring_t *ring;
+ u32 *buffers = vlib_frame_args (frame);
+ u32 n_left = frame->n_vectors;
+ u16 ring_size, mask;
+ u16 head, tail;
+ u16 free_slots;
+ u32 thread_index = vlib_get_thread_index ();
+ u8 tx_queues = vec_len (mif->tx_queues);
+ memif_queue_t *mq;
+
+ if (PREDICT_FALSE (tx_queues == 0))
+ {
+ vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_TX_QUEUES,
+ n_left);
+ goto error;
+ }
+
+ if (tx_queues < vec_len (vlib_mains))
+ {
+ qid = thread_index % tx_queues;
+ clib_spinlock_lock_if_init (&mif->lockp);
+ }
+ else
+ {
+ qid = thread_index;
+ }
+ mq = vec_elt_at_index (mif->tx_queues, qid);
+ ring = mq->ring;
+ ring_size = 1 << mq->log2_ring_size;
+ mask = ring_size - 1;
+
+ /* compute free slots in the ring */
+
+ head = ring->head;
+ tail = ring->tail;
+
+ if (tail > head)
+ free_slots = tail - head;
+ else
+ free_slots = ring_size - head + tail;
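+
+ /* e.g. ring_size 1024, head 1000, tail 10: the producer may still
+ fill 1024 - 1000 + 10 = 34 slots before catching up with the
+ consumer */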
+
+ while (n_left > 5 && free_slots > 1)
+ {
+ if (PREDICT_TRUE (head + 5 < ring_size))
+ {
+ CLIB_PREFETCH (memif_get_buffer (mif, ring, head + 2),
+ CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (memif_get_buffer (mif, ring, head + 3),
+ CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (&ring->desc[head + 4], CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (&ring->desc[head + 5], CLIB_CACHE_LINE_BYTES, STORE);
+ }
+ else
+ {
+ CLIB_PREFETCH (memif_get_buffer (mif, ring, (head + 2) & mask),
+ CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (memif_get_buffer (mif, ring, (head + 3) & mask),
+ CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (&ring->desc[(head + 4) & mask],
+ CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (&ring->desc[(head + 5) & mask],
+ CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ memif_prefetch_buffer_and_data (vm, buffers[2]);
+ memif_prefetch_buffer_and_data (vm, buffers[3]);
+
+ memif_copy_buffer_to_tx_ring (vm, node, mif, buffers[0], ring, &head,
+ mask);
+ memif_copy_buffer_to_tx_ring (vm, node, mif, buffers[1], ring, &head,
+ mask);
+
+ buffers += 2;
+ n_left -= 2;
+ free_slots -= 2;
+ }
+
+ while (n_left && free_slots)
+ {
+ memif_copy_buffer_to_tx_ring (vm, node, mif, buffers[0], ring, &head,
+ mask);
+ buffers++;
+ n_left--;
+ free_slots--;
+ }
+
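+ /* make sure the descriptor writes are visible to the peer before the
+ head pointer moves */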
+ CLIB_MEMORY_STORE_BARRIER ();
+ ring->head = head;
+
+ clib_spinlock_unlock_if_init (&mif->lockp);
+
+ if (n_left)
+ {
+ vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
+ n_left);
+ }
+
+ if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
+ {
+ u64 b = 1;
+ CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b));
+ mq->int_count++;
+ }
+
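+ /* packets were copied into the shared ring (or dropped), so the vlib
+ buffers backing this frame are always freed here, including on the
+ error path */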
+error:
+ vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+static uword
+memif_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ memif_main_t *nm = &memif_main;
+ vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
+ memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
+
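+ /* a slave produces on slave-to-master (S2M) rings and a master on
+ master-to-slave (M2S) rings; the opposite direction is drained by
+ the input node */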
+ if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
+ return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M);
+ else
+ return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S);
+}
+
+static void
+memif_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ memif_main_t *apm = &memif_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ mif->per_interface_next_index = node_index;
+ return;
+ }
+
+ mif->per_interface_next_index =
+ vlib_node_add_next (vlib_get_main (), memif_input_node.index, node_index);
+}
+
+static void
+memif_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+static clib_error_t *
+memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
+ vnet_hw_interface_rx_mode mode)
+{
+ memif_main_t *mm = &memif_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+ memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
+
+ if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT;
+ else
+ mq->ring->flags &= ~MEMIF_RING_FLAG_MASK_INT;
+
+ return 0;
+}
+
+static clib_error_t *
+memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ memif_main_t *mm = &memif_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+ clib_error_t *error = 0;
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ mif->flags |= MEMIF_IF_FLAG_ADMIN_UP;
+ else
+ mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP;
+
+ return error;
+}
+
+static clib_error_t *
+memif_subif_add_del_function (vnet_main_t * vnm,
+ u32 hw_if_index,
+ struct vnet_sw_interface_t *st, int is_add)
+{
+ /* Nothing for now */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (memif_device_class) = {
+ .name = "memif",
+ .tx_function = memif_interface_tx,
+ .format_device_name = format_memif_device_name,
+ .format_device = format_memif_device,
+ .format_tx_trace = format_memif_tx_trace,
+ .tx_function_n_errors = MEMIF_TX_N_ERROR,
+ .tx_function_error_strings = memif_tx_func_error_strings,
+ .rx_redirect_to_node = memif_set_interface_next_node,
+ .clear_counters = memif_clear_hw_interface_counters,
+ .admin_up_down_function = memif_interface_admin_up_down,
+ .subif_add_del_function = memif_subif_add_del_function,
+ .rx_mode_change_function = memif_interface_rx_mode_change,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH(memif_device_class,
+ memif_interface_tx)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api
new file mode 100644
index 00000000..c9632d10
--- /dev/null
+++ b/src/plugins/memif/memif.api
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Create memory interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param role - role of the interface in the connection (master/slave)
+ @param mode - interface mode
+ @param rx_queues - number of rx queues (only valid for slave)
+ @param tx_queues - number of tx queues (only valid for slave)
+ @param id - 32-bit integer used to authenticate and match opposite sides
+ of the connection
+ @param socket_filename - filename of the socket to be used for connection
+ establishment
+ @param ring_size - the number of entries of RX/TX rings
+ @param buffer_size - size of the buffer allocated for each ring entry
+ @param hw_addr - interface MAC address
+*/
+define memif_create
+{
+ u32 client_index;
+ u32 context;
+
+ u8 role; /* 0 = master, 1 = slave */
+ u8 mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */
+ u8 rx_queues; /* optional, default is 1 */
+ u8 tx_queues; /* optional, default is 1 */
+ u32 id; /* optional, default is 0 */
+ u8 socket_filename[128]; /* optional, default is "/var/vpp/memif.sock" */
+ u8 secret[24]; /* optional, default is "" */
+ u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */
+ u16 buffer_size; /* optional, default is 2048 bytes */
+ u8 hw_addr[6]; /* optional, randomly generated if not defined */
+};
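+
+/*
+ * Example (vpp_api_test syntax registered by memif_test.c, shown here
+ * for illustration only):
+ * memif_create id 0 socket /var/vpp/memif.sock ring_size 2048 slave
+ */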
+
+/** \brief Create memory interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index of the newly created interface
+*/
+define memif_create_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Delete memory interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface to delete
+*/
+autoreply define memif_delete
+{
+ u32 client_index;
+ u32 context;
+
+ u32 sw_if_index;
+};
+
+/** \brief Memory interface details structure
+ @param context - sender context, to match reply w/ request (memif_dump)
+ @param sw_if_index - index of the interface
+ @param if_name - name of the interface
+ @param hw_addr - interface MAC address
+ @param id - id associated with the interface
+ @param role - role of the interface in the connection (master/slave)
+ @param mode - interface mode
+ @param socket_filename - name of the socket used by this interface
+ to establish new connections
+ @param ring_size - the number of entries of RX/TX rings
+ @param buffer_size - size of the buffer allocated for each ring entry
+ @param admin_up_down - interface administrative status
+ @param link_up_down - interface link status
+*/
+define memif_details
+{
+ u32 context;
+
+ u32 sw_if_index;
+ u8 if_name[64];
+ u8 hw_addr[6];
+
+ /* memif specific parameters */
+ u32 id;
+ u8 role; /* 0 = master, 1 = slave */
+ u8 mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */
+ u8 socket_filename[128];
+ u32 ring_size;
+ u16 buffer_size; /* size of the buffer allocated for each ring entry */
+
+ /* 1 = up, 0 = down */
+ u8 admin_up_down;
+ u8 link_up_down;
+};
+
+/** \brief Dump all memory interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define memif_dump
+{
+ u32 client_index;
+ u32 context;
+};
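+
+/*
+ * A memif_dump request is answered with one memif_details message per
+ * configured interface; clients typically follow it with a
+ * control-ping to detect the end of the stream (see memif_test.c).
+ */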
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c
new file mode 100644
index 00000000..a3be49fa
--- /dev/null
+++ b/src/plugins/memif/memif.c
@@ -0,0 +1,819 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/eventfd.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/linux/syscall.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vpp/app/version.h>
+#include <memif/memif.h>
+#include <memif/private.h>
+
+memif_main_t memif_main;
+
+static u32
+memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
+{
+ /* nothing for now */
+ return 0;
+}
+
+static void
+memif_queue_intfd_close (memif_queue_t * mq)
+{
+ if (mq->int_clib_file_index != ~0)
+ {
+ memif_file_del_by_index (mq->int_clib_file_index);
+ mq->int_clib_file_index = ~0;
+ mq->int_fd = -1;
+ }
+ else if (mq->int_fd > -1)
+ {
+ close (mq->int_fd);
+ mq->int_fd = -1;
+ }
+}
+
+void
+memif_disconnect (memif_if_t * mif, clib_error_t * err)
+{
+ memif_main_t *mm = &memif_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ memif_region_t *mr;
+ memif_queue_t *mq;
+ int i;
+
+ if (mif == 0)
+ return;
+
+ DBG ("disconnect %u (%v)", mif->dev_instance, err ? err->what : 0);
+
+ if (err)
+ {
+ clib_error_t *e = 0;
+ mif->local_disc_string = vec_dup (err->what);
+ if (mif->conn_fd > -1)
+ e = memif_msg_send_disconnect (mif, err);
+ clib_error_free (e);
+ }
+
+ /* set interface down */
+ mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING);
+ if (mif->hw_if_index != ~0)
+ vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0);
+
+ /* close connection socket */
+ if (mif->conn_clib_file_index != ~0)
+ {
+ memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files,
+ mif->socket_file_index);
+ hash_unset (msf->dev_instance_by_fd, mif->conn_fd);
+ memif_file_del_by_index (mif->conn_clib_file_index);
+ mif->conn_clib_file_index = ~0;
+ }
+ else if (mif->conn_fd > -1)
+ close (mif->conn_fd);
+ mif->conn_fd = -1;
+
+ vec_foreach_index (i, mif->rx_queues)
+ {
+ mq = vec_elt_at_index (mif->rx_queues, i);
+ if (mq->ring)
+ {
+ int rv;
+ rv = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, i);
+ if (rv)
+ DBG ("Warning: unable to unassign interface %d, "
+ "queue %d: rc=%d", mif->hw_if_index, i, rv);
+ mq->ring = 0;
+ }
+ }
+
+ /* free tx and rx queues */
+ vec_foreach (mq, mif->rx_queues) memif_queue_intfd_close (mq);
+ vec_free (mif->rx_queues);
+
+ vec_foreach (mq, mif->tx_queues) memif_queue_intfd_close (mq);
+ vec_free (mif->tx_queues);
+
+ /* free memory regions */
+ vec_foreach (mr, mif->regions)
+ {
+ int rv;
+ if ((rv = munmap (mr->shm, mr->region_size)))
+ clib_warning ("munmap failed, rv = %d", rv);
+ if (mr->fd > -1)
+ close (mr->fd);
+ }
+ vec_free (mif->regions);
+
+ mif->remote_pid = 0;
+ vec_free (mif->remote_name);
+ vec_free (mif->remote_if_name);
+ clib_fifo_free (mif->msg_queue);
+}
+
+static clib_error_t *
+memif_int_fd_read_ready (clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ u16 qid = uf->private_data & 0xFFFF;
+ memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 16);
+ memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
+ u64 b;
+ ssize_t size;
+
+ size = read (uf->file_descriptor, &b, sizeof (b));
+ if (size < 0)
+ {
+ DBG_UNIX_LOG ("Failed to read from socket");
+ return 0;
+ }
+
+ vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, qid);
+ mq->int_count++;
+
+ return 0;
+}
+
+
+clib_error_t *
+memif_connect (memif_if_t * mif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_file_t template = { 0 };
+ memif_region_t *mr;
+ int i;
+
+ DBG ("connect %u", mif->dev_instance);
+
+ vec_free (mif->local_disc_string);
+ vec_free (mif->remote_disc_string);
+
+ vec_foreach (mr, mif->regions)
+ {
+ if (mr->shm)
+ continue;
+
+ if (mr->fd < 0)
+ clib_error_return (0, "no memory region fd");
+
+ if ((mr->shm = mmap (NULL, mr->region_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, mr->fd, 0)) == MAP_FAILED)
+ return clib_error_return_unix (0, "mmap");
+ }
+
+ template.read_function = memif_int_fd_read_ready;
+
+ vec_foreach_index (i, mif->tx_queues)
+ {
+ memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i);
+
+ mq->ring = mif->regions[mq->region].shm + mq->offset;
+ if (mq->ring->cookie != MEMIF_COOKIE)
+ return clib_error_return (0, "wrong cookie on tx ring %u", i);
+ }
+
+ vec_foreach_index (i, mif->rx_queues)
+ {
+ memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i);
+ int rv;
+
+ mq->ring = mif->regions[mq->region].shm + mq->offset;
+ if (mq->ring->cookie != MEMIF_COOKIE)
+ return clib_error_return (0, "wrong cookie on tx ring %u", i);
+
+ if (mq->int_fd > -1)
+ {
+ template.file_descriptor = mq->int_fd;
+ template.private_data = (mif->dev_instance << 16) | (i & 0xFFFF);
+ memif_file_add (&mq->int_clib_file_index, &template);
+ }
+ vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0);
+ rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i,
+ VNET_HW_INTERFACE_RX_MODE_DEFAULT);
+ if (rv)
+ clib_warning
+ ("Warning: unable to set rx mode for interface %d queue %d: "
+ "rc=%d", mif->hw_if_index, i, rv);
+ else
+ {
+ vnet_hw_interface_rx_mode rxmode;
+ vnet_hw_interface_get_rx_mode (vnm, mif->hw_if_index, i, &rxmode);
+
+ if (rxmode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT;
+ }
+ }
+
+ mif->flags &= ~MEMIF_IF_FLAG_CONNECTING;
+ mif->flags |= MEMIF_IF_FLAG_CONNECTED;
+
+ vnet_hw_interface_set_flags (vnm, mif->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ return 0;
+}
+
+static_always_inline memif_ring_t *
+memif_get_ring (memif_if_t * mif, memif_ring_type_t type, u16 ring_num)
+{
+ if (vec_len (mif->regions) == 0)
+ return NULL;
+ void *p = mif->regions[0].shm;
+ int ring_size =
+ sizeof (memif_ring_t) +
+ sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size);
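+ /* all S2M rings precede all M2S rings in region 0; e.g. with
+ num_s2m_rings == 2, M2S ring 1 lives at offset (1 + 1 * 2) *
+ ring_size */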
+ p += (ring_num + type * mif->run.num_s2m_rings) * ring_size;
+
+ return (memif_ring_t *) p;
+}
+
+clib_error_t *
+memif_init_regions_and_queues (memif_if_t * mif)
+{
+ memif_ring_t *ring = NULL;
+ int i, j;
+ u64 buffer_offset;
+ memif_region_t *r;
+ clib_mem_vm_alloc_t alloc = { 0 };
+ clib_error_t *err;
+
+ vec_validate_aligned (mif->regions, 0, CLIB_CACHE_LINE_BYTES);
+ r = vec_elt_at_index (mif->regions, 0);
+
+ buffer_offset = (mif->run.num_s2m_rings + mif->run.num_m2s_rings) *
+ (sizeof (memif_ring_t) +
+ sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size));
+
+ r->region_size = buffer_offset +
+ mif->run.buffer_size * (1 << mif->run.log2_ring_size) *
+ (mif->run.num_s2m_rings + mif->run.num_m2s_rings);
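+ /* region 0 layout: ring metadata for every queue first (buffer_offset
+ bytes), followed by the packet buffers, one buffer_size chunk per
+ descriptor slot */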
+
+ alloc.name = "memif region";
+ alloc.size = r->region_size;
+ alloc.flags = CLIB_MEM_VM_F_SHARED;
+
+ err = clib_mem_vm_ext_alloc (&alloc);
+ if (err)
+ return err;
+
+ r->fd = alloc.fd;
+ r->shm = alloc.addr;
+
+ for (i = 0; i < mif->run.num_s2m_rings; i++)
+ {
+ ring = memif_get_ring (mif, MEMIF_RING_S2M, i);
+ ring->head = ring->tail = 0;
+ ring->cookie = MEMIF_COOKIE;
+ for (j = 0; j < (1 << mif->run.log2_ring_size); j++)
+ {
+ u16 slot = i * (1 << mif->run.log2_ring_size) + j;
+ ring->desc[j].region = 0;
+ ring->desc[j].offset =
+ buffer_offset + (u32) (slot * mif->run.buffer_size);
+ ring->desc[j].buffer_length = mif->run.buffer_size;
+ }
+ }
+ for (i = 0; i < mif->run.num_m2s_rings; i++)
+ {
+ ring = memif_get_ring (mif, MEMIF_RING_M2S, i);
+ ring->head = ring->tail = 0;
+ ring->cookie = MEMIF_COOKIE;
+ for (j = 0; j < (1 << mif->run.log2_ring_size); j++)
+ {
+ u16 slot =
+ (i + mif->run.num_s2m_rings) * (1 << mif->run.log2_ring_size) + j;
+ ring->desc[j].region = 0;
+ ring->desc[j].offset =
+ buffer_offset + (u32) (slot * mif->run.buffer_size);
+ ring->desc[j].buffer_length = mif->run.buffer_size;
+ }
+ }
+
+ ASSERT (mif->tx_queues == 0);
+ vec_validate_aligned (mif->tx_queues, mif->run.num_s2m_rings - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_foreach_index (i, mif->tx_queues)
+ {
+ memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i);
+ if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0)
+ return clib_error_return_unix (0, "eventfd[tx queue %u]", i);
+ mq->int_clib_file_index = ~0;
+ mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i);
+ mq->log2_ring_size = mif->cfg.log2_ring_size;
+ mq->region = 0;
+ mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm;
+ mq->last_head = 0;
+ }
+
+ ASSERT (mif->rx_queues == 0);
+ vec_validate_aligned (mif->rx_queues, mif->run.num_m2s_rings - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_foreach_index (i, mif->rx_queues)
+ {
+ memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i);
+ if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0)
+ return clib_error_return_unix (0, "eventfd[rx queue %u]", i);
+ mq->int_clib_file_index = ~0;
+ mq->ring = memif_get_ring (mif, MEMIF_RING_M2S, i);
+ mq->log2_ring_size = mif->cfg.log2_ring_size;
+ mq->region = 0;
+ mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm;
+ mq->last_head = 0;
+ }
+
+ return 0;
+}
+
+static uword
+memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif;
+ struct sockaddr_un sun;
+ int sockfd;
+ uword *event_data = 0, event_type;
+ u8 enabled = 0;
+ f64 start_time, last_run_duration = 0, now;
+
+ sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0);
+ if (sockfd < 0)
+ {
+ DBG_UNIX_LOG ("socket AF_UNIX");
+ return 0;
+ }
+ sun.sun_family = AF_UNIX;
+
+ while (1)
+ {
+ if (enabled)
+ vlib_process_wait_for_event_or_clock (vm, (f64) 3 -
+ last_run_duration);
+ else
+ vlib_process_wait_for_event (vm);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ switch (event_type)
+ {
+ case ~0:
+ break;
+ case MEMIF_PROCESS_EVENT_START:
+ enabled = 1;
+ break;
+ case MEMIF_PROCESS_EVENT_STOP:
+ enabled = 0;
+ continue;
+ default:
+ ASSERT (0);
+ }
+
+ last_run_duration = start_time = vlib_time_now (vm);
+ /* *INDENT-OFF* */
+ pool_foreach (mif, mm->interfaces,
+ ({
+ memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index);
+ /* Allow no more than 10us without a pause */
+ now = vlib_time_now (vm);
+ if (now > start_time + 10e-6)
+ {
+ vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */
+ start_time = vlib_time_now (vm);
+ }
+
+ if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0)
+ continue;
+
+ if (mif->flags & MEMIF_IF_FLAG_CONNECTING)
+ continue;
+
+ if (mif->flags & MEMIF_IF_FLAG_CONNECTED)
+ continue;
+
+ if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
+ {
+ strncpy (sun.sun_path, (char *) msf->filename,
+ sizeof (sun.sun_path) - 1);
+
+ if (connect
+ (sockfd, (struct sockaddr *) &sun,
+ sizeof (struct sockaddr_un)) == 0)
+ {
+ clib_file_t t = { 0 };
+
+ mif->conn_fd = sockfd;
+ t.read_function = memif_slave_conn_fd_read_ready;
+ t.write_function = memif_slave_conn_fd_write_ready;
+ t.error_function = memif_slave_conn_fd_error;
+ t.file_descriptor = mif->conn_fd;
+ t.private_data = mif->dev_instance;
+ memif_file_add (&mif->conn_clib_file_index, &t);
+ hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance);
+
+ mif->flags |= MEMIF_IF_FLAG_CONNECTING;
+
+ /* grab another fd */
+ sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0);
+ if (sockfd < 0)
+ {
+ DBG_UNIX_LOG ("socket AF_UNIX");
+ return 0;
+ }
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+ last_run_duration = vlib_time_now (vm) - last_run_duration;
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (memif_process_node,static) = {
+ .function = memif_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "memif-process",
+};
+/* *INDENT-ON* */
+
+int
+memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ memif_main_t *mm = &memif_main;
+ memif_socket_file_t *msf =
+ vec_elt_at_index (mm->socket_files, mif->socket_file_index);
+ clib_error_t *err;
+
+ mif->flags |= MEMIF_IF_FLAG_DELETING;
+ vec_free (mif->local_disc_string);
+ vec_free (mif->remote_disc_string);
+
+ /* bring down the interface */
+ vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0);
+ vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0);
+
+ err = clib_error_return (0, "interface deleted");
+ memif_disconnect (mif, err);
+ clib_error_free (err);
+
+ /* remove the interface */
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ vnet_delete_hw_interface (vnm, mif->hw_if_index);
+ else
+ ethernet_delete_interface (vnm, mif->hw_if_index);
+ mif->hw_if_index = ~0;
+
+ /* free interface data structures */
+ clib_spinlock_free (&mif->lockp);
+ mhash_unset (&msf->dev_instance_by_id, &mif->id, 0);
+
+ /* remove socket file */
+ if (--(msf->ref_cnt) == 0)
+ {
+ if (msf->is_listener)
+ {
+ uword *x;
+ memif_file_del_by_index (msf->clib_file_index);
+ vec_foreach (x, msf->pending_file_indices)
+ {
+ memif_file_del_by_index (*x);
+ }
+ vec_free (msf->pending_file_indices);
+ }
+ mhash_free (&msf->dev_instance_by_id);
+ hash_free (msf->dev_instance_by_fd);
+ mhash_unset (&mm->socket_file_index_by_filename, msf->filename, 0);
+ vec_free (msf->filename);
+ pool_put (mm->socket_files, msf);
+ }
+
+ memset (mif, 0, sizeof (*mif));
+ pool_put (mm->interfaces, mif);
+
+ if (pool_elts (mm->interfaces) == 0)
+ vlib_process_signal_event (vm, memif_process_node.index,
+ MEMIF_PROCESS_EVENT_STOP, 0);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) =
+{
+ .name = "memif-ip",
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+/* *INDENT-ON* */
+
+int
+memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
+{
+ memif_main_t *mm = &memif_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ memif_if_t *mif = 0;
+ vnet_sw_interface_t *sw;
+ clib_error_t *error = 0;
+ int ret = 0;
+ uword *p;
+ vnet_hw_interface_t *hw;
+ memif_socket_file_t *msf = 0;
+ u8 *socket_filename;
+ int rv = 0;
+
+ if (args->socket_filename == 0 || args->socket_filename[0] != '/')
+ {
+ clib_error_t *error;
+ error = vlib_unix_recursive_mkdir (vlib_unix_get_runtime_dir ());
+ if (error)
+ {
+ clib_error_free (error);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+
+ if (args->socket_filename == 0)
+ socket_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (),
+ MEMIF_DEFAULT_SOCKET_FILENAME, 0);
+ else
+ socket_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (),
+ args->socket_filename, 0);
+
+ }
+ else
+ socket_filename = vec_dup (args->socket_filename);
+
+ p = mhash_get (&mm->socket_file_index_by_filename, socket_filename);
+
+ if (p)
+ {
+ msf = vec_elt_at_index (mm->socket_files, p[0]);
+
+ /* existing socket file can be either master or slave but cannot be both */
+ if (!msf->is_listener != !args->is_master)
+ {
+ rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ goto done;
+ }
+
+ p = mhash_get (&msf->dev_instance_by_id, &args->id);
+ if (p)
+ {
+ rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ goto done;
+ }
+ }
+
+ /* Create new socket file */
+ if (msf == 0)
+ {
+ struct stat file_stat;
+ /* If we are creating a listener, make sure the file doesn't exist;
+ * if it does and it is a stale socket file, delete it */
+ if (args->is_master &&
+ (stat ((char *) socket_filename, &file_stat) == 0))
+ {
+ if (S_ISSOCK (file_stat.st_mode))
+ {
+ unlink ((char *) socket_filename);
+ }
+ else
+ {
+ error = clib_error_return (0, "File exists for %s",
+ socket_filename);
+ clib_error_report (error);
+ rv = VNET_API_ERROR_VALUE_EXIST;
+ goto done;
+ }
+ }
+ pool_get (mm->socket_files, msf);
+ memset (msf, 0, sizeof (memif_socket_file_t));
+ mhash_init (&msf->dev_instance_by_id, sizeof (uword),
+ sizeof (memif_interface_id_t));
+ msf->dev_instance_by_fd = hash_create (0, sizeof (uword));
+ msf->filename = socket_filename;
+ msf->fd = -1;
+ msf->is_listener = (args->is_master != 0);
+ socket_filename = 0;
+ mhash_set (&mm->socket_file_index_by_filename, msf->filename,
+ msf - mm->socket_files, 0);
+ DBG ("creating socket file %s", msf->filename);
+ }
+
+ pool_get (mm->interfaces, mif);
+ memset (mif, 0, sizeof (*mif));
+ mif->dev_instance = mif - mm->interfaces;
+ mif->socket_file_index = msf - mm->socket_files;
+ mif->id = args->id;
+ mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0;
+ mif->conn_clib_file_index = ~0;
+ mif->conn_fd = -1;
+ mif->mode = args->mode;
+ if (args->secret)
+ mif->secret = vec_dup (args->secret);
+
+ if (tm->n_vlib_mains > 1)
+ clib_spinlock_init (&mif->lockp);
+
+ if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+
+ if (!args->hw_addr_set)
+ {
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+
+ memcpy (args->hw_addr + 2, &rnd, sizeof (rnd));
+ args->hw_addr[0] = 2;
+ args->hw_addr[1] = 0xfe;
+ }
+ error = ethernet_register_interface (vnm, memif_device_class.index,
+ mif->dev_instance, args->hw_addr,
+ &mif->hw_if_index,
+ memif_eth_flag_change);
+ }
+ else if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ {
+ mif->hw_if_index =
+ vnet_register_interface (vnm, memif_device_class.index,
+ mif->dev_instance,
+ memif_ip_hw_if_class.index,
+ mif->dev_instance);
+ }
+ else
+ error = clib_error_return (0, "unsupported interface mode");
+
+ if (error)
+ {
+ clib_error_report (error);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_2;
+ goto error;
+ }
+
+ sw = vnet_get_hw_sw_interface (vnm, mif->hw_if_index);
+ mif->sw_if_index = sw->sw_if_index;
+
+ mif->cfg.log2_ring_size = args->log2_ring_size;
+ mif->cfg.buffer_size = args->buffer_size;
+ mif->cfg.num_s2m_rings =
+ args->is_master ? args->rx_queues : args->tx_queues;
+ mif->cfg.num_m2s_rings =
+ args->is_master ? args->tx_queues : args->rx_queues;
+
+ args->sw_if_index = mif->sw_if_index;
+
+ /* If this is new one, start listening */
+ if (msf->is_listener && msf->ref_cnt == 0)
+ {
+ struct sockaddr_un un = { 0 };
+ struct stat file_stat;
+ int on = 1;
+
+ if ((msf->fd = socket (AF_UNIX, SOCK_SEQPACKET, 0)) < 0)
+ {
+ ret = VNET_API_ERROR_SYSCALL_ERROR_4;
+ goto error;
+ }
+
+ un.sun_family = AF_UNIX;
+ strncpy ((char *) un.sun_path, (char *) msf->filename,
+ sizeof (un.sun_path) - 1);
+
+ if (setsockopt (msf->fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof (on)) < 0)
+ {
+ ret = VNET_API_ERROR_SYSCALL_ERROR_5;
+ goto error;
+ }
+ if (bind (msf->fd, (struct sockaddr *) &un, sizeof (un)) == -1)
+ {
+ ret = VNET_API_ERROR_SYSCALL_ERROR_6;
+ goto error;
+ }
+ if (listen (msf->fd, 1) == -1)
+ {
+ ret = VNET_API_ERROR_SYSCALL_ERROR_7;
+ goto error;
+ }
+
+ if (stat ((char *) msf->filename, &file_stat) == -1)
+ {
+ ret = VNET_API_ERROR_SYSCALL_ERROR_8;
+ goto error;
+ }
+
+ msf->clib_file_index = ~0;
+ clib_file_t template = { 0 };
+ template.read_function = memif_conn_fd_accept_ready;
+ template.file_descriptor = msf->fd;
+ template.private_data = mif->socket_file_index;
+ memif_file_add (&msf->clib_file_index, &template);
+ }
+
+ msf->ref_cnt++;
+
+ if (args->is_master == 0)
+ mif->flags |= MEMIF_IF_FLAG_IS_SLAVE;
+
+ hw = vnet_get_hw_interface (vnm, mif->hw_if_index);
+ hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
+ vnet_hw_interface_set_input_node (vnm, mif->hw_if_index,
+ memif_input_node.index);
+
+ mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0);
+
+ if (pool_elts (mm->interfaces) == 1)
+ {
+ vlib_process_signal_event (vm, memif_process_node.index,
+ MEMIF_PROCESS_EVENT_START, 0);
+ }
+ goto done;
+
+error:
+ if (mif->hw_if_index != ~0)
+ {
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ vnet_delete_hw_interface (vnm, mif->hw_if_index);
+ else
+ ethernet_delete_interface (vnm, mif->hw_if_index);
+ mif->hw_if_index = ~0;
+ }
+ memif_delete_if (vm, mif);
+ return ret;
+
+done:
+ vec_free (socket_filename);
+ return rv;
+}
+
+
+static clib_error_t *
+memif_init (vlib_main_t * vm)
+{
+ memif_main_t *mm = &memif_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ memset (mm, 0, sizeof (memif_main_t));
+
+ /* initialize binary API */
+ memif_plugin_api_hookup (vm);
+
+ mhash_init_c_string (&mm->socket_file_index_by_filename, sizeof (uword));
+
+ vec_validate_aligned (mm->rx_buffers, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (memif_init);
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Packet Memory Interface (experimetal)",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h
new file mode 100644
index 00000000..11918eab
--- /dev/null
+++ b/src/plugins/memif/memif.h
@@ -0,0 +1,185 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _MEMIF_H_
+#define _MEMIF_H_
+
+#ifndef MEMIF_CACHELINE_SIZE
+#define MEMIF_CACHELINE_SIZE 64
+#endif
+
+#define MEMIF_COOKIE 0x3E31F10
+#define MEMIF_VERSION_MAJOR 1
+#define MEMIF_VERSION_MINOR 0
+#define MEMIF_VERSION ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR)
+
+/*
+ * Type definitions
+ */
+
+typedef enum memif_msg_type
+{
+ MEMIF_MSG_TYPE_NONE = 0,
+ MEMIF_MSG_TYPE_ACK = 1,
+ MEMIF_MSG_TYPE_HELLO = 2,
+ MEMIF_MSG_TYPE_INIT = 3,
+ MEMIF_MSG_TYPE_ADD_REGION = 4,
+ MEMIF_MSG_TYPE_ADD_RING = 5,
+ MEMIF_MSG_TYPE_CONNECT = 6,
+ MEMIF_MSG_TYPE_CONNECTED = 7,
+ MEMIF_MSG_TYPE_DISCONNECT = 8,
+} memif_msg_type_t;
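+
+/*
+ * Typical handshake (over the unix control socket): the master sends
+ * HELLO when the slave connects, the slave answers INIT, then sends
+ * ADD_REGION for each shared-memory region and ADD_RING for each
+ * queue, and finishes with CONNECT; the master replies CONNECTED.
+ * Either side may send DISCONNECT at any time.
+ */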
+
+typedef enum
+{
+ MEMIF_RING_S2M = 0,
+ MEMIF_RING_M2S = 1
+} memif_ring_type_t;
+
+typedef enum
+{
+ MEMIF_INTERFACE_MODE_ETHERNET = 0,
+ MEMIF_INTERFACE_MODE_IP = 1,
+ MEMIF_INTERFACE_MODE_PUNT_INJECT = 2,
+} memif_interface_mode_t;
+
+typedef uint16_t memif_region_index_t;
+typedef uint64_t memif_region_offset_t;
+typedef uint64_t memif_region_size_t;
+typedef uint16_t memif_ring_index_t;
+typedef uint32_t memif_interface_id_t;
+typedef uint16_t memif_version_t;
+typedef uint8_t memif_log2_ring_size_t;
+
+/*
+ * Socket messages
+ */
+
+typedef struct __attribute__ ((packed))
+{
+ uint8_t name[32];
+ memif_version_t min_version;
+ memif_version_t max_version;
+ memif_region_index_t max_region;
+ memif_ring_index_t max_m2s_ring;
+ memif_ring_index_t max_s2m_ring;
+ memif_log2_ring_size_t max_log2_ring_size;
+} memif_msg_hello_t;
+
+typedef struct __attribute__ ((packed))
+{
+ memif_version_t version;
+ memif_interface_id_t id;
+ memif_interface_mode_t mode:8;
+ uint8_t secret[24];
+ uint8_t name[32];
+} memif_msg_init_t;
+
+typedef struct __attribute__ ((packed))
+{
+ memif_region_index_t index;
+ memif_region_size_t size;
+} memif_msg_add_region_t;
+
+typedef struct __attribute__ ((packed))
+{
+ uint16_t flags;
+#define MEMIF_MSG_ADD_RING_FLAG_S2M (1 << 0)
+ memif_ring_index_t index;
+ memif_region_index_t region;
+ memif_region_offset_t offset;
+ memif_log2_ring_size_t log2_ring_size;
+} memif_msg_add_ring_t;
+
+typedef struct __attribute__ ((packed))
+{
+ uint8_t if_name[32];
+} memif_msg_connect_t;
+
+typedef struct __attribute__ ((packed))
+{
+ uint8_t if_name[32];
+} memif_msg_connected_t;
+
+typedef struct __attribute__ ((packed))
+{
+ uint32_t code;
+ uint8_t string[96];
+} memif_msg_disconnect_t;
+
+typedef struct __attribute__ ((packed, aligned (128)))
+{
+ memif_msg_type_t type:16;
+ union
+ {
+ memif_msg_hello_t hello;
+ memif_msg_init_t init;
+ memif_msg_add_region_t add_region;
+ memif_msg_add_ring_t add_ring;
+ memif_msg_connect_t connect;
+ memif_msg_connected_t connected;
+ memif_msg_disconnect_t disconnect;
+ };
+} memif_msg_t;
+
+_Static_assert (sizeof (memif_msg_t) == 128,
+ "Size of memif_msg_t must be 128");
+
+/*
+ * Ring and Descriptor Layout
+ */
+
+typedef struct __attribute__ ((packed))
+{
+ uint16_t flags;
+#define MEMIF_DESC_FLAG_NEXT (1 << 0)
+ memif_region_index_t region;
+ uint32_t buffer_length;
+ uint32_t length;
+ uint8_t reserved[4];
+ memif_region_offset_t offset;
+ uint64_t metadata;
+} memif_desc_t;
+
+_Static_assert (sizeof (memif_desc_t) == 32,
+ "Size of memif_dsct_t must be 32");
+
+#define MEMIF_CACHELINE_ALIGN_MARK(mark) \
+ uint8_t mark[0] __attribute__((aligned(MEMIF_CACHELINE_SIZE)))
+
+typedef struct
+{
+ MEMIF_CACHELINE_ALIGN_MARK (cacheline0);
+ uint32_t cookie;
+ uint16_t flags;
+#define MEMIF_RING_FLAG_MASK_INT 1
+ volatile uint16_t head;
+ MEMIF_CACHELINE_ALIGN_MARK (cacheline1);
+ volatile uint16_t tail;
+ MEMIF_CACHELINE_ALIGN_MARK (cacheline2);
+ memif_desc_t desc[0];
+} memif_ring_t;
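+
+/*
+ * head is advanced by the producer and tail by the consumer; the two
+ * fields sit on separate cache lines (see the align marks above) to
+ * avoid false sharing between the peers. head == tail is the empty
+ * state.
+ */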
+
+#endif /* _MEMIF_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/memif/memif_all_api_h.h b/src/plugins/memif/memif_all_api_h.h
new file mode 100644
index 00000000..9729ec16
--- /dev/null
+++ b/src/plugins/memif/memif_all_api_h.h
@@ -0,0 +1,18 @@
+/*
+ * memif_all_api_h.h - plug-in api #include file
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <memif/memif.api.h>
diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c
new file mode 100644
index 00000000..07347bc0
--- /dev/null
+++ b/src/plugins/memif/memif_api.c
@@ -0,0 +1,350 @@
+/*
+ *------------------------------------------------------------------
+ * memif_api.c - memif api
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/unix/unix.h>
+#include <memif/memif.h>
+#include <memif/private.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <memif/memif_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <memif/memif_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <memif/memif_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <memif/memif_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <memif/memif_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+#define REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = htons ((t)+mm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = htonl (rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+#define REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = htons ((t)+mm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = htonl (rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
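+
+/*
+ * REPLY_MACRO suffices when the reply carries only retval;
+ * REPLY_MACRO2 additionally runs "body" to fill extra reply fields,
+ * as the memif_create handler below does for sw_if_index.
+ */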
+
+#define foreach_memif_plugin_api_msg \
+_(MEMIF_CREATE, memif_create) \
+_(MEMIF_DELETE, memif_delete) \
+_(MEMIF_DUMP, memif_dump) \
+
+/**
+ * @brief Message handler for memif_create API.
+ * @param mp vl_api_memif_create_t * mp the api message
+ */
+void
+vl_api_memif_create_t_handler (vl_api_memif_create_t * mp)
+{
+ memif_main_t *mm = &memif_main;
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_memif_create_reply_t *rmp;
+ memif_create_if_args_t args = { 0 };
+ u32 ring_size = MEMIF_DEFAULT_RING_SIZE;
+ static const u8 empty_hw_addr[6];
+ int rv = 0;
+
+ /* id */
+ args.id = clib_net_to_host_u32 (mp->id);
+
+ /* socket filename */
+ mp->socket_filename[ARRAY_LEN (mp->socket_filename) - 1] = 0;
+ if (strlen ((char *) mp->socket_filename) > 0)
+ {
+ vec_validate (args.socket_filename,
+ strlen ((char *) mp->socket_filename));
+ strncpy ((char *) args.socket_filename, (char *) mp->socket_filename,
+ vec_len (args.socket_filename));
+ }
+
+ /* secret */
+ mp->secret[ARRAY_LEN (mp->secret) - 1] = 0;
+ if (strlen ((char *) mp->secret) > 0)
+ {
+ vec_validate (args.secret, strlen ((char *) mp->secret));
+ strncpy ((char *) args.secret, (char *) mp->secret,
+ vec_len (args.secret));
+ }
+
+ /* role */
+ args.is_master = (mp->role == 0);
+
+ /* mode */
+ args.mode = mp->mode;
+
+ /* rx/tx queues */
+ if (args.is_master == 0)
+ {
+ args.rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+ args.tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+ if (mp->rx_queues)
+ {
+ args.rx_queues = mp->rx_queues;
+ }
+ if (mp->tx_queues)
+ {
+ args.tx_queues = mp->tx_queues;
+ }
+ }
+
+ /* ring size */
+ if (mp->ring_size)
+ {
+ ring_size = ntohl (mp->ring_size);
+ }
+ if (!is_pow2 (ring_size))
+ {
+ rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto reply;
+ }
+ args.log2_ring_size = min_log2 (ring_size);
+
+ /* buffer size */
+ args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE;
+ if (mp->buffer_size)
+ {
+ args.buffer_size = ntohs (mp->buffer_size);
+ }
+
+ /* MAC address */
+ if (memcmp (mp->hw_addr, empty_hw_addr, 6) != 0)
+ {
+ memcpy (args.hw_addr, mp->hw_addr, 6);
+ args.hw_addr_set = 1;
+ }
+
+ rv = memif_create_if (vm, &args);
+
+ vec_free (args.socket_filename);
+ vec_free (args.secret);
+
+reply:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_MEMIF_CREATE_REPLY,
+ ({
+ rmp->sw_if_index = htonl (args.sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+/**
+ * @brief Message handler for memif_delete API.
+ * @param mp vl_api_memif_delete_t * mp the api message
+ */
+void
+vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp)
+{
+ memif_main_t *mm = &memif_main;
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_memif_delete_reply_t *rmp;
+ vnet_hw_interface_t *hi =
+ vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index));
+ memif_if_t *mif;
+ int rv = 0;
+
+ if (hi == NULL || memif_device_class.index != hi->dev_class_index)
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ else
+ {
+ mif = pool_elt_at_index (mm->interfaces, hi->dev_instance);
+ rv = memif_delete_if (vm, mif);
+ }
+
+ REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY);
+}
+
+static void
+send_memif_details (unix_shared_memory_queue_t * q,
+ memif_if_t * mif,
+ vnet_sw_interface_t * swif,
+ u8 * interface_name, u32 context)
+{
+ vl_api_memif_details_t *mp;
+ vnet_main_t *vnm = vnet_get_main ();
+ memif_main_t *mm = &memif_main;
+ memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files,
+ mif->socket_file_index);
+ vnet_hw_interface_t *hwif;
+
+ hwif = vnet_get_sup_hw_interface (vnm, swif->sw_if_index);
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->_vl_msg_id = htons (VL_API_MEMIF_DETAILS + mm->msg_id_base);
+ mp->context = context;
+
+ mp->sw_if_index = htonl (swif->sw_if_index);
+ strncpy ((char *) mp->if_name,
+ (char *) interface_name, ARRAY_LEN (mp->if_name) - 1);
+ memcpy (mp->hw_addr, hwif->hw_address, ARRAY_LEN (mp->hw_addr));
+
+ mp->id = clib_host_to_net_u32 (mif->id);
+ mp->role = (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ? 1 : 0;
+ strncpy ((char *) mp->socket_filename,
+ (char *) msf->filename, ARRAY_LEN (mp->socket_filename) - 1);
+
+ mp->ring_size = htonl (1 << mif->run.log2_ring_size);
+ mp->buffer_size = htons (mif->run.buffer_size);
+
+ mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0;
+ mp->link_up_down = (hwif->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? 1 : 0;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/**
+ * @brief Message handler for memif_dump API.
+ * @param mp vl_api_memif_dump_t * mp the api message
+ */
+void
+vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp)
+{
+ memif_main_t *mm = &memif_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *swif;
+ memif_if_t *mif;
+ u8 *if_name = 0;
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (mif, mm->interfaces,
+ ({
+ swif = vnet_get_sw_interface (vnm, mif->sw_if_index);
+
+ if_name = format (if_name, "%U%c",
+ format_vnet_sw_interface_name,
+ vnm, swif, 0);
+
+ send_memif_details (q, mif, swif, if_name, mp->context);
+ _vec_len (if_name) = 0;
+ }));
+ /* *INDENT-ON* */
+
+ vec_free (if_name);
+}
+
+#define vl_msg_name_crc_list
+#include <memif/memif_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (memif_main_t * mm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + mm->msg_id_base);
+ foreach_vl_msg_name_crc_memif;
+#undef _
+}
+
+/* Set up the API message handling tables */
+clib_error_t *
+memif_plugin_api_hookup (vlib_main_t * vm)
+{
+ memif_main_t *mm = &memif_main;
+ api_main_t *am = &api_main;
+ u8 *name;
+
+ /* Construct the API name */
+ name = format (0, "memif_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ mm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_memif_plugin_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (mm, am);
+
+ vec_free (name);
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/memif/memif_msg_enum.h b/src/plugins/memif/memif_msg_enum.h
new file mode 100644
index 00000000..74efee00
--- /dev/null
+++ b/src/plugins/memif/memif_msg_enum.h
@@ -0,0 +1,31 @@
+/*
+ * memif_msg_enum.h - vpp engine plug-in message enumeration
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_memif_msg_enum_h
+#define included_memif_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+#include <memif/memif_all_api_h.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_memif_msg_enum_h */
diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c
new file mode 100644
index 00000000..4ca7526d
--- /dev/null
+++ b/src/plugins/memif/memif_test.c
@@ -0,0 +1,372 @@
+/*
+ * memif VAT support
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <memif/memif.h>
+#include <memif/private.h>
+
+#define __plugin_msg_base memif_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* declare message IDs */
+#include <memif/memif_msg_enum.h>
+
+/* Get CRC codes of the messages defined outside of this plugin */
+#define vl_msg_name_crc_list
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+/* define message structures */
+#define vl_typedefs
+#include <vpp/api/vpe_all_api_h.h>
+#include <memif/memif_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <memif/memif_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <memif/memif_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <memif/memif_all_api_h.h>
+#undef vl_api_version
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} memif_test_main_t;
+
+memif_test_main_t memif_test_main;
+
+/* standard reply handlers */
+#define foreach_standard_reply_retval_handler \
+_(memif_delete_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = memif_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(MEMIF_CREATE_REPLY, memif_create_reply) \
+_(MEMIF_DELETE_REPLY, memif_delete_reply) \
+_(MEMIF_DETAILS, memif_details)
+
+static uword
+unformat_memif_queues (unformat_input_t * input, va_list * args)
+{
+ u32 *rx_queues = va_arg (*args, u32 *);
+ u32 *tx_queues = va_arg (*args, u32 *);
+
+ if (unformat (input, "rx-queues %u", rx_queues))
+ ;
+ if (unformat (input, "tx-queues %u", tx_queues))
+ ;
+
+ return 1;
+}
+
+/* memif-create API */
+static int
+api_memif_create (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_memif_create_t *mp;
+ u32 id = 0;
+ u8 *socket_filename = 0;
+ u8 *secret = 0;
+ u8 role = 1;
+ u32 ring_size = 0;
+ u32 buffer_size = 0;
+ u8 hw_addr[6] = { 0 };
+ u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+ u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+ int ret;
+ u8 mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "id %u", &id))
+ ;
+ else if (unformat (i, "socket %s", &socket_filename))
+ ;
+ else if (unformat (i, "secret %s", &secret))
+ ;
+ else if (unformat (i, "ring_size %u", &ring_size))
+ ;
+ else if (unformat (i, "buffer_size %u", &buffer_size))
+ ;
+ else if (unformat (i, "master"))
+ role = 0;
+ else if (unformat (i, "slave %U",
+ unformat_memif_queues, &rx_queues, &tx_queues))
+ role = 1;
+ else if (unformat (i, "mode ip"))
+ mode = MEMIF_INTERFACE_MODE_IP;
+ else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr))
+ ;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!is_pow2 (ring_size))
+ {
+ errmsg ("ring size must be power of 2\n");
+ return -99;
+ }
+
+ if (rx_queues > 255 || rx_queues < 1)
+ {
+ errmsg ("rx queue must be between 1 - 255\n");
+ return -99;
+ }
+
+ if (tx_queues > 255 || tx_queues < 1)
+ {
+ errmsg ("tx queue must be between 1 - 255\n");
+ return -99;
+ }
+
+ M (MEMIF_CREATE, mp);
+
+ mp->mode = mode;
+ mp->id = clib_host_to_net_u32 (id);
+ mp->role = role;
+ mp->ring_size = clib_host_to_net_u32 (ring_size);
+ mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff);
+ if (socket_filename != 0)
+ {
+ strncpy ((char *) mp->socket_filename, (char *) socket_filename, 127);
+ vec_free (socket_filename);
+ }
+ if (secret != 0)
+ {
+ strncpy ((char *) mp->secret, (char *) secret,
+ ARRAY_LEN (mp->secret) - 1);
+ vec_free (secret);
+ }
+ memcpy (mp->hw_addr, hw_addr, 6);
+ mp->rx_queues = rx_queues;
+ mp->tx_queues = tx_queues;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+/* memif-create reply handler */
+static void vl_api_memif_create_reply_t_handler
+ (vl_api_memif_create_reply_t * mp)
+{
+ vat_main_t *vam = memif_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created memif with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+/* memif-delete API */
+static int
+api_memif_delete (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_memif_delete_t *mp;
+ u32 sw_if_index = 0;
+ u8 index_defined = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %u", &sw_if_index))
+ index_defined = 1;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!index_defined)
+ {
+ errmsg ("missing sw_if_index\n");
+ return -99;
+ }
+
+ M (MEMIF_DELETE, mp);
+
+ mp->sw_if_index = clib_host_to_net_u32 (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+/* memif-dump API */
+static int
+api_memif_dump (vat_main_t * vam)
+{
+ memif_test_main_t *mm = &memif_test_main;
+ vl_api_memif_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for memif_dump");
+ return -99;
+ }
+
+ M (MEMIF_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (mm->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+
+ fformat (vam->ofp, "Sending ping id=%d\n", mm->ping_id);
+
+ vam->result_ready = 0;
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+/* memif-details message handler */
+static void vl_api_memif_details_t_handler (vl_api_memif_details_t * mp)
+{
+ vat_main_t *vam = memif_test_main.vat_main;
+
+ fformat (vam->ofp, "%s: sw_if_index %u mac %U\n"
+ " id %u socket %s role %s\n"
+ " ring_size %u buffer_size %u\n"
+ " state %s link %s\n",
+ mp->if_name, ntohl (mp->sw_if_index), format_ethernet_address,
+ mp->hw_addr, clib_net_to_host_u32 (mp->id), mp->socket_filename,
+ mp->role ? "slave" : "master",
+ ntohl (mp->ring_size), ntohs (mp->buffer_size),
+ mp->admin_up_down ? "up" : "down",
+ mp->link_up_down ? "up" : "down");
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(memif_create, "[id <id>] [socket <path>] [ring_size <size>] " \
+ "[buffer_size <size>] [hw_addr <mac_address>] " \
+ "[secret <string>] [mode ip] <master|slave>") \
+_(memif_delete, "<sw_if_index>") \
+_(memif_dump, "")
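+
+/*
+ * Example invocation (vpp_api_test; the socket path below is illustrative):
+ *   memif_create id 0 socket /run/vpp/memif.sock ring_size 1024 master
+ */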
+
+static void
+memif_vat_api_hookup (vat_main_t * vam)
+{
+ memif_test_main_t *mm __attribute__ ((unused)) = &memif_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) \
+ hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ memif_test_main_t *mm = &memif_test_main;
+ u8 *name;
+
+ mm->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "memif_%08x%c", api_version, 0);
+ mm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ /* Get the control ping ID */
+#define _(id,n,crc) \
+ const char *id ## _CRC __attribute__ ((unused)) = #n "_" #crc;
+ foreach_vl_msg_name_crc_vpe;
+#undef _
+ mm->ping_id = vl_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+
+ if (mm->msg_id_base != (u16) ~0)
+ memif_vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
new file mode 100644
index 00000000..4acc7149
--- /dev/null
+++ b/src/plugins/memif/node.c
@@ -0,0 +1,533 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+
+#include <memif/memif.h>
+#include <memif/private.h>
+
+#define foreach_memif_input_error \
+ _(NOT_IP, "not ip packet")
+
+typedef enum
+{
+#define _(f,s) MEMIF_INPUT_ERROR_##f,
+ foreach_memif_input_error
+#undef _
+ MEMIF_INPUT_N_ERROR,
+} memif_input_error_t;
+
+static char *memif_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_memif_input_error
+#undef _
+};
+
+typedef struct
+{
+ u32 next_index;
+ u32 hw_if_index;
+ u16 ring;
+} memif_input_trace_t;
+
+static u8 *
+format_memif_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ memif_input_trace_t *t = va_arg (*args, memif_input_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "memif: hw_if_index %d next-index %d",
+ t->hw_if_index, t->next_index);
+ s = format (s, "\n%Uslot: ring %u", format_white_space, indent + 2,
+ t->ring);
+ return s;
+}
+
+static_always_inline void
+memif_prefetch (vlib_main_t * vm, u32 bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_prefetch_buffer_header (b, STORE);
+ CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+static_always_inline void
+memif_buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi,
+ u32 prev_bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi);
+ vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi);
+
+ /* update first buffer */
+ first_b->total_length_not_including_first_buffer += b->current_length;
+
+ /* update previous buffer */
+ prev_b->next_buffer = bi;
+ prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ /* update current buffer */
+ b->next_buffer = 0;
+}
+
+/**
+ * @brief Copy buffer from rx ring
+ *
+ * @param * vm (in)
+ * @param * mif (in) pointer to memif interface
+ * @param * ring (in) pointer to memif ring
+ * @param * mq (in) pointer to memif queue
+ * @param ring_size (in) ring size
+ * @param n_buffer_bytes (in) size of a vlib buffer's data area
+ * @param * n_free_bufs (in/out) the number of free vlib buffers available
+ * @param ** first_b (out) the first vlib buffer pointer
+ * @param * first_bi (out) the first vlib buffer index
+ * @param * bi (in/out) the current buffer index
+ * @param * num_slots (in/out) the number of descriptors available to read
+ *
+ * @return total bytes read from the rx ring and written to vlib buffers
+ */
+static_always_inline uword
+memif_copy_buffer_from_rx_ring (vlib_main_t * vm, memif_if_t * mif,
+ memif_ring_t * ring, memif_queue_t * mq,
+ u16 ring_size, u32 n_buffer_bytes,
+ u32 * n_free_bufs, vlib_buffer_t ** first_b,
+ u32 * first_bi, u32 * bi, u16 * num_slots)
+{
+ memif_main_t *nm = &memif_main;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 total_bytes = 0, offset = 0;
+ u32 data_len;
+ u32 bytes_to_copy;
+ void *mb;
+ vlib_buffer_t *b;
+ u16 mask = ring_size - 1;
+ u32 prev_bi;
+ u16 last_head;
+
+ while (*num_slots)
+ {
+ data_len = ring->desc[mq->last_head].length;
+ while (data_len && (*n_free_bufs))
+ {
+ /* get empty buffer */
+ u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1;
+ prev_bi = *bi;
+ *bi = nm->rx_buffers[thread_index][last_buf];
+ b = vlib_get_buffer (vm, *bi);
+ _vec_len (nm->rx_buffers[thread_index]) = last_buf;
+ (*n_free_bufs)--;
+ if (PREDICT_FALSE (*n_free_bufs == 0))
+ {
+ *n_free_bufs +=
+ vlib_buffer_alloc (vm,
+ &nm->rx_buffers[thread_index]
+ [*n_free_bufs], ring_size);
+ _vec_len (nm->rx_buffers[thread_index]) = *n_free_bufs;
+ }
+
+ if (last_buf > 4)
+ {
+ memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 2]);
+ memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 3]);
+ }
+
+ /* copy buffer */
+ bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ b->current_data = 0;
+ mb = memif_get_buffer (mif, ring, mq->last_head);
+ clib_memcpy (vlib_buffer_get_current (b), mb + offset,
+ CLIB_CACHE_LINE_BYTES);
+ if (bytes_to_copy > CLIB_CACHE_LINE_BYTES)
+ clib_memcpy (vlib_buffer_get_current (b) + CLIB_CACHE_LINE_BYTES,
+ mb + CLIB_CACHE_LINE_BYTES + offset,
+ bytes_to_copy - CLIB_CACHE_LINE_BYTES);
+
+ /* fill buffer header */
+ b->current_length = bytes_to_copy;
+
+ if (total_bytes == 0)
+ {
+ /* fill buffer metadata */
+ b->total_length_not_including_first_buffer = 0;
+ b->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = mif->sw_if_index;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ *first_bi = *bi;
+ *first_b = vlib_get_buffer (vm, *first_bi);
+ }
+ else
+ memif_buffer_add_to_chain (vm, *bi, *first_bi, prev_bi);
+
+ offset += bytes_to_copy;
+ total_bytes += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+ last_head = mq->last_head;
+ /* Advance to next descriptor */
+ mq->last_head = (mq->last_head + 1) & mask;
+ offset = 0;
+ (*num_slots)--;
+ if ((ring->desc[last_head].flags & MEMIF_DESC_FLAG_NEXT) == 0)
+ break;
+ }
+
+ return (total_bytes);
+}
+
+
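+/* in IP mode packets carry no ethernet header; select the next node
+ * from the IP version nibble of the first payload byte */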
+static_always_inline u32
+memif_next_from_ip_hdr (vlib_node_runtime_t * node, vlib_buffer_t * b)
+{
+ u8 *ptr = vlib_buffer_get_current (b);
+ u8 v = *ptr & 0xf0;
+
+ if (PREDICT_TRUE (v == 0x40))
+ return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
+ else if (PREDICT_TRUE (v == 0x60))
+ return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+
+ b->error = node->errors[MEMIF_INPUT_ERROR_NOT_IP];
+ return VNET_DEVICE_INPUT_NEXT_DROP;
+}
+
+static_always_inline uword
+memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, memif_if_t * mif,
+ memif_ring_type_t type, u16 qid,
+ memif_interface_mode_t mode)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ memif_ring_t *ring;
+ memif_queue_t *mq;
+ u16 head;
+ u32 next_index;
+ uword n_trace = vlib_get_trace_count (vm, node);
+ memif_main_t *nm = &memif_main;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 *to_next = 0;
+ u32 n_free_bufs;
+ u32 b0_total, b1_total;
+ u32 thread_index = vlib_get_thread_index ();
+ u16 ring_size, mask, num_slots;
+ u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ mq = vec_elt_at_index (mif->rx_queues, qid);
+ ring = mq->ring;
+ ring_size = 1 << mq->log2_ring_size;
+ mask = ring_size - 1;
+
+ if (mode == MEMIF_INTERFACE_MODE_IP)
+ {
+ next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ }
+ else
+ {
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ }
+
+ n_free_bufs = vec_len (nm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < ring_size))
+ {
+ vec_validate (nm->rx_buffers[thread_index],
+ ring_size + n_free_bufs - 1);
+ n_free_bufs +=
+ vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs],
+ ring_size);
+ _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs;
+ }
+
+ head = ring->head;
+ if (head == mq->last_head)
+ return 0;
+
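+  /* number of slots the producer has filled since our last poll;
+   * the else branch accounts for head wrap-around */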
+ if (head > mq->last_head)
+ num_slots = head - mq->last_head;
+ else
+ num_slots = ring_size - mq->last_head + head;
+
+ while (num_slots)
+ {
+ u32 n_left_to_next;
+ u32 next0 = next_index;
+ u32 next1 = next_index;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
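+      /* dual loop: copy two packet chains per iteration, prefetching
+       * buffers and descriptors a few slots ahead to hide memory latency */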
+ while (num_slots > 11 && n_left_to_next > 2)
+ {
+ if (PREDICT_TRUE (mq->last_head + 5 < ring_size))
+ {
+ CLIB_PREFETCH (memif_get_buffer (mif, ring, mq->last_head + 2),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (memif_get_buffer (mif, ring, mq->last_head + 3),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (&ring->desc[mq->last_head + 4],
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (&ring->desc[mq->last_head + 5],
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+ else
+ {
+ CLIB_PREFETCH (memif_get_buffer
+ (mif, ring, (mq->last_head + 2) % mask),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (memif_get_buffer
+ (mif, ring, (mq->last_head + 3) % mask),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (&ring->desc[(mq->last_head + 4) % mask],
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (&ring->desc[(mq->last_head + 5) % mask],
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ vlib_buffer_t *first_b0 = 0;
+ u32 bi0 = 0, first_bi0 = 0;
+ b0_total = memif_copy_buffer_from_rx_ring (vm, mif, ring, mq,
+ ring_size,
+ n_buffer_bytes,
+ &n_free_bufs, &first_b0,
+ &first_bi0, &bi0,
+ &num_slots);
+
+ vlib_buffer_t *first_b1 = 0;
+ u32 bi1 = 0, first_bi1 = 0;
+ b1_total = memif_copy_buffer_from_rx_ring (vm, mif, ring, mq,
+ ring_size,
+ n_buffer_bytes,
+ &n_free_bufs, &first_b1,
+ &first_bi1, &bi1,
+ &num_slots);
+
+ /* enqueue buffer */
+ to_next[0] = first_bi0;
+ to_next[1] = first_bi1;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+
+ if (mode == MEMIF_INTERFACE_MODE_IP)
+ {
+ next0 = memif_next_from_ip_hdr (node, first_b0);
+ next1 = memif_next_from_ip_hdr (node, first_b1);
+ }
+ else if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+ if (PREDICT_FALSE (mif->per_interface_next_index != ~0))
+ next0 = next1 = mif->per_interface_next_index;
+ else
+	      /* redirect if feature path enabled */
+ vnet_feature_start_device_input_x2 (mif->sw_if_index,
+ &next0, &next1,
+ first_b0, first_b1);
+ }
+
+ /* trace */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b1);
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ /* b0 */
+ if (PREDICT_TRUE (first_b0 != 0))
+ {
+ memif_input_trace_t *tr;
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0);
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->hw_if_index = mif->hw_if_index;
+ tr->ring = qid;
+ }
+ if (n_trace)
+ {
+ /* b1 */
+ if (PREDICT_TRUE (first_b1 != 0))
+ {
+ memif_input_trace_t *tr;
+ vlib_trace_buffer (vm, node, next1, first_b1,
+ /* follow_chain */ 0);
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->hw_if_index = mif->hw_if_index;
+ tr->ring = qid;
+ }
+ }
+ }
+
+ /* enqueue */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0,
+ first_bi1, next0, next1);
+
+ /* next packet */
+ n_rx_packets += 2;
+ n_rx_bytes += b0_total + b1_total;
+ }
+ while (num_slots && n_left_to_next)
+ {
+ vlib_buffer_t *first_b0 = 0;
+ u32 bi0 = 0, first_bi0 = 0;
+ b0_total = memif_copy_buffer_from_rx_ring (vm, mif, ring, mq,
+ ring_size,
+ n_buffer_bytes,
+ &n_free_bufs, &first_b0,
+ &first_bi0, &bi0,
+ &num_slots);
+
+ if (mode == MEMIF_INTERFACE_MODE_IP)
+ {
+ next0 = memif_next_from_ip_hdr (node, first_b0);
+ }
+ else if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+ if (PREDICT_FALSE (mif->per_interface_next_index != ~0))
+ next0 = mif->per_interface_next_index;
+ else
+	      /* redirect if feature path enabled */
+ vnet_feature_start_device_input_x1 (mif->sw_if_index,
+ &next0, first_b0);
+ }
+
+ /* trace */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0);
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ if (PREDICT_TRUE (first_b0 != 0))
+ {
+ memif_input_trace_t *tr;
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0);
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->hw_if_index = mif->hw_if_index;
+ tr->ring = qid;
+ }
+ }
+
+ /* enqueue buffer */
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+
+ /* enqueue */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0, next0);
+
+ /* next packet */
+ n_rx_packets++;
+ n_rx_bytes += b0_total;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
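+  /* make sure the buffer reads above complete before the tail update
+   * becomes visible to the producer */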
+ CLIB_MEMORY_STORE_BARRIER ();
+ ring->tail = head;
+
+ vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX, thread_index,
+ mif->hw_if_index, n_rx_packets,
+ n_rx_bytes);
+
+ return n_rx_packets;
+}
+
+static uword
+memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_rx = 0;
+ memif_main_t *nm = &memif_main;
+ vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+ vnet_device_and_queue_t *dq;
+
+ foreach_device_and_queue (dq, rt->devices_and_queues)
+ {
+ memif_if_t *mif;
+ mif = vec_elt_at_index (nm->interfaces, dq->dev_instance);
+ if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) &&
+ (mif->flags & MEMIF_IF_FLAG_CONNECTED))
+ {
+ if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
+ {
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ n_rx += memif_device_input_inline (vm, node, frame, mif,
+ MEMIF_RING_M2S, dq->queue_id,
+ MEMIF_INTERFACE_MODE_IP);
+ else
+ n_rx += memif_device_input_inline (vm, node, frame, mif,
+ MEMIF_RING_M2S, dq->queue_id,
+ MEMIF_INTERFACE_MODE_ETHERNET);
+ }
+ else
+ {
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ n_rx += memif_device_input_inline (vm, node, frame, mif,
+ MEMIF_RING_S2M, dq->queue_id,
+ MEMIF_INTERFACE_MODE_IP);
+ else
+ n_rx += memif_device_input_inline (vm, node, frame, mif,
+ MEMIF_RING_S2M, dq->queue_id,
+ MEMIF_INTERFACE_MODE_ETHERNET);
+ }
+ }
+ }
+
+ return n_rx;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (memif_input_node) = {
+ .function = memif_input_fn,
+ .name = "memif-input",
+ .sibling_of = "device-input",
+ .format_trace = format_memif_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .n_errors = MEMIF_INPUT_N_ERROR,
+ .error_strings = memif_input_error_strings,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (memif_input_node, memif_input_fn)
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h
new file mode 100644
index 00000000..912ec59a
--- /dev/null
+++ b/src/plugins/memif/private.h
@@ -0,0 +1,261 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vppinfra/lock.h>
+
+#define MEMIF_DEFAULT_SOCKET_FILENAME "memif.sock"
+#define MEMIF_DEFAULT_RING_SIZE 1024
+#define MEMIF_DEFAULT_RX_QUEUES 1
+#define MEMIF_DEFAULT_TX_QUEUES 1
+#define MEMIF_DEFAULT_BUFFER_SIZE 2048
+
+#define MEMIF_MAX_M2S_RING (vec_len (vlib_mains) - 1)
+#define MEMIF_MAX_S2M_RING (vec_len (vlib_mains) - 1)
+#define MEMIF_MAX_REGION 255
+#define MEMIF_MAX_LOG2_RING_SIZE 14
+
+#define MEMIF_DEBUG 0
+
+#if MEMIF_DEBUG == 1
+#define DBG(...) clib_warning(__VA_ARGS__)
+#define DBG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__)
+#else
+#define DBG(...)
+#define DBG_UNIX_LOG(...)
+#endif
+
+#if MEMIF_DEBUG == 1
+#define memif_file_add(a, b) do { \
+ ASSERT (*a == ~0); \
+ *a = clib_file_add (&file_main, b); \
+ clib_warning ("clib_file_add fd %d private_data %u idx %u", \
+ (b)->file_descriptor, (b)->private_data, *a); \
+} while (0)
+
+#define memif_file_del(a) do { \
+ clib_warning ("clib_file_del idx %u",a - file_main.file_pool); \
+ clib_file_del (&file_main, a); \
+} while (0)
+
+#define memif_file_del_by_index(a) do { \
+ clib_warning ("clib_file_del idx %u", a); \
+ clib_file_del_by_index (&file_main, a); \
+} while (0)
+#else
+#define memif_file_add(a, b) do { \
+ ASSERT (*a == ~0); \
+ *a = clib_file_add (&file_main, b); \
+} while (0)
+#define memif_file_del(a) clib_file_del(&file_main, a)
+#define memif_file_del_by_index(a) clib_file_del_by_index(&file_main, a)
+#endif
+
+typedef struct
+{
+ u8 *filename;
+ int fd;
+ uword clib_file_index;
+ uword *pending_file_indices;
+ int ref_cnt;
+ int is_listener;
+
+  /* hash of all registered ids */
+ mhash_t dev_instance_by_id;
+
+ /* hash of all registered fds */
+ uword *dev_instance_by_fd;
+} memif_socket_file_t;
+
+typedef struct
+{
+ void *shm;
+ memif_region_size_t region_size;
+ int fd;
+} memif_region_t;
+
+typedef struct
+{
+ memif_msg_t msg;
+ int fd;
+} memif_msg_fifo_elt_t;
+
+typedef struct
+{
+ /* ring data */
+ memif_ring_t *ring;
+ memif_log2_ring_size_t log2_ring_size;
+ memif_region_index_t region;
+ memif_region_offset_t offset;
+
+ u16 last_head;
+ u16 last_tail;
+
+ /* interrupts */
+ int int_fd;
+ uword int_clib_file_index;
+ u64 int_count;
+} memif_queue_t;
+
+#define foreach_memif_if_flag \
+ _(0, ADMIN_UP, "admin-up") \
+ _(1, IS_SLAVE, "slave") \
+ _(2, CONNECTING, "connecting") \
+ _(3, CONNECTED, "connected") \
+ _(4, DELETING, "deleting")
+
+typedef enum
+{
+#define _(a, b, c) MEMIF_IF_FLAG_##b = (1 << a),
+ foreach_memif_if_flag
+#undef _
+} memif_if_flag_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ clib_spinlock_t lockp;
+ u32 flags;
+ memif_interface_id_t id;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ uword dev_instance;
+ memif_interface_mode_t mode:8;
+
+ u32 per_interface_next_index;
+
+ /* socket connection */
+ uword socket_file_index;
+ int conn_fd;
+ uword conn_clib_file_index;
+ memif_msg_fifo_elt_t *msg_queue;
+ u8 *secret;
+
+ memif_region_t *regions;
+
+ memif_queue_t *rx_queues;
+ memif_queue_t *tx_queues;
+
+ /* remote info */
+ pid_t remote_pid;
+ uid_t remote_uid;
+ gid_t remote_gid;
+ u8 *remote_name;
+ u8 *remote_if_name;
+
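+  /* parameters requested at configuration time */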
+ struct
+ {
+ memif_log2_ring_size_t log2_ring_size;
+ u8 num_s2m_rings;
+ u8 num_m2s_rings;
+ u16 buffer_size;
+ } cfg;
+
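+  /* parameters negotiated with the peer; valid while connected */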
+ struct
+ {
+ memif_log2_ring_size_t log2_ring_size;
+ u8 num_s2m_rings;
+ u8 num_m2s_rings;
+ u16 buffer_size;
+ } run;
+
+ /* disconnect strings */
+ u8 *local_disc_string;
+ u8 *remote_disc_string;
+} memif_if_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /** API message ID base */
+ u16 msg_id_base;
+
+ /* pool of all memory interfaces */
+ memif_if_t *interfaces;
+
+ /* pool of all unix socket files */
+ memif_socket_file_t *socket_files;
+ mhash_t socket_file_index_by_filename;
+
+ /* rx buffer cache */
+ u32 **rx_buffers;
+
+} memif_main_t;
+
+extern memif_main_t memif_main;
+extern vnet_device_class_t memif_device_class;
+extern vlib_node_registration_t memif_input_node;
+
+typedef enum
+{
+  MEMIF_PROCESS_EVENT_START = 1,
+  MEMIF_PROCESS_EVENT_STOP = 2,
+} memif_process_event_t;
+
+typedef struct
+{
+ memif_interface_id_t id;
+ u8 *socket_filename;
+ u8 *secret;
+ u8 is_master;
+ memif_interface_mode_t mode:8;
+ memif_log2_ring_size_t log2_ring_size;
+ u16 buffer_size;
+ u8 hw_addr_set;
+ u8 hw_addr[6];
+ u8 rx_queues;
+ u8 tx_queues;
+
+ /* return */
+ u32 sw_if_index;
+} memif_create_if_args_t;
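+
+/*
+ * Illustrative use of memif_create_if (field values are examples only):
+ *   memif_create_if_args_t args = { .id = 0, .is_master = 1,
+ *                                   .log2_ring_size = 10 };
+ *   int rv = memif_create_if (vm, &args);
+ */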
+
+int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args);
+int memif_delete_if (vlib_main_t * vm, memif_if_t * mif);
+clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm);
+
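+/* translate a descriptor's (region, offset) pair into a pointer into
+ * the mapped shared memory */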
+static_always_inline void *
+memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot)
+{
+ u16 region = ring->desc[slot].region;
+ return mif->regions[region].shm + ring->desc[slot].offset;
+}
+
+/* memif.c */
+clib_error_t *memif_init_regions_and_queues (memif_if_t * mif);
+clib_error_t *memif_connect (memif_if_t * mif);
+void memif_disconnect (memif_if_t * mif, clib_error_t * err);
+
+/* socket.c */
+clib_error_t *memif_conn_fd_accept_ready (clib_file_t * uf);
+clib_error_t *memif_master_conn_fd_read_ready (clib_file_t * uf);
+clib_error_t *memif_slave_conn_fd_read_ready (clib_file_t * uf);
+clib_error_t *memif_master_conn_fd_write_ready (clib_file_t * uf);
+clib_error_t *memif_slave_conn_fd_write_ready (clib_file_t * uf);
+clib_error_t *memif_master_conn_fd_error (clib_file_t * uf);
+clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf);
+clib_error_t *memif_msg_send_disconnect (memif_if_t * mif,
+ clib_error_t * err);
+u8 *format_memif_device_name (u8 * s, va_list * args);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c
new file mode 100644
index 00000000..1abc0f11
--- /dev/null
+++ b/src/plugins/memif/socket.c
@@ -0,0 +1,740 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/eventfd.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vpp/app/version.h>
+
+#include <memif/memif.h>
+#include <memif/private.h>
+
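+/* copy a fixed-size string field from a protocol message into a
+ * NUL-terminated u8 vector; an empty string yields a null vector */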
+static u8 *
+memif_str2vec (uint8_t * str, int len)
+{
+ u8 *s = 0;
+ int i;
+
+ if (str[0] == 0)
+ return s;
+
+ for (i = 0; i < len; i++)
+ {
+ vec_add1 (s, str[i]);
+ if (str[i] == 0)
+ return s;
+ }
+ vec_add1 (s, 0);
+
+ return s;
+}
+
+static clib_error_t *
+memif_msg_send (int fd, memif_msg_t * msg, int afd)
+{
+ struct msghdr mh = { 0 };
+ struct iovec iov[1];
+ char ctl[CMSG_SPACE (sizeof (int))];
+ int rv;
+
+ iov[0].iov_base = (void *) msg;
+ iov[0].iov_len = sizeof (memif_msg_t);
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+
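+  /* if a file descriptor accompanies the message (a memory region or an
+   * interrupt eventfd), pass it as SCM_RIGHTS ancillary data */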
+ if (afd > 0)
+ {
+ struct cmsghdr *cmsg;
+ memset (&ctl, 0, sizeof (ctl));
+ mh.msg_control = ctl;
+ mh.msg_controllen = sizeof (ctl);
+ cmsg = CMSG_FIRSTHDR (&mh);
+ cmsg->cmsg_len = CMSG_LEN (sizeof (int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy (CMSG_DATA (cmsg), &afd, sizeof (int));
+ }
+ rv = sendmsg (fd, &mh, 0);
+ if (rv < 0)
+ return clib_error_return_unix (0, "sendmsg");
+ DBG ("Message type %u sent (fd %d)", msg->type, afd);
+ return 0;
+}
+
+static void
+memif_msg_enq_ack (memif_if_t * mif)
+{
+ memif_msg_fifo_elt_t *e;
+ clib_fifo_add2 (mif->msg_queue, e);
+
+ e->msg.type = MEMIF_MSG_TYPE_ACK;
+ e->fd = -1;
+}
+
+static clib_error_t *
+memif_msg_enq_hello (int fd)
+{
+ u8 *s;
+ memif_msg_t msg = { 0 };
+ memif_msg_hello_t *h = &msg.hello;
+ msg.type = MEMIF_MSG_TYPE_HELLO;
+ h->min_version = MEMIF_VERSION;
+ h->max_version = MEMIF_VERSION;
+ h->max_m2s_ring = MEMIF_MAX_M2S_RING;
+  h->max_s2m_ring = MEMIF_MAX_S2M_RING;
+ h->max_region = MEMIF_MAX_REGION;
+ h->max_log2_ring_size = MEMIF_MAX_LOG2_RING_SIZE;
+ s = format (0, "VPP %s%c", VPP_BUILD_VER, 0);
+ strncpy ((char *) h->name, (char *) s, sizeof (h->name) - 1);
+ vec_free (s);
+ return memif_msg_send (fd, &msg, -1);
+}
+
+static void
+memif_msg_enq_init (memif_if_t * mif)
+{
+ u8 *s;
+ memif_msg_fifo_elt_t *e;
+ clib_fifo_add2 (mif->msg_queue, e);
+ memif_msg_init_t *i = &e->msg.init;
+
+ e->msg.type = MEMIF_MSG_TYPE_INIT;
+ e->fd = -1;
+ i->version = MEMIF_VERSION;
+ i->id = mif->id;
+ i->mode = mif->mode;
+ s = format (0, "VPP %s%c", VPP_BUILD_VER, 0);
+ strncpy ((char *) i->name, (char *) s, sizeof (i->name) - 1);
+ if (mif->secret)
+ strncpy ((char *) i->secret, (char *) mif->secret,
+ sizeof (i->secret) - 1);
+ vec_free (s);
+}
+
+static void
+memif_msg_enq_add_region (memif_if_t * mif, u8 region)
+{
+ memif_msg_fifo_elt_t *e;
+ clib_fifo_add2 (mif->msg_queue, e);
+ memif_msg_add_region_t *ar = &e->msg.add_region;
+
+ e->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
+ e->fd = mif->regions[region].fd;
+ ar->index = region;
+ ar->size = mif->regions[region].region_size;
+}
+
+static void
+memif_msg_enq_add_ring (memif_if_t * mif, u8 index, u8 direction)
+{
+ memif_msg_fifo_elt_t *e;
+ clib_fifo_add2 (mif->msg_queue, e);
+ memif_msg_add_ring_t *ar = &e->msg.add_ring;
+ memif_queue_t *mq;
+
+ ASSERT ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) != 0);
+
+ e->msg.type = MEMIF_MSG_TYPE_ADD_RING;
+
+ if (direction == MEMIF_RING_M2S)
+ mq = vec_elt_at_index (mif->rx_queues, index);
+ else
+ mq = vec_elt_at_index (mif->tx_queues, index);
+
+ e->fd = mq->int_fd;
+ ar->index = index;
+ ar->region = mq->region;
+ ar->offset = mq->offset;
+ ar->log2_ring_size = mq->log2_ring_size;
+ ar->flags = (direction == MEMIF_RING_S2M) ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0;
+}
+
+static void
+memif_msg_enq_connect (memif_if_t * mif)
+{
+ memif_msg_fifo_elt_t *e;
+ clib_fifo_add2 (mif->msg_queue, e);
+ memif_msg_connect_t *c = &e->msg.connect;
+ u8 *s;
+
+ e->msg.type = MEMIF_MSG_TYPE_CONNECT;
+ e->fd = -1;
+ s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0);
+ strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name) - 1);
+ vec_free (s);
+}
+
+static void
+memif_msg_enq_connected (memif_if_t * mif)
+{
+ memif_msg_fifo_elt_t *e;
+ clib_fifo_add2 (mif->msg_queue, e);
+ memif_msg_connected_t *c = &e->msg.connected;
+ u8 *s;
+
+ e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
+ e->fd = -1;
+ s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0);
+ strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name) - 1);
+ vec_free (s);
+}
+
+clib_error_t *
+memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err)
+{
+ memif_msg_t msg = { 0 };
+ msg.type = MEMIF_MSG_TYPE_DISCONNECT;
+ memif_msg_disconnect_t *d = &msg.disconnect;
+
+ d->code = err->code;
+ strncpy ((char *) d->string, (char *) err->what, sizeof (d->string) - 1);
+
+ return memif_msg_send (mif->conn_fd, &msg, -1);
+}
+
+static clib_error_t *
+memif_msg_receive_hello (memif_if_t * mif, memif_msg_t * msg)
+{
+ memif_msg_hello_t *h = &msg->hello;
+
+ if (msg->hello.min_version > MEMIF_VERSION ||
+ msg->hello.max_version < MEMIF_VERSION)
+ return clib_error_return (0, "incompatible protocol version");
+
+ mif->run.num_s2m_rings = clib_min (h->max_s2m_ring + 1,
+ mif->cfg.num_s2m_rings);
+ mif->run.num_m2s_rings = clib_min (h->max_m2s_ring + 1,
+ mif->cfg.num_m2s_rings);
+ mif->run.log2_ring_size = clib_min (h->max_log2_ring_size,
+ mif->cfg.log2_ring_size);
+ mif->run.buffer_size = mif->cfg.buffer_size;
+
+ mif->remote_name = memif_str2vec (h->name, sizeof (h->name));
+
+ return 0;
+}
+
+static clib_error_t *
+memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg,
+ clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ memif_socket_file_t *msf =
+ vec_elt_at_index (mm->socket_files, uf->private_data);
+ memif_msg_init_t *i = &msg->init;
+ memif_if_t *mif, tmp;
+ clib_error_t *err;
+ uword *p;
+
+ if (i->version != MEMIF_VERSION)
+ {
+ memif_file_del_by_index (uf - file_main.file_pool);
+ return clib_error_return (0, "unsupported version");
+ }
+
+ p = mhash_get (&msf->dev_instance_by_id, &i->id);
+
+ if (!p)
+ {
+ err = clib_error_return (0, "unmatched interface id");
+ goto error;
+ }
+
+ mif = vec_elt_at_index (mm->interfaces, p[0]);
+
+ if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
+ {
+ err = clib_error_return (0, "cannot connect to slave");
+ goto error;
+ }
+
+ if (mif->conn_fd != -1)
+ {
+ err = clib_error_return (0, "already connected");
+ goto error;
+ }
+
+ if (i->mode != mif->mode)
+ {
+ err = clib_error_return (0, "mode mismatch");
+ goto error;
+ }
+
+ mif->conn_fd = uf->file_descriptor;
+ mif->conn_clib_file_index = uf - file_main.file_pool;
+ hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance);
+ mif->remote_name = memif_str2vec (i->name, sizeof (i->name));
+ *mifp = mif;
+
+ if (mif->secret)
+ {
+ u8 *s;
+ int r;
+ s = memif_str2vec (i->secret, sizeof (i->secret));
+ if (s == 0)
+ return clib_error_return (0, "secret required");
+
+ r = vec_cmp (s, mif->secret);
+ vec_free (s);
+
+ if (r)
+ return clib_error_return (0, "incorrect secret");
+ }
+
+ return 0;
+
+error:
+ tmp.conn_fd = uf->file_descriptor;
+ memif_msg_send_disconnect (&tmp, err);
+ memif_file_del_by_index (uf - file_main.file_pool);
+ return err;
+}
+
+static clib_error_t *
+memif_msg_receive_add_region (memif_if_t * mif, memif_msg_t * msg, int fd)
+{
+ memif_msg_add_region_t *ar = &msg->add_region;
+ memif_region_t *mr;
+ if (fd < 0)
+ return clib_error_return (0, "missing memory region fd");
+
+ if (ar->index != vec_len (mif->regions))
+ return clib_error_return (0, "unexpected region index");
+
+ if (ar->index > MEMIF_MAX_REGION)
+ return clib_error_return (0, "too many regions");
+
+ vec_validate_aligned (mif->regions, ar->index, CLIB_CACHE_LINE_BYTES);
+ mr = vec_elt_at_index (mif->regions, ar->index);
+ mr->fd = fd;
+ mr->region_size = ar->size;
+
+ return 0;
+}
+
+static clib_error_t *
+memif_msg_receive_add_ring (memif_if_t * mif, memif_msg_t * msg, int fd)
+{
+ memif_msg_add_ring_t *ar = &msg->add_ring;
+ memif_queue_t *mq;
+
+ if (fd < 0)
+ return clib_error_return (0, "missing ring interrupt fd");
+
+ if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M)
+ {
+ if (ar->index != vec_len (mif->rx_queues))
+ return clib_error_return (0, "unexpected ring index");
+
+ if (ar->index > MEMIF_MAX_S2M_RING)
+ return clib_error_return (0, "too many rings");
+
+ vec_validate_aligned (mif->rx_queues, ar->index, CLIB_CACHE_LINE_BYTES);
+ mq = vec_elt_at_index (mif->rx_queues, ar->index);
+ mif->run.num_s2m_rings = vec_len (mif->rx_queues);
+ }
+ else
+ {
+ if (ar->index != vec_len (mif->tx_queues))
+ return clib_error_return (0, "unexpected ring index");
+
+ if (ar->index > MEMIF_MAX_M2S_RING)
+ return clib_error_return (0, "too many rings");
+
+ vec_validate_aligned (mif->tx_queues, ar->index, CLIB_CACHE_LINE_BYTES);
+ mq = vec_elt_at_index (mif->tx_queues, ar->index);
+ mif->run.num_m2s_rings = vec_len (mif->tx_queues);
+ }
+
+ mq->int_fd = fd;
+ mq->int_clib_file_index = ~0;
+ mq->log2_ring_size = ar->log2_ring_size;
+ mq->region = ar->region;
+ mq->offset = ar->offset;
+
+ return 0;
+}
+
+static clib_error_t *
+memif_msg_receive_connect (memif_if_t * mif, memif_msg_t * msg)
+{
+ clib_error_t *err;
+ memif_msg_connect_t *c = &msg->connect;
+
+ if ((err = memif_connect (mif)))
+ return err;
+
+ mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name));
+
+ return 0;
+}
+
+static clib_error_t *
+memif_msg_receive_connected (memif_if_t * mif, memif_msg_t * msg)
+{
+ clib_error_t *err;
+ memif_msg_connected_t *c = &msg->connected;
+
+ if ((err = memif_connect (mif)))
+ return err;
+
+ mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name));
+ return 0;
+}
+
+static clib_error_t *
+memif_msg_receive_disconnect (memif_if_t * mif, memif_msg_t * msg)
+{
+ memif_msg_disconnect_t *d = &msg->disconnect;
+
+ mif->remote_disc_string = memif_str2vec (d->string, sizeof (d->string));
+ return clib_error_return (0, "disconnect received");
+}
+
+static clib_error_t *
+memif_msg_receive (memif_if_t ** mifp, clib_file_t * uf)
+{
+ char ctl[CMSG_SPACE (sizeof (int)) +
+ CMSG_SPACE (sizeof (struct ucred))] = { 0 };
+ struct msghdr mh = { 0 };
+ struct iovec iov[1];
+ memif_msg_t msg = { 0 };
+ ssize_t size;
+ clib_error_t *err = 0;
+ int fd = -1;
+ int i;
+ memif_if_t *mif = *mifp;
+
+ iov[0].iov_base = (void *) &msg;
+ iov[0].iov_len = sizeof (memif_msg_t);
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+ mh.msg_control = ctl;
+ mh.msg_controllen = sizeof (ctl);
+
+ /* receive the incoming message */
+ size = recvmsg (uf->file_descriptor, &mh, 0);
+ if (size != sizeof (memif_msg_t))
+ {
+ return (size == 0) ? clib_error_return (0, "disconnected") :
+ clib_error_return_unix (0,
+ "recvmsg: malformed message received on fd %d",
+ uf->file_descriptor);
+ }
+
+ if (mif == 0 && msg.type != MEMIF_MSG_TYPE_INIT)
+ {
+ memif_file_del (uf);
+ return clib_error_return (0, "unexpected message received");
+ }
+
+  /* process ancillary data */
+ struct ucred *cr = 0;
+ struct cmsghdr *cmsg;
+
+ cmsg = CMSG_FIRSTHDR (&mh);
+ while (cmsg)
+ {
+ if (cmsg->cmsg_level == SOL_SOCKET)
+ {
+ if (cmsg->cmsg_type == SCM_CREDENTIALS)
+ {
+ cr = (struct ucred *) CMSG_DATA (cmsg);
+ }
+ else if (cmsg->cmsg_type == SCM_RIGHTS)
+ {
+ int *fdp = (int *) CMSG_DATA (cmsg);
+ fd = *fdp;
+ }
+ }
+ cmsg = CMSG_NXTHDR (&mh, cmsg);
+ }
+
+ DBG ("Message type %u received", msg.type);
+ /* process the message based on its type */
+ switch (msg.type)
+ {
+ case MEMIF_MSG_TYPE_ACK:
+ break;
+
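+    /* slave side: the master's HELLO triggers parameter negotiation and
+     * region/queue allocation, followed by the INIT, ADD_REGION,
+     * ADD_RING and CONNECT requests */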
+ case MEMIF_MSG_TYPE_HELLO:
+ if ((err = memif_msg_receive_hello (mif, &msg)))
+ return err;
+ if ((err = memif_init_regions_and_queues (mif)))
+ return err;
+ memif_msg_enq_init (mif);
+ memif_msg_enq_add_region (mif, 0);
+ vec_foreach_index (i, mif->tx_queues)
+ memif_msg_enq_add_ring (mif, i, MEMIF_RING_S2M);
+ vec_foreach_index (i, mif->rx_queues)
+ memif_msg_enq_add_ring (mif, i, MEMIF_RING_M2S);
+ memif_msg_enq_connect (mif);
+ break;
+
+ case MEMIF_MSG_TYPE_INIT:
+ if ((err = memif_msg_receive_init (mifp, &msg, uf)))
+ return err;
+ mif = *mifp;
+ mif->remote_pid = cr->pid;
+ mif->remote_uid = cr->uid;
+ mif->remote_gid = cr->gid;
+ memif_msg_enq_ack (mif);
+ break;
+
+ case MEMIF_MSG_TYPE_ADD_REGION:
+ if ((err = memif_msg_receive_add_region (mif, &msg, fd)))
+ return err;
+ memif_msg_enq_ack (mif);
+ break;
+
+ case MEMIF_MSG_TYPE_ADD_RING:
+ if ((err = memif_msg_receive_add_ring (mif, &msg, fd)))
+ return err;
+ memif_msg_enq_ack (mif);
+ break;
+
+ case MEMIF_MSG_TYPE_CONNECT:
+ if ((err = memif_msg_receive_connect (mif, &msg)))
+ return err;
+ memif_msg_enq_connected (mif);
+ break;
+
+ case MEMIF_MSG_TYPE_CONNECTED:
+ if ((err = memif_msg_receive_connected (mif, &msg)))
+ return err;
+ break;
+
+ case MEMIF_MSG_TYPE_DISCONNECT:
+ if ((err = memif_msg_receive_disconnect (mif, &msg)))
+ return err;
+ break;
+
+ default:
+ err = clib_error_return (0, "unknown message type (0x%x)", msg.type);
+ return err;
+ }
+
+ if (clib_fifo_elts (mif->msg_queue) && mif->conn_clib_file_index != ~0)
+ clib_file_set_data_available_to_write (&file_main,
+ mif->conn_clib_file_index, 1);
+ return 0;
+}
+
+clib_error_t *
+memif_master_conn_fd_read_ready (clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ memif_socket_file_t *msf =
+ pool_elt_at_index (mm->socket_files, uf->private_data);
+ uword *p;
+ memif_if_t *mif = 0;
+ uword conn_clib_file_index = ~0;
+ clib_error_t *err = 0;
+
+ p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor);
+ if (p)
+ {
+ mif = vec_elt_at_index (mm->interfaces, p[0]);
+ }
+ else
+ {
+      /* This is a new connection; remove its index from the pending vector */
+ int i;
+ vec_foreach_index (i, msf->pending_file_indices)
+ if (msf->pending_file_indices[i] == uf - file_main.file_pool)
+ {
+ conn_clib_file_index = msf->pending_file_indices[i];
+ vec_del1 (msf->pending_file_indices, i);
+ break;
+ }
+ ASSERT (conn_clib_file_index != ~0);
+ }
+ err = memif_msg_receive (&mif, uf);
+ if (err)
+ {
+ memif_disconnect (mif, err);
+ clib_error_free (err);
+ }
+ return 0;
+}
+
+clib_error_t *
+memif_slave_conn_fd_read_ready (clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ clib_error_t *err;
+ memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data);
+ err = memif_msg_receive (&mif, uf);
+ if (err)
+ {
+ memif_disconnect (mif, err);
+ clib_error_free (err);
+ }
+ return 0;
+}
+
+static clib_error_t *
+memif_conn_fd_write_ready (clib_file_t * uf, memif_if_t * mif)
+{
+ memif_msg_fifo_elt_t *e;
+ clib_fifo_sub2 (mif->msg_queue, e);
+ clib_file_set_data_available_to_write (&file_main,
+ mif->conn_clib_file_index, 0);
+ memif_msg_send (mif->conn_fd, &e->msg, e->fd);
+ return 0;
+}
+
+clib_error_t *
+memif_master_conn_fd_write_ready (clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ memif_socket_file_t *msf =
+ pool_elt_at_index (mm->socket_files, uf->private_data);
+ uword *p;
+ memif_if_t *mif;
+
+ p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor);
+ if (!p)
+ return 0;
+
+ mif = vec_elt_at_index (mm->interfaces, p[0]);
+ return memif_conn_fd_write_ready (uf, mif);
+}
+
+clib_error_t *
+memif_slave_conn_fd_write_ready (clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data);
+ return memif_conn_fd_write_ready (uf, mif);
+}
+
+clib_error_t *
+memif_slave_conn_fd_error (clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data);
+ clib_error_t *err;
+
+ err = clib_error_return (0, "connection fd error");
+ memif_disconnect (mif, err);
+ clib_error_free (err);
+
+ return 0;
+}
+
+clib_error_t *
+memif_master_conn_fd_error (clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ memif_socket_file_t *msf =
+ pool_elt_at_index (mm->socket_files, uf->private_data);
+ uword *p;
+
+
+ p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor);
+ if (p)
+ {
+ memif_if_t *mif;
+ clib_error_t *err;
+ mif = vec_elt_at_index (mm->interfaces, p[0]);
+ err = clib_error_return (0, "connection fd error");
+ memif_disconnect (mif, err);
+ clib_error_free (err);
+ }
+ else
+ {
+ int i;
+ vec_foreach_index (i, msf->pending_file_indices)
+ if (msf->pending_file_indices[i] == uf - file_main.file_pool)
+ {
+ vec_del1 (msf->pending_file_indices, i);
+ memif_file_del (uf);
+ return 0;
+ }
+ }
+
+ clib_warning ("Error on unknown file descriptor %d", uf->file_descriptor);
+ memif_file_del (uf);
+ return 0;
+}
+
+
+clib_error_t *
+memif_conn_fd_accept_ready (clib_file_t * uf)
+{
+ memif_main_t *mm = &memif_main;
+ memif_socket_file_t *msf =
+ pool_elt_at_index (mm->socket_files, uf->private_data);
+ int addr_len;
+ struct sockaddr_un client;
+ int conn_fd;
+ clib_file_t template = { 0 };
+ uword clib_file_index = ~0;
+ clib_error_t *err;
+
+
+ addr_len = sizeof (client);
+ conn_fd = accept (uf->file_descriptor,
+ (struct sockaddr *) &client, (socklen_t *) & addr_len);
+
+ if (conn_fd < 0)
+ return clib_error_return_unix (0, "accept fd %d", uf->file_descriptor);
+
+ template.read_function = memif_master_conn_fd_read_ready;
+ template.write_function = memif_master_conn_fd_write_ready;
+ template.error_function = memif_master_conn_fd_error;
+ template.file_descriptor = conn_fd;
+ template.private_data = uf->private_data;
+
+ memif_file_add (&clib_file_index, &template);
+
+ err = memif_msg_enq_hello (conn_fd);
+ if (err)
+ {
+ clib_error_report (err);
+ memif_file_del_by_index (clib_file_index);
+ }
+ else
+ vec_add1 (msf->pending_file_indices, clib_file_index);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat.am b/src/plugins/nat.am
new file mode 100644
index 00000000..b967a716
--- /dev/null
+++ b/src/plugins/nat.am
@@ -0,0 +1,41 @@
+
+# Copyright (c) <current-year> <your-organization>
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += nat_test_plugin.la
+vppplugins_LTLIBRARIES += nat_plugin.la
+
+nat_plugin_la_SOURCES = nat/nat.c \
+ nat/nat_api.c \
+ nat/in2out.c \
+ nat/out2in.c \
+ nat/nat_plugin.api.h \
+ nat/nat_ipfix_logging.c \
+ nat/nat_det.c \
+ nat/nat64.c \
+ nat/nat64_cli.c \
+ nat/nat64_in2out.c \
+ nat/nat64_out2in.c \
+ nat/nat64_db.c
+
+API_FILES += nat/nat.api
+
+nobase_apiinclude_HEADERS += \
+ nat/nat_all_api_h.h \
+ nat/nat_msg_enum.h \
+ nat/nat.api.h
+
+nat_test_plugin_la_SOURCES = \
+ nat/nat_test.c nat/nat_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c
new file mode 100755
index 00000000..8e583313
--- /dev/null
+++ b/src/plugins/nat/in2out.c
@@ -0,0 +1,3683 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/handoff.h>
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/ip4_fib.h>
+#include <nat/nat.h>
+#include <nat/nat_ipfix_logging.h>
+#include <nat/nat_det.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+typedef struct {
+ u32 sw_if_index;
+ u32 next_index;
+ u32 session_index;
+ u32 is_slow_path;
+} snat_in2out_trace_t;
+
+typedef struct {
+ u32 next_worker_index;
+ u8 do_handoff;
+} snat_in2out_worker_handoff_trace_t;
+
+/* packet trace format function */
+static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
+ char * tag;
+
+ tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
+
+ s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
+ t->sw_if_index, t->next_index, t->session_index);
+
+ return s;
+}
+
+static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
+
+ s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+
+ return s;
+}
+
+static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_in2out_worker_handoff_trace_t * t =
+ va_arg (*args, snat_in2out_worker_handoff_trace_t *);
+ char * m;
+
+ m = t->do_handoff ? "next worker" : "same worker";
+ s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
+
+ return s;
+}
+
+vlib_node_registration_t snat_in2out_node;
+vlib_node_registration_t snat_in2out_slowpath_node;
+vlib_node_registration_t snat_in2out_fast_node;
+vlib_node_registration_t snat_in2out_worker_handoff_node;
+vlib_node_registration_t snat_det_in2out_node;
+vlib_node_registration_t snat_in2out_output_node;
+vlib_node_registration_t snat_in2out_output_slowpath_node;
+vlib_node_registration_t snat_in2out_output_worker_handoff_node;
+vlib_node_registration_t snat_hairpin_dst_node;
+vlib_node_registration_t snat_hairpin_src_node;
+
+
+#define foreach_snat_in2out_error \
+_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
+_(IN2OUT_PACKETS, "Good in2out packets processed") \
+_(OUT_OF_PORTS, "Out of ports") \
+_(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \
+_(BAD_ICMP_TYPE, "unsupported ICMP type") \
+_(NO_TRANSLATION, "No translation") \
+_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")
+
+typedef enum {
+#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
+ foreach_snat_in2out_error
+#undef _
+ SNAT_IN2OUT_N_ERROR,
+} snat_in2out_error_t;
+
+static char * snat_in2out_error_strings[] = {
+#define _(sym,string) string,
+ foreach_snat_in2out_error
+#undef _
+};
+
+typedef enum {
+ SNAT_IN2OUT_NEXT_LOOKUP,
+ SNAT_IN2OUT_NEXT_DROP,
+ SNAT_IN2OUT_NEXT_ICMP_ERROR,
+ SNAT_IN2OUT_NEXT_SLOW_PATH,
+ SNAT_IN2OUT_N_NEXT,
+} snat_in2out_next_t;
+
+typedef enum {
+ SNAT_HAIRPIN_SRC_NEXT_DROP,
+ SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
+ SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
+ SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
+ SNAT_HAIRPIN_SRC_N_NEXT,
+} snat_hairpin_next_t;
+
+/**
+ * @brief Check if packet should be translated
+ *
+ * Packets aimed at the outside interface, or at an external address with an
+ * active session, should be translated.
+ *
+ * @param sm NAT main
+ * @param node NAT node runtime
+ * @param sw_if_index0 index of the inside interface
+ * @param ip0 IPv4 header
+ * @param proto0 NAT protocol
+ * @param rx_fib_index0 RX FIB index
+ *
+ * @returns 0 if the packet should be translated, otherwise 1
+ */
+static inline int
+snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
+ u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
+ u32 rx_fib_index0)
+{
+ fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ .fp_addr = {
+ .ip4.as_u32 = ip0->dst_address.as_u32,
+ },
+ };
+
+  /* Don't NAT packets aimed at the interface address */
+ if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
+ ip0->dst_address.as_u32)))
+ return 1;
+
+ fei = fib_table_lookup (rx_fib_index0, &pfx);
+ if (FIB_NODE_INDEX_INVALID != fei)
+ {
+ u32 sw_if_index = fib_entry_get_resolving_interface (fei);
+ if (sw_if_index == ~0)
+ {
+ fei = fib_table_lookup (sm->outside_fib_index, &pfx);
+ if (FIB_NODE_INDEX_INVALID != fei)
+ sw_if_index = fib_entry_get_resolving_interface (fei);
+ }
+ snat_interface_t *i;
+ pool_foreach (i, sm->interfaces,
+ ({
+ /* NAT packet aimed at outside interface */
+ if ((i->is_inside == 0) && (sw_if_index == i->sw_if_index))
+ return 0;
+ }));
+ }
+
+ return 1;
+}
+
+static inline int
+snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
+ u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
+ u32 rx_fib_index0, u32 thread_index)
+{
+ udp_header_t * udp0 = ip4_next_header (ip0);
+ snat_session_key_t key0, sm0;
+ clib_bihash_kv_8_8_t kv0, value0;
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.protocol = proto0;
+ key0.fib_index = sm->outside_fib_index;
+ kv0.key = key0.as_u64;
+
+  /* NAT packets aimed at an external address that has an active session */
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
+ &value0))
+ {
+ /* or is static mappings */
+      /* or a static mapping */
+ return 0;
+ }
+ else
+ return 0;
+
+ return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
+ rx_fib_index0);
+}
+
+static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
+ ip4_header_t * ip0,
+ u32 rx_fib_index0,
+ snat_session_key_t * key0,
+ snat_session_t ** sessionp,
+ vlib_node_runtime_t * node,
+ u32 next0,
+ u32 thread_index)
+{
+ snat_user_t *u;
+ snat_user_key_t user_key;
+ snat_session_t *s;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u32 oldest_per_user_translation_list_index;
+ dlist_elt_t * oldest_per_user_translation_list_elt;
+ dlist_elt_t * per_user_translation_list_elt;
+ dlist_elt_t * per_user_list_head_elt;
+ u32 session_index;
+ snat_session_key_t key1;
+ u32 address_index = ~0;
+ u32 outside_fib_index;
+ uword * p;
+
+ if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+
+ p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
+ if (! p)
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+ outside_fib_index = p[0];
+
+ key1.protocol = key0->protocol;
+ user_key.addr = ip0->src_address;
+ user_key.fib_index = rx_fib_index0;
+ kv0.key = user_key.as_u64;
+
+ /* Ever heard of the "user" = src ip4 address before? */
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
+ &kv0, &value0))
+ {
+ /* no, make a new one */
+ pool_get (sm->per_thread_data[thread_index].users, u);
+ memset (u, 0, sizeof (*u));
+ u->addr = ip0->src_address;
+ u->fib_index = rx_fib_index0;
+
+ pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
+
+ u->sessions_per_user_list_head_index = per_user_list_head_elt -
+ sm->per_thread_data[thread_index].list_pool;
+
+ clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
+ u->sessions_per_user_list_head_index);
+
+ kv0.value = u - sm->per_thread_data[thread_index].users;
+
+ /* add user */
+ clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
+ &kv0, 1 /* is_add */);
+ }
+ else
+ {
+ u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
+ value0.value);
+ }
+
+ /* Over quota? Recycle the least recently used dynamic translation */
+ if (u->nsessions >= sm->max_translations_per_user)
+ {
+ /* Remove the oldest dynamic translation */
+ do {
+ oldest_per_user_translation_list_index =
+ clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
+ u->sessions_per_user_list_head_index);
+
+ ASSERT (oldest_per_user_translation_list_index != ~0);
+
+ /* add it back to the end of the LRU list */
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ u->sessions_per_user_list_head_index,
+ oldest_per_user_translation_list_index);
+ /* Get the list element */
+ oldest_per_user_translation_list_elt =
+ pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
+ oldest_per_user_translation_list_index);
+
+ /* Get the session index from the list element */
+ session_index = oldest_per_user_translation_list_elt->value;
+
+ /* Get the session */
+ s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
+ session_index);
+ } while (snat_is_session_static (s));
+
+ if (snat_is_unk_proto_session (s))
+ {
+ clib_bihash_kv_16_8_t up_kv;
+ nat_ed_ses_key_t key;
+
+ /* Remove from lookup tables */
+ key.l_addr = s->in2out.addr;
+ key.r_addr = s->ext_host_addr;
+ key.fib_index = s->in2out.fib_index;
+ key.proto = s->in2out.port;
+ key.rsvd = 0;
+ key.l_port = 0;
+ up_kv.key[0] = key.as_u64[0];
+ up_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0))
+ clib_warning ("in2out key del failed");
+
+ key.l_addr = s->out2in.addr;
+ key.fib_index = s->out2in.fib_index;
+ up_kv.key[0] = key.as_u64[0];
+ up_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0))
+ clib_warning ("out2in key del failed");
+ }
+ else
+ {
+ /* Remove in2out, out2in keys */
+ kv0.key = s->in2out.as_u64;
+ if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out,
+ &kv0, 0 /* is_add */))
+ clib_warning ("in2out key delete failed");
+ kv0.key = s->out2in.as_u64;
+ if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in,
+ &kv0, 0 /* is_add */))
+ clib_warning ("out2in key delete failed");
+
+ /* log NAT event */
+ snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
+ s->out2in.addr.as_u32,
+ s->in2out.protocol,
+ s->in2out.port,
+ s->out2in.port,
+ s->in2out.fib_index);
+
+ snat_free_outside_address_and_port
+ (sm, thread_index, &s->out2in, s->outside_address_index);
+ }
+ s->outside_address_index = ~0;
+
+ if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, thread_index,
+ &key1, &address_index))
+ {
+ ASSERT(0);
+
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+ s->outside_address_index = address_index;
+ }
+ else
+ {
+ u8 static_mapping = 1;
+
+ /* First try to match static mapping by local address and port */
+ if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
+ {
+ static_mapping = 0;
+ /* Try to create dynamic translation */
+ if (snat_alloc_outside_address_and_port (sm, rx_fib_index0,
+ thread_index, &key1,
+ &address_index))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+ }
+
+ /* Create a new session */
+ pool_get (sm->per_thread_data[thread_index].sessions, s);
+ memset (s, 0, sizeof (*s));
+
+ s->outside_address_index = address_index;
+
+ if (static_mapping)
+ {
+ u->nstaticsessions++;
+ s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+ }
+ else
+ {
+ u->nsessions++;
+ }
+
+ /* Create list elts */
+ pool_get (sm->per_thread_data[thread_index].list_pool,
+ per_user_translation_list_elt);
+ clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
+ per_user_translation_list_elt -
+ sm->per_thread_data[thread_index].list_pool);
+
+ per_user_translation_list_elt->value =
+ s - sm->per_thread_data[thread_index].sessions;
+ s->per_user_index = per_user_translation_list_elt -
+ sm->per_thread_data[thread_index].list_pool;
+ s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s->per_user_list_head_index,
+ per_user_translation_list_elt -
+ sm->per_thread_data[thread_index].list_pool);
+ }
+
+ s->in2out = *key0;
+ s->out2in = key1;
+ s->out2in.protocol = key0->protocol;
+ s->out2in.fib_index = outside_fib_index;
+ s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
+ *sessionp = s;
+
+ /* Add to translation hashes */
+ kv0.key = s->in2out.as_u64;
+ kv0.value = s - sm->per_thread_data[thread_index].sessions;
+ if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
+ 1 /* is_add */))
+ clib_warning ("in2out key add failed");
+
+ kv0.key = s->out2in.as_u64;
+ kv0.value = s - sm->per_thread_data[thread_index].sessions;
+
+ if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
+ 1 /* is_add */))
+ clib_warning ("out2in key add failed");
+
+ /* log NAT event */
+ snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
+ s->out2in.addr.as_u32,
+ s->in2out.protocol,
+ s->in2out.port,
+ s->out2in.port,
+ s->in2out.fib_index);
+ return next0;
+}
+
+static_always_inline snat_in2out_error_t
+icmp_get_key (ip4_header_t *ip0,
+ snat_session_key_t *p_key0)
+{
+ icmp46_header_t *icmp0;
+ snat_session_key_t key0;
+ icmp_echo_header_t *echo0, *inner_echo0 = 0;
+ ip4_header_t *inner_ip0 = 0;
+ void *l4_header = 0;
+ icmp46_header_t *inner_icmp0;
+
+ icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+
+ if (!icmp_is_error_message (icmp0))
+ {
+ key0.protocol = SNAT_PROTOCOL_ICMP;
+ key0.addr = ip0->src_address;
+ key0.port = echo0->identifier;
+ }
+ else
+ {
+ inner_ip0 = (ip4_header_t *)(echo0+1);
+ l4_header = ip4_next_header (inner_ip0);
+ key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
+ key0.addr = inner_ip0->dst_address;
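+ /* the inside host's ICMP error quotes a packet it received, so the
+ embedded destination identifies the local end of the session */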
+ switch (key0.protocol)
+ {
+ case SNAT_PROTOCOL_ICMP:
+ inner_icmp0 = (icmp46_header_t*)l4_header;
+ inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
+ key0.port = inner_echo0->identifier;
+ break;
+ case SNAT_PROTOCOL_UDP:
+ case SNAT_PROTOCOL_TCP:
+ key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
+ break;
+ default:
+ return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
+ }
+ }
+ *p_key0 = key0;
+ return -1; /* success */
+}
+
+/**
+ * Get address and port values to be used for ICMP packet translation
+ * and create session if needed
+ *
+ * @param[in,out] sm NAT main
+ * @param[in,out] node NAT node runtime
+ * @param[in] thread_index thread index
+ * @param[in,out] b0 buffer containing packet to be translated
+ * @param[out] p_proto protocol used for matching
+ * @param[out] p_value address and port after NAT translation
+ * @param[out] p_dont_translate if packet should not be translated
+ * @param d optional output: when non-NULL, receives the session pointer
+ * (snat_session_t **)
+ * @param e optional parameter (unused here)
+ */
+u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e)
+{
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ snat_session_key_t key0;
+ snat_session_t *s0 = 0;
+ u8 dont_translate = 0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u32 next0 = ~0;
+ int err;
+
+ icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ err = icmp_get_key (ip0, &key0);
+ if (err != -1)
+ {
+ b0->error = node->errors[err];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
+ &value0))
+ {
+ if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
+ IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) &&
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
+ {
+ dont_translate = 1;
+ goto out;
+ }
+
+ if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
+ next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
+ &s0, node, next0, thread_index);
+
+ if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
+ goto out;
+ }
+ else
+ {
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
+ icmp0->type != ICMP4_echo_reply &&
+ !icmp_is_error_message (icmp0)))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
+ s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
+ value0.value);
+ }
+
+out:
+ *p_proto = key0.protocol;
+ if (s0)
+ *p_value = s0->out2in;
+ *p_dont_translate = dont_translate;
+ if (d)
+ *(snat_session_t**)d = s0;
+ return next0;
+}
+
+/**
+ * Get address and port values to be used for ICMP packet translation
+ *
+ * @param[in] sm NAT main
+ * @param[in,out] node NAT node runtime
+ * @param[in] thread_index thread index
+ * @param[in,out] b0 buffer containing packet to be translated
+ * @param[out] p_proto protocol used for matching
+ * @param[out] p_value address and port after NAT translation
+ * @param[out] p_dont_translate if packet should not be translated
+ * @param d optional parameter (unused here)
+ * @param e optional parameter (unused here)
+ */
+u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e)
+{
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ snat_session_key_t key0;
+ snat_session_key_t sm0;
+ u8 dont_translate = 0;
+ u8 is_addr_only;
+ u32 next0 = ~0;
+ int err;
+
+ icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ err = icmp_get_key (ip0, &key0);
+ if (err != -1)
+ {
+ b0->error = node->errors[err];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out2;
+ }
+ key0.fib_index = rx_fib_index0;
+
+ if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
+ {
+ if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
+ IP_PROTOCOL_ICMP, rx_fib_index0)))
+ {
+ dont_translate = 1;
+ goto out;
+ }
+
+ if (icmp_is_error_message (icmp0))
+ {
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
+ (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
+ !icmp_is_error_message (icmp0)))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
+out:
+ *p_value = sm0;
+out2:
+ *p_proto = key0.protocol;
+ *p_dont_translate = dont_translate;
+ return next0;
+}
+
+static inline u32 icmp_in2out (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0,
+ u32 sw_if_index0,
+ u32 rx_fib_index0,
+ vlib_node_runtime_t * node,
+ u32 next0,
+ u32 thread_index,
+ void *d,
+ void *e)
+{
+ snat_session_key_t sm0;
+ u8 protocol;
+ icmp_echo_header_t *echo0, *inner_echo0 = 0;
+ ip4_header_t *inner_ip0;
+ void *l4_header = 0;
+ icmp46_header_t *inner_icmp0;
+ u8 dont_translate;
+ u32 new_addr0, old_addr0;
+ u16 old_id0, new_id0;
+ ip_csum_t sum0;
+ u16 checksum0;
+ u32 next0_tmp;
+
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+
+ next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
+ &protocol, &sm0, &dont_translate, d, e);
+ if (next0_tmp != ~0)
+ next0 = next0_tmp;
+ if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
+ goto out;
+
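+ /* validate the ICMP checksum over the whole ICMP payload before
+ rewriting; a valid one's-complement sum folds to 0 or 0xffff */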
+ sum0 = ip_incremental_checksum (0, icmp0,
+ ntohs(ip0->length) - ip4_header_bytes (ip0));
+ checksum0 = ~ip_csum_fold (sum0);
+ if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
+ {
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
+ old_addr0 = ip0->src_address.as_u32;
+ new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
+ if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (!icmp_is_error_message (icmp0))
+ {
+ new_id0 = sm0.port;
+ if (PREDICT_FALSE(new_id0 != echo0->identifier))
+ {
+ old_id0 = echo0->identifier;
+ new_id0 = sm0.port;
+ echo0->identifier = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+ identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+ }
+ }
+ else
+ {
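+ /* ICMP error message: rewrite the embedded packet too, so the
+ quoted destination matches the translated flow */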
+ inner_ip0 = (ip4_header_t *)(echo0+1);
+ l4_header = ip4_next_header (inner_ip0);
+
+ if (!ip4_header_checksum_is_valid (inner_ip0))
+ {
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
+ old_addr0 = inner_ip0->dst_address.as_u32;
+ inner_ip0->dst_address = sm0.addr;
+ new_addr0 = inner_ip0->dst_address.as_u32;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+ dst_address /* changed member */);
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ switch (protocol)
+ {
+ case SNAT_PROTOCOL_ICMP:
+ inner_icmp0 = (icmp46_header_t*)l4_header;
+ inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
+
+ old_id0 = inner_echo0->identifier;
+ new_id0 = sm0.port;
+ inner_echo0->identifier = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+ identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+ break;
+ case SNAT_PROTOCOL_UDP:
+ case SNAT_PROTOCOL_TCP:
+ old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
+ new_id0 = sm0.port;
+ ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
+ dst_port);
+ icmp0->checksum = ip_csum_fold (sum0);
+ break;
+ default:
+ ASSERT(0);
+ }
+ }
+
+out:
+ return next0;
+}
+
+/**
+ * @brief Hairpinning
+ *
+ * Hairpinning allows two endpoints on the internal side of the NAT to
+ * communicate even if they only use each other's external IP addresses
+ * and ports.
+ *
+ * @param sm NAT main.
+ * @param b0 Vlib buffer.
+ * @param ip0 IP header.
+ * @param udp0 UDP header.
+ * @param tcp0 TCP header.
+ * @param proto0 NAT protocol.
+ */
+static inline void
+snat_hairpinning (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ udp_header_t * udp0,
+ tcp_header_t * tcp0,
+ u32 proto0)
+{
+ snat_session_key_t key0, sm0;
+ snat_session_t * s0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ ip_csum_t sum0;
+ u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
+ u16 new_dst_port0, old_dst_port0;
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.protocol = proto0;
+ key0.fib_index = sm->outside_fib_index;
+ kv0.key = key0.as_u64;
+
+ /* Check if destination matches a static mapping */
+ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+ {
+ new_dst_addr0 = sm0.addr.as_u32;
+ new_dst_port0 = sm0.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+ }
+ /* or active session */
+ else
+ {
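+ /* dynamic NAT ports start at 1024 and are split into per-worker
+ ranges, so the owning worker follows from the destination port */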
+ if (sm->num_workers > 1)
+ ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
+ else
+ ti = sm->num_workers;
+
+ if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
+ {
+ si = value0.value;
+
+ s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ new_dst_port0 = s0->in2out.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+ }
+ }
+
+ /* Destination is behind the same NAT, use internal address and port */
+ if (new_dst_addr0)
+ {
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_dst_addr0;
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ old_dst_port0 = tcp0->dst;
+ if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ tcp0->dst = new_dst_port0;
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
+ ip4_header_t /* cheat */, length);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ udp0->dst_port = new_dst_port0;
+ udp0->checksum = 0;
+ }
+ }
+ else
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ }
+ }
+}
+
+static inline void
+snat_icmp_hairpinning (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0)
+{
+ snat_session_key_t key0, sm0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
+ ip_csum_t sum0;
+ snat_session_t *s0;
+
+ if (!icmp_is_error_message (icmp0))
+ {
+ icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
+ u16 icmp_id0 = echo0->identifier;
+ key0.addr = ip0->dst_address;
+ key0.port = icmp_id0;
+ key0.protocol = SNAT_PROTOCOL_ICMP;
+ key0.fib_index = sm->outside_fib_index;
+ kv0.key = key0.as_u64;
+
+ if (sm->num_workers > 1)
+ ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
+ else
+ ti = sm->num_workers;
+
+ /* Check if destination is in active sessions */
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
+ &value0))
+ {
+ /* or static mappings */
+ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+ {
+ new_dst_addr0 = sm0.addr.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+ }
+ }
+ else
+ {
+ si = value0.value;
+
+ s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+ echo0->identifier = s0->in2out.port;
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
+ icmp_echo_header_t, identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+ }
+
+ /* Destination is behind the same NAT, use internal address and port */
+ if (new_dst_addr0)
+ {
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_dst_addr0;
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ ip0->checksum = ip_csum_fold (sum0);
+ }
+ }
+}
+
+static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0,
+ u32 sw_if_index0,
+ u32 rx_fib_index0,
+ vlib_node_runtime_t * node,
+ u32 next0,
+ f64 now,
+ u32 thread_index,
+ snat_session_t ** p_s0)
+{
+ next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, thread_index, p_s0, 0);
+ snat_session_t * s0 = *p_s0;
+ if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
+ {
+ /* Hairpinning */
+ if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
+ snat_icmp_hairpinning(sm, b0, ip0, icmp0);
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
+ /* Per-user LRU list maintenance for dynamic translations */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+ }
+ return next0;
+}
+
+static inline void
+snat_hairpinning_unknown_proto (snat_main_t *sm,
+ vlib_buffer_t * b,
+ ip4_header_t * ip)
+{
+ u32 old_addr, new_addr = 0, ti = 0;
+ clib_bihash_kv_8_8_t kv, value;
+ clib_bihash_kv_16_8_t s_kv, s_value;
+ nat_ed_ses_key_t key;
+ snat_session_key_t m_key;
+ snat_static_mapping_t *m;
+ ip_csum_t sum;
+ snat_session_t *s;
+
+ old_addr = ip->dst_address.as_u32;
+ key.l_addr.as_u32 = ip->dst_address.as_u32;
+ key.r_addr.as_u32 = ip->src_address.as_u32;
+ key.fib_index = sm->outside_fib_index;
+ key.proto = ip->protocol;
+ key.rsvd = 0;
+ key.l_port = 0;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
+ {
+ m_key.addr = ip->dst_address;
+ m_key.fib_index = sm->outside_fib_index;
+ m_key.port = 0;
+ m_key.protocol = 0;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ return;
+
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+ if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
+ new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
+ }
+ else
+ {
+ if (sm->num_workers > 1)
+ ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
+ else
+ ti = sm->num_workers;
+
+ s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
+ if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
+ new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
+ }
+ sum = ip->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
+ ip->checksum = ip_csum_fold (sum);
+}
+
+static snat_session_t *
+snat_in2out_unknown_proto (snat_main_t *sm,
+ vlib_buffer_t * b,
+ ip4_header_t * ip,
+ u32 rx_fib_index,
+ u32 thread_index,
+ f64 now,
+ vlib_main_t * vm,
+ vlib_node_runtime_t * node)
+{
+ clib_bihash_kv_8_8_t kv, value;
+ clib_bihash_kv_16_8_t s_kv, s_value;
+ snat_static_mapping_t *m;
+ snat_session_key_t m_key;
+ u32 old_addr, new_addr = 0;
+ ip_csum_t sum;
+ snat_user_key_t u_key;
+ snat_user_t *u;
+ dlist_elt_t *head, *elt, *oldest;
+ snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
+ u32 elt_index, head_index, ses_index, oldest_index;
+ snat_session_t * s;
+ nat_ed_ses_key_t key;
+ u32 address_index = ~0;
+ int i;
+ u8 is_sm = 0;
+
+ old_addr = ip->src_address.as_u32;
+
+ key.l_addr = ip->src_address;
+ key.r_addr = ip->dst_address;
+ key.fib_index = rx_fib_index;
+ key.proto = ip->protocol;
+ key.rsvd = 0;
+ key.l_port = 0;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+
+ if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
+ {
+ s = pool_elt_at_index (tsm->sessions, s_value.value);
+ new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
+ }
+ else
+ {
+ if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
+ {
+ b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
+ return 0;
+ }
+
+ u_key.addr = ip->src_address;
+ u_key.fib_index = rx_fib_index;
+ kv.key = u_key.as_u64;
+
+ /* Ever heard of the "user" = src ip4 address before? */
+ if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
+ {
+ /* no, make a new one */
+ pool_get (tsm->users, u);
+ memset (u, 0, sizeof (*u));
+ u->addr = ip->src_address;
+ u->fib_index = rx_fib_index;
+
+ pool_get (tsm->list_pool, head);
+ u->sessions_per_user_list_head_index = head - tsm->list_pool;
+
+ clib_dlist_init (tsm->list_pool,
+ u->sessions_per_user_list_head_index);
+
+ kv.value = u - tsm->users;
+
+ /* add user */
+ clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1);
+ }
+ else
+ {
+ u = pool_elt_at_index (tsm->users, value.value);
+ }
+
+ m_key.addr = ip->src_address;
+ m_key.port = 0;
+ m_key.protocol = 0;
+ m_key.fib_index = rx_fib_index;
+ kv.key = m_key.as_u64;
+
+ /* Try to find static mapping first */
+ if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
+ {
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+ new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
+ is_sm = 1;
+ goto create_ses;
+ }
+ /* Fallback to 3-tuple key */
+ else
+ {
+ /* Reuse the outside address of an existing session to the same destination */
+ if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
+ {
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tsm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ ses_index = elt->value;
+ while (ses_index != ~0)
+ {
+ s = pool_elt_at_index (tsm->sessions, ses_index);
+ elt_index = elt->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ ses_index = elt->value;
+
+ if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
+ {
+ new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
+ address_index = s->outside_address_index;
+
+ key.fib_index = sm->outside_fib_index;
+ key.l_addr.as_u32 = new_addr;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
+ break;
+
+ goto create_ses;
+ }
+ }
+ }
+ key.fib_index = sm->outside_fib_index;
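+ /* otherwise take the first outside address with no existing session
+ for this (proto, remote address) pair */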
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
+ {
+ new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
+ address_index = i;
+ goto create_ses;
+ }
+ }
+ return 0;
+ }
+
+create_ses:
+ /* Over quota? Recycle the least recently used dynamic translation */
+ if (u->nsessions >= sm->max_translations_per_user && !is_sm)
+ {
+ /* Remove the oldest dynamic translation */
+ do {
+ oldest_index = clib_dlist_remove_head (
+ tsm->list_pool, u->sessions_per_user_list_head_index);
+
+ ASSERT (oldest_index != ~0);
+
+ /* add it back to the end of the LRU list */
+ clib_dlist_addtail (tsm->list_pool,
+ u->sessions_per_user_list_head_index,
+ oldest_index);
+ /* Get the list element */
+ oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
+
+ /* Get the session index from the list element */
+ ses_index = oldest->value;
+
+ /* Get the session */
+ s = pool_elt_at_index (tsm->sessions, ses_index);
+ } while (snat_is_session_static (s));
+
+ if (snat_is_unk_proto_session (s))
+ {
+ /* Remove from lookup tables */
+ key.l_addr = s->in2out.addr;
+ key.r_addr = s->ext_host_addr;
+ key.fib_index = s->in2out.fib_index;
+ key.proto = s->in2out.port;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0))
+ clib_warning ("in2out key del failed");
+
+ key.l_addr = s->out2in.addr;
+ key.fib_index = s->out2in.fib_index;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0))
+ clib_warning ("out2in key del failed");
+ }
+ else
+ {
+ /* log NAT event */
+ snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
+ s->out2in.addr.as_u32,
+ s->in2out.protocol,
+ s->in2out.port,
+ s->out2in.port,
+ s->in2out.fib_index);
+
+ snat_free_outside_address_and_port (sm, thread_index, &s->out2in,
+ s->outside_address_index);
+
+ /* Remove in2out, out2in keys */
+ kv.key = s->in2out.as_u64;
+ if (clib_bihash_add_del_8_8 (
+ &sm->per_thread_data[thread_index].in2out, &kv, 0))
+ clib_warning ("in2out key del failed");
+ kv.key = s->out2in.as_u64;
+ if (clib_bihash_add_del_8_8 (
+ &sm->per_thread_data[thread_index].out2in, &kv, 0))
+ clib_warning ("out2in key del failed");
+ }
+ }
+ else
+ {
+ /* Create a new session */
+ pool_get (tsm->sessions, s);
+ memset (s, 0, sizeof (*s));
+
+ /* Create list elts */
+ pool_get (tsm->list_pool, elt);
+ clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
+ elt->value = s - tsm->sessions;
+ s->per_user_index = elt - tsm->list_pool;
+ s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+ clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+ s->per_user_index);
+ }
+
+ s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
+ s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
+ s->outside_address_index = address_index;
+ s->out2in.addr.as_u32 = new_addr;
+ s->out2in.fib_index = sm->outside_fib_index;
+ s->in2out.addr.as_u32 = old_addr;
+ s->in2out.fib_index = rx_fib_index;
+ s->in2out.port = s->out2in.port = ip->protocol;
+ if (is_sm)
+ {
+ u->nstaticsessions++;
+ s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+ }
+ else
+ {
+ u->nsessions++;
+ }
+
+ /* Add to lookup tables */
+ key.l_addr.as_u32 = old_addr;
+ key.r_addr = ip->dst_address;
+ key.proto = ip->protocol;
+ key.fib_index = rx_fib_index;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ s_kv.value = s - tsm->sessions;
+ if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
+ clib_warning ("in2out key add failed");
+
+ key.l_addr.as_u32 = new_addr;
+ key.fib_index = sm->outside_fib_index;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
+ clib_warning ("out2in key add failed");
+ }
+
+ /* Update IP checksum */
+ sum = ip->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
+ ip->checksum = ip_csum_fold (sum);
+
+ /* Accounting */
+ s->last_heard = now;
+ s->total_pkts++;
+ s->total_bytes += vlib_buffer_length_in_chain (vm, b);
+ /* Per-user LRU list maintenance */
+ clib_dlist_remove (tsm->list_pool, s->per_user_index);
+ clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+ s->per_user_index);
+
+ /* Hairpinning */
+ if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+ snat_hairpinning_unknown_proto(sm, b, ip);
+
+ if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
+
+ return s;
+}
+
+static snat_session_t *
+snat_in2out_lb (snat_main_t *sm,
+ vlib_buffer_t * b,
+ ip4_header_t * ip,
+ u32 rx_fib_index,
+ u32 thread_index,
+ f64 now,
+ vlib_main_t * vm,
+ vlib_node_runtime_t * node)
+{
+ nat_ed_ses_key_t key;
+ clib_bihash_kv_16_8_t s_kv, s_value;
+ udp_header_t *udp = ip4_next_header (ip);
+ tcp_header_t *tcp = (tcp_header_t *) udp;
+ snat_session_t *s = 0;
+ snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
+ u32 old_addr, new_addr;
+ u16 new_port, old_port;
+ ip_csum_t sum;
+ u32 proto = ip_proto_to_snat_proto (ip->protocol);
+ snat_session_key_t e_key, l_key;
+ clib_bihash_kv_8_8_t kv, value;
+ snat_user_key_t u_key;
+ snat_user_t *u;
+ dlist_elt_t *head, *elt;
+
+ old_addr = ip->src_address.as_u32;
+
+ key.l_addr = ip->src_address;
+ key.r_addr = ip->dst_address;
+ key.fib_index = rx_fib_index;
+ key.proto = ip->protocol;
+ key.rsvd = 0;
+ key.l_port = udp->src_port;
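+ /* unlike unknown-proto sessions, the LB key includes the L4 source port */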
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+
+ if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
+ {
+ s = pool_elt_at_index (tsm->sessions, s_value.value);
+ }
+ else
+ {
+ if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
+ {
+ b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
+ return 0;
+ }
+
+ l_key.addr = ip->src_address;
+ l_key.port = udp->src_port;
+ l_key.protocol = proto;
+ l_key.fib_index = rx_fib_index;
+ if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0))
+ return 0;
+
+ u_key.addr = ip->src_address;
+ u_key.fib_index = rx_fib_index;
+ kv.key = u_key.as_u64;
+
+ /* Ever heard of the "user" = src ip4 address before? */
+ if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
+ {
+ /* no, make a new one */
+ pool_get (tsm->users, u);
+ memset (u, 0, sizeof (*u));
+ u->addr = ip->src_address;
+ u->fib_index = rx_fib_index;
+
+ pool_get (tsm->list_pool, head);
+ u->sessions_per_user_list_head_index = head - tsm->list_pool;
+
+ clib_dlist_init (tsm->list_pool,
+ u->sessions_per_user_list_head_index);
+
+ kv.value = u - tsm->users;
+
+ /* add user */
+ if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
+ clib_warning ("user key add failed");
+ }
+ else
+ {
+ u = pool_elt_at_index (tsm->users, value.value);
+ }
+
+ /* Create a new session */
+ pool_get (tsm->sessions, s);
+ memset (s, 0, sizeof (*s));
+
+ s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
+ s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+ s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
+ s->outside_address_index = ~0;
+ s->in2out = l_key;
+ s->out2in = e_key;
+ u->nstaticsessions++;
+
+ /* Create list elts */
+ pool_get (tsm->list_pool, elt);
+ clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
+ elt->value = s - tsm->sessions;
+ s->per_user_index = elt - tsm->list_pool;
+ s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+ clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+ s->per_user_index);
+
+ /* Add to lookup tables */
+ s_kv.value = s - tsm->sessions;
+ if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
+ clib_warning ("in2out-ed key add failed");
+
+ key.l_addr = e_key.addr;
+ key.fib_index = e_key.fib_index;
+ key.l_port = e_key.port;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
+ clib_warning ("out2in-ed key add failed");
+ }
+
+ new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
+
+ /* Update IP checksum */
+ sum = ip->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
+ ip->checksum = ip_csum_fold (sum);
+
+ if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
+ {
+ old_port = tcp->src_port;
+ tcp->src_port = s->out2in.port;
+ new_port = tcp->src_port;
+
+ sum = tcp->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
+ sum = ip_csum_update (sum, old_port, new_port, ip4_header_t /* cheat */, length);
+ tcp->checksum = ip_csum_fold(sum);
+ }
+ else
+ {
+ udp->src_port = s->out2in.port;
+ udp->checksum = 0;
+ }
+
+ if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
+
+ /* Accounting */
+ s->last_heard = now;
+ s->total_pkts++;
+ s->total_bytes += vlib_buffer_length_in_chain (vm, b);
+ return s;
+}
+
+static inline uword
+snat_in2out_node_fn_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_slow_path,
+ int is_output_feature)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_in2out_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+ f64 now = vlib_time_now (vm);
+ u32 stats_node_index;
+ u32 thread_index = vlib_get_thread_index ();
+
+ stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
+ snat_in2out_node.index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ip4_header_t * ip0, * ip1;
+ ip_csum_t sum0, sum1;
+ u32 new_addr0, old_addr0, new_addr1, old_addr1;
+ u16 old_port0, new_port0, old_port1, new_port1;
+ udp_header_t * udp0, * udp1;
+ tcp_header_t * tcp0, * tcp1;
+ icmp46_header_t * icmp0, * icmp1;
+ snat_session_key_t key0, key1;
+ u32 rx_fib_index0, rx_fib_index1;
+ u32 proto0, proto1;
+ snat_session_t * s0 = 0, * s1 = 0;
+ clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
+ u32 iph_offset0 = 0, iph_offset1 = 0;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (is_output_feature)
+ iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
+
+ ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
+ iph_offset0);
+
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace00;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ /* Next configured feature, probably ip4-lookup */
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE (proto0 == ~0))
+ {
+ s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
+ thread_index, now, vm, node);
+ if (!s0)
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_in2out_slow_path
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
+ node, next0, now, thread_index, &s0);
+ goto trace00;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace00;
+ }
+ }
+
+ key0.addr = ip0->src_address;
+ key0.port = udp0->src_port;
+ key0.protocol = proto0;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (PREDICT_FALSE (clib_bihash_search_8_8 (
+ &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
+ {
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
+ ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
+ goto trace00;
+
+ next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
+ &s0, node, next0, thread_index);
+ if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
+ goto trace00;
+ }
+ else
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace00;
+ }
+ }
+ else
+ {
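+ /* a stored session index of ~0 marks a load-balanced static
+ mapping; it is resolved by snat_in2out_lb on the slow path */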
+ if (PREDICT_FALSE (value0.value == ~0ULL))
+ {
+ if (is_slow_path)
+ {
+ s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
+ thread_index, now, vm, node);
+ if (!s0)
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto trace00;
+ }
+ else
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace00;
+ }
+ }
+ else
+ {
+ s0 = pool_elt_at_index (
+ sm->per_thread_data[thread_index].sessions,
+ value0.value);
+ }
+ }
+
+ old_addr0 = ip0->src_address.as_u32;
+ ip0->src_address = s0->out2in.addr;
+ new_addr0 = ip0->src_address.as_u32;
+ if (!is_output_feature)
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->src_port;
+ tcp0->src_port = s0->out2in.port;
+ new_port0 = tcp0->src_port;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->src_port;
+ udp0->src_port = s0->out2in.port;
+ udp0->checksum = 0;
+ }
+
+ /* Hairpinning */
+ if (!is_output_feature)
+ snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+ trace00:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->is_slow_path = is_slow_path;
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (s0)
+ t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ if (is_output_feature)
+ iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
+
+ ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
+ iph_offset1);
+
+ udp1 = ip4_next_header (ip1);
+ tcp1 = (tcp_header_t *) udp1;
+ icmp1 = (icmp46_header_t *) udp1;
+
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+ rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index1);
+
+ if (PREDICT_FALSE(ip1->ttl == 1))
+ {
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace01;
+ }
+
+ proto1 = ip_proto_to_snat_proto (ip1->protocol);
+
+ /* Next configured feature, probably ip4-lookup */
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE (proto1 == ~0))
+ {
+ s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
+ thread_index, now, vm, node);
+ if (!s1)
+ next1 = SNAT_IN2OUT_NEXT_DROP;
+ goto trace01;
+ }
+
+ if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
+ {
+ next1 = icmp_in2out_slow_path
+ (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
+ next1, now, thread_index, &s1);
+ goto trace01;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
+ {
+ next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace01;
+ }
+ }
+
+ key1.addr = ip1->src_address;
+ key1.port = udp1->src_port;
+ key1.protocol = proto1;
+ key1.fib_index = rx_fib_index1;
+
+ kv1.key = key1.as_u64;
+
+ if (PREDICT_FALSE(clib_bihash_search_8_8 (
+ &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
+ {
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
+ ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature)
+ goto trace01;
+
+ next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
+ &s1, node, next1, thread_index);
+ if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
+ goto trace01;
+ }
+ else
+ {
+ next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace01;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (value1.value == ~0ULL))
+ {
+ if (is_slow_path)
+ {
+ s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
+ thread_index, now, vm, node);
+ if (!s1)
+ next1 = SNAT_IN2OUT_NEXT_DROP;
+ goto trace01;
+ }
+ else
+ {
+ next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace01;
+ }
+ }
+ else
+ {
+ s1 = pool_elt_at_index (
+ sm->per_thread_data[thread_index].sessions,
+ value1.value);
+ }
+ }
+
+ old_addr1 = ip1->src_address.as_u32;
+ ip1->src_address = s1->out2in.addr;
+ new_addr1 = ip1->src_address.as_u32;
+ if (!is_output_feature)
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
+
+ sum1 = ip1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
+ {
+ old_port1 = tcp1->src_port;
+ tcp1->src_port = s1->out2in.port;
+ new_port1 = tcp1->src_port;
+
+ sum1 = tcp1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum1 = ip_csum_update (sum1, old_port1, new_port1,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp1->checksum = ip_csum_fold(sum1);
+ }
+ else
+ {
+ old_port1 = udp1->src_port;
+ udp1->src_port = s1->out2in.port;
+ udp1->checksum = 0;
+ }
+
+ /* Hairpinning */
+ if (!is_output_feature)
+ snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
+
+ /* Accounting */
+ s1->last_heard = now;
+ s1->total_pkts++;
+ s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s1))
+ {
+ clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+ s1->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s1->per_user_list_head_index,
+ s1->per_user_index);
+ }
+ trace01:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ t->session_index = ~0;
+ if (s1)
+ t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
+ }
+
+ pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ u32 new_addr0, old_addr0;
+ u16 old_port0, new_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ icmp46_header_t * icmp0;
+ snat_session_key_t key0;
+ u32 rx_fib_index0;
+ u32 proto0;
+ snat_session_t * s0 = 0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u32 iph_offset0 = 0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+ if (is_output_feature)
+ iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
+
+ ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
+ iph_offset0);
+
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace0;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ /* Next configured feature, probably ip4-lookup */
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE (proto0 == ~0))
+ {
+ s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
+ thread_index, now, vm, node);
+ if (!s0)
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_in2out_slow_path
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, now, thread_index, &s0);
+ goto trace0;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace0;
+ }
+ }
+
+ key0.addr = ip0->src_address;
+ key0.port = udp0->src_port;
+ key0.protocol = proto0;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
+ &kv0, &value0))
+ {
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
+ ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature)
+ goto trace0;
+
+ next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
+ &s0, node, next0, thread_index);
+
+ if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
+ goto trace0;
+ }
+ else
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace0;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (value0.value == ~0ULL))
+ {
+ if (is_slow_path)
+ {
+ s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
+ thread_index, now, vm, node);
+ if (!s0)
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto trace0;
+ }
+ else
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace0;
+ }
+ }
+ else
+ {
+ s0 = pool_elt_at_index (
+ sm->per_thread_data[thread_index].sessions,
+ value0.value);
+ }
+ }
+
+ old_addr0 = ip0->src_address.as_u32;
+ ip0->src_address = s0->out2in.addr;
+ new_addr0 = ip0->src_address.as_u32;
+ if (!is_output_feature)
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->src_port;
+ tcp0->src_port = s0->out2in.port;
+ new_port0 = tcp0->src_port;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->src_port;
+ udp0->src_port = s0->out2in.port;
+ udp0->checksum = 0;
+ }
+
+ /* Hairpinning */
+ if (!is_output_feature)
+ snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+
+ trace0:
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->is_slow_path = is_slow_path;
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (s0)
+ t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, stats_node_index,
+ SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+static uword
+snat_in2out_fast_path_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
+}
+
+VLIB_REGISTER_NODE (snat_in2out_node) = {
+ .function = snat_in2out_fast_path_fn,
+ .name = "nat44-in2out",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
+ [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
+
+static uword
+snat_in2out_output_fast_path_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
+}
+
+VLIB_REGISTER_NODE (snat_in2out_output_node) = {
+ .function = snat_in2out_output_fast_path_fn,
+ .name = "nat44-in2out-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
+ [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
+ [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
+ snat_in2out_output_fast_path_fn);
+
+static uword
+snat_in2out_slow_path_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
+}
+
+VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
+ .function = snat_in2out_slow_path_fn,
+ .name = "nat44-in2out-slowpath",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
+ [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
+ snat_in2out_slow_path_fn);
+
+static uword
+snat_in2out_output_slow_path_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
+}
+
+VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
+ .function = snat_in2out_output_slow_path_fn,
+ .name = "nat44-in2out-output-slowpath",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
+ [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
+ [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
+ snat_in2out_output_slow_path_fn);
+
+/**************************/
+/*** deterministic mode ***/
+/**************************/
+static uword
+snat_det_in2out_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_in2out_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+ u32 now = (u32) vlib_time_now (vm);
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ip4_header_t * ip0, * ip1;
+ ip_csum_t sum0, sum1;
+ ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
+ u16 old_port0, new_port0, lo_port0, i0;
+ u16 old_port1, new_port1, lo_port1, i1;
+ udp_header_t * udp0, * udp1;
+ tcp_header_t * tcp0, * tcp1;
+ u32 proto0, proto1;
+ snat_det_out_key_t key0, key1;
+ snat_det_map_t * dm0, * dm1;
+ snat_det_session_t * ses0 = 0, * ses1 = 0;
+ u32 rx_fib_index0, rx_fib_index1;
+ icmp46_header_t * icmp0, * icmp1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+ next1 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace0;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+ icmp0 = (icmp46_header_t *) udp0;
+
+ next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
+ rx_fib_index0, node, next0, thread_index,
+ &ses0, &dm0);
+ goto trace0;
+ }
+
+ dm0 = snat_det_map_by_user(sm, &ip0->src_address);
+ if (PREDICT_FALSE(!dm0))
+ {
+ clib_warning("no match for internal host %U",
+ format_ip4_address, &ip0->src_address);
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+
+ snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
+
+ key0.ext_host_addr = ip0->dst_address;
+ key0.ext_host_port = tcp0->dst;
+
+ ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
+ if (PREDICT_FALSE(!ses0))
+ {
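+ /* probe the user's deterministic port block, starting at an
+ offset derived from the inside source port */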
+ for (i0 = 0; i0 < dm0->ports_per_host; i0++)
+ {
+ key0.out_port = clib_host_to_net_u16 (lo_port0 +
+ ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
+
+ if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
+ continue;
+
+ ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
+ break;
+ }
+ if (PREDICT_FALSE(!ses0))
+ {
+ /* too many sessions for user, send ICMP error packet */
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_destination_unreachable_host,
+ 0);
+ next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace0;
+ }
+ }
+
+ new_port0 = ses0->out.out_port;
+
+ old_addr0.as_u32 = ip0->src_address.as_u32;
+ ip0->src_address.as_u32 = new_addr0.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
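+ /* minimal TCP state tracking; the resulting state selects the
+ transitory vs established timeout below */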
+ if (tcp0->flags & TCP_FLAG_SYN)
+ ses0->state = SNAT_SESSION_TCP_SYN_SENT;
+ else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
+ ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
+ else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
+ ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
+ else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
+ snat_det_ses_close(dm0, ses0);
+ else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
+ ses0->state = SNAT_SESSION_TCP_LAST_ACK;
+ else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
+ ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
+
+ old_port0 = tcp0->src;
+ tcp0->src = new_port0;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ ses0->state = SNAT_SESSION_UDP_ACTIVE;
+ old_port0 = udp0->src_port;
+ udp0->src_port = new_port0;
+ udp0->checksum = 0;
+ }
+
+ switch(ses0->state)
+ {
+ case SNAT_SESSION_UDP_ACTIVE:
+ ses0->expire = now + sm->udp_timeout;
+ break;
+ case SNAT_SESSION_TCP_SYN_SENT:
+ case SNAT_SESSION_TCP_FIN_WAIT:
+ case SNAT_SESSION_TCP_CLOSE_WAIT:
+ case SNAT_SESSION_TCP_LAST_ACK:
+ ses0->expire = now + sm->tcp_transitory_timeout;
+ break;
+ case SNAT_SESSION_TCP_ESTABLISHED:
+ ses0->expire = now + sm->tcp_established_timeout;
+ break;
+ }
+
+ trace0:
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->is_slow_path = 0;
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (ses0)
+ t->session_index = ses0 - dm0->sessions;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ ip1 = vlib_buffer_get_current (b1);
+ udp1 = ip4_next_header (ip1);
+ tcp1 = (tcp_header_t *) udp1;
+
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE(ip1->ttl == 1))
+ {
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace1;
+ }
+
+ proto1 = ip_proto_to_snat_proto (ip1->protocol);
+
+ if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
+ {
+ rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
+ icmp1 = (icmp46_header_t *) udp1;
+
+ next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
+ rx_fib_index1, node, next1, thread_index,
+ &ses1, &dm1);
+ goto trace1;
+ }
+
+ dm1 = snat_det_map_by_user(sm, &ip1->src_address);
+ if (PREDICT_FALSE(!dm1))
+ {
+ clib_warning("no match for internal host %U",
+ format_ip4_address, &ip1->src_address);
+ next1 = SNAT_IN2OUT_NEXT_DROP;
+ b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
+ goto trace1;
+ }
+
+ snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
+
+ key1.ext_host_addr = ip1->dst_address;
+ key1.ext_host_port = tcp1->dst;
+
+ ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
+ if (PREDICT_FALSE(!ses1))
+ {
+ for (i1 = 0; i1 < dm1->ports_per_host; i1++)
+ {
+ key1.out_port = clib_host_to_net_u16 (lo_port1 +
+ ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
+
+ if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
+ continue;
+
+ ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
+ break;
+ }
+ if (PREDICT_FALSE(!ses1))
+ {
+ /* too many sessions for user, send ICMP error packet */
+
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_destination_unreachable_host,
+ 0);
+ next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace1;
+ }
+ }
+
+ new_port1 = ses1->out.out_port;
+
+ old_addr1.as_u32 = ip1->src_address.as_u32;
+ ip1->src_address.as_u32 = new_addr1.as_u32;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
+
+ sum1 = ip1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
+ {
+ if (tcp1->flags & TCP_FLAG_SYN)
+ ses1->state = SNAT_SESSION_TCP_SYN_SENT;
+ else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
+ ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
+ else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
+ ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
+ else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
+ snat_det_ses_close(dm1, ses1);
+ else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
+ ses1->state = SNAT_SESSION_TCP_LAST_ACK;
+ else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
+ ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
+
+ old_port1 = tcp1->src;
+ tcp1->src = new_port1;
+
+ sum1 = tcp1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum1 = ip_csum_update (sum1, old_port1, new_port1,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp1->checksum = ip_csum_fold(sum1);
+ }
+ else
+ {
+ ses1->state = SNAT_SESSION_UDP_ACTIVE;
+ old_port1 = udp1->src_port;
+ udp1->src_port = new_port1;
+ udp1->checksum = 0;
+ }
+
+ switch(ses1->state)
+ {
+ case SNAT_SESSION_UDP_ACTIVE:
+ ses1->expire = now + sm->udp_timeout;
+ break;
+ case SNAT_SESSION_TCP_SYN_SENT:
+ case SNAT_SESSION_TCP_FIN_WAIT:
+ case SNAT_SESSION_TCP_CLOSE_WAIT:
+ case SNAT_SESSION_TCP_LAST_ACK:
+ ses1->expire = now + sm->tcp_transitory_timeout;
+ break;
+ case SNAT_SESSION_TCP_ESTABLISHED:
+ ses1->expire = now + sm->tcp_established_timeout;
+ break;
+ }
+
+ trace1:
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->is_slow_path = 0;
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ t->session_index = ~0;
+ if (ses1)
+ t->session_index = ses1 - dm1->sessions;
+ }
+
+ pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ ip4_address_t new_addr0, old_addr0;
+ u16 old_port0, new_port0, lo_port0, i0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ u32 proto0;
+ snat_det_out_key_t key0;
+ snat_det_map_t * dm0;
+ snat_det_session_t * ses0 = 0;
+ u32 rx_fib_index0;
+ icmp46_header_t * icmp0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace00;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+ icmp0 = (icmp46_header_t *) udp0;
+
+ next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
+ rx_fib_index0, node, next0, thread_index,
+ &ses0, &dm0);
+ goto trace00;
+ }
+
+ dm0 = snat_det_map_by_user(sm, &ip0->src_address);
+ if (PREDICT_FALSE(!dm0))
+ {
+ clib_warning("no match for internal host %U",
+ format_ip4_address, &ip0->src_address);
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
+ goto trace00;
+ }
+
+ snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
+
+ key0.ext_host_addr = ip0->dst_address;
+ key0.ext_host_port = tcp0->dst;
+
+ ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
+ if (PREDICT_FALSE(!ses0))
+ {
+ for (i0 = 0; i0 < dm0->ports_per_host; i0++)
+ {
+ key0.out_port = clib_host_to_net_u16 (lo_port0 +
+ ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
+
+ if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
+ continue;
+
+ ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
+ break;
+ }
+ if (PREDICT_FALSE(!ses0))
+ {
+ /* too many sessions for user, send ICMP error packet */
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_destination_unreachable_host,
+ 0);
+ next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace00;
+ }
+ }
+
+ new_port0 = ses0->out.out_port;
+
+ old_addr0.as_u32 = ip0->src_address.as_u32;
+ ip0->src_address.as_u32 = new_addr0.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ if (tcp0->flags & TCP_FLAG_SYN)
+ ses0->state = SNAT_SESSION_TCP_SYN_SENT;
+ else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
+ ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
+ else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
+ ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
+ else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
+ snat_det_ses_close(dm0, ses0);
+ else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
+ ses0->state = SNAT_SESSION_TCP_LAST_ACK;
+ else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
+ ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
+
+ old_port0 = tcp0->src;
+ tcp0->src = new_port0;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ ses0->state = SNAT_SESSION_UDP_ACTIVE;
+ old_port0 = udp0->src_port;
+ udp0->src_port = new_port0;
+ udp0->checksum = 0;
+ }
+
+ switch(ses0->state)
+ {
+ case SNAT_SESSION_UDP_ACTIVE:
+ ses0->expire = now + sm->udp_timeout;
+ break;
+ case SNAT_SESSION_TCP_SYN_SENT:
+ case SNAT_SESSION_TCP_FIN_WAIT:
+ case SNAT_SESSION_TCP_CLOSE_WAIT:
+ case SNAT_SESSION_TCP_LAST_ACK:
+ ses0->expire = now + sm->tcp_transitory_timeout;
+ break;
+ case SNAT_SESSION_TCP_ESTABLISHED:
+ ses0->expire = now + sm->tcp_established_timeout;
+ break;
+ }
+
+ trace00:
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->is_slow_path = 0;
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (ses0)
+ t->session_index = ses0 - dm0->sessions;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, snat_det_in2out_node.index,
+ SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_det_in2out_node) = {
+ .function = snat_det_in2out_node_fn,
+ .name = "nat44-det-in2out",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = 3,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
+
+/**
+ * Get address and port values to be used for ICMP packet translation
+ * and create session if needed
+ *
+ * @param[in,out] sm NAT main
+ * @param[in,out] node NAT node runtime
+ * @param[in] thread_index thread index
+ * @param[in,out] b0 buffer containing packet to be translated
+ * @param[in] ip0 IPv4 header of the packet
+ * @param[out] p_proto protocol used for matching
+ * @param[out] p_value address and port after NAT translation
+ * @param[out] p_dont_translate set to 1 if the packet should not be translated
+ * @param[out] d optional pointer to return the session (snat_det_session_t **)
+ * @param[out] e optional pointer to return the mapping (snat_det_map_t **)
+ */
+u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e)
+{
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ u8 protocol;
+ snat_det_out_key_t key0;
+ u8 dont_translate = 0;
+ u32 next0 = ~0;
+ icmp_echo_header_t *echo0, *inner_echo0 = 0;
+ ip4_header_t *inner_ip0;
+ void *l4_header = 0;
+ icmp46_header_t *inner_icmp0;
+ snat_det_map_t * dm0 = 0;
+ ip4_address_t new_addr0;
+ u16 lo_port0, i0;
+ snat_det_session_t * ses0 = 0;
+ ip4_address_t in_addr;
+ u16 in_port;
+
+ icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ if (!icmp_is_error_message (icmp0))
+ {
+ protocol = SNAT_PROTOCOL_ICMP;
+ in_addr = ip0->src_address;
+ in_port = echo0->identifier;
+ }
+ else
+ {
+ inner_ip0 = (ip4_header_t *)(echo0+1);
+ l4_header = ip4_next_header (inner_ip0);
+ protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
+ in_addr = inner_ip0->dst_address;
+ switch (protocol)
+ {
+ case SNAT_PROTOCOL_ICMP:
+ inner_icmp0 = (icmp46_header_t*)l4_header;
+ inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
+ in_port = inner_echo0->identifier;
+ break;
+ case SNAT_PROTOCOL_UDP:
+ case SNAT_PROTOCOL_TCP:
+ in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
+ break;
+ default:
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+ }
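+ /* For ICMP error messages the lookup key comes from the embedded
+ * (inner) header: the inner destination address/port is the original
+ * inside source, which is why in_addr/in_port above are taken from
+ * the payload rather than the outer header. */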
+
+ dm0 = snat_det_map_by_user(sm, &in_addr);
+ if (PREDICT_FALSE(!dm0))
+ {
+ clib_warning("no match for internal host %U",
+ format_ip4_address, &in_addr);
+ if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
+ IP_PROTOCOL_ICMP, rx_fib_index0)))
+ {
+ dont_translate = 1;
+ goto out;
+ }
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
+ goto out;
+ }
+
+ snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
+
+ key0.ext_host_addr = ip0->dst_address;
+ key0.ext_host_port = 0;
+
+ ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
+ if (PREDICT_FALSE(!ses0))
+ {
+ if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
+ IP_PROTOCOL_ICMP, rx_fib_index0)))
+ {
+ dont_translate = 1;
+ goto out;
+ }
+ if (icmp0->type != ICMP4_echo_request)
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+ for (i0 = 0; i0 < dm0->ports_per_host; i0++)
+ {
+ key0.out_port = clib_host_to_net_u16 (lo_port0 +
+ ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
+
+ if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
+ continue;
+
+ ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
+ break;
+ }
+ if (PREDICT_FALSE(!ses0))
+ {
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
+ goto out;
+ }
+ }
+
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
+ !icmp_is_error_message (icmp0)))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
+ u32 now = (u32) vlib_time_now (sm->vlib_main);
+
+ ses0->state = SNAT_SESSION_ICMP_ACTIVE;
+ ses0->expire = now + sm->icmp_timeout;
+
+out:
+ *p_proto = protocol;
+ if (ses0)
+ {
+ p_value->addr = new_addr0;
+ p_value->fib_index = sm->outside_fib_index;
+ p_value->port = ses0->out.out_port;
+ }
+ *p_dont_translate = dont_translate;
+ if (d)
+ *(snat_det_session_t**)d = ses0;
+ if (e)
+ *(snat_det_map_t**)e = dm0;
+ return next0;
+}
+
+/**********************/
+/*** worker handoff ***/
+/**********************/
+static inline uword
+snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ u8 is_output)
+{
+ snat_main_t *sm = &snat_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 n_left_from, *from, *to_next = 0;
+ static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
+ static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
+ = 0;
+ vlib_frame_queue_elt_t *hf = 0;
+ vlib_frame_t *f = 0;
+ int i;
+ u32 n_left_to_next_worker = 0, *to_next_worker = 0;
+ u32 next_worker_index = 0;
+ u32 current_worker_index = ~0;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 fq_index;
+ u32 to_node_index;
+
+ ASSERT (vec_len (sm->workers));
+
+ if (is_output)
+ {
+ fq_index = sm->fq_in2out_output_index;
+ to_node_index = sm->in2out_output_node_index;
+ }
+ else
+ {
+ fq_index = sm->fq_in2out_index;
+ to_node_index = sm->in2out_node_index;
+ }
+
+ if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
+ {
+ vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
+
+ vec_validate_init_empty (congested_handoff_queue_by_worker_index,
+ sm->first_worker_index + sm->num_workers - 1,
+ (vlib_frame_queue_t *) (~0));
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ ip4_header_t * ip0;
+ u8 do_handoff;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
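+ /* The callback is expected to pick workers deterministically per
+ * flow; the default in2out callback, for instance, hashes the inside
+ * (source) address so that all packets from a given user land on the
+ * same worker thread. */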
+
+ if (PREDICT_FALSE (next_worker_index != thread_index))
+ {
+ do_handoff = 1;
+
+ if (next_worker_index != current_worker_index)
+ {
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ hf = vlib_get_worker_handoff_queue_elt (fq_index,
+ next_worker_index,
+ handoff_queue_elt_by_worker_index);
+
+ n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
+ to_next_worker = &hf->buffer_index[hf->n_vectors];
+ current_worker_index = next_worker_index;
+ }
+
+ /* enqueue to correct worker thread */
+ to_next_worker[0] = bi0;
+ to_next_worker++;
+ n_left_to_next_worker--;
+
+ if (n_left_to_next_worker == 0)
+ {
+ hf->n_vectors = VLIB_FRAME_SIZE;
+ vlib_put_frame_queue_elt (hf);
+ current_worker_index = ~0;
+ handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+ hf = 0;
+ }
+ }
+ else
+ {
+ do_handoff = 0;
+ /* if this is 1st frame */
+ if (!f)
+ {
+ f = vlib_get_frame_to_node (vm, to_node_index);
+ to_next = vlib_frame_vector_args (f);
+ }
+
+ to_next[0] = bi0;
+ to_next += 1;
+ f->n_vectors++;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_worker_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_worker_index = next_worker_index;
+ t->do_handoff = do_handoff;
+ }
+ }
+
+ if (f)
+ vlib_put_frame_to_node (vm, to_node_index, f);
+
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ /* Ship frames to the worker nodes */
+ for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+ {
+ if (handoff_queue_elt_by_worker_index[i])
+ {
+ hf = handoff_queue_elt_by_worker_index[i];
+ /*
+ * It works better to let the handoff node
+ * rate-adapt, always ship the handoff queue element.
+ */
+ if (1 || hf->n_vectors == hf->last_n_vectors)
+ {
+ vlib_put_frame_queue_elt (hf);
+ handoff_queue_elt_by_worker_index[i] = 0;
+ }
+ else
+ hf->last_n_vectors = hf->n_vectors;
+ }
+ congested_handoff_queue_by_worker_index[i] =
+ (vlib_frame_queue_t *) (~0);
+ }
+ hf = 0;
+ current_worker_index = ~0;
+ return frame->n_vectors;
+}
+
+static uword
+snat_in2out_worker_handoff_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
+}
+
+VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
+ .function = snat_in2out_worker_handoff_fn,
+ .name = "nat44-in2out-worker-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
+ snat_in2out_worker_handoff_fn);
+
+static uword
+snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
+}
+
+VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
+ .function = snat_in2out_output_worker_handoff_fn,
+ .name = "nat44-in2out-output-worker-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
+ snat_in2out_output_worker_handoff_fn);
+
+static_always_inline int
+is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
+{
+ snat_address_t * ap;
+ clib_bihash_kv_8_8_t kv, value;
+ snat_session_key_t m_key;
+
+ vec_foreach (ap, sm->addresses)
+ {
+ if (ap->addr.as_u32 == dst_addr->as_u32)
+ return 1;
+ }
+
+ m_key.addr.as_u32 = dst_addr->as_u32;
+ m_key.fib_index = sm->outside_fib_index;
+ m_key.port = 0;
+ m_key.protocol = 0;
+ kv.key = m_key.as_u64;
+ if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ return 1;
+
+ return 0;
+}
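+
+/* A destination qualifies for hairpinning when it is either one of the
+ * NAT pool addresses or the external address of a static mapping: an
+ * inside host addressing such an address needs both source and
+ * destination rewritten so the reply returns through the NAT. */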
+
+static uword
+snat_hairpin_dst_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_in2out_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ ip4_header_t * ip0;
+ u32 proto0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+ ip0 = vlib_buffer_get_current (b0);
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ vnet_buffer (b0)->snat.flags = 0;
+ if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
+ {
+ if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
+ {
+ udp_header_t * udp0 = ip4_next_header (ip0);
+ tcp_header_t * tcp0 = (tcp_header_t *) udp0;
+
+ snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+ }
+ else if (proto0 == SNAT_PROTOCOL_ICMP)
+ {
+ icmp46_header_t * icmp0 = ip4_next_header (ip0);
+
+ snat_icmp_hairpinning (sm, b0, ip0, icmp0);
+ }
+ else
+ {
+ snat_hairpinning_unknown_proto (sm, b0, ip0);
+ }
+
+ vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
+ SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
+ .function = snat_hairpin_dst_fn,
+ .name = "nat44-hairpin-dst",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
+ snat_hairpin_dst_fn);
+
+static uword
+snat_hairpin_src_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_in2out_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t *sm = &snat_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ snat_interface_t *i;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
+
+ pool_foreach (i, sm->output_feature_interfaces,
+ ({
+ /* Only packets from NAT inside interface */
+ if ((i->is_inside == 1) && (sw_if_index0 == i->sw_if_index))
+ {
+ if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
+ SNAT_FLAG_HAIRPINNING))
+ {
+ if (PREDICT_TRUE (sm->num_workers > 1))
+ next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
+ else
+ next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
+ }
+ break;
+ }
+ }));
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
+ SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
+ .function = snat_hairpin_src_fn,
+ .name = "nat44-hairpin-src",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+ .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
+ .next_nodes = {
+ [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
+ [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
+ [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
+ [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
+ snat_hairpin_src_fn);
+
+static uword
+snat_in2out_fast_static_map_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_in2out_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+ u32 stats_node_index;
+
+ stats_node_index = snat_in2out_fast_node.index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ u32 new_addr0, old_addr0;
+ u16 old_port0, new_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ icmp46_header_t * icmp0;
+ snat_session_key_t key0, sm0;
+ u32 proto0;
+ u32 rx_fib_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
+ goto trace0;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ if (PREDICT_FALSE (proto0 == ~0))
+ goto trace0;
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
+ rx_fib_index0, node, next0, ~0, 0, 0);
+ goto trace0;
+ }
+
+ key0.addr = ip0->src_address;
+ key0.protocol = proto0;
+ key0.port = udp0->src_port;
+ key0.fib_index = rx_fib_index0;
+
+ if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
+ next0= SNAT_IN2OUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ new_addr0 = sm0.addr.as_u32;
+ new_port0 = sm0.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+ old_addr0 = ip0->src_address.as_u32;
+ ip0->src_address.as_u32 = new_addr0;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_FALSE(new_port0 != udp0->src_port))
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->src_port;
+ tcp0->src_port = new_port0;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->src_port;
+ udp0->src_port = new_port0;
+ udp0->checksum = 0;
+ }
+ }
+ else
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ }
+
+ /* Hairpinning */
+ snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+
+ trace0:
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, stats_node_index,
+ SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
+ .function = snat_in2out_fast_static_map_fn,
+ .name = "nat44-in2out-fast",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_fast_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
+ [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);
diff --git a/src/plugins/nat/nat.api b/src/plugins/nat/nat.api
new file mode 100644
index 00000000..d7a4a9ef
--- /dev/null
+++ b/src/plugins/nat/nat.api
@@ -0,0 +1,1546 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file nat.api
+ * @brief VPP control-plane API messages.
+ *
+ * This file defines VPP control-plane API messages which are generally
+ * called through a shared memory interface.
+ */
+
+/*
+ * Old "snat" APIs, will be deprecated after 17.10
+ */
+
+/** \brief Add/del NAT44 address range
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param first_ip_address - first IP address
+ @param last_ip_address - last IP address
+ @param vrf_id - VRF id of tenant, ~0 means independent of VRF
+ @param is_add - 1 if add, 0 if delete
+*/
+autoreply define snat_add_address_range {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 first_ip_address[16];
+ u8 last_ip_address[16];
+ u32 vrf_id;
+ u8 is_add;
+};
+
+/** \brief Dump NAT44 addresses
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_address_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 address details response
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param ip_address - IP address
+ @param vrf_id - VRF id of tenant, ~0 means independent of VRF
+*/
+define snat_address_details {
+ u32 context;
+ u8 is_ip4;
+ u8 ip_address[16];
+ u32 vrf_id;
+};
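+
+/*
+ * Note on the dump/details pattern used throughout this file: a client
+ * sends a *_dump request and the server streams back zero or more
+ * *_details messages; a control ping is typically sent afterwards to
+ * detect the end of the stream.
+ */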
+
+/** \brief Enable/disable NAT44 feature on the interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+autoreply define snat_interface_add_del_feature {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Dump interfaces with NAT44 feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_interface_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 interface details response
+ @param context - sender context, to match reply w/ request
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+define snat_interface_details {
+ u32 context;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Enable/disable NAT44 as an interface output feature (postrouting
+ in2out translation)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+autoreply define snat_interface_add_del_output_feature {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Dump interfaces with NAT44 output feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_interface_output_feature_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 interface with output feature details response
+ @param context - sender context, to match reply w/ request
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+define snat_interface_output_feature_details {
+ u32 context;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Add/delete NAT44 static mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_ip4 - 1 if address type is IPv4
+ @param addr_only - 1 if address only mapping
+ @param local_ip_address - local IP address
+ @param external_ip_address - external IP address
+ @param protocol - IP protocol
+ @param local_port - local port number
+ @param external_port - external port number
+ @param external_sw_if_index - external interface (if set
+ external_ip_address is ignored, ~0 means not
+ used)
+ @param vrf_id - VRF ID
+*/
+autoreply define snat_add_static_mapping {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ip4;
+ u8 addr_only;
+ u8 local_ip_address[16];
+ u8 external_ip_address[16];
+ u8 protocol;
+ u16 local_port;
+ u16 external_port;
+ u32 external_sw_if_index;
+ u32 vrf_id;
+};
+
+/** \brief Dump NAT44 static mappings
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_static_mapping_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 static mapping details response
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param addr_only - 1 if address only mapping
+ @param local_ip_address - local IP address
+ @param external_ip_address - external IP address
+ @param protocol - IP protocol
+ @param local_port - local port number
+ @param external_port - external port number
+ @param external_sw_if_index - external interface
+ @param vrf_id - VRF ID
+*/
+define snat_static_mapping_details {
+ u32 context;
+ u8 is_ip4;
+ u8 addr_only;
+ u8 local_ip_address[16];
+ u8 external_ip_address[16];
+ u8 protocol;
+ u16 local_port;
+ u16 external_port;
+ u32 external_sw_if_index;
+ u32 vrf_id;
+};
+
+/** \brief Control ping from client to api server request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_control_ping
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Control ping from the client to the server response
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param vpe_pid - the pid of the vpe, returned by the server
+*/
+define snat_control_ping_reply
+{
+ u32 context;
+ i32 retval;
+ u32 client_index;
+ u32 vpe_pid;
+};
+
+/** \brief Show NAT plugin startup config
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_show_config
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Show NAT plugin startup config reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param static_mapping_only - if 1 dynamic translations disabled
+ @param static_mapping_connection_tracking - if 1 create session data
+ @param deterministic - if 1 deterministic mapping
+ @param translation_buckets - number of translation hash buckets
+ @param translation_memory_size - translation hash memory size
+ @param user_buckets - number of user hash buckets
+ @param user_memory_size - user hash memory size
+ @param max_translations_per_user - maximum number of translations per user
+ @param outside_vrf_id - outside VRF id
+ @param inside_vrf_id - default inside VRF id
+*/
+define snat_show_config_reply
+{
+ u32 context;
+ i32 retval;
+ u8 static_mapping_only;
+ u8 static_mapping_connection_tracking;
+ u8 deterministic;
+ u32 translation_buckets;
+ u32 translation_memory_size;
+ u32 user_buckets;
+ u32 user_memory_size;
+ u32 max_translations_per_user;
+ u32 outside_vrf_id;
+ u32 inside_vrf_id;
+};
+
+/** \brief Set NAT workers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param worker_mask - NAT workers mask
+*/
+autoreply define snat_set_workers {
+ u32 client_index;
+ u32 context;
+ u64 worker_mask;
+};
+
+/** \brief Dump NAT workers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_worker_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT workers details response
+ @param context - sender context, to match reply w/ request
+ @param worker_index - worker index
+ @param lcore_id - lcore ID
+ @param name - worker name
+*/
+define snat_worker_details {
+ u32 context;
+ u32 worker_index;
+ u32 lcore_id;
+ u8 name[64];
+};
+
+/** \brief Add/delete NAT44 pool address from specific interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+autoreply define snat_add_del_interface_addr {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Dump NAT44 pool addresses interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_interface_addr_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 pool addresses interfaces details response
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+*/
+define snat_interface_addr_details {
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Enable/disable NAT IPFIX logging
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param domain_id - observation domain ID
+ @param src_port - source port number
+ @param enable - 1 if enable, 0 if disable
+*/
+autoreply define snat_ipfix_enable_disable {
+ u32 client_index;
+ u32 context;
+ u32 domain_id;
+ u16 src_port;
+ u8 enable;
+};
+
+/** \brief Dump NAT44 users
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_user_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 users response
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - VRF ID
+ @param is_ip4 - 1 if address type is IPv4
+ @param ip_address - IP address
+ @param nsessions - number of dynamic sessions
+ @param nstaticsessions - number of static sessions
+*/
+define snat_user_details {
+ u32 context;
+ u32 vrf_id;
+ u8 is_ip4;
+ u8 ip_address[16];
+ u32 nsessions;
+ u32 nstaticsessions;
+};
+
+/** \brief NAT44 user's sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param ip_address - IP address of the user to dump
+ @param vrf_id - VRF ID
+*/
+define snat_user_session_dump {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 ip_address[16];
+ u32 vrf_id;
+};
+
+/** \brief NAT44 user's sessions response
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param outside_ip_address - outside IP address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IP address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param is_static - 1 if session is static
+ @param last_heard - last heard timer
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of packets sent through session
+*/
+define snat_user_session_details {
+ u32 context;
+ u8 is_ip4;
+ u8 outside_ip_address[16];
+ u16 outside_port;
+ u8 inside_ip_address[16];
+ u16 inside_port;
+ u16 protocol;
+ u8 is_static;
+ u64 last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+};
+
+/** \brief Add/delete NAT deterministic mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_ip4 - 1 if address type is IPv4
+ @param addr_only - 1 if address only mapping
+ @param in_addr - inside IP address
+ @param in_plen - inside IP address prefix length
+ @param out_addr - outside IP address
+ @param out_plen - outside IP address prefix length
+*/
+autoreply define snat_add_det_map {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ip4;
+ u8 addr_only;
+ u8 in_addr[16];
+ u8 in_plen;
+ u8 out_addr[16];
+ u8 out_plen;
+};
+
+/** \brief Get outside address and port range from inside address
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param in_addr - inside IP address
+*/
+define snat_det_forward {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 in_addr[16];
+};
+
+/** \brief Get outside address and port range from inside address reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param out_port_lo - outside port range start
+ @param out_port_hi - outside port range end
+ @param is_ip4 - 1 if address type is IPv4
+ @param out_addr - outside IP address
+*/
+define snat_det_forward_reply {
+ u32 context;
+ i32 retval;
+ u16 out_port_lo;
+ u16 out_port_hi;
+ u8 is_ip4;
+ u8 out_addr[16];
+};
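+
+/*
+ * Worked example (illustrative): mapping inside 10.0.0.0/24 to outside
+ * 1.1.1.0/30 gives a sharing ratio of 256/4 = 64, so each inside host
+ * owns (65535 - 1023) / 64 = 1008 outside ports; snat_det_forward for
+ * 10.0.0.5 would then return one address from 1.1.1.0/30 and a
+ * 1008-port range computed from that host's offset.
+ */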
+
+/** \brief Get inside address from outside address and port
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param out_port - outside port
+ @param is_ip4 - 1 if address type is IPv4
+ @param out_addr - outside IP address
+*/
+define snat_det_reverse {
+ u32 client_index;
+ u32 context;
+ u16 out_port;
+ u8 is_ip4;
+ u8 out_addr[16];
+};
+
+/** \brief Get inside address from outside address and port reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param is_ip4 - 1 if address type is IPv4
+ @param in_addr - inside IP address
+*/
+define snat_det_reverse_reply {
+ u32 context;
+ i32 retval;
+ u8 is_ip4;
+ u8 in_addr[16];
+};
+
+/** \brief Dump NAT deterministic mappings
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_det_map_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT deterministic mapping details response
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param in_addr - inside IP address
+ @param in_plen - inside IP address prefix length
+ @param out_addr - outside IP address
+ @param out_plen - outside IP address prefix length
+ @param sharing_ratio - outside to inside address sharing ratio
+ @param ports_per_host - number of ports available to a host
+ @param ses_num - number of sessions belonging to this mapping
+*/
+define snat_det_map_details {
+ u32 context;
+ u8 is_ip4;
+ u8 in_addr[16];
+ u8 in_plen;
+ u8 out_addr[16];
+ u8 out_plen;
+ u32 sharing_ratio;
+ u16 ports_per_host;
+ u32 ses_num;
+};
+
+/** \brief Set values of timeouts for deterministic NAT (seconds, 0 = default)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param udp - UDP timeout (default 300sec)
+ @param tcp_established - TCP established timeout (default 7440sec)
+ @param tcp_transitory - TCP transitory timeout (default 240sec)
+ @param icmp - ICMP timeout (default 60sec)
+*/
+autoreply define snat_det_set_timeouts {
+ u32 client_index;
+ u32 context;
+ u32 udp;
+ u32 tcp_established;
+ u32 tcp_transitory;
+ u32 icmp;
+};
+
+/** \brief Get values of timeouts for deterministic NAT (seconds)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_det_get_timeouts {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Get values of timeouts for deterministic NAT reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param udp - UDP timeout (default 300sec)
+ @param tcp_established - TCP established timeout (default 7440sec)
+ @param tcp_transitory - TCP transitory timeout (default 240sec)
+ @param icmp - ICMP timeout (default 60sec)
+*/
+define snat_det_get_timeouts_reply {
+ u32 context;
+ i32 retval;
+ u32 udp;
+ u32 tcp_established;
+ u32 tcp_transitory;
+ u32 icmp;
+};
+
+/** \brief Close deterministic NAT session by outside address and port
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param out_addr - outside IP address
+ @param out_port - outside port
+ @param ext_addr - external host address
+ @param ext_port - external host port
+*/
+autoreply define snat_det_close_session_out {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 out_addr[16];
+ u16 out_port;
+ u8 ext_addr[16];
+ u16 ext_port;
+};
+
+/** \brief Close deterministic NAT session by inside address and port
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param in_addr - inside IP address
+ @param in_port - inside port
+ @param ext_addr - external host address
+ @param ext_port - external host port
+*/
+autoreply define snat_det_close_session_in {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 in_addr[16];
+ u16 in_port;
+ u8 ext_addr[16];
+ u16 ext_port;
+};
+
+/** \brief Dump deterministic NAT sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param user_addr - address of an inside user whose sessions to dump
+*/
+define snat_det_session_dump {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 user_addr[16];
+};
+
+/** \brief Deterministic NAT sessions reply
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param in_port - inside port
+ @param ext_addr - external host address
+ @param ext_port - external host port
+ @param out_port - outside NAT port
+ @param state - session state
+ @param expire - session expiration timestamp
+*/
+define snat_det_session_details {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u16 in_port;
+ u8 ext_addr[16];
+ u16 ext_port;
+ u16 out_port;
+ u8 state;
+ u32 expire;
+};
+
+/*
+ * Common NAT plugin APIs
+ */
+
+/** \brief Control ping from client to api server request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat_control_ping
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Control ping from the client to the server response
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param vpe_pid - the pid of the vpe, returned by the server
+*/
+define nat_control_ping_reply
+{
+ u32 context;
+ i32 retval;
+ u32 client_index;
+ u32 vpe_pid;
+};
+
+/** \brief Show NAT plugin startup config
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat_show_config
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Show NAT plugin startup config reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param static_mapping_only - if 1 dynamic translations disabled
+ @param static_mapping_connection_tracking - if 1 create session data
+ @param deterministic - if 1 deterministic mapping
+ @param translation_buckets - number of translation hash buckets
+ @param translation_memory_size - translation hash memory size
+ @param user_buckets - number of user hash buckets
+ @param user_memory_size - user hash memory size
+ @param max_translations_per_user - maximum number of translations per user
+ @param outside_vrf_id - outside VRF id
+ @param inside_vrf_id - default inside VRF id
+*/
+define nat_show_config_reply
+{
+ u32 context;
+ i32 retval;
+ u8 static_mapping_only;
+ u8 static_mapping_connection_tracking;
+ u8 deterministic;
+ u32 translation_buckets;
+ u32 translation_memory_size;
+ u32 user_buckets;
+ u32 user_memory_size;
+ u32 max_translations_per_user;
+ u32 outside_vrf_id;
+ u32 inside_vrf_id;
+};
+
+/** \brief Set NAT workers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param worker_mask - NAT workers mask
+*/
+autoreply define nat_set_workers {
+ u32 client_index;
+ u32 context;
+ u64 worker_mask;
+};
+
+/** \brief Dump NAT workers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat_worker_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT workers details response
+ @param context - sender context, to match reply w/ request
+ @param worker_index - worker index
+ @param lcore_id - lcore ID
+ @param name - worker name
+*/
+define nat_worker_details {
+ u32 context;
+ u32 worker_index;
+ u32 lcore_id;
+ u8 name[64];
+};
+
+/** \brief Enable/disable NAT IPFIX logging
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param domain_id - observation domain ID
+ @param src_port - source port number
+ @param enable - 1 if enable, 0 if disable
+*/
+autoreply define nat_ipfix_enable_disable {
+ u32 client_index;
+ u32 context;
+ u32 domain_id;
+ u16 src_port;
+ u8 enable;
+};
+
+/*
+ * NAT44 APIs
+ */
+
+/** \brief Add/del NAT44 address range
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param first_ip_address - first IPv4 address
+ @param last_ip_address - last IPv4 address
+ @param vrf_id - VRF id of tenant, ~0 means independent of VRF
+ @param is_add - 1 if add, 0 if delete
+*/
+autoreply define nat44_add_del_address_range {
+ u32 client_index;
+ u32 context;
+ u8 first_ip_address[4];
+ u8 last_ip_address[4];
+ u32 vrf_id;
+ u8 is_add;
+};
+
+/** \brief Dump NAT44 addresses
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat44_address_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 address details response
+ @param context - sender context, to match reply w/ request
+ @param ip_address - IPv4 address
+ @param vrf_id - VRF id of tenant, ~0 means independent of VRF
+*/
+define nat44_address_details {
+ u32 context;
+ u8 ip_address[4];
+ u32 vrf_id;
+};
+
+/** \brief Enable/disable NAT44 feature on the interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+autoreply define nat44_interface_add_del_feature {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Dump interfaces with NAT44 feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat44_interface_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 interface details response
+ @param context - sender context, to match reply w/ request
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+define nat44_interface_details {
+ u32 context;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Enable/disable NAT44 as an interface output feature (postrouting
+ in2out translation)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+autoreply define nat44_interface_add_del_output_feature {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Dump interfaces with NAT44 output feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat44_interface_output_feature_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 interface with output feature details response
+ @param context - sender context, to match reply w/ request
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+define nat44_interface_output_feature_details {
+ u32 context;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Add/delete NAT44 static mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param addr_only - 1 if address only mapping
+ @param local_ip_address - local IPv4 address
+ @param external_ip_address - external IPv4 address
+ @param protocol - IP protocol
+ @param local_port - local port number
+ @param external_port - external port number
+ @param external_sw_if_index - external interface (if set
+ external_ip_address is ignored, ~0 means not
+ used)
+ @param vrf_id - VRF ID
+*/
+autoreply define nat44_add_del_static_mapping {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 addr_only;
+ u8 local_ip_address[4];
+ u8 external_ip_address[4];
+ u8 protocol;
+ u16 local_port;
+ u16 external_port;
+ u32 external_sw_if_index;
+ u32 vrf_id;
+};
+
+/** \brief Dump NAT44 static mappings
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat44_static_mapping_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 static mapping details response
+ @param context - sender context, to match reply w/ request
+ @param addr_only - 1 if address only mapping
+ @param local_ip_address - local IPv4 address
+ @param external_ip_address - external IPv4 address
+ @param protocol - IP protocol
+ @param local_port - local port number
+ @param external_port - external port number
+ @param external_sw_if_index - external interface
+ @param vrf_id - VRF ID
+*/
+define nat44_static_mapping_details {
+ u32 context;
+ u8 addr_only;
+ u8 local_ip_address[4];
+ u8 external_ip_address[4];
+ u8 protocol;
+ u16 local_port;
+ u16 external_port;
+ u32 external_sw_if_index;
+ u32 vrf_id;
+};
+
+/** \brief Add/delete NAT44 pool address from specific interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+autoreply define nat44_add_del_interface_addr {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Dump NAT44 pool address interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat44_interface_addr_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 pool address interface details response
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+*/
+define nat44_interface_addr_details {
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Dump NAT44 users
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat44_user_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT44 user details response
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - VRF ID
+ @param ip_address - IPv4 address
+ @param nsessions - number of dynamic sessions
+ @param nstaticsessions - number of static sessions
+*/
+define nat44_user_details {
+ u32 context;
+ u32 vrf_id;
+ u8 ip_address[4];
+ u32 nsessions;
+ u32 nstaticsessions;
+};
+
+/** \brief Dump NAT44 user's sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip_address - IPv4 address of the user to dump sessions for
+ @param vrf_id - VRF ID
+*/
+define nat44_user_session_dump {
+ u32 client_index;
+ u32 context;
+ u8 ip_address[4];
+ u32 vrf_id;
+};
+
+/** \brief NAT44 user's sessions response
+ @param context - sender context, to match reply w/ request
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param is_static - 1 if session is static
+ @param last_heard - last heard timer
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of packets sent through session
+*/
+define nat44_user_session_details {
+ u32 context;
+ u8 outside_ip_address[4];
+ u16 outside_port;
+ u8 inside_ip_address[4];
+ u16 inside_port;
+ u16 protocol;
+ u8 is_static;
+ u64 last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+};
+
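+/** \brief NAT44 load-balancing address and port pair
+ @param addr - IPv4 address of the local node
+ @param port - L4 port number of the local node
+ @param probability - load-balancing probability of the local node
+*/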
+typeonly manual_endian define nat44_lb_addr_port {
+ u8 addr[4];
+ u16 port;
+ u8 probability;
+};
+
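+/** \brief Add/delete NAT44 load-balancing static mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param external_addr - external IPv4 address
+ @param external_port - external port number
+ @param protocol - IP protocol
+ @param vrf_id - VRF ID
+ @param local_num - number of local network nodes
+ @param locals - local network nodes
+*/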
+autoreply manual_endian define nat44_add_del_lb_static_mapping {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 external_addr[4];
+ u16 external_port;
+ u8 protocol;
+ u32 vrf_id;
+ u8 local_num;
+ vl_api_nat44_lb_addr_port_t locals[local_num];
+};
+
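+/** \brief Dump NAT44 load-balancing static mappings
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/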
+define nat44_lb_static_mapping_dump {
+ u32 client_index;
+ u32 context;
+};
+
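+/** \brief NAT44 load-balancing static mapping details response
+ @param context - sender context, to match reply w/ request
+ @param external_addr - external IPv4 address
+ @param external_port - external port number
+ @param protocol - IP protocol
+ @param vrf_id - VRF ID
+ @param local_num - number of local network nodes
+ @param locals - local network nodes
+*/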
+manual_endian define nat44_lb_static_mapping_details {
+ u32 context;
+ u8 external_addr[4];
+ u16 external_port;
+ u8 protocol;
+ u32 vrf_id;
+ u8 local_num;
+ vl_api_nat44_lb_addr_port_t locals[local_num];
+};
+
+/*
+ * Deterministic NAT (CGN) APIs
+ */
+
+/** \brief Add/delete NAT deterministic mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_nat44 - 1 if NAT44
+ @param addr_only - 1 if address only mapping
+ @param in_addr - inside IP address
+ @param in_plen - inside IP address prefix length
+ @param out_addr - outside IPv4 address
+ @param out_plen - outside IPv4 address prefix length
+*/
+autoreply define nat_det_add_del_map {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_nat44;
+ u8 addr_only;
+ u8 in_addr[16];
+ u8 in_plen;
+ u8 out_addr[4];
+ u8 out_plen;
+};
+
+/** \brief Get outside address and port range from inside address
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_nat44 - 1 if NAT44
+ @param in_addr - inside IP address
+*/
+define nat_det_forward {
+ u32 client_index;
+ u32 context;
+ u8 is_nat44;
+ u8 in_addr[16];
+};
+
+/** \brief Get outside address and port range from inside address reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param out_port_lo - outside port range start
+ @param out_port_hi - outside port range end
+ @param out_addr - outside IPv4 address
+*/
+define nat_det_forward_reply {
+ u32 context;
+ i32 retval;
+ u16 out_port_lo;
+ u16 out_port_hi;
+ u8 out_addr[4];
+};
+
+/** \brief Get inside address from outside address and port
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param out_port - outside port
+ @param out_addr - outside IPv4 address
+*/
+define nat_det_reverse {
+ u32 client_index;
+ u32 context;
+ u16 out_port;
+ u8 out_addr[4];
+};
+
+/** \brief Get inside address from outside address and port reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param is_nat44 - 1 if NAT44
+ @param in_addr - inside IP address
+*/
+define nat_det_reverse_reply {
+ u32 context;
+ i32 retval;
+ u8 is_nat44;
+ u8 in_addr[16];
+};
+
+/** \brief Dump NAT deterministic mappings
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat_det_map_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Deterministic NAT mapping details response
+ @param context - sender context, to match reply w/ request
+ @param is_nat44 - 1 if NAT44
+ @param in_addr - inside IP address
+ @param in_plen - inside IP address prefix length
+ @param out_addr - outside IPv4 address
+ @param out_plen - outside IPv4 address prefix length
+ @param sharing_ratio - outside to inside address sharing ratio
+ @param ports_per_host - number of ports available to a host
+ @param ses_num - number of sessions belonging to this mapping
+*/
+define nat_det_map_details {
+ u32 context;
+ u8 is_nat44;
+ u8 in_addr[16];
+ u8 in_plen;
+ u8 out_addr[4];
+ u8 out_plen;
+ u32 sharing_ratio;
+ u16 ports_per_host;
+ u32 ses_num;
+};
+
+/** \brief Set values of timeouts for deterministic NAT (seconds, 0 = default)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param udp - UDP timeout (default 300sec)
+ @param tcp_established - TCP established timeout (default 7440sec)
+ @param tcp_transitory - TCP transitory timeout (default 240sec)
+ @param icmp - ICMP timeout (default 60sec)
+*/
+autoreply define nat_det_set_timeouts {
+ u32 client_index;
+ u32 context;
+ u32 udp;
+ u32 tcp_established;
+ u32 tcp_transitory;
+ u32 icmp;
+};
+
+/** \brief Get values of timeouts for deterministic NAT (seconds)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat_det_get_timeouts {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Get values of timeouts for deterministic NAT reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param udp - UDP timeout (default 300sec)
+ @param tcp_established - TCP established timeout (default 7440sec)
+ @param tcp_transitory - TCP transitory timeout (default 240sec)
+ @param icmp - ICMP timeout (default 60sec)
+*/
+define nat_det_get_timeouts_reply {
+ u32 context;
+ i32 retval;
+ u32 udp;
+ u32 tcp_established;
+ u32 tcp_transitory;
+ u32 icmp;
+};
+
+/** \brief Close deterministic NAT session by outside address and port
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param out_addr - outside IPv4 address
+ @param out_port - outside port
+ @param ext_addr - external host IPv4 address
+ @param ext_port - external host port
+*/
+autoreply define nat_det_close_session_out {
+ u32 client_index;
+ u32 context;
+ u8 out_addr[4];
+ u16 out_port;
+ u8 ext_addr[4];
+ u16 ext_port;
+};
+
+/** \brief Close deterministic NAT session by inside address and port
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_nat44 - 1 if NAT44
+ @param in_addr - inside IP address
+ @param in_port - inside port
+ @param ext_addr - external host IP address
+ @param ext_port - external host port
+*/
+autoreply define nat_det_close_session_in {
+ u32 client_index;
+ u32 context;
+ u8 is_nat44;
+ u8 in_addr[16];
+ u16 in_port;
+ u8 ext_addr[16];
+ u16 ext_port;
+};
+
+/** \brief Dump deterministic NAT sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_nat44 - 1 if NAT44
+ @param user_addr - address of an inside user whose sessions to dump
+*/
+define nat_det_session_dump {
+ u32 client_index;
+ u32 context;
+ u8 is_nat44;
+ u8 user_addr[16];
+};
+
+/** \brief Deterministic NAT sessions reply
+ @param context - sender context, to match reply w/ request
+ @param in_port - inside port
+ @param ext_addr - external host address
+ @param ext_port - external host port
+ @param out_port - outside NAT port
+ @param state - session state
+ @param expire - session expiration timestamp
+*/
+define nat_det_session_details {
+ u32 client_index;
+ u32 context;
+ u16 in_port;
+ u8 ext_addr[4];
+ u16 ext_port;
+ u16 out_port;
+ u8 state;
+ u32 expire;
+};
+
+/*
+ * NAT64 APIs
+ */
+
+/** \brief Add/delete address range to NAT64 pool
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param start_addr - start address of the range
+ @param end_addr - end address of the range
+ @param vrf_id - VRF id of tenant, ~0 means independent of VRF
+ @param is_add - 1 if add, 0 if delete
+*/
+autoreply define nat64_add_del_pool_addr_range {
+ u32 client_index;
+ u32 context;
+ u8 start_addr[4];
+ u8 end_addr[4];
+ u32 vrf_id;
+ u8 is_add;
+};
+
+/** \brief Dump NAT64 pool addresses
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat64_pool_addr_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT64 pool address details response
+ @param context - sender context, to match reply w/ request
+ @param address - IPv4 address
+ @param vrf_id - VRF id of tenant, ~0 means independent of VRF
+*/
+define nat64_pool_addr_details {
+ u32 context;
+ u8 address[4];
+ u32 vrf_id;
+};
+
+/** \brief Enable/disable NAT64 feature on the interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface
+ @param is_inside - 1 if inside, 0 if outside
+ @param is_add - 1 if add, 0 if delete
+*/
+autoreply define nat64_add_del_interface {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_inside;
+ u8 is_add;
+};
+
+/** \brief Dump interfaces with NAT64 feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat64_interface_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT64 interface details response
+ @param context - sender context, to match reply w/ request
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - index of the interface
+*/
+define nat64_interface_details {
+ u32 context;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Add/delete NAT64 static BIB entry
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param i_addr - inside IPv6 address
+ @param o_addr - outside IPv4 address
+ @param i_port - inside port number
+ @param o_port - outside port number
+ @param vrf_id - VRF id of tenant
+ @param proto - protocol number
+ @param is_add - 1 if add, 0 if delete
+*/
+autoreply define nat64_add_del_static_bib {
+ u32 client_index;
+ u32 context;
+ u8 i_addr[16];
+ u8 o_addr[4];
+ u16 i_port;
+ u16 o_port;
+ u32 vrf_id;
+ u8 proto;
+ u8 is_add;
+};
+
+/** \brief Dump NAT64 BIB
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param proto - protocol of the BIB: 255 - all BIBs
+ 6 - TCP BIB
+ 17 - UDP BIB
+ 1/58 - ICMP BIB
+ otherwise - "unknown" protocol BIB
+*/
+define nat64_bib_dump {
+ u32 client_index;
+ u32 context;
+ u8 proto;
+};
+
+/** \brief NAT64 BIB details response
+ @param context - sender context, to match reply w/ request
+ @param i_addr - inside IPv6 address
+ @param o_addr - outside IPv4 address
+ @param i_port - inside port number
+ @param o_port - outside port number
+ @param vrf_id - VRF id of tenant
+ @param proto - protocol number
+ @param is_static - 1 if static BIB entry, 0 if dynamic
+ @param ses_num - number of sessions associated with the BIB entry
+*/
+define nat64_bib_details {
+ u32 context;
+ u8 i_addr[16];
+ u8 o_addr[4];
+ u16 i_port;
+ u16 o_port;
+ u32 vrf_id;
+ u8 proto;
+ u8 is_static;
+ u32 ses_num;
+};
+
+/** \brief Set values of timeouts for NAT64 (seconds, 0 = default)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param udp - UDP timeout (default 300sec)
+ @param icmp - ICMP timeout (default 60sec)
+ @param tcp_trans - TCP transitory timeout (default 240sec)
+ @param tcp_est - TCP established timeout (default 7440sec)
+ @param tcp_incoming_syn - TCP incoming SYN timeout (default 6sec)
+*/
+autoreply define nat64_set_timeouts {
+ u32 client_index;
+ u32 context;
+ u32 udp;
+ u32 icmp;
+ u32 tcp_trans;
+ u32 tcp_est;
+ u32 tcp_incoming_syn;
+};
+
+/** \brief Get values of timeouts for NAT64 (seconds)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat64_get_timeouts {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Get values of timeouts for NAT64 reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param udp - UDP timeout
+ @param icmp - ICMP timeout
+ @param tcp_trans - TCP transitory timeout
+ @param tcp_est - TCP established timeout
+ @param tcp_incoming_syn - TCP incoming SYN timeout
+*/
+define nat64_get_timeouts_reply {
+ u32 context;
+ i32 retval;
+ u32 udp;
+ u32 icmp;
+ u32 tcp_trans;
+ u32 tcp_est;
+ u32 tcp_incoming_syn;
+};
+
+/** \brief Dump NAT64 session table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param proto - protocol of the session table: 255 - all STs
+ 6 - TCP ST
+ 17 - UDP ST
+ 1/58 - ICMP ST
+ otherwise - "unknown" proto ST
+*/
+define nat64_st_dump {
+ u32 client_index;
+ u32 context;
+ u8 proto;
+};
+
+/** \brief NAT64 session table details response
+ @param context - sender context, to match reply w/ request
+ @param il_addr - inside IPv6 address of the local host
+ @param ol_addr - outside IPv4 address of the local host
+ @param il_port - inside port number of the local host/inside ICMP id
+ @param ol_port - outside port number of the local host/outside ICMP id
+ @param ir_addr - inside IPv6 address of the remote host
+ @param or_addr - outside IPv4 address of the remote host
+ @param r_port - port number of the remote host (not used for ICMP)
+ @param vrf_id - VRF id of tenant
+ @param proto - protocol number
+*/
+define nat64_st_details {
+ u32 context;
+ u8 il_addr[16];
+ u8 ol_addr[4];
+ u16 il_port;
+ u16 ol_port;
+ u8 ir_addr[16];
+ u8 or_addr[4];
+ u16 r_port;
+ u32 vrf_id;
+ u8 proto;
+};
+
+/** \brief Add/del NAT64 prefix
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param prefix - NAT64 prefix
+ @param prefix_len - NAT64 prefix length
+ @param vrf_id - VRF id of tenant
+ @param is_add - 1 if add, 0 if delete
+*/
+autoreply define nat64_add_del_prefix {
+ u32 client_index;
+ u32 context;
+ u8 prefix[16];
+ u8 prefix_len;
+ u32 vrf_id;
+ u8 is_add;
+};
+
+/** \brief Dump NAT64 prefix
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define nat64_prefix_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief NAT64 prefix details response
+ @param context - sender context, to match reply w/ request
+ @param prefix - NAT64 prefix
+ @param prefix_len - NAT64 prefix length
+ @param vrf_id - VRF id of tenant
+*/
+define nat64_prefix_details {
+ u32 context;
+ u8 prefix[16];
+ u8 prefix_len;
+ u32 vrf_id;
+};
diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c
new file mode 100644
index 00000000..9bdb0351
--- /dev/null
+++ b/src/plugins/nat/nat.c
@@ -0,0 +1,3229 @@
+/*
+ * nat.c - simple NAT plugin
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/plugin/plugin.h>
+#include <nat/nat.h>
+#include <nat/nat_ipfix_logging.h>
+#include <nat/nat_det.h>
+#include <nat/nat64.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+
+#include <vpp/app/version.h>
+
+snat_main_t snat_main;
+
+
+/* Hook up input features */
+VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-in2out",
+ .runs_before = VNET_FEATURES ("nat44-out2in"),
+};
+VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-out2in",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+VNET_FEATURE_INIT (ip4_snat_det_in2out, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-det-in2out",
+ .runs_before = VNET_FEATURES ("nat44-det-out2in"),
+};
+VNET_FEATURE_INIT (ip4_snat_det_out2in, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-det-out2in",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-in2out-worker-handoff",
+ .runs_before = VNET_FEATURES ("nat44-out2in-worker-handoff"),
+};
+VNET_FEATURE_INIT (ip4_snat_out2in_worker_handoff, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-out2in-worker-handoff",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-in2out-fast",
+ .runs_before = VNET_FEATURES ("nat44-out2in-fast"),
+};
+VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-out2in-fast",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-hairpin-dst",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+/* Hook up output features */
+VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "nat44-in2out-output",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "nat44-in2out-output-worker-handoff",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "nat44-hairpin-src",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Network Address Translation",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Add/del NAT address to FIB.
+ *
+ * Add the external NAT address to the FIB as a receive entry. This ensures
+ * that VPP will reply to ARP for this address and that we don't need to
+ * enable proxy ARP on the outside interface.
+ *
+ * @param addr IPv4 address.
+ * @param p_len Address prefix length.
+ * @param sw_if_index Interface.
+ * @param is_add If 0 delete, otherwise add.
+ */
+void
+snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
+ int is_add)
+{
+ fib_prefix_t prefix = {
+ .fp_len = p_len,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = addr->as_u32,
+ },
+ };
+ u32 fib_index = ip4_fib_table_get_index_for_sw_if_index(sw_if_index);
+
+ if (is_add)
+ fib_table_entry_update_one_path(fib_index,
+ &prefix,
+ FIB_SOURCE_PLUGIN_HI,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL |
+ FIB_ENTRY_FLAG_EXCLUSIVE),
+ DPO_PROTO_IP4,
+ NULL,
+ sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ else
+ fib_table_entry_delete(fib_index,
+ &prefix,
+ FIB_SOURCE_PLUGIN_HI);
+}
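+
+/*
+ * Sketch of a typical call (values hypothetical): announce 192.0.2.1/32 as
+ * a receive entry on an outside interface so that VPP answers ARP for it:
+ *
+ *   ip4_address_t a;
+ *   a.as_u32 = clib_host_to_net_u32 (0xc0000201); // 192.0.2.1
+ *   snat_add_del_addr_to_fib (&a, 32, sw_if_index, 1);
+ */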
+
+void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id)
+{
+ snat_address_t * ap;
+ snat_interface_t *i;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ if (vrf_id != ~0)
+ sm->vrf_mode = 1;
+
+ /* Check if address already exists */
+ vec_foreach (ap, sm->addresses)
+ {
+ if (ap->addr.as_u32 == addr->as_u32)
+ return;
+ }
+
+ vec_add2 (sm->addresses, ap, 1);
+ ap->addr = *addr;
+ if (vrf_id != ~0)
+ ap->fib_index =
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
+ FIB_SOURCE_PLUGIN_HI);
+ else
+ ap->fib_index = ~0;
+#define _(N, i, n, s) \
+ clib_bitmap_alloc (ap->busy_##n##_port_bitmap, 65535); \
+ ap->busy_##n##_ports = 0; \
+ vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
+ foreach_snat_protocol
+#undef _
+
+ /* Add external address to FIB */
+ pool_foreach (i, sm->interfaces,
+ ({
+ if (i->is_inside)
+ continue;
+
+ snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
+ break;
+ }));
+ pool_foreach (i, sm->output_feature_interfaces,
+ ({
+ if (i->is_inside)
+ continue;
+
+ snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
+ break;
+ }));
+}
+
+static int is_snat_address_used_in_static_mapping (snat_main_t *sm,
+ ip4_address_t addr)
+{
+ snat_static_mapping_t *m;
+ pool_foreach (m, sm->static_mappings,
+ ({
+ if (m->external_addr.as_u32 == addr.as_u32)
+ return 1;
+ }));
+
+ return 0;
+}
+
+void increment_v4_address (ip4_address_t * a)
+{
+ u32 v;
+
+ v = clib_net_to_host_u32(a->as_u32) + 1;
+ a->as_u32 = clib_host_to_net_u32(v);
+}
+
+static void
+snat_add_static_mapping_when_resolved (snat_main_t * sm,
+ ip4_address_t l_addr,
+ u16 l_port,
+ u32 sw_if_index,
+ u16 e_port,
+ u32 vrf_id,
+ snat_protocol_t proto,
+ int addr_only,
+ int is_add)
+{
+ snat_static_map_resolve_t *rp;
+
+ vec_add2 (sm->to_resolve, rp, 1);
+ rp->l_addr.as_u32 = l_addr.as_u32;
+ rp->l_port = l_port;
+ rp->sw_if_index = sw_if_index;
+ rp->e_port = e_port;
+ rp->vrf_id = vrf_id;
+ rp->proto = proto;
+ rp->addr_only = addr_only;
+ rp->is_add = is_add;
+}
+
+/**
+ * @brief Add static mapping.
+ *
+ * Create static mapping between local addr+port and external addr+port.
+ *
+ * @param l_addr Local IPv4 address.
+ * @param e_addr External IPv4 address.
+ * @param l_port Local port number.
+ * @param e_port External port number.
+ * @param vrf_id VRF ID.
+ * @param addr_only If 0 address and port pair mapping, otherwise address only.
+ * @param sw_if_index External interface instead of a specific external IP address (~0 if not used).
+ * @param proto IP protocol of the mapping.
+ * @param is_add If 0 delete static mapping, otherwise add.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
+ u32 sw_if_index, snat_protocol_t proto, int is_add)
+{
+ snat_main_t * sm = &snat_main;
+ snat_static_mapping_t *m;
+ snat_session_key_t m_key;
+ clib_bihash_kv_8_8_t kv, value;
+ snat_address_t *a = 0;
+ u32 fib_index = ~0;
+ uword * p;
+ snat_interface_t *interface;
+ int i;
+
+ /* If the external address is a specific interface address */
+ if (sw_if_index != ~0)
+ {
+ ip4_address_t * first_int_addr;
+
+ /* Might be already set... */
+ first_int_addr = ip4_interface_first_address
+ (sm->ip4_main, sw_if_index, 0 /* just want the address*/);
+
+ /* DHCP resolution required? */
+ if (first_int_addr == 0)
+ {
+ snat_add_static_mapping_when_resolved
+ (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto,
+ addr_only, is_add);
+ return 0;
+ }
+ else
+ e_addr.as_u32 = first_int_addr->as_u32;
+ }
+
+ m_key.addr = e_addr;
+ m_key.port = addr_only ? 0 : e_port;
+ m_key.protocol = addr_only ? 0 : proto;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = 0;
+ else
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+
+ if (is_add)
+ {
+ if (m)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ /* Convert VRF id to FIB index */
+ if (vrf_id != ~0)
+ {
+ p = hash_get (sm->ip4_main->fib_index_by_table_id, vrf_id);
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ fib_index = p[0];
+ }
+ /* If not specified use inside VRF id from SNAT plugin startup config */
+ else
+ {
+ fib_index = sm->inside_fib_index;
+ vrf_id = sm->inside_vrf_id;
+ }
+
+ /* Find external address in allocated addresses and reserve port for
+ address and port pair mapping when dynamic translations enabled */
+ if (!addr_only && !(sm->static_mapping_only))
+ {
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+ {
+ a = sm->addresses + i;
+ /* External port must be unused */
+ switch (proto)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, e_port)) \
+ return VNET_API_ERROR_INVALID_VALUE; \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \
+ if (e_port > 1024) \
+ { \
+ a->busy_##n##_ports++; \
+ a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]++; \
+ } \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning("unknown_protocol");
+ return VNET_API_ERROR_INVALID_VALUE_2;
+ }
+ break;
+ }
+ }
+ /* External address must be allocated */
+ if (!a)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ pool_get (sm->static_mappings, m);
+ memset (m, 0, sizeof (*m));
+ m->local_addr = l_addr;
+ m->external_addr = e_addr;
+ m->addr_only = addr_only;
+ m->vrf_id = vrf_id;
+ m->fib_index = fib_index;
+ if (!addr_only)
+ {
+ m->local_port = l_port;
+ m->external_port = e_port;
+ m->proto = proto;
+ }
+
+ m_key.addr = m->local_addr;
+ m_key.port = m->local_port;
+ m_key.protocol = m->proto;
+ m_key.fib_index = m->fib_index;
+ kv.key = m_key.as_u64;
+ kv.value = m - sm->static_mappings;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1);
+
+ m_key.addr = m->external_addr;
+ m_key.port = m->external_port;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ kv.value = m - sm->static_mappings;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1);
+
+ if (sm->workers)
+ {
+ ip4_header_t ip = {
+ .src_address = m->local_addr,
+ };
+ m->worker_index = sm->worker_in2out_cb (&ip, m->fib_index);
+ }
+ }
+ else
+ {
+ if (!m)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ /* Free external address port */
+ if (!addr_only && !(sm->static_mapping_only))
+ {
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+ {
+ a = sm->addresses + i;
+ switch (proto)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \
+ if (e_port > 1024) \
+ { \
+ a->busy_##n##_ports--; \
+ a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]--; \
+ } \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning("unknown_protocol");
+ return VNET_API_ERROR_INVALID_VALUE_2;
+ }
+ break;
+ }
+ }
+ }
+
+ m_key.addr = m->local_addr;
+ m_key.port = m->local_port;
+ m_key.protocol = m->proto;
+ m_key.fib_index = m->fib_index;
+ kv.key = m_key.as_u64;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0);
+
+ m_key.addr = m->external_addr;
+ m_key.port = m->external_port;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0);
+
+ /* Delete session(s) for static mapping if exist */
+ if (!(sm->static_mapping_only) ||
+ (sm->static_mapping_only && sm->static_mapping_connection_tracking))
+ {
+ snat_user_key_t u_key;
+ snat_user_t *u;
+ dlist_elt_t * head, * elt;
+ u32 elt_index, head_index, del_elt_index;
+ u32 ses_index;
+ u64 user_index;
+ snat_session_t * s;
+ snat_main_per_thread_data_t *tsm;
+
+ u_key.addr = m->local_addr;
+ u_key.fib_index = m->fib_index;
+ kv.key = u_key.as_u64;
+ if (sm->num_workers > 1)
+ tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index);
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+ if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
+ {
+ user_index = value.value;
+ u = pool_elt_at_index (tsm->users, user_index);
+ if (u->nstaticsessions)
+ {
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tsm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ ses_index = elt->value;
+ while (ses_index != ~0)
+ {
+ s = pool_elt_at_index (tsm->sessions, ses_index);
+ del_elt_index = elt_index;
+ elt_index = elt->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ ses_index = elt->value;
+
+ if (!addr_only)
+ {
+ if ((s->out2in.addr.as_u32 != e_addr.as_u32) &&
+ (clib_net_to_host_u16 (s->out2in.port) != e_port))
+ continue;
+ }
+
+ if (snat_is_unk_proto_session (s))
+ {
+ clib_bihash_kv_16_8_t up_kv;
+ nat_ed_ses_key_t up_key;
+ up_key.l_addr = s->in2out.addr;
+ up_key.r_addr = s->ext_host_addr;
+ up_key.fib_index = s->in2out.fib_index;
+ up_key.proto = s->in2out.port;
+ up_key.rsvd = 0;
+ up_key.l_port = 0;
+ up_kv.key[0] = up_key.as_u64[0];
+ up_kv.key[1] = up_key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->in2out_ed,
+ &up_kv, 0))
+ clib_warning ("in2out key del failed");
+
+ up_key.l_addr = s->out2in.addr;
+ up_key.fib_index = s->out2in.fib_index;
+ up_kv.key[0] = up_key.as_u64[0];
+ up_kv.key[1] = up_key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->out2in_ed,
+ &up_kv, 0))
+ clib_warning ("out2in key del failed");
+
+ goto delete;
+ }
+ /* log NAT event */
+ snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
+ s->out2in.addr.as_u32,
+ s->in2out.protocol,
+ s->in2out.port,
+ s->out2in.port,
+ s->in2out.fib_index);
+
+ value.key = s->in2out.as_u64;
+ if (clib_bihash_add_del_8_8 (&tsm->in2out, &value, 0))
+ clib_warning ("in2out key del failed");
+ value.key = s->out2in.as_u64;
+ if (clib_bihash_add_del_8_8 (&tsm->out2in, &value, 0))
+ clib_warning ("out2in key del failed");
+delete:
+ pool_put (tsm->sessions, s);
+
+ clib_dlist_remove (tsm->list_pool, del_elt_index);
+ pool_put_index (tsm->list_pool, del_elt_index);
+ u->nstaticsessions--;
+
+ if (!addr_only)
+ break;
+ }
+ if (addr_only)
+ {
+ pool_put (tsm->users, u);
+ clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 0);
+ }
+ }
+ }
+ }
+
+ /* Delete static mapping from pool */
+ pool_put (sm->static_mappings, m);
+ }
+
+ if (!addr_only)
+ return 0;
+
+ /* Add/delete external address to FIB */
+ pool_foreach (interface, sm->interfaces,
+ ({
+ if (interface->is_inside)
+ continue;
+
+ snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
+ break;
+ }));
+ pool_foreach (interface, sm->output_feature_interfaces,
+ ({
+ if (interface->is_inside)
+ continue;
+
+ snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
+ break;
+ }));
+
+ return 0;
+}
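+
+/*
+ * Sketch (addresses hypothetical): add an address-only static mapping
+ * between local 10.0.0.3 and external 192.0.2.2, using the inside VRF from
+ * startup config (vrf_id ~0) and no interface resolution (sw_if_index ~0):
+ *
+ *   ip4_address_t l, e;
+ *   l.as_u32 = clib_host_to_net_u32 (0x0a000003);
+ *   e.as_u32 = clib_host_to_net_u32 (0xc0000202);
+ *   int rv = snat_add_static_mapping (l, e, 0, 0, ~0, 1, ~0,
+ *                                     SNAT_PROTOCOL_UDP, 1);
+ */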
+
+int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
+ snat_protocol_t proto, u32 vrf_id,
+ nat44_lb_addr_port_t *locals, u8 is_add)
+{
+ snat_main_t * sm = &snat_main;
+ snat_static_mapping_t *m;
+ snat_session_key_t m_key;
+ clib_bihash_kv_8_8_t kv, value;
+ u32 fib_index;
+ snat_address_t *a = 0;
+ int i;
+ nat44_lb_addr_port_t *local;
+ u32 worker_index = 0;
+ snat_main_per_thread_data_t *tsm;
+
+ m_key.addr = e_addr;
+ m_key.port = e_port;
+ m_key.protocol = proto;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = 0;
+ else
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+
+ if (is_add)
+ {
+ if (m)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ if (vec_len (locals) < 2)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+ vrf_id,
+ FIB_SOURCE_PLUGIN_HI);
+
+ /* Find external address in allocated addresses and reserve port for
+ address and port pair mapping when dynamic translations enabled */
+ if (!sm->static_mapping_only)
+ {
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+ {
+ a = sm->addresses + i;
+ /* External port must be unused */
+ switch (proto)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, e_port)) \
+ return VNET_API_ERROR_INVALID_VALUE; \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \
+ if (e_port > 1024) \
+ { \
+ a->busy_##n##_ports++; \
+ a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]++; \
+ } \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning("unknown_protocol");
+ return VNET_API_ERROR_INVALID_VALUE_2;
+ }
+ break;
+ }
+ }
+ /* External address must be allocated */
+ if (!a)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ pool_get (sm->static_mappings, m);
+ memset (m, 0, sizeof (*m));
+ m->external_addr = e_addr;
+ m->addr_only = 0;
+ m->vrf_id = vrf_id;
+ m->fib_index = fib_index;
+ m->external_port = e_port;
+ m->proto = proto;
+
+ m_key.addr = m->external_addr;
+ m_key.port = m->external_port;
+ m_key.protocol = m->proto;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ kv.value = m - sm->static_mappings;
+ if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1))
+ {
+ clib_warning ("static_mapping_by_external key add failed");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+
+ /* Assign worker */
+ if (sm->workers)
+ {
+ worker_index = sm->first_worker_index +
+ sm->workers[sm->next_worker++ % vec_len (sm->workers)];
+ tsm = vec_elt_at_index (sm->per_thread_data, worker_index);
+ m->worker_index = worker_index;
+ }
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+
+ m_key.port = clib_host_to_net_u16 (m->external_port);
+ kv.key = m_key.as_u64;
+ kv.value = ~0ULL;
+ if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 1))
+ {
+ clib_warning ("out2in key add failed");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+
+ m_key.fib_index = m->fib_index;
+ for (i = 0; i < vec_len (locals); i++)
+ {
+ m_key.addr = locals[i].addr;
+ m_key.port = locals[i].port;
+ kv.key = m_key.as_u64;
+ kv.value = m - sm->static_mappings;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1);
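+ /* prefix accumulates the probabilities into a running sum so that
+ snat_static_mapping_match can pick a local node with a weighted
+ binary search */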
+ locals[i].prefix = (i == 0) ? locals[i].probability :\
+ (locals[i - 1].prefix + locals[i].probability);
+ vec_add1 (m->locals, locals[i]);
+
+ m_key.port = clib_host_to_net_u16 (locals[i].port);
+ kv.key = m_key.as_u64;
+ kv.value = ~0ULL;
+ if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 1))
+ {
+ clib_warning ("in2out key add failed");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+ }
+ else
+ {
+ if (!m)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_PLUGIN_HI);
+
+ /* Free external address port */
+ if (!sm->static_mapping_only)
+ {
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+ {
+ a = sm->addresses + i;
+ switch (proto)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \
+ if (e_port > 1024) \
+ { \
+ a->busy_##n##_ports--; \
+ a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]--; \
+ } \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning("unknown_protocol");
+ return VNET_API_ERROR_INVALID_VALUE_2;
+ }
+ break;
+ }
+ }
+ }
+
+ tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index);
+ m_key.addr = m->external_addr;
+ m_key.port = m->external_port;
+ m_key.protocol = m->proto;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0))
+ {
+ clib_warning ("static_mapping_by_external key del failed");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+
+ m_key.port = clib_host_to_net_u16 (m->external_port);
+ kv.key = m_key.as_u64;
+ if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 0))
+ {
+ clib_warning ("out2in key del failed");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+
+ vec_foreach (local, m->locals)
+ {
+ m_key.addr = local->addr;
+ m_key.port = local->port;
+ m_key.fib_index = m->fib_index;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
+ {
+ clib_warning ("static_mapping_by_local key del failed");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+
+ m_key.port = clib_host_to_net_u16 (local->port);
+ kv.key = m_key.as_u64;
+ if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 0))
+ {
+ clib_warning ("in2out key del failed");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+ vec_free(m->locals);
+
+ pool_put (sm->static_mappings, m);
+ }
+
+ return 0;
+}
+
+int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm)
+{
+ snat_address_t *a = 0;
+ snat_session_t *ses;
+ u32 *ses_to_be_removed = 0, *ses_index;
+ clib_bihash_kv_8_8_t kv, value;
+ snat_user_key_t user_key;
+ snat_user_t *u;
+ snat_main_per_thread_data_t *tsm;
+ snat_static_mapping_t *m;
+ snat_interface_t *interface;
+ int i;
+
+ /* Find SNAT address */
+ for (i=0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].addr.as_u32 == addr.as_u32)
+ {
+ a = sm->addresses + i;
+ break;
+ }
+ }
+ if (!a)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ if (delete_sm)
+ {
+ pool_foreach (m, sm->static_mappings,
+ ({
+ if (m->external_addr.as_u32 == addr.as_u32)
+ (void) snat_add_static_mapping (m->local_addr, m->external_addr,
+ m->local_port, m->external_port,
+ m->vrf_id, m->addr_only, ~0,
+ m->proto, 0);
+ }));
+ }
+ else
+ {
+ /* Check if address is used in some static mapping */
+ if (is_snat_address_used_in_static_mapping(sm, addr))
+ {
+ clib_warning ("address used in static mapping");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+
+ if (a->fib_index != ~0)
+ fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4,
+ FIB_SOURCE_PLUGIN_HI);
+
+ /* Delete sessions using address */
+ if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
+ {
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ pool_foreach (ses, tsm->sessions, ({
+ if (ses->out2in.addr.as_u32 == addr.as_u32)
+ {
+ if (snat_is_unk_proto_session (ses))
+ {
+ clib_bihash_kv_16_8_t up_kv;
+ nat_ed_ses_key_t up_key;
+ up_key.l_addr = ses->in2out.addr;
+ up_key.r_addr = ses->ext_host_addr;
+ up_key.fib_index = ses->in2out.fib_index;
+ up_key.proto = ses->in2out.port;
+ up_key.rsvd = 0;
+ up_key.l_port = 0;
+ up_kv.key[0] = up_key.as_u64[0];
+ up_kv.key[1] = up_key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->in2out_ed,
+ &up_kv, 0))
+ clib_warning ("in2out key del failed");
+
+ up_key.l_addr = ses->out2in.addr;
+ up_key.fib_index = ses->out2in.fib_index;
+ up_kv.key[0] = up_key.as_u64[0];
+ up_kv.key[1] = up_key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->out2in_ed,
+ &up_kv, 0))
+ clib_warning ("out2in key del failed");
+ }
+ else
+ {
+ /* log NAT event */
+ snat_ipfix_logging_nat44_ses_delete(ses->in2out.addr.as_u32,
+ ses->out2in.addr.as_u32,
+ ses->in2out.protocol,
+ ses->in2out.port,
+ ses->out2in.port,
+ ses->in2out.fib_index);
+ kv.key = ses->in2out.as_u64;
+ clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0);
+ kv.key = ses->out2in.as_u64;
+ clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0);
+ }
+ vec_add1 (ses_to_be_removed, ses - tsm->sessions);
+ clib_dlist_remove (tsm->list_pool, ses->per_user_index);
+ user_key.addr = ses->in2out.addr;
+ user_key.fib_index = ses->in2out.fib_index;
+ kv.key = user_key.as_u64;
+ if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
+ {
+ u = pool_elt_at_index (tsm->users, value.value);
+ u->nsessions--;
+ }
+ }
+ }));
+
+ vec_foreach (ses_index, ses_to_be_removed)
+ pool_put_index (tsm->sessions, ses_index[0]);
+
+ vec_free (ses_to_be_removed);
+ }
+ }
+
+ vec_del1 (sm->addresses, i);
+
+ /* Delete external address from FIB */
+ pool_foreach (interface, sm->interfaces,
+ ({
+ if (interface->is_inside)
+ continue;
+
+ snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
+ break;
+ }));
+ pool_foreach (interface, sm->output_feature_interfaces,
+ ({
+ if (interface->is_inside)
+ continue;
+
+ snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
+ break;
+ }));
+
+ return 0;
+}
+
+int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+ const char * feature_name;
+ snat_address_t * ap;
+ snat_static_mapping_t * m;
+ snat_det_map_t * dm;
+
+ if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
+ feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
+ else
+ {
+ if (sm->num_workers > 1 && !sm->deterministic)
+ feature_name = is_inside ? "nat44-in2out-worker-handoff" : "nat44-out2in-worker-handoff";
+ else if (sm->deterministic)
+ feature_name = is_inside ? "nat44-det-in2out" : "nat44-det-out2in";
+ else
+ feature_name = is_inside ? "nat44-in2out" : "nat44-out2in";
+ }
+
+ vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index,
+ !is_del, 0, 0);
+
+ if (sm->fq_in2out_index == ~0 && !sm->deterministic && sm->num_workers > 1)
+ sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index, 0);
+
+ if (sm->fq_out2in_index == ~0 && !sm->deterministic && sm->num_workers > 1)
+ sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index, 0);
+
+ pool_foreach (i, sm->interfaces,
+ ({
+ if (i->sw_if_index == sw_if_index)
+ {
+ if (is_del)
+ pool_put (sm->interfaces, i);
+ else
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ goto fib;
+ }
+ }));
+
+ if (is_del)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ pool_get (sm->interfaces, i);
+ i->sw_if_index = sw_if_index;
+ i->is_inside = is_inside;
+
+ /* Add/delete external addresses to FIB */
+fib:
+ if (is_inside)
+ return 0;
+
+ vec_foreach (ap, sm->addresses)
+ snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
+
+ pool_foreach (m, sm->static_mappings,
+ ({
+ if (!(m->addr_only))
+ continue;
+
+ snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
+ }));
+
+ pool_foreach (dm, sm->det_maps,
+ ({
+ snat_add_del_addr_to_fib(&dm->out_addr, dm->out_plen, sw_if_index, !is_del);
+ }));
+
+ return 0;
+}
+
+int snat_interface_add_del_output_feature (u32 sw_if_index,
+ u8 is_inside,
+ int is_del)
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+ snat_address_t * ap;
+ snat_static_mapping_t * m;
+
+ if (sm->deterministic ||
+ (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)))
+ return VNET_API_ERROR_UNSUPPORTED;
+
+ if (is_inside)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
+ sw_if_index, !is_del, 0, 0);
+ vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
+ sw_if_index, !is_del, 0, 0);
+ goto fq;
+ }
+
+ if (sm->num_workers > 1)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in-worker-handoff",
+ sw_if_index, !is_del, 0, 0);
+ vnet_feature_enable_disable ("ip4-output",
+ "nat44-in2out-output-worker-handoff",
+ sw_if_index, !is_del, 0, 0);
+ }
+ else
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in", sw_if_index,
+ !is_del, 0, 0);
+ vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
+ sw_if_index, !is_del, 0, 0);
+ }
+
+fq:
+ if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
+ sm->fq_in2out_output_index =
+ vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
+
+ if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
+ sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index, 0);
+
+ pool_foreach (i, sm->output_feature_interfaces,
+ ({
+ if (i->sw_if_index == sw_if_index)
+ {
+ if (is_del)
+ pool_put (sm->output_feature_interfaces, i);
+ else
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ goto fib;
+ }
+ }));
+
+ if (is_del)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ pool_get (sm->output_feature_interfaces, i);
+ i->sw_if_index = sw_if_index;
+ i->is_inside = is_inside;
+
+ /* Add/delete external addresses to FIB */
+fib:
+ if (is_inside)
+ return 0;
+
+ vec_foreach (ap, sm->addresses)
+ snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
+
+ pool_foreach (m, sm->static_mappings,
+ ({
+ if (!(m->addr_only))
+ continue;
+
+ snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
+ }));
+
+ return 0;
+}
+
+int snat_set_workers (uword * bitmap)
+{
+ snat_main_t *sm = &snat_main;
+ int i, j = 0;
+
+ if (sm->num_workers < 2)
+ return VNET_API_ERROR_FEATURE_DISABLED;
+
+ if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
+ return VNET_API_ERROR_INVALID_WORKER;
+
+ vec_free (sm->workers);
+ clib_bitmap_foreach (i, bitmap,
+ ({
+ vec_add1(sm->workers, i);
+ sm->per_thread_data[i].snat_thread_index = j;
+ j++;
+ }));
+
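+ /* Split the dynamic port range (1024..65535) evenly among the
+ configured workers; each worker owns a disjoint slice of ports */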
+ sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
+ sm->num_snat_thread = _vec_len (sm->workers);
+
+ return 0;
+}
+
+
+static void
+snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length,
+ u32 if_address_index,
+ u32 is_delete);
+
+static clib_error_t * snat_init (vlib_main_t * vm)
+{
+ snat_main_t * sm = &snat_main;
+ clib_error_t * error = 0;
+ ip4_main_t * im = &ip4_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ uword *p;
+ vlib_thread_registration_t *tr;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ uword *bitmap = 0;
+ u32 i;
+ ip4_add_del_interface_address_callback_t cb4;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main();
+ sm->ip4_main = im;
+ sm->ip4_lookup_main = lm;
+ sm->api_main = &api_main;
+ sm->first_worker_index = 0;
+ sm->next_worker = 0;
+ sm->num_workers = 0;
+ sm->num_snat_thread = 1;
+ sm->workers = 0;
+ sm->port_per_thread = 0xffff - 1024;
+ sm->fq_in2out_index = ~0;
+ sm->fq_out2in_index = ~0;
+ sm->udp_timeout = SNAT_UDP_TIMEOUT;
+ sm->tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
+ sm->tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
+ sm->icmp_timeout = SNAT_ICMP_TIMEOUT;
+
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ if (p)
+ {
+ tr = (vlib_thread_registration_t *) p[0];
+ if (tr)
+ {
+ sm->num_workers = tr->count;
+ sm->first_worker_index = tr->first_index;
+ }
+ }
+
+ vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1);
+
+ /* Use all available workers by default */
+ if (sm->num_workers > 1)
+ {
+ for (i=0; i < sm->num_workers; i++)
+ bitmap = clib_bitmap_set (bitmap, i, 1);
+ snat_set_workers(bitmap);
+ clib_bitmap_free (bitmap);
+ }
+ else
+ {
+ sm->per_thread_data[0].snat_thread_index = 0;
+ }
+
+ error = snat_api_init(vm, sm);
+ if (error)
+ return error;
+
+ /* Set up the interface address add/del callback */
+ cb4.function = snat_ip4_add_del_interface_address_cb;
+ cb4.function_opaque = 0;
+
+ vec_add1 (im->add_del_interface_address_callbacks, cb4);
+
+ /* Init IPFIX logging */
+ snat_ipfix_logging_init(vm);
+
+ error = nat64_init(vm);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (snat_init);
+
+void snat_free_outside_address_and_port (snat_main_t * sm,
+ u32 thread_index,
+ snat_session_key_t * k,
+ u32 address_index)
+{
+ snat_address_t *a;
+ u16 port_host_byte_order = clib_net_to_host_u16 (k->port);
+
+ ASSERT (address_index < vec_len (sm->addresses));
+
+ a = sm->addresses + address_index;
+
+ switch (k->protocol)
+ {
+#define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
+ port_host_byte_order) == 1); \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
+ port_host_byte_order, 0); \
+ a->busy_##n##_ports--; \
+ a->busy_##n##_ports_per_thread[thread_index]--; \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning("unknown_protocol");
+ return;
+ }
+}
+
+/**
+ * @brief Match NAT44 static mapping.
+ *
+ * @param sm NAT main.
+ * @param match Address and port to match.
+ * @param mapping External or local address and port of the matched mapping.
+ * @param by_external If 0 match by local address otherwise match by external
+ * address.
+ * @param is_addr_only If non-NULL, set to 1 if the matched mapping is address only.
+ *
+ * @returns 0 if match found otherwise 1.
+ */
+int snat_static_mapping_match (snat_main_t * sm,
+ snat_session_key_t match,
+ snat_session_key_t * mapping,
+ u8 by_external,
+ u8 *is_addr_only)
+{
+ clib_bihash_kv_8_8_t kv, value;
+ snat_static_mapping_t *m;
+ snat_session_key_t m_key;
+ clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local;
+ u32 rand, lo = 0, hi, mid;
+
+ if (by_external)
+ mapping_hash = &sm->static_mapping_by_external;
+
+ m_key.addr = match.addr;
+ m_key.port = clib_net_to_host_u16 (match.port);
+ m_key.protocol = match.protocol;
+ m_key.fib_index = match.fib_index;
+
+ kv.key = m_key.as_u64;
+
+ if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
+ {
+ /* Try address only mapping */
+ m_key.port = 0;
+ m_key.protocol = 0;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
+ return 1;
+ }
+
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+
+ if (by_external)
+ {
+ if (vec_len (m->locals))
+ {
+ hi = vec_len (m->locals) - 1;
+ rand = 1 + (random_u32 (&sm->random_seed) % m->locals[hi].prefix);
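+ /* prefix holds cumulative probabilities; binary search for the first
+ local node whose cumulative weight covers rand */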
+ while (lo < hi)
+ {
+ mid = ((hi - lo) >> 1) + lo;
+ (rand > m->locals[mid].prefix) ? (lo = mid + 1) : (hi = mid);
+ }
+ if (!(m->locals[lo].prefix >= rand))
+ return 1;
+ mapping->addr = m->locals[lo].addr;
+ mapping->port = clib_host_to_net_u16 (m->locals[lo].port);
+ }
+ else
+ {
+ mapping->addr = m->local_addr;
+ /* Address only mapping doesn't change port */
+ mapping->port = m->addr_only ? match.port
+ : clib_host_to_net_u16 (m->local_port);
+ }
+ mapping->fib_index = m->fib_index;
+ mapping->protocol = m->proto;
+ }
+ else
+ {
+ mapping->addr = m->external_addr;
+ /* Address only mapping doesn't change port */
+ mapping->port = m->addr_only ? match.port
+ : clib_host_to_net_u16 (m->external_port);
+ mapping->fib_index = sm->outside_fib_index;
+ }
+
+ if (PREDICT_FALSE(is_addr_only != 0))
+ *is_addr_only = m->addr_only;
+
+ return 0;
+}
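+
+/*
+ * Sketch (hypothetical use): resolve the local address/port for a packet
+ * arriving on the outside interface, matching by external address:
+ *
+ *   snat_session_key_t match, mapping;
+ *   // fill match.addr/.port/.protocol/.fib_index from the packet
+ *   if (!snat_static_mapping_match (sm, match, &mapping, 1, 0))
+ *     {
+ *       // rewrite destination to mapping.addr and mapping.port
+ *     }
+ */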
+
+static_always_inline u16
+snat_random_port (snat_main_t * sm, u16 min, u16 max)
+{
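+ /* Map a uniform random u32 onto [min, max] by dividing the u32 range
+ into (max - min + 1) equally sized buckets and taking the bucket
+ index */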
+ return min + random_u32 (&sm->random_seed) /
+ (random_u32_max() / (max - min + 1) + 1);
+}
+
+int snat_alloc_outside_address_and_port (snat_main_t * sm,
+ u32 fib_index,
+ u32 thread_index,
+ snat_session_key_t * k,
+ u32 * address_indexp)
+{
+ int i;
+ snat_address_t *a;
+ u32 portnum;
+
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ a = sm->addresses + i;
+ if (sm->vrf_mode && a->fib_index != ~0 && a->fib_index != fib_index)
+ continue;
+ switch (k->protocol)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ if (a->busy_##n##_ports_per_thread[thread_index] < sm->port_per_thread) \
+ { \
+ while (1) \
+ { \
+ portnum = (sm->port_per_thread * \
+ sm->per_thread_data[thread_index].snat_thread_index) + \
+ snat_random_port(sm, 1, sm->port_per_thread) + 1024; \
+ if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \
+ continue; \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \
+ a->busy_##n##_ports_per_thread[thread_index]++; \
+ a->busy_##n##_ports++; \
+ k->addr = a->addr; \
+ k->port = clib_host_to_net_u16(portnum); \
+ *address_indexp = i; \
+ return 0; \
+ } \
+ } \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning("unknown protocol");
+ return 1;
+ }
+
+ }
+ /* Totally out of translations to use... */
+ snat_ipfix_logging_addresses_exhausted(0);
+ return 1;
+}
+
+
+static clib_error_t *
+add_address_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ snat_main_t * sm = &snat_main;
+ ip4_address_t start_addr, end_addr, this_addr;
+ u32 start_host_order, end_host_order;
+ u32 vrf_id = ~0;
+ int i, count;
+ int is_add = 1;
+ int rv = 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U - %U",
+ unformat_ip4_address, &start_addr,
+ unformat_ip4_address, &end_addr))
+ ;
+ else if (unformat (line_input, "tenant-vrf %u", &vrf_id))
+ ;
+ else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr))
+ end_addr = start_addr;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (sm->static_mapping_only)
+ {
+ error = clib_error_return (0, "static mapping only mode");
+ goto done;
+ }
+
+ start_host_order = clib_host_to_net_u32 (start_addr.as_u32);
+ end_host_order = clib_host_to_net_u32 (end_addr.as_u32);
+
+ if (end_host_order < start_host_order)
+ {
+ error = clib_error_return (0, "end address less than start address");
+ goto done;
+ }
+
+ count = (end_host_order - start_host_order) + 1;
+
+ if (count > 1024)
+ clib_warning ("%U - %U, %d addresses...",
+ format_ip4_address, &start_addr,
+ format_ip4_address, &end_addr,
+ count);
+
+ this_addr = start_addr;
+
+ for (i = 0; i < count; i++)
+ {
+ if (is_add)
+ snat_add_address (sm, &this_addr, vrf_id);
+ else
+ rv = snat_del_address (sm, this_addr, 0);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "S-NAT address does not exist.");
+ goto done;
+ case VNET_API_ERROR_UNSPECIFIED:
+ error = clib_error_return (0, "S-NAT address used in static mapping.");
+ goto done;
+ default:
+ break;
+ }
+
+ increment_v4_address (&this_addr);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (add_address_command, static) = {
+ .path = "nat44 add address",
+ .short_help = "nat44 add address <ip4-range-start> [- <ip4-range-end>] "
+ "[tenant-vrf <vrf-id>] [del]",
+ .function = add_address_command_fn,
+};
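+
+/*
+ * Example usage (addresses hypothetical):
+ *   nat44 add address 192.0.2.1 - 192.0.2.10 tenant-vrf 7
+ *   nat44 add address 192.0.2.1 del
+ */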
+
+static clib_error_t *
+snat_feature_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 * inside_sw_if_indices = 0;
+ u32 * outside_sw_if_indices = 0;
+ u8 is_output_feature = 0;
+ int is_del = 0;
+ int i;
+
+ sw_if_index = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "in %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ vec_add1 (inside_sw_if_indices, sw_if_index);
+ else if (unformat (line_input, "out %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ vec_add1 (outside_sw_if_indices, sw_if_index);
+ else if (unformat (line_input, "output-feature"))
+ is_output_feature = 1;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (vec_len (inside_sw_if_indices))
+ {
+ for (i = 0; i < vec_len(inside_sw_if_indices); i++)
+ {
+ sw_if_index = inside_sw_if_indices[i];
+ if (is_output_feature)
+ {
+ if (snat_interface_add_del_output_feature (sw_if_index, 1, is_del))
+ {
+ error = clib_error_return (0, "%s %U failed",
+ is_del ? "del" : "add",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm,
+ sw_if_index));
+ goto done;
+ }
+ }
+ else
+ {
+ if (snat_interface_add_del (sw_if_index, 1, is_del))
+ {
+ error = clib_error_return (0, "%s %U failed",
+ is_del ? "del" : "add",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm,
+ sw_if_index));
+ goto done;
+ }
+ }
+ }
+ }
+
+ if (vec_len (outside_sw_if_indices))
+ {
+ for (i = 0; i < vec_len(outside_sw_if_indices); i++)
+ {
+ sw_if_index = outside_sw_if_indices[i];
+ if (is_output_feature)
+ {
+ if (snat_interface_add_del_output_feature (sw_if_index, 0, is_del))
+ {
+ error = clib_error_return (0, "%s %U failed",
+ is_del ? "del" : "add",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm,
+ sw_if_index));
+ goto done;
+ }
+ }
+ else
+ {
+ if (snat_interface_add_del (sw_if_index, 0, is_del))
+ {
+ error = clib_error_return (0, "%s %U failed",
+ is_del ? "del" : "add",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm,
+ sw_if_index));
+ goto done;
+ }
+ }
+ }
+ }
+
+done:
+ unformat_free (line_input);
+ vec_free (inside_sw_if_indices);
+ vec_free (outside_sw_if_indices);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (set_interface_snat_command, static) = {
+ .path = "set interface nat44",
+ .function = snat_feature_command_fn,
+ .short_help = "set interface nat44 in <intfc> out <intfc> [output-feature] "
+ "[del]",
+};
+
+uword
+unformat_snat_protocol (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
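+  /* dummy "if (0)" lets each macro-generated clause below start with "else" */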
+ if (0);
+#define _(N, i, n, s) else if (unformat (input, s)) *r = SNAT_PROTOCOL_##N;
+ foreach_snat_protocol
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+u8 *
+format_snat_protocol (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u8 *t = 0;
+
+ switch (i)
+ {
+#define _(N, j, n, str) case SNAT_PROTOCOL_##N: t = (u8 *) str; break;
+ foreach_snat_protocol
+#undef _
+ default:
+ s = format (s, "unknown");
+ return s;
+ }
+ s = format (s, "%s", t);
+ return s;
+}
+
+static clib_error_t *
+add_static_mapping_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t * error = 0;
+ ip4_address_t l_addr, e_addr;
+ u32 l_port = 0, e_port = 0, vrf_id = ~0;
+ int is_add = 1;
+ int addr_only = 1;
+ u32 sw_if_index = ~0;
+ vnet_main_t * vnm = vnet_get_main();
+ int rv;
+  snat_protocol_t proto = ~0;
+ u8 proto_set = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "local %U %u", unformat_ip4_address, &l_addr,
+ &l_port))
+ addr_only = 0;
+ else if (unformat (line_input, "local %U", unformat_ip4_address, &l_addr))
+ ;
+ else if (unformat (line_input, "external %U %u", unformat_ip4_address,
+ &e_addr, &e_port))
+ addr_only = 0;
+ else if (unformat (line_input, "external %U", unformat_ip4_address,
+ &e_addr))
+ ;
+ else if (unformat (line_input, "external %U %u",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ &e_port))
+ addr_only = 0;
+ else if (unformat (line_input, "external %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (line_input, "vrf %u", &vrf_id))
+ ;
+ else if (unformat (line_input, "%U", unformat_snat_protocol, &proto))
+ proto_set = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!addr_only && !proto_set)
+ {
+ error = clib_error_return (0, "missing protocol");
+ goto done;
+ }
+
+ rv = snat_add_static_mapping(l_addr, e_addr, (u16) l_port, (u16) e_port,
+ vrf_id, addr_only, sw_if_index, proto, is_add);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "External port already in use.");
+ goto done;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ if (is_add)
+ error = clib_error_return (0, "External addres must be allocated.");
+ else
+ error = clib_error_return (0, "Mapping not exist.");
+ goto done;
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ error = clib_error_return (0, "No such VRF id.");
+ goto done;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error = clib_error_return (0, "Mapping already exist.");
+ goto done;
+ default:
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat add static mapping}
+ * Static mapping allows hosts on the external network to initiate connections
+ * to hosts on the local network.
+ * To create static mapping between local host address 10.0.0.3 port 6303 and
+ * external address 4.4.4.4 port 3606 for TCP protocol use:
+ * vpp# nat44 add static mapping tcp local 10.0.0.3 6303 external 4.4.4.4 3606
+ * If the plugin is not running in "static mapping only" mode, first add
+ * the external address:
+ * vpp# nat44 add address 4.4.4.4
+ * To create static mapping between local and external address use:
+ * vpp# nat44 add static mapping local 10.0.0.3 external 4.4.4.4
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (add_static_mapping_command, static) = {
+ .path = "nat44 add static mapping",
+ .function = add_static_mapping_command_fn,
+ .short_help =
+ "nat44 add static mapping tcp|udp|icmp local <addr> [<port>] external <addr> [<port>] [vrf <table-id>] [del]",
+};
+
+static clib_error_t *
+add_lb_static_mapping_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t * error = 0;
+ ip4_address_t l_addr, e_addr;
+ u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0;
+ int is_add = 1;
+ int rv;
+ snat_protocol_t proto;
+ u8 proto_set = 0;
+ nat44_lb_addr_port_t *locals = 0, local;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "local %U:%u probability %u",
+ unformat_ip4_address, &l_addr, &l_port, &probability))
+ {
+ memset (&local, 0, sizeof (local));
+ local.addr = l_addr;
+ local.port = (u16) l_port;
+ local.probability = (u8) probability;
+ vec_add1 (locals, local);
+ }
+ else if (unformat (line_input, "external %U:%u", unformat_ip4_address,
+ &e_addr, &e_port))
+ ;
+ else if (unformat (line_input, "vrf %u", &vrf_id))
+ ;
+ else if (unformat (line_input, "protocol %U", unformat_snat_protocol,
+ &proto))
+ proto_set = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (vec_len (locals) < 2)
+ {
+ error = clib_error_return (0, "at least two local must be set");
+ goto done;
+ }
+
+ if (!proto_set)
+ {
+ error = clib_error_return (0, "missing protocol");
+ goto done;
+ }
+
+ rv = nat44_add_del_lb_static_mapping (e_addr, (u16) e_port, proto, vrf_id,
+ locals, is_add);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "External port already in use.");
+ goto done;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ if (is_add)
+ error = clib_error_return (0, "External addres must be allocated.");
+ else
+ error = clib_error_return (0, "Mapping not exist.");
+ goto done;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error = clib_error_return (0, "Mapping already exist.");
+ goto done;
+ default:
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+ vec_free (locals);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (add_lb_static_mapping_command, static) = {
+ .path = "nat44 add load-balancing static mapping",
+ .function = add_lb_static_mapping_command_fn,
+ .short_help =
+ "nat44 add load-balancing static mapping protocol tcp|udp external <addr>:<port> local <addr>:<port> probability <n> [vrf <table-id>] [del]",
+};
+
+static clib_error_t *
+set_workers_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ uword *bitmap = 0;
+ int rv = 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (bitmap == 0)
+ {
+ error = clib_error_return (0, "List of workers must be specified.");
+ goto done;
+ }
+
+ rv = snat_set_workers(bitmap);
+
+ clib_bitmap_free (bitmap);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_WORKER:
+ error = clib_error_return (0, "Invalid worker(s).");
+ goto done;
+ case VNET_API_ERROR_FEATURE_DISABLED:
+      error = clib_error_return (0,
+        "Supported only if 2 or more workers are available.");
+ goto done;
+ default:
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{set snat workers}
+ * To set NAT worker threads when 2 or more workers are available, use:
+ * vpp# set nat workers 0-2,5
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (set_workers_command, static) = {
+ .path = "set nat workers",
+ .function = set_workers_command_fn,
+ .short_help =
+ "set nat workers <workers-list>",
+};
+
+static clib_error_t *
+snat_ipfix_logging_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 domain_id = 0;
+ u32 src_port = 0;
+ u8 enable = 1;
+ int rv = 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "domain %d", &domain_id))
+ ;
+ else if (unformat (line_input, "src-port %d", &src_port))
+ ;
+ else if (unformat (line_input, "disable"))
+ enable = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ rv = snat_ipfix_logging_enable_disable (enable, domain_id, (u16) src_port);
+
+ if (rv)
+ {
+ error = clib_error_return (0, "ipfix logging enable failed");
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat ipfix logging}
+ * To enable NAT IPFIX logging use:
+ * vpp# nat ipfix logging
+ * To set IPFIX exporter use:
+ * vpp# set ipfix exporter collector 10.10.10.3 src 10.10.10.1
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (snat_ipfix_logging_enable_disable_command, static) = {
+ .path = "nat ipfix logging",
+ .function = snat_ipfix_logging_enable_disable_command_fn,
+ .short_help = "nat ipfix logging [domain <domain-id>] [src-port <port>] [disable]",
+};
+
+static u32
+snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0)
+{
+ snat_main_t *sm = &snat_main;
+ u32 next_worker_index = 0;
+ u32 hash;
+
+ next_worker_index = sm->first_worker_index;
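+  /* cheap hash: fold the four bytes of the inside source address together */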
+ hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
+ (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >>24);
+
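+  /* pick a worker; skip the modulo when the worker count is a power of two */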
+ if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
+ next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
+ else
+ next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
+
+ return next_worker_index;
+}
+
+static u32
+snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0)
+{
+ snat_main_t *sm = &snat_main;
+ udp_header_t *udp;
+ u16 port;
+ snat_session_key_t m_key;
+ clib_bihash_kv_8_8_t kv, value;
+ snat_static_mapping_t *m;
+ nat_ed_ses_key_t key;
+ clib_bihash_kv_16_8_t s_kv, s_value;
+ snat_main_per_thread_data_t *tsm;
+ snat_session_t *s;
+ int i;
+ u32 proto;
+
+ /* first try static mappings without port */
+ if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
+ {
+ m_key.addr = ip0->dst_address;
+ m_key.port = 0;
+ m_key.protocol = 0;
+ m_key.fib_index = rx_fib_index0;
+ kv.key = m_key.as_u64;
+ if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ {
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+ return m->worker_index;
+ }
+ }
+
+ proto = ip_proto_to_snat_proto (ip0->protocol);
+ udp = ip4_next_header (ip0);
+ port = udp->dst_port;
+
+ /* unknown protocol */
+ if (PREDICT_FALSE (proto == ~0))
+ {
+ key.l_addr = ip0->dst_address;
+ key.r_addr = ip0->src_address;
+ key.fib_index = rx_fib_index0;
+ key.proto = ip0->protocol;
+ key.rsvd = 0;
+ key.l_port = 0;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+
+ if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
+ {
+ for (i = 0; i < _vec_len (sm->per_thread_data); i++)
+ {
+ tsm = vec_elt_at_index (sm->per_thread_data, i);
+ if (!pool_is_free_index(tsm->sessions, s_value.value))
+ {
+ s = pool_elt_at_index (tsm->sessions, s_value.value);
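+                  /* for unknown-protocol sessions the out2in port field
+                     carries the IP protocol number */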
+ if (s->out2in.addr.as_u32 == ip0->dst_address.as_u32 &&
+ s->out2in.port == ip0->protocol &&
+ snat_is_unk_proto_session (s))
+ return i;
+ }
+ }
+ }
+
+ /* if no session use current thread */
+ return vlib_get_thread_index ();
+ }
+
+ if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
+ {
+ icmp46_header_t * icmp = (icmp46_header_t *) udp;
+ icmp_echo_header_t *echo = (icmp_echo_header_t *)(icmp + 1);
+ if (!icmp_is_error_message (icmp))
+ port = echo->identifier;
+ else
+ {
+ ip4_header_t *inner_ip = (ip4_header_t *)(echo + 1);
+ proto = ip_proto_to_snat_proto (inner_ip->protocol);
+ void *l4_header = ip4_next_header (inner_ip);
+ switch (proto)
+ {
+ case SNAT_PROTOCOL_ICMP:
+ icmp = (icmp46_header_t*)l4_header;
+ echo = (icmp_echo_header_t *)(icmp + 1);
+ port = echo->identifier;
+ break;
+ case SNAT_PROTOCOL_UDP:
+ case SNAT_PROTOCOL_TCP:
+ port = ((tcp_udp_header_t*)l4_header)->src_port;
+ break;
+ default:
+ return vlib_get_thread_index ();
+ }
+ }
+ }
+
+ /* try static mappings with port */
+ if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
+ {
+ m_key.addr = ip0->dst_address;
+ m_key.port = clib_net_to_host_u16 (port);
+ m_key.protocol = proto;
+ m_key.fib_index = rx_fib_index0;
+ kv.key = m_key.as_u64;
+ if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ {
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+ return m->worker_index;
+ }
+ }
+
+  /* worker by outside port: dynamic ports start at 1024 and are split
+     evenly across workers */
+ return (u32) ((clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread);
+}
+
+static clib_error_t *
+snat_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ snat_main_t * sm = &snat_main;
+ u32 translation_buckets = 1024;
+ u32 translation_memory_size = 128<<20;
+ u32 user_buckets = 128;
+ u32 user_memory_size = 64<<20;
+ u32 max_translations_per_user = 100;
+ u32 outside_vrf_id = 0;
+ u32 inside_vrf_id = 0;
+ u32 static_mapping_buckets = 1024;
+ u32 static_mapping_memory_size = 64<<20;
+ u8 static_mapping_only = 0;
+ u8 static_mapping_connection_tracking = 0;
+ snat_main_per_thread_data_t *tsm;
+
+ sm->deterministic = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "translation hash buckets %d", &translation_buckets))
+ ;
+ else if (unformat (input, "translation hash memory %d",
+ &translation_memory_size));
+ else if (unformat (input, "user hash buckets %d", &user_buckets))
+ ;
+ else if (unformat (input, "user hash memory %d",
+ &user_memory_size))
+ ;
+ else if (unformat (input, "max translations per user %d",
+ &max_translations_per_user))
+ ;
+ else if (unformat (input, "outside VRF id %d",
+ &outside_vrf_id))
+ ;
+ else if (unformat (input, "inside VRF id %d",
+ &inside_vrf_id))
+ ;
+ else if (unformat (input, "static mapping only"))
+ {
+ static_mapping_only = 1;
+ if (unformat (input, "connection tracking"))
+ static_mapping_connection_tracking = 1;
+ }
+ else if (unformat (input, "deterministic"))
+ sm->deterministic = 1;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ /* for show commands, etc. */
+ sm->translation_buckets = translation_buckets;
+ sm->translation_memory_size = translation_memory_size;
+ /* do not exceed load factor 10 */
+ sm->max_translations = 10 * translation_buckets;
+ sm->user_buckets = user_buckets;
+ sm->user_memory_size = user_memory_size;
+ sm->max_translations_per_user = max_translations_per_user;
+ sm->outside_vrf_id = outside_vrf_id;
+ sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+ outside_vrf_id,
+ FIB_SOURCE_PLUGIN_HI);
+ sm->inside_vrf_id = inside_vrf_id;
+ sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+ inside_vrf_id,
+ FIB_SOURCE_PLUGIN_HI);
+ sm->static_mapping_only = static_mapping_only;
+ sm->static_mapping_connection_tracking = static_mapping_connection_tracking;
+
+ if (sm->deterministic)
+ {
+ sm->in2out_node_index = snat_det_in2out_node.index;
+ sm->in2out_output_node_index = ~0;
+ sm->out2in_node_index = snat_det_out2in_node.index;
+ sm->icmp_match_in2out_cb = icmp_match_in2out_det;
+ sm->icmp_match_out2in_cb = icmp_match_out2in_det;
+ }
+ else
+ {
+ sm->worker_in2out_cb = snat_get_worker_in2out_cb;
+ sm->worker_out2in_cb = snat_get_worker_out2in_cb;
+ sm->in2out_node_index = snat_in2out_node.index;
+ sm->in2out_output_node_index = snat_in2out_output_node.index;
+ sm->out2in_node_index = snat_out2in_node.index;
+      if (!static_mapping_only || static_mapping_connection_tracking)
+ {
+ sm->icmp_match_in2out_cb = icmp_match_in2out_slow;
+ sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
+
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ clib_bihash_init_8_8 (&tsm->in2out, "in2out", translation_buckets,
+ translation_memory_size);
+
+ clib_bihash_init_8_8 (&tsm->out2in, "out2in", translation_buckets,
+ translation_memory_size);
+
+ clib_bihash_init_8_8 (&tsm->user_hash, "users", user_buckets,
+ user_memory_size);
+ }
+
+ clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed",
+ translation_buckets, translation_memory_size);
+
+ clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
+ translation_buckets, translation_memory_size);
+ }
+ else
+ {
+ sm->icmp_match_in2out_cb = icmp_match_in2out_fast;
+ sm->icmp_match_out2in_cb = icmp_match_out2in_fast;
+ }
+ clib_bihash_init_8_8 (&sm->static_mapping_by_local,
+ "static_mapping_by_local", static_mapping_buckets,
+ static_mapping_memory_size);
+
+ clib_bihash_init_8_8 (&sm->static_mapping_by_external,
+ "static_mapping_by_external", static_mapping_buckets,
+ static_mapping_memory_size);
+ }
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (snat_config, "nat");
+
+u8 * format_snat_session_state (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u8 *t = 0;
+
+ switch (i)
+ {
+#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break;
+ foreach_snat_session_state
+#undef _
+    default:
+      s = format (s, "unknown");
+      return s;
+ }
+ s = format (s, "%s", t);
+ return s;
+}
+
+u8 * format_snat_key (u8 * s, va_list * args)
+{
+ snat_session_key_t * key = va_arg (*args, snat_session_key_t *);
+
+ s = format (s, "%U proto %U port %d fib %d",
+ format_ip4_address, &key->addr,
+ format_snat_protocol, key->protocol,
+ clib_net_to_host_u16 (key->port), key->fib_index);
+ return s;
+}
+
+u8 * format_snat_session (u8 * s, va_list * args)
+{
+ snat_main_t * sm __attribute__((unused)) = va_arg (*args, snat_main_t *);
+ snat_session_t * sess = va_arg (*args, snat_session_t *);
+
+ if (snat_is_unk_proto_session (sess))
+ {
+ s = format (s, " i2o %U proto %u fib %u\n",
+ format_ip4_address, &sess->in2out.addr, sess->in2out.port,
+ sess->in2out.fib_index);
+ s = format (s, " o2i %U proto %u fib %u\n",
+ format_ip4_address, &sess->out2in.addr, sess->out2in.port,
+ sess->out2in.fib_index);
+ }
+ else
+ {
+ s = format (s, " i2o %U\n", format_snat_key, &sess->in2out);
+ s = format (s, " o2i %U\n", format_snat_key, &sess->out2in);
+ }
+ if (sess->ext_host_addr.as_u32)
+ s = format (s, " external host %U\n",
+ format_ip4_address, &sess->ext_host_addr);
+ s = format (s, " last heard %.2f\n", sess->last_heard);
+ s = format (s, " total pkts %d, total bytes %lld\n",
+ sess->total_pkts, sess->total_bytes);
+ if (snat_is_session_static (sess))
+ s = format (s, " static translation\n");
+ else
+ s = format (s, " dynamic translation\n");
+ if (sess->flags & SNAT_SESSION_FLAG_LOAD_BALANCING)
+ s = format (s, " load-balancing\n");
+
+ return s;
+}
+
+u8 * format_snat_user (u8 * s, va_list * args)
+{
+ snat_main_per_thread_data_t * sm = va_arg (*args, snat_main_per_thread_data_t *);
+ snat_user_t * u = va_arg (*args, snat_user_t *);
+ int verbose = va_arg (*args, int);
+ dlist_elt_t * head, * elt;
+ u32 elt_index, head_index;
+ u32 session_index;
+ snat_session_t * sess;
+
+ s = format (s, "%U: %d dynamic translations, %d static translations\n",
+ format_ip4_address, &u->addr, u->nsessions, u->nstaticsessions);
+
+ if (verbose == 0)
+ return s;
+
+ if (u->nsessions || u->nstaticsessions)
+ {
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (sm->list_pool, head_index);
+
+ elt_index = head->next;
+ elt = pool_elt_at_index (sm->list_pool, elt_index);
+ session_index = elt->value;
+
+ while (session_index != ~0)
+ {
+ sess = pool_elt_at_index (sm->sessions, session_index);
+
+ s = format (s, " %U\n", format_snat_session, sm, sess);
+
+ elt_index = elt->next;
+ elt = pool_elt_at_index (sm->list_pool, elt_index);
+ session_index = elt->value;
+ }
+ }
+
+ return s;
+}
+
+u8 * format_snat_static_mapping (u8 * s, va_list * args)
+{
+ snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *);
+ nat44_lb_addr_port_t *local;
+
+ if (m->addr_only)
+ s = format (s, "local %U external %U vrf %d",
+ format_ip4_address, &m->local_addr,
+ format_ip4_address, &m->external_addr,
+ m->vrf_id);
+ else
+ {
+ if (vec_len (m->locals))
+ {
+ s = format (s, "%U vrf %d external %U:%d",
+ format_snat_protocol, m->proto,
+ m->vrf_id,
+ format_ip4_address, &m->external_addr, m->external_port);
+ vec_foreach (local, m->locals)
+ s = format (s, "\n local %U:%d probability %d\%",
+ format_ip4_address, &local->addr, local->port,
+ local->probability);
+ }
+ else
+ s = format (s, "%U local %U:%d external %U:%d vrf %d",
+ format_snat_protocol, m->proto,
+ format_ip4_address, &m->local_addr, m->local_port,
+ format_ip4_address, &m->external_addr, m->external_port,
+ m->vrf_id);
+ }
+ return s;
+}
+
+u8 * format_snat_static_map_to_resolve (u8 * s, va_list * args)
+{
+ snat_static_map_resolve_t *m = va_arg (*args, snat_static_map_resolve_t *);
+ vnet_main_t *vnm = vnet_get_main();
+
+ if (m->addr_only)
+ s = format (s, "local %U external %U vrf %d",
+ format_ip4_address, &m->l_addr,
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, m->sw_if_index),
+ m->vrf_id);
+ else
+ s = format (s, "%U local %U:%d external %U:%d vrf %d",
+ format_snat_protocol, m->proto,
+ format_ip4_address, &m->l_addr, m->l_port,
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, m->sw_if_index), m->e_port,
+ m->vrf_id);
+
+ return s;
+}
+
+u8 * format_det_map_ses (u8 * s, va_list * args)
+{
+ snat_det_map_t * det_map = va_arg (*args, snat_det_map_t *);
+ ip4_address_t in_addr, out_addr;
+ u32 in_offset, out_offset;
+ snat_det_session_t * ses = va_arg (*args, snat_det_session_t *);
+ u32 * i = va_arg (*args, u32 *);
+
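+  /* each inside host owns SNAT_DET_SES_PER_USER consecutive session slots */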
+ u32 user_index = *i / SNAT_DET_SES_PER_USER;
+ in_addr.as_u32 = clib_host_to_net_u32 (
+ clib_net_to_host_u32(det_map->in_addr.as_u32) + user_index);
+ in_offset = clib_net_to_host_u32(in_addr.as_u32) -
+ clib_net_to_host_u32(det_map->in_addr.as_u32);
+ out_offset = in_offset / det_map->sharing_ratio;
+ out_addr.as_u32 = clib_host_to_net_u32(
+ clib_net_to_host_u32(det_map->out_addr.as_u32) + out_offset);
+ s = format (s, "in %U:%d out %U:%d external host %U:%d state: %U expire: %d\n",
+ format_ip4_address, &in_addr,
+ clib_net_to_host_u16 (ses->in_port),
+ format_ip4_address, &out_addr,
+ clib_net_to_host_u16 (ses->out.out_port),
+ format_ip4_address, &ses->out.ext_host_addr,
+ clib_net_to_host_u16 (ses->out.ext_host_port),
+ format_snat_session_state, ses->state,
+ ses->expire);
+
+ return s;
+}
+
+static clib_error_t *
+show_snat_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int verbose = 0;
+ snat_main_t * sm = &snat_main;
+ snat_user_t * u;
+ snat_static_mapping_t *m;
+ snat_interface_t *i;
+ snat_address_t * ap;
+ vnet_main_t *vnm = vnet_get_main();
+ snat_main_per_thread_data_t *tsm;
+ u32 users_num = 0, sessions_num = 0, *worker, *sw_if_index;
+ uword j = 0;
+ snat_static_map_resolve_t *rp;
+ snat_det_map_t * dm;
+ snat_det_session_t * ses;
+
+ if (unformat (input, "detail"))
+ verbose = 1;
+ else if (unformat (input, "verbose"))
+ verbose = 2;
+
+ if (sm->static_mapping_only)
+ {
+ if (sm->static_mapping_connection_tracking)
+ vlib_cli_output (vm, "NAT plugin mode: static mapping only connection "
+ "tracking");
+ else
+ vlib_cli_output (vm, "NAT plugin mode: static mapping only");
+ }
+ else if (sm->deterministic)
+ {
+ vlib_cli_output (vm, "NAT plugin mode: deterministic mapping");
+ }
+ else
+ {
+ vlib_cli_output (vm, "NAT plugin mode: dynamic translations enabled");
+ }
+
+ if (verbose > 0)
+ {
+ pool_foreach (i, sm->interfaces,
+ ({
+ vlib_cli_output (vm, "%U %s", format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, i->sw_if_index),
+ i->is_inside ? "in" : "out");
+ }));
+
+ pool_foreach (i, sm->output_feature_interfaces,
+ ({
+ vlib_cli_output (vm, "%U output-feature %s",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, i->sw_if_index),
+ i->is_inside ? "in" : "out");
+ }));
+
+ if (vec_len (sm->auto_add_sw_if_indices))
+ {
+ vlib_cli_output (vm, "NAT44 pool addresses interfaces:");
+ vec_foreach (sw_if_index, sm->auto_add_sw_if_indices)
+ {
+ vlib_cli_output (vm, "%U", format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, *sw_if_index));
+ }
+ }
+
+ vec_foreach (ap, sm->addresses)
+ {
+ vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr);
+ if (ap->fib_index != ~0)
+ vlib_cli_output (vm, " tenant VRF: %u",
+ ip4_fib_get(ap->fib_index)->table_id);
+ else
+ vlib_cli_output (vm, " tenant VRF independent");
+#define _(N, i, n, s) \
+ vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s);
+ foreach_snat_protocol
+#undef _
+ }
+ }
+
+ if (sm->num_workers > 1)
+ {
+ vlib_cli_output (vm, "%d workers", vec_len (sm->workers));
+ if (verbose > 0)
+ {
+ vec_foreach (worker, sm->workers)
+ {
+ vlib_worker_thread_t *w =
+ vlib_worker_threads + *worker + sm->first_worker_index;
+ vlib_cli_output (vm, " %s", w->name);
+ }
+ }
+ }
+
+ if (sm->deterministic)
+ {
+ vlib_cli_output (vm, "udp timeout: %dsec", sm->udp_timeout);
+ vlib_cli_output (vm, "tcp-established timeout: %dsec",
+ sm->tcp_established_timeout);
+ vlib_cli_output (vm, "tcp-transitory timeout: %dsec",
+ sm->tcp_transitory_timeout);
+ vlib_cli_output (vm, "icmp timeout: %dsec", sm->icmp_timeout);
+ vlib_cli_output (vm, "%d deterministic mappings",
+ pool_elts (sm->det_maps));
+ if (verbose > 0)
+ {
+ pool_foreach (dm, sm->det_maps,
+ ({
+ vlib_cli_output (vm, "in %U/%d out %U/%d\n",
+ format_ip4_address, &dm->in_addr, dm->in_plen,
+ format_ip4_address, &dm->out_addr, dm->out_plen);
+ vlib_cli_output (vm, " outside address sharing ratio: %d\n",
+ dm->sharing_ratio);
+ vlib_cli_output (vm, " number of ports per inside host: %d\n",
+ dm->ports_per_host);
+ vlib_cli_output (vm, " sessions number: %d\n", dm->ses_num);
+ if (verbose > 1)
+ {
+ vec_foreach_index (j, dm->sessions)
+ {
+ ses = vec_elt_at_index (dm->sessions, j);
+ if (ses->in_port)
+ vlib_cli_output (vm, " %U", format_det_map_ses, dm, ses,
+ &j);
+ }
+ }
+ }));
+ }
+ }
+ else
+ {
+ if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
+ {
+ vlib_cli_output (vm, "%d static mappings",
+ pool_elts (sm->static_mappings));
+
+ if (verbose > 0)
+ {
+ pool_foreach (m, sm->static_mappings,
+ ({
+ vlib_cli_output (vm, "%U", format_snat_static_mapping, m);
+ }));
+ }
+ }
+ else
+ {
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ users_num += pool_elts (tsm->users);
+ sessions_num += pool_elts (tsm->sessions);
+ }
+
+ vlib_cli_output (vm, "%d users, %d outside addresses, %d active sessions,"
+ " %d static mappings",
+ users_num,
+ vec_len (sm->addresses),
+ sessions_num,
+ pool_elts (sm->static_mappings));
+
+ if (verbose > 0)
+ {
+ vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->in2out_ed,
+ verbose - 1);
+ vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed,
+ verbose - 1);
+ vec_foreach_index (j, sm->per_thread_data)
+ {
+ tsm = vec_elt_at_index (sm->per_thread_data, j);
+
+ if (pool_elts (tsm->users) == 0)
+ continue;
+
+ vlib_worker_thread_t *w = vlib_worker_threads + j;
+ vlib_cli_output (vm, "Thread %d (%s at lcore %u):", j, w->name,
+ w->lcore_id);
+ vlib_cli_output (vm, " %U", format_bihash_8_8, &tsm->in2out,
+ verbose - 1);
+ vlib_cli_output (vm, " %U", format_bihash_8_8, &tsm->out2in,
+ verbose - 1);
+ vlib_cli_output (vm, " %d list pool elements",
+ pool_elts (tsm->list_pool));
+
+ pool_foreach (u, tsm->users,
+ ({
+ vlib_cli_output (vm, " %U", format_snat_user, tsm, u,
+ verbose - 1);
+ }));
+ }
+
+ if (pool_elts (sm->static_mappings))
+ {
+ vlib_cli_output (vm, "static mappings:");
+ pool_foreach (m, sm->static_mappings,
+ ({
+ vlib_cli_output (vm, "%U", format_snat_static_mapping, m);
+ }));
+ for (j = 0; j < vec_len (sm->to_resolve); j++)
+ {
+ rp = sm->to_resolve + j;
+ vlib_cli_output (vm, "%U",
+ format_snat_static_map_to_resolve, rp);
+ }
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_snat_command, static) = {
+ .path = "show nat44",
+ .short_help = "show nat44",
+ .function = show_snat_command_fn,
+};
+
+
+static void
+snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length,
+ u32 if_address_index,
+ u32 is_delete)
+{
+ snat_main_t *sm = &snat_main;
+ snat_static_map_resolve_t *rp;
+ u32 *indices_to_delete = 0;
+ int i, j;
+ int rv;
+
+ for (i = 0; i < vec_len(sm->auto_add_sw_if_indices); i++)
+ {
+ if (sw_if_index == sm->auto_add_sw_if_indices[i])
+ {
+ if (!is_delete)
+ {
+ /* Don't trip over lease renewal, static config */
+ for (j = 0; j < vec_len(sm->addresses); j++)
+ if (sm->addresses[j].addr.as_u32 == address->as_u32)
+ return;
+
+ snat_add_address (sm, address, ~0);
+ /* Scan static map resolution vector */
+ for (j = 0; j < vec_len (sm->to_resolve); j++)
+ {
+ rp = sm->to_resolve + j;
+ /* On this interface? */
+ if (rp->sw_if_index == sw_if_index)
+ {
+ /* Add the static mapping */
+ rv = snat_add_static_mapping (rp->l_addr,
+ address[0],
+ rp->l_port,
+ rp->e_port,
+ rp->vrf_id,
+ rp->addr_only,
+ ~0 /* sw_if_index */,
+ rp->proto,
+ rp->is_add);
+ if (rv)
+ clib_warning ("snat_add_static_mapping returned %d",
+ rv);
+ vec_add1 (indices_to_delete, j);
+ }
+ }
+ /* If we resolved any of the outstanding static mappings */
+ if (vec_len(indices_to_delete))
+ {
+ /* Delete them */
+ for (j = vec_len(indices_to_delete)-1; j >= 0; j--)
+                  vec_delete(sm->to_resolve, 1, indices_to_delete[j]);
+ vec_free(indices_to_delete);
+ }
+ return;
+ }
+ else
+ {
+ (void) snat_del_address(sm, address[0], 1);
+ return;
+ }
+ }
+ }
+}
+
+
+int snat_add_interface_address (snat_main_t *sm, u32 sw_if_index, int is_del)
+{
+ ip4_main_t * ip4_main = sm->ip4_main;
+ ip4_address_t * first_int_addr;
+ snat_static_map_resolve_t *rp;
+ u32 *indices_to_delete = 0;
+ int i, j;
+
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index,
+ 0 /* just want the address*/);
+
+ for (i = 0; i < vec_len(sm->auto_add_sw_if_indices); i++)
+ {
+ if (sm->auto_add_sw_if_indices[i] == sw_if_index)
+ {
+ if (is_del)
+ {
+ /* if have address remove it */
+ if (first_int_addr)
+ (void) snat_del_address (sm, first_int_addr[0], 1);
+ else
+ {
+ for (j = 0; j < vec_len (sm->to_resolve); j++)
+ {
+ rp = sm->to_resolve + j;
+ if (rp->sw_if_index == sw_if_index)
+ vec_add1 (indices_to_delete, j);
+ }
+ if (vec_len(indices_to_delete))
+ {
+ for (j = vec_len(indices_to_delete)-1; j >= 0; j--)
+                    vec_del1(sm->to_resolve, indices_to_delete[j]);
+ vec_free(indices_to_delete);
+ }
+ }
+ vec_del1(sm->auto_add_sw_if_indices, i);
+ }
+ else
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ return 0;
+ }
+ }
+
+ if (is_del)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ /* add to the auto-address list */
+ vec_add1(sm->auto_add_sw_if_indices, sw_if_index);
+
+ /* If the address is already bound - or static - add it now */
+ if (first_int_addr)
+ snat_add_address (sm, first_int_addr, ~0);
+
+ return 0;
+}
+
+static clib_error_t *
+snat_add_interface_address_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ snat_main_t *sm = &snat_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 sw_if_index;
+ int rv;
+ int is_del = 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+ sm->vnet_main, &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ rv = snat_add_interface_address (sm, sw_if_index, is_del);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+ error = clib_error_return (0, "snat_add_interface_address returned %d",
+ rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = {
+ .path = "nat44 add interface address",
+ .short_help = "nat44 add interface address <interface> [del]",
+ .function = snat_add_interface_address_command_fn,
+};
+
+static clib_error_t *
+snat_det_map_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ snat_main_t *sm = &snat_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t in_addr, out_addr;
+ u32 in_plen, out_plen;
+ int is_add = 1, rv;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "in %U/%u", unformat_ip4_address, &in_addr, &in_plen))
+ ;
+ else if (unformat (line_input, "out %U/%u", unformat_ip4_address, &out_addr, &out_plen))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ rv = snat_det_add_map(sm, &in_addr, (u8) in_plen, &out_addr, (u8)out_plen,
+ is_add);
+
+ if (rv)
+ {
+ error = clib_error_return (0, "snat_det_add_map return %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat deterministic add}
+ * Create a bijective mapping of inside addresses to outside address and
+ * port-range pairs, enabling deterministic NAT to reduce logging in CGN
+ * deployments.
+ * To create deterministic mapping between inside network 10.0.0.0/18 and
+ * outside network 1.1.1.0/30 use:
+ * vpp# nat44 deterministic add in 10.0.0.0/18 out 1.1.1.0/30
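+ * With this mapping, 16384 inside hosts share 4 outside addresses, so each
+ * inside host gets a dedicated block of outside ports (roughly 15 per host).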
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (snat_det_map_command, static) = {
+ .path = "nat44 deterministic add",
+ .short_help = "nat44 deterministic add in <addr>/<plen> out <addr>/<plen> [del]",
+ .function = snat_det_map_command_fn,
+};
+
+static clib_error_t *
+snat_det_forward_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ snat_main_t *sm = &snat_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t in_addr, out_addr;
+ u16 lo_port;
+ snat_det_map_t * dm;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_ip4_address, &in_addr))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ dm = snat_det_map_by_user(sm, &in_addr);
+ if (!dm)
+ vlib_cli_output (vm, "no match");
+ else
+ {
+ snat_det_forward (dm, &in_addr, &out_addr, &lo_port);
+ vlib_cli_output (vm, "%U:<%d-%d>", format_ip4_address, &out_addr,
+ lo_port, lo_port + dm->ports_per_host - 1);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat deterministic forward}
+ * Return outside address and port range from inside address for deterministic
+ * NAT.
+ * To obtain the outside address and port range of an inside host, use:
+ * vpp# nat44 deterministic forward 10.0.0.2
+ * 1.1.1.0:<1054-1068>
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (snat_det_forward_command, static) = {
+ .path = "nat44 deterministic forward",
+ .short_help = "nat44 deterministic forward <addr>",
+ .function = snat_det_forward_command_fn,
+};
+
+static clib_error_t *
+snat_det_reverse_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ snat_main_t *sm = &snat_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t in_addr, out_addr;
+  u32 out_port = 0;
+ snat_det_map_t * dm;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U:%d", unformat_ip4_address, &out_addr, &out_port))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+ }
+
+ if (out_port < 1024 || out_port > 65535)
+ {
+ error = clib_error_return (0, "wrong port, must be <1024-65535>");
+ goto done;
+ }
+
+ dm = snat_det_map_by_out(sm, &out_addr);
+ if (!dm)
+ vlib_cli_output (vm, "no match");
+ else
+ {
+ snat_det_reverse (dm, &out_addr, (u16) out_port, &in_addr);
+ vlib_cli_output (vm, "%U", format_ip4_address, &in_addr);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat deterministic reverse}
+ * Return inside address from outside address and port for deterministic NAT.
+ * To obtain the inside host address from an outside address and port, use:
+ * vpp# nat44 deterministic reverse 1.1.1.1:1276
+ * 10.0.16.16
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (snat_det_reverse_command, static) = {
+ .path = "nat44 deterministic reverse",
+ .short_help = "nat44 deterministic reverse <addr>:<port>",
+ .function = snat_det_reverse_command_fn,
+};
+
+static clib_error_t *
+set_timeout_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ snat_main_t *sm = &snat_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "udp %u", &sm->udp_timeout))
+ ;
+ else if (unformat (line_input, "tcp-established %u",
+ &sm->tcp_established_timeout))
+ ;
+ else if (unformat (line_input, "tcp-transitory %u",
+ &sm->tcp_transitory_timeout))
+ ;
+ else if (unformat (line_input, "icmp %u", &sm->icmp_timeout))
+ ;
+ else if (unformat (line_input, "reset"))
+ {
+ sm->udp_timeout = SNAT_UDP_TIMEOUT;
+ sm->tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
+ sm->tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
+ sm->icmp_timeout = SNAT_ICMP_TIMEOUT;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{set snat deterministic timeout}
+ * To set deterministic NAT timeout values (in seconds), use:
+ * vpp# set nat44 deterministic timeout udp 120 tcp-established 7500
+ * tcp-transitory 250 icmp 90
+ * To reset default values use:
+ * vpp# set nat44 deterministic timeout reset
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (set_timeout_command, static) = {
+ .path = "set nat44 deterministic timeout",
+ .function = set_timeout_command_fn,
+ .short_help =
+ "set nat44 deterministic timeout [udp <sec> | tcp-established <sec> "
+ "tcp-transitory <sec> | icmp <sec> | reset]",
+};
+
+static clib_error_t *
+snat_det_close_session_out_fn (vlib_main_t *vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ snat_main_t *sm = &snat_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t out_addr, ext_addr, in_addr;
+  u32 out_port = 0, ext_port = 0;
+ snat_det_map_t * dm;
+ snat_det_session_t * ses;
+ snat_det_out_key_t key;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U:%d %U:%d",
+ unformat_ip4_address, &out_addr, &out_port,
+ unformat_ip4_address, &ext_addr, &ext_port))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ dm = snat_det_map_by_out(sm, &out_addr);
+ if (!dm)
+ vlib_cli_output (vm, "no match");
+ else
+ {
+ snat_det_reverse(dm, &ext_addr, (u16)out_port, &in_addr);
+ key.ext_host_addr = out_addr;
+ key.ext_host_port = ntohs((u16)ext_port);
+ key.out_port = ntohs((u16)out_port);
+ ses = snat_det_get_ses_by_out(dm, &out_addr, key.as_u64);
+ if (!ses)
+ vlib_cli_output (vm, "no match");
+ else
+ snat_det_ses_close(dm, ses);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat deterministic close session out}
+ * To close a session using its outside address and port together with the
+ * external host address and port, use:
+ * vpp# nat44 deterministic close session out 1.1.1.1:1276 2.2.2.2:2387
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (snat_det_close_session_out_command, static) = {
+ .path = "nat44 deterministic close session out",
+ .short_help = "nat44 deterministic close session out "
+ "<out_addr>:<out_port> <ext_addr>:<ext_port>",
+ .function = snat_det_close_session_out_fn,
+};
+
+static clib_error_t *
+snat_det_close_session_in_fn (vlib_main_t *vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ snat_main_t *sm = &snat_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t in_addr, ext_addr;
+  u32 in_port = 0, ext_port = 0;
+ snat_det_map_t * dm;
+ snat_det_session_t * ses;
+ snat_det_out_key_t key;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U:%d %U:%d",
+ unformat_ip4_address, &in_addr, &in_port,
+ unformat_ip4_address, &ext_addr, &ext_port))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ dm = snat_det_map_by_user (sm, &in_addr);
+ if (!dm)
+ vlib_cli_output (vm, "no match");
+ else
+ {
+ key.ext_host_addr = ext_addr;
+ key.ext_host_port = ntohs ((u16)ext_port);
+ ses = snat_det_find_ses_by_in (dm, &in_addr, ntohs((u16)in_port), key);
+ if (!ses)
+ vlib_cli_output (vm, "no match");
+ else
+ snat_det_ses_close(dm, ses);
+ }
+
+done:
+ unformat_free(line_input);
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat deterministic close session in}
+ * To close a session using its inside address and port together with the
+ * external host address and port, use:
+ * vpp# nat44 deterministic close session in 3.3.3.3:3487 2.2.2.2:2387
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (snat_det_close_session_in_command, static) = {
+ .path = "nat44 deterministic close session in",
+ .short_help = "nat44 deterministic close session in "
+ "<in_addr>:<in_port> <ext_addr>:<ext_port>",
+ .function = snat_det_close_session_in_fn,
+};
diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h
new file mode 100644
index 00000000..e53e924f
--- /dev/null
+++ b/src/plugins/nat/nat.h
@@ -0,0 +1,555 @@
+
+/*
+ * nat.h - NAT plugin definitions
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_nat_h__
+#define __included_nat_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/icmp46_packet.h>
+#include <vnet/api_errno.h>
+#include <vppinfra/bihash_8_8.h>
+#include <vppinfra/bihash_16_8.h>
+#include <vppinfra/dlist.h>
+#include <vppinfra/error.h>
+#include <vlibapi/api.h>
+
+
+#define SNAT_UDP_TIMEOUT 300
+#define SNAT_UDP_TIMEOUT_MIN 120
+#define SNAT_TCP_TRANSITORY_TIMEOUT 240
+#define SNAT_TCP_ESTABLISHED_TIMEOUT 7440
+#define SNAT_TCP_INCOMING_SYN 6
+#define SNAT_ICMP_TIMEOUT 60
+
+#define SNAT_FLAG_HAIRPINNING (1 << 0)
+
+/* 8-byte session lookup key: address, port and packed protocol/FIB index */
+typedef struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t addr;
+ u16 port;
+ u16 protocol:3,
+ fib_index:13;
+ };
+ u64 as_u64;
+ };
+} snat_session_key_t;
+
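+/* endpoint-dependent session key: addresses, local port, protocol and FIB
+   packed into 16 bytes */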
+typedef struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t l_addr;
+ ip4_address_t r_addr;
+ u32 fib_index;
+ u16 l_port;
+ u8 proto;
+ u8 rsvd;
+ };
+ u64 as_u64[2];
+ };
+} nat_ed_ses_key_t;
+
+typedef struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t ext_host_addr;
+ u16 ext_host_port;
+ u16 out_port;
+ };
+ u64 as_u64;
+ };
+} snat_det_out_key_t;
+
+typedef struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t addr;
+ u32 fib_index;
+ };
+ u64 as_u64;
+ };
+} snat_user_key_t;
+
+
+#define foreach_snat_protocol \
+ _(UDP, 0, udp, "udp") \
+ _(TCP, 1, tcp, "tcp") \
+ _(ICMP, 2, icmp, "icmp")
+
+typedef enum {
+#define _(N, i, n, s) SNAT_PROTOCOL_##N = i,
+ foreach_snat_protocol
+#undef _
+} snat_protocol_t;
+
+
+#define foreach_snat_session_state \
+ _(0, UNKNOWN, "unknown") \
+ _(1, UDP_ACTIVE, "udp-active") \
+ _(2, TCP_SYN_SENT, "tcp-syn-sent") \
+ _(3, TCP_ESTABLISHED, "tcp-established") \
+ _(4, TCP_FIN_WAIT, "tcp-fin-wait") \
+ _(5, TCP_CLOSE_WAIT, "tcp-close-wait") \
+ _(6, TCP_LAST_ACK, "tcp-last-ack") \
+ _(7, ICMP_ACTIVE, "icmp-active")
+
+typedef enum {
+#define _(v, N, s) SNAT_SESSION_##N = v,
+ foreach_snat_session_state
+#undef _
+} snat_session_state_t;
+
+
+#define SNAT_SESSION_FLAG_STATIC_MAPPING 1
+#define SNAT_SESSION_FLAG_UNKNOWN_PROTO 2
+#define SNAT_SESSION_FLAG_LOAD_BALANCING 4
+
+typedef CLIB_PACKED(struct {
+ snat_session_key_t out2in; /* 0-15 */
+
+ snat_session_key_t in2out; /* 16-31 */
+
+ u32 flags; /* 32-35 */
+
+ /* per-user translations */
+ u32 per_user_index; /* 36-39 */
+
+ u32 per_user_list_head_index; /* 40-43 */
+
+ /* Last heard timer */
+ f64 last_heard; /* 44-51 */
+
+ u64 total_bytes; /* 52-59 */
+
+ u32 total_pkts; /* 60-63 */
+
+ /* Outside address */
+ u32 outside_address_index; /* 64-67 */
+
+ /* External host address */
+ ip4_address_t ext_host_addr; /* 68-71 */
+
+}) snat_session_t;
+
+
+typedef struct {
+ ip4_address_t addr;
+ u32 fib_index;
+ u32 sessions_per_user_list_head_index;
+ u32 nsessions;
+ u32 nstaticsessions;
+} snat_user_t;
+
+typedef struct {
+ ip4_address_t addr;
+ u32 fib_index;
+#define _(N, i, n, s) \
+ u16 busy_##n##_ports; \
+ u16 * busy_##n##_ports_per_thread; \
+ uword * busy_##n##_port_bitmap;
+ foreach_snat_protocol
+#undef _
+} snat_address_t;
+
+typedef struct {
+ u16 in_port;
+ snat_det_out_key_t out;
+ u8 state;
+ u32 expire;
+} snat_det_session_t;
+
+typedef struct {
+ ip4_address_t in_addr;
+ u8 in_plen;
+ ip4_address_t out_addr;
+ u8 out_plen;
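+  /* number of inside hosts sharing each outside address */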
+ u32 sharing_ratio;
+ u16 ports_per_host;
+ u32 ses_num;
+ /* vector of sessions */
+ snat_det_session_t * sessions;
+} snat_det_map_t;
+
+typedef struct {
+ ip4_address_t addr;
+ u16 port;
+ u8 probability;
+ u8 prefix;
+} nat44_lb_addr_port_t;
+
+typedef struct {
+ ip4_address_t local_addr;
+ ip4_address_t external_addr;
+ u16 local_port;
+ u16 external_port;
+ u8 addr_only;
+ u32 vrf_id;
+ u32 fib_index;
+ snat_protocol_t proto;
+ u32 worker_index;
+ nat44_lb_addr_port_t *locals;
+} snat_static_mapping_t;
+
+typedef struct {
+ u32 sw_if_index;
+ u8 is_inside;
+} snat_interface_t;
+
+typedef struct {
+ ip4_address_t l_addr;
+ u16 l_port;
+ u16 e_port;
+ u32 sw_if_index;
+ u32 vrf_id;
+ snat_protocol_t proto;
+ int addr_only;
+ int is_add;
+} snat_static_map_resolve_t;
+
+typedef struct {
+ /* Main lookup tables */
+ clib_bihash_8_8_t out2in;
+ clib_bihash_8_8_t in2out;
+
+ /* Find-a-user => src address lookup */
+ clib_bihash_8_8_t user_hash;
+
+ /* User pool */
+ snat_user_t * users;
+
+ /* Session pool */
+ snat_session_t * sessions;
+
+ /* Pool of doubly-linked list elements */
+ dlist_elt_t * list_pool;
+
+ u32 snat_thread_index;
+} snat_main_per_thread_data_t;
+
+struct snat_main_s;
+
+typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm,
+ vlib_node_runtime_t *node,
+ u32 thread_index,
+ vlib_buffer_t *b0,
+ ip4_header_t *ip0,
+ u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate,
+ void *d,
+ void *e);
+
+typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip, u32 rx_fib_index);
+
+typedef struct snat_main_s {
+ /* Endpoint address dependent sessions lookup tables */
+ clib_bihash_16_8_t out2in_ed;
+ clib_bihash_16_8_t in2out_ed;
+
+ snat_icmp_match_function_t * icmp_match_in2out_cb;
+ snat_icmp_match_function_t * icmp_match_out2in_cb;
+
+ u32 num_workers;
+ u32 first_worker_index;
+ u32 next_worker;
+ u32 * workers;
+ snat_get_worker_function_t * worker_in2out_cb;
+ snat_get_worker_function_t * worker_out2in_cb;
+ u16 port_per_thread;
+ u32 num_snat_thread;
+
+ /* Per thread data */
+ snat_main_per_thread_data_t * per_thread_data;
+
+ /* Find a static mapping by local */
+ clib_bihash_8_8_t static_mapping_by_local;
+
+ /* Find a static mapping by external */
+ clib_bihash_8_8_t static_mapping_by_external;
+
+ /* Static mapping pool */
+ snat_static_mapping_t * static_mappings;
+
+ /* Interface pool */
+ snat_interface_t * interfaces;
+ snat_interface_t * output_feature_interfaces;
+
+ /* Vector of outside addresses */
+ snat_address_t * addresses;
+
+ /* sw_if_indices whose intfc addresses should be auto-added */
+ u32 * auto_add_sw_if_indices;
+
+ /* vector of interface address static mappings to resolve. */
+ snat_static_map_resolve_t *to_resolve;
+
+ /* Randomize port allocation order */
+ u32 random_seed;
+
+ /* Worker handoff index */
+ u32 fq_in2out_index;
+ u32 fq_in2out_output_index;
+ u32 fq_out2in_index;
+
+ /* in2out and out2in node index */
+ u32 in2out_node_index;
+ u32 in2out_output_node_index;
+ u32 out2in_node_index;
+
+ /* Deterministic NAT */
+ snat_det_map_t * det_maps;
+
+ /* Config parameters */
+ u8 static_mapping_only;
+ u8 static_mapping_connection_tracking;
+ u8 deterministic;
+ u32 translation_buckets;
+ u32 translation_memory_size;
+ u32 max_translations;
+ u32 user_buckets;
+ u32 user_memory_size;
+ u32 max_translations_per_user;
+ u32 outside_vrf_id;
+ u32 outside_fib_index;
+ u32 inside_vrf_id;
+ u32 inside_fib_index;
+
+ /* tenant VRF aware address pool activation flag */
+ u8 vrf_mode;
+
+ /* values of various timeouts */
+ u32 udp_timeout;
+ u32 tcp_established_timeout;
+ u32 tcp_transitory_timeout;
+ u32 icmp_timeout;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ ip4_main_t * ip4_main;
+ ip_lookup_main_t * ip4_lookup_main;
+ api_main_t * api_main;
+} snat_main_t;
+
+extern snat_main_t snat_main;
+extern vlib_node_registration_t snat_in2out_node;
+extern vlib_node_registration_t snat_in2out_output_node;
+extern vlib_node_registration_t snat_out2in_node;
+extern vlib_node_registration_t snat_in2out_fast_node;
+extern vlib_node_registration_t snat_out2in_fast_node;
+extern vlib_node_registration_t snat_in2out_worker_handoff_node;
+extern vlib_node_registration_t snat_in2out_output_worker_handoff_node;
+extern vlib_node_registration_t snat_out2in_worker_handoff_node;
+extern vlib_node_registration_t snat_det_in2out_node;
+extern vlib_node_registration_t snat_det_out2in_node;
+extern vlib_node_registration_t snat_hairpin_dst_node;
+extern vlib_node_registration_t snat_hairpin_src_node;
+
+void snat_free_outside_address_and_port (snat_main_t * sm,
+ u32 thread_index,
+ snat_session_key_t * k,
+ u32 address_index);
+
+int snat_alloc_outside_address_and_port (snat_main_t * sm,
+ u32 fib_index,
+ u32 thread_index,
+ snat_session_key_t * k,
+ u32 * address_indexp);
+
+int snat_static_mapping_match (snat_main_t * sm,
+ snat_session_key_t match,
+ snat_session_key_t * mapping,
+ u8 by_external,
+ u8 *is_addr_only);
+
+void snat_add_del_addr_to_fib (ip4_address_t * addr,
+ u8 p_len,
+ u32 sw_if_index,
+ int is_add);
+
+format_function_t format_snat_user;
+
+typedef struct {
+ u32 cached_sw_if_index;
+ u32 cached_ip4_address;
+} snat_runtime_t;
+
+/** \brief Check if SNAT session is created from static mapping.
+ @param s SNAT session
+ @return 1 if SNAT session is created from static mapping otherwise 0
+*/
+#define snat_is_session_static(s) ((s)->flags & SNAT_SESSION_FLAG_STATIC_MAPPING)
+
+/** \brief Check if SNAT session for unknown protocol.
+ @param s SNAT session
+ @return 1 if SNAT session for unknown protocol otherwise 0
+*/
+#define snat_is_unk_proto_session(s) ((s)->flags & SNAT_SESSION_FLAG_UNKNOWN_PROTO)
+
+/*
+ * Why is this here? Because we don't need to touch this layer to
+ * simply reply to an icmp. We need to change id to a unique
+ * value to NAT an echo request/reply.
+ */
+
+typedef struct {
+ u16 identifier;
+ u16 sequence;
+} icmp_echo_header_t;
+
+always_inline u32
+ip_proto_to_snat_proto (u8 ip_proto)
+{
+ u32 snat_proto = ~0;
+
+ snat_proto = (ip_proto == IP_PROTOCOL_UDP) ? SNAT_PROTOCOL_UDP : snat_proto;
+ snat_proto = (ip_proto == IP_PROTOCOL_TCP) ? SNAT_PROTOCOL_TCP : snat_proto;
+ snat_proto = (ip_proto == IP_PROTOCOL_ICMP) ? SNAT_PROTOCOL_ICMP : snat_proto;
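+  /* ICMPv6 maps to the same SNAT protocol slot as ICMPv4 */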
+ snat_proto = (ip_proto == IP_PROTOCOL_ICMP6) ? SNAT_PROTOCOL_ICMP : snat_proto;
+
+ return snat_proto;
+}
+
+always_inline u8
+snat_proto_to_ip_proto (snat_protocol_t snat_proto)
+{
+ u8 ip_proto = ~0;
+
+ ip_proto = (snat_proto == SNAT_PROTOCOL_UDP) ? IP_PROTOCOL_UDP : ip_proto;
+ ip_proto = (snat_proto == SNAT_PROTOCOL_TCP) ? IP_PROTOCOL_TCP : ip_proto;
+ ip_proto = (snat_proto == SNAT_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP : ip_proto;
+
+ return ip_proto;
+}
+
+typedef struct {
+ u16 src_port, dst_port;
+} tcp_udp_header_t;
+
+u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e);
+u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e);
+u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e);
+u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e);
+u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e);
+u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e);
+void increment_v4_address(ip4_address_t * a);
+void snat_add_address(snat_main_t *sm, ip4_address_t *addr, u32 vrf_id);
+int snat_del_address(snat_main_t *sm, ip4_address_t addr, u8 delete_sm);
+int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
+ u32 sw_if_index, snat_protocol_t proto, int is_add);
+clib_error_t * snat_api_init(vlib_main_t * vm, snat_main_t * sm);
+int snat_set_workers (uword * bitmap);
+int snat_interface_add_del(u32 sw_if_index, u8 is_inside, int is_del);
+int snat_interface_add_del_output_feature(u32 sw_if_index, u8 is_inside,
+ int is_del);
+int snat_add_interface_address(snat_main_t *sm, u32 sw_if_index, int is_del);
+uword unformat_snat_protocol(unformat_input_t * input, va_list * args);
+u8 * format_snat_protocol(u8 * s, va_list * args);
+int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
+ snat_protocol_t proto, u32 vrf_id,
+ nat44_lb_addr_port_t *locals, u8 is_add);
+
+static_always_inline u8
+icmp_is_error_message (icmp46_header_t * icmp)
+{
+ switch(icmp->type)
+ {
+ case ICMP4_destination_unreachable:
+ case ICMP4_time_exceeded:
+ case ICMP4_parameter_problem:
+ case ICMP4_source_quench:
+ case ICMP4_redirect:
+ case ICMP4_alternate_host_address:
+ return 1;
+ }
+ return 0;
+}
+
+static_always_inline u8
+is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0,
+ u32 ip4_addr)
+{
+ snat_runtime_t *rt = (snat_runtime_t *) node->runtime_data;
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index0,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index0;
+ if (first_int_addr)
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ else
+ rt->cached_ip4_address = 0;
+ }
+
+ if (PREDICT_FALSE(ip4_addr == rt->cached_ip4_address))
+ return 1;
+ else
+ return 0;
+}
+
+always_inline u8
+maximum_sessions_exceeded (snat_main_t *sm, u32 thread_index)
+{
+ if (pool_elts (sm->per_thread_data[thread_index].sessions) >= sm->max_translations)
+ return 1;
+
+ return 0;
+}
+
+#endif /* __included_nat_h__ */
diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c
new file mode 100644
index 00000000..bfcfa9b3
--- /dev/null
+++ b/src/plugins/nat/nat64.c
@@ -0,0 +1,861 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT64 implementation
+ */
+
+#include <nat/nat64.h>
+#include <nat/nat64_db.h>
+#include <vnet/fib/ip4_fib.h>
+
+
+nat64_main_t nat64_main;
+
+/* *INDENT-OFF* */
+
+/* Hook up input features */
+VNET_FEATURE_INIT (nat64_in2out, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "nat64-in2out",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+VNET_FEATURE_INIT (nat64_out2in, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat64-out2in",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
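+/* RFC 6052 Well-Known Prefix 64:ff9b::/96, used as the translation prefix
+ * whenever no tenant-specific prefix is configured. */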
+static u8 well_known_prefix[] = {
+ 0x00, 0x64, 0xff, 0x9b,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+/* *INDENT-ON* */
+
+clib_error_t *
+nat64_init (vlib_main_t * vm)
+{
+ nat64_main_t *nm = &nat64_main;
+ clib_error_t *error = 0;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ nm->is_disabled = 0;
+
+ if (tm->n_vlib_mains > 1)
+ {
+ nm->is_disabled = 1;
+ goto error;
+ }
+
+ if (nat64_db_init (&nm->db))
+ {
+ error = clib_error_return (0, "NAT64 DB init failed");
+ goto error;
+ }
+
+ /* set session timeouts to default values */
+ nm->udp_timeout = SNAT_UDP_TIMEOUT;
+ nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
+ nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
+ nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
+ nm->tcp_incoming_syn_timeout = SNAT_TCP_INCOMING_SYN;
+
+error:
+ return error;
+}
+
+int
+nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add)
+{
+ nat64_main_t *nm = &nat64_main;
+ snat_address_t *a = 0;
+ snat_interface_t *interface;
+ int i;
+
+ /* Check if address already exists */
+ for (i = 0; i < vec_len (nm->addr_pool); i++)
+ {
+ if (nm->addr_pool[i].addr.as_u32 == addr->as_u32)
+ {
+ a = nm->addr_pool + i;
+ break;
+ }
+ }
+
+ if (is_add)
+ {
+ if (a)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ vec_add2 (nm->addr_pool, a, 1);
+ a->addr = *addr;
+ a->fib_index = 0;
+ if (vrf_id != ~0)
+ a->fib_index =
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
+ FIB_SOURCE_PLUGIN_HI);
+#define _(N, i, n, s) \
+ clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535);
+ foreach_snat_protocol
+#undef _
+ }
+ else
+ {
+ if (!a)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ if (a->fib_index)
+ fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6,
+ FIB_SOURCE_PLUGIN_HI);
+
+#define _(N, id, n, s) \
+ clib_bitmap_free (a->busy_##n##_port_bitmap);
+ foreach_snat_protocol
+#undef _
+ /* Delete sessions using address */
+ nat64_db_free_out_addr (&nm->db, &a->addr);
+ vec_del1 (nm->addr_pool, i);
+ }
+
+ /* Add/del external address to FIB */
+ /* *INDENT-OFF* */
+ pool_foreach (interface, nm->interfaces,
+ ({
+ if (interface->is_inside)
+ continue;
+
+ snat_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add);
+ break;
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
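+
+/* Usage sketch (hypothetical address; not part of the original patch):
+ *
+ *   ip4_address_t pool_addr;
+ *   pool_addr.as_u32 = clib_host_to_net_u32 (0x0a01010a);   (10.1.1.10)
+ *   int rv = nat64_add_del_pool_addr (&pool_addr, ~0, 1);   (add, any VRF)
+ */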
+
+void
+nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx)
+{
+ nat64_main_t *nm = &nat64_main;
+ snat_address_t *a = 0;
+
+ /* *INDENT-OFF* */
+ vec_foreach (a, nm->addr_pool)
+ {
+ if (fn (a, ctx))
+ break;
+ };
+ /* *INDENT-ON* */
+}
+
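+/* An inside (IPv6-facing) interface gets the "nat64-in2out" feature on the
+ * ip6-unicast arc; an outside (IPv4-facing) interface gets "nat64-out2in"
+ * on ip4-unicast, matching the VNET_FEATURE_INIT registrations above. */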
+int
+nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
+{
+ nat64_main_t *nm = &nat64_main;
+ snat_interface_t *interface = 0, *i;
+ snat_address_t *ap;
+ const char *feature_name, *arc_name;
+
+  /* Check if the interface is already configured */
+ /* *INDENT-OFF* */
+ pool_foreach (i, nm->interfaces,
+ ({
+ if (i->sw_if_index == sw_if_index)
+ {
+ interface = i;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (is_add)
+ {
+ if (interface)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ pool_get (nm->interfaces, interface);
+ interface->sw_if_index = sw_if_index;
+ interface->is_inside = is_inside;
+
+ }
+ else
+ {
+ if (!interface)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ pool_put (nm->interfaces, interface);
+ }
+
+ if (!is_inside)
+ {
+ /* *INDENT-OFF* */
+ vec_foreach (ap, nm->addr_pool)
+ snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, is_add);
+ /* *INDENT-ON* */
+ }
+
+ arc_name = is_inside ? "ip6-unicast" : "ip4-unicast";
+ feature_name = is_inside ? "nat64-in2out" : "nat64-out2in";
+
+ return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index,
+ is_add, 0, 0);
+}
+
+void
+nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx)
+{
+ nat64_main_t *nm = &nat64_main;
+ snat_interface_t *i = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (i, nm->interfaces,
+ ({
+ if (fn (i, ctx))
+ break;
+ }));
+ /* *INDENT-ON* */
+}
+
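+/* Allocation strategy below: prefer a pool address whose FIB matches the
+ * session's tenant, falling back to a global (fib-index 0) address; ports
+ * are drawn at random from the non-privileged range 1024..65535 until a
+ * free one is found in the per-protocol busy bitmap. */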
+int
+nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto,
+ ip4_address_t * addr, u16 * port)
+{
+ nat64_main_t *nm = &nat64_main;
+ snat_main_t *sm = &snat_main;
+ int i;
+ snat_address_t *a, *ga = 0;
+ u32 portnum;
+
+ for (i = 0; i < vec_len (nm->addr_pool); i++)
+ {
+ a = nm->addr_pool + i;
+ switch (proto)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ if (a->busy_##n##_ports < (65535-1024)) \
+ { \
+ if (a->fib_index == fib_index) \
+ { \
+ while (1) \
+ { \
+ portnum = random_u32 (&sm->random_seed); \
+ portnum &= 0xFFFF; \
+ if (portnum < 1024) \
+ continue; \
+ if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
+ portnum)) \
+ continue; \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
+ portnum, 1); \
+ a->busy_##n##_ports++; \
+ *port = portnum; \
+ addr->as_u32 = a->addr.as_u32; \
+ return 0; \
+ } \
+ } \
+ else if (a->fib_index == 0) \
+ ga = a; \
+ } \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning ("unknown protocol");
+ return 1;
+ }
+ }
+
+  if (ga)
+    {
+      /* No pool address in the tenant FIB; fall back to the global
+         (fib-index 0) address remembered above. */
+      a = ga;
+      switch (proto)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ while (1) \
+ { \
+ portnum = random_u32 (&sm->random_seed); \
+ portnum &= 0xFFFF; \
+ if (portnum < 1024) \
+ continue; \
+ if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
+ portnum)) \
+ continue; \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
+ portnum, 1); \
+ a->busy_##n##_ports++; \
+ *port = portnum; \
+ addr->as_u32 = a->addr.as_u32; \
+ return 0; \
+ }
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning ("unknown protocol");
+ return 1;
+ }
+ }
+
+ /* Totally out of translations to use... */
+ //TODO: IPFix
+ return 1;
+}
+
+void
+nat64_free_out_addr_and_port (ip4_address_t * addr, u16 port,
+ snat_protocol_t proto)
+{
+ nat64_main_t *nm = &nat64_main;
+ int i;
+ snat_address_t *a;
+
+ for (i = 0; i < vec_len (nm->addr_pool); i++)
+ {
+ a = nm->addr_pool + i;
+ if (addr->as_u32 != a->addr.as_u32)
+ continue;
+ switch (proto)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
+ port) == 1); \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port, 0); \
+ a->busy_##n##_ports--; \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ clib_warning ("unknown protocol");
+ return;
+ }
+ break;
+ }
+}
+
+int
+nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
+ ip4_address_t * out_addr, u16 in_port,
+ u16 out_port, u8 proto, u32 vrf_id, u8 is_add)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_db_bib_entry_t *bibe;
+ u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
+ FIB_SOURCE_PLUGIN_HI);
+ snat_protocol_t p = ip_proto_to_snat_proto (proto);
+ ip46_address_t addr;
+ int i;
+ snat_address_t *a;
+
+ addr.as_u64[0] = in_addr->as_u64[0];
+ addr.as_u64[1] = in_addr->as_u64[1];
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &addr, clib_host_to_net_u16 (in_port),
+ proto, fib_index, 1);
+
+ if (is_add)
+ {
+ if (bibe)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ for (i = 0; i < vec_len (nm->addr_pool); i++)
+ {
+ a = nm->addr_pool + i;
+ if (out_addr->as_u32 != a->addr.as_u32)
+ continue;
+ switch (p)
+ {
+#define _(N, j, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
+ out_port)) \
+ return VNET_API_ERROR_INVALID_VALUE; \
+ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
+ out_port, 1); \
+ if (out_port > 1024) \
+ a->busy_##n##_ports++; \
+ break;
+ foreach_snat_protocol
+#undef _
+ default:
+ memset (&addr, 0, sizeof (addr));
+ addr.ip4.as_u32 = out_addr->as_u32;
+ if (nat64_db_bib_entry_find
+ (&nm->db, &addr, 0, proto, fib_index, 0))
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ break;
+ }
+ bibe =
+ nat64_db_bib_entry_create (&nm->db, in_addr, out_addr,
+ clib_host_to_net_u16 (in_port),
+ clib_host_to_net_u16 (out_port), fib_index,
+ proto, 1);
+ if (!bibe)
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+ else
+ {
+ if (!bibe)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ nat64_free_out_addr_and_port (out_addr, out_port, p);
+ nat64_db_bib_entry_free (&nm->db, bibe);
+ }
+
+ return 0;
+}
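+
+/* Usage sketch (hypothetical addresses/ports; not part of the original
+ * patch): pin TCP 2001:db8::1 port 1234 to 10.1.1.3 port 5678 in the
+ * default VRF (ports are passed in host byte order):
+ *
+ *   int rv = nat64_add_del_static_bib_entry (&in6_addr, &out4_addr,
+ *                                            1234, 5678, IP_PROTOCOL_TCP,
+ *                                            0, 1);
+ */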
+
+int
+nat64_set_udp_timeout (u32 timeout)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ if (timeout == 0)
+ nm->udp_timeout = SNAT_UDP_TIMEOUT;
+ else if (timeout < SNAT_UDP_TIMEOUT_MIN)
+ return VNET_API_ERROR_INVALID_VALUE;
+ else
+ nm->udp_timeout = timeout;
+
+ return 0;
+}
+
+u32
+nat64_get_udp_timeout (void)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ return nm->udp_timeout;
+}
+
+int
+nat64_set_icmp_timeout (u32 timeout)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ if (timeout == 0)
+ nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
+ else
+ nm->icmp_timeout = timeout;
+
+ return 0;
+}
+
+u32
+nat64_get_icmp_timeout (void)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ return nm->icmp_timeout;
+}
+
+int
+nat64_set_tcp_timeouts (u32 trans, u32 est, u32 incoming_syn)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ if (trans == 0)
+ nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
+ else
+ nm->tcp_trans_timeout = trans;
+
+ if (est == 0)
+ nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
+ else
+ nm->tcp_est_timeout = est;
+
+ if (incoming_syn == 0)
+ nm->tcp_incoming_syn_timeout = SNAT_TCP_INCOMING_SYN;
+ else
+ nm->tcp_incoming_syn_timeout = incoming_syn;
+
+ return 0;
+}
+
+u32
+nat64_get_tcp_trans_timeout (void)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ return nm->tcp_trans_timeout;
+}
+
+u32
+nat64_get_tcp_est_timeout (void)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ return nm->tcp_est_timeout;
+}
+
+u32
+nat64_get_tcp_incoming_syn_timeout (void)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ return nm->tcp_incoming_syn_timeout;
+}
+
+void
+nat64_session_reset_timeout (nat64_db_st_entry_t * ste, vlib_main_t * vm)
+{
+ nat64_main_t *nm = &nat64_main;
+ u32 now = (u32) vlib_time_now (vm);
+
+ switch (ip_proto_to_snat_proto (ste->proto))
+ {
+ case SNAT_PROTOCOL_ICMP:
+ ste->expire = now + nm->icmp_timeout;
+ return;
+ case SNAT_PROTOCOL_TCP:
+ {
+ switch (ste->tcp_state)
+ {
+ case NAT64_TCP_STATE_V4_INIT:
+ case NAT64_TCP_STATE_V6_INIT:
+ case NAT64_TCP_STATE_V4_FIN_RCV:
+ case NAT64_TCP_STATE_V6_FIN_RCV:
+ case NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV:
+ case NAT64_TCP_STATE_TRANS:
+ ste->expire = now + nm->tcp_trans_timeout;
+ return;
+ case NAT64_TCP_STATE_ESTABLISHED:
+ ste->expire = now + nm->tcp_est_timeout;
+ return;
+ default:
+ return;
+ }
+ }
+ case SNAT_PROTOCOL_UDP:
+ ste->expire = now + nm->udp_timeout;
+ return;
+ default:
+ ste->expire = now + nm->udp_timeout;
+ return;
+ }
+}
+
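+/* Simplified RFC 6146 TCP state machine: CLOSED moves to V6_INIT/V4_INIT on
+ * the first SYN, ESTABLISHED once a SYN is seen from the other side,
+ * *_FIN_RCV on a FIN, V6_FIN_V4_FIN_RCV when both sides have FINed, and
+ * TRANS after an RST (returning to ESTABLISHED on any non-RST packet). */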
+void
+nat64_tcp_session_set_state (nat64_db_st_entry_t * ste, tcp_header_t * tcp,
+ u8 is_ip6)
+{
+ switch (ste->tcp_state)
+ {
+ case NAT64_TCP_STATE_CLOSED:
+ {
+ if (tcp->flags & TCP_FLAG_SYN)
+ {
+ if (is_ip6)
+ ste->tcp_state = NAT64_TCP_STATE_V6_INIT;
+ else
+ ste->tcp_state = NAT64_TCP_STATE_V4_INIT;
+ }
+ return;
+ }
+ case NAT64_TCP_STATE_V4_INIT:
+ {
+ if (is_ip6 && (tcp->flags & TCP_FLAG_SYN))
+ ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
+ return;
+ }
+ case NAT64_TCP_STATE_V6_INIT:
+ {
+ if (!is_ip6 && (tcp->flags & TCP_FLAG_SYN))
+ ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
+ return;
+ }
+ case NAT64_TCP_STATE_ESTABLISHED:
+ {
+ if (tcp->flags & TCP_FLAG_FIN)
+ {
+ if (is_ip6)
+ ste->tcp_state = NAT64_TCP_STATE_V6_FIN_RCV;
+ else
+ ste->tcp_state = NAT64_TCP_STATE_V4_FIN_RCV;
+ }
+ else if (tcp->flags & TCP_FLAG_RST)
+ {
+ ste->tcp_state = NAT64_TCP_STATE_TRANS;
+ }
+ return;
+ }
+ case NAT64_TCP_STATE_V4_FIN_RCV:
+ {
+ if (is_ip6 && (tcp->flags & TCP_FLAG_FIN))
+ ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
+ return;
+ }
+ case NAT64_TCP_STATE_V6_FIN_RCV:
+ {
+ if (!is_ip6 && (tcp->flags & TCP_FLAG_FIN))
+ ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
+ return;
+ }
+ case NAT64_TCP_STATE_TRANS:
+ {
+ if (!(tcp->flags & TCP_FLAG_RST))
+ ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
+ return;
+ }
+ default:
+ return;
+ }
+}
+
+int
+nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_prefix_t *p = 0;
+ int i;
+
+  /* Verify prefix length (RFC 6052 permits /32, /40, /48, /56, /64 and /96) */
+ if (plen != 32 && plen != 40 && plen != 48 && plen != 56 && plen != 64
+ && plen != 96)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+  /* Check if the tenant already has a prefix */
+ for (i = 0; i < vec_len (nm->pref64); i++)
+ {
+ if (nm->pref64[i].vrf_id == vrf_id)
+ {
+ p = nm->pref64 + i;
+ break;
+ }
+ }
+
+ if (is_add)
+ {
+ if (!p)
+ {
+ vec_add2 (nm->pref64, p, 1);
+ p->fib_index =
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
+ FIB_SOURCE_PLUGIN_HI);
+ p->vrf_id = vrf_id;
+ }
+
+ p->prefix.as_u64[0] = prefix->as_u64[0];
+ p->prefix.as_u64[1] = prefix->as_u64[1];
+ p->plen = plen;
+ }
+ else
+ {
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ vec_del1 (nm->pref64, i);
+ }
+
+ return 0;
+}
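+
+/* Usage sketch (hypothetical prefix; not part of the original patch):
+ * install 2001:db8::/32 as the translation prefix for the default VRF:
+ *
+ *   ip6_address_t pfx = { .as_u8 = { 0x20, 0x01, 0x0d, 0xb8, } };
+ *   int rv = nat64_add_del_prefix (&pfx, 32, 0, 1);
+ */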
+
+void
+nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_prefix_t *p = 0;
+
+ /* *INDENT-OFF* */
+ vec_foreach (p, nm->pref64)
+ {
+ if (fn (p, ctx))
+ break;
+ };
+ /* *INDENT-ON* */
+}
+
+void
+nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_prefix_t *p, *gp = 0, *prefix = 0;
+
+ /* *INDENT-OFF* */
+ vec_foreach (p, nm->pref64)
+ {
+ if (p->fib_index == fib_index)
+ {
+ prefix = p;
+ break;
+ }
+
+ if (p->fib_index == 0)
+ gp = p;
+ };
+ /* *INDENT-ON* */
+
+ if (!prefix)
+ prefix = gp;
+
+ if (prefix)
+ {
+ memset (ip6, 0, 16);
+      /* plen is in bits; copy the prefix bytes from the matched entry */
+      memcpy (ip6, &prefix->prefix, prefix->plen / 8);
+      switch (prefix->plen)
+ {
+ case 32:
+ ip6->as_u32[1] = ip4->as_u32;
+ break;
+ case 40:
+ ip6->as_u8[5] = ip4->as_u8[0];
+ ip6->as_u8[6] = ip4->as_u8[1];
+ ip6->as_u8[7] = ip4->as_u8[2];
+ ip6->as_u8[9] = ip4->as_u8[3];
+ break;
+ case 48:
+ ip6->as_u8[6] = ip4->as_u8[0];
+ ip6->as_u8[7] = ip4->as_u8[1];
+ ip6->as_u8[9] = ip4->as_u8[2];
+ ip6->as_u8[10] = ip4->as_u8[3];
+ break;
+ case 56:
+ ip6->as_u8[7] = ip4->as_u8[0];
+ ip6->as_u8[9] = ip4->as_u8[1];
+ ip6->as_u8[10] = ip4->as_u8[2];
+ ip6->as_u8[11] = ip4->as_u8[3];
+ break;
+ case 64:
+ ip6->as_u8[9] = ip4->as_u8[0];
+ ip6->as_u8[10] = ip4->as_u8[1];
+ ip6->as_u8[11] = ip4->as_u8[2];
+ ip6->as_u8[12] = ip4->as_u8[3];
+ break;
+ case 96:
+ ip6->as_u32[3] = ip4->as_u32;
+ break;
+ default:
+ clib_warning ("invalid prefix length");
+ break;
+ }
+ }
+ else
+ {
+ memcpy (ip6, well_known_prefix, 16);
+ ip6->as_u32[3] = ip4->as_u32;
+ }
+}
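+
+/* Worked example of the RFC 6052 layout: with pref64 2001:db8::/32 and the
+ * IPv4 address 192.0.2.1 (0xc0000201), plen 32 embeds the address in bits
+ * 32..63, giving 2001:db8:c000:201::; with no prefix configured the
+ * well-known prefix yields 64:ff9b::c000:201. Note the byte layouts above
+ * skip byte 8, the RFC 6052 'u' octet, which must stay zero. */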
+
+void
+nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_prefix_t *p, *gp = 0;
+ u8 plen = 0;
+
+ /* *INDENT-OFF* */
+ vec_foreach (p, nm->pref64)
+ {
+ if (p->fib_index == fib_index)
+ {
+ plen = p->plen;
+ break;
+ }
+
+ if (p->vrf_id == 0)
+ gp = p;
+ };
+ /* *INDENT-ON* */
+
+ if (!plen)
+ {
+ if (gp)
+ plen = gp->plen;
+ else
+ plen = 96;
+ }
+
+ switch (plen)
+ {
+ case 32:
+ ip4->as_u32 = ip6->as_u32[1];
+ break;
+ case 40:
+ ip4->as_u8[0] = ip6->as_u8[5];
+ ip4->as_u8[1] = ip6->as_u8[6];
+ ip4->as_u8[2] = ip6->as_u8[7];
+ ip4->as_u8[3] = ip6->as_u8[9];
+ break;
+ case 48:
+ ip4->as_u8[0] = ip6->as_u8[6];
+ ip4->as_u8[1] = ip6->as_u8[7];
+ ip4->as_u8[2] = ip6->as_u8[9];
+ ip4->as_u8[3] = ip6->as_u8[10];
+ break;
+ case 56:
+ ip4->as_u8[0] = ip6->as_u8[7];
+ ip4->as_u8[1] = ip6->as_u8[9];
+ ip4->as_u8[2] = ip6->as_u8[10];
+ ip4->as_u8[3] = ip6->as_u8[11];
+ break;
+ case 64:
+ ip4->as_u8[0] = ip6->as_u8[9];
+ ip4->as_u8[1] = ip6->as_u8[10];
+ ip4->as_u8[2] = ip6->as_u8[11];
+ ip4->as_u8[3] = ip6->as_u8[12];
+ break;
+ case 96:
+ ip4->as_u32 = ip6->as_u32[3];
+ break;
+ default:
+ clib_warning ("invalid prefix length");
+ break;
+ }
+}
+
+/**
+ * @brief The 'nat64-expire-walk' process's main loop.
+ *
+ * Check expire time for NAT64 sessions.
+ */
+static uword
+nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ while (!nm->is_disabled)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 10.0);
+ vlib_process_get_events (vm, NULL);
+ u32 now = (u32) vlib_time_now (vm);
+
+      nat64_db_st_free_expired (&nm->db, now);
+ }
+
+ return 0;
+}
+
+static vlib_node_registration_t nat64_expire_walk_node;
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat64_expire_walk_node, static) = {
+ .function = nat64_expire_walk_fn,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "nat64-expire-walk",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat64.h b/src/plugins/nat/nat64.h
new file mode 100644
index 00000000..68224cab
--- /dev/null
+++ b/src/plugins/nat/nat64.h
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT64 global declarations
+ */
+#ifndef __included_nat64_h__
+#define __included_nat64_h__
+
+#include <nat/nat.h>
+#include <nat/nat64_db.h>
+
+#define foreach_nat64_tcp_ses_state \
+ _(0, CLOSED, "closed") \
+ _(1, V4_INIT, "v4-init") \
+ _(2, V6_INIT, "v6-init") \
+ _(3, ESTABLISHED, "established") \
+ _(4, V4_FIN_RCV, "v4-fin-rcv") \
+ _(5, V6_FIN_RCV, "v6-fin-rcv") \
+ _(6, V6_FIN_V4_FIN_RCV, "v6-fin-v4-fin-rcv") \
+ _(7, TRANS, "trans")
+
+typedef enum
+{
+#define _(v, N, s) NAT64_TCP_STATE_##N = v,
+ foreach_nat64_tcp_ses_state
+#undef _
+} nat64_tcp_ses_state_t;
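+
+/* The list above is an X-macro: expanding it with _(v, N, s) defined as
+ * "NAT64_TCP_STATE_##N = v," yields NAT64_TCP_STATE_CLOSED = 0 through
+ * NAT64_TCP_STATE_TRANS = 7, while the string column stays available to
+ * any formatter that reuses the same list. */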
+
+typedef struct
+{
+ ip6_address_t prefix;
+ u8 plen;
+ u32 vrf_id;
+ u32 fib_index;
+} nat64_prefix_t;
+
+typedef struct
+{
+ /** Interface pool */
+ snat_interface_t *interfaces;
+
+ /** Address pool vector */
+ snat_address_t *addr_pool;
+
+ /** Pref64 vector */
+ nat64_prefix_t *pref64;
+
+ /** BIB and session DB */
+ nat64_db_t db;
+
+ /* values of various timeouts */
+ u32 udp_timeout;
+ u32 icmp_timeout;
+ u32 tcp_trans_timeout;
+ u32 tcp_est_timeout;
+ u32 tcp_incoming_syn_timeout;
+
+ u8 is_disabled;
+
+ snat_main_t *sm;
+} nat64_main_t;
+
+extern nat64_main_t nat64_main;
+extern vlib_node_registration_t nat64_in2out_node;
+extern vlib_node_registration_t nat64_out2in_node;
+
+/**
+ * @brief Add/delete address to NAT64 pool.
+ *
+ * @param addr IPv4 address.
+ * @param vrf_id VRF id of tenant, ~0 means independent of VRF.
+ * @param is_add 1 if add, 0 if delete.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add);
+
+/**
+ * @brief Callback function invoked when walking addresses in the NAT64
+ * pool; a non-zero return value stops the walk.
+ */
+typedef int (*nat64_pool_addr_walk_fn_t) (snat_address_t * addr, void *ctx);
+
+/**
+ * @brief Walk NAT64 pool.
+ *
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed in the visit function.
+ */
+void nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx);
+
+/**
+ * @brief Enable/disable NAT64 feature on the interface.
+ *
+ * @param sw_if_index Index of the interface.
+ * @param is_inside 1 if inside, 0 if outside.
+ * @param is_add 1 if add, 0 if delete.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add);
+
+/**
+ * @brief Callback function invoked when walking interfaces with the NAT64
+ * feature; a non-zero return value stops the walk.
+ */
+typedef int (*nat64_interface_walk_fn_t) (snat_interface_t * i, void *ctx);
+
+/**
+ * @brief Walk NAT64 interfaces.
+ *
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed in the visit function.
+ */
+void nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx);
+
+/**
+ * @brief Initialize NAT64.
+ *
+ * @param vm vlib main.
+ *
+ * @return error code.
+ */
+clib_error_t *nat64_init (vlib_main_t * vm);
+
+/**
+ * @brief Add/delete static NAT64 BIB entry.
+ *
+ * @param in_addr Inside IPv6 address.
+ * @param out_addr Outside IPv4 address.
+ * @param in_port Inside port number.
+ * @param out_port Outside port number.
+ * @param proto L4 protocol.
+ * @param vrf_id VRF id of tenant.
+ * @param is_add 1 if add, 0 if delete.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
+ ip4_address_t * out_addr, u16 in_port,
+ u16 out_port, u8 proto, u32 vrf_id,
+ u8 is_add);
+
+/**
+ * @brief Allocate an IPv4 address and port pair from the NAT64 pool.
+ *
+ * @param fib_index FIB index of tenant.
+ * @param proto L4 protocol.
+ * @param addr Allocated IPv4 address.
+ * @param port Allocated port number.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto,
+ ip4_address_t * addr, u16 * port);
+
+/**
+ * @brief Return an IPv4 address and port pair to the NAT64 pool.
+ *
+ * @param addr IPv4 address to free.
+ * @param port Port number to free.
+ * @param proto L4 protocol.
+ */
+void nat64_free_out_addr_and_port (ip4_address_t * addr, u16 port,
+ snat_protocol_t proto);
+
+/**
+ * @brief Set UDP session timeout.
+ *
+ * @param timeout Timeout value in seconds (if 0 reset to default value 300sec).
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_set_udp_timeout (u32 timeout);
+
+/**
+ * @brief Get UDP session timeout.
+ *
+ * @returns UDP session timeout in seconds.
+ */
+u32 nat64_get_udp_timeout (void);
+
+/**
+ * @brief Set ICMP session timeout.
+ *
+ * @param timeout Timeout value in seconds (if 0 reset to default value 60sec).
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_set_icmp_timeout (u32 timeout);
+
+/**
+ * @brief Get ICMP session timeout.
+ *
+ * @returns ICMP session timeout in seconds.
+ */
+u32 nat64_get_icmp_timeout (void);
+
+/**
+ * @brief Set TCP session timeouts.
+ *
+ * @param trans Transitory timeout in seconds (if 0 reset to default value 240sec).
+ * @param est Established timeout in seconds (if 0 reset to default value 7440sec).
+ * @param incoming_syn Incoming SYN timeout in seconds (if 0 reset to default value 6sec).
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_set_tcp_timeouts (u32 trans, u32 est, u32 incoming_syn);
+
+/**
+ * @brief Get TCP transitory timeout.
+ *
+ * @returns TCP transitory timeout in seconds.
+ */
+u32 nat64_get_tcp_trans_timeout (void);
+
+/**
+ * @brief Get TCP established timeout.
+ *
+ * @returns TCP established timeout in seconds.
+ */
+u32 nat64_get_tcp_est_timeout (void);
+
+/**
+ * @brief Get TCP incoming SYN timeout.
+ *
+ * @returns TCP incoming SYN timeout in seconds.
+ */
+u32 nat64_get_tcp_incoming_syn_timeout (void);
+
+/**
+ * @brief Reset NAT64 session timeout.
+ *
+ * @param ste Session table entry.
+ * @param vm VLIB main.
+ **/
+void nat64_session_reset_timeout (nat64_db_st_entry_t * ste,
+ vlib_main_t * vm);
+
+/**
+ * @brief Set NAT64 TCP session state.
+ *
+ * @param ste Session table entry.
+ * @param tcp TCP header.
+ * @param is_ip6 1 if IPv6 packet, 0 if IPv4.
+ */
+void nat64_tcp_session_set_state (nat64_db_st_entry_t * ste,
+ tcp_header_t * tcp, u8 is_ip6);
+
+/**
+ * @brief Add/delete NAT64 prefix.
+ *
+ * @param prefix NAT64 prefix.
+ * @param plen Prefix length.
+ * @param vrf_id VRF id of tenant.
+ * @param is_add 1 if add, 0 if delete.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id,
+ u8 is_add);
+
+/**
+ * @brief Callback function invoked when walking NAT64 prefixes; a non-zero
+ * return value stops the walk.
+ */
+typedef int (*nat64_prefix_walk_fn_t) (nat64_prefix_t * pref64, void *ctx);
+
+/**
+ * @brief Walk NAT64 prefixes.
+ *
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed in the visit function.
+ */
+void nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx);
+
+/**
+ * @brief Compose an IPv4-embedded IPv6 address.
+ * @param ip6 Resulting IPv4-embedded IPv6 address.
+ * @param ip4 IPv4 address.
+ * @param fib_index Tenant FIB index.
+ */
+void nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4,
+ u32 fib_index);
+
+/**
+ * @brief Extract the IPv4 address from an IPv4-embedded IPv6 address.
+ *
+ * @param ip6 IPv4-embedded IPv6 address.
+ * @param ip4 IPv4 address.
+ * @param fib_index Tenant FIB index.
+ */
+void nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4,
+ u32 fib_index);
+
+#define u8_ptr_add(ptr, index) (((u8 *)ptr) + index)
+#define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val))
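+/* Example: u16_net_add (tcp->dst_port, 1) increments a port stored in
+ * network byte order without corrupting it on little-endian hosts. */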
+
+#endif /* __included_nat64_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat64_cli.c b/src/plugins/nat/nat64_cli.c
new file mode 100644
index 00000000..f3645bbb
--- /dev/null
+++ b/src/plugins/nat/nat64_cli.c
@@ -0,0 +1,984 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT64 CLI
+ */
+
+#include <nat/nat64.h>
+#include <nat/nat.h>
+#include <vnet/fib/fib_table.h>
+
+static clib_error_t *
+nat64_add_del_pool_addr_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t start_addr, end_addr, this_addr;
+ u32 start_host_order, end_host_order;
+ int i, count, rv;
+ u32 vrf_id = ~0;
+ u8 is_add = 1;
+ clib_error_t *error = 0;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U - %U",
+ unformat_ip4_address, &start_addr,
+ unformat_ip4_address, &end_addr))
+ ;
+ else if (unformat (line_input, "tenant-vrf %u", &vrf_id))
+ ;
+ else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr))
+ end_addr = start_addr;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ start_host_order = clib_host_to_net_u32 (start_addr.as_u32);
+ end_host_order = clib_host_to_net_u32 (end_addr.as_u32);
+
+ if (end_host_order < start_host_order)
+ {
+ error = clib_error_return (0, "end address less than start address");
+ goto done;
+ }
+
+ count = (end_host_order - start_host_order) + 1;
+ this_addr = start_addr;
+
+ for (i = 0; i < count; i++)
+ {
+ rv = nat64_add_del_pool_addr (&this_addr, vrf_id, is_add);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error =
+ clib_error_return (0, "NAT64 pool address %U not exist.",
+ format_ip4_address, &this_addr);
+ goto done;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error =
+ clib_error_return (0, "NAT64 pool address %U exist.",
+ format_ip4_address, &this_addr);
+ goto done;
+ default:
+ break;
+
+ }
+ increment_v4_address (&this_addr);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static int
+nat64_cli_pool_walk (snat_address_t * ap, void *ctx)
+{
+ vlib_main_t *vm = ctx;
+
+ if (ap->fib_index != ~0)
+ {
+ fib_table_t *fib;
+ fib = fib_table_get (ap->fib_index, FIB_PROTOCOL_IP6);
+ if (!fib)
+ return -1;
+ vlib_cli_output (vm, " %U tenant VRF: %u", format_ip4_address,
+ &ap->addr, fib->ft_table_id);
+ }
+ else
+ vlib_cli_output (vm, " %U", format_ip4_address, &ap->addr);
+
+ return 0;
+}
+
+static clib_error_t *
+nat64_show_pool_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ vlib_cli_output (vm, "NAT64 pool:");
+ nat64_pool_addr_walk (nat64_cli_pool_walk, vm);
+
+ return 0;
+}
+
+static clib_error_t *
+nat64_interface_feature_command_fn (vlib_main_t * vm,
+ unformat_input_t *
+ input, vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 *inside_sw_if_indices = 0;
+ u32 *outside_sw_if_indices = 0;
+ u8 is_add = 1;
+ int i, rv;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "in %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ vec_add1 (inside_sw_if_indices, sw_if_index);
+ else if (unformat (line_input, "out %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ vec_add1 (outside_sw_if_indices, sw_if_index);
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (vec_len (inside_sw_if_indices))
+ {
+ for (i = 0; i < vec_len (inside_sw_if_indices); i++)
+ {
+ sw_if_index = inside_sw_if_indices[i];
+ rv = nat64_add_del_interface (sw_if_index, 1, is_add);
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error =
+ clib_error_return (0, "%U NAT64 feature not enabled.",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index));
+ goto done;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error =
+ clib_error_return (0, "%U NAT64 feature already enabled.",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index));
+ goto done;
+ case VNET_API_ERROR_INVALID_VALUE:
+ case VNET_API_ERROR_INVALID_VALUE_2:
+ error =
+ clib_error_return (0,
+ "%U NAT64 feature enable/disable failed.",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index));
+ goto done;
+ default:
+ break;
+
+ }
+ }
+ }
+
+ if (vec_len (outside_sw_if_indices))
+ {
+ for (i = 0; i < vec_len (outside_sw_if_indices); i++)
+ {
+ sw_if_index = outside_sw_if_indices[i];
+ rv = nat64_add_del_interface (sw_if_index, 0, is_add);
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error =
+ clib_error_return (0, "%U NAT64 feature not enabled.",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index));
+ goto done;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error =
+ clib_error_return (0, "%U NAT64 feature already enabled.",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index));
+ goto done;
+ case VNET_API_ERROR_INVALID_VALUE:
+ case VNET_API_ERROR_INVALID_VALUE_2:
+ error =
+ clib_error_return (0,
+ "%U NAT64 feature enable/disable failed.",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index));
+ goto done;
+ default:
+ break;
+
+ }
+ }
+ }
+
+done:
+ unformat_free (line_input);
+ vec_free (inside_sw_if_indices);
+ vec_free (outside_sw_if_indices);
+
+ return error;
+}
+
+static int
+nat64_cli_interface_walk (snat_interface_t * i, void *ctx)
+{
+ vlib_main_t *vm = ctx;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vlib_cli_output (vm, " %U %s", format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, i->sw_if_index),
+ i->is_inside ? "in" : "out");
+ return 0;
+}
+
+static clib_error_t *
+nat64_show_interfaces_command_fn (vlib_main_t * vm,
+ unformat_input_t *
+ input, vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ vlib_cli_output (vm, "NAT64 interfaces:");
+ nat64_interfaces_walk (nat64_cli_interface_walk, vm);
+
+ return 0;
+}
+
+static clib_error_t *
+nat64_add_del_static_bib_command_fn (vlib_main_t *
+ vm,
+ unformat_input_t
+ * input, vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u8 is_add = 1;
+ ip6_address_t in_addr;
+ ip4_address_t out_addr;
+ u32 in_port = 0;
+ u32 out_port = 0;
+ u32 vrf_id = 0, protocol;
+ snat_protocol_t proto = 0;
+ u8 p = 0;
+ int rv;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U %u", unformat_ip6_address,
+ &in_addr, &in_port))
+ ;
+ else if (unformat (line_input, "%U %u", unformat_ip4_address,
+ &out_addr, &out_port))
+ ;
+ else if (unformat (line_input, "vrf %u", &vrf_id))
+ ;
+ else if (unformat (line_input, "%U", unformat_snat_protocol, &proto))
+ ;
+ else
+ if (unformat
+ (line_input, "%U %U %u", unformat_ip6_address, &in_addr,
+ unformat_ip4_address, &out_addr, &protocol))
+ p = (u8) protocol;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!p)
+ {
+ if (!in_port)
+ {
+ error =
+ clib_error_return (0, "inside port and address must be set");
+ goto done;
+ }
+
+ if (!out_port)
+ {
+ error =
+ clib_error_return (0, "outside port and address must be set");
+ goto done;
+ }
+
+ p = snat_proto_to_ip_proto (proto);
+ }
+
+ rv =
+ nat64_add_del_static_bib_entry (&in_addr, &out_addr, (u16) in_port,
+ (u16) out_port, p, vrf_id, is_add);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "NAT64 BIB entry not exist.");
+ goto done;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error = clib_error_return (0, "NAT64 BIB entry exist.");
+ goto done;
+ case VNET_API_ERROR_UNSPECIFIED:
+ error = clib_error_return (0, "Crerate NAT64 BIB entry failed.");
+ goto done;
+ case VNET_API_ERROR_INVALID_VALUE:
+ error =
+ clib_error_return (0, "Outside addres %U and port %u already in use.",
+ format_ip4_address, &out_addr, out_port);
+ goto done;
+ default:
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static int
+nat64_cli_bib_walk (nat64_db_bib_entry_t * bibe, void *ctx)
+{
+ vlib_main_t *vm = ctx;
+ fib_table_t *fib;
+
+ fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6);
+ if (!fib)
+ return -1;
+
+ switch (bibe->proto)
+ {
+ case IP_PROTOCOL_ICMP:
+ case IP_PROTOCOL_TCP:
+ case IP_PROTOCOL_UDP:
+ vlib_cli_output (vm, " %U %u %U %u protocol %U vrf %u %s %u sessions",
+ format_ip6_address, &bibe->in_addr,
+ clib_net_to_host_u16 (bibe->in_port),
+ format_ip4_address, &bibe->out_addr,
+ clib_net_to_host_u16 (bibe->out_port),
+ format_snat_protocol,
+ ip_proto_to_snat_proto (bibe->proto), fib->ft_table_id,
+ bibe->is_static ? "static" : "dynamic", bibe->ses_num);
+ break;
+ default:
+ vlib_cli_output (vm, " %U %U protocol %u vrf %u %s %u sessions",
+ format_ip6_address, &bibe->in_addr,
+ format_ip4_address, &bibe->out_addr,
+ bibe->proto, fib->ft_table_id,
+ bibe->is_static ? "static" : "dynamic", bibe->ses_num);
+ }
+ return 0;
+}
+
+static clib_error_t *
+nat64_show_bib_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u32 proto = ~0;
+ u8 p = 255;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (unformat (line_input, "%U", unformat_snat_protocol, &proto))
+ p = snat_proto_to_ip_proto (proto);
+ else if (unformat (line_input, "unknown"))
+ p = 0;
+ else if (unformat (line_input, "all"))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ if (p == 255)
+ vlib_cli_output (vm, "NAT64 BIB entries:");
+ else
+ vlib_cli_output (vm, "NAT64 %U BIB entries:", format_snat_protocol,
+ proto);
+ nat64_db_bib_walk (&nm->db, p, nat64_cli_bib_walk, vm);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+nat64_set_timeouts_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u32 timeout, tcp_trans, tcp_est, tcp_incoming_syn;
+
+ tcp_trans = nat64_get_tcp_trans_timeout ();
+ tcp_est = nat64_get_tcp_est_timeout ();
+ tcp_incoming_syn = nat64_get_tcp_incoming_syn_timeout ();
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "udp %u", &timeout))
+ {
+ if (nat64_set_udp_timeout (timeout))
+ {
+ error = clib_error_return (0, "Invalid UDP timeout value");
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "icmp %u", &timeout))
+ {
+ if (nat64_set_icmp_timeout (timeout))
+ {
+ error = clib_error_return (0, "Invalid ICMP timeout value");
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "tcp-trans %u", &tcp_trans))
+ {
+ if (nat64_set_tcp_timeouts (tcp_trans, tcp_est, tcp_incoming_syn))
+ {
+ error =
+ clib_error_return (0,
+ "Invalid TCP transitory timeouts value");
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "tcp-est %u", &tcp_est))
+ {
+ if (nat64_set_tcp_timeouts (tcp_trans, tcp_est, tcp_incoming_syn))
+ {
+ error =
+ clib_error_return (0,
+ "Invalid TCP established timeouts value");
+ goto done;
+ }
+ }
+ else
+ if (unformat (line_input, "tcp-incoming-syn %u", &tcp_incoming_syn))
+ {
+ if (nat64_set_tcp_timeouts (tcp_trans, tcp_est, tcp_incoming_syn))
+ {
+ error =
+ clib_error_return (0,
+ "Invalid TCP incoming SYN timeouts value");
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "reset"))
+ {
+ nat64_set_udp_timeout (0);
+ nat64_set_icmp_timeout (0);
+ nat64_set_tcp_timeouts (0, 0, 0);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+nat64_show_timeouts_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ vlib_cli_output (vm, "NAT64 session timeouts:");
+ vlib_cli_output (vm, " UDP %usec", nat64_get_udp_timeout ());
+ vlib_cli_output (vm, " ICMP %usec", nat64_get_icmp_timeout ());
+ vlib_cli_output (vm, " TCP transitory %usec",
+ nat64_get_tcp_trans_timeout ());
+ vlib_cli_output (vm, " TCP established %usec",
+ nat64_get_tcp_est_timeout ());
+ vlib_cli_output (vm, " TCP incoming SYN %usec",
+ nat64_get_tcp_incoming_syn_timeout ());
+
+ return 0;
+}
+
+static int
+nat64_cli_st_walk (nat64_db_st_entry_t * ste, void *ctx)
+{
+ vlib_main_t *vm = ctx;
+ nat64_main_t *nm = &nat64_main;
+ nat64_db_bib_entry_t *bibe;
+ fib_table_t *fib;
+
+ bibe = nat64_db_bib_entry_by_index (&nm->db, ste->proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6);
+ if (!fib)
+ return -1;
+
+ u32 vrf_id = fib->ft_table_id;
+
+ if (ste->proto == IP_PROTOCOL_ICMP)
+ vlib_cli_output (vm, " %U %U %u %U %U %u protocol %U vrf %u",
+ format_ip6_address, &bibe->in_addr,
+ format_ip6_address, &ste->in_r_addr,
+ clib_net_to_host_u16 (bibe->in_port),
+ format_ip4_address, &bibe->out_addr,
+ format_ip4_address, &ste->out_r_addr,
+ clib_net_to_host_u16 (bibe->out_port),
+ format_snat_protocol,
+ ip_proto_to_snat_proto (bibe->proto), vrf_id);
+ else if (ste->proto == IP_PROTOCOL_TCP || ste->proto == IP_PROTOCOL_UDP)
+ vlib_cli_output (vm, " %U %u %U %u %U %u %U %u protcol %U vrf %u",
+ format_ip6_address, &bibe->in_addr,
+ clib_net_to_host_u16 (bibe->in_port),
+ format_ip6_address, &ste->in_r_addr,
+ clib_net_to_host_u16 (ste->r_port),
+ format_ip4_address, &bibe->out_addr,
+ clib_net_to_host_u16 (bibe->out_port),
+ format_ip4_address, &ste->out_r_addr,
+ clib_net_to_host_u16 (ste->r_port),
+ format_snat_protocol,
+ ip_proto_to_snat_proto (bibe->proto), vrf_id);
+ else
+ vlib_cli_output (vm, " %U %U %U %U protocol %u vrf %u",
+ format_ip6_address, &bibe->in_addr,
+ format_ip6_address, &ste->in_r_addr,
+ format_ip4_address, &bibe->out_addr,
+ format_ip4_address, &ste->out_r_addr,
+ bibe->proto, vrf_id);
+
+ return 0;
+}
+
+static clib_error_t *
+nat64_show_st_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u32 proto = ~0;
+ u8 p = 255;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (unformat (line_input, "%U", unformat_snat_protocol, &proto))
+ p = snat_proto_to_ip_proto (proto);
+ else if (unformat (line_input, "unknown"))
+ p = 0;
+ else if (unformat (line_input, "all"))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ if (p == 255)
+ vlib_cli_output (vm, "NAT64 sessions:");
+ else
+ vlib_cli_output (vm, "NAT64 %U sessions:", format_snat_protocol, proto);
+ nat64_db_st_walk (&nm->db, p, nat64_cli_st_walk, vm);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+nat64_add_del_prefix_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ u32 vrf_id = 0;
+ ip6_address_t prefix;
+ u32 plen = 0;
+ int rv;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U/%u", unformat_ip6_address, &prefix, &plen))
+ ;
+ else if (unformat (line_input, "tenant-vrf %u", &vrf_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!plen)
+ {
+ error = clib_error_return (0, "NAT64 prefix must be set.");
+ goto done;
+ }
+
+ rv = nat64_add_del_prefix (&prefix, (u8) plen, vrf_id, is_add);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "NAT64 prefix not exist.");
+ goto done;
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "Invalid prefix length.");
+ goto done;
+ default:
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static int
+nat64_cli_prefix_walk (nat64_prefix_t * p, void *ctx)
+{
+ vlib_main_t *vm = ctx;
+
+ vlib_cli_output (vm, " %U/%u tenant-vrf %u",
+ format_ip6_address, &p->prefix, p->plen, p->vrf_id);
+
+ return 0;
+}
+
+static clib_error_t *
+nat64_show_prefix_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return clib_error_return (0,
+ "NAT64 disabled, multi thread not supported");
+
+ vlib_cli_output (vm, "NAT64 prefix:");
+ nat64_prefix_walk (nat64_cli_prefix_walk, vm);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat64 add pool address}
+ * Add/delete NAT64 pool address.
+ * To add single NAT64 pool address use:
+ * vpp# nat64 add pool address 10.1.1.10
+ * To add NAT64 pool address range use:
+ * vpp# nat64 add pool address 10.1.1.2 - 10.1.1.5
+ * To add NAT64 pool address for specific tenant use:
+ * vpp# nat64 add pool address 10.1.1.100 tenant-vrf 100
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat64_add_pool_address_command, static) = {
+ .path = "nat64 add pool address",
+ .short_help = "nat64 add pool address <ip4-range-start> [- <ip4-range-end>] "
+ "[tenant-vrf <vrf-id>] [del]",
+ .function = nat64_add_del_pool_addr_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat64 pool}
+ * Show NAT64 pool.
+ * vpp# show nat64 pool
+ * NAT64 pool:
+ * 10.1.1.3 tenant VRF: 0
+ * 10.1.1.10 tenant VRF: 10
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_nat64_pool_command, static) = {
+ .path = "show nat64 pool",
+ .short_help = "show nat64 pool",
+ .function = nat64_show_pool_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{set interface nat64}
+ * Enable/disable NAT64 feature on the interface.
+ * To enable NAT64 feature with local (IPv6) network interface
+ * GigabitEthernet0/8/0 and external (IPv4) network interface
+ * GigabitEthernet0/a/0 use:
+ * vpp# set interface nat64 in GigabitEthernet0/8/0 out GigabitEthernet0/a/0
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (set_interface_nat64_command, static) = {
+ .path = "set interface nat64",
+ .short_help = "set interface nat64 in|out <intfc> [del]",
+ .function = nat64_interface_feature_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat64 interfaces}
+ * Show interfaces with NAT64 feature.
+ * To show interfaces with NAT64 feature use:
+ * vpp# show nat64 interfaces
+ * NAT64 interfaces:
+ * GigabitEthernet0/8/0 in
+ * GigabitEthernet0/a/0 out
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_nat64_interfaces_command, static) = {
+ .path = "show nat64 interfaces",
+ .short_help = "show nat64 interfaces",
+ .function = nat64_show_interfaces_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat64 add static bib}
+ * Add/delete NAT64 static BIB entry.
+ * To create a NAT64 static BIB entry use:
+ * vpp# nat64 add static bib 2001:db8:c000:221:: 1234 10.1.1.3 5678 tcp
+ * vpp# nat64 add static bib 2001:db8:c000:221:: 1234 10.1.1.3 5678 udp vrf 10
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat64_add_del_static_bib_command, static) = {
+ .path = "nat64 add static bib",
+ .short_help = "nat64 add static bib <ip6-addr> <port> <ip4-addr> <port> "
+ "tcp|udp|icmp [vfr <table-id>] [del]",
+ .function = nat64_add_del_static_bib_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat64 bib}
+ * Show NAT64 BIB entries.
+ * To show NAT64 TCP BIB entries use:
+ * vpp# show nat64 bib tcp
+ * NAT64 tcp BIB entries:
+ * fd01:1::2 6303 10.0.0.3 62303 tcp vrf 0 dynamic 1 sessions
+ * 2001:db8:c000:221:: 1234 10.1.1.3 5678 tcp vrf 0 static 2 sessions
+ * To show NAT64 UDP BIB entries use:
+ * vpp# show nat64 bib udp
+ * NAT64 udp BIB entries:
+ * fd01:1::2 6304 10.0.0.3 10546 udp vrf 0 dynamic 10 sessions
+ * 2001:db8:c000:221:: 1234 10.1.1.3 5678 udp vrf 10 static 0 sessions
+ * To show NAT64 ICMP BIB entries use:
+ * vpp# show nat64 bib icmp
+ * NAT64 icmp BIB entries:
+ * fd01:1::2 6305 10.0.0.3 63209 icmp vrf 10 dynamic 1 sessions
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_nat64_bib_command, static) = {
+ .path = "show nat64 bib",
+ .short_help = "show nat64 bib all|tcp|udp|icmp|unknown",
+ .function = nat64_show_bib_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{set nat64 timeouts}
+ * Set NAT64 session timeouts (in seconds).
+ * To set NAT64 session timeouts use:
+ * vpp# set nat64 timeouts udp 200 icmp 30 tcp-trans 250 tcp-est 7450
+ * To reset NAT64 session timeouts to default values use:
+ * vpp# set nat64 timeouts reset
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (set_nat64_timeouts_command, static) = {
+ .path = "set nat64 timeouts",
+ .short_help = "set nat64 timeouts udp <sec> icmp <sec> tcp-trans <sec> "
+ "tcp-est <sec> tcp-incoming-syn <sec> | reset",
+ .function = nat64_set_timeouts_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat64 timeouts}
+ * Show NAT64 session timeouts:
+ * vpp# show nat64 timeouts
+ * NAT64 session timeouts:
+ * UDP 300sec
+ * ICMP 60sec
+ * TCP transitory 240sec
+ * TCP established 7440sec
+ * TCP incoming SYN 6sec
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_nat64_timeouts_command, static) = {
+ .path = "show nat64 timeouts",
+ .short_help = "show nat64 timeouts",
+ .function = nat64_show_timeouts_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat64 session table}
+ * Show NAT64 session table.
+ * To show NAT64 TCP session table use:
+ * vpp# show nat64 session table tcp
+ * NAT64 tcp sessions:
+ * fd01:1::2 6303 64:ff9b::ac10:202 20 10.0.0.3 62303 172.16.2.2 20 tcp vrf 0
+ * fd01:3::2 6303 64:ff9b::ac10:202 20 10.0.10.3 21300 172.16.2.2 20 tcp vrf 10
+ * To show NAT64 UDP session table use:
+ * vpp# show nat64 session table udp
+ * NAT64 udp sessions:
+ * fd01:1::2 6304 64:ff9b::ac10:202 20 10.0.0.3 10546 172.16.2.2 20 udp vrf 0
+ * fd01:3::2 6304 64:ff9b::ac10:202 20 10.0.10.3 58627 172.16.2.2 20 udp vrf 10
+ * fd01:1::2 1235 64:ff9b::a00:3 4023 10.0.0.3 24488 10.0.0.3 4023 udp vrf 0
+ * fd01:1::3 23 64:ff9b::a00:3 24488 10.0.0.3 4023 10.0.0.3 24488 udp vrf 0
+ * To show NAT64 ICMP session table use:
+ * vpp# show nat64 session table icmp
+ * NAT64 icmp sessions:
+ * fd01:1::2 64:ff9b::ac10:202 6305 10.0.0.3 172.16.2.2 63209 icmp vrf 0
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_nat64_st_command, static) = {
+ .path = "show nat64 session table",
+ .short_help = "show nat64 session table all|tcp|udp|icmp|unknown",
+ .function = nat64_show_st_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat64 add prefix}
+ * Set NAT64 prefix for generating IPv6 representations of IPv4 addresses.
+ * To set NAT64 global prefix use:
+ * vpp# nat64 add prefix 2001:db8::/32
+ * To set NAT64 prefix for specific tenant use:
+ * vpp# nat64 add prefix 2001:db8:122:300::/56 tenant-vrf 10
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat64_add_del_prefix_command, static) = {
+ .path = "nat64 add prefix",
+ .short_help = "nat64 add prefix <ip6-prefix>/<plen> [tenant-vrf <vrf-id>] "
+ "[del]",
+ .function = nat64_add_del_prefix_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat64 prefix}
+ * Show NAT64 prefix.
+ * To show NAT64 prefix use:
+ * vpp# show nat64 prefix
+ * NAT64 prefix:
+ * 2001:db8::/32 tenant-vrf 0
+ * 2001:db8:122:300::/56 tenant-vrf 10
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_nat64_prefix_command, static) = {
+ .path = "show nat64 prefix",
+ .short_help = "show nat64 prefix",
+ .function = nat64_show_prefix_command_fn,
+};
+
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat64_db.c b/src/plugins/nat/nat64_db.c
new file mode 100644
index 00000000..da73ceee
--- /dev/null
+++ b/src/plugins/nat/nat64_db.c
@@ -0,0 +1,603 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT64 DB
+ */
+#include <nat/nat64_db.h>
+
+int
+nat64_db_init (nat64_db_t * db)
+{
+ u32 bib_buckets = 1024;
+ u32 bib_memory_size = 128 << 20;
+ u32 st_buckets = 2048;
+ u32 st_memory_size = 256 << 20;
+
+ clib_bihash_init_24_8 (&db->bib.in2out, "bib-in2out", bib_buckets,
+ bib_memory_size);
+
+ clib_bihash_init_24_8 (&db->bib.out2in, "bib-out2in", bib_buckets,
+ bib_memory_size);
+
+ clib_bihash_init_48_8 (&db->st.in2out, "st-in2out", st_buckets,
+ st_memory_size);
+
+ clib_bihash_init_48_8 (&db->st.out2in, "st-out2in", st_buckets,
+ st_memory_size);
+
+ return 0;
+}
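+
+/* A BIB lookup key (address, port, protocol, FIB) fits the 24-byte bihash
+ * variant; session-table keys additionally carry the remote address and
+ * port, hence the 48-byte variant. */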
+
+nat64_db_bib_entry_t *
+nat64_db_bib_entry_create (nat64_db_t * db, ip6_address_t * in_addr,
+ ip4_address_t * out_addr, u16 in_port,
+ u16 out_port, u32 fib_index, u8 proto,
+ u8 is_static)
+{
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_bib_entry_key_t bibe_key;
+ clib_bihash_kv_24_8_t kv;
+
+ /* create pool entry */
+ switch (ip_proto_to_snat_proto (proto))
+ {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ pool_get (db->bib._##n##_bib, bibe); \
+ kv.value = bibe - db->bib._##n##_bib; \
+ break;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ default:
+ pool_get (db->bib._unk_proto_bib, bibe);
+ kv.value = bibe - db->bib._unk_proto_bib;
+ break;
+ }
+ memset (bibe, 0, sizeof (*bibe));
+ bibe->in_addr.as_u64[0] = in_addr->as_u64[0];
+ bibe->in_addr.as_u64[1] = in_addr->as_u64[1];
+ bibe->in_port = in_port;
+ bibe->out_addr.as_u32 = out_addr->as_u32;
+ bibe->out_port = out_port;
+ bibe->fib_index = fib_index;
+ bibe->proto = proto;
+ bibe->is_static = is_static;
+
+ /* create hash lookup */
+ bibe_key.addr.as_u64[0] = bibe->in_addr.as_u64[0];
+ bibe_key.addr.as_u64[1] = bibe->in_addr.as_u64[1];
+ bibe_key.fib_index = bibe->fib_index;
+ bibe_key.port = bibe->in_port;
+ bibe_key.proto = bibe->proto;
+ bibe_key.rsvd = 0;
+ kv.key[0] = bibe_key.as_u64[0];
+ kv.key[1] = bibe_key.as_u64[1];
+ kv.key[2] = bibe_key.as_u64[2];
+ clib_bihash_add_del_24_8 (&db->bib.in2out, &kv, 1);
+
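+  /* Outside (IPv4) addresses are assumed to live in the global FIB, so the
+   * out2in key is always built with fib_index 0. */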
+ memset (&bibe_key.addr, 0, sizeof (bibe_key.addr));
+ bibe_key.addr.ip4.as_u32 = bibe->out_addr.as_u32;
+ bibe_key.fib_index = 0;
+ bibe_key.port = bibe->out_port;
+ kv.key[0] = bibe_key.as_u64[0];
+ kv.key[1] = bibe_key.as_u64[1];
+ kv.key[2] = bibe_key.as_u64[2];
+ clib_bihash_add_del_24_8 (&db->bib.out2in, &kv, 1);
+
+ return bibe;
+}
+
+void
+nat64_db_bib_entry_free (nat64_db_t * db, nat64_db_bib_entry_t * bibe)
+{
+ nat64_db_bib_entry_key_t bibe_key;
+ clib_bihash_kv_24_8_t kv;
+ nat64_db_bib_entry_t *bib;
+ u32 *ste_to_be_free = 0, *ste_index, bibe_index;
+ nat64_db_st_entry_t *st, *ste;
+
+ switch (ip_proto_to_snat_proto (bibe->proto))
+ {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ bib = db->bib._##n##_bib; \
+ st = db->st._##n##_st; \
+ break;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ default:
+ bib = db->bib._unk_proto_bib;
+ st = db->st._unk_proto_st;
+ break;
+ }
+
+ bibe_index = bibe - bib;
+
+ /* delete ST entries for static BIB entry */
+ if (bibe->is_static)
+ {
+      /* *INDENT-OFF* */
+      pool_foreach (ste, st, ({
+        if (ste->bibe_index == bibe_index)
+          vec_add1 (ste_to_be_free, ste - st);
+      }));
+      /* *INDENT-ON* */
+ vec_foreach (ste_index, ste_to_be_free)
+ nat64_db_st_entry_free (db, pool_elt_at_index (st, ste_index[0]));
+ vec_free (ste_to_be_free);
+ }
+
+ /* delete hash lookup */
+ bibe_key.addr.as_u64[0] = bibe->in_addr.as_u64[0];
+ bibe_key.addr.as_u64[1] = bibe->in_addr.as_u64[1];
+ bibe_key.fib_index = bibe->fib_index;
+ bibe_key.port = bibe->in_port;
+ bibe_key.proto = bibe->proto;
+ bibe_key.rsvd = 0;
+ kv.key[0] = bibe_key.as_u64[0];
+ kv.key[1] = bibe_key.as_u64[1];
+ kv.key[2] = bibe_key.as_u64[2];
+ clib_bihash_add_del_24_8 (&db->bib.in2out, &kv, 0);
+
+ memset (&bibe_key.addr, 0, sizeof (bibe_key.addr));
+ bibe_key.addr.ip4.as_u32 = bibe->out_addr.as_u32;
+ bibe_key.fib_index = 0;
+ bibe_key.port = bibe->out_port;
+ kv.key[0] = bibe_key.as_u64[0];
+ kv.key[1] = bibe_key.as_u64[1];
+ kv.key[2] = bibe_key.as_u64[2];
+ clib_bihash_add_del_24_8 (&db->bib.out2in, &kv, 0);
+
+ /* delete from pool */
+ pool_put (bib, bibe);
+
+}
+
+nat64_db_bib_entry_t *
+nat64_db_bib_entry_find (nat64_db_t * db, ip46_address_t * addr, u16 port,
+ u8 proto, u32 fib_index, u8 is_ip6)
+{
+ nat64_db_bib_entry_t *bibe = 0;
+ nat64_db_bib_entry_key_t bibe_key;
+ clib_bihash_kv_24_8_t kv, value;
+ nat64_db_bib_entry_t *bib;
+
+ switch (ip_proto_to_snat_proto (proto))
+ {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ bib = db->bib._##n##_bib; \
+ break;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ default:
+ bib = db->bib._unk_proto_bib;
+ break;
+ }
+
+ bibe_key.addr.as_u64[0] = addr->as_u64[0];
+ bibe_key.addr.as_u64[1] = addr->as_u64[1];
+ bibe_key.fib_index = fib_index;
+ bibe_key.port = port;
+ bibe_key.proto = proto;
+ bibe_key.rsvd = 0;
+
+ kv.key[0] = bibe_key.as_u64[0];
+ kv.key[1] = bibe_key.as_u64[1];
+ kv.key[2] = bibe_key.as_u64[2];
+
+ if (!clib_bihash_search_24_8
+ (is_ip6 ? &db->bib.in2out : &db->bib.out2in, &kv, &value))
+ bibe = pool_elt_at_index (bib, value.value);
+
+ return bibe;
+}
+
+void
+nat64_db_bib_walk (nat64_db_t * db, u8 proto,
+ nat64_db_bib_walk_fn_t fn, void *ctx)
+{
+ nat64_db_bib_entry_t *bib, *bibe;
+
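+  /* A proto value of 255 walks every per-protocol BIB, including the
+   * "unknown" protocol BIB (see nat64_db_bib_walk in nat64_db.h). */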
+ if (proto == 255)
+ {
+ /* *INDENT-OFF* */
+ #define _(N, i, n, s) \
+ bib = db->bib._##n##_bib; \
+ pool_foreach (bibe, bib, ({ \
+ if (fn (bibe, ctx)) \
+ return; \
+ }));
+ foreach_snat_protocol
+ #undef _
+ bib = db->bib._unk_proto_bib;
+ pool_foreach (bibe, bib, ({
+ if (fn (bibe, ctx))
+ return;
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ switch (ip_proto_to_snat_proto (proto))
+ {
+ /* *INDENT-OFF* */
+ #define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ bib = db->bib._##n##_bib; \
+ break;
+ foreach_snat_protocol
+ #undef _
+ /* *INDENT-ON* */
+ default:
+ bib = db->bib._unk_proto_bib;
+ break;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach (bibe, bib,
+ ({
+ if (fn (bibe, ctx))
+ return;
+ }));
+ /* *INDENT-ON* */
+ }
+}
+
+nat64_db_bib_entry_t *
+nat64_db_bib_entry_by_index (nat64_db_t * db, u8 proto, u32 bibe_index)
+{
+ nat64_db_bib_entry_t *bib;
+
+ switch (ip_proto_to_snat_proto (proto))
+ {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ bib = db->bib._##n##_bib; \
+ break;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ default:
+ bib = db->bib._unk_proto_bib;
+ break;
+ }
+
+ return pool_elt_at_index (bib, bibe_index);
+}
+
+void
+nat64_db_st_walk (nat64_db_t * db, u8 proto,
+ nat64_db_st_walk_fn_t fn, void *ctx)
+{
+ nat64_db_st_entry_t *st, *ste;
+
+ if (proto == 255)
+ {
+ /* *INDENT-OFF* */
+ #define _(N, i, n, s) \
+ st = db->st._##n##_st; \
+ pool_foreach (ste, st, ({ \
+ if (fn (ste, ctx)) \
+ return; \
+ }));
+ foreach_snat_protocol
+ #undef _
+ st = db->st._unk_proto_st;
+ pool_foreach (ste, st, ({
+ if (fn (ste, ctx))
+ return;
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ switch (ip_proto_to_snat_proto (proto))
+ {
+ /* *INDENT-OFF* */
+ #define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ st = db->st._##n##_st; \
+ break;
+ foreach_snat_protocol
+ #undef _
+ /* *INDENT-ON* */
+ default:
+ st = db->st._unk_proto_st;
+ break;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach (ste, st,
+ ({
+ if (fn (ste, ctx))
+ return;
+ }));
+ /* *INDENT-ON* */
+ }
+}
+
+nat64_db_st_entry_t *
+nat64_db_st_entry_create (nat64_db_t * db, nat64_db_bib_entry_t * bibe,
+ ip6_address_t * in_r_addr,
+ ip4_address_t * out_r_addr, u16 r_port)
+{
+ nat64_db_st_entry_t *ste;
+ nat64_db_bib_entry_t *bib;
+ nat64_db_st_entry_key_t ste_key;
+ clib_bihash_kv_48_8_t kv;
+
+ /* create pool entry */
+ switch (ip_proto_to_snat_proto (bibe->proto))
+ {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ pool_get (db->st._##n##_st, ste); \
+ kv.value = ste - db->st._##n##_st; \
+ bib = db->bib._##n##_bib; \
+ break;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ default:
+ pool_get (db->st._unk_proto_st, ste);
+ kv.value = ste - db->st._unk_proto_st;
+ bib = db->bib._unk_proto_bib;
+ break;
+ }
+ memset (ste, 0, sizeof (*ste));
+ ste->in_r_addr.as_u64[0] = in_r_addr->as_u64[0];
+ ste->in_r_addr.as_u64[1] = in_r_addr->as_u64[1];
+ ste->out_r_addr.as_u32 = out_r_addr->as_u32;
+ ste->r_port = r_port;
+ ste->bibe_index = bibe - bib;
+ ste->proto = bibe->proto;
+
+ /* increment session number for BIB entry */
+ bibe->ses_num++;
+
+ /* create hash lookup */
+ memset (&ste_key, 0, sizeof (ste_key));
+ ste_key.l_addr.as_u64[0] = bibe->in_addr.as_u64[0];
+ ste_key.l_addr.as_u64[1] = bibe->in_addr.as_u64[1];
+ ste_key.r_addr.as_u64[0] = ste->in_r_addr.as_u64[0];
+ ste_key.r_addr.as_u64[1] = ste->in_r_addr.as_u64[1];
+ ste_key.fib_index = bibe->fib_index;
+ ste_key.l_port = bibe->in_port;
+ ste_key.r_port = ste->r_port;
+ ste_key.proto = ste->proto;
+ kv.key[0] = ste_key.as_u64[0];
+ kv.key[1] = ste_key.as_u64[1];
+ kv.key[2] = ste_key.as_u64[2];
+ kv.key[3] = ste_key.as_u64[3];
+ kv.key[4] = ste_key.as_u64[4];
+ kv.key[5] = ste_key.as_u64[5];
+ clib_bihash_add_del_48_8 (&db->st.in2out, &kv, 1);
+
+ memset (&ste_key, 0, sizeof (ste_key));
+ ste_key.l_addr.ip4.as_u32 = bibe->out_addr.as_u32;
+ ste_key.r_addr.ip4.as_u32 = ste->out_r_addr.as_u32;
+ ste_key.l_port = bibe->out_port;
+ ste_key.r_port = ste->r_port;
+ ste_key.proto = ste->proto;
+ kv.key[0] = ste_key.as_u64[0];
+ kv.key[1] = ste_key.as_u64[1];
+ kv.key[2] = ste_key.as_u64[2];
+ kv.key[3] = ste_key.as_u64[3];
+ kv.key[4] = ste_key.as_u64[4];
+ kv.key[5] = ste_key.as_u64[5];
+ clib_bihash_add_del_48_8 (&db->st.out2in, &kv, 1);
+
+ return ste;
+}
+
+void
+nat64_db_st_entry_free (nat64_db_t * db, nat64_db_st_entry_t * ste)
+{
+ nat64_db_st_entry_t *st;
+ nat64_db_bib_entry_t *bib, *bibe;
+ nat64_db_st_entry_key_t ste_key;
+ clib_bihash_kv_48_8_t kv;
+
+ switch (ip_proto_to_snat_proto (ste->proto))
+ {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ st = db->st._##n##_st; \
+ bib = db->bib._##n##_bib; \
+ break;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ default:
+ st = db->st._unk_proto_st;
+ bib = db->bib._unk_proto_bib;
+ break;
+ }
+
+ bibe = pool_elt_at_index (bib, ste->bibe_index);
+
+ /* delete hash lookup */
+ memset (&ste_key, 0, sizeof (ste_key));
+ ste_key.l_addr.as_u64[0] = bibe->in_addr.as_u64[0];
+ ste_key.l_addr.as_u64[1] = bibe->in_addr.as_u64[1];
+ ste_key.r_addr.as_u64[0] = ste->in_r_addr.as_u64[0];
+ ste_key.r_addr.as_u64[1] = ste->in_r_addr.as_u64[1];
+ ste_key.fib_index = bibe->fib_index;
+ ste_key.l_port = bibe->in_port;
+ ste_key.r_port = ste->r_port;
+ ste_key.proto = ste->proto;
+ kv.key[0] = ste_key.as_u64[0];
+ kv.key[1] = ste_key.as_u64[1];
+ kv.key[2] = ste_key.as_u64[2];
+ kv.key[3] = ste_key.as_u64[3];
+ kv.key[4] = ste_key.as_u64[4];
+ kv.key[5] = ste_key.as_u64[5];
+ clib_bihash_add_del_48_8 (&db->st.in2out, &kv, 0);
+
+ memset (&ste_key, 0, sizeof (ste_key));
+ ste_key.l_addr.ip4.as_u32 = bibe->out_addr.as_u32;
+ ste_key.r_addr.ip4.as_u32 = ste->out_r_addr.as_u32;
+ ste_key.l_port = bibe->out_port;
+ ste_key.r_port = ste->r_port;
+ ste_key.proto = ste->proto;
+ kv.key[0] = ste_key.as_u64[0];
+ kv.key[1] = ste_key.as_u64[1];
+ kv.key[2] = ste_key.as_u64[2];
+ kv.key[3] = ste_key.as_u64[3];
+ kv.key[4] = ste_key.as_u64[4];
+ kv.key[5] = ste_key.as_u64[5];
+ clib_bihash_add_del_48_8 (&db->st.out2in, &kv, 0);
+
+ /* delete from pool */
+ pool_put (st, ste);
+
+ /* decrement session number for BIB entry */
+ bibe->ses_num--;
+
+ /* delete BIB entry if last session and dynamic */
+ if (!bibe->is_static && !bibe->ses_num)
+ nat64_db_bib_entry_free (db, bibe);
+}
+
+nat64_db_st_entry_t *
+nat64_db_st_entry_find (nat64_db_t * db, ip46_address_t * l_addr,
+ ip46_address_t * r_addr, u16 l_port, u16 r_port,
+ u8 proto, u32 fib_index, u8 is_ip6)
+{
+ nat64_db_st_entry_t *ste = 0;
+ nat64_db_st_entry_t *st;
+ nat64_db_st_entry_key_t ste_key;
+ clib_bihash_kv_48_8_t kv, value;
+
+ switch (ip_proto_to_snat_proto (proto))
+ {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ case SNAT_PROTOCOL_##N: \
+ st = db->st._##n##_st; \
+ break;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ default:
+ st = db->st._unk_proto_st;
+ break;
+ }
+
+ memset (&ste_key, 0, sizeof (ste_key));
+ ste_key.l_addr.as_u64[0] = l_addr->as_u64[0];
+ ste_key.l_addr.as_u64[1] = l_addr->as_u64[1];
+ ste_key.r_addr.as_u64[0] = r_addr->as_u64[0];
+ ste_key.r_addr.as_u64[1] = r_addr->as_u64[1];
+ ste_key.fib_index = fib_index;
+ ste_key.l_port = l_port;
+ ste_key.r_port = r_port;
+ ste_key.proto = proto;
+ kv.key[0] = ste_key.as_u64[0];
+ kv.key[1] = ste_key.as_u64[1];
+ kv.key[2] = ste_key.as_u64[2];
+ kv.key[3] = ste_key.as_u64[3];
+ kv.key[4] = ste_key.as_u64[4];
+ kv.key[5] = ste_key.as_u64[5];
+
+ if (!clib_bihash_search_48_8
+ (is_ip6 ? &db->st.in2out : &db->st.out2in, &kv, &value))
+ ste = pool_elt_at_index (st, value.value);
+
+ return ste;
+}
+
+void
+nad64_db_st_free_expired (nat64_db_t * db, u32 now)
+{
+ u32 *ste_to_be_free = 0, *ste_index;
+ nat64_db_st_entry_t *st, *ste;
+
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ st = db->st._##n##_st; \
+ pool_foreach (ste, st, ({\
+ if (i == SNAT_PROTOCOL_TCP && !ste->tcp_state) \
+ continue; \
+ if (ste->expire < now) \
+ vec_add1 (ste_to_be_free, ste - st); \
+ })); \
+ vec_foreach (ste_index, ste_to_be_free) \
+ nat64_db_st_entry_free (db, pool_elt_at_index(st, ste_index[0])); \
+ vec_free (ste_to_be_free); \
+ ste_to_be_free = 0;
+ foreach_snat_protocol
+#undef _
+ st = db->st._unk_proto_st;
+ pool_foreach (ste, st, ({
+ if (ste->expire < now)
+ vec_add1 (ste_to_be_free, ste - st);
+ }));
+ vec_foreach (ste_index, ste_to_be_free)
+ nat64_db_st_entry_free (db, pool_elt_at_index(st, ste_index[0]));
+ vec_free (ste_to_be_free);
+/* *INDENT-ON* */
+}
+
+void
+nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr)
+{
+ u32 *ste_to_be_free = 0, *ste_index;
+ nat64_db_st_entry_t *st, *ste;
+ nat64_db_bib_entry_t *bibe;
+
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ st = db->st._##n##_st; \
+ pool_foreach (ste, st, ({ \
+ bibe = pool_elt_at_index (db->bib._##n##_bib, ste->bibe_index); \
+ if (bibe->out_addr.as_u32 == out_addr->as_u32) \
+ vec_add1 (ste_to_be_free, ste - st); \
+ })); \
+ vec_foreach (ste_index, ste_to_be_free) \
+ nat64_db_st_entry_free (db, pool_elt_at_index(st, ste_index[0])); \
+ vec_free (ste_to_be_free); \
+ ste_to_be_free = 0;
+ foreach_snat_protocol
+#undef _
+ st = db->st._unk_proto_st;
+ pool_foreach (ste, st, ({
+ bibe = pool_elt_at_index (db->bib._unk_proto_bib, ste->bibe_index);
+ if (bibe->out_addr.as_u32 == out_addr->as_u32)
+ vec_add1 (ste_to_be_free, ste - st);
+ }));
+ vec_foreach (ste_index, ste_to_be_free)
+ nat64_db_st_entry_free (db, pool_elt_at_index(st, ste_index[0]));
+ vec_free (ste_to_be_free);
+/* *INDENT-ON* */
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat64_db.h b/src/plugins/nat/nat64_db.h
new file mode 100644
index 00000000..394ca875
--- /dev/null
+++ b/src/plugins/nat/nat64_db.h
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT64 DB
+ */
+#ifndef __included_nat64_db_h__
+#define __included_nat64_db_h__
+
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_48_8.h>
+#include <nat/nat.h>
+
+
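+/*
+ * BIB lookup key: a 16-byte ip46 address, 4-byte FIB index, 2-byte port,
+ * protocol and one reserved pad byte add up to exactly 3 * 8 = 24 bytes,
+ * so the union can be copied verbatim into a clib_bihash_kv_24_8_t key.
+ */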
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ ip46_address_t addr;
+ u32 fib_index;
+ u16 port;
+ u8 proto;
+ u8 rsvd;
+ };
+ u64 as_u64[3];
+ };
+} nat64_db_bib_entry_key_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct
+{
+ ip6_address_t in_addr;
+ u16 in_port;
+ ip4_address_t out_addr;
+ u16 out_port;
+ u32 fib_index;
+ u32 ses_num;
+ u8 proto;
+ u8 is_static;
+}) nat64_db_bib_entry_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ /* BIBs */
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ nat64_db_bib_entry_t *_##n##_bib;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ nat64_db_bib_entry_t *_unk_proto_bib;
+
+ /* BIB lookup */
+ clib_bihash_24_8_t in2out;
+ clib_bihash_24_8_t out2in;
+} nat64_db_bib_t;
+
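+/*
+ * Session lookup key: two 16-byte ip46 addresses, 4-byte FIB index, two
+ * 2-byte ports, protocol and 7 reserved pad bytes add up to exactly
+ * 6 * 8 = 48 bytes, matching a clib_bihash_kv_48_8_t key.
+ */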
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ ip46_address_t l_addr;
+ ip46_address_t r_addr;
+ u32 fib_index;
+ u16 l_port;
+ u16 r_port;
+ u8 proto;
+ u8 rsvd[7];
+ };
+ u64 as_u64[6];
+ };
+} nat64_db_st_entry_key_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct
+{
+ ip6_address_t in_r_addr;
+ ip4_address_t out_r_addr;
+ u16 r_port;
+ u32 bibe_index;
+ u32 expire;
+ u8 proto;
+ u8 tcp_state;
+}) nat64_db_st_entry_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ /* session tables */
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+ nat64_db_st_entry_t *_##n##_st;
+ foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+ nat64_db_st_entry_t *_unk_proto_st;
+
+ /* session lookup */
+ clib_bihash_48_8_t in2out;
+ clib_bihash_48_8_t out2in;
+} nat64_db_st_t;
+
+typedef struct
+{
+ nat64_db_bib_t bib;
+ nat64_db_st_t st;
+} nat64_db_t;
+
+/**
+ * @brief Initialize NAT64 DB.
+ *
+ * @param db NAT64 DB.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat64_db_init (nat64_db_t * db);
+
+/**
+ * @brief Create new NAT64 BIB entry.
+ *
+ * @param db NAT64 DB.
+ * @param in_addr Inside IPv6 address.
+ * @param out_addr Outside IPv4 address.
+ * @param in_port Inside port number.
+ * @param out_port Outside port number.
+ * @param fib_index FIB index.
+ * @param proto L4 protocol.
+ * @param is_static 1 if static, 0 if dynamic.
+ *
+ * @returns BIB entry on success, 0 otherwise.
+ */
+nat64_db_bib_entry_t *nat64_db_bib_entry_create (nat64_db_t * db,
+ ip6_address_t * in_addr,
+ ip4_address_t * out_addr,
+ u16 in_port, u16 out_port,
+ u32 fib_index,
+ u8 proto, u8 is_static);
+
+/**
+ * @brief Free NAT64 BIB entry.
+ *
+ * @param db NAT64 DB.
+ * @param bibe BIB entry.
+ */
+void nat64_db_bib_entry_free (nat64_db_t * db, nat64_db_bib_entry_t * bibe);
+
+/**
+ * @brief Callback function invoked for each entry when walking the NAT64
+ * BIB; a non-zero return value stops the walk.
+ */
+typedef int (*nat64_db_bib_walk_fn_t) (nat64_db_bib_entry_t * bibe,
+ void *ctx);
+/**
+ * @brief Walk NAT64 BIB.
+ *
+ * @param db NAT64 DB.
+ * @param proto BIB L4 protocol:
+ * - 255 all BIBs
+ * - 6 TCP BIB
+ * - 17 UDP BIB
+ * - 1/58 ICMP BIB
+ * - otherwise "unknown" protocol BIB
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed to the visit function.
+ */
+void nat64_db_bib_walk (nat64_db_t * db, u8 proto,
+ nat64_db_bib_walk_fn_t fn, void *ctx);
+
+/**
+ * @brief Find NAT64 BIB entry.
+ *
+ * @param db NAT64 DB.
+ * @param addr IP address.
+ * @param port Port number.
+ * @param proto L4 protocol.
+ * @param fib_index FIB index.
+ * @param is_ip6 1 to look up by the IPv6 (inside) address, 0 by the IPv4 (outside) address.
+ *
+ * @return BIB entry if found.
+ */
+nat64_db_bib_entry_t *nat64_db_bib_entry_find (nat64_db_t * db,
+ ip46_address_t * addr,
+ u16 port,
+ u8 proto,
+ u32 fib_index, u8 is_ip6);
+
+/**
+ * @brief Get BIB entry by index and protocol.
+ *
+ * @param db NAT64 DB.
+ * @param proto L4 protocol.
+ * @param bibe_index BIB entry index.
+ *
+ * @return BIB entry if found.
+ */
+nat64_db_bib_entry_t *nat64_db_bib_entry_by_index (nat64_db_t * db,
+ u8 proto, u32 bibe_index);
+/**
+ * @brief Create new NAT64 session table entry.
+ *
+ * @param db NAT64 DB.
+ * @param bibe Corresponding BIB entry.
+ * @param in_r_addr Inside IPv6 address of the remote host.
+ * @param out_r_addr Outside IPv4 address of the remote host.
+ * @param r_port Remote host port number.
+ *
+ * @returns session table entry on success, 0 otherwise.
+ */
+nat64_db_st_entry_t *nat64_db_st_entry_create (nat64_db_t * db,
+ nat64_db_bib_entry_t * bibe,
+ ip6_address_t * in_r_addr,
+ ip4_address_t * out_r_addr,
+ u16 r_port);
+
+/**
+ * @brief Free NAT64 session table entry.
+ *
+ * @param db NAT64 DB.
+ * @param ste Session table entry.
+ */
+void nat64_db_st_entry_free (nat64_db_t * db, nat64_db_st_entry_t * ste);
+
+/**
+ * @brief Find NAT64 session table entry.
+ *
+ * @param db NAT64 DB.
+ * @param l_addr Local host address.
+ * @param r_addr Remote host address.
+ * @param l_port Local host port number.
+ * @param r_port Remote host port number.
+ * @param proto L4 protocol.
+ * @param fib_index FIB index.
+ * @param is_ip6 1 to look up by the IPv6 (inside) addresses, 0 by the IPv4 (outside) addresses.
+ *
+ * @return session table entry if found.
+ */
+nat64_db_st_entry_t *nat64_db_st_entry_find (nat64_db_t * db,
+ ip46_address_t * l_addr,
+ ip46_address_t * r_addr,
+ u16 l_port, u16 r_port,
+ u8 proto,
+ u32 fib_index, u8 is_ip6);
+
+/**
+ * @brief Callback function invoked for each entry when walking the NAT64
+ * session table; a non-zero return value stops the walk.
+ */
+typedef int (*nat64_db_st_walk_fn_t) (nat64_db_st_entry_t * ste, void *ctx);
+
+/**
+ * @brief Walk NAT64 session table.
+ *
+ * @param db NAT64 DB.
+ * @param proto L4 protocol:
+ * - 255 all session tables
+ * - 6 TCP session table
+ * - 17 UDP session table
+ * - 1/58 ICMP session table
+ * - otherwise "unknown" protocol session table
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed to the visit function.
+ */
+void nat64_db_st_walk (nat64_db_t * db, u8 proto,
+ nat64_db_st_walk_fn_t fn, void *ctx);
+
+/**
+ * @brief Free expired session entries in session tables.
+ *
+ * @param db NAT64 DB.
+ * @param now Current time.
+ */
+void nad64_db_st_free_expired (nat64_db_t * db, u32 now);
+
+/**
+ * @brief Free sessions using specific outside address.
+ *
+ * @param db NAT64 DB.
+ * @param out_addr Outside address to match.
+ */
+void nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr);
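+
+/*
+ * Usage sketch (hypothetical caller, for illustration only): counting TCP
+ * BIB entries with the walk API. Returning 0 from the callback continues
+ * the walk; any non-zero value stops it early.
+ *
+ *   static int
+ *   bib_count_cb (nat64_db_bib_entry_t * bibe, void *ctx)
+ *   {
+ *     u32 *count = ctx;
+ *     *count += 1;
+ *     return 0;
+ *   }
+ *
+ *   u32 count = 0;
+ *   nat64_db_bib_walk (db, IP_PROTOCOL_TCP, bib_count_cb, &count);
+ */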
+
+#endif /* __included_nat64_db_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat64_doc.md b/src/plugins/nat/nat64_doc.md
new file mode 100644
index 00000000..f65b4633
--- /dev/null
+++ b/src/plugins/nat/nat64_doc.md
@@ -0,0 +1,73 @@
+# Stateful NAT64: Network Address and Protocol Translation from IPv6 Clients to IPv4 Servers {#nat64_doc}
+
+## Introduction
+
+Stateful NAT64 in VPP allows IPv6-only clients to contact IPv4 servers using unicast UDP, TCP, or ICMP based on RFC 6146.
+
+## Configuration
+
+### Enable/disable NAT64 feature on the interface
+
+> set interface nat64 in|out <intfc> [del]
+
+in: inside/local/IPv6 network
+out: outside/external/IPv4 network
+intfc: interface name
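+
+For example, assuming an IPv6-facing interface GigabitEthernet0/8/0 and an IPv4-facing interface GigabitEthernet0/9/0 (interface names are illustrative):
+
+> set interface nat64 in GigabitEthernet0/8/0
+> set interface nat64 out GigabitEthernet0/9/0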
+
+### Add/delete NAT64 pool address
+
+One or more public IPv4 addresses assigned to a NAT64 are shared among several IPv6-only clients.
+
+> nat64 add pool address <ip4-range-start> [- <ip4-range-end>] [tenant-vrf <tenant-vrf-id>] [del]
+
+ip4-range-start: First IPv4 address of the range
+ip4-range-end: Last IPv4 address of the range (optional, not used for single address)
+tenant-vrf-id: VRF id of the tenant associated with the pool address (optional; if not set, the pool address is global)
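+
+For example, to add the (illustrative) public range 203.0.113.1 - 203.0.113.10 to the pool:
+
+> nat64 add pool address 203.0.113.1 - 203.0.113.10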
+
+### Add/delete static BIB entry
+
+Stateful NAT64 also supports IPv4-initiated communications to a subset of the IPv6 hosts through statically configured bindings.
+
+> nat64 add static bib <ip6-addr> <in-port> <ip4-addr> <out-port> tcp|udp|icmp [vrf <table-id>] [del]
+
+ip6-addr: inside IPv6 address of the host
+in-port: inside port or ICMPv6 identifier
+ip4-addr: outside IPv4 address of the host
+out-port: outside port or ICMPv4 identifier
+table-id: VRF id of the tenant associated with the BIB entry (optional; defaults to the global VRF)
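+
+For example, to make the (illustrative) IPv6 host fd01::2 reachable on TCP port 80 via the public address 203.0.113.5:
+
+> nat64 add static bib fd01::2 80 203.0.113.5 80 tcp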
+
+### Set NAT64 session timeouts
+
+A session is deleted when its timer expires. If all sessions corresponding to a dynamically created BIB entry are deleted, the BIB entry is also deleted. While packets are flowing, the session timer is refreshed to keep the session alive.
+
+> set nat64 timeouts udp <sec> icmp <sec> tcp-trans <sec> tcp-est <sec> tcp-incoming-syn <sec> | reset
+
+udp: UDP session timeout value (default 300sec)
+icmp: ICMP session timeout value (default 60sec)
+tcp-trans: transitory TCP session timeout value (default 240sec)
+tcp-est: established TCP session timeout value (default 7440sec)
+tcp-incoming-syn: incoming SYN TCP session timeout value (default 6sec)
+reset: reset timers to default values
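+
+For example, to shorten the UDP timeout to 120 seconds while restating the defaults for the remaining timers:
+
+> set nat64 timeouts udp 120 icmp 60 tcp-trans 240 tcp-est 7440 tcp-incoming-syn 6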
+
+### Set NAT64 prefix
+
+Stateful NAT64 supports the algorithm for generating IPv6 representations of IPv4 addresses defined in RFC 6052. If no prefix is configured, the well-known prefix 64:ff9b::/96 is used.
+
+> nat64 add prefix <ip6-prefix>/<plen> [tenant-vrf <vrf-id>] [del]
+
+ip6-prefix: IPv6 prefix
+plen: prefix length (valid values: 32, 40, 48, 56, 64, or 96)
+tenant-vrf: VRF id of the tenant associated with the prefix
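+
+For example, with the /96 prefix 2001:db8::/96 the IPv4 address 192.0.2.1 (hexadecimal c0000201) is represented as 2001:db8::c000:201; shorter prefix lengths embed the IPv4 bits closer to the prefix, as defined in RFC 6052.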
+
+### Show commands
+
+> show nat64 pool
+> show nat64 interfaces
+> show nat64 bib tcp|udp|icmp
+> show nat64 session table tcp|udp|icmp
+> show nat64 timeouts
+> show nat64 prefix
+
+## Notes
+
+Multi-threading is not supported yet (CLI/API commands are disabled when VPP runs with multiple threads).
diff --git a/src/plugins/nat/nat64_in2out.c b/src/plugins/nat/nat64_in2out.c
new file mode 100644
index 00000000..f78baff4
--- /dev/null
+++ b/src/plugins/nat/nat64_in2out.c
@@ -0,0 +1,1118 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
+ */
+
+#include <nat/nat64.h>
+#include <vnet/ip/ip6_to_ip4.h>
+#include <vnet/fib/fib_table.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ u8 is_slow_path;
+} nat64_in2out_trace_t;
+
+static u8 *
+format_nat64_in2out_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
+ char *tag;
+
+ tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
+
+ s =
+ format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
+ t->next_index);
+
+ return s;
+}
+
+vlib_node_registration_t nat64_in2out_node;
+vlib_node_registration_t nat64_in2out_slowpath_node;
+
+#define foreach_nat64_in2out_error \
+_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \
+_(IN2OUT_PACKETS, "good in2out packets processed") \
+_(NO_TRANSLATION, "no translation") \
+_(UNKNOWN, "unknown")
+
+typedef enum
+{
+#define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
+ foreach_nat64_in2out_error
+#undef _
+ NAT64_IN2OUT_N_ERROR,
+} nat64_in2out_error_t;
+
+static char *nat64_in2out_error_strings[] = {
+#define _(sym,string) string,
+ foreach_nat64_in2out_error
+#undef _
+};
+
+typedef enum
+{
+ NAT64_IN2OUT_NEXT_IP4_LOOKUP,
+ NAT64_IN2OUT_NEXT_IP6_LOOKUP,
+ NAT64_IN2OUT_NEXT_DROP,
+ NAT64_IN2OUT_NEXT_SLOWPATH,
+ NAT64_IN2OUT_N_NEXT,
+} nat64_in2out_next_t;
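+
+/*
+ * The fast path (nat64-in2out) translates TCP, UDP and ICMP directly;
+ * packets carrying any other L4 protocol are handed off to the slow path
+ * node via NAT64_IN2OUT_NEXT_SLOWPATH, where an address-only translation
+ * without ports is attempted.
+ */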
+
+typedef struct nat64_in2out_set_ctx_t_
+{
+ vlib_buffer_t *b;
+ vlib_main_t *vm;
+} nat64_in2out_set_ctx_t;
+
+/**
+ * @brief Check whether a packet is being hairpinned.
+ *
+ * If the destination IP address of the packet is an IPv4 address assigned to
+ * the NAT64 itself, then the packet is a hairpin packet.
+ *
+ * @param dst_addr Destination address of the packet.
+ *
+ * @returns 1 if hairpinning, otherwise 0.
+ */
+static_always_inline int
+is_hairpinning (ip6_address_t * dst_addr)
+{
+ nat64_main_t *nm = &nat64_main;
+ int i;
+
+ for (i = 0; i < vec_len (nm->addr_pool); i++)
+ {
+ if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
+ void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_in2out_set_ctx_t *ctx = arg;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr;
+ u32 sw_if_index, fib_index;
+ udp_header_t *udp = ip6_next_header (ip6);
+ u8 proto = ip6->protocol;
+ u16 sport = udp->src_port;
+ u16 dport = udp->dst_port;
+
+ sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ saddr.as_u64[0] = ip6->src_address.as_u64[0];
+ saddr.as_u64[1] = ip6->src_address.as_u64[1];
+ daddr.as_u64[0] = ip6->dst_address.as_u64[0];
+ daddr.as_u64[1] = ip6->dst_address.as_u64[1];
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto,
+ fib_index, 1);
+
+ if (ste)
+ {
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &saddr, sport, proto, fib_index, 1);
+
+ if (!bibe)
+ {
+ u16 out_port;
+ ip4_address_t out_addr;
+ if (nat64_alloc_out_addr_and_port
+ (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
+ &out_port))
+ return -1;
+
+ bibe =
+ nat64_db_bib_entry_create (&nm->db, &ip6->src_address, &out_addr,
+ sport, clib_host_to_net_u16 (out_port),
+ fib_index, proto, 0);
+ if (!bibe)
+ return -1;
+ }
+
+ nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
+ &daddr.ip4, dport);
+ if (!ste)
+ return -1;
+ }
+
+ nat64_session_reset_timeout (ste, ctx->vm);
+
+ ip4->src_address.as_u32 = bibe->out_addr.as_u32;
+ udp->src_port = bibe->out_port;
+
+ ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
+
+ if (proto == IP_PROTOCOL_TCP)
+ {
+ u16 *checksum;
+ ip_csum_t csum;
+ tcp_header_t *tcp = ip6_next_header (ip6);
+
+ checksum = &tcp->checksum;
+ csum = ip_csum_sub_even (*checksum, sport);
+ csum = ip_csum_add_even (csum, udp->src_port);
+ *checksum = ip_csum_fold (csum);
+ }
+
+ return 0;
+}
+
+static int
+nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_in2out_set_ctx_t *ctx = arg;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr;
+ u32 sw_if_index, fib_index;
+ icmp46_header_t *icmp = ip6_next_header (ip6);
+
+ sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ saddr.as_u64[0] = ip6->src_address.as_u64[0];
+ saddr.as_u64[1] = ip6->src_address.as_u64[1];
+ daddr.as_u64[0] = ip6->dst_address.as_u64[0];
+ daddr.as_u64[1] = ip6->dst_address.as_u64[1];
+
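+  /* By the time this callback runs, icmp6_to_icmp () has already rewritten
+   * the ICMP header in place, hence the ICMP4_* type checks here. */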
+ if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
+ {
+ u16 in_id = ((u16 *) (icmp))[2];
+ ste =
+ nat64_db_st_entry_find (&nm->db, &saddr, &daddr, in_id, 0,
+ IP_PROTOCOL_ICMP, fib_index, 1);
+
+ if (ste)
+ {
+ bibe =
+ nat64_db_bib_entry_by_index (&nm->db, IP_PROTOCOL_ICMP,
+ ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &saddr, in_id,
+ IP_PROTOCOL_ICMP, fib_index, 1);
+
+ if (!bibe)
+ {
+ u16 out_id;
+ ip4_address_t out_addr;
+ if (nat64_alloc_out_addr_and_port
+ (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id))
+ return -1;
+
+ bibe =
+ nat64_db_bib_entry_create (&nm->db, &ip6->src_address,
+ &out_addr, in_id,
+ clib_host_to_net_u16 (out_id),
+ fib_index, IP_PROTOCOL_ICMP, 0);
+ if (!bibe)
+ return -1;
+ }
+
+ nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
+ &daddr.ip4, 0);
+ if (!ste)
+ return -1;
+ }
+
+ nat64_session_reset_timeout (ste, ctx->vm);
+
+ ip4->src_address.as_u32 = bibe->out_addr.as_u32;
+ ((u16 *) (icmp))[2] = bibe->out_port;
+
+ ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
+ }
+ else
+ {
+ if (!vec_len (nm->addr_pool))
+ return -1;
+
+ ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
+ nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
+ }
+
+ return 0;
+}
+
+static int
+nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
+ void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_in2out_set_ctx_t *ctx = arg;
+ nat64_db_st_entry_t *ste;
+ nat64_db_bib_entry_t *bibe;
+ ip46_address_t saddr, daddr;
+ u32 sw_if_index, fib_index;
+ u8 proto = ip6->protocol;
+
+ sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ saddr.as_u64[0] = ip6->src_address.as_u64[0];
+ saddr.as_u64[1] = ip6->src_address.as_u64[1];
+ daddr.as_u64[0] = ip6->dst_address.as_u64[0];
+ daddr.as_u64[1] = ip6->dst_address.as_u64[1];
+
+ if (proto == IP_PROTOCOL_ICMP6)
+ {
+ icmp46_header_t *icmp = ip6_next_header (ip6);
+ u16 in_id = ((u16 *) (icmp))[2];
+ proto = IP_PROTOCOL_ICMP;
+
+ if (!
+ (icmp->type == ICMP4_echo_request
+ || icmp->type == ICMP4_echo_reply))
+ return -1;
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &daddr, &saddr, in_id, 0, proto,
+ fib_index, 1);
+ if (!ste)
+ return -1;
+
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
+ ((u16 *) (icmp))[2] = bibe->out_port;
+ ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
+ }
+ else
+ {
+ udp_header_t *udp = ip6_next_header (ip6);
+ tcp_header_t *tcp = ip6_next_header (ip6);
+ u16 *checksum;
+ ip_csum_t csum;
+
+ u16 sport = udp->src_port;
+ u16 dport = udp->dst_port;
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto,
+ fib_index, 1);
+ if (!ste)
+ return -1;
+
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
+ udp->dst_port = bibe->out_port;
+ ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
+
+ if (proto == IP_PROTOCOL_TCP)
+ checksum = &tcp->checksum;
+ else
+ checksum = &udp->checksum;
+ csum = ip_csum_sub_even (*checksum, dport);
+ csum = ip_csum_add_even (csum, udp->dst_port);
+ *checksum = ip_csum_fold (csum);
+ }
+
+ return 0;
+}
+
+typedef struct unk_proto_st_walk_ctx_t_
+{
+ ip6_address_t src_addr;
+ ip6_address_t dst_addr;
+ ip4_address_t out_addr;
+ u32 fib_index;
+ u8 proto;
+} unk_proto_st_walk_ctx_t;
+
+static int
+unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ unk_proto_st_walk_ctx_t *ctx = arg;
+ nat64_db_bib_entry_t *bibe;
+ ip46_address_t saddr, daddr;
+
+ if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
+ {
+ bibe =
+ nat64_db_bib_entry_by_index (&nm->db, ste->proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr)
+ && bibe->fib_index == ctx->fib_index)
+ {
+ memset (&saddr, 0, sizeof (saddr));
+ saddr.ip4.as_u32 = bibe->out_addr.as_u32;
+ memset (&daddr, 0, sizeof (daddr));
+ nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
+
+ if (nat64_db_st_entry_find
+ (&nm->db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
+ return -1;
+
+ ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
+ void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_in2out_set_ctx_t *ctx = arg;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr, addr;
+ u32 sw_if_index, fib_index;
+ u8 proto = ip6->protocol;
+ int i;
+
+ sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ saddr.as_u64[0] = ip6->src_address.as_u64[0];
+ saddr.as_u64[1] = ip6->src_address.as_u64[1];
+ daddr.as_u64[0] = ip6->dst_address.as_u64[0];
+ daddr.as_u64[1] = ip6->dst_address.as_u64[1];
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &saddr, &daddr, 0, 0, proto, fib_index,
+ 1);
+
+ if (ste)
+ {
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &saddr, 0, proto, fib_index, 1);
+
+ if (!bibe)
+ {
+ /* Choose same out address as for TCP/UDP session to same dst */
+ unk_proto_st_walk_ctx_t ctx = {
+ .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
+ .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
+ .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
+ .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
+ .out_addr.as_u32 = 0,
+ .fib_index = fib_index,
+ .proto = proto,
+ };
+
+ nat64_db_st_walk (&nm->db, IP_PROTOCOL_TCP, unk_proto_st_walk,
+ &ctx);
+
+ if (!ctx.out_addr.as_u32)
+ nat64_db_st_walk (&nm->db, IP_PROTOCOL_UDP, unk_proto_st_walk,
+ &ctx);
+
+          /* Verify that the out address is not already in use for this protocol */
+ memset (&addr, 0, sizeof (addr));
+ addr.ip4.as_u32 = ctx.out_addr.as_u32;
+ if (nat64_db_bib_entry_find (&nm->db, &addr, 0, proto, 0, 0))
+ ctx.out_addr.as_u32 = 0;
+
+ if (!ctx.out_addr.as_u32)
+ {
+ for (i = 0; i < vec_len (nm->addr_pool); i++)
+ {
+ addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
+ if (!nat64_db_bib_entry_find
+ (&nm->db, &addr, 0, proto, 0, 0))
+ break;
+ }
+ }
+
+ if (!ctx.out_addr.as_u32)
+ return -1;
+
+ bibe =
+ nat64_db_bib_entry_create (&nm->db, &ip6->src_address,
+ &ctx.out_addr, 0, 0, fib_index, proto,
+ 0);
+ if (!bibe)
+ return -1;
+ }
+
+ nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
+ &daddr.ip4, 0);
+ if (!ste)
+ return -1;
+ }
+
+ nat64_session_reset_timeout (ste, ctx->vm);
+
+ ip4->src_address.as_u32 = bibe->out_addr.as_u32;
+ ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
+
+ return 0;
+}
+
+
+
+static int
+nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
+ ip6_header_t * ip6)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr;
+ u32 sw_if_index, fib_index;
+ udp_header_t *udp = ip6_next_header (ip6);
+ tcp_header_t *tcp = ip6_next_header (ip6);
+ u8 proto = ip6->protocol;
+ u16 sport = udp->src_port;
+ u16 dport = udp->dst_port;
+ u16 *checksum;
+ ip_csum_t csum;
+
+ sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ saddr.as_u64[0] = ip6->src_address.as_u64[0];
+ saddr.as_u64[1] = ip6->src_address.as_u64[1];
+ daddr.as_u64[0] = ip6->dst_address.as_u64[0];
+ daddr.as_u64[1] = ip6->dst_address.as_u64[1];
+
+ if (proto == IP_PROTOCOL_UDP)
+ checksum = &udp->checksum;
+ else
+ checksum = &tcp->checksum;
+
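+  /* Incrementally remove the old pseudo-header address and port fields
+   * from the L4 checksum (RFC 1624); the new values are added back in
+   * below, once the lookup has rewritten the addresses and ports. */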
+ csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
+ csum = ip_csum_sub_even (csum, sport);
+ csum = ip_csum_sub_even (csum, dport);
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto,
+ fib_index, 1);
+
+ if (ste)
+ {
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &saddr, sport, proto, fib_index, 1);
+
+ if (!bibe)
+ {
+ u16 out_port;
+ ip4_address_t out_addr;
+ if (nat64_alloc_out_addr_and_port
+ (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
+ &out_port))
+ return -1;
+
+ bibe =
+ nat64_db_bib_entry_create (&nm->db, &ip6->src_address, &out_addr,
+ sport, clib_host_to_net_u16 (out_port),
+ fib_index, proto, 0);
+ if (!bibe)
+ return -1;
+ }
+
+ nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
+ &daddr.ip4, dport);
+ if (!ste)
+ return -1;
+ }
+
+ nat64_session_reset_timeout (ste, vm);
+
+ sport = udp->src_port = bibe->out_port;
+ nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
+
+ memset (&saddr, 0, sizeof (saddr));
+ memset (&daddr, 0, sizeof (daddr));
+ saddr.ip4.as_u32 = bibe->out_addr.as_u32;
+ daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, 0,
+ 0);
+
+ if (ste)
+ {
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe = nat64_db_bib_entry_find (&nm->db, &daddr, dport, proto, 0, 0);
+
+ if (!bibe)
+ return -1;
+
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address,
+ &saddr.ip4, sport);
+ }
+
+ ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+ ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+ udp->dst_port = bibe->in_port;
+
+ csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
+ csum = ip_csum_add_even (csum, udp->src_port);
+ csum = ip_csum_add_even (csum, udp->dst_port);
+ *checksum = ip_csum_fold (csum);
+
+ return 0;
+}
+
+static int
+nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
+ ip6_header_t * ip6)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ icmp46_header_t *icmp = ip6_next_header (ip6);
+ ip6_header_t *inner_ip6;
+ ip46_address_t saddr, daddr;
+ u32 sw_if_index, fib_index;
+ u8 proto;
+ udp_header_t *udp;
+ tcp_header_t *tcp;
+ u16 *checksum, sport, dport;
+ ip_csum_t csum;
+
+ if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
+ return -1;
+
+ inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ proto = inner_ip6->protocol;
+
+ if (proto == IP_PROTOCOL_ICMP6)
+ return -1;
+
+ sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
+ saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
+ daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
+ daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];
+
+ udp = ip6_next_header (inner_ip6);
+ tcp = ip6_next_header (inner_ip6);
+
+ sport = udp->src_port;
+ dport = udp->dst_port;
+
+ if (proto == IP_PROTOCOL_UDP)
+ checksum = &udp->checksum;
+ else
+ checksum = &tcp->checksum;
+
+ csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
+ csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
+ csum = ip_csum_sub_even (csum, sport);
+ csum = ip_csum_sub_even (csum, dport);
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto,
+ fib_index, 1);
+ if (!ste)
+ return -1;
+
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ dport = udp->dst_port = bibe->out_port;
+ nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);
+
+ memset (&saddr, 0, sizeof (saddr));
+ memset (&daddr, 0, sizeof (daddr));
+ saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
+ daddr.ip4.as_u32 = bibe->out_addr.as_u32;
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, 0,
+ 0);
+ if (!ste)
+ return -1;
+
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
+ inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
+ udp->src_port = bibe->in_port;
+
+ csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
+ csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
+ csum = ip_csum_add_even (csum, udp->src_port);
+ csum = ip_csum_add_even (csum, udp->dst_port);
+ *checksum = ip_csum_fold (csum);
+
+ if (!vec_len (nm->addr_pool))
+ return -1;
+
+ nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
+ ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
+ ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
+
+ icmp->checksum = 0;
+ csum = ip_csum_with_carry (0, ip6->payload_length);
+ csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
+ csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
+ csum =
+ ip_incremental_checksum (csum, icmp,
+ clib_net_to_host_u16 (ip6->payload_length));
+ icmp->checksum = ~ip_csum_fold (csum);
+
+ return 0;
+}
+
+static int
+nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
+ ip6_header_t * ip6)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr, addr;
+ u32 sw_if_index, fib_index;
+ u8 proto = ip6->protocol;
+ int i;
+
+ sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ saddr.as_u64[0] = ip6->src_address.as_u64[0];
+ saddr.as_u64[1] = ip6->src_address.as_u64[1];
+ daddr.as_u64[0] = ip6->dst_address.as_u64[0];
+ daddr.as_u64[1] = ip6->dst_address.as_u64[1];
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &saddr, &daddr, 0, 0, proto, fib_index,
+ 1);
+
+ if (ste)
+ {
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &saddr, 0, proto, fib_index, 1);
+
+ if (!bibe)
+ {
+ /* Choose same out address as for TCP/UDP session to same dst */
+ unk_proto_st_walk_ctx_t ctx = {
+ .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
+ .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
+ .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
+ .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
+ .out_addr.as_u32 = 0,
+ .fib_index = fib_index,
+ .proto = proto,
+ };
+
+ nat64_db_st_walk (&nm->db, IP_PROTOCOL_TCP, unk_proto_st_walk,
+ &ctx);
+
+ if (!ctx.out_addr.as_u32)
+ nat64_db_st_walk (&nm->db, IP_PROTOCOL_UDP, unk_proto_st_walk,
+ &ctx);
+
+          /* Verify that the out address is not already in use for this protocol */
+ memset (&addr, 0, sizeof (addr));
+ addr.ip4.as_u32 = ctx.out_addr.as_u32;
+ if (nat64_db_bib_entry_find (&nm->db, &addr, 0, proto, 0, 0))
+ ctx.out_addr.as_u32 = 0;
+
+ if (!ctx.out_addr.as_u32)
+ {
+ for (i = 0; i < vec_len (nm->addr_pool); i++)
+ {
+ addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
+ if (!nat64_db_bib_entry_find
+ (&nm->db, &addr, 0, proto, 0, 0))
+ break;
+ }
+ }
+
+ if (!ctx.out_addr.as_u32)
+ return -1;
+
+ bibe =
+ nat64_db_bib_entry_create (&nm->db, &ip6->src_address,
+ &ctx.out_addr, 0, 0, fib_index, proto,
+ 0);
+ if (!bibe)
+ return -1;
+ }
+
+ nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
+ &daddr.ip4, 0);
+ if (!ste)
+ return -1;
+ }
+
+ nat64_session_reset_timeout (ste, vm);
+
+ nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
+
+ memset (&saddr, 0, sizeof (saddr));
+ memset (&daddr, 0, sizeof (daddr));
+ saddr.ip4.as_u32 = bibe->out_addr.as_u32;
+ daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
+
+ ste = nat64_db_st_entry_find (&nm->db, &daddr, &saddr, 0, 0, proto, 0, 0);
+
+ if (ste)
+ {
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe = nat64_db_bib_entry_find (&nm->db, &daddr, 0, proto, 0, 0);
+
+ if (!bibe)
+ return -1;
+
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address,
+ &saddr.ip4, 0);
+ }
+
+ ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+ ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+
+ return 0;
+}
+
+static inline uword
+nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, u8 is_slow_path)
+{
+ u32 n_left_from, *from, *to_next;
+ nat64_in2out_next_t next_index;
+ u32 pkts_processed = 0;
+ u32 stats_node_index;
+
+ stats_node_index =
+ is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip60;
+ u16 l4_offset0, frag_offset0;
+ u8 l4_protocol0;
+ u32 proto0;
+ nat64_in2out_set_ctx_t ctx0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip60 = vlib_buffer_get_current (b0);
+
+ ctx0.b = b0;
+ ctx0.vm = vm;
+
+ next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
+
+ if (PREDICT_FALSE
+ (ip6_parse
+ (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
+ &frag_offset0)))
+ {
+ next0 = NAT64_IN2OUT_NEXT_DROP;
+ b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
+ goto trace0;
+ }
+
+ proto0 = ip_proto_to_snat_proto (l4_protocol0);
+ if (frag_offset0 != 0)
+ {
+ next0 = NAT64_IN2OUT_NEXT_DROP;
+ b0->error =
+ node->errors[NAT64_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
+ goto trace0;
+ }
+
+ if (is_slow_path)
+ {
+ if (PREDICT_TRUE (proto0 == ~0))
+ {
+ if (is_hairpinning (&ip60->dst_address))
+ {
+ next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
+ if (nat64_in2out_unk_proto_hairpinning (vm, b0, ip60))
+ {
+ next0 = NAT64_IN2OUT_NEXT_DROP;
+ b0->error =
+ node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+ }
+ goto trace0;
+ }
+
+ if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
+ {
+ next0 = NAT64_IN2OUT_NEXT_DROP;
+ b0->error =
+ node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+ }
+ goto trace0;
+ }
+ else
+ {
+ if (PREDICT_FALSE (proto0 == ~0))
+ {
+ next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
+ goto trace0;
+ }
+ }
+
+ if (proto0 == SNAT_PROTOCOL_ICMP)
+ {
+ if (is_hairpinning (&ip60->dst_address))
+ {
+ next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
+ if (nat64_in2out_icmp_hairpinning (vm, b0, ip60))
+ {
+ next0 = NAT64_IN2OUT_NEXT_DROP;
+ b0->error =
+ node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+ }
+ goto trace0;
+ }
+
+ if (icmp6_to_icmp
+ (b0, nat64_in2out_icmp_set_cb, &ctx0,
+ nat64_in2out_inner_icmp_set_cb, &ctx0))
+ {
+ next0 = NAT64_IN2OUT_NEXT_DROP;
+ b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+ }
+ else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
+ {
+ if (is_hairpinning (&ip60->dst_address))
+ {
+ next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
+ if (nat64_in2out_tcp_udp_hairpinning (vm, b0, ip60))
+ {
+ next0 = NAT64_IN2OUT_NEXT_DROP;
+ b0->error =
+ node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+ }
+ goto trace0;
+ }
+
+ if (ip6_to_ip4_tcp_udp
+ (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
+ {
+ next0 = NAT64_IN2OUT_NEXT_DROP;
+ b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+ }
+
+ trace0:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ nat64_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->is_slow_path = is_slow_path;
+ }
+
+ pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, stats_node_index,
+ NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+static uword
+nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return nat64_in2out_node_fn_inline (vm, node, frame, 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat64_in2out_node) = {
+ .function = nat64_in2out_node_fn,
+ .name = "nat64-in2out",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat64_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
+ .error_strings = nat64_in2out_error_strings,
+ .n_next_nodes = NAT64_IN2OUT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn);
+
+static uword
+nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return nat64_in2out_node_fn_inline (vm, node, frame, 1);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
+ .function = nat64_in2out_slowpath_node_fn,
+ .name = "nat64-in2out-slowpath",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat64_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
+ .error_strings = nat64_in2out_error_strings,
+ .n_next_nodes = NAT64_IN2OUT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
+ nat64_in2out_slowpath_node_fn);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat64_out2in.c b/src/plugins/nat/nat64_out2in.c
new file mode 100644
index 00000000..61e88a7f
--- /dev/null
+++ b/src/plugins/nat/nat64_out2in.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT64 IPv4 to IPv6 translation (outside to inside network)
+ */
+
+#include <nat/nat64.h>
+#include <vnet/ip/ip4_to_ip6.h>
+#include <vnet/fib/ip4_fib.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+} nat64_out2in_trace_t;
+
+static u8 *
+format_nat64_out2in_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat64_out2in_trace_t *t = va_arg (*args, nat64_out2in_trace_t *);
+
+ s =
+ format (s, "NAT64-out2in: sw_if_index %d, next index %d", t->sw_if_index,
+ t->next_index);
+
+ return s;
+}
+
+vlib_node_registration_t nat64_out2in_node;
+
+#define foreach_nat64_out2in_error \
+_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
+_(OUT2IN_PACKETS, "Good out2in packets processed") \
+_(NO_TRANSLATION, "No translation") \
+_(UNKNOWN, "unknown")
+
+typedef enum
+{
+#define _(sym,str) NAT64_OUT2IN_ERROR_##sym,
+ foreach_nat64_out2in_error
+#undef _
+ NAT64_OUT2IN_N_ERROR,
+} nat64_out2in_error_t;
+
+static char *nat64_out2in_error_strings[] = {
+#define _(sym,string) string,
+ foreach_nat64_out2in_error
+#undef _
+};
+
+typedef enum
+{
+ NAT64_OUT2IN_NEXT_LOOKUP,
+ NAT64_OUT2IN_NEXT_DROP,
+ NAT64_OUT2IN_N_NEXT,
+} nat64_out2in_next_t;
+
+typedef struct nat64_out2in_set_ctx_t_
+{
+ vlib_buffer_t *b;
+ vlib_main_t *vm;
+} nat64_out2in_set_ctx_t;
+
+static int
+nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
+ void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_out2in_set_ctx_t *ctx = arg;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr;
+ ip6_address_t ip6_saddr;
+ udp_header_t *udp = ip4_next_header (ip4);
+ tcp_header_t *tcp = ip4_next_header (ip4);
+ u8 proto = ip4->protocol;
+ u16 dport = udp->dst_port;
+ u16 sport = udp->src_port;
+ u32 sw_if_index, fib_index;
+ u16 *checksum;
+ ip_csum_t csum;
+
+ sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
+ fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+
+ memset (&saddr, 0, sizeof (saddr));
+ saddr.ip4.as_u32 = ip4->src_address.as_u32;
+ memset (&daddr, 0, sizeof (daddr));
+ daddr.ip4.as_u32 = ip4->dst_address.as_u32;
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto,
+ fib_index, 0);
+ if (ste)
+ {
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &daddr, dport, proto, fib_index, 0);
+
+ if (!bibe)
+ return -1;
+
+ nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6_saddr, &saddr.ip4,
+ sport);
+ }
+
+ nat64_session_reset_timeout (ste, ctx->vm);
+
+ ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
+ ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
+
+ ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+ ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+ udp->dst_port = bibe->in_port;
+
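+  /* Only the destination port changed, so adjust the existing L4
+   * checksum incrementally (RFC 1624) instead of recomputing it. */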
+ if (proto == IP_PROTOCOL_UDP)
+ checksum = &udp->checksum;
+ else
+ checksum = &tcp->checksum;
+ csum = ip_csum_sub_even (*checksum, dport);
+ csum = ip_csum_add_even (csum, udp->dst_port);
+ *checksum = ip_csum_fold (csum);
+
+ vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+
+ return 0;
+}
+
+static int
+nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_out2in_set_ctx_t *ctx = arg;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr;
+ ip6_address_t ip6_saddr;
+ u32 sw_if_index, fib_index;
+ icmp46_header_t *icmp = ip4_next_header (ip4);
+
+ sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
+ fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+
+ memset (&saddr, 0, sizeof (saddr));
+ saddr.ip4.as_u32 = ip4->src_address.as_u32;
+ memset (&daddr, 0, sizeof (daddr));
+ daddr.ip4.as_u32 = ip4->dst_address.as_u32;
+
+ if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
+ {
+ u16 out_id = ((u16 *) (icmp))[2];
+ ste =
+ nat64_db_st_entry_find (&nm->db, &daddr, &saddr, out_id, 0,
+ IP_PROTOCOL_ICMP, fib_index, 0);
+
+ if (ste)
+ {
+ bibe =
+ nat64_db_bib_entry_by_index (&nm->db, IP_PROTOCOL_ICMP,
+ ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &daddr, out_id,
+ IP_PROTOCOL_ICMP, fib_index, 0);
+ if (!bibe)
+ return -1;
+
+ nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6_saddr, &saddr.ip4,
+ 0);
+ }
+
+ nat64_session_reset_timeout (ste, ctx->vm);
+
+ ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
+ ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
+
+ ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+ ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+ ((u16 *) (icmp))[2] = bibe->in_port;
+
+ vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+ }
+ else
+ {
+ ip6_header_t *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ nat64_compose_ip6 (&ip6->src_address, &ip4->src_address,
+ vnet_buffer (ctx->b)->sw_if_index[VLIB_TX]);
+ ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
+ ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
+ }
+
+ return 0;
+}
+
+static int
+nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
+ void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_out2in_set_ctx_t *ctx = arg;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr;
+ u32 sw_if_index, fib_index;
+ u8 proto = ip4->protocol;
+
+ sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ memset (&saddr, 0, sizeof (saddr));
+ saddr.ip4.as_u32 = ip4->src_address.as_u32;
+ memset (&daddr, 0, sizeof (daddr));
+ daddr.ip4.as_u32 = ip4->dst_address.as_u32;
+
+ if (proto == IP_PROTOCOL_ICMP6)
+ {
+ icmp46_header_t *icmp = ip4_next_header (ip4);
+ u16 out_id = ((u16 *) (icmp))[2];
+ proto = IP_PROTOCOL_ICMP;
+
+      if (icmp->type != ICMP6_echo_request
+          && icmp->type != ICMP6_echo_reply)
+        return -1;
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &saddr, &daddr, out_id, 0, proto,
+ fib_index, 0);
+ if (!ste)
+ return -1;
+
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ ip6->dst_address.as_u64[0] = ste->in_r_addr.as_u64[0];
+ ip6->dst_address.as_u64[1] = ste->in_r_addr.as_u64[1];
+ ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
+ ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
+ ((u16 *) (icmp))[2] = bibe->in_port;
+
+ vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+ }
+ else
+ {
+ udp_header_t *udp = ip4_next_header (ip4);
+ tcp_header_t *tcp = ip4_next_header (ip4);
+ u16 dport = udp->dst_port;
+ u16 sport = udp->src_port;
+ u16 *checksum;
+ ip_csum_t csum;
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto,
+ fib_index, 0);
+ if (!ste)
+ return -1;
+
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ nat64_compose_ip6 (&ip6->dst_address, &daddr.ip4, bibe->fib_index);
+ ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
+ ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
+ udp->src_port = bibe->in_port;
+
+ if (proto == IP_PROTOCOL_UDP)
+ checksum = &udp->checksum;
+ else
+ checksum = &tcp->checksum;
+ if (*checksum)
+ {
+ csum = ip_csum_sub_even (*checksum, sport);
+ csum = ip_csum_add_even (csum, udp->src_port);
+ *checksum = ip_csum_fold (csum);
+ }
+
+ vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+ }
+
+ return 0;
+}
+
+static int
+nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
+ void *arg)
+{
+ nat64_main_t *nm = &nat64_main;
+ nat64_out2in_set_ctx_t *ctx = arg;
+ nat64_db_bib_entry_t *bibe;
+ nat64_db_st_entry_t *ste;
+ ip46_address_t saddr, daddr;
+ ip6_address_t ip6_saddr;
+ u32 sw_if_index, fib_index;
+ u8 proto = ip4->protocol;
+
+ sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
+ fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+
+ memset (&saddr, 0, sizeof (saddr));
+ saddr.ip4.as_u32 = ip4->src_address.as_u32;
+ memset (&daddr, 0, sizeof (daddr));
+ daddr.ip4.as_u32 = ip4->dst_address.as_u32;
+
+ ste =
+ nat64_db_st_entry_find (&nm->db, &daddr, &saddr, 0, 0, proto, fib_index,
+ 0);
+ if (ste)
+ {
+ bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+ }
+ else
+ {
+ bibe =
+ nat64_db_bib_entry_find (&nm->db, &daddr, 0, proto, fib_index, 0);
+
+ if (!bibe)
+ return -1;
+
+ nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
+ ste =
+ nat64_db_st_entry_create (&nm->db, bibe, &ip6_saddr, &saddr.ip4, 0);
+ }
+
+ nat64_session_reset_timeout (ste, ctx->vm);
+
+ ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
+ ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
+
+ ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+ ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+
+ vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+
+ return 0;
+}
+
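+/*
+ * Per-packet dispatch: classify each IPv4 packet by protocol and run
+ * the matching ip4-to-ip6 translation with the callbacks above; if a
+ * callback fails, the packet is dropped with the NO_TRANSLATION error.
+ */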
+static uword
+nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ nat64_out2in_next_t next_index;
+ u32 pkts_processed = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip4_header_t *ip40;
+ u32 proto0;
+ nat64_out2in_set_ctx_t ctx0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip40 = vlib_buffer_get_current (b0);
+
+ ctx0.b = b0;
+ ctx0.vm = vm;
+
+ next0 = NAT64_OUT2IN_NEXT_LOOKUP;
+
+ proto0 = ip_proto_to_snat_proto (ip40->protocol);
+
+ if (proto0 == SNAT_PROTOCOL_ICMP)
+ {
+ if (icmp_to_icmp6
+ (b0, nat64_out2in_icmp_set_cb, &ctx0,
+ nat64_out2in_inner_icmp_set_cb, &ctx0))
+ {
+ next0 = NAT64_OUT2IN_NEXT_DROP;
+ b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+ }
+ else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
+ {
+ if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
+ {
+ next0 = NAT64_OUT2IN_NEXT_DROP;
+ b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+ }
+ else
+ {
+ if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
+ {
+ next0 = NAT64_OUT2IN_NEXT_DROP;
+ b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+ }
+
+ trace0:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ nat64_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ }
+
+ pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, nat64_out2in_node.index,
+ NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat64_out2in_node) = {
+ .function = nat64_out2in_node_fn,
+ .name = "nat64-out2in",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat64_out2in_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
+  .error_strings = nat64_out2in_error_strings,
+  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
+ [NAT64_OUT2IN_NEXT_LOOKUP] = "ip6-lookup",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
+
+/*
+ * fd.io coding-style-patch-verification: ON
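+  /* Deterministic NAT assigns each inside host a fixed block of outside
+   * ports; report the inclusive [lo_port, hi_port] range. */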
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_all_api_h.h b/src/plugins/nat/nat_all_api_h.h
new file mode 100644
index 00000000..acd9ba1c
--- /dev/null
+++ b/src/plugins/nat/nat_all_api_h.h
@@ -0,0 +1,19 @@
+
+/*
+ * nat_all_api_h.h - skeleton vpp engine plug-in api #include file
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <nat/nat.api.h>
diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c
new file mode 100644
index 00000000..b56b4436
--- /dev/null
+++ b/src/plugins/nat/nat_api.c
@@ -0,0 +1,3396 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT plugin API implementation
+ */
+
+#include <nat/nat.h>
+#include <nat/nat_det.h>
+#include <nat/nat64.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <nat/nat_msg_enum.h>
+#include <vnet/fib/fib_table.h>
+
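+/* These messages carry variable-length payloads, so the generated
+ * endian-swappers are stubbed out with no-ops; byte order for them is
+ * presumably handled in custom code. */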
+#define vl_api_nat44_lb_addr_port_t_endian vl_noop_handler
+#define vl_api_nat44_add_del_lb_static_mapping_t_endian vl_noop_handler
+#define vl_api_nat44_lb_static_mapping_details_t_endian vl_noop_handler
+
+/* define message structures */
+#define vl_typedefs
+#include <nat/nat_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <nat/nat_all_api_h.h>
+#undef vl_endianfun
+
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+
+#define REPLY_MSG_ID_BASE sm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <nat/nat_all_api_h.h>
+#undef vl_api_version
+
+/* Macro to finish up custom dump fns */
+#define FINISH \
+ vec_add1 (s, 0); \
+ vl_print (handle, (char *)s); \
+ vec_free (s); \
+ return handle;
+
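+/* Each *_t_print handler below rebuilds a CLI-style command string with
+ * format() and relies on FINISH to print it and free the vector. */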
+static void
+ vl_api_snat_add_address_range_t_handler
+ (vl_api_snat_add_address_range_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_add_address_range_reply_t *rmp;
+ ip4_address_t this_addr;
+ u32 start_host_order, end_host_order;
+ u32 vrf_id;
+ int i, count;
+ int rv = 0;
+ u32 *tmp;
+
+ if (mp->is_ip4 != 1)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto send_reply;
+ }
+
+ if (sm->static_mapping_only)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
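+  /* The range endpoints arrive in network byte order; since the byte
+   * swap is its own inverse, clib_host_to_net_u32 also converts
+   * net-to-host here, yielding an inclusive address count. */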
+ tmp = (u32 *) mp->first_ip_address;
+ start_host_order = clib_host_to_net_u32 (tmp[0]);
+ tmp = (u32 *) mp->last_ip_address;
+ end_host_order = clib_host_to_net_u32 (tmp[0]);
+
+ count = (end_host_order - start_host_order) + 1;
+
+ vrf_id = clib_host_to_net_u32 (mp->vrf_id);
+
+ if (count > 1024)
+ clib_warning ("%U - %U, %d addresses...",
+ format_ip4_address, mp->first_ip_address,
+ format_ip4_address, mp->last_ip_address, count);
+
+ memcpy (&this_addr.as_u8, mp->first_ip_address, 4);
+
+ for (i = 0; i < count; i++)
+ {
+ if (mp->is_add)
+ snat_add_address (sm, &this_addr, vrf_id);
+ else
+ rv = snat_del_address (sm, this_addr, 0);
+
+ if (rv)
+ goto send_reply;
+
+ increment_v4_address (&this_addr);
+ }
+
+send_reply:
+ REPLY_MACRO (VL_API_SNAT_ADD_ADDRESS_RANGE_REPLY);
+}
+
+static void *vl_api_snat_add_address_range_t_print
+ (vl_api_snat_add_address_range_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_add_address_range ");
+ s = format (s, "%U ", format_ip4_address, mp->first_ip_address);
+ if (memcmp (mp->first_ip_address, mp->last_ip_address, 4))
+ {
+ s = format (s, " - %U ", format_ip4_address, mp->last_ip_address);
+ }
+ FINISH;
+}
+
+static void
+ send_snat_address_details
+ (snat_address_t * a, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_address_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_ADDRESS_DETAILS + sm->msg_id_base);
+ rmp->is_ip4 = 1;
+ clib_memcpy (rmp->ip_address, &(a->addr), 4);
+ if (a->fib_index != ~0)
+ {
+ fib_table_t *fib = fib_table_get (a->fib_index, FIB_PROTOCOL_IP4);
+ rmp->vrf_id = ntohl (fib->ft_table_id);
+ }
+ else
+ rmp->vrf_id = ~0;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
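+/* Dump handlers follow the usual VPP pattern: the client sends a *_dump
+ * request and receives one *_details message per item on its
+ * shared-memory input queue. */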
+static void
+vl_api_snat_address_dump_t_handler (vl_api_snat_address_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_address_t *a;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach (a, sm->addresses)
+ send_snat_address_details (a, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_address_dump_t_print
+ (vl_api_snat_address_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_address_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_snat_interface_add_del_feature_t_handler
+ (vl_api_snat_interface_add_del_feature_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_interface_add_del_feature_reply_t *rmp;
+ u8 is_del = mp->is_add == 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = snat_interface_add_del (sw_if_index, mp->is_inside, is_del);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SNAT_INTERFACE_ADD_DEL_FEATURE_REPLY);
+}
+
+static void *vl_api_snat_interface_add_del_feature_t_print
+ (vl_api_snat_interface_add_del_feature_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_interface_add_del_feature ");
+ s = format (s, "sw_if_index %d %s %s",
+ clib_host_to_net_u32 (mp->sw_if_index),
+ mp->is_inside ? "in" : "out", mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static void
+ send_snat_interface_details
+ (snat_interface_t * i, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_interface_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_INTERFACE_DETAILS + sm->msg_id_base);
+ rmp->sw_if_index = ntohl (i->sw_if_index);
+ rmp->is_inside = i->is_inside;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_interface_dump_t_handler (vl_api_snat_interface_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (i, sm->interfaces,
+ ({
+ send_snat_interface_details(i, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_interface_dump_t_print
+ (vl_api_snat_interface_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_interface_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_snat_interface_add_del_output_feature_t_handler
+ (vl_api_snat_interface_add_del_output_feature_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_interface_add_del_output_feature_reply_t *rmp;
+ u8 is_del = mp->is_add == 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = snat_interface_add_del_output_feature (sw_if_index, mp->is_inside,
+ is_del);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SNAT_INTERFACE_ADD_DEL_OUTPUT_FEATURE_REPLY);
+}
+
+static void *vl_api_snat_interface_add_del_output_feature_t_print
+ (vl_api_snat_interface_add_del_output_feature_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_interface_add_del_output_feature ");
+ s = format (s, "sw_if_index %d %s %s",
+ clib_host_to_net_u32 (mp->sw_if_index),
+ mp->is_inside ? "in" : "out", mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static void
+send_snat_interface_output_feature_details (snat_interface_t * i,
+ unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_snat_interface_output_feature_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_SNAT_INTERFACE_OUTPUT_FEATURE_DETAILS + sm->msg_id_base);
+ rmp->sw_if_index = ntohl (i->sw_if_index);
+ rmp->context = context;
+ rmp->is_inside = i->is_inside;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_snat_interface_output_feature_dump_t_handler
+ (vl_api_snat_interface_output_feature_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (i, sm->output_feature_interfaces,
+ ({
+ send_snat_interface_output_feature_details(i, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_interface_output_feature_dump_t_print
+ (vl_api_snat_interface_output_feature_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_interface_output_feature_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_snat_add_static_mapping_t_handler
+ (vl_api_snat_add_static_mapping_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_add_static_mapping_reply_t *rmp;
+ ip4_address_t local_addr, external_addr;
+ u16 local_port = 0, external_port = 0;
+ u32 vrf_id, external_sw_if_index;
+ int rv = 0;
+ snat_protocol_t proto;
+
+ if (mp->is_ip4 != 1)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto send_reply;
+ }
+
+ memcpy (&local_addr.as_u8, mp->local_ip_address, 4);
+ memcpy (&external_addr.as_u8, mp->external_ip_address, 4);
+ if (mp->addr_only == 0)
+ {
+ local_port = clib_net_to_host_u16 (mp->local_port);
+ external_port = clib_net_to_host_u16 (mp->external_port);
+ }
+ vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+ external_sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index);
+ proto = ip_proto_to_snat_proto (mp->protocol);
+
+ rv = snat_add_static_mapping (local_addr, external_addr, local_port,
+ external_port, vrf_id, mp->addr_only,
+ external_sw_if_index, proto, mp->is_add);
+
+send_reply:
+  REPLY_MACRO (VL_API_SNAT_ADD_STATIC_MAPPING_REPLY);
+}
+
+static void *vl_api_snat_add_static_mapping_t_print
+ (vl_api_snat_add_static_mapping_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_add_static_mapping ");
+ s = format (s, "protocol %d local_addr %U external_addr %U ",
+ mp->protocol,
+ format_ip4_address, mp->local_ip_address,
+ format_ip4_address, mp->external_ip_address);
+
+ if (mp->addr_only == 0)
+ s = format (s, "local_port %d external_port %d ",
+ clib_net_to_host_u16 (mp->local_port),
+ clib_net_to_host_u16 (mp->external_port));
+
+ if (mp->vrf_id != ~0)
+ s = format (s, "vrf %d", clib_net_to_host_u32 (mp->vrf_id));
+
+ if (mp->external_sw_if_index != ~0)
+ s = format (s, "external_sw_if_index %d",
+ clib_net_to_host_u32 (mp->external_sw_if_index));
+ FINISH;
+}
+
+static void
+ send_snat_static_mapping_details
+ (snat_static_mapping_t * m, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_static_mapping_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_SNAT_STATIC_MAPPING_DETAILS + sm->msg_id_base);
+ rmp->is_ip4 = 1;
+ rmp->addr_only = m->addr_only;
+ clib_memcpy (rmp->local_ip_address, &(m->local_addr), 4);
+ clib_memcpy (rmp->external_ip_address, &(m->external_addr), 4);
+ rmp->local_port = htons (m->local_port);
+ rmp->external_port = htons (m->external_port);
+ rmp->external_sw_if_index = ~0;
+ rmp->vrf_id = htonl (m->vrf_id);
+ rmp->protocol = snat_proto_to_ip_proto (m->proto);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ send_snat_static_map_resolve_details
+ (snat_static_map_resolve_t * m, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_static_mapping_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_SNAT_STATIC_MAPPING_DETAILS + sm->msg_id_base);
+ rmp->is_ip4 = 1;
+ rmp->addr_only = m->addr_only;
+ clib_memcpy (rmp->local_ip_address, &(m->l_addr), 4);
+ rmp->local_port = htons (m->l_port);
+ rmp->external_port = htons (m->e_port);
+ rmp->external_sw_if_index = htonl (m->sw_if_index);
+ rmp->vrf_id = htonl (m->vrf_id);
+ rmp->protocol = snat_proto_to_ip_proto (m->proto);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_snat_static_mapping_dump_t_handler
+ (vl_api_snat_static_mapping_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_static_mapping_t *m;
+ snat_static_map_resolve_t *rp;
+ int j;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (m, sm->static_mappings,
+ ({
+ if (!vec_len(m->locals))
+ send_snat_static_mapping_details (m, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+
+ for (j = 0; j < vec_len (sm->to_resolve); j++)
+ {
+ rp = sm->to_resolve + j;
+ send_snat_static_map_resolve_details (rp, q, mp->context);
+ }
+}
+
+static void *vl_api_snat_static_mapping_dump_t_print
+ (vl_api_snat_static_mapping_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_static_mapping_dump ");
+
+ FINISH;
+}
+
+static void
+vl_api_snat_control_ping_t_handler (vl_api_snat_control_ping_t * mp)
+{
+ vl_api_snat_control_ping_reply_t *rmp;
+ snat_main_t *sm = &snat_main;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SNAT_CONTROL_PING_REPLY,
+ ({
+ rmp->vpe_pid = ntohl (getpid ());
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_control_ping_t_print
+ (vl_api_snat_control_ping_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_control_ping ");
+
+ FINISH;
+}
+
+static void
+vl_api_snat_show_config_t_handler (vl_api_snat_show_config_t * mp)
+{
+ vl_api_snat_show_config_reply_t *rmp;
+ snat_main_t *sm = &snat_main;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SNAT_SHOW_CONFIG_REPLY,
+ ({
+ rmp->translation_buckets = htonl (sm->translation_buckets);
+ rmp->translation_memory_size = htonl (sm->translation_memory_size);
+ rmp->user_buckets = htonl (sm->user_buckets);
+ rmp->user_memory_size = htonl (sm->user_memory_size);
+ rmp->max_translations_per_user = htonl (sm->max_translations_per_user);
+ rmp->outside_vrf_id = htonl (sm->outside_vrf_id);
+ rmp->inside_vrf_id = htonl (sm->inside_vrf_id);
+ rmp->static_mapping_only = sm->static_mapping_only;
+ rmp->static_mapping_connection_tracking =
+ sm->static_mapping_connection_tracking;
+ rmp->deterministic = sm->deterministic;
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_show_config_t_print
+ (vl_api_snat_show_config_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_show_config ");
+
+ FINISH;
+}
+
+static void
+vl_api_snat_set_workers_t_handler (vl_api_snat_set_workers_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_set_workers_reply_t *rmp;
+ int rv = 0;
+ uword *bitmap = 0;
+ u64 mask = clib_net_to_host_u64 (mp->worker_mask);
+
+ if (sm->num_workers < 2)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
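+  /* Expand the 64-bit worker mask into a clib bitmap, one bit per
+   * worker thread, before handing it to snat_set_workers(). */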
+ bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask));
+ rv = snat_set_workers (bitmap);
+ clib_bitmap_free (bitmap);
+
+send_reply:
+ REPLY_MACRO (VL_API_SNAT_SET_WORKERS_REPLY);
+}
+
+static void *vl_api_snat_set_workers_t_print
+ (vl_api_snat_set_workers_t * mp, void *handle)
+{
+ u8 *s;
+ uword *bitmap = 0;
+ u8 first = 1;
+ int i;
+ u64 mask = clib_net_to_host_u64 (mp->worker_mask);
+
+ s = format (0, "SCRIPT: snat_set_workers ");
+ bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask));
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, bitmap,
+ ({
+ if (first)
+ s = format (s, "%d", i);
+ else
+ s = format (s, ",%d", i);
+ first = 0;
+ }));
+ /* *INDENT-ON* */
+ clib_bitmap_free (bitmap);
+ FINISH;
+}
+
+static void
+ send_snat_worker_details
+ (u32 worker_index, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_worker_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ vlib_worker_thread_t *w =
+ vlib_worker_threads + worker_index + sm->first_worker_index;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_WORKER_DETAILS + sm->msg_id_base);
+ rmp->context = context;
+ rmp->worker_index = htonl (worker_index);
+ rmp->lcore_id = htonl (w->lcore_id);
+ strncpy ((char *) rmp->name, (char *) w->name, ARRAY_LEN (rmp->name) - 1);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_worker_dump_t_handler (vl_api_snat_worker_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ u32 *worker_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach (worker_index, sm->workers)
+ send_snat_worker_details(*worker_index, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_worker_dump_t_print
+ (vl_api_snat_worker_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_worker_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_snat_add_del_interface_addr_t_handler
+ (vl_api_snat_add_del_interface_addr_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_add_del_interface_addr_reply_t *rmp;
+ u8 is_del = mp->is_add == 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = snat_add_interface_address (sm, sw_if_index, is_del);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SNAT_ADD_DEL_INTERFACE_ADDR_REPLY);
+}
+
+static void *vl_api_snat_add_del_interface_addr_t_print
+ (vl_api_snat_add_del_interface_addr_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_add_del_interface_addr ");
+ s = format (s, "sw_if_index %d %s",
+ clib_host_to_net_u32 (mp->sw_if_index),
+ mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static void
+ send_snat_interface_addr_details
+ (u32 sw_if_index, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_interface_addr_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_SNAT_INTERFACE_ADDR_DETAILS + sm->msg_id_base);
+ rmp->sw_if_index = ntohl (sw_if_index);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_snat_interface_addr_dump_t_handler
+ (vl_api_snat_interface_addr_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ u32 *i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach (i, sm->auto_add_sw_if_indices)
+ send_snat_interface_addr_details(*i, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_interface_addr_dump_t_print
+ (vl_api_snat_interface_addr_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_interface_addr_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_snat_ipfix_enable_disable_t_handler
+ (vl_api_snat_ipfix_enable_disable_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_ipfix_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ rv = snat_ipfix_logging_enable_disable (mp->enable,
+ clib_host_to_net_u32
+ (mp->domain_id),
+ clib_host_to_net_u16
+ (mp->src_port));
+
+ REPLY_MACRO (VL_API_SNAT_IPFIX_ENABLE_DISABLE_REPLY);
+}
+
+static void *vl_api_snat_ipfix_enable_disable_t_print
+ (vl_api_snat_ipfix_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_ipfix_enable_disable ");
+ if (mp->domain_id)
+ s = format (s, "domain %d ", clib_net_to_host_u32 (mp->domain_id));
+ if (mp->src_port)
+ s = format (s, "src_port %d ", clib_net_to_host_u16 (mp->src_port));
+ if (!mp->enable)
+ s = format (s, "disable ");
+
+ FINISH;
+}
+
+static void
+ send_snat_user_details
+ (snat_user_t * u, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_user_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ fib_table_t *fib = fib_table_get (u->fib_index, FIB_PROTOCOL_IP4);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_USER_DETAILS + sm->msg_id_base);
+
+ rmp->vrf_id = ntohl (fib->ft_table_id);
+
+ rmp->is_ip4 = 1;
+ clib_memcpy (rmp->ip_address, &(u->addr), 4);
+ rmp->nsessions = ntohl (u->nsessions);
+ rmp->nstaticsessions = ntohl (u->nstaticsessions);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_user_dump_t_handler (vl_api_snat_user_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_main_per_thread_data_t *tsm;
+ snat_user_t *u;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach (tsm, sm->per_thread_data)
+ vec_foreach (u, tsm->users)
+ send_snat_user_details (u, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_user_dump_t_print
+ (vl_api_snat_user_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_user_dump ");
+
+ FINISH;
+}
+
+static void
+ send_snat_user_session_details
+ (snat_session_t * s, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_user_session_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_SNAT_USER_SESSION_DETAILS + sm->msg_id_base);
+ rmp->is_ip4 = 1;
+ clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
+ clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
+ rmp->is_static = s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING ? 1 : 0;
+ rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
+ rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
+ rmp->total_pkts = ntohl (s->total_pkts);
+ rmp->context = context;
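+  /* Unknown-protocol sessions keep the IP protocol number in the
+   * in2out port field, so report it as the protocol and zero the
+   * ports. */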
+ if (snat_is_unk_proto_session (s))
+ {
+ rmp->outside_port = 0;
+ rmp->inside_port = 0;
+ rmp->protocol = ntohs (s->in2out.port);
+ }
+ else
+ {
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = ntohs (snat_proto_to_ip_proto (s->in2out.protocol));
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_snat_user_session_dump_t_handler
+ (vl_api_snat_user_session_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_main_per_thread_data_t *tsm;
+ snat_session_t *s;
+ clib_bihash_kv_8_8_t key, value;
+ snat_user_key_t ukey;
+ snat_user_t *u;
+ u32 session_index, head_index, elt_index;
+ dlist_elt_t *head, *elt;
+ ip4_header_t ip;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+ if (!mp->is_ip4)
+ return;
+
+ clib_memcpy (&ukey.addr, mp->ip_address, 4);
+ ip.src_address.as_u32 = ukey.addr.as_u32;
+ ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id));
+ key.key = ukey.as_u64;
+ if (sm->num_workers)
+ tsm =
+ vec_elt_at_index (sm->per_thread_data,
+ sm->worker_in2out_cb (&ip, ukey.fib_index));
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+ if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value))
+ return;
+ u = pool_elt_at_index (tsm->users, value.value);
+ if (!u->nsessions && !u->nstaticsessions)
+ return;
+
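+  /* Walk the user's doubly-linked session list; the walk ends when the
+   * terminating ~0 index is reached. */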
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tsm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ session_index = elt->value;
+ while (session_index != ~0)
+ {
+ s = pool_elt_at_index (tsm->sessions, session_index);
+
+ send_snat_user_session_details (s, q, mp->context);
+
+ elt_index = elt->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ session_index = elt->value;
+ }
+}
+
+static void *vl_api_snat_user_session_dump_t_print
+ (vl_api_snat_user_session_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_user_session_dump ");
+ s = format (s, "ip_address %U vrf_id %d\n",
+ format_ip4_address, mp->ip_address,
+ clib_net_to_host_u32 (mp->vrf_id));
+
+ FINISH;
+}
+
+/*******************************************************************/
+/*** deterministic NAT/CGN (old, will be deprecated after 17.10) ***/
+/*******************************************************************/
+
+static void
+vl_api_snat_add_det_map_t_handler (vl_api_snat_add_det_map_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_add_det_map_reply_t *rmp;
+ int rv = 0;
+ ip4_address_t in_addr, out_addr;
+
+ clib_memcpy (&in_addr, mp->in_addr, 4);
+ clib_memcpy (&out_addr, mp->out_addr, 4);
+ rv = snat_det_add_map (sm, &in_addr, mp->in_plen, &out_addr,
+ mp->out_plen, mp->is_add);
+
+ REPLY_MACRO (VL_API_SNAT_ADD_DET_MAP_REPLY);
+}
+
+static void *vl_api_snat_add_det_map_t_print
+ (vl_api_snat_add_det_map_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_add_det_map ");
+ s = format (s, "inside address %U/%d outside address %U/%d\n",
+ format_ip4_address, mp->in_addr, mp->in_plen,
+ format_ip4_address, mp->out_addr, mp->out_plen);
+
+ FINISH;
+}
+
+static void
+vl_api_snat_det_forward_t_handler (vl_api_snat_det_forward_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_det_forward_reply_t *rmp;
+ int rv = 0;
+ u16 lo_port = 0, hi_port = 0;
+ snat_det_map_t *dm;
+ ip4_address_t in_addr, out_addr;
+
+ out_addr.as_u32 = 0;
+ clib_memcpy (&in_addr, mp->in_addr, 4);
+ dm = snat_det_map_by_user (sm, &in_addr);
+ if (!dm)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+
+ snat_det_forward (dm, &in_addr, &out_addr, &lo_port);
+ hi_port = lo_port + dm->ports_per_host - 1;
+
+send_reply:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SNAT_DET_FORWARD_REPLY,
+ ({
+ rmp->out_port_lo = ntohs (lo_port);
+ rmp->out_port_hi = ntohs (hi_port);
+ rmp->is_ip4 = 1;
+ memset (rmp->out_addr, 0, 16);
+ clib_memcpy (rmp->out_addr, &out_addr, 4);
+ }))
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_det_forward_t_print
+ (vl_api_snat_det_forward_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: smat_det_forward_t");
+ s = format (s, "inside ip address %U\n", format_ip4_address, mp->in_addr);
+
+ FINISH;
+}
+
+static void
+vl_api_snat_det_reverse_t_handler (vl_api_snat_det_reverse_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_det_reverse_reply_t *rmp;
+ int rv = 0;
+ ip4_address_t out_addr, in_addr;
+ snat_det_map_t *dm;
+
+ in_addr.as_u32 = 0;
+ clib_memcpy (&out_addr, mp->out_addr, 4);
+ dm = snat_det_map_by_out (sm, &out_addr);
+ if (!dm)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+
+ snat_det_reverse (dm, &out_addr, htons (mp->out_port), &in_addr);
+
+send_reply:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SNAT_DET_REVERSE_REPLY,
+ ({
+ rmp->is_ip4 = 1;
+ memset (rmp->in_addr, 0, 16);
+ clib_memcpy (rmp->in_addr, &in_addr, 4);
+ }))
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_det_reverse_t_print
+ (vl_api_snat_det_reverse_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: smat_det_reverse_t");
+ s = format (s, "outside ip address %U outside port %d",
+ format_ip4_address, mp->out_addr, ntohs (mp->out_port));
+
+ FINISH;
+}
+
+static void
+  send_snat_det_map_details
+ (snat_det_map_t * m, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_det_map_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_DET_MAP_DETAILS + sm->msg_id_base);
+ rmp->is_ip4 = 1;
+ clib_memcpy (rmp->in_addr, &m->in_addr, 4);
+ rmp->in_plen = m->in_plen;
+ clib_memcpy (rmp->out_addr, &m->out_addr, 4);
+ rmp->out_plen = m->out_plen;
+ rmp->sharing_ratio = htonl (m->sharing_ratio);
+ rmp->ports_per_host = htons (m->ports_per_host);
+ rmp->ses_num = htonl (m->ses_num);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_det_map_dump_t_handler (vl_api_snat_det_map_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_det_map_t *m;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach(m, sm->det_maps)
+    send_snat_det_map_details(m, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_det_map_dump_t_print
+ (vl_api_snat_det_map_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_det_map_dump ");
+
+ FINISH;
+}
+
+static void
+vl_api_snat_det_set_timeouts_t_handler (vl_api_snat_det_set_timeouts_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_det_set_timeouts_reply_t *rmp;
+ int rv = 0;
+
+ sm->udp_timeout = ntohl (mp->udp);
+ sm->tcp_established_timeout = ntohl (mp->tcp_established);
+ sm->tcp_transitory_timeout = ntohl (mp->tcp_transitory);
+ sm->icmp_timeout = ntohl (mp->icmp);
+
+ REPLY_MACRO (VL_API_SNAT_DET_SET_TIMEOUTS_REPLY);
+}
+
+static void *vl_api_snat_det_set_timeouts_t_print
+ (vl_api_snat_det_set_timeouts_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_det_set_timeouts ");
+ s = format (s, "udp %d tcp_established %d tcp_transitory %d icmp %d\n",
+ ntohl (mp->udp),
+ ntohl (mp->tcp_established),
+ ntohl (mp->tcp_transitory), ntohl (mp->icmp));
+
+ FINISH;
+}
+
+static void
+vl_api_snat_det_get_timeouts_t_handler (vl_api_snat_det_get_timeouts_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_det_get_timeouts_reply_t *rmp;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SNAT_DET_GET_TIMEOUTS_REPLY,
+ ({
+ rmp->udp = htonl (sm->udp_timeout);
+ rmp->tcp_established = htonl (sm->tcp_established_timeout);
+ rmp->tcp_transitory = htonl (sm->tcp_transitory_timeout);
+ rmp->icmp = htonl (sm->icmp_timeout);
+ }))
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_snat_det_get_timeouts_t_print
+ (vl_api_snat_det_get_timeouts_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_det_get_timeouts");
+
+ FINISH;
+}
+
+static void
+ vl_api_snat_det_close_session_out_t_handler
+ (vl_api_snat_det_close_session_out_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_det_close_session_out_reply_t *rmp;
+ ip4_address_t out_addr, ext_addr, in_addr;
+ snat_det_out_key_t key;
+ snat_det_map_t *dm;
+ snat_det_session_t *ses;
+ int rv = 0;
+
+ clib_memcpy (&out_addr, mp->out_addr, 4);
+ clib_memcpy (&ext_addr, mp->ext_addr, 4);
+
+ dm = snat_det_map_by_out (sm, &out_addr);
+ if (!dm)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+ snat_det_reverse (dm, &ext_addr, ntohs (mp->out_port), &in_addr);
+ key.ext_host_addr = ext_addr;
+ key.ext_host_port = mp->ext_port;
+ key.out_port = mp->out_port;
+ ses = snat_det_get_ses_by_out (dm, &in_addr, key.as_u64);
+ if (!ses)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+ snat_det_ses_close (dm, ses);
+
+send_reply:
+ REPLY_MACRO (VL_API_SNAT_DET_CLOSE_SESSION_OUT_REPLY);
+}
+
+static void *vl_api_snat_det_close_session_out_t_print
+ (vl_api_snat_det_close_session_out_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_det_close_session_out ");
+ s = format (s, "out_addr %U out_port %d "
+ "ext_addr %U ext_port %d\n",
+ format_ip4_address, mp->out_addr, ntohs (mp->out_port),
+ format_ip4_address, mp->ext_addr, ntohs (mp->ext_port));
+
+ FINISH;
+}
+
+static void
+ vl_api_snat_det_close_session_in_t_handler
+ (vl_api_snat_det_close_session_in_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_snat_det_close_session_in_reply_t *rmp;
+ ip4_address_t in_addr, ext_addr;
+ snat_det_out_key_t key;
+ snat_det_map_t *dm;
+ snat_det_session_t *ses;
+ int rv = 0;
+
+ clib_memcpy (&in_addr, mp->in_addr, 4);
+ clib_memcpy (&ext_addr, mp->ext_addr, 4);
+
+ dm = snat_det_map_by_user (sm, &in_addr);
+ if (!dm)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+ key.ext_host_addr = ext_addr;
+ key.ext_host_port = mp->ext_port;
+ ses = snat_det_find_ses_by_in (dm, &in_addr, mp->in_port, key);
+ if (!ses)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+ snat_det_ses_close (dm, ses);
+
+send_reply:
+  REPLY_MACRO (VL_API_SNAT_DET_CLOSE_SESSION_IN_REPLY);
+}
+
+static void *vl_api_snat_det_close_session_in_t_print
+ (vl_api_snat_det_close_session_in_t * mp, void *handle)
+{
+ u8 *s;
+ s = format (0, "SCRIPT: snat_det_close_session_in ");
+ s = format (s, "in_addr %U in_port %d "
+ "ext_addr %U ext_port %d\n",
+ format_ip4_address, mp->in_addr, ntohs (mp->in_port),
+ format_ip4_address, mp->ext_addr, ntohs (mp->ext_port));
+
+ FINISH;
+}
+
+static void
+ send_snat_det_session_details
+ (snat_det_session_t * s, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_det_session_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_DET_SESSION_DETAILS + sm->msg_id_base);
+ rmp->is_ip4 = 1;
+ rmp->in_port = s->in_port;
+ clib_memcpy (rmp->ext_addr, &s->out.ext_host_addr, 4);
+ rmp->ext_port = s->out.ext_host_port;
+ rmp->out_port = s->out.out_port;
+ rmp->state = s->state;
+ rmp->expire = ntohl (s->expire);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_det_session_dump_t_handler (vl_api_snat_det_session_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ ip4_address_t user_addr;
+ snat_det_map_t *dm;
+ snat_det_session_t *s, empty_ses;
+ u16 i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+ if (!mp->is_ip4)
+ return;
+
+ memset (&empty_ses, 0, sizeof (empty_ses));
+ clib_memcpy (&user_addr, mp->user_addr, 4);
+ dm = snat_det_map_by_user (sm, &user_addr);
+ if (!dm)
+ return;
+
+ s = dm->sessions + snat_det_user_ses_offset (&user_addr, dm->in_plen);
+ for (i = 0; i < SNAT_DET_SES_PER_USER; i++)
+ {
+ if (s->out.as_u64)
+ send_snat_det_session_details (s, q, mp->context);
+ s++;
+ }
+}
+
+static void *vl_api_snat_det_session_dump_t_print
+ (vl_api_snat_det_session_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_det_session_dump ");
+ s = format (s, "user_addr %U\n", format_ip4_address, mp->user_addr);
+
+ FINISH;
+}
+
+/******************************/
+/*** Common NAT plugin APIs ***/
+/******************************/
+
+static void
+vl_api_nat_control_ping_t_handler (vl_api_nat_control_ping_t * mp)
+{
+ vl_api_nat_control_ping_reply_t *rmp;
+ snat_main_t *sm = &snat_main;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_NAT_CONTROL_PING_REPLY,
+ ({
+ rmp->vpe_pid = ntohl (getpid ());
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat_control_ping_t_print (vl_api_nat_control_ping_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_control_ping ");
+
+ FINISH;
+}
+
+static void
+vl_api_nat_show_config_t_handler (vl_api_nat_show_config_t * mp)
+{
+ vl_api_nat_show_config_reply_t *rmp;
+ snat_main_t *sm = &snat_main;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_NAT_SHOW_CONFIG_REPLY,
+ ({
+ rmp->translation_buckets = htonl (sm->translation_buckets);
+ rmp->translation_memory_size = htonl (sm->translation_memory_size);
+ rmp->user_buckets = htonl (sm->user_buckets);
+ rmp->user_memory_size = htonl (sm->user_memory_size);
+ rmp->max_translations_per_user = htonl (sm->max_translations_per_user);
+ rmp->outside_vrf_id = htonl (sm->outside_vrf_id);
+ rmp->inside_vrf_id = htonl (sm->inside_vrf_id);
+ rmp->static_mapping_only = sm->static_mapping_only;
+ rmp->static_mapping_connection_tracking =
+ sm->static_mapping_connection_tracking;
+ rmp->deterministic = sm->deterministic;
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat_show_config_t_print (vl_api_nat_show_config_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_show_config ");
+
+ FINISH;
+}
+
+static void
+vl_api_nat_set_workers_t_handler (vl_api_nat_set_workers_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+  vl_api_nat_set_workers_reply_t *rmp;
+ int rv = 0;
+ uword *bitmap = 0;
+ u64 mask = clib_net_to_host_u64 (mp->worker_mask);
+
+ if (sm->num_workers < 2)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+ bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask));
+ rv = snat_set_workers (bitmap);
+ clib_bitmap_free (bitmap);
+
+send_reply:
+ REPLY_MACRO (VL_API_NAT_SET_WORKERS_REPLY);
+}
+
+static void *
+vl_api_nat_set_workers_t_print (vl_api_nat_set_workers_t * mp, void *handle)
+{
+ u8 *s;
+ uword *bitmap = 0;
+ u8 first = 1;
+ int i;
+ u64 mask = clib_net_to_host_u64 (mp->worker_mask);
+
+ s = format (0, "SCRIPT: nat_set_workers ");
+ bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask));
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, bitmap,
+ ({
+ if (first)
+ s = format (s, "%d", i);
+ else
+ s = format (s, ",%d", i);
+ first = 0;
+ }));
+ /* *INDENT-ON* */
+ clib_bitmap_free (bitmap);
+ FINISH;
+}
+
+static void
+send_nat_worker_details (u32 worker_index, unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_nat_worker_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ vlib_worker_thread_t *w =
+ vlib_worker_threads + worker_index + sm->first_worker_index;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT_WORKER_DETAILS + sm->msg_id_base);
+ rmp->context = context;
+ rmp->worker_index = htonl (worker_index);
+ rmp->lcore_id = htonl (w->lcore_id);
+ strncpy ((char *) rmp->name, (char *) w->name, ARRAY_LEN (rmp->name) - 1);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat_worker_dump_t_handler (vl_api_nat_worker_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ u32 *worker_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach (worker_index, sm->workers)
+ send_nat_worker_details(*worker_index, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat_worker_dump_t_print (vl_api_nat_worker_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_worker_dump ");
+
+ FINISH;
+}
+
+static void
+vl_api_nat_ipfix_enable_disable_t_handler (vl_api_nat_ipfix_enable_disable_t *
+ mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat_ipfix_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ rv = snat_ipfix_logging_enable_disable (mp->enable,
+ clib_host_to_net_u32
+ (mp->domain_id),
+ clib_host_to_net_u16
+ (mp->src_port));
+
+ REPLY_MACRO (VL_API_NAT_IPFIX_ENABLE_DISABLE_REPLY);
+}
+
+static void *
+vl_api_nat_ipfix_enable_disable_t_print (vl_api_nat_ipfix_enable_disable_t *
+ mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_ipfix_enable_disable ");
+ if (mp->domain_id)
+ s = format (s, "domain %d ", clib_net_to_host_u32 (mp->domain_id));
+ if (mp->src_port)
+ s = format (s, "src_port %d ", clib_net_to_host_u16 (mp->src_port));
+ if (!mp->enable)
+ s = format (s, "disable ");
+
+ FINISH;
+}
+
+/*************/
+/*** NAT44 ***/
+/*************/
+static void
+ vl_api_nat44_add_del_address_range_t_handler
+ (vl_api_nat44_add_del_address_range_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat44_add_del_address_range_reply_t *rmp;
+ ip4_address_t this_addr;
+ u32 start_host_order, end_host_order;
+ u32 vrf_id;
+ int i, count;
+ int rv = 0;
+ u32 *tmp;
+
+ if (sm->static_mapping_only)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+ tmp = (u32 *) mp->first_ip_address;
+ start_host_order = clib_host_to_net_u32 (tmp[0]);
+ tmp = (u32 *) mp->last_ip_address;
+ end_host_order = clib_host_to_net_u32 (tmp[0]);
+
+ count = (end_host_order - start_host_order) + 1;
+
+ vrf_id = clib_host_to_net_u32 (mp->vrf_id);
+
+ if (count > 1024)
+ clib_warning ("%U - %U, %d addresses...",
+ format_ip4_address, mp->first_ip_address,
+ format_ip4_address, mp->last_ip_address, count);
+
+ memcpy (&this_addr.as_u8, mp->first_ip_address, 4);
+
+ for (i = 0; i < count; i++)
+ {
+ if (mp->is_add)
+ snat_add_address (sm, &this_addr, vrf_id);
+ else
+ rv = snat_del_address (sm, this_addr, 0);
+
+ if (rv)
+ goto send_reply;
+
+ increment_v4_address (&this_addr);
+ }
+
+send_reply:
+ REPLY_MACRO (VL_API_NAT44_ADD_DEL_ADDRESS_RANGE_REPLY);
+}
+
+static void *vl_api_nat44_add_del_address_range_t_print
+ (vl_api_nat44_add_del_address_range_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_add_address_range ");
+ s = format (s, "%U ", format_ip4_address, mp->first_ip_address);
+ if (memcmp (mp->first_ip_address, mp->last_ip_address, 4))
+ {
+ s = format (s, " - %U ", format_ip4_address, mp->last_ip_address);
+ }
+ FINISH;
+}
+
+static void
+send_nat44_address_details (snat_address_t * a,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_nat44_address_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT44_ADDRESS_DETAILS + sm->msg_id_base);
+ clib_memcpy (rmp->ip_address, &(a->addr), 4);
+ if (a->fib_index != ~0)
+ {
+ fib_table_t *fib = fib_table_get (a->fib_index, FIB_PROTOCOL_IP4);
+ rmp->vrf_id = ntohl (fib->ft_table_id);
+ }
+ else
+ rmp->vrf_id = ~0;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat44_address_dump_t_handler (vl_api_nat44_address_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_address_t *a;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach (a, sm->addresses)
+ send_nat44_address_details (a, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat44_address_dump_t_print (vl_api_nat44_address_dump_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_address_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_nat44_interface_add_del_feature_t_handler
+ (vl_api_nat44_interface_add_del_feature_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat44_interface_add_del_feature_reply_t *rmp;
+ u8 is_del = mp->is_add == 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = snat_interface_add_del (sw_if_index, mp->is_inside, is_del);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_NAT44_INTERFACE_ADD_DEL_FEATURE_REPLY);
+}
+
+static void *vl_api_nat44_interface_add_del_feature_t_print
+ (vl_api_nat44_interface_add_del_feature_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_interface_add_del_feature ");
+ s = format (s, "sw_if_index %d %s %s",
+ clib_host_to_net_u32 (mp->sw_if_index),
+ mp->is_inside ? "in" : "out", mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static void
+send_nat44_interface_details (snat_interface_t * i,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_nat44_interface_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT44_INTERFACE_DETAILS + sm->msg_id_base);
+ rmp->sw_if_index = ntohl (i->sw_if_index);
+ rmp->is_inside = i->is_inside;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat44_interface_dump_t_handler (vl_api_nat44_interface_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (i, sm->interfaces,
+ ({
+ send_nat44_interface_details(i, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat44_interface_dump_t_print (vl_api_nat44_interface_dump_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_interface_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_nat44_interface_add_del_output_feature_t_handler
+ (vl_api_nat44_interface_add_del_output_feature_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat44_interface_add_del_output_feature_reply_t *rmp;
+ u8 is_del = mp->is_add == 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = snat_interface_add_del_output_feature (sw_if_index, mp->is_inside,
+ is_del);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_NAT44_INTERFACE_ADD_DEL_OUTPUT_FEATURE_REPLY);
+}
+
+static void *vl_api_nat44_interface_add_del_output_feature_t_print
+ (vl_api_nat44_interface_add_del_output_feature_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_interface_add_del_output_feature ");
+ s = format (s, "sw_if_index %d %s %s",
+ clib_host_to_net_u32 (mp->sw_if_index),
+ mp->is_inside ? "in" : "out", mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static void
+send_nat44_interface_output_feature_details (snat_interface_t * i,
+ unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_nat44_interface_output_feature_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_INTERFACE_OUTPUT_FEATURE_DETAILS + sm->msg_id_base);
+ rmp->sw_if_index = ntohl (i->sw_if_index);
+ rmp->context = context;
+ rmp->is_inside = i->is_inside;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_nat44_interface_output_feature_dump_t_handler
+ (vl_api_nat44_interface_output_feature_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (i, sm->output_feature_interfaces,
+ ({
+ send_nat44_interface_output_feature_details(i, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_nat44_interface_output_feature_dump_t_print
+ (vl_api_nat44_interface_output_feature_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_interface_output_feature_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_nat44_add_del_static_mapping_t_handler
+ (vl_api_nat44_add_del_static_mapping_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat44_add_del_static_mapping_reply_t *rmp;
+ ip4_address_t local_addr, external_addr;
+ u16 local_port = 0, external_port = 0;
+ u32 vrf_id, external_sw_if_index;
+ int rv = 0;
+ snat_protocol_t proto;
+
+ memcpy (&local_addr.as_u8, mp->local_ip_address, 4);
+ memcpy (&external_addr.as_u8, mp->external_ip_address, 4);
+ if (mp->addr_only == 0)
+ {
+ local_port = clib_net_to_host_u16 (mp->local_port);
+ external_port = clib_net_to_host_u16 (mp->external_port);
+ }
+ vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+ external_sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index);
+ proto = ip_proto_to_snat_proto (mp->protocol);
+
+ rv = snat_add_static_mapping (local_addr, external_addr, local_port,
+ external_port, vrf_id, mp->addr_only,
+ external_sw_if_index, proto, mp->is_add);
+
+ REPLY_MACRO (VL_API_NAT44_ADD_DEL_STATIC_MAPPING_REPLY);
+}
+
+static void *vl_api_nat44_add_del_static_mapping_t_print
+ (vl_api_nat44_add_del_static_mapping_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_add_del_static_mapping ");
+ s = format (s, "protocol %d local_addr %U external_addr %U ",
+ mp->protocol,
+ format_ip4_address, mp->local_ip_address,
+ format_ip4_address, mp->external_ip_address);
+
+ if (mp->addr_only == 0)
+ s = format (s, "local_port %d external_port %d ",
+ clib_net_to_host_u16 (mp->local_port),
+ clib_net_to_host_u16 (mp->external_port));
+
+ if (mp->vrf_id != ~0)
+ s = format (s, "vrf %d", clib_net_to_host_u32 (mp->vrf_id));
+
+ if (mp->external_sw_if_index != ~0)
+ s = format (s, "external_sw_if_index %d",
+ clib_net_to_host_u32 (mp->external_sw_if_index));
+ FINISH;
+}
+
+static void
+send_nat44_static_mapping_details (snat_static_mapping_t * m,
+ unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_nat44_static_mapping_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_STATIC_MAPPING_DETAILS + sm->msg_id_base);
+ rmp->addr_only = m->addr_only;
+ clib_memcpy (rmp->local_ip_address, &(m->local_addr), 4);
+ clib_memcpy (rmp->external_ip_address, &(m->external_addr), 4);
+ rmp->local_port = htons (m->local_port);
+ rmp->external_port = htons (m->external_port);
+ rmp->external_sw_if_index = ~0;
+ rmp->vrf_id = htonl (m->vrf_id);
+ rmp->protocol = snat_proto_to_ip_proto (m->proto);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+send_nat44_static_map_resolve_details (snat_static_map_resolve_t * m,
+ unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_nat44_static_mapping_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_STATIC_MAPPING_DETAILS + sm->msg_id_base);
+ rmp->addr_only = m->addr_only;
+ clib_memcpy (rmp->local_ip_address, &(m->l_addr), 4);
+ rmp->local_port = htons (m->l_port);
+ rmp->external_port = htons (m->e_port);
+ rmp->external_sw_if_index = htonl (m->sw_if_index);
+ rmp->vrf_id = htonl (m->vrf_id);
+ rmp->protocol = snat_proto_to_ip_proto (m->proto);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t
+ * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_static_mapping_t *m;
+ snat_static_map_resolve_t *rp;
+ int j;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (m, sm->static_mappings,
+ ({
+ if (!vec_len(m->locals))
+ send_nat44_static_mapping_details (m, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+
+ for (j = 0; j < vec_len (sm->to_resolve); j++)
+ {
+ rp = sm->to_resolve + j;
+ send_nat44_static_map_resolve_details (rp, q, mp->context);
+ }
+}
+
+static void *
+vl_api_nat44_static_mapping_dump_t_print (vl_api_nat44_static_mapping_dump_t *
+ mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_static_mapping_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_nat44_add_del_interface_addr_t_handler
+ (vl_api_nat44_add_del_interface_addr_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat44_add_del_interface_addr_reply_t *rmp;
+ u8 is_del = mp->is_add == 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = snat_add_interface_address (sm, sw_if_index, is_del);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_NAT44_ADD_DEL_INTERFACE_ADDR_REPLY);
+}
+
+static void *vl_api_nat44_add_del_interface_addr_t_print
+ (vl_api_nat44_add_del_interface_addr_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_add_del_interface_addr ");
+ s = format (s, "sw_if_index %d %s",
+ clib_host_to_net_u32 (mp->sw_if_index),
+ mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static void
+send_nat44_interface_addr_details (u32 sw_if_index,
+ unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_nat44_interface_addr_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_INTERFACE_ADDR_DETAILS + sm->msg_id_base);
+ rmp->sw_if_index = ntohl (sw_if_index);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat44_interface_addr_dump_t_handler (vl_api_nat44_interface_addr_dump_t
+ * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ u32 *i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach (i, sm->auto_add_sw_if_indices)
+ send_nat44_interface_addr_details(*i, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat44_interface_addr_dump_t_print (vl_api_nat44_interface_addr_dump_t *
+ mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_interface_addr_dump ");
+
+ FINISH;
+}
+
+static void
+send_nat44_user_details (snat_user_t * u, unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_nat44_user_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ fib_table_t *fib = fib_table_get (u->fib_index, FIB_PROTOCOL_IP4);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT44_USER_DETAILS + sm->msg_id_base);
+
+ rmp->vrf_id = ntohl (fib->ft_table_id);
+
+ clib_memcpy (rmp->ip_address, &(u->addr), 4);
+ rmp->nsessions = ntohl (u->nsessions);
+ rmp->nstaticsessions = ntohl (u->nstaticsessions);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat44_user_dump_t_handler (vl_api_nat44_user_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_main_per_thread_data_t *tsm;
+ snat_user_t *u;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach (tsm, sm->per_thread_data)
+ vec_foreach (u, tsm->users)
+ send_nat44_user_details (u, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat44_user_dump_t_print (vl_api_nat44_user_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_user_dump ");
+
+ FINISH;
+}
+
+static void
+send_nat44_user_session_details (snat_session_t * s,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_nat44_user_session_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_USER_SESSION_DETAILS + sm->msg_id_base);
+ clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
+ clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
+ rmp->is_static = s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING ? 1 : 0;
+ rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
+ rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
+ rmp->total_pkts = ntohl (s->total_pkts);
+ rmp->context = context;
+ if (snat_is_unk_proto_session (s))
+ {
+ rmp->outside_port = 0;
+ rmp->inside_port = 0;
+ rmp->protocol = ntohs (s->in2out.port);
+ }
+ else
+ {
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = ntohs (snat_proto_to_ip_proto (s->in2out.protocol));
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t *
+ mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_main_per_thread_data_t *tsm;
+ snat_session_t *s;
+ clib_bihash_kv_8_8_t key, value;
+ snat_user_key_t ukey;
+ snat_user_t *u;
+ u32 session_index, head_index, elt_index;
+ dlist_elt_t *head, *elt;
+ ip4_header_t ip;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ clib_memcpy (&ukey.addr, mp->ip_address, 4);
+ ip.src_address.as_u32 = ukey.addr.as_u32;
+ ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id));
+ key.key = ukey.as_u64;
+ if (sm->num_workers)
+ tsm =
+ vec_elt_at_index (sm->per_thread_data,
+ sm->worker_in2out_cb (&ip, ukey.fib_index));
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+ if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value))
+ return;
+ u = pool_elt_at_index (tsm->users, value.value);
+ if (!u->nsessions && !u->nstaticsessions)
+ return;
+
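+  /*
+   * A user's sessions are kept on a circular dlist in tsm->list_pool.
+   * Walk it from head->next, sending one details message per session;
+   * the list head carries the value ~0, which terminates the walk.
+   */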
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tsm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ session_index = elt->value;
+ while (session_index != ~0)
+ {
+ s = pool_elt_at_index (tsm->sessions, session_index);
+
+ send_nat44_user_session_details (s, q, mp->context);
+
+ elt_index = elt->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ session_index = elt->value;
+ }
+}
+
+static void *
+vl_api_nat44_user_session_dump_t_print (vl_api_nat44_user_session_dump_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_user_session_dump ");
+ s = format (s, "ip_address %U vrf_id %d\n",
+ format_ip4_address, mp->ip_address,
+ clib_net_to_host_u32 (mp->vrf_id));
+
+ FINISH;
+}
+
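+/*
+ * Convert the API's address/port/probability tuples into a vector of
+ * internal nat44_lb_addr_port_t entries; the caller owns the vector
+ * and must release it with vec_free(), as the handler below does.
+ */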
+static nat44_lb_addr_port_t *
+unformat_nat44_lb_addr_port (vl_api_nat44_lb_addr_port_t * addr_port_pairs,
+ u8 addr_port_pair_num)
+{
+ u8 i;
+ nat44_lb_addr_port_t *lb_addr_port_pairs = 0, lb_addr_port;
+ vl_api_nat44_lb_addr_port_t *ap;
+
+ for (i = 0; i < addr_port_pair_num; i++)
+ {
+ ap = &addr_port_pairs[i];
+ memset (&lb_addr_port, 0, sizeof (lb_addr_port));
+ clib_memcpy (&lb_addr_port.addr, ap->addr, 4);
+ lb_addr_port.port = clib_net_to_host_u16 (ap->port);
+ lb_addr_port.probability = ap->probability;
+ vec_add1 (lb_addr_port_pairs, lb_addr_port);
+ }
+
+ return lb_addr_port_pairs;
+}
+
+static void
+ vl_api_nat44_add_del_lb_static_mapping_t_handler
+ (vl_api_nat44_add_del_lb_static_mapping_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat44_add_del_lb_static_mapping_reply_t *rmp;
+ int rv = 0;
+ nat44_lb_addr_port_t *locals = 0;
+ ip4_address_t e_addr;
+ snat_protocol_t proto;
+
+ locals = unformat_nat44_lb_addr_port (mp->locals, mp->local_num);
+ clib_memcpy (&e_addr, mp->external_addr, 4);
+ proto = ip_proto_to_snat_proto (mp->protocol);
+
+ rv =
+ nat44_add_del_lb_static_mapping (e_addr,
+ clib_net_to_host_u16 (mp->external_port),
+ proto, clib_net_to_host_u32 (mp->vrf_id),
+ locals, mp->is_add);
+
+ vec_free (locals);
+
+ REPLY_MACRO (VL_API_NAT44_ADD_DEL_LB_STATIC_MAPPING_REPLY);
+}
+
+static void *vl_api_nat44_add_del_lb_static_mapping_t_print
+ (vl_api_nat44_add_del_lb_static_mapping_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_add_del_lb_static_mapping ");
+ s = format (s, "is_add %d\n", mp->is_add);
+
+ FINISH;
+}
+
+static void
+send_nat44_lb_static_mapping_details (snat_static_mapping_t * m,
+ unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_nat44_lb_static_mapping_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ nat44_lb_addr_port_t *ap;
+ vl_api_nat44_lb_addr_port_t *locals;
+
+ rmp =
+ vl_msg_api_alloc (sizeof (*rmp) +
+    vl_msg_api_alloc (sizeof (*rmp) +
+		      (vec_len (m->locals) *
+		       sizeof (vl_api_nat44_lb_addr_port_t)));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_LB_STATIC_MAPPING_DETAILS + sm->msg_id_base);
+
+ clib_memcpy (rmp->external_addr, &(m->external_addr), 4);
+ rmp->external_port = ntohs (m->external_port);
+ rmp->protocol = snat_proto_to_ip_proto (m->proto);
+ rmp->vrf_id = ntohl (m->vrf_id);
+ rmp->context = context;
+
+ locals = (vl_api_nat44_lb_addr_port_t *) rmp->locals;
+ vec_foreach (ap, m->locals)
+ {
+ clib_memcpy (locals->addr, &(ap->addr), 4);
+ locals->port = htons (ap->port);
+ locals->probability = ap->probability;
+ locals++;
+ rmp->local_num++;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_nat44_lb_static_mapping_dump_t_handler
+ (vl_api_nat44_lb_static_mapping_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_static_mapping_t *m;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (m, sm->static_mappings,
+ ({
+ if (vec_len(m->locals))
+ send_nat44_lb_static_mapping_details (m, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_nat44_lb_static_mapping_dump_t_print
+ (vl_api_nat44_lb_static_mapping_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat44_lb_static_mapping_dump ");
+
+ FINISH;
+}
+
+/*******************************/
+/*** Deterministic NAT (CGN) ***/
+/*******************************/
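+/*
+ * The handlers below wrap the snat_det_* helpers from nat_det.h.
+ * Because the inside-to-outside mapping is computed algorithmically
+ * from the configured prefix pair, translations can be recovered
+ * without per-session logging, which is the point of deterministic
+ * NAT in CGN deployments.
+ */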
+
+static void
+vl_api_nat_det_add_del_map_t_handler (vl_api_nat_det_add_del_map_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat_det_add_del_map_reply_t *rmp;
+ int rv = 0;
+ ip4_address_t in_addr, out_addr;
+
+ if (!mp->is_nat44)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto send_reply;
+ }
+
+ clib_memcpy (&in_addr, mp->in_addr, 4);
+ clib_memcpy (&out_addr, mp->out_addr, 4);
+ rv = snat_det_add_map (sm, &in_addr, mp->in_plen, &out_addr,
+ mp->out_plen, mp->is_add);
+
+send_reply:
+ REPLY_MACRO (VL_API_NAT_DET_ADD_DEL_MAP_REPLY);
+}
+
+static void *
+vl_api_nat_det_add_del_map_t_print (vl_api_nat_det_add_del_map_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_det_add_del_map ");
+ s = format (s, "inside address %U/%d outside address %U/%d\n",
+ format_ip4_address, mp->in_addr, mp->in_plen,
+ format_ip4_address, mp->out_addr, mp->out_plen);
+
+ FINISH;
+}
+
+static void
+vl_api_nat_det_forward_t_handler (vl_api_nat_det_forward_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat_det_forward_reply_t *rmp;
+ int rv = 0;
+ u16 lo_port = 0, hi_port = 0;
+ snat_det_map_t *dm;
+ ip4_address_t in_addr, out_addr;
+
+ if (!mp->is_nat44)
+ {
+ out_addr.as_u32 = 0;
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto send_reply;
+ }
+
+ out_addr.as_u32 = 0;
+ clib_memcpy (&in_addr, mp->in_addr, 4);
+ dm = snat_det_map_by_user (sm, &in_addr);
+ if (!dm)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+
+ snat_det_forward (dm, &in_addr, &out_addr, &lo_port);
+ hi_port = lo_port + dm->ports_per_host - 1;
+
+send_reply:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_NAT_DET_FORWARD_REPLY,
+ ({
+ rmp->out_port_lo = ntohs (lo_port);
+ rmp->out_port_hi = ntohs (hi_port);
+ clib_memcpy (rmp->out_addr, &out_addr, 4);
+ }))
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat_det_forward_t_print (vl_api_nat_det_forward_t * mp, void *handle)
+{
+ u8 *s;
+
+  s = format (0, "SCRIPT: nat_det_forward ");
+ s = format (s, "inside ip address %U\n", format_ip4_address, mp->in_addr);
+
+ FINISH;
+}
+
+static void
+vl_api_nat_det_reverse_t_handler (vl_api_nat_det_reverse_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat_det_reverse_reply_t *rmp;
+ int rv = 0;
+ ip4_address_t out_addr, in_addr;
+ snat_det_map_t *dm;
+
+ in_addr.as_u32 = 0;
+ clib_memcpy (&out_addr, mp->out_addr, 4);
+ dm = snat_det_map_by_out (sm, &out_addr);
+ if (!dm)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+
+ snat_det_reverse (dm, &out_addr, htons (mp->out_port), &in_addr);
+
+send_reply:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_NAT_DET_REVERSE_REPLY,
+ ({
+ rmp->is_nat44 = 1;
+ memset (rmp->in_addr, 0, 16);
+ clib_memcpy (rmp->in_addr, &in_addr, 4);
+ }))
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat_det_reverse_t_print (vl_api_nat_det_reverse_t * mp, void *handle)
+{
+ u8 *s;
+
+  s = format (0, "SCRIPT: nat_det_reverse ");
+ s = format (s, "outside ip address %U outside port %d",
+ format_ip4_address, mp->out_addr, ntohs (mp->out_port));
+
+ FINISH;
+}
+
+static void
+send_nat_det_map_details (snat_det_map_t * m, unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_nat_det_map_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT_DET_MAP_DETAILS + sm->msg_id_base);
+ rmp->is_nat44 = 1;
+ clib_memcpy (rmp->in_addr, &m->in_addr, 4);
+ rmp->in_plen = m->in_plen;
+ clib_memcpy (rmp->out_addr, &m->out_addr, 4);
+ rmp->out_plen = m->out_plen;
+ rmp->sharing_ratio = htonl (m->sharing_ratio);
+ rmp->ports_per_host = htons (m->ports_per_host);
+ rmp->ses_num = htonl (m->ses_num);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat_det_map_dump_t_handler (vl_api_nat_det_map_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ snat_det_map_t *m;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ vec_foreach(m, sm->det_maps)
+    send_nat_det_map_details(m, q, mp->context);
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat_det_map_dump_t_print (vl_api_nat_det_map_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_det_map_dump ");
+
+ FINISH;
+}
+
+static void
+vl_api_nat_det_set_timeouts_t_handler (vl_api_nat_det_set_timeouts_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat_det_set_timeouts_reply_t *rmp;
+ int rv = 0;
+
+ sm->udp_timeout = ntohl (mp->udp);
+ sm->tcp_established_timeout = ntohl (mp->tcp_established);
+ sm->tcp_transitory_timeout = ntohl (mp->tcp_transitory);
+ sm->icmp_timeout = ntohl (mp->icmp);
+
+ REPLY_MACRO (VL_API_NAT_DET_SET_TIMEOUTS_REPLY);
+}
+
+static void *
+vl_api_nat_det_set_timeouts_t_print (vl_api_nat_det_set_timeouts_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_det_set_timeouts ");
+ s = format (s, "udp %d tcp_established %d tcp_transitory %d icmp %d\n",
+ ntohl (mp->udp),
+ ntohl (mp->tcp_established),
+ ntohl (mp->tcp_transitory), ntohl (mp->icmp));
+
+ FINISH;
+}
+
+static void
+vl_api_nat_det_get_timeouts_t_handler (vl_api_nat_det_get_timeouts_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat_det_get_timeouts_reply_t *rmp;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_NAT_DET_GET_TIMEOUTS_REPLY,
+ ({
+ rmp->udp = htonl (sm->udp_timeout);
+ rmp->tcp_established = htonl (sm->tcp_established_timeout);
+ rmp->tcp_transitory = htonl (sm->tcp_transitory_timeout);
+ rmp->icmp = htonl (sm->icmp_timeout);
+ }))
+ /* *INDENT-ON* */
+}
+
+static void *
+vl_api_nat_det_get_timeouts_t_print (vl_api_nat_det_get_timeouts_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_det_get_timeouts");
+
+ FINISH;
+}
+
+static void
+vl_api_nat_det_close_session_out_t_handler (vl_api_nat_det_close_session_out_t
+ * mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat_det_close_session_out_reply_t *rmp;
+ ip4_address_t out_addr, ext_addr, in_addr;
+ snat_det_out_key_t key;
+ snat_det_map_t *dm;
+ snat_det_session_t *ses;
+ int rv = 0;
+
+ clib_memcpy (&out_addr, mp->out_addr, 4);
+ clib_memcpy (&ext_addr, mp->ext_addr, 4);
+
+ dm = snat_det_map_by_out (sm, &out_addr);
+ if (!dm)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+  snat_det_reverse (dm, &out_addr, ntohs (mp->out_port), &in_addr);
+ key.ext_host_addr = ext_addr;
+ key.ext_host_port = mp->ext_port;
+ key.out_port = mp->out_port;
+ ses = snat_det_get_ses_by_out (dm, &in_addr, key.as_u64);
+ if (!ses)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+ snat_det_ses_close (dm, ses);
+
+send_reply:
+ REPLY_MACRO (VL_API_NAT_DET_CLOSE_SESSION_OUT_REPLY);
+}
+
+static void *
+vl_api_nat_det_close_session_out_t_print (vl_api_nat_det_close_session_out_t *
+ mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_det_close_session_out ");
+ s = format (s, "out_addr %U out_port %d "
+ "ext_addr %U ext_port %d\n",
+ format_ip4_address, mp->out_addr, ntohs (mp->out_port),
+ format_ip4_address, mp->ext_addr, ntohs (mp->ext_port));
+
+ FINISH;
+}
+
+static void
+vl_api_nat_det_close_session_in_t_handler (vl_api_nat_det_close_session_in_t *
+ mp)
+{
+ snat_main_t *sm = &snat_main;
+ vl_api_nat_det_close_session_in_reply_t *rmp;
+ ip4_address_t in_addr, ext_addr;
+ snat_det_out_key_t key;
+ snat_det_map_t *dm;
+ snat_det_session_t *ses;
+ int rv = 0;
+
+ if (!mp->is_nat44)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto send_reply;
+ }
+
+ clib_memcpy (&in_addr, mp->in_addr, 4);
+ clib_memcpy (&ext_addr, mp->ext_addr, 4);
+
+ dm = snat_det_map_by_user (sm, &in_addr);
+ if (!dm)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+ key.ext_host_addr = ext_addr;
+ key.ext_host_port = mp->ext_port;
+ ses = snat_det_find_ses_by_in (dm, &in_addr, mp->in_port, key);
+ if (!ses)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto send_reply;
+ }
+ snat_det_ses_close (dm, ses);
+
+send_reply:
+  REPLY_MACRO (VL_API_NAT_DET_CLOSE_SESSION_IN_REPLY);
+}
+
+static void *
+vl_api_nat_det_close_session_in_t_print (vl_api_nat_det_close_session_in_t *
+ mp, void *handle)
+{
+ u8 *s;
+ s = format (0, "SCRIPT: nat_det_close_session_in ");
+ s = format (s, "in_addr %U in_port %d ext_addr %U ext_port %d\n",
+ format_ip4_address, mp->in_addr, ntohs (mp->in_port),
+ format_ip4_address, mp->ext_addr, ntohs (mp->ext_port));
+
+ FINISH;
+}
+
+static void
+send_nat_det_session_details (snat_det_session_t * s,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_nat_det_session_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT_DET_SESSION_DETAILS + sm->msg_id_base);
+ rmp->in_port = s->in_port;
+ clib_memcpy (rmp->ext_addr, &s->out.ext_host_addr, 4);
+ rmp->ext_port = s->out.ext_host_port;
+ rmp->out_port = s->out.out_port;
+ rmp->state = s->state;
+ rmp->expire = ntohl (s->expire);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_nat_det_session_dump_t_handler (vl_api_nat_det_session_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t *sm = &snat_main;
+ ip4_address_t user_addr;
+ snat_det_map_t *dm;
+  snat_det_session_t *s;
+ u16 i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+ if (!mp->is_nat44)
+ return;
+
+ clib_memcpy (&user_addr, mp->user_addr, 4);
+ dm = snat_det_map_by_user (sm, &user_addr);
+ if (!dm)
+ return;
+
+ s = dm->sessions + snat_det_user_ses_offset (&user_addr, dm->in_plen);
+ for (i = 0; i < SNAT_DET_SES_PER_USER; i++)
+ {
+ if (s->out.as_u64)
+ send_nat_det_session_details (s, q, mp->context);
+ s++;
+ }
+}
+
+static void *
+vl_api_nat_det_session_dump_t_print (vl_api_nat_det_session_dump_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat_det_session_dump ");
+ s = format (s, "user_addr %U\n", format_ip4_address, mp->user_addr);
+
+ FINISH;
+}
+
+/*************/
+/*** NAT64 ***/
+/*************/
+
+static void
+ vl_api_nat64_add_del_pool_addr_range_t_handler
+ (vl_api_nat64_add_del_pool_addr_range_t * mp)
+{
+ vl_api_nat64_add_del_pool_addr_range_reply_t *rmp;
+ snat_main_t *sm = &snat_main;
+ nat64_main_t *nm = &nat64_main;
+ int rv = 0;
+ ip4_address_t this_addr;
+ u32 start_host_order, end_host_order;
+ u32 vrf_id;
+ int i, count;
+ u32 *tmp;
+
+ if (nm->is_disabled)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+  tmp = (u32 *) mp->start_addr;
+  start_host_order = clib_net_to_host_u32 (tmp[0]);
+  tmp = (u32 *) mp->end_addr;
+  end_host_order = clib_net_to_host_u32 (tmp[0]);
+
+ count = (end_host_order - start_host_order) + 1;
+
+  vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+
+ memcpy (&this_addr.as_u8, mp->start_addr, 4);
+
+ for (i = 0; i < count; i++)
+ {
+ if ((rv = nat64_add_del_pool_addr (&this_addr, vrf_id, mp->is_add)))
+ goto send_reply;
+
+ increment_v4_address (&this_addr);
+ }
+
+send_reply:
+ REPLY_MACRO (VL_API_NAT64_ADD_DEL_POOL_ADDR_RANGE_REPLY);
+}
+
+static void *vl_api_nat64_add_del_pool_addr_range_t_print
+ (vl_api_nat64_add_del_pool_addr_range_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat64_add_del_pool_addr_range ");
+ s = format (s, "%U - %U vrf_id %u %s\n",
+ format_ip4_address, mp->start_addr,
+ format_ip4_address, mp->end_addr,
+ ntohl (mp->vrf_id), mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+typedef struct nat64_api_walk_ctx_t_
+{
+ unix_shared_memory_queue_t *q;
+ u32 context;
+} nat64_api_walk_ctx_t;
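+
+/*
+ * Context threaded through the nat64_*_walk() iterators below: each
+ * callback allocates a details message, stamps it with ctx->context so
+ * the client can match replies to its dump request, and sends it to
+ * the client's input queue ctx->q.
+ */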
+
+static int
+nat64_api_pool_walk (snat_address_t * a, void *arg)
+{
+ vl_api_nat64_pool_addr_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ nat64_api_walk_ctx_t *ctx = arg;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT64_POOL_ADDR_DETAILS + sm->msg_id_base);
+ clib_memcpy (rmp->address, &(a->addr), 4);
+ if (a->fib_index != ~0)
+ {
+ fib_table_t *fib = fib_table_get (a->fib_index, FIB_PROTOCOL_IP6);
+ if (!fib)
+ return -1;
+ rmp->vrf_id = ntohl (fib->ft_table_id);
+ }
+ else
+ rmp->vrf_id = ~0;
+ rmp->context = ctx->context;
+
+ vl_msg_api_send_shmem (ctx->q, (u8 *) & rmp);
+
+ return 0;
+}
+
+static void
+vl_api_nat64_pool_addr_dump_t_handler (vl_api_nat64_pool_addr_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ nat64_api_walk_ctx_t ctx = {
+ .q = q,
+ .context = mp->context,
+ };
+
+ nat64_pool_addr_walk (nat64_api_pool_walk, &ctx);
+}
+
+static void *
+vl_api_nat64_pool_addr_dump_t_print (vl_api_nat64_pool_addr_dump_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat64_pool_addr_dump\n");
+
+ FINISH;
+}
+
+static void
+vl_api_nat64_add_del_interface_t_handler (vl_api_nat64_add_del_interface_t *
+ mp)
+{
+ snat_main_t *sm = &snat_main;
+ nat64_main_t *nm = &nat64_main;
+ vl_api_nat64_add_del_interface_reply_t *rmp;
+ int rv = 0;
+
+ if (nm->is_disabled)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv =
+ nat64_add_del_interface (ntohl (mp->sw_if_index), mp->is_inside,
+ mp->is_add);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+send_reply:
+ REPLY_MACRO (VL_API_NAT64_ADD_DEL_INTERFACE_REPLY);
+}
+
+static void *
+vl_api_nat64_add_del_interface_t_print (vl_api_nat64_add_del_interface_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat64_add_del_interface ");
+ s = format (s, "sw_if_index %d %s %s",
+ clib_host_to_net_u32 (mp->sw_if_index),
+ mp->is_inside ? "in" : "out", mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static int
+nat64_api_interface_walk (snat_interface_t * i, void *arg)
+{
+ vl_api_nat64_interface_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ nat64_api_walk_ctx_t *ctx = arg;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT64_INTERFACE_DETAILS + sm->msg_id_base);
+ rmp->sw_if_index = ntohl (i->sw_if_index);
+ rmp->is_inside = i->is_inside;
+ rmp->context = ctx->context;
+
+ vl_msg_api_send_shmem (ctx->q, (u8 *) & rmp);
+
+ return 0;
+}
+
+static void
+vl_api_nat64_interface_dump_t_handler (vl_api_nat64_interface_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ nat64_api_walk_ctx_t ctx = {
+ .q = q,
+ .context = mp->context,
+ };
+
+ nat64_interfaces_walk (nat64_api_interface_walk, &ctx);
+}
+
+static void *
+vl_api_nat64_interface_dump_t_print (vl_api_nat64_interface_dump_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+  s = format (0, "SCRIPT: nat64_interface_dump ");
+
+ FINISH;
+}
+
+static void
+ vl_api_nat64_add_del_static_bib_t_handler
+ (vl_api_nat64_add_del_static_bib_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ nat64_main_t *nm = &nat64_main;
+ vl_api_nat64_add_del_static_bib_reply_t *rmp;
+ ip6_address_t in_addr;
+ ip4_address_t out_addr;
+ int rv = 0;
+
+ if (nm->is_disabled)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+ memcpy (&in_addr.as_u8, mp->i_addr, 16);
+ memcpy (&out_addr.as_u8, mp->o_addr, 4);
+
+ rv =
+ nat64_add_del_static_bib_entry (&in_addr, &out_addr,
+ clib_net_to_host_u16 (mp->i_port),
+ clib_net_to_host_u16 (mp->o_port),
+ mp->proto,
+ clib_net_to_host_u32 (mp->vrf_id),
+ mp->is_add);
+
+send_reply:
+ REPLY_MACRO (VL_API_NAT64_ADD_DEL_STATIC_BIB_REPLY);
+}
+
+static void *vl_api_nat64_add_del_static_bib_t_print
+ (vl_api_nat64_add_del_static_bib_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat64_add_del_static_bib ");
+ s = format (s, "protocol %d i_addr %U o_addr %U ",
+ mp->proto,
+ format_ip6_address, mp->i_addr, format_ip4_address, mp->o_addr);
+
+ if (mp->vrf_id != ~0)
+ s = format (s, "vrf %d", clib_net_to_host_u32 (mp->vrf_id));
+
+ FINISH;
+}
+
+static int
+nat64_api_bib_walk (nat64_db_bib_entry_t * bibe, void *arg)
+{
+ vl_api_nat64_bib_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ nat64_api_walk_ctx_t *ctx = arg;
+ fib_table_t *fib;
+
+ fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6);
+ if (!fib)
+ return -1;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT64_BIB_DETAILS + sm->msg_id_base);
+ rmp->context = ctx->context;
+ clib_memcpy (rmp->i_addr, &(bibe->in_addr), 16);
+ clib_memcpy (rmp->o_addr, &(bibe->out_addr), 4);
+ rmp->i_port = bibe->in_port;
+ rmp->o_port = bibe->out_port;
+ rmp->vrf_id = ntohl (fib->ft_table_id);
+ rmp->proto = bibe->proto;
+ rmp->is_static = bibe->is_static;
+ rmp->ses_num = ntohl (bibe->ses_num);
+
+ vl_msg_api_send_shmem (ctx->q, (u8 *) & rmp);
+
+ return 0;
+}
+
+static void
+vl_api_nat64_bib_dump_t_handler (vl_api_nat64_bib_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ nat64_api_walk_ctx_t ctx = {
+ .q = q,
+ .context = mp->context,
+ };
+
+ nat64_db_bib_walk (&nm->db, mp->proto, nat64_api_bib_walk, &ctx);
+}
+
+static void *
+vl_api_nat64_bib_dump_t_print (vl_api_nat64_bib_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+  s = format (0, "SCRIPT: nat64_bib_dump protocol %d", mp->proto);
+
+ FINISH;
+}
+
+static void
+vl_api_nat64_set_timeouts_t_handler (vl_api_nat64_set_timeouts_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ nat64_main_t *nm = &nat64_main;
+ vl_api_nat64_set_timeouts_reply_t *rmp;
+ int rv = 0;
+
+ if (nm->is_disabled)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+ rv = nat64_set_icmp_timeout (ntohl (mp->icmp));
+ if (rv)
+ goto send_reply;
+ rv = nat64_set_udp_timeout (ntohl (mp->udp));
+ if (rv)
+ goto send_reply;
+ rv =
+ nat64_set_tcp_timeouts (ntohl (mp->tcp_trans), ntohl (mp->tcp_est),
+ ntohl (mp->tcp_incoming_syn));
+
+send_reply:
+ REPLY_MACRO (VL_API_NAT64_SET_TIMEOUTS_REPLY);
+}
+
+static void *vl_api_nat64_set_timeouts_t_print
+ (vl_api_nat64_set_timeouts_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat64_set_timeouts ");
+ s =
+ format (s,
+	      "udp %d icmp %d tcp_trans %d tcp_est %d tcp_incoming_syn %d\n",
+ ntohl (mp->udp), ntohl (mp->icmp), ntohl (mp->tcp_trans),
+ ntohl (mp->tcp_est), ntohl (mp->tcp_incoming_syn));
+
+ FINISH;
+}
+
+static void
+vl_api_nat64_get_timeouts_t_handler (vl_api_nat64_get_timeouts_t * mp)
+{
+ snat_main_t *sm = &snat_main;
+ nat64_main_t *nm = &nat64_main;
+ vl_api_nat64_get_timeouts_reply_t *rmp;
+ int rv = 0;
+
+ if (nm->is_disabled)
+ return;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_NAT64_GET_TIMEOUTS_REPLY,
+ ({
+ rmp->udp = htonl (nat64_get_udp_timeout());
+ rmp->icmp = htonl (nat64_get_icmp_timeout());
+ rmp->tcp_trans = htonl (nat64_get_tcp_trans_timeout());
+ rmp->tcp_est = htonl (nat64_get_tcp_est_timeout());
+ rmp->tcp_incoming_syn = htonl (nat64_get_tcp_incoming_syn_timeout());
+ }))
+ /* *INDENT-ON* */
+}
+
+static void *vl_api_nat64_get_timeouts_t_print
+ (vl_api_nat64_get_timeouts_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat64_get_timeouts");
+
+ FINISH;
+}
+
+static int
+nat64_api_st_walk (nat64_db_st_entry_t * ste, void *arg)
+{
+ vl_api_nat64_st_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ nat64_api_walk_ctx_t *ctx = arg;
+ nat64_main_t *nm = &nat64_main;
+ nat64_db_bib_entry_t *bibe;
+ fib_table_t *fib;
+
+ bibe = nat64_db_bib_entry_by_index (&nm->db, ste->proto, ste->bibe_index);
+ if (!bibe)
+ return -1;
+
+ fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6);
+ if (!fib)
+ return -1;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT64_ST_DETAILS + sm->msg_id_base);
+ rmp->context = ctx->context;
+ clib_memcpy (rmp->il_addr, &(bibe->in_addr), 16);
+ clib_memcpy (rmp->ol_addr, &(bibe->out_addr), 4);
+ rmp->il_port = bibe->in_port;
+ rmp->ol_port = bibe->out_port;
+ clib_memcpy (rmp->ir_addr, &(ste->in_r_addr), 16);
+ clib_memcpy (rmp->or_addr, &(ste->out_r_addr), 4);
+  rmp->r_port = ste->r_port;
+ rmp->vrf_id = ntohl (fib->ft_table_id);
+ rmp->proto = ste->proto;
+
+ vl_msg_api_send_shmem (ctx->q, (u8 *) & rmp);
+
+ return 0;
+}
+
+static void
+vl_api_nat64_st_dump_t_handler (vl_api_nat64_st_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ nat64_api_walk_ctx_t ctx = {
+ .q = q,
+ .context = mp->context,
+ };
+
+ nat64_db_st_walk (&nm->db, mp->proto, nat64_api_st_walk, &ctx);
+}
+
+static void *
+vl_api_nat64_st_dump_t_print (vl_api_nat64_st_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+  s = format (0, "SCRIPT: nat64_st_dump protocol %d", mp->proto);
+
+ FINISH;
+}
+
+static void
+vl_api_nat64_add_del_prefix_t_handler (vl_api_nat64_add_del_prefix_t * mp)
+{
+ vl_api_nat64_add_del_prefix_reply_t *rmp;
+ snat_main_t *sm = &snat_main;
+ nat64_main_t *nm = &nat64_main;
+ ip6_address_t prefix;
+ int rv = 0;
+
+ if (nm->is_disabled)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+ memcpy (&prefix.as_u8, mp->prefix, 16);
+
+ rv =
+ nat64_add_del_prefix (&prefix, mp->prefix_len,
+ clib_net_to_host_u32 (mp->vrf_id), mp->is_add);
+send_reply:
+ REPLY_MACRO (VL_API_NAT64_ADD_DEL_PREFIX_REPLY);
+}
+
+static void *
+vl_api_nat64_add_del_prefix_t_print (vl_api_nat64_add_del_prefix_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat64_add_del_prefix %U/%u vrf_id %u %s\n",
+ format_ip6_address, mp->prefix, mp->prefix_len,
+ ntohl (mp->vrf_id), mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static int
+nat64_api_prefix_walk (nat64_prefix_t * p, void *arg)
+{
+ vl_api_nat64_prefix_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ nat64_api_walk_ctx_t *ctx = arg;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_NAT64_PREFIX_DETAILS + sm->msg_id_base);
+ clib_memcpy (rmp->prefix, &(p->prefix), 16);
+ rmp->prefix_len = p->plen;
+ rmp->vrf_id = ntohl (p->vrf_id);
+ rmp->context = ctx->context;
+
+ vl_msg_api_send_shmem (ctx->q, (u8 *) & rmp);
+
+ return 0;
+}
+
+static void
+vl_api_nat64_prefix_dump_t_handler (vl_api_nat64_prefix_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ nat64_main_t *nm = &nat64_main;
+
+ if (nm->is_disabled)
+ return;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ nat64_api_walk_ctx_t ctx = {
+ .q = q,
+ .context = mp->context,
+ };
+
+ nat64_prefix_walk (nat64_api_prefix_walk, &ctx);
+}
+
+static void *
+vl_api_nat64_prefix_dump_t_print (vl_api_nat64_prefix_dump_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: nat64_prefix_dump\n");
+
+ FINISH;
+}
+
+/* List of message types that this plugin understands */
+#define foreach_snat_plugin_api_msg \
+_(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range) \
+_(SNAT_INTERFACE_ADD_DEL_FEATURE, snat_interface_add_del_feature) \
+_(SNAT_ADD_STATIC_MAPPING, snat_add_static_mapping) \
+_(SNAT_CONTROL_PING, snat_control_ping) \
+_(SNAT_STATIC_MAPPING_DUMP, snat_static_mapping_dump) \
+_(SNAT_SHOW_CONFIG, snat_show_config) \
+_(SNAT_ADDRESS_DUMP, snat_address_dump) \
+_(SNAT_INTERFACE_DUMP, snat_interface_dump) \
+_(SNAT_SET_WORKERS, snat_set_workers) \
+_(SNAT_WORKER_DUMP, snat_worker_dump) \
+_(SNAT_ADD_DEL_INTERFACE_ADDR, snat_add_del_interface_addr) \
+_(SNAT_INTERFACE_ADDR_DUMP, snat_interface_addr_dump) \
+_(SNAT_IPFIX_ENABLE_DISABLE, snat_ipfix_enable_disable) \
+_(SNAT_USER_DUMP, snat_user_dump) \
+_(SNAT_USER_SESSION_DUMP, snat_user_session_dump) \
+_(SNAT_INTERFACE_ADD_DEL_OUTPUT_FEATURE, \
+ snat_interface_add_del_output_feature) \
+_(SNAT_INTERFACE_OUTPUT_FEATURE_DUMP, \
+ snat_interface_output_feature_dump) \
+_(SNAT_ADD_DET_MAP, snat_add_det_map) \
+_(SNAT_DET_FORWARD, snat_det_forward) \
+_(SNAT_DET_REVERSE, snat_det_reverse) \
+_(SNAT_DET_MAP_DUMP, snat_det_map_dump) \
+_(SNAT_DET_SET_TIMEOUTS, snat_det_set_timeouts) \
+_(SNAT_DET_GET_TIMEOUTS, snat_det_get_timeouts) \
+_(SNAT_DET_CLOSE_SESSION_OUT, snat_det_close_session_out) \
+_(SNAT_DET_CLOSE_SESSION_IN, snat_det_close_session_in) \
+_(SNAT_DET_SESSION_DUMP, snat_det_session_dump) \
+_(NAT_CONTROL_PING, nat_control_ping) \
+_(NAT_SHOW_CONFIG, nat_show_config) \
+_(NAT_SET_WORKERS, nat_set_workers) \
+_(NAT_WORKER_DUMP, nat_worker_dump) \
+_(NAT_IPFIX_ENABLE_DISABLE, nat_ipfix_enable_disable) \
+_(NAT44_ADD_DEL_ADDRESS_RANGE, nat44_add_del_address_range) \
+_(NAT44_INTERFACE_ADD_DEL_FEATURE, nat44_interface_add_del_feature) \
+_(NAT44_ADD_DEL_STATIC_MAPPING, nat44_add_del_static_mapping) \
+_(NAT44_STATIC_MAPPING_DUMP, nat44_static_mapping_dump) \
+_(NAT44_ADDRESS_DUMP, nat44_address_dump) \
+_(NAT44_INTERFACE_DUMP, nat44_interface_dump) \
+_(NAT44_ADD_DEL_INTERFACE_ADDR, nat44_add_del_interface_addr) \
+_(NAT44_INTERFACE_ADDR_DUMP, nat44_interface_addr_dump) \
+_(NAT44_USER_DUMP, nat44_user_dump) \
+_(NAT44_USER_SESSION_DUMP, nat44_user_session_dump) \
+_(NAT44_INTERFACE_ADD_DEL_OUTPUT_FEATURE, \
+ nat44_interface_add_del_output_feature) \
+_(NAT44_INTERFACE_OUTPUT_FEATURE_DUMP, \
+ nat44_interface_output_feature_dump) \
+_(NAT44_ADD_DEL_LB_STATIC_MAPPING, nat44_add_del_lb_static_mapping) \
+_(NAT44_LB_STATIC_MAPPING_DUMP, nat44_lb_static_mapping_dump) \
+_(NAT_DET_ADD_DEL_MAP, nat_det_add_del_map) \
+_(NAT_DET_FORWARD, nat_det_forward) \
+_(NAT_DET_REVERSE, nat_det_reverse) \
+_(NAT_DET_MAP_DUMP, nat_det_map_dump) \
+_(NAT_DET_SET_TIMEOUTS, nat_det_set_timeouts) \
+_(NAT_DET_GET_TIMEOUTS, nat_det_get_timeouts) \
+_(NAT_DET_CLOSE_SESSION_OUT, nat_det_close_session_out) \
+_(NAT_DET_CLOSE_SESSION_IN, nat_det_close_session_in) \
+_(NAT_DET_SESSION_DUMP, nat_det_session_dump) \
+_(NAT64_ADD_DEL_POOL_ADDR_RANGE, nat64_add_del_pool_addr_range) \
+_(NAT64_POOL_ADDR_DUMP, nat64_pool_addr_dump) \
+_(NAT64_ADD_DEL_INTERFACE, nat64_add_del_interface) \
+_(NAT64_INTERFACE_DUMP, nat64_interface_dump) \
+_(NAT64_ADD_DEL_STATIC_BIB, nat64_add_del_static_bib) \
+_(NAT64_BIB_DUMP, nat64_bib_dump) \
+_(NAT64_SET_TIMEOUTS, nat64_set_timeouts) \
+_(NAT64_GET_TIMEOUTS, nat64_get_timeouts) \
+_(NAT64_ST_DUMP, nat64_st_dump) \
+_(NAT64_ADD_DEL_PREFIX, nat64_add_del_prefix) \
+_(NAT64_PREFIX_DUMP, nat64_prefix_dump)
+
+/* Set up the API message handling tables */
+static clib_error_t *
+snat_plugin_api_hookup (vlib_main_t * vm)
+{
+ snat_main_t *sm __attribute__ ((unused)) = &snat_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_snat_plugin_api_msg;
+#undef _
+
+ return 0;
+}
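+
+/*
+ * For illustration, one iteration of the X-macro above expands
+ * (roughly) to:
+ *
+ *   vl_msg_api_set_handlers (VL_API_NAT44_INTERFACE_DUMP + sm->msg_id_base,
+ *                            "nat44_interface_dump",
+ *                            vl_api_nat44_interface_dump_t_handler,
+ *                            vl_noop_handler,
+ *                            vl_api_nat44_interface_dump_t_endian,
+ *                            vl_api_nat44_interface_dump_t_print,
+ *                            sizeof (vl_api_nat44_interface_dump_t), 1);
+ */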
+
+#define vl_msg_name_crc_list
+#include <nat/nat_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (snat_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_nat;
+#undef _
+}
+
+static void
+plugin_custom_dump_configure (snat_main_t * sm)
+{
+#define _(n,f) sm->api_main->msg_print_handlers \
+ [VL_API_##n + sm->msg_id_base] \
+ = (void *) vl_api_##f##_t_print;
+ foreach_snat_plugin_api_msg;
+#undef _
+}
+
+clib_error_t *
+snat_api_init (vlib_main_t * vm, snat_main_t * sm)
+{
+ u8 *name;
+ clib_error_t *error = 0;
+
+ name = format (0, "snat_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base =
+ vl_msg_api_get_msg_ids ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = snat_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, sm->api_main);
+
+ plugin_custom_dump_configure (sm);
+
+ vec_free (name);
+
+ return error;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_det.c b/src/plugins/nat/nat_det.c
new file mode 100644
index 00000000..3af6698c
--- /dev/null
+++ b/src/plugins/nat/nat_det.c
@@ -0,0 +1,158 @@
+/*
+ * snat_det.c - deterministic NAT
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief deterministic NAT
+ */
+
+#include <nat/nat_det.h>
+
+
+/**
+ * @brief Add/delete deterministic NAT mapping.
+ *
+ * Create bijective mapping of inside address to outside address and port range
+ * pairs, with the purpose of enabling deterministic NAT to reduce logging in
+ * CGN deployments.
+ *
+ * @param sm SNAT main.
+ * @param in_addr Inside network address.
+ * @param in_plen Inside network prefix length.
+ * @param out_addr Outside network address.
+ * @param out_plen Outside network prefix length.
+ * @param is_add If 0 delete, otherwise add.
+ */
+int
+snat_det_add_map (snat_main_t * sm, ip4_address_t * in_addr, u8 in_plen,
+ ip4_address_t * out_addr, u8 out_plen, int is_add)
+{
+ snat_det_map_t *det_map;
+ static snat_det_session_t empty_snat_det_session = { 0 };
+ snat_interface_t *i;
+ ip4_address_t in_cmp, out_cmp;
+ u8 found = 0;
+
+ in_cmp.as_u32 = in_addr->as_u32 & ip4_main.fib_masks[in_plen];
+ out_cmp.as_u32 = out_addr->as_u32 & ip4_main.fib_masks[out_plen];
+ vec_foreach (det_map, sm->det_maps)
+ {
+    /* Check whether a mapping with the same in/out prefix pair exists */
+ if (det_map->in_addr.as_u32 == in_cmp.as_u32 &&
+ det_map->in_plen == in_plen &&
+ det_map->out_addr.as_u32 == out_cmp.as_u32 &&
+ det_map->out_plen == out_plen)
+ {
+ found = 1;
+ break;
+ }
+ }
+
+ /* If found, don't add again */
+ if (found && is_add)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ /* If not found, don't delete */
+ if (!found && !is_add)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ if (is_add)
+ {
+ pool_get (sm->det_maps, det_map);
+ memset (det_map, 0, sizeof (*det_map));
+ det_map->in_addr.as_u32 = in_cmp.as_u32;
+ det_map->in_plen = in_plen;
+ det_map->out_addr.as_u32 = out_cmp.as_u32;
+ det_map->out_plen = out_plen;
+ det_map->sharing_ratio = (1 << (32 - in_plen)) / (1 << (32 - out_plen));
+ det_map->ports_per_host = (65535 - 1023) / det_map->sharing_ratio;
+
+ vec_validate_init_empty (det_map->sessions,
+ SNAT_DET_SES_PER_USER * (1 << (32 - in_plen)) -
+ 1, empty_snat_det_session);
+ }
+ else
+ {
+ vec_free (det_map->sessions);
+ vec_del1 (sm->det_maps, det_map - sm->det_maps);
+ }
+
+ /* Add/del external address range to FIB */
+ /* *INDENT-OFF* */
+ pool_foreach (i, sm->interfaces,
+ ({
+ if (i->is_inside)
+ continue;
+
+ snat_add_del_addr_to_fib(out_addr, out_plen, i->sw_if_index, is_add);
+ break;
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
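+
+/*
+ * Worked example with hypothetical prefixes: mapping inside
+ * 10.0.0.0/24 onto outside 1.1.1.0/28 gives
+ *   sharing_ratio  = (1 << 8) / (1 << 4)  = 16 users per address
+ *   ports_per_host = (65535 - 1023) / 16  = 4032 ports per user
+ * and a sessions vector of 1000 * 256 entries, one block of
+ * SNAT_DET_SES_PER_USER sessions per inside host.
+ */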
+
+/**
+ * @brief The 'nat-det-expire-walk' process's main loop.
+ *
+ * Check expire time for active sessions.
+ */
+static uword
+snat_det_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ snat_main_t *sm = &snat_main;
+ snat_det_map_t *dm;
+ snat_det_session_t *ses;
+
+ while (sm->deterministic)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 10.0);
+ vlib_process_get_events (vm, NULL);
+ u32 now = (u32) vlib_time_now (vm);
+ /* *INDENT-OFF* */
+ pool_foreach (dm, sm->det_maps,
+ ({
+ vec_foreach(ses, dm->sessions)
+ {
+ /* Delete if session expired */
+ if (ses->in_port && (ses->expire < now))
+ snat_det_ses_close (dm, ses);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+static vlib_node_registration_t snat_det_expire_walk_node;
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (snat_det_expire_walk_node, static) = {
+ .function = snat_det_expire_walk_fn,
+ .type = VLIB_NODE_TYPE_PROCESS,
+  .name = "nat-det-expire-walk",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_det.h b/src/plugins/nat/nat_det.h
new file mode 100644
index 00000000..2ab7f27e
--- /dev/null
+++ b/src/plugins/nat/nat_det.h
@@ -0,0 +1,196 @@
+/*
+ * snat_det.h - deterministic NAT definitions
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief deterministic NAT definitions
+ */
+
+#ifndef __included_nat_det_h__
+#define __included_nat_det_h__
+
+#include <vnet/ip/ip.h>
+#include <nat/nat.h>
+#include <nat/nat_ipfix_logging.h>
+
+
+#define SNAT_DET_SES_PER_USER 1000
+
+
+int snat_det_add_map (snat_main_t * sm, ip4_address_t * in_addr, u8 in_plen,
+ ip4_address_t * out_addr, u8 out_plen, int is_add);
+
+always_inline int
+is_addr_in_net (ip4_address_t * addr, ip4_address_t * net, u8 plen)
+{
+ if (net->as_u32 == (addr->as_u32 & ip4_main.fib_masks[plen]))
+ return 1;
+ return 0;
+}
+
+always_inline snat_det_map_t *
+snat_det_map_by_user (snat_main_t * sm, ip4_address_t * user_addr)
+{
+ snat_det_map_t *dm;
+
+ /* *INDENT-OFF* */
+ pool_foreach (dm, sm->det_maps,
+ ({
+ if (is_addr_in_net(user_addr, &dm->in_addr, dm->in_plen))
+ return dm;
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+always_inline snat_det_map_t *
+snat_det_map_by_out (snat_main_t * sm, ip4_address_t * out_addr)
+{
+ snat_det_map_t *dm;
+
+ /* *INDENT-OFF* */
+ pool_foreach (dm, sm->det_maps,
+ ({
+ if (is_addr_in_net(out_addr, &dm->out_addr, dm->out_plen))
+ return dm;
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+always_inline void
+snat_det_forward (snat_det_map_t * dm, ip4_address_t * in_addr,
+ ip4_address_t * out_addr, u16 * lo_port)
+{
+ u32 in_offset, out_offset;
+
+ in_offset = clib_net_to_host_u32 (in_addr->as_u32) -
+ clib_net_to_host_u32 (dm->in_addr.as_u32);
+ out_offset = in_offset / dm->sharing_ratio;
+ out_addr->as_u32 =
+ clib_host_to_net_u32 (clib_net_to_host_u32 (dm->out_addr.as_u32) +
+ out_offset);
+ *lo_port = 1024 + dm->ports_per_host * (in_offset % dm->sharing_ratio);
+}
+
+always_inline void
+snat_det_reverse (snat_det_map_t * dm, ip4_address_t * out_addr, u16 out_port,
+ ip4_address_t * in_addr)
+{
+ u32 in_offset1, in_offset2, out_offset;
+
+ out_offset = clib_net_to_host_u32 (out_addr->as_u32) -
+ clib_net_to_host_u32 (dm->out_addr.as_u32);
+ in_offset1 = out_offset * dm->sharing_ratio;
+ in_offset2 = (out_port - 1024) / dm->ports_per_host;
+ in_addr->as_u32 =
+ clib_host_to_net_u32 (clib_net_to_host_u32 (dm->in_addr.as_u32) +
+ in_offset1 + in_offset2);
+}
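+
+/*
+ * Worked example, continuing the hypothetical 10.0.0.0/24 ->
+ * 1.1.1.0/28 mapping (sharing_ratio 16, ports_per_host 4032):
+ * forward of 10.0.0.21 gives in_offset 21 and out_offset 21 / 16 = 1,
+ * i.e. 1.1.1.1 with lo_port 1024 + 4032 * (21 % 16) = 21184; reverse
+ * of (1.1.1.1, 21184) recovers 16 * 1 + (21184 - 1024) / 4032 = 21,
+ * i.e. 10.0.0.21 again.
+ */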
+
+always_inline u32
+snat_det_user_ses_offset (ip4_address_t * addr, u8 plen)
+{
+ return (clib_net_to_host_u32 (addr->as_u32) & pow2_mask (32 - plen)) *
+ SNAT_DET_SES_PER_USER;
+}
+
+always_inline snat_det_session_t *
+snat_det_get_ses_by_out (snat_det_map_t * dm, ip4_address_t * in_addr,
+ u64 out_key)
+{
+ u32 user_offset;
+ u16 i;
+
+ user_offset = snat_det_user_ses_offset (in_addr, dm->in_plen);
+ for (i = 0; i < SNAT_DET_SES_PER_USER; i++)
+ {
+ if (dm->sessions[i + user_offset].out.as_u64 == out_key)
+ return &dm->sessions[i + user_offset];
+ }
+
+ return 0;
+}
+
+always_inline snat_det_session_t *
+snat_det_find_ses_by_in (snat_det_map_t * dm, ip4_address_t * in_addr,
+ u16 in_port, snat_det_out_key_t out_key)
+{
+ snat_det_session_t *ses;
+ u32 user_offset;
+ u16 i;
+
+ user_offset = snat_det_user_ses_offset (in_addr, dm->in_plen);
+ for (i = 0; i < SNAT_DET_SES_PER_USER; i++)
+ {
+ ses = &dm->sessions[i + user_offset];
+ if (ses->in_port == in_port &&
+ ses->out.ext_host_addr.as_u32 == out_key.ext_host_addr.as_u32 &&
+ ses->out.ext_host_port == out_key.ext_host_port)
+ return &dm->sessions[i + user_offset];
+ }
+
+ return 0;
+}
+
+always_inline snat_det_session_t *
+snat_det_ses_create (snat_det_map_t * dm, ip4_address_t * in_addr,
+ u16 in_port, snat_det_out_key_t * out)
+{
+ u32 user_offset;
+ u16 i;
+
+ user_offset = snat_det_user_ses_offset (in_addr, dm->in_plen);
+
+ for (i = 0; i < SNAT_DET_SES_PER_USER; i++)
+ {
+ if (!dm->sessions[i + user_offset].in_port)
+ {
+ if (__sync_bool_compare_and_swap
+ (&dm->sessions[i + user_offset].in_port, 0, in_port))
+ {
+ dm->sessions[i + user_offset].out.as_u64 = out->as_u64;
+ dm->sessions[i + user_offset].state = SNAT_SESSION_UNKNOWN;
+ dm->sessions[i + user_offset].expire = 0;
+ __sync_add_and_fetch (&dm->ses_num, 1);
+ return &dm->sessions[i + user_offset];
+ }
+ }
+ }
+
+ snat_ipfix_logging_max_entries_per_user (in_addr->as_u32);
+ return 0;
+}
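+
+/*
+ * Note that the slot is claimed lock-free: the compare-and-swap on
+ * in_port is what reserves a free session entry, so workers creating
+ * sessions for the same user concurrently cannot claim the same slot.
+ */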
+
+always_inline void
+snat_det_ses_close (snat_det_map_t * dm, snat_det_session_t * ses)
+{
+ if (__sync_bool_compare_and_swap (&ses->in_port, ses->in_port, 0))
+ {
+ ses->out.as_u64 = 0;
+ __sync_add_and_fetch (&dm->ses_num, -1);
+ }
+}
+
+#endif /* __included_nat_det_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_ipfix_logging.c b/src/plugins/nat/nat_ipfix_logging.c
new file mode 100644
index 00000000..18430f5a
--- /dev/null
+++ b/src/plugins/nat/nat_ipfix_logging.c
@@ -0,0 +1,848 @@
+/*
+ * nat_ipfix_logging.c - NAT Events IPFIX logging
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/flow/flow_report.h>
+#include <vlibmemory/api.h>
+#include <nat/nat.h>
+#include <nat/nat_ipfix_logging.h>
+
+snat_ipfix_logging_main_t snat_ipfix_logging_main;
+
+#define NAT44_SESSION_CREATE_LEN 26
+#define NAT_ADDRESSES_EXHAUTED_LEN 13
+#define MAX_ENTRIES_PER_USER_LEN 17
+
+#define NAT44_SESSION_CREATE_FIELD_COUNT 8
+#define NAT_ADDRESSES_EXHAUTED_FIELD_COUNT 3
+#define MAX_ENTRIES_PER_USER_FIELD_COUNT 4
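+
+/*
+ * The *_LEN values are the data-record sizes implied by the field
+ * lists built in snat_template_rewrite() below; e.g. a NAT44 session
+ * create record is 8 + 1 + 4 + 4 + 1 + 2 + 2 + 4 = 26 octets.
+ */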
+
+typedef struct
+{
+ u8 nat_event;
+ u32 src_ip;
+ u32 nat_src_ip;
+ snat_protocol_t snat_proto;
+ u16 src_port;
+ u16 nat_src_port;
+ u32 vrf_id;
+} snat_ipfix_logging_nat44_ses_args_t;
+
+typedef struct
+{
+ u32 pool_id;
+} snat_ipfix_logging_addr_exhausted_args_t;
+
+typedef struct
+{
+ u32 src_ip;
+} snat_ipfix_logging_max_entries_per_user_args_t;
+
+#define skip_if_disabled() \
+do { \
+ snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; \
+ if (PREDICT_TRUE (!silm->enabled)) \
+ return; \
+} while (0)
+
+/**
+ * @brief Create an IPFIX template packet rewrite string
+ *
+ * @param frm flow report main
+ * @param fr flow report
+ * @param collector_address collector address
+ * @param src_address source address
+ * @param collector_port collector
+ * @param event NAT event ID
+ * @param quota_event NAT quota exceeded event ID
+ *
+ * @returns template packet
+ */
+static inline u8 *
+snat_template_rewrite (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port,
+ nat_event_t event, quota_exceed_event_t quota_event)
+{
+ snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main;
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s;
+ ipfix_template_header_t *t;
+ ipfix_field_specifier_t *f;
+ ipfix_field_specifier_t *first_field;
+ u8 *rewrite = 0;
+ ip4_ipfix_template_packet_t *tp;
+ u32 field_count = 0;
+ flow_report_stream_t *stream;
+
+ stream = &frm->streams[fr->stream_index];
+ silm->stream_index = fr->stream_index;
+
+ if (event == NAT_ADDRESSES_EXHAUTED)
+ {
+ field_count = NAT_ADDRESSES_EXHAUTED_FIELD_COUNT;
+ silm->addr_exhausted_template_id = fr->template_id;
+ }
+ else if (event == NAT44_SESSION_CREATE)
+ {
+ field_count = NAT44_SESSION_CREATE_FIELD_COUNT;
+ silm->nat44_session_template_id = fr->template_id;
+ }
+ else if (event == QUOTA_EXCEEDED)
+ {
+ if (quota_event == MAX_ENTRIES_PER_USER)
+ {
+ field_count = MAX_ENTRIES_PER_USER_FIELD_COUNT;
+ silm->max_entries_per_user_template_id = fr->template_id;
+ }
+ }
+
+ /* allocate rewrite space */
+ vec_validate_aligned (rewrite,
+ sizeof (ip4_ipfix_template_packet_t)
+ + field_count * sizeof (ipfix_field_specifier_t) - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ tp = (ip4_ipfix_template_packet_t *) rewrite;
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+ t = (ipfix_template_header_t *) (s + 1);
+ first_field = f = (ipfix_field_specifier_t *) (t + 1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->src_address.as_u32 = src_address->as_u32;
+ ip->dst_address.as_u32 = collector_address->as_u32;
+ udp->src_port = clib_host_to_net_u16 (stream->src_port);
+ udp->dst_port = clib_host_to_net_u16 (collector_port);
+ udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
+
+ /* FIXUP: message header export_time */
+ h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+ /* Add TLVs to the template */
+ if (event == NAT_ADDRESSES_EXHAUTED)
+ {
+ f->e_id_length = ipfix_e_id_length (0, observationTimeMilliseconds, 8);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, natEvent, 1);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, natPoolId, 4);
+ f++;
+ }
+ else if (event == NAT44_SESSION_CREATE)
+ {
+ f->e_id_length = ipfix_e_id_length (0, observationTimeMilliseconds, 8);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, natEvent, 1);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, sourceIPv4Address, 4);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, postNATSourceIPv4Address, 4);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, protocolIdentifier, 1);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, sourceTransportPort, 2);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, postNAPTSourceTransportPort, 2);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, ingressVRFID, 4);
+ f++;
+ }
+ else if (event == QUOTA_EXCEEDED)
+ {
+ if (quota_event == MAX_ENTRIES_PER_USER)
+ {
+ f->e_id_length = ipfix_e_id_length (0, observationTimeMilliseconds,
+ 8);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, natEvent, 1);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, natQuotaExceededEvent, 4);
+ f++;
+ f->e_id_length = ipfix_e_id_length (0, sourceIPv4Address, 4);
+ f++;
+ }
+ }
+
+ /* Back to the template packet... */
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+
+ ASSERT (f - first_field);
+ /* Field count in this template */
+ t->id_count = ipfix_id_count (fr->template_id, f - first_field);
+
+ /* set length in octets */
+ s->set_id_length =
+ ipfix_set_id_length (2 /* set_id */ , (u8 *) f - (u8 *) s);
+
+ /* message length in octets */
+ h->version_length = version_length ((u8 *) f - (u8 *) h);
+
+ ip->length = clib_host_to_net_u16 ((u8 *) f - (u8 *) ip);
+ ip->checksum = ip4_header_checksum (ip);
+
+ return rewrite;
+}
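+
+/*
+ * Worked size example (illustrative): for NAT44_SESSION_CREATE the
+ * template carries 8 field specifiers, so the rewrite vector above is
+ *
+ *   sizeof (ip4_ipfix_template_packet_t)
+ *     + 8 * sizeof (ipfix_field_specifier_t)
+ *
+ * octets: the fixed IP/UDP/IPFIX headers followed by one 4-octet
+ * specifier (enterprise bit clear) per field.
+ */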
+
+u8 *
+snat_template_rewrite_addr_exhausted (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ return snat_template_rewrite (frm, fr, collector_address, src_address,
+ collector_port, NAT_ADDRESSES_EXHAUTED, 0);
+}
+
+u8 *
+snat_template_rewrite_nat44_session (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ return snat_template_rewrite (frm, fr, collector_address, src_address,
+ collector_port, NAT44_SESSION_CREATE, 0);
+}
+
+u8 *
+snat_template_rewrite_max_entries_per_usr (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ return snat_template_rewrite (frm, fr, collector_address, src_address,
+ collector_port, QUOTA_EXCEEDED,
+ MAX_ENTRIES_PER_USER);
+}
+
+static inline void
+snat_ipfix_header_create (flow_report_main_t * frm,
+ vlib_buffer_t * b0, u32 * offset)
+{
+ snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main;
+ flow_report_stream_t *stream;
+ ip4_ipfix_template_packet_t *tp;
+ ipfix_message_header_t *h = 0;
+ ipfix_set_header_t *s = 0;
+ ip4_header_t *ip;
+ udp_header_t *udp;
+
+ stream = &frm->streams[silm->stream_index];
+
+ b0->current_data = 0;
+ b0->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h) +
+ sizeof (*s);
+ b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_FLOW_REPORT);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->flags_and_fragment_offset = 0;
+ ip->src_address.as_u32 = frm->src_address.as_u32;
+ ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
+ udp->src_port = clib_host_to_net_u16 (stream->src_port);
+ udp->dst_port = clib_host_to_net_u16 (frm->collector_port);
+ udp->checksum = 0;
+
+ h->export_time = clib_host_to_net_u32 ((u32)
+ (((f64) frm->unix_time_0) +
+ (vlib_time_now (frm->vlib_main) -
+ frm->vlib_time_0)));
+ h->sequence_number = clib_host_to_net_u32 (stream->sequence_number++);
+ h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+ *offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
+}
+
+static inline void
+snat_ipfix_send (flow_report_main_t * frm,
+ vlib_frame_t * f, vlib_buffer_t * b0, u16 template_id)
+{
+ ip4_ipfix_template_packet_t *tp;
+ ipfix_message_header_t *h = 0;
+ ipfix_set_header_t *s = 0;
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ vlib_main_t *vm = frm->vlib_main;
+
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ s->set_id_length = ipfix_set_id_length (template_id,
+ b0->current_length -
+ (sizeof (*ip) + sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length = version_length (b0->current_length -
+ (sizeof (*ip) + sizeof (*udp)));
+
+ ip->length = clib_host_to_net_u16 (b0->current_length);
+ ip->checksum = ip4_header_checksum (ip);
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ if (frm->udp_checksum)
+ {
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+}
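+
+/*
+ * Worked length example (illustrative): with a single 26-octet NAT44
+ * session record queued, current_length is 20 (IP) + 8 (UDP) + 16
+ * (IPFIX message header) + 4 (set header) + 26 = 74 octets. The fixups
+ * above therefore write a set length of 30, an IPFIX message length of
+ * 46, ip->length 74 and udp->length 54.
+ */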
+
+static void
+snat_ipfix_logging_nat44_ses (u8 nat_event, u32 src_ip, u32 nat_src_ip,
+ snat_protocol_t snat_proto, u16 src_port,
+ u16 nat_src_port, u32 vrf_id, int do_flush)
+{
+ snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_frame_t *f;
+ vlib_buffer_t *b0 = 0;
+ u32 bi0 = ~0;
+ u32 offset;
+ vlib_main_t *vm = frm->vlib_main;
+ u64 now;
+ vlib_buffer_free_list_t *fl;
+ u8 proto = ~0;
+
+ if (!silm->enabled)
+ return;
+
+ proto = snat_proto_to_ip_proto (snat_proto);
+
+ now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
+ now += silm->milisecond_time_0;
+
+ b0 = silm->nat44_session_buffer;
+
+ if (PREDICT_FALSE (b0 == 0))
+ {
+ if (do_flush)
+ return;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ {
+ clib_warning ("can't allocate buffer for NAT IPFIX event");
+ return;
+ }
+
+ b0 = silm->nat44_session_buffer = vlib_get_buffer (vm, bi0);
+ fl =
+ vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (b0, fl);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ offset = 0;
+ }
+ else
+ {
+ bi0 = vlib_get_buffer_index (vm, b0);
+ offset = silm->nat44_session_next_record_offset;
+ }
+
+ f = silm->nat44_session_frame;
+ if (PREDICT_FALSE (f == 0))
+ {
+ u32 *to_next;
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ silm->nat44_session_frame = f;
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ }
+
+ if (PREDICT_FALSE (offset == 0))
+ snat_ipfix_header_create (frm, b0, &offset);
+
+ if (PREDICT_TRUE (do_flush == 0))
+ {
+ u64 time_stamp = clib_host_to_net_u64 (now);
+ clib_memcpy (b0->data + offset, &time_stamp, sizeof (time_stamp));
+ offset += sizeof (time_stamp);
+
+ clib_memcpy (b0->data + offset, &nat_event, sizeof (nat_event));
+ offset += sizeof (nat_event);
+
+ clib_memcpy (b0->data + offset, &src_ip, sizeof (src_ip));
+ offset += sizeof (src_ip);
+
+ clib_memcpy (b0->data + offset, &nat_src_ip, sizeof (nat_src_ip));
+ offset += sizeof (nat_src_ip);
+
+ clib_memcpy (b0->data + offset, &proto, sizeof (proto));
+ offset += sizeof (proto);
+
+ clib_memcpy (b0->data + offset, &src_port, sizeof (src_port));
+ offset += sizeof (src_port);
+
+ clib_memcpy (b0->data + offset, &nat_src_port, sizeof (nat_src_port));
+ offset += sizeof (nat_src_port);
+
+ clib_memcpy (b0->data + offset, &vrf_id, sizeof (vrf_id));
+ offset += sizeof (vrf_id);
+
+ b0->current_length += NAT44_SESSION_CREATE_LEN;
+ }
+
+ if (PREDICT_FALSE
+ (do_flush || (offset + NAT44_SESSION_CREATE_LEN) > frm->path_mtu))
+ {
+ snat_ipfix_send (frm, f, b0, silm->nat44_session_template_id);
+ silm->nat44_session_frame = 0;
+ silm->nat44_session_buffer = 0;
+ offset = 0;
+ }
+ silm->nat44_session_next_record_offset = offset;
+}
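+
+/*
+ * Record size accounting (illustrative): the NAT44 session record
+ * written above is the concatenation of the template fields,
+ *
+ *   8 (observationTimeMilliseconds) + 1 (natEvent)
+ *     + 4 (sourceIPv4Address) + 4 (postNATSourceIPv4Address)
+ *     + 1 (protocolIdentifier) + 2 (sourceTransportPort)
+ *     + 2 (postNAPTSourceTransportPort) + 4 (ingressVRFID) = 26 octets,
+ *
+ * which NAT44_SESSION_CREATE_LEN is expected to equal; records are
+ * packed back to back until the next one would exceed frm->path_mtu.
+ */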
+
+static void
+snat_ipfix_logging_addr_exhausted (u32 pool_id, int do_flush)
+{
+ snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_frame_t *f;
+ vlib_buffer_t *b0 = 0;
+ u32 bi0 = ~0;
+ u32 offset;
+ vlib_main_t *vm = frm->vlib_main;
+ u64 now;
+ vlib_buffer_free_list_t *fl;
+ u8 nat_event = NAT_ADDRESSES_EXHAUTED;
+
+ if (!silm->enabled)
+ return;
+
+ now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
+ now += silm->milisecond_time_0;
+
+ b0 = silm->addr_exhausted_buffer;
+
+ if (PREDICT_FALSE (b0 == 0))
+ {
+ if (do_flush)
+ return;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ {
+ clib_warning ("can't allocate buffer for NAT IPFIX event");
+ return;
+ }
+
+ b0 = silm->addr_exhausted_buffer = vlib_get_buffer (vm, bi0);
+ fl =
+ vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (b0, fl);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ offset = 0;
+ }
+ else
+ {
+ bi0 = vlib_get_buffer_index (vm, b0);
+ offset = silm->addr_exhausted_next_record_offset;
+ }
+
+ f = silm->addr_exhausted_frame;
+ if (PREDICT_FALSE (f == 0))
+ {
+ u32 *to_next;
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ silm->addr_exhausted_frame = f;
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ }
+
+ if (PREDICT_FALSE (offset == 0))
+ snat_ipfix_header_create (frm, b0, &offset);
+
+ if (PREDICT_TRUE (do_flush == 0))
+ {
+ u64 time_stamp = clib_host_to_net_u64 (now);
+ clib_memcpy (b0->data + offset, &time_stamp, sizeof (time_stamp));
+ offset += sizeof (time_stamp);
+
+ clib_memcpy (b0->data + offset, &nat_event, sizeof (nat_event));
+ offset += sizeof (nat_event);
+
+ clib_memcpy (b0->data + offset, &pool_id, sizeof (pool_id));
+ offset += sizeof (pool_id);
+
+ b0->current_length += NAT_ADDRESSES_EXHAUTED_LEN;
+ }
+
+ if (PREDICT_FALSE
+ (do_flush || (offset + NAT_ADDRESSES_EXHAUTED_LEN) > frm->path_mtu))
+ {
+ snat_ipfix_send (frm, f, b0, silm->addr_exhausted_template_id);
+ silm->addr_exhausted_frame = 0;
+ silm->addr_exhausted_buffer = 0;
+ offset = 0;
+ }
+ silm->addr_exhausted_next_record_offset = offset;
+}
+
+static void
+snat_ipfix_logging_max_entries_per_usr (u32 src_ip, int do_flush)
+{
+ snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_frame_t *f;
+ vlib_buffer_t *b0 = 0;
+ u32 bi0 = ~0;
+ u32 offset;
+ vlib_main_t *vm = frm->vlib_main;
+ u64 now;
+ vlib_buffer_free_list_t *fl;
+ u8 nat_event = QUOTA_EXCEEDED;
+ u32 quota_event = MAX_ENTRIES_PER_USER;
+
+ if (!silm->enabled)
+ return;
+
+ now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
+ now += silm->milisecond_time_0;
+
+ b0 = silm->max_entries_per_user_buffer;
+
+ if (PREDICT_FALSE (b0 == 0))
+ {
+ if (do_flush)
+ return;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ {
+ clib_warning ("can't allocate buffer for NAT IPFIX event");
+ return;
+ }
+
+ b0 = silm->max_entries_per_user_buffer = vlib_get_buffer (vm, bi0);
+ fl =
+ vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (b0, fl);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ offset = 0;
+ }
+ else
+ {
+ bi0 = vlib_get_buffer_index (vm, b0);
+ offset = silm->max_entries_per_user_next_record_offset;
+ }
+
+ f = silm->max_entries_per_user_frame;
+ if (PREDICT_FALSE (f == 0))
+ {
+ u32 *to_next;
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ silm->max_entries_per_user_frame = f;
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ }
+
+ if (PREDICT_FALSE (offset == 0))
+ snat_ipfix_header_create (frm, b0, &offset);
+
+ if (PREDICT_TRUE (do_flush == 0))
+ {
+ u64 time_stamp = clib_host_to_net_u64 (now);
+ clib_memcpy (b0->data + offset, &time_stamp, sizeof (time_stamp));
+ offset += sizeof (time_stamp);
+
+ clib_memcpy (b0->data + offset, &nat_event, sizeof (nat_event));
+ offset += sizeof (nat_event);
+
+ clib_memcpy (b0->data + offset, &quota_event, sizeof (quota_event));
+ offset += sizeof (quota_event);
+
+ clib_memcpy (b0->data + offset, &src_ip, sizeof (src_ip));
+ offset += sizeof (src_ip);
+
+ b0->current_length += MAX_ENTRIES_PER_USER_LEN;
+ }
+
+ if (PREDICT_FALSE
+ (do_flush || (offset + MAX_ENTRIES_PER_USER_LEN) > frm->path_mtu))
+ {
+ snat_ipfix_send (frm, f, b0, silm->max_entries_per_user_template_id);
+ silm->max_entries_per_user_frame = 0;
+ silm->max_entries_per_user_buffer = 0;
+ offset = 0;
+ }
+ silm->max_entries_per_user_next_record_offset = offset;
+}
+
+static void
+snat_ipfix_logging_nat44_ses_rpc_cb (snat_ipfix_logging_nat44_ses_args_t * a)
+{
+ snat_ipfix_logging_nat44_ses (a->nat_event, a->src_ip, a->nat_src_ip,
+ a->snat_proto, a->src_port, a->nat_src_port,
+ a->vrf_id, 0);
+}
+
+/**
+ * @brief Generate NAT44 session create event
+ *
+ * @param src_ip source IPv4 address
+ * @param nat_src_ip translated source IPv4 address
+ * @param snat_proto NAT transport protocol
+ * @param src_port source port
+ * @param nat_src_port translated source port
+ * @param vrf_id VRF ID
+ */
+void
+snat_ipfix_logging_nat44_ses_create (u32 src_ip,
+ u32 nat_src_ip,
+ snat_protocol_t snat_proto,
+ u16 src_port,
+ u16 nat_src_port, u32 vrf_id)
+{
+ snat_ipfix_logging_nat44_ses_args_t a;
+
+ skip_if_disabled ();
+
+ a.nat_event = NAT44_SESSION_CREATE;
+ a.src_ip = src_ip;
+ a.nat_src_ip = nat_src_ip;
+ a.snat_proto = snat_proto;
+ a.src_port = src_port;
+ a.nat_src_port = nat_src_port;
+ a.vrf_id = vrf_id;
+
+ vl_api_rpc_call_main_thread (snat_ipfix_logging_nat44_ses_rpc_cb,
+ (u8 *) & a, sizeof (a));
+}
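+
+/*
+ * Usage sketch (hypothetical values; addresses and ports are passed in
+ * network byte order, as at the out2in call site): reporting a new TCP
+ * session 10.0.0.3:1234 -> 1.2.3.4:5678 in VRF 0 would be
+ *
+ *   snat_ipfix_logging_nat44_ses_create (
+ *     clib_host_to_net_u32 (0x0a000003),   // src_ip 10.0.0.3
+ *     clib_host_to_net_u32 (0x01020304),   // nat_src_ip 1.2.3.4
+ *     SNAT_PROTOCOL_TCP,
+ *     clib_host_to_net_u16 (1234),         // src_port
+ *     clib_host_to_net_u16 (5678),         // nat_src_port
+ *     0);                                  // vrf_id
+ *
+ * The RPC above marshals the arguments to the main thread, where the
+ * per-event buffer and frame state is safe to manipulate.
+ */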
+
+/**
+ * @brief Generate NAT44 session delete event
+ *
+ * @param src_ip source IPv4 address
+ * @param nat_src_ip translated source IPv4 address
+ * @param snat_proto NAT transport protocol
+ * @param src_port source port
+ * @param nat_src_port translated source port
+ * @param vrf_id VRF ID
+ */
+void
+snat_ipfix_logging_nat44_ses_delete (u32 src_ip,
+ u32 nat_src_ip,
+ snat_protocol_t snat_proto,
+ u16 src_port,
+ u16 nat_src_port, u32 vrf_id)
+{
+ snat_ipfix_logging_nat44_ses_args_t a;
+
+ skip_if_disabled ();
+
+ a.nat_event = NAT44_SESSION_DELETE;
+ a.src_ip = src_ip;
+ a.nat_src_ip = nat_src_ip;
+ a.snat_proto = snat_proto;
+ a.src_port = src_port;
+ a.nat_src_port = nat_src_port;
+ a.vrf_id = vrf_id;
+
+ vl_api_rpc_call_main_thread (snat_ipfix_logging_nat44_ses_rpc_cb,
+ (u8 *) & a, sizeof (a));
+}
+
+vlib_frame_t *
+snat_data_callback_nat44_session (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f,
+ u32 * to_next, u32 node_index)
+{
+ snat_ipfix_logging_nat44_ses (0, 0, 0, 0, 0, 0, 0, 1);
+ return f;
+}
+
+static void
+ snat_ipfix_logging_addr_exhausted_rpc_cb
+ (snat_ipfix_logging_addr_exhausted_args_t * a)
+{
+ snat_ipfix_logging_addr_exhausted (a->pool_id, 0);
+}
+
+/**
+ * @brief Generate NAT addresses exhausted event
+ *
+ * @param pool_id NAT pool ID
+ */
+void
+snat_ipfix_logging_addresses_exhausted (u32 pool_id)
+{
+ //TODO: This event SHOULD be rate limited
+ snat_ipfix_logging_addr_exhausted_args_t a;
+
+ skip_if_disabled ();
+
+ a.pool_id = pool_id;
+
+ vl_api_rpc_call_main_thread (snat_ipfix_logging_addr_exhausted_rpc_cb,
+ (u8 *) & a, sizeof (a));
+}
+
+vlib_frame_t *
+snat_data_callback_addr_exhausted (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f,
+ u32 * to_next, u32 node_index)
+{
+ snat_ipfix_logging_addr_exhausted (0, 1);
+ return f;
+}
+
+static void
+ snat_ipfix_logging_max_entries_per_usr_rpc_cb
+ (snat_ipfix_logging_max_entries_per_user_args_t * a)
+{
+ snat_ipfix_logging_max_entries_per_usr (a->src_ip, 0);
+}
+
+/**
+ * @brief Generate maximum entries per user exceeded event
+ *
+ * @param src_ip source IPv4 address
+ */
+void
+snat_ipfix_logging_max_entries_per_user (u32 src_ip)
+{
+ //TODO: This event SHOULD be rate limited
+ snat_ipfix_logging_max_entries_per_user_args_t a;
+
+ skip_if_disabled ();
+
+ a.src_ip = src_ip;
+
+ vl_api_rpc_call_main_thread (snat_ipfix_logging_max_entries_per_usr_rpc_cb,
+ (u8 *) & a, sizeof (a));
+}
+
+vlib_frame_t *
+snat_data_callback_max_entries_per_usr (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f,
+ u32 * to_next, u32 node_index)
+{
+ snat_ipfix_logging_max_entries_per_usr (0, 1);
+ return f;
+}
+
+/**
+ * @brief Enable/disable NAT plugin IPFIX logging
+ *
+ * @param enable 1 if enable, 0 if disable
+ * @param domain_id observation domain ID
+ * @param src_port source port number
+ *
+ * @returns 0 if success
+ */
+int
+snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
+{
+ snat_main_t *sm = &snat_main;
+ snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vnet_flow_report_add_del_args_t a;
+ int rv;
+ u8 e = enable ? 1 : 0;
+
+ if (silm->enabled == e)
+ return 0;
+
+ silm->enabled = e;
+
+ memset (&a, 0, sizeof (a));
+ a.is_add = enable;
+ a.domain_id = domain_id ? domain_id : 1;
+ a.src_port = src_port ? src_port : UDP_DST_PORT_ipfix;
+
+ if (sm->deterministic)
+ {
+ a.rewrite_callback = snat_template_rewrite_max_entries_per_usr;
+ a.flow_data_callback = snat_data_callback_max_entries_per_usr;
+
+ rv = vnet_flow_report_add_del (frm, &a, NULL);
+ if (rv)
+ {
+ clib_warning ("vnet_flow_report_add_del returned %d", rv);
+ return -1;
+ }
+ }
+ else
+ {
+ a.rewrite_callback = snat_template_rewrite_nat44_session;
+ a.flow_data_callback = snat_data_callback_nat44_session;
+
+ rv = vnet_flow_report_add_del (frm, &a, NULL);
+ if (rv)
+ {
+ clib_warning ("vnet_flow_report_add_del returned %d", rv);
+ return -1;
+ }
+
+ a.rewrite_callback = snat_template_rewrite_addr_exhausted;
+ a.flow_data_callback = snat_data_callback_addr_exhausted;
+
+ rv = vnet_flow_report_add_del (frm, &a, NULL);
+ if (rv)
+ {
+ clib_warning ("vnet_flow_report_add_del returned %d", rv);
+ return -1;
+ }
+ }
+
+ return 0;
+}
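+
+/*
+ * Minimal usage sketch (hypothetical defaults): calling
+ *
+ *   int rv = snat_ipfix_logging_enable_disable (1, 0, 0);
+ *
+ * enables logging with domain_id falling back to 1 and src_port falling
+ * back to the IPFIX UDP port, per the defaults applied above.
+ */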
+
+/**
+ * @brief Initialize NAT plugin IPFIX logging
+ *
+ * @param vm vlib main
+ */
+void
+snat_ipfix_logging_init (vlib_main_t * vm)
+{
+ snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main;
+
+ silm->enabled = 0;
+
+ /* Set up time reference pair */
+ silm->vlib_time_0 = vlib_time_now (vm);
+ silm->milisecond_time_0 = unix_time_now_nsec () * 1e-6;
+}
diff --git a/src/plugins/nat/nat_ipfix_logging.h b/src/plugins/nat/nat_ipfix_logging.h
new file mode 100644
index 00000000..6dbf6627
--- /dev/null
+++ b/src/plugins/nat/nat_ipfix_logging.h
@@ -0,0 +1,79 @@
+/*
+ * nat_ipfix_logging.h - NAT Events IPFIX logging
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_nat_ipfix_logging_h__
+#define __included_nat_ipfix_logging_h__
+
+typedef enum {
+ NAT_ADDRESSES_EXHAUTED = 3,
+ NAT44_SESSION_CREATE = 4,
+ NAT44_SESSION_DELETE = 5,
+ NAT_PORTS_EXHAUSTED = 12,
+ QUOTA_EXCEEDED = 13,
+} nat_event_t;
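+
+/*
+ * These values follow the IANA natEvent IPFIX registry (addresses
+ * exhausted = 3, NAT44 session create/delete = 4/5, ports exhausted = 12,
+ * quota exceeded = 13), so standard collectors can decode the records.
+ */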
+
+typedef enum {
+ MAX_ENTRIES_PER_USER = 3,
+} quota_exceed_event_t;
+
+typedef struct {
+ /** NAT plugin IPFIX logging enabled */
+ u8 enabled;
+
+ /** ipfix buffers under construction */
+ vlib_buffer_t *nat44_session_buffer;
+ vlib_buffer_t *addr_exhausted_buffer;
+ vlib_buffer_t *max_entries_per_user_buffer;
+
+ /** frames containing ipfix buffers */
+ vlib_frame_t *nat44_session_frame;
+ vlib_frame_t *addr_exhausted_frame;
+ vlib_frame_t *max_entries_per_user_frame;
+
+ /** next record offset */
+ u32 nat44_session_next_record_offset;
+ u32 addr_exhausted_next_record_offset;
+ u32 max_entries_per_user_next_record_offset;
+
+ /** Time reference pair */
+ u64 milisecond_time_0;
+ f64 vlib_time_0;
+
+ /** template IDs */
+ u16 nat44_session_template_id;
+ u16 addr_exhausted_template_id;
+ u16 max_entries_per_user_template_id;
+
+ /** stream index */
+ u32 stream_index;
+} snat_ipfix_logging_main_t;
+
+extern snat_ipfix_logging_main_t snat_ipfix_logging_main;
+
+void snat_ipfix_logging_init (vlib_main_t * vm);
+int snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port);
+void snat_ipfix_logging_nat44_ses_create (u32 src_ip, u32 nat_src_ip,
+ snat_protocol_t snat_proto,
+ u16 src_port, u16 nat_src_port,
+ u32 vrf_id);
+void snat_ipfix_logging_nat44_ses_delete (u32 src_ip, u32 nat_src_ip,
+ snat_protocol_t snat_proto,
+ u16 src_port, u16 nat_src_port,
+ u32 vrf_id);
+void snat_ipfix_logging_addresses_exhausted(u32 pool_id);
+void snat_ipfix_logging_max_entries_per_user(u32 src_ip);
+
+#endif /* __included_nat_ipfix_logging_h__ */
diff --git a/src/plugins/nat/nat_msg_enum.h b/src/plugins/nat/nat_msg_enum.h
new file mode 100644
index 00000000..710b631c
--- /dev/null
+++ b/src/plugins/nat/nat_msg_enum.h
@@ -0,0 +1,31 @@
+
+/*
+ * nat_msg_enum.h - skeleton vpp engine plug-in message enumeration
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_nat_msg_enum_h
+#define included_nat_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <nat/nat_all_api_h.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_nat_msg_enum_h */
diff --git a/src/plugins/nat/nat_test.c b/src/plugins/nat/nat_test.c
new file mode 100644
index 00000000..e0b04940
--- /dev/null
+++ b/src/plugins/nat/nat_test.c
@@ -0,0 +1,1167 @@
+
+/*
+ * nat_test.c - skeleton vpp-api-test plug-in
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <nat/nat.h>
+
+#define __plugin_msg_base snat_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+
+/* Declare message IDs */
+#include <nat/nat_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <nat/nat_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <nat/nat_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <nat/nat_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <nat/nat_all_api_h.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} snat_test_main_t;
+
+snat_test_main_t snat_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(snat_add_address_range_reply) \
+_(snat_interface_add_del_feature_reply) \
+_(snat_add_static_mapping_reply) \
+_(snat_set_workers_reply) \
+_(snat_add_del_interface_addr_reply) \
+_(snat_ipfix_enable_disable_reply) \
+_(snat_add_det_map_reply) \
+_(snat_det_set_timeouts_reply) \
+_(snat_det_close_session_out_reply) \
+_(snat_det_close_session_in_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = snat_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
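+
+/*
+ * Illustrative expansion of one instance: for snat_set_workers_reply the
+ * macro above generates
+ *
+ *   static void vl_api_snat_set_workers_reply_t_handler
+ *     (vl_api_snat_set_workers_reply_t * mp)
+ *   {
+ *     vat_main_t * vam = snat_test_main.vat_main;
+ *     i32 retval = ntohl (mp->retval);
+ *     ...
+ *   }
+ *
+ * i.e. a uniform handler that records retval in sync or async mode.
+ */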
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(SNAT_ADD_ADDRESS_RANGE_REPLY, snat_add_address_range_reply) \
+_(SNAT_INTERFACE_ADD_DEL_FEATURE_REPLY, \
+ snat_interface_add_del_feature_reply) \
+_(SNAT_ADD_STATIC_MAPPING_REPLY, snat_add_static_mapping_reply) \
+_(SNAT_CONTROL_PING_REPLY, snat_control_ping_reply) \
+_(SNAT_STATIC_MAPPING_DETAILS, snat_static_mapping_details) \
+_(SNAT_SHOW_CONFIG_REPLY, snat_show_config_reply) \
+_(SNAT_ADDRESS_DETAILS, snat_address_details) \
+_(SNAT_INTERFACE_DETAILS, snat_interface_details) \
+_(SNAT_SET_WORKERS_REPLY, snat_set_workers_reply) \
+_(SNAT_WORKER_DETAILS, snat_worker_details) \
+_(SNAT_ADD_DEL_INTERFACE_ADDR_REPLY, \
+ snat_add_del_interface_addr_reply) \
+_(SNAT_INTERFACE_ADDR_DETAILS, snat_interface_addr_details) \
+_(SNAT_IPFIX_ENABLE_DISABLE_REPLY, \
+ snat_ipfix_enable_disable_reply) \
+_(SNAT_USER_DETAILS, snat_user_details) \
+_(SNAT_USER_SESSION_DETAILS, snat_user_session_details) \
+_(SNAT_ADD_DET_MAP_REPLY, snat_add_det_map_reply) \
+_(SNAT_DET_FORWARD_REPLY, snat_det_forward_reply) \
+_(SNAT_DET_REVERSE_REPLY, snat_det_reverse_reply) \
+_(SNAT_DET_MAP_DETAILS, snat_det_map_details) \
+_(SNAT_DET_SET_TIMEOUTS_REPLY, snat_det_set_timeouts_reply) \
+_(SNAT_DET_GET_TIMEOUTS_REPLY, snat_det_get_timeouts_reply) \
+_(SNAT_DET_CLOSE_SESSION_OUT_REPLY, \
+ snat_det_close_session_out_reply) \
+_(SNAT_DET_CLOSE_SESSION_IN_REPLY, \
+ snat_det_close_session_in_reply) \
+_(SNAT_DET_SESSION_DETAILS, snat_det_session_details)
+
+static int api_snat_add_address_range (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ ip4_address_t start_addr, end_addr;
+ u32 start_host_order, end_host_order;
+ vl_api_snat_add_address_range_t * mp;
+ u8 is_add = 1;
+ int count;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U - %U",
+ unformat_ip4_address, &start_addr,
+ unformat_ip4_address, &end_addr))
+ ;
+ else if (unformat (i, "%U", unformat_ip4_address, &start_addr))
+ end_addr = start_addr;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ start_host_order = clib_host_to_net_u32 (start_addr.as_u32);
+ end_host_order = clib_host_to_net_u32 (end_addr.as_u32);
+
+ if (end_host_order < start_host_order)
+ {
+ errmsg ("end address less than start address\n");
+ return -99;
+ }
+
+ count = (end_host_order - start_host_order) + 1;
+
+ if (count > 1024)
+ {
+ errmsg ("%U - %U, %d addresses...\n",
+ format_ip4_address, &start_addr,
+ format_ip4_address, &end_addr,
+ count);
+ }
+
+ M(SNAT_ADD_ADDRESS_RANGE, mp);
+
+ memcpy (mp->first_ip_address, &start_addr, 4);
+ memcpy (mp->last_ip_address, &end_addr, 4);
+ mp->is_ip4 = 1;
+ mp->is_add = is_add;
+
+ S(mp);
+ W (ret);
+ return ret;
+}
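+
+/*
+ * Example vpp_api_test invocations (hypothetical addresses):
+ *
+ *   snat_add_address_range 10.0.0.1 - 10.0.0.10
+ *   snat_add_address_range 10.0.0.1 del
+ *
+ * A single address is treated as a one-address range; ranges above 1024
+ * addresses only trigger the informational message, not an error.
+ */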
+
+static int api_snat_interface_add_del_feature (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_interface_add_del_feature_t * mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_inside = 1;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "out"))
+ is_inside = 0;
+ else if (unformat (i, "in"))
+ is_inside = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("interface / sw_if_index required\n");
+ return -99;
+ }
+
+ M(SNAT_INTERFACE_ADD_DEL_FEATURE, mp);
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->is_add = is_add;
+ mp->is_inside = is_inside;
+
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static int api_snat_add_static_mapping(vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_add_static_mapping_t * mp;
+ u8 external_addr_set = 0;
+ u8 local_addr_set = 0;
+ u8 is_add = 1;
+ u8 addr_only = 1;
+ ip4_address_t local_addr, external_addr;
+ u32 local_port = 0, external_port = 0, vrf_id = ~0;
+ u32 sw_if_index = ~0;
+ u8 sw_if_index_set = 0;
+ u32 proto = ~0;
+ u8 proto_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "local_addr %U", unformat_ip4_address, &local_addr))
+ local_addr_set = 1;
+ else if (unformat (i, "external_addr %U", unformat_ip4_address,
+ &external_addr))
+ external_addr_set = 1;
+ else if (unformat (i, "local_port %u", &local_port))
+ addr_only = 0;
+ else if (unformat (i, "external_port %u", &external_port))
+ addr_only = 0;
+ else if (unformat (i, "external_if %U", unformat_sw_if_index, vam,
+ &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "external_sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "vrf %u", &vrf_id))
+ ;
+ else if (unformat (i, "protocol %u", &proto))
+ proto_set = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!addr_only && !proto_set)
+ {
+ errmsg ("protocol required\n");
+ return -99;
+ }
+
+ if (!local_addr_set)
+ {
+ errmsg ("local addr required\n");
+ return -99;
+ }
+ if (!external_addr_set && !sw_if_index_set)
+ {
+ errmsg ("external addr or interface required\n");
+ return -99;
+ }
+
+ M(SNAT_ADD_STATIC_MAPPING, mp);
+ mp->is_add = is_add;
+ mp->is_ip4 = 1;
+ mp->addr_only = addr_only;
+ mp->local_port = ntohs ((u16) local_port);
+ mp->external_port = ntohs ((u16) external_port);
+ mp->external_sw_if_index = ntohl (sw_if_index);
+ mp->vrf_id = ntohl (vrf_id);
+ mp->protocol = (u8) proto;
+ memcpy (mp->local_ip_address, &local_addr, 4);
+ memcpy (mp->external_ip_address, &external_addr, 4);
+
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_snat_control_ping_reply_t_handler
+ (vl_api_snat_control_ping_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_snat_static_mapping_details_t_handler
+ (vl_api_snat_static_mapping_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ if (mp->addr_only && mp->external_sw_if_index != ~0)
+ fformat (vam->ofp, "%15U%6s%15d%6s%11d%6d\n",
+ format_ip4_address, &mp->local_ip_address, "",
+ ntohl (mp->external_sw_if_index), "",
+ ntohl (mp->vrf_id),
+ mp->protocol);
+ else if (mp->addr_only && mp->external_sw_if_index == ~0)
+ fformat (vam->ofp, "%15U%6s%15U%6s%11d%6d\n",
+ format_ip4_address, &mp->local_ip_address, "",
+ format_ip4_address, &mp->external_ip_address, "",
+ ntohl (mp->vrf_id),
+ mp->protocol);
+ else if (!mp->addr_only && mp->external_sw_if_index != ~0)
+ fformat (vam->ofp, "%15U%6d%15d%6d%11d%6d\n",
+ format_ip4_address, &mp->local_ip_address,
+ ntohs (mp->local_port),
+ ntohl (mp->external_sw_if_index),
+ ntohs (mp->external_port),
+ ntohl (mp->vrf_id),
+ mp->protocol);
+ else
+ fformat (vam->ofp, "%15U%6d%15U%6d%11d%6d\n",
+ format_ip4_address, &mp->local_ip_address,
+ ntohs (mp->local_port),
+ format_ip4_address, &mp->external_ip_address,
+ ntohs (mp->external_port),
+ ntohl (mp->vrf_id),
+ mp->protocol);
+}
+
+static int api_snat_static_mapping_dump(vat_main_t * vam)
+{
+ vl_api_snat_static_mapping_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_static_mapping_dump");
+ return -99;
+ }
+
+ fformat (vam->ofp, "%21s%21s\n", "local", "external");
+ fformat (vam->ofp, "%15s%6s%15s%6s%11s%6s\n", "address", "port",
+ "address/if_idx", "port", "vrf", "proto");
+
+ M(SNAT_STATIC_MAPPING_DUMP, mp);
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void vl_api_snat_show_config_reply_t_handler
+ (vl_api_snat_show_config_reply_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval >= 0)
+ {
+ fformat (vam->ofp, "translation hash buckets %d\n",
+ ntohl (mp->translation_buckets));
+ fformat (vam->ofp, "translation hash memory %d\n",
+ ntohl (mp->translation_memory_size));
+ fformat (vam->ofp, "user hash buckets %d\n", ntohl (mp->user_buckets));
+ fformat (vam->ofp, "user hash memory %d\n", ntohl (mp->user_memory_size));
+ fformat (vam->ofp, "max translations per user %d\n",
+ ntohl (mp->max_translations_per_user));
+ fformat (vam->ofp, "outside VRF id %d\n", ntohl (mp->outside_vrf_id));
+ fformat (vam->ofp, "inside VRF id %d\n", ntohl (mp->inside_vrf_id));
+ if (mp->static_mapping_only)
+ {
+ fformat (vam->ofp, "static mapping only");
+ if (mp->static_mapping_connection_tracking)
+ fformat (vam->ofp, " connection tracking");
+ fformat (vam->ofp, "\n");
+ }
+ }
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static int api_snat_show_config(vat_main_t * vam)
+{
+ vl_api_snat_show_config_t * mp;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_show_config");
+ return -99;
+ }
+
+ M(SNAT_SHOW_CONFIG, mp);
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_snat_address_details_t_handler
+ (vl_api_snat_address_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat (vam->ofp, "%U\n", format_ip4_address, &mp->ip_address);
+}
+
+static int api_snat_address_dump(vat_main_t * vam)
+{
+ vl_api_snat_address_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ M(SNAT_ADDRESS_DUMP, mp);
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void vl_api_snat_interface_details_t_handler
+ (vl_api_snat_interface_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat (vam->ofp, "sw_if_index %d %s\n", ntohl (mp->sw_if_index),
+ mp->is_inside ? "in" : "out");
+}
+
+static int api_snat_interface_dump(vat_main_t * vam)
+{
+ vl_api_snat_interface_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ M(SNAT_INTERFACE_DUMP, mp);
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int api_snat_set_workers (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_set_workers_t * mp;
+ uword *bitmap = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_bitmap_list, &bitmap))
+ ;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (bitmap == 0)
+ {
+ errmsg ("workers bitmap required\n");
+ return -99;
+ }
+
+ M(SNAT_SET_WORKERS, mp);
+ mp->worker_mask = clib_host_to_net_u64 (bitmap[0]);
+
+ S(mp);
+ W (ret);
+ return ret;
+}
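+
+/*
+ * Example (hypothetical): "snat_set_workers 1-2" parses through
+ * unformat_bitmap_list into a bitmap with bits 1 and 2 set, so
+ * bitmap[0] == 0x6 and the message carries worker_mask 0x6 in network
+ * byte order.
+ */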
+
+static void vl_api_snat_worker_details_t_handler
+ (vl_api_snat_worker_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat (vam->ofp, "worker_index %d (%s at lcore %u)\n",
+ ntohl (mp->worker_index), mp->name, ntohl (mp->lcore_id));
+}
+
+static int api_snat_worker_dump(vat_main_t * vam)
+{
+ vl_api_snat_worker_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ M(SNAT_WORKER_DUMP, mp);
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int api_snat_add_del_interface_addr (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_add_del_interface_addr_t * mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("interface / sw_if_index required\n");
+ return -99;
+ }
+
+ M(SNAT_ADD_DEL_INTERFACE_ADDR, mp);
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->is_add = is_add;
+
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_snat_interface_addr_details_t_handler
+ (vl_api_snat_interface_addr_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat (vam->ofp, "sw_if_index %d\n", ntohl (mp->sw_if_index));
+}
+
+static int api_snat_interface_addr_dump(vat_main_t * vam)
+{
+ vl_api_snat_interface_addr_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ M(SNAT_INTERFACE_ADDR_DUMP, mp);
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int api_snat_ipfix_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_ipfix_enable_disable_t * mp;
+ u32 domain_id = 0;
+ u32 src_port = 0;
+ u8 enable = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "domain %d", &domain_id))
+ ;
+ else if (unformat (i, "src_port %d", &src_port))
+ ;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M(SNAT_IPFIX_ENABLE_DISABLE, mp);
+ mp->domain_id = htonl(domain_id);
+ mp->src_port = htons((u16) src_port);
+ mp->enable = enable;
+
+ S(mp);
+ W (ret);
+ return ret;
+}
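+
+/*
+ * Example invocations (hypothetical):
+ *
+ *   snat_ipfix_enable_disable domain 42 src_port 4739
+ *   snat_ipfix_enable_disable disable
+ *
+ * Zero domain/src_port values let the plugin fall back to its defaults
+ * (observation domain 1, the registered IPFIX UDP port).
+ */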
+
+static void vl_api_snat_user_session_details_t_handler
+ (vl_api_snat_user_session_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat(vam->ofp, "%s session %U:%d to %U:%d protocol id %d "
+ "total packets %d total bytes %d\n",
+ mp->is_static ? "static" : "dynamic",
+ format_ip4_address, mp->inside_ip_address, ntohs(mp->inside_port),
+ format_ip4_address, mp->outside_ip_address, ntohs(mp->outside_port),
+ ntohs(mp->protocol), ntohl(mp->total_pkts), ntohl(mp->total_bytes));
+}
+
+static int api_snat_user_session_dump(vat_main_t * vam)
+{
+ unformat_input_t* i = vam->input;
+ vl_api_snat_user_session_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ ip4_address_t addr;
+ u32 vrf_id = ~0;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ if (unformat (i, "ip_address %U vrf_id %d",
+ unformat_ip4_address, &addr, &vrf_id))
+ ;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+
+ M(SNAT_USER_SESSION_DUMP, mp);
+ memset(mp->ip_address, 0, 16);
+ clib_memcpy(mp->ip_address, &addr, 4);
+ mp->vrf_id = htonl(vrf_id);
+ mp->is_ip4 = 1;
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void vl_api_snat_user_details_t_handler
+ (vl_api_snat_user_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat(vam->ofp, "user with ip %U with vrf_id %d "
+ "with %d sessions and %d static sessions\n",
+ format_ip4_address, mp->ip_address, ntohl(mp->vrf_id),
+ ntohl(mp->nsessions), ntohl(mp->nstaticsessions));
+}
+
+static int api_snat_user_dump(vat_main_t * vam)
+{
+ vl_api_snat_user_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ M(SNAT_USER_DUMP, mp);
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int api_snat_add_det_map (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_add_det_map_t * mp;
+ ip4_address_t in_addr, out_addr;
+ u32 in_plen, out_plen;
+ u8 is_add = 1;
+ int ret;
+
+ if (unformat (i, "in %U/%d out %U/%d",
+ unformat_ip4_address, &in_addr, &in_plen,
+ unformat_ip4_address, &out_addr, &out_plen))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+
+ M(SNAT_ADD_DET_MAP, mp);
+ clib_memcpy(mp->in_addr, &in_addr, 4);
+ mp->in_plen = in_plen;
+ clib_memcpy(mp->out_addr, &out_addr, 4);
+ mp->out_plen = out_plen;
+ mp->is_add = is_add;
+
+ S(mp);
+ W (ret);
+ return ret;
+}
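+
+/*
+ * Example (hypothetical prefixes):
+ *
+ *   snat_add_det_map in 10.0.0.0/24 out 1.2.3.0/28
+ *
+ * maps 256 inside hosts onto 16 outside addresses, a 16:1 sharing
+ * ratio; the resulting per-host port range can be inspected with
+ * snat_det_map_dump and snat_det_forward below.
+ */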
+
+static void vl_api_snat_det_forward_reply_t_handler
+ (vl_api_snat_det_forward_reply_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+ i32 retval = ntohl(mp->retval);
+
+ if (retval >= 0)
+ {
+ fformat (vam->ofp, "outside address %U", format_ip4_address, &mp->out_addr);
+ fformat (vam->ofp, " outside port range start %d", ntohs(mp->out_port_lo));
+ fformat (vam->ofp, " outside port range end %d\n", ntohs(mp->out_port_hi));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static int api_snat_det_forward (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_det_forward_t * mp;
+ ip4_address_t in_addr;
+ int ret;
+
+ if (unformat (i, "%U", unformat_ip4_address, &in_addr))
+ ;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+
+ M(SNAT_DET_FORWARD, mp);
+ clib_memcpy(mp->in_addr, &in_addr, 4);
+
+ S(mp);
+ W(ret);
+ return ret;
+}
+
+static void vl_api_snat_det_reverse_reply_t_handler
+ (vl_api_snat_det_reverse_reply_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+ i32 retval = ntohl(mp->retval);
+
+ if (retval >= 0)
+ {
+ fformat (vam->ofp, "inside address %U\n", format_ip4_address, &mp->in_addr);
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static int api_snat_det_reverse (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_det_reverse_t * mp;
+ ip4_address_t out_addr;
+ u32 out_port;
+ int ret;
+
+ if (unformat (i, "%U %d", unformat_ip4_address, &out_addr, &out_port))
+ ;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+
+ M(SNAT_DET_REVERSE, mp);
+ clib_memcpy(mp->out_addr, &out_addr, 4);
+ mp->out_port = htons((u16)out_port);
+
+ S(mp);
+ W(ret);
+ return ret;
+}
+
+static void vl_api_snat_det_map_details_t_handler
+ (vl_api_snat_det_map_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat (vam->ofp, "Deterministic S-NAT mapping in %U/%d out %U/%d "
+ "ports per host %d sharing ratio %d "
+ "number of sessions %d",
+ format_ip4_address, mp->in_addr, mp->in_plen,
+ format_ip4_address, mp->out_addr, mp->out_plen,
+ ntohs(mp->ports_per_host), ntohl(mp->sharing_ratio),
+ ntohl(mp->ses_num));
+}
+
+static int api_snat_det_map_dump(vat_main_t * vam)
+{
+ vl_api_snat_det_map_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_det_map_dump");
+ return -99;
+ }
+
+ M(SNAT_DET_MAP_DUMP, mp);
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int api_snat_det_set_timeouts (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_det_set_timeouts_t * mp;
+ u32 udp = SNAT_UDP_TIMEOUT;
+ u32 tcp_established = SNAT_TCP_ESTABLISHED_TIMEOUT;
+ u32 tcp_transitory = SNAT_TCP_TRANSITORY_TIMEOUT;
+ u32 icmp = SNAT_ICMP_TIMEOUT;
+ int ret;
+
+ if (unformat (i, "udp %d", &udp))
+ ;
+ else if (unformat (i, "tcp_established %d", &tcp_established))
+ ;
+ else if (unformat (i, "tcp_transitory %d", &tcp_transitory))
+ ;
+ else if (unformat (i, "icmp %d", &icmp))
+ ;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+
+ M(SNAT_DET_SET_TIMEOUTS, mp);
+ mp->udp = htonl(udp);
+ mp->tcp_established = htonl(tcp_established);
+ mp->tcp_transitory = htonl(tcp_transitory);
+ mp->icmp = htonl(icmp);
+
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_snat_det_get_timeouts_reply_t_handler
+ (vl_api_snat_det_get_timeouts_reply_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval >= 0)
+ {
+ fformat (vam->ofp, "udp timeout: %dsec\n", ntohl (mp->udp));
+ fformat (vam->ofp, "tcp-established timeout: %dsec",
+ ntohl (mp->tcp_established));
+ fformat (vam->ofp, "tcp-transitory timeout: %dsec",
+ ntohl (mp->tcp_transitory));
+ fformat (vam->ofp, "icmp timeout: %dsec", ntohl (mp->icmp));
+ }
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static int api_snat_det_get_timeouts(vat_main_t * vam)
+{
+ vl_api_snat_det_get_timeouts_t * mp;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_show_config");
+ return -99;
+ }
+
+ M(SNAT_DET_GET_TIMEOUTS, mp);
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static int api_snat_det_close_session_out (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_det_close_session_out_t * mp;
+ ip4_address_t out_addr, ext_addr;
+ u32 out_port, ext_port;
+ int ret;
+
+ if (unformat (i, "%U:%d %U:%d",
+ unformat_ip4_address, &out_addr, &out_port,
+ unformat_ip4_address, &ext_addr, &ext_port))
+ ;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+
+ M(SNAT_DET_CLOSE_SESSION_OUT, mp);
+ clib_memcpy(mp->out_addr, &out_addr, 4);
+ mp->out_port = ntohs((u16)out_port);
+ clib_memcpy(mp->ext_addr, &ext_addr, 4);
+ mp->ext_port = ntohs((u16)ext_port);
+
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static int api_snat_det_close_session_in (vat_main_t * vam)
+{
+ unformat_input_t * i = vam->input;
+ vl_api_snat_det_close_session_in_t * mp;
+ ip4_address_t in_addr, ext_addr;
+ u32 in_port, ext_port;
+ int ret;
+
+ if (unformat (i, "%U:%d %U:%d",
+ unformat_ip4_address, &in_addr, &in_port,
+ unformat_ip4_address, &ext_addr, &ext_port))
+ ;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+
+ M(SNAT_DET_CLOSE_SESSION_IN, mp);
+ clib_memcpy(mp->in_addr, &in_addr, 4);
+ mp->in_port = ntohs((u16)in_port);
+ clib_memcpy(mp->ext_addr, &ext_addr, 4);
+ mp->ext_port = ntohs((u16)ext_port);
+
+ S(mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_snat_det_session_details_t_handler
+ (vl_api_snat_det_session_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat(vam->ofp, "deterministic session, external host address %U, "
+ "external host port %d, outer port %d, inside port %d",
+ format_ip4_address, mp->ext_addr, mp->ext_port,
+ mp->out_port, mp->in_port);
+}
+
+static int api_snat_det_session_dump(vat_main_t * vam)
+{
+ unformat_input_t* i = vam->input;
+ vl_api_snat_det_session_dump_t * mp;
+ vl_api_snat_control_ping_t *mp_ping;
+ ip4_address_t user_addr;
+ int ret;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_det_session_dump");
+ return -99;
+ }
+
+ if (unformat (i, "user_addr %U", unformat_ip4_address, &user_addr))
+ ;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+
+ M(SNAT_DET_SESSION_DUMP, mp);
+ clib_memcpy (&mp->user_addr, &user_addr, 4);
+ S(mp);
+
+ /* Use a control ping for synchronization */
+ M(SNAT_CONTROL_PING, mp_ping);
+ S(mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(snat_add_address_range, "<start-addr> [- <end-addr>] [del]") \
+_(snat_interface_add_del_feature, \
+ "<intfc> | sw_if_index <id> [in] [out] [del]") \
+_(snat_add_static_mapping, "local_addr <ip> (external_addr <ip>" \
+ " | external_if <intfc> | external_sw_if_ndex <id>) " \
+ "[local_port <n>] [external_port <n>] [vrf <table-id>] [del] " \
+ "protocol <n>") \
+_(snat_set_workers, "<workers_bitmap>") \
+_(snat_static_mapping_dump, "") \
+_(snat_show_config, "") \
+_(snat_address_dump, "") \
+_(snat_interface_dump, "") \
+_(snat_worker_dump, "") \
+_(snat_add_del_interface_addr, \
+ "<intfc> | sw_if_index <id> [del]") \
+_(snat_interface_addr_dump, "") \
+_(snat_ipfix_enable_disable, "[domain <id>] [src_port <n>] " \
+ "[disable]") \
+_(snat_user_dump, "") \
+_(snat_user_session_dump, "ip_address <ip> vrf_id <table-id>") \
+_(snat_add_det_map, "in <in_addr>/<in_plen> out " \
+ "<out_addr>/<out_plen> [del]") \
+_(snat_det_forward, "<in_addr>") \
+_(snat_det_reverse, "<out_addr> <out_port>") \
+_(snat_det_map_dump, "") \
+_(snat_det_set_timeouts, "[udp <sec> | tcp_established <sec> | " \
+ "tcp_transitory <sec> | icmp <sec>]") \
+_(snat_det_get_timeouts, "") \
+_(snat_det_close_session_out, "<out_addr>:<out_port> " \
+ "<ext_addr>:<ext_port>") \
+_(snat_det_close_session_in, "<in_addr>:<in_port> " \
+ "<out_addr>:<out_port>") \
+_(snat_det_session_dump, "ip_address <user_addr>")
+
+static void
+snat_vat_api_hookup (vat_main_t *vam)
+{
+ snat_test_main_t * sm __attribute__((unused)) = &snat_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) \
+ hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ u8 * name;
+
+ sm->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "snat_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~0)
+ snat_vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c
new file mode 100755
index 00000000..f250136b
--- /dev/null
+++ b/src/plugins/nat/out2in.c
@@ -0,0 +1,2514 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/handoff.h>
+
+#include <vnet/ip/ip.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/ip4_fib.h>
+#include <nat/nat.h>
+#include <nat/nat_ipfix_logging.h>
+#include <nat/nat_det.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+typedef struct {
+ u32 sw_if_index;
+ u32 next_index;
+ u32 session_index;
+} snat_out2in_trace_t;
+
+typedef struct {
+ u32 next_worker_index;
+ u8 do_handoff;
+} snat_out2in_worker_handoff_trace_t;
+
+/* packet trace format function */
+static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
+
+ s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d",
+ t->sw_if_index, t->next_index, t->session_index);
+ return s;
+}
+
+static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
+
+ s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_out2in_worker_handoff_trace_t * t =
+ va_arg (*args, snat_out2in_worker_handoff_trace_t *);
+ char * m;
+
+ m = t->do_handoff ? "next worker" : "same worker";
+ s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
+
+ return s;
+}
+
+vlib_node_registration_t snat_out2in_node;
+vlib_node_registration_t snat_out2in_fast_node;
+vlib_node_registration_t snat_out2in_worker_handoff_node;
+vlib_node_registration_t snat_det_out2in_node;
+
+#define foreach_snat_out2in_error \
+_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
+_(OUT2IN_PACKETS, "Good out2in packets processed") \
+_(BAD_ICMP_TYPE, "unsupported ICMP type") \
+_(NO_TRANSLATION, "No translation") \
+_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")
+
+typedef enum {
+#define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
+ foreach_snat_out2in_error
+#undef _
+ SNAT_OUT2IN_N_ERROR,
+} snat_out2in_error_t;
+
+static char * snat_out2in_error_strings[] = {
+#define _(sym,string) string,
+ foreach_snat_out2in_error
+#undef _
+};
+
+typedef enum {
+ SNAT_OUT2IN_NEXT_DROP,
+ SNAT_OUT2IN_NEXT_LOOKUP,
+ SNAT_OUT2IN_NEXT_ICMP_ERROR,
+ SNAT_OUT2IN_N_NEXT,
+} snat_out2in_next_t;
+
+/**
+ * @brief Create session for static mapping.
+ *
+ * Create NAT session initiated by host from external network with static
+ * mapping.
+ *
+ * @param sm NAT main.
+ * @param b0 Vlib buffer.
+ * @param in2out In2out NAT44 session key.
+ * @param out2in Out2in NAT44 session key.
+ * @param node Vlib node.
+ *
+ * @returns SNAT session if successfully created, otherwise 0.
+ */
+static inline snat_session_t *
+create_session_for_static_mapping (snat_main_t *sm,
+ vlib_buffer_t *b0,
+ snat_session_key_t in2out,
+ snat_session_key_t out2in,
+ vlib_node_runtime_t * node,
+ u32 thread_index)
+{
+ snat_user_t *u;
+ snat_user_key_t user_key;
+ snat_session_t *s;
+ clib_bihash_kv_8_8_t kv0, value0;
+ dlist_elt_t * per_user_translation_list_elt;
+ dlist_elt_t * per_user_list_head_elt;
+ ip4_header_t *ip0;
+
+ if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
+ return 0;
+ }
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ user_key.addr = in2out.addr;
+ user_key.fib_index = in2out.fib_index;
+ kv0.key = user_key.as_u64;
+
+ /* Does this "user" (keyed on inside ip4 address + fib index) exist yet? */
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
+ &kv0, &value0))
+ {
+ /* no, make a new one */
+ pool_get (sm->per_thread_data[thread_index].users, u);
+ memset (u, 0, sizeof (*u));
+ u->addr = in2out.addr;
+ u->fib_index = in2out.fib_index;
+
+ pool_get (sm->per_thread_data[thread_index].list_pool,
+ per_user_list_head_elt);
+
+ u->sessions_per_user_list_head_index = per_user_list_head_elt -
+ sm->per_thread_data[thread_index].list_pool;
+
+ clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
+ u->sessions_per_user_list_head_index);
+
+ kv0.value = u - sm->per_thread_data[thread_index].users;
+
+ /* add user */
+ clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
+ &kv0, 1 /* is_add */);
+ }
+ else
+ {
+ u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
+ value0.value);
+ }
+
+ pool_get (sm->per_thread_data[thread_index].sessions, s);
+ memset (s, 0, sizeof (*s));
+
+ s->outside_address_index = ~0;
+ s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+ s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
+ u->nstaticsessions++;
+
+ /* Create list elts */
+ pool_get (sm->per_thread_data[thread_index].list_pool,
+ per_user_translation_list_elt);
+ clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
+ per_user_translation_list_elt -
+ sm->per_thread_data[thread_index].list_pool);
+
+ per_user_translation_list_elt->value =
+ s - sm->per_thread_data[thread_index].sessions;
+ s->per_user_index =
+ per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool;
+ s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s->per_user_list_head_index,
+ per_user_translation_list_elt -
+ sm->per_thread_data[thread_index].list_pool);
+
+ s->in2out = in2out;
+ s->out2in = out2in;
+ s->in2out.protocol = out2in.protocol;
+
+ /* Add to translation hashes */
+ kv0.key = s->in2out.as_u64;
+ kv0.value = s - sm->per_thread_data[thread_index].sessions;
+ if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
+ 1 /* is_add */))
+ clib_warning ("in2out key add failed");
+
+ kv0.key = s->out2in.as_u64;
+ kv0.value = s - sm->per_thread_data[thread_index].sessions;
+
+ if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
+ 1 /* is_add */))
+ clib_warning ("out2in key add failed");
+
+ /* log NAT event */
+ snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
+ s->out2in.addr.as_u32,
+ s->in2out.protocol,
+ s->in2out.port,
+ s->out2in.port,
+ s->in2out.fib_index);
+ return s;
+}
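+
+/*
+ * A note on the index arithmetic above: VPP pools are contiguous vectors,
+ * so "elt - pool" pointer subtraction yields a u32 element index that stays
+ * valid across pool reallocation (raw pointers do not). The pattern used
+ * throughout this file is:
+ *
+ *   pool_get (pool, elt);                   // allocate an element
+ *   index = elt - pool;                     // remember its index
+ *   elt = pool_elt_at_index (pool, index);  // re-fetch later
+ */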
+
+static_always_inline
+snat_out2in_error_t icmp_get_key(ip4_header_t *ip0,
+ snat_session_key_t *p_key0)
+{
+ icmp46_header_t *icmp0;
+ snat_session_key_t key0;
+ icmp_echo_header_t *echo0, *inner_echo0 = 0;
+ ip4_header_t *inner_ip0;
+ void *l4_header = 0;
+ icmp46_header_t *inner_icmp0;
+
+ icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+
+ if (!icmp_is_error_message (icmp0))
+ {
+ key0.protocol = SNAT_PROTOCOL_ICMP;
+ key0.addr = ip0->dst_address;
+ key0.port = echo0->identifier;
+ }
+ else
+ {
+ inner_ip0 = (ip4_header_t *)(echo0+1);
+ l4_header = ip4_next_header (inner_ip0);
+ key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
+ key0.addr = inner_ip0->src_address;
+ switch (key0.protocol)
+ {
+ case SNAT_PROTOCOL_ICMP:
+ inner_icmp0 = (icmp46_header_t*)l4_header;
+ inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
+ key0.port = inner_echo0->identifier;
+ break;
+ case SNAT_PROTOCOL_UDP:
+ case SNAT_PROTOCOL_TCP:
+ key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
+ break;
+ default:
+ return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL;
+ }
+ }
+ *p_key0 = key0;
+ return -1; /* success */
+}
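+
+/*
+ * Note: icmp_get_key() signals success with -1 rather than 0, because 0 is
+ * itself a valid snat_out2in_error_t value (SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL
+ * is the first enum member); callers therefore test "err != -1".
+ */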
+
+/**
+ * Get address and port values to be used for ICMP packet translation
+ * and create session if needed
+ *
+ * @param[in,out] sm NAT main
+ * @param[in,out] node NAT node runtime
+ * @param[in] thread_index thread index
+ * @param[in,out] b0 buffer containing packet to be translated
+ * @param[in] ip0 ip4 header of the packet
+ * @param[out] p_proto protocol used for matching
+ * @param[out] p_value address and port after NAT translation
+ * @param[out] p_dont_translate if packet should not be translated
+ * @param d optional output: matched session (snat_session_t **)
+ * @param e unused
+ */
+u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e)
+{
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ snat_session_key_t key0;
+ snat_session_key_t sm0;
+ snat_session_t *s0 = 0;
+ u8 dont_translate = 0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u8 is_addr_only;
+ u32 next0 = ~0;
+ int err;
+
+ icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ key0.protocol = 0; /* the "out:" label below reads key0.protocol on every path */
+
+ err = icmp_get_key (ip0, &key0);
+ if (err != -1)
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
+ &value0))
+ {
+ /* Try to match static mapping by external address and port,
+ destination address and port in packet */
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
+ {
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
+ ip0->dst_address.as_u32)))
+ {
+ dont_translate = 1;
+ goto out;
+ }
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
+ (icmp0->type != ICMP4_echo_request || !is_addr_only)))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+ /* Create session initiated by host from external network */
+ s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
+ node, thread_index);
+
+ if (!s0)
+ {
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
+ icmp0->type != ICMP4_echo_request &&
+ !icmp_is_error_message (icmp0)))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+ s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
+ value0.value);
+ }
+
+out:
+ *p_proto = key0.protocol;
+ if (s0)
+ *p_value = s0->in2out;
+ *p_dont_translate = dont_translate;
+ if (d)
+ *(snat_session_t**)d = s0;
+ return next0;
+}
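+
+/*
+ * icmp_match_out2in_slow(), icmp_match_out2in_fast() and
+ * icmp_match_out2in_det() share one signature so that any of them can be
+ * installed as sm->icmp_match_out2in_cb, which icmp_out2in() below invokes;
+ * the active callback is presumably selected when the NAT mode (slow path,
+ * fast path or deterministic) is configured.
+ */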
+
+/**
+ * Get address and port values to be used for ICMP packet translation
+ *
+ * @param[in] sm NAT main
+ * @param[in,out] node NAT node runtime
+ * @param[in] thread_index thread index
+ * @param[in,out] b0 buffer containing packet to be translated
+ * @param[in] ip0 ip4 header of the packet
+ * @param[out] p_proto protocol used for matching
+ * @param[out] p_value address and port after NAT translation
+ * @param[out] p_dont_translate if packet should not be translated
+ * @param d unused
+ * @param e unused
+ */
+u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e)
+{
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ snat_session_key_t key0;
+ snat_session_key_t sm0;
+ u8 dont_translate = 0;
+ u8 is_addr_only;
+ u32 next0 = ~0;
+ int err;
+
+ icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ err = icmp_get_key (ip0, &key0);
+ if (err != -1)
+ {
+ b0->error = node->errors[err];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out2;
+ }
+ key0.fib_index = rx_fib_index0;
+
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only))
+ {
+ /* Don't NAT packet aimed at the intfc address */
+ if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
+ {
+ dont_translate = 1;
+ goto out;
+ }
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
+ (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
+ !icmp_is_error_message (icmp0)))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+out:
+ *p_value = sm0;
+out2:
+ *p_proto = key0.protocol;
+ *p_dont_translate = dont_translate;
+ return next0;
+}
+
+static inline u32 icmp_out2in (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0,
+ u32 sw_if_index0,
+ u32 rx_fib_index0,
+ vlib_node_runtime_t * node,
+ u32 next0,
+ u32 thread_index,
+ void *d,
+ void *e)
+{
+ snat_session_key_t sm0;
+ u8 protocol;
+ icmp_echo_header_t *echo0, *inner_echo0 = 0;
+ ip4_header_t *inner_ip0 = 0;
+ void *l4_header = 0;
+ icmp46_header_t *inner_icmp0;
+ u8 dont_translate;
+ u32 new_addr0, old_addr0;
+ u16 old_id0, new_id0;
+ ip_csum_t sum0;
+ u16 checksum0;
+ u32 next0_tmp;
+
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+
+ next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0,
+ &protocol, &sm0, &dont_translate, d, e);
+ if (next0_tmp != ~0)
+ next0 = next0_tmp;
+ if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate)
+ goto out;
+
+ sum0 = ip_incremental_checksum (0, icmp0,
+ ntohs(ip0->length) - ip4_header_bytes (ip0));
+ checksum0 = ~ip_csum_fold (sum0);
+ if (checksum0 != 0 && checksum0 != 0xffff)
+ {
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+ old_addr0 = ip0->dst_address.as_u32;
+ new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (!icmp_is_error_message (icmp0))
+ {
+ new_id0 = sm0.port;
+ if (PREDICT_FALSE(new_id0 != echo0->identifier))
+ {
+ old_id0 = echo0->identifier;
+ new_id0 = sm0.port;
+ echo0->identifier = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+ identifier /* changed member */);
+ icmp0->checksum = ip_csum_fold (sum0);
+ }
+ }
+ else
+ {
+ inner_ip0 = (ip4_header_t *)(echo0+1);
+ l4_header = ip4_next_header (inner_ip0);
+
+ if (!ip4_header_checksum_is_valid (inner_ip0))
+ {
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+ old_addr0 = inner_ip0->src_address.as_u32;
+ inner_ip0->src_address = sm0.addr;
+ new_addr0 = inner_ip0->src_address.as_u32;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+ src_address /* changed member */);
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ switch (protocol)
+ {
+ case SNAT_PROTOCOL_ICMP:
+ inner_icmp0 = (icmp46_header_t*)l4_header;
+ inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
+
+ old_id0 = inner_echo0->identifier;
+ new_id0 = sm0.port;
+ inner_echo0->identifier = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+ identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+ break;
+ case SNAT_PROTOCOL_UDP:
+ case SNAT_PROTOCOL_TCP:
+ old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
+ new_id0 = sm0.port;
+ ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
+ src_port);
+ icmp0->checksum = ip_csum_fold (sum0);
+ break;
+ default:
+ ASSERT(0);
+ }
+ }
+
+out:
+ return next0;
+}
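+
+/*
+ * The header rewrites above rely on RFC 1624 incremental checksum updates:
+ * HC' = ~(~HC + ~m + m') for an old field value m and new value m', so only
+ * the changed 16/32-bit fields are folded into the existing checksum. The
+ * type/member pair given to ip_csum_update() appears to serve only to size
+ * the updated field, which is why a u16 port can be folded in via the
+ * unrelated ip4_header_t "length" member -- the "cheat" noted elsewhere in
+ * this file.
+ */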
+
+
+static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0,
+ u32 sw_if_index0,
+ u32 rx_fib_index0,
+ vlib_node_runtime_t * node,
+ u32 next0, f64 now,
+ u32 thread_index,
+ snat_session_t ** p_s0)
+{
+ next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, thread_index, p_s0, 0);
+ snat_session_t * s0 = *p_s0;
+ if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
+ {
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+ }
+ return next0;
+}
+
+static snat_session_t *
+snat_out2in_unknown_proto (snat_main_t *sm,
+ vlib_buffer_t * b,
+ ip4_header_t * ip,
+ u32 rx_fib_index,
+ u32 thread_index,
+ f64 now,
+ vlib_main_t * vm,
+ vlib_node_runtime_t * node)
+{
+ clib_bihash_kv_8_8_t kv, value;
+ clib_bihash_kv_16_8_t s_kv, s_value;
+ snat_static_mapping_t *m;
+ snat_session_key_t m_key;
+ u32 old_addr, new_addr;
+ ip_csum_t sum;
+ nat_ed_ses_key_t key;
+ snat_session_t * s;
+ snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
+ snat_user_key_t u_key;
+ snat_user_t *u;
+ dlist_elt_t *head, *elt;
+
+ old_addr = ip->dst_address.as_u32;
+
+ key.l_addr = ip->dst_address;
+ key.r_addr = ip->src_address;
+ key.fib_index = rx_fib_index;
+ key.proto = ip->protocol;
+ key.rsvd = 0;
+ key.l_port = 0;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+
+ if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
+ {
+ s = pool_elt_at_index (tsm->sessions, s_value.value);
+ new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
+ }
+ else
+ {
+ if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
+ {
+ b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
+ return 0;
+ }
+
+ m_key.addr = ip->dst_address;
+ m_key.port = 0;
+ m_key.protocol = 0;
+ m_key.fib_index = rx_fib_index;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ {
+ b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ return 0;
+ }
+
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+
+ new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
+
+ u_key.addr = ip->src_address;
+ u_key.fib_index = m->fib_index;
+ kv.key = u_key.as_u64;
+
+ /* Ever heard of the "user" = src ip4 address before? */
+ if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
+ {
+ /* no, make a new one */
+ pool_get (tsm->users, u);
+ memset (u, 0, sizeof (*u));
+ u->addr = ip->src_address;
+ u->fib_index = rx_fib_index;
+
+ pool_get (tsm->list_pool, head);
+ u->sessions_per_user_list_head_index = head - tsm->list_pool;
+
+ clib_dlist_init (tsm->list_pool,
+ u->sessions_per_user_list_head_index);
+
+ kv.value = u - tsm->users;
+
+ /* add user */
+ clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1);
+ }
+ else
+ {
+ u = pool_elt_at_index (tsm->users, value.value);
+ }
+
+ /* Create a new session */
+ pool_get (tsm->sessions, s);
+ memset (s, 0, sizeof (*s));
+
+ s->ext_host_addr.as_u32 = ip->src_address.as_u32;
+ s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
+ s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+ s->outside_address_index = ~0;
+ s->out2in.addr.as_u32 = old_addr;
+ s->out2in.fib_index = rx_fib_index;
+ s->in2out.addr.as_u32 = new_addr;
+ s->in2out.fib_index = m->fib_index;
+ s->in2out.port = s->out2in.port = ip->protocol;
+ u->nstaticsessions++;
+
+ /* Create list elts */
+ pool_get (tsm->list_pool, elt);
+ clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
+ elt->value = s - tsm->sessions;
+ s->per_user_index = elt - tsm->list_pool;
+ s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+ clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+ s->per_user_index);
+
+ /* Add to lookup tables */
+ s_kv.value = s - tsm->sessions;
+ if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
+ clib_warning ("out2in key add failed");
+
+ key.l_addr = ip->dst_address;
+ key.fib_index = m->fib_index;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
+ clib_warning ("in2out key add failed");
+ }
+
+ /* Update IP checksum */
+ sum = ip->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
+ ip->checksum = ip_csum_fold (sum);
+
+ vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
+
+ /* Accounting */
+ s->last_heard = now;
+ s->total_pkts++;
+ s->total_bytes += vlib_buffer_length_in_chain (vm, b);
+ /* Per-user LRU list maintenance */
+ clib_dlist_remove (tsm->list_pool, s->per_user_index);
+ clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+ s->per_user_index);
+
+ return s;
+}
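+
+/*
+ * Unknown-protocol traffic has no ports to key on, so the 16-byte
+ * endpoint-dependent key above uses l_port = 0 and the session's port
+ * fields are repurposed to store the IP protocol number
+ * (s->in2out.port = s->out2in.port = ip->protocol).
+ */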
+
+static snat_session_t *
+snat_out2in_lb (snat_main_t *sm,
+ vlib_buffer_t * b,
+ ip4_header_t * ip,
+ u32 rx_fib_index,
+ u32 thread_index,
+ f64 now,
+ vlib_main_t * vm,
+ vlib_node_runtime_t * node)
+{
+ nat_ed_ses_key_t key;
+ clib_bihash_kv_16_8_t s_kv, s_value;
+ udp_header_t *udp = ip4_next_header (ip);
+ tcp_header_t *tcp = (tcp_header_t *) udp;
+ snat_session_t *s = 0;
+ snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
+ snat_session_key_t e_key, l_key;
+ clib_bihash_kv_8_8_t kv, value;
+ u32 old_addr, new_addr;
+ u32 proto = ip_proto_to_snat_proto (ip->protocol);
+ u16 new_port, old_port;
+ ip_csum_t sum;
+ snat_user_key_t u_key;
+ snat_user_t *u;
+ dlist_elt_t *head, *elt;
+
+ old_addr = ip->dst_address.as_u32;
+
+ key.l_addr = ip->dst_address;
+ key.r_addr = ip->src_address;
+ key.fib_index = rx_fib_index;
+ key.proto = ip->protocol;
+ key.rsvd = 0;
+ key.l_port = udp->dst_port;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+
+ if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
+ {
+ s = pool_elt_at_index (tsm->sessions, s_value.value);
+ }
+ else
+ {
+ if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
+ {
+ b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED];
+ return 0;
+ }
+
+ e_key.addr = ip->dst_address;
+ e_key.port = udp->dst_port;
+ e_key.protocol = proto;
+ e_key.fib_index = rx_fib_index;
+ if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0))
+ return 0;
+
+ u_key.addr = l_key.addr;
+ u_key.fib_index = l_key.fib_index;
+ kv.key = u_key.as_u64;
+
+ /* Ever heard of the "user" = local ip4 address before? */
+ if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
+ {
+ /* no, make a new one */
+ pool_get (tsm->users, u);
+ memset (u, 0, sizeof (*u));
+ u->addr = l_key.addr;
+ u->fib_index = l_key.fib_index;
+
+ pool_get (tsm->list_pool, head);
+ u->sessions_per_user_list_head_index = head - tsm->list_pool;
+
+ clib_dlist_init (tsm->list_pool,
+ u->sessions_per_user_list_head_index);
+
+ kv.value = u - tsm->users;
+
+ /* add user */
+ if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
+ clib_warning ("user key add failed");
+ }
+ else
+ {
+ u = pool_elt_at_index (tsm->users, value.value);
+ }
+
+ /* Create a new session */
+ pool_get (tsm->sessions, s);
+ memset (s, 0, sizeof (*s));
+
+ s->ext_host_addr.as_u32 = ip->src_address.as_u32;
+ s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+ s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
+ s->outside_address_index = ~0;
+ s->out2in = e_key;
+ s->in2out = l_key;
+ u->nstaticsessions++;
+
+ /* Create list elts */
+ pool_get (tsm->list_pool, elt);
+ clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
+ elt->value = s - tsm->sessions;
+ s->per_user_index = elt - tsm->list_pool;
+ s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+ clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+ s->per_user_index);
+
+ /* Add to lookup tables */
+ s_kv.value = s - tsm->sessions;
+ if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
+ clib_warning ("out2in-ed key add failed");
+
+ key.l_addr = l_key.addr;
+ key.fib_index = l_key.fib_index;
+ key.l_port = l_key.port;
+ s_kv.key[0] = key.as_u64[0];
+ s_kv.key[1] = key.as_u64[1];
+ if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
+ clib_warning ("in2out-ed key add failed");
+ }
+
+ new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
+
+ /* Update IP checksum */
+ sum = ip->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
+ ip->checksum = ip_csum_fold (sum);
+
+ if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
+ {
+ old_port = tcp->dst_port;
+ tcp->dst_port = s->in2out.port;
+ new_port = tcp->dst_port;
+
+ sum = tcp->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
+ sum = ip_csum_update (sum, old_port, new_port, ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp->checksum = ip_csum_fold(sum);
+ }
+ else
+ {
+ udp->dst_port = s->in2out.port;
+ udp->checksum = 0;
+ }
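+ /* A zero UDP checksum is legal in IPv4 (RFC 768 defines zero as
+ "checksum not computed"), so clearing it is cheaper than the
+ incremental update done for TCP above. */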
+
+ vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
+
+ /* Accounting */
+ s->last_heard = now;
+ s->total_pkts++;
+ s->total_bytes += vlib_buffer_length_in_chain (vm, b);
+ return s;
+}
+
+static uword
+snat_out2in_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_out2in_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+ f64 now = vlib_time_now (vm);
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 sw_if_index0, sw_if_index1;
+ ip4_header_t * ip0, *ip1;
+ ip_csum_t sum0, sum1;
+ u32 new_addr0, old_addr0;
+ u16 new_port0, old_port0;
+ u32 new_addr1, old_addr1;
+ u16 new_port1, old_port1;
+ udp_header_t * udp0, * udp1;
+ tcp_header_t * tcp0, * tcp1;
+ icmp46_header_t * icmp0, * icmp1;
+ snat_session_key_t key0, key1, sm0, sm1;
+ u32 rx_fib_index0, rx_fib_index1;
+ u32 proto0, proto1;
+ snat_session_t * s0 = 0, * s1 = 0;
+ clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
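+
+ /* Classic VPP dual-loop: two packets are processed per iteration
+ while the buffer headers and data of the following two are
+ prefetched, hence the "n_left_from >= 4 && n_left_to_next >= 2"
+ guard above. */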
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ vnet_buffer (b0)->snat.flags = 0;
+ vnet_buffer (b1)->snat.flags = 0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
+ goto trace0;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ if (PREDICT_FALSE (proto0 == ~0))
+ {
+ s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
+ thread_index, now, vm, node);
+ if (!s0)
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace0;
+ }
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_out2in_slow_path
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, now, thread_index, &s0);
+ goto trace0;
+ }
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.protocol = proto0;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
+ &kv0, &value0))
+ {
+ /* Try to match static mapping by external address and port,
+ destination address and port in packet */
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ /*
+ * Send DHCP packets to the ipv4 stack, or we won't
+ * be able to use dhcp client on the outside interface
+ */
+ if (proto0 != SNAT_PROTOCOL_UDP
+ || (udp0->dst_port
+ != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace0;
+ }
+
+ /* Create session initiated by host from external network */
+ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
+ thread_index);
+ if (!s0)
+ {
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace0;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (value0.value == ~0ULL))
+ {
+ s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
+ now, vm, node);
+ if (!s0)
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace0;
+ }
+ else
+ {
+ s0 = pool_elt_at_index (
+ sm->per_thread_data[thread_index].sessions,
+ value0.value);
+ }
+ }
+
+ old_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address = s0->in2out.addr;
+ new_addr0 = ip0->dst_address.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->dst_port;
+ tcp0->dst_port = s0->in2out.port;
+ new_port0 = tcp0->dst_port;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->dst_port;
+ udp0->dst_port = s0->in2out.port;
+ udp0->checksum = 0;
+ }
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+ trace0:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (s0)
+ t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
+ }
+
+ pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+
+ ip1 = vlib_buffer_get_current (b1);
+ udp1 = ip4_next_header (ip1);
+ tcp1 = (tcp_header_t *) udp1;
+ icmp1 = (icmp46_header_t *) udp1;
+
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+ rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index1);
+
+ if (PREDICT_FALSE(ip1->ttl == 1))
+ {
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
+ goto trace1;
+ }
+
+ proto1 = ip_proto_to_snat_proto (ip1->protocol);
+
+ if (PREDICT_FALSE (proto1 == ~0))
+ {
+ s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1,
+ thread_index, now, vm, node);
+ if (!s1)
+ next1 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace1;
+ }
+
+ if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
+ {
+ next1 = icmp_out2in_slow_path
+ (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
+ next1, now, thread_index, &s1);
+ goto trace1;
+ }
+
+ key1.addr = ip1->dst_address;
+ key1.port = udp1->dst_port;
+ key1.protocol = proto1;
+ key1.fib_index = rx_fib_index1;
+
+ kv1.key = key1.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
+ &kv1, &value1))
+ {
+ /* Try to match static mapping by external address and port,
+ destination address and port in packet */
+ if (snat_static_mapping_match(sm, key1, &sm1, 1, 0))
+ {
+ b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ /*
+ * Send DHCP packets to the ipv4 stack, or we won't
+ * be able to use dhcp client on the outside interface
+ */
+ if (proto1 != SNAT_PROTOCOL_UDP
+ || (udp1->dst_port
+ != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
+ next1 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace1;
+ }
+
+ /* Create session initiated by host from external network */
+ s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
+ thread_index);
+ if (!s1)
+ {
+ next1 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace1;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (value1.value == ~0ULL))
+ {
+ s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index,
+ now, vm, node);
+ if (!s1)
+ next1 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace1;
+ }
+ else
+ {
+ s1 = pool_elt_at_index (
+ sm->per_thread_data[thread_index].sessions,
+ value1.value);
+ }
+ }
+
+ old_addr1 = ip1->dst_address.as_u32;
+ ip1->dst_address = s1->in2out.addr;
+ new_addr1 = ip1->dst_address.as_u32;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
+
+ sum1 = ip1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
+ {
+ old_port1 = tcp1->dst_port;
+ tcp1->dst_port = s1->in2out.port;
+ new_port1 = tcp1->dst_port;
+
+ sum1 = tcp1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum1 = ip_csum_update (sum1, old_port1, new_port1,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp1->checksum = ip_csum_fold(sum1);
+ }
+ else
+ {
+ old_port1 = udp1->dst_port;
+ udp1->dst_port = s1->in2out.port;
+ udp1->checksum = 0;
+ }
+
+ /* Accounting */
+ s1->last_heard = now;
+ s1->total_pkts++;
+ s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s1))
+ {
+ clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+ s1->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s1->per_user_list_head_index,
+ s1->per_user_index);
+ }
+ trace1:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ t->session_index = ~0;
+ if (s1)
+ t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
+ }
+
+ pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ u32 new_addr0, old_addr0;
+ u16 new_port0, old_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ icmp46_header_t * icmp0;
+ snat_session_key_t key0, sm0;
+ u32 rx_fib_index0;
+ u32 proto0;
+ snat_session_t * s0 = 0;
+ clib_bihash_kv_8_8_t kv0, value0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vnet_buffer (b0)->snat.flags = 0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ if (PREDICT_FALSE (proto0 == ~0))
+ {
+ s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0,
+ thread_index, now, vm, node);
+ if (!s0)
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace00;
+ }
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
+ goto trace00;
+ }
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_out2in_slow_path
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, now, thread_index, &s0);
+ goto trace00;
+ }
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.protocol = proto0;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
+ &kv0, &value0))
+ {
+ /* Try to match static mapping by external address and port,
+ destination address and port in packet */
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ /*
+ * Send DHCP packets to the ipv4 stack, or we won't
+ * be able to use dhcp client on the outside interface
+ */
+ if (proto0 != SNAT_PROTOCOL_UDP
+ || (udp0->dst_port
+ != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Create session initiated by host from external network */
+ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
+ thread_index);
+ if (!s0)
+ {
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace00;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (value0.value == ~0ULL))
+ {
+ s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index,
+ now, vm, node);
+ if (!s0)
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto trace00;
+ }
+ else
+ {
+ s0 = pool_elt_at_index (
+ sm->per_thread_data[thread_index].sessions,
+ value0.value);
+ }
+ }
+
+ old_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address = s0->in2out.addr;
+ new_addr0 = ip0->dst_address.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->dst_port;
+ tcp0->dst_port = s0->in2out.port;
+ new_port0 = tcp0->dst_port;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->dst_port;
+ udp0->dst_port = s0->in2out.port;
+ udp0->checksum = 0;
+ }
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+ trace00:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (s0)
+ t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
+ }
+
+ pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, snat_out2in_node.index,
+ SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_out2in_node) = {
+ .function = snat_out2in_node_fn,
+ .name = "nat44-out2in",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_out2in_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_out2in_error_strings),
+ .error_strings = snat_out2in_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_OUT2IN_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
+ [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
+
+/**************************/
+/*** deterministic mode ***/
+/**************************/
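+
+/*
+ * Deterministic (CGN) mode avoids per-session bihash lookups in the data
+ * path: snat_det_map_by_out() locates the map owning the outside address
+ * and snat_det_reverse() derives the inside address from the outside
+ * address and port (the mapping is deterministic, hence the name), leaving
+ * only the per-map session table to consult.
+ */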
+static uword
+snat_det_out2in_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_out2in_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 sw_if_index0, sw_if_index1;
+ ip4_header_t * ip0, * ip1;
+ ip_csum_t sum0, sum1;
+ ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
+ u16 new_port0, old_port0, old_port1, new_port1;
+ udp_header_t * udp0, * udp1;
+ tcp_header_t * tcp0, * tcp1;
+ u32 proto0, proto1;
+ snat_det_out_key_t key0, key1;
+ snat_det_map_t * dm0, * dm1;
+ snat_det_session_t * ses0 = 0, * ses1 = 0;
+ u32 rx_fib_index0, rx_fib_index1;
+ icmp46_header_t * icmp0, * icmp1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
+ goto trace0;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+ icmp0 = (icmp46_header_t *) udp0;
+
+ next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
+ rx_fib_index0, node, next0, thread_index,
+ &ses0, &dm0);
+ goto trace0;
+ }
+
+ key0.ext_host_addr = ip0->src_address;
+ key0.ext_host_port = tcp0->src; /* UDP ports overlay the same bytes */
+ key0.out_port = tcp0->dst;
+
+ dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
+ if (PREDICT_FALSE(!dm0))
+ {
+ clib_warning("unknown dst address: %U",
+ format_ip4_address, &ip0->dst_address);
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+
+ snat_det_reverse(dm0, &ip0->dst_address,
+ clib_net_to_host_u16(tcp0->dst), &new_addr0);
+
+ ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
+ if (PREDICT_FALSE(!ses0))
+ {
+ clib_warning("no match src %U:%d dst %U:%d for user %U",
+ format_ip4_address, &ip0->src_address,
+ clib_net_to_host_u16 (tcp0->src),
+ format_ip4_address, &ip0->dst_address,
+ clib_net_to_host_u16 (tcp0->dst),
+ format_ip4_address, &new_addr0);
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+ new_port0 = ses0->in_port;
+
+ old_addr0 = ip0->dst_address;
+ ip0->dst_address = new_addr0;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
+ ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
+ else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
+ snat_det_ses_close(dm0, ses0);
+
+ old_port0 = tcp0->dst;
+ tcp0->dst = new_port0;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->dst_port;
+ udp0->dst_port = new_port0;
+ udp0->checksum = 0;
+ }
+
+ trace0:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (ses0)
+ t->session_index = ses0 - dm0->sessions;
+ }
+
+ pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+ ip1 = vlib_buffer_get_current (b1);
+ udp1 = ip4_next_header (ip1);
+ tcp1 = (tcp_header_t *) udp1;
+
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE(ip1->ttl == 1))
+ {
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
+ goto trace1;
+ }
+
+ proto1 = ip_proto_to_snat_proto (ip1->protocol);
+
+ if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
+ {
+ rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
+ icmp1 = (icmp46_header_t *) udp1;
+
+ next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1,
+ rx_fib_index1, node, next1, thread_index,
+ &ses1, &dm1);
+ goto trace1;
+ }
+
+ key1.ext_host_addr = ip1->src_address;
+ key1.ext_host_port = tcp1->src;
+ key1.out_port = tcp1->dst;
+
+ dm1 = snat_det_map_by_out(sm, &ip1->dst_address);
+ if (PREDICT_FALSE(!dm1))
+ {
+ clib_warning("unknown dst address: %U",
+ format_ip4_address, &ip1->dst_address);
+ next1 = SNAT_OUT2IN_NEXT_DROP;
+ b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace1;
+ }
+
+ snat_det_reverse(dm1, &ip1->dst_address,
+ clib_net_to_host_u16(tcp1->dst), &new_addr1);
+
+ ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64);
+ if (PREDICT_FALSE(!ses1))
+ {
+ clib_warning("no match src %U:%d dst %U:%d for user %U",
+ format_ip4_address, &ip1->src_address,
+ clib_net_to_host_u16 (tcp1->src),
+ format_ip4_address, &ip1->dst_address,
+ clib_net_to_host_u16 (tcp1->dst),
+ format_ip4_address, &new_addr1);
+ next1 = SNAT_OUT2IN_NEXT_DROP;
+ b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace1;
+ }
+ new_port1 = ses1->in_port;
+
+ old_addr1 = ip1->dst_address;
+ ip1->dst_address = new_addr1;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
+
+ sum1 = ip1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
+ {
+ if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
+ ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT;
+ else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK)
+ snat_det_ses_close(dm1, ses1);
+
+ old_port1 = tcp1->dst;
+ tcp1->dst = new_port1;
+
+ sum1 = tcp1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum1 = ip_csum_update (sum1, old_port1, new_port1,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp1->checksum = ip_csum_fold(sum1);
+ }
+ else
+ {
+ old_port1 = udp1->dst_port;
+ udp1->dst_port = new_port1;
+ udp1->checksum = 0;
+ }
+
+ trace1:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ t->session_index = ~0;
+ if (ses1)
+ t->session_index = ses1 - dm1->sessions;
+ }
+
+ pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ ip4_address_t new_addr0, old_addr0;
+ u16 new_port0, old_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ u32 proto0;
+ snat_det_out_key_t key0;
+ snat_det_map_t * dm0;
+ snat_det_session_t * ses0 = 0;
+ u32 rx_fib_index0;
+ icmp46_header_t * icmp0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
+ goto trace00;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+ icmp0 = (icmp46_header_t *) udp0;
+
+ next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
+ rx_fib_index0, node, next0, thread_index,
+ &ses0, &dm0);
+ goto trace00;
+ }
+
+ key0.ext_host_addr = ip0->src_address;
+ key0.ext_host_port = tcp0->src;
+ key0.out_port = tcp0->dst;
+
+ dm0 = snat_det_map_by_out(sm, &ip0->dst_address);
+ if (PREDICT_FALSE(!dm0))
+ {
+ clib_warning("unknown dst address: %U",
+ format_ip4_address, &ip0->dst_address);
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace00;
+ }
+
+ snat_det_reverse(dm0, &ip0->dst_address,
+ clib_net_to_host_u16(tcp0->dst), &new_addr0);
+
+ ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
+ if (PREDICT_FALSE(!ses0))
+ {
+ clib_warning("no match src %U:%d dst %U:%d for user %U",
+ format_ip4_address, &ip0->src_address,
+ clib_net_to_host_u16 (tcp0->src),
+ format_ip4_address, &ip0->dst_address,
+ clib_net_to_host_u16 (tcp0->dst),
+ format_ip4_address, &new_addr0);
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace00;
+ }
+ new_port0 = ses0->in_port;
+
+ old_addr0 = ip0->dst_address;
+ ip0->dst_address = new_addr0;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
+ ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT;
+ else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK)
+ snat_det_ses_close(dm0, ses0);
+
+ old_port0 = tcp0->dst;
+ tcp0->dst = new_port0;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->dst_port;
+ udp0->dst_port = new_port0;
+ udp0->checksum = 0;
+ }
+
+ trace00:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (ses0)
+ t->session_index = ses0 - dm0->sessions;
+ }
+
+ pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, snat_det_out2in_node.index,
+ SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_det_out2in_node) = {
+ .function = snat_det_out2in_node_fn,
+ .name = "nat44-det-out2in",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_out2in_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_out2in_error_strings),
+ .error_strings = snat_out2in_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_OUT2IN_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
+ [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
+
+/**
+ * Get address and port values to be used for ICMP packet translation
+ * and create session if needed
+ *
+ * @param[in,out] sm NAT main
+ * @param[in,out] node NAT node runtime
+ * @param[in] thread_index thread index
+ * @param[in,out] b0 buffer containing packet to be translated
+ * @param[in] ip0 ip4 header of the packet
+ * @param[out] p_proto protocol used for matching
+ * @param[out] p_value address and port after NAT translation
+ * @param[out] p_dont_translate if packet should not be translated
+ * @param d optional output: matched session (snat_det_session_t **)
+ * @param e optional output: matched deterministic map (snat_det_map_t **)
+ */
+u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e)
+{
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0;
+ u8 protocol;
+ snat_det_out_key_t key0;
+ u8 dont_translate = 0;
+ u32 next0 = ~0;
+ icmp_echo_header_t *echo0, *inner_echo0 = 0;
+ ip4_header_t *inner_ip0;
+ void *l4_header = 0;
+ icmp46_header_t *inner_icmp0;
+ snat_det_map_t * dm0 = 0;
+ ip4_address_t new_addr0 = {{0}};
+ snat_det_session_t * ses0 = 0;
+ ip4_address_t out_addr;
+
+ icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ if (!icmp_is_error_message (icmp0))
+ {
+ protocol = SNAT_PROTOCOL_ICMP;
+ key0.ext_host_addr = ip0->src_address;
+ key0.ext_host_port = 0;
+ key0.out_port = echo0->identifier;
+ out_addr = ip0->dst_address;
+ }
+ else
+ {
+ inner_ip0 = (ip4_header_t *)(echo0+1);
+ l4_header = ip4_next_header (inner_ip0);
+ protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
+ key0.ext_host_addr = inner_ip0->dst_address;
+ out_addr = inner_ip0->src_address;
+ switch (protocol)
+ {
+ case SNAT_PROTOCOL_ICMP:
+ inner_icmp0 = (icmp46_header_t*)l4_header;
+ inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
+ key0.ext_host_port = 0;
+ key0.out_port = inner_echo0->identifier;
+ break;
+ case SNAT_PROTOCOL_UDP:
+ case SNAT_PROTOCOL_TCP:
+ key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port;
+ key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port;
+ break;
+ default:
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+ }
+
+ dm0 = snat_det_map_by_out(sm, &out_addr);
+ if (PREDICT_FALSE(!dm0))
+ {
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
+ ip0->dst_address.as_u32)))
+ {
+ dont_translate = 1;
+ goto out;
+ }
+ clib_warning("unknown dst address: %U",
+ format_ip4_address, &ip0->dst_address);
+ goto out;
+ }
+
+ snat_det_reverse(dm0, &ip0->dst_address,
+ clib_net_to_host_u16(key0.out_port), &new_addr0);
+
+ ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64);
+ if (PREDICT_FALSE(!ses0))
+ {
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
+ ip0->dst_address.as_u32)))
+ {
+ dont_translate = 1;
+ goto out;
+ }
+ clib_warning("no match src %U:%d dst %U:%d for user %U",
+ format_ip4_address, &key0.ext_host_addr,
+ clib_net_to_host_u16 (key0.ext_host_port),
+ format_ip4_address, &out_addr,
+ clib_net_to_host_u16 (key0.out_port),
+ format_ip4_address, &new_addr0);
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply &&
+ !icmp_is_error_message (icmp0)))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_OUT2IN_NEXT_DROP;
+ goto out;
+ }
+
+out:
+ *p_proto = protocol;
+ if (ses0)
+ {
+ p_value->addr = new_addr0;
+ p_value->fib_index = sm->inside_fib_index;
+ p_value->port = ses0->in_port;
+ }
+ *p_dont_translate = dont_translate;
+ if (d)
+ *(snat_det_session_t**)d = ses0;
+ if (e)
+ *(snat_det_map_t**)e = dm0;
+ return next0;
+}
+
+/**********************/
+/*** worker handoff ***/
+/**********************/
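+
+/*
+ * sm->worker_out2in_cb() picks the owning worker thread for each packet,
+ * presumably so that per-user NAT state stays thread-local. Packets that
+ * arrived on the wrong thread are batched into per-worker frame-queue
+ * elements and handed off; packets already on the correct thread are
+ * forwarded directly to the out2in node.
+ */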
+static uword
+snat_out2in_worker_handoff_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ snat_main_t *sm = &snat_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 n_left_from, *from, *to_next = 0;
+ static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
+ static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
+ = 0;
+ vlib_frame_queue_elt_t *hf = 0;
+ vlib_frame_t *f = 0;
+ int i;
+ u32 n_left_to_next_worker = 0, *to_next_worker = 0;
+ u32 next_worker_index = 0;
+ u32 current_worker_index = ~0;
+ u32 thread_index = vlib_get_thread_index ();
+
+ ASSERT (vec_len (sm->workers));
+
+ if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
+ {
+ vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
+
+ vec_validate_init_empty (congested_handoff_queue_by_worker_index,
+ sm->first_worker_index + sm->num_workers - 1,
+ (vlib_frame_queue_t *) (~0));
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ ip4_header_t * ip0;
+ u8 do_handoff;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
+
+ if (PREDICT_FALSE (next_worker_index != thread_index))
+ {
+ do_handoff = 1;
+
+ if (next_worker_index != current_worker_index)
+ {
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
+ next_worker_index,
+ handoff_queue_elt_by_worker_index);
+
+ n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
+ to_next_worker = &hf->buffer_index[hf->n_vectors];
+ current_worker_index = next_worker_index;
+ }
+
+ /* enqueue to correct worker thread */
+ to_next_worker[0] = bi0;
+ to_next_worker++;
+ n_left_to_next_worker--;
+
+ if (n_left_to_next_worker == 0)
+ {
+ hf->n_vectors = VLIB_FRAME_SIZE;
+ vlib_put_frame_queue_elt (hf);
+ current_worker_index = ~0;
+ handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+ hf = 0;
+ }
+ }
+ else
+ {
+ do_handoff = 0;
+ /* if this is 1st frame */
+ if (!f)
+ {
+ f = vlib_get_frame_to_node (vm, sm->out2in_node_index);
+ to_next = vlib_frame_vector_args (f);
+ }
+
+ to_next[0] = bi0;
+ to_next += 1;
+ f->n_vectors++;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_worker_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_worker_index = next_worker_index;
+ t->do_handoff = do_handoff;
+ }
+ }
+
+ if (f)
+ vlib_put_frame_to_node (vm, sm->out2in_node_index, f);
+
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ /* Ship frames to the worker nodes */
+ for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+ {
+ if (handoff_queue_elt_by_worker_index[i])
+ {
+ hf = handoff_queue_elt_by_worker_index[i];
+ /*
+ * It works better to let the handoff node
+ * rate-adapt, always ship the handoff queue element.
+ */
+ if (1 || hf->n_vectors == hf->last_n_vectors)
+ {
+ vlib_put_frame_queue_elt (hf);
+ handoff_queue_elt_by_worker_index[i] = 0;
+ }
+ else
+ hf->last_n_vectors = hf->n_vectors;
+ }
+ congested_handoff_queue_by_worker_index[i] =
+ (vlib_frame_queue_t *) (~0);
+ }
+ hf = 0;
+ current_worker_index = ~0;
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
+ .function = snat_out2in_worker_handoff_fn,
+ .name = "nat44-out2in-worker-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_out2in_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
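+
+/*
+ * Illustrative sketch (not part of this patch): sm->worker_out2in_cb,
+ * used by the handoff function above, must map a packet to a worker
+ * thread deterministically so that both directions of a session land
+ * on the same thread.  A minimal hypothetical callback hashing on the
+ * outside (destination) address could look like:
+ *
+ *    static u32
+ *    sample_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index)
+ *    {
+ *      snat_main_t *sm = &snat_main;
+ *      u32 hash = clib_net_to_host_u32 (ip->dst_address.as_u32);
+ *      return sm->first_worker_index + (hash % sm->num_workers);
+ *    }
+ *
+ * Any flow-affine function works; the fib index is unused in this toy.
+ */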
+
+static uword
+snat_out2in_fast_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_out2in_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = SNAT_OUT2IN_NEXT_DROP;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ u32 new_addr0, old_addr0;
+ u16 new_port0, old_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ icmp46_header_t * icmp0;
+ snat_session_key_t key0, sm0;
+ u32 proto0;
+ u32 rx_fib_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ vnet_feature_next (sw_if_index0, &next0, b0);
+
+ if (PREDICT_FALSE(ip0->ttl == 1))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
+ goto trace00;
+ }
+
+ proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+ if (PREDICT_FALSE (proto0 == ~0))
+ goto trace00;
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0,
+ rx_fib_index0, node, next0, ~0, 0, 0);
+ goto trace00;
+ }
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.fib_index = rx_fib_index0;
+
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace00;
+ }
+
+ new_addr0 = sm0.addr.as_u32;
+ new_port0 = sm0.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+ old_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_addr0;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_FALSE(new_port0 != udp0->dst_port))
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->dst_port;
+ tcp0->dst_port = new_port0;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->dst_port;
+ udp0->dst_port = new_port0;
+ udp0->checksum = 0;
+ }
+ }
+ else
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ }
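+ /* Both branches above use the incremental checksum update of
+  * RFC 1624 (HC' = ~(~HC + ~m + m')): only the changed address and
+  * port words are folded into the existing checksum instead of
+  * recomputing it over the whole segment; the "cheat" reuses the
+  * ip4_header_t length member merely to present a 16-bit field to
+  * ip_csum_update. */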
+
+ trace00:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
+ SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
+ .function = snat_out2in_fast_node_fn,
+ .name = "nat44-out2in-fast",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_out2in_fast_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_out2in_error_strings),
+ .error_strings = snat_out2in_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_OUT2IN_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
+ [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);
diff --git a/src/plugins/pppoe.am b/src/plugins/pppoe.am
new file mode 100644
index 00000000..06ed60b4
--- /dev/null
+++ b/src/plugins/pppoe.am
@@ -0,0 +1,39 @@
+# Copyright (c) 2017 Intel and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += pppoe_test_plugin.la
+vppplugins_LTLIBRARIES += pppoe_plugin.la
+
+pppoe_plugin_la_SOURCES = \
+ pppoe/pppoe_decap.c \
+ pppoe/pppoe_tap.c \
+ pppoe/pppoe_tap_node.c \
+ pppoe/pppoe.c \
+ pppoe/pppoe_api.c
+
+BUILT_SOURCES += \
+ pppoe/pppoe.api.h \
+ pppoe/pppoe.api.json
+
+API_FILES += pppoe/pppoe.api
+
+nobase_apiinclude_HEADERS += \
+ pppoe/pppoe_all_api_h.h \
+ pppoe/pppoe_msg_enum.h \
+ pppoe/pppoe.api.h
+
+pppoe_test_plugin_la_SOURCES = \
+ pppoe/pppoe_test.c \
+ pppoe/pppoe_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/pppoe/pppoe.api b/src/plugins/pppoe/pppoe.api
new file mode 100644
index 00000000..e8cd989f
--- /dev/null
+++ b/src/plugins/pppoe/pppoe.api
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Set or delete a PPPoE session
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add session if non-zero, else delete
+ @param is_ipv6 - client_ip is IPv6 if non-zero, else IPv4
+ @param session_id - PPPoE session ID
+ @param client_ip - PPPoE session's client IP address
+ @param decap_vrf_id - the VRF index for PPPoE decapsulated packets
+ @param client_mac - the client ethernet address
+*/
+define pppoe_add_del_session
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ipv6;
+ u16 session_id;
+ u8 client_ip[16];
+ u32 decap_vrf_id;
+ u8 client_mac[6];
+};
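+
+/*
+ * Illustrative usage (hypothetical vpp_api_test syntax; see
+ * pppoe_test.c for the actual parser):
+ *
+ *    pppoe_add_del_session session-id 13 client-ip 10.0.3.1
+ *                          client-mac 00:01:02:03:04:05 [del]
+ */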
+
+/** \brief Reply for set or delete a PPPoE session
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index of the interface
+*/
+define pppoe_add_del_session_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Dump PPPoE sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+*/
+define pppoe_session_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Dump details of a PPPoE session
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+ @param is_ipv6 - client_ip is IPv6 if non-zero, else IPv4
+ @param session_id - PPPoE session ID
+ @param client_ip - PPPoE session's client IP address
+ @param encap_if_index - the index of the tx interface for PPPoE encapsulated packets
+ @param decap_vrf_id - the VRF index for PPPoE decapsulated packets
+ @param local_mac - the local ethernet address
+ @param client_mac - the client ethernet address
+*/
+define pppoe_session_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u16 session_id;
+ u8 client_ip[16];
+ u32 encap_if_index;
+ u32 decap_vrf_id;
+ u8 local_mac[6];
+ u8 client_mac[6];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/pppoe/pppoe.c b/src/plugins/pppoe/pppoe.c
new file mode 100644
index 00000000..e09ac7d9
--- /dev/null
+++ b/src/plugins/pppoe/pppoe.c
@@ -0,0 +1,739 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/dpo/interface_tx_dpo.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <vnet/ppp/packet.h>
+#include <pppoe/pppoe.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_mcast.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/bihash_template.c>
+
+pppoe_main_t pppoe_main;
+
+u8 *
+format_pppoe_session (u8 * s, va_list * args)
+{
+ pppoe_session_t *t = va_arg (*args, pppoe_session_t *);
+ pppoe_main_t *pem = &pppoe_main;
+
+ s = format (s, "[%d] sw-if-index %d client-ip %U session-id %d ",
+ t - pem->sessions, t->sw_if_index,
+ format_ip46_address, &t->client_ip, IP46_TYPE_ANY,
+ t->session_id);
+
+ s = format (s, "encap-if-index %d decap-fib-index %d\n",
+ t->encap_if_index, t->decap_fib_index);
+
+ s = format (s, " local-mac %U client-mac %U",
+ format_ethernet_address, t->local_mac,
+ format_ethernet_address, t->client_mac);
+
+ return s;
+}
+
+static u8 *
+format_pppoe_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "pppoe_session%d", dev_instance);
+}
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static clib_error_t *
+pppoe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return /* no error */ 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (pppoe_device_class,static) = {
+ .name = "PPPPOE",
+ .format_device_name = format_pppoe_name,
+ .tx_function = dummy_interface_tx,
+ .admin_up_down_function = pppoe_interface_admin_up_down,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_pppoe_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+static u8 *
+pppoe_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ int len = sizeof (pppoe_header_t) + sizeof (ethernet_header_t);
+ pppoe_main_t *pem = &pppoe_main;
+ pppoe_session_t *t;
+ u32 session_id;
+ u8 *rw = 0;
+
+ session_id = pem->session_index_by_sw_if_index[sw_if_index];
+ t = pool_elt_at_index (pem->sessions, session_id);
+
+ vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
+
+ ethernet_header_t *eth_hdr = (ethernet_header_t *) rw;
+ clib_memcpy (eth_hdr->dst_address, t->client_mac, 6);
+ clib_memcpy (eth_hdr->src_address, t->local_mac, 6);
+ eth_hdr->type = clib_host_to_net_u16 (ETHERNET_TYPE_PPPOE_SESSION);
+
+ pppoe_header_t *pppoe = (pppoe_header_t *) (eth_hdr + 1);
+ pppoe->ver_type = PPPOE_VER_TYPE;
+ pppoe->code = 0;
+ pppoe->session_id = clib_host_to_net_u16 (t->session_id);
+ pppoe->length = 0; /* To be filled in at run-time */
+
+ switch (link_type)
+ {
+ case VNET_LINK_IP4:
+ pppoe->ppp_proto = clib_host_to_net_u16 (PPP_PROTOCOL_ip4);
+ break;
+ case VNET_LINK_IP6:
+ pppoe->ppp_proto = clib_host_to_net_u16 (PPP_PROTOCOL_ip6);
+ break;
+ default:
+ break;
+ }
+
+ return rw;
+}
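+
+/*
+ * For reference, the rewrite built above lays out on the wire as
+ * (illustrative values, session_id 13, IPv4 payload):
+ *
+ *    ethernet:  dst = client_mac, src = local_mac, type = 0x8864
+ *    pppoe:     ver_type = 0x11, code = 0x00, session_id = 0x000d,
+ *               length = 0 (patched per packet by pppoe_fixup),
+ *               ppp_proto = 0x0021 (PPP_PROTOCOL_ip4)
+ */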
+
+/**
+ * @brief Fixup the adj rewrite post encap. Insert the packet's length
+ */
+static void
+pppoe_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b0)
+{
+ pppoe_header_t *pppoe0;
+
+ pppoe0 = vlib_buffer_get_current (b0);
+
+ pppoe0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (pppoe_header_t)
+ - sizeof (ethernet_header_t));
+}
+
+static void
+pppoe_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
+{
+ pppoe_main_t *pem = &pppoe_main;
+ dpo_id_t dpo = DPO_INVALID;
+ ip_adjacency_t *adj;
+ pppoe_session_t *t;
+ u32 session_id;
+
+ ASSERT (ADJ_INDEX_INVALID != ai);
+
+ adj = adj_get (ai);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_GLEAN:
+ adj_nbr_midchain_update_rewrite (ai, pppoe_fixup,
+ ADJ_FLAG_NONE,
+ pppoe_build_rewrite (vnm,
+ sw_if_index,
+ adj->ia_link,
+ NULL));
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ /*
+ * Construct a partial rewrite from the known ethernet mcast dest MAC
+ * There's no MAC fixup, so the last 2 parameters are 0
+ */
+ adj_mcast_midchain_update_rewrite (ai, pppoe_fixup,
+ ADJ_FLAG_NONE,
+ pppoe_build_rewrite (vnm,
+ sw_if_index,
+ adj->ia_link,
+ NULL), 0, 0);
+ break;
+
+ case IP_LOOKUP_NEXT_DROP:
+ case IP_LOOKUP_NEXT_PUNT:
+ case IP_LOOKUP_NEXT_LOCAL:
+ case IP_LOOKUP_NEXT_REWRITE:
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ case IP_LOOKUP_NEXT_ICMP_ERROR:
+ case IP_LOOKUP_N_NEXT:
+ ASSERT (0);
+ break;
+ }
+
+ session_id = pem->session_index_by_sw_if_index[sw_if_index];
+ t = pool_elt_at_index (pem->sessions, session_id);
+ interface_tx_dpo_add_or_lock (vnet_link_to_dpo_proto (adj->ia_link),
+ t->encap_if_index, &dpo);
+
+ adj_nbr_midchain_stack (ai, &dpo);
+
+ dpo_reset (&dpo);
+}
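+
+/*
+ * Design note: stacking the midchain adjacency on an interface-tx DPO
+ * (interface_tx_dpo_add_or_lock above) means a packet that has had the
+ * ethernet + PPPoE rewrite prepended is handed straight to the encap
+ * interface's tx node, with pppoe_fixup patching the PPPoE length
+ * in-flight, rather than taking another IP lookup.
+ */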
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (pppoe_hw_class) =
+{
+ .name = "PPPPOE",
+ .format_header = format_pppoe_header_with_length,
+ .build_rewrite = pppoe_build_rewrite,
+ .update_adjacency = pppoe_update_adj,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+/* *INDENT-ON* */
+
+#define foreach_copy_field \
+_(session_id) \
+_(encap_if_index) \
+_(decap_fib_index) \
+_(client_ip)
+
+static bool
+pppoe_decap_next_is_valid (pppoe_main_t * pem, u32 is_ip6,
+ u32 decap_fib_index)
+{
+ vlib_main_t *vm = pem->vlib_main;
+ u32 input_idx = (!is_ip6) ? ip4_input_node.index : ip6_input_node.index;
+ vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx);
+
+ return decap_fib_index < r->n_next_nodes;
+}
+
+int vnet_pppoe_add_del_session
+ (vnet_pppoe_add_del_session_args_t * a, u32 * sw_if_indexp)
+{
+ pppoe_main_t *pem = &pppoe_main;
+ pppoe_session_t *t = 0;
+ vnet_main_t *vnm = pem->vnet_main;
+ u32 hw_if_index = ~0;
+ u32 sw_if_index = ~0;
+ u32 is_ip6 = a->is_ip6;
+ pppoe_entry_key_t cached_key;
+ pppoe_entry_result_t cached_result;
+ u32 bucket;
+ pppoe_entry_key_t key;
+ pppoe_entry_result_t result;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ fib_prefix_t pfx;
+
+ cached_key.raw = ~0;
+ cached_result.raw = ~0; /* warning be gone */
+ memset (&pfx, 0, sizeof (pfx));
+
+ if (!is_ip6)
+ {
+ pfx.fp_addr.ip4.as_u32 = a->client_ip.ip4.as_u32;
+ pfx.fp_len = 32;
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ }
+ else
+ {
+ pfx.fp_addr.ip6.as_u64[0] = a->client_ip.ip6.as_u64[0];
+ pfx.fp_addr.ip6.as_u64[1] = a->client_ip.ip6.as_u64[1];
+ pfx.fp_len = 128;
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ }
+
+ /* Get encap_if_index and local mac address */
+ pppoe_lookup_1 (&pem->session_table, &cached_key, &cached_result,
+ a->client_mac, clib_host_to_net_u16 (a->session_id),
+ &key, &bucket, &result);
+ a->encap_if_index = result.fields.sw_if_index;
+
+ if (a->encap_if_index == ~0)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ si = vnet_get_sw_interface (vnm, a->encap_if_index);
+ hi = vnet_get_hw_interface (vnm, si->hw_if_index);
+
+
+ if (a->is_add)
+ {
+ /* adding a session: session must not already exist */
+ if (result.fields.session_index != ~0)
+ return VNET_API_ERROR_TUNNEL_EXIST;
+
+ /* sanity-check the decap fib index */
+ if (!pppoe_decap_next_is_valid (pem, is_ip6, a->decap_fib_index))
+ return VNET_API_ERROR_INVALID_DECAP_NEXT;
+
+ pool_get_aligned (pem->sessions, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ clib_memcpy (t->local_mac, hi->hw_address, 6);
+
+ /* copy from arg structure */
+#define _(x) t->x = a->x;
+ foreach_copy_field;
+#undef _
+
+ clib_memcpy (t->client_mac, a->client_mac, 6);
+
+ /* update pppoe fib with session_index */
+ result.fields.session_index = t - pem->sessions;
+ pppoe_update_1 (&pem->session_table,
+ a->client_mac, clib_host_to_net_u16 (a->session_id),
+ &key, &bucket, &result);
+
+ vnet_hw_interface_t *hi;
+ if (vec_len (pem->free_pppoe_session_hw_if_indices) > 0)
+ {
+ vnet_interface_main_t *im = &vnm->interface_main;
+ hw_if_index = pem->free_pppoe_session_hw_if_indices
+ [vec_len (pem->free_pppoe_session_hw_if_indices) - 1];
+ _vec_len (pem->free_pppoe_session_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - pem->sessions;
+ hi->hw_instance = hi->dev_instance;
+
+ /* clear old stats of freed session before reuse */
+ sw_if_index = hi->sw_if_index;
+ vnet_interface_counter_lock (im);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX],
+ sw_if_index);
+ vlib_zero_combined_counter (&im->combined_sw_if_counters
+ [VNET_INTERFACE_COUNTER_RX],
+ sw_if_index);
+ vlib_zero_simple_counter (&im->sw_if_counters
+ [VNET_INTERFACE_COUNTER_DROP],
+ sw_if_index);
+ vnet_interface_counter_unlock (im);
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, pppoe_device_class.index, t - pem->sessions,
+ pppoe_hw_class.index, t - pem->sessions);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->sw_if_index = sw_if_index = hi->sw_if_index;
+
+ vec_validate_init_empty (pem->session_index_by_sw_if_index, sw_if_index,
+ ~0);
+ pem->session_index_by_sw_if_index[sw_if_index] = t - pem->sessions;
+
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
+ vnet_sw_interface_set_flags (vnm, sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ /* add reverse route for client ip */
+ fib_table_entry_path_add (a->decap_fib_index, &pfx,
+ FIB_SOURCE_PLUGIN_HI, FIB_ENTRY_FLAG_NONE,
+ fib_proto_to_dpo (pfx.fp_proto),
+ &pfx.fp_addr, sw_if_index, ~0,
+ 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+
+ }
+ else
+ {
+ /* deleting a session: session must exist */
+ if (result.fields.session_index == ~0)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ t = pool_elt_at_index (pem->sessions, result.fields.session_index);
+ sw_if_index = t->sw_if_index;
+
+ vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */ );
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, t->sw_if_index);
+ si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;
+
+ vec_add1 (pem->free_pppoe_session_hw_if_indices, t->hw_if_index);
+
+ pem->session_index_by_sw_if_index[t->sw_if_index] = ~0;
+
+ /* update pppoe fib with session_index=~0 */
+ result.fields.session_index = ~0;
+ pppoe_update_1 (&pem->session_table,
+ a->client_mac, clib_host_to_net_u16 (a->session_id),
+ &key, &bucket, &result);
+
+
+ /* delete reverse route for client ip */
+ fib_table_entry_path_remove (a->decap_fib_index, &pfx,
+ FIB_SOURCE_PLUGIN_HI,
+ fib_proto_to_dpo (pfx.fp_proto),
+ &pfx.fp_addr,
+ sw_if_index, ~0, 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ pool_put (pem->sessions, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
+
+static clib_error_t *
+pppoe_add_del_session_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u16 session_id = 0;
+ ip46_address_t client_ip;
+ u8 is_add = 1;
+ u8 client_ip_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u32 encap_if_index = 0;
+ u32 decap_fib_index = 0;
+ u8 client_mac[6] = { 0 };
+ u8 client_mac_set = 0;
+ int rv;
+ u32 tmp;
+ vnet_pppoe_add_del_session_args_t _a, *a = &_a;
+ u32 session_sw_if_index;
+ clib_error_t *error = NULL;
+
+ /* Cant "universally zero init" (={0}) due to GCC bug 53119 */
+ memset (&client_ip, 0, sizeof client_ip);
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (line_input, "session-id %d", &session_id))
+ ;
+ else if (unformat (line_input, "client-ip %U",
+ unformat_ip4_address, &client_ip.ip4))
+ {
+ client_ip_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "client-ip %U",
+ unformat_ip6_address, &client_ip.ip6))
+ {
+ client_ip_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "decap-vrf-id %d", &tmp))
+ {
+ if (ipv6_set)
+ decap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp);
+ else
+ decap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp);
+
+ if (decap_fib_index == ~0)
+ {
+ error =
+ clib_error_return (0, "nonexistent decap fib id %d", tmp);
+ goto done;
+ }
+ }
+ else
+ if (unformat
+ (line_input, "client-mac %U", unformat_ethernet_address,
+ client_mac))
+ client_mac_set = 1;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (client_ip_set == 0)
+ {
+ error =
+ clib_error_return (0, "session client ip address not specified");
+ goto done;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ error = clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ goto done;
+ }
+
+ if (client_mac_set == 0)
+ {
+ error = clib_error_return (0, "session client mac not specified");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = is_add;
+ a->is_ip6 = ipv6_set;
+
+#define _(x) a->x = x;
+ foreach_copy_field;
+#undef _
+
+ clib_memcpy (a->client_mac, client_mac, 6);
+
+ rv = vnet_pppoe_add_del_session (a, &session_sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ if (is_add)
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), session_sw_if_index);
+ break;
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ error = clib_error_return (0, "session already exists...");
+ goto done;
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "session does not exist...");
+ goto done;
+
+ default:
+ error = clib_error_return
+ (0, "vnet_pppoe_add_del_session returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Add or delete a PPPoE Session.
+ *
+ * @cliexpar
+ * Example of how to create a PPPoE Session:
+ * @cliexcmd{create pppoe session client-ip 10.0.3.1 session-id 13
+ * client-mac 00:01:02:03:04:05 }
+ * Example of how to delete a PPPoE Session:
+ * @cliexcmd{create pppoe session client-ip 10.0.3.1 session-id 13
+ * client-mac 00:01:02:03:04:05 del }
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_pppoe_session_command, static) = {
+ .path = "create pppoe session",
+ .short_help =
+ "create pppoe session client-ip <client-ip> session-id <nn>"
+ " client-mac <client-mac> [decap-vrf-id <nn>] [del]",
+ .function = pppoe_add_del_session_command_fn,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+static clib_error_t *
+show_pppoe_session_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ pppoe_main_t *pem = &pppoe_main;
+ pppoe_session_t *t;
+
+ if (pool_elts (pem->sessions) == 0)
+ vlib_cli_output (vm, "No pppoe sessions configured...");
+
+ pool_foreach (t, pem->sessions,
+ ({
+ vlib_cli_output (vm, "%U",format_pppoe_session, t);
+ }));
+
+ return 0;
+}
+/* *INDENT-ON* */
+
+/*?
+ * Display all the PPPoE Session entries.
+ *
+ * @cliexpar
+ * Example of how to display the PPPoE Session entries:
+ * @cliexstart{show pppoe session}
+ * [0] client-ip 10.0.3.1 session_id 13 encap-if-index 0 decap-vrf-id 13 sw_if_index 5
+ * local-mac a0:b0:c0:d0:e0:f0 client-mac 00:01:02:03:04:05
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_pppoe_session_command, static) = {
+ .path = "show pppoe session",
+ .short_help = "show pppoe session",
+ .function = show_pppoe_session_command_fn,
+};
+/* *INDENT-ON* */
+
+/** Display the contents of the PPPoE Fib. */
+static clib_error_t *
+show_pppoe_fib_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pppoe_main_t *pem = &pppoe_main;
+ BVT (clib_bihash) * h = &pem->session_table;
+ BVT (clib_bihash_bucket) * b;
+ BVT (clib_bihash_value) * v;
+ pppoe_entry_key_t key;
+ pppoe_entry_result_t result;
+ u32 first_entry = 1;
+ u64 total_entries = 0;
+ int i, j, k;
+ u8 *s = 0;
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ if (first_entry)
+ {
+ first_entry = 0;
+ vlib_cli_output (vm,
+ "%=19s%=12s%=13s%=14s",
+ "Mac-Address", "session_id", "sw_if_index",
+ "session_index");
+ }
+
+ key.raw = v->kvp[k].key;
+ result.raw = v->kvp[k].value;
+
+
+ vlib_cli_output (vm,
+ "%=19U%=12d%=13d%=14d",
+ format_ethernet_address, key.fields.mac,
+ clib_net_to_host_u16 (key.fields.session_id),
+ result.fields.sw_if_index == ~0
+ ? -1 : result.fields.sw_if_index,
+ result.fields.session_index == ~0
+ ? -1 : result.fields.session_index);
+ vec_reset_length (s);
+ total_entries++;
+ }
+ v++;
+ }
+ }
+
+ if (total_entries == 0)
+ vlib_cli_output (vm, "no pppoe fib entries");
+ else
+ vlib_cli_output (vm, "%lld pppoe fib entries", total_entries);
+
+ vec_free (s);
+ return 0;
+}
+
+/*?
+ * This command displays the MAC Address entries of the PPPoE FIB table.
+ * Output can be filtered to just get the number of MAC Addresses or display
+ * each MAC Address.
+ *
+ * @cliexpar
+ * Example of how to display the number of MAC Address entries in the PPPoE
+ * FIB table:
+ * @cliexstart{show pppoe fib}
+ * Mac Address session_id Interface sw_if_index session_index
+ * 52:54:00:53:18:33 1 GigabitEthernet0/8/0 2 0
+ * 52:54:00:53:18:55 2 GigabitEthernet0/8/1 3 1
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_pppoe_fib_command, static) = {
+ .path = "show pppoe fib",
+ .short_help = "show pppoe fib",
+ .function = show_pppoe_fib_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+pppoe_init (vlib_main_t * vm)
+{
+ pppoe_main_t *pem = &pppoe_main;
+
+ pem->vnet_main = vnet_get_main ();
+ pem->vlib_main = vm;
+
+ /* Create the hash table */
+ BV (clib_bihash_init) (&pem->session_table, "pppoe session table",
+ PPPOE_NUM_BUCKETS, PPPOE_MEMORY_SIZE);
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_PPPOE_SESSION,
+ pppoe_input_node.index);
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_PPPOE_DISCOVERY,
+ pppoe_tap_dispatch_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (pppoe_init);
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "PPPoE",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/pppoe/pppoe.h b/src/plugins/pppoe/pppoe.h
new file mode 100644
index 00000000..b06c068f
--- /dev/null
+++ b/src/plugins/pppoe/pppoe.h
@@ -0,0 +1,289 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _PPPOE_H
+#define _PPPOE_H
+
+#include <vnet/plugin/plugin.h>
+#include <vppinfra/lock.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/fib/fib_table.h>
+#include <vlib/vlib.h>
+#include <vppinfra/bihash_8_8.h>
+
+
+typedef struct
+{
+ u8 ver_type;
+ u8 code;
+ u16 session_id;
+ u16 length;
+ u16 ppp_proto;
+} pppoe_header_t;
+
+#define PPPOE_VER_TYPE 0x11
+#define PPPOE_PADS 0x65
+
+typedef struct
+{
+ /* pppoe session_id in HOST byte order */
+ u16 session_id;
+
+ /* session client addresses */
+ ip46_address_t client_ip;
+
+ /* the index of the tx interface for pppoe encapsulated packets */
+ u32 encap_if_index;
+
+ /** FIB indices - inner IP packet lookup here */
+ u32 decap_fib_index;
+
+ u8 local_mac[6];
+ u8 client_mac[6];
+
+ /* vnet intfc index */
+ u32 sw_if_index;
+ u32 hw_if_index;
+
+} pppoe_session_t;
+
+#define foreach_pppoe_input_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input" ) \
+_(CP_INPUT, "pppoe-tap-dispatch" ) \
+
+typedef enum
+{
+#define _(s,n) PPPOE_INPUT_NEXT_##s,
+ foreach_pppoe_input_next
+#undef _
+ PPPOE_INPUT_N_NEXT,
+} pppoe_input_next_t;
+
+typedef enum
+{
+#define pppoe_error(n,s) PPPOE_ERROR_##n,
+#include <pppoe/pppoe_error.def>
+#undef pppoe_error
+ PPPOE_N_ERROR,
+} pppoe_input_error_t;
+
+
+#define MTU 1500
+#define MTU_BUFFERS ((MTU + VLIB_BUFFER_DATA_SIZE - 1) / VLIB_BUFFER_DATA_SIZE)
+#define NUM_BUFFERS_TO_ALLOC 32
+
+/*
+ * The size of pppoe session table
+ */
+#define PPPOE_NUM_BUCKETS (128 * 1024)
+#define PPPOE_MEMORY_SIZE (16<<20)
+
+/* *INDENT-OFF* */
+/*
+ * The PPPoE key is the mac address and session ID
+ */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u16 session_id;
+ u8 mac[6];
+ } fields;
+ struct
+ {
+ u32 w0;
+ u32 w1;
+ } words;
+ u64 raw;
+ };
+} pppoe_entry_key_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+/*
+ * The PPPoE entry results
+ */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u32 sw_if_index;
+
+ u32 session_index;
+
+ } fields;
+ u64 raw;
+ };
+} pppoe_entry_result_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ /* For DP: vector of encap session instances */
+ pppoe_session_t *sessions;
+
+ /* For CP: vector of CP path */
+ BVT (clib_bihash) session_table;
+
+ /* Free vlib hw_if_indices */
+ u32 *free_pppoe_session_hw_if_indices;
+
+ /* Mapping from sw_if_index to session index */
+ u32 *session_index_by_sw_if_index;
+
+ /* used for pppoe cp path */
+ u32 tap_if_index;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+} pppoe_main_t;
+
+extern pppoe_main_t pppoe_main;
+
+extern vlib_node_registration_t pppoe_input_node;
+extern vlib_node_registration_t pppoe_tap_dispatch_node;
+
+typedef struct
+{
+ u8 is_add;
+ u8 is_ip6;
+ u16 session_id;
+ ip46_address_t client_ip;
+ u32 encap_if_index;
+ u32 decap_fib_index;
+ u8 local_mac[6];
+ u8 client_mac[6];
+} vnet_pppoe_add_del_session_args_t;
+
+int vnet_pppoe_add_del_session
+ (vnet_pppoe_add_del_session_args_t * a, u32 * sw_if_indexp);
+
+typedef struct
+{
+ u8 is_add;
+ u32 client_if_index;
+ u32 tap_if_index;
+} vnet_pppoe_add_del_tap_args_t;
+
+always_inline u64
+pppoe_make_key (u8 * mac_address, u16 session_id)
+{
+ u64 temp;
+
+ /*
+ * The mac address in memory is A:B:C:D:E:F
+ * The session_id in register is H:L
+ */
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ /*
+ * Create the in-register key as F:E:D:C:B:A:H:L
+ * In memory the key is L:H:A:B:C:D:E:F
+ */
+ temp = *((u64 *) (mac_address)) << 16;
+ temp = (temp & ~0xffff) | (u64) (session_id);
+#else
+ /*
+ * Create the in-register key as H:L:A:B:C:D:E:F
+ * In memory the key is H:L:A:B:C:D:E:F
+ */
+ temp = *((u64 *) (mac_address)) >> 16;
+ temp = temp | (((u64) session_id) << 48);
+#endif
+
+ return temp;
+}
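+
+/*
+ * Worked example (illustrative): for mac = 0A:0B:0C:0D:0E:0F and
+ * session_id = 0x1234 on a little-endian host, the 8-byte load picks
+ * up the six mac bytes in reversed register order; after "<< 16" the
+ * register holds 0x0F0E0D0C0B0A0000, and or-ing in the session_id
+ * yields 0x0F0E0D0C0B0A1234.  Written back to memory (little-endian)
+ * the key bytes are 34 12 0A 0B 0C 0D 0E 0F, i.e. L:H:A:B:C:D:E:F as
+ * the comments above describe.  Note the load reads two bytes beyond
+ * the 6-byte mac; they are shifted out and never affect the key.
+ */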
+
+static_always_inline void
+pppoe_lookup_1 (BVT (clib_bihash) * session_table,
+ pppoe_entry_key_t * cached_key,
+ pppoe_entry_result_t * cached_result,
+ u8 * mac0,
+ u16 session_id0,
+ pppoe_entry_key_t * key0,
+ u32 * bucket0, pppoe_entry_result_t * result0)
+{
+ /* set up key */
+ key0->raw = pppoe_make_key (mac0, session_id0);
+ *bucket0 = ~0;
+
+ if (key0->raw == cached_key->raw)
+ {
+ /* Hit in the one-entry cache */
+ result0->raw = cached_result->raw;
+ }
+ else
+ {
+ /* Do a regular session table lookup */
+ BVT (clib_bihash_kv) kv;
+
+ kv.key = key0->raw;
+ kv.value = ~0ULL;
+ BV (clib_bihash_search_inline) (session_table, &kv);
+ result0->raw = kv.value;
+
+ /* Update one-entry cache */
+ cached_key->raw = key0->raw;
+ cached_result->raw = result0->raw;
+ }
+}
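+
+/*
+ * The one-entry (cached_key/cached_result) cache above is a deliberate
+ * fast path: in pppoe_input consecutive packets of a burst usually
+ * carry the same (mac, session_id) pair, so most lookups are satisfied
+ * without touching the bihash at all.  The cache is reset at the top
+ * of each dispatch (raw = ~0), so a stale entry cannot survive a
+ * session table update.
+ */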
+
+static_always_inline void
+pppoe_update_1 (BVT (clib_bihash) * session_table,
+ u8 * mac0,
+ u16 session_id0,
+ pppoe_entry_key_t * key0,
+ u32 * bucket0, pppoe_entry_result_t * result0)
+{
+ /* set up key */
+ key0->raw = pppoe_make_key (mac0, session_id0);
+ *bucket0 = ~0;
+
+ /* Update the entry */
+ BVT (clib_bihash_kv) kv;
+ kv.key = key0->raw;
+ kv.value = result0->raw;
+ BV (clib_bihash_add_del) (session_table, &kv, 1 /* is_add */ );
+
+}
+#endif /* _PPPOE_H */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/pppoe/pppoe_all_api_h.h b/src/plugins/pppoe/pppoe_all_api_h.h
new file mode 100644
index 00000000..393c7680
--- /dev/null
+++ b/src/plugins/pppoe/pppoe_all_api_h.h
@@ -0,0 +1,18 @@
+/*
+ * pppoe_all_api_h.h - plug-in api #include file
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <pppoe/pppoe.api.h>
diff --git a/src/plugins/pppoe/pppoe_api.c b/src/plugins/pppoe/pppoe_api.c
new file mode 100644
index 00000000..9b758460
--- /dev/null
+++ b/src/plugins/pppoe/pppoe_api.c
@@ -0,0 +1,224 @@
+/*
+ *------------------------------------------------------------------
+ * pppoe_api.c - pppoe api
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vppinfra/byte_order.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+#include <pppoe/pppoe.h>
+
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+#include <pppoe/pppoe.api.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+/* define message structures */
+#define vl_typedefs
+#include <pppoe/pppoe.api.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <pppoe/pppoe.api.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <pppoe/pppoe.api.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <pppoe/pppoe.api.h>
+#undef vl_api_version
+
+#define vl_msg_name_crc_list
+#include <pppoe/pppoe.api.h>
+#undef vl_msg_name_crc_list
+
+#define REPLY_MSG_ID_BASE pem->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+setup_message_id_table (pppoe_main_t * pem, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + pem->msg_id_base);
+ foreach_vl_msg_name_crc_pppoe;
+#undef _
+}
+
+#define foreach_pppoe_plugin_api_msg \
+_(PPPOE_ADD_DEL_SESSION, pppoe_add_del_session) \
+_(PPPOE_SESSION_DUMP, pppoe_session_dump)
+
+static void vl_api_pppoe_add_del_session_t_handler
+ (vl_api_pppoe_add_del_session_t * mp)
+{
+ vl_api_pppoe_add_del_session_reply_t *rmp;
+ int rv = 0;
+ u32 decap_fib_index;
+ ip4_main_t *im = &ip4_main;
+ pppoe_main_t *pem = &pppoe_main;
+
+ uword *p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_INNER_FIB;
+ goto out;
+ }
+ decap_fib_index = p[0];
+
+ vnet_pppoe_add_del_session_args_t a = {
+ .is_add = mp->is_add,
+ .is_ip6 = mp->is_ipv6,
+ .decap_fib_index = decap_fib_index,
+ .session_id = ntohs (mp->session_id),
+ .client_ip = to_ip46 (mp->is_ipv6, mp->client_ip),
+ };
+ clib_memcpy (a.client_mac, mp->client_mac, 6);
+
+ u32 sw_if_index = ~0;
+ rv = vnet_pppoe_add_del_session (&a, &sw_if_index);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_PPPOE_ADD_DEL_SESSION_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void send_pppoe_session_details
+ (pppoe_session_t * t, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_pppoe_session_details_t *rmp;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ u8 is_ipv6 = !ip46_address_is_ip4 (&t->client_ip);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_PPPOE_SESSION_DETAILS);
+ if (is_ipv6)
+ {
+ memcpy (rmp->client_ip, t->client_ip.ip6.as_u8, 16);
+ rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id);
+ }
+ else
+ {
+ memcpy (rmp->client_ip, t->client_ip.ip4.as_u8, 4);
+ rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id);
+ }
+ rmp->session_id = htons (t->session_id);
+ rmp->encap_if_index = htonl (t->encap_if_index);
+ clib_memcpy (rmp->local_mac, t->local_mac, 6);
+ clib_memcpy (rmp->client_mac, t->client_mac, 6);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->is_ipv6 = is_ipv6;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_pppoe_session_dump_t_handler (vl_api_pppoe_session_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ pppoe_main_t *pem = &pppoe_main;
+ pppoe_session_t *t;
+ u32 sw_if_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (t, pem->sessions,
+ ({
+ send_pppoe_session_details(t, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (pem->session_index_by_sw_if_index)) ||
+ (~0 == pem->session_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &pem->sessions[pem->session_index_by_sw_if_index[sw_if_index]];
+ send_pppoe_session_details (t, q, mp->context);
+ }
+}
+
+
+static clib_error_t *
+pppoe_api_hookup (vlib_main_t * vm)
+{
+ pppoe_main_t *pem = &pppoe_main;
+
+ u8 *name = format (0, "pppoe_%08x%c", api_version, 0);
+ pem->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + pem->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_pppoe_plugin_api_msg;
+#undef _
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (pem, &api_main);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (pppoe_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/pppoe/pppoe_decap.c b/src/plugins/pppoe/pppoe_decap.c
new file mode 100644
index 00000000..02c82711
--- /dev/null
+++ b/src/plugins/pppoe/pppoe_decap.c
@@ -0,0 +1,422 @@
+/*
+ * decap.c: pppoe session decap packet processing
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ppp/packet.h>
+#include <pppoe/pppoe.h>
+
+typedef struct {
+ u32 next_index;
+ u32 session_index;
+ u32 session_id;
+ u32 error;
+} pppoe_rx_trace_t;
+
+static u8 * format_pppoe_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pppoe_rx_trace_t * t = va_arg (*args, pppoe_rx_trace_t *);
+
+ if (t->session_index != ~0)
+ {
+ s = format (s, "PPPoE decap from pppoe_session%d session_id %d next %d error %d",
+ t->session_index, t->session_id, t->next_index, t->error);
+ }
+ else
+ {
+ s = format (s, "PPPoE decap error - session for session_id %d does not exist",
+ t->session_id);
+ }
+ return s;
+}
+
+static uword
+pppoe_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ pppoe_main_t * pem = &pppoe_main;
+ vnet_main_t * vnm = pem->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 pkts_decapsulated = 0;
+ u32 thread_index = vlib_get_thread_index();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+ pppoe_entry_key_t cached_key;
+ pppoe_entry_result_t cached_result;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ /* Clear the one-entry cache in case session table was updated */
+ cached_key.raw = ~0;
+ cached_result.raw = ~0; /* warning be gone */
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ ethernet_header_t *h0, *h1;
+ pppoe_header_t * pppoe0, * pppoe1;
+ u16 ppp_proto0 = 0, ppp_proto1 = 0;
+ pppoe_session_t * t0, * t1;
+ u32 error0, error1;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+ pppoe_entry_key_t key0, key1;
+ pppoe_entry_result_t result0, result1;
+ u32 bucket0, bucket1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ error0 = 0;
+ error1 = 0;
+
+ /* leaves current_data pointing at the pppoe header */
+ pppoe0 = vlib_buffer_get_current (b0);
+ pppoe1 = vlib_buffer_get_current (b1);
+ ppp_proto0 = clib_net_to_host_u16(pppoe0->ppp_proto);
+ ppp_proto1 = clib_net_to_host_u16(pppoe1->ppp_proto);
+
+ /* Manipulate packet 0 */
+ if ((ppp_proto0 != PPP_PROTOCOL_ip4)
+ && (ppp_proto0 != PPP_PROTOCOL_ip6))
+ {
+ error0 = PPPOE_ERROR_CONTROL_PLANE;
+ next0 = PPPOE_INPUT_NEXT_CP_INPUT;
+ goto trace0;
+ }
+
+ /* get client mac */
+ vlib_buffer_reset(b0);
+ h0 = vlib_buffer_get_current (b0);
+
+ pppoe_lookup_1 (&pem->session_table, &cached_key, &cached_result,
+ h0->src_address, pppoe0->session_id,
+ &key0, &bucket0, &result0);
+ if (PREDICT_FALSE (result0.fields.session_index == ~0))
+ {
+ error0 = PPPOE_ERROR_NO_SUCH_SESSION;
+ next0 = PPPOE_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ t0 = pool_elt_at_index (pem->sessions,
+ result0.fields.session_index);
+
+ /* Pop Eth and PPPoE headers */
+ vlib_buffer_advance(b0, sizeof(*h0)+sizeof(*pppoe0));
+
+ next0 = (ppp_proto0==PPP_PROTOCOL_ip4)?
+ PPPOE_INPUT_NEXT_IP4_INPUT
+ : PPPOE_INPUT_NEXT_IP6_INPUT;
+
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same pppoe session so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace0:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ pppoe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->session_index = result0.fields.session_index;
+ tr->session_id = clib_net_to_host_u16(pppoe0->session_id);
+ }
+
+
+ /* Manipulate packet 1 */
+ if ((ppp_proto1 != PPP_PROTOCOL_ip4)
+ && (ppp_proto1 != PPP_PROTOCOL_ip6))
+ {
+ error1 = PPPOE_ERROR_CONTROL_PLANE;
+ next1 = PPPOE_INPUT_NEXT_CP_INPUT;
+ goto trace1;
+ }
+
+ /* get client mac */
+ vlib_buffer_reset(b1);
+ h1 = vlib_buffer_get_current (b1);
+
+ pppoe_lookup_1 (&pem->session_table, &cached_key, &cached_result,
+ h1->src_address, pppoe1->session_id,
+ &key1, &bucket1, &result1);
+ if (PREDICT_FALSE (result1.fields.session_index == ~0))
+ {
+ error1 = PPPOE_ERROR_NO_SUCH_SESSION;
+ next1 = PPPOE_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ t1 = pool_elt_at_index (pem->sessions,
+ result1.fields.session_index);
+
+ /* Pop Eth and PPPoE headers */
+ vlib_buffer_advance(b1, sizeof(*h1)+sizeof(*pppoe1));
+
+ next1 = (ppp_proto1==PPP_PROTOCOL_ip4)?
+ PPPOE_INPUT_NEXT_IP4_INPUT
+ : PPPOE_INPUT_NEXT_IP6_INPUT;
+
+ sw_if_index1 = t1->sw_if_index;
+ len1 = vlib_buffer_length_in_chain (vm, b1);
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len1;
+
+ /* Batch stats increment on the same pppoe session so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len1;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len1;
+ stats_sw_if_index = sw_if_index1;
+ }
+
+ trace1:
+ b1->error = error1 ? node->errors[error1] : 0;
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ pppoe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->session_index = result1.fields.session_index;
+ tr->session_id = clib_net_to_host_u16(pppoe1->session_id);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ ethernet_header_t *h0;
+ pppoe_header_t * pppoe0;
+ u16 ppp_proto0 = 0;
+ pppoe_session_t * t0;
+ u32 error0;
+ u32 sw_if_index0, len0;
+ pppoe_entry_key_t key0;
+ pppoe_entry_result_t result0;
+ u32 bucket0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ error0 = 0;
+
+ /* leaves current_data pointing at the pppoe header */
+ pppoe0 = vlib_buffer_get_current (b0);
+ ppp_proto0 = clib_net_to_host_u16(pppoe0->ppp_proto);
+
+ if ((ppp_proto0 != PPP_PROTOCOL_ip4)
+ && (ppp_proto0 != PPP_PROTOCOL_ip6))
+ {
+ error0 = PPPOE_ERROR_CONTROL_PLANE;
+ next0 = PPPOE_INPUT_NEXT_CP_INPUT;
+ goto trace00;
+ }
+
+ /* get client mac */
+ vlib_buffer_reset(b0);
+ h0 = vlib_buffer_get_current (b0);
+
+ pppoe_lookup_1 (&pem->session_table, &cached_key, &cached_result,
+ h0->src_address, pppoe0->session_id,
+ &key0, &bucket0, &result0);
+ if (PREDICT_FALSE (result0.fields.session_index == ~0))
+ {
+ error0 = PPPOE_ERROR_NO_SUCH_SESSION;
+ next0 = PPPOE_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ t0 = pool_elt_at_index (pem->sessions,
+ result0.fields.session_index);
+
+ /* Pop Eth and PPPoE headers */
+ vlib_buffer_advance(b0, sizeof(*h0)+sizeof(*pppoe0));
+
+ next0 = (ppp_proto0==PPP_PROTOCOL_ip4)?
+ PPPOE_INPUT_NEXT_IP4_INPUT
+ : PPPOE_INPUT_NEXT_IP6_INPUT;
+
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same pppoe session so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace00:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ pppoe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->session_index = result0.fields.session_index;
+ tr->session_id = clib_net_to_host_u16(pppoe0->session_id);
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ /* Do we still need this now that session tx stats is kept? */
+ vlib_node_increment_counter (vm, pppoe_input_node.index,
+ PPPOE_ERROR_DECAPSULATED,
+ pkts_decapsulated);
+
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char * pppoe_error_strings[] = {
+#define pppoe_error(n,s) s,
+#include <pppoe/pppoe_error.def>
+#undef pppoe_error
+};
+
+VLIB_REGISTER_NODE (pppoe_input_node) = {
+ .function = pppoe_input,
+ .name = "pppoe-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = PPPOE_N_ERROR,
+ .error_strings = pppoe_error_strings,
+
+ .n_next_nodes = PPPOE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [PPPOE_INPUT_NEXT_##s] = n,
+ foreach_pppoe_input_next
+#undef _
+ },
+
+ .format_trace = format_pppoe_rx_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (pppoe_input_node, pppoe_input);
diff --git a/src/plugins/pppoe/pppoe_error.def b/src/plugins/pppoe/pppoe_error.def
new file mode 100644
index 00000000..a875afd0
--- /dev/null
+++ b/src/plugins/pppoe/pppoe_error.def
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+pppoe_error (DECAPSULATED, "good packets decapsulated")
+pppoe_error (CONTROL_PLANE, "control plane packet")
+pppoe_error (NO_SUCH_SESSION, "no such sessions")
+pppoe_error (BAD_VER_TYPE, "bad version and type in pppoe header")
diff --git a/src/plugins/pppoe/pppoe_msg_enum.h b/src/plugins/pppoe/pppoe_msg_enum.h
new file mode 100644
index 00000000..7ca19189
--- /dev/null
+++ b/src/plugins/pppoe/pppoe_msg_enum.h
@@ -0,0 +1,31 @@
+/*
+ * pppoe_msg_enum.h - vpp engine plug-in message enumeration
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_pppoe_msg_enum_h
+#define included_pppoe_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+#include <pppoe/pppoe_all_api_h.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
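+
+/*
+ * For illustration: with the messages defined in pppoe.api, the include
+ * above expands to enum entries such as VL_API_PPPOE_ADD_DEL_SESSION and
+ * VL_API_PPPOE_ADD_DEL_SESSION_REPLY, with VL_MSG_FIRST_AVAILABLE last.
+ */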
+
+#endif /* included_pppoe_msg_enum_h */
diff --git a/src/plugins/pppoe/pppoe_tap.c b/src/plugins/pppoe/pppoe_tap.c
new file mode 100644
index 00000000..60cdaafb
--- /dev/null
+++ b/src/plugins/pppoe/pppoe_tap.c
@@ -0,0 +1,89 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <pppoe/pppoe.h>
+#include <vnet/unix/tapcli.h>
+
+static clib_error_t *
+pppoe_add_del_tap_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ pppoe_main_t *pem = &pppoe_main;
+ u8 is_add = 1;
+ u8 tap_if_index_set = 0;
+ u32 tap_if_index = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (line_input, "tap-if-index %d", &tap_if_index))
+ tap_if_index_set = 1;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (tap_if_index_set == 0)
+ {
+ error = clib_error_return (0, "tap if index not specified");
+ goto done;
+ }
+
+ if (is_add)
+ {
+ pem->tap_if_index = tap_if_index;
+ }
+ else
+ {
+ pem->tap_if_index = ~0;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_pppoe_tap_cmd, static) =
+{
+ .path = "create pppoe tap",
+ .short_help = "create pppoe tap tap-if-index <nn> [del]",
+ .function = pppoe_add_del_tap_command_fn,
+};
+/* *INDENT-ON* */
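+
+/*
+ * Example session (hypothetical tap interface index):
+ *   vpp# create pppoe tap tap-if-index 1
+ *   vpp# create pppoe tap tap-if-index 1 del
+ */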
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/pppoe/pppoe_tap_node.c b/src/plugins/pppoe/pppoe_tap_node.c
new file mode 100644
index 00000000..f1e0a501
--- /dev/null
+++ b/src/plugins/pppoe/pppoe_tap_node.c
@@ -0,0 +1,297 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ppp/packet.h>
+#include <pppoe/pppoe.h>
+
+vlib_node_registration_t pppoe_tap_dispatch_node;
+
+#define foreach_pppoe_tap_next \
+_(DROP, "error-drop") \
+_(TUNTAP, "tuntap-tx" ) \
+_(INTERFACE, "interface-output" )
+
+typedef enum
+{
+#define _(s,n) PPPOE_TAP_NEXT_##s,
+ foreach_pppoe_tap_next
+#undef _
+ PPPOE_TAP_N_NEXT,
+} pppoe_tap_next_t;
+
+typedef struct {
+ u32 next_index;
+ u32 sw_if_index;
+ u32 tap_if_index;
+ u8 pppoe_code;
+ u16 ppp_proto;
+ u32 error;
+} pppoe_tap_trace_t;
+
+static u8 * format_pppoe_tap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pppoe_tap_trace_t * t = va_arg (*args, pppoe_tap_trace_t *);
+ pppoe_main_t * pem = &pppoe_main;
+
+ if (t->sw_if_index != pem->tap_if_index)
+ {
+ s = format (s, "PPPoE dispatch from sw_if_index %d next %d error %d \n"
+ " pppoe_code 0x%x ppp_proto 0x%x",
+ t->sw_if_index, t->next_index, t->error,
+ t->pppoe_code, t->ppp_proto);
+ }
+ else
+ {
+ s = format (s, "PPPoE dispatch from tap_if_index %d next %d error %d \n"
+ " pppoe_code 0x%x ppp_proto 0x%x",
+ t->tap_if_index, t->next_index, t->error,
+ t->pppoe_code, t->ppp_proto);
+ }
+ return s;
+}
+
+/**
+ * Perform learning on one packet based on the mac table lookup result.
+ * */
+static_always_inline void
+pppoe_learn_process (vlib_node_runtime_t * node,
+ pppoe_main_t * pem,
+ vlib_buffer_t * b0,
+ u32 sw_if_index0,
+ pppoe_entry_key_t * key0,
+ pppoe_entry_key_t * cached_key,
+ u32 * bucket0,
+ pppoe_entry_result_t * result0)
+{
+ /* Check mac table lookup result */
+ if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0))
+ {
+ /*
+ * The entry was in the table, and the sw_if_index matched, the normal case
+ */
+ return;
+ }
+ else if (result0->fields.sw_if_index == ~0)
+ {
+ /* The entry was not in table, so add it */
+ result0->fields.sw_if_index = sw_if_index0;
+ result0->fields.session_index = ~0;
+ cached_key->raw = ~0; /* invalidate the cache */
+ }
+ else
+ {
+ /* The entry was in the table, but with the wrong sw_if_index mapping (mac move) */
+ result0->fields.sw_if_index = sw_if_index0;
+ }
+
+ /* Update the entry */
+ BVT (clib_bihash_kv) kv;
+ kv.key = key0->raw;
+ kv.value = result0->raw;
+ BV (clib_bihash_add_del) (&pem->session_table, &kv, 1 /* is_add */ );
+}
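+
+/*
+ * For illustration: the first packet from client MAC M arriving on
+ * sw_if_index 3 misses the table (sw_if_index == ~0) and installs
+ * {M, session} -> {3, ~0}; if M later appears on sw_if_index 5 the
+ * entry is rewritten (MAC move); a hit with a matching sw_if_index
+ * is a no-op.
+ */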
+
+static uword
+pppoe_tap_dispatch (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ pppoe_main_t * pem = &pppoe_main;
+ vnet_main_t * vnm = pem->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 pkts_decapsulated = 0;
+ u32 thread_index = vlib_get_thread_index();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+ pppoe_entry_key_t cached_key;
+ pppoe_entry_result_t cached_result;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ /* Clear the one-entry cache in case session table was updated */
+ cached_key.raw = ~0;
+ cached_result.raw = ~0; /* warning be gone */
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ ethernet_header_t *h0;
+ pppoe_header_t * pppoe0;
+ pppoe_entry_key_t key0;
+ pppoe_entry_result_t result0;
+
+ u32 bucket0;
+ u32 next0;
+ u32 error0 = 0;
+ u32 rx_sw_if_index0=~0, tx_sw_if_index0=~0, len0;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ /* leaves current_data pointing at the pppoe header */
+ pppoe0 = vlib_buffer_get_current (b0);
+ rx_sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE (pppoe0->ver_type != PPPOE_VER_TYPE))
+ {
+ error0 = PPPOE_ERROR_BAD_VER_TYPE;
+ next0 = PPPOE_TAP_NEXT_DROP;
+ goto trace00;
+ }
+
+ vlib_buffer_reset(b0);
+ h0 = vlib_buffer_get_current (b0);
+
+ if(rx_sw_if_index0 == pem->tap_if_index)
+ {
+ pppoe_lookup_1 (&pem->session_table, &cached_key, &cached_result,
+ h0->dst_address, 0,
+ &key0, &bucket0, &result0);
+ tx_sw_if_index0 = result0.fields.sw_if_index;
+
+ if (PREDICT_FALSE (tx_sw_if_index0 == ~0))
+ {
+ error0 = PPPOE_ERROR_NO_SUCH_SESSION;
+ next0 = PPPOE_TAP_NEXT_DROP;
+ goto trace00;
+ }
+
+ next0 = PPPOE_TAP_NEXT_INTERFACE;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ /* set src mac address */
+ si = vnet_get_sw_interface(vnm, tx_sw_if_index0);
+ hi = vnet_get_hw_interface (vnm, si->hw_if_index);
+ clib_memcpy (vlib_buffer_get_current (b0)+6, hi->hw_address, 6);
+ }
+ else
+ {
+ pppoe_lookup_1 (&pem->session_table, &cached_key, &cached_result,
+ h0->src_address, pppoe0->session_id,
+ &key0, &bucket0, &result0);
+ tx_sw_if_index0 = result0.fields.sw_if_index;
+
+ /* learn client session */
+ pppoe_learn_process (node, pem, b0, rx_sw_if_index0,
+ &key0, &cached_key,
+ &bucket0, &result0);
+
+ next0 = PPPOE_TAP_NEXT_TUNTAP;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = pem->tap_if_index;
+ }
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same pppoe session so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (rx_sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = rx_sw_if_index0;
+ }
+
+ trace00:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ pppoe_tap_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->sw_if_index = tx_sw_if_index0;
+ tr->tap_if_index = pem->tap_if_index;
+ tr->pppoe_code = pppoe0->code;
+ tr->ppp_proto = clib_net_to_host_u16(pppoe0->ppp_proto);
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ /* Do we still need this now that session tx stats are kept? */
+ /* Note: errors are accounted against pppoe-input, which owns the
+  * PPPoE error counters. */
+ vlib_node_increment_counter (vm, pppoe_input_node.index,
+ PPPOE_ERROR_DECAPSULATED,
+ pkts_decapsulated);
+
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char * pppoe_tap_error_strings[] = {
+#define pppoe_error(n,s) s,
+#include <pppoe/pppoe_error.def>
+#undef pppoe_error
+};
+
+VLIB_REGISTER_NODE (pppoe_tap_dispatch_node) = {
+ .function = pppoe_tap_dispatch,
+ .name = "pppoe-tap-dispatch",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = PPPOE_N_ERROR,
+ .error_strings = pppoe_tap_error_strings,
+
+ .n_next_nodes = PPPOE_TAP_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [PPPOE_TAP_NEXT_##s] = n,
+ foreach_pppoe_tap_next
+#undef _
+ },
+
+ .format_trace = format_pppoe_tap_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (pppoe_tap_dispatch_node, pppoe_tap_dispatch)
+
diff --git a/src/plugins/pppoe/pppoe_test.c b/src/plugins/pppoe/pppoe_test.c
new file mode 100644
index 00000000..2b67d989
--- /dev/null
+++ b/src/plugins/pppoe/pppoe_test.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <pppoe/pppoe.h>
+
+#define __plugin_msg_base pppoe_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+
+uword unformat_ip46_address (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ if ((type != IP46_TYPE_IP6) &&
+ unformat(input, "%U", unformat_ip4_address, &ip46->ip4)) {
+ ip46_address_mask_ip4(ip46);
+ return 1;
+ } else if ((type != IP46_TYPE_IP4) &&
+ unformat(input, "%U", unformat_ip6_address, &ip46->ip6)) {
+ return 1;
+ }
+ return 0;
+}
+uword unformat_ip46_prefix (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ u8 *len = va_arg (*args, u8 *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+
+ u32 l;
+ if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) {
+ if (l > 32)
+ return 0;
+ *len = l + 96;
+ ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0;
+ } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) {
+ if (l > 128)
+ return 0;
+ *len = l;
+ } else {
+ return 0;
+ }
+ return 1;
+}
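+
+/*
+ * For illustration: "10.1.2.0/24" parses as an IPv4-mapped address with
+ * *len = 24 + 96 = 120, while "2001:db8::/32" parses with *len = 32.
+ */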
+/////////////////////////
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <pppoe/pppoe.api.h>
+ /* We'll want to know how many message IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+/* define message structures */
+#define vl_typedefs
+#include <pppoe/pppoe.api.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <pppoe/pppoe.api.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <pppoe/pppoe.api.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <pppoe/pppoe.api.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} pppoe_test_main_t;
+
+pppoe_test_main_t pppoe_test_main;
+
+static void vl_api_pppoe_add_del_session_reply_t_handler
+ (vl_api_pppoe_add_del_session_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+ _(PPPOE_ADD_DEL_SESSION_REPLY, pppoe_add_del_session_reply) \
+ _(PPPOE_SESSION_DETAILS, pppoe_session_details)
+
+
+static int
+api_pppoe_add_del_session (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_pppoe_add_del_session_t *mp;
+ u16 session_id = 0;
+ ip46_address_t client_ip;
+ u8 is_add = 1;
+ u8 client_ip_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u32 decap_vrf_id = 0;
+ u8 client_mac[6] = { 0 };
+ u8 client_mac_set = 0;
+ int ret;
+
+ /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ memset (&client_ip, 0, sizeof client_ip);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (line_input, "session_id %d", &session_id))
+ ;
+ else if (unformat (line_input, "client-ip %U",
+ unformat_ip4_address, &client_ip.ip4))
+ {
+ client_ip_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "client-ip %U",
+ unformat_ip6_address, &client_ip.ip6))
+ {
+ client_ip_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "decap-vrf-id %d", &decap_vrf_id))
+ ;
+ else if (unformat (line_input, "client-mac %U", unformat_ethernet_address, client_mac))
+ client_mac_set = 1;
+ else
+ {
+ return -99;
+ }
+ }
+
+ if (client_ip_set == 0)
+ {
+ errmsg ("session client_ip address not specified");
+ return -99;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ if (client_mac_set == 0)
+ {
+ errmsg("session client mac not specified");
+ return -99;
+ }
+
+ M (PPPOE_ADD_DEL_SESSION, mp);
+
+ if (ipv6_set)
+ {
+ clib_memcpy (mp->client_ip, &client_ip.ip6, sizeof (client_ip.ip6));
+ }
+ else
+ {
+ clib_memcpy (mp->client_ip, &client_ip.ip4, sizeof (client_ip.ip4));
+ }
+
+ mp->decap_vrf_id = ntohl (decap_vrf_id);
+ mp->session_id = ntohl (session_id);
+ mp->is_add = is_add;
+ mp->is_ipv6 = ipv6_set;
+ memcpy (mp->client_mac, client_mac, 6);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
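+
+/*
+ * Example VAT invocation (hypothetical values):
+ *   pppoe_add_del_session session_id 1 client-ip 10.0.0.2
+ *     client-mac 00:01:02:03:04:05
+ */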
+
+static void vl_api_pppoe_session_details_t_handler
+ (vl_api_pppoe_session_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ ip46_address_t client_ip = to_ip46 (mp->is_ipv6, mp->client_ip);
+
+ print (vam->ofp, "%11d%14d%24U%14d%14d%30U%30U",
+ ntohl (mp->sw_if_index), ntohl (mp->session_id),
+ format_ip46_address, &client_ip, IP46_TYPE_ANY,
+ ntohl (mp->encap_if_index), ntohl (mp->decap_vrf_id),
+ format_ethernet_address, mp->local_mac,
+ format_ethernet_address, mp->client_mac);
+}
+
+static int
+api_pppoe_session_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_pppoe_session_dump_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ sw_if_index = ~0;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%11s%24s%14s%14s%14s",
+ "sw_if_index", "client_ip", "session_id",
+ "encap_if_index", "decap_fib_index",
+ "local-mac", "client-mac");
+ }
+
+ /* Get list of pppoe-session interfaces */
+ M (PPPOE_SESSION_DUMP, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+
+ S (mp);
+
+ W (ret);
+ return ret;
+}
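+
+/*
+ * Example VAT invocation (hypothetical value):
+ *   pppoe_session_dump sw_if_index 5
+ */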
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(pppoe_add_del_session, \
+ " session_id <nn> client-ip <ip-addr> [decap-vrf-id <nn>]" \
+ " client-mac <mac> [del]") \
+_(pppoe_session_dump, "[<intfc> | sw_if_index <nn>]") \
+
+static void
+pppoe_vat_api_hookup (vat_main_t *vam)
+{
+ pppoe_test_main_t * pem = &pppoe_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + pem->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ pppoe_test_main_t * pem = &pppoe_test_main;
+
+ u8 * name;
+
+ pem->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "pppoe_%08x%c", api_version, 0);
+ pem->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (pem->msg_id_base != (u16) ~0)
+ pppoe_vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/sixrd.am b/src/plugins/sixrd.am
new file mode 100644
index 00000000..0de45088
--- /dev/null
+++ b/src/plugins/sixrd.am
@@ -0,0 +1,26 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+libsixrd_plugin_la_SOURCES = \
+ sixrd/sixrd.c \
+ sixrd/sixrd_dpo.c \
+ sixrd/ip4_sixrd.c \
+ sixrd/ip6_sixrd.c
+
+noinst_HEADERS += \
+ sixrd/sixrd.h \
+ sixrd/sixrd_dpo.h
+
+vppplugins_LTLIBRARIES += libsixrd_plugin.la
+
+# vi:syntax=automake
diff --git a/src/plugins/sixrd/ip4_sixrd.c b/src/plugins/sixrd/ip4_sixrd.c
new file mode 100644
index 00000000..2fb8015d
--- /dev/null
+++ b/src/plugins/sixrd/ip4_sixrd.c
@@ -0,0 +1,127 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+#include "sixrd.h"
+
+static vlib_node_registration_t ip4_sixrd_node;
+
+typedef enum {
+ IP4_SIXRD_NEXT_IP6_LOOKUP,
+ IP4_SIXRD_NEXT_DROP,
+ IP4_SIXRD_N_NEXT,
+} ip4_sixrd_next_t;
+
+/*
+ * ip4_sixrd_sec_check
+ */
+static_always_inline void
+ip4_sixrd_sec_check (sixrd_domain_t *d, ip4_address_t sa4, ip6_address_t sa6, u8 *error)
+{
+ u32 a = sixrd_get_addr(d, sa6.as_u64[0]);
+ /* Note: this per-packet clib_warning is debug noise in a data-path node. */
+ clib_warning("Security check: %U %U", format_ip4_address, &a, format_ip4_address, &sa4);
+ if (PREDICT_FALSE(sixrd_get_addr(d, sa6.as_u64[0]) != sa4.as_u32))
+ *error = SIXRD_ERROR_SEC_CHECK;
+}
+
+/*
+ * ip4_sixrd
+ */
+static uword
+ip4_sixrd (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_sixrd_node.index);
+ u32 decap = 0;
+
+ from = vlib_frame_vector_args(frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0) {
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0) {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = SIXRD_ERROR_NONE;
+ sixrd_domain_t *d0 = 0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ u32 sixrd_domain_index0 = ~0;
+ u32 next0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next +=1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer(vm, pi0);
+ ip40 = vlib_buffer_get_current(p0);
+
+ /* Throw away anything that isn't IPv6-in-IPv4 (20B v4 + 40B v6 = 60B minimum). */
+ if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_IPV6 && clib_net_to_host_u16(ip40->length) >= 60)) {
+ vlib_buffer_advance(p0, sizeof(ip4_header_t));
+ ip60 = vlib_buffer_get_current(p0);
+ d0 = ip4_sixrd_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip6_address_t *)&ip60->src_address,
+ &sixrd_domain_index0, &error0);
+ } else {
+ error0 = SIXRD_ERROR_BAD_PROTOCOL;
+ }
+ if (d0) {
+ /* SIXRD inbound security check */
+ ip4_sixrd_sec_check(d0, ip40->src_address, ip60->src_address, &error0);
+ }
+
+ next0 = error0 == SIXRD_ERROR_NONE ? IP4_SIXRD_NEXT_IP6_LOOKUP : IP4_SIXRD_NEXT_DROP;
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+ sixrd_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+ tr->sixrd_domain_index = sixrd_domain_index0;
+ }
+
+ p0->error = error_node->errors[error0];
+ if (PREDICT_TRUE(error0 == SIXRD_ERROR_NONE)) decap++;
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+
+ }
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter(vm, ip4_sixrd_node.index, SIXRD_ERROR_DECAPSULATED, decap);
+
+ return frame->n_vectors;
+}
+
+static char *sixrd_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sixrd_error
+#undef _
+};
+
+VLIB_REGISTER_NODE(ip4_sixrd_node,static) = {
+ .function = ip4_sixrd,
+ .name = "ip4-sixrd",
+ .vector_size = sizeof(u32),
+ .format_trace = format_sixrd_trace,
+ .n_errors = SIXRD_N_ERROR,
+ .error_strings = sixrd_error_strings,
+ .n_next_nodes = IP4_SIXRD_N_NEXT,
+ .next_nodes = {
+ [IP4_SIXRD_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_SIXRD_NEXT_DROP] = "error-drop",
+ },
+};
diff --git a/src/plugins/sixrd/ip6_sixrd.c b/src/plugins/sixrd/ip6_sixrd.c
new file mode 100644
index 00000000..36f3fab3
--- /dev/null
+++ b/src/plugins/sixrd/ip6_sixrd.c
@@ -0,0 +1,129 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * Defines used for testing various optimisation schemes
+ */
+#define SIXRD_ENCAP_DUAL 0
+
+#include "sixrd.h"
+
+static vlib_node_registration_t ip6_sixrd_node;
+
+typedef enum {
+ IP6_SIXRD_NEXT_IP4_LOOKUP,
+ IP6_SIXRD_NEXT_DROP,
+ IP6_SIXRD_N_NEXT,
+} ip6_sixrd_next_t;
+
+/*
+ * ip6_sixrd
+ */
+static uword
+ip6_sixrd (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_sixrd_node.index);
+ u32 encap = 0;
+ from = vlib_frame_vector_args(frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0) {
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0) {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ sixrd_domain_t *d0;
+ u8 error0 = SIXRD_ERROR_NONE;
+ ip6_header_t *ip60;
+ ip4_header_t *ip4h0;
+ u32 next0 = IP6_SIXRD_NEXT_IP4_LOOKUP;
+ u32 sixrd_domain_index0 = ~0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next +=1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer(vm, pi0);
+ ip60 = vlib_buffer_get_current(p0);
+ d0 = ip6_sixrd_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &sixrd_domain_index0);
+ ASSERT(d0);
+
+ /* SIXRD calc */
+ u64 dal60 = clib_net_to_host_u64(ip60->dst_address.as_u64[0]);
+ u32 da40 = sixrd_get_addr(d0, dal60);
+ u16 len = clib_net_to_host_u16(ip60->payload_length) + 60; /* + 40B v6 hdr + 20B v4 hdr */
+ if (da40 == 0) error0 = SIXRD_ERROR_UNKNOWN;
+
+ /* construct ipv4 header */
+ vlib_buffer_advance(p0, - (sizeof(ip4_header_t)));
+ ip4h0 = vlib_buffer_get_current(p0);
+ vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0;
+ ip4h0->ip_version_and_header_length = 0x45;
+ ip4h0->tos = 0;
+ ip4h0->length = clib_host_to_net_u16(len);
+ ip4h0->fragment_id = 0;
+ ip4h0->flags_and_fragment_offset = 0;
+ ip4h0->ttl = 0x40;
+ ip4h0->protocol = IP_PROTOCOL_IPV6;
+ ip4h0->src_address = d0->ip4_src;
+ ip4h0->dst_address.as_u32 = clib_host_to_net_u32(da40);
+ ip4h0->checksum = ip4_header_checksum(ip4h0);
+
+ next0 = error0 == SIXRD_ERROR_NONE ? IP6_SIXRD_NEXT_IP4_LOOKUP : IP6_SIXRD_NEXT_DROP;
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+ sixrd_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+ tr->sixrd_domain_index = sixrd_domain_index0;
+ }
+
+ p0->error = error_node->errors[error0];
+ if (PREDICT_TRUE(error0 == SIXRD_ERROR_NONE)) encap++;
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter(vm, ip6_sixrd_node.index, SIXRD_ERROR_ENCAPSULATED, encap);
+
+ return frame->n_vectors;
+}
+
+static char *sixrd_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sixrd_error
+#undef _
+};
+
+VLIB_REGISTER_NODE(ip6_sixrd_node,static) = {
+ .function = ip6_sixrd,
+ .name = "ip6-sixrd",
+ .vector_size = sizeof(u32),
+ .format_trace = format_sixrd_trace,
+ .n_errors = SIXRD_N_ERROR,
+ .error_strings = sixrd_error_strings,
+ .n_next_nodes = IP6_SIXRD_N_NEXT,
+ .next_nodes = {
+ [IP6_SIXRD_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_SIXRD_NEXT_DROP] = "error-drop",
+ },
+};
diff --git a/src/plugins/sixrd/sixrd.c b/src/plugins/sixrd/sixrd.c
new file mode 100644
index 00000000..98387525
--- /dev/null
+++ b/src/plugins/sixrd/sixrd.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sixrd.h"
+#include <vnet/plugin/plugin.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/adj/adj.h>
+#include <vpp/app/version.h>
+
+/*
+ * This code supports the following sixrd modes:
+ *
+ * 32 EA bits (Complete IPv4 address is embedded):
+ * ea_bits_len = 32
+ * IPv4 suffix is embedded:
+ * ea_bits_len = < 32
+ * No embedded address bits (1:1 mode):
+ * ea_bits_len = 0
+ */
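+
+/*
+ * Worked example (hypothetical domain): with ip6-pfx 2001:db8::/40 and
+ * ip4-pfx 10.0.0.0/8 there are 24 EA bits; the delegated prefix
+ * 2001:db8:14:1e28::/64 embeds the IPv4 suffix 0x141e28 (20.30.40),
+ * i.e. tunnel endpoint 10.20.30.40.
+ */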
+
+int
+sixrd_create_domain (ip6_address_t *ip6_prefix,
+ u8 ip6_prefix_len,
+ ip4_address_t *ip4_prefix,
+ u8 ip4_prefix_len,
+ ip4_address_t *ip4_src,
+ u32 *sixrd_domain_index,
+ u16 mtu)
+{
+ dpo_id_t dpo_v6 = DPO_INVALID, dpo_v4 = DPO_INVALID;
+ sixrd_main_t *mm = &sixrd_main;
+ fib_node_index_t fei;
+ sixrd_domain_t *d;
+
+ /* Get domain index */
+ pool_get_aligned(mm->domains, d, CLIB_CACHE_LINE_BYTES);
+ memset(d, 0, sizeof (*d));
+ *sixrd_domain_index = d - mm->domains;
+
+ /* Init domain struct */
+ d->ip4_prefix.as_u32 = ip4_prefix->as_u32;
+ d->ip4_prefix_len = ip4_prefix_len;
+ d->ip6_prefix = *ip6_prefix;
+ d->ip6_prefix_len = ip6_prefix_len;
+ d->ip4_src = *ip4_src;
+ d->mtu = mtu;
+
+ /* Right shift that aligns the embedded IPv4 suffix bits at the LSB of
+  * the upper 64 bits of the IPv6 destination (see sixrd_get_addr). */
+ if (ip4_prefix_len < 32)
+ d->shift = 64 - ip6_prefix_len - (32 - ip4_prefix_len);
+
+ /* Create IPv6 route/adjacency */
+ fib_prefix_t pfx6 = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = d->ip6_prefix_len,
+ .fp_addr = {
+ .ip6 = d->ip6_prefix,
+ },
+ };
+ sixrd_dpo_create(DPO_PROTO_IP6,
+ *sixrd_domain_index,
+ &dpo_v6);
+ fib_table_entry_special_dpo_add(0, &pfx6,
+ FIB_SOURCE_SIXRD,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo_v6);
+ dpo_reset (&dpo_v6);
+
+ /*
+ * Multiple SIXRD domains may share same source IPv4 TEP
+ * In this case the route will exist and be SixRD sourced.
+ * Find the adj (if any) already contributed and modify it
+ */
+ fib_prefix_t pfx4 = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ .fp_addr = {
+ .ip4 = d->ip4_src,
+ },
+ };
+ fei = fib_table_lookup_exact_match(0, &pfx4);
+
+ if (FIB_NODE_INDEX_INVALID != fei)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SIXRD, &dpo))
+ {
+ /*
+ * modify the existing adj to indicate it's shared
+ * skip to route add.
+ * It is locked to pair with the unlock below.
+ */
+ const dpo_id_t *sd_dpo;
+ sixrd_dpo_t *sd;
+
+ ASSERT(DPO_LOAD_BALANCE == dpo.dpoi_type);
+
+ sd_dpo = load_balance_get_bucket(dpo.dpoi_index, 0);
+ sd = sixrd_dpo_get (sd_dpo->dpoi_index);
+
+ sd->sd_domain = ~0;
+ dpo_copy (&dpo_v4, sd_dpo);
+ dpo_reset (&dpo);
+
+ goto route_add;
+ }
+ }
+ /* first time addition of the route */
+ sixrd_dpo_create(DPO_PROTO_IP4,
+ *sixrd_domain_index,
+ &dpo_v4);
+
+route_add:
+ /*
+ * Create ip4 route. This is a reference counted add. If the prefix
+ * already exists and is SixRD sourced, it is now SixRD source n+1 times
+ * and will need to be removed n+1 times.
+ */
+ fib_table_entry_special_dpo_add(0, &pfx4,
+ FIB_SOURCE_SIXRD,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo_v4);
+ dpo_reset (&dpo_v4);
+
+ return 0;
+}
+
+/*
+ * sixrd_delete_domain
+ */
+int
+sixrd_delete_domain (u32 sixrd_domain_index)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ sixrd_domain_t *d;
+
+ if (pool_is_free_index(mm->domains, sixrd_domain_index)) {
+ clib_warning("SIXRD domain delete: domain does not exist: %d",
+ sixrd_domain_index);
+ return -1;
+ }
+
+ d = pool_elt_at_index(mm->domains, sixrd_domain_index);
+
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ .fp_addr = {
+ .ip4 = d->ip4_src,
+ },
+ };
+ fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_SIXRD);
+
+ fib_prefix_t pfx6 = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = d->ip6_prefix_len,
+ .fp_addr = {
+ .ip6 = d->ip6_prefix,
+ },
+ };
+ fib_table_entry_special_remove(0, &pfx6, FIB_SOURCE_SIXRD);
+
+ pool_put(mm->domains, d);
+
+ return 0;
+}
+
+static clib_error_t *
+sixrd_add_domain_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip4_address_t ip4_src;
+ u32 ip6_prefix_len=0, ip4_prefix_len=0, sixrd_domain_index;
+ u32 num_m_args = 0;
+ /* Optional arguments */
+ u32 mtu = 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user(input, unformat_line_input, line_input))
+ return 0;
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat(line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, &ip6_prefix_len))
+ num_m_args++;
+ else if (unformat(line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, &ip4_prefix_len))
+ num_m_args++;
+ else if (unformat(line_input, "ip4-src %U", unformat_ip4_address, &ip4_src))
+ num_m_args++;
+ else if (unformat(line_input, "mtu %d", &mtu))
+ num_m_args++;
+ else {
+ error = clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (num_m_args < 3) {
+ error = clib_error_return(0, "mandatory argument(s) missing");
+ goto done;
+ }
+
+ sixrd_create_domain(&ip6_prefix, ip6_prefix_len, &ip4_prefix, ip4_prefix_len,
+ &ip4_src, &sixrd_domain_index, mtu);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+sixrd_del_domain_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 num_m_args = 0;
+ u32 sixrd_domain_index;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (! unformat_user(input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat(line_input, "index %d", &sixrd_domain_index))
+ num_m_args++;
+ else {
+ error = clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (num_m_args != 1) {
+ error = clib_error_return(0, "mandatory argument(s) missing");
+ goto done;
+ }
+
+ sixrd_delete_domain(sixrd_domain_index);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static u8 *
+format_sixrd_domain (u8 *s, va_list *args)
+{
+ sixrd_domain_t *d = va_arg(*args, sixrd_domain_t *);
+ sixrd_main_t *mm = &sixrd_main;
+
+ s = format(s,
+ "[%d] ip6-pfx %U/%d ip4-pfx %U/%d ip4-src %U mtu %d",
+ d - mm->domains,
+ format_ip6_address, &d->ip6_prefix, d->ip6_prefix_len,
+ format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len,
+ format_ip4_address, &d->ip4_src, d->mtu);
+
+ return s;
+}
+
+static clib_error_t *
+show_sixrd_domain_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ sixrd_domain_t *d;
+
+ if (pool_elts(mm->domains) == 0)
+ vlib_cli_output(vm, "No SIXRD domains are configured...");
+
+ pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_sixrd_domain, d);}));
+
+ return 0;
+
+}
+
+static clib_error_t *
+show_sixrd_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ sixrd_domain_t *d;
+ int domains = 0, domaincount = 0;
+ if (pool_elts (mm->domains) == 0)
+ vlib_cli_output (vm, "No SIXRD domains are configured...");
+
+ pool_foreach(d, mm->domains, ({
+ domains += sizeof(*d);
+ domaincount++;
+ }));
+
+ vlib_cli_output(vm, "SIXRD domains structure: %d\n", sizeof (sixrd_domain_t));
+ vlib_cli_output(vm, "SIXRD domains: %d (%d bytes)\n", domaincount, domains);
+
+ return 0;
+}
+
+/*
+ * packet trace format function
+ */
+u8 *
+format_sixrd_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
+ sixrd_trace_t *t = va_arg (*args, sixrd_trace_t *);
+ u32 sixrd_domain_index = t->sixrd_domain_index;
+
+ s = format(s, "SIXRD domain index: %d", sixrd_domain_index);
+
+ return s;
+}
+
+VLIB_CLI_COMMAND(sixrd_add_domain_command, static) = {
+ .path = "sixrd add domain",
+ .short_help =
+ "sixrd add domain ip6-pfx <ip6-pfx> ip4-pfx <ip4-pfx> ip4-src <ip4-addr>",
+ .function = sixrd_add_domain_command_fn,
+};
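+
+/*
+ * Example (hypothetical prefixes):
+ *   vpp# sixrd add domain ip6-pfx 2001:db8::/40 ip4-pfx 10.0.0.0/8
+ *        ip4-src 192.0.2.1 mtu 1480
+ */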
+
+VLIB_CLI_COMMAND(sixrd_del_command, static) = {
+ .path = "sixrd del domain",
+ .short_help =
+ "sixrd del domain index <domain>",
+ .function = sixrd_del_domain_command_fn,
+};
+
+VLIB_CLI_COMMAND(show_sixrd_domain_command, static) = {
+ .path = "show sixrd domain",
+ .function = show_sixrd_domain_command_fn,
+};
+
+VLIB_CLI_COMMAND(show_sixrd_stats_command, static) = {
+ .path = "show sixrd stats",
+ .function = show_sixrd_stats_command_fn,
+};
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () ={
+ .version = VPP_BUILD_VER,
+ .description = "IPv6 Rapid Deployment on IPv4 Infrastructure (RFC5969)",
+};
+/* *INDENT-ON* */
+
+static clib_error_t * sixrd_init (vlib_main_t * vm)
+{
+ sixrd_main_t *mm = &sixrd_main;
+
+ mm->vnet_main = vnet_get_main();
+ mm->vlib_main = vm;
+
+ sixrd_dpo_module_init ();
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (sixrd_init);
diff --git a/src/plugins/sixrd/sixrd.h b/src/plugins/sixrd/sixrd.h
new file mode 100644
index 00000000..56714c9e
--- /dev/null
+++ b/src/plugins/sixrd/sixrd.h
@@ -0,0 +1,141 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+#include <stdbool.h>
+#include <vppinfra/error.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/ip6_fib.h>
+
+#include "sixrd_dpo.h"
+
+int sixrd_create_domain(ip6_address_t *ip6_prefix, u8 ip6_prefix_len,
+ ip4_address_t *ip4_prefix, u8 ip4_prefix_len,
+ ip4_address_t *ip4_src, u32 *sixrd_domain_index, u16 mtu);
+int sixrd_delete_domain(u32 sixrd_domain_index);
+u8 *format_sixrd_trace(u8 *s, va_list *args);
+
+typedef struct {
+ ip6_address_t ip6_prefix;
+ ip4_address_t ip4_prefix;
+ ip4_address_t ip4_src;
+ u8 ip6_prefix_len;
+ u8 ip4_prefix_len;
+
+ /* helpers */
+ u8 shift;
+
+ u16 mtu;
+} sixrd_domain_t;
+
+typedef struct {
+ /* pool of SIXRD domains */
+ sixrd_domain_t *domains;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} sixrd_main_t;
+
+#define foreach_sixrd_error \
+ /* Must be first. */ \
+ _(NONE, "valid SIXRD packets") \
+ _(BAD_PROTOCOL, "bad protocol") \
+ _(WRONG_ICMP_TYPE, "wrong icmp type") \
+ _(SEC_CHECK, "security check failed") \
+ _(ICMP, "unable to translate ICMP") \
+ _(UNKNOWN, "unknown") \
+ _(NO_DOMAIN, "no domain") \
+ _(ENCAPSULATED, "encapsulated") \
+ _(DECAPSULATED, "decapsulated") \
+ _(TRANSLATED_4TO6, "translated 4 to 6") \
+ _(TRANSLATED_6TO4, "translated 6 to 4") \
+ _(FRAGMENT, "fragment handling error") \
+ _(FRAGMENT_QUEUED, "dropped, missing first fragment") \
+ _(FRAGMENTED, "packets requiring fragmentation") \
+ _(FRAGMENT_PARTS, "fragment parts") \
+ _(MALFORMED, "malformed packet")
+
+typedef enum {
+#define _(sym,str) SIXRD_ERROR_##sym,
+ foreach_sixrd_error
+#undef _
+ SIXRD_N_ERROR,
+ } sixrd_error_t;
+
+typedef struct {
+ u32 sixrd_domain_index;
+} sixrd_trace_t;
+
+sixrd_main_t sixrd_main;
+
+/*
+ * sixrd_get_addr
+ */
+static_always_inline u32
+sixrd_get_addr (sixrd_domain_t *d, u64 dal)
+{
+
+ /* 1:1 mode */
+ if (d->ip4_prefix_len == 32) return (d->ip4_prefix.as_u32);
+
+ /* Grab 32 - ip4_prefix_len bits out of IPv6 address from offset ip6_prefix_len */
+ return (d->ip4_prefix.as_u32 | (u32)(dal >> d->shift));
+}
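+
+/*
+ * For illustration (assuming ip6_prefix_len 40, ip4_prefix_len 8):
+ * shift = 64 - 40 - (32 - 8) = 0, so the low 24 bits of the upper 64
+ * bits of the destination address are OR'd into the 10.0.0.0/8 prefix.
+ */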
+
+/*
+ * Get the SIXRD domain from an IPv6 lookup adjacency.
+ */
+static_always_inline sixrd_domain_t *
+ip6_sixrd_get_domain (u32 sdi, u32 *sixrd_domain_index)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ sixrd_dpo_t *sd;
+
+ sd = sixrd_dpo_get(sdi);
+
+ ASSERT(sd);
+ *sixrd_domain_index = sd->sd_domain;
+ return pool_elt_at_index(mm->domains, *sixrd_domain_index);
+}
+
+/*
+ * Get the SIXRD domain from an IPv4 lookup adjacency.
+ * If the IPv4 address is not shared, no lookup is required.
+ * The IPv6 address is used otherwise.
+ */
+static_always_inline sixrd_domain_t *
+ip4_sixrd_get_domain (u32 sdi, ip6_address_t *addr,
+ u32 *sixrd_domain_index, u8 *error)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ sixrd_dpo_t *sd;
+
+ sd = sixrd_dpo_get(sdi);
+ *sixrd_domain_index = sd->sd_domain;
+ if (*sixrd_domain_index != ~0)
+ return pool_elt_at_index(mm->domains, *sixrd_domain_index);
+
+ u32 lbi = ip6_fib_table_fwding_lookup(&ip6_main, 0, addr);
+ const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0);
+ if (PREDICT_TRUE(dpo->dpoi_type == sixrd_dpo_type))
+ {
+ sd = sixrd_dpo_get(dpo->dpoi_index);
+ *sixrd_domain_index = sd->sd_domain;
+ return pool_elt_at_index(mm->domains, *sixrd_domain_index);
+ }
+ *error = SIXRD_ERROR_NO_DOMAIN;
+ return NULL;
+}
diff --git a/src/plugins/sixrd/sixrd_dpo.c b/src/plugins/sixrd/sixrd_dpo.c
new file mode 100644
index 00000000..88a07935
--- /dev/null
+++ b/src/plugins/sixrd/sixrd_dpo.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sixrd_dpo.h"
+#include <vnet/ip/ip.h>
+
+/**
+ * Pool of all SIXRD DPOs
+ */
+sixrd_dpo_t *sixrd_dpo_pool;
+
+/**
+ * The register SIXRD DPO type
+ */
+dpo_type_t sixrd_dpo_type;
+
+static sixrd_dpo_t *
+sixrd_dpo_alloc (void)
+{
+ sixrd_dpo_t *sd;
+
+ pool_get_aligned(sixrd_dpo_pool, sd, CLIB_CACHE_LINE_BYTES);
+ memset(sd, 0, sizeof(*sd));
+
+ return (sd);
+}
+
+static index_t
+sixrd_dpo_get_index (sixrd_dpo_t *sd)
+{
+ return (sd - sixrd_dpo_pool);
+}
+
+void
+sixrd_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo)
+{
+ sixrd_dpo_t *sd;
+
+ sd = sixrd_dpo_alloc();
+ sd->sd_domain = domain_index;
+ sd->sd_proto = dproto;
+
+ dpo_set(dpo,
+ sixrd_dpo_type,
+ dproto,
+ sixrd_dpo_get_index(sd));
+}
+
+u8*
+format_sixrd_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+ sixrd_dpo_t *sd;
+
+ sd = sixrd_dpo_get(index);
+
+ return (format(s, "sixrd:[%d]:%U domain:%d",
+ index,
+ format_dpo_proto, sd->sd_proto,
+ sd->sd_domain));
+}
+
+
+static void
+sixrd_dpo_lock (dpo_id_t *dpo)
+{
+ sixrd_dpo_t *sd;
+
+ sd = sixrd_dpo_get(dpo->dpoi_index);
+
+ sd->sd_locks++;
+}
+
+static void
+sixrd_dpo_unlock (dpo_id_t *dpo)
+{
+ sixrd_dpo_t *sd;
+
+ sd = sixrd_dpo_get(dpo->dpoi_index);
+
+ sd->sd_locks--;
+
+ if (0 == sd->sd_locks)
+ {
+ pool_put(sixrd_dpo_pool, sd);
+ }
+}
+
+const static dpo_vft_t sd_vft = {
+ .dv_lock = sixrd_dpo_lock,
+ .dv_unlock = sixrd_dpo_unlock,
+ .dv_format = format_sixrd_dpo,
+};
+
+const static char* const sixrd_ip4_nodes[] =
+{
+ "ip4-sixrd",
+ NULL,
+};
+const static char* const sixrd_ip6_nodes[] =
+{
+ "ip6-sixrd",
+ NULL,
+};
+
+const static char* const * const sixrd_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = sixrd_ip4_nodes,
+ [DPO_PROTO_IP6] = sixrd_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+sixrd_dpo_module_init (void)
+{
+ sixrd_dpo_type = dpo_register_new_type(&sd_vft, sixrd_nodes);
+}
diff --git a/src/plugins/sixrd/sixrd_dpo.h b/src/plugins/sixrd/sixrd_dpo.h
new file mode 100644
index 00000000..17142288
--- /dev/null
+++ b/src/plugins/sixrd/sixrd_dpo.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIXRD_DPO_H__
+#define __SIXRD_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of a 6RD DPO
+ */
+typedef struct sixrd_dpo_t
+{
+ /**
+ * The data-plane protocol
+ */
+ dpo_proto_t sd_proto;
+
+ /**
+ * the SIXRD domain index
+ */
+ u32 sd_domain;
+
+ /**
+ * Number of locks/users of the label
+ */
+ u16 sd_locks;
+} sixrd_dpo_t;
+
+extern void sixrd_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern sixrd_dpo_t *sixrd_dpo_pool;
+extern dpo_type_t sixrd_dpo_type;
+
+static inline sixrd_dpo_t *
+sixrd_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(sixrd_dpo_pool, index));
+}
+
+extern void sixrd_dpo_module_init(void);
+
+#endif
diff --git a/src/scripts/version b/src/scripts/version
new file mode 100755
index 00000000..d8728a9f
--- /dev/null
+++ b/src/scripts/version
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+path=$( cd "$(dirname "${BASH_SOURCE}")" ; pwd -P )
+
+cd "$path"
+
+if [ -f .version ]; then
+ vstring=$(cat .version)
+else
+ vstring=$(git describe)
+ if [ $? != 0 ]; then
+ exit 1
+ fi
+fi
+
+TAG=$(echo ${vstring} | cut -d- -f1 | sed -e 's/^v//')
+ADD=$(echo ${vstring} | cut -s -d- -f2)
+
+git rev-parse 2> /dev/null
+if [ $? == 0 ]; then
+ CMT=$(git describe --dirty --match 'v*'| cut -s -d- -f3,4)
+else
+ CMT=$(echo ${vstring} | cut -s -d- -f3,4)
+fi
+CMTR=$(echo $CMT | sed 's/-/_/')
+
+if [ -n "${BUILD_NUMBER}" ]; then
+ BLD="~b${BUILD_NUMBER}"
+fi
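+
+# Example (hypothetical describe output): vstring "v17.07-rc2-27-gf53edbc"
+# yields TAG=17.07, ADD=rc2, CMT=27-gf53edbc, CMTR=27_gf53edbc.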
+
+if [ "$1" = "rpm-version" ]; then
+ echo ${TAG}
+ exit
+fi
+
+if [ "$1" = "rpm-release" ]; then
+ [ -z "${ADD}" ] && echo release && exit
+ echo ${ADD}${CMTR:+~${CMTR}}${BLD}
+ exit
+fi
+
+if [ -n "${ADD}" ]; then
+ if [ "$1" = "rpm-string" ]; then
+ echo ${TAG}-${ADD}${CMTR:+~${CMTR}}${BLD}
+ else
+ echo ${TAG}-${ADD}${CMT:+~${CMT}}${BLD}
+ fi
+else
+ echo ${TAG}-release
+fi
diff --git a/src/scripts/vnet/arp4 b/src/scripts/vnet/arp4
new file mode 100644
index 00000000..acb20da3
--- /dev/null
+++ b/src/scripts/vnet/arp4
@@ -0,0 +1,21 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.0.0.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+trace add pg-input 100
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
diff --git a/src/scripts/vnet/arp4-mpls b/src/scripts/vnet/arp4-mpls
new file mode 100644
index 00000000..d3d39f3b
--- /dev/null
+++ b/src/scripts/vnet/arp4-mpls
@@ -0,0 +1,24 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.2.2.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+
+trace add pg-input 100
diff --git a/src/scripts/vnet/arp6 b/src/scripts/vnet/arp6
new file mode 100644
index 00000000..e6a98935
--- /dev/null
+++ b/src/scripts/vnet/arp6
@@ -0,0 +1,21 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip6-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP6: 2000::2 -> 2001::2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+tr add pg-input 100
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 2000::1/64
+set int ip address loop1 2001::1/64
diff --git a/src/scripts/vnet/bvi b/src/scripts/vnet/bvi
new file mode 100644
index 00000000..2174da0d
--- /dev/null
+++ b/src/scripts/vnet/bvi
@@ -0,0 +1,76 @@
+
+
+set int state tuntap-0 down
+set int ip address GigabitEthernet2/1/0 1.2.3.4/24
+
+set int state GigabitEthernet2/1/0 up
+cre sub GigabitEthernet2/1/0 1 dot1q 7
+set int state GigabitEthernet2/1/0.1 up
+
+set int state GigabitEthernet2/2/0 up
+cre sub GigabitEthernet2/2/0 1 dot1q 9
+set int state GigabitEthernet2/2/0.1 up
+
+
+loop create
+set int l2 bridge loop0 0 bvi
+set int ip table loop0 0
+set int state loop0 up
+
+
+set int l2 bridge GigabitEthernet2/1/0.1 0
+set int l2 bridge GigabitEthernet2/2/0.1 0
+
+set int l2 tag-rewrite GigabitEthernet2/1/0.1 pop 1
+set int l2 tag-rewrite GigabitEthernet2/2/0.1 pop 1
+
+l2fib add 00:22:44:06:08:0a 0 GigabitEthernet2/1/0.1 static
+l2fib add 00:02:04:06:08:0a 0 GigabitEthernet2/2/0.1 static
+
+
+ip route table 0 8.0.0.1/32 via loop0
+set ip arp loop0 8.0.0.1 00:02:04:06:08:0a
+
+
+ip route add 1.2.3.3/32 via GigabitEthernet2/1/0 IP4: 00:15:17:61:73:47 -> 00:15:17:61:73:46
+
+cle er
+cle int
+cle run
+
+packet-generator new {
+ name bvi_to_l2
+ limit 100
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> 0050.56b7.296d
+ GRE: 1.2.3.3 -> 8.0.0.1 mpls_unicast
+ }
+}
+
+packet-generator new {
+ name l2_to_bvi
+ limit 50
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/2/0
+ data {
+ IP4: 0050.56b7.7c83 -> dead.0000.0000 vlan 9
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
+ }
+}
+
+packet-generator new {
+ name l2_to_bvi_via_flood
+ limit 25
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/2/0
+ data {
+ IP4: 0050.56b7.7c83 -> ffff.ffff.ffff vlan 9
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
+ }
+}
+
diff --git a/src/scripts/vnet/dhcp/dhcpd.conf b/src/scripts/vnet/dhcp/dhcpd.conf
new file mode 100644
index 00000000..d4cb3ed7
--- /dev/null
+++ b/src/scripts/vnet/dhcp/dhcpd.conf
@@ -0,0 +1,8 @@
+# add at the bottom
+
+subnet 192.168.0.0 netmask 255.255.0.0 {
+ range 192.168.1.10 192.168.1.254;
+ option routers 192.168.1.1;
+ default-lease-time 15;
+ max-lease-time 15;
+}
diff --git a/src/scripts/vnet/dhcp/left-ping-target.sh b/src/scripts/vnet/dhcp/left-ping-target.sh
new file mode 100644
index 00000000..2edc2a50
--- /dev/null
+++ b/src/scripts/vnet/dhcp/left-ping-target.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+# to obtain dhcp address from leftpeer
+dhclient -d -v eth1
diff --git a/src/scripts/vnet/dhcp/leftpeer.conf b/src/scripts/vnet/dhcp/leftpeer.conf
new file mode 100644
index 00000000..458eecf0
--- /dev/null
+++ b/src/scripts/vnet/dhcp/leftpeer.conf
@@ -0,0 +1,17 @@
+set int ip table GigabitEthernet2/2/0 12
+set int ip address GigabitEthernet2/2/0 192.168.1.1/24
+set int state GigabitEthernet2/2/0 up
+
+set int ip table GigabitEthernet2/7/0 11
+set int ip address GigabitEthernet2/7/0 192.168.2.1/24
+set int state GigabitEthernet2/7/0 up
+
+comment { set dhcp proxy server 1.2.3.4 src-address 1.2.3.5 add-option-82 rx-fib-id 0 server-fib-id 0 }
+
+comment { set dhcp proxy server 192.168.2.2 src-address 192.168.2.1 add-option-82 rx-fib-id 12 server-fib-id 11 }
+
+ip route add 0.0.0.0/24 table 11 via local
+ip route add 255.255.255.255/24 table 11 via local
+
+ip route add 0.0.0.0/24 table 12 via local
+ip route add 255.255.255.255/24 table 12 via local
diff --git a/src/scripts/vnet/dhcp/proxy b/src/scripts/vnet/dhcp/proxy
new file mode 100644
index 00000000..42dff2a0
--- /dev/null
+++ b/src/scripts/vnet/dhcp/proxy
@@ -0,0 +1,22 @@
+loop create
+loop create
+
+set int state loop0 up
+set int state loop1 up
+
+set int ip table loop1 1
+set int ip6 table loop1 1
+
+set int ip addr loop0 10.0.0.1/24
+set int ip addr loop0 10.0.1.1/24
+
+set int ip addr loop0 2001::1/64
+set int ip addr loop0 2001:1::1/64
+
+set dhcp proxy server 10.255.0.1 src-address 10.0.0.1 server-fib-id 0 rx-fib-id 0
+set dhcp proxy server 10.255.0.2 src-address 10.0.0.1 server-fib-id 0 rx-fib-id 0
+set dhcp proxy server 10.255.1.2 src-address 10.0.1.1 server-fib-id 1 rx-fib-id 1
+
+set dhcpv6 proxy server 3001::1 src-address 2001::1 server-fib-id 0 rx-fib-id 0
+set dhcpv6 proxy server 3002::1 src-address 2001:1::1 server-fib-id 1 rx-fib-id 1
+
diff --git a/src/scripts/vnet/icmp b/src/scripts/vnet/icmp
new file mode 100644
index 00000000..1e054e2d
--- /dev/null
+++ b/src/scripts/vnet/icmp
@@ -0,0 +1,16 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.2.3.4 -> 5.6.7.8
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+tr add pg-input 100
+ip route 5.6.7.8/32 via local
+ip route 1.2.3.4/32 via local
diff --git a/src/scripts/vnet/icmp6 b/src/scripts/vnet/icmp6
new file mode 100644
index 00000000..2a65acba
--- /dev/null
+++ b/src/scripts/vnet/icmp6
@@ -0,0 +1,16 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip6-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP6: ::1 -> ::2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+tr add pg-input 100
+ip route ::1/128 via local
+ip route ::2/128 via local
diff --git a/src/scripts/vnet/ige b/src/scripts/vnet/ige
new file mode 100644
index 00000000..80d045af
--- /dev/null
+++ b/src/scripts/vnet/ige
@@ -0,0 +1,19 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-lookup
+ size 50-50
+ data {
+ ICMP: 1.0.0.1 -> 1.0.0.3 ttl 1
+ incrementing 30
+ }
+}
+
+comment { tr add pg-input 100 }
+set int ip address GigabitEthernet4/0/0 1.0.0.1/24
+set int ip address GigabitEthernet4/0/1 1.0.0.2/24
+set int state GigabitEthernet4/0/0 up
+set int state GigabitEthernet4/0/1 up
+
+ip route add 1.0.0.3/32 via GigabitEthernet4/0/1 IP4: 00:15:17:61:73:47 -> 00:15:17:61:73:46
+tr add ige-input 10
diff --git a/src/scripts/vnet/ip6 b/src/scripts/vnet/ip6
new file mode 100644
index 00000000..adb27225
--- /dev/null
+++ b/src/scripts/vnet/ip6
@@ -0,0 +1,29 @@
+packet-generator new {
+ name x
+ limit 1
+ node ethernet-input
+ size 64-64
+ no-recycle
+ data {
+ IP6: 1.2.3 -> 4.5.6
+ ICMP: 3002::2 -> 3001::2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 2001:1::1/64
+set int ip address loop1 2001:2::1/64
+
+set ip6 neighbor loop0 2001:1::2 00:00:DD:EE:AA:DD
+set ip6 neighbor loop1 2001:2::2 00:00:DD:EE:AA:EE
+
+ip route add 3001::/64 via 2001:2::2 loop1
+
+trace add pg-input 100
diff --git a/src/scripts/vnet/ip6-hbh b/src/scripts/vnet/ip6-hbh
new file mode 100644
index 00000000..0c6de47f
--- /dev/null
+++ b/src/scripts/vnet/ip6-hbh
@@ -0,0 +1,84 @@
+tap connect tap0
+set int state tap-0 up
+set int ip address tap-0 1::1/64
+packet-generator new {
+ name hbh1
+ limit 1
+ node ip6-input
+ size 48-48
+ no-recycle
+ data {
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
+ hex 0x3B00010403040506
+ incrementing 100
+ }
+}
+packet-generator new {
+ name hbh2
+ limit 1
+ node ip6-input
+ size 48-48
+ no-recycle
+ data {
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
+ hex 0x3B00C10403040506
+ incrementing 100
+ }
+}
+
+packet-generator new {
+ name hbh3
+ limit 1
+ node ip6-input
+ size 48-48
+ no-recycle
+ data {
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
+ hex 0x3BffC10403040506
+ incrementing 100
+ }
+}
+
+packet-generator new {
+ name hbh4
+ limit 1
+ node ip6-input
+ size 64-64
+ no-recycle
+ data {
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
+ hex 0x3BffC10403040506
+ incrementing 100
+ }
+}
+
+packet-generator new {
+ name hbh5
+ limit 1
+ node ip6-input
+ size 56-56
+ no-recycle
+ data {
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
+ length 16
+ hex 0x3B010104030405060106030405060708
+ incrementing 100
+ }
+}
+
+packet-generator new {
+ name hbh6
+ limit 1
+ node ip6-input
+ size 56-56
+ no-recycle
+ data {
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
+ length 16
+ hex 0x3a00050200000100
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+tr add pg-input 100
diff --git a/src/scripts/vnet/ixge b/src/scripts/vnet/ixge
new file mode 100644
index 00000000..6722b536
--- /dev/null
+++ b/src/scripts/vnet/ixge
@@ -0,0 +1,15 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-lookup
+ size 50-50
+ data {
+ ICMP: 1.0.0.1 -> 1.0.0.3 ttl 1
+ incrementing 30
+ }
+}
+
+comment { tr add pg-input 100 }
+set int ip address TenGigabitEthernet5/0/0 33.0.1.1/8
+set int state TenGigabitEthernet5/0/0 up
+
diff --git a/src/scripts/vnet/l2efpfilter b/src/scripts/vnet/l2efpfilter
new file mode 100644
index 00000000..307b4436
--- /dev/null
+++ b/src/scripts/vnet/l2efpfilter
@@ -0,0 +1,83 @@
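+comment { egress (efp) filter test: after output tag-rewrite, frames whose vlan tags do not match the output sub-interface are dropped; the stream names below record the expected pass/fail outcome }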
+
+set int ip address GigabitEthernet2/1/0 1.2.3.4/24
+set int state GigabitEthernet2/1/0 up
+set int state tuntap-0 down
+
+
+cre sub GigabitEthernet2/1/0 1 dot1q 1
+cre sub GigabitEthernet2/1/0 2 dot1q 2
+cre sub GigabitEthernet2/1/0 3 dot1q 3
+cre sub GigabitEthernet2/2/0 1 dot1q 1
+cre sub GigabitEthernet2/2/0 100 dot1q 100
+
+set int l2 bridge GigabitEthernet2/1/0 0
+set int l2 bridge GigabitEthernet2/1/0.1 0
+set int l2 bridge GigabitEthernet2/1/0.2 0
+set int l2 bridge GigabitEthernet2/1/0.3 0
+set int l2 bridge GigabitEthernet2/2/0 0
+set int l2 bridge GigabitEthernet2/2/0.1 0
+set int l2 bridge GigabitEthernet2/2/0.100 0
+
+set int l2 tag-rewrite GigabitEthernet2/2/0.1 push dot1q 50
+set int l2 tag-rewrite GigabitEthernet2/1/0.2 translate 1-1 dot1q 100
+set int l2 tag-rewrite GigabitEthernet2/1/0.3 translate 1-1 dot1q 99
+
+set int l2 efp-filter GigabitEthernet2/2/0
+set int l2 efp-filter GigabitEthernet2/2/0.1
+set int l2 efp-filter GigabitEthernet2/2/0.100
+
+
+l2fib add 00:00:00:00:00:11 0 GigabitEthernet2/2/0.1 static
+l2fib add 00:00:00:00:00:22 0 GigabitEthernet2/2/0.100 static
+
+set int state GigabitEthernet2/1/0 up
+set int state GigabitEthernet2/1/0.1 up
+set int state GigabitEthernet2/1/0.2 up
+set int state GigabitEthernet2/1/0.3 up
+set int state GigabitEthernet2/2/0 up
+set int state GigabitEthernet2/2/0.1 up
+set int state GigabitEthernet2/2/0.100 up
+
+
+trace add pg-input 6
+
+clear error
+clear run
+clear int
+
+packet-generator new {
+ name pre_vtr_fail
+ limit 10
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> 00:00:00:00:00:11 vlan 1
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
+ }
+}
+
+packet-generator new {
+ name post_vtr_pass
+ limit 20
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 2
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
+ }
+}
+
+packet-generator new {
+ name post_vtr_fail
+ limit 50
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 3
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
+ }
+}
+
diff --git a/src/scripts/vnet/l2efpfilter_perf b/src/scripts/vnet/l2efpfilter_perf
new file mode 100644
index 00000000..b2f4b490
--- /dev/null
+++ b/src/scripts/vnet/l2efpfilter_perf
@@ -0,0 +1,58 @@
+
+set int ip address GigabitEthernet2/1/0 1.2.3.4/24
+set int state GigabitEthernet2/1/0 up
+set int state tuntap-0 down
+
+
+cre sub GigabitEthernet2/1/0 1 dot1q 1
+cre sub GigabitEthernet2/1/0 2 dot1q 2
+cre sub GigabitEthernet2/1/0 3 dot1q 3
+cre sub GigabitEthernet2/2/0 1 dot1q 1
+cre sub GigabitEthernet2/2/0 100 dot1q 100
+
+set int l2 bridge GigabitEthernet2/1/0 0
+set int l2 bridge GigabitEthernet2/1/0.1 0
+set int l2 bridge GigabitEthernet2/1/0.2 0
+set int l2 bridge GigabitEthernet2/1/0.3 0
+set int l2 bridge GigabitEthernet2/2/0 0
+set int l2 bridge GigabitEthernet2/2/0.1 0
+set int l2 bridge GigabitEthernet2/2/0.100 0
+
+set int l2 tag-rewrite GigabitEthernet2/2/0.1 push dot1q 50
+set int l2 tag-rewrite GigabitEthernet2/1/0.2 translate 1-1 dot1q 100
+set int l2 tag-rewrite GigabitEthernet2/1/0.3 translate 1-1 dot1q 99
+
+set int l2 efp-filter GigabitEthernet2/2/0
+set int l2 efp-filter GigabitEthernet2/2/0.1
+set int l2 efp-filter GigabitEthernet2/2/0.100
+
+
+l2fib add 00:00:00:00:00:11 0 GigabitEthernet2/2/0.1 static
+l2fib add 00:00:00:00:00:22 0 GigabitEthernet2/2/0.100 static
+
+set int state GigabitEthernet2/1/0 up
+set int state GigabitEthernet2/1/0.1 up
+set int state GigabitEthernet2/1/0.2 up
+set int state GigabitEthernet2/1/0.3 up
+set int state GigabitEthernet2/2/0 up
+set int state GigabitEthernet2/2/0.1 up
+set int state GigabitEthernet2/2/0.100 up
+
+
+trace add pg-input 6
+
+clear error
+clear run
+clear int
+
+packet-generator new {
+ name post_vtr_pass
+ limit 9111003
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 2
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
+ }
+}
diff --git a/src/scripts/vnet/l2fib b/src/scripts/vnet/l2fib
new file mode 100644
index 00000000..81ede171
--- /dev/null
+++ b/src/scripts/vnet/l2fib
@@ -0,0 +1,46 @@
+
+set int ip address GigabitEthernet2/1/0 1.2.3.4/24
+set int state GigabitEthernet2/1/0 up
+
+set int state GigabitEthernet2/2/0 up
+cre sub GigabitEthernet2/2/0 1 dot1q 9
+set int state GigabitEthernet2/2/0.1 up
+set int state tuntap-0 down
+
+set int acl input GigabitEthernet2/1/0
+set int acl output GigabitEthernet2/1/0
+set int acl input GigabitEthernet2/2/0.1
+set int acl output GigabitEthernet2/2/0.1
+
+set int l2 bridge GigabitEthernet2/1/0 0
+set int l2 bridge GigabitEthernet2/2/0.1 0
+
+set int l2 tag-rewrite GigabitEthernet2/1/0 push dot1q 50
+set int l2 tag-rewrite GigabitEthernet2/2/0.1 pop 1
+
+
+trace add pg-input 6
+
+clear error
+clear run
+clear int
+
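+comment { raw hex frames: the first is untagged, the second double-tagged with outer vlan 9 and inner vlan 17; the stream names record the l2fib case each exercises }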
+packet-generator new {
+ name new_input_if_index_mac_move
+ limit 4
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ size 98-98
+ data { hex 0x00010203040500020406080a080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff }
+}
+
+packet-generator new {
+ name dmac_hit
+ limit 7
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/2/0
+ size 98-98
+ data { hex 0x00020406080a00224406080a8100000981000011080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff }
+}
diff --git a/src/scripts/vnet/l2fib_perf b/src/scripts/vnet/l2fib_perf
new file mode 100644
index 00000000..638317ff
--- /dev/null
+++ b/src/scripts/vnet/l2fib_perf
@@ -0,0 +1,29 @@
+
+set int ip address GigabitEthernet2/1/0 1.2.3.4/24
+set int state GigabitEthernet2/1/0 up
+
+set int state GigabitEthernet2/2/0 up
+cre sub GigabitEthernet2/2/0 1 dot1q 9
+set int state GigabitEthernet2/2/0.1 up
+set int state tuntap-0 down
+
+set int l2 bridge GigabitEthernet2/1/0 0
+set int l2 bridge GigabitEthernet2/2/0.1 0
+
+l2fib add 00:22:44:06:08:0a 0 GigabitEthernet2/1/0 static
+l2fib add 00:02:04:06:08:0a 0 GigabitEthernet2/2/0.1 static
+
+clear error
+clear int
+clear run
+
+packet-generator new {
+ name perf
+ limit 9111003
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/2/0.1
+ size 98-98
+ data { hex 0x00224406080a00020406080a81000009080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff }
+}
+
diff --git a/src/scripts/vnet/l2fib_xc b/src/scripts/vnet/l2fib_xc
new file mode 100644
index 00000000..35d7342b
--- /dev/null
+++ b/src/scripts/vnet/l2fib_xc
@@ -0,0 +1,31 @@
+
+set int ip address GigabitEthernet2/1/0 1.2.3.4/24
+set int state GigabitEthernet2/1/0 up
+
+set int state GigabitEthernet2/2/0 up
+cre sub GigabitEthernet2/2/0 1 dot1q 9
+set int state GigabitEthernet2/2/0.1 up
+set int state tuntap-0 down
+
+set int acl input GigabitEthernet2/1/0
+set int acl output GigabitEthernet2/1/0
+set int acl input GigabitEthernet2/2/0.1
+set int acl output GigabitEthernet2/2/0.1
+
+set int l2 xc GigabitEthernet2/1/0 GigabitEthernet2/2/0.1
+set int l2 xc GigabitEthernet2/2/0.1 GigabitEthernet2/1/0
+
+clear error
+clear run
+clear int
+
+packet-generator new {
+ name xc
+ limit 11
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ size 98-98
+ data { hex 0x00010203040500020406080a080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff }
+}
+
diff --git a/src/scripts/vnet/l2flood b/src/scripts/vnet/l2flood
new file mode 100644
index 00000000..013462ce
--- /dev/null
+++ b/src/scripts/vnet/l2flood
@@ -0,0 +1,42 @@
+
+set int ip address GigabitEthernet2/1/0 1.2.3.4/24
+set int state GigabitEthernet2/1/0 up
+set int state tuntap-0 down
+
+
+loop create
+cre sub loop0 1 dot1q 1
+cre sub loop0 2 dot1q 2
+cre sub loop0 3 dot1q 3
+cre sub GigabitEthernet2/1/0 1 dot1q 1
+
+
+set int l2 bridge loop0.1 7
+set int l2 bridge loop0.2 7
+set int l2 bridge loop0.3 7
+set int l2 bridge GigabitEthernet2/1/0.1 7
+
+loop create
+set int l2 bridge loop1 7 bvi
+
+set int state GigabitEthernet2/1/0.1 up
+
+trace add pg-input 6
+
+clear error
+clear run
+clear int
+
+packet-generator new {
+ name flood
+ limit 1
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> ffff.ffff.ffff vlan 1
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
+ }
+
+}
+
diff --git a/src/scripts/vnet/l2tp b/src/scripts/vnet/l2tp
new file mode 100644
index 00000000..337805aa
--- /dev/null
+++ b/src/scripts/vnet/l2tp
@@ -0,0 +1,134 @@
+
+set int ip address GigabitEthernet2/1/0 1.2.3.4/24
+set int state GigabitEthernet2/1/0 up
+set int state tuntap-0 down
+
+
+cre sub GigabitEthernet2/1/0 1 dot1q 1
+cre sub GigabitEthernet2/1/0 2 dot1q 2
+cre sub GigabitEthernet2/1/0 3 dot1q 3
+cre sub GigabitEthernet2/2/0 1 dot1q 1
+cre sub GigabitEthernet2/2/0 100 dot1q 100
+
+
+set int l2 tag-rewrite GigabitEthernet2/1/0.1 pop 1
+set int l2 tag-rewrite GigabitEthernet2/1/0.2 pop 1
+
+
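+comment { two l2tpv3-over-ip6 sessions to client 11::1; the second carries the l2-specific sublayer, exercised below by the decap_sublayer and encap_sublayer streams }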
+l2tp session add client 11::1 our 22::2 l2-interface GigabitEthernet2/1/0.1
+l2tp session add client 11::1 our 22::3 l2-interface GigabitEthernet2/1/0.2 local-session-id 2 l2-sublayer-present
+
+ip route 11::1/128 via GigabitEthernet2/2/0
+set ip6 neighbor GigabitEthernet2/2/0 11::1 00:02:04:06:08:0a
+
+enable ip6 interface GigabitEthernet2/2/0
+
+set int ip6 l2tpv3 GigabitEthernet2/2/0
+
+set int state GigabitEthernet2/1/0 up
+set int state GigabitEthernet2/1/0.1 up
+set int state GigabitEthernet2/1/0.2 up
+set int state GigabitEthernet2/1/0.3 up
+set int state GigabitEthernet2/2/0 up
+
+
+trace add pg-input 2
+
+clear error
+clear run
+clear int
+
+packet-generator new {
+ name decap
+ limit 10
+ size 200-200
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/2/0
+ data {
+ IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a
+ L2TP: 11::1 -> 22::2
+ L2TP: session_id 1 cookie 0xffffffffffffffff
+ IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01
+ UDP: 1.2.3.4 -> 5.6.7.8
+ incrementing 8
+ }
+}
+
+packet-generator new {
+ name decap_bad_sid
+ limit 30
+ size 200-200
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/2/0
+ data {
+ IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a
+ L2TP: 11::1 -> 22::2
+ L2TP: session_id 0x999 cookie 0xffffffffffffffff
+ IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01
+ UDP: 1.2.3.4 -> 5.6.7.8
+ incrementing 8
+ }
+}
+
+packet-generator new {
+ name decap_bad_cookie
+ limit 50
+ size 200-200
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/2/0
+ data {
+ IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a
+ L2TP: 11::1 -> 22::2
+ L2TP: session_id 1 cookie 0x3333ffffffffffff
+ IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01
+ UDP: 1.2.3.4 -> 5.6.7.8
+ incrementing 8
+ }
+}
+
+packet-generator new {
+ name encap
+ limit 100
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0000.5555.0002 -> 00:00:dd:dd:00:02 vlan 1
+ UDP: 1.2.3.4 -> 5.6.7.8
+ incrementing 8
+ }
+}
+
+packet-generator new {
+ name decap_sublayer
+ limit 300
+ size 200-200
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/2/0
+ data {
+ IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a
+ L2TP: 11::1 -> 22::3
+ L2TP: session_id 2 cookie 0xffffffffffffffff l2_sublayer 0
+ IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01
+ UDP: 1.2.3.4 -> 5.6.7.8
+ incrementing 8
+ }
+}
+
+packet-generator new {
+ name encap_sublayer
+ limit 700
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0000.5555.0002 -> 00:00:dd:dd:00:02 vlan 2
+ UDP: 1.2.3.4 -> 5.6.7.8
+ incrementing 8
+ }
+}
+
diff --git a/src/scripts/vnet/leftpeer/leftpeer-classify b/src/scripts/vnet/leftpeer/leftpeer-classify
new file mode 100755
index 00000000..74285912
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-classify
@@ -0,0 +1,8 @@
+classify table mask l2 src l3 ip4 src buckets 2 miss-next local
+
+comment { classify table mask l3 ip4 src buckets 2 miss-next local }
+set ip classify intfc GigabitEthernet2/2/0 table-index 0
+set int ip address GigabitEthernet2/2/0 192.168.1.1/24
+set int state GigabitEthernet2/2/0 up
+
+classify session hit-next local table-index 0 match l2 src 00:50:56:b7:05:bb l3 ip4 src 192.168.1.2
diff --git a/src/scripts/vnet/leftpeer/leftpeer-classify6 b/src/scripts/vnet/leftpeer/leftpeer-classify6
new file mode 100644
index 00000000..6579d50d
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-classify6
@@ -0,0 +1,5 @@
+classify table mask l3 ip6 src buckets 2 miss-next local
+set ip6 classify intfc GigabitEthernet2/2/0 table-index 0
+set int ip address GigabitEthernet2/2/0 db01::1/64
+set int state GigabitEthernet2/2/0 up
+classify session hit-next local table-index 0 match l3 ip6 src db01::2
diff --git a/src/scripts/vnet/leftpeer/leftpeer-classifyl2 b/src/scripts/vnet/leftpeer/leftpeer-classifyl2
new file mode 100644
index 00000000..6be4b1e5
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-classifyl2
@@ -0,0 +1,8 @@
+set int ip address GigabitEthernet2/2/0 192.168.1.1/24
+set int state GigabitEthernet2/2/0 up
+
+classify table mask l3 ip4 src buckets 2 l2-miss-next ethernet
+
+classify session advance 14 l2-hit-next ip4 table-index 0 match l3 ip4 src 192.168.1.2
+
+set int l2 class intfc GigabitEthernet2/2/0 ip4-table 0
diff --git a/src/scripts/vnet/leftpeer/leftpeer-dhcp b/src/scripts/vnet/leftpeer/leftpeer-dhcp
new file mode 100644
index 00000000..c13a8f3a
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-dhcp
@@ -0,0 +1,23 @@
+loop create
+set int ip table loop0 12
+set int ip address loop0 192.168.1.1/24
+set int state loop0 up
+
+set int ip table GigabitEthernet2/2/0 12
+comment { set int ip address GigabitEthernet2/2/0 192.168.1.1/24 }
+set int unnumbered GigabitEthernet2/2/0 use loop0
+set int state GigabitEthernet2/2/0 up
+
+set int ip table GigabitEthernet2/7/0 11
+set int ip address GigabitEthernet2/7/0 192.168.2.1/24
+set int state GigabitEthernet2/7/0 up
+
+uncomment { set dhcp proxy server 1.2.3.4 src-address 1.2.3.5 add-option-82 rx-fib-id 0 server-fib-id 0 }
+
+uncomment { set dhcp proxy server 192.168.2.2 src-address 192.168.2.1 add-option-82 rx-fib-id 12 server-fib-id 11 }
+
+ip route add 0.0.0.0/24 table 11 via local
+ip route add 255.255.255.255/24 table 11 via local
+
+ip route add 0.0.0.0/24 table 12 via local
+ip route add 255.255.255.255/24 table 12 via local
diff --git a/src/scripts/vnet/leftpeer/leftpeer-ioam.conf b/src/scripts/vnet/leftpeer/leftpeer-ioam.conf
new file mode 100644
index 00000000..6c1b502c
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-ioam.conf
@@ -0,0 +1,15 @@
+comment { connects to linux ping source eth1 addr db02::2/64 }
+set int ip address GigabitEthernet2/2/0 db02::1/64
+set int state GigabitEthernet2/2/0 up
+
+comment { connects to ioam domain rightpeer eth2 addr db03::3/64 }
+set int ip address GigabitEthernet2/3/0 db03::1/64
+set int state GigabitEthernet2/3/0 up
+
+ioam set rewrite trace-elts 2 pow
+
+set ip6 neighbor GigabitEthernet2/3/0 db03::3 00:50:56:b7:05:cb
+
+ip route add db04::0/64 via db03::3
+
+ioam set destination db04::0/64 add
diff --git a/src/scripts/vnet/leftpeer/leftpeer-l3vxlan.conf b/src/scripts/vnet/leftpeer/leftpeer-l3vxlan.conf
new file mode 100644
index 00000000..a75f9b4c
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-l3vxlan.conf
@@ -0,0 +1,12 @@
+comment { tunnel to rightpeer 6.0.3.3 on vlan 101 }
+
+set int ip address GigabitEthernet2/3/0 6.0.3.1/24
+set int state GigabitEthernet2/3/0 up
+
+comment { configure lc2 eth1 at e.g. 6.0.2.2/24 }
+
+set int ip address GigabitEthernet2/2/0 6.0.2.1/24
+set int state GigabitEthernet2/2/0 up
+
+vxlan tunnel src 6.0.3.1 peer 6.0.3.3 vni 123 adj 6.0.4.4/24
+
diff --git a/src/scripts/vnet/leftpeer/leftpeer-lisp.conf b/src/scripts/vnet/leftpeer/leftpeer-lisp.conf
new file mode 100644
index 00000000..cb3180b7
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-lisp.conf
@@ -0,0 +1,18 @@
+comment { vpe_phase2 configuration }
+
+comment { local client facing interface on vlan 100 }
+comment { configure lc2 eth1 at e.g. 6.0.2.2/24 }
+
+set int ip address GigabitEthernet2/2/0 6.0.2.1/24
+set int state GigabitEthernet2/2/0 up
+
+comment { tunnel to rightpeer on vlan 101 }
+set int ip address GigabitEthernet2/3/0 6.0.3.1/24
+set int state GigabitEthernet2/3/0 up
+
+lisp gpe tunnel src 6.0.3.1 dst 6.0.3.3 next-ip4 decap-next ip4 iid 1133
+set int ip address lisp_gpe_tunnel0 6.0.4.1/24
+set int state lisp_gpe_tunnel0 up
+
+lisp gpe tunnel src 6.0.3.3 dst 6.0.3.1 next-ip4 decap-next ip4 iid 3311
+set int state lisp_gpe_tunnel1 up
diff --git a/src/scripts/vnet/leftpeer/leftpeer-mpls.conf b/src/scripts/vnet/leftpeer/leftpeer-mpls.conf
new file mode 100644
index 00000000..74bce81b
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-mpls.conf
@@ -0,0 +1,17 @@
+comment { vpe_phase2 configuration }
+
+comment { local client facing interface on vlan 100 }
+comment { configure lc2 eth1 at e.g. 6.0.2.2/24 }
+
+set int ip table GigabitEthernet2/2/0 1
+set int ip address GigabitEthernet2/2/0 6.0.2.1/24
+set int state GigabitEthernet2/2/0 up
+
+comment { tunnel to rightpeer on vlan 101 }
+set int ip address GigabitEthernet2/3/0 6.0.3.1/24
+set int state GigabitEthernet2/3/0 up
+
+mpls encap add label 30 fib 1 dest 6.0.3.3
+mpls decap add label 30 fib 1
+
+create mpls gre tunnel src 6.0.3.1 dst 6.0.3.3 intfc 6.0.4.1/24 inner-fib-id 1 outer-fib-id 0
diff --git a/src/scripts/vnet/leftpeer/leftpeer-sr.conf b/src/scripts/vnet/leftpeer/leftpeer-sr.conf
new file mode 100644
index 00000000..a7b962d3
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-sr.conf
@@ -0,0 +1,24 @@
+comment { test sr segment chunk-offset on }
+test sr hmac validate on
+
+comment { trunk to rightpeer }
+set int ip address GigabitEthernet2/3/0 db03::2/64
+enable ip6 interface GigabitEthernet2/3/0
+set int state GigabitEthernet2/3/0 up
+
+comment { subscriber linux-ping-source }
+set int ip address GigabitEthernet2/2/0 db02::2/64
+enable ip6 interface GigabitEthernet2/2/0
+set int state GigabitEthernet2/2/0 up
+
+sr hmac id 2 key Gozzer
+sr hmac id 3 key Hoser
+
+sr tunnel src db01::1 dst db04::1/128 next db03::1 next db04::1 tag db02::2 clean key Gozzer InPE 1
+
+tap connect srlocal hwaddr random
+set int ip6 table tap-0 1
+set int ip address tap-0 db04::99/64
+enable ip6 interface tap-0
+set int state tap-0 up
+ip route add table 1 db02::0/64 lookup in table 0
diff --git a/src/scripts/vnet/leftpeer/leftpeer-vxlan.conf b/src/scripts/vnet/leftpeer/leftpeer-vxlan.conf
new file mode 100644
index 00000000..d50e8bf4
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer-vxlan.conf
@@ -0,0 +1,17 @@
+comment { tunnel to rightpeer 6.0.3.3 on vlan 101 }
+
+set int ip address GigabitEthernet2/3/0 6.0.3.1/24
+set int state GigabitEthernet2/3/0 up
+
+comment { tunnel to thirdpeer 6.0.5.5 on vlan 105 }
+set int ip address GigabitEthernet2/7/0 6.0.5.1/24
+set int state GigabitEthernet2/7/0 up
+
+comment { configure lc2 eth1 at e.g. 6.0.2.2/24 }
+
+set int ip address GigabitEthernet2/2/0 6.0.2.1/24
+set int state GigabitEthernet2/2/0 up
+
+vxlan tunnel src 6.0.3.1 peer 6.0.3.3 peer 6.0.3.5 vni 123
+vxlan l2 GigabitEthernet2/2/0 vni 123
+
diff --git a/src/scripts/vnet/leftpeer/leftpeer.script b/src/scripts/vnet/leftpeer/leftpeer.script
new file mode 100644
index 00000000..f08c8090
--- /dev/null
+++ b/src/scripts/vnet/leftpeer/leftpeer.script
@@ -0,0 +1,9 @@
+l2tp_set_lookup_key lookup_v6_src
+
+sw_interface_add_del_address GigabitEthernet2/3/0 db03::2/64
+sw_interface_set_flags GigabitEthernet2/3/0 admin-up
+
+comment sw_interface_add_del_address GigabitEthernet2/2/0 db02::2/64
+sw_interface_set_flags GigabitEthernet2/2/0 admin-up
+
+l2tp_session_add_del client_address db03::1 our_address db03::2 GigabitEthernet2/2/0 local_session_id 1 remote_session_id 3 local_cookie 11 remote_cookie 33
diff --git a/src/scripts/vnet/lfib/ip4-to-mpls b/src/scripts/vnet/lfib/ip4-to-mpls
new file mode 100644
index 00000000..85753797
--- /dev/null
+++ b/src/scripts/vnet/lfib/ip4-to-mpls
@@ -0,0 +1,26 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.2.2.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+set ip arp static loop1 2.0.0.2 dead.beef.babe
+set int mpls loop1 enable
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+
+trace add pg-input 100
diff --git a/src/scripts/vnet/lfib/mpls-pop-to-mpls b/src/scripts/vnet/lfib/mpls-pop-to-mpls
new file mode 100644
index 00000000..2818ac13
--- /dev/null
+++ b/src/scripts/vnet/lfib/mpls-pop-to-mpls
@@ -0,0 +1,28 @@
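+comment { the pg payload decodes as label 30 non-eos ttl 255 over label 31 eos over an ip4 icmp echo from 1.0.0.2 to 2.2.2.2, matching the two local-labels programmed below }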
+packet-generator new {
+ name x
+ limit 1
+ node mpls-input
+ size 72-72
+ no-recycle
+ data {
+ hex 0x0001e0ff0001f1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+set ip arp static loop1 2.0.0.2 dead.beef.babe
+set int mpls loop1 enable
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+
+mpls local-label add 30 non-eos mpls-lookup-in-table 0
+mpls local-label add 31 2.2.2.2/32
+
+trace add pg-input 100
diff --git a/src/scripts/vnet/lfib/mpls-to-ip4 b/src/scripts/vnet/lfib/mpls-to-ip4
new file mode 100644
index 00000000..24e235e0
--- /dev/null
+++ b/src/scripts/vnet/lfib/mpls-to-ip4
@@ -0,0 +1,27 @@
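+comment { the pg payload decodes as label 30 eos ttl 255 over an ip4 icmp echo to 2.2.2.2; label 30 pops to an ip4 lookup in table 0 }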
+packet-generator new {
+ name x
+ limit 1
+ node mpls-input
+ size 68-68
+ no-recycle
+ data {
+ hex 0x0001e1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+set ip arp static loop1 2.0.0.2 dead.beef.babe
+set int mpls loop1 enable
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+
+mpls local-label add 30 eos ip4-lookup-in-table 0
+
+trace add pg-input 100
diff --git a/src/scripts/vnet/lfib/mpls-to-mpls b/src/scripts/vnet/lfib/mpls-to-mpls
new file mode 100644
index 00000000..497dbab3
--- /dev/null
+++ b/src/scripts/vnet/lfib/mpls-to-mpls
@@ -0,0 +1,26 @@
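+comment { the pg payload decodes as label 30 eos ttl 255 over an ip4 icmp echo; label 30 is bound to the 2.2.2.2/32 path below and swaps to out-label 33 }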
+packet-generator new {
+ name x
+ limit 1
+ node mpls-input
+ size 68-68
+ no-recycle
+ data {
+ hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+set ip arp static loop1 2.0.0.2 dead.beef.babe
+set int mpls loop1 enable
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+mpls local-label add 30 2.2.2.2/32
+
+trace add pg-input 100
diff --git a/src/scripts/vnet/mcast/ip4 b/src/scripts/vnet/mcast/ip4
new file mode 100644
index 00000000..eb6bab27
--- /dev/null
+++ b/src/scripts/vnet/mcast/ip4
@@ -0,0 +1,25 @@
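+comment { packets arrive on pg0, the default pg interface and the Accept rpf interface; the 232.1.1.1 group replicates out pg1, pg2 and pg3 }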
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 512-512
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 232.1.1.1
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+create packet-generator interface pg1
+create packet-generator interface pg2
+create packet-generator interface pg3
+
+set int state pg1 up
+set int state pg2 up
+set int state pg3 up
+
+ip mroute add 232.1.1.1 via pg0 Accept
+ip mroute add 232.1.1.1 via pg1 Forward
+ip mroute add 232.1.1.1 via pg2 Forward
+ip mroute add 232.1.1.1 via pg3 Forward
diff --git a/src/scripts/vnet/mpls-o-ethernet/leftpeer.conf b/src/scripts/vnet/mpls-o-ethernet/leftpeer.conf
new file mode 100644
index 00000000..dd37b942
--- /dev/null
+++ b/src/scripts/vnet/mpls-o-ethernet/leftpeer.conf
@@ -0,0 +1,17 @@
+comment { vpe_phase2 configuration }
+
+comment { local client facing interface }
+comment { configure lc2 eth1 at e.g. 6.0.2.2/24 }
+
+set int ip table GigabitEthernet2/2/0 1
+set int ip address GigabitEthernet2/2/0 6.0.2.1/24
+set int state GigabitEthernet2/2/0 up
+
+comment { tunnel to rightpeer }
+set int ip address GigabitEthernet2/3/0 6.0.3.1/24
+set int state GigabitEthernet2/3/0 up
+
+mpls encap add label 30 fib 1 dest 6.0.4.1
+mpls decap add label 30 fib 1
+
+create mpls ethernet tunnel dst 00:50:56:b7:05:cb adj 6.0.4.1/24 tx-intfc GigabitEthernet2/3/0 fib-id 1
diff --git a/src/scripts/vnet/mpls-o-ethernet/pg b/src/scripts/vnet/mpls-o-ethernet/pg
new file mode 100644
index 00000000..ba5397f7
--- /dev/null
+++ b/src/scripts/vnet/mpls-o-ethernet/pg
@@ -0,0 +1,10 @@
+packet-generator new {
+ name x
+ limit 1
+ node mpls-ethernet-input
+ size 68-68
+ no-recycle
+ data {
+ hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
+ }
+}
\ No newline at end of file
diff --git a/src/scripts/vnet/mpls-o-ethernet/rightpeer.conf b/src/scripts/vnet/mpls-o-ethernet/rightpeer.conf
new file mode 100644
index 00000000..7709ce4d
--- /dev/null
+++ b/src/scripts/vnet/mpls-o-ethernet/rightpeer.conf
@@ -0,0 +1,15 @@
+comment { local client facing interface }
+comment { configure lc4 eth1 at e.g. 6.0.4.4/24 }
+
+set int ip table GigabitEthernet2/4/0 1
+set int ip address GigabitEthernet2/4/0 6.0.4.1/24
+set int state GigabitEthernet2/4/0 up
+
+comment { tunnel to leftpeer }
+set int ip address GigabitEthernet2/3/0 6.0.3.3/24
+set int state GigabitEthernet2/3/0 up
+
+mpls encap add label 30 fib 1 dest 6.0.2.1
+mpls decap add label 30 fib 1
+
+create mpls ethernet tunnel dst 00:50:56:b7:05:bf adj 6.0.2.1/24 tx-intfc GigabitEthernet2/3/0 fib-id 1
diff --git a/src/scripts/vnet/mpls-o-ethernet/single.conf b/src/scripts/vnet/mpls-o-ethernet/single.conf
new file mode 100644
index 00000000..2a25d355
--- /dev/null
+++ b/src/scripts/vnet/mpls-o-ethernet/single.conf
@@ -0,0 +1,17 @@
+comment { single node configuration }
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1
+
+mpls encap add label 30 fib 0 dest 2.2.2.2
+mpls decap add label 30 fib 0
+
+create mpls ethernet tunnel dst 00:50:56:b7:05:cb adj 2.2.2.2/32 tx-intfc loop1 fib-id 0
diff --git a/src/scripts/vnet/mpls-o-gre/dhcpd.conf b/src/scripts/vnet/mpls-o-gre/dhcpd.conf
new file mode 100644
index 00000000..f0f659cd
--- /dev/null
+++ b/src/scripts/vnet/mpls-o-gre/dhcpd.conf
@@ -0,0 +1,116 @@
+#
+# Sample configuration file for ISC dhcpd for Debian
+#
+# Attention: If /etc/ltsp/dhcpd.conf exists, that will be used as
+# configuration file instead of this file.
+#
+#
+
+# The ddns-updates-style parameter controls whether or not the server will
+# attempt to do a DNS update when a lease is confirmed. We default to the
+# behavior of the version 2 packages ('none', since DHCP v2 didn't
+# have support for DDNS.)
+ddns-update-style none;
+
+# option definitions common to all supported networks...
+option domain-name "example.org";
+option domain-name-servers ns1.example.org, ns2.example.org;
+
+default-lease-time 600;
+max-lease-time 7200;
+
+# If this DHCP server is the official DHCP server for the local
+# network, the authoritative directive should be uncommented.
+#authoritative;
+
+# Use this to send dhcp log messages to a different log file (you also
+# have to hack syslog.conf to complete the redirection).
+log-facility local7;
+
+# No service will be given on this subnet, but declaring it helps the
+# DHCP server to understand the network topology.
+
+#subnet 10.152.187.0 netmask 255.255.255.0 {
+#}
+
+# This is a very basic subnet declaration.
+
+#subnet 10.254.239.0 netmask 255.255.255.224 {
+# range 10.254.239.10 10.254.239.20;
+# option routers rtr-239-0-1.example.org, rtr-239-0-2.example.org;
+#}
+
+# This declaration allows BOOTP clients to get dynamic addresses,
+# which we don't really recommend.
+
+#subnet 10.254.239.32 netmask 255.255.255.224 {
+# range dynamic-bootp 10.254.239.40 10.254.239.60;
+# option broadcast-address 10.254.239.31;
+# option routers rtr-239-32-1.example.org;
+#}
+
+# A slightly different configuration for an internal subnet.
+#subnet 10.5.5.0 netmask 255.255.255.224 {
+# range 10.5.5.26 10.5.5.30;
+# option domain-name-servers ns1.internal.example.org;
+# option domain-name "internal.example.org";
+# option routers 10.5.5.1;
+# option broadcast-address 10.5.5.31;
+# default-lease-time 600;
+# max-lease-time 7200;
+#}
+
+# Hosts which require special configuration options can be listed in
+# host statements. If no address is specified, the address will be
+# allocated dynamically (if possible), but the host-specific information
+# will still come from the host declaration.
+
+#host passacaglia {
+# hardware ethernet 0:0:c0:5d:bd:95;
+# filename "vmunix.passacaglia";
+# server-name "toccata.fugue.com";
+#}
+
+# Fixed IP addresses can also be specified for hosts. These addresses
+# should not also be listed as being available for dynamic assignment.
+# Hosts for which fixed IP addresses have been specified can boot using
+# BOOTP or DHCP. Hosts for which no fixed address is specified can only
+# be booted with DHCP, unless there is an address range on the subnet
+# to which a BOOTP client is connected which has the dynamic-bootp flag
+# set.
+#host fantasia {
+# hardware ethernet 08:00:07:26:c0:a5;
+# fixed-address fantasia.fugue.com;
+#}
+
+# You can declare a class of clients and then do address allocation
+# based on that. The example below shows a case where all clients
+# in a certain class get addresses on the 10.17.224/24 subnet, and all
+# other clients get addresses on the 10.0.29/24 subnet.
+
+#class "foo" {
+# match if substring (option vendor-class-identifier, 0, 4) = "SUNW";
+#}
+
+#shared-network 224-29 {
+# subnet 10.17.224.0 netmask 255.255.255.0 {
+# option routers rtr-224.example.org;
+# }
+# subnet 10.0.29.0 netmask 255.255.255.0 {
+# option routers rtr-29.example.org;
+# }
+# pool {
+# allow members of "foo";
+# range 10.17.224.10 10.17.224.250;
+# }
+# pool {
+# deny members of "foo";
+# range 10.0.29.10 10.0.29.230;
+# }
+#}
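+# The only active declaration: lease out 6.0.2.2 - 6.0.2.5 with the vpp-side
+# address 6.0.2.1 as the router, on a short 15 second lease.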
+subnet 6.0.0.0 netmask 255.255.0.0 {
+ range 6.0.2.2 6.0.2.5;
+ option routers 6.0.2.1;
+ default-lease-time 15;
+ max-lease-time 15;
+}
diff --git a/src/scripts/vnet/mpls-o-gre/leftpeer.conf b/src/scripts/vnet/mpls-o-gre/leftpeer.conf
new file mode 100644
index 00000000..149c70c0
--- /dev/null
+++ b/src/scripts/vnet/mpls-o-gre/leftpeer.conf
@@ -0,0 +1,14 @@
+comment { left linux ping target configure at e.g. 6.0.2.2/24 }
+
+set int ip table GigabitEthernet2/2/0 1
+set int ip address GigabitEthernet2/2/0 6.0.2.1/24
+set int state GigabitEthernet2/2/0 up
+
+comment { tunnel to rightpeer }
+set int ip address GigabitEthernet2/3/0 6.0.3.1/24
+set int state GigabitEthernet2/3/0 up
+
+mpls encap add label 30 fib 1 dest 6.0.3.3
+mpls decap add label 30 fib 1
+
+create mpls gre tunnel src 6.0.3.1 dst 6.0.3.3 intfc 6.0.4.1/24 inner-fib-id 1 outer-fib-id 0
diff --git a/src/scripts/vnet/mpls-o-gre/rightpeer.conf b/src/scripts/vnet/mpls-o-gre/rightpeer.conf
new file mode 100644
index 00000000..b5bb597d
--- /dev/null
+++ b/src/scripts/vnet/mpls-o-gre/rightpeer.conf
@@ -0,0 +1,14 @@
+comment { right linux ping target configure e.g. 6.0.4.4/24 }
+
+set int ip table GigabitEthernet2/4/0 1
+set int ip address GigabitEthernet2/4/0 6.0.4.1/24
+set int state GigabitEthernet2/4/0 up
+
+comment { tunnel to leftpeer }
+set int ip address GigabitEthernet2/3/0 6.0.3.3/24
+set int state GigabitEthernet2/3/0 up
+
+mpls encap add label 30 fib 1 dest 6.0.3.1
+mpls decap add label 30 fib 1
+
+create mpls gre tunnel src 6.0.3.3 dst 6.0.3.1 intfc 6.0.2.1/24 inner-fib-id 1 outer-fib-id 0
diff --git a/src/scripts/vnet/mpls-tunnel b/src/scripts/vnet/mpls-tunnel
new file mode 100644
index 00000000..d04b2970
--- /dev/null
+++ b/src/scripts/vnet/mpls-tunnel
@@ -0,0 +1,87 @@
+packet-generator new {
+ name x0
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.0.0.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+packet-generator new {
+ name x1
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.0.1.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+packet-generator new {
+ name x2
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.0.2.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+packet-generator new {
+ name x3
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.0.3.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+
+
+trace add pg-input 100
+
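+comment { four tunnel variants: mpls-tunnel0 pushes a four-label stack, mpls-tunnel1 adds a per-route out-label 99 on top of the tunnel label, mpls-tunnel2 uses label 3 implicit-null, and mpls-tunnel3 is l2-only and bridged with the bvi loop1 }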
+loop create
+set int state loop0 up
+
+set int ip address loop0 10.0.0.1/24
+set ip arp loop0 10.0.0.2 00:00:11:aa:bb:cc
+
+mpls tunnel add via 10.0.0.2 loop0 out-label 33 out-label 34 out-label 35 out-label 36
+set int state mpls-tunnel0 up
+set int ip addr mpls-tunnel0 192.168.0.1/32
+ip route add 2.0.0.2/32 via 192.168.0.2 mpls-tunnel0
+
+
+mpls tunnel add via 10.0.0.2 out-label 33
+set int state mpls-tunnel1 up
+set int ip addr mpls-tunnel1 192.168.1.1/32
+ip route add 2.0.1.2/32 via 192.168.1.2 mpls-tunnel1 out-label 99
+
+mpls tunnel add via 10.0.0.2 loop0 out-label 3
+set int state mpls-tunnel2 up
+set int ip addr mpls-tunnel2 192.168.2.1/32
+ip route add 2.0.2.2/32 via 192.168.2.2 mpls-tunnel2
+
+
+mpls tunnel add l2-only via 10.0.0.2 loop0 out-label 234 out-label 0
+set int state mpls-tunnel3 up
+set int l2 bridge mpls-tunnel3 1
+
+loop create
+set int ip addr loop1 6.0.1.44/24
+set int l2 bridge loop1 1 bvi
+set int l2 learn loop1 disable
+set int state loop1 up
+
+ip route add 2.0.3.2/32 via 6.0.1.45 loop1
diff --git a/src/scripts/vnet/nat44 b/src/scripts/vnet/nat44
new file mode 100644
index 00000000..3292b565
--- /dev/null
+++ b/src/scripts/vnet/nat44
@@ -0,0 +1,41 @@
+create packet-generator interface pg0
+create packet-generator interface pg1
+
+packet-generator new {
+ name f1
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ interface pg0
+ data {
+ UDP: 10.0.0.3 -> 172.16.1.2
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f2
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ interface pg0
+ data {
+ UDP: 10.0.0.3 -> 172.16.1.2
+ UDP: 3005 -> 3006
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+nat44 add address 172.16.1.3
+set int ip address pg0 10.0.0.1/24
+set int ip address pg1 172.16.1.1/24
+set int state pg0 up
+set int state pg1 up
+set ip arp static pg0 10.0.0.3 abcd.abcd.abcd
+set ip arp static pg0 10.0.0.4 abcd.abcd.abcd
+set ip arp static pg1 172.16.1.2 cdef.abcd.abcd
+set int nat44 in pg0 out pg1
diff --git a/src/scripts/vnet/nat44_det b/src/scripts/vnet/nat44_det
new file mode 100644
index 00000000..629772e6
--- /dev/null
+++ b/src/scripts/vnet/nat44_det
@@ -0,0 +1,108 @@
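+comment { streams f1/f2/f5 are in2out arrivals on pg0 and f3/f4/f6 are out2in on pg1 toward the deterministic external address 1.1.1.2, spread across workers 0-2 }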
+create packet-generator interface pg0
+create packet-generator interface pg1
+
+packet-generator new {
+ name f1
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 0
+ interface pg0
+ data {
+ UDP: 10.0.0.3 -> 172.16.1.2
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f2
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 1
+ interface pg0
+ data {
+ UDP: 10.0.0.3 -> 172.16.1.2
+ UDP: 3005 -> 3006
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f3
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 0
+ interface pg1
+ data {
+ UDP: 172.16.1.2 -> 1.1.1.2
+ UDP: 3001 -> 1141
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f4
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 1
+ interface pg1
+ data {
+ UDP: 172.16.1.2 -> 1.1.1.2
+ UDP: 3006 -> 1146
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f5
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 2
+ interface pg0
+ data {
+ UDP: 10.0.0.4 -> 172.16.1.2
+ UDP: 3005 -> 3006
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f6
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 1
+ interface pg1
+ data {
+ UDP: 172.16.1.2 -> 1.1.1.2
+ UDP: 3006 -> 1177
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+nat44 deterministic add in 10.0.0.0/21 out 1.1.1.2/32
+set int ip address pg0 10.0.0.1/24
+set int ip address pg1 172.16.1.1/24
+set int state pg0 up
+set int state pg1 up
+set ip arp static pg0 10.0.0.3 abcd.abcd.abcd
+set ip arp static pg0 10.0.0.4 abcd.abcd.abcd
+set ip arp static pg1 172.16.1.2 cdef.abcd.abcd
+set int nat44 in pg0 out pg1
+trace add pg-input 10
diff --git a/src/scripts/vnet/nat44_static b/src/scripts/vnet/nat44_static
new file mode 100644
index 00000000..2b8f25ec
--- /dev/null
+++ b/src/scripts/vnet/nat44_static
@@ -0,0 +1,44 @@
+create packet-generator interface pg0
+create packet-generator interface pg1
+
+packet-generator new {
+ name f1
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 0
+ interface pg0
+ data {
+ UDP: 10.0.0.3 -> 172.16.1.2
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f2
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 1
+ interface pg1
+ data {
+ UDP: 172.16.1.2 -> 172.16.1.3
+ UDP: 3001 -> 3000
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+nat44 add address 172.16.1.3
+nat44 add static mapping local 10.0.0.3 external 172.16.1.3
+set int ip address pg0 10.0.0.1/24
+set int ip address pg1 172.16.1.1/24
+set int state pg0 up
+set int state pg1 up
+set ip arp static pg0 10.0.0.3 abcd.abcd.abcd
+set ip arp static pg1 172.16.1.2 cdef.abcd.abcd
+set int nat44 in pg0 out pg1
+trace add pg-input 10
diff --git a/src/scripts/vnet/nat44_static_with_port b/src/scripts/vnet/nat44_static_with_port
new file mode 100644
index 00000000..15bef1be
--- /dev/null
+++ b/src/scripts/vnet/nat44_static_with_port
@@ -0,0 +1,44 @@
+create packet-generator interface pg0
+create packet-generator interface pg1
+
+packet-generator new {
+ name f1
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 0
+ interface pg0
+ data {
+ UDP: 10.0.0.3 -> 172.16.1.2
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f2
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 1
+ interface pg1
+ data {
+ UDP: 172.16.1.2 -> 172.16.1.3
+ UDP: 3001 -> 3000
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+nat44 add address 172.16.1.3
+nat44 add static mapping local 10.0.0.3 3000 external 172.16.1.3 3000
+set int ip address pg0 10.0.0.1/24
+set int ip address pg1 172.16.1.1/24
+set int state pg0 up
+set int state pg1 up
+set ip arp static pg0 10.0.0.3 abcd.abcd.abcd
+set ip arp static pg1 172.16.1.2 cdef.abcd.abcd
+set int nat44 in pg0 out pg1
+trace add pg-input 10
diff --git a/src/scripts/vnet/pcap b/src/scripts/vnet/pcap
new file mode 100644
index 00000000..692e5f27
--- /dev/null
+++ b/src/scripts/vnet/pcap
@@ -0,0 +1,18 @@
+packet-generator new {
+ name x
+ limit 1
+ node ethernet-input
+ no-recycle
+ pcap /home/eliot/pcap-data/ISIS_level1_adjacency.cap
+}
+
+packet-generator new {
+ name y
+ limit 1
+ node hdlc-input
+ no-recycle
+ pcap /home/eliot/pcap-data/ISIS_p2p_adjacency.cap
+}
+
+tr add pg-input 10
+
diff --git a/src/scripts/vnet/probe4 b/src/scripts/vnet/probe4
new file mode 100644
index 00000000..b530e0db
--- /dev/null
+++ b/src/scripts/vnet/probe4
@@ -0,0 +1,11 @@
+ethernet create-interfaces
+set int state fake-eth0 up
+set int ip address fake-eth0 1.0.0.1/24
+comment { error }
+comment { ip probe fake-eth0 1.0.0.1 }
+comment { ip probe fake-eth0 1.2.3.4 }
+comment { error }
+comment { ip probe fake-eth0 1.0.0.2 }
+
+
+
diff --git a/src/scripts/vnet/probe6 b/src/scripts/vnet/probe6
new file mode 100644
index 00000000..a5490c90
--- /dev/null
+++ b/src/scripts/vnet/probe6
@@ -0,0 +1,7 @@
+ethernet create-interfaces
+set int state fake-eth0 up
+set int ip address fake-eth0 2000::1/64
+comment { ip probe fake-eth0 2000::1 }
+
+
+
diff --git a/src/scripts/vnet/rewrite b/src/scripts/vnet/rewrite
new file mode 100644
index 00000000..d41b9dbf
--- /dev/null
+++ b/src/scripts/vnet/rewrite
@@ -0,0 +1,62 @@
+
+
+comment { test ipv4 port/vlan/qinq rewrites by generating arps }
+
+set int state tuntap-0 down
+
+set int ip address GigabitEthernet2/2/0 1.2.5.4/24
+
+cre sub GigabitEthernet2/2/0 1 dot1q 6 exact-match
+set int ip address GigabitEthernet2/2/0.1 1.2.6.4/24
+
+cre sub GigabitEthernet2/2/0 2 dot1ad 7 inner-dot1q 8 exact-match
+set int ip address GigabitEthernet2/2/0.2 1.2.7.4/24
+
+set int state GigabitEthernet2/1/0 up
+set int state GigabitEthernet2/2/0 up
+set int state GigabitEthernet2/2/0.1 up
+set int state GigabitEthernet2/2/0.2 up
+
+trace add pg-input 2
+
+clear error
+clear int
+clear run
+
+packet-generator new {
+ name toport
+ limit 2
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> 0050.56b7.296d
+ GRE: 8.0.0.1 -> 1.2.5.6 mpls_unicast
+ }
+}
+
+packet-generator new {
+ name tovlan
+ limit 2
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> 0050.56b7.296d
+ GRE: 8.0.0.1 -> 1.2.6.6 mpls_unicast
+ }
+}
+
+packet-generator new {
+ name toqinq
+ limit 2
+ no-recycle
+ node ethernet-input
+ interface GigabitEthernet2/1/0
+ data {
+ IP4: 0050.56b7.7c83 -> 0050.56b7.296d
+ GRE: 8.0.0.1 -> 1.2.7.6 mpls_unicast
+ }
+}
+
+
diff --git a/src/scripts/vnet/rightpeer/rightpeer-ioam.conf b/src/scripts/vnet/rightpeer/rightpeer-ioam.conf
new file mode 100644
index 00000000..3e9a8d34
--- /dev/null
+++ b/src/scripts/vnet/rightpeer/rightpeer-ioam.conf
@@ -0,0 +1,14 @@
+comment { connects to right ping target eth3 addr db04::4/64 }
+set int ip address GigabitEthernet2/4/0 db04::1/64
+set int state GigabitEthernet2/4/0 up
+
+comment { connects to ioam domain leftpeer addr db03::1/64 }
+set int ip address GigabitEthernet2/3/0 db03::3/64
+set int state GigabitEthernet2/3/0 up
+
+set ip6 neighbor GigabitEthernet2/3/0 db03::1 00:50:56:b7:05:bf
+set ip6 neighbor GigabitEthernet2/4/0 db04::4 00:50:56:b7:05:d2
+
+ip route add db02::0/64 via db03::1
+
+ioam set destination db04::4/128 pop
diff --git a/src/scripts/vnet/rightpeer/rightpeer-l3vxlan.conf b/src/scripts/vnet/rightpeer/rightpeer-l3vxlan.conf
new file mode 100644
index 00000000..abba1ab0
--- /dev/null
+++ b/src/scripts/vnet/rightpeer/rightpeer-l3vxlan.conf
@@ -0,0 +1,9 @@
+set int ip address GigabitEthernet2/4/0 6.0.4.1/24
+set int state GigabitEthernet2/4/0 up
+
+comment { tunnel to leftpeer on vlan 101 }
+set int ip address GigabitEthernet2/3/0 6.0.3.3/24
+set int state GigabitEthernet2/3/0 up
+
+vxlan tunnel src 6.0.3.3 peer 6.0.3.1 vni 123 adj 6.0.2.0/24
+
diff --git a/src/scripts/vnet/rightpeer/rightpeer-lisp.conf b/src/scripts/vnet/rightpeer/rightpeer-lisp.conf
new file mode 100644
index 00000000..961204a3
--- /dev/null
+++ b/src/scripts/vnet/rightpeer/rightpeer-lisp.conf
@@ -0,0 +1,16 @@
+comment { local client facing interface }
+comment { configure lc4 eth1 at e.g. 6.0.4.4/24 }
+
+set int ip address GigabitEthernet2/4/0 6.0.4.1/24
+set int state GigabitEthernet2/4/0 up
+
+comment { tunnel to leftpeer }
+set int ip address GigabitEthernet2/3/0 6.0.3.3/24
+set int state GigabitEthernet2/3/0 up
+
+lisp gpe tunnel src 6.0.3.3 dst 6.0.3.1 next-ip4 decap-next ip4 iid 3311
+set int ip address lisp_gpe_tunnel0 6.0.2.3/24
+set int state lisp_gpe_tunnel0 up
+
+lisp gpe tunnel src 6.0.3.1 dst 6.0.3.3 next-ip4 decap-next ip4 iid 1133
+set int state lisp_gpe_tunnel1 up
diff --git a/src/scripts/vnet/rightpeer/rightpeer-mpls-l2.conf b/src/scripts/vnet/rightpeer/rightpeer-mpls-l2.conf
new file mode 100644
index 00000000..0ce38583
--- /dev/null
+++ b/src/scripts/vnet/rightpeer/rightpeer-mpls-l2.conf
@@ -0,0 +1,24 @@
+comment { vpe_phase1d configuration }
+
+comment { local client facing interface }
+comment { configure lc4 eth1 at e.g. 6.0.4.4/24 }
+
+set int ip table GigabitEthernet2/4/0 1
+set int ip address GigabitEthernet2/4/0 6.0.4.1/24
+set int state GigabitEthernet2/4/0 up
+
+comment { tunnel to leftpeer }
+set int ip address GigabitEthernet2/3/0 6.0.3.3/24
+set int state GigabitEthernet2/3/0 up
+
+mpls encap add label 30 fib 1 dest 6.0.3.1
+mpls decap add label 30 next output GigabitEthernet2/4/0
+
+comment { create mpls gre tunnel src 6.0.3.3 dst 6.0.3.1 intfc 6.0.2.1/24 inner-fib-id 1 outer-fib-id 0 l2-only }
+
+comment {set int l2 xconnect GigabitEthernet2/4/0 mpls-gre0 }
+
+uncomment { create mpls ethernet tunnel dst 00:50:56:b7:05:bf adj 6.0.3.1/8 l2-only tx-intfc GigabitEthernet2/3/0 fib-id 1 }
+
+uncomment { set int l2 xconnect GigabitEthernet2/4/0 mpls-eth0 }
+
diff --git a/src/scripts/vnet/rightpeer/rightpeer-mpls.conf b/src/scripts/vnet/rightpeer/rightpeer-mpls.conf
new file mode 100644
index 00000000..fc97ba16
--- /dev/null
+++ b/src/scripts/vnet/rightpeer/rightpeer-mpls.conf
@@ -0,0 +1,17 @@
+comment { vpe_phase1d configuration }
+
+comment { local client facing interface }
+comment { configure lc4 eth1 at e.g. 6.0.4.4/24 }
+
+set int ip table GigabitEthernet2/4/0 1
+set int ip address GigabitEthernet2/4/0 6.0.4.1/24
+set int state GigabitEthernet2/4/0 up
+
+comment { tunnel to leftpeer }
+set int ip address GigabitEthernet2/3/0 6.0.3.3/24
+set int state GigabitEthernet2/3/0 up
+
+mpls encap add label 30 fib 1 dest 6.0.3.1
+mpls decap add label 30 fib 1
+
+create mpls gre tunnel src 6.0.3.3 dst 6.0.3.1 intfc 6.0.2.1/24 inner-fib-id 1 outer-fib-id 0
diff --git a/src/scripts/vnet/rightpeer/rightpeer-sr.conf b/src/scripts/vnet/rightpeer/rightpeer-sr.conf
new file mode 100644
index 00000000..0b2a98bb
--- /dev/null
+++ b/src/scripts/vnet/rightpeer/rightpeer-sr.conf
@@ -0,0 +1,28 @@
+comment { test sr segment chunk-offset on }
+test sr hmac validate on
+
+comment { trunk to leftpeer }
+set int ip address GigabitEthernet2/3/0 db03::1/64
+enable ip6 interface GigabitEthernet2/3/0
+set int state GigabitEthernet2/3/0 up
+
+comment { subscriber right peer target }
+set int ip address GigabitEthernet2/4/0 db04::2/64
+
+comment { next address to fake out ND on shared LAN segment }
+comment { set int ip address GigabitEthernet2/4/0 db02::13/64 }
+
+enable ip6 interface GigabitEthernet2/4/0
+set int state GigabitEthernet2/4/0 up
+
+sr hmac id 2 key Gozzer
+sr hmac id 3 key Hoser
+
+sr tunnel src db04::1 dst db02::1/128 next db03::2 next db02::1 tag db04::2 clean key Hoser
+
+tap connect srlocal hwaddr random
+set int ip6 table tap-0 1
+set int ip address tap-0 db04::99/64
+enable ip6 interface tap-0
+set int state tap-0 up
+ip route add table 1 db02::0/64 lookup in table 0
diff --git a/src/scripts/vnet/rightpeer/rightpeer-vxlan.conf b/src/scripts/vnet/rightpeer/rightpeer-vxlan.conf
new file mode 100644
index 00000000..bd4c427e
--- /dev/null
+++ b/src/scripts/vnet/rightpeer/rightpeer-vxlan.conf
@@ -0,0 +1,16 @@
+
+comment { local client facing interface }
+
+comment { configure client lc4 eth1 }
+set int state GigabitEthernet2/4/0 up
+
+comment { tunnel to leftpeer }
+set int ip address GigabitEthernet2/3/0 6.0.3.3/24
+set int state GigabitEthernet2/3/0 up
+
+comment { tunnel to extra ping target }
+set int ip address GigabitEthernet2/7/0 6.0.5.3/24
+set int state GigabitEthernet2/7/0 up
+
+vxlan tunnel src 6.0.3.3 peer 6.0.3.1 peer 6.0.3.5 vni 123
+vxlan l2 GigabitEthernet2/4/0 vni 123
diff --git a/src/scripts/vnet/rightpeer/rightpeer.script b/src/scripts/vnet/rightpeer/rightpeer.script
new file mode 100644
index 00000000..153988ce
--- /dev/null
+++ b/src/scripts/vnet/rightpeer/rightpeer.script
@@ -0,0 +1,9 @@
+l2tp_set_lookup_key lookup_v6_src
+
+sw_interface_add_del_address GigabitEthernet2/3/0 db03::1/64
+sw_interface_set_flags GigabitEthernet2/3/0 admin-up
+
+comment sw_interface_add_del_address GigabitEthernet2/4/0 db02::2/64
+sw_interface_set_flags GigabitEthernet2/4/0 admin-up
+
+l2tp_session_add_del client_address db03::2 our_address db03::1 GigabitEthernet2/4/0 local_session_id 3 remote_session_id 1 local_cookie 33 remote_cookie 11
diff --git a/src/scripts/vnet/rpf b/src/scripts/vnet/rpf
new file mode 100644
index 00000000..571c6b79
--- /dev/null
+++ b/src/scripts/vnet/rpf
@@ -0,0 +1,18 @@
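+comment { unicast rpf test: the 1.2.3.4/32 route back out pg/stream-0 satisfies the source check enabled below, while the 5.6.7.8/32 destination resolves via drop }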
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.2.3.4 -> 5.6.7.8
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+tr add pg-input 100
+set interface ip source-check pg/stream-0
+ip route 1.2.3.4/32 via pg/stream-0 0x01020304
+ip route 5.6.7.8/32 via drop
+
diff --git a/src/scripts/vnet/rtt-test b/src/scripts/vnet/rtt-test
new file mode 100644
index 00000000..5501b99d
--- /dev/null
+++ b/src/scripts/vnet/rtt-test
@@ -0,0 +1,31 @@
+set int state TenGigabitEthernet4/0/0 up
+set int state TenGigabitEthernet5/0/0 up
+
+set int ip address TenGigabitEthernet4/0/0 1.0.0.1/24
+set int ip address TenGigabitEthernet5/0/0 2.0.0.1/24
+
+ip route table 0 1.0.0.2/32 via TenGigabitEthernet4/0/0 IP4: 00:1b:21:74:5b:04 -> 00:1b:21:79:8e:bc
+ip route table 0 2.0.0.2/32 via TenGigabitEthernet5/0/0 IP4: 00:1b:21:79:8e:bc -> 00:1b:21:74:5b:04
+ip route table 1 2.0.0.2/32 via local
+ip route table 1 1.0.0.2/32 via local
+
+set int ip table TenGigabitEthernet5/0/0 1
+set int ip table TenGigabitEthernet4/0/0 1
+
+comment { trace add rtt-test-tx 100 }
+comment { trace add ixge-input 100 }
+comment { te rtt { 1.0.0.2 -> 2.0.0.2 count 1e4 rate 1e9 size 100 histogram-time 1e-5 } }
+
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 170-170
+ data {
+ ICMP: 1.0.0.2 -> 2.0.0.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+
diff --git a/src/scripts/vnet/source_and_port_range_check b/src/scripts/vnet/source_and_port_range_check
new file mode 100644
index 00000000..dce227b4
--- /dev/null
+++ b/src/scripts/vnet/source_and_port_range_check
@@ -0,0 +1,63 @@
+
+create loop int
+
+set int state loop0 up
+set int ip addr loop0 10.10.10.10/32
+
+packet-generator new {
+ name deny-from-default-route
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.2.3.4 -> 5.6.7.8
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+packet-generator new {
+ name allow
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.1.1.1 -> 5.6.7.8
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+packet-generator new {
+ name deny-from-port-range
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.1.1.1 -> 5.6.7.8
+ UDP: 6000 -> 6001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+set ip source-and-port-range-check 1.1.1.0/24 range 2000 - 3000 vrf 99
+
+set interface ip source-and-port-range-check pg0 udp-out-vrf 99
+
+show ip source-and-port-range-check vrf 99 1.1.1.1
+
+set ip source-and-port-range-check 1.1.1.0/24 range 4000 - 5000 vrf 99
+
+set ip source-and-port-range-check 1.1.2.0/24 range 4000 - 5000 vrf 99
+
+show ip source-and-port-range-check vrf 99 1.1.1.1
+show ip source-and-port-range-check vrf 99 1.1.2.1
+
+set ip source-and-port-range-check 1.1.2.0/24 range 4000 - 5000 vrf 99 del
+
+show ip source-and-port-range-check vrf 99 1.1.2.1
+
+tr add pg-input 100
diff --git a/src/scripts/vnet/speed b/src/scripts/vnet/speed
new file mode 100644
index 00000000..d60d671f
--- /dev/null
+++ b/src/scripts/vnet/speed
@@ -0,0 +1,14 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 50-50
+ data {
+ ICMP: 1.2.3.4 -> 5.6.7.8
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+tr add pg-input 100
+ip route 5.6.7.8/32 via pg/stream-0 000102030405060708090a0b0c0d
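+comment { the trailing hex on the route appears to be a hand-coded 14-byte l2 rewrite string for the pg adjacency }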
diff --git a/src/scripts/vnet/sr/sr_mpls b/src/scripts/vnet/sr/sr_mpls
new file mode 100644
index 00000000..4646372a
--- /dev/null
+++ b/src/scripts/vnet/sr/sr_mpls
@@ -0,0 +1,11 @@
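+comment { bsid 20001 expands to the label stack 16001/16002/16003; a::/112 is steered through it and label 16001 resolves via the static arp entry on loop0 }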
+set interface mpls local0 enable
+sr mpls policy add bsid 20001 next 16001 next 16002 next 16003
+sr mpls steer l3 a::/112 via sr policy bsid 20001
+
+loop create
+set int state loop0 up
+
+set int ip address loop0 11.0.0.1/24
+set ip arp loop0 11.0.0.2 00:00:11:aa:bb:cc
+
+mpls local-label 16001 via 11.0.0.2 loop0 out-label 16001
diff --git a/src/scripts/vnet/srp b/src/scripts/vnet/srp
new file mode 100644
index 00000000..7cc37011
--- /dev/null
+++ b/src/scripts/vnet/srp
@@ -0,0 +1,27 @@
+srp create-interfaces
+srp create-interfaces
+set int hw-class fake-srp0 srp
+set int hw-class fake-srp1 srp
+
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.0.0.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+tr add pg-input 100
+set int state fake-srp0 up
+set int state fake-srp1 up
+
+set int ip address fake-srp0 1.0.0.1/24
+set int ip address fake-srp1 2.0.0.1/24
+
+
+
diff --git a/src/scripts/vnet/tcp b/src/scripts/vnet/tcp
new file mode 100644
index 00000000..b9c23c3a
--- /dev/null
+++ b/src/scripts/vnet/tcp
@@ -0,0 +1,18 @@
+loop create
+set int ip address loop0 192.168.1.1/8
+set int state loop0 up
+
+packet-generator new {
+ name x
+ limit 2048
+ node ip4-input
+ size 100-100
+ interface loop0
+ no-recycle
+ data {
+ TCP: 192.168.1.2 -> 192.168.1.1
+ TCP: 32415 -> 80
+ SYN
+ incrementing 100
+ }
+}
diff --git a/src/scripts/vnet/tcp-test b/src/scripts/vnet/tcp-test
new file mode 100644
index 00000000..52bfbcdd
--- /dev/null
+++ b/src/scripts/vnet/tcp-test
@@ -0,0 +1,6 @@
+int create-ethernet
+set int ip address fake-eth0 1.2.3.4/24
+set int state fake-eth0 up
+ip route 1.2.3.5/32 via local
+trace add tuntap-rx 100
+
diff --git a/src/scripts/vnet/tf-ucs-1 b/src/scripts/vnet/tf-ucs-1
new file mode 100644
index 00000000..efa5f2dc
--- /dev/null
+++ b/src/scripts/vnet/tf-ucs-1
@@ -0,0 +1,16 @@
+comment { connected to Ixia port 1 }
+set int ip address TenGigabitEthernet4/0/0 1.0.0.2/8
+
+set int state TenGigabitEthernet4/0/0 up
+
+comment { connected to Ixia port 2 }
+set int ip address TenGigabitEthernet4/0/1 2.0.0.2/8
+set int state TenGigabitEthernet4/0/1 up
+
+comment { connected to Ixia port 3 }
+set int ip address TenGigabitEthernet6/0/0 3.0.0.2/8
+set int state TenGigabitEthernet6/0/0 up
+
+comment { connected to Ixia port 4 }
+set int ip address TenGigabitEthernet6/0/1 4.0.0.2/8
+set int state TenGigabitEthernet6/0/1 up
diff --git a/src/scripts/vnet/udp b/src/scripts/vnet/udp
new file mode 100644
index 00000000..7dda1eec
--- /dev/null
+++ b/src/scripts/vnet/udp
@@ -0,0 +1,19 @@
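+comment { 512 udp packets at 10k pps, sweeping source addresses 192.168.1.2 - 192.168.2.255 toward the loop0 address }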
+loop create
+set int ip address loop0 192.168.1.1/8
+set int state loop0 up
+
+packet-generator new {
+ name udp
+ limit 512
+ rate 1e4
+ node ip4-input
+ size 100-100
+ interface loop0
+ no-recycle
+ data {
+ UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1
+ UDP: 4321 -> 1234
+ length 72
+ incrementing 100
+ }
+}
diff --git a/src/scripts/vnet/uri/afp_setup.cli b/src/scripts/vnet/uri/afp_setup.cli
new file mode 100644
index 00000000..c29afc6f
--- /dev/null
+++ b/src/scripts/vnet/uri/afp_setup.cli
@@ -0,0 +1,5 @@
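+comment { af_packet side of the uri/session tests: attach to the vpp1 }
+comment { veth created by tcp-setup.sh; the netns peer is 6.0.1.2 }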
+create host-interface name vpp1
+set int state host-vpp1 up
+set int ip address host-vpp1 6.0.1.1/24
+trace add af-packet-input 10
+session enable
diff --git a/src/scripts/vnet/uri/dpdk_setup.cli b/src/scripts/vnet/uri/dpdk_setup.cli
new file mode 100644
index 00000000..02bba58f
--- /dev/null
+++ b/src/scripts/vnet/uri/dpdk_setup.cli
@@ -0,0 +1,4 @@
+set int state GigabitEthernet1b/0/0 up
+set int ip address GigabitEthernet1b/0/0 6.0.1.1/24
+trace add dpdk-input 10
+session enable
diff --git a/src/scripts/vnet/uri/dummy_app.py b/src/scripts/vnet/uri/dummy_app.py
new file mode 100644
index 00000000..ff00f2fc
--- /dev/null
+++ b/src/scripts/vnet/uri/dummy_app.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+import socket
+import sys
+import time
+
+# action can be reflect or drop
+action = "drop"
+test = 0
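+
+# Usage: dummy_app.py <mode> <ip> <port> [<action> <test>]
+# mode is "server" or "client"; action "reflect" echoes data back to
+# the sender, and test=1 verifies the incrementing byte pattern.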
+
+def test_data (data, n_rcvd):
+ n_read = len(data)
+ for i in range(n_read):
+ expected = (n_rcvd + i) & 0xff
+ byte_got = ord (data[i])
+ if (byte_got != expected):
+ print("Difference at byte {}. Expected {} got {}"
+ .format(n_rcvd + i, expected, byte_got))
+ return n_read
+
+def handle_connection (connection, client_address):
+ print("Received connection from {}".format(repr(client_address)))
+ n_rcvd = 0
+ try:
+ while True:
+ data = connection.recv(4096)
+ if not data:
+ break
+ if (test == 1):
+ n_rcvd += test_data (data, n_rcvd)
+ if (action != "drop"):
+ connection.sendall(data)
+ finally:
+ connection.close()
+
+def run_server(ip, port):
+ print("Starting server {}:{}".format(repr(ip), repr(port)))
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ server_address = (ip, int(port))
+ sock.bind(server_address)
+ sock.listen(1)
+
+ while True:
+ connection, client_address = sock.accept()
+ handle_connection (connection, client_address)
+
+def prepare_data():
+ buf = []
+ for i in range (0, pow(2, 16)):
+ buf.append(i & 0xff)
+ return bytearray(buf)
+
+def run_client(ip, port):
+ print("Starting client {}:{}".format(repr(ip), repr(port)))
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ server_address = (ip, port)
+ sock.connect(server_address)
+
+ data = prepare_data()
+ n_rcvd = 0
+ n_sent = len (data)
+ try:
+ sock.sendall(data)
+
+ timeout = time.time() + 2
+ while n_rcvd < n_sent and time.time() < timeout:
+ tmp = sock.recv(1500)
+ tmp = bytearray (tmp)
+ n_read = len(tmp)
+ for i in range(n_read):
+ if (data[n_rcvd + i] != tmp[i]):
+ print("Difference at byte {}. Sent {} got {}"
+ .format(n_rcvd + i, data[n_rcvd + i], tmp[i]))
+ n_rcvd += n_read
+
+ if (n_rcvd != n_sent):
+ print("Sent {} and got back {}".format(n_sent, n_rcvd))
+ else:
+ print("Got back what we sent!")
+
+ finally:
+ sock.close()
+
+def run(mode, ip, port):
+ if (mode == "server"):
+ run_server (ip, port)
+ elif (mode == "client"):
+ run_client (ip, port)
+ else:
+ raise Exception("Unknown mode. Only client and server supported")
+
+if __name__ == "__main__":
+ if (len(sys.argv) < 4):
+ raise Exception("Usage: ./dummy_app <mode> <ip> <port> [<action> <test>]")
+ if (len(sys.argv) == 6):
+ action = sys.argv[4]
+ test = int(sys.argv[5])
+
+ run (sys.argv[1], sys.argv[2], int(sys.argv[3]))
diff --git a/src/scripts/vnet/uri/tap_setup.cli b/src/scripts/vnet/uri/tap_setup.cli
new file mode 100644
index 00000000..1d9a1b36
--- /dev/null
+++ b/src/scripts/vnet/uri/tap_setup.cli
@@ -0,0 +1,5 @@
+tap connect tap0 address 6.0.1.2/24
+set int ip addr tap-0 6.0.1.1/24
+set int state tap-0 up
+trace add tapcli-rx 10
+session enable
diff --git a/src/scripts/vnet/uri/tcp-setup.sh b/src/scripts/vnet/uri/tcp-setup.sh
new file mode 100755
index 00000000..e0b01588
--- /dev/null
+++ b/src/scripts/vnet/uri/tcp-setup.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
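+# Build a veth pair: veth_vpp1 (6.0.1.2/24) moves into netns vppns1,
+# while its peer vpp1 stays in the default netns for VPP's af_packet
+# driver (see afp_setup.cli).  Offloads are disabled on both ends so
+# packets reach VPP with checksums already computed.
+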
+function topo_setup
+{
+ ip netns add vppns1
+ ip link add veth_vpp1 type veth peer name vpp1
+ ip link set dev vpp1 up
+ ip link set dev veth_vpp1 up netns vppns1
+
+ ip netns exec vppns1 \
+ bash -c "
+ ip link set dev lo up
+ ip addr add 6.0.1.2/24 dev veth_vpp1
+ "
+
+ ethtool --offload vpp1 rx off tx off
+ ip netns exec vppns1 ethtool --offload veth_vpp1 rx off tx off
+
+}
+
+function topo_clean
+{
+ ip link del dev veth_vpp1 &> /dev/null
+ ip netns del vppns1 &> /dev/null
+}
+
+if [ "$1" == "clean" ] ; then
+ topo_clean
+ exit 0
+else
+ topo_setup
+fi
+
+# to test connectivity do:
+# sudo ip netns exec vppns1 telnet 6.0.1.1 1234
+# to push traffic to the server
+# dd if=/dev/zero bs=1024K count=512 | nc 6.0.1.1
+# to listen for incoming connection from vpp
+# nc -l 1234
diff --git a/src/scripts/vnet/uri/udp b/src/scripts/vnet/uri/udp
new file mode 100644
index 00000000..c7628f49
--- /dev/null
+++ b/src/scripts/vnet/uri/udp
@@ -0,0 +1,20 @@
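+comment { same generator stream as ../udp, but with the session layer enabled }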
+loop create
+set int ip address loop0 6.0.0.1/32
+set int state loop0 up
+
+packet-generator new {
+ name udp
+ limit 512
+ rate 1e4
+ node ip4-input
+ size 100-100
+ interface loop0
+ no-recycle
+ data {
+ UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1
+ UDP: 4321 -> 1234
+ length 72
+ incrementing 100
+ }
+}
+session enable
diff --git a/src/scripts/vnet/urpf b/src/scripts/vnet/urpf
new file mode 100644
index 00000000..a4d87527
--- /dev/null
+++ b/src/scripts/vnet/urpf
@@ -0,0 +1,86 @@
+
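+comment { strict uRPF on pg0 should drop transit-deny, since 1.2.3.4 }
+comment { has no return route via pg0; the other four streams pass, }
+comment { with 11.11.0.0/16 sources exempted via urpf-accept }
+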
+create loop int
+
+set int state loop0 up
+set int ip addr loop0 10.10.10.10/24
+
+packet-generator new {
+ name transit-deny
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.2.3.4 -> 2.2.2.2
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+packet-generator new {
+ name transit-allow
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.1.1.1 -> 2.2.2.2
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+packet-generator new {
+ name transit-allow-from-exemption
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 11.11.12.13 -> 2.2.2.2
+ UDP: 6000 -> 6001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+packet-generator new {
+ name for-us-allow-from-exemption
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 11.11.12.13 -> 10.10.10.10
+ UDP: 6000 -> 6001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+packet-generator new {
+ name for-us-allow
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.1.1.1 -> 10.10.10.10
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+tr add pg-input 100
+
+set int ip addr pg0 10.10.11.10/24
+
+set interface ip source-check pg0 strict
+
+ip route add 1.1.1.1/32 via 10.10.11.11 pg0
+ip route add 2.2.2.2/32 via 10.10.10.11 loop0
+
+ip urpf-accept 11.11.0.0/16
+
+#set interface ip source-check pg0 strict del
+#set interface ip source-check pg0 loose
+
+#ip urpf-accept del 11.11.0.0/16
diff --git a/src/scripts/vnet/virl/ip6sr.virl b/src/scripts/vnet/virl/ip6sr.virl
new file mode 100644
index 00000000..5d4d1a0a
--- /dev/null
+++ b/src/scripts/vnet/virl/ip6sr.virl
@@ -0,0 +1,874 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<topology xmlns="http://www.cisco.com/VIRL" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" schemaVersion="0.9" xsi:schemaLocation="http://www.cisco.com/VIRL https://raw.github.com/CiscoVIRL/schema/v0.9/virl.xsd">
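+ <!-- four IOSv edge routers around four vpp core nodes; the vpp-1 and
+ vpp-2 configs below set up the IPv6 SR tunnels walked through in
+ ip6sr_notes.txt -->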
+ <extensions>
+ <entry key="management_network" type="String">flat</entry>
+ <entry key="AutoNetkit.address_family" type="String">dual_stack</entry>
+ <entry key="AutoNetkit.enable_routing" type="Boolean">false</entry>
+ </extensions>
+ <node name="iosv-1" type="SIMPLE" subtype="IOSv" location="195,78" ipv4="192.168.0.2" ipv6="::b:1:0:0:2">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 16:41:18 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-1
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.2 255.255.255.255
+ ipv6 address ::B:1:0:0:2/128
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.193 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-1
+ ip address 10.0.0.5 255.255.255.252
+ duplex auto
+ speed auto
+ media-type rj45
+ ipv6 address ::A:1:1:0:6/126
+!
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+!
+! route to iosv-2
+ipv6 route ::A:1:1:0:16/128 ::A:1:1:0:7
+! route to iosv-4
+ipv6 route ::A:1:1:0:22/128 ::A:1:1:0:7
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.5" ipv6="::a:1:1:0:6" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="iosv-2" type="SIMPLE" subtype="IOSv" location="667,82" ipv4="192.168.0.3" ipv6="::b:1:0:0:3">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 16:41:10 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-2
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.3 255.255.255.255
+ ipv6 address ::B:1:0:0:3/128
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.191 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-2
+ ip address 10.0.0.21 255.255.255.252
+ duplex auto
+ speed auto
+ media-type rj45
+ ipv6 address ::A:1:1:0:16/126
+!
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+!
+ipv6 route ::A:1:1:0:6/128 ::A:1:1:0:17
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.21" ipv6="::a:1:1:0:16" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="iosv-3" type="SIMPLE" subtype="IOSv" location="194,186" ipv4="192.168.0.6" ipv6="::b:1:0:0:4">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 16:27:43 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-3
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.6 255.255.255.255
+ ipv6 address ::B:1:0:0:4/128
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.192 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-1
+ ip address 10.0.0.9 255.255.255.252
+ duplex auto
+ speed auto
+ media-type rj45
+ ipv6 address ::A:1:1:0:A/126
+!
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+!
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.9" ipv6="::a:1:1:0:a" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="iosv-4" type="SIMPLE" subtype="IOSv" location="668,193" ipv4="192.168.0.8" ipv6="::b:1:0:0:5">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 16:27:43 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-4
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.8 255.255.255.255
+ ipv6 address ::B:1:0:0:5/128
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.194 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-2
+ ip address 10.0.0.33 255.255.255.252
+ duplex auto
+ speed auto
+ media-type rj45
+ ipv6 address ::A:1:1:0:22/126
+!
+! Route to iosv-1
+ipv6 route ::A:1:1:0:6/128 ::A:1:1:0:23
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+!
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.33" ipv6="::a:1:1:0:22" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="vpp-1" type="SIMPLE" subtype="vPP" location="302,140">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-1
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" &gt;&gt; /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.6/30
+ set interface ip address GigabitEthernet0/4/0 ::a:1:1:0:7/126
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.10/30
+ set interface ip address GigabitEthernet0/5/0 ::a:1:1:0:b/126
+ set interface state GigabitEthernet0/5/0 up
+ set interface ip address GigabitEthernet0/6/0 10.0.0.13/30
+ set interface ip address GigabitEthernet0/6/0 ::a:1:1:0:e/126
+ set interface state GigabitEthernet0/6/0 up
+ set interface ip address GigabitEthernet0/7/0 10.0.0.17/30
+ set interface ip address GigabitEthernet0/7/0 ::a:1:1:0:12/126
+ set interface state GigabitEthernet0/7/0 up
+ sr tunnel src 0::a:1:1:0:6 dst 0::a:1:1:0:16/128 next 0::a:1:1:0:f next 0::a:1:1:0:1a next 0::a:1:1:0:16 tag 0::a:1:1:0:7 InPE 1 clean
+ sr tunnel src 0::a:1:1:0:6 dst 0::a:1:1:0:22/128 next 0::a:1:1:0:f next 0::a:1:1:0:1a next 0::a:1:1:0:22 tag 0::a:1:1:0:7 InPE 1 clean
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.6" ipv6="::a:1:1:0:7" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.10" ipv6="::a:1:1:0:b" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="2" name="GigabitEthernet0/6/0" ipv4="10.0.0.13" ipv6="::a:1:1:0:e" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="3" name="GigabitEthernet0/7/0" ipv4="10.0.0.17" ipv6="::a:1:1:0:12" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="vpp-2" type="SIMPLE" subtype="vPP" location="541,147">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-2
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" &gt;&gt; /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.25/30
+ set interface ip address GigabitEthernet0/4/0 ::a:1:1:0:1a/126
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.29/30
+ set interface ip address GigabitEthernet0/5/0 ::a:1:1:0:1e/126
+ set interface state GigabitEthernet0/5/0 up
+ set interface ip address GigabitEthernet0/6/0 10.0.0.22/30
+ set interface ip address GigabitEthernet0/6/0 ::a:1:1:0:17/126
+ set interface state GigabitEthernet0/6/0 up
+ set interface ip address GigabitEthernet0/7/0 10.0.0.34/30
+ set interface ip address GigabitEthernet0/7/0 ::a:1:1:0:23/126
+ set interface state GigabitEthernet0/7/0 up
+ sr tunnel src 0::a:1:1:0:16 dst 0::a:1:1:0:6/128 next 0::a:1:1:0:1b next 0::a:1:1:0:e next 0::a:1:1:0:6 tag 0::a:1:1:0:17 InPE 1 clean
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.25" ipv6="::a:1:1:0:1a" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.29" ipv6="::a:1:1:0:1e" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="2" name="GigabitEthernet0/6/0" ipv4="10.0.0.22" ipv6="::a:1:1:0:17" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="3" name="GigabitEthernet0/7/0" ipv4="10.0.0.34" ipv6="::a:1:1:0:23" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="vpp-3" type="SIMPLE" subtype="vPP" location="415,89">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-3
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" &gt;&gt; /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.14/30
+ set interface ip address GigabitEthernet0/4/0 ::a:1:1:0:f/126
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.26/30
+ set interface ip address GigabitEthernet0/5/0 ::a:1:1:0:1b/126
+ set interface state GigabitEthernet0/5/0 up
+ comment { fix src rpf drop screws}
+ ip route add ::a:1:1:0:6/128 via drop
+ ip route add ::a:1:1:0:16/128 via drop
+ ip route add ::a:1:1:0:22/128 via drop
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.14" ipv6="::a:1:1:0:f" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.26" ipv6="::a:1:1:0:1b" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <node name="vpp-4" type="SIMPLE" subtype="vPP" location="419,216">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-4
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" &gt;&gt; /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.18/30
+ set interface ip address GigabitEthernet0/4/0 ::a:1:1:0:13/126
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.30/30
+ set interface ip address GigabitEthernet0/5/0 ::a:1:1:0:1f/126
+ set interface state GigabitEthernet0/5/0 up
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.18" ipv6="::a:1:1:0:13" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.30" ipv6="::a:1:1:0:1f" netPrefixLenV4="30" netPrefixLenV6="126"/>
+ </node>
+ <connection dst="/virl:topology/virl:node[5]/virl:interface[1]" src="/virl:topology/virl:node[1]/virl:interface[1]"/>
+ <connection dst="/virl:topology/virl:node[5]/virl:interface[2]" src="/virl:topology/virl:node[3]/virl:interface[1]"/>
+ <connection dst="/virl:topology/virl:node[7]/virl:interface[1]" src="/virl:topology/virl:node[5]/virl:interface[3]"/>
+ <connection dst="/virl:topology/virl:node[8]/virl:interface[1]" src="/virl:topology/virl:node[5]/virl:interface[4]"/>
+ <connection dst="/virl:topology/virl:node[6]/virl:interface[1]" src="/virl:topology/virl:node[7]/virl:interface[2]"/>
+ <connection dst="/virl:topology/virl:node[6]/virl:interface[2]" src="/virl:topology/virl:node[8]/virl:interface[2]"/>
+ <connection dst="/virl:topology/virl:node[2]/virl:interface[1]" src="/virl:topology/virl:node[6]/virl:interface[3]"/>
+ <connection dst="/virl:topology/virl:node[4]/virl:interface[1]" src="/virl:topology/virl:node[6]/virl:interface[4]"/>
+</topology>
diff --git a/src/scripts/vnet/virl/ip6sr_notes.txt b/src/scripts/vnet/virl/ip6sr_notes.txt
new file mode 100644
index 00000000..5febf2c6
--- /dev/null
+++ b/src/scripts/vnet/virl/ip6sr_notes.txt
@@ -0,0 +1,38 @@
+vpp-1, tunnel 1: iosv-1 vpp-1 vpp-3 vpp-2 iosv-2
+
+iosv-1
+::a:1:1:0:6
+
+to iosv-2
+ipv6 route ::a:1:1:0:16/128 ::a:1:1:0:7
+to iosv-4
+ipv6 route ::a:1:1:0:22/128 ::a:1:1:0:7
+
+vpp-1
+
+::a:1:1:0:7
+...
+::a:1:1:0:e
+
+sr tunnel src 0::a:1:1:0:6 dst 0::a:1:1:0:16/128 next 0::a:1:1:0:f next 0::a:1:1:0:1a next 0::a:1:1:0:16 tag 0::a:1:1:0:7 InPE 1 clean
+
+vpp-3
+::a:1:1:0:f
+..
+::a:1:1:0:1b
+
+comment {fix src rpf screws}
+ip route add ::a:1:1:0:6/128 via drop
+ip route add ::a:1:1:0:16/128 via drop
+ip route add ::a:1:1:0:22/128 via drop
+
+vpp-2
+::a:1:1:0:1a
+..
+::a:1:1:0:17
+
+sr tunnel src 0::a:1:1:0:16 dst 0::a:1:1:0:6/128 next 0::a:1:1:0:1b next 0::a:1:1:0:e next 0::a:1:1:0:6 tag 0::a:1:1:0:17 InPE 1 clean
+
+iosv-2
+::a:1:1:0:16
+
+ipv6 route ::a:1:1:0:6/128 ::a:1:1:0:17
diff --git a/src/scripts/vnet/virl/mplsogre.virl b/src/scripts/vnet/virl/mplsogre.virl
new file mode 100644
index 00000000..33dd0329
--- /dev/null
+++ b/src/scripts/vnet/virl/mplsogre.virl
@@ -0,0 +1,319 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<topology xmlns="http://www.cisco.com/VIRL" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" schemaVersion="0.9" xsi:schemaLocation="http://www.cisco.com/VIRL https://raw.github.com/CiscoVIRL/schema/v0.9/virl.xsd">
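+ <!-- vpp-1 and vpp-2 terminate an MPLS-over-GRE tunnel between two
+ IOSv routers peering over OSPF and iBGP -->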
+ <extensions>
+ <entry key="management_network" type="String">flat</entry>
+ </extensions>
+ <node name="vpp-1" type="SIMPLE" subtype="vPP" location="467,307" vmImage="vPP-hacker [445e503a-564e-4c14-93e1-9660957c539a]" vmFlavor="vPP-small [e7f7de38-54e3-46f1-92ef-92b0039c1c60]">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-1
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo &quot;UseDNS no&quot; &gt;&gt; /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.6/30
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.9/30
+ set interface state GigabitEthernet0/5/0 up
+ mpls encap add label 30 fib 0 dest 10.0.0.10
+ mpls decap add label 30 fib 0
+ create mpls gre tunnel src 10.0.0.9 dst 10.0.0.10 intfc 10.0.0.13/30 inner-fib-id 0 outer-fib-id 0
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.6" netPrefixLenV4="30"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.9" netPrefixLenV4="30"/>
+ </node>
+ <node name="vpp-2" type="SIMPLE" subtype="vPP" location="591,305" vmImage="vPP-hacker [445e503a-564e-4c14-93e1-9660957c539a]" vmFlavor="vPP-small [e7f7de38-54e3-46f1-92ef-92b0039c1c60]">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-2
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo &quot;UseDNS no&quot; &gt;&gt; /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.10/30
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.14/30
+ set interface state GigabitEthernet0/5/0 up
+ mpls encap add label 30 fib 0 dest 10.0.0.9
+ mpls decap add label 30 fib 0
+ create mpls gre tunnel src 10.0.0.10 dst 10.0.0.9 intfc 10.0.0.5/30 inner-fib-id 0 outer-fib-id 0
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.10" netPrefixLenV4="30"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.14" netPrefixLenV4="30"/>
+ </node>
+ <node name="iosv-1" type="SIMPLE" subtype="IOSv" location="326,311" ipv4="192.168.0.1">
+ <extensions>
+ <entry key="config" type="string">! IOS Config generated on 2015-03-03 17:26
+! by autonetkit_0.15.0
+!
+hostname iosv-1
+boot-start-marker
+boot-end-marker
+!
+no aaa new-model
+!
+!
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+!
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+no service config
+enable password cisco
+ip classless
+ip subnet-zero
+no ip domain lookup
+line vty 0 4
+ transport input ssh telnet
+ exec-timeout 720 0
+ password cisco
+ login
+line con 0
+ password cisco
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.1 255.255.255.255
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ! Configured on launch
+ no ip address
+ duplex auto
+ speed auto
+ no shutdown
+!
+interface GigabitEthernet0/1
+ description to vpp-1
+ ip address 10.0.0.5 255.255.255.252
+ ip ospf cost 1
+ duplex auto
+ speed auto
+ no shutdown
+!
+!
+!
+router ospf 1
+ network 192.168.0.1 0.0.0.0 area 0
+ log-adjacency-changes
+ passive-interface Loopback0
+ network 10.0.0.4 0.0.0.3 area 0
+!
+!
+router bgp 1
+ bgp router-id 192.168.0.1
+ no synchronization
+! ibgp
+ ! ibgp peers
+ !
+ neighbor 192.168.0.4 remote-as 1
+ neighbor 192.168.0.4 description iBGP peer iosv-2
+ neighbor 192.168.0.4 update-source Loopback0
+!
+!
+ !
+ address-family ipv4
+ network 192.168.0.1 mask 255.255.255.255
+ neighbor 192.168.0.4 activate
+ exit-address-family
+!
+!
+!
+ip route 10.0.0.13 255.255.255.255 10.0.0.6
+end
+</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.5" netPrefixLenV4="30"/>
+ </node>
+ <node name="iosv-2" type="SIMPLE" subtype="IOSv" location="741,307" ipv4="192.168.0.4">
+ <extensions>
+ <entry key="config" type="string">! IOS Config generated on 2015-03-03 17:26
+! by autonetkit_0.15.0
+!
+hostname iosv-2
+boot-start-marker
+boot-end-marker
+!
+no aaa new-model
+!
+!
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+!
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+no service config
+enable password cisco
+ip classless
+ip subnet-zero
+no ip domain lookup
+line vty 0 4
+ transport input ssh telnet
+ exec-timeout 720 0
+ password cisco
+ login
+line con 0
+ password cisco
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.4 255.255.255.255
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ! Configured on launch
+ no ip address
+ duplex auto
+ speed auto
+ no shutdown
+!
+interface GigabitEthernet0/1
+ description to vpp-2
+ ip address 10.0.0.13 255.255.255.252
+ ip ospf cost 1
+ duplex auto
+ speed auto
+ no shutdown
+!
+!
+!
+router ospf 1
+ network 192.168.0.4 0.0.0.0 area 0
+ log-adjacency-changes
+ passive-interface Loopback0
+ network 10.0.0.12 0.0.0.3 area 0
+!
+!
+router bgp 1
+ bgp router-id 192.168.0.4
+ no synchronization
+! ibgp
+ ! ibgp peers
+ !
+ neighbor 192.168.0.1 remote-as 1
+ neighbor 192.168.0.1 description iBGP peer iosv-1
+ neighbor 192.168.0.1 update-source Loopback0
+!
+!
+ !
+ address-family ipv4
+ network 192.168.0.4 mask 255.255.255.255
+ neighbor 192.168.0.1 activate
+ exit-address-family
+!
+!
+ip route 10.0.0.5 255.255.255.255 10.0.0.14
+!
+end
+</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.13" netPrefixLenV4="30"/>
+ </node>
+ <connection dst="/virl:topology/virl:node[1]/virl:interface[1]" src="/virl:topology/virl:node[3]/virl:interface[1]"/>
+ <connection dst="/virl:topology/virl:node[2]/virl:interface[1]" src="/virl:topology/virl:node[1]/virl:interface[2]"/>
+ <connection dst="/virl:topology/virl:node[4]/virl:interface[1]" src="/virl:topology/virl:node[2]/virl:interface[2]"/>
+</topology>
diff --git a/src/scripts/vnet/virl/simple.virl b/src/scripts/vnet/virl/simple.virl
new file mode 100644
index 00000000..6033c42c
--- /dev/null
+++ b/src/scripts/vnet/virl/simple.virl
@@ -0,0 +1,389 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<topology xmlns="http://www.cisco.com/VIRL" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" schemaVersion="0.9" xsi:schemaLocation="http://www.cisco.com/VIRL https://raw.github.com/CiscoVIRL/schema/v0.9/virl.xsd">
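+ <!-- minimal sanity topology: a single vpp node forwarding between
+ two IOSv iBGP peers -->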
+ <extensions>
+ <entry key="management_network" type="String">flat</entry>
+ </extensions>
+ <node name="iosv-1" type="SIMPLE" subtype="IOSv" location="427,219" ipv4="192.168.0.2">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 14:27:32 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-1
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.2 255.255.255.255
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.167 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-1
+ ip address 10.0.0.5 255.255.255.252
+ ip ospf cost 1
+ duplex auto
+ speed auto
+ media-type rj45
+!
+router ospf 1
+ passive-interface Loopback0
+ network 10.0.0.4 0.0.0.3 area 0
+ network 192.168.0.2 0.0.0.0 area 0
+!
+router bgp 1
+ bgp router-id 192.168.0.2
+ bgp log-neighbor-changes
+ neighbor 192.168.0.3 remote-as 1
+ neighbor 192.168.0.3 description iBGP peer iosv-2
+ neighbor 192.168.0.3 update-source Loopback0
+ !
+ address-family ipv4
+ network 192.168.0.2 mask 255.255.255.255
+ neighbor 192.168.0.3 activate
+ exit-address-family
+!
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+ip route 10.0.0.9 255.255.255.255 10.0.0.6
+!
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.5" netPrefixLenV4="30"/>
+ </node>
+ <node name="iosv-2" type="SIMPLE" subtype="IOSv" location="704,221" ipv4="192.168.0.3">
+ <extensions>
+ <entry key="config" type="string">!
+! Last configuration change at 14:26:58 UTC Fri Mar 27 2015
+!
+version 15.4
+service timestamps debug datetime msec
+service timestamps log datetime msec
+no service password-encryption
+!
+hostname iosv-2
+!
+boot-start-marker
+boot-end-marker
+!
+!
+enable password cisco
+!
+no aaa new-model
+!
+!
+!
+mmi polling-interval 60
+no mmi auto-configure
+no mmi pvc
+mmi snmp-timeout 180
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+no ip domain lookup
+ip cef
+ipv6 unicast-routing
+ipv6 cef
+!
+multilink bundle-name authenticated
+!
+!
+cts logging verbose
+!
+!
+!
+redundancy
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+!
+interface Loopback0
+ description Loopback
+ ip address 192.168.0.3 255.255.255.255
+!
+interface GigabitEthernet0/0
+ description OOB Management
+ ip address 172.16.1.164 255.255.255.0
+ duplex auto
+ speed auto
+ media-type rj45
+!
+interface GigabitEthernet0/1
+ description to vpp-1
+ ip address 10.0.0.9 255.255.255.252
+ ip ospf cost 1
+ duplex auto
+ speed auto
+ media-type rj45
+!
+router ospf 1
+ passive-interface Loopback0
+ network 10.0.0.8 0.0.0.3 area 0
+ network 192.168.0.3 0.0.0.0 area 0
+!
+router bgp 1
+ bgp router-id 192.168.0.3
+ bgp log-neighbor-changes
+ neighbor 192.168.0.2 remote-as 1
+ neighbor 192.168.0.2 description iBGP peer iosv-1
+ neighbor 192.168.0.2 update-source Loopback0
+ !
+ address-family ipv4
+ network 192.168.0.3 mask 255.255.255.255
+ neighbor 192.168.0.2 activate
+ exit-address-family
+!
+ip forward-protocol nd
+!
+!
+no ip http server
+no ip http secure-server
+ip route 10.0.0.5 255.255.255.255 10.0.0.10
+!
+!
+!
+!
+control-plane
+!
+banner exec `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner incoming `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+banner login `
+**************************************************************************
+* IOSv is strictly limited to use for evaluation, demonstration and IOS *
+* education. IOSv is provided as-is and is not supported by Cisco's *
+* Technical Advisory Center. Any use or disclosure, in whole or in part, *
+* of the IOSv Software or Documentation to any third party for any *
+* purposes is expressly prohibited except as otherwise authorized by *
+* Cisco in writing. *
+**************************************************************************`
+!
+line con 0
+ password cisco
+line aux 0
+line vty 0 4
+ exec-timeout 720 0
+ password cisco
+ login
+ transport input telnet ssh
+!
+no scheduler allocate
+!
+end</entry>
+ <entry key="AutoNetkit.mgmt_ip" type="string"></entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/1" ipv4="10.0.0.9" netPrefixLenV4="30"/>
+ </node>
+ <node name="vpp-1" type="SIMPLE" subtype="vPP" location="561,217">
+ <extensions>
+ <entry key="config" type="string">#cloud-config
+bootcmd:
+- ln -s -t /etc/rc.d /etc/rc.local
+hostname: vpp-1
+manage_etc_hosts: true
+runcmd:
+- start ttyS0
+- systemctl start getty@ttyS0.service
+- systemctl start rc-local
+- sed -i '/^\s*PasswordAuthentication\s\+no/d' /etc/ssh/sshd_config
+- echo "UseDNS no" &gt;&gt; /etc/ssh/sshd_config
+- service ssh restart
+- service sshd restart
+- sed -i 's/no-pci//' /cisco/etc/qn.conf
+- sed -i 's/1024/1024 decimal-interface-names/g' /cisco/etc/qn.conf
+users:
+- default
+- gecos: User configured by VIRL Configuration Engine 0.15.3
+ lock-passwd: false
+ name: cisco
+ plain-text-passwd: cisco
+ shell: /bin/bash
+ ssh-authorized-keys:
+ - VIRL-USER-SSH-PUBLIC-KEY
+ sudo: ALL=(ALL) ALL
+write_files:
+- path: /etc/init/ttyS0.conf
+ owner: root:root
+ content: |
+ # ttyS0 - getty
+ # This service maintains a getty on ttyS0 from the point the system is
+ # started until it is shut down again.
+ start on stopped rc or RUNLEVEL=[12345]
+ stop on runlevel [!12345]
+ respawn
+ exec /sbin/getty -L 115200 ttyS0 vt102
+ permissions: '0644'
+- path: /etc/systemd/system/dhclient@.service
+ content: |
+ [Unit]
+ Description=Run dhclient on %i interface
+ After=network.target
+ [Service]
+ Type=oneshot
+ ExecStart=/sbin/dhclient %i -pf /var/run/dhclient.%i.pid -lf /var/lib/dhclient/dhclient.%i.lease
+ RemainAfterExit=yes
+ owner: root:root
+ permissions: '0644'
+- path: /cisco/etc/vpp-server.conf
+ owner: root:root
+ permissions: '0644'
+ content: |-
+ set interface ip address GigabitEthernet0/4/0 10.0.0.6/30
+ set interface state GigabitEthernet0/4/0 up
+ set interface ip address GigabitEthernet0/5/0 10.0.0.10/30
+ set interface state GigabitEthernet0/5/0 up
+</entry>
+ </extensions>
+ <interface id="0" name="GigabitEthernet0/4/0" ipv4="10.0.0.6" netPrefixLenV4="30"/>
+ <interface id="1" name="GigabitEthernet0/5/0" ipv4="10.0.0.10" netPrefixLenV4="30"/>
+ </node>
+ <connection dst="/virl:topology/virl:node[3]/virl:interface[1]" src="/virl:topology/virl:node[1]/virl:interface[1]"/>
+ <connection dst="/virl:topology/virl:node[2]/virl:interface[1]" src="/virl:topology/virl:node[3]/virl:interface[2]"/>
+</topology>
diff --git a/src/scripts/vnet/vlan b/src/scripts/vnet/vlan
new file mode 100644
index 00000000..076080a6
--- /dev/null
+++ b/src/scripts/vnet/vlan
@@ -0,0 +1,23 @@
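+comment { the generated frame carries vlan tag 1, so ethernet-input }
+comment { should classify it onto subinterface fake-eth0.1 }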
+int create-ethernet
+int create-sub fake-eth0 1
+set int state fake-eth0 up
+set int state fake-eth0.1 up
+
+packet-generator new {
+ name x
+ limit 1
+ node ethernet-input
+ interface fake-eth0
+ size 64-64
+ no-recycle
+ data {
+ IP4: 1.2.3 -> 4.5.6 vlan 1
+ ICMP: 1.2.3.4 -> 5.6.7.8
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+tr add pg-input 100
+ip route 5.6.7.8/32 via local
+ip route 1.2.3.4/32 via local
diff --git a/src/scripts/vppctl b/src/scripts/vppctl
new file mode 100755
index 00000000..01369189
--- /dev/null
+++ b/src/scripts/vppctl
@@ -0,0 +1,134 @@
+#! /usr/bin/python
+'''
+Copyright 2016 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
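+
+# Thin interactive wrapper around vpp_api_test: each line typed at the
+# prompt is forwarded to VAT as an "exec <line>" command.  Run with no
+# arguments for a prompt, or pass a command inline, e.g.
+#   vppctl show interface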
+
+from cmd import Cmd
+import os
+import subprocess
+import re
+import sys
+from optparse import OptionParser
+
+try:
+ import readline
+except ImportError:
+ readline = None
+
+persishist = os.path.expanduser('~/.vpphistory')
+persishist_size = 1000
+if not os.path.exists(persishist):
+ open(persishist, 'a').close()
+
+class Vppctl(Cmd):
+
+ def __init__(self,api_prefix=None):
+ Cmd.__init__(self)
+ self.api_prefix = api_prefix
+
+ def historyWrite(self):
+ if readline:
+ readline.set_history_length(persishist_size)
+ readline.write_history_file(persishist)
+
+ def runVat(self, line):
+ input_prefix = "exec "
+ input_command = input_prefix + line
+ line_remove = '^load_one_plugin:'
+ s = '\n'
+ if ( self.api_prefix is None):
+ command = ['vpp_api_test']
+ else:
+ command = ['vpp_api_test',"chroot prefix %s " % self.api_prefix]
+
+ if os.geteuid() != 0:
+ command = ['sudo'] + command
+
+ vpp_process = subprocess.Popen(command,
+ stderr=subprocess.PIPE,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+ stdout_value = vpp_process.communicate(input_command)[0]
+
+ buffer_stdout = stdout_value.splitlines()
+
+ buffer_stdout[:] = [b for b in buffer_stdout
+ if not re.search(line_remove, b)]
+
+ for i, num in enumerate(buffer_stdout):
+ buffer_stdout[i] = num.replace('vat# ','')
+
+ stdout_value = s.join(buffer_stdout)
+ print stdout_value
+
+ def do_help(self, line):
+ self.runVat("help")
+
+ def default(self, line):
+ self.runVat(line)
+
+ def do_exit(self, line):
+ self.historyWrite()
+ raise SystemExit
+
+ def emptyline(self):
+ pass
+
+ def do_EOF(self,line):
+ self.historyWrite()
+ sys.stdout.write('\n')
+ raise SystemExit
+
+ def preloop(self):
+ if readline and os.path.exists(persishist):
+ readline.read_history_file(persishist)
+
+ def postcmd(self, stop, line):
+ self.historyWrite()
+
+if __name__ == '__main__':
+ parser = OptionParser()
+ parser.add_option("-p","--prefix",action="store",type="string",dest="prefix")
+ (options,command_args) = parser.parse_args(sys.argv)
+
+ if not len(command_args) > 1:
+ prompt = Vppctl(options.prefix)
+ red_set = '\033[31m'
+ norm_set = '\033[0m'
+ if sys.stdout.isatty():
+            if options.prefix is None:
+ prompt.prompt = 'vpp# '
+ else:
+ prompt.prompt = '%s# ' % options.prefix
+ try:
+ prompt.cmdloop(red_set + " _______ _ " + norm_set + " _ _____ ___ \n" +
+ red_set + " __/ __/ _ \ (_)__ " + norm_set + " | | / / _ \/ _ \\\n" +
+ red_set + " _/ _// // / / / _ \\" + norm_set + " | |/ / ___/ ___/\n" +
+ red_set + " /_/ /____(_)_/\___/ " + norm_set + "|___/_/ /_/ \n")
+ except KeyboardInterrupt:
+ sys.stdout.write('\n')
+ else:
+ try:
+ prompt.cmdloop()
+ except KeyboardInterrupt:
+ sys.stdout.write('\n')
+ else:
+ del command_args[0]
+ stdout_value = " ".join(command_args)
+ VatAddress = Vppctl(options.prefix)
+ VatAddress.runVat(stdout_value)
+
+
+
diff --git a/src/scripts/vppctl_completion b/src/scripts/vppctl_completion
new file mode 100644
index 00000000..2a64e9ab
--- /dev/null
+++ b/src/scripts/vppctl_completion
@@ -0,0 +1,30 @@
+#Copyright 2017 Intel Corporation
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+_vppctl()
+{
+    local cur prev num opts
+    cur="${COMP_WORDS[COMP_CWORD]}"
+    prev="${COMP_WORDS[@]:1}"
+    num="$((${#COMP_WORDS[@]}-1))"
+    COMPREPLY=( $(compgen -f ${cur}) )
+
+ VPP_CMD_LIST="$(cat /usr/share/vpp/vppctl-cmd-list)"
+
+ opts="$(awk -v prev="^$prev" -v num=$num '{if($0 ~ prev) print $num}' <<< "${VPP_CMD_LIST}")"
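+    # Worked example (assuming vppctl-cmd-list holds one command per
+    # line): for "vppctl set int<TAB>", num=2 and prev="set int", so
+    # awk prints word 2 ("interface", ...) of every line matching
+    # "^set int", and compgen then narrows the candidates by ${cur}.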
+
+ COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
+
+}
+complete -F _vppctl vppctl
diff --git a/src/suffix-rules.mk b/src/suffix-rules.mk
new file mode 100644
index 00000000..9115c55b
--- /dev/null
+++ b/src/suffix-rules.mk
@@ -0,0 +1,27 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Shared suffix rules
+# Please do not set "SUFFIXES = .api.h .api" here
+
+%.api.h: %.api @VPPAPIGEN@
+ @echo " APIGEN " $@ ; \
+ mkdir -p `dirname $@` ; \
+ $(CC) $(CPPFLAGS) -E -P -C -x c $< \
+ | @VPPAPIGEN@ --input - --output $@ --show-name $@ > /dev/null
+
+%.api.json: %.api @VPPAPIGEN@
+ @echo " JSON API" $@ ; \
+ mkdir -p `dirname $@` ; \
+ $(CC) $(CPPFLAGS) -E -P -C -x c $< \
+ | @VPPAPIGEN@ --input - --json $@ > /dev/null
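+
+# For a hypothetical foo/foo.api, these rules preprocess the .api file
+# with $(CC) -E and pipe it through vppapigen, yielding foo/foo.api.h
+# (generated C definitions) and foo/foo.api.json (consumed by language
+# bindings) under the build directory.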
diff --git a/src/svm.am b/src/svm.am
new file mode 100644
index 00000000..7116eb37
--- /dev/null
+++ b/src/svm.am
@@ -0,0 +1,40 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+bin_PROGRAMS += svmtool svmdbtool
+
+nobase_include_HEADERS += svm/svm.h svm/svm_common.h svm/ssvm.h svm/svmdb.h \
+ svm/svm_fifo.h svm/svm_fifo_segment.h
+
+lib_LTLIBRARIES += libsvm.la libsvmdb.la
+
+libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svm/svm_fifo.c svm/svm_fifo_segment.c
+libsvm_la_LIBADD = libvppinfra.la -lrt -lpthread
+libsvm_la_DEPENDENCIES = libvppinfra.la
+
+svmtool_SOURCES = svm/svmtool.c
+svmtool_LDADD = libsvm.la libvppinfra.la -lpthread -lrt
+
+libsvmdb_la_LIBADD = libvppinfra.la libsvm.la
+libsvmdb_la_DEPENDENCIES = libvppinfra.la libsvm.la
+libsvmdb_la_SOURCES = svm/svmdb.c
+
+svmdbtool_SOURCES = svm/svmdbtool.c
+svmdbtool_LDADD = libsvmdb.la libsvm.la libvppinfra.la -lpthread -lrt
+
+noinst_PROGRAMS += test_svm_fifo1
+test_svm_fifo1_SOURCES = svm/test_svm_fifo1.c
+test_svm_fifo1_LDADD = libsvm.la libvppinfra.la -lpthread -lrt
+test_svm_fifo1_LDFLAGS = -static
+
+# vi:syntax=automake
diff --git a/src/svm/dir.dox b/src/svm/dir.dox
new file mode 100644
index 00000000..83246979
--- /dev/null
+++ b/src/svm/dir.dox
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016 Comcast Cable Communications Management, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Doxygen directory documentation */
+/**
+@dir
+@brief Shared virtual memory allocation library.
+*/
diff --git a/src/svm/persist.c b/src/svm/persist.c
new file mode 100644
index 00000000..023c596b
--- /dev/null
+++ b/src/svm/persist.c
@@ -0,0 +1,258 @@
+/*
+ *------------------------------------------------------------------
+ * persist.c - persistent data structure storage test / demo code
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/serialize.h>
+#include <svmdb.h>
+
+typedef struct
+{
+ svmdb_client_t *c;
+} persist_main_t;
+
+persist_main_t persist_main;
+
+typedef struct
+{
+ u8 *string1;
+ u8 *string2;
+} demo_struct2_t;
+
+typedef struct
+{
+ demo_struct2_t *demo2;
+ u8 *name;
+} demo_struct1_t;
+
+/*
+ * "malloc" flavor: keep the data structures in persistent shared
+ * memory at all times
+ */
+clib_error_t *
+persist_malloc (persist_main_t * pm)
+{
+ demo_struct2_t *demo2;
+ demo_struct1_t *demo1;
+ time_t starttime = time (0);
+ char *datestring = ctime (&starttime);
+ void *oldheap;
+
+ /* Get back the root pointer */
+ demo1 = svmdb_local_get_variable_reference
+ (pm->c, SVMDB_NAMESPACE_VEC, "demo1_location");
+
+  /* It doesn't exist; create our data structures */
+ if (demo1 == 0)
+ {
+ /* If you want MP / thread safety, lock the region... */
+ pthread_mutex_lock (&pm->c->db_rp->mutex);
+
+ /* Switch to the shared memory region heap */
+ oldheap = svm_push_data_heap (pm->c->db_rp);
+
+ /* Allocate the top-level structure as a single element vector */
+ vec_validate (demo1, 0);
+
+ /* Allocate the next-level structure as a plain old memory obj */
+ demo2 = clib_mem_alloc (sizeof (*demo2));
+
+ demo1->demo2 = demo2;
+ demo1->name = format (0, "My name is Ishmael%c", 0);
+ demo2->string1 = format (0, "Here is string1%c", 0);
+ demo2->string2 = format (0, "Born at %s%c", datestring, 0);
+
+ /* Back to the process-private heap */
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&pm->c->db_rp->mutex);
+
+ /*
+ * Set the root pointer. Note: this guy switches heaps, locks, etc.
+ * We allocated demo1 as a vector to make this "just work..."
+ */
+      svmdb_local_set_vec_variable (pm->c, "demo1_location",
+				    demo1, sizeof (demo1[0]));
+
+ }
+ else
+ {
+ /* retrieve and print data from shared memory */
+ demo2 = demo1->demo2;
+ fformat (stdout, "name: %s\n", demo1->name);
+ fformat (stdout, "demo2 location: %llx\n", demo2);
+ fformat (stdout, "string1: %s\n", demo2->string1);
+ fformat (stdout, "string2: %s\n", demo2->string2);
+ }
+ return 0;
+}
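+
+/*
+ * The sequence above is the canonical shared-region allocation
+ * pattern: take the region mutex, push the region heap, allocate and
+ * format, pop back to the process-private heap, release the mutex.
+ * The embedded pointers survive process restarts only because every
+ * mapper attaches the region at the same base address.
+ */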
+
+void
+unserialize_demo1 (serialize_main_t * sm, va_list * args)
+{
+ demo_struct1_t **result = va_arg (*args, demo_struct1_t **);
+ demo_struct1_t *demo1;
+ demo_struct2_t *demo2;
+
+ /* Allocate data structures in process private memory */
+ demo1 = clib_mem_alloc (sizeof (*demo1));
+ demo2 = clib_mem_alloc (sizeof (*demo2));
+ demo1->demo2 = demo2;
+
+ /* retrieve data from shared memory checkpoint */
+ unserialize_cstring (sm, (char **) &demo1->name);
+ unserialize_cstring (sm, (char **) &demo2->string1);
+ unserialize_cstring (sm, (char **) &demo2->string2);
+ *result = demo1;
+}
+
+void
+serialize_demo1 (serialize_main_t * sm, va_list * args)
+{
+ demo_struct1_t *demo1 = va_arg (*args, demo_struct1_t *);
+ demo_struct2_t *demo2 = demo1->demo2;
+
+ serialize_cstring (sm, (char *) demo1->name);
+ serialize_cstring (sm, (char *) demo2->string1);
+ serialize_cstring (sm, (char *) demo2->string2);
+}
+
+/* Serialize / unserialize variant */
+clib_error_t *
+persist_serialize (persist_main_t * pm)
+{
+ u8 *checkpoint;
+ serialize_main_t sm;
+
+ demo_struct2_t *demo2;
+ demo_struct1_t *demo1;
+ time_t starttime = time (0);
+ char *datestring = ctime (&starttime);
+
+ /* Get back the root pointer */
+ checkpoint = svmdb_local_get_vec_variable (pm->c, "demo1_checkpoint",
+ sizeof (u8));
+
+  /* It doesn't exist; create our data structures */
+ if (checkpoint == 0)
+ {
+ /* Allocate data structures in process-private memory */
+      demo1 = clib_mem_alloc (sizeof (*demo1));
+ demo2 = clib_mem_alloc (sizeof (*demo2));
+
+ demo1->demo2 = demo2;
+ demo1->name = format (0, "My name is Ishmael%c", 0);
+ demo2->string1 = format (0, "Here is string1%c", 0);
+ demo2->string2 = format (0, "Born at %s%c", datestring, 0);
+
+ /* Create checkpoint */
+ serialize_open_vector (&sm, checkpoint);
+ serialize (&sm, serialize_demo1, demo1);
+ checkpoint = serialize_close_vector (&sm);
+
+ /* Copy checkpoint into shared memory */
+ svmdb_local_set_vec_variable (pm->c, "demo1_checkpoint",
+ checkpoint, sizeof (u8));
+ /* Toss the process-private-memory original.. */
+ vec_free (checkpoint);
+ }
+ else
+ {
+ /* Open the checkpoint */
+ unserialize_open_data (&sm, checkpoint, vec_len (checkpoint));
+ unserialize (&sm, unserialize_demo1, &demo1);
+
+ /* Toss the process-private-memory checkpoint copy */
+ vec_free (checkpoint);
+
+ /* Off we go... */
+ demo2 = demo1->demo2;
+ fformat (stdout, "name: %s\n", demo1->name);
+ fformat (stdout, "demo2 location: %llx\n", demo2);
+ fformat (stdout, "string1: %s\n", demo2->string1);
+ fformat (stdout, "string2: %s\n", demo2->string2);
+ }
+ return 0;
+}
+
+
+int
+main (int argc, char **argv)
+{
+ unformat_input_t _input, *input = &_input;
+ persist_main_t *pm = &persist_main;
+ clib_error_t *error = 0;
+
+ /* Make a 4mb database arena, chroot so it's truly private */
+ pm->c = svmdb_map_chroot_size ("/ptest", 4 << 20);
+
+ ASSERT (pm->c);
+
+ unformat_init_command_line (input, argv);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "malloc"))
+ error = persist_malloc (pm);
+ else if (unformat (input, "serialize"))
+ error = persist_serialize (pm);
+ else
+ {
+ error = clib_error_return (0, "Unknown flavor '%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+
+ svmdb_unmap (pm->c);
+
+ if (error)
+ {
+ clib_error_report (error);
+ exit (1);
+ }
+ return 0;
+}
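+
+/*
+ * Suggested demo usage (assumed invocation): run "persist malloc" or
+ * "persist serialize" once to create the state, then again after the
+ * process has exited; the second run finds the data in the /ptest
+ * database and prints it back.
+ */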
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/ssvm.c b/src/svm/ssvm.c
new file mode 100644
index 00000000..c04982de
--- /dev/null
+++ b/src/svm/ssvm.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ssvm.h"
+#include "svm_common.h"
+
+int
+ssvm_master_init (ssvm_private_t * ssvm, u32 master_index)
+{
+ svm_main_region_t *smr = svm_get_root_rp ()->data_base;
+ int ssvm_fd;
+ u8 *ssvm_filename;
+ u8 junk = 0;
+ int flags;
+ ssvm_shared_header_t *sh;
+ u64 ticks = clib_cpu_time_now ();
+ u64 randomize_baseva;
+ void *oldheap;
+
+ if (ssvm->ssvm_size == 0)
+ return SSVM_API_ERROR_NO_SIZE;
+
+ if (CLIB_DEBUG > 1)
+ clib_warning ("[%d] creating segment '%s'", getpid (), ssvm->name);
+
+ ASSERT (vec_c_string_is_terminated (ssvm->name));
+ ssvm_filename = format (0, "/dev/shm/%s%c", ssvm->name, 0);
+
+ unlink ((char *) ssvm_filename);
+
+ vec_free (ssvm_filename);
+
+ ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR | O_CREAT | O_EXCL, 0777);
+
+ if (ssvm_fd < 0)
+ {
+ clib_unix_warning ("create segment '%s'", ssvm->name);
+ return SSVM_API_ERROR_CREATE_FAILURE;
+ }
+
+ if (fchmod (ssvm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
+ clib_unix_warning ("ssvm segment chmod");
+ if (fchown (ssvm_fd, smr->uid, smr->gid) < 0)
+ clib_unix_warning ("ssvm segment chown");
+
+ if (lseek (ssvm_fd, ssvm->ssvm_size, SEEK_SET) < 0)
+ {
+ clib_unix_warning ("lseek");
+ close (ssvm_fd);
+ return SSVM_API_ERROR_SET_SIZE;
+ }
+
+ if (write (ssvm_fd, &junk, 1) != 1)
+ {
+ clib_unix_warning ("set ssvm size");
+ close (ssvm_fd);
+ return SSVM_API_ERROR_SET_SIZE;
+ }
+
+ flags = MAP_SHARED;
+ if (ssvm->requested_va)
+ flags |= MAP_FIXED;
+
+ randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
+
+ if (ssvm->requested_va)
+ ssvm->requested_va += randomize_baseva;
+
+ sh = ssvm->sh =
+ (ssvm_shared_header_t *) mmap ((void *) ssvm->requested_va,
+ ssvm->ssvm_size, PROT_READ | PROT_WRITE,
+ flags, ssvm_fd, 0);
+
+ if (ssvm->sh == MAP_FAILED)
+ {
+ clib_unix_warning ("mmap");
+ close (ssvm_fd);
+ return SSVM_API_ERROR_MMAP;
+ }
+
+ close (ssvm_fd);
+
+ ssvm->my_pid = getpid ();
+ sh->master_pid = ssvm->my_pid;
+ sh->ssvm_size = ssvm->ssvm_size;
+ sh->heap = mheap_alloc_with_flags
+ (((u8 *) sh) + MMAP_PAGESIZE, ssvm->ssvm_size - MMAP_PAGESIZE,
+ MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE);
+
+ sh->ssvm_va = pointer_to_uword (sh);
+ sh->master_index = master_index;
+
+ oldheap = ssvm_push_heap (sh);
+ sh->name = format (0, "%s%c", ssvm->name, 0);
+ ssvm_pop_heap (oldheap);
+
+ ssvm->i_am_master = 1;
+
+  /* The application has to set sh->ready... */
+ return 0;
+}
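+
+/*
+ * Resulting segment layout: page 0 holds the ssvm_shared_header_t and
+ * the remainder of the segment is a single mheap.  A slave first maps
+ * page 0 to learn ssvm_va and ssvm_size, then re-maps the whole
+ * segment MAP_FIXED at that address so shared pointers resolve
+ * identically in both processes.
+ */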
+
+int
+ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds)
+{
+ struct stat stat;
+ int ssvm_fd = -1;
+ ssvm_shared_header_t *sh;
+
+ ASSERT (vec_c_string_is_terminated (ssvm->name));
+ ssvm->i_am_master = 0;
+
+ while (timeout_in_seconds-- > 0)
+ {
+ if (ssvm_fd < 0)
+ ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR, 0777);
+ if (ssvm_fd < 0)
+ {
+ sleep (1);
+ continue;
+ }
+ if (fstat (ssvm_fd, &stat) < 0)
+ {
+ sleep (1);
+ continue;
+ }
+
+ if (stat.st_size > 0)
+ goto map_it;
+ }
+  if (ssvm_fd >= 0)
+    close (ssvm_fd);
+  clib_warning ("slave timeout");
+ return SSVM_API_ERROR_SLAVE_TIMEOUT;
+
+map_it:
+ sh = (void *) mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+ ssvm_fd, 0);
+ if (sh == MAP_FAILED)
+ {
+ clib_unix_warning ("slave research mmap");
+ close (ssvm_fd);
+ return SSVM_API_ERROR_MMAP;
+ }
+
+  while (timeout_in_seconds-- > 0)
+    {
+      if (sh->ready)
+	goto re_map_it;
+      sleep (1);
+    }
+ close (ssvm_fd);
+ munmap (sh, MMAP_PAGESIZE);
+ clib_warning ("slave timeout 2");
+ return SSVM_API_ERROR_SLAVE_TIMEOUT;
+
+re_map_it:
+ ssvm->requested_va = (u64) sh->ssvm_va;
+ ssvm->ssvm_size = sh->ssvm_size;
+ munmap (sh, MMAP_PAGESIZE);
+
+ sh = ssvm->sh = (void *) mmap ((void *) ssvm->requested_va, ssvm->ssvm_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, ssvm_fd, 0);
+
+ if (sh == MAP_FAILED)
+ {
+ clib_unix_warning ("slave final mmap");
+ close (ssvm_fd);
+ return SSVM_API_ERROR_MMAP;
+ }
+  close (ssvm_fd);
+  sh->slave_pid = getpid ();
+  return 0;
+}
+
+void
+ssvm_delete (ssvm_private_t * ssvm)
+{
+ u8 *fn;
+
+ fn = format (0, "/dev/shm/%s%c", ssvm->name, 0);
+
+ if (CLIB_DEBUG > 1)
+ clib_warning ("[%d] unlinking ssvm (%s) backing file '%s'", getpid (),
+ ssvm->name, fn);
+
+ /* Throw away the backing file */
+ if (unlink ((char *) fn) < 0)
+ clib_unix_warning ("unlink segment '%s'", ssvm->name);
+
+ vec_free (fn);
+ vec_free (ssvm->name);
+
+ munmap ((void *) ssvm->requested_va, ssvm->ssvm_size);
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/ssvm.h b/src/svm/ssvm.h
new file mode 100644
index 00000000..8466e155
--- /dev/null
+++ b/src/svm/ssvm.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ssvm_h__
+#define __included_ssvm_h__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+
+#ifndef MMAP_PAGESIZE
+#define MMAP_PAGESIZE (clib_mem_get_page_size())
+#endif
+
+#define SSVM_N_OPAQUE 7
+
+typedef struct
+{
+ /* Spin-lock */
+ volatile u32 lock;
+ volatile u32 owner_pid;
+ int recursion_count;
+ u32 tag; /* for debugging */
+
+ /* The allocation arena */
+ void *heap;
+
+ /* Segment must be mapped at this address, or no supper */
+ u64 ssvm_va;
+ /* The actual mmap size */
+ u64 ssvm_size;
+ u32 master_pid;
+ u32 slave_pid;
+ u8 *name;
+ void *opaque[SSVM_N_OPAQUE];
+
+ /* Set when the master application thinks it's time to make the donuts */
+ volatile u32 ready;
+
+ /* Needed to make unique MAC addresses, etc. */
+ u32 master_index;
+} ssvm_shared_header_t;
+
+typedef struct
+{
+ ssvm_shared_header_t *sh;
+ u64 ssvm_size;
+ u32 my_pid;
+ u32 vlib_hw_if_index;
+ u8 *name;
+ uword requested_va;
+ int i_am_master;
+ u32 per_interface_next_index;
+ u32 *rx_queue;
+} ssvm_private_t;
+
+always_inline void
+ssvm_lock (ssvm_shared_header_t * h, u32 my_pid, u32 tag)
+{
+ if (h->owner_pid == my_pid)
+ {
+ h->recursion_count++;
+ return;
+ }
+
+ while (__sync_lock_test_and_set (&h->lock, 1))
+ ;
+
+ h->owner_pid = my_pid;
+ h->recursion_count = 1;
+ h->tag = tag;
+}
+
+always_inline void
+ssvm_lock_non_recursive (ssvm_shared_header_t * h, u32 tag)
+{
+ while (__sync_lock_test_and_set (&h->lock, 1))
+ ;
+
+ h->tag = tag;
+}
+
+always_inline void
+ssvm_unlock (ssvm_shared_header_t * h)
+{
+ if (--h->recursion_count == 0)
+ {
+ h->owner_pid = 0;
+ h->tag = 0;
+ CLIB_MEMORY_BARRIER ();
+ h->lock = 0;
+ }
+}
+
+always_inline void
+ssvm_unlock_non_recursive (ssvm_shared_header_t * h)
+{
+ h->tag = 0;
+ CLIB_MEMORY_BARRIER ();
+ h->lock = 0;
+}
+
+static inline void *
+ssvm_push_heap (ssvm_shared_header_t * sh)
+{
+ u8 *oldheap;
+ oldheap = clib_mem_set_heap (sh->heap);
+ return ((void *) oldheap);
+}
+
+static inline void
+ssvm_pop_heap (void *oldheap)
+{
+ clib_mem_set_heap (oldheap);
+}
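+
+/*
+ * A minimal critical-section sketch using the helpers above
+ * (illustrative only; tag 1 is an arbitrary debug marker):
+ *
+ *   ssvm_lock (sh, getpid (), 1);
+ *   oldheap = ssvm_push_heap (sh);
+ *   ... allocate or mutate shared state ...
+ *   ssvm_pop_heap (oldheap);
+ *   ssvm_unlock (sh);
+ */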
+
+#define foreach_ssvm_api_error \
+_(NO_NAME, "No shared segment name", -100) \
+_(NO_SIZE, "Size not set (master)", -101) \
+_(CREATE_FAILURE, "Create failed", -102) \
+_(SET_SIZE, "Set size failed", -103) \
+_(MMAP, "mmap failed", -104) \
+_(SLAVE_TIMEOUT, "Slave map timeout", -105)
+
+typedef enum
+{
+#define _(n,s,c) SSVM_API_ERROR_##n = c,
+ foreach_ssvm_api_error
+#undef _
+} ssvm_api_error_enum_t;
+
+int ssvm_master_init (ssvm_private_t * ssvm, u32 master_index);
+int ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds);
+void ssvm_delete (ssvm_private_t * ssvm);
+
+#endif /* __included_ssvm_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm.c b/src/svm/svm.c
new file mode 100644
index 00000000..f97803cd
--- /dev/null
+++ b/src/svm/svm.c
@@ -0,0 +1,1268 @@
+/*
+ *------------------------------------------------------------------
+ * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
+ * library
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+
+#include "svm.h"
+
+static svm_region_t *root_rp;
+static int root_rp_refcount;
+
+#define MAXLOCK 2
+static pthread_mutex_t *mutexes_held[MAXLOCK];
+static int nheld;
+
+svm_region_t *
+svm_get_root_rp (void)
+{
+ return root_rp;
+}
+
+#define MUTEX_DEBUG
+
+static void
+region_lock (svm_region_t * rp, int tag)
+{
+ pthread_mutex_lock (&rp->mutex);
+#ifdef MUTEX_DEBUG
+ rp->mutex_owner_pid = getpid ();
+ rp->mutex_owner_tag = tag;
+#endif
+ ASSERT (nheld < MAXLOCK);
+ /*
+ * Keep score of held mutexes so we can try to exit
+ * cleanly if the world comes to an end at the worst possible
+ * moment
+ */
+ mutexes_held[nheld++] = &rp->mutex;
+}
+
+static void
+region_unlock (svm_region_t * rp)
+{
+ int i, j;
+#ifdef MUTEX_DEBUG
+ rp->mutex_owner_pid = 0;
+ rp->mutex_owner_tag = 0;
+#endif
+
+ for (i = nheld - 1; i >= 0; i--)
+ {
+ if (mutexes_held[i] == &rp->mutex)
+ {
+ for (j = i; j < MAXLOCK - 1; j++)
+ mutexes_held[j] = mutexes_held[j + 1];
+ nheld--;
+ goto found;
+ }
+ }
+ ASSERT (0);
+
+found:
+ CLIB_MEMORY_BARRIER ();
+ pthread_mutex_unlock (&rp->mutex);
+}
+
+
+static u8 *
+format_svm_flags (u8 * s, va_list * args)
+{
+ uword f = va_arg (*args, uword);
+
+ if (f & SVM_FLAGS_MHEAP)
+ s = format (s, "MHEAP ");
+ if (f & SVM_FLAGS_FILE)
+ s = format (s, "FILE ");
+ if (f & SVM_FLAGS_NODATA)
+ s = format (s, "NODATA ");
+ if (f & SVM_FLAGS_NEED_DATA_INIT)
+ s = format (s, "INIT ");
+
+ return (s);
+}
+
+static u8 *
+format_svm_size (u8 * s, va_list * args)
+{
+ uword size = va_arg (*args, uword);
+
+ if (size >= (1 << 20))
+ {
+ s = format (s, "(%d mb)", size >> 20);
+ }
+ else if (size >= (1 << 10))
+ {
+ s = format (s, "(%d kb)", size >> 10);
+ }
+ else
+ {
+ s = format (s, "(%d bytes)", size);
+ }
+ return (s);
+}
+
+u8 *
+format_svm_region (u8 * s, va_list * args)
+{
+ svm_region_t *rp = va_arg (*args, svm_region_t *);
+ int verbose = va_arg (*args, int);
+ int i;
+ uword lo, hi;
+
+ s = format (s, "%s: base va 0x%x size 0x%x %U\n",
+ rp->region_name, rp->virtual_base,
+ rp->virtual_size, format_svm_size, rp->virtual_size);
+ s = format (s, " user_ctx 0x%x, bitmap_size %d\n",
+ rp->user_ctx, rp->bitmap_size);
+
+ if (verbose)
+ {
+ s = format (s, " flags: 0x%x %U\n", rp->flags,
+ format_svm_flags, rp->flags);
+ s = format (s,
+ " region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
+ rp->region_heap, rp->data_base, rp->data_heap);
+ }
+
+ s = format (s, " %d clients, pids: ", vec_len (rp->client_pids));
+
+ for (i = 0; i < vec_len (rp->client_pids); i++)
+ s = format (s, "%d ", rp->client_pids[i]);
+
+ s = format (s, "\n");
+
+ if (verbose)
+ {
+ lo = hi = ~0;
+
+ s = format (s, " VM in use: ");
+
+ for (i = 0; i < rp->bitmap_size; i++)
+ {
+ if (clib_bitmap_get_no_check (rp->bitmap, i) != 0)
+ {
+ if (lo == ~0)
+ {
+ hi = lo = rp->virtual_base + i * MMAP_PAGESIZE;
+ }
+ else
+ {
+ hi = rp->virtual_base + i * MMAP_PAGESIZE;
+ }
+ }
+ else
+ {
+ if (lo != ~0)
+ {
+ hi = rp->virtual_base + i * MMAP_PAGESIZE - 1;
+ s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi,
+ (hi - lo) >> 10);
+ lo = hi = ~0;
+ }
+ }
+ }
+ s = format (s, " rgn heap stats: %U", format_mheap,
+ rp->region_heap, 0);
+ if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap)
+ {
+ s = format (s, "\n data heap stats: %U", format_mheap,
+ rp->data_heap, 1);
+ }
+ s = format (s, "\n");
+ }
+
+ return (s);
+}
+
+/*
+ * rnd_pagesize
+ * Round to a pagesize multiple, presumably 4k works
+ */
+static u64
+rnd_pagesize (u64 size)
+{
+ u64 rv;
+
+ rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
+ return (rv);
+}
+
+/*
+ * svm_data_region_setup
+ */
+static int
+svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp)
+{
+ int fd;
+ u8 junk = 0;
+ uword map_size;
+
+ map_size = rp->virtual_size - (MMAP_PAGESIZE +
+ (a->pvt_heap_size ? a->pvt_heap_size :
+ SVM_PVT_MHEAP_SIZE));
+
+ if (a->flags & SVM_FLAGS_FILE)
+ {
+ struct stat statb;
+
+ fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);
+
+ if (fd < 0)
+ {
+ clib_unix_warning ("open");
+ return -1;
+ }
+
+ if (fstat (fd, &statb) < 0)
+ {
+ clib_unix_warning ("fstat");
+ close (fd);
+ return -2;
+ }
+
+ if (statb.st_mode & S_IFREG)
+ {
+ if (statb.st_size == 0)
+ {
+ if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
+ {
+ clib_unix_warning ("seek region size");
+ close (fd);
+ return -3;
+ }
+ if (write (fd, &junk, 1) != 1)
+ {
+ clib_unix_warning ("set region size");
+ close (fd);
+ return -3;
+ }
+ }
+ else
+ {
+ map_size = rnd_pagesize (statb.st_size);
+ }
+ }
+ else
+ {
+ map_size = a->backing_mmap_size;
+ }
+
+ ASSERT (map_size <= rp->virtual_size -
+ (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));
+
+ if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
+ {
+ clib_unix_warning ("mmap");
+ close (fd);
+ return -3;
+ }
+ close (fd);
+      rp->backing_file = (char *) format (0, "%s%c", a->backing_file, 0);
+ rp->flags |= SVM_FLAGS_FILE;
+ }
+
+ if (a->flags & SVM_FLAGS_MHEAP)
+ {
+ rp->data_heap =
+ mheap_alloc_with_flags ((void *) (rp->data_base), map_size,
+ MHEAP_FLAG_DISABLE_VM);
+ rp->flags |= SVM_FLAGS_MHEAP;
+ }
+ return 0;
+}
+
+static int
+svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp)
+{
+ int fd;
+ u8 junk = 0;
+ uword map_size;
+ struct stat statb;
+
+ map_size = rp->virtual_size -
+ (MMAP_PAGESIZE
+ + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE));
+
+ if (a->flags & SVM_FLAGS_FILE)
+ {
+
+ fd = open (a->backing_file, O_RDWR, 0777);
+
+ if (fd < 0)
+ {
+ clib_unix_warning ("open");
+ return -1;
+ }
+
+ if (fstat (fd, &statb) < 0)
+ {
+ clib_unix_warning ("fstat");
+ close (fd);
+ return -2;
+ }
+
+ if (statb.st_mode & S_IFREG)
+ {
+ if (statb.st_size == 0)
+ {
+ if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
+ {
+ clib_unix_warning ("seek region size");
+ close (fd);
+ return -3;
+ }
+ if (write (fd, &junk, 1) != 1)
+ {
+ clib_unix_warning ("set region size");
+ close (fd);
+ return -3;
+ }
+ }
+ else
+ {
+ map_size = rnd_pagesize (statb.st_size);
+ }
+ }
+ else
+ {
+ map_size = a->backing_mmap_size;
+ }
+
+ ASSERT (map_size <= rp->virtual_size
+ - (MMAP_PAGESIZE
+ +
+ (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)));
+
+ if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
+ {
+ clib_unix_warning ("mmap");
+ close (fd);
+ return -3;
+ }
+ close (fd);
+ }
+ return 0;
+}
+
+u8 *
+shm_name_from_svm_map_region_args (svm_map_region_args_t * a)
+{
+ u8 *path;
+ u8 *shm_name;
+ u8 *split_point;
+ u8 *mkdir_arg = 0;
+ int root_path_offset = 0;
+ int name_offset = 0;
+
+ if (a->root_path)
+ {
+ /* Tolerate present or absent slashes */
+ if (a->root_path[0] == '/')
+ root_path_offset++;
+
+ /* create the root_path under /dev/shm
+ iterate through path creating directories */
+
+ path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0);
+ split_point = path + 1;
+ vec_add1 (mkdir_arg, '-');
+
+ while (*split_point)
+ {
+ while (*split_point && *split_point != '/')
+ {
+ vec_add1 (mkdir_arg, *split_point);
+ split_point++;
+ }
+ vec_add1 (mkdir_arg, 0);
+
+ /* ready to descend another level */
+ mkdir_arg[vec_len (mkdir_arg) - 1] = '-';
+ split_point++;
+ }
+ vec_free (mkdir_arg);
+ vec_free (path);
+
+ if (a->name[0] == '/')
+ name_offset = 1;
+
+ shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset],
+ &a->name[name_offset], 0);
+ }
+ else
+ shm_name = format (0, "%s%c", a->name, 0);
+ return (shm_name);
+}
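+
+/*
+ * Example: root_path "/vpp1" and name "/global_vm" combine into the
+ * shm name "/vpp1-global_vm", i.e. /dev/shm/vpp1-global_vm; without a
+ * root_path the name is used as-is.
+ */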
+
+/*
+ * svm_map_region
+ */
+void *
+svm_map_region (svm_map_region_args_t * a)
+{
+ int svm_fd;
+ svm_region_t *rp;
+ pthread_mutexattr_t attr;
+ pthread_condattr_t cattr;
+ int deadman = 0;
+ u8 junk = 0;
+ void *oldheap;
+ int overhead_space;
+ int rv;
+ uword data_base;
+ int nbits, words, bit;
+ int pid_holding_region_lock;
+ u8 *shm_name;
+ int dead_region_recovery = 0;
+ int time_left;
+ struct stat stat;
+ struct timespec ts, tsrem;
+
+ ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
+ ASSERT (a->name);
+
+ shm_name = shm_name_from_svm_map_region_args (a);
+
+ if (CLIB_DEBUG > 1)
+ clib_warning ("[%d] map region %s: shm_open (%s)",
+ getpid (), a->name, shm_name);
+
+ svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);
+
+ if (svm_fd >= 0)
+ {
+ if (fchmod (svm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
+ clib_unix_warning ("segment chmod");
+ /* This turns out to fail harmlessly if the client starts first */
+ if (fchown (svm_fd, a->uid, a->gid) < 0)
+ clib_unix_warning ("segment chown [ok if client starts first]");
+
+ vec_free (shm_name);
+
+ if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1)
+ {
+ clib_warning ("seek region size");
+ close (svm_fd);
+ return (0);
+ }
+ if (write (svm_fd, &junk, 1) != 1)
+ {
+ clib_warning ("set region size");
+ close (svm_fd);
+ return (0);
+ }
+
+ rp = mmap (uword_to_pointer (a->baseva, void *), a->size,
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);
+
+ if (rp == (svm_region_t *) MAP_FAILED)
+ {
+ clib_unix_warning ("mmap create");
+ close (svm_fd);
+ return (0);
+ }
+ close (svm_fd);
+ memset (rp, 0, sizeof (*rp));
+
+ if (pthread_mutexattr_init (&attr))
+ clib_unix_warning ("mutexattr_init");
+
+ if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
+ clib_unix_warning ("mutexattr_setpshared");
+
+ if (pthread_mutex_init (&rp->mutex, &attr))
+ clib_unix_warning ("mutex_init");
+
+ if (pthread_mutexattr_destroy (&attr))
+ clib_unix_warning ("mutexattr_destroy");
+
+ if (pthread_condattr_init (&cattr))
+ clib_unix_warning ("condattr_init");
+
+ if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
+ clib_unix_warning ("condattr_setpshared");
+
+ if (pthread_cond_init (&rp->condvar, &cattr))
+ clib_unix_warning ("cond_init");
+
+ if (pthread_condattr_destroy (&cattr))
+ clib_unix_warning ("condattr_destroy");
+
+ region_lock (rp, 1);
+
+ rp->virtual_base = a->baseva;
+ rp->virtual_size = a->size;
+
+ rp->region_heap =
+ mheap_alloc_with_flags (uword_to_pointer
+ (a->baseva + MMAP_PAGESIZE, void *),
+ (a->pvt_heap_size !=
+ 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE,
+ MHEAP_FLAG_DISABLE_VM);
+ oldheap = svm_push_pvt_heap (rp);
+
+ rp->region_name = (char *) format (0, "%s%c", a->name, 0);
+ vec_add1 (rp->client_pids, getpid ());
+
+ nbits = rp->virtual_size / MMAP_PAGESIZE;
+
+ ASSERT (nbits > 0);
+ rp->bitmap_size = nbits;
+ words = (nbits + BITS (uword) - 1) / BITS (uword);
+ vec_validate (rp->bitmap, words - 1);
+
+ overhead_space = MMAP_PAGESIZE /* header */ +
+ ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
+
+ bit = 0;
+ data_base = (uword) rp->virtual_base;
+
+ if (a->flags & SVM_FLAGS_NODATA)
+ rp->flags |= SVM_FLAGS_NEED_DATA_INIT;
+
+ do
+ {
+ clib_bitmap_set_no_check (rp->bitmap, bit, 1);
+ bit++;
+ overhead_space -= MMAP_PAGESIZE;
+ data_base += MMAP_PAGESIZE;
+ }
+ while (overhead_space > 0);
+
+ rp->data_base = (void *) data_base;
+
+ /*
+ * Note: although the POSIX spec guarantees that only one
+ * process enters this block, we have to play games
+ * to hold off clients until e.g. the mutex is ready
+ */
+ rp->version = SVM_VERSION;
+
+ /* setup the data portion of the region */
+
+ rv = svm_data_region_create (a, rp);
+ if (rv)
+ {
+ clib_warning ("data_region_create: %d", rv);
+ }
+
+ region_unlock (rp);
+
+ svm_pop_heap (oldheap);
+
+ return ((void *) rp);
+ }
+ else
+ {
+ svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);
+
+ vec_free (shm_name);
+
+ if (svm_fd < 0)
+ {
+ perror ("svm_region_map(mmap open)");
+ return (0);
+ }
+
+ time_left = 20;
+ while (1)
+ {
+ if (0 != fstat (svm_fd, &stat))
+ {
+ clib_warning ("fstat failed: %d", errno);
+ close (svm_fd);
+ return (0);
+ }
+ if (stat.st_size > 0)
+ {
+ break;
+ }
+ if (0 == time_left)
+ {
+ clib_warning ("waiting for resize of shm file timed out");
+ close (svm_fd);
+ return (0);
+ }
+ ts.tv_sec = 0;
+ ts.tv_nsec = 100000000;
+ while (nanosleep (&ts, &tsrem) < 0)
+ ts = tsrem;
+ time_left--;
+ }
+
+ rp = mmap (0, MMAP_PAGESIZE,
+ PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
+
+ if (rp == (svm_region_t *) MAP_FAILED)
+ {
+ close (svm_fd);
+ clib_warning ("mmap");
+ return (0);
+ }
+ /*
+ * We lost the footrace to create this region; make sure
+ * the winner has crossed the finish line.
+ */
+ while (rp->version == 0 && deadman++ < 5)
+ {
+ sleep (1);
+ }
+
+ /*
+ * <bleep>-ed?
+ */
+      if (rp->version == 0)
+	{
+	  clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION);
+	  close (svm_fd);
+	  munmap (rp, MMAP_PAGESIZE);
+	  return (0);
+	}
+ /* Remap now that the region has been placed */
+ a->baseva = rp->virtual_base;
+ a->size = rp->virtual_size;
+ munmap (rp, MMAP_PAGESIZE);
+
+ rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, svm_fd, 0);
+      if ((uword) rp == (uword) MAP_FAILED)
+	{
+	  clib_unix_warning ("mmap");
+	  close (svm_fd);
+	  return (0);
+	}
+      close (svm_fd);
+
+ if ((uword) rp != rp->virtual_base)
+ {
+ clib_warning ("mmap botch");
+ }
+
+ /*
+ * Try to fix the region mutex if it is held by
+ * a dead process
+ */
+ pid_holding_region_lock = rp->mutex_owner_pid;
+ if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0)
+ {
+ clib_warning
+ ("region %s mutex held by dead pid %d, tag %d, force unlock",
+ rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
+ /* owner pid is nonexistent */
+ rp->mutex.__data.__owner = 0;
+ rp->mutex.__data.__lock = 0;
+ dead_region_recovery = 1;
+ }
+
+ if (dead_region_recovery)
+ clib_warning ("recovery: attempt to re-lock region");
+
+ region_lock (rp, 2);
+ oldheap = svm_push_pvt_heap (rp);
+ vec_add1 (rp->client_pids, getpid ());
+
+ if (dead_region_recovery)
+ clib_warning ("recovery: attempt svm_data_region_map");
+
+ rv = svm_data_region_map (a, rp);
+ if (rv)
+ {
+ clib_warning ("data_region_map: %d", rv);
+ }
+
+ if (dead_region_recovery)
+ clib_warning ("unlock and continue");
+
+ region_unlock (rp);
+
+ svm_pop_heap (oldheap);
+
+ return ((void *) rp);
+
+ }
+ return 0; /* NOTREACHED */
+}
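+
+/*
+ * To recap the create-vs-attach race above: shm_open with
+ * O_CREAT | O_EXCL picks a single winner, which builds the region and
+ * publishes rp->version last; every loser polls page 0 until version
+ * is nonzero, then re-maps the full region MAP_FIXED at the
+ * advertised base address.
+ */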
+
+static void
+svm_mutex_cleanup (void)
+{
+ int i;
+ for (i = 0; i < nheld; i++)
+ {
+ pthread_mutex_unlock (mutexes_held[i]);
+ }
+}
+
+static int
+svm_region_init_internal (svm_map_region_args_t * a)
+{
+ svm_region_t *rp;
+ u64 ticks = clib_cpu_time_now ();
+ uword randomize_baseva;
+
+ /* guard against klutz calls */
+ if (root_rp)
+ return -1;
+
+ root_rp_refcount++;
+
+ atexit (svm_mutex_cleanup);
+
+ /* Randomize the shared-VM base at init time */
+ if (MMAP_PAGESIZE <= (4 << 10))
+ randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
+ else
+ randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;
+
+ a->baseva += randomize_baseva;
+
+ rp = svm_map_region (a);
+ if (!rp)
+ return -1;
+
+ region_lock (rp, 3);
+
+ /* Set up the main region data structures */
+ if (rp->flags & SVM_FLAGS_NEED_DATA_INIT)
+ {
+ svm_main_region_t *mp = 0;
+ void *oldheap;
+
+ rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);
+
+ oldheap = svm_push_pvt_heap (rp);
+ vec_validate (mp, 0);
+ mp->name_hash = hash_create_string (0, sizeof (uword));
+ mp->root_path = a->root_path ? format (0, "%s%c", a->root_path, 0) : 0;
+ mp->uid = a->uid;
+ mp->gid = a->gid;
+ rp->data_base = mp;
+ svm_pop_heap (oldheap);
+ }
+ region_unlock (rp);
+ root_rp = rp;
+
+ return 0;
+}
+
+void
+svm_region_init (void)
+{
+ svm_map_region_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (*a));
+ a->root_path = 0;
+ a->name = SVM_GLOBAL_REGION_NAME;
+ a->baseva = SVM_GLOBAL_REGION_BASEVA;
+ a->size = SVM_GLOBAL_REGION_SIZE;
+ a->flags = SVM_FLAGS_NODATA;
+ a->uid = 0;
+ a->gid = 0;
+
+ svm_region_init_internal (a);
+}
+
+int
+svm_region_init_chroot (const char *root_path)
+{
+ svm_map_region_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (*a));
+ a->root_path = root_path;
+ a->name = SVM_GLOBAL_REGION_NAME;
+ a->baseva = SVM_GLOBAL_REGION_BASEVA;
+ a->size = SVM_GLOBAL_REGION_SIZE;
+ a->flags = SVM_FLAGS_NODATA;
+ a->uid = 0;
+ a->gid = 0;
+
+ return svm_region_init_internal (a);
+}
+
+void
+svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid)
+{
+ svm_map_region_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (*a));
+ a->root_path = root_path;
+ a->name = SVM_GLOBAL_REGION_NAME;
+ a->baseva = SVM_GLOBAL_REGION_BASEVA;
+ a->size = SVM_GLOBAL_REGION_SIZE;
+ a->flags = SVM_FLAGS_NODATA;
+ a->uid = uid;
+ a->gid = gid;
+
+ svm_region_init_internal (a);
+}
+
+void
+svm_region_init_args (svm_map_region_args_t * a)
+{
+ svm_region_init_internal (a);
+}
+
+void *
+svm_region_find_or_create (svm_map_region_args_t * a)
+{
+ svm_main_region_t *mp;
+ svm_region_t *rp;
+ uword need_nbits;
+ int index, i;
+ void *oldheap;
+ uword *p;
+ u8 *name;
+ svm_subregion_t *subp;
+
+ ASSERT (root_rp);
+
+ a->size += MMAP_PAGESIZE +
+ ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
+ a->size = rnd_pagesize (a->size);
+
+ region_lock (root_rp, 4);
+ oldheap = svm_push_pvt_heap (root_rp);
+ mp = root_rp->data_base;
+
+ ASSERT (mp);
+
+ /* Map the named region from the correct chroot environment */
+ if (a->root_path == NULL)
+ a->root_path = (char *) mp->root_path;
+
+ /*
+ * See if this region is already known. If it is, we're
+ * almost done...
+ */
+ p = hash_get_mem (mp->name_hash, a->name);
+
+ if (p)
+ {
+ rp = svm_map_region (a);
+ region_unlock (root_rp);
+ svm_pop_heap (oldheap);
+ return rp;
+ }
+
+ /* Create the region. */
+ ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
+
+ need_nbits = a->size / MMAP_PAGESIZE;
+
+  index = 1; /* $$$ fixme, figure out how many bits to really skip */
+
+ /*
+ * Scan the virtual space allocation bitmap, looking for a large
+ * enough chunk
+ */
+ do
+ {
+ if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0)
+ {
+ for (i = 0; i < (need_nbits - 1); i++)
+ {
+ if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1)
+ {
+ index = index + i;
+ goto next;
+ }
+ }
+ break;
+ }
+ index++;
+ next:;
+ }
+ while (index < root_rp->bitmap_size);
+
+ /* Completely out of VM? */
+ if (index >= root_rp->bitmap_size)
+ {
+ clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)",
+ root_rp->region_name, a->size, a->size);
+ svm_pop_heap (oldheap);
+ region_unlock (root_rp);
+ return 0;
+ }
+
+ /*
+ * Mark virtual space allocated
+ */
+#if CLIB_DEBUG > 1
+ clib_warning ("set %d bits at index %d", need_nbits, index);
+#endif
+
+ for (i = 0; i < need_nbits; i++)
+ {
+ clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1);
+ }
+
+ /* Place this region where it goes... */
+ a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE;
+
+ rp = svm_map_region (a);
+
+ pool_get (mp->subregions, subp);
+ name = format (0, "%s%c", a->name, 0);
+ subp->subregion_name = name;
+
+ hash_set_mem (mp->name_hash, name, subp - mp->subregions);
+
+ svm_pop_heap (oldheap);
+
+ region_unlock (root_rp);
+
+ return (rp);
+}
+
+void
+svm_region_unlink (svm_region_t * rp)
+{
+ svm_map_region_args_t _a, *a = &_a;
+ svm_main_region_t *mp;
+ u8 *shm_name;
+
+ ASSERT (root_rp);
+ ASSERT (rp);
+ ASSERT (vec_c_string_is_terminated (rp->region_name));
+
+ mp = root_rp->data_base;
+ ASSERT (mp);
+
+ a->root_path = (char *) mp->root_path;
+ a->name = rp->region_name;
+ shm_name = shm_name_from_svm_map_region_args (a);
+ if (CLIB_DEBUG > 1)
+ clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name);
+ shm_unlink ((const char *) shm_name);
+ vec_free (shm_name);
+}
+
+/*
+ * svm_region_unmap
+ *
+ * Let go of the indicated region. If the calling process
+ * is the last customer, throw it away completely.
+ * The root region mutex guarantees atomicity with respect to
+ * a new region client showing up at the wrong moment.
+ */
+void
+svm_region_unmap (void *rp_arg)
+{
+ int i, mypid = getpid ();
+ int nclients_left;
+ void *oldheap;
+ uword virtual_base, virtual_size;
+ svm_region_t *rp = rp_arg;
+ char *name;
+
+ /*
+ * If we take a signal while holding one or more shared-memory
+ * mutexes, we may end up back here from an otherwise
+ * benign exit handler. Bail out to avoid a recursive
+ * mutex screw-up.
+ */
+ if (nheld)
+ return;
+
+ ASSERT (rp);
+ ASSERT (root_rp);
+
+ if (CLIB_DEBUG > 1)
+ clib_warning ("[%d] unmap region %s", getpid (), rp->region_name);
+
+ region_lock (root_rp, 5);
+ region_lock (rp, 6);
+
+ oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */
+
+ /* Remove the caller from the list of mappers */
+ for (i = 0; i < vec_len (rp->client_pids); i++)
+ {
+ if (rp->client_pids[i] == mypid)
+ {
+ vec_delete (rp->client_pids, 1, i);
+ goto found;
+ }
+ }
+ clib_warning ("pid %d AWOL", mypid);
+
+found:
+
+ svm_pop_heap (oldheap);
+
+ nclients_left = vec_len (rp->client_pids);
+ virtual_base = rp->virtual_base;
+ virtual_size = rp->virtual_size;
+
+ if (nclients_left == 0)
+ {
+ int index, nbits, i;
+ svm_main_region_t *mp;
+ uword *p;
+ svm_subregion_t *subp;
+
+ /* Kill the region, last guy on his way out */
+
+ oldheap = svm_push_pvt_heap (root_rp);
+ name = vec_dup (rp->region_name);
+
+ virtual_base = rp->virtual_base;
+ virtual_size = rp->virtual_size;
+
+ /* Figure out which bits to clear in the root region bitmap */
+ index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE;
+
+ nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE;
+
+#if CLIB_DEBUG > 1
+ clib_warning ("clear %d bits at index %d", nbits, index);
+#endif
+ /* Give back the allocated VM */
+ for (i = 0; i < nbits; i++)
+ {
+ clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0);
+ }
+
+ mp = root_rp->data_base;
+
+ p = hash_get_mem (mp->name_hash, name);
+
+ /* Better never happen ... */
+ if (p == NULL)
+ {
+ region_unlock (rp);
+ region_unlock (root_rp);
+ svm_pop_heap (oldheap);
+ clib_warning ("Region name '%s' not found?", name);
+ return;
+ }
+
+ /* Remove from the root region subregion pool */
+ subp = mp->subregions + p[0];
+ pool_put (mp->subregions, subp);
+
+ hash_unset_mem (mp->name_hash, name);
+
+ vec_free (name);
+
+ region_unlock (rp);
+ svm_region_unlink (rp);
+ munmap ((void *) virtual_base, virtual_size);
+ region_unlock (root_rp);
+ svm_pop_heap (oldheap);
+ return;
+ }
+
+ region_unlock (rp);
+ region_unlock (root_rp);
+
+ munmap ((void *) virtual_base, virtual_size);
+}
+
+/*
+ * svm_region_exit
+ */
+void
+svm_region_exit ()
+{
+ void *oldheap;
+ int i, mypid = getpid ();
+ uword virtual_base, virtual_size;
+
+ /* It felt so nice we did it twice... */
+ if (root_rp == 0)
+ return;
+
+ if (--root_rp_refcount > 0)
+ return;
+
+ /*
+ * If we take a signal while holding one or more shared-memory
+ * mutexes, we may end up back here from an otherwise
+ * benign exit handler. Bail out to avoid a recursive
+ * mutex screw-up.
+ */
+ if (nheld)
+ return;
+
+ region_lock (root_rp, 7);
+ oldheap = svm_push_pvt_heap (root_rp);
+
+ virtual_base = root_rp->virtual_base;
+ virtual_size = root_rp->virtual_size;
+
+ for (i = 0; i < vec_len (root_rp->client_pids); i++)
+ {
+ if (root_rp->client_pids[i] == mypid)
+ {
+ vec_delete (root_rp->client_pids, 1, i);
+ goto found;
+ }
+ }
+ clib_warning ("pid %d AWOL", mypid);
+
+found:
+
+ if (vec_len (root_rp->client_pids) == 0)
+ svm_region_unlink (root_rp);
+
+ region_unlock (root_rp);
+ svm_pop_heap (oldheap);
+
+ root_rp = 0;
+ munmap ((void *) virtual_base, virtual_size);
+}
+
+void
+svm_client_scan_this_region_nolock (svm_region_t * rp)
+{
+ int j;
+ int mypid = getpid ();
+ void *oldheap;
+
+ for (j = 0; j < vec_len (rp->client_pids); j++)
+ {
+ if (mypid == rp->client_pids[j])
+ continue;
+ if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0))
+ {
+ clib_warning ("%s: cleanup ghost pid %d",
+ rp->region_name, rp->client_pids[j]);
+ /* nb: client vec in rp->region_heap */
+ oldheap = svm_push_pvt_heap (rp);
+ vec_delete (rp->client_pids, 1, j);
+ j--;
+ svm_pop_heap (oldheap);
+ }
+ }
+}
+
+
+/*
+ * Scan svm regions for dead clients
+ */
+void
+svm_client_scan (const char *root_path)
+{
+ int i, j;
+ svm_main_region_t *mp;
+ svm_map_region_args_t *a = 0;
+ svm_region_t *root_rp;
+ svm_region_t *rp;
+ svm_subregion_t *subp;
+ u8 *name = 0;
+ u8 **svm_names = 0;
+ void *oldheap;
+ int mypid = getpid ();
+
+ vec_validate (a, 0);
+
+ svm_region_init_chroot (root_path);
+
+ root_rp = svm_get_root_rp ();
+
+ pthread_mutex_lock (&root_rp->mutex);
+
+ mp = root_rp->data_base;
+
+ for (j = 0; j < vec_len (root_rp->client_pids); j++)
+ {
+ if (mypid == root_rp->client_pids[j])
+ continue;
+ if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0))
+ {
+ clib_warning ("%s: cleanup ghost pid %d",
+ root_rp->region_name, root_rp->client_pids[j]);
+ /* nb: client vec in root_rp->region_heap */
+ oldheap = svm_push_pvt_heap (root_rp);
+ vec_delete (root_rp->client_pids, 1, j);
+ j--;
+ svm_pop_heap (oldheap);
+ }
+ }
+
+ /*
+   * Snapshot the names; we can't hold the root rp mutex across
+   * find_or_create.
+ */
+ /* *INDENT-OFF* */
+ pool_foreach (subp, mp->subregions, ({
+ name = vec_dup (subp->subregion_name);
+ vec_add1(svm_names, name);
+ }));
+ /* *INDENT-ON* */
+
+ pthread_mutex_unlock (&root_rp->mutex);
+
+ for (i = 0; i < vec_len (svm_names); i++)
+ {
+ vec_validate (a, 0);
+ a->root_path = root_path;
+ a->name = (char *) svm_names[i];
+ rp = svm_region_find_or_create (a);
+ if (rp)
+ {
+ pthread_mutex_lock (&rp->mutex);
+
+ svm_client_scan_this_region_nolock (rp);
+
+ pthread_mutex_unlock (&rp->mutex);
+ svm_region_unmap (rp);
+ vec_free (svm_names[i]);
+ }
+ vec_free (a);
+ }
+ vec_free (svm_names);
+
+ svm_region_exit ();
+
+ vec_free (a);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm.h b/src/svm/svm.h
new file mode 100644
index 00000000..894c3d95
--- /dev/null
+++ b/src/svm/svm.h
@@ -0,0 +1,107 @@
+/*
+ *------------------------------------------------------------------
+ * svm.h - shared VM allocation, mmap(...MAP_FIXED...)
+ * brain police
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __included_svm_h__
+#define __included_svm_h__
+
+#include <pthread.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/mem.h>
+#include <svm/svm_common.h>
+
+#define MMAP_PAGESIZE (clib_mem_get_page_size())
+
+static inline void *
+svm_mem_alloc (svm_region_t * rp, uword size)
+{
+ u8 *oldheap;
+ ASSERT (rp->flags & SVM_FLAGS_MHEAP);
+ u8 *rv;
+
+ pthread_mutex_lock (&rp->mutex);
+ oldheap = clib_mem_set_heap (rp->data_heap);
+ rv = clib_mem_alloc (size);
+ clib_mem_set_heap (oldheap);
+ pthread_mutex_unlock (&rp->mutex);
+ return (rv);
+}
+
+static inline void *
+svm_mem_alloc_aligned_at_offset (svm_region_t * rp,
+ uword size, uword align, uword offset)
+{
+ u8 *oldheap;
+ ASSERT (rp->flags & SVM_FLAGS_MHEAP);
+ u8 *rv;
+
+ pthread_mutex_lock (&rp->mutex);
+ oldheap = clib_mem_set_heap (rp->data_heap);
+ rv = clib_mem_alloc_aligned_at_offset (size, align, offset,
+ 1 /* yes, call os_out_of_memory */ );
+ clib_mem_set_heap (oldheap);
+ pthread_mutex_unlock (&rp->mutex);
+ return (rv);
+}
+
+static inline void
+svm_mem_free (svm_region_t * rp, void *ptr)
+{
+ u8 *oldheap;
+ ASSERT (rp->flags & SVM_FLAGS_MHEAP);
+
+ pthread_mutex_lock (&rp->mutex);
+ oldheap = clib_mem_set_heap (rp->data_heap);
+ clib_mem_free (ptr);
+ clib_mem_set_heap (oldheap);
+ pthread_mutex_unlock (&rp->mutex);
+
+}
+
+static inline void *
+svm_push_pvt_heap (svm_region_t * rp)
+{
+ u8 *oldheap;
+ oldheap = clib_mem_set_heap (rp->region_heap);
+ return ((void *) oldheap);
+}
+
+static inline void *
+svm_push_data_heap (svm_region_t * rp)
+{
+ u8 *oldheap;
+ oldheap = clib_mem_set_heap (rp->data_heap);
+ return ((void *) oldheap);
+}
+
+static inline void
+svm_pop_heap (void *oldheap)
+{
+ clib_mem_set_heap (oldheap);
+}
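+
+/*
+ * Illustrative use, assuming rp is an already-mapped region created
+ * with SVM_FLAGS_MHEAP:
+ *
+ *   void *p = svm_mem_alloc (rp, 128);  (locks rp->mutex internally)
+ *   svm_mem_free (rp, p);
+ *
+ * For several allocations under one lock, push the heap manually:
+ *
+ *   pthread_mutex_lock (&rp->mutex);
+ *   oldheap = svm_push_data_heap (rp);
+ *   ... allocate ...
+ *   svm_pop_heap (oldheap);
+ *   pthread_mutex_unlock (&rp->mutex);
+ */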
+
+#endif /* __included_svm_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_common.h b/src/svm/svm_common.h
new file mode 100644
index 00000000..ea3ec87a
--- /dev/null
+++ b/src/svm/svm_common.h
@@ -0,0 +1,135 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __included_svm_common_h__
+#define __included_svm_common_h__
+
+#include <stdarg.h>
+#include <pthread.h>
+#include <vppinfra/types.h>
+
+#define SVM_VERSION ((1<<16) | 1) /* set to declare region ready. */
+
+#define SVM_FLAGS_MHEAP (1<<0) /* region contains an mheap */
+#define SVM_FLAGS_FILE (1<<1) /* region backed by one or more files */
+#define SVM_FLAGS_NODATA (1<<2) /* region will be further subdivided */
+#define SVM_FLAGS_NEED_DATA_INIT (1<<3)
+
+#define SVM_PVT_MHEAP_SIZE (128<<10) /* region's private mheap (128k) */
+
+typedef struct svm_region_
+{
+ volatile uword version;
+ pthread_mutex_t mutex;
+ pthread_cond_t condvar;
+ int mutex_owner_pid; /* in case of trouble */
+ int mutex_owner_tag;
+ uword flags;
+ uword virtual_base; /* base of the region object */
+ uword virtual_size;
+ void *region_heap;
+ void *data_base; /* data portion base address */
+ void *data_heap; /* data heap, if any */
+ volatile void *user_ctx; /* user context pointer */
+ /* stuff allocated in the region's heap */
+ uword bitmap_size; /* nbits in virtual alloc bitmap */
+ uword *bitmap; /* the bitmap */
+ char *region_name;
+ char *backing_file;
+ char **filenames;
+ uword *client_pids;
+ /* pad */
+
+ /* next page:
+ * (64K) clib heap for the region itself
+ *
+ * data_base -> whatever is in this region
+ */
+
+} svm_region_t;
+
+typedef struct svm_map_region_args_
+{
+ const char *root_path; /* NULL means use the truly global arena */
+ const char *name;
+ u64 baseva;
+ u64 size;
+ u64 pvt_heap_size;
+ uword flags;
+ char *backing_file;
+ uword backing_mmap_size;
+ /* uid, gid to own the svm region(s) */
+ int uid;
+ int gid;
+} svm_map_region_args_t;
+
+
+/*
+ * Memory shared across all router instances. Packet buffers, etc
+ * Base should be "out of the way," and size should be big enough to
+ * cover everything we plan to put here.
+ */
+#define SVM_GLOBAL_REGION_BASEVA 0x30000000
+#define SVM_GLOBAL_REGION_SIZE (64<<20)
+#define SVM_GLOBAL_REGION_NAME "/global_vm"
+
+/*
+ * Memory shared across individual router instances.
+ */
+#define SVM_OVERLAY_REGION_BASEVA \
+ (SVM_GLOBAL_REGION_BASEVA + SVM_GLOBAL_REGION_SIZE)
+#define SVM_OVERLAY_REGION_SIZE (1<<20)
+#define SVM_OVERLAY_REGION_BASENAME "/overlay_vm"
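+
+/* With the defaults above, the overlay space therefore starts at
+   0x34000000 (0x30000000 + 64 MB). */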
+
+typedef struct
+{
+ u8 *subregion_name;
+} svm_subregion_t;
+
+typedef struct
+{
+ svm_subregion_t *subregions; /* subregion pool */
+ uword *name_hash;
+ u8 *root_path;
+ int uid;
+ int gid;
+} svm_main_region_t;
+
+
+void *svm_region_find_or_create (svm_map_region_args_t * a);
+void svm_region_init (void);
+int svm_region_init_chroot (const char *root_path);
+void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid);
+void svm_region_init_args (svm_map_region_args_t * a);
+void svm_region_exit (void);
+void svm_region_unmap (void *rp_arg);
+void svm_client_scan (const char *root_path);
+void svm_client_scan_this_region_nolock (svm_region_t * rp);
+u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t * a);
+u8 *format_svm_region (u8 * s, va_list * args);
+
+svm_region_t *svm_get_root_rp (void);
+
+#endif /* __included_svm_common_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c
new file mode 100644
index 00000000..42eb1ee8
--- /dev/null
+++ b/src/svm/svm_fifo.c
@@ -0,0 +1,838 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <svm/svm_fifo.h>
+#include <vppinfra/cpu.h>
+
+static inline u8
+position_lt (svm_fifo_t * f, u32 a, u32 b)
+{
+ return (ooo_segment_distance_from_tail (f, a)
+ < ooo_segment_distance_from_tail (f, b));
+}
+
+static inline u8
+position_leq (svm_fifo_t * f, u32 a, u32 b)
+{
+ return (ooo_segment_distance_from_tail (f, a)
+ <= ooo_segment_distance_from_tail (f, b));
+}
+
+static inline u8
+position_gt (svm_fifo_t * f, u32 a, u32 b)
+{
+ return (ooo_segment_distance_from_tail (f, a)
+ > ooo_segment_distance_from_tail (f, b));
+}
+
+static inline u32
+position_diff (svm_fifo_t * f, u32 posa, u32 posb)
+{
+ return ooo_segment_distance_from_tail (f, posa)
+ - ooo_segment_distance_from_tail (f, posb);
+}
+
+static inline u32
+ooo_segment_end_pos (svm_fifo_t * f, ooo_segment_t * s)
+{
+ return (s->start + s->length) % f->nitems;
+}
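+
+/*
+ * The helpers above compare positions on the circular buffer by their
+ * distance from the current tail. Worked example: with nitems = 8 and
+ * tail = 6, position 7 lies at distance 1 and position 1 at distance 3,
+ * so position_lt (f, 7, 1) holds even though 7 > 1 numerically.
+ */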
+
+u8 *
+format_ooo_segment (u8 * s, va_list * args)
+{
+ svm_fifo_t *f = va_arg (*args, svm_fifo_t *);
+ ooo_segment_t *seg = va_arg (*args, ooo_segment_t *);
+ u32 normalized_start = (seg->start + f->nitems - f->tail) % f->nitems;
+ s = format (s, "[%u, %u], len %u, next %d, prev %d", normalized_start,
+ (normalized_start + seg->length) % f->nitems, seg->length,
+ seg->next, seg->prev);
+ return s;
+}
+
+u8 *
+svm_fifo_dump_trace (u8 * s, svm_fifo_t * f)
+{
+#if SVM_FIFO_TRACE
+ svm_fifo_trace_elem_t *seg = 0;
+ int i = 0;
+
+ if (f->trace)
+ {
+ vec_foreach (seg, f->trace)
+ {
+ s = format (s, "{%u, %u, %u}, ", seg->offset, seg->len, seg->action);
+ i++;
+ if (i % 5 == 0)
+ s = format (s, "\n");
+ }
+ s = format (s, "\n");
+ }
+ return s;
+#else
+ return 0;
+#endif
+}
+
+u8 *
+svm_fifo_replay (u8 * s, svm_fifo_t * f, u8 no_read, u8 verbose)
+{
+ int i, trace_len;
+ u8 *data = 0;
+ svm_fifo_trace_elem_t *trace;
+ u32 offset;
+ svm_fifo_t *dummy_fifo;
+
+ if (!f)
+ return s;
+
+#if SVM_FIFO_TRACE
+ trace = f->trace;
+ trace_len = vec_len (trace);
+#else
+ trace = 0;
+ trace_len = 0;
+#endif
+
+ dummy_fifo = svm_fifo_create (f->nitems);
+ memset (f->data, 0xFF, f->nitems);
+
+ vec_validate (data, f->nitems);
+ for (i = 0; i < vec_len (data); i++)
+ data[i] = i;
+
+ for (i = 0; i < trace_len; i++)
+ {
+ offset = trace[i].offset;
+ if (trace[i].action == 1)
+ {
+ if (verbose)
+ s = format (s, "adding [%u, %u]:", trace[i].offset,
+ (trace[i].offset +
+ trace[i].len) % dummy_fifo->nitems);
+ svm_fifo_enqueue_with_offset (dummy_fifo, trace[i].offset,
+ trace[i].len, &data[offset]);
+ }
+ else if (trace[i].action == 2)
+ {
+ if (verbose)
+ s = format (s, "adding [%u, %u]:", 0, trace[i].len);
+ svm_fifo_enqueue_nowait (dummy_fifo, trace[i].len, &data[offset]);
+ }
+ else if (!no_read)
+ {
+ if (verbose)
+ s = format (s, "read: %u", trace[i].len);
+ svm_fifo_dequeue_drop (dummy_fifo, trace[i].len);
+ }
+ if (verbose)
+ s = format (s, "%U", format_svm_fifo, dummy_fifo, 1);
+ }
+
+ s = format (s, "result: %U", format_svm_fifo, dummy_fifo, 1);
+
+ return s;
+}
+
+u8 *
+format_ooo_list (u8 * s, va_list * args)
+{
+ svm_fifo_t *f = va_arg (*args, svm_fifo_t *);
+ u32 ooo_segment_index = f->ooos_list_head;
+ ooo_segment_t *seg;
+
+ while (ooo_segment_index != OOO_SEGMENT_INVALID_INDEX)
+ {
+ seg = pool_elt_at_index (f->ooo_segments, ooo_segment_index);
+ s = format (s, " %U\n", format_ooo_segment, f, seg);
+ ooo_segment_index = seg->next;
+ }
+
+ return s;
+}
+
+u8 *
+format_svm_fifo (u8 * s, va_list * args)
+{
+ svm_fifo_t *f = va_arg (*args, svm_fifo_t *);
+ int verbose = va_arg (*args, int);
+
+ s = format (s, "cursize %u nitems %u has_event %d\n",
+ f->cursize, f->nitems, f->has_event);
+ s = format (s, " head %d tail %d\n", f->head, f->tail);
+
+ if (verbose > 1)
+ s = format
+ (s, " server session %d thread %d client session %d thread %d\n",
+ f->master_session_index, f->master_thread_index,
+ f->client_session_index, f->client_thread_index);
+
+ if (verbose)
+ {
+ s = format (s, " ooo pool %d active elts newest %u\n",
+ pool_elts (f->ooo_segments), f->ooos_newest);
+ if (svm_fifo_has_ooo_data (f))
+ s = format (s, " %U", format_ooo_list, f, verbose);
+ }
+ return s;
+}
+
+/** Create an svm fifo in the current heap. Returns 0 on allocation failure instead of blowing up the process. */
+svm_fifo_t *
+svm_fifo_create (u32 data_size_in_bytes)
+{
+ svm_fifo_t *f;
+ u32 rounded_data_size;
+
+ /* always round fifo data size to the next highest power-of-two */
+ rounded_data_size = (1 << (max_log2 (data_size_in_bytes)));
+ f = clib_mem_alloc_aligned_or_null (sizeof (*f) + rounded_data_size,
+ CLIB_CACHE_LINE_BYTES);
+ if (f == 0)
+ return 0;
+
+ memset (f, 0, sizeof (*f));
+ f->nitems = data_size_in_bytes;
+ f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX;
+ f->refcnt = 1;
+ return (f);
+}
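+
+/*
+ * Minimal usage sketch (illustrative only): create a fifo in the current
+ * heap, push a few bytes through it, and free it.
+ *
+ *   u8 in[3] = { 1, 2, 3 }, out[3];
+ *   svm_fifo_t *f = svm_fifo_create (4096);
+ *   if (f)
+ *     {
+ *       svm_fifo_enqueue_nowait (f, 3, in);   // returns 3
+ *       svm_fifo_dequeue_nowait (f, 3, out);  // returns 3
+ *       svm_fifo_free (f);
+ *     }
+ */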
+
+void
+svm_fifo_free (svm_fifo_t * f)
+{
+ ASSERT (f->refcnt > 0);
+
+ if (--f->refcnt == 0)
+ {
+ pool_free (f->ooo_segments);
+ clib_mem_free (f);
+ }
+}
+
+always_inline ooo_segment_t *
+ooo_segment_new (svm_fifo_t * f, u32 start, u32 length)
+{
+ ooo_segment_t *s;
+
+ pool_get (f->ooo_segments, s);
+
+ s->start = start;
+ s->length = length;
+
+ s->prev = s->next = OOO_SEGMENT_INVALID_INDEX;
+
+ return s;
+}
+
+always_inline void
+ooo_segment_del (svm_fifo_t * f, u32 index)
+{
+ ooo_segment_t *cur, *prev = 0, *next = 0;
+ cur = pool_elt_at_index (f->ooo_segments, index);
+
+ if (cur->next != OOO_SEGMENT_INVALID_INDEX)
+ {
+ next = pool_elt_at_index (f->ooo_segments, cur->next);
+ next->prev = cur->prev;
+ }
+
+ if (cur->prev != OOO_SEGMENT_INVALID_INDEX)
+ {
+ prev = pool_elt_at_index (f->ooo_segments, cur->prev);
+ prev->next = cur->next;
+ }
+ else
+ {
+ f->ooos_list_head = cur->next;
+ }
+
+ pool_put (f->ooo_segments, cur);
+}
+
+/**
+ * Add segment to fifo's out-of-order segment list. Takes care of merging
+ * adjacent segments and removing overlapping ones.
+ */
+static void
+ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
+{
+ ooo_segment_t *s, *new_s, *prev, *next, *it;
+ u32 new_index, s_end_pos, s_index;
+ u32 normalized_position, normalized_end_position;
+
+ ASSERT (offset + length <= ooo_segment_distance_from_tail (f, f->head));
+ normalized_position = (f->tail + offset) % f->nitems;
+ normalized_end_position = (f->tail + offset + length) % f->nitems;
+
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
+
+ if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX)
+ {
+ s = ooo_segment_new (f, normalized_position, length);
+ f->ooos_list_head = s - f->ooo_segments;
+ f->ooos_newest = f->ooos_list_head;
+ return;
+ }
+
+ /* Find first segment that starts after new segment */
+ s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
+ while (s->next != OOO_SEGMENT_INVALID_INDEX
+ && position_lt (f, s->start, normalized_position))
+ s = pool_elt_at_index (f->ooo_segments, s->next);
+
+ /* If we have a previous and we overlap it, use it as starting point */
+ prev = ooo_segment_get_prev (f, s);
+ if (prev
+ && position_leq (f, normalized_position, ooo_segment_end_pos (f, prev)))
+ {
+ s = prev;
+ s_end_pos = ooo_segment_end_pos (f, s);
+
+ /* Since we have previous, normalized start position cannot be smaller
+ * than prev->start. Check tail */
+ ASSERT (position_lt (f, s->start, normalized_position));
+ goto check_tail;
+ }
+
+ s_index = s - f->ooo_segments;
+ s_end_pos = ooo_segment_end_pos (f, s);
+
+ /* No overlap, add before current segment */
+ if (position_lt (f, normalized_end_position, s->start))
+ {
+ new_s = ooo_segment_new (f, normalized_position, length);
+ new_index = new_s - f->ooo_segments;
+
+ /* Pool might've moved, get segment again */
+ s = pool_elt_at_index (f->ooo_segments, s_index);
+ if (s->prev != OOO_SEGMENT_INVALID_INDEX)
+ {
+ new_s->prev = s->prev;
+ prev = pool_elt_at_index (f->ooo_segments, new_s->prev);
+ prev->next = new_index;
+ }
+ else
+ {
+ /* New head */
+ f->ooos_list_head = new_index;
+ }
+
+ new_s->next = s_index;
+ s->prev = new_index;
+ f->ooos_newest = new_index;
+ return;
+ }
+ /* No overlap, add after current segment */
+ else if (position_gt (f, normalized_position, s_end_pos))
+ {
+ new_s = ooo_segment_new (f, normalized_position, length);
+ new_index = new_s - f->ooo_segments;
+
+ /* Pool might've moved, get segment again */
+ s = pool_elt_at_index (f->ooo_segments, s_index);
+
+ /* Needs to be last */
+ ASSERT (s->next == OOO_SEGMENT_INVALID_INDEX);
+
+ new_s->prev = s_index;
+ s->next = new_index;
+ f->ooos_newest = new_index;
+
+ return;
+ }
+
+ /*
+ * Merge needed
+ */
+
+ /* Merge at head */
+ if (position_lt (f, normalized_position, s->start))
+ {
+ s->start = normalized_position;
+ s->length = position_diff (f, s_end_pos, s->start);
+ f->ooos_newest = s - f->ooo_segments;
+ }
+
+check_tail:
+
+ /* Overlapping tail */
+ if (position_gt (f, normalized_end_position, s_end_pos))
+ {
+ s->length = position_diff (f, normalized_end_position, s->start);
+
+ /* Remove the completely overlapped segments in the tail */
+ it = ooo_segment_next (f, s);
+ while (it && position_leq (f, ooo_segment_end_pos (f, it),
+ normalized_end_position))
+ {
+ next = ooo_segment_next (f, it);
+ ooo_segment_del (f, it - f->ooo_segments);
+ it = next;
+ }
+
+ /* If partial overlap with last, merge */
+ if (it && position_leq (f, it->start, normalized_end_position))
+ {
+ s->length = position_diff (f, ooo_segment_end_pos (f, it),
+ s->start);
+ ooo_segment_del (f, it - f->ooo_segments);
+ }
+ f->ooos_newest = s - f->ooo_segments;
+ }
+}
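+
+/*
+ * Worked example of the merge logic above, positions relative to tail:
+ * given list segments [10, 20) and [30, 40), adding [18, 32) overlaps
+ * the first segment (check_tail path), extends it past 20, partially
+ * overlaps the second, and collapses the list to a single [10, 40).
+ */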
+
+/**
+ * Removes segments that can now be enqueued because the fifo's tail has
+ * advanced. Returns the number of bytes added to tail.
+ */
+static int
+ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued)
+{
+ ooo_segment_t *s;
+ u32 index, bytes = 0;
+ i32 diff;
+
+ s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
+ diff = ooo_segment_distance_to_tail (f, s->start);
+
+ ASSERT (diff != n_bytes_enqueued);
+
+ if (diff > n_bytes_enqueued)
+ return 0;
+
+ /* If last tail update overlaps one/multiple ooo segments, remove them */
+ while (0 <= diff && diff < n_bytes_enqueued)
+ {
+ index = s - f->ooo_segments;
+
+ /* Segment end is beyond the tail. Advance tail and remove segment */
+ if (s->length > diff)
+ {
+ bytes = s->length - diff;
+ f->tail += bytes;
+ f->tail %= f->nitems;
+ ooo_segment_del (f, index);
+ break;
+ }
+
+ /* If we have next go on */
+ if (s->next != OOO_SEGMENT_INVALID_INDEX)
+ {
+ s = pool_elt_at_index (f->ooo_segments, s->next);
+ diff = ooo_segment_distance_to_tail (f, s->start);
+ ooo_segment_del (f, index);
+ }
+ /* End of search */
+ else
+ {
+ ooo_segment_del (f, index);
+ break;
+ }
+ }
+
+ ASSERT (bytes <= f->nitems);
+ return bytes;
+}
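+
+/*
+ * Worked example: the tail just advanced by 4 bytes and the first ooo
+ * segment starts 2 bytes behind the new tail with length 5. Then
+ * diff = 2 < 4 and s->length (5) > diff, so the tail advances another
+ * 5 - 2 = 3 bytes and the segment is deleted.
+ */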
+
+static int
+svm_fifo_enqueue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here)
+{
+ u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
+ u32 cursize, nitems;
+
+ /* read cursize, which can only increase while we're working */
+ cursize = svm_fifo_max_dequeue (f);
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
+
+ if (PREDICT_FALSE (cursize == f->nitems))
+ return -2; /* fifo stuffed */
+
+ nitems = f->nitems;
+
+ /* Number of bytes we're going to copy */
+ total_copy_bytes = (nitems - cursize) < max_bytes ?
+ (nitems - cursize) : max_bytes;
+
+ if (PREDICT_TRUE (copy_from_here != 0))
+ {
+ /* Number of bytes in first copy segment */
+ first_copy_bytes = ((nitems - f->tail) < total_copy_bytes)
+ ? (nitems - f->tail) : total_copy_bytes;
+
+ clib_memcpy (&f->data[f->tail], copy_from_here, first_copy_bytes);
+ f->tail += first_copy_bytes;
+ f->tail = (f->tail == nitems) ? 0 : f->tail;
+
+ /* Number of bytes in second copy segment, if any */
+ second_copy_bytes = total_copy_bytes - first_copy_bytes;
+ if (second_copy_bytes)
+ {
+ clib_memcpy (&f->data[f->tail], copy_from_here + first_copy_bytes,
+ second_copy_bytes);
+ f->tail += second_copy_bytes;
+ f->tail = (f->tail == nitems) ? 0 : f->tail;
+ }
+ }
+ else
+ {
+ ASSERT (0);
+
+ /* Account for a zero-copy enqueue done elsewhere */
+ ASSERT (max_bytes <= (nitems - cursize));
+ f->tail += max_bytes;
+ f->tail = f->tail % nitems;
+ total_copy_bytes = max_bytes;
+ }
+
+ svm_fifo_trace_add (f, f->head, total_copy_bytes, 2);
+
+ /* Any out-of-order segments to collect? */
+ if (PREDICT_FALSE (f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX))
+ total_copy_bytes += ooo_segment_try_collect (f, total_copy_bytes);
+
+ /* Atomically increase the queue length */
+ ASSERT (cursize + total_copy_bytes <= nitems);
+ __sync_fetch_and_add (&f->cursize, total_copy_bytes);
+
+ return (total_copy_bytes);
+}
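+
+/*
+ * Wraparound example for the two-segment copy above: with nitems = 8,
+ * tail = 6 and 5 bytes to enqueue, first_copy_bytes = 8 - 6 = 2 land at
+ * the end of the buffer, the tail wraps to 0, and the remaining
+ * second_copy_bytes = 3 land at the start.
+ */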
+
+#define SVM_ENQUEUE_CLONE_TEMPLATE(arch, fn, tgt) \
+ uword \
+ __attribute__ ((flatten)) \
+ __attribute__ ((target (tgt))) \
+ CLIB_CPU_OPTIMIZED \
+ fn ## _ ## arch ( svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) \
+ { return fn (f, max_bytes, copy_from_here);}
+
+static int
+svm_fifo_enqueue_nowait_ma (svm_fifo_t * f, u32 max_bytes,
+ u8 * copy_from_here)
+{
+ return svm_fifo_enqueue_internal (f, max_bytes, copy_from_here);
+}
+
+foreach_march_variant (SVM_ENQUEUE_CLONE_TEMPLATE,
+ svm_fifo_enqueue_nowait_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_enqueue_nowait_ma);
+
+int
+svm_fifo_enqueue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here)
+{
+#if CLIB_DEBUG > 0
+ return svm_fifo_enqueue_nowait_ma (f, max_bytes, copy_from_here);
+#else
+ static int (*fp) (svm_fifo_t *, u32, u8 *);
+
+ if (PREDICT_FALSE (fp == 0))
+ fp = (void *) svm_fifo_enqueue_nowait_ma_multiarch_select ();
+
+ return (*fp) (f, max_bytes, copy_from_here);
+#endif
+}
+
+/**
+ * Enqueue a future segment.
+ *
+ * Two choices: either copies the entire segment, or copies nothing.
+ * Returns 0 if the entire segment was copied.
+ * Returns -1 if none of the segment was copied due to lack of space
+ */
+static int
+svm_fifo_enqueue_with_offset_internal (svm_fifo_t * f,
+ u32 offset,
+ u32 required_bytes,
+ u8 * copy_from_here)
+{
+ u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
+ u32 cursize, nitems, normalized_offset;
+
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
+
+ /* read cursize, which can only increase while we're working */
+ cursize = svm_fifo_max_dequeue (f);
+ nitems = f->nitems;
+
+ ASSERT (required_bytes < nitems);
+
+ normalized_offset = (f->tail + offset) % nitems;
+
+ /* Will this request fit? */
+ if ((required_bytes + offset) > (nitems - cursize))
+ return -1;
+
+ svm_fifo_trace_add (f, offset, required_bytes, 1);
+
+ ooo_segment_add (f, offset, required_bytes);
+
+ /* Number of bytes we're going to copy */
+ total_copy_bytes = required_bytes;
+
+ /* Number of bytes in first copy segment */
+ first_copy_bytes = ((nitems - normalized_offset) < total_copy_bytes)
+ ? (nitems - normalized_offset) : total_copy_bytes;
+
+ clib_memcpy (&f->data[normalized_offset], copy_from_here, first_copy_bytes);
+
+ /* Number of bytes in second copy segment, if any */
+ second_copy_bytes = total_copy_bytes - first_copy_bytes;
+ if (second_copy_bytes)
+ {
+ normalized_offset += first_copy_bytes;
+ normalized_offset %= nitems;
+
+ ASSERT (normalized_offset == 0);
+
+ clib_memcpy (&f->data[normalized_offset],
+ copy_from_here + first_copy_bytes, second_copy_bytes);
+ }
+
+ return (0);
+}
+
+
+int
+svm_fifo_enqueue_with_offset (svm_fifo_t * f,
+ u32 offset,
+ u32 required_bytes, u8 * copy_from_here)
+{
+ return svm_fifo_enqueue_with_offset_internal (f, offset, required_bytes,
+ copy_from_here);
+}
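+
+/*
+ * Illustrative out-of-order pattern (a TCP-like receiver): bytes
+ * [10, 20) arrive before bytes [0, 10). The offset enqueue copies the
+ * data in place and records an ooo segment without advancing the tail;
+ * the subsequent in-order enqueue collects it and advances the tail
+ * past both chunks.
+ *
+ *   svm_fifo_enqueue_with_offset (f, 10, 10, late_chunk);
+ *   svm_fifo_enqueue_nowait (f, 10, early_chunk);  // returns 20
+ */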
+
+
+static int
+svm_fifo_dequeue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+{
+ u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
+ u32 cursize, nitems;
+
+ /* read cursize, which can only increase while we're working */
+ cursize = svm_fifo_max_dequeue (f);
+ if (PREDICT_FALSE (cursize == 0))
+ return -2; /* nothing in the fifo */
+
+ nitems = f->nitems;
+
+ /* Number of bytes we're going to copy */
+ total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes;
+
+ if (PREDICT_TRUE (copy_here != 0))
+ {
+ /* Number of bytes in first copy segment */
+ first_copy_bytes = ((nitems - f->head) < total_copy_bytes)
+ ? (nitems - f->head) : total_copy_bytes;
+ clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes);
+ f->head += first_copy_bytes;
+ f->head = (f->head == nitems) ? 0 : f->head;
+
+ /* Number of bytes in second copy segment, if any */
+ second_copy_bytes = total_copy_bytes - first_copy_bytes;
+ if (second_copy_bytes)
+ {
+ clib_memcpy (copy_here + first_copy_bytes,
+ &f->data[f->head], second_copy_bytes);
+ f->head += second_copy_bytes;
+ f->head = (f->head == nitems) ? 0 : f->head;
+ }
+ }
+ else
+ {
+ ASSERT (0);
+ /* Account for a zero-copy dequeue done elsewhere */
+ ASSERT (max_bytes <= cursize);
+ f->head += max_bytes;
+ f->head = f->head % nitems;
+ cursize -= max_bytes;
+ total_copy_bytes = max_bytes;
+ }
+
+ ASSERT (f->head <= nitems);
+ ASSERT (cursize >= total_copy_bytes);
+ __sync_fetch_and_sub (&f->cursize, total_copy_bytes);
+
+ return (total_copy_bytes);
+}
+
+static int
+svm_fifo_dequeue_nowait_ma (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+{
+ return svm_fifo_dequeue_internal (f, max_bytes, copy_here);
+}
+
+#define SVM_FIFO_DEQUEUE_CLONE_TEMPLATE(arch, fn, tgt) \
+ uword \
+ __attribute__ ((flatten)) \
+ __attribute__ ((target (tgt))) \
+ CLIB_CPU_OPTIMIZED \
+ fn ## _ ## arch ( svm_fifo_t * f, u32 max_bytes, \
+ u8 * copy_here) \
+ { return fn (f, max_bytes, copy_here);}
+
+foreach_march_variant (SVM_FIFO_DEQUEUE_CLONE_TEMPLATE,
+ svm_fifo_dequeue_nowait_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_dequeue_nowait_ma);
+
+int
+svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+{
+#if CLIB_DEBUG > 0
+ return svm_fifo_dequeue_nowait_ma (f, max_bytes, copy_here);
+#else
+ static int (*fp) (svm_fifo_t *, u32, u8 *);
+
+ if (PREDICT_FALSE (fp == 0))
+ fp = (void *) svm_fifo_dequeue_nowait_ma_multiarch_select ();
+
+ return (*fp) (f, max_bytes, copy_here);
+#endif
+}
+
+static int
+svm_fifo_peek_ma (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
+ u8 * copy_here)
+{
+ u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
+ u32 cursize, nitems, real_head;
+
+ /* read cursize, which can only increase while we're working */
+ cursize = svm_fifo_max_dequeue (f);
+ if (PREDICT_FALSE (cursize < relative_offset))
+    return -2;			/* nothing in the fifo at that offset */
+
+ nitems = f->nitems;
+ real_head = f->head + relative_offset;
+ real_head = real_head >= nitems ? real_head - nitems : real_head;
+
+ /* Number of bytes we're going to copy */
+ total_copy_bytes = (cursize - relative_offset < max_bytes) ?
+ cursize - relative_offset : max_bytes;
+
+ if (PREDICT_TRUE (copy_here != 0))
+ {
+ /* Number of bytes in first copy segment */
+ first_copy_bytes =
+ ((nitems - real_head) < total_copy_bytes) ?
+ (nitems - real_head) : total_copy_bytes;
+ clib_memcpy (copy_here, &f->data[real_head], first_copy_bytes);
+
+ /* Number of bytes in second copy segment, if any */
+ second_copy_bytes = total_copy_bytes - first_copy_bytes;
+ if (second_copy_bytes)
+ {
+ clib_memcpy (copy_here + first_copy_bytes, &f->data[0],
+ second_copy_bytes);
+ }
+ }
+ return total_copy_bytes;
+}
+
+#define SVM_FIFO_PEEK_CLONE_TEMPLATE(arch, fn, tgt) \
+ uword \
+ __attribute__ ((flatten)) \
+ __attribute__ ((target (tgt))) \
+ CLIB_CPU_OPTIMIZED \
+ fn ## _ ## arch ( svm_fifo_t * f, u32 relative_offset, u32 max_bytes, \
+ u8 * copy_here) \
+ { return fn (f, relative_offset, max_bytes, copy_here);}
+
+foreach_march_variant (SVM_FIFO_PEEK_CLONE_TEMPLATE, svm_fifo_peek_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_peek_ma);
+
+int
+svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
+ u8 * copy_here)
+{
+#if CLIB_DEBUG > 0
+ return svm_fifo_peek_ma (f, relative_offset, max_bytes, copy_here);
+#else
+ static int (*fp) (svm_fifo_t *, u32, u32, u8 *);
+
+ if (PREDICT_FALSE (fp == 0))
+ fp = (void *) svm_fifo_peek_ma_multiarch_select ();
+
+ return (*fp) (f, relative_offset, max_bytes, copy_here);
+#endif
+}
+
+int
+svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes)
+{
+ u32 total_drop_bytes, first_drop_bytes, second_drop_bytes;
+ u32 cursize, nitems;
+
+ /* read cursize, which can only increase while we're working */
+ cursize = svm_fifo_max_dequeue (f);
+ if (PREDICT_FALSE (cursize == 0))
+ return -2; /* nothing in the fifo */
+
+ nitems = f->nitems;
+
+ /* Number of bytes we're going to drop */
+ total_drop_bytes = (cursize < max_bytes) ? cursize : max_bytes;
+
+ svm_fifo_trace_add (f, f->tail, total_drop_bytes, 3);
+
+  /* Number of bytes in the first drop segment */
+ first_drop_bytes =
+ ((nitems - f->head) < total_drop_bytes) ?
+ (nitems - f->head) : total_drop_bytes;
+ f->head += first_drop_bytes;
+ f->head = (f->head == nitems) ? 0 : f->head;
+
+ /* Number of bytes in second drop segment, if any */
+ second_drop_bytes = total_drop_bytes - first_drop_bytes;
+ if (second_drop_bytes)
+ {
+ f->head += second_drop_bytes;
+ f->head = (f->head == nitems) ? 0 : f->head;
+ }
+
+ ASSERT (f->head <= nitems);
+ ASSERT (cursize >= total_drop_bytes);
+ __sync_fetch_and_sub (&f->cursize, total_drop_bytes);
+
+ return total_drop_bytes;
+}
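+
+/*
+ * Typical consumer pattern (illustrative): inspect data in place with
+ * svm_fifo_peek, then consume it with svm_fifo_dequeue_drop once it has
+ * been processed, avoiding a second copy of the payload.
+ *
+ *   u8 hdr[4];
+ *   if (svm_fifo_peek (f, 0, sizeof (hdr), hdr) == sizeof (hdr))
+ *     svm_fifo_dequeue_drop (f, sizeof (hdr));
+ */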
+
+u32
+svm_fifo_number_ooo_segments (svm_fifo_t * f)
+{
+ return pool_elts (f->ooo_segments);
+}
+
+ooo_segment_t *
+svm_fifo_first_ooo_segment (svm_fifo_t * f)
+{
+ return pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
+}
+
+/**
+ * Set fifo pointers to requested offset
+ */
+void
+svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer)
+{
+ f->head = f->tail = pointer % f->nitems;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h
new file mode 100644
index 00000000..84901d02
--- /dev/null
+++ b/src/svm/svm_fifo.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ssvm_fifo_h__
+#define __included_ssvm_fifo_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <pthread.h>
+
+/** Out-of-order segment */
+typedef struct
+{
+ u32 next; /**< Next linked-list element pool index */
+ u32 prev; /**< Previous linked-list element pool index */
+
+ u32 start; /**< Start of segment, normalized*/
+ u32 length; /**< Length of segment */
+} ooo_segment_t;
+
+format_function_t format_ooo_segment;
+format_function_t format_ooo_list;
+
+#define SVM_FIFO_TRACE (0)
+#define OOO_SEGMENT_INVALID_INDEX ((u32)~0)
+
+typedef struct
+{
+ u32 offset;
+ u32 len;
+ u32 action;
+} svm_fifo_trace_elem_t;
+
+typedef struct _svm_fifo
+{
+ volatile u32 cursize; /**< current fifo size */
+ u32 nitems;
+ CLIB_CACHE_LINE_ALIGN_MARK (end_cursize);
+
+ volatile u32 has_event; /**< non-zero if deq event exists */
+
+ /* Backpointers */
+ u32 master_session_index;
+ u32 client_session_index;
+ u8 master_thread_index;
+ u8 client_thread_index;
+ u32 segment_manager;
+ CLIB_CACHE_LINE_ALIGN_MARK (end_shared);
+ u32 head;
+ CLIB_CACHE_LINE_ALIGN_MARK (end_consumer);
+
+ /* producer */
+ u32 tail;
+
+ ooo_segment_t *ooo_segments; /**< Pool of ooo segments */
+ u32 ooos_list_head; /**< Head of out-of-order linked-list */
+ u32 ooos_newest; /**< Last segment to have been updated */
+ struct _svm_fifo *next; /**< next in freelist/active chain */
+ struct _svm_fifo *prev; /**< prev in active chain */
+#if SVM_FIFO_TRACE
+ svm_fifo_trace_elem_t *trace;
+#endif
+ u32 freelist_index; /**< aka log2(allocated_size) - const. */
+ i8 refcnt; /**< reference count */
+ CLIB_CACHE_LINE_ALIGN_MARK (data);
+} svm_fifo_t;
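+
+/*
+ * Layout note: the align marks above deliberately separate the
+ * producer/consumer-shared counters, the consumer-owned head and the
+ * producer-owned tail onto distinct cache lines, so a single producer
+ * and a single consumer never false-share while the fifo is hot.
+ */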
+
+#if SVM_FIFO_TRACE
+#define svm_fifo_trace_add(_f, _s, _l, _t) \
+{ \
+ svm_fifo_trace_elem_t *trace_elt; \
+ vec_add2(_f->trace, trace_elt, 1); \
+ trace_elt->offset = _s; \
+ trace_elt->len = _l; \
+ trace_elt->action = _t; \
+}
+#else
+#define svm_fifo_trace_add(_f, _s, _l, _t)
+#endif
+
+u8 *svm_fifo_dump_trace (u8 * s, svm_fifo_t * f);
+u8 *svm_fifo_replay (u8 * s, svm_fifo_t * f, u8 no_read, u8 verbose);
+
+static inline u32
+svm_fifo_max_dequeue (svm_fifo_t * f)
+{
+ return f->cursize;
+}
+
+static inline u32
+svm_fifo_max_enqueue (svm_fifo_t * f)
+{
+ return f->nitems - svm_fifo_max_dequeue (f);
+}
+
+static inline u8
+svm_fifo_has_ooo_data (svm_fifo_t * f)
+{
+ return f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX;
+}
+
+/**
+ * Sets fifo event flag.
+ *
+ * @return 1 if flag was not set.
+ */
+always_inline u8
+svm_fifo_set_event (svm_fifo_t * f)
+{
+  /* Probably doesn't need to be atomic. Still, better to avoid surprises */
+ return __sync_lock_test_and_set (&f->has_event, 1) == 0;
+}
+
+/**
+ * Unsets fifo event flag.
+ */
+always_inline void
+svm_fifo_unset_event (svm_fifo_t * f)
+{
+  /* Probably doesn't need to be atomic. Still, better to avoid surprises */
+ __sync_lock_release (&f->has_event);
+}
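+
+/*
+ * Event coalescing sketch (illustrative; the notification hook is
+ * hypothetical): a producer signals the consumer only when it is the
+ * one that armed the flag, so back-to-back enqueues cause one wakeup.
+ *
+ *   svm_fifo_enqueue_nowait (f, len, data);
+ *   if (svm_fifo_set_event (f))
+ *     send_io_event_to_consumer ();  // hypothetical
+ */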
+
+svm_fifo_t *svm_fifo_create (u32 data_size_in_bytes);
+void svm_fifo_free (svm_fifo_t * f);
+
+int svm_fifo_enqueue_nowait (svm_fifo_t * f, u32 max_bytes,
+ u8 * copy_from_here);
+int svm_fifo_enqueue_with_offset (svm_fifo_t * f, u32 offset,
+ u32 required_bytes, u8 * copy_from_here);
+int svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here);
+
+int svm_fifo_peek (svm_fifo_t * f, u32 offset, u32 max_bytes, u8 * copy_here);
+int svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes);
+u32 svm_fifo_number_ooo_segments (svm_fifo_t * f);
+ooo_segment_t *svm_fifo_first_ooo_segment (svm_fifo_t * f);
+void svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer);
+
+format_function_t format_svm_fifo;
+
+always_inline ooo_segment_t *
+svm_fifo_newest_ooo_segment (svm_fifo_t * f)
+{
+ if (f->ooos_newest == OOO_SEGMENT_INVALID_INDEX)
+ return 0;
+ return pool_elt_at_index (f->ooo_segments, f->ooos_newest);
+}
+
+always_inline void
+svm_fifo_newest_ooo_segment_reset (svm_fifo_t * f)
+{
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
+}
+
+always_inline u32
+ooo_segment_distance_from_tail (svm_fifo_t * f, u32 pos)
+{
+ /* Ambiguous. Assumption is that ooo segments don't touch tail */
+ if (PREDICT_FALSE (pos == f->tail && f->tail == f->head))
+ return f->nitems;
+
+ return (((f->nitems + pos) - f->tail) % f->nitems);
+}
+
+always_inline u32
+ooo_segment_distance_to_tail (svm_fifo_t * f, u32 pos)
+{
+ return (((f->nitems + f->tail) - pos) % f->nitems);
+}
+
+always_inline u32
+ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s)
+{
+ return ooo_segment_distance_from_tail (f, s->start);
+}
+
+always_inline u32
+ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s)
+{
+ return ooo_segment_distance_from_tail (f, s->start) + s->length;
+}
+
+always_inline u32
+ooo_segment_length (svm_fifo_t * f, ooo_segment_t * s)
+{
+ return s->length;
+}
+
+always_inline ooo_segment_t *
+ooo_segment_get_prev (svm_fifo_t * f, ooo_segment_t * s)
+{
+ if (s->prev == OOO_SEGMENT_INVALID_INDEX)
+ return 0;
+ return pool_elt_at_index (f->ooo_segments, s->prev);
+}
+
+always_inline ooo_segment_t *
+ooo_segment_next (svm_fifo_t * f, ooo_segment_t * s)
+{
+ if (s->next == OOO_SEGMENT_INVALID_INDEX)
+ return 0;
+ return pool_elt_at_index (f->ooo_segments, s->next);
+}
+
+#endif /* __included_ssvm_fifo_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c
new file mode 100644
index 00000000..da2b7935
--- /dev/null
+++ b/src/svm/svm_fifo_segment.c
@@ -0,0 +1,643 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <svm/svm_fifo_segment.h>
+
+svm_fifo_segment_main_t svm_fifo_segment_main;
+
+static void
+allocate_new_fifo_chunk (svm_fifo_segment_header_t * fsh,
+ u32 data_size_in_bytes, int chunk_size)
+{
+ int freelist_index;
+ u32 size;
+ u8 *fifo_space;
+ u32 rounded_data_size;
+ svm_fifo_t *f;
+ int i;
+
+ rounded_data_size = (1 << (max_log2 (data_size_in_bytes)));
+ freelist_index = max_log2 (rounded_data_size)
+ - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE);
+
+ /* Calculate space requirement $$$ round-up data_size_in_bytes */
+ size = (sizeof (*f) + rounded_data_size) * chunk_size;
+
+ /* Allocate fifo space. May fail. */
+ fifo_space = clib_mem_alloc_aligned_at_offset
+ (size, CLIB_CACHE_LINE_BYTES, 0 /* align_offset */ ,
+ 0 /* os_out_of_memory */ );
+
+ /* Out of space.. */
+ if (fifo_space == 0)
+ return;
+
+ /* Carve fifo space */
+ f = (svm_fifo_t *) fifo_space;
+ for (i = 0; i < chunk_size; i++)
+ {
+ f->freelist_index = freelist_index;
+ f->next = fsh->free_fifos[freelist_index];
+ fsh->free_fifos[freelist_index] = f;
+ fifo_space += sizeof (*f) + rounded_data_size;
+ f = (svm_fifo_t *) fifo_space;
+ }
+}
+
+static void
+preallocate_fifo_pairs (svm_fifo_segment_private_t * s,
+ svm_fifo_segment_create_args_t * a)
+{
+ svm_fifo_segment_header_t *fsh = s->h;
+ u32 rx_fifo_size, tx_fifo_size, pairs_to_allocate;
+ u32 rx_rounded_data_size, tx_rounded_data_size, pair_size;
+ svm_fifo_t *f;
+ u8 *rx_fifo_space, *tx_fifo_space;
+ int rx_freelist_index, tx_freelist_index;
+ int i;
+
+ /* Parameter check */
+ if (a->rx_fifo_size == 0 || a->tx_fifo_size == 0
+ || a->preallocated_fifo_pairs == 0)
+ return;
+
+ if (a->rx_fifo_size < FIFO_SEGMENT_MIN_FIFO_SIZE ||
+ a->rx_fifo_size > FIFO_SEGMENT_MAX_FIFO_SIZE)
+ {
+ clib_warning ("rx fifo_size out of range %d", a->rx_fifo_size);
+ return;
+ }
+
+ if (a->tx_fifo_size < FIFO_SEGMENT_MIN_FIFO_SIZE ||
+ a->tx_fifo_size > FIFO_SEGMENT_MAX_FIFO_SIZE)
+ {
+ clib_warning ("tx fifo_size out of range %d", a->rx_fifo_size);
+ return;
+ }
+
+ rx_rounded_data_size = (1 << (max_log2 (a->rx_fifo_size)));
+
+ rx_freelist_index = max_log2 (a->rx_fifo_size)
+ - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE);
+
+  tx_rounded_data_size = (1 << (max_log2 (a->tx_fifo_size)));
+
+ tx_freelist_index = max_log2 (a->tx_fifo_size)
+ - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE);
+
+ /* Calculate space requirements */
+ pair_size = 2 * sizeof (*f) + rx_rounded_data_size + tx_rounded_data_size;
+ pairs_to_allocate = clib_min (s->ssvm.ssvm_size / pair_size,
+ a->preallocated_fifo_pairs);
+ rx_fifo_size = (sizeof (*f) + rx_rounded_data_size) * pairs_to_allocate;
+ tx_fifo_size = (sizeof (*f) + tx_rounded_data_size) * pairs_to_allocate;
+
+ vec_validate_init_empty (fsh->free_fifos,
+ clib_max (rx_freelist_index, tx_freelist_index),
+ 0);
+ if (0)
+ clib_warning ("rx_fifo_size %u (%d mb), tx_fifo_size %u (%d mb)",
+ rx_fifo_size, rx_fifo_size >> 20,
+ tx_fifo_size, tx_fifo_size >> 20);
+
+ /* Allocate rx fifo space. May fail. */
+ rx_fifo_space = clib_mem_alloc_aligned_at_offset
+ (rx_fifo_size, CLIB_CACHE_LINE_BYTES, 0 /* align_offset */ ,
+ 0 /* os_out_of_memory */ );
+
+ /* Same for TX */
+ tx_fifo_space = clib_mem_alloc_aligned_at_offset
+ (tx_fifo_size, CLIB_CACHE_LINE_BYTES, 0 /* align_offset */ ,
+ 0 /* os_out_of_memory */ );
+
+ /* Make sure it worked. Clean up if it didn't... */
+ if (rx_fifo_space == 0 || tx_fifo_space == 0)
+ {
+ if (rx_fifo_space)
+ clib_mem_free (rx_fifo_space);
+ else
+ clib_warning ("rx fifo preallocation failure: size %d npairs %d",
+ a->rx_fifo_size, a->preallocated_fifo_pairs);
+
+ if (tx_fifo_space)
+ clib_mem_free (tx_fifo_space);
+ else
+ clib_warning ("tx fifo preallocation failure: size %d nfifos %d",
+ a->tx_fifo_size, a->preallocated_fifo_pairs);
+ return;
+ }
+
+ /* Carve rx fifo space */
+ f = (svm_fifo_t *) rx_fifo_space;
+ for (i = 0; i < pairs_to_allocate; i++)
+ {
+ f->freelist_index = rx_freelist_index;
+ f->next = fsh->free_fifos[rx_freelist_index];
+ fsh->free_fifos[rx_freelist_index] = f;
+ rx_fifo_space += sizeof (*f) + rx_rounded_data_size;
+ f = (svm_fifo_t *) rx_fifo_space;
+ }
+ /* Carve tx fifo space */
+ f = (svm_fifo_t *) tx_fifo_space;
+ for (i = 0; i < pairs_to_allocate; i++)
+ {
+ f->freelist_index = tx_freelist_index;
+ f->next = fsh->free_fifos[tx_freelist_index];
+ fsh->free_fifos[tx_freelist_index] = f;
+ tx_fifo_space += sizeof (*f) + tx_rounded_data_size;
+ f = (svm_fifo_t *) tx_fifo_space;
+ }
+
+ /* Account for the pairs allocated */
+ a->preallocated_fifo_pairs -= pairs_to_allocate;
+}
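+
+/*
+ * Sizing example for the arithmetic above: with 4 KB rx and tx fifos,
+ * pair_size = 2 * sizeof (svm_fifo_t) + 4096 + 4096, i.e. a bit over
+ * 8 KB per pair, so a 64 MB segment holds roughly 8000 pairs;
+ * pairs_to_allocate is clamped to what the segment can actually fit.
+ */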
+
+/** (master) create an svm fifo segment */
+int
+svm_fifo_segment_create (svm_fifo_segment_create_args_t * a)
+{
+ int rv;
+ svm_fifo_segment_private_t *s;
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_header_t *fsh;
+ void *oldheap;
+
+ /* Allocate a fresh segment */
+ pool_get (sm->segments, s);
+ memset (s, 0, sizeof (*s));
+
+ s->ssvm.ssvm_size = a->segment_size;
+ s->ssvm.i_am_master = 1;
+ s->ssvm.my_pid = getpid ();
+ s->ssvm.name = format (0, "%s%c", a->segment_name, 0);
+ s->ssvm.requested_va = sm->next_baseva;
+
+ rv = ssvm_master_init (&s->ssvm, s - sm->segments);
+
+ if (rv)
+ {
+      /* Give back the segment element we just took from the pool */
+      pool_put (sm->segments, s);
+ return (rv);
+ }
+
+ /* Note: requested_va updated due to seg base addr randomization */
+ sm->next_baseva = s->ssvm.requested_va + a->segment_size;
+
+ sh = s->ssvm.sh;
+ oldheap = ssvm_push_heap (sh);
+
+ /* Set up svm_fifo_segment shared header */
+ fsh = clib_mem_alloc (sizeof (*fsh));
+ memset (fsh, 0, sizeof (*fsh));
+ sh->opaque[0] = fsh;
+ s->h = fsh;
+ fsh->segment_name = format (0, "%s%c", a->segment_name, 0);
+ preallocate_fifo_pairs (s, a);
+
+ ssvm_pop_heap (oldheap);
+
+ sh->ready = 1;
+ vec_add1 (a->new_segment_indices, s - sm->segments);
+ return (0);
+}
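+
+/*
+ * Illustrative master-side setup (segment name hypothetical, error
+ * handling elided):
+ *
+ *   svm_fifo_segment_create_args_t a;
+ *   memset (&a, 0, sizeof (a));
+ *   a.segment_name = "fifo-segment-0";
+ *   a.segment_size = 64 << 20;
+ *   if (svm_fifo_segment_create (&a) == 0)
+ *     {
+ *       svm_fifo_segment_private_t *s =
+ *         svm_fifo_segment_get_segment (a.new_segment_indices[0]);
+ *       svm_fifo_t *rx_fifo = svm_fifo_segment_alloc_fifo
+ *         (s, 4096, FIFO_SEGMENT_RX_FREELIST);
+ *     }
+ */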
+
+/** Create an svm fifo segment in process-private memory */
+int
+svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t * a)
+{
+ svm_fifo_segment_private_t *s;
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_header_t *fsh;
+ void *oldheap;
+ u8 **heaps = 0;
+ mheap_t *heap_header;
+ int segment_count = 1;
+ u32 rnd_size = 0;
+ int i;
+
+ if (a->private_segment_count && a->private_segment_size)
+ {
+ u8 *heap;
+ u32 pagesize = clib_mem_get_page_size ();
+      /* Round up to a whole number of pages */
+      rnd_size = (a->private_segment_size + (pagesize - 1)) & ~(pagesize - 1);
+
+ for (i = 0; i < a->private_segment_count; i++)
+ {
+ heap = mheap_alloc (0, rnd_size);
+ if (heap == 0)
+ {
+ clib_unix_warning ("mheap alloc");
+ return -1;
+ }
+ heap_header = mheap_header (heap);
+ heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
+ vec_add1 (heaps, heap);
+ }
+ segment_count = a->private_segment_count;
+ }
+
+ /* Allocate segments */
+ for (i = 0; i < segment_count; i++)
+ {
+ pool_get (sm->segments, s);
+ memset (s, 0, sizeof (*s));
+
+ s->ssvm.ssvm_size = rnd_size;
+ s->ssvm.i_am_master = 1;
+ s->ssvm.my_pid = getpid ();
+ s->ssvm.name = format (0, "%s%c", a->segment_name, 0);
+ s->ssvm.requested_va = ~0;
+
+ /* Allocate a [sic] shared memory header, in process memory... */
+ sh = clib_mem_alloc_aligned (sizeof (*sh), CLIB_CACHE_LINE_BYTES);
+ s->ssvm.sh = sh;
+
+ memset (sh, 0, sizeof (*sh));
+ sh->heap = a->private_segment_count ? heaps[i] : clib_mem_get_heap ();
+
+ /* Set up svm_fifo_segment shared header */
+ fsh = clib_mem_alloc (sizeof (*fsh));
+ memset (fsh, 0, sizeof (*fsh));
+ sh->opaque[0] = fsh;
+ s->h = fsh;
+ fsh->flags = FIFO_SEGMENT_F_IS_PRIVATE;
+ if (!a->private_segment_count)
+ fsh->flags |= FIFO_SEGMENT_F_IS_MAIN_HEAP;
+ fsh->segment_name = format (0, "%s%c", a->segment_name, 0);
+
+ if (a->private_segment_count)
+ {
+ if (i != 0)
+ fsh->flags |= FIFO_SEGMENT_F_IS_PREALLOCATED;
+ oldheap = clib_mem_get_heap ();
+ clib_mem_set_heap (sh->heap);
+ preallocate_fifo_pairs (s, a);
+ clib_mem_set_heap (oldheap);
+ }
+ sh->ready = 1;
+ vec_add1 (a->new_segment_indices, s - sm->segments);
+ }
+ vec_free (heaps);
+ return (0);
+}
+
+/** (slave) attach to an svm fifo segment */
+int
+svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a)
+{
+ int rv;
+ svm_fifo_segment_private_t *s;
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_header_t *fsh;
+
+ /* Allocate a fresh segment */
+ pool_get (sm->segments, s);
+ memset (s, 0, sizeof (*s));
+
+ s->ssvm.ssvm_size = a->segment_size;
+ s->ssvm.my_pid = getpid ();
+ s->ssvm.name = format (0, "%s%c", a->segment_name, 0);
+ s->ssvm.requested_va = sm->next_baseva;
+
+ rv = ssvm_slave_init (&s->ssvm, sm->timeout_in_seconds);
+
+ if (rv)
+ {
+      /* Give back the segment element we just took from the pool */
+      pool_put (sm->segments, s);
+ return (rv);
+ }
+
+ /* Fish the segment header */
+ sh = s->ssvm.sh;
+ fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+ s->h = fsh;
+
+ vec_add1 (a->new_segment_indices, s - sm->segments);
+ return (0);
+}
+
+void
+svm_fifo_segment_delete (svm_fifo_segment_private_t * s)
+{
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+ if (s->h->flags & FIFO_SEGMENT_F_IS_PRIVATE)
+ {
+ /* Don't try to free vpp's heap! */
+ if (!(s->h->flags & FIFO_SEGMENT_F_IS_MAIN_HEAP))
+ mheap_free (s->ssvm.sh->heap);
+ clib_mem_free (s->ssvm.sh);
+ clib_mem_free (s->h);
+ pool_put (sm->segments, s);
+ }
+ else
+ {
+ ssvm_delete (&s->ssvm);
+ pool_put (sm->segments, s);
+ }
+}
+
+svm_fifo_t *
+svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s,
+ u32 data_size_in_bytes,
+ svm_fifo_segment_freelist_t list_index)
+{
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_header_t *fsh;
+ svm_fifo_t *f;
+ void *oldheap;
+ int freelist_index;
+
+ /*
+ * 4K minimum. It's not likely that anything good will happen
+ * with a smaller FIFO.
+ */
+ if (data_size_in_bytes < FIFO_SEGMENT_MIN_FIFO_SIZE ||
+ data_size_in_bytes > FIFO_SEGMENT_MAX_FIFO_SIZE)
+ {
+ clib_warning ("fifo size out of range %d", data_size_in_bytes);
+ return 0;
+ }
+
+ freelist_index = max_log2 (data_size_in_bytes)
+ - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE);
+
+ sh = s->ssvm.sh;
+ fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+
+ ssvm_lock_non_recursive (sh, 1);
+ oldheap = ssvm_push_heap (sh);
+
+ switch (list_index)
+ {
+ case FIFO_SEGMENT_RX_FREELIST:
+ case FIFO_SEGMENT_TX_FREELIST:
+ vec_validate_init_empty (fsh->free_fifos, freelist_index, 0);
+
+ f = fsh->free_fifos[freelist_index];
+ if (PREDICT_FALSE (f == 0))
+ {
+ allocate_new_fifo_chunk (fsh, data_size_in_bytes,
+ FIFO_SEGMENT_ALLOC_CHUNK_SIZE);
+ f = fsh->free_fifos[freelist_index];
+ }
+ if (PREDICT_TRUE (f != 0))
+ {
+ fsh->free_fifos[freelist_index] = f->next;
+ /* (re)initialize the fifo, as in svm_fifo_create */
+ memset (f, 0, sizeof (*f));
+ f->nitems = data_size_in_bytes;
+ f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX;
+ f->refcnt = 1;
+ f->freelist_index = freelist_index;
+ goto found;
+ }
+ /* FALLTHROUGH */
+ case FIFO_SEGMENT_FREELIST_NONE:
+ break;
+
+ default:
+ clib_warning ("ignore bogus freelist %d", list_index);
+ break;
+ }
+
+  /* Note: this can fail, in which case the caller should create another segment */
+ f = svm_fifo_create (data_size_in_bytes);
+ if (PREDICT_FALSE (f == 0))
+ {
+ ssvm_pop_heap (oldheap);
+ ssvm_unlock_non_recursive (sh);
+ return (0);
+ }
+ f->freelist_index = freelist_index;
+
+found:
+ /* If rx_freelist add to active fifos list. When cleaning up segment,
+ * we need a list of active sessions that should be disconnected. Since
+ * both rx and tx fifos keep pointers to the session, it's enough to track
+ * only one. */
+ if (list_index == FIFO_SEGMENT_RX_FREELIST)
+ {
+ if (fsh->fifos)
+ {
+ fsh->fifos->prev = f;
+ f->next = fsh->fifos;
+ }
+ fsh->fifos = f;
+ }
+ fsh->n_active_fifos++;
+
+ ssvm_pop_heap (oldheap);
+ ssvm_unlock_non_recursive (sh);
+ return (f);
+}
+
+void
+svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f,
+ svm_fifo_segment_freelist_t list_index)
+{
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_header_t *fsh;
+ void *oldheap;
+ int freelist_index;
+
+ ASSERT (f->refcnt > 0);
+
+ if (--f->refcnt > 0)
+ return;
+
+ sh = s->ssvm.sh;
+ fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+
+ freelist_index = f->freelist_index;
+
+ ASSERT (freelist_index < vec_len (fsh->free_fifos));
+
+ ssvm_lock_non_recursive (sh, 2);
+ oldheap = ssvm_push_heap (sh);
+
+ switch (list_index)
+ {
+ case FIFO_SEGMENT_RX_FREELIST:
+ /* Remove from active list */
+ if (f->prev)
+ f->prev->next = f->next;
+ else
+ fsh->fifos = f->next;
+ if (f->next)
+ f->next->prev = f->prev;
+ /* Fall through: we add only rx fifos to active pool */
+ case FIFO_SEGMENT_TX_FREELIST:
+ /* Add to free list */
+ f->next = fsh->free_fifos[freelist_index];
+ f->prev = 0;
+ fsh->free_fifos[freelist_index] = f;
+ break;
+ case FIFO_SEGMENT_FREELIST_NONE:
+ break;
+
+ default:
+ clib_warning ("ignore bogus freelist %d", list_index);
+ break;
+ }
+
+ if (CLIB_DEBUG)
+ {
+ f->master_session_index = ~0;
+ f->master_thread_index = ~0;
+ }
+
+ fsh->n_active_fifos--;
+ ssvm_pop_heap (oldheap);
+ ssvm_unlock_non_recursive (sh);
+}
+
+void
+svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds)
+{
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+
+ sm->next_baseva = baseva;
+ sm->timeout_in_seconds = timeout_in_seconds;
+}
+
+u32
+svm_fifo_segment_index (svm_fifo_segment_private_t * s)
+{
+ return s - svm_fifo_segment_main.segments;
+}
+
+/**
+ * Retrieve svm segments pool. Used only for debug purposes.
+ */
+svm_fifo_segment_private_t *
+svm_fifo_segment_segments_pool (void)
+{
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+ return sm->segments;
+}
+
+/**
+ * Get number of active fifos
+ */
+u32
+svm_fifo_segment_num_fifos (svm_fifo_segment_private_t * fifo_segment)
+{
+ return fifo_segment->h->n_active_fifos;
+}
+
+u32
+svm_fifo_segment_num_free_fifos (svm_fifo_segment_private_t * fifo_segment,
+ u32 fifo_size_in_bytes)
+{
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_header_t *fsh;
+ svm_fifo_t *f;
+ int i;
+ u32 count = 0, rounded_data_size, freelist_index;
+
+ sh = fifo_segment->ssvm.sh;
+ fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+
+ /* Count all free fifos? */
+ if (fifo_size_in_bytes == ~0)
+ {
+ for (i = 0; i < vec_len (fsh->free_fifos); i++)
+ {
+ f = fsh->free_fifos[i];
+ if (f == 0)
+ continue;
+
+ while (f)
+ {
+ f = f->next;
+ count++;
+ }
+ }
+ return count;
+ }
+
+ rounded_data_size = (1 << (max_log2 (fifo_size_in_bytes)));
+ freelist_index = max_log2 (rounded_data_size)
+ - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE);
+
+  if (freelist_index >= vec_len (fsh->free_fifos))
+ return 0;
+
+ f = fsh->free_fifos[freelist_index];
+ if (f == 0)
+ return 0;
+
+ while (f)
+ {
+ f = f->next;
+ count++;
+ }
+ return count;
+}
+
+/**
+ * Segment format function
+ */
+u8 *
+format_svm_fifo_segment (u8 * s, va_list * args)
+{
+ svm_fifo_segment_private_t *sp
+ = va_arg (*args, svm_fifo_segment_private_t *);
+ int verbose = va_arg (*args, int);
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_header_t *fsh;
+ svm_fifo_t *f;
+ int i;
+ u32 count;
+ uword indent = format_get_indent (s) + 2;
+
+ sh = sp->ssvm.sh;
+ fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+
+ s = format (s, "%USegment Heap: %U\n", format_white_space, indent,
+ format_mheap, sh->heap, verbose);
+ s = format (s, "%U segment has %u active fifos\n",
+ format_white_space, indent, svm_fifo_segment_num_fifos (sp));
+
+ for (i = 0; i < vec_len (fsh->free_fifos); i++)
+ {
+ f = fsh->free_fifos[i];
+ if (f == 0)
+ continue;
+ count = 0;
+ while (f)
+ {
+ f = f->next;
+ count++;
+ }
+
+ s = format (s, "%U%-5u Kb: %u free",
+ format_white_space, indent + 2,
+ 1 << (i + max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE) - 10),
+ count);
+ }
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h
new file mode 100644
index 00000000..5b771328
--- /dev/null
+++ b/src/svm/svm_fifo_segment.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ssvm_fifo_segment_h__
+#define __included_ssvm_fifo_segment_h__
+
+#include <svm/svm_fifo.h>
+#include <svm/ssvm.h>
+#include <vppinfra/lock.h>
+
+typedef enum
+{
+ FIFO_SEGMENT_FREELIST_NONE = -1,
+ FIFO_SEGMENT_RX_FREELIST = 0,
+ FIFO_SEGMENT_TX_FREELIST,
+ FIFO_SEGMENT_N_FREELISTS
+} svm_fifo_segment_freelist_t;
+
+#define FIFO_SEGMENT_MIN_FIFO_SIZE 4096
+#define FIFO_SEGMENT_MAX_FIFO_SIZE (8<<20) /* 8mb max fifo size */
+#define FIFO_SEGMENT_ALLOC_CHUNK_SIZE 32 /* Allocation quantum */
+
+#define FIFO_SEGMENT_F_IS_PRIVATE 1 << 0 /* Private segment */
+#define FIFO_SEGMENT_F_IS_MAIN_HEAP 1 << 1 /* Segment is main heap */
+#define FIFO_SEGMENT_F_IS_PREALLOCATED 1 << 2 /* Segment is preallocated */
+
+typedef struct
+{
+ svm_fifo_t *fifos; /**< Linked list of active RX fifos */
+ u8 *segment_name; /**< Segment name */
+ svm_fifo_t **free_fifos; /**< Freelists, by fifo size */
+ u32 n_active_fifos; /**< Number of active fifos */
+ u8 flags; /**< Segment flags */
+} svm_fifo_segment_header_t;
+
+typedef struct
+{
+ ssvm_private_t ssvm;
+ svm_fifo_segment_header_t *h;
+} svm_fifo_segment_private_t;
+
+typedef struct
+{
+ volatile u32 lock;
+
+ /** pool of segments */
+ svm_fifo_segment_private_t *segments;
+ /* Where to put the next one */
+ u64 next_baseva;
+ u32 timeout_in_seconds;
+} svm_fifo_segment_main_t;
+
+extern svm_fifo_segment_main_t svm_fifo_segment_main;
+
+typedef struct
+{
+ char *segment_name;
+ u32 segment_size;
+ u32 *new_segment_indices;
+ u32 rx_fifo_size;
+ u32 tx_fifo_size;
+ u32 preallocated_fifo_pairs;
+ u32 private_segment_count;
+ u32 private_segment_size;
+} svm_fifo_segment_create_args_t;
+
+static inline svm_fifo_segment_private_t *
+svm_fifo_segment_get_segment (u32 segment_index)
+{
+ svm_fifo_segment_main_t *ssm = &svm_fifo_segment_main;
+ return vec_elt_at_index (ssm->segments, segment_index);
+}
+
+static inline u8
+svm_fifo_segment_has_fifos (svm_fifo_segment_private_t * fifo_segment)
+{
+ return fifo_segment->h->fifos != 0;
+}
+
+static inline svm_fifo_t *
+svm_fifo_segment_get_fifo_list (svm_fifo_segment_private_t * fifo_segment)
+{
+ return fifo_segment->h->fifos;
+}
+
+#define foreach_ssvm_fifo_segment_api_error \
+_(OUT_OF_SPACE, "Out of space in segment", -200)
+
+typedef enum
+{
+#define _(n,s,c) SSVM_FIFO_SEGMENT_API_ERROR_##n = c,
+ foreach_ssvm_fifo_segment_api_error
+#undef _
+} ssvm_fifo_segment_api_error_enum_t;
+
+int svm_fifo_segment_create (svm_fifo_segment_create_args_t * a);
+int svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t
+ * a);
+int svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a);
+void svm_fifo_segment_delete (svm_fifo_segment_private_t * s);
+
+svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s,
+ u32 data_size_in_bytes,
+ svm_fifo_segment_freelist_t index);
+void svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s,
+ svm_fifo_t * f,
+ svm_fifo_segment_freelist_t index);
+void svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds);
+u32 svm_fifo_segment_index (svm_fifo_segment_private_t * s);
+u32 svm_fifo_segment_num_fifos (svm_fifo_segment_private_t * fifo_segment);
+u32 svm_fifo_segment_num_free_fifos (svm_fifo_segment_private_t *
+ fifo_segment, u32 fifo_size_in_bytes);
+
+svm_fifo_segment_private_t *svm_fifo_segment_segments_pool (void);
+format_function_t format_svm_fifo_segment;
+
+#endif /* __included_ssvm_fifo_segment_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_test.c b/src/svm/svm_test.c
new file mode 100644
index 00000000..ab0b9e24
--- /dev/null
+++ b/src/svm/svm_test.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * svm_test.c -- brain police
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+
+#include "svm.h"
+
+
+int
+main (int argc, char **argv)
+{
+ svm_region_t *root_rp, *rp;
+ svm_map_region_args_t *a = 0;
+
+ vec_validate (a, 0);
+
+  /* Use the API as declared in svm.h: init, then fetch the root region */
+  svm_region_init ();
+  root_rp = svm_get_root_rp ();
+
+  ASSERT (root_rp);
+
+  a->name = "/qvnet";
+  a->size = (4 << 10);
+
+  rp = svm_region_find_or_create (a);
+
+  ASSERT (rp);
+
+  *((u32 *) rp->data_base) = 0xdeadbeef;
+  svm_region_unmap (rp);
+
+ fformat (stdout, "exiting...\n");
+
+ exit (0);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svmdb.c b/src/svm/svmdb.c
new file mode 100644
index 00000000..043b0924
--- /dev/null
+++ b/src/svm/svmdb.c
@@ -0,0 +1,676 @@
+/*
+ *------------------------------------------------------------------
+ * svmdb.c -- simple shared memory database
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/serialize.h>
+
+#include "svmdb.h"
+
+static void local_set_variable_nolock (svmdb_client_t * client,
+ svmdb_namespace_t namespace,
+ u8 * var, u8 * val, u32 elsize);
+
+always_inline void
+region_lock (svm_region_t * rp, int tag)
+{
+ pthread_mutex_lock (&rp->mutex);
+#ifdef MUTEX_DEBUG
+ rp->mutex_owner_pid = getpid ();
+ rp->mutex_owner_tag = tag;
+#endif
+}
+
+always_inline void
+region_unlock (svm_region_t * rp)
+{
+#ifdef MUTEX_DEBUG
+ rp->mutex_owner_pid = 0;
+ rp->mutex_owner_tag = 0;
+#endif
+ pthread_mutex_unlock (&rp->mutex);
+}
+
+svmdb_client_t *
+svmdb_map (svmdb_map_args_t * dba)
+{
+ svmdb_client_t *client = 0;
+ svm_map_region_args_t *a = 0;
+ svm_region_t *db_rp;
+ void *oldheap;
+ svmdb_shm_hdr_t *hp = 0;
+
+ vec_validate (client, 0);
+ vec_validate (a, 0);
+
+ svm_region_init_chroot_uid_gid (dba->root_path, dba->uid, dba->gid);
+
+ a->root_path = dba->root_path;
+ a->name = "/db";
+ a->size = dba->size ? dba->size : SVMDB_DEFAULT_SIZE;
+ a->flags = SVM_FLAGS_MHEAP;
+ a->uid = dba->uid;
+ a->gid = dba->gid;
+
+ db_rp = client->db_rp = svm_region_find_or_create (a);
+
+ ASSERT (db_rp);
+
+ vec_free (a);
+
+ region_lock (client->db_rp, 10);
+ /* Has someone else set up the shared-memory variable table? */
+ if (db_rp->user_ctx)
+ {
+ client->shm = (void *) db_rp->user_ctx;
+ client->pid = getpid ();
+ region_unlock (client->db_rp);
+ ASSERT (client->shm->version == SVMDB_SHM_VERSION);
+ return (client);
+ }
+ /* Nope, it's our problem... */
+
+ if (CLIB_DEBUG > 2)
+ {
+ /* Add a bogus client (pid=0) so the svm won't be deallocated */
+ clib_warning
+ ("[%d] adding fake client (pid=0) so '%s' won't be unlinked",
+ getpid (), db_rp->region_name);
+ oldheap = svm_push_pvt_heap (db_rp);
+ vec_add1 (client->db_rp->client_pids, 0);
+ svm_pop_heap (oldheap);
+ }
+ oldheap = svm_push_data_heap (db_rp);
+
+ vec_validate (hp, 0);
+ hp->version = SVMDB_SHM_VERSION;
+ hp->namespaces[SVMDB_NAMESPACE_STRING]
+ = hash_create_string (0, sizeof (uword));
+ hp->namespaces[SVMDB_NAMESPACE_VEC]
+ = hash_create_string (0, sizeof (uword));
+
+ db_rp->user_ctx = hp;
+ client->shm = hp;
+
+ svm_pop_heap (oldheap);
+ region_unlock (client->db_rp);
+ client->pid = getpid ();
+
+ return (client);
+}
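+
+/*
+ * Illustrative client sequence (variable name hypothetical):
+ *
+ *   svmdb_map_args_t ma;
+ *   memset (&ma, 0, sizeof (ma));
+ *   svmdb_client_t *c = svmdb_map (&ma);  // default size and root path
+ *   svmdb_local_set_string_variable (c, "owner", "vpp");
+ *   char *v = svmdb_local_get_string_variable (c, "owner");
+ *   svmdb_unmap (c);
+ */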
+
+void
+svmdb_unmap (svmdb_client_t * client)
+{
+ ASSERT (client);
+
+ if (!svm_get_root_rp ())
+ return;
+
+ svm_region_unmap ((void *) client->db_rp);
+ svm_region_exit ();
+ vec_free (client);
+}
+
+static void
+notify_value (svmdb_value_t * v, svmdb_action_t a)
+{
+ int i;
+ int rv;
+ union sigval sv;
+ u32 value;
+ u32 *dead_registrations = 0;
+
+ svmdb_notify_t *np;
+
+ for (i = 0; i < vec_len (v->notifications); i++)
+ {
+ np = vec_elt_at_index (v->notifications, i);
+ if (np->action == a)
+ {
+ value = (np->action << 28) | (np->opaque);
+ sv.sival_ptr = (void *) (uword) value;
+ do
+ {
+ rv = 0;
+ if (sigqueue (np->pid, np->signum, sv) == 0)
+ break;
+ rv = errno;
+ }
+ while (rv == EAGAIN);
+ if (rv == 0)
+ continue;
+ vec_add1 (dead_registrations, i);
+ }
+ }
+
+ for (i = 0; i < vec_len (dead_registrations); i++)
+ {
+ np = vec_elt_at_index (v->notifications, dead_registrations[i]);
+ clib_warning ("dead reg pid %d sig %d action %d opaque %x",
+ np->pid, np->signum, np->action, np->opaque);
+ vec_delete (v->notifications, 1, dead_registrations[i]);
+ }
+ vec_free (dead_registrations);
+}
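+
+/*
+ * The sigval delivered above packs the action into the top four bits
+ * and the registrant's opaque cookie into the remainder:
+ * value = (action << 28) | opaque. For example, an action of 2 with
+ * opaque 0x123 arrives as sival 0x20000123.
+ */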
+
+int
+svmdb_local_add_del_notification (svmdb_client_t * client,
+ svmdb_notification_args_t * a)
+{
+ uword *h;
+ void *oldheap;
+ hash_pair_t *hp;
+ svmdb_shm_hdr_t *shm;
+ u8 *dummy_value = 0;
+ svmdb_value_t *value;
+ svmdb_notify_t *np;
+ int i;
+ int rv = 0;
+
+ ASSERT (a->elsize);
+
+ region_lock (client->db_rp, 18);
+ shm = client->shm;
+ oldheap = svm_push_data_heap (client->db_rp);
+
+ h = shm->namespaces[a->nspace];
+
+ hp = hash_get_pair_mem (h, a->var);
+ if (hp == 0)
+ {
+ local_set_variable_nolock (client, a->nspace, (u8 *) a->var,
+ dummy_value, a->elsize);
+ /* might have moved */
+ h = shm->namespaces[a->nspace];
+ hp = hash_get_pair_mem (h, a->var);
+ ASSERT (hp);
+ }
+
+ value = pool_elt_at_index (shm->values, hp->value[0]);
+
+ for (i = 0; i < vec_len (value->notifications); i++)
+ {
+ np = vec_elt_at_index (value->notifications, i);
+ if ((np->pid == client->pid)
+ && (np->signum == a->signum)
+ && (np->action == a->action) && (np->opaque == a->opaque))
+ {
+ if (a->add_del == 0 /* delete */ )
+ {
+ vec_delete (value->notifications, 1, i);
+ goto out;
+ }
+ else
+ { /* add */
+ clib_warning
+ ("%s: ignore dup reg pid %d signum %d action %d opaque %x",
+ a->var, client->pid, a->signum, a->action, a->opaque);
+ rv = -2;
+ goto out;
+ }
+ }
+ }
+ if (a->add_del == 0)
+ {
+ rv = -3;
+ goto out;
+ }
+
+ vec_add2 (value->notifications, np, 1);
+ np->pid = client->pid;
+ np->signum = a->signum;
+ np->action = a->action;
+ np->opaque = a->opaque;
+
+out:
+ svm_pop_heap (oldheap);
+ region_unlock (client->db_rp);
+ return rv;
+}
+
+
+static void
+local_unset_variable_nolock (svmdb_client_t * client,
+ svmdb_namespace_t namespace, char *var)
+{
+ uword *h;
+ svmdb_value_t *oldvalue;
+ hash_pair_t *hp;
+
+ h = client->shm->namespaces[namespace];
+ hp = hash_get_pair_mem (h, var);
+ if (hp)
+ {
+ oldvalue = pool_elt_at_index (client->shm->values, hp->value[0]);
+ if (vec_len (oldvalue->notifications))
+ notify_value (oldvalue, SVMDB_ACTION_UNSET);
+ /* zero length value means unset */
+ _vec_len (oldvalue->value) = 0;
+ }
+ client->shm->namespaces[namespace] = h;
+}
+
+void
+svmdb_local_unset_string_variable (svmdb_client_t * client, char *var)
+{
+ void *oldheap;
+
+ region_lock (client->db_rp, 11);
+ oldheap = svm_push_data_heap (client->db_rp);
+ local_unset_variable_nolock (client, SVMDB_NAMESPACE_STRING, var);
+ svm_pop_heap (oldheap);
+ region_unlock (client->db_rp);
+}
+
+static void
+local_set_variable_nolock (svmdb_client_t * client,
+ svmdb_namespace_t namespace,
+ u8 * var, u8 * val, u32 elsize)
+{
+ uword *h;
+ hash_pair_t *hp;
+ u8 *name;
+ svmdb_shm_hdr_t *shm;
+
+ shm = client->shm;
+ h = shm->namespaces[namespace];
+ hp = hash_get_pair_mem (h, var);
+ if (hp)
+ {
+ svmdb_value_t *oldvalue;
+ oldvalue = pool_elt_at_index (client->shm->values, hp->value[0]);
+ vec_alloc (oldvalue->value, vec_len (val) * elsize);
+ clib_memcpy (oldvalue->value, val, vec_len (val) * elsize);
+ _vec_len (oldvalue->value) = vec_len (val);
+ notify_value (oldvalue, SVMDB_ACTION_SET);
+ }
+ else
+ {
+ svmdb_value_t *newvalue;
+ pool_get (shm->values, newvalue);
+ memset (newvalue, 0, sizeof (*newvalue));
+ newvalue->elsize = elsize;
+ vec_alloc (newvalue->value, vec_len (val) * elsize);
+ clib_memcpy (newvalue->value, val, vec_len (val) * elsize);
+ _vec_len (newvalue->value) = vec_len (val);
+ name = format (0, "%s%c", var, 0);
+ hash_set_mem (h, name, newvalue - shm->values);
+ }
+ shm->namespaces[namespace] = h;
+}
+
+void
+svmdb_local_set_string_variable (svmdb_client_t * client,
+ char *var, char *val)
+{
+ void *oldheap;
+
+ region_lock (client->db_rp, 12);
+ oldheap = svm_push_data_heap (client->db_rp);
+
+ local_unset_variable_nolock (client, SVMDB_NAMESPACE_STRING, var);
+
+ local_set_variable_nolock (client, SVMDB_NAMESPACE_STRING,
+ (u8 *) var, (u8 *) val, 1 /* elsize */ );
+ svm_pop_heap (oldheap);
+ region_unlock (client->db_rp);
+}
+
+static u8 *
+local_get_variable_nolock (svmdb_client_t * client,
+ svmdb_namespace_t namespace, u8 * var)
+{
+ uword *h;
+ uword *p;
+ svmdb_shm_hdr_t *shm;
+ svmdb_value_t *oldvalue;
+
+ shm = client->shm;
+ h = shm->namespaces[namespace];
+ p = hash_get_mem (h, var);
+ if (p)
+ {
+ oldvalue = pool_elt_at_index (shm->values, p[0]);
+ notify_value (oldvalue, SVMDB_ACTION_GET);
+ return (oldvalue->value);
+ }
+ return 0;
+}
+
+void *
+svmdb_local_get_variable_reference (svmdb_client_t * client,
+ svmdb_namespace_t namespace, char *var)
+{
+ u8 *rv;
+
+ region_lock (client->db_rp, 19);
+ rv = local_get_variable_nolock (client, namespace, (u8 *) var);
+ region_unlock (client->db_rp);
+ return (void *) rv;
+}
+
+char *
+svmdb_local_get_string_variable (svmdb_client_t * client, char *var)
+{
+ u8 *rv = 0;
+
+ region_lock (client->db_rp, 13);
+ rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_STRING, (u8 *) var);
+
+ if (rv && vec_len (rv))
+ {
+ rv = format (0, "%s", rv);
+ vec_add1 (rv, 0);
+ }
+ region_unlock (client->db_rp);
+ return ((char *) rv);
+}
+
+void
+svmdb_local_dump_strings (svmdb_client_t * client)
+{
+ uword *h;
+ u8 *key;
+ u32 value;
+ svmdb_shm_hdr_t *shm = client->shm;
+
+ region_lock (client->db_rp, 14);
+
+ h = client->shm->namespaces[SVMDB_NAMESPACE_STRING];
+
+ /* *INDENT-OFF* */
+ hash_foreach_mem(key, value, h,
+ ({
+ svmdb_value_t *v = pool_elt_at_index (shm->values, value);
+
+ fformat(stdout, "%s: %s\n", key,
+ vec_len(v->value) ? v->value : (u8 *)"(nil)");
+ }));
+ /* *INDENT-ON* */
+ region_unlock (client->db_rp);
+}
+
+int
+svmdb_local_serialize_strings (svmdb_client_t * client, char *filename)
+{
+ uword *h;
+ u8 *key;
+ u32 value;
+ svmdb_shm_hdr_t *shm = client->shm;
+ serialize_main_t _sm, *sm = &_sm;
+ clib_error_t *error = 0;
+ u8 *sanitized_name = 0;
+ int fd = 0;
+
+ if (strstr (filename, "..") || index (filename, '/'))
+ {
+ error = clib_error_return (0, "Illegal characters in filename '%s'",
+ filename);
+ goto out;
+ }
+
+ sanitized_name = format (0, "/tmp/%s%c", filename, 0);
+
+ fd = creat ((char *) sanitized_name, 0644);
+
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "Create '%s'", sanitized_name);
+ goto out;
+ }
+
+ serialize_open_unix_file_descriptor (sm, fd);
+
+ region_lock (client->db_rp, 20);
+
+ h = client->shm->namespaces[SVMDB_NAMESPACE_STRING];
+
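+  /* File layout: element count first, then one (name, value)
+   * cstring pair per variable with a non-nil value;
+   * svmdb_local_unserialize_strings expects exactly this layout. */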
+ serialize_likely_small_unsigned_integer (sm, hash_elts (h));
+
+ /* *INDENT-OFF* */
+ hash_foreach_mem(key, value, h,
+ ({
+ svmdb_value_t *v = pool_elt_at_index (shm->values, value);
+
+ /* Omit names with nil values */
+ if (vec_len(v->value))
+ {
+ serialize_cstring (sm, (char *)key);
+ serialize_cstring (sm, (char *)v->value);
+ }
+ }));
+ /* *INDENT-ON* */
+ region_unlock (client->db_rp);
+
+ serialize_close (sm);
+
+out:
+ if (fd > 0 && close (fd) < 0)
+ error = clib_error_return_unix (0, "close fd %d", fd);
+
+ if (error)
+ {
+ clib_error_report (error);
+ return -1;
+ }
+ return 0;
+}
+
+int
+svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename)
+{
+ serialize_main_t _sm, *sm = &_sm;
+ void *oldheap;
+ clib_error_t *error = 0;
+ u8 *key, *value;
+ int fd = 0;
+ u32 nelts;
+ int i;
+
+ fd = open (filename, O_RDONLY);
+
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "Failed to open '%s'", filename);
+ goto out;
+ }
+
+ unserialize_open_unix_file_descriptor (sm, fd);
+
+ region_lock (client->db_rp, 21);
+ oldheap = svm_push_data_heap (client->db_rp);
+
+ nelts = unserialize_likely_small_unsigned_integer (sm);
+
+ for (i = 0; i < nelts; i++)
+ {
+ unserialize_cstring (sm, (char **) &key);
+ unserialize_cstring (sm, (char **) &value);
+ local_set_variable_nolock (client, SVMDB_NAMESPACE_STRING,
+ key, value, 1 /* elsize */ );
+ vec_free (key);
+ vec_free (value);
+ }
+ svm_pop_heap (oldheap);
+ region_unlock (client->db_rp);
+
+ serialize_close (sm);
+
+out:
+ if (fd > 0 && close (fd) < 0)
+ error = clib_error_return_unix (0, "close fd %d", fd);
+
+ if (error)
+ {
+ clib_error_report (error);
+ return -1;
+ }
+ return 0;
+}
+
+void
+svmdb_local_unset_vec_variable (svmdb_client_t * client, char *var)
+{
+ void *oldheap;
+
+ region_lock (client->db_rp, 15);
+ oldheap = svm_push_data_heap (client->db_rp);
+ local_unset_variable_nolock (client, SVMDB_NAMESPACE_VEC, var);
+ svm_pop_heap (oldheap);
+ region_unlock (client->db_rp);
+}
+
+void
+svmdb_local_set_vec_variable (svmdb_client_t * client,
+ char *var, void *val_arg, u32 elsize)
+{
+ u8 *val = (u8 *) val_arg;
+ void *oldheap;
+
+ region_lock (client->db_rp, 16);
+ oldheap = svm_push_data_heap (client->db_rp);
+
+ local_unset_variable_nolock (client, SVMDB_NAMESPACE_VEC, var);
+ local_set_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var,
+ val, elsize);
+
+ svm_pop_heap (oldheap);
+ region_unlock (client->db_rp);
+}
+
+void *
+svmdb_local_get_vec_variable (svmdb_client_t * client, char *var, u32 elsize)
+{
+ u8 *rv = 0;
+ u8 *copy = 0;
+
+ region_lock (client->db_rp, 17);
+
+ rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var);
+
+ if (rv && vec_len (rv))
+ {
+ /* Make a copy in process-local memory */
+ vec_alloc (copy, vec_len (rv) * elsize);
+ clib_memcpy (copy, rv, vec_len (rv) * elsize);
+ _vec_len (copy) = vec_len (rv);
+ region_unlock (client->db_rp);
+ return (copy);
+ }
+ region_unlock (client->db_rp);
+ return (0);
+}
+
+void
+svmdb_local_dump_vecs (svmdb_client_t * client)
+{
+ uword *h;
+ u8 *key;
+ u32 value;
+ svmdb_shm_hdr_t *shm;
+
+ region_lock (client->db_rp, 17);
+ shm = client->shm;
+
+ h = client->shm->namespaces[SVMDB_NAMESPACE_VEC];
+
+ /* *INDENT-OFF* */
+ hash_foreach_mem(key, value, h,
+ ({
+ svmdb_value_t *v = pool_elt_at_index (shm->values, value);
+ (void) fformat(stdout, "%s:\n %U (%.2f)\n", key,
+ format_hex_bytes, v->value,
+ vec_len(v->value)*v->elsize, ((f64 *)(v->value))[0]);
+ }));
+ /* *INDENT-ON* */
+
+ region_unlock (client->db_rp);
+}
+
+void *
+svmdb_local_find_or_add_vec_variable (svmdb_client_t * client,
+ char *var, u32 nbytes)
+{
+ void *oldheap;
+ u8 *rv = 0;
+
+ region_lock (client->db_rp, 18);
+ oldheap = svm_push_data_heap (client->db_rp);
+
+ rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var);
+
+ if (rv)
+ {
+ goto out;
+ }
+ else
+ {
+ uword *h;
+ u8 *name;
+ svmdb_shm_hdr_t *shm;
+ svmdb_value_t *newvalue;
+
+ shm = client->shm;
+ h = shm->namespaces[SVMDB_NAMESPACE_VEC];
+
+ pool_get (shm->values, newvalue);
+ memset (newvalue, 0, sizeof (*newvalue));
+ newvalue->elsize = 1;
+ vec_alloc (newvalue->value, nbytes);
+ _vec_len (newvalue->value) = nbytes;
+ name = format (0, "%s%c", var, 0);
+ hash_set_mem (h, name, newvalue - shm->values);
+ shm->namespaces[SVMDB_NAMESPACE_VEC] = h;
+ rv = newvalue->value;
+ }
+
+out:
+ svm_pop_heap (oldheap);
+ region_unlock (client->db_rp);
+ return (rv);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svmdb.h b/src/svm/svmdb.h
new file mode 100644
index 00000000..e35be8aa
--- /dev/null
+++ b/src/svm/svmdb.h
@@ -0,0 +1,135 @@
+/*
+ *------------------------------------------------------------------
+ * svmdb.h - shared VM database
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __included_svmdb_h__
+#define __included_svmdb_h__
+
+#include "svm.h"
+
+typedef enum
+{
+ SVMDB_ACTION_ILLEGAL = 0,
+ SVMDB_ACTION_GET, /* not clear why anyone would care */
+ SVMDB_ACTION_SET,
+ SVMDB_ACTION_UNSET,
+} svmdb_action_t;
+
+typedef struct
+{
+ int pid;
+ int signum;
+ u32 action:4;
+ u32 opaque:28;
+} svmdb_notify_t;
+
+typedef struct
+{
+ u8 *value;
+ svmdb_notify_t *notifications;
+ u32 elsize;
+} svmdb_value_t;
+
+typedef enum
+{
+ SVMDB_NAMESPACE_STRING = 0,
+ SVMDB_NAMESPACE_VEC,
+ SVMDB_N_NAMESPACES,
+} svmdb_namespace_t;
+
+typedef struct
+{
+ uword version;
+ /* pool of values */
+ svmdb_value_t *values;
+ uword *namespaces[SVMDB_N_NAMESPACES];
+} svmdb_shm_hdr_t;
+
+#define SVMDB_SHM_VERSION 2
+
+typedef struct
+{
+ int flags;
+ int pid;
+ svm_region_t *db_rp;
+ svmdb_shm_hdr_t *shm;
+} svmdb_client_t;
+
+typedef struct
+{
+ int add_del;
+ svmdb_namespace_t nspace;
+ char *var;
+ u32 elsize;
+ int signum;
+ u32 action:4;
+ u32 opaque:28;
+} svmdb_notification_args_t;
+
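+/*
+ * Illustrative sketch of registering for SET notifications on a
+ * string variable (values hypothetical; svmdbtool's test_reg shows
+ * a complete, working example):
+ *
+ *    svmdb_notification_args_t _a, *a = &_a;
+ *
+ *    memset (a, 0, sizeof (*a));
+ *    a->add_del = 1;                 (1 = register, 0 = unregister)
+ *    a->nspace = SVMDB_NAMESPACE_STRING;
+ *    a->var = "foo";
+ *    a->elsize = 1;
+ *    a->signum = SIGUSR2;
+ *    a->action = SVMDB_ACTION_SET;
+ *    a->opaque = 0x1234567;          (28 bits available)
+ *    svmdb_local_add_del_notification (client, a);
+ */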
+typedef struct
+{
+ const char *root_path;
+ uword size;
+ u32 uid;
+ u32 gid;
+} svmdb_map_args_t;
+
+/*
+ * Must be a reasonable number, several mb smaller than
+ * SVM_GLOBAL_REGION_SIZE, or no donut for you...
+ */
+#define SVMDB_DEFAULT_SIZE (4<<20)
+
+svmdb_client_t *svmdb_map (svmdb_map_args_t *);
+
+void svmdb_unmap (svmdb_client_t * client);
+void svmdb_local_unset_string_variable (svmdb_client_t * client, char *var);
+void svmdb_local_set_string_variable (svmdb_client_t * client,
+ char *var, char *val);
+char *svmdb_local_get_string_variable (svmdb_client_t * client, char *var);
+void *svmdb_local_get_variable_reference (svmdb_client_t * client,
+ svmdb_namespace_t ns, char *var);
+
+void svmdb_local_dump_strings (svmdb_client_t * client);
+
+void svmdb_local_unset_vec_variable (svmdb_client_t * client, char *var);
+void svmdb_local_set_vec_variable (svmdb_client_t * client,
+ char *var, void *val, u32 elsize);
+void *svmdb_local_get_vec_variable (svmdb_client_t * client, char *var,
+ u32 elsize);
+void svmdb_local_dump_vecs (svmdb_client_t * client);
+
+int svmdb_local_add_del_notification (svmdb_client_t * client,
+ svmdb_notification_args_t * args);
+
+void *svmdb_local_find_or_add_vec_variable (svmdb_client_t * client,
+ char *var, u32 nbytes);
+
+int svmdb_local_serialize_strings (svmdb_client_t * client, char *filename);
+int svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename);
+
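+/*
+ * Typical client usage, as an illustrative sketch only (error
+ * handling omitted; the variable name "greeting" is hypothetical):
+ *
+ *    svmdb_map_args_t _ma, *ma = &_ma;
+ *    svmdb_client_t *c;
+ *    char *v;
+ *
+ *    memset (ma, 0, sizeof (*ma));
+ *    ma->size = SVMDB_DEFAULT_SIZE;
+ *    c = svmdb_map (ma);
+ *    svmdb_local_set_string_variable (c, "greeting", "hello");
+ *    v = svmdb_local_get_string_variable (c, "greeting");
+ *    vec_free (v);    (the caller owns the returned copy)
+ *    svmdb_unmap (c);
+ */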
+
+#endif /* __included_svmdb_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svmdbtool.c b/src/svm/svmdbtool.c
new file mode 100644
index 00000000..a0af15fc
--- /dev/null
+++ b/src/svm/svmdbtool.c
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <pwd.h>
+#include <grp.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/serialize.h>
+#include "svmdb.h"
+
+typedef struct
+{
+ svmdb_map_args_t map_args;
+ int uid, gid;
+ uword size;
+} svmdbtool_main_t;
+
+svmdbtool_main_t svmdbtool_main;
+
+static inline svmdb_map_args_t *
+map_arg_setup (char *chroot_path)
+{
+ svmdbtool_main_t *sm = &svmdbtool_main;
+ svmdb_map_args_t *ma = &sm->map_args;
+
+ memset (ma, 0, sizeof (*ma));
+ ma->root_path = chroot_path;
+ ma->size = sm->size;
+ ma->uid = sm->uid;
+ ma->gid = sm->gid;
+ return ma;
+}
+
+static void
+get_string (char *chroot_path, u8 * vbl)
+{
+ svmdb_client_t *c;
+ char *rv;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+
+ rv = svmdb_local_get_string_variable (c, (char *) vbl);
+
+ fformat (stdout, "%s\n", rv ? rv : "UNSET");
+ vec_free (rv);
+ svmdb_unmap (c);
+}
+
+static void
+set_string (char *chroot_path, u8 * vbl, u8 * value)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+ svmdb_local_set_string_variable (c, (char *) vbl, (char *) value);
+ svmdb_unmap (c);
+}
+
+static void
+unset_string (char *chroot_path, u8 * vbl)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+ svmdb_local_unset_string_variable (c, (char *) vbl);
+ svmdb_unmap (c);
+}
+
+static void
+dump_strings (char *chroot_path)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+ svmdb_local_dump_strings (c);
+ svmdb_unmap (c);
+}
+
+static void
+serialize_strings (char *chroot_path, char *filename)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+ (void) svmdb_local_serialize_strings (c, filename);
+ svmdb_unmap (c);
+}
+
+static void
+unserialize_strings (char *chroot_path, char *filename)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+ (void) svmdb_local_unserialize_strings (c, filename);
+ svmdb_unmap (c);
+}
+
+static void
+test_vlib_vec_rate (char *chroot_path, f64 vr)
+{
+ svmdb_client_t *c;
+ f64 *tv = 0;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+
+ vec_add1 (tv, vr);
+
+ svmdb_local_set_vec_variable (c, "vlib_vector_rate", (char *) tv,
+ sizeof (*tv));
+ svmdb_unmap (c);
+
+ vec_free (tv);
+}
+
+static void
+test_vec (char *chroot_path, u8 * vbl)
+{
+ svmdb_client_t *c;
+ u64 *tv = 0;
+ int i;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+
+ /* my amp goes to 11 */
+ for (i = 0; i < 11; i++)
+ {
+ vec_add1 (tv, i);
+ }
+
+ svmdb_local_set_vec_variable (c, (char *) vbl, (char *) tv, sizeof (tv[0]));
+ svmdb_unmap (c);
+
+ vec_free (tv);
+}
+
+static void
+fake_install (char *chroot_path, u8 * add_value)
+{
+ svmdb_client_t *c;
+ u8 *v = 0;
+ u8 **values = 0;
+ u8 *oldvalue;
+ u8 *value;
+ int nitems = 0, i;
+ serialize_main_t m;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+
+ oldvalue = svmdb_local_get_vec_variable (c, "installed_sw", 1);
+ if (oldvalue)
+ {
+ unserialize_open_data (&m, oldvalue, vec_len (oldvalue));
+ nitems = unserialize_likely_small_unsigned_integer (&m);
+ for (i = 0; i < nitems; i++)
+ {
+ unserialize_cstring (&m, (char **) &value);
+ vec_add1 (values, value);
+ }
+      vec_free (oldvalue);
+ }
+ nitems++;
+ value = format (0, "%s%c", add_value, 0);
+
+ vec_add1 (values, value);
+
+ fformat (stdout, "Resulting installed_sw vector:\n");
+
+ serialize_open_vector (&m, v);
+ serialize_likely_small_unsigned_integer (&m, vec_len (values));
+ for (i = 0; i < vec_len (values); i++)
+ {
+ fformat (stdout, "%s\n", values[i]);
+ serialize_cstring (&m, (char *) values[i]);
+ }
+
+ v = serialize_close_vector (&m);
+
+ svmdb_local_set_vec_variable (c, "installed_sw", v, sizeof (v[0]));
+ svmdb_unmap (c);
+
+ for (i = 0; i < vec_len (values); i++)
+ vec_free (values[i]);
+ vec_free (values);
+}
+
+static void
+sigaction_handler (int signum, siginfo_t * i, void *notused)
+{
+ u32 action, opaque;
+
+ action = (u32) (uword) i->si_ptr;
+ action >>= 28;
+ opaque = (u32) (uword) i->si_ptr;
+ opaque &= ~(0xF0000000);
+
+ clib_warning ("signal %d, action %d, opaque %x", signum, action, opaque);
+}
+
+static void
+test_reg (char *chroot_path, u8 * vbl)
+{
+ svmdb_client_t *c;
+ svmdb_notification_args_t args;
+ svmdb_notification_args_t *a = &args;
+ struct sigaction sa;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ memset (&sa, 0, sizeof (sa));
+ sa.sa_sigaction = sigaction_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction (SIGUSR2, &sa, 0) < 0)
+ {
+ clib_unix_warning ("sigaction");
+ return;
+ }
+
+ memset (a, 0, sizeof (*a));
+
+ c = svmdb_map (ma);
+
+ a->add_del = 1 /* add */ ;
+ a->nspace = SVMDB_NAMESPACE_STRING;
+ a->var = (char *) vbl;
+ a->elsize = 1;
+ a->signum = SIGUSR2;
+ a->action = SVMDB_ACTION_GET;
+ a->opaque = 0x0eadbeef;
+
+ svmdb_local_add_del_notification (c, a);
+
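+  /* This read should fire SIGUSR2: the handler above logs
+     action SVMDB_ACTION_GET and opaque 0x0eadbeef. */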
+ (void) svmdb_local_get_string_variable (c, (char *) vbl);
+
+ a->add_del = 0; /* del */
+ svmdb_local_add_del_notification (c, a);
+
+ svmdb_unmap (c);
+}
+
+static void
+unset_vec (char *chroot_path, u8 * vbl)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+
+ svmdb_local_unset_vec_variable (c, (char *) vbl);
+ svmdb_unmap (c);
+}
+
+static void
+dump_vecs (char *chroot_path)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+
+ svmdb_local_dump_vecs (c);
+ svmdb_unmap (c);
+}
+
+static void
+crash_test (char *chroot_path)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+
+ clib_warning ("Grab region mutex and crash deliberately!");
+ c->db_rp->mutex_owner_pid = getpid ();
+ c->db_rp->mutex_owner_tag = -13;
+ pthread_mutex_lock (&c->db_rp->mutex);
+
+ abort ();
+}
+
+static void
+map_with_size (char *chroot_path, uword size)
+{
+ svmdb_client_t *c;
+ svmdb_map_args_t *ma;
+
+ svmdbtool_main.size = size;
+ ma = map_arg_setup (chroot_path);
+
+ c = svmdb_map (ma);
+
+ svmdb_unmap (c);
+}
+
+int
+main (int argc, char **argv)
+{
+ unformat_input_t input;
+ int parsed = 0;
+ u8 *vbl = 0, *value = 0;
+ char *chroot_path = 0;
+ u8 *chroot_path_u8;
+ u8 *filename;
+ uword size;
+ f64 vr;
+ int uid, gid, rv;
+ struct passwd _pw, *pw;
+ struct group _grp, *grp;
+ char *s, buf[128];
+
+ svmdbtool_main.uid = geteuid ();
+ svmdbtool_main.gid = getegid ();
+
+ unformat_init_command_line (&input, argv);
+
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&input, "get-string %s", &vbl))
+ {
+ get_string (chroot_path, vbl);
+ vec_free (vbl);
+ parsed++;
+ }
+ else if (unformat (&input, "set-string %s %s", &vbl, &value))
+ {
+ set_string (chroot_path, vbl, value);
+ vec_free (vbl);
+ vec_free (value);
+ parsed++;
+ }
+ else if (unformat (&input, "unset-string %s", &vbl))
+ {
+ unset_string (chroot_path, vbl);
+ vec_free (vbl);
+ parsed++;
+ }
+ else if (unformat (&input, "dump-strings"))
+ {
+ dump_strings (chroot_path);
+ parsed++;
+ }
+ else if (unformat (&input, "unset-vec %s", &vbl))
+ {
+ unset_vec (chroot_path, vbl);
+ vec_free (vbl);
+ parsed++;
+ }
+ else if (unformat (&input, "dump-vecs"))
+ {
+ dump_vecs (chroot_path);
+ parsed++;
+ }
+ else if (unformat (&input, "test-vec %s", &vbl))
+ {
+ test_vec (chroot_path, vbl);
+ // vec_free(vbl);
+ parsed++;
+ }
+ else if (unformat (&input, "vlib-vec-rate %f", &vr))
+ {
+ test_vlib_vec_rate (chroot_path, vr);
+ parsed++;
+ }
+ else if (unformat (&input, "test-reg %s", &vbl))
+ {
+ test_reg (chroot_path, vbl);
+ parsed++;
+ }
+ else if (unformat (&input, "crash-test"))
+ {
+ crash_test (chroot_path);
+ }
+ else if (unformat (&input, "chroot %s", &chroot_path_u8))
+ {
+ chroot_path = (char *) chroot_path_u8;
+ }
+ else if (unformat (&input, "fake-install %s", &value))
+ {
+ fake_install (chroot_path, value);
+ parsed++;
+ }
+ else if (unformat (&input, "size %d", &size))
+ {
+ map_with_size (chroot_path, size);
+ parsed++;
+ }
+ else if (unformat (&input, "uid %d", &uid))
+ svmdbtool_main.uid = uid;
+ else if (unformat (&input, "gid %d", &gid))
+ svmdbtool_main.gid = gid;
+ else if (unformat (&input, "uid %s", &s))
+ {
+ /* lookup the username */
+ pw = NULL;
+ rv = getpwnam_r (s, &_pw, buf, sizeof (buf), &pw);
+	  if (rv != 0)
+ {
+ fformat (stderr, "cannot fetch username %s", s);
+ exit (1);
+ }
+ if (pw == NULL)
+ {
+ fformat (stderr, "username %s does not exist", s);
+ exit (1);
+ }
+ vec_free (s);
+ svmdbtool_main.uid = pw->pw_uid;
+ }
+ else if (unformat (&input, "gid %s", &s))
+ {
+ /* lookup the group name */
+ grp = NULL;
+ rv = getgrnam_r (s, &_grp, buf, sizeof (buf), &grp);
+ if (rv != 0)
+ {
+ fformat (stderr, "cannot fetch group %s", s);
+ exit (1);
+ }
+ if (grp == NULL)
+ {
+ fformat (stderr, "group %s does not exist", s);
+ exit (1);
+ }
+ vec_free (s);
+ svmdbtool_main.gid = grp->gr_gid;
+ }
+ else if (unformat (&input, "serialize-strings %s", &filename))
+ {
+ vec_add1 (filename, 0);
+ serialize_strings (chroot_path, (char *) filename);
+ parsed++;
+ }
+ else if (unformat (&input, "unserialize-strings %s", &filename))
+ {
+ vec_add1 (filename, 0);
+ unserialize_strings (chroot_path, (char *) filename);
+ parsed++;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ unformat_free (&input);
+
+ if (!parsed)
+ {
+      fformat (stdout, "%s: get-string <name> | set-string <name> <value>\n",
+	       argv[0]);
+      fformat (stdout, "  unset-string <name> | dump-strings\n");
+      fformat (stdout, "  test-vec <name> | test-reg <name>\n");
+      fformat (stdout, "  unset-vec <name> | dump-vecs | vlib-vec-rate <rate>\n");
+      fformat (stdout, "  serialize-strings <file> | unserialize-strings <file>\n");
+      fformat (stdout, "  fake-install <name> | size <nbytes>\n");
+      fformat (stdout, "  chroot <prefix> [uid <nnn-or-userid>]\n");
+      fformat (stdout, "  [gid <nnn-or-group-name>]\n");
+
+ exit (0);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svmtool.c b/src/svm/svmtool.c
new file mode 100644
index 00000000..01ae4221
--- /dev/null
+++ b/src/svm/svmtool.c
@@ -0,0 +1,528 @@
+/*
+ *------------------------------------------------------------------
+ * svmtool.c
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+
+#include "svm.h"
+
+/*
+ * format_all_svm_regions
+ * Maps / unmaps regions. Do NOT call from client code!
+ */
+u8 *
+format_all_svm_regions (u8 * s, va_list * args)
+{
+ int verbose = va_arg (*args, int);
+ svm_region_t *root_rp = svm_get_root_rp ();
+ svm_main_region_t *mp;
+ svm_subregion_t *subp;
+ svm_region_t *rp;
+ svm_map_region_args_t *a = 0;
+ u8 **svm_names = 0;
+ u8 *name = 0;
+ int i;
+
+ ASSERT (root_rp);
+
+ pthread_mutex_lock (&root_rp->mutex);
+
+ s = format (s, "%U", format_svm_region, root_rp, verbose);
+
+ mp = root_rp->data_base;
+
+  /*
+   * Snapshot the subregion names; we can't hold the root rp
+   * mutex across find_or_create.
+   */
+ /* *INDENT-OFF* */
+ pool_foreach (subp, mp->subregions, ({
+ name = vec_dup (subp->subregion_name);
+ vec_add1(svm_names, name);
+ }));
+ /* *INDENT-ON* */
+
+ pthread_mutex_unlock (&root_rp->mutex);
+
+ for (i = 0; i < vec_len (svm_names); i++)
+ {
+ vec_validate (a, 0);
+ a->name = (char *) svm_names[i];
+ rp = svm_region_find_or_create (a);
+ if (rp)
+ {
+ pthread_mutex_lock (&rp->mutex);
+ s = format (s, "%U", format_svm_region, rp, verbose);
+ pthread_mutex_unlock (&rp->mutex);
+ svm_region_unmap (rp);
+ vec_free (svm_names[i]);
+ }
+ vec_free (a);
+ }
+ vec_free (svm_names);
+ return (s);
+}
+
+void
+show (char *chroot_path, int verbose)
+{
+ svm_map_region_args_t *a = 0;
+
+ vec_validate (a, 0);
+
+ svm_region_init_chroot (chroot_path);
+
+ fformat (stdout, "My pid is %d\n", getpid ());
+
+ fformat (stdout, "%U", format_all_svm_regions, verbose);
+
+ svm_region_exit ();
+
+ vec_free (a);
+}
+
+
+static void *
+svm_map_region_nolock (svm_map_region_args_t * a)
+{
+ int svm_fd;
+ svm_region_t *rp;
+ int deadman = 0;
+ u8 *shm_name;
+
+ ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
+
+ shm_name = shm_name_from_svm_map_region_args (a);
+
+ svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);
+
+ if (svm_fd < 0)
+ {
+ perror ("svm_region_map(mmap open)");
+ return (0);
+ }
+ vec_free (shm_name);
+
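+  /* Map a single page at any address first, just to read the region
+     header and learn the region's fixed virtual base and size */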
+ rp = mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
+
+ if (rp == (svm_region_t *) MAP_FAILED)
+ {
+ close (svm_fd);
+ clib_warning ("mmap");
+ return (0);
+ }
+ /*
+ * We lost the footrace to create this region; make sure
+ * the winner has crossed the finish line.
+ */
+ while (rp->version == 0 && deadman++ < 5)
+ {
+ sleep (1);
+ }
+
+ /*
+ * <bleep>-ed?
+ */
+ if (rp->version == 0)
+ {
+ clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION);
+ munmap (rp, MMAP_PAGESIZE);
+ return (0);
+ }
+ /* Remap now that the region has been placed */
+ a->baseva = rp->virtual_base;
+ a->size = rp->virtual_size;
+ munmap (rp, MMAP_PAGESIZE);
+
+ rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, svm_fd, 0);
+ if ((uword) rp == (uword) MAP_FAILED)
+ {
+ clib_unix_warning ("mmap");
+ return (0);
+ }
+
+ if ((uword) rp != rp->virtual_base)
+ {
+ clib_warning ("mmap botch");
+ }
+
+ if (pthread_mutex_trylock (&rp->mutex))
+ {
+ clib_warning ("rp->mutex LOCKED by pid %d, tag %d, cleared...",
+ rp->mutex_owner_pid, rp->mutex_owner_tag);
+ memset (&rp->mutex, 0, sizeof (rp->mutex));
+
+ }
+ else
+ {
+ clib_warning ("mutex OK...\n");
+ pthread_mutex_unlock (&rp->mutex);
+ }
+
+ return ((void *) rp);
+}
+
+/*
+ * rnd_pagesize
+ * Round to a pagesize multiple, presumably 4k works
+ */
+static u64
+rnd_pagesize (u64 size)
+{
+ u64 rv;
+
+ rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
+ return (rv);
+}
+
+#define MUTEX_DEBUG
+
+always_inline void
+region_lock (svm_region_t * rp, int tag)
+{
+ pthread_mutex_lock (&rp->mutex);
+#ifdef MUTEX_DEBUG
+ rp->mutex_owner_pid = getpid ();
+ rp->mutex_owner_tag = tag;
+#endif
+}
+
+always_inline void
+region_unlock (svm_region_t * rp)
+{
+#ifdef MUTEX_DEBUG
+ rp->mutex_owner_pid = 0;
+ rp->mutex_owner_tag = 0;
+#endif
+ pthread_mutex_unlock (&rp->mutex);
+}
+
+
+static void *
+svm_existing_region_map_nolock (void *root_arg, svm_map_region_args_t * a)
+{
+ svm_region_t *root_rp = root_arg;
+ svm_main_region_t *mp;
+ svm_region_t *rp;
+ void *oldheap;
+ uword *p;
+
+ a->size += MMAP_PAGESIZE +
+ (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
+ a->size = rnd_pagesize (a->size);
+
+ region_lock (root_rp, 4);
+ oldheap = svm_push_pvt_heap (root_rp);
+ mp = root_rp->data_base;
+
+ ASSERT (mp);
+
+ p = hash_get_mem (mp->name_hash, a->name);
+
+  if (p)
+    {
+      rp = svm_map_region_nolock (a);
+      region_unlock (root_rp);
+      svm_pop_heap (oldheap);
+      return rp;
+    }
+
+  /* Name not found; drop the lock and heap before bailing */
+  region_unlock (root_rp);
+  svm_pop_heap (oldheap);
+  return 0;
+}
+
+static void
+trace (char *chroot_path, char *name, int enable_disable)
+{
+ svm_map_region_args_t *a = 0;
+ svm_region_t *db_rp;
+ void *oldheap;
+
+ vec_validate (a, 0);
+
+ svm_region_init_chroot (chroot_path);
+
+ a->name = name;
+ a->size = 1 << 20;
+ a->flags = SVM_FLAGS_MHEAP;
+
+ db_rp = svm_region_find_or_create (a);
+
+ ASSERT (db_rp);
+
+ region_lock (db_rp, 20);
+
+ oldheap = svm_push_data_heap (db_rp);
+
+ mheap_trace (db_rp->data_heap, enable_disable);
+
+ svm_pop_heap (oldheap);
+ region_unlock (db_rp);
+
+ svm_region_unmap ((void *) db_rp);
+ svm_region_exit ();
+ vec_free (a);
+}
+
+static void
+subregion_repair (char *chroot_path)
+{
+ int i;
+ svm_main_region_t *mp;
+ svm_map_region_args_t a;
+ svm_region_t *root_rp;
+ svm_region_t *rp;
+ svm_subregion_t *subp;
+ u8 *name = 0;
+ u8 **svm_names = 0;
+
+ svm_region_init_chroot (chroot_path);
+ root_rp = svm_get_root_rp ();
+
+ pthread_mutex_lock (&root_rp->mutex);
+
+ mp = root_rp->data_base;
+
+  /*
+   * Snapshot the subregion names; we can't hold the root rp
+   * mutex across find_or_create.
+   */
+ /* *INDENT-OFF* */
+ pool_foreach (subp, mp->subregions, ({
+ name = vec_dup (subp->subregion_name);
+ vec_add1(svm_names, name);
+ }));
+ /* *INDENT-ON* */
+
+ pthread_mutex_unlock (&root_rp->mutex);
+
+ for (i = 0; i < vec_len (svm_names); i++)
+ {
+ memset (&a, 0, sizeof (a));
+ a.root_path = chroot_path;
+ a.name = (char *) svm_names[i];
+ fformat (stdout, "Checking %s region...\n", a.name);
+ rp = svm_existing_region_map_nolock (root_rp, &a);
+ if (rp)
+ {
+ svm_region_unmap (rp);
+ vec_free (svm_names[i]);
+ }
+ }
+ vec_free (svm_names);
+}
+
+void
+repair (char *chroot_path, int crash_root_region)
+{
+ svm_region_t *root_rp = 0;
+ svm_map_region_args_t *a = 0;
+ void *svm_map_region (svm_map_region_args_t * a);
+ int svm_fd;
+ u8 *shm_name;
+
+ fformat (stdout, "our pid: %d\n", getpid ());
+
+ vec_validate (a, 0);
+
+ a->root_path = chroot_path;
+ a->name = SVM_GLOBAL_REGION_NAME;
+ a->baseva = SVM_GLOBAL_REGION_BASEVA;
+ a->size = SVM_GLOBAL_REGION_SIZE;
+ a->flags = SVM_FLAGS_NODATA;
+
+ shm_name = shm_name_from_svm_map_region_args (a);
+
+ svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);
+
+ if (svm_fd < 0)
+ {
+ perror ("svm_region_map(mmap open)");
+ goto out;
+ }
+
+ vec_free (shm_name);
+
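+  /* Bootstrap: map one page to read the region header and discover
+     where the root region must be re-mapped */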
+ root_rp = mmap (0, MMAP_PAGESIZE,
+ PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
+
+ if (root_rp == (svm_region_t *) MAP_FAILED)
+ {
+ close (svm_fd);
+ clib_warning ("mmap");
+ goto out;
+ }
+
+ /* Remap now that the region has been placed */
+ clib_warning ("remap to 0x%x", root_rp->virtual_base);
+
+ a->baseva = root_rp->virtual_base;
+ a->size = root_rp->virtual_size;
+ munmap (root_rp, MMAP_PAGESIZE);
+
+ root_rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, svm_fd, 0);
+ if ((uword) root_rp == (uword) MAP_FAILED)
+ {
+ clib_unix_warning ("mmap");
+ goto out;
+ }
+
+ close (svm_fd);
+
+ if ((uword) root_rp != root_rp->virtual_base)
+ {
+ clib_warning ("mmap botch");
+ goto out;
+ }
+
+ if (pthread_mutex_trylock (&root_rp->mutex))
+ {
+ clib_warning ("root_rp->mutex LOCKED by pid %d, tag %d, cleared...",
+ root_rp->mutex_owner_pid, root_rp->mutex_owner_tag);
+ memset (&root_rp->mutex, 0, sizeof (root_rp->mutex));
+ goto out;
+ }
+ else
+ {
+ clib_warning ("root_rp->mutex OK...\n");
+ pthread_mutex_unlock (&root_rp->mutex);
+ }
+
+out:
+ vec_free (a);
+ /*
+ * Now that the root region is known to be OK,
+ * fix broken subregions
+ */
+ subregion_repair (chroot_path);
+
+ if (crash_root_region)
+ {
+ clib_warning ("Leaving root region locked on purpose...");
+ pthread_mutex_lock (&root_rp->mutex);
+ root_rp->mutex_owner_pid = getpid ();
+ root_rp->mutex_owner_tag = 99;
+ }
+ svm_region_exit ();
+}
+
+int
+main (int argc, char **argv)
+{
+ unformat_input_t input;
+ int parsed = 0;
+ char *name;
+ char *chroot_path = 0;
+ u8 *chroot_u8;
+
+ unformat_init_command_line (&input, argv);
+
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&input, "show-verbose"))
+ {
+ show (chroot_path, 1);
+ parsed++;
+ }
+ else if (unformat (&input, "show"))
+ {
+ show (chroot_path, 0);
+ parsed++;
+ }
+ else if (unformat (&input, "client-scan"))
+ {
+ svm_client_scan (chroot_path);
+ parsed++;
+ }
+ else if (unformat (&input, "repair"))
+ {
+ repair (chroot_path, 0 /* fix it */ );
+ parsed++;
+ }
+ else if (unformat (&input, "crash"))
+ {
+ repair (chroot_path, 1 /* crash it */ );
+ parsed++;
+ }
+ else if (unformat (&input, "trace-on %s", &name))
+ {
+ trace (chroot_path, name, 1);
+ parsed++;
+ }
+ else if (unformat (&input, "trace-off %s", &name))
+ {
+ trace (chroot_path, name, 0);
+ parsed++;
+ }
+ else if (unformat (&input, "chroot %s", &chroot_u8))
+ {
+ chroot_path = (char *) chroot_u8;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ unformat_free (&input);
+
+ if (!parsed)
+ {
+      fformat (stdout,
+	       "%s: show | show-verbose | client-scan | repair\n",
+	       argv[0]);
+      fformat (stdout, "  trace-on <region-name> | trace-off <region-name>\n");
+      fformat (stdout, "  chroot <prefix>\n");
+ }
+ exit (0);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/test_svm_fifo1.c b/src/svm/test_svm_fifo1.c
new file mode 100644
index 00000000..3bdca949
--- /dev/null
+++ b/src/svm/test_svm_fifo1.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "svm_fifo_segment.h"
+
+clib_error_t *
+hello_world (int verbose)
+{
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_private_t *sp;
+ svm_fifo_t *f;
+ int rv;
+ u8 *test_data;
+ u8 *retrieved_data = 0;
+ clib_error_t *error = 0;
+
+ memset (a, 0, sizeof (*a));
+
+ a->segment_name = "fifo-test1";
+ a->segment_size = 256 << 10;
+ a->rx_fifo_size = 4096;
+ a->tx_fifo_size = 4096;
+ a->preallocated_fifo_pairs = 4;
+
+ rv = svm_fifo_segment_create (a);
+
+ if (rv)
+ return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
+
+ sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+
+ f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST);
+
+ if (f == 0)
+ return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed");
+
+ test_data = format (0, "Hello world%c", 0);
+ vec_validate (retrieved_data, vec_len (test_data) - 1);
+
+ while (svm_fifo_max_enqueue (f) >= vec_len (test_data))
+ svm_fifo_enqueue_nowait (f, vec_len (test_data), test_data);
+
+ while (svm_fifo_max_dequeue (f) >= vec_len (test_data))
+ svm_fifo_dequeue_nowait (f, vec_len (retrieved_data), retrieved_data);
+
+ while (svm_fifo_max_enqueue (f) >= vec_len (test_data))
+ svm_fifo_enqueue_nowait (f, vec_len (test_data), test_data);
+
+ while (svm_fifo_max_dequeue (f) >= vec_len (test_data))
+ svm_fifo_dequeue_nowait (f, vec_len (retrieved_data), retrieved_data);
+
+ if (!memcmp (retrieved_data, test_data, vec_len (test_data)))
+ error = clib_error_return (0, "data test OK, got '%s'", retrieved_data);
+ else
+ error = clib_error_return (0, "data test FAIL!");
+
+ svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST);
+
+ return error;
+}
+
+clib_error_t *
+master (int verbose)
+{
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_private_t *sp;
+ svm_fifo_t *f;
+ int rv;
+ u8 *test_data;
+ u8 *retrieved_data = 0;
+ int i;
+
+ memset (a, 0, sizeof (*a));
+
+ a->segment_name = "fifo-test1";
+ a->segment_size = 256 << 10;
+
+ rv = svm_fifo_segment_create (a);
+
+ if (rv)
+ return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
+
+ sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+
+ f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST);
+
+ if (f == 0)
+ return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed");
+
+ test_data = format (0, "Hello world%c", 0);
+ vec_validate (retrieved_data, vec_len (test_data) - 1);
+
+ for (i = 0; i < 1000; i++)
+ svm_fifo_enqueue_nowait (f, vec_len (test_data), test_data);
+
+ return clib_error_return (0, "master (enqueue) done");
+}
+
+clib_error_t *
+mempig (int verbose)
+{
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_private_t *sp;
+ svm_fifo_t *f;
+ svm_fifo_t **flist = 0;
+ int rv;
+ int i;
+
+ memset (a, 0, sizeof (*a));
+
+ a->segment_name = "fifo-test1";
+ a->segment_size = 256 << 10;
+
+ rv = svm_fifo_segment_create (a);
+
+ if (rv)
+ return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
+
+ sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+
+ for (i = 0; i < 1000; i++)
+ {
+ f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST);
+ if (f == 0)
+ break;
+ vec_add1 (flist, f);
+ }
+
+ fformat (stdout, "Try #1: created %d fifos...\n", vec_len (flist));
+ for (i = 0; i < vec_len (flist); i++)
+ {
+ f = flist[i];
+ svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST);
+ }
+
+ _vec_len (flist) = 0;
+
+ for (i = 0; i < 1000; i++)
+ {
+ f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST);
+ if (f == 0)
+ break;
+ vec_add1 (flist, f);
+ }
+
+ fformat (stdout, "Try #2: created %d fifos...\n", vec_len (flist));
+ for (i = 0; i < vec_len (flist); i++)
+ {
+ f = flist[i];
+ svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+offset (int verbose)
+{
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_private_t *sp;
+ svm_fifo_t *f;
+ int rv;
+ u32 *test_data = 0;
+ u32 *recovered_data = 0;
+ int i;
+
+ memset (a, 0, sizeof (*a));
+
+ a->segment_name = "fifo-test1";
+ a->segment_size = 256 << 10;
+
+ rv = svm_fifo_segment_create (a);
+
+ if (rv)
+ return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
+
+ sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+
+ f = svm_fifo_segment_alloc_fifo (sp, 200 << 10, FIFO_SEGMENT_RX_FREELIST);
+
+ if (f == 0)
+ return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed");
+
+ for (i = 0; i < (3 * 1024); i++)
+ vec_add1 (test_data, i);
+
+ /* Enqueue the first 1024 u32's */
+ svm_fifo_enqueue_nowait (f, 4096 /* bytes to enqueue */ ,
+ (u8 *) test_data);
+
+  /* Enqueue the third 1024 u32's at offset 4096 bytes past the current tail */
+ svm_fifo_enqueue_with_offset (f, 4096, 4096, (u8 *) & test_data[2048]);
+
+ /* Enqueue the second 1024 u32's at the current tail */
+ svm_fifo_enqueue_nowait (f, 4096 /* bytes to enqueue */ ,
+ (u8 *) & test_data[1024]);
+
+ vec_validate (recovered_data, (3 * 1024) - 1);
+
+ svm_fifo_dequeue_nowait (f, 3 * 4096, (u8 *) recovered_data);
+
+ for (i = 0; i < (3 * 1024); i++)
+ {
+ if (recovered_data[i] != test_data[i])
+ {
+ clib_warning ("[%d] expected %d recovered %d", i,
+ test_data[i], recovered_data[i]);
+ return clib_error_return (0, "offset test FAILED");
+ }
+ }
+
+ return clib_error_return (0, "offset test OK");
+}
+
+clib_error_t *
+slave (int verbose)
+{
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_private_t *sp;
+ svm_fifo_t *f;
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_header_t *fsh;
+ int rv;
+ u8 *test_data;
+ u8 *retrieved_data = 0;
+ int i;
+
+ memset (a, 0, sizeof (*a));
+
+ a->segment_name = "fifo-test1";
+
+ rv = svm_fifo_segment_attach (a);
+
+ if (rv)
+ return clib_error_return (0, "svm_fifo_segment_attach returned %d", rv);
+
+ sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]);
+ sh = sp->ssvm.sh;
+ fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+
+ /* might wanna wait.. */
+ f = fsh->fifos;
+
+ /* Lazy bastards united */
+ test_data = format (0, "Hello world%c", 0);
+ vec_validate (retrieved_data, vec_len (test_data) - 1);
+
+ for (i = 0; i < 1000; i++)
+ {
+ svm_fifo_dequeue_nowait (f, vec_len (retrieved_data), retrieved_data);
+ if (memcmp (retrieved_data, test_data, vec_len (retrieved_data)))
+ return clib_error_return (0, "retrieved data incorrect, '%s'",
+ retrieved_data);
+ }
+
+ return clib_error_return (0, "slave (dequeue) done");
+}
+
+
+int
+test_ssvm_fifo1 (unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ int verbose = 0;
+ int test_id = 0;
+
+ svm_fifo_segment_init (0x200000000ULL, 20);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose %d", &verbose))
+ ;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "master"))
+ test_id = 1;
+ else if (unformat (input, "slave"))
+ test_id = 2;
+ else if (unformat (input, "mempig"))
+ test_id = 3;
+ else if (unformat (input, "offset"))
+ test_id = 4;
+ else
+ {
+ error = clib_error_create ("unknown input `%U'\n",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ switch (test_id)
+ {
+ case 0:
+ error = hello_world (verbose);
+ break;
+
+ case 1:
+ error = master (verbose);
+ break;
+
+ case 2:
+ error = slave (verbose);
+ break;
+
+ case 3:
+ error = mempig (verbose);
+ break;
+
+ case 4:
+ error = offset (verbose);
+ break;
+
+ default:
+ error = clib_error_return (0, "test id %d unknown", test_id);
+ break;
+ }
+
+out:
+ if (error)
+ clib_error_report (error);
+
+ return 0;
+}
+
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int r;
+
+ unformat_init_command_line (&i, argv);
+ r = test_ssvm_fifo1 (&i);
+ unformat_free (&i);
+ return r;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/tests/vnet/README b/src/tests/vnet/README
new file mode 100644
index 00000000..10579e50
--- /dev/null
+++ b/src/tests/vnet/README
@@ -0,0 +1,10 @@
+Unit test infrastructure for vnet
+
+To run unit tests do the following:
+
+ 1. build vpp with 'vpp_enable_tests = yes' in build-data/platforms/vpp.mk
+
+ 2. go to build-root/build-$tag-$arch/vnet
+
+ 3. run
+ $ make check
diff --git a/src/tests/vnet/lisp-cp/test_cp_serdes.c b/src/tests/vnet/lisp-cp/test_cp_serdes.c
new file mode 100644
index 00000000..8e8c8455
--- /dev/null
+++ b/src/tests/vnet/lisp-cp/test_cp_serdes.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/error.h>
+#include <vnet/lisp-cp/lisp_cp_messages.h>
+#include <vnet/lisp-cp/control.h>
+#include <vnet/lisp-cp/lisp_msg_serdes.h>
+#include <vlibapi/api.h>
+#include <vnet/lisp-cp/packets.h>
+
+#define _assert(e)                  \
+  do {                              \
+    error = CLIB_ERROR_ASSERT (e);  \
+    if (error)                      \
+      goto done;                    \
+  } while (0)
+
+static void print_chunk(u8 * b, int * offset, int c, char * des)
+{
+  int i, n = offset[0] + c;
+ for (i = offset[0]; i < n; i++)
+ {
+ printf("0x%02x, ", b[i]);
+ }
+ printf(" // %s\n", des);
+ *offset += c;
+}
+
+void print_map_request(map_request_hdr_t * h)
+{
+#define pchunk(_count, _desc) \
+ print_chunk((u8 *)h, &offset, _count, _desc)
+
+ int offset = 0;
+
+ pchunk(4, "data");
+ pchunk(8, "Nonce");
+ pchunk(2, "Source-EID-AFI");
+ pchunk(4, "Source EID Address");
+ pchunk(2, "ITR-RLOC-AFI 1");
+ pchunk(4, "ITR-RLOC Address 1");
+ pchunk(2, "ITR-RLOC-AFI 2");
+ pchunk(16, "ITR-RLOC Address 2");
+ pchunk(1, "REC: reserved");
+ pchunk(1, "REC: EID mask-len");
+ pchunk(2, "REC: EID-prefix-AFI");
+ pchunk(4, "REC: EID-prefix");
+ printf("\n");
+}
+
+static clib_error_t * test_lisp_msg_push_ecm ()
+{
+ vlib_main_t * vm = vlib_get_main ();
+ clib_error_t * error = 0;
+ gid_address_t la, ra;
+ vlib_buffer_t * b = 0;
+ u32 buff_len = 900;
+ int lp = 0x15, rp = 0x14;
+
+ b = clib_mem_alloc (buff_len);
+ memset((u8 *)b, 0, buff_len);
+ b->current_length = buff_len;
+ b->current_data = sizeof(udp_header_t) + sizeof(ip4_header_t) +
+ sizeof(ecm_hdr_t) + 1;
+
+ la.type = GID_ADDR_IP_PREFIX;
+ la.ippref.addr.ip.v4.as_u32 = 0xa1b2c3d4;
+ la.ippref.addr.version = IP4;
+
+ ra.type = GID_ADDR_IP_PREFIX;
+ ra.ippref.addr.ip.v4.as_u32 = 0x90817263;
+ ra.ippref.addr.version = IP4;
+
+ ecm_hdr_t * lh = lisp_msg_push_ecm (vm, b, lp, rp, &la, &ra);
+
+ u8 expected_ecm_hdr[] = {
+ 0x80, 0x00, 0x00, 0x00
+ };
+ _assert(0 == memcmp(expected_ecm_hdr, lh, sizeof(expected_ecm_hdr)));
+
+ ip4_header_t * ih = (ip4_header_t *) (lh + 1);
+ /* clear ip checksum */
+ memset((u8 *)ih + 10, 0, 2);
+
+ u8 expected_ip4_hdr[] = {
+ 0x45, /* version; IHL */
+ 0x00, /* services */
+ 0x03, 0xa0, /* total length */
+ 0x00, 0x00, /* identification */
+ 0x40, 0x00, /* flags; fragment offset*/
+ 0xff, /* TTL */
+ 0x11, /* protocol */
+ 0x00, 0x00, /* header checksum */
+ 0xd4, 0xc3, 0xb2, 0xa1, /* src IP */
+ 0x63, 0x72, 0x81, 0x90, /* dst IP */
+ };
+ _assert(0 == memcmp(ih, expected_ip4_hdr, sizeof(expected_ip4_hdr)));
+
+ udp_header_t * uh = (udp_header_t *) (ih + 1);
+ /* clear udp checksum */
+ memset((u8 *)uh + 6, 0, 2);
+
+ u8 expected_udp_hdr[] = {
+ 0x00, 0x15, /* src port */
+ 0x00, 0x14, /* dst port */
+ 0x03, 0x8c, /* length */
+ 0x00, 0x00, /* checksum */
+ };
+ _assert(0 == memcmp(uh, expected_udp_hdr, sizeof(expected_udp_hdr)));
+
+done:
+ clib_mem_free (b);
+ return error;
+}
+
+static clib_error_t * test_lisp_msg_parse_mapping_record ()
+{
+ clib_error_t * error = 0;
+ locator_t probed;
+ locator_t * locs = 0;
+ vlib_buffer_t * b = 0;
+ gid_address_t eid;
+ u32 buff_len = 500;
+
+ b = clib_mem_alloc (buff_len);
+ memset((u8 *)b, 0, buff_len);
+
+ u8 map_reply_records[] = {
+ /* 1. record */
+ 0x01, 0x02, 0x03, 0x04, /* record TTL */
+ 0x01, /* locator count */
+ 0x00, 0x00, 0x00, /* eid-mask-len; ... */
+ 0x00, 0x00, /* reserved; map-version num */
+ 0x00, 0x01, /* EID-Prefix-AFI */
+ 0x33, 0x44, 0x55, 0x66, /* eid-prefix */
+ /* loc */
+    0x0a, /* priority */
+    0x0b, /* weight */
+    0x0c, /* m-priority */
+    0x0d, /* m-weight */
+    0x00, 0x00, /* unused flags */
+    0x00, 0x01, /* Loc-AFI */
+    0xaa, 0xbb, 0xcc, 0xdd, /* Locator */
+ };
+ b->current_length = buff_len;
+ clib_memcpy(b->data, map_reply_records, sizeof(map_reply_records));
+
+ lisp_msg_parse_mapping_record (b, &eid, &locs, &probed);
+ _assert(vec_len (locs) == 1);
+ _assert(eid.ippref.addr.ip.v4.as_u32 == 0x66554433);
+ _assert(locs[0].local == 0);
+ _assert(locs[0].address.ippref.addr.ip.v4.as_u32 == 0xddccbbaa);
+ _assert(locs[0].address.type == GID_ADDR_IP_PREFIX);
+ _assert(locs[0].priority == 0xa);
+ _assert(locs[0].weight == 0xb);
+ _assert(locs[0].mpriority == 0xc);
+ _assert(locs[0].mweight == 0xd);
+
+done:
+ clib_mem_free (b);
+ if (locs)
+ vec_free (locs);
+ return error;
+}
+
+static map_request_hdr_t *
+build_map_request (lisp_cp_main_t * lcm, vlib_buffer_t * b,
+ gid_address_t * rlocs)
+{
+ gid_address_t _seid, * seid = &_seid;
+ gid_address_t _deid, * deid = &_deid;
+ u8 is_smr_invoked = 1;
+ u8 rloc_probe_set = 0;
+ u64 nonce = 0;
+ map_request_hdr_t * h = 0;
+ memset (deid, 0, sizeof (deid[0]));
+ memset (seid, 0, sizeof (seid[0]));
+
+ gid_address_type (seid) = GID_ADDR_IP_PREFIX;
+ ip_address_t * ip_addr = &gid_address_ip (seid);
+ ip_addr_v4 (ip_addr).as_u32 = 0x12345678;
+ seid->ippref.addr.version = IP4;
+
+ gid_address_type (deid) = GID_ADDR_IP_PREFIX;
+ ip_address_t * ip_addr2 = &gid_address_ip (deid);
+ ip_addr_v4 (ip_addr2).as_u32 = 0x9abcdef0;
+ deid->ippref.addr.version = IP4;
+ gid_address_ippref_len (deid) = 24;
+
+ h = lisp_msg_put_mreq (lcm, b, seid, deid, rlocs,
+ is_smr_invoked, rloc_probe_set, &nonce);
+ vec_free(rlocs);
+ return h;
+}
+
+static void
+generate_rlocs (gid_address_t **rlocs, u32 * count)
+{
+ gid_address_t gid_addr_data, * gid_addr = &gid_addr_data;
+ memset (gid_addr, 0, sizeof (gid_addr[0]));
+ ip_address_t * addr = &gid_address_ip (gid_addr);
+
+ gid_address_type (gid_addr) = GID_ADDR_IP_PREFIX;
+
+ ip_addr_version (addr) = IP4;
+ ip_addr_v4 (addr).data_u32 = 0x10203040;
+ vec_add1 (rlocs[0], gid_addr[0]);
+
+ ip_addr_v6 (addr).as_u32[0] = 0xffeeddcc;
+ ip_addr_v6 (addr).as_u32[1] = 0xbbaa9988;
+ ip_addr_v6 (addr).as_u32[2] = 0x77665544;
+ ip_addr_v6 (addr).as_u32[3] = 0x33221100;
+ ip_addr_version (addr) = IP6;
+ vec_add1 (rlocs[0], gid_addr[0]);
+}
+
+static clib_error_t * test_lisp_msg_parse ()
+{
+ gid_address_t eid;
+ lisp_cp_main_t * lcm = vnet_lisp_cp_get_main();
+ map_request_hdr_t *h;
+ gid_address_t gid;
+ clib_error_t * error = 0;
+ vlib_buffer_t * b;
+ gid_address_t * rlocs_decode = 0, * rlocs = 0;
+ u32 rloc_count_parse = 0;
+
+ u8 * data = clib_mem_alloc(500);
+ memset(data, 0, 500);
+ b = (vlib_buffer_t *) data;
+
+ generate_rlocs (&rlocs_decode, &rloc_count_parse);
+ h = build_map_request (lcm, b, rlocs_decode);
+
+ vlib_buffer_pull(b, sizeof(*h));
+ u32 len = lisp_msg_parse_addr(b, &gid);
+ _assert (len == 2 + 4
+           /* Source-EID-AFI field length + IPv4 address length */);
+ _assert (gid.ippref.addr.ip.v4.as_u32 == 0x12345678);
+ _assert (gid.ippref.addr.version == IP4);
+
+ u8 rloc_count = MREQ_ITR_RLOC_COUNT(h) + 1;
+ lisp_msg_parse_itr_rlocs (b, &rlocs, rloc_count);
+
+ _assert (vec_len (rlocs) == 2);
+ _assert (rlocs[0].ippref.addr.ip.v4.as_u32 == 0x10203040);
+ _assert (rlocs[0].ippref.addr.version == IP4);
+
+ _assert (rlocs[1].ippref.addr.ip.v6.as_u32[0] == 0xffeeddcc);
+ _assert (rlocs[1].ippref.addr.ip.v6.as_u32[1] == 0xbbaa9988);
+ _assert (rlocs[1].ippref.addr.ip.v6.as_u32[2] == 0x77665544);
+ _assert (rlocs[1].ippref.addr.ip.v6.as_u32[3] == 0x33221100);
+ _assert (rlocs[1].ippref.addr.version == IP6);
+
+ lisp_msg_parse_eid_rec (b, &eid);
+ _assert (eid.ippref.addr.ip.v4.as_u32 == 0x9abcdef0);
+ _assert (eid.ippref.addr.version == IP4);
+ _assert (eid.ippref.len == 24);
+
+done:
+ clib_mem_free (data);
+ if (rlocs)
+ vec_free (rlocs);
+ return error;
+}
+
+static clib_error_t * test_lisp_msg_put_mreq_with_lcaf ()
+{
+ lisp_cp_main_t * lcm = vnet_lisp_cp_get_main ();
+ clib_error_t * error = 0;
+ map_request_hdr_t *h = 0;
+ gid_address_t * rlocs = 0;
+
+ ip_prefix_t ippref;
+ ip_prefix_version (&ippref) = IP4;
+ ip4_address_t * ip = &ip_prefix_v4 (&ippref);
+ ip->as_u32 = 0x11223344;
+
+ gid_address_t g =
+ {
+ .type = GID_ADDR_IP_PREFIX,
+ .ippref = ippref,
+ .vni = 0x90919293,
+ .vni_mask = 0x17
+ };
+ vec_add1 (rlocs, g);
+
+ u8 * data = clib_mem_alloc (500);
+ memset (data, 0, 500);
+
+ h = build_map_request (lcm, (vlib_buffer_t *) data, rlocs);
+
+ /* clear Nonce to simplify comparison */
+ memset ((u8 *)h + 4, 0, 8);
+
+ u8 expected_data[] =
+ {
+ 0x10, 0x40, 0x00, 0x01, /* type; flags; IRC; REC count */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, /* nonce */
+ 0x00, 0x01, /* Source-EID-AFI */
+ 0x78, 0x56, 0x34, 0x12, /* Source EID Address */
+
+ /* RLOCs */
+ 0x40, 0x03, /* AFI = LCAF*/
+ /* LCAF header*/
+ 0x00, 0x00, /* reserved1, flags */
+ 0x02, /* type = Instance ID */
+ 0x17, /* IID mask-len */
+    0x00, 0x0a, /* length */
+ 0x90, 0x91, 0x92, 0x93, /* IID / VNI */
+
+ 0x00, 0x01, /* AFI = ipv4 */
+ 0x44, 0x33, 0x22, 0x11, /* ITR-RLOC Address 1 */
+
+ /* record */
+ 0x00, /* reserved */
+ 0x18, /* EID mask-len */
+ 0x00, 0x01, /* EID-prefix-AFI */
+ 0xf0, 0xde, 0xbc, 0x9a, /* EID-prefix */
+ };
+
+ _assert (0 == memcmp (expected_data, (u8 *) h, sizeof (expected_data)));
+done:
+ clib_mem_free (data);
+ return error;
+}
+
+static clib_error_t * test_lisp_msg_put_mreq ()
+{
+ lisp_cp_main_t * lcm = vnet_lisp_cp_get_main();
+ clib_error_t * error = 0;
+ map_request_hdr_t *h;
+ gid_address_t * rlocs = 0;
+ u32 rloc_count = 0;
+
+ u8 * data = clib_mem_alloc(500);
+ memset(data, 0, 500);
+
+ generate_rlocs (&rlocs, &rloc_count);
+ h = build_map_request (lcm, (vlib_buffer_t *) data, rlocs);
+
+ /* clear Nonce to simplify comparison */
+ memset((u8 *)h + 4, 0, 8);
+
+ print_map_request(h);
+
+ u8 expected_data[50] = {
+ 0x10, 0x40, 0x01, 0x01, /* type; flags; IRC; REC count */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, /* nonce */
+ 0x00, 0x01, /* Source-EID-AFI */
+ 0x78, 0x56, 0x34, 0x12, /* Source EID Address */
+
+ /* RLOCs */
+ 0x00, 0x01, /* ITR-RLOC-AFI 1 */
+ 0x40, 0x30, 0x20, 0x10, /* ITR-RLOC Address 1 */
+ 0x00, 0x02, /* ITR-RLOC-AFI 2 */
+ 0xcc, 0xdd, 0xee, 0xff,
+ 0x88, 0x99, 0xaa, 0xbb,
+ 0x44, 0x55, 0x66, 0x77,
+ 0x00, 0x11, 0x22, 0x33, /* ITR-RLOC Address 2 */
+
+ /* record */
+ 0x00, /* reserved */
+ 0x18, /* EID mask-len */
+ 0x00, 0x01, /* EID-prefix-AFI */
+ 0xf0, 0xde, 0xbc, 0x9a, /* EID-prefix */
+ };
+ _assert (0 == memcmp (expected_data, (u8 *) h, sizeof (expected_data)));
+
+done:
+ clib_mem_free (data);
+ return error;
+}
+
+/* generate a vector of eid records */
+static mapping_t *
+build_test_map_records ()
+{
+ mapping_t * records = 0;
+
+ mapping_t r = {
+ .ttl = MAP_REGISTER_DEFAULT_TTL,
+ .eid = {
+ .type = GID_ADDR_MAC,
+ .mac = {1, 2, 3, 4, 5, 6},
+ .vni = 0x0
+ }
+ };
+
+ locator_t loc = {
+ .weight = 1,
+ .priority = 2,
+ .local = 1,
+ .address = {
+ .type = GID_ADDR_IP_PREFIX,
+ .ippref = {
+ .addr = {
+ .ip.v4.as_u32 = 0x99887766,
+ .version = IP4
+ }
+ }
+ }
+ };
+ vec_add1 (r.locators, loc);
+ vec_add1 (records, r);
+
+ return records;
+}
+
+static void
+free_test_map_records (mapping_t * maps)
+{
+ mapping_t * map;
+ vec_foreach (map, maps)
+ {
+ vec_free (map->locators);
+ }
+ vec_free (maps);
+}
+
+static clib_error_t *
+test_lisp_map_register ()
+{
+ vlib_buffer_t *b;
+ clib_error_t * error = 0;
+ u64 nonce;
+ u32 msg_len = 0;
+ mapping_t * records = build_test_map_records ();
+
+ u8 * data = clib_mem_alloc(500);
+ memset(data, 0, 500);
+ b = (vlib_buffer_t *) data;
+
+ lisp_msg_put_map_register (b, records, 1 /* want map notify */,
+ 20 /* length of HMAC_SHA_1_96 */,
+ &nonce, &msg_len);
+ free_test_map_records (records);
+
+ /* clear Nonce to simplify comparison */
+ memset((u8 *)b->data + 4, 0, 8);
+
+ /* clear authentication data */
+ memset ((u8 *)b->data + 16, 0, 20);
+
+ u8 expected_data[] = {
+ 0x30, 0x00, 0x01, 0x01, /* type; rsvd; want notify; REC count */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, /* nonce */
+ 0x00, 0x00, 0x00, 0x00, /* key id, auth data length:
+ both are zero here because they are set in another
+ function (see auth_data_len_by_key_id()) */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, /* auth data */
+
+ /* first record */
+ 0x00, 0x00, 0x03, 0x84, /* default ttl (15 minutes) */
+ 0x01, 0x00, 0x00, 0x00, /* loc count, eid len, ACT, A */
+ 0x00, 0x00, 0x40, 0x05, /* rsvd, map ver num, AFI = MAC */
+ 0x01, 0x02, 0x03, 0x04,
+ 0x05, 0x06, /* MAC EID */
+
+ /* locator 1 */
+ 0x02, 0x01, 0x00, 0x00, /* prio, weight, mprio, mweight */
+ 0x00, 0x04, 0x00, 0x01, /* flags, AFI = ipv4 */
+ 0x66, 0x77, 0x88, 0x99, /* ipv4 locator address */
+ };
+ _assert (0 == memcmp (expected_data, b->data, sizeof (expected_data)));
+done:
+ clib_mem_free (data);
+ return error;
+}
+
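+/* Test helper: fakes a vlib buffer by zeroing a 500-byte heap region
+ * and copying the given wire data into it. */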
+static vlib_buffer_t *
+create_buffer (u8 * data, u32 data_len)
+{
+ vlib_buffer_t *b;
+
+ u8 *buf_data = clib_mem_alloc(500);
+ memset (buf_data, 0, 500);
+ b = (vlib_buffer_t *)buf_data;
+
+ u8 * p = vlib_buffer_put_uninit (b, data_len);
+ clib_memcpy (p, data, data_len);
+
+ return b;
+}
+
+static clib_error_t *
+test_lisp_parse_map_reply ()
+{
+ clib_error_t * error = 0;
+ u8 map_reply_data[] =
+ {
+ 0x00, 0x00, 0x00, 0x01, /* type; rsvd; mapping count */
+ 0x00, 0x00, 0x00, 0x00,
+ };
+ vlib_buffer_t *b = create_buffer (map_reply_data, sizeof (map_reply_data));
+ map_records_arg_t *mrecs = parse_map_reply (b);
+ _assert (0 == mrecs);
+ clib_mem_free (b);
+
+ u8 map_reply_data2[] =
+ {
+ 0x00, 0x00, 0x00, 0x01, /* type; rsvd */
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, /* nonce */
+
+ /* 1. record - incomplete */
+ 0x01, 0x02, 0x03, 0x04, /* record TTL */
+ 0x01, /* locator count */
+ };
+ b = create_buffer (map_reply_data2, sizeof (map_reply_data2));
+ mrecs = parse_map_reply (b);
+ _assert (0 == mrecs);
+done:
+ clib_mem_free (b);
+ return error;
+}
+
+static clib_error_t *
+test_lisp_parse_lcaf ()
+{
+ int i;
+ clib_error_t * error = 0;
+ gid_address_t eid;
+ locator_t * locs = 0;
+ locator_t probed;
+ vlib_buffer_t * b = 0;
+ u32 buff_len = 500;
+
+ b = clib_mem_alloc (buff_len);
+ memset ((u8 *)b, 0, buff_len);
+
+ u8 map_reply_records[] =
+ {
+ /* 1. record */
+ 0x01, 0x02, 0x03, 0x04, /* record TTL */
+ 0x03, /* locator count */
+ 0x00, 0x00, 0x00, /* eid-mask-len; ... */
+ 0x00, 0x00, /* reserved; map-version num */
+ 0x00, 0x01, /* EID-Prefix-AFI */
+ 0x33, 0x44, 0x55, 0x66, /* eid-prefix */
+
+ /* 1st locator */
+ 0x0a, /* priority */
+ 0x0b, /* weight */
+ 0x0c, /* m-priority */
+ 0x0d, /* m-weight */
+ 0x00, 0x00, /* unused flags */
+ 0x40, 0x03, /* Loc-AFI = LCAF*/
+
+ /* LCAF header*/
+ 0x00, 0x00, /* reserved1, flags */
+ 0x02, /* type = Instance ID */
+ 0x18, /* IID mask-len */
+ 0x00, 0x0a, /* length */
+ /* LCAF Instance ID */
+ 0x00, 0x00, 0x00, 0x09, /* iid */
+ 0x00, 0x01, /* AFI = ipv4 */
+ 0x10, 0xbb, 0xcc, 0xdd, /* ipv4 locator address */
+
+ /* 2nd locator */
+ 0x07, /* priority */
+ 0x06, /* weight */
+ 0x05, /* m-priority */
+ 0x04, /* m-weight */
+ 0x00, 0x00, /* unused flags */
+ 0x40, 0x03, /* Loc-AFI = LCAF*/
+
+ /* LCAF header*/
+ 0x00, 0x00, /* reserved1, flags */
+ 0x02, /* type = Instance ID */
+ 0x18, /* IID mask-len */
+ 0x00, 0x16, /* iid length + next AFI length */
+ /* LCAF Instance ID */
+ 0x22, 0x44, 0x66, 0x88, /* iid */
+ 0x00, 0x02, /* AFI = ipv6 */
+ 0xcc, 0xdd, 0xee, 0xff,
+ 0x88, 0x99, 0xaa, 0xbb,
+ 0x44, 0x55, 0x66, 0x77,
+ 0x00, 0x11, 0x22, 0x33, /* ipv6 locator address */
+
+ /* 3rd locator */
+ 0x0a, /* priority */
+ 0x0b, /* weight */
+ 0x0c, /* m-priority */
+ 0x0d, /* m-weight */
+ 0x00, 0x00, /* unused flags */
+ 0x00, 0x01, /* Loc-AFI */
+ 0xaa, 0xbb, 0xcc, 0xdd, /* Locator */
+ };
+
+ b->current_length = buff_len;
+ memcpy (b->data, map_reply_records, sizeof (map_reply_records));
+
+ lisp_msg_parse_mapping_record (b, &eid, &locs, &probed);
+ _assert (vec_len (locs) == 3);
+ _assert (eid.ippref.addr.ip.v4.as_u32 == 0x66554433);
+
+ /* check 1st locator - an LCAF with ipv4 */
+ _assert (locs[0].local == 0);
+ _assert (locs[0].priority == 0xa);
+ _assert (locs[0].weight == 0xb);
+ _assert (locs[0].mpriority == 0xc);
+ _assert (locs[0].mweight == 0xd);
+
+ _assert (gid_address_type (&locs[0].address) == GID_ADDR_IP_PREFIX);
+ _assert (gid_address_vni (&locs[0].address) == 0x09);
+ ip_prefix_t * ip_pref = &gid_address_ippref (&locs[0].address);
+ _assert (IP4 == ip_prefix_version (ip_pref));
+
+ /* 2nd locator - LCAF entry with ipv6 address */
+ _assert (locs[1].local == 0);
+ _assert (locs[1].priority == 0x7);
+ _assert (locs[1].weight == 0x6);
+ _assert (locs[1].mpriority == 0x5);
+ _assert (locs[1].mweight == 0x4);
+
+ _assert (gid_address_type (&locs[1].address) == GID_ADDR_IP_PREFIX);
+ _assert (0x22446688 == gid_address_vni (&locs[1].address));
+ ip_pref = &gid_address_ippref (&locs[1].address);
+ _assert (IP6 == ip_prefix_version (ip_pref));
+
+ /* 3rd locator - simple ipv4 address */
+ _assert (gid_address_type (&locs[2].address) == GID_ADDR_IP_PREFIX);
+done:
+ clib_mem_free (b);
+
+ for (i = 0; i < 3; i++)
+ locator_free (&locs[i]);
+ vec_free (locs);
+ return error;
+}
+
+#define foreach_test_case \
+ _(lisp_msg_put_mreq) \
+ _(lisp_msg_put_mreq_with_lcaf) \
+ _(lisp_msg_push_ecm) \
+ _(lisp_msg_parse) \
+ _(lisp_msg_parse_mapping_record) \
+ _(lisp_parse_map_reply) \
+ _(lisp_parse_lcaf) \
+ _(lisp_map_register)
+
+int run_tests (void)
+{
+ clib_error_t * error;
+
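+ /* expand foreach_test_case into a run-and-report step per test */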
+#define _(_test_name) \
+ error = test_ ## _test_name (); \
+ if (error) \
+ { \
+ clib_error_report (error); \
+ return 0; \
+ }
+
+ foreach_test_case
+#undef _
+
+ return 0;
+}
+
+int main()
+{
+ return run_tests ();
+}
+#undef _assert
diff --git a/src/tests/vnet/lisp-cp/test_lisp_types.c b/src/tests/vnet/lisp-cp/test_lisp_types.c
new file mode 100644
index 00000000..7c55a9c1
--- /dev/null
+++ b/src/tests/vnet/lisp-cp/test_lisp_types.c
@@ -0,0 +1,657 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/error.h>
+#include <vnet/lisp-cp/lisp_types.h>
+#include <vnet/lisp-cp/lisp_cp_messages.h>
+
+#define _assert(e) \
+ error = CLIB_ERROR_ASSERT (e); \
+ if (error) \
+ goto done;
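+/* on failure, _assert() records a clib error and jumps to the
+ * enclosing test's "done:" cleanup label */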
+
+static clib_error_t * test_locator_type (void)
+{
+ clib_error_t * error = 0;
+ gid_address_t _gid_addr, * gid = &_gid_addr;
+ ip_prefix_t * ippref;
+ gid_address_type (gid) = GID_ADDR_IP_PREFIX;
+ gid_address_ippref_len (gid) = 24;
+ ippref = &gid_address_ippref (gid);
+ ip_prefix_version (ippref) = IP4;
+ ip_prefix_len (ippref) = 0;
+ ip4_address_t * ip4 = &ip_prefix_v4 (ippref);
+ ip4->as_u32 = 0x20304050;
+
+ /* local locator */
+ locator_t loc1, loc2 = {
+ .local = 1,
+ .state = 2,
+ .sw_if_index = 8,
+ .priority = 3,
+ .weight = 100,
+ .mpriority = 4,
+ .mweight = 101
+ };
+ locator_copy (&loc1, &loc2);
+ _assert (0 == locator_cmp (&loc1, &loc2));
+
+ /* remote locator */
+ loc2.local = 0;
+
+ ip_prefix_t nested_ippref;
+ ip_prefix_version (&nested_ippref) = IP4;
+ ip_prefix_len (&nested_ippref) = 0;
+ ip4 = &ip_prefix_v4 (&nested_ippref);
+ ip4->as_u32 = 0x33882299;
+ gid_address_t nested_gid =
+ {
+ .type = GID_ADDR_IP_PREFIX,
+ .ippref = nested_ippref
+ };
+
+ lcaf_t lcaf =
+ {
+ .type = LCAF_INSTANCE_ID,
+ .uni =
+ {
+ .vni_mask_len = 5,
+ .vni = 0xa1b2c3d4,
+ .gid_addr = &nested_gid
+ }
+ };
+ gid_address_type (gid) = GID_ADDR_LCAF;
+ gid_address_lcaf (gid) = lcaf;
+
+ loc2.address = gid[0];
+ locator_copy(&loc1, &loc2);
+
+ _assert (0 == locator_cmp (&loc1, &loc2));
+
+done:
+ locator_free (&loc1);
+ return error;
+}
+
+static clib_error_t * test_gid_parse_ip_pref ()
+{
+ clib_error_t * error = 0;
+ gid_address_t _gid_addr, * gid_addr = &_gid_addr;
+ gid_address_t _gid_addr_copy, * copy = &_gid_addr_copy;
+ u8 data[] =
+ {
+ 0x00, 0x01, /* AFI = IPv4 */
+ 0x10, 0xbb, 0xcc, 0xdd, /* ipv4 address */
+ };
+
+ u32 len = gid_address_parse (data, gid_addr);
+ _assert (6 == len);
+ gid_address_copy (copy, gid_addr);
+ _assert (0 == gid_address_cmp (copy, gid_addr));
+done:
+ return error;
+}
+
+static clib_error_t * test_gid_parse_mac ()
+{
+ clib_error_t * error = 0;
+ gid_address_t _gid, * gid = &_gid;
+ gid_address_t _gid_copy, * gid_copy = &_gid_copy;
+
+ u8 data[] =
+ {
+ 0x40, 0x05, /* AFI = MAC address */
+ 0x10, 0xbb, 0xcc, 0xdd, /* MAC */
+ 0x77, 0x99,
+ };
+
+ u32 len = gid_address_parse (data, gid);
+ _assert (8 == len);
+ _assert (GID_ADDR_MAC == gid_address_type (gid));
+ gid_address_copy (gid_copy, gid);
+ _assert (0 == gid_address_cmp (gid_copy, gid));
+done:
+ return error;
+}
+
+static clib_error_t *
+test_gid_write_nsh (void)
+{
+ clib_error_t * error = 0;
+
+ u8 * b = clib_mem_alloc(500);
+ memset(b, 0, 500);
+
+ gid_address_t g =
+ {
+ .vni = 0,
+ .nsh.spi = 0x112233,
+ .nsh.si = 0x42,
+ .type = GID_ADDR_NSH,
+ };
+
+ u16 len = gid_address_put (b, &g);
+
+ u8 expected[] =
+ {
+ 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF*/
+ 0x11, 0x00, 0x00, 0x04, /* type = SPI LCAF, length = 4 */
+
+ /* Service Path ID, Service index */
+ 0x11, 0x22, 0x33, 0x42, /* SPI, SI */
+ };
+
+ _assert (sizeof (expected) == len);
+ _assert (0 == memcmp (expected, b, len));
+done:
+ clib_mem_free (b);
+ return error;
+}
+
+static clib_error_t *
+test_gid_parse_nsh ()
+{
+ clib_error_t * error = 0;
+ gid_address_t _gid_addr, * gid_addr = &_gid_addr;
+ gid_address_t _gid_addr_copy, * copy = &_gid_addr_copy;
+
+ memset (gid_addr, 0, sizeof (gid_addr[0]));
+ memset (copy, 0, sizeof (copy[0]));
+
+ u8 data[] =
+ {
+ 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF*/
+ 0x11, 0x00, 0x00, 0x04, /* type = SPI LCAF, length = 4 */
+
+ /* Service Path ID, Service index */
+ 0x55, 0x99, 0x42, 0x09, /* SPI, SI */
+ };
+
+ u32 len = gid_address_parse (data, gid_addr);
+ _assert (sizeof (data) == len);
+ gid_address_copy (copy, gid_addr);
+ _assert (0 == gid_address_cmp (gid_addr, copy));
+ _assert (GID_ADDR_NSH == gid_address_type (copy));
+ _assert (0 == gid_address_vni (copy));
+ _assert (gid_address_nsh_spi (copy) == 0x559942);
+ _assert (gid_address_nsh_si (copy) == 0x09);
+
+done:
+ gid_address_free (copy);
+ gid_address_free (gid_addr);
+ return error;
+}
+
+static clib_error_t * test_gid_parse_lcaf ()
+{
+ clib_error_t * error = 0;
+ gid_address_t _gid_addr, * gid_addr = &_gid_addr;
+ gid_address_t _gid_addr_copy, * gid_addr_copy = &_gid_addr_copy;
+
+ memset (gid_addr, 0, sizeof (gid_addr[0]));
+ memset (gid_addr_copy, 0, sizeof (gid_addr_copy[0]));
+
+ u8 data[] =
+ {
+ 0x40, 0x03, /* AFI = LCAF*/
+
+ /* LCAF header*/
+ 0x00, 0x00, /* reserved1, flags */
+ 0x02, /* type = Instance ID */
+ 0x18, /* IID mask-len */
+ 0x00, 0x0a, /* iid length + next AFI length */
+ /* LCAF Instance ID */
+ 0x00, 0x00, 0x00, 0x09, /* iid */
+ 0x00, 0x01, /* AFI = ipv4 */
+ 0x10, 0xbb, 0xcc, 0xdd, /* ipv4 address */
+ };
+ u32 len = gid_address_parse (data, gid_addr);
+ _assert (18 == len);
+ gid_address_copy (gid_addr_copy, gid_addr);
+ _assert (0 == gid_address_cmp (gid_addr_copy, gid_addr));
+ _assert (GID_ADDR_IP_PREFIX == gid_address_type (gid_addr));
+ _assert (9 == gid_address_vni (gid_addr));
+ _assert (0x18 == gid_address_vni_mask (gid_addr));
+ _assert (0xddccbb10 == gid_addr->ippref.addr.ip.v4.as_u32);
+
+done:
+ gid_address_free (gid_addr);
+ gid_address_free (gid_addr_copy);
+ return error;
+}
+
+/* recursive LCAFs are not supported */
+#if 0
+static clib_error_t * test_gid_parse_lcaf_complex ()
+{
+ clib_error_t * error = 0;
+ gid_address_t _gid_addr, * gid_addr = &_gid_addr;
+ gid_address_t _gid_addr_copy, * gid_addr_copy = &_gid_addr_copy;
+
+ memset (gid_addr, 0, sizeof (gid_addr[0]));
+ memset (gid_addr_copy, 0, sizeof (gid_addr_copy[0]));
+
+ u8 data[] =
+ {
+ 0x40, 0x03, /* AFI = LCAF*/
+
+ /* LCAF header*/
+ 0x00, 0x00, /* reserved1, flags */
+ 0x02, /* type = Instance ID */
+ 0x18, /* IID mask-len */
+ 0x00, 0x0a, /* iid length + next AFI length */
+ /* LCAF Instance ID */
+ 0x00, 0x00, 0x00, 0x0b, /* iid */
+
+ 0x40, 0x03, /* AFI = LCAF*/
+ /* LCAF header*/
+ 0x00, 0x00, /* reserved1, flags */
+ 0x02, /* type = Instance ID */
+ 0x17, /* IID mask-len */
+ 0x00, 0x0a, /* iid length + next AFI length */
+ /* LCAF Instance ID */
+ 0x00, 0x00, 0x00, 0x0c, /* iid */
+
+ 0x40, 0x03, /* AFI = LCAF*/
+ /* LCAF header*/
+ 0x00, 0x00, /* reserved1, flags */
+ 0x02, /* type = Instance ID */
+ 0x16, /* IID mask-len */
+ 0x00, 0x16, /* iid length + next AFI length */
+ /* LCAF Instance ID */
+ 0x00, 0x00, 0x00, 0x0d, /* iid */
+
+ 0x00, 0x02, /* AFI = IPv6 */
+
+ 0x10, 0xbb, 0xcc, 0xdd,
+ 0x10, 0xbb, 0xcc, 0xdd,
+ 0x10, 0xbb, 0xcc, 0xdd,
+ 0x10, 0xbb, 0xcc, 0xdd, /* ipv6 address */
+ };
+ u32 len = gid_address_parse (data, gid_addr);
+ _assert (54 == len);
+ _assert (gid_addr->type == GID_ADDR_LCAF);
+ gid_address_copy (gid_addr_copy, gid_addr);
+ _assert (0 == gid_address_cmp (gid_addr_copy, gid_addr));
+ _assert (gid_addr_copy->type == GID_ADDR_LCAF);
+
+ lcaf_t * lcaf = &gid_address_lcaf (gid_addr_copy);
+ _assert (lcaf->type == LCAF_INSTANCE_ID);
+ vni_t * v = (vni_t *) lcaf;
+ _assert (v->vni == 0x0b);
+ _assert (v->vni_mask_len == 0x18);
+
+ gid_address_t * tmp = vni_gid (v);
+ _assert (gid_address_type (tmp) == GID_ADDR_LCAF);
+ lcaf = &gid_address_lcaf (tmp);
+ _assert (lcaf->type == LCAF_INSTANCE_ID);
+
+ v = (vni_t *) lcaf;
+ _assert (v->vni == 0x0c);
+ _assert (v->vni_mask_len == 0x17);
+
+ tmp = vni_gid (v);
+ _assert (gid_address_type (tmp) == GID_ADDR_LCAF);
+ lcaf = &gid_address_lcaf (tmp);
+
+ _assert (lcaf->type == LCAF_INSTANCE_ID);
+ v = (vni_t *) lcaf;
+ _assert (v->vni == 0x0d);
+ _assert (v->vni_mask_len == 0x16);
+
+ tmp = vni_gid (v);
+ _assert (gid_address_type (tmp) == GID_ADDR_IP_PREFIX);
+
+ ip_prefix_t * ip_pref = &gid_address_ippref (tmp);
+ ip6_address_t * ip6 = &ip_prefix_v6 (ip_pref);
+ _assert (ip6->as_u32[0] == 0xddccbb10);
+ _assert (ip6->as_u32[1] == 0xddccbb10);
+ _assert (ip6->as_u32[2] == 0xddccbb10);
+ _assert (ip6->as_u32[3] == 0xddccbb10);
+ _assert (ip_prefix_version (ip_pref) == IP6);
+
+done:
+ gid_address_free (gid_addr);
+ gid_address_free (gid_addr_copy);
+ return error;
+}
+#endif
+
+static clib_error_t * test_write_mac_in_lcaf (void)
+{
+ clib_error_t * error = 0;
+
+ u8 * b = clib_mem_alloc(500);
+ memset(b, 0, 500);
+
+ gid_address_t g =
+ {
+ .mac = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6},
+ .vni = 0x01020304,
+ .vni_mask = 0x10,
+ .type = GID_ADDR_MAC,
+ };
+
+ u16 len = gid_address_put (b, &g);
+
+ u8 expected[] =
+ {
+ 0x40, 0x03, /* AFI = LCAF */
+ 0x00, /* reserved1 */
+ 0x00, /* flags */
+ 0x02, /* LCAF type = Instance ID */
+ 0x10, /* IID/IID mask len */
+ 0x00, 0x0c, /* length */
+ 0x01, 0x02, 0x03, 0x04, /* Instance ID / VNI */
+
+ 0x40, 0x05, /* AFI = MAC */
+ 0x01, 0x02, 0x03, 0x04,
+ 0x05, 0x06 /* MAC */
+ };
+ _assert (sizeof (expected) == len);
+ _assert (0 == memcmp (expected, b, len));
+done:
+ clib_mem_free (b);
+ return error;
+}
+
+static clib_error_t * test_mac_address_write (void)
+{
+ clib_error_t * error = 0;
+
+ u8 * b = clib_mem_alloc(500);
+ memset(b, 0, 500);
+
+ gid_address_t g =
+ {
+ .mac = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6},
+ .type = GID_ADDR_MAC,
+ };
+
+ u16 len = gid_address_put (b, &g);
+ _assert (8 == len);
+
+ u8 expected[] =
+ {
+ 0x40, 0x05, /* AFI = MAC */
+ 0x01, 0x02, 0x03, 0x04,
+ 0x05, 0x06 /* MAC */
+ };
+ _assert (0 == memcmp (expected, b, len));
+done:
+ clib_mem_free (b);
+ return error;
+}
+
+static clib_error_t *
+test_src_dst_with_vni_serdes (void)
+{
+ clib_error_t * error = 0;
+ u8 * b = clib_mem_alloc (500);
+ memset (b, 0, 500);
+
+ fid_address_t src =
+ {
+ .type = FID_ADDR_IP_PREF,
+ .ippref =
+ {
+ .len = 24,
+ .addr =
+ {
+ .version = IP4,
+ .ip.v4.data = { 0x1, 0x2, 0x3, 0x0 }
+ }
+ }
+ };
+
+ fid_address_t dst =
+ {
+ .type = FID_ADDR_IP_PREF,
+ .ippref =
+ {
+ .len = 16,
+ .addr =
+ {
+ .version = IP4,
+ .ip.v4.data = { 0x9, 0x8, 0x0, 0x0 }
+ }
+ }
+ };
+
+ source_dest_t sd =
+ {
+ .src = src,
+ .dst = dst
+ };
+
+ gid_address_t g =
+ {
+ .sd = sd,
+ .type = GID_ADDR_SRC_DST,
+ .vni = 0x12345678,
+ .vni_mask = 0x9
+ };
+
+ u16 size_to_put = gid_address_size_to_put(&g);
+ _assert (36 == size_to_put);
+ _assert (0 == gid_address_len(&g));
+
+ u16 write_len = gid_address_put (b, &g);
+ printf("sizetoput %d; writelen %d\n", size_to_put, write_len);
+ _assert (size_to_put == write_len);
+
+ u8 expected_data[] =
+ {
+ 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
+ 0x02, 0x09, 0x00, 0x1c, /* LCAF type = IID, IID mask-len, length */
+ 0x12, 0x34, 0x56, 0x78, /* Instance ID / VNI */
+
+ 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
+ 0x0c, 0x00, 0x00, 0x10, /* LCAF type = source/dest key, rsvd, length */
+ 0x00, 0x00, 0x18, 0x10, /* reserved; source-ML, Dest-ML */
+
+ 0x00, 0x01, /* AFI = ip4 */
+ 0x01, 0x02, 0x03, 0x00, /* source */
+
+ 0x00, 0x01, /* AFI = ip4 */
+ 0x09, 0x08, 0x00, 0x00, /* destination */
+ };
+ _assert (0 == memcmp (expected_data, b, sizeof (expected_data)));
+
+ gid_address_t p;
+ memset (&p, 0, sizeof (p));
+ _assert (write_len == gid_address_parse (b, &p));
+ _assert (0 == gid_address_cmp (&g, &p));
+done:
+ clib_mem_free (b);
+ return error;
+}
+
+static clib_error_t *
+test_src_dst_deser_bad_afi (void)
+{
+ clib_error_t * error = 0;
+
+ u8 expected_data[] =
+ {
+ 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
+ 0x0c, 0x00, 0x00, 0x14, /* LCAF type = source/dest key, rsvd, length */
+ 0x00, 0x00, 0x00, 0x00, /* reserved; source-ML, Dest-ML */
+
+ 0xde, 0xad, /* AFI = bad value */
+ 0x11, 0x22, 0x33, 0x44,
+ 0x55, 0x66, /* source */
+
+ 0x40, 0x05, /* AFI = MAC */
+ 0x10, 0x21, 0x32, 0x43,
+ 0x54, 0x65, /* destination */
+ };
+
+ gid_address_t p;
+ _assert (~0 == gid_address_parse (expected_data, &p));
+done:
+ return error;
+}
+
+static clib_error_t *
+test_src_dst_serdes (void)
+{
+ clib_error_t * error = 0;
+
+ u8 * b = clib_mem_alloc (500);
+ memset (b, 0, 500);
+
+ fid_address_t src =
+ {
+ .type = FID_ADDR_MAC,
+ .mac = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 }
+ };
+
+ fid_address_t dst =
+ {
+ .type = FID_ADDR_MAC,
+ .mac = { 0x10, 0x21, 0x32, 0x43, 0x54, 0x65 }
+ };
+
+ source_dest_t sd =
+ {
+ .src = src,
+ .dst = dst
+ };
+
+ gid_address_t g =
+ {
+ .sd = sd,
+ .type = GID_ADDR_SRC_DST,
+ .vni = 0x0,
+ .vni_mask = 0x0
+ };
+
+ u16 size_to_put = gid_address_size_to_put(&g);
+ _assert (28 == size_to_put);
+ _assert (0 == gid_address_len(&g));
+
+ u16 write_len = gid_address_put (b, &g);
+ _assert (size_to_put == write_len);
+
+ u8 expected_data[] =
+ {
+ 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
+ 0x0c, 0x00, 0x00, 0x14, /* LCAF type = source/dest key, rsvd, length */
+ 0x00, 0x00, 0x00, 0x00, /* reserved; source-ML, Dest-ML */
+
+ 0x40, 0x05, /* AFI = MAC */
+ 0x11, 0x22, 0x33, 0x44,
+ 0x55, 0x66, /* source */
+
+ 0x40, 0x05, /* AFI = MAC */
+ 0x10, 0x21, 0x32, 0x43,
+ 0x54, 0x65, /* destination */
+ };
+ _assert (0 == memcmp (expected_data, b, sizeof (expected_data)));
+
+ gid_address_t p;
+ memset (&p, 0, sizeof (p));
+ _assert (write_len == gid_address_parse (b, &p));
+ _assert (0 == gid_address_cmp (&g, &p));
+done:
+ clib_mem_free (b);
+ return error;
+}
+
+static clib_error_t * test_gid_address_write (void)
+{
+ clib_error_t * error = 0;
+ ip_prefix_t ippref_data, * ippref = &ippref_data;
+
+ u8 * b = clib_mem_alloc(500);
+ memset(b, 0, 500);
+
+ ip_prefix_version (ippref) = IP4;
+ ip_prefix_len (ippref) = 9;
+ ip4_address_t * ip4 = &ip_prefix_v4 (ippref);
+ ip4->as_u32 = 0xaabbccdd;
+
+ gid_address_t g =
+ {
+ .ippref = ippref[0],
+ .type = GID_ADDR_IP_PREFIX,
+ .vni = 0x01020304,
+ .vni_mask = 0x18
+ };
+
+ _assert (18 == gid_address_size_to_put (&g));
+ _assert (gid_address_len (&g) == 9);
+
+ u16 write_len = gid_address_put (b, &g);
+ _assert (18 == write_len);
+
+ u8 expected_gid_data[] =
+ {
+ 0x40, 0x03, /* AFI = LCAF */
+ 0x00, /* reserved1 */
+ 0x00, /* flags */
+ 0x02, /* LCAF type = Instance ID */
+ 0x18, /* IID/VNI mask len */
+ 0x00, 0x0a, /* length */
+ 0x01, 0x02, 0x03, 0x04, /* Instance ID / VNI */
+
+ 0x00, 0x01, /* AFI = IPv4 */
+ 0xdd, 0xcc, 0xbb, 0xaa, /* ipv4 addr */
+ };
+ _assert (0 == memcmp (expected_gid_data, b, sizeof (expected_gid_data)));
+done:
+ clib_mem_free (b);
+ return error;
+}
+
+#define foreach_test_case \
+ _(locator_type) \
+ _(gid_parse_ip_pref) \
+ _(gid_parse_mac) \
+ _(gid_parse_lcaf) \
+ _(gid_parse_nsh) \
+ _(gid_write_nsh) \
+ _(mac_address_write) \
+ _(gid_address_write) \
+ _(src_dst_serdes) \
+ _(write_mac_in_lcaf) \
+ _(src_dst_deser_bad_afi) \
+ _(src_dst_with_vni_serdes)
+
+int run_tests (void)
+{
+ clib_error_t * error;
+
+#define _(_test_name) \
+ error = test_ ## _test_name (); \
+ if (error) \
+ { \
+ clib_error_report (error); \
+ return 0; \
+ }
+
+ foreach_test_case
+#undef _
+
+ return 0;
+}
+
+int main()
+{
+ return run_tests ();
+}
+
diff --git a/src/tools/elftool/dir.dox b/src/tools/elftool/dir.dox
new file mode 100644
index 00000000..40426e04
--- /dev/null
+++ b/src/tools/elftool/dir.dox
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016 Comcast Cable Communications Management, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/** @dir
+ * @brief VPP infrastructure tools.
+ */
diff --git a/src/tools/elftool/elftool.c b/src/tools/elftool/elftool.c
new file mode 100644
index 00000000..d9d3704b
--- /dev/null
+++ b/src/tools/elftool/elftool.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2008 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/elf.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#ifndef CLIB_UNIX
+#error "unix only"
+#endif
+
+typedef struct {
+ elf_main_t elf_main;
+ char * input_file;
+ char * output_file;
+ char * set_interpreter;
+ char * set_rpath;
+ int unset_rpath;
+ int verbose;
+ int quiet;
+ int allow_elf_shared;
+ /* for use in the optimized / simplified case */
+ u64 file_size;
+ u64 interpreter_offset;
+ u64 rpath_offset;
+} elf_tool_main_t;
+
+static clib_error_t * elf_set_interpreter (elf_main_t * em,
+ elf_tool_main_t * tm)
+{
+ elf_segment_t * g;
+ elf_section_t * s;
+ clib_error_t * error;
+ char * interp = tm->set_interpreter;
+
+ switch (em->first_header.file_type)
+ {
+ case ELF_EXEC:
+ break;
+
+ case ELF_SHARED:
+ if (tm->allow_elf_shared)
+ break;
+ /* Note: fall through */
+ default:
+ return clib_error_return (0, "unacceptable file_type");
+ }
+
+ vec_foreach (g, em->segments)
+ {
+ if (g->header.type == ELF_SEGMENT_INTERP)
+ break;
+ }
+
+ if (g >= vec_end (em->segments))
+ return clib_error_return (0, "interpreter not found");
+
+ if (g->header.memory_size < 1 + strlen (interp))
+ return clib_error_return (0, "given interpreter does not fit; must be less than %d bytes (`%s' given)",
+ g->header.memory_size, interp);
+
+ error = elf_get_section_by_start_address (em, g->header.virtual_address, &s);
+ if (error)
+ return error;
+
+ /* Put in new null terminated string. */
+ memset (s->contents, 0, vec_len (s->contents));
+ clib_memcpy (s->contents, interp, strlen (interp));
+
+ return 0;
+}
+
+static void
+delete_rpath_for_section (elf_main_t * em, elf_section_t * s)
+{
+ elf64_dynamic_entry_t * e;
+ elf64_dynamic_entry_t * new_es = 0;
+
+ vec_foreach (e, em->dynamic_entries)
+ {
+ switch (e->type)
+ {
+ case ELF_DYNAMIC_ENTRY_RPATH:
+ case ELF_DYNAMIC_ENTRY_RUN_PATH:
+ break;
+
+ default:
+ vec_add1 (new_es, e[0]);
+ break;
+ }
+ }
+
+ /* Pad so as to keep section size constant. */
+ {
+ elf64_dynamic_entry_t e_end;
+ e_end.type = ELF_DYNAMIC_ENTRY_END;
+ e_end.data = 0;
+ while (vec_len (new_es) < vec_len (em->dynamic_entries))
+ vec_add1 (new_es, e_end);
+ }
+
+ vec_free (em->dynamic_entries);
+ em->dynamic_entries = new_es;
+
+ elf_set_dynamic_entries (em);
+}
+
+static void delete_rpath (elf_main_t * em)
+{
+ elf_section_t * s;
+
+ vec_foreach (s, em->sections)
+ {
+ switch (s->header.type)
+ {
+ case ELF_SECTION_DYNAMIC:
+ delete_rpath_for_section (em, s);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static clib_error_t *
+set_rpath_for_section (elf_main_t * em, elf_section_t * s, char * new_rpath)
+{
+ elf64_dynamic_entry_t * e;
+ char * old_rpath;
+ int old_len, new_len = strlen (new_rpath);
+ u8 * new_string_table = vec_dup (em->dynamic_string_table);
+
+ vec_foreach (e, em->dynamic_entries)
+ {
+ switch (e->type)
+ {
+ case ELF_DYNAMIC_ENTRY_RPATH:
+ case ELF_DYNAMIC_ENTRY_RUN_PATH:
+ old_rpath = (char *) new_string_table + e->data;
+ old_len = strlen (old_rpath);
+ if (old_len < new_len)
+ return clib_error_return (0, "rpath of `%s' does not fit (old rpath `%s')",
+ new_rpath, old_rpath);
+ strcpy (old_rpath, new_rpath);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ elf_set_section_contents (em, em->dynamic_string_table_section_index,
+ new_string_table,
+ vec_bytes (new_string_table));
+
+ return 0;
+}
+
+static clib_error_t *
+set_rpath (elf_main_t * em, char * rpath)
+{
+ clib_error_t * error = 0;
+ elf_section_t * s;
+
+ vec_foreach (s, em->sections)
+ {
+ switch (s->header.type)
+ {
+ case ELF_SECTION_DYNAMIC:
+ error = set_rpath_for_section (em, s, rpath);
+ if (error)
+ return error;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return error;
+}
+
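+/*
+ * Fast path for the common case: patch the interpreter and rpath
+ * markers directly in the raw image, without parsing the ELF file.
+ */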
+static clib_error_t *
+set_interpreter_rpath (elf_tool_main_t * tm)
+{
+ int ifd = -1, ofd = -1;
+ struct stat fd_stat;
+ u8 *idp = 0; /* warning be gone */
+ u64 mmap_length = 0, i;
+ u32 run_length;
+ u8 in_run;
+ u64 offset0 = 0, offset1 = 0;
+ clib_error_t * error = 0;
+ int fix_in_place = 0;
+
+ if (!strcmp (tm->input_file, tm->output_file))
+ fix_in_place = 1;
+
+ ifd = open (tm->input_file, O_RDWR);
+ if (ifd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", tm->input_file);
+ goto done;
+ }
+
+ if (fstat (ifd, &fd_stat) < 0)
+ {
+ error = clib_error_return_unix (0, "fstat `%s'", tm->input_file);
+ goto done;
+ }
+
+ if (!(fd_stat.st_mode & S_IFREG))
+ {
+ error = clib_error_return (0, "%s is not a regular file", tm->input_file);
+ goto done;
+ }
+
+ mmap_length = fd_stat.st_size;
+ if (mmap_length < 4)
+ {
+ error = clib_error_return (0, "%s too short", tm->input_file);
+ goto done;
+ }
+
+ /* Map shared when fixing in place so the patches reach the file;
+ * otherwise map private (copy-on-write) and write the patched
+ * image to the output file below. */
+ if (fix_in_place)
+ idp = mmap (0, mmap_length, PROT_READ | PROT_WRITE, MAP_SHARED,
+ ifd, /* offset */ 0);
+ else
+ idp = mmap (0, mmap_length, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ ifd, /* offset */ 0);
+ if (~pointer_to_uword (idp) == 0)
+ {
+ mmap_length = 0;
+ error = clib_error_return_unix (0, "mmap `%s'", tm->input_file);
+ goto done;
+ }
+
+ if (idp[0] != 0x7f || idp[1] != 'E' || idp[2] != 'L' || idp[3] != 'F')
+ {
+ error = clib_error_return (0, "not an ELF file '%s'", tm->input_file);
+ goto done;
+ }
+
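+ /*
+ * The build pads the interpreter and rpath strings with long runs of
+ * '/' so they can be patched in place. Scan for runs of 16 or more
+ * '/' bytes: in an executable the first run marks the interpreter and
+ * the second marks the rpath; a shared library has only the rpath run.
+ */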
+ in_run = 0;
+ run_length = 0;
+
+ for (i = 0; i < mmap_length; i++)
+ {
+ if (idp[i] == '/')
+ {
+ if (in_run)
+ run_length++;
+ else
+ {
+ in_run = 1;
+ run_length = 1;
+ }
+ }
+ else
+ {
+ if (in_run && run_length >= 16)
+ {
+ if (offset0 == 0)
+ offset0 = (i - run_length);
+ else if (offset1 == 0)
+ {
+ offset1 = (i - run_length);
+ goto found_both;
+ }
+ }
+ in_run = 0;
+ run_length = 0;
+ }
+ }
+
+ if (offset0 == 0)
+ {
+ error = clib_error_return (0, "no fixup markers in %s",
+ tm->input_file);
+ goto done;
+ }
+
+ found_both:
+ if (0)
+ clib_warning ("offset0 %lld (0x%llx), offset1 %lld (0x%llx)",
+ offset0, offset0, offset1, offset1);
+
+ /* Executable file case */
+ if (offset0 && offset1)
+ {
+ tm->interpreter_offset = offset0;
+ tm->rpath_offset = offset1;
+ }
+ else /* shared library case */
+ {
+ tm->interpreter_offset = 0;
+ tm->rpath_offset = offset0;
+ }
+
+ if (tm->interpreter_offset)
+ clib_memcpy (&idp[tm->interpreter_offset], tm->set_interpreter,
+ strlen (tm->set_interpreter)+1);
+
+ if (tm->rpath_offset)
+ clib_memcpy (&idp[tm->rpath_offset], tm->set_rpath,
+ strlen (tm->set_rpath)+1);
+
+ /* Write the output file... */
+ if (fix_in_place == 0)
+ {
+ ofd = open (tm->output_file, O_RDWR | O_CREAT | O_TRUNC, 0644);
+ if (ofd < 0)
+ {
+ error = clib_error_return_unix (0, "create `%s'", tm->output_file);
+ goto done;
+ }
+
+ if (write (ofd, idp, mmap_length) != mmap_length)
+ error = clib_error_return_unix (0, "write `%s'", tm->output_file);
+ }
+
+ done:
+ if (mmap_length > 0 && idp)
+ munmap (idp, mmap_length);
+ if (ifd >= 0)
+ close (ifd);
+ if (ofd >= 0)
+ close (ofd);
+ return error;
+}
+
+
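+/*
+ * Command-line keywords, parsed with unformat below:
+ * in <file> out <file> set-interpreter <path> set-rpath <path>
+ * unset-rpath verbose verbose-symbols verbose-relocations
+ * verbose-dynamic quiet allow-elf-shared
+ */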
+int main (int argc, char * argv[])
+{
+ elf_tool_main_t _tm, * tm = &_tm;
+ elf_main_t * em = &tm->elf_main;
+ unformat_input_t i;
+ clib_error_t * error = 0;
+
+ memset (tm, 0, sizeof (tm[0]));
+ unformat_init_command_line (&i, argv);
+
+ while (unformat_check_input (&i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&i, "in %s", &tm->input_file))
+ ;
+ else if (unformat (&i, "out %s", &tm->output_file))
+ ;
+ else if (unformat (&i, "set-interpreter %s", &tm->set_interpreter))
+ ;
+ else if (unformat (&i, "set-rpath %s", &tm->set_rpath))
+ ;
+ else if (unformat (&i, "unset-rpath"))
+ tm->unset_rpath = 1;
+ else if (unformat (&i, "verbose"))
+ tm->verbose = ~0;
+ else if (unformat (&i, "verbose-symbols"))
+ tm->verbose |= FORMAT_ELF_MAIN_SYMBOLS;
+ else if (unformat (&i, "verbose-relocations"))
+ tm->verbose |= FORMAT_ELF_MAIN_RELOCATIONS;
+ else if (unformat (&i, "verbose-dynamic"))
+ tm->verbose |= FORMAT_ELF_MAIN_DYNAMIC;
+ else if (unformat (&i, "quiet"))
+ tm->quiet = 1;
+ else if (unformat (&i, "allow-elf-shared"))
+ tm->allow_elf_shared = 1;
+ else
+ {
+ error = unformat_parse_error (&i);
+ goto done;
+ }
+ }
+
+ if (! tm->input_file)
+ {
+ error = clib_error_return (0, "no input file");
+ goto done;
+ }
+
+ /* Do the typical case a stone-simple way... */
+ if (tm->quiet && tm->set_interpreter && tm->set_rpath && tm->output_file)
+ {
+ error = set_interpreter_rpath (tm);
+ goto done;
+ }
+
+ error = elf_read_file (em, tm->input_file);
+
+ if (error)
+ goto done;
+
+ if (tm->verbose)
+ fformat (stdout, "%U", format_elf_main, em, tm->verbose);
+
+ if (tm->set_interpreter)
+ {
+ error = elf_set_interpreter (em, tm);
+ if (error)
+ goto done;
+ }
+
+ if (tm->set_rpath)
+ {
+ error = set_rpath (em, tm->set_rpath);
+ if (error)
+ goto done;
+ }
+
+ if (tm->unset_rpath)
+ delete_rpath (em);
+
+ if (tm->output_file)
+ error = elf_write_file (em, tm->output_file);
+
+ elf_main_free (em);
+
+ done:
+ if (error)
+ {
+ if (tm->quiet == 0)
+ clib_error_report (error);
+ return 1;
+ }
+ else
+ return 0;
+}
diff --git a/src/tools/g2/clib.c b/src/tools/g2/clib.c
new file mode 100644
index 00000000..845026b6
--- /dev/null
+++ b/src/tools/g2/clib.c
@@ -0,0 +1,157 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2009-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/elog.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <time.h>
+#include "cpel.h"
+#include "g2.h"
+
+int widest_track_format;
+
+typedef struct bound_track_ {
+ u32 track;
+ u8 *track_str;
+} bound_track_t;
+
+bound_track_t *bound_tracks;
+
+uword *the_evtdef_hash; /* (event-id, event-definition) hash */
+uword *the_trackdef_hash; /* (track-id, track-definition) hash */
+
+elog_main_t elog_main;
+
+void *get_clib_event (unsigned int datum)
+{
+ elog_event_t *ep = vec_elt_at_index (elog_main.events, datum);
+ return (void *)ep;
+}
+
+/*
+ * read_clib_file
+ */
+int read_clib_file(char *clib_file)
+{
+ static FILE *ofp;
+ clib_error_t *error = 0;
+ int i;
+ elog_main_t *em = &elog_main;
+ double starttime, delta;
+
+ vec_free(em->events);
+ vec_free(em->event_types);
+ if (the_trackdef_hash)
+ hash_free(the_trackdef_hash);
+
+ the_trackdef_hash = hash_create (0, sizeof (uword));
+
+ error = elog_read_file (&elog_main, clib_file);
+
+ if (error) {
+ fformat(stderr, "%U", format_clib_error, error);
+ return (1);
+ }
+
+ if (ofp == NULL) {
+ ofp = fdopen(2, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "Couldn't fdopen(2)?\n");
+ exit(1);
+ }
+ }
+
+ em = &elog_main;
+
+ for (i = 0; i < vec_len (em->tracks); i++) {
+ u32 track_code;
+ bound_track_t * btp;
+ elog_track_t * t;
+ uword * p;
+ int track_strlen;
+
+ t = &em->tracks[i];
+ track_code = i;
+ p = hash_get(the_trackdef_hash, track_code);
+ if (p) {
+ fprintf(ofp, "track %d redefined, retain first definition\n",
+ track_code);
+ continue;
+ }
+ vec_add2(bound_tracks, btp, 1);
+ btp->track = track_code;
+ btp->track_str = (u8 *) t->name;
+ hash_set(the_trackdef_hash, track_code, btp - bound_tracks);
+
+ track_strlen = strlen((char *)btp->track_str);
+ if (track_strlen > widest_track_format)
+ widest_track_format = track_strlen;
+ }
+
+ initialize_events();
+
+ for (i = 0; i < vec_len (em->event_types); i++) {
+ elog_event_type_t *ep;
+ u8 *tmp;
+
+ ep = vec_elt_at_index(em->event_types, i);
+ tmp = (u8 *) vec_dup(ep->format);
+ vec_add1(tmp,0);
+ add_event_from_clib_file (ep->type_index_plus_one, (char *) tmp, i);
+ vec_free(tmp);
+ }
+
+ finalize_events();
+
+ cpel_event_init(vec_len(em->events));
+
+ starttime = em->events[0].time;
+
+ for (i = 0; i < vec_len (em->events); i++) {
+ elog_event_t *ep;
+
+ ep = vec_elt_at_index(em->events, i);
+
+ delta = ep->time - starttime;
+
+ add_clib_event (delta, ep->track, ep->type + 1, i);
+ }
+
+ cpel_event_finalize();
+
+ set_pid_ax_width(8*widest_track_format);
+
+ return(0);
+}
+
+unsigned int vl(void *a)
+{
+ return vec_len (a);
+}
diff --git a/src/tools/g2/cpel.c b/src/tools/g2/cpel.c
new file mode 100644
index 00000000..8bcc91e6
--- /dev/null
+++ b/src/tools/g2/cpel.c
@@ -0,0 +1,470 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <time.h>
+#include "cpel.h"
+#include "g2.h"
+
+typedef struct bound_event_ {
+ u32 event_code;
+ u8 *event_str;
+ u8 *datum_str;
+} bound_event_t;
+
+bound_event_t *bound_events;
+
+int widest_track_format=8;
+
+typedef struct bound_track_ {
+ u32 track;
+ u8 *track_str;
+} bound_track_t;
+
+bound_track_t *bound_tracks;
+
+uword *the_strtab_hash; /* (name, base-VA) hash of all string tables */
+uword *the_evtdef_hash; /* (event-id, event-definition) hash */
+uword *the_trackdef_hash; /* (track-id, track-definition) hash */
+u8 *event_strtab; /* event string-table */
+
+void fatal(char *s)
+{
+ fprintf(stderr, "%s", s);
+ exit(1);
+}
+
+typedef enum {
+ PASS1=1,
+ PASS2=2,
+} pass_t;
+
+typedef struct {
+ int (*pass1)(cpel_section_header_t *, int, FILE *);
+ int (*pass2)(cpel_section_header_t *, int, FILE *);
+} section_processor_t;
+
+int bad_section(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ fprintf(ofp, "Bad (type 0) section, skipped...\n");
+ return(0);
+}
+
+int noop_pass(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ return(0);
+}
+
+int strtab_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ uword *p;
+ u8 *strtab_data_area = (u8 *)(sh+1);
+
+ /* Multiple string tables with the same name are Bad... */
+ p = hash_get_mem(the_strtab_hash, strtab_data_area);
+ if (p) {
+ fprintf(ofp, "Duplicate string table name %s", strtab_data_area);
+ }
+ /*
+ * Looks funny, but we really do want key = first string in the
+ * table, value = address(first string in the table)
+ */
+ hash_set_mem(the_strtab_hash, strtab_data_area, strtab_data_area);
+ if (verbose) {
+ fprintf(ofp, "String Table %s\n", strtab_data_area);
+ }
+ return(0);
+}
+
+int evtdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ event_definition_section_header_t *edh;
+ event_definition_t *ep;
+ u8 *this_strtab;
+ u32 event_code;
+ uword *p;
+ bound_event_t *bp;
+
+ edh = (event_definition_section_header_t *)(sh+1);
+ nevents = ntohl(edh->number_of_event_definitions);
+
+ if (verbose) {
+ fprintf(ofp, "Event Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ p = hash_get_mem(the_strtab_hash, edh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ initialize_events();
+
+ ep = (event_definition_t *)(edh+1);
+
+ for (i = 0; i < nevents; i++) {
+ event_code = ntohl(ep->event);
+ p = hash_get(the_evtdef_hash, event_code);
+ if (p) {
+ fprintf(ofp, "Event %d redefined, retain first definition\n",
+ event_code);
+ continue;
+ }
+ vec_add2(bound_events, bp, 1);
+ bp->event_code = event_code;
+ bp->event_str = this_strtab + ntohl(ep->event_format);
+ bp->datum_str = this_strtab + ntohl(ep->datum_format);
+ hash_set(the_evtdef_hash, event_code, bp - bound_events);
+
+ add_event_from_cpel_file(event_code, (char *) bp->event_str,
+ (char *)bp->datum_str);
+
+ ep++;
+ }
+
+ finalize_events();
+ return (0);
+}
+
+int trackdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ track_definition_section_header_t *tdh;
+ track_definition_t *tp;
+ u8 *this_strtab;
+ u32 track_code;
+ uword *p;
+ bound_track_t *btp;
+ int track_strlen;
+
+ tdh = (track_definition_section_header_t *)(sh+1);
+ nevents = ntohl(tdh->number_of_track_definitions);
+
+ if (verbose) {
+ fprintf(ofp, "Track Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ p = hash_get_mem(the_strtab_hash, tdh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ tp = (track_definition_t *)(tdh+1);
+
+ for (i = 0; i < nevents; i++) {
+ track_code = ntohl(tp->track);
+ p = hash_get(the_trackdef_hash, track_code);
+ if (p) {
+ fprintf(ofp, "track %d redefined, retain first definition\n",
+ track_code);
+ continue;
+ }
+ vec_add2(bound_tracks, btp, 1);
+ btp->track = track_code;
+ btp->track_str = this_strtab + ntohl(tp->track_format);
+ hash_set(the_trackdef_hash, track_code, btp - bound_tracks);
+
+ track_strlen = strlen((char *)btp->track_str);
+ if (track_strlen > widest_track_format)
+ widest_track_format = track_strlen;
+ tp++;
+ }
+ return (0);
+}
+
+int unsupported_pass (cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ if (verbose) {
+ fprintf(ofp, "Unsupported type %d section\n",
+ ntohl(sh->section_type));
+ }
+ return(0);
+}
+
+int event_pass2(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ event_section_header_t *eh;
+ u32 event_code, track_code, datum;
+ u64 starttime = ~0ULL;
+ int nevents;
+ int i;
+ event_entry_t *ep;
+ u64 now;
+ u64 delta;
+ u32 time0, time1;
+ double d;
+ uword *p;
+
+ eh = (event_section_header_t *)(sh+1);
+ nevents = ntohl(eh->number_of_events);
+ ticks_per_ns = ntohl(eh->clock_ticks_per_second)/1e9;
+ ep = (event_entry_t *)(eh+1);
+
+ p = hash_get_mem(the_strtab_hash, eh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ event_strtab = (u8 *)p[0];
+
+ cpel_event_init(nevents);
+
+ for (i = 0; i < nevents; i++) {
+ time0 = ntohl (ep->time[0]);
+ time1 = ntohl (ep->time[1]);
+
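+ /* reassemble the 64-bit tick count from two network-order words */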
+ now = (((u64) time0)<<32) | time1;
+
+ /* Convert from clock ticks to nanoseconds */
+ d = now;
+ d /= ticks_per_ns;
+
+ now = d;
+
+ if (starttime == ~0ULL)
+ starttime = now;
+
+ delta = now - starttime;
+
+ /* Delta = time since first event, in nanoseconds */
+ event_code = ntohl(ep->event_code);
+ track_code = ntohl(ep->track);
+ datum = ntohl(ep->event_datum);
+
+ add_cpel_event(delta, track_code, event_code, datum);
+
+ ep++;
+ }
+ cpel_event_finalize();
+ return(0);
+}
+
+char *strtab_ref(unsigned long datum)
+{
+ return ((char *)(event_strtab + datum));
+}
+
+/*
+ * Note: If necessary, add passes / columns to this table to
+ * handle section order dependencies.
+ */
+
+section_processor_t processors[CPEL_NUM_SECTION_TYPES+1] =
+{
+ {bad_section, noop_pass}, /* type 0 -- f**ked */
+ {strtab_pass1, noop_pass}, /* type 1 -- STRTAB */
+ {unsupported_pass, noop_pass}, /* type 2 -- SYMTAB */
+ {evtdef_pass1, noop_pass}, /* type 3 -- EVTDEF */
+ {trackdef_pass1, noop_pass}, /* type 4 -- TRACKDEF */
+ {noop_pass, event_pass2}, /* type 5 -- EVENTS */
+};
+
+
+int process_section(cpel_section_header_t *sh, int verbose, FILE *ofp,
+ pass_t pass)
+{
+ u32 type;
+ type = ntohl(sh->section_type);
+ int rv;
+ int (*fp)(cpel_section_header_t *, int, FILE *);
+
+ if (type > CPEL_NUM_SECTION_TYPES) {
+ fprintf(stderr, "Unknown section type %d\n", type);
+ return(1);
+ }
+ switch(pass) {
+ case PASS1:
+ fp = processors[type].pass1;
+ break;
+
+ case PASS2:
+ fp = processors[type].pass2;
+ break;
+
+ default:
+ fprintf(stderr, "Unknown pass %d\n", pass);
+ return(1);
+ }
+
+ rv = (*fp)(sh, verbose, ofp);
+
+ return(rv);
+}
+
+int cpel_dump_file_header(cpel_file_header_t *fh, int verbose, FILE *ofp)
+{
+ time_t file_time;
+
+ if (verbose) {
+ fprintf(ofp, "CPEL file: %s-endian, version %d\n",
+ ((fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) ?
+ "little" : "big"),
+ fh->endian_version & CPEL_FILE_VERSION_MASK);
+
+ file_time = ntohl(fh->file_date);
+
+ fprintf(ofp, "File created %s", ctime(&file_time));
+ }
+
+ return(0);
+}
+
+
+int cpel_process(u8 *cpel, int verbose, FILE *ofp)
+{
+ cpel_file_header_t *fh;
+ cpel_section_header_t *sh;
+ u16 nsections;
+ u32 section_size;
+ int i;
+
+ /* First, the file header */
+ fh = (cpel_file_header_t *)cpel;
+ if (fh->endian_version != CPEL_FILE_VERSION) {
+ if (fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) {
+ fprintf(stderr, "Little endian data format not supported\n");
+ return(1);
+ }
+ fprintf(stderr, "Unsupported file version 0x%x\n",
+ fh->endian_version);
+ return(1);
+ }
+ cpel_dump_file_header(fh, verbose, ofp);
+ nsections = ntohs(fh->nsections);
+
+ /*
+ * Take two passes through the file. PASS1 builds
+ * data structures, PASS2 actually dumps the file.
+ * Just in case the sections are in an unobvious order.
+ */
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ section_size = ntohl(sh->data_length);
+
+ if(verbose) {
+ fprintf(ofp, "Section type %d, size %d\n", ntohl(sh->section_type),
+ section_size);
+ }
+
+ if(process_section(sh, verbose, ofp, PASS1))
+ return(1);
+
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ if(process_section(sh, verbose, ofp, PASS2))
+ return(1);
+ section_size = ntohl(sh->data_length);
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+
+
+ return(0);
+}
+
+/*
+ * read_cpel_file
+ */
+int read_cpel_file(char *cpel_file)
+{
+ int verbose = 0;
+ int rv;
+ static u8 *cpel;
+ static unsigned long size;
+ static FILE *ofp;
+
+ if (cpel) {
+ unmapfile((char *)cpel, size);
+ hash_free(the_strtab_hash);
+ the_strtab_hash = 0;
+ hash_free(the_evtdef_hash);
+ the_evtdef_hash = 0;
+ hash_free(the_trackdef_hash);
+ the_trackdef_hash = 0;
+ }
+
+ cpel = (u8 *)mapfile((char *)cpel_file, &size);
+ if (cpel == 0) {
+ fprintf(stderr, "Couldn't map %s...\n", cpel_file);
+ exit(1);
+ }
+
+ if (ofp == NULL) {
+ ofp = fdopen(2, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "Couldn't fdopen(2)?\n");
+ exit(1);
+ }
+ }
+
+ the_strtab_hash = hash_create_string (0, sizeof (uword));
+ the_evtdef_hash = hash_create (0, sizeof (uword));
+ the_trackdef_hash = hash_create (0, sizeof (uword));
+
+ rv = cpel_process(cpel, verbose, ofp);
+
+ set_pid_ax_width(8*widest_track_format);
+
+ return(rv);
+}
+
+static bound_track_t generic_hex_track = {0, (u8 *) "0x%08x"};
+static bound_track_t generic_decimal_track = {0, (u8 *) "%8ld"};
+
+/*
+ * get_track_label
+ */
+char *get_track_label(unsigned long track)
+{
+ uword *p;
+ bound_track_t *tp;
+
+ p = hash_get(the_trackdef_hash, track);
+ if (p) {
+ tp = &bound_tracks[p[0]];
+ } else {
+ if (track > 65535)
+ tp = &generic_hex_track;
+ else
+ tp = &generic_decimal_track;
+ }
+ return((char *)tp->track_str);
+}
diff --git a/src/tools/g2/cpel.h b/src/tools/g2/cpel.h
new file mode 100644
index 00000000..73e4aea5
--- /dev/null
+++ b/src/tools/g2/cpel.h
@@ -0,0 +1,83 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _CPEL_H_
+#define _CPEL_H_ 1
+
+typedef struct cpel_file_header_ {
+ unsigned char endian_version;
+ unsigned char pad;
+ unsigned short nsections;
+ unsigned file_date;
+} cpel_file_header_t;
+
+#define CPEL_FILE_LITTLE_ENDIAN 0x80
+#define CPEL_FILE_VERSION 0x01
+#define CPEL_FILE_VERSION_MASK 0x7F
+
+typedef struct cpel_section_header_ {
+ unsigned int section_type;
+ unsigned int data_length; /* does NOT include type and itself */
+} cpel_section_header_t;
+
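+/*
+ * File layout: a cpel_file_header_t, then nsections sections, each a
+ * cpel_section_header_t followed by data_length payload bytes
+ * (see cpel_process() in cpel.c).
+ */
+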
+#define CPEL_SECTION_STRTAB 1
+/* string at offset 0 is the name of the table */
+
+#define CPEL_SECTION_SYMTAB 2
+#define CPEL_SECTION_EVTDEF 3
+
+typedef struct event_definition_section_header_ {
+ char string_table_name[64];
+ unsigned int number_of_event_definitions;
+} event_definition_section_header_t;
+
+typedef struct event_definition_ {
+ unsigned int event;
+ unsigned int event_format;
+ unsigned int datum_format;
+} event_definition_t;
+
+#define CPEL_SECTION_TRACKDEF 4
+
+typedef struct track_definition_section_header_ {
+ char string_table_name[64];
+ unsigned int number_of_track_definitions;
+} track_definition_section_header_t;
+
+typedef struct track_definition_ {
+ unsigned int track;
+ unsigned int track_format;
+} track_definition_t;
+
+#define CPEL_SECTION_EVENT 5
+
+typedef struct event_section_header_ {
+ char string_table_name[64];
+ unsigned int number_of_events;
+ unsigned int clock_ticks_per_second;
+} event_section_header_t;
+
+typedef struct event_entry_ {
+ unsigned int time[2];
+ unsigned int track;
+ unsigned int event_code;
+ unsigned int event_datum;
+} event_entry_t;
+
+#define CPEL_NUM_SECTION_TYPES 5
+
+#endif /* _CPEL_H_ */
+
diff --git a/src/tools/g2/events.c b/src/tools/g2/events.c
new file mode 100644
index 00000000..6839a435
--- /dev/null
+++ b/src/tools/g2/events.c
@@ -0,0 +1,475 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+#include <gtk/gtk.h>
+#include "g2.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+/*
+ * globals
+ */
+boolean g_little_endian;
+event_t *g_events;
+ulong g_nevents;
+pid_sort_t *g_pids;
+pid_sort_t *g_original_pids;
+int g_npids;
+pid_data_t *g_pid_data_list;
+
+/*
+ * locals
+ */
+pid_data_t **s_pidhash;
+
+/*
+ * config parameters
+ */
+
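+/* clock ticks per nanosecond; overridden by the "ticks_per_ns"
+ * property or by the clock rate in a CPEL event section */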
+double ticks_per_ns=1000.0;
+boolean ticks_per_ns_set;
+
+/****************************************************************************
+* event_init
+****************************************************************************/
+
+void event_init(void)
+{
+ ulong endian;
+ char *ep;
+ char *askstr;
+ int tmp;
+
+ ep = (char *)&endian;
+ endian = 0x12345678;
+ if (*ep != 0x12)
+ g_little_endian = TRUE;
+ else
+ g_little_endian = FALSE;
+
+ askstr = getprop("dont_ask_ticks_per_ns_initially");
+
+ if (askstr && (*askstr == 't' || *askstr == 'T')) {
+ tmp = atol(getprop_default("ticks_per_ns", 0));
+ if (tmp > 0) {
+ ticks_per_ns = tmp;
+ ticks_per_ns_set = TRUE;
+ }
+ }
+}
+
+/****************************************************************************
+* find_or_add_pid
+****************************************************************************/
+
+pid_data_t *find_or_add_pid (ulong pid)
+{
+ pid_data_t *pp;
+ ulong bucket;
+
+ bucket = pid % PIDHASH_NBUCKETS;
+
+ pp = s_pidhash[bucket];
+
+ if (pp == 0) {
+ pp = g_malloc0(sizeof(pid_data_t));
+ pp->pid_value = pid;
+ s_pidhash[bucket] = pp;
+ g_npids++;
+ return(pp);
+ }
+ while (pp) {
+ if (pp->pid_value == pid)
+ return(pp);
+ pp = pp->next;
+ }
+
+ pp = g_malloc0(sizeof(pid_data_t));
+ pp->pid_value = pid;
+ pp->next = s_pidhash[bucket];
+ s_pidhash[bucket] = pp;
+ g_npids++;
+ return(pp);
+}
+
+/****************************************************************************
+* pid_cmp
+****************************************************************************/
+
+int pid_cmp(const void *a1, const void *a2)
+{
+ pid_sort_t *p1 = (pid_sort_t *)a1;
+ pid_sort_t *p2 = (pid_sort_t *)a2;
+
+ if (p1->pid_value < p2->pid_value)
+ return(-1);
+ else if (p1->pid_value == p2->pid_value)
+ return(0);
+ else
+ return(1);
+}
+
+/****************************************************************************
+* make_sorted_pid_vector
+****************************************************************************/
+
+static void make_sorted_pid_vector(void)
+{
+ pid_data_t *pp;
+ pid_data_t **p_previous;
+ pid_sort_t *psp;
+ int i;
+
+ psp = g_pids = g_malloc0(sizeof(pid_sort_t)*g_npids);
+
+ for (i = 0; i < PIDHASH_NBUCKETS; i++) {
+ pp = s_pidhash[i];
+ while(pp) {
+ psp->pid = pp;
+ psp->pid_value = pp->pid_value;
+ psp++;
+ pp = pp->next;
+ }
+ }
+
+ qsort(&g_pids[0], g_npids, sizeof(pid_sort_t), pid_cmp);
+
+ /* put the sort order into the pid objects */
+ psp = g_pids;
+
+ /*
+ * This is rather gross.
+ *
+ * We happen to know that whenever this function is called, the hash table
+ * structure itself is immediately torn down. So the "next" pointers in the
+ * pid_data_t elements are about to become useless.
+ *
+ * So we re-use them, to link all the pid_data_t elements together into a
+ * single unified linked list, with g_pid_data_list pointing to the head.
+ * This means we can walk all the pid_data_t objects if we really want to.
+ * Reading snapshots from disk is one example.
+ *
+ * Alternatively we could just leave the hash table in place; this is
+ * far nicer, but as it happens, trading O(n) lookups for O(1) lookups
+ * isn't actually a problem for the restricted post-tear-down usage. So for
+ * now we take the memory savings and swap our hash table for a list.
+ */
+ p_previous = &g_pid_data_list;
+ for (i = 0; i < g_npids; i++) {
+ pp = psp->pid;
+ pp->pid_index = i;
+ *p_previous = pp;
+ p_previous = &pp->next;
+ psp++;
+ }
+ *p_previous = NULL;
+
+ /*
+ * Squirrel away original (sorted) vector, so we can
+ * toggle between "chase" mode, snapshots, and the original
+ * display method on short notice
+ */
+ g_original_pids = g_malloc0(sizeof(pid_sort_t)*g_npids);
+ memcpy (g_original_pids, g_pids, sizeof(pid_sort_t)*g_npids);
+}
+
+/****************************************************************************
+* read_events
+****************************************************************************/
+
+void read_events(char *filename)
+{
+ ulong *ulp;
+ ulong size;
+ event_t *ep;
+ raw_event_t *rep;
+ ulonglong start_time=0ULL;
+ ulonglong low_time;
+ boolean once=TRUE;
+ int i;
+ char tmpbuf [128];
+
+ ulp = (ulong *)mapfile(filename, &size);
+
+ if (ulp == NULL) {
+ sprintf(tmpbuf, "Couldn't open %s\n", filename);
+ infobox("Read Event Log Failure", tmpbuf);
+ return;
+ }
+
+ g_nevents = ntohl(*ulp);
+
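+ /* the file is a network-order u32 event count followed by
+ * g_nevents raw_event_t records */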
+ if (size != (g_nevents*sizeof(raw_event_t) + sizeof(g_nevents))) {
+ sprintf(tmpbuf, "%s was damaged, or isn't an event log.\n", filename);
+ infobox("Bad Input File", tmpbuf);
+ g_nevents = 0;
+ unmapfile((char *)ulp, size);
+ return;
+ }
+
+ rep = (raw_event_t *)(ulp+1);
+
+ if (g_events)
+ g_free(g_events);
+
+ g_events = (event_t *)g_malloc(g_nevents * sizeof(event_t));
+ ep = g_events;
+
+ while (g_npids > 0) {
+ g_free((g_pids + g_npids-1)->pid);
+ g_npids--;
+ }
+ if (g_pids) {
+ g_free(g_pids);
+ g_free(g_original_pids);
+ g_pids = 0;
+ g_original_pids = 0;
+ }
+
+ s_pidhash = (pid_data_t **)g_malloc0(
+ PIDHASH_NBUCKETS*sizeof(pid_data_t *));
+
+ /* $$$ add a SEGV handler... */
+ for (i = 0; i < g_nevents; i++) {
+ if (once) {
+ once = FALSE;
+ start_time = ((ulonglong)ntohl(rep->time[0]));
+ start_time <<= 32;
+ low_time = ntohl(rep->time[1]);
+ low_time &= 0xFFFFFFFF;
+ start_time |= low_time;
+ ep->time = 0LL;
+ } else {
+ ep->time = ((ulonglong)ntohl(rep->time[0]));
+ ep->time <<= 32;
+ low_time = ntohl(rep->time[1]);
+ low_time &= 0xFFFFFFFF;
+ ep->time |= low_time;
+ ep->time -= start_time;
+ ep->time /= ticks_per_ns;
+ }
+ ep->code = ntohl(rep->code);
+ ep->pid = find_or_add_pid(ntohl(rep->pid));
+ ep->datum = ntohl(rep->datum);
+ ep->flags = 0;
+ ep++;
+ rep++;
+ }
+
+ unmapfile((char *)ulp, size);
+
+ make_sorted_pid_vector();
+ g_free(s_pidhash);
+ s_pidhash = 0;
+
+ /* Give the view-1 world a chance to reset a few things... */
+ view1_read_events_callback();
+}
+
+static event_t *add_ep;
+
+/****************************************************************************
+* cpel_event_init
+****************************************************************************/
+void cpel_event_init (ulong nevents)
+{
+ g_nevents = nevents;
+ if (g_events)
+ g_free(g_events);
+ add_ep = g_events = (event_t *)g_malloc(g_nevents * sizeof(event_t));
+ while (g_npids > 0) {
+ g_free((g_pids + g_npids-1)->pid);
+ g_npids--;
+ }
+ if (g_pids) {
+ g_free(g_pids);
+ g_free(g_original_pids);
+ g_pids = 0;
+ g_original_pids = 0;
+ }
+ s_pidhash = (pid_data_t **)g_malloc0(
+ PIDHASH_NBUCKETS*sizeof(pid_data_t *));
+}
+
+/****************************************************************************
+* add_cpel_event
+****************************************************************************/
+
+void add_cpel_event(ulonglong delta, ulong track, ulong event, ulong datum)
+{
+ event_t *ep;
+
+ ep = add_ep++;
+ ep->time = delta;
+ ep->pid = find_or_add_pid(track);
+ ep->code = event;
+ ep->datum = datum;
+ ep->flags = 0;
+}
+
+/****************************************************************************
+* add_clib_event
+****************************************************************************/
+
+void add_clib_event(double delta, unsigned short track,
+ unsigned short event, unsigned int index)
+{
+ event_t *ep;
+
+ ep = add_ep++;
+ ep->time = (ulonglong) (delta * 1e9); /* time in integer nanoseconds */
+ ep->pid = find_or_add_pid(track);
+ ep->code = event;
+ ep->datum = index;
+ ep->flags = EVENT_FLAG_CLIB;
+}
+
+/****************************************************************************
+* cpel_event_finalize
+****************************************************************************/
+
+void cpel_event_finalize(void)
+{
+ make_sorted_pid_vector();
+ g_free(s_pidhash);
+ s_pidhash = 0;
+
+ /* Give the view-1 world a chance to reset a few things... */
+ view1_read_events_callback();
+}
+
+/****************************************************************************
+* mapfile
+****************************************************************************/
+
+char *mapfile (char *file, ulong *sizep)
+{
+ struct stat statb;
+ char *rv;
+ int maphfile;
+ size_t mapfsize;
+
+ maphfile = open (file, O_RDONLY);
+
+ if (maphfile < 0)
+ return (NULL);
+
+ if (fstat (maphfile, &statb) < 0) {
+ close (maphfile);
+ return (NULL);
+ }
+
+ /* Don't try to mmap directories, FIFOs, semaphores, etc. */
+ if (! (statb.st_mode & S_IFREG)) {
+ close (maphfile);
+ return (NULL);
+ }
+
+ mapfsize = statb.st_size;
+
+ if (mapfsize < 3) {
+ close (maphfile);
+ return (NULL);
+ }
+
+ rv = mmap (0, mapfsize, PROT_READ, MAP_SHARED, maphfile, 0);
+
+ /* mmap reports failure as MAP_FAILED, not NULL */
+ if (rv == MAP_FAILED) {
+ g_error ("%s mapping problem, I quit...\n", file);
+ }
+
+ close (maphfile);
+
+ /* madvise is purely advisory; ignore failure but still report the size */
+ (void) madvise (rv, mapfsize, MADV_SEQUENTIAL);
+
+ if (sizep) {
+ *sizep = mapfsize;
+ }
+ return (rv);
+}
+
+/****************************************************************************
+* unmapfile
+****************************************************************************/
+
+boolean unmapfile (char *addr, ulong size)
+{
+ if (munmap (addr, size) < 0) {
+ g_warning("Unmap error, addr 0x%lx size 0x%x\n",
+ (unsigned long) addr, (unsigned int)size);
+ return(FALSE);
+ }
+ return(TRUE);
+}
+
+/****************************************************************************
+* find_event_index
+* Binary search for first event whose time is >= t
+****************************************************************************/
+
+int find_event_index (ulonglong t)
+{
+ int index, bottom, top;
+ event_t *ep;
+
+ bottom = g_nevents-1;
+ top = 0;
+
+ while (1) {
+ index = (bottom + top) / 2;
+
+ ep = (g_events + index);
+
+ if (ep->time == t)
+ return(index);
+
+ if (top >= bottom) {
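+ /* No exact hit: nudge to the first event with time >= t */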
+ while (index > 0 && ep->time > t) {
+ ep--;
+ index--;
+ }
+ while (index < g_nevents && ep->time < t) {
+ ep++;
+ index++;
+ }
+ return(index);
+ }
+
+ if (ep->time < t)
+ top = index + 1;
+ else
+ bottom = index - 1;
+ }
+}
+
+/****************************************************************************
+* events_about
+****************************************************************************/
+
+void events_about (char *tmpbuf)
+{
+ sprintf(tmpbuf+strlen(tmpbuf), "%d total events, %.3f ticks per ns\n",
+ (int)g_nevents, ticks_per_ns);
+}
diff --git a/src/tools/g2/g2.h b/src/tools/g2/g2.h
new file mode 100644
index 00000000..f1f268a8
--- /dev/null
+++ b/src/tools/g2/g2.h
@@ -0,0 +1,196 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * typedefs and so forth
+ */
+#include <sys/types.h>
+#include <gtk-2.0/gtk/gtk.h>
+#include <stdio.h>
+#include "props.h"
+
+typedef char boolean;
+typedef unsigned long long ulonglong;
+
+/*
+ * main.c
+ */
+
+GtkWidget *g_mainwindow;
+GtkWidget *g_mainvbox;
+GtkWidget *g_mainhbox;
+
+/*
+ * pointsel.c
+ */
+void point_selector_init(void);
+boolean read_event_definitions (char *filename);
+char *sxerox(char *);
+void pointsel_about(char *);
+void pointsel_next_snapshot(void);
+void initialize_events(void);
+void finalize_events(void);
+
+#define NEVENTS 100000
+
+typedef struct event_def_ {
+ ulong event;
+ char *name;
+ char *format;
+ boolean selected;
+ boolean is_clib;
+ char pad[2];
+} event_def_t;
+
+event_def_t *find_event_definition (ulong code);
+
+event_def_t g_eventdefs[NEVENTS];
+
+/*
+ * config params
+ */
+int c_maxpointsel; /* max # points shown in selector dlg */
+gint c_view1_draw_width;
+gint c_view1_draw_height;
+
+/*
+ * menu1.c
+ */
+
+void menu1_init(void);
+void modal_dialog (char *label_text, char *retry_text, char *default_value,
+ boolean (*cb)(char *));
+void infobox(char *label_text, char *text);
+/*
+ * view1.c
+ */
+GdkFont *g_font;
+GdkColor fg_black, bg_white;
+void view1_init(void);
+void view1_display(void);
+void view1_read_events_callback(void);
+void view1_display_when_idle(void);
+void view1_print_callback(GtkToggleButton *item, gpointer data);
+void view1_about(char *);
+void set_pid_ax_width(int width);
+void set_window_title(const char *filename);
+
+enum view1_tbox_fn {
+ TBOX_DRAW_BOXED = 1, /* note: order counts */
+ TBOX_DRAW_EVENT,
+ TBOX_DRAW_PLAIN,
+ TBOX_PRINT_BOXED,
+ TBOX_PRINT_EVENT,
+ TBOX_PRINT_PLAIN, /* end restriction */
+ TBOX_GETRECT_BOXED,
+ TBOX_GETRECT_EVENT,
+ TBOX_GETRECT_PLAIN,
+};
+
+enum view1_line_fn {
+ LINE_DRAW_BLACK = 1,
+ LINE_DRAW_WHITE,
+ LINE_PRINT,
+};
+
+GdkRectangle *tbox (char *s, int x, int y, enum view1_tbox_fn function);
+void line (int x1, int y1, int x2, int y2, enum view1_line_fn function);
+gint view1_handle_key_press_event (GtkWidget *widget, GdkEventKey *event);
+
+/*
+ * events.c
+ */
+
+void events_about (char *);
+
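+/* On-disk event record; all fields are stored in network byte order */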
+typedef struct raw_event {
+ unsigned long time[2];
+ unsigned long pid;
+ unsigned long code;
+ unsigned long datum;
+} raw_event_t;
+
+void event_init(void);
+char *mapfile (char *file, ulong *sizep);
+boolean unmapfile (char *addr, ulong size);
+void read_events (char *);
+int find_event_index (ulonglong t);
+int read_cpel_file(char *file);
+int read_clib_file(char *file);
+void cpel_event_init(ulong);
+void add_event_from_cpel_file(ulong, char * , char *);
+void add_event_from_clib_file(unsigned int event, char *name,
+ unsigned int vec_index);
+void add_cpel_event(ulonglong delta, ulong, ulong, ulong);
+void add_clib_event(double delta, unsigned short track,
+ unsigned short event, unsigned int index);
+void cpel_event_finalize(void);
+void *get_clib_event (unsigned int datum);
+
+typedef struct pid_data {
+ struct pid_data *next;
+ ulong pid_value; /* The actual pid value */
+ ulong pid_index; /* Index in pid sort order */
+} pid_data_t;
+
+#define EVENT_FLAG_SELECT 0x00000001 /* This event is selected */
+#define EVENT_FLAG_SEARCHRSLT 0x00000002 /* This event is the search rslt */
+#define EVENT_FLAG_CLIB 0x00000004 /* clib event */
+
+typedef struct pid_sort {
+ struct pid_data *pid;
+ ulong pid_value;
+ /*
+ * This is a bit of a hack, since this is used only by the view:
+ */
+ unsigned color_index;
+ int selected;
+} pid_sort_t;
+
+typedef struct event {
+ ulonglong time;
+ ulong code;
+ pid_data_t *pid;
+ ulong datum;
+ ulong flags;
+} event_t;
+
+
+boolean g_little_endian;
+event_t *g_events;
+ulong g_nevents;
+pid_sort_t *g_pids;
+pid_sort_t *g_original_pids;
+int g_npids;
+pid_data_t *g_pid_data_list;
+
+#define PIDHASH_NBUCKETS 20021 /* Should be prime */
+
+boolean ticks_per_ns_set;
+double ticks_per_ns;
+
+/*
+ * version.c
+ */
+const char *version_string;
+const char *minor_v_string;
+
+/*
+ * cpel.c
+ */
+char *get_track_label(unsigned long);
+int widest_track_format;
+char *strtab_ref(unsigned long);
diff --git a/src/tools/g2/g2version.c b/src/tools/g2/g2version.c
new file mode 100644
index 00000000..4b6f9313
--- /dev/null
+++ b/src/tools/g2/g2version.c
@@ -0,0 +1,19 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const char *version_string = "G2 (x86_64 GNU/Linux) major version 3.0";
+const char *minor_v_string =
+ "Built Wed Feb 3 10:58:12 EST 2016";
diff --git a/src/tools/g2/main.c b/src/tools/g2/main.c
new file mode 100644
index 00000000..1ec7983a
--- /dev/null
+++ b/src/tools/g2/main.c
@@ -0,0 +1,199 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "g2.h"
+#include "props.h"
+#include <pwd.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <vppinfra/mem.h>
+
+/*
+ * globals
+ */
+
+GtkWidget *g_mainwindow; /* The main window */
+
+/* Graphical object hierarchy
+ *
+ * [main window]
+ * [main vbox]
+ * [main (e.g. file) menubar]
+ * [view hbox]
+ * [view bottom menu]
+ */
+
+GtkWidget *g_mainvbox;
+GtkWidget *g_mainhbox;
+
+gint delete_event(GtkWidget *widget, GdkEvent *event, gpointer data)
+{
+ /* Allow window to be destroyed */
+ return(FALSE);
+}
+
+void destroy(GtkWidget *widget, gpointer data)
+{
+ gtk_main_quit();
+}
+
+int main (int argc, char **argv)
+{
+ char tmpbuf [128];
+ struct passwd *pw;
+ char *event_file = 0;
+ char *cpel_file = 0;
+ char *clib_file =0;
+ char *title = "none";
+ int curarg=1;
+ char *homedir;
+
+ clib_mem_init (0, ((uword)3<<30));
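+ /* First order of business: hand vppinfra a 3 GB heap */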
+
+ gtk_init(&argc, &argv);
+
+ homedir = getenv ("HOME");
+ tmpbuf[0] = 0;
+
+ if (homedir) {
+ sprintf(tmpbuf, "%s/.g2", homedir);
+ } else {
+ pw = getpwuid(geteuid());
+ if (pw) {
+ sprintf(tmpbuf, "%s/.g2", pw->pw_dir);
+ }
+ }
+ if (tmpbuf[0])
+ readprops(tmpbuf);
+
+ g_mainwindow = gtk_window_new (GTK_WINDOW_TOPLEVEL);
+
+ gtk_signal_connect (GTK_OBJECT(g_mainwindow), "delete_event",
+ GTK_SIGNAL_FUNC (delete_event), NULL);
+
+ gtk_signal_connect (GTK_OBJECT(g_mainwindow), "destroy",
+ GTK_SIGNAL_FUNC (destroy), NULL);
+
+ gtk_container_set_border_width(GTK_CONTAINER(g_mainwindow), 5);
+
+ g_mainvbox = gtk_vbox_new(FALSE, 0);
+ g_mainhbox = gtk_hbox_new(FALSE, 0);
+
+ /*
+ * init routines
+ */
+
+ menu1_init();
+ point_selector_init();
+ view1_init();
+ event_init();
+
+ /*
+ * Now that we're ready to rock 'n roll, see if we've been asked to
+ * press a few buttons...
+ */
+
+ while (curarg < argc) {
+ if (!strncmp(argv[curarg], "--cpel-input", 4)) {
+ curarg++;
+ if (curarg < argc) {
+ cpel_file = argv[curarg];
+ curarg++;
+ break;
+ }
+ g_error("Missing filename after --cpel-input");
+ }
+ if (!strncmp(argv[curarg], "--clib-input", 4)) {
+ curarg++;
+ if (curarg < argc) {
+ clib_file = argv[curarg];
+ curarg++;
+ break;
+ }
+ g_error("Missing filename after --clib-input");
+ }
+
+ if (!strncmp(argv[curarg], "--pointdefs", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ read_event_definitions(argv[curarg]);
+ curarg++;
+ continue;
+ }
+ g_error ("Missing filename after --pointdefs\n");
+ }
+ if (!strncmp(argv[curarg], "--event-log", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ event_file = argv[curarg];
+ curarg++;
+ continue;
+ }
+ g_error ("Missing filename after --event-log\n");
+ }
+
+ if (!strncmp(argv[curarg], "--ticks-per-us", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ ticks_per_ns = atof(argv[curarg]);
+ if (ticks_per_ns == 0.0) {
+ g_error("ticks-per-us (%s) didn't convert properly\n",
+ argv[curarg]);
+ }
+ ticks_per_ns_set = TRUE;
+ curarg++;
+ continue;
+ }
+ g_error ("Missing value after --ticks-per-us\n");
+ }
+
+ fprintf(stderr,
+ "g2 [--pointdefs <filename>] [--event-log <filename>]\n");
+ fprintf(stderr, " [--ticks-per-us <value>]\n");
+ fprintf(stderr,
+ " [--cpel-input <filename>] [--clib-input <filename>]\n");
+ fprintf(stderr,
+ "%s\n%s\n", version_string, minor_v_string);
+ exit(0);
+ }
+
+ if (clib_file) {
+ read_clib_file (clib_file);
+ title = clib_file;
+ } else if (cpel_file) {
+ read_cpel_file(cpel_file);
+ title = cpel_file;
+ } else if (event_file) {
+ read_events(event_file);
+ title = event_file;
+ }
+
+ set_window_title(title);
+
+ gtk_signal_connect (GTK_OBJECT (g_mainwindow), "key_press_event",
+ (GtkSignalFunc) view1_handle_key_press_event, NULL);
+ gtk_container_add(GTK_CONTAINER(g_mainvbox), g_mainhbox);
+ gtk_widget_show(g_mainhbox);
+ gtk_container_add(GTK_CONTAINER(g_mainwindow), g_mainvbox);
+ gtk_widget_show(g_mainvbox);
+ gtk_widget_show(g_mainwindow);
+
+ gtk_main();
+ return(0);
+}
diff --git a/src/tools/g2/menu1.c b/src/tools/g2/menu1.c
new file mode 100644
index 00000000..fce81fa6
--- /dev/null
+++ b/src/tools/g2/menu1.c
@@ -0,0 +1,565 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <gtk/gtk.h>
+#define GTK_ENABLE_BROKEN // DGMS
+#include <gtk/gtktext.h>
+#include <stdlib.h>
+#include "g2.h"
+#include <string.h>
+
+/*
+ * locals
+ */
+static GtkWidget *s_mainmenubar;
+static GtkWidget *s_filemenu;
+static GtkWidget *s_readdefs;
+static GtkWidget *s_readevents;
+static GtkWidget *s_readeventsclock;
+static GtkWidget *s_readcpel;
+static GtkWidget *s_readclib;
+static GtkWidget *s_print;
+static GtkWidget *s_quit;
+
+static GtkWidget *s_mainfilemenu;
+static GtkWidget *s_help_general;
+static GtkWidget *s_help_about;
+static GtkWidget *s_mainhelpmenu;
+static GtkWidget *s_helpmenu;
+
+static GtkWidget *s_filesel;
+static GtkWidget *s_eventsel;
+
+typedef struct md_ {
+ GtkWidget *entry;
+ GtkWidget *label;
+ GtkWidget *dialog;
+ boolean (*callback)(char *);
+ char *retry_text;
+} md_t;
+
+char *general_help = "\n"
+"G2 is a performance event visualization tool.\n"
+"\n"
+"To view CPEL-format event data:\n"
+"g2 --cpel <filename>\n"
+"or use the File Menu->Read CPEL file option.\n"
+"\n"
+"To view vppinfra-format (.../open-repo/vppinfra/vppinfra/elog.h) event data:\n"
+"g2 --clib <filename>\n"
+"or use the File Menu->Read clib file option.\n"
+"\n"
+"To toggle event detail boxes, left-mouse-click on an event.\n"
+"\n"
+"To zoom to an area, depress the left mouse button. Move the\n"
+"mouse. Release the mouse.\n"
+"\n"
+"To use the time ruler, depress the right mouse button. Move the\n"
+"mouse. Release when done.\n"
+"\n"
+"To push a track to the bottom, <ctrl><left-mouse>\n"
+"\n"
+"To pull a track to the top, <shift><left-mouse>\n"
+"\n"
+"To selectively color/uncolor a track, <ctrl><shift><left-mouse>\n"
+"\n"
+"To make the mouse scrollwheel faster, press <shift>\n"
+"\n"
+"Hotkeys, supposedly Quake-like:\n"
+" w - zoom-in\n"
+" s - zoom-out\n"
+" a - pan-left\n"
+" d - pan-right\n"
+" r - pan-up\n"
+" f - pan-down\n"
+" t - less traces\n"
+" g - more traces\n"
+"\n"
+" e - toggle summary-mode\n"
+" c - toggle color-mode\n"
+"\n"
+" x - take snapshot\n"
+" z - go to next snapshot\n"
+" p - put snapshots to snapshots.g2 \n"
+" l - load snapshots from snapshots.g2\n"
+"\n"
+"<ctrl>q - quit\n"
+"Send comments / bug reports to the \"fd.io\" mailing list.\n";
+
+/****************************************************************************
+* debug_dialog_callback
+****************************************************************************/
+
+boolean debug_dialog_callback (char *s)
+{
+ g_print("Dialog result: %s\n", s);
+ return (TRUE);
+}
+
+/****************************************************************************
+* get_dialog_value
+****************************************************************************/
+
+static void get_dialog_value (GtkWidget *dialog, gpointer user_data)
+{
+ md_t *md = (md_t *)user_data;
+ char * cb_arg;
+
+ cb_arg = (char *) gtk_entry_get_text(GTK_ENTRY(md->entry));
+
+ if ((*md->callback)(cb_arg)) {
+ gtk_grab_remove(md->dialog);
+ gtk_widget_destroy(md->dialog);
+ } else {
+ gtk_label_set_text (GTK_LABEL(md->label), md->retry_text);
+ }
+}
+
+/****************************************************************************
+* modal_dialog
+****************************************************************************/
+
+void modal_dialog (char *label_text, char *retry_text, char *default_value,
+ boolean (*cb)(char *))
+{
+ GtkWidget *dialog, *label, *ok_button, *entry;
+ static md_t dlg;
+ md_t *md = &dlg;
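+ /* Single static descriptor: only one modal dialog can be active at a time */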
+
+ dialog = gtk_dialog_new();
+ label = gtk_label_new(label_text);
+
+ entry = gtk_entry_new();
+ if (default_value)
+ gtk_entry_set_text(GTK_ENTRY(entry), default_value);
+
+ ok_button = gtk_button_new_with_label("OK");
+
+ md->entry = entry;
+ md->label = label;
+ md->retry_text = retry_text;
+ md->dialog = dialog;
+ if (cb)
+ md->callback = cb;
+ else
+ md->callback = debug_dialog_callback;
+
+ gtk_signal_connect (GTK_OBJECT (ok_button), "clicked",
+ GTK_SIGNAL_FUNC(get_dialog_value), (gpointer) md);
+
+ gtk_signal_connect (GTK_OBJECT (entry), "activate",
+ GTK_SIGNAL_FUNC(get_dialog_value), (gpointer) md);
+
+ gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->action_area),
+ entry);
+
+ gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->action_area),
+ ok_button);
+ gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label);
+ gtk_widget_show_all(dialog);
+ gtk_widget_grab_focus(entry);
+ gtk_grab_add(dialog);
+}
+
+/****************************************************************************
+* get_eventdef_name
+****************************************************************************/
+
+static void get_eventdef_name (GtkFileSelection *sel, gpointer user_data)
+{
+ char *filename = (char *) gtk_file_selection_get_filename (
+ GTK_FILE_SELECTION(s_filesel));
+ read_event_definitions(filename);
+ set_window_title(filename);
+}
+
+/****************************************************************************
+* read_eventdef_callback
+****************************************************************************/
+
+static void read_eventdef_callback(GtkToggleButton *item, gpointer data)
+{
+
+ s_filesel = gtk_file_selection_new("Read Event Definitions From...");
+
+ gtk_file_selection_set_filename(GTK_FILE_SELECTION(s_filesel),
+ "../h/elog.h");
+
+ gtk_signal_connect (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->ok_button),
+ "clicked",
+ GTK_SIGNAL_FUNC(get_eventdef_name), NULL);
+
+ gtk_signal_connect_object (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->ok_button),
+ "clicked",
+ GTK_SIGNAL_FUNC (gtk_widget_destroy),
+ (gpointer) s_filesel);
+
+ gtk_signal_connect_object (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->cancel_button),
+ "clicked",
+ GTK_SIGNAL_FUNC (gtk_widget_destroy),
+ (gpointer) s_filesel);
+ gtk_file_selection_hide_fileop_buttons(GTK_FILE_SELECTION(s_filesel));
+ gtk_widget_show (s_filesel);
+}
+
+/****************************************************************************
+* get_events_name
+****************************************************************************/
+
+static void get_events_name (GtkFileSelection *sel, gpointer user_data)
+{
+ char *filename = (char *) gtk_file_selection_get_filename (
+ GTK_FILE_SELECTION(s_eventsel));
+ read_events(filename);
+ view1_display_when_idle();
+}
+
+
+/****************************************************************************
+* get_ticks_per_ns
+****************************************************************************/
+
+static boolean get_ticks_per_ns (char *value)
+{
+ double rv;
+
+ rv = atof (value);
+
+ if (rv == 0.0 || rv > 100000)
+ return(FALSE);
+
+ ticks_per_ns = rv;
+ ticks_per_ns_set = TRUE;
+
+ gtk_widget_show(s_eventsel);
+ return(TRUE);
+}
+
+/****************************************************************************
+* read_events_callback
+****************************************************************************/
+
+static void read_events_callback(GtkToggleButton *item, gpointer data)
+{
+ char tmpbuf [32];
+
+ s_eventsel = gtk_file_selection_new("Read Events From...");
+
+ gtk_signal_connect (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_eventsel)->ok_button),
+ "clicked",
+ GTK_SIGNAL_FUNC(get_events_name), NULL);
+
+ gtk_signal_connect_object (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_eventsel)->ok_button),
+ "clicked",
+ GTK_SIGNAL_FUNC (gtk_widget_destroy),
+ (gpointer) s_eventsel);
+
+ gtk_signal_connect_object (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_eventsel)->cancel_button),
+ "clicked",
+ GTK_SIGNAL_FUNC (gtk_widget_destroy),
+ (gpointer) s_eventsel);
+ gtk_file_selection_hide_fileop_buttons(GTK_FILE_SELECTION(s_eventsel));
+
+ if (ticks_per_ns_set)
+ gtk_widget_show (s_eventsel);
+ else {
+ sprintf(tmpbuf, "%.3f", ticks_per_ns);
+ modal_dialog ("Please enter clock ticks per nanosecond",
+ "Invalid: Please enter clock ticks per nanosecond",
+ tmpbuf, get_ticks_per_ns);
+ }
+}
+
+/****************************************************************************
+* read_eventsclock_callback
+****************************************************************************/
+
+static void read_eventsclock_callback(GtkToggleButton *item, gpointer data)
+{
+ ticks_per_ns_set = FALSE;
+ read_events_callback(item, data);
+}
+
+/****************************************************************************
+* infobox_size_request
+****************************************************************************/
+
+void infobox_size_request (GtkWidget *widget, GtkRequisition *req,
+ gpointer user_data)
+{
+ char *text = (char *)user_data;
+ char *cp;
+ int widest_line_in_chars;
+ int w;
+ int nlines;
+
+ /*
+ * You'd think that the string extent function would work here.
+ * You'd be wrong.
+ */
+ nlines = w = widest_line_in_chars = 0;
+ for (cp = text; *cp; cp++) {
+ if (*cp == '\n') {
+ if (w > widest_line_in_chars) {
+ widest_line_in_chars = w;
+ }
+ w = 0;
+ nlines++;
+ }
+ w++;
+ }
+
+ nlines++;
+
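+ /* Rough geometry: presumably an 8x13 pixel character cell, plus margins */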
+ req->width = (widest_line_in_chars * 8) + 20;
+ req->height = (nlines * 13) + 10;
+}
+
+/****************************************************************************
+* infobox
+****************************************************************************/
+
+void infobox(char *label_text, char *text)
+{
+ GtkWidget *dialog, *label, *ok_button, *entry;
+ GtkWidget *box;
+
+ dialog = gtk_dialog_new();
+ label = gtk_label_new(label_text);
+
+ entry = gtk_text_new(NULL, NULL);
+
+ gtk_signal_connect (GTK_OBJECT (entry), "size-request",
+ GTK_SIGNAL_FUNC(infobox_size_request),
+ (gpointer) text);
+
+ gtk_text_insert(GTK_TEXT(entry), g_font, &fg_black, &bg_white,
+ text, -1);
+
+ gtk_text_set_editable(GTK_TEXT(entry), FALSE);
+
+ ok_button = gtk_button_new_with_label("OK");
+
+ gtk_signal_connect_object (GTK_OBJECT (ok_button), "clicked",
+ GTK_SIGNAL_FUNC(gtk_widget_destroy),
+ (gpointer) GTK_OBJECT(dialog));
+
+ box = gtk_vbox_new(FALSE, 5);
+
+
+ gtk_box_pack_start(GTK_BOX(box), entry, TRUE, TRUE, 0);
+ gtk_box_pack_start(GTK_BOX(box), ok_button, FALSE, FALSE, 0);
+
+ gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->action_area),
+ box);
+
+ gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label);
+ gtk_widget_show_all(dialog);
+}
+
+/****************************************************************************
+* help_general_callback
+****************************************************************************/
+
+static void help_general_callback(GtkToggleButton *item, gpointer data)
+{
+ infobox("General Help", general_help);
+}
+
+/****************************************************************************
+* help_about_callback
+****************************************************************************/
+
+static void help_about_callback(GtkToggleButton *item, gpointer data)
+{
+ char tmpbuf [1024];
+ sprintf (tmpbuf, "G2 -- Graphical Event Viewer\n\n");
+ view1_about(tmpbuf);
+ pointsel_about(tmpbuf);
+ events_about(tmpbuf);
+ sprintf (tmpbuf+strlen(tmpbuf), "\n%s\n", version_string);
+ sprintf (tmpbuf+strlen(tmpbuf), "%s\n", minor_v_string);
+ infobox("About", tmpbuf);
+}
+
+
+/****************************************************************************
+* get_cpel_name
+****************************************************************************/
+
+static void get_cpel_name (GtkFileSelection *sel, gpointer user_data)
+{
+ char *filename = (char *)gtk_file_selection_get_filename (
+ GTK_FILE_SELECTION(s_filesel));
+ read_cpel_file(filename);
+ set_window_title(filename);
+}
+
+/****************************************************************************
+* get_clib_name
+****************************************************************************/
+
+static void get_clib_name (GtkFileSelection *sel, gpointer user_data)
+{
+ char *filename = (char *) gtk_file_selection_get_filename (
+ GTK_FILE_SELECTION(s_filesel));
+ read_clib_file(filename);
+ set_window_title(filename);
+}
+
+/****************************************************************************
+* read_cpel_callback
+****************************************************************************/
+
+static void read_cpel_callback(GtkToggleButton *item, gpointer data)
+{
+
+ s_filesel = gtk_file_selection_new("Read CPEL data from...");
+
+ gtk_file_selection_set_filename(GTK_FILE_SELECTION(s_filesel),
+ "cpel.out");
+
+ gtk_signal_connect (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->ok_button),
+ "clicked",
+ GTK_SIGNAL_FUNC(get_cpel_name), NULL);
+
+ gtk_signal_connect_object (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->ok_button),
+ "clicked",
+ GTK_SIGNAL_FUNC (gtk_widget_destroy),
+ (gpointer) s_filesel);
+
+ gtk_signal_connect_object (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->cancel_button),
+ "clicked",
+ GTK_SIGNAL_FUNC (gtk_widget_destroy),
+ (gpointer) s_filesel);
+ gtk_file_selection_hide_fileop_buttons(GTK_FILE_SELECTION(s_filesel));
+ gtk_widget_show (s_filesel);
+}
+
+/****************************************************************************
+* read_clib_callback
+****************************************************************************/
+
+static void read_clib_callback(GtkToggleButton *item, gpointer data)
+{
+
+ s_filesel = gtk_file_selection_new("Read clib data From...");
+
+ gtk_file_selection_set_filename(GTK_FILE_SELECTION(s_filesel),
+ "clib.out");
+
+ gtk_signal_connect (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->ok_button),
+ "clicked",
+ GTK_SIGNAL_FUNC(get_clib_name), NULL);
+
+ gtk_signal_connect_object (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->ok_button),
+ "clicked",
+ GTK_SIGNAL_FUNC (gtk_widget_destroy),
+ (gpointer) s_filesel);
+
+ gtk_signal_connect_object (GTK_OBJECT (
+ GTK_FILE_SELECTION(s_filesel)->cancel_button),
+ "clicked",
+ GTK_SIGNAL_FUNC (gtk_widget_destroy),
+ (gpointer) s_filesel);
+ gtk_file_selection_hide_fileop_buttons(GTK_FILE_SELECTION(s_filesel));
+ gtk_widget_show (s_filesel);
+}
+
+/****************************************************************************
+* menu1_init
+****************************************************************************/
+
+void menu1_init(void)
+{
+
+ s_filemenu = gtk_menu_new();
+
+ s_readcpel = gtk_menu_item_new_with_label
+ ("Read CPEL file");
+ gtk_menu_append(GTK_MENU(s_filemenu), s_readcpel);
+ gtk_signal_connect(GTK_OBJECT(s_readcpel), "activate",
+ GTK_SIGNAL_FUNC(read_cpel_callback), 0);
+
+ s_readclib = gtk_menu_item_new_with_label
+ ("Read CLIB file");
+ gtk_menu_append(GTK_MENU(s_filemenu), s_readclib);
+ gtk_signal_connect(GTK_OBJECT(s_readclib), "activate",
+ GTK_SIGNAL_FUNC(read_clib_callback), 0);
+
+ s_readdefs = gtk_menu_item_new_with_label ("Read Event Definitions");
+ gtk_menu_append(GTK_MENU(s_filemenu), s_readdefs);
+ gtk_signal_connect(GTK_OBJECT(s_readdefs), "activate",
+ GTK_SIGNAL_FUNC(read_eventdef_callback), 0);
+
+ s_readevents = gtk_menu_item_new_with_label ("Read Event Log");
+ gtk_menu_append(GTK_MENU(s_filemenu), s_readevents);
+ gtk_signal_connect(GTK_OBJECT(s_readevents), "activate",
+ GTK_SIGNAL_FUNC(read_events_callback), 0);
+
+ s_readeventsclock = gtk_menu_item_new_with_label
+ ("Read Event Log with Different Clock Rate");
+ gtk_menu_append(GTK_MENU(s_filemenu), s_readeventsclock);
+ gtk_signal_connect(GTK_OBJECT(s_readeventsclock), "activate",
+ GTK_SIGNAL_FUNC(read_eventsclock_callback), 0);
+
+ s_print = gtk_menu_item_new_with_label ("Print");
+ gtk_menu_append(GTK_MENU(s_filemenu), s_print);
+ gtk_signal_connect(GTK_OBJECT(s_print), "activate",
+ GTK_SIGNAL_FUNC(view1_print_callback), 0);
+
+ s_quit = gtk_menu_item_new_with_label ("Exit");
+ gtk_menu_append(GTK_MENU(s_filemenu), s_quit);
+ gtk_signal_connect(GTK_OBJECT(s_quit), "activate",
+ GTK_SIGNAL_FUNC(gtk_main_quit), 0);
+
+ s_mainfilemenu = gtk_menu_item_new_with_label("File");
+ gtk_menu_item_set_submenu(GTK_MENU_ITEM(s_mainfilemenu), s_filemenu);
+
+ s_helpmenu = gtk_menu_new();
+
+ s_help_general = gtk_menu_item_new_with_label ("General");
+ gtk_menu_append(GTK_MENU(s_helpmenu), s_help_general);
+ gtk_signal_connect(GTK_OBJECT(s_help_general), "activate",
+ GTK_SIGNAL_FUNC(help_general_callback), 0);
+
+ s_help_about = gtk_menu_item_new_with_label ("About");
+ gtk_menu_append(GTK_MENU(s_helpmenu), s_help_about);
+ gtk_signal_connect(GTK_OBJECT(s_help_about), "activate",
+ GTK_SIGNAL_FUNC(help_about_callback), 0);
+
+ s_mainhelpmenu = gtk_menu_item_new_with_label("Help");
+ gtk_menu_item_set_submenu(GTK_MENU_ITEM(s_mainhelpmenu), s_helpmenu);
+
+ s_mainmenubar = gtk_menu_bar_new();
+ gtk_menu_bar_append(GTK_MENU_BAR(s_mainmenubar), s_mainfilemenu);
+ gtk_menu_bar_append(GTK_MENU_BAR(s_mainmenubar), s_mainhelpmenu);
+ gtk_widget_show_all(s_mainmenubar);
+
+ gtk_box_pack_start(GTK_BOX(g_mainvbox), s_mainmenubar, FALSE, FALSE, 0);
+}
diff --git a/src/tools/g2/mkversion.c b/src/tools/g2/mkversion.c
new file mode 100644
index 00000000..3523fbe6
--- /dev/null
+++ b/src/tools/g2/mkversion.c
@@ -0,0 +1,77 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 1997-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+
+int main (int argc, char **argv)
+{
+ time_t now;
+ FILE *ofp;
+ char *username;
+ char timestr[64];
+ char *cp;
+
+ if (argc < 4) {
+ printf ("usage: mkversion ostype version outputfile\n");
+ exit (1);
+ }
+
+ ofp = fopen (argv[3], "w");
+ if (ofp == NULL) {
+ printf ("Couldn't create %s\n", argv[3]);
+ exit (1);
+ }
+
+ now = time (0);
+
+ fprintf (ofp, "/*\n");
+ fprintf (ofp, " * G2 Version Stamp, %s",
+ ctime (&now));
+ fprintf (ofp, " * Automatically generated, hand edits are pointless.\n");
+ fprintf (ofp, " */\n\n");
+
+ fprintf (ofp,
+ "const char *version_string = \"G2 (%s) major version %s\";\n",
+ argv[1], argv[2]);
+
+ username = (char *) cuserid (0);
+
+ strcpy(timestr, ctime(&now));
+
+ cp = timestr;
+
+ while (*cp) {
+ cp++;
+ }
+ if (*--cp == '\n')
+ *cp = 0;
+
+ fprintf (ofp,
+ "const char *minor_v_string = \"Built by %s at %s\";\n",
+ username, timestr);
+
+ exit (0);
+}
+
+
diff --git a/src/tools/g2/pointsel.c b/src/tools/g2/pointsel.c
new file mode 100644
index 00000000..018dc213
--- /dev/null
+++ b/src/tools/g2/pointsel.c
@@ -0,0 +1,854 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <ctype.h>
+#include <string.h>
+#include <gtk/gtk.h>
+#include "g2.h"
+
+/*
+ * globals
+ */
+event_def_t g_eventdefs[NEVENTS];
+
+/*
+ * locals
+ */
+static GtkWidget *s_pointselbox;
+static FILE *s_hfp;
+static FILE *s_elog_hfp;
+static int s_basenum;
+static GtkWidget *s_event_buttons[NEVENTS];
+static int s_min_shown_pointsel;
+static int s_max_shown_pointsel;
+static GtkWidget *s_allbutton;
+static GtkWidget *s_nonebutton;
+static GtkWidget *s_pointselbuttons;
+static GtkWidget *s_ps_vscroll;
+static GtkObject *s_ps_vsadj;
+static int g_neventdefs;
+
+enum button_click {
+ ALL_BUTTON=1,
+ NONE_BUTTON,
+};
+
+/*
+ * config params
+ */
+int c_maxpointsel;
+
+/****************************************************************************
+* recompute_ps_vscrollbar
+****************************************************************************/
+
+static void recompute_ps_vscrollbar (void)
+{
+ GtkAdjustment *adj;
+ ulong limit;
+
+ adj = GTK_ADJUSTMENT(s_ps_vsadj);
+
+#ifdef NOTDEF
+ /* This seems like the right calculation, but seems not to work */
+ if (g_neventdefs > c_maxpointsel)
+ limit = g_neventdefs - c_maxpointsel;
+ else
+ limit = g_neventdefs;
+#else
+ limit = g_neventdefs-1;
+#endif
+
+ adj->lower = (gfloat)0.00;
+ adj->upper = (gfloat)limit;
+ adj->value = (gfloat)0.00;
+ adj->step_increment = (gfloat)1.00;
+ adj->page_increment = (gfloat)(c_maxpointsel / 3);
+ adj->page_size = (gfloat)c_maxpointsel;
+ gtk_adjustment_changed(adj);
+ gtk_adjustment_value_changed(adj);
+ gtk_widget_show(s_ps_vscroll);
+}
+
+/****************************************************************************
+* point_select_callback
+****************************************************************************/
+
+static void point_select_callback(GtkToggleButton *item, gpointer data)
+{
+ int i = (int) (unsigned long long) data;
+
+ g_eventdefs[i].selected = gtk_toggle_button_get_active(
+ GTK_TOGGLE_BUTTON(s_event_buttons[i]));
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* up_button
+****************************************************************************/
+
+static void up_button(void)
+{
+ int i;
+ int increment = c_maxpointsel/4;
+
+ if (s_min_shown_pointsel == 0)
+ return;
+
+ s_min_shown_pointsel -= increment;
+
+ if (s_min_shown_pointsel < 0)
+ s_min_shown_pointsel = 0;
+
+ s_max_shown_pointsel = s_min_shown_pointsel + c_maxpointsel;
+
+ for (i = 0; i < g_neventdefs; i++) {
+ if (i >= s_min_shown_pointsel &&
+ i <= s_max_shown_pointsel)
+ gtk_widget_show(s_event_buttons[i]);
+ else
+ gtk_widget_hide(s_event_buttons[i]);
+ }
+
+}
+
+#ifdef NOTDEF
+/****************************************************************************
+* down_button
+****************************************************************************/
+
+static void down_button(void)
+{
+ int i;
+ int increment = c_maxpointsel/4;
+
+ if (s_max_shown_pointsel == g_neventdefs)
+ return;
+
+ s_max_shown_pointsel += increment;
+
+ if (s_max_shown_pointsel >= g_neventdefs)
+ s_max_shown_pointsel = (g_neventdefs-1);
+
+ s_min_shown_pointsel = s_max_shown_pointsel - c_maxpointsel;
+
+ if (s_min_shown_pointsel < 0)
+ s_min_shown_pointsel = 0;
+
+ for (i = 0; i < g_neventdefs; i++) {
+ if (i >= s_min_shown_pointsel &&
+ i <= s_max_shown_pointsel)
+ gtk_widget_show(s_event_buttons[i]);
+ else
+ gtk_widget_hide(s_event_buttons[i]);
+ }
+
+}
+#endif
+
+/****************************************************************************
+* button_click_callback
+****************************************************************************/
+
+static void button_click_callback(GtkButton *item, gpointer data)
+{
+ int i;
+ enum button_click click = (enum button_click)data;
+
+ switch (click) {
+ case ALL_BUTTON:
+ for (i = 0; i < g_neventdefs; i++) {
+ gtk_toggle_button_set_active (
+ GTK_TOGGLE_BUTTON(s_event_buttons[i]), TRUE);
+ g_eventdefs[i].selected = TRUE;
+ }
+ break;
+
+ case NONE_BUTTON:
+ for (i = 0; i < g_neventdefs; i++) {
+ gtk_toggle_button_set_active (
+ GTK_TOGGLE_BUTTON(s_event_buttons[i]), FALSE);
+ g_eventdefs[i].selected = FALSE;
+ }
+ break;
+ }
+}
+
+/****************************************************************************
+* scroll_callback
+****************************************************************************/
+
+static void scroll_callback (GtkAdjustment *adj, GtkWidget *notused)
+{
+ int i;
+
+ s_min_shown_pointsel = (int)adj->value;
+ s_max_shown_pointsel = s_min_shown_pointsel + c_maxpointsel;
+
+ for (i = 0; i < g_neventdefs; i++) {
+ if (i >= s_min_shown_pointsel &&
+ i <= s_max_shown_pointsel)
+ gtk_widget_show(s_event_buttons[i]);
+ else
+ gtk_widget_hide(s_event_buttons[i]);
+ }
+}
+
+/****************************************************************************
+* point_selector_init
+****************************************************************************/
+
+void point_selector_init(void)
+{
+
+ c_maxpointsel = atol(getprop_default("event_selector_lines", "20"));
+
+ s_pointselbox = gtk_vbox_new(FALSE,5);
+
+ s_pointselbuttons = gtk_hbox_new(FALSE,5);
+
+ s_allbutton = gtk_button_new_with_label("ALL");
+ gtk_widget_show(s_allbutton);
+ s_nonebutton = gtk_button_new_with_label("NONE");
+ gtk_widget_show(s_nonebutton);
+
+ gtk_signal_connect (GTK_OBJECT(s_allbutton), "clicked",
+ GTK_SIGNAL_FUNC(button_click_callback),
+ (gpointer) ALL_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_nonebutton), "clicked",
+ GTK_SIGNAL_FUNC(button_click_callback),
+ (gpointer) NONE_BUTTON);
+
+ gtk_box_pack_start(GTK_BOX(s_pointselbuttons), s_allbutton, FALSE,
+ FALSE, 0);
+ gtk_box_pack_start(GTK_BOX(s_pointselbuttons), s_nonebutton, FALSE,
+ FALSE, 0);
+
+ gtk_widget_show(s_pointselbuttons);
+ gtk_widget_ref(s_pointselbuttons);
+
+ gtk_box_pack_start(GTK_BOX(s_pointselbox), s_pointselbuttons, FALSE,
+ FALSE, 0);
+
+ gtk_box_pack_end (GTK_BOX(g_mainhbox), s_pointselbox,
+ FALSE, FALSE, 0);
+
+ s_ps_vsadj = gtk_adjustment_new(0.0 /* initial value */,
+ 0.0 /* minimum value */,
+ 2000.0 /* maximum value */,
+ 0.1 /* step increment */,
+ 10.0/* page increment */,
+ 10.0/* page size */);
+
+ s_ps_vscroll = gtk_vscrollbar_new (GTK_ADJUSTMENT(s_ps_vsadj));
+ gtk_signal_connect (GTK_OBJECT (s_ps_vsadj), "value-changed",
+ GTK_SIGNAL_FUNC (scroll_callback),
+ (gpointer)s_ps_vscroll);
+ gtk_box_pack_end(GTK_BOX(g_mainhbox), s_ps_vscroll, FALSE, FALSE, 0);
+}
+
+/****************************************************************************
+* sxerox
+****************************************************************************/
+
+char *sxerox (char *s)
+{
+ char *rv;
+
+ /* Note: g_malloc does or dies... */
+ rv = (char *)g_malloc(strlen(s)+1);
+ strcpy (rv, s);
+ return (rv);
+}
+
+/****************************************************************************
+* reset_point_selector
+****************************************************************************/
+
+static void reset_point_selector(void)
+{
+ int i;
+
+ gtk_widget_hide(s_pointselbox);
+ gtk_widget_hide(s_pointselbuttons);
+ gtk_widget_hide(s_ps_vscroll);
+ gtk_container_remove(GTK_CONTAINER(s_pointselbox),
+ s_pointselbuttons);
+
+ for (i = 0; i < g_neventdefs; i++) {
+ if (s_event_buttons[i]) {
+ gtk_container_remove(GTK_CONTAINER(s_pointselbox),
+ s_event_buttons[i]);
+ s_event_buttons[i] = 0;
+ }
+ }
+}
+
+/****************************************************************************
+* create_point_selector
+****************************************************************************/
+
+static void create_point_selector(void)
+{
+ int i;
+ char tmpbuf [1024];
+ event_def_t *ep;
+ GtkWidget *wp;
+
+ for (i = 0; i < g_neventdefs; i++) {
+ ep = &g_eventdefs[i];
+ sprintf(tmpbuf, "[%lu] %s", ep->event,
+ ep->name ? ep->name : "(none)");
+ /* Hack to reduce width of point selectors */
+ if (strlen(tmpbuf) > 50) {
+ tmpbuf[50] = 0;
+ }
+
+ wp = gtk_check_button_new_with_label (tmpbuf);
+ s_event_buttons[i] = wp;
+ gtk_signal_connect (GTK_OBJECT(wp), "toggled",
+ GTK_SIGNAL_FUNC(point_select_callback),
+ (gpointer) (unsigned long long) i);
+ gtk_toggle_button_set_active (
+ GTK_TOGGLE_BUTTON(wp), TRUE);
+ gtk_box_pack_start(GTK_BOX(s_pointselbox), wp, FALSE, FALSE, 0);
+ }
+
+ /* set up scroll parameters by faking an up-button */
+ s_min_shown_pointsel = 1;
+ up_button();
+
+ gtk_box_pack_start(GTK_BOX(s_pointselbox), s_pointselbuttons, FALSE,
+ FALSE, 0);
+ gtk_widget_show(s_pointselbuttons);
+ gtk_widget_show(s_pointselbox);
+ gtk_widget_show(s_ps_vscroll);
+}
+
+/****************************************************************************
+* remove_all_events
+****************************************************************************/
+
+static void remove_all_events(void)
+{
+ event_def_t *ep;
+ int i;
+
+ for (i = 0; i < g_neventdefs; i++) {
+ ep = &g_eventdefs[i];
+ if (!ep->is_clib) {
+ if (ep->name)
+ g_free(ep->name);
+ if(ep->format)
+ g_free(ep->format);
+ }
+ }
+ g_neventdefs = 0;
+}
+
+/****************************************************************************
+* add_event
+****************************************************************************/
+
+static void add_event(ulong event, char *name, char *format)
+{
+ int i;
+ event_def_t *ep;
+
+ if (g_neventdefs >= NEVENTS) {
+ g_error("Too many event definitions, increase NEVENTS!");
+ /*NOTREACHED*/
+ }
+
+ /* Simple dup check, probably not needed very often */
+ for (i = 0; i < g_neventdefs; i++) {
+ if (g_eventdefs[i].event == event) {
+ g_warning("Duplicate def event %lu: first definition retained\n",
+ event);
+ return;
+ }
+ }
+
+ ep = &g_eventdefs[g_neventdefs++];
+
+ ep->event = event;
+ ep->name = sxerox(name);
+ ep->format = sxerox(format);
+ ep->selected = TRUE;
+}
+
+/****************************************************************************
+* add_event_from_cpel_file
+****************************************************************************/
+
+void add_event_from_cpel_file(ulong event, char *event_format,
+ char *datum_format)
+{
+ event_def_t *ep;
+
+ if (g_neventdefs >= NEVENTS) {
+ g_error("Too many event definitions, increase NEVENTS!");
+ /*NOTREACHED*/
+ }
+
+ ep = &g_eventdefs[g_neventdefs++];
+
+ ep->event = event;
+ /*
+ * Duplicate the strings for backward compatibility. Otherwise,
+ * the g_free above will barf because the name/format strings are
+ * actually in mmap'ed memory
+ */
+ ep->name = sxerox(event_format);
+ ep->format = sxerox(datum_format);
+ ep->selected = TRUE;
+}
+
+/****************************************************************************
+* add_event_from_clib_file
+****************************************************************************/
+
+void add_event_from_clib_file(unsigned int event, char *name,
+ unsigned int vec_index)
+{
+ event_def_t *ep;
+
+ if (g_neventdefs >= NEVENTS) {
+ g_error("Too many event definitions, increase NEVENTS!");
+ /*NOTREACHED*/
+ }
+
+ ep = &g_eventdefs[g_neventdefs++];
+
+ ep->event = event;
+
+ ep->name = sxerox(name);
+ ep->format = (void *)(unsigned long long) vec_index;
+ ep->selected = TRUE;
+ ep->is_clib = TRUE;
+}
+
+/****************************************************************************
+* read_header_file - eats header file lines of the form
+*
+* #define EVENT_FOO 123 / * name: %d * /
+*
+****************************************************************************/
+
+static void read_header_file (void)
+{
+ char tmpbuf [1024];
+ char *name, *format;
+ char *cp;
+ unsigned long event;
+ int ev_num_flag;
+
+ while (fgets (tmpbuf, sizeof (tmpbuf), s_hfp))
+ {
+ cp = tmpbuf;
+ ev_num_flag = 0;
+
+ if (strncmp (cp, "#define", 7))
+ continue;
+
+ /* skip #define */
+ while (*cp && !(isspace ((int)*cp)))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ /* skip ws after #define */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ /* skip symbolic name */
+ while (*cp && !(isspace ((int)*cp)))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ /* skip ws after symbolic name */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ event = 0;
+
+ if (!strncmp(cp, "EV_NUM", 6)) {
+ cp += 6;
+ ev_num_flag = 1;
+
+ while (*cp && *cp != '(')
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ cp++;
+
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ }
+
+ /* eat event code. */
+ while (*cp && isdigit ((int)*cp))
+ {
+ event = event * 10 + (*cp - '0');
+ cp++;
+ }
+
+ if (*cp == 0)
+ continue;
+
+ if (ev_num_flag) {
+ while (*cp && *cp != ')')
+ cp++;
+ if (*cp == 0)
+ continue;
+ cp++;
+ event += s_basenum;
+ }
+
+ /* skip ws after event code */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ if (*cp != '/')
+ continue;
+
+ cp++;
+
+ if (*cp != '*')
+ continue;
+
+ cp++;
+
+ /* skip ws after comment start */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ name = cp;
+
+ /* accumulate name */
+ while (*cp && *cp != ':' && *cp != '*')
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ *cp++ = 0;
+
+ /* skip ws after name: */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ if (*cp == 0 || *cp == '/')
+ {
+ format = " ";
+ goto write_it;
+ }
+
+ format = cp;
+
+ /* accumulate format string */
+ while (*cp && !isspace ((int)*cp))
+ cp++;
+
+ *cp = 0;
+
+ write_it:
+
+ add_event (event, name, format);
+ }
+}
+
+/****************************************************************************
+* read_header_files - eats header file lines of the form
+*
+* #define FILE1_BASE 100 / * pointdefs: ../vpn/vpn_points.h * /
+*
+****************************************************************************/
+
+static boolean read_header_files (void)
+{
+ char *cp, *name;
+ char tmpbuf [1024];
+ int basenum;
+ boolean rv=FALSE;
+
+ while (fgets (tmpbuf, sizeof (tmpbuf), s_elog_hfp))
+ {
+ cp = tmpbuf;
+
+ if (strncmp (cp, "#define", 7))
+ continue;
+
+ cp += 7;
+
+ /* skip ws after #define */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ /* skip EV_COMPxxx_START */
+ while (*cp && !isspace((int)*cp))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ /* skip ws after EV_COMPxxx_START */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ basenum = atol (cp);
+
+ /* skip #define */
+ while (*cp && (*cp != '/'))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ cp++;
+ if (*cp != '*')
+ continue;
+
+ cp++;
+
+ /* skip ws after comment start */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ if (*cp == 0)
+ continue;
+
+ if (strncmp (cp, "pointdefs:", 10))
+ continue;
+
+ cp += 10;
+
+ /* skip ws after comment start */
+ while (*cp && isspace ((int)*cp))
+ cp++;
+
+ name = cp;
+
+ while (*cp && !isspace ((int)*cp))
+ cp++;
+
+ *cp = 0;
+
+ s_hfp = fopen (name, "rt");
+
+ if (s_hfp == NULL) {
+ g_warning ("Couldn't open header file %s\n", name);
+ continue;
+ }
+ rv = TRUE;
+
+ s_basenum = basenum;
+
+ read_header_file();
+
+ fclose (s_hfp);
+ }
+ return(rv);
+}
+
+/****************************************************************************
+* event_def_cmp
+****************************************************************************/
+
+int event_def_cmp(const void *a1, const void *a2)
+{
+ event_def_t *e1 = (event_def_t *)a1;
+ event_def_t *e2 = (event_def_t *)a2;
+
+ if (e1->event < e2->event)
+ return(-1);
+ else if (e1->event == e2->event)
+ return(0);
+ else
+ return(1);
+}
+
+/****************************************************************************
+* sort_event_definitions
+****************************************************************************/
+
+void sort_event_definitions(void)
+{
+ qsort(&g_eventdefs[0], g_neventdefs, sizeof(event_def_t), event_def_cmp);
+}
+
+static boolean remove_needed=TRUE;
+
+void finalize_events(void)
+{
+ sort_event_definitions();
+ create_point_selector();
+ recompute_ps_vscrollbar();
+ view1_display_when_idle();
+ remove_needed = TRUE;
+}
+
+void initialize_events(void)
+{
+ if (remove_needed) {
+ reset_point_selector();
+ remove_all_events();
+ remove_needed = FALSE;
+ }
+}
+
+/****************************************************************************
+* read_event_definitions
+****************************************************************************/
+
+boolean read_event_definitions (char *filename)
+{
+ char tmpbuf [128];
+
+ initialize_events();
+
+ s_elog_hfp = fopen (filename, "rt");
+ if (s_elog_hfp == NULL) {
+ sprintf (tmpbuf, "Couldn't open %s\n", filename);
+ infobox ("Open Failed", tmpbuf);
+ return(FALSE);
+ }
+ /* Presume an elog.h-style master file; if that yields nothing,
+ fall through and treat it as a plain point-definition header */
+ if (read_header_files()) {
+ sort_event_definitions();
+ create_point_selector();
+ recompute_ps_vscrollbar();
+ fclose(s_elog_hfp);
+ view1_display_when_idle();
+ remove_needed = TRUE;
+ return(TRUE);
+ }
+ fclose(s_elog_hfp);
+
+ s_hfp = fopen (filename, "rt");
+ if (s_hfp == NULL) {
+ sprintf (tmpbuf, "Couldn't open %s\n", filename);
+ infobox ("Read Event Definition Failure", tmpbuf);
+ return(FALSE);
+ }
+
+ read_header_file();
+
+ /* Happens if the user feeds us the wrong file, for example */
+ if (g_neventdefs == 0) {
+ sprintf (tmpbuf, "No event definitions found in %s\n", filename);
+ infobox ("No Event Definitions?", tmpbuf);
+ return(FALSE);
+ }
+ finalize_events();
+ return(TRUE);
+}
+
+static event_def_t dummy_event;
+static char dummy_string[32];
+
+/****************************************************************************
+* find_event_definition
+* Binary search for the event definition matching a given event code
+****************************************************************************/
+
+event_def_t *find_event_definition (ulong code)
+{
+ int index, bottom, top;
+ event_def_t *edp;
+
+ if (g_neventdefs == 0)
+ goto use_dummy;
+
+ bottom = g_neventdefs-1;
+ top = 0;
+
+ while (1) {
+ index = (bottom + top) / 2;
+
+ edp = (g_eventdefs + index);
+
+ if (edp->event == code)
+ return(edp);
+
+ if (top >= bottom) {
+ use_dummy:
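+ /* Unknown code: synthesize a catch-all definition on the fly */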
+ edp = &dummy_event;
+ edp->selected = TRUE;
+ edp->event = code;
+ edp->format = "0x%x";
+ sprintf (dummy_string, "E%lu", code);
+ edp->name = &dummy_string[0];
+ return(edp);
+ }
+
+ if (edp->event < code)
+ top = index + 1;
+ else
+ bottom = index - 1;
+ }
+}
+
+/****************************************************************************
+* pointsel_next_snapshot
+* Set dialog buttons from snapshot
+****************************************************************************/
+
+void pointsel_next_snapshot(void)
+{
+ int i;
+
+ for (i = 0; i < g_neventdefs; i++) {
+ gtk_toggle_button_set_active (
+ GTK_TOGGLE_BUTTON(s_event_buttons[i]),
+ g_eventdefs[i].selected);
+ }
+}
+
+/****************************************************************************
+* pointsel_about
+****************************************************************************/
+
+void pointsel_about (char *tmpbuf)
+{
+ sprintf (tmpbuf+strlen(tmpbuf), "%d event definitions\n",
+ g_neventdefs);
+}
diff --git a/src/tools/g2/props.c b/src/tools/g2/props.c
new file mode 100644
index 00000000..a23dc050
--- /dev/null
+++ b/src/tools/g2/props.c
@@ -0,0 +1,279 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 1997-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <malloc.h>
+#include <time.h>
+#include <gtk/gtk.h>
+#include <string.h>
+
+static char *sxerox (char *s);
+
+#define NBUCKETS 97
+
+typedef struct prop_ {
+ struct prop_ *next;
+ char *name;
+ char *value;
+} prop_t;
+
+static prop_t *buckets [NBUCKETS];
+static int hash_shifts[4] = {24, 16, 8, 0};
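+/*
+ * Name hash: XOR each character into one of four byte lanes,
+ * cycling through the shifts above.
+ */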
+
+/*
+ * getprop
+ */
+
+char *getprop (char *name)
+{
+ unsigned char *cp;
+ unsigned long hash=0;
+ prop_t *bp;
+ int i=0;
+
+ for (cp = (unsigned char *) name; *cp; cp++)
+ hash ^= (*cp)<<(hash_shifts[(i++)&0x3]);
+
+ bp = buckets [hash%NBUCKETS];
+
+ while (bp && strcmp (bp->name, name)) {
+ bp = bp->next;
+ }
+
+ if (bp == NULL)
+ return (0);
+ else
+ return (bp->value);
+}
+
+/*
+ * getprop_default
+ */
+
+char *getprop_default (char *name, char *def)
+{
+ char *rv;
+ rv = getprop (name);
+ if (rv)
+ return (rv);
+ else
+ return (def);
+}
+
+/*
+ * addprop
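+ * Define name = value. New entries are pushed onto the head of their
+ * hash bucket, so redefining a name shadows the older entry rather
+ * than replacing it.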
+ */
+
+void addprop (char *name, char *value)
+{
+ unsigned char *cp;
+ unsigned long hash=0;
+ prop_t **bpp;
+ prop_t *bp;
+ int i=0;
+
+ bp = (prop_t *)g_malloc (sizeof (prop_t));
+
+ bp->next = 0;
+ bp->name = sxerox (name);
+ bp->value = sxerox (value);
+
+ for (cp = (unsigned char *)name; *cp; cp++)
+ hash ^= (*cp)<<(hash_shifts[(i++)&0x3]);
+
+ bpp = &buckets [hash%NBUCKETS];
+
+ if (*bpp == NULL)
+ *bpp = bp;
+ else {
+ bp->next = *bpp;
+ *bpp = bp;
+ }
+}
+
+/*
+ * sxerox
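+ * strdup() equivalent, using g_malloc.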
+ */
+
+static char *sxerox (char *s)
+{
+ char *rv = (char *) g_malloc (strlen (s) + 1);
+ strcpy (rv, s);
+ return rv;
+}
+
+/*
+ * readprops
+ */
+
+#define START 0
+#define READNAME 1
+#define READVALUE 2
+#define C_COMMENT 3
+#define CPP_COMMENT 4
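+
+/*
+ * For reference, a small illustrative input accepted by the parser
+ * below (hypothetical property names, not a file shipped with the
+ * tool):
+ *
+ *    // per-user g2 settings
+ *    drawbox_width = 1200
+ *    event_icon = box
+ *
+ * Each entry is a name ([A-Za-z_][A-Za-z0-9_]*), optional whitespace
+ * and '=', then a value running to end-of-line; '\' escapes the next
+ * character, trailing blanks are trimmed, and C and C++ style
+ * comments are skipped.
+ */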
+
+int readprops (char *filename)
+{
+ FILE *ifp;
+ int c; /* getc() returns int; EOF must not be truncated */
+ int state=START;
+ int linenum=1;
+ char namebuf [128];
+ char valbuf [512];
+ int i;
+
+ ifp = fopen (filename, "r");
+
+ if (ifp == NULL)
+ return (-1);
+
+ while (1) {
+
+ readchar:
+ c = getc (ifp);
+
+ again:
+ switch (state) {
+ case START:
+ if (feof (ifp)) {
+ fclose (ifp);
+ return (0);
+ }
+
+ if (c == ' ' || c == '\t')
+ goto readchar;
+
+ if (c == '\n') {
+ linenum++;
+ goto readchar;
+ }
+ if (isalpha (c) || (c == '_')) {
+ state = READNAME;
+ goto again;
+ }
+ if (c == '/') {
+ c = getc (ifp);
+ if (c == '/') {
+ state = CPP_COMMENT;
+ goto readchar;
+ } else if (c == '*') {
+ state = C_COMMENT;
+ goto readchar;
+ } else {
+ fprintf (stderr, "unknown token '/' line %d\n",
+ linenum);
+ exit (1);
+ }
+ }
+ fprintf (stderr, "unknown token '%c' line %d\n",
+ c, linenum);
+ exit (1);
+ break;
+
+ case CPP_COMMENT:
+ while (1) {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fclose (ifp);
+ return (0);
+ }
+ if (c == '\n') {
+ linenum++;
+ state = START;
+ goto readchar;
+ }
+ }
+ break;
+
+ case C_COMMENT:
+ while (1) {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "unterminated comment, line %d\n",
+ linenum);
+ exit (1);
+ }
+ if (c == '*') {
+ staragain:
+ c = getc (ifp);
+ if (c == '/') {
+ state = START;
+ goto readchar;
+ }
+ if (c == '*')
+ goto staragain;
+ }
+ }
+ break;
+
+ case READNAME:
+ i = 0;
+ namebuf[i++] = c;
+ while (1) {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "EOF while reading a name, line %d\n",
+ linenum);
+ exit (1);
+ }
+ if ((!isalnum (c)) && (c != '_')) {
+ namebuf [i] = 0;
+ state = READVALUE;
+ goto again;
+ }
+ namebuf [i++] = c;
+ }
+ break;
+
+ case READVALUE:
+ i = 0;
+ while ((c == ' ') || (c == '\t') || (c == '=')) {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "EOF while reading a value, line %d\n",
+ linenum);
+ exit (1);
+ }
+ }
+ goto firsttime;
+ while (1) {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "EOF while reading a value, line %d\n",
+ linenum);
+ exit (1);
+ }
+
+ firsttime:
+ if (c == '\\') {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "EOF after '\\', line %d\n",
+ linenum);
+ exit (1);
+ }
+ valbuf[i++] = c;
+ continue;
+ }
+ if (c == '\n') {
+ linenum++;
+ while (i > 0 && (valbuf [i-1] == ' ' || valbuf[i-1] == '\t'))
+ i--;
+ valbuf[i] = 0;
+ addprop (namebuf, valbuf);
+ state = START;
+ goto readchar;
+ }
+ valbuf[i++] = c;
+ }
+
+ }
+ }
+}
diff --git a/src/tools/g2/props.h b/src/tools/g2/props.h
new file mode 100644
index 00000000..6289941d
--- /dev/null
+++ b/src/tools/g2/props.h
@@ -0,0 +1,21 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 1997-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+extern char *getprop (char *name);
+extern char *getprop_default (char *name, char *def);
+extern void addprop (char *name, char *value);
+extern int readprops (char *filename);
+extern int writeprops (char *filename);
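+
+/*
+ * Typical usage, as a sketch (error handling elided; "g2.props" is an
+ * illustrative file name, not one the tool requires):
+ *
+ *    if (readprops ("g2.props") == 0) {
+ *        char *width = getprop_default ("drawbox_width", "1000");
+ *        addprop ("last_file", "trace.cpel");
+ *    }
+ */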
diff --git a/src/tools/g2/view1.c b/src/tools/g2/view1.c
new file mode 100644
index 00000000..c5f799dc
--- /dev/null
+++ b/src/tools/g2/view1.c
@@ -0,0 +1,3237 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <gtk/gtk.h>
+#include <gdk/gdkkeysyms.h>
+#include "g2.h"
+#include <time.h>
+#include <string.h>
+#include <vppinfra/format.h>
+#include <vppinfra/elog.h>
+
+/*
+ * The main event display view.
+ *
+ * Important variables:
+ *
+ * "da" -- the drawing area, aka the screen representation of the
+ * event view.
+ *
+ * "pm" -- the backing pixmap for the drawing area. Note that
+ * all graphics operations target this backing
+ * store, then call gtk_widget_draw to copy a rectangle from
+ * the backing store onto the screen.
+ *
+ * "s_v1" -- pointer to the current v1_geometry_t object.
+ *
+ * Box hierarchy:
+ * s_view1_vbox
+ * s_view1_hbox
+ * da s_view1_vmenubox
+ * s_view1_topbutton("Top")
+ * s_view1_vscroll (vertical scrollbar)
+ * s_view1_bottombutton("Bottom")
+ * s_view1_hmenubox
+ * s_view1_startbutton("Start");
+ * s_view1_hscroll(horizontal scrollbar)
+ * s_view1_endbutton("End")
+ * s_view1_zoominbutton("Zoomin")
+ * s_view1_searchbutton("Search")
+ * s_view1_searchagainbutton("Search Again")
+ * s_view1_zoomoutbutton("Zoomout")
+ * s_view1_label
+ */
+
+/*
+ * Globals
+ */
+
+GdkFont *g_font; /* a fixed-width font to use */
+/* color format: 0 (for static colors), r (0-64k), g (0-64k), b (0-64k) */
+GdkColor fg_black = {0, 0, 0, 0};
+GdkColor fg_red = {0, 65535, 0, 0};
+GdkColor bg_white = {0, 65535, 65535, 65535};
+static boolean summary_mode = TRUE; /* start out in summary mode */
+static boolean color_mode = FALSE; /* start out in monochrome mode */
+
+/*
+ * Locals
+ */
+
+/*
+ * user_data values passed to view1_button_click_callback,
+ * which is used by the various action buttons noted above
+ */
+enum view1_button_click {
+ TOP_BUTTON=1,
+ BOTTOM_BUTTON,
+ START_BUTTON,
+ ZOOMIN_BUTTON,
+ SEARCH_BUTTON,
+ SEARCH_AGAIN_BUTTON,
+ ZOOMOUT_BUTTON,
+ END_BUTTON,
+ MORE_TRACES_BUTTON,
+ LESS_TRACES_BUTTON,
+ SNAP_BUTTON,
+ NEXT_BUTTON,
+ DEL_BUTTON,
+ CHASE_EVENT_BUTTON,
+ CHASE_DATUM_BUTTON,
+ CHASE_TRACK_BUTTON,
+ UNCHASE_BUTTON,
+ FORWARD_BUTTON,
+ BACKWARD_BUTTON,
+ SUMMARY_BUTTON,
+ NOSUMMARY_BUTTON,
+ SLEW_LEFT_BUTTON,
+ SLEW_RIGHT_BUTTON,
+};
+
+enum chase_mode {
+ CHASE_EVENT=1,
+ CHASE_DATUM,
+ CHASE_TRACK,
+};
+
+enum sc_dir {
+ SRCH_CHASE_FORWARD = 0,
+ SRCH_CHASE_BACKWARD = 1,
+};
+
+static GtkWidget *s_view1_hbox; /* see box hierarchy chart */
+static GtkWidget *s_view1_vbox; /* see box hierarchy chart */
+static GtkWidget *da; /* main drawing area */
+static GdkPixmap *pm; /* and its backing pixmap */
+static GdkCursor *norm_cursor; /* the "normal" cursor */
+
+/*
+ * view geometry parameters
+ *
+ * Remember:
+ * Y increases down the page.
+ * Strip origin is at the top
+ * Payday is Friday
+ * Don't put your fingers in your mouth.
+ *
+ * Most of these values are in pixels
+ */
+
+typedef struct v1_geometry {
+ int pid_ax_width; /* Width of the PID axis */
+ int time_ax_height; /* Height of the time axis */
+ int time_ax_spacing; /* TimeAxis: Space between tick-marks */
+ int strip_height; /* Height of a regular PID trace */
+ int pop_offset; /* Vertical offset of the detail box */
+ int pid_ax_offset; /* Vertical offset of the PID axis */
+ int event_offset; /* Vertical offset of the event boxes */
+ int total_height; /* total height of da, see configure_event */
+ int total_width; /* ditto, for width */
+ double last_time_interval; /* last time-ruler interval, event-clock units */
+
+ /* Derived values */
+ int first_pid_index; /* Index of first displayed PID */
+ int npids; /* Max number of displayed pids */
+ ulonglong minvistime; /* in usec */
+ ulonglong maxvistime; /* in usec */
+} v1_geometry_t;
+
+
+/* The active geometry object */
+static v1_geometry_t s_v1record;
+static v1_geometry_t *s_v1 = &s_v1record;
+
+/* The color array */
+static GdkColor *s_color;
+
+/* Snapshot ring */
+typedef struct snapshot {
+ struct snapshot *next;
+ /* Screen geometry */
+ v1_geometry_t geometry;
+ boolean show_event[NEVENTS];
+ pid_sort_t *pidvec;
+ /*
+ * Note: not worth recomputing the vertical scrollbar, just save
+ * its value here
+ */
+ gfloat vscroll_value;
+ boolean summary_mode;
+ boolean color_mode;
+} snapshot_t;
+
+static snapshot_t *s_snapshots;
+static snapshot_t *s_cursnap;
+static event_t *s_last_selected_event;
+
+/*
+ * various widgets, see the box hierarchy chart above
+ * The toolkit keeps track of these things, we could lose many of
+ * these pointers.
+ */
+static GtkWidget *s_view1_vmenubox;
+static GtkWidget *s_view1_topbutton;
+static GtkWidget *s_view1_bottombutton;
+static GtkWidget *s_view1_more_traces_button;
+static GtkWidget *s_view1_less_traces_button;
+
+static GtkWidget *s_view1_hmenubox;
+static GtkWidget *s_view1_hmenubox2;
+static GtkWidget *s_view1_startbutton;
+static GtkWidget *s_view1_zoominbutton;
+static GtkWidget *s_view1_searchbutton;
+static GtkWidget *s_view1_srchagainbutton;
+static GtkWidget *s_view1_zoomoutbutton;
+static GtkWidget *s_view1_endbutton;
+
+static GtkWidget *s_view1_snapbutton;
+static GtkWidget *s_view1_nextbutton;
+static GtkWidget *s_view1_delbutton;
+
+static GtkWidget *s_view1_chase_event_button;
+static GtkWidget *s_view1_chase_datum_button;
+static GtkWidget *s_view1_chase_track_button;
+static GtkWidget *s_view1_unchasebutton;
+
+static GtkWidget *s_view1_forward_button;
+static GtkWidget *s_view1_backward_button;
+
+static GtkWidget *s_view1_summary_button;
+static GtkWidget *s_view1_nosummary_button;
+
+static GtkWidget *s_view1_time_slew_right_button;
+static GtkWidget *s_view1_time_slew_left_button;
+
+static GtkWidget *s_view1_hscroll;
+static GtkObject *s_view1_hsadj;
+
+static GtkWidget *s_view1_vscroll;
+static GtkObject *s_view1_vsadj;
+
+static GtkWidget *s_view1_label;
+
+/*
+ * Search context
+ */
+static ulong s_srchcode; /* search event code */
+static int s_srchindex; /* last hit was at this event index */
+static boolean s_result_up; /* The SEARCH RESULT dongle is displayed */
+static boolean s_srchfail_up; /* The status line "Search Failed" is up */
+static int srch_chase_dir; /* search/chase dir, 0=>forward */
+
+
+/*
+ * Print context
+ */
+static int s_print_offset; /* Magic offset added to line, tbox fn codes */
+static FILE *s_printfp;
+
+/*
+ * Forward reference prototypes
+ */
+static void display_pid_axis(v1_geometry_t *vp);
+static void display_event_data(v1_geometry_t *vp);
+static void display_time_axis(v1_geometry_t *vp);
+static void view1_button_click_callback(GtkButton *item, gpointer data);
+
+/*
+ * config params
+ */
+
+gint c_view1_draw_width;
+gint c_view1_draw_height;
+
+/*
+ * Zoom-In / Time Ruler cursor
+ */
+
+#define zi_width 32
+#define zi_height 32
+#define zi_x_hot 22
+#define zi_y_hot 14
+static unsigned char zi_bits[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x88, 0x00,
+ 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0xc0, 0x00,
+ 0x00, 0xfc, 0xff, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0xa0, 0x00,
+ 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x84, 0x00,
+ 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+static unsigned char zi_bkgd[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x88, 0x00,
+ 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0xc0, 0x00,
+ 0x00, 0xfc, 0xff, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0xa0, 0x00,
+ 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x84, 0x00,
+ 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+static GdkCursor *zi_cursor;
+static GdkPixmap *zi_source, *zi_mask;
+
+/*
+ * Frequently-used small computations, best
+ * done correctly once and instantiated.
+ */
+
+/****************************************************************************
+* dtime_per_pixel
+****************************************************************************/
+
+static inline double dtime_per_pixel(v1_geometry_t *vp)
+{
+ return ((double)(vp->maxvistime - vp->minvistime)) /
+ ((double)(vp->total_width - vp->pid_ax_width));
+}
+
+/****************************************************************************
+* message_line
+* Changes the status line. Pass "" to clear the status line.
+****************************************************************************/
+
+void message_line (char *s)
+{
+ gtk_label_set_text (GTK_LABEL(s_view1_label), s);
+}
+
+/****************************************************************************
+* set_window_title
+* Changes the window title to include the specified filename.
+****************************************************************************/
+
+void set_window_title (const char *filename)
+{
+ char title[128];
+ snprintf(title, sizeof(title), "g2 (%s)", filename);
+ gtk_window_set_title(GTK_WINDOW(g_mainwindow), title);
+}
+
+/****************************************************************************
+* recompute_hscrollbar
+* Adjust the horizontal scrollbar's adjustment object.
+*
+* GtkAdjustments are really cool, but have to be set up exactly
+* right or the various client objects screw up completely.
+*
+* Note: this function is *not* called when the user clicks the scrollbar.
+****************************************************************************/
+
+static void recompute_hscrollbar (void)
+{
+ ulonglong current_width;
+ ulonglong event_incdec;
+ GtkAdjustment *adj;
+ event_t *ep;
+
+ if (g_nevents == 0)
+ return;
+
+ ep = (g_events + (g_nevents-1));
+ current_width = s_v1->maxvistime - s_v1->minvistime;
+ event_incdec = (current_width) / 6;
+
+ adj = GTK_ADJUSTMENT(s_view1_hsadj);
+
+ /*
+ * Structure member decoder ring
+ * -----------------------------
+ * lower the minimum possible value
+ * value the current value
+ * upper the maximum possible value
+ * step_increment end button click increment
+ * page_increment click in trough increment
+ * page_size size of currently visible area
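+ *
+ * Worked example (illustrative numbers only): viewing 30 seconds of
+ * a 100-second trace starting at t=20s, with times in microseconds:
+ * lower=0, value=2e7 (minvistime), page_size=3e7 (current_width),
+ * step_increment=5e6 (1/6 page), page_increment=1e7 (1/3 page),
+ * and upper = last event time + page_increment/3.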
+ */
+
+ adj->lower = (gfloat)0.00;
+ adj->value = (gfloat)s_v1->minvistime;
+
+ /* Minor click: move about 1/6 of a page */
+ adj->step_increment = (gfloat)event_incdec;
+
+ /* Major click: move about 1/3 of a page. */
+ adj->page_increment = (gfloat)(2*event_incdec);
+
+ /* allow the user to go a bit past the end */
+ adj->upper = adj->page_increment/3 + (gfloat)(ep->time);
+ adj->page_size = (gfloat)(current_width);
+
+ /*
+ * Tell all clients (e.g. the visible scrollbar) to
+ * make themselves look right
+ */
+ gtk_adjustment_changed(adj);
+ gtk_adjustment_value_changed(adj);
+}
+
+/****************************************************************************
+* recompute_vscrollbar
+* Ditto, for the vertical scrollbar
+****************************************************************************/
+
+static void recompute_vscrollbar (void)
+{
+ GtkAdjustment *adj;
+
+ adj = GTK_ADJUSTMENT(s_view1_vsadj);
+
+ adj->lower = (gfloat)0.00;
+ adj->upper = (gfloat)g_npids;
+ adj->value = (gfloat)0.00;
+ adj->step_increment = 1.00;
+ adj->page_increment = (gfloat)(s_v1->npids / 3);
+ adj->page_size = (gfloat)s_v1->npids;
+ gtk_adjustment_changed(adj);
+ gtk_adjustment_value_changed(adj);
+}
+
+/****************************************************************************
+* format_popbox_string
+****************************************************************************/
+
+elog_main_t elog_main;
+
+void format_popbox_string (char *tmpbuf, int len, event_t *ep, event_def_t *edp)
+{
+ char *fp;
+
+#ifdef NOTDEF
+ sprintf(tmpbuf,"%d:", ep->code);
+#endif
+ if (ep->flags & EVENT_FLAG_CLIB) {
+ elog_event_t *eep;
+ u8 *s;
+ int n;
+
+ eep = get_clib_event (ep->datum);
+
+ s = format (0, "%U", format_elog_event, &elog_main, eep);
+ /* Clamp to the caller's buffer; the formatted event can be long */
+ n = vec_len(s) < len - 1 ? vec_len(s) : len - 1;
+ memcpy (tmpbuf, s, n);
+ tmpbuf[n] = 0;
+ vec_free(s);
+ return;
+ }
+
+ snprintf(tmpbuf, len, "%s", edp->name);
+ fp = edp->format;
+ /* Make sure there's a real format string. If so, add it */
+ while (fp && *fp) {
+ if (*fp != ' ') {
+ snprintf(tmpbuf+strlen(tmpbuf), len - strlen(tmpbuf), ": ");
+ /* %s only supported for cpel files */
+ if (fp[1] == 's') {
+ snprintf(tmpbuf+strlen(tmpbuf), len - strlen(tmpbuf),
+ edp->format, strtab_ref(ep->datum));
+ } else {
+ snprintf(tmpbuf+strlen(tmpbuf), len - strlen(tmpbuf),
+ edp->format, ep->datum);
+ }
+ return;
+ }
+ fp++;
+ }
+}
+
+/****************************************************************************
+ * add_snapshot
+ ****************************************************************************/
+
+static void add_snapshot(void)
+{
+ int i;
+ snapshot_t *new = g_malloc(sizeof(snapshot_t));
+
+ memcpy(&new->geometry, s_v1, sizeof(new->geometry));
+ for (i = 0; i < NEVENTS; i++) {
+ new->show_event[i] = g_eventdefs[i].selected;
+ }
+ new->pidvec = g_malloc(sizeof(pid_sort_t)*g_npids);
+ memcpy(new->pidvec, g_pids, sizeof(pid_sort_t)*g_npids);
+ new->vscroll_value = GTK_ADJUSTMENT(s_view1_vsadj)->value;
+ new->summary_mode = summary_mode;
+ new->color_mode = color_mode;
+
+ /* Push onto the head of the snapshot ring (works when empty, too) */
+ new->next = s_snapshots;
+ s_snapshots = new;
+ s_cursnap = new;
+}
+
+/****************************************************************************
+ * next_snapshot
+ ****************************************************************************/
+
+static void next_snapshot(void)
+{
+ snapshot_t *next;
+ int i;
+ pid_sort_t *psp;
+ pid_data_t *pp;
+
+ if (!s_snapshots) {
+ infobox("No snapshots", "\nNo snapshots in the ring...\n");
+ return;
+ }
+
+ next = s_cursnap->next;
+ if (next == 0)
+ next = s_snapshots;
+
+ s_cursnap = next;
+
+ memcpy(s_v1, &next->geometry, sizeof(next->geometry));
+ for (i = 0; i < NEVENTS; i++) {
+ g_eventdefs[i].selected = next->show_event[i];
+ }
+ memcpy(g_pids, next->pidvec, sizeof(pid_sort_t)*g_npids);
+ color_mode = next->color_mode;
+ /*
+ * Update summary mode via a button push so that the button state is
+ * updated accordingly. (Should ideally clean up the view/controller
+ * separation properly one day.)
+ */
+ if (summary_mode != next->summary_mode) {
+ view1_button_click_callback
+ (NULL, (gpointer)(unsigned long long)
+ (summary_mode ? NOSUMMARY_BUTTON : SUMMARY_BUTTON));
+ }
+
+ /* Fix the pid structure index mappings */
+ psp = g_pids;
+
+ for (i = 0; i < g_npids; i++) {
+ pp = psp->pid;
+ pp->pid_index = i;
+ psp++;
+ }
+ GTK_ADJUSTMENT(s_view1_vsadj)->value = next->vscroll_value;
+ gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj));
+ recompute_hscrollbar();
+ pointsel_next_snapshot();
+ view1_display_when_idle();
+}
+
+
+/****************************************************************************
+ * del_snapshot
+ ****************************************************************************/
+
+static void del_snapshot(void)
+{
+ snapshot_t *prev;
+ snapshot_t *this;
+
+ if (!s_snapshots) {
+ infobox("No snapshots", "\nNo snapshots to delete...\n");
+ return;
+ }
+
+ prev = NULL;
+ this = s_snapshots;
+
+ while (this && this != s_cursnap) {
+ prev = this;
+ this = this->next;
+ }
+
+ if (this != s_cursnap) {
+ infobox("BUG", "\nSnapshot AWOL!\n");
+ return;
+ }
+
+ s_cursnap = this->next;
+
+ /* middle of the list? */
+ if (prev) {
+ prev->next = this->next;
+ g_free(this->pidvec);
+ g_free(this);
+ } else { /* start of the list */
+ s_snapshots = this->next;
+ g_free(this->pidvec);
+ g_free(this);
+ }
+
+ /* Note: both will be NULL after last delete */
+ if (s_cursnap == NULL)
+ s_cursnap = s_snapshots;
+}
+
+/****************************************************************************
+ * write_snapshot
+ *
+ * VERY primitive right now - not endian or version independent, and only
+ * writes to "snapshots.g2" in the current directory
+ ****************************************************************************/
+static void write_snapshot(void)
+{
+ FILE *file = NULL;
+ snapshot_t *snap;
+ char *error = NULL;
+ int records = 0;
+
+ if (s_snapshots == NULL) {
+ error = "No snapshots defined";
+ errno = 0;
+ }
+
+ if (!error) {
+ file = fopen("snapshots.g2", "w");
+ if (file == NULL) {
+ error = "Unable to open snapshots.g2";
+ }
+ }
+
+ /*
+ * Simply serialize the arch-dependent binary data, without a care in the
+ * world. Don't come running to me if you try to read it and crash.
+ */
+ for (snap = s_snapshots; !error && snap != NULL; snap = snap->next) {
+ if (fwrite(&snap->geometry,
+ sizeof(snap->geometry), 1, file) != 1 ||
+ fwrite(&snap->show_event,
+ sizeof(snap->show_event), 1, file) != 1 ||
+ fwrite(snap->pidvec,
+ sizeof(pid_sort_t) * g_npids, 1, file) != 1 ||
+ fwrite(&snap->vscroll_value,
+ sizeof(snap->vscroll_value), 1, file) != 1 ||
+ fwrite(&snap->summary_mode,
+ sizeof(snap->summary_mode), 1, file) != 1 ||
+ fwrite(&snap->color_mode,
+ sizeof(snap->color_mode), 1, file) != 1) {
+ error = "Error writing data";
+ }
+ records++;
+ }
+
+ if (!error) {
+ if (fclose(file)) {
+ error = "Unable to close file";
+ }
+ }
+
+ if (error) {
+ infobox(error, strerror(errno));
+ } else {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "Wrote %d snapshots to snapshots.g2",
+ records);
+ message_line(buf);
+ }
+}
+
+/****************************************************************************
+ * read_snapshot
+ *
+ * VERY primitive right now - not endian or version independent, and only reads
+ * from "snapshots.g2" in the current directory
+ ****************************************************************************/
+static void read_snapshot(void)
+{
+ FILE *file;
+ snapshot_t *snap, *next_snap;
+ snapshot_t *new_snaps = NULL;
+ char *error = NULL;
+ int len, i, records = 0;
+ pid_data_t *pp;
+
+ file = fopen("snapshots.g2", "r");
+ if (file == NULL) {
+ error = "Unable to open snapshots.g2";
+ }
+
+ /*
+ * Read in the snapshots and link them together. We insert them backwards,
+ * but that's tolerable. If the data is in anyway not what we expect, we'll
+ * probably crash. Sorry.
+ */
+ while (!error && !feof(file)) {
+ snap = g_malloc(sizeof(*snap));
+ snap->pidvec = NULL; /* so we can free this if there's an error */
+
+ len = fread(&snap->geometry, sizeof(snap->geometry), 1, file);
+ if (len == 0) {
+ /* EOF */
+ g_free(snap);
+ break;
+ } else {
+ /* insert into list straight away */
+ snap->next = new_snaps;
+ new_snaps = snap;
+ }
+ if (len != 1) {
+ error = "Problem reading first item from file";
+ break;
+ }
+ if (fread(&snap->show_event, sizeof(snap->show_event), 1, file) != 1) {
+ error = "Problem reading second item from file";
+ break;
+ }
+ len = sizeof(pid_sort_t) * g_npids;
+ snap->pidvec = g_malloc(len);
+ if (fread(snap->pidvec, len, 1, file) != 1) {
+ error = "Problem reading third item from file";
+ break;
+ }
+ if (fread(&snap->vscroll_value,
+ sizeof(snap->vscroll_value), 1, file) != 1 ||
+ fread(&snap->summary_mode,
+ sizeof(snap->summary_mode), 1, file) != 1 ||
+ fread(&snap->color_mode,
+ sizeof(snap->color_mode), 1, file) != 1) {
+ error = "Problem reading final items from file";
+ break;
+ }
+
+ /*
+ * Fix up the pointers from the sorted pid vector back into our pid
+ * data objects, by walking the linked list of pid_data_t objects for
+ * every one looking for a match. This is O(n^2) grossness, but in real
+ * life there aren't that many pids, and it seems zippy enough.
+ */
+ for (i = 0; i < g_npids; i++) {
+ for (pp = g_pid_data_list; pp != NULL; pp = pp->next) {
+ if (pp->pid_value == snap->pidvec[i].pid_value) {
+ break;
+ }
+ }
+ if (pp != NULL) {
+ snap->pidvec[i].pid = pp;
+ } else {
+ error = "Snapshot file referenced unknown pids";
+ break;
+ }
+ }
+
+ records++;
+ }
+
+ if (!error) {
+ if (fclose(file)) {
+ error = "Unable to close file";
+ }
+ }
+
+ if (error) {
+ /*
+ * Problem - clean up any detritus; the infobox at the end of
+ * this function reports the error. Free each pidvec before
+ * freeing its owning snapshot.
+ */
+ for (snap = new_snaps; snap != NULL; snap = next_snap) {
+ next_snap = snap->next;
+ g_free(snap->pidvec);
+ g_free(snap);
+ }
+ } else {
+ /*
+ * Success! trash the old snapshots and replace with the new
+ */
+ for (snap = s_snapshots; snap != NULL; snap = next_snap) {
+ next_snap = snap->next;
+ g_free(snap->pidvec);
+ g_free(snap);
+ }
+
+ s_cursnap = s_snapshots = new_snaps;
+ }
+
+ if (error) {
+ infobox(error, strerror(errno));
+ } else {
+ char buf[64];
+ snprintf(buf, sizeof(buf),
+ "Read %d snapshots from snapshots.g2", records);
+ message_line(buf);
+ }
+}
+
+/****************************************************************************
+* set_color
+*
+* Set the color for the specified pid_index, or COLOR_DEFAULT to return it
+* to the usual black.
+****************************************************************************/
+#define COLOR_DEFAULT (-1)
+static void set_color(int pid_index)
+{
+ pid_sort_t *psp;
+
+ psp = (g_pids + pid_index);
+
+ if (psp->selected)
+ gdk_gc_set_foreground(da->style->black_gc, &s_color[0]);
+ else if (pid_index == COLOR_DEFAULT || !color_mode) {
+ gdk_gc_set_foreground(da->style->black_gc, &fg_black);
+ } else {
+ gdk_gc_set_foreground(da->style->black_gc,
+ &s_color[g_pids[pid_index].color_index]);
+ }
+}
+
+/****************************************************************************
+* toggle_event_select
+****************************************************************************/
+
+static int toggle_event_select(GdkEventButton *event, v1_geometry_t *vp)
+{
+ int pid_index, start_index;
+ int x, y;
+ GdkRectangle *rp;
+ GdkRectangle hit_rect;
+ GdkRectangle dummy;
+ event_t *ep;
+ event_def_t *edp;
+ char tmpbuf [1024];
+ double time_per_pixel;
+
+ if (g_nevents == 0)
+ return 0;
+
+ time_per_pixel = dtime_per_pixel(vp);
+
+ start_index = find_event_index (vp->minvistime);
+
+ /* Too far right? */
+ if (start_index >= g_nevents)
+ return 0;
+
+ /*
+ * To see if the mouse hit a visible event, use a variant
+ * of the event display loop.
+ */
+
+ hit_rect.x = (int)event->x;
+ hit_rect.y = (int)event->y;
+ hit_rect.width = 1;
+ hit_rect.height = 1;
+
+ ep = (g_events + start_index);
+
+ while ((ep->time < vp->maxvistime) &&
+ (ep < (g_events + g_nevents))) {
+ pid_index = ep->pid->pid_index;
+
+ /* First filter: pid out of range */
+ if ((pid_index < vp->first_pid_index) ||
+ (pid_index >= vp->first_pid_index + vp->npids)) {
+ ep++;
+ continue;
+ }
+
+ /* Second filter: event hidden */
+ edp = find_event_definition (ep->code);
+ if (!edp->selected) {
+ ep++;
+ continue;
+ }
+
+ /*
+ * At this point, we know that the point is at least on the
+ * screen. See if the mouse hit within the bounding box
+ */
+
+ /*
+ * $$$$ maybe keep looping until off the edge,
+ * maintain a "best hit", then declare that one the winner?
+ */
+
+ pid_index -= vp->first_pid_index;
+
+ y = pid_index*vp->strip_height + vp->event_offset;
+
+ x = vp->pid_ax_width +
+ (int)(((double)(ep->time - vp->minvistime)) / time_per_pixel);
+
+ /* Perhaps we're trying to toggle the detail box? */
+ if (ep->flags & EVENT_FLAG_SELECT) {
+ /* Figure out the dimensions of the detail box */
+ format_popbox_string(tmpbuf, sizeof(tmpbuf), ep, edp);
+ rp = tbox(tmpbuf, x, y - vp->pop_offset, TBOX_GETRECT_BOXED);
+ if (gdk_rectangle_intersect(rp, &hit_rect, &dummy)) {
+ ep->flags &= ~EVENT_FLAG_SELECT;
+ view1_display_when_idle();
+ return 0;
+ }
+ }
+
+ sprintf(tmpbuf, "%ld", ep->code);
+
+ /* Figure out the dimensions of the regular box */
+ rp = tbox(tmpbuf, x, y, TBOX_GETRECT_EVENT);
+
+ if (gdk_rectangle_intersect(rp, &hit_rect, &dummy)) {
+ /* we hit the rectangle. */
+ if (ep->flags & EVENT_FLAG_SELECT) {
+ ep->flags &= ~EVENT_FLAG_SELECT;
+ view1_display_when_idle();
+ return 0;
+ } else {
+ set_color(ep->pid->pid_index);
+
+ /* It wasn't selected, so put up the detail box */
+ format_popbox_string(tmpbuf, sizeof(tmpbuf), ep, edp);
+ tbox(tmpbuf, x, y - vp->pop_offset, TBOX_DRAW_BOXED);
+ line(x, y-vp->pop_offset, x, y, LINE_DRAW_BLACK);
+ ep->flags |= EVENT_FLAG_SELECT;
+ ep->flags &= ~EVENT_FLAG_SEARCHRSLT;
+ s_last_selected_event = ep;
+ }
+ return 0;
+ }
+ ep++;
+ }
+ return -1;
+}
+
+/****************************************************************************
+* toggle_track_select
+****************************************************************************/
+
+static void toggle_track_select (GdkEventButton *event,
+ v1_geometry_t *vp)
+{
+ int i;
+ int pid_index;
+ int y, delta_y;
+ pid_sort_t *psp;
+
+ if (g_nevents == 0)
+ return;
+
+ /* Scan pid/track axis locations, looking for a match */
+ for (i = 0; i < vp->npids; i++) {
+ y = i*vp->strip_height + vp->pid_ax_offset;
+ delta_y = y - event->y;
+ if (delta_y < 0)
+ delta_y = -delta_y;
+ if (delta_y < 10) {
+ goto found;
+ }
+
+ }
+ infobox("NOTE", "\nNo PID/Track In Range\nPlease Try Again");
+ return;
+
+ found:
+ pid_index = i + vp->first_pid_index;
+ psp = (g_pids + pid_index);
+ psp->selected ^= 1;
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* deselect_tracks
+****************************************************************************/
+static void deselect_tracks (void)
+{
+ int i;
+
+ for (i = 0; i < g_npids; i++)
+ g_pids[i].selected = 0;
+
+}
+
+
+/****************************************************************************
+* move_current_track
+****************************************************************************/
+
+typedef enum { MOVE_TOP, MOVE_BOTTOM } move_type;
+
+static void move_current_track(GdkEventButton *event,
+ v1_geometry_t *vp,
+ move_type type)
+{
+ int i;
+ int pid_index;
+ int y, delta_y;
+ pid_sort_t *new_pidvec;
+ pid_sort_t *psp;
+ pid_sort_t *pold, *pnew;
+ pid_data_t *pp;
+
+ if (g_nevents == 0)
+ return;
+
+ /* Scan pid/track axis locations, looking for a match */
+ for (i = 0; i < vp->npids; i++) {
+ y = i*vp->strip_height + vp->pid_ax_offset;
+ delta_y = y - event->y;
+ if (delta_y < 0)
+ delta_y = -delta_y;
+ if (delta_y < 10) {
+ goto found;
+ }
+
+ }
+ infobox("NOTE", "\nNo PID/Track In Range\nPlease Try Again");
+ return;
+
+ found:
+ pid_index = i + vp->first_pid_index;
+
+ new_pidvec = g_malloc0(sizeof(pid_sort_t)*g_npids);
+ pold = g_pids;
+ pnew = new_pidvec;
+
+ if (type == MOVE_TOP) {
+ /* move to top */
+ *pnew++ = g_pids[pid_index];
+ for (i = 0; i < pid_index; i++)
+ *pnew++ = *pold++;
+ pold++;
+ i++;
+ for (; i < g_npids; i++)
+ *pnew++ = *pold++;
+ } else {
+ /* move to bottom */
+ for (i = 0; i < pid_index; i++)
+ *pnew++ = *pold++;
+ pold++;
+ i++;
+ for (; i < g_npids; i++)
+ *pnew++ = *pold++;
+ *pnew = g_pids[pid_index];
+ }
+
+ g_free(g_pids);
+ g_pids = new_pidvec;
+
+ /*
+ * Revert the pid_index mapping to an identity map,
+ */
+ psp = g_pids;
+
+ for (i = 0; i < g_npids; i++) {
+ pp = psp->pid;
+ pp->pid_index = i;
+ psp++;
+ }
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* zoom_event
+* Process a zoom gesture. The use of doubles is required to avoid
+* truncating the various variable values, which in turn would lead to
+* some pretty random-looking zoom responses.
+****************************************************************************/
+
+void zoom_event(GdkEventButton *e1, GdkEventButton *e2, v1_geometry_t *vp)
+{
+ double xrange;
+ double time_per_pixel;
+ double width_in_pixels;
+ double center_on_time, width_in_time;
+ double center_on_pixel;
+
+ /*
+ * Clip the zoom area to the event display area.
+ * Otherwise, center_on_time - width_in_time is in hyperspace
+ * to the left of zero
+ */
+
+ if (e1->x < vp->pid_ax_width)
+ e1->x = vp->pid_ax_width;
+
+ if (e2->x < vp->pid_ax_width)
+ e2->x = vp->pid_ax_width;
+
+ if (e2->x == e1->x)
+ goto loser_zoom_repaint;
+
+ xrange = (double) (e2->x - e1->x);
+ if (xrange < 0.00)
+ xrange = -xrange;
+
+ /* Actually, width in pixels of half the zoom area */
+ width_in_pixels = xrange / 2.00;
+ time_per_pixel = dtime_per_pixel(vp);
+ width_in_time = width_in_pixels * time_per_pixel;
+
+ /* Center the screen on the center of the zoom area */
+ center_on_pixel = (double)((e2->x + e1->x) / 2.00) -
+ (double)vp->pid_ax_width;
+ center_on_time = center_on_pixel*time_per_pixel + (double)vp->minvistime;
+
+ /*
+ * Transform back to 64-bit integer microseconds, reset the
+ * scrollbar, schedule a repaint.
+ */
+ vp->minvistime = (ulonglong)(center_on_time - width_in_time);
+ vp->maxvistime = (ulonglong)(center_on_time + width_in_time);
+
+loser_zoom_repaint:
+ recompute_hscrollbar();
+
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* scroll_y
+*
+* Scroll up or down by the specified delta
+*
+****************************************************************************/
+static void scroll_y(int delta)
+{
+ int new_index = s_v1->first_pid_index + delta;
+ if (new_index + s_v1->npids > g_npids)
+ new_index = g_npids - s_v1->npids;
+ if (new_index < 0)
+ new_index = 0;
+
+ if (new_index != s_v1->first_pid_index) {
+ s_v1->first_pid_index = new_index;
+ GTK_ADJUSTMENT(s_view1_vsadj)->value = (gdouble)new_index;
+ gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj));
+ view1_display_when_idle();
+ }
+}
+
+/****************************************************************************
+* view1_handle_key_press_event
+* Relevant definitions in: /usr/include/gtk-1.2/gdk/gdktypes.h
+*
+* This routine implements hotkeys for the Quake generation:
+*
+* W - zoom in
+* S - zoom out
+* A - pan left
+* D - pan right
+* R - pan up
+* F - pan down
+* T - fewer traces
+* G - more traces
+*
+* E - toggle summary mode
+* C - toggle color mode
+*
+* X - take snapshot
+* Z - next snapshot
+* P - persist snapshots to file
+* L - load snapshots from file
+*
+* ctrl-Q - exit
+*
+****************************************************************************/
+gint
+view1_handle_key_press_event (GtkWidget *widget, GdkEventKey *event)
+{
+ long long delta;
+
+ switch (event->keyval) {
+ case GDK_w: // zoom in
+ view1_button_click_callback(NULL, (gpointer)ZOOMIN_BUTTON);
+ break;
+
+ case GDK_s: // zoom out
+ view1_button_click_callback(NULL, (gpointer)ZOOMOUT_BUTTON);
+ break;
+
+ case GDK_a: // pan left
+ delta = (s_v1->maxvistime - s_v1->minvistime) / 6;
+ if (s_v1->minvistime < delta) {
+ delta = s_v1->minvistime;
+ }
+ s_v1->minvistime -= delta;
+ s_v1->maxvistime -= delta;
+ recompute_hscrollbar();
+ break;
+
+ case GDK_d: // pan right
+ delta = (s_v1->maxvistime - s_v1->minvistime) / 6;
+ if (s_v1->maxvistime + delta > g_events[g_nevents - 1].time) {
+ /*
+ * @@@ this doesn't seem to quite reach the far right hand
+ * side correctly - not sure why.
+ */
+ delta = g_events[g_nevents - 1].time - s_v1->maxvistime;
+ }
+ s_v1->minvistime += delta;
+ s_v1->maxvistime += delta;
+ recompute_hscrollbar();
+ break;
+
+ case GDK_r: // pan up
+ scroll_y(-1);
+ break;
+
+ case GDK_f: // pan down
+ scroll_y(+1);
+ break;
+
+ case GDK_t: // fewer tracks
+ view1_button_click_callback(NULL, (gpointer)LESS_TRACES_BUTTON);
+ break;
+
+ case GDK_g: // more tracks
+ view1_button_click_callback(NULL, (gpointer)MORE_TRACES_BUTTON);
+ break;
+
+ case GDK_e: // toggle summary mode
+ view1_button_click_callback
+ (NULL, (gpointer)(unsigned long long)
+ (summary_mode ? NOSUMMARY_BUTTON : SUMMARY_BUTTON));
+ break;
+
+ case GDK_c: // toggle color mode
+ color_mode ^= 1;
+ view1_display_when_idle();
+ break;
+
+ case GDK_p: // persist snapshots
+ write_snapshot();
+ break;
+
+ case GDK_l: // load snapshots
+ read_snapshot();
+ break;
+
+ case GDK_x: // take snapshot
+ view1_button_click_callback(NULL, (gpointer)SNAP_BUTTON);
+ break;
+
+ case GDK_z: // next snapshot
+ view1_button_click_callback(NULL, (gpointer)NEXT_BUTTON);
+ break;
+
+ case GDK_q: // ctrl-q is exit
+ if (event->state & GDK_CONTROL_MASK) {
+ gtk_main_quit();
+ }
+ break;
+ }
+ return TRUE;
+}
+
+/****************************************************************************
+* button_press_event
+* Relevant definitions in: /usr/include/gtk-1.2/gdk/gdktypes.h
+*
+* This routine implements three functions: zoom-to-area, time ruler, and
+* show/hide event detail popup.
+*
+* The left mouse button (button 1) has two simultaneous functions: event
+* detail popup, and zoom-to-area. If the press and release events occur
+* within a small delta-x, it's a detail popup event. Otherwise, it's
+* an area zoom.
+*
+* The right mouse button (button 3) implements the time ruler.
+****************************************************************************/
+
+static gint
+button_press_event (GtkWidget *widget, GdkEventButton *event)
+{
+ static GdkEventButton press1_event;
+ static boolean press1_valid;
+ static GdkEventButton press3_event;
+ static guint32 last_truler_time;
+ static boolean press3_valid;
+ static boolean zoom_bar_up;
+ int time_ax_y, xdelta;
+ char tmpbuf [128];
+ double time_per_pixel;
+
+ time_ax_y = 0;
+
+ switch(event->type) {
+ case GDK_BUTTON_PRESS:
+ /* Capture the appropriate starting point */
+ if (event->button == 1) {
+ press1_valid = TRUE;
+ press1_event = *event;
+ return(TRUE);
+ }
+ if (event->button == 3) {
+ press3_valid = TRUE;
+ press3_event = *event;
+ return(TRUE);
+ }
+ return(TRUE);
+
+ case GDK_BUTTON_RELEASE:
+ /* Time ruler */
+ if (press3_valid) {
+ press3_valid = FALSE;
+ /* Fix the cursor, and repaint the screen from scratch */
+ gdk_window_set_cursor (da->window, norm_cursor);
+ view1_display_when_idle();
+ return(TRUE);
+ }
+ /* Event select / zoom-to-area */
+ if (press1_valid) {
+ press1_valid = FALSE;
+ xdelta = (int)(press1_event.x - event->x);
+ if (xdelta < 0)
+ xdelta = -xdelta;
+
+ /* is the mouse more or less where it started? */
+ if (xdelta < 10) {
+ /* Control-left-mouse => sink the track */
+ /* Shift-left-mouse => raise the track */
+ if ((press1_event.state & GDK_CONTROL_MASK) ==
+ GDK_CONTROL_MASK) {
+ move_current_track(event, s_v1, MOVE_BOTTOM);
+ } else if ((press1_event.state & GDK_SHIFT_MASK) ==
+ GDK_SHIFT_MASK) {
+ move_current_track(event, s_v1, MOVE_TOP);
+ } else {
+ /* No modifiers: toggle the event / select track */
+ if (toggle_event_select(event, s_v1))
+ toggle_track_select(event, s_v1);
+ }
+ /* Repaint to get rid of the zoom bar */
+ if (zoom_bar_up) {
+ /* Fix the cursor and leave. No zoom */
+ gdk_window_set_cursor (da->window, norm_cursor);
+ zoom_bar_up = FALSE;
+ break;
+ }
+ } else { /* mouse moved enough to zoom */
+ zoom_event(&press1_event, event, s_v1);
+ gdk_window_set_cursor (da->window, norm_cursor);
+ zoom_bar_up = FALSE;
+ }
+ } else if (event->button == 4) {
+ /* scroll wheel up */
+ scroll_y(event->state & GDK_SHIFT_MASK ? -10 : -1);
+ } else if (event->button == 5) {
+ /* scroll wheel down */
+ scroll_y(event->state & GDK_SHIFT_MASK ? +10 : +1);
+ }
+ return(TRUE);
+
+ case GDK_MOTION_NOTIFY:
+ /* Button one followed by motion: draw zoom fence and fix cursor */
+ if (press1_valid) {
+ /* Fence, cursor already set */
+ if (zoom_bar_up)
+ return(TRUE);
+
+ xdelta = (int)(press1_event.x - event->x);
+ if (xdelta < 0)
+ xdelta = -xdelta;
+
+ /* Haven't moved enough to declare a zoom sequence yet */
+ if (xdelta < 10)
+ return(TRUE);
+
+ /* Draw the zoom fence, use the key-down X coordinate */
+ time_ax_y = s_v1->npids * s_v1->strip_height + s_v1->pid_ax_offset;
+
+ line((int)(press1_event.x), s_v1->pop_offset,
+ (int)(press1_event.x), time_ax_y, LINE_DRAW_BLACK);
+ tbox("Zoom From Here...", (int)(press1_event.x), s_v1->pop_offset,
+ TBOX_DRAW_BOXED);
+ gdk_window_set_cursor(da->window, zi_cursor);
+ zoom_bar_up = TRUE;
+ return(TRUE);
+ }
+ if (press3_valid) {
+ double nsec;
+
+ gdk_window_set_cursor(da->window, zi_cursor);
+
+ /*
+ * Some filtration is needed on Solaris, or the server will hang
+ */
+ if (event->time - last_truler_time < 75)
+ return(TRUE);
+
+ last_truler_time = event->time;
+
+ /* Compute the fence height before drawing either fence line */
+ time_ax_y = s_v1->npids * s_v1->strip_height + s_v1->pid_ax_offset;
+
+ line((int)(press3_event.x), s_v1->pop_offset,
+ (int)(press3_event.x), time_ax_y, LINE_DRAW_BLACK);
+
+ xdelta = (int)(press3_event.x - event->x);
+ if (xdelta < 0)
+ xdelta = -xdelta;
+
+ time_per_pixel = dtime_per_pixel(s_v1);
+
+ line((int)(press3_event.x), s_v1->pop_offset,
+ (int)(press3_event.x), time_ax_y, LINE_DRAW_BLACK);
+ /*
+ * Note: use a fixed-width format so it looks like we're
+ * erasing and redrawing the box.
+ */
+ nsec = ((double)xdelta)*time_per_pixel;
+ if (nsec >1e9) {
+ sprintf(tmpbuf, "%8.3f sec ", nsec/1e9);
+ } else if (nsec > 1e6) {
+ sprintf(tmpbuf, "%8.3f msec", nsec/1e6);
+ } else if (nsec > 1e3) {
+ sprintf(tmpbuf, "%8.3f usec", nsec/1e3);
+ } else {
+ sprintf(tmpbuf, "%8.0f nsec", nsec);
+ }
+ s_v1->last_time_interval = nsec;
+ tbox(tmpbuf, (int)(press3_event.x), s_v1->pop_offset,
+ TBOX_DRAW_BOXED);
+ return(TRUE);
+ }
+
+ default:
+#ifdef DEBUG
+ g_print("button:\ttype = %d\n", event->type);
+ g_print("\twindow = 0x%x\n", event->window);
+ g_print("\tsend_event = %d\n", event->send_event);
+ g_print("\ttime = %d\n", event->time);
+ g_print("\tx = %6.2f\n", event->x);
+ g_print("\ty = %6.2f\n", event->y);
+ g_print("\tpressure = %6.2f\n", event->pressure);
+ g_print("\txtilt = %6.2f\n", event->xtilt);
+ g_print("\tytilt = %6.2f\n", event->ytilt);
+ g_print("\tstate = %d\n", event->state);
+ g_print("\tbutton = %d\n", event->button);
+ g_print("\tsource = %d\n", event->source);
+ g_print("\tdeviceid = %d\n", event->deviceid);
+ g_print("\tx_root = %6.2f\n", event->x_root);
+ g_print("\ty_root = %6.2f\n", event->y_root);
+ return(TRUE);
+#endif
+ break;
+ }
+
+ view1_display_when_idle();
+
+ return(TRUE);
+}
+
+/****************************************************************************
+* configure_event
+* Happens when the window manager resizes the viewer's main window.
+****************************************************************************/
+
+static gint
+configure_event (GtkWidget *widget, GdkEventConfigure *event)
+{
+ /* Toss the previous drawing area backing store pixmap */
+ if (pm)
+ gdk_pixmap_unref(pm);
+
+ /* Create a new pixmap, paint it */
+ pm = gdk_pixmap_new(widget->window,
+ widget->allocation.width,
+ widget->allocation.height,
+ -1);
+ gdk_draw_rectangle (pm,
+ widget->style->white_gc,
+ TRUE,
+ 0, 0,
+ widget->allocation.width,
+ widget->allocation.height);
+
+ /* Reset the view geometry parameters, as required */
+ s_v1->total_width = widget->allocation.width;
+ s_v1->total_height = widget->allocation.height;
+ s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) /
+ s_v1->strip_height;
+
+ /* Schedule a repaint */
+ view1_display_when_idle();
+ return(TRUE);
+}
+
+/****************************************************************************
+* expose_event
+* Use backing store to fix the screen.
+****************************************************************************/
+static gint expose_event (GtkWidget *widget, GdkEventExpose *event)
+{
+ gdk_draw_pixmap(widget->window,
+ widget->style->fg_gc[GTK_WIDGET_STATE (widget)],
+ pm,
+ event->area.x, event->area.y,
+ event->area.x, event->area.y,
+ event->area.width, event->area.height);
+
+ return(FALSE);
+}
+
+/****************************************************************************
+* event_search_internal
+* This routine searches forward from s_srchindex, looking for s_srchcode;
+* wraps at the end of the buffer.
+****************************************************************************/
+
+boolean event_search_internal (void)
+{
+ event_t *ep;
+ int i;
+ int index;
+ int pid_index;
+ boolean full_redisplay = FALSE;
+ ulonglong current_width;
+ char tmpbuf [64];
+
+ /* No events yet? Act like the search worked, to avoid a loop */
+ if (g_nevents == 0)
+ return(TRUE);
+
+ ep = (g_events + s_srchindex);
+ ep->flags &= ~EVENT_FLAG_SEARCHRSLT;
+
+ /*
+ * Assume the user wants to search [plus or minus]
+ * from where they are.
+ */
+#ifdef notdef
+ if (ep->time < s_v1->minvistime)
+ s_srchindex = find_event_index (s_v1->minvistime);
+#endif
+
+ for (i = 1; i <= g_nevents; i++) {
+ /* Bias the backward case so the modulo operand can't go negative */
+ index = (srch_chase_dir == SRCH_CHASE_BACKWARD) ?
+ (s_srchindex - i + g_nevents) % g_nevents :
+ (i + s_srchindex) % g_nevents;
+
+ ep = (g_events + index);
+
+ if (ep->code == s_srchcode) {
+ if (s_srchfail_up)
+ message_line("");
+ s_srchindex = index;
+ pid_index = ep->pid->pid_index;
+
+ /* Need a vertical scroll? */
+ if ((pid_index < s_v1->first_pid_index) ||
+ (pid_index >= s_v1->first_pid_index + s_v1->npids)) {
+ if (pid_index > (g_npids - s_v1->npids))
+ pid_index = (g_npids - s_v1->npids);
+ s_v1->first_pid_index = pid_index;
+ GTK_ADJUSTMENT(s_view1_vsadj)->value =
+ (gdouble)s_v1->first_pid_index;
+ gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj));
+ full_redisplay = TRUE;
+ }
+
+ /* Need a horizontal scroll? */
+ if (ep->time < s_v1->minvistime || ep->time > s_v1->maxvistime) {
+ current_width = (s_v1->maxvistime - s_v1->minvistime);
+ if (ep->time < ((current_width+1) / 2)) {
+ s_v1->minvistime = 0ll;
+ s_v1->maxvistime = current_width;
+ } else {
+ s_v1->minvistime = ep->time - ((current_width+1)/2);
+ s_v1->maxvistime = ep->time + ((current_width+1)/2);
+ }
+ recompute_hscrollbar();
+ full_redisplay = TRUE;
+ }
+ ep->flags |= EVENT_FLAG_SEARCHRSLT;
+ full_redisplay = TRUE;
+
+#ifdef NOTDEF
+ if (!full_redisplay){
+ if (!s_result_up) {
+ s_result_up = TRUE;
+ time_per_pixel = dtime_per_pixel(s_v1);
+
+ y = pid_index*s_v1->strip_height + s_v1->event_offset;
+ x = s_v1->pid_ax_width +
+ (int)(((double)(ep->time - s_v1->minvistime)) /
+ time_per_pixel);
+ sprintf(tmpbuf, "SEARCH RESULT");
+ tbox(tmpbuf, x, y - s_v1->pop_offset, TBOX_DRAW_BOXED);
+ line(x, y-s_v1->pop_offset, x, y, LINE_DRAW_BLACK);
+ } else {
+ full_redisplay = TRUE;
+ }
+ }
+#endif
+
+ if (full_redisplay)
+ view1_display_when_idle();
+ return(TRUE);
+ }
+ }
+ sprintf (tmpbuf, "Search for event %ld failed...\n", s_srchcode);
+ message_line(tmpbuf);
+ s_srchfail_up = TRUE;
+ return(TRUE);
+}
+
+/****************************************************************************
+* event_search_callback
+****************************************************************************/
+
+boolean event_search_callback (char *s)
+{
+ /* No events yet? Act like the search worked, to avoid a loop */
+ if (g_nevents == 0)
+ return(TRUE);
+
+ s_srchcode = atol(s);
+
+ if (s_srchcode == 0)
+ return(FALSE);
+
+ return(event_search_internal());
+}
+
+/****************************************************************************
+* event_search
+****************************************************************************/
+
+static void event_search (void)
+{
+ modal_dialog ("Event Search: Please Enter Event Code",
+ "Invalid: Please Reenter Event Code", NULL,
+ event_search_callback);
+}
+
+/****************************************************************************
+* init_track_colors
+****************************************************************************/
+static void init_track_colors(void)
+{
+ int i;
+ unsigned hash;
+ char *label_char;
+ unsigned RGB[3];
+ gboolean dont_care[g_npids + 1]; /* alloc_colors writes g_npids+1 flags */
+
+ /*
+ * If we've already allocated the colors once, then in theory we should
+ * just be able to re-order the GCs already created to match the new track
+ * order; the track -> color mapping doesn't currently change at runtime.
+ * However, it's easier just to allocate everything from fresh. As a nod in
+ * the direction of politeness towards our poor abused X server, we at
+ * least mop up the previously allocated GCs first, although in practice
+ * even omitting this didn't seem to cause a problem.
+ */
+ if (s_color != NULL ) {
+ gdk_colormap_free_colors(gtk_widget_get_colormap(da),
+ s_color, g_npids);
+ memset(s_color, 0, sizeof(GdkColor) * g_npids);
+ } else {
+ /*
+ * First time through: allocate the array to hold the GCs.
+ */
+ s_color = g_malloc0(sizeof(GdkColor) * (g_npids+1));
+ }
+
+ /*
+ * Go through and assign a color for each track.
+ */
+ /* Setup entry 0 in the colormap as pure red (for selection) */
+ s_color[0] = fg_red;
+
+ for (i = 1; i < g_npids; i++) {
+ /*
+ * We compute the color from a hash of the thread name. That way we get
+ * a distribution of different colors, and the same thread has the same
+ * color across multiple data sets. Unfortunately, even though the
+ * process name and thread id are invariant across data sets, the
+ * process id isn't, so we want to exclude that from the hash. Since
+ * the pid appears in parentheses after the process name and tid, we
+ * can just stop at the '(' character.
+ *
+ * We could create a substring and use the CLIB Jenkins hash, but given
+ * we're hashing ascii data, a suitable Bernstein hash is pretty much
+ * just as good, and it's easiest just to compute it inline.
+ */
+ label_char = get_track_label(g_pids[i].pid_value);
+ hash = 0;
+ while (*label_char != '\0' && *label_char != '(') {
+ hash = hash * 33 + *label_char++;
+ }
+ hash += hash >> 5; /* even out the lower order bits a touch */
+
+ /*
+ * OK, now we have our hash. We get the color by using the first three
+ * bytes of the hash for the RGB values (expanded from 8 to 16 bits),
+ * and then use the fourth byte to choose one of R, G, B and mask this
+ * one down. This ensures the color can't be too close to white and
+ * therefore hard to see.
+ *
+ * We also drop the top bit of the green, since bright green on its own
+ * is hard to see against white. Generally we err on the side of
+ * keeping it dark, rather than using the full spectrum of colors. This
+ * does result in something of a preponderance of muddy colors and a
+ * bit of a lack of cheery bright ones, but at least you can read
+ * everything. It would be nice to do better.
+ */
+ RGB[0] = (hash & 0xff000000) >> 16;
+ RGB[1] = (hash & 0x007f0000) >> 8;
+ RGB[2] = (hash & 0x0000ff00);
+ RGB[hash % 3] &= 0x1fff;
+
+ {
+ GdkColor color = {0, RGB[0], RGB[1], RGB[2]};
+ s_color[i] = color;
+ g_pids[i].color_index = i;
+ }
+ }
+
+ /*
+ * Actually allocate the colors in one bulk operation. We ignore the return
+ * values.
+ */
+ gdk_colormap_alloc_colors(gtk_widget_get_colormap(da),
+ s_color, g_npids+1, FALSE, TRUE, dont_care);
+}
+
+
+/****************************************************************************
+* chase_event_etc
+* Reorder the pid_index fields so the viewer "chases" the last selected
+* event.
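+*
+* Two passes: pass 1 walks from the selected event in the current
+* search/chase direction, assigning the track of each matching event
+* to the next free display row; pass 2 sweeps the event table
+* first-to-last to place any tracks pass 1 never reached.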
+****************************************************************************/
+
+static void chase_event_etc(enum chase_mode mode)
+{
+ pid_sort_t *psp, *new_pidvec;
+ pid_data_t *pp;
+ event_t *ep;
+ int pids_mapped;
+ ulong code_to_chase;
+ ulong datum_to_chase;
+ ulong pid_to_chase;
+ int i;
+ int winner;
+
+ if (!s_last_selected_event) {
+ infobox("No selected event",
+ "\nPlease select an event and try again...\n");
+ return;
+ }
+
+ /* Clear all index assignments */
+ psp = g_pids;
+ for (i = 0; i < g_npids; i++) {
+ pp = psp->pid;
+ pp->pid_index = 0xFFFFFFFF;
+ psp++;
+ }
+
+ ep = s_last_selected_event;
+ code_to_chase = ep->code;
+ datum_to_chase = ep->datum;
+ pid_to_chase = ep->pid->pid_value;
+ pids_mapped = 0;
+ new_pidvec = g_malloc0(sizeof(pid_sort_t)*g_npids);
+
+ while (1) {
+ if (srch_chase_dir == SRCH_CHASE_FORWARD) {
+ if (ep >= g_events + g_nevents)
+ break;
+ } else {
+ if (ep < g_events)
+ break;
+ }
+
+ winner = 0;
+ switch(mode) {
+ case CHASE_EVENT:
+ if (ep->code == code_to_chase) {
+ winner = 1;
+ }
+ break;
+
+ case CHASE_DATUM:
+ if (ep->datum == datum_to_chase) {
+ winner = 1;
+ }
+ break;
+
+ case CHASE_TRACK:
+ if (ep->pid->pid_value == pid_to_chase) {
+ winner = 1;
+ }
+ break;
+
+ default:
+ infobox("BUG", "unknown mode in chase_event_etc\n");
+ break;
+ }
+
+ if (winner) {
+ if (ep->pid->pid_index == 0xFFFFFFFF) {
+ ep->pid->pid_index = pids_mapped;
+ new_pidvec[pids_mapped].pid = ep->pid;
+ new_pidvec[pids_mapped].pid_value = ep->pid->pid_value;
+ new_pidvec[pids_mapped].color_index = 0;
+ pids_mapped++;
+ if (pids_mapped == g_npids)
+ break;
+ }
+ }
+ if (srch_chase_dir == SRCH_CHASE_FORWARD)
+ ep++;
+ else
+ ep--;
+ }
+
+ /* Pass 2, first-to-last, to collect stragglers */
+ ep = g_events;
+
+ while (ep < g_events + g_nevents) {
+ if (ep->pid->pid_index == 0xFFFFFFFF) {
+ ep->pid->pid_index = pids_mapped;
+ new_pidvec[pids_mapped].pid = ep->pid;
+ new_pidvec[pids_mapped].pid_value = ep->pid->pid_value;
+ new_pidvec[pids_mapped].color_index = 0;
+ pids_mapped++;
+ if (pids_mapped == g_npids)
+ break;
+ }
+ ep++;
+ }
+
+ if (pids_mapped != g_npids) {
+ infobox("BUG", "\nDidn't map all pids in chase_event_etc\n");
+ }
+
+ g_free (g_pids);
+ g_pids = new_pidvec;
+
+ /*
+ * The new g_pids vector contains the "chase" sort, so we revert
+ * the pid_index mapping to an identity map
+ */
+ psp = g_pids;
+
+ for (i = 0; i < g_npids; i++) {
+ pp = psp->pid;
+ pp->pid_index = i;
+ psp++;
+ }
+
+ /* AutoScroll the PID axis so we show the first "chased" event */
+ s_v1->first_pid_index = 0;
+ GTK_ADJUSTMENT(s_view1_vsadj)->value = 0.00;
+ gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj));
+ init_track_colors();
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* unchase_event_etc
+* Copy g_original_pids to g_pids, revert index mapping
+****************************************************************************/
+static void unchase_event_etc(void)
+{
+ int i;
+ pid_sort_t *psp;
+ pid_data_t *pp;
+
+ memcpy (g_pids, g_original_pids, sizeof(pid_sort_t)*g_npids);
+
+ /* Fix the pid structure index mappings */
+ psp = g_pids;
+
+ for (i = 0; i < g_npids; i++) {
+ pp = psp->pid;
+ pp->pid_index = i;
+ psp++;
+ }
+
+ /* Scroll PID axis to the top */
+ s_v1->first_pid_index = 0;
+ GTK_ADJUSTMENT(s_view1_vsadj)->value = 0.00;
+ gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj));
+ init_track_colors();
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* print_ps_header
+* To fit a reasonable-sized landscape mode plot onto letter-size paper,
+* scale everything by .75.
+****************************************************************************/
+
+static void print_ps_header (v1_geometry_t *vp, char *filename)
+{
+ time_t now;
+
+ now = time(0);
+
+ fprintf(s_printfp, "%%%%!PS-Adobe-3.0 EPSF-3.0\n");
+ fprintf(s_printfp, "%%%%Creator: G2 Event Viewer\n");
+ fprintf(s_printfp, "%%%%Title: %s\n", filename);
+ fprintf(s_printfp, "%%%%CreationDate: %s", ctime(&now));
+ fprintf(s_printfp, "%%%%DocumentData: Clean7Bit\n");
+ fprintf(s_printfp, "%%%%Origin: 0 0\n");
+ fprintf(s_printfp, "%%%%BoundingBox: 0 0 %d %d\n", vp->total_height,
+ vp->total_width);
+ fprintf(s_printfp, "%%%%LanguageLevel: 2\n");
+ fprintf(s_printfp, "%%%%Pages: 1\n");
+ fprintf(s_printfp, "%%%%Page: 1 1\n");
+ fprintf(s_printfp, "%%%%EOF\n");
+ fprintf(s_printfp, "/Times-Roman findfont\n");
+ fprintf(s_printfp, "12 scalefont\n");
+ fprintf(s_printfp, "setfont\n");
+ fprintf(s_printfp, ".75 .75 scale\n");
+}
+
+/****************************************************************************
+* xrt
+* X-coordinate rotate and translate. We need to emit PostScript that
+* has a reasonable aspect ratio for printing. To do that, we rotate the
+* intended picture by 90 degrees, using the standard 2D rotation
+* formula:
+*
+* Xr = x*cos(theta) - y*sin(theta);
+* Yr = x*sin(theta) + y*cos(theta);
+*
+* If we let theta = 90, this reduces to
+* Xr = -y
+* Yr = x
+*
+* Translate back to the origin in X by adding Ymax, yielding
+* Xrt = Ymax - y
+****************************************************************************/
+
+static inline int xrt(int x, int y)
+{
+ return (s_v1->total_height - y);
+}
+
+static inline int yrt(int x, int y)
+{
+ return(x);
+}
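+
+/*
+ * Worked example (illustrative values): with total_height = 400, the
+ * point (x, y) = (10, 20) maps to (xrt, yrt) = (400 - 20, 10) = (380, 10).
+ */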
+
+/****************************************************************************
+* print_screen_callback
+****************************************************************************/
+
+static boolean print_screen_callback(char *filename)
+{
+ s_printfp = fopen (filename, "wt");
+
+ if (s_printfp == NULL)
+ return(FALSE);
+
+ /*
+ * This variable lets us reuse the view1 display code as a print
+ * driver with a minimum of fuss: adding s_print_offset magically
+ * turns TBOX_DRAW_XXX into TBOX_PRINT_XXX. For example,
+ * TBOX_DRAW_PLAIN + s_print_offset is TBOX_PRINT_PLAIN while
+ * printing, and simply TBOX_DRAW_PLAIN on-screen, where
+ * s_print_offset is 0. Keep this in sync with g2.h if either changes.
+ */
+ s_print_offset = TBOX_PRINT_PLAIN - TBOX_DRAW_PLAIN;
+
+ print_ps_header(s_v1, filename);
+
+ display_pid_axis(s_v1);
+ display_event_data(s_v1);
+ display_time_axis(s_v1);
+
+ fprintf(s_printfp, "%%%%EOF\n");
+ fclose (s_printfp);
+ s_printfp = 0;
+ s_print_offset = 0;
+
+ /* For tactile feedback */
+ view1_display_when_idle();
+ return(TRUE);
+}
+
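+/****************************************************************************
+* event_time_cmp
+* qsort(3) comparison routine: order events by ascending timestamp
+****************************************************************************/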
+int event_time_cmp (const void *a, const void *b)
+{
+ const event_t *e1 = a;
+ const event_t *e2 = b;
+
+ if (e1->time < e2->time)
+ return -1;
+ else if (e1->time > e2->time)
+ return 1;
+ return 0;
+}
+
+/****************************************************************************
+* slew_tracks
+****************************************************************************/
+static void slew_tracks (v1_geometry_t *vp, enum view1_button_click which)
+{
+ event_t *ep;
+ pid_sort_t *pp;
+ int pid_index;
+ ulonglong delta;
+
+ delta = (ulonglong) (vp->last_time_interval);
+
+ /* Make sure we don't push events to the left of the big bang */
+ if (which == SLEW_LEFT_BUTTON) {
+ for (ep = g_events; ep < (g_events + g_nevents); ep++) {
+ pid_index = ep->pid->pid_index;
+ pp = (g_pids + pid_index);
+
+ if (pp->selected) {
+ if (ep->time < delta) {
+ infobox("Slew Range Error",
+ "\nCan't slew selected data left that far..."
+ "\nEvents would preceed the Big Bang (t=0)...");
+ goto out;
+ }
+ }
+ }
+ }
+
+ for (ep = g_events; ep < (g_events + g_nevents); ep++) {
+ pid_index = ep->pid->pid_index;
+ pp = (g_pids + pid_index);
+
+ if (pp->selected) {
+ if (which == SLEW_LEFT_BUTTON)
+ ep->time -= delta;
+ else
+ ep->time += delta;
+ }
+ }
+
+ /* Re-sort the events, to avoid screwing up the event display */
+ qsort (g_events, g_nevents, sizeof(event_t), event_time_cmp);
+
+ /* De-select tracks */
+ deselect_tracks();
+
+out:
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* view1_button_click_callback
+****************************************************************************/
+
+static void view1_button_click_callback(GtkButton *item, gpointer data)
+{
+ enum view1_button_click click = (enum view1_button_click) data;
+ event_t *ep;
+ ulonglong event_incdec;
+ ulonglong current_width;
+ ulonglong zoom_delta;
+
+ current_width = s_v1->maxvistime - s_v1->minvistime;
+ event_incdec = (current_width) / 3;
+
+ if (event_incdec == 0LL)
+ event_incdec = 1;
+
+ zoom_delta = (s_v1->maxvistime - s_v1->minvistime) / 6;
+
+ switch(click) {
+ case TOP_BUTTON:
+ /* First PID to top of window */
+ s_v1->first_pid_index = 0;
+ GTK_ADJUSTMENT(s_view1_vsadj)->value = 0.00;
+ gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj));
+ break;
+
+ case BOTTOM_BUTTON:
+ s_v1->first_pid_index = g_npids - s_v1->npids;
+ if (s_v1->first_pid_index < 0)
+ s_v1->first_pid_index = 0;
+ GTK_ADJUSTMENT(s_view1_vsadj)->value = (gdouble)s_v1->first_pid_index;
+ gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj));
+ break;
+
+ case SNAP_BUTTON:
+ add_snapshot();
+ break;
+
+ case NEXT_BUTTON:
+ next_snapshot();
+ break;
+
+ case DEL_BUTTON:
+ del_snapshot();
+ break;
+
+ case CHASE_EVENT_BUTTON:
+ chase_event_etc(CHASE_EVENT);
+ break;
+
+ case CHASE_DATUM_BUTTON:
+ chase_event_etc(CHASE_DATUM);
+ break;
+
+ case CHASE_TRACK_BUTTON:
+ chase_event_etc(CHASE_TRACK);
+ break;
+
+ case UNCHASE_BUTTON:
+ unchase_event_etc();
+ break;
+
+ case START_BUTTON:
+ start_button:
+ s_v1->minvistime = 0LL;
+ s_v1->maxvistime = current_width;
+ recompute_hscrollbar();
+ break;
+
+ case ZOOMIN_BUTTON:
+ s_v1->minvistime += zoom_delta;
+ s_v1->maxvistime -= zoom_delta;
+ recompute_hscrollbar();
+ break;
+
+ case SEARCH_AGAIN_BUTTON:
+ if (s_srchcode) {
+ event_search_internal();
+ break;
+ }
+ /* NOTE FALLTHROUGH */
+
+ case SEARCH_BUTTON:
+ event_search();
+ break;
+
+ case ZOOMOUT_BUTTON:
+ if (zoom_delta == 0LL)
+ zoom_delta = 1;
+
+ if (s_v1->minvistime >= zoom_delta) {
+ s_v1->minvistime -= zoom_delta;
+ s_v1->maxvistime += zoom_delta;
+ } else {
+ s_v1->minvistime = 0;
+ s_v1->maxvistime += zoom_delta*2;
+ }
+
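+ /* Clamp: never display more than 9/8 of the full trace width */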
+ if ((s_v1->maxvistime - s_v1->minvistime) * 8 >
+ g_events[g_nevents-1].time * 9) {
+ s_v1->minvistime = 0;
+ s_v1->maxvistime = g_events[g_nevents-1].time * 9 / 8;
+ }
+ recompute_hscrollbar();
+ break;
+
+ case END_BUTTON:
+ ep = (g_events + g_nevents - 1);
+ s_v1->maxvistime = ep->time + event_incdec/3;
+ s_v1->minvistime = s_v1->maxvistime - current_width;
+ if (s_v1->minvistime > s_v1->maxvistime)
+ goto start_button;
+ recompute_hscrollbar();
+ break;
+
+ case MORE_TRACES_BUTTON:
+ /* Reduce the strip height to fit more traces on screen */
+ s_v1->strip_height -= 1;
+
+ if (s_v1->strip_height < 1) {
+ s_v1->strip_height = 1;
+ }
+
+ /* Recalculate the number of strips on the screen */
+ s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) /
+ s_v1->strip_height;
+ recompute_vscrollbar();
+ break;
+
+ case LESS_TRACES_BUTTON:
+ /* Increase the strip height to fit fewer on the screen */
+ s_v1->strip_height += 1;
+ if (s_v1->strip_height > 80) {
+ s_v1->strip_height = 80;
+ }
+
+ /* Recalculate the number of strips on the screen */
+ s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) /
+ s_v1->strip_height;
+ recompute_vscrollbar();
+ break;
+
+ case FORWARD_BUTTON:
+ srch_chase_dir = SRCH_CHASE_FORWARD;
+ gtk_widget_hide (s_view1_forward_button);
+ gtk_widget_show (s_view1_backward_button);
+ break;
+
+ case BACKWARD_BUTTON:
+ srch_chase_dir = SRCH_CHASE_BACKWARD;
+ gtk_widget_show (s_view1_forward_button);
+ gtk_widget_hide (s_view1_backward_button);
+ break;
+
+ case SUMMARY_BUTTON:
+ summary_mode = TRUE;
+ gtk_widget_hide (s_view1_summary_button);
+ gtk_widget_show (s_view1_nosummary_button);
+ break;
+
+ case NOSUMMARY_BUTTON:
+ summary_mode = FALSE;
+ gtk_widget_show (s_view1_summary_button);
+ gtk_widget_hide (s_view1_nosummary_button);
+ break;
+
+ case SLEW_LEFT_BUTTON:
+ case SLEW_RIGHT_BUTTON:
+ if (s_v1->last_time_interval < 10e-9) {
+ infobox("slew", "\nNo time interval set...\n");
+ break;
+ }
+ slew_tracks (s_v1, click);
+ break;
+ }
+
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* view1_print_callback
+****************************************************************************/
+
+void view1_print_callback (GtkToggleButton *notused, gpointer nu2)
+{
+ modal_dialog("Print Screen (PostScript format) to file:",
+ "Invalid file: Print Screen to file:",
+ "g2.ps", print_screen_callback);
+}
+
+/****************************************************************************
+* view1_hscroll
+****************************************************************************/
+
+static void view1_hscroll (GtkAdjustment *adj, GtkWidget *notused)
+{
+ ulonglong current_width;
+
+ current_width = (s_v1->maxvistime - s_v1->minvistime);
+
+ s_v1->minvistime = (ulonglong)(adj->value);
+ s_v1->maxvistime = s_v1->minvistime + current_width;
+
+ view1_display_when_idle();
+
+#ifdef NOTDEF
+ g_print ("adj->lower = %.2f\n", adj->lower);
+ g_print ("adj->upper = %.2f\n", adj->upper);
+ g_print ("adj->value = %.2f\n", adj->value);
+ g_print ("adj->step_increment = %.2f\n", adj->step_increment);
+ g_print ("adj->page_increment = %.2f\n", adj->page_increment);
+ g_print ("adj->page_size = %.2f\n", adj->page_size);
+#endif
+}
+
+/****************************************************************************
+* view1_vscroll
+****************************************************************************/
+
+static void view1_vscroll (GtkAdjustment *adj, GtkWidget *notused)
+{
+ s_v1->first_pid_index = (int)adj->value;
+ view1_display_when_idle();
+}
+
+void set_pid_ax_width(int width)
+{
+ s_v1->pid_ax_width = width;
+ view1_display_when_idle();
+}
+
+/****************************************************************************
+* view1_init
+****************************************************************************/
+
+void view1_init(void)
+{
+
+ c_view1_draw_width = atol(getprop_default("drawbox_width", "700"));
+ c_view1_draw_height = atol(getprop_default("drawbox_height", "400"));
+
+ s_v1->pid_ax_width = 80;
+ s_v1->time_ax_height = 80;
+ s_v1->time_ax_spacing = 100;
+ s_v1->strip_height = 25;
+ s_v1->pop_offset = 20;
+ s_v1->pid_ax_offset = 34;
+ s_v1->event_offset = 40;
+ s_v1->total_height = c_view1_draw_height;
+ s_v1->total_width = c_view1_draw_width;
+ s_v1->first_pid_index = 0;
+
+ s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) /
+ s_v1->strip_height;
+
+ s_v1->minvistime = 0;
+ s_v1->maxvistime = 200;
+
+ s_view1_vbox = gtk_vbox_new(FALSE, 5);
+
+ s_view1_hbox = gtk_hbox_new(FALSE, 5);
+
+ da = gtk_drawing_area_new();
+ gtk_drawing_area_size(GTK_DRAWING_AREA(da), c_view1_draw_width,
+ c_view1_draw_height);
+
+#ifdef NOTDEF
+ gtk_signal_connect (GTK_OBJECT (da), "motion_notify_event",
+ (GtkSignalFunc) motion_notify_event, NULL);
+#endif
+
+ gtk_signal_connect (GTK_OBJECT (da), "expose_event",
+ (GtkSignalFunc) expose_event, NULL);
+
+ gtk_signal_connect (GTK_OBJECT(da),"configure_event",
+ (GtkSignalFunc) configure_event, NULL);
+
+ gtk_signal_connect (GTK_OBJECT (da), "button_press_event",
+ (GtkSignalFunc) button_press_event, NULL);
+
+ gtk_signal_connect (GTK_OBJECT (da), "button_release_event",
+ (GtkSignalFunc) button_press_event, NULL);
+
+ gtk_signal_connect (GTK_OBJECT (da), "motion_notify_event",
+ (GtkSignalFunc) button_press_event, NULL);
+
+ gtk_widget_set_events (da, GDK_BUTTON_PRESS_MASK
+ | GDK_BUTTON_RELEASE_MASK | GDK_EXPOSURE_MASK
+ | GDK_BUTTON_MOTION_MASK);
+
+
+ gtk_box_pack_start(GTK_BOX(s_view1_hbox), da, TRUE, TRUE, 0);
+
+ g_font = gdk_font_load ("8x13");
+ if (g_font == NULL) {
+ g_error("Couldn't load 8x13 font...\n");
+ }
+ gdk_font_ref(g_font);
+
+ /* PID axis menu */
+ s_view1_vmenubox = gtk_vbox_new(FALSE, 5);
+
+ s_view1_vsadj = gtk_adjustment_new(0.0 /* initial value */,
+ 0.0 /* minimum value */,
+ 2000.0 /* maximum value */,
+ 0.1 /* step increment */,
+ 10.0/* page increment */,
+ 10.0/* page size */);
+
+ s_view1_vscroll = gtk_vscrollbar_new (GTK_ADJUSTMENT(s_view1_vsadj));
+
+ gtk_signal_connect (GTK_OBJECT (s_view1_vsadj), "value-changed",
+ GTK_SIGNAL_FUNC (view1_vscroll),
+ (gpointer)s_view1_vscroll);
+
+ s_view1_topbutton = gtk_button_new_with_label("Top");
+ s_view1_bottombutton = gtk_button_new_with_label("Bottom");
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_topbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) TOP_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_bottombutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) BOTTOM_BUTTON);
+
+ /* More Traces button and Less Traces button */
+ s_view1_more_traces_button = gtk_button_new_with_label("More Traces");
+ s_view1_less_traces_button = gtk_button_new_with_label("Less Traces");
+ gtk_signal_connect (GTK_OBJECT(s_view1_more_traces_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) MORE_TRACES_BUTTON);
+ gtk_signal_connect (GTK_OBJECT(s_view1_less_traces_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) LESS_TRACES_BUTTON);
+
+#ifdef NOTDEF
+ /* Trick to bottom-justify the menu: */
+ s_view1_pad1 = gtk_vbox_new(FALSE, 0);
+ gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_pad1,
+ TRUE, FALSE, 0);
+
+#endif
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_topbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_vscroll,
+ TRUE, TRUE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_bottombutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_more_traces_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_less_traces_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hbox), s_view1_vmenubox,
+ FALSE, FALSE, 0);
+
+ /* Time axis menu */
+
+ s_view1_hmenubox = gtk_hbox_new(FALSE, 5);
+
+ s_view1_startbutton = gtk_button_new_with_label("Start");
+
+ s_view1_zoominbutton = gtk_button_new_with_label("ZoomIn");
+
+ s_view1_searchbutton = gtk_button_new_with_label("Search");
+
+ s_view1_srchagainbutton = gtk_button_new_with_label("Search Again");
+
+ s_view1_zoomoutbutton = gtk_button_new_with_label("ZoomOut");
+
+ s_view1_endbutton = gtk_button_new_with_label("End");
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_startbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) START_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_zoominbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) ZOOMIN_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_searchbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) SEARCH_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_srchagainbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) SEARCH_AGAIN_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_zoomoutbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) ZOOMOUT_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_endbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) END_BUTTON);
+
+ s_view1_hsadj = gtk_adjustment_new(0.0 /* initial value */,
+ 0.0 /* minimum value */,
+ 2000.0 /* maximum value */,
+ 0.1 /* step increment */,
+ 10.0/* page increment */,
+ 10.0/* page size */);
+
+ s_view1_hscroll = gtk_hscrollbar_new (GTK_ADJUSTMENT(s_view1_hsadj));
+
+ gtk_signal_connect (GTK_OBJECT (s_view1_hsadj), "value-changed",
+ GTK_SIGNAL_FUNC (view1_hscroll),
+ (gpointer)s_view1_hscroll);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_startbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_hscroll,
+ TRUE, TRUE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_endbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_zoominbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_searchbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_srchagainbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_zoomoutbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_hbox,
+ TRUE, TRUE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_hmenubox,
+ FALSE, FALSE, 0);
+
+
+ s_view1_hmenubox2 = gtk_hbox_new(FALSE, 5);
+
+ s_view1_snapbutton = gtk_button_new_with_label("Snap");
+
+ s_view1_nextbutton = gtk_button_new_with_label("Next");
+
+ s_view1_delbutton = gtk_button_new_with_label("Del");
+
+ s_view1_chase_event_button = gtk_button_new_with_label("ChaseEvent");
+
+ s_view1_chase_datum_button = gtk_button_new_with_label("ChaseDatum");
+
+ s_view1_chase_track_button = gtk_button_new_with_label("ChaseTrack");
+
+ s_view1_unchasebutton = gtk_button_new_with_label("NoChase");
+
+ s_view1_forward_button = gtk_button_new_with_label("->SrchChase(is<-)");
+ s_view1_backward_button = gtk_button_new_with_label("<-SrchChase(is->)");
+
+ s_view1_summary_button = gtk_button_new_with_label("Summary");
+ s_view1_nosummary_button = gtk_button_new_with_label("NoSummary");
+
+ s_view1_time_slew_left_button = gtk_button_new_with_label("<-TimeSlew");
+ s_view1_time_slew_right_button = gtk_button_new_with_label("TimeSlew->");
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_snapbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) SNAP_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_nextbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) NEXT_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_delbutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) DEL_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_chase_event_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) CHASE_EVENT_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_chase_datum_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) CHASE_DATUM_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_chase_track_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) CHASE_TRACK_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_unchasebutton), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) UNCHASE_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_forward_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) FORWARD_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_backward_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) BACKWARD_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_summary_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) SUMMARY_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_nosummary_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) NOSUMMARY_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_time_slew_left_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) SLEW_LEFT_BUTTON);
+
+ gtk_signal_connect (GTK_OBJECT(s_view1_time_slew_right_button), "clicked",
+ GTK_SIGNAL_FUNC(view1_button_click_callback),
+ (gpointer) SLEW_RIGHT_BUTTON);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_hmenubox2,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_snapbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_nextbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_delbutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_chase_event_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_chase_datum_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_chase_track_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_unchasebutton,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_forward_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_backward_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_summary_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_nosummary_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2),
+ s_view1_time_slew_left_button,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2),
+ s_view1_time_slew_right_button,
+ FALSE, FALSE, 0);
+
+ s_view1_label = gtk_label_new(NULL);
+
+ gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_label,
+ FALSE, FALSE, 0);
+
+ gtk_box_pack_start (GTK_BOX(g_mainhbox), s_view1_vbox,
+ TRUE, TRUE, 0);
+
+ gtk_widget_show_all (s_view1_vbox);
+ GTK_WIDGET_SET_FLAGS(da, GTK_CAN_FOCUS);
+ gtk_widget_grab_focus(da);
+
+ gtk_widget_hide (s_view1_forward_button);
+ gtk_widget_hide (summary_mode ? s_view1_summary_button
+ : s_view1_nosummary_button);
+
+ zi_source = gdk_bitmap_create_from_data (NULL, (char *)zi_bits, zi_width,
+ zi_height);
+ zi_mask = gdk_bitmap_create_from_data (NULL, (char *)zi_bkgd, zi_width,
+ zi_height);
+
+ zi_cursor = (GdkCursor *) gdk_cursor_new_from_pixmap (zi_source,
+ zi_mask, &fg_black,
+ &bg_white, zi_x_hot,
+ zi_y_hot);
+ gdk_pixmap_unref (zi_source);
+ gdk_pixmap_unref (zi_mask);
+
+ norm_cursor = (GdkCursor *) gdk_cursor_new (GDK_TOP_LEFT_ARROW);
+}
+
+/****************************************************************************
+* line_print
+****************************************************************************/
+
+void line_print (int x1, int y1, int x2, int y2)
+{
+ fprintf(s_printfp, "newpath\n");
+ fprintf(s_printfp, "%d %d moveto\n", xrt(x1, s_v1->total_height - y1),
+ yrt(x1, s_v1->total_height - y1));
+
+ fprintf(s_printfp, "%d %d lineto\n", xrt (x2, s_v1->total_height - y2),
+ yrt (x2, s_v1->total_height - y2));
+ fprintf(s_printfp, "1 setlinewidth\n");
+ fprintf(s_printfp, "stroke\n");
+}
+
+/****************************************************************************
+* tbox_print
+****************************************************************************/
+GdkRectangle *tbox_print (char *s, int x, int y, enum view1_tbox_fn function,
+ GdkRectangle *rp)
+{
+ if (function == TBOX_PRINT_BOXED) {
+ rp->width -= 4;
+ }
+
+ if ((function == TBOX_PRINT_BOXED) ||
+ (function == TBOX_PRINT_EVENT)) {
+
+ fprintf(s_printfp, "newpath\n");
+ fprintf(s_printfp, "0 setlinewidth\n");
+ fprintf(s_printfp, "%d %d moveto\n",
+ xrt(rp->x, s_v1->total_height - rp->y),
+ yrt(rp->x, s_v1->total_height - rp->y));
+
+ fprintf(s_printfp, "%d %d lineto\n",
+ xrt (rp->x+rp->width, s_v1->total_height - rp->y),
+ yrt (rp->x+rp->width, s_v1->total_height - rp->y));
+
+ fprintf(s_printfp, "%d %d lineto\n",
+ xrt(rp->x+rp->width, s_v1->total_height - (rp->y+rp->height)),
+ yrt(rp->x+rp->width, s_v1->total_height - (rp->y+rp->height)));
+
+ fprintf(s_printfp, "%d %d lineto\n",
+ xrt(rp->x, s_v1->total_height - (rp->y+rp->height)),
+ yrt(rp->x, s_v1->total_height - (rp->y+rp->height)));
+
+ fprintf(s_printfp, "%d %d lineto\n",
+ xrt(rp->x, s_v1->total_height - rp->y),
+ yrt(rp->x, s_v1->total_height - rp->y));
+
+ fprintf(s_printfp, "stroke\n");
+ }
+
+ if ((function == TBOX_PRINT_BOXED) ||
+ (function == TBOX_PRINT_PLAIN)) {
+
+ fprintf(s_printfp, "newpath\n");
+ fprintf(s_printfp, "%d %d moveto\n",
+ xrt(x, s_v1->total_height - (y-2)),
+ yrt(x, s_v1->total_height - (y-2)));
+ fprintf(s_printfp, "gsave\n");
+ fprintf(s_printfp, "90 rotate\n");
+ fprintf(s_printfp, "(%s) show\n", s);
+ fprintf(s_printfp, "grestore\n");
+ }
+
+ return(rp);
+}
+
+/****************************************************************************
+* tbox - draws an optionally boxed string whose lower left-hand
+* corner is at (x, y). As usual, Y is backwards.
+****************************************************************************/
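+
+/*
+ * Illustrative call (hypothetical coordinates): draw the label "foo"
+ * boxed, with its lower left-hand corner at (100, 50):
+ *
+ * (void) tbox("foo", 100, 50, TBOX_DRAW_BOXED);
+ */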
+
+GdkRectangle *tbox (char *s, int x, int y, enum view1_tbox_fn function)
+{
+ static GdkRectangle update_rect;
+ gint lbearing, rbearing, width, ascent, descent;
+
+ gdk_string_extents (g_font, s,
+ &lbearing, &rbearing,
+ &width, &ascent, &descent);
+
+ /*
+ * If we have enough room to display full size events, then just
+ * use the BOXED function instead of the EVENT function.
+ */
+ if (s_v1->strip_height > 9) {
+ switch (function) {
+ case TBOX_DRAW_EVENT: function = TBOX_DRAW_BOXED; break;
+ case TBOX_GETRECT_EVENT: function = TBOX_GETRECT_BOXED; break;
+ case TBOX_PRINT_EVENT: function = TBOX_PRINT_BOXED; break;
+ default:
+ /* Nothing to remap */
+ break;
+ }
+ }
+
+ switch (function) {
+ case TBOX_DRAW_BOXED:
+ gdk_draw_rectangle (pm, da->style->white_gc, TRUE,
+ x, y - (ascent+descent+3), width + 2,
+ ascent + descent + 3);
+
+ gdk_draw_rectangle (pm, da->style->black_gc, FALSE,
+ x, y - (ascent+descent+3), width + 2,
+ ascent + descent + 3);
+
+ gdk_draw_string (pm, g_font, da->style->black_gc,
+ x + 1, y - 1, (const gchar *)s);
+ /* NOTE FALLTHROUGH */
+ case TBOX_GETRECT_BOXED:
+ update_rect.x = x;
+ update_rect.y = y -(ascent+descent+3);
+ update_rect.width = width + 3;
+ update_rect.height = ascent + descent + 4;
+ if (function == TBOX_DRAW_BOXED)
+ gtk_widget_draw (da, &update_rect);
+ break;
+
+ case TBOX_DRAW_EVENT:
+ /* We have a small event to draw...no text */
+ gdk_draw_rectangle (pm, da->style->black_gc, FALSE,
+ x, y - 1, 3, 3);
+ /* NOTE FALLTHROUGH */
+ case TBOX_GETRECT_EVENT:
+ update_rect.x = x;
+ update_rect.y = y - 1;
+ update_rect.width = 4;
+ update_rect.height = 4;
+ if (function == TBOX_DRAW_EVENT)
+ gtk_widget_draw (da, &update_rect);
+ break;
+
+
+ case TBOX_DRAW_PLAIN:
+
+ gdk_draw_string (pm, g_font, da->style->black_gc,
+ x + 1, y - 1, (const gchar *)s);
+ /* NOTE FALLTHROUGH */
+ case TBOX_GETRECT_PLAIN:
+ update_rect.x = x;
+ update_rect.y = y -(ascent+descent+1);
+ update_rect.width = width;
+ update_rect.height = ascent + descent;
+ if (function == TBOX_DRAW_PLAIN)
+ gtk_widget_draw (da, &update_rect);
+ break;
+
+ case TBOX_PRINT_BOXED:
+ update_rect.x = x;
+ update_rect.y = y -(ascent+descent+3);
+ update_rect.width = width + 3;
+ update_rect.height = ascent + descent + 4;
+ /* note fallthrough */
+ case TBOX_PRINT_PLAIN:
+ return(tbox_print(s, x, y, function, &update_rect));
+
+ case TBOX_PRINT_EVENT:
+ /* We have a small event box to print...no text */
+ update_rect.x = x;
+ update_rect.y = y - 1;
+ update_rect.width = 4;
+ update_rect.height = 4;
+ return(tbox_print(s, x, y, function, &update_rect));
+ }
+ return(&update_rect);
+}
+
+/****************************************************************************
+* line
+*
+* For lines there is a primitive batching facility that doesn't update
+* the drawing area until the batch is complete. This is handy for drawing
+* the pid axis and for summary mode.
+*
+* line_batch_mode contains the state for this:
+*
+* BATCH_OFF: no batching, update for every line
+* BATCH_NEW: just entered a batch, so initialize the area to update from
+* scratch
+* BATCH_EXISTING: have drawn at least one line in batch mode, so the update
+* area should only be expanded from now on to include the
+* union of the "rectangular hull" of all lines
+****************************************************************************/
+
+static enum { BATCH_OFF, BATCH_NEW, BATCH_EXISTING } line_batch_mode;
+static int line_batch_count;
+static int line_minx, line_miny, line_maxx, line_maxy;
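+
+/*
+ * Illustrative batch usage (hypothetical coordinates): one widget
+ * update covers the rectangular hull of all three lines:
+ *
+ * line_batch_start();
+ * line(0, 10, 100, 10, LINE_DRAW_BLACK);
+ * line(0, 20, 100, 20, LINE_DRAW_BLACK);
+ * line(0, 30, 100, 30, LINE_DRAW_BLACK);
+ * line_batch_end();
+ */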
+
+void line_batch_start (void)
+{
+ line_batch_mode = BATCH_NEW;
+ line_batch_count = 0;
+}
+
+void line_batch_end (void)
+{
+ GdkRectangle update_rect;
+ if (line_batch_count > 0) {
+ update_rect.x = line_minx;
+ update_rect.y = line_miny;
+ update_rect.width = (line_maxx - line_minx) + 1;
+ update_rect.height = (line_maxy - line_miny) + 1;
+ gtk_widget_draw (da, &update_rect);
+ }
+ line_batch_mode = BATCH_OFF;
+}
+
+void line (int x1, int y1, int x2, int y2, enum view1_line_fn function)
+{
+ GdkRectangle update_rect;
+ GdkGC *gc = NULL;
+
+ switch(function) {
+ case LINE_DRAW_BLACK:
+ gc = da->style->black_gc;
+ break;
+
+ case LINE_DRAW_WHITE:
+ gc = da->style->white_gc;
+ break;
+
+ case LINE_PRINT:
+ line_print (x1, y1, x2, y2);
+ return;
+ }
+
+ gdk_draw_line (pm, gc, x1, y1, x2, y2);
+
+ switch (line_batch_mode) {
+ case BATCH_OFF:
+ update_rect.x = x1;
+ update_rect.y = y1;
+ update_rect.width = (x2-x1) + 1;
+ update_rect.height = (y2-y1) + 1;
+ gtk_widget_draw (da, &update_rect);
+ break;
+
+ case BATCH_NEW:
+ line_minx = x1;
+ line_maxx = x2;
+ line_miny = y1;
+ line_maxy = y2;
+ line_batch_mode = BATCH_EXISTING;
+ line_batch_count = 1;
+ break;
+
+ case BATCH_EXISTING:
+ if (line_minx > x1)
+ line_minx = x1;
+ if (line_miny > y1)
+ line_miny = y1;
+ if (line_maxx < x2)
+ line_maxx = x2;
+ if (line_maxy < y2)
+ line_maxy = y2;
+ line_batch_count++;
+ break;
+ }
+}
+
+
+/****************************************************************************
+* display_pid_axis
+****************************************************************************/
+
+static void display_pid_axis(v1_geometry_t *vp)
+{
+ int y, i, label_tick;
+ int last_printed_y = -vp->strip_height;
+ pid_sort_t *pp;
+ int pid_index;
+ char *label_fmt;
+ char tmpbuf [128];
+
+ /* No pids yet? Outta here */
+ if (g_pids == NULL)
+ return;
+
+ line_batch_start();
+
+ for (i = 0; i < vp->npids; i++) {
+ pid_index = vp->first_pid_index + i;
+ if (pid_index >= g_npids)
+ break;
+
+ pp = (g_pids + pid_index);
+
+ set_color(pid_index);
+
+ label_fmt = get_track_label(pp->pid_value);
+ snprintf(tmpbuf, sizeof(tmpbuf)-1, label_fmt, pp->pid_value);
+
+ y = i*vp->strip_height + vp->pid_ax_offset;
+
+ /*
+ * Have we moved down far enough that this label won't overlap
+ * the previous one?
+ */
+ if (y - last_printed_y > 9) {
+ /* Draw label */
+ tbox(tmpbuf, 0, y +4, TBOX_DRAW_PLAIN+s_print_offset);
+
+ last_printed_y = y;
+
+ /*
+ * And let the line stick out a bit more to indicate this label
+ * relates to the following line.
+ */
+ label_tick = 4;
+ }
+ else {
+ label_tick = 0;
+ }
+
+ /* Draw axis line, but only if the lines aren't too close together */
+ if (vp->strip_height > 4) {
+ line(vp->pid_ax_width - label_tick, y+4*s_print_offset,
+ vp->total_width, y+4*s_print_offset,
+ LINE_DRAW_BLACK+s_print_offset);
+ }
+ }
+
+ set_color(COLOR_DEFAULT);
+ line_batch_end();
+}
+
+/****************************************************************************
+* view1_read_events_callback
+* New event data just showed up, reset a few things.
+****************************************************************************/
+
+void view1_read_events_callback(void)
+{
+ s_v1->first_pid_index = 0;
+
+ s_v1->minvistime = 0LL;
+ s_v1->maxvistime = (g_events[g_nevents - 1].time * 9)/ 8;
+ s_srchindex = 0;
+ s_srchcode = 0;
+ s_last_selected_event = 0;
+
+ init_track_colors();
+
+ recompute_hscrollbar();
+ recompute_vscrollbar();
+}
+
+/****************************************************************************
+* display_event_data
+****************************************************************************/
+
+static void display_event_data(v1_geometry_t *vp)
+{
+ int start_index;
+ int pid_index;
+ int x, y;
+ event_t *ep;
+ event_def_t *edp;
+ double time_per_pixel;
+ char tmpbuf[1024];
+ GdkRectangle *print_rect;
+ int *last_x_used;
+
+ /* Happens if one loads the event def header first, for example. */
+ if (g_nevents == 0)
+ return;
+
+ time_per_pixel = dtime_per_pixel(vp);
+
+ start_index = find_event_index (vp->minvistime);
+
+ /* Scrolled too far right? */
+ if (start_index >= g_nevents)
+ return;
+
+ ep = (g_events + start_index);
+
+ if (s_print_offset || summary_mode) {
+ last_x_used = (int *)g_malloc0(vp->npids * sizeof(int));
+ } else {
+ last_x_used = NULL;
+ }
+
+ line_batch_start();
+
+ while (ep < (g_events + g_nevents) &&
+ (ep->time < vp->maxvistime)) {
+ pid_index = ep->pid->pid_index;
+ set_color(pid_index);
+
+ /* First filter: pid out of range */
+ if ((pid_index < vp->first_pid_index) ||
+ (pid_index >= vp->first_pid_index + vp->npids)) {
+ ep++;
+ continue;
+ }
+
+ /* Second filter: event hidden */
+ edp = find_event_definition (ep->code);
+ if (!edp->selected) {
+ ep++;
+ continue;
+ }
+
+ /* Display it... */
+
+ pid_index -= vp->first_pid_index;
+
+ y = pid_index*vp->strip_height + vp->event_offset;
+
+ x = vp->pid_ax_width +
+ (int)(((double)(ep->time - vp->minvistime)) / time_per_pixel);
+
+ if (last_x_used != NULL && x < last_x_used[pid_index]) {
+ ep++;
+ continue;
+ }
+
+ if (ep->flags & (EVENT_FLAG_SELECT | EVENT_FLAG_SEARCHRSLT)) {
+ if (ep->flags & EVENT_FLAG_SELECT) {
+ format_popbox_string(tmpbuf, sizeof(tmpbuf), ep, edp);
+#ifdef NOTDEF
+ sprintf(tmpbuf, edp->name);
+ sprintf(tmpbuf+strlen(tmpbuf), ": ");
+ sprintf(tmpbuf+strlen(tmpbuf), edp->format, ep->datum);
+#endif
+ } else {
+ sprintf(tmpbuf, "SEARCH RESULT");
+ }
+ print_rect = tbox(tmpbuf, x, y - vp->pop_offset,
+ TBOX_DRAW_BOXED+s_print_offset);
+ line(x, y-vp->pop_offset, x, y, LINE_DRAW_BLACK+s_print_offset);
+ if (last_x_used != NULL)
+ last_x_used[pid_index] = x + print_rect->width;
+ }
+ if (summary_mode) {
+ int delta = vp->strip_height / 3;
+ if (delta < 1)
+ delta = 1;
+ y = pid_index*vp->strip_height + vp->pid_ax_offset;
+ line(x, y - delta, x, y + delta, LINE_DRAW_BLACK);
+ last_x_used[pid_index] = x + 1;
+ } else {
+ sprintf(tmpbuf, "%ld", ep->code);
+ print_rect = tbox(tmpbuf, x, y, TBOX_DRAW_EVENT+s_print_offset);
+ if (last_x_used != NULL)
+ last_x_used[pid_index] = x + print_rect->width;
+ }
+
+ ep++;
+ }
+ if (last_x_used)
+ g_free(last_x_used);
+ line_batch_end();
+ set_color(COLOR_DEFAULT);
+}
+
+/****************************************************************************
+* display_clear
+****************************************************************************/
+
+static void display_clear(void)
+{
+ GdkRectangle update_rect;
+
+ gdk_draw_rectangle (pm, da->style->white_gc, TRUE,
+ 0, 0, da->allocation.width,
+ da->allocation.height);
+
+ update_rect.x = 0;
+ update_rect.y = 0;
+ update_rect.width = da->allocation.width;
+ update_rect.height = da->allocation.height;
+
+ gtk_widget_draw (da, &update_rect);
+}
+
+/****************************************************************************
+* display_time_axis
+****************************************************************************/
+
+static void display_time_axis(v1_geometry_t *vp)
+{
+ int x, y, i;
+ int xoffset, nticks;
+ char tmpbuf [128];
+ double unit_divisor;
+ double time;
+ char *units;
+ double time_per_pixel;
+
+ y = vp->npids * vp->strip_height + vp->pid_ax_offset;
+
+ x = vp->pid_ax_width;
+
+ nticks = (vp->total_width - vp->pid_ax_width) / vp->time_ax_spacing;
+
+ time_per_pixel = dtime_per_pixel(vp);
+
+ units = "ns";
+ unit_divisor = 1.00;
+
+ if ((vp->maxvistime / unit_divisor) > 1000) {
+ units = "us";
+ unit_divisor = 1000.00;
+ }
+
+ if ((vp->maxvistime / unit_divisor) > 1000) {
+ units = "ms";
+ unit_divisor = 1000.00*1000.00;
+ }
+ if ((vp->maxvistime / unit_divisor) > 1000) {
+ units = "s";
+ unit_divisor = 1000.00*1000.00*1000.00;
+ }
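+
+ /*
+ * Example: maxvistime = 2.5e6 ns walks the ladder to "ms"
+ * (unit_divisor = 1e6).
+ */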
+
+ /* Draw line */
+ line(x, y, vp->total_width, y, LINE_DRAW_BLACK+s_print_offset);
+
+ xoffset = 0;
+
+ for (i = 0; i < nticks; i++) {
+ /* Tick mark */
+ line(x+xoffset, y-3, x+xoffset, y+3, LINE_DRAW_BLACK+s_print_offset);
+
+ time = (double)(x + xoffset - vp->pid_ax_width);
+ time *= time_per_pixel;
+ time += (double)(vp->minvistime);
+ time /= unit_divisor;
+
+ sprintf (tmpbuf, "%.2f%s", time, units);
+
+ tbox(tmpbuf, x+xoffset, y+15, TBOX_DRAW_PLAIN+s_print_offset);
+
+ xoffset += vp->time_ax_spacing;
+ }
+}
+
+/****************************************************************************
+* clear_scoreboard
+* Forget about any temporary displays, they're gone now...
+****************************************************************************/
+
+static void clear_scoreboard(void)
+{
+ s_result_up = FALSE;
+}
+
+/****************************************************************************
+* view1_display
+****************************************************************************/
+
+void view1_display(void)
+{
+ display_clear();
+ display_pid_axis(s_v1);
+ display_event_data(s_v1);
+ display_time_axis(s_v1);
+ clear_scoreboard();
+}
+
+static gint idle_tag;
+
+/****************************************************************************
+* view1_display_eventually
+****************************************************************************/
+
+static void view1_display_eventually(void)
+{
+ gtk_idle_remove(idle_tag);
+ idle_tag = 0;
+ view1_display();
+}
+
+
+/****************************************************************************
+* view1_display_when_idle
+****************************************************************************/
+
+void view1_display_when_idle(void)
+{
+ if (idle_tag == 0) {
+ idle_tag = gtk_idle_add((GtkFunction) view1_display_eventually, 0);
+ }
+}
+
+/****************************************************************************
+* view1_about
+****************************************************************************/
+
+void view1_about (char *tmpbuf)
+{
+ int nsnaps;
+ snapshot_t *snaps;
+
+ sprintf(tmpbuf+strlen(tmpbuf), "Minvistime %lld\nMaxvistime %lld\n",
+ s_v1->minvistime, s_v1->maxvistime);
+ sprintf(tmpbuf+strlen(tmpbuf), "Strip Height %d\n",
+ s_v1->strip_height);
+
+ for (nsnaps = 0, snaps = s_snapshots; snaps; snaps = snaps->next) {
+ nsnaps++;
+ }
+ sprintf(tmpbuf+strlen(tmpbuf), "%d snapshots in the ring\n", nsnaps);
+}
diff --git a/src/tools/perftool/c2cpel.c b/src/tools/perftool/c2cpel.c
new file mode 100644
index 00000000..379c2bc6
--- /dev/null
+++ b/src/tools/perftool/c2cpel.c
@@ -0,0 +1,251 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/mem.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <time.h>
+#include "cpel.h"
+#include "cpel_util.h"
+
+static elog_main_t elog_main;
+
+/*
+ * convert_clib_file
+ */
+void convert_clib_file(char *clib_file)
+{
+ clib_error_t *error = 0;
+ int i;
+ elog_main_t *em = &elog_main;
+ double starttime, delta;
+
+ error = elog_read_file (&elog_main, clib_file);
+
+ if (error) {
+ clib_warning("%U", format_clib_error, error);
+ exit (1);
+ }
+
+ starttime = em->events[0].time;
+
+ for (i = 0; i < vec_len (em->events); i++) {
+ elog_event_t *e; /* clib event */
+ evt_t *ep; /* xxx2cpel event */
+ u8 *s;
+ u64 timestamp;
+ elog_event_type_t *t;
+ u8 *brief_event_name;
+ u8 *track_name;
+ int j;
+
+ e = vec_elt_at_index(em->events, i);
+
+ /* Seconds since start of log */
+ delta = e->time - starttime;
+
+ /* u64 nanoseconds since start of log */
+ timestamp = delta * 1e9;
+
+ s = format (0, "%U%c", format_elog_event, em, e, 0);
+
+ /* allocate an event instance */
+ vec_add2(the_events, ep, 1);
+ ep->timestamp = timestamp;
+
+ /* convert string event code to a real number */
+ t = vec_elt_at_index (em->event_types, e->type);
+
+ /*
+ * Construct a reasonable event name.
+ * Truncate the format string at the first whitespace break or
+ * printf conversion character: e.g. "rx %d packets" yields "rx".
+ */
+ brief_event_name = format (0, "%s", t->format);
+
+ for (j = 0; j < vec_len (brief_event_name); j++) {
+ if (brief_event_name[j] == ' ' ||
+ brief_event_name[j] == '%' ||
+ brief_event_name[j] == '(') {
+ brief_event_name[j] = 0;
+ break;
+ }
+ }
+ /* Throw away that much of the formatted event */
+ vec_delete (s, j+1, 0);
+
+ ep->event_id = find_or_add_event(brief_event_name, "%s");
+
+ track_name = format (0, "%U%c", format_elog_track, em, e, 0);
+
+ ep->track_id = find_or_add_track (track_name);
+
+ ep->datum = find_or_add_strtab(s);
+
+ vec_free (track_name);
+ vec_free(brief_event_name);
+ vec_free(s);
+ }
+}
+
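+/*
+ * vec_basename
+ * Return the filename portion of a path as a new u8 vector.
+ */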
+u8 *vec_basename (char *s)
+{
+ u8 * rv;
+ char *cp = s;
+
+ while (*cp)
+ cp++;
+
+ cp--;
+
+ while (cp > s && *cp != '/')
+ cp--;
+
+ if (cp > s)
+ cp++;
+
+ rv = format (0, "%s", cp);
+ return rv;
+}
+
+
+int event_compare (const void *a0, const void *a1)
+{
+ evt_t *e0 = (evt_t *)a0;
+ evt_t *e1 = (evt_t *)a1;
+
+ if (e0->timestamp < e1->timestamp)
+ return -1;
+ else if (e0->timestamp > e1->timestamp)
+ return 1;
+ return 0;
+}
+
+int main (int argc, char **argv)
+{
+ int curarg=1;
+ char **inputfiles = 0;
+ char *outputfile = 0;
+ FILE *ofp;
+
+ if (argc < 3)
+ goto usage;
+
+ while (curarg < argc) {
+ if (!strncmp(argv[curarg], "--input-file", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ vec_add1 (inputfiles, argv[curarg]);
+ curarg++;
+ continue;
+ }
+ clib_warning("Missing filename after --input-file\n");
+ exit (1);
+ }
+
+ if (!strncmp(argv[curarg], "--output-file", 3)) {
+ curarg ++;
+ if (curarg < argc) {
+ outputfile = argv[curarg];
+ curarg ++;
+ continue;
+ }
+ clib_warning("Missing filename after --output-file\n");
+ exit(1);
+ }
+ vec_add1 (inputfiles, argv[curarg]);
+ curarg++;
+ continue;
+
+ usage:
+ fformat(stderr,
+ "c2cpel [--input-file] <filename> --output-file <filename>\n");
+ exit(1);
+ }
+
+ if (vec_len(inputfiles) == 0 || outputfile == 0)
+ goto usage;
+
+ if (vec_len(inputfiles) > 1)
+ goto usage;
+
+ clib_mem_init (0, ((uword)3<<30));
+
+ cpel_util_init();
+
+ convert_clib_file (inputfiles[0]);
+
+ ofp = fopen (outputfile, "w");
+ if (ofp == NULL) {
+ clib_unix_warning ("couldn't create %s", outputfile);
+ exit (1);
+ }
+
+ alpha_sort_tracks();
+ fixup_event_tracks();
+
+ /*
+ * Four sections: string-table, event definitions, track defs, events.
+ */
+ if (!write_cpel_header(ofp, 4)) {
+ clib_warning ("Error writing cpel header to %s...\n", outputfile);
+ unlink(outputfile);
+ exit(1);
+ }
+
+ if (!write_string_table(ofp)) {
+ clib_warning ("Error writing string table to %s...\n", outputfile);
+ unlink(outputfile);
+ exit(1);
+ }
+
+ if (!write_event_defs(ofp)) {
+ clib_warning ("Error writing event defs to %s...\n", outputfile);
+ unlink(outputfile);
+ exit(1);
+ }
+
+ if (!write_track_defs(ofp)) {
+ clib_warning ("Error writing track defs to %s...\n", outputfile);
+ unlink(outputfile);
+ exit(1);
+ }
+
+ if (!write_events(ofp, (u64) 1e9)) {
+ clib_warning ("Error writing events to %s...\n", outputfile);
+ unlink(outputfile);
+ exit(1);
+
+ }
+ fclose(ofp);
+ exit (0);
+}
diff --git a/src/tools/perftool/cpel.h b/src/tools/perftool/cpel.h
new file mode 100644
index 00000000..0bfb1a68
--- /dev/null
+++ b/src/tools/perftool/cpel.h
@@ -0,0 +1,83 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2005-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _CPEL_H_
+#define _CPEL_H_ 1
+
+typedef struct cpel_file_header_ {
+ unsigned char endian_version;
+ unsigned char pad;
+ unsigned short nsections;
+ unsigned int file_date;
+} cpel_file_header_t;
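+
+/*
+ * A CPEL file is a cpel_file_header_t followed by nsections sections,
+ * each a cpel_section_header_t followed by data_length bytes of
+ * payload. As written by cpel_util.c, multi-byte fields are in
+ * network (big-endian) byte order.
+ */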
+
+#define CPEL_FILE_LITTLE_ENDIAN 0x80
+#define CPEL_FILE_VERSION 0x01
+#define CPEL_FILE_VERSION_MASK 0x7F
+
+typedef struct cpel_section_header_ {
+ unsigned int section_type;
+ unsigned int data_length; /* does NOT include type and itself */
+} cpel_section_header_t;
+
+#define CPEL_SECTION_STRTAB 1
+/* string at offset 0 is the name of the table */
+
+#define CPEL_SECTION_SYMTAB 2
+#define CPEL_SECTION_EVTDEF 3
+
+typedef struct event_definition_section_header_ {
+ char string_table_name[64];
+ unsigned int number_of_event_definitions;
+} event_definition_section_header_t;
+
+typedef struct event_definition_ {
+ unsigned int event;
+ unsigned int event_format;
+ unsigned int datum_format;
+} event_definition_t;
+
+#define CPEL_SECTION_TRACKDEF 4
+
+typedef struct track_definition_section_header_ {
+ char string_table_name[64];
+ unsigned int number_of_track_definitions;
+} track_definition_section_header_t;
+
+typedef struct track_definition_ {
+ unsigned int track;
+ unsigned int track_format;
+} track_definition_t;
+
+#define CPEL_SECTION_EVENT 5
+
+typedef struct event_section_header_ {
+ char string_table_name[64];
+ unsigned int number_of_events;
+ unsigned int clock_ticks_per_second;
+} event_section_header_t;
+
+typedef struct event_entry_ {
+ unsigned int time[2];
+ unsigned int track;
+ unsigned int event_code;
+ unsigned int event_datum;
+} event_entry_t;
+
+#define CPEL_NUM_SECTION_TYPES 5
+
+#endif /* _CPEL_H_ */
+
diff --git a/src/tools/perftool/cpel_util.c b/src/tools/perftool/cpel_util.c
new file mode 100644
index 00000000..7ee9b6e2
--- /dev/null
+++ b/src/tools/perftool/cpel_util.c
@@ -0,0 +1,456 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/byte_order.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <time.h>
+#include "cpel.h"
+#include "cpel_util.h"
+
+evt_t *the_events;
+
+track_t *the_tracks;
+u32 *track_alpha_map;
+
+event_definition_t *the_event_definitions;
+i64 min_timestamp;
+
+/* Hash tables, used to find previous instances of the same items */
+uword *the_track_hash;
+uword *the_msg_event_hash;
+uword *the_strtab_hash;
+uword *the_pidtid_hash;
+uword *the_pid_to_name_hash;
+u8 *the_strtab;
+
+uword *the_event_id_bitmap;
+
+/*
+ * find_or_add_strtab
+ * Finds or adds a string to the string table
+ */
+u32 find_or_add_strtab(void *s_arg)
+{
+ uword *p;
+ int len;
+ u8 *this_string;
+ u8 *scopy=0;
+ char *s = s_arg;
+
+ p = hash_get_mem(the_strtab_hash, s);
+ if (p) {
+ return (p[0]);
+ }
+
+ /*
+ * Here's a CLIB bear-trap. We can't add the string-table
+ * strings to the hash table directly, since the vector expands
+ * and moves periodically; all of the hash table entries would
+ * turn into dangling references. So we hash a stable copy instead.
+ */
+
+ len = strlen(s)+1;
+ vec_add2(the_strtab, this_string, len);
+ memcpy(this_string, s, len);
+
+ /* Make a copy which won't be moving around... */
+ vec_validate(scopy, len);
+ memcpy(scopy, s, len);
+
+ hash_set_mem(the_strtab_hash, scopy, this_string - the_strtab);
+
+ return(this_string - the_strtab);
+}
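+
+/*
+ * Illustrative usage: repeated calls with the same string return the
+ * same string-table offset, and the string is stored exactly once:
+ *
+ * u32 a = find_or_add_strtab("THREAD/THRUNNING");
+ * u32 b = find_or_add_strtab("THREAD/THRUNNING");
+ * // a == b
+ */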
+
+/*
+ * find_or_add_track
+ * returns index in track table
+ */
+u32 find_or_add_track(void *s_arg)
+{
+ uword *p;
+ track_t *this_track;
+ u8 *copy_s;
+ char *s=s_arg;
+
+ p = hash_get_mem(the_track_hash, s);
+ if (p) {
+ return (p[0]);
+ }
+ vec_add2(the_tracks, this_track, 1);
+
+ this_track->original_index = this_track - the_tracks;
+ this_track->strtab_offset = find_or_add_strtab(s);
+
+ copy_s = (u8 *)vec_dup(s);
+
+ hash_set_mem(the_track_hash, copy_s, this_track - the_tracks);
+ return(this_track - the_tracks);
+}
+
+/*
+ * find_or_add_event
+ * Adds an event to the event definition vector and adds it to
+ * the event hash table
+ */
+
+u32 find_or_add_event(void *s_arg, char *datum_format)
+{
+ uword *p;
+ u8 *copy_s;
+ event_definition_t *this_event_definition;
+ u32 event_id;
+ char *s=s_arg;
+
+ p = hash_get_mem(the_msg_event_hash, s);
+ if (p) {
+ return (p[0]);
+ }
+ vec_add2(the_event_definitions, this_event_definition, 1);
+
+ /* Allocate a new event-id */
+ event_id = clib_bitmap_first_clear (the_event_id_bitmap);
+ the_event_id_bitmap = clib_bitmap_set(the_event_id_bitmap, event_id, 1);
+ this_event_definition->event = event_id;
+ this_event_definition->event_format = find_or_add_strtab(s);
+ this_event_definition->datum_format = find_or_add_strtab(datum_format);
+
+ copy_s = (u8 *)vec_dup(s);
+
+ hash_set_mem(the_msg_event_hash, copy_s, event_id);
+
+ return(event_id);
+}
+
+/*
+ * write_string_table
+ */
+int write_string_table(FILE *ofp)
+{
+ cpel_section_header_t sh;
+
+ /* Round up string table size */
+ while (vec_len(the_strtab) & 0x7)
+ vec_add1(the_strtab, 0);
+
+ sh.section_type = ntohl(CPEL_SECTION_STRTAB);
+ sh.data_length = ntohl(vec_len(the_strtab));
+
+ if (fwrite(&sh, sizeof(sh), 1, ofp) != 1)
+ return(0);
+
+ if (fwrite(the_strtab, 1, vec_len(the_strtab), ofp) !=
+ vec_len(the_strtab))
+ return(0);
+
+ return(1);
+}
+
+/*
+ * write_cpel_header
+ */
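+
+/*
+ * Note: ntohs/ntohl are their own inverses, so they serve here as
+ * htons/htonl when producing network byte order.
+ */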
+int write_cpel_header(FILE *ofp, u32 nsections)
+{
+ cpel_file_header_t h;
+
+ h.endian_version = CPEL_FILE_VERSION;
+ h.pad = 0;
+ h.nsections = ntohs(nsections);
+ h.file_date = ntohl(time(0));
+ if (fwrite(&h, sizeof(h), 1, ofp) != 1)
+ return (0);
+
+ return(1);
+}
+
+/*
+ * write_event_defs
+ */
+int write_event_defs(FILE *ofp)
+{
+ cpel_section_header_t sh;
+ event_definition_section_header_t edsh;
+ event_definition_t *this_event_definition;
+ int i;
+
+ /* Next, the event definitions */
+ sh.section_type = ntohl(CPEL_SECTION_EVTDEF);
+ sh.data_length = ntohl(vec_len(the_event_definitions)
+ *sizeof(the_event_definitions[0])
+ + sizeof(event_definition_section_header_t));
+
+ if (fwrite(&sh, sizeof(sh), 1, ofp) != 1)
+ return(0);
+
+ memset(&edsh, 0, sizeof(edsh));
+
+ strcpy(edsh.string_table_name, "FileStrtab");
+ edsh.number_of_event_definitions = ntohl(vec_len(the_event_definitions));
+
+ if (fwrite(&edsh, sizeof(edsh), 1, ofp) != 1)
+ return(0);
+
+ for (i = 0; i < vec_len(the_event_definitions); i++) {
+ this_event_definition = &the_event_definitions[i];
+ /* Endian fixup */
+ this_event_definition->event = ntohl(this_event_definition->event);
+ this_event_definition->event_format =
+ ntohl(this_event_definition->event_format);
+ this_event_definition->datum_format =
+ ntohl(this_event_definition->datum_format);
+
+ if (fwrite(this_event_definition, sizeof(the_event_definitions[0]),
+ 1, ofp) != 1)
+ return(0);
+ }
+ return(1);
+}
+
+/*
+ * ntohll
+ * 64-bit network/host byte-order swap; byte swapping is its own
+ * inverse, so this routine doubles as htonll.
+ */
+u64 ntohll (u64 x) {
+ if (clib_arch_is_little_endian)
+ x = ((((x >> 0) & 0xff) << 56)
+ | (((x >> 8) & 0xff) << 48)
+ | (((x >> 16) & 0xff) << 40)
+ | (((x >> 24) & 0xff) << 32)
+ | (((x >> 32) & 0xff) << 24)
+ | (((x >> 40) & 0xff) << 16)
+ | (((x >> 48) & 0xff) << 8)
+ | (((x >> 56) & 0xff) << 0));
+
+ return x;
+}
+
+/*
+ * write_events
+ */
+int write_events(FILE *ofp, u64 clock_ticks_per_second)
+{
+ cpel_section_header_t sh;
+ event_section_header_t eh;
+ u32 number_of_events;
+ int i;
+ event_entry_t e;
+ u64 net_timestamp;
+ evt_t *this_event;
+ u32 time0, time1;
+
+ number_of_events = vec_len(the_events);
+
+ sh.section_type = ntohl(CPEL_SECTION_EVENT);
+ sh.data_length = ntohl(number_of_events * sizeof(e) +
+ sizeof(event_section_header_t));
+
+ if (fwrite(&sh, sizeof(sh), 1, ofp) != 1)
+ return(0);
+
+ memset(&eh, 0, sizeof(eh));
+ strcpy(eh.string_table_name, "FileStrtab");
+ eh.number_of_events = ntohl(number_of_events);
+ eh.clock_ticks_per_second = ntohl(clock_ticks_per_second);
+
+ if (fwrite(&eh, sizeof(eh), 1, ofp) != 1)
+ return(0);
+
+ for (i = 0; i < number_of_events; i++) {
+ this_event = &the_events[i];
+ net_timestamp = ntohll(this_event->timestamp);
+
+ time1 = net_timestamp>>32;
+ time0 = net_timestamp & 0xFFFFFFFF;
+
+ e.time[0] = time0;
+ e.time[1] = time1;
+ e.track = ntohl(this_event->track_id);
+ e.event_code = ntohl(this_event->event_id);
+ e.event_datum = ntohl(this_event->datum);
+
+ if (fwrite(&e, sizeof(e), 1, ofp) != 1)
+ return(0);
+ }
+ return(1);
+}
+
+/*
+ * write_track_defs
+ */
+int write_track_defs(FILE *ofp)
+{
+ cpel_section_header_t sh;
+ track_definition_section_header_t tdsh;
+ track_definition_t record;
+ track_definition_t *this_track_definition = &record;
+ int i;
+
+ /* Next, the track definitions */
+ sh.section_type = ntohl(CPEL_SECTION_TRACKDEF);
+ sh.data_length = ntohl(vec_len(the_tracks)
+ *sizeof(this_track_definition[0])
+ + sizeof(track_definition_section_header_t));
+
+ if (fwrite(&sh, sizeof(sh), 1, ofp) != 1)
+ return(0);
+
+ memset(&tdsh, 0, sizeof(tdsh));
+
+ strcpy(tdsh.string_table_name, "FileStrtab");
+ tdsh.number_of_track_definitions = ntohl(vec_len(the_tracks));
+
+ if (fwrite(&tdsh, sizeof(edsh), 1, ofp) != 1)
+ return(0);
+
+ for (i = 0; i < vec_len(the_tracks); i++) {
+ this_track_definition->track = ntohl(i);
+ this_track_definition->track_format =
+ ntohl(the_tracks[i].strtab_offset);
+
+ if (fwrite(this_track_definition, sizeof(this_track_definition[0]),
+ 1, ofp) != 1)
+ return(0);
+ }
+ return(1);
+}
+
+void cpel_util_init (void)
+{
+ u8 *eventstr;
+
+ the_strtab_hash = hash_create_string (0, sizeof (uword));
+ the_msg_event_hash = hash_create_string (0, sizeof (uword));
+ the_track_hash = hash_create_string (0, sizeof (uword));
+ the_pidtid_hash = hash_create_string (0, sizeof(uword));
+ the_pid_to_name_hash = hash_create(0, sizeof(uword));
+
+ /* Must be first, or no supper: the string at offset 0 names the table */
+ find_or_add_strtab("FileStrtab");
+
+ /* Historical canned events, no longer used. */
+ if (0) {
+ /* event 0 (not used) */
+ eventstr = format(0, "PlaceholderNotUsed");
+ vec_add1(eventstr, 0);
+ find_or_add_event(eventstr, "%s");
+ vec_free(eventstr);
+
+ /* event 1 (thread on CPU) */
+ eventstr = format(0, "THREAD/THRUNNING");
+ vec_add1(eventstr, 0);
+ find_or_add_event(eventstr, "%s");
+ vec_free(eventstr);
+
+ /* event 2 (thread ready) */
+ eventstr = format(0, "THREAD/THREADY");
+ vec_add1(eventstr, 0);
+ find_or_add_event(eventstr, "%s");
+ vec_free(eventstr);
+
+ /* event 3 (function enter) */
+ eventstr = format(0, "FUNC/ENTER");
+ vec_add1(eventstr, 0);
+ find_or_add_event(eventstr, "0x%x");
+ vec_free(eventstr);
+
+ /* event 4 (function enter) */
+ eventstr = format(0, "FUNC/EXIT");
+ vec_add1(eventstr, 0);
+ find_or_add_event(eventstr, "0x%x");
+ vec_free(eventstr);
+ }
+}
+
+/*
+ * alpha_compare_tracks
+ */
+static int alpha_compare_tracks(const void *a1, const void *a2)
+{
+ int i;
+ track_t *t1 = (track_t *)a1;
+ track_t *t2 = (track_t *)a2;
+ u8 *s1 = &the_strtab[t1->strtab_offset];
+ u8 *s2 = &the_strtab[t2->strtab_offset];
+
+ for (i = 0; s1[i] && s2[i]; i++) {
+ if (s1[i] < s2[i])
+ return(-1);
+ if (s1[i] > s2[i])
+ return(1);
+ }
+ return(0);
+}
+
+/*
+ * alpha_sort_tracks
+ * Alphabetically sort tracks, set up a mapping
+ * vector so we can quickly map the original track index to
+ * the new/improved/alpha-sorted index
+ */
+void alpha_sort_tracks(void)
+{
+ track_t *this_track;
+ int i;
+
+ qsort(the_tracks, vec_len(the_tracks), sizeof(track_t),
+ alpha_compare_tracks);
+
+ vec_validate(track_alpha_map, vec_len(the_tracks));
+ _vec_len(track_alpha_map) = vec_len(the_tracks);
+
+ for (i = 0; i < vec_len(the_tracks); i++) {
+ this_track = &the_tracks[i];
+ track_alpha_map[this_track->original_index] = i;
+ }
+}
+
+/*
+ * fixup_event_tracks
+ * Use the track alpha mapping to account for the alphabetic
+ * sort performed by the previous routine
+ */
+void fixup_event_tracks(void)
+{
+ int i;
+ u32 old_track;
+
+ for (i = 0; i < vec_len(the_events); i++) {
+ old_track = the_events[i].track_id;
+ the_events[i].track_id = track_alpha_map[old_track];
+ }
+}
+
+/* Indispensable for debugging in gdb... */
+
+u32 vl(void *x)
+{
+ return vec_len(x);
+}
diff --git a/src/tools/perftool/cpel_util.h b/src/tools/perftool/cpel_util.h
new file mode 100644
index 00000000..b76f7a4b
--- /dev/null
+++ b/src/tools/perftool/cpel_util.h
@@ -0,0 +1,68 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __cpel_util_h__
+#define __cpel_util_h__
+
+/*
+ * Our idea of an event, as opposed to a CPEL event
+ */
+typedef struct evt_ {
+ u64 timestamp;
+ u32 track_id;
+ u32 event_id;
+ u32 datum;
+} evt_t;
+
+evt_t *the_events;
+
+/*
+ * Track object, so we can sort the tracks alphabetically and
+ * fix the events later
+ */
+typedef struct track_ {
+ u32 original_index;
+ u32 strtab_offset;
+} track_t;
+
+track_t *the_tracks;
+u32 *track_alpha_map;
+
+event_definition_t *the_event_definitions;
+i64 min_timestamp;
+
+/* Hash tables, used to find previous instances of the same items */
+uword *the_track_hash;
+uword *the_msg_event_hash;
+uword *the_strtab_hash;
+uword *the_pidtid_hash;
+uword *the_pid_to_name_hash;
+u8 *the_strtab;
+
+u32 find_or_add_strtab(void *s_arg);
+u32 find_or_add_track(void *s_arg);
+u32 find_or_add_event(void *s_arg, char *datum_format);
+int write_string_table(FILE *ofp);
+int write_cpel_header(FILE *ofp, u32 nsections);
+int write_event_defs(FILE *ofp);
+u64 ntohll (u64 x);
+int write_events(FILE *ofp, u64 clock_ticks_per_second);
+int write_track_defs(FILE *ofp);
+void cpel_util_init (void);
+void alpha_sort_tracks(void);
+void fixup_event_tracks(void);
+
+#endif /* __cpel_util_h__ */
diff --git a/src/tools/perftool/cpelatency.c b/src/tools/perftool/cpelatency.c
new file mode 100644
index 00000000..7b87d606
--- /dev/null
+++ b/src/tools/perftool/cpelatency.c
@@ -0,0 +1,927 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <time.h>
+#include "cpel.h"
+#include <math.h>
+
+char *time_format = "%.03d:%.02d:%.02d:%.03d:%.03d ";
+static char version[] = "cpelatency 2.0";
+
+#define USEC_PER_MS 1000LL
+#define USEC_PER_SECOND (1000*USEC_PER_MS)
+#define USEC_PER_MINUTE (60*USEC_PER_SECOND)
+#define USEC_PER_HOUR (60*USEC_PER_MINUTE)
+
+uword *the_strtab_hash; /* (name, base-VA) hash of all string tables */
+uword *the_evtdef_hash; /* (event-id, event-definition) hash */
+uword *the_trackdef_hash; /* (track-id, track-definition) hash */
+uword *the_pidtid_hash; /* ("pid:xxx tid:yy", track-definition) hash */
+
+f64 ticks_per_us;
+u32 start_event_code = 2; /* default: XR thread ready event */
+u32 end_event_code = 1; /* default: XR thread running event */
+int exclude_kernel_from_summary_stats=1;
+int summary_stats_only;
+int scatterplot;
+u8 *name_filter;
+int have_trackdefs;
+
+typedef enum {
+ SORT_MAX_TIME=1,
+ SORT_MAX_OCCURRENCES,
+ SORT_NAME,
+} sort_t;
+
+sort_t sort_type = SORT_MAX_TIME;
+
+int widest_name_format=5;
+int widest_track_format=20;
+
+typedef struct bound_event_ {
+ u32 event_code;
+ u8 *event_str;
+ u8 *datum_str;
+ u32 is_strtab_ref;
+} bound_event_t;
+
+bound_event_t *bound_events;
+
+typedef struct bound_track_ {
+ u32 track;
+ u8 *track_str;
+ u64 state_start_ticks;
+ u64 *ticks_in_state; /* vector of state occurrences */
+ f64 mean_ticks_in_state;
+ f64 variance_ticks_in_state;
+ f64 total_ticks_in_state;
+} bound_track_t;
+
+bound_track_t *bound_tracks;
+
+void fatal(char *s)
+{
+ fprintf(stderr, "%s", s);
+ exit(1);
+}
+
+typedef enum {
+ PASS1=1,
+ PASS2=2,
+} pass_t;
+
+typedef struct {
+ int (*pass1)(cpel_section_header_t *, int, FILE *);
+ int (*pass2)(cpel_section_header_t *, int, FILE *);
+} section_processor_t;
+
+int bad_section(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ fprintf(ofp, "Bad (type 0) section, skipped...\n");
+ return(0);
+}
+
+int noop_pass(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ return(0);
+}
+
+int strtab_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ uword *p;
+ u8 *strtab_data_area = (u8 *)(sh+1);
+
+ /* Multiple string tables with the same name are Bad... */
+ p = hash_get_mem(the_strtab_hash, strtab_data_area);
+ if (p) {
+ fprintf(ofp, "Duplicate string table name %s", strtab_data_area);
+ }
+ /*
+ * Looks funny, but we really do want key = first string in the
+ * table, value = address(first string in the table)
+ */
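+ /*
+ * Later passes recover the table base with
+ * p = hash_get_mem(the_strtab_hash, <table name>);
+ * this_strtab = (u8 *)p[0];
+ * after which per-record format offsets index into this_strtab.
+ */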
+ hash_set_mem(the_strtab_hash, strtab_data_area, strtab_data_area);
+ if (verbose) {
+ fprintf(ofp, "String Table %s\n", strtab_data_area);
+ }
+ return(0);
+}
+
+int evtdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ event_definition_section_header_t *edh;
+ event_definition_t *ep;
+ u8 *this_strtab;
+ u32 event_code;
+ uword *p;
+ bound_event_t *bp;
+ int thislen;
+
+ edh = (event_definition_section_header_t *)(sh+1);
+ nevents = ntohl(edh->number_of_event_definitions);
+
+ if (verbose) {
+ fprintf(ofp, "Event Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ p = hash_get_mem(the_strtab_hash, edh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ ep = (event_definition_t *)(edh+1);
+
+ for (i = 0; i < nevents; i++) {
+ event_code = ntohl(ep->event);
+ p = hash_get(the_evtdef_hash, event_code);
+ if (p) {
+ fprintf(ofp, "Event %d redefined, retain first definition\n",
+ event_code);
+ continue;
+ }
+ vec_add2(bound_events, bp, 1);
+ bp->event_code = event_code;
+ bp->event_str = this_strtab + ntohl(ep->event_format);
+ bp->datum_str = this_strtab + ntohl(ep->datum_format);
+ bp->is_strtab_ref = 0;
+ /* Decide if the datum format is a %s format => strtab reference */
+ {
+ int j;
+ int seen_percent=0;
+
+ for (j = 0; j < strlen((char *) bp->datum_str); j++) {
+ if (bp->datum_str[j] == '%'){
+ seen_percent=1;
+ continue;
+ }
+ if (seen_percent && bp->datum_str[j] == 's') {
+ bp->is_strtab_ref = 1;
+ }
+ }
+ }
+
+ hash_set(the_evtdef_hash, event_code, bp - bound_events);
+
+ thislen = strlen((char *) bp->event_str);
+ if (thislen > widest_name_format)
+ widest_name_format = thislen;
+
+ ep++;
+ }
+ return (0);
+}
+
+int trackdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ track_definition_section_header_t *tdh;
+ track_definition_t *tp;
+ u8 *this_strtab;
+ u32 track_code;
+ uword *p;
+ bound_track_t *btp;
+ int thislen;
+ u8 *pidstr;
+ u8 *pidtid_str;
+ u8 *cp;
+ int tid, pid;
+
+ tdh = (track_definition_section_header_t *)(sh+1);
+ nevents = ntohl(tdh->number_of_track_definitions);
+
+ if (verbose) {
+ fprintf(ofp, "Track Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ p = hash_get_mem(the_strtab_hash, tdh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ tp = (track_definition_t *)(tdh+1);
+
+ for (i = 0; i < nevents; i++) {
+ track_code = ntohl(tp->track);
+ p = hash_get(the_trackdef_hash, track_code);
+ if (p) {
+ fprintf(stderr, "track %d redefined, retain first definition\n",
+ track_code);
+ continue;
+ }
+ vec_add2(bound_tracks, btp, 1);
+ btp->track = track_code;
+ btp->track_str = this_strtab + ntohl(tp->track_format);
+ hash_set(the_trackdef_hash, track_code, btp - bound_tracks);
+
+ if (verbose) {
+ fprintf(stderr, "adding track '%s'\n", btp->track_str);
+ }
+
+ thislen = strlen((char *) btp->track_str);
+ if (thislen > widest_track_format)
+ widest_track_format = thislen;
+
+ /* convert track_str "eth_server t11(20498)" to "pid:20498 tid:11" */
+ cp = btp->track_str;
+ while (*cp && *cp != '(')
+ cp++;
+ if (!*cp) {
+ fprintf(stderr, "error canonicalizing '%s'\n", btp->track_str);
+ goto out;
+ }
+ pidstr = cp+1; /* remember location of PID */
+
+ while (cp > btp->track_str && *cp != 't')
+ cp--;
+
+ if (cp == btp->track_str) {
+ fprintf(stderr, "error canonicalizing '%s'\n", btp->track_str);
+ goto out;
+ }
+ tid = atol((char *)(cp+1));
+ pid = atol((char *) pidstr);
+ pidtid_str = format(0, "pid:%d tid:%d", pid, tid);
+ vec_add1(pidtid_str, 0);
+
+ /*
+ * Note: duplicates are possible due to thread create /
+ * thread destroy operations.
+ */
+ p = hash_get_mem(the_pidtid_hash, pidtid_str);
+ if (p) {
+ vec_free(pidtid_str);
+ goto out;
+ }
+ hash_set_mem(the_pidtid_hash, pidtid_str, btp - bound_tracks);
+
+ out:
+ tp++;
+ }
+ have_trackdefs = 1;
+ return (0);
+}
+
+int unsupported_pass (cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ if (verbose) {
+ fprintf(ofp, "Unsupported type %d section\n",
+ ntohl(sh->section_type));
+ }
+ return(0);
+}
+
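+/*
+ * With the default start/end event codes (2 = thread ready,
+ * 1 = thread running), each ticks_in_state sample collected below
+ * is one dispatch latency: ready -> actually on CPU. Overriding
+ * --start-event / --end-event measures any similarly bracketed
+ * state instead.
+ */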
+int event_pass2(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ event_section_header_t *eh;
+ int nevents;
+ int i;
+ uword *p;
+ event_entry_t *ep;
+ u64 now;
+ u32 time0, time1;
+ u32 track_code;
+ u8 *this_strtab;
+ u64 ticks_in_state;
+ bound_track_t *btp;
+ bound_track_t *state_track=0;
+ u8 *pidtid_str;
+ u8 *pidtid_dup;
+ u8 *ecp;
+ u32 event_code;
+
+ eh = (event_section_header_t *)(sh+1);
+ nevents = ntohl(eh->number_of_events);
+ ticks_per_us = ((double)ntohl(eh->clock_ticks_per_second)) / 1e6;
+
+ if (verbose) {
+ fprintf(ofp, "%.3f ticks_per_us\n", ticks_per_us);
+ }
+
+ ep = (event_entry_t *)(eh+1);
+
+ p = hash_get_mem(the_strtab_hash, eh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ /*
+ * Some logger implementations don't produce trackdef
+ * sections; synthesize the bound_tracks vector here
+ */
+ if (!have_trackdefs) {
+ for (i = 0; i < nevents; i++) {
+ track_code = ntohl(ep->track);
+ pidtid_dup = format(0, "%d", track_code);
+ vec_add1(pidtid_dup, 0);
+ p = hash_get_mem(the_pidtid_hash, pidtid_dup);
+ if (!p) {
+ vec_add2(bound_tracks, btp, 1);
+ btp->track = track_code;
+ btp->track_str = pidtid_dup;
+ hash_set(the_trackdef_hash, track_code, btp - bound_tracks);
+ hash_set_mem(the_pidtid_hash, pidtid_dup, btp - bound_tracks);
+ } else {
+ vec_free(pidtid_dup);
+ }
+ ep++;
+ }
+ }
+
+ ep = (event_entry_t *)(eh+1);
+
+ for (i = 0; i < nevents; i++) {
+ time0 = ntohl (ep->time[0]);
+ time1 = ntohl (ep->time[1]);
+
+ now = (((u64) time0)<<32) | time1;
+
+ event_code = ntohl(ep->event_code);
+
+ /* Find the corresponding track via the pidtid hash table */
+ if (event_code == start_event_code || event_code == end_event_code) {
+ if (have_trackdefs) {
+ pidtid_str = this_strtab + ntohl(ep->event_datum);
+ pidtid_dup = format(0, (char *) pidtid_str);
+ vec_add1(pidtid_dup, 0);
+ ecp = &pidtid_dup[vec_len(pidtid_dup)-1];
+ while (*--ecp == ' ')
+ *ecp = 0;
+ } else {
+ pidtid_dup = format(0, "%d", ntohl(ep->track));
+ vec_add1(pidtid_dup, 0);
+ }
+
+ p = hash_get_mem(the_pidtid_hash, pidtid_dup);
+ if (!p) {
+ fprintf(stderr, "warning: couldn't find '%s'\n",
+ pidtid_dup);
+ vec_free(pidtid_dup);
+ ep++;
+ continue;
+ }
+ state_track = &bound_tracks[p[0]];
+ }
+ /* Found the start-event code? */
+ if (event_code == start_event_code) {
+ state_track->state_start_ticks = now;
+ } else if (event_code == end_event_code) {
+ /*
+ * Add a ticks-in-state record, unless
+ * e.g. the log started with the exit event
+ */
+ if (state_track->state_start_ticks) {
+ ticks_in_state = now - state_track->state_start_ticks;
+ vec_add1(state_track->ticks_in_state, ticks_in_state);
+ state_track->state_start_ticks = 0;
+ }
+ /* Otherwise, nothing */
+ }
+ ep++;
+ }
+ return(0);
+}
+
+/*
+ * Note: If necessary, add passes / columns to this table to
+ * handle section order dependencies.
+ */
+
+section_processor_t processors[CPEL_NUM_SECTION_TYPES+1] =
+{
+ {bad_section, noop_pass}, /* type 0 -- f**ked */
+ {strtab_pass1, noop_pass}, /* type 1 -- STRTAB */
+ {unsupported_pass, noop_pass}, /* type 2 -- SYMTAB */
+ {evtdef_pass1, noop_pass}, /* type 3 -- EVTDEF */
+ {trackdef_pass1, noop_pass}, /* type 4 -- TRACKDEF */
+ {noop_pass, event_pass2}, /* type 5 -- EVENTS */
+};
+
+
+int process_section(cpel_section_header_t *sh, int verbose, FILE *ofp,
+ pass_t pass)
+{
+ u32 type;
+ int rv;
+ int (*fp)(cpel_section_header_t *, int, FILE *);
+
+ type = ntohl(sh->section_type);
+
+ if (type > CPEL_NUM_SECTION_TYPES) {
+ fprintf(stderr, "Unknown section type %d\n", type);
+ return(1);
+ }
+ switch(pass) {
+ case PASS1:
+ fp = processors[type].pass1;
+ break;
+
+ case PASS2:
+ fp = processors[type].pass2;
+ break;
+
+ default:
+ fprintf(stderr, "Unknown pass %d\n", pass);
+ return(1);
+ }
+
+ rv = (*fp)(sh, verbose, ofp);
+
+ return(rv);
+}
+
+int cpel_dump_file_header(cpel_file_header_t *fh, int verbose, FILE *ofp)
+{
+ time_t file_time;
+
+ if (verbose) {
+ fprintf(ofp, "CPEL file: %s-endian, version %d\n",
+ ((fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) ?
+ "little" : "big"),
+ fh->endian_version & CPEL_FILE_VERSION_MASK);
+
+ file_time = ntohl(fh->file_date);
+
+ fprintf(ofp, "File created %s", ctime(&file_time));
+ fprintf(ofp, "File has %d sections\n",
+ ntohs(fh->nsections));
+ }
+
+ return(0);
+}
+
+
+int cpel_dump(u8 *cpel, int verbose, FILE *ofp)
+{
+ cpel_file_header_t *fh;
+ cpel_section_header_t *sh;
+ u16 nsections;
+ u32 section_size;
+ int i;
+
+ /* First, the file header */
+ fh = (cpel_file_header_t *)cpel;
+ if (fh->endian_version != CPEL_FILE_VERSION) {
+ if (fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) {
+ fprintf(stderr, "Little endian data format not supported\n");
+ return(1);
+ }
+ fprintf(stderr, "Unsupported file version 0x%x\n",
+ fh->endian_version);
+ return(1);
+ }
+ cpel_dump_file_header(fh, verbose, ofp);
+ nsections = ntohs(fh->nsections);
+
+ /*
+ * Take two passes through the file. PASS1 builds
+ * data structures, PASS2 actually dumps the file.
+ * Just in case the sections are in an unobvious order.
+ */
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ section_size = ntohl(sh->data_length);
+
+ if(verbose) {
+ fprintf(ofp, "Section type %d, size %d\n", ntohl(sh->section_type),
+ section_size);
+ }
+
+ if(process_section(sh, verbose, ofp, PASS1))
+ return(1);
+
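+ /*
+ * data_length counts only the section payload, so step over
+ * the header (sh++) and then over the payload bytes.
+ */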
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ if(process_section(sh, verbose, ofp, PASS2))
+ return(1);
+ section_size = ntohl(sh->data_length);
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+ return(0);
+}
+
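+/*
+ * Per-track summary statistics over the ticks-in-state samples
+ * t_1..t_N:
+ *
+ * mean = (1/N) * sum(t_i)
+ * s^2 = sum((t_i - mean)^2) / (N - 1) (unbiased sample variance)
+ *
+ * The report prints sqrt(s^2), scaled by ticks_per_us to convert
+ * clock ticks to microseconds.
+ */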
+void compute_state_statistics(int verbose, FILE *ofp)
+{
+ int i, j;
+ bound_track_t *bp;
+ f64 fticks;
+
+ /* Across the bound tracks */
+ for (i = 0; i < vec_len(bound_tracks); i++) {
+ bp = &bound_tracks[i];
+ bp->mean_ticks_in_state = 0.0;
+ bp->variance_ticks_in_state = 0.0;
+ bp->total_ticks_in_state = 0.0;
+ for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
+ bp->total_ticks_in_state += (f64) bp->ticks_in_state[j];
+ }
+ /* Compute mean */
+ if (vec_len(bp->ticks_in_state)) {
+ bp->mean_ticks_in_state = bp->total_ticks_in_state /
+ ((f64) vec_len(bp->ticks_in_state));
+ }
+ /* Accumulate sum: (Xi-Xbar)**2 */
+ for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
+ fticks = bp->ticks_in_state[j];
+ bp->variance_ticks_in_state +=
+ (fticks - bp->mean_ticks_in_state)*
+ (fticks - bp->mean_ticks_in_state);
+ }
+ /* Compute s**2, the unbiased estimator of sigma**2 */
+ if (vec_len(bp->ticks_in_state) > 1) {
+ bp->variance_ticks_in_state /= (f64)
+ (vec_len(bp->ticks_in_state)-1);
+ }
+ }
+}
+
+int track_compare_max (const void *arg1, const void *arg2)
+{
+ bound_track_t *a1 = (bound_track_t *)arg1;
+ bound_track_t *a2 = (bound_track_t *)arg2;
+ f64 v1, v2;
+
+ v1 = a1->total_ticks_in_state;
+ v2 = a2->total_ticks_in_state;
+
+ if (v1 < v2)
+ return (1);
+ else if (v1 == v2)
+ return (0);
+ else return (-1);
+}
+
+int track_compare_occurrences (const void *arg1, const void *arg2)
+{
+ bound_track_t *a1 = (bound_track_t *)arg1;
+ bound_track_t *a2 = (bound_track_t *)arg2;
+ f64 v1, v2;
+
+ v1 = (f64) vec_len(a1->ticks_in_state);
+ v2 = (f64) vec_len(a2->ticks_in_state);
+
+ if (v1 < v2)
+ return (1);
+ else if (v1 == v2)
+ return (0);
+ else return (-1);
+}
+
+int track_compare_name (const void *arg1, const void *arg2)
+{
+ bound_track_t *a1 = (bound_track_t *)arg1;
+ bound_track_t *a2 = (bound_track_t *)arg2;
+
+ return (strcmp((char *)(a1->track_str), (char *)(a2->track_str)));
+}
+
+void sort_state_statistics(sort_t type, FILE *ofp)
+{
+ int (*compare)(const void *, const void *) = 0;
+
+ if (summary_stats_only)
+ return;
+
+ switch(type) {
+ case SORT_MAX_TIME:
+ fprintf(ofp, "Results sorted by max time in state.\n\n");
+ compare = track_compare_max;
+ break;
+
+ case SORT_MAX_OCCURRENCES:
+ fprintf(ofp, "Results sorted by max occurrences of state.\n\n");
+ compare = track_compare_occurrences;
+ break;
+
+ case SORT_NAME:
+ compare = track_compare_name;
+ fprintf(ofp, "Results sorted by process name, thread ID, PID\n\n");
+ break;
+
+ default:
+ fatal("sort type not set?");
+ }
+
+ qsort (bound_tracks, vec_len(bound_tracks),
+ sizeof (bound_track_t), compare);
+}
+
+void print_state_statistics(int verbose, FILE *ofp)
+{
+ int i,j;
+ u8 *trackpad;
+ bound_track_t *bp;
+ f64 total_time = 0.0;
+ f64 total_switches = 0.0;
+
+ trackpad = format(0, "%%-%ds ", widest_track_format);
+ vec_add1(trackpad, 0);
+
+ if (!summary_stats_only) {
+ fprintf(ofp, (char *)trackpad, "ProcName Thread(PID)");
+ fprintf(ofp, " Mean(us) Stdev(us) Total(us) N\n");
+ }
+
+ for (i = 0; i < vec_len(bound_tracks); i++) {
+ bp = &bound_tracks[i];
+ if (bp->mean_ticks_in_state == 0.0)
+ continue;
+
+ if (name_filter &&
+ strncmp((char *)bp->track_str, (char *)name_filter,
+ strlen((char *)name_filter)))
+ continue;
+
+ /*
+ * Exclude kernel threads (e.g. idle thread) from
+ * state statistics
+ */
+ if (exclude_kernel_from_summary_stats &&
+ !strncmp((char *) bp->track_str, "kernel ", 7))
+ continue;
+
+ total_switches += (f64) vec_len(bp->ticks_in_state);
+
+ if (!summary_stats_only) {
+ fprintf(ofp, (char *) trackpad, bp->track_str);
+ fprintf(ofp, "%10.3f +- %10.3f",
+ bp->mean_ticks_in_state / ticks_per_us,
+ sqrt(bp->variance_ticks_in_state)
+ / ticks_per_us);
+ fprintf(ofp, "%12.3f",
+ bp->total_ticks_in_state / ticks_per_us);
+ fprintf(ofp, "%8d\n", vec_len(bp->ticks_in_state));
+ }
+
+ if (scatterplot) {
+ for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
+ fprintf(ofp, "%.3f\n",
+ (f64)bp->ticks_in_state[j] / ticks_per_us);
+ }
+ }
+
+ total_time += bp->total_ticks_in_state;
+ }
+
+ if (!summary_stats_only)
+ fprintf(ofp, "\n");
+ fprintf(ofp, "Note: the following statistics %s kernel-thread activity.\n",
+ exclude_kernel_from_summary_stats ? "exclude" : "include");
+ if (name_filter)
+ fprintf(ofp,
+ "Note: only pid/proc/threads matching '%s' are included.\n",
+ name_filter);
+
+ fprintf(ofp,
+ "Total time in state: %10.3f (us), Total state occurrences: %.0f\n",
+ total_time / ticks_per_us, total_switches);
+ fprintf(ofp, "Average time in state: %10.3f (us)\n",
+ (total_time / total_switches) / ticks_per_us);
+ fprintf(ofp, "State start event: %d, state end event: %d\n",
+ start_event_code, end_event_code);
+}
+
+char *mapfile (char *file)
+{
+ struct stat statb;
+ char *rv;
+ int maphfile;
+ size_t mapfsize;
+
+ maphfile = open (file, O_RDONLY);
+
+ if (maphfile < 0)
+ {
+ fprintf (stderr, "Couldn't read %s, skipping it...\n", file);
+ return (NULL);
+ }
+
+ if (fstat (maphfile, &statb) < 0)
+ {
+ fprintf (stderr, "Couldn't get size of %s, skipping it...\n", file);
+ return (NULL);
+ }
+
+ /* Don't try to mmap directories, FIFOs, semaphores, etc. */
+ if (! (statb.st_mode & S_IFREG)) {
+ fprintf (stderr, "%s is not a regular file, skipping it...\n", file);
+ return (NULL);
+ }
+
+ mapfsize = statb.st_size;
+
+ if (mapfsize < 3)
+ {
+ fprintf (stderr, "%s zero-length, skipping it...\n", file);
+ close (maphfile);
+ return (NULL);
+ }
+
+ rv = mmap (0, mapfsize, PROT_READ, MAP_SHARED, maphfile, 0);
+
+ if (rv == MAP_FAILED)
+ {
+ fprintf (stderr, "%s problem mapping, I quit...\n", file);
+ exit (-1);
+ }
+ close (maphfile);
+ return (rv);
+}
+
+/*
+ * main
+ */
+int main (int argc, char **argv)
+{
+ char *cpel_file = 0;
+ char *outputfile = 0;
+ FILE *ofp;
+ char *cpel;
+ int verbose=0;
+ int curarg=1;
+
+ while (curarg < argc) {
+ if (!strncmp(argv[curarg], "--input-file", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ cpel_file = argv[curarg];
+ curarg++;
+ continue;
+ }
+ fatal("Missing filename after --input-file\n");
+ }
+ if (!strncmp(argv[curarg], "--output-file", 3)) {
+ curarg ++;
+ if (curarg < argc) {
+ outputfile = argv[curarg];
+ curarg ++;
+ continue;
+ }
+ fatal("Missing filename after --output-file\n");
+ }
+ if (!strncmp(argv[curarg], "--verbose", 3)) {
+ curarg++;
+ verbose++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--scatterplot", 4)) {
+ curarg++;
+ scatterplot=1;
+ continue;
+ }
+
+ if (!strncmp(argv[curarg], "--start-event", 4)) {
+ curarg++;
+ if (curarg < argc) {
+ start_event_code = atol(argv[curarg]);
+ curarg ++;
+ continue;
+ }
+ fatal("Missing integer after --start-event\n");
+ }
+ if (!strncmp(argv[curarg], "--end-event", 4)) {
+ curarg++;
+ if (curarg < argc) {
+ end_event_code = atol(argv[curarg]);
+ curarg ++;
+ continue;
+ }
+ fatal("Missing integer after --end-event\n");
+ }
+ if (!strncmp(argv[curarg], "--max-time-sort", 7)) {
+ sort_type = SORT_MAX_TIME;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--max-occurrence-sort", 7)) {
+ sort_type = SORT_MAX_OCCURRENCES;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--name-sort", 3)) {
+ sort_type = SORT_NAME;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--kernel-included", 3)) {
+ exclude_kernel_from_summary_stats = 0;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--summary", 3)) {
+ summary_stats_only=1;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--filter", 3)) {
+ curarg ++;
+ if (curarg < argc) {
+ name_filter = (u8 *) argv[curarg];
+ curarg ++;
+ continue;
+ }
+ fatal("Missing filter string after --filter\n");
+ }
+
+
+ usage:
+ fprintf(stderr,
+ "cpelatency --input-file <filename> [--output-file <filename>]\n");
+ fprintf(stderr,
+ " [--start-event <decimal>] [--verbose]\n");
+ fprintf(stderr,
+ " [--end-event <decimal>]\n");
+ fprintf(stderr,
+ " [--max-time-sort(default) | --max-occurrence-sort |\n");
+
+ fprintf(stderr,
+ " --name-sort-sort] [--kernel-included]\n");
+
+ fprintf(stderr,
+ " [--summary-stats-only] [--scatterplot]\n");
+
+ fprintf(stderr, "%s\n", version);
+ exit(1);
+ }
+
+ if (cpel_file == 0)
+ goto usage;
+
+ cpel = mapfile(cpel_file);
+ if (cpel == 0) {
+ fprintf(stderr, "Couldn't map %s...\n", cpel_file);
+ exit(1);
+ }
+
+ if (!outputfile) {
+ ofp = fdopen(1, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "Couldn't fdopen(1)?\n");
+ exit(1);
+ }
+ } else {
+ ofp = fopen(outputfile, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "Couldn't create %s...\n", outputfile);
+ exit(1);
+ }
+ }
+
+ the_strtab_hash = hash_create_string (0, sizeof (uword));
+ the_evtdef_hash = hash_create (0, sizeof (uword));
+ the_trackdef_hash = hash_create (0, sizeof (uword));
+ the_pidtid_hash = hash_create_string (0, sizeof(uword));
+
+ if (cpel_dump((u8 *)cpel, verbose, ofp)) {
+ if (outputfile)
+ unlink(outputfile);
+ }
+
+ compute_state_statistics(verbose, ofp);
+ sort_state_statistics(sort_type, ofp);
+ print_state_statistics(verbose, ofp);
+
+ fclose(ofp);
+ return(0);
+}
diff --git a/src/tools/perftool/cpeldump.c b/src/tools/perftool/cpeldump.c
new file mode 100644
index 00000000..be0a70df
--- /dev/null
+++ b/src/tools/perftool/cpeldump.c
@@ -0,0 +1,641 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/mem.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <time.h>
+#include "cpel.h"
+
+char *time_format = "%.03d:%.02d:%.02d:%.03d:%.03d ";
+static char version[] = "cpeldump 2.0";
+
+#define USEC_PER_MS 1000LL
+#define USEC_PER_SECOND (1000*USEC_PER_MS)
+#define USEC_PER_MINUTE (60*USEC_PER_SECOND)
+#define USEC_PER_HOUR (60*USEC_PER_MINUTE)
+
+uword *the_strtab_hash; /* (name, base-VA) hash of all string tables */
+uword *the_evtdef_hash; /* (event-id, event-definition) hash */
+uword *the_trackdef_hash; /* (track-id, track-definition) hash */
+
+int widest_name_format=5;
+int widest_track_format=5;
+
+typedef struct bound_event_ {
+ u32 event_code;
+ u8 *event_str;
+ u8 *datum_str;
+ u32 is_strtab_ref;
+} bound_event_t;
+
+bound_event_t *bound_events;
+
+typedef struct bound_track_ {
+ u32 track;
+ u8 *track_str;
+} bound_track_t;
+
+bound_track_t *bound_tracks;
+
+void fatal(char *s)
+{
+ fprintf(stderr, "%s", s);
+ exit(1);
+}
+
+typedef enum {
+ PASS1=1,
+ PASS2=2,
+} pass_t;
+
+typedef struct {
+ int (*pass1)(cpel_section_header_t *, int, FILE *);
+ int (*pass2)(cpel_section_header_t *, int, FILE *);
+} section_processor_t;
+
+int bad_section(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ fprintf(ofp, "Bad (type 0) section, skipped...\n");
+ return(0);
+}
+
+int noop_pass(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ return(0);
+}
+
+int strtab_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ uword *p;
+ u8 *strtab_data_area = (u8 *)(sh+1);
+
+ /* Multiple string tables with the same name are Bad... */
+ p = hash_get_mem(the_strtab_hash, strtab_data_area);
+ if (p) {
+ fprintf(ofp, "Duplicate string table name %s", strtab_data_area);
+ }
+ /*
+ * Looks funny, but we really do want key = first string in the
+ * table, value = address(first string in the table)
+ */
+ hash_set_mem(the_strtab_hash, strtab_data_area, strtab_data_area);
+ if (verbose) {
+ fprintf(stderr, "String Table %s\n", strtab_data_area);
+ }
+ return(0);
+}
+
+int evtdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ event_definition_section_header_t *edh;
+ event_definition_t *ep;
+ u8 *this_strtab;
+ u32 event_code;
+ uword *p;
+ bound_event_t *bp;
+ int thislen;
+
+ edh = (event_definition_section_header_t *)(sh+1);
+ nevents = ntohl(edh->number_of_event_definitions);
+
+ if (verbose) {
+ fprintf(stderr, "Event Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ p = hash_get_mem(the_strtab_hash, edh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ ep = (event_definition_t *)(edh+1);
+
+ for (i = 0; i < nevents; i++) {
+ event_code = ntohl(ep->event);
+ p = hash_get(the_evtdef_hash, event_code);
+ if (p) {
+ fprintf(ofp, "Event %d redefined, retain first definition\n",
+ event_code);
+ continue;
+ }
+ vec_add2(bound_events, bp, 1);
+ bp->event_code = event_code;
+ bp->event_str = this_strtab + ntohl(ep->event_format);
+ bp->datum_str = this_strtab + ntohl(ep->datum_format);
+ bp->is_strtab_ref = 0;
+ /* Decide if the datum format is a %s format => strtab reference */
+ {
+ int j;
+ int seen_percent=0;
+
+ for (j = 0; j < strlen((char *)bp->datum_str); j++) {
+ if (bp->datum_str[j] == '%'){
+ seen_percent=1;
+ continue;
+ }
+ if (seen_percent && bp->datum_str[j] == 's') {
+ bp->is_strtab_ref = 1;
+ }
+ }
+ }
+
+ hash_set(the_evtdef_hash, event_code, bp - bound_events);
+
+ thislen = strlen((char *)bp->event_str);
+ if (thislen > widest_name_format)
+ widest_name_format = thislen;
+
+ ep++;
+ }
+ return (0);
+}
+
+int trackdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ track_definition_section_header_t *tdh;
+ track_definition_t *tp;
+ u8 *this_strtab;
+ u32 track_code;
+ uword *p;
+ bound_track_t *btp;
+ int thislen;
+
+ tdh = (track_definition_section_header_t *)(sh+1);
+ nevents = ntohl(tdh->number_of_track_definitions);
+
+ if (verbose) {
+ fprintf(stderr, "Track Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ p = hash_get_mem(the_strtab_hash, tdh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ tp = (track_definition_t *)(tdh+1);
+
+ for (i = 0; i < nevents; i++) {
+ track_code = ntohl(tp->track);
+ p = hash_get(the_trackdef_hash, track_code);
+ if (p) {
+ fprintf(ofp, "track %d redefined, retain first definition\n",
+ track_code);
+ continue;
+ }
+ vec_add2(bound_tracks, btp, 1);
+ btp->track = track_code;
+ btp->track_str = this_strtab + ntohl(tp->track_format);
+ hash_set(the_trackdef_hash, track_code, btp - bound_tracks);
+
+ thislen = strlen((char *)btp->track_str);
+ if (thislen > widest_track_format)
+ widest_track_format = thislen;
+ tp++;
+ }
+ return (0);
+}
+
+int unsupported_pass (cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ if (verbose) {
+ fprintf(stderr, "Unsupported type %d section\n",
+ ntohl(sh->section_type));
+ }
+ return(0);
+}
+
+int event_pass2(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ event_section_header_t *eh;
+ f64 ticks_per_us;
+ u32 event_code, track_code;
+ u64 starttime = 0xFFFFFFFFFFFFFFFFULL;
+ int nevents;
+ int i;
+ uword *p;
+ event_entry_t *ep;
+ u64 now;
+ u64 delta;
+ u32 hours, minutes, seconds, msec, usec;
+ u32 time0, time1;
+ double d;
+ bound_event_t *bp;
+ bound_event_t generic_event;
+ bound_track_t *tp=0;
+ bound_track_t generic_track;
+ u32 last_track_code;
+ u8 *s, *evtpad, *trackpad;
+ u8 *this_strtab;
+
+ generic_event.event_str = (u8 *)"%d";
+ generic_event.datum_str = (u8 *)"0x%08x";
+ generic_event.is_strtab_ref = 0;
+
+ generic_track.track_str = (u8 *)"%d";
+ last_track_code = 0xdeadbeef;
+
+ eh = (event_section_header_t *)(sh+1);
+ nevents = ntohl(eh->number_of_events);
+ ticks_per_us = ((double)ntohl(eh->clock_ticks_per_second)) / 1e6;
+
+ if (verbose) {
+ fprintf(stderr, "Event section: %d events, %.3f ticks_per_us\n",
+ nevents, ticks_per_us);
+ }
+
+ ep = (event_entry_t *)(eh+1);
+
+ p = hash_get_mem(the_strtab_hash, eh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ evtpad = format(0, "%%-%ds ", widest_name_format);
+ vec_add1(evtpad, 0);
+ trackpad = format(0, "%%-%ds ", widest_track_format);
+ vec_add1(trackpad, 0);
+
+ for (i = 0; i < nevents; i++) {
+ time0 = ntohl (ep->time[0]);
+ time1 = ntohl (ep->time[1]);
+
+ now = (((u64) time0)<<32) | time1;
+
+ /* Convert from bus ticks to usec */
+ d = now;
+ d /= ticks_per_us;
+
+ now = d;
+
+ if (starttime == 0xFFFFFFFFFFFFFFFFULL)
+ starttime = now;
+
+ delta = now - starttime;
+
+ /* Delta = time since first event, in usec */
+
+ hours = delta / USEC_PER_HOUR;
+ if (hours)
+ delta -= ((u64) hours * USEC_PER_HOUR);
+ minutes = delta / USEC_PER_MINUTE;
+ if (minutes)
+ delta -= ((u64) minutes * USEC_PER_MINUTE);
+ seconds = delta / USEC_PER_SECOND;
+ if (seconds)
+ delta -= ((u64) seconds * USEC_PER_SECOND);
+ msec = delta / USEC_PER_MS;
+ if (msec)
+ delta -= ((u64) msec * USEC_PER_MS);
+
+ usec = delta;
+
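+ /*
+ * Example: delta = 3723004005 usec prints as
+ * "001:02:03:004:005" (hours:minutes:seconds:msec:usec)
+ * under time_format above.
+ */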
+ /* Output the timestamp */
+ fprintf(ofp, time_format, hours, minutes, seconds, msec, usec);
+
+ /* output the track */
+ track_code = ntohl(ep->track);
+
+ if (track_code != last_track_code) {
+ p = hash_get(the_trackdef_hash, track_code);
+ if (p) {
+ tp = &bound_tracks[p[0]];
+ } else {
+ tp = &generic_track;
+ }
+ }
+ s = format(0, (char *)tp->track_str, track_code);
+ vec_add1(s, 0);
+ fprintf(ofp, (char *)trackpad, s);
+ vec_free(s);
+
+ /* output the event and datum */
+ if (0 && verbose) {
+ fprintf(stderr, "raw event code %d, raw event datum 0x%x\n",
+ ntohl(ep->event_code), ntohl(ep->event_datum));
+ }
+
+ event_code = ntohl(ep->event_code);
+ p = hash_get(the_evtdef_hash, event_code);
+ if (p) {
+ bp = &bound_events[p[0]];
+ } else {
+ bp = &generic_event;
+ }
+ s = format(0, (char *)bp->event_str, ntohl(ep->event_code));
+ vec_add1(s, 0);
+ fprintf(ofp, (char *)evtpad, s);
+ vec_free(s);
+ if (bp->is_strtab_ref) {
+ fprintf(ofp, (char *) bp->datum_str,
+ &this_strtab[ntohl(ep->event_datum)]);
+ } else {
+ fprintf(ofp, (char *) bp->datum_str, ntohl(ep->event_datum));
+ }
+ fputs("\n", ofp);
+ ep++;
+ }
+ vec_free(evtpad);
+ vec_free(trackpad);
+ return(0);
+}
+
+/*
+ * Note: If necessary, add passes / columns to this table to
+ * handle section order dependencies.
+ */
+
+section_processor_t processors[CPEL_NUM_SECTION_TYPES+1] =
+{
+ {bad_section, noop_pass}, /* type 0 -- f**ked */
+ {strtab_pass1, noop_pass}, /* type 1 -- STRTAB */
+ {unsupported_pass, noop_pass}, /* type 2 -- SYMTAB */
+ {evtdef_pass1, noop_pass}, /* type 3 -- EVTDEF */
+ {trackdef_pass1, noop_pass}, /* type 4 -- TRACKDEF */
+ {noop_pass, event_pass2}, /* type 5 -- EVENTS */
+};
+
+
+int process_section(cpel_section_header_t *sh, int verbose, FILE *ofp,
+ pass_t pass)
+{
+ u32 type;
+ int rv;
+ int (*fp)(cpel_section_header_t *, int, FILE *);
+
+ type = ntohl(sh->section_type);
+
+ if (type > CPEL_NUM_SECTION_TYPES) {
+ fprintf(stderr, "Unknown section type %d\n", type);
+ return(1);
+ }
+ switch(pass) {
+ case PASS1:
+ fp = processors[type].pass1;
+ break;
+
+ case PASS2:
+ fp = processors[type].pass2;
+ break;
+
+ default:
+ fprintf(stderr, "Unknown pass %d\n", pass);
+ return(1);
+ }
+
+ rv = (*fp)(sh, verbose, ofp);
+
+ return(rv);
+}
+
+int cpel_dump_file_header(cpel_file_header_t *fh, int verbose, FILE *ofp)
+{
+ time_t file_time;
+
+ if (verbose) {
+ fprintf(stderr, "CPEL file: %s-endian, version %d\n",
+ ((fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) ?
+ "little" : "big"),
+ fh->endian_version & CPEL_FILE_VERSION_MASK);
+
+ file_time = ntohl(fh->file_date);
+
+ fprintf(stderr, "File created %s", ctime(&file_time));
+ fprintf(stderr, "File has %d sections\n",
+ ntohs(fh->nsections));
+ }
+
+ return(0);
+}
+
+
+int cpel_dump(u8 *cpel, int verbose, FILE *ofp)
+{
+ cpel_file_header_t *fh;
+ cpel_section_header_t *sh;
+ u16 nsections;
+ u32 section_size;
+ int i;
+
+ /* First, the file header */
+ fh = (cpel_file_header_t *)cpel;
+ if (fh->endian_version != CPEL_FILE_VERSION) {
+ if (fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) {
+ fprintf(stderr, "Little endian data format not supported\n");
+ return(1);
+ }
+ fprintf(stderr, "Unsupported file version 0x%x\n",
+ fh->endian_version);
+ return(1);
+ }
+ cpel_dump_file_header(fh, verbose, ofp);
+ nsections = ntohs(fh->nsections);
+
+ /*
+ * Take two passes through the file. PASS1 builds
+ * data structures, PASS2 actually dumps the file.
+ * Just in case the sections are in an unobvious order.
+ */
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ section_size = ntohl(sh->data_length);
+
+ if(verbose) {
+ fprintf(stderr,
+ "Section type %d, size %d\n", ntohl(sh->section_type),
+ section_size);
+ }
+
+ if(process_section(sh, verbose, ofp, PASS1))
+ return(1);
+
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ if(process_section(sh, verbose, ofp, PASS2))
+ return(1);
+ section_size = ntohl(sh->data_length);
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+ return(0);
+}
+
+
+char *mapfile (char *file)
+{
+ struct stat statb;
+ char *rv;
+ int maphfile;
+ size_t mapfsize;
+
+ maphfile = open (file, O_RDONLY);
+
+ if (maphfile < 0)
+ {
+ fprintf (stderr, "Couldn't read %s, skipping it...\n", file);
+ return (NULL);
+ }
+
+ if (fstat (maphfile, &statb) < 0)
+ {
+ fprintf (stderr, "Couldn't get size of %s, skipping it...\n", file);
+ return (NULL);
+ }
+
+ /* Don't try to mmap directories, FIFOs, semaphores, etc. */
+ if (! (statb.st_mode & S_IFREG)) {
+ fprintf (stderr, "%s is not a regular file, skipping it...\n", file);
+ return (NULL);
+ }
+
+ mapfsize = statb.st_size;
+
+ if (mapfsize < 3)
+ {
+ fprintf (stderr, "%s zero-length, skipping it...\n", file);
+ close (maphfile);
+ return (NULL);
+ }
+
+ rv = mmap (0, mapfsize, PROT_READ, MAP_SHARED, maphfile, 0);
+
+ if (rv == MAP_FAILED)
+ {
+ fprintf (stderr, "%s problem mapping, I quit...\n", file);
+ exit (-1);
+ }
+ close (maphfile);
+ return (rv);
+}
+
+/*
+ * main
+ */
+int main (int argc, char **argv)
+{
+ char *cpel_file = 0;
+ char *outputfile = 0;
+ FILE *ofp;
+ char *cpel;
+ int verbose=0;
+ int curarg=1;
+
+ while (curarg < argc) {
+ if (!strncmp(argv[curarg], "--input-file", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ cpel_file = argv[curarg];
+ curarg++;
+ continue;
+ }
+ fatal("Missing filename after --input-file\n");
+ }
+ if (!strncmp(argv[curarg], "--output-file", 3)) {
+ curarg ++;
+ if (curarg < argc) {
+ outputfile = argv[curarg];
+ curarg ++;
+ continue;
+ }
+ fatal("Missing filename after --output-file\n");
+ }
+ if (!strncmp(argv[curarg], "--verbose", 3)) {
+ curarg++;
+ verbose = 1;
+ continue;
+ }
+
+ usage:
+ fprintf(stderr,
+ "cpeldump --input-file <filename> [--output-file <filename>]\n");
+ fprintf(stderr, "%s\n", version);
+ exit(1);
+ }
+
+ if (cpel_file == 0)
+ goto usage;
+
+ clib_mem_init (0, ((uword)3<<30));
+
+ cpel = mapfile(cpel_file);
+ if (cpel == 0) {
+ fprintf(stderr, "Couldn't map %s...\n", cpel_file);
+ exit(1);
+ }
+
+ if (!outputfile) {
+ ofp = fdopen(1, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "Couldn't fdopen(1)?\n");
+ exit(1);
+ }
+ } else {
+ ofp = fopen(outputfile, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "Couldn't create %s...\n", outputfile);
+ exit(1);
+ }
+ }
+
+ the_strtab_hash = hash_create_string (0, sizeof (uword));
+ the_evtdef_hash = hash_create (0, sizeof (uword));
+ the_trackdef_hash = hash_create (0, sizeof (uword));
+
+#ifdef TEST_TRACK_INFO
+ {
+ bound_track_t *btp;
+ vec_add2(bound_tracks, btp, 1);
+ btp->track = 0;
+ btp->track_str = "cpu %d";
+ hash_set(the_trackdef_hash, 0, btp - bound_tracks);
+ hash_set(the_trackdef_hash, 1, btp - bound_tracks);
+ }
+#endif
+
+ if (cpel_dump((u8 *)cpel, verbose, ofp)) {
+ if (outputfile)
+ unlink(outputfile);
+ }
+
+ fclose(ofp);
+ return(0);
+}
diff --git a/src/tools/perftool/cpelinreg.c b/src/tools/perftool/cpelinreg.c
new file mode 100644
index 00000000..115afad7
--- /dev/null
+++ b/src/tools/perftool/cpelinreg.c
@@ -0,0 +1,892 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2008-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Search for O(N**2) functions bracketed by before/after
+ * events. The "before" event's datum is used as a tag, e.g. which function
+ * did we call that's strongly O(N).
+ */
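+
+/*
+ * Model: the i-th recorded instance (i = 0..N-1) of a tagged state
+ * is fit to a line t(i) = a + b*i by linreg(). The estimated total
+ * cost of N instances is then
+ *
+ * T(N) = N * (a + b*(N-1)/2)
+ *
+ * which report_actors() prints as "T(Ninst)"; a clearly positive
+ * slope b flags an O(N**2) aggregate cost.
+ */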
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <time.h>
+#include "cpel.h"
+
+FILE *g_ifp;
+char *g_ifile;
+
+typedef unsigned long long ulonglong;
+
+void process_traces (void);
+void record_instance (ulong tag, ulonglong time);
+void report_actors (void);
+void scatterplot_data(void);
+int entry_event, exit_event;
+int nokey;
+char *version = "cpelinreg 2.0";
+int model_these[10];
+int model_index;
+int summary_stats;
+ulonglong first_start_time;
+ulonglong last_end_time;
+ulonglong total_time;
+ulong scatterkey;
+int inline_mokus;
+
+typedef struct bound_track_ {
+ u32 track_code;
+ u32 *start_datum;
+ u8 *dup_event;
+ int state;
+ u64 *start_time;
+ u64 thread_timestamp;
+ u64 time_thread_on_cpu;
+} bound_track_t;
+
+bound_track_t *bound_tracks;
+uword *the_trackdef_hash;
+
+
+#define MAXSTACK 128
+
+typedef struct instance_ {
+ struct instance_ *next;
+ ulonglong time;
+}instance_t;
+
+typedef struct actor_ {
+ struct actor_ *next;
+ ulong key;
+ struct instance_ *first;
+ struct instance_ *last;
+ double a;
+ double b;
+ double min;
+ double max;
+ double mean;
+ double r;
+ ulong ninst;
+} actor_t;
+
+#define NBUCKETS 1811
+
+actor_t *hash[NBUCKETS];
+
+actor_t *find_or_create_actor (ulong key)
+{
+ ulong bucket;
+ actor_t *ap;
+ u8 *mem;
+
+ bucket = key % NBUCKETS;
+
+ ap = hash[bucket];
+
+ if (ap == NULL) {
+ /* Ensure 8-byte alignment to avoid (double) alignment faults */
+ mem = malloc(sizeof(*ap) + 4);
+ if (((uword)(mem)) & 0x7)
+ mem += 4;
+ ap = (actor_t *)mem;
+
+ if (ap == NULL) {
+ fprintf (stderr, "out of memory...\n");
+ exit (1);
+ }
+ ap->next = 0;
+ ap->key = key;
+ ap->first = 0;
+ ap->last = 0;
+ ap->a = 0.00;
+ ap->b = 0.00;
+ hash [bucket] = ap;
+ return (ap);
+ }
+
+ while (ap) {
+ if (ap->key == key)
+ return (ap);
+ ap = ap->next;
+ }
+
+ mem = malloc(sizeof(*ap)+4);
+ if (((uword)(mem) & 0x7))
+ mem += 4;
+ ap = (actor_t *)mem;
+
+ if (ap == NULL) {
+ fprintf (stderr, "out of memory...\n");
+ exit (1);
+ }
+ ap->key = key;
+ ap->first = 0;
+ ap->last = 0;
+ ap->a = 0.00;
+ ap->b = 0.00;
+
+ ap->next = hash[bucket];
+ hash[bucket] = ap;
+
+ return (ap);
+}
+
+void record_instance (ulong key, ulonglong time)
+{
+ actor_t *ap;
+ instance_t *ip;
+
+ if (nokey)
+ key = 0;
+
+ ap = find_or_create_actor (key);
+
+ ip = (instance_t *)malloc(sizeof(*ip));
+ if (ip == NULL) {
+ fprintf (stderr, "out of memory...\n");
+ exit (1);
+ }
+ ip->time = time;
+ ip->next = 0;
+
+ if (ap->first == 0) {
+ ap->first = ip;
+ ap->last = ip;
+ ap->ninst = 1;
+ } else {
+ ap->last->next = ip;
+ ap->last = ip;
+ ap->ninst++;
+ }
+}
+
+#define NINSTANCE 200000
+
+double x[NINSTANCE];
+double y[NINSTANCE];
+
+int actor_compare (const void *arg1, const void *arg2)
+{
+ double e10k1, e10k2;
+ actor_t **a1 = (actor_t **)arg1;
+ actor_t **a2 = (actor_t **)arg2;
+ double ninst1, ninst2;
+
+ ninst1 = ((double)((*a1)->ninst));
+ ninst2 = ((double)((*a2)->ninst));
+
+ e10k1 = ninst1 * ((*a1)->mean);
+ e10k2 = ninst2 * ((*a2)->mean);
+
+ if (e10k1 < e10k2)
+ return (1);
+ else if (e10k1 == e10k2)
+ return (0);
+ else
+ return (-1);
+}
+
+void report_actors (void)
+{
+ int i;
+ actor_t *ap;
+ instance_t *ip;
+ int nactors = 0;
+ int ninstance;
+ actor_t **actor_vector;
+ double e10k;
+ extern void linreg (double *x, double *y, int nitems, double *a, double *b,
+ double *minp, double *maxp, double *meanp, double *r);
+
+ for (i = 0; i < NBUCKETS; i++) {
+ ap = hash[i];
+ if (ap == NULL)
+ continue;
+ while (ap) {
+ nactors++;
+ ninstance = 0;
+
+ ip = ap->first;
+
+ while (ip) {
+ if (ninstance < NINSTANCE) {
+ x[ninstance] = ninstance;
+ y[ninstance] = ((double)ip->time);
+ ninstance++;
+ }
+ ip = ip->next;
+ }
+ if (ninstance > 1) {
+#if DEBUG > 0
+ int j;
+
+ for (j = 0; j < ninstance; j++) {
+ printf("x[%d] = %10.2f, y[%d] = %10.2f\n",
+ j, x[j], j, y[j]);
+ }
+#endif
+
+ linreg (x, y, ninstance, &ap->a, &ap->b, &ap->min,
+ &ap->max, &ap->mean, &ap->r);
+ } else {
+ ap->a = 0.00;
+ ap->b = 0.00;
+ }
+
+ ap = ap->next;
+ }
+ }
+
+ actor_vector = (actor_t **)malloc (nactors*sizeof(*actor_vector));
+ nactors = 0;
+
+ for (i = 0; i < NBUCKETS; i++) {
+ ap = hash[i];
+ if (ap == NULL)
+ continue;
+ while (ap) {
+ if ((ap->a != 0.00) || (ap->b != 0.00)) {
+ actor_vector[nactors++] = ap;
+ }
+ ap = ap->next;
+ }
+ }
+
+ qsort (actor_vector, nactors, sizeof (actor_t *), actor_compare);
+
+ if (summary_stats)
+ printf("NInst Offset Slope T(Ninst) Min Max Avg %%InstTime R Key");
+ else
+ printf("NInst Offset Slope T(Ninst) Key");
+
+ for (i = 0; i < model_index; i++) {
+ printf ("T @ %-8d ", model_these[i]);
+ }
+
+ printf ("\n");
+
+ for (i = 0; i < nactors; i++) {
+ int j;
+ double ninst;
+ double pcttot;
+ ap = actor_vector[i];
+ ninst = ap->ninst;
+
+ e10k = ninst * (ap->a + ap->b*((ninst-1.0)/2.0));
+
+ if (ap->ninst) {
+ if (summary_stats) {
+ pcttot = (e10k / ((double)total_time)) * 100.0;
+ printf ("%6ld %11.2f %11.2f %11.2f %11.2f %11.2f %11.2f %11.2f %11.2f 0x%08lx ",
+ ap->ninst, ap->a, ap->b, e10k, ap->min,
+ ap->max, ap->mean, pcttot, ap->r, ap->key);
+ }
+ else
+ printf ("%6ld %11.2f %11.2f %11.2f 0x%08lx ",
+ ap->ninst, ap->a, ap->b, e10k, ap->key);
+
+ for (j = 0; j < model_index; j++) {
+ ninst = model_these[j];
+ e10k = ninst * (ap->a + ap->b*((ninst-1.0)/2.0));
+ printf ("%10.2f ", e10k);
+ }
+ printf ("\n");
+ }
+ }
+}
+
+void scatterplot_data(void)
+{
+ actor_t *ap;
+ int i;
+ instance_t *ip;
+ double time;
+ int count=0;
+
+ for (i = 0; i < NBUCKETS; i++) {
+ ap = hash[i];
+ if (ap == NULL)
+ continue;
+ while (ap) {
+ if (ap->key == scatterkey){
+ ip = ap->first;
+ while (ip) {
+ time = ((double)ip->time);
+ printf ("%d\t%.0f\n", count++, time);
+ ip = ip->next;
+ }
+ return;
+ }
+ ap = ap->next;
+ }
+ }
+}
+
+
+void fatal(char *s)
+{
+ fprintf(stderr, "%s", s);
+ fprintf(stderr, "\n");
+ exit(1);
+}
+
+typedef enum {
+ PASS1=1,
+} pass_t;
+
+typedef struct {
+ int (*pass1)(cpel_section_header_t *, int, FILE *);
+} section_processor_t;
+
+int bad_section(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ fprintf(ofp, "Bad (type 0) section, skipped...\n");
+ return(0);
+}
+
+int noop_pass(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ return(0);
+}
+
+int unsupported_pass (cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ if (verbose) {
+ fprintf(ofp, "Unsupported type %d section\n",
+ ntohl(sh->section_type));
+ }
+ return(0);
+}
+
+int trackdef_pass(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ track_definition_section_header_t *tdh;
+ track_definition_t *tp;
+ u32 track_code;
+ uword *p;
+ bound_track_t *btp;
+
+ tdh = (track_definition_section_header_t *)(sh+1);
+ nevents = ntohl(tdh->number_of_track_definitions);
+
+ if (verbose) {
+ fprintf(stderr, "Track Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ tp = (track_definition_t *)(tdh+1);
+
+ for (i = 0; i < nevents; i++) {
+ track_code = ntohl(tp->track);
+ p = hash_get(the_trackdef_hash, track_code);
+ if (p) {
+ fprintf(ofp, "track %d redefined, retain first definition\n",
+ track_code);
+ continue;
+ }
+ vec_add2(bound_tracks, btp, 1);
+ btp->track_code = track_code;
+ hash_set(the_trackdef_hash, track_code, btp - bound_tracks);
+ tp++;
+ }
+ return (0);
+}
+
+
+int event_pass (cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ event_section_header_t *eh;
+ event_entry_t *ep;
+ f64 ticks_per_us;
+ long output_count;
+ long dup_events = 0;
+ ulonglong end_time = 0;
+ double t;
+ int sp, ancestor;
+ int nevents, i;
+ u64 now;
+ u64 time0, time1;
+ double d;
+ u32 last_track_code = 0xdeafb00b;
+ u32 track_code;
+ u32 event_code, event_datum;
+ bound_track_t *tp = 0;
+ uword *p;
+
+ output_count = 0;
+ total_time = 0;
+
+ eh = (event_section_header_t *)(sh+1);
+ nevents = ntohl(eh->number_of_events);
+ ticks_per_us = ((double)ntohl(eh->clock_ticks_per_second))/1e6;
+
+ if (verbose) {
+ fprintf(ofp, "%.3f ticks_per_us\n", ticks_per_us);
+ }
+
+ ep = (event_entry_t *)(eh+1);
+
+ time0 = ntohl (ep->time[0]);
+ time1 = ntohl (ep->time[1]);
+
+ now = (((u64) time0)<<32) | time1;
+ d = now;
+ d /= ticks_per_us;
+ first_start_time = d;
+
+ for (i = 0; i < nevents; i++) {
+ time0 = ntohl (ep->time[0]);
+ time1 = ntohl (ep->time[1]);
+
+ now = (((u64) time0)<<32) | time1;
+
+ /* Convert from bus ticks to usec */
+ d = now;
+ d /= ticks_per_us;
+
+ now = d;
+
+ track_code = ntohl(ep->track);
+ event_code = ntohl(ep->event_code);
+ event_datum = ntohl(ep->event_datum);
+
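+ /*
+ * On a track switch, bank the departing track's on-CPU time in
+ * thread_timestamp so stacked start times are in per-thread
+ * "virtual" time, excluding intervals while other tracks ran.
+ */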
+ if (track_code != last_track_code) {
+ if (tp) {
+ tp->thread_timestamp += now - tp->time_thread_on_cpu;
+ tp->time_thread_on_cpu = 0;
+ }
+ p = hash_get(the_trackdef_hash, track_code);
+ if (!p) {
+ /* synthesize a new track */
+ vec_add2(bound_tracks, tp, 1);
+ tp->track_code = track_code;
+ hash_set(the_trackdef_hash, track_code, tp - bound_tracks);
+ } else {
+ tp = bound_tracks + p[0];
+ }
+ last_track_code = track_code;
+ tp->time_thread_on_cpu = now;
+ }
+
+ if (event_code != entry_event &&
+ event_code != exit_event) {
+ ep++;
+ continue;
+ }
+
+ again:
+ switch (tp->state) {
+ case 0: /* not in state */
+ /* Another exit event? Stack pop */
+ if (event_code == exit_event) {
+ /* Only if we have something on the stack */
+ if (vec_len(tp->start_datum) > 0) {
+ tp->state = 1;
+ goto again;
+ } else {
+ fprintf (stderr,
+ "End event before start event, key 0x%x.",
+ ntohl(ep->event_datum));
+ fprintf (stderr, " Interpret results carefully...\n");
+ }
+ }
+
+ tp->state = 1;
+ if (vec_len(tp->start_datum) >= MAXSTACK) {
+ int j;
+
+ fprintf (stderr, "stack overflow..\n");
+ for (j = vec_len(tp->start_datum)-1; j >= 0; j--) {
+ fprintf(stderr, "stack[%d]: datum 0x%x\n",
+ j, tp->start_datum[j]);
+ }
+ fprintf (stderr,
+ "Stack overflow... This occurs when "
+ "(start, datum)...(end, datum) events\n"
+ "are not properly paired.\n\n"
+ "A typical scenario looks like this:\n\n"
+ " ...\n"
+ " ELOG(..., START_EVENT, datum);\n"
+ " if (condition)\n"
+ " return; /*oops, forgot the end event*/\n"
+ " ELOG(..., END_EVENT, datum);\n"
+ " ...\n\n"
+ "The datum stack dump (above) should make it clear\n"
+ "where to start looking for a sneak path...\n");
+
+ exit (1);
+ }
+ vec_add1(tp->start_datum, event_datum);
+ vec_add1(tp->start_time, (tp->thread_timestamp + (now - tp->time_thread_on_cpu)));
+#ifdef HAVING_TROUBLE
+ printf ("sp %lld key 0x%x start time %llu\n",
+ (long long) vec_len(tp->start_time)-1, event_datum,
+ (unsigned long long)
+ tp->start_time [vec_len(tp->start_time)-1]);
+ printf ("timestamp %llu, now %llu, thread on cpu %llu\n",
+ (unsigned long long) tp->thread_timestamp,
+ (unsigned long long) now,
+ (unsigned long long) tp->time_thread_on_cpu);
+#endif
+
+ /*
+ * Multiple identical enter events? If the user knows that
+ * gcc is producing bogus events due to inline functions,
+ * trash the duplicate.
+ */
+ if (inline_mokus
+ && vec_len (tp->start_datum) > 1
+ && tp->start_datum [vec_len(tp->start_datum)-1] ==
+ tp->start_datum [vec_len(tp->start_datum)-2]) {
+ vec_add1 (tp->dup_event, 1);
+ } else {
+ vec_add1 (tp->dup_event, 0);
+ }
+
+ ep++;
+ continue;
+
+ case 1: /* in state */
+ /* Another entry event? Stack push*/
+ if (event_code == entry_event) {
+ tp->state = 0;
+ goto again;
+ }
+
+ if (vec_len(tp->start_datum) == 0) {
+ fprintf (stderr, "Stack underflow...\n");
+ exit (1);
+ }
+
+ sp = vec_len(tp->start_time)-1;
+
+ end_time = tp->thread_timestamp + (now - tp->time_thread_on_cpu);
+
+ if (!tp->dup_event[sp]) {
+#ifdef HAVING_TROUBLE
+ printf ("sp %d key 0x%x charged %llu\n", sp,
+ tp->start_datum[sp], end_time - tp->start_time[sp]);
+ printf (" start %llu, end %llu\n", (unsigned long long) tp->start_time[sp],
+ (unsigned long long) end_time);
+#endif
+
+ record_instance (tp->start_datum[sp], (end_time -
+ tp->start_time[sp]));
+
+ /* Factor out our time from surrounding services, if any */
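+ /*
+ * Example: if service A's interval contains a 40us call to
+ * B, advancing A's stacked start_time by those 40us leaves
+ * A charged only for its own (self) time.
+ */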
+ for (ancestor = sp-1; ancestor >= 0; ancestor--) {
+#ifdef HAVING_TROUBLE
+ printf ("Factor out %lld from key 0x%08x\n",
+ (end_time - tp->start_time[sp]), tp->start_datum[ancestor]);
+#endif
+ tp->start_time[ancestor] += (end_time - tp->start_time[sp]);
+ }
+ output_count++;
+ total_time += (end_time - tp->start_time[sp]);
+ tp->state = 0;
+ } else {
+ dup_events++;
+ }
+ _vec_len(tp->start_datum) = sp;
+ _vec_len(tp->start_time) = sp;
+ _vec_len(tp->dup_event) = sp;
+ }
+
+ ep++;
+ }
+ last_end_time = now;
+
+ if (scatterkey) {
+ scatterplot_data();
+ exit (0);
+ }
+
+ if (output_count) {
+ t = (double)total_time;
+ printf ("%ld instances of state, %.2f microseconds average\n",
+ output_count, t / output_count);
+
+ printf ("Total instrumented runtime: %.2f microseconds\n",
+ ((double)total_time));
+ printf ("Total runtime: %lld microseconds\n",
+ last_end_time - first_start_time);
+
+ t /= (double)(last_end_time - first_start_time);
+ t *= 100.0;
+
+ if (dup_events) {
+ printf ("Suppressed %ld duplicate state entry events\n",
+ dup_events);
+ }
+ printf ("Instrumented code accounts for %.2f%% of total time.\n\n",
+ t);
+ report_actors();
+ } else {
+ printf ("No instances of state...\n");
+ }
+
+ return(0);
+}
+
+/*
+ * Note: If necessary, add passes / columns to this table to
+ * handle section order dependencies.
+ */
+
+section_processor_t processors[CPEL_NUM_SECTION_TYPES+1] =
+{
+ {unsupported_pass}, /* type 0 -- f**ked */
+ {noop_pass}, /* type 1 -- STRTAB */
+ {noop_pass}, /* type 2 -- SYMTAB */
+ {noop_pass}, /* type 3 -- EVTDEF */
+ {trackdef_pass}, /* type 4 -- TRACKDEF */
+ {event_pass}, /* type 5 -- EVENTS */
+};
+
+int process_section(cpel_section_header_t *sh, int verbose, FILE *ofp,
+ pass_t pass)
+{
+ u32 type;
+ int rv;
+ int (*fp)(cpel_section_header_t *, int, FILE *);
+
+ type = ntohl(sh->section_type);
+
+ if (type > CPEL_NUM_SECTION_TYPES) {
+ fprintf(stderr, "Unknown section type %d\n", type);
+ return(1);
+ }
+ switch(pass) {
+ case PASS1:
+ fp = processors[type].pass1;
+ break;
+
+ default:
+ fprintf(stderr, "Unknown pass %d\n", pass);
+ return(1);
+ }
+
+ rv = (*fp)(sh, verbose, ofp);
+
+ return(rv);
+}
+
+char *mapfile (char *file)
+{
+ struct stat statb;
+ char *rv;
+ int maphfile;
+ size_t mapfsize;
+
+ maphfile = open (file, O_RDONLY);
+
+ if (maphfile < 0)
+ {
+ fprintf (stderr, "Couldn't read %s, skipping it...\n", file);
+ return (NULL);
+ }
+
+ if (fstat (maphfile, &statb) < 0)
+ {
+ fprintf (stderr, "Couldn't get size of %s, skipping it...\n", file);
+ return (NULL);
+ }
+
+ /* Don't try to mmap directories, FIFOs, semaphores, etc. */
+ if (! (statb.st_mode & S_IFREG)) {
+ fprintf (stderr, "%s is not a regular file, skipping it...\n", file);
+ return (NULL);
+ }
+
+ mapfsize = statb.st_size;
+
+ if (mapfsize < 3)
+ {
+ fprintf (stderr, "%s zero-length, skipping it...\n", file);
+ close (maphfile);
+ return (NULL);
+ }
+
+ rv = mmap (0, mapfsize, PROT_READ, MAP_SHARED, maphfile, 0);
+
+ if (rv == MAP_FAILED)
+ {
+ fprintf (stderr, "%s problem mapping, I quit...\n", file);
+ exit (-1);
+ }
+ close (maphfile);
+ return (rv);
+}
+
+int process_file (u8 *cpel, int verbose)
+{
+ cpel_file_header_t *fh;
+ cpel_section_header_t *sh;
+ u16 nsections;
+ u32 section_size;
+ int i;
+ FILE *ofp = stderr;
+
+ /* First, the file header */
+ fh = (cpel_file_header_t *)cpel;
+ if (fh->endian_version != CPEL_FILE_VERSION) {
+ if (fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) {
+ fprintf(stderr, "Little endian data format not supported\n");
+ return(1);
+ }
+ fprintf(stderr, "Unsupported file version 0x%x\n",
+ fh->endian_version);
+ return(1);
+ }
+ nsections = ntohs(fh->nsections);
+
+ /*
+ * Take a single pass through the file.
+ */
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ section_size = ntohl(sh->data_length);
+
+ if(verbose) {
+ fprintf(ofp, "Section type %d, size %d\n",
+ ntohl(sh->section_type),
+ section_size);
+ }
+
+ if(process_section(sh, verbose, ofp, PASS1))
+ return(1);
+
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+
+ return(0);
+}
+
+/****************************************************************************
+* main -
+****************************************************************************/
+
+int main (int argc, char **argv)
+{
+ int curarg = 1;
+ u8 *cpel = 0;
+ int verbose = 0;
+
+ if (argc < 6)
+ {
+ fprintf (stderr, "usage: cpelinreg -i <file>\n");
+      fprintf (stderr, "          -s start-event -e end-event [-nokey]\n");
+      fprintf (stderr, "          [-m <ninst-to-model>][-xtra-stats]\n");
+      fprintf (stderr, "          [-keyscatterplot <hex-key>] [-verbose]\n\n");
+ fprintf (stderr, "%s\n", version);
+ exit (1);
+ }
+
+ while (curarg < argc) {
+ if (!strncmp (argv[curarg], "-ifile", 2)) {
+ curarg++;
+ g_ifile = argv[curarg++];
+ continue;
+ }
+ if (!strncmp (argv[curarg], "-start", 2)) {
+ curarg++;
+ entry_event = atol (argv [curarg++]);
+ continue;
+ }
+ if (!strncmp (argv[curarg], "-end", 2)) {
+ curarg++;
+ exit_event = atol (argv [curarg++]);
+ continue;
+ }
+
+ if (!strncmp(argv[curarg], "-badinlines", 2)) {
+ curarg++;
+ inline_mokus = 1;
+ continue;
+ }
+
+ if (!strncmp (argv[curarg], "-x", 2)) {
+ curarg++;
+ summary_stats=1;
+ continue;
+ }
+ if (!strncmp (argv[curarg], "-nokey", 2)) {
+ curarg++;
+ nokey = 1;
+ continue;
+ }
+ if (!strncmp (argv[curarg], "-keyscatterplot", 2)) {
+ curarg++;
+ sscanf (argv[curarg], "%lx", &scatterkey);
+ curarg++;
+ continue;
+ }
+
+ if (!strncmp (argv[curarg], "-model", 2)) {
+ if (model_index >= sizeof(model_these) / sizeof(int)) {
+ fprintf (stderr, "Too many model requests\n");
+ exit (1);
+ }
+ curarg++;
+ model_these[model_index++] = atol (argv [curarg++]);
+ continue;
+ }
+ if (!strncmp (argv[curarg], "-verbose", 2)) {
+ verbose++;
+ curarg++;
+ continue;
+ }
+
+ fprintf (stderr, "unknown switch '%s'\n", argv[curarg]);
+ exit (1);
+ }
+
+ cpel = (u8 *)mapfile(g_ifile);
+
+ if (cpel == NULL)
+ {
+ fprintf (stderr, "Couldn't open %s\n", g_ifile);
+ exit (1);
+ }
+
+ printf ("Extracting state info from %s\nentry_event %d, exit_event %d\n",
+ g_ifile, entry_event, exit_event);
+ if (nokey) {
+ printf ("All state instances mapped to a single actor chain\n");
+ }
+
+ the_trackdef_hash = hash_create (0, sizeof (uword));
+
+ process_file(cpel, verbose);
+ exit (0);
+}
diff --git a/src/tools/perftool/cpelstate.c b/src/tools/perftool/cpelstate.c
new file mode 100644
index 00000000..3fd9ccb9
--- /dev/null
+++ b/src/tools/perftool/cpelstate.c
@@ -0,0 +1,822 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <time.h>
+#include "cpel.h"
+#include <math.h>
+
+char *time_format = "%.03d:%.02d:%.02d:%.03d:%.03d ";
+static char version[] = "cpelstate 2.0h";
+
+#define USEC_PER_MS 1000LL
+#define USEC_PER_SECOND (1000*USEC_PER_MS)
+#define USEC_PER_MINUTE (60*USEC_PER_SECOND)
+#define USEC_PER_HOUR (60*USEC_PER_MINUTE)
+
+uword *the_strtab_hash; /* (name, base-VA) hash of all string tables */
+uword *the_evtdef_hash; /* (event-id, event-definition) hash */
+uword *the_trackdef_hash; /* (track-id, track-definition) hash */
+
+f64 ticks_per_us;
+u32 state_event_code = 1; /* default: XR thread-on-cpu */
+int exclude_kernel_from_summary_stats=1;
+int summary_stats_only;
+int scatterplot;
+u8 *name_filter;
+
+typedef enum {
+ SORT_MAX_TIME=1,
+ SORT_MAX_OCCURRENCES,
+ SORT_NAME,
+} sort_t;
+
+sort_t sort_type = SORT_MAX_TIME;
+
+int widest_name_format=5;
+int widest_track_format=5;
+
+typedef struct bound_event_ {
+ u32 event_code;
+ u8 *event_str;
+ u8 *datum_str;
+ u32 is_strtab_ref;
+} bound_event_t;
+
+bound_event_t *bound_events;
+
+typedef struct bound_track_ {
+ u32 track;
+ u8 *track_str;
+ u64 *ticks_in_state; /* vector of state occurrences */
+ f64 mean_ticks_in_state;
+ f64 variance_ticks_in_state;
+ f64 total_ticks_in_state;
+} bound_track_t;
+
+bound_track_t *bound_tracks;
+
+void fatal(char *s)
+{
+ fprintf(stderr, "%s", s);
+ exit(1);
+}
+
+typedef enum {
+ PASS1=1,
+ PASS2=2,
+} pass_t;
+
+typedef struct {
+ int (*pass1)(cpel_section_header_t *, int, FILE *);
+ int (*pass2)(cpel_section_header_t *, int, FILE *);
+} section_processor_t;
+
+int bad_section(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ fprintf(ofp, "Bad (type 0) section, skipped...\n");
+ return(0);
+}
+
+int noop_pass(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ return(0);
+}
+
+int strtab_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ uword *p;
+ u8 *strtab_data_area = (u8 *)(sh+1);
+
+ /* Multiple string tables with the same name are Bad... */
+ p = hash_get_mem(the_strtab_hash, strtab_data_area);
+ if (p) {
+        fprintf(ofp, "Duplicate string table name %s\n", strtab_data_area);
+ }
+ /*
+ * Looks funny, but we really do want key = first string in the
+ * table, value = address(first string in the table)
+ */
+ hash_set_mem(the_strtab_hash, strtab_data_area, strtab_data_area);
+ if (verbose) {
+ fprintf(ofp, "String Table %s\n", strtab_data_area);
+ }
+ return(0);
+}
+
+int evtdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ event_definition_section_header_t *edh;
+ event_definition_t *ep;
+ u8 *this_strtab;
+ u32 event_code;
+ uword *p;
+ bound_event_t *bp;
+ int thislen;
+
+ edh = (event_definition_section_header_t *)(sh+1);
+ nevents = ntohl(edh->number_of_event_definitions);
+
+ if (verbose) {
+ fprintf(ofp, "Event Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ p = hash_get_mem(the_strtab_hash, edh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ ep = (event_definition_t *)(edh+1);
+
+ for (i = 0; i < nevents; i++) {
+ event_code = ntohl(ep->event);
+ p = hash_get(the_evtdef_hash, event_code);
+ if (p) {
+ fprintf(ofp, "Event %d redefined, retain first definition\n",
+ event_code);
+ continue;
+ }
+ vec_add2(bound_events, bp, 1);
+ bp->event_code = event_code;
+ bp->event_str = this_strtab + ntohl(ep->event_format);
+ bp->datum_str = this_strtab + ntohl(ep->datum_format);
+ bp->is_strtab_ref = 0;
+ /* Decide if the datum format is a %s format => strtab reference */
+ {
+ int j;
+ int seen_percent=0;
+
+ for (j = 0; j < strlen((char *)(bp->datum_str)); j++) {
+ if (bp->datum_str[j] == '%'){
+ seen_percent=1;
+ continue;
+ }
+ if (seen_percent && bp->datum_str[j] == 's') {
+ bp->is_strtab_ref = 1;
+ }
+ }
+ }
+
+ hash_set(the_evtdef_hash, event_code, bp - bound_events);
+
+ thislen = strlen((char *)bp->event_str);
+ if (thislen > widest_name_format)
+ widest_name_format = thislen;
+
+ ep++;
+ }
+ return (0);
+}
+
+int trackdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ int i, nevents;
+ track_definition_section_header_t *tdh;
+ track_definition_t *tp;
+ u8 *this_strtab;
+ u32 track_code;
+ uword *p;
+ bound_track_t *btp;
+ int thislen;
+
+ tdh = (track_definition_section_header_t *)(sh+1);
+ nevents = ntohl(tdh->number_of_track_definitions);
+
+ if (verbose) {
+ fprintf(ofp, "Track Definition Section: %d definitions\n",
+ nevents);
+ }
+
+ p = hash_get_mem(the_strtab_hash, tdh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+ this_strtab = (u8 *)p[0];
+
+ tp = (track_definition_t *)(tdh+1);
+
+ for (i = 0; i < nevents; i++) {
+ track_code = ntohl(tp->track);
+ p = hash_get(the_trackdef_hash, track_code);
+ if (p) {
+ fprintf(ofp, "track %d redefined, retain first definition\n",
+ track_code);
+ continue;
+ }
+ vec_add2(bound_tracks, btp, 1);
+ btp->track = track_code;
+ btp->track_str = this_strtab + ntohl(tp->track_format);
+ hash_set(the_trackdef_hash, track_code, btp - bound_tracks);
+
+ thislen = strlen((char *)(btp->track_str));
+ if (thislen > widest_track_format)
+ widest_track_format = thislen;
+ tp++;
+ }
+ return (0);
+}
+
+int unsupported_pass (cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ if (verbose) {
+ fprintf(ofp, "Unsupported type %d section\n",
+ ntohl(sh->section_type));
+ }
+ return(0);
+}
+
+int event_pass2(cpel_section_header_t *sh, int verbose, FILE *ofp)
+{
+ event_section_header_t *eh;
+ u32 track_code;
+ int nevents;
+ int i;
+ uword *p;
+ event_entry_t *ep;
+ u64 now;
+ u32 time0, time1;
+ bound_track_t generic_track;
+ u32 last_track_code;
+ u64 state_start_ticks=0;
+ u64 ticks_in_state;
+ bound_track_t *state_track=0;
+ int in_state=0;
+ generic_track.track_str = (u8 *) "%d";
+ last_track_code = 0xdeafbeef;
+
+ eh = (event_section_header_t *)(sh+1);
+ nevents = ntohl(eh->number_of_events);
+ ticks_per_us = ((double)ntohl(eh->clock_ticks_per_second))/1e6;
+
+ if (verbose) {
+ fprintf(ofp, "%.3f ticks_per_us\n", ticks_per_us);
+ }
+
+ ep = (event_entry_t *)(eh+1);
+
+ p = hash_get_mem(the_strtab_hash, eh->string_table_name);
+ if (!p) {
+ fprintf(ofp, "Fatal: couldn't find string table\n");
+ return(1);
+ }
+
+ for (i = 0; i < nevents; i++) {
+ time0 = ntohl (ep->time[0]);
+ time1 = ntohl (ep->time[1]);
+
+ now = (((u64) time0)<<32) | time1;
+
+ /* Found the state-change event ? */
+ if (ntohl(ep->event_code) == state_event_code) {
+ /*
+ * Add a ticks-in-state record, unless
+ * this is the "prime mover" event instance
+ */
+ if (in_state) {
+ ticks_in_state = now - state_start_ticks;
+ vec_add1(state_track->ticks_in_state, ticks_in_state);
+ }
+ /* switch to now-current track */
+ state_start_ticks = now;
+ track_code = ntohl(ep->track);
+ if (track_code != last_track_code) {
+ p = hash_get(the_trackdef_hash, track_code);
+ if (p) {
+ state_track = &bound_tracks[p[0]];
+ } else {
+ state_track = &generic_track;
+ }
+ last_track_code = track_code;
+ }
+ in_state = 1;
+ }
+ ep++;
+ }
+ return(0);
+}
+
+/*
+ * Note: If necessary, add passes / columns to this table to
+ * handle section order dependencies.
+ */
+
+section_processor_t processors[CPEL_NUM_SECTION_TYPES+1] =
+{
+    {bad_section, noop_pass},          /* type 0 -- invalid */
+ {strtab_pass1, noop_pass}, /* type 1 -- STRTAB */
+ {unsupported_pass, noop_pass}, /* type 2 -- SYMTAB */
+ {evtdef_pass1, noop_pass}, /* type 3 -- EVTDEF */
+ {trackdef_pass1, noop_pass}, /* type 4 -- TRACKDEF */
+ {noop_pass, event_pass2}, /* type 5 -- EVENTS */
+};
+
+
+int process_section(cpel_section_header_t *sh, int verbose, FILE *ofp,
+ pass_t pass)
+{
+    u32 type;
+    int rv;
+    int (*fp)(cpel_section_header_t *, int, FILE *);
+
+    type = ntohl(sh->section_type);
+
+ if (type > CPEL_NUM_SECTION_TYPES) {
+ fprintf(stderr, "Unknown section type %d\n", type);
+ return(1);
+ }
+ switch(pass) {
+ case PASS1:
+ fp = processors[type].pass1;
+ break;
+
+ case PASS2:
+ fp = processors[type].pass2;
+ break;
+
+ default:
+ fprintf(stderr, "Unknown pass %d\n", pass);
+ return(1);
+ }
+
+ rv = (*fp)(sh, verbose, ofp);
+
+ return(rv);
+}
+
+int cpel_dump_file_header(cpel_file_header_t *fh, int verbose, FILE *ofp)
+{
+ time_t file_time;
+
+ if (verbose) {
+ fprintf(ofp, "CPEL file: %s-endian, version %d\n",
+ ((fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) ?
+ "little" : "big"),
+ fh->endian_version & CPEL_FILE_VERSION_MASK);
+
+ file_time = ntohl(fh->file_date);
+
+ fprintf(ofp, "File created %s", ctime(&file_time));
+ fprintf(ofp, "File has %d sections\n",
+ ntohs(fh->nsections));
+ }
+
+ return(0);
+}
+
+
+int cpel_dump(u8 *cpel, int verbose, FILE *ofp)
+{
+ cpel_file_header_t *fh;
+ cpel_section_header_t *sh;
+ u16 nsections;
+ u32 section_size;
+ int i;
+
+ /* First, the file header */
+ fh = (cpel_file_header_t *)cpel;
+ if (fh->endian_version != CPEL_FILE_VERSION) {
+ if (fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) {
+ fprintf(stderr, "Little endian data format not supported\n");
+ return(1);
+ }
+ fprintf(stderr, "Unsupported file version 0x%x\n",
+ fh->endian_version);
+ return(1);
+ }
+ cpel_dump_file_header(fh, verbose, ofp);
+ nsections = ntohs(fh->nsections);
+
+ /*
+ * Take two passes through the file. PASS1 builds
+ * data structures, PASS2 actually dumps the file.
+ * Just in case the sections are in an unobvious order.
+ */
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ section_size = ntohl(sh->data_length);
+
+ if(verbose) {
+ fprintf(ofp, "Section type %d, size %d\n", ntohl(sh->section_type),
+ section_size);
+ }
+
+ if(process_section(sh, verbose, ofp, PASS1))
+ return(1);
+
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+
+ sh = (cpel_section_header_t *)(fh+1);
+ for (i = 0; i < nsections; i++) {
+ if(process_section(sh, verbose, ofp, PASS2))
+ return(1);
+ section_size = ntohl(sh->data_length);
+ sh++;
+ sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
+ }
+ return(0);
+}
+
+void compute_state_statistics(int verbose, FILE *ofp)
+{
+ int i, j;
+ bound_track_t *bp;
+ f64 fticks;
+
+ /* Across the bound tracks */
+ for (i = 0; i < vec_len(bound_tracks); i++) {
+ bp = &bound_tracks[i];
+ bp->mean_ticks_in_state = 0.0;
+ bp->variance_ticks_in_state = 0.0;
+ bp->total_ticks_in_state = 0.0;
+ for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
+ bp->total_ticks_in_state += (f64) bp->ticks_in_state[j];
+ }
+ /* Compute mean */
+ if (vec_len(bp->ticks_in_state)) {
+ bp->mean_ticks_in_state = bp->total_ticks_in_state /
+ ((f64) vec_len(bp->ticks_in_state));
+ }
+ /* Accumulate sum: (Xi-Xbar)**2 */
+ for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
+ fticks = bp->ticks_in_state[j];
+ bp->variance_ticks_in_state +=
+ (fticks - bp->mean_ticks_in_state)*
+ (fticks - bp->mean_ticks_in_state);
+ }
+ /* Compute s**2, the unbiased estimator of sigma**2 */
+ if (vec_len(bp->ticks_in_state) > 1) {
+ bp->variance_ticks_in_state /= (f64)
+ (vec_len(bp->ticks_in_state)-1);
+ }
+ }
+}
+
+int track_compare_max (const void *arg1, const void *arg2)
+{
+ bound_track_t *a1 = (bound_track_t *)arg1;
+ bound_track_t *a2 = (bound_track_t *)arg2;
+ f64 v1, v2;
+
+ v1 = a1->total_ticks_in_state;
+ v2 = a2->total_ticks_in_state;
+
+ if (v1 < v2)
+ return (1);
+ else if (v1 == v2)
+ return (0);
+ else return (-1);
+}
+
+int track_compare_occurrences (const void *arg1, const void *arg2)
+{
+ bound_track_t *a1 = (bound_track_t *)arg1;
+ bound_track_t *a2 = (bound_track_t *)arg2;
+ f64 v1, v2;
+
+ v1 = (f64) vec_len(a1->ticks_in_state);
+ v2 = (f64) vec_len(a2->ticks_in_state);
+
+ if (v1 < v2)
+ return (1);
+ else if (v1 == v2)
+ return (0);
+ else return (-1);
+}
+
+int track_compare_name (const void *arg1, const void *arg2)
+{
+ bound_track_t *a1 = (bound_track_t *)arg1;
+ bound_track_t *a2 = (bound_track_t *)arg2;
+
+ return (strcmp((char *)(a1->track_str), (char *)(a2->track_str)));
+}
+
+void sort_state_statistics(sort_t type, FILE *ofp)
+{
+ int (*compare)(const void *, const void *)=0;
+
+ if (summary_stats_only)
+ return;
+
+ switch(type) {
+ case SORT_MAX_TIME:
+ fprintf(ofp, "Results sorted by max time in state.\n");
+ compare = track_compare_max;
+ break;
+
+ case SORT_MAX_OCCURRENCES:
+ fprintf(ofp, "Results sorted by max occurrences of state.\n");
+ compare = track_compare_occurrences;
+ break;
+
+ case SORT_NAME:
+ compare = track_compare_name;
+ fprintf(ofp, "Results sorted by process-id/name/thread ID\n");
+ break;
+
+ default:
+ fatal("sort type not set?");
+ }
+
+ qsort (bound_tracks, vec_len(bound_tracks),
+ sizeof (bound_track_t), compare);
+}
+
+void print_state_statistics(int verbose, FILE *ofp)
+{
+ int i,j;
+ u8 *trackpad;
+ bound_track_t *bp;
+ f64 total_time = 0.0;
+ f64 total_switches = 0.0;
+
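+    /* Build a left-justified column format, e.g. "%-17s ", sized to the
+       widest track name seen during pass 1 */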
+ trackpad = format(0, "%%-%ds ", widest_track_format);
+ vec_add1(trackpad, 0);
+
+ if (!summary_stats_only) {
+ fprintf(ofp, (char *)trackpad, "ProcThread");
+ fprintf(ofp, " Mean(us) Stdev(us) Total(us) N\n");
+ }
+
+ for (i = 0; i < vec_len(bound_tracks); i++) {
+ bp = &bound_tracks[i];
+ if (bp->mean_ticks_in_state == 0.0)
+ continue;
+
+ if (name_filter &&
+ strncmp((char *)(bp->track_str), (char *)name_filter,
+ strlen((char *)name_filter)))
+ continue;
+
+ /*
+ * Exclude kernel threads (e.g. idle thread) from
+ * state statistics
+ */
+ if (exclude_kernel_from_summary_stats &&
+ !strncmp((char *)(bp->track_str), "kernel ", 7))
+ continue;
+
+ total_switches += (f64) vec_len(bp->ticks_in_state);
+
+ if (!summary_stats_only) {
+ fprintf(ofp, (char *) trackpad, bp->track_str);
+ fprintf(ofp, "%10.3f +- %10.3f",
+ bp->mean_ticks_in_state / ticks_per_us,
+ sqrt(bp->variance_ticks_in_state)
+ / (f64) ticks_per_us);
+ fprintf(ofp, "%12.3f",
+ bp->total_ticks_in_state / ticks_per_us);
+ fprintf(ofp, "%8d\n", (int)vec_len(bp->ticks_in_state));
+ }
+
+ if (scatterplot) {
+ for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
+ fprintf(ofp, "%.3f\n",
+ (f64)bp->ticks_in_state[j] / ticks_per_us);
+ }
+ }
+
+ total_time += bp->total_ticks_in_state;
+ }
+
+ if (!summary_stats_only)
+ fprintf(ofp, "\n");
+ fprintf(ofp, "Note: the following statistics %s kernel-thread activity.\n",
+ exclude_kernel_from_summary_stats ? "exclude" : "include");
+ if (name_filter)
+ fprintf(ofp,
+ "Note: only pid/proc/threads matching '%s' are included.\n",
+ name_filter);
+
+ fprintf(ofp,
+ "Total runtime: %10.3f (us), Total state switches: %.0f\n",
+ total_time / ticks_per_us, total_switches);
+ fprintf(ofp, "Average time in state: %10.3f (us)\n",
+ (total_time / total_switches) / ticks_per_us);
+}
+
+char *mapfile (char *file)
+{
+ struct stat statb;
+ char *rv;
+ int maphfile;
+ size_t mapfsize;
+
+ maphfile = open (file, O_RDONLY);
+
+ if (maphfile < 0)
+ {
+ fprintf (stderr, "Couldn't read %s, skipping it...\n", file);
+ return (NULL);
+ }
+
+  if (fstat (maphfile, &statb) < 0)
+    {
+      fprintf (stderr, "Couldn't get size of %s, skipping it...\n", file);
+      close (maphfile);
+      return (NULL);
+    }
+
+  /* Don't try to mmap directories, FIFOs, semaphores, etc. */
+  if (! (statb.st_mode & S_IFREG)) {
+      fprintf (stderr, "%s is not a regular file, skipping it...\n", file);
+      close (maphfile);
+      return (NULL);
+  }
+
+ mapfsize = statb.st_size;
+
+ if (mapfsize < 3)
+ {
+      fprintf (stderr, "%s too short, skipping it...\n", file);
+ close (maphfile);
+ return (NULL);
+ }
+
+ rv = mmap (0, mapfsize, PROT_READ, MAP_SHARED, maphfile, 0);
+
+  if (rv == MAP_FAILED)
+ {
+ fprintf (stderr, "%s problem mapping, I quit...\n", file);
+ exit (-1);
+ }
+ close (maphfile);
+ return (rv);
+}
+
+/*
+ * main
+ */
+int main (int argc, char **argv)
+{
+ char *cpel_file = 0;
+ char *outputfile = 0;
+ FILE *ofp;
+ char *cpel;
+ int verbose=0;
+ int curarg=1;
+
+ while (curarg < argc) {
+ if (!strncmp(argv[curarg], "--input-file", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ cpel_file = argv[curarg];
+ curarg++;
+ continue;
+ }
+ fatal("Missing filename after --input-file\n");
+ }
+ if (!strncmp(argv[curarg], "--output-file", 3)) {
+ curarg ++;
+ if (curarg < argc) {
+ outputfile = argv[curarg];
+ curarg ++;
+ continue;
+ }
+ fatal("Missing filename after --output-file\n");
+ }
+ if (!strncmp(argv[curarg], "--verbose", 3)) {
+ curarg++;
+ verbose++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--scatterplot", 4)) {
+ curarg++;
+ scatterplot=1;
+ continue;
+ }
+
+ if (!strncmp(argv[curarg], "--state-event", 4)) {
+ curarg++;
+ if (curarg < argc) {
+ state_event_code = atol(argv[curarg]);
+ curarg ++;
+ continue;
+ }
+ fatal("Missing integer after --state-event\n");
+ }
+ if (!strncmp(argv[curarg], "--max-time-sort", 7)) {
+ sort_type = SORT_MAX_TIME;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--max-occurrence-sort", 7)) {
+ sort_type = SORT_MAX_OCCURRENCES;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--name-sort", 3)) {
+ sort_type = SORT_NAME;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--kernel-included", 3)) {
+ exclude_kernel_from_summary_stats = 0;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--summary", 3)) {
+ summary_stats_only=1;
+ curarg++;
+ continue;
+ }
+ if (!strncmp(argv[curarg], "--filter", 3)) {
+ curarg ++;
+ if (curarg < argc) {
+ name_filter = (u8 *)argv[curarg];
+ curarg ++;
+ continue;
+ }
+ fatal("Missing filter string after --filter\n");
+ }
+
+
+ usage:
+ fprintf(stderr,
+ "cpelstate --input-file <filename> [--output-file <filename>]\n");
+ fprintf(stderr,
+ " [--state-event <decimal>] [--verbose]\n");
+ fprintf(stderr,
+ " [--max-time-sort(default) | --max-occurrence-sort |\n");
+
+ fprintf(stderr,
+            "                 --name-sort] [--kernel-included]\n");
+
+ fprintf(stderr,
+            "                [--summary] [--scatterplot]\n");
+
+ fprintf(stderr, "%s\n", version);
+ exit(1);
+ }
+
+ if (cpel_file == 0)
+ goto usage;
+
+ cpel = mapfile(cpel_file);
+ if (cpel == 0) {
+ fprintf(stderr, "Couldn't map %s...\n", cpel_file);
+ exit(1);
+ }
+
+ if (!outputfile) {
+ ofp = fdopen(1, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "Couldn't fdopen(1)?\n");
+ exit(1);
+ }
+ } else {
+ ofp = fopen(outputfile, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "Couldn't create %s...\n", outputfile);
+ exit(1);
+ }
+ }
+
+ the_strtab_hash = hash_create_string (0, sizeof (uword));
+ the_evtdef_hash = hash_create (0, sizeof (uword));
+ the_trackdef_hash = hash_create (0, sizeof (uword));
+
+ if (cpel_dump((u8 *) cpel, verbose, ofp)) {
+ if (outputfile)
+ unlink(outputfile);
+ }
+
+ compute_state_statistics(verbose, ofp);
+ sort_state_statistics(sort_type, ofp);
+ print_state_statistics(verbose, ofp);
+
+ fclose(ofp);
+ return(0);
+}
diff --git a/src/tools/perftool/delsvec.c b/src/tools/perftool/delsvec.c
new file mode 100644
index 00000000..724935d3
--- /dev/null
+++ b/src/tools/perftool/delsvec.c
@@ -0,0 +1,315 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Break up a delimited string into a vector of substrings */
+
+#include <stdio.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <stdarg.h>
+
+/*
+ * #define UNIT_TESTS 1
+ * #define MATCH_TRACE 1
+ */
+
+/*
+ * delsvec
+ * break up an input string into a vector of [null-terminated] u8 *'s
+ *
+ * Each supplied delimiter character results in a string in the output
+ * vector, unless the delimiters occur back-to-back. When matched,
+ * a whitespace character in the delimiter consumes an arbitrary
+ * run of whitespace. See the unit tests at the end of this file
+ * for a set of examples.
+ *
+ * Returns a u8 **, or NULL if the input fails to match. It is assumed
+ * that both input and fmt are C strings, not necessarily vectors.
+ *
+ * Output strings are both vectors and proper C strings.
+ */
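+
+/*
+ * Illustrative examples (derived from the rules above and the unit
+ * tests at the end of this file):
+ *
+ *   delsvec("1.2.3", "..*")  => {"1", "2", "3"}
+ *   delsvec("a  b c", "  *") => {"a", "b", "c"}
+ *
+ * A trailing '*' consumes the remainder of the input; leftover
+ * unmatched input makes the whole match fail (NULL return).
+ */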
+
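+/* Free-lists of recycled strings and svecs, reused across delsvec calls */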
+static u8 **string_cache;
+static u8 **svec_cache;
+
+void delsvec_recycle_this_string (u8 *s)
+{
+ if (s) {
+ _vec_len (s) = 0;
+ vec_add1(string_cache, s);
+ }
+}
+
+void delsvec_recycle_this_svec (u8 **svec)
+{
+ if (svec) {
+ if (svec_cache) {
+ vec_free (svec_cache);
+ }
+ _vec_len (svec) = 0;
+ svec_cache = svec;
+ }
+}
+
+int pvl (char *a)
+{
+ return vec_len(a);
+}
+
+u8 **delsvec(void *input_arg, char *fmt)
+{
+ u8 **rv = 0;
+ int input_index=0;
+ u8 *this;
+ int dirflag=0;
+ int i;
+ u8 *input = input_arg;
+
+ if (svec_cache) {
+ rv = svec_cache;
+ svec_cache = 0;
+ }
+
+ while (fmt) {
+ dirflag=0;
+ if (vec_len (string_cache) > 0) {
+ this = string_cache [vec_len(string_cache)-1];
+ _vec_len (string_cache) = vec_len (string_cache) - 1;
+ } else
+ this = 0;
+ /*
+ * '*' means one of two things: match the rest of the input,
+ * or match as many characters as possible
+ */
+ if (fmt[0] == '*') {
+ fmt++;
+ dirflag=1;
+ /*
+ * no more format: eat rest of string...
+ */
+ if (!fmt[0]) {
+ for (;input[input_index]; input_index++)
+ vec_add1(this, input[input_index]);
+ if (vec_len(this)) {
+ vec_add1(this, 0);
+#ifdef MATCH_TRACE
+ printf("final star-match adds: '%s'\n", this);
+#endif
+ vec_add1(rv, this);
+ } else {
+ vec_add1(string_cache, this);
+ }
+
+ return(rv);
+ }
+ }
+ /*
+ * Left-to-right scan, adding chars until next delimiter char
+ * appears.
+ */
+ if (!dirflag) {
+ while (input[input_index]) {
+ if (input[input_index] == fmt[0]) {
+ /* If we just (exact) matched a whitespace delimiter */
+ if (fmt[0] == ' '){
+ /* scan forward eating whitespace */
+ while (input[input_index] == ' ' ||
+ input[input_index] == '\t' ||
+ input[input_index] == '\n')
+ input_index++;
+ input_index--;
+ }
+ goto found;
+ }
+ /* If we're looking for whitespace */
+ if (fmt[0] == ' ') {
+ /* and we have whitespace */
+ if (input[input_index] == ' ' ||
+ input[input_index] == '\t' ||
+ input[input_index] == '\n') {
+ /* scan forward eating whitespace */
+ while (input[input_index] == ' ' ||
+ input[input_index] == '\t' ||
+ input[input_index] == '\n') {
+ input_index++;
+ }
+ input_index--;
+ goto found;
+ }
+ }
+ /* Not a delimiter, save it */
+ vec_add1(this, input[input_index]);
+ input_index++;
+ }
+ /*
+ * Fell off the wagon, clean up and bail out
+ */
+ bail:
+
+#ifdef MATCH_TRACE
+ printf("failed, fmt[0] = '%c', input[%d]='%s'\n",
+ fmt[0], input_index, &input[input_index]);
+#endif
+ delsvec_recycle_this_string(this);
+ for (i = 0; i < vec_len(rv); i++)
+ delsvec_recycle_this_string(rv[i]);
+ delsvec_recycle_this_svec(rv);
+ return(0);
+
+ found:
+ /*
+ * Delimiter matched
+ */
+ input_index++;
+ fmt++;
+ /*
+ * If we actually accumulated non-delimiter characters,
+ * add them to the result vector
+ */
+ if (vec_len(this)) {
+ vec_add1(this, 0);
+#ifdef MATCH_TRACE
+ printf("match: add '%s'\n", this);
+#endif
+ vec_add1(rv, this);
+ } else {
+ vec_add1(string_cache, this);
+ }
+ } else {
+ /*
+ * right-to-left scan, '*' not at
+ * the end of the delimiter string
+ */
+ i = input_index;
+ while (input[++i])
+ ; /* scan forward */
+ i--;
+ while (i > input_index) {
+ if (input[i] == fmt[0])
+ goto found2;
+
+ if (fmt[0] == ' ' || fmt[0] == '\t' ||
+ fmt[0] == '\n') {
+ if (input[i] == ' ' ||
+ input[i] == '\t' ||
+ input[i] == '\n')
+ goto found2;
+ }
+ i--;
+ }
+ goto bail;
+
+ found2:
+ for (; input_index < i; input_index++) {
+ vec_add1(this, input[input_index]);
+ }
+ input_index++;
+ fmt++;
+ vec_add1(this, 0);
+#ifdef MATCH_TRACE
+ printf("inner '*' match: add '%s'\n", this);
+#endif
+ vec_add1(rv, this);
+ }
+ }
+ return (rv);
+}
+
+#ifdef UNIT_TESTS
+
+typedef struct utest_ {
+ char *string;
+ char *fmt;
+} utest_t;
+
+utest_t tests[] = {
+#ifdef NOTDEF
+ {"Dec 7 08:56",
+ " :*"},
+ {"Dec 17 08:56",
+ " :*"},
+ {"Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File List:Successfully blobbified file list. Took 1 milliseconds",
+ " ::. / // [] *"},
+ {"RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest list to memory file",
+ "///: ::. : []: *"},
+ /* Expected to fail */
+ {"Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File List:Successfully blobbified file list. Took 1 milliseconds",
+ "///: ::. : : *"},
+ /* Expected to fail */
+ {"RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest list to memory file",
+ " ::. / // [] *"},
+ {"THIS that and + theother", "*+ *"},
+ {"Dec 12 15:33:07.103 ifmgr/errors 0/RP0/CPU0 3# t2 Failed to open IM connection: No such file or directory", " ::. / // *"},
+ {"Dec 16 21:43:47.328 ifmgr/bulk 0/3/CPU0 t8 Bulk DPC async download complete. Partitions 1, node_count 1, total_out 0, out_offset 0, out_expected 0: No error"," ::. / // *"},
+ {"t:0x53034bd6 CPU:00 PROCESS :PROCCREATE_NAME",
+ ": : :*"},
+ {" pid:1", " *"},
+ {"t:0x53034cbb CPU:00 THREAD :THCREATE pid:1 tid:1",
+ ": : : pid: tid:*"},
+ {"t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659",
+ ": : : *"},
+ {"/hfr-base-3.3.85/lib/libttyconnection.dll 0xfc000000 0x0000306c 0xfc027000 0x000001c8 1",
+ " *"},
+ {"Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 :msg_receive:ifmgr/t8:IMC_MSG_MTU_UPDATE:ppp_ma/t1",
+ " ::. // ::::*"},
+
+ {"Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 :msg_send_event:call:ifmgr/t8:124/0:cdp/t1",
+ " ::. // :msg_send_event::::*"},
+
+ {"Feb 28 02:38:26.125 seqtrace 0/1/CPU0 t1 :msg_receive_event:cdp/t1:124/0",
+     " ::. // :msg_receive_event::*"},
+ {"t:0x645dd86d CPU:00 USREVENT:EVENT:100, d0:0x00000002 d1:0x00000000",
+     ": : USREVENT:EVENT:, d0: *"},
+ {"t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659",
+ ": : : *"},
+ {"t:0x2ccf9f5a CPU:00 INT_ENTR:0x80000000 (-2147483648) IP:0x002d8b18",
+     ": : INT_ENTR: IP:*"},
+ {"t:0xd473951c CPU:00 KER_EXIT:SCHED_GET/88 ret_val:2 sched_priority:10",
+     ": : KER_EXIT:SCHED_GET : sched_priority:*"},
+ {"t:0x00000123 CPU:01 SYSTEM :FUNC_ENTER thisfn:0x40e62048 call_site:0x00000000",
+ ": : SYSTEM :FUNC_ thisfn: *"},
+ {"t:0x5af8de95 CPU:00 INT_HANDLER_ENTR:0x0000004d (77) PID:8200 IP:0x00000000 AREA:0x0bf9b290", ": : INT_HANDLER_*"},
+#endif
+ {"t:0x6d1ff92f CPU:00 CONTROL: BUFFER sequence = 1053, num_events = 714",
+ ": : CONTROL*"},
+ {"t:0x6d1ff92f CPU:00 CONTROL :TIME msb:0x0000003c lsb(offset):0x6d1ff921",
+ ": : CONTROL*"},
+};
+
+int main (int argc, char **argv)
+{
+ int i, j;
+ u8 **svec;
+
+ for (j = 0; j < ARRAY_LEN(tests); j++) {
+ printf ("input string: '%s'\n", tests[j].string);
+ printf ("delimiter arg: '%s'\n", tests[j].fmt);
+ printf ("parse trace:\n");
+ svec = delsvec(tests[j].string, tests[j].fmt);
+ if (!svec) {
+ printf("index %d failed\n", j);
+ continue;
+ }
+ printf("%d substring vectors\n", vec_len(svec));
+ for (i = 0; i < vec_len(svec); i++) {
+ printf("[%d]: '%s'\n", i, svec[i]);
+ }
+ printf ("-------------------\n");
+ }
+ exit(0);
+}
+#endif
diff --git a/src/tools/perftool/elog_merge.c b/src/tools/perftool/elog_merge.c
new file mode 100644
index 00000000..46b19dd5
--- /dev/null
+++ b/src/tools/perftool/elog_merge.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/elog.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+#include <vppinfra/serialize.h>
+#include <vppinfra/unix.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/hash.h>
+
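+/*
+ * Merge two or more event logs. An illustrative command line, matching
+ * the unformat() keywords handled below (file names hypothetical):
+ *
+ *   elog_merge merge first.elog merge second.elog tag A tag B \
+ *       dump merged.elog [verbose] [align-tweak <f64>]
+ */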
+int
+elog_merge_main (unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ elog_main_t _em, *em = &_em;
+ u32 verbose;
+ char *dump_file, *merge_file, **merge_files;
+ u8 *tag, **tags;
+ f64 align_tweak;
+ f64 *align_tweaks;
+ uword i;
+ elog_main_t *ems;
+
+ verbose = 0;
+ dump_file = 0;
+ merge_files = 0;
+ tags = 0;
+ align_tweaks = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "dump %s", &dump_file))
+ ;
+ else if (unformat (input, "tag %s", &tag))
+ vec_add1 (tags, tag);
+ else if (unformat (input, "merge %s", &merge_file))
+ vec_add1 (merge_files, merge_file);
+
+ else if (unformat (input, "verbose %=", &verbose, 1))
+ ;
+ else if (unformat (input, "align-tweak %f", &align_tweak))
+ vec_add1 (align_tweaks, align_tweak);
+ else
+ {
+ error = clib_error_create ("unknown input `%U'\n",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ vec_clone (ems, merge_files);
+
+ /* Supply default tags as needed */
+ if (vec_len (tags) < vec_len (ems))
+ {
+ for (i = vec_len (tags); i < vec_len (ems); i++)
+ vec_add1 (tags, format (0, "F%d%c", i, 0));
+ }
+
+ for (i = 0; i < vec_len (ems); i++)
+ {
+ if ((error = elog_read_file ((i == 0) ? em : &ems[i], merge_files[i])))
+ goto done;
+ if (i > 0)
+ {
+ align_tweak = 0.0;
+ if (i <= vec_len (align_tweaks))
+ align_tweak = align_tweaks[i - 1];
+ elog_merge (em, tags[0], &ems[i], tags[i], align_tweak);
+ tags[0] = 0;
+ }
+ }
+
+ if (dump_file)
+ {
+ if ((error =
+ elog_write_file (em, dump_file, 0 /* do not flush ring */ )))
+ goto done;
+ }
+
+ if (verbose)
+ {
+ elog_event_t *e, *es;
+ es = elog_get_events (em);
+ vec_foreach (e, es)
+ {
+ clib_warning ("%18.9f: %12U %U\n", e->time,
+ format_elog_track, em, e, format_elog_event, em, e);
+ }
+ }
+
+done:
+ if (error)
+ clib_error_report (error);
+ return 0;
+}
+
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int r;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ unformat_init_command_line (&i, argv);
+ r = elog_merge_main (&i);
+ unformat_free (&i);
+ return r;
+}
+
+/*
+ * GDB callable function: vl - Return vector length of vector
+ */
+u32
+vl (void *p)
+{
+ return vec_len (p);
+}
+
+/*
+ * GDB callable function: pe - call pool_elts - number of elements in a pool
+ */
+uword
+pe (void *v)
+{
+ return (pool_elts (v));
+}
+
+/*
+ * GDB callable function: he - call hash_elts - number of elements in a hash
+ */
+uword
+he (void *v)
+{
+ return (hash_elts (v));
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/tools/perftool/linreg.c b/src/tools/perftool/linreg.c
new file mode 100644
index 00000000..084091bb
--- /dev/null
+++ b/src/tools/perftool/linreg.c
@@ -0,0 +1,78 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* see "Numerical Recipes in C, 2nd ed." p 665 */
+
+#include <stdio.h>
+#include <math.h>
+
+/*
+ * linreg
+ * Linear regression of (xi, yi), returns parameters for least-squares
+ * fit y = a + bx. Also, compute Pearson's R.
+ */
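+
+/*
+ * For reference, the quantities computed below are the standard
+ * least-squares identities:
+ *
+ *   b = sum(t * y[i]) / sum(t^2),  t = x[i] - mean(x)
+ *   a = (sum(y) - b * sum(x)) / n
+ *   r = sum(ex * ey) / sqrt(sum(ex^2) * sum(ey^2)),
+ *       ex = x[i] - mean(x), ey = y[i] - mean(y)
+ */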
+void linreg (double *x, double *y, int nitems, double *a, double *b,
+ double *minp, double *maxp, double *meanp, double *r)
+{
+ double sx = 0.0;
+ double sy = 0.0;
+ double st2 = 0.0;
+ double min = y[0];
+    double max = y[0];
+ double ss, meanx, meany, t;
+ double errx, erry, prodsum, sqerrx, sqerry;
+ int i;
+
+ *b = 0.0;
+
+ for (i = 0; i < nitems; i++) {
+ sx += x[i];
+ sy += y[i];
+ if (y[i] < min)
+ min = y[i];
+ if (y[i] > max)
+ max = y[i];
+ }
+ ss = nitems;
+ meanx = sx / ss;
+ meany = *meanp = sy / ss;
+ *minp = min;
+ *maxp = max;
+
+ for (i = 0; i < nitems; i++) {
+ t = x[i] - meanx;
+ st2 += t*t;
+ *b += t*y[i];
+ }
+
+ *b /= st2;
+ *a = (sy-sx*(*b))/ss;
+
+ prodsum = 0.0;
+ sqerrx = 0.0;
+ sqerry = 0.0;
+
+ /* Compute numerator of Pearson's R */
+ for (i = 0; i < nitems; i++) {
+ errx = x[i] - meanx;
+ erry = y[i] - meany;
+ prodsum += errx * erry;
+ sqerrx += errx*errx;
+ sqerry += erry*erry;
+ }
+
+ *r = prodsum / (sqrt(sqerrx)*sqrt(sqerry));
+}
diff --git a/src/tools/perftool/new.cpel b/src/tools/perftool/new.cpel
new file mode 100644
index 00000000..b0f35958
--- /dev/null
+++ b/src/tools/perftool/new.cpel
Binary files differ
diff --git a/src/tools/perftool/new.elog b/src/tools/perftool/new.elog
new file mode 100644
index 00000000..2d99bb16
--- /dev/null
+++ b/src/tools/perftool/new.elog
Binary files differ
diff --git a/src/tools/perftool/props.c b/src/tools/perftool/props.c
new file mode 100644
index 00000000..84af5b1c
--- /dev/null
+++ b/src/tools/perftool/props.c
@@ -0,0 +1,280 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2006-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <malloc.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+static char *sxerox (char *s);
+
+#define NBUCKETS 97
+
+typedef struct prop_ {
+ struct prop_ *next;
+ char *name;
+ char *value;
+} prop_t;
+
+static prop_t *buckets [NBUCKETS];
+static int hash_shifts[4] = {24, 16, 8, 0};
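+
+/*
+ * Both getprop and addprop hash a name by XORing each byte into the
+ * accumulator, shifted by 24/16/8/0 bits in rotation.
+ */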
+
+/*
+ * getprop
+ */
+
+char *getprop (char *name)
+{
+ unsigned char *cp;
+ unsigned long hash=0;
+ prop_t *bp;
+ int i=0;
+
+ for (cp = (unsigned char *) name; *cp; cp++)
+ hash ^= (*cp)<<(hash_shifts[(i++)&0x3]);
+
+ bp = buckets [hash%NBUCKETS];
+
+ while (bp && strcmp(bp->name, name)) {
+ bp = bp->next;
+ }
+
+ if (bp == NULL)
+ return (0);
+ else
+ return (bp->value);
+}
+
+/*
+ * getprop_default
+ */
+
+char *getprop_default (char *name, char *def)
+{
+ char *rv;
+ rv = getprop (name);
+ if (rv)
+ return (rv);
+ else
+ return (def);
+}
+
+/*
+ * addprop
+ */
+
+void addprop (char *name, char *value)
+{
+ unsigned char *cp;
+ unsigned long hash=0;
+ prop_t **bpp;
+ prop_t *bp;
+ int i=0;
+
+ bp = (prop_t *)malloc (sizeof (prop_t));
+
+ bp->next = 0;
+ bp->name = sxerox (name);
+ bp->value = sxerox (value);
+
+ for (cp = (unsigned char *)name; *cp; cp++)
+ hash ^= (*cp)<<(hash_shifts[(i++)&0x3]);
+
+ bpp = &buckets [hash%NBUCKETS];
+
+ if (*bpp == NULL)
+ *bpp = bp;
+ else {
+ bp->next = *bpp;
+ *bpp = bp;
+ }
+}
+
+/*
+ * sxerox
+ */
+
+static char *sxerox (char *s)
+{
+ char *rv = (char *) malloc (strlen (s) + 1);
+ strcpy (rv, s);
+ return rv;
+}
+
+/*
+ * readprops
+ */
+
+#define START 0
+#define READNAME 1
+#define READVALUE 2
+#define C_COMMENT 3
+#define CPP_COMMENT 4
+
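+/*
+ * An illustrative property file accepted by this parser:
+ *
+ *   // C++- and C-style comments are allowed
+ *   widget_color = blue
+ *   max_widgets = 42
+ */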
+int readprops (char *filename)
+{
+ FILE *ifp;
+ unsigned char c;
+ int state=START;
+ int linenum=1;
+ char namebuf [128];
+ char valbuf [512];
+ int i;
+
+ ifp = fopen (filename, "r");
+
+ if (ifp == NULL)
+ return (-1);
+
+ while (1) {
+
+ readchar:
+ c = getc (ifp);
+
+ again:
+ switch (state) {
+ case START:
+ if (feof (ifp)) {
+ fclose (ifp);
+ return (0);
+ }
+
+ if (c == ' ' || c == '\t')
+ goto readchar;
+
+ if (c == '\n') {
+ linenum++;
+ goto readchar;
+ }
+ if (isalpha (c) || (c == '_')) {
+ state = READNAME;
+ goto again;
+ }
+ if (c == '/') {
+ c = getc (ifp);
+ if (c == '/') {
+ state = CPP_COMMENT;
+ goto readchar;
+ } else if (c == '*') {
+ state = C_COMMENT;
+ goto readchar;
+ } else {
+ fprintf (stderr, "unknown token '/' line %d\n",
+ linenum);
+ exit(1);
+ }
+ }
+ fprintf (stderr, "unknown token '%c' line %d\n",
+ c, linenum);
+ exit (1);
+ break;
+
+ case CPP_COMMENT:
+ while (1) {
+ c = getc (ifp);
+ if (feof (ifp))
+ return (0);
+ if (c == '\n') {
+ linenum++;
+ state = START;
+ goto readchar;
+ }
+ }
+ break;
+
+ case C_COMMENT:
+ while (1) {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "unterminated comment, line %d\n",
+ linenum);
+ exit (1);
+ }
+ if (c == '*') {
+ staragain:
+ c = getc (ifp);
+ if (c == '/') {
+ state = START;
+ goto readchar;
+ }
+ if (c == '*')
+ goto staragain;
+ }
+ }
+ break;
+
+ case READNAME:
+ i = 0;
+ namebuf[i++] = c;
+ while (1) {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "EOF while reading a name, line %d\n",
+ linenum);
+ exit (1);
+ }
+ if ((!isalnum (c)) && (c != '_')) {
+ namebuf [i] = 0;
+ state = READVALUE;
+ goto again;
+ }
+ namebuf [i++] = c;
+ }
+ break;
+
+ case READVALUE:
+ i = 0;
+ while ((c == ' ') || (c == '\t') || (c == '=')) {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "EOF while reading a value, line %d\n",
+ linenum);
+ exit (1);
+ }
+ }
+ goto firsttime;
+ while (1) {
+ c = getc (ifp);
+
+ firsttime:
+ if (c == '\\') {
+ c = getc (ifp);
+ if (feof (ifp)) {
+ fprintf (stderr, "EOF after '\\', line %d\n",
+ linenum);
+ exit (1);
+ }
+ valbuf[i++] = c;
+ continue;
+ }
+ if (c == '\n') {
+ linenum++;
+	        while (i > 0 && (valbuf [i-1] == ' ' || valbuf[i-1] == '\t'))
+ i--;
+ valbuf[i] = 0;
+ addprop (namebuf, valbuf);
+ state = START;
+ goto readchar;
+ }
+ valbuf[i++] = c;
+ }
+
+ }
+ }
+}
diff --git a/src/tools/vppapigen/gram.y b/src/tools/vppapigen/gram.y
new file mode 100644
index 00000000..52bb65c5
--- /dev/null
+++ b/src/tools/vppapigen/gram.y
@@ -0,0 +1,91 @@
+%{
+/*
+ * gram.y - message definition language
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+extern void yyerror (char *s);
+extern int yylex (void);
+
+#define YYSTYPE void *
+
+void generate (YYSTYPE);
+ YYSTYPE add_slist(YYSTYPE, YYSTYPE);
+ YYSTYPE add_define(YYSTYPE, YYSTYPE);
+ YYSTYPE suppress_version(void);
+ YYSTYPE add_defbody(YYSTYPE, YYSTYPE);
+ YYSTYPE add_primtype(YYSTYPE, YYSTYPE, YYSTYPE);
+ YYSTYPE add_complex(YYSTYPE, YYSTYPE);
+ YYSTYPE add_union(YYSTYPE, YYSTYPE);
+ YYSTYPE add_scalar_vbl(YYSTYPE);
+ YYSTYPE add_vector_vbl(YYSTYPE, YYSTYPE);
+ YYSTYPE add_variable_length_vector_vbl(YYSTYPE, YYSTYPE);
+ YYSTYPE set_flags(YYSTYPE, YYSTYPE);
+%}
+
+%token NAME RPAR LPAR SEMI LBRACK RBRACK NUMBER PRIMTYPE BARF
+%token TPACKED DEFINE LCURLY RCURLY STRING UNION
+%token HELPER_STRING COMMA
+%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE AUTOREPLY
+
+%%
+
+pgm: slist {generate ($1);}
+ ;
+
+slist: slist stmt {$$ = add_slist ($1, $2);}
+ | stmt {$$ = $1;}
+ ;
+
+stmt: flist defn {$$ = set_flags($1, $2);}
+ | defn {$$ = $1;}
+ ;
+
+flist: flist flag {$$ = (YYSTYPE)(unsigned long)
+ ((unsigned long) $1
+ | (unsigned long) $2);}
+ | flag {$$ = $1;}
+ ;
+
+flag:
+ MANUAL_PRINT {$$ = $1;}
+ | MANUAL_ENDIAN {$$ = $1;}
+ | DONT_TRACE {$$ = $1;}
+ | TYPEONLY {$$ = $1;}
+ | AUTOREPLY {$$ = $1;}
+ ;
+
+defn: DEFINE NAME LCURLY defbody RCURLY SEMI
+ {$$ = add_define($2, $4);}
+
+ | NOVERSION SEMI
+ {$$ = suppress_version();}
+ ;
+
+defbody: defbody onedef {$$ = add_defbody($1, $2);}
+ | onedef {$$ = $1;}
+ ;
+
+onedef: PRIMTYPE vbl SEMI {$$ = add_primtype($1, $2, 0);}
+ | TPACKED PRIMTYPE vbl SEMI {$$ = add_primtype($1, $2, $3);}
+ | NAME vbl SEMI {$$ = add_complex($1, $2);}
+ | UNION NAME LCURLY defbody RCURLY SEMI
+ {$$ = add_union($2, $4);}
+ ;
+
+vbl: NAME {$$ = add_scalar_vbl($1);}
+ | NAME LBRACK NUMBER RBRACK {$$ = add_vector_vbl($1, $3);}
+ | NAME LBRACK NAME RBRACK {$$ = add_variable_length_vector_vbl($1, $3);}
+ ;
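+
+/*
+ * Illustrative input accepted by this grammar (hypothetical message
+ * name and fields):
+ *
+ *   define my_message {
+ *       u32 context;
+ *       u8 name[64];
+ *   };
+ */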
diff --git a/src/tools/vppapigen/lex.c b/src/tools/vppapigen/lex.c
new file mode 100644
index 00000000..e6358143
--- /dev/null
+++ b/src/tools/vppapigen/lex.c
@@ -0,0 +1,1120 @@
+/*
+ *------------------------------------------------------------------
+ * lex.c - API generator lexical analyzer
+ *
+ * Copyright (c) 1996-2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include "lex.h"
+#include "node.h"
+#include "tools/vppapigen/gram.h"
+#include <vppinfra/clib.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/format.h>
+
+FILE *ifp, *ofp, *pythonfp, *jsonfp;
+char *vlib_app_name = "vpp";
+int dump_tree;
+time_t starttime;
+char *input_filename;
+char *current_filename;
+int current_filename_allocated;
+unsigned long input_crc;
+unsigned long message_crc;
+int yydebug;
+char *push_input_fifo;
+char saved_ungetc_char;
+char have_ungetc_char;
+
+/*
+ * lexer variable definitions
+ */
+
+static const char *version = "0.1";
+static int the_lexer_linenumber = 1;
+static enum lex_state the_lexer_state = START_STATE;
+
+/*
+ * private prototypes
+ */
+static void usage (char *);
+static int name_check (const char *, YYSTYPE *);
+static int name_compare (const char *, const char *);
+extern int yydebug;
+extern YYSTYPE yylval;
+
+unsigned int crc32c_table[256] = {
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+ 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+ 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+ 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+ 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+ 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+ 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+ 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+ 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+ 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+ 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+ 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+ 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+ 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+ 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+ 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+ 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+ 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+ 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+ 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+ 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+ 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+ 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+ 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+ 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+ 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+ 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+ 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+ 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+ 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+ 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+ 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+ 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+};
+
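+/* Byte- and halfword-at-a-time CRC-32C updates, driven by the table above */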
+static inline unsigned long CRC8 (unsigned long crc,
+ unsigned char d)
+{
+ return ((crc >> 8) ^ crc32c_table[(crc ^ d) & 0xFF]);
+}
+static inline unsigned long CRC16 (unsigned long crc,
+ unsigned short d)
+{
+ crc = CRC8 (crc, d & 0xff);
+ d = d >> 8;
+ crc = CRC8 (crc, d & 0xff);
+ return crc;
+}
+
+
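+/*
+ * CRC the significant characters of a C source fragment: comments and
+ * whitespace outside of string/character literals are elided, so
+ * reformatting or re-commenting a definition leaves its CRC unchanged.
+ */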
+static unsigned long
+crc_eliding_c_comments (const char *buf, unsigned long crc)
+{
+ const char *p;
+ enum { cOTHER, /* */
+ cSTRING, /* "... */
+ cSBACKSLASH, /* "...\ */
+ cCHAR, /* '... */
+ cCBACKSLASH, /* '...\ */
+ cSLASH, /* / */
+ cSLASH_SLASH, /* //... */
+ cSLASH_STAR, /* / *... */
+ cSTAR /* / *...* */
+ } ss = cOTHER;
+
+ for (p = buf; ;) {
+ unsigned char c = *p++;
+
+ switch (c) {
+ case 0:
+ switch (ss) {
+ case cOTHER:
+ return (crc);
+ case cSTRING: case cSBACKSLASH:
+ case cCHAR: case cCBACKSLASH:
+ case cSLASH: case cSLASH_SLASH: case cSLASH_STAR: case cSTAR:
+ fprintf (stderr, "Inopportune EOF: %s\n", buf);
+ exit (1);
+ }
+ break;
+ case '\"':
+ switch (ss) {
+ case cOTHER: ss = cSTRING; break; /* start string */
+ case cSTRING: ss = cOTHER; break; /* end string */
+ case cSBACKSLASH: ss = cSTRING; break;
+ case cCHAR: break;
+ case cCBACKSLASH: ss = cCHAR; break;
+ case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
+ case cSLASH_SLASH: continue; /* in comment */
+ case cSLASH_STAR: continue; /* in comment */
+ case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
+ }
+ break;
+ case '\\':
+ switch (ss) {
+ case cOTHER: break;
+ case cSTRING: ss = cSBACKSLASH; break;
+ case cSBACKSLASH: ss = cSTRING; break;
+ case cCHAR: ss = cCBACKSLASH; break;
+ case cCBACKSLASH: ss = cCHAR; break;
+	    case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
+ case cSLASH_SLASH: continue; /* in comment */
+ case cSLASH_STAR: continue; /* in comment */
+ case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
+ }
+ break;
+ case '/':
+ switch (ss) {
+ case cOTHER: ss = cSLASH; continue; /* potential comment */
+ case cSTRING: break;
+ case cSBACKSLASH: ss = cSTRING; break;
+ case cCHAR: break;
+ case cCBACKSLASH: ss = cCHAR; break;
+ case cSLASH: ss = cSLASH_SLASH; continue; /* start comment */
+ case cSLASH_SLASH: continue; /* in comment */
+ case cSLASH_STAR: continue; /* in comment */
+ case cSTAR: ss = cOTHER; continue; /* end of comment */
+ }
+ break;
+ case '*':
+ switch (ss) {
+ case cOTHER: break;
+ case cSTRING: break;
+ case cSBACKSLASH: ss = cSTRING; break;
+ case cCHAR: break;
+ case cCBACKSLASH: ss = cCHAR; break;
+ case cSLASH: ss = cSLASH_STAR; continue; /* start comment */
+ case cSLASH_SLASH: continue; /* in comment */
+ case cSLASH_STAR: ss = cSTAR; continue; /* potential end */
+ case cSTAR: continue; /* still potential end of comment */
+ }
+ break;
+ case '\n': case '\r': case ' ': case '\t': case '\014':
+ switch (ss) {
+ case cOTHER: continue; /* ignore all whitespace */
+ case cSTRING: break;
+ case cSBACKSLASH: ss = cSTRING; break;
+ case cCHAR: break;
+ case cCBACKSLASH: ss = cCHAR; break;
+ case cSLASH: c = '/'; ss = cOTHER; break;
+ case cSLASH_SLASH:
+ if (c == '\n' || c == '\r') ss = cOTHER; /* end comment */
+ continue;
+ case cSLASH_STAR: continue; /* in comment */
+ case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
+	    }
+	    break;
+ default:
+ switch (ss) {
+ case cOTHER: break;
+ case cSTRING: break;
+ case cSBACKSLASH: ss = cSTRING; break;
+ case cCHAR: break;
+ case cCBACKSLASH: ss = cCHAR; break;
+ case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
+ case cSLASH_SLASH: continue; /* in comment */
+ case cSLASH_STAR: continue; /* in comment */
+ case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
+ }
+ }
+ crc = CRC8 (crc, c);
+ }
+}
+
+/*
+ * main
+ */
+int main (int argc, char **argv)
+{
+ int curarg = 1;
+ char *ofile=0;
+ char *pythonfile=0;
+ char *jsonfile=0;
+ char *show_name=0;
+
+ while (curarg < argc) {
+ if (!strncmp (argv [curarg], "--verbose", 3)) {
+ fprintf (stderr, "%s version %s\n", argv [0], version);
+ curarg++;
+ continue;
+ }
+
+ if (!strncmp (argv [curarg], "--yydebug", 3)) {
+ yydebug = 1;
+ curarg++;
+ continue;
+ }
+
+ if (!strncmp (argv [curarg], "--dump", 3)) {
+ dump_tree = 1;
+ curarg++;
+ continue;
+ }
+
+ if (!strncmp (argv[curarg], "--show-name", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ show_name = argv[curarg];
+ curarg++;
+ continue;
+ } else {
+ fprintf(stderr, "Missing filename after --show-name \n");
+ exit(1);
+ }
+ }
+
+ if (!strncmp (argv [curarg], "--input", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ input_filename = argv[curarg];
+ if (!strcmp (argv [curarg], "-"))
+ ifp = stdin;
+ else
+ ifp = fopen (argv [curarg], "r");
+ if (ifp == NULL) {
+ fprintf (stderr, "Couldn't open input file %s\n",
+ argv[curarg]);
+ exit (1);
+ }
+ curarg++;
+ } else {
+ fprintf(stderr, "Missing filename after --input\n");
+ exit(1);
+ }
+ continue;
+ }
+ if (!strncmp (argv [curarg], "--output", 3)) {
+ curarg++;
+ if (curarg < argc) {
+ ofp = fopen (argv[curarg], "w");
+ if (ofp == NULL) {
+ fprintf (stderr, "Couldn't open output file %s\n",
+ argv[curarg]);
+ exit (1);
+ }
+ ofile = argv[curarg];
+ curarg++;
+ } else {
+ fprintf(stderr, "Missing filename after --output\n");
+ exit(1);
+ }
+ continue;
+ }
+ if (!strncmp (argv [curarg], "--python", 8)) {
+ curarg++;
+ if (curarg < argc) {
+ if (!strcmp(argv[curarg], "-")) {
+ pythonfp = stdout;
+ } else {
+ pythonfp = fopen(argv[curarg], "w");
+ pythonfile = argv[curarg];
+ }
+ if (pythonfp == NULL) {
+ fprintf (stderr, "Couldn't open python output file %s\n",
+ argv[curarg]);
+ exit (1);
+ }
+ curarg++;
+ } else {
+ fprintf(stderr, "Missing filename after --python\n");
+ exit(1);
+ }
+ continue;
+ }
+ if (!strncmp (argv [curarg], "--json", 6)) {
+ curarg++;
+ if (curarg < argc) {
+ if (!strcmp(argv[curarg], "-")) {
+ jsonfp = stdout;
+ } else {
+ jsonfp = fopen(argv[curarg], "w");
+ jsonfile = argv[curarg];
+ }
+ if (jsonfp == NULL) {
+ fprintf (stderr, "Couldn't open JSON output file %s\n",
+ argv[curarg]);
+ exit (1);
+ }
+ curarg++;
+ } else {
+ fprintf(stderr, "Missing filename after --json\n");
+ exit(1);
+ }
+ continue;
+ }
+ if (!strncmp (argv [curarg], "--app", 4)) {
+ curarg++;
+ if (curarg < argc) {
+ vlib_app_name = argv[curarg];
+ curarg++;
+ } else {
+ fprintf(stderr, "Missing app name after --app\n");
+ exit(1);
+ }
+ continue;
+ }
+
+ usage(argv[0]);
+ exit (1);
+ }
+ if (ofp == NULL) {
+ ofile = 0;
+ }
+ if (pythonfp == NULL) {
+ pythonfile = 0;
+ }
+ if (jsonfp == NULL) {
+ jsonfile = 0;
+ }
+ if (ifp == NULL) {
+ fprintf(stderr, "No input file specified...\n");
+ exit(1);
+ }
+ if (show_name) {
+ input_filename = show_name;
+ }
+
+ starttime = time (0);
+
+ if (yyparse() == 0) {
+ fclose (ifp);
+ if (ofile) {
+ printf ("Output written to %s\n", ofile);
+ fclose (ofp);
+ }
+ if (pythonfile) {
+ printf ("Python bindings written to %s\n", pythonfile);
+ fclose (pythonfp);
+ }
+ if (jsonfile) {
+ printf ("JSON bindings written to %s\n", jsonfile);
+ fclose (jsonfp);
+ }
+ }
+ else {
+ fclose (ifp);
+ if (ofp)
+ fclose (ofp);
+ if (ofile) {
+ printf ("Removing %s\n", ofile);
+ unlink (ofile);
+ }
+ if (pythonfile) {
+ printf ("Removing %s\n", pythonfile);
+ unlink (pythonfile);
+ }
+ if (jsonfile) {
+ printf ("Removing %s\n", jsonfile);
+ unlink (jsonfile);
+ }
+ exit (1);
+ }
+ exit (0);
+}
+
+/*
+ * usage
+ */
+static void usage (char *progname)
+{
+    fprintf (stderr,
+             "usage: %s --input <filename> [--output <filename>]\n"
+             "          [--json <filename>] [--python <filename>]\n"
+             "          [--app <name>] [--show-name <filename>]\n"
+             "          [--verbose] [--yydebug] [--dump-tree]\n",
+             progname);
+ exit (1);
+}
+
+/*
+ * yyerror
+ */
+void yyerror (char *s)
+{
+ fprintf (stderr, "%s:%d %s\n", current_filename, the_lexer_linenumber, s);
+}
+
+static char namebuf [MAXNAME];
+
+static inline char
+getc_char (FILE *ifp)
+{
+ char rv;
+
+ if (have_ungetc_char) {
+ have_ungetc_char = 0;
+ return saved_ungetc_char;
+ }
+
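+    /* Drain text pushed by autoreply() before reading more file input */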
+ if (clib_fifo_elts (push_input_fifo)) {
+ clib_fifo_sub1(push_input_fifo, rv);
+ return (rv & 0x7f);
+ }
+ return ((char)(getc(ifp) & 0x7f));
+}
+
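+/* fe: debug convenience (e.g. callable from gdb) -- count fifo elements */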
+u32 fe (char *fifo)
+{
+ return clib_fifo_elts (fifo);
+}
+
+static inline void
+ungetc_char (char c, FILE *ifp)
+{
+ saved_ungetc_char = c;
+ have_ungetc_char = 1;
+}
+
+void autoreply (void *np_arg)
+{
+ static u8 *s;
+ node_t *np = (node_t *)np_arg;
+ int i;
+
+    vec_reset_length (s);
+
+    /* Reuse the static vector; allocating a fresh one here would leak */
+    s = format (s, " define %s_reply\n", (char *)(np->data[0]));
+ s = format (s, "{\n");
+ s = format (s, " u32 context;\n");
+ s = format (s, " i32 retval;\n");
+ s = format (s, "};\n");
+
+ for (i = 0; i < vec_len (s); i++)
+ clib_fifo_add1 (push_input_fifo, s[i]);
+}
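+
+/*
+ * Illustrative only: for "autoreply define show_version {...};"
+ * (a hypothetical message name) the FIFO receives
+ *
+ *     define show_version_reply
+ *    {
+ *        u32 context;
+ *        i32 retval;
+ *    };
+ *
+ * which the lexer then consumes as if it had appeared in the input file.
+ */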
+
+/*
+ * yylex (well, yylex_1: The real yylex below does crc-hackery)
+ */
+static int yylex_1 (void)
+{
+ int nameidx=0;
+ char c;
+ enum { LP_INITIAL_WHITESPACE, LP_LINE_NUMBER,
+ LP_PRE_FILENAME_WHITESPACE, LP_FILENAME,
+ LP_POST_FILENAME,
+ LP_OTHER
+ } lp_substate = LP_INITIAL_WHITESPACE;
+
+ again:
+ switch (the_lexer_state) {
+ /*
+ * START state -- looking for something interesting
+ */
+ case START_STATE:
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+
+ switch (c) {
+ case '\n':
+ the_lexer_linenumber++;
+ goto again;
+
+ case '#':
+ the_lexer_state = LINE_PRAGMA_STATE;
+ lp_substate = LP_INITIAL_WHITESPACE;
+ goto again;
+
+ case '\t':
+ case ' ':
+ goto again;
+
+ case '(':
+ return (LPAR);
+
+ case ')':
+ return (RPAR);
+
+ case ';':
+ return (SEMI);
+
+ case '[':
+ return (LBRACK);
+
+ case ']':
+ return (RBRACK);
+
+ case '{':
+ return (LCURLY);
+
+ case '}':
+ return (RCURLY);
+
+ case ',':
+ return (COMMA);
+
+ case '"':
+ nameidx = 0;
+ the_lexer_state = STRING_STATE;
+ goto again;
+
+ case '@':
+ nameidx = 0;
+ the_lexer_state = HELPER_STATE;
+ goto again;
+
+ case '/':
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+
+ if (c == '/') {
+ the_lexer_state = CPP_COMMENT_STATE;
+ goto again;
+ } else if (c == '*') {
+ the_lexer_state = C_COMMENT_STATE;
+ goto again;
+ } else {
+ fprintf (stderr, "unknown token /%c at line %d\n",
+ c, the_lexer_linenumber);
+ return (BARF);
+ }
+
+ case '\\':
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+
+ /* Note fallthrough... */
+
+ default:
+ if (isalpha (c) || c == '_') {
+ namebuf [0] = c;
+ nameidx = 1;
+ the_lexer_state = NAME_STATE;
+ goto again;
+ } else if (isdigit(c)) {
+ namebuf [0] = c;
+ nameidx = 1;
+ the_lexer_state = NUMBER_STATE;
+ goto again;
+ }
+
+ fprintf (stderr, "unknown token %c at line %d\n",
+ c, the_lexer_linenumber);
+ return (BARF);
+ }
+
+ /*
+ * NAME state -- eat the rest of a name
+ */
+ case NAME_STATE:
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+
+ if (!isalnum (c) && c != '_') {
+ ungetc_char (c, ifp);
+ namebuf [nameidx] = 0;
+ the_lexer_state = START_STATE;
+ return (name_check (namebuf, &yylval));
+ }
+ if (nameidx >= (MAXNAME-1)) {
+ fprintf(stderr, "lex input buffer overflow...\n");
+ exit(1);
+ }
+ namebuf [nameidx++] = c;
+ goto again;
+
+ /*
+ * NUMBER state -- eat the rest of a number
+ */
+ case NUMBER_STATE:
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+
+ if (!isdigit (c)) {
+ ungetc_char (c, ifp);
+ namebuf [nameidx] = 0;
+ the_lexer_state = START_STATE;
+ yylval = (void *) atol(namebuf);
+ return (NUMBER);
+ }
+ if (nameidx >= (MAXNAME-1)) {
+ fprintf(stderr, "lex input buffer overflow...\n");
+ exit(1);
+ }
+ namebuf [nameidx++] = c;
+ goto again;
+
+ /*
+ * C_COMMENT state -- eat a peach
+ */
+ case C_COMMENT_STATE:
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+ if (c == '*') {
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+ if (c == '/') {
+ the_lexer_state = START_STATE;
+ goto again;
+ }
+ }
+ if (c == '\n')
+ the_lexer_linenumber++;
+ goto again;
+
+ /*
+ * CPP_COMMENT state -- eat a plum
+ */
+
+ case CPP_COMMENT_STATE:
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+ if (c == '\n') {
+ the_lexer_linenumber++;
+ the_lexer_state = START_STATE;
+ goto again;
+ }
+ goto again;
+
+ case STRING_STATE:
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+ switch (c) {
+ case '\\':
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+	    if (nameidx >= (MAXNAME-1)) {
+		fprintf(stderr, "lex input buffer overflow...\n");
+		exit(1);
+	    }
+	    namebuf[nameidx++] = c;
+ goto again;
+
+ case '"':
+ namebuf[nameidx] = 0;
+ yylval = (YYSTYPE) sxerox (namebuf);
+ the_lexer_state = START_STATE;
+ return (STRING);
+
+ default:
+ if (c == '\n')
+ the_lexer_linenumber++;
+
+ if (nameidx >= (MAXNAME-1)) {
+ fprintf(stderr, "lex input buffer overflow...\n");
+ exit(1);
+ }
+ namebuf[nameidx++] = c;
+ goto again;
+ }
+ break;
+
+ case HELPER_STATE:
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+ switch (c) {
+ case '\\':
+ c = getc_char (ifp);
+ if (feof (ifp))
+ return (EOF);
+	    if (nameidx >= (MAXNAME-1)) {
+		fprintf(stderr, "lex input buffer overflow...\n");
+		exit(1);
+	    }
+	    namebuf[nameidx++] = c;
+ goto again;
+
+ case '@':
+ namebuf[nameidx] = 0;
+ yylval = (YYSTYPE) sxerox (namebuf);
+ the_lexer_state = START_STATE;
+ return (HELPER_STRING);
+
+ default:
+ if (c == '\n')
+ the_lexer_linenumber++;
+
+ /*
+ * CPP makes it approximately impossible to
+ * type "#define FOO 123", so we provide a
+ * lexical trick to achieve that result
+ */
+
+ if (c == '$')
+ c = '#';
+
+ if (nameidx >= (MAXNAME-1)) {
+ fprintf(stderr, "lex input buffer overflow...\n");
+ exit(1);
+ }
+ namebuf[nameidx++] = c;
+ goto again;
+ }
+ break;
+
+ case LINE_PRAGMA_STATE:
+ /* We're only interested in lines of the form # 259 "foo.c" 17 */
+
+ switch (lp_substate) {
+
+ case LP_INITIAL_WHITESPACE: /* no number seen yet */
+ c = getc_char(ifp);
+ if (feof(ifp))
+ return(EOF);
+ if (c >= '0' && c <= '9') {
+ namebuf[nameidx++] = c;
+ lp_substate = LP_LINE_NUMBER;
+ } else if (c == '\n') {
+ goto lp_end_of_line;
+	    } else if (c == ' ' || c == '\t') {
+		/* still skipping whitespace before the line number */
+	    } else {
+		lp_substate = LP_OTHER;
+	    }
+ goto again;
+
+ case LP_LINE_NUMBER: /* eating linenumber */
+ c = getc_char(ifp);
+ if (feof(ifp))
+ return(EOF);
+ if (c >= '0' && c <= '9') {
+ namebuf[nameidx++] = c;
+ } else if (c == ' ' || c == '\t') {
+ namebuf[nameidx++] = 0;
+ the_lexer_linenumber = atol(namebuf);
+ lp_substate = LP_PRE_FILENAME_WHITESPACE;
+ } else if (c == '\n') {
+ goto lp_end_of_line;
+ } else {
+ lp_substate = LP_OTHER;
+ }
+ goto again;
+
+ case LP_PRE_FILENAME_WHITESPACE: /* awaiting filename */
+ c = getc_char(ifp);
+ if (feof(ifp))
+ return(EOF);
+
+ if (c == '"') {
+ lp_substate = LP_FILENAME;
+ nameidx = 0;
+ } else if (c == ' ' || c == '\t') {
+ /* nothing */
+ } else if (c == '\n') {
+ goto lp_end_of_line;
+ } else {
+ lp_substate = LP_OTHER;
+ }
+ goto again;
+
+ case LP_FILENAME: /* eating filename */
+ c = getc_char(ifp);
+ if (feof(ifp))
+ return(EOF);
+
+ if (c == '"') {
+ lp_substate = LP_POST_FILENAME;
+ namebuf[nameidx] = 0;
+ } else if (c == '\n') {
+ goto lp_end_of_line; /* syntax error... */
+ } else {
+ namebuf[nameidx++] = c;
+ }
+ goto again;
+
+ case LP_POST_FILENAME: /* ignoring rest of line */
+ case LP_OTHER:
+ c = getc_char(ifp);
+ if (feof(ifp))
+ return(EOF);
+
+ if (c == '\n') {
+ if (lp_substate == LP_POST_FILENAME) {
+ if (current_filename_allocated) {
+ current_filename_allocated = 0;
+ free(current_filename);
+ }
+
+ if (!strcmp(namebuf, "<stdin>")) {
+ current_filename = input_filename;
+ } else {
+ current_filename = sxerox(namebuf);
+ current_filename_allocated = 1;
+ }
+ }
+ lp_end_of_line:
+ the_lexer_state = START_STATE;
+ nameidx = 0;
+ }
+ goto again;
+ }
+ break;
+ }
+ fprintf (stderr, "LEXER BUG!\n");
+ exit (1);
+ /* NOTREACHED */
+ return (0);
+}
+
+/*
+ * Parse a token and side-effect input_crc
+ * in a whitespace- and comment-insensitive fashion.
+ */
+int yylex (void)
+{
+ /*
+ * Accumulate a crc32-based signature while processing the
+ * input file. The goal is to come up with a magic number
+ * which changes precisely when the original input file changes
+ * but which ignores whitespace changes.
+ */
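+    /*
+     * Illustrative only: "define foo {u32 bar;};" hashes to the same
+     * signature with or without extra blank lines, tabs or C comments.
+     */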
+ unsigned long crc = input_crc;
+ int node_type = yylex_1 ();
+ unsigned long crc2 = message_crc;
+ int use_helper_string = 0;
+ unsigned short code;
+
+ switch (node_type) {
+ case PRIMTYPE:
+ case NAME:
+ case NUMBER:
+ case STRING:
+ case HELPER_STRING:
+ use_helper_string = 1;
+ break;
+
+ /* Other node types have no "substate" */
+ /* This code is written in this curious fashion because we
+ * want the generated CRC to be independent of the particular
+ * values a particular version of lex/bison assigned to various states.
+ */
+
+ case RPAR: code = 258; break;
+ case LPAR: code = 259; break;
+ case SEMI: code = 260; break;
+ case LBRACK: code = 261; break;
+ case RBRACK: code = 262; break;
+ case BARF: code = 265; break;
+ case TPACKED: code = 266; break;
+ case DEFINE: code = 267; break;
+ case LCURLY: code = 268; break;
+ case RCURLY: code = 269; break;
+ case UNION: code = 271; break;
+ case COMMA: code = 273; break;
+ case NOVERSION: code = 274; break;
+ case MANUAL_PRINT: code = 275; break;
+ case MANUAL_ENDIAN: code = 276; break;
+ case TYPEONLY: code = 278; break;
+ case DONT_TRACE: code = 279; break;
+ case AUTOREPLY: code = 280; break;
+
+ case EOF: code = ~0; break; /* hysterical compatibility */
+
+ default:
+ fprintf(stderr, "yylex: node_type %d missing state CRC cookie\n",
+ node_type);
+ exit(1);
+ }
+
+ if (use_helper_string)
+ {
+ /* We know these types accumulated token text into namebuf */
+ /* HELPER_STRING may still contain C comments. Argh. */
+ crc = crc_eliding_c_comments (namebuf, crc);
+ crc2 = crc_eliding_c_comments (namebuf, crc2);
+ } else
+ {
+ crc = CRC16 (crc, code);
+ crc2 = CRC16 (crc2, code);
+ }
+
+ input_crc = crc;
+ message_crc = crc2;
+ return (node_type);
+}
+
+/*
+ * name_check -- see if the name we just ate
+ * matches a known keyword. If so, set yylval to a new
+ * node instance (or a flag value) and return the matching parser token.
+ *
+ * Otherwise, set yylval to sxerox (s) and return NAME
+ */
+
+static struct keytab {
+ char *name;
+ enum node_subclass subclass_id;
+} keytab [] =
+/* Keep the table sorted, binary search used below! */
+{
+ {"autoreply", NODE_AUTOREPLY},
+ {"define", NODE_DEFINE},
+ {"dont_trace", NODE_DONT_TRACE},
+ {"f64", NODE_F64},
+ {"i16", NODE_I16},
+ {"i32", NODE_I32},
+ {"i64", NODE_I64},
+ {"i8", NODE_I8},
+ {"manual_endian", NODE_MANUAL_ENDIAN},
+ {"manual_print", NODE_MANUAL_PRINT},
+ {"noversion", NODE_NOVERSION},
+ {"packed", NODE_PACKED},
+ {"typeonly", NODE_TYPEONLY},
+ {"u16", NODE_U16},
+ {"u32", NODE_U32},
+ {"u64", NODE_U64},
+ {"u8", NODE_U8},
+ {"union", NODE_UNION},
+ {"uword", NODE_UWORD},
+};
+
+static int name_check (const char *s, YYSTYPE *token_value)
+{
+ enum node_subclass subclass_id;
+ int top, bot, mid;
+ int result;
+
+ for (top = 0, bot = (sizeof(keytab) / sizeof(struct keytab))-1;
+ bot >= top; ) {
+ mid = (top + bot) / 2;
+ result = name_compare (s, keytab[mid].name);
+ if (result < 0)
+ bot = mid - 1;
+ else if (result > 0)
+ top = mid + 1;
+ else {
+ subclass_id = keytab[mid].subclass_id;
+
+ switch (subclass_id) {
+ case NODE_U8:
+ case NODE_U16:
+ case NODE_U32:
+ case NODE_U64:
+ case NODE_I8:
+ case NODE_I16:
+ case NODE_I32:
+ case NODE_I64:
+ case NODE_F64:
+ case NODE_UWORD:
+ *token_value = make_node(subclass_id);
+ return (PRIMTYPE);
+
+ case NODE_PACKED:
+ *token_value = make_node(subclass_id);
+ return (TPACKED);
+
+ case NODE_DEFINE:
+ message_crc = 0;
+ *token_value = make_node(subclass_id);
+ return(DEFINE);
+
+ case NODE_MANUAL_PRINT:
+ *token_value = (YYSTYPE) NODE_FLAG_MANUAL_PRINT;
+ return (MANUAL_PRINT);
+
+ case NODE_MANUAL_ENDIAN:
+ *token_value = (YYSTYPE) NODE_FLAG_MANUAL_ENDIAN;
+ return (MANUAL_ENDIAN);
+
+ case NODE_TYPEONLY:
+ *token_value = (YYSTYPE) NODE_FLAG_TYPEONLY;
+ return(TYPEONLY);
+
+ case NODE_DONT_TRACE:
+ *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
+ return(DONT_TRACE);
+
+ case NODE_AUTOREPLY:
+ *token_value = (YYSTYPE) NODE_FLAG_AUTOREPLY;
+ return(AUTOREPLY);
+
+ case NODE_NOVERSION:
+ return(NOVERSION);
+
+ case NODE_UNION:
+ return(UNION);
+
+ default:
+ fprintf (stderr, "fatal: keytab botch!\n");
+ exit (1);
+ }
+ }
+ }
+ *token_value = (YYSTYPE) sxerox (s);
+ return (NAME);
+}
+
+/*
+ * sxerox -- a strdup() work-alike; exits on allocation failure
+ */
+
+char *sxerox (const char *s)
+{
+ int len = strlen (s);
+ char *rv;
+
+ rv = (char *) malloc (len+1);
+ if (rv == 0) {
+	fprintf(stderr, "Out of memory...\n");
+ exit (1);
+ }
+
+ strcpy (rv, s);
+ return (rv);
+}
+
+/*
+ * name_compare
+ */
+
+int name_compare (const char *s1, const char *s2)
+{
+ char c1, c2;
+
+ while (*s1 && *s2) {
+ c1 = *s1++;
+ c2 = *s2++;
+
+ c1 = tolower (c1);
+ c2 = tolower (c2);
+ if (c1 < c2)
+ return (-1);
+ else if (c1 > c2)
+ return (1);
+ }
+ if (*s1 < *s2)
+ return (-1);
+ else if (*s1 > *s2)
+ return (1);
+ return (0);
+}
diff --git a/src/tools/vppapigen/lex.h b/src/tools/vppapigen/lex.h
new file mode 100644
index 00000000..275cf685
--- /dev/null
+++ b/src/tools/vppapigen/lex.h
@@ -0,0 +1,51 @@
+/*
+ *------------------------------------------------------------------
+ * lex.h - definitions for the api generator's lexical
+ * analyzer.
+ *
+ * Copyright (c) 1996-2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _LEX_H_
+#define _LEX_H_ 1
+
+extern int yylex (void);
+extern void yyerror (char *);
+extern int yyparse (void);
+extern void autoreply (void *);
+
+#ifndef YYSTYPE
+#define YYSTYPE void *
+#endif
+
+#include "tools/vppapigen/gram.h"
+
+enum lex_state {
+ START_STATE = 1,
+ NAME_STATE,
+ NUMBER_STATE,
+ C_COMMENT_STATE,
+ CPP_COMMENT_STATE,
+ STRING_STATE,
+ HELPER_STATE,
+ LINE_PRAGMA_STATE,
+};
+
+#define MAXNAME 64000
+
+extern unsigned long input_crc;
+extern unsigned long message_crc;
+
+#endif /* _LEX_H_ */
diff --git a/src/tools/vppapigen/node.c b/src/tools/vppapigen/node.c
new file mode 100644
index 00000000..1f9905ba
--- /dev/null
+++ b/src/tools/vppapigen/node.c
@@ -0,0 +1,1547 @@
+/*
+ *------------------------------------------------------------------
+ * node.c - the api generator's semantic back-end
+ *
+ * Copyright (c) 2004-2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+
+#include "lex.h"
+#include "node.h"
+
+#define YYSTYPE void *
+
+FILE *ofp;
+FILE *pythonfp;
+FILE *jsonfp;
+time_t starttime;
+char *vlib_app_name;
+char *input_filename;
+node_vft_t *the_vft[NODE_N_TYPES];
+static int indent;
+static int dont_output_version;
+int dump_tree;
+static char *fixed_name;
+static char tmpbuf [MAXNAME];
+static char *current_def_name;
+static char *current_union_name;
+static char *current_type_fmt;
+static char *current_type_cast;
+static char current_id;
+static char current_is_complex;
+static char *current_endianfun;
+static char *current_type_name;
+
+void indent_me(FILE *ofp)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ putc(' ', ofp);
+}
+
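+/*
+ * NB: uppercase() and lowercase() share the static tmpbuf;
+ * each call clobbers the previous result.
+ */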
+char *uppercase (char *s)
+{
+ char *cp;
+
+ cp = tmpbuf;
+
+ while (*s && (cp < tmpbuf + (sizeof(tmpbuf)-1))) {
+ if (*s >= 'a' && *s <= 'z')
+ *cp++ = *s++ - ('a' - 'A');
+ else
+ *cp++ = *s++;
+ }
+ *cp = 0;
+ return(tmpbuf);
+}
+
+char *lowercase (char *s)
+{
+ char *cp;
+
+ cp = tmpbuf;
+
+ while (*s && (cp < tmpbuf + (sizeof(tmpbuf)-1))) {
+ if (*s >= 'A' && *s <= 'Z')
+ *cp++ = *s++ + ('a' - 'A');
+ else
+ *cp++ = *s++;
+ }
+ *cp = 0;
+ return(tmpbuf);
+}
+
+void primtype_recursive_print(node_t *this, i8 *fmt)
+{
+ fputs((char *)fmt, stdout);
+
+ if (this->deeper) {
+ node_vft_t *vftp = the_vft[this->deeper->type];
+ vftp->print(this->deeper);
+ }
+}
+
+void primtype_recursive_generate(node_t *this, enum passid which, FILE *ofp,
+ i8 *type_name, i8 *type_fmt, i8 *type_cast)
+{
+ node_vft_t *vftp;
+
+ current_type_name = (char *)type_name;
+ current_type_cast = (char *)type_cast;
+
+ switch(which) {
+ case TYPEDEF_PASS:
+ fputs((char *)type_name, ofp);
+ fputs(" ", ofp);
+ break;
+
+ case PRINTFUN_PASS:
+ current_type_fmt = (char *)type_fmt;
+ break;
+
+ case ENDIANFUN_PASS:
+ vftp = the_vft[this->type];
+ current_endianfun = vftp->endian_converter;
+ break;
+
+ case PYTHON_PASS:
+ fputs("('", pythonfp);
+ fputs((char *)type_name, pythonfp);
+ fputs("', ", pythonfp);
+ break;
+
+ case JSON_PASS:
+ fputs("[\"", jsonfp);
+ fputs((char *)type_name, jsonfp);
+ fputs("\", ", jsonfp);
+ break;
+
+ default:
+ fprintf(stderr, "primtype_recursive_generate: unimp pass %d\n", which);
+ break;
+ }
+
+ if (this->deeper) {
+ vftp = the_vft[this->deeper->type];
+ vftp->generate(this->deeper, which, ofp);
+ }
+}
+
+void node_illegal_print (node_t *this)
+{
+ fprintf(stderr, "node_illegal_print called\n");
+    exit(1);
+}
+
+void node_illegal_generate (node_t *this, enum passid notused, FILE *ofp)
+{
+ fprintf(stderr, "node_illegal_generate called\n");
+    exit(1);
+}
+
+node_vft_t node_illegal_vft = {
+ node_illegal_print,
+ node_illegal_generate,
+ "illegal"
+};
+
+void node_u8_print (node_t *this)
+{
+ primtype_recursive_print(this, "u8 ");
+}
+
+void node_u8_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "u8", "%u", "(unsigned)");
+}
+
+node_vft_t node_u8_vft = {
+ node_u8_print,
+ node_u8_generate,
+ NULL
+};
+
+void node_u16_print (node_t *this)
+{
+ primtype_recursive_print(this, "u16 ");
+}
+
+void node_u16_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "u16", "%u", "(unsigned)");
+}
+
+node_vft_t node_u16_vft = {
+ node_u16_print,
+ node_u16_generate,
+ "clib_net_to_host_u16"
+};
+
+void node_u32_print (node_t *this)
+{
+ primtype_recursive_print(this, "u32 ");
+}
+
+void node_u32_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "u32", "%u", "(unsigned)");
+}
+
+node_vft_t node_u32_vft = {
+ node_u32_print,
+ node_u32_generate,
+ "clib_net_to_host_u32",
+};
+
+void node_u64_print (node_t *this)
+{
+ primtype_recursive_print(this, "u64 ");
+}
+
+void node_u64_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "u64", "%llu",
+ "(long long)");
+}
+
+node_vft_t node_u64_vft = {
+ node_u64_print,
+ node_u64_generate,
+ "clib_net_to_host_u64"
+};
+
+void node_i8_print (node_t *this)
+{
+ primtype_recursive_print(this, "i8 ");
+}
+
+void node_i8_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "i8", "%d", "(int)");
+}
+
+node_vft_t node_i8_vft = {
+ node_i8_print,
+ node_i8_generate,
+ ""
+};
+
+void node_i16_print (node_t *this)
+{
+ primtype_recursive_print(this, "i16 ");
+}
+
+void node_i16_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "i16", "%d", "(int)");
+}
+
+node_vft_t node_i16_vft = {
+ node_i16_print,
+ node_i16_generate,
+ "clib_net_to_host_u16"
+};
+
+void node_i32_print (node_t *this)
+{
+ primtype_recursive_print(this, "i32 ");
+}
+
+void node_i32_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "i32", "%ld", "(long)");
+}
+
+node_vft_t node_i32_vft = {
+ node_i32_print,
+ node_i32_generate,
+ "clib_net_to_host_u32"
+};
+
+void node_i64_print (node_t *this)
+{
+ primtype_recursive_print(this, "i64 ");
+}
+
+void node_i64_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "i64", "%lld",
+ "(long long)");
+}
+
+node_vft_t node_i64_vft = {
+ node_i64_print,
+ node_i64_generate,
+ "clib_net_to_host_u64"
+};
+
+void node_f64_print (node_t *this)
+{
+ primtype_recursive_print(this, "f64 ");
+}
+
+void node_f64_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "f64", "%.2f",
+ "(double)");
+}
+
+node_vft_t node_f64_vft = {
+ node_f64_print,
+ node_f64_generate,
+ " ", /* FP numbers are sent in host byte order */
+};
+
+
+void node_packed_print (node_t *this)
+{
+ primtype_recursive_print (this, "packed ");
+}
+
+void node_packed_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "PACKED", "", "");
+}
+
+node_vft_t node_packed_vft = {
+ node_packed_print,
+ node_packed_generate,
+ 0,
+};
+
+void node_define_print (node_t *this)
+{
+ fprintf(stdout, "define %s {\n", CDATA0);
+ if (this->deeper) {
+ node_vft_t *vftp = the_vft[this->deeper->type];
+ fprintf(stdout, " ");
+ vftp->print(this->deeper);
+ }
+ fprintf(stdout, "};\n");
+}
+
+void node_define_generate (node_t *this, enum passid which, FILE *fp)
+{
+ node_t *child;
+
+ switch(which) {
+ case TYPEDEF_PASS:
+ fprintf(fp, "typedef VL_API_PACKED(struct _vl_api_%s {\n", CDATA0);
+ child = this->deeper;
+ indent += 4;
+ while (child) {
+ node_vft_t *vftp = the_vft[child->type];
+ indent_me(fp);
+ vftp->generate(child, which, fp);
+ child = child->peer;
+ }
+ indent -= 4;
+ fprintf(fp, "}) vl_api_%s_t;\n\n", CDATA0);
+ break;
+
+ case ENDIANFUN_PASS:
+ case PRINTFUN_PASS:
+ child = this->deeper;
+ while (child) {
+ node_vft_t *vftp = the_vft[child->type];
+ vftp->generate(child, which, fp);
+ child = child->peer;
+ }
+ break;
+
+ case PYTHON_PASS:
+ fprintf(fp, "('%s',\n", CDATA0);
+ child = this->deeper;
+ indent += 4;
+ while (child) {
+ node_vft_t *vftp = the_vft[child->type];
+ indent_me(fp);
+ vftp->generate(child, which, fp);
+ child = child->peer;
+ }
+ indent -= 4;
+ fprintf(fp, "),\n\n");
+ break;
+
+ case JSON_PASS:
+ fprintf(fp, "[\"%s\",\n", CDATA0);
+ child = this->deeper;
+ indent += 4;
+ while (child) {
+ node_vft_t *vftp = the_vft[child->type];
+ indent_me(fp);
+ vftp->generate(child, which, fp);
+ child = child->peer;
+ fprintf(fp, ",\n");
+ }
+ indent_me(fp);
+ fprintf (fp, "{\"crc\" : \"0x%08x\"}\n", (u32)(uword)CDATA3);
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "]");
+ break;
+
+ default:
+ fprintf(stderr, "node_define_generate: unimp pass %d\n", which);
+ break;
+ }
+}
+
+node_vft_t node_define_vft = {
+ node_define_print,
+ node_define_generate,
+ 0,
+};
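+
+/*
+ * Illustrative only: given the (hypothetical) input "define foo { u32 bar; };",
+ * the TYPEDEF_PASS above -- after add_msg_ids() has grafted in the
+ * message id field -- emits:
+ *
+ *     typedef VL_API_PACKED(struct _vl_api_foo {
+ *         u16 _vl_msg_id;
+ *         u32 bar;
+ *     }) vl_api_foo_t;
+ */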
+
+void node_union_print (node_t *this)
+{
+ primtype_recursive_print (this, "union ");
+}
+
+void node_union_generate (node_t *this, enum passid which, FILE *fp)
+{
+ node_t *child;
+ node_t *uelem;
+ int case_id=1;
+
+ switch(which) {
+ case TYPEDEF_PASS:
+ fprintf(fp, "u8 _%s_which;\n", CDATA0);
+ indent_me(fp);
+ fprintf(fp, "union _%s {\n", CDATA0);
+ child = this->deeper;
+ indent += 4;
+
+ while (child) {
+ node_vft_t *vftp = the_vft[child->type];
+ indent_me(fp);
+ vftp->generate(child, which, fp);
+ child = child->peer;
+ }
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "} %s;\n", CDATA0);
+ break;
+
+ case PRINTFUN_PASS:
+ case ENDIANFUN_PASS:
+ uelem = this->deeper;
+
+ indent_me(fp);
+ fprintf(fp, "switch(a->_%s_which) {\n",
+ CDATA0);
+ indent += 4;
+ current_union_name = CDATA0;
+
+ /* Walk the list of objects in this union */
+ while (uelem) {
+ node_vft_t *vftp = the_vft[uelem->type];
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "case %d:\n", case_id);
+ case_id++;
+ indent += 4;
+ /* Drill down on each element */
+ vftp->generate(uelem, which, fp);
+ indent_me(fp);
+ fprintf(fp, "break;\n");
+ uelem = uelem->peer;
+ }
+ current_union_name = 0;
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "default:\n");
+ indent += 4;
+ indent_me(fp);
+ if (which == PRINTFUN_PASS) {
+ fprintf(fp,
+ "vl_print(handle, \"WARNING: _%s_which not set.\\n\");\n",
+ CDATA0);
+ }
+ indent_me(fp);
+ fprintf(fp, "break;\n");
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "}\n");
+ break;
+
+ default:
+ fprintf(stderr, "node_union_generate: unimp pass %d\n", which);
+ break;
+ }
+}
+
+
+node_vft_t node_union_vft = {
+ node_union_print,
+ node_union_generate,
+ 0,
+};
+
+void node_scalar_print (node_t *this)
+{
+ fprintf(stdout, "%s", CDATA0);
+ primtype_recursive_print (this, "");
+}
+
+void node_scalar_generate (node_t *this, enum passid which, FILE *fp)
+{
+ char *union_prefix = "";
+
+ if (current_union_name) {
+ sprintf(tmpbuf, "%s.", current_union_name);
+ union_prefix = tmpbuf;
+ }
+
+ switch(which) {
+ case TYPEDEF_PASS:
+ fprintf(fp, "%s;\n", CDATA0);
+ break;
+
+ case PRINTFUN_PASS:
+ indent_me(fp);
+ if (current_is_complex) {
+ fprintf(fp, "vl_api_%s_t_print(a->%s%s, handle);\n",
+ current_type_name, union_prefix, CDATA0);
+ } else {
+ if (!strcmp(current_type_fmt, "uword")) {
+ fprintf(fp,
+ "vl_print(handle, \"%s%s: \" _uword_fmt \"\\n\", %s a->%s%s);\n",
+ union_prefix, CDATA0, "(_uword_cast)",
+ union_prefix, CDATA0);
+ } else {
+ fprintf(fp,
+ "vl_print(handle, \"%s%s: %s\\n\", %s a->%s%s);\n",
+ union_prefix, CDATA0,
+ current_type_fmt, current_type_cast,
+ union_prefix, CDATA0);
+ }
+ }
+ break;
+
+ case ENDIANFUN_PASS:
+ indent_me(fp);
+ if (current_is_complex) {
+	    fprintf(fp, "vl_api_%s_t_endian(a->%s%s);\n",
+ current_type_name, union_prefix, CDATA0);
+ } else {
+ /* Current_endianfun == NULL means e.g. it's a u8... */
+ if (current_endianfun) {
+ fprintf(fp, "a->%s%s = %s(a->%s%s);\n", union_prefix,
+ CDATA0, current_endianfun,
+ union_prefix, CDATA0);
+ } else {
+ fprintf(fp, "/* a->%s%s = a->%s%s (no-op) */\n",
+ union_prefix, CDATA0,
+ union_prefix, CDATA0);
+ }
+ }
+ break;
+ case PYTHON_PASS:
+ fprintf(fp, "'%s'),\n", CDATA0);
+ break;
+
+ case JSON_PASS:
+ fprintf(fp, "\"%s\"]", CDATA0);
+ break;
+
+ default:
+ fprintf(stderr, "node_scalar_generate: unimp pass %d\n", which);
+ }
+ if (this->deeper) {
+ fprintf(stderr, "broken recursion in node_scalar_generate\n");
+ }
+}
+
+
+node_vft_t node_scalar_vft = {
+ node_scalar_print,
+ node_scalar_generate,
+ 0,
+};
+
+void node_vector_print (node_t *this)
+{
+ primtype_recursive_print (this, "vector ");
+}
+
+void node_vector_generate (node_t *this, enum passid which, FILE *fp)
+{
+ char *union_prefix = "";
+
+ if (current_union_name) {
+ sprintf(tmpbuf, "%s.", current_union_name);
+ union_prefix = tmpbuf;
+ }
+
+ switch(which) {
+ case TYPEDEF_PASS:
+ fprintf(fp, "%s[%d];\n", CDATA0, IDATA1);
+ break;
+
+ case PRINTFUN_PASS:
+ /* Don't bother about "u8 data [0];" et al. */
+ if (IDATA1 == 0)
+ break;
+
+ indent_me(fp);
+ fprintf(fp, "{\n");
+ indent += 4;
+ indent_me(fp);
+ fprintf(fp, "int _i;\n");
+ indent_me(fp);
+ fprintf(fp, "for (_i = 0; _i < %d; _i++) {\n",
+ IDATA1);
+ indent += 4;
+ indent_me(fp);
+ if (current_is_complex) {
+	    fprintf(fp, "vl_print(handle, \"%s%s[%%d]: \", _i);\n",
+		    union_prefix, CDATA0);
+	    indent_me(fp);
+	    fprintf(fp,
+		    "vl_print_%s (handle, a->%s%s[_i]);\n",
+		    CDATA0, union_prefix, CDATA0);
+ } else {
+ fprintf(fp,
+ "vl_print(handle, \"%s%s[%%d]: %s\\n\", _i, a->%s%s[_i]);\n",
+ union_prefix, CDATA0,
+ current_type_fmt,
+ union_prefix, CDATA0);
+ }
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "}\n");
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "}\n");
+ break;
+
+ case ENDIANFUN_PASS:
+ /* Don't bother about "u8 data [0];" et al. */
+ if (IDATA1 == 0)
+ break;
+ /* If this is a simple endian swap, but the endian swap method is a no-op,
+ * then indicate this is a no-op in a comment.
+ */
+ if (!current_is_complex && current_endianfun == NULL) {
+ indent_me(fp);
+ fprintf(fp, "/* a->%s%s[0..%d] = a->%s%s[0..%d] (no-op) */\n",
+ union_prefix, CDATA0, IDATA1 - 1,
+ union_prefix, CDATA0, IDATA1 - 1);
+ break;
+ }
+
+ indent_me(fp);
+ fprintf(fp, "{\n");
+ indent += 4;
+ indent_me(fp);
+ fprintf(fp, "int _i;\n");
+ indent_me(fp);
+ fprintf(fp, "for (_i = 0; _i < %d; _i++) {\n",
+ IDATA1);
+ indent += 4;
+ indent_me(fp);
+ if (current_is_complex) {
+ fprintf(fp,
+ "vl_api_%s_t_endian (a->%s%s[_i]);\n",
+ current_type_name, union_prefix, CDATA0);
+ } else {
+ fprintf(fp,
+ "a->%s%s[_i] = %s(a->%s%s[_i]);\n",
+ union_prefix, CDATA0,
+ current_endianfun,
+ union_prefix, CDATA0);
+ }
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "}\n");
+ indent -= 4;
+ indent_me(fp);
+ fprintf(fp, "}\n");
+ break;
+ case PYTHON_PASS:
+ if (CDATA2 != 0) { // variable length vector
+ fprintf(fp, "'%s', '%d', '%s'),\n", CDATA0, IDATA1, CDATA2);
+ } else {
+ fprintf(fp, "'%s', '%d'),\n", CDATA0, IDATA1);
+ }
+ break;
+
+ case JSON_PASS:
+ if (CDATA2 != 0) { /* variable length vector */
+ fprintf(fp, "\"%s\", %d, \"%s\"]", CDATA0, IDATA1, CDATA2);
+ } else {
+ fprintf(fp, "\"%s\", %d]", CDATA0, IDATA1);
+ }
+ break;
+
+ default:
+ fprintf(stderr, "node_vector_generate: unimp pass %d\n", which);
+ }
+ if (this->deeper) {
+ fprintf(stderr, "broken recursion in node_vector_generate\n");
+ }
+}
+
+node_vft_t node_vector_vft = {
+ node_vector_print,
+ node_vector_generate,
+ 0,
+};
+
+void node_complex_print (node_t *this)
+{
+ primtype_recursive_print (this, "complex ");
+}
+
+void node_complex_generate (node_t *this, enum passid which, FILE *fp)
+{
+ node_t *deeper;
+ node_vft_t *vftp;
+ char *member_name = "broken!";
+ char *union_prefix = "";
+
+ if (current_union_name) {
+ sprintf(tmpbuf, "%s.", current_union_name);
+ union_prefix = tmpbuf;
+ }
+
+ current_is_complex++;
+
+ switch(which) {
+ case TYPEDEF_PASS:
+ fprintf(fp, "%s ", CDATA0);
+ deeper = this->deeper;
+ if (deeper) {
+ vftp = the_vft[deeper->type];
+ vftp->generate(deeper, which, fp);
+ }
+ break;
+
+ case PRINTFUN_PASS:
+ deeper = this->deeper;
+ while (deeper) {
+ if (deeper->type == NODE_SCALAR ||
+ deeper->type == NODE_VECTOR) {
+ member_name = deeper->data[0];
+ break;
+ }
+ deeper = deeper->deeper;
+ }
+ indent_me(fp);
+ fprintf(fp, "vl_print(handle, \"%s%s ----- \\n\");\n",
+ union_prefix, member_name);
+ indent_me(fp);
+
+ if (deeper && deeper->type == NODE_VECTOR)
+ fprintf(fp, "%s_print(a->%s%s, handle);\n",
+ CDATA0, union_prefix, member_name);
+ else
+ fprintf(fp, "%s_print(&a->%s%s, handle);\n",
+ CDATA0, union_prefix, member_name);
+
+ indent_me(fp);
+ fprintf(fp, "vl_print(handle, \"%s%s ----- END \\n\");\n",
+ union_prefix, member_name);
+ break;
+
+ case ENDIANFUN_PASS:
+ deeper = this->deeper;
+ while (deeper) {
+ if (deeper->type == NODE_SCALAR ||
+ deeper->type == NODE_VECTOR) {
+ member_name = deeper->data[0];
+ break;
+ }
+ deeper = deeper->deeper;
+ }
+
+ indent_me(fp);
+ if (deeper && deeper->type == NODE_VECTOR)
+ fprintf(fp, "%s_endian(a->%s%s);\n",
+ CDATA0, union_prefix, member_name);
+ else
+ fprintf(fp, "%s_endian(&a->%s%s);\n",
+ CDATA0, union_prefix, member_name);
+ break;
+ case PYTHON_PASS:
+ fprintf(fp, "('%s',", CDATA0);
+ deeper = this->deeper;
+ if (deeper) {
+ vftp = the_vft[deeper->type];
+ vftp->generate(deeper, which, fp);
+ }
+ break;
+
+ case JSON_PASS:
+ fprintf(fp, "[\"%s\", ", CDATA0);
+ deeper = this->deeper;
+ if (deeper) {
+ vftp = the_vft[deeper->type];
+ vftp->generate(deeper, which, fp);
+ }
+ break;
+
+ default:
+ fprintf(stderr, "node_complex_generate unimp pass %d...\n", which);
+ break;
+ }
+ current_is_complex--;
+}
+
+node_vft_t node_complex_vft = {
+ node_complex_print,
+ node_complex_generate,
+ 0,
+};
+
+void node_noversion_print (node_t *this)
+{
+ primtype_recursive_print (this, "noversion ");
+}
+
+void node_noversion_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ fprintf(stderr, "node_noversion_generate called...\n");
+}
+
+node_vft_t node_noversion_vft = {
+ node_noversion_print,
+ node_noversion_generate,
+ 0,
+};
+
+void node_uword_print (node_t *this)
+{
+ primtype_recursive_print(this, "uword ");
+}
+
+void node_uword_generate (node_t *this, enum passid which, FILE *ofp)
+{
+ primtype_recursive_generate(this, which, ofp, "uword", "uword", "");
+}
+
+node_vft_t node_uword_vft = {
+ node_uword_print,
+ node_uword_generate,
+ "clib_net_to_host_uword",
+};
+
+node_vft_t *the_vft[NODE_N_TYPES] = {
+ &node_illegal_vft,
+ &node_u8_vft,
+ &node_u16_vft,
+ &node_u32_vft,
+ &node_u64_vft,
+ &node_i8_vft,
+ &node_i16_vft,
+ &node_i32_vft,
+ &node_i64_vft,
+ &node_f64_vft,
+ &node_packed_vft,
+ &node_define_vft,
+ &node_union_vft,
+ &node_scalar_vft,
+ &node_vector_vft,
+ &node_complex_vft,
+ &node_noversion_vft,
+ &node_uword_vft,
+};
+
+void *make_node (enum node_subclass type)
+{
+ node_t *rv;
+
+ rv = (node_t *) malloc (sizeof (*rv));
+ if (rv == 0) {
+ fprintf (stderr, "fatal: make_node out of memory\n");
+ exit (1);
+ }
+    memset (rv, 0, sizeof (*rv));
+ rv->type = type;
+ return ((void *) rv);
+}
+
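+/*
+ * deeper / addpeer: graft arg2 at the bottom of arg1's "deeper"
+ * (containment) or "peer" (sibling) chain, respectively.
+ */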
+YYSTYPE deeper (YYSTYPE arg1, YYSTYPE arg2)
+{
+ node_t *np1 = (node_t *) arg1;
+ node_t *np2 = (node_t *) arg2;
+ node_t *hook_point;
+
+ hook_point = np1;
+
+ while (hook_point->deeper)
+ hook_point = hook_point->deeper;
+
+ hook_point->deeper = np2;
+ return (arg1);
+}
+
+YYSTYPE addpeer (YYSTYPE arg1, YYSTYPE arg2)
+{
+ node_t *np1 = (node_t *) arg1;
+ node_t *np2 = (node_t *) arg2;
+ node_t *hook_point;
+
+ hook_point = np1;
+
+ while (hook_point->peer)
+ hook_point = hook_point->peer;
+
+ hook_point->peer = np2;
+ return (arg1);
+}
+
+/*
+ * add_slist (stmt_list, stmt)
+ */
+
+YYSTYPE add_slist (YYSTYPE a1, YYSTYPE a2)
+{
+ if (a1 && a2)
+ return (addpeer(a1, a2));
+ else if(a1)
+ return(a1);
+ else
+ return(a2);
+}
+
+/*
+ * add_define (char *name, defn_list);
+ */
+YYSTYPE add_define (YYSTYPE a1, YYSTYPE a2)
+{
+ node_t *np;
+
+ np = make_node(NODE_DEFINE);
+ np->data[0] = a1;
+ np->data[3] = (void *) message_crc;
+ deeper((YYSTYPE)np, a2);
+ return ((YYSTYPE) np);
+}
+
+/*
+ * add_defbody (defn_list, new_defn)
+ */
+YYSTYPE add_defbody (YYSTYPE a1, YYSTYPE a2)
+{
+ return (addpeer(a1, a2));
+}
+
+/*
+ * add_primtype ([packed], primitive type, instance)
+ */
+
+YYSTYPE add_primtype (YYSTYPE a1, YYSTYPE a2, YYSTYPE a3)
+{
+ /* Hook instance to type node */
+ deeper (a1, a2);
+ if (a3) {
+ deeper(a1, a3);
+ }
+ return (a1);
+}
+
+/*
+ * add_complex(char *type_name, instance)
+ */
+
+YYSTYPE add_complex (YYSTYPE a1, YYSTYPE a2)
+{
+ node_t *np;
+
+ np = make_node(NODE_COMPLEX);
+ np->data[0] = (void *) a1;
+
+ deeper((YYSTYPE)np, a2);
+ return ((YYSTYPE) np);
+}
+
+/*
+ * add_union(char *type_name, definition)
+ */
+
+YYSTYPE add_union (YYSTYPE a1, YYSTYPE a2)
+{
+ node_t *np;
+
+ np = make_node(NODE_UNION);
+ np->data[0] = (void *) a1;
+
+ deeper((YYSTYPE)np, a2);
+ return ((YYSTYPE) np);
+}
+
+
+/*
+ * add_vector_vbl (node_t *variable, YYSTYPE size)
+ */
+
+YYSTYPE add_vector_vbl (YYSTYPE a1, YYSTYPE a2)
+{
+ node_t *np;
+
+ np = make_node(NODE_VECTOR);
+ np->data[0] = (void *) a1;
+ np->data[1] = (void *) a2;
+ return ((YYSTYPE) np);
+}
+
+/*
+ * add_variable_length_vector_vbl (char *vector_name, char *vector_length_var)
+ */
+
+YYSTYPE add_variable_length_vector_vbl (YYSTYPE vector_name, YYSTYPE vector_length_var)
+{
+ node_t *np;
+
+ np = make_node(NODE_VECTOR);
+ np->data[0] = (void *) vector_name;
+ np->data[1] = (void *) 0; // vector size used for vpe.api.h generation (array of length zero)
+ np->data[2] = (void *) vector_length_var; // name of the variable that stores vector length
+ return ((YYSTYPE) np);
+}
+
+/*
+ * add_scalar_vbl (char *name)
+ */
+YYSTYPE add_scalar_vbl (YYSTYPE a1)
+{
+ node_t *np;
+
+ np = make_node(NODE_SCALAR);
+ np->data[0] = (void *) a1;
+ return ((YYSTYPE) np);
+}
+
+/*
+ * set_flags (int flags, msg(=0?))
+ */
+YYSTYPE set_flags(YYSTYPE a1, YYSTYPE a2)
+{
+ node_t *np;
+ int flags;
+
+ np = (node_t *)a2;
+ if (!np)
+ return(0);
+
+ flags = (int)(uword) a1;
+
+ np->flags |= flags;
+
+ /* Generate a foo_reply_t right here */
+ if (flags & NODE_FLAG_AUTOREPLY)
+ autoreply(np);
+
+ return (a2);
+}
+/*
+ * suppress_version
+ */
+YYSTYPE suppress_version (void)
+{
+ dont_output_version = 1;
+ return (0);
+}
+
+void dump(node_t *np)
+{
+ node_vft_t *vftp;
+
+ while (np) {
+ vftp = the_vft[np->type];
+ vftp->print(np);
+ np = np->peer;
+ }
+}
+
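+/*
+ * fixup_input_filename: strip directory and extension, so that
+ * (for example) "../../src/vpp/api/vpe.api" becomes "vpe".
+ */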
+char *fixup_input_filename(void)
+{
+ char *cp;
+
+ cp = (char *)input_filename;
+
+ while (*cp)
+ cp++;
+
+ cp--;
+
+ while (cp > input_filename && *cp != '/')
+ cp--;
+ if (*cp == '/')
+ cp++;
+
+    strncpy (tmpbuf, cp, sizeof(tmpbuf)-1);
+    tmpbuf[sizeof(tmpbuf)-1] = 0;
+
+ cp = tmpbuf;
+
+ while (*cp)
+ cp++;
+
+ cp--;
+
+ while (cp > tmpbuf && *cp != '.')
+ cp--;
+
+ if (*cp == '.')
+ *cp = 0;
+
+ return (sxerox(tmpbuf));
+}
+
+void generate_top_boilerplate(FILE *fp)
+{
+    time_t curtime;
+    char *datestring;
+    char *source_date_epoch;
+
+    /* Honor SOURCE_DATE_EPOCH so builds can be reproducible */
+    if ((source_date_epoch = getenv("SOURCE_DATE_EPOCH")) == NULL ||
+        (curtime = (time_t)strtol(source_date_epoch, NULL, 10)) <= 0)
+        curtime = starttime;
+    datestring = asctime(gmtime(&curtime));
+ fixed_name = fixup_input_filename();
+
+    datestring[24] = 0;    /* lop off asctime()'s trailing newline */
+
+ fprintf (fp, "/*\n");
+ fprintf (fp, " * VLIB API definitions %s\n", datestring);
+ fprintf (fp, " * Input file: %s\n", input_filename);
+ fprintf (fp, " * Automatically generated: please edit the input file ");
+ fprintf (fp, "NOT this file!\n");
+ fprintf (fp, " */\n\n");
+ fprintf (fp, "#if defined(vl_msg_id)||defined(vl_union_id)||");
+ fprintf (fp, "defined(vl_printfun) \\\n ||defined(vl_endianfun)||");
+ fprintf (fp, " defined(vl_api_version)||defined(vl_typedefs) \\\n");
+ fprintf (fp, " ||defined(vl_msg_name)||defined(vl_msg_name_crc_list)\n");
+ fprintf (fp, "/* ok, something was selected */\n");
+ fprintf (fp, "#else\n");
+ fprintf (fp, "#warning no content included from %s\n", input_filename);
+ fprintf (fp, "#endif\n\n");
+ fprintf (fp, "#define VL_API_PACKED(x) x __attribute__ ((packed))\n\n");
+}
+
+void generate_bottom_boilerplate(FILE *fp)
+{
+ fprintf (fp, "\n#ifdef vl_api_version\n");
+
+ if (dont_output_version) {
+ fprintf (fp, "/* WARNING: API FILE VERSION CHECK DISABLED */\n");
+ input_crc = 0;
+ }
+
+ fprintf (fp, "vl_api_version(%s, 0x%08x)\n\n",
+ fixed_name, (unsigned int)input_crc);
+ fprintf (fp, "#endif\n\n");
+}
+
+void generate_msg_ids(YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+
+ fprintf (fp, "\n/****** Message ID / handler enum ******/\n\n");
+ fprintf (fp, "#ifdef vl_msg_id\n");
+
+ while (np) {
+ if (np->type == NODE_DEFINE) {
+ if (!(np->flags & NODE_FLAG_TYPEONLY)) {
+ fprintf (fp, "vl_msg_id(VL_API_%s, vl_api_%s_t_handler)\n",
+ uppercase(np->data[0]), (i8 *)np->data[0]);
+ } else {
+ fprintf (fp, "/* typeonly: %s */\n", (i8 *)np->data[0]);
+ }
+ }
+ np = np->peer;
+ }
+ fprintf (fp, "#endif\n");
+}
+
+void generate_msg_names(YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+
+ fprintf (fp, "\n/****** Message names ******/\n\n");
+
+ fprintf (fp, "#ifdef vl_msg_name\n");
+
+ while (np) {
+ if (np->type == NODE_DEFINE) {
+ if (!(np->flags & NODE_FLAG_TYPEONLY)) {
+ fprintf (fp, "vl_msg_name(vl_api_%s_t, %d)\n",
+ (i8 *) np->data[0],
+ (np->flags & NODE_FLAG_DONT_TRACE ? 0 : 1));
+ } else {
+ fprintf (fp, "/* typeonly: %s */\n", (i8 *)np->data[0]);
+ }
+ }
+ np = np->peer;
+ }
+ fprintf (fp, "#endif\n\n");
+}
+
+void generate_msg_name_crc_list (YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+ char *unique_suffix, *cp;
+
+ unique_suffix = sxerox(fixed_name);
+
+ cp = unique_suffix;
+ while (*cp && (*cp != '.'))
+ cp++;
+ if (*cp == '.')
+ *cp = 0;
+
+ fprintf (fp, "\n/****** Message name, crc list ******/\n\n");
+
+ fprintf (fp, "#ifdef vl_msg_name_crc_list\n");
+ fprintf (fp, "#define foreach_vl_msg_name_crc_%s ", unique_suffix);
+
+ while (np) {
+ if (np->type == NODE_DEFINE) {
+ if (!(np->flags & NODE_FLAG_TYPEONLY)) {
+ fprintf (fp, "\\\n_(VL_API_%s, %s, %08x) ",
+ uppercase (np->data[0]), (i8 *) np->data[0],
+ (u32)(uword)np->data[3]);
+ }
+ }
+ np = np->peer;
+ }
+ fprintf (fp, "\n#endif\n\n");
+ free (unique_suffix);
+}
+
+void generate_typedefs(YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+ node_vft_t *vftp;
+
+ fprintf(fp, "\n/****** Typedefs *****/\n\n");
+ fprintf(fp, "#ifdef vl_typedefs\n\n");
+
+ /* Walk the top-level node-list */
+ while (np) {
+ if (np->type == NODE_DEFINE) {
+ /* Yeah, this is pedantic */
+ vftp = the_vft[np->type];
+ vftp->generate(np, TYPEDEF_PASS, fp);
+ }
+ np = np->peer;
+ }
+ fprintf(fp, "#endif /* vl_typedefs */\n\n");
+}
+
+void union_walk_one_defn(node_t *np, FILE *fp)
+{
+ node_t *vblp;
+ node_t *uelem;
+
+ /* Walk the list of typed objects in this msg def */
+ while (np) {
+ if (np->type == NODE_UNION) {
+ current_union_name = np->data[0];
+ uelem = np->deeper;
+
+ /* Walk the list of objects in this union */
+ while (uelem) {
+ vblp = uelem->deeper;
+ /* Drill down on each element, find the variable name */
+ while(vblp) {
+ if (vblp->type == NODE_SCALAR ||
+ vblp->type == NODE_VECTOR ||
+ vblp->type == NODE_COMPLEX) {
+ fprintf(ofp, "#define %s_",
+ uppercase(current_def_name));
+ fprintf(ofp, "%s_", uppercase(current_union_name));
+ fprintf(ofp, "%s %d\n",uppercase(vblp->data[0]),
+ current_id);
+ current_id++;
+ break;
+ }
+ vblp = vblp->deeper;
+ }
+ uelem = uelem->peer;
+ }
+ current_union_name = 0;
+ current_id = 1;
+ }
+ np = np->peer;
+ }
+}
+
+void generate_uniondefs(YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+
+ fprintf(fp, "/****** Discriminated Union Definitions *****/\n\n");
+ fprintf(fp, "#ifdef vl_union_id\n\n");
+
+ /* Walk the top-level node-list */
+ while (np) {
+ if (np->type == NODE_DEFINE) {
+ current_id = 1;
+ current_def_name = np->data[0];
+ union_walk_one_defn(np->deeper, fp);
+ }
+ np = np->peer;
+ }
+ fprintf(fp, "\n#endif /* vl_union_id */\n\n");
+}
+
+void generate_printfun(YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+ node_vft_t *vftp;
+
+ fprintf(fp, "/****** Print functions *****/\n\n");
+ fprintf(fp, "#ifdef vl_printfun\n\n");
+
+ fprintf(fp, "#ifdef LP64\n");
+ fputs ("#define _uword_fmt \"%lld\"\n", fp);
+ fputs ("#define _uword_cast (long long)\n", fp);
+ fprintf(fp, "#else\n");
+ fputs("#define _uword_fmt \"%ld\"\n", fp);
+ fputs ("#define _uword_cast long\n", fp);
+ fprintf(fp, "#endif\n\n");
+
+ /* Walk the top-level node-list */
+ while (np) {
+ if (np->type == NODE_DEFINE) {
+ if (!(np->flags & NODE_FLAG_MANUAL_PRINT)) {
+ fprintf(fp,
+ "static inline void *vl_api_%s_t_print (vl_api_%s_t *a,",
+ (i8 *)np->data[0], (i8 *) np->data[0]);
+ fprintf(fp, "void *handle)\n{\n");
+ /* output the message name */
+ fprintf(fp,
+ " vl_print(handle, \"vl_api_%s_t:\\n\");\n",
+ (i8 *)np->data[0]);
+
+ indent += 4;
+ /* Yeah, this is pedantic */
+ vftp = the_vft[np->type];
+ vftp->generate(np, PRINTFUN_PASS, fp);
+ fprintf(fp, " return handle;\n");
+ fprintf(fp, "}\n\n");
+ indent -= 4;
+ } else {
+ fprintf(fp, "/***** manual: vl_api_%s_t_print *****/\n\n",
+ (i8 *) np->data[0]);
+ }
+ }
+ np = np->peer;
+ }
+ fprintf(fp, "#endif /* vl_printfun */\n\n");
+}
+
+void generate_endianfun(YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+ node_vft_t *vftp;
+
+ fprintf(fp, "\n/****** Endian swap functions *****/\n\n");
+ fprintf(fp, "#ifdef vl_endianfun\n\n");
+ fprintf(fp, "#undef clib_net_to_host_uword\n");
+ fprintf(fp, "#ifdef LP64\n");
+ fprintf(fp, "#define clib_net_to_host_uword clib_net_to_host_u64\n");
+ fprintf(fp, "#else\n");
+ fprintf(fp, "#define clib_net_to_host_uword clib_net_to_host_u32\n");
+ fprintf(fp, "#endif\n\n");
+
+ /* Walk the top-level node-list */
+ while (np) {
+ if (np->type == NODE_DEFINE) {
+ if (!(np->flags & NODE_FLAG_MANUAL_ENDIAN)) {
+ fprintf(fp,
+ "static inline void vl_api_%s_t_endian (vl_api_%s_t *a)\n{\n",
+ (i8 *) np->data[0], (i8 *) np->data[0]);
+ indent += 4;
+ /* Yeah, this is pedantic */
+ vftp = the_vft[np->type];
+ vftp->generate(np, ENDIANFUN_PASS, fp);
+ fprintf(fp, "}\n\n");
+ indent -= 4;
+ } else {
+ fprintf(fp, "/***** manual: vl_api_%s_t_endian *****/\n\n",
+ (i8 *) np->data[0]);
+ }
+ }
+ np = np->peer;
+ }
+ fprintf(fp, "#endif /* vl_endianfun */\n\n");
+}
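+
+/*
+ * Illustrative only: for the hypothetical "define foo { u32 bar; };",
+ * the ENDIANFUN_PASS emits roughly
+ *
+ *     static inline void vl_api_foo_t_endian (vl_api_foo_t *a)
+ *     {
+ *         a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id);
+ *         a->bar = clib_net_to_host_u32(a->bar);
+ *     }
+ */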
+
+void add_msg_ids(YYSTYPE a1)
+{
+ node_t *np = (node_t *)a1;
+ node_t *new_u16;
+ node_t *new_vbl;
+
+ /* Walk the top-level node-list */
+ while (np) {
+ if (np->type == NODE_DEFINE) {
+ if (!(np->flags & NODE_FLAG_TYPEONLY)) {
+ /* add the parse tree for "u16 _vl_msg_id" */
+ new_u16 = make_node(NODE_U16);
+ new_u16->peer = np->deeper;
+ np->deeper = new_u16;
+ new_vbl = make_node(NODE_SCALAR);
+ new_vbl->data[0] = sxerox("_vl_msg_id");
+ new_u16->deeper = new_vbl;
+ }
+ }
+ np = np->peer;
+ }
+}
+
+void generate_python_msg_definitions(YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+ node_vft_t *vftp;
+ fprintf (fp, "messages = [\n");
+ /* Walk the top-level node-list */
+ while (np) {
+ if (np->type == NODE_DEFINE && !(np->flags & NODE_FLAG_TYPEONLY)) {
+ /* Yeah, this is pedantic */
+ vftp = the_vft[np->type];
+ vftp->generate(np, PYTHON_PASS, fp);
+ }
+ np = np->peer;
+ }
+ fprintf (fp, "\n]\n");
+}
+
+static bool
+is_typeonly_check(node_t *np, bool typeonly)
+{
+ bool is_typeonly = (np->flags & NODE_FLAG_TYPEONLY);
+ return (is_typeonly == typeonly);
+}
+
+static void
+generate_json_definitions(YYSTYPE a1, FILE *fp, bool typeonly)
+{
+ node_t *np = (node_t *)a1;
+ node_vft_t *vftp;
+ indent_me(fp);
+ if (typeonly)
+ fprintf (fp, "\"types\" : [\n");
+ else
+ fprintf (fp, "\"messages\" : [\n");
+
+ /* Walk the top-level node-list */
+ bool comma = false;
+ indent += 4;
+ while (np) {
+ if (np->type == NODE_DEFINE && is_typeonly_check(np, typeonly)) {
+ /* Yeah, this is pedantic */
+ vftp = the_vft[np->type];
+ indent_me(fp);
+ vftp->generate(np, JSON_PASS, fp);
+ comma = true;
+ }
+ np = np->peer;
+ if (comma && np &&
+ np->type == NODE_DEFINE && is_typeonly_check(np, typeonly))
+ fprintf (fp, ",\n");
+
+ }
+ indent -= 4;
+ fprintf (fp, "\n");
+ indent_me(fp);
+ fprintf(fp, "]");
+}
+
+void generate_python_typeonly_definitions(YYSTYPE a1, FILE *fp)
+{
+ node_t *np = (node_t *)a1;
+ node_vft_t *vftp;
+ fprintf (fp, "types = [\n");
+ /* Walk the top-level node-list */
+ while (np) {
+ if (np->type == NODE_DEFINE && (np->flags & NODE_FLAG_TYPEONLY)) {
+ vftp = the_vft[np->type];
+ vftp->generate(np, PYTHON_PASS, fp);
+ }
+ np = np->peer;
+ }
+ fprintf (fp, "\n]\n");
+}
+
+void generate_python(YYSTYPE a1, FILE *fp)
+{
+ generate_python_typeonly_definitions(a1, fp);
+ generate_python_msg_definitions(a1, fp);
+
+ /*
+ * API CRC signature
+ */
+ fprintf (fp, "vl_api_version = 0x%08x\n\n", (unsigned int)input_crc);
+}
+
+void generate_json(YYSTYPE a1, FILE *fp)
+{
+ fprintf (fp, "{\n");
+ indent += 4;
+ generate_json_definitions(a1, fp, true);
+ fprintf (fp, ",\n");
+ generate_json_definitions(a1, fp, false);
+
+ /*
+ * API CRC signature
+ */
+ fprintf (fp, ",\n\"vl_api_version\" :\"0x%08x\"\n",
+ (unsigned int)input_crc);
+ fprintf (fp, "}\n");
+}
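+
+/*
+ * Illustrative only: the emitted JSON has the overall shape
+ *
+ *     {
+ *         "types" : [ ... ],
+ *         "messages" : [ ... ],
+ *         "vl_api_version" : "0x12345678"
+ *     }
+ *
+ * where the hex value is a placeholder for the computed input_crc.
+ */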
+
+void generate(YYSTYPE a1)
+{
+ if (dump_tree) {
+ dump((node_t *)a1);
+ }
+
+ add_msg_ids(a1);
+
+ if (ofp) {
+ generate_top_boilerplate(ofp);
+
+ generate_msg_ids(a1, ofp);
+ generate_msg_names(a1, ofp);
+ generate_msg_name_crc_list(a1, ofp);
+ generate_typedefs(a1, ofp);
+ generate_uniondefs(a1, ofp);
+ generate_printfun(a1, ofp);
+ generate_endianfun(a1, ofp);
+
+ generate_bottom_boilerplate(ofp);
+ }
+ if (pythonfp) {
+ generate_python(a1, pythonfp);
+ }
+ if (jsonfp) {
+ generate_json(a1, jsonfp);
+ }
+}
diff --git a/src/tools/vppapigen/node.h b/src/tools/vppapigen/node.h
new file mode 100644
index 00000000..65bd5d10
--- /dev/null
+++ b/src/tools/vppapigen/node.h
@@ -0,0 +1,96 @@
+/*
+ *------------------------------------------------------------------
+ * node.h - definitions for an API generator
+ *
+ * Copyright (c) 2004-2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _node_h_
+#define _node_h_
+
+/*
+ * Global prototypes
+ */
+
+char *sxerox (const char *s);
+
+enum node_subclass { /* WARNING: indices must match the vft... */
+ NODE_ILLEGAL=0,
+ NODE_U8,
+ NODE_U16,
+ NODE_U32,
+ NODE_U64,
+ NODE_I8,
+ NODE_I16,
+ NODE_I32,
+ NODE_I64,
+ NODE_F64,
+ NODE_PACKED,
+ NODE_DEFINE,
+ NODE_UNION,
+ NODE_SCALAR,
+ NODE_VECTOR,
+ NODE_COMPLEX,
+ NODE_NOVERSION,
+ NODE_UWORD,
+ NODE_N_TYPES, /* number of node types with VFT's */
+
+ /* pseudo-node(s) used in the lexer keyword table, but
+ NOT in need of a VFT... */
+ NODE_TYPEONLY,
+ NODE_MANUAL_PRINT,
+ NODE_MANUAL_ENDIAN,
+ NODE_DONT_TRACE,
+ NODE_AUTOREPLY,
+};
+
+enum passid {
+ TYPEDEF_PASS=1,
+ UNION_DEF_PASS,
+ ENDIANFUN_PASS,
+ PRINTFUN_PASS,
+ PYTHON_PASS,
+ JSON_PASS,
+};
+
+extern void *make_node (enum node_subclass type);
+
+typedef struct node_ {
+ enum node_subclass type;
+ struct node_ *peer;
+ struct node_ *deeper;
+ int flags;
+ void *data[4];
+} node_t;
+
+/* To shut up gcc-4.2.x warnings */
+#define CDATA0 ((char *)(this->data[0]))
+#define IDATA1 ((int)(uword)(this->data[1]))
+#define CDATA2 ((char *)(this->data[2]))
+#define CDATA3 ((char *)(this->data[3]))
+
+#define NODE_FLAG_MANUAL_PRINT (1<<0)
+#define NODE_FLAG_MANUAL_ENDIAN (1<<1)
+#define NODE_FLAG_TYPEONLY (1<<3)
+#define NODE_FLAG_DONT_TRACE (1<<4)
+#define NODE_FLAG_AUTOREPLY (1<<5)
+
+typedef struct node_vft_ {
+ void (*print)(struct node_ *);
+ void (*generate)(struct node_ *, enum passid id, FILE *ofp);
+ char *endian_converter;
+} node_vft_t;
+
+#endif /* _node_h_ */
diff --git a/src/uri.am b/src/uri.am
new file mode 100644
index 00000000..660f897d
--- /dev/null
+++ b/src/uri.am
@@ -0,0 +1,67 @@
+# Copyright (c) 2017 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+lib_LTLIBRARIES += libvppcom.la
+
+libvppcom_la_SOURCES =
+libvppcom_la_DEPENDENCIES = \
+ libvppinfra.la \
+ libvlib.la \
+ libsvmdb.la \
+ libsvm.la \
+ libvlibmemory.la \
+ libvlibmemoryclient.la
+
+libvppcom_la_LIBADD = $(libvppcom_la_DEPENDENCIES) -lpthread
+
+libvppcom_la_SOURCES += \
+ uri/vppcom.c
+
+nobase_include_HEADERS += \
+ uri/vppcom.h
+
+noinst_PROGRAMS += \
+ uri_udp_test \
+ uri_tcp_test \
+ uri_socket_test \
+ uri_socket_server \
+ vcl_test_server \
+ vcl_test_client \
+ sock_test_server \
+ sock_test_client
+
+uri_udp_test_SOURCES = uri/uri_udp_test.c
+uri_udp_test_LDADD = libvlibmemoryclient.la libsvm.la \
+ libvppinfra.la -lpthread -lm -lrt
+
+uri_tcp_test_SOURCES = uri/uri_tcp_test.c
+uri_tcp_test_LDADD = libvlibmemoryclient.la libsvm.la \
+ libvppinfra.la -lpthread -lm -lrt
+
+uri_socket_test_SOURCES = uri/uri_socket_test.c
+uri_socket_test_LDADD = libvppinfra.la -lpthread -lm -lrt
+
+uri_socket_server_SOURCES = uri/uri_socket_server.c
+uri_socket_server_LDADD = libvppinfra.la -lpthread -lm -lrt
+
+vcl_test_server_SOURCES = uri/vcl_test_server.c
+vcl_test_server_LDADD = libvppcom.la
+
+vcl_test_client_SOURCES = uri/vcl_test_client.c
+vcl_test_client_LDADD = libvppcom.la
+
+sock_test_server_SOURCES = uri/sock_test_server.c
+sock_test_client_SOURCES = uri/sock_test_client.c
+
+nobase_include_HEADERS += \
+ uri/sock_test.h
diff --git a/src/uri/sock_test.h b/src/uri/sock_test.h
new file mode 100644
index 00000000..281ba6fd
--- /dev/null
+++ b/src/uri/sock_test.h
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __sock_test_h__
+#define __sock_test_h__
+
+#include <netdb.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define SOCK_TEST_TOKEN_HELP "#H"
+#define SOCK_TEST_TOKEN_EXIT "#X"
+#define SOCK_TEST_TOKEN_VERBOSE "#V"
+#define SOCK_TEST_TOKEN_TXBUF_SIZE "#T:"
+#define SOCK_TEST_TOKEN_NUM_TEST_SCKTS "#I:"
+#define SOCK_TEST_TOKEN_NUM_WRITES "#N:"
+#define SOCK_TEST_TOKEN_RXBUF_SIZE "#R:"
+#define SOCK_TEST_TOKEN_SHOW_CFG "#C"
+#define SOCK_TEST_TOKEN_RUN_UNI "#U"
+#define SOCK_TEST_TOKEN_RUN_BI "#B"
+
+#define SOCK_TEST_BANNER_STRING \
+ "============================================\n"
+#define SOCK_TEST_SEPARATOR_STRING \
+ " -----------------------------\n"
+
+#define ONE_GIG (1024*1024*1024)
+#define SOCK_TEST_SERVER_PORT 22000
+#define SOCK_TEST_LOCALHOST_IPADDR "127.0.0.1"
+
+#define SOCK_TEST_CFG_CTRL_MAGIC 0xfeedface
+#define SOCK_TEST_CFG_NUM_WRITES_DEF 1000000
+#define SOCK_TEST_CFG_TXBUF_SIZE_DEF 8192
+#define SOCK_TEST_CFG_RXBUF_SIZE_DEF (64*SOCK_TEST_CFG_TXBUF_SIZE_DEF)
+#define SOCK_TEST_CFG_BUF_SIZE_MIN 128
+#define SOCK_TEST_CFG_MAX_TEST_SCKTS 5
+
+typedef enum
+{
+ SOCK_TEST_TYPE_NONE,
+ SOCK_TEST_TYPE_ECHO,
+ SOCK_TEST_TYPE_UNI,
+ SOCK_TEST_TYPE_BI,
+ SOCK_TEST_TYPE_EXIT,
+} sock_test_t;
+
+typedef struct __attribute__ ((packed))
+{
+ uint32_t magic;
+ uint32_t test;
+ uint32_t ctrl_handle;
+ uint32_t num_test_sockets;
+ uint32_t verbose;
+ uint64_t rxbuf_size;
+ uint64_t txbuf_size;
+ uint64_t num_writes;
+ uint64_t total_bytes;
+} sock_test_cfg_t;
+
+typedef struct
+{
+ uint64_t rx_xacts;
+ uint64_t rx_bytes;
+ uint32_t rx_eagain;
+ uint32_t rx_incomp;
+ uint64_t tx_xacts;
+ uint64_t tx_bytes;
+ uint32_t tx_eagain;
+ uint32_t tx_incomp;
+ struct timespec start;
+ struct timespec stop;
+} sock_test_stats_t;
+
+typedef struct
+{
+ int fd;
+ uint32_t txbuf_size;
+ char *txbuf;
+ uint32_t rxbuf_size;
+ char *rxbuf;
+ sock_test_cfg_t cfg;
+ sock_test_stats_t stats;
+} sock_test_socket_t;
+
+static inline void
+sock_test_stats_accumulate (sock_test_stats_t * accum,
+ sock_test_stats_t * incr)
+{
+ accum->rx_xacts += incr->rx_xacts;
+ accum->rx_bytes += incr->rx_bytes;
+ accum->rx_eagain += incr->rx_eagain;
+ accum->rx_incomp += incr->rx_incomp;
+ accum->tx_xacts += incr->tx_xacts;
+ accum->tx_bytes += incr->tx_bytes;
+ accum->tx_eagain += incr->tx_eagain;
+ accum->tx_incomp += incr->tx_incomp;
+}
+
+static inline void
+sock_test_cfg_init (sock_test_cfg_t *cfg)
+{
+ cfg->magic = SOCK_TEST_CFG_CTRL_MAGIC;
+ cfg->test = SOCK_TEST_TYPE_NONE;
+ cfg->ctrl_handle = ~0;
+ cfg->num_test_sockets = 1;
+ cfg->verbose = 0;
+ cfg->rxbuf_size = SOCK_TEST_CFG_RXBUF_SIZE_DEF;
+ cfg->num_writes = SOCK_TEST_CFG_NUM_WRITES_DEF;
+ cfg->txbuf_size = SOCK_TEST_CFG_TXBUF_SIZE_DEF;
+ cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
+}
+
+static inline int
+sock_test_cfg_verify (sock_test_cfg_t *cfg, sock_test_cfg_t *valid_cfg)
+{
+ /* Note: txbuf & rxbuf on server are the same buffer,
+ * so txbuf_size is not included in this check.
+ */
+ return ((cfg->magic == valid_cfg->magic)
+ && (cfg->test == valid_cfg->test)
+ && (cfg->verbose == valid_cfg->verbose)
+ && (cfg->rxbuf_size == valid_cfg->rxbuf_size)
+ && (cfg->num_writes == valid_cfg->num_writes)
+ && (cfg->total_bytes == valid_cfg->total_bytes));
+}
+
+static inline void
+sock_test_buf_alloc (sock_test_cfg_t *cfg, uint8_t is_rxbuf, uint8_t **buf,
+ uint32_t *bufsize)
+{
+ uint32_t alloc_size = is_rxbuf ? cfg->rxbuf_size : cfg->txbuf_size;
+ uint8_t *lb = realloc (*buf, (size_t) alloc_size);
+
+ if (lb)
+ {
+ if (is_rxbuf)
+ cfg->rxbuf_size = *bufsize = alloc_size;
+ else
+ cfg->txbuf_size = *bufsize = alloc_size;
+
+ *buf = lb;
+ }
+ else
+ {
+ int errno_val = errno;
+ perror ("ERROR in sock_test_buf_alloc()");
+ fprintf (stderr, "ERROR: Buffer allocation failed (errno = %d)!\n"
+ " Using buffer size %d instead of desired"
+ " size (%d)\n", errno_val, *bufsize, alloc_size);
+ }
+}
+
+static inline void
+sock_test_socket_buf_alloc (sock_test_socket_t *socket)
+{
+ socket->rxbuf_size = socket->cfg.rxbuf_size;
+ socket->txbuf_size = socket->cfg.txbuf_size;
+ sock_test_buf_alloc (&socket->cfg, 0 /* is_rxbuf */ ,
+ (uint8_t **) &socket->txbuf, &socket->txbuf_size);
+ sock_test_buf_alloc (&socket->cfg, 1 /* is_rxbuf */ ,
+ (uint8_t **) &socket->rxbuf, &socket->rxbuf_size);
+}
+
+static inline char *
+sock_test_type_str (sock_test_t t)
+{
+ switch (t)
+ {
+ case SOCK_TEST_TYPE_NONE:
+ return "NONE";
+
+ case SOCK_TEST_TYPE_ECHO:
+ return "ECHO";
+
+ case SOCK_TEST_TYPE_UNI:
+ return "UNI";
+
+ case SOCK_TEST_TYPE_BI:
+ return "BI";
+
+ case SOCK_TEST_TYPE_EXIT:
+ return "EXIT";
+
+ default:
+ return "Unknown";
+ }
+}
+
+static inline void
+sock_test_cfg_dump (sock_test_cfg_t * cfg, uint8_t is_client)
+{
+ char *spc = " ";
+
+ printf (" test config (%p):\n"
+ SOCK_TEST_SEPARATOR_STRING
+ " magic: 0x%08x\n"
+ "%-5s test: %s (%d)\n"
+ " ctrl handle: %d (0x%x)\n"
+ "%-5s num test sockets: %u (0x%08x)\n"
+ "%-5s verbose: %s (%d)\n"
+ "%-5s rxbuf size: %lu (0x%08lx)\n"
+ "%-5s txbuf size: %lu (0x%08lx)\n"
+ "%-5s num writes: %lu (0x%08lx)\n"
+ " client tx bytes: %lu (0x%08lx)\n"
+ SOCK_TEST_SEPARATOR_STRING,
+ (void *) cfg, cfg->magic,
+ is_client && (cfg->test == SOCK_TEST_TYPE_UNI) ?
+ "'"SOCK_TEST_TOKEN_RUN_UNI"'" :
+ is_client && (cfg->test == SOCK_TEST_TYPE_BI) ?
+ "'"SOCK_TEST_TOKEN_RUN_BI"'" : spc,
+ sock_test_type_str (cfg->test), cfg->test,
+ cfg->ctrl_handle, cfg->ctrl_handle,
+ is_client ? "'"SOCK_TEST_TOKEN_NUM_TEST_SCKTS"'" : spc,
+ cfg->num_test_sockets, cfg->num_test_sockets,
+ is_client ? "'"SOCK_TEST_TOKEN_VERBOSE"'" : spc,
+ cfg->verbose ? "on" : "off", cfg->verbose,
+ is_client ? "'"SOCK_TEST_TOKEN_RXBUF_SIZE"'" : spc,
+ cfg->rxbuf_size, cfg->rxbuf_size,
+ is_client ? "'"SOCK_TEST_TOKEN_TXBUF_SIZE"'" : spc,
+ cfg->txbuf_size, cfg->txbuf_size,
+ is_client ? "'"SOCK_TEST_TOKEN_NUM_WRITES"'" : spc,
+ cfg->num_writes, cfg->num_writes,
+ cfg->total_bytes, cfg->total_bytes);
+}
+
+static inline void
+sock_test_stats_dump (char * header, sock_test_stats_t * stats,
+ uint8_t show_rx, uint8_t show_tx,
+ uint8_t verbose)
+{
+ struct timespec diff;
+ double duration, rate;
+ uint64_t total_bytes;
+
+ if ((stats->stop.tv_nsec - stats->start.tv_nsec) < 0)
+ {
+ diff.tv_sec = stats->stop.tv_sec - stats->start.tv_sec - 1;
+ diff.tv_nsec = stats->stop.tv_nsec - stats->start.tv_nsec + 1000000000;
+ }
+ else
+ {
+ diff.tv_sec = stats->stop.tv_sec - stats->start.tv_sec;
+ diff.tv_nsec = stats->stop.tv_nsec - stats->start.tv_nsec;
+ }
+ duration = (double) diff.tv_sec + (1e-9 * diff.tv_nsec);
+
+ total_bytes = stats->tx_bytes + stats->rx_bytes;
+ rate = (double) total_bytes * 8 / duration / ONE_GIG;
+ printf ("\n%s: Streamed %lu bytes\n"
+ " in %lf seconds (%lf Gbps %s-duplex)!\n",
+ header, total_bytes, duration, rate,
+ (show_rx && show_tx) ? "full" : "half");
+
+ if (show_tx)
+ {
+ printf (SOCK_TEST_SEPARATOR_STRING
+ " tx stats (0x%p):\n"
+ SOCK_TEST_SEPARATOR_STRING
+ " writes: %lu (0x%08lx)\n"
+ " tx bytes: %lu (0x%08lx)\n"
+ " tx eagain: %u (0x%08x)\n"
+ " tx incomplete: %u (0x%08x)\n",
+ (void *)stats, stats->tx_xacts, stats->tx_xacts,
+ stats->tx_bytes, stats->tx_bytes,
+ stats->tx_eagain, stats->tx_eagain,
+ stats->tx_incomp, stats->tx_incomp);
+ }
+ if (show_rx)
+ {
+ printf (SOCK_TEST_SEPARATOR_STRING
+ " rx stats (0x%p):\n"
+ SOCK_TEST_SEPARATOR_STRING
+ " reads: %lu (0x%08lx)\n"
+ " rx bytes: %lu (0x%08lx)\n"
+ " rx eagain: %u (0x%08x)\n"
+ " rx incomplete: %u (0x%08x)\n",
+ (void *)stats, stats->rx_xacts, stats->rx_xacts,
+ stats->rx_bytes, stats->rx_bytes,
+ stats->rx_eagain, stats->rx_eagain,
+ stats->rx_incomp, stats->rx_incomp);
+ }
+ if (verbose)
+ printf (" start.tv_sec: %ld\n"
+ " start.tv_nsec: %ld\n"
+ " stop.tv_sec: %ld\n"
+ " stop.tv_nsec: %ld\n",
+ stats->start.tv_sec, stats->start.tv_nsec,
+ stats->stop.tv_sec, stats->stop.tv_nsec);
+
+ printf (SOCK_TEST_SEPARATOR_STRING);
+}
+
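+/*
+ * Read until at least one byte arrives: zero-length reads and
+ * EAGAIN/EWOULDBLOCK are retried and counted in stats->rx_eagain.
+ */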
+static inline int
+sock_test_read (int fd, uint8_t *buf, uint32_t nbytes,
+ sock_test_stats_t *stats)
+{
+ int rx_bytes, errno_val;
+
+ do
+ {
+ if (stats)
+ stats->rx_xacts++;
+#ifdef VCL_TEST
+ rx_bytes = vppcom_session_read (fd, buf, nbytes);
+
+ if (rx_bytes < 0)
+ {
+ errno = -rx_bytes;
+ rx_bytes = -1;
+ }
+#else
+ rx_bytes = read (fd, buf, nbytes);
+#endif
+ if (stats)
+ {
+ if ((rx_bytes == 0) ||
+ ((rx_bytes < 0) && ((errno == EAGAIN) || (errno == EWOULDBLOCK))))
+ stats->rx_eagain++;
+ else if (rx_bytes < nbytes)
+ stats->rx_incomp++;
+ }
+ }
+ while ((rx_bytes == 0) ||
+ ((rx_bytes < 0) && ((errno == EAGAIN) || (errno == EWOULDBLOCK))));
+
+ if (rx_bytes < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in sock_test_read()");
+ fprintf (stderr, "ERROR: socket read failed (errno = %d)!\n",
+ errno_val);
+ errno = errno_val;
+ }
+ else if (stats)
+ stats->rx_bytes += rx_bytes;
+
+ return (rx_bytes);
+}
+
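+/*
+ * Write the entire nbytes buffer, retrying on EAGAIN/EWOULDBLOCK;
+ * partial writes are counted in stats->tx_incomp.
+ */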
+static inline int
+sock_test_write (int fd, uint8_t *buf, uint32_t nbytes,
+ sock_test_stats_t *stats, uint32_t verbose)
+{
+ int tx_bytes = 0;
+ int nbytes_left = nbytes;
+ int rv, errno_val;
+
+ do
+ {
+ if (stats)
+ stats->tx_xacts++;
+#ifdef VCL_TEST
+ rv = vppcom_session_write (fd, buf, nbytes_left);
+ if (rv < 0)
+ {
+ errno = -rv;
+ rv = -1;
+ }
+#else
+ rv = write (fd, buf, nbytes_left);
+#endif
+ if (rv < 0)
+ {
+ if ((errno == EAGAIN) || (errno == EWOULDBLOCK))
+ {
+ if (stats)
+ stats->tx_eagain++;
+ continue;
+ }
+ else
+ break;
+ }
+ tx_bytes += rv;
+
+ if (tx_bytes != nbytes)
+ {
+ nbytes_left = nbytes_left - rv;
+ if (stats)
+ stats->tx_incomp++;
+ if (verbose)
+ {
+ printf ("WARNING: bytes written (%d) != bytes to write (%d)!\n",
+ tx_bytes, nbytes);
+ }
+ }
+
+ } while (tx_bytes != nbytes);
+
+ if (tx_bytes < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in sock_test_write()");
+ fprintf (stderr, "ERROR: socket write failed (errno = %d)!\n",
+ errno_val);
+ }
+ else if (stats)
+ stats->tx_bytes += tx_bytes;
+
+ return (tx_bytes);
+}
+
+#endif /* __sock_test_h__ */
diff --git a/src/uri/sock_test_client.c b/src/uri/sock_test_client.c
new file mode 100644
index 00000000..151c90b2
--- /dev/null
+++ b/src/uri/sock_test_client.c
@@ -0,0 +1,1076 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdio.h>
+#include <time.h>
+#include <arpa/inet.h>
+#include <uri/sock_test.h>
+
+typedef struct
+{
+#ifdef VCL_TEST
+ vppcom_endpt_t server_endpt;
+#endif
+ struct sockaddr_in server_addr;
+ sock_test_socket_t ctrl_socket;
+ sock_test_socket_t *test_socket;
+ uint32_t num_test_sockets;
+ uint8_t dump_cfg;
+} sock_client_main_t;
+
+sock_client_main_t sock_client_main;
+
+
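+/*
+ * Send the test cfg to the peer and verify the echoed copy; the
+ * server assigns ctrl_handle on the first sync.
+ */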
+static int
+sock_test_cfg_sync (sock_test_socket_t * socket)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ sock_test_cfg_t *rl_cfg = (sock_test_cfg_t *) socket->rxbuf;
+ int rx_bytes, tx_bytes;
+
+ if (socket->cfg.verbose)
+ sock_test_cfg_dump (&socket->cfg, 1 /* is_client */ );
+
+ tx_bytes = sock_test_write (socket->fd, (uint8_t *) & ctrl->cfg,
+ sizeof (ctrl->cfg), NULL, ctrl->cfg.verbose);
+ if (tx_bytes < 0)
+ {
+ fprintf (stderr, "ERROR: write test cfg failed (%d)!\n", tx_bytes);
+ return tx_bytes;
+ }
+
+ rx_bytes = sock_test_read (socket->fd, (uint8_t *) socket->rxbuf,
+ sizeof (sock_test_cfg_t), NULL);
+ if (rx_bytes < 0)
+ return rx_bytes;
+
+ if (rl_cfg->magic != SOCK_TEST_CFG_CTRL_MAGIC)
+ {
+ fprintf (stderr, "ERROR: Bad server reply cfg -- aborting!\n");
+ return -1;
+ }
+ if (socket->cfg.verbose)
+ {
+ printf ("CLIENT (fd %d): Got config back from server.\n", socket->fd);
+ sock_test_cfg_dump (rl_cfg, 1 /* is_client */ );
+ }
+ if ((rx_bytes != sizeof (sock_test_cfg_t))
+ || !sock_test_cfg_verify (rl_cfg, &ctrl->cfg))
+ {
+ fprintf (stderr,
+ "ERROR: Invalid config received from server -- aborting!\n");
+ sock_test_cfg_dump (rl_cfg, 1 /* is_client */ );
+ return -1;
+ }
+ ctrl->cfg.ctrl_handle = ((ctrl->cfg.ctrl_handle == ~0) ?
+ rl_cfg->ctrl_handle : ctrl->cfg.ctrl_handle);
+
+ return 0;
+}
+
+static void
+echo_test_client ()
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ sock_test_socket_t *tsock;
+ int rx_bytes, tx_bytes, nbytes;
+ uint32_t i, n;
+ int rv;
+ int nfds = 0;
+ fd_set wr_fdset, rd_fdset;
+ fd_set _wfdset, *wfdset = &_wfdset;
+ fd_set _rfdset, *rfdset = &_rfdset;
+
+ FD_ZERO (&wr_fdset);
+ FD_ZERO (&rd_fdset);
+ memset (&ctrl->stats, 0, sizeof (ctrl->stats));
+ ctrl->cfg.total_bytes = nbytes = strlen (ctrl->txbuf) + 1;
+ for (n = 0; n != ctrl->cfg.num_test_sockets; n++)
+ {
+ tsock = &scm->test_socket[n];
+ tsock->cfg = ctrl->cfg;
+ sock_test_socket_buf_alloc (tsock);
+ sock_test_cfg_sync (tsock);
+
+ memcpy (tsock->txbuf, ctrl->txbuf, nbytes);
+ memset (&tsock->stats, 0, sizeof (tsock->stats));
+
+ FD_SET (tsock->fd, &wr_fdset);
+ FD_SET (tsock->fd, &rd_fdset);
+ nfds = ((tsock->fd + 1) > nfds) ? (tsock->fd + 1) : nfds;
+ }
+
+ nfds++;
+ clock_gettime (CLOCK_REALTIME, &ctrl->stats.start);
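+  /* n equals num_test_sockets here; each socket decrements it once
+   * its echo completes. */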
+ while (n)
+ {
+ _wfdset = wr_fdset;
+ _rfdset = rd_fdset;
+
+#ifdef VCL_TEST
+ rv = vppcom_select (nfds, (uint64_t *) rfdset, (uint64_t *) wfdset,
+ NULL, 0);
+#else
+ {
+ struct timeval timeout;
+ timeout.tv_sec = 0;
+ timeout.tv_usec = 0;
+ rv = select (nfds, rfdset, wfdset, NULL, &timeout);
+ }
+#endif
+ if (rv < 0)
+ {
+ perror ("select()");
+ fprintf (stderr, "\nERROR: select() failed -- aborting test!\n");
+ return;
+ }
+ else if (rv == 0)
+ continue;
+
+ for (i = 0; i < ctrl->cfg.num_test_sockets; i++)
+ {
+ tsock = &scm->test_socket[i];
+ if (!((tsock->stats.stop.tv_sec == 0) &&
+ (tsock->stats.stop.tv_nsec == 0)))
+ continue;
+
+ if (FD_ISSET (tsock->fd, wfdset) &&
+ (tsock->stats.tx_bytes < ctrl->cfg.total_bytes))
+ {
+ tx_bytes =
+ sock_test_write (tsock->fd, (uint8_t *) tsock->txbuf, nbytes,
+ &tsock->stats, ctrl->cfg.verbose);
+ if (tx_bytes < 0)
+ {
+ fprintf (stderr, "\nERROR: sock_test_write(%d) failed "
+ "-- aborting test!\n", tsock->fd);
+ return;
+ }
+
+ printf ("CLIENT (fd %d): TX (%d bytes) - '%s'\n",
+ tsock->fd, tx_bytes, tsock->txbuf);
+ }
+
+ if ((FD_ISSET (tsock->fd, rfdset)) &&
+ (tsock->stats.rx_bytes < ctrl->cfg.total_bytes))
+ {
+ rx_bytes =
+ sock_test_read (tsock->fd, (uint8_t *) tsock->rxbuf,
+ nbytes, &tsock->stats);
+ if (rx_bytes > 0)
+ {
+ printf ("CLIENT (fd %d): RX (%d bytes) - '%s'\n",
+ tsock->fd, rx_bytes, tsock->rxbuf);
+
+ if (tsock->stats.rx_bytes != tsock->stats.tx_bytes)
+ printf
+ ("WARNING: bytes read (%lu) != bytes written (%lu)!\n",
+ tsock->stats.rx_bytes, tsock->stats.tx_bytes);
+ }
+ }
+
+ if (tsock->stats.rx_bytes >= ctrl->cfg.total_bytes)
+ {
+ clock_gettime (CLOCK_REALTIME, &tsock->stats.stop);
+ n--;
+ }
+ }
+ }
+ clock_gettime (CLOCK_REALTIME, &ctrl->stats.stop);
+
+ for (i = 0; i < ctrl->cfg.num_test_sockets; i++)
+ {
+ tsock = &scm->test_socket[i];
+ tsock->stats.start = ctrl->stats.start;
+
+ if (ctrl->cfg.verbose)
+ {
+ static char buf[64];
+
+ sprintf (buf, "CLIENT (fd %d) RESULTS", tsock->fd);
+ sock_test_stats_dump (buf, &tsock->stats,
+ 1 /* show_rx */ , 1 /* show tx */ ,
+ ctrl->cfg.verbose);
+ }
+
+ sock_test_stats_accumulate (&ctrl->stats, &tsock->stats);
+ }
+
+ if (ctrl->cfg.verbose)
+ {
+ sock_test_stats_dump ("CLIENT RESULTS", &ctrl->stats,
+ 1 /* show_rx */ , 1 /* show tx */ ,
+ ctrl->cfg.verbose);
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+
+ if (ctrl->cfg.verbose > 1)
+ {
+ printf (" ctrl socket info\n"
+ SOCK_TEST_SEPARATOR_STRING
+ " fd: %d (0x%08x)\n"
+ " rxbuf: %p\n"
+ " rxbuf size: %u (0x%08x)\n"
+ " txbuf: %p\n"
+ " txbuf size: %u (0x%08x)\n"
+ SOCK_TEST_SEPARATOR_STRING,
+ ctrl->fd, (uint32_t) ctrl->fd,
+ ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size,
+ ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size);
+ }
+ }
+}
+
+static void
+stream_test_client (sock_test_t test)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ sock_test_socket_t *tsock;
+ int tx_bytes;
+ uint32_t i, n;
+ int rv;
+ int nfds = 0;
+ fd_set wr_fdset, rd_fdset;
+ fd_set _wfdset, *wfdset = &_wfdset;
+ fd_set _rfdset, *rfdset = (test == SOCK_TEST_TYPE_BI) ? &_rfdset : 0;
+
+ ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
+ ctrl->cfg.ctrl_handle = ~0;
+
+ printf ("\n" SOCK_TEST_BANNER_STRING
+ "CLIENT (fd %d): %s-directional Stream Test!\n\n"
+ "CLIENT (fd %d): Sending config to server on ctrl socket...\n",
+ ctrl->fd, test == SOCK_TEST_TYPE_BI ? "Bi" : "Uni", ctrl->fd);
+
+ if (sock_test_cfg_sync (ctrl))
+ {
+ fprintf (stderr, "ERROR: test cfg sync failed -- aborting!");
+ return;
+ }
+
+ FD_ZERO (&wr_fdset);
+ FD_ZERO (&rd_fdset);
+ memset (&ctrl->stats, 0, sizeof (ctrl->stats));
+ for (n = 0; n != ctrl->cfg.num_test_sockets; n++)
+ {
+ tsock = &scm->test_socket[n];
+ tsock->cfg = ctrl->cfg;
+ sock_test_socket_buf_alloc (tsock);
+ printf ("CLIENT (fd %d): Sending config to server on "
+ "test socket %d...\n", tsock->fd, n);
+ sock_test_cfg_sync (tsock);
+
+      /* Fill the payload with incrementing byte values */
+ for (i = 0; i < tsock->txbuf_size; i++)
+ tsock->txbuf[i] = i & 0xff;
+
+ memset (&tsock->stats, 0, sizeof (tsock->stats));
+ FD_SET (tsock->fd, &wr_fdset);
+ FD_SET (tsock->fd, &rd_fdset);
+ nfds = ((tsock->fd + 1) > nfds) ? (tsock->fd + 1) : nfds;
+ }
+
+ nfds++;
+ clock_gettime (CLOCK_REALTIME, &ctrl->stats.start);
+ while (n)
+ {
+ _wfdset = wr_fdset;
+ _rfdset = rd_fdset;
+
+#ifdef VCL_TEST
+ rv = vppcom_select (nfds, (uint64_t *) rfdset, (uint64_t *) wfdset,
+ NULL, 0);
+#else
+ {
+ struct timeval timeout;
+ timeout.tv_sec = 0;
+ timeout.tv_usec = 0;
+ rv = select (nfds, rfdset, wfdset, NULL, &timeout);
+ }
+#endif
+ if (rv < 0)
+ {
+ perror ("select()");
+ fprintf (stderr, "\nERROR: select() failed -- aborting test!\n");
+ return;
+ }
+ else if (rv == 0)
+ continue;
+
+ for (i = 0; i < ctrl->cfg.num_test_sockets; i++)
+ {
+ tsock = &scm->test_socket[i];
+ if (!((tsock->stats.stop.tv_sec == 0) &&
+ (tsock->stats.stop.tv_nsec == 0)))
+ continue;
+
+ if (FD_ISSET (tsock->fd, wfdset) &&
+ (tsock->stats.tx_bytes < ctrl->cfg.total_bytes))
+ {
+ tx_bytes =
+ sock_test_write (tsock->fd, (uint8_t *) tsock->txbuf,
+ ctrl->cfg.txbuf_size, &tsock->stats,
+ ctrl->cfg.verbose);
+ if (tx_bytes < 0)
+ {
+ fprintf (stderr, "\nERROR: sock_test_write(%d) failed "
+ "-- aborting test!\n", tsock->fd);
+ return;
+ }
+ }
+
+ if ((test == SOCK_TEST_TYPE_BI) &&
+ FD_ISSET (tsock->fd, rfdset) &&
+ (tsock->stats.rx_bytes < ctrl->cfg.total_bytes))
+ {
+ (void) sock_test_read (tsock->fd,
+ (uint8_t *) tsock->rxbuf,
+ tsock->rxbuf_size, &tsock->stats);
+ }
+
+ if (((test == SOCK_TEST_TYPE_UNI) &&
+ (tsock->stats.tx_bytes >= ctrl->cfg.total_bytes)) ||
+ ((test == SOCK_TEST_TYPE_BI) &&
+ (tsock->stats.rx_bytes >= ctrl->cfg.total_bytes)))
+ {
+ clock_gettime (CLOCK_REALTIME, &tsock->stats.stop);
+ n--;
+ }
+ }
+ }
+ clock_gettime (CLOCK_REALTIME, &ctrl->stats.stop);
+
+ printf ("CLIENT (fd %d): Sending config to server on ctrl socket...\n",
+ ctrl->fd);
+
+ if (sock_test_cfg_sync (ctrl))
+ {
+ fprintf (stderr, "ERROR: test cfg sync failed -- aborting!");
+ return;
+ }
+
+ for (i = 0; i < ctrl->cfg.num_test_sockets; i++)
+ {
+ tsock = &scm->test_socket[i];
+
+ if (ctrl->cfg.verbose)
+ {
+ static char buf[64];
+
+ sprintf (buf, "CLIENT (fd %d) RESULTS", tsock->fd);
+ sock_test_stats_dump (buf, &tsock->stats,
+ test == SOCK_TEST_TYPE_BI /* show_rx */ ,
+ 1 /* show tx */ , ctrl->cfg.verbose);
+ }
+
+ sock_test_stats_accumulate (&ctrl->stats, &tsock->stats);
+ }
+
+ sock_test_stats_dump ("CLIENT RESULTS", &ctrl->stats,
+ test == SOCK_TEST_TYPE_BI /* show_rx */ ,
+ 1 /* show tx */ , ctrl->cfg.verbose);
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+
+ if (ctrl->cfg.verbose)
+ {
+ printf (" ctrl socket info\n"
+ SOCK_TEST_SEPARATOR_STRING
+ " fd: %d (0x%08x)\n"
+ " rxbuf: %p\n"
+ " rxbuf size: %u (0x%08x)\n"
+ " txbuf: %p\n"
+ " txbuf size: %u (0x%08x)\n"
+ SOCK_TEST_SEPARATOR_STRING,
+ ctrl->fd, (uint32_t) ctrl->fd,
+ ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size,
+ ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size);
+ }
+
+ ctrl->cfg.test = SOCK_TEST_TYPE_ECHO;
+ if (sock_test_cfg_sync (ctrl))
+ fprintf (stderr, "ERROR: post-test cfg sync failed!");
+
+ printf ("CLIENT (fd %d): %s-directional Stream Test Complete!\n"
+ SOCK_TEST_BANNER_STRING "\n", ctrl->fd,
+ test == SOCK_TEST_TYPE_BI ? "Bi" : "Uni");
+}
+
+static void
+exit_client (void)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ sock_test_socket_t *tsock;
+ int i;
+
+ for (i = 0; i < ctrl->cfg.num_test_sockets; i++)
+ {
+ tsock = &scm->test_socket[i];
+ tsock->cfg.test = SOCK_TEST_TYPE_EXIT;
+
+ /* coverity[COPY_PASTE_ERROR] */
+ if (ctrl->cfg.verbose)
+ {
+ printf ("\nCLIENT (fd %d): Sending exit cfg to server...\n",
+ tsock->fd);
+ sock_test_cfg_dump (&tsock->cfg, 1 /* is_client */ );
+ }
+ (void) sock_test_write (tsock->fd, (uint8_t *) & tsock->cfg,
+ sizeof (tsock->cfg), &tsock->stats,
+ ctrl->cfg.verbose);
+ }
+
+ ctrl->cfg.test = SOCK_TEST_TYPE_EXIT;
+ if (ctrl->cfg.verbose)
+ {
+ printf ("\nCLIENT (fd %d): Sending exit cfg to server...\n", ctrl->fd);
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ }
+ (void) sock_test_write (ctrl->fd, (uint8_t *) & ctrl->cfg,
+ sizeof (ctrl->cfg), &ctrl->stats,
+ ctrl->cfg.verbose);
+ printf ("\nCLIENT: So long and thanks for all the fish!\n\n");
+ sleep (1);
+}
+
+static int
+sock_test_connect_test_sockets (uint32_t num_test_sockets)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ sock_test_socket_t *tsock;
+ int i, rv, errno_val;
+
+ if (num_test_sockets < 1)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (num_test_sockets < scm->num_test_sockets)
+ {
+ for (i = scm->num_test_sockets - 1; i >= num_test_sockets; i--)
+ {
+ tsock = &scm->test_socket[i];
+#ifdef VCL_TEST
+ vppcom_session_close (tsock->fd);
+#else
+ close (tsock->fd);
+#endif
+ free (tsock->txbuf);
+ free (tsock->rxbuf);
+ }
+ }
+
+ else if (num_test_sockets > scm->num_test_sockets)
+ {
+ tsock = realloc (scm->test_socket,
+ sizeof (sock_test_socket_t) * num_test_sockets);
+ if (!tsock)
+ {
+ errno_val = errno;
+ perror ("ERROR in sock_test_connect_test_sockets()");
+ fprintf (stderr, "ERROR: socket failed (errno = %d)!\n", errno_val);
+ return -1;
+ }
+
+ if (!scm->test_socket)
+ memset (tsock, 0, sizeof (*tsock));
+
+ scm->test_socket = tsock;
+ for (i = scm->num_test_sockets; i < num_test_sockets; i++)
+ {
+ tsock = &scm->test_socket[i];
+#ifdef VCL_TEST
+ tsock->fd =
+ vppcom_session_create (VPPCOM_VRF_DEFAULT, VPPCOM_PROTO_TCP,
+ 0 /* is_nonblocking */ );
+ if (tsock->fd < 0)
+ {
+ errno = -tsock->fd;
+ tsock->fd = -1;
+ }
+#else
+ tsock->fd = socket (AF_INET, SOCK_STREAM, 0);
+#endif
+ if (tsock->fd < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in sock_test_connect_test_sockets()");
+ fprintf (stderr, "ERROR: socket failed (errno = %d)!\n",
+ errno_val);
+ return tsock->fd;
+ }
+
+#ifdef VCL_TEST
+ rv = vppcom_session_connect (tsock->fd, &scm->server_endpt);
+#else
+ rv =
+ connect (tsock->fd, (struct sockaddr *) &scm->server_addr,
+ sizeof (scm->server_addr));
+#endif
+ if (rv < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in main()");
+ fprintf (stderr, "ERROR: connect failed (errno = %d)!\n",
+ errno_val);
+ }
+ tsock->cfg = ctrl->cfg;
+ sock_test_socket_buf_alloc (tsock);
+ sock_test_cfg_sync (tsock);
+
+ printf ("CLIENT (fd %d): Test socket %d connected.\n",
+ tsock->fd, i);
+ }
+ }
+
+ scm->num_test_sockets = num_test_sockets;
+ printf ("CLIENT: All sockets (%d) connected!\n", scm->num_test_sockets + 1);
+ return 0;
+}
+
+static void
+dump_help (void)
+{
+#define INDENT "\n "
+
+ printf ("Test configuration commands:"
+ INDENT SOCK_TEST_TOKEN_HELP
+ "\t\t\tDisplay help."
+ INDENT SOCK_TEST_TOKEN_EXIT
+ "\t\t\tExit test client & server."
+ INDENT SOCK_TEST_TOKEN_SHOW_CFG
+ "\t\t\tShow the current test cfg."
+ INDENT SOCK_TEST_TOKEN_RUN_UNI
+ "\t\t\tRun the Uni-directional test."
+ INDENT SOCK_TEST_TOKEN_RUN_BI
+ "\t\t\tRun the Bi-directional test."
+ INDENT SOCK_TEST_TOKEN_VERBOSE
+ "\t\t\tToggle verbose setting."
+ INDENT SOCK_TEST_TOKEN_RXBUF_SIZE
+ "<rxbuf size>\tRx buffer size (bytes)."
+ INDENT SOCK_TEST_TOKEN_TXBUF_SIZE
+ "<txbuf size>\tTx buffer size (bytes)."
+ INDENT SOCK_TEST_TOKEN_NUM_WRITES
+ "<# of writes>\tNumber of txbuf writes to server." "\n");
+}
+
+static void
+cfg_txbuf_size_set (void)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ char *p = ctrl->txbuf + strlen (SOCK_TEST_TOKEN_TXBUF_SIZE);
+ uint64_t txbuf_size = strtoull ((const char *) p, NULL, 10);
+
+ if (txbuf_size >= SOCK_TEST_CFG_BUF_SIZE_MIN)
+ {
+ ctrl->cfg.txbuf_size = txbuf_size;
+ ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
+ sock_test_buf_alloc (&ctrl->cfg, 0 /* is_rxbuf */ ,
+ (uint8_t **) & ctrl->txbuf, &ctrl->txbuf_size);
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ }
+ else
+ fprintf (stderr,
+ "ERROR: Invalid txbuf size (%lu) < minimum buf size (%u)!\n",
+ txbuf_size, SOCK_TEST_CFG_BUF_SIZE_MIN);
+}
+
+static void
+cfg_num_writes_set (void)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ char *p = ctrl->txbuf + strlen (SOCK_TEST_TOKEN_NUM_WRITES);
+ uint32_t num_writes = strtoul ((const char *) p, NULL, 10);
+
+ if (num_writes > 0)
+ {
+ ctrl->cfg.num_writes = num_writes;
+ ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ }
+ else
+ {
+ fprintf (stderr, "ERROR: invalid num writes: %u\n", num_writes);
+ }
+}
+
+static void
+cfg_num_test_sockets_set (void)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ char *p = ctrl->txbuf + strlen (SOCK_TEST_TOKEN_NUM_TEST_SCKTS);
+ uint32_t num_test_sockets = strtoul ((const char *) p, NULL, 10);
+
+ if ((num_test_sockets > 0) &&
+ (num_test_sockets <= SOCK_TEST_CFG_MAX_TEST_SCKTS))
+ {
+ ctrl->cfg.num_test_sockets = num_test_sockets;
+ sock_test_connect_test_sockets (num_test_sockets);
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ }
+ else
+ {
+ fprintf (stderr, "ERROR: invalid num test sockets: %u, (%d max)\n",
+ num_test_sockets, SOCK_TEST_CFG_MAX_TEST_SCKTS);
+ }
+}
+
+static void
+cfg_rxbuf_size_set (void)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ char *p = ctrl->txbuf + strlen (SOCK_TEST_TOKEN_RXBUF_SIZE);
+ uint64_t rxbuf_size = strtoull ((const char *) p, NULL, 10);
+
+ if (rxbuf_size >= SOCK_TEST_CFG_BUF_SIZE_MIN)
+ {
+ ctrl->cfg.rxbuf_size = rxbuf_size;
+ sock_test_buf_alloc (&ctrl->cfg, 1 /* is_rxbuf */ ,
+ (uint8_t **) & ctrl->rxbuf, &ctrl->rxbuf_size);
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ }
+ else
+ fprintf (stderr,
+ "ERROR: Invalid rxbuf size (%lu) < minimum buf size (%u)!\n",
+ rxbuf_size, SOCK_TEST_CFG_BUF_SIZE_MIN);
+}
+
+static void
+cfg_verbose_toggle (void)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+
+ ctrl->cfg.verbose = ctrl->cfg.verbose ? 0 : 1;
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+}
+
+static sock_test_t
+parse_input ()
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ sock_test_t rv = SOCK_TEST_TYPE_NONE;
+
+ if (!strcmp (SOCK_TEST_TOKEN_EXIT, ctrl->txbuf))
+ rv = SOCK_TEST_TYPE_EXIT;
+
+ else if (!strcmp (SOCK_TEST_TOKEN_HELP, ctrl->txbuf))
+ dump_help ();
+
+ else if (!strcmp (SOCK_TEST_TOKEN_SHOW_CFG, ctrl->txbuf))
+ scm->dump_cfg = 1;
+
+ else if (!strcmp (SOCK_TEST_TOKEN_VERBOSE, ctrl->txbuf))
+ cfg_verbose_toggle ();
+
+ else if (!strncmp (SOCK_TEST_TOKEN_TXBUF_SIZE, ctrl->txbuf,
+ strlen (SOCK_TEST_TOKEN_TXBUF_SIZE)))
+ cfg_txbuf_size_set ();
+
+ else if (!strncmp (SOCK_TEST_TOKEN_NUM_TEST_SCKTS, ctrl->txbuf,
+ strlen (SOCK_TEST_TOKEN_NUM_TEST_SCKTS)))
+ cfg_num_test_sockets_set ();
+
+ else if (!strncmp (SOCK_TEST_TOKEN_NUM_WRITES, ctrl->txbuf,
+ strlen (SOCK_TEST_TOKEN_NUM_WRITES)))
+ cfg_num_writes_set ();
+
+ else if (!strncmp (SOCK_TEST_TOKEN_RXBUF_SIZE, ctrl->txbuf,
+ strlen (SOCK_TEST_TOKEN_RXBUF_SIZE)))
+ cfg_rxbuf_size_set ();
+
+ else if (!strncmp (SOCK_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
+ strlen (SOCK_TEST_TOKEN_RUN_UNI)))
+ rv = ctrl->cfg.test = SOCK_TEST_TYPE_UNI;
+
+ else if (!strncmp (SOCK_TEST_TOKEN_RUN_BI, ctrl->txbuf,
+ strlen (SOCK_TEST_TOKEN_RUN_BI)))
+ rv = ctrl->cfg.test = SOCK_TEST_TYPE_BI;
+
+ else
+ rv = SOCK_TEST_TYPE_ECHO;
+
+ return rv;
+}
+
+void
+print_usage_and_exit (void)
+{
+ fprintf (stderr,
+ "sock_test_client [OPTIONS] <ipaddr> <port>\n"
+ " OPTIONS\n"
+ " -h Print this message and exit.\n"
+ " -c Print test config before test.\n"
+ " -w <dir> Write test results to <dir>.\n"
+ " -X Exit after running test.\n"
+ " -E Run Echo test.\n"
+ " -N <num-writes> Test Cfg: number of writes.\n"
+ " -R <rxbuf-size> Test Cfg: rx buffer size.\n"
+ " -T <txbuf-size> Test Cfg: tx buffer size.\n"
+ " -U Run Uni-directional test.\n"
+ " -B Run Bi-directional test.\n"
+ " -V Verbose mode.\n");
+ exit (1);
+}
+
+int
+main (int argc, char **argv)
+{
+ sock_client_main_t *scm = &sock_client_main;
+ sock_test_socket_t *ctrl = &scm->ctrl_socket;
+ int c, rv, errno_val;
+ sock_test_t post_test = SOCK_TEST_TYPE_NONE;
+
+ sock_test_cfg_init (&ctrl->cfg);
+ sock_test_socket_buf_alloc (ctrl);
+
+ opterr = 0;
+  while ((c = getopt (argc, argv, "chs:w:XE:I:N:R:T:UBV")) != -1)
+ switch (c)
+ {
+ case 'c':
+ scm->dump_cfg = 1;
+ break;
+
+ case 's':
+ if (sscanf (optarg, "0x%x", &ctrl->cfg.num_test_sockets) != 1)
+ if (sscanf (optarg, "%u", &ctrl->cfg.num_test_sockets) != 1)
+ {
+ fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c);
+ print_usage_and_exit ();
+ }
+ if (!ctrl->cfg.num_test_sockets ||
+ (ctrl->cfg.num_test_sockets > FD_SETSIZE))
+ {
+ fprintf (stderr, "ERROR: Invalid number of sockets (%d)"
+ "specified for option -%c!\n"
+ " Valid range is 1 - %d\n",
+ ctrl->cfg.num_test_sockets, c, FD_SETSIZE);
+ print_usage_and_exit ();
+ }
+ break;
+
+ case 'w':
+ fprintf (stderr, "Writing test results to files is TBD.\n");
+ break;
+
+ case 'X':
+ post_test = SOCK_TEST_TYPE_EXIT;
+ break;
+
+ case 'E':
+	/* Leave room for the NUL terminator copied by strcpy below */
+	if (strlen (optarg) >= ctrl->txbuf_size)
+	  {
+	    fprintf (stderr,
+		     "ERROR: Option -%c value larger than txbuf size (%d)!\n",
+		     c, ctrl->txbuf_size);
+ print_usage_and_exit ();
+ }
+ strcpy (ctrl->txbuf, optarg);
+ ctrl->cfg.test = SOCK_TEST_TYPE_ECHO;
+ break;
+
+ case 'I':
+ if (sscanf (optarg, "0x%x", &ctrl->cfg.num_test_sockets) != 1)
+ if (sscanf (optarg, "%d", &ctrl->cfg.num_test_sockets) != 1)
+ {
+ fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c);
+ print_usage_and_exit ();
+ }
+ if (ctrl->cfg.num_test_sockets > SOCK_TEST_CFG_MAX_TEST_SCKTS)
+ {
+ fprintf (stderr, "ERROR: value greater than max number test"
+ " sockets (%d)!", SOCK_TEST_CFG_MAX_TEST_SCKTS);
+ print_usage_and_exit ();
+ }
+ break;
+
+ case 'N':
+ if (sscanf (optarg, "0x%lx", &ctrl->cfg.num_writes) != 1)
+ if (sscanf (optarg, "%ld", &ctrl->cfg.num_writes) != 1)
+ {
+ fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c);
+ print_usage_and_exit ();
+ }
+ ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
+ break;
+
+ case 'R':
+ if (sscanf (optarg, "0x%lx", &ctrl->cfg.rxbuf_size) != 1)
+ if (sscanf (optarg, "%ld", &ctrl->cfg.rxbuf_size) != 1)
+ {
+ fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c);
+ print_usage_and_exit ();
+ }
+ if (ctrl->cfg.rxbuf_size >= SOCK_TEST_CFG_BUF_SIZE_MIN)
+ {
+ ctrl->rxbuf_size = ctrl->cfg.rxbuf_size;
+ sock_test_buf_alloc (&ctrl->cfg, 1 /* is_rxbuf */ ,
+ (uint8_t **) & ctrl->rxbuf,
+ &ctrl->rxbuf_size);
+ }
+ else
+ {
+ fprintf (stderr,
+ "ERROR: rxbuf size (%lu) less than minumum (%u)\n",
+ ctrl->cfg.rxbuf_size, SOCK_TEST_CFG_BUF_SIZE_MIN);
+ print_usage_and_exit ();
+ }
+
+ break;
+
+ case 'T':
+ if (sscanf (optarg, "0x%lx", &ctrl->cfg.txbuf_size) != 1)
+ if (sscanf (optarg, "%ld", &ctrl->cfg.txbuf_size) != 1)
+ {
+ fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c);
+ print_usage_and_exit ();
+ }
+ if (ctrl->cfg.txbuf_size >= SOCK_TEST_CFG_BUF_SIZE_MIN)
+ {
+ ctrl->txbuf_size = ctrl->cfg.txbuf_size;
+ sock_test_buf_alloc (&ctrl->cfg, 0 /* is_rxbuf */ ,
+ (uint8_t **) & ctrl->txbuf,
+ &ctrl->txbuf_size);
+ ctrl->cfg.total_bytes =
+ ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
+ }
+ else
+ {
+ fprintf (stderr,
+ "ERROR: txbuf size (%lu) less than minumum (%u)!\n",
+ ctrl->cfg.txbuf_size, SOCK_TEST_CFG_BUF_SIZE_MIN);
+ print_usage_and_exit ();
+ }
+ break;
+
+ case 'U':
+ ctrl->cfg.test = SOCK_TEST_TYPE_UNI;
+ break;
+
+ case 'B':
+ ctrl->cfg.test = SOCK_TEST_TYPE_BI;
+ break;
+
+ case 'V':
+ ctrl->cfg.verbose = 1;
+ break;
+
+ case '?':
+ switch (optopt)
+ {
+ case 'E':
+ case 'I':
+ case 'N':
+ case 'R':
+ case 'T':
+	    case 's':
+	    case 'w':
+ fprintf (stderr, "ERROR: Option -%c requires an argument.\n",
+ optopt);
+ break;
+
+ default:
+ if (isprint (optopt))
+ fprintf (stderr, "ERROR: Unknown option `-%c'.\n", optopt);
+ else
+ fprintf (stderr, "ERROR: Unknown option character `\\x%x'.\n",
+ optopt);
+ }
+ /* fall thru */
+ case 'h':
+ default:
+ print_usage_and_exit ();
+ }
+
+ if (argc < (optind + 2))
+ {
+ fprintf (stderr, "ERROR: Insufficient number of arguments!\n");
+ print_usage_and_exit ();
+ }
+
+#ifdef VCL_TEST
+ ctrl->fd = vppcom_app_create ("vcl_test_client");
+ if (ctrl->fd < 0)
+ {
+ errno = -ctrl->fd;
+ ctrl->fd = -1;
+ }
+ else
+ {
+ ctrl->fd = vppcom_session_create (VPPCOM_VRF_DEFAULT, VPPCOM_PROTO_TCP,
+ 0 /* is_nonblocking */ );
+ if (ctrl->fd < 0)
+ {
+ errno = -ctrl->fd;
+ ctrl->fd = -1;
+ }
+ }
+#else
+ ctrl->fd = socket (AF_INET, SOCK_STREAM, 0);
+#endif
+
+ if (ctrl->fd < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in main()");
+ fprintf (stderr, "ERROR: socket failed (errno = %d)!\n", errno_val);
+ return ctrl->fd;
+ }
+
+ memset (&scm->server_addr, 0, sizeof (scm->server_addr));
+
+ scm->server_addr.sin_family = AF_INET;
+ inet_pton (AF_INET, argv[optind++], &(scm->server_addr.sin_addr));
+ scm->server_addr.sin_port = htons (atoi (argv[optind]));
+
+#ifdef VCL_TEST
+ scm->server_endpt.vrf = VPPCOM_VRF_DEFAULT;
+ scm->server_endpt.is_ip4 = (scm->server_addr.sin_family == AF_INET);
+ scm->server_endpt.ip = (uint8_t *) & scm->server_addr.sin_addr;
+ scm->server_endpt.port = (uint16_t) scm->server_addr.sin_port;
+#endif
+
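+  /* Retry until the control socket connects; the server may not be
+   * listening yet. */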
+ do
+ {
+ printf ("\nCLIENT: Connecting to server...\n");
+
+#ifdef VCL_TEST
+ rv = vppcom_session_connect (ctrl->fd, &scm->server_endpt);
+#else
+ rv =
+ connect (ctrl->fd, (struct sockaddr *) &scm->server_addr,
+ sizeof (scm->server_addr));
+#endif
+ if (rv < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in main()");
+ fprintf (stderr, "ERROR: connect failed (errno = %d)!\n",
+ errno_val);
+ }
+
+ sock_test_cfg_sync (ctrl);
+ printf ("CLIENT (fd %d): Control socket connected.\n", ctrl->fd);
+ }
+ while (rv < 0);
+
+ sock_test_connect_test_sockets (ctrl->cfg.num_test_sockets);
+
+ while (ctrl->cfg.test != SOCK_TEST_TYPE_EXIT)
+ {
+ if (scm->dump_cfg)
+ {
+ sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ scm->dump_cfg = 0;
+ }
+
+ switch (ctrl->cfg.test)
+ {
+ case SOCK_TEST_TYPE_ECHO:
+ echo_test_client ();
+ break;
+
+ case SOCK_TEST_TYPE_UNI:
+ case SOCK_TEST_TYPE_BI:
+ stream_test_client (ctrl->cfg.test);
+ break;
+
+ case SOCK_TEST_TYPE_EXIT:
+ continue;
+
+ case SOCK_TEST_TYPE_NONE:
+ default:
+ break;
+ }
+ switch (post_test)
+ {
+ case SOCK_TEST_TYPE_EXIT:
+ switch (ctrl->cfg.test)
+ {
+ case SOCK_TEST_TYPE_EXIT:
+ case SOCK_TEST_TYPE_UNI:
+ case SOCK_TEST_TYPE_BI:
+ case SOCK_TEST_TYPE_ECHO:
+ ctrl->cfg.test = SOCK_TEST_TYPE_EXIT;
+ continue;
+
+ case SOCK_TEST_TYPE_NONE:
+ default:
+ break;
+ }
+ break;
+
+ case SOCK_TEST_TYPE_NONE:
+ case SOCK_TEST_TYPE_ECHO:
+ case SOCK_TEST_TYPE_UNI:
+ case SOCK_TEST_TYPE_BI:
+ default:
+ break;
+ }
+
+ memset (ctrl->txbuf, 0, ctrl->txbuf_size);
+ memset (ctrl->rxbuf, 0, ctrl->rxbuf_size);
+
+ printf ("\nType some characters and hit <return>\n"
+ "('" SOCK_TEST_TOKEN_HELP "' for help): ");
+
+ if (fgets (ctrl->txbuf, ctrl->txbuf_size, stdin) != NULL)
+ {
+ if (strlen (ctrl->txbuf) == 1)
+ {
+ printf ("\nCLIENT: Nothing to send! Please try again...\n");
+ continue;
+ }
+ ctrl->txbuf[strlen (ctrl->txbuf) - 1] = 0; // chomp the newline.
+
+ /* Parse input for keywords */
+ ctrl->cfg.test = parse_input ();
+ }
+ }
+
+ exit_client ();
+#ifdef VCL_TEST
+ vppcom_session_close (ctrl->fd);
+ vppcom_app_destroy ();
+#else
+ close (ctrl->fd);
+#endif
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/sock_test_server.c b/src/uri/sock_test_server.c
new file mode 100644
index 00000000..35046aa0
--- /dev/null
+++ b/src/uri/sock_test_server.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+#include <uri/sock_test.h>
+
+typedef struct
+{
+ uint8_t is_alloc;
+ int fd;
+ uint8_t *buf;
+ uint32_t buf_size;
+ sock_test_cfg_t cfg;
+ sock_test_stats_t stats;
+#ifdef VCL_TEST
+ vppcom_endpt_t endpt;
+ uint8_t ip[16];
+#endif
+} sock_server_conn_t;
+
+#define SOCK_SERVER_MAX_TEST_CONN 10
+typedef struct
+{
+ int listen_fd;
+ size_t num_conn;
+ size_t conn_pool_size;
+ sock_server_conn_t *conn_pool;
+ int nfds;
+ fd_set rd_fdset;
+ fd_set wr_fdset;
+ struct timeval timeout;
+} sock_server_main_t;
+
+sock_server_main_t sock_server_main;
+
+static inline int
+get_nfds (void)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+ int i, nfds;
+
+ for (nfds = i = 0; i < FD_SETSIZE; i++)
+ {
+ if (FD_ISSET (i, &ssm->rd_fdset) || FD_ISSET (i, &ssm->wr_fdset))
+ nfds = i + 1;
+ }
+ return nfds;
+}
+
+static inline void
+conn_fdset_set (sock_server_conn_t * conn, fd_set * fdset)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+
+ FD_SET (conn->fd, fdset);
+ ssm->nfds = get_nfds ();
+}
+
+static inline void
+conn_fdset_clr (sock_server_conn_t * conn, fd_set * fdset)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+
+ FD_CLR (conn->fd, fdset);
+ ssm->nfds = get_nfds ();
+}
+
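+/* Grow the connection pool; each new entry gets a default cfg and an
+ * rx buffer of the default size. */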
+static inline void
+conn_pool_expand (size_t expand_size)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+ sock_server_conn_t *conn_pool;
+ size_t new_size = ssm->conn_pool_size + expand_size;
+ int i;
+
+ conn_pool = realloc (ssm->conn_pool, new_size * sizeof (*ssm->conn_pool));
+ if (conn_pool)
+ {
+ for (i = ssm->conn_pool_size; i < new_size; i++)
+ {
+ sock_server_conn_t *conn = &conn_pool[i];
+ memset (conn, 0, sizeof (*conn));
+ sock_test_cfg_init (&conn->cfg);
+ sock_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */ ,
+ &conn->buf, &conn->buf_size);
+ conn->cfg.txbuf_size = conn->cfg.rxbuf_size;
+ }
+
+ ssm->conn_pool = conn_pool;
+ ssm->conn_pool_size = new_size;
+ }
+ else
+ {
+ int errno_val = errno;
+ perror ("ERROR in conn_pool_expand()");
+ fprintf (stderr, "ERROR: Memory allocation failed (errno = %d)!\n",
+ errno_val);
+ }
+}
+
+static inline sock_server_conn_t *
+conn_pool_alloc (void)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+ int i;
+
+ for (i = 0; i < ssm->conn_pool_size; i++)
+ {
+ if (!ssm->conn_pool[i].is_alloc)
+ {
+#ifdef VCL_TEST
+ ssm->conn_pool[i].endpt.ip = ssm->conn_pool[i].ip;
+#endif
+ ssm->conn_pool[i].is_alloc = 1;
+ return (&ssm->conn_pool[i]);
+ }
+ }
+
+ return 0;
+}
+
+static inline void
+conn_pool_free (sock_server_conn_t * conn)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+
+ conn_fdset_clr (conn, &ssm->rd_fdset);
+ conn_fdset_clr (conn, &ssm->wr_fdset);
+ conn->fd = 0;
+ conn->is_alloc = 0;
+}
+
+static inline void
+sync_config_and_reply (sock_server_conn_t * conn, sock_test_cfg_t * rx_cfg)
+{
+ conn->cfg = *rx_cfg;
+ sock_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */ ,
+ &conn->buf, &conn->buf_size);
+ conn->cfg.txbuf_size = conn->cfg.rxbuf_size;
+
+ if (conn->cfg.verbose)
+ {
+ printf ("\nSERVER (fd %d): Replying to cfg message!\n", conn->fd);
+ sock_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ }
+ (void) sock_test_write (conn->fd, (uint8_t *) & conn->cfg,
+ sizeof (conn->cfg), NULL, conn->cfg.verbose);
+}
+
+static void
+stream_test_server_start_stop (sock_server_conn_t * conn,
+ sock_test_cfg_t * rx_cfg)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+ int client_fd = conn->fd;
+ sock_test_t test = rx_cfg->test;
+
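+  /* A cfg whose ctrl_handle matches this fd arrives on the control
+   * connection at the end of a run: collect and dump the results.
+   * Otherwise it marks the start of a new stream test. */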
+ if (rx_cfg->ctrl_handle == conn->fd)
+ {
+ int i;
+ clock_gettime (CLOCK_REALTIME, &conn->stats.stop);
+
+ for (i = 0; i < ssm->conn_pool_size; i++)
+ {
+ sock_server_conn_t *tc = &ssm->conn_pool[i];
+
+ if (tc->cfg.ctrl_handle == conn->fd)
+ {
+ sock_test_stats_accumulate (&conn->stats, &tc->stats);
+
+ if (conn->cfg.verbose)
+ {
+ static char buf[64];
+
+ sprintf (buf, "SERVER (fd %d) RESULTS", tc->fd);
+ sock_test_stats_dump (buf, &tc->stats, 1 /* show_rx */ ,
+ test == SOCK_TEST_TYPE_BI
+ /* show tx */ ,
+ conn->cfg.verbose);
+ }
+ }
+ }
+
+ sock_test_stats_dump ("SERVER RESULTS", &conn->stats, 1 /* show_rx */ ,
+ (test == SOCK_TEST_TYPE_BI) /* show_tx */ ,
+ conn->cfg.verbose);
+ sock_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ if (conn->cfg.verbose)
+ {
+ printf (" sock server main\n"
+ SOCK_TEST_SEPARATOR_STRING
+ " buf: %p\n"
+ " buf size: %u (0x%08x)\n"
+ SOCK_TEST_SEPARATOR_STRING,
+ conn->buf, conn->buf_size, conn->buf_size);
+ }
+
+ sync_config_and_reply (conn, rx_cfg);
+ printf ("\nSERVER (fd %d): %s-directional Stream Test Complete!\n"
+ SOCK_TEST_BANNER_STRING "\n", conn->fd,
+ test == SOCK_TEST_TYPE_BI ? "Bi" : "Uni");
+ }
+ else
+ {
+ printf ("\n" SOCK_TEST_BANNER_STRING
+ "SERVER (fd %d): %s-directional Stream Test!\n"
+ " Sending client the test cfg to start streaming data...\n",
+ client_fd, test == SOCK_TEST_TYPE_BI ? "Bi" : "Uni");
+
+ rx_cfg->ctrl_handle = (rx_cfg->ctrl_handle == ~0) ? conn->fd :
+ rx_cfg->ctrl_handle;
+
+ sync_config_and_reply (conn, rx_cfg);
+
+ /* read the 1st chunk, record start time */
+ memset (&conn->stats, 0, sizeof (conn->stats));
+ clock_gettime (CLOCK_REALTIME, &conn->stats.start);
+ }
+}
+
+
+static inline void
+stream_test_server (sock_server_conn_t * conn, int rx_bytes)
+{
+ int client_fd = conn->fd;
+ sock_test_t test = conn->cfg.test;
+
+ if (test == SOCK_TEST_TYPE_BI)
+ (void) sock_test_write (client_fd, conn->buf, rx_bytes, &conn->stats,
+ conn->cfg.verbose);
+
+ if (conn->stats.rx_bytes >= conn->cfg.total_bytes)
+ {
+ clock_gettime (CLOCK_REALTIME, &conn->stats.stop);
+ }
+}
+
+static inline void
+new_client (void)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+ int client_fd;
+ sock_server_conn_t *conn;
+
+ if (ssm->conn_pool_size < (ssm->num_conn + SOCK_SERVER_MAX_TEST_CONN + 1))
+ conn_pool_expand (SOCK_SERVER_MAX_TEST_CONN + 1);
+
+ conn = conn_pool_alloc ();
+ if (!conn)
+ {
+ fprintf (stderr, "\nERROR: No free connections!\n");
+ return;
+ }
+
+#ifdef VCL_TEST
+ client_fd = vppcom_session_accept (ssm->listen_fd, &conn->endpt,
+ -1.0 /* wait forever */ );
+#else
+ client_fd = accept (ssm->listen_fd, (struct sockaddr *) NULL, NULL);
+#endif
+  if (client_fd < 0)
+    {
+      int errno_val;
+      errno_val = errno;
+      perror ("ERROR in new_client()");
+      fprintf (stderr, "ERROR: accept failed (errno = %d)!\n", errno_val);
+      conn->is_alloc = 0;
+      return;
+    }
+
+ printf ("SERVER: Got a connection -- fd = %d (0x%08x)!\n",
+ client_fd, client_fd);
+
+ conn->fd = client_fd;
+ conn_fdset_set (conn, &ssm->rd_fdset);
+}
+
+int
+main (int argc, char **argv)
+{
+ sock_server_main_t *ssm = &sock_server_main;
+ int client_fd, rv, main_rv = 0;
+ int tx_bytes, rx_bytes, nbytes;
+ sock_server_conn_t *conn;
+ sock_test_cfg_t *rx_cfg;
+ uint32_t xtra = 0;
+ uint64_t xtra_bytes = 0;
+ struct sockaddr_in servaddr;
+ int errno_val;
+ int v, i;
+ uint16_t port = SOCK_TEST_SERVER_PORT;
+ fd_set _rfdset, *rfdset = &_rfdset;
+#ifdef VCL_TEST
+ vppcom_endpt_t endpt;
+#else
+ fd_set _wfdset, *wfdset = &_wfdset;
+#endif
+
+ if ((argc == 2) && (sscanf (argv[1], "%d", &v) == 1))
+ port = (uint16_t) v;
+
+ conn_pool_expand (SOCK_SERVER_MAX_TEST_CONN + 1);
+
+#ifdef VCL_TEST
+ rv = vppcom_app_create ("vcl_test_server");
+ if (rv)
+ {
+ errno = -rv;
+ ssm->listen_fd = -1;
+ }
+ else
+ {
+ ssm->listen_fd =
+ vppcom_session_create (VPPCOM_VRF_DEFAULT, VPPCOM_PROTO_TCP,
+ 0 /* is_nonblocking */ );
+ }
+#else
+ ssm->listen_fd = socket (AF_INET, SOCK_STREAM, 0);
+#endif
+ if (ssm->listen_fd < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in main()");
+ fprintf (stderr, "ERROR: socket() failed (errno = %d)!\n", errno_val);
+ return ssm->listen_fd;
+ }
+
+ memset (&servaddr, 0, sizeof (servaddr));
+
+ servaddr.sin_family = AF_INET;
+ servaddr.sin_addr.s_addr = htonl (INADDR_ANY);
+ servaddr.sin_port = htons (port);
+
+#ifdef VCL_TEST
+ endpt.vrf = VPPCOM_VRF_DEFAULT;
+ endpt.is_ip4 = (servaddr.sin_family == AF_INET);
+ endpt.ip = (uint8_t *) & servaddr.sin_addr;
+ endpt.port = (uint16_t) servaddr.sin_port;
+
+ rv = vppcom_session_bind (ssm->listen_fd, &endpt);
+ if (rv)
+ {
+ errno = -rv;
+ rv = -1;
+ }
+#else
+ rv =
+ bind (ssm->listen_fd, (struct sockaddr *) &servaddr, sizeof (servaddr));
+#endif
+ if (rv < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in main()");
+ fprintf (stderr, "ERROR: bind failed (errno = %d)!\n", errno_val);
+ return rv;
+ }
+
+#ifdef VCL_TEST
+ rv = vppcom_session_listen (ssm->listen_fd, 10);
+ if (rv)
+ {
+ errno = -rv;
+ rv = -1;
+ }
+#else
+ rv = listen (ssm->listen_fd, 10);
+#endif
+ if (rv < 0)
+ {
+ errno_val = errno;
+ perror ("ERROR in main()");
+ fprintf (stderr, "ERROR: listen failed (errno = %d)!\n", errno_val);
+ return rv;
+ }
+
+ FD_ZERO (&ssm->wr_fdset);
+ FD_ZERO (&ssm->rd_fdset);
+
+ FD_SET (ssm->listen_fd, &ssm->rd_fdset);
+ ssm->nfds = ssm->listen_fd + 1;
+
+ printf ("\nSERVER: Waiting for a client to connect on port %d...\n", port);
+
+ while (1)
+ {
+ _rfdset = ssm->rd_fdset;
+
+#ifdef VCL_TEST
+ rv = vppcom_select (ssm->nfds, (uint64_t *) rfdset, NULL, NULL, 0);
+#else
+ {
+ struct timeval timeout;
+ timeout = ssm->timeout;
+ _wfdset = ssm->wr_fdset;
+ rv = select (ssm->nfds, rfdset, wfdset, NULL, &timeout);
+ }
+#endif
+ if (rv < 0)
+ {
+ perror ("select()");
+ fprintf (stderr, "\nERROR: select() failed -- aborting!\n");
+ main_rv = -1;
+ goto done;
+ }
+ else if (rv == 0)
+ continue;
+
+ if (FD_ISSET (ssm->listen_fd, rfdset))
+ new_client ();
+
+ for (i = 0; i < ssm->conn_pool_size; i++)
+ {
+ if (!ssm->conn_pool[i].is_alloc)
+ continue;
+
+ conn = &ssm->conn_pool[i];
+ client_fd = conn->fd;
+
+ if (FD_ISSET (client_fd, rfdset))
+ {
+ rx_bytes = sock_test_read (client_fd, conn->buf,
+ conn->buf_size, &conn->stats);
+ if (rx_bytes > 0)
+ {
+ rx_cfg = (sock_test_cfg_t *) conn->buf;
+ if (rx_cfg->magic == SOCK_TEST_CFG_CTRL_MAGIC)
+ {
+ if (rx_cfg->verbose)
+ {
+ printf ("SERVER (fd %d): Received a cfg message!\n",
+ client_fd);
+ sock_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ }
+
+ if (rx_bytes != sizeof (*rx_cfg))
+ {
+ printf ("SERVER (fd %d): Invalid cfg message "
+ "size (%d)!\n Should be %lu bytes.\n",
+ client_fd, rx_bytes, sizeof (*rx_cfg));
+ conn->cfg.rxbuf_size = 0;
+ conn->cfg.num_writes = 0;
+ if (conn->cfg.verbose)
+ {
+ printf ("SERVER (fd %d): Replying to "
+ "cfg message!\n", client_fd);
+ sock_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ }
+ sock_test_write (client_fd, (uint8_t *) & conn->cfg,
+ sizeof (conn->cfg), NULL,
+ conn->cfg.verbose);
+ continue;
+ }
+
+ switch (rx_cfg->test)
+ {
+ case SOCK_TEST_TYPE_NONE:
+ case SOCK_TEST_TYPE_ECHO:
+ sync_config_and_reply (conn, rx_cfg);
+ break;
+
+ case SOCK_TEST_TYPE_BI:
+ case SOCK_TEST_TYPE_UNI:
+ stream_test_server_start_stop (conn, rx_cfg);
+ break;
+
+ case SOCK_TEST_TYPE_EXIT:
+ printf ("SERVER: Have a great day, "
+ "connection %d!\n", client_fd);
+#ifdef VCL_TEST
+ vppcom_session_close (client_fd);
+#else
+ close (client_fd);
+#endif
+ conn_pool_free (conn);
+
+ if (ssm->nfds == (ssm->listen_fd + 1))
+ {
+ printf ("SERVER: All client connections "
+ "closed.\n\nSERVER: "
+ "May the force be with you!\n\n");
+ goto done;
+ }
+ break;
+
+ default:
+ fprintf (stderr, "ERROR: Unknown test type!\n");
+ sock_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ break;
+ }
+ continue;
+ }
+
+ else if ((conn->cfg.test == SOCK_TEST_TYPE_UNI) ||
+ (conn->cfg.test == SOCK_TEST_TYPE_BI))
+ {
+ stream_test_server (conn, rx_bytes);
+ continue;
+ }
+
+ else if (isascii (conn->buf[0]))
+ {
+ // If it looks vaguely like a string, make sure it's terminated
+ ((char *) conn->buf)[rx_bytes <
+ conn->buf_size ? rx_bytes :
+ conn->buf_size - 1] = 0;
+ printf ("\nSERVER (fd %d): RX (%d bytes) - '%s'\n",
+ conn->fd, rx_bytes, conn->buf);
+ }
+ }
+ else // rx_bytes < 0
+ {
+		  if (errno == ECONNRESET)
+		    {
+		      printf ("\nSERVER: Connection reset by remote peer.\n"
+			      "  Y'all have a great day now!\n\n");
+#ifdef VCL_TEST
+		      vppcom_session_close (client_fd);
+#else
+		      close (client_fd);
+#endif
+		      conn_pool_free (conn);
+		      break;
+		    }
+ else
+ continue;
+ }
+
+ if (isascii (conn->buf[0]))
+ {
+ // If it looks vaguely like a string, make sure it's terminated
+ ((char *) conn->buf)[rx_bytes <
+ conn->buf_size ? rx_bytes :
+ conn->buf_size - 1] = 0;
+ if (xtra)
+ fprintf (stderr,
+ "ERROR: FIFO not drained in previous test!\n"
+ " extra chunks %u (0x%x)\n"
+ " extra bytes %lu (0x%lx)\n",
+ xtra, xtra, xtra_bytes, xtra_bytes);
+
+ xtra = 0;
+ xtra_bytes = 0;
+
+ if (conn->cfg.verbose)
+ printf ("SERVER (fd %d): Echoing back\n", client_fd);
+
+ nbytes = strlen ((const char *) conn->buf) + 1;
+
+ tx_bytes = sock_test_write (client_fd, conn->buf,
+ nbytes, &conn->stats,
+ conn->cfg.verbose);
+ if (tx_bytes >= 0)
+ printf ("SERVER (fd %d): TX (%d bytes) - '%s'\n",
+ conn->fd, tx_bytes, conn->buf);
+ }
+
+ else // Extraneous read data from non-echo tests???
+ {
+ xtra++;
+ xtra_bytes += rx_bytes;
+ }
+ }
+ }
+ }
+
+done:
+#ifdef VCL_TEST
+ vppcom_session_close (ssm->listen_fd);
+ vppcom_app_destroy ();
+#else
+ close (ssm->listen_fd);
+#endif
+ if (ssm->conn_pool)
+ free (ssm->conn_pool);
+
+ return main_rv;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/uri_socket_server.c b/src/uri/uri_socket_server.c
new file mode 100644
index 00000000..4f4c5f30
--- /dev/null
+++ b/src/uri/uri_socket_server.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <vppinfra/format.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <sys/time.h>
+
+volatile int signal_received;
+
+static void
+unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
+{
+ signal_received = 1;
+}
+
+static void
+setup_signal_handler (void)
+{
+ uword i;
+ struct sigaction sa;
+
+ for (i = 1; i < 32; i++)
+ {
+ memset (&sa, 0, sizeof (sa));
+ sa.sa_sigaction = (void *) unix_signal_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ switch (i)
+ {
+ /* these signals take the default action */
+ case SIGABRT:
+ case SIGKILL:
+ case SIGSTOP:
+ case SIGUSR1:
+ case SIGUSR2:
+ continue;
+
+ /* ignore SIGPIPE, SIGCHLD */
+ case SIGPIPE:
+ case SIGCHLD:
+ sa.sa_sigaction = (void *) SIG_IGN;
+ break;
+
+ /* catch and handle all other signals */
+ default:
+ break;
+ }
+
+ if (sigaction (i, &sa, 0) < 0)
+ clib_unix_warning ("sigaction %U", format_signal, i);
+ }
+}
+
+
+int
+main (int argc, char *argv[])
+{
+ int sockfd, portno, n, sent, accfd, reuse;
+ socklen_t client_addr_len;
+ struct sockaddr_in serv_addr;
+ struct sockaddr_in client;
+ struct hostent *server;
+ u8 *rx_buffer = 0, no_echo = 0;
+ struct timeval start, end;
+ long rcvd = 0;
+ double deltat;
+
+  if (argc > 1 && argc < 4)
+    {
+      fformat (stderr, "usage: %s [host port no-echo]\n", argv[0]);
+      exit (0);
+    }
+
+ if (argc >= 4)
+ {
+ no_echo = atoi (argv[3]);
+ portno = atoi (argv[2]);
+ server = gethostbyname (argv[1]);
+ if (server == NULL)
+ {
+ clib_unix_warning ("gethostbyname");
+ exit (1);
+ }
+ }
+ else
+ {
+ /* Defaults */
+ portno = 1234;
+ server = gethostbyname ("6.0.1.1");
+ if (server == NULL)
+ {
+ clib_unix_warning ("gethostbyname");
+ exit (1);
+ }
+ }
+
+
+ setup_signal_handler ();
+
+ sockfd = socket (AF_INET, SOCK_STREAM, 0);
+ if (sockfd < 0)
+ {
+ clib_unix_error ("socket");
+ exit (1);
+ }
+
+ reuse = 1;
+ if (setsockopt (sockfd, SOL_SOCKET, SO_REUSEADDR, (const char *) &reuse,
+ sizeof (reuse)) < 0)
+ {
+ clib_unix_error ("setsockopt(SO_REUSEADDR) failed");
+ exit (1);
+ }
+
+ bzero ((char *) &serv_addr, sizeof (serv_addr));
+ serv_addr.sin_family = AF_INET;
+ bcopy ((char *) server->h_addr,
+ (char *) &serv_addr.sin_addr.s_addr, server->h_length);
+ serv_addr.sin_port = htons (portno);
+ if (bind (sockfd, (const void *) &serv_addr, sizeof (serv_addr)) < 0)
+ {
+ clib_unix_warning ("bind");
+ exit (1);
+ }
+
+ vec_validate (rx_buffer, 128 << 10);
+
+ if (listen (sockfd, 5 /* backlog */ ) < 0)
+ {
+ clib_unix_warning ("listen");
+ close (sockfd);
+ return 1;
+ }
+
+ while (1)
+ {
+ if (signal_received)
+ break;
+
+ client_addr_len = sizeof (struct sockaddr);
+ accfd = accept (sockfd, (struct sockaddr *) &client, &client_addr_len);
+ if (accfd < 0)
+ {
+ clib_unix_warning ("accept");
+ continue;
+ }
+ fformat (stderr, "Accepted connection from: %s : %d\n",
+ inet_ntoa (client.sin_addr), client.sin_port);
+ gettimeofday (&start, NULL);
+
+ while (1)
+ {
+ n = recv (accfd, rx_buffer, vec_len (rx_buffer), 0 /* flags */ );
+ if (n == 0)
+ {
+ /* Graceful exit */
+ close (accfd);
+ gettimeofday (&end, NULL);
+ deltat = (end.tv_sec - start.tv_sec);
+ deltat += (end.tv_usec - start.tv_usec) / 1000000.0;
+ clib_warning ("Finished in %.6f", deltat);
+ clib_warning ("%.4f Gbit/second %s",
+ (((f64) rcvd * 8.0) / deltat / 1e9),
+ no_echo ? "half" : "full");
+ rcvd = 0;
+ break;
+ }
+ if (n < 0)
+ {
+ clib_unix_warning ("recv");
+ close (accfd);
+ break;
+ }
+
+ if (signal_received)
+ break;
+
+ rcvd += n;
+ if (no_echo)
+ continue;
+
+ sent = send (accfd, rx_buffer, n, 0 /* flags */ );
+	  if (sent < 0)
+ {
+ clib_unix_warning ("send");
+ close (accfd);
+ break;
+ }
+
+ if (sent != n)
+ {
+ clib_warning ("sent %d not %d", sent, n);
+ }
+
+ if (signal_received)
+ break;
+ }
+ }
+
+ close (sockfd);
+
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/uri_socket_test.c b/src/uri/uri_socket_test.c
new file mode 100644
index 00000000..4469b03d
--- /dev/null
+++ b/src/uri/uri_socket_test.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <vppinfra/format.h>
+#include <sys/time.h>
+
+int
+main (int argc, char *argv[])
+{
+ int sockfd, portno, n;
+ struct sockaddr_in serv_addr;
+ struct hostent *server;
+ u8 *rx_buffer = 0, *tx_buffer = 0, no_echo = 0, test_bytes = 0;
+ u32 offset;
+ long bytes = 1 << 20, to_send;
+ int i;
+ struct timeval start, end;
+ double deltat;
+
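+  /* usage: uri_socket_test [host port [megabytes [no-echo [test-bytes]]]] */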
+ if (argc >= 3)
+ {
+ portno = atoi (argv[2]);
+ server = gethostbyname (argv[1]);
+ if (server == NULL)
+ {
+ clib_unix_warning ("gethostbyname");
+ exit (1);
+ }
+
+ argc -= 3;
+ argv += 3;
+
+ if (argc)
+ {
+ bytes = ((long) atoi (argv[0])) << 20;
+ argc--;
+ argv++;
+ }
+ if (argc)
+ {
+ no_echo = atoi (argv[0]);
+ argc--;
+ argv++;
+ }
+ if (argc)
+ {
+ test_bytes = atoi (argv[0]);
+ argc--;
+ argv++;
+ }
+ }
+ else
+ {
+      /* Defaults */
+      portno = 1234;
+      server = gethostbyname ("6.0.1.1");
+ if (server == NULL)
+ {
+ clib_unix_warning ("gethostbyname");
+ exit (1);
+ }
+ }
+
+ to_send = bytes;
+ sockfd = socket (AF_INET, SOCK_STREAM, 0);
+ if (sockfd < 0)
+ {
+ clib_unix_error ("socket");
+ exit (1);
+ }
+
+ bzero ((char *) &serv_addr, sizeof (serv_addr));
+ serv_addr.sin_family = AF_INET;
+ bcopy ((char *) server->h_addr,
+ (char *) &serv_addr.sin_addr.s_addr, server->h_length);
+ serv_addr.sin_port = htons (portno);
+ if (connect (sockfd, (const void *) &serv_addr, sizeof (serv_addr)) < 0)
+ {
+ clib_unix_warning ("connect");
+ exit (1);
+ }
+
+ vec_validate (rx_buffer, 128 << 10);
+ vec_validate (tx_buffer, 128 << 10);
+
+ for (i = 0; i < vec_len (tx_buffer); i++)
+ tx_buffer[i] = (i + 1) % 0xff;
+
+ /*
+ * Send one packet to warm up the RX pipeline
+ */
+ n = send (sockfd, tx_buffer, vec_len (tx_buffer), 0 /* flags */ );
+ if (n != vec_len (tx_buffer))
+ {
+ clib_unix_warning ("write");
+ exit (0);
+ }
+
+ gettimeofday (&start, NULL);
+ while (bytes > 0)
+ {
+ /*
+ * TX
+ */
+ n = send (sockfd, tx_buffer, vec_len (tx_buffer), 0 /* flags */ );
+ if (n != vec_len (tx_buffer))
+ {
+ clib_unix_warning ("write");
+ exit (0);
+ }
+ bytes -= n;
+
+ if (no_echo)
+ continue;
+
+ /*
+ * RX
+ */
+
+ offset = 0;
+ do
+ {
+ n = recv (sockfd, rx_buffer + offset,
+ vec_len (rx_buffer) - offset, 0 /* flags */ );
+ if (n < 0)
+ {
+ clib_unix_warning ("read");
+ exit (0);
+ }
+ offset += n;
+ }
+ while (offset < vec_len (rx_buffer));
+
+ if (test_bytes)
+ {
+ for (i = 0; i < vec_len (rx_buffer); i++)
+ {
+ if (rx_buffer[i] != tx_buffer[i])
+ {
+ clib_warning ("[%d] read 0x%x not 0x%x", rx_buffer[i],
+ tx_buffer[i]);
+ exit (1);
+ }
+ }
+ }
+ }
+ close (sockfd);
+ gettimeofday (&end, NULL);
+
+ deltat = (end.tv_sec - start.tv_sec);
+  deltat += (end.tv_usec - start.tv_usec) / 1000000.0;	/* usec -> seconds */
+  clib_warning ("Finished in %.6f seconds", deltat);
+ clib_warning ("%.4f Gbit/second %s", (((f64) to_send * 8.0) / deltat / 1e9),
+ no_echo ? "half" : "full");
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c
new file mode 100755
index 00000000..cb297b55
--- /dev/null
+++ b/src/uri/uri_tcp_test.c
@@ -0,0 +1,1274 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include <svm/svm_fifo_segment.h>
+#include <vlibmemory/api.h>
+#include <vpp/api/vpe_msg_enum.h>
+#include <vnet/session/application_interface.h>
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
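+
+/*
+ * The three include passes above expand vpe_all_api_h.h under different
+ * macro definitions: vl_typedefs generates the API message structures,
+ * vl_endianfun the endian-conversion routines, and vl_printfun the
+ * print routines (stubbed out here by the empty vl_print) that are
+ * registered with the message handlers below.
+ */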
+
+typedef struct
+{
+ svm_fifo_t *server_rx_fifo;
+ svm_fifo_t *server_tx_fifo;
+
+ u64 vpp_session_handle;
+ u64 bytes_received;
+ f64 start;
+} session_t;
+
+typedef enum
+{
+ STATE_START,
+ STATE_ATTACHED,
+ STATE_READY,
+ STATE_DISCONNECTING,
+ STATE_FAILED
+} connection_state_t;
+
+typedef struct
+{
+ /* vpe input queue */
+ unix_shared_memory_queue_t *vl_input_queue;
+
+ /* API client handle */
+ u32 my_client_index;
+
+ /* The URI we're playing with */
+ u8 *uri;
+
+ /* Session pool */
+ session_t *sessions;
+
+ /* Hash table for disconnect processing */
+ uword *session_index_by_vpp_handles;
+
+ /* intermediate rx buffer */
+ u8 *rx_buf;
+
+ /* URI for slave's connect */
+ u8 *connect_uri;
+
+ u32 connected_session_index;
+
+ int i_am_master;
+
+ /* drop all packets */
+ int drop_packets;
+
+ /* Our event queue */
+ unix_shared_memory_queue_t *our_event_queue;
+
+ /* $$$ single thread only for the moment */
+ unix_shared_memory_queue_t *vpp_event_queue;
+
+ pid_t my_pid;
+
+ /* For deadman timers */
+ clib_time_t clib_time;
+
+ /* State of the connection, shared between msg RX thread and main thread */
+ volatile connection_state_t state;
+
+ /* Signal variables */
+ volatile int time_to_stop;
+ volatile int time_to_print_stats;
+
+ u32 configured_segment_size;
+
+ /* VNET_API_ERROR_FOO -> "Foo" hash table */
+ uword *error_string_by_error_number;
+
+ u8 *connect_test_data;
+ pthread_t client_rx_thread_handle;
+ u32 client_bytes_received;
+ u8 test_return_packets;
+ u64 bytes_to_send;
+
+ /* convenience */
+ svm_fifo_segment_main_t *segment_main;
+} uri_tcp_test_main_t;
+
+uri_tcp_test_main_t uri_tcp_test_main;
+
+#if CLIB_DEBUG > 0
+#define NITER 10000
+#else
+#define NITER 4000000
+#endif
+
+static u8 *
+format_api_error (u8 * s, va_list * args)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ i32 error = va_arg (*args, u32);
+ uword *p;
+
+ p = hash_get (utm->error_string_by_error_number, -error);
+
+ if (p)
+ s = format (s, "%s", p[0]);
+ else
+ s = format (s, "%d", error);
+ return s;
+}
+
+static void
+init_error_string_table (uri_tcp_test_main_t * utm)
+{
+ utm->error_string_by_error_number = hash_create (0, sizeof (uword));
+
+#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s);
+ foreach_vnet_api_error;
+#undef _
+
+ hash_set (utm->error_string_by_error_number, 99, "Misc");
+}
+
+int
+wait_for_state_change (uri_tcp_test_main_t * utm, connection_state_t state)
+{
+#define TIMEOUT 600.0
+
+ f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT;
+
+ while (clib_time_now (&utm->clib_time) < timeout)
+ {
+ if (utm->state == state)
+ return 0;
+ if (utm->state == STATE_FAILED)
+ return -1;
+ if (utm->time_to_stop == 1)
+ return 0;
+ }
+ clib_warning ("timeout waiting for STATE_READY");
+ return -1;
+}
+
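+/*
+ * Attach handshake: send an APPLICATION_ATTACH request and wait for the
+ * reply, which carries the name of the fifo segment to map and the
+ * address of our event queue. The options array sizes the rx/tx fifos
+ * and the initial and add-on shared-memory segments.
+ */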
+void
+application_send_attach (uri_tcp_test_main_t * utm)
+{
+ vl_api_application_attach_t *bmp;
+ u32 fifo_size = 4 << 20;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_APPLICATION_ATTACH);
+ bmp->client_index = utm->my_client_index;
+ bmp->context = ntohl (0xfeedface);
+ bmp->options[APP_OPTIONS_FLAGS] =
+ APP_OPTIONS_FLAGS_USE_FIFO | APP_OPTIONS_FLAGS_ADD_SEGMENT;
+ bmp->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 16;
+ bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size;
+ bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size;
+ bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20;
+ bmp->options[SESSION_OPTIONS_SEGMENT_SIZE] = 256 << 20;
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp);
+}
+
+int
+application_attach (uri_tcp_test_main_t * utm)
+{
+ application_send_attach (utm);
+ if (wait_for_state_change (utm, STATE_ATTACHED))
+ {
+ clib_warning ("timeout waiting for STATE_ATTACHED");
+ return -1;
+ }
+ return 0;
+}
+
+void
+application_detach (uri_tcp_test_main_t * utm)
+{
+ vl_api_application_detach_t *bmp;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_APPLICATION_DETACH);
+ bmp->client_index = utm->my_client_index;
+ bmp->context = ntohl (0xfeedface);
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp);
+}
+
+static void
+vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t *
+ mp)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ int rv;
+
+ if (mp->retval)
+ {
+ clib_warning ("attach failed: %U", format_api_error,
+ clib_net_to_host_u32 (mp->retval));
+ utm->state = STATE_FAILED;
+ return;
+ }
+
+ if (mp->segment_name_length == 0)
+ {
+ clib_warning ("segment_name_length zero");
+ return;
+ }
+
+ a->segment_name = (char *) mp->segment_name;
+ a->segment_size = mp->segment_size;
+
+ ASSERT (mp->app_event_queue_address);
+
+ /* Attach to the segment vpp created */
+ rv = svm_fifo_segment_attach (a);
+ if (rv)
+ {
+ clib_warning ("svm_fifo_segment_attach ('%s') failed",
+ mp->segment_name);
+ return;
+ }
+
+ utm->our_event_queue =
+ uword_to_pointer (mp->app_event_queue_address,
+ unix_shared_memory_queue_t *);
+ utm->state = STATE_ATTACHED;
+}
+
+static void
+vl_api_application_detach_reply_t_handler (vl_api_application_detach_reply_t *
+ mp)
+{
+ if (mp->retval)
+ clib_warning ("detach returned with err: %d", mp->retval);
+}
+
+static void
+stop_signal (int signum)
+{
+ uri_tcp_test_main_t *um = &uri_tcp_test_main;
+
+ um->time_to_stop = 1;
+}
+
+static void
+stats_signal (int signum)
+{
+ uri_tcp_test_main_t *um = &uri_tcp_test_main;
+
+ um->time_to_print_stats = 1;
+}
+
+static clib_error_t *
+setup_signal_handlers (void)
+{
+ signal (SIGINT, stats_signal);
+ signal (SIGQUIT, stop_signal);
+ signal (SIGTERM, stop_signal);
+
+ return 0;
+}
+
+void
+vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
+{
+ clib_warning ("BUG");
+}
+
+int
+connect_to_vpp (char *name)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ api_main_t *am = &api_main;
+
+ if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0)
+ return -1;
+
+ utm->vl_input_queue = am->shmem_hdr->vl_input_queue;
+ utm->my_client_index = am->my_client_index;
+
+ return 0;
+}
+
+static void
+vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp)
+{
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ int rv;
+
+ a->segment_name = (char *) mp->segment_name;
+ a->segment_size = mp->segment_size;
+ /* Attach to the segment vpp created */
+ rv = svm_fifo_segment_attach (a);
+ if (rv)
+ {
+ clib_warning ("svm_fifo_segment_attach ('%s') failed",
+ mp->segment_name);
+ return;
+ }
+ clib_warning ("Mapped new segment '%s' size %d", mp->segment_name,
+ mp->segment_size);
+}
+
+static void
+session_print_stats (uri_tcp_test_main_t * utm, session_t * session)
+{
+ f64 deltat;
+ u64 bytes;
+
+ deltat = clib_time_now (&utm->clib_time) - session->start;
+ bytes = utm->i_am_master ? session->bytes_received : utm->bytes_to_send;
+ fformat (stdout, "Finished in %.6f\n", deltat);
+ fformat (stdout, "%.4f Gbit/second\n", (bytes * 8.0) / deltat / 1e9);
+}
+
+static void
+vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ session_t *session = 0;
+ vl_api_disconnect_session_reply_t *rmp;
+ uword *p;
+ int rv = 0;
+
+ p = hash_get (utm->session_index_by_vpp_handles, mp->handle);
+
+ if (p)
+ {
+ session = pool_elt_at_index (utm->sessions, p[0]);
+ hash_unset (utm->session_index_by_vpp_handles, mp->handle);
+ pool_put (utm->sessions, session);
+ }
+ else
+ {
+ clib_warning ("couldn't find session key %llx", mp->handle);
+ rv = -11;
+ }
+
+// utm->time_to_stop = 1;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+
+ rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY);
+ rmp->retval = rv;
+ rmp->handle = mp->handle;
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+
+ if (session)
+ session_print_stats (utm, session);
+}
+
+static void
+vl_api_reset_session_t_handler (vl_api_reset_session_t * mp)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ vl_api_reset_session_reply_t *rmp;
+ uword *p;
+ int rv = 0;
+
+ p = hash_get (utm->session_index_by_vpp_handles, mp->handle);
+
+ if (p)
+ {
+ clib_warning ("got reset");
+ /* Cleanup later */
+ utm->time_to_stop = 1;
+ }
+ else
+ {
+ clib_warning ("couldn't find session key %llx", mp->handle);
+ rv = -11;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_RESET_SESSION_REPLY);
+ rmp->retval = rv;
+ rmp->handle = mp->handle;
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+}
+
+void
+client_handle_fifo_event_rx (uri_tcp_test_main_t * utm,
+ session_fifo_event_t * e)
+{
+ svm_fifo_t *rx_fifo;
+ int n_read, bytes, i;
+
+ rx_fifo = e->fifo;
+
+ bytes = svm_fifo_max_dequeue (rx_fifo);
+ /* Allow enqueuing of new event */
+ svm_fifo_unset_event (rx_fifo);
+
+ /* Read the bytes */
+ do
+ {
+ n_read = svm_fifo_dequeue_nowait (rx_fifo,
+ clib_min (vec_len (utm->rx_buf),
+ bytes), utm->rx_buf);
+ if (n_read > 0)
+ {
+ bytes -= n_read;
+ if (utm->test_return_packets)
+ {
+ for (i = 0; i < n_read; i++)
+ {
+ if (utm->rx_buf[i]
+ != ((utm->client_bytes_received + i) & 0xff))
+ {
+ clib_warning ("error at byte %lld, 0x%x not 0x%x",
+ utm->client_bytes_received + i,
+ utm->rx_buf[i],
+ ((utm->client_bytes_received +
+ i) & 0xff));
+ }
+ }
+ }
+ utm->client_bytes_received += n_read;
+ }
+      else if (n_read == -2)
+	break;			/* nothing left to dequeue */
+ }
+ while (bytes > 0);
+}
+
+void
+client_handle_event_queue (uri_tcp_test_main_t * utm)
+{
+  session_fifo_event_t _e, *e = &_e;
+
+ unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e,
+ 0 /* nowait */ );
+ switch (e->event_type)
+ {
+ case FIFO_EVENT_APP_RX:
+ client_handle_fifo_event_rx (utm, e);
+ break;
+
+ case FIFO_EVENT_DISCONNECT:
+ return;
+
+ default:
+ clib_warning ("unknown event type %d", e->event_type);
+ break;
+ }
+}
+
+static void *
+client_rx_thread_fn (void *arg)
+{
+ session_fifo_event_t _e, *e = &_e;
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+
+ utm->client_bytes_received = 0;
+ while (1)
+ {
+ unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e,
+ 0 /* nowait */ );
+ switch (e->event_type)
+ {
+ case FIFO_EVENT_APP_RX:
+ client_handle_fifo_event_rx (utm, e);
+ break;
+
+ case FIFO_EVENT_DISCONNECT:
+ return 0;
+ default:
+ clib_warning ("unknown event type %d", e->event_type);
+ break;
+ }
+
+ if (PREDICT_FALSE (utm->time_to_stop == 1))
+ break;
+ }
+ pthread_exit (0);
+}
+
+static void
+vl_api_connect_session_reply_t_handler (vl_api_connect_session_reply_t * mp)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ session_t *session;
+ u32 session_index;
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ int rv;
+
+ if (mp->retval)
+ {
+ clib_warning ("connection failed with code: %U", format_api_error,
+ clib_net_to_host_u32 (mp->retval));
+ utm->state = STATE_FAILED;
+ return;
+ }
+
+ utm->vpp_event_queue =
+ uword_to_pointer (mp->vpp_event_queue_address,
+ unix_shared_memory_queue_t *);
+
+ /*
+ * Setup session
+ */
+
+ pool_get (utm->sessions, session);
+ session_index = session - utm->sessions;
+
+ rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
+ rx_fifo->client_session_index = session_index;
+ tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
+ tx_fifo->client_session_index = session_index;
+
+ session->server_rx_fifo = rx_fifo;
+ session->server_tx_fifo = tx_fifo;
+ session->vpp_session_handle = mp->handle;
+ session->start = clib_time_now (&utm->clib_time);
+
+ /* Save handle */
+ utm->connected_session_index = session_index;
+ utm->state = STATE_READY;
+
+ /* Add it to lookup table */
+ hash_set (utm->session_index_by_vpp_handles, mp->handle, session_index);
+
+ /* Start RX thread */
+ rv = pthread_create (&utm->client_rx_thread_handle,
+ NULL /*attr */ , client_rx_thread_fn, 0);
+ if (rv)
+ {
+ clib_warning ("pthread_create returned %d", rv);
+ rv = VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+}
+
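+/*
+ * Enqueue up to 'bytes' of the test pattern into the tx fifo in chunks
+ * of at most 128K. svm_fifo_set_event() returns non-zero only when the
+ * event flag was not already set, so at most one TX event per burst is
+ * queued to vpp instead of one per enqueue.
+ */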
+static void
+send_test_chunk (uri_tcp_test_main_t * utm, svm_fifo_t * tx_fifo, int mypid,
+ u32 bytes)
+{
+ u8 *test_data = utm->connect_test_data;
+ u64 bytes_sent = 0;
+ int test_buf_offset = 0;
+ u32 bytes_to_snd;
+ u32 queue_max_chunk = 128 << 10, actual_write;
+ session_fifo_event_t evt;
+ static int serial_number = 0;
+ int rv;
+
+ bytes_to_snd = (bytes == 0) ? vec_len (test_data) : bytes;
+ if (bytes_to_snd > vec_len (test_data))
+ bytes_to_snd = vec_len (test_data);
+
+ while (bytes_to_snd > 0 && !utm->time_to_stop)
+ {
+ actual_write = (bytes_to_snd > queue_max_chunk) ?
+ queue_max_chunk : bytes_to_snd;
+ rv = svm_fifo_enqueue_nowait (tx_fifo, actual_write,
+ test_data + test_buf_offset);
+
+ if (rv > 0)
+ {
+ bytes_to_snd -= rv;
+ test_buf_offset += rv;
+ bytes_sent += rv;
+
+ if (svm_fifo_set_event (tx_fifo))
+ {
+ /* Fabricate TX event, send to vpp */
+ evt.fifo = tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ evt.event_id = serial_number++;
+
+ unix_shared_memory_queue_add (utm->vpp_event_queue,
+ (u8 *) & evt,
+ 0 /* do wait for mutex */ );
+ }
+ }
+ }
+}
+
+void
+client_send_data (uri_tcp_test_main_t * utm)
+{
+ u8 *test_data = utm->connect_test_data;
+ int mypid = getpid ();
+ session_t *session;
+ svm_fifo_t *tx_fifo;
+ u32 n_iterations, leftover;
+ int i;
+
+ session = pool_elt_at_index (utm->sessions, utm->connected_session_index);
+ tx_fifo = session->server_tx_fifo;
+
+ ASSERT (vec_len (test_data) > 0);
+
+ vec_validate (utm->rx_buf, vec_len (test_data) - 1);
+ n_iterations = utm->bytes_to_send / vec_len (test_data);
+
+ for (i = 0; i < n_iterations; i++)
+ {
+ send_test_chunk (utm, tx_fifo, mypid, 0);
+ if (utm->time_to_stop)
+ break;
+ }
+
+ leftover = utm->bytes_to_send % vec_len (test_data);
+ if (leftover)
+ send_test_chunk (utm, tx_fifo, mypid, leftover);
+
+ if (!utm->drop_packets)
+ {
+ f64 timeout = clib_time_now (&utm->clib_time) + 10;
+
+ /* Wait for the outstanding packets */
+ while (utm->client_bytes_received <
+ vec_len (test_data) * n_iterations + leftover)
+ {
+ if (clib_time_now (&utm->clib_time) > timeout)
+ {
+ clib_warning ("timed out waiting for the missing packets");
+ break;
+ }
+ }
+ }
+ utm->time_to_stop = 1;
+}
+
+void
+client_send_connect (uri_tcp_test_main_t * utm)
+{
+ vl_api_connect_uri_t *cmp;
+ cmp = vl_msg_api_alloc (sizeof (*cmp));
+ memset (cmp, 0, sizeof (*cmp));
+
+ cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI);
+ cmp->client_index = utm->my_client_index;
+ cmp->context = ntohl (0xfeedface);
+ memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri));
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp);
+}
+
+int
+client_connect (uri_tcp_test_main_t * utm)
+{
+ client_send_connect (utm);
+ if (wait_for_state_change (utm, STATE_READY))
+ {
+ clib_warning ("Connect failed");
+ return -1;
+ }
+ return 0;
+}
+
+void
+client_send_disconnect (uri_tcp_test_main_t * utm)
+{
+ session_t *connected_session;
+ vl_api_disconnect_session_t *dmp;
+ connected_session = pool_elt_at_index (utm->sessions,
+ utm->connected_session_index);
+ dmp = vl_msg_api_alloc (sizeof (*dmp));
+ memset (dmp, 0, sizeof (*dmp));
+ dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION);
+ dmp->client_index = utm->my_client_index;
+ dmp->handle = connected_session->vpp_session_handle;
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & dmp);
+}
+
+int
+client_disconnect (uri_tcp_test_main_t * utm)
+{
+ client_send_disconnect (utm);
+ clib_warning ("Sent disconnect");
+ if (wait_for_state_change (utm, STATE_START))
+ {
+ clib_warning ("Disconnect failed");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+client_test (uri_tcp_test_main_t * utm)
+{
+ int i;
+
+ if (application_attach (utm))
+ return;
+
+ if (client_connect (utm))
+ {
+ application_detach (utm);
+ return;
+ }
+
+ /* Init test data */
+ vec_validate (utm->connect_test_data, 128 * 1024 - 1);
+ for (i = 0; i < vec_len (utm->connect_test_data); i++)
+ utm->connect_test_data[i] = i & 0xff;
+
+ /* Start send */
+ client_send_data (utm);
+
+ /* Disconnect */
+ client_disconnect (utm);
+
+ application_detach (utm);
+}
+
+static void
+vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+
+ if (mp->retval)
+ {
+ clib_warning ("bind failed: %U", format_api_error,
+ clib_net_to_host_u32 (mp->retval));
+ utm->state = STATE_FAILED;
+ return;
+ }
+
+ utm->state = STATE_READY;
+}
+
+static void
+vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+
+ if (mp->retval != 0)
+ clib_warning ("returned %d", ntohl (mp->retval));
+
+ utm->state = STATE_START;
+}
+
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
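+/*
+ * Standalone ip6 formatter: find the longest run of zero 16-bit groups
+ * and replace it with the usual "::" compression, printing the
+ * remaining groups in hex separated by colons.
+ */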
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+ i_max_n_zero = ARRAY_LEN (a->as_u16);
+ max_n_zeros = 0;
+ i_first_zero = i_max_n_zero;
+ n_zeros = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ u32 is_zero = a->as_u16[i] == 0;
+ if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+ {
+ i_first_zero = i;
+ n_zeros = 0;
+ }
+ n_zeros += is_zero;
+ if ((!is_zero && n_zeros > max_n_zeros)
+ || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+ {
+ i_max_n_zero = i_first_zero;
+ max_n_zeros = n_zeros;
+ i_first_zero = ARRAY_LEN (a->as_u16);
+ n_zeros = 0;
+ }
+ }
+
+ last_double_colon = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == i_max_n_zero && max_n_zeros > 1)
+ {
+ s = format (s, "::");
+ i += max_n_zeros - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+/* Format an IP46 address. */
+u8 *
+format_ip46_address (u8 * s, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ int is_ip4 = 1;
+
+ switch (type)
+ {
+ case IP46_TYPE_ANY:
+ is_ip4 = ip46_address_is_ip4 (ip46);
+ break;
+ case IP46_TYPE_IP4:
+ is_ip4 = 1;
+ break;
+ case IP46_TYPE_IP6:
+ is_ip4 = 0;
+ break;
+ }
+
+ return is_ip4 ?
+ format (s, "%U", format_ip4_address, &ip46->ip4) :
+ format (s, "%U", format_ip6_address, &ip46->ip6);
+}
+
+static void
+vl_api_accept_session_t_handler (vl_api_accept_session_t * mp)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ vl_api_accept_session_reply_t *rmp;
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ session_t *session;
+ static f64 start_time;
+ u32 session_index;
+ u8 *ip_str;
+
+ if (start_time == 0.0)
+ start_time = clib_time_now (&utm->clib_time);
+
+  ip_str = format (0, "%U", format_ip46_address, &mp->ip, mp->is_ip4);
+  clib_warning ("Accepted session from: %v:%d", ip_str,
+		clib_net_to_host_u16 (mp->port));
+  vec_free (ip_str);
+ utm->vpp_event_queue =
+ uword_to_pointer (mp->vpp_event_queue_address,
+ unix_shared_memory_queue_t *);
+
+ /* Allocate local session and set it up */
+ pool_get (utm->sessions, session);
+ session_index = session - utm->sessions;
+
+ rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
+ rx_fifo->client_session_index = session_index;
+ tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
+ tx_fifo->client_session_index = session_index;
+
+ session->server_rx_fifo = rx_fifo;
+ session->server_tx_fifo = tx_fifo;
+
+ /* Add it to lookup table */
+ hash_set (utm->session_index_by_vpp_handles, mp->handle, session_index);
+
+ utm->state = STATE_READY;
+
+ /* Stats printing */
+ if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0)
+ {
+ f64 now = clib_time_now (&utm->clib_time);
+ fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n",
+ pool_elts (utm->sessions), now - start_time,
+ (f64) pool_elts (utm->sessions) / (now - start_time));
+ }
+
+ /*
+ * Send accept reply to vpp
+ */
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY);
+ rmp->handle = mp->handle;
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+
+ session->bytes_received = 0;
+ session->start = clib_time_now (&utm->clib_time);
+}
+
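+/*
+ * Server rx path: drain the rx fifo into rx_buf and, unless running in
+ * drop mode, enqueue the same bytes back into the tx fifo. The inner
+ * loop retries partial enqueues until the whole read has been reflected
+ * or the test is told to stop.
+ */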
+void
+server_handle_fifo_event_rx (uri_tcp_test_main_t * utm,
+ session_fifo_event_t * e)
+{
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ int n_read;
+ session_fifo_event_t evt;
+ unix_shared_memory_queue_t *q;
+ session_t *session;
+ int rv;
+ u32 max_dequeue, offset, max_transfer, rx_buf_len;
+
+ rx_buf_len = vec_len (utm->rx_buf);
+ rx_fifo = e->fifo;
+ session = &utm->sessions[rx_fifo->client_session_index];
+ tx_fifo = session->server_tx_fifo;
+
+ max_dequeue = svm_fifo_max_dequeue (rx_fifo);
+ /* Allow enqueuing of a new event */
+ svm_fifo_unset_event (rx_fifo);
+
+ if (PREDICT_FALSE (max_dequeue == 0))
+ {
+ return;
+ }
+
+ /* Read the max_dequeue */
+ do
+ {
+ max_transfer = clib_min (rx_buf_len, max_dequeue);
+ n_read = svm_fifo_dequeue_nowait (rx_fifo, max_transfer, utm->rx_buf);
+ if (n_read > 0)
+ {
+ max_dequeue -= n_read;
+ session->bytes_received += n_read;
+ }
+
+ /* Reflect if a non-drop session */
+ if (!utm->drop_packets && n_read > 0)
+ {
+ offset = 0;
+ do
+ {
+ rv = svm_fifo_enqueue_nowait (tx_fifo, n_read,
+ &utm->rx_buf[offset]);
+ if (rv > 0)
+ {
+ n_read -= rv;
+ offset += rv;
+ }
+ }
+ while ((rv <= 0 || n_read > 0) && !utm->time_to_stop);
+
+ /* If event wasn't set, add one */
+ if (svm_fifo_set_event (tx_fifo))
+ {
+ /* Fabricate TX event, send to vpp */
+ evt.fifo = tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ evt.event_id = e->event_id;
+
+ q = utm->vpp_event_queue;
+ unix_shared_memory_queue_add (q, (u8 *) & evt,
+					    1 /* nowait */ );
+ }
+ }
+ }
+ while ((n_read < 0 || max_dequeue > 0) && !utm->time_to_stop);
+}
+
+void
+server_handle_event_queue (uri_tcp_test_main_t * utm)
+{
+  session_fifo_event_t _e, *e = &_e;
+
+ while (1)
+ {
+ unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e,
+ 0 /* nowait */ );
+ switch (e->event_type)
+ {
+ case FIFO_EVENT_APP_RX:
+ server_handle_fifo_event_rx (utm, e);
+ break;
+
+ case FIFO_EVENT_DISCONNECT:
+ return;
+
+ default:
+ clib_warning ("unknown event type %d", e->event_type);
+ break;
+ }
+ if (PREDICT_FALSE (utm->time_to_stop == 1))
+ break;
+ if (PREDICT_FALSE (utm->time_to_print_stats == 1))
+ {
+ utm->time_to_print_stats = 0;
+ fformat (stdout, "%d connections\n", pool_elts (utm->sessions));
+ }
+ }
+}
+
+void
+server_send_listen (uri_tcp_test_main_t * utm)
+{
+ vl_api_bind_uri_t *bmp;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_BIND_URI);
+ bmp->client_index = utm->my_client_index;
+ bmp->context = ntohl (0xfeedface);
+ memcpy (bmp->uri, utm->uri, vec_len (utm->uri));
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp);
+}
+
+int
+server_listen (uri_tcp_test_main_t * utm)
+{
+ server_send_listen (utm);
+ if (wait_for_state_change (utm, STATE_READY))
+ {
+ clib_warning ("timeout waiting for STATE_READY");
+ return -1;
+ }
+ return 0;
+}
+
+void
+server_send_unbind (uri_tcp_test_main_t * utm)
+{
+ vl_api_unbind_uri_t *ump;
+
+ ump = vl_msg_api_alloc (sizeof (*ump));
+ memset (ump, 0, sizeof (*ump));
+
+ ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI);
+ ump->client_index = utm->my_client_index;
+ memcpy (ump->uri, utm->uri, vec_len (utm->uri));
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump);
+}
+
+int
+server_unbind (uri_tcp_test_main_t * utm)
+{
+ server_send_unbind (utm);
+ if (wait_for_state_change (utm, STATE_START))
+ {
+ clib_warning ("timeout waiting for STATE_START");
+ return -1;
+ }
+ return 0;
+}
+
+void
+server_test (uri_tcp_test_main_t * utm)
+{
+ if (application_attach (utm))
+ return;
+
+ /* Bind to uri */
+ if (server_listen (utm))
+ return;
+
+ /* Enter handle event loop */
+ server_handle_event_queue (utm);
+
+ /* Cleanup */
+ server_send_unbind (utm);
+
+ application_detach (utm);
+
+ fformat (stdout, "Test complete...\n");
+}
+
+static void
+vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t *
+ mp)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ session_t *session;
+
+ if (mp->retval)
+ {
+ clib_warning ("vpp complained about disconnect: %d",
+ ntohl (mp->retval));
+ }
+
+ utm->state = STATE_START;
+ session = pool_elt_at_index (utm->sessions, utm->connected_session_index);
+ if (session)
+ session_print_stats (utm, session);
+}
+
+#define foreach_uri_msg \
+_(BIND_URI_REPLY, bind_uri_reply) \
+_(UNBIND_URI_REPLY, unbind_uri_reply) \
+_(ACCEPT_SESSION, accept_session) \
+_(CONNECT_SESSION_REPLY, connect_session_reply) \
+_(DISCONNECT_SESSION, disconnect_session) \
+_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \
+_(RESET_SESSION, reset_session) \
+_(APPLICATION_ATTACH_REPLY, application_attach_reply) \
+_(APPLICATION_DETACH_REPLY, application_detach_reply) \
+_(MAP_ANOTHER_SEGMENT, map_another_segment) \
+
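+/*
+ * Classic vpp X-macro hookup: for each message in foreach_uri_msg,
+ * register the typed handler together with its endian and print
+ * functions with the API message dispatcher.
+ */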
+void
+uri_api_hookup (uri_tcp_test_main_t * utm)
+{
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_uri_msg;
+#undef _
+}
+
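+/*
+ * Judging by the unformat clauses below, typical invocations look like:
+ *
+ *   uri_tcp_test master uri tcp://0.0.0.0/1234
+ *   uri_tcp_test slave uri tcp://6.0.1.2/1234 mbytes 64 test
+ *
+ * (the exact URIs are whatever the vpp side is configured with).
+ */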
+int
+main (int argc, char **argv)
+{
+ uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ unformat_input_t _argv, *a = &_argv;
+ u8 *chroot_prefix;
+ u8 *heap, *uri = 0;
+ u8 *bind_uri = (u8 *) "tcp://0.0.0.0/1234";
+ u8 *connect_uri = (u8 *) "tcp://6.0.1.2/1234";
+ u64 bytes_to_send = 64 << 10, mbytes;
+ u32 tmp;
+ mheap_t *h;
+ session_t *session;
+ int i;
+ int i_am_master = 1, drop_packets = 0, test_return_packets = 0;
+
+ clib_mem_init (0, 256 << 20);
+
+ heap = clib_mem_get_per_cpu_heap ();
+ h = mheap_header (heap);
+
+ /* make the main heap thread-safe */
+ h->flags |= MHEAP_FLAG_THREAD_SAFE;
+
+ vec_validate (utm->rx_buf, 128 << 10);
+
+ utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
+
+ utm->my_pid = getpid ();
+ utm->configured_segment_size = 1 << 20;
+
+ clib_time_init (&utm->clib_time);
+ init_error_string_table (utm);
+ svm_fifo_segment_init (0x200000000ULL, 20);
+ unformat_init_command_line (a, argv);
+
+ while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (a, "chroot prefix %s", &chroot_prefix))
+ {
+ vl_set_memory_root_path ((char *) chroot_prefix);
+ }
+ else if (unformat (a, "uri %s", &uri))
+ ;
+ else if (unformat (a, "segment-size %dM", &tmp))
+ utm->configured_segment_size = tmp << 20;
+ else if (unformat (a, "segment-size %dG", &tmp))
+ utm->configured_segment_size = tmp << 30;
+ else if (unformat (a, "master"))
+ i_am_master = 1;
+ else if (unformat (a, "slave"))
+ i_am_master = 0;
+ else if (unformat (a, "drop"))
+ drop_packets = 1;
+ else if (unformat (a, "test"))
+ test_return_packets = 1;
+ else if (unformat (a, "mbytes %lld", &mbytes))
+ {
+ bytes_to_send = mbytes << 20;
+ }
+ else if (unformat (a, "gbytes %lld", &mbytes))
+ {
+ bytes_to_send = mbytes << 30;
+ }
+ else
+ {
+	  fformat (stderr, "%s: usage [master|slave]\n", argv[0]);
+ exit (1);
+ }
+ }
+
+ if (uri)
+ {
+ utm->uri = format (0, "%s%c", uri, 0);
+ utm->connect_uri = format (0, "%s%c", uri, 0);
+ }
+ else
+ {
+ utm->uri = format (0, "%s%c", bind_uri, 0);
+ utm->connect_uri = format (0, "%s%c", connect_uri, 0);
+ }
+
+ utm->i_am_master = i_am_master;
+ utm->segment_main = &svm_fifo_segment_main;
+ utm->drop_packets = drop_packets;
+ utm->test_return_packets = test_return_packets;
+ utm->bytes_to_send = bytes_to_send;
+ utm->time_to_stop = 0;
+
+ setup_signal_handlers ();
+ uri_api_hookup (utm);
+
+ if (connect_to_vpp (i_am_master ? "uri_tcp_server" : "uri_tcp_client") < 0)
+ {
+ svm_region_exit ();
+ fformat (stderr, "Couldn't connect to vpe, exiting...\n");
+ exit (1);
+ }
+
+ if (i_am_master == 0)
+ {
+ client_test (utm);
+ vl_client_disconnect_from_vlib ();
+ exit (0);
+ }
+
+ /* $$$$ hack preallocation */
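+  /*
+   * Presumably this warms the session pool: grabbing and then freeing
+   * 200K elements up front sizes the pool once, so accepts during the
+   * test don't trigger pool reallocation.
+   */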
+ for (i = 0; i < 200000; i++)
+ {
+ pool_get (utm->sessions, session);
+ memset (session, 0, sizeof (*session));
+ }
+ for (i = 0; i < 200000; i++)
+ pool_put_index (utm->sessions, i);
+
+ server_test (utm);
+
+ vl_client_disconnect_from_vlib ();
+ exit (0);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/uri_udp_test.c b/src/uri/uri_udp_test.c
new file mode 100644
index 00000000..f50ee688
--- /dev/null
+++ b/src/uri/uri_udp_test.c
@@ -0,0 +1,1040 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/time.h>
+#include <vppinfra/macros.h>
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/api/vpe_msg_enum.h>
+#include <svm/svm_fifo_segment.h>
+#include <pthread.h>
+#include <vnet/session/application_interface.h>
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+
+typedef enum
+{
+ STATE_START,
+ STATE_READY,
+ STATE_FAILED,
+ STATE_DISCONNECTING,
+} connection_state_t;
+
+typedef struct
+{
+ svm_fifo_t *server_rx_fifo;
+ svm_fifo_t *server_tx_fifo;
+} session_t;
+
+typedef struct
+{
+ /* vpe input queue */
+ unix_shared_memory_queue_t *vl_input_queue;
+
+ /* API client handle */
+ u32 my_client_index;
+
+ /* The URI we're playing with */
+ u8 *uri;
+
+ /* Session pool */
+ session_t *sessions;
+
+ /* Hash table for disconnect processing */
+ uword *session_index_by_vpp_handles;
+
+ /* fifo segment */
+ svm_fifo_segment_private_t *seg;
+
+ /* intermediate rx buffer */
+ u8 *rx_buf;
+
+ /* URI for connect */
+ u8 *connect_uri;
+
+ int i_am_master;
+
+ /* Our event queue */
+ unix_shared_memory_queue_t *our_event_queue;
+
+ /* $$$ single thread only for the moment */
+ unix_shared_memory_queue_t *vpp_event_queue;
+
+ /* $$$$ hack: cut-through session index */
+ volatile u32 cut_through_session_index;
+
+ /* unique segment name counter */
+ u32 unique_segment_index;
+
+ pid_t my_pid;
+
+ /* pthread handle */
+ pthread_t cut_through_thread_handle;
+
+ /* For deadman timers */
+ clib_time_t clib_time;
+
+ /* State of the connection, shared between msg RX thread and main thread */
+ volatile connection_state_t state;
+
+ volatile int time_to_stop;
+ volatile int time_to_print_stats;
+
+ u32 configured_segment_size;
+
+ /* VNET_API_ERROR_FOO -> "Foo" hash table */
+ uword *error_string_by_error_number;
+
+ /* convenience */
+ svm_fifo_segment_main_t *segment_main;
+
+} uri_udp_test_main_t;
+
+#if CLIB_DEBUG > 0
+#define NITER 10000
+#else
+#define NITER 4000000
+#endif
+
+uri_udp_test_main_t uri_udp_test_main;
+
+static void
+stop_signal (int signum)
+{
+ uri_udp_test_main_t *um = &uri_udp_test_main;
+
+ um->time_to_stop = 1;
+}
+
+static void
+stats_signal (int signum)
+{
+ uri_udp_test_main_t *um = &uri_udp_test_main;
+
+ um->time_to_print_stats = 1;
+}
+
+static clib_error_t *
+setup_signal_handlers (void)
+{
+ signal (SIGINT, stats_signal);
+ signal (SIGQUIT, stop_signal);
+ signal (SIGTERM, stop_signal);
+
+ return 0;
+}
+
+void
+application_send_attach (uri_udp_test_main_t * utm)
+{
+ vl_api_application_attach_t *bmp;
+ u32 fifo_size = 3 << 20;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_APPLICATION_ATTACH);
+ bmp->client_index = utm->my_client_index;
+ bmp->context = ntohl (0xfeedface);
+ bmp->options[APP_OPTIONS_FLAGS] =
+ APP_OPTIONS_FLAGS_USE_FIFO | APP_OPTIONS_FLAGS_ADD_SEGMENT;
+ bmp->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 16;
+ bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size;
+ bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size;
+ bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20;
+ bmp->options[SESSION_OPTIONS_SEGMENT_SIZE] = 256 << 20;
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp);
+}
+
+void
+application_detach (uri_udp_test_main_t * utm)
+{
+ vl_api_application_detach_t *bmp;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_APPLICATION_DETACH);
+ bmp->client_index = utm->my_client_index;
+ bmp->context = ntohl (0xfeedface);
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp);
+}
+
+static void
+vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t *
+ mp)
+{
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ int rv;
+
+ if (mp->retval)
+ {
+ clib_warning ("attach failed: %d", mp->retval);
+ utm->state = STATE_FAILED;
+ return;
+ }
+
+ if (mp->segment_name_length == 0)
+ {
+ clib_warning ("segment_name_length zero");
+ return;
+ }
+
+ a->segment_name = (char *) mp->segment_name;
+ a->segment_size = mp->segment_size;
+
+ ASSERT (mp->app_event_queue_address);
+
+ /* Attach to the segment vpp created */
+ rv = svm_fifo_segment_attach (a);
+ if (rv)
+ {
+ clib_warning ("svm_fifo_segment_attach ('%s') failed",
+ mp->segment_name);
+ return;
+ }
+
+ utm->our_event_queue =
+ uword_to_pointer (mp->app_event_queue_address,
+ unix_shared_memory_queue_t *);
+}
+
+static void
+vl_api_application_detach_reply_t_handler (vl_api_application_detach_reply_t *
+ mp)
+{
+ if (mp->retval)
+ clib_warning ("detach returned with err: %d", mp->retval);
+}
+
+u8 *
+format_api_error (u8 * s, va_list * args)
+{
+ uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *);
+ i32 error = va_arg (*args, u32);
+ uword *p;
+
+ p = hash_get (utm->error_string_by_error_number, -error);
+
+ if (p)
+ s = format (s, "%s", p[0]);
+ else
+ s = format (s, "%d", error);
+ return s;
+}
+
+int
+wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state)
+{
+#define TIMEOUT 600.0
+
+ f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT;
+
+ while (clib_time_now (&utm->clib_time) < timeout)
+ {
+      if (utm->state == state)
+	return 0;
+      if (utm->state == STATE_FAILED)
+	return -1;
+ }
+ return -1;
+}
+
+u64 server_bytes_received, server_bytes_sent;
+
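+/*
+ * Cut-through worker: spin until the main thread publishes a session
+ * index, then shuttle bytes from the session's tx fifo back into its
+ * rx fifo, emulating the far end of the connection.
+ */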
+static void *
+cut_through_thread_fn (void *arg)
+{
+ session_t *s;
+ svm_fifo_t *rx_fifo;
+ svm_fifo_t *tx_fifo;
+ u8 *my_copy_buffer = 0;
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+ i32 actual_transfer;
+ int rv;
+ u32 buffer_offset;
+
+ while (utm->cut_through_session_index == ~0)
+ ;
+
+ s = pool_elt_at_index (utm->sessions, utm->cut_through_session_index);
+
+ rx_fifo = s->server_rx_fifo;
+ tx_fifo = s->server_tx_fifo;
+
+ vec_validate (my_copy_buffer, 64 * 1024 - 1);
+
+ while (true)
+ {
+ /* We read from the tx fifo and write to the rx fifo */
+ do
+ {
+ actual_transfer = svm_fifo_dequeue_nowait (tx_fifo,
+ vec_len (my_copy_buffer),
+ my_copy_buffer);
+ }
+ while (actual_transfer <= 0);
+
+ server_bytes_received += actual_transfer;
+
+ buffer_offset = 0;
+ while (actual_transfer > 0)
+ {
+ rv = svm_fifo_enqueue_nowait (rx_fifo, actual_transfer,
+ my_copy_buffer + buffer_offset);
+ if (rv > 0)
+ {
+ actual_transfer -= rv;
+ buffer_offset += rv;
+ server_bytes_sent += rv;
+ }
+
+ }
+ if (PREDICT_FALSE (utm->time_to_stop))
+ break;
+ }
+
+ pthread_exit (0);
+}
+
+static void
+udp_client_connect (uri_udp_test_main_t * utm)
+{
+ vl_api_connect_uri_t *cmp;
+ cmp = vl_msg_api_alloc (sizeof (*cmp));
+ memset (cmp, 0, sizeof (*cmp));
+
+ cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI);
+ cmp->client_index = utm->my_client_index;
+ cmp->context = ntohl (0xfeedface);
+ memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri));
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp);
+}
+
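+/*
+ * Client data loop: for NITER rounds, push a 64K pattern into the tx
+ * fifo and opportunistically drain whatever has shown up in the rx
+ * fifo, then drain the remainder after the loop so bytes_received
+ * catches up with bytes_sent before computing throughput.
+ */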
+static void
+client_send (uri_udp_test_main_t * utm, session_t * session)
+{
+ int i;
+ u8 *test_data = 0;
+ u64 bytes_received = 0, bytes_sent = 0;
+ i32 bytes_to_read;
+ int rv;
+ f64 before, after, delta, bytes_per_second;
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ int buffer_offset, bytes_to_send = 0;
+
+ /*
+ * Prepare test data
+ */
+ vec_validate (test_data, 64 * 1024 - 1);
+ for (i = 0; i < vec_len (test_data); i++)
+ test_data[i] = i & 0xff;
+
+ rx_fifo = session->server_rx_fifo;
+ tx_fifo = session->server_tx_fifo;
+
+ before = clib_time_now (&utm->clib_time);
+
+ vec_validate (utm->rx_buf, vec_len (test_data) - 1);
+
+ for (i = 0; i < NITER; i++)
+ {
+ bytes_to_send = vec_len (test_data);
+ buffer_offset = 0;
+ while (bytes_to_send > 0)
+ {
+ rv = svm_fifo_enqueue_nowait (tx_fifo, bytes_to_send,
+ test_data + buffer_offset);
+
+ if (rv > 0)
+ {
+ bytes_to_send -= rv;
+ buffer_offset += rv;
+ bytes_sent += rv;
+ }
+ }
+
+ bytes_to_read = svm_fifo_max_dequeue (rx_fifo);
+
+ bytes_to_read = vec_len (utm->rx_buf) > bytes_to_read ?
+ bytes_to_read : vec_len (utm->rx_buf);
+
+ buffer_offset = 0;
+ while (bytes_to_read > 0)
+ {
+ rv = svm_fifo_dequeue_nowait (rx_fifo,
+ bytes_to_read,
+ utm->rx_buf + buffer_offset);
+ if (rv > 0)
+ {
+ bytes_to_read -= rv;
+ buffer_offset += rv;
+ bytes_received += rv;
+ }
+ }
+ }
+ while (bytes_received < bytes_sent)
+ {
+ rv =
+ svm_fifo_dequeue_nowait (rx_fifo, vec_len (utm->rx_buf), utm->rx_buf);
+ if (rv > 0)
+ {
+#if CLIB_DEBUG > 0
+ int j;
+ for (j = 0; j < rv; j++)
+ {
+ if (utm->rx_buf[j] != ((bytes_received + j) & 0xff))
+ {
+ clib_warning ("error at byte %lld, 0x%x not 0x%x",
+ bytes_received + j,
+ utm->rx_buf[j],
+ ((bytes_received + j) & 0xff));
+ }
+ }
+#endif
+ bytes_received += (u64) rv;
+ }
+ }
+
+ after = clib_time_now (&utm->clib_time);
+ delta = after - before;
+ bytes_per_second = 0.0;
+
+ if (delta > 0.0)
+ bytes_per_second = (f64) bytes_received / delta;
+
+ fformat (stdout,
+ "Done: %lld recv bytes in %.2f seconds, %.2f bytes/sec...\n\n",
+ bytes_received, delta, bytes_per_second);
+ fformat (stdout,
+ "Done: %lld sent bytes in %.2f seconds, %.2f bytes/sec...\n\n",
+ bytes_sent, delta, bytes_per_second);
+ fformat (stdout,
+ "client -> server -> client round trip: %.2f Gbit/sec \n\n",
+ (bytes_per_second * 8.0) / 1e9);
+}
+
+static void
+uri_udp_client_test (uri_udp_test_main_t * utm)
+{
+ session_t *session;
+
+ application_send_attach (utm);
+ udp_client_connect (utm);
+
+ if (wait_for_state_change (utm, STATE_READY))
+ {
+ clib_warning ("timeout waiting for STATE_READY");
+ return;
+ }
+
+ /* Only works with cut through sessions */
+ session = pool_elt_at_index (utm->sessions, utm->cut_through_session_index);
+
+ client_send (utm, session);
+ application_detach (utm);
+}
+
+static void
+vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp)
+{
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+
+ if (mp->retval)
+ {
+ clib_warning ("bind failed: %d", mp->retval);
+ utm->state = STATE_FAILED;
+ return;
+ }
+
+ utm->state = STATE_READY;
+}
+
+static void
+vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp)
+{
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ int rv;
+
+ a->segment_name = (char *) mp->segment_name;
+ a->segment_size = mp->segment_size;
+ /* Attach to the segment vpp created */
+ rv = svm_fifo_segment_attach (a);
+ if (rv)
+ {
+ clib_warning ("svm_fifo_segment_attach ('%s') failed",
+ mp->segment_name);
+ return;
+ }
+ clib_warning ("Mapped new segment '%s' size %d", mp->segment_name,
+ mp->segment_size);
+}
+
+/**
+ * Acting as server for redirected connect requests
+ */
+static void
+vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp)
+{
+ u32 segment_index;
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ svm_fifo_segment_private_t *seg;
+ unix_shared_memory_queue_t *client_q;
+ vl_api_connect_session_reply_t *rmp;
+ session_t *session = 0;
+ int rv = 0;
+
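+  /*
+   * Cut-through connect: rather than bouncing data through vpp, create
+   * a private fifo segment, allocate the rx/tx fifos ourselves and hand
+   * their addresses back to the client in the connect reply.
+   */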
+ /* Create the segment */
+ a->segment_name = (char *) format (0, "%d:segment%d%c", utm->my_pid,
+ utm->unique_segment_index++, 0);
+ a->segment_size = utm->configured_segment_size;
+
+ rv = svm_fifo_segment_create (a);
+ if (rv)
+ {
+ clib_warning ("sm_fifo_segment_create ('%s') failed", a->segment_name);
+ rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED;
+ goto send_reply;
+ }
+
+ vec_add2 (utm->seg, seg, 1);
+
+ segment_index = vec_len (sm->segments) - 1;
+ memcpy (seg, sm->segments + segment_index, sizeof (utm->seg[0]));
+
+ pool_get (utm->sessions, session);
+
+ session->server_rx_fifo = svm_fifo_segment_alloc_fifo
+ (utm->seg, 128 * 1024, FIFO_SEGMENT_RX_FREELIST);
+ ASSERT (session->server_rx_fifo);
+
+ session->server_tx_fifo = svm_fifo_segment_alloc_fifo
+ (utm->seg, 128 * 1024, FIFO_SEGMENT_TX_FREELIST);
+ ASSERT (session->server_tx_fifo);
+
+ session->server_rx_fifo->master_session_index = session - utm->sessions;
+ session->server_tx_fifo->master_session_index = session - utm->sessions;
+ utm->cut_through_session_index = session - utm->sessions;
+
+ rv = pthread_create (&utm->cut_through_thread_handle,
+ NULL /*attr */ , cut_through_thread_fn, 0);
+ if (rv)
+ {
+ clib_warning ("pthread_create returned %d", rv);
+ rv = VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+
+send_reply:
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+
+ rmp->_vl_msg_id = ntohs (VL_API_CONNECT_SESSION_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = ntohl (rv);
+ rmp->segment_name_length = vec_len (a->segment_name);
+ if (session)
+ {
+ rmp->server_rx_fifo = pointer_to_uword (session->server_rx_fifo);
+ rmp->server_tx_fifo = pointer_to_uword (session->server_tx_fifo);
+ }
+
+ memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name));
+
+ vec_free (a->segment_name);
+
+ client_q =
+ uword_to_pointer (mp->client_queue_address, unix_shared_memory_queue_t *);
+ vl_msg_api_send_shmem (client_q, (u8 *) & rmp);
+}
+
+static void
+vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp)
+{
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+
+ if (mp->retval != 0)
+ clib_warning ("returned %d", ntohl (mp->retval));
+
+ utm->state = STATE_START;
+}
+
+static void
+vl_api_accept_session_t_handler (vl_api_accept_session_t * mp)
+{
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+ vl_api_accept_session_reply_t *rmp;
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ session_t *session;
+ static f64 start_time;
+
+ if (start_time == 0.0)
+ start_time = clib_time_now (&utm->clib_time);
+
+ utm->vpp_event_queue =
+ uword_to_pointer (mp->vpp_event_queue_address,
+ unix_shared_memory_queue_t *);
+
+ pool_get (utm->sessions, session);
+
+ rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
+ rx_fifo->client_session_index = session - utm->sessions;
+ tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
+ tx_fifo->client_session_index = session - utm->sessions;
+
+ session->server_rx_fifo = rx_fifo;
+ session->server_tx_fifo = tx_fifo;
+
+ hash_set (utm->session_index_by_vpp_handles, mp->handle,
+ session - utm->sessions);
+
+ utm->state = STATE_READY;
+
+ if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0)
+ {
+ f64 now = clib_time_now (&utm->clib_time);
+ fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n",
+ pool_elts (utm->sessions), now - start_time,
+ (f64) pool_elts (utm->sessions) / (now - start_time));
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY);
+ rmp->handle = mp->handle;
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+}
+
+static void
+vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
+{
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+ session_t *session;
+ vl_api_disconnect_session_reply_t *rmp;
+ uword *p;
+ int rv = 0;
+
+ p = hash_get (utm->session_index_by_vpp_handles, mp->handle);
+
+ if (p)
+ {
+ session = pool_elt_at_index (utm->sessions, p[0]);
+ hash_unset (utm->session_index_by_vpp_handles, mp->handle);
+ pool_put (utm->sessions, session);
+ }
+ else
+ {
+ clib_warning ("couldn't find session key %llx", mp->handle);
+ rv = -11;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY);
+ rmp->retval = rv;
+ rmp->handle = mp->handle;
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+}
+
+static void
+vl_api_connect_session_reply_t_handler (vl_api_connect_session_reply_t * mp)
+{
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+
+ ASSERT (utm->i_am_master == 0);
+
+ /* We've been redirected */
+ if (mp->segment_name_length > 0)
+ {
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+ svm_fifo_segment_create_args_t _a, *a = &_a;
+ u32 segment_index;
+ session_t *session;
+ svm_fifo_segment_private_t *seg;
+ int rv;
+
+ memset (a, 0, sizeof (*a));
+ a->segment_name = (char *) mp->segment_name;
+
+ sleep (1);
+
+ rv = svm_fifo_segment_attach (a);
+ if (rv)
+ {
+ clib_warning ("sm_fifo_segment_create ('%v') failed",
+ mp->segment_name);
+ return;
+ }
+
+ segment_index = a->new_segment_indices[0];
+ vec_add2 (utm->seg, seg, 1);
+ memcpy (seg, sm->segments + segment_index, sizeof (*seg));
+ sleep (1);
+
+ pool_get (utm->sessions, session);
+ utm->cut_through_session_index = session - utm->sessions;
+
+ session->server_rx_fifo = uword_to_pointer (mp->server_rx_fifo,
+ svm_fifo_t *);
+ ASSERT (session->server_rx_fifo);
+ session->server_tx_fifo = uword_to_pointer (mp->server_tx_fifo,
+ svm_fifo_t *);
+ ASSERT (session->server_tx_fifo);
+ }
+
+ /* security: could unlink /dev/shm/<mp->segment_name> here, maybe */
+
+ utm->state = STATE_READY;
+}
+
+#define foreach_uri_msg \
+_(BIND_URI_REPLY, bind_uri_reply) \
+_(CONNECT_URI, connect_uri) \
+_(CONNECT_SESSION_REPLY, connect_session_reply) \
+_(UNBIND_URI_REPLY, unbind_uri_reply) \
+_(ACCEPT_SESSION, accept_session) \
+_(DISCONNECT_SESSION, disconnect_session) \
+_(MAP_ANOTHER_SEGMENT, map_another_segment) \
+_(APPLICATION_ATTACH_REPLY, application_attach_reply) \
+_(APPLICATION_DETACH_REPLY, application_detach_reply) \
+
+void
+uri_api_hookup (uri_udp_test_main_t * utm)
+{
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_uri_msg;
+#undef _
+}
+
+int
+connect_to_vpp (char *name)
+{
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+ api_main_t *am = &api_main;
+
+ if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0)
+ return -1;
+
+ utm->vl_input_queue = am->shmem_hdr->vl_input_queue;
+ utm->my_client_index = am->my_client_index;
+
+ return 0;
+}
+
+void
+vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
+{
+ clib_warning ("BUG");
+}
+
+static void
+init_error_string_table (uri_udp_test_main_t * utm)
+{
+ utm->error_string_by_error_number = hash_create (0, sizeof (uword));
+
+#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s);
+ foreach_vnet_api_error;
+#undef _
+
+ hash_set (utm->error_string_by_error_number, 99, "Misc");
+}
+
+void
+server_handle_fifo_event_rx (uri_udp_test_main_t * utm,
+ session_fifo_event_t * e)
+{
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ int nbytes;
+
+ session_fifo_event_t evt;
+ unix_shared_memory_queue_t *q;
+ int rv;
+
+ rx_fifo = e->fifo;
+ tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo;
+
+ do
+ {
+ nbytes = svm_fifo_dequeue_nowait (rx_fifo, vec_len (utm->rx_buf),
+ utm->rx_buf);
+ }
+ while (nbytes <= 0);
+ do
+ {
+ rv = svm_fifo_enqueue_nowait (tx_fifo, nbytes, utm->rx_buf);
+ }
+ while (rv == -2);
+
+ /* Fabricate TX event, send to vpp */
+ evt.fifo = tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ evt.event_id = e->event_id;
+
+ if (svm_fifo_set_event (tx_fifo))
+ {
+ q = utm->vpp_event_queue;
+ unix_shared_memory_queue_add (q, (u8 *) & evt,
+ 0 /* do wait for mutex */ );
+ }
+}
+
+void
+server_handle_event_queue (uri_udp_test_main_t * utm)
+{
+ session_fifo_event_t _e, *e = &_e;
+
+ while (1)
+ {
+ unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e,
+ 0 /* nowait */ );
+ switch (e->event_type)
+ {
+ case FIFO_EVENT_APP_RX:
+ server_handle_fifo_event_rx (utm, e);
+ break;
+
+ case FIFO_EVENT_DISCONNECT:
+ return;
+
+ default:
+ clib_warning ("unknown event type %d", e->event_type);
+ break;
+ }
+ if (PREDICT_FALSE (utm->time_to_stop == 1))
+ break;
+ if (PREDICT_FALSE (utm->time_to_print_stats == 1))
+ {
+ utm->time_to_print_stats = 0;
+ fformat (stdout, "%d connections\n", pool_elts (utm->sessions));
+ }
+ }
+}
+
+static void
+server_unbind (uri_udp_test_main_t * utm)
+{
+ vl_api_unbind_uri_t *ump;
+
+ ump = vl_msg_api_alloc (sizeof (*ump));
+ memset (ump, 0, sizeof (*ump));
+
+ ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI);
+ ump->client_index = utm->my_client_index;
+ memcpy (ump->uri, utm->uri, vec_len (utm->uri));
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump);
+}
+
+static void
+server_listen (uri_udp_test_main_t * utm)
+{
+ vl_api_bind_uri_t *bmp;
+
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_BIND_URI);
+ bmp->client_index = utm->my_client_index;
+ bmp->context = ntohl (0xfeedface);
+ memcpy (bmp->uri, utm->uri, vec_len (utm->uri));
+ vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp);
+}
+
+void
+udp_server_test (uri_udp_test_main_t * utm)
+{
+ application_send_attach (utm);
+
+ /* Bind to uri */
+ server_listen (utm);
+
+ if (wait_for_state_change (utm, STATE_READY))
+ {
+ clib_warning ("timeout waiting for STATE_READY");
+ return;
+ }
+
+ server_handle_event_queue (utm);
+
+ /* Cleanup */
+ server_unbind (utm);
+
+ if (wait_for_state_change (utm, STATE_START))
+ {
+ clib_warning ("timeout waiting for STATE_START");
+ return;
+ }
+
+ application_detach (utm);
+
+ fformat (stdout, "Test complete...\n");
+}
+
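+/*
+ * From the unformat clauses below, typical invocations look like:
+ *
+ *   uri_udp_test master uri udp://0.0.0.0/1234
+ *   uri_udp_test slave
+ *
+ * The slave always connects to the hard-coded udp://6.0.0.1/1234; the
+ * uri argument only overrides the bind name.
+ */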
+int
+main (int argc, char **argv)
+{
+ uri_udp_test_main_t *utm = &uri_udp_test_main;
+ unformat_input_t _argv, *a = &_argv;
+ u8 *chroot_prefix;
+ u8 *heap;
+ u8 *bind_name = (u8 *) "udp://0.0.0.0/1234";
+ u32 tmp;
+ mheap_t *h;
+ session_t *session;
+ int i;
+ int i_am_master = 1;
+
+ clib_mem_init (0, 256 << 20);
+
+ heap = clib_mem_get_per_cpu_heap ();
+ h = mheap_header (heap);
+
+ /* make the main heap thread-safe */
+ h->flags |= MHEAP_FLAG_THREAD_SAFE;
+
+ vec_validate (utm->rx_buf, 8192);
+
+ utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
+
+ utm->my_pid = getpid ();
+ utm->configured_segment_size = 1 << 20;
+
+ clib_time_init (&utm->clib_time);
+ init_error_string_table (utm);
+ svm_fifo_segment_init (0x200000000ULL, 20);
+ unformat_init_command_line (a, argv);
+
+ while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (a, "chroot prefix %s", &chroot_prefix))
+ {
+ vl_set_memory_root_path ((char *) chroot_prefix);
+ }
+ else if (unformat (a, "uri %s", &bind_name))
+ ;
+ else if (unformat (a, "segment-size %dM", &tmp))
+ utm->configured_segment_size = tmp << 20;
+ else if (unformat (a, "segment-size %dG", &tmp))
+ utm->configured_segment_size = tmp << 30;
+ else if (unformat (a, "master"))
+ i_am_master = 1;
+ else if (unformat (a, "slave"))
+ i_am_master = 0;
+ else
+ {
+	  fformat (stderr, "%s: usage [master|slave]\n", argv[0]);
+ exit (1);
+ }
+ }
+
+ utm->cut_through_session_index = ~0;
+ utm->uri = format (0, "%s%c", bind_name, 0);
+ utm->i_am_master = i_am_master;
+ utm->segment_main = &svm_fifo_segment_main;
+
+ utm->connect_uri = format (0, "udp://6.0.0.1/1234%c", 0);
+
+ setup_signal_handlers ();
+
+ uri_api_hookup (utm);
+
+ if (connect_to_vpp (i_am_master ? "uri_udp_master" : "uri_udp_slave") < 0)
+ {
+ svm_region_exit ();
+ fformat (stderr, "Couldn't connect to vpe, exiting...\n");
+ exit (1);
+ }
+
+ if (i_am_master == 0)
+ {
+ uri_udp_client_test (utm);
+ exit (0);
+ }
+
+ /* $$$$ hack preallocation */
+ for (i = 0; i < 200000; i++)
+ {
+ pool_get (utm->sessions, session);
+ memset (session, 0, sizeof (*session));
+ }
+ for (i = 0; i < 200000; i++)
+ pool_put_index (utm->sessions, i);
+
+ udp_server_test (utm);
+
+ vl_client_disconnect_from_vlib ();
+ exit (0);
+}
+
+#undef vl_api_version
+#define vl_api_version(n,v) static u32 vpe_api_version = v;
+#include <vpp/api/vpe.api.h>
+#undef vl_api_version
+
+void
+vl_client_add_api_signatures (vl_api_memclnt_create_t * mp)
+{
+ /*
+ * Send the main API signature in slot 0. This bit of code must
+ * match the checks in ../vpe/api/api.c: vl_msg_api_version_check().
+ */
+ mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version);
+}
+
+u32
+vl (void *p)
+{
+ return vec_len (p);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/vcl_test_client.c b/src/uri/vcl_test_client.c
new file mode 100644
index 00000000..7ab8824f
--- /dev/null
+++ b/src/uri/vcl_test_client.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define VCL_TEST
+
+#include <uri/vppcom.h>
+#include <uri/sock_test_client.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/vcl_test_server.c b/src/uri/vcl_test_server.c
new file mode 100644
index 00000000..660d705e
--- /dev/null
+++ b/src/uri/vcl_test_server.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define VCL_TEST
+
+#include <uri/vppcom.h>
+#include <uri/sock_test_server.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/vppcom.c b/src/uri/vppcom.c
new file mode 100644
index 00000000..c7ae0ea5
--- /dev/null
+++ b/src/uri/vppcom.c
@@ -0,0 +1,2440 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <svm/svm_fifo_segment.h>
+#include <vlibmemory/api.h>
+#include <vpp/api/vpe_msg_enum.h>
+#include <vnet/session/application_interface.h>
+#include <uri/vppcom.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/vec_bootstrap.h>
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define endian-swap functions */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+
+#if (CLIB_DEBUG > 0)
+/* Set VPPCOM_DEBUG 2 for connection debug, 3 for read/write debug output */
+#define VPPCOM_DEBUG 1
+#else
+#define VPPCOM_DEBUG 0
+#endif
+
+/*
+ * VPPCOM Private definitions and functions.
+ */
+typedef enum
+{
+ STATE_APP_START,
+ STATE_APP_CONN_VPP,
+ STATE_APP_ENABLED,
+ STATE_APP_ATTACHED,
+} app_state_t;
+
+typedef enum
+{
+ STATE_START,
+ STATE_CONNECT,
+ STATE_LISTEN,
+ STATE_ACCEPT,
+ STATE_DISCONNECT,
+ STATE_FAILED
+} session_state_t;
+
+typedef struct
+{
+ volatile session_state_t state;
+
+ svm_fifo_t *server_rx_fifo;
+ svm_fifo_t *server_tx_fifo;
+ u32 sm_seg_index;
+ u64 vpp_session_handle;
+ unix_shared_memory_queue_t *vpp_event_queue;
+
+ /* Socket configuration state */
+ u8 is_server;
+ u8 is_listen;
+ u8 is_cut_thru;
+ u8 is_nonblocking;
+ u32 vrf;
+ u8 is_ip4;
+ u8 ip[16];
+ u16 port;
+ u8 proto;
+ u64 client_queue_address;
+ u64 options[16];
+} session_t;
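+
+/*
+ * Note: a "cut-thru" session (is_cut_thru) moves data through a fifo
+ * segment shared directly between the two applications, bypassing the
+ * vpp data path; see the redirected-connect handling further below.
+ */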
+
+typedef struct vppcom_cfg_t_
+{
+ u64 heapsize;
+ u64 segment_baseva;
+ u32 segment_size;
+ u32 add_segment_size;
+ u32 preallocated_fifo_pairs;
+ u32 rx_fifo_size;
+ u32 tx_fifo_size;
+ u32 event_queue_size;
+ u32 listen_queue_size;
+ f64 app_timeout;
+ f64 session_timeout;
+ f64 accept_timeout;
+} vppcom_cfg_t;
+
+typedef struct vppcom_main_t_
+{
+ u8 init;
+ u32 *client_session_index_fifo;
+ volatile u32 bind_session_index;
+ u32 tx_event_id;
+ int main_cpu;
+
+ /* vpe input queue */
+ unix_shared_memory_queue_t *vl_input_queue;
+
+ /* API client handle */
+ u32 my_client_index;
+
+ /* Session pool */
+ clib_spinlock_t sessions_lockp;
+ session_t *sessions;
+
+ /* Hash table for disconnect processing */
+ uword *session_index_by_vpp_handles;
+
+ /* Select bitmaps */
+ clib_bitmap_t *rd_bitmap;
+ clib_bitmap_t *wr_bitmap;
+ clib_bitmap_t *ex_bitmap;
+
+ /* Our event queue */
+ unix_shared_memory_queue_t *app_event_queue;
+
+ /* unique segment name counter */
+ u32 unique_segment_index;
+
+ pid_t my_pid;
+
+ /* For deadman timers */
+ clib_time_t clib_time;
+
+ /* State of the connection, shared between msg RX thread and main thread */
+ volatile app_state_t app_state;
+
+ vppcom_cfg_t cfg;
+
+ /* VNET_API_ERROR_FOO -> "Foo" hash table */
+ uword *error_string_by_error_number;
+} vppcom_main_t;
+
+vppcom_main_t vppcom_main = {.my_client_index = ~0 };
+
+static const char *
+vppcom_app_state_str (app_state_t state)
+{
+ char *st;
+
+ switch (state)
+ {
+ case STATE_APP_START:
+ st = "STATE_APP_START";
+ break;
+
+ case STATE_APP_CONN_VPP:
+ st = "STATE_APP_CONN_VPP";
+ break;
+
+ case STATE_APP_ENABLED:
+ st = "STATE_APP_ENABLED";
+ break;
+
+ case STATE_APP_ATTACHED:
+ st = "STATE_APP_ATTACHED";
+ break;
+
+ default:
+ st = "UNKNOWN_APP_STATE";
+ break;
+ }
+
+ return st;
+}
+
+static const char *
+vppcom_session_state_str (session_state_t state)
+{
+ char *st;
+
+ switch (state)
+ {
+ case STATE_START:
+ st = "STATE_START";
+ break;
+
+ case STATE_CONNECT:
+ st = "STATE_CONNECT";
+ break;
+
+ case STATE_LISTEN:
+ st = "STATE_LISTEN";
+ break;
+
+ case STATE_ACCEPT:
+ st = "STATE_ACCEPT";
+ break;
+
+ case STATE_DISCONNECT:
+ st = "STATE_DISCONNECT";
+ break;
+
+ case STATE_FAILED:
+ st = "STATE_FAILED";
+ break;
+
+ default:
+ st = "UNKNOWN_STATE";
+ break;
+ }
+
+ return st;
+}
+
+/*
+ * VPPCOM Utility Functions
+ */
+static inline int
+vppcom_session_at_index (u32 session_index, session_t * volatile *sess)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+
+ /* Assumes that caller has acquired spinlock: vcm->sessions_lockp */
+ if (PREDICT_FALSE ((session_index == ~0) ||
+ pool_is_free_index (vcm->sessions, session_index)))
+ {
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return VPPCOM_EBADFD;
+ }
+ *sess = pool_elt_at_index (vcm->sessions, session_index);
+ return VPPCOM_OK;
+}
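+
+/*
+ * Canonical caller pattern for the helper above (a sketch; most
+ * accessors below follow it):
+ *
+ *   clib_spinlock_lock (&vcm->sessions_lockp);
+ *   rv = vppcom_session_at_index (session_index, &session);
+ *   if (PREDICT_FALSE (rv))
+ *     {
+ *       clib_spinlock_unlock (&vcm->sessions_lockp);
+ *       return rv;
+ *     }
+ *   ... use session ...
+ *   clib_spinlock_unlock (&vcm->sessions_lockp);
+ */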
+
+static int
+vppcom_connect_to_vpp (char *app_name)
+{
+ api_main_t *am = &api_main;
+ vppcom_main_t *vcm = &vppcom_main;
+
+ if (VPPCOM_DEBUG > 0)
+ printf ("\nConnecting to VPP api...");
+ if (vl_client_connect_to_vlib ("/vpe-api", app_name, 32) < 0)
+ {
+ clib_warning ("[%d] connect to vpp (%s) failed!",
+ vcm->my_pid, app_name);
+ return VPPCOM_ECONNREFUSED;
+ }
+
+ vcm->vl_input_queue = am->shmem_hdr->vl_input_queue;
+ vcm->my_client_index = am->my_client_index;
+ if (VPPCOM_DEBUG > 0)
+ printf (" connected!\n");
+
+ vcm->app_state = STATE_APP_CONN_VPP;
+ return VPPCOM_OK;
+}
+
+static u8 *
+format_api_error (u8 * s, va_list * args)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ i32 error = va_arg (*args, u32);
+ uword *p;
+
+ p = hash_get (vcm->error_string_by_error_number, -error);
+
+ if (p)
+ s = format (s, "%s (%d)", p[0], error);
+ else
+ s = format (s, "%d", error);
+ return s;
+}
+
+static void
+vppcom_init_error_string_table (void)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+
+ vcm->error_string_by_error_number = hash_create (0, sizeof (uword));
+
+#define _(n,v,s) hash_set (vcm->error_string_by_error_number, -v, s);
+ foreach_vnet_api_error;
+#undef _
+
+ hash_set (vcm->error_string_by_error_number, 99, "Misc");
+}
+
+static inline int
+vppcom_wait_for_app_state_change (app_state_t app_state)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ f64 timeout = clib_time_now (&vcm->clib_time) + vcm->cfg.app_timeout;
+
+ while (clib_time_now (&vcm->clib_time) < timeout)
+ {
+ if (vcm->app_state == app_state)
+ return VPPCOM_OK;
+ }
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] timeout waiting for state %s (%d)", vcm->my_pid,
+ vppcom_app_state_str (app_state), app_state);
+ return VPPCOM_ETIMEDOUT;
+}
+
+static inline int
+vppcom_wait_for_session_state_change (u32 session_index,
+ session_state_t state,
+ f64 wait_for_time)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ f64 timeout = clib_time_now (&vcm->clib_time) + wait_for_time;
+ session_t *volatile session;
+ int rv;
+
+ do
+ {
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ return rv;
+ }
+ if (session->state == state)
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ return VPPCOM_OK;
+ }
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ }
+ while (clib_time_now (&vcm->clib_time) < timeout);
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] timeout waiting for state %s (%d)", vcm->my_pid,
+ vppcom_session_state_str (state), state);
+ return VPPCOM_ETIMEDOUT;
+}
+
+static inline int
+vppcom_wait_for_client_session_index (f64 wait_for_time)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ f64 timeout = clib_time_now (&vcm->clib_time) + wait_for_time;
+
+ do
+ {
+ if (clib_fifo_elts (vcm->client_session_index_fifo))
+ return VPPCOM_OK;
+ }
+ while (clib_time_now (&vcm->clib_time) < timeout);
+
+ if (wait_for_time == 0)
+ return VPPCOM_EAGAIN;
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] timeout waiting for client_session_index",
+ vcm->my_pid);
+ return VPPCOM_ETIMEDOUT;
+}
+
+/*
+ * VPP-API message functions
+ */
+static void
+vppcom_send_session_enable_disable (u8 is_enable)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vl_api_session_enable_disable_t *bmp;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_SESSION_ENABLE_DISABLE);
+ bmp->client_index = vcm->my_client_index;
+ bmp->context = htonl (0xfeedface);
+ bmp->is_enable = is_enable;
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & bmp);
+}
+
+static int
+vppcom_app_session_enable (void)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ int rv;
+
+ if (vcm->app_state != STATE_APP_ENABLED)
+ {
+ vppcom_send_session_enable_disable (1 /* is_enabled == TRUE */ );
+ rv = vppcom_wait_for_app_state_change (STATE_APP_ENABLED);
+ if (PREDICT_FALSE (rv))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] Session enable timed out, rv = %s (%d)",
+ vcm->my_pid, vppcom_retval_str (rv), rv);
+ return rv;
+ }
+ }
+ return VPPCOM_OK;
+}
+
+static void
+ vl_api_session_enable_disable_reply_t_handler
+ (vl_api_session_enable_disable_reply_t * mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+
+ if (mp->retval)
+ {
+ clib_warning ("[%d] session_enable_disable failed: %U", vcm->my_pid,
+ format_api_error, ntohl (mp->retval));
+ }
+ else
+ vcm->app_state = STATE_APP_ENABLED;
+}
+
+static void
+vppcom_app_send_attach (void)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vl_api_application_attach_t *bmp;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_APPLICATION_ATTACH);
+ bmp->client_index = vcm->my_client_index;
+ bmp->context = htonl (0xfeedface);
+ bmp->options[APP_OPTIONS_FLAGS] =
+ APP_OPTIONS_FLAGS_USE_FIFO | APP_OPTIONS_FLAGS_ADD_SEGMENT;
+ bmp->options[SESSION_OPTIONS_SEGMENT_SIZE] = vcm->cfg.segment_size;
+ bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = vcm->cfg.add_segment_size;
+ bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = vcm->cfg.rx_fifo_size;
+ bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = vcm->cfg.tx_fifo_size;
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & bmp);
+}
+
+static int
+vppcom_app_attach (void)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ int rv;
+
+ vppcom_app_send_attach ();
+ rv = vppcom_wait_for_app_state_change (STATE_APP_ATTACHED);
+ if (PREDICT_FALSE (rv))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] application attach timed out, rv = %s (%d)",
+ vcm->my_pid, vppcom_retval_str (rv), rv);
+ return rv;
+ }
+ return VPPCOM_OK;
+}
+
+static void
+vppcom_app_detach (void)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vl_api_application_detach_t *bmp;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_APPLICATION_DETACH);
+ bmp->client_index = vcm->my_client_index;
+ bmp->context = htonl (0xfeedface);
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & bmp);
+}
+
+static void
+vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t *
+ mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ static svm_fifo_segment_create_args_t _a;
+ svm_fifo_segment_create_args_t *a = &_a;
+ int rv;
+
+ memset (a, 0, sizeof (*a));
+ if (mp->retval)
+ {
+ clib_warning ("[%d] attach failed: %U", vcm->my_pid,
+ format_api_error, ntohl (mp->retval));
+ return;
+ }
+
+ if (mp->segment_name_length == 0)
+ {
+ clib_warning ("[%d] segment_name_length zero", vcm->my_pid);
+ return;
+ }
+
+ a->segment_name = (char *) mp->segment_name;
+ a->segment_size = mp->segment_size;
+
+ ASSERT (mp->app_event_queue_address);
+
+ /* Attach to the segment vpp created */
+ rv = svm_fifo_segment_attach (a);
+ vec_reset_length (a->new_segment_indices);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_warning ("[%d] svm_fifo_segment_attach ('%s') failed", vcm->my_pid,
+ mp->segment_name);
+ return;
+ }
+
+ vcm->app_event_queue =
+ uword_to_pointer (mp->app_event_queue_address,
+ unix_shared_memory_queue_t *);
+
+ vcm->app_state = STATE_APP_ATTACHED;
+}
+
+static void
+vl_api_application_detach_reply_t_handler (vl_api_application_detach_reply_t *
+ mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+
+ if (mp->retval)
+ clib_warning ("[%d] detach failed: %U", vcm->my_pid, format_api_error,
+ ntohl (mp->retval));
+
+ vcm->app_state = STATE_APP_ENABLED;
+}
+
+static void
+vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t *
+ mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ uword *p;
+
+ p = hash_get (vcm->session_index_by_vpp_handles, mp->handle);
+ if (p)
+ {
+ session_t *session = 0;
+ int rv;
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (p[0], &session);
+ if (PREDICT_FALSE (rv))
+ {
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, p[0]);
+ }
+ else
+ /* only touch the session if the lookup succeeded */
+ session->state = STATE_DISCONNECT;
+ hash_unset (vcm->session_index_by_vpp_handles, mp->handle);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ }
+ else
+ {
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] couldn't find session key %llx", vcm->my_pid,
+ mp->handle);
+ }
+
+ if (mp->retval)
+ clib_warning ("[%d] disconnect_session failed: %U", vcm->my_pid,
+ format_api_error, ntohl (mp->retval));
+}
+
+static void
+vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ static svm_fifo_segment_create_args_t _a;
+ svm_fifo_segment_create_args_t *a = &_a;
+ int rv;
+
+ memset (a, 0, sizeof (*a));
+ a->segment_name = (char *) mp->segment_name;
+ a->segment_size = mp->segment_size;
+ /* Attach to the segment vpp created */
+ rv = svm_fifo_segment_attach (a);
+ vec_reset_length (a->new_segment_indices);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_warning ("[%d] svm_fifo_segment_attach ('%s') failed",
+ vcm->my_pid, mp->segment_name);
+ return;
+ }
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] mapped new segment '%s' size %d", vcm->my_pid,
+ mp->segment_name, mp->segment_size);
+}
+
+static void
+vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ vl_api_disconnect_session_reply_t *rmp;
+ uword *p;
+ int rv = 0;
+
+ p = hash_get (vcm->session_index_by_vpp_handles, mp->handle);
+ if (p)
+ {
+ int rval;
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rval = vppcom_session_at_index (p[0], &session);
+ if (PREDICT_FALSE (rval))
+ {
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, p[0]);
+ }
+ else
+ pool_put (vcm->sessions, session);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ hash_unset (vcm->session_index_by_vpp_handles, mp->handle);
+ }
+ else
+ {
+ clib_warning ("[%d] couldn't find session key %llx", vcm->my_pid,
+ mp->handle);
+ rv = -11;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+
+ rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY);
+ rmp->retval = htonl (rv);
+ rmp->handle = mp->handle;
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & rmp);
+}
+
+static void
+vl_api_reset_session_t_handler (vl_api_reset_session_t * mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ vl_api_reset_session_reply_t *rmp;
+ uword *p;
+ int rv = 0;
+
+ p = hash_get (vcm->session_index_by_vpp_handles, mp->handle);
+ if (p)
+ {
+ int rval;
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rval = vppcom_session_at_index (p[0], &session);
+ if (PREDICT_FALSE (rval))
+ {
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, p[0]);
+ }
+ else
+ pool_put (vcm->sessions, session);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ hash_unset (vcm->session_index_by_vpp_handles, mp->handle);
+ }
+ else
+ {
+ clib_warning ("[%d] couldn't find session key %llx", vcm->my_pid,
+ mp->handle);
+ rv = -11;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_RESET_SESSION_REPLY);
+ rmp->retval = htonl (rv);
+ rmp->handle = mp->handle;
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & rmp);
+}
+
+static void
+vl_api_connect_session_reply_t_handler (vl_api_connect_session_reply_t * mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session;
+ u32 session_index;
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ u8 is_cut_thru = 0;
+ int rv;
+
+ if (mp->retval)
+ {
+ clib_warning ("[%d] connect failed: %U", vcm->my_pid, format_api_error,
+ ntohl (mp->retval));
+ return;
+ }
+
+ session_index = mp->context;
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] session_index = %d 0x%08x", vcm->my_pid,
+ session_index, session_index);
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ if (pool_is_free_index (vcm->sessions, session_index))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] invalid session, sid %d is closed!",
+ vcm->my_pid, session_index);
+ return;
+ }
+
+ /* We've been redirected */
+ if (mp->segment_name_length > 0)
+ {
+ static svm_fifo_segment_create_args_t _a;
+ svm_fifo_segment_create_args_t *a = &_a;
+
+ is_cut_thru = 1;
+ memset (a, 0, sizeof (*a));
+ a->segment_name = (char *) mp->segment_name;
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] cut-thru segment: %s", vcm->my_pid,
+ a->segment_name);
+ rv = svm_fifo_segment_attach (a);
+ vec_reset_length (a->new_segment_indices);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_warning ("[%d] sm_fifo_segment_attach ('%s') failed",
+ vcm->my_pid, a->segment_name);
+ return;
+ }
+ }
+
+ /*
+ * Setup session
+ */
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] client sid %d", vcm->my_pid, session_index);
+
+ session = pool_elt_at_index (vcm->sessions, session_index);
+ session->is_cut_thru = is_cut_thru;
+ session->vpp_event_queue = uword_to_pointer (mp->vpp_event_queue_address,
+ unix_shared_memory_queue_t *);
+
+ rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
+ rx_fifo->client_session_index = session_index;
+ tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
+ tx_fifo->client_session_index = session_index;
+
+ session->server_rx_fifo = rx_fifo;
+ session->server_tx_fifo = tx_fifo;
+ session->vpp_session_handle = mp->handle;
+ session->state = STATE_CONNECT;
+
+ /* Add it to lookup table */
+ hash_set (vcm->session_index_by_vpp_handles, mp->handle, session_index);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+}
+
+static void
+vppcom_send_connect_sock (session_t * session, u32 session_index)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vl_api_connect_sock_t *cmp;
+
+ /* Assumes caller has acquired the spinlock: vcm->sessions_lockp */
+ session->is_server = 0;
+ cmp = vl_msg_api_alloc (sizeof (*cmp));
+ memset (cmp, 0, sizeof (*cmp));
+ cmp->_vl_msg_id = ntohs (VL_API_CONNECT_SOCK);
+ cmp->client_index = vcm->my_client_index;
+ cmp->context = session_index;
+
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] session_index = %d 0x%08x",
+ vcm->my_pid, session_index, session_index);
+
+ cmp->vrf = session->vrf;
+ cmp->is_ip4 = session->is_ip4;
+ clib_memcpy (cmp->ip, session->ip, sizeof (cmp->ip));
+ cmp->port = session->port;
+ cmp->proto = session->proto;
+ clib_memcpy (cmp->options, session->options, sizeof (cmp->options));
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & cmp);
+}
+
+static int
+vppcom_send_disconnect (u32 session_index)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vl_api_disconnect_session_t *dmp;
+ session_t *session = 0;
+ int rv;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return rv;
+ }
+
+ dmp = vl_msg_api_alloc (sizeof (*dmp));
+ memset (dmp, 0, sizeof (*dmp));
+ dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION);
+ dmp->client_index = vcm->my_client_index;
+ dmp->handle = session->vpp_session_handle;
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & dmp);
+ return VPPCOM_OK;
+}
+
+static void
+vl_api_bind_sock_reply_t_handler (vl_api_bind_sock_reply_t * mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ int rv;
+
+ if (mp->retval)
+ clib_warning ("[%d] bind failed: %U", vcm->my_pid, format_api_error,
+ ntohl (mp->retval));
+
+ ASSERT (vcm->bind_session_index != ~0);
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (vcm->bind_session_index, &session);
+ if (rv == VPPCOM_OK)
+ {
+ session->vpp_session_handle = mp->handle;
+ hash_set (vcm->session_index_by_vpp_handles, mp->handle,
+ vcm->bind_session_index);
+ session->state = mp->retval ? STATE_FAILED : STATE_LISTEN;
+ vcm->bind_session_index = ~0;
+ }
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+}
+
+static void
+vl_api_unbind_sock_reply_t_handler (vl_api_unbind_sock_reply_t * mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ int rv;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (vcm->bind_session_index, &session);
+ if (rv == VPPCOM_OK)
+ {
+ if ((VPPCOM_DEBUG > 1) && (mp->retval))
+ clib_warning ("[%d] unbind failed: %U", vcm->my_pid, format_api_error,
+ ntohl (mp->retval));
+
+ vcm->bind_session_index = ~0;
+ session->state = STATE_START;
+ }
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+}
+
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+ i_max_n_zero = ARRAY_LEN (a->as_u16);
+ max_n_zeros = 0;
+ i_first_zero = i_max_n_zero;
+ n_zeros = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ u32 is_zero = a->as_u16[i] == 0;
+ if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+ {
+ i_first_zero = i;
+ n_zeros = 0;
+ }
+ n_zeros += is_zero;
+ if ((!is_zero && n_zeros > max_n_zeros)
+ || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+ {
+ i_max_n_zero = i_first_zero;
+ max_n_zeros = n_zeros;
+ i_first_zero = ARRAY_LEN (a->as_u16);
+ n_zeros = 0;
+ }
+ }
+
+ last_double_colon = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == i_max_n_zero && max_n_zeros > 1)
+ {
+ s = format (s, "::");
+ i += max_n_zeros - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
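+
+/*
+ * Example: the longest run of zero hextets is collapsed exactly once,
+ * so 2001:db8:0:0:0:0:0:1 formats as "2001:db8::1" (illustrative).
+ */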
+
+/* Format an IP46 address. */
+u8 *
+format_ip46_address (u8 * s, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ int is_ip4 = 1;
+
+ switch (type)
+ {
+ case IP46_TYPE_ANY:
+ is_ip4 = ip46_address_is_ip4 (ip46);
+ break;
+ case IP46_TYPE_IP4:
+ is_ip4 = 1;
+ break;
+ case IP46_TYPE_IP6:
+ is_ip4 = 0;
+ break;
+ }
+
+ return is_ip4 ?
+ format (s, "%U", format_ip4_address, &ip46->ip4) :
+ format (s, "%U", format_ip6_address, &ip46->ip6);
+}
+
+static void
+vl_api_accept_session_t_handler (vl_api_accept_session_t * mp)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vl_api_accept_session_reply_t *rmp;
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ session_t *session;
+ u32 session_index;
+ int rv = 0;
+
+ if (!clib_fifo_free_elts (vcm->client_session_index_fifo))
+ {
+ clib_warning ("[%d] client session queue is full!", vcm->my_pid);
+ rv = VNET_API_ERROR_QUEUE_FULL;
+ goto send_reply;
+ }
+
+ if (VPPCOM_DEBUG > 1)
+ {
+ u8 *ip_str = format (0, "%U", format_ip46_address, &mp->ip, mp->is_ip4);
+ clib_warning ("[%d] accepted session from: %s:%d", vcm->my_pid, ip_str,
+ clib_net_to_host_u16 (mp->port));
+ vec_free (ip_str);
+ }
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ /* Allocate local session and set it up */
+ pool_get (vcm->sessions, session);
+ memset (session, 0, sizeof (*session));
+ session_index = session - vcm->sessions;
+
+ rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
+ rx_fifo->client_session_index = session_index;
+ tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
+ tx_fifo->client_session_index = session_index;
+
+ session->server_rx_fifo = rx_fifo;
+ session->server_tx_fifo = tx_fifo;
+ session->vpp_event_queue = uword_to_pointer (mp->vpp_event_queue_address,
+ unix_shared_memory_queue_t *);
+ session->state = STATE_ACCEPT;
+ session->is_cut_thru = 0;
+ session->is_server = 1;
+ session->port = ntohs (mp->port);
+ session->is_ip4 = mp->is_ip4;
+ clib_memcpy (session->ip, mp->ip, sizeof (session->ip));
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+
+ /* Add it to lookup table */
+ hash_set (vcm->session_index_by_vpp_handles, mp->handle, session_index);
+
+ clib_fifo_add1 (vcm->client_session_index_fifo, session_index);
+
+ /*
+ * Send accept reply to vpp
+ */
+send_reply:
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY);
+ rmp->retval = htonl (rv);
+ rmp->handle = mp->handle;
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & rmp);
+}
+
+/*
+ * Acting as server for redirected connect requests
+ */
+static void
+vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp)
+{
+ static svm_fifo_segment_create_args_t _a;
+ svm_fifo_segment_create_args_t *a = &_a;
+ vppcom_main_t *vcm = &vppcom_main;
+ u32 session_index;
+ svm_fifo_segment_private_t *seg;
+ unix_shared_memory_queue_t *client_q;
+ vl_api_connect_session_reply_t *rmp;
+ session_t *session = 0;
+ int rv = 0;
+ svm_fifo_t *rx_fifo;
+ svm_fifo_t *tx_fifo;
+ unix_shared_memory_queue_t *event_q = 0;
+
+ if (!clib_fifo_free_elts (vcm->client_session_index_fifo))
+ {
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] client session queue is full!", vcm->my_pid);
+ rv = VNET_API_ERROR_QUEUE_FULL;
+ goto send_reply;
+ }
+
+ /* Create the segment */
+ memset (a, 0, sizeof (*a));
+ a->segment_name = (char *) format ((u8 *) a->segment_name, "%d:segment%d%c",
+ vcm->my_pid, vcm->unique_segment_index++,
+ 0);
+ a->segment_size = vcm->cfg.segment_size;
+ a->preallocated_fifo_pairs = vcm->cfg.preallocated_fifo_pairs;
+ a->rx_fifo_size = vcm->cfg.rx_fifo_size;
+ a->tx_fifo_size = vcm->cfg.tx_fifo_size;
+
+ rv = svm_fifo_segment_create (a);
+ if (PREDICT_FALSE (rv))
+ {
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] svm_fifo_segment_create ('%s') failed",
+ vcm->my_pid, a->segment_name);
+ vec_reset_length (a->new_segment_indices);
+ rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED;
+ goto send_reply;
+ }
+
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] created segment '%s'", vcm->my_pid, a->segment_name);
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ pool_get (vcm->sessions, session);
+ memset (session, 0, sizeof (*session));
+ session_index = session - vcm->sessions;
+
+ session->sm_seg_index = a->new_segment_indices[0];
+ vec_reset_length (a->new_segment_indices);
+
+ seg = svm_fifo_segment_get_segment (session->sm_seg_index);
+ rx_fifo = session->server_rx_fifo =
+ svm_fifo_segment_alloc_fifo (seg, vcm->cfg.rx_fifo_size,
+ FIFO_SEGMENT_RX_FREELIST);
+ if (PREDICT_FALSE (!session->server_rx_fifo))
+ {
+ svm_fifo_segment_delete (seg);
+ clib_warning ("[%d] rx fifo alloc failed, size %ld (0x%lx)",
+ vcm->my_pid, vcm->cfg.rx_fifo_size,
+ vcm->cfg.rx_fifo_size);
+ rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED;
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ goto send_reply;
+ }
+
+ tx_fifo = session->server_tx_fifo =
+ svm_fifo_segment_alloc_fifo (seg, vcm->cfg.tx_fifo_size,
+ FIFO_SEGMENT_TX_FREELIST);
+ if (PREDICT_FALSE (!session->server_tx_fifo))
+ {
+ svm_fifo_segment_delete (seg);
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] tx fifo alloc failed, size %ld (0x%lx)",
+ vcm->my_pid, vcm->cfg.tx_fifo_size,
+ vcm->cfg.tx_fifo_size);
+ rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED;
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ goto send_reply;
+ }
+
+ session->server_rx_fifo->master_session_index = session_index;
+ session->server_tx_fifo->master_session_index = session_index;
+ session->client_queue_address = mp->client_queue_address;
+ session->is_cut_thru = 1;
+ session->is_server = 1;
+ session->is_ip4 = mp->is_ip4;
+ session->port = mp->port;
+ {
+ void *oldheap;
+ ssvm_shared_header_t *sh = seg->ssvm.sh;
+
+ ssvm_lock_non_recursive (sh, 1);
+ oldheap = ssvm_push_heap (sh);
+ event_q = session->vpp_event_queue =
+ unix_shared_memory_queue_init (vcm->cfg.event_queue_size,
+ sizeof (session_fifo_event_t),
+ vcm->my_pid, 0 /* signal not sent */ );
+ ssvm_pop_heap (oldheap);
+ ssvm_unlock_non_recursive (sh);
+ }
+ clib_memcpy (session->ip, mp->ip, sizeof (session->ip));
+
+ session->state = STATE_ACCEPT;
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] Connected cut-thru to client: sid %d",
+ vcm->my_pid, session_index);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ clib_fifo_add1 (vcm->client_session_index_fifo, session_index);
+
+send_reply:
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+
+ rmp->_vl_msg_id = ntohs (VL_API_CONNECT_SESSION_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = htonl (rv);
+ rmp->segment_name_length = vec_len (a->segment_name);
+ clib_memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name));
+ vec_reset_length (a->segment_name);
+
+ if (event_q)
+ {
+ rmp->vpp_event_queue_address = pointer_to_uword (event_q);
+ rmp->server_rx_fifo = pointer_to_uword (rx_fifo);
+ rmp->server_tx_fifo = pointer_to_uword (tx_fifo);
+ }
+ client_q =
+ uword_to_pointer (mp->client_queue_address, unix_shared_memory_queue_t *);
+
+ ASSERT (client_q);
+ vl_msg_api_send_shmem (client_q, (u8 *) & rmp);
+}
+
+static void
+vppcom_send_bind_sock (session_t * session)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vl_api_bind_sock_t *bmp;
+
+ /* Assumes caller has acquired spinlock: vcm->sessions_lockp */
+ session->is_server = 1;
+ bmp = vl_msg_api_alloc (sizeof (*bmp));
+ memset (bmp, 0, sizeof (*bmp));
+
+ bmp->_vl_msg_id = ntohs (VL_API_BIND_SOCK);
+ bmp->client_index = vcm->my_client_index;
+ bmp->context = htonl (0xfeedface);
+ bmp->vrf = session->vrf;
+ bmp->is_ip4 = session->is_ip4;
+ clib_memcpy (bmp->ip, session->ip, sizeof (bmp->ip));
+ bmp->port = session->port;
+ bmp->proto = session->proto;
+ clib_memcpy (bmp->options, session->options, sizeof (bmp->options));
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & bmp);
+}
+
+static void
+vppcom_send_unbind_sock (u32 session_index)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vl_api_unbind_sock_t *ump;
+ session_t *session = 0;
+ int rv;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return;
+ }
+
+ ump = vl_msg_api_alloc (sizeof (*ump));
+ memset (ump, 0, sizeof (*ump));
+
+ ump->_vl_msg_id = ntohs (VL_API_UNBIND_SOCK);
+ ump->client_index = vcm->my_client_index;
+ ump->handle = session->vpp_session_handle;
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & ump);
+}
+
+static int
+vppcom_session_unbind_cut_thru (session_t * session)
+{
+ svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+ svm_fifo_segment_private_t *seg;
+ int rv = VPPCOM_OK;
+
+ seg = vec_elt_at_index (sm->segments, session->sm_seg_index);
+ svm_fifo_segment_free_fifo (seg, session->server_rx_fifo,
+ FIFO_SEGMENT_RX_FREELIST);
+ svm_fifo_segment_free_fifo (seg, session->server_tx_fifo,
+ FIFO_SEGMENT_TX_FREELIST);
+ svm_fifo_segment_delete (seg);
+
+ return rv;
+}
+
+static int
+vppcom_session_unbind (u32 session_index)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ int rv;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ if (PREDICT_FALSE (pool_is_free_index (vcm->sessions, session_index)))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return VPPCOM_EBADFD;
+ }
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+
+ vcm->bind_session_index = session_index;
+ vppcom_send_unbind_sock (session_index);
+ rv = vppcom_wait_for_session_state_change (session_index, STATE_START,
+ vcm->cfg.session_timeout);
+ if (PREDICT_FALSE (rv))
+ {
+ vcm->bind_session_index = ~0;
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] server unbind timed out, rv = %s (%d)",
+ vcm->my_pid, vppcom_retval_str (rv), rv);
+ return rv;
+ }
+ return VPPCOM_OK;
+}
+
+static int
+vppcom_session_disconnect (u32 session_index)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ int rv;
+
+ rv = vppcom_send_disconnect (session_index);
+ if (PREDICT_FALSE (rv))
+ return rv;
+
+ rv = vppcom_wait_for_session_state_change (session_index, STATE_DISCONNECT,
+ vcm->cfg.session_timeout);
+ if (PREDICT_FALSE (rv))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] client disconnect timed out, rv = %s (%d)",
+ vcm->my_pid, vppcom_retval_str (rv), rv);
+ return rv;
+ }
+ return VPPCOM_OK;
+}
+
+#define foreach_sock_msg \
+_(SESSION_ENABLE_DISABLE_REPLY, session_enable_disable_reply) \
+_(BIND_SOCK_REPLY, bind_sock_reply) \
+_(UNBIND_SOCK_REPLY, unbind_sock_reply) \
+_(ACCEPT_SESSION, accept_session) \
+_(CONNECT_SOCK, connect_sock) \
+_(CONNECT_SESSION_REPLY, connect_session_reply) \
+_(DISCONNECT_SESSION, disconnect_session) \
+_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \
+_(RESET_SESSION, reset_session) \
+_(APPLICATION_ATTACH_REPLY, application_attach_reply) \
+_(APPLICATION_DETACH_REPLY, application_detach_reply) \
+_(MAP_ANOTHER_SEGMENT, map_another_segment)
+
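+/*
+ * Sketch of one expansion of the _(N,n) macro below:
+ * _(BIND_SOCK_REPLY, bind_sock_reply) registers
+ * vl_api_bind_sock_reply_t_handler (with its endian and print helpers)
+ * for message id VL_API_BIND_SOCK_REPLY.
+ */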
+static void
+vppcom_api_hookup (void)
+{
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_sock_msg;
+#undef _
+}
+
+static void
+vppcom_cfg_init (vppcom_cfg_t * vcl_cfg)
+{
+ ASSERT (vcl_cfg);
+
+ vcl_cfg->heapsize = (256ULL << 20);
+ vcl_cfg->segment_baseva = 0x200000000ULL;
+ vcl_cfg->segment_size = (256 << 20);
+ vcl_cfg->add_segment_size = (128 << 20);
+ vcl_cfg->preallocated_fifo_pairs = 8;
+ vcl_cfg->rx_fifo_size = (1 << 20);
+ vcl_cfg->tx_fifo_size = (1 << 20);
+ vcl_cfg->event_queue_size = 2048;
+ vcl_cfg->listen_queue_size = CLIB_CACHE_LINE_BYTES / sizeof (u32);
+ vcl_cfg->app_timeout = 10 * 60.0;
+ vcl_cfg->session_timeout = 10 * 60.0;
+ vcl_cfg->accept_timeout = 60.0;
+}
+
+static void
+vppcom_cfg_heapsize (char *conf_fname)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vppcom_cfg_t *vcl_cfg = &vcm->cfg;
+ FILE *fp;
+ char inbuf[4096];
+ int argc = 1;
+ char **argv = NULL;
+ char *arg = NULL;
+ char *p;
+ int i;
+ u8 *sizep;
+ u32 size;
+
+ fp = fopen (conf_fname, "r");
+ if (fp == NULL)
+ {
+ if (VPPCOM_DEBUG > 0)
+ fprintf (stderr, "open configuration file '%s' failed\n", conf_fname);
+ goto defaulted;
+ }
+ argv = calloc (1, sizeof (char *));
+ if (argv == NULL)
+ goto defaulted;
+
+ while (1)
+ {
+ if (fgets (inbuf, 4096, fp) == 0)
+ break;
+ p = strtok (inbuf, " \t\n");
+ while (p != NULL)
+ {
+ if (*p == '#')
+ break;
+ argc++;
+ char **tmp = realloc (argv, argc * sizeof (char *));
+ if (tmp == NULL)
+ goto defaulted;
+ argv = tmp;
+ arg = strndup (p, 1024);
+ if (arg == NULL)
+ goto defaulted;
+ argv[argc - 1] = arg;
+ p = strtok (NULL, " \t\n");
+ }
+ }
+
+ fclose (fp);
+ fp = NULL;
+
+ char **tmp = realloc (argv, (argc + 1) * sizeof (char *));
+ if (tmp == NULL)
+ goto defaulted;
+ argv = tmp;
+ argv[argc] = NULL;
+
+ /*
+ * Look for and parse the "heapsize" config parameter.
+ * Manual since none of the clib infra has been bootstrapped yet.
+ *
+ * Format: heapsize <nn>[mM][gG]
+ */
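+ /* Example (hypothetical value): "heapsize 512M" -> 512 << 20 bytes */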
+
+ for (i = 1; i < (argc - 1); i++)
+ {
+ if (!strncmp (argv[i], "heapsize", 8))
+ {
+ sizep = (u8 *) argv[i + 1];
+ size = 0;
+ while (*sizep >= '0' && *sizep <= '9')
+ {
+ size *= 10;
+ size += *sizep++ - '0';
+ }
+ if (size == 0)
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] parse error '%s %s', "
+ "using default heapsize %lld (0x%llx)",
+ vcm->my_pid, argv[i], argv[i + 1],
+ vcl_cfg->heapsize, vcl_cfg->heapsize);
+ goto defaulted;
+ }
+
+ if (*sizep == 'g' || *sizep == 'G')
+ vcl_cfg->heapsize = size << 30;
+ else if (*sizep == 'm' || *sizep == 'M')
+ vcl_cfg->heapsize = size << 20;
+ else
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] parse error '%s %s', "
+ "using default heapsize %lld (0x%llx)",
+ vcm->my_pid, argv[i], argv[i + 1],
+ vcl_cfg->heapsize, vcl_cfg->heapsize);
+ goto defaulted;
+ }
+ }
+ }
+
+defaulted:
+ if (fp != NULL)
+ fclose (fp);
+ if (argv != NULL)
+ free (argv);
+ if (!clib_mem_init (0, vcl_cfg->heapsize))
+ clib_warning ("[%d] vppcom heap allocation failure!", vcm->my_pid);
+ else if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] allocated vppcom heapsize %lld (0x%llx)",
+ vcm->my_pid, vcl_cfg->heapsize, vcl_cfg->heapsize);
+}
+
+static void
+vppcom_cfg_read (char *conf_fname)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vppcom_cfg_t *vcl_cfg = &vcm->cfg;
+ int fd;
+ unformat_input_t _input, *input = &_input;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 vc_cfg_input = 0;
+ u8 *chroot_path;
+ struct stat s;
+ u32 uid, gid;
+
+ fd = open (conf_fname, O_RDONLY);
+ if (fd < 0)
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] open configuration file '%s' failed!",
+ vcm->my_pid, conf_fname);
+ goto file_done;
+ }
+
+ if (fstat (fd, &s) < 0)
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] failed to stat `%s'", vcm->my_pid, conf_fname);
+ goto file_done;
+ }
+
+ if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] not a regular file `%s'", vcm->my_pid,
+ conf_fname);
+ goto file_done;
+ }
+
+ unformat_init_unix_file (input, fd);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ (void) unformat_user (input, unformat_line_input, line_input);
+ unformat_skip_white_space (line_input);
+
+ if (unformat (line_input, "vppcom {"))
+ {
+ vc_cfg_input = 1;
+ continue;
+ }
+
+ if (vc_cfg_input)
+ {
+ if (unformat (line_input, "heapsize %s", &chroot_path))
+ {
+ vec_terminate_c_string (chroot_path);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured heapsize %s, "
+ "actual heapsize %lld (0x%llx)",
+ vcm->my_pid, chroot_path, vcl_cfg->heapsize,
+ vcl_cfg->heapsize);
+ vec_free (chroot_path);
+ }
+ else if (unformat (line_input, "api-prefix %s", &chroot_path))
+ {
+ vec_terminate_c_string (chroot_path);
+ vl_set_memory_root_path ((char *) chroot_path);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured api-prefix %s",
+ vcm->my_pid, chroot_path);
+ chroot_path = 0; /* Don't vec_free() it! */
+ }
+ else if (unformat (line_input, "uid %d", &uid))
+ {
+ vl_set_memory_uid (uid);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured uid %d", vcm->my_pid, uid);
+ }
+ else if (unformat (line_input, "gid %d", &gid))
+ {
+ vl_set_memory_gid (gid);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured gid %d", vcm->my_pid, gid);
+ }
+ else if (unformat (line_input, "segment-baseva 0x%llx",
+ &vcl_cfg->segment_baseva))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured segment_baseva 0x%llx",
+ vcm->my_pid, vcl_cfg->segment_baseva);
+ }
+ else if (unformat (line_input, "segment-size 0x%lx",
+ &vcl_cfg->segment_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured segment_size 0x%lx (%ld)",
+ vcm->my_pid, vcl_cfg->segment_size,
+ vcl_cfg->segment_size);
+ }
+ else if (unformat (line_input, "segment-size %ld",
+ &vcl_cfg->segment_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured segment_size %ld (0x%lx)",
+ vcm->my_pid, vcl_cfg->segment_size,
+ vcl_cfg->segment_size);
+ }
+ else if (unformat (line_input, "add-segment-size 0x%lx",
+ &vcl_cfg->add_segment_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning
+ ("[%d] configured add_segment_size 0x%lx (%ld)",
+ vcm->my_pid, vcl_cfg->add_segment_size,
+ vcl_cfg->add_segment_size);
+ }
+ else if (unformat (line_input, "add-segment-size %ld",
+ &vcl_cfg->add_segment_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning
+ ("[%d] configured add_segment_size %ld (0x%lx)",
+ vcm->my_pid, vcl_cfg->add_segment_size,
+ vcl_cfg->add_segment_size);
+ }
+ else if (unformat (line_input, "preallocated-fifo-pairs %d",
+ &vcl_cfg->preallocated_fifo_pairs))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured preallocated_fifo_pairs "
+ "%d (0x%x)", vcm->my_pid,
+ vcl_cfg->preallocated_fifo_pairs,
+ vcl_cfg->preallocated_fifo_pairs);
+ }
+ else if (unformat (line_input, "rx-fifo-size 0x%lx",
+ &vcl_cfg->rx_fifo_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured rx_fifo_size 0x%lx (%ld)",
+ vcm->my_pid, vcl_cfg->rx_fifo_size,
+ vcl_cfg->rx_fifo_size);
+ }
+ else if (unformat (line_input, "rx-fifo-size %ld",
+ &vcl_cfg->rx_fifo_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured rx_fifo_size %ld (0x%lx)",
+ vcm->my_pid, vcl_cfg->rx_fifo_size,
+ vcl_cfg->rx_fifo_size);
+ }
+ else if (unformat (line_input, "tx-fifo-size 0x%lx",
+ &vcl_cfg->tx_fifo_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured tx_fifo_size 0x%lx (%ld)",
+ vcm->my_pid, vcl_cfg->tx_fifo_size,
+ vcl_cfg->tx_fifo_size);
+ }
+ else if (unformat (line_input, "tx-fifo-size %ld",
+ &vcl_cfg->tx_fifo_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured tx_fifo_size %ld (0x%lx)",
+ vcm->my_pid, vcl_cfg->tx_fifo_size,
+ vcl_cfg->tx_fifo_size);
+ }
+ else if (unformat (line_input, "event-queue-size 0x%lx",
+ &vcl_cfg->event_queue_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured event_queue_size 0x%lx (%ld)",
+ vcm->my_pid, vcl_cfg->event_queue_size,
+ vcl_cfg->event_queue_size);
+ }
+ else if (unformat (line_input, "event-queue-size %ld",
+ &vcl_cfg->event_queue_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured event_queue_size %ld (0x%lx)",
+ vcm->my_pid, vcl_cfg->event_queue_size,
+ vcl_cfg->event_queue_size);
+ }
+ else if (unformat (line_input, "listen-queue-size 0x%lx",
+ &vcl_cfg->listen_queue_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured listen_queue_size 0x%lx (%ld)",
+ vcm->my_pid, vcl_cfg->listen_queue_size,
+ vcl_cfg->listen_queue_size);
+ }
+ else if (unformat (line_input, "listen-queue-size %ld",
+ &vcl_cfg->listen_queue_size))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured listen_queue_size %ld (0x%lx)",
+ vcm->my_pid, vcl_cfg->listen_queue_size,
+ vcl_cfg->listen_queue_size);
+ }
+ else if (unformat (line_input, "app-timeout %f",
+ &vcl_cfg->app_timeout))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured app_timeout %f",
+ vcm->my_pid, vcl_cfg->app_timeout);
+ }
+ else if (unformat (line_input, "session-timeout %f",
+ &vcl_cfg->session_timeout))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured session_timeout %f",
+ vcm->my_pid, vcl_cfg->session_timeout);
+ }
+ else if (unformat (line_input, "accept-timeout %f",
+ &vcl_cfg->accept_timeout))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] configured accept_timeout %f",
+ vcm->my_pid, vcl_cfg->accept_timeout);
+ }
+ else if (unformat (line_input, "}"))
+ {
+ vc_cfg_input = 0;
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] completed parsing vppcom config!",
+ vcm->my_pid);
+ goto input_done;
+ }
+ else
+ {
+ if (line_input->buffer[line_input->index] != '#')
+ {
+ clib_warning ("[%d] Unknown vppcom config option: '%s'",
+ vcm->my_pid, (char *)
+ &line_input->buffer[line_input->index]);
+ }
+ }
+ }
+ }
+
+input_done:
+ unformat_free (input);
+
+file_done:
+ if (fd >= 0)
+ close (fd);
+}
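+
+/*
+ * Illustrative configuration file accepted by the parser above; all
+ * values are hypothetical examples, not recommendations:
+ *
+ *   vppcom {
+ *     heapsize 512M
+ *     api-prefix vcl
+ *     segment-size 0x10000000
+ *     rx-fifo-size 0x100000
+ *     tx-fifo-size 0x100000
+ *     app-timeout 600.0
+ *   }
+ */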
+
+/*
+ * VPPCOM Public API functions
+ */
+int
+vppcom_app_create (char *app_name)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ vppcom_cfg_t *vcl_cfg = &vcm->cfg;
+ u8 *heap;
+ mheap_t *h;
+ int rv;
+
+ if (!vcm->init)
+ {
+ char *conf_fname;
+
+ vcm->init = 1;
+ vcm->my_pid = getpid ();
+ /* Init config defaults before sizing the client session fifo */
+ vppcom_cfg_init (vcl_cfg);
+ clib_fifo_validate (vcm->client_session_index_fifo,
+ vcm->cfg.listen_queue_size);
+ conf_fname = getenv (VPPCOM_CONF_ENV);
+ if (!conf_fname)
+ {
+ conf_fname = VPPCOM_CONF_DEFAULT;
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] getenv '%s' failed!", vcm->my_pid,
+ VPPCOM_CONF_ENV);
+ }
+ vppcom_cfg_heapsize (conf_fname);
+ vppcom_cfg_read (conf_fname);
+ vcm->bind_session_index = ~0;
+ vcm->main_cpu = os_get_thread_index ();
+ heap = clib_mem_get_per_cpu_heap ();
+ h = mheap_header (heap);
+
+ /* make the main heap thread-safe */
+ h->flags |= MHEAP_FLAG_THREAD_SAFE;
+
+ vcm->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
+
+ clib_time_init (&vcm->clib_time);
+ vppcom_init_error_string_table ();
+ svm_fifo_segment_init (vcl_cfg->segment_baseva,
+ 20 /* timeout in secs */ );
+ clib_spinlock_init (&vcm->sessions_lockp);
+ vppcom_api_hookup ();
+ }
+
+ if (vcm->my_client_index == ~0)
+ {
+ vcm->app_state = STATE_APP_START;
+ rv = vppcom_connect_to_vpp (app_name);
+ if (rv)
+ {
+ clib_warning ("[%s] couldn't connect to VPP.", vcm->my_pid);
+ return rv;
+ }
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sending session enable", vcm->my_pid);
+
+ rv = vppcom_app_session_enable ();
+ if (rv)
+ {
+ clib_warning ("[%d] vppcom_app_session_enable() failed!",
+ vcm->my_pid);
+ return rv;
+ }
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sending app attach", vcm->my_pid);
+
+ rv = vppcom_app_attach ();
+ if (rv)
+ {
+ clib_warning ("[%d] vppcom_app_attach() failed!", vcm->my_pid);
+ return rv;
+ }
+ }
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] app_name '%s', my_client_index %d (0x%x)",
+ vcm->my_pid, app_name, vcm->my_client_index,
+ vcm->my_client_index);
+
+ return VPPCOM_OK;
+}
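+
+/*
+ * Minimal lifecycle sketch ("my_app" is a hypothetical name):
+ *
+ *   if (vppcom_app_create ("my_app") == VPPCOM_OK)
+ *     {
+ *       ... create / bind / connect sessions ...
+ *       vppcom_app_destroy ();
+ *     }
+ */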
+
+void
+vppcom_app_destroy (void)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ int rv;
+
+ if (vcm->my_client_index == ~0)
+ return;
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] detaching from VPP, my_client_index %d (0x%x)",
+ vcm->my_pid, vcm->my_client_index, vcm->my_client_index);
+
+ vppcom_app_detach ();
+ rv = vppcom_wait_for_app_state_change (STATE_APP_ENABLED);
+ if (PREDICT_FALSE (rv))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] application detach timed out, rv = %s (%d)",
+ vcm->my_pid, vppcom_retval_str (rv), rv);
+ }
+ vl_client_disconnect_from_vlib ();
+ vcm->my_client_index = ~0;
+ vcm->app_state = STATE_APP_START;
+}
+
+int
+vppcom_session_create (u32 vrf, u8 proto, u8 is_nonblocking)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session;
+ u32 session_index;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ pool_get (vcm->sessions, session);
+ session_index = session - vcm->sessions;
+
+ session->vrf = vrf;
+ session->proto = proto;
+ session->state = STATE_START;
+ session->is_nonblocking = is_nonblocking;
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid %d", vcm->my_pid, session_index);
+
+ return (int) session_index;
+}
+
+int
+vppcom_session_close (uint32_t session_index)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ int rv;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return rv;
+ }
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid %d", vcm->my_pid, session_index);
+
+ if (session->is_cut_thru)
+ {
+ if (session->is_server)
+ {
+ rv = vppcom_session_unbind_cut_thru (session);
+ if ((VPPCOM_DEBUG > 0) && (rv < 0))
+ clib_warning ("[%d] unbind cut-thru (session %d) failed, "
+ "rv = %s (%d)",
+ vcm->my_pid, session_index,
+ vppcom_retval_str (rv), rv);
+ }
+ }
+ else if (session->is_server)
+ {
+ rv = vppcom_session_unbind (session_index);
+ if ((VPPCOM_DEBUG > 0) && (rv < 0))
+ clib_warning ("[%d] unbind (session %d) failed, rv = %s (%d)",
+ vcm->my_pid, session_index, vppcom_retval_str (rv), rv);
+ }
+ else
+ {
+ rv = vppcom_session_disconnect (session_index);
+ if ((VPPCOM_DEBUG > 0) && (rv < 0))
+ clib_warning ("[%d] disconnect (session %d) failed, rv = %s (%d)",
+ vcm->my_pid, session_index, vppcom_retval_str (rv), rv);
+ }
+ if (rv < 0)
+ return rv;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ pool_put_index (vcm->sessions, session_index);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ return rv;
+}
+
+int
+vppcom_session_bind (uint32_t session_index, vppcom_endpt_t * ep)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ int rv;
+ ip46_address_t *ip46;
+
+ if (!ep || !ep->ip)
+ return VPPCOM_EINVAL;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return rv;
+ }
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid %d", vcm->my_pid, session_index);
+
+ session->vrf = ep->vrf;
+ session->is_ip4 = ep->is_ip4;
+ memset (session->ip, 0, sizeof (session->ip));
+ ip46 = (ip46_address_t *) session->ip;
+ *ip46 = to_ip46 (!ep->is_ip4, ep->ip);
+ session->port = ep->port;
+
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ return VPPCOM_OK;
+}
+
+int
+vppcom_session_listen (uint32_t listen_session_index, uint32_t q_len)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *listen_session = 0;
+ int rv;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (listen_session_index, &listen_session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, listen_session_index);
+ return rv;
+ }
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid %d", vcm->my_pid, listen_session_index);
+
+ ASSERT (vcm->bind_session_index == ~0);
+ vcm->bind_session_index = listen_session_index;
+ vppcom_send_bind_sock (listen_session);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ rv =
+ vppcom_wait_for_session_state_change (listen_session_index, STATE_LISTEN,
+ vcm->cfg.session_timeout);
+ if (PREDICT_FALSE (rv))
+ {
+ vcm->bind_session_index = ~0;
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] server listen timed out, rv = %d (%d)",
+ vcm->my_pid, vppcom_retval_str (rv), rv);
+ return rv;
+ }
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (listen_session_index, &listen_session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, listen_session_index);
+ return rv;
+ }
+ listen_session->is_listen = 1;
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ clib_fifo_validate (vcm->client_session_index_fifo, q_len);
+
+ return VPPCOM_OK;
+}
+
+int
+vppcom_session_accept (uint32_t listen_session_index, vppcom_endpt_t * ep,
+ double wait_for_time)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *listen_session = 0;
+ session_t *client_session = 0;
+ u32 client_session_index;
+ int rv;
+ f64 wait_for;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (listen_session_index, &listen_session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, listen_session_index);
+ return rv;
+ }
+
+ if (listen_session->state != STATE_LISTEN)
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] session not in listen state, state = %s",
+ vcm->my_pid,
+ vppcom_session_state_str (listen_session->state));
+ return VPPCOM_EBADFD;
+ }
+ wait_for = listen_session->is_nonblocking ? 0 :
+ (wait_for_time < 0) ? vcm->cfg.accept_timeout : wait_for_time;
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid %d: %s (%d)", vcm->my_pid,
+ listen_session_index,
+ vppcom_session_state_str (listen_session->state),
+ listen_session->state);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+
+ while (1)
+ {
+ rv = vppcom_wait_for_client_session_index (wait_for);
+ if (rv)
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid %d, accept timed out, rv = %s (%d)",
+ vcm->my_pid, listen_session_index,
+ vppcom_retval_str (rv), rv);
+ if ((wait_for == 0) || (wait_for_time > 0))
+ return rv;
+ }
+ else
+ break;
+ }
+
+ clib_fifo_sub1 (vcm->client_session_index_fifo, client_session_index);
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (client_session_index, &client_session);
+ ASSERT (rv == VPPCOM_OK);
+ ASSERT (client_session->is_ip4 == listen_session->is_ip4);
+
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] Got a request: client sid %d", vcm->my_pid,
+ client_session_index);
+
+ ep->vrf = client_session->vrf;
+ ep->is_cut_thru = client_session->is_cut_thru;
+ ep->is_ip4 = client_session->is_ip4;
+ ep->port = client_session->port;
+ if (client_session->is_ip4)
+ clib_memcpy (ep->ip, client_session->ip, sizeof (ip4_address_t));
+ else
+ clib_memcpy (ep->ip, client_session->ip, sizeof (ip6_address_t));
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ return (int) client_session_index;
+}
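+
+/*
+ * Passive-open sketch using the calls above (error handling elided;
+ * ep and peer_ep are caller-provided vppcom_endpt_t structs):
+ *
+ *   int lsid = vppcom_session_create (0, 0, 0);
+ *   vppcom_session_bind (lsid, &ep);
+ *   vppcom_session_listen (lsid, 10);
+ *   int csid = vppcom_session_accept (lsid, &peer_ep, -1.0);
+ */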
+
+int
+vppcom_session_connect (uint32_t session_index, vppcom_endpt_t * server_ep)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ int rv;
+ ip46_address_t *ip46;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return rv;
+ }
+
+ if (session->state == STATE_CONNECT)
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] session, sid (%d) already connected!",
+ vcm->my_pid, session_index);
+ return VPPCOM_OK;
+ }
+
+ session->vrf = server_ep->vrf;
+ session->is_ip4 = server_ep->is_ip4;
+ memset (session->ip, 0, sizeof (session->ip));
+ ip46 = (ip46_address_t *) session->ip;
+ *ip46 = to_ip46 (!server_ep->is_ip4, server_ep->ip);
+ session->port = server_ep->port;
+
+ if (VPPCOM_DEBUG > 0)
+ {
+ u8 *ip_str = format (0, "%U", format_ip46_address,
+ &session->ip, session->is_ip4);
+ clib_warning ("[%d] connect sid %d to %s server port %d",
+ vcm->my_pid, session_index, ip_str,
+ clib_net_to_host_u16 (session->port));
+ vec_free (ip_str);
+ }
+
+ vppcom_send_connect_sock (session, session_index);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ rv = vppcom_wait_for_session_state_change (session_index, STATE_CONNECT,
+ vcm->cfg.session_timeout);
+ if (PREDICT_FALSE (rv))
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] connect timed out, rv = %s (%d)",
+ vcm->my_pid, vppcom_retval_str (rv), rv);
+ return rv;
+ }
+ return VPPCOM_OK;
+}
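+
+/*
+ * Active-open sketch (server_ep is a caller-provided vppcom_endpt_t;
+ * a negative result from any call is a VPPCOM_* error):
+ *
+ *   int sid = vppcom_session_create (0, 0, 0);
+ *   if (vppcom_session_connect (sid, &server_ep) == VPPCOM_OK)
+ *     nbytes = vppcom_session_read (sid, buf, buflen);
+ */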
+
+int
+vppcom_session_read (uint32_t session_index, void *buf, int n)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ svm_fifo_t *rx_fifo;
+ int n_read = 0;
+ int rv;
+ int max_dequeue;
+ char *fifo_str;
+
+ ASSERT (buf);
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return rv;
+ }
+
+ if (session->state == STATE_DISCONNECT)
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid (%d) has been closed by remote peer!",
+ vcm->my_pid, session_index);
+ return VPPCOM_ECONNRESET;
+ }
+
+ rx_fifo = ((!session->is_cut_thru || session->is_server) ?
+ session->server_rx_fifo : session->server_tx_fifo);
+ fifo_str = ((!session->is_cut_thru || session->is_server) ?
+ "server_rx_fifo" : "server_tx_fifo");
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+
+ max_dequeue = (int) svm_fifo_max_dequeue (rx_fifo);
+ n_read = svm_fifo_dequeue_nowait (rx_fifo, clib_min (n, max_dequeue), buf);
+
+ if (VPPCOM_DEBUG > 2)
+ clib_warning ("[%d] sid %d, read %d bytes from %s (%p)", vcm->my_pid,
+ session_index, n_read, fifo_str, rx_fifo);
+
+ return (n_read <= 0) ? VPPCOM_EAGAIN : n_read;
+}
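
Because the dequeue above is a nowait operation, an empty fifo yields
VPPCOM_EAGAIN rather than blocking; callers are expected to retry, as in this
minimal sketch (sid and buf are assumed to be in scope):

    /* Sketch: spin until data arrives; real callers would back off
     * or wait for readability via vppcom_select (). */
    int n;
    do
      n = vppcom_session_read (sid, buf, sizeof (buf));
    while (n == VPPCOM_EAGAIN);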
+
+static inline int
+vppcom_session_read_ready (session_t * session, u32 session_index)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ svm_fifo_t *rx_fifo;
+ int ready = 0;
+
+ /* Assumes caller has acquired spinlock: vcm->sessions_lockp */
+ if (session->state == STATE_DISCONNECT)
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid (%d) has been closed by remote peer!",
+ vcm->my_pid, session_index);
+ return VPPCOM_ECONNRESET;
+ }
+
+ if (session->is_listen)
+ ready = clib_fifo_elts (vcm->client_session_index_fifo);
+ else
+ {
+ rx_fifo = ((!session->is_cut_thru || session->is_server) ?
+ session->server_rx_fifo : session->server_tx_fifo);
+
+ ready = svm_fifo_max_dequeue (rx_fifo);
+ }
+
+ if (VPPCOM_DEBUG > 3)
+ clib_warning ("[%d] sid %d, peek %s (%p), ready = %d", vcm->my_pid,
+ session_index,
+ (!session->is_cut_thru || session->is_server) ?
+ "server_rx_fifo" : "server_tx_fifo",
+ rx_fifo, ready);
+ return ready;
+}
+
+int
+vppcom_session_write (uint32_t session_index, void *buf, int n)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ session_t *session = 0;
+ svm_fifo_t *tx_fifo;
+ unix_shared_memory_queue_t *q;
+ session_fifo_event_t evt;
+ int rv;
+ char *fifo_str;
+ u8 is_nonblocking;
+
+ ASSERT (buf);
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rv))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return rv;
+ }
+
+ if (session->state == STATE_DISCONNECT)
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid (%d) has been closed by remote peer!",
+ vcm->my_pid, session_index);
+ return VPPCOM_ECONNRESET;
+ }
+
+ tx_fifo = ((!session->is_cut_thru || session->is_server) ?
+ session->server_tx_fifo : session->server_rx_fifo);
+ fifo_str = ((!session->is_cut_thru || session->is_server) ?
+ "server_tx_fifo" : "server_rx_fifo");
+
+ is_nonblocking = session->is_nonblocking;
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+
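+ /* Blocking sessions spin here until the fifo accepts the data;
+ * nonblocking sessions take one shot and return the enqueue result. */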
+ do
+ {
+ rv = svm_fifo_enqueue_nowait (tx_fifo, n, buf);
+ }
+ while (!is_nonblocking && (rv <= 0));
+
+ /* If event wasn't set, add one */
+ if (!session->is_cut_thru && (rv > 0) && svm_fifo_set_event (tx_fifo))
+ {
+ int rval;
+
+ /* Fabricate TX event, send to vpp */
+ evt.fifo = tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ evt.event_id = vcm->tx_event_id++;
+
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rval = vppcom_session_at_index (session_index, &session);
+ if (PREDICT_FALSE (rval))
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+ vcm->my_pid, session_index);
+ return rval;
+ }
+ q = session->vpp_event_queue;
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ ASSERT (q);
+ unix_shared_memory_queue_add (q, (u8 *) & evt,
+ 0 /* do wait for mutex */ );
+ }
+
+ if (VPPCOM_DEBUG > 2)
+ clib_warning ("[%d] sid %d, wrote %d bytes to %s (%p)", vcm->my_pid,
+ session_index, rv, fifo_str, tx_fifo);
+
+ return rv;
+}
+
+static inline int
+vppcom_session_write_ready (session_t * session, u32 session_index)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ svm_fifo_t *tx_fifo;
+ char *fifo_str;
+ int rv;
+
+ /* Assumes caller has acquired spinlock: vcm->sessions_lockp */
+ if (session->state == STATE_DISCONNECT)
+ {
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] sid (%d) has been closed by remote peer!",
+ vcm->my_pid, session_index);
+ return VPPCOM_ECONNRESET;
+ }
+
+ tx_fifo = ((!session->is_cut_thru || session->is_server) ?
+ session->server_tx_fifo : session->server_rx_fifo);
+ fifo_str = ((!session->is_cut_thru || session->is_server) ?
+ "server_tx_fifo" : "server_rx_fifo");
+
+ rv = svm_fifo_max_enqueue (tx_fifo);
+
+ if (VPPCOM_DEBUG > 3)
+ clib_warning ("[%d] sid %d, peek %s (%p), ready = %d", vcm->my_pid,
+ session_index, fifo_str, tx_fifo, rv);
+ return rv;
+}
+
+int
+vppcom_select (unsigned long n_bits, unsigned long *read_map,
+ unsigned long *write_map, unsigned long *except_map,
+ double time_to_wait)
+{
+ vppcom_main_t *vcm = &vppcom_main;
+ u32 session_index;
+ session_t *session = 0;
+ int rv, bits_set = 0;
+ f64 timeout = clib_time_now (&vcm->clib_time) + time_to_wait;
+ u32 minbits = clib_max (n_bits, BITS (uword));
+
+ ASSERT (sizeof (clib_bitmap_t) == sizeof (long int));
+
+ if (read_map)
+ {
+ clib_bitmap_validate (vcm->rd_bitmap, minbits);
+ clib_memcpy (vcm->rd_bitmap, read_map,
+ vec_len (vcm->rd_bitmap) * sizeof (clib_bitmap_t));
+ memset (read_map, 0, vec_len (vcm->rd_bitmap) * sizeof (clib_bitmap_t));
+ }
+ if (write_map)
+ {
+ clib_bitmap_validate (vcm->wr_bitmap, minbits);
+ clib_memcpy (vcm->wr_bitmap, write_map,
+ vec_len (vcm->wr_bitmap) * sizeof (clib_bitmap_t));
+ memset (write_map, 0, vec_len (vcm->wr_bitmap) * sizeof (clib_bitmap_t));
+ }
+ if (except_map)
+ {
+ clib_bitmap_validate (vcm->ex_bitmap, minbits);
+ clib_memcpy (vcm->ex_bitmap, except_map,
+ vec_len (vcm->ex_bitmap) * sizeof (clib_bitmap_t));
+ memset (except_map, 0, vec_len (vcm->ex_bitmap) * sizeof (clib_bitmap_t));
+ }
+
+ do
+ {
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (session_index, vcm->rd_bitmap,
+ ({
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (rv < 0)
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] session %d specified in "
+ "read_map is closed.", vcm->my_pid,
+ session_index);
+ bits_set = VPPCOM_EBADFD;
+ goto select_done;
+ }
+
+ rv = vppcom_session_read_ready (session, session_index);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (vcm->ex_bitmap &&
+ clib_bitmap_get (vcm->ex_bitmap, session_index) && (rv < 0))
+ {
+ // TBD: clib_warning
+ /* coverity[FORWARD_NULL] */
+ clib_bitmap_set_no_check (except_map, session_index, 1);
+ bits_set++;
+ }
+ else if (rv > 0)
+ {
+ // TBD: clib_warning
+ /* coverity[FORWARD_NULL] */
+ clib_bitmap_set_no_check (read_map, session_index, 1);
+ bits_set++;
+ }
+ }));
+
+ clib_bitmap_foreach (session_index, vcm->wr_bitmap,
+ ({
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (rv < 0)
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 0)
+ clib_warning ("[%d] session %d specified in "
+ "write_map is closed.", vcm->my_pid,
+ session_index);
+ bits_set = VPPCOM_EBADFD;
+ goto select_done;
+ }
+
+ rv = vppcom_session_write_ready (session, session_index);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (rv > 0 )
+ {
+ // TBD: clib_warning
+ /* coverity[FORWARD_NULL] */
+ clib_bitmap_set_no_check (write_map, session_index, 1);
+ bits_set++;
+ }
+ }));
+
+ clib_bitmap_foreach (session_index, vcm->ex_bitmap,
+ ({
+ clib_spinlock_lock (&vcm->sessions_lockp);
+ rv = vppcom_session_at_index (session_index, &session);
+ if (rv < 0)
+ {
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (VPPCOM_DEBUG > 1)
+ clib_warning ("[%d] session %d specified in "
+ "except_map is closed.", vcm->my_pid,
+ session_index);
+ bits_set = VPPCOM_EBADFD;
+ goto select_done;
+ }
+
+ rv = vppcom_session_read_ready (session, session_index);
+ clib_spinlock_unlock (&vcm->sessions_lockp);
+ if (rv < 0)
+ {
+ // TBD: clib_warning
+ /* coverity[FORWARD_NULL] */
+ clib_bitmap_set_no_check (except_map, session_index, 1);
+ bits_set++;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ while (clib_time_now (&vcm->clib_time) < timeout);
+
+select_done:
+ return (bits_set);
+}
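
A sketch of calling this select from an application, following the
unsigned-long bitmap layout used above (the session index sid and the
one-second timeout are illustrative):

    /* Sketch: wait up to 1s for session 'sid' to become readable. */
    #define ULBITS (8 * sizeof (unsigned long))
    unsigned long rd_map[4] = { 0 };        /* indices 0..4*ULBITS-1 */
    int n_ready;

    rd_map[sid / ULBITS] |= 1UL << (sid % ULBITS);
    n_ready = vppcom_select (sid + 1, rd_map, 0 /* write */,
                             0 /* except */, 1.0);
    if (n_ready > 0 && (rd_map[sid / ULBITS] & (1UL << (sid % ULBITS))))
      ;                                     /* sid is readable */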
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/vppcom.h b/src/uri/vppcom.h
new file mode 100644
index 00000000..4b048e03
--- /dev/null
+++ b/src/uri/vppcom.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vppcom_h
+#define included_vppcom_h
+
+#include <netdb.h>
+#include <errno.h>
+
+/*
+ * VPPCOM Public API Definitions, Enums, and Data Structures
+ */
+#define INVALID_SESSION_ID (~0)
+#define VPPCOM_VRF_DEFAULT 0
+#define VPPCOM_CONF_ENV "VPPCOM_CONF"
+#define VPPCOM_CONF_DEFAULT "/etc/vpp/vppcom.conf"
+
+typedef enum
+{
+ VPPCOM_PROTO_TCP = 0,
+ VPPCOM_PROTO_UDP,
+} vppcom_proto_t;
+
+typedef enum
+{
+ VPPCOM_IS_IP6 = 0,
+ VPPCOM_IS_IP4,
+} vppcom_is_ip4_t;
+
+typedef struct vppcom_endpt_t_
+{
+ uint32_t vrf;
+ uint8_t is_cut_thru;
+ uint8_t is_ip4;
+ uint8_t *ip;
+ uint16_t port;
+} vppcom_endpt_t;
+
+typedef enum
+{
+ VPPCOM_OK = 0,
+ VPPCOM_EAGAIN = -EAGAIN,
+ VPPCOM_EINVAL = -EINVAL,
+ VPPCOM_EBADFD = -EBADFD,
+ VPPCOM_EAFNOSUPPORT = -EAFNOSUPPORT,
+ VPPCOM_ECONNRESET = -ECONNRESET,
+ VPPCOM_ECONNREFUSED = -ECONNREFUSED,
+ VPPCOM_ETIMEDOUT = -ETIMEDOUT,
+} vppcom_error_t;
+
+/*
+ * VPPCOM Public API Functions
+ */
+static inline const char *
+vppcom_retval_str (int retval)
+{
+ char *st;
+
+ switch (retval)
+ {
+ case VPPCOM_OK:
+ st = "VPPCOM_OK";
+ break;
+
+ case VPPCOM_EAGAIN:
+ st = "VPPCOM_EAGAIN";
+ break;
+
+ case VPPCOM_EINVAL:
+ st = "VPPCOM_EINVAL";
+ break;
+
+ case VPPCOM_EBADFD:
+ st = "VPPCOM_EBADFD";
+ break;
+
+ case VPPCOM_EAFNOSUPPORT:
+ st = "VPPCOM_EAFNOSUPPORT";
+ break;
+
+ case VPPCOM_ECONNRESET:
+ st = "VPPCOM_ECONNRESET";
+ break;
+
+ case VPPCOM_ECONNREFUSED:
+ st = "VPPCOM_ECONNREFUSED";
+ break;
+
+ case VPPCOM_ETIMEDOUT:
+ st = "VPPCOM_ETIMEDOUT";
+ break;
+
+ default:
+ st = "UNKNOWN_STATE";
+ break;
+ }
+
+ return st;
+}
+
+static inline int
+is_vcom_fd (int fd)
+{
+#define VPPCOM_FD_OFFSET (1 << 30)
+ return (fd >= VPPCOM_FD_OFFSET);
+}
+
+/* TBD: make these constructor/destructor functions */
+extern int vppcom_app_create (char *app_name);
+extern void vppcom_app_destroy (void);
+
+extern int vppcom_session_create (uint32_t vrf, uint8_t proto,
+ uint8_t is_nonblocking);
+extern int vppcom_session_close (uint32_t session_index);
+
+extern int vppcom_session_bind (uint32_t session_index, vppcom_endpt_t * ep);
+extern int vppcom_session_listen (uint32_t session_index, uint32_t q_len);
+extern int vppcom_session_accept (uint32_t session_index,
+ vppcom_endpt_t * client_ep,
+ double wait_for_time);
+
+extern int vppcom_session_connect (uint32_t session_index,
+ vppcom_endpt_t * server_ep);
+extern int vppcom_session_read (uint32_t session_index, void *buf, int n);
+extern int vppcom_session_write (uint32_t session_index, void *buf, int n);
+
+extern int vppcom_select (unsigned long n_bits,
+ unsigned long *read_map,
+ unsigned long *write_map,
+ unsigned long *except_map, double wait_for_time);
+
+#endif /* included_vppcom_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
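
Putting the header to use, a minimal blocking-client sketch built only from
the declarations above; the server address, port, and payload are
illustrative assumptions:

    #include <arpa/inet.h>
    #include "vppcom.h"

    int
    main (void)
    {
      uint8_t ip4[4] = { 10, 0, 0, 1 };     /* assumed server address */
      vppcom_endpt_t server = {
        .vrf = VPPCOM_VRF_DEFAULT,
        .is_ip4 = 1,
        .ip = ip4,
        .port = htons (5555),               /* stored in net byte order */
      };
      char buf[64] = "hello";
      int sid;

      if (vppcom_app_create ("demo-client") != VPPCOM_OK)
        return 1;
      sid = vppcom_session_create (VPPCOM_VRF_DEFAULT, VPPCOM_PROTO_TCP,
                                   0 /* blocking */);
      if (sid >= 0 && vppcom_session_connect (sid, &server) == VPPCOM_OK)
        {
          vppcom_session_write (sid, buf, 5);
          vppcom_session_read (sid, buf, sizeof (buf));
          vppcom_session_close (sid);
        }
      vppcom_app_destroy ();
      return 0;
    }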
diff --git a/src/uri/vppcom_test.conf b/src/uri/vppcom_test.conf
new file mode 100644
index 00000000..e5ac4636
--- /dev/null
+++ b/src/uri/vppcom_test.conf
@@ -0,0 +1,25 @@
+# Test VPPCOM config file
+vppcom {
+ heapsize 1
+ api-prefix daw # this is a comment
+ uid 1020 this is also a comment.
+ gid 1020
+# This is yet another comment!
+ segment-baseva 0x300000000
+ segment-size 0x10000000
+ segment-size 268435456
+ add-segment-size 0x1000000
+ add-segment-size 134217728
+ preallocated-fifo-pairs 16
+ rx-fifo-size 3145728
+ rx-fifo-size 0x300000
+ tx-fifo-size 3145728
+ tx-fifo-size 0x300000
+ event-queue-size 1024
+ event-queue-size 0x400
+ listen-queue-size 32
+ listen-queue-size 0x20
+ app-timeout 54.3
+ session-timeout 66.6
+ accept-timeout 0.1
+}
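
The library presumably picks this file up via the VPPCOM_CONF environment
variable, falling back to VPPCOM_CONF_DEFAULT, both defined in vppcom.h
above; a hedged sketch of steering a test run at it from the application's
startup path (the relative path is a placeholder):

    #include <stdlib.h>
    #include "vppcom.h"

    /* Sketch: select the test config before the library initializes. */
    setenv (VPPCOM_CONF_ENV, "src/uri/vppcom_test.conf", 1 /* overwrite */);
    vppcom_app_create ("conf-test-app");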
diff --git a/src/vat/api_format.c b/src/vat/api_format.c
new file mode 100644
index 00000000..02300216
--- /dev/null
+++ b/src/vat/api_format.c
@@ -0,0 +1,21027 @@
+/*
+ *------------------------------------------------------------------
+ * api_format.c
+ *
+ * Copyright (c) 2014-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2tp/l2tp.h>
+#include <vnet/vxlan/vxlan.h>
+#include <vnet/gre/gre.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+#include <vnet/l2/l2_classify.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/classify/input_acl.h>
+#include <vnet/classify/policer_classify.h>
+#include <vnet/classify/flow_classify.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ikev2.h>
+#include <inttypes.h>
+#include <vnet/map/map.h>
+#include <vnet/cop/cop.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip_source_and_port_range_check.h>
+#include <vnet/policer/xlate.h>
+#include <vnet/span/span.h>
+#include <vnet/policer/policer.h>
+#include <vnet/policer/police.h>
+#include <vnet/mfib/mfib_types.h>
+
+#include "vat/json_format.h"
+
+#include <inttypes.h>
+#include <sys/stat.h>
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+
+#define __plugin_msg_base 0
+#include <vlibapi/vat_helper_macros.h>
+
+f64
+vat_time_now (vat_main_t * vam)
+{
+#if VPP_API_TEST_BUILTIN
+ return vlib_time_now (vam->vlib_main);
+#else
+ return clib_time_now (&vam->clib_time);
+#endif
+}
+
+void
+errmsg (char *fmt, ...)
+{
+ vat_main_t *vam = &vat_main;
+ va_list va;
+ u8 *s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ vec_add1 (s, 0);
+
+#if VPP_API_TEST_BUILTIN
+ vlib_cli_output (vam->vlib_main, (char *) s);
+#else
+ {
+ if (vam->ifp != stdin)
+ fformat (vam->ofp, "%s(%d): \n", vam->current_file,
+ vam->input_line_number);
+ fformat (vam->ofp, (char *) s);
+ fflush (vam->ofp);
+ }
+#endif
+
+ vec_free (s);
+}
+
+#if VPP_API_TEST_BUILTIN == 0
+static uword
+api_unformat_sw_if_index (unformat_input_t * input, va_list * args)
+{
+ vat_main_t *vam = va_arg (*args, vat_main_t *);
+ u32 *result = va_arg (*args, u32 *);
+ u8 *if_name;
+ uword *p;
+
+ if (!unformat (input, "%s", &if_name))
+ return 0;
+
+ p = hash_get_mem (vam->sw_if_index_by_interface_name, if_name);
+ if (p == 0)
+ return 0;
+ *result = p[0];
+ return 1;
+}
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+uword
+unformat_ip4_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ unsigned a[4];
+
+ if (!unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3]))
+ return 0;
+
+ if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256)
+ return 0;
+
+ result[0] = a[0];
+ result[1] = a[1];
+ result[2] = a[2];
+ result[3] = a[3];
+
+ return 1;
+}
+
+uword
+unformat_ethernet_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ u32 i, a[6];
+
+ if (!unformat (input, "%_%x:%x:%x:%x:%x:%x%_",
+ &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]))
+ return 0;
+
+ /* Check range. */
+ for (i = 0; i < 6; i++)
+ if (a[i] >= (1 << 8))
+ return 0;
+
+ for (i = 0; i < 6; i++)
+ result[i] = a[i];
+
+ return 1;
+}
+
+/* Returns ethernet type as an int in host byte order. */
+uword
+unformat_ethernet_type_host_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ int type;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x", &type) || unformat (input, "%d", &type))
+ {
+ if (type >= (1 << 16))
+ return 0;
+ *result = type;
+ return 1;
+ }
+ return 0;
+}
+
+/* Parse an IP6 address. */
+uword
+unformat_ip6_address (unformat_input_t * input, va_list * args)
+{
+ ip6_address_t *result = va_arg (*args, ip6_address_t *);
+ u16 hex_quads[8];
+ uword hex_quad, n_hex_quads, hex_digit, n_hex_digits;
+ uword c, n_colon, double_colon_index;
+
+ n_hex_quads = hex_quad = n_hex_digits = n_colon = 0;
+ double_colon_index = ARRAY_LEN (hex_quads);
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ hex_digit = 16;
+ if (c >= '0' && c <= '9')
+ hex_digit = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ hex_digit = c + 10 - 'a';
+ else if (c >= 'A' && c <= 'F')
+ hex_digit = c + 10 - 'A';
+ else if (c == ':' && n_colon < 2)
+ n_colon++;
+ else
+ {
+ unformat_put_input (input);
+ break;
+ }
+
+ /* Too many hex quads. */
+ if (n_hex_quads >= ARRAY_LEN (hex_quads))
+ return 0;
+
+ if (hex_digit < 16)
+ {
+ hex_quad = (hex_quad << 4) | hex_digit;
+
+ /* Hex quad must fit in 16 bits. */
+ if (n_hex_digits >= 4)
+ return 0;
+
+ n_colon = 0;
+ n_hex_digits++;
+ }
+
+ /* Save position of :: */
+ if (n_colon == 2)
+ {
+ /* More than one :: ? */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ return 0;
+ double_colon_index = n_hex_quads;
+ }
+
+ if (n_colon > 0 && n_hex_digits > 0)
+ {
+ hex_quads[n_hex_quads++] = hex_quad;
+ hex_quad = 0;
+ n_hex_digits = 0;
+ }
+ }
+
+ if (n_hex_digits > 0)
+ hex_quads[n_hex_quads++] = hex_quad;
+
+ {
+ word i;
+
+ /* Expand :: to appropriate number of zero hex quads. */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ {
+ word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads;
+
+ for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--)
+ hex_quads[n_zero + i] = hex_quads[i];
+
+ for (i = 0; i < n_zero; i++)
+ hex_quads[double_colon_index + i] = 0;
+
+ n_hex_quads = ARRAY_LEN (hex_quads);
+ }
+
+ /* Too few hex quads given. */
+ if (n_hex_quads < ARRAY_LEN (hex_quads))
+ return 0;
+
+ for (i = 0; i < ARRAY_LEN (hex_quads); i++)
+ result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]);
+
+ return 1;
+ }
+}
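
These parsers plug into the standard vppinfra %U unformat convention; a
minimal sketch of invoking one directly (the literal input string is
illustrative):

    /* Sketch: parse an IPv6 address from a string via %U. */
    unformat_input_t input;
    ip6_address_t addr;

    unformat_init_string (&input, "2001:db8::1", 11 /* strlen */);
    if (unformat (&input, "%U", unformat_ip6_address, &addr))
      ;                             /* addr holds net-order hex quads */
    unformat_free (&input);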
+
+uword
+unformat_ipsec_policy_action (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_POLICY_ACTION_##f;
+ foreach_ipsec_policy_action
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+uword
+unformat_ipsec_crypto_alg (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_CRYPTO_ALG_##f;
+ foreach_ipsec_crypto_alg
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+u8 *
+format_ipsec_crypto_alg (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u8 *t = 0;
+
+ switch (i)
+ {
+#define _(v,f,str) case IPSEC_CRYPTO_ALG_##f: t = (u8 *) str; break;
+ foreach_ipsec_crypto_alg
+#undef _
+ default:
+ return format (s, "unknown");
+ }
+ return format (s, "%s", t);
+}
+
+uword
+unformat_ipsec_integ_alg (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_INTEG_ALG_##f;
+ foreach_ipsec_integ_alg
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+u8 *
+format_ipsec_integ_alg (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u8 *t = 0;
+
+ switch (i)
+ {
+#define _(v,f,str) case IPSEC_INTEG_ALG_##f: t = (u8 *) str; break;
+ foreach_ipsec_integ_alg
+#undef _
+ default:
+ return format (s, "unknown");
+ }
+ return format (s, "%s", t);
+}
+
+uword
+unformat_ikev2_auth_method (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = IKEV2_AUTH_METHOD_##f;
+ foreach_ikev2_auth_method
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+uword
+unformat_ikev2_id_type (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = IKEV2_ID_TYPE_##f;
+ foreach_ikev2_id_type
+#undef _
+ else
+ return 0;
+ return 1;
+}
+#else /* VPP_API_TEST_BUILTIN == 1 */
+static uword
+api_unformat_sw_if_index (unformat_input_t * input, va_list * args)
+{
+ vat_main_t *vam __attribute__ ((unused)) = va_arg (*args, vat_main_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 *result = va_arg (*args, u32 *);
+ u32 sw_if_index;
+
+ if (!unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ return 0;
+
+ *result = sw_if_index;
+ return 1;
+}
+#endif /* VPP_API_TEST_BUILTIN */
+
+static uword
+unformat_policer_rate_type (unformat_input_t * input, va_list * args)
+{
+ u8 *r = va_arg (*args, u8 *);
+
+ if (unformat (input, "kbps"))
+ *r = SSE2_QOS_RATE_KBPS;
+ else if (unformat (input, "pps"))
+ *r = SSE2_QOS_RATE_PPS;
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_round_type (unformat_input_t * input, va_list * args)
+{
+ u8 *r = va_arg (*args, u8 *);
+
+ if (unformat (input, "closest"))
+ *r = SSE2_QOS_ROUND_TO_CLOSEST;
+ else if (unformat (input, "up"))
+ *r = SSE2_QOS_ROUND_TO_UP;
+ else if (unformat (input, "down"))
+ *r = SSE2_QOS_ROUND_TO_DOWN;
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_type (unformat_input_t * input, va_list * args)
+{
+ u8 *r = va_arg (*args, u8 *);
+
+ if (unformat (input, "1r2c"))
+ *r = SSE2_QOS_POLICER_TYPE_1R2C;
+ else if (unformat (input, "1r3c"))
+ *r = SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697;
+ else if (unformat (input, "2r3c-2698"))
+ *r = SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698;
+ else if (unformat (input, "2r3c-4115"))
+ *r = SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115;
+ else if (unformat (input, "2r3c-mef5cf1"))
+ *r = SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1;
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_dscp (unformat_input_t * input, va_list * va)
+{
+ u8 *r = va_arg (*va, u8 *);
+
+ if (0);
+#define _(v,f,str) else if (unformat (input, str)) *r = VNET_DSCP_##f;
+ foreach_vnet_dscp
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_action_type (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_action_params_st *a
+ = va_arg (*va, sse2_qos_pol_action_params_st *);
+
+ if (unformat (input, "drop"))
+ a->action_type = SSE2_QOS_ACTION_DROP;
+ else if (unformat (input, "transmit"))
+ a->action_type = SSE2_QOS_ACTION_TRANSMIT;
+ else if (unformat (input, "mark-and-transmit %U", unformat_dscp, &a->dscp))
+ a->action_type = SSE2_QOS_ACTION_MARK_AND_TRANSMIT;
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_classify_table_type (unformat_input_t * input, va_list * va)
+{
+ u32 *r = va_arg (*va, u32 *);
+ u32 tid;
+
+ if (unformat (input, "ip4"))
+ tid = POLICER_CLASSIFY_TABLE_IP4;
+ else if (unformat (input, "ip6"))
+ tid = POLICER_CLASSIFY_TABLE_IP6;
+ else if (unformat (input, "l2"))
+ tid = POLICER_CLASSIFY_TABLE_L2;
+ else
+ return 0;
+
+ *r = tid;
+ return 1;
+}
+
+static uword
+unformat_flow_classify_table_type (unformat_input_t * input, va_list * va)
+{
+ u32 *r = va_arg (*va, u32 *);
+ u32 tid;
+
+ if (unformat (input, "ip4"))
+ tid = FLOW_CLASSIFY_TABLE_IP4;
+ else if (unformat (input, "ip6"))
+ tid = FLOW_CLASSIFY_TABLE_IP6;
+ else
+ return 0;
+
+ *r = tid;
+ return 1;
+}
+
+static const char *mfib_flag_names[] = MFIB_ENTRY_NAMES_SHORT;
+static const char *mfib_flag_long_names[] = MFIB_ENTRY_NAMES_LONG;
+static const char *mfib_itf_flag_long_names[] = MFIB_ITF_NAMES_LONG;
+static const char *mfib_itf_flag_names[] = MFIB_ITF_NAMES_SHORT;
+
+#if (VPP_API_TEST_BUILTIN==0)
+uword
+unformat_mfib_itf_flags (unformat_input_t * input, va_list * args)
+{
+ mfib_itf_flags_t old, *iflags = va_arg (*args, mfib_itf_flags_t *);
+ mfib_itf_attribute_t attr;
+
+ old = *iflags;
+ FOR_EACH_MFIB_ITF_ATTRIBUTE (attr)
+ {
+ if (unformat (input, mfib_itf_flag_long_names[attr]))
+ *iflags |= (1 << attr);
+ }
+ FOR_EACH_MFIB_ITF_ATTRIBUTE (attr)
+ {
+ if (unformat (input, mfib_itf_flag_names[attr]))
+ *iflags |= (1 << attr);
+ }
+
+ return (old == *iflags ? 0 : 1);
+}
+
+uword
+unformat_mfib_entry_flags (unformat_input_t * input, va_list * args)
+{
+ mfib_entry_flags_t old, *eflags = va_arg (*args, mfib_entry_flags_t *);
+ mfib_entry_attribute_t attr;
+
+ old = *eflags;
+ FOR_EACH_MFIB_ATTRIBUTE (attr)
+ {
+ if (unformat (input, mfib_flag_long_names[attr]))
+ *eflags |= (1 << attr);
+ }
+ FOR_EACH_MFIB_ATTRIBUTE (attr)
+ {
+ if (unformat (input, mfib_flag_names[attr]))
+ *eflags |= (1 << attr);
+ }
+
+ return (old == *eflags ? 0 : 1);
+}
+
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+ i_max_n_zero = ARRAY_LEN (a->as_u16);
+ max_n_zeros = 0;
+ i_first_zero = i_max_n_zero;
+ n_zeros = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ u32 is_zero = a->as_u16[i] == 0;
+ if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+ {
+ i_first_zero = i;
+ n_zeros = 0;
+ }
+ n_zeros += is_zero;
+ if ((!is_zero && n_zeros > max_n_zeros)
+ || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+ {
+ i_max_n_zero = i_first_zero;
+ max_n_zeros = n_zeros;
+ i_first_zero = ARRAY_LEN (a->as_u16);
+ n_zeros = 0;
+ }
+ }
+
+ last_double_colon = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == i_max_n_zero && max_n_zeros > 1)
+ {
+ s = format (s, "::");
+ i += max_n_zeros - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+/* Format an IP46 address. */
+u8 *
+format_ip46_address (u8 * s, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ int is_ip4 = 1;
+
+ switch (type)
+ {
+ case IP46_TYPE_ANY:
+ is_ip4 = ip46_address_is_ip4 (ip46);
+ break;
+ case IP46_TYPE_IP4:
+ is_ip4 = 1;
+ break;
+ case IP46_TYPE_IP6:
+ is_ip4 = 0;
+ break;
+ }
+
+ return is_ip4 ?
+ format (s, "%U", format_ip4_address, &ip46->ip4) :
+ format (s, "%U", format_ip6_address, &ip46->ip6);
+}
+
+u8 *
+format_ethernet_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
+#endif
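
And the converse direction, rendering with format's %U convention (a minimal
sketch for the non-builtin build above; the sample address is illustrative):

    /* Sketch: render an ip46 address into a u8 vector (not NUL-terminated). */
    ip46_address_t a = { .ip4.as_u32 = clib_host_to_net_u32 (0x0a000001) };
    u8 *s = format (0, "%U", format_ip46_address, &a, IP46_TYPE_IP4);
    /* s now contains "10.0.0.1" */
    vec_free (s);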
+
+static void
+increment_v4_address (ip4_address_t * a)
+{
+ u32 v;
+
+ v = ntohl (a->as_u32) + 1;
+ a->as_u32 = htonl (v);
+}
+
+static void
+increment_v6_address (ip6_address_t * a)
+{
+ u64 v0, v1;
+
+ v0 = clib_net_to_host_u64 (a->as_u64[0]);
+ v1 = clib_net_to_host_u64 (a->as_u64[1]);
+
+ v1 += 1;
+ if (v1 == 0)
+ v0 += 1;
+ a->as_u64[0] = clib_host_to_net_u64 (v0);
+ a->as_u64[1] = clib_host_to_net_u64 (v1);
+}
+
+static void
+increment_mac_address (u64 * mac)
+{
+ u64 tmp = *mac;
+
+ tmp = clib_net_to_host_u64 (tmp);
+ tmp += 1 << 16; /* skip unused (least significant) octets */
+ tmp = clib_host_to_net_u64 (tmp);
+ *mac = tmp;
+}
+
+static void vl_api_create_loopback_reply_t_handler
+ (vl_api_create_loopback_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->regenerate_interface_table = 1;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+}
+
+static void vl_api_create_loopback_reply_t_handler_json
+ (vl_api_create_loopback_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_create_loopback_instance_reply_t_handler
+ (vl_api_create_loopback_instance_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->regenerate_interface_table = 1;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+}
+
+static void vl_api_create_loopback_instance_reply_t_handler_json
+ (vl_api_create_loopback_instance_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_af_packet_create_reply_t_handler
+ (vl_api_af_packet_create_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->regenerate_interface_table = 1;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+}
+
+static void vl_api_af_packet_create_reply_t_handler_json
+ (vl_api_af_packet_create_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_create_vlan_subif_reply_t_handler
+ (vl_api_create_vlan_subif_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->regenerate_interface_table = 1;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+}
+
+static void vl_api_create_vlan_subif_reply_t_handler_json
+ (vl_api_create_vlan_subif_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_create_subif_reply_t_handler
+ (vl_api_create_subif_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->regenerate_interface_table = 1;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+}
+
+static void vl_api_create_subif_reply_t_handler_json
+ (vl_api_create_subif_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_interface_name_renumber_reply_t_handler
+ (vl_api_interface_name_renumber_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->regenerate_interface_table = 1;
+ vam->result_ready = 1;
+}
+
+static void vl_api_interface_name_renumber_reply_t_handler_json
+ (vl_api_interface_name_renumber_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+/*
+ * Special-case: build the interface table and maintain
+ * the next loopback sw_if_index variable.
+ */
+static void vl_api_sw_interface_details_t_handler
+ (vl_api_sw_interface_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *s = format (0, "%s%c", mp->interface_name, 0);
+
+ hash_set_mem (vam->sw_if_index_by_interface_name, s,
+ ntohl (mp->sw_if_index));
+
+ /* In sub interface case, fill the sub interface table entry */
+ if (mp->sw_if_index != mp->sup_sw_if_index)
+ {
+ sw_interface_subif_t *sub = NULL;
+
+ vec_add2 (vam->sw_if_subif_table, sub, 1);
+
+ vec_validate (sub->interface_name, strlen ((char *) s) + 1);
+ strncpy ((char *) sub->interface_name, (char *) s,
+ vec_len (sub->interface_name));
+ sub->sw_if_index = ntohl (mp->sw_if_index);
+ sub->sub_id = ntohl (mp->sub_id);
+
+ sub->sub_dot1ad = mp->sub_dot1ad;
+ sub->sub_number_of_tags = mp->sub_number_of_tags;
+ sub->sub_outer_vlan_id = ntohs (mp->sub_outer_vlan_id);
+ sub->sub_inner_vlan_id = ntohs (mp->sub_inner_vlan_id);
+ sub->sub_exact_match = mp->sub_exact_match;
+ sub->sub_default = mp->sub_default;
+ sub->sub_outer_vlan_id_any = mp->sub_outer_vlan_id_any;
+ sub->sub_inner_vlan_id_any = mp->sub_inner_vlan_id_any;
+
+ /* vlan tag rewrite */
+ sub->vtr_op = ntohl (mp->vtr_op);
+ sub->vtr_push_dot1q = ntohl (mp->vtr_push_dot1q);
+ sub->vtr_tag1 = ntohl (mp->vtr_tag1);
+ sub->vtr_tag2 = ntohl (mp->vtr_tag2);
+ }
+}
+
+static void vl_api_sw_interface_details_t_handler_json
+ (vl_api_sw_interface_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ vat_json_object_add_uint (node, "sup_sw_if_index",
+ ntohl (mp->sup_sw_if_index));
+ vat_json_object_add_uint (node, "l2_address_length",
+ ntohl (mp->l2_address_length));
+ vat_json_object_add_bytes (node, "l2_address", mp->l2_address,
+ sizeof (mp->l2_address));
+ vat_json_object_add_string_copy (node, "interface_name",
+ mp->interface_name);
+ vat_json_object_add_uint (node, "admin_up_down", mp->admin_up_down);
+ vat_json_object_add_uint (node, "link_up_down", mp->link_up_down);
+ vat_json_object_add_uint (node, "link_duplex", mp->link_duplex);
+ vat_json_object_add_uint (node, "link_speed", mp->link_speed);
+ vat_json_object_add_uint (node, "mtu", ntohs (mp->link_mtu));
+ vat_json_object_add_uint (node, "sub_id", ntohl (mp->sub_id));
+ vat_json_object_add_uint (node, "sub_dot1ad", mp->sub_dot1ad);
+ vat_json_object_add_uint (node, "sub_number_of_tags",
+ mp->sub_number_of_tags);
+ vat_json_object_add_uint (node, "sub_outer_vlan_id",
+ ntohs (mp->sub_outer_vlan_id));
+ vat_json_object_add_uint (node, "sub_inner_vlan_id",
+ ntohs (mp->sub_inner_vlan_id));
+ vat_json_object_add_uint (node, "sub_exact_match", mp->sub_exact_match);
+ vat_json_object_add_uint (node, "sub_default", mp->sub_default);
+ vat_json_object_add_uint (node, "sub_outer_vlan_id_any",
+ mp->sub_outer_vlan_id_any);
+ vat_json_object_add_uint (node, "sub_inner_vlan_id_any",
+ mp->sub_inner_vlan_id_any);
+ vat_json_object_add_uint (node, "vtr_op", ntohl (mp->vtr_op));
+ vat_json_object_add_uint (node, "vtr_push_dot1q",
+ ntohl (mp->vtr_push_dot1q));
+ vat_json_object_add_uint (node, "vtr_tag1", ntohl (mp->vtr_tag1));
+ vat_json_object_add_uint (node, "vtr_tag2", ntohl (mp->vtr_tag2));
+ if (mp->sub_dot1ah)
+ {
+ vat_json_object_add_string_copy (node, "pbb_vtr_dmac",
+ format (0, "%U",
+ format_ethernet_address,
+ &mp->b_dmac));
+ vat_json_object_add_string_copy (node, "pbb_vtr_smac",
+ format (0, "%U",
+ format_ethernet_address,
+ &mp->b_smac));
+ vat_json_object_add_uint (node, "pbb_vtr_b_vlanid", mp->b_vlanid);
+ vat_json_object_add_uint (node, "pbb_vtr_i_sid", mp->i_sid);
+ }
+}
+
+#if VPP_API_TEST_BUILTIN == 0
+static void vl_api_sw_interface_event_t_handler
+ (vl_api_sw_interface_event_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ if (vam->interface_event_display)
+ errmsg ("interface flags: sw_if_index %d %s %s",
+ ntohl (mp->sw_if_index),
+ mp->admin_up_down ? "admin-up" : "admin-down",
+ mp->link_up_down ? "link-up" : "link-down");
+}
+#endif
+
+static void vl_api_sw_interface_event_t_handler_json
+ (vl_api_sw_interface_event_t * mp)
+{
+ /* JSON output not supported */
+}
+
+static void
+vl_api_cli_reply_t_handler (vl_api_cli_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->shmem_result = uword_to_pointer (mp->reply_in_shmem, u8 *);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_cli_reply_t_handler_json (vl_api_cli_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+ api_main_t *am = &api_main;
+ void *oldheap;
+ u8 *reply;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "reply_in_shmem",
+ ntohl (mp->reply_in_shmem));
+ /* Toss the shared-memory original... */
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ reply = uword_to_pointer (mp->reply_in_shmem, u8 *);
+ vec_free (reply);
+
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_cli_inband_reply_t_handler (vl_api_cli_inband_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->cmd_reply = mp->reply;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_cli_inband_reply_t_handler_json (vl_api_cli_inband_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_string_copy (&node, "reply", mp->reply);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_classify_add_del_table_reply_t_handler
+ (vl_api_classify_add_del_table_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0 &&
+ ((mp->new_table_index != 0xFFFFFFFF) ||
+ (mp->skip_n_vectors != 0xFFFFFFFF) ||
+ (mp->match_n_vectors != 0xFFFFFFFF)))
+ /*
+ * Note: this is just barely thread-safe, depends on
+ * the main thread spinning waiting for an answer...
+ */
+ errmsg ("new index %d, skip_n_vectors %d, match_n_vectors %d",
+ ntohl (mp->new_table_index),
+ ntohl (mp->skip_n_vectors), ntohl (mp->match_n_vectors));
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_classify_add_del_table_reply_t_handler_json
+ (vl_api_classify_add_del_table_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "new_table_index",
+ ntohl (mp->new_table_index));
+ vat_json_object_add_uint (&node, "skip_n_vectors",
+ ntohl (mp->skip_n_vectors));
+ vat_json_object_add_uint (&node, "match_n_vectors",
+ ntohl (mp->match_n_vectors));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_get_node_index_reply_t_handler
+ (vl_api_get_node_index_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0)
+ errmsg ("node index %d", ntohl (mp->node_index));
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_get_node_index_reply_t_handler_json
+ (vl_api_get_node_index_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "node_index", ntohl (mp->node_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_get_next_index_reply_t_handler
+ (vl_api_get_next_index_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0)
+ errmsg ("next node index %d", ntohl (mp->next_index));
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_get_next_index_reply_t_handler_json
+ (vl_api_get_next_index_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "next_index", ntohl (mp->next_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_add_node_next_reply_t_handler
+ (vl_api_add_node_next_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0)
+ errmsg ("next index %d", ntohl (mp->next_index));
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_add_node_next_reply_t_handler_json
+ (vl_api_add_node_next_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "next_index", ntohl (mp->next_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_show_version_reply_t_handler
+ (vl_api_show_version_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval >= 0)
+ {
+ errmsg (" program: %s", mp->program);
+ errmsg (" version: %s", mp->version);
+ errmsg (" build date: %s", mp->build_date);
+ errmsg ("build directory: %s", mp->build_directory);
+ }
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void vl_api_show_version_reply_t_handler_json
+ (vl_api_show_version_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_string_copy (&node, "program", mp->program);
+ vat_json_object_add_string_copy (&node, "version", mp->version);
+ vat_json_object_add_string_copy (&node, "build_date", mp->build_date);
+ vat_json_object_add_string_copy (&node, "build_directory",
+ mp->build_directory);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_ip4_arp_event_t_handler (vl_api_ip4_arp_event_t * mp)
+{
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ errmsg ("arp %s event: pid %d address %U new mac %U sw_if_index %d\n",
+ mp->mac_ip ? "mac/ip binding" : "address resolution",
+ ntohl (mp->pid), format_ip4_address, &mp->address,
+ format_ethernet_address, mp->new_mac, sw_if_index);
+}
+
+static void
+vl_api_ip4_arp_event_t_handler_json (vl_api_ip4_arp_event_t * mp)
+{
+ /* JSON output not supported */
+}
+
+static void
+vl_api_ip6_nd_event_t_handler (vl_api_ip6_nd_event_t * mp)
+{
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ errmsg ("ip6 nd %s event: pid %d address %U new mac %U sw_if_index %d\n",
+ mp->mac_ip ? "mac/ip binding" : "address resolution",
+ ntohl (mp->pid), format_ip6_address, mp->address,
+ format_ethernet_address, mp->new_mac, sw_if_index);
+}
+
+static void
+vl_api_ip6_nd_event_t_handler_json (vl_api_ip6_nd_event_t * mp)
+{
+ /* JSON output not supported */
+}
+
+static void
+vl_api_l2_macs_event_t_handler (vl_api_l2_macs_event_t * mp)
+{
+ u32 n_macs = ntohl (mp->n_macs);
+ errmsg ("L2MAC event recived with pid %d cl-idx %d for %d macs: \n",
+ ntohl (mp->pid), mp->client_index, n_macs);
+ int i;
+ for (i = 0; i < n_macs; i++)
+ {
+ vl_api_mac_entry_t *mac = &mp->mac[i];
+ errmsg (" [%d] sw_if_index %d mac_addr %U is_del %d \n",
+ i + 1, ntohl (mac->sw_if_index),
+ format_ethernet_address, mac->mac_addr, mac->is_del);
+ if (i == 1000)
+ break;
+ }
+}
+
+static void
+vl_api_l2_macs_event_t_handler_json (vl_api_l2_macs_event_t * mp)
+{
+ /* JSON output not supported */
+}
+
+#define vl_api_bridge_domain_details_t_endian vl_noop_handler
+#define vl_api_bridge_domain_details_t_print vl_noop_handler
+
+/*
+ * Special-case: build the bridge domain table and maintain
+ * the next bridge-domain id variable.
+ */
+static void vl_api_bridge_domain_details_t_handler
+ (vl_api_bridge_domain_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 n_sw_ifs = ntohl (mp->n_sw_ifs);
+ int i;
+
+ print (vam->ofp, "\n%-3s %-3s %-3s %-3s %-3s %-3s",
+ " ID", "LRN", "FWD", "FLD", "BVI", "#IF");
+
+ print (vam->ofp, "%3d %3d %3d %3d %3d %3d",
+ ntohl (mp->bd_id), mp->learn, mp->forward,
+ mp->flood, ntohl (mp->bvi_sw_if_index), n_sw_ifs);
+
+ if (n_sw_ifs)
+ {
+ vl_api_bridge_domain_sw_if_t *sw_ifs;
+ print (vam->ofp, "\n\n%s %s %s", "sw_if_index", "SHG",
+ "Interface Name");
+
+ sw_ifs = mp->sw_if_details;
+ for (i = 0; i < n_sw_ifs; i++)
+ {
+ u8 *sw_if_name = 0;
+ u32 sw_if_index;
+ hash_pair_t *p;
+
+ sw_if_index = ntohl (sw_ifs->sw_if_index);
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vam->sw_if_index_by_interface_name,
+ ({
+ if ((u32) p->value[0] == sw_if_index)
+ {
+ sw_if_name = (u8 *)(p->key);
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ print (vam->ofp, "%7d %3d %s", sw_if_index,
+ sw_ifs->shg, sw_if_name ? (char *) sw_if_name :
+ "sw_if_index not found!");
+
+ sw_ifs++;
+ }
+ }
+}
+
+static void vl_api_bridge_domain_details_t_handler_json
+ (vl_api_bridge_domain_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node, *array = NULL;
+ u32 n_sw_ifs = ntohl (mp->n_sw_ifs);
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "bd_id", ntohl (mp->bd_id));
+ vat_json_object_add_uint (node, "flood", mp->flood);
+ vat_json_object_add_uint (node, "forward", mp->forward);
+ vat_json_object_add_uint (node, "learn", mp->learn);
+ vat_json_object_add_uint (node, "bvi_sw_if_index",
+ ntohl (mp->bvi_sw_if_index));
+ vat_json_object_add_uint (node, "n_sw_ifs", n_sw_ifs);
+ array = vat_json_object_add (node, "sw_if");
+ vat_json_init_array (array);
+
+ if (n_sw_ifs)
+ {
+ vl_api_bridge_domain_sw_if_t *sw_ifs;
+ int i;
+
+ sw_ifs = mp->sw_if_details;
+ for (i = 0; i < n_sw_ifs; i++)
+ {
+ node = vat_json_array_add (array);
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index",
+ ntohl (sw_ifs->sw_if_index));
+ vat_json_object_add_uint (node, "shg", sw_ifs->shg);
+ sw_ifs++;
+ }
+ }
+}
+
+static void vl_api_control_ping_reply_t_handler
+ (vl_api_control_ping_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_control_ping_reply_t_handler_json
+ (vl_api_control_ping_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (VAT_JSON_NONE != vam->json_tree.type)
+ {
+ vat_json_print (vam->ofp, &vam->json_tree);
+ vat_json_free (&vam->json_tree);
+ vam->json_tree.type = VAT_JSON_NONE;
+ }
+ else
+ {
+ /* just print [] */
+ vat_json_init_array (&vam->json_tree);
+ vat_json_print (vam->ofp, &vam->json_tree);
+ vam->json_tree.type = VAT_JSON_NONE;
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_bridge_domain_set_mac_age_reply_t_handler
+ (vl_api_bridge_domain_set_mac_age_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_bridge_domain_set_mac_age_reply_t_handler_json
+ (vl_api_bridge_domain_set_mac_age_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_l2_flags_reply_t_handler (vl_api_l2_flags_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_l2_flags_reply_t_handler_json
+ (vl_api_l2_flags_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "resulting_feature_bitmap",
+ ntohl (mp->resulting_feature_bitmap));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_bridge_flags_reply_t_handler
+ (vl_api_bridge_flags_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_bridge_flags_reply_t_handler_json
+ (vl_api_bridge_flags_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "resulting_feature_bitmap",
+ ntohl (mp->resulting_feature_bitmap));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_tap_connect_reply_t_handler
+ (vl_api_tap_connect_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+
+}
+
+static void vl_api_tap_connect_reply_t_handler_json
+ (vl_api_tap_connect_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+
+}
+
+static void
+vl_api_tap_modify_reply_t_handler (vl_api_tap_modify_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_tap_modify_reply_t_handler_json
+ (vl_api_tap_modify_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_tap_delete_reply_t_handler (vl_api_tap_delete_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_tap_delete_reply_t_handler_json
+ (vl_api_tap_delete_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_mpls_tunnel_add_del_reply_t_handler
+ (vl_api_mpls_tunnel_add_del_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_mpls_tunnel_add_del_reply_t_handler_json
+ (vl_api_mpls_tunnel_add_del_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "tunnel_sw_if_index",
+ ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_l2tpv3_create_tunnel_reply_t_handler
+ (vl_api_l2tpv3_create_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_l2tpv3_create_tunnel_reply_t_handler_json
+ (vl_api_l2tpv3_create_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_gpe_add_del_fwd_entry_reply_t_handler
+ (vl_api_gpe_add_del_fwd_entry_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_gpe_add_del_fwd_entry_reply_t_handler_json
+ (vl_api_gpe_add_del_fwd_entry_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "fwd_entry_index",
+ clib_net_to_host_u32 (mp->fwd_entry_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+u8 *
+format_lisp_transport_protocol (u8 * s, va_list * args)
+{
+ u32 proto = va_arg (*args, u32);
+
+ switch (proto)
+ {
+ case 1:
+ return format (s, "udp");
+ case 2:
+ return format (s, "api");
+ default:
+ return 0;
+ }
+ return 0;
+}
+
+static void vl_api_one_get_transport_protocol_reply_t_handler
+ (vl_api_one_get_transport_protocol_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ u32 proto = mp->protocol;
+ print (vam->ofp, "Transport protocol: %U",
+ format_lisp_transport_protocol, proto);
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_one_get_transport_protocol_reply_t_handler_json
+ (vl_api_one_get_transport_protocol_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+ u8 *s;
+
+ s = format (0, "%U", format_lisp_transport_protocol, mp->protocol);
+ vec_add1 (s, 0);
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_string_copy (&node, "transport-protocol", s);
+
+ vec_free (s);
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_one_add_del_locator_set_reply_t_handler
+ (vl_api_one_add_del_locator_set_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_one_add_del_locator_set_reply_t_handler_json
+ (vl_api_one_add_del_locator_set_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "locator_set_index", ntohl (mp->ls_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_vxlan_add_del_tunnel_reply_t_handler
+ (vl_api_vxlan_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_vxlan_add_del_tunnel_reply_t_handler_json
+ (vl_api_vxlan_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_vxlan_gpe_add_del_tunnel_reply_t_handler
+ (vl_api_vxlan_gpe_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_vxlan_gpe_add_del_tunnel_reply_t_handler_json
+ (vl_api_vxlan_gpe_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_gre_add_del_tunnel_reply_t_handler
+ (vl_api_gre_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_gre_add_del_tunnel_reply_t_handler_json
+ (vl_api_gre_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_create_vhost_user_if_reply_t_handler
+ (vl_api_create_vhost_user_if_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_create_vhost_user_if_reply_t_handler_json
+ (vl_api_create_vhost_user_if_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_ip_address_details_t_handler
+ (vl_api_ip_address_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ static ip_address_details_t empty_ip_address_details = { {0} };
+ ip_address_details_t *address = NULL;
+ ip_details_t *current_ip_details = NULL;
+ ip_details_t *details = NULL;
+
+ details = vam->ip_details_by_sw_if_index[vam->is_ipv6];
+
+ if (!details || vam->current_sw_if_index >= vec_len (details)
+ || !details[vam->current_sw_if_index].present)
+ {
+ errmsg ("ip address details arrived but not stored");
+ errmsg ("ip_dump should be called first");
+ return;
+ }
+
+ current_ip_details = vec_elt_at_index (details, vam->current_sw_if_index);
+
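+/* Alias the addr member so the vec_ macros below can grow it in place;
+ * the alias is #undef'd once the new element has been filled in. */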
+#define addresses (current_ip_details->addr)
+
+ vec_validate_init_empty (addresses, vec_len (addresses),
+ empty_ip_address_details);
+
+ address = vec_elt_at_index (addresses, vec_len (addresses) - 1);
+
+ clib_memcpy (&address->ip, &mp->ip, sizeof (address->ip));
+ address->prefix_length = mp->prefix_length;
+#undef addresses
+}
+
+static void vl_api_ip_address_details_t_handler_json
+ (vl_api_ip_address_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ struct in6_addr ip6;
+ struct in_addr ip4;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ if (vam->is_ipv6)
+ {
+ clib_memcpy (&ip6, mp->ip, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "ip", ip6);
+ }
+ else
+ {
+ clib_memcpy (&ip4, mp->ip, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "ip", ip4);
+ }
+ vat_json_object_add_uint (node, "prefix_length", mp->prefix_length);
+}
+
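+/* ip_details only marks the interface as present in the per-family
+ * cache; the ip_address_details handler above refuses to store
+ * addresses until that flag is set. */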
+static void
+vl_api_ip_details_t_handler (vl_api_ip_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ static ip_details_t empty_ip_details = { 0 };
+ ip_details_t *ip = NULL;
+ u32 sw_if_index = ~0;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ vec_validate_init_empty (vam->ip_details_by_sw_if_index[vam->is_ipv6],
+ sw_if_index, empty_ip_details);
+
+ ip = vec_elt_at_index (vam->ip_details_by_sw_if_index[vam->is_ipv6],
+ sw_if_index);
+
+ ip->present = 1;
+}
+
+static void
+vl_api_ip_details_t_handler_json (vl_api_ip_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ vat_json_array_add_uint (&vam->json_tree,
+ clib_net_to_host_u32 (mp->sw_if_index));
+}
+
+static void vl_api_map_domain_details_t_handler_json
+ (vl_api_map_domain_details_t * mp)
+{
+ vat_json_node_t *node = NULL;
+ vat_main_t *vam = &vat_main;
+ struct in6_addr ip6;
+ struct in_addr ip4;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+
+ node = vat_json_array_add (&vam->json_tree);
+ vat_json_init_object (node);
+
+ vat_json_object_add_uint (node, "domain_index",
+ clib_net_to_host_u32 (mp->domain_index));
+ clib_memcpy (&ip6, mp->ip6_prefix, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "ip6_prefix", ip6);
+ clib_memcpy (&ip4, mp->ip4_prefix, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "ip4_prefix", ip4);
+ clib_memcpy (&ip6, mp->ip6_src, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "ip6_src", ip6);
+ vat_json_object_add_int (node, "ip6_prefix_len", mp->ip6_prefix_len);
+ vat_json_object_add_int (node, "ip4_prefix_len", mp->ip4_prefix_len);
+ vat_json_object_add_int (node, "ip6_src_len", mp->ip6_src_len);
+ vat_json_object_add_int (node, "ea_bits_len", mp->ea_bits_len);
+ vat_json_object_add_int (node, "psid_offset", mp->psid_offset);
+ vat_json_object_add_int (node, "psid_length", mp->psid_length);
+ vat_json_object_add_uint (node, "flags", mp->flags);
+ vat_json_object_add_uint (node, "mtu", clib_net_to_host_u16 (mp->mtu));
+ vat_json_object_add_int (node, "is_translation", mp->is_translation);
+}
+
+static void vl_api_map_domain_details_t_handler
+ (vl_api_map_domain_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ if (mp->is_translation)
+ {
+ print (vam->ofp,
+ "* %U/%d (ipv4-prefix) %U/%d (ipv6-prefix) %U/%d (ip6-src) index: %u",
+ format_ip4_address, mp->ip4_prefix, mp->ip4_prefix_len,
+ format_ip6_address, mp->ip6_prefix, mp->ip6_prefix_len,
+ format_ip6_address, mp->ip6_src, mp->ip6_src_len,
+ clib_net_to_host_u32 (mp->domain_index));
+ }
+ else
+ {
+ print (vam->ofp,
+ "* %U/%d (ipv4-prefix) %U/%d (ipv6-prefix) %U (ip6-src) index: %u",
+ format_ip4_address, mp->ip4_prefix, mp->ip4_prefix_len,
+ format_ip6_address, mp->ip6_prefix, mp->ip6_prefix_len,
+ format_ip6_address, mp->ip6_src,
+ clib_net_to_host_u32 (mp->domain_index));
+ }
+ print (vam->ofp, " ea-len %d psid-offset %d psid-len %d mtu %d %s",
+ mp->ea_bits_len, mp->psid_offset, mp->psid_length, mp->mtu,
+ mp->is_translation ? "map-t" : "");
+}
+
+static void vl_api_map_rule_details_t_handler_json
+ (vl_api_map_rule_details_t * mp)
+{
+ struct in6_addr ip6;
+ vat_json_node_t *node = NULL;
+ vat_main_t *vam = &vat_main;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+
+ node = vat_json_array_add (&vam->json_tree);
+ vat_json_init_object (node);
+
+ vat_json_object_add_uint (node, "psid", clib_net_to_host_u16 (mp->psid));
+ clib_memcpy (&ip6, mp->ip6_dst, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "ip6_dst", ip6);
+}
+
+static void
+vl_api_map_rule_details_t_handler (vl_api_map_rule_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ print (vam->ofp, " %d (psid) %U (ip6-dst)",
+ clib_net_to_host_u16 (mp->psid), format_ip6_address, mp->ip6_dst);
+}
+
+static void
+vl_api_dhcp_compl_event_t_handler (vl_api_dhcp_compl_event_t * mp)
+{
+ errmsg ("DHCP compl event: pid %d %s hostname %s host_addr %U "
+ "router_addr %U host_mac %U",
+ ntohl (mp->pid), mp->is_ipv6 ? "ipv6" : "ipv4", mp->hostname,
+ format_ip4_address, &mp->host_address,
+ format_ip4_address, &mp->router_address,
+ format_ethernet_address, mp->host_mac);
+}
+
+static void vl_api_dhcp_compl_event_t_handler_json
+ (vl_api_dhcp_compl_event_t * mp)
+{
+ /* JSON output not supported */
+}
+
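+/* Interface counters are cached in nested vectors indexed first by
+ * vnet counter type, then by sw_if_index; both levels are grown on
+ * demand via vec_validate_init_empty. */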
+static void
+set_simple_interface_counter (u8 vnet_counter_type, u32 sw_if_index,
+ u32 counter)
+{
+ vat_main_t *vam = &vat_main;
+ static u64 default_counter = 0;
+
+ vec_validate_init_empty (vam->simple_interface_counters, vnet_counter_type,
+ NULL);
+ vec_validate_init_empty (vam->simple_interface_counters[vnet_counter_type],
+ sw_if_index, default_counter);
+ vam->simple_interface_counters[vnet_counter_type][sw_if_index] = counter;
+}
+
+static void
+set_combined_interface_counter (u8 vnet_counter_type, u32 sw_if_index,
+ interface_counter_t counter)
+{
+ vat_main_t *vam = &vat_main;
+ static interface_counter_t default_counter = { 0, };
+
+ vec_validate_init_empty (vam->combined_interface_counters,
+ vnet_counter_type, NULL);
+ vec_validate_init_empty (vam->combined_interface_counters
+ [vnet_counter_type], sw_if_index, default_counter);
+ vam->combined_interface_counters[vnet_counter_type][sw_if_index] = counter;
+}
+
+static void vl_api_vnet_interface_simple_counters_t_handler
+ (vl_api_vnet_interface_simple_counters_t * mp)
+{
+ /* not supported */
+}
+
+static void vl_api_vnet_interface_combined_counters_t_handler
+ (vl_api_vnet_interface_combined_counters_t * mp)
+{
+ /* not supported */
+}
+
+static void vl_api_vnet_interface_simple_counters_t_handler_json
+ (vl_api_vnet_interface_simple_counters_t * mp)
+{
+ u64 *v_packets;
+ u64 packets;
+ u32 count;
+ u32 first_sw_if_index;
+ int i;
+
+ count = ntohl (mp->count);
+ first_sw_if_index = ntohl (mp->first_sw_if_index);
+
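+ /* The payload is a packed array of network-order u64 counters:
+ * read each value unaligned and byte-swap it before caching. */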
+ v_packets = (u64 *) & mp->data;
+ for (i = 0; i < count; i++)
+ {
+ packets = clib_net_to_host_u64 (clib_mem_unaligned (v_packets, u64));
+ set_simple_interface_counter (mp->vnet_counter_type,
+ first_sw_if_index + i, packets);
+ v_packets++;
+ }
+}
+
+static void vl_api_vnet_interface_combined_counters_t_handler_json
+ (vl_api_vnet_interface_combined_counters_t * mp)
+{
+ interface_counter_t counter;
+ vlib_counter_t *v;
+ u32 first_sw_if_index;
+ int i;
+ u32 count;
+
+ count = ntohl (mp->count);
+ first_sw_if_index = ntohl (mp->first_sw_if_index);
+
+ v = (vlib_counter_t *) & mp->data;
+ for (i = 0; i < count; i++)
+ {
+ counter.packets =
+ clib_net_to_host_u64 (clib_mem_unaligned (&v->packets, u64));
+ counter.bytes =
+ clib_net_to_host_u64 (clib_mem_unaligned (&v->bytes, u64));
+ set_combined_interface_counter (mp->vnet_counter_type,
+ first_sw_if_index + i, counter);
+ v++;
+ }
+}
+
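+/* Map a VRF id to its dense index in the cached FIB counter vectors
+ * by linear search; returns ~0 if the VRF has not been seen yet. */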
+static u32
+ip4_fib_counters_get_vrf_index_by_vrf_id (u32 vrf_id)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i;
+
+ for (i = 0; i < vec_len (vam->ip4_fib_counters_vrf_id_by_index); i++)
+ {
+ if (vam->ip4_fib_counters_vrf_id_by_index[i] == vrf_id)
+ {
+ return i;
+ }
+ }
+ return ~0;
+}
+
+static u32
+ip6_fib_counters_get_vrf_index_by_vrf_id (u32 vrf_id)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i;
+
+ for (i = 0; i < vec_len (vam->ip6_fib_counters_vrf_id_by_index); i++)
+ {
+ if (vam->ip6_fib_counters_vrf_id_by_index[i] == vrf_id)
+ {
+ return i;
+ }
+ }
+ return ~0;
+}
+
+static void vl_api_vnet_ip4_fib_counters_t_handler
+ (vl_api_vnet_ip4_fib_counters_t * mp)
+{
+ /* not supported */
+}
+
+static void vl_api_vnet_ip4_fib_counters_t_handler_json
+ (vl_api_vnet_ip4_fib_counters_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vl_api_ip4_fib_counter_t *v;
+ ip4_fib_counter_t *counter;
+ struct in_addr ip4;
+ u32 vrf_id;
+ u32 vrf_index;
+ u32 count;
+ int i;
+
+ vrf_id = ntohl (mp->vrf_id);
+ vrf_index = ip4_fib_counters_get_vrf_index_by_vrf_id (vrf_id);
+ if (~0 == vrf_index)
+ {
+ vrf_index = vec_len (vam->ip4_fib_counters_vrf_id_by_index);
+ vec_validate (vam->ip4_fib_counters_vrf_id_by_index, vrf_index);
+ vam->ip4_fib_counters_vrf_id_by_index[vrf_index] = vrf_id;
+ vec_validate (vam->ip4_fib_counters, vrf_index);
+ vam->ip4_fib_counters[vrf_index] = NULL;
+ }
+
+ vec_free (vam->ip4_fib_counters[vrf_index]);
+ v = (vl_api_ip4_fib_counter_t *) & mp->c;
+ count = ntohl (mp->count);
+ for (i = 0; i < count; i++)
+ {
+ vec_validate (vam->ip4_fib_counters[vrf_index], i);
+ counter = &vam->ip4_fib_counters[vrf_index][i];
+ clib_memcpy (&ip4, &v->address, sizeof (ip4));
+ counter->address = ip4;
+ counter->address_length = v->address_length;
+ counter->packets = clib_net_to_host_u64 (v->packets);
+ counter->bytes = clib_net_to_host_u64 (v->bytes);
+ v++;
+ }
+}
+
+static void vl_api_vnet_ip4_nbr_counters_t_handler
+ (vl_api_vnet_ip4_nbr_counters_t * mp)
+{
+ /* not supported */
+}
+
+static void vl_api_vnet_ip4_nbr_counters_t_handler_json
+ (vl_api_vnet_ip4_nbr_counters_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vl_api_ip4_nbr_counter_t *v;
+ ip4_nbr_counter_t *counter;
+ u32 sw_if_index;
+ u32 count;
+ int i;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+ count = ntohl (mp->count);
+ vec_validate (vam->ip4_nbr_counters, sw_if_index);
+
+ if (mp->begin)
+ vec_free (vam->ip4_nbr_counters[sw_if_index]);
+
+ v = (vl_api_ip4_nbr_counter_t *) & mp->c;
+ for (i = 0; i < count; i++)
+ {
+ vec_validate (vam->ip4_nbr_counters[sw_if_index], i);
+ counter = &vam->ip4_nbr_counters[sw_if_index][i];
+ counter->address.s_addr = v->address;
+ counter->packets = clib_net_to_host_u64 (v->packets);
+ counter->bytes = clib_net_to_host_u64 (v->bytes);
+ counter->linkt = v->link_type;
+ v++;
+ }
+}
+
+static void vl_api_vnet_ip6_fib_counters_t_handler
+ (vl_api_vnet_ip6_fib_counters_t * mp)
+{
+ /* not supported */
+}
+
+static void vl_api_vnet_ip6_fib_counters_t_handler_json
+ (vl_api_vnet_ip6_fib_counters_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vl_api_ip6_fib_counter_t *v;
+ ip6_fib_counter_t *counter;
+ struct in6_addr ip6;
+ u32 vrf_id;
+ u32 vrf_index;
+ u32 count;
+ int i;
+
+ vrf_id = ntohl (mp->vrf_id);
+ vrf_index = ip6_fib_counters_get_vrf_index_by_vrf_id (vrf_id);
+ if (~0 == vrf_index)
+ {
+ vrf_index = vec_len (vam->ip6_fib_counters_vrf_id_by_index);
+ vec_validate (vam->ip6_fib_counters_vrf_id_by_index, vrf_index);
+ vam->ip6_fib_counters_vrf_id_by_index[vrf_index] = vrf_id;
+ vec_validate (vam->ip6_fib_counters, vrf_index);
+ vam->ip6_fib_counters[vrf_index] = NULL;
+ }
+
+ vec_free (vam->ip6_fib_counters[vrf_index]);
+ v = (vl_api_ip6_fib_counter_t *) & mp->c;
+ count = ntohl (mp->count);
+ for (i = 0; i < count; i++)
+ {
+ vec_validate (vam->ip6_fib_counters[vrf_index], i);
+ counter = &vam->ip6_fib_counters[vrf_index][i];
+ clib_memcpy (&ip6, &v->address, sizeof (ip6));
+ counter->address = ip6;
+ counter->address_length = v->address_length;
+ counter->packets = clib_net_to_host_u64 (v->packets);
+ counter->bytes = clib_net_to_host_u64 (v->bytes);
+ v++;
+ }
+}
+
+static void vl_api_vnet_ip6_nbr_counters_t_handler
+ (vl_api_vnet_ip6_nbr_counters_t * mp)
+{
+ /* not supported */
+}
+
+static void vl_api_vnet_ip6_nbr_counters_t_handler_json
+ (vl_api_vnet_ip6_nbr_counters_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vl_api_ip6_nbr_counter_t *v;
+ ip6_nbr_counter_t *counter;
+ struct in6_addr ip6;
+ u32 sw_if_index;
+ u32 count;
+ int i;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+ count = ntohl (mp->count);
+ vec_validate (vam->ip6_nbr_counters, sw_if_index);
+
+ if (mp->begin)
+ vec_free (vam->ip6_nbr_counters[sw_if_index]);
+
+ v = (vl_api_ip6_nbr_counter_t *) & mp->c;
+ for (i = 0; i < count; i++)
+ {
+ vec_validate (vam->ip6_nbr_counters[sw_if_index], i);
+ counter = &vam->ip6_nbr_counters[sw_if_index][i];
+ clib_memcpy (&ip6, &v->address, sizeof (ip6));
+ counter->address = ip6;
+ counter->packets = clib_net_to_host_u64 (v->packets);
+ counter->bytes = clib_net_to_host_u64 (v->bytes);
+ v++;
+ }
+}
+
+static void vl_api_get_first_msg_id_reply_t_handler
+ (vl_api_get_first_msg_id_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+ if (retval >= 0)
+ {
+ errmsg ("first message id %d", ntohs (mp->first_msg_id));
+ }
+}
+
+static void vl_api_get_first_msg_id_reply_t_handler_json
+ (vl_api_get_first_msg_id_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "first_msg_id",
+ (uint) ntohs (mp->first_msg_id));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_get_node_graph_reply_t_handler
+ (vl_api_get_node_graph_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ api_main_t *am = &api_main;
+ i32 retval = ntohl (mp->retval);
+ u8 *pvt_copy, *reply;
+ void *oldheap;
+ vlib_node_t *node;
+ int i;
+
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+
+ /* "Should never happen..." */
+ if (retval != 0)
+ return;
+
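+ /* The reply body lives in API shared memory: duplicate it onto the
+ * process heap, then free the original under the shmem heap lock. */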
+ reply = uword_to_pointer (mp->reply_in_shmem, u8 *);
+ pvt_copy = vec_dup (reply);
+
+ /* Toss the shared-memory original... */
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ vec_free (reply);
+
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+
+ if (vam->graph_nodes)
+ {
+ hash_free (vam->graph_node_index_by_name);
+
+ for (i = 0; i < vec_len (vam->graph_nodes); i++)
+ {
+ node = vam->graph_nodes[i];
+ vec_free (node->name);
+ vec_free (node->next_nodes);
+ vec_free (node);
+ }
+ vec_free (vam->graph_nodes);
+ }
+
+ vam->graph_node_index_by_name = hash_create_string (0, sizeof (uword));
+ vam->graph_nodes = vlib_node_unserialize (pvt_copy);
+ vec_free (pvt_copy);
+
+ for (i = 0; i < vec_len (vam->graph_nodes); i++)
+ {
+ node = vam->graph_nodes[i];
+ hash_set_mem (vam->graph_node_index_by_name, node->name, i);
+ }
+}
+
+static void vl_api_get_node_graph_reply_t_handler_json
+ (vl_api_get_node_graph_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ api_main_t *am = &api_main;
+ void *oldheap;
+ vat_json_node_t node;
+ u8 *reply;
+
+ /* $$$$ make this real? */
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "reply_in_shmem", mp->reply_in_shmem);
+
+ reply = uword_to_pointer (mp->reply_in_shmem, u8 *);
+
+ /* Toss the shared-memory original... */
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ vec_free (reply);
+
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_one_locator_details_t_handler (vl_api_one_locator_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *s = 0;
+
+ if (mp->local)
+ {
+ s = format (s, "%=16d%=16d%=16d",
+ ntohl (mp->sw_if_index), mp->priority, mp->weight);
+ }
+ else
+ {
+ s = format (s, "%=16U%=16d%=16d",
+ mp->is_ipv6 ? format_ip6_address :
+ format_ip4_address,
+ mp->ip_address, mp->priority, mp->weight);
+ }
+
+ print (vam->ofp, "%v", s);
+ vec_free (s);
+}
+
+static void
+vl_api_one_locator_details_t_handler_json (vl_api_one_locator_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ struct in6_addr ip6;
+ struct in_addr ip4;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+ vat_json_init_object (node);
+
+ vat_json_object_add_uint (node, "local", mp->local ? 1 : 0);
+ vat_json_object_add_uint (node, "priority", mp->priority);
+ vat_json_object_add_uint (node, "weight", mp->weight);
+
+ if (mp->local)
+ vat_json_object_add_uint (node, "sw_if_index",
+ clib_net_to_host_u32 (mp->sw_if_index));
+ else
+ {
+ if (mp->is_ipv6)
+ {
+ clib_memcpy (&ip6, mp->ip_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "address", ip6);
+ }
+ else
+ {
+ clib_memcpy (&ip4, mp->ip_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "address", ip4);
+ }
+ }
+}
+
+static void
+vl_api_one_locator_set_details_t_handler (vl_api_one_locator_set_details_t *
+ mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *ls_name = 0;
+
+ ls_name = format (0, "%s", mp->ls_name);
+
+ print (vam->ofp, "%=10d%=15v", clib_net_to_host_u32 (mp->ls_index),
+ ls_name);
+ vec_free (ls_name);
+}
+
+static void
+ vl_api_one_locator_set_details_t_handler_json
+ (vl_api_one_locator_set_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = 0;
+ u8 *ls_name = 0;
+
+ ls_name = format (0, "%s", mp->ls_name);
+ vec_add1 (ls_name, 0);
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_string_copy (node, "ls_name", ls_name);
+ vat_json_object_add_uint (node, "ls_index",
+ clib_net_to_host_u32 (mp->ls_index));
+ vec_free (ls_name);
+}
+
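+/* NSH EID as carried in API messages: a 32-bit service path id
+ * followed by an 8-bit service index, packed so it can overlay the
+ * raw eid bytes directly. */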
+typedef struct
+{
+ u32 spi;
+ u8 si;
+} __attribute__ ((__packed__)) lisp_nsh_api_t;
+
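+/* Parse "SPI:<n> SI:<n>". Parse into u32 temporaries first: unformat's
+ * %d stores a 32-bit value, which must not be written through the
+ * 8-bit si member of the packed struct. */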
+uword
+unformat_nsh_address (unformat_input_t * input, va_list * args)
+{
+ lisp_nsh_api_t *nsh = va_arg (*args, lisp_nsh_api_t *);
+ u32 spi = 0, si = 0;
+
+ if (!unformat (input, "SPI:%d SI:%d", &spi, &si))
+ return 0;
+
+ nsh->spi = spi;
+ nsh->si = si;
+ return 1;
+}
+
+u8 *
+format_nsh_address_vat (u8 * s, va_list * args)
+{
+ nsh_t *a = va_arg (*args, nsh_t *);
+ return format (s, "SPI:%d SI:%d", clib_net_to_host_u32 (a->spi), a->si);
+}
+
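+/* EID type codes used by these handlers: 0 = IPv4 prefix,
+ * 1 = IPv6 prefix, 2 = MAC address, 3 = NSH (SPI/SI). */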
+static u8 *
+format_lisp_flat_eid (u8 * s, va_list * args)
+{
+ u32 type = va_arg (*args, u32);
+ u8 *eid = va_arg (*args, u8 *);
+ u32 eid_len = va_arg (*args, u32);
+
+ switch (type)
+ {
+ case 0:
+ return format (s, "%U/%d", format_ip4_address, eid, eid_len);
+ case 1:
+ return format (s, "%U/%d", format_ip6_address, eid, eid_len);
+ case 2:
+ return format (s, "%U", format_ethernet_address, eid);
+ case 3:
+ return format (s, "%U", format_nsh_address_vat, eid);
+ }
+ return 0;
+}
+
+static u8 *
+format_lisp_eid_vat (u8 * s, va_list * args)
+{
+ u32 type = va_arg (*args, u32);
+ u8 *eid = va_arg (*args, u8 *);
+ u32 eid_len = va_arg (*args, u32);
+ u8 *seid = va_arg (*args, u8 *);
+ u32 seid_len = va_arg (*args, u32);
+ u32 is_src_dst = va_arg (*args, u32);
+
+ if (is_src_dst)
+ s = format (s, "%U|", format_lisp_flat_eid, type, seid, seid_len);
+
+ s = format (s, "%U", format_lisp_flat_eid, type, eid, eid_len);
+
+ return s;
+}
+
+static void
+vl_api_one_eid_table_details_t_handler (vl_api_one_eid_table_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *s = 0, *eid = 0;
+
+ if (~0 == mp->locator_set_index)
+ s = format (0, "action: %d", mp->action);
+ else
+ s = format (0, "%d", clib_net_to_host_u32 (mp->locator_set_index));
+
+ eid = format (0, "%U", format_lisp_eid_vat,
+ mp->eid_type,
+ mp->eid,
+ mp->eid_prefix_len,
+ mp->seid, mp->seid_prefix_len, mp->is_src_dst);
+ vec_add1 (eid, 0);
+
+ print (vam->ofp, "[%d] %-35s%-20s%-30s%-20d%-20d%-10d%-20s",
+ clib_net_to_host_u32 (mp->vni),
+ eid,
+ mp->is_local ? "local" : "remote",
+ s, clib_net_to_host_u32 (mp->ttl), mp->authoritative,
+ clib_net_to_host_u16 (mp->key_id), mp->key);
+
+ vec_free (s);
+ vec_free (eid);
+}
+
+static void
+vl_api_one_eid_table_details_t_handler_json (vl_api_one_eid_table_details_t
+ * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = 0;
+ u8 *eid = 0;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ if (~0 == mp->locator_set_index)
+ vat_json_object_add_uint (node, "action", mp->action);
+ else
+ vat_json_object_add_uint (node, "locator_set_index",
+ clib_net_to_host_u32 (mp->locator_set_index));
+
+ vat_json_object_add_uint (node, "is_local", mp->is_local ? 1 : 0);
+ if (mp->eid_type == 3)
+ {
+ vat_json_node_t *nsh_json = vat_json_object_add (node, "eid");
+ vat_json_init_object (nsh_json);
+ lisp_nsh_api_t *nsh = (lisp_nsh_api_t *) mp->eid;
+ vat_json_object_add_uint (nsh_json, "spi",
+ clib_net_to_host_u32 (nsh->spi));
+ vat_json_object_add_uint (nsh_json, "si", nsh->si);
+ }
+ else
+ {
+ eid = format (0, "%U", format_lisp_eid_vat,
+ mp->eid_type,
+ mp->eid,
+ mp->eid_prefix_len,
+ mp->seid, mp->seid_prefix_len, mp->is_src_dst);
+ vec_add1 (eid, 0);
+ vat_json_object_add_string_copy (node, "eid", eid);
+ vec_free (eid);
+ }
+ vat_json_object_add_uint (node, "vni", clib_net_to_host_u32 (mp->vni));
+ vat_json_object_add_uint (node, "ttl", clib_net_to_host_u32 (mp->ttl));
+ vat_json_object_add_uint (node, "authoritative", (mp->authoritative));
+
+ if (mp->key_id)
+ {
+ vat_json_object_add_uint (node, "key_id",
+ clib_net_to_host_u16 (mp->key_id));
+ vat_json_object_add_string_copy (node, "key", mp->key);
+ }
+}
+
+static void
+vl_api_one_stats_details_t_handler (vl_api_one_stats_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *seid = 0, *deid = 0;
+ u8 *(*format_ip_address_fcn) (u8 *, va_list *) = 0;
+
+ deid = format (0, "%U", format_lisp_eid_vat,
+ mp->eid_type, mp->deid, mp->deid_pref_len, 0, 0, 0);
+
+ seid = format (0, "%U", format_lisp_eid_vat,
+ mp->eid_type, mp->seid, mp->seid_pref_len, 0, 0, 0);
+
+ vec_add1 (deid, 0);
+ vec_add1 (seid, 0);
+
+ if (mp->is_ip4)
+ format_ip_address_fcn = format_ip4_address;
+ else
+ format_ip_address_fcn = format_ip6_address;
+
+ print (vam->ofp, "([%d] %s %s) (%U %U) %u %u",
+ clib_net_to_host_u32 (mp->vni),
+ seid, deid,
+ format_ip_address_fcn, mp->lloc,
+ format_ip_address_fcn, mp->rloc,
+ clib_net_to_host_u32 (mp->pkt_count),
+ clib_net_to_host_u32 (mp->bytes));
+
+ vec_free (deid);
+ vec_free (seid);
+}
+
+static void
+vl_api_one_stats_details_t_handler_json (vl_api_one_stats_details_t * mp)
+{
+ struct in6_addr ip6;
+ struct in_addr ip4;
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = 0;
+ u8 *deid = 0, *seid = 0;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ deid = format (0, "%U", format_lisp_eid_vat,
+ mp->eid_type, mp->deid, mp->deid_pref_len, 0, 0, 0);
+
+ seid = format (0, "%U", format_lisp_eid_vat,
+ mp->eid_type, mp->seid, mp->seid_pref_len, 0, 0, 0);
+
+ vec_add1 (deid, 0);
+ vec_add1 (seid, 0);
+
+ vat_json_object_add_string_copy (node, "seid", seid);
+ vat_json_object_add_string_copy (node, "deid", deid);
+ vat_json_object_add_uint (node, "vni", clib_net_to_host_u32 (mp->vni));
+
+ if (mp->is_ip4)
+ {
+ clib_memcpy (&ip4, mp->lloc, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "lloc", ip4);
+ clib_memcpy (&ip4, mp->rloc, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "rloc", ip4);
+ }
+ else
+ {
+ clib_memcpy (&ip6, mp->lloc, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "lloc", ip6);
+ clib_memcpy (&ip6, mp->rloc, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "rloc", ip6);
+ }
+ vat_json_object_add_uint (node, "pkt_count",
+ clib_net_to_host_u32 (mp->pkt_count));
+ vat_json_object_add_uint (node, "bytes", clib_net_to_host_u32 (mp->bytes));
+
+ vec_free (deid);
+ vec_free (seid);
+}
+
+static void
+ vl_api_one_eid_table_map_details_t_handler
+ (vl_api_one_eid_table_map_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ u8 *line = format (0, "%=10d%=10d",
+ clib_net_to_host_u32 (mp->vni),
+ clib_net_to_host_u32 (mp->dp_table));
+ print (vam->ofp, "%v", line);
+ vec_free (line);
+}
+
+static void
+ vl_api_one_eid_table_map_details_t_handler_json
+ (vl_api_one_eid_table_map_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "dp_table",
+ clib_net_to_host_u32 (mp->dp_table));
+ vat_json_object_add_uint (node, "vni", clib_net_to_host_u32 (mp->vni));
+}
+
+static void
+ vl_api_one_eid_table_vni_details_t_handler
+ (vl_api_one_eid_table_vni_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ u8 *line = format (0, "%d", clib_net_to_host_u32 (mp->vni));
+ print (vam->ofp, "%v", line);
+ vec_free (line);
+}
+
+static void
+ vl_api_one_eid_table_vni_details_t_handler_json
+ (vl_api_one_eid_table_vni_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "vni", clib_net_to_host_u32 (mp->vni));
+}
+
+static void
+ vl_api_show_one_map_register_fallback_threshold_reply_t_handler
+ (vl_api_show_one_map_register_fallback_threshold_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ vl_api_show_one_map_register_fallback_threshold_reply_t_endian (mp);
+ print (vam->ofp, "fallback threshold value: %d", mp->value);
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_map_register_fallback_threshold_reply_t_handler_json
+ (vl_api_show_one_map_register_fallback_threshold_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t _node, *node = &_node;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ vl_api_show_one_map_register_fallback_threshold_reply_t_endian (mp);
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "value", mp->value);
+
+ vat_json_print (vam->ofp, node);
+ vat_json_free (node);
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_map_register_state_reply_t_handler
+ (vl_api_show_one_map_register_state_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ print (vam->ofp, "%s", mp->is_enabled ? "enabled" : "disabled");
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_map_register_state_reply_t_handler_json
+ (vl_api_show_one_map_register_state_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t _node, *node = &_node;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ u8 *s = format (0, "%s", mp->is_enabled ? "enabled" : "disabled");
+
+ vat_json_init_object (node);
+ vat_json_object_add_string_copy (node, "state", s);
+
+ vat_json_print (vam->ofp, node);
+ vat_json_free (node);
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vec_free (s);
+}
+
+static void
+ vl_api_show_one_rloc_probe_state_reply_t_handler
+ (vl_api_show_one_rloc_probe_state_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ print (vam->ofp, "%s", mp->is_enabled ? "enabled" : "disabled");
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_rloc_probe_state_reply_t_handler_json
+ (vl_api_show_one_rloc_probe_state_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t _node, *node = &_node;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ u8 *s = format (0, "%s", mp->is_enabled ? "enabled" : "disabled");
+ vat_json_init_object (node);
+ vat_json_object_add_string_copy (node, "state", s);
+
+ vat_json_print (vam->ofp, node);
+ vat_json_free (node);
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vec_free (s);
+}
+
+static void
+ vl_api_show_one_stats_enable_disable_reply_t_handler
+ (vl_api_show_one_stats_enable_disable_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ print (vam->ofp, "%s", mp->is_en ? "enabled" : "disabled");
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_stats_enable_disable_reply_t_handler_json
+ (vl_api_show_one_stats_enable_disable_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t _node, *node = &_node;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ u8 *s = format (0, "%s", mp->is_en ? "enabled" : "disabled");
+ vat_json_init_object (node);
+ vat_json_object_add_string_copy (node, "state", s);
+
+ vat_json_print (vam->ofp, node);
+ vat_json_free (node);
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vec_free (s);
+}
+
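+/* GPE replies embed arrays of entries; convert each entry's u32
+ * fields to host byte order in place before formatting them. */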
+static void
+api_gpe_fwd_entry_net_to_host (vl_api_gpe_fwd_entry_t * e)
+{
+ e->dp_table = clib_net_to_host_u32 (e->dp_table);
+ e->fwd_entry_index = clib_net_to_host_u32 (e->fwd_entry_index);
+ e->vni = clib_net_to_host_u32 (e->vni);
+}
+
+static void
+ gpe_fwd_entries_get_reply_t_net_to_host
+ (vl_api_gpe_fwd_entries_get_reply_t * mp)
+{
+ u32 i;
+
+ mp->count = clib_net_to_host_u32 (mp->count);
+ for (i = 0; i < mp->count; i++)
+ {
+ api_gpe_fwd_entry_net_to_host (&mp->entries[i]);
+ }
+}
+
+static u8 *
+format_gpe_encap_mode (u8 * s, va_list * args)
+{
+ u32 mode = va_arg (*args, u32);
+
+ switch (mode)
+ {
+ case 0:
+ return format (s, "lisp");
+ case 1:
+ return format (s, "vxlan");
+ }
+ return 0;
+}
+
+static void
+ vl_api_gpe_get_encap_mode_reply_t_handler
+ (vl_api_gpe_get_encap_mode_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "gpe mode: %U", format_gpe_encap_mode, mp->encap_mode);
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_gpe_get_encap_mode_reply_t_handler_json
+ (vl_api_gpe_get_encap_mode_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ u8 *encap_mode = format (0, "%U", format_gpe_encap_mode, mp->encap_mode);
+ vec_add1 (encap_mode, 0);
+
+ vat_json_init_object (&node);
+ vat_json_object_add_string_copy (&node, "gpe_mode", encap_mode);
+
+ vec_free (encap_mode);
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_gpe_fwd_entry_path_details_t_handler
+ (vl_api_gpe_fwd_entry_path_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *(*format_ip_address_fcn) (u8 *, va_list *) = 0;
+
+ if (mp->lcl_loc.is_ip4)
+ format_ip_address_fcn = format_ip4_address;
+ else
+ format_ip_address_fcn = format_ip6_address;
+
+ print (vam->ofp, "w:%d %30U %30U", mp->rmt_loc.weight,
+ format_ip_address_fcn, mp->lcl_loc.addr,
+ format_ip_address_fcn, mp->rmt_loc.addr);
+}
+
+static void
+lisp_fill_locator_node (vat_json_node_t * n, vl_api_gpe_locator_t * loc)
+{
+ struct in6_addr ip6;
+ struct in_addr ip4;
+
+ if (loc->is_ip4)
+ {
+ clib_memcpy (&ip4, loc->addr, sizeof (ip4));
+ vat_json_object_add_ip4 (n, "address", ip4);
+ }
+ else
+ {
+ clib_memcpy (&ip6, loc->addr, sizeof (ip6));
+ vat_json_object_add_ip6 (n, "address", ip6);
+ }
+ vat_json_object_add_uint (n, "weight", loc->weight);
+}
+
+static void
+ vl_api_gpe_fwd_entry_path_details_t_handler_json
+ (vl_api_gpe_fwd_entry_path_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ vat_json_node_t *loc_node;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+ vat_json_init_object (node);
+
+ loc_node = vat_json_object_add (node, "local_locator");
+ vat_json_init_object (loc_node);
+ lisp_fill_locator_node (loc_node, &mp->lcl_loc);
+
+ loc_node = vat_json_object_add (node, "remote_locator");
+ vat_json_init_object (loc_node);
+ lisp_fill_locator_node (loc_node, &mp->rmt_loc);
+}
+
+static void
+ vl_api_gpe_fwd_entries_get_reply_t_handler
+ (vl_api_gpe_fwd_entries_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i;
+ int retval = clib_net_to_host_u32 (mp->retval);
+ vl_api_gpe_fwd_entry_t *e;
+
+ if (retval)
+ goto end;
+
+ gpe_fwd_entries_get_reply_t_net_to_host (mp);
+
+ for (i = 0; i < mp->count; i++)
+ {
+ e = &mp->entries[i];
+ print (vam->ofp, "%10d %10d %U %40U", e->fwd_entry_index, e->dp_table,
+ format_lisp_flat_eid, e->eid_type, e->leid, e->leid_prefix_len,
+ format_lisp_flat_eid, e->eid_type, e->reid, e->reid_prefix_len);
+ }
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_gpe_fwd_entries_get_reply_t_handler_json
+ (vl_api_gpe_fwd_entries_get_reply_t * mp)
+{
+ u8 *s = 0;
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *e = 0, root;
+ u32 i;
+ int retval = clib_net_to_host_u32 (mp->retval);
+ vl_api_gpe_fwd_entry_t *fwd;
+
+ if (retval)
+ goto end;
+
+ gpe_fwd_entries_get_reply_t_net_to_host (mp);
+ vat_json_init_array (&root);
+
+ for (i = 0; i < mp->count; i++)
+ {
+ e = vat_json_array_add (&root);
+ fwd = &mp->entries[i];
+
+ vat_json_init_object (e);
+ vat_json_object_add_int (e, "fwd_entry_index", fwd->fwd_entry_index);
+ vat_json_object_add_int (e, "dp_table", fwd->dp_table);
+ vat_json_object_add_int (e, "vni", fwd->vni);
+ vat_json_object_add_int (e, "action", fwd->action);
+
+ s = format (0, "%U", format_lisp_flat_eid, fwd->eid_type, fwd->leid,
+ fwd->leid_prefix_len);
+ vec_add1 (s, 0);
+ vat_json_object_add_string_copy (e, "leid", s);
+ vec_free (s);
+
+ s = format (0, "%U", format_lisp_flat_eid, fwd->eid_type, fwd->reid,
+ fwd->reid_prefix_len);
+ vec_add1 (s, 0);
+ vat_json_object_add_string_copy (e, "reid", s);
+ vec_free (s);
+ }
+
+ vat_json_print (vam->ofp, &root);
+ vat_json_free (&root);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_gpe_native_fwd_rpaths_get_reply_t_handler
+ (vl_api_gpe_native_fwd_rpaths_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+ vl_api_gpe_native_fwd_rpath_t *r;
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+
+ for (i = 0; i < n; i++)
+ {
+ r = &mp->entries[i];
+ print (vam->ofp, "fib_index: %d sw_if_index %d nh %U",
+ clib_net_to_host_u32 (r->fib_index),
+ clib_net_to_host_u32 (r->nh_sw_if_index),
+ r->is_ip4 ? format_ip4_address : format_ip6_address, r->nh_addr);
+ }
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_gpe_native_fwd_rpaths_get_reply_t_handler_json
+ (vl_api_gpe_native_fwd_rpaths_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t root, *e;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+ vl_api_gpe_native_fwd_rpath_t *r;
+ u8 *s;
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+ vat_json_init_array (&root);
+
+ for (i = 0; i < n; i++)
+ {
+ e = vat_json_array_add (&root);
+ vat_json_init_object (e);
+ r = &mp->entries[i];
+ s =
+ format (0, "%U", r->is_ip4 ? format_ip4_address : format_ip6_address,
+ r->nh_addr);
+ vec_add1 (s, 0);
+ vat_json_object_add_string_copy (e, "ip4", s);
+ vec_free (s);
+
+ vat_json_object_add_uint (e, "fib_index",
+ clib_net_to_host_u32 (r->fib_index));
+ vat_json_object_add_uint (e, "nh_sw_if_index",
+ clib_net_to_host_u32 (r->nh_sw_if_index));
+ }
+
+ vat_json_print (vam->ofp, &root);
+ vat_json_free (&root);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_gpe_fwd_entry_vnis_get_reply_t_handler
+ (vl_api_gpe_fwd_entry_vnis_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+
+ for (i = 0; i < n; i++)
+ print (vam->ofp, "%d", clib_net_to_host_u32 (mp->vnis[i]));
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_gpe_fwd_entry_vnis_get_reply_t_handler_json
+ (vl_api_gpe_fwd_entry_vnis_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t root;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+ vat_json_init_array (&root);
+
+ for (i = 0; i < n; i++)
+ vat_json_array_add_uint (&root, clib_net_to_host_u32 (mp->vnis[i]));
+
+ vat_json_print (vam->ofp, &root);
+ vat_json_free (&root);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_ndp_entries_get_reply_t_handler
+ (vl_api_one_ndp_entries_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+
+ for (i = 0; i < n; i++)
+ print (vam->ofp, "%U -> %U", format_ip6_address, &mp->entries[i].ip6,
+ format_ethernet_address, mp->entries[i].mac);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_ndp_entries_get_reply_t_handler_json
+ (vl_api_one_ndp_entries_get_reply_t * mp)
+{
+ u8 *s = 0;
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *e = 0, root;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+ vl_api_one_ndp_entry_t *ndp_entry;
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+ vat_json_init_array (&root);
+
+ for (i = 0; i < n; i++)
+ {
+ e = vat_json_array_add (&root);
+ ndp_entry = &mp->entries[i];
+
+ vat_json_init_object (e);
+ s = format (0, "%U", format_ethernet_address, ndp_entry->mac);
+ vec_add1 (s, 0);
+
+ vat_json_object_add_string_copy (e, "mac", s);
+ vec_free (s);
+
+ s = format (0, "%U", format_ip6_address, &ndp_entry->ip6);
+ vec_add1 (s, 0);
+ vat_json_object_add_string_copy (e, "ip6", s);
+ vec_free (s);
+ }
+
+ vat_json_print (vam->ofp, &root);
+ vat_json_free (&root);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_l2_arp_entries_get_reply_t_handler
+ (vl_api_one_l2_arp_entries_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+
+ for (i = 0; i < n; i++)
+ print (vam->ofp, "%U -> %U", format_ip4_address, &mp->entries[i].ip4,
+ format_ethernet_address, mp->entries[i].mac);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_l2_arp_entries_get_reply_t_handler_json
+ (vl_api_one_l2_arp_entries_get_reply_t * mp)
+{
+ u8 *s = 0;
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *e = 0, root;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+ vl_api_one_l2_arp_entry_t *arp_entry;
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+ vat_json_init_array (&root);
+
+ for (i = 0; i < n; i++)
+ {
+ e = vat_json_array_add (&root);
+ arp_entry = &mp->entries[i];
+
+ vat_json_init_object (e);
+ s = format (0, "%U", format_ethernet_address, arp_entry->mac);
+ vec_add1 (s, 0);
+
+ vat_json_object_add_string_copy (e, "mac", s);
+ vec_free (s);
+
+ s = format (0, "%U", format_ip4_address, &arp_entry->ip4);
+ vec_add1 (s, 0);
+ vat_json_object_add_string_copy (e, "ip4", s);
+ vec_free (s);
+ }
+
+ vat_json_print (vam->ofp, &root);
+ vat_json_free (&root);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_one_ndp_bd_get_reply_t_handler (vl_api_one_ndp_bd_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+
+ for (i = 0; i < n; i++)
+ {
+ print (vam->ofp, "%d", clib_net_to_host_u32 (mp->bridge_domains[i]));
+ }
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_ndp_bd_get_reply_t_handler_json
+ (vl_api_one_ndp_bd_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t root;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+ vat_json_init_array (&root);
+
+ for (i = 0; i < n; i++)
+ {
+ vat_json_array_add_uint (&root,
+ clib_net_to_host_u32 (mp->bridge_domains[i]));
+ }
+
+ vat_json_print (vam->ofp, &root);
+ vat_json_free (&root);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_l2_arp_bd_get_reply_t_handler
+ (vl_api_one_l2_arp_bd_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+
+ for (i = 0; i < n; i++)
+ {
+ print (vam->ofp, "%d", clib_net_to_host_u32 (mp->bridge_domains[i]));
+ }
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_l2_arp_bd_get_reply_t_handler_json
+ (vl_api_one_l2_arp_bd_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t root;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+ vat_json_init_array (&root);
+
+ for (i = 0; i < n; i++)
+ {
+ vat_json_array_add_uint (&root,
+ clib_net_to_host_u32 (mp->bridge_domains[i]));
+ }
+
+ vat_json_print (vam->ofp, &root);
+ vat_json_free (&root);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_adjacencies_get_reply_t_handler
+ (vl_api_one_adjacencies_get_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+ vl_api_one_adjacency_t *a;
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+
+ for (i = 0; i < n; i++)
+ {
+ a = &mp->adjacencies[i];
+ print (vam->ofp, "%U %40U",
+ format_lisp_flat_eid, a->eid_type, a->leid, a->leid_prefix_len,
+ format_lisp_flat_eid, a->eid_type, a->reid, a->reid_prefix_len);
+ }
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_adjacencies_get_reply_t_handler_json
+ (vl_api_one_adjacencies_get_reply_t * mp)
+{
+ u8 *s = 0;
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *e = 0, root;
+ u32 i, n;
+ int retval = clib_net_to_host_u32 (mp->retval);
+ vl_api_one_adjacency_t *a;
+
+ if (retval)
+ goto end;
+
+ n = clib_net_to_host_u32 (mp->count);
+ vat_json_init_array (&root);
+
+ for (i = 0; i < n; i++)
+ {
+ e = vat_json_array_add (&root);
+ a = &mp->adjacencies[i];
+
+ vat_json_init_object (e);
+ s = format (0, "%U", format_lisp_flat_eid, a->eid_type, a->leid,
+ a->leid_prefix_len);
+ vec_add1 (s, 0);
+ vat_json_object_add_string_copy (e, "leid", s);
+ vec_free (s);
+
+ s = format (0, "%U", format_lisp_flat_eid, a->eid_type, a->reid,
+ a->reid_prefix_len);
+ vec_add1 (s, 0);
+ vat_json_object_add_string_copy (e, "reid", s);
+ vec_free (s);
+ }
+
+ vat_json_print (vam->ofp, &root);
+ vat_json_free (&root);
+
+end:
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_one_map_server_details_t_handler (vl_api_one_map_server_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%=20U",
+ mp->is_ipv6 ? format_ip6_address : format_ip4_address,
+ mp->ip_address);
+}
+
+static void
+ vl_api_one_map_server_details_t_handler_json
+ (vl_api_one_map_server_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ struct in6_addr ip6;
+ struct in_addr ip4;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ if (mp->is_ipv6)
+ {
+ clib_memcpy (&ip6, mp->ip_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "map-server", ip6);
+ }
+ else
+ {
+ clib_memcpy (&ip4, mp->ip_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "map-server", ip4);
+ }
+}
+
+static void
+vl_api_one_map_resolver_details_t_handler (vl_api_one_map_resolver_details_t
+ * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%=20U",
+ mp->is_ipv6 ? format_ip6_address : format_ip4_address,
+ mp->ip_address);
+}
+
+static void
+ vl_api_one_map_resolver_details_t_handler_json
+ (vl_api_one_map_resolver_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ struct in6_addr ip6;
+ struct in_addr ip4;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ if (mp->is_ipv6)
+ {
+ clib_memcpy (&ip6, mp->ip_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "map resolver", ip6);
+ }
+ else
+ {
+ clib_memcpy (&ip4, mp->ip_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "map resolver", ip4);
+ }
+}
+
+static void
+vl_api_show_one_status_reply_t_handler (vl_api_show_one_status_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (0 <= retval)
+ {
+ print (vam->ofp, "feature: %s\ngpe: %s",
+ mp->feature_status ? "enabled" : "disabled",
+ mp->gpe_status ? "enabled" : "disabled");
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_status_reply_t_handler_json
+ (vl_api_show_one_status_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+ u8 *gpe_status = NULL;
+ u8 *feature_status = NULL;
+
+ gpe_status = format (0, "%s", mp->gpe_status ? "enabled" : "disabled");
+ feature_status = format (0, "%s",
+ mp->feature_status ? "enabled" : "disabled");
+ vec_add1 (gpe_status, 0);
+ vec_add1 (feature_status, 0);
+
+ vat_json_init_object (&node);
+ vat_json_object_add_string_copy (&node, "gpe_status", gpe_status);
+ vat_json_object_add_string_copy (&node, "feature_status", feature_status);
+
+ vec_free (gpe_status);
+ vec_free (feature_status);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_get_map_request_itr_rlocs_reply_t_handler
+ (vl_api_one_get_map_request_itr_rlocs_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval >= 0)
+ {
+ print (vam->ofp, "%=20s", mp->locator_set_name);
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_one_get_map_request_itr_rlocs_reply_t_handler_json
+ (vl_api_one_get_map_request_itr_rlocs_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_string_copy (node, "itr-rlocs", mp->locator_set_name);
+
+ vat_json_print (vam->ofp, node);
+ vat_json_free (node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static u8 *
+format_lisp_map_request_mode (u8 * s, va_list * args)
+{
+ u32 mode = va_arg (*args, u32);
+
+ switch (mode)
+ {
+ case 0:
+ return format (s, "dst-only");
+ case 1:
+ return format (s, "src-dst");
+ }
+ return 0;
+}
+
+static void
+ vl_api_show_one_map_request_mode_reply_t_handler
+ (vl_api_show_one_map_request_mode_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (0 <= retval)
+ {
+ u32 mode = mp->mode;
+ print (vam->ofp, "map_request_mode: %U",
+ format_lisp_map_request_mode, mode);
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_map_request_mode_reply_t_handler_json
+ (vl_api_show_one_map_request_mode_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+ u8 *s = 0;
+ u32 mode;
+
+ mode = mp->mode;
+ s = format (0, "%U", format_lisp_map_request_mode, mode);
+ vec_add1 (s, 0);
+
+ vat_json_init_object (&node);
+ vat_json_object_add_string_copy (&node, "map_request_mode", s);
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vec_free (s);
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_use_petr_reply_t_handler
+ (vl_api_show_one_use_petr_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (0 <= retval)
+ {
+ print (vam->ofp, "%s\n", mp->status ? "enabled" : "disabled");
+ if (mp->status)
+ {
+ print (vam->ofp, "Proxy-ETR address; %U",
+ mp->is_ip4 ? format_ip4_address : format_ip6_address,
+ mp->address);
+ }
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_use_petr_reply_t_handler_json
+ (vl_api_show_one_use_petr_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+ u8 *status = 0;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+
+ status = format (0, "%s", mp->status ? "enabled" : "disabled");
+ vec_add1 (status, 0);
+
+ vat_json_init_object (&node);
+ vat_json_object_add_string_copy (&node, "status", status);
+ if (mp->status)
+ {
+ if (mp->is_ip4)
+ {
+ clib_memcpy (&ip4, mp->address, sizeof (ip4));
+ vat_json_object_add_ip4 (&node, "address", ip4);
+ }
+ else
+ {
+ clib_memcpy (&ip6, mp->address, sizeof (ip6));
+ vat_json_object_add_ip6 (&node, "address", ip6);
+ }
+ }
+
+ vec_free (status);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_nsh_mapping_reply_t_handler
+ (vl_api_show_one_nsh_mapping_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (0 <= retval)
+ {
+ print (vam->ofp, "%-20s%-16s",
+ mp->is_set ? "set" : "not-set",
+ mp->is_set ? (char *) mp->locator_set_name : "");
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_nsh_mapping_reply_t_handler_json
+ (vl_api_show_one_nsh_mapping_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+ u8 *status = 0;
+
+ status = format (0, "%s", mp->is_set ? "yes" : "no");
+ vec_add1 (status, 0);
+
+ vat_json_init_object (&node);
+ vat_json_object_add_string_copy (&node, "is_set", status);
+ if (mp->is_set)
+ {
+ vat_json_object_add_string_copy (&node, "locator_set",
+ mp->locator_set_name);
+ }
+
+ vec_free (status);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_map_register_ttl_reply_t_handler
+ (vl_api_show_one_map_register_ttl_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vl_api_show_one_map_register_ttl_reply_t_endian (mp);
+
+ if (0 <= retval)
+ {
+ print (vam->ofp, "ttl: %u", mp->ttl);
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_show_one_map_register_ttl_reply_t_handler_json
+ (vl_api_show_one_map_register_ttl_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vl_api_show_one_map_register_ttl_reply_t_endian (mp);
+ vat_json_init_object (&node);
+ vat_json_object_add_uint (&node, "ttl", mp->ttl);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ /* the endian helper above already converted retval to host byte order */
+ vam->retval = mp->retval;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_show_one_pitr_reply_t_handler (vl_api_show_one_pitr_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (0 <= retval)
+ {
+ print (vam->ofp, "%-20s%-16s",
+ mp->status ? "enabled" : "disabled",
+ mp->status ? (char *) mp->locator_set_name : "");
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_show_one_pitr_reply_t_handler_json (vl_api_show_one_pitr_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+ u8 *status = 0;
+
+ status = format (0, "%s", mp->status ? "enabled" : "disabled");
+ vec_add1 (status, 0);
+
+ vat_json_init_object (&node);
+ vat_json_object_add_string_copy (&node, "status", status);
+ if (mp->status)
+ {
+ vat_json_object_add_string_copy (&node, "locator_set",
+ mp->locator_set_name);
+ }
+
+ vec_free (status);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static u8 *
+format_policer_type (u8 * s, va_list * va)
+{
+ u32 i = va_arg (*va, u32);
+
+ if (i == SSE2_QOS_POLICER_TYPE_1R2C)
+ s = format (s, "1r2c");
+ else if (i == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)
+ s = format (s, "1r3c");
+ else if (i == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698)
+ s = format (s, "2r3c-2698");
+ else if (i == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)
+ s = format (s, "2r3c-4115");
+ else if (i == SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1)
+ s = format (s, "2r3c-mef5cf1");
+ else
+ s = format (s, "ILLEGAL");
+ return s;
+}
+
+static u8 *
+format_policer_rate_type (u8 * s, va_list * va)
+{
+ u32 i = va_arg (*va, u32);
+
+ if (i == SSE2_QOS_RATE_KBPS)
+ s = format (s, "kbps");
+ else if (i == SSE2_QOS_RATE_PPS)
+ s = format (s, "pps");
+ else
+ s = format (s, "ILLEGAL");
+ return s;
+}
+
+static u8 *
+format_policer_round_type (u8 * s, va_list * va)
+{
+ u32 i = va_arg (*va, u32);
+
+ if (i == SSE2_QOS_ROUND_TO_CLOSEST)
+ s = format (s, "closest");
+ else if (i == SSE2_QOS_ROUND_TO_UP)
+ s = format (s, "up");
+ else if (i == SSE2_QOS_ROUND_TO_DOWN)
+ s = format (s, "down");
+ else
+ s = format (s, "ILLEGAL");
+ return s;
+}
+
+static u8 *
+format_policer_action_type (u8 * s, va_list * va)
+{
+ u32 i = va_arg (*va, u32);
+
+ if (i == SSE2_QOS_ACTION_DROP)
+ s = format (s, "drop");
+ else if (i == SSE2_QOS_ACTION_TRANSMIT)
+ s = format (s, "transmit");
+ else if (i == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ s = format (s, "mark-and-transmit");
+ else
+ s = format (s, "ILLEGAL");
+ return s;
+}
+
+static u8 *
+format_dscp (u8 * s, va_list * va)
+{
+ u32 i = va_arg (*va, u32);
+ char *t = 0;
+
+ switch (i)
+ {
+#define _(v,f,str) case VNET_DSCP_##f: t = str; break;
+ foreach_vnet_dscp
+#undef _
+ default:
+ return format (s, "ILLEGAL");
+ }
+ s = format (s, "%s", t);
+ return s;
+}
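+
+/*
+ * Each foreach_vnet_dscp entry supplies (value, field, string); e.g. a
+ * hypothetical _(16, CS2, "CS2") expands in the switch above to
+ * "case VNET_DSCP_CS2: t = "CS2"; break;".
+ */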
+
+static void
+vl_api_policer_details_t_handler (vl_api_policer_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *conform_dscp_str, *exceed_dscp_str, *violate_dscp_str;
+
+ if (mp->conform_action_type == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ conform_dscp_str = format (0, "%U", format_dscp, mp->conform_dscp);
+ else
+ conform_dscp_str = format (0, "");
+
+ if (mp->exceed_action_type == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ exceed_dscp_str = format (0, "%U", format_dscp, mp->exceed_dscp);
+ else
+ exceed_dscp_str = format (0, "");
+
+ if (mp->violate_action_type == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ violate_dscp_str = format (0, "%U", format_dscp, mp->violate_dscp);
+ else
+ violate_dscp_str = format (0, "");
+
+ print (vam->ofp, "Name \"%s\", type %U, cir %u, eir %u, cb %u, eb %u, "
+ "rate type %U, round type %U, %s rate, %s color-aware, "
+ "cir %u tok/period, pir %u tok/period, scale %u, cur lim %u, "
+ "cur bkt %u, ext lim %u, ext bkt %u, last update %llu"
+ "conform action %U%s, exceed action %U%s, violate action %U%s",
+ mp->name,
+ format_policer_type, mp->type,
+ ntohl (mp->cir),
+ ntohl (mp->eir),
+ clib_net_to_host_u64 (mp->cb),
+ clib_net_to_host_u64 (mp->eb),
+ format_policer_rate_type, mp->rate_type,
+ format_policer_round_type, mp->round_type,
+ mp->single_rate ? "single" : "dual",
+ mp->color_aware ? "is" : "not",
+ ntohl (mp->cir_tokens_per_period),
+ ntohl (mp->pir_tokens_per_period),
+ ntohl (mp->scale),
+ ntohl (mp->current_limit),
+ ntohl (mp->current_bucket),
+ ntohl (mp->extended_limit),
+ ntohl (mp->extended_bucket),
+ clib_net_to_host_u64 (mp->last_update_time),
+ format_policer_action_type, mp->conform_action_type,
+ conform_dscp_str,
+ format_policer_action_type, mp->exceed_action_type,
+ exceed_dscp_str,
+ format_policer_action_type, mp->violate_action_type,
+ violate_dscp_str);
+
+ vec_free (conform_dscp_str);
+ vec_free (exceed_dscp_str);
+ vec_free (violate_dscp_str);
+}
+
+static void vl_api_policer_details_t_handler_json
+ (vl_api_policer_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node;
+ u8 *rate_type_str, *round_type_str, *type_str;
+ u8 *conform_action_str, *exceed_action_str, *violate_action_str;
+
+ rate_type_str = format (0, "%U", format_policer_rate_type, mp->rate_type);
+ round_type_str =
+ format (0, "%U", format_policer_round_type, mp->round_type);
+ type_str = format (0, "%U", format_policer_type, mp->type);
+ conform_action_str = format (0, "%U", format_policer_action_type,
+ mp->conform_action_type);
+ exceed_action_str = format (0, "%U", format_policer_action_type,
+ mp->exceed_action_type);
+ violate_action_str = format (0, "%U", format_policer_action_type,
+ mp->violate_action_type);
+
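+ /* a dump produces one details message per policer; collect them all
+ * into a single JSON array rooted at vam->json_tree */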
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_string_copy (node, "name", mp->name);
+ vat_json_object_add_uint (node, "cir", ntohl (mp->cir));
+ vat_json_object_add_uint (node, "eir", ntohl (mp->eir));
+ vat_json_object_add_uint (node, "cb", clib_net_to_host_u64 (mp->cb));
+ vat_json_object_add_uint (node, "eb", clib_net_to_host_u64 (mp->eb));
+ vat_json_object_add_string_copy (node, "rate_type", rate_type_str);
+ vat_json_object_add_string_copy (node, "round_type", round_type_str);
+ vat_json_object_add_string_copy (node, "type", type_str);
+ vat_json_object_add_uint (node, "single_rate", mp->single_rate);
+ vat_json_object_add_uint (node, "color_aware", mp->color_aware);
+ vat_json_object_add_uint (node, "scale", ntohl (mp->scale));
+ vat_json_object_add_uint (node, "cir_tokens_per_period",
+ ntohl (mp->cir_tokens_per_period));
+ vat_json_object_add_uint (node, "eir_tokens_per_period",
+ ntohl (mp->pir_tokens_per_period));
+ vat_json_object_add_uint (node, "current_limit", ntohl (mp->current_limit));
+ vat_json_object_add_uint (node, "current_bucket",
+ ntohl (mp->current_bucket));
+ vat_json_object_add_uint (node, "extended_limit",
+ ntohl (mp->extended_limit));
+ vat_json_object_add_uint (node, "extended_bucket",
+ ntohl (mp->extended_bucket));
+ vat_json_object_add_uint (node, "last_update_time",
+ ntohl (mp->last_update_time));
+ vat_json_object_add_string_copy (node, "conform_action",
+ conform_action_str);
+ if (mp->conform_action_type == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ {
+ u8 *dscp_str = format (0, "%U", format_dscp, mp->conform_dscp);
+ vat_json_object_add_string_copy (node, "conform_dscp", dscp_str);
+ vec_free (dscp_str);
+ }
+ vat_json_object_add_string_copy (node, "exceed_action", exceed_action_str);
+ if (mp->exceed_action_type == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ {
+ u8 *dscp_str = format (0, "%U", format_dscp, mp->exceed_dscp);
+ vat_json_object_add_string_copy (node, "exceed_dscp", dscp_str);
+ vec_free (dscp_str);
+ }
+ vat_json_object_add_string_copy (node, "violate_action",
+ violate_action_str);
+ if (mp->violate_action_type == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ {
+ u8 *dscp_str = format (0, "%U", format_dscp, mp->violate_dscp);
+ vat_json_object_add_string_copy (node, "violate_dscp", dscp_str);
+ vec_free (dscp_str);
+ }
+
+ vec_free (rate_type_str);
+ vec_free (round_type_str);
+ vec_free (type_str);
+ vec_free (conform_action_str);
+ vec_free (exceed_action_str);
+ vec_free (violate_action_str);
+}
+
+static void
+vl_api_classify_table_ids_reply_t_handler (vl_api_classify_table_ids_reply_t *
+ mp)
+{
+ vat_main_t *vam = &vat_main;
+ int i, count = ntohl (mp->count);
+
+ if (count > 0)
+ print (vam->ofp, "classify table ids (%d) : ", count);
+ for (i = 0; i < count; i++)
+ {
+ print (vam->ofp, "%d", ntohl (mp->ids[i]));
+ print (vam->ofp, (i < count - 1) ? "," : "");
+ }
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_classify_table_ids_reply_t_handler_json
+ (vl_api_classify_table_ids_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int i, count = ntohl (mp->count);
+
+ if (count > 0)
+ {
+ vat_json_node_t node;
+
+ /* emit the ids as an array; adding one "table_id" key per id would
+ * put duplicate keys into a single JSON object */
+ vat_json_init_array (&node);
+ for (i = 0; i < count; i++)
+ {
+ vat_json_array_add_uint (&node, ntohl (mp->ids[i]));
+ }
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+ }
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_classify_table_by_interface_reply_t_handler
+ (vl_api_classify_table_by_interface_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 table_id;
+
+ table_id = ntohl (mp->l2_table_id);
+ if (table_id != ~0)
+ print (vam->ofp, "l2 table id : %d", table_id);
+ else
+ print (vam->ofp, "l2 table id : No input ACL tables configured");
+ table_id = ntohl (mp->ip4_table_id);
+ if (table_id != ~0)
+ print (vam->ofp, "ip4 table id : %d", table_id);
+ else
+ print (vam->ofp, "ip4 table id : No input ACL tables configured");
+ table_id = ntohl (mp->ip6_table_id);
+ if (table_id != ~0)
+ print (vam->ofp, "ip6 table id : %d", table_id);
+ else
+ print (vam->ofp, "ip6 table id : No input ACL tables configured");
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_classify_table_by_interface_reply_t_handler_json
+ (vl_api_classify_table_by_interface_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+
+ vat_json_object_add_int (&node, "l2_table_id", ntohl (mp->l2_table_id));
+ vat_json_object_add_int (&node, "ip4_table_id", ntohl (mp->ip4_table_id));
+ vat_json_object_add_int (&node, "ip6_table_id", ntohl (mp->ip6_table_id));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_policer_add_del_reply_t_handler
+ (vl_api_policer_add_del_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ if (retval == 0 && mp->policer_index != 0xFFFFFFFF)
+ /*
+ * Note: this is only barely thread-safe; it relies on the
+ * main thread spinning while it waits for the answer...
+ */
+ errmsg ("policer index %d", ntohl (mp->policer_index));
+ }
+}
+
+static void vl_api_policer_add_del_reply_t_handler_json
+ (vl_api_policer_add_del_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "policer_index",
+ ntohl (mp->policer_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+/* Format hex dump. */
+u8 *
+format_hex_bytes (u8 * s, va_list * va)
+{
+ u8 *bytes = va_arg (*va, u8 *);
+ int n_bytes = va_arg (*va, int);
+ uword i;
+
+ /* Print short or long form depending on byte count. */
+ uword short_form = n_bytes <= 32;
+ uword indent = format_get_indent (s);
+
+ if (n_bytes == 0)
+ return s;
+
+ for (i = 0; i < n_bytes; i++)
+ {
+ if (!short_form && (i % 32) == 0)
+ s = format (s, "%08x: ", i);
+ s = format (s, "%02x", bytes[i]);
+ if (!short_form && ((i + 1) % 32) == 0 && (i + 1) < n_bytes)
+ s = format (s, "\n%U", format_white_space, indent);
+ }
+
+ return s;
+}
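+
+/*
+ * Usage sketch (hypothetical data): for u8 d[4] = { 0xde, 0xad, 0xbe, 0xef },
+ * format (0, "%U", format_hex_bytes, d, 4) yields "deadbeef". Buffers
+ * longer than 32 bytes use the long form: rows of 32 bytes, each row
+ * prefixed with its offset ("00000000: ..."), continuation rows aligned
+ * to the current indent.
+ */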
+
+static void
+vl_api_classify_table_info_reply_t_handler (vl_api_classify_table_info_reply_t
+ * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (retval == 0)
+ {
+ print (vam->ofp, "classify table info :");
+ print (vam->ofp, "sessions: %d nexttbl: %d nextnode: %d",
+ ntohl (mp->active_sessions), ntohl (mp->next_table_index),
+ ntohl (mp->miss_next_index));
+ print (vam->ofp, "nbuckets: %d skip: %d match: %d",
+ ntohl (mp->nbuckets), ntohl (mp->skip_n_vectors),
+ ntohl (mp->match_n_vectors));
+ print (vam->ofp, "mask: %U", format_hex_bytes, mp->mask,
+ ntohl (mp->mask_length));
+ }
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_classify_table_info_reply_t_handler_json
+ (vl_api_classify_table_info_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ i32 retval = ntohl (mp->retval);
+ if (retval == 0)
+ {
+ vat_json_init_object (&node);
+
+ vat_json_object_add_int (&node, "sessions",
+ ntohl (mp->active_sessions));
+ vat_json_object_add_int (&node, "nexttbl",
+ ntohl (mp->next_table_index));
+ vat_json_object_add_int (&node, "nextnode",
+ ntohl (mp->miss_next_index));
+ vat_json_object_add_int (&node, "nbuckets", ntohl (mp->nbuckets));
+ vat_json_object_add_int (&node, "skip", ntohl (mp->skip_n_vectors));
+ vat_json_object_add_int (&node, "match", ntohl (mp->match_n_vectors));
+ u8 *s = format (0, "%U%c", format_hex_bytes, mp->mask,
+ ntohl (mp->mask_length), 0);
+ vat_json_object_add_string_copy (&node, "mask", s);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+ }
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_classify_session_details_t_handler (vl_api_classify_session_details_t *
+ mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "next_index: %d advance: %d opaque: %d ",
+ ntohl (mp->hit_next_index), ntohl (mp->advance),
+ ntohl (mp->opaque_index));
+ print (vam->ofp, "mask: %U", format_hex_bytes, mp->match,
+ ntohl (mp->match_length));
+}
+
+static void
+ vl_api_classify_session_details_t_handler_json
+ (vl_api_classify_session_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_int (node, "next_index", ntohl (mp->hit_next_index));
+ vat_json_object_add_int (node, "advance", ntohl (mp->advance));
+ vat_json_object_add_int (node, "opaque", ntohl (mp->opaque_index));
+ u8 *s =
+ format (0, "%U%c", format_hex_bytes, mp->match, ntohl (mp->match_length),
+ 0);
+ vat_json_object_add_string_copy (node, "match", s);
+ vec_free (s);
+}
+
+static void vl_api_pg_create_interface_reply_t_handler
+ (vl_api_pg_create_interface_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_pg_create_interface_reply_t_handler_json
+ (vl_api_pg_create_interface_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ i32 retval = ntohl (mp->retval);
+ if (retval == 0)
+ {
+ vat_json_init_object (&node);
+
+ vat_json_object_add_int (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+ }
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_policer_classify_details_t_handler
+ (vl_api_policer_classify_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%10d%20d", ntohl (mp->sw_if_index),
+ ntohl (mp->table_index));
+}
+
+static void vl_api_policer_classify_details_t_handler_json
+ (vl_api_policer_classify_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ vat_json_object_add_uint (node, "table_index", ntohl (mp->table_index));
+}
+
+static void vl_api_ipsec_gre_add_del_tunnel_reply_t_handler
+ (vl_api_ipsec_gre_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_ipsec_gre_add_del_tunnel_reply_t_handler_json
+ (vl_api_ipsec_gre_add_del_tunnel_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_flow_classify_details_t_handler
+ (vl_api_flow_classify_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%10d%20d", ntohl (mp->sw_if_index),
+ ntohl (mp->table_index));
+}
+
+static void vl_api_flow_classify_details_t_handler_json
+ (vl_api_flow_classify_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ vat_json_object_add_uint (node, "table_index", ntohl (mp->table_index));
+}
+
+#define vl_api_vnet_interface_simple_counters_t_endian vl_noop_handler
+#define vl_api_vnet_interface_simple_counters_t_print vl_noop_handler
+#define vl_api_vnet_interface_combined_counters_t_endian vl_noop_handler
+#define vl_api_vnet_interface_combined_counters_t_print vl_noop_handler
+#define vl_api_vnet_ip4_fib_counters_t_endian vl_noop_handler
+#define vl_api_vnet_ip4_fib_counters_t_print vl_noop_handler
+#define vl_api_vnet_ip6_fib_counters_t_endian vl_noop_handler
+#define vl_api_vnet_ip6_fib_counters_t_print vl_noop_handler
+#define vl_api_vnet_ip4_nbr_counters_t_endian vl_noop_handler
+#define vl_api_vnet_ip4_nbr_counters_t_print vl_noop_handler
+#define vl_api_vnet_ip6_nbr_counters_t_endian vl_noop_handler
+#define vl_api_vnet_ip6_nbr_counters_t_print vl_noop_handler
+#define vl_api_one_adjacencies_get_reply_t_endian vl_noop_handler
+#define vl_api_one_adjacencies_get_reply_t_print vl_noop_handler
+#define vl_api_one_l2_arp_bd_get_reply_t_print vl_noop_handler
+#define vl_api_one_l2_arp_entries_get_reply_t_endian vl_noop_handler
+#define vl_api_one_l2_arp_entries_get_reply_t_print vl_noop_handler
+#define vl_api_one_l2_arp_bd_get_reply_t_endian vl_noop_handler
+#define vl_api_one_ndp_bd_get_reply_t_endian vl_noop_handler
+#define vl_api_one_ndp_bd_get_reply_t_print vl_noop_handler
+#define vl_api_one_ndp_entries_get_reply_t_print vl_noop_handler
+#define vl_api_one_ndp_entries_get_reply_t_endian vl_noop_handler
+
+/*
+ * Generate boilerplate reply handlers, which
+ * dig the return value out of the xxx_reply_t API message,
+ * stick it into vam->retval, and set vam->result_ready
+ *
+ * Could also do this by pointing N message decode slots at
+ * a single function, but that could break in subtle ways.
+ */
+
+#define foreach_standard_reply_retval_handler \
+_(sw_interface_set_flags_reply) \
+_(sw_interface_add_del_address_reply) \
+_(sw_interface_set_table_reply) \
+_(sw_interface_set_mpls_enable_reply) \
+_(sw_interface_set_vpath_reply) \
+_(sw_interface_set_vxlan_bypass_reply) \
+_(sw_interface_set_vxlan_gpe_bypass_reply) \
+_(sw_interface_set_l2_bridge_reply) \
+_(bridge_domain_add_del_reply) \
+_(sw_interface_set_l2_xconnect_reply) \
+_(l2fib_add_del_reply) \
+_(l2fib_flush_int_reply) \
+_(l2fib_flush_bd_reply) \
+_(ip_add_del_route_reply) \
+_(ip_table_add_del_reply) \
+_(ip_mroute_add_del_reply) \
+_(mpls_route_add_del_reply) \
+_(mpls_table_add_del_reply) \
+_(mpls_ip_bind_unbind_reply) \
+_(proxy_arp_add_del_reply) \
+_(proxy_arp_intfc_enable_disable_reply) \
+_(sw_interface_set_unnumbered_reply) \
+_(ip_neighbor_add_del_reply) \
+_(reset_vrf_reply) \
+_(oam_add_del_reply) \
+_(reset_fib_reply) \
+_(dhcp_proxy_config_reply) \
+_(dhcp_proxy_set_vss_reply) \
+_(dhcp_client_config_reply) \
+_(set_ip_flow_hash_reply) \
+_(sw_interface_ip6_enable_disable_reply) \
+_(sw_interface_ip6_set_link_local_address_reply) \
+_(ip6nd_proxy_add_del_reply) \
+_(sw_interface_ip6nd_ra_prefix_reply) \
+_(sw_interface_ip6nd_ra_config_reply) \
+_(set_arp_neighbor_limit_reply) \
+_(l2_patch_add_del_reply) \
+_(sr_policy_add_reply) \
+_(sr_policy_mod_reply) \
+_(sr_policy_del_reply) \
+_(sr_localsid_add_del_reply) \
+_(sr_steering_add_del_reply) \
+_(classify_add_del_session_reply) \
+_(classify_set_interface_ip_table_reply) \
+_(classify_set_interface_l2_tables_reply) \
+_(l2tpv3_set_tunnel_cookies_reply) \
+_(l2tpv3_interface_enable_disable_reply) \
+_(l2tpv3_set_lookup_key_reply) \
+_(l2_fib_clear_table_reply) \
+_(l2_interface_efp_filter_reply) \
+_(l2_interface_vlan_tag_rewrite_reply) \
+_(modify_vhost_user_if_reply) \
+_(delete_vhost_user_if_reply) \
+_(want_ip4_arp_events_reply) \
+_(want_ip6_nd_events_reply) \
+_(want_l2_macs_events_reply) \
+_(input_acl_set_interface_reply) \
+_(ipsec_spd_add_del_reply) \
+_(ipsec_interface_add_del_spd_reply) \
+_(ipsec_spd_add_del_entry_reply) \
+_(ipsec_sad_add_del_entry_reply) \
+_(ipsec_sa_set_key_reply) \
+_(ipsec_tunnel_if_add_del_reply) \
+_(ikev2_profile_add_del_reply) \
+_(ikev2_profile_set_auth_reply) \
+_(ikev2_profile_set_id_reply) \
+_(ikev2_profile_set_ts_reply) \
+_(ikev2_set_local_key_reply) \
+_(ikev2_set_responder_reply) \
+_(ikev2_set_ike_transforms_reply) \
+_(ikev2_set_esp_transforms_reply) \
+_(ikev2_set_sa_lifetime_reply) \
+_(ikev2_initiate_sa_init_reply) \
+_(ikev2_initiate_del_ike_sa_reply) \
+_(ikev2_initiate_del_child_sa_reply) \
+_(ikev2_initiate_rekey_child_sa_reply) \
+_(delete_loopback_reply) \
+_(bd_ip_mac_add_del_reply) \
+_(map_del_domain_reply) \
+_(map_add_del_rule_reply) \
+_(want_interface_events_reply) \
+_(want_stats_reply) \
+_(cop_interface_enable_disable_reply) \
+_(cop_whitelist_enable_disable_reply) \
+_(sw_interface_clear_stats_reply) \
+_(ioam_enable_reply) \
+_(ioam_disable_reply) \
+_(one_add_del_locator_reply) \
+_(one_add_del_local_eid_reply) \
+_(one_add_del_remote_mapping_reply) \
+_(one_add_del_adjacency_reply) \
+_(one_add_del_map_resolver_reply) \
+_(one_add_del_map_server_reply) \
+_(one_enable_disable_reply) \
+_(one_rloc_probe_enable_disable_reply) \
+_(one_map_register_enable_disable_reply) \
+_(one_map_register_set_ttl_reply) \
+_(one_set_transport_protocol_reply) \
+_(one_map_register_fallback_threshold_reply) \
+_(one_pitr_set_locator_set_reply) \
+_(one_map_request_mode_reply) \
+_(one_add_del_map_request_itr_rlocs_reply) \
+_(one_eid_table_add_del_map_reply) \
+_(one_use_petr_reply) \
+_(one_stats_enable_disable_reply) \
+_(one_add_del_l2_arp_entry_reply) \
+_(one_add_del_ndp_entry_reply) \
+_(one_stats_flush_reply) \
+_(gpe_enable_disable_reply) \
+_(gpe_set_encap_mode_reply) \
+_(gpe_add_del_iface_reply) \
+_(gpe_add_del_native_fwd_rpath_reply) \
+_(af_packet_delete_reply) \
+_(policer_classify_set_interface_reply) \
+_(netmap_create_reply) \
+_(netmap_delete_reply) \
+_(set_ipfix_exporter_reply) \
+_(set_ipfix_classify_stream_reply) \
+_(ipfix_classify_table_add_del_reply) \
+_(flow_classify_set_interface_reply) \
+_(sw_interface_span_enable_disable_reply) \
+_(pg_capture_reply) \
+_(pg_enable_disable_reply) \
+_(ip_source_and_port_range_check_add_del_reply) \
+_(ip_source_and_port_range_check_interface_add_del_reply)\
+_(delete_subif_reply) \
+_(l2_interface_pbb_tag_rewrite_reply) \
+_(punt_reply) \
+_(feature_enable_disable_reply) \
+_(sw_interface_tag_add_del_reply) \
+_(sw_interface_set_mtu_reply) \
+_(p2p_ethernet_add_reply) \
+_(p2p_ethernet_del_reply) \
+_(lldp_config_reply) \
+_(sw_interface_set_lldp_reply) \
+_(tcp_configure_src_addresses_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = &vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
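+
+/*
+ * For reference, the handler the macro above generates for one list
+ * entry, written out by hand for _(punt_reply):
+ *
+ * static void vl_api_punt_reply_t_handler (vl_api_punt_reply_t * mp)
+ * {
+ *   vat_main_t * vam = &vat_main;
+ *   i32 retval = ntohl (mp->retval);
+ *   if (vam->async_mode)
+ *     vam->async_errors += (retval < 0);
+ *   else
+ *     {
+ *       vam->retval = retval;
+ *       vam->result_ready = 1;
+ *     }
+ * }
+ */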
+
+#define _(n) \
+ static void vl_api_##n##_t_handler_json \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = &vat_main; \
+ vat_json_node_t node; \
+ vat_json_init_object(&node); \
+ vat_json_object_add_int(&node, "retval", ntohl(mp->retval)); \
+ vat_json_print(vam->ofp, &node); \
+ vam->retval = ntohl(mp->retval); \
+ vam->result_ready = 1; \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+
+#define foreach_vpe_api_reply_msg \
+_(CREATE_LOOPBACK_REPLY, create_loopback_reply) \
+_(CREATE_LOOPBACK_INSTANCE_REPLY, create_loopback_instance_reply) \
+_(SW_INTERFACE_DETAILS, sw_interface_details) \
+_(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) \
+_(CONTROL_PING_REPLY, control_ping_reply) \
+_(CLI_REPLY, cli_reply) \
+_(CLI_INBAND_REPLY, cli_inband_reply) \
+_(SW_INTERFACE_ADD_DEL_ADDRESS_REPLY, \
+ sw_interface_add_del_address_reply) \
+_(SW_INTERFACE_SET_TABLE_REPLY, sw_interface_set_table_reply) \
+_(SW_INTERFACE_SET_MPLS_ENABLE_REPLY, sw_interface_set_mpls_enable_reply) \
+_(SW_INTERFACE_SET_VPATH_REPLY, sw_interface_set_vpath_reply) \
+_(SW_INTERFACE_SET_VXLAN_BYPASS_REPLY, sw_interface_set_vxlan_bypass_reply) \
+_(SW_INTERFACE_SET_VXLAN_GPE_BYPASS_REPLY, sw_interface_set_vxlan_gpe_bypass_reply) \
+_(SW_INTERFACE_SET_L2_XCONNECT_REPLY, \
+ sw_interface_set_l2_xconnect_reply) \
+_(SW_INTERFACE_SET_L2_BRIDGE_REPLY, \
+ sw_interface_set_l2_bridge_reply) \
+_(BRIDGE_DOMAIN_ADD_DEL_REPLY, bridge_domain_add_del_reply) \
+_(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \
+_(BRIDGE_DOMAIN_SET_MAC_AGE_REPLY, bridge_domain_set_mac_age_reply) \
+_(L2FIB_ADD_DEL_REPLY, l2fib_add_del_reply) \
+_(L2FIB_FLUSH_INT_REPLY, l2fib_flush_int_reply) \
+_(L2FIB_FLUSH_BD_REPLY, l2fib_flush_bd_reply) \
+_(L2_FLAGS_REPLY, l2_flags_reply) \
+_(BRIDGE_FLAGS_REPLY, bridge_flags_reply) \
+_(TAP_CONNECT_REPLY, tap_connect_reply) \
+_(TAP_MODIFY_REPLY, tap_modify_reply) \
+_(TAP_DELETE_REPLY, tap_delete_reply) \
+_(SW_INTERFACE_TAP_DETAILS, sw_interface_tap_details) \
+_(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply) \
+_(IP_TABLE_ADD_DEL_REPLY, ip_table_add_del_reply) \
+_(IP_MROUTE_ADD_DEL_REPLY, ip_mroute_add_del_reply) \
+_(MPLS_TABLE_ADD_DEL_REPLY, mpls_table_add_del_reply) \
+_(MPLS_ROUTE_ADD_DEL_REPLY, mpls_route_add_del_reply) \
+_(MPLS_IP_BIND_UNBIND_REPLY, mpls_ip_bind_unbind_reply) \
+_(PROXY_ARP_ADD_DEL_REPLY, proxy_arp_add_del_reply) \
+_(PROXY_ARP_INTFC_ENABLE_DISABLE_REPLY, \
+ proxy_arp_intfc_enable_disable_reply) \
+_(MPLS_TUNNEL_ADD_DEL_REPLY, mpls_tunnel_add_del_reply) \
+_(SW_INTERFACE_SET_UNNUMBERED_REPLY, \
+ sw_interface_set_unnumbered_reply) \
+_(IP_NEIGHBOR_ADD_DEL_REPLY, ip_neighbor_add_del_reply) \
+_(RESET_VRF_REPLY, reset_vrf_reply) \
+_(CREATE_VLAN_SUBIF_REPLY, create_vlan_subif_reply) \
+_(CREATE_SUBIF_REPLY, create_subif_reply) \
+_(OAM_ADD_DEL_REPLY, oam_add_del_reply) \
+_(RESET_FIB_REPLY, reset_fib_reply) \
+_(DHCP_PROXY_CONFIG_REPLY, dhcp_proxy_config_reply) \
+_(DHCP_PROXY_SET_VSS_REPLY, dhcp_proxy_set_vss_reply) \
+_(DHCP_PROXY_DETAILS, dhcp_proxy_details) \
+_(DHCP_CLIENT_CONFIG_REPLY, dhcp_client_config_reply) \
+_(SET_IP_FLOW_HASH_REPLY, set_ip_flow_hash_reply) \
+_(SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY, \
+ sw_interface_ip6_enable_disable_reply) \
+_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY, \
+ sw_interface_ip6_set_link_local_address_reply) \
+_(IP6ND_PROXY_ADD_DEL_REPLY, ip6nd_proxy_add_del_reply) \
+_(IP6ND_PROXY_DETAILS, ip6nd_proxy_details) \
+_(SW_INTERFACE_IP6ND_RA_PREFIX_REPLY, \
+ sw_interface_ip6nd_ra_prefix_reply) \
+_(SW_INTERFACE_IP6ND_RA_CONFIG_REPLY, \
+ sw_interface_ip6nd_ra_config_reply) \
+_(SET_ARP_NEIGHBOR_LIMIT_REPLY, set_arp_neighbor_limit_reply) \
+_(L2_PATCH_ADD_DEL_REPLY, l2_patch_add_del_reply) \
+_(SR_POLICY_ADD_REPLY, sr_policy_add_reply) \
+_(SR_POLICY_MOD_REPLY, sr_policy_mod_reply) \
+_(SR_POLICY_DEL_REPLY, sr_policy_del_reply) \
+_(SR_LOCALSID_ADD_DEL_REPLY, sr_localsid_add_del_reply) \
+_(SR_STEERING_ADD_DEL_REPLY, sr_steering_add_del_reply) \
+_(CLASSIFY_ADD_DEL_TABLE_REPLY, classify_add_del_table_reply) \
+_(CLASSIFY_ADD_DEL_SESSION_REPLY, classify_add_del_session_reply) \
+_(CLASSIFY_SET_INTERFACE_IP_TABLE_REPLY, \
+classify_set_interface_ip_table_reply) \
+_(CLASSIFY_SET_INTERFACE_L2_TABLES_REPLY, \
+ classify_set_interface_l2_tables_reply) \
+_(GET_NODE_INDEX_REPLY, get_node_index_reply) \
+_(ADD_NODE_NEXT_REPLY, add_node_next_reply) \
+_(L2TPV3_CREATE_TUNNEL_REPLY, l2tpv3_create_tunnel_reply) \
+_(L2TPV3_SET_TUNNEL_COOKIES_REPLY, l2tpv3_set_tunnel_cookies_reply) \
+_(L2TPV3_INTERFACE_ENABLE_DISABLE_REPLY, \
+ l2tpv3_interface_enable_disable_reply) \
+_(L2TPV3_SET_LOOKUP_KEY_REPLY, l2tpv3_set_lookup_key_reply) \
+_(SW_IF_L2TPV3_TUNNEL_DETAILS, sw_if_l2tpv3_tunnel_details) \
+_(VXLAN_ADD_DEL_TUNNEL_REPLY, vxlan_add_del_tunnel_reply) \
+_(VXLAN_TUNNEL_DETAILS, vxlan_tunnel_details) \
+_(GRE_ADD_DEL_TUNNEL_REPLY, gre_add_del_tunnel_reply) \
+_(GRE_TUNNEL_DETAILS, gre_tunnel_details) \
+_(L2_FIB_CLEAR_TABLE_REPLY, l2_fib_clear_table_reply) \
+_(L2_INTERFACE_EFP_FILTER_REPLY, l2_interface_efp_filter_reply) \
+_(L2_INTERFACE_VLAN_TAG_REWRITE_REPLY, l2_interface_vlan_tag_rewrite_reply) \
+_(SW_INTERFACE_VHOST_USER_DETAILS, sw_interface_vhost_user_details) \
+_(CREATE_VHOST_USER_IF_REPLY, create_vhost_user_if_reply) \
+_(MODIFY_VHOST_USER_IF_REPLY, modify_vhost_user_if_reply) \
+_(DELETE_VHOST_USER_IF_REPLY, delete_vhost_user_if_reply) \
+_(SHOW_VERSION_REPLY, show_version_reply) \
+_(L2_FIB_TABLE_DETAILS, l2_fib_table_details) \
+_(VXLAN_GPE_ADD_DEL_TUNNEL_REPLY, vxlan_gpe_add_del_tunnel_reply) \
+_(VXLAN_GPE_TUNNEL_DETAILS, vxlan_gpe_tunnel_details) \
+_(INTERFACE_NAME_RENUMBER_REPLY, interface_name_renumber_reply) \
+_(WANT_IP4_ARP_EVENTS_REPLY, want_ip4_arp_events_reply) \
+_(IP4_ARP_EVENT, ip4_arp_event) \
+_(WANT_IP6_ND_EVENTS_REPLY, want_ip6_nd_events_reply) \
+_(IP6_ND_EVENT, ip6_nd_event) \
+_(WANT_L2_MACS_EVENTS_REPLY, want_l2_macs_events_reply) \
+_(L2_MACS_EVENT, l2_macs_event) \
+_(INPUT_ACL_SET_INTERFACE_REPLY, input_acl_set_interface_reply) \
+_(IP_ADDRESS_DETAILS, ip_address_details) \
+_(IP_DETAILS, ip_details) \
+_(IPSEC_SPD_ADD_DEL_REPLY, ipsec_spd_add_del_reply) \
+_(IPSEC_INTERFACE_ADD_DEL_SPD_REPLY, ipsec_interface_add_del_spd_reply) \
+_(IPSEC_SPD_ADD_DEL_ENTRY_REPLY, ipsec_spd_add_del_entry_reply) \
+_(IPSEC_SAD_ADD_DEL_ENTRY_REPLY, ipsec_sad_add_del_entry_reply) \
+_(IPSEC_SA_SET_KEY_REPLY, ipsec_sa_set_key_reply) \
+_(IPSEC_TUNNEL_IF_ADD_DEL_REPLY, ipsec_tunnel_if_add_del_reply) \
+_(IKEV2_PROFILE_ADD_DEL_REPLY, ikev2_profile_add_del_reply) \
+_(IKEV2_PROFILE_SET_AUTH_REPLY, ikev2_profile_set_auth_reply) \
+_(IKEV2_PROFILE_SET_ID_REPLY, ikev2_profile_set_id_reply) \
+_(IKEV2_PROFILE_SET_TS_REPLY, ikev2_profile_set_ts_reply) \
+_(IKEV2_SET_LOCAL_KEY_REPLY, ikev2_set_local_key_reply) \
+_(IKEV2_SET_RESPONDER_REPLY, ikev2_set_responder_reply) \
+_(IKEV2_SET_IKE_TRANSFORMS_REPLY, ikev2_set_ike_transforms_reply) \
+_(IKEV2_SET_ESP_TRANSFORMS_REPLY, ikev2_set_esp_transforms_reply) \
+_(IKEV2_SET_SA_LIFETIME_REPLY, ikev2_set_sa_lifetime_reply) \
+_(IKEV2_INITIATE_SA_INIT_REPLY, ikev2_initiate_sa_init_reply) \
+_(IKEV2_INITIATE_DEL_IKE_SA_REPLY, ikev2_initiate_del_ike_sa_reply) \
+_(IKEV2_INITIATE_DEL_CHILD_SA_REPLY, ikev2_initiate_del_child_sa_reply) \
+_(IKEV2_INITIATE_REKEY_CHILD_SA_REPLY, ikev2_initiate_rekey_child_sa_reply) \
+_(DELETE_LOOPBACK_REPLY, delete_loopback_reply) \
+_(BD_IP_MAC_ADD_DEL_REPLY, bd_ip_mac_add_del_reply) \
+_(DHCP_COMPL_EVENT, dhcp_compl_event) \
+_(MAP_ADD_DOMAIN_REPLY, map_add_domain_reply) \
+_(MAP_DEL_DOMAIN_REPLY, map_del_domain_reply) \
+_(MAP_ADD_DEL_RULE_REPLY, map_add_del_rule_reply) \
+_(MAP_DOMAIN_DETAILS, map_domain_details) \
+_(MAP_RULE_DETAILS, map_rule_details) \
+_(WANT_INTERFACE_EVENTS_REPLY, want_interface_events_reply) \
+_(WANT_STATS_REPLY, want_stats_reply) \
+_(GET_FIRST_MSG_ID_REPLY, get_first_msg_id_reply) \
+_(COP_INTERFACE_ENABLE_DISABLE_REPLY, cop_interface_enable_disable_reply) \
+_(COP_WHITELIST_ENABLE_DISABLE_REPLY, cop_whitelist_enable_disable_reply) \
+_(GET_NODE_GRAPH_REPLY, get_node_graph_reply) \
+_(SW_INTERFACE_CLEAR_STATS_REPLY, sw_interface_clear_stats_reply) \
+_(IOAM_ENABLE_REPLY, ioam_enable_reply) \
+_(IOAM_DISABLE_REPLY, ioam_disable_reply) \
+_(ONE_ADD_DEL_LOCATOR_SET_REPLY, one_add_del_locator_set_reply) \
+_(ONE_ADD_DEL_LOCATOR_REPLY, one_add_del_locator_reply) \
+_(ONE_ADD_DEL_LOCAL_EID_REPLY, one_add_del_local_eid_reply) \
+_(ONE_ADD_DEL_REMOTE_MAPPING_REPLY, one_add_del_remote_mapping_reply) \
+_(ONE_ADD_DEL_ADJACENCY_REPLY, one_add_del_adjacency_reply) \
+_(ONE_ADD_DEL_MAP_RESOLVER_REPLY, one_add_del_map_resolver_reply) \
+_(ONE_ADD_DEL_MAP_SERVER_REPLY, one_add_del_map_server_reply) \
+_(ONE_ENABLE_DISABLE_REPLY, one_enable_disable_reply) \
+_(ONE_MAP_REGISTER_ENABLE_DISABLE_REPLY, \
+ one_map_register_enable_disable_reply) \
+_(ONE_MAP_REGISTER_SET_TTL_REPLY, one_map_register_set_ttl_reply) \
+_(ONE_SET_TRANSPORT_PROTOCOL_REPLY, one_set_transport_protocol_reply) \
+_(ONE_GET_TRANSPORT_PROTOCOL_REPLY, one_get_transport_protocol_reply) \
+_(ONE_MAP_REGISTER_FALLBACK_THRESHOLD_REPLY, \
+ one_map_register_fallback_threshold_reply) \
+_(ONE_RLOC_PROBE_ENABLE_DISABLE_REPLY, \
+ one_rloc_probe_enable_disable_reply) \
+_(ONE_PITR_SET_LOCATOR_SET_REPLY, one_pitr_set_locator_set_reply) \
+_(ONE_USE_PETR_REPLY, one_use_petr_reply) \
+_(ONE_MAP_REQUEST_MODE_REPLY, one_map_request_mode_reply) \
+_(ONE_EID_TABLE_ADD_DEL_MAP_REPLY, one_eid_table_add_del_map_reply) \
+_(ONE_LOCATOR_SET_DETAILS, one_locator_set_details) \
+_(ONE_LOCATOR_DETAILS, one_locator_details) \
+_(ONE_EID_TABLE_DETAILS, one_eid_table_details) \
+_(ONE_EID_TABLE_MAP_DETAILS, one_eid_table_map_details) \
+_(ONE_EID_TABLE_VNI_DETAILS, one_eid_table_vni_details) \
+_(ONE_MAP_RESOLVER_DETAILS, one_map_resolver_details) \
+_(ONE_MAP_SERVER_DETAILS, one_map_server_details) \
+_(ONE_ADJACENCIES_GET_REPLY, one_adjacencies_get_reply) \
+_(ONE_STATS_DETAILS, one_stats_details) \
+_(ONE_STATS_FLUSH_REPLY, one_stats_flush_reply) \
+_(ONE_STATS_ENABLE_DISABLE_REPLY, one_stats_enable_disable_reply) \
+_(SHOW_ONE_STATS_ENABLE_DISABLE_REPLY, \
+ show_one_stats_enable_disable_reply) \
+_(ONE_ADD_DEL_NDP_ENTRY_REPLY, one_add_del_ndp_entry_reply) \
+_(ONE_NDP_BD_GET_REPLY, one_ndp_bd_get_reply) \
+_(ONE_NDP_ENTRIES_GET_REPLY, one_ndp_entries_get_reply) \
+_(ONE_ADD_DEL_L2_ARP_ENTRY_REPLY, one_add_del_l2_arp_entry_reply) \
+_(ONE_L2_ARP_BD_GET_REPLY, one_l2_arp_bd_get_reply) \
+_(ONE_L2_ARP_ENTRIES_GET_REPLY, one_l2_arp_entries_get_reply) \
+_(GPE_SET_ENCAP_MODE_REPLY, gpe_set_encap_mode_reply) \
+_(GPE_GET_ENCAP_MODE_REPLY, gpe_get_encap_mode_reply) \
+_(GPE_ADD_DEL_IFACE_REPLY, gpe_add_del_iface_reply) \
+_(GPE_ENABLE_DISABLE_REPLY, gpe_enable_disable_reply) \
+_(GPE_ADD_DEL_FWD_ENTRY_REPLY, gpe_add_del_fwd_entry_reply) \
+_(GPE_FWD_ENTRY_VNIS_GET_REPLY, gpe_fwd_entry_vnis_get_reply) \
+_(GPE_FWD_ENTRIES_GET_REPLY, gpe_fwd_entries_get_reply) \
+_(GPE_NATIVE_FWD_RPATHS_GET_REPLY, gpe_native_fwd_rpaths_get_reply) \
+_(GPE_ADD_DEL_NATIVE_FWD_RPATH_REPLY, \
+ gpe_add_del_native_fwd_rpath_reply) \
+_(GPE_FWD_ENTRY_PATH_DETAILS, \
+ gpe_fwd_entry_path_details) \
+_(SHOW_ONE_STATUS_REPLY, show_one_status_reply) \
+_(ONE_ADD_DEL_MAP_REQUEST_ITR_RLOCS_REPLY, \
+ one_add_del_map_request_itr_rlocs_reply) \
+_(ONE_GET_MAP_REQUEST_ITR_RLOCS_REPLY, \
+ one_get_map_request_itr_rlocs_reply) \
+_(SHOW_ONE_NSH_MAPPING_REPLY, show_one_nsh_mapping_reply) \
+_(SHOW_ONE_PITR_REPLY, show_one_pitr_reply) \
+_(SHOW_ONE_USE_PETR_REPLY, show_one_use_petr_reply) \
+_(SHOW_ONE_MAP_REQUEST_MODE_REPLY, show_one_map_request_mode_reply) \
+_(SHOW_ONE_RLOC_PROBE_STATE_REPLY, show_one_rloc_probe_state_reply) \
+_(SHOW_ONE_MAP_REGISTER_STATE_REPLY, \
+ show_one_map_register_state_reply) \
+_(SHOW_ONE_MAP_REGISTER_TTL_REPLY, show_one_map_register_ttl_reply) \
+_(SHOW_ONE_MAP_REGISTER_FALLBACK_THRESHOLD_REPLY, \
+ show_one_map_register_fallback_threshold_reply) \
+_(AF_PACKET_CREATE_REPLY, af_packet_create_reply) \
+_(AF_PACKET_DELETE_REPLY, af_packet_delete_reply) \
+_(POLICER_ADD_DEL_REPLY, policer_add_del_reply) \
+_(POLICER_DETAILS, policer_details) \
+_(POLICER_CLASSIFY_SET_INTERFACE_REPLY, policer_classify_set_interface_reply) \
+_(POLICER_CLASSIFY_DETAILS, policer_classify_details) \
+_(NETMAP_CREATE_REPLY, netmap_create_reply) \
+_(NETMAP_DELETE_REPLY, netmap_delete_reply) \
+_(MPLS_TUNNEL_DETAILS, mpls_tunnel_details) \
+_(MPLS_FIB_DETAILS, mpls_fib_details) \
+_(CLASSIFY_TABLE_IDS_REPLY, classify_table_ids_reply) \
+_(CLASSIFY_TABLE_BY_INTERFACE_REPLY, classify_table_by_interface_reply) \
+_(CLASSIFY_TABLE_INFO_REPLY, classify_table_info_reply) \
+_(CLASSIFY_SESSION_DETAILS, classify_session_details) \
+_(SET_IPFIX_EXPORTER_REPLY, set_ipfix_exporter_reply) \
+_(IPFIX_EXPORTER_DETAILS, ipfix_exporter_details) \
+_(SET_IPFIX_CLASSIFY_STREAM_REPLY, set_ipfix_classify_stream_reply) \
+_(IPFIX_CLASSIFY_STREAM_DETAILS, ipfix_classify_stream_details) \
+_(IPFIX_CLASSIFY_TABLE_ADD_DEL_REPLY, ipfix_classify_table_add_del_reply) \
+_(IPFIX_CLASSIFY_TABLE_DETAILS, ipfix_classify_table_details) \
+_(FLOW_CLASSIFY_SET_INTERFACE_REPLY, flow_classify_set_interface_reply) \
+_(FLOW_CLASSIFY_DETAILS, flow_classify_details) \
+_(SW_INTERFACE_SPAN_ENABLE_DISABLE_REPLY, sw_interface_span_enable_disable_reply) \
+_(SW_INTERFACE_SPAN_DETAILS, sw_interface_span_details) \
+_(GET_NEXT_INDEX_REPLY, get_next_index_reply) \
+_(PG_CREATE_INTERFACE_REPLY, pg_create_interface_reply) \
+_(PG_CAPTURE_REPLY, pg_capture_reply) \
+_(PG_ENABLE_DISABLE_REPLY, pg_enable_disable_reply) \
+_(IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL_REPLY, \
+ ip_source_and_port_range_check_add_del_reply) \
+_(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL_REPLY, \
+ ip_source_and_port_range_check_interface_add_del_reply) \
+_(IPSEC_GRE_ADD_DEL_TUNNEL_REPLY, ipsec_gre_add_del_tunnel_reply) \
+_(IPSEC_GRE_TUNNEL_DETAILS, ipsec_gre_tunnel_details) \
+_(DELETE_SUBIF_REPLY, delete_subif_reply) \
+_(L2_INTERFACE_PBB_TAG_REWRITE_REPLY, l2_interface_pbb_tag_rewrite_reply) \
+_(PUNT_REPLY, punt_reply) \
+_(IP_FIB_DETAILS, ip_fib_details) \
+_(IP6_FIB_DETAILS, ip6_fib_details) \
+_(FEATURE_ENABLE_DISABLE_REPLY, feature_enable_disable_reply) \
+_(SW_INTERFACE_TAG_ADD_DEL_REPLY, sw_interface_tag_add_del_reply) \
+_(L2_XCONNECT_DETAILS, l2_xconnect_details) \
+_(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \
+_(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \
+_(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) \
+_(P2P_ETHERNET_ADD_REPLY, p2p_ethernet_add_reply) \
+_(P2P_ETHERNET_DEL_REPLY, p2p_ethernet_del_reply) \
+_(LLDP_CONFIG_REPLY, lldp_config_reply) \
+_(SW_INTERFACE_SET_LLDP_REPLY, sw_interface_set_lldp_reply) \
+_(TCP_CONFIGURE_SRC_ADDRESSES_REPLY, tcp_configure_src_addresses_reply)
+
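+/*
+ * Messages that arrive on their own (interface events and periodic
+ * stats counters) rather than as replies to a specific request.
+ */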
+#define foreach_standalone_reply_msg \
+_(SW_INTERFACE_EVENT, sw_interface_event) \
+_(VNET_INTERFACE_SIMPLE_COUNTERS, vnet_interface_simple_counters) \
+_(VNET_INTERFACE_COMBINED_COUNTERS, vnet_interface_combined_counters) \
+_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \
+_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \
+_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \
+_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters)
+
+typedef struct
+{
+ u8 *name;
+ u32 value;
+} name_sort_t;
+
+
+#define STR_VTR_OP_CASE(op) \
+ case L2_VTR_ ## op: \
+ return "" # op;
+
+static const char *
+str_vtr_op (u32 vtr_op)
+{
+ switch (vtr_op)
+ {
+ STR_VTR_OP_CASE (DISABLED);
+ STR_VTR_OP_CASE (PUSH_1);
+ STR_VTR_OP_CASE (PUSH_2);
+ STR_VTR_OP_CASE (POP_1);
+ STR_VTR_OP_CASE (POP_2);
+ STR_VTR_OP_CASE (TRANSLATE_1_1);
+ STR_VTR_OP_CASE (TRANSLATE_1_2);
+ STR_VTR_OP_CASE (TRANSLATE_2_1);
+ STR_VTR_OP_CASE (TRANSLATE_2_2);
+ }
+
+ return "UNKNOWN";
+}
+
+static int
+dump_sub_interface_table (vat_main_t * vam)
+{
+ const sw_interface_subif_t *sub = NULL;
+
+ if (vam->json_output)
+ {
+ clib_warning
+ ("JSON output supported only for VPE API calls and dump_stats_table");
+ return -99;
+ }
+
+ print (vam->ofp,
+ "%-30s%-12s%-11s%-7s%-5s%-9s%-9s%-6s%-8s%-10s%-10s",
+ "Interface", "sw_if_index",
+ "sub id", "dot1ad", "tags", "outer id",
+ "inner id", "exact", "default", "outer any", "inner any");
+
+ vec_foreach (sub, vam->sw_if_subif_table)
+ {
+ print (vam->ofp,
+ "%-30s%-12d%-11d%-7s%-5d%-9d%-9d%-6d%-8d%-10d%-10d",
+ sub->interface_name,
+ sub->sw_if_index,
+ sub->sub_id, sub->sub_dot1ad ? "dot1ad" : "dot1q",
+ sub->sub_number_of_tags, sub->sub_outer_vlan_id,
+ sub->sub_inner_vlan_id, sub->sub_exact_match, sub->sub_default,
+ sub->sub_outer_vlan_id_any, sub->sub_inner_vlan_id_any);
+ if (sub->vtr_op != L2_VTR_DISABLED)
+ {
+ print (vam->ofp,
+ " vlan-tag-rewrite - op: %-14s [ dot1q: %d "
+ "tag1: %d tag2: %d ]",
+ str_vtr_op (sub->vtr_op), sub->vtr_push_dot1q,
+ sub->vtr_tag1, sub->vtr_tag2);
+ }
+ }
+
+ return 0;
+}
+
+static int
+name_sort_cmp (void *a1, void *a2)
+{
+ name_sort_t *n1 = a1;
+ name_sort_t *n2 = a2;
+
+ return strcmp ((char *) n1->name, (char *) n2->name);
+}
+
+static int
+dump_interface_table (vat_main_t * vam)
+{
+ hash_pair_t *p;
+ name_sort_t *nses = 0, *ns;
+
+ if (vam->json_output)
+ {
+ clib_warning
+ ("JSON output supported only for VPE API calls and dump_stats_table");
+ return -99;
+ }
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vam->sw_if_index_by_interface_name,
+ ({
+ vec_add2 (nses, ns, 1);
+ ns->name = (u8 *)(p->key);
+ ns->value = (u32) p->value[0];
+ }));
+ /* *INDENT-ON* */
+
+ vec_sort_with_function (nses, name_sort_cmp);
+
+ print (vam->ofp, "%-25s%-15s", "Interface", "sw_if_index");
+ vec_foreach (ns, nses)
+ {
+ print (vam->ofp, "%-25s%-15d", ns->name, ns->value);
+ }
+ vec_free (nses);
+ return 0;
+}
+
+static int
+dump_ip_table (vat_main_t * vam, int is_ipv6)
+{
+ const ip_details_t *det = NULL;
+ const ip_address_details_t *address = NULL;
+ u32 i = ~0;
+
+ print (vam->ofp, "%-12s", "sw_if_index");
+
+ vec_foreach (det, vam->ip_details_by_sw_if_index[is_ipv6])
+ {
+ i++;
+ if (!det->present)
+ {
+ continue;
+ }
+ print (vam->ofp, "%-12d", i);
+ print (vam->ofp, " %-30s%-13s", "Address", "Prefix length");
+ if (!det->addr)
+ {
+ continue;
+ }
+ vec_foreach (address, det->addr)
+ {
+ print (vam->ofp,
+ " %-30U%-13d",
+ is_ipv6 ? format_ip6_address : format_ip4_address,
+ address->ip, address->prefix_length);
+ }
+ }
+
+ return 0;
+}
+
+static int
+dump_ipv4_table (vat_main_t * vam)
+{
+ if (vam->json_output)
+ {
+ clib_warning
+ ("JSON output supported only for VPE API calls and dump_stats_table");
+ return -99;
+ }
+
+ return dump_ip_table (vam, 0);
+}
+
+static int
+dump_ipv6_table (vat_main_t * vam)
+{
+ if (vam->json_output)
+ {
+ clib_warning
+ ("JSON output supported only for VPE API calls and dump_stats_table");
+ return -99;
+ }
+
+ return dump_ip_table (vam, 1);
+}
+
+static char *
+counter_type_to_str (u8 counter_type, u8 is_combined)
+{
+ if (!is_combined)
+ {
+ switch (counter_type)
+ {
+ case VNET_INTERFACE_COUNTER_DROP:
+ return "drop";
+ case VNET_INTERFACE_COUNTER_PUNT:
+ return "punt";
+ case VNET_INTERFACE_COUNTER_IP4:
+ return "ip4";
+ case VNET_INTERFACE_COUNTER_IP6:
+ return "ip6";
+ case VNET_INTERFACE_COUNTER_RX_NO_BUF:
+ return "rx-no-buf";
+ case VNET_INTERFACE_COUNTER_RX_MISS:
+ return "rx-miss";
+ case VNET_INTERFACE_COUNTER_RX_ERROR:
+ return "rx-error";
+ case VNET_INTERFACE_COUNTER_TX_ERROR:
+ return "tx-error";
+ default:
+ return "INVALID-COUNTER-TYPE";
+ }
+ }
+ else
+ {
+ switch (counter_type)
+ {
+ case VNET_INTERFACE_COUNTER_RX:
+ return "rx";
+ case VNET_INTERFACE_COUNTER_TX:
+ return "tx";
+ default:
+ return "INVALID-COUNTER-TYPE";
+ }
+ }
+}
+
+static int
+dump_stats_table (vat_main_t * vam)
+{
+ vat_json_node_t node;
+ vat_json_node_t *msg_array;
+ vat_json_node_t *msg;
+ vat_json_node_t *counter_array;
+ vat_json_node_t *counter;
+ interface_counter_t c;
+ u64 packets;
+ ip4_fib_counter_t *c4;
+ ip6_fib_counter_t *c6;
+ ip4_nbr_counter_t *n4;
+ ip6_nbr_counter_t *n6;
+ int i, j;
+
+ if (!vam->json_output)
+ {
+ clib_warning ("dump_stats_table supported only in JSON format");
+ return -99;
+ }
+
+ vat_json_init_object (&node);
+
+ /* interface counters */
+ msg_array = vat_json_object_add (&node, "interface_counters");
+ vat_json_init_array (msg_array);
+ for (i = 0; i < vec_len (vam->simple_interface_counters); i++)
+ {
+ msg = vat_json_array_add (msg_array);
+ vat_json_init_object (msg);
+ vat_json_object_add_string_copy (msg, "vnet_counter_type",
+ (u8 *) counter_type_to_str (i, 0));
+ vat_json_object_add_int (msg, "is_combined", 0);
+ counter_array = vat_json_object_add (msg, "data");
+ vat_json_init_array (counter_array);
+ for (j = 0; j < vec_len (vam->simple_interface_counters[i]); j++)
+ {
+ packets = vam->simple_interface_counters[i][j];
+ vat_json_array_add_uint (counter_array, packets);
+ }
+ }
+ for (i = 0; i < vec_len (vam->combined_interface_counters); i++)
+ {
+ msg = vat_json_array_add (msg_array);
+ vat_json_init_object (msg);
+ vat_json_object_add_string_copy (msg, "vnet_counter_type",
+ (u8 *) counter_type_to_str (i, 1));
+ vat_json_object_add_int (msg, "is_combined", 1);
+ counter_array = vat_json_object_add (msg, "data");
+ vat_json_init_array (counter_array);
+ for (j = 0; j < vec_len (vam->combined_interface_counters[i]); j++)
+ {
+ c = vam->combined_interface_counters[i][j];
+ counter = vat_json_array_add (counter_array);
+ vat_json_init_object (counter);
+ vat_json_object_add_uint (counter, "packets", c.packets);
+ vat_json_object_add_uint (counter, "bytes", c.bytes);
+ }
+ }
+
+ /* ip4 fib counters */
+ msg_array = vat_json_object_add (&node, "ip4_fib_counters");
+ vat_json_init_array (msg_array);
+ for (i = 0; i < vec_len (vam->ip4_fib_counters); i++)
+ {
+ msg = vat_json_array_add (msg_array);
+ vat_json_init_object (msg);
+ vat_json_object_add_uint (msg, "vrf_id",
+ vam->ip4_fib_counters_vrf_id_by_index[i]);
+ counter_array = vat_json_object_add (msg, "c");
+ vat_json_init_array (counter_array);
+ for (j = 0; j < vec_len (vam->ip4_fib_counters[i]); j++)
+ {
+ counter = vat_json_array_add (counter_array);
+ vat_json_init_object (counter);
+ c4 = &vam->ip4_fib_counters[i][j];
+ vat_json_object_add_ip4 (counter, "address", c4->address);
+ vat_json_object_add_uint (counter, "address_length",
+ c4->address_length);
+ vat_json_object_add_uint (counter, "packets", c4->packets);
+ vat_json_object_add_uint (counter, "bytes", c4->bytes);
+ }
+ }
+
+ /* ip6 fib counters */
+ msg_array = vat_json_object_add (&node, "ip6_fib_counters");
+ vat_json_init_array (msg_array);
+ for (i = 0; i < vec_len (vam->ip6_fib_counters); i++)
+ {
+ msg = vat_json_array_add (msg_array);
+ vat_json_init_object (msg);
+ vat_json_object_add_uint (msg, "vrf_id",
+ vam->ip6_fib_counters_vrf_id_by_index[i]);
+ counter_array = vat_json_object_add (msg, "c");
+ vat_json_init_array (counter_array);
+ for (j = 0; j < vec_len (vam->ip6_fib_counters[i]); j++)
+ {
+ counter = vat_json_array_add (counter_array);
+ vat_json_init_object (counter);
+ c6 = &vam->ip6_fib_counters[i][j];
+ vat_json_object_add_ip6 (counter, "address", c6->address);
+ vat_json_object_add_uint (counter, "address_length",
+ c6->address_length);
+ vat_json_object_add_uint (counter, "packets", c6->packets);
+ vat_json_object_add_uint (counter, "bytes", c6->bytes);
+ }
+ }
+
+ /* ip4 nbr counters */
+ msg_array = vat_json_object_add (&node, "ip4_nbr_counters");
+ vat_json_init_array (msg_array);
+ for (i = 0; i < vec_len (vam->ip4_nbr_counters); i++)
+ {
+ msg = vat_json_array_add (msg_array);
+ vat_json_init_object (msg);
+ vat_json_object_add_uint (msg, "sw_if_index", i);
+ counter_array = vat_json_object_add (msg, "c");
+ vat_json_init_array (counter_array);
+ for (j = 0; j < vec_len (vam->ip4_nbr_counters[i]); j++)
+ {
+ counter = vat_json_array_add (counter_array);
+ vat_json_init_object (counter);
+ n4 = &vam->ip4_nbr_counters[i][j];
+ vat_json_object_add_ip4 (counter, "address", n4->address);
+ vat_json_object_add_uint (counter, "link-type", n4->linkt);
+ vat_json_object_add_uint (counter, "packets", n4->packets);
+ vat_json_object_add_uint (counter, "bytes", n4->bytes);
+ }
+ }
+
+ /* ip6 nbr counters */
+ msg_array = vat_json_object_add (&node, "ip6_nbr_counters");
+ vat_json_init_array (msg_array);
+ for (i = 0; i < vec_len (vam->ip6_nbr_counters); i++)
+ {
+ msg = vat_json_array_add (msg_array);
+ vat_json_init_object (msg);
+ vat_json_object_add_uint (msg, "sw_if_index", i);
+ counter_array = vat_json_object_add (msg, "c");
+ vat_json_init_array (counter_array);
+ for (j = 0; j < vec_len (vam->ip6_nbr_counters[i]); j++)
+ {
+ counter = vat_json_array_add (counter_array);
+ vat_json_init_object (counter);
+ n6 = &vam->ip6_nbr_counters[i][j];
+ vat_json_object_add_ip6 (counter, "address", n6->address);
+ vat_json_object_add_uint (counter, "packets", n6->packets);
+ vat_json_object_add_uint (counter, "bytes", n6->bytes);
+ }
+ }
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ return 0;
+}
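+
+/*
+ * Shape of the JSON emitted above (abridged, hypothetical values):
+ * {
+ *   "interface_counters": [ { "vnet_counter_type": "drop", "is_combined": 0,
+ *                             "data": [ 0, 12 ] }, ... ],
+ *   "ip4_fib_counters": [ { "vrf_id": 0,
+ *                           "c": [ { "address": "10.0.0.1",
+ *                                    "address_length": 32,
+ *                                    "packets": 5, "bytes": 370 } ] } ],
+ *   "ip6_fib_counters": [ ... ],
+ *   "ip4_nbr_counters": [ ... ],
+ *   "ip6_nbr_counters": [ ... ]
+ * }
+ */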
+
+int
+exec (vat_main_t * vam)
+{
+ api_main_t *am = &api_main;
+ vl_api_cli_t *mp;
+ f64 timeout;
+ void *oldheap;
+ u8 *cmd = 0;
+ unformat_input_t *i = vam->input;
+
+ if (vec_len (i->buffer) == 0)
+ return -1;
+
+ if (vam->exec_mode == 0 && unformat (i, "mode"))
+ {
+ vam->exec_mode = 1;
+ return 0;
+ }
+ if (vam->exec_mode == 1 && (unformat (i, "exit") || unformat (i, "quit")))
+ {
+ vam->exec_mode = 0;
+ return 0;
+ }
+
+
+ M (CLI, mp);
+
+ /*
+ * Copy cmd into shared memory.
+ * In order for the CLI command to work, it
+ * must be a vector ending in \n, not a C-string ending
+ * in \n\0.
+ */
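+ /* e.g. "show version\n" travels as a 13-byte vector with no trailing NUL */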
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ vec_validate (cmd, vec_len (vam->input->buffer) - 1);
+ clib_memcpy (cmd, vam->input->buffer, vec_len (vam->input->buffer));
+
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+
+ mp->cmd_in_shmem = pointer_to_uword (cmd);
+ S (mp);
+ timeout = vat_time_now (vam) + 10.0;
+
+ while (vat_time_now (vam) < timeout)
+ {
+ if (vam->result_ready == 1)
+ {
+ u8 *free_me;
+ if (vam->shmem_result != NULL)
+ print (vam->ofp, "%s", vam->shmem_result);
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ free_me = (u8 *) vam->shmem_result;
+ vec_free (free_me);
+
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+ return 0;
+ }
+ }
+ return -99;
+}
+
+/*
+ * Future replacement for exec() that passes the CLI buffer directly in
+ * the API message instead of through an additional shared-memory area.
+ */
+static int
+exec_inband (vat_main_t * vam)
+{
+ vl_api_cli_inband_t *mp;
+ unformat_input_t *i = vam->input;
+ int ret;
+
+ if (vec_len (i->buffer) == 0)
+ return -1;
+
+ if (vam->exec_mode == 0 && unformat (i, "mode"))
+ {
+ vam->exec_mode = 1;
+ return 0;
+ }
+ if (vam->exec_mode == 1 && (unformat (i, "exit") || unformat (i, "quit")))
+ {
+ vam->exec_mode = 0;
+ return 0;
+ }
+
+ /*
+ * In order for the CLI command to work, it
+ * must be a vector ending in \n, not a C-string ending
+ * in \n\0.
+ */
+ u32 len = vec_len (vam->input->buffer);
+ M2 (CLI_INBAND, mp, len);
+ clib_memcpy (mp->cmd, vam->input->buffer, len);
+ mp->length = htonl (len);
+
+ S (mp);
+ W2 (ret, print (vam->ofp, "%s", vam->cmd_reply));
+ return ret;
+}
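+
+/*
+ * VAT macro conventions used above: M2 allocates the request with room
+ * for `len` trailing bytes, S sends it, and W2 spins until the reply
+ * handler sets vam->result_ready, runs the supplied statement (here the
+ * print of vam->cmd_reply), and returns vam->retval in `ret`.
+ */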
+
+static int
+api_create_loopback (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_create_loopback_t *mp;
+ vl_api_create_loopback_instance_t *mp_lbi;
+ u8 mac_address[6];
+ u8 mac_set = 0;
+ u8 is_specified = 0;
+ u32 user_instance = 0;
+ int ret;
+
+ memset (mac_address, 0, sizeof (mac_address));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "mac %U", unformat_ethernet_address, mac_address))
+ mac_set = 1;
+ else if (unformat (i, "instance %d", &user_instance))
+ is_specified = 1;
+ else
+ break;
+ }
+
+ if (is_specified)
+ {
+ M (CREATE_LOOPBACK_INSTANCE, mp_lbi);
+ mp_lbi->is_specified = is_specified;
+ mp_lbi->user_instance = htonl (user_instance);
+ if (mac_set)
+ clib_memcpy (mp_lbi->mac_address, mac_address, sizeof (mac_address));
+ S (mp_lbi);
+ }
+ else
+ {
+ /* Construct the API message */
+ M (CREATE_LOOPBACK, mp);
+ if (mac_set)
+ clib_memcpy (mp->mac_address, mac_address, sizeof (mac_address));
+ S (mp);
+ }
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_delete_loopback (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_delete_loopback_t *mp;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (DELETE_LOOPBACK, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_want_stats (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_want_stats_t *mp;
+ int enable = -1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (enable == -1)
+ {
+ errmsg ("missing enable|disable");
+ return -99;
+ }
+
+ M (WANT_STATS, mp);
+ mp->enable_disable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_want_interface_events (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_want_interface_events_t *mp;
+ int enable = -1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (enable == -1)
+ {
+ errmsg ("missing enable|disable");
+ return -99;
+ }
+
+ M (WANT_INTERFACE_EVENTS, mp);
+ mp->enable_disable = enable;
+
+ vam->interface_event_display = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+
+/* Note: non-static, called once to set up the initial intfc table */
+int
+api_sw_interface_dump (vat_main_t * vam)
+{
+ vl_api_sw_interface_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ hash_pair_t *p;
+ name_sort_t *nses = 0, *ns;
+ sw_interface_subif_t *sub = NULL;
+ int ret;
+
+ /* Toss the old name table */
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vam->sw_if_index_by_interface_name,
+ ({
+ vec_add2 (nses, ns, 1);
+ ns->name = (u8 *)(p->key);
+ ns->value = (u32) p->value[0];
+ }));
+ /* *INDENT-ON* */
+
+ hash_free (vam->sw_if_index_by_interface_name);
+
+ vec_foreach (ns, nses) vec_free (ns->name);
+
+ vec_free (nses);
+
+ vec_foreach (sub, vam->sw_if_subif_table)
+ {
+ vec_free (sub->interface_name);
+ }
+ vec_free (vam->sw_if_subif_table);
+
+ /* recreate the interface name hash table */
+ vam->sw_if_index_by_interface_name = hash_create_string (0, sizeof (uword));
+
+ /* Get list of ethernets */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "Ether", sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and local / loopback interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "lo", sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and packet-generator interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "pg", sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and vxlan-gpe tunnel interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "vxlan_gpe",
+ sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and vxlan tunnel interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "vxlan", sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and host (af_packet) interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "host", sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and l2tpv3 tunnel interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "l2tpv3_tunnel",
+ sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and GRE tunnel interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "gre", sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and LISP-GPE interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "lisp_gpe",
+ sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* and IPSEC tunnel interfaces */
+ M (SW_INTERFACE_DUMP, mp);
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "ipsec", sizeof (mp->name_filter) - 1);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_flags (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_flags_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 admin_up = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "admin-up"))
+ admin_up = 1;
+ else if (unformat (i, "admin-down"))
+ admin_up = 0;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_FLAGS, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->admin_up_down = admin_up;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return the good/bad news... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_clear_stats (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_clear_stats_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_CLEAR_STATS, mp);
+
+ if (sw_if_index_set == 1)
+ mp->sw_if_index = ntohl (sw_if_index);
+ else
+ mp->sw_if_index = ~0;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return the good/bad news... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_add_del_address (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_add_del_address_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_add = 1, del_all = 0;
+ u32 address_length = 0;
+ u8 v4_address_set = 0;
+ u8 v6_address_set = 0;
+ ip4_address_t v4address;
+ ip6_address_t v6address;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del-all"))
+ del_all = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "%U/%d",
+ unformat_ip4_address, &v4address, &address_length))
+ v4_address_set = 1;
+ else if (unformat (i, "%U/%d",
+ unformat_ip6_address, &v6address, &address_length))
+ v6_address_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (v4_address_set && v6_address_set)
+ {
+ errmsg ("both v4 and v6 addresses set");
+ return -99;
+ }
+ if (!v4_address_set && !v6_address_set && !del_all)
+ {
+ errmsg ("no addresses set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_ADD_DEL_ADDRESS, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = is_add;
+ mp->del_all = del_all;
+ if (v6_address_set)
+ {
+ mp->is_ipv6 = 1;
+ clib_memcpy (mp->address, &v6address, sizeof (v6address));
+ }
+ else
+ {
+ clib_memcpy (mp->address, &v4address, sizeof (v4address));
+ }
+ mp->address_length = address_length;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_mpls_enable (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_mpls_enable_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 enable = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else if (unformat (i, "dis"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_MPLS_ENABLE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = enable;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_table (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_table_t *mp;
+ u32 sw_if_index, vrf_id = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_TABLE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_ipv6 = is_ipv6;
+ mp->vrf_id = ntohl (vrf_id);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
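+ /* Reply handlers come in pairs: a plain-text printer and a JSON variant,
+ selected according to the requested output format. */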
+static void vl_api_sw_interface_get_table_reply_t_handler
+ (vl_api_sw_interface_get_table_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%d", ntohl (mp->vrf_id));
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static void vl_api_sw_interface_get_table_reply_t_handler_json
+ (vl_api_sw_interface_get_table_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_int (&node, "vrf_id", ntohl (mp->vrf_id));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static int
+api_sw_interface_get_table (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_get_table_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (SW_INTERFACE_GET_TABLE, mp);
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->is_ipv6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_vpath (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_vpath_t *mp;
+ u32 sw_if_index = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_enable = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ is_enable = 1;
+ else if (unformat (i, "disable"))
+ is_enable = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_VPATH, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = is_enable;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_vxlan_bypass (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_vxlan_bypass_t *mp;
+ u32 sw_if_index = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_enable = 1;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ is_enable = 1;
+ else if (unformat (i, "disable"))
+ is_enable = 0;
+ else if (unformat (i, "ip4"))
+ is_ipv6 = 0;
+ else if (unformat (i, "ip6"))
+ is_ipv6 = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_VXLAN_BYPASS, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = is_enable;
+ mp->is_ipv6 = is_ipv6;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_l2_xconnect (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_l2_xconnect_t *mp;
+ u32 rx_sw_if_index;
+ u8 rx_sw_if_index_set = 0;
+ u32 tx_sw_if_index;
+ u8 tx_sw_if_index_set = 0;
+ u8 enable = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "rx_sw_if_index %d", &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ else if (unformat (i, "tx_sw_if_index %d", &tx_sw_if_index))
+ tx_sw_if_index_set = 1;
+ else if (unformat (i, "rx"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam,
+ &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "tx"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam,
+ &tx_sw_if_index))
+ tx_sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (rx_sw_if_index_set == 0)
+ {
+ errmsg ("missing rx interface name or rx_sw_if_index");
+ return -99;
+ }
+
+ if (enable && (tx_sw_if_index_set == 0))
+ {
+ errmsg ("missing tx interface name or tx_sw_if_index");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_L2_XCONNECT, mp);
+
+ mp->rx_sw_if_index = ntohl (rx_sw_if_index);
+ mp->tx_sw_if_index = ntohl (tx_sw_if_index);
+ mp->enable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_l2_bridge (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_l2_bridge_t *mp;
+ u32 rx_sw_if_index;
+ u8 rx_sw_if_index_set = 0;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ u8 bvi = 0;
+ u32 shg = 0;
+ u8 enable = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ else if (unformat (i, "bd_id %d", &bd_id))
+ bd_id_set = 1;
+ else
+ if (unformat
+ (i, "%U", api_unformat_sw_if_index, vam, &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ else if (unformat (i, "shg %d", &shg))
+ ;
+ else if (unformat (i, "bvi"))
+ bvi = 1;
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (rx_sw_if_index_set == 0)
+ {
+ errmsg ("missing rx interface name or sw_if_index");
+ return -99;
+ }
+
+ if (enable && (bd_id_set == 0))
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_L2_BRIDGE, mp);
+
+ mp->rx_sw_if_index = ntohl (rx_sw_if_index);
+ mp->bd_id = ntohl (bd_id);
+ mp->shg = (u8) shg;
+ mp->bvi = bvi;
+ mp->enable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_bridge_domain_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bridge_domain_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 bd_id = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ ;
+ else
+ break;
+ }
+
+ M (BRIDGE_DOMAIN_DUMP, mp);
+ mp->bd_id = ntohl (bd_id);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_bridge_domain_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bridge_domain_add_del_t *mp;
+ u32 bd_id = ~0;
+ u8 is_add = 1;
+ u32 flood = 1, forward = 1, learn = 1, uu_flood = 1, arp_term = 0;
+ u8 *bd_tag = NULL;
+ u32 mac_age = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ ;
+ else if (unformat (i, "flood %d", &flood))
+ ;
+ else if (unformat (i, "uu-flood %d", &uu_flood))
+ ;
+ else if (unformat (i, "forward %d", &forward))
+ ;
+ else if (unformat (i, "learn %d", &learn))
+ ;
+ else if (unformat (i, "arp-term %d", &arp_term))
+ ;
+ else if (unformat (i, "mac-age %d", &mac_age))
+ ;
+ else if (unformat (i, "bd-tag %s", &bd_tag))
+ ;
+ else if (unformat (i, "del"))
+ {
+ is_add = 0;
+ flood = uu_flood = forward = learn = 0;
+ }
+ else
+ break;
+ }
+
+ if (bd_id == ~0)
+ {
+ errmsg ("missing bridge domain");
+ ret = -99;
+ goto done;
+ }
+
+ if (mac_age > 255)
+ {
+ errmsg ("mac age must be less than 256 ");
+ ret = -99;
+ goto done;
+ }
+
+ if (bd_tag && vec_len (bd_tag) > 63)
+ {
+ errmsg ("bd-tag cannot be longer than 63");
+ ret = -99;
+ goto done;
+ }
+
+ M (BRIDGE_DOMAIN_ADD_DEL, mp);
+
+ mp->bd_id = ntohl (bd_id);
+ mp->flood = flood;
+ mp->uu_flood = uu_flood;
+ mp->forward = forward;
+ mp->learn = learn;
+ mp->arp_term = arp_term;
+ mp->is_add = is_add;
+ mp->mac_age = (u8) mac_age;
+ if (bd_tag)
+ {
+ /* The message buffer is zeroed by M, so the copied tag stays NUL-terminated. */
+ clib_memcpy (mp->bd_tag, bd_tag, vec_len (bd_tag));
+ }
+
+ S (mp);
+ W (ret);
+
+done:
+ vec_free (bd_tag);
+ return ret;
+}
+
+static int
+api_l2fib_flush_bd (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2fib_flush_bd_t *mp;
+ u32 bd_id = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id));
+ else
+ break;
+ }
+
+ if (bd_id == ~0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ M (L2FIB_FLUSH_BD, mp);
+
+ mp->bd_id = htonl (bd_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2fib_flush_int (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2fib_flush_int_t *mp;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index));
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index));
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (L2FIB_FLUSH_INT, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2fib_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2fib_add_del_t *mp;
+ f64 timeout;
+ u64 mac = 0;
+ u8 mac_set = 0;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ u32 sw_if_index = ~0;
+ u8 sw_if_index_set = 0;
+ u8 is_add = 1;
+ u8 static_mac = 0;
+ u8 filter_mac = 0;
+ u8 bvi_mac = 0;
+ int count = 1;
+ f64 before = 0;
+ int j;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "mac %U", unformat_ethernet_address, &mac))
+ mac_set = 1;
+ else if (unformat (i, "bd_id %d", &bd_id))
+ bd_id_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "static"))
+ static_mac = 1;
+ else if (unformat (i, "filter"))
+ {
+ filter_mac = 1;
+ static_mac = 1;
+ }
+ else if (unformat (i, "bvi"))
+ {
+ bvi_mac = 1;
+ static_mac = 1;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "count %d", &count))
+ ;
+ else
+ break;
+ }
+
+ if (mac_set == 0)
+ {
+ errmsg ("missing mac address");
+ return -99;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ if (is_add && sw_if_index_set == 0 && filter_mac == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
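+ /* With count > 1 this doubles as a throughput test: messages are sent
+ without waiting for individual replies, then a control ping flushes the stream. */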
+ if (count > 1)
+ {
+ /* Turn on async mode */
+ vam->async_mode = 1;
+ vam->async_errors = 0;
+ before = vat_time_now (vam);
+ }
+
+ for (j = 0; j < count; j++)
+ {
+ M (L2FIB_ADD_DEL, mp);
+
+ mp->mac = mac;
+ mp->bd_id = ntohl (bd_id);
+ mp->is_add = is_add;
+
+ if (is_add)
+ {
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->static_mac = static_mac;
+ mp->filter_mac = filter_mac;
+ mp->bvi_mac = bvi_mac;
+ }
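+ /* Bump the MAC so each iteration programs a distinct entry. */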
+ increment_mac_address (&mac);
+ /* send it... */
+ S (mp);
+ }
+
+ if (count > 1)
+ {
+ vl_api_control_ping_t *mp_ping;
+ f64 after;
+
+ /* Shut off async mode */
+ vam->async_mode = 0;
+
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ timeout = vat_time_now (vam) + 1.0;
+ while (vat_time_now (vam) < timeout)
+ if (vam->result_ready == 1)
+ goto out;
+ vam->retval = -99;
+
+ out:
+ if (vam->retval == -99)
+ errmsg ("timeout");
+
+ if (vam->async_errors > 0)
+ {
+ errmsg ("%d asynchronous errors", vam->async_errors);
+ vam->retval = -98;
+ }
+ vam->async_errors = 0;
+ after = vat_time_now (vam);
+
+ print (vam->ofp, "%d L2FIB entries in %.6f secs, %.2f entries/sec",
+ count, after - before, count / (after - before));
+ }
+ else
+ {
+ int ret;
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+ }
+ /* Return the good/bad news */
+ return (vam->retval);
+}
+
+static int
+api_bridge_domain_set_mac_age (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bridge_domain_set_mac_age_t *mp;
+ u32 bd_id = ~0;
+ u32 mac_age = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id));
+ else if (unformat (i, "mac-age %d", &mac_age));
+ else
+ break;
+ }
+
+ if (bd_id == ~0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ if (mac_age > 255)
+ {
+ errmsg ("mac age must be less than 256 ");
+ return -99;
+ }
+
+ M (BRIDGE_DOMAIN_SET_MAC_AGE, mp);
+
+ mp->bd_id = htonl (bd_id);
+ mp->mac_age = (u8) mac_age;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2_flags (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2_flags_t *mp;
+ u32 sw_if_index;
+ u32 flags = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_set = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "learn"))
+ flags |= L2_LEARN;
+ else if (unformat (i, "forward"))
+ flags |= L2_FWD;
+ else if (unformat (i, "flood"))
+ flags |= L2_FLOOD;
+ else if (unformat (i, "uu-flood"))
+ flags |= L2_UU_FLOOD;
+ else if (unformat (i, "arp-term"))
+ flags |= L2_ARP_TERM;
+ else if (unformat (i, "off"))
+ is_set = 0;
+ else if (unformat (i, "disable"))
+ is_set = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (L2_FLAGS, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->feature_bitmap = ntohl (flags);
+ mp->is_set = is_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_bridge_flags (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bridge_flags_t *mp;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ u8 is_set = 1;
+ u32 flags = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ bd_id_set = 1;
+ else if (unformat (i, "learn"))
+ flags |= L2_LEARN;
+ else if (unformat (i, "forward"))
+ flags |= L2_FWD;
+ else if (unformat (i, "flood"))
+ flags |= L2_FLOOD;
+ else if (unformat (i, "uu-flood"))
+ flags |= L2_UU_FLOOD;
+ else if (unformat (i, "arp-term"))
+ flags |= L2_ARP_TERM;
+ else if (unformat (i, "off"))
+ is_set = 0;
+ else if (unformat (i, "disable"))
+ is_set = 0;
+ else
+ break;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ M (BRIDGE_FLAGS, mp);
+
+ mp->bd_id = ntohl (bd_id);
+ mp->feature_bitmap = ntohl (flags);
+ mp->is_set = is_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_bd_ip_mac_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bd_ip_mac_add_del_t *mp;
+ u32 bd_id;
+ u8 is_ipv6 = 0;
+ u8 is_add = 1;
+ u8 bd_id_set = 0;
+ u8 ip_set = 0;
+ u8 mac_set = 0;
+ ip4_address_t v4addr;
+ ip6_address_t v6addr;
+ u8 macaddr[6];
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ {
+ bd_id_set++;
+ }
+ else if (unformat (i, "%U", unformat_ip4_address, &v4addr))
+ {
+ ip_set++;
+ }
+ else if (unformat (i, "%U", unformat_ip6_address, &v6addr))
+ {
+ ip_set++;
+ is_ipv6++;
+ }
+ else if (unformat (i, "%U", unformat_ethernet_address, macaddr))
+ {
+ mac_set++;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+ else if (ip_set == 0)
+ {
+ errmsg ("missing IP address");
+ return -99;
+ }
+ else if (mac_set == 0)
+ {
+ errmsg ("missing MAC address");
+ return -99;
+ }
+
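+ /* Each entry is an IP-to-MAC binding, typically consumed by the bridge
+ domain's ARP termination feature. */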
+ M (BD_IP_MAC_ADD_DEL, mp);
+
+ mp->bd_id = ntohl (bd_id);
+ mp->is_ipv6 = is_ipv6;
+ mp->is_add = is_add;
+ if (is_ipv6)
+ clib_memcpy (mp->ip_address, &v6addr, sizeof (v6addr));
+ else
+ clib_memcpy (mp->ip_address, &v4addr, sizeof (v4addr));
+ clib_memcpy (mp->mac_address, macaddr, 6);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_tap_connect (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_tap_connect_t *mp;
+ u8 mac_address[6];
+ u8 random_mac = 1;
+ u8 name_set = 0;
+ u8 *tap_name;
+ u8 *tag = 0;
+ ip4_address_t ip4_address;
+ u32 ip4_mask_width;
+ int ip4_address_set = 0;
+ ip6_address_t ip6_address;
+ u32 ip6_mask_width;
+ int ip6_address_set = 0;
+ int ret;
+
+ memset (mac_address, 0, sizeof (mac_address));
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "mac %U", unformat_ethernet_address, mac_address))
+ {
+ random_mac = 0;
+ }
+ else if (unformat (i, "random-mac"))
+ random_mac = 1;
+ else if (unformat (i, "tapname %s", &tap_name))
+ name_set = 1;
+ else if (unformat (i, "tag %s", &tag))
+ ;
+ else if (unformat (i, "address %U/%d",
+ unformat_ip4_address, &ip4_address, &ip4_mask_width))
+ ip4_address_set = 1;
+ else if (unformat (i, "address %U/%d",
+ unformat_ip6_address, &ip6_address, &ip6_mask_width))
+ ip6_address_set = 1;
+ else
+ break;
+ }
+
+ if (name_set == 0)
+ {
+ errmsg ("missing tap name");
+ return -99;
+ }
+ if (vec_len (tap_name) > 63)
+ {
+ errmsg ("tap name too long");
+ return -99;
+ }
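+ /* unformat "%s" yields a vector with no trailing NUL; append one before copying. */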
+ vec_add1 (tap_name, 0);
+
+ if (vec_len (tag) > 63)
+ {
+ errmsg ("tag too long");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (TAP_CONNECT, mp);
+
+ mp->use_random_mac = random_mac;
+ clib_memcpy (mp->mac_address, mac_address, 6);
+ clib_memcpy (mp->tap_name, tap_name, vec_len (tap_name));
+ if (tag)
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+
+ if (ip4_address_set)
+ {
+ mp->ip4_address_set = 1;
+ clib_memcpy (mp->ip4_address, &ip4_address, sizeof (mp->ip4_address));
+ mp->ip4_mask_width = ip4_mask_width;
+ }
+ if (ip6_address_set)
+ {
+ mp->ip6_address_set = 1;
+ clib_memcpy (mp->ip6_address, &ip6_address, sizeof (mp->ip6_address));
+ mp->ip6_mask_width = ip6_mask_width;
+ }
+
+ vec_free (tap_name);
+ vec_free (tag);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_tap_modify (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_tap_modify_t *mp;
+ u8 mac_address[6];
+ u8 random_mac = 1;
+ u8 name_set = 0;
+ u8 *tap_name;
+ u32 sw_if_index = ~0;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ memset (mac_address, 0, sizeof (mac_address));
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "mac %U", unformat_ethernet_address, mac_address))
+ {
+ random_mac = 0;
+ }
+ else if (unformat (i, "random-mac"))
+ random_mac = 1;
+ else if (unformat (i, "tapname %s", &tap_name))
+ name_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing vpp interface name");
+ return -99;
+ }
+ if (name_set == 0)
+ {
+ errmsg ("missing tap name");
+ return -99;
+ }
+ if (vec_len (tap_name) > 63)
+ {
+ errmsg ("tap name too long");
+ return -99;
+ }
+ vec_add1 (tap_name, 0);
+
+ /* Construct the API message */
+ M (TAP_MODIFY, mp);
+
+ mp->use_random_mac = random_mac;
+ mp->sw_if_index = ntohl (sw_if_index);
+ clib_memcpy (mp->mac_address, mac_address, 6);
+ clib_memcpy (mp->tap_name, tap_name, vec_len (tap_name));
+ vec_free (tap_name);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_tap_delete (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_tap_delete_t *mp;
+ u32 sw_if_index = ~0;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing vpp interface name");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (TAP_DELETE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_table_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_table_add_del_t *mp;
+ u32 table_id = ~0;
+ u8 is_ipv6 = 0;
+ u8 is_add = 1;
+ int ret = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "table %d", &table_id))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (~0 == table_id)
+ {
+ errmsg ("missing table-ID");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP_TABLE_ADD_DEL, mp);
+
+ mp->table_id = ntohl (table_id);
+ mp->is_ipv6 = is_ipv6;
+ mp->is_add = is_add;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static int
+api_ip_add_del_route (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_add_del_route_t *mp;
+ u32 sw_if_index = ~0, vrf_id = 0;
+ u8 is_ipv6 = 0;
+ u8 is_local = 0, is_drop = 0;
+ u8 is_unreach = 0, is_prohibit = 0;
+ u8 create_vrf_if_needed = 0;
+ u8 is_add = 1;
+ u32 next_hop_weight = 1;
+ u8 not_last = 0;
+ u8 is_multipath = 0;
+ u8 address_set = 0;
+ u8 address_length_set = 0;
+ u32 next_hop_table_id = 0;
+ u32 resolve_attempts = 0;
+ u32 dst_address_length = 0;
+ u8 next_hop_set = 0;
+ ip4_address_t v4_dst_address, v4_next_hop_address;
+ ip6_address_t v6_dst_address, v6_next_hop_address;
+ int count = 1;
+ int j;
+ f64 before = 0;
+ u32 random_add_del = 0;
+ u32 *random_vector = 0;
+ uword *random_hash;
+ u32 random_seed = 0xdeaddabe;
+ u32 classify_table_index = ~0;
+ u8 is_classify = 0;
+ u8 resolve_host = 0, resolve_attached = 0;
+ mpls_label_t *next_hop_out_label_stack = NULL;
+ mpls_label_t next_hop_out_label = MPLS_LABEL_INVALID;
+ mpls_label_t next_hop_via_label = MPLS_LABEL_INVALID;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "%U", unformat_ip4_address, &v4_dst_address))
+ {
+ address_set = 1;
+ is_ipv6 = 0;
+ }
+ else if (unformat (i, "%U", unformat_ip6_address, &v6_dst_address))
+ {
+ address_set = 1;
+ is_ipv6 = 1;
+ }
+ else if (unformat (i, "/%d", &dst_address_length))
+ {
+ address_length_set = 1;
+ }
+
+ else if (is_ipv6 == 0 && unformat (i, "via %U", unformat_ip4_address,
+ &v4_next_hop_address))
+ {
+ next_hop_set = 1;
+ }
+ else if (is_ipv6 == 1 && unformat (i, "via %U", unformat_ip6_address,
+ &v6_next_hop_address))
+ {
+ next_hop_set = 1;
+ }
+ else if (unformat (i, "resolve-attempts %d", &resolve_attempts))
+ ;
+ else if (unformat (i, "weight %d", &next_hop_weight))
+ ;
+ else if (unformat (i, "drop"))
+ {
+ is_drop = 1;
+ }
+ else if (unformat (i, "null-send-unreach"))
+ {
+ is_unreach = 1;
+ }
+ else if (unformat (i, "null-send-prohibit"))
+ {
+ is_prohibit = 1;
+ }
+ else if (unformat (i, "local"))
+ {
+ is_local = 1;
+ }
+ else if (unformat (i, "classify %d", &classify_table_index))
+ {
+ is_classify = 1;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "not-last"))
+ not_last = 1;
+ else if (unformat (i, "resolve-via-host"))
+ resolve_host = 1;
+ else if (unformat (i, "resolve-via-attached"))
+ resolve_attached = 1;
+ else if (unformat (i, "multipath"))
+ is_multipath = 1;
+ else if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (i, "create-vrf"))
+ create_vrf_if_needed = 1;
+ else if (unformat (i, "count %d", &count))
+ ;
+ else if (unformat (i, "lookup-in-vrf %d", &next_hop_table_id))
+ ;
+ else if (unformat (i, "next-hop-table %d", &next_hop_table_id))
+ ;
+ else if (unformat (i, "out-label %d", &next_hop_out_label))
+ vec_add1 (next_hop_out_label_stack, ntohl (next_hop_out_label));
+ else if (unformat (i, "via-label %d", &next_hop_via_label))
+ ;
+ else if (unformat (i, "random"))
+ random_add_del = 1;
+ else if (unformat (i, "seed %d", &random_seed))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!next_hop_set && !is_drop && !is_local &&
+ !is_classify && !is_unreach && !is_prohibit &&
+ MPLS_LABEL_INVALID == next_hop_via_label)
+ {
+ errmsg
+ ("next hop / local / drop / unreach / prohibit / classify not set");
+ return -99;
+ }
+
+ if (next_hop_set && MPLS_LABEL_INVALID != next_hop_via_label)
+ {
+ errmsg ("next hop and next-hop via label set");
+ return -99;
+ }
+ if (address_set == 0)
+ {
+ errmsg ("missing addresses");
+ return -99;
+ }
+
+ if (address_length_set == 0)
+ {
+ errmsg ("missing address length");
+ return -99;
+ }
+
+ /* Generate a pile of unique, random routes */
+ if (random_add_del)
+ {
+ u32 this_random_address;
+ random_hash = hash_create (count, sizeof (uword));
+
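+ /* Seed the hash with the next-hop address so no random destination collides with it. */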
+ hash_set (random_hash, v4_next_hop_address.as_u32, 1);
+ for (j = 0; j <= count; j++)
+ {
+ do
+ {
+ this_random_address = random_u32 (&random_seed);
+ this_random_address =
+ clib_host_to_net_u32 (this_random_address);
+ }
+ while (hash_get (random_hash, this_random_address));
+ vec_add1 (random_vector, this_random_address);
+ hash_set (random_hash, this_random_address, 1);
+ }
+ hash_free (random_hash);
+ v4_dst_address.as_u32 = random_vector[0];
+ }
+
+ if (count > 1)
+ {
+ /* Turn on async mode */
+ vam->async_mode = 1;
+ vam->async_errors = 0;
+ before = vat_time_now (vam);
+ }
+
+ for (j = 0; j < count; j++)
+ {
+ /* Construct the API message */
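+ /* M2 appears to be the variable-length variant of M: it reserves extra
+ tail room, here for the out-label stack. */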
+ M2 (IP_ADD_DEL_ROUTE, mp,
+ sizeof (mpls_label_t) * vec_len (next_hop_out_label_stack));
+
+ mp->next_hop_sw_if_index = ntohl (sw_if_index);
+ mp->table_id = ntohl (vrf_id);
+ mp->create_vrf_if_needed = create_vrf_if_needed;
+
+ mp->is_add = is_add;
+ mp->is_drop = is_drop;
+ mp->is_unreach = is_unreach;
+ mp->is_prohibit = is_prohibit;
+ mp->is_ipv6 = is_ipv6;
+ mp->is_local = is_local;
+ mp->is_classify = is_classify;
+ mp->is_multipath = is_multipath;
+ mp->is_resolve_host = resolve_host;
+ mp->is_resolve_attached = resolve_attached;
+ mp->not_last = not_last;
+ mp->next_hop_weight = next_hop_weight;
+ mp->dst_address_length = dst_address_length;
+ mp->next_hop_table_id = ntohl (next_hop_table_id);
+ mp->classify_table_index = ntohl (classify_table_index);
+ mp->next_hop_via_label = ntohl (next_hop_via_label);
+ mp->next_hop_n_out_labels = vec_len (next_hop_out_label_stack);
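+ /* Out-labels were converted to network order at parse time, so the stack copies verbatim. */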
+ if (0 != mp->next_hop_n_out_labels)
+ {
+ memcpy (mp->next_hop_out_label_stack,
+ next_hop_out_label_stack,
+ vec_len (next_hop_out_label_stack) * sizeof (mpls_label_t));
+ }
+
+ if (is_ipv6)
+ {
+ clib_memcpy (mp->dst_address, &v6_dst_address,
+ sizeof (v6_dst_address));
+ if (next_hop_set)
+ clib_memcpy (mp->next_hop_address, &v6_next_hop_address,
+ sizeof (v6_next_hop_address));
+ increment_v6_address (&v6_dst_address);
+ }
+ else
+ {
+ clib_memcpy (mp->dst_address, &v4_dst_address,
+ sizeof (v4_dst_address));
+ if (next_hop_set)
+ clib_memcpy (mp->next_hop_address, &v4_next_hop_address,
+ sizeof (v4_next_hop_address));
+ if (random_add_del)
+ v4_dst_address.as_u32 = random_vector[j + 1];
+ else
+ increment_v4_address (&v4_dst_address);
+ }
+ /* send it... */
+ S (mp);
+ /* If we receive SIGTERM, stop now... */
+ if (vam->do_exit)
+ break;
+ }
+
+ /* Free the label stack after the loop; every iteration reuses it. */
+ vec_free (next_hop_out_label_stack);
+
+ /* When testing multiple add/del ops, use a control-ping to sync */
+ if (count > 1)
+ {
+ vl_api_control_ping_t *mp_ping;
+ f64 after;
+ f64 timeout;
+
+ /* Shut off async mode */
+ vam->async_mode = 0;
+
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ timeout = vat_time_now (vam) + 1.0;
+ while (vat_time_now (vam) < timeout)
+ if (vam->result_ready == 1)
+ goto out;
+ vam->retval = -99;
+
+ out:
+ if (vam->retval == -99)
+ errmsg ("timeout");
+
+ if (vam->async_errors > 0)
+ {
+ errmsg ("%d asynchronous errors", vam->async_errors);
+ vam->retval = -98;
+ }
+ vam->async_errors = 0;
+ after = vat_time_now (vam);
+
+ /* slim chance, but we might have eaten SIGTERM on the first iteration */
+ if (j > 0)
+ count = j;
+
+ print (vam->ofp, "%d routes in %.6f secs, %.2f routes/sec",
+ count, after - before, count / (after - before));
+ }
+ else
+ {
+ int ret;
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+ }
+
+ /* Return the good/bad news */
+ return (vam->retval);
+}
+
+static int
+api_ip_mroute_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_mroute_add_del_t *mp;
+ u32 sw_if_index = ~0, vrf_id = 0;
+ u8 is_ipv6 = 0;
+ u8 is_local = 0;
+ u8 create_vrf_if_needed = 0;
+ u8 is_add = 1;
+ u8 address_set = 0;
+ u32 grp_address_length = 0;
+ ip4_address_t v4_grp_address, v4_src_address;
+ ip6_address_t v6_grp_address, v6_src_address;
+ mfib_itf_flags_t iflags = 0;
+ mfib_entry_flags_t eflags = 0;
+ int ret;
+
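+ /* grp_address_length doubles as the (S,G)/(*,G) discriminator: 64 or 256
+ when both source and group are given, 32 or 128 for group-only entries. */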
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "%U %U",
+ unformat_ip4_address, &v4_src_address,
+ unformat_ip4_address, &v4_grp_address))
+ {
+ grp_address_length = 64;
+ address_set = 1;
+ is_ipv6 = 0;
+ }
+ else if (unformat (i, "%U %U",
+ unformat_ip6_address, &v6_src_address,
+ unformat_ip6_address, &v6_grp_address))
+ {
+ grp_address_length = 256;
+ address_set = 1;
+ is_ipv6 = 1;
+ }
+ else if (unformat (i, "%U", unformat_ip4_address, &v4_grp_address))
+ {
+ memset (&v4_src_address, 0, sizeof (v4_src_address));
+ grp_address_length = 32;
+ address_set = 1;
+ is_ipv6 = 0;
+ }
+ else if (unformat (i, "%U", unformat_ip6_address, &v6_grp_address))
+ {
+ memset (&v6_src_address, 0, sizeof (v6_src_address));
+ grp_address_length = 128;
+ address_set = 1;
+ is_ipv6 = 1;
+ }
+ else if (unformat (i, "/%d", &grp_address_length))
+ ;
+ else if (unformat (i, "local"))
+ {
+ is_local = 1;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (i, "create-vrf"))
+ create_vrf_if_needed = 1;
+ else if (unformat (i, "%U", unformat_mfib_itf_flags, &iflags))
+ ;
+ else if (unformat (i, "%U", unformat_mfib_entry_flags, &eflags))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (address_set == 0)
+ {
+ errmsg ("missing addresses\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP_MROUTE_ADD_DEL, mp);
+
+ mp->next_hop_sw_if_index = ntohl (sw_if_index);
+ mp->table_id = ntohl (vrf_id);
+ mp->create_vrf_if_needed = create_vrf_if_needed;
+
+ mp->is_add = is_add;
+ mp->is_ipv6 = is_ipv6;
+ mp->is_local = is_local;
+ mp->itf_flags = ntohl (iflags);
+ mp->entry_flags = ntohl (eflags);
+ mp->grp_address_length = ntohs ((u16) grp_address_length);
+
+ if (is_ipv6)
+ {
+ clib_memcpy (mp->grp_address, &v6_grp_address, sizeof (v6_grp_address));
+ clib_memcpy (mp->src_address, &v6_src_address, sizeof (v6_src_address));
+ }
+ else
+ {
+ clib_memcpy (mp->grp_address, &v4_grp_address, sizeof (v4_grp_address));
+ clib_memcpy (mp->src_address, &v4_src_address, sizeof (v4_src_address));
+ }
+
+ /* send it... */
+ S (mp);
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_mpls_table_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_mpls_table_add_del_t *mp;
+ u32 table_id = ~0;
+ u8 is_add = 1;
+ int ret = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "table %d", &table_id))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (~0 == table_id)
+ {
+ errmsg ("missing table-ID");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (MPLS_TABLE_ADD_DEL, mp);
+
+ mp->mt_table_id = ntohl (table_id);
+ mp->mt_is_add = is_add;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static int
+api_mpls_route_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_mpls_route_add_del_t *mp;
+ u32 sw_if_index = ~0, table_id = 0;
+ u8 create_table_if_needed = 0;
+ u8 is_add = 1;
+ u32 next_hop_weight = 1;
+ u8 is_multipath = 0;
+ u32 next_hop_table_id = 0;
+ u8 next_hop_set = 0;
+ ip4_address_t v4_next_hop_address = {
+ .as_u32 = 0,
+ };
+ ip6_address_t v6_next_hop_address = { {0} };
+ int count = 1;
+ int j;
+ f64 before = 0;
+ u32 classify_table_index = ~0;
+ u8 is_classify = 0;
+ u8 resolve_host = 0, resolve_attached = 0;
+ mpls_label_t next_hop_via_label = MPLS_LABEL_INVALID;
+ mpls_label_t next_hop_out_label = MPLS_LABEL_INVALID;
+ mpls_label_t *next_hop_out_label_stack = NULL;
+ mpls_label_t local_label = MPLS_LABEL_INVALID;
+ u8 is_eos = 0;
+ dpo_proto_t next_hop_proto = DPO_PROTO_IP4;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "%d", &local_label))
+ ;
+ else if (unformat (i, "eos"))
+ is_eos = 1;
+ else if (unformat (i, "non-eos"))
+ is_eos = 0;
+ else if (unformat (i, "via %U", unformat_ip4_address,
+ &v4_next_hop_address))
+ {
+ next_hop_set = 1;
+ next_hop_proto = DPO_PROTO_IP4;
+ }
+ else if (unformat (i, "via %U", unformat_ip6_address,
+ &v6_next_hop_address))
+ {
+ next_hop_set = 1;
+ next_hop_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (i, "weight %d", &next_hop_weight))
+ ;
+ else if (unformat (i, "create-table"))
+ create_table_if_needed = 1;
+ else if (unformat (i, "classify %d", &classify_table_index))
+ {
+ is_classify = 1;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "resolve-via-host"))
+ resolve_host = 1;
+ else if (unformat (i, "resolve-via-attached"))
+ resolve_attached = 1;
+ else if (unformat (i, "multipath"))
+ is_multipath = 1;
+ else if (unformat (i, "count %d", &count))
+ ;
+ else if (unformat (i, "lookup-in-ip4-table %d", &next_hop_table_id))
+ {
+ next_hop_set = 1;
+ next_hop_proto = DPO_PROTO_IP4;
+ }
+ else if (unformat (i, "lookup-in-ip6-table %d", &next_hop_table_id))
+ {
+ next_hop_set = 1;
+ next_hop_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (i, "next-hop-table %d", &next_hop_table_id))
+ ;
+ else if (unformat (i, "via-label %d", &next_hop_via_label))
+ ;
+ else if (unformat (i, "out-label %d", &next_hop_out_label))
+ vec_add1 (next_hop_out_label_stack, ntohl (next_hop_out_label));
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!next_hop_set && !is_classify)
+ {
+ errmsg ("next hop / classify not set");
+ return -99;
+ }
+
+ if (MPLS_LABEL_INVALID == local_label)
+ {
+ errmsg ("missing label");
+ return -99;
+ }
+
+ if (count > 1)
+ {
+ /* Turn on async mode */
+ vam->async_mode = 1;
+ vam->async_errors = 0;
+ before = vat_time_now (vam);
+ }
+
+ for (j = 0; j < count; j++)
+ {
+ /* Construct the API message */
+ M2 (MPLS_ROUTE_ADD_DEL, mp,
+ sizeof (mpls_label_t) * vec_len (next_hop_out_label_stack));
+
+ mp->mr_next_hop_sw_if_index = ntohl (sw_if_index);
+ mp->mr_table_id = ntohl (table_id);
+ mp->mr_create_table_if_needed = create_table_if_needed;
+
+ mp->mr_is_add = is_add;
+ mp->mr_next_hop_proto = next_hop_proto;
+ mp->mr_is_classify = is_classify;
+ mp->mr_is_multipath = is_multipath;
+ mp->mr_is_resolve_host = resolve_host;
+ mp->mr_is_resolve_attached = resolve_attached;
+ mp->mr_next_hop_weight = next_hop_weight;
+ mp->mr_next_hop_table_id = ntohl (next_hop_table_id);
+ mp->mr_classify_table_index = ntohl (classify_table_index);
+ mp->mr_next_hop_via_label = ntohl (next_hop_via_label);
+ mp->mr_label = ntohl (local_label);
+ mp->mr_eos = is_eos;
+
+ mp->mr_next_hop_n_out_labels = vec_len (next_hop_out_label_stack);
+ if (0 != mp->mr_next_hop_n_out_labels)
+ {
+ memcpy (mp->mr_next_hop_out_label_stack,
+ next_hop_out_label_stack,
+ vec_len (next_hop_out_label_stack) * sizeof (mpls_label_t));
+ }
+
+ if (next_hop_set)
+ {
+ if (DPO_PROTO_IP4 == next_hop_proto)
+ {
+ clib_memcpy (mp->mr_next_hop,
+ &v4_next_hop_address,
+ sizeof (v4_next_hop_address));
+ }
+ else if (DPO_PROTO_IP6 == next_hop_proto)
+ {
+ clib_memcpy (mp->mr_next_hop,
+ &v6_next_hop_address,
+ sizeof (v6_next_hop_address));
+ }
+ }
+ local_label++;
+
+ /* send it... */
+ S (mp);
+ /* If we receive SIGTERM, stop now... */
+ if (vam->do_exit)
+ break;
+ }
+
+ /* As above, free the shared label stack once all messages are sent. */
+ vec_free (next_hop_out_label_stack);
+
+ /* When testing multiple add/del ops, use a control-ping to sync */
+ if (count > 1)
+ {
+ vl_api_control_ping_t *mp_ping;
+ f64 after;
+ f64 timeout;
+
+ /* Shut off async mode */
+ vam->async_mode = 0;
+
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ timeout = vat_time_now (vam) + 1.0;
+ while (vat_time_now (vam) < timeout)
+ if (vam->result_ready == 1)
+ goto out;
+ vam->retval = -99;
+
+ out:
+ if (vam->retval == -99)
+ errmsg ("timeout");
+
+ if (vam->async_errors > 0)
+ {
+ errmsg ("%d asynchronous errors", vam->async_errors);
+ vam->retval = -98;
+ }
+ vam->async_errors = 0;
+ after = vat_time_now (vam);
+
+ /* slim chance, but we might have eaten SIGTERM on the first iteration */
+ if (j > 0)
+ count = j;
+
+ print (vam->ofp, "%d routes in %.6f secs, %.2f routes/sec",
+ count, after - before, count / (after - before));
+ }
+ else
+ {
+ int ret;
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+ }
+
+ /* Return the good/bad news */
+ return (vam->retval);
+}
+
+static int
+api_mpls_ip_bind_unbind (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_mpls_ip_bind_unbind_t *mp;
+ u32 ip_table_id = 0;
+ u8 create_table_if_needed = 0;
+ u8 is_bind = 1;
+ u8 is_ip4 = 1;
+ ip4_address_t v4_address;
+ ip6_address_t v6_address;
+ u32 address_length;
+ u8 address_set = 0;
+ mpls_label_t local_label = MPLS_LABEL_INVALID;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U/%d", unformat_ip4_address,
+ &v4_address, &address_length))
+ {
+ is_ip4 = 1;
+ address_set = 1;
+ }
+ else if (unformat (i, "%U/%d", unformat_ip6_address,
+ &v6_address, &address_length))
+ {
+ is_ip4 = 0;
+ address_set = 1;
+ }
+ else if (unformat (i, "%d", &local_label))
+ ;
+ else if (unformat (i, "create-table"))
+ create_table_if_needed = 1;
+ else if (unformat (i, "table-id %d", &ip_table_id))
+ ;
+ else if (unformat (i, "unbind"))
+ is_bind = 0;
+ else if (unformat (i, "bind"))
+ is_bind = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!address_set)
+ {
+ errmsg ("IP addres not set");
+ return -99;
+ }
+
+ if (MPLS_LABEL_INVALID == local_label)
+ {
+ errmsg ("missing label");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (MPLS_IP_BIND_UNBIND, mp);
+
+ mp->mb_create_table_if_needed = create_table_if_needed;
+ mp->mb_is_bind = is_bind;
+ mp->mb_is_ip4 = is_ip4;
+ mp->mb_ip_table_id = ntohl (ip_table_id);
+ mp->mb_mpls_table_id = 0;
+ mp->mb_label = ntohl (local_label);
+ mp->mb_address_length = address_length;
+
+ if (is_ip4)
+ clib_memcpy (mp->mb_address, &v4_address, sizeof (v4_address));
+ else
+ clib_memcpy (mp->mb_address, &v6_address, sizeof (v6_address));
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_proxy_arp_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_proxy_arp_add_del_t *mp;
+ u32 vrf_id = 0;
+ u8 is_add = 1;
+ ip4_address_t lo, hi;
+ u8 range_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (i, "%U - %U", unformat_ip4_address, &lo,
+ unformat_ip4_address, &hi))
+ range_set = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (range_set == 0)
+ {
+ errmsg ("address range not set");
+ return -99;
+ }
+
+ M (PROXY_ARP_ADD_DEL, mp);
+
+ mp->vrf_id = ntohl (vrf_id);
+ mp->is_add = is_add;
+ clib_memcpy (mp->low_address, &lo, sizeof (mp->low_address));
+ clib_memcpy (mp->hi_address, &hi, sizeof (mp->hi_address));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_proxy_arp_intfc_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_proxy_arp_intfc_enable_disable_t *mp;
+ u32 sw_if_index;
+ u8 enable = 1;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (PROXY_ARP_INTFC_ENABLE_DISABLE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable_disable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_mpls_tunnel_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_mpls_tunnel_add_del_t *mp;
+
+ u8 is_add = 1;
+ u8 l2_only = 0;
+ u32 sw_if_index = ~0;
+ u32 next_hop_sw_if_index = ~0;
+ u32 next_hop_proto_is_ip4 = 1;
+
+ u32 next_hop_table_id = 0;
+ ip4_address_t v4_next_hop_address = {
+ .as_u32 = 0,
+ };
+ ip6_address_t v6_next_hop_address = { {0} };
+ mpls_label_t next_hop_out_label = MPLS_LABEL_INVALID, *labels = NULL;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "del sw_if_index %d", &sw_if_index))
+ is_add = 0;
+ else if (unformat (i, "sw_if_index %d", &next_hop_sw_if_index))
+ ;
+ else if (unformat (i, "via %U",
+ unformat_ip4_address, &v4_next_hop_address))
+ {
+ next_hop_proto_is_ip4 = 1;
+ }
+ else if (unformat (i, "via %U",
+ unformat_ip6_address, &v6_next_hop_address))
+ {
+ next_hop_proto_is_ip4 = 0;
+ }
+ else if (unformat (i, "l2-only"))
+ l2_only = 1;
+ else if (unformat (i, "next-hop-table %d", &next_hop_table_id))
+ ;
+ else if (unformat (i, "out-label %d", &next_hop_out_label))
+ vec_add1 (labels, ntohl (next_hop_out_label));
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M2 (MPLS_TUNNEL_ADD_DEL, mp, sizeof (mpls_label_t) * vec_len (labels));
+
+ mp->mt_next_hop_sw_if_index = ntohl (next_hop_sw_if_index);
+ mp->mt_sw_if_index = ntohl (sw_if_index);
+ mp->mt_is_add = is_add;
+ mp->mt_l2_only = l2_only;
+ mp->mt_next_hop_table_id = ntohl (next_hop_table_id);
+ mp->mt_next_hop_proto_is_ip4 = next_hop_proto_is_ip4;
+
+ mp->mt_next_hop_n_out_labels = vec_len (labels);
+
+ if (0 != mp->mt_next_hop_n_out_labels)
+ {
+ clib_memcpy (mp->mt_next_hop_out_label_stack, labels,
+ sizeof (mpls_label_t) * mp->mt_next_hop_n_out_labels);
+ vec_free (labels);
+ }
+
+ if (next_hop_proto_is_ip4)
+ {
+ clib_memcpy (mp->mt_next_hop,
+ &v4_next_hop_address, sizeof (v4_next_hop_address));
+ }
+ else
+ {
+ clib_memcpy (mp->mt_next_hop,
+ &v6_next_hop_address, sizeof (v6_next_hop_address));
+ }
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_unnumbered (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_unnumbered_t *mp;
+ u32 sw_if_index;
+ u32 unnum_sw_index = ~0;
+ u8 is_add = 1;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "unnum_if_index %d", &unnum_sw_index))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_UNNUMBERED, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->unnumbered_sw_if_index = ntohl (unnum_sw_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_neighbor_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_neighbor_add_del_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_add = 1;
+ u8 is_static = 0;
+ u8 is_no_fib_entry = 0;
+ u8 mac_address[6];
+ u8 mac_set = 0;
+ u8 v4_address_set = 0;
+ u8 v6_address_set = 0;
+ ip4_address_t v4address;
+ ip6_address_t v6address;
+ int ret;
+
+ memset (mac_address, 0, sizeof (mac_address));
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "mac %U", unformat_ethernet_address, mac_address))
+ {
+ mac_set = 1;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "is_static"))
+ is_static = 1;
+ else if (unformat (i, "no-fib-entry"))
+ is_no_fib_entry = 1;
+ else if (unformat (i, "dst %U", unformat_ip4_address, &v4address))
+ v4_address_set = 1;
+ else if (unformat (i, "dst %U", unformat_ip6_address, &v6address))
+ v6_address_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (v4_address_set && v6_address_set)
+ {
+ errmsg ("both v4 and v6 addresses set");
+ return -99;
+ }
+ if (!v4_address_set && !v6_address_set)
+ {
+ errmsg ("no address set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP_NEIGHBOR_ADD_DEL, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = is_add;
+ mp->is_static = is_static;
+ mp->is_no_adj_fib = is_no_fib_entry;
+ if (mac_set)
+ clib_memcpy (mp->mac_address, mac_address, 6);
+ if (v6_address_set)
+ {
+ mp->is_ipv6 = 1;
+ clib_memcpy (mp->dst_address, &v6address, sizeof (v6address));
+ }
+ else
+ {
+ /* mp->is_ipv6 = 0; via memset in M macro above */
+ clib_memcpy (mp->dst_address, &v4address, sizeof (v4address));
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
+
+static int
+api_reset_vrf (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_reset_vrf_t *mp;
+ u32 vrf_id = 0;
+ u8 is_ipv6 = 0;
+ u8 vrf_id_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vrf %d", &vrf_id))
+ vrf_id_set = 1;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (vrf_id_set == 0)
+ {
+ errmsg ("missing vrf id");
+ return -99;
+ }
+
+ M (RESET_VRF, mp);
+
+ mp->vrf_id = ntohl (vrf_id);
+ mp->is_ipv6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_create_vlan_subif (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_create_vlan_subif_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 vlan_id;
+ u8 vlan_id_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "vlan %d", &vlan_id))
+ vlan_id_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (vlan_id_set == 0)
+ {
+ errmsg ("missing vlan_id");
+ return -99;
+ }
+ M (CREATE_VLAN_SUBIF, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->vlan_id = ntohl (vlan_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
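+
+/*
+ * Usage sketch (assuming the "create_vlan_subif" VAT command name; values
+ * are hypothetical):
+ *
+ *   create_vlan_subif sw_if_index 1 vlan 100
+ */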
+
+#define foreach_create_subif_bit \
+_(no_tags) \
+_(one_tag) \
+_(two_tags) \
+_(dot1ad) \
+_(exact_match) \
+_(default_sub) \
+_(outer_vlan_id_any) \
+_(inner_vlan_id_any)
+
+static int
+api_create_subif (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_create_subif_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 sub_id;
+ u8 sub_id_set = 0;
+ u32 no_tags = 0;
+ u32 one_tag = 0;
+ u32 two_tags = 0;
+ u32 dot1ad = 0;
+ u32 exact_match = 0;
+ u32 default_sub = 0;
+ u32 outer_vlan_id_any = 0;
+ u32 inner_vlan_id_any = 0;
+ u32 tmp;
+ u16 outer_vlan_id = 0;
+ u16 inner_vlan_id = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sub_id %d", &sub_id))
+ sub_id_set = 1;
+ else if (unformat (i, "outer_vlan_id %d", &tmp))
+ outer_vlan_id = tmp;
+ else if (unformat (i, "inner_vlan_id %d", &tmp))
+ inner_vlan_id = tmp;
+
+#define _(a) else if (unformat (i, #a)) a = 1 ;
+ foreach_create_subif_bit
+#undef _
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (sub_id_set == 0)
+ {
+ errmsg ("missing sub_id");
+ return -99;
+ }
+ M (CREATE_SUBIF, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->sub_id = ntohl (sub_id);
+
+#define _(a) mp->a = a;
+ foreach_create_subif_bit;
+#undef _
+
+ mp->outer_vlan_id = ntohs (outer_vlan_id);
+ mp->inner_vlan_id = ntohs (inner_vlan_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
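+
+/*
+ * Usage sketch (assuming the "create_subif" VAT command name; values are
+ * hypothetical). Any of the foreach_create_subif_bit flags above may be
+ * given by name:
+ *
+ *   create_subif sw_if_index 1 sub_id 200 two_tags dot1ad outer_vlan_id 100 inner_vlan_id 200 exact_match
+ */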
+
+static int
+api_oam_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_oam_add_del_t *mp;
+ u32 vrf_id = 0;
+ u8 is_add = 1;
+ ip4_address_t src, dst;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (i, "src %U", unformat_ip4_address, &src))
+ src_set = 1;
+ else if (unformat (i, "dst %U", unformat_ip4_address, &dst))
+ dst_set = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (src_set == 0)
+ {
+ errmsg ("missing src addr");
+ return -99;
+ }
+
+ if (dst_set == 0)
+ {
+ errmsg ("missing dst addr");
+ return -99;
+ }
+
+ M (OAM_ADD_DEL, mp);
+
+ mp->vrf_id = ntohl (vrf_id);
+ mp->is_add = is_add;
+ clib_memcpy (mp->src_address, &src, sizeof (mp->src_address));
+ clib_memcpy (mp->dst_address, &dst, sizeof (mp->dst_address));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
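+
+/*
+ * Usage sketch (assuming the "oam_add_del" VAT command name; addresses
+ * are hypothetical):
+ *
+ *   oam_add_del vrf 0 src 10.0.0.1 dst 10.0.0.2
+ *   oam_add_del vrf 0 src 10.0.0.1 dst 10.0.0.2 del
+ */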
+
+static int
+api_reset_fib (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_reset_fib_t *mp;
+ u32 vrf_id = 0;
+ u8 is_ipv6 = 0;
+ u8 vrf_id_set = 0;
+
+ int ret;
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vrf %d", &vrf_id))
+ vrf_id_set = 1;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (vrf_id_set == 0)
+ {
+ errmsg ("missing vrf id");
+ return -99;
+ }
+
+ M (RESET_FIB, mp);
+
+ mp->vrf_id = ntohl (vrf_id);
+ mp->is_ipv6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_dhcp_proxy_config (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_dhcp_proxy_config_t *mp;
+ u32 rx_vrf_id = 0;
+ u32 server_vrf_id = 0;
+ u8 is_add = 1;
+ u8 v4_address_set = 0;
+ u8 v6_address_set = 0;
+ ip4_address_t v4address;
+ ip6_address_t v6address;
+ u8 v4_src_address_set = 0;
+ u8 v6_src_address_set = 0;
+ ip4_address_t v4srcaddress;
+ ip6_address_t v6srcaddress;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "rx_vrf_id %d", &rx_vrf_id))
+ ;
+ else if (unformat (i, "server_vrf_id %d", &server_vrf_id))
+ ;
+ else if (unformat (i, "svr %U", unformat_ip4_address, &v4address))
+ v4_address_set = 1;
+ else if (unformat (i, "svr %U", unformat_ip6_address, &v6address))
+ v6_address_set = 1;
+ else if (unformat (i, "src %U", unformat_ip4_address, &v4srcaddress))
+ v4_src_address_set = 1;
+ else if (unformat (i, "src %U", unformat_ip6_address, &v6srcaddress))
+ v6_src_address_set = 1;
+ else
+ break;
+ }
+
+ if (v4_address_set && v6_address_set)
+ {
+ errmsg ("both v4 and v6 server addresses set");
+ return -99;
+ }
+ if (!v4_address_set && !v6_address_set)
+ {
+ errmsg ("no server addresses set");
+ return -99;
+ }
+
+ if (v4_src_address_set && v6_src_address_set)
+ {
+ errmsg ("both v4 and v6 src addresses set");
+ return -99;
+ }
+ if (!v4_src_address_set && !v6_src_address_set)
+ {
+ errmsg ("no src addresses set");
+ return -99;
+ }
+
+ if (!(v4_src_address_set && v4_address_set) &&
+ !(v6_src_address_set && v6_address_set))
+ {
+ errmsg ("no matching server and src addresses set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (DHCP_PROXY_CONFIG, mp);
+
+ mp->is_add = is_add;
+ mp->rx_vrf_id = ntohl (rx_vrf_id);
+ mp->server_vrf_id = ntohl (server_vrf_id);
+ if (v6_address_set)
+ {
+ mp->is_ipv6 = 1;
+ clib_memcpy (mp->dhcp_server, &v6address, sizeof (v6address));
+ clib_memcpy (mp->dhcp_src_address, &v6srcaddress, sizeof (v6address));
+ }
+ else
+ {
+ clib_memcpy (mp->dhcp_server, &v4address, sizeof (v4address));
+ clib_memcpy (mp->dhcp_src_address, &v4srcaddress, sizeof (v4address));
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
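+
+/*
+ * Usage sketch (assuming the "dhcp_proxy_config" VAT command name;
+ * addresses are hypothetical). Server and source must share an address
+ * family, per the checks above:
+ *
+ *   dhcp_proxy_config rx_vrf_id 0 server_vrf_id 0 svr 10.10.10.1 src 10.0.0.1
+ *   dhcp_proxy_config rx_vrf_id 0 server_vrf_id 0 svr 2001:db8::1 src 2001:db8::2 del
+ */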
+
+#define vl_api_dhcp_proxy_details_t_endian vl_noop_handler
+#define vl_api_dhcp_proxy_details_t_print vl_noop_handler
+
+static void
+vl_api_dhcp_proxy_details_t_handler (vl_api_dhcp_proxy_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u32 i, count = mp->count;
+ vl_api_dhcp_server_t *s;
+
+ if (mp->is_ipv6)
+ print (vam->ofp,
+ "RX Table-ID %d, Source Address %U, VSS FIB-ID %d, VSS OUI %d",
+ ntohl (mp->rx_vrf_id),
+ format_ip6_address, mp->dhcp_src_address,
+ ntohl (mp->vss_oui), ntohl (mp->vss_fib_id));
+ else
+ print (vam->ofp,
+ "RX Table-ID %d, Source Address %U, VSS FIB-ID %d, VSS OUI %d",
+ ntohl (mp->rx_vrf_id),
+ format_ip4_address, mp->dhcp_src_address,
+ ntohl (mp->vss_oui), ntohl (mp->vss_fib_id));
+
+ for (i = 0; i < count; i++)
+ {
+ s = &mp->servers[i];
+
+ if (mp->is_ipv6)
+ print (vam->ofp,
+ " Server Table-ID %d, Server Address %U",
+ ntohl (s->server_vrf_id), format_ip6_address, s->dhcp_server);
+ else
+ print (vam->ofp,
+ " Server Table-ID %d, Server Address %U",
+ ntohl (s->server_vrf_id), format_ip4_address, s->dhcp_server);
+ }
+}
+
+static void vl_api_dhcp_proxy_details_t_handler_json
+ (vl_api_dhcp_proxy_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ u32 i, count = mp->count;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ vl_api_dhcp_server_t *s;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "rx-table-id", ntohl (mp->rx_vrf_id));
+ vat_json_object_add_uint (node, "vss-fib-id", ntohl (mp->vss_fib_id));
+ vat_json_object_add_uint (node, "vss-oui", ntohl (mp->vss_oui));
+
+ if (mp->is_ipv6)
+ {
+ clib_memcpy (&ip6, &mp->dhcp_src_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "src_address", ip6);
+ }
+ else
+ {
+ clib_memcpy (&ip4, &mp->dhcp_src_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "src_address", ip4);
+ }
+
+ for (i = 0; i < count; i++)
+ {
+ s = &mp->servers[i];
+
+ vat_json_object_add_uint (node, "server-table-id",
+ ntohl (s->server_vrf_id));
+
+      if (mp->is_ipv6)
+	{
+	  clib_memcpy (&ip6, &s->dhcp_server, sizeof (ip6));
+	  vat_json_object_add_ip6 (node, "server_address", ip6);
+	}
+      else
+	{
+	  clib_memcpy (&ip4, &s->dhcp_server, sizeof (ip4));
+	  vat_json_object_add_ip4 (node, "server_address", ip4);
+	}
+ }
+}
+
+static int
+api_dhcp_proxy_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_control_ping_t *mp_ping;
+ vl_api_dhcp_proxy_dump_t *mp;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (DHCP_PROXY_DUMP, mp);
+
+ mp->is_ip6 = is_ipv6;
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_dhcp_proxy_set_vss (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_dhcp_proxy_set_vss_t *mp;
+ u8 is_ipv6 = 0;
+ u8 is_add = 1;
+ u32 tbl_id;
+ u8 tbl_id_set = 0;
+ u32 oui;
+ u8 oui_set = 0;
+ u32 fib_id;
+ u8 fib_id_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "tbl_id %d", &tbl_id))
+ tbl_id_set = 1;
+ if (unformat (i, "fib_id %d", &fib_id))
+ fib_id_set = 1;
+ if (unformat (i, "oui %d", &oui))
+ oui_set = 1;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (tbl_id_set == 0)
+ {
+ errmsg ("missing tbl id");
+ return -99;
+ }
+
+ if (fib_id_set == 0)
+ {
+ errmsg ("missing fib id");
+ return -99;
+ }
+ if (oui_set == 0)
+ {
+ errmsg ("missing oui");
+ return -99;
+ }
+
+ M (DHCP_PROXY_SET_VSS, mp);
+ mp->tbl_id = ntohl (tbl_id);
+ mp->fib_id = ntohl (fib_id);
+ mp->oui = ntohl (oui);
+ mp->is_ipv6 = is_ipv6;
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
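+
+/*
+ * Usage sketch (assuming the "dhcp_proxy_set_vss" VAT command name; the
+ * table, fib, and OUI values are hypothetical):
+ *
+ *   dhcp_proxy_set_vss tbl_id 0 fib_id 1 oui 4491
+ *   dhcp_proxy_set_vss tbl_id 0 fib_id 1 oui 4491 del
+ */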
+
+static int
+api_dhcp_client_config (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_dhcp_client_config_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_add = 1;
+ u8 *hostname = 0;
+ u8 disable_event = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "hostname %s", &hostname))
+ ;
+ else if (unformat (i, "disable_event"))
+ disable_event = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+  if (vec_len (hostname) > 63)
+    {
+      errmsg ("hostname too long");
+      return -99;
+    }
+ vec_add1 (hostname, 0);
+
+ /* Construct the API message */
+ M (DHCP_CLIENT_CONFIG, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+ clib_memcpy (mp->hostname, hostname, vec_len (hostname));
+ vec_free (hostname);
+ mp->is_add = is_add;
+ mp->want_dhcp_event = disable_event ? 0 : 1;
+ mp->pid = htonl (getpid ());
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
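+
+/*
+ * Usage sketch (assuming the "dhcp_client_config" VAT command name; the
+ * hostname is hypothetical and limited to 63 characters above):
+ *
+ *   dhcp_client_config sw_if_index 1 hostname vpp-client
+ *   dhcp_client_config sw_if_index 1 del
+ */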
+
+static int
+api_set_ip_flow_hash (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_set_ip_flow_hash_t *mp;
+ u32 vrf_id = 0;
+ u8 is_ipv6 = 0;
+ u8 vrf_id_set = 0;
+ u8 src = 0;
+ u8 dst = 0;
+ u8 sport = 0;
+ u8 dport = 0;
+ u8 proto = 0;
+ u8 reverse = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vrf %d", &vrf_id))
+ vrf_id_set = 1;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (i, "src"))
+ src = 1;
+ else if (unformat (i, "dst"))
+ dst = 1;
+ else if (unformat (i, "sport"))
+ sport = 1;
+ else if (unformat (i, "dport"))
+ dport = 1;
+ else if (unformat (i, "proto"))
+ proto = 1;
+ else if (unformat (i, "reverse"))
+ reverse = 1;
+
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (vrf_id_set == 0)
+ {
+ errmsg ("missing vrf id");
+ return -99;
+ }
+
+ M (SET_IP_FLOW_HASH, mp);
+ mp->src = src;
+ mp->dst = dst;
+ mp->sport = sport;
+ mp->dport = dport;
+ mp->proto = proto;
+ mp->reverse = reverse;
+ mp->vrf_id = ntohl (vrf_id);
+ mp->is_ipv6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
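+
+/*
+ * Usage sketch (assuming the "set_ip_flow_hash" VAT command name):
+ *
+ *   set_ip_flow_hash vrf 0 src dst sport dport proto
+ *   set_ip_flow_hash vrf 0 ipv6 src dst
+ */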
+
+static int
+api_sw_interface_ip6_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_ip6_enable_disable_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 enable = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (SW_INTERFACE_IP6_ENABLE_DISABLE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_ip6_set_link_local_address (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_ip6_set_link_local_address_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 v6_address_set = 0;
+ ip6_address_t v6address;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "%U", unformat_ip6_address, &v6address))
+ v6_address_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (!v6_address_set)
+ {
+ errmsg ("no address set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ clib_memcpy (mp->address, &v6address, sizeof (v6address));
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
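+
+/*
+ * Usage sketch (assuming the "sw_interface_ip6_set_link_local_address"
+ * VAT command name; the address is hypothetical):
+ *
+ *   sw_interface_ip6_set_link_local_address sw_if_index 1 fe80::1234
+ */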
+
+static int
+api_ip6nd_proxy_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip6nd_proxy_add_del_t *mp;
+ u32 sw_if_index = ~0;
+ u8 v6_address_set = 0;
+ ip6_address_t v6address;
+ u8 is_del = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "%U", unformat_ip6_address, &v6address))
+ v6_address_set = 1;
+ if (unformat (i, "del"))
+ is_del = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (!v6_address_set)
+ {
+ errmsg ("no address set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP6ND_PROXY_ADD_DEL, mp);
+
+ mp->is_del = is_del;
+ mp->sw_if_index = ntohl (sw_if_index);
+ clib_memcpy (mp->address, &v6address, sizeof (v6address));
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip6nd_proxy_dump (vat_main_t * vam)
+{
+ vl_api_ip6nd_proxy_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (IP6ND_PROXY_DUMP, mp);
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void vl_api_ip6nd_proxy_details_t_handler
+ (vl_api_ip6nd_proxy_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "host %U sw_if_index %d",
+ format_ip6_address, mp->address, ntohl (mp->sw_if_index));
+}
+
+static void vl_api_ip6nd_proxy_details_t_handler_json
+ (vl_api_ip6nd_proxy_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ struct in6_addr ip6;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+
+ clib_memcpy (&ip6, mp->address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "host", ip6);
+}
+
+static int
+api_sw_interface_ip6nd_ra_prefix (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_ip6nd_ra_prefix_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 address_length = 0;
+ u8 v6_address_set = 0;
+ ip6_address_t v6address;
+ u8 use_default = 0;
+ u8 no_advertise = 0;
+ u8 off_link = 0;
+ u8 no_autoconfig = 0;
+ u8 no_onlink = 0;
+ u8 is_no = 0;
+ u32 val_lifetime = 0;
+ u32 pref_lifetime = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "%U/%d",
+ unformat_ip6_address, &v6address, &address_length))
+ v6_address_set = 1;
+ else if (unformat (i, "val_life %d", &val_lifetime))
+ ;
+ else if (unformat (i, "pref_life %d", &pref_lifetime))
+ ;
+ else if (unformat (i, "def"))
+ use_default = 1;
+ else if (unformat (i, "noadv"))
+ no_advertise = 1;
+ else if (unformat (i, "offl"))
+ off_link = 1;
+ else if (unformat (i, "noauto"))
+ no_autoconfig = 1;
+ else if (unformat (i, "nolink"))
+ no_onlink = 1;
+ else if (unformat (i, "isno"))
+ is_no = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (!v6_address_set)
+ {
+ errmsg ("no address set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_IP6ND_RA_PREFIX, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ clib_memcpy (mp->address, &v6address, sizeof (v6address));
+ mp->address_length = address_length;
+ mp->use_default = use_default;
+ mp->no_advertise = no_advertise;
+ mp->off_link = off_link;
+ mp->no_autoconfig = no_autoconfig;
+ mp->no_onlink = no_onlink;
+ mp->is_no = is_no;
+ mp->val_lifetime = ntohl (val_lifetime);
+ mp->pref_lifetime = ntohl (pref_lifetime);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
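+
+/*
+ * Usage sketch (assuming the "sw_interface_ip6nd_ra_prefix" VAT command
+ * name; prefix and lifetimes are hypothetical):
+ *
+ *   sw_interface_ip6nd_ra_prefix sw_if_index 1 2001:db8::/64 val_life 600 pref_life 300
+ */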
+
+static int
+api_sw_interface_ip6nd_ra_config (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_ip6nd_ra_config_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 suppress = 0;
+ u8 managed = 0;
+ u8 other = 0;
+ u8 ll_option = 0;
+ u8 send_unicast = 0;
+ u8 cease = 0;
+ u8 is_no = 0;
+ u8 default_router = 0;
+ u32 max_interval = 0;
+ u32 min_interval = 0;
+ u32 lifetime = 0;
+ u32 initial_count = 0;
+ u32 initial_interval = 0;
+ int ret;
+
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "maxint %d", &max_interval))
+ ;
+ else if (unformat (i, "minint %d", &min_interval))
+ ;
+ else if (unformat (i, "life %d", &lifetime))
+ ;
+ else if (unformat (i, "count %d", &initial_count))
+ ;
+ else if (unformat (i, "interval %d", &initial_interval))
+ ;
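+      /* also accept the historical misspelling "surpress" */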
+ else if (unformat (i, "suppress") || unformat (i, "surpress"))
+ suppress = 1;
+ else if (unformat (i, "managed"))
+ managed = 1;
+ else if (unformat (i, "other"))
+ other = 1;
+ else if (unformat (i, "ll"))
+ ll_option = 1;
+ else if (unformat (i, "send"))
+ send_unicast = 1;
+ else if (unformat (i, "cease"))
+ cease = 1;
+ else if (unformat (i, "isno"))
+ is_no = 1;
+ else if (unformat (i, "def"))
+ default_router = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_IP6ND_RA_CONFIG, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->max_interval = ntohl (max_interval);
+ mp->min_interval = ntohl (min_interval);
+ mp->lifetime = ntohl (lifetime);
+ mp->initial_count = ntohl (initial_count);
+ mp->initial_interval = ntohl (initial_interval);
+ mp->suppress = suppress;
+ mp->managed = managed;
+ mp->other = other;
+ mp->ll_option = ll_option;
+ mp->send_unicast = send_unicast;
+ mp->cease = cease;
+ mp->is_no = is_no;
+ mp->default_router = default_router;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
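+
+/*
+ * Usage sketch (assuming the "sw_interface_ip6nd_ra_config" VAT command
+ * name; interval and lifetime values are hypothetical):
+ *
+ *   sw_interface_ip6nd_ra_config sw_if_index 1 maxint 600 minint 200 life 1800 suppress
+ */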
+
+static int
+api_set_arp_neighbor_limit (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_set_arp_neighbor_limit_t *mp;
+ u32 arp_nbr_limit;
+ u8 limit_set = 0;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "arp_nbr_limit %d", &arp_nbr_limit))
+ limit_set = 1;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (limit_set == 0)
+ {
+ errmsg ("missing limit value");
+ return -99;
+ }
+
+ M (SET_ARP_NEIGHBOR_LIMIT, mp);
+
+ mp->arp_neighbor_limit = ntohl (arp_nbr_limit);
+ mp->is_ipv6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2_patch_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2_patch_add_del_t *mp;
+ u32 rx_sw_if_index;
+ u8 rx_sw_if_index_set = 0;
+ u32 tx_sw_if_index;
+ u8 tx_sw_if_index_set = 0;
+ u8 is_add = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "rx_sw_if_index %d", &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ else if (unformat (i, "tx_sw_if_index %d", &tx_sw_if_index))
+ tx_sw_if_index_set = 1;
+ else if (unformat (i, "rx"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam,
+ &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "tx"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam,
+ &tx_sw_if_index))
+ tx_sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (rx_sw_if_index_set == 0)
+ {
+ errmsg ("missing rx interface name or rx_sw_if_index");
+ return -99;
+ }
+
+ if (tx_sw_if_index_set == 0)
+ {
+ errmsg ("missing tx interface name or tx_sw_if_index");
+ return -99;
+ }
+
+ M (L2_PATCH_ADD_DEL, mp);
+
+ mp->rx_sw_if_index = ntohl (rx_sw_if_index);
+ mp->tx_sw_if_index = ntohl (tx_sw_if_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
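+
+/*
+ * Usage sketch (assuming the "l2_patch_add_del" VAT command name;
+ * interface indices are hypothetical):
+ *
+ *   l2_patch_add_del rx_sw_if_index 1 tx_sw_if_index 2
+ *   l2_patch_add_del rx_sw_if_index 1 tx_sw_if_index 2 del
+ */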
+
+u8 is_del;
+u8 localsid_addr[16];
+u8 end_psp;
+u8 behavior;
+u32 sw_if_index;
+u32 vlan_index;
+u32 fib_table;
+u8 nh_addr[16];
+
+static int
+api_sr_localsid_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sr_localsid_add_del_t *mp;
+
+  u8 is_del = 0;
+ ip6_address_t localsid;
+ u8 end_psp = 0;
+ u8 behavior = ~0;
+  u32 sw_if_index = ~(u32) 0;
+ u32 fib_table = ~(u32) 0;
+ ip6_address_t next_hop;
+
+ bool nexthop_set = 0;
+
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_del = 1;
+ else if (unformat (i, "address %U", unformat_ip6_address, &localsid));
+ else if (unformat (i, "next-hop %U", unformat_ip6_address, &next_hop))
+ nexthop_set = 1;
+ else if (unformat (i, "behavior %u", &behavior));
+ else if (unformat (i, "sw_if_index %u", &sw_if_index));
+ else if (unformat (i, "fib-table %u", &fib_table));
+ else if (unformat (i, "end.psp %u", &behavior));
+ else
+ break;
+ }
+
+ M (SR_LOCALSID_ADD_DEL, mp);
+
+ clib_memcpy (mp->localsid_addr, &localsid, sizeof (mp->localsid_addr));
+ if (nexthop_set)
+ clib_memcpy (mp->nh_addr, &next_hop, sizeof (mp->nh_addr));
+ mp->behavior = behavior;
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->fib_table = ntohl (fib_table);
+ mp->end_psp = end_psp;
+ mp->is_del = is_del;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
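+
+/*
+ * Usage sketch (assuming the "sr_localsid_add_del" VAT command name; the
+ * SID and the numeric behavior code are hypothetical):
+ *
+ *   sr_localsid_add_del address 2001:db8::100 behavior 4 fib-table 0
+ *   sr_localsid_add_del address 2001:db8::100 del
+ */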
+
+static int
+api_ioam_enable (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_ioam_enable_t *mp;
+ u32 id = 0;
+ int has_trace_option = 0;
+ int has_pot_option = 0;
+ int has_seqno_option = 0;
+ int has_analyse_option = 0;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace"))
+ has_trace_option = 1;
+ else if (unformat (input, "pot"))
+ has_pot_option = 1;
+ else if (unformat (input, "seqno"))
+ has_seqno_option = 1;
+ else if (unformat (input, "analyse"))
+ has_analyse_option = 1;
+ else
+ break;
+ }
+ M (IOAM_ENABLE, mp);
+ mp->id = htons (id);
+ mp->seqno = has_seqno_option;
+ mp->analyse = has_analyse_option;
+ mp->pot_enable = has_pot_option;
+ mp->trace_enable = has_trace_option;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
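+
+/*
+ * Usage sketch (assuming the "ioam_enable" VAT command name); any subset
+ * of the options may be given:
+ *
+ *   ioam_enable trace pot seqno analyse
+ */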
+
+
+static int
+api_ioam_disable (vat_main_t * vam)
+{
+ vl_api_ioam_disable_t *mp;
+ int ret;
+
+ M (IOAM_DISABLE, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+#define foreach_tcp_proto_field \
+_(src_port) \
+_(dst_port)
+
+#define foreach_udp_proto_field \
+_(src_port) \
+_(dst_port)
+
+#define foreach_ip4_proto_field \
+_(src_address) \
+_(dst_address) \
+_(tos) \
+_(length) \
+_(fragment_id) \
+_(ttl) \
+_(protocol) \
+_(checksum)
+
+typedef struct
+{
+ u16 src_port, dst_port;
+} tcpudp_header_t;
+
+#if VPP_API_TEST_BUILTIN == 0
+uword
+unformat_tcp_mask (unformat_input_t * input, va_list * args)
+{
+ u8 **maskp = va_arg (*args, u8 **);
+ u8 *mask = 0;
+ u8 found_something = 0;
+ tcp_header_t *tcp;
+
+#define _(a) u8 a=0;
+ foreach_tcp_proto_field;
+#undef _
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0);
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_tcp_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_tcp_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*tcp) - 1);
+
+ tcp = (tcp_header_t *) mask;
+
+#define _(a) if (a) memset (&tcp->a, 0xff, sizeof (tcp->a));
+ foreach_tcp_proto_field;
+#undef _
+
+ *maskp = mask;
+ return 1;
+}
+
+uword
+unformat_udp_mask (unformat_input_t * input, va_list * args)
+{
+ u8 **maskp = va_arg (*args, u8 **);
+ u8 *mask = 0;
+ u8 found_something = 0;
+ udp_header_t *udp;
+
+#define _(a) u8 a=0;
+ foreach_udp_proto_field;
+#undef _
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0);
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_udp_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_udp_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*udp) - 1);
+
+ udp = (udp_header_t *) mask;
+
+#define _(a) if (a) memset (&udp->a, 0xff, sizeof (udp->a));
+ foreach_udp_proto_field;
+#undef _
+
+ *maskp = mask;
+ return 1;
+}
+
+uword
+unformat_l4_mask (unformat_input_t * input, va_list * args)
+{
+ u8 **maskp = va_arg (*args, u8 **);
+ u16 src_port = 0, dst_port = 0;
+ tcpudp_header_t *tcpudp;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "tcp %U", unformat_tcp_mask, maskp))
+ return 1;
+ else if (unformat (input, "udp %U", unformat_udp_mask, maskp))
+ return 1;
+ else if (unformat (input, "src_port"))
+ src_port = 0xFFFF;
+ else if (unformat (input, "dst_port"))
+ dst_port = 0xFFFF;
+ else
+ return 0;
+ }
+
+ if (!src_port && !dst_port)
+ return 0;
+
+ u8 *mask = 0;
+ vec_validate (mask, sizeof (tcpudp_header_t) - 1);
+
+ tcpudp = (tcpudp_header_t *) mask;
+ tcpudp->src_port = src_port;
+ tcpudp->dst_port = dst_port;
+
+ *maskp = mask;
+
+ return 1;
+}
+
+uword
+unformat_ip4_mask (unformat_input_t * input, va_list * args)
+{
+ u8 **maskp = va_arg (*args, u8 **);
+ u8 *mask = 0;
+ u8 found_something = 0;
+ ip4_header_t *ip;
+
+#define _(a) u8 a=0;
+ foreach_ip4_proto_field;
+#undef _
+ u8 version = 0;
+ u8 hdr_length = 0;
+
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version"))
+ version = 1;
+ else if (unformat (input, "hdr_length"))
+ hdr_length = 1;
+ else if (unformat (input, "src"))
+ src_address = 1;
+ else if (unformat (input, "dst"))
+ dst_address = 1;
+ else if (unformat (input, "proto"))
+ protocol = 1;
+
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_ip4_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_ip4_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*ip) - 1);
+
+ ip = (ip4_header_t *) mask;
+
+#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a));
+ foreach_ip4_proto_field;
+#undef _
+
+ ip->ip_version_and_header_length = 0;
+
+ if (version)
+ ip->ip_version_and_header_length |= 0xF0;
+
+ if (hdr_length)
+ ip->ip_version_and_header_length |= 0x0F;
+
+ *maskp = mask;
+ return 1;
+}
+
+#define foreach_ip6_proto_field \
+_(src_address) \
+_(dst_address) \
+_(payload_length) \
+_(hop_limit) \
+_(protocol)
+
+uword
+unformat_ip6_mask (unformat_input_t * input, va_list * args)
+{
+ u8 **maskp = va_arg (*args, u8 **);
+ u8 *mask = 0;
+ u8 found_something = 0;
+ ip6_header_t *ip;
+ u32 ip_version_traffic_class_and_flow_label;
+
+#define _(a) u8 a=0;
+ foreach_ip6_proto_field;
+#undef _
+ u8 version = 0;
+ u8 traffic_class = 0;
+ u8 flow_label = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version"))
+ version = 1;
+ else if (unformat (input, "traffic-class"))
+ traffic_class = 1;
+ else if (unformat (input, "flow-label"))
+ flow_label = 1;
+ else if (unformat (input, "src"))
+ src_address = 1;
+ else if (unformat (input, "dst"))
+ dst_address = 1;
+ else if (unformat (input, "proto"))
+ protocol = 1;
+
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_ip6_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_ip6_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*ip) - 1);
+
+ ip = (ip6_header_t *) mask;
+
+#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a));
+ foreach_ip6_proto_field;
+#undef _
+
+ ip_version_traffic_class_and_flow_label = 0;
+
+ if (version)
+ ip_version_traffic_class_and_flow_label |= 0xF0000000;
+
+ if (traffic_class)
+ ip_version_traffic_class_and_flow_label |= 0x0FF00000;
+
+ if (flow_label)
+ ip_version_traffic_class_and_flow_label |= 0x000FFFFF;
+
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label);
+
+ *maskp = mask;
+ return 1;
+}
+
+uword
+unformat_l3_mask (unformat_input_t * input, va_list * args)
+{
+ u8 **maskp = va_arg (*args, u8 **);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4 %U", unformat_ip4_mask, maskp))
+ return 1;
+ else if (unformat (input, "ip6 %U", unformat_ip6_mask, maskp))
+ return 1;
+ else
+ break;
+ }
+ return 0;
+}
+
+uword
+unformat_l2_mask (unformat_input_t * input, va_list * args)
+{
+ u8 **maskp = va_arg (*args, u8 **);
+ u8 *mask = 0;
+ u8 src = 0;
+ u8 dst = 0;
+ u8 proto = 0;
+ u8 tag1 = 0;
+ u8 tag2 = 0;
+ u8 ignore_tag1 = 0;
+ u8 ignore_tag2 = 0;
+ u8 cos1 = 0;
+ u8 cos2 = 0;
+ u8 dot1q = 0;
+ u8 dot1ad = 0;
+ int len = 14;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "src"))
+ src = 1;
+ else if (unformat (input, "dst"))
+ dst = 1;
+ else if (unformat (input, "proto"))
+ proto = 1;
+ else if (unformat (input, "tag1"))
+ tag1 = 1;
+ else if (unformat (input, "tag2"))
+ tag2 = 1;
+ else if (unformat (input, "ignore-tag1"))
+ ignore_tag1 = 1;
+ else if (unformat (input, "ignore-tag2"))
+ ignore_tag2 = 1;
+ else if (unformat (input, "cos1"))
+ cos1 = 1;
+ else if (unformat (input, "cos2"))
+ cos2 = 1;
+ else if (unformat (input, "dot1q"))
+ dot1q = 1;
+ else if (unformat (input, "dot1ad"))
+ dot1ad = 1;
+ else
+ break;
+ }
+ if ((src + dst + proto + tag1 + tag2 + dot1q + dot1ad +
+ ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0)
+ return 0;
+
+ if (tag1 || ignore_tag1 || cos1 || dot1q)
+ len = 18;
+ if (tag2 || ignore_tag2 || cos2 || dot1ad)
+ len = 22;
+
+ vec_validate (mask, len - 1);
+
+ if (dst)
+ memset (mask, 0xff, 6);
+
+ if (src)
+ memset (mask + 6, 0xff, 6);
+
+ if (tag2 || dot1ad)
+ {
+ /* inner vlan tag */
+ if (tag2)
+ {
+ mask[19] = 0xff;
+ mask[18] = 0x0f;
+ }
+ if (cos2)
+ mask[18] |= 0xe0;
+ if (proto)
+ mask[21] = mask[20] = 0xff;
+ if (tag1)
+ {
+ mask[15] = 0xff;
+ mask[14] = 0x0f;
+ }
+ if (cos1)
+ mask[14] |= 0xe0;
+ *maskp = mask;
+ return 1;
+ }
+ if (tag1 | dot1q)
+ {
+ if (tag1)
+ {
+ mask[15] = 0xff;
+ mask[14] = 0x0f;
+ }
+ if (cos1)
+ mask[14] |= 0xe0;
+ if (proto)
+ mask[16] = mask[17] = 0xff;
+
+ *maskp = mask;
+ return 1;
+ }
+ if (cos2)
+ mask[18] |= 0xe0;
+ if (cos1)
+ mask[14] |= 0xe0;
+ if (proto)
+ mask[12] = mask[13] = 0xff;
+
+ *maskp = mask;
+ return 1;
+}
+
+uword
+unformat_classify_mask (unformat_input_t * input, va_list * args)
+{
+ u8 **maskp = va_arg (*args, u8 **);
+ u32 *skipp = va_arg (*args, u32 *);
+ u32 *matchp = va_arg (*args, u32 *);
+ u32 match;
+ u8 *mask = 0;
+ u8 *l2 = 0;
+ u8 *l3 = 0;
+ u8 *l4 = 0;
+ int i;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "hex %U", unformat_hex_string, &mask))
+ ;
+ else if (unformat (input, "l2 %U", unformat_l2_mask, &l2))
+ ;
+ else if (unformat (input, "l3 %U", unformat_l3_mask, &l3))
+ ;
+ else if (unformat (input, "l4 %U", unformat_l4_mask, &l4))
+ ;
+ else
+ break;
+ }
+
+ if (l4 && !l3)
+ {
+ vec_free (mask);
+ vec_free (l2);
+ vec_free (l4);
+ return 0;
+ }
+
+ if (mask || l2 || l3 || l4)
+ {
+ if (l2 || l3 || l4)
+ {
+ /* "With a free Ethernet header in every package" */
+ if (l2 == 0)
+ vec_validate (l2, 13);
+ mask = l2;
+ if (vec_len (l3))
+ {
+ vec_append (mask, l3);
+ vec_free (l3);
+ }
+ if (vec_len (l4))
+ {
+ vec_append (mask, l4);
+ vec_free (l4);
+ }
+ }
+
+ /* Scan forward looking for the first significant mask octet */
+ for (i = 0; i < vec_len (mask); i++)
+ if (mask[i])
+ break;
+
+ /* compute (skip, match) params */
+ *skipp = i / sizeof (u32x4);
+ vec_delete (mask, *skipp * sizeof (u32x4), 0);
+
+ /* Pad mask to an even multiple of the vector size */
+ while (vec_len (mask) % sizeof (u32x4))
+ vec_add1 (mask, 0);
+
+ match = vec_len (mask) / sizeof (u32x4);
+
+ for (i = match * sizeof (u32x4); i > 0; i -= sizeof (u32x4))
+ {
+ u64 *tmp = (u64 *) (mask + (i - sizeof (u32x4)));
+ if (*tmp || *(tmp + 1))
+ break;
+ match--;
+ }
+ if (match == 0)
+ clib_warning ("BUG: match 0");
+
+ _vec_len (mask) = match * sizeof (u32x4);
+
+ *matchp = match;
+ *maskp = mask;
+
+ return 1;
+ }
+
+ return 0;
+}
+#endif /* VPP_API_TEST_BUILTIN */
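+
+/*
+ * Worked example of the (skip, match) computation in
+ * unformat_classify_mask above, using a hypothetical mask of "l3 ip4 src":
+ * the synthesized 14-byte L2 header is all zeros and the IPv4 source
+ * address occupies bytes 26..29 of the key, so the first significant octet
+ * yields skip = 26 / sizeof (u32x4) = 1. After the skipped vector is
+ * deleted and trailing all-zero vectors are trimmed, a single 16-byte
+ * vector still covers the source address, so match = 1.
+ */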
+
+#define foreach_l2_next \
+_(drop, DROP) \
+_(ethernet, ETHERNET_INPUT) \
+_(ip4, IP4_INPUT) \
+_(ip6, IP6_INPUT)
+
+uword
+unformat_l2_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 *miss_next_indexp = va_arg (*args, u32 *);
+ u32 next_index = 0;
+ u32 tmp;
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = L2_INPUT_CLASSIFY_NEXT_##N; goto out;}
+ foreach_l2_next;
+#undef _
+
+ if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+#define foreach_ip_next \
+_(drop, DROP) \
+_(local, LOCAL) \
+_(rewrite, REWRITE)
+
+uword
+api_unformat_ip_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 *miss_next_indexp = va_arg (*args, u32 *);
+ u32 next_index = 0;
+ u32 tmp;
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = IP_LOOKUP_NEXT_##N; goto out;}
+ foreach_ip_next;
+#undef _
+
+ if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+#define foreach_acl_next \
+_(deny, DENY)
+
+uword
+api_unformat_acl_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 *miss_next_indexp = va_arg (*args, u32 *);
+ u32 next_index = 0;
+ u32 tmp;
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = ACL_NEXT_INDEX_##N; goto out;}
+ foreach_acl_next;
+#undef _
+
+ if (unformat (input, "permit"))
+ {
+ next_index = ~0;
+ goto out;
+ }
+ else if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+uword
+unformat_policer_precolor (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (unformat (input, "conform-color"))
+ *r = POLICE_CONFORM;
+ else if (unformat (input, "exceed-color"))
+ *r = POLICE_EXCEED;
+ else
+ return 0;
+
+ return 1;
+}
+
+static int
+api_classify_add_del_table (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_classify_add_del_table_t *mp;
+
+ u32 nbuckets = 2;
+ u32 skip = ~0;
+ u32 match = ~0;
+ int is_add = 1;
+ int del_chain = 0;
+ u32 table_index = ~0;
+ u32 next_table_index = ~0;
+ u32 miss_next_index = ~0;
+ u32 memory_size = 32 << 20;
+ u8 *mask = 0;
+ u32 current_data_flag = 0;
+ int current_data_offset = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "del-chain"))
+ {
+ is_add = 0;
+ del_chain = 1;
+ }
+ else if (unformat (i, "buckets %d", &nbuckets))
+ ;
+ else if (unformat (i, "memory_size %d", &memory_size))
+ ;
+ else if (unformat (i, "skip %d", &skip))
+ ;
+ else if (unformat (i, "match %d", &match))
+ ;
+ else if (unformat (i, "table %d", &table_index))
+ ;
+ else if (unformat (i, "mask %U", unformat_classify_mask,
+ &mask, &skip, &match))
+ ;
+ else if (unformat (i, "next-table %d", &next_table_index))
+ ;
+ else if (unformat (i, "miss-next %U", api_unformat_ip_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (i, "l2-miss-next %U", unformat_l2_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (i, "acl-miss-next %U", api_unformat_acl_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (i, "current-data-flag %d", &current_data_flag))
+ ;
+ else if (unformat (i, "current-data-offset %d", &current_data_offset))
+ ;
+ else
+ break;
+ }
+
+ if (is_add && mask == 0)
+ {
+ errmsg ("Mask required");
+ return -99;
+ }
+
+ if (is_add && skip == ~0)
+ {
+ errmsg ("skip count required");
+ return -99;
+ }
+
+ if (is_add && match == ~0)
+ {
+ errmsg ("match count required");
+ return -99;
+ }
+
+ if (!is_add && table_index == ~0)
+ {
+ errmsg ("table index required for delete");
+ return -99;
+ }
+
+ M2 (CLASSIFY_ADD_DEL_TABLE, mp, vec_len (mask));
+
+ mp->is_add = is_add;
+ mp->del_chain = del_chain;
+ mp->table_index = ntohl (table_index);
+ mp->nbuckets = ntohl (nbuckets);
+ mp->memory_size = ntohl (memory_size);
+ mp->skip_n_vectors = ntohl (skip);
+ mp->match_n_vectors = ntohl (match);
+ mp->next_table_index = ntohl (next_table_index);
+ mp->miss_next_index = ntohl (miss_next_index);
+ mp->current_data_flag = ntohl (current_data_flag);
+ mp->current_data_offset = ntohl (current_data_offset);
+ clib_memcpy (mp->mask, mask, vec_len (mask));
+
+ vec_free (mask);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
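+
+/*
+ * Usage sketch (assuming the "classify_add_del_table" VAT command name).
+ * When "mask" is given, skip and match are computed by
+ * unformat_classify_mask; the table index for delete is hypothetical:
+ *
+ *   classify_add_del_table mask l3 ip4 src buckets 64
+ *   classify_add_del_table del table 0
+ */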
+
+#if VPP_API_TEST_BUILTIN == 0
+uword
+unformat_l4_match (unformat_input_t * input, va_list * args)
+{
+ u8 **matchp = va_arg (*args, u8 **);
+
+ u8 *proto_header = 0;
+ int src_port = 0;
+ int dst_port = 0;
+
+ tcpudp_header_t h;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "src_port %d", &src_port))
+ ;
+ else if (unformat (input, "dst_port %d", &dst_port))
+ ;
+ else
+ return 0;
+ }
+
+ h.src_port = clib_host_to_net_u16 (src_port);
+ h.dst_port = clib_host_to_net_u16 (dst_port);
+ vec_validate (proto_header, sizeof (h) - 1);
+ memcpy (proto_header, &h, sizeof (h));
+
+ *matchp = proto_header;
+
+ return 1;
+}
+
+uword
+unformat_ip4_match (unformat_input_t * input, va_list * args)
+{
+ u8 **matchp = va_arg (*args, u8 **);
+ u8 *match = 0;
+ ip4_header_t *ip;
+ int version = 0;
+ u32 version_val;
+ int hdr_length = 0;
+ u32 hdr_length_val;
+ int src = 0, dst = 0;
+ ip4_address_t src_val, dst_val;
+ int proto = 0;
+ u32 proto_val;
+ int tos = 0;
+ u32 tos_val;
+ int length = 0;
+ u32 length_val;
+ int fragment_id = 0;
+ u32 fragment_id_val;
+ int ttl = 0;
+ int ttl_val;
+ int checksum = 0;
+ u32 checksum_val;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version %d", &version_val))
+ version = 1;
+ else if (unformat (input, "hdr_length %d", &hdr_length_val))
+ hdr_length = 1;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src_val))
+ src = 1;
+ else if (unformat (input, "dst %U", unformat_ip4_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %d", &proto_val))
+ proto = 1;
+ else if (unformat (input, "tos %d", &tos_val))
+ tos = 1;
+ else if (unformat (input, "length %d", &length_val))
+ length = 1;
+ else if (unformat (input, "fragment_id %d", &fragment_id_val))
+ fragment_id = 1;
+ else if (unformat (input, "ttl %d", &ttl_val))
+ ttl = 1;
+ else if (unformat (input, "checksum %d", &checksum_val))
+ checksum = 1;
+ else
+ break;
+ }
+
+ if (version + hdr_length + src + dst + proto + tos + length + fragment_id
+ + ttl + checksum == 0)
+ return 0;
+
+ /*
+ * Aligned because we use the real comparison functions
+ */
+ vec_validate_aligned (match, sizeof (*ip) - 1, sizeof (u32x4));
+
+ ip = (ip4_header_t *) match;
+
+ /* These are realistically matched in practice */
+ if (src)
+ ip->src_address.as_u32 = src_val.as_u32;
+
+ if (dst)
+ ip->dst_address.as_u32 = dst_val.as_u32;
+
+ if (proto)
+ ip->protocol = proto_val;
+
+
+ /* These are not, but they're included for completeness */
+ if (version)
+ ip->ip_version_and_header_length |= (version_val & 0xF) << 4;
+
+ if (hdr_length)
+ ip->ip_version_and_header_length |= (hdr_length_val & 0xF);
+
+ if (tos)
+ ip->tos = tos_val;
+
+ if (length)
+ ip->length = clib_host_to_net_u16 (length_val);
+
+ if (ttl)
+ ip->ttl = ttl_val;
+
+ if (checksum)
+ ip->checksum = clib_host_to_net_u16 (checksum_val);
+
+ *matchp = match;
+ return 1;
+}
+
+uword
+unformat_ip6_match (unformat_input_t * input, va_list * args)
+{
+ u8 **matchp = va_arg (*args, u8 **);
+ u8 *match = 0;
+ ip6_header_t *ip;
+ int version = 0;
+ u32 version_val;
+ u8 traffic_class = 0;
+ u32 traffic_class_val = 0;
+ u8 flow_label = 0;
+  u32 flow_label_val;
+ int src = 0, dst = 0;
+ ip6_address_t src_val, dst_val;
+ int proto = 0;
+ u32 proto_val;
+ int payload_length = 0;
+ u32 payload_length_val;
+ int hop_limit = 0;
+ int hop_limit_val;
+ u32 ip_version_traffic_class_and_flow_label;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version %d", &version_val))
+ version = 1;
+ else if (unformat (input, "traffic_class %d", &traffic_class_val))
+ traffic_class = 1;
+ else if (unformat (input, "flow_label %d", &flow_label_val))
+ flow_label = 1;
+ else if (unformat (input, "src %U", unformat_ip6_address, &src_val))
+ src = 1;
+ else if (unformat (input, "dst %U", unformat_ip6_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %d", &proto_val))
+ proto = 1;
+ else if (unformat (input, "payload_length %d", &payload_length_val))
+ payload_length = 1;
+ else if (unformat (input, "hop_limit %d", &hop_limit_val))
+ hop_limit = 1;
+ else
+ break;
+ }
+
+ if (version + traffic_class + flow_label + src + dst + proto +
+ payload_length + hop_limit == 0)
+ return 0;
+
+ /*
+ * Aligned because we use the real comparison functions
+ */
+ vec_validate_aligned (match, sizeof (*ip) - 1, sizeof (u32x4));
+
+ ip = (ip6_header_t *) match;
+
+ if (src)
+ clib_memcpy (&ip->src_address, &src_val, sizeof (ip->src_address));
+
+ if (dst)
+ clib_memcpy (&ip->dst_address, &dst_val, sizeof (ip->dst_address));
+
+ if (proto)
+ ip->protocol = proto_val;
+
+ ip_version_traffic_class_and_flow_label = 0;
+
+ if (version)
+ ip_version_traffic_class_and_flow_label |= (version_val & 0xF) << 28;
+
+ if (traffic_class)
+ ip_version_traffic_class_and_flow_label |=
+ (traffic_class_val & 0xFF) << 20;
+
+ if (flow_label)
+ ip_version_traffic_class_and_flow_label |= (flow_label_val & 0xFFFFF);
+
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label);
+
+ if (payload_length)
+ ip->payload_length = clib_host_to_net_u16 (payload_length_val);
+
+ if (hop_limit)
+ ip->hop_limit = hop_limit_val;
+
+ *matchp = match;
+ return 1;
+}
+
+uword
+unformat_l3_match (unformat_input_t * input, va_list * args)
+{
+ u8 **matchp = va_arg (*args, u8 **);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4 %U", unformat_ip4_match, matchp))
+ return 1;
+ else if (unformat (input, "ip6 %U", unformat_ip6_match, matchp))
+ return 1;
+ else
+ break;
+ }
+ return 0;
+}
+
+uword
+unformat_vlan_tag (unformat_input_t * input, va_list * args)
+{
+ u8 *tagp = va_arg (*args, u8 *);
+ u32 tag;
+
+ if (unformat (input, "%d", &tag))
+ {
+ tagp[0] = (tag >> 8) & 0x0F;
+ tagp[1] = tag & 0xFF;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword
+unformat_l2_match (unformat_input_t * input, va_list * args)
+{
+ u8 **matchp = va_arg (*args, u8 **);
+ u8 *match = 0;
+ u8 src = 0;
+ u8 src_val[6];
+ u8 dst = 0;
+ u8 dst_val[6];
+ u8 proto = 0;
+ u16 proto_val;
+ u8 tag1 = 0;
+ u8 tag1_val[2];
+ u8 tag2 = 0;
+ u8 tag2_val[2];
+ int len = 14;
+ u8 ignore_tag1 = 0;
+ u8 ignore_tag2 = 0;
+ u8 cos1 = 0;
+ u8 cos2 = 0;
+ u32 cos1_val = 0;
+ u32 cos2_val = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "src %U", unformat_ethernet_address, &src_val))
+ src = 1;
+ else
+ if (unformat (input, "dst %U", unformat_ethernet_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %U",
+ unformat_ethernet_type_host_byte_order, &proto_val))
+ proto = 1;
+ else if (unformat (input, "tag1 %U", unformat_vlan_tag, tag1_val))
+ tag1 = 1;
+ else if (unformat (input, "tag2 %U", unformat_vlan_tag, tag2_val))
+ tag2 = 1;
+ else if (unformat (input, "ignore-tag1"))
+ ignore_tag1 = 1;
+ else if (unformat (input, "ignore-tag2"))
+ ignore_tag2 = 1;
+ else if (unformat (input, "cos1 %d", &cos1_val))
+ cos1 = 1;
+ else if (unformat (input, "cos2 %d", &cos2_val))
+ cos2 = 1;
+ else
+ break;
+ }
+ if ((src + dst + proto + tag1 + tag2 +
+ ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0)
+ return 0;
+
+ if (tag1 || ignore_tag1 || cos1)
+ len = 18;
+ if (tag2 || ignore_tag2 || cos2)
+ len = 22;
+
+ vec_validate_aligned (match, len - 1, sizeof (u32x4));
+
+ if (dst)
+ clib_memcpy (match, dst_val, 6);
+
+ if (src)
+ clib_memcpy (match + 6, src_val, 6);
+
+ if (tag2)
+ {
+ /* inner vlan tag */
+ match[19] = tag2_val[1];
+ match[18] = tag2_val[0];
+ if (cos2)
+ match[18] |= (cos2_val & 0x7) << 5;
+ if (proto)
+ {
+ match[21] = proto_val & 0xff;
+ match[20] = proto_val >> 8;
+ }
+ if (tag1)
+ {
+ match[15] = tag1_val[1];
+ match[14] = tag1_val[0];
+ }
+ if (cos1)
+ match[14] |= (cos1_val & 0x7) << 5;
+ *matchp = match;
+ return 1;
+ }
+ if (tag1)
+ {
+ match[15] = tag1_val[1];
+ match[14] = tag1_val[0];
+ if (proto)
+ {
+ match[17] = proto_val & 0xff;
+ match[16] = proto_val >> 8;
+ }
+ if (cos1)
+ match[14] |= (cos1_val & 0x7) << 5;
+
+ *matchp = match;
+ return 1;
+ }
+ if (cos2)
+ match[18] |= (cos2_val & 0x7) << 5;
+ if (cos1)
+ match[14] |= (cos1_val & 0x7) << 5;
+ if (proto)
+ {
+ match[13] = proto_val & 0xff;
+ match[12] = proto_val >> 8;
+ }
+
+ *matchp = match;
+ return 1;
+}
+#endif
+
+uword
+api_unformat_classify_match (unformat_input_t * input, va_list * args)
+{
+ u8 **matchp = va_arg (*args, u8 **);
+ u32 skip_n_vectors = va_arg (*args, u32);
+ u32 match_n_vectors = va_arg (*args, u32);
+
+ u8 *match = 0;
+ u8 *l2 = 0;
+ u8 *l3 = 0;
+ u8 *l4 = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "hex %U", unformat_hex_string, &match))
+ ;
+ else if (unformat (input, "l2 %U", unformat_l2_match, &l2))
+ ;
+ else if (unformat (input, "l3 %U", unformat_l3_match, &l3))
+ ;
+ else if (unformat (input, "l4 %U", unformat_l4_match, &l4))
+ ;
+ else
+ break;
+ }
+
+ if (l4 && !l3)
+ {
+ vec_free (match);
+ vec_free (l2);
+ vec_free (l4);
+ return 0;
+ }
+
+ if (match || l2 || l3 || l4)
+ {
+ if (l2 || l3 || l4)
+ {
+ /* "Win a free Ethernet header in every packet" */
+ if (l2 == 0)
+ vec_validate_aligned (l2, 13, sizeof (u32x4));
+ match = l2;
+ if (vec_len (l3))
+ {
+ vec_append_aligned (match, l3, sizeof (u32x4));
+ vec_free (l3);
+ }
+ if (vec_len (l4))
+ {
+ vec_append_aligned (match, l4, sizeof (u32x4));
+ vec_free (l4);
+ }
+ }
+
+ /* Make sure the vector is big enough even if key is all 0's */
+ vec_validate_aligned
+ (match, ((match_n_vectors + skip_n_vectors) * sizeof (u32x4)) - 1,
+ sizeof (u32x4));
+
+ /* Set size, include skipped vectors */
+ _vec_len (match) = (match_n_vectors + skip_n_vectors) * sizeof (u32x4);
+
+ *matchp = match;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+api_classify_add_del_session (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_classify_add_del_session_t *mp;
+ int is_add = 1;
+ u32 table_index = ~0;
+ u32 hit_next_index = ~0;
+ u32 opaque_index = ~0;
+ u8 *match = 0;
+ i32 advance = 0;
+ u32 skip_n_vectors = 0;
+ u32 match_n_vectors = 0;
+ u32 action = 0;
+ u32 metadata = 0;
+ int ret;
+
+ /*
+ * Warning: you have to supply skip_n and match_n
+   * because the API client can't simply look at the classify
+ * table object.
+ */
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "hit-next %U", api_unformat_ip_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (i, "l2-hit-next %U", unformat_l2_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (i, "acl-hit-next %U", api_unformat_acl_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (i, "policer-hit-next %d", &hit_next_index))
+ ;
+ else if (unformat (i, "%U", unformat_policer_precolor, &opaque_index))
+ ;
+ else if (unformat (i, "opaque-index %d", &opaque_index))
+ ;
+ else if (unformat (i, "skip_n %d", &skip_n_vectors))
+ ;
+ else if (unformat (i, "match_n %d", &match_n_vectors))
+ ;
+ else if (unformat (i, "match %U", api_unformat_classify_match,
+ &match, skip_n_vectors, match_n_vectors))
+ ;
+ else if (unformat (i, "advance %d", &advance))
+ ;
+ else if (unformat (i, "table-index %d", &table_index))
+ ;
+ else if (unformat (i, "action set-ip4-fib-id %d", &metadata))
+ action = 1;
+ else if (unformat (i, "action set-ip6-fib-id %d", &metadata))
+ action = 2;
+ else if (unformat (i, "action %d", &action))
+ ;
+ else if (unformat (i, "metadata %d", &metadata))
+ ;
+ else
+ break;
+ }
+
+ if (table_index == ~0)
+ {
+ errmsg ("Table index required");
+ return -99;
+ }
+
+ if (is_add && match == 0)
+ {
+ errmsg ("Match value required");
+ return -99;
+ }
+
+ M2 (CLASSIFY_ADD_DEL_SESSION, mp, vec_len (match));
+
+ mp->is_add = is_add;
+ mp->table_index = ntohl (table_index);
+ mp->hit_next_index = ntohl (hit_next_index);
+ mp->opaque_index = ntohl (opaque_index);
+ mp->advance = ntohl (advance);
+ mp->action = action;
+ mp->metadata = ntohl (metadata);
+ clib_memcpy (mp->match, match, vec_len (match));
+ vec_free (match);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
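+
+/*
+ * Usage sketch (assuming the "classify_add_del_session" VAT command name;
+ * table index and address are hypothetical). skip_n and match_n must
+ * appear before "match" in the input, for the reason noted above:
+ *
+ *   classify_add_del_session table-index 0 skip_n 1 match_n 1 match l3 ip4 src 10.1.1.1 hit-next local
+ */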
+
+static int
+api_classify_set_interface_ip_table (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_classify_set_interface_ip_table_t *mp;
+ u32 sw_if_index;
+  int sw_if_index_set = 0;
+ u32 table_index = ~0;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "table %d", &table_index))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+
+ M (CLASSIFY_SET_INTERFACE_IP_TABLE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->table_index = ntohl (table_index);
+ mp->is_ipv6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_classify_set_interface_l2_tables (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_classify_set_interface_l2_tables_t *mp;
+ u32 sw_if_index;
+  int sw_if_index_set = 0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 other_table_index = ~0;
+ u32 is_input = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (i, "ip6-table %d", &ip6_table_index))
+ ;
+ else if (unformat (i, "other-table %d", &other_table_index))
+ ;
+ else if (unformat (i, "is-input %d", &is_input))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+
+ M (CLASSIFY_SET_INTERFACE_L2_TABLES, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->ip4_table_index = ntohl (ip4_table_index);
+ mp->ip6_table_index = ntohl (ip6_table_index);
+ mp->other_table_index = ntohl (other_table_index);
+ mp->is_input = (u8) is_input;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_set_ipfix_exporter (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_set_ipfix_exporter_t *mp;
+ ip4_address_t collector_address;
+ u8 collector_address_set = 0;
+ u32 collector_port = ~0;
+ ip4_address_t src_address;
+ u8 src_address_set = 0;
+ u32 vrf_id = ~0;
+ u32 path_mtu = ~0;
+ u32 template_interval = ~0;
+ u8 udp_checksum = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "collector_address %U", unformat_ip4_address,
+ &collector_address))
+ collector_address_set = 1;
+ else if (unformat (i, "collector_port %d", &collector_port))
+ ;
+ else if (unformat (i, "src_address %U", unformat_ip4_address,
+ &src_address))
+ src_address_set = 1;
+ else if (unformat (i, "vrf_id %d", &vrf_id))
+ ;
+ else if (unformat (i, "path_mtu %d", &path_mtu))
+ ;
+ else if (unformat (i, "template_interval %d", &template_interval))
+ ;
+ else if (unformat (i, "udp_checksum"))
+ udp_checksum = 1;
+ else
+ break;
+ }
+
+ if (collector_address_set == 0)
+ {
+ errmsg ("collector_address required");
+ return -99;
+ }
+
+ if (src_address_set == 0)
+ {
+ errmsg ("src_address required");
+ return -99;
+ }
+
+ M (SET_IPFIX_EXPORTER, mp);
+
+ memcpy (mp->collector_address, collector_address.data,
+ sizeof (collector_address.data));
+ mp->collector_port = htons ((u16) collector_port);
+ memcpy (mp->src_address, src_address.data, sizeof (src_address.data));
+ mp->vrf_id = htonl (vrf_id);
+ mp->path_mtu = htonl (path_mtu);
+ mp->template_interval = htonl (template_interval);
+ mp->udp_checksum = udp_checksum;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
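+
+/*
+ * Usage sketch (assuming the "set_ipfix_exporter" VAT command name;
+ * addresses and values are hypothetical):
+ *
+ *   set_ipfix_exporter collector_address 10.0.0.254 collector_port 4739 src_address 10.0.0.1 path_mtu 1450 template_interval 20
+ */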
+
+static int
+api_set_ipfix_classify_stream (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_set_ipfix_classify_stream_t *mp;
+ u32 domain_id = 0;
+ u32 src_port = UDP_DST_PORT_ipfix;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "domain %d", &domain_id))
+ ;
+ else if (unformat (i, "src_port %d", &src_port))
+ ;
+ else
+ {
+ errmsg ("unknown input `%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (SET_IPFIX_CLASSIFY_STREAM, mp);
+
+ mp->domain_id = htonl (domain_id);
+ mp->src_port = htons ((u16) src_port);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ipfix_classify_table_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipfix_classify_table_add_del_t *mp;
+ int is_add = -1;
+ u32 classify_table_index = ~0;
+ u8 ip_version = 0;
+ u8 transport_protocol = 255;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "table %d", &classify_table_index))
+ ;
+ else if (unformat (i, "ip4"))
+ ip_version = 4;
+ else if (unformat (i, "ip6"))
+ ip_version = 6;
+ else if (unformat (i, "tcp"))
+ transport_protocol = 6;
+ else if (unformat (i, "udp"))
+ transport_protocol = 17;
+ else
+ {
+ errmsg ("unknown input `%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (is_add == -1)
+ {
+ errmsg ("expecting: add|del");
+ return -99;
+ }
+ if (classify_table_index == ~0)
+ {
+ errmsg ("classifier table not specified");
+ return -99;
+ }
+ if (ip_version == 0)
+ {
+ errmsg ("IP version not specified");
+ return -99;
+ }
+
+ M (IPFIX_CLASSIFY_TABLE_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+ mp->table_id = htonl (classify_table_index);
+ mp->ip_version = ip_version;
+ mp->transport_protocol = transport_protocol;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_get_node_index (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_get_node_index_t *mp;
+ u8 *name = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "node %s", &name))
+ ;
+ else
+ break;
+ }
+ if (name == 0)
+ {
+ errmsg ("node name required");
+ return -99;
+ }
+ if (vec_len (name) >= ARRAY_LEN (mp->node_name))
+ {
+ errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
+ return -99;
+ }
+
+ M (GET_NODE_INDEX, mp);
+ clib_memcpy (mp->node_name, name, vec_len (name));
+ vec_free (name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
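+/*
+ * Illustrative usage (node names are examples only):
+ * get_next_index node-name ip4-lookup next-node-name ip4-load-balance
+ */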
+static int
+api_get_next_index (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_get_next_index_t *mp;
+ u8 *node_name = 0, *next_node_name = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "node-name %s", &node_name))
+ ;
+ else if (unformat (i, "next-node-name %s", &next_node_name))
+ break;
+ }
+
+ if (node_name == 0)
+ {
+ errmsg ("node name required");
+ return -99;
+ }
+ if (vec_len (node_name) >= ARRAY_LEN (mp->node_name))
+ {
+ errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
+ return -99;
+ }
+
+ if (next_node_name == 0)
+ {
+ errmsg ("next node name required");
+ return -99;
+ }
+ if (vec_len (next_node_name) >= ARRAY_LEN (mp->next_name))
+ {
+ errmsg ("next node name too long, max %d", ARRAY_LEN (mp->next_name));
+ return -99;
+ }
+
+ M (GET_NEXT_INDEX, mp);
+ clib_memcpy (mp->node_name, node_name, vec_len (node_name));
+ clib_memcpy (mp->next_name, next_node_name, vec_len (next_node_name));
+ vec_free (node_name);
+ vec_free (next_node_name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
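+/*
+ * Illustrative usage (node names are examples only):
+ * add_node_next node pg-input next ethernet-input
+ */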
+static int
+api_add_node_next (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_add_node_next_t *mp;
+ u8 *name = 0;
+ u8 *next = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "node %s", &name))
+ ;
+ else if (unformat (i, "next %s", &next))
+ ;
+ else
+ break;
+ }
+ if (name == 0)
+ {
+ errmsg ("node name required");
+ return -99;
+ }
+ if (vec_len (name) >= ARRAY_LEN (mp->node_name))
+ {
+ errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
+ return -99;
+ }
+ if (next == 0)
+ {
+ errmsg ("next node required");
+ return -99;
+ }
+ if (vec_len (next) >= ARRAY_LEN (mp->next_name))
+ {
+ errmsg ("next name too long, max %d", ARRAY_LEN (mp->next_name));
+ return -99;
+ }
+
+ M (ADD_NODE_NEXT, mp);
+ clib_memcpy (mp->node_name, name, vec_len (name));
+ clib_memcpy (mp->next_name, next, vec_len (next));
+ vec_free (name);
+ vec_free (next);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
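+/*
+ * Illustrative usage (addresses, ids and cookies invented); note the
+ * handler always sends is_ipv6 = 1, so both endpoints must be IPv6:
+ * l2tpv3_create_tunnel client_address 2001:db8::1 our_address 2001:db8::2
+ * local_session_id 1 remote_session_id 1 local_cookie 3 remote_cookie 4
+ */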
+static int
+api_l2tpv3_create_tunnel (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ ip6_address_t client_address, our_address;
+ int client_address_set = 0;
+ int our_address_set = 0;
+ u32 local_session_id = 0;
+ u32 remote_session_id = 0;
+ u64 local_cookie = 0;
+ u64 remote_cookie = 0;
+ u8 l2_sublayer_present = 0;
+ vl_api_l2tpv3_create_tunnel_t *mp;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "client_address %U", unformat_ip6_address,
+ &client_address))
+ client_address_set = 1;
+ else if (unformat (i, "our_address %U", unformat_ip6_address,
+ &our_address))
+ our_address_set = 1;
+ else if (unformat (i, "local_session_id %d", &local_session_id))
+ ;
+ else if (unformat (i, "remote_session_id %d", &remote_session_id))
+ ;
+ else if (unformat (i, "local_cookie %lld", &local_cookie))
+ ;
+ else if (unformat (i, "remote_cookie %lld", &remote_cookie))
+ ;
+ else if (unformat (i, "l2-sublayer-present"))
+ l2_sublayer_present = 1;
+ else
+ break;
+ }
+
+ if (client_address_set == 0)
+ {
+ errmsg ("client_address required");
+ return -99;
+ }
+
+ if (our_address_set == 0)
+ {
+ errmsg ("our_address required");
+ return -99;
+ }
+
+ M (L2TPV3_CREATE_TUNNEL, mp);
+
+ clib_memcpy (mp->client_address, client_address.as_u8,
+ sizeof (mp->client_address));
+
+ clib_memcpy (mp->our_address, our_address.as_u8, sizeof (mp->our_address));
+
+ mp->local_session_id = ntohl (local_session_id);
+ mp->remote_session_id = ntohl (remote_session_id);
+ mp->local_cookie = clib_host_to_net_u64 (local_cookie);
+ mp->remote_cookie = clib_host_to_net_u64 (remote_cookie);
+ mp->l2_sublayer_present = l2_sublayer_present;
+ mp->is_ipv6 = 1;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2tpv3_set_tunnel_cookies (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u64 new_local_cookie = 0;
+ u64 new_remote_cookie = 0;
+ vl_api_l2tpv3_set_tunnel_cookies_t *mp;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "new_local_cookie %lld", &new_local_cookie))
+ ;
+ else if (unformat (i, "new_remote_cookie %lld", &new_remote_cookie))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (L2TPV3_SET_TUNNEL_COOKIES, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->new_local_cookie = clib_host_to_net_u64 (new_local_cookie);
+ mp->new_remote_cookie = clib_host_to_net_u64 (new_remote_cookie);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2tpv3_interface_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2tpv3_interface_enable_disable_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 enable_disable = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ enable_disable = 1;
+ else if (unformat (i, "disable"))
+ enable_disable = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (L2TPV3_INTERFACE_ENABLE_DISABLE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable_disable = enable_disable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2tpv3_set_lookup_key (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2tpv3_set_lookup_key_t *mp;
+ u8 key = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "lookup_v6_src"))
+ key = L2T_LOOKUP_SRC_ADDRESS;
+ else if (unformat (i, "lookup_v6_dst"))
+ key = L2T_LOOKUP_DST_ADDRESS;
+ else if (unformat (i, "lookup_session_id"))
+ key = L2T_LOOKUP_SESSION_ID;
+ else
+ break;
+ }
+
+ if (key == (u8) ~ 0)
+ {
+ errmsg ("l2tp session lookup key unset");
+ return -99;
+ }
+
+ M (L2TPV3_SET_LOOKUP_KEY, mp);
+
+ mp->key = key;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_sw_if_l2tpv3_tunnel_details_t_handler
+ (vl_api_sw_if_l2tpv3_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "* %U (our) %U (client) (sw_if_index %d)",
+ format_ip6_address, mp->our_address,
+ format_ip6_address, mp->client_address,
+ clib_net_to_host_u32 (mp->sw_if_index));
+
+ print (vam->ofp,
+ " local cookies %016llx %016llx remote cookie %016llx",
+ clib_net_to_host_u64 (mp->local_cookie[0]),
+ clib_net_to_host_u64 (mp->local_cookie[1]),
+ clib_net_to_host_u64 (mp->remote_cookie));
+
+ print (vam->ofp, " local session-id %d remote session-id %d",
+ clib_net_to_host_u32 (mp->local_session_id),
+ clib_net_to_host_u32 (mp->remote_session_id));
+
+ print (vam->ofp, " l2 specific sublayer %s\n",
+ mp->l2_sublayer_present ? "preset" : "absent");
+
+}
+
+static void vl_api_sw_if_l2tpv3_tunnel_details_t_handler_json
+ (vl_api_sw_if_l2tpv3_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ struct in6_addr addr;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+
+ clib_memcpy (&addr, mp->our_address, sizeof (addr));
+ vat_json_object_add_ip6 (node, "our_address", addr);
+ clib_memcpy (&addr, mp->client_address, sizeof (addr));
+ vat_json_object_add_ip6 (node, "client_address", addr);
+
+ vat_json_node_t *lc = vat_json_object_add (node, "local_cookie");
+ vat_json_init_array (lc);
+ vat_json_array_add_uint (lc, clib_net_to_host_u64 (mp->local_cookie[0]));
+ vat_json_array_add_uint (lc, clib_net_to_host_u64 (mp->local_cookie[1]));
+ vat_json_object_add_uint (node, "remote_cookie",
+ clib_net_to_host_u64 (mp->remote_cookie));
+
+ printf ("local id: %u", clib_net_to_host_u32 (mp->local_session_id));
+ vat_json_object_add_uint (node, "local_session_id",
+ clib_net_to_host_u32 (mp->local_session_id));
+ vat_json_object_add_uint (node, "remote_session_id",
+ clib_net_to_host_u32 (mp->remote_session_id));
+ vat_json_object_add_string_copy (node, "l2_sublayer",
+ mp->l2_sublayer_present ? (u8 *) "present"
+ : (u8 *) "absent");
+}
+
+static int
+api_sw_if_l2tpv3_tunnel_dump (vat_main_t * vam)
+{
+ vl_api_sw_if_l2tpv3_tunnel_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ /* Get list of l2tpv3-tunnel interfaces */
+ M (SW_IF_L2TPV3_TUNNEL_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+
+static void vl_api_sw_interface_tap_details_t_handler
+ (vl_api_sw_interface_tap_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%-16s %d",
+ mp->dev_name, clib_net_to_host_u32 (mp->sw_if_index));
+}
+
+static void vl_api_sw_interface_tap_details_t_handler_json
+ (vl_api_sw_interface_tap_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ vat_json_object_add_string_copy (node, "dev_name", mp->dev_name);
+}
+
+static int
+api_sw_interface_tap_dump (vat_main_t * vam)
+{
+ vl_api_sw_interface_tap_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ print (vam->ofp, "\n%-16s %s", "dev_name", "sw_if_index");
+ /* Get list of tap interfaces */
+ M (SW_INTERFACE_TAP_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
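+/*
+ * Helper for the "decap-next" tunnel argument: accepts either the keyword
+ * "l2" (mapped to VXLAN_INPUT_NEXT_L2_INPUT) or a raw next-node index.
+ */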
+static uword unformat_vxlan_decap_next
+ (unformat_input_t * input, va_list * args)
+{
+ u32 *result = va_arg (*args, u32 *);
+ u32 tmp;
+
+ if (unformat (input, "l2"))
+ *result = VXLAN_INPUT_NEXT_L2_INPUT;
+ else if (unformat (input, "%d", &tmp))
+ *result = tmp;
+ else
+ return 0;
+ return 1;
+}
+
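+/*
+ * Illustrative usage (addresses and vni invented):
+ * vxlan_add_del_tunnel src 10.0.3.1 dst 10.0.3.3 vni 42 decap-next l2
+ * Append "del" to the same arguments to remove the tunnel.
+ */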
+static int
+api_vxlan_add_del_tunnel (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_add_del_tunnel_t *mp;
+ ip46_address_t src, dst;
+ u8 is_add = 1;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u8 grp_set = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 encap_vrf_id = 0;
+ u32 decap_next_index = ~0;
+ u32 vni = 0;
+ int ret;
+
+ /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ memset (&src, 0, sizeof src);
+ memset (&dst, 0, sizeof dst);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4))
+ {
+ ipv4_set = 1;
+ src_set = 1;
+ }
+ else
+ if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4))
+ {
+ ipv4_set = 1;
+ dst_set = 1;
+ }
+ else
+ if (unformat (line_input, "src %U", unformat_ip6_address, &src.ip6))
+ {
+ ipv6_set = 1;
+ src_set = 1;
+ }
+ else
+ if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6))
+ {
+ ipv6_set = 1;
+ dst_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip4_address, &dst.ip4,
+ api_unformat_sw_if_index, vam, &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U",
+ unformat_ip4_address, &dst.ip4))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip6_address, &dst.ip6,
+ api_unformat_sw_if_index, vam, &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U",
+ unformat_ip6_address, &dst.ip6))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else
+ if (unformat (line_input, "mcast_sw_if_index %u", &mcast_sw_if_index))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &encap_vrf_id))
+ ;
+ else if (unformat (line_input, "decap-next %U",
+ unformat_vxlan_decap_next, &decap_next_index))
+ ;
+ else if (unformat (line_input, "vni %d", &vni))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (src_set == 0)
+ {
+ errmsg ("tunnel src address not specified");
+ return -99;
+ }
+ if (dst_set == 0)
+ {
+ errmsg ("tunnel dst address not specified");
+ return -99;
+ }
+
+ if (grp_set && !ip46_address_is_multicast (&dst))
+ {
+ errmsg ("tunnel group address not multicast");
+ return -99;
+ }
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ errmsg ("tunnel nonexistent multicast device");
+ return -99;
+ }
+ if (grp_set == 0 && ip46_address_is_multicast (&dst))
+ {
+ errmsg ("tunnel dst address must be unicast");
+ return -99;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ if ((vni == 0) || (vni >> 24))
+ {
+ errmsg ("vni not specified or out of range");
+ return -99;
+ }
+
+ M (VXLAN_ADD_DEL_TUNNEL, mp);
+
+ if (ipv6_set)
+ {
+ clib_memcpy (mp->src_address, &src.ip6, sizeof (src.ip6));
+ clib_memcpy (mp->dst_address, &dst.ip6, sizeof (dst.ip6));
+ }
+ else
+ {
+ clib_memcpy (mp->src_address, &src.ip4, sizeof (src.ip4));
+ clib_memcpy (mp->dst_address, &dst.ip4, sizeof (dst.ip4));
+ }
+ mp->encap_vrf_id = ntohl (encap_vrf_id);
+ mp->decap_next_index = ntohl (decap_next_index);
+ mp->mcast_sw_if_index = ntohl (mcast_sw_if_index);
+ mp->vni = ntohl (vni);
+ mp->is_add = is_add;
+ mp->is_ipv6 = ipv6_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_vxlan_tunnel_details_t_handler
+ (vl_api_vxlan_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ ip46_address_t src = to_ip46 (mp->is_ipv6, mp->src_address);
+ ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->dst_address);
+
+ print (vam->ofp, "%11d%24U%24U%14d%18d%13d%19d",
+ ntohl (mp->sw_if_index),
+ format_ip46_address, &src, IP46_TYPE_ANY,
+ format_ip46_address, &dst, IP46_TYPE_ANY,
+ ntohl (mp->encap_vrf_id),
+ ntohl (mp->decap_next_index), ntohl (mp->vni),
+ ntohl (mp->mcast_sw_if_index));
+}
+
+static void vl_api_vxlan_tunnel_details_t_handler_json
+ (vl_api_vxlan_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ if (mp->is_ipv6)
+ {
+ struct in6_addr ip6;
+
+ clib_memcpy (&ip6, mp->src_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "src_address", ip6);
+ clib_memcpy (&ip6, mp->dst_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "dst_address", ip6);
+ }
+ else
+ {
+ struct in_addr ip4;
+
+ clib_memcpy (&ip4, mp->src_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "src_address", ip4);
+ clib_memcpy (&ip4, mp->dst_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "dst_address", ip4);
+ }
+ vat_json_object_add_uint (node, "encap_vrf_id", ntohl (mp->encap_vrf_id));
+ vat_json_object_add_uint (node, "decap_next_index",
+ ntohl (mp->decap_next_index));
+ vat_json_object_add_uint (node, "vni", ntohl (mp->vni));
+ vat_json_object_add_uint (node, "is_ipv6", mp->is_ipv6 ? 1 : 0);
+ vat_json_object_add_uint (node, "mcast_sw_if_index",
+ ntohl (mp->mcast_sw_if_index));
+}
+
+static int
+api_vxlan_tunnel_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_vxlan_tunnel_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ sw_if_index = ~0;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%11s%24s%24s%14s%18s%13s%19s",
+ "sw_if_index", "src_address", "dst_address",
+ "encap_vrf_id", "decap_next_index", "vni", "mcast_sw_if_index");
+ }
+
+ /* Get list of vxlan-tunnel interfaces */
+ M (VXLAN_TUNNEL_DUMP, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
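+/*
+ * Illustrative usage (addresses invented); "teb" selects transparent
+ * Ethernet bridging instead of the default L3 payload:
+ * gre_add_del_tunnel src 10.10.10.10 dst 10.10.10.11 outer-fib-id 0
+ */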
+static int
+api_gre_add_del_tunnel (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_gre_add_del_tunnel_t *mp;
+ ip4_address_t src4, dst4;
+ ip6_address_t src6, dst6;
+ u8 is_add = 1;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u8 teb = 0;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u32 outer_fib_id = 0;
+ int ret;
+
+ memset (&src4, 0, sizeof src4);
+ memset (&dst4, 0, sizeof dst4);
+ memset (&src6, 0, sizeof src6);
+ memset (&dst6, 0, sizeof dst6);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "src %U", unformat_ip4_address, &src4))
+ {
+ src_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst %U", unformat_ip4_address, &dst4))
+ {
+ dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "src %U", unformat_ip6_address, &src6))
+ {
+ src_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst6))
+ {
+ dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id))
+ ;
+ else if (unformat (line_input, "teb"))
+ teb = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (src_set == 0)
+ {
+ errmsg ("tunnel src address not specified");
+ return -99;
+ }
+ if (dst_set == 0)
+ {
+ errmsg ("tunnel dst address not specified");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ M (GRE_ADD_DEL_TUNNEL, mp);
+
+ if (ipv4_set)
+ {
+ clib_memcpy (&mp->src_address, &src4, 4);
+ clib_memcpy (&mp->dst_address, &dst4, 4);
+ }
+ else
+ {
+ clib_memcpy (&mp->src_address, &src6, 16);
+ clib_memcpy (&mp->dst_address, &dst6, 16);
+ }
+ mp->outer_fib_id = ntohl (outer_fib_id);
+ mp->is_add = is_add;
+ mp->teb = teb;
+ mp->is_ipv6 = ipv6_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_gre_tunnel_details_t_handler
+ (vl_api_gre_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ ip46_address_t src = to_ip46 (mp->is_ipv6, mp->src_address);
+ ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->dst_address);
+
+ print (vam->ofp, "%11d%24U%24U%6d%14d",
+ ntohl (mp->sw_if_index),
+ format_ip46_address, &src, IP46_TYPE_ANY,
+ format_ip46_address, &dst, IP46_TYPE_ANY,
+ mp->teb, ntohl (mp->outer_fib_id));
+}
+
+static void vl_api_gre_tunnel_details_t_handler_json
+ (vl_api_gre_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&ip4, &mp->src_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "src_address", ip4);
+ clib_memcpy (&ip4, &mp->dst_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "dst_address", ip4);
+ }
+ else
+ {
+ clib_memcpy (&ip6, &mp->src_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "src_address", ip6);
+ clib_memcpy (&ip6, &mp->dst_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "dst_address", ip6);
+ }
+ vat_json_object_add_uint (node, "teb", mp->teb);
+ vat_json_object_add_uint (node, "outer_fib_id", ntohl (mp->outer_fib_id));
+ vat_json_object_add_uint (node, "is_ipv6", mp->is_ipv6);
+}
+
+static int
+api_gre_tunnel_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_gre_tunnel_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ sw_if_index = ~0;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%11s%24s%24s%6s%14s",
+ "sw_if_index", "src_address", "dst_address", "teb",
+ "outer_fib_id");
+ }
+
+ /* Get list of gre-tunnel interfaces */
+ M (GRE_TUNNEL_DUMP, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2_fib_clear_table (vat_main_t * vam)
+{
+ vl_api_l2_fib_clear_table_t *mp;
+ int ret;
+
+ M (L2_FIB_CLEAR_TABLE, mp);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2_interface_efp_filter (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2_interface_efp_filter_t *mp;
+ u32 sw_if_index;
+ u8 enable = 1;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing sw_if_index");
+ return -99;
+ }
+
+ M (L2_INTERFACE_EFP_FILTER, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable_disable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
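+/*
+ * X-macro table of VLAN tag-rewrite operations: each (name, value) pair
+ * expands inside api_l2_interface_vlan_tag_rewrite (below) into an
+ * "else if (unformat (i, name)) { vtr_op = value; ... }" parse clause,
+ * keeping the keyword list and the parser in sync.
+ */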
+#define foreach_vtr_op \
+_("disable", L2_VTR_DISABLED) \
+_("push-1", L2_VTR_PUSH_1) \
+_("push-2", L2_VTR_PUSH_2) \
+_("pop-1", L2_VTR_POP_1) \
+_("pop-2", L2_VTR_POP_2) \
+_("translate-1-1", L2_VTR_TRANSLATE_1_1) \
+_("translate-1-2", L2_VTR_TRANSLATE_1_2) \
+_("translate-2-1", L2_VTR_TRANSLATE_2_1) \
+_("translate-2-2", L2_VTR_TRANSLATE_2_2)
+
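+/*
+ * Illustrative usage (invented values): push an outer 802.1Q tag pair:
+ * l2_interface_vlan_tag_rewrite sw_if_index 5 push-2 tag1 100 tag2 200
+ */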
+static int
+api_l2_interface_vlan_tag_rewrite (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2_interface_vlan_tag_rewrite_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 vtr_op_set = 0;
+ u32 vtr_op = 0;
+ u32 push_dot1q = 1;
+ u32 tag1 = ~0;
+ u32 tag2 = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "vtr_op %d", &vtr_op))
+ vtr_op_set = 1;
+#define _(n,v) else if (unformat(i, n)) {vtr_op = v; vtr_op_set = 1;}
+ foreach_vtr_op
+#undef _
+ else if (unformat (i, "push_dot1q %d", &push_dot1q))
+ ;
+ else if (unformat (i, "tag1 %d", &tag1))
+ ;
+ else if (unformat (i, "tag2 %d", &tag2))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if ((sw_if_index_set == 0) || (vtr_op_set == 0))
+ {
+ errmsg ("missing vtr operation or sw_if_index");
+ return -99;
+ }
+
+ M (L2_INTERFACE_VLAN_TAG_REWRITE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->vtr_op = ntohl (vtr_op);
+ mp->push_dot1q = ntohl (push_dot1q);
+ mp->tag1 = ntohl (tag1);
+ mp->tag2 = ntohl (tag2);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
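+/*
+ * Illustrative usage (socket path and MAC invented):
+ * create_vhost_user_if socket /tmp/vhost1.sock server
+ * mac 00:11:22:33:44:55 tag mytag
+ */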
+static int
+api_create_vhost_user_if (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_create_vhost_user_if_t *mp;
+ u8 *file_name;
+ u8 is_server = 0;
+ u8 file_name_set = 0;
+ u32 custom_dev_instance = ~0;
+ u8 hwaddr[6];
+ u8 use_custom_mac = 0;
+ u8 *tag = 0;
+ int ret;
+
+ /* Shut up coverity */
+ memset (hwaddr, 0, sizeof (hwaddr));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "socket %s", &file_name))
+ {
+ file_name_set = 1;
+ }
+ else if (unformat (i, "renumber %" PRIu32, &custom_dev_instance))
+ ;
+ else if (unformat (i, "mac %U", unformat_ethernet_address, hwaddr))
+ use_custom_mac = 1;
+ else if (unformat (i, "server"))
+ is_server = 1;
+ else if (unformat (i, "tag %s", &tag))
+ ;
+ else
+ break;
+ }
+
+ if (file_name_set == 0)
+ {
+ errmsg ("missing socket file name");
+ return -99;
+ }
+
+ if (vec_len (file_name) > 255)
+ {
+ errmsg ("socket file name too long");
+ return -99;
+ }
+ vec_add1 (file_name, 0);
+
+ M (CREATE_VHOST_USER_IF, mp);
+
+ mp->is_server = is_server;
+ clib_memcpy (mp->sock_filename, file_name, vec_len (file_name));
+ vec_free (file_name);
+ if (custom_dev_instance != ~0)
+ {
+ mp->renumber = 1;
+ mp->custom_dev_instance = ntohl (custom_dev_instance);
+ }
+ mp->use_custom_mac = use_custom_mac;
+ clib_memcpy (mp->mac_address, hwaddr, 6);
+ if (tag)
+ {
+ /* unformat "%s" vectors are not NUL-terminated; add one before strncpy */
+ vec_add1 (tag, 0);
+ strncpy ((char *) mp->tag, (char *) tag, ARRAY_LEN (mp->tag) - 1);
+ }
+ vec_free (tag);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_modify_vhost_user_if (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_modify_vhost_user_if_t *mp;
+ u8 *file_name;
+ u8 is_server = 0;
+ u8 file_name_set = 0;
+ u32 custom_dev_instance = ~0;
+ u8 sw_if_index_set = 0;
+ u32 sw_if_index = (u32) ~ 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "socket %s", &file_name))
+ {
+ file_name_set = 1;
+ }
+ else if (unformat (i, "renumber %" PRIu32, &custom_dev_instance))
+ ;
+ else if (unformat (i, "server"))
+ is_server = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing sw_if_index or interface name");
+ return -99;
+ }
+
+ if (file_name_set == 0)
+ {
+ errmsg ("missing socket file name");
+ return -99;
+ }
+
+ if (vec_len (file_name) > 255)
+ {
+ errmsg ("socket file name too long");
+ return -99;
+ }
+ vec_add1 (file_name, 0);
+
+ M (MODIFY_VHOST_USER_IF, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_server = is_server;
+ clib_memcpy (mp->sock_filename, file_name, vec_len (file_name));
+ vec_free (file_name);
+ if (custom_dev_instance != ~0)
+ {
+ mp->renumber = 1;
+ mp->custom_dev_instance = ntohl (custom_dev_instance);
+ }
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_delete_vhost_user_if (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_delete_vhost_user_if_t *mp;
+ u32 sw_if_index = ~0;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing sw_if_index or interface name");
+ return -99;
+ }
+
+ M (DELETE_VHOST_USER_IF, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_sw_interface_vhost_user_details_t_handler
+ (vl_api_sw_interface_vhost_user_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%-25s %3" PRIu32 " %6" PRIu32 " %8x %6d %7d %s",
+ (char *) mp->interface_name,
+ ntohl (mp->sw_if_index), ntohl (mp->virtio_net_hdr_sz),
+ clib_net_to_host_u64 (mp->features), mp->is_server,
+ ntohl (mp->num_regions), (char *) mp->sock_filename);
+ print (vam->ofp, " Status: '%s'", strerror (ntohl (mp->sock_errno)));
+}
+
+static void vl_api_sw_interface_vhost_user_details_t_handler_json
+ (vl_api_sw_interface_vhost_user_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ vat_json_object_add_string_copy (node, "interface_name",
+ mp->interface_name);
+ vat_json_object_add_uint (node, "virtio_net_hdr_sz",
+ ntohl (mp->virtio_net_hdr_sz));
+ vat_json_object_add_uint (node, "features",
+ clib_net_to_host_u64 (mp->features));
+ vat_json_object_add_uint (node, "is_server", mp->is_server);
+ vat_json_object_add_string_copy (node, "sock_filename", mp->sock_filename);
+ vat_json_object_add_uint (node, "num_regions", ntohl (mp->num_regions));
+ vat_json_object_add_uint (node, "sock_errno", ntohl (mp->sock_errno));
+}
+
+static int
+api_sw_interface_vhost_user_dump (vat_main_t * vam)
+{
+ vl_api_sw_interface_vhost_user_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+ print (vam->ofp,
+ "Interface name idx hdr_sz features server regions filename");
+
+ /* Get list of vhost-user interfaces */
+ M (SW_INTERFACE_VHOST_USER_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_show_version (vat_main_t * vam)
+{
+ vl_api_show_version_t *mp;
+ int ret;
+
+ M (SHOW_VERSION, mp);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+
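+/*
+ * Illustrative usage (addresses and vni invented); next-ip4, next-ip6,
+ * next-ethernet and next-nsh select the encapsulated protocol:
+ * vxlan_gpe_add_del_tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 next-ip4
+ */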
+static int
+api_vxlan_gpe_add_del_tunnel (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_add_del_tunnel_t *mp;
+ ip4_address_t local4, remote4;
+ ip6_address_t local6, remote6;
+ u8 is_add = 1;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u8 grp_set = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 encap_vrf_id = 0;
+ u32 decap_vrf_id = 0;
+ u8 protocol = ~0;
+ u32 vni;
+ u8 vni_set = 0;
+ int ret;
+
+ /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ memset (&local4, 0, sizeof local4);
+ memset (&remote4, 0, sizeof remote4);
+ memset (&local6, 0, sizeof local6);
+ memset (&remote6, 0, sizeof remote6);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "local %U",
+ unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip4_address, &remote4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "local %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip6_address, &remote6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip4_address, &remote4,
+ api_unformat_sw_if_index, vam, &mcast_sw_if_index))
+ {
+ grp_set = remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U",
+ unformat_ip4_address, &remote4))
+ {
+ grp_set = remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip6_address, &remote6,
+ api_unformat_sw_if_index, vam, &mcast_sw_if_index))
+ {
+ grp_set = remote_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U",
+ unformat_ip6_address, &remote6))
+ {
+ grp_set = remote_set = 1;
+ ipv6_set = 1;
+ }
+ else
+ if (unformat (line_input, "mcast_sw_if_index %u", &mcast_sw_if_index))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &encap_vrf_id))
+ ;
+ else if (unformat (line_input, "decap-vrf-id %d", &decap_vrf_id))
+ ;
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else if (unformat (line_input, "next-ip4"))
+ protocol = 1;
+ else if (unformat (line_input, "next-ip6"))
+ protocol = 2;
+ else if (unformat (line_input, "next-ethernet"))
+ protocol = 3;
+ else if (unformat (line_input, "next-nsh"))
+ protocol = 4;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("tunnel local address not specified");
+ return -99;
+ }
+ if (remote_set == 0)
+ {
+ errmsg ("tunnel remote address not specified");
+ return -99;
+ }
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ errmsg ("tunnel nonexistent multicast device");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ if (vni_set == 0)
+ {
+ errmsg ("vni not specified");
+ return -99;
+ }
+
+ M (VXLAN_GPE_ADD_DEL_TUNNEL, mp);
+
+ if (ipv6_set)
+ {
+ clib_memcpy (&mp->local, &local6, sizeof (local6));
+ clib_memcpy (&mp->remote, &remote6, sizeof (remote6));
+ }
+ else
+ {
+ clib_memcpy (&mp->local, &local4, sizeof (local4));
+ clib_memcpy (&mp->remote, &remote4, sizeof (remote4));
+ }
+
+ mp->mcast_sw_if_index = ntohl (mcast_sw_if_index);
+ mp->encap_vrf_id = ntohl (encap_vrf_id);
+ mp->decap_vrf_id = ntohl (decap_vrf_id);
+ mp->protocol = protocol;
+ mp->vni = ntohl (vni);
+ mp->is_add = is_add;
+ mp->is_ipv6 = ipv6_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void vl_api_vxlan_gpe_tunnel_details_t_handler
+ (vl_api_vxlan_gpe_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ ip46_address_t local = to_ip46 (mp->is_ipv6, mp->local);
+ ip46_address_t remote = to_ip46 (mp->is_ipv6, mp->remote);
+
+ print (vam->ofp, "%11d%24U%24U%13d%12d%19d%14d%14d",
+ ntohl (mp->sw_if_index),
+ format_ip46_address, &local, IP46_TYPE_ANY,
+ format_ip46_address, &remote, IP46_TYPE_ANY,
+ ntohl (mp->vni), mp->protocol,
+ ntohl (mp->mcast_sw_if_index),
+ ntohl (mp->encap_vrf_id), ntohl (mp->decap_vrf_id));
+}
+
+
+static void vl_api_vxlan_gpe_tunnel_details_t_handler_json
+ (vl_api_vxlan_gpe_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ if (mp->is_ipv6)
+ {
+ clib_memcpy (&ip6, &(mp->local[0]), sizeof (ip6));
+ vat_json_object_add_ip6 (node, "local", ip6);
+ clib_memcpy (&ip6, &(mp->remote[0]), sizeof (ip6));
+ vat_json_object_add_ip6 (node, "remote", ip6);
+ }
+ else
+ {
+ clib_memcpy (&ip4, &(mp->local[0]), sizeof (ip4));
+ vat_json_object_add_ip4 (node, "local", ip4);
+ clib_memcpy (&ip4, &(mp->remote[0]), sizeof (ip4));
+ vat_json_object_add_ip4 (node, "remote", ip4);
+ }
+ vat_json_object_add_uint (node, "vni", ntohl (mp->vni));
+ vat_json_object_add_uint (node, "protocol", ntohl (mp->protocol));
+ vat_json_object_add_uint (node, "mcast_sw_if_index",
+ ntohl (mp->mcast_sw_if_index));
+ vat_json_object_add_uint (node, "encap_vrf_id", ntohl (mp->encap_vrf_id));
+ vat_json_object_add_uint (node, "decap_vrf_id", ntohl (mp->decap_vrf_id));
+ vat_json_object_add_uint (node, "is_ipv6", mp->is_ipv6 ? 1 : 0);
+}
+
+static int
+api_vxlan_gpe_tunnel_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_vxlan_gpe_tunnel_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ sw_if_index = ~0;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%11s%24s%24s%13s%15s%19s%14s%14s",
+ "sw_if_index", "local", "remote", "vni",
+ "protocol", "mcast_sw_if_index", "encap_vrf_id", "decap_vrf_id");
+ }
+
+ /* Get list of vxlan-tunnel interfaces */
+ M (VXLAN_GPE_TUNNEL_DUMP, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+
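+/*
+ * The L2 FIB carries the 48-bit MAC inside a 64-bit field; the two
+ * high-order bytes are padding, hence formatting starts at a[2].
+ */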
+u8 *
+format_l2_fib_mac_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[2], a[3], a[4], a[5], a[6], a[7]);
+}
+
+static void vl_api_l2_fib_table_details_t_handler
+ (vl_api_l2_fib_table_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%3" PRIu32 " %U %3" PRIu32
+ " %d %d %d",
+ ntohl (mp->bd_id), format_l2_fib_mac_address, &mp->mac,
+ ntohl (mp->sw_if_index), mp->static_mac, mp->filter_mac,
+ mp->bvi_mac);
+}
+
+static void vl_api_l2_fib_table_details_t_handler_json
+ (vl_api_l2_fib_table_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "bd_id", ntohl (mp->bd_id));
+ vat_json_object_add_uint (node, "mac", clib_net_to_host_u64 (mp->mac));
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ vat_json_object_add_uint (node, "static_mac", mp->static_mac);
+ vat_json_object_add_uint (node, "filter_mac", mp->filter_mac);
+ vat_json_object_add_uint (node, "bvi_mac", mp->bvi_mac);
+}
+
+static int
+api_l2_fib_table_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2_fib_table_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ bd_id_set = 1;
+ else
+ break;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ print (vam->ofp, "BD-ID Mac Address sw-ndx Static Filter BVI");
+
+ /* Get list of l2 fib entries */
+ M (L2_FIB_TABLE_DUMP, mp);
+
+ mp->bd_id = ntohl (bd_id);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+
+static int
+api_interface_name_renumber (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_interface_name_renumber_t *mp;
+ u32 sw_if_index = ~0;
+ u32 new_show_dev_instance = ~0;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", api_unformat_sw_if_index, vam,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "new_show_dev_instance %d",
+ &new_show_dev_instance))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (new_show_dev_instance == ~0)
+ {
+ errmsg ("missing new_show_dev_instance");
+ return -99;
+ }
+
+ M (INTERFACE_NAME_RENUMBER, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->new_show_dev_instance = ntohl (new_show_dev_instance);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_want_ip4_arp_events (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_want_ip4_arp_events_t *mp;
+ ip4_address_t address;
+ int address_set = 0;
+ u32 enable_disable = 1;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "address %U", unformat_ip4_address, &address))
+ address_set = 1;
+ else if (unformat (line_input, "del"))
+ enable_disable = 0;
+ else
+ break;
+ }
+
+ if (address_set == 0)
+ {
+ errmsg ("missing addresses");
+ return -99;
+ }
+
+ M (WANT_IP4_ARP_EVENTS, mp);
+ mp->enable_disable = enable_disable;
+ mp->pid = htonl (getpid ());
+ mp->address = address.as_u32;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_want_ip6_nd_events (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_want_ip6_nd_events_t *mp;
+ ip6_address_t address;
+ int address_set = 0;
+ u32 enable_disable = 1;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "address %U", unformat_ip6_address, &address))
+ address_set = 1;
+ else if (unformat (line_input, "del"))
+ enable_disable = 0;
+ else
+ break;
+ }
+
+ if (address_set == 0)
+ {
+ errmsg ("missing addresses");
+ return -99;
+ }
+
+ M (WANT_IP6_ND_EVENTS, mp);
+ mp->enable_disable = enable_disable;
+ mp->pid = htonl (getpid ());
+ clib_memcpy (mp->address, &address, sizeof (ip6_address_t));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_want_l2_macs_events (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_want_l2_macs_events_t *mp;
+ u8 enable_disable = 1;
+ u32 scan_delay = 0;
+ u32 max_macs_in_event = 0;
+ u32 learn_limit = 0;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "learn-limit %d", &learn_limit))
+ ;
+ else if (unformat (line_input, "scan-delay %d", &scan_delay))
+ ;
+ else if (unformat (line_input, "max-entries %d", &max_macs_in_event))
+ ;
+ else if (unformat (line_input, "disable"))
+ enable_disable = 0;
+ else
+ break;
+ }
+
+ M (WANT_L2_MACS_EVENTS, mp);
+ mp->enable_disable = enable_disable;
+ mp->pid = htonl (getpid ());
+ mp->learn_limit = htonl (learn_limit);
+ mp->scan_delay = (u8) scan_delay;
+ mp->max_macs_in_event = (u8) (max_macs_in_event / 10);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_input_acl_set_interface (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_input_acl_set_interface_t *mp;
+ u32 sw_if_index;
+ int sw_if_index_set = 0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 l2_table_index = ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (i, "ip6-table %d", &ip6_table_index))
+ ;
+ else if (unformat (i, "l2-table %d", &l2_table_index))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (INPUT_ACL_SET_INTERFACE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->ip4_table_index = ntohl (ip4_table_index);
+ mp->ip6_table_index = ntohl (ip6_table_index);
+ mp->l2_table_index = ntohl (l2_table_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_address_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_address_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index = ~0;
+ u8 sw_if_index_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "ipv4"))
+ ipv4_set = 1;
+ else if (unformat (i, "ipv6"))
+ ipv6_set = 1;
+ else
+ break;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("ipv4 and ipv6 flags cannot be both set");
+ return -99;
+ }
+
+ if ((!ipv4_set) && (!ipv6_set))
+ {
+ errmsg ("no ipv4 nor ipv6 flag set");
+ return -99;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ vam->current_sw_if_index = sw_if_index;
+ vam->is_ipv6 = ipv6_set;
+
+ M (IP_ADDRESS_DUMP, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_ipv6 = ipv6_set;
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_dump (vat_main_t * vam)
+{
+ vl_api_ip_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ unformat_input_t *in = vam->input;
+ int ipv4_set = 0;
+ int ipv6_set = 0;
+ int is_ipv6;
+ int i;
+ int ret;
+
+ while (unformat_check_input (in) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (in, "ipv4"))
+ ipv4_set = 1;
+ else if (unformat (in, "ipv6"))
+ ipv6_set = 1;
+ else
+ break;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("ipv4 and ipv6 flags cannot be both set");
+ return -99;
+ }
+
+ if ((!ipv4_set) && (!ipv6_set))
+ {
+ errmsg ("no ipv4 nor ipv6 flag set");
+ return -99;
+ }
+
+ is_ipv6 = ipv6_set;
+ vam->is_ipv6 = is_ipv6;
+
+ /* free old data */
+ for (i = 0; i < vec_len (vam->ip_details_by_sw_if_index[is_ipv6]); i++)
+ {
+ vec_free (vam->ip_details_by_sw_if_index[is_ipv6][i].addr);
+ }
+ vec_free (vam->ip_details_by_sw_if_index[is_ipv6]);
+
+ M (IP_DUMP, mp);
+ mp->is_ipv6 = ipv6_set;
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_ipsec_spd_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_spd_add_del_t *mp;
+ u32 spd_id = ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "spd_id %d", &spd_id))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+ if (spd_id == ~0)
+ {
+ errmsg ("spd_id must be set");
+ return -99;
+ }
+
+ M (IPSEC_SPD_ADD_DEL, mp);
+
+ mp->spd_id = ntohl (spd_id);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ipsec_interface_add_del_spd (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_interface_add_del_spd_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 spd_id = (u32) ~ 0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "spd_id %d", &spd_id))
+ ;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (spd_id == (u32) ~ 0)
+ {
+ errmsg ("spd_id must be set");
+ return -99;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (IPSEC_INTERFACE_ADD_DEL_SPD, mp);
+
+ mp->spd_id = ntohl (spd_id);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
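+/*
+ * Illustrative usage (ids and ranges invented); the action keyword is
+ * parsed by unformat_ipsec_policy_action, e.g. "protect":
+ * ipsec_spd_add_del_entry spd_id 1 priority 10 inbound action protect
+ * sa_id 20 laddr_start 10.0.0.0 laddr_stop 10.0.0.255
+ * raddr_start 10.1.0.0 raddr_stop 10.1.0.255
+ */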
+static int
+api_ipsec_spd_add_del_entry (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_spd_add_del_entry_t *mp;
+ u8 is_add = 1, is_outbound = 0, is_ipv6 = 0, is_ip_any = 1;
+ u32 spd_id = 0, sa_id = 0, protocol = 0, policy = 0;
+ i32 priority = 0;
+ u32 rport_start = 0, rport_stop = (u32) ~ 0;
+ u32 lport_start = 0, lport_stop = (u32) ~ 0;
+ ip4_address_t laddr4_start, laddr4_stop, raddr4_start, raddr4_stop;
+ ip6_address_t laddr6_start, laddr6_stop, raddr6_start, raddr6_stop;
+ int ret;
+
+ laddr4_start.as_u32 = raddr4_start.as_u32 = 0;
+ laddr4_stop.as_u32 = raddr4_stop.as_u32 = (u32) ~ 0;
+ laddr6_start.as_u64[0] = raddr6_start.as_u64[0] = 0;
+ laddr6_start.as_u64[1] = raddr6_start.as_u64[1] = 0;
+ laddr6_stop.as_u64[0] = raddr6_stop.as_u64[0] = (u64) ~ 0;
+ laddr6_stop.as_u64[1] = raddr6_stop.as_u64[1] = (u64) ~ 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ if (unformat (i, "outbound"))
+ is_outbound = 1;
+ if (unformat (i, "inbound"))
+ is_outbound = 0;
+ else if (unformat (i, "spd_id %d", &spd_id))
+ ;
+ else if (unformat (i, "sa_id %d", &sa_id))
+ ;
+ else if (unformat (i, "priority %d", &priority))
+ ;
+ else if (unformat (i, "protocol %d", &protocol))
+ ;
+ else if (unformat (i, "lport_start %d", &lport_start))
+ ;
+ else if (unformat (i, "lport_stop %d", &lport_stop))
+ ;
+ else if (unformat (i, "rport_start %d", &rport_start))
+ ;
+ else if (unformat (i, "rport_stop %d", &rport_stop))
+ ;
+ else
+ if (unformat
+ (i, "laddr_start %U", unformat_ip4_address, &laddr4_start))
+ {
+ is_ipv6 = 0;
+ is_ip_any = 0;
+ }
+ else
+ if (unformat (i, "laddr_stop %U", unformat_ip4_address, &laddr4_stop))
+ {
+ is_ipv6 = 0;
+ is_ip_any = 0;
+ }
+ else
+ if (unformat
+ (i, "raddr_start %U", unformat_ip4_address, &raddr4_start))
+ {
+ is_ipv6 = 0;
+ is_ip_any = 0;
+ }
+ else
+ if (unformat (i, "raddr_stop %U", unformat_ip4_address, &raddr4_stop))
+ {
+ is_ipv6 = 0;
+ is_ip_any = 0;
+ }
+ else
+ if (unformat
+ (i, "laddr_start %U", unformat_ip6_address, &laddr6_start))
+ {
+ is_ipv6 = 1;
+ is_ip_any = 0;
+ }
+ else
+ if (unformat (i, "laddr_stop %U", unformat_ip6_address, &laddr6_stop))
+ {
+ is_ipv6 = 1;
+ is_ip_any = 0;
+ }
+ else
+ if (unformat
+ (i, "raddr_start %U", unformat_ip6_address, &raddr6_start))
+ {
+ is_ipv6 = 1;
+ is_ip_any = 0;
+ }
+ else
+ if (unformat (i, "raddr_stop %U", unformat_ip6_address, &raddr6_stop))
+ {
+ is_ipv6 = 1;
+ is_ip_any = 0;
+ }
+ else
+ if (unformat (i, "action %U", unformat_ipsec_policy_action, &policy))
+ {
+ if (policy == IPSEC_POLICY_ACTION_RESOLVE)
+ {
+ clib_warning ("unsupported action: 'resolve'");
+ return -99;
+ }
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_SPD_ADD_DEL_ENTRY, mp);
+
+ mp->spd_id = ntohl (spd_id);
+ mp->priority = ntohl (priority);
+ mp->is_outbound = is_outbound;
+
+ mp->is_ipv6 = is_ipv6;
+ if (is_ipv6 || is_ip_any)
+ {
+ clib_memcpy (mp->remote_address_start, &raddr6_start,
+ sizeof (ip6_address_t));
+ clib_memcpy (mp->remote_address_stop, &raddr6_stop,
+ sizeof (ip6_address_t));
+ clib_memcpy (mp->local_address_start, &laddr6_start,
+ sizeof (ip6_address_t));
+ clib_memcpy (mp->local_address_stop, &laddr6_stop,
+ sizeof (ip6_address_t));
+ }
+ else
+ {
+ clib_memcpy (mp->remote_address_start, &raddr4_start,
+ sizeof (ip4_address_t));
+ clib_memcpy (mp->remote_address_stop, &raddr4_stop,
+ sizeof (ip4_address_t));
+ clib_memcpy (mp->local_address_start, &laddr4_start,
+ sizeof (ip4_address_t));
+ clib_memcpy (mp->local_address_stop, &laddr4_stop,
+ sizeof (ip4_address_t));
+ }
+ mp->protocol = (u8) protocol;
+ mp->local_port_start = ntohs ((u16) lport_start);
+ mp->local_port_stop = ntohs ((u16) lport_stop);
+ mp->remote_port_start = ntohs ((u16) rport_start);
+ mp->remote_port_stop = ntohs ((u16) rport_stop);
+ mp->policy = (u8) policy;
+ mp->sa_id = ntohl (sa_id);
+ mp->is_add = is_add;
+ mp->is_ip_any = is_ip_any;
+ S (mp);
+ W (ret);
+ return ret;
+}
+
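+/*
+ * Illustrative usage (ids and keys invented; keys are hex strings parsed
+ * by unformat_hex_string):
+ * ipsec_sad_add_del_entry sad_id 10 spi 1001 esp
+ * crypto_alg aes-cbc-128 crypto_key 4a506a794f574265564551694d653768
+ * integ_alg sha1-96 integ_key 4339314b55523947594d6d3547666b45764e6a58
+ */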
+static int
+api_ipsec_sad_add_del_entry (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_sad_add_del_entry_t *mp;
+ u32 sad_id = 0, spi = 0;
+ u8 *ck = 0, *ik = 0;
+ u8 is_add = 1;
+
+ u8 protocol = IPSEC_PROTOCOL_AH;
+ u8 is_tunnel = 0, is_tunnel_ipv6 = 0;
+ u32 crypto_alg = 0, integ_alg = 0;
+ ip4_address_t tun_src4;
+ ip4_address_t tun_dst4;
+ ip6_address_t tun_src6;
+ ip6_address_t tun_dst6;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "sad_id %d", &sad_id))
+ ;
+ else if (unformat (i, "spi %d", &spi))
+ ;
+ else if (unformat (i, "esp"))
+ protocol = IPSEC_PROTOCOL_ESP;
+ else if (unformat (i, "tunnel_src %U", unformat_ip4_address, &tun_src4))
+ {
+ is_tunnel = 1;
+ is_tunnel_ipv6 = 0;
+ }
+ else if (unformat (i, "tunnel_dst %U", unformat_ip4_address, &tun_dst4))
+ {
+ is_tunnel = 1;
+ is_tunnel_ipv6 = 0;
+ }
+ else if (unformat (i, "tunnel_src %U", unformat_ip6_address, &tun_src6))
+ {
+ is_tunnel = 1;
+ is_tunnel_ipv6 = 1;
+ }
+ else if (unformat (i, "tunnel_dst %U", unformat_ip6_address, &tun_dst6))
+ {
+ is_tunnel = 1;
+ is_tunnel_ipv6 = 1;
+ }
+ else
+ if (unformat
+ (i, "crypto_alg %U", unformat_ipsec_crypto_alg, &crypto_alg))
+ {
+ if (crypto_alg < IPSEC_CRYPTO_ALG_AES_CBC_128 ||
+ crypto_alg >= IPSEC_CRYPTO_N_ALG)
+ {
+ clib_warning ("unsupported crypto-alg: '%U'",
+ format_ipsec_crypto_alg, crypto_alg);
+ return -99;
+ }
+ }
+ else if (unformat (i, "crypto_key %U", unformat_hex_string, &ck))
+ ;
+ else
+ if (unformat
+ (i, "integ_alg %U", unformat_ipsec_integ_alg, &integ_alg))
+ {
+ if (integ_alg < IPSEC_INTEG_ALG_SHA1_96 ||
+ integ_alg >= IPSEC_INTEG_N_ALG)
+ {
+ clib_warning ("unsupported integ-alg: '%U'",
+ format_ipsec_integ_alg, integ_alg);
+ return -99;
+ }
+ }
+ else if (unformat (i, "integ_key %U", unformat_hex_string, &ik))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_SAD_ADD_DEL_ENTRY, mp);
+
+ mp->sad_id = ntohl (sad_id);
+ mp->is_add = is_add;
+ mp->protocol = protocol;
+ mp->spi = ntohl (spi);
+ mp->is_tunnel = is_tunnel;
+ mp->is_tunnel_ipv6 = is_tunnel_ipv6;
+ mp->crypto_algorithm = crypto_alg;
+ mp->integrity_algorithm = integ_alg;
+ mp->crypto_key_length = vec_len (ck);
+ mp->integrity_key_length = vec_len (ik);
+
+ if (mp->crypto_key_length > sizeof (mp->crypto_key))
+ mp->crypto_key_length = sizeof (mp->crypto_key);
+
+ if (mp->integrity_key_length > sizeof (mp->integrity_key))
+ mp->integrity_key_length = sizeof (mp->integrity_key);
+
+ if (ck)
+ clib_memcpy (mp->crypto_key, ck, mp->crypto_key_length);
+ if (ik)
+ clib_memcpy (mp->integrity_key, ik, mp->integrity_key_length);
+
+ if (is_tunnel)
+ {
+ if (is_tunnel_ipv6)
+ {
+ clib_memcpy (mp->tunnel_src_address, &tun_src6,
+ sizeof (ip6_address_t));
+ clib_memcpy (mp->tunnel_dst_address, &tun_dst6,
+ sizeof (ip6_address_t));
+ }
+ else
+ {
+ clib_memcpy (mp->tunnel_src_address, &tun_src4,
+ sizeof (ip4_address_t));
+ clib_memcpy (mp->tunnel_dst_address, &tun_dst4,
+ sizeof (ip4_address_t));
+ }
+ }
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ipsec_sa_set_key (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_sa_set_key_t *mp;
+ u32 sa_id = ~0;
+ u8 *ck = 0, *ik = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sa_id %d", &sa_id))
+ ;
+ else if (unformat (i, "crypto_key %U", unformat_hex_string, &ck))
+ ;
+ else if (unformat (i, "integ_key %U", unformat_hex_string, &ik))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sa_id == ~0)
+ {
+ errmsg ("sa_id must be set");
+ return -99;
+ }
+
+ M (IPSEC_SA_SET_KEY, mp);
+
+ mp->sa_id = ntohl (sa_id);
+ mp->crypto_key_length = vec_len (ck);
+ mp->integrity_key_length = vec_len (ik);
+
+ if (mp->crypto_key_length > sizeof (mp->crypto_key))
+ mp->crypto_key_length = sizeof (mp->crypto_key);
+
+ if (mp->integrity_key_length > sizeof (mp->integrity_key))
+ mp->integrity_key_length = sizeof (mp->integrity_key);
+
+ if (ck)
+ clib_memcpy (mp->crypto_key, ck, mp->crypto_key_length);
+ if (ik)
+ clib_memcpy (mp->integrity_key, ik, mp->integrity_key_length);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
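+/**
+ * Create or delete an IPsec tunnel interface.
+ *
+ * Input keywords (see the parse loop below), e.g.:
+ *   local_ip <ip4> remote_ip <ip4> local_spi <n> remote_spi <n>
+ *   [crypto_alg <alg>] [integ_alg <alg>] [esn] [anti_replay] [del]
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */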
+static int
+api_ipsec_tunnel_if_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_tunnel_if_add_del_t *mp;
+ u32 local_spi = 0, remote_spi = 0;
+ u32 crypto_alg = 0, integ_alg = 0;
+ u8 *lck = NULL, *rck = NULL;
+ u8 *lik = NULL, *rik = NULL;
+ ip4_address_t local_ip = { {0} };
+ ip4_address_t remote_ip = { {0} };
+ u8 is_add = 1;
+ u8 esn = 0;
+ u8 anti_replay = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "esn"))
+ esn = 1;
+ else if (unformat (i, "anti_replay"))
+ anti_replay = 1;
+ else if (unformat (i, "local_spi %d", &local_spi))
+ ;
+ else if (unformat (i, "remote_spi %d", &remote_spi))
+ ;
+ else if (unformat (i, "local_ip %U", unformat_ip4_address, &local_ip))
+ ;
+ else if (unformat (i, "remote_ip %U", unformat_ip4_address, &remote_ip))
+ ;
+ else if (unformat (i, "local_crypto_key %U", unformat_hex_string, &lck))
+ ;
+ else
+ if (unformat (i, "remote_crypto_key %U", unformat_hex_string, &rck))
+ ;
+ else if (unformat (i, "local_integ_key %U", unformat_hex_string, &lik))
+ ;
+ else if (unformat (i, "remote_integ_key %U", unformat_hex_string, &rik))
+ ;
+ else
+ if (unformat
+ (i, "crypto_alg %U", unformat_ipsec_crypto_alg, &crypto_alg))
+ {
+ if (crypto_alg < IPSEC_CRYPTO_ALG_AES_CBC_128 ||
+ crypto_alg >= IPSEC_CRYPTO_N_ALG)
+ {
+ errmsg ("unsupported crypto-alg: '%U'\n",
+ format_ipsec_crypto_alg, crypto_alg);
+ return -99;
+ }
+ }
+ else
+ if (unformat
+ (i, "integ_alg %U", unformat_ipsec_integ_alg, &integ_alg))
+ {
+ if (integ_alg < IPSEC_INTEG_ALG_SHA1_96 ||
+ integ_alg >= IPSEC_INTEG_N_ALG)
+ {
+ errmsg ("unsupported integ-alg: '%U'\n",
+ format_ipsec_integ_alg, integ_alg);
+ return -99;
+ }
+ }
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_TUNNEL_IF_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+ mp->esn = esn;
+ mp->anti_replay = anti_replay;
+
+ clib_memcpy (mp->local_ip, &local_ip, sizeof (ip4_address_t));
+ clib_memcpy (mp->remote_ip, &remote_ip, sizeof (ip4_address_t));
+
+ mp->local_spi = htonl (local_spi);
+ mp->remote_spi = htonl (remote_spi);
+ mp->crypto_alg = (u8) crypto_alg;
+
+ mp->local_crypto_key_len = 0;
+ if (lck)
+ {
+ mp->local_crypto_key_len = vec_len (lck);
+ if (mp->local_crypto_key_len > sizeof (mp->local_crypto_key))
+ mp->local_crypto_key_len = sizeof (mp->local_crypto_key);
+ clib_memcpy (mp->local_crypto_key, lck, mp->local_crypto_key_len);
+ }
+
+ mp->remote_crypto_key_len = 0;
+ if (rck)
+ {
+ mp->remote_crypto_key_len = vec_len (rck);
+ if (mp->remote_crypto_key_len > sizeof (mp->remote_crypto_key))
+ mp->remote_crypto_key_len = sizeof (mp->remote_crypto_key);
+ clib_memcpy (mp->remote_crypto_key, rck, mp->remote_crypto_key_len);
+ }
+
+ mp->integ_alg = (u8) integ_alg;
+
+ mp->local_integ_key_len = 0;
+ if (lik)
+ {
+ mp->local_integ_key_len = vec_len (lik);
+ if (mp->local_integ_key_len > sizeof (mp->local_integ_key))
+ mp->local_integ_key_len = sizeof (mp->local_integ_key);
+ clib_memcpy (mp->local_integ_key, lik, mp->local_integ_key_len);
+ }
+
+ mp->remote_integ_key_len = 0;
+ if (rik)
+ {
+ mp->remote_integ_key_len = vec_len (rik);
+ if (mp->remote_integ_key_len > sizeof (mp->remote_integ_key))
+ mp->remote_integ_key_len = sizeof (mp->remote_integ_key);
+ clib_memcpy (mp->remote_integ_key, rik, mp->remote_integ_key_len);
+ }
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
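+/**
+ * Add or delete an IKEv2 profile.
+ *
+ * Input: name <profile> [del]; the profile name is mandatory and is
+ * drawn from the characters [a-zA-Z0-9_].
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */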
+static int
+api_ikev2_profile_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_profile_add_del_t *mp;
+ u8 is_add = 1;
+ u8 *name = 0;
+ int ret;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "name %U", unformat_token, valid_chars, &name))
+ vec_add1 (name, 0);
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ M (IKEV2_PROFILE_ADD_DEL, mp);
+
+ clib_memcpy (mp->name, name, vec_len (name));
+ mp->is_add = is_add;
+ vec_free (name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
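+/**
+ * Set the authentication method and data of an IKEv2 profile.
+ *
+ * Input: name <profile> auth_method <method>
+ *        auth_data <string> | auth_data 0x<hex>
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */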
+static int
+api_ikev2_profile_set_auth (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_profile_set_auth_t *mp;
+ u8 *name = 0;
+ u8 *data = 0;
+ u32 auth_method = 0;
+ u8 is_hex = 0;
+ int ret;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "name %U", unformat_token, valid_chars, &name))
+ vec_add1 (name, 0);
+ else if (unformat (i, "auth_method %U",
+ unformat_ikev2_auth_method, &auth_method))
+ ;
+ else if (unformat (i, "auth_data 0x%U", unformat_hex_string, &data))
+ is_hex = 1;
+ else if (unformat (i, "auth_data %v", &data))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ if (!vec_len (data))
+ {
+ errmsg ("auth_data must be specified");
+ return -99;
+ }
+
+ if (!auth_method)
+ {
+ errmsg ("auth_method must be specified");
+ return -99;
+ }
+
+ M (IKEV2_PROFILE_SET_AUTH, mp);
+
+ mp->is_hex = is_hex;
+ mp->auth_method = (u8) auth_method;
+ mp->data_len = vec_len (data);
+ clib_memcpy (mp->name, name, vec_len (name));
+ clib_memcpy (mp->data, data, vec_len (data));
+ vec_free (name);
+ vec_free (data);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
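+/**
+ * Set the local or remote ID of an IKEv2 profile.
+ *
+ * Input: name <profile> id_type <type> (local|remote)
+ *        id_data <ip4 | string | 0x<hex>>
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */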
+static int
+api_ikev2_profile_set_id (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_profile_set_id_t *mp;
+ u8 *name = 0;
+ u8 *data = 0;
+ u8 is_local = 0;
+ u32 id_type = 0;
+ ip4_address_t ip4;
+ int ret;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "name %U", unformat_token, valid_chars, &name))
+ vec_add1 (name, 0);
+ else if (unformat (i, "id_type %U", unformat_ikev2_id_type, &id_type))
+ ;
+ else if (unformat (i, "id_data %U", unformat_ip4_address, &ip4))
+ {
+ data = vec_new (u8, 4);
+ clib_memcpy (data, ip4.as_u8, 4);
+ }
+ else if (unformat (i, "id_data 0x%U", unformat_hex_string, &data))
+ ;
+ else if (unformat (i, "id_data %v", &data))
+ ;
+ else if (unformat (i, "local"))
+ is_local = 1;
+ else if (unformat (i, "remote"))
+ is_local = 0;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ if (!vec_len (data))
+ {
+ errmsg ("id_data must be specified");
+ return -99;
+ }
+
+ if (!id_type)
+ {
+ errmsg ("id_type must be specified");
+ return -99;
+ }
+
+ M (IKEV2_PROFILE_SET_ID, mp);
+
+ mp->is_local = is_local;
+ mp->id_type = (u8) id_type;
+ mp->data_len = vec_len (data);
+ clib_memcpy (mp->name, name, vec_len (name));
+ clib_memcpy (mp->data, data, vec_len (data));
+ vec_free (name);
+ vec_free (data);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
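+/**
+ * Set the local or remote traffic selector of an IKEv2 profile.
+ *
+ * Input: name <profile> (local|remote) [protocol <n>]
+ *        [start_port <n>] [end_port <n>]
+ *        [start_addr <ip4>] [end_addr <ip4>]
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */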
+static int
+api_ikev2_profile_set_ts (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_profile_set_ts_t *mp;
+ u8 *name = 0;
+ u8 is_local = 0;
+ u32 proto = 0, start_port = 0, end_port = (u32) ~ 0;
+ ip4_address_t start_addr, end_addr;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+ int ret;
+
+ start_addr.as_u32 = 0;
+ end_addr.as_u32 = (u32) ~ 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "name %U", unformat_token, valid_chars, &name))
+ vec_add1 (name, 0);
+ else if (unformat (i, "protocol %d", &proto))
+ ;
+ else if (unformat (i, "start_port %d", &start_port))
+ ;
+ else if (unformat (i, "end_port %d", &end_port))
+ ;
+ else
+ if (unformat (i, "start_addr %U", unformat_ip4_address, &start_addr))
+ ;
+ else if (unformat (i, "end_addr %U", unformat_ip4_address, &end_addr))
+ ;
+ else if (unformat (i, "local"))
+ is_local = 1;
+ else if (unformat (i, "remote"))
+ is_local = 0;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ M (IKEV2_PROFILE_SET_TS, mp);
+
+ mp->is_local = is_local;
+ mp->proto = (u8) proto;
+ mp->start_port = (u16) start_port;
+ mp->end_port = (u16) end_port;
+ mp->start_addr = start_addr.as_u32;
+ mp->end_addr = end_addr.as_u32;
+ clib_memcpy (mp->name, name, vec_len (name));
+ vec_free (name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ikev2_set_local_key (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_set_local_key_t *mp;
+ u8 *file = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "file %v", &file))
+ vec_add1 (file, 0);
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (file))
+ {
+ errmsg ("RSA key file must be specified");
+ return -99;
+ }
+
+ if (vec_len (file) > 256)
+ {
+ errmsg ("file name too long");
+ return -99;
+ }
+
+ M (IKEV2_SET_LOCAL_KEY, mp);
+
+ clib_memcpy (mp->key_file, file, vec_len (file));
+ vec_free (file);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
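+/**
+ * Set the IKEv2 responder of a profile.
+ *
+ * Input: <profile> interface <sw_if_index> address <ip4>
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */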
+static int
+api_ikev2_set_responder (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_set_responder_t *mp;
+ int ret;
+ u8 *name = 0;
+ u32 sw_if_index = ~0;
+ ip4_address_t address;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (i, "%U interface %d address %U", unformat_token, valid_chars,
+ &name, &sw_if_index, unformat_ip4_address, &address))
+ vec_add1 (name, 0);
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ M (IKEV2_SET_RESPONDER, mp);
+
+ clib_memcpy (mp->name, name, vec_len (name));
+ vec_free (name);
+
+ mp->sw_if_index = sw_if_index;
+ clib_memcpy (mp->address, &address, sizeof (address));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
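+/**
+ * Set the IKE transforms of a profile. Arguments are positional:
+ *   <profile> <crypto_alg> <crypto_key_size> <integ_alg> <dh_group>
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */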
+static int
+api_ikev2_set_ike_transforms (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_set_ike_transforms_t *mp;
+ int ret;
+ u8 *name = 0;
+ u32 crypto_alg, crypto_key_size, integ_alg, dh_group;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U %d %d %d %d", unformat_token, valid_chars, &name,
+ &crypto_alg, &crypto_key_size, &integ_alg, &dh_group))
+ vec_add1 (name, 0);
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ M (IKEV2_SET_IKE_TRANSFORMS, mp);
+
+ clib_memcpy (mp->name, name, vec_len (name));
+ vec_free (name);
+ mp->crypto_alg = crypto_alg;
+ mp->crypto_key_size = crypto_key_size;
+ mp->integ_alg = integ_alg;
+ mp->dh_group = dh_group;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ikev2_set_esp_transforms (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_set_esp_transforms_t *mp;
+ int ret;
+ u8 *name = 0;
+ u32 crypto_alg, crypto_key_size, integ_alg, dh_group;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U %d %d %d %d", unformat_token, valid_chars, &name,
+ &crypto_alg, &crypto_key_size, &integ_alg, &dh_group))
+ vec_add1 (name, 0);
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ M (IKEV2_SET_ESP_TRANSFORMS, mp);
+
+ clib_memcpy (mp->name, name, vec_len (name));
+ vec_free (name);
+ mp->crypto_alg = crypto_alg;
+ mp->crypto_key_size = crypto_key_size;
+ mp->integ_alg = integ_alg;
+ mp->dh_group = dh_group;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
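+/**
+ * Set the SA lifetime parameters of a profile. Arguments are positional:
+ *   <profile> <lifetime> <lifetime_jitter> <handover> <lifetime_maxdata>
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */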
+static int
+api_ikev2_set_sa_lifetime (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_set_sa_lifetime_t *mp;
+ int ret;
+ u8 *name = 0;
+ u64 lifetime, lifetime_maxdata;
+ u32 lifetime_jitter, handover;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U %lu %u %u %lu", unformat_token, valid_chars, &name,
+ &lifetime, &lifetime_jitter, &handover,
+ &lifetime_maxdata))
+ vec_add1 (name, 0);
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ M (IKEV2_SET_SA_LIFETIME, mp);
+
+ clib_memcpy (mp->name, name, vec_len (name));
+ vec_free (name);
+ mp->lifetime = lifetime;
+ mp->lifetime_jitter = lifetime_jitter;
+ mp->handover = handover;
+ mp->lifetime_maxdata = lifetime_maxdata;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ikev2_initiate_sa_init (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_initiate_sa_init_t *mp;
+ int ret;
+ u8 *name = 0;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_token, valid_chars, &name))
+ vec_add1 (name, 0);
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("profile name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("profile name too long");
+ return -99;
+ }
+
+ M (IKEV2_INITIATE_SA_INIT, mp);
+
+ clib_memcpy (mp->name, name, vec_len (name));
+ vec_free (name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ikev2_initiate_del_ike_sa (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_initiate_del_ike_sa_t *mp;
+ int ret;
+ u64 ispi;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%lx", &ispi))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IKEV2_INITIATE_DEL_IKE_SA, mp);
+
+ mp->ispi = ispi;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ikev2_initiate_del_child_sa (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_initiate_del_child_sa_t *mp;
+ int ret;
+ u32 ispi;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%x", &ispi))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IKEV2_INITIATE_DEL_CHILD_SA, mp);
+
+ mp->ispi = ispi;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ikev2_initiate_rekey_child_sa (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ikev2_initiate_rekey_child_sa_t *mp;
+ int ret;
+ u32 ispi;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%x", &ispi))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IKEV2_INITIATE_REKEY_CHILD_SA, mp);
+
+ mp->ispi = ispi;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+/*
+ * MAP
+ */
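+/**
+ * Add a MAP-E (or, with "map-t", MAP-T) domain.
+ *
+ * At least three of the following arguments must be supplied:
+ *   ip4-pfx <pfx>/<len> ip6-pfx <pfx>/<len> ip6-src <addr>[/<len>]
+ *   ea-bits-len <n> psid-offset <n> psid-len <n> mtu <n>
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */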
+static int
+api_map_add_domain (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_map_add_domain_t *mp;
+
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip6_address_t ip6_src;
+ u32 num_m_args = 0;
+ u32 ip6_prefix_len = 0, ip4_prefix_len = 0, ea_bits_len = 0, psid_offset =
+ 0, psid_length = 0;
+ u8 is_translation = 0;
+ u32 mtu = 0;
+ u32 ip6_src_len = 128;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ip4-pfx %U/%d", unformat_ip4_address,
+ &ip4_prefix, &ip4_prefix_len))
+ num_m_args++;
+ else if (unformat (i, "ip6-pfx %U/%d", unformat_ip6_address,
+ &ip6_prefix, &ip6_prefix_len))
+ num_m_args++;
+ else
+ if (unformat
+ (i, "ip6-src %U/%d", unformat_ip6_address, &ip6_src,
+ &ip6_src_len))
+ num_m_args++;
+ else if (unformat (i, "ip6-src %U", unformat_ip6_address, &ip6_src))
+ num_m_args++;
+ else if (unformat (i, "ea-bits-len %d", &ea_bits_len))
+ num_m_args++;
+ else if (unformat (i, "psid-offset %d", &psid_offset))
+ num_m_args++;
+ else if (unformat (i, "psid-len %d", &psid_length))
+ num_m_args++;
+ else if (unformat (i, "mtu %d", &mtu))
+ num_m_args++;
+ else if (unformat (i, "map-t"))
+ is_translation = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (num_m_args < 3)
+ {
+ errmsg ("mandatory argument(s) missing");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (MAP_ADD_DOMAIN, mp);
+
+ clib_memcpy (mp->ip4_prefix, &ip4_prefix, sizeof (ip4_prefix));
+ mp->ip4_prefix_len = ip4_prefix_len;
+
+ clib_memcpy (mp->ip6_prefix, &ip6_prefix, sizeof (ip6_prefix));
+ mp->ip6_prefix_len = ip6_prefix_len;
+
+ clib_memcpy (mp->ip6_src, &ip6_src, sizeof (ip6_src));
+ mp->ip6_src_prefix_len = ip6_src_len;
+
+ mp->ea_bits_len = ea_bits_len;
+ mp->psid_offset = psid_offset;
+ mp->psid_length = psid_length;
+ mp->is_translation = is_translation;
+ mp->mtu = htons (mtu);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
+
+static int
+api_map_del_domain (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_map_del_domain_t *mp;
+
+ u32 num_m_args = 0;
+ u32 index;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "index %d", &index))
+ num_m_args++;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (num_m_args != 1)
+ {
+ errmsg ("mandatory argument(s) missing");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (MAP_DEL_DOMAIN, mp);
+
+ mp->index = ntohl (index);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
+
+static int
+api_map_add_del_rule (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_map_add_del_rule_t *mp;
+ u8 is_add = 1;
+ ip6_address_t ip6_dst;
+ u32 num_m_args = 0, index, psid = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "index %d", &index))
+ num_m_args++;
+ else if (unformat (i, "psid %d", &psid))
+ num_m_args++;
+ else if (unformat (i, "dst %U", unformat_ip6_address, &ip6_dst))
+ num_m_args++;
+ else if (unformat (i, "del"))
+ {
+ is_add = 0;
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ /* index, psid and dst are all required */
+ if (num_m_args != 3)
+ {
+ errmsg ("mandatory argument(s) missing");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (MAP_ADD_DEL_RULE, mp);
+
+ mp->index = ntohl (index);
+ mp->is_add = is_add;
+ clib_memcpy (mp->ip6_dst, &ip6_dst, sizeof (ip6_dst));
+ mp->psid = ntohs (psid);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
+
+static int
+api_map_domain_dump (vat_main_t * vam)
+{
+ vl_api_map_domain_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ /* Construct the API message */
+ M (MAP_DOMAIN_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_map_rule_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_map_rule_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 domain_index = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "index %u", &domain_index))
+ ;
+ else
+ break;
+ }
+
+ if (domain_index == ~0)
+ {
+ clib_warning ("parse error: domain index expected");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (MAP_RULE_DUMP, mp);
+
+ mp->domain_index = htonl (domain_index);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void vl_api_map_add_domain_reply_t_handler
+ (vl_api_map_add_domain_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_map_add_domain_reply_t_handler_json
+ (vl_api_map_add_domain_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
+ vat_json_object_add_uint (&node, "index", ntohl (mp->index));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static int
+api_get_first_msg_id (vat_main_t * vam)
+{
+ vl_api_get_first_msg_id_t *mp;
+ unformat_input_t *i = vam->input;
+ u8 *name;
+ u8 name_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "client %s", &name))
+ name_set = 1;
+ else
+ break;
+ }
+
+ if (name_set == 0)
+ {
+ errmsg ("missing client name");
+ return -99;
+ }
+ vec_add1 (name, 0);
+
+ if (vec_len (name) > 63)
+ {
+ errmsg ("client name too long");
+ return -99;
+ }
+
+ M (GET_FIRST_MSG_ID, mp);
+ clib_memcpy (mp->name, name, vec_len (name));
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_cop_interface_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_cop_interface_enable_disable_t *mp;
+ u32 sw_if_index = ~0;
+ u8 enable_disable = 1;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "disable"))
+ enable_disable = 0;
+ if (unformat (line_input, "enable"))
+ enable_disable = 1;
+ else if (unformat (line_input, "%U", api_unformat_sw_if_index,
+ vam, &sw_if_index))
+ ;
+ else if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (COP_INTERFACE_ENABLE_DISABLE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable_disable = enable_disable;
+
+ /* send it... */
+ S (mp);
+ /* Wait for the reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_cop_whitelist_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_cop_whitelist_enable_disable_t *mp;
+ u32 sw_if_index = ~0;
+ u8 ip4 = 0, ip6 = 0, default_cop = 0;
+ u32 fib_id = 0;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "ip4"))
+ ip4 = 1;
+ else if (unformat (line_input, "ip6"))
+ ip6 = 1;
+ else if (unformat (line_input, "default"))
+ default_cop = 1;
+ else if (unformat (line_input, "%U", api_unformat_sw_if_index,
+ vam, &sw_if_index))
+ ;
+ else if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "fib-id %d", &fib_id))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (COP_WHITELIST_ENABLE_DISABLE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->fib_id = ntohl (fib_id);
+ mp->ip4 = ip4;
+ mp->ip6 = ip6;
+ mp->default_cop = default_cop;
+
+ /* send it... */
+ S (mp);
+ /* Wait for the reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_get_node_graph (vat_main_t * vam)
+{
+ vl_api_get_node_graph_t *mp;
+ int ret;
+
+ M (GET_NODE_GRAPH, mp);
+
+ /* send it... */
+ S (mp);
+ /* Wait for the reply */
+ W (ret);
+ return ret;
+}
+
+/* *INDENT-OFF* */
+/** Used for parsing LISP eids */
+typedef CLIB_PACKED(struct{
+ u8 addr[16]; /**< eid address */
+ u32 len; /**< prefix length if IP */
+ u8 type; /**< type of eid */
+}) lisp_eid_vat_t;
+/* *INDENT-ON* */
+
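+/**
+ * Parse a LISP EID into a lisp_eid_vat_t: an ip4 prefix (type 0),
+ * ip6 prefix (type 1), MAC address (type 2) or NSH address (type 3).
+ * Fails on out-of-range prefix lengths.
+ */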
+static uword
+unformat_lisp_eid_vat (unformat_input_t * input, va_list * args)
+{
+ lisp_eid_vat_t *a = va_arg (*args, lisp_eid_vat_t *);
+
+ memset (a, 0, sizeof (a[0]));
+
+ if (unformat (input, "%U/%d", unformat_ip4_address, a->addr, &a->len))
+ {
+ a->type = 0; /* ipv4 type */
+ }
+ else if (unformat (input, "%U/%d", unformat_ip6_address, a->addr, &a->len))
+ {
+ a->type = 1; /* ipv6 type */
+ }
+ else if (unformat (input, "%U", unformat_ethernet_address, a->addr))
+ {
+ a->type = 2; /* mac type */
+ }
+ else if (unformat (input, "%U", unformat_nsh_address, a->addr))
+ {
+ a->type = 3; /* NSH type */
+ lisp_nsh_api_t *nsh = (lisp_nsh_api_t *) a->addr;
+ nsh->spi = clib_host_to_net_u32 (nsh->spi);
+ }
+ else
+ {
+ return 0;
+ }
+
+ if ((a->type == 0 && a->len > 32) || (a->type == 1 && a->len > 128))
+ {
+ return 0;
+ }
+
+ return 1;
+}
+
+static int
+lisp_eid_size_vat (u8 type)
+{
+ switch (type)
+ {
+ case 0:
+ return 4;
+ case 1:
+ return 16;
+ case 2:
+ return 6;
+ case 3:
+ return 5;
+ }
+ return 0;
+}
+
+static void
+lisp_eid_put_vat (u8 * dst, u8 eid[16], u8 type)
+{
+ clib_memcpy (dst, eid, lisp_eid_size_vat (type));
+}
+
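+/**
+ * Add or delete a ONE locator set, optionally with locators.
+ *
+ * Input: locator-set <name> [del]
+ *        [sw_if_index <n> p <prio> w <weight>]...
+ *        [iface <name> p <prio> w <weight>]...
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */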
+static int
+api_one_add_del_locator_set (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_add_del_locator_set_t *mp;
+ u8 is_add = 1;
+ u8 *locator_set_name = NULL;
+ u8 locator_set_name_set = 0;
+ vl_api_local_locator_t locator, *locators = 0;
+ u32 sw_if_index, priority, weight;
+ u32 data_len = 0;
+
+ int ret;
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (input, "locator-set %s", &locator_set_name))
+ {
+ locator_set_name_set = 1;
+ }
+ else if (unformat (input, "sw_if_index %u p %u w %u",
+ &sw_if_index, &priority, &weight))
+ {
+ locator.sw_if_index = htonl (sw_if_index);
+ locator.priority = priority;
+ locator.weight = weight;
+ vec_add1 (locators, locator);
+ }
+ else
+ if (unformat
+ (input, "iface %U p %u w %u", api_unformat_sw_if_index, vam,
+ &sw_if_index, &priority, &weight))
+ {
+ locator.sw_if_index = htonl (sw_if_index);
+ locator.priority = priority;
+ locator.weight = weight;
+ vec_add1 (locators, locator);
+ }
+ else
+ break;
+ }
+
+ if (locator_set_name_set == 0)
+ {
+ errmsg ("missing locator-set name");
+ vec_free (locators);
+ return -99;
+ }
+
+ if (vec_len (locator_set_name) > 64)
+ {
+ errmsg ("locator-set name too long");
+ vec_free (locator_set_name);
+ vec_free (locators);
+ return -99;
+ }
+ vec_add1 (locator_set_name, 0);
+
+ data_len = sizeof (vl_api_local_locator_t) * vec_len (locators);
+
+ /* Construct the API message */
+ M2 (ONE_ADD_DEL_LOCATOR_SET, mp, data_len);
+
+ mp->is_add = is_add;
+ clib_memcpy (mp->locator_set_name, locator_set_name,
+ vec_len (locator_set_name));
+ vec_free (locator_set_name);
+
+ mp->locator_num = clib_host_to_net_u32 (vec_len (locators));
+ if (locators)
+ clib_memcpy (mp->locators, locators, data_len);
+ vec_free (locators);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_add_del_locator_set api_one_add_del_locator_set
+
+static int
+api_one_add_del_locator (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_add_del_locator_t *mp;
+ u32 tmp_if_index = ~0;
+ u32 sw_if_index = ~0;
+ u8 sw_if_index_set = 0;
+ u8 sw_if_index_if_name_set = 0;
+ u32 priority = ~0;
+ u8 priority_set = 0;
+ u32 weight = ~0;
+ u8 weight_set = 0;
+ u8 is_add = 1;
+ u8 *locator_set_name = NULL;
+ u8 locator_set_name_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (input, "locator-set %s", &locator_set_name))
+ {
+ locator_set_name_set = 1;
+ }
+ else if (unformat (input, "iface %U", api_unformat_sw_if_index, vam,
+ &tmp_if_index))
+ {
+ sw_if_index_if_name_set = 1;
+ sw_if_index = tmp_if_index;
+ }
+ else if (unformat (input, "sw_if_index %d", &tmp_if_index))
+ {
+ sw_if_index_set = 1;
+ sw_if_index = tmp_if_index;
+ }
+ else if (unformat (input, "p %d", &priority))
+ {
+ priority_set = 1;
+ }
+ else if (unformat (input, "w %d", &weight))
+ {
+ weight_set = 1;
+ }
+ else
+ break;
+ }
+
+ if (locator_set_name_set == 0)
+ {
+ errmsg ("missing locator-set name");
+ return -99;
+ }
+
+ if (sw_if_index_set == 0 && sw_if_index_if_name_set == 0)
+ {
+ errmsg ("missing sw_if_index");
+ vec_free (locator_set_name);
+ return -99;
+ }
+
+ if (sw_if_index_set != 0 && sw_if_index_if_name_set != 0)
+ {
+ errmsg ("cannot use both params interface name and sw_if_index");
+ vec_free (locator_set_name);
+ return -99;
+ }
+
+ if (priority_set == 0)
+ {
+ errmsg ("missing locator-set priority");
+ vec_free (locator_set_name);
+ return -99;
+ }
+
+ if (weight_set == 0)
+ {
+ errmsg ("missing locator-set weight");
+ vec_free (locator_set_name);
+ return -99;
+ }
+
+ if (vec_len (locator_set_name) > 64)
+ {
+ errmsg ("locator-set name too long");
+ vec_free (locator_set_name);
+ return -99;
+ }
+ vec_add1 (locator_set_name, 0);
+
+ /* Construct the API message */
+ M (ONE_ADD_DEL_LOCATOR, mp);
+
+ mp->is_add = is_add;
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->priority = priority;
+ mp->weight = weight;
+ clib_memcpy (mp->locator_set_name, locator_set_name,
+ vec_len (locator_set_name));
+ vec_free (locator_set_name);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_add_del_locator api_one_add_del_locator
+
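+/** Parse an HMAC key id: "sha1" or "sha256"; anything else selects
+ * HMAC_NO_KEY. */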
+uword
+unformat_hmac_key_id (unformat_input_t * input, va_list * args)
+{
+ u32 *key_id = va_arg (*args, u32 *);
+ u8 *s = 0;
+
+ if (unformat (input, "%s", &s))
+ {
+ if (!strcmp ((char *) s, "sha1"))
+ key_id[0] = HMAC_SHA_1_96;
+ else if (!strcmp ((char *) s, "sha256"))
+ key_id[0] = HMAC_SHA_256_128;
+ else
+ {
+ clib_warning ("invalid key_id: '%s'", s);
+ key_id[0] = HMAC_NO_KEY;
+ }
+ }
+ else
+ return 0;
+
+ vec_free (s);
+ return 1;
+}
+
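+/**
+ * Add or delete a local ONE EID.
+ *
+ * Input: eid <eid> locator-set <name> [vni <n>]
+ *        [key-id sha1|sha256 secret-key <key>] [del]
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */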
+static int
+api_one_add_del_local_eid (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_add_del_local_eid_t *mp;
+ u8 is_add = 1;
+ u8 eid_set = 0;
+ lisp_eid_vat_t _eid, *eid = &_eid;
+ u8 *locator_set_name = 0;
+ u8 locator_set_name_set = 0;
+ u32 vni = 0;
+ u16 key_id = 0;
+ u8 *key = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (input, "vni %d", &vni))
+ {
+ ;
+ }
+ else if (unformat (input, "eid %U", unformat_lisp_eid_vat, eid))
+ {
+ eid_set = 1;
+ }
+ else if (unformat (input, "locator-set %s", &locator_set_name))
+ {
+ locator_set_name_set = 1;
+ }
+ else if (unformat (input, "key-id %U", unformat_hmac_key_id, &key_id))
+ ;
+ else if (unformat (input, "secret-key %_%v%_", &key))
+ ;
+ else
+ break;
+ }
+
+ if (locator_set_name_set == 0)
+ {
+ errmsg ("missing locator-set name");
+ return -99;
+ }
+
+ if (0 == eid_set)
+ {
+ errmsg ("EID address not set!");
+ vec_free (locator_set_name);
+ return -99;
+ }
+
+ if (key && (0 == key_id))
+ {
+ errmsg ("invalid key_id!");
+ vec_free (locator_set_name);
+ vec_free (key);
+ return -99;
+ }
+
+ if (vec_len (key) > 64)
+ {
+ errmsg ("key too long");
+ vec_free (locator_set_name);
+ vec_free (key);
+ return -99;
+ }
+
+ if (vec_len (locator_set_name) > 64)
+ {
+ errmsg ("locator-set name too long");
+ vec_free (locator_set_name);
+ return -99;
+ }
+ vec_add1 (locator_set_name, 0);
+
+ /* Construct the API message */
+ M (ONE_ADD_DEL_LOCAL_EID, mp);
+
+ mp->is_add = is_add;
+ lisp_eid_put_vat (mp->eid, eid->addr, eid->type);
+ mp->eid_type = eid->type;
+ mp->prefix_len = eid->len;
+ mp->vni = clib_host_to_net_u32 (vni);
+ mp->key_id = clib_host_to_net_u16 (key_id);
+ clib_memcpy (mp->locator_set_name, locator_set_name,
+ vec_len (locator_set_name));
+ clib_memcpy (mp->key, key, vec_len (key));
+
+ vec_free (locator_set_name);
+ vec_free (key);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_add_del_local_eid api_one_add_del_local_eid
+
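+/**
+ * Add or delete a LISP-GPE forwarding entry.
+ *
+ * Input: reid <eid> [leid <eid>] [vrf <n> | bd <n>] [vni <n>]
+ *        [loc-pair <lcl-rloc> <rmt-rloc> [w <weight>]]... [action <n>]
+ * A weight following a loc-pair is stored on its remote locator.
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */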
+static int
+api_lisp_gpe_add_del_fwd_entry (vat_main_t * vam)
+{
+ u32 dp_table = 0, vni = 0;
+ unformat_input_t *input = vam->input;
+ vl_api_gpe_add_del_fwd_entry_t *mp;
+ u8 is_add = 1;
+ lisp_eid_vat_t _rmt_eid, *rmt_eid = &_rmt_eid;
+ lisp_eid_vat_t _lcl_eid, *lcl_eid = &_lcl_eid;
+ u8 rmt_eid_set = 0, lcl_eid_set = 0;
+ u32 action = ~0, w;
+ ip4_address_t rmt_rloc4, lcl_rloc4;
+ ip6_address_t rmt_rloc6, lcl_rloc6;
+ vl_api_gpe_locator_t *rmt_locs = 0, *lcl_locs = 0, rloc, *curr_rloc = 0;
+ int ret;
+
+ memset (&rloc, 0, sizeof (rloc));
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "add"))
+ is_add = 1;
+ else if (unformat (input, "reid %U", unformat_lisp_eid_vat, rmt_eid))
+ {
+ rmt_eid_set = 1;
+ }
+ else if (unformat (input, "leid %U", unformat_lisp_eid_vat, lcl_eid))
+ {
+ lcl_eid_set = 1;
+ }
+ else if (unformat (input, "vrf %d", &dp_table))
+ ;
+ else if (unformat (input, "bd %d", &dp_table))
+ ;
+ else if (unformat (input, "vni %d", &vni))
+ ;
+ else if (unformat (input, "w %d", &w))
+ {
+ if (!curr_rloc)
+ {
+ errmsg ("No RLOC configured for setting priority/weight!");
+ return -99;
+ }
+ curr_rloc->weight = w;
+ }
+ else if (unformat (input, "loc-pair %U %U", unformat_ip4_address,
+ &lcl_rloc4, unformat_ip4_address, &rmt_rloc4))
+ {
+ rloc.is_ip4 = 1;
+
+ clib_memcpy (&rloc.addr, &lcl_rloc4, sizeof (lcl_rloc4));
+ rloc.weight = 0;
+ vec_add1 (lcl_locs, rloc);
+
+ clib_memcpy (&rloc.addr, &rmt_rloc4, sizeof (rmt_rloc4));
+ vec_add1 (rmt_locs, rloc);
+ /* weight saved in rmt loc */
+ curr_rloc = &rmt_locs[vec_len (rmt_locs) - 1];
+ }
+ else if (unformat (input, "loc-pair %U %U", unformat_ip6_address,
+ &lcl_rloc6, unformat_ip6_address, &rmt_rloc6))
+ {
+ rloc.is_ip4 = 0;
+ clib_memcpy (&rloc.addr, &lcl_rloc6, sizeof (lcl_rloc6));
+ rloc.weight = 0;
+ vec_add1 (lcl_locs, rloc);
+
+ clib_memcpy (&rloc.addr, &rmt_rloc6, sizeof (rmt_rloc6));
+ vec_add1 (rmt_locs, rloc);
+ /* weight saved in rmt loc */
+ curr_rloc = &rmt_locs[vec_len (rmt_locs) - 1];
+ }
+ else if (unformat (input, "action %d", &action))
+ {
+ ;
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!rmt_eid_set)
+ {
+ errmsg ("remote eid addresses not set");
+ return -99;
+ }
+
+ if (lcl_eid_set && rmt_eid->type != lcl_eid->type)
+ {
+ errmsg ("eid types don't match");
+ return -99;
+ }
+
+ if (0 == rmt_locs && (u32) ~ 0 == action)
+ {
+ errmsg ("action not set for negative mapping");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M2 (GPE_ADD_DEL_FWD_ENTRY, mp,
+ sizeof (vl_api_gpe_locator_t) * vec_len (rmt_locs) * 2);
+
+ mp->is_add = is_add;
+ lisp_eid_put_vat (mp->rmt_eid, rmt_eid->addr, rmt_eid->type);
+ lisp_eid_put_vat (mp->lcl_eid, lcl_eid->addr, lcl_eid->type);
+ mp->eid_type = rmt_eid->type;
+ mp->dp_table = clib_host_to_net_u32 (dp_table);
+ mp->vni = clib_host_to_net_u32 (vni);
+ mp->rmt_len = rmt_eid->len;
+ mp->lcl_len = lcl_eid->len;
+ mp->action = action;
+
+ if (0 != rmt_locs && 0 != lcl_locs)
+ {
+ mp->loc_num = clib_host_to_net_u32 (vec_len (rmt_locs) * 2);
+ clib_memcpy (mp->locs, lcl_locs,
+ (sizeof (vl_api_gpe_locator_t) * vec_len (lcl_locs)));
+
+ u32 offset = sizeof (vl_api_gpe_locator_t) * vec_len (lcl_locs);
+ clib_memcpy (((u8 *) mp->locs) + offset, rmt_locs,
+ (sizeof (vl_api_gpe_locator_t) * vec_len (rmt_locs)));
+ }
+ vec_free (lcl_locs);
+ vec_free (rmt_locs);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_add_del_map_server (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_add_del_map_server_t *mp;
+ u8 is_add = 1;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ ip4_address_t ipv4;
+ ip6_address_t ipv6;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (input, "%U", unformat_ip4_address, &ipv4))
+ {
+ ipv4_set = 1;
+ }
+ else if (unformat (input, "%U", unformat_ip6_address, &ipv6))
+ {
+ ipv6_set = 1;
+ }
+ else
+ break;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both eid v4 and v6 addresses set");
+ return -99;
+ }
+
+ if (!ipv4_set && !ipv6_set)
+ {
+ errmsg ("eid addresses not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (ONE_ADD_DEL_MAP_SERVER, mp);
+
+ mp->is_add = is_add;
+ if (ipv6_set)
+ {
+ mp->is_ipv6 = 1;
+ clib_memcpy (mp->ip_address, &ipv6, sizeof (ipv6));
+ }
+ else
+ {
+ mp->is_ipv6 = 0;
+ clib_memcpy (mp->ip_address, &ipv4, sizeof (ipv4));
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_add_del_map_server api_one_add_del_map_server
+
+static int
+api_one_add_del_map_resolver (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_add_del_map_resolver_t *mp;
+ u8 is_add = 1;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ ip4_address_t ipv4;
+ ip6_address_t ipv6;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (input, "%U", unformat_ip4_address, &ipv4))
+ {
+ ipv4_set = 1;
+ }
+ else if (unformat (input, "%U", unformat_ip6_address, &ipv6))
+ {
+ ipv6_set = 1;
+ }
+ else
+ break;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both eid v4 and v6 addresses set");
+ return -99;
+ }
+
+ if (!ipv4_set && !ipv6_set)
+ {
+ errmsg ("eid addresses not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (ONE_ADD_DEL_MAP_RESOLVER, mp);
+
+ mp->is_add = is_add;
+ if (ipv6_set)
+ {
+ mp->is_ipv6 = 1;
+ clib_memcpy (mp->ip_address, &ipv6, sizeof (ipv6));
+ }
+ else
+ {
+ mp->is_ipv6 = 0;
+ clib_memcpy (mp->ip_address, &ipv4, sizeof (ipv4));
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_add_del_map_resolver api_one_add_del_map_resolver
+
+static int
+api_lisp_gpe_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_gpe_enable_disable_t *mp;
+ u8 is_set = 0;
+ u8 is_en = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ {
+ is_set = 1;
+ is_en = 1;
+ }
+ else if (unformat (input, "disable"))
+ {
+ is_set = 1;
+ is_en = 0;
+ }
+ else
+ break;
+ }
+
+ if (is_set == 0)
+ {
+ errmsg ("Value not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (GPE_ENABLE_DISABLE, mp);
+
+ mp->is_en = is_en;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_rloc_probe_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_rloc_probe_enable_disable_t *mp;
+ u8 is_set = 0;
+ u8 is_en = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ {
+ is_set = 1;
+ is_en = 1;
+ }
+ else if (unformat (input, "disable"))
+ is_set = 1;
+ else
+ break;
+ }
+
+ if (!is_set)
+ {
+ errmsg ("Value not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (ONE_RLOC_PROBE_ENABLE_DISABLE, mp);
+
+ mp->is_enabled = is_en;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_rloc_probe_enable_disable api_one_rloc_probe_enable_disable
+
+static int
+api_one_map_register_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_map_register_enable_disable_t *mp;
+ u8 is_set = 0;
+ u8 is_en = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ {
+ is_set = 1;
+ is_en = 1;
+ }
+ else if (unformat (input, "disable"))
+ is_set = 1;
+ else
+ break;
+ }
+
+ if (!is_set)
+ {
+ errmsg ("Value not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (ONE_MAP_REGISTER_ENABLE_DISABLE, mp);
+
+ mp->is_enabled = is_en;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_map_register_enable_disable api_one_map_register_enable_disable
+
+static int
+api_one_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_enable_disable_t *mp;
+ u8 is_set = 0;
+ u8 is_en = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ {
+ is_set = 1;
+ is_en = 1;
+ }
+ else if (unformat (input, "disable"))
+ {
+ is_set = 1;
+ }
+ else
+ break;
+ }
+
+ if (!is_set)
+ {
+ errmsg ("Value not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (ONE_ENABLE_DISABLE, mp);
+
+ mp->is_en = is_en;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_enable_disable api_one_enable_disable
+
+static int
+api_show_one_map_register_state (vat_main_t * vam)
+{
+ vl_api_show_one_map_register_state_t *mp;
+ int ret;
+
+ M (SHOW_ONE_MAP_REGISTER_STATE, mp);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+#define api_show_lisp_map_register_state api_show_one_map_register_state
+
+static int
+api_show_one_rloc_probe_state (vat_main_t * vam)
+{
+ vl_api_show_one_rloc_probe_state_t *mp;
+ int ret;
+
+ M (SHOW_ONE_RLOC_PROBE_STATE, mp);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+#define api_show_lisp_rloc_probe_state api_show_one_rloc_probe_state
+
+static int
+api_one_add_del_ndp_entry (vat_main_t * vam)
+{
+ vl_api_one_add_del_ndp_entry_t *mp;
+ unformat_input_t *input = vam->input;
+ u8 is_add = 1;
+ u8 mac_set = 0;
+ u8 bd_set = 0;
+ u8 ip_set = 0;
+ u8 mac[6] = { 0, };
+ u8 ip6[16] = { 0, };
+ u32 bd = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "mac %U", unformat_ethernet_address, mac))
+ mac_set = 1;
+ else if (unformat (input, "ip %U", unformat_ip6_address, ip6))
+ ip_set = 1;
+ else if (unformat (input, "bd %d", &bd))
+ bd_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!bd_set || !ip_set || (!mac_set && is_add))
+ {
+ errmsg ("Missing BD, IP or MAC!");
+ return -99;
+ }
+
+ M (ONE_ADD_DEL_NDP_ENTRY, mp);
+ mp->is_add = is_add;
+ clib_memcpy (mp->mac, mac, 6);
+ mp->bd = clib_host_to_net_u32 (bd);
+ clib_memcpy (mp->ip6, ip6, sizeof (mp->ip6));
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_add_del_l2_arp_entry (vat_main_t * vam)
+{
+ vl_api_one_add_del_l2_arp_entry_t *mp;
+ unformat_input_t *input = vam->input;
+ u8 is_add = 1;
+ u8 mac_set = 0;
+ u8 bd_set = 0;
+ u8 ip_set = 0;
+ u8 mac[6] = { 0, };
+ u32 ip4 = 0, bd = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "mac %U", unformat_ethernet_address, mac))
+ mac_set = 1;
+ else if (unformat (input, "ip %U", unformat_ip4_address, &ip4))
+ ip_set = 1;
+ else if (unformat (input, "bd %d", &bd))
+ bd_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!bd_set || !ip_set || (!mac_set && is_add))
+ {
+ errmsg ("Missing BD, IP or MAC!");
+ return -99;
+ }
+
+ M (ONE_ADD_DEL_L2_ARP_ENTRY, mp);
+ mp->is_add = is_add;
+ clib_memcpy (mp->mac, mac, 6);
+ mp->bd = clib_host_to_net_u32 (bd);
+ mp->ip4 = ip4;
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_ndp_bd_get (vat_main_t * vam)
+{
+ vl_api_one_ndp_bd_get_t *mp;
+ int ret;
+
+ M (ONE_NDP_BD_GET, mp);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_ndp_entries_get (vat_main_t * vam)
+{
+ vl_api_one_ndp_entries_get_t *mp;
+ unformat_input_t *input = vam->input;
+ u8 bd_set = 0;
+ u32 bd = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "bd %d", &bd))
+ bd_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!bd_set)
+ {
+ errmsg ("Expected bridge domain!");
+ return -99;
+ }
+
+ M (ONE_NDP_ENTRIES_GET, mp);
+ mp->bd = clib_host_to_net_u32 (bd);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_l2_arp_bd_get (vat_main_t * vam)
+{
+ vl_api_one_l2_arp_bd_get_t *mp;
+ int ret;
+
+ M (ONE_L2_ARP_BD_GET, mp);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_l2_arp_entries_get (vat_main_t * vam)
+{
+ vl_api_one_l2_arp_entries_get_t *mp;
+ unformat_input_t *input = vam->input;
+ u8 bd_set = 0;
+ u32 bd = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "bd %d", &bd))
+ bd_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!bd_set)
+ {
+ errmsg ("Expected bridge domain!");
+ return -99;
+ }
+
+ M (ONE_L2_ARP_ENTRIES_GET, mp);
+ mp->bd = clib_host_to_net_u32 (bd);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_stats_enable_disable (vat_main_t * vam)
+{
+ vl_api_one_stats_enable_disable_t *mp;
+ unformat_input_t *input = vam->input;
+ u8 is_set = 0;
+ u8 is_en = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ {
+ is_set = 1;
+ is_en = 1;
+ }
+ else if (unformat (input, "disable"))
+ {
+ is_set = 1;
+ }
+ else
+ break;
+ }
+
+ if (!is_set)
+ {
+ errmsg ("Value not set");
+ return -99;
+ }
+
+ M (ONE_STATS_ENABLE_DISABLE, mp);
+ mp->is_en = is_en;
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_show_one_stats_enable_disable (vat_main_t * vam)
+{
+ vl_api_show_one_stats_enable_disable_t *mp;
+ int ret;
+
+ M (SHOW_ONE_STATS_ENABLE_DISABLE, mp);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_show_one_map_request_mode (vat_main_t * vam)
+{
+ vl_api_show_one_map_request_mode_t *mp;
+ int ret;
+
+ M (SHOW_ONE_MAP_REQUEST_MODE, mp);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+#define api_show_lisp_map_request_mode api_show_one_map_request_mode
+
+static int
+api_one_map_request_mode (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_map_request_mode_t *mp;
+ u8 mode = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "dst-only"))
+ mode = 0;
+ else if (unformat (input, "src-dst"))
+ mode = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ M (ONE_MAP_REQUEST_MODE, mp);
+
+ mp->mode = mode;
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_map_request_mode api_one_map_request_mode
+
+/**
+ * Set or unset the locator set used by the ONE Proxy-ITR;
+ * adding a locator set enables PITR, deleting it disables PITR.
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */
+static int
+api_one_pitr_set_locator_set (vat_main_t * vam)
+{
+ u8 ls_name_set = 0;
+ unformat_input_t *input = vam->input;
+ vl_api_one_pitr_set_locator_set_t *mp;
+ u8 is_add = 1;
+ u8 *ls_name = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "locator-set %s", &ls_name))
+ ls_name_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!ls_name_set)
+ {
+ errmsg ("locator-set name not set!");
+ return -99;
+ }
+
+ M (ONE_PITR_SET_LOCATOR_SET, mp);
+
+ mp->is_add = is_add;
+ clib_memcpy (mp->ls_name, ls_name, vec_len (ls_name));
+ vec_free (ls_name);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_pitr_set_locator_set api_one_pitr_set_locator_set
+
+static int
+api_one_nsh_set_locator_set (vat_main_t * vam)
+{
+ u8 ls_name_set = 0;
+ unformat_input_t *input = vam->input;
+ vl_api_one_nsh_set_locator_set_t *mp;
+ u8 is_add = 1;
+ u8 *ls_name = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "ls %s", &ls_name))
+ ls_name_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!ls_name_set && is_add)
+ {
+ errmsg ("locator-set name not set!");
+ return -99;
+ }
+
+ M (ONE_NSH_SET_LOCATOR_SET, mp);
+
+ mp->is_add = is_add;
+ clib_memcpy (mp->ls_name, ls_name, vec_len (ls_name));
+ vec_free (ls_name);
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+static int
+api_show_one_pitr (vat_main_t * vam)
+{
+ vl_api_show_one_pitr_t *mp;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=20s", "lisp status:");
+ }
+
+ M (SHOW_ONE_PITR, mp);
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_show_lisp_pitr api_show_one_pitr
+
+static int
+api_one_use_petr (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_use_petr_t *mp;
+ u8 is_add = 0;
+ ip_address_t ip;
+ int ret;
+
+ memset (&ip, 0, sizeof (ip));
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "disable"))
+ is_add = 0;
+ else
+ if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (&ip)))
+ {
+ is_add = 1;
+ ip_addr_version (&ip) = IP4;
+ }
+ else
+ if (unformat (input, "%U", unformat_ip6_address, &ip_addr_v6 (&ip)))
+ {
+ is_add = 1;
+ ip_addr_version (&ip) = IP6;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ M (ONE_USE_PETR, mp);
+
+ mp->is_add = is_add;
+ if (is_add)
+ {
+ mp->is_ip4 = ip_addr_version (&ip) == IP4 ? 1 : 0;
+ if (mp->is_ip4)
+ clib_memcpy (mp->address, &ip, 4);
+ else
+ clib_memcpy (mp->address, &ip, 16);
+ }
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_use_petr api_one_use_petr
+
+static int
+api_show_one_nsh_mapping (vat_main_t * vam)
+{
+ vl_api_show_one_nsh_mapping_t *mp;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=20s", "local ONE NSH mapping:");
+ }
+
+ M (SHOW_ONE_NSH_MAPPING, mp);
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_show_one_use_petr (vat_main_t * vam)
+{
+ vl_api_show_one_use_petr_t *mp;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=20s", "Proxy-ETR status:");
+ }
+
+ M (SHOW_ONE_USE_PETR, mp);
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_show_lisp_use_petr api_show_one_use_petr
+
+/**
+ * Add/delete a mapping between a vni and a vrf or bridge domain.
+ */
+static int
+api_one_eid_table_add_del_map (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_eid_table_add_del_map_t *mp;
+ u8 is_add = 1, vni_set = 0, vrf_set = 0, bd_index_set = 0;
+ u32 vni, vrf, bd_index;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "vrf %d", &vrf))
+ vrf_set = 1;
+ else if (unformat (input, "bd_index %d", &bd_index))
+ bd_index_set = 1;
+ else if (unformat (input, "vni %d", &vni))
+ vni_set = 1;
+ else
+ break;
+ }
+
+ if (!vni_set || (!vrf_set && !bd_index_set))
+ {
+ errmsg ("missing arguments!");
+ return -99;
+ }
+
+ if (vrf_set && bd_index_set)
+ {
+ errmsg ("error: both vrf and bd entered!");
+ return -99;
+ }
+
+ M (ONE_EID_TABLE_ADD_DEL_MAP, mp);
+
+ mp->is_add = is_add;
+ mp->vni = htonl (vni);
+ mp->dp_table = vrf_set ? htonl (vrf) : htonl (bd_index);
+ mp->is_l2 = bd_index_set;
+
+ /* send */
+ S (mp);
+
+ /* wait for reply */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_eid_table_add_del_map api_one_eid_table_add_del_map
+
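+/**
+ * Parse a negative mapping action: no-action (0), natively-forward (1),
+ * send-map-request (2) or drop (3). Unknown keywords default to drop.
+ */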
+uword
+unformat_negative_mapping_action (unformat_input_t * input, va_list * args)
+{
+ u32 *action = va_arg (*args, u32 *);
+ u8 *s = 0;
+
+ if (unformat (input, "%s", &s))
+ {
+ if (!strcmp ((char *) s, "no-action"))
+ action[0] = 0;
+ else if (!strcmp ((char *) s, "natively-forward"))
+ action[0] = 1;
+ else if (!strcmp ((char *) s, "send-map-request"))
+ action[0] = 2;
+ else if (!strcmp ((char *) s, "drop"))
+ action[0] = 3;
+ else
+ {
+ clib_warning ("invalid action: '%s'", s);
+ action[0] = 3;
+ }
+ }
+ else
+ return 0;
+
+ vec_free (s);
+ return 1;
+}
+
+/**
+ * Add/del a remote mapping to/from the ONE control plane.
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */
+static int
+api_one_add_del_remote_mapping (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_add_del_remote_mapping_t *mp;
+ u32 vni = 0;
+ lisp_eid_vat_t _eid, *eid = &_eid;
+ lisp_eid_vat_t _seid, *seid = &_seid;
+ u8 is_add = 1, del_all = 0, eid_set = 0, seid_set = 0;
+ u32 action = ~0, p, w, data_len;
+ ip4_address_t rloc4;
+ ip6_address_t rloc6;
+ vl_api_remote_locator_t *rlocs = 0, rloc, *curr_rloc = 0;
+ int ret;
+
+ memset (&rloc, 0, sizeof (rloc));
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del-all"))
+ {
+ del_all = 1;
+ }
+ else if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (input, "add"))
+ {
+ is_add = 1;
+ }
+ else if (unformat (input, "eid %U", unformat_lisp_eid_vat, eid))
+ {
+ eid_set = 1;
+ }
+ else if (unformat (input, "seid %U", unformat_lisp_eid_vat, seid))
+ {
+ seid_set = 1;
+ }
+ else if (unformat (input, "vni %d", &vni))
+ {
+ ;
+ }
+ else if (unformat (input, "p %d w %d", &p, &w))
+ {
+ if (!curr_rloc)
+ {
+ errmsg ("No RLOC configured for setting priority/weight!");
+ return -99;
+ }
+ curr_rloc->priority = p;
+ curr_rloc->weight = w;
+ }
+ else if (unformat (input, "rloc %U", unformat_ip4_address, &rloc4))
+ {
+ rloc.is_ip4 = 1;
+ clib_memcpy (&rloc.addr, &rloc4, sizeof (rloc4));
+ vec_add1 (rlocs, rloc);
+ curr_rloc = &rlocs[vec_len (rlocs) - 1];
+ }
+ else if (unformat (input, "rloc %U", unformat_ip6_address, &rloc6))
+ {
+ rloc.is_ip4 = 0;
+ clib_memcpy (&rloc.addr, &rloc6, sizeof (rloc6));
+ vec_add1 (rlocs, rloc);
+ curr_rloc = &rlocs[vec_len (rlocs) - 1];
+ }
+ else if (unformat (input, "action %U",
+ unformat_negative_mapping_action, &action))
+ {
+ ;
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (0 == eid_set)
+ {
+ errmsg ("missing params!");
+ return -99;
+ }
+
+ if (is_add && (~0 == action) && 0 == vec_len (rlocs))
+ {
+ errmsg ("no action set for negative map-reply!");
+ return -99;
+ }
+
+ data_len = vec_len (rlocs) * sizeof (vl_api_remote_locator_t);
+
+ M2 (ONE_ADD_DEL_REMOTE_MAPPING, mp, data_len);
+ mp->is_add = is_add;
+ mp->vni = htonl (vni);
+ mp->action = (u8) action;
+ mp->is_src_dst = seid_set;
+ mp->eid_len = eid->len;
+ mp->seid_len = seid->len;
+ mp->del_all = del_all;
+ mp->eid_type = eid->type;
+ lisp_eid_put_vat (mp->eid, eid->addr, eid->type);
+ lisp_eid_put_vat (mp->seid, seid->addr, seid->type);
+
+ mp->rloc_num = clib_host_to_net_u32 (vec_len (rlocs));
+ clib_memcpy (mp->rlocs, rlocs, data_len);
+ vec_free (rlocs);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_add_del_remote_mapping api_one_add_del_remote_mapping
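+
+/*
+ * Illustrative invocations for the parser above, a sketch inferred from
+ * the unformat clauses (the command spelling is an assumption, not taken
+ * from the registered help string):
+ *
+ *   one_add_del_remote_mapping add vni 10 eid 10.0.0.1/32
+ *       rloc 1.2.3.4 p 1 w 1 rloc 5.6.7.8 p 2 w 1
+ *   one_add_del_remote_mapping add eid 10.0.0.1/32 action drop
+ *   one_add_del_remote_mapping del-all
+ *
+ * "p %d w %d" applies to the most recently parsed rloc, which is why the
+ * parser rejects it before any rloc has been seen.
+ */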
+
+/**
+ * Add/del ONE adjacency. Saves the mapping in the ONE control plane and
+ * updates forwarding entries in the data plane accordingly.
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */
+static int
+api_one_add_del_adjacency (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_add_del_adjacency_t *mp;
+ u32 vni = 0;
+ ip4_address_t leid4, reid4;
+ ip6_address_t leid6, reid6;
+ u8 reid_mac[6] = { 0 };
+ u8 leid_mac[6] = { 0 };
+ u8 reid_type, leid_type;
+ u32 leid_len = 0, reid_len = 0, len;
+ u8 is_add = 1;
+ int ret;
+
+ leid_type = reid_type = (u8) ~ 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (input, "add"))
+ {
+ is_add = 1;
+ }
+ else if (unformat (input, "reid %U/%d", unformat_ip4_address,
+ &reid4, &len))
+ {
+ reid_type = 0; /* ipv4 */
+ reid_len = len;
+ }
+ else if (unformat (input, "reid %U/%d", unformat_ip6_address,
+ &reid6, &len))
+ {
+ reid_type = 1; /* ipv6 */
+ reid_len = len;
+ }
+ else if (unformat (input, "reid %U", unformat_ethernet_address,
+ reid_mac))
+ {
+ reid_type = 2; /* mac */
+ }
+ else if (unformat (input, "leid %U/%d", unformat_ip4_address,
+ &leid4, &len))
+ {
+ leid_type = 0; /* ipv4 */
+ leid_len = len;
+ }
+ else if (unformat (input, "leid %U/%d", unformat_ip6_address,
+ &leid6, &len))
+ {
+ leid_type = 1; /* ipv6 */
+ leid_len = len;
+ }
+ else if (unformat (input, "leid %U", unformat_ethernet_address,
+ leid_mac))
+ {
+ leid_type = 2; /* mac */
+ }
+ else if (unformat (input, "vni %d", &vni))
+ {
+ ;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if ((u8) ~ 0 == reid_type)
+ {
+ errmsg ("missing remote EID!");
+ return -99;
+ }
+
+ if (leid_type != reid_type)
+ {
+ errmsg ("remote and local EIDs are of different types!");
+ return -99;
+ }
+
+ M (ONE_ADD_DEL_ADJACENCY, mp);
+ mp->is_add = is_add;
+ mp->vni = htonl (vni);
+ mp->leid_len = leid_len;
+ mp->reid_len = reid_len;
+ mp->eid_type = reid_type;
+
+ switch (mp->eid_type)
+ {
+ case 0:
+ clib_memcpy (mp->leid, &leid4, sizeof (leid4));
+ clib_memcpy (mp->reid, &reid4, sizeof (reid4));
+ break;
+ case 1:
+ clib_memcpy (mp->leid, &leid6, sizeof (leid6));
+ clib_memcpy (mp->reid, &reid6, sizeof (reid6));
+ break;
+ case 2:
+ clib_memcpy (mp->leid, leid_mac, 6);
+ clib_memcpy (mp->reid, reid_mac, 6);
+ break;
+ default:
+ errmsg ("unknown EID type %d!", mp->eid_type);
+ return -99;
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_add_del_adjacency api_one_add_del_adjacency
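+
+/*
+ * Example usage, sketched from the unformat clauses above (command name
+ * assumed from the function name). Local and remote EIDs must share a
+ * type: both IPv4 prefixes, both IPv6 prefixes, or both MACs:
+ *
+ *   one_add_del_adjacency add vni 5 reid 10.0.0.0/24 leid 192.168.0.0/24
+ *   one_add_del_adjacency del vni 5 reid aa:bb:cc:dd:ee:ff leid 11:22:33:44:55:66
+ */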
+
+uword
+unformat_gpe_encap_mode (unformat_input_t * input, va_list * args)
+{
+ u32 *mode = va_arg (*args, u32 *);
+
+ if (unformat (input, "lisp"))
+ *mode = 0;
+ else if (unformat (input, "vxlan"))
+ *mode = 1;
+ else
+ return 0;
+
+ return 1;
+}
+
+static int
+api_gpe_get_encap_mode (vat_main_t * vam)
+{
+ vl_api_gpe_get_encap_mode_t *mp;
+ int ret;
+
+ /* Construct the API message */
+ M (GPE_GET_ENCAP_MODE, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_gpe_set_encap_mode (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_gpe_set_encap_mode_t *mp;
+ int ret;
+ u32 mode = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_gpe_encap_mode, &mode))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (GPE_SET_ENCAP_MODE, mp);
+
+ mp->mode = mode;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
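+
+/*
+ * Example (a sketch; per unformat_gpe_encap_mode above, "lisp" encodes
+ * mode 0 and "vxlan" mode 1):
+ *
+ *   gpe_set_encap_mode vxlan
+ */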
+
+static int
+api_lisp_gpe_add_del_iface (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_gpe_add_del_iface_t *mp;
+ u8 action_set = 0, is_add = 1, is_l2 = 0, dp_table_set = 0, vni_set = 0;
+ u32 dp_table = 0, vni = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "up"))
+ {
+ action_set = 1;
+ is_add = 1;
+ }
+ else if (unformat (input, "down"))
+ {
+ action_set = 1;
+ is_add = 0;
+ }
+ else if (unformat (input, "table_id %d", &dp_table))
+ {
+ dp_table_set = 1;
+ }
+ else if (unformat (input, "bd_id %d", &dp_table))
+ {
+ dp_table_set = 1;
+ is_l2 = 1;
+ }
+ else if (unformat (input, "vni %d", &vni))
+ {
+ vni_set = 1;
+ }
+ else
+ break;
+ }
+
+ if (action_set == 0)
+ {
+ errmsg ("Action not set");
+ return -99;
+ }
+ if (dp_table_set == 0 || vni_set == 0)
+ {
+ errmsg ("vni and dp_table must be set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (GPE_ADD_DEL_IFACE, mp);
+
+ mp->is_add = is_add;
+ mp->dp_table = clib_host_to_net_u32 (dp_table);
+ mp->is_l2 = is_l2;
+ mp->vni = clib_host_to_net_u32 (vni);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
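+
+/*
+ * Illustrative usage inferred from the parser above: "up"/"down" selects
+ * add vs. delete, and one of table_id (L3) or bd_id (L2) must be given
+ * together with a vni:
+ *
+ *   gpe_add_del_iface up vni 10 table_id 1
+ *   gpe_add_del_iface down vni 20 bd_id 2
+ */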
+
+static int
+api_one_map_register_fallback_threshold (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_map_register_fallback_threshold_t *mp;
+ u32 value = 0;
+ u8 is_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%u", &value))
+ is_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!is_set)
+ {
+ errmsg ("fallback threshold value is missing!");
+ return -99;
+ }
+
+ M (ONE_MAP_REGISTER_FALLBACK_THRESHOLD, mp);
+ mp->value = clib_host_to_net_u32 (value);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_show_one_map_register_fallback_threshold (vat_main_t * vam)
+{
+ vl_api_show_one_map_register_fallback_threshold_t *mp;
+ int ret;
+
+ M (SHOW_ONE_MAP_REGISTER_FALLBACK_THRESHOLD, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+uword
+unformat_lisp_transport_protocol (unformat_input_t * input, va_list * args)
+{
+ u32 *proto = va_arg (*args, u32 *);
+
+ if (unformat (input, "udp"))
+ *proto = 1;
+ else if (unformat (input, "api"))
+ *proto = 2;
+ else
+ return 0;
+
+ return 1;
+}
+
+static int
+api_one_set_transport_protocol (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_set_transport_protocol_t *mp;
+ u8 is_set = 0;
+ u32 protocol = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_lisp_transport_protocol, &protocol))
+ is_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!is_set)
+ {
+ errmsg ("Transport protocol missing!");
+ return -99;
+ }
+
+ M (ONE_SET_TRANSPORT_PROTOCOL, mp);
+ mp->protocol = (u8) protocol;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_get_transport_protocol (vat_main_t * vam)
+{
+ vl_api_one_get_transport_protocol_t *mp;
+ int ret;
+
+ M (ONE_GET_TRANSPORT_PROTOCOL, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_map_register_set_ttl (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_map_register_set_ttl_t *mp;
+ u32 ttl = 0;
+ u8 is_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%u", &ttl))
+ is_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!is_set)
+ {
+ errmsg ("TTL value missing!");
+ return -99;
+ }
+
+ M (ONE_MAP_REGISTER_SET_TTL, mp);
+ mp->ttl = clib_host_to_net_u32 (ttl);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_show_one_map_register_ttl (vat_main_t * vam)
+{
+ vl_api_show_one_map_register_ttl_t *mp;
+ int ret;
+
+ M (SHOW_ONE_MAP_REGISTER_TTL, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+/**
+ * Add/del map-request itr-rlocs to/from the ONE control plane
+ *
+ * @param vam vpp API test context
+ * @return return code
+ */
+static int
+api_one_add_del_map_request_itr_rlocs (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_add_del_map_request_itr_rlocs_t *mp;
+ u8 *locator_set_name = 0;
+ u8 locator_set_name_set = 0;
+ u8 is_add = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (input, "%_%v%_", &locator_set_name))
+ {
+ locator_set_name_set = 1;
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (is_add && !locator_set_name_set)
+ {
+ errmsg ("itr-rloc is not set!");
+ return -99;
+ }
+
+ if (is_add && vec_len (locator_set_name) > 64)
+ {
+ errmsg ("itr-rloc locator-set name too long");
+ vec_free (locator_set_name);
+ return -99;
+ }
+
+ M (ONE_ADD_DEL_MAP_REQUEST_ITR_RLOCS, mp);
+ mp->is_add = is_add;
+ if (is_add)
+ {
+ clib_memcpy (mp->locator_set_name, locator_set_name,
+ vec_len (locator_set_name));
+ }
+ else
+ {
+ memset (mp->locator_set_name, 0, sizeof (mp->locator_set_name));
+ }
+ vec_free (locator_set_name);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_add_del_map_request_itr_rlocs api_one_add_del_map_request_itr_rlocs
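+
+/*
+ * Example (sketch): the only payload is a locator-set name of at most 64
+ * bytes, so a delete needs no argument beyond "del":
+ *
+ *   one_add_del_map_request_itr_rlocs ls1
+ *   one_add_del_map_request_itr_rlocs del
+ */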
+
+static int
+api_one_locator_dump (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_one_locator_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u8 is_index_set = 0, is_name_set = 0;
+ u8 *ls_name = 0;
+ u32 ls_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ls_name %_%v%_", &ls_name))
+ {
+ is_name_set = 1;
+ }
+ else if (unformat (input, "ls_index %d", &ls_index))
+ {
+ is_index_set = 1;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!is_index_set && !is_name_set)
+ {
+ errmsg ("error: expected one of ls_index or ls_name!");
+ return -99;
+ }
+
+ if (is_index_set && is_name_set)
+ {
+ errmsg ("error: only one param expected!");
+ return -99;
+ }
+
+ if (vec_len (ls_name) > 62)
+ {
+ errmsg ("error: locator set name too long!");
+ return -99;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=16s%=16s%=16s", "locator", "priority", "weight");
+ }
+
+ M (ONE_LOCATOR_DUMP, mp);
+ mp->is_index_set = is_index_set;
+
+ if (is_index_set)
+ mp->ls_index = clib_host_to_net_u32 (ls_index);
+ else
+ {
+ vec_add1 (ls_name, 0);
+ strncpy ((char *) mp->ls_name, (char *) ls_name,
+ sizeof (mp->ls_name) - 1);
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_locator_dump api_one_locator_dump
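+
+/*
+ * Example (sketch): exactly one of ls_index or ls_name selects the
+ * locator set to dump; the 62-byte limit leaves room for the NUL that is
+ * appended before the strncpy into the fixed-size ls_name field:
+ *
+ *   one_locator_dump ls_index 3
+ *   one_locator_dump ls_name ls1
+ */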
+
+static int
+api_one_locator_set_dump (vat_main_t * vam)
+{
+ vl_api_one_locator_set_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ unformat_input_t *input = vam->input;
+ u8 filter = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "local"))
+ {
+ filter = 1;
+ }
+ else if (unformat (input, "remote"))
+ {
+ filter = 2;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=10s%=15s", "ls_index", "ls_name");
+ }
+
+ M (ONE_LOCATOR_SET_DUMP, mp);
+
+ mp->filter = filter;
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_locator_set_dump api_one_locator_set_dump
+
+static int
+api_one_eid_table_map_dump (vat_main_t * vam)
+{
+ u8 is_l2 = 0;
+ u8 mode_set = 0;
+ unformat_input_t *input = vam->input;
+ vl_api_one_eid_table_map_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "l2"))
+ {
+ is_l2 = 1;
+ mode_set = 1;
+ }
+ else if (unformat (input, "l3"))
+ {
+ is_l2 = 0;
+ mode_set = 1;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, input);
+ return -99;
+ }
+ }
+
+ if (!mode_set)
+ {
+ errmsg ("expected one of 'l2' or 'l3' parameter!");
+ return -99;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF");
+ }
+
+ M (ONE_EID_TABLE_MAP_DUMP, mp);
+ mp->is_l2 = is_l2;
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_eid_table_map_dump api_one_eid_table_map_dump
+
+static int
+api_one_eid_table_vni_dump (vat_main_t * vam)
+{
+ vl_api_one_eid_table_vni_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "VNI");
+ }
+
+ M (ONE_EID_TABLE_VNI_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_eid_table_vni_dump api_one_eid_table_vni_dump
+
+static int
+api_one_eid_table_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_one_eid_table_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ u8 mac[6];
+ u8 eid_type = ~0, eid_set = 0;
+ u32 prefix_length = ~0, t, vni = 0;
+ u8 filter = 0;
+ int ret;
+ lisp_nsh_api_t nsh;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "eid %U/%d", unformat_ip4_address, &ip4, &t))
+ {
+ eid_set = 1;
+ eid_type = 0;
+ prefix_length = t;
+ }
+ else if (unformat (i, "eid %U/%d", unformat_ip6_address, &ip6, &t))
+ {
+ eid_set = 1;
+ eid_type = 1;
+ prefix_length = t;
+ }
+ else if (unformat (i, "eid %U", unformat_ethernet_address, mac))
+ {
+ eid_set = 1;
+ eid_type = 2;
+ }
+ else if (unformat (i, "eid %U", unformat_nsh_address, &nsh))
+ {
+ eid_set = 1;
+ eid_type = 3;
+ }
+ else if (unformat (i, "vni %d", &t))
+ {
+ vni = t;
+ }
+ else if (unformat (i, "local"))
+ {
+ filter = 1;
+ }
+ else if (unformat (i, "remote"))
+ {
+ filter = 2;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%-35s%-20s%-30s%-20s%-20s%-10s%-20s", "EID",
+ "type", "ls_index", "ttl", "authoritative", "key_id", "key");
+ }
+
+ M (ONE_EID_TABLE_DUMP, mp);
+
+ mp->filter = filter;
+ if (eid_set)
+ {
+ mp->eid_set = 1;
+ mp->vni = htonl (vni);
+ mp->eid_type = eid_type;
+ switch (eid_type)
+ {
+ case 0:
+ mp->prefix_length = prefix_length;
+ clib_memcpy (mp->eid, &ip4, sizeof (ip4));
+ break;
+ case 1:
+ mp->prefix_length = prefix_length;
+ clib_memcpy (mp->eid, &ip6, sizeof (ip6));
+ break;
+ case 2:
+ clib_memcpy (mp->eid, mac, sizeof (mac));
+ break;
+ case 3:
+ clib_memcpy (mp->eid, &nsh, sizeof (nsh));
+ break;
+ default:
+ errmsg ("unknown EID type %d!", eid_type);
+ return -99;
+ }
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_eid_table_dump api_one_eid_table_dump
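+
+/*
+ * Illustrative filters for the dump above, sketched from the parser: eid
+ * accepts an IPv4 prefix, IPv6 prefix, MAC or NSH address, and
+ * "local"/"remote" restrict the result set:
+ *
+ *   one_eid_table_dump vni 10 eid 10.0.0.0/24
+ *   one_eid_table_dump local
+ */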
+
+static int
+api_lisp_gpe_fwd_entries_get (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_gpe_fwd_entries_get_t *mp;
+ u8 vni_set = 0;
+ u32 vni = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vni %d", &vni))
+ {
+ vni_set = 1;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vni_set)
+ {
+ errmsg ("vni not set!");
+ return -99;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%10s %10s %s %40s", "fwd_index", "dp_table",
+ "leid", "reid");
+ }
+
+ M (GPE_FWD_ENTRIES_GET, mp);
+ mp->vni = clib_host_to_net_u32 (vni);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define vl_api_gpe_native_fwd_rpaths_get_reply_t_endian vl_noop_handler
+#define vl_api_gpe_native_fwd_rpaths_get_reply_t_print vl_noop_handler
+#define vl_api_gpe_fwd_entry_vnis_get_reply_t_endian vl_noop_handler
+#define vl_api_gpe_fwd_entry_vnis_get_reply_t_print vl_noop_handler
+#define vl_api_gpe_fwd_entries_get_reply_t_endian vl_noop_handler
+#define vl_api_gpe_fwd_entries_get_reply_t_print vl_noop_handler
+#define vl_api_gpe_fwd_entry_path_details_t_endian vl_noop_handler
+#define vl_api_gpe_fwd_entry_path_details_t_print vl_noop_handler
+
+static int
+api_one_adjacencies_get (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_one_adjacencies_get_t *mp;
+ u8 vni_set = 0;
+ u32 vni = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vni %d", &vni))
+ {
+ vni_set = 1;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!vni_set)
+ {
+ errmsg ("vni not set!");
+ return -99;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%s %40s", "leid", "reid");
+ }
+
+ M (ONE_ADJACENCIES_GET, mp);
+ mp->vni = clib_host_to_net_u32 (vni);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_adjacencies_get api_one_adjacencies_get
+
+static int
+api_gpe_native_fwd_rpaths_get (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_gpe_native_fwd_rpaths_get_t *mp;
+ int ret;
+ u8 ip_family_set = 0, is_ip4 = 1;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ip4"))
+ {
+ ip_family_set = 1;
+ is_ip4 = 1;
+ }
+ else if (unformat (i, "ip6"))
+ {
+ ip_family_set = 1;
+ is_ip4 = 0;
+ }
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!ip_family_set)
+ {
+ errmsg ("ip family not set!");
+ return -99;
+ }
+
+ M (GPE_NATIVE_FWD_RPATHS_GET, mp);
+ mp->is_ip4 = is_ip4;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_gpe_fwd_entry_vnis_get (vat_main_t * vam)
+{
+ vl_api_gpe_fwd_entry_vnis_get_t *mp;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "VNIs");
+ }
+
+ M (GPE_FWD_ENTRY_VNIS_GET, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_gpe_add_del_native_fwd_rpath (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_gpe_add_del_native_fwd_rpath_t *mp;
+ int ret = 0;
+ u8 is_add = 1, ip_set = 0, is_ip4 = 1;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ u32 table_id = 0, nh_sw_if_index = ~0;
+
+ memset (&ip4, 0, sizeof (ip4));
+ memset (&ip6, 0, sizeof (ip6));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "via %U %U", unformat_ip4_address, &ip4,
+ api_unformat_sw_if_index, vam, &nh_sw_if_index))
+ {
+ ip_set = 1;
+ is_ip4 = 1;
+ }
+ else if (unformat (i, "via %U %U", unformat_ip6_address, &ip6,
+ api_unformat_sw_if_index, vam, &nh_sw_if_index))
+ {
+ ip_set = 1;
+ is_ip4 = 0;
+ }
+ else if (unformat (i, "via %U", unformat_ip4_address, &ip4))
+ {
+ ip_set = 1;
+ is_ip4 = 1;
+ nh_sw_if_index = ~0;
+ }
+ else if (unformat (i, "via %U", unformat_ip6_address, &ip6))
+ {
+ ip_set = 1;
+ is_ip4 = 0;
+ nh_sw_if_index = ~0;
+ }
+ else if (unformat (i, "table %d", &table_id))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!ip_set)
+ {
+ errmsg ("nh addr not set!");
+ return -99;
+ }
+
+ M (GPE_ADD_DEL_NATIVE_FWD_RPATH, mp);
+ mp->is_add = is_add;
+ mp->table_id = clib_host_to_net_u32 (table_id);
+ mp->nh_sw_if_index = clib_host_to_net_u32 (nh_sw_if_index);
+ mp->is_ip4 = is_ip4;
+ if (is_ip4)
+ clib_memcpy (mp->nh_addr, &ip4, sizeof (ip4));
+ else
+ clib_memcpy (mp->nh_addr, &ip6, sizeof (ip6));
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
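+
+/*
+ * Example (sketch; the interface name is a placeholder): the next hop may
+ * optionally name an egress interface, otherwise nh_sw_if_index stays ~0:
+ *
+ *   gpe_add_del_native_fwd_rpath via 10.0.0.1 GigabitEthernet0/8/0 table 0
+ *   gpe_add_del_native_fwd_rpath del via 2001:db8::1
+ */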
+
+static int
+api_one_map_server_dump (vat_main_t * vam)
+{
+ vl_api_one_map_server_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=20s", "Map server");
+ }
+
+ M (ONE_MAP_SERVER_DUMP, mp);
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_map_server_dump api_one_map_server_dump
+
+static int
+api_one_map_resolver_dump (vat_main_t * vam)
+{
+ vl_api_one_map_resolver_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=20s", "Map resolver");
+ }
+
+ M (ONE_MAP_RESOLVER_DUMP, mp);
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_map_resolver_dump api_one_map_resolver_dump
+
+static int
+api_one_stats_flush (vat_main_t * vam)
+{
+ vl_api_one_stats_flush_t *mp;
+ int ret = 0;
+
+ M (ONE_STATS_FLUSH, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_stats_dump (vat_main_t * vam)
+{
+ vl_api_one_stats_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (ONE_STATS_DUMP, mp);
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_show_one_status (vat_main_t * vam)
+{
+ vl_api_show_one_status_t *mp;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%-20s%-16s", "ONE status", "locator-set");
+ }
+
+ M (SHOW_ONE_STATUS, mp);
+ /* send it... */
+ S (mp);
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_show_lisp_status api_show_one_status
+
+static int
+api_lisp_gpe_fwd_entry_path_dump (vat_main_t * vam)
+{
+ vl_api_gpe_fwd_entry_path_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ unformat_input_t *i = vam->input;
+ u32 fwd_entry_index = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "index %d", &fwd_entry_index))
+ ;
+ else
+ break;
+ }
+
+ if (~0 == fwd_entry_index)
+ {
+ errmsg ("no index specified!");
+ return -99;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "first line");
+ }
+
+ M (GPE_FWD_ENTRY_PATH_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_one_get_map_request_itr_rlocs (vat_main_t * vam)
+{
+ vl_api_one_get_map_request_itr_rlocs_t *mp;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=20s", "itr-rlocs:");
+ }
+
+ M (ONE_GET_MAP_REQUEST_ITR_RLOCS, mp);
+ /* send it... */
+ S (mp);
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#define api_lisp_get_map_request_itr_rlocs api_one_get_map_request_itr_rlocs
+
+static int
+api_af_packet_create (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_af_packet_create_t *mp;
+ u8 *host_if_name = 0;
+ u8 hw_addr[6];
+ u8 random_hw_addr = 1;
+ int ret;
+
+ memset (hw_addr, 0, sizeof (hw_addr));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "name %s", &host_if_name))
+ vec_add1 (host_if_name, 0);
+ else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr))
+ random_hw_addr = 0;
+ else
+ break;
+ }
+
+ if (!vec_len (host_if_name))
+ {
+ errmsg ("host-interface name must be specified");
+ return -99;
+ }
+
+ if (vec_len (host_if_name) > 64)
+ {
+ errmsg ("host-interface name too long");
+ return -99;
+ }
+
+ M (AF_PACKET_CREATE, mp);
+
+ clib_memcpy (mp->host_if_name, host_if_name, vec_len (host_if_name));
+ clib_memcpy (mp->hw_addr, hw_addr, 6);
+ mp->use_random_hw_addr = random_hw_addr;
+ vec_free (host_if_name);
+
+ S (mp);
+
+ /* *INDENT-OFF* */
+ W2 (ret,
+ ({
+ if (ret == 0)
+ fprintf (vam->ofp ? vam->ofp : stderr,
+ " new sw_if_index = %d\n", vam->sw_if_index);
+ }));
+ /* *INDENT-ON* */
+ return ret;
+}
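+
+/*
+ * Example (sketch; "veth0" is a placeholder host interface): without an
+ * explicit hw_addr the message requests a random MAC via
+ * use_random_hw_addr:
+ *
+ *   af_packet_create name veth0
+ *   af_packet_create name veth0 hw_addr 02:fe:00:00:00:01
+ */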
+
+static int
+api_af_packet_delete (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_af_packet_delete_t *mp;
+ u8 *host_if_name = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "name %s", &host_if_name))
+ vec_add1 (host_if_name, 0);
+ else
+ break;
+ }
+
+ if (!vec_len (host_if_name))
+ {
+ errmsg ("host-interface name must be specified");
+ return -99;
+ }
+
+ if (vec_len (host_if_name) > 64)
+ {
+ errmsg ("host-interface name too long");
+ return -99;
+ }
+
+ M (AF_PACKET_DELETE, mp);
+
+ clib_memcpy (mp->host_if_name, host_if_name, vec_len (host_if_name));
+ vec_free (host_if_name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_policer_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_policer_add_del_t *mp;
+ u8 is_add = 1;
+ u8 *name = 0;
+ u32 cir = 0;
+ u32 eir = 0;
+ u64 cb = 0;
+ u64 eb = 0;
+ u8 rate_type = 0;
+ u8 round_type = 0;
+ u8 type = 0;
+ u8 color_aware = 0;
+ sse2_qos_pol_action_params_st conform_action, exceed_action, violate_action;
+ int ret;
+
+ conform_action.action_type = SSE2_QOS_ACTION_TRANSMIT;
+ conform_action.dscp = 0;
+ exceed_action.action_type = SSE2_QOS_ACTION_MARK_AND_TRANSMIT;
+ exceed_action.dscp = 0;
+ violate_action.action_type = SSE2_QOS_ACTION_DROP;
+ violate_action.dscp = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "name %s", &name))
+ vec_add1 (name, 0);
+ else if (unformat (i, "cir %u", &cir))
+ ;
+ else if (unformat (i, "eir %u", &eir))
+ ;
+ else if (unformat (i, "cb %u", &cb))
+ ;
+ else if (unformat (i, "eb %u", &eb))
+ ;
+ else if (unformat (i, "rate_type %U", unformat_policer_rate_type,
+ &rate_type))
+ ;
+ else if (unformat (i, "round_type %U", unformat_policer_round_type,
+ &round_type))
+ ;
+ else if (unformat (i, "type %U", unformat_policer_type, &type))
+ ;
+ else if (unformat (i, "conform_action %U", unformat_policer_action_type,
+ &conform_action))
+ ;
+ else if (unformat (i, "exceed_action %U", unformat_policer_action_type,
+ &exceed_action))
+ ;
+ else if (unformat (i, "violate_action %U", unformat_policer_action_type,
+ &violate_action))
+ ;
+ else if (unformat (i, "color-aware"))
+ color_aware = 1;
+ else
+ break;
+ }
+
+ if (!vec_len (name))
+ {
+ errmsg ("policer name must be specified");
+ return -99;
+ }
+
+ if (vec_len (name) > 64)
+ {
+ errmsg ("policer name too long");
+ return -99;
+ }
+
+ M (POLICER_ADD_DEL, mp);
+
+ clib_memcpy (mp->name, name, vec_len (name));
+ vec_free (name);
+ mp->is_add = is_add;
+ mp->cir = cir;
+ mp->eir = eir;
+ mp->cb = cb;
+ mp->eb = eb;
+ mp->rate_type = rate_type;
+ mp->round_type = round_type;
+ mp->type = type;
+ mp->conform_action_type = conform_action.action_type;
+ mp->conform_dscp = conform_action.dscp;
+ mp->exceed_action_type = exceed_action.action_type;
+ mp->exceed_dscp = exceed_action.dscp;
+ mp->violate_action_type = violate_action.action_type;
+ mp->violate_dscp = violate_action.dscp;
+ mp->color_aware = color_aware;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
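+
+/*
+ * Example (sketch): cir/eir are rates and cb/eb the matching burst sizes;
+ * conform/exceed/violate actions default to transmit, mark-and-transmit
+ * and drop (see the initializers above). The rate_type/round_type/type
+ * token spellings come from unformat_policer_* helpers not shown here,
+ * so the values below are assumptions:
+ *
+ *   policer_add_del name p1 cir 1000 eir 2000 cb 65536 eb 131072
+ *       rate_type kbps round_type closest type 2r3c-2698 color-aware
+ */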
+
+static int
+api_policer_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_policer_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u8 *match_name = 0;
+ u8 match_name_valid = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "name %s", &match_name))
+ {
+ vec_add1 (match_name, 0);
+ match_name_valid = 1;
+ }
+ else
+ break;
+ }
+
+ M (POLICER_DUMP, mp);
+ mp->match_name_valid = match_name_valid;
+ clib_memcpy (mp->match_name, match_name, vec_len (match_name));
+ vec_free (match_name);
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_policer_classify_set_interface (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_policer_classify_set_interface_t *mp;
+ u32 sw_if_index;
+ int sw_if_index_set = 0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 l2_table_index = ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (i, "ip6-table %d", &ip6_table_index))
+ ;
+ else if (unformat (i, "l2-table %d", &l2_table_index))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (POLICER_CLASSIFY_SET_INTERFACE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->ip4_table_index = ntohl (ip4_table_index);
+ mp->ip6_table_index = ntohl (ip6_table_index);
+ mp->l2_table_index = ntohl (l2_table_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_policer_classify_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_policer_classify_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u8 type = POLICER_CLASSIFY_N_TABLES;
+ int ret;
+
+ if (unformat (i, "type %U", unformat_policer_classify_table_type, &type))
+ ;
+ else
+ {
+ errmsg ("classify table type must be specified");
+ return -99;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%10s%20s", "Intfc idx", "Classify table");
+ }
+
+ M (POLICER_CLASSIFY_DUMP, mp);
+ mp->type = type;
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_netmap_create (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_netmap_create_t *mp;
+ u8 *if_name = 0;
+ u8 hw_addr[6];
+ u8 random_hw_addr = 1;
+ u8 is_pipe = 0;
+ u8 is_master = 0;
+ int ret;
+
+ memset (hw_addr, 0, sizeof (hw_addr));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "name %s", &if_name))
+ vec_add1 (if_name, 0);
+ else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr))
+ random_hw_addr = 0;
+ else if (unformat (i, "pipe"))
+ is_pipe = 1;
+ else if (unformat (i, "master"))
+ is_master = 1;
+ else if (unformat (i, "slave"))
+ is_master = 0;
+ else
+ break;
+ }
+
+ if (!vec_len (if_name))
+ {
+ errmsg ("interface name must be specified");
+ return -99;
+ }
+
+ if (vec_len (if_name) > 64)
+ {
+ errmsg ("interface name too long");
+ return -99;
+ }
+
+ M (NETMAP_CREATE, mp);
+
+ clib_memcpy (mp->netmap_if_name, if_name, vec_len (if_name));
+ clib_memcpy (mp->hw_addr, hw_addr, 6);
+ mp->use_random_hw_addr = random_hw_addr;
+ mp->is_pipe = is_pipe;
+ mp->is_master = is_master;
+ vec_free (if_name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_netmap_delete (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_netmap_delete_t *mp;
+ u8 *if_name = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "name %s", &if_name))
+ vec_add1 (if_name, 0);
+ else
+ break;
+ }
+
+ if (!vec_len (if_name))
+ {
+ errmsg ("interface name must be specified");
+ return -99;
+ }
+
+ if (vec_len (if_name) > 64)
+ {
+ errmsg ("interface name too long");
+ return -99;
+ }
+
+ M (NETMAP_DELETE, mp);
+
+ clib_memcpy (mp->netmap_if_name, if_name, vec_len (if_name));
+ vec_free (if_name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_mpls_fib_path_print (vat_main_t * vam, vl_api_fib_path2_t * fp)
+{
+ if (fp->afi == IP46_TYPE_IP6)
+ print (vam->ofp,
+ " weight %d, sw_if_index %d, is_local %d, is_drop %d, "
+ "is_unreach %d, is_prohibit %d, afi %d, next_hop %U",
+ ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
+ fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
+ format_ip6_address, fp->next_hop);
+ else if (fp->afi == IP46_TYPE_IP4)
+ print (vam->ofp,
+ " weight %d, sw_if_index %d, is_local %d, is_drop %d, "
+ "is_unreach %d, is_prohibit %d, afi %d, next_hop %U",
+ ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
+ fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
+ format_ip4_address, fp->next_hop);
+}
+
+static void
+vl_api_mpls_fib_path_json_print (vat_json_node_t * node,
+ vl_api_fib_path2_t * fp)
+{
+ struct in_addr ip4;
+ struct in6_addr ip6;
+
+ vat_json_object_add_uint (node, "weight", ntohl (fp->weight));
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index));
+ vat_json_object_add_uint (node, "is_local", fp->is_local);
+ vat_json_object_add_uint (node, "is_drop", fp->is_drop);
+ vat_json_object_add_uint (node, "is_unreach", fp->is_unreach);
+ vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit);
+ vat_json_object_add_uint (node, "next_hop_afi", fp->afi);
+ if (fp->afi == IP46_TYPE_IP4)
+ {
+ clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "next_hop", ip4);
+ }
+ else if (fp->afi == IP46_TYPE_IP6)
+ {
+ clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "next_hop", ip6);
+ }
+}
+
+static void
+vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int count = ntohl (mp->mt_count);
+ vl_api_fib_path2_t *fp;
+ i32 i;
+
+ print (vam->ofp, "[%d]: sw_if_index %d via:",
+ ntohl (mp->mt_tunnel_index), ntohl (mp->mt_sw_if_index));
+ fp = mp->mt_paths;
+ for (i = 0; i < count; i++)
+ {
+ vl_api_mpls_fib_path_print (vam, fp);
+ fp++;
+ }
+
+ print (vam->ofp, "");
+}
+
+#define vl_api_mpls_tunnel_details_t_endian vl_noop_handler
+#define vl_api_mpls_tunnel_details_t_print vl_noop_handler
+
+static void
+vl_api_mpls_tunnel_details_t_handler_json (vl_api_mpls_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ int count = ntohl (mp->mt_count);
+ vl_api_fib_path2_t *fp;
+ i32 i;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "tunnel_index",
+ ntohl (mp->mt_tunnel_index));
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->mt_sw_if_index));
+
+ vat_json_object_add_uint (node, "l2_only", mp->mt_l2_only);
+
+ fp = mp->mt_paths;
+ for (i = 0; i < count; i++)
+ {
+ vl_api_mpls_fib_path_json_print (node, fp);
+ fp++;
+ }
+}
+
+static int
+api_mpls_tunnel_dump (vat_main_t * vam)
+{
+ vl_api_mpls_tunnel_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ i32 index = -1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (vam->input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!unformat (vam->input, "tunnel_index %d", &index))
+ {
+ index = -1;
+ break;
+ }
+ }
+
+ print (vam->ofp, " tunnel_index %d", index);
+
+ M (MPLS_TUNNEL_DUMP, mp);
+ mp->tunnel_index = htonl (index);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+#define vl_api_mpls_fib_details_t_endian vl_noop_handler
+#define vl_api_mpls_fib_details_t_print vl_noop_handler
+
+
+static void
+vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int count = ntohl (mp->count);
+ vl_api_fib_path2_t *fp;
+ int i;
+
+ print (vam->ofp,
+ "table-id %d, label %u, eos_bit %u",
+ ntohl (mp->table_id), ntohl (mp->label), mp->eos_bit);
+ fp = mp->path;
+ for (i = 0; i < count; i++)
+ {
+ vl_api_mpls_fib_path_print (vam, fp);
+ fp++;
+ }
+}
+
+static void vl_api_mpls_fib_details_t_handler_json
+ (vl_api_mpls_fib_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int count = ntohl (mp->count);
+ vat_json_node_t *node = NULL;
+ vl_api_fib_path2_t *fp;
+ int i;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "table", ntohl (mp->table_id));
+ vat_json_object_add_uint (node, "s_bit", mp->eos_bit);
+ vat_json_object_add_uint (node, "label", ntohl (mp->label));
+ vat_json_object_add_uint (node, "path_count", count);
+ fp = mp->path;
+ for (i = 0; i < count; i++)
+ {
+ vl_api_mpls_fib_path_json_print (node, fp);
+ fp++;
+ }
+}
+
+static int
+api_mpls_fib_dump (vat_main_t * vam)
+{
+ vl_api_mpls_fib_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (MPLS_FIB_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+#define vl_api_ip_fib_details_t_endian vl_noop_handler
+#define vl_api_ip_fib_details_t_print vl_noop_handler
+
+static void
+vl_api_ip_fib_details_t_handler (vl_api_ip_fib_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int count = ntohl (mp->count);
+ vl_api_fib_path_t *fp;
+ int i;
+
+ print (vam->ofp,
+ "table-id %d, prefix %U/%d",
+ ntohl (mp->table_id), format_ip4_address, mp->address,
+ mp->address_length);
+ fp = mp->path;
+ for (i = 0; i < count; i++)
+ {
+ if (fp->afi == IP46_TYPE_IP6)
+ print (vam->ofp,
+ " weight %d, sw_if_index %d, is_local %d, is_drop %d, "
+ "is_unreach %d, is_prohibit %d, afi %d, next_hop %U",
+ ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
+ fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
+ format_ip6_address, fp->next_hop);
+ else if (fp->afi == IP46_TYPE_IP4)
+ print (vam->ofp,
+ " weight %d, sw_if_index %d, is_local %d, is_drop %d, "
+ "is_unreach %d, is_prohibit %d, afi %d, next_hop %U",
+ ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
+ fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
+ format_ip4_address, fp->next_hop);
+ fp++;
+ }
+}
+
+static void vl_api_ip_fib_details_t_handler_json
+ (vl_api_ip_fib_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int count = ntohl (mp->count);
+ vat_json_node_t *node = NULL;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ vl_api_fib_path_t *fp;
+ int i;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "table", ntohl (mp->table_id));
+ clib_memcpy (&ip4, &mp->address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "prefix", ip4);
+ vat_json_object_add_uint (node, "mask_length", mp->address_length);
+ vat_json_object_add_uint (node, "path_count", count);
+ fp = mp->path;
+ for (i = 0; i < count; i++)
+ {
+ vat_json_object_add_uint (node, "weight", ntohl (fp->weight));
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index));
+ vat_json_object_add_uint (node, "is_local", fp->is_local);
+ vat_json_object_add_uint (node, "is_drop", fp->is_drop);
+ vat_json_object_add_uint (node, "is_unreach", fp->is_unreach);
+ vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit);
+ vat_json_object_add_uint (node, "next_hop_afi", fp->afi);
+ if (fp->afi == IP46_TYPE_IP4)
+ {
+ clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "next_hop", ip4);
+ }
+ else if (fp->afi == IP46_TYPE_IP6)
+ {
+ clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "next_hop", ip6);
+ }
+ }
+}
+
+static int
+api_ip_fib_dump (vat_main_t * vam)
+{
+ vl_api_ip_fib_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (IP_FIB_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_mfib_dump (vat_main_t * vam)
+{
+ vl_api_ip_mfib_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (IP_MFIB_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void vl_api_ip_neighbor_details_t_handler
+ (vl_api_ip_neighbor_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%c %U %U",
+ (mp->is_static) ? 'S' : 'D',
+ format_ethernet_address, &mp->mac_address,
+ (mp->is_ipv6) ? format_ip6_address : format_ip4_address,
+ &mp->ip_address);
+}
+
+static void vl_api_ip_neighbor_details_t_handler_json
+ (vl_api_ip_neighbor_details_t * mp)
+{
+
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_string_copy (node, "flag",
+ (mp->is_static) ? (u8 *) "static" : (u8 *)
+ "dynamic");
+
+ vat_json_object_add_string_copy (node, "link_layer",
+ format (0, "%U", format_ethernet_address,
+ &mp->mac_address));
+
+ if (mp->is_ipv6)
+ {
+ clib_memcpy (&ip6, &mp->ip_address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "ip_address", ip6);
+ }
+ else
+ {
+ clib_memcpy (&ip4, &mp->ip_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "ip_address", ip4);
+ }
+}
+
+static int
+api_ip_neighbor_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_neighbor_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u8 is_ipv6 = 0;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "ip6"))
+ is_ipv6 = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (IP_NEIGHBOR_DUMP, mp);
+ mp->is_ipv6 = (u8) is_ipv6;
+ mp->sw_if_index = ntohl (sw_if_index);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+#define vl_api_ip6_fib_details_t_endian vl_noop_handler
+#define vl_api_ip6_fib_details_t_print vl_noop_handler
+
+static void
+vl_api_ip6_fib_details_t_handler (vl_api_ip6_fib_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int count = ntohl (mp->count);
+ vl_api_fib_path_t *fp;
+ int i;
+
+ print (vam->ofp,
+ "table-id %d, prefix %U/%d",
+ ntohl (mp->table_id), format_ip6_address, mp->address,
+ mp->address_length);
+ fp = mp->path;
+ for (i = 0; i < count; i++)
+ {
+ if (fp->afi == IP46_TYPE_IP6)
+ print (vam->ofp,
+ " weight %d, sw_if_index %d, is_local %d, is_drop %d, "
+ "is_unreach %d, is_prohibit %d, afi %d, next_hop %U",
+ ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
+ fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
+ format_ip6_address, fp->next_hop);
+ else if (fp->afi == IP46_TYPE_IP4)
+ print (vam->ofp,
+ " weight %d, sw_if_index %d, is_local %d, is_drop %d, "
+ "is_unreach %d, is_prohibit %d, afi %d, next_hop %U",
+ ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
+ fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
+ format_ip4_address, fp->next_hop);
+ fp++;
+ }
+}
+
+static void vl_api_ip6_fib_details_t_handler_json
+ (vl_api_ip6_fib_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ int count = ntohl (mp->count);
+ vat_json_node_t *node = NULL;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ vl_api_fib_path_t *fp;
+ int i;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "table", ntohl (mp->table_id));
+ clib_memcpy (&ip6, &mp->address, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "prefix", ip6);
+ vat_json_object_add_uint (node, "mask_length", mp->address_length);
+ vat_json_object_add_uint (node, "path_count", count);
+ fp = mp->path;
+ for (i = 0; i < count; i++)
+ {
+ vat_json_object_add_uint (node, "weight", ntohl (fp->weight));
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index));
+ vat_json_object_add_uint (node, "is_local", fp->is_local);
+ vat_json_object_add_uint (node, "is_drop", fp->is_drop);
+ vat_json_object_add_uint (node, "is_unreach", fp->is_unreach);
+ vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit);
+ vat_json_object_add_uint (node, "next_hop_afi", fp->afi);
+ if (fp->afi == IP46_TYPE_IP4)
+ {
+ clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "next_hop", ip4);
+ }
+ else if (fp->afi == IP46_TYPE_IP6)
+ {
+ clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6));
+ vat_json_object_add_ip6 (node, "next_hop", ip6);
+ }
+ }
+}
+
+static int
+api_ip6_fib_dump (vat_main_t * vam)
+{
+ vl_api_ip6_fib_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (IP6_FIB_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip6_mfib_dump (vat_main_t * vam)
+{
+ vl_api_ip6_mfib_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (IP6_MFIB_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+int
+api_classify_table_ids (vat_main_t * vam)
+{
+ vl_api_classify_table_ids_t *mp;
+ int ret;
+
+ /* Construct the API message */
+ M (CLASSIFY_TABLE_IDS, mp);
+ mp->context = 0;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+int
+api_classify_table_by_interface (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_classify_table_by_interface_t *mp;
+
+ u32 sw_if_index = ~0;
+ int ret;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (input, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (CLASSIFY_TABLE_BY_INTERFACE, mp);
+ mp->context = 0;
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+int
+api_classify_table_info (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_classify_table_info_t *mp;
+
+ u32 table_id = ~0;
+ int ret;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table_id %d", &table_id))
+ ;
+ else
+ break;
+ }
+ if (table_id == ~0)
+ {
+ errmsg ("missing table id");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (CLASSIFY_TABLE_INFO, mp);
+ mp->context = 0;
+ mp->table_id = ntohl (table_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+int
+api_classify_session_dump (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_classify_session_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+
+ u32 table_id = ~0;
+ int ret;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table_id %d", &table_id))
+ ;
+ else
+ break;
+ }
+ if (table_id == ~0)
+ {
+ errmsg ("missing table id");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (CLASSIFY_SESSION_DUMP, mp);
+ mp->context = 0;
+ mp->table_id = ntohl (table_id);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ipfix_exporter_details_t_handler (vl_api_ipfix_exporter_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "collector_address %U, collector_port %d, "
+ "src_address %U, vrf_id %d, path_mtu %u, "
+ "template_interval %u, udp_checksum %d",
+ format_ip4_address, mp->collector_address,
+ ntohs (mp->collector_port),
+ format_ip4_address, mp->src_address,
+ ntohl (mp->vrf_id), ntohl (mp->path_mtu),
+ ntohl (mp->template_interval), mp->udp_checksum);
+
+ vam->retval = 0;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_ipfix_exporter_details_t_handler_json
+ (vl_api_ipfix_exporter_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+ struct in_addr collector_address;
+ struct in_addr src_address;
+
+ vat_json_init_object (&node);
+ clib_memcpy (&collector_address, &mp->collector_address,
+ sizeof (collector_address));
+ vat_json_object_add_ip4 (&node, "collector_address", collector_address);
+ vat_json_object_add_uint (&node, "collector_port",
+ ntohs (mp->collector_port));
+ clib_memcpy (&src_address, &mp->src_address, sizeof (src_address));
+ vat_json_object_add_ip4 (&node, "src_address", src_address);
+ vat_json_object_add_int (&node, "vrf_id", ntohl (mp->vrf_id));
+ vat_json_object_add_uint (&node, "path_mtu", ntohl (mp->path_mtu));
+ vat_json_object_add_uint (&node, "template_interval",
+ ntohl (mp->template_interval));
+ vat_json_object_add_int (&node, "udp_checksum", mp->udp_checksum);
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+ vam->retval = 0;
+ vam->result_ready = 1;
+}
+
+int
+api_ipfix_exporter_dump (vat_main_t * vam)
+{
+ vl_api_ipfix_exporter_dump_t *mp;
+ int ret;
+
+ /* Construct the API message */
+ M (IPFIX_EXPORTER_DUMP, mp);
+ mp->context = 0;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ipfix_classify_stream_dump (vat_main_t * vam)
+{
+ vl_api_ipfix_classify_stream_dump_t *mp;
+ int ret;
+
+ /* Construct the API message */
+ M (IPFIX_CLASSIFY_STREAM_DUMP, mp);
+ mp->context = 0;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+ vl_api_ipfix_classify_stream_details_t_handler
+ (vl_api_ipfix_classify_stream_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ print (vam->ofp, "domain_id %d, src_port %d",
+ ntohl (mp->domain_id), ntohs (mp->src_port));
+ vam->retval = 0;
+ vam->result_ready = 1;
+}
+
+static void
+ vl_api_ipfix_classify_stream_details_t_handler_json
+ (vl_api_ipfix_classify_stream_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_uint (&node, "domain_id", ntohl (mp->domain_id));
+ vat_json_object_add_uint (&node, "src_port", ntohs (mp->src_port));
+
+ vat_json_print (vam->ofp, &node);
+ vat_json_free (&node);
+ vam->retval = 0;
+ vam->result_ready = 1;
+}
+
+static int
+api_ipfix_classify_table_dump (vat_main_t * vam)
+{
+ vl_api_ipfix_classify_table_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%15s%15s%20s", "table_id", "ip_version",
+ "transport_protocol");
+ }
+
+ /* Construct the API message */
+ M (IPFIX_CLASSIFY_TABLE_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+ vl_api_ipfix_classify_table_details_t_handler
+ (vl_api_ipfix_classify_table_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ print (vam->ofp, "%15d%15d%20d", ntohl (mp->table_id), mp->ip_version,
+ mp->transport_protocol);
+}
+
+static void
+ vl_api_ipfix_classify_table_details_t_handler_json
+ (vl_api_ipfix_classify_table_details_t * mp)
+{
+ vat_json_node_t *node = NULL;
+ vat_main_t *vam = &vat_main;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+
+ node = vat_json_array_add (&vam->json_tree);
+ vat_json_init_object (node);
+
+ vat_json_object_add_uint (node, "table_id", ntohl (mp->table_id));
+ vat_json_object_add_uint (node, "ip_version", mp->ip_version);
+ vat_json_object_add_uint (node, "transport_protocol",
+ mp->transport_protocol);
+}
+
+static int
+api_sw_interface_span_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_span_enable_disable_t *mp;
+ u32 src_sw_if_index = ~0;
+ u32 dst_sw_if_index = ~0;
+ u8 state = 3;
+ int ret;
+ u8 is_l2 = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (i, "src %U", api_unformat_sw_if_index, vam, &src_sw_if_index))
+ ;
+ else if (unformat (i, "src_sw_if_index %d", &src_sw_if_index))
+ ;
+ else
+ if (unformat
+ (i, "dst %U", api_unformat_sw_if_index, vam, &dst_sw_if_index))
+ ;
+ else if (unformat (i, "dst_sw_if_index %d", &dst_sw_if_index))
+ ;
+ else if (unformat (i, "disable"))
+ state = 0;
+ else if (unformat (i, "rx"))
+ state = 1;
+ else if (unformat (i, "tx"))
+ state = 2;
+ else if (unformat (i, "both"))
+ state = 3;
+ else if (unformat (i, "l2"))
+ is_l2 = 1;
+ else
+ break;
+ }
+
+ M (SW_INTERFACE_SPAN_ENABLE_DISABLE, mp);
+
+ mp->sw_if_index_from = htonl (src_sw_if_index);
+ mp->sw_if_index_to = htonl (dst_sw_if_index);
+ mp->state = state;
+ mp->is_l2 = is_l2;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
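+
+/*
+ * Example (sketch): state encodes the mirror direction (0 disable, 1 rx,
+ * 2 tx, 3 both, the default) and "l2" selects the L2 SPAN feature:
+ *
+ *   sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2 rx
+ *   sw_interface_span_enable_disable src_sw_if_index 1 disable
+ */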
+
+static void
+vl_api_sw_interface_span_details_t_handler (vl_api_sw_interface_span_details_t
+ * mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *sw_if_from_name = 0;
+ u8 *sw_if_to_name = 0;
+ u32 sw_if_index_from = ntohl (mp->sw_if_index_from);
+ u32 sw_if_index_to = ntohl (mp->sw_if_index_to);
+ char *states[] = { "none", "rx", "tx", "both" };
+ hash_pair_t *p;
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vam->sw_if_index_by_interface_name,
+ ({
+ if ((u32) p->value[0] == sw_if_index_from)
+ {
+ sw_if_from_name = (u8 *)(p->key);
+ if (sw_if_to_name)
+ break;
+ }
+ if ((u32) p->value[0] == sw_if_index_to)
+ {
+ sw_if_to_name = (u8 *)(p->key);
+ if (sw_if_from_name)
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ print (vam->ofp, "%20s => %20s (%s)",
+ sw_if_from_name, sw_if_to_name, states[mp->state]);
+}
+
+static void
+ vl_api_sw_interface_span_details_t_handler_json
+ (vl_api_sw_interface_span_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ u8 *sw_if_from_name = 0;
+ u8 *sw_if_to_name = 0;
+ u32 sw_if_index_from = ntohl (mp->sw_if_index_from);
+ u32 sw_if_index_to = ntohl (mp->sw_if_index_to);
+ hash_pair_t *p;
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vam->sw_if_index_by_interface_name,
+ ({
+ if ((u32) p->value[0] == sw_if_index_from)
+ {
+ sw_if_from_name = (u8 *)(p->key);
+ if (sw_if_to_name)
+ break;
+ }
+ if ((u32) p->value[0] == sw_if_index_to)
+ {
+ sw_if_to_name = (u8 *)(p->key);
+ if (sw_if_from_name)
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "src-if-index", sw_if_index_from);
+ vat_json_object_add_string_copy (node, "src-if-name", sw_if_from_name);
+ vat_json_object_add_uint (node, "dst-if-index", sw_if_index_to);
+ if (0 != sw_if_to_name)
+ {
+ vat_json_object_add_string_copy (node, "dst-if-name", sw_if_to_name);
+ }
+ vat_json_object_add_uint (node, "state", mp->state);
+}
+
+static int
+api_sw_interface_span_dump (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_sw_interface_span_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u8 is_l2 = 0;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "l2"))
+ is_l2 = 1;
+ else
+ break;
+ }
+
+ M (SW_INTERFACE_SPAN_DUMP, mp);
+ mp->is_l2 = is_l2;
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+int
+api_pg_create_interface (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_pg_create_interface_t *mp;
+
+ u32 if_id = ~0;
+ int ret;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "if_id %d", &if_id))
+ ;
+ else
+ break;
+ }
+ if (if_id == ~0)
+ {
+ errmsg ("missing pg interface index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (PG_CREATE_INTERFACE, mp);
+ mp->context = 0;
+ mp->interface_id = ntohl (if_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
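+/*
+ * Example VAT usage (the id is illustrative):
+ *   pg_create_interface if_id 0
+ */
+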
+int
+api_pg_capture (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_pg_capture_t *mp;
+
+ u32 if_id = ~0;
+ u8 enable = 1;
+ u32 count = 1;
+ u8 pcap_file_set = 0;
+ u8 *pcap_file = 0;
+ int ret;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "if_id %d", &if_id))
+ ;
+ else if (unformat (input, "pcap %s", &pcap_file))
+ pcap_file_set = 1;
+ else if (unformat (input, "count %d", &count))
+ ;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+ if (if_id == ~0)
+ {
+ errmsg ("missing pg interface index");
+ return -99;
+ }
+ if (pcap_file_set > 0)
+ {
+ if (vec_len (pcap_file) > 255)
+ {
+ errmsg ("pcap file name is too long");
+ return -99;
+ }
+ }
+
+ u32 name_len = vec_len (pcap_file);
+ /* Construct the API message */
+ M (PG_CAPTURE, mp);
+ mp->context = 0;
+ mp->interface_id = ntohl (if_id);
+ mp->is_enabled = enable;
+ mp->count = ntohl (count);
+ mp->pcap_name_length = ntohl (name_len);
+ if (pcap_file_set != 0)
+ {
+ clib_memcpy (mp->pcap_file_name, pcap_file, name_len);
+ }
+ vec_free (pcap_file);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
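+/*
+ * Example VAT usage (file path is illustrative; the name must fit in
+ * 255 bytes, as checked above):
+ *   pg_capture if_id 0 pcap /tmp/pg0.pcap count 100
+ *   pg_capture if_id 0 disable
+ */
+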
+int
+api_pg_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_pg_enable_disable_t *mp;
+
+ u8 enable = 1;
+ u8 stream_name_set = 0;
+ u8 *stream_name = 0;
+ int ret;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "stream %s", &stream_name))
+ stream_name_set = 1;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (stream_name_set > 0)
+ {
+ if (vec_len (stream_name) > 255)
+ {
+ errmsg ("stream name too long");
+ return -99;
+ }
+ }
+
+ u32 name_len = vec_len (stream_name);
+ /* Construct the API message */
+ M (PG_ENABLE_DISABLE, mp);
+ mp->context = 0;
+ mp->is_enabled = enable;
+ if (stream_name_set != 0)
+ {
+ mp->stream_name_length = ntohl (name_len);
+ clib_memcpy (mp->stream_name, stream_name, name_len);
+ }
+ vec_free (stream_name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
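+/*
+ * Example VAT usage (the stream name is illustrative):
+ *   pg_enable_disable stream pg0-stream
+ *   pg_enable_disable disable
+ */
+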
+int
+api_ip_source_and_port_range_check_add_del (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_ip_source_and_port_range_check_add_del_t *mp;
+
+ u16 *low_ports = 0;
+ u16 *high_ports = 0;
+ u16 this_low;
+ u16 this_hi;
+ ip4_address_t ip4_addr;
+ ip6_address_t ip6_addr;
+ u32 length;
+ u32 tmp, tmp2;
+ u8 prefix_set = 0;
+ u32 vrf_id = ~0;
+ u8 is_add = 1;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U/%d", unformat_ip4_address, &ip4_addr, &length))
+ {
+ prefix_set = 1;
+ }
+ else
+ if (unformat
+ (input, "%U/%d", unformat_ip6_address, &ip6_addr, &length))
+ {
+ prefix_set = 1;
+ is_ipv6 = 1;
+ }
+ else if (unformat (input, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "port %d", &tmp))
+ {
+ if (tmp == 0 || tmp > 65535)
+ {
+ errmsg ("port %d out of range", tmp);
+ return -99;
+ }
+ this_low = tmp;
+ this_hi = this_low + 1;
+ vec_add1 (low_ports, this_low);
+ vec_add1 (high_ports, this_hi);
+ }
+ else if (unformat (input, "range %d - %d", &tmp, &tmp2))
+ {
+ if ((tmp > tmp2) || (tmp == 0) || (tmp2 > 65535))
+ {
+ errmsg ("incorrect range parameters");
+ return -99;
+ }
+ this_low = tmp;
+	  /* Note: the debug CLI adds +1 to the high port before
+	     calling the function that does the real work
+	     (ip_source_and_port_range_check_add_del). This fn is a
+	     wrapper around the binary API that a control plane would
+	     call, and the binary API handler applies the increment
+	     itself; so pass the high port unmodified, for consistency
+	     between VAT and other control planes.
+	  */
+ this_hi = tmp2;
+ vec_add1 (low_ports, this_low);
+ vec_add1 (high_ports, this_hi);
+ }
+ else
+ break;
+ }
+
+ if (prefix_set == 0)
+ {
+ errmsg ("<address>/<mask> not specified");
+ return -99;
+ }
+
+ if (vrf_id == ~0)
+ {
+ errmsg ("VRF ID required, not specified");
+ return -99;
+ }
+
+ if (vrf_id == 0)
+ {
+      errmsg
+	("VRF ID must not be the default; use a distinct VRF for this purpose.");
+ return -99;
+ }
+
+ if (vec_len (low_ports) == 0)
+ {
+ errmsg ("At least one port or port range required");
+ return -99;
+ }
+
+ M (IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+
+ if (is_ipv6)
+ {
+ mp->is_ipv6 = 1;
+ clib_memcpy (mp->address, &ip6_addr, sizeof (ip6_addr));
+ }
+ else
+ {
+ mp->is_ipv6 = 0;
+ clib_memcpy (mp->address, &ip4_addr, sizeof (ip4_addr));
+ }
+
+ mp->mask_length = length;
+ mp->number_of_ranges = vec_len (low_ports);
+
+  clib_memcpy (mp->low_ports, low_ports,
+	       vec_len (low_ports) * sizeof (low_ports[0]));
+ vec_free (low_ports);
+
+  clib_memcpy (mp->high_ports, high_ports,
+	       vec_len (high_ports) * sizeof (high_ports[0]));
+ vec_free (high_ports);
+
+ mp->vrf_id = ntohl (vrf_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
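+/*
+ * Example VAT usage (prefix, VRF and ports are illustrative; the VRF
+ * must be non-default). Per the note above, the high port is sent
+ * unmodified, so "range 1000 - 2000" covers ports 1000..2000:
+ *   ip_source_and_port_range_check_add_del 10.0.0.0/24 vrf 99 port 8080
+ *   ip_source_and_port_range_check_add_del 10.0.0.0/24 vrf 99 range 1000 - 2000
+ */
+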
+int
+api_ip_source_and_port_range_check_interface_add_del (vat_main_t * vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_ip_source_and_port_range_check_interface_add_del_t *mp;
+ u32 sw_if_index = ~0;
+ int vrf_set = 0;
+ u32 tcp_out_vrf_id = ~0, udp_out_vrf_id = ~0;
+ u32 tcp_in_vrf_id = ~0, udp_in_vrf_id = ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (input, "tcp-out-vrf %d", &tcp_out_vrf_id))
+ vrf_set = 1;
+ else if (unformat (input, "udp-out-vrf %d", &udp_out_vrf_id))
+ vrf_set = 1;
+ else if (unformat (input, "tcp-in-vrf %d", &tcp_in_vrf_id))
+ vrf_set = 1;
+ else if (unformat (input, "udp-in-vrf %d", &udp_in_vrf_id))
+ vrf_set = 1;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("Interface required but not specified");
+ return -99;
+ }
+
+ if (vrf_set == 0)
+ {
+ errmsg ("VRF ID required but not specified");
+ return -99;
+ }
+
+ if (tcp_out_vrf_id == 0
+ || udp_out_vrf_id == 0 || tcp_in_vrf_id == 0 || udp_in_vrf_id == 0)
+ {
+      errmsg
+	("VRF ID must not be the default; use a distinct VRF for this purpose.");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = is_add;
+ mp->tcp_out_vrf_id = ntohl (tcp_out_vrf_id);
+ mp->udp_out_vrf_id = ntohl (udp_out_vrf_id);
+ mp->tcp_in_vrf_id = ntohl (tcp_in_vrf_id);
+ mp->udp_in_vrf_id = ntohl (udp_in_vrf_id);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
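+/*
+ * Example VAT usage (index and VRFs are illustrative; the VRFs must be
+ * non-default):
+ *   ip_source_and_port_range_check_interface_add_del sw_if_index 1 tcp-out-vrf 99 udp-out-vrf 99
+ */
+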
+static int
+api_ipsec_gre_add_del_tunnel (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_gre_add_del_tunnel_t *mp;
+ u32 local_sa_id = 0;
+ u32 remote_sa_id = 0;
+ ip4_address_t src_address;
+ ip4_address_t dst_address;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "local_sa %d", &local_sa_id))
+ ;
+ else if (unformat (i, "remote_sa %d", &remote_sa_id))
+ ;
+ else if (unformat (i, "src %U", unformat_ip4_address, &src_address))
+ ;
+ else if (unformat (i, "dst %U", unformat_ip4_address, &dst_address))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_GRE_ADD_DEL_TUNNEL, mp);
+
+ mp->local_sa_id = ntohl (local_sa_id);
+ mp->remote_sa_id = ntohl (remote_sa_id);
+ clib_memcpy (mp->src_address, &src_address, sizeof (src_address));
+ clib_memcpy (mp->dst_address, &dst_address, sizeof (dst_address));
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
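+/*
+ * Example VAT usage (SA ids and addresses are illustrative; both SAs
+ * must already exist):
+ *   ipsec_gre_add_del_tunnel src 192.0.2.1 dst 192.0.2.2 local_sa 10 remote_sa 20
+ *   ipsec_gre_add_del_tunnel src 192.0.2.1 dst 192.0.2.2 local_sa 10 remote_sa 20 del
+ */
+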
+static int
+api_punt (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_punt_t *mp;
+ u32 ipv = ~0;
+ u32 protocol = ~0;
+ u32 port = ~0;
+ int is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ip %d", &ipv))
+ ;
+ else if (unformat (i, "protocol %d", &protocol))
+ ;
+ else if (unformat (i, "port %d", &port))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (PUNT, mp);
+
+ mp->is_add = (u8) is_add;
+ mp->ipv = (u8) ipv;
+ mp->l4_protocol = (u8) protocol;
+ mp->l4_port = htons ((u16) port);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
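+/*
+ * Example VAT usage (protocol 17 is UDP; the port is illustrative):
+ *   punt ip 4 protocol 17 port 5000
+ *   punt ip 4 protocol 17 port 5000 del
+ */
+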
+static void vl_api_ipsec_gre_tunnel_details_t_handler
+ (vl_api_ipsec_gre_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%11d%15U%15U%14d%14d",
+ ntohl (mp->sw_if_index),
+ format_ip4_address, &mp->src_address,
+ format_ip4_address, &mp->dst_address,
+ ntohl (mp->local_sa_id), ntohl (mp->remote_sa_id));
+}
+
+static void vl_api_ipsec_gre_tunnel_details_t_handler_json
+ (vl_api_ipsec_gre_tunnel_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+ struct in_addr ip4;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index));
+ clib_memcpy (&ip4, &mp->src_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "src_address", ip4);
+ clib_memcpy (&ip4, &mp->dst_address, sizeof (ip4));
+ vat_json_object_add_ip4 (node, "dst_address", ip4);
+ vat_json_object_add_uint (node, "local_sa_id", ntohl (mp->local_sa_id));
+ vat_json_object_add_uint (node, "remote_sa_id", ntohl (mp->remote_sa_id));
+}
+
+static int
+api_ipsec_gre_tunnel_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_gre_tunnel_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ sw_if_index = ~0;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%11s%15s%15s%14s%14s",
+ "sw_if_index", "src_address", "dst_address",
+ "local_sa_id", "remote_sa_id");
+ }
+
+ /* Get list of gre-tunnel interfaces */
+ M (IPSEC_GRE_TUNNEL_DUMP, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_delete_subif (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_delete_subif_t *mp;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (DELETE_SUBIF, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+#define foreach_pbb_vtr_op \
+_("disable", L2_VTR_DISABLED) \
+_("pop", L2_VTR_POP_2) \
+_("push", L2_VTR_PUSH_2)
+
+static int
+api_l2_interface_pbb_tag_rewrite (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2_interface_pbb_tag_rewrite_t *mp;
+ u32 sw_if_index = ~0, vtr_op = ~0;
+ u16 outer_tag = ~0;
+ u8 dmac[6], smac[6];
+ u8 dmac_set = 0, smac_set = 0;
+ u16 vlanid = 0;
+ u32 sid = ~0;
+ u32 tmp;
+ int ret;
+
+ /* Shut up coverity */
+ memset (dmac, 0, sizeof (dmac));
+ memset (smac, 0, sizeof (smac));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "vtr_op %d", &vtr_op))
+ ;
+#define _(n,v) else if (unformat(i, n)) {vtr_op = v;}
+ foreach_pbb_vtr_op
+#undef _
+ else if (unformat (i, "translate_pbb_stag"))
+ {
+ if (unformat (i, "%d", &tmp))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+ outer_tag = tmp;
+ }
+ else
+ {
+ errmsg
+ ("translate_pbb_stag operation requires outer tag definition");
+ return -99;
+ }
+ }
+ else if (unformat (i, "dmac %U", unformat_ethernet_address, dmac))
+ dmac_set++;
+ else if (unformat (i, "smac %U", unformat_ethernet_address, smac))
+ smac_set++;
+ else if (unformat (i, "sid %d", &sid))
+ ;
+ else if (unformat (i, "vlanid %d", &tmp))
+ vlanid = tmp;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if ((sw_if_index == ~0) || (vtr_op == ~0))
+ {
+ errmsg ("missing sw_if_index or vtr operation");
+ return -99;
+ }
+ if (((vtr_op == L2_VTR_PUSH_2) || (vtr_op == L2_VTR_TRANSLATE_2_2))
+ && ((dmac_set == 0) || (smac_set == 0) || (sid == ~0)))
+ {
+ errmsg
+ ("push and translate_qinq operations require dmac, smac, sid and optionally vlanid");
+ return -99;
+ }
+
+ M (L2_INTERFACE_PBB_TAG_REWRITE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->vtr_op = ntohl (vtr_op);
+ mp->outer_tag = ntohs (outer_tag);
+ clib_memcpy (mp->b_dmac, dmac, sizeof (dmac));
+ clib_memcpy (mp->b_smac, smac, sizeof (smac));
+ mp->b_vlanid = ntohs (vlanid);
+ mp->i_sid = ntohl (sid);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
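+/*
+ * Example VAT usage (MACs, sid and vlanid are illustrative; push and
+ * translate operations require dmac, smac and sid, as checked above):
+ *   l2_interface_pbb_tag_rewrite sw_if_index 1 push dmac 01:02:03:04:05:06 smac 0a:0b:0c:0d:0e:0f sid 100 vlanid 20
+ *   l2_interface_pbb_tag_rewrite sw_if_index 1 disable
+ */
+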
+static int
+api_flow_classify_set_interface (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_flow_classify_set_interface_t *mp;
+ u32 sw_if_index;
+  int sw_if_index_set = 0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (i, "ip6-table %d", &ip6_table_index))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (FLOW_CLASSIFY_SET_INTERFACE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->ip4_table_index = ntohl (ip4_table_index);
+ mp->ip6_table_index = ntohl (ip6_table_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_flow_classify_dump (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_flow_classify_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u8 type = FLOW_CLASSIFY_N_TABLES;
+ int ret;
+
+ if (unformat (i, "type %U", unformat_flow_classify_table_type, &type))
+ ;
+ else
+ {
+ errmsg ("classify table type must be specified");
+ return -99;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%10s%20s", "Intfc idx", "Classify table");
+ }
+
+ M (FLOW_CLASSIFY_DUMP, mp);
+ mp->type = type;
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_feature_enable_disable (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_feature_enable_disable_t *mp;
+ u8 *arc_name = 0;
+ u8 *feature_name = 0;
+ u32 sw_if_index = ~0;
+ u8 enable = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "arc_name %s", &arc_name))
+ ;
+ else if (unformat (i, "feature_name %s", &feature_name))
+ ;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (arc_name == 0)
+ {
+ errmsg ("missing arc name");
+ return -99;
+ }
+  if (vec_len (arc_name) > 63)
+    {
+      errmsg ("arc name too long");
+      return -99;
+    }
+
+ if (feature_name == 0)
+ {
+ errmsg ("missing feature name");
+ return -99;
+ }
+  if (vec_len (feature_name) > 63)
+    {
+      errmsg ("feature name too long");
+      return -99;
+    }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (FEATURE_ENABLE_DISABLE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = enable;
+ clib_memcpy (mp->arc_name, arc_name, vec_len (arc_name));
+ clib_memcpy (mp->feature_name, feature_name, vec_len (feature_name));
+ vec_free (arc_name);
+ vec_free (feature_name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
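+/*
+ * Example VAT usage (arc and feature names are illustrative; they must
+ * match names registered in the running image):
+ *   feature_enable_disable arc_name ip4-unicast feature_name ip4-flow-classify sw_if_index 1
+ *   feature_enable_disable arc_name ip4-unicast feature_name ip4-flow-classify sw_if_index 1 disable
+ */
+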
+static int
+api_sw_interface_tag_add_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_tag_add_del_t *mp;
+ u32 sw_if_index = ~0;
+ u8 *tag = 0;
+ u8 enable = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "tag %s", &tag))
+ ;
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "del"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (enable && (tag == 0))
+ {
+ errmsg ("no tag specified");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_TAG_ADD_DEL, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = enable;
+ if (enable)
+ strncpy ((char *) mp->tag, (char *) tag, ARRAY_LEN (mp->tag) - 1);
+ vec_free (tag);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
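+/*
+ * Example VAT usage (the tag text is illustrative; the tag is
+ * truncated to fit the fixed-size message field):
+ *   sw_interface_tag_add_del sw_if_index 1 tag uplink
+ *   sw_interface_tag_add_del sw_if_index 1 del
+ */
+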
+static void vl_api_l2_xconnect_details_t_handler
+ (vl_api_l2_xconnect_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "%15d%15d",
+ ntohl (mp->rx_sw_if_index), ntohl (mp->tx_sw_if_index));
+}
+
+static void vl_api_l2_xconnect_details_t_handler_json
+ (vl_api_l2_xconnect_details_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ vat_json_node_t *node = NULL;
+
+ if (VAT_JSON_ARRAY != vam->json_tree.type)
+ {
+ ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+ vat_json_init_array (&vam->json_tree);
+ }
+ node = vat_json_array_add (&vam->json_tree);
+
+ vat_json_init_object (node);
+ vat_json_object_add_uint (node, "rx_sw_if_index",
+ ntohl (mp->rx_sw_if_index));
+ vat_json_object_add_uint (node, "tx_sw_if_index",
+ ntohl (mp->tx_sw_if_index));
+}
+
+static int
+api_l2_xconnect_dump (vat_main_t * vam)
+{
+ vl_api_l2_xconnect_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%15s%15s", "rx_sw_if_index", "tx_sw_if_index");
+ }
+
+ M (L2_XCONNECT_DUMP, mp);
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ M (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_mtu (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_mtu_t *mp;
+ u32 sw_if_index = ~0;
+ u32 mtu = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "mtu %d", &mtu))
+ ;
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (mtu == 0)
+ {
+ errmsg ("no mtu specified");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_MTU, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->mtu = ntohs ((u16) mtu);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
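+/*
+ * Example VAT usage (values are illustrative; the MTU is carried as a
+ * 16-bit field):
+ *   sw_interface_set_mtu sw_if_index 1 mtu 9000
+ */
+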
+static int
+api_p2p_ethernet_add (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_p2p_ethernet_add_t *mp;
+ u32 parent_if_index = ~0;
+ u32 sub_id = ~0;
+ u8 remote_mac[6];
+ u8 mac_set = 0;
+ int ret;
+
+ memset (remote_mac, 0, sizeof (remote_mac));
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &parent_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &parent_if_index))
+ ;
+ else
+ if (unformat
+ (i, "remote_mac %U", unformat_ethernet_address, remote_mac))
+ mac_set++;
+ else if (unformat (i, "sub_id %d", &sub_id))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (parent_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (mac_set == 0)
+ {
+ errmsg ("missing remote mac address");
+ return -99;
+ }
+ if (sub_id == ~0)
+ {
+ errmsg ("missing sub-interface id");
+ return -99;
+ }
+
+ M (P2P_ETHERNET_ADD, mp);
+ mp->parent_if_index = ntohl (parent_if_index);
+ mp->subif_id = ntohl (sub_id);
+ clib_memcpy (mp->remote_mac, remote_mac, sizeof (remote_mac));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_p2p_ethernet_del (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_p2p_ethernet_del_t *mp;
+ u32 parent_if_index = ~0;
+ u8 remote_mac[6];
+ u8 mac_set = 0;
+ int ret;
+
+ memset (remote_mac, 0, sizeof (remote_mac));
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &parent_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &parent_if_index))
+ ;
+ else
+ if (unformat
+ (i, "remote_mac %U", unformat_ethernet_address, remote_mac))
+ mac_set++;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (parent_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (mac_set == 0)
+ {
+ errmsg ("missing remote mac address");
+ return -99;
+ }
+
+ M (P2P_ETHERNET_DEL, mp);
+ mp->parent_if_index = ntohl (parent_if_index);
+ clib_memcpy (mp->remote_mac, remote_mac, sizeof (remote_mac));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_lldp_config (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_lldp_config_t *mp;
+ int tx_hold = 0;
+ int tx_interval = 0;
+ u8 *sys_name = NULL;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "system-name %s", &sys_name))
+ ;
+ else if (unformat (i, "tx-hold %d", &tx_hold))
+ ;
+ else if (unformat (i, "tx-interval %d", &tx_interval))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ vec_add1 (sys_name, 0);
+
+ M (LLDP_CONFIG, mp);
+ mp->tx_hold = htonl (tx_hold);
+ mp->tx_interval = htonl (tx_interval);
+ clib_memcpy (mp->system_name, sys_name, vec_len (sys_name));
+ vec_free (sys_name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_lldp (vat_main_t * vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_lldp_t *mp;
+ u32 sw_if_index = ~0;
+ u32 enable = 1;
+ u8 *port_desc = NULL;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ enable = 0;
+ else
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "port-desc %s", &port_desc))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ vec_add1 (port_desc, 0);
+ M (SW_INTERFACE_SET_LLDP, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = enable;
+ clib_memcpy (mp->port_desc, port_desc, vec_len (port_desc));
+ vec_free (port_desc);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_tcp_configure_src_addresses (vat_main_t * vam)
+{
+ vl_api_tcp_configure_src_addresses_t *mp;
+ unformat_input_t *i = vam->input;
+ ip4_address_t v4first, v4last;
+ ip6_address_t v6first, v6last;
+ u8 range_set = 0;
+ u32 vrf_id = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U - %U",
+ unformat_ip4_address, &v4first,
+ unformat_ip4_address, &v4last))
+ {
+ if (range_set)
+ {
+ errmsg ("one range per message (range already set)");
+ return -99;
+ }
+ range_set = 1;
+ }
+ else if (unformat (i, "%U - %U",
+ unformat_ip6_address, &v6first,
+ unformat_ip6_address, &v6last))
+ {
+ if (range_set)
+ {
+ errmsg ("one range per message (range already set)");
+ return -99;
+ }
+ range_set = 2;
+ }
+ else if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else
+ break;
+ }
+
+ if (range_set == 0)
+ {
+ errmsg ("address range not set");
+ return -99;
+ }
+
+ M (TCP_CONFIGURE_SRC_ADDRESSES, mp);
+ mp->vrf_id = ntohl (vrf_id);
+ /* ipv6? */
+ if (range_set == 2)
+ {
+ mp->is_ipv6 = 1;
+ clib_memcpy (mp->first_address, &v6first, sizeof (v6first));
+ clib_memcpy (mp->last_address, &v6last, sizeof (v6last));
+ }
+ else
+ {
+ mp->is_ipv6 = 0;
+ clib_memcpy (mp->first_address, &v4first, sizeof (v4first));
+ clib_memcpy (mp->last_address, &v4last, sizeof (v4last));
+ }
+ S (mp);
+ W (ret);
+ return ret;
+}
+
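+/*
+ * Example VAT usage (addresses are illustrative; one range per
+ * message, as enforced above):
+ *   tcp_configure_src_addresses 192.0.2.1 - 192.0.2.10 vrf 0
+ */
+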
+static int
+q_or_quit (vat_main_t * vam)
+{
+#if VPP_API_TEST_BUILTIN == 0
+ longjmp (vam->jump_buf, 1);
+#endif
+ return 0; /* not so much */
+}
+
+static int
+q (vat_main_t * vam)
+{
+ return q_or_quit (vam);
+}
+
+static int
+quit (vat_main_t * vam)
+{
+ return q_or_quit (vam);
+}
+
+static int
+comment (vat_main_t * vam)
+{
+ return 0;
+}
+
+static int
+cmd_cmp (void *a1, void *a2)
+{
+ u8 **c1 = a1;
+ u8 **c2 = a2;
+
+ return strcmp ((char *) (c1[0]), (char *) (c2[0]));
+}
+
+static int
+help (vat_main_t * vam)
+{
+ u8 **cmds = 0;
+ u8 *name = 0;
+ hash_pair_t *p;
+ unformat_input_t *i = vam->input;
+ int j;
+
+ if (unformat (i, "%s", &name))
+ {
+ uword *hs;
+
+ vec_add1 (name, 0);
+
+ hs = hash_get_mem (vam->help_by_name, name);
+ if (hs)
+ print (vam->ofp, "usage: %s %s", name, hs[0]);
+ else
+ print (vam->ofp, "No such msg / command '%s'", name);
+ vec_free (name);
+ return 0;
+ }
+
+ print (vam->ofp, "Help is available for the following:");
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vam->function_by_name,
+ ({
+ vec_add1 (cmds, (u8 *)(p->key));
+ }));
+ /* *INDENT-ON* */
+
+ vec_sort_with_function (cmds, cmd_cmp);
+
+ for (j = 0; j < vec_len (cmds); j++)
+ print (vam->ofp, "%s", cmds[j]);
+
+ vec_free (cmds);
+ return 0;
+}
+
+static int
+set (vat_main_t * vam)
+{
+ u8 *name = 0, *value = 0;
+ unformat_input_t *i = vam->input;
+
+ if (unformat (i, "%s", &name))
+ {
+ /* The input buffer is a vector, not a string. */
+ value = vec_dup (i->buffer);
+ vec_delete (value, i->index, 0);
+ /* Almost certainly has a trailing newline */
+      if (vec_len (value) && value[vec_len (value) - 1] == '\n')
+ value[vec_len (value) - 1] = 0;
+ /* Make sure it's a proper string, one way or the other */
+ vec_add1 (value, 0);
+ (void) clib_macro_set_value (&vam->macro_main,
+ (char *) name, (char *) value);
+ }
+ else
+ errmsg ("usage: set <name> <value>");
+
+ vec_free (name);
+ vec_free (value);
+ return 0;
+}
+
+static int
+unset (vat_main_t * vam)
+{
+ u8 *name = 0;
+
+ if (unformat (vam->input, "%s", &name))
+ if (clib_macro_unset (&vam->macro_main, (char *) name) == 1)
+ errmsg ("unset: %s wasn't set", name);
+ vec_free (name);
+ return 0;
+}
+
+typedef struct
+{
+ u8 *name;
+ u8 *value;
+} macro_sort_t;
+
+
+static int
+macro_sort_cmp (void *a1, void *a2)
+{
+ macro_sort_t *s1 = a1;
+ macro_sort_t *s2 = a2;
+
+ return strcmp ((char *) (s1->name), (char *) (s2->name));
+}
+
+static int
+dump_macro_table (vat_main_t * vam)
+{
+ macro_sort_t *sort_me = 0, *sm;
+ int i;
+ hash_pair_t *p;
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vam->macro_main.the_value_table_hash,
+ ({
+ vec_add2 (sort_me, sm, 1);
+ sm->name = (u8 *)(p->key);
+ sm->value = (u8 *) (p->value[0]);
+ }));
+ /* *INDENT-ON* */
+
+ vec_sort_with_function (sort_me, macro_sort_cmp);
+
+ if (vec_len (sort_me))
+ print (vam->ofp, "%-15s%s", "Name", "Value");
+ else
+ print (vam->ofp, "The macro table is empty...");
+
+ for (i = 0; i < vec_len (sort_me); i++)
+ print (vam->ofp, "%-15s%s", sort_me[i].name, sort_me[i].value);
+ return 0;
+}
+
+static int
+dump_node_table (vat_main_t * vam)
+{
+ int i, j;
+ vlib_node_t *node, *next_node;
+
+ if (vec_len (vam->graph_nodes) == 0)
+ {
+ print (vam->ofp, "Node table empty, issue get_node_graph...");
+ return 0;
+ }
+
+ for (i = 0; i < vec_len (vam->graph_nodes); i++)
+ {
+ node = vam->graph_nodes[i];
+ print (vam->ofp, "[%d] %s", i, node->name);
+ for (j = 0; j < vec_len (node->next_nodes); j++)
+ {
+ if (node->next_nodes[j] != ~0)
+ {
+ next_node = vam->graph_nodes[node->next_nodes[j]];
+ print (vam->ofp, " [%d] %s", j, next_node->name);
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+value_sort_cmp (void *a1, void *a2)
+{
+ name_sort_t *n1 = a1;
+ name_sort_t *n2 = a2;
+
+ if (n1->value < n2->value)
+ return -1;
+ if (n1->value > n2->value)
+ return 1;
+ return 0;
+}
+
+
+static int
+dump_msg_api_table (vat_main_t * vam)
+{
+ api_main_t *am = &api_main;
+ name_sort_t *nses = 0, *ns;
+ hash_pair_t *hp;
+ int i;
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (hp, am->msg_index_by_name_and_crc,
+ ({
+ vec_add2 (nses, ns, 1);
+ ns->name = (u8 *)(hp->key);
+ ns->value = (u32) hp->value[0];
+ }));
+ /* *INDENT-ON* */
+
+ vec_sort_with_function (nses, value_sort_cmp);
+
+ for (i = 0; i < vec_len (nses); i++)
+ print (vam->ofp, " [%d]: %s", nses[i].value, nses[i].name);
+ vec_free (nses);
+ return 0;
+}
+
+static int
+get_msg_id (vat_main_t * vam)
+{
+ u8 *name_and_crc;
+ u32 message_index;
+
+ if (unformat (vam->input, "%s", &name_and_crc))
+ {
+ message_index = vl_api_get_msg_index (name_and_crc);
+ if (message_index == ~0)
+ {
+ print (vam->ofp, " '%s' not found", name_and_crc);
+ return 0;
+ }
+ print (vam->ofp, " '%s' has message index %d",
+ name_and_crc, message_index);
+ return 0;
+ }
+ errmsg ("name_and_crc required...");
+ return 0;
+}
+
+static int
+search_node_table (vat_main_t * vam)
+{
+ unformat_input_t *line_input = vam->input;
+ u8 *node_to_find;
+ int j;
+ vlib_node_t *node, *next_node;
+ uword *p;
+
+ if (vam->graph_node_index_by_name == 0)
+ {
+ print (vam->ofp, "Node table empty, issue get_node_graph...");
+ return 0;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%s", &node_to_find))
+ {
+ vec_add1 (node_to_find, 0);
+ p = hash_get_mem (vam->graph_node_index_by_name, node_to_find);
+ if (p == 0)
+ {
+ print (vam->ofp, "%s not found...", node_to_find);
+ goto out;
+ }
+ node = vam->graph_nodes[p[0]];
+ print (vam->ofp, "[%d] %s", p[0], node->name);
+ for (j = 0; j < vec_len (node->next_nodes); j++)
+ {
+ if (node->next_nodes[j] != ~0)
+ {
+ next_node = vam->graph_nodes[node->next_nodes[j]];
+ print (vam->ofp, " [%d] %s", j, next_node->name);
+ }
+ }
+ }
+
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error,
+ line_input);
+ return -99;
+ }
+
+ out:
+ vec_free (node_to_find);
+
+ }
+
+ return 0;
+}
+
+
+static int
+script (vat_main_t * vam)
+{
+#if (VPP_API_TEST_BUILTIN==0)
+ u8 *s = 0;
+ char *save_current_file;
+ unformat_input_t save_input;
+ jmp_buf save_jump_buf;
+ u32 save_line_number;
+
+ FILE *new_fp, *save_ifp;
+
+ if (unformat (vam->input, "%s", &s))
+ {
+      vec_add1 (s, 0);		/* ensure NUL termination before fopen */
+      new_fp = fopen ((char *) s, "r");
+ if (new_fp == 0)
+ {
+ errmsg ("Couldn't open script file %s", s);
+ vec_free (s);
+ return -99;
+ }
+ }
+ else
+ {
+ errmsg ("Missing script name");
+ return -99;
+ }
+
+ clib_memcpy (&save_input, &vam->input, sizeof (save_input));
+ clib_memcpy (&save_jump_buf, &vam->jump_buf, sizeof (save_jump_buf));
+ save_ifp = vam->ifp;
+ save_line_number = vam->input_line_number;
+ save_current_file = (char *) vam->current_file;
+
+ vam->input_line_number = 0;
+ vam->ifp = new_fp;
+ vam->current_file = s;
+ do_one_file (vam);
+
+ clib_memcpy (&vam->input, &save_input, sizeof (vam->input));
+ clib_memcpy (&vam->jump_buf, &save_jump_buf, sizeof (save_jump_buf));
+ vam->ifp = save_ifp;
+ vam->input_line_number = save_line_number;
+ vam->current_file = (u8 *) save_current_file;
+ vec_free (s);
+
+ return 0;
+#else
+ clib_warning ("use the exec command...");
+ return -99;
+#endif
+}
+
+static int
+echo (vat_main_t * vam)
+{
+ print (vam->ofp, "%v", vam->input->buffer);
+ return 0;
+}
+
+/* List of API message constructors, CLI names map to api_xxx */
+#define foreach_vpe_api_msg \
+_(create_loopback,"[mac <mac-addr>] [instance <instance>]") \
+_(sw_interface_dump,"") \
+_(sw_interface_set_flags, \
+ "<intfc> | sw_if_index <id> admin-up | admin-down link-up | link down") \
+_(sw_interface_add_del_address, \
+ "<intfc> | sw_if_index <id> <ip4-address> | <ip6-address> [del] [del-all] ") \
+_(sw_interface_set_table, \
+ "<intfc> | sw_if_index <id> vrf <table-id> [ipv6]") \
+_(sw_interface_set_mpls_enable, \
+ "<intfc> | sw_if_index [disable | dis]") \
+_(sw_interface_set_vpath, \
+ "<intfc> | sw_if_index <id> enable | disable") \
+_(sw_interface_set_vxlan_bypass, \
+ "<intfc> | sw_if_index <id> [ip4 | ip6] [enable | disable]") \
+_(sw_interface_set_l2_xconnect, \
+ "rx <intfc> | rx_sw_if_index <id> tx <intfc> | tx_sw_if_index <id>\n" \
+ "enable | disable") \
+_(sw_interface_set_l2_bridge, \
+ "{<intfc> | sw_if_index <id>} bd_id <bridge-domain-id>\n" \
+ "[shg <split-horizon-group>] [bvi]\n" \
+ "enable | disable") \
+_(bridge_domain_set_mac_age, "bd_id <bridge-domain-id> mac-age 0-255") \
+_(bridge_domain_add_del, \
+ "bd_id <bridge-domain-id> [flood 1|0] [uu-flood 1|0] [forward 1|0] [learn 1|0] [arp-term 1|0] [mac-age 0-255] [bd-tag <tag>] [del]\n") \
+_(bridge_domain_dump, "[bd_id <bridge-domain-id>]\n") \
+_(l2fib_add_del, \
+ "mac <mac-addr> bd_id <bridge-domain-id> [del] | sw_if <intfc> | sw_if_index <id> [static] [filter] [bvi] [count <nn>]\n") \
+_(l2fib_flush_bd, "bd_id <bridge-domain-id>") \
+_(l2fib_flush_int, "<intfc> | sw_if_index <id>") \
+_(l2_flags, \
+ "sw_if <intfc> | sw_if_index <id> [learn] [forward] [uu-flood] [flood] [arp-term] [disable]\n") \
+_(bridge_flags, \
+ "bd_id <bridge-domain-id> [learn] [forward] [uu-flood] [flood] [arp-term] [disable]\n") \
+_(tap_connect, \
+ "tapname <name> mac <mac-addr> | random-mac [tag <string>]") \
+_(tap_modify, \
+ "<vpp-if-name> | sw_if_index <id> tapname <name> mac <mac-addr> | random-mac") \
+_(tap_delete, \
+ "<vpp-if-name> | sw_if_index <id>") \
+_(sw_interface_tap_dump, "") \
+_(ip_table_add_del, \
+ "table-id <n> [ipv6]\n") \
+_(ip_add_del_route, \
+ "<addr>/<mask> via <addr> [table-id <n>]\n" \
+ "[<intfc> | sw_if_index <id>] [resolve-attempts <n>]\n" \
+ "[weight <n>] [drop] [local] [classify <n>] [del]\n" \
+ "[multipath] [count <n>]") \
+_(ip_mroute_add_del, \
+ "<src> <grp>/<mask> [table-id <n>]\n" \
+ "[<intfc> | sw_if_index <id>] [local] [del]") \
+_(mpls_table_add_del, \
+ "table-id <n>\n") \
+_(mpls_route_add_del, \
+ "<label> <eos> via <addr> [table-id <n>]\n" \
+ "[<intfc> | sw_if_index <id>] [resolve-attempts <n>]\n" \
+ "[weight <n>] [drop] [local] [classify <n>] [del]\n" \
+ "[multipath] [count <n>]") \
+_(mpls_ip_bind_unbind, \
+ "<label> <addr/len>") \
+_(mpls_tunnel_add_del, \
+ " via <addr> [table-id <n>]\n" \
+ "sw_if_index <id>] [l2] [del]") \
+_(proxy_arp_add_del, \
+ "<lo-ip4-addr> - <hi-ip4-addr> [vrf <n>] [del]") \
+_(proxy_arp_intfc_enable_disable, \
+ "<intfc> | sw_if_index <id> enable | disable") \
+_(sw_interface_set_unnumbered, \
+ "<intfc> | sw_if_index <id> unnum_if_index <id> [del]") \
+_(ip_neighbor_add_del, \
+ "(<intfc> | sw_if_index <id>) dst <ip46-address> " \
+ "[mac <mac-addr>] [vrf <vrf-id>] [is_static] [del]") \
+_(reset_vrf, "vrf <id> [ipv6]") \
+_(create_vlan_subif, "<intfc> | sw_if_index <id> vlan <n>") \
+_(create_subif, "<intfc> | sw_if_index <id> sub_id <n>\n" \
+ "[outer_vlan_id <n>][inner_vlan_id <n>]\n" \
+ "[no_tags][one_tag][two_tags][dot1ad][exact_match][default_sub]\n" \
+ "[outer_vlan_id_any][inner_vlan_id_any]") \
+_(oam_add_del, "src <ip4-address> dst <ip4-address> [vrf <n>] [del]") \
+_(reset_fib, "vrf <n> [ipv6]") \
+_(dhcp_proxy_config, \
+ "svr <v46-address> src <v46-address>\n" \
+ "rx_vrf_id <nn> server_vrf_id <nn> [del]") \
+_(dhcp_proxy_set_vss, \
+ "tbl_id <n> fib_id <n> oui <n> [ipv6] [del]") \
+_(dhcp_proxy_dump, "ip6") \
+_(dhcp_client_config, \
+ "<intfc> | sw_if_index <id> [hostname <name>] [disable_event] [del]") \
+_(set_ip_flow_hash, \
+ "vrf <n> [src] [dst] [sport] [dport] [proto] [reverse] [ipv6]") \
+_(sw_interface_ip6_enable_disable, \
+ "<intfc> | sw_if_index <id> enable | disable") \
+_(sw_interface_ip6_set_link_local_address, \
+ "<intfc> | sw_if_index <id> <ip6-address>/<mask-width>") \
+_(ip6nd_proxy_add_del, \
+ "<intfc> | sw_if_index <id> <ip6-address>") \
+_(ip6nd_proxy_dump, "") \
+_(sw_interface_ip6nd_ra_prefix, \
+ "<intfc> | sw_if_index <id> <ip6-address>/<mask-width>\n" \
+ "val_life <n> pref_life <n> [def] [noadv] [offl] [noauto]\n" \
+ "[nolink] [isno]") \
+_(sw_interface_ip6nd_ra_config, \
+ "<intfc> | sw_if_index <id> [maxint <n>] [minint <n>]\n" \
+ "[life <n>] [count <n>] [interval <n>] [suppress]\n" \
+ "[managed] [other] [ll] [send] [cease] [isno] [def]") \
+_(set_arp_neighbor_limit, "arp_nbr_limit <n> [ipv6]") \
+_(l2_patch_add_del, \
+ "rx <intfc> | rx_sw_if_index <id> tx <intfc> | tx_sw_if_index <id>\n" \
+ "enable | disable") \
+_(sr_localsid_add_del, \
+ "(del) address <addr> next_hop <addr> behavior <beh>\n" \
+ "fib-table <num> (end.psp) sw_if_index <num>") \
+_(classify_add_del_table, \
+ "buckets <nn> [skip <n>] [match <n>] [memory_size <nn-bytes>]\n" \
+ " [del] [del-chain] mask <mask-value>\n" \
+ " [l2-miss-next | miss-next | acl-miss-next] <name|nn>\n" \
+ " [current-data-flag <n>] [current-data-offset <nn>] [table <nn>]") \
+_(classify_add_del_session, \
+ "[hit-next|l2-hit-next|acl-hit-next|policer-hit-next] <name|nn>\n" \
+ " table-index <nn> skip_n <nn> match_n <nn> match [hex] [l2]\n" \
+ " [l3 [ip4|ip6]] [action set-ip4-fib-id <nn>]\n" \
+ " [action set-ip6-fib-id <nn> | action <n> metadata <nn>] [del]") \
+_(classify_set_interface_ip_table, \
+ "<intfc> | sw_if_index <nn> table <nn>") \
+_(classify_set_interface_l2_tables, \
+ "<intfc> | sw_if_index <nn> [ip4-table <nn>] [ip6-table <nn>]\n" \
+ " [other-table <nn>]") \
+_(get_node_index, "node <node-name") \
+_(add_node_next, "node <node-name> next <next-node-name>") \
+_(l2tpv3_create_tunnel, \
+ "client_address <ip6-addr> our_address <ip6-addr>\n" \
+ "[local_session_id <nn>][remote_session_id <nn>][local_cookie <nn>]\n" \
+ "[remote_cookie <nn>]\n[l2-sublayer-preset]\n") \
+_(l2tpv3_set_tunnel_cookies, \
+ "<intfc> | sw_if_index <nn> [new_local_cookie <nn>]\n" \
+ "[new_remote_cookie <nn>]\n") \
+_(l2tpv3_interface_enable_disable, \
+ "<intfc> | sw_if_index <nn> enable | disable") \
+_(l2tpv3_set_lookup_key, \
+ "lookup_v6_src | lookup_v6_dst | lookup_session_id") \
+_(sw_if_l2tpv3_tunnel_dump, "") \
+_(vxlan_add_del_tunnel, \
+ "src <ip-addr> { dst <ip-addr> | group <mcast-ip-addr>\n" \
+ "{ <intfc> | mcast_sw_if_index <nn> } }\n" \
+ "vni <vni> [encap-vrf-id <nn>] [decap-next <l2|nn>] [del]") \
+_(vxlan_tunnel_dump, "[<intfc> | sw_if_index <nn>]") \
+_(gre_add_del_tunnel, \
+ "src <ip-addr> dst <ip-addr> [outer-fib-id <nn>] [teb] [del]\n") \
+_(gre_tunnel_dump, "[<intfc> | sw_if_index <nn>]") \
+_(l2_fib_clear_table, "") \
+_(l2_interface_efp_filter, "sw_if_index <nn> enable | disable") \
+_(l2_interface_vlan_tag_rewrite, \
+ "<intfc> | sw_if_index <nn> \n" \
+ "[disable][push-[1|2]][pop-[1|2]][translate-1-[1|2]] \n" \
+ "[translate-2-[1|2]] [push_dot1q 0] tag1 <nn> tag2 <nn>") \
+_(create_vhost_user_if, \
+ "socket <filename> [server] [renumber <dev_instance>] " \
+ "[mac <mac_address>]") \
+_(modify_vhost_user_if, \
+ "<intfc> | sw_if_index <nn> socket <filename>\n" \
+ "[server] [renumber <dev_instance>]") \
+_(delete_vhost_user_if, "<intfc> | sw_if_index <nn>") \
+_(sw_interface_vhost_user_dump, "") \
+_(show_version, "") \
+_(vxlan_gpe_add_del_tunnel, \
+ "local <addr> remote <addr> | group <mcast-ip-addr>\n" \
+ "{ <intfc> | mcast_sw_if_index <nn> } }\n" \
+ "vni <nn> [encap-vrf-id <nn>] [decap-vrf-id <nn>]\n" \
+ "[next-ip4][next-ip6][next-ethernet] [next-nsh] [del]\n") \
+_(vxlan_gpe_tunnel_dump, "[<intfc> | sw_if_index <nn>]") \
+_(l2_fib_table_dump, "bd_id <bridge-domain-id>") \
+_(interface_name_renumber, \
+ "<intfc> | sw_if_index <nn> new_show_dev_instance <nn>") \
+_(input_acl_set_interface, \
+ "<intfc> | sw_if_index <nn> [ip4-table <nn>] [ip6-table <nn>]\n" \
+ " [l2-table <nn>] [del]") \
+_(want_ip4_arp_events, "address <ip4-address> [del]") \
+_(want_ip6_nd_events, "address <ip6-address> [del]") \
+_(want_l2_macs_events, "[disable] [learn-limit <n>] [scan-delay <n>] [max-entries <n>]") \
+_(ip_address_dump, "(ipv4 | ipv6) (<intfc> | sw_if_index <id>)") \
+_(ip_dump, "ipv4 | ipv6") \
+_(ipsec_spd_add_del, "spd_id <n> [del]") \
+_(ipsec_interface_add_del_spd, "(<intfc> | sw_if_index <id>)\n" \
+ " spid_id <n> ") \
+_(ipsec_sad_add_del_entry, "sad_id <n> spi <n> crypto_alg <alg>\n" \
+ " crypto_key <hex> tunnel_src <ip4|ip6> tunnel_dst <ip4|ip6>\n" \
+ " integ_alg <alg> integ_key <hex>") \
+_(ipsec_spd_add_del_entry, "spd_id <n> priority <n> action <action>\n" \
+ " (inbound|outbound) [sa_id <n>] laddr_start <ip4|ip6>\n" \
+ " laddr_stop <ip4|ip6> raddr_start <ip4|ip6> raddr_stop <ip4|ip6>\n" \
+ " [lport_start <n> lport_stop <n>] [rport_start <n> rport_stop <n>]" ) \
+_(ipsec_sa_set_key, "sa_id <n> crypto_key <hex> integ_key <hex>") \
+_(ipsec_tunnel_if_add_del, "local_spi <n> remote_spi <n>\n" \
+ " crypto_alg <alg> local_crypto_key <hex> remote_crypto_key <hex>\n" \
+ " integ_alg <alg> local_integ_key <hex> remote_integ_key <hex>\n" \
+ " local_ip <addr> remote_ip <addr> [esn] [anti_replay] [del]\n") \
+_(ikev2_profile_add_del, "name <profile_name> [del]") \
+_(ikev2_profile_set_auth, "name <profile_name> auth_method <method>\n" \
+ "(auth_data 0x<data> | auth_data <data>)") \
+_(ikev2_profile_set_id, "name <profile_name> id_type <type>\n" \
+ "(id_data 0x<data> | id_data <data>) (local|remote)") \
+_(ikev2_profile_set_ts, "name <profile_name> protocol <proto>\n" \
+ "start_port <port> end_port <port> start_addr <ip4> end_addr <ip4>\n" \
+ "(local|remote)") \
+_(ikev2_set_local_key, "file <absolute_file_path>") \
+_(ikev2_set_responder, "<profile_name> interface <interface> address <addr>") \
+_(ikev2_set_ike_transforms, "<profile_name> <crypto alg> <key size> <integrity alg> <DH group>") \
+_(ikev2_set_esp_transforms, "<profile_name> <crypto alg> <key size> <integrity alg> <DH group>") \
+_(ikev2_set_sa_lifetime, "<profile_name> <seconds> <jitter> <handover> <max bytes>") \
+_(ikev2_initiate_sa_init, "<profile_name>") \
+_(ikev2_initiate_del_ike_sa, "<ispi>") \
+_(ikev2_initiate_del_child_sa, "<ispi>") \
+_(ikev2_initiate_rekey_child_sa, "<ispi>") \
+_(delete_loopback,"sw_if_index <nn>") \
+_(bd_ip_mac_add_del, "bd_id <bridge-domain-id> <ip4/6-addr> <mac-addr> [del]") \
+_(map_add_domain, \
+ "ip4-pfx <ip4pfx> ip6-pfx <ip6pfx> " \
+ "ip6-src <ip6addr> " \
+ "ea-bits-len <n> psid-offset <n> psid-len <n>") \
+_(map_del_domain, "index <n>") \
+_(map_add_del_rule, \
+ "index <n> psid <n> dst <ip6addr> [del]") \
+_(map_domain_dump, "") \
+_(map_rule_dump, "index <map-domain>") \
+_(want_interface_events, "enable|disable") \
+_(want_stats,"enable|disable") \
+_(get_first_msg_id, "client <name>") \
+_(cop_interface_enable_disable, "<intfc> | sw_if_index <nn> [disable]") \
+_(cop_whitelist_enable_disable, "<intfc> | sw_if_index <nn>\n" \
+ "fib-id <nn> [ip4][ip6][default]") \
+_(get_node_graph, " ") \
+_(sw_interface_clear_stats,"<intfc> | sw_if_index <nn>") \
+_(ioam_enable, "[trace] [pow] [ppc <encap|decap>]") \
+_(ioam_disable, "") \
+_(one_add_del_locator_set, "locator-set <locator_name> [iface <intf> |" \
+ " sw_if_index <sw_if_index> p <priority> " \
+ "w <weight>] [del]") \
+_(one_add_del_locator, "locator-set <locator_name> " \
+ "iface <intf> | sw_if_index <sw_if_index> " \
+ "p <priority> w <weight> [del]") \
+_(one_add_del_local_eid,"vni <vni> eid " \
+ "<ipv4|ipv6>/<prefix> | <L2 address> " \
+ "locator-set <locator_name> [del]" \
+ "[key-id sha1|sha256 secret-key <secret-key>]")\
+_(one_add_del_map_resolver, "<ip4|6-addr> [del]") \
+_(one_add_del_map_server, "<ip4|6-addr> [del]") \
+_(one_enable_disable, "enable|disable") \
+_(one_map_register_enable_disable, "enable|disable") \
+_(one_map_register_fallback_threshold, "<value>") \
+_(one_rloc_probe_enable_disable, "enable|disable") \
+_(one_add_del_remote_mapping, "add|del vni <vni> eid <dest-eid> " \
+ "[seid <seid>] " \
+ "rloc <locator> p <prio> " \
+ "w <weight> [rloc <loc> ... ] " \
+ "action <action> [del-all]") \
+_(one_add_del_adjacency, "add|del vni <vni> reid <remote-eid> leid " \
+ "<local-eid>") \
+_(one_pitr_set_locator_set, "locator-set <loc-set-name> | del") \
+_(one_use_petr, "ip-address> | disable") \
+_(one_map_request_mode, "src-dst|dst-only") \
+_(one_add_del_map_request_itr_rlocs, "<loc-set-name> [del]") \
+_(one_eid_table_add_del_map, "[del] vni <vni> vrf <vrf>") \
+_(one_locator_set_dump, "[local | remote]") \
+_(one_locator_dump, "ls_index <index> | ls_name <name>") \
+_(one_eid_table_dump, "[eid <ipv4|ipv6>/<prefix> | <mac>] [vni] " \
+ "[local] | [remote]") \
+_(one_add_del_ndp_entry, "[del] mac <mac> bd <bd> ip6 <ip6>") \
+_(one_ndp_bd_get, "") \
+_(one_ndp_entries_get, "bd <bridge-domain>") \
+_(one_add_del_l2_arp_entry, "[del] mac <mac> bd <bd> ip4 <ip4>") \
+_(one_l2_arp_bd_get, "") \
+_(one_l2_arp_entries_get, "bd <bridge-domain>") \
+_(one_stats_enable_disable, "enable|disalbe") \
+_(show_one_stats_enable_disable, "") \
+_(one_eid_table_vni_dump, "") \
+_(one_eid_table_map_dump, "l2|l3") \
+_(one_map_resolver_dump, "") \
+_(one_map_server_dump, "") \
+_(one_adjacencies_get, "vni <vni>") \
+_(one_nsh_set_locator_set, "[del] ls <locator-set-name>") \
+_(show_one_rloc_probe_state, "") \
+_(show_one_map_register_state, "") \
+_(show_one_status, "") \
+_(one_stats_dump, "") \
+_(one_stats_flush, "") \
+_(one_get_map_request_itr_rlocs, "") \
+_(one_map_register_set_ttl, "<ttl>") \
+_(one_set_transport_protocol, "udp|api") \
+_(one_get_transport_protocol, "") \
+_(show_one_nsh_mapping, "") \
+_(show_one_pitr, "") \
+_(show_one_use_petr, "") \
+_(show_one_map_request_mode, "") \
+_(show_one_map_register_ttl, "") \
+_(show_one_map_register_fallback_threshold, "") \
+_(lisp_add_del_locator_set, "locator-set <locator_name> [iface <intf> |"\
+ " sw_if_index <sw_if_index> p <priority> " \
+ "w <weight>] [del]") \
+_(lisp_add_del_locator, "locator-set <locator_name> " \
+ "iface <intf> | sw_if_index <sw_if_index> " \
+ "p <priority> w <weight> [del]") \
+_(lisp_add_del_local_eid,"vni <vni> eid " \
+ "<ipv4|ipv6>/<prefix> | <L2 address> " \
+ "locator-set <locator_name> [del]" \
+ "[key-id sha1|sha256 secret-key <secret-key>]") \
+_(lisp_add_del_map_resolver, "<ip4|6-addr> [del]") \
+_(lisp_add_del_map_server, "<ip4|6-addr> [del]") \
+_(lisp_enable_disable, "enable|disable") \
+_(lisp_map_register_enable_disable, "enable|disable") \
+_(lisp_rloc_probe_enable_disable, "enable|disable") \
+_(lisp_add_del_remote_mapping, "add|del vni <vni> eid <dest-eid> " \
+ "[seid <seid>] " \
+ "rloc <locator> p <prio> " \
+ "w <weight> [rloc <loc> ... ] " \
+ "action <action> [del-all]") \
+_(lisp_add_del_adjacency, "add|del vni <vni> reid <remote-eid> leid " \
+ "<local-eid>") \
+_(lisp_pitr_set_locator_set, "locator-set <loc-set-name> | del") \
+_(lisp_use_petr, "<ip-address> | disable") \
+_(lisp_map_request_mode, "src-dst|dst-only") \
+_(lisp_add_del_map_request_itr_rlocs, "<loc-set-name> [del]") \
+_(lisp_eid_table_add_del_map, "[del] vni <vni> vrf <vrf>") \
+_(lisp_locator_set_dump, "[local | remote]") \
+_(lisp_locator_dump, "ls_index <index> | ls_name <name>") \
+_(lisp_eid_table_dump, "[eid <ipv4|ipv6>/<prefix> | <mac>] [vni] " \
+ "[local] | [remote]") \
+_(lisp_eid_table_vni_dump, "") \
+_(lisp_eid_table_map_dump, "l2|l3") \
+_(lisp_map_resolver_dump, "") \
+_(lisp_map_server_dump, "") \
+_(lisp_adjacencies_get, "vni <vni>") \
+_(gpe_fwd_entry_vnis_get, "") \
+_(gpe_native_fwd_rpaths_get, "ip4 | ip6") \
+_(gpe_add_del_native_fwd_rpath, "[del] via <nh-ip-addr> [iface] " \
+ "[table <table-id>]") \
+_(lisp_gpe_fwd_entries_get, "vni <vni>") \
+_(lisp_gpe_fwd_entry_path_dump, "index <fwd_entry_index>") \
+_(gpe_set_encap_mode, "lisp|vxlan") \
+_(gpe_get_encap_mode, "") \
+_(lisp_gpe_add_del_iface, "up|down") \
+_(lisp_gpe_enable_disable, "enable|disable") \
+_(lisp_gpe_add_del_fwd_entry, "reid <eid> [leid <eid>] vni <vni>" \
+ "vrf/bd <dp_table> loc-pair <lcl_loc> <rmt_loc> w <weight>... [del]") \
+_(show_lisp_rloc_probe_state, "") \
+_(show_lisp_map_register_state, "") \
+_(show_lisp_status, "") \
+_(lisp_get_map_request_itr_rlocs, "") \
+_(show_lisp_pitr, "") \
+_(show_lisp_use_petr, "") \
+_(show_lisp_map_request_mode, "") \
+_(af_packet_create, "name <host interface name> [hw_addr <mac>]") \
+_(af_packet_delete, "name <host interface name>") \
+_(policer_add_del, "name <policer name> <params> [del]") \
+_(policer_dump, "[name <policer name>]") \
+_(policer_classify_set_interface, \
+ "<intfc> | sw_if_index <nn> [ip4-table <nn>] [ip6-table <nn>]\n" \
+ " [l2-table <nn>] [del]") \
+_(policer_classify_dump, "type [ip4|ip6|l2]") \
+_(netmap_create, "name <interface name> [hw-addr <mac>] [pipe] " \
+ "[master|slave]") \
+_(netmap_delete, "name <interface name>") \
+_(mpls_tunnel_dump, "tunnel_index <tunnel-id>") \
+_(mpls_fib_dump, "") \
+_(classify_table_ids, "") \
+_(classify_table_by_interface, "sw_if_index <sw_if_index>") \
+_(classify_table_info, "table_id <nn>") \
+_(classify_session_dump, "table_id <nn>") \
+_(set_ipfix_exporter, "collector_address <ip4> [collector_port <nn>] " \
+ "src_address <ip4> [vrf_id <nn>] [path_mtu <nn>] " \
+ "[template_interval <nn>] [udp_checksum]") \
+_(ipfix_exporter_dump, "") \
+_(set_ipfix_classify_stream, "[domain <domain-id>] [src_port <src-port>]") \
+_(ipfix_classify_stream_dump, "") \
+_(ipfix_classify_table_add_del, "table <table-index> ip4|ip6 [tcp|udp]") \
+_(ipfix_classify_table_dump, "") \
+_(sw_interface_span_enable_disable, "[l2] [src <intfc> | src_sw_if_index <id>] [disable | [[dst <intfc> | dst_sw_if_index <id>] [both|rx|tx]]]") \
+_(sw_interface_span_dump, "[l2]") \
+_(get_next_index, "node-name <node-name> next-node-name <node-name>") \
+_(pg_create_interface, "if_id <nn>") \
+_(pg_capture, "if_id <nnn> pcap <file_name> count <nnn> [disable]") \
+_(pg_enable_disable, "[stream <id>] disable") \
+_(ip_source_and_port_range_check_add_del, \
+ "<ip-addr>/<mask> range <nn>-<nn> vrf <id>") \
+_(ip_source_and_port_range_check_interface_add_del, \
+ "<intf> | sw_if_index <nn> [tcp-out-vrf <id>] [tcp-in-vrf <id>]" \
+ "[udp-in-vrf <id>] [udp-out-vrf <id>]") \
+_(ipsec_gre_add_del_tunnel, \
+ "src <addr> dst <addr> local_sa <sa-id> remote_sa <sa-id> [del]") \
+_(ipsec_gre_tunnel_dump, "[sw_if_index <nn>]") \
+_(delete_subif,"<intfc> | sw_if_index <nn>") \
+_(l2_interface_pbb_tag_rewrite, \
+ "<intfc> | sw_if_index <nn> \n" \
+ "[disable | push | pop | translate_pbb_stag <outer_tag>] \n" \
+ "dmac <mac> smac <mac> sid <nn> [vlanid <nn>]") \
+_(punt, "protocol <l4-protocol> [ip <ver>] [port <l4-port>] [del]") \
+_(flow_classify_set_interface, \
+ "<intfc> | sw_if_index <nn> [ip4-table <nn>] [ip6-table <nn>] [del]") \
+_(flow_classify_dump, "type [ip4|ip6]") \
+_(ip_fib_dump, "") \
+_(ip_mfib_dump, "") \
+_(ip6_fib_dump, "") \
+_(ip6_mfib_dump, "") \
+_(feature_enable_disable, "arc_name <arc_name> " \
+ "feature_name <feature_name> <intfc> | sw_if_index <nn> [disable]") \
+_(sw_interface_tag_add_del, "<intfc> | sw_if_index <nn> tag <text> [del]") \
+_(l2_xconnect_dump, "") \
+_(sw_interface_set_mtu, "<intfc> | sw_if_index <nn> mtu <nn>") \
+_(ip_neighbor_dump, "[ip6] <intfc> | sw_if_index <nn>") \
+_(sw_interface_get_table, "<intfc> | sw_if_index <id> [ipv6]") \
+_(p2p_ethernet_add, "<intfc> | sw_if_index <nn> remote_mac <mac-address> sub_id <id>") \
+_(p2p_ethernet_del, "<intfc> | sw_if_index <nn> remote_mac <mac-address>") \
+_(lldp_config, "system-name <name> tx-hold <nn> tx-interval <nn>") \
+_(sw_interface_set_lldp, "<intfc> | sw_if_index <nn> [port-desc <description>] [disable]") \
+_(tcp_configure_src_addresses, "<ip4|6-first> - <ip4|6-last> [vrf <id>]")
+
+/* List of command functions, CLI names map directly to functions */
+#define foreach_cli_function \
+_(comment, "usage: comment <ignore-rest-of-line>") \
+_(dump_interface_table, "usage: dump_interface_table") \
+_(dump_sub_interface_table, "usage: dump_sub_interface_table") \
+_(dump_ipv4_table, "usage: dump_ipv4_table") \
+_(dump_ipv6_table, "usage: dump_ipv6_table") \
+_(dump_stats_table, "usage: dump_stats_table") \
+_(dump_macro_table, "usage: dump_macro_table ") \
+_(dump_node_table, "usage: dump_node_table") \
+_(dump_msg_api_table, "usage: dump_msg_api_table") \
+_(get_msg_id, "usage: get_msg_id name_and_crc") \
+_(echo, "usage: echo <message>") \
+_(exec, "usage: exec <vpe-debug-CLI-command>") \
+_(exec_inband, "usage: exec_inband <vpe-debug-CLI-command>") \
+_(help, "usage: help") \
+_(q, "usage: quit") \
+_(quit, "usage: quit") \
+_(search_node_table, "usage: search_node_table <name>...") \
+_(set, "usage: set <variable-name> <value>") \
+_(script, "usage: script <file-name>") \
+_(unset, "usage: unset <variable-name>")
+#define _(N,n) \
+ static void vl_api_##n##_t_handler_uni \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = &vat_main; \
+ if (vam->json_output) { \
+ vl_api_##n##_t_handler_json(mp); \
+ } else { \
+ vl_api_##n##_t_handler(mp); \
+ } \
+ }
+foreach_vpe_api_reply_msg;
+#if VPP_API_TEST_BUILTIN == 0
+foreach_standalone_reply_msg;
+#endif
+#undef _
+
+void
+vat_api_hookup (vat_main_t * vam)
+{
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler_uni, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#if VPP_API_TEST_BUILTIN == 0
+ foreach_standalone_reply_msg;
+#endif
+#undef _
+
+#if (VPP_API_TEST_BUILTIN==0)
+ vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE);
+
+ vam->sw_if_index_by_interface_name = hash_create_string (0, sizeof (uword));
+
+ vam->function_by_name = hash_create_string (0, sizeof (uword));
+
+ vam->help_by_name = hash_create_string (0, sizeof (uword));
+#endif
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* CLI functions */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, n);
+ foreach_cli_function;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_cli_function;
+#undef _
+}
+
+#if VPP_API_TEST_BUILTIN
+static clib_error_t *
+vat_api_hookup_shim (vlib_main_t * vm)
+{
+ vat_api_hookup (&vat_main);
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vat_api_hookup_shim);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/json_format.c b/src/vat/json_format.c
new file mode 100644
index 00000000..6f316d97
--- /dev/null
+++ b/src/vat/json_format.c
@@ -0,0 +1,304 @@
+/*
+ *------------------------------------------------------------------
+ * json_format.c
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <inttypes.h>
+#include <vat/json_format.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/vec.h>
+
+#define VAT_TAB_WIDTH 2
+
+typedef struct vat_print_ctx_s
+{
+ FILE *ofp;
+ u32 indent;
+} vat_print_ctx_t;
+
+/* Format an IP4 address. */
+static u8 *
+vat_json_format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+/* Format an IP6 address. */
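+/* E.g. the longest run of zero groups collapses to "::", so
+   2001:db8:0:0:0:0:0:1 formats as "2001:db8::1". */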
+static u8 *
+vat_json_format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+ i_max_n_zero = ARRAY_LEN (a->as_u16);
+ max_n_zeros = 0;
+ i_first_zero = i_max_n_zero;
+ n_zeros = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ u32 is_zero = a->as_u16[i] == 0;
+ if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+ {
+ i_first_zero = i;
+ n_zeros = 0;
+ }
+ n_zeros += is_zero;
+ if ((!is_zero && n_zeros > max_n_zeros)
+ || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+ {
+ i_max_n_zero = i_first_zero;
+ max_n_zeros = n_zeros;
+ i_first_zero = ARRAY_LEN (a->as_u16);
+ n_zeros = 0;
+ }
+ }
+
+ last_double_colon = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == i_max_n_zero && max_n_zeros > 1)
+ {
+ s = format (s, "::");
+ i += max_n_zeros - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+static void
+vat_json_indent_print (vat_print_ctx_t * ctx)
+{
+ int i;
+ for (i = 0; i < ctx->indent * VAT_TAB_WIDTH; i++)
+ {
+ fformat (ctx->ofp, " ");
+ }
+}
+
+static void
+vat_json_indent_line (vat_print_ctx_t * ctx, char *fmt, ...)
+{
+ va_list va;
+
+ vat_json_indent_print (ctx);
+ va_start (va, fmt);
+ va_fformat (ctx->ofp, fmt, &va);
+ va_end (va);
+}
+
+static u8
+is_num_only (vat_json_node_t * p)
+{
+ vat_json_node_t *elem;
+ vec_foreach (elem, p)
+ {
+ if (VAT_JSON_INT != elem->type && VAT_JSON_UINT != elem->type)
+ {
+ return 0;
+ }
+ }
+ return 1;
+}
+
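+/*
+ * Recursive pretty-printer. The local macros below print either with
+ * (PP/PPL) or without (P/PL) the current indentation; INCR/DECR adjust
+ * the indent level as we descend into objects and arrays.
+ */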
+static void
+vat_json_print_internal (vat_print_ctx_t * ctx, vat_json_node_t * node)
+{
+#define P(fmt,...) fformat(ctx->ofp, fmt, ##__VA_ARGS__)
+#define PL(fmt,...) fformat(ctx->ofp, fmt"\n", ##__VA_ARGS__)
+#define PPL(fmt,...) vat_json_indent_line(ctx, fmt"\n", ##__VA_ARGS__)
+#define PP(fmt,...) vat_json_indent_line(ctx, fmt, ##__VA_ARGS__)
+#define INCR (ctx->indent++)
+#define DECR (ctx->indent--)
+
+ vat_json_pair_t *pair;
+ u32 i, count;
+ vat_json_node_t *elem;
+ u8 num_only = 0;
+
+ if (!node)
+ {
+ return;
+ }
+
+ switch (node->type)
+ {
+ case VAT_JSON_OBJECT:
+ count = vec_len (node->pairs);
+ if (count >= 1)
+ {
+ PL ("{");
+ INCR;
+ for (i = 0; i < count; i++)
+ {
+ pair = &node->pairs[i];
+ PP ("\"%s\": ", pair->name);
+ vat_json_print_internal (ctx, &pair->value);
+ if (i < count - 1)
+ {
+ P (",");
+ }
+ PL ();
+ }
+ DECR;
+ PP ("}");
+ }
+ else
+ {
+ P ("{}");
+ }
+ break;
+ case VAT_JSON_ARRAY:
+ num_only = is_num_only (node->array);
+ count = vec_len (node->array);
+ if (count >= 1)
+ {
+ if (num_only)
+ P ("[");
+ else
+ PL ("[ ");
+ INCR;
+ for (i = 0; i < count; i++)
+ {
+ elem = &node->array[i];
+ if (!num_only)
+ {
+ vat_json_indent_print (ctx);
+ }
+ vat_json_print_internal (ctx, elem);
+ if (i < count - 1)
+ {
+ if (num_only)
+ {
+ P (", ");
+ }
+ else
+ {
+ P (",");
+ }
+ }
+ if (!num_only)
+ PL ();
+ }
+ DECR;
+ if (!num_only)
+ PP ("]");
+ else
+ P ("]");
+ }
+ else
+ {
+ P ("[]");
+ }
+ break;
+ case VAT_JSON_INT:
+ P ("%d", node->sint);
+ break;
+ case VAT_JSON_UINT:
+ P ("%" PRIu64, node->uint);
+ break;
+ case VAT_JSON_REAL:
+ P ("%f", node->real);
+ break;
+ case VAT_JSON_STRING:
+ P ("\"%s\"", node->string);
+ break;
+ case VAT_JSON_IPV4:
+ P ("\"%U\"", vat_json_format_ip4_address, &node->ip4);
+ break;
+ case VAT_JSON_IPV6:
+ P ("\"%U\"", vat_json_format_ip6_address, &node->ip6);
+ break;
+ default:
+ break;
+ }
+#undef PPL
+#undef PP
+#undef PL
+#undef P
+#undef INCR
+#undef DECR
+}
+
+void
+vat_json_print (FILE * ofp, vat_json_node_t * node)
+{
+ vat_print_ctx_t ctx;
+ memset (&ctx, 0, sizeof ctx);
+ ctx.indent = 0;
+ ctx.ofp = ofp;
+ fformat (ofp, "\n");
+ vat_json_print_internal (&ctx, node);
+ fformat (ofp, "\n");
+}
+
+void
+vat_json_free (vat_json_node_t * node)
+{
+ int i = 0;
+
+ if (NULL == node)
+ {
+ return;
+ }
+ switch (node->type)
+ {
+ case VAT_JSON_OBJECT:
+ for (i = 0; i < vec_len (node->pairs); i++)
+ {
+ vat_json_free (&node->pairs[i].value);
+ }
+ if (NULL != node->pairs)
+ {
+ vec_free (node->pairs);
+ }
+ break;
+ case VAT_JSON_ARRAY:
+ for (i = 0; i < vec_len (node->array); i++)
+ {
+ vat_json_free (&node->array[i]);
+ }
+ if (NULL != node->array)
+ {
+ vec_free (node->array);
+ }
+ break;
+ case VAT_JSON_STRING:
+ if (NULL != node->string)
+ {
+ vec_free (node->string);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/json_format.h b/src/vat/json_format.h
new file mode 100644
index 00000000..154fb3df
--- /dev/null
+++ b/src/vat/json_format.h
@@ -0,0 +1,254 @@
+/*
+ *------------------------------------------------------------------
+ * json_format.h
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __JSON_FORMAT_H__
+#define __JSON_FORMAT_H__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <netinet/ip.h>
+
+/* JSON value type */
+typedef enum
+{
+ VAT_JSON_NONE,
+ VAT_JSON_OBJECT,
+ VAT_JSON_ARRAY,
+ VAT_JSON_STRING,
+ VAT_JSON_REAL,
+ VAT_JSON_UINT,
+ VAT_JSON_INT,
+ VAT_JSON_IPV4,
+ VAT_JSON_IPV6,
+ VAT_JSON_MAX
+} vat_json_val_type_t;
+
+typedef struct vat_json_node_s vat_json_node_t;
+typedef struct vat_json_pair_s vat_json_pair_t;
+
+/* JSON object structure */
+struct vat_json_node_s
+{
+ vat_json_val_type_t type;
+ union
+ {
+ vat_json_pair_t *pairs;
+ vat_json_node_t *array;
+ u8 *string;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ u64 uint;
+ i64 sint;
+ f64 real;
+ };
+};
+
+struct vat_json_pair_s
+{
+ const char *name;
+ vat_json_node_t value;
+};
+
+void vat_json_print (FILE * ofp, vat_json_node_t * node);
+void vat_json_free (vat_json_node_t * node);
+
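+/*
+ * Typical usage, mirroring json_test.c (a minimal sketch; the field
+ * name is illustrative):
+ *
+ *   vat_json_node_t node;
+ *   vat_json_init_object (&node);
+ *   vat_json_object_add_uint (&node, "retval", 0);
+ *   vat_json_print (stdout, &node);
+ *   vat_json_free (&node);
+ */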
+static_always_inline void
+vat_json_init_object (vat_json_node_t * json)
+{
+ json->type = VAT_JSON_OBJECT;
+ json->pairs = NULL;
+}
+
+static_always_inline void
+vat_json_init_array (vat_json_node_t * json)
+{
+ json->type = VAT_JSON_ARRAY;
+ json->array = NULL;
+}
+
+static_always_inline void
+vat_json_set_string (vat_json_node_t * json, u8 * str)
+{
+ json->type = VAT_JSON_STRING;
+ json->string = str;
+}
+
+static_always_inline void
+vat_json_set_string_copy (vat_json_node_t * json, const u8 * str)
+{
+ u8 *ns = NULL;
+ vec_validate (ns, strlen ((const char *) str));
+ strcpy ((char *) ns, (const char *) str);
+ vec_add1 (ns, '\0');
+ vat_json_set_string (json, ns);
+}
+
+static_always_inline void
+vat_json_set_int (vat_json_node_t * json, i64 num)
+{
+ json->type = VAT_JSON_INT;
+ json->sint = num;
+}
+
+static_always_inline void
+vat_json_set_uint (vat_json_node_t * json, u64 num)
+{
+ json->type = VAT_JSON_UINT;
+ json->uint = num;
+}
+
+static_always_inline void
+vat_json_set_real (vat_json_node_t * json, f64 real)
+{
+ json->type = VAT_JSON_REAL;
+ json->real = real;
+}
+
+static_always_inline void
+vat_json_set_ip4 (vat_json_node_t * json, struct in_addr ip4)
+{
+ json->type = VAT_JSON_IPV4;
+ json->ip4 = ip4;
+}
+
+static_always_inline void
+vat_json_set_ip6 (vat_json_node_t * json, struct in6_addr ip6)
+{
+ json->type = VAT_JSON_IPV6;
+ json->ip6 = ip6;
+}
+
+static_always_inline vat_json_node_t *
+vat_json_object_add (vat_json_node_t * json, const char *name)
+{
+ ASSERT (VAT_JSON_OBJECT == json->type);
+ uword pos = vec_len (json->pairs);
+ vec_validate (json->pairs, pos);
+ json->pairs[pos].name = name;
+ return &json->pairs[pos].value;
+}
+
+static_always_inline vat_json_node_t *
+vat_json_array_add (vat_json_node_t * json)
+{
+ ASSERT (VAT_JSON_ARRAY == json->type);
+ uword pos = vec_len (json->array);
+ vec_validate (json->array, pos);
+ return &json->array[pos];
+}
+
+static_always_inline vat_json_node_t *
+vat_json_object_add_list (vat_json_node_t * json, const char *name)
+{
+ vat_json_node_t *array_node = vat_json_object_add (json, name);
+ vat_json_init_array (array_node);
+ return array_node;
+}
+
+static_always_inline void
+vat_json_object_add_string_copy (vat_json_node_t * json,
+ const char *name, u8 * str)
+{
+ vat_json_set_string_copy (vat_json_object_add (json, name), str);
+}
+
+static_always_inline void
+vat_json_object_add_uint (vat_json_node_t * json,
+ const char *name, u64 number)
+{
+ vat_json_set_uint (vat_json_object_add (json, name), number);
+}
+
+static_always_inline void
+vat_json_object_add_int (vat_json_node_t * json, const char *name, i64 number)
+{
+ vat_json_set_int (vat_json_object_add (json, name), number);
+}
+
+static_always_inline void
+vat_json_object_add_real (vat_json_node_t * json, const char *name, f64 real)
+{
+ vat_json_set_real (vat_json_object_add (json, name), real);
+}
+
+static_always_inline void
+vat_json_object_add_ip4 (vat_json_node_t * json,
+ const char *name, struct in_addr ip4)
+{
+ vat_json_set_ip4 (vat_json_object_add (json, name), ip4);
+}
+
+static_always_inline void
+vat_json_object_add_ip6 (vat_json_node_t * json,
+ const char *name, struct in6_addr ip6)
+{
+ vat_json_set_ip6 (vat_json_object_add (json, name), ip6);
+}
+
+static_always_inline void
+vat_json_array_add_int (vat_json_node_t * json, i64 number)
+{
+ vat_json_set_int (vat_json_array_add (json), number);
+}
+
+static_always_inline void
+vat_json_array_add_uint (vat_json_node_t * json, u64 number)
+{
+ vat_json_set_uint (vat_json_array_add (json), number);
+}
+
+static_always_inline void
+vat_json_object_add_bytes (vat_json_node_t * json,
+ const char *name, u8 * array, uword size)
+{
+ ASSERT (VAT_JSON_OBJECT == json->type);
+ vat_json_node_t *json_array = vat_json_object_add (json, name);
+ vat_json_init_array (json_array);
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ vat_json_array_add_uint (json_array, array[i]);
+ }
+}
+
+static_always_inline vat_json_node_t *
+vat_json_object_get_element (vat_json_node_t * json, const char *name)
+{
+ int i = 0;
+
+ ASSERT (VAT_JSON_OBJECT == json->type);
+ for (i = 0; i < vec_len (json->pairs); i++)
+ {
+ if (0 == strcmp (json->pairs[i].name, name))
+ {
+ return &json->pairs[i].value;
+ }
+ }
+ return NULL;
+}
+
+#endif /* __JSON_FORMAT_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/json_test.c b/src/vat/json_test.c
new file mode 100644
index 00000000..46261694
--- /dev/null
+++ b/src/vat/json_test.c
@@ -0,0 +1,75 @@
+/*
+ *------------------------------------------------------------------
+ * json_test.c
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vat/vat.h>
+#include <vat/json_format.h>
+
+static void
+print_json_test (void)
+{
+ vat_json_node_t node;
+
+ vat_json_init_object (&node);
+ vat_json_object_add_string_copy (&node, "str", (u8 *) "string-value");
+ vat_json_object_add_uint (&node, "ab", 127);
+ vat_json_object_add_real (&node, "pi", 3.14159f);
+ vat_json_print (stdout, &node);
+ vat_json_free (&node);
+
+ vat_json_init_object (&node);
+ vat_json_node_t *a1 = vat_json_object_add (&node, "a1");
+ vat_json_init_object (a1);
+ vat_json_object_add_uint (a1, "b1", 512);
+ vat_json_object_add_string_copy (a1, "b2", (u8 *) "string");
+
+ vat_json_object_add_int (&node, "a2", 2);
+
+ vat_json_node_t *a3 = vat_json_object_add_list (&node, "a3");
+ vat_json_init_array (a3);
+ vat_json_array_add_uint (a3, 1);
+ vat_json_array_add_int (a3, -2);
+ vat_json_array_add_uint (a3, 3);
+
+ vat_json_init_object (vat_json_object_add (&node, "a4"));
+
+ struct in_addr ipv4 = { 0 };
+ struct in6_addr ipv6 = { {{0}} };
+
+ vat_json_object_add_ip4 (&node, "ipv4", ipv4);
+ vat_json_object_add_ip6 (&node, "ipv6", ipv6);
+
+ vat_json_print (stdout, &node);
+ vat_json_free (&node);
+}
+
+int
+main (void)
+{
+ print_json_test ();
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/main.c b/src/vat/main.c
new file mode 100644
index 00000000..b8856cc9
--- /dev/null
+++ b/src/vat/main.c
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "vat.h"
+#include "plugin.h"
+#include <signal.h>
+
+vat_main_t vat_main;
+
+#include <vlibapi/api_helper_macros.h>
+
+void
+vat_suspend (vlib_main_t * vm, f64 interval)
+{
+ /* do nothing in the standalone version, just return */
+}
+
+void
+fformat_append_cr (FILE * ofp, const char *fmt, ...)
+{
+ va_list va;
+
+ va_start (va, fmt);
+ (void) va_fformat (ofp, (char *) fmt, &va);
+ va_end (va);
+ fformat (ofp, "\n");
+}
+
+int
+connect_to_vpe (char *name)
+{
+ vat_main_t *vam = &vat_main;
+ api_main_t *am = &api_main;
+
+ if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0)
+ return -1;
+
+ vam->vl_input_queue = am->shmem_hdr->vl_input_queue;
+ vam->my_client_index = am->my_client_index;
+
+ return 0;
+}
+
+vlib_main_t vlib_global_main;
+vlib_main_t **vlib_mains;
+void
+vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
+{
+ clib_warning ("BUG");
+}
+
+
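+/* Map a negative API return value to its symbolic error string, if known. */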
+static u8 *
+format_api_error (u8 * s, va_list * args)
+{
+ vat_main_t *vam = va_arg (*args, vat_main_t *);
+ i32 error = va_arg (*args, u32);
+ uword *p;
+
+ p = hash_get (vam->error_string_by_error_number, -error);
+
+ if (p)
+ s = format (s, "%s", p[0]);
+ else
+ s = format (s, "%d", error);
+ return s;
+}
+
+void
+do_one_file (vat_main_t * vam)
+{
+ int rv;
+ int (*fp) (vat_main_t * vam);
+ int arg_len;
+ unformat_input_t _input;
+ u8 *cmdp, *argsp;
+ uword *p;
+ u8 *this_cmd = 0;
+
+ vam->input = &_input;
+
+ /* Used by the "quit" command handler */
+ if (setjmp (vam->jump_buf) != 0)
+ return;
+
+ vam->jump_buf_set = 1;
+
+ while (1)
+ {
+ if (vam->ifp == stdin)
+ {
+ if (vam->exec_mode == 0)
+ rv = write (1, "vat# ", 5);
+ else
+ rv = write (1, "exec# ", 6);
+ }
+
+ _vec_len (vam->inbuf) = 4096;
+
+ if (vam->do_exit ||
+ fgets ((char *) vam->inbuf, vec_len (vam->inbuf), vam->ifp) == 0)
+ break;
+
+ vam->input_line_number++;
+
+ vec_free (this_cmd);
+
+ this_cmd =
+ (u8 *) clib_macro_eval (&vam->macro_main, (char *) vam->inbuf,
+ 1 /* complain */ );
+
+ if (vam->exec_mode == 0)
+ {
+ /* Split input into cmd + args */
+ cmdp = this_cmd;
+
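+ /* Skip leading whitespace to find the start of the command. */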
+ while (cmdp < (this_cmd + vec_len (this_cmd)))
+ {
+ if (*cmdp == ' ' || *cmdp == '\t' || *cmdp == '\n')
+ {
+ cmdp++;
+ }
+ else
+ break;
+ }
+ argsp = cmdp;
+ while (argsp < (this_cmd + vec_len (this_cmd)))
+ {
+ if (*argsp != ' ' && *argsp != '\t' && *argsp != '\n')
+ {
+ argsp++;
+ }
+ else
+ break;
+ }
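+ /* NUL-terminate the command token, then skip whitespace before the args. */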
+ *argsp++ = 0;
+ while (argsp < (this_cmd + vec_len (this_cmd)))
+ {
+ if (*argsp == ' ' || *argsp == '\t' || *argsp == '\n')
+ {
+ argsp++;
+ }
+ else
+ break;
+ }
+
+
+ /* Blank input line? */
+ if (*cmdp == 0)
+ continue;
+
+ p = hash_get_mem (vam->function_by_name, cmdp);
+ if (p == 0)
+ {
+ errmsg ("'%s': function not found\n", cmdp);
+ continue;
+ }
+
+ arg_len = strlen ((char *) argsp);
+
+ unformat_init_string (vam->input, (char *) argsp, arg_len);
+ fp = (void *) p[0];
+ }
+ else
+ {
+ unformat_init_string (vam->input, (char *) this_cmd,
+ strlen ((char *) this_cmd));
+ cmdp = this_cmd;
+ fp = exec;
+ }
+
+ rv = (*fp) (vam);
+ if (rv < 0)
+ errmsg ("%s error: %U\n", cmdp, format_api_error, vam, rv);
+ unformat_free (vam->input);
+
+ if (vam->regenerate_interface_table)
+ {
+ vam->regenerate_interface_table = 0;
+ api_sw_interface_dump (vam);
+ }
+ }
+}
+
+static void
+init_error_string_table (vat_main_t * vam)
+{
+
+ vam->error_string_by_error_number = hash_create (0, sizeof (uword));
+
+#define _(n,v,s) hash_set (vam->error_string_by_error_number, -v, s);
+ foreach_vnet_api_error;
+#undef _
+
+ hash_set (vam->error_string_by_error_number, 99, "Misc");
+}
+
+static i8 *
+eval_current_file (macro_main_t * mm, i32 complain)
+{
+ vat_main_t *vam = &vat_main;
+ return ((i8 *) format (0, "%s%c", vam->current_file, 0));
+}
+
+static i8 *
+eval_current_line (macro_main_t * mm, i32 complain)
+{
+ vat_main_t *vam = &vat_main;
+ return ((i8 *) format (0, "%d%c", vam->input_line_number, 0));
+}
+
+static void
+signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
+{
+ vat_main_t *vam = &vat_main;
+
+ switch (signum)
+ {
+ /* these (caught) signals cause the application to exit */
+ case SIGINT:
+ case SIGTERM:
+ if (vam->jump_buf_set)
+ {
+ vam->do_exit = 1;
+ return;
+ }
+
+ /* FALLTHROUGH on purpose */
+
+ default:
+ break;
+ }
+
+ _exit (1);
+}
+
+static void
+setup_signal_handlers (void)
+{
+ uword i;
+ struct sigaction sa;
+
+ for (i = 1; i < 32; i++)
+ {
+ memset (&sa, 0, sizeof (sa));
+ sa.sa_sigaction = (void *) signal_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ switch (i)
+ {
+ /* these signals take the default action */
+ case SIGABRT:
+ case SIGKILL:
+ case SIGSTOP:
+ case SIGUSR1:
+ case SIGUSR2:
+ continue;
+
+ /* ignore SIGPIPE, SIGCHLD */
+ case SIGPIPE:
+ case SIGCHLD:
+ sa.sa_sigaction = (void *) SIG_IGN;
+ break;
+
+ /* catch and handle all other signals */
+ default:
+ break;
+ }
+
+ if (sigaction (i, &sa, 0) < 0)
+ clib_unix_warning ("sigaction %U", format_signal, i);
+ }
+}
+
+int
+main (int argc, char **argv)
+{
+ vat_main_t *vam = &vat_main;
+ unformat_input_t _argv, *a = &_argv;
+ u8 **input_files = 0;
+ u8 *output_file = 0;
+ u8 *chroot_prefix;
+ u8 *this_input_file;
+ u8 interactive = 1;
+ u8 json_output = 0;
+ u8 *heap;
+ mheap_t *h;
+ int i;
+ f64 timeout;
+
+ clib_mem_init (0, 128 << 20);
+
+ heap = clib_mem_get_per_cpu_heap ();
+ h = mheap_header (heap);
+
+ /* make the main heap thread-safe */
+ h->flags |= MHEAP_FLAG_THREAD_SAFE;
+
+ clib_macro_init (&vam->macro_main);
+ clib_macro_add_builtin (&vam->macro_main, "current_file",
+ eval_current_file);
+ clib_macro_add_builtin (&vam->macro_main, "current_line",
+ eval_current_line);
+
+ init_error_string_table (vam);
+
+ unformat_init_command_line (a, argv);
+
+ while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (a, "in %s", &this_input_file))
+ vec_add1 (input_files, this_input_file);
+ else if (unformat (a, "out %s", &output_file))
+ ;
+ else if (unformat (a, "script"))
+ interactive = 0;
+ else if (unformat (a, "json"))
+ json_output = 1;
+ else if (unformat (a, "plugin_path %s", (u8 *) & vat_plugin_path))
+ vec_add1 (vat_plugin_path, 0);
+ else if (unformat (a, "plugin_name_filter %s",
+ (u8 *) & vat_plugin_name_filter))
+ vec_add1 (vat_plugin_name_filter, 0);
+ else if (unformat (a, "chroot prefix %s", &chroot_prefix))
+ {
+ vl_set_memory_root_path ((char *) chroot_prefix);
+ }
+ else
+ {
+ fformat (stderr,
+ "%s: usage [in <f1> ... in <fn>] [out <fn>] [script] [json]\n",
+ argv[0]);
+ exit (1);
+ }
+ }
+
+ if (output_file)
+ vam->ofp = fopen ((char *) output_file, "w");
+ else
+ vam->ofp = stdout;
+
+ if (vam->ofp == NULL)
+ {
+ fformat (stderr, "Couldn't open output file %s\n",
+ output_file ? (char *) output_file : "stdout");
+ exit (1);
+ }
+
+ clib_time_init (&vam->clib_time);
+
+ vat_api_hookup (vam);
+ vat_plugin_api_reference ();
+
+ setup_signal_handlers ();
+
+ if (connect_to_vpe ("vpp_api_test") < 0)
+ {
+ svm_region_exit ();
+ fformat (stderr, "Couldn't connect to vpe, exiting...\n");
+ exit (1);
+ }
+
+ vam->json_output = json_output;
+
+ if (!json_output)
+ {
+ api_sw_interface_dump (vam);
+ }
+
+ vec_validate (vam->inbuf, 4096);
+
+ vam->current_file = (u8 *) "plugin-init";
+ vat_plugin_init (vam);
+
+ for (i = 0; i < vec_len (input_files); i++)
+ {
+ vam->ifp = fopen ((char *) input_files[i], "r");
+ if (vam->ifp == NULL)
+ {
+ fformat (stderr, "Couldn't open input file %s\n", input_files[i]);
+ continue;
+ }
+ vam->current_file = input_files[i];
+ vam->input_line_number = 0;
+ do_one_file (vam);
+ fclose (vam->ifp);
+ }
+
+ if (output_file)
+ fclose (vam->ofp);
+
+ if (interactive)
+ {
+ vam->ifp = stdin;
+ vam->ofp = stdout;
+ vam->current_file = (u8 *) "interactive";
+ do_one_file (vam);
+ fclose (vam->ifp);
+ }
+
+ /*
+ * Particularly when running a script, don't be in a hurry to leave.
+ * A reply message queued to this process will end up constipating
+ * the allocation rings.
+ */
+ timeout = vat_time_now (vam) + 2.0;
+ while (vam->result_ready == 0 && vat_time_now (vam) < timeout)
+ ;
+
+ if (vat_time_now (vam) > timeout)
+ clib_warning ("BUG: message reply spin-wait timeout");
+
+ vl_client_disconnect_from_vlib ();
+ exit (0);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/plugin.c b/src/vat/plugin.c
new file mode 100644
index 00000000..20de8c50
--- /dev/null
+++ b/src/vat/plugin.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * plugin.c: plugin handling
+ */
+
+#include <vat/vat.h>
+#include <vat/plugin.h>
+#include <dlfcn.h>
+#include <dirent.h>
+
+plugin_main_t vat_plugin_main;
+
+static int
+load_one_plugin (plugin_main_t * pm, plugin_info_t * pi)
+{
+ void *handle, *register_handle;
+ clib_error_t *(*fp) (vat_main_t *);
+ clib_error_t *error;
+
+ handle = dlopen ((char *) pi->name, RTLD_LAZY);
+
+ /*
+ * Note: this can happen if the plugin has an undefined symbol reference,
+ * so print a warning. Otherwise, the poor slob won't know what happened.
+ * Ask me how I know that...
+ */
+ if (handle == 0)
+ {
+ clib_warning ("%s", dlerror ());
+ return -1;
+ }
+
+ pi->handle = handle;
+
+ register_handle = dlsym (pi->handle, "vat_plugin_register");
+ if (register_handle == 0)
+ return 0;
+
+ fp = register_handle;
+
+ error = (*fp) (pm->vat_main);
+
+ if (error)
+ {
+ clib_error_report (error);
+ dlclose (handle);
+ return 1;
+ }
+
+ clib_warning ("Loaded plugin: %s", pi->name);
+
+ return 0;
+}
+
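+/* Split the colon-separated plugin path into a vector of NUL-terminated
+ directory names. */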
+static u8 **
+split_plugin_path (plugin_main_t * pm)
+{
+ int i;
+ u8 **rv = 0;
+ u8 *path = pm->plugin_path;
+ u8 *this = 0;
+
+ for (i = 0; i < vec_len (pm->plugin_path); i++)
+ {
+ if (path[i] != ':')
+ {
+ vec_add1 (this, path[i]);
+ continue;
+ }
+ vec_add1 (this, 0);
+ vec_add1 (rv, this);
+ this = 0;
+ }
+ if (this)
+ {
+ vec_add1 (this, 0);
+ vec_add1 (rv, this);
+ }
+ return rv;
+}
+
+int
+vat_load_new_plugins (plugin_main_t * pm)
+{
+ DIR *dp;
+ struct dirent *entry;
+ struct stat statb;
+ uword *p;
+ plugin_info_t *pi;
+ u8 **plugin_path;
+ int i;
+
+ plugin_path = split_plugin_path (pm);
+
+ for (i = 0; i < vec_len (plugin_path); i++)
+ {
+ dp = opendir ((char *) plugin_path[i]);
+
+ if (dp == 0)
+ continue;
+
+ while ((entry = readdir (dp)))
+ {
+ u8 *plugin_name;
+
+ if (pm->plugin_name_filter)
+ {
+ int j;
+ for (j = 0; j < vec_len (pm->plugin_name_filter); j++)
+ if (entry->d_name[j] != pm->plugin_name_filter[j])
+ goto next;
+ }
+
+ plugin_name = format (0, "%s/%s%c", plugin_path[i],
+ entry->d_name, 0);
+
+ /* unreadable */
+ if (stat ((char *) plugin_name, &statb) < 0)
+ {
+ ignore:
+ vec_free (plugin_name);
+ continue;
+ }
+
+ /* a dir or other things which aren't plugins */
+ if (!S_ISREG (statb.st_mode))
+ goto ignore;
+
+ p = hash_get_mem (pm->plugin_by_name_hash, plugin_name);
+ if (p == 0)
+ {
+ vec_add2 (pm->plugin_info, pi, 1);
+ pi->name = plugin_name;
+ pi->file_info = statb;
+
+ if (load_one_plugin (pm, pi))
+ {
+ vec_free (plugin_name);
+ _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1;
+ continue;
+ }
+ memset (pi, 0, sizeof (*pi));
+ hash_set_mem (pm->plugin_by_name_hash, plugin_name,
+ pi - pm->plugin_info);
+ }
+ next:
+ ;
+ }
+ closedir (dp);
+ vec_free (plugin_path[i]);
+ }
+ vec_free (plugin_path);
+ return 0;
+}
+
+#define QUOTE_(x) #x
+#define QUOTE(x) QUOTE_(x)
+
+/*
+ * Load plugins from /usr/lib/vpp_api_test_plugins by default
+ */
+char *vat_plugin_path = "/usr/lib/vpp_api_test_plugins";
+
+char *vat_plugin_name_filter = 0;
+
+int
+vat_plugin_init (vat_main_t * vam)
+{
+ plugin_main_t *pm = &vat_plugin_main;
+
+ pm->plugin_path = format (0, "%s%c", vat_plugin_path, 0);
+ if (vat_plugin_name_filter)
+ pm->plugin_name_filter = format (0, "%s%c", vat_plugin_name_filter, 0);
+
+ pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword));
+ pm->vat_main = vam;
+
+ return vat_load_new_plugins (pm);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/plugin.h b/src/vat/plugin.h
new file mode 100644
index 00000000..559ec52f
--- /dev/null
+++ b/src/vat/plugin.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * plugin.h: plugin handling
+ */
+
+#ifndef __included_plugin_h__
+#define __included_plugin_h__
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+typedef struct
+{
+ u8 *name;
+ struct stat file_info;
+ void *handle;
+} plugin_info_t;
+
+typedef struct
+{
+ /* loaded plugin info */
+ plugin_info_t *plugin_info;
+ uword *plugin_by_name_hash;
+
+ /* path and name filter */
+ u8 *plugin_path;
+ u8 *plugin_name_filter;
+
+ /* convenience */
+ vat_main_t *vat_main;
+
+} plugin_main_t;
+
+plugin_main_t vat_plugin_main;
+
+int vat_plugin_init (vat_main_t * vam);
+int vat_load_new_plugins (plugin_main_t * pm);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/plugin_api.c b/src/vat/plugin_api.c
new file mode 100644
index 00000000..37c97c91
--- /dev/null
+++ b/src/vat/plugin_api.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vat/vat.h>
+#include <vnet/ip/ip.h>
+
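+/* Parse an interface name and map it to a sw_if_index using the
+ interface name table in vat_main. */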
+uword
+unformat_sw_if_index (unformat_input_t * input, va_list * args)
+{
+ vat_main_t *vam = va_arg (*args, vat_main_t *);
+ u32 *result = va_arg (*args, u32 *);
+ u8 *if_name;
+ uword *p;
+
+ if (!unformat (input, "%s", &if_name))
+ return 0;
+
+ p = hash_get_mem (vam->sw_if_index_by_interface_name, if_name);
+ if (p == 0)
+ return 0;
+ *result = p[0];
+ return 1;
+}
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+uword
+unformat_ip4_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ unsigned a[4];
+
+ if (!unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3]))
+ return 0;
+
+ if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256)
+ return 0;
+
+ result[0] = a[0];
+ result[1] = a[1];
+ result[2] = a[2];
+ result[3] = a[3];
+
+ return 1;
+}
+
+uword
+unformat_ethernet_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ u32 i, a[6];
+
+ if (!unformat (input, "%_%x:%x:%x:%x:%x:%x%_",
+ &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]))
+ return 0;
+
+ /* Check range. */
+ for (i = 0; i < 6; i++)
+ if (a[i] >= (1 << 8))
+ return 0;
+
+ for (i = 0; i < 6; i++)
+ result[i] = a[i];
+
+ return 1;
+}
+
+/* Returns ethernet type as an int in host byte order. */
+uword
+unformat_ethernet_type_host_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ int type;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x", &type) || unformat (input, "%d", &type))
+ {
+ if (type >= (1 << 16))
+ return 0;
+ *result = type;
+ return 1;
+ }
+ return 0;
+}
+
+/* Parse an IP6 address. */
+uword
+unformat_ip6_address (unformat_input_t * input, va_list * args)
+{
+ ip6_address_t *result = va_arg (*args, ip6_address_t *);
+ u16 hex_quads[8];
+ uword hex_quad, n_hex_quads, hex_digit, n_hex_digits;
+ uword c, n_colon, double_colon_index;
+
+ n_hex_quads = hex_quad = n_hex_digits = n_colon = 0;
+ double_colon_index = ARRAY_LEN (hex_quads);
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ hex_digit = 16;
+ if (c >= '0' && c <= '9')
+ hex_digit = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ hex_digit = c + 10 - 'a';
+ else if (c >= 'A' && c <= 'F')
+ hex_digit = c + 10 - 'A';
+ else if (c == ':' && n_colon < 2)
+ n_colon++;
+ else
+ {
+ unformat_put_input (input);
+ break;
+ }
+
+ /* Too many hex quads. */
+ if (n_hex_quads >= ARRAY_LEN (hex_quads))
+ return 0;
+
+ if (hex_digit < 16)
+ {
+ hex_quad = (hex_quad << 4) | hex_digit;
+
+ /* Hex quad must fit in 16 bits. */
+ if (n_hex_digits >= 4)
+ return 0;
+
+ n_colon = 0;
+ n_hex_digits++;
+ }
+
+ /* Save position of :: */
+ if (n_colon == 2)
+ {
+ /* More than one :: ? */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ return 0;
+ double_colon_index = n_hex_quads;
+ }
+
+ if (n_colon > 0 && n_hex_digits > 0)
+ {
+ hex_quads[n_hex_quads++] = hex_quad;
+ hex_quad = 0;
+ n_hex_digits = 0;
+ }
+ }
+
+ if (n_hex_digits > 0)
+ hex_quads[n_hex_quads++] = hex_quad;
+
+ {
+ word i;
+
+ /* Expand :: to appropriate number of zero hex quads. */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ {
+ word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads;
+
+ for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--)
+ hex_quads[n_zero + i] = hex_quads[i];
+
+ for (i = 0; i < n_zero; i++)
+ hex_quads[double_colon_index + i] = 0;
+
+ n_hex_quads = ARRAY_LEN (hex_quads);
+ }
+
+ /* Too few hex quads given. */
+ if (n_hex_quads < ARRAY_LEN (hex_quads))
+ return 0;
+
+ for (i = 0; i < ARRAY_LEN (hex_quads); i++)
+ result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]);
+
+ return 1;
+ }
+}
+
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+ i_max_n_zero = ARRAY_LEN (a->as_u16);
+ max_n_zeros = 0;
+ i_first_zero = i_max_n_zero;
+ n_zeros = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ u32 is_zero = a->as_u16[i] == 0;
+ if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+ {
+ i_first_zero = i;
+ n_zeros = 0;
+ }
+ n_zeros += is_zero;
+ if ((!is_zero && n_zeros > max_n_zeros)
+ || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+ {
+ i_max_n_zero = i_first_zero;
+ max_n_zeros = n_zeros;
+ i_first_zero = ARRAY_LEN (a->as_u16);
+ n_zeros = 0;
+ }
+ }
+
+ last_double_colon = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == i_max_n_zero && max_n_zeros > 1)
+ {
+ s = format (s, "::");
+ i += max_n_zeros - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+/* Format an IP46 address. */
+u8 *
+format_ip46_address (u8 * s, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ int is_ip4 = 1;
+
+ switch (type)
+ {
+ case IP46_TYPE_ANY:
+ is_ip4 = ip46_address_is_ip4 (ip46);
+ break;
+ case IP46_TYPE_IP4:
+ is_ip4 = 1;
+ break;
+ case IP46_TYPE_IP6:
+ is_ip4 = 0;
+ break;
+ }
+
+ return is_ip4 ?
+ format (s, "%U", format_ip4_address, &ip46->ip4) :
+ format (s, "%U", format_ip6_address, &ip46->ip6);
+}
+
+u8 *
+format_ethernet_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
+
+void
+vat_plugin_api_reference (void)
+{
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/restart.c b/src/vat/restart.c
new file mode 100644
index 00000000..adeee000
--- /dev/null
+++ b/src/vat/restart.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <time.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <svm/svmdb.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/time.h>
+#include <vppinfra/macros.h>
+
+int
+restart_main_fn (unformat_input_t * i)
+{
+ int verbose = 0;
+ int old_pid;
+ int wait;
+ u8 *chroot_path = 0;
+ svmdb_client_t *svmdb_client;
+ volatile pid_t *pidp;
+ struct stat statb;
+ ino_t old_inode;
+ int sleeps;
+ svmdb_map_args_t _ma, *ma = &_ma;
+
+ struct timespec _req, *req = &_req;
+ struct timespec _rem, *rem = &_rem;
+
+ if (geteuid ())
+ clib_error ("vpp_restart: must be root...");
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "verbose") || unformat (i, "v"))
+ verbose = 1;
+ else if (unformat (i, "chroot %s", &chroot_path))
+ ;
+ else
+ {
+ clib_error ("unknown input `%U'", format_unformat_error, i);
+ return 1;
+ }
+ }
+
+ /*
+ * Step 1: look up the current VPP pid in the shared-memory database
+ */
+ memset (ma, 0, sizeof (*ma));
+ ma->root_path = (char *) chroot_path;
+
+ svmdb_client = svmdb_map (ma);
+
+ pidp = svmdb_local_get_variable_reference (svmdb_client,
+ SVMDB_NAMESPACE_VEC, "vpp_pid");
+ if (pidp == 0)
+ {
+ clib_error ("'vpp_pid' svm variable not found, vpp has never run?");
+ return 2;
+ }
+
+ /* Spin for up to 10 seconds for vpp to start */
+ for (wait = 0; wait < 1000; wait++)
+ {
+ req->tv_sec = 0;
+ req->tv_nsec = 10000 * 1000; /* 10 ms */
+ while (nanosleep (req, rem) < 0)
+ *req = *rem;
+
+ if (*pidp)
+ goto found2;
+ }
+
+ clib_error ("VPP not runnning...");
+ return 3;
+
+found2:
+
+ old_pid = *pidp;
+
+ /*
+ * Step 2: sanity check the pid we discovered
+ */
+ if (verbose)
+ fformat (stdout, "Sanity check current vpp pid %d\n", old_pid);
+
+ if (kill (old_pid, 0) < 0)
+ {
+ svmdb_unmap (svmdb_client);
+ clib_error ("vpp current pid %d not running...", old_pid);
+ return 2;
+ }
+
+ if (verbose)
+ fformat (stdout, "Sanity check vpp pid %d OK\n", old_pid);
+
+ /*
+ * Step 3: figure out the current vpp <--> client shared-VM file
+ * inode number
+ */
+ if (stat ("/dev/shm/vpe-api", &statb) < 0)
+ {
+ clib_unix_error ("stat fail");
+ return 4;
+ }
+
+ old_inode = statb.st_ino;
+
+ if (verbose)
+ fformat (stdout, "Old inode %u\n", old_inode);
+
+ /* Note: restart wipes out the shared VM database */
+ svmdb_unmap (svmdb_client);
+
+ /*
+ * Step 4: send SIGTERM to vpp.
+ * systemd et al. will restart vpp after wiping out the shared-VM
+ * database and (crucially) the shared API messaging segment
+ */
+
+ if (kill (old_pid, SIGTERM) < 0)
+ {
+ clib_unix_error ("SIGTERM fail");
+ return 3;
+ }
+
+ sleeps = 0;
+
+ /*
+ * Step 5: wait up to 15 seconds for a new incarnation of
+ * the shared-VM API segment to appear.
+ */
+ for (wait = 0; wait < 150; wait++)
+ {
+ if ((stat ("/dev/shm/vpe-api", &statb) < 0)
+ || statb.st_ino == old_inode)
+ {
+ req->tv_sec = 0;
+ req->tv_nsec = 100000 * 1000; /* 100 ms */
+ while (nanosleep (req, rem) < 0)
+ *req = *rem;
+ sleeps++;
+ }
+ else
+ goto new_inode;
+ }
+
+ clib_error ("Timeout waiting for new inode to appear...");
+ return 5;
+
+new_inode:
+ if (verbose && sleeps > 0)
+ fformat (stdout, "Inode sleeps %d\n", sleeps);
+
+ if (verbose)
+ fformat (stdout, "New inode %u\n", statb.st_ino);
+
+ /*
+ * Step 6: remap the SVM database
+ */
+ svmdb_client = svmdb_map (ma);
+
+ pidp = svmdb_local_get_variable_reference (svmdb_client,
+ SVMDB_NAMESPACE_VEC, "vpp_pid");
+ if (pidp == 0)
+ {
+ clib_error ("post_restart: 'vpp_pid' svm variable not found,"
+ "vpp did not restart?");
+ return 2;
+ }
+
+ sleeps = 0;
+
+ /*
+ * Step 7: wait for vpp to publish its new PID
+ */
+
+ /* Spin for up to 15 seconds */
+ for (wait = 0; wait < 150; wait++)
+ {
+ if (*pidp && (*pidp != old_pid))
+ goto restarted;
+ req->tv_sec = 0;
+ req->tv_nsec = 100000 * 1000; /* 100 ms */
+ while (nanosleep (req, rem) < 0)
+ *req = *rem;
+ sleeps++;
+ }
+
+ clib_error ("Timeout waiting for vpp to publish pid after restart...");
+ return 4;
+
+restarted:
+
+ /* Done... */
+
+ if (verbose && sleeps)
+ fformat (stdout, "pid sleeps %d\n", sleeps);
+
+ if (verbose)
+ fformat (stdout, "New PID %d... Restarted...\n", *pidp);
+
+ svmdb_unmap (svmdb_client);
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ unformat_input_t i;
+ int ret;
+
+ clib_mem_init (0, 64ULL << 20);
+
+ unformat_init_command_line (&i, argv);
+ ret = restart_main_fn (&i);
+ unformat_free (&i);
+ return ret;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vat/vat.h b/src/vat/vat.h
new file mode 100644
index 00000000..233a1c41
--- /dev/null
+++ b/src/vat/vat.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vat_h__
+#define __included_vat_h__
+
+#include <stdio.h>
+#include <setjmp.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/time.h>
+#include <vppinfra/macros.h>
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include "vat/json_format.h"
+
+#include <vlib/vlib.h>
+
+typedef struct
+{
+ u8 *interface_name;
+ u32 sw_if_index;
+ /*
+ * Subinterface ID. A number 0-N to uniquely identify this
+ * subinterface under the super interface
+ */
+ u32 sub_id;
+
+ /* 0 = dot1q, 1=dot1ad */
+ u8 sub_dot1ad;
+
+ /* Number of tags 0-2 */
+ u8 sub_number_of_tags;
+ u16 sub_outer_vlan_id;
+ u16 sub_inner_vlan_id;
+ u8 sub_exact_match;
+ u8 sub_default;
+ u8 sub_outer_vlan_id_any;
+ u8 sub_inner_vlan_id_any;
+
+ /* vlan tag rewrite */
+ u32 vtr_op;
+ u32 vtr_push_dot1q;
+ u32 vtr_tag1;
+ u32 vtr_tag2;
+} sw_interface_subif_t;
+
+typedef struct
+{
+ u8 ip[16];
+ u8 prefix_length;
+} ip_address_details_t;
+
+typedef struct
+{
+ u8 present;
+ ip_address_details_t *addr;
+} ip_details_t;
+
+typedef struct
+{
+ u64 packets;
+ u64 bytes;
+} interface_counter_t;
+
+typedef struct
+{
+ struct in_addr address;
+ u8 address_length;
+ u64 packets;
+ u64 bytes;
+} ip4_fib_counter_t;
+
+typedef struct
+{
+ struct in6_addr address;
+ u8 address_length;
+ u64 packets;
+ u64 bytes;
+} ip6_fib_counter_t;
+
+typedef struct
+{
+ struct in_addr address;
+ vnet_link_t linkt;
+ u64 packets;
+ u64 bytes;
+} ip4_nbr_counter_t;
+
+typedef struct
+{
+ struct in6_addr address;
+ vnet_link_t linkt;
+ u64 packets;
+ u64 bytes;
+} ip6_nbr_counter_t;
+
+typedef struct
+{
+ /* vpe input queue */
+ unix_shared_memory_queue_t *vl_input_queue;
+
+ /* interface name table */
+ uword *sw_if_index_by_interface_name;
+
+ /* subinterface table */
+ sw_interface_subif_t *sw_if_subif_table;
+
+ /* Graph node table */
+ uword *graph_node_index_by_name;
+ vlib_node_t **graph_nodes;
+
+ /* ip tables */
+ ip_details_t *ip_details_by_sw_if_index[2];
+
+ /* sw_if_index of currently processed interface */
+ u32 current_sw_if_index;
+
+ /* remember that we are dumping ipv6 */
+ u8 is_ipv6;
+
+ /* function table */
+ uword *function_by_name;
+
+ /* help strings */
+ uword *help_by_name;
+
+ /* macro table */
+ macro_main_t macro_main;
+
+ /* Errors by number */
+ uword *error_string_by_error_number;
+
+
+ /* Main thread can spin (w/ timeout) here if needed */
+ u32 async_mode;
+ u32 async_errors;
+ volatile u32 result_ready;
+ volatile i32 retval;
+ volatile u32 sw_if_index;
+ volatile u8 *shmem_result;
+ volatile u8 *cmd_reply;
+
+ /* our client index */
+ u32 my_client_index;
+
+ /* Time is of the essence... */
+ clib_time_t clib_time;
+
+ /* Unwind (so we can quit) */
+ jmp_buf jump_buf;
+ int jump_buf_set;
+ volatile int do_exit;
+
+ /* temporary parse buffer */
+ unformat_input_t *input;
+
+ /* input buffer */
+ u8 *inbuf;
+
+ /* stdio input / output FILEs */
+ FILE *ifp, *ofp;
+ u8 *current_file;
+ u32 input_line_number;
+
+ /* exec mode toggle */
+ int exec_mode;
+
+ /* Regenerate the interface table */
+ volatile int regenerate_interface_table;
+
+ /* flag for JSON output format */
+ u8 json_output;
+
+ /* flag for interface event display */
+ u8 interface_event_display;
+
+ /* JSON tree used in composing dump api call results */
+ vat_json_node_t json_tree;
+
+ /* counters */
+ u64 **simple_interface_counters;
+ interface_counter_t **combined_interface_counters;
+ ip4_fib_counter_t **ip4_fib_counters;
+ u32 *ip4_fib_counters_vrf_id_by_index;
+ ip6_fib_counter_t **ip6_fib_counters;
+ u32 *ip6_fib_counters_vrf_id_by_index;
+ ip4_nbr_counter_t **ip4_nbr_counters;
+ ip6_nbr_counter_t **ip6_nbr_counters;
+
+ /* Convenience */
+ vlib_main_t *vlib_main;
+} vat_main_t;
+
+extern vat_main_t vat_main;
+
+void vat_suspend (vlib_main_t * vm, f64 interval);
+f64 vat_time_now (vat_main_t * vam);
+void errmsg (char *fmt, ...);
+void vat_api_hookup (vat_main_t * vam);
+int api_sw_interface_dump (vat_main_t * vam);
+void do_one_file (vat_main_t * vam);
+int exec (vat_main_t * vam);
+
+/* Plugin API library functions */
+char *vat_plugin_path;
+char *vat_plugin_name_filter;
+void vat_plugin_api_reference (void);
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+uword unformat_ip4_address (unformat_input_t * input, va_list * args);
+uword unformat_ethernet_address (unformat_input_t * input, va_list * args);
+uword unformat_ethernet_type_host_byte_order (unformat_input_t * input,
+ va_list * args);
+uword unformat_ip6_address (unformat_input_t * input, va_list * args);
+u8 *format_ip4_address (u8 * s, va_list * args);
+u8 *format_ip6_address (u8 * s, va_list * args);
+u8 *format_ip46_address (u8 * s, va_list * args);
+u8 *format_ethernet_address (u8 * s, va_list * args);
+
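+/* When built into vpp, "print" goes to the vpp CLI; in the standalone
+ vpp_api_test binary it appends a newline and writes to a stdio FILE. */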
+#if VPP_API_TEST_BUILTIN
+#define print api_cli_output
+void api_cli_output (void *, const char *fmt, ...);
+#else
+#define print fformat_append_cr
+void fformat_append_cr (FILE *, const char *fmt, ...);
+#endif
+
+#endif /* __included_vat_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib-api.am b/src/vlib-api.am
new file mode 100644
index 00000000..677811bc
--- /dev/null
+++ b/src/vlib-api.am
@@ -0,0 +1,84 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+lib_LTLIBRARIES += libvlibmemory.la libvlibmemoryclient.la \
+ libvlibsocket.la
+
+libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlib.la
+libvlibmemory_la_LIBADD = $(libvlibmemory_la_DEPENDENCIES) -lpthread
+libvlibmemory_la_SOURCES = \
+ vlibapi/api.h \
+ vlibapi/api_helper_macros.h \
+ vlibapi/api_shared.c \
+ vlibapi/node_serialize.c \
+ vlibmemory/api.h \
+ vlibmemory/memclnt.api \
+ vlibmemory/memory_shared.c \
+ vlibmemory/memory_vlib.c \
+ vlibmemory/unix_shared_memory_queue.c \
+ vlibmemory/unix_shared_memory_queue.h \
+ vlibmemory/vl_memory_api_h.h \
+ vlibmemory/vl_memory_msg_enum.h
+
+nobase_include_HEADERS += vlibapi/api.h \
+ vlibapi/api_common.h \
+ vlibapi/api_helper_macros.h \
+ vlibapi/vat_helper_macros.h
+
+libvlibmemoryclient_la_DEPENDENCIES = libvppinfra.la libsvm.la
+libvlibmemoryclient_la_LIBADD = $(libvlibmemoryclient_la_DEPENDENCIES) -lpthread
+libvlibmemoryclient_la_SOURCES = \
+ vlibapi/api_helper_macros.h \
+ vlibapi/api_shared.c \
+ vlibapi/node_unserialize.c \
+ vlibmemory/api.h \
+ vlibmemory/memclnt.api \
+ vlibmemory/memory_client.c \
+ vlibmemory/memory_shared.c \
+ vlibmemory/unix_shared_memory_queue.c \
+ vlibmemory/unix_shared_memory_queue.h \
+ vlibmemory/vl_memory_api_h.h \
+ vlibmemory/vl_memory_msg_enum.h
+
+nobase_include_HEADERS += \
+ vlibmemory/api.h \
+ vlibmemory/api_common.h \
+ vlibmemory/vl_memory_api_h.h \
+ vlibmemory/vl_memory_msg_enum.h \
+ vlibmemory/unix_shared_memory_queue.h \
+ vlibmemory/memclnt.api.h
+
+libvlibsocket_la_DEPENDENCIES = libvppinfra.la libvlib.la libvlibmemory.la
+libvlibsocket_la_LIBADD = $(libvlibsocket_la_DEPENDENCIES)
+libvlibsocket_la_SOURCES = \
+ vlibsocket/api.h \
+ vlibsocket/sockclnt.api \
+ vlibsocket/sockclnt_vlib.c \
+ vlibsocket/socksvr_vlib.c \
+ vlibsocket/vl_socket_api_h.h \
+ vlibsocket/vl_socket_msg_enum.h
+
+nobase_include_HEADERS += \
+ vlibsocket/api.h \
+ vlibsocket/vl_socket_api_h.h \
+ vlibsocket/vl_socket_msg_enum.h \
+ vlibsocket/sockclnt.api.h
+
+BUILT_SOURCES += \
+ vlibsocket/sockclnt.api.h \
+ vlibmemory/memclnt.api.h \
+ vlibmemory/memclnt.api.json
+
+API_FILES += vlibmemory/memclnt.api
+
+# vi:syntax=automake
diff --git a/src/vlib.am b/src/vlib.am
new file mode 100644
index 00000000..067e4afc
--- /dev/null
+++ b/src/vlib.am
@@ -0,0 +1,100 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+lib_LTLIBRARIES += libvlib.la
+libvlib_la_LIBADD = libvppinfra.la -ldl -lpthread
+libvlib_la_DEPENDENCIES = libvppinfra.la
+
+BUILT_SOURCES += vlib/config.h
+
+vlib/config.h:
+ @echo "#define __PRE_DATA_SIZE" @PRE_DATA_SIZE@ > $@
+
+libvlib_la_SOURCES = \
+ vlib/buffer.c \
+ vlib/buffer_serialize.c \
+ vlib/cli.c \
+ vlib/cli.h \
+ vlib/config.h \
+ vlib/counter.c \
+ vlib/error.c \
+ vlib/format.c \
+ vlib/i2c.c \
+ vlib/init.c \
+ vlib/linux/pci.c \
+ vlib/linux/physmem.c \
+ vlib/main.c \
+ vlib/mc.c \
+ vlib/node.c \
+ vlib/node_cli.c \
+ vlib/node_format.c \
+ vlib/pci/pci.c \
+ vlib/threads.c \
+ vlib/threads_cli.c \
+ vlib/trace.c
+
+nobase_include_HEADERS += \
+ vlib/buffer_funcs.h \
+ vlib/buffer_node.h \
+ vlib/buffer.h \
+ vlib/cli.h \
+ vlib/cli_funcs.h \
+ vlib/config.h \
+ vlib/counter.h \
+ vlib/defs.h \
+ vlib/error_funcs.h \
+ vlib/error.h \
+ vlib/format_funcs.h \
+ vlib/global_funcs.h \
+ vlib/i2c.h \
+ vlib/init.h \
+ vlib/main.h \
+ vlib/mc.h \
+ vlib/node_funcs.h \
+ vlib/node.h \
+ vlib/physmem.h \
+ vlib/pci/pci.h \
+ vlib/pci/pci_config.h \
+ vlib/physmem_funcs.h \
+ vlib/threads.h \
+ vlib/trace_funcs.h \
+ vlib/trace.h \
+ vlib/vlib.h
+
+libvlib_la_SOURCES += \
+ vlib/unix/cj.c \
+ vlib/unix/cli.c \
+ vlib/unix/input.c \
+ vlib/unix/main.c \
+ vlib/unix/mc_socket.c \
+ vlib/unix/plugin.c \
+ vlib/unix/plugin.h \
+ vlib/unix/util.c
+
+nobase_include_HEADERS += \
+ vlib/unix/cj.h \
+ vlib/unix/mc_socket.h \
+ vlib/unix/plugin.h \
+ vlib/unix/unix.h
+
+noinst_PROGRAMS += vlib_unix
+
+vlib_unix_SOURCES = \
+ examples/vlib/main_stub.c \
+ examples/vlib/mc_test.c
+
+vlib_unix_LDADD = libvlib.la \
+ libvppinfra.la -lpthread -lm -ldl -lrt
+
+# vi:syntax=automake
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
new file mode 100644
index 00000000..7399b618
--- /dev/null
+++ b/src/vlib/buffer.c
@@ -0,0 +1,1134 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.c: allocate/free network buffers.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ *
+ * Allocate/free network buffers.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+vlib_buffer_callbacks_t *vlib_buffer_callbacks = 0;
+static u32 vlib_buffer_physmem_sz = 32 << 20;
+
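+/* Walk an entire buffer chain, cache the total length of the follow-on
+ buffers in the first buffer, and return the full chain length. */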
+uword
+vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm,
+ vlib_buffer_t * b_first)
+{
+ vlib_buffer_t *b = b_first;
+ uword l_first = b_first->current_length;
+ uword l = 0;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ l += b->current_length;
+ }
+ b_first->total_length_not_including_first_buffer = l;
+ b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ return l + l_first;
+}
+
+u8 *
+format_vlib_buffer (u8 * s, va_list * args)
+{
+ vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "current data %d, length %d, free-list %d, clone-count %u",
+ b->current_data, b->current_length,
+ vlib_buffer_get_free_list_index (b), b->n_add_refs);
+
+ if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
+ s = format (s, ", totlen-nifb %d",
+ b->total_length_not_including_first_buffer);
+
+ if (b->flags & VLIB_BUFFER_IS_TRACED)
+ s = format (s, ", trace 0x%x", b->trace_index);
+
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ u32 next_buffer = b->next_buffer;
+ b = vlib_get_buffer (vm, next_buffer);
+
+ s =
+ format (s, "\n%Unext-buffer 0x%x, segment length %d, clone-count %u",
+ format_white_space, indent, next_buffer, b->current_length,
+ b->n_add_refs);
+ }
+
+ return s;
+}
+
+u8 *
+format_vlib_buffer_and_data (u8 * s, va_list * args)
+{
+ vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
+
+ s = format (s, "%U, %U",
+ format_vlib_buffer, b,
+ format_hex_bytes, vlib_buffer_get_current (b), 64);
+
+ return s;
+}
+
+static u8 *
+format_vlib_buffer_known_state (u8 * s, va_list * args)
+{
+ vlib_buffer_known_state_t state = va_arg (*args, vlib_buffer_known_state_t);
+ char *t;
+
+ switch (state)
+ {
+ case VLIB_BUFFER_UNKNOWN:
+ t = "unknown";
+ break;
+
+ case VLIB_BUFFER_KNOWN_ALLOCATED:
+ t = "known-allocated";
+ break;
+
+ case VLIB_BUFFER_KNOWN_FREE:
+ t = "known-free";
+ break;
+
+ default:
+ t = "invalid";
+ break;
+ }
+
+ return format (s, "%s", t);
+}
+
+u8 *
+format_vlib_buffer_contents (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ vlib_buffer_t *b = va_arg (*va, vlib_buffer_t *);
+
+ while (1)
+ {
+ vec_add (s, vlib_buffer_get_current (b), b->current_length);
+ if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ }
+
+ return s;
+}
+
+static u8 *
+vlib_validate_buffer_helper (vlib_main_t * vm,
+ u32 bi,
+ uword follow_buffer_next, uword ** unique_hash)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *fl;
+
+ if (pool_is_free_index
+ (bm->buffer_free_list_pool, vlib_buffer_get_free_list_index (b)))
+ return format (0, "unknown free list 0x%x",
+ vlib_buffer_get_free_list_index (b));
+
+ fl =
+ pool_elt_at_index (bm->buffer_free_list_pool,
+ vlib_buffer_get_free_list_index (b));
+
+ if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE)
+ return format (0, "current data %d before pre-data", b->current_data);
+
+ if (b->current_data + b->current_length > fl->n_data_bytes)
+ return format (0, "%d-%d beyond end of buffer %d",
+ b->current_data, b->current_length, fl->n_data_bytes);
+
+ if (follow_buffer_next && (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ vlib_buffer_known_state_t k;
+ u8 *msg, *result;
+
+ k = vlib_buffer_is_known (vm, b->next_buffer);
+ if (k != VLIB_BUFFER_KNOWN_ALLOCATED)
+ return format (0, "next 0x%x: %U",
+ b->next_buffer, format_vlib_buffer_known_state, k);
+
+ if (unique_hash)
+ {
+ if (hash_get (*unique_hash, b->next_buffer))
+ return format (0, "duplicate buffer 0x%x", b->next_buffer);
+
+ hash_set1 (*unique_hash, b->next_buffer);
+ }
+
+ msg = vlib_validate_buffer (vm, b->next_buffer, follow_buffer_next);
+ if (msg)
+ {
+ result = format (0, "next 0x%x: %v", b->next_buffer, msg);
+ vec_free (msg);
+ return result;
+ }
+ }
+
+ return 0;
+}
+
+u8 *
+vlib_validate_buffer (vlib_main_t * vm, u32 bi, uword follow_buffer_next)
+{
+ return vlib_validate_buffer_helper (vm, bi, follow_buffer_next,
+ /* unique_hash */ 0);
+}
+
+u8 *
+vlib_validate_buffers (vlib_main_t * vm,
+ u32 * buffers,
+ uword next_buffer_stride,
+ uword n_buffers,
+ vlib_buffer_known_state_t known_state,
+ uword follow_buffer_next)
+{
+ uword i, *hash;
+ u32 bi, *b = buffers;
+ vlib_buffer_known_state_t k;
+ u8 *msg = 0, *result = 0;
+
+ hash = hash_create (0, 0);
+ for (i = 0; i < n_buffers; i++)
+ {
+ bi = b[0];
+ b += next_buffer_stride;
+
+ /* Buffer is not unique. */
+ if (hash_get (hash, bi))
+ {
+ msg = format (0, "not unique");
+ goto done;
+ }
+
+ k = vlib_buffer_is_known (vm, bi);
+ if (k != known_state)
+ {
+ msg = format (0, "is %U; expected %U",
+ format_vlib_buffer_known_state, k,
+ format_vlib_buffer_known_state, known_state);
+ goto done;
+ }
+
+ msg = vlib_validate_buffer_helper (vm, bi, follow_buffer_next, &hash);
+ if (msg)
+ goto done;
+
+ hash_set1 (hash, bi);
+ }
+
+done:
+ if (msg)
+ {
+ result = format (0, "0x%x: %v", bi, msg);
+ vec_free (msg);
+ }
+ hash_free (hash);
+ return result;
+}
+
+/*
+ * Hand-craft a static vector w/ length 1, so vec_len(vlib_mains) == 1
+ * and vlib_mains[0] = &vlib_global_main from the beginning of time.
+ *
+ * The only place which should ever expand vlib_mains is start_workers()
+ * in threads.c. It knows about the bootstrap vector.
+ */
+/* *INDENT-OFF* */
+static struct
+{
+ vec_header_t h;
+ vlib_main_t *vm;
+} __attribute__ ((packed)) __bootstrap_vlib_main_vector
+ __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES))) =
+{
+ .h.len = 1,
+ .vm = &vlib_global_main,
+};
+/* *INDENT-ON* */
+
+vlib_main_t **vlib_mains = &__bootstrap_vlib_main_vector.vm;
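+
+/*
+ * Illustration, assuming clib's vector layout (vec_header_t immediately
+ * precedes the vector data): because the packed struct above places .h
+ * directly before .vm, &__bootstrap_vlib_main_vector.vm is a valid vector
+ * pointer, so from the very first instruction:
+ *
+ *   vec_len (vlib_mains)  => __bootstrap_vlib_main_vector.h.len == 1
+ *   vlib_mains[0]         => &vlib_global_main
+ */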
+
+
+/* When debugging, validate that given buffers are either known allocated
+   or known free. */
+static void
+vlib_buffer_validate_alloc_free (vlib_main_t * vm,
+ u32 * buffers,
+ uword n_buffers,
+ vlib_buffer_known_state_t expected_state)
+{
+ u32 *b;
+ uword i, bi, is_free;
+
+ if (CLIB_DEBUG == 0)
+ return;
+
+ is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED;
+ b = buffers;
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_known_state_t known;
+
+ bi = b[0];
+ b += 1;
+ known = vlib_buffer_is_known (vm, bi);
+ if (known != expected_state)
+ {
+ ASSERT (0);
+ vlib_panic_with_msg
+ (vm, "%s %U buffer 0x%x",
+ is_free ? "freeing" : "allocating",
+ format_vlib_buffer_known_state, known, bi);
+ }
+
+ vlib_buffer_set_known_state
+ (vm, bi,
+ is_free ? VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED);
+ }
+}
+
+void
+vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst,
+ vlib_buffer_free_list_t * src)
+{
+ uword l;
+ u32 *d;
+
+ l = vec_len (src->buffers);
+ if (l > 0)
+ {
+ vec_add2_aligned (dst->buffers, d, l, CLIB_CACHE_LINE_BYTES);
+ clib_memcpy (d, src->buffers, l * sizeof (d[0]));
+ vec_free (src->buffers);
+ }
+}
+
+/* Add buffer free list. */
+static u32
+vlib_buffer_create_free_list_helper (vlib_main_t * vm,
+ u32 n_data_bytes,
+ u32 is_public, u32 is_default, u8 * name)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *f;
+ int i;
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0)
+ {
+ u32 default_free_free_list_index;
+
+ /* *INDENT-OFF* */
+ default_free_free_list_index =
+ vlib_buffer_create_free_list_helper
+ (vm,
+ /* default buffer size */ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
+ /* is_public */ 1,
+ /* is_default */ 1,
+ (u8 *) "default");
+ /* *INDENT-ON* */
+ ASSERT (default_free_free_list_index ==
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public)
+ return default_free_free_list_index;
+ }
+
+ pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES);
+
+ memset (f, 0, sizeof (f[0]));
+ f->index = f - bm->buffer_free_list_pool;
+ f->n_data_bytes = vlib_buffer_round_size (n_data_bytes);
+ f->min_n_buffers_each_physmem_alloc = VLIB_FRAME_SIZE;
+ f->name = clib_mem_is_vec (name) ? name : format (0, "%s", name);
+
+ /* Setup free buffer template. */
+ vlib_buffer_set_free_list_index (&f->buffer_init_template, f->index);
+ f->buffer_init_template.n_add_refs = 0;
+
+ if (is_public)
+ {
+ uword *p = hash_get (bm->free_list_by_size, f->n_data_bytes);
+ if (!p)
+ hash_set (bm->free_list_by_size, f->n_data_bytes, f->index);
+ }
+
+ clib_spinlock_init (&f->global_buffers_lock);
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ vlib_buffer_main_t *wbm = vlib_mains[i]->buffer_main;
+ vlib_buffer_free_list_t *wf;
+ pool_get_aligned (wbm->buffer_free_list_pool,
+ wf, CLIB_CACHE_LINE_BYTES);
+ ASSERT (f - bm->buffer_free_list_pool ==
+ wf - wbm->buffer_free_list_pool);
+ wf[0] = f[0];
+ wf->buffers = 0;
+ wf->n_alloc = 0;
+ }
+
+ return f->index;
+}
+
+u32
+vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char *fmt, ...)
+{
+ va_list va;
+ u8 *name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ return vlib_buffer_create_free_list_helper (vm, n_data_bytes,
+ /* is_public */ 0,
+ /* is_default */ 0,
+ name);
+}
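+
+/*
+ * Usage sketch (size and name hypothetical): create a private free list
+ * for oversized payloads, then allocate from it by index:
+ *
+ *   u32 fl = vlib_buffer_create_free_list (vm, 9216, "jumbo");
+ *   u32 bi;
+ *   if (vlib_buffer_alloc_from_free_list (vm, &bi, 1, fl) == 1)
+ *     {
+ *       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ *       ...
+ *     }
+ */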
+
+u32
+vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char *fmt, ...)
+{
+ u32 i = vlib_buffer_get_free_list_with_size (vm, n_data_bytes);
+
+ if (i == ~0)
+ {
+ va_list va;
+ u8 *name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ i = vlib_buffer_create_free_list_helper (vm, n_data_bytes,
+ /* is_public */ 1,
+ /* is_default */ 0,
+ name);
+ }
+
+ return i;
+}
+
+static void
+del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
+{
+ u32 i;
+
+ for (i = 0; i < vec_len (f->buffer_memory_allocated); i++)
+ vm->os_physmem_free (vm, vm->buffer_main->physmem_region,
+ f->buffer_memory_allocated[i]);
+ vec_free (f->name);
+ vec_free (f->buffer_memory_allocated);
+ vec_free (f->buffers);
+}
+
+/* Add buffer free list. */
+void
+vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *f;
+ u32 merge_index;
+ int i;
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ f = vlib_buffer_get_free_list (vm, free_list_index);
+
+ ASSERT (vec_len (f->buffers) == f->n_alloc);
+ merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes);
+ if (merge_index != ~0 && merge_index != free_list_index)
+ {
+ vlib_buffer_merge_free_lists (pool_elt_at_index
+ (bm->buffer_free_list_pool, merge_index),
+ f);
+ }
+
+ del_free_list (vm, f);
+
+ /* Poison it. */
+ memset (f, 0xab, sizeof (f[0]));
+
+ pool_put (bm->buffer_free_list_pool, f);
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ bm = vlib_mains[i]->buffer_main;
+      f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);
+ memset (f, 0xab, sizeof (f[0]));
+ pool_put (bm->buffer_free_list_pool, f);
+ }
+}
+
+/* Make sure free list has at least given number of free buffers. */
+static uword
+fill_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl, uword min_free_buffers)
+{
+ vlib_buffer_t *buffers, *b;
+ vlib_buffer_free_list_t *mfl;
+ int n, n_bytes, i;
+ u32 *bi;
+ u32 n_remaining, n_alloc, n_this_chunk;
+
+ /* Already have enough free buffers on free list? */
+ n = min_free_buffers - vec_len (fl->buffers);
+ if (n <= 0)
+ return min_free_buffers;
+
+ mfl = vlib_buffer_get_free_list (vlib_mains[0], fl->index);
+ if (vec_len (mfl->global_buffers) > 0)
+ {
+ int n_copy, n_left;
+ clib_spinlock_lock (&mfl->global_buffers_lock);
+ n_copy = clib_min (vec_len (mfl->global_buffers), n);
+ n_left = vec_len (mfl->global_buffers) - n_copy;
+ vec_add_aligned (fl->buffers, mfl->global_buffers + n_left, n_copy,
+ CLIB_CACHE_LINE_BYTES);
+ _vec_len (mfl->global_buffers) = n_left;
+ clib_spinlock_unlock (&mfl->global_buffers_lock);
+ n = min_free_buffers - vec_len (fl->buffers);
+ if (n <= 0)
+ return min_free_buffers;
+ }
+
+  /* Always allocate a round number of buffers. */
+ n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32));
+
+  /* Always allocate new buffers in reasonably large chunks. */
+ n = clib_max (n, fl->min_n_buffers_each_physmem_alloc);
+
+ n_remaining = n;
+ n_alloc = 0;
+ while (n_remaining > 0)
+ {
+ n_this_chunk = clib_min (n_remaining, 16);
+
+ n_bytes = n_this_chunk * (sizeof (b[0]) + fl->n_data_bytes);
+
+ /* drb: removed power-of-2 ASSERT */
+ buffers =
+ vm->os_physmem_alloc_aligned (vm, vm->buffer_main->physmem_region,
+ n_bytes, sizeof (vlib_buffer_t));
+ if (!buffers)
+ return n_alloc;
+
+ /* Record chunk as being allocated so we can free it later. */
+ vec_add1 (fl->buffer_memory_allocated, buffers);
+
+ fl->n_alloc += n_this_chunk;
+ n_alloc += n_this_chunk;
+ n_remaining -= n_this_chunk;
+
+ b = buffers;
+ vec_add2_aligned (fl->buffers, bi, n_this_chunk, CLIB_CACHE_LINE_BYTES);
+ for (i = 0; i < n_this_chunk; i++)
+ {
+ bi[i] = vlib_get_buffer_index (vm, b);
+
+ if (CLIB_DEBUG > 0)
+ vlib_buffer_set_known_state (vm, bi[i], VLIB_BUFFER_KNOWN_FREE);
+ b = vlib_buffer_next_contiguous (b, fl->n_data_bytes);
+ }
+
+ memset (buffers, 0, n_bytes);
+
+ /* Initialize all new buffers. */
+ b = buffers;
+ for (i = 0; i < n_this_chunk; i++)
+ {
+ vlib_buffer_init_for_free_list (b, fl);
+ b = vlib_buffer_next_contiguous (b, fl->n_data_bytes);
+ }
+
+ if (fl->buffer_init_function)
+ fl->buffer_init_function (vm, fl, bi, n_this_chunk);
+ }
+ return n_alloc;
+}
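+
+/*
+ * Layout sketch for one physmem chunk allocated above (assuming the
+ * default 2048-byte data size): headers and data interleave contiguously,
+ * which is what lets vlib_buffer_next_contiguous() walk the chunk:
+ *
+ *   [vlib_buffer_t|2048B data][vlib_buffer_t|2048B data]... (<= 16 per chunk)
+ */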
+
+static u32
+alloc_from_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * free_list,
+ u32 * alloc_buffers, u32 n_alloc_buffers)
+{
+ u32 *dst, *src;
+ uword len;
+ uword n_filled;
+
+ dst = alloc_buffers;
+
+ n_filled = fill_free_list (vm, free_list, n_alloc_buffers);
+ if (n_filled == 0)
+ return 0;
+
+ len = vec_len (free_list->buffers);
+ ASSERT (len >= n_alloc_buffers);
+
+ src = free_list->buffers + len - n_alloc_buffers;
+ clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32));
+
+ _vec_len (free_list->buffers) -= n_alloc_buffers;
+
+ /* Verify that buffers are known free. */
+ vlib_buffer_validate_alloc_free (vm, alloc_buffers,
+ n_alloc_buffers, VLIB_BUFFER_KNOWN_FREE);
+
+ return n_alloc_buffers;
+}
+
+
+/* Allocate a given number of buffers into the given array.
+   Returns the number actually allocated, which will be either zero or
+   the number requested. */
+static u32
+vlib_buffer_alloc_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ return alloc_from_free_list
+ (vm,
+ pool_elt_at_index (bm->buffer_free_list_pool,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX),
+ buffers, n_buffers);
+}
+
+static u32
+vlib_buffer_alloc_from_free_list_internal (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers, u32 free_list_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *f;
+ f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
+ return alloc_from_free_list (vm, f, buffers, n_buffers);
+}
+
+void *
+vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ void *rv = bm->buffer_free_callback;
+
+ bm->buffer_free_callback = fp;
+ return rv;
+}
+
+static_always_inline void
+vlib_buffer_free_inline (vlib_main_t * vm,
+ u32 * buffers, u32 n_buffers, u32 follow_buffer_next)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *fl;
+ u32 fi;
+ int i;
+ u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
+ u32 follow_buffer_next);
+
+ cb = bm->buffer_free_callback;
+
+ if (PREDICT_FALSE (cb != 0))
+ n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next);
+
+ if (!n_buffers)
+ return;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b;
+ u32 bi = buffers[i];
+
+ b = vlib_get_buffer (vm, bi);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+ fl = vlib_buffer_get_buffer_free_list (vm, b, &fi);
+
+ /* The only current use of this callback: multicast recycle */
+ if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0))
+ {
+ int j;
+
+ vlib_buffer_add_to_free_list
+ (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0);
+
+ for (j = 0; j < vec_len (bm->announce_list); j++)
+ {
+ if (fl == bm->announce_list[j])
+ goto already_announced;
+ }
+ vec_add1 (bm->announce_list, fl);
+ already_announced:
+ ;
+ }
+ else
+ {
+ if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0))
+ {
+ u32 flags, next;
+
+ do
+ {
+ vlib_buffer_t *nb = vlib_get_buffer (vm, bi);
+ flags = nb->flags;
+ next = nb->next_buffer;
+ if (nb->n_add_refs)
+ nb->n_add_refs--;
+ else
+ {
+ vlib_buffer_validate_alloc_free (vm, &bi, 1,
+ VLIB_BUFFER_KNOWN_ALLOCATED);
+ vlib_buffer_add_to_free_list (vm, fl, bi, 1);
+ }
+ bi = next;
+ }
+ while (follow_buffer_next
+ && (flags & VLIB_BUFFER_NEXT_PRESENT));
+
+ }
+ }
+ }
+ if (vec_len (bm->announce_list))
+ {
+ vlib_buffer_free_list_t *fl;
+ for (i = 0; i < vec_len (bm->announce_list); i++)
+ {
+ fl = bm->announce_list[i];
+ fl->buffers_added_to_freelist_function (vm, fl);
+ }
+ _vec_len (bm->announce_list) = 0;
+ }
+}
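+
+/*
+ * Reference-count sketch for the do/while walk above: a segment cloned
+ * via vlib_buffer_clone() carries n_add_refs > 0, so early frees only
+ * decrement the count and the segment stays live; the last owner's free
+ * is the one that actually returns it to its free list:
+ *
+ *   n_add_refs == 1:  free #1 => n_add_refs = 0, buffer kept
+ *                     free #2 => buffer added to free list
+ */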
+
+static void
+vlib_buffer_free_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+ vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */
+ 1);
+}
+
+static void
+vlib_buffer_free_no_next_internal (vlib_main_t * vm, u32 * buffers,
+ u32 n_buffers)
+{
+ vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */
+ 0);
+}
+
+/* Copy template packet data into buffers as they are allocated. */
+static void __attribute__ ((unused))
+vlib_packet_template_buffer_init (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl,
+ u32 * buffers, u32 n_buffers)
+{
+ vlib_packet_template_t *t =
+ uword_to_pointer (fl->buffer_init_function_opaque,
+ vlib_packet_template_t *);
+ uword i;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]);
+ ASSERT (b->current_length == vec_len (t->packet_data));
+ clib_memcpy (vlib_buffer_get_current (b), t->packet_data,
+ b->current_length);
+ }
+}
+
+void
+vlib_packet_template_init (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ void *packet_data,
+ uword n_packet_data_bytes,
+ uword min_n_buffers_each_physmem_alloc,
+ char *fmt, ...)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ va_list va;
+ u8 *name;
+ vlib_buffer_free_list_t *fl;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ if (bm->cb.vlib_packet_template_init_cb)
+ bm->cb.vlib_packet_template_init_cb (vm, (void *) t, packet_data,
+ n_packet_data_bytes,
+ min_n_buffers_each_physmem_alloc,
+ name);
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ memset (t, 0, sizeof (t[0]));
+
+ vec_add (t->packet_data, packet_data, n_packet_data_bytes);
+ t->min_n_buffers_each_physmem_alloc = min_n_buffers_each_physmem_alloc;
+
+ t->free_list_index = vlib_buffer_create_free_list_helper
+ (vm, n_packet_data_bytes,
+ /* is_public */ 1,
+ /* is_default */ 0,
+ name);
+
+ ASSERT (t->free_list_index != 0);
+ fl = vlib_buffer_get_free_list (vm, t->free_list_index);
+ fl->min_n_buffers_each_physmem_alloc = t->min_n_buffers_each_physmem_alloc;
+
+ fl->buffer_init_function = vlib_packet_template_buffer_init;
+ fl->buffer_init_function_opaque = pointer_to_uword (t);
+
+ fl->buffer_init_template.current_data = 0;
+ fl->buffer_init_template.current_length = n_packet_data_bytes;
+ fl->buffer_init_template.flags = 0;
+ fl->buffer_init_template.n_add_refs = 0;
+ vlib_worker_thread_barrier_release (vm);
+}
+
+void *
+vlib_packet_template_get_packet (vlib_main_t * vm,
+ vlib_packet_template_t * t, u32 * bi_result)
+{
+ u32 bi;
+ vlib_buffer_t *b;
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ return 0;
+
+ *bi_result = bi;
+
+ b = vlib_get_buffer (vm, bi);
+ clib_memcpy (vlib_buffer_get_current (b),
+ t->packet_data, vec_len (t->packet_data));
+ b->current_length = vec_len (t->packet_data);
+
+ return b->data;
+}
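+
+/*
+ * Usage sketch (template contents "hdr" hypothetical): initialize a
+ * template once, then stamp out pre-filled packets:
+ *
+ *   static vlib_packet_template_t t;
+ *   vlib_packet_template_init (vm, &t, hdr, sizeof (hdr),
+ *                              VLIB_FRAME_SIZE, "keepalive");
+ *   u32 bi;
+ *   void *p = vlib_packet_template_get_packet (vm, &t, &bi);
+ *   // on success, buffer bi now begins with a copy of hdr
+ */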
+
+void
+vlib_packet_template_get_packet_helper (vlib_main_t * vm,
+ vlib_packet_template_t * t)
+{
+ word n = t->min_n_buffers_each_physmem_alloc;
+ word l = vec_len (t->packet_data);
+ word n_alloc;
+
+ ASSERT (l > 0);
+ ASSERT (vec_len (t->free_buffers) == 0);
+
+ vec_validate (t->free_buffers, n - 1);
+ n_alloc = vlib_buffer_alloc_from_free_list (vm, t->free_buffers,
+ n, t->free_list_index);
+ _vec_len (t->free_buffers) = n_alloc;
+}
+
+/* Append given data to end of buffer, possibly allocating new buffers. */
+u32
+vlib_buffer_add_data (vlib_main_t * vm,
+ u32 free_list_index,
+ u32 buffer_index, void *data, u32 n_data_bytes)
+{
+ u32 n_buffer_bytes, n_left, n_left_this_buffer, bi;
+ vlib_buffer_t *b;
+ void *d;
+
+ bi = buffer_index;
+ if (bi == 0
+ && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index))
+ goto out_of_buffers;
+
+ d = data;
+ n_left = n_data_bytes;
+ n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, free_list_index);
+
+ b = vlib_get_buffer (vm, bi);
+ b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ /* Get to the end of the chain before we try to append data... */
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ b = vlib_get_buffer (vm, b->next_buffer);
+
+ while (1)
+ {
+ u32 n;
+
+ ASSERT (n_buffer_bytes >= b->current_length);
+ n_left_this_buffer =
+ n_buffer_bytes - (b->current_data + b->current_length);
+ n = clib_min (n_left_this_buffer, n_left);
+ clib_memcpy (vlib_buffer_get_current (b) + b->current_length, d, n);
+ b->current_length += n;
+ n_left -= n;
+ if (n_left == 0)
+ break;
+
+ d += n;
+ if (1 !=
+ vlib_buffer_alloc_from_free_list (vm, &b->next_buffer, 1,
+ free_list_index))
+ goto out_of_buffers;
+
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ b = vlib_get_buffer (vm, b->next_buffer);
+ }
+
+ return bi;
+
+out_of_buffers:
+ clib_error ("out of buffers");
+ return bi;
+}
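+
+/*
+ * Chaining sketch ("data" pointer hypothetical): appending more data than
+ * one buffer holds grows the chain transparently. With the default
+ * 2048-byte free list, appending 3000 bytes to a fresh chain (passing
+ * buffer_index 0 requests a new buffer) yields two linked buffers:
+ *
+ *   u32 bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
+ *                                  0, data, 3000);
+ */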
+
+u16
+vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
+ u32 free_list_index,
+ vlib_buffer_t * first,
+ vlib_buffer_t ** last,
+ void *data, u16 data_len)
+{
+ vlib_buffer_t *l = *last;
+ u32 n_buffer_bytes =
+ vlib_buffer_free_list_buffer_size (vm, free_list_index);
+ u16 copied = 0;
+ ASSERT (n_buffer_bytes >= l->current_length + l->current_data);
+ while (data_len)
+ {
+ u16 max = n_buffer_bytes - l->current_length - l->current_data;
+ if (max == 0)
+ {
+ if (1 !=
+ vlib_buffer_alloc_from_free_list (vm, &l->next_buffer, 1,
+ free_list_index))
+ return copied;
+ *last = l = vlib_buffer_chain_buffer (vm, first, l, l->next_buffer);
+ max = n_buffer_bytes - l->current_length - l->current_data;
+ }
+
+ u16 len = (data_len > max) ? max : data_len;
+ clib_memcpy (vlib_buffer_get_current (l) + l->current_length,
+ data + copied, len);
+ vlib_buffer_chain_increase_length (first, l, len);
+ data_len -= len;
+ copied += len;
+ }
+ return copied;
+}
+
+void
+vlib_buffer_add_mem_range (vlib_main_t * vm, uword start, uword size)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ if (bm->buffer_mem_size == 0)
+ {
+ bm->buffer_mem_start = start;
+ bm->buffer_mem_size = size;
+ }
+ else if (start < bm->buffer_mem_start)
+ {
+ bm->buffer_mem_size += bm->buffer_mem_start - start;
+ bm->buffer_mem_start = start;
+ if (size > bm->buffer_mem_size)
+ bm->buffer_mem_size = size;
+ }
+ else if (start > bm->buffer_mem_start)
+ {
+ uword new_size = start - bm->buffer_mem_start + size;
+ if (new_size > bm->buffer_mem_size)
+ bm->buffer_mem_size = new_size;
+ }
+
+ if ((u64) bm->buffer_mem_size >
+ ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES)))
+ {
+ clib_panic ("buffer memory size out of range!");
+ }
+}
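+
+/*
+ * Worked example of the range arithmetic above: after adding
+ * [0x1000, +0x1000) and then [0x3000, +0x1000), the tracked region is
+ * start 0x1000, size 0x3000: a single span covering both ranges, since
+ * buffer indices are simple offsets into one contiguous window.
+ */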
+
+static u8 *
+format_vlib_buffer_free_list (u8 * s, va_list * va)
+{
+ vlib_buffer_free_list_t *f = va_arg (*va, vlib_buffer_free_list_t *);
+ u32 threadnum = va_arg (*va, u32);
+ uword bytes_alloc, bytes_free, n_free, size;
+
+ if (!f)
+ return format (s, "%=7s%=30s%=12s%=12s%=12s%=12s%=12s%=12s",
+ "Thread", "Name", "Index", "Size", "Alloc", "Free",
+ "#Alloc", "#Free");
+
+ size = sizeof (vlib_buffer_t) + f->n_data_bytes;
+ n_free = vec_len (f->buffers);
+ bytes_alloc = size * f->n_alloc;
+ bytes_free = size * n_free;
+
+ s = format (s, "%7d%30v%12d%12d%=12U%=12U%=12d%=12d", threadnum,
+ f->name, f->index, f->n_data_bytes,
+ format_memory_size, bytes_alloc,
+ format_memory_size, bytes_free, f->n_alloc, n_free);
+
+ return s;
+}
+
+static clib_error_t *
+show_buffers (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_buffer_main_t *bm;
+ vlib_buffer_free_list_t *f;
+ vlib_main_t *curr_vm;
+ u32 vm_index = 0;
+
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0, 0);
+
+ do
+ {
+ curr_vm = vlib_mains[vm_index];
+ bm = curr_vm->buffer_main;
+
+ /* *INDENT-OFF* */
+ pool_foreach (f, bm->buffer_free_list_pool, ({
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f, vm_index);
+ }));
+ /* *INDENT-ON* */
+
+ vm_index++;
+ }
+ while (vm_index < vec_len (vlib_mains));
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_buffers_command, static) = {
+ .path = "show buffers",
+ .short_help = "Show packet buffer allocation",
+ .function = show_buffers,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+vlib_buffer_main_init (struct vlib_main_t * vm)
+{
+ vlib_buffer_main_t *bm;
+ clib_error_t *error;
+
+ vec_validate (vm->buffer_main, 0);
+ bm = vm->buffer_main;
+
+  if (vlib_buffer_callbacks)
+    {
+      /* an external plugin has registered its own buffer callbacks,
+         so just copy them and return */
+      clib_memcpy (&bm->cb, vlib_buffer_callbacks,
+		   sizeof (vlib_buffer_callbacks_t));
+      bm->callbacks_registered = 1;
+      return 0;
+    }
+
+ bm->cb.vlib_buffer_alloc_cb = &vlib_buffer_alloc_internal;
+ bm->cb.vlib_buffer_alloc_from_free_list_cb =
+ &vlib_buffer_alloc_from_free_list_internal;
+ bm->cb.vlib_buffer_free_cb = &vlib_buffer_free_internal;
+ bm->cb.vlib_buffer_free_no_next_cb = &vlib_buffer_free_no_next_internal;
+ bm->cb.vlib_buffer_delete_free_list_cb =
+ &vlib_buffer_delete_free_list_internal;
+ clib_spinlock_init (&bm->buffer_known_hash_lockp);
+
+ /* allocate default region */
+ error = vlib_physmem_region_alloc (vm, "buffers",
+ vlib_buffer_physmem_sz, 0,
+ VLIB_PHYSMEM_F_INIT_MHEAP |
+ VLIB_PHYSMEM_F_HAVE_BUFFERS,
+ &bm->physmem_region);
+
+ if (error == 0)
+ return 0;
+
+ clib_error_free (error);
+
+  /* we may be running unprivileged, so try to allocate fake physmem */
+ error = vlib_physmem_region_alloc (vm, "buffers (fake)",
+ vlib_buffer_physmem_sz, 0,
+ VLIB_PHYSMEM_F_FAKE |
+ VLIB_PHYSMEM_F_INIT_MHEAP |
+ VLIB_PHYSMEM_F_HAVE_BUFFERS,
+ &bm->physmem_region);
+ return error;
+}
+
+static clib_error_t *
+vlib_buffers_configure (vlib_main_t * vm, unformat_input_t * input)
+{
+ u32 size_in_mb;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "memory-size-in-mb %d", &size_in_mb))
+ vlib_buffer_physmem_sz = size_in_mb << 20;
+ else
+ return unformat_parse_error (input);
+ }
+
+ unformat_free (input);
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (vlib_buffers_configure, "buffers");
+
+
+/** @endcond */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
new file mode 100644
index 00000000..e47dbc6d
--- /dev/null
+++ b/src/vlib/buffer.h
@@ -0,0 +1,540 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.h: VLIB buffers
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_h
+#define included_vlib_buffer_h
+
+#include <vppinfra/types.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/serialize.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/lock.h>
+#include <vlib/error.h> /* for vlib_error_t */
+
+#include <vlib/config.h> /* for __PRE_DATA_SIZE */
+#define VLIB_BUFFER_DATA_SIZE (2048)
+#define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE
+
+/** \file
+ vlib buffer structure definition and a few select
+ access methods. This structure and the buffer allocation
+ mechanism should perhaps live in vnet, but it would take a lot
+ of typing to make it so.
+*/
+
+/* VLIB buffer representation. */
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ STRUCT_MARK (template_start);
+ /* Offset within data[] that we are currently processing.
+ If negative current header points into predata area. */
+ i16 current_data; /**< signed offset in data[], pre_data[]
+ that we are currently processing.
+ If negative current header points into predata area.
+ */
+ u16 current_length; /**< Nbytes between current data and
+ the end of this buffer.
+ */
+ u32 flags; /**< buffer flags:
+ <br> VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index,
+ <br> VLIB_BUFFER_IS_TRACED: trace this buffer.
+ <br> VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer.
+    <br> VLIB_BUFFER_TOTAL_LENGTH_VALID: total_length_not_including_first_buffer is valid
+    <br> VLIB_BUFFER_REPL_FAIL: packet replication failure
+    <br> VLIB_BUFFER_RECYCLE: buffer is recycled (e.g. for replication) rather than freed
+ <br> VLIB_BUFFER_FLOW_REPORT: buffer is a flow report,
+ <br> VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager header,
+ set to avoid adding it to a flow report
+ <br> VLIB_BUFFER_FLAG_USER(n): user-defined bit N
+ */
+
+/* any change to the following line requires update of
+ * vlib_buffer_get_free_list_index(...) and
+ * vlib_buffer_set_free_list_index(...) functions */
+#define VLIB_BUFFER_FREE_LIST_INDEX_MASK ((1 << 5) - 1)
+
+#define VLIB_BUFFER_IS_TRACED (1 << 5)
+#define VLIB_BUFFER_LOG2_NEXT_PRESENT (6)
+#define VLIB_BUFFER_NEXT_PRESENT (1 << VLIB_BUFFER_LOG2_NEXT_PRESENT)
+#define VLIB_BUFFER_IS_RECYCLED (1 << 7)
+#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 8)
+#define VLIB_BUFFER_REPL_FAIL (1 << 9)
+#define VLIB_BUFFER_RECYCLE (1 << 10)
+#define VLIB_BUFFER_FLOW_REPORT (1 << 11)
+#define VLIB_BUFFER_EXT_HDR_VALID (1 << 12)
+
+ /* User defined buffer flags. */
+#define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n))
+#define VLIB_BUFFER_FLAG_USER(n) (1 << LOG2_VLIB_BUFFER_FLAG_USER(n))
+
+ STRUCT_MARK (template_end);
+
+ u32 next_buffer; /**< Next buffer for this linked-list of buffers.
+ Only valid if VLIB_BUFFER_NEXT_PRESENT flag is set.
+ */
+
+ vlib_error_t error; /**< Error code for buffers to be enqueued
+ to error handler.
+ */
+ u32 current_config_index; /**< Used by feature subgraph arcs to
+ visit enabled feature nodes
+ */
+
+ u8 feature_arc_index; /**< Used to identify feature arcs by intermediate
+ feature node
+ */
+
+ u8 n_add_refs; /**< Number of additional references to this buffer. */
+
+ u8 dont_waste_me[2]; /**< Available space in the (precious)
+ first 32 octets of buffer metadata
+ Before allocating any of it, discussion required!
+ */
+
+ u32 opaque[10]; /**< Opaque data used by sub-graphs for their own purposes.
+ See .../vnet/vnet/buffer.h
+ */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+
+ u32 trace_index; /**< Specifies index into trace buffer
+                                if VLIB_BUFFER_IS_TRACED flag is set.
+ */
+ u32 recycle_count; /**< Used by L2 path recycle code */
+
+ u32 total_length_not_including_first_buffer;
+ /**< Only valid for first buffer in chain. Current length plus
+ total length given here give total number of bytes in buffer chain.
+ */
+ u32 opaque2[13]; /**< More opaque data, currently unused */
+
+ /***** end of second cache line */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
+ u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE]; /**< Space for inserting data
+ before buffer start.
+ Packet rewrite string will be
+ rewritten backwards and may extend
+ back before buffer->data[0].
+ Must come directly before packet data.
+ */
+
+ u8 data[0]; /**< Packet data. Hardware DMA here */
+} vlib_buffer_t; /* Must be a multiple of 64B. */
+
+#define VLIB_BUFFER_HDR_SIZE (sizeof(vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE)
+
+/** \brief Prefetch buffer metadata.
+ The first 64 bytes of buffer contains most header information
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @param type - LOAD, STORE. In most cases, STORE is the right answer
+*/
+
+#define vlib_prefetch_buffer_header(b,type) CLIB_PREFETCH (b, 64, type)
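+
+/*
+ * Usage sketch, the conventional pipelined node loop (names hypothetical):
+ * prefetch the header of buffer i+1 while buffer i is being processed:
+ *
+ *   while (n_left > 1)
+ *     {
+ *       vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);
+ *       vlib_prefetch_buffer_header (vlib_get_buffer (vm, from[1]), STORE);
+ *       ... process b0 ...
+ *       from += 1;
+ *       n_left -= 1;
+ *     }
+ */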
+
+always_inline vlib_buffer_t *
+vlib_buffer_next_contiguous (vlib_buffer_t * b, u32 buffer_bytes)
+{
+ return (void *) (b + 1) + buffer_bytes;
+}
+
+always_inline void
+vlib_buffer_struct_is_sane (vlib_buffer_t * b)
+{
+ ASSERT (sizeof (b[0]) % 64 == 0);
+
+ /* Rewrite data must be before and contiguous with packet data. */
+ ASSERT (b->pre_data + VLIB_BUFFER_PRE_DATA_SIZE == b->data);
+}
+
+/** \brief Get pointer to current data to process
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @return - (void *) (b->data + b->current_data)
+*/
+
+always_inline void *
+vlib_buffer_get_current (vlib_buffer_t * b)
+{
+ /* Check bounds. */
+ ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+ return b->data + b->current_data;
+}
+
+/** \brief Advance current data pointer by the supplied (signed!) amount
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @param l - (word) signed increment
+*/
+always_inline void
+vlib_buffer_advance (vlib_buffer_t * b, word l)
+{
+ ASSERT (b->current_length >= l);
+ b->current_data += l;
+ b->current_length -= l;
+}
+
+/** \brief Check if there is enough space in buffer to advance
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @param l - (word) size to check
+ @return - 0 if there is less space than 'l' in buffer
+*/
+always_inline u8
+vlib_buffer_has_space (vlib_buffer_t * b, word l)
+{
+ return b->current_length >= l;
+}
+
+/** \brief Reset current header & length to state they were in when
+ packet was received.
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+*/
+
+always_inline void
+vlib_buffer_reset (vlib_buffer_t * b)
+{
+ b->current_length += clib_max (b->current_data, 0);
+ b->current_data = 0;
+}
+
+/** \brief Get pointer to buffer's opaque data array
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @return - (void *) b->opaque
+*/
+always_inline void *
+vlib_get_buffer_opaque (vlib_buffer_t * b)
+{
+ return (void *) b->opaque;
+}
+
+/** \brief Get pointer to buffer's opaque2 data array
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @return - (void *) b->opaque2
+*/
+always_inline void *
+vlib_get_buffer_opaque2 (vlib_buffer_t * b)
+{
+ return (void *) b->opaque2;
+}
+
+/** \brief Get pointer to the end of buffer's data
+ * @param b pointer to the buffer
+ * @return pointer to tail of packet's data
+ */
+always_inline u8 *
+vlib_buffer_get_tail (vlib_buffer_t * b)
+{
+ return b->data + b->current_data + b->current_length;
+}
+
+/** \brief Append uninitialized data to buffer
+ * @param b pointer to the buffer
+ * @param size number of uninitialized bytes
+ * @return pointer to beginning of uninitialized data
+ */
+always_inline void *
+vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size)
+{
+ void *p = vlib_buffer_get_tail (b);
+ /* XXX make sure there's enough space */
+ b->current_length += size;
+ return p;
+}
+
+/** \brief Prepend uninitialized data to buffer
+ * @param b pointer to the buffer
+ * @param size number of uninitialized bytes
+ * @return pointer to beginning of uninitialized data
+ */
+always_inline void *
+vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size)
+{
+ ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size);
+ b->current_data -= size;
+ b->current_length += size;
+
+ return vlib_buffer_get_current (b);
+}
+
+/** \brief Make head room, typically for packet headers
+ * @param b pointer to the buffer
+ * @param size number of head room bytes
+ * @return pointer to start of buffer (current data)
+ */
+always_inline void *
+vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size)
+{
+ ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size);
+ b->current_data += size;
+ return vlib_buffer_get_current (b);
+}
+
+/** \brief Retrieve bytes from buffer head
+ * @param b pointer to the buffer
+ * @param size number of bytes to pull
+ * @return pointer to start of buffer (current data)
+ */
+always_inline void *
+vlib_buffer_pull (vlib_buffer_t * b, u8 size)
+{
+ if (b->current_length + VLIB_BUFFER_PRE_DATA_SIZE < size)
+ return 0;
+
+ void *data = vlib_buffer_get_current (b);
+ vlib_buffer_advance (b, size);
+ return data;
+}
+
+/* Forward declaration. */
+struct vlib_main_t;
+
+typedef struct vlib_buffer_free_list_t
+{
+ /* Template buffer used to initialize first 16 bytes of buffers
+ allocated on this free list. */
+ vlib_buffer_t buffer_init_template;
+
+ /* Our index into vlib_main_t's buffer_free_list_pool. */
+ u32 index;
+
+ /* Number of data bytes for buffers in this free list. */
+ u32 n_data_bytes;
+
+ /* Number of buffers to allocate when we need to allocate new buffers
+ from physmem heap. */
+ u32 min_n_buffers_each_physmem_alloc;
+
+ /* Total number of buffers allocated from this free list. */
+ u32 n_alloc;
+
+ /* Vector of free buffers. Each element is a byte offset into I/O heap. */
+ u32 *buffers;
+
+  /* Global vector of free buffers, used only on the main thread.
+     Buffers are returned to the global vector only when the number of
+     buffers on a thread's free list grows above a threshold. */
+ u32 *global_buffers;
+ clib_spinlock_t global_buffers_lock;
+
+ /* Memory chunks allocated for this free list
+ recorded here so they can be freed when free list
+ is deleted. */
+ void **buffer_memory_allocated;
+
+ /* Free list name. */
+ u8 *name;
+
+ /* Callback functions to initialize newly allocated buffers.
+ If null buffers are zeroed. */
+ void (*buffer_init_function) (struct vlib_main_t * vm,
+ struct vlib_buffer_free_list_t * fl,
+ u32 * buffers, u32 n_buffers);
+
+ /* Callback function to announce that buffers have been
+ added to the freelist */
+ void (*buffers_added_to_freelist_function)
+ (struct vlib_main_t * vm, struct vlib_buffer_free_list_t * fl);
+
+ uword buffer_init_function_opaque;
+} __attribute__ ((aligned (16))) vlib_buffer_free_list_t;
+
+typedef struct
+{
+ u32 (*vlib_buffer_alloc_cb) (struct vlib_main_t * vm, u32 * buffers,
+ u32 n_buffers);
+ u32 (*vlib_buffer_alloc_from_free_list_cb) (struct vlib_main_t * vm,
+ u32 * buffers, u32 n_buffers,
+ u32 free_list_index);
+ void (*vlib_buffer_free_cb) (struct vlib_main_t * vm, u32 * buffers,
+ u32 n_buffers);
+ void (*vlib_buffer_free_no_next_cb) (struct vlib_main_t * vm, u32 * buffers,
+ u32 n_buffers);
+ void (*vlib_packet_template_init_cb) (struct vlib_main_t * vm, void *t,
+ void *packet_data,
+ uword n_packet_data_bytes,
+ uword
+ min_n_buffers_each_physmem_alloc,
+ u8 * name);
+ void (*vlib_buffer_delete_free_list_cb) (struct vlib_main_t * vm,
+ u32 free_list_index);
+} vlib_buffer_callbacks_t;
+
+extern vlib_buffer_callbacks_t *vlib_buffer_callbacks;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ /* Virtual memory address and size of buffer memory, used for calculating
+ buffer index */
+ uword buffer_mem_start;
+ uword buffer_mem_size;
+ vlib_physmem_region_index_t physmem_region;
+
+ /* Buffer free callback, for subversive activities */
+ u32 (*buffer_free_callback) (struct vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers, u32 follow_buffer_next);
+ /* Pool of buffer free lists.
+ Multiple free lists exist for packet generator which uses
+ separate free lists for each packet stream --- so as to avoid
+ initializing static data for each packet generated. */
+ vlib_buffer_free_list_t *buffer_free_list_pool;
+#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX (0)
+#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES VLIB_BUFFER_DATA_SIZE
+
+ /* Hash table mapping buffer size (rounded to next unit of
+ sizeof (vlib_buffer_t)) to free list index. */
+ uword *free_list_by_size;
+
+  /* Hash table mapping buffer index to its known state
+     (vlib_buffer_known_state_t): known-free or known-allocated.
+     If buffer index is not in hash table then this buffer
+     has never been allocated. */
+ uword *buffer_known_hash;
+ clib_spinlock_t buffer_known_hash_lockp;
+
+ /* List of free-lists needing Blue Light Special announcements */
+ vlib_buffer_free_list_t **announce_list;
+
+ /* Callbacks */
+ vlib_buffer_callbacks_t cb;
+ int callbacks_registered;
+} vlib_buffer_main_t;
+
+void vlib_buffer_add_mem_range (struct vlib_main_t *vm, uword start,
+ uword size);
+clib_error_t *vlib_buffer_main_init (struct vlib_main_t *vm);
+
+typedef struct
+{
+ struct vlib_main_t *vlib_main;
+
+ u32 first_buffer, last_buffer;
+
+ union
+ {
+ struct
+ {
+ /* Total accumulated bytes in chain starting with first_buffer. */
+ u32 n_total_data_bytes;
+
+ /* Max number of bytes to accumulate in chain starting with first_buffer.
+ As this limit is reached buffers are enqueued to next node. */
+ u32 max_n_data_bytes_per_chain;
+
+ /* Next node to enqueue buffers to relative to current process node. */
+ u32 next_index;
+
+ /* Free list to use to allocate new buffers. */
+ u32 free_list_index;
+ } tx;
+
+ struct
+ {
+ /* CLIB fifo of buffer indices waiting to be unserialized. */
+ u32 *buffer_fifo;
+
+ /* Event type used to signal that RX buffers have been added to fifo. */
+ uword ready_one_time_event;
+ } rx;
+ };
+} vlib_serialize_buffer_main_t;
+
+void serialize_open_vlib_buffer (serialize_main_t * m, struct vlib_main_t *vm,
+ vlib_serialize_buffer_main_t * sm);
+void unserialize_open_vlib_buffer (serialize_main_t * m,
+ struct vlib_main_t *vm,
+ vlib_serialize_buffer_main_t * sm);
+
+u32 serialize_close_vlib_buffer (serialize_main_t * m);
+void unserialize_close_vlib_buffer (serialize_main_t * m);
+void *vlib_set_buffer_free_callback (struct vlib_main_t *vm, void *fp);
+
+always_inline u32
+serialize_vlib_buffer_n_bytes (serialize_main_t * m)
+{
+ serialize_stream_t *s = &m->stream;
+ vlib_serialize_buffer_main_t *sm
+ = uword_to_pointer (m->stream.data_function_opaque,
+ vlib_serialize_buffer_main_t *);
+ return sm->tx.n_total_data_bytes + s->current_buffer_index +
+ vec_len (s->overflow_buffer);
+}
+
+/*
+ */
+
+/** \brief Compile time buffer trajectory tracing option
+ Turn this on if you run into "bad monkey" contexts,
+ and you want to know exactly which nodes they've visited...
+ See vlib/main.c...
+*/
+#define VLIB_BUFFER_TRACE_TRAJECTORY 0
+
+#if VLIB_BUFFER_TRACE_TRAJECTORY > 0
+#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b) (b)->pre_data[0]=0
+#else
+#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
+#endif /* VLIB_BUFFER_TRACE_TRAJECTORY */
+
+#endif /* included_vlib_buffer_h */
+
+#define VLIB_BUFFER_REGISTER_CALLBACKS(x,...) \
+ __VA_ARGS__ vlib_buffer_callbacks_t __##x##_buffer_callbacks; \
+static void __vlib_add_buffer_callbacks_t_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_buffer_callbacks_t_##x (void) \
+{ \
+ if (vlib_buffer_callbacks) \
+ clib_panic ("vlib buffer callbacks already registered"); \
+ vlib_buffer_callbacks = &__##x##_buffer_callbacks; \
+} \
+__VA_ARGS__ vlib_buffer_callbacks_t __##x##_buffer_callbacks
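+
+/*
+ * Usage sketch (plugin name "x" hypothetical): an external buffer manager
+ * registers its callbacks from a static constructor, i.e. before
+ * vlib_buffer_main_init() runs, which then copies them and skips the
+ * native allocator entirely:
+ *
+ *   VLIB_BUFFER_REGISTER_CALLBACKS (x, static) = {
+ *     .vlib_buffer_alloc_cb = &x_buffer_alloc,
+ *     .vlib_buffer_free_cb = &x_buffer_free,
+ *     ...
+ *   };
+ */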
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
new file mode 100644
index 00000000..d51de6be
--- /dev/null
+++ b/src/vlib/buffer_funcs.h
@@ -0,0 +1,946 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer_funcs.h: VLIB buffer related functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_funcs_h
+#define included_vlib_buffer_funcs_h
+
+#include <vppinfra/hash.h>
+
+/** \file
+ vlib buffer access methods.
+*/
+
+
+/** \brief Translate buffer index into buffer pointer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffer_index - (u32) buffer index
+ @return - (vlib_buffer_t *) buffer pointer
+*/
+always_inline vlib_buffer_t *
+vlib_get_buffer (vlib_main_t * vm, u32 buffer_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ uword offset = ((uword) buffer_index) << CLIB_LOG2_CACHE_LINE_BYTES;
+ ASSERT (offset < bm->buffer_mem_size);
+
+ return uword_to_pointer (bm->buffer_mem_start + offset, void *);
+}
+
+/** \brief Translate buffer pointer into buffer index
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param p - (void *) buffer pointer
+ @return - (u32) buffer index
+*/
+
+always_inline u32
+vlib_get_buffer_index (vlib_main_t * vm, void *p)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ uword offset = pointer_to_uword (p) - bm->buffer_mem_start;
+ ASSERT (pointer_to_uword (p) >= bm->buffer_mem_start);
+ ASSERT (offset < bm->buffer_mem_size);
+ ASSERT ((offset % (1 << CLIB_LOG2_CACHE_LINE_BYTES)) == 0);
+ return offset >> CLIB_LOG2_CACHE_LINE_BYTES;
+}
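+
+/*
+ * The index <-> pointer mapping above is pure arithmetic. Assuming
+ * 64-byte cache lines (CLIB_LOG2_CACHE_LINE_BYTES == 6), buffer index i
+ * maps to buffer_mem_start + (i << 6), e.g. index 3 => byte offset 192:
+ * indices address buffer memory in cache-line granules, hence the
+ * alignment ASSERTs.
+ */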
+
+/** \brief Get next buffer in buffer linked-list, or zero for end of list.
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param b - (void *) buffer pointer
+ @return - (vlib_buffer_t *) next buffer, or NULL
+*/
+always_inline vlib_buffer_t *
+vlib_get_next_buffer (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ return (b->flags & VLIB_BUFFER_NEXT_PRESENT
+ ? vlib_get_buffer (vm, b->next_buffer) : 0);
+}
+
+uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm,
+ vlib_buffer_t * b_first);
+
+/** \brief Get length in bytes of the buffer chain
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param b - (void *) buffer pointer
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_length_in_chain (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ uword len = b->current_length;
+
+ if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
+ return len;
+
+ if (PREDICT_TRUE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))
+ return len + b->total_length_not_including_first_buffer;
+
+ return vlib_buffer_length_in_chain_slow_path (vm, b);
+}
+
+/** \brief Get length in bytes of the buffer index buffer chain
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param bi - (u32) buffer index
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_index_length_in_chain (vlib_main_t * vm, u32 bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ return vlib_buffer_length_in_chain (vm, b);
+}
+
+/** \brief Copy buffer contents to memory
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffer_index - (u32) buffer index
+ @param contents - (u8 *) memory, <strong>must be large enough</strong>
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents)
+{
+ uword content_len = 0;
+ uword l;
+ vlib_buffer_t *b;
+
+ while (1)
+ {
+ b = vlib_get_buffer (vm, buffer_index);
+ l = b->current_length;
+ clib_memcpy (contents + content_len, b->data + b->current_data, l);
+ content_len += l;
+ if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ buffer_index = b->next_buffer;
+ }
+
+ return content_len;
+}
+
+/* Return physical address of buffer->data start. */
+always_inline u64
+vlib_get_buffer_data_physical_address (vlib_main_t * vm, u32 buffer_index)
+{
+ return vlib_physmem_offset_to_physical (vm, vm->buffer_main->physmem_region,
+ (((uword) buffer_index) <<
+ CLIB_LOG2_CACHE_LINE_BYTES) +
+ STRUCT_OFFSET_OF (vlib_buffer_t,
+ data));
+}
+
+/** \brief Prefetch buffer metadata by buffer index
+ The first 64 bytes of buffer contains most header information
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param bi - (u32) buffer index
+ @param type - LOAD, STORE. In most cases, STORE is the right answer
+*/
+/* Prefetch buffer header given index. */
+#define vlib_prefetch_buffer_with_index(vm,bi,type) \
+ do { \
+ vlib_buffer_t * _b = vlib_get_buffer (vm, bi); \
+ vlib_prefetch_buffer_header (_b, type); \
+ } while (0)
+
+#if 0
+/* Iterate over known allocated vlib bufs. You probably do not want
+ * to do this!
+ @param vm the vlib_main_t
+ @param bi found allocated buffer index
+ @param body operation to perform on buffer index
+ function executes body for each allocated buffer index
+ */
+#define vlib_buffer_foreach_allocated(vm,bi,body) \
+do { \
+ vlib_main_t * _vmain = (vm); \
+ vlib_buffer_main_t * _bmain = &_vmain->buffer_main; \
+ hash_pair_t * _vbpair; \
+ hash_foreach_pair(_vbpair, _bmain->buffer_known_hash, ({ \
+ if (VLIB_BUFFER_KNOWN_ALLOCATED == _vbpair->value[0]) { \
+ (bi) = _vbpair->key; \
+ body; \
+ } \
+ })); \
+} while (0)
+#endif
+
+typedef enum
+{
+ /* Index is unknown. */
+ VLIB_BUFFER_UNKNOWN,
+
+ /* Index is known and free/allocated. */
+ VLIB_BUFFER_KNOWN_FREE,
+ VLIB_BUFFER_KNOWN_ALLOCATED,
+} vlib_buffer_known_state_t;
+
+always_inline vlib_buffer_known_state_t
+vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ clib_spinlock_lock (&bm->buffer_known_hash_lockp);
+ uword *p = hash_get (bm->buffer_known_hash, buffer_index);
+ clib_spinlock_unlock (&bm->buffer_known_hash_lockp);
+ return p ? p[0] : VLIB_BUFFER_UNKNOWN;
+}
+
+always_inline void
+vlib_buffer_set_known_state (vlib_main_t * vm,
+ u32 buffer_index,
+ vlib_buffer_known_state_t state)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ clib_spinlock_lock (&bm->buffer_known_hash_lockp);
+ hash_set (bm->buffer_known_hash, buffer_index, state);
+ clib_spinlock_unlock (&bm->buffer_known_hash_lockp);
+}
+
+/* Validates sanity of a single buffer.
+ Returns format'ed vector with error message if any. */
+u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index,
+ uword follow_chain);
+
+/** \brief Allocate buffers into supplied array
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers requested
+ @return - (u32) number of buffers actually allocated, may be
+ less than the number requested or zero
+*/
+always_inline u32
+vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ ASSERT (bm->cb.vlib_buffer_alloc_cb);
+
+ return bm->cb.vlib_buffer_alloc_cb (vm, buffers, n_buffers);
+}
+
+always_inline u32
+vlib_buffer_round_size (u32 size)
+{
+ return round_pow2 (size, sizeof (vlib_buffer_t));
+}
+
+always_inline u32
+vlib_buffer_get_free_list_index (vlib_buffer_t * b)
+{
+ return b->flags & VLIB_BUFFER_FREE_LIST_INDEX_MASK;
+}
+
+always_inline void
+vlib_buffer_set_free_list_index (vlib_buffer_t * b, u32 index)
+{
+  /* if there is a need for more free lists we should consider
+     storing the data in the 2nd cacheline */
+ ASSERT (VLIB_BUFFER_FREE_LIST_INDEX_MASK & 1);
+ ASSERT (index <= VLIB_BUFFER_FREE_LIST_INDEX_MASK);
+
+ b->flags &= ~VLIB_BUFFER_FREE_LIST_INDEX_MASK;
+ b->flags |= index & VLIB_BUFFER_FREE_LIST_INDEX_MASK;
+}
+
+/** \brief Allocate buffers from specific freelist into supplied array
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers requested
+ @return - (u32) number of buffers actually allocated, may be
+ less than the number requested or zero
+*/
+always_inline u32
+vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers, u32 free_list_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ ASSERT (bm->cb.vlib_buffer_alloc_from_free_list_cb);
+
+ return bm->cb.vlib_buffer_alloc_from_free_list_cb (vm, buffers, n_buffers,
+ free_list_index);
+}
+
+/** \brief Free buffers
+ Frees the entire buffer chain for each buffer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers to free
+
+*/
+always_inline void
+vlib_buffer_free (vlib_main_t * vm,
+ /* pointer to first buffer */
+ u32 * buffers,
+ /* number of buffers to free */
+ u32 n_buffers)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ ASSERT (bm->cb.vlib_buffer_free_cb);
+
+ return bm->cb.vlib_buffer_free_cb (vm, buffers, n_buffers);
+}
+
+/** \brief Free buffers, does not free the buffer chain for each buffer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers to free
+
+*/
+always_inline void
+vlib_buffer_free_no_next (vlib_main_t * vm,
+ /* pointer to first buffer */
+ u32 * buffers,
+ /* number of buffers to free */
+ u32 n_buffers)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ ASSERT (bm->cb.vlib_buffer_free_no_next_cb);
+
+ return bm->cb.vlib_buffer_free_no_next_cb (vm, buffers, n_buffers);
+}
+
+/** \brief Free one buffer
+ Shorthand to free a single buffer chain.
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffer_index - (u32) buffer index to free
+*/
+always_inline void
+vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index)
+{
+ vlib_buffer_free (vm, &buffer_index, /* n_buffers */ 1);
+}
+
+/* Add/delete buffer free lists. */
+u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char *fmt, ...);
+always_inline void
+vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ ASSERT (bm->cb.vlib_buffer_delete_free_list_cb);
+
+ bm->cb.vlib_buffer_delete_free_list_cb (vm, free_list_index);
+}
+
+/* Find already existing public free list with given size or create one. */
+u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char *fmt, ...);
+
+/* Merge two free lists */
+void vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst,
+ vlib_buffer_free_list_t * src);
+
+/* Make sure we have at least given number of unaligned buffers. */
+void vlib_buffer_free_list_fill_unaligned (vlib_main_t * vm,
+ vlib_buffer_free_list_t *
+ free_list,
+ uword n_unaligned_buffers);
+
+always_inline u32
+vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+
+ size = vlib_buffer_round_size (size);
+ uword *p = hash_get (bm->free_list_by_size, size);
+ return p ? p[0] : ~0;
+}
+
+always_inline vlib_buffer_free_list_t *
+vlib_buffer_get_buffer_free_list (vlib_main_t * vm, vlib_buffer_t * b,
+ u32 * index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ u32 i;
+
+ *index = i = vlib_buffer_get_free_list_index (b);
+ return pool_elt_at_index (bm->buffer_free_list_pool, i);
+}
+
+always_inline vlib_buffer_free_list_t *
+vlib_buffer_get_free_list (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *f;
+
+ f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
+
+ /* Sanity: indices must match. */
+ ASSERT (f->index == free_list_index);
+
+ return f;
+}
+
+always_inline u32
+vlib_buffer_free_list_buffer_size (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_free_list_t *f =
+ vlib_buffer_get_free_list (vm, free_list_index);
+ return f->n_data_bytes;
+}
+
+void vlib_aligned_memcpy (void *_dst, void *_src, int n_bytes);
+
+/* Reasonably fast buffer copy routine. */
+always_inline void
+vlib_copy_buffers (u32 * dst, u32 * src, u32 n)
+{
+ while (n >= 4)
+ {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+ dst[3] = src[3];
+ dst += 4;
+ src += 4;
+ n -= 4;
+ }
+ while (n > 0)
+ {
+ dst[0] = src[0];
+ dst += 1;
+ src += 1;
+ n -= 1;
+ }
+}
+
+/* Append given data to end of buffer, possibly allocating new buffers. */
+u32 vlib_buffer_add_data (vlib_main_t * vm,
+ u32 free_list_index,
+ u32 buffer_index, void *data, u32 n_data_bytes);
+
+/* duplicate all buffers in chain */
+always_inline vlib_buffer_t *
+vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ vlib_buffer_t *s, *d, *fd;
+ uword n_alloc, n_buffers = 1;
+ u32 flag_mask = VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ int i;
+
+ s = b;
+ while (s->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ n_buffers++;
+ s = vlib_get_buffer (vm, s->next_buffer);
+ }
+ u32 new_buffers[n_buffers];
+
+ n_alloc = vlib_buffer_alloc (vm, new_buffers, n_buffers);
+
+ /* No guarantee that we'll get all the buffers we asked for */
+ if (PREDICT_FALSE (n_alloc < n_buffers))
+ {
+ if (n_alloc > 0)
+ vlib_buffer_free (vm, new_buffers, n_alloc);
+ return 0;
+ }
+
+ /* 1st segment */
+ s = b;
+ fd = d = vlib_get_buffer (vm, new_buffers[0]);
+ d->current_data = s->current_data;
+ d->current_length = s->current_length;
+ d->flags = s->flags & flag_mask;
+ d->total_length_not_including_first_buffer =
+ s->total_length_not_including_first_buffer;
+ clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque));
+ clib_memcpy (vlib_buffer_get_current (d),
+ vlib_buffer_get_current (s), s->current_length);
+
+ /* next segments */
+ for (i = 1; i < n_buffers; i++)
+ {
+ /* previous */
+ d->next_buffer = new_buffers[i];
+ /* current */
+ s = vlib_get_buffer (vm, s->next_buffer);
+ d = vlib_get_buffer (vm, new_buffers[i]);
+ d->current_data = s->current_data;
+ d->current_length = s->current_length;
+ clib_memcpy (vlib_buffer_get_current (d),
+ vlib_buffer_get_current (s), s->current_length);
+ d->flags = s->flags & flag_mask;
+ }
+
+ return fd;
+}
+
+/** \brief Create multiple clones of buffer and store them in the supplied array
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param src_buffer - (u32) source buffer index
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u8) number of buffer clones requested
+ @param head_end_offset - (u16) offset relative to current position
+ where packet head ends
+ @return - (u8) number of buffers actually cloned, may be
+ less than the number requested or zero
+*/
+
+always_inline u8
+vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers,
+ u8 n_buffers, u16 head_end_offset)
+{
+ u8 i;
+ vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer);
+
+ ASSERT (s->n_add_refs == 0);
+ ASSERT (n_buffers);
+
+ if (s->current_length <= head_end_offset + CLIB_CACHE_LINE_BYTES * 2)
+ {
+ buffers[0] = src_buffer;
+ for (i = 1; i < n_buffers; i++)
+ {
+ vlib_buffer_t *d;
+ d = vlib_buffer_copy (vm, s);
+ if (d == 0)
+ return i;
+ buffers[i] = vlib_get_buffer_index (vm, d);
+ }
+ return n_buffers;
+ }
+
+  n_buffers =
+    vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers,
+				      vlib_buffer_get_free_list_index (s));
+ if (PREDICT_FALSE (n_buffers == 0))
+ {
+ buffers[0] = src_buffer;
+ return 1;
+ }
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]);
+ d->current_data = s->current_data;
+ d->current_length = head_end_offset;
+ vlib_buffer_set_free_list_index (d,
+ vlib_buffer_get_free_list_index (s));
+ d->total_length_not_including_first_buffer =
+ s->total_length_not_including_first_buffer + s->current_length -
+ head_end_offset;
+ d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT;
+ d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
+ clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque));
+ clib_memcpy (vlib_buffer_get_current (d), vlib_buffer_get_current (s),
+ head_end_offset);
+ d->next_buffer = src_buffer;
+ }
+ vlib_buffer_advance (s, head_end_offset);
+ s->n_add_refs = n_buffers - 1;
+ while (s->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ s = vlib_get_buffer (vm, s->next_buffer);
+ s->n_add_refs = n_buffers - 1;
+ }
+
+ return n_buffers;
+}
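+
+/* Usage sketch (illustrative only, compiled out): cloning one packet for
+ * replication.  Clones share the source tail; only the first
+ * head_end_offset bytes are copied per clone, and the source must not
+ * already be cloned (n_add_refs == 0 is asserted above).  Names and
+ * sizes here are hypothetical. */
+#if 0
+static void
+example_replicate (vlib_main_t * vm, u32 src_bi)
+{
+  u32 clones[8];
+  /* Treat the first 64 bytes (e.g. rewritable headers) as the head. */
+  u8 n = vlib_buffer_clone (vm, src_bi, clones, 8, 64);
+  /* n may be less than 8; clones[0..n-1] hold valid buffer indices. */
+}
+#endif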
+
+/** \brief Attach cloned tail to the buffer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param head - (vlib_buffer_t *) head buffer
+    @param tail - (vlib_buffer_t *) tail buffer to clone and attach to head
+*/
+
+always_inline void
+vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head,
+ vlib_buffer_t * tail)
+{
+ ASSERT ((head->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
+ ASSERT (vlib_buffer_get_free_list_index (head) ==
+ vlib_buffer_get_free_list_index (tail));
+
+ head->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ head->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ head->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
+ head->flags |= (tail->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID);
+ head->next_buffer = vlib_get_buffer_index (vm, tail);
+ head->total_length_not_including_first_buffer = tail->current_length +
+ tail->total_length_not_including_first_buffer;
+
+next_segment:
+ __sync_add_and_fetch (&tail->n_add_refs, 1);
+
+ if (tail->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ tail = vlib_get_buffer (vm, tail->next_buffer);
+ goto next_segment;
+ }
+}
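+
+/* Usage sketch (illustrative only, compiled out): attaching a shared
+ * tail to a fresh head, e.g. a per-destination header prepended to a
+ * common payload.  The reference count of every tail segment is bumped. */
+#if 0
+static void
+example_attach (vlib_main_t * vm, u32 head_bi, u32 tail_bi)
+{
+  vlib_buffer_t *head = vlib_get_buffer (vm, head_bi);
+  vlib_buffer_t *tail = vlib_get_buffer (vm, tail_bi);
+  /* head must have no next segment and must come from the same free
+   * list as tail; both conditions are asserted above. */
+  vlib_buffer_attach_clone (vm, head, tail);
+}
+#endif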
+
+/* Initializes the buffer as an empty packet with no chained buffers. */
+always_inline void
+vlib_buffer_chain_init (vlib_buffer_t * first)
+{
+ first->total_length_not_including_first_buffer = 0;
+ first->current_length = 0;
+ first->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+}
+
+/* Appends the buffer with index next_bi to the end of the packet and
+ * returns the newly attached buffer, which becomes the new 'last'. */
+always_inline vlib_buffer_t *
+vlib_buffer_chain_buffer (vlib_main_t * vm,
+ vlib_buffer_t * first,
+ vlib_buffer_t * last, u32 next_bi)
+{
+ vlib_buffer_t *next_buffer = vlib_get_buffer (vm, next_bi);
+ last->next_buffer = next_bi;
+ last->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ next_buffer->current_length = 0;
+ next_buffer->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ return next_buffer;
+}
+
+/* Increases or decreases the packet length.
+ * It does not allocate or deallocate buffers, so the length change
+ * must fit within the last buffer. */
+always_inline void
+vlib_buffer_chain_increase_length (vlib_buffer_t * first,
+ vlib_buffer_t * last, i32 len)
+{
+ last->current_length += len;
+ if (first != last)
+ first->total_length_not_including_first_buffer += len;
+}
+
+/* Copies data to the end of the packet and increases its length.
+ * It does not allocate new buffers.
+ * Returns the number of bytes copied. */
+always_inline u16
+vlib_buffer_chain_append_data (vlib_main_t * vm,
+ u32 free_list_index,
+ vlib_buffer_t * first,
+ vlib_buffer_t * last, void *data, u16 data_len)
+{
+ u32 n_buffer_bytes =
+ vlib_buffer_free_list_buffer_size (vm, free_list_index);
+ ASSERT (n_buffer_bytes >= last->current_length + last->current_data);
+ u16 len = clib_min (data_len,
+ n_buffer_bytes - last->current_length -
+ last->current_data);
+ clib_memcpy (vlib_buffer_get_current (last) + last->current_length, data,
+ len);
+ vlib_buffer_chain_increase_length (first, last, len);
+ return len;
+}
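+
+/* Usage sketch (illustrative only, compiled out): building a two-segment
+ * packet with the chain helpers above.  The helper name is hypothetical;
+ * free_list_index would typically be the default free list. */
+#if 0
+static void
+example_build_chain (vlib_main_t * vm, u32 free_list_index,
+		     void *data, u16 data_len)
+{
+  u32 bi[2];
+  vlib_buffer_t *first, *last;
+
+  if (vlib_buffer_alloc (vm, bi, 2) != 2)
+    return;
+  first = vlib_get_buffer (vm, bi[0]);
+  vlib_buffer_chain_init (first);
+  last = vlib_buffer_chain_buffer (vm, first, first, bi[1]);
+  /* Copies at most one buffer's worth of data; never allocates. */
+  (void) vlib_buffer_chain_append_data (vm, free_list_index, first, last,
+					data, data_len);
+}
+#endif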
+
+/* Copies data to the end of the packet and increases its length.
+ * Allocates additional buffers from the free list if necessary.
+ * Returns the number of bytes copied.
+ * 'last' is updated whenever new buffers are allocated and chained,
+ * and always points to the last buffer in the chain. */
+u16
+vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
+ u32 free_list_index,
+ vlib_buffer_t * first,
+ vlib_buffer_t ** last,
+ void *data, u16 data_len);
+void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * first);
+
+format_function_t format_vlib_buffer, format_vlib_buffer_and_data,
+ format_vlib_buffer_contents;
+
+typedef struct
+{
+ /* Vector of packet data. */
+ u8 *packet_data;
+
+ /* Number of buffers to allocate in each call to physmem
+ allocator. */
+ u32 min_n_buffers_each_physmem_alloc;
+
+ /* Buffer free list for this template. */
+ u32 free_list_index;
+
+ u32 *free_buffers;
+} vlib_packet_template_t;
+
+void vlib_packet_template_get_packet_helper (vlib_main_t * vm,
+ vlib_packet_template_t * t);
+
+void vlib_packet_template_init (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ void *packet_data,
+ uword n_packet_data_bytes,
+ uword min_n_buffers_each_physmem_alloc,
+ char *fmt, ...);
+
+void *vlib_packet_template_get_packet (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ u32 * bi_result);
+
+always_inline void
+vlib_packet_template_free (vlib_main_t * vm, vlib_packet_template_t * t)
+{
+ vec_free (t->packet_data);
+}
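+
+/* Usage sketch (illustrative only, compiled out): a packet template
+ * pre-builds buffers containing fixed packet data, so per-packet work is
+ * a fetch rather than a rebuild.  Names and the chunk size are
+ * hypothetical; a real template would live in a main_t, not on the
+ * stack. */
+#if 0
+static void
+example_template (vlib_main_t * vm, void *hdr, uword hdr_len)
+{
+  vlib_packet_template_t t;
+  u32 bi;
+  void *p;
+
+  vlib_packet_template_init (vm, &t, hdr, hdr_len,
+			     /* min buffers per alloc */ 256,
+			     "example template");
+  p = vlib_packet_template_get_packet (vm, &t, &bi);
+  /* p now points at a copy of hdr inside buffer bi. */
+}
+#endif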
+
+always_inline u32
+unserialize_vlib_buffer_n_bytes (serialize_main_t * m)
+{
+ serialize_stream_t *s = &m->stream;
+ vlib_serialize_buffer_main_t *sm
+ = uword_to_pointer (m->stream.data_function_opaque,
+ vlib_serialize_buffer_main_t *);
+ vlib_main_t *vm = sm->vlib_main;
+ u32 n, *f;
+
+ n = s->n_buffer_bytes - s->current_buffer_index;
+ if (sm->last_buffer != ~0)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, sm->last_buffer);
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ n += b->current_length;
+ }
+ }
+
+ /* *INDENT-OFF* */
+ clib_fifo_foreach (f, sm->rx.buffer_fifo, ({
+ n += vlib_buffer_index_length_in_chain (vm, f[0]);
+ }));
+/* *INDENT-ON* */
+
+ return n;
+}
+
+/* Set a buffer quickly into "uninitialized" state. We want this to
+ be extremely cheap and arrange for all fields that need to be
+ initialized to be in the first 128 bits of the buffer. */
+always_inline void
+vlib_buffer_init_for_free_list (vlib_buffer_t * dst,
+ vlib_buffer_free_list_t * fl)
+{
+ vlib_buffer_t *src = &fl->buffer_init_template;
+
+ /* Make sure vlib_buffer_t is cacheline aligned and sized */
+ ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline0) == 0);
+ ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline1) ==
+ CLIB_CACHE_LINE_BYTES);
+ ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline2) ==
+ CLIB_CACHE_LINE_BYTES * 2);
+
+ /* Make sure buffer template is sane. */
+ ASSERT (fl->index == vlib_buffer_get_free_list_index (src));
+
+ clib_memcpy (STRUCT_MARK_PTR (dst, template_start),
+ STRUCT_MARK_PTR (src, template_start),
+ STRUCT_OFFSET_OF (vlib_buffer_t, template_end) -
+ STRUCT_OFFSET_OF (vlib_buffer_t, template_start));
+
+ /* Not in the first 16 octets. */
+ dst->n_add_refs = src->n_add_refs;
+
+ /* Make sure it really worked. */
+#define _(f) ASSERT (dst->f == src->f);
+ _(current_data);
+ _(current_length);
+ _(flags);
+#undef _
+ /* ASSERT (dst->total_length_not_including_first_buffer == 0); */
+  /* total_length_not_including_first_buffer is no longer in the template,
+   * so it may not be zeroed for some buffers. One option is to uncomment
+   * the assignment below (which comes at a cost); the other is simply not
+   * to care. */
+ /* dst->total_length_not_including_first_buffer = 0; */
+ ASSERT (dst->n_add_refs == 0);
+}
+
+always_inline void
+vlib_buffer_add_to_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * f,
+ u32 buffer_index, u8 do_init)
+{
+ vlib_buffer_t *b;
+ b = vlib_get_buffer (vm, buffer_index);
+ if (PREDICT_TRUE (do_init))
+ vlib_buffer_init_for_free_list (b, f);
+ vec_add1_aligned (f->buffers, buffer_index, CLIB_CACHE_LINE_BYTES);
+
+ if (vec_len (f->buffers) > 4 * VLIB_FRAME_SIZE)
+ {
+ vlib_buffer_free_list_t *mf;
+ mf = vlib_buffer_get_free_list (vlib_mains[0], f->index);
+ clib_spinlock_lock (&mf->global_buffers_lock);
+ /* keep last stored buffers, as they are more likely hot in the cache */
+ vec_add_aligned (mf->global_buffers, f->buffers, VLIB_FRAME_SIZE,
+ CLIB_CACHE_LINE_BYTES);
+ vec_delete (f->buffers, VLIB_FRAME_SIZE, 0);
+ clib_spinlock_unlock (&mf->global_buffers_lock);
+ }
+}
+
+always_inline void
+vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0,
+ vlib_buffer_t * dst1,
+ vlib_buffer_free_list_t * fl)
+{
+ vlib_buffer_t *src = &fl->buffer_init_template;
+
+ /* Make sure buffer template is sane. */
+ ASSERT (fl->index == vlib_buffer_get_free_list_index (src));
+
+ clib_memcpy (STRUCT_MARK_PTR (dst0, template_start),
+ STRUCT_MARK_PTR (src, template_start),
+ STRUCT_OFFSET_OF (vlib_buffer_t, template_end) -
+ STRUCT_OFFSET_OF (vlib_buffer_t, template_start));
+
+ clib_memcpy (STRUCT_MARK_PTR (dst1, template_start),
+ STRUCT_MARK_PTR (src, template_start),
+ STRUCT_OFFSET_OF (vlib_buffer_t, template_end) -
+ STRUCT_OFFSET_OF (vlib_buffer_t, template_start));
+
+ /* Not in the first 16 octets. */
+ dst0->n_add_refs = src->n_add_refs;
+ dst1->n_add_refs = src->n_add_refs;
+
+ /* Make sure it really worked. */
+#define _(f) ASSERT (dst0->f == src->f); ASSERT( dst1->f == src->f)
+ _(current_data);
+ _(current_length);
+ _(flags);
+#undef _
+
+ ASSERT (dst0->total_length_not_including_first_buffer == 0);
+ ASSERT (dst1->total_length_not_including_first_buffer == 0);
+ ASSERT (dst0->n_add_refs == 0);
+ ASSERT (dst1->n_add_refs == 0);
+}
+
+#if CLIB_DEBUG > 0
+extern u32 *vlib_buffer_state_validation_lock;
+extern uword *vlib_buffer_state_validation_hash;
+extern void *vlib_buffer_state_heap;
+#endif
+
+static inline void
+vlib_validate_buffer_in_use (vlib_buffer_t * b, u32 expected)
+{
+#if CLIB_DEBUG > 0
+ uword *p;
+ void *oldheap;
+
+ oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
+
+ while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1))
+ ;
+
+ p = hash_get (vlib_buffer_state_validation_hash, b);
+
+ /* If we don't know about b, declare it to be in the expected state */
+ if (!p)
+ {
+ hash_set (vlib_buffer_state_validation_hash, b, expected);
+ goto out;
+ }
+
+ if (p[0] != expected)
+ {
+ void cj_stop (void);
+ u32 bi;
+ vlib_main_t *vm = &vlib_global_main;
+
+ cj_stop ();
+
+ bi = vlib_get_buffer_index (vm, b);
+
+ clib_mem_set_heap (oldheap);
+      clib_warning ("%.6f buffer %llx: %s, not %s",
+		    vlib_time_now (vm), bi,
+		    p[0] ? "busy" : "free", expected ? "busy" : "free");
+ os_panic ();
+ }
+out:
+ CLIB_MEMORY_BARRIER ();
+ *vlib_buffer_state_validation_lock = 0;
+ clib_mem_set_heap (oldheap);
+#endif
+}
+
+static inline void
+vlib_validate_buffer_set_in_use (vlib_buffer_t * b, u32 expected)
+{
+#if CLIB_DEBUG > 0
+ void *oldheap;
+
+ oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
+
+ while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1))
+ ;
+
+ hash_set (vlib_buffer_state_validation_hash, b, expected);
+
+ CLIB_MEMORY_BARRIER ();
+ *vlib_buffer_state_validation_lock = 0;
+ clib_mem_set_heap (oldheap);
+#endif
+}
+
+#endif /* included_vlib_buffer_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h
new file mode 100644
index 00000000..8a779049
--- /dev/null
+++ b/src/vlib/buffer_node.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer_node.h: VLIB buffer handling node helper macros/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_node_h
+#define included_vlib_buffer_node_h
+
+/** \file
+ vlib buffer/node functions
+*/
+
+/** \brief Finish enqueueing two buffers forward in the graph.
+ Standard dual loop boilerplate element. This is a MACRO,
+ with MULTIPLE SIDE EFFECTS. In the ideal case,
+ <code>next_index == next0 == next1</code>,
+ which means that the speculative enqueue at the top of the dual loop
+ has correctly dealt with both packets. In that case, the macro does
+ nothing at all.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index speculated next index used for both packets
+ @param to_next speculated vector pointer used for both packets
+ @param n_left_to_next number of slots left in speculated vector
+ @param bi0 first buffer index
+ @param bi1 second buffer index
+ @param next0 actual next index to be used for the first packet
+ @param next1 actual next index to be used for the second packet
+
+ @return @c next_index -- speculative next index to be used for future packets
+ @return @c to_next -- speculative frame to be used for future packets
+ @return @c n_left_to_next -- number of slots left in speculative frame
+*/
+
+#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
+do { \
+ int enqueue_code = (next0 != next_index) + 2*(next1 != next_index); \
+ \
+ if (PREDICT_FALSE (enqueue_code != 0)) \
+ { \
+ switch (enqueue_code) \
+ { \
+ case 1: \
+ /* A B A */ \
+ to_next[-2] = bi1; \
+ to_next -= 1; \
+ n_left_to_next += 1; \
+ vlib_set_next_frame_buffer (vm, node, next0, bi0); \
+ break; \
+ \
+ case 2: \
+ /* A A B */ \
+ to_next -= 1; \
+ n_left_to_next += 1; \
+ vlib_set_next_frame_buffer (vm, node, next1, bi1); \
+ break; \
+ \
+ case 3: \
+ /* A B B or A B C */ \
+ to_next -= 2; \
+ n_left_to_next += 2; \
+ vlib_set_next_frame_buffer (vm, node, next0, bi0); \
+ vlib_set_next_frame_buffer (vm, node, next1, bi1); \
+ if (next0 == next1) \
+ { \
+ vlib_put_next_frame (vm, node, next_index, \
+ n_left_to_next); \
+ next_index = next1; \
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
+ } \
+ } \
+ } \
+} while (0)
+
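+/* Typical call site: the tail of a node's dual loop, after next0 and
+ * next1 have been computed for two packets already speculatively
+ * enqueued to next_index.  generic_buffer_node_inline () below shows
+ * this macro (and its x1 sibling) used in context. */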
+
+/** \brief Finish enqueueing four buffers forward in the graph.
+ Standard quad loop boilerplate element. This is a MACRO,
+ with MULTIPLE SIDE EFFECTS. In the ideal case,
+ <code>next_index == next0 == next1 == next2 == next3</code>,
+ which means that the speculative enqueue at the top of the quad loop
+ has correctly dealt with all four packets. In that case, the macro does
+ nothing at all.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index speculated next index used for both packets
+ @param to_next speculated vector pointer used for both packets
+ @param n_left_to_next number of slots left in speculated vector
+ @param bi0 first buffer index
+ @param bi1 second buffer index
+ @param bi2 third buffer index
+ @param bi3 fourth buffer index
+ @param next0 actual next index to be used for the first packet
+ @param next1 actual next index to be used for the second packet
+ @param next2 actual next index to be used for the third packet
+ @param next3 actual next index to be used for the fourth packet
+
+ @return @c next_index -- speculative next index to be used for future packets
+ @return @c to_next -- speculative frame to be used for future packets
+ @return @c n_left_to_next -- number of slots left in speculative frame
+*/
+
+#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
+do { \
+ /* After the fact: check the [speculative] enqueue to "next" */ \
+ u32 fix_speculation = next_index != next0 || next_index != next1 \
+ || next_index != next2 || next_index != next3; \
+ if (PREDICT_FALSE(fix_speculation)) \
+ { \
+ /* rewind... */ \
+ to_next -= 4; \
+ n_left_to_next += 4; \
+ \
+ /* If bi0 belongs to "next", send it there */ \
+ if (next_index == next0) \
+ { \
+ to_next[0] = bi0; \
+ to_next++; \
+ n_left_to_next --; \
+ } \
+ else /* send it where it needs to go */ \
+ vlib_set_next_frame_buffer (vm, node, next0, bi0); \
+ \
+ if (next_index == next1) \
+ { \
+ to_next[0] = bi1; \
+ to_next++; \
+ n_left_to_next --; \
+ } \
+ else \
+ vlib_set_next_frame_buffer (vm, node, next1, bi1); \
+ \
+ if (next_index == next2) \
+ { \
+ to_next[0] = bi2; \
+ to_next++; \
+ n_left_to_next --; \
+ } \
+ else \
+ vlib_set_next_frame_buffer (vm, node, next2, bi2); \
+ \
+ if (next_index == next3) \
+ { \
+ to_next[0] = bi3; \
+ to_next++; \
+ n_left_to_next --; \
+ } \
+ else \
+ vlib_set_next_frame_buffer (vm, node, next3, bi3); \
+ \
+ /* Change speculation: last 2 packets went to the same node */ \
+ if (next2 == next3) \
+ { \
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next); \
+ next_index = next3; \
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
+ } \
+ } \
+  } while (0)
+
+/** \brief Finish enqueueing one buffer forward in the graph.
+ Standard single loop boilerplate element. This is a MACRO,
+ with MULTIPLE SIDE EFFECTS. In the ideal case,
+ <code>next_index == next0</code>,
+ which means that the speculative enqueue at the top of the single loop
+ has correctly dealt with the packet in hand. In that case, the macro does
+ nothing at all.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+    @param next_index speculated next index used for the packet
+    @param to_next speculated vector pointer used for the packet
+    @param n_left_to_next number of slots left in speculated vector
+    @param bi0 buffer index
+    @param next0 actual next index to be used for the packet
+
+ @return @c next_index -- speculative next index to be used for future packets
+ @return @c to_next -- speculative frame to be used for future packets
+ @return @c n_left_to_next -- number of slots left in speculative frame
+*/
+#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
+do { \
+ if (PREDICT_FALSE (next0 != next_index)) \
+ { \
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); \
+ next_index = next0; \
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
+ \
+ to_next[0] = bi0; \
+ to_next += 1; \
+ n_left_to_next -= 1; \
+ } \
+} while (0)
+
+always_inline uword
+generic_buffer_node_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword sizeof_trace,
+ void *opaque1,
+ uword opaque2,
+ void (*two_buffers) (vlib_main_t * vm,
+ void *opaque1,
+ uword opaque2,
+ vlib_buffer_t * b0,
+ vlib_buffer_t * b1,
+ u32 * next0, u32 * next1),
+ void (*one_buffer) (vlib_main_t * vm,
+ void *opaque1, uword opaque2,
+ vlib_buffer_t * b0,
+ u32 * next0))
+{
+ u32 n_left_from, *from, *to_next;
+ u32 next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1, sizeof_trace);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, next0;
+ u32 pi1, next1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 64, LOAD);
+ CLIB_PREFETCH (p3->data, 64, LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ u32 pi0, next0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ one_buffer (vm, opaque1, opaque2, p0, &next0);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+#endif /* included_vlib_buffer_node_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/buffer_serialize.c b/src/vlib/buffer_serialize.c
new file mode 100644
index 00000000..96a5f0a0
--- /dev/null
+++ b/src/vlib/buffer_serialize.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.c: allocate/free network buffers.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+static void
+vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ vlib_main_t *vm;
+ vlib_serialize_buffer_main_t *sm;
+ uword n, n_bytes_to_write;
+ vlib_buffer_t *last;
+
+ n_bytes_to_write = s->current_buffer_index;
+ sm =
+ uword_to_pointer (s->data_function_opaque,
+ vlib_serialize_buffer_main_t *);
+ vm = sm->vlib_main;
+
+ ASSERT (sm->tx.max_n_data_bytes_per_chain > 0);
+ if (serialize_stream_is_end_of_stream (s)
+ || sm->tx.n_total_data_bytes + n_bytes_to_write >
+ sm->tx.max_n_data_bytes_per_chain)
+ {
+ vlib_process_t *p = vlib_get_current_process (vm);
+
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ last->current_length = n_bytes_to_write;
+
+ vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index,
+ sm->first_buffer);
+
+ sm->first_buffer = sm->last_buffer = ~0;
+ sm->tx.n_total_data_bytes = 0;
+ }
+
+ else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0)
+ {
+ ASSERT (sm->first_buffer == ~0);
+ ASSERT (sm->last_buffer == ~0);
+ n =
+ vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1,
+ sm->tx.free_list_index);
+ if (n != 1)
+ serialize_error (m,
+ clib_error_create
+ ("vlib_buffer_alloc_from_free_list fails"));
+ sm->last_buffer = sm->first_buffer;
+ s->n_buffer_bytes =
+ vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index);
+ }
+
+ if (n_bytes_to_write > 0)
+ {
+ vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer);
+ n =
+ vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1,
+ sm->tx.free_list_index);
+ if (n != 1)
+ serialize_error (m,
+ clib_error_create
+ ("vlib_buffer_alloc_from_free_list fails"));
+ sm->tx.n_total_data_bytes += n_bytes_to_write;
+ prev->current_length = n_bytes_to_write;
+ prev->next_buffer = sm->last_buffer;
+ prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ }
+
+ if (sm->last_buffer != ~0)
+ {
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ s->buffer = vlib_buffer_get_current (last);
+ s->current_buffer_index = 0;
+ ASSERT (last->current_data == s->current_buffer_index);
+ }
+}
+
+static void
+vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ vlib_main_t *vm;
+ vlib_serialize_buffer_main_t *sm;
+ vlib_buffer_t *last;
+
+ sm =
+ uword_to_pointer (s->data_function_opaque,
+ vlib_serialize_buffer_main_t *);
+ vm = sm->vlib_main;
+
+ if (serialize_stream_is_end_of_stream (s))
+ return;
+
+ if (sm->last_buffer != ~0)
+ {
+ last = vlib_get_buffer (vm, sm->last_buffer);
+
+ if (last->flags & VLIB_BUFFER_NEXT_PRESENT)
+ sm->last_buffer = last->next_buffer;
+ else
+ {
+ vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1);
+ sm->first_buffer = sm->last_buffer = ~0;
+ }
+ }
+
+ if (sm->last_buffer == ~0)
+ {
+ while (clib_fifo_elts (sm->rx.buffer_fifo) == 0)
+ {
+ sm->rx.ready_one_time_event =
+ vlib_process_create_one_time_event (vm, vlib_current_process (vm),
+ ~0);
+ vlib_process_wait_for_one_time_event (vm, /* no event data */ 0,
+ sm->rx.ready_one_time_event);
+ }
+
+ clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer);
+ sm->last_buffer = sm->first_buffer;
+ }
+
+ ASSERT (sm->last_buffer != ~0);
+
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ s->current_buffer_index = 0;
+ s->buffer = vlib_buffer_get_current (last);
+ s->n_buffer_bytes = last->current_length;
+}
+
+static void
+serialize_open_vlib_helper (serialize_main_t * m,
+ vlib_main_t * vm,
+ vlib_serialize_buffer_main_t * sm, uword is_read)
+{
+ /* Initialize serialize main but save overflow buffer for re-use between calls. */
+ {
+ u8 *save = m->stream.overflow_buffer;
+ memset (m, 0, sizeof (m[0]));
+ m->stream.overflow_buffer = save;
+ if (save)
+ _vec_len (save) = 0;
+ }
+
+ sm->first_buffer = sm->last_buffer = ~0;
+ if (is_read)
+ clib_fifo_reset (sm->rx.buffer_fifo);
+ else
+ sm->tx.n_total_data_bytes = 0;
+ sm->vlib_main = vm;
+ m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx;
+ m->stream.data_function_opaque = pointer_to_uword (sm);
+}
+
+void
+serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm,
+ vlib_serialize_buffer_main_t * sm)
+{
+ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0);
+}
+
+void
+unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm,
+ vlib_serialize_buffer_main_t * sm)
+{
+ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1);
+}
+
+u32
+serialize_close_vlib_buffer (serialize_main_t * m)
+{
+ vlib_serialize_buffer_main_t *sm
+ = uword_to_pointer (m->stream.data_function_opaque,
+ vlib_serialize_buffer_main_t *);
+ vlib_buffer_t *last;
+ serialize_stream_t *s = &m->stream;
+
+ last = vlib_get_buffer (sm->vlib_main, sm->last_buffer);
+ last->current_length = s->current_buffer_index;
+
+ if (vec_len (s->overflow_buffer) > 0)
+ {
+ sm->last_buffer
+ = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index,
+ sm->last_buffer == ~0 ? 0 : sm->last_buffer,
+ s->overflow_buffer,
+ vec_len (s->overflow_buffer));
+ _vec_len (s->overflow_buffer) = 0;
+ }
+
+ return sm->first_buffer;
+}
+
+void
+unserialize_close_vlib_buffer (serialize_main_t * m)
+{
+ vlib_serialize_buffer_main_t *sm
+ = uword_to_pointer (m->stream.data_function_opaque,
+ vlib_serialize_buffer_main_t *);
+ if (sm->first_buffer != ~0)
+ vlib_buffer_free_one (sm->vlib_main, sm->first_buffer);
+ clib_fifo_reset (sm->rx.buffer_fifo);
+ if (m->stream.overflow_buffer)
+ _vec_len (m->stream.overflow_buffer) = 0;
+}
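+
+/* Usage sketch (illustrative only, compiled out): serializing into a
+ * vlib buffer chain.  Assumes the caller has set sm->tx.free_list_index
+ * and sm->tx.max_n_data_bytes_per_chain; serialize_integer () comes from
+ * vppinfra.  The helper name is hypothetical. */
+#if 0
+static u32
+example_serialize_u32 (vlib_main_t * vm,
+		       vlib_serialize_buffer_main_t * sm, u32 value)
+{
+  serialize_main_t m;
+
+  serialize_open_vlib_buffer (&m, vm, sm);
+  serialize_integer (&m, value, sizeof (value));
+  /* Returns the buffer index of the head of the resulting chain. */
+  return serialize_close_vlib_buffer (&m);
+}
+#endif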
+
+/** @endcond */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
new file mode 100644
index 00000000..48cf0426
--- /dev/null
+++ b/src/vlib/cli.c
@@ -0,0 +1,1345 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.c: command line interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/cpu.h>
+#include <unistd.h>
+#include <ctype.h>
+
+/* Root of all show commands. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_command, static) = {
+ .path = "show",
+ .short_help = "Show commands",
+};
+/* *INDENT-ON* */
+
+/* Root of all clear commands. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_clear_command, static) = {
+ .path = "clear",
+ .short_help = "Clear commands",
+};
+/* *INDENT-ON* */
+
+/* Root of all set commands. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_set_command, static) = {
+ .path = "set",
+ .short_help = "Set commands",
+};
+/* *INDENT-ON* */
+
+/* Root of all test commands. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_test_command, static) = {
+ .path = "test",
+ .short_help = "Test commands",
+};
+/* *INDENT-ON* */
+
+/* Returns bitmap of commands which match key. */
+static uword *
+vlib_cli_sub_command_match (vlib_cli_command_t * c, unformat_input_t * input)
+{
+ int i, n;
+ uword *match = 0;
+ vlib_cli_parse_position_t *p;
+
+ unformat_skip_white_space (input);
+
+ for (i = 0;; i++)
+ {
+ uword k;
+
+ k = unformat_get_input (input);
+ switch (k)
+ {
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '-':
+ case '_':
+ break;
+
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ case UNFORMAT_END_OF_INPUT:
+	  /* Whitespace or end of input removes any non-whitespace
+	     matches that were previously possible. */
+ if (i < vec_len (c->sub_command_positions)
+ && clib_bitmap_count_set_bits (match) > 1)
+ {
+ p = vec_elt_at_index (c->sub_command_positions, i);
+ for (n = 0; n < vec_len (p->bitmaps); n++)
+ match = clib_bitmap_andnot (match, p->bitmaps[n]);
+ }
+ goto done;
+
+ default:
+ unformat_put_input (input);
+ goto done;
+ }
+
+ if (i >= vec_len (c->sub_command_positions))
+ {
+ no_match:
+ clib_bitmap_free (match);
+ return 0;
+ }
+
+ p = vec_elt_at_index (c->sub_command_positions, i);
+ if (vec_len (p->bitmaps) == 0)
+ goto no_match;
+
+ n = k - p->min_char;
+ if (n < 0 || n >= vec_len (p->bitmaps))
+ goto no_match;
+
+ if (i == 0)
+ match = clib_bitmap_dup (p->bitmaps[n]);
+ else
+ match = clib_bitmap_and (match, p->bitmaps[n]);
+
+ if (clib_bitmap_is_zero (match))
+ goto no_match;
+ }
+
+done:
+ return match;
+}
+
+/* Looks for a string-based sub-input formatted as { SUB-INPUT }. */
+uword
+unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args)
+{
+ unformat_input_t *sub_input = va_arg (*args, unformat_input_t *);
+ u8 *s;
+ uword c;
+
+ while (1)
+ {
+ c = unformat_get_input (i);
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f':
+ break;
+
+ case '{':
+ default:
+	  /* Put back the brace (or other character). */
+ if (c != UNFORMAT_END_OF_INPUT)
+ unformat_put_input (i);
+
+ if (c == '{' && unformat (i, "%v", &s))
+ {
+ unformat_init_vector (sub_input, s);
+ return 1;
+ }
+ return 0;
+ }
+ }
+ return 0;
+}
+
+static vlib_cli_command_t *
+get_sub_command (vlib_cli_main_t * cm, vlib_cli_command_t * parent, u32 si)
+{
+ vlib_cli_sub_command_t *s = vec_elt_at_index (parent->sub_commands, si);
+ return vec_elt_at_index (cm->commands, s->index);
+}
+
+static uword
+unformat_vlib_cli_sub_command (unformat_input_t * i, va_list * args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_cli_command_t *c = va_arg (*args, vlib_cli_command_t *);
+ vlib_cli_command_t **result = va_arg (*args, vlib_cli_command_t **);
+ vlib_cli_main_t *cm = &vm->cli_main;
+ uword *match_bitmap, is_unique, index;
+
+ {
+ vlib_cli_sub_rule_t *sr;
+ vlib_cli_parse_rule_t *r;
+ vec_foreach (sr, c->sub_rules)
+ {
+ void **d;
+ r = vec_elt_at_index (cm->parse_rules, sr->rule_index);
+ vec_add2 (cm->parse_rule_data, d, 1);
+ vec_reset_length (d[0]);
+ if (r->data_size)
+ d[0] = _vec_resize (d[0],
+ /* length increment */ 1,
+ r->data_size,
+ /* header_bytes */ 0,
+ /* data align */ sizeof (uword));
+ if (unformat_user (i, r->unformat_function, vm, d[0]))
+ {
+ *result = vec_elt_at_index (cm->commands, sr->command_index);
+ return 1;
+ }
+ }
+ }
+
+ match_bitmap = vlib_cli_sub_command_match (c, i);
+ is_unique = clib_bitmap_count_set_bits (match_bitmap) == 1;
+ index = ~0;
+ if (is_unique)
+ {
+ index = clib_bitmap_first_set (match_bitmap);
+ *result = get_sub_command (cm, c, index);
+ }
+ clib_bitmap_free (match_bitmap);
+
+ return is_unique;
+}
+
+static int
+vlib_cli_cmp_strings (void *a1, void *a2)
+{
+ u8 *c1 = *(u8 **) a1;
+ u8 *c2 = *(u8 **) a2;
+
+ return vec_cmp (c1, c2);
+}
+
+u8 **
+vlib_cli_get_possible_completions (u8 * str)
+{
+ vlib_cli_command_t *c;
+ vlib_cli_sub_command_t *sc;
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_cli_main_t *vcm = &vm->cli_main;
+ uword *match_bitmap = 0;
+ uword index, is_unique, help_next_level;
+ u8 **result = 0;
+ unformat_input_t input;
+ unformat_init_vector (&input, vec_dup (str));
+ c = vec_elt_at_index (vcm->commands, 0);
+
+ /* remove trailing whitespace, except for one of them */
+ while (vec_len (input.buffer) >= 2 &&
+ isspace (input.buffer[vec_len (input.buffer) - 1]) &&
+ isspace (input.buffer[vec_len (input.buffer) - 2]))
+ {
+ vec_del1 (input.buffer, vec_len (input.buffer) - 1);
+ }
+
+ /* if input is empty, directly return list of root commands */
+ if (vec_len (input.buffer) == 0 ||
+ (vec_len (input.buffer) == 1 && isspace (input.buffer[0])))
+ {
+ vec_foreach (sc, c->sub_commands)
+ {
+ vec_add1 (result, (u8 *) sc->name);
+ }
+ goto done;
+ }
+
+ /* add a trailing '?' so that vlib_cli_sub_command_match can find
+ * all commands starting with the input string */
+ vec_add1 (input.buffer, '?');
+
+ while (1)
+ {
+ match_bitmap = vlib_cli_sub_command_match (c, &input);
+ /* no match: return no result */
+ if (match_bitmap == 0)
+ {
+ goto done;
+ }
+ is_unique = clib_bitmap_count_set_bits (match_bitmap) == 1;
+ /* unique match: try to step one subcommand level further */
+ if (is_unique)
+ {
+ /* stop if no more input */
+ if (input.index >= vec_len (input.buffer) - 1)
+ {
+ break;
+ }
+
+ index = clib_bitmap_first_set (match_bitmap);
+ c = get_sub_command (vcm, c, index);
+ clib_bitmap_free (match_bitmap);
+ continue;
+ }
+ /* multiple matches: stop here, return all matches */
+ break;
+ }
+
+ /* remove trailing '?' */
+ vec_del1 (input.buffer, vec_len (input.buffer) - 1);
+
+ /* if we have a space at the end of input, and a unique match,
+ * autocomplete the next level of subcommands */
+ help_next_level = (vec_len (str) == 0) || isspace (str[vec_len (str) - 1]);
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach(index, match_bitmap, {
+ if (help_next_level && is_unique) {
+ c = get_sub_command (vcm, c, index);
+ vec_foreach (sc, c->sub_commands) {
+ vec_add1 (result, (u8*) sc->name);
+ }
+ goto done; /* break doesn't work in this macro-loop */
+ }
+ sc = &c->sub_commands[index];
+ vec_add1(result, (u8*) sc->name);
+ });
+ /* *INDENT-ON* */
+
+done:
+ clib_bitmap_free (match_bitmap);
+ unformat_free (&input);
+
+ if (result)
+ vec_sort_with_function (result, vlib_cli_cmp_strings);
+ return result;
+}
+
+static u8 *
+format_vlib_cli_command_help (u8 * s, va_list * args)
+{
+ vlib_cli_command_t *c = va_arg (*args, vlib_cli_command_t *);
+ int is_long = va_arg (*args, int);
+ if (is_long && c->long_help)
+ s = format (s, "%s", c->long_help);
+ else if (c->short_help)
+ s = format (s, "%s", c->short_help);
+ else
+ s = format (s, "%v commands", c->path);
+ return s;
+}
+
+static u8 *
+format_vlib_cli_parse_rule_name (u8 * s, va_list * args)
+{
+ vlib_cli_parse_rule_t *r = va_arg (*args, vlib_cli_parse_rule_t *);
+ return format (s, "<%U>", format_c_identifier, r->name);
+}
+
+static u8 *
+format_vlib_cli_path (u8 * s, va_list * args)
+{
+ u8 *path = va_arg (*args, u8 *);
+ int i, in_rule;
+ in_rule = 0;
+ for (i = 0; i < vec_len (path); i++)
+ {
+ switch (path[i])
+ {
+ case '%':
+ in_rule = 1;
+ vec_add1 (s, '<'); /* start of <RULE> */
+ break;
+
+ case '_':
+ /* _ -> space in rules. */
+ vec_add1 (s, in_rule ? ' ' : '_');
+ break;
+
+ case ' ':
+ if (in_rule)
+ {
+ vec_add1 (s, '>'); /* end of <RULE> */
+ in_rule = 0;
+ }
+ vec_add1 (s, ' ');
+ break;
+
+ default:
+ vec_add1 (s, path[i]);
+ break;
+ }
+ }
+
+ if (in_rule)
+ vec_add1 (s, '>'); /* terminate <RULE> */
+
+ return s;
+}
+
+static vlib_cli_command_t *
+all_subs (vlib_cli_main_t * cm, vlib_cli_command_t * subs, u32 command_index)
+{
+ vlib_cli_command_t *c = vec_elt_at_index (cm->commands, command_index);
+ vlib_cli_sub_command_t *sc;
+ vlib_cli_sub_rule_t *sr;
+
+ if (c->function)
+ vec_add1 (subs, c[0]);
+
+ vec_foreach (sr, c->sub_rules)
+ subs = all_subs (cm, subs, sr->command_index);
+ vec_foreach (sc, c->sub_commands) subs = all_subs (cm, subs, sc->index);
+
+ return subs;
+}
+
+static int
+vlib_cli_cmp_rule (void *a1, void *a2)
+{
+ vlib_cli_sub_rule_t *r1 = a1;
+ vlib_cli_sub_rule_t *r2 = a2;
+
+ return vec_cmp (r1->name, r2->name);
+}
+
+static int
+vlib_cli_cmp_command (void *a1, void *a2)
+{
+ vlib_cli_command_t *c1 = a1;
+ vlib_cli_command_t *c2 = a2;
+
+ return vec_cmp (c1->path, c2->path);
+}
+
+static clib_error_t *
+vlib_cli_dispatch_sub_commands (vlib_main_t * vm,
+ vlib_cli_main_t * cm,
+ unformat_input_t * input,
+ uword parent_command_index)
+{
+ vlib_cli_command_t *parent, *c;
+ clib_error_t *error = 0;
+ unformat_input_t sub_input;
+ u8 *string;
+ uword is_main_dispatch = cm == &vm->cli_main;
+
+ parent = vec_elt_at_index (cm->commands, parent_command_index);
+ if (is_main_dispatch && unformat (input, "help"))
+ {
+ uword help_at_end_of_line, i;
+
+ help_at_end_of_line =
+ unformat_check_input (input) == UNFORMAT_END_OF_INPUT;
+ while (1)
+ {
+ c = parent;
+ if (unformat_user
+ (input, unformat_vlib_cli_sub_command, vm, c, &parent))
+ ;
+
+ else if (!(unformat_check_input (input) == UNFORMAT_END_OF_INPUT))
+ goto unknown;
+
+ else
+ break;
+ }
+
+ /* help SUB-COMMAND => long format help.
+ "help" at end of line: show all commands. */
+ if (!help_at_end_of_line)
+ vlib_cli_output (vm, "%U", format_vlib_cli_command_help, c,
+ /* is_long */ 1);
+
+ else if (vec_len (c->sub_commands) + vec_len (c->sub_rules) == 0)
+ vlib_cli_output (vm, "%v: no sub-commands", c->path);
+
+ else
+ {
+ vlib_cli_sub_command_t *sc;
+ vlib_cli_sub_rule_t *sr, *subs;
+
+ subs = vec_dup (c->sub_rules);
+
+ /* Add in rules if any. */
+ vec_foreach (sc, c->sub_commands)
+ {
+ vec_add2 (subs, sr, 1);
+ sr->name = sc->name;
+ sr->command_index = sc->index;
+ sr->rule_index = ~0;
+ }
+
+ vec_sort_with_function (subs, vlib_cli_cmp_rule);
+
+ for (i = 0; i < vec_len (subs); i++)
+ {
+ vlib_cli_command_t *d;
+ vlib_cli_parse_rule_t *r;
+
+ d = vec_elt_at_index (cm->commands, subs[i].command_index);
+	      r = subs[i].rule_index != ~0
+		? vec_elt_at_index (cm->parse_rules, subs[i].rule_index)
+		: 0;
+
+ if (r)
+ vlib_cli_output
+ (vm, " %-30U %U",
+ format_vlib_cli_parse_rule_name, r,
+ format_vlib_cli_command_help, d, /* is_long */ 0);
+ else
+ vlib_cli_output
+ (vm, " %-30v %U",
+ subs[i].name,
+ format_vlib_cli_command_help, d, /* is_long */ 0);
+ }
+
+ vec_free (subs);
+ }
+ }
+
+ else if (is_main_dispatch
+ && (unformat (input, "choices") || unformat (input, "?")))
+ {
+ vlib_cli_command_t *sub, *subs;
+
+ subs = all_subs (cm, 0, parent_command_index);
+ vec_sort_with_function (subs, vlib_cli_cmp_command);
+ vec_foreach (sub, subs)
+ vlib_cli_output (vm, " %-40U %U",
+ format_vlib_cli_path, sub->path,
+ format_vlib_cli_command_help, sub, /* is_long */ 0);
+ vec_free (subs);
+ }
+
+ else if (unformat (input, "comment %v", &string))
+ {
+ vec_free (string);
+ }
+
+ else if (unformat (input, "uncomment %U",
+ unformat_vlib_cli_sub_input, &sub_input))
+ {
+ error =
+ vlib_cli_dispatch_sub_commands (vm, cm, &sub_input,
+ parent_command_index);
+ unformat_free (&sub_input);
+ }
+
+ else
+ if (unformat_user (input, unformat_vlib_cli_sub_command, vm, parent, &c))
+ {
+ unformat_input_t *si;
+ uword has_sub_commands =
+ vec_len (c->sub_commands) + vec_len (c->sub_rules) > 0;
+
+ si = input;
+ if (unformat_user (input, unformat_vlib_cli_sub_input, &sub_input))
+ si = &sub_input;
+
+ if (has_sub_commands)
+ error = vlib_cli_dispatch_sub_commands (vm, cm, si, c - cm->commands);
+
+ if (has_sub_commands && !error)
+ /* Found valid sub-command. */ ;
+
+ else if (c->function)
+ {
+ clib_error_t *c_error;
+
+ /* Skip white space for benefit of called function. */
+ unformat_skip_white_space (si);
+
+ if (unformat (si, "?"))
+ {
+ vlib_cli_output (vm, " %-40U %U", format_vlib_cli_path, c->path, format_vlib_cli_command_help, c, /* is_long */
+ 0);
+ }
+ else
+ {
+ if (!c->is_mp_safe)
+ vlib_worker_thread_barrier_sync (vm);
+
+ c_error = c->function (vm, si, c);
+
+ if (!c->is_mp_safe)
+ vlib_worker_thread_barrier_release (vm);
+
+ if (c_error)
+ {
+ error =
+ clib_error_return (0, "%v: %v", c->path, c_error->what);
+ clib_error_free (c_error);
+ /* Free sub input. */
+ if (si != input)
+ unformat_free (si);
+
+ return error;
+ }
+ }
+
+ /* Free any previous error. */
+ clib_error_free (error);
+ }
+
+ else if (!error)
+ error = clib_error_return (0, "%v: no sub-commands", c->path);
+
+ /* Free sub input. */
+ if (si != input)
+ unformat_free (si);
+ }
+
+ else
+ goto unknown;
+
+ return error;
+
+unknown:
+ if (parent->path)
+ return clib_error_return (0, "%v: unknown input `%U'", parent->path,
+ format_unformat_error, input);
+ else
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+}
+
+
+void vlib_unix_error_report (vlib_main_t *, clib_error_t *)
+ __attribute__ ((weak));
+
+void
+vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error)
+{
+}
+
+/* Process CLI input. */
+void
+vlib_cli_input (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_output_function_t * function, uword function_arg)
+{
+ vlib_process_t *cp = vlib_get_current_process (vm);
+ vlib_cli_main_t *cm = &vm->cli_main;
+ clib_error_t *error;
+ vlib_cli_output_function_t *save_function;
+ uword save_function_arg;
+
+ save_function = cp->output_function;
+ save_function_arg = cp->output_function_arg;
+
+ cp->output_function = function;
+ cp->output_function_arg = function_arg;
+
+ do
+ {
+ vec_reset_length (cm->parse_rule_data);
+ error = vlib_cli_dispatch_sub_commands (vm, &vm->cli_main, input, /* parent */
+ 0);
+ }
+ while (!error && !unformat (input, "%U", unformat_eof));
+
+ if (error)
+ {
+ vlib_cli_output (vm, "%v", error->what);
+ vlib_unix_error_report (vm, error);
+ clib_error_free (error);
+ }
+
+ cp->output_function = save_function;
+ cp->output_function_arg = save_function_arg;
+}
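+
+/* Usage sketch (illustrative only, compiled out): executing a CLI
+ * command string from code.  Must run in a process context;
+ * unformat_init_string () comes from vppinfra.  With no output function
+ * supplied, output falls through to stdout. */
+#if 0
+static void
+example_exec (vlib_main_t * vm)
+{
+  unformat_input_t in;
+
+  unformat_init_string (&in, "show memory", strlen ("show memory"));
+  vlib_cli_input (vm, &in, /* output function */ 0, /* arg */ 0);
+  unformat_free (&in);
+}
+#endif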
+
+/* Output to current CLI connection. */
+void
+vlib_cli_output (vlib_main_t * vm, char *fmt, ...)
+{
+ vlib_process_t *cp = vlib_get_current_process (vm);
+ va_list va;
+ u8 *s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ /* Terminate with \n if not present. */
+ if (vec_len (s) > 0 && s[vec_len (s) - 1] != '\n')
+ vec_add1 (s, '\n');
+
+ if ((!cp) || (!cp->output_function))
+ fformat (stdout, "%v", s);
+ else
+ cp->output_function (cp->output_function_arg, s, vec_len (s));
+
+ vec_free (s);
+}
+
+static clib_error_t *
+show_memory_usage (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ int verbose = 0;
+ clib_error_t *error;
+ u32 index = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ return error;
+ }
+ }
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main (
+ ({
+ vlib_cli_output (vm, "Thread %d %v\n", index, vlib_worker_threads[index].name);
+ vlib_cli_output (vm, "%U\n", format_mheap, clib_per_cpu_mheaps[index], verbose);
+ index++;
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_memory_usage_command, static) = {
+ .path = "show memory",
+ .short_help = "Show current memory usage",
+ .function = show_memory_usage,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_cpu (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c);
+ _("Model name", "%U", format_cpu_model_name);
+ _("Microarchitecture", "%U", format_cpu_uarch);
+ _("Flags", "%U", format_cpu_flags);
+ _("Base frequency", "%.2f GHz",
+ ((f64) vm->clib_time.clocks_per_second) * 1e-9);
+#undef _
+ return 0;
+}
+
+/*?
+ * Displays various information about the CPU.
+ *
+ * @cliexpar
+ * @cliexstart{show cpu}
+ * Model name: Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz
+ * Microarchitecture: Broadwell (Broadwell-EP/EX)
+ * Flags: sse3 ssse3 sse41 sse42 avx avx2 aes
+ * Base frequency: 3.20 GHz
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_cpu_command, static) = {
+ .path = "show cpu",
+ .short_help = "Show cpu information",
+ .function = show_cpu,
+};
+
+/* *INDENT-ON* */
+static clib_error_t *
+enable_disable_memory_trace (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ int enable;
+
+ if (!unformat_user (input, unformat_vlib_enable_disable, &enable))
+ {
+ error = clib_error_return (0, "expecting enable/on or disable/off");
+ goto done;
+ }
+
+ clib_mem_trace (enable);
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (enable_disable_memory_trace_command, static) = {
+ .path = "memory-trace",
+ .short_help = "Enable/disable memory allocation trace",
+ .function = enable_disable_memory_trace,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ void *heap;
+ mheap_t *mheap;
+
+ if (unformat (input, "on"))
+ {
+ /* *INDENT-OFF* */
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
+ mheap = mheap_header(heap);
+ mheap->flags |= MHEAP_FLAG_VALIDATE;
+	/* Turn off small object cache because it delays detection of errors */
+ mheap->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ });
+ /* *INDENT-ON* */
+
+ }
+ else if (unformat (input, "off"))
+ {
+ /* *INDENT-OFF* */
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
+ mheap = mheap_header(heap);
+ mheap->flags &= ~MHEAP_FLAG_VALIDATE;
+ mheap->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ });
+ /* *INDENT-ON* */
+ }
+ else if (unformat (input, "now"))
+ {
+ /* *INDENT-OFF* */
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
+ mheap = mheap_header(heap);
+ mheap_validate(heap);
+ });
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "heap validation complete");
+
+ }
+ else
+ {
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_test_heap_validate,static) = {
+ .path = "test heap-validate",
+ .short_help = "<on/off/now> validate heap on future allocs/frees or right now",
+ .function = test_heap_validate,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+restart_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ char *newenviron[] = { NULL };
+
+ execve (vm->name, (char **) vm->argv, newenviron);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (restart_cmd,static) = {
+ .path = "restart",
+ .short_help = "restart process",
+ .function = restart_cmd_fn,
+};
+/* *INDENT-ON* */
+
+#ifdef TEST_CODE
+/*
+ * A trivial test harness to verify the per-process output_function
+ * is working correctly.
+ */
+
+static clib_error_t *
+sleep_ten_seconds (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u16 i;
+ u16 my_id = rand ();
+
+ vlib_cli_output (vm, "Starting 10 seconds sleep with id %u\n", my_id);
+
+ for (i = 0; i < 10; i++)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ vlib_cli_output (vm, "Iteration number %u, my id: %u\n", i, my_id);
+ }
+ vlib_cli_output (vm, "Done with sleep with id %u\n", my_id);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ping_command, static) = {
+ .path = "test sleep",
+ .function = sleep_ten_seconds,
+ .short_help = "Sleep for 10 seconds",
+};
+/* *INDENT-ON* */
+#endif /* ifdef TEST_CODE */
+
+static uword
+vlib_cli_normalize_path (char *input, char **result)
+{
+ char *i = input;
+ char *s = 0;
+ uword l = 0;
+ uword index_of_last_space = ~0;
+
+ while (*i != 0)
+ {
+ u8 c = *i++;
+ /* Multiple white space -> single space. */
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ if (l > 0 && s[l - 1] != ' ')
+ {
+ vec_add1 (s, ' ');
+ l++;
+ }
+ break;
+
+ default:
+ if (l > 0 && s[l - 1] == ' ')
+ index_of_last_space = vec_len (s);
+ vec_add1 (s, c);
+ l++;
+ break;
+ }
+ }
+
+ /* Remove any extra space at end. */
+ if (l > 0 && s[l - 1] == ' ')
+ _vec_len (s) -= 1;
+
+ *result = s;
+ return index_of_last_space;
+}
+
+always_inline uword
+parent_path_len (char *path)
+{
+ word i;
+ for (i = vec_len (path) - 1; i >= 0; i--)
+ {
+ if (path[i] == ' ')
+ return i;
+ }
+ return ~0;
+}
+
+static void
+add_sub_command (vlib_cli_main_t * cm, uword parent_index, uword child_index)
+{
+ vlib_cli_command_t *p, *c;
+ vlib_cli_sub_command_t *sub_c;
+ u8 *sub_name;
+ word i, l;
+
+ p = vec_elt_at_index (cm->commands, parent_index);
+ c = vec_elt_at_index (cm->commands, child_index);
+
+ l = parent_path_len (c->path);
+ if (l == ~0)
+ sub_name = vec_dup ((u8 *) c->path);
+ else
+ {
+ ASSERT (l + 1 < vec_len (c->path));
+ sub_name = 0;
+ vec_add (sub_name, c->path + l + 1, vec_len (c->path) - (l + 1));
+ }
+
+ if (sub_name[0] == '%')
+ {
+ uword *q;
+ vlib_cli_sub_rule_t *sr;
+
+ /* Remove %. */
+ vec_delete (sub_name, 1, 0);
+
+ if (!p->sub_rule_index_by_name)
+ p->sub_rule_index_by_name = hash_create_vec ( /* initial length */ 32,
+ sizeof (sub_name[0]),
+ sizeof (uword));
+ q = hash_get_mem (p->sub_rule_index_by_name, sub_name);
+ if (q)
+ {
+ sr = vec_elt_at_index (p->sub_rules, q[0]);
+ ASSERT (sr->command_index == child_index);
+ return;
+ }
+
+ q = hash_get_mem (cm->parse_rule_index_by_name, sub_name);
+ if (!q)
+ {
+ clib_error ("reference to unknown rule `%%%v' in path `%v'",
+ sub_name, c->path);
+ return;
+ }
+
+ hash_set_mem (p->sub_rule_index_by_name, sub_name,
+ vec_len (p->sub_rules));
+ vec_add2 (p->sub_rules, sr, 1);
+ sr->name = sub_name;
+ sr->rule_index = q[0];
+ sr->command_index = child_index;
+ return;
+ }
+
+ if (!p->sub_command_index_by_name)
+ p->sub_command_index_by_name = hash_create_vec ( /* initial length */ 32,
+ sizeof (c->path[0]),
+ sizeof (uword));
+
+ /* Check if sub-command has already been created. */
+ if (hash_get_mem (p->sub_command_index_by_name, sub_name))
+ {
+ vec_free (sub_name);
+ return;
+ }
+
+ vec_add2 (p->sub_commands, sub_c, 1);
+ sub_c->index = child_index;
+ sub_c->name = sub_name;
+ hash_set_mem (p->sub_command_index_by_name, sub_c->name,
+ sub_c - p->sub_commands);
+
+ vec_validate (p->sub_command_positions, vec_len (sub_c->name) - 1);
+ for (i = 0; i < vec_len (sub_c->name); i++)
+ {
+ int n;
+ vlib_cli_parse_position_t *pos;
+
+ pos = vec_elt_at_index (p->sub_command_positions, i);
+
+ if (!pos->bitmaps)
+ pos->min_char = sub_c->name[i];
+
+ n = sub_c->name[i] - pos->min_char;
+ if (n < 0)
+ {
+ pos->min_char = sub_c->name[i];
+ vec_insert (pos->bitmaps, -n, 0);
+ n = 0;
+ }
+
+ vec_validate (pos->bitmaps, n);
+ pos->bitmaps[n] =
+ clib_bitmap_ori (pos->bitmaps[n], sub_c - p->sub_commands);
+ }
+}
+
+static void
+vlib_cli_make_parent (vlib_cli_main_t * cm, uword ci)
+{
+ uword p_len, pi, *p;
+ char *p_path;
+ vlib_cli_command_t *c, *parent;
+
+ /* Root command (index 0) should have already been added. */
+ ASSERT (vec_len (cm->commands) > 0);
+
+ c = vec_elt_at_index (cm->commands, ci);
+ p_len = parent_path_len (c->path);
+
+ /* No space? Parent is root command. */
+ if (p_len == ~0)
+ {
+ add_sub_command (cm, 0, ci);
+ return;
+ }
+
+ p_path = 0;
+ vec_add (p_path, c->path, p_len);
+
+ p = hash_get_mem (cm->command_index_by_path, p_path);
+
+  /* Does the parent command already exist? */
+ if (!p)
+ {
+ /* Parent does not exist; create it. */
+ vec_add2 (cm->commands, parent, 1);
+ parent->path = p_path;
+ hash_set_mem (cm->command_index_by_path, parent->path,
+ parent - cm->commands);
+ pi = parent - cm->commands;
+ }
+ else
+ {
+ pi = p[0];
+ vec_free (p_path);
+ }
+
+ add_sub_command (cm, pi, ci);
+
+ /* Create parent's parent. */
+ if (!p)
+ vlib_cli_make_parent (cm, pi);
+}
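+
+/* Example (informal): registering "show ip route" into an empty table
+   creates placeholder parents "show ip" and "show" on the way up to
+   the root; if those paths are later registered explicitly,
+   vlib_cli_register fills in the placeholder entries in place. */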
+
+always_inline uword
+vlib_cli_command_is_empty (vlib_cli_command_t * c)
+{
+ return (c->long_help == 0 && c->short_help == 0 && c->function == 0);
+}
+
+clib_error_t *
+vlib_cli_register (vlib_main_t * vm, vlib_cli_command_t * c)
+{
+ vlib_cli_main_t *cm = &vm->cli_main;
+ clib_error_t *error = 0;
+ uword ci, *p;
+ char *normalized_path;
+
+ if ((error = vlib_call_init_function (vm, vlib_cli_init)))
+ return error;
+
+ (void) vlib_cli_normalize_path (c->path, &normalized_path);
+
+ if (!cm->command_index_by_path)
+ cm->command_index_by_path = hash_create_vec ( /* initial length */ 32,
+ sizeof (c->path[0]),
+ sizeof (uword));
+
+ /* See if command already exists with given path. */
+ p = hash_get_mem (cm->command_index_by_path, normalized_path);
+ if (p)
+ {
+ vlib_cli_command_t *d;
+
+ ci = p[0];
+ d = vec_elt_at_index (cm->commands, ci);
+
+ /* If the existing command was created via vlib_cli_make_parent,
+ replace it with the caller's data. */
+ if (vlib_cli_command_is_empty (d))
+ {
+ vlib_cli_command_t save = d[0];
+
+ ASSERT (!vlib_cli_command_is_empty (c));
+
+ /* Copy caller's fields. */
+ d[0] = c[0];
+
+ /* Save internal fields. */
+ d->path = save.path;
+ d->sub_commands = save.sub_commands;
+ d->sub_command_index_by_name = save.sub_command_index_by_name;
+ d->sub_command_positions = save.sub_command_positions;
+ d->sub_rules = save.sub_rules;
+ }
+ else
+ error =
+ clib_error_return (0, "duplicate command name with path %v",
+ normalized_path);
+
+ vec_free (normalized_path);
+ if (error)
+ return error;
+ }
+ else
+ {
+ /* Command does not exist: create it. */
+
+ /* Add root command (index 0). */
+ if (vec_len (cm->commands) == 0)
+ {
+ /* Create command with index 0; path is empty string. */
+ vec_resize (cm->commands, 1);
+ }
+
+ ci = vec_len (cm->commands);
+ hash_set_mem (cm->command_index_by_path, normalized_path, ci);
+ vec_add1 (cm->commands, c[0]);
+
+ c = vec_elt_at_index (cm->commands, ci);
+ c->path = normalized_path;
+
+ /* Don't inherit from registration. */
+ c->sub_commands = 0;
+ c->sub_command_index_by_name = 0;
+ c->sub_command_positions = 0;
+ }
+
+ vlib_cli_make_parent (cm, ci);
+ return 0;
+}
+
+clib_error_t *
+vlib_cli_register_parse_rule (vlib_main_t * vm, vlib_cli_parse_rule_t * r_reg)
+{
+ vlib_cli_main_t *cm = &vm->cli_main;
+ vlib_cli_parse_rule_t *r;
+ clib_error_t *error = 0;
+ u8 *r_name;
+ uword *p;
+
+ if (!cm->parse_rule_index_by_name)
+ cm->parse_rule_index_by_name = hash_create_vec ( /* initial length */ 32,
+ sizeof (r->name[0]),
+ sizeof (uword));
+
+ /* Make vector copy of name. */
+ r_name = format (0, "%s", r_reg->name);
+
+ if ((p = hash_get_mem (cm->parse_rule_index_by_name, r_name)))
+ {
+ vec_free (r_name);
+ return clib_error_return (0, "duplicate parse rule name `%s'",
+ r_reg->name);
+ }
+
+ vec_add2 (cm->parse_rules, r, 1);
+ r[0] = r_reg[0];
+ r->name = (char *) r_name;
+ hash_set_mem (cm->parse_rule_index_by_name, r->name, r - cm->parse_rules);
+
+ return error;
+}
+
+#if 0
+/* $$$ turn back on again someday, maybe */
+static clib_error_t *vlib_cli_register_parse_rules (vlib_main_t * vm,
+ vlib_cli_parse_rule_t *
+ lo,
+ vlib_cli_parse_rule_t *
+ hi)
+ __attribute__ ((unused))
+{
+ clib_error_t *error = 0;
+ vlib_cli_parse_rule_t *r;
+
+ for (r = lo; r < hi; r = clib_elf_section_data_next (r, 0))
+ {
+ if (!r->name || strlen (r->name) == 0)
+ {
+ error = clib_error_return (0, "parse rule with no name");
+ goto done;
+ }
+
+ error = vlib_cli_register_parse_rule (vm, r);
+ if (error)
+ goto done;
+ }
+
+done:
+ return error;
+}
+#endif
+
+static int
+cli_path_compare (void *a1, void *a2)
+{
+ u8 **s1 = a1;
+ u8 **s2 = a2;
+
+ if ((vec_len (*s1) < vec_len (*s2)) &&
+ memcmp ((char *) *s1, (char *) *s2, vec_len (*s1)) == 0)
+ return -1;
+
+ if ((vec_len (*s1) > vec_len (*s2)) &&
+ memcmp ((char *) *s1, (char *) *s2, vec_len (*s2)) == 0)
+ return 1;
+
+ return vec_cmp (*s1, *s2);
+}
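+
+/* Ordering example (informal): a path sorts immediately before its own
+   extensions, so "show" precedes "show cli", which precedes
+   "show cli xyz"; unrelated paths fall back to plain vector compare. */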
+
+static clib_error_t *
+show_cli_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_cli_main_t *cm = &vm->cli_main;
+ vlib_cli_command_t *cli;
+ u8 **paths = 0, **s;
+
+ /* *INDENT-OFF* */
+ vec_foreach (cli, cm->commands)
+ if (vec_len (cli->path) > 0)
+ vec_add1 (paths, (u8 *) cli->path);
+
+ vec_sort_with_function (paths, cli_path_compare);
+
+ vec_foreach (s, paths)
+ vlib_cli_output (vm, "%v", *s);
+ /* *INDENT-ON* */
+
+ vec_free (paths);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_cli_command, static) = {
+ .path = "show cli",
+ .short_help = "Show cli commands",
+ .function = show_cli_cmd_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vlib_cli_init (vlib_main_t * vm)
+{
+ vlib_cli_main_t *cm = &vm->cli_main;
+ clib_error_t *error = 0;
+ vlib_cli_command_t *cmd;
+
+ cmd = cm->cli_command_registrations;
+
+ while (cmd)
+ {
+ error = vlib_cli_register (vm, cmd);
+ if (error)
+ return error;
+ cmd = cmd->next_cli_command;
+ }
+ return error;
+}
+
+VLIB_INIT_FUNCTION (vlib_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/cli.h b/src/vlib/cli.h
new file mode 100644
index 00000000..e713808f
--- /dev/null
+++ b/src/vlib/cli.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.h: command line interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_cli_h
+#define included_vlib_cli_h
+
+#include <vppinfra/format.h>
+
+struct vlib_cli_command_t;
+
+typedef struct
+{
+ u32 min_char;
+
+ /* Indexed by name[position] - min_char. */
+ uword **bitmaps;
+} vlib_cli_parse_position_t;
+
+typedef struct
+{
+ u8 *name;
+
+ u32 index;
+} vlib_cli_sub_command_t;
+
+typedef struct
+{
+ u8 *name;
+
+ u32 rule_index;
+
+ u32 command_index;
+} vlib_cli_sub_rule_t;
+
+typedef struct
+{
+ char *name;
+ char *short_help;
+ char *long_help;
+
+ /* Number of bytes in parsed data. Zero for vector. */
+ uword data_size;
+
+ unformat_function_t *unformat_function;
+
+ /* Opaque for unformat function. */
+ uword unformat_function_arg[2];
+} vlib_cli_parse_rule_t;
+
+/* CLI command callback function. */
+typedef clib_error_t *(vlib_cli_command_function_t)
+ (struct vlib_main_t * vm,
+ unformat_input_t * input, struct vlib_cli_command_t * cmd);
+
+typedef struct vlib_cli_command_t
+{
+ /* Command path (e.g. "show something").
+ Spaces delimit elements of path. */
+ char *path;
+
+ /* Short/long help strings. */
+ char *short_help;
+ char *long_help;
+
+ /* Callback function. */
+ vlib_cli_command_function_t *function;
+
+ /* Opaque. */
+ uword function_arg;
+
+ /* Known MP-safe? */
+ uword is_mp_safe;
+
+ /* Sub commands for this command. */
+ vlib_cli_sub_command_t *sub_commands;
+
+ /* Hash table mapping name (e.g. last path element) to sub command index. */
+ uword *sub_command_index_by_name;
+
+ /* bitmap[p][c][i] says whether sub-command i has character
+ c in position p. */
+ vlib_cli_parse_position_t *sub_command_positions;
+
+ /* Hash table mapping name (e.g. last path element) to sub rule index. */
+ uword *sub_rule_index_by_name;
+
+ /* Vector of possible parse rules for this path. */
+ vlib_cli_sub_rule_t *sub_rules;
+
+ /* List of CLI commands, built by constructors */
+ struct vlib_cli_command_t *next_cli_command;
+
+} vlib_cli_command_t;
+
+typedef void (vlib_cli_output_function_t) (uword arg,
+ u8 * buffer, uword buffer_bytes);
+typedef struct
+{
+ /* Vector of all known commands. */
+ vlib_cli_command_t *commands;
+
+ /* Hash table mapping normalized path to index into all_commands. */
+ uword *command_index_by_path;
+
+ /* Vector of all known parse rules. */
+ vlib_cli_parse_rule_t *parse_rules;
+
+ /* Hash table mapping parse rule name to index into parse_rule vector. */
+ uword *parse_rule_index_by_name;
+
+ /* Data parsed for rules. */
+ void **parse_rule_data;
+
+ /* registration list added by constructors */
+ vlib_cli_command_t *cli_command_registrations;
+} vlib_cli_main_t;
+
+#define VLIB_CLI_COMMAND(x,...) \
+ __VA_ARGS__ vlib_cli_command_t x; \
+static void __vlib_cli_command_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_cli_command_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ vlib_cli_main_t *cm = &vm->cli_main; \
+ x.next_cli_command = cm->cli_command_registrations; \
+ cm->cli_command_registrations = &x; \
+} \
+__VA_ARGS__ vlib_cli_command_t x
+#define VLIB_CLI_PARSE_RULE(x) \
+ vlib_cli_parse_rule_t x
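+
+/* Usage sketch (illustrative only; "my command" and my_command_fn are
+   hypothetical names, not part of this patch):
+
+   static clib_error_t *
+   my_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                  vlib_cli_command_t * cmd)
+   {
+     vlib_cli_output (vm, "hello");
+     return 0;
+   }
+
+   VLIB_CLI_COMMAND (my_command, static) = {
+     .path = "my command",
+     .short_help = "my command",
+     .function = my_command_fn,
+   };
+*/
+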
+/* Output to current CLI connection. */
+void vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...);
+
+/* Process CLI input. */
+void vlib_cli_input (struct vlib_main_t *vm,
+ unformat_input_t * input,
+ vlib_cli_output_function_t * function,
+ uword function_arg);
+
+clib_error_t *vlib_cli_register (struct vlib_main_t *vm,
+ vlib_cli_command_t * c);
+clib_error_t *vlib_cli_register_parse_rule (struct vlib_main_t *vm,
+ vlib_cli_parse_rule_t * c);
+
+uword unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args);
+
+/* Return a vector of strings consisting of possible auto-completions
+ * for a given input string. */
+u8 **vlib_cli_get_possible_completions (u8 * input_str);
+
+#endif /* included_vlib_cli_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/cli_funcs.h b/src/vlib/cli_funcs.h
new file mode 100644
index 00000000..78aef73b
--- /dev/null
+++ b/src/vlib/cli_funcs.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli_funcs.h: VLIB CLI related functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_cli_funcs_h
+#define included_vlib_cli_funcs_h
+
+always_inline void *
+vlib_cli_get_parse_rule_result (vlib_main_t * vm, uword index)
+{
+ vlib_cli_main_t *cm = &vm->cli_main;
+ return vec_elt (cm->parse_rule_data, index);
+}
+
+#endif /* included_vlib_cli_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/counter.c b/src/vlib/counter.c
new file mode 100644
index 00000000..62f4bd66
--- /dev/null
+++ b/src/vlib/counter.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * counter.c: simple and packet/byte counters
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+void
+vlib_clear_simple_counters (vlib_simple_counter_main_t * cm)
+{
+ counter_t *my_counters;
+ uword i, j;
+
+ for (i = 0; i < vec_len (cm->counters); i++)
+ {
+ my_counters = cm->counters[i];
+
+ for (j = 0; j < vec_len (my_counters); j++)
+ {
+ my_counters[j] = 0;
+ }
+ }
+}
+
+void
+vlib_clear_combined_counters (vlib_combined_counter_main_t * cm)
+{
+ vlib_counter_t *my_counters;
+ uword i, j;
+
+ for (i = 0; i < vec_len (cm->counters); i++)
+ {
+ my_counters = cm->counters[i];
+
+ for (j = 0; j < vec_len (my_counters); j++)
+ {
+ my_counters[j].packets = 0;
+ my_counters[j].bytes = 0;
+ }
+ }
+}
+
+void
+vlib_validate_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ int i;
+
+ vec_validate (cm->counters, tm->n_vlib_mains - 1);
+ for (i = 0; i < tm->n_vlib_mains; i++)
+ vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES);
+}
+
+void
+vlib_validate_combined_counter (vlib_combined_counter_main_t * cm, u32 index)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ int i;
+
+ vec_validate (cm->counters, tm->n_vlib_mains - 1);
+ for (i = 0; i < tm->n_vlib_mains; i++)
+ vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES);
+}
+
+u32
+vlib_combined_counter_n_counters (const vlib_combined_counter_main_t * cm)
+{
+ ASSERT (cm->counters);
+ return (vec_len (cm->counters[0]));
+}
+
+u32
+vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm)
+{
+ ASSERT (cm->counters);
+ return (vec_len (cm->counters[0]));
+}
+
+void
+serialize_vlib_simple_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+
+void
+unserialize_vlib_simple_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+
+void
+serialize_vlib_combined_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+
+void
+unserialize_vlib_combined_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/counter.h b/src/vlib/counter.h
new file mode 100644
index 00000000..60e2055d
--- /dev/null
+++ b/src/vlib/counter.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * counter.h: simple and packet/byte counters
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_counter_h
+#define included_vlib_counter_h
+
+/** \file
+
+ Optimized thread-safe counters.
+
+ Each vlib_[simple|combined]_counter_main_t consists of a per-thread
+ vector of per-object counters.
+
+ The idea is to eliminate the need for atomic operations on the
+ increment path.
+*/
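+
+/* Layout sketch (informal): cm->counters[thread_index][object_index].
+   Each thread increments only its own row, so the hot path needs no
+   locks or atomic adds; readers sum across threads when asked. */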
+
+/** 64bit counters */
+typedef u64 counter_t;
+
+/** A collection of simple counters */
+
+typedef struct
+{
+ counter_t **counters; /**< Per-thread u64 non-atomic counters */
+ counter_t *value_at_last_serialize; /**< Values as of last serialize. */
+ u32 last_incremental_serialize_index; /**< Last counter index
+ serialized incrementally. */
+
+ char *name; /**< The counter collection's name. */
+} vlib_simple_counter_main_t;
+
+/** The number of counters (not the number of per-thread counters) */
+u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm);
+
+/** Increment a simple counter
+ @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
+ @param thread_index - (u32) the current cpu index
+ @param index - (u32) index of the counter to increment
+ @param increment - (u64) quantity to add to the counter
+*/
+always_inline void
+vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
+ u32 thread_index, u32 index, u64 increment)
+{
+ counter_t *my_counters;
+
+ my_counters = cm->counters[thread_index];
+ my_counters[index] += increment;
+}
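+
+/* Usage sketch (illustrative; my_counter_main and sw_if_index are
+   hypothetical, and the counter is assumed to have been set up with
+   vlib_validate_simple_counter):
+
+   vlib_increment_simple_counter (&my_counter_main,
+                                  vlib_get_thread_index (),
+                                  sw_if_index, 1);
+*/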
+
+/** Get the value of a simple counter
+ Scrapes the entire set of per-thread counters. Inaccurate unless
+ the worker threads which might increment the counter are
+ barrier-synchronized.
+
+ @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
+ @param index - (u32) index of the counter to fetch
+ @returns - (u64) current counter value
+*/
+always_inline counter_t
+vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
+{
+ counter_t *my_counters;
+ counter_t v;
+ int i;
+
+ ASSERT (index < vlib_simple_counter_n_counters (cm));
+
+ v = 0;
+
+ for (i = 0; i < vec_len (cm->counters); i++)
+ {
+ my_counters = cm->counters[i];
+ v += my_counters[index];
+ }
+
+ return v;
+}
+
+/** Clear a simple counter
+ Clears the per-thread counters for the given index.
+
+ @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
+ @param index - (u32) index of the counter to clear
+*/
+always_inline void
+vlib_zero_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
+{
+ counter_t *my_counters;
+ int i;
+
+ ASSERT (index < vlib_simple_counter_n_counters (cm));
+
+ for (i = 0; i < vec_len (cm->counters); i++)
+ {
+ my_counters = cm->counters[i];
+ my_counters[index] = 0;
+ }
+}
+
+/** Combined counter to hold both packets and byte differences.
+ */
+typedef struct
+{
+ counter_t packets; /**< packet counter */
+ counter_t bytes; /**< byte counter */
+} vlib_counter_t;
+
+/** Add two combined counters, results in the first counter
+ @param [in,out] a - (vlib_counter_t *) dst counter
+ @param b - (vlib_counter_t *) src counter
+*/
+
+always_inline void
+vlib_counter_add (vlib_counter_t * a, vlib_counter_t * b)
+{
+ a->packets += b->packets;
+ a->bytes += b->bytes;
+}
+
+/** Subtract combined counters, results in the first counter
+ @param [in,out] a - (vlib_counter_t *) dst counter
+ @param b - (vlib_counter_t *) src counter
+*/
+always_inline void
+vlib_counter_sub (vlib_counter_t * a, vlib_counter_t * b)
+{
+ ASSERT (a->packets >= b->packets);
+ ASSERT (a->bytes >= b->bytes);
+ a->packets -= b->packets;
+ a->bytes -= b->bytes;
+}
+
+/** Clear a combined counter
+ @param a - (vlib_counter_t *) counter to clear
+*/
+always_inline void
+vlib_counter_zero (vlib_counter_t * a)
+{
+ a->packets = a->bytes = 0;
+}
+
+/** A collection of combined counters */
+typedef struct
+{
+ vlib_counter_t **counters; /**< Per-thread u64 non-atomic counter pairs */
+ vlib_counter_t *value_at_last_serialize; /**< Counter values as of last serialize. */
+ u32 last_incremental_serialize_index; /**< Last counter index serialized incrementally. */
+ char *name; /**< The counter collection's name. */
+} vlib_combined_counter_main_t;
+
+/** The number of counters (not the number of per-thread counters) */
+u32 vlib_combined_counter_n_counters (const vlib_combined_counter_main_t *
+ cm);
+
+/** Clear a collection of simple counters
+ @param cm - (vlib_simple_counter_main_t *) collection to clear
+*/
+void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm);
+
+/** Clear a collection of combined counters
+ @param cm - (vlib_combined_counter_main_t *) collection to clear
+*/
+void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm);
+
+/** Increment a combined counter
+ @param cm - (vlib_combined_counter_main_t *) combined counter main pointer
+ @param thread_index - (u32) the current cpu index
+ @param index - (u32) index of the counter to increment
+ @param packet_increment - (u64) number of packets to add to the counter
+ @param byte_increment - (u64) number of bytes to add to the counter
+*/
+
+always_inline void
+vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
+ u32 thread_index,
+ u32 index, u64 n_packets, u64 n_bytes)
+{
+ vlib_counter_t *my_counters;
+
+ /* Use this CPU's counter array */
+ my_counters = cm->counters[thread_index];
+
+ my_counters[index].packets += n_packets;
+ my_counters[index].bytes += n_bytes;
+}
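+
+/* Usage sketch (illustrative; cm, sw_if_index and b0 are hypothetical):
+   count one packet of the buffer's total length against an interface
+   on the current thread:
+
+   vlib_increment_combined_counter (cm, vlib_get_thread_index (),
+                                    sw_if_index, 1,
+                                    vlib_buffer_length_in_chain (vm, b0));
+*/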
+
+/** Pre-fetch a per-thread combined counter for the given object index */
+always_inline void
+vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm,
+ u32 thread_index, u32 index)
+{
+ vlib_counter_t *cpu_counters;
+
+ /*
+ * This CPU's index is assumed to already be in cache
+ */
+ cpu_counters = cm->counters[thread_index];
+ CLIB_PREFETCH (cpu_counters + index, CLIB_CACHE_LINE_BYTES, STORE);
+}
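+
+/* Typical pattern (informal): prefetch the counter line early in the
+   loop iteration, then increment it once the packet length is known. */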
+
+/** Get the value of a combined counter, never called in the speed path
+ Scrapes the entire set of per-thread counters. Inaccurate unless
+ the worker threads which might increment the counter are
+ barrier-synchronized.
+
+ @param cm - (vlib_combined_counter_main_t *) combined counter main pointer
+ @param index - (u32) index of the combined counter to fetch
+ @param result [out] - (vlib_counter_t *) result stored here
+*/
+
+static inline void
+vlib_get_combined_counter (const vlib_combined_counter_main_t * cm,
+ u32 index, vlib_counter_t * result)
+{
+ vlib_counter_t *my_counters, *counter;
+ int i;
+
+ result->packets = 0;
+ result->bytes = 0;
+
+ for (i = 0; i < vec_len (cm->counters); i++)
+ {
+ my_counters = cm->counters[i];
+
+ counter = vec_elt_at_index (my_counters, index);
+ result->packets += counter->packets;
+ result->bytes += counter->bytes;
+ }
+}
+
+/** Clear a combined counter
+ Clears the set of per-thread counters.
+
+ @param cm - (vlib_combined_counter_main_t *) combined counter main pointer
+ @param index - (u32) index of the counter to clear
+*/
+always_inline void
+vlib_zero_combined_counter (vlib_combined_counter_main_t * cm, u32 index)
+{
+ vlib_counter_t *my_counters, *counter;
+ int i;
+
+ for (i = 0; i < vec_len (cm->counters); i++)
+ {
+ my_counters = cm->counters[i];
+
+ counter = vec_elt_at_index (my_counters, index);
+ counter->packets = 0;
+ counter->bytes = 0;
+ }
+}
+
+/** validate a simple counter
+ @param cm - (vlib_simple_counter_main_t *) pointer to the counter collection
+ @param index - (u32) index of the counter to validate
+*/
+
+void vlib_validate_simple_counter (vlib_simple_counter_main_t * cm,
+ u32 index);
+/** validate a combined counter
+ @param cm - (vlib_combined_counter_main_t *) pointer to the counter
+ collection
+ @param index - (u32) index of the counter to validate
+*/
+
+void vlib_validate_combined_counter (vlib_combined_counter_main_t * cm,
+ u32 index);
+
+/** Obtain the number of simple or combined counters allocated.
+ A macro which reduces to vec_len(cm->maxi), which is the answer in
+ either case.
+
+ @param cm - (vlib_simple_counter_main_t) or
+ (vlib_combined_counter_main_t) the counter collection to interrogate
+ @returns vec_len(cm->maxi)
+*/
+#define vlib_counter_len(cm) vec_len((cm)->maxi)
+
+serialize_function_t serialize_vlib_simple_counter_main,
+ unserialize_vlib_simple_counter_main;
+serialize_function_t serialize_vlib_combined_counter_main,
+ unserialize_vlib_combined_counter_main;
+
+#endif /* included_vlib_counter_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/defs.h b/src/vlib/defs.h
new file mode 100644
index 00000000..ad58bc04
--- /dev/null
+++ b/src/vlib/defs.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * defs.h: VLIB generic C definitions
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_defs_h
+#define included_vlib_defs_h
+
+/* Receive or transmit. */
+typedef enum
+{
+ VLIB_RX,
+ VLIB_TX,
+ VLIB_N_RX_TX = 2, /* Used to size arrays. */
+} vlib_rx_or_tx_t;
+
+#define vlib_foreach_rx_tx(v) for (v = 0; v < VLIB_N_RX_TX; v++)
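+
+/* Usage sketch (illustrative):
+
+   vlib_rx_or_tx_t rt;
+   vlib_foreach_rx_tx (rt)
+     clib_warning ("%U", format_vlib_rx_tx, rt);
+*/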
+
+/* Read/write. */
+typedef enum
+{
+ VLIB_READ,
+ VLIB_WRITE,
+} vlib_read_or_write_t;
+
+/* Up/down. */
+typedef enum
+{
+ VLIB_DOWN = 0,
+ VLIB_UP = 1,
+} vlib_up_or_down_t;
+
+/* Enable/disable. */
+typedef enum
+{
+ VLIB_DISABLE = 0,
+ VLIB_ENABLE = 1,
+} vlib_enable_or_disable_t;
+
+#endif /* included_vlib_defs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/dir.dox b/src/vlib/dir.dox
new file mode 100644
index 00000000..4806e7a9
--- /dev/null
+++ b/src/vlib/dir.dox
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016 Comcast Cable Communications Management, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Doxygen directory documentation */
+/**
+@dir
+@brief VLIB application library source.
+*/
+/*? %%clicmd:group_label VLIB application library%% ?*/
+
diff --git a/src/vlib/elog_samples.c b/src/vlib/elog_samples.c
new file mode 100644
index 00000000..a8c800df
--- /dev/null
+++ b/src/vlib/elog_samples.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/elog.h>
+
+static inline void
+elog_four_int_sample (u32 * data)
+{
+ ELOG_TYPE_DECLARE (e) =
+ {
+   .format = "four int: first %d second %d third %d fourth %d",
+   .format_args = "i4i4i4i4",
+ };
+ struct
+ {
+ u32 data[4];
+ } *ed;
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->data[0] = data[0];
+ ed->data[1] = data[1];
+ ed->data[2] = data[2];
+ ed->data[3] = data[3];
+}
+
+static inline void
+elog_four_int_track_sample (u32 * data)
+{
+ ELOG_TYPE_DECLARE (e) =
+ {
+   .format = "four_int_track: first %d second %d third %d fourth %d",
+   .format_args = "i4i4i4i4",
+ };
+ struct
+ {
+ u32 data[4];
+ } *ed;
+ ELOG_TRACK (sample_track);
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, sample_track);
+ ed->data[0] = data[0];
+ ed->data[1] = data[1];
+ ed->data[2] = data[2];
+ ed->data[3] = data[3];
+}
+
+static inline void
+elog_enum_sample (u8 which)
+{
+ ELOG_TYPE_DECLARE (e) =
+ {
+   .format = "my enum: %s",
+   .format_args = "t1",
+   .n_enum_strings = 2,
+   .enum_strings = { "string 1", "string 2", },
+ };
+ struct
+ {
+ u8 which;
+ } *ed;
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->which = which;
+}
+
+static inline void
+elog_one_datum_sample (u32 data)
+{
+ ELOG_TYPE_DECLARE (e) =
+ {
+   .format = "one datum: %d",
+   .format_args = "i4",
+ };
+
+ elog (&vlib_global_main.elog_main, &e, data);
+}
+
+static clib_error_t *
+test_elog_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ int i;
+ u32 samples[4];
+
+ for (i = 0; i < 10; i++)
+ {
+ samples[0] = i;
+ samples[1] = i + 1;
+ samples[2] = i + 2;
+ samples[3] = i + 3;
+
+ elog_four_int_sample (samples);
+ elog_four_int_track_sample (samples);
+ elog_enum_sample (0);
+ elog_enum_sample (1);
+ elog_one_datum_sample (i);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_elog_command, static) = {
+ .path = "test elog sample",
+ .short_help = "test elog sample",
+ .function = test_elog_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/error.c b/src/vlib/error.c
new file mode 100644
index 00000000..dec90bbe
--- /dev/null
+++ b/src/vlib/error.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error.c: VLIB error handler
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/heap.h>
+
+uword
+vlib_error_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ u32 next_buffer_stride,
+ u32 n_buffers,
+ u32 next_index,
+ u32 drop_error_node, u32 drop_error_code)
+{
+ u32 n_left_this_frame, n_buffers_left, *args, n_args_left;
+ vlib_error_t drop_error;
+
+ drop_error = vlib_error_set (drop_error_node, drop_error_code);
+
+ n_buffers_left = n_buffers;
+ while (n_buffers_left > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, args, n_args_left);
+
+ n_left_this_frame = clib_min (n_buffers_left, n_args_left);
+ n_buffers_left -= n_left_this_frame;
+ n_args_left -= n_left_this_frame;
+
+ while (n_left_this_frame >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+
+ args[0] = bi0 = buffers[0];
+ args[1] = bi1 = buffers[1];
+ args[2] = bi2 = buffers[2];
+ args[3] = bi3 = buffers[3];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ b0->error = drop_error;
+ b1->error = drop_error;
+ b2->error = drop_error;
+ b3->error = drop_error;
+
+ buffers += 4;
+ args += 4;
+ n_left_this_frame -= 4;
+ }
+
+ while (n_left_this_frame >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ args[0] = bi0 = buffers[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->error = drop_error;
+
+ buffers += 1;
+ args += 1;
+ n_left_this_frame -= 1;
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_args_left);
+ }
+
+ return n_buffers;
+}
+
+/* Convenience node to drop a vector of buffers with a "misc error". */
+static uword
+misc_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return vlib_error_drop_buffers (vm, node, vlib_frame_args (frame),
+ /* buffer stride */ 1,
+ frame->n_vectors,
+ /* next */ 0,
+ node->node_index,
+ /* error */ 0);
+}
+
+static char *misc_drop_buffers_error_strings[] = {
+ [0] = "misc. errors",
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (misc_drop_buffers_node,static) = {
+ .function = misc_drop_buffers,
+ .name = "misc-drop-buffers",
+ .vector_size = sizeof (u32),
+ .n_errors = 1,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ "error-drop",
+ },
+ .error_strings = misc_drop_buffers_error_strings,
+};
+/* *INDENT-ON* */
+
+/* Reserves given number of error codes for given node. */
+void
+vlib_register_errors (vlib_main_t * vm,
+ u32 node_index, u32 n_errors, char *error_strings[])
+{
+ vlib_error_main_t *em = &vm->error_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ uword l;
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ /* Free up any previous error strings. */
+ if (n->n_errors > 0)
+ heap_dealloc (em->error_strings_heap, n->error_heap_handle);
+
+ n->n_errors = n_errors;
+ n->error_strings = error_strings;
+
+ if (n_errors == 0)
+ return;
+
+ n->error_heap_index =
+ heap_alloc (em->error_strings_heap, n_errors, n->error_heap_handle);
+
+ l = vec_len (em->error_strings_heap);
+
+ clib_memcpy (vec_elt_at_index (em->error_strings_heap, n->error_heap_index),
+ error_strings, n_errors * sizeof (error_strings[0]));
+
+ /* Allocate a counter/elog type for each error. */
+ vec_validate (em->counters, l - 1);
+ vec_validate (vm->error_elog_event_types, l - 1);
+
+ /* Zero counters for re-registrations of errors. */
+ if (n->error_heap_index + n_errors <= vec_len (em->counters_last_clear))
+ clib_memcpy (em->counters + n->error_heap_index,
+ em->counters_last_clear + n->error_heap_index,
+ n_errors * sizeof (em->counters[0]));
+ else
+ memset (em->counters + n->error_heap_index,
+ 0, n_errors * sizeof (em->counters[0]));
+
+ {
+ elog_event_type_t t;
+ uword i;
+
+ memset (&t, 0, sizeof (t));
+ for (i = 0; i < n_errors; i++)
+ {
+ t.format = (char *) format (0, "%v %s: %%d",
+ n->name, error_strings[i]);
+ vm->error_elog_event_types[n->error_heap_index + i] = t;
+ }
+ }
+}
+
+static clib_error_t *
+show_errors (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_error_main_t *em = &vm->error_main;
+ vlib_node_t *n;
+ u32 code, i, ni;
+ u64 c;
+ int index = 0;
+ int verbose = 0;
+ u64 *sums = 0;
+
+ if (unformat (input, "verbose %d", &verbose))
+ ;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+
+ vec_validate (sums, vec_len (em->counters));
+
+ if (verbose)
+ vlib_cli_output (vm, "%=10s%=40s%=20s%=6s", "Count", "Node", "Reason",
+ "Index");
+ else
+ vlib_cli_output (vm, "%=10s%=40s%=6s", "Count", "Node", "Reason");
+
+
+ foreach_vlib_main(({
+ em = &this_vlib_main->error_main;
+
+ if (verbose)
+ vlib_cli_output(vm, "Thread %u (%v):", index,
+ vlib_worker_threads[index].name);
+
+ for (ni = 0; ni < vec_len (this_vlib_main->node_main.nodes); ni++)
+ {
+ n = vlib_get_node (this_vlib_main, ni);
+ for (code = 0; code < n->n_errors; code++)
+ {
+ i = n->error_heap_index + code;
+ c = em->counters[i];
+ if (i < vec_len (em->counters_last_clear))
+ c -= em->counters_last_clear[i];
+ sums[i] += c;
+
+ if (c == 0 && verbose < 2)
+ continue;
+
+ if (verbose)
+ vlib_cli_output (vm, "%10Ld%=40v%=20s%=6d", c, n->name,
+ em->error_strings_heap[i], i);
+ else
+ vlib_cli_output (vm, "%10d%=40v%s", c, n->name,
+ em->error_strings_heap[i]);
+ }
+ }
+ index++;
+ }));
+ /* *INDENT-ON* */
+
+ if (verbose)
+ vlib_cli_output (vm, "Total:");
+
+ for (ni = 0; ni < vec_len (vm->node_main.nodes); ni++)
+ {
+ n = vlib_get_node (vm, ni);
+ for (code = 0; code < n->n_errors; code++)
+ {
+ i = n->error_heap_index + code;
+ if (sums[i])
+ {
+ if (verbose)
+ vlib_cli_output (vm, "%10Ld%=40v%=20s%=10d", sums[i], n->name,
+ em->error_strings_heap[i], i);
+ }
+ }
+ }
+
+ vec_free (sums);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_errors) = {
+ .path = "show errors",
+ .short_help = "Show error counts",
+ .function = show_errors,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_node_counters, static) = {
+ .path = "show node counters",
+ .short_help = "Show node counters",
+ .function = show_errors,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+clear_error_counters (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_error_main_t *em;
+ u32 i;
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main(({
+ em = &this_vlib_main->error_main;
+ vec_validate (em->counters_last_clear, vec_len (em->counters) - 1);
+ for (i = 0; i < vec_len (em->counters); i++)
+ em->counters_last_clear[i] = em->counters[i];
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_clear_error_counters, static) = {
+ .path = "clear errors",
+ .short_help = "Clear error counters",
+ .function = clear_error_counters,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_clear_node_counters, static) = {
+ .path = "clear node counters",
+ .short_help = "Clear node counters",
+ .function = clear_error_counters,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/error.h b/src/vlib/error.h
new file mode 100644
index 00000000..df2075c3
--- /dev/null
+++ b/src/vlib/error.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error.h: drop/punt error packets
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_error_h
+#define included_vlib_error_h
+
+/* Combined 20 bit node & 12 bit code as 32 bit number. */
+typedef u32 vlib_error_t;
+
+always_inline u32
+vlib_error_get_node (vlib_error_t e)
+{
+ return e >> 12;
+}
+
+always_inline u32
+vlib_error_get_code (vlib_error_t e)
+{
+ return e & 0xfff;
+}
+
+always_inline vlib_error_t
+vlib_error_set (u32 node_index, u32 code)
+{
+ ASSERT (node_index < (1 << 20));
+ ASSERT (code < (1 << 12));
+ return (node_index << 12) | code;
+}
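+
+/* Worked example (informal): vlib_error_set (5, 3) yields
+   (5 << 12) | 3 = 0x5003; vlib_error_get_node (0x5003) == 5 and
+   vlib_error_get_code (0x5003) == 3. */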
+
+always_inline vlib_error_t
+vlib_error_set_code (vlib_error_t e, u32 code)
+{
+ ASSERT (vlib_error_get_code (e) == 0);
+ ASSERT (code < (1 << 12));
+ e |= code;
+ return e;
+}
+
+typedef struct
+{
+ /* Error counters. */
+ u64 *counters;
+
+ /* Counter values as of last counter clear. */
+ u64 *counters_last_clear;
+
+ /* Error name strings in heap. Heap index
+ indexes counter vector. */
+ char **error_strings_heap;
+} vlib_error_main_t;
+
+/* Per node error registration. */
+void vlib_register_errors (struct vlib_main_t *vm,
+ u32 node_index,
+ u32 n_errors, char *error_strings[]);
+
+#endif /* included_vlib_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/error_funcs.h b/src/vlib/error_funcs.h
new file mode 100644
index 00000000..ab281ba2
--- /dev/null
+++ b/src/vlib/error_funcs.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error_funcs.h: VLIB error handling
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_error_funcs_h
+#define included_vlib_error_funcs_h
+
+#include <vlib/node_funcs.h>
+
+always_inline void
+vlib_error_elog_count (vlib_main_t * vm, uword counter, uword increment)
+{
+ if (VLIB_ELOG_MAIN_LOOP > 0 && increment > 0)
+ {
+ elog_main_t *em = &vm->elog_main;
+ elog (em, vec_elt_at_index (vm->error_elog_event_types, counter),
+ increment);
+ }
+}
+
+always_inline void
+vlib_error_count (vlib_main_t * vm, uword node_index,
+ uword counter, uword increment)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_error_main_t *em = &vm->error_main;
+
+ ASSERT (counter < n->n_errors);
+ counter += n->error_heap_index;
+
+ ASSERT (counter < vec_len (em->counters));
+ em->counters[counter] += increment;
+
+ vlib_error_elog_count (vm, counter, increment);
+}
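+
+/* Usage sketch (illustrative; MY_NODE_ERROR_TIMEOUT is a hypothetical
+   error code registered for this node via vlib_register_errors):
+
+   vlib_error_count (vm, node->node_index, MY_NODE_ERROR_TIMEOUT, 1);
+*/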
+
+/* Drop all buffers in frame with given error code. */
+uword
+vlib_error_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ u32 next_buffer_stride,
+ u32 n_buffers,
+ u32 error_next_index,
+ u32 error_node, u32 error_code);
+
+#endif /* included_vlib_error_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/format.c b/src/vlib/format.c
new file mode 100644
index 00000000..79a4d686
--- /dev/null
+++ b/src/vlib/format.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * format.c: generic network formatting/unformatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+u8 *
+format_vlib_rx_tx (u8 * s, va_list * args)
+{
+ vlib_rx_or_tx_t r = va_arg (*args, vlib_rx_or_tx_t);
+ char *t;
+
+ switch (r)
+ {
+ case VLIB_RX:
+ t = "rx";
+ break;
+ case VLIB_TX:
+ t = "tx";
+ break;
+ default:
+ t = "INVALID";
+ break;
+ }
+
+ vec_add (s, t, strlen (t));
+ return s;
+}
+
+u8 *
+format_vlib_read_write (u8 * s, va_list * args)
+{
+ vlib_rx_or_tx_t r = va_arg (*args, vlib_rx_or_tx_t);
+ char *t;
+
+ switch (r)
+ {
+ case VLIB_READ:
+ t = "read";
+ break;
+ case VLIB_WRITE:
+ t = "write";
+ break;
+ default:
+ t = "INVALID";
+ break;
+ }
+
+ vec_add (s, t, strlen (t));
+ return s;
+}
+
+/* Formats buffer data as printable ascii or as hex. */
+u8 *
+format_vlib_buffer_data (u8 * s, va_list * args)
+{
+ u8 *data = va_arg (*args, u8 *);
+ u32 n_data_bytes = va_arg (*args, u32);
+ u32 i, is_printable;
+
+ is_printable = 1;
+ for (i = 0; i < n_data_bytes && is_printable; i++)
+ {
+ u8 c = data[i];
+ if (c < 0x20)
+ is_printable = 0;
+ else if (c >= 0x7f)
+ is_printable = 0;
+ }
+
+ if (is_printable)
+ vec_add (s, data, n_data_bytes);
+ else
+ s = format (s, "%U", format_hex_bytes, data, n_data_bytes);
+
+ return s;
+}
+
+/* Enable/on => 1; disable/off => 0. */
+uword
+unformat_vlib_enable_disable (unformat_input_t * input, va_list * args)
+{
+ int *result = va_arg (*args, int *);
+ int enable;
+
+ if (unformat (input, "enable") || unformat (input, "on"))
+ enable = 1;
+ else if (unformat (input, "disable") || unformat (input, "off"))
+ enable = 0;
+ else
+ return 0;
+
+ *result = enable;
+ return 1;
+}
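+
+/* Usage sketch (illustrative; feature_enable is a hypothetical flag):
+
+   int feature_enable;
+   if (unformat (input, "%U", unformat_vlib_enable_disable,
+                 &feature_enable))
+     ...
+*/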
+
+/* rx/tx => VLIB_RX/VLIB_TX. */
+uword
+unformat_vlib_rx_tx (unformat_input_t * input, va_list * args)
+{
+ int *result = va_arg (*args, int *);
+ if (unformat (input, "rx"))
+ *result = VLIB_RX;
+ else if (unformat (input, "tx"))
+ *result = VLIB_TX;
+ else
+ return 0;
+ return 1;
+}
+
+/* Parse an int either %d or 0x%x. */
+uword
+unformat_vlib_number (unformat_input_t * input, va_list * args)
+{
+ int *result = va_arg (*args, int *);
+
+ return (unformat (input, "0x%x", result) || unformat (input, "%d", result));
+}
+
+/* Parse a-zA-Z0-9_ token and hash to value. */
+uword
+unformat_vlib_number_by_name (unformat_input_t * input, va_list * args)
+{
+ uword *hash = va_arg (*args, uword *);
+ int *result = va_arg (*args, int *);
+ uword *p;
+ u8 *token;
+ int i;
+
+ if (!unformat_user (input, unformat_token, "a-zA-Z0-9_", &token))
+ return 0;
+
+ /* Null terminate. */
+ if (vec_len (token) > 0 && token[vec_len (token) - 1] != 0)
+ vec_add1 (token, 0);
+
+ /* Check for exact match. */
+ p = hash_get_mem (hash, token);
+ if (p)
+ goto done;
+
+ /* Convert to upper case & try match. */
+ for (i = 0; i < vec_len (token); i++)
+ if (token[i] >= 'a' && token[i] <= 'z')
+ token[i] = 'A' + token[i] - 'a';
+ p = hash_get_mem (hash, token);
+
+done:
+ vec_free (token);
+ if (p)
+ *result = p[0];
+ return p != 0;
+}
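+
+/* Usage sketch (illustrative; name_hash is a hypothetical hash table
+   mapping token names to numbers, e.g. one built with
+   hash_create_string):
+
+   int value;
+   if (unformat (input, "%U", unformat_vlib_number_by_name,
+                 name_hash, &value))
+     ...
+*/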
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/format_funcs.h b/src/vlib/format_funcs.h
new file mode 100644
index 00000000..f60b8940
--- /dev/null
+++ b/src/vlib/format_funcs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * format_funcs.h: VLIB formatting/unformatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_format_h
+#define included_vlib_format_h
+
+/* Format vlib_rx_or_tx_t/vlib_read_or_write_t enum as string. */
+u8 *format_vlib_rx_tx (u8 * s, va_list * args);
+u8 *format_vlib_read_write (u8 * s, va_list * args);
+
+/* Formats buffer data as printable ascii or as hex. */
+u8 *format_vlib_buffer_data (u8 * s, va_list * args);
+
+/* Enable/on => 1; disable/off => 0. */
+uword unformat_vlib_enable_disable (unformat_input_t * input, va_list * args);
+
+/* rx/tx => VLIB_RX/VLIB_TX. */
+uword unformat_vlib_rx_tx (unformat_input_t * input, va_list * args);
+
+/* Parse a-zA-Z0-9_ token and hash to value. */
+uword unformat_vlib_number_by_name (unformat_input_t * input, va_list * args);
+
+/* Parse an int, either %d or 0x%x. */
+uword unformat_vlib_number (unformat_input_t * input, va_list * args);
+
+/* Flag to format_vlib_*_header functions to tell them not to recurse
+ into the next layer's header. For example, it tells
+ format_vlib_ethernet_header not to format the ip header. */
+#define FORMAT_VLIB_HEADER_NO_RECURSION (~0)
+
+#endif /* included_vlib_format_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
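+
+/* A minimal usage sketch (assumed example, not part of this patch): a
+ hypothetical CLI handler that parses "enable"/"disable" and "rx"/"tx"
+ arguments with the helpers above; my_feature_command_fn is illustrative
+ only and would be hooked up with VLIB_CLI_COMMAND in a real command. */
+static clib_error_t *
+my_feature_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int enable = 0, rx_or_tx = VLIB_RX;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vlib_enable_disable, &enable))
+ ;
+ else if (unformat (input, "%U", unformat_vlib_rx_tx, &rx_or_tx))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ vlib_cli_output (vm, "%s %U", enable ? "enabled" : "disabled",
+ format_vlib_rx_tx, rx_or_tx);
+ return 0;
+}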
diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h
new file mode 100644
index 00000000..9dd01fbf
--- /dev/null
+++ b/src/vlib/global_funcs.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * global_funcs.h: global data structure access functions
+ */
+
+#ifndef included_vlib_global_funcs_h_
+#define included_vlib_global_funcs_h_
+
+always_inline vlib_main_t *
+vlib_get_main (void)
+{
+ vlib_main_t *vm;
+ vm = vlib_mains[vlib_get_thread_index ()];
+ ASSERT (vm);
+ return vm;
+}
+
+always_inline vlib_thread_main_t *
+vlib_get_thread_main (void)
+{
+ return &vlib_thread_main;
+}
+
+#endif /* included_vlib_global_funcs_h_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
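+
+/* Callers typically fetch the per-thread main structure on the fly; a
+ one-line sketch: f64 now = vlib_time_now (vlib_get_main ()); */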
diff --git a/src/vlib/i2c.c b/src/vlib/i2c.c
new file mode 100644
index 00000000..97f5bb21
--- /dev/null
+++ b/src/vlib/i2c.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/i2c.h>
+
+static inline void
+i2c_delay (i2c_bus_t * b, f64 timeout)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_time_wait (vm, timeout);
+}
+
+static void
+i2c_wait_for_scl (i2c_bus_t * b)
+{
+ f64 t = 0;
+
+ while (t < b->hold_time)
+ {
+ int sda, scl;
+ i2c_delay (b, b->rise_fall_time);
+ b->get_bits (b, &scl, &sda);
+
+ if (scl)
+ return;
+
+ t += b->rise_fall_time;
+ }
+ b->timeout = 1;
+}
+
+static void
+i2c_start (i2c_bus_t * b)
+{
+ b->timeout = 0;
+
+ b->put_bits (b, 1, 1);
+ i2c_wait_for_scl (b);
+
+ if (vlib_i2c_bus_timed_out (b))
+ return;
+
+ b->put_bits (b, 1, 0);
+ i2c_delay (b, b->hold_time);
+ b->put_bits (b, 0, 0);
+ i2c_delay (b, b->hold_time);
+}
+
+static void
+i2c_stop (i2c_bus_t * b)
+{
+ b->put_bits (b, 0, 0);
+ i2c_delay (b, b->rise_fall_time);
+
+ b->put_bits (b, 1, 0);
+ i2c_delay (b, b->hold_time);
+
+ b->put_bits (b, 1, 1);
+ i2c_delay (b, b->hold_time);
+}
+
+static void
+i2c_write_bit (i2c_bus_t * b, int sda)
+{
+ b->put_bits (b, 0, sda);
+ i2c_delay (b, b->rise_fall_time);
+
+ b->put_bits (b, 1, sda);
+ i2c_wait_for_scl (b);
+ i2c_delay (b, b->hold_time);
+
+ b->put_bits (b, 0, sda);
+ i2c_delay (b, b->rise_fall_time);
+}
+
+static void
+i2c_read_bit (i2c_bus_t * b, int *sda)
+{
+ int scl;
+
+ b->put_bits (b, 1, 1);
+ i2c_wait_for_scl (b);
+ i2c_delay (b, b->hold_time);
+
+ b->get_bits (b, &scl, sda);
+
+ b->put_bits (b, 0, 1);
+ i2c_delay (b, b->rise_fall_time);
+}
+
+static void
+i2c_write_byte (i2c_bus_t * b, u8 data)
+{
+ int i, sda;
+
+ for (i = 7; i >= 0; i--)
+ {
+ i2c_write_bit (b, (data >> i) & 1);
+ if (b->timeout)
+ return;
+ }
+
+ b->put_bits (b, 0, 1);
+ i2c_delay (b, b->rise_fall_time);
+
+ i2c_read_bit (b, &sda);
+
+ if (sda)
+ b->timeout = 1;
+}
+
+
+static void
+i2c_read_byte (i2c_bus_t * b, u8 * data, int ack)
+{
+ int i, sda;
+
+ *data = 0;
+
+ b->put_bits (b, 0, 1);
+ i2c_delay (b, b->rise_fall_time);
+
+ for (i = 7; i >= 0; i--)
+ {
+ i2c_read_bit (b, &sda);
+ if (b->timeout)
+ return;
+
+ *data |= (sda != 0) << i;
+ }
+
+ i2c_write_bit (b, ack == 0);
+}
+
+
+void
+vlib_i2c_init (i2c_bus_t * b)
+{
+ f64 tick;
+ if (!b->clock)
+ b->clock = 400000;
+
+ tick = 1.0 / b->clock;
+
+ /* Spend 40% of time in low and high states */
+ if (!b->hold_time)
+ b->hold_time = 0.4 * tick;
+
+ /* Spend 10% of time waiting for rise and fall */
+ if (!b->rise_fall_time)
+ b->rise_fall_time = 0.1 * tick;
+}
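+
+/* Worked example: at the default 400 kHz clock, tick = 2.5 us, so
+ hold_time defaults to 1.0 us and rise_fall_time to 0.25 us. */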
+
+void
+vlib_i2c_xfer (i2c_bus_t * bus, i2c_msg_t * msgs)
+{
+ i2c_msg_t *msg;
+ int i;
+
+ vec_foreach (msg, msgs)
+ {
+ i2c_start (bus);
+ i2c_write_byte (bus,
+ (msg->addr << 1) + (msg->flags == I2C_MSG_FLAG_READ));
+
+ if (msg->flags & I2C_MSG_FLAG_READ)
+ for (i = 0; i < msg->len; i++)
+ {
+ i2c_read_byte (bus, &msg->buffer[i], /* ack */ i + 1 != msg->len);
+ if (bus->timeout)
+ goto done;
+ }
+
+ else
+ for (i = 0; i < msg->len; i++)
+ {
+ i2c_write_byte (bus, msg->buffer[i]);
+ if (bus->timeout)
+ goto done;
+ }
+ }
+
+done:
+ i2c_stop (bus);
+}
+
+void
+vlib_i2c_read_eeprom (i2c_bus_t * bus, u8 i2c_addr, u16 start_addr,
+ u16 length, u8 * data)
+{
+ i2c_msg_t *msg = 0;
+ u8 start_address[1];
+
+ vec_validate (msg, 1);
+
+ start_address[0] = start_addr;
+ msg[0].addr = i2c_addr;
+ msg[0].flags = I2C_MSG_FLAG_WRITE;
+ msg[0].buffer = start_address;
+ msg[0].len = 1;
+
+ msg[1].addr = i2c_addr;
+ msg[1].flags = I2C_MSG_FLAG_READ;
+ msg[1].buffer = data;
+ msg[1].len = length;
+
+ vlib_i2c_xfer (bus, msg);
+
+ vec_free (msg);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/i2c.h b/src/vlib/i2c.h
new file mode 100644
index 00000000..b79bdc75
--- /dev/null
+++ b/src/vlib/i2c.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vlib_i2c_h
+#define included_vlib_i2c_h
+
+#include <vppinfra/types.h>
+
+
+#define I2C_MSG_FLAG_WRITE 0
+#define I2C_MSG_FLAG_READ 1
+
+typedef struct
+{
+ u8 addr;
+ u8 flags;
+ u16 len;
+ u8 *buffer;
+} i2c_msg_t;
+
+typedef struct i2c_bus_t
+{
+ void (*put_bits) (struct i2c_bus_t * b, int scl, int sda);
+ void (*get_bits) (struct i2c_bus_t * b, int *scl, int *sda);
+
+ int timeout;
+ u32 clock;
+ f64 hold_time;
+ f64 rise_fall_time;
+
+ /* Private data */
+ uword private_data;
+
+} i2c_bus_t;
+
+void vlib_i2c_init (i2c_bus_t * bus);
+void vlib_i2c_xfer (i2c_bus_t * bus, i2c_msg_t * msgs);
+void vlib_i2c_read_eeprom (i2c_bus_t * bus, u8 i2c_addr, u16 start_addr,
+ u16 length, u8 * data);
+
+static inline int
+vlib_i2c_bus_timed_out (i2c_bus_t * bus)
+{
+ return bus->timeout;
+}
+
+#endif /* included_vlib_i2c_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
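+
+/* A minimal backend sketch (assumed example, not part of this patch):
+ device-specific put_bits/get_bits callbacks plugged into the bit-banged
+ framework above. my_put_bits/my_get_bits and the 0x50 EEPROM address
+ are illustrative. */
+static void
+my_put_bits (i2c_bus_t * b, int scl, int sda)
+{
+ /* Drive the open-drain SCL/SDA lines, e.g. via a register whose
+ address is stashed in b->private_data. */
+}
+
+static void
+my_get_bits (i2c_bus_t * b, int *scl, int *sda)
+{
+ /* Sample the SCL/SDA line states; here both read back high. */
+ *scl = *sda = 1;
+}
+
+static void
+my_read_eeprom (u8 * data, u16 len)
+{
+ i2c_bus_t bus = { 0 };
+
+ bus.put_bits = my_put_bits;
+ bus.get_bits = my_get_bits;
+ vlib_i2c_init (&bus); /* fills in default clock and timing */
+ vlib_i2c_read_eeprom (&bus, /* i2c_addr */ 0x50, /* start */ 0, len, data);
+ if (vlib_i2c_bus_timed_out (&bus))
+ clib_warning ("i2c transfer timed out");
+}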
diff --git a/src/vlib/init.c b/src/vlib/init.c
new file mode 100644
index 00000000..8d478451
--- /dev/null
+++ b/src/vlib/init.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * init.c: mechanism for functions to be called at init/exit.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+clib_error_t *
+vlib_call_init_exit_functions (vlib_main_t * vm,
+ _vlib_init_function_list_elt_t * head,
+ int call_once)
+{
+ clib_error_t *error = 0;
+ _vlib_init_function_list_elt_t *i;
+
+ i = head;
+ while (i)
+ {
+ if (call_once && !hash_get (vm->init_functions_called, i->f))
+ {
+ hash_set1 (vm->init_functions_called, i->f);
+ error = i->f (vm);
+ if (error)
+ return error;
+ }
+ i = i->next_init_function;
+ }
+ return error;
+}
+
+clib_error_t *
+vlib_call_all_init_functions (vlib_main_t * vm)
+{
+ /* Call dummy functions to make sure purely static modules are
+ linked in. */
+#define _(f) vlib_##f##_reference ();
+ foreach_vlib_module_reference;
+#undef _
+
+ return vlib_call_init_exit_functions
+ (vm, vm->init_function_registrations, 1 /* call_once */ );
+}
+
+clib_error_t *
+vlib_call_all_main_loop_enter_functions (vlib_main_t * vm)
+{
+ return vlib_call_init_exit_functions
+ (vm, vm->main_loop_enter_function_registrations, 1 /* call_once */ );
+}
+
+clib_error_t *
+vlib_call_all_main_loop_exit_functions (vlib_main_t * vm)
+{
+ return vlib_call_init_exit_functions
+ (vm, vm->main_loop_exit_function_registrations, 1 /* call_once */ );
+}
+
+clib_error_t *
+vlib_call_all_config_functions (vlib_main_t * vm,
+ unformat_input_t * input, int is_early)
+{
+ clib_error_t *error = 0;
+ vlib_config_function_runtime_t *c, **all;
+ uword *hash = 0, *p;
+ uword i;
+
+ hash = hash_create_string (0, sizeof (uword));
+ all = 0;
+
+ c = vm->config_function_registrations;
+
+ while (c)
+ {
+ hash_set_mem (hash, c->name, vec_len (all));
+ vec_add1 (all, c);
+ unformat_init (&c->input, 0, 0);
+ c = c->next_registration;
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u8 *s = 0, *v = 0;
+
+ if (!unformat (input, "%s %v", &s, &v) || !(p = hash_get_mem (hash, s)))
+ {
+ error = clib_error_create ("unknown input `%s %v'", s, v);
+ goto done;
+ }
+
+ c = all[p[0]];
+ if (vec_len (c->input.buffer) > 0)
+ vec_add1 (c->input.buffer, ' ');
+ vec_add (c->input.buffer, v, vec_len (v));
+ vec_free (v);
+ vec_free (s);
+ }
+
+ for (i = 0; i < vec_len (all); i++)
+ {
+ c = all[i];
+
+ /* Is this an early config? Are we doing early configs? */
+ if (is_early ^ c->is_early)
+ continue;
+
+ /* Already called? */
+ if (hash_get (vm->init_functions_called, c->function))
+ continue;
+ hash_set1 (vm->init_functions_called, c->function);
+
+ error = c->function (vm, &c->input);
+ if (error)
+ goto done;
+ }
+
+done:
+ for (i = 0; i < vec_len (all); i++)
+ {
+ c = all[i];
+ unformat_free (&c->input);
+ }
+ vec_free (all);
+ hash_free (hash);
+ return error;
+}
+
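+/* For example (hypothetical names): given startup input
+ "foo { bar 10 } baz { on }", the loop above appends "bar 10" to the
+ sub-input of the config function registered as "foo" and "on" to the
+ one registered as "baz"; each function is then called once with its
+ accumulated input. */
+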
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/init.h b/src/vlib/init.h
new file mode 100644
index 00000000..12db3f90
--- /dev/null
+++ b/src/vlib/init.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * init.h: mechanism for functions to be called at init/exit.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_init_h
+#define included_vlib_init_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+
+/* Init/exit functions: called at start/end of main routine. Init
+ functions are typically used to register and setup packet
+ processing nodes. */
+
+typedef clib_error_t *(vlib_init_function_t) (struct vlib_main_t * vm);
+
+typedef struct _vlib_init_function_list_elt
+{
+ struct _vlib_init_function_list_elt *next_init_function;
+ vlib_init_function_t *f;
+} _vlib_init_function_list_elt_t;
+
+/* Configuration functions: called with configuration input just before
+ main polling loop starts. */
+typedef clib_error_t *(vlib_config_function_t) (struct vlib_main_t * vm,
+ unformat_input_t * input);
+
+typedef struct vlib_config_function_runtime_t
+{
+ /* Function to call. Set to null once function has already been called. */
+ vlib_config_function_t *function;
+
+ /* Input for function. */
+ unformat_input_t input;
+
+ /* next config function registration */
+ struct vlib_config_function_runtime_t *next_registration;
+
+ /* To be invoked as soon as the clib heap is available */
+ u8 is_early;
+
+ /* Name used to distinguish input on command line. */
+ char name[32];
+} vlib_config_function_runtime_t;
+
+#define _VLIB_INIT_FUNCTION_SYMBOL(x, type) \
+ _vlib_##type##_function_##x
+
+#define VLIB_INIT_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, init)
+#define VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, main_loop_enter)
+#define VLIB_MAIN_LOOP_EXIT_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, main_loop_exit)
+#define VLIB_CONFIG_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, config)
+
+/* Declaration is global (i.e. not static) so that init functions can
+ be called from other modules to resolve init function dependencies. */
+
+#define VLIB_DECLARE_INIT_FUNCTION(x, tag) \
+vlib_init_function_t * _VLIB_INIT_FUNCTION_SYMBOL (x, tag) = x; \
+static void __vlib_add_##tag##_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_##tag##_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ static _vlib_init_function_list_elt_t _vlib_init_function; \
+ _vlib_init_function.next_init_function \
+ = vm->tag##_function_registrations; \
+ vm->tag##_function_registrations = &_vlib_init_function; \
+ _vlib_init_function.f = &x; \
+}
+
+#define VLIB_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,init)
+#define VLIB_WORKER_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,worker_init)
+
+#define VLIB_MAIN_LOOP_ENTER_FUNCTION(x) \
+ VLIB_DECLARE_INIT_FUNCTION(x,main_loop_enter)
+#define VLIB_MAIN_LOOP_EXIT_FUNCTION(x) \
+VLIB_DECLARE_INIT_FUNCTION(x,main_loop_exit)
+
+#define VLIB_CONFIG_FUNCTION(x,n,...) \
+ __VA_ARGS__ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+static void __vlib_add_config_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_config_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x).next_registration \
+ = vm->config_function_registrations; \
+ vm->config_function_registrations \
+ = &VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+} \
+ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x) \
+ = { \
+ .name = n, \
+ .function = x, \
+ .is_early = 0, \
+ }
+
+#define VLIB_EARLY_CONFIG_FUNCTION(x,n,...) \
+ __VA_ARGS__ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+static void __vlib_add_config_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_config_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x).next_registration \
+ = vm->config_function_registrations; \
+ vm->config_function_registrations \
+ = &VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+} \
+ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x) \
+ = { \
+ .name = n, \
+ .function = x, \
+ .is_early = 1, \
+ }
+
+/* Call given init function: used for init function dependencies. */
+#define vlib_call_init_function(vm, x) \
+ ({ \
+ extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (x); \
+ clib_error_t * _error = 0; \
+ if (! hash_get (vm->init_functions_called, _f)) \
+ { \
+ hash_set1 (vm->init_functions_called, _f); \
+ _error = _f (vm); \
+ } \
+ _error; \
+ })
+
+/* Mark the given init function as already called: used to suppress parts of the netstack. */
+#define vlib_mark_init_function_complete(vm, x) \
+ ({ \
+ extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (x); \
+ hash_set1 (vm->init_functions_called, _f); \
+ })
+
+#define vlib_call_post_graph_init_function(vm, x) \
+ ({ \
+ extern vlib_init_function_t * VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL (x); \
+ clib_error_t * _error = 0; \
+ if (! hash_get (vm->init_functions_called, _f)) \
+ { \
+ hash_set1 (vm->init_functions_called, _f); \
+ _error = _f (vm); \
+ } \
+ _error; \
+ })
+
+#define vlib_call_config_function(vm, x) \
+ ({ \
+ vlib_config_function_runtime_t * _r; \
+ clib_error_t * _error = 0; \
+ extern vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x); \
+ \
+ _r = &VLIB_CONFIG_FUNCTION_SYMBOL (x); \
+ if (! hash_get (vm->init_functions_called, _r->function)) \
+ { \
+ hash_set1 (vm->init_functions_called, _r->function); \
+ _error = _r->function (vm, &_r->input); \
+ } \
+ _error; \
+ })
+
+/* External functions. */
+clib_error_t *vlib_call_all_init_functions (struct vlib_main_t *vm);
+clib_error_t *vlib_call_all_config_functions (struct vlib_main_t *vm,
+ unformat_input_t * input,
+ int is_early);
+clib_error_t *vlib_call_all_main_loop_enter_functions (struct vlib_main_t
+ *vm);
+clib_error_t *vlib_call_all_main_loop_exit_functions (struct vlib_main_t *vm);
+clib_error_t *vlib_call_init_exit_functions (struct vlib_main_t *vm,
+ _vlib_init_function_list_elt_t *
+ head, int call_once);
+
+#define foreach_vlib_module_reference \
+ _ (node_cli) \
+ _ (trace_cli)
+
+/* Dummy function to get node_cli.c linked in. */
+#define _(x) void vlib_##x##_reference (void);
+foreach_vlib_module_reference
+#undef _
+#endif /* included_vlib_init_h */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
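+
+/* A minimal registration sketch (assumed example, not part of this
+ patch): an init function and a config function declared with the
+ macros above. The my_module names and the "my-module" stanza are
+ illustrative. */
+static clib_error_t *
+my_module_init (vlib_main_t * vm)
+{
+ /* Resolve ordering against another init function if needed, e.g.
+ return vlib_call_init_function (vm, some_other_init); */
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (my_module_init);
+
+static clib_error_t *
+my_module_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ u32 size = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "size %d", &size))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+/* Invoked with the contents of a "my-module { ... }" stanza in the
+ startup configuration. */
+VLIB_CONFIG_FUNCTION (my_module_config, "my-module");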
diff --git a/src/vlib/lex.c b/src/vlib/lex.c
new file mode 100644
index 00000000..1cc8f167
--- /dev/null
+++ b/src/vlib/lex.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vlib/lex.h>
+
+vlib_lex_main_t vlib_lex_main;
+
+#define LEX_DEBUG 0
+
+u8 *
+format_vlib_lex_token (u8 * s, va_list * args)
+{
+ vlib_lex_main_t *lm = va_arg (*args, vlib_lex_main_t *);
+ vlib_lex_token_t *t = va_arg (*args, vlib_lex_token_t *);
+
+ if (t->token == VLIB_LEX_word)
+ s = format (s, "%s", t->value.as_pointer);
+ else
+ s = format (s, "%s", lm->lex_token_names[t->token]);
+ return s;
+}
+
+void
+vlib_lex_get_token (vlib_lex_main_t * lm, vlib_lex_token_t * rv)
+{
+ u8 c;
+ vlib_lex_table_t *t;
+ vlib_lex_table_entry_t *e;
+ uword tv;
+
+ if (PREDICT_FALSE (lm->pushback_sp >= 0))
+ {
+ rv[0] = lm->pushback_vector[lm->pushback_sp--];
+ return;
+ }
+
+ rv->value.as_uword = ~0;
+
+ while (1)
+ {
+ if (PREDICT_FALSE (lm->current_index >= vec_len (lm->input_vector)))
+ {
+ rv->token = VLIB_LEX_eof;
+ return;
+ }
+
+ t = vec_elt_at_index (lm->lex_tables, lm->current_table_index);
+ c = (lm->input_vector[lm->current_index++]) & 0x7f;
+ e = &t->entries[c];
+ lm->current_table_index = e->next_table_index;
+
+ switch (e->action)
+ {
+ case VLIB_LEX_IGNORE:
+ continue;
+
+ case VLIB_LEX_START_NUMBER:
+ lm->current_token_value = 0;
+ /* fallthru */
+
+ case VLIB_LEX_ADD_TO_NUMBER:
+ lm->current_number_base = e->token;
+ lm->current_token_value *= lm->current_number_base;
+ tv = c - '0';
+ if (tv >= lm->current_number_base)
+ {
+ tv = 10 + c - 'A';
+ if (tv >= lm->current_number_base)
+ tv = 10 + c - 'a';
+ }
+ lm->current_token_value += tv;
+ continue;
+
+ case VLIB_LEX_ADD_TO_TOKEN:
+ vec_add1 (lm->token_buffer, c);
+ continue;
+
+ case VLIB_LEX_KEYWORD_CHECK:
+ {
+ uword *p;
+
+ vec_add1 (lm->token_buffer, 0);
+
+ /* It's either a keyword or just a word. */
+ p = hash_get_mem (lm->lex_keywords, lm->token_buffer);
+ if (p)
+ {
+ rv->token = p[0];
+ if (LEX_DEBUG > 0)
+ clib_warning ("keyword '%s' token %s",
+ lm->token_buffer,
+ lm->lex_token_names[rv->token]);
+ }
+ else
+ {
+ /* it's a WORD */
+ rv->token = VLIB_LEX_word;
+ rv->value.as_pointer = vec_dup (lm->token_buffer);
+ if (LEX_DEBUG > 0)
+ clib_warning ("%s, value '%s'",
+ lm->lex_token_names[VLIB_LEX_word],
+ rv->value.as_pointer);
+ }
+ _vec_len (lm->token_buffer) = 0;
+
+ /* Rescan the character which terminated the keyword/word. */
+ lm->current_index--;
+ return;
+ }
+
+ case VLIB_LEX_RETURN_AND_RESCAN:
+ ASSERT (lm->current_index);
+ lm->current_index--;
+ /* note flow-through */
+
+ case VLIB_LEX_RETURN:
+ rv->token = e->token;
+ rv->value.as_uword = lm->current_token_value;
+ lm->current_token_value = ~0;
+ if (LEX_DEBUG > 0)
+ {
+ clib_warning
+ ("table %s char '%c'(0x%02x) next table %s return %s",
+ t->name, c, c, lm->lex_tables[e->next_table_index].name,
+ lm->lex_token_names[e->token]);
+ if (rv->token == VLIB_LEX_number)
+ clib_warning (" numeric value 0x%x (%d)", rv->value,
+ rv->value);
+ }
+ return;
+ }
+ }
+}
+
+u16
+vlib_lex_add_token (vlib_lex_main_t * lm, char *token_name)
+{
+ uword *p;
+ u16 rv;
+
+ p = hash_get_mem (lm->lex_tokens_by_name, token_name);
+
+ if (p)
+ return p[0];
+
+ rv = vec_len (lm->lex_token_names);
+ hash_set_mem (lm->lex_tokens_by_name, token_name, rv);
+ vec_add1 (lm->lex_token_names, token_name);
+
+ return rv;
+}
+
+static u16
+add_keyword (vlib_lex_main_t * lm, char *keyword, char *token_name)
+{
+ uword *p;
+ u16 token;
+
+ p = hash_get_mem (lm->lex_keywords, keyword);
+
+ ASSERT (p == 0);
+
+ token = vlib_lex_add_token (lm, token_name);
+
+ hash_set_mem (lm->lex_keywords, keyword, token);
+ return token;
+}
+
+u16
+vlib_lex_find_or_add_keyword (vlib_lex_main_t * lm, char *keyword,
+ char *token_name)
+{
+ uword *p = hash_get_mem (lm->lex_keywords, keyword);
+ return p ? p[0] : add_keyword (lm, keyword, token_name);
+}
+
+void
+vlib_lex_set_action_range (u32 table_index, u8 lo, u8 hi, u16 action,
+ u16 token, u32 next_table_index)
+{
+ int i;
+ vlib_lex_main_t *lm = &vlib_lex_main;
+ vlib_lex_table_t *t = pool_elt_at_index (lm->lex_tables, table_index);
+
+ for (i = lo; i <= hi; i++)
+ {
+ ASSERT (i < ARRAY_LEN (t->entries));
+ t->entries[i].action = action;
+ t->entries[i].token = token;
+ t->entries[i].next_table_index = next_table_index;
+ }
+}
+
+u16
+vlib_lex_add_table (char *name)
+{
+ vlib_lex_main_t *lm = &vlib_lex_main;
+ vlib_lex_table_t *t;
+ uword *p;
+
+ p = hash_get_mem (lm->lex_tables_by_name, name);
+
+ ASSERT (p == 0);
+
+ pool_get_aligned (lm->lex_tables, t, CLIB_CACHE_LINE_BYTES);
+
+ t->name = name;
+
+ hash_set_mem (lm->lex_tables_by_name, name, t - lm->lex_tables);
+
+ vlib_lex_set_action_range (t - lm->lex_tables, 1, 0x7F, VLIB_LEX_IGNORE, ~0,
+ t - lm->lex_tables);
+
+ vlib_lex_set_action_range (t - lm->lex_tables, 0, 0, VLIB_LEX_RETURN,
+ VLIB_LEX_eof, t - lm->lex_tables);
+
+ return t - lm->lex_tables;
+}
+
+void
+vlib_lex_reset (vlib_lex_main_t * lm, u8 * input_vector)
+{
+ if (lm->pushback_vector)
+ _vec_len (lm->pushback_vector) = 0;
+ lm->pushback_sp = -1;
+
+ lm->input_vector = input_vector;
+ lm->current_index = 0;
+}
+
+static clib_error_t *
+lex_onetime_init (vlib_main_t * vm)
+{
+ vlib_lex_main_t *lm = &vlib_lex_main;
+
+ lm->lex_tables_by_name = hash_create_string (0, sizeof (uword));
+ lm->lex_tokens_by_name = hash_create_string (0, sizeof (uword));
+ lm->lex_keywords = hash_create_string (0, sizeof (uword));
+ lm->pushback_sp = -1;
+
+#define _(f) { u16 tmp = vlib_lex_add_token (lm, #f); ASSERT (tmp == VLIB_LEX_##f); }
+ foreach_vlib_lex_global_token;
+#undef _
+
+ vec_validate (lm->token_buffer, 127);
+ _vec_len (lm->token_buffer) = 0;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (lex_onetime_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/lex.h b/src/vlib/lex.h
new file mode 100644
index 00000000..4ae58f46
--- /dev/null
+++ b/src/vlib/lex.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_lex_h
+#define included_vlib_lex_h
+
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/pool.h>
+
+#define foreach_vlib_lex_global_token \
+ _ (invalid) \
+ _ (eof) \
+ _ (word) \
+ _ (number) \
+ _ (lt) \
+ _ (gt) \
+ _ (dot) \
+ _ (slash) \
+ _ (qmark) \
+ _ (equals) \
+ _ (plus) \
+ _ (minus) \
+ _ (star) \
+ _ (lpar) \
+ _ (rpar)
+
+typedef enum
+{
+#define _(f) VLIB_LEX_##f,
+ foreach_vlib_lex_global_token
+#undef _
+} vlib_lex_global_token_t;
+
+typedef enum
+{
+ VLIB_LEX_IGNORE,
+ VLIB_LEX_ADD_TO_TOKEN,
+ VLIB_LEX_RETURN,
+ VLIB_LEX_RETURN_AND_RESCAN,
+ VLIB_LEX_KEYWORD_CHECK,
+ VLIB_LEX_START_NUMBER,
+ VLIB_LEX_ADD_TO_NUMBER,
+} vlib_lex_action_t;
+
+typedef struct
+{
+ u16 action;
+ u16 next_table_index;
+ u16 token;
+} vlib_lex_table_entry_t;
+
+typedef struct
+{
+ char *name;
+ vlib_lex_table_entry_t entries[128];
+} vlib_lex_table_t;
+
+typedef struct
+{
+ u32 token;
+
+ union
+ {
+ uword as_uword;
+ void *as_pointer;
+ char *as_string;
+ } value;
+} vlib_lex_token_t;
+
+typedef struct
+{
+ vlib_lex_table_t *lex_tables;
+ uword *lex_tables_by_name;
+
+ /* Vector of token strings. */
+ char **lex_token_names;
+
+ /* Hash mapping c string name to token index. */
+ uword *lex_tokens_by_name;
+
+ /* Hash mapping c string keyword name to token index. */
+ uword *lex_keywords;
+
+ vlib_lex_token_t *pushback_vector;
+
+ i32 pushback_sp;
+
+ u32 current_table_index;
+
+ uword current_token_value;
+
+ uword current_number_base;
+
+ /* Input string we are lex-ing. */
+ u8 *input_vector;
+
+ /* Current index into input vector. */
+ u32 current_index;
+
+ /* Re-used vector for forming token strings and hashing them. */
+ u8 *token_buffer;
+} vlib_lex_main_t;
+
+extern vlib_lex_main_t vlib_lex_main;
+
+always_inline void
+vlib_lex_cleanup_token (vlib_lex_token_t * t)
+{
+ if (t->token == VLIB_LEX_word)
+ {
+ u8 *tv = t->value.as_pointer;
+ vec_free (tv);
+ }
+}
+
+u16 vlib_lex_add_table (char *name);
+void vlib_lex_get_token (vlib_lex_main_t * lm, vlib_lex_token_t * result);
+u16 vlib_lex_add_token (vlib_lex_main_t * lm, char *token_name);
+void vlib_lex_set_action_range (u32 table_index, u8 lo, u8 hi, u16 action,
+ u16 token, u32 next_table_index);
+void vlib_lex_reset (vlib_lex_main_t * lm, u8 * input_vector);
+format_function_t format_vlib_lex_token;
+
+#endif /* included_vlib_lex_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
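+
+/* A minimal table-driven sketch (assumed example, not part of this
+ patch): two tables, one that ignores everything but letters and one
+ that accumulates a word, with a keyword check on the terminating
+ character. The demo names are illustrative. */
+static void
+my_lex_demo (u8 * input_vector)
+{
+ vlib_lex_main_t *lm = &vlib_lex_main;
+ u16 start = vlib_lex_add_table ("demo-start");
+ u16 word = vlib_lex_add_table ("demo-word");
+ vlib_lex_token_t tok;
+
+ /* Letters start/extend a word; any other character in the word table
+ triggers a keyword check and is rescanned in the start table. */
+ vlib_lex_set_action_range (start, 'a', 'z', VLIB_LEX_ADD_TO_TOKEN, ~0, word);
+ vlib_lex_set_action_range (word, 1, 0x7f, VLIB_LEX_KEYWORD_CHECK, ~0, start);
+ vlib_lex_set_action_range (word, 'a', 'z', VLIB_LEX_ADD_TO_TOKEN, ~0, word);
+
+ vlib_lex_find_or_add_keyword (lm, "show", "demo_show");
+
+ lm->current_table_index = start;
+ vlib_lex_reset (lm, input_vector);
+ do
+ {
+ vlib_lex_get_token (lm, &tok);
+ /* tok.token is now the "show" keyword, VLIB_LEX_word, or eof. */
+ vlib_lex_cleanup_token (&tok);
+ }
+ while (tok.token != VLIB_LEX_eof);
+}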
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
new file mode 100644
index 00000000..790f168a
--- /dev/null
+++ b/src/vlib/linux/pci.c
@@ -0,0 +1,666 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.c: Linux user space PCI bus management.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vppinfra/linux/sysfs.h>
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+typedef struct
+{
+ /* /sys/bus/pci/devices/... directory name for this device. */
+ u8 *dev_dir_name;
+
+ /* Resource file descriptors. */
+ int *resource_fds;
+
+ /* File descriptor for config space read/write. */
+ int config_fd;
+
+ /* File descriptor for /dev/uio%d */
+ int uio_fd;
+
+ /* Minor device for uio device. */
+ u32 uio_minor;
+
+ /* Index given by clib_file_add. */
+ u32 clib_file_index;
+
+} linux_pci_device_t;
+
+/* Pool of PCI devices. */
+typedef struct
+{
+ vlib_main_t *vlib_main;
+ linux_pci_device_t *linux_pci_devices;
+} linux_pci_main_t;
+
+extern linux_pci_main_t linux_pci_main;
+
+/* Call to allocate/initialize the pci subsystem.
+ This is not an init function so that users can explicitly enable
+ pci only when it's needed. */
+clib_error_t *pci_bus_init (vlib_main_t * vm);
+
+clib_error_t *vlib_pci_bind_to_uio (vlib_pci_device_t * d,
+ char *uio_driver_name);
+
+linux_pci_main_t linux_pci_main;
+
+clib_error_t *
+vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name)
+{
+ clib_error_t *error = 0;
+ u8 *s = 0, *driver_name = 0;
+ DIR *dir = 0;
+ struct dirent *e;
+ int fd, clear_driver_override = 0;
+ u8 *dev_dir_name = format (0, "/sys/bus/pci/devices/%U",
+ format_vlib_pci_addr, &d->bus_address);
+
+ s = format (s, "%v/driver%c", dev_dir_name, 0);
+ driver_name = clib_sysfs_link_to_name ((char *) s);
+ vec_reset_length (s);
+
+ if (driver_name &&
+ ((strcmp ("vfio-pci", (char *) driver_name) == 0) ||
+ (strcmp ("uio_pci_generic", (char *) driver_name) == 0) ||
+ (strcmp ("igb_uio", (char *) driver_name) == 0)))
+ goto done;
+
+ /* Walk through all linux interfaces; if an interface belonging to
+ this device is found, check whether it is admin up. */
+ dir = opendir ("/sys/class/net");
+ s = format (s, "%U%c", format_vlib_pci_addr, &d->bus_address, 0);
+
+ if (!dir)
+ {
+ error = clib_error_return (0, "Skipping PCI device %U: failed to "
+ "read /sys/class/net",
+ format_vlib_pci_addr, &d->bus_address);
+ goto done;
+ }
+
+ fd = socket (PF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "socket");
+ goto done;
+ }
+
+ while ((e = readdir (dir)))
+ {
+ struct ifreq ifr;
+ struct ethtool_drvinfo drvinfo;
+
+ if (e->d_name[0] == '.') /* skip . and .. */
+ continue;
+
+ memset (&ifr, 0, sizeof ifr);
+ memset (&drvinfo, 0, sizeof drvinfo);
+ ifr.ifr_data = (char *) &drvinfo;
+ strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1);
+ drvinfo.cmd = ETHTOOL_GDRVINFO;
+ if (ioctl (fd, SIOCETHTOOL, &ifr) < 0)
+ {
+ /* Some interfaces (eg "lo") don't support this ioctl */
+ if ((errno != ENOTSUP) && (errno != ENODEV))
+ clib_unix_warning ("ioctl fetch intf %s bus info error",
+ e->d_name);
+ continue;
+ }
+
+ if (strcmp ((char *) s, drvinfo.bus_info))
+ continue;
+
+ memset (&ifr, 0, sizeof (ifr));
+ strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1);
+ if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
+ {
+ error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
+ e->d_name);
+ close (fd);
+ goto done;
+ }
+
+ if (ifr.ifr_flags & IFF_UP)
+ {
+ error = clib_error_return (0, "Skipping PCI device %U as host "
+ "interface %s is up",
+ format_vlib_pci_addr, &d->bus_address,
+ e->d_name);
+ close (fd);
+ goto done;
+ }
+ }
+
+ close (fd);
+ vec_reset_length (s);
+
+ s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
+ clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
+ vec_reset_length (s);
+
+ s = format (s, "%v/driver_override%c", dev_dir_name, 0);
+ if (access ((char *) s, F_OK) == 0)
+ {
+ clib_sysfs_write ((char *) s, "%s", uio_driver_name);
+ clear_driver_override = 1;
+ }
+ else
+ {
+ vec_reset_length (s);
+ s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0);
+ clib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id,
+ d->device_id);
+ }
+ vec_reset_length (s);
+
+ s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0);
+ clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
+ vec_reset_length (s);
+
+ if (clear_driver_override)
+ {
+ s = format (s, "%v/driver_override%c", dev_dir_name, 0);
+ clib_sysfs_write ((char *) s, "%c", 0);
+ vec_reset_length (s);
+ }
+
+done:
+ if (dir)
+ closedir (dir);
+ vec_free (s);
+ vec_free (dev_dir_name);
+ vec_free (driver_name);
+ return error;
+}
+
+
+static clib_error_t *
+scan_uio_dir (void *arg, u8 * path_name, u8 * file_name)
+{
+ linux_pci_device_t *l = arg;
+ unformat_input_t input;
+
+ unformat_init_string (&input, (char *) file_name, vec_len (file_name));
+
+ if (!unformat (&input, "uio%d", &l->uio_minor))
+ abort ();
+
+ unformat_free (&input);
+ return 0;
+}
+
+static clib_error_t *
+linux_pci_uio_read_ready (clib_file_t * uf)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ vlib_pci_device_t *d;
+ int __attribute__ ((unused)) rv;
+
+ u32 icount;
+ rv = read (uf->file_descriptor, &icount, 4);
+
+ d = pool_elt_at_index (pm->pci_devs, uf->private_data);
+
+ if (d->interrupt_handler)
+ d->interrupt_handler (d);
+
+ vlib_pci_intr_enable (d);
+
+ return /* no error */ 0;
+}
+
+static clib_error_t *
+linux_pci_uio_error_ready (clib_file_t * uf)
+{
+ u32 error_index = (u32) uf->private_data;
+
+ return clib_error_return (0, "pci device %d: error", error_index);
+}
+
+static void
+add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ linux_pci_main_t *lpm = &linux_pci_main;
+ linux_pci_device_t *l;
+
+ pool_get (lpm->linux_pci_devices, l);
+ l[0] = pdev[0];
+
+ l->dev_dir_name = vec_dup (l->dev_dir_name);
+
+ dev->os_handle = l - lpm->linux_pci_devices;
+
+ {
+ u8 *uio_dir = format (0, "%s/uio", l->dev_dir_name);
+ foreach_directory_file ((char *) uio_dir, scan_uio_dir, l, /* scan_dirs */
+ 1);
+ vec_free (uio_dir);
+ }
+
+ {
+ char *uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 0);
+ l->uio_fd = open (uio_name, O_RDWR);
+ if (l->uio_fd < 0)
+ clib_unix_error ("open `%s'", uio_name);
+ vec_free (uio_name);
+ }
+
+ {
+ clib_file_t template = { 0 };
+
+ template.read_function = linux_pci_uio_read_ready;
+ template.file_descriptor = l->uio_fd;
+ template.error_function = linux_pci_uio_error_ready;
+ template.private_data = dev - pm->pci_devs;
+
+ l->clib_file_index = clib_file_add (&file_main, &template);
+ }
+}
+
+static void
+linux_pci_device_free (linux_pci_device_t * l)
+{
+ int i;
+ for (i = 0; i < vec_len (l->resource_fds); i++)
+ if (l->resource_fds[i] > 0)
+ close (l->resource_fds[i]);
+ if (l->config_fd > 0)
+ close (l->config_fd);
+ if (l->uio_fd > 0)
+ close (l->uio_fd);
+ vec_free (l->resource_fds);
+ vec_free (l->dev_dir_name);
+}
+
+/* Configuration space read/write. */
+clib_error_t *
+vlib_pci_read_write_config (vlib_pci_device_t * dev,
+ vlib_read_or_write_t read_or_write,
+ uword address, void *data, u32 n_bytes)
+{
+ linux_pci_main_t *lpm = &linux_pci_main;
+ linux_pci_device_t *p;
+ int n;
+
+ p = pool_elt_at_index (lpm->linux_pci_devices, dev->os_handle);
+
+ if (read_or_write == VLIB_READ)
+ n = pread (p->config_fd, data, n_bytes, address);
+ else
+ n = pwrite (p->config_fd, data, n_bytes, address);
+
+ if (n != n_bytes)
+ return clib_error_return_unix (0, "%s",
+ read_or_write == VLIB_READ
+ ? "read" : "write");
+
+ return 0;
+}
+
+static clib_error_t *
+os_map_pci_resource_internal (uword os_handle,
+ u32 resource, u8 * addr, void **result)
+{
+ linux_pci_main_t *pm = &linux_pci_main;
+ linux_pci_device_t *p;
+ struct stat stat_buf;
+ u8 *file_name;
+ int fd;
+ clib_error_t *error;
+ int flags = MAP_SHARED;
+
+ error = 0;
+ p = pool_elt_at_index (pm->linux_pci_devices, os_handle);
+
+ file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
+ fd = open ((char *) file_name, O_RDWR);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", file_name);
+ goto done;
+ }
+
+ if (fstat (fd, &stat_buf) < 0)
+ {
+ error = clib_error_return_unix (0, "fstat `%s'", file_name);
+ goto done;
+ }
+
+ vec_validate (p->resource_fds, resource);
+ p->resource_fds[resource] = fd;
+ if (addr != 0)
+ flags |= MAP_FIXED;
+
+ *result = mmap (addr,
+ /* size */ stat_buf.st_size,
+ PROT_READ | PROT_WRITE, flags,
+ /* file */ fd,
+ /* offset */ 0);
+ if (*result == (void *) -1)
+ {
+ error = clib_error_return_unix (0, "mmap `%s'", file_name);
+ goto done;
+ }
+
+done:
+ if (error)
+ {
+ if (fd >= 0)
+ close (fd);
+ }
+ vec_free (file_name);
+ return error;
+}
+
+clib_error_t *
+vlib_pci_map_resource (vlib_pci_device_t * dev, u32 resource, void **result)
+{
+ return (os_map_pci_resource_internal
+ (dev->os_handle, resource, 0 /* addr */ ,
+ result));
+}
+
+clib_error_t *
+vlib_pci_map_resource_fixed (vlib_pci_device_t * dev,
+ u32 resource, u8 * addr, void **result)
+{
+ return (os_map_pci_resource_internal
+ (dev->os_handle, resource, addr, result));
+}
+
+void
+vlib_pci_free_device (vlib_pci_device_t * dev)
+{
+ linux_pci_main_t *pm = &linux_pci_main;
+ linux_pci_device_t *l;
+
+ l = pool_elt_at_index (pm->linux_pci_devices, dev->os_handle);
+ linux_pci_device_free (l);
+ pool_put (pm->linux_pci_devices, l);
+}
+
+pci_device_registration_t * __attribute__ ((unused))
+pci_device_next_registered (pci_device_registration_t * r)
+{
+ uword i;
+
+ /* Null vendor id marks end of initialized list. */
+ for (i = 0; r->supported_devices[i].vendor_id != 0; i++)
+ ;
+
+ return clib_elf_section_data_next (r, i * sizeof (r->supported_devices[0]));
+}
+
+static clib_error_t *
+init_device_from_registered (vlib_main_t * vm,
+ vlib_pci_device_t * dev,
+ linux_pci_device_t * pdev)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ pci_device_registration_t *r;
+ pci_device_id_t *i;
+ clib_error_t *error;
+
+ r = pm->pci_device_registrations;
+
+ while (r)
+ {
+ for (i = r->supported_devices; i->vendor_id != 0; i++)
+ if (i->vendor_id == dev->vendor_id && i->device_id == dev->device_id)
+ {
+ error = vlib_pci_bind_to_uio (dev, "uio_pci_generic");
+ if (error)
+ {
+ clib_error_report (error);
+ continue;
+ }
+
+ add_device (dev, pdev);
+ dev->interrupt_handler = r->interrupt_handler;
+ return r->init_function (vm, dev);
+ }
+ r = r->next_registration;
+ }
+ /* No driver, close the PCI config-space FD */
+ close (pdev->config_fd);
+ return 0;
+}
+
+static clib_error_t *
+init_device (vlib_main_t * vm,
+ vlib_pci_device_t * dev, linux_pci_device_t * pdev)
+{
+ return init_device_from_registered (vm, dev, pdev);
+}
+
+static clib_error_t *
+scan_device (void *arg, u8 * dev_dir_name, u8 * ignored)
+{
+ vlib_main_t *vm = arg;
+ vlib_pci_main_t *pm = &pci_main;
+ int fd;
+ u8 *f;
+ clib_error_t *error = 0;
+ vlib_pci_device_t *dev;
+ linux_pci_device_t pdev = { 0 };
+ u32 tmp;
+
+ f = format (0, "%v/config%c", dev_dir_name, 0);
+ fd = open ((char *) f, O_RDWR);
+
+ /* Try read-only access if write fails. */
+ if (fd < 0)
+ fd = open ((char *) f, O_RDONLY);
+
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", f);
+ goto done;
+ }
+
+ pool_get (pm->pci_devs, dev);
+
+ /* Only root can read more than 64 bytes of config space, so try to
+ read the full space but fall back to just the first 64 bytes. */
+ if (read (fd, &dev->config_data, sizeof (dev->config_data)) !=
+ sizeof (dev->config_data)
+ && read (fd, &dev->config0,
+ sizeof (dev->config0)) != sizeof (dev->config0))
+ {
+ pool_put (pm->pci_devs, dev);
+ error = clib_error_return_unix (0, "read `%s'", f);
+ close (fd);
+ goto done;
+ }
+
+ {
+ static pci_config_header_t all_ones;
+ if (all_ones.vendor_id == 0)
+ memset (&all_ones, ~0, sizeof (all_ones));
+
+ if (!memcmp (&dev->config0.header, &all_ones, sizeof (all_ones)))
+ {
+ pool_put (pm->pci_devs, dev);
+ error = clib_error_return (0, "invalid PCI config for `%s'", f);
+ close (fd);
+ goto done;
+ }
+ }
+
+ if (dev->config0.header.header_type == 0)
+ pci_config_type0_little_to_host (&dev->config0);
+ else
+ pci_config_type1_little_to_host (&dev->config1);
+
+ /* Parse bus, dev, function from directory name. */
+ {
+ unformat_input_t input;
+
+ unformat_init_string (&input, (char *) dev_dir_name,
+ vec_len (dev_dir_name));
+
+ if (!unformat (&input, "/sys/bus/pci/devices/%U",
+ unformat_vlib_pci_addr, &dev->bus_address))
+ abort ();
+
+ unformat_free (&input);
+
+ }
+
+
+ pdev.config_fd = fd;
+ pdev.dev_dir_name = dev_dir_name;
+
+ hash_set (pm->pci_dev_index_by_pci_addr, dev->bus_address.as_u32,
+ dev - pm->pci_devs);
+
+ vec_reset_length (f);
+ f = format (f, "%v/vpd%c", dev_dir_name, 0);
+ fd = open ((char *) f, O_RDONLY);
+ if (fd >= 0)
+ {
+ while (1)
+ {
+ u8 tag[3];
+ u8 *data = 0;
+ int len;
+
+ if (read (fd, &tag, 3) != 3)
+ break;
+
+ if (tag[0] != 0x82 && tag[0] != 0x90 && tag[0] != 0x91)
+ break;
+
+ len = (tag[2] << 8) | tag[1];
+ vec_validate (data, len);
+
+ if (read (fd, data, len) != len)
+ {
+ vec_free (data);
+ break;
+ }
+ if (tag[0] == 0x82)
+ dev->product_name = data;
+ else if (tag[0] == 0x90)
+ dev->vpd_r = data;
+ else if (tag[0] == 0x91)
+ dev->vpd_w = data;
+
+ data = 0;
+ }
+ close (fd);
+ }
+
+ dev->numa_node = -1;
+ vec_reset_length (f);
+ f = format (f, "%v/numa_node%c", dev_dir_name, 0);
+ clib_sysfs_read ((char *) f, "%u", &dev->numa_node);
+
+ vec_reset_length (f);
+ f = format (f, "%v/class%c", dev_dir_name, 0);
+ clib_sysfs_read ((char *) f, "0x%x", &tmp);
+ dev->device_class = tmp >> 8;
+
+ vec_reset_length (f);
+ f = format (f, "%v/vendor%c", dev_dir_name, 0);
+ clib_sysfs_read ((char *) f, "0x%x", &tmp);
+ dev->vendor_id = tmp;
+
+ vec_reset_length (f);
+ f = format (f, "%v/device%c", dev_dir_name, 0);
+ clib_sysfs_read ((char *) f, "0x%x", &tmp);
+ dev->device_id = tmp;
+
+ error = init_device (vm, dev, &pdev);
+
+ vec_reset_length (f);
+ f = format (f, "%v/driver%c", dev_dir_name, 0);
+ dev->driver_name = clib_sysfs_link_to_name ((char *) f);
+
+done:
+ vec_free (f);
+ return error;
+}
+
+clib_error_t *
+linux_pci_init (vlib_main_t * vm)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ clib_error_t *error;
+
+ pm->vlib_main = vm;
+
+ if ((error = vlib_call_init_function (vm, unix_input_init)))
+ return error;
+
+ ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32));
+ pm->pci_dev_index_by_pci_addr = hash_create (0, sizeof (uword));
+
+ error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm,
+ /* scan_dirs */ 0);
+
+ /* Complain and continue: we might not be running as root, etc. */
+ if (error)
+ clib_error_report (error);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (linux_pci_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
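+
+/* A minimal driver-registration sketch (assumed example, not part of
+ this patch): a native driver registers a device table plus init and
+ interrupt hooks; init_device_from_registered () above then binds the
+ device to uio and calls the init function. The my_* names and the
+ example vendor/device id are illustrative; PCI_REGISTER_DEVICE is
+ declared in vlib/pci/pci.h. */
+static clib_error_t *
+my_pci_init (vlib_main_t * vm, vlib_pci_device_t * d)
+{
+ void *bar0;
+ /* Map BAR 0 and initialize the device. */
+ return vlib_pci_map_resource (d, 0, &bar0);
+}
+
+static void
+my_pci_interrupt (vlib_pci_device_t * d)
+{
+ /* Ack/dispatch; linux_pci_uio_read_ready () re-enables interrupts. */
+}
+
+PCI_REGISTER_DEVICE (my_pci_device_registration, static) =
+{
+ .init_function = my_pci_init,
+ .interrupt_handler = my_pci_interrupt,
+ .supported_devices = {
+ {.vendor_id = 0x8086,.device_id = 0x10fb},
+ {0},
+ },
+};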
diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c
new file mode 100644
index 00000000..6d3f7c55
--- /dev/null
+++ b/src/vlib/linux/physmem.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * physmem.c: Unix physical memory
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+
+#include <vppinfra/linux/syscall.h>
+#include <vppinfra/linux/sysfs.h>
+#include <vlib/vlib.h>
+#include <vlib/physmem.h>
+#include <vlib/unix/unix.h>
+
+static void *
+unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx,
+ uword n_bytes, uword alignment)
+{
+ vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
+ uword lo_offset, hi_offset;
+ uword *to_free = 0;
+
+ if (pr->heap == 0)
+ return 0;
+
+ /* IO memory is always at least cache aligned. */
+ alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
+
+ while (1)
+ {
+ mheap_get_aligned (pr->heap, n_bytes,
+ /* align */ alignment,
+ /* align offset */ 0,
+ &lo_offset);
+
+ /* Allocation failed? */
+ if (lo_offset == ~0)
+ break;
+
+ if (pr->flags & VLIB_PHYSMEM_F_FAKE)
+ break;
+
+ /* Make sure allocation does not span DMA physical chunk boundary. */
+ hi_offset = lo_offset + n_bytes - 1;
+
+ if ((lo_offset >> pr->log2_page_size) ==
+ (hi_offset >> pr->log2_page_size))
+ break;
+
+ /* Allocation would span a chunk boundary; queue it to be freed as soon
+ as we find a suitable chunk. */
+ vec_add1 (to_free, lo_offset);
+ }
+
+ if (to_free != 0)
+ {
+ uword i;
+ for (i = 0; i < vec_len (to_free); i++)
+ mheap_put (pr->heap, to_free[i]);
+ vec_free (to_free);
+ }
+
+ return lo_offset != ~0 ? pr->heap + lo_offset : 0;
+}
+
+static void
+unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x)
+{
+ vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
+ /* Return object to region's heap. */
+ mheap_put (pr->heap, x - pr->heap);
+}
+
+static clib_error_t *
+unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size,
+ u8 numa_node, u32 flags,
+ vlib_physmem_region_index_t * idx)
+{
+ vlib_physmem_main_t *vpm = &vm->physmem_main;
+ vlib_physmem_region_t *pr;
+ clib_error_t *error = 0;
+ clib_mem_vm_alloc_t alloc = { 0 };
+
+
+ if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0)
+ return clib_error_return (0, "not allowed");
+
+ pool_get (vpm->regions, pr);
+
+ if ((pr - vpm->regions) >= 256)
+ {
+ error = clib_error_return (0, "maximum number of regions reached");
+ goto error;
+ }
+
+ alloc.name = name;
+ alloc.size = size;
+ alloc.numa_node = numa_node;
+ alloc.flags = CLIB_MEM_VM_F_SHARED;
+
+ if ((flags & VLIB_PHYSMEM_F_FAKE) == 0)
+ {
+ alloc.flags |= CLIB_MEM_VM_F_HUGETLB;
+ alloc.flags |= CLIB_MEM_VM_F_HUGETLB_PREALLOC;
+ alloc.flags |= CLIB_MEM_VM_F_NUMA_FORCE;
+ }
+ else
+ {
+ alloc.flags |= CLIB_MEM_VM_F_NUMA_PREFER;
+ }
+
+ error = clib_mem_vm_ext_alloc (&alloc);
+ if (error)
+ goto error;
+
+ pr->index = pr - vpm->regions;
+ pr->flags = flags;
+ pr->fd = alloc.fd;
+ pr->mem = alloc.addr;
+ pr->log2_page_size = alloc.log2_page_size;
+ pr->n_pages = alloc.n_pages;
+ pr->size = (u64) pr->n_pages << (u64) pr->log2_page_size;
+ pr->page_mask = (1 << pr->log2_page_size) - 1;
+ pr->numa_node = numa_node;
+ pr->name = format (0, "%s", name);
+
+ if ((flags & VLIB_PHYSMEM_F_FAKE) == 0)
+ {
+ int i;
+ for (i = 0; i < pr->n_pages; i++)
+ {
+ void *ptr = pr->mem + (i << pr->log2_page_size);
+ int node;
+ move_pages (0, 1, &ptr, 0, &node, 0);
+ if (numa_node != node)
+ {
+ clib_warning ("physmem page for region \'%s\' allocated on the"
+ " wrong numa node (requested %u actual %u)",
+ pr->name, pr->numa_node, node, i);
+ break;
+ }
+ }
+ pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size,
+ pr->n_pages);
+ }
+
+ if (flags & VLIB_PHYSMEM_F_INIT_MHEAP)
+ {
+ pr->heap = mheap_alloc_with_flags (pr->mem, pr->size,
+ /* Don't want mheap mmap/munmap with IO memory. */
+ MHEAP_FLAG_DISABLE_VM |
+ MHEAP_FLAG_THREAD_SAFE);
+ }
+
+ if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS)
+ {
+ vlib_buffer_add_mem_range (vm, pointer_to_uword (pr->mem), pr->size);
+ }
+
+ *idx = pr->index;
+
+ goto done;
+
+error:
+ memset (pr, 0, sizeof (*pr));
+ pool_put (vpm->regions, pr);
+
+done:
+ return error;
+}
+
+static void
+unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx)
+{
+ vlib_physmem_main_t *vpm = &vm->physmem_main;
+ vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
+
+ if (pr->fd > 0)
+ close (pr->fd);
+ munmap (pr->mem, pr->size);
+ vec_free (pr->name);
+ pool_put (vpm->regions, pr);
+}
+
+clib_error_t *
+unix_physmem_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+
+ /* Avoid multiple calls. */
+ if (vm->os_physmem_alloc_aligned)
+ return error;
+
+ vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
+ vm->os_physmem_free = unix_physmem_free;
+ vm->os_physmem_region_alloc = unix_physmem_region_alloc;
+ vm->os_physmem_region_free = unix_physmem_region_free;
+
+ return error;
+}
+
+static clib_error_t *
+show_physmem (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_physmem_main_t *vpm = &vm->physmem_main;
+ vlib_physmem_region_t *pr;
+
+ /* *INDENT-OFF* */
+ pool_foreach (pr, vpm->regions, (
+ {
+ vlib_cli_output (vm, "index %u name '%s' page-size %uKB num-pages %d "
+ "numa-node %u fd %d\n",
+ pr->index, pr->name, (1 << (pr->log2_page_size -10)),
+ pr->n_pages, pr->numa_node, pr->fd);
+ if (pr->heap)
+ vlib_cli_output (vm, " %U", format_mheap, pr->heap, /* verbose */ 1);
+ else
+ vlib_cli_output (vm, " no heap\n");
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_physmem_command, static) = {
+ .path = "show physmem",
+ .short_help = "Show physical memory allocation",
+ .function = show_physmem,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
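+
+/* A minimal allocation sketch (assumed example, not part of this patch)
+ against the hooks installed above: create a region with an mheap and
+ carve out a cache-line aligned block. The names and sizes are
+ illustrative; non-FAKE regions need root privileges, per the check
+ above. */
+static clib_error_t *
+my_physmem_demo (vlib_main_t * vm)
+{
+ vlib_physmem_region_index_t idx;
+ clib_error_t *error;
+ void *p;
+
+ error = vm->os_physmem_region_alloc (vm, "demo region", 2 << 20,
+ /* numa_node */ 0,
+ VLIB_PHYSMEM_F_INIT_MHEAP, &idx);
+ if (error)
+ return error;
+
+ p = vm->os_physmem_alloc_aligned (vm, idx, 1024, CLIB_CACHE_LINE_BYTES);
+ if (p)
+ vm->os_physmem_free (vm, idx, p);
+ return 0;
+}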
diff --git a/src/vlib/main.c b/src/vlib/main.c
new file mode 100644
index 00000000..7875f62a
--- /dev/null
+++ b/src/vlib/main.c
@@ -0,0 +1,1816 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.c: main vector processing loop
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <math.h>
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
+
+#include <vlib/unix/unix.h>
+#include <vlib/unix/cj.h>
+
+CJ_GLOBAL_LOG_PROTOTYPE;
+
+/* Actually allocate a few extra slots of vector data to support
+ speculative vector enqueues which overflow vector data in next frame. */
+#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4)
+
+u32 wraps;
+
+always_inline u32
+vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes)
+{
+ u32 n_bytes;
+
+ /* Make room for vlib_frame_t plus scalar arguments. */
+ n_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes);
+
+ /* Make room for vector arguments.
+ Allocate a few extra slots of vector data to support
+ speculative vector enqueues which overflow vector data in next frame. */
+#define VLIB_FRAME_SIZE_EXTRA 4
+ n_bytes += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes;
+
+ /* Magic number is first 32bit number after vector data.
+ Used to make sure that vector data is never overrun. */
+#define VLIB_FRAME_MAGIC (0xabadc0ed)
+ n_bytes += sizeof (u32);
+
+ /* Pad to cache line. */
+ n_bytes = round_pow2 (n_bytes, CLIB_CACHE_LINE_BYTES);
+
+ return n_bytes;
+}
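+
+/* Worked example (illustrative numbers, not normative): for a node with
+   no scalar arguments and u32 vector elements (buffer indices), assuming
+   VLIB_FRAME_SIZE is 256 and vlib_frame_vector_byte_offset() yields 64,
+   this gives 64 + (256 + 4) * 4 + 4 = 1108 bytes, rounded up to 1152
+   when CLIB_CACHE_LINE_BYTES is 64. */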
+
+always_inline u32 *
+vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node)
+{
+ void *p = f;
+
+ p += vlib_frame_vector_byte_offset (node->scalar_size);
+
+ p += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size;
+
+ return p;
+}
+
+static vlib_frame_size_t *
+get_frame_size_info (vlib_node_main_t * nm,
+ u32 n_scalar_bytes, u32 n_vector_bytes)
+{
+ uword key = (n_scalar_bytes << 16) | n_vector_bytes;
+ uword *p, i;
+
+ p = hash_get (nm->frame_size_hash, key);
+ if (p)
+ i = p[0];
+ else
+ {
+ i = vec_len (nm->frame_sizes);
+ vec_validate (nm->frame_sizes, i);
+ hash_set (nm->frame_size_hash, key, i);
+ }
+
+ return vec_elt_at_index (nm->frame_sizes, i);
+}
+
+static u32
+vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
+ u32 frame_flags)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_frame_size_t *fs;
+ vlib_node_t *to_node;
+ vlib_frame_t *f;
+ u32 fi, l, n, scalar_size, vector_size;
+
+ to_node = vlib_get_node (vm, to_node_index);
+
+ scalar_size = to_node->scalar_size;
+ vector_size = to_node->vector_size;
+
+ fs = get_frame_size_info (nm, scalar_size, vector_size);
+ n = vlib_frame_bytes (scalar_size, vector_size);
+ if ((l = vec_len (fs->free_frame_indices)) > 0)
+ {
+ /* Allocate from end of free list. */
+ fi = fs->free_frame_indices[l - 1];
+ f = vlib_get_frame_no_check (vm, fi);
+ _vec_len (fs->free_frame_indices) = l - 1;
+ }
+ else
+ {
+ f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
+ fi = vlib_frame_index_no_check (vm, f);
+ }
+
+ /* Poison frame when debugging. */
+ if (CLIB_DEBUG > 0)
+ memset (f, 0xfe, n);
+
+ /* Insert magic number. */
+ {
+ u32 *magic;
+
+ magic = vlib_frame_find_magic (f, to_node);
+ *magic = VLIB_FRAME_MAGIC;
+ }
+
+ f->flags = VLIB_FRAME_IS_ALLOCATED | frame_flags;
+ f->n_vectors = 0;
+ f->scalar_size = scalar_size;
+ f->vector_size = vector_size;
+
+ fs->n_alloc_frames += 1;
+
+ return fi;
+}
+
+/* Allocate a frame from FROM_NODE to TO_NODE via TO_NEXT_INDEX.
+   Returns frame index. */
+static u32
+vlib_frame_alloc (vlib_main_t * vm, vlib_node_runtime_t * from_node_runtime,
+ u32 to_next_index)
+{
+ vlib_node_t *from_node;
+
+ from_node = vlib_get_node (vm, from_node_runtime->node_index);
+ ASSERT (to_next_index < vec_len (from_node->next_nodes));
+
+ return vlib_frame_alloc_to_node (vm, from_node->next_nodes[to_next_index],
+ /* frame_flags */ 0);
+}
+
+vlib_frame_t *
+vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index)
+{
+ u32 fi = vlib_frame_alloc_to_node (vm, to_node_index,
+ /* frame_flags */
+ VLIB_FRAME_FREE_AFTER_DISPATCH);
+ return vlib_get_frame (vm, fi);
+}
+
+void
+vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f)
+{
+ vlib_pending_frame_t *p;
+ vlib_node_t *to_node;
+
+ if (f->n_vectors == 0)
+ return;
+
+ to_node = vlib_get_node (vm, to_node_index);
+
+ vec_add2 (vm->node_main.pending_frames, p, 1);
+
+ f->flags |= VLIB_FRAME_PENDING;
+ p->frame_index = vlib_frame_index (vm, f);
+ p->node_runtime_index = to_node->runtime_index;
+ p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
+}
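+
+/* Typical usage sketch of the get/put pair above (illustrative;
+   my_node_index and bi are placeholders, not defined in this file):
+
+     vlib_frame_t *f = vlib_get_frame_to_node (vm, my_node_index);
+     u32 *to_next = vlib_frame_vector_args (f);
+     to_next[0] = bi;			(enqueue one buffer index)
+     f->n_vectors = 1;
+     vlib_put_frame_to_node (vm, my_node_index, f);
+*/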
+
+/* Free given frame. */
+void
+vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *node;
+ vlib_frame_size_t *fs;
+ u32 frame_index;
+
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+
+ node = vlib_get_node (vm, r->node_index);
+ fs = get_frame_size_info (nm, node->scalar_size, node->vector_size);
+
+ frame_index = vlib_frame_index (vm, f);
+
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+
+ /* No next frames may point to freed frame. */
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_next_frame_t *nf;
+ vec_foreach (nf, vm->node_main.next_frames)
+ ASSERT (nf->frame_index != frame_index);
+ }
+
+ f->flags &= ~VLIB_FRAME_IS_ALLOCATED;
+
+ vec_add1 (fs->free_frame_indices, frame_index);
+ ASSERT (fs->n_alloc_frames > 0);
+ fs->n_alloc_frames -= 1;
+}
+
+static clib_error_t *
+show_frame_stats (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_frame_size_t *fs;
+
+ vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# Free");
+ vec_foreach (fs, nm->frame_sizes)
+ {
+ u32 n_alloc = fs->n_alloc_frames;
+ u32 n_free = vec_len (fs->free_frame_indices);
+
+ if (n_alloc + n_free > 0)
+ vlib_cli_output (vm, "%=6d%=12d%=12d",
+ fs - nm->frame_sizes, n_alloc, n_free);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_frame_stats_cli, static) = {
+ .path = "show vlib frame-allocation",
+ .short_help = "Show node dispatch frame statistics",
+ .function = show_frame_stats,
+};
+/* *INDENT-ON* */
+
+/* Change ownership of enqueue rights to given next node. */
+static void
+vlib_next_frame_change_ownership (vlib_main_t * vm,
+ vlib_node_runtime_t * node_runtime,
+ u32 next_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *next_frame;
+ vlib_node_t *node, *next_node;
+
+ node = vec_elt (nm->nodes, node_runtime->node_index);
+
+ /* Only internal & input nodes are allowed to call other nodes. */
+ ASSERT (node->type == VLIB_NODE_TYPE_INTERNAL
+ || node->type == VLIB_NODE_TYPE_INPUT
+ || node->type == VLIB_NODE_TYPE_PROCESS);
+
+ ASSERT (vec_len (node->next_nodes) == node_runtime->n_next_nodes);
+
+ next_frame =
+ vlib_node_runtime_get_next_frame (vm, node_runtime, next_index);
+ next_node = vec_elt (nm->nodes, node->next_nodes[next_index]);
+
+ if (next_node->owner_node_index != VLIB_INVALID_NODE_INDEX)
+ {
+ /* Get frame from previous owner. */
+ vlib_next_frame_t *owner_next_frame;
+ vlib_next_frame_t tmp;
+
+ owner_next_frame =
+ vlib_node_get_next_frame (vm,
+ next_node->owner_node_index,
+ next_node->owner_next_index);
+
+ /* Swap target next frame with owner's. */
+ tmp = owner_next_frame[0];
+ owner_next_frame[0] = next_frame[0];
+ next_frame[0] = tmp;
+
+ /*
+ * If next_frame is already pending, we have to track down
+ * all pending frames and fix their next_frame_index fields.
+ */
+ if (next_frame->flags & VLIB_FRAME_PENDING)
+ {
+ vlib_pending_frame_t *p;
+ if (next_frame->frame_index != ~0)
+ {
+ vec_foreach (p, nm->pending_frames)
+ {
+ if (p->frame_index == next_frame->frame_index)
+ {
+ p->next_frame_index =
+ next_frame - vm->node_main.next_frames;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ /* No previous owner. Take ownership. */
+ next_frame->flags |= VLIB_FRAME_OWNER;
+ }
+
+ /* Record new owner. */
+ next_node->owner_node_index = node->index;
+ next_node->owner_next_index = next_index;
+
+ /* Now we should be owner. */
+ ASSERT (next_frame->flags & VLIB_FRAME_OWNER);
+}
+
+/* Make sure that magic number is still there.
+ Otherwise, it is likely that caller has overrun frame arguments. */
+always_inline void
+validate_frame_magic (vlib_main_t * vm,
+ vlib_frame_t * f, vlib_node_t * n, uword next_index)
+{
+ vlib_node_t *next_node = vlib_get_node (vm, n->next_nodes[next_index]);
+ u32 *magic = vlib_frame_find_magic (f, next_node);
+ ASSERT (VLIB_FRAME_MAGIC == magic[0]);
+}
+
+vlib_frame_t *
+vlib_get_next_frame_internal (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 next_index, u32 allocate_new_next_frame)
+{
+ vlib_frame_t *f;
+ vlib_next_frame_t *nf;
+ u32 n_used;
+
+ nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+
+ /* Make sure this next frame owns right to enqueue to destination frame. */
+ if (PREDICT_FALSE (!(nf->flags & VLIB_FRAME_OWNER)))
+ vlib_next_frame_change_ownership (vm, node, next_index);
+
+ /* ??? Don't need valid flag: can use frame_index == ~0 */
+ if (PREDICT_FALSE (!(nf->flags & VLIB_FRAME_IS_ALLOCATED)))
+ {
+ nf->frame_index = vlib_frame_alloc (vm, node, next_index);
+ nf->flags |= VLIB_FRAME_IS_ALLOCATED;
+ }
+
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ /* Has frame been removed from pending vector (e.g. finished dispatching)?
+ If so we can reuse frame. */
+ if ((nf->flags & VLIB_FRAME_PENDING) && !(f->flags & VLIB_FRAME_PENDING))
+ {
+ nf->flags &= ~VLIB_FRAME_PENDING;
+ f->n_vectors = 0;
+ }
+
+ /* Allocate new frame if current one is already full. */
+ n_used = f->n_vectors;
+ if (n_used >= VLIB_FRAME_SIZE || (allocate_new_next_frame && n_used > 0))
+ {
+ /* Old frame may need to be freed after dispatch, since we'll have
+ two redundant frames from node -> next node. */
+ if (!(nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH))
+ {
+ vlib_frame_t *f_old = vlib_get_frame (vm, nf->frame_index);
+ f_old->flags |= VLIB_FRAME_FREE_AFTER_DISPATCH;
+ }
+
+ /* Allocate new frame to replace full one. */
+ nf->frame_index = vlib_frame_alloc (vm, node, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+ n_used = f->n_vectors;
+ }
+
+ /* Should have free vectors in frame now. */
+ ASSERT (n_used < VLIB_FRAME_SIZE);
+
+ if (CLIB_DEBUG > 0)
+ {
+ validate_frame_magic (vm, f,
+ vlib_get_node (vm, node->node_index), next_index);
+ }
+
+ return f;
+}
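+
+/* Note: node dispatch functions normally reach this through the
+   vlib_get_next_frame() macro (see vlib/node_funcs.h), which also
+   computes the vector argument pointer and the number of free slots. */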
+
+static void
+vlib_put_next_frame_validate (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ u32 next_index, u32 n_vectors_left)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ vlib_node_runtime_t *next_rt;
+ vlib_node_t *next_node;
+ u32 n_before, n_after;
+
+ nf = vlib_node_runtime_get_next_frame (vm, rt, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ ASSERT (n_vectors_left <= VLIB_FRAME_SIZE);
+ n_after = VLIB_FRAME_SIZE - n_vectors_left;
+ n_before = f->n_vectors;
+
+ ASSERT (n_after >= n_before);
+
+ next_rt = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ nf->node_runtime_index);
+ next_node = vlib_get_node (vm, next_rt->node_index);
+ if (n_after > 0 && next_node->validate_frame)
+ {
+ u8 *msg = next_node->validate_frame (vm, rt, f);
+ if (msg)
+ {
+ clib_warning ("%v", msg);
+ ASSERT (0);
+ }
+ vec_free (msg);
+ }
+}
+
+void
+vlib_put_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index, u32 n_vectors_left)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ u32 n_vectors_in_frame;
+
+ if (vm->buffer_main->callbacks_registered == 0 && CLIB_DEBUG > 0)
+ vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left);
+
+ nf = vlib_node_runtime_get_next_frame (vm, r, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ /* Make sure that magic number is still there. Otherwise, caller
+ has overrun frame meta data. */
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t *node = vlib_get_node (vm, r->node_index);
+ validate_frame_magic (vm, f, node, next_index);
+ }
+
+ /* Convert # of vectors left -> number of vectors there. */
+ ASSERT (n_vectors_left <= VLIB_FRAME_SIZE);
+ n_vectors_in_frame = VLIB_FRAME_SIZE - n_vectors_left;
+
+ f->n_vectors = n_vectors_in_frame;
+
+ /* If vectors were added to frame, add to pending vector. */
+ if (PREDICT_TRUE (n_vectors_in_frame > 0))
+ {
+ vlib_pending_frame_t *p;
+ u32 v0, v1;
+
+ r->cached_next_index = next_index;
+
+ if (!(f->flags & VLIB_FRAME_PENDING))
+ {
+ __attribute__ ((unused)) vlib_node_t *node;
+ vlib_node_t *next_node;
+ vlib_node_runtime_t *next_runtime;
+
+ node = vlib_get_node (vm, r->node_index);
+ next_node = vlib_get_next_node (vm, r->node_index, next_index);
+ next_runtime = vlib_node_get_runtime (vm, next_node->index);
+
+ vec_add2 (nm->pending_frames, p, 1);
+
+ p->frame_index = nf->frame_index;
+ p->node_runtime_index = nf->node_runtime_index;
+ p->next_frame_index = nf - nm->next_frames;
+ nf->flags |= VLIB_FRAME_PENDING;
+ f->flags |= VLIB_FRAME_PENDING;
+
+ /*
+ * If we're going to dispatch this frame on another thread,
+ * force allocation of a new frame. Otherwise, we create
+ * a dangling frame reference. Each thread has its own copy of
+ * the next_frames vector.
+ */
+ if (0 && r->thread_index != next_runtime->thread_index)
+ {
+ nf->frame_index = ~0;
+ nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
+ }
+ }
+
+      /* Copy trace flag from next_frame and from runtime. */
+      nf->flags |=
+	(nf->flags & VLIB_NODE_FLAG_TRACE) | (r->flags & VLIB_NODE_FLAG_TRACE);
+
+ v0 = nf->vectors_since_last_overflow;
+ v1 = v0 + n_vectors_in_frame;
+ nf->vectors_since_last_overflow = v1;
+ if (PREDICT_FALSE (v1 < v0))
+ {
+ vlib_node_t *node = vlib_get_node (vm, r->node_index);
+ vec_elt (node->n_vectors_by_next_node, next_index) += v0;
+ }
+ }
+}
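+
+/* Canonical enqueue pattern in a node function, for reference (sketch
+   only; from[], n_left_from and next_index come from the calling node's
+   frame and are not defined here):
+
+     while (n_left_from > 0)
+       {
+	 u32 *to_next, n_left_to_next;
+	 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+	 while (n_left_from > 0 && n_left_to_next > 0)
+	   {
+	     to_next[0] = from[0];
+	     to_next += 1; from += 1;
+	     n_left_to_next -= 1; n_left_from -= 1;
+	   }
+	 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+       }
+*/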
+
+/* Sync up runtime (32 bit counters) and main node stats (64 bit counters). */
+never_inline void
+vlib_node_runtime_sync_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ uword n_calls, uword n_vectors, uword n_clocks)
+{
+ vlib_node_t *n = vlib_get_node (vm, r->node_index);
+
+ n->stats_total.calls += n_calls + r->calls_since_last_overflow;
+ n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
+ n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
+ n->stats_total.max_clock = r->max_clock;
+ n->stats_total.max_clock_n = r->max_clock_n;
+
+ r->calls_since_last_overflow = 0;
+ r->vectors_since_last_overflow = 0;
+ r->clocks_since_last_overflow = 0;
+}
+
+always_inline void __attribute__ ((unused))
+vlib_process_sync_stats (vlib_main_t * vm,
+ vlib_process_t * p,
+ uword n_calls, uword n_vectors, uword n_clocks)
+{
+ vlib_node_runtime_t *rt = &p->node_runtime;
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks);
+ n->stats_total.suspends += p->n_suspends;
+ p->n_suspends = 0;
+}
+
+void
+vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
+{
+ vlib_node_runtime_t *rt;
+
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ /* Nothing to do for PROCESS nodes except in main thread */
+ if (vm != &vlib_global_main)
+ return;
+
+ vlib_process_t *p = vlib_get_process_from_node (vm, n);
+ n->stats_total.suspends += p->n_suspends;
+ p->n_suspends = 0;
+ rt = &p->node_runtime;
+ }
+ else
+ rt =
+ vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
+ n->runtime_index);
+
+ vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
+
+ /* Sync up runtime next frame vector counters with main node structure. */
+ {
+ vlib_next_frame_t *nf;
+ uword i;
+ for (i = 0; i < rt->n_next_nodes; i++)
+ {
+ nf = vlib_node_runtime_get_next_frame (vm, rt, i);
+ vec_elt (n->n_vectors_by_next_node, i) +=
+ nf->vectors_since_last_overflow;
+ nf->vectors_since_last_overflow = 0;
+ }
+ }
+}
+
+always_inline u32
+vlib_node_runtime_update_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ uword n_calls,
+ uword n_vectors, uword n_clocks)
+{
+ u32 ca0, ca1, v0, v1, cl0, cl1, r;
+
+ cl0 = cl1 = node->clocks_since_last_overflow;
+ ca0 = ca1 = node->calls_since_last_overflow;
+ v0 = v1 = node->vectors_since_last_overflow;
+
+ ca1 = ca0 + n_calls;
+ v1 = v0 + n_vectors;
+ cl1 = cl0 + n_clocks;
+
+ node->calls_since_last_overflow = ca1;
+ node->clocks_since_last_overflow = cl1;
+ node->vectors_since_last_overflow = v1;
+ node->max_clock_n = node->max_clock > n_clocks ?
+ node->max_clock_n : n_vectors;
+ node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;
+
+ r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
+
+ if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
+ {
+ node->calls_since_last_overflow = ca0;
+ node->clocks_since_last_overflow = cl0;
+ node->vectors_since_last_overflow = v0;
+ vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
+ }
+
+ return r;
+}
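+
+/* The per-runtime counters above are u32, so wraparound is detected
+   arithmetically: e.g. v0 = 0xfffffff0 plus n_vectors = 0x20 yields
+   v1 = 0x10 < v0, which triggers vlib_node_runtime_sync_stats() to fold
+   the counts into the 64-bit totals. */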
+
+always_inline void
+vlib_process_update_stats (vlib_main_t * vm,
+ vlib_process_t * p,
+ uword n_calls, uword n_vectors, uword n_clocks)
+{
+ vlib_node_runtime_update_stats (vm, &p->node_runtime,
+ n_calls, n_vectors, n_clocks);
+}
+
+static clib_error_t *
+vlib_cli_elog_clear (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ elog_reset_buffer (&vm->elog_main);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_clear_cli, static) = {
+ .path = "event-logger clear",
+ .short_help = "Clear the event log",
+ .function = vlib_cli_elog_clear,
+};
+/* *INDENT-ON* */
+
+#ifdef CLIB_UNIX
+static clib_error_t *
+elog_save_buffer (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ elog_main_t *em = &vm->elog_main;
+ char *file, *chroot_file;
+ clib_error_t *error = 0;
+
+ if (!unformat (input, "%s", &file))
+ {
+ vlib_cli_output (vm, "expected file name, got `%U'",
+ format_unformat_error, input);
+ return 0;
+ }
+
+ /* It's fairly hard to get "../oopsie" through unformat; just in case */
+ if (strstr (file, "..") || index (file, '/'))
+ {
+ vlib_cli_output (vm, "illegal characters in filename '%s'", file);
+ return 0;
+ }
+
+ chroot_file = (char *) format (0, "/tmp/%s%c", file, 0);
+
+ vec_free (file);
+
+ vlib_cli_output (vm, "Saving %wd of %wd events to %s",
+ elog_n_events_in_buffer (em),
+ elog_buffer_capacity (em), chroot_file);
+
+ vlib_worker_thread_barrier_sync (vm);
+ error = elog_write_file (em, chroot_file, 1 /* flush ring */ );
+ vlib_worker_thread_barrier_release (vm);
+ vec_free (chroot_file);
+ return error;
+}
+
+void
+elog_post_mortem_dump (void)
+{
+ vlib_main_t *vm = &vlib_global_main;
+ elog_main_t *em = &vm->elog_main;
+ u8 *filename;
+ clib_error_t *error;
+
+ if (!vm->elog_post_mortem_dump)
+ return;
+
+ filename = format (0, "/tmp/elog_post_mortem.%d%c", getpid (), 0);
+ error = elog_write_file (em, (char *) filename, 1 /* flush ring */ );
+ if (error)
+ clib_error_report (error);
+ vec_free (filename);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_save_cli, static) = {
+ .path = "event-logger save",
+ .short_help = "event-logger save <filename> (saves log in /tmp/<filename>)",
+ .function = elog_save_buffer,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+elog_stop (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ elog_main_t *em = &vm->elog_main;
+
+ em->n_total_events_disable_limit = em->n_total_events;
+
+ vlib_cli_output (vm, "Stopped the event logger...");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_stop_cli, static) = {
+ .path = "event-logger stop",
+ .short_help = "Stop the event-logger",
+ .function = elog_stop,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+elog_restart (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ elog_main_t *em = &vm->elog_main;
+
+ em->n_total_events_disable_limit = ~0;
+
+ vlib_cli_output (vm, "Restarted the event logger...");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_restart_cli, static) = {
+ .path = "event-logger restart",
+ .short_help = "Restart the event-logger",
+ .function = elog_restart,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+elog_resize (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ elog_main_t *em = &vm->elog_main;
+ u32 tmp;
+
+ /* Stop the parade */
+ elog_reset_buffer (&vm->elog_main);
+
+ if (unformat (input, "%d", &tmp))
+ {
+ elog_alloc (em, tmp);
+ em->n_total_events_disable_limit = ~0;
+ }
+ else
+ return clib_error_return (0, "Must specify how many events in the ring");
+
+ vlib_cli_output (vm, "Resized ring and restarted the event logger...");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_resize_cli, static) = {
+ .path = "event-logger resize",
+ .short_help = "event-logger resize <nnn>",
+ .function = elog_resize,
+};
+/* *INDENT-ON* */
+
+#endif /* CLIB_UNIX */
+
+static void
+elog_show_buffer_internal (vlib_main_t * vm, u32 n_events_to_show)
+{
+ elog_main_t *em = &vm->elog_main;
+ elog_event_t *e, *es;
+ f64 dt;
+
+ /* Show events in VLIB time since log clock starts after VLIB clock. */
+ dt = (em->init_time.cpu - vm->clib_time.init_cpu_time)
+ * vm->clib_time.seconds_per_clock;
+
+ es = elog_peek_events (em);
+ vlib_cli_output (vm, "%d of %d events in buffer, logger %s", vec_len (es),
+ em->event_ring_size,
+ em->n_total_events < em->n_total_events_disable_limit ?
+ "running" : "stopped");
+ vec_foreach (e, es)
+ {
+ vlib_cli_output (vm, "%18.9f: %U",
+ e->time + dt, format_elog_event, em, e);
+ n_events_to_show--;
+ if (n_events_to_show == 0)
+ break;
+ }
+  vec_free (es);
+}
+
+static clib_error_t *
+elog_show_buffer (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 n_events_to_show;
+ clib_error_t *error = 0;
+
+ n_events_to_show = 250;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &n_events_to_show))
+ ;
+ else if (unformat (input, "all"))
+ n_events_to_show = ~0;
+ else
+ return unformat_parse_error (input);
+ }
+ elog_show_buffer_internal (vm, n_events_to_show);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_show_cli, static) = {
+ .path = "show event-logger",
+ .short_help = "Show event logger info",
+ .function = elog_show_buffer,
+};
+/* *INDENT-ON* */
+
+void
+vlib_gdb_show_event_log (void)
+{
+ elog_show_buffer_internal (vlib_get_main (), (u32) ~ 0);
+}
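+
+/* Intended for interactive debugging, e.g. from gdb:
+
+     (gdb) call vlib_gdb_show_event_log ()
+*/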
+
+static inline void
+vlib_elog_main_loop_event (vlib_main_t * vm,
+ u32 node_index,
+ u64 time, u32 n_vectors, u32 is_return)
+{
+ vlib_main_t *evm = &vlib_global_main;
+ elog_main_t *em = &evm->elog_main;
+
+ if (VLIB_ELOG_MAIN_LOOP && n_vectors)
+ elog_track (em,
+ /* event type */
+ vec_elt_at_index (is_return
+ ? evm->node_return_elog_event_types
+ : evm->node_call_elog_event_types,
+ node_index),
+ /* track */
+		(vm->thread_index
+		 ? &vlib_worker_threads[vm->thread_index].elog_track
+		 : &em->default_track),
+ /* data to log */ n_vectors);
+}
+
+void
+vlib_dump_context_trace (vlib_main_t * vm, u32 bi)
+{
+ vlib_node_main_t *vnm = &vm->node_main;
+ vlib_buffer_t *b;
+ u8 i, n;
+
+ if (VLIB_BUFFER_TRACE_TRAJECTORY)
+ {
+ b = vlib_get_buffer (vm, bi);
+ n = b->pre_data[0];
+
+ fformat (stderr, "Context trace for bi %d b 0x%llx, visited %d\n",
+ bi, b, n);
+
+ if (n == 0 || n > 20)
+ {
+ fformat (stderr, "n is unreasonable\n");
+ return;
+ }
+
+ for (i = 0; i < n; i++)
+ {
+ u32 node_index;
+
+ node_index = b->pre_data[i + 1];
+
+ if (node_index > vec_len (vnm->nodes))
+ {
+ fformat (stderr, "Skip bogus node index %d\n", node_index);
+ continue;
+ }
+
+ fformat (stderr, "%v (%d)\n", vnm->nodes[node_index]->name,
+ node_index);
+ }
+ }
+ else
+ {
+ fformat (stderr,
+ "in vlib/buffers.h, #define VLIB_BUFFER_TRACE_TRAJECTORY 1\n");
+ }
+}
+
+
+static_always_inline u64
+dispatch_node (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_node_type_t type,
+ vlib_node_state_t dispatch_state,
+ vlib_frame_t * frame, u64 last_time_stamp)
+{
+ uword n, v;
+ u64 t;
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *nf;
+
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t *n = vlib_get_node (vm, node->node_index);
+ ASSERT (n->type == type);
+ }
+
+ /* Only non-internal nodes may be disabled. */
+ if (type != VLIB_NODE_TYPE_INTERNAL && node->state != dispatch_state)
+ {
+ ASSERT (type != VLIB_NODE_TYPE_INTERNAL);
+ return last_time_stamp;
+ }
+
+ if ((type == VLIB_NODE_TYPE_PRE_INPUT || type == VLIB_NODE_TYPE_INPUT)
+ && dispatch_state != VLIB_NODE_STATE_INTERRUPT)
+ {
+ u32 c = node->input_main_loops_per_call;
+ /* Only call node when count reaches zero. */
+ if (c)
+ {
+ node->input_main_loops_per_call = c - 1;
+ return last_time_stamp;
+ }
+ }
+
+ /* Speculatively prefetch next frames. */
+ if (node->n_next_nodes > 0)
+ {
+ nf = vec_elt_at_index (nm->next_frames, node->next_frame_index);
+ CLIB_PREFETCH (nf, 4 * sizeof (nf[0]), WRITE);
+ }
+
+ vm->cpu_time_last_node_dispatch = last_time_stamp;
+
+ if (1 /* || vm->thread_index == node->thread_index */ )
+ {
+ vlib_main_t *stat_vm;
+
+ stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm;
+
+ vlib_elog_main_loop_event (vm, node->node_index,
+ last_time_stamp,
+ frame ? frame->n_vectors : 0,
+ /* is_after */ 0);
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited... See ixge.c...
+ */
+ if (VLIB_BUFFER_TRACE_TRAJECTORY && frame)
+ {
+ int i;
+ int log_index;
+ u32 *from;
+ from = vlib_frame_vector_args (frame);
+ for (i = 0; i < frame->n_vectors; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, from[i]);
+ ASSERT (b->pre_data[0] < 32);
+ log_index = b->pre_data[0]++ + 1;
+ b->pre_data[log_index] = node->node_index;
+ }
+ n = node->function (vm, node, frame);
+ }
+ else
+ n = node->function (vm, node, frame);
+
+ t = clib_cpu_time_now ();
+
+ vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */
+ 1);
+
+ vm->main_loop_vectors_processed += n;
+ vm->main_loop_nodes_processed += n > 0;
+
+ v = vlib_node_runtime_update_stats (stat_vm, node,
+ /* n_calls */ 1,
+ /* n_vectors */ n,
+ /* n_clocks */ t - last_time_stamp);
+
+ /* When in interrupt mode and vector rate crosses threshold switch to
+ polling mode. */
+ if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT)
+ || (dispatch_state == VLIB_NODE_STATE_POLLING
+ && (node->flags
+ & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)))
+ {
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
+	  ELOG_TYPE_DECLARE (e) = {
+	    .function = (char *) __FUNCTION__,
+	    .format = "%s vector length %d, switching to %s",
+	    .format_args = "T4i4t4",
+	    .n_enum_strings = 2,
+	    .enum_strings = { "interrupt", "polling", },
+	  };
+ struct
+ {
+ u32 node_name, vector_length, is_polling;
+ } *ed;
+ vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index;
+#endif
+
+ if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT
+ && v >= nm->polling_threshold_vector_length) &&
+ !(node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
+ {
+ vlib_node_t *n = vlib_get_node (vm, node->node_index);
+ n->state = VLIB_NODE_STATE_POLLING;
+ node->state = VLIB_NODE_STATE_POLLING;
+ node->flags &=
+ ~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
+ node->flags |=
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] -= 1;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] += 1;
+
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
+ w->elog_track);
+ ed->node_name = n->name_elog_string;
+ ed->vector_length = v;
+ ed->is_polling = 1;
+#endif
+ }
+ else if (dispatch_state == VLIB_NODE_STATE_POLLING
+ && v <= nm->interrupt_threshold_vector_length)
+ {
+ vlib_node_t *n = vlib_get_node (vm, node->node_index);
+ if (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)
+ {
+		      /* Switch to interrupt mode after one more dispatch in
+		         polling mode; this allows the driver to re-enable
+		         interrupts. */
+ n->state = VLIB_NODE_STATE_INTERRUPT;
+ node->state = VLIB_NODE_STATE_INTERRUPT;
+ node->flags &=
+ ~VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] -=
+ 1;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] +=
+ 1;
+
+ }
+ else
+ {
+ node->flags |=
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
+ w->elog_track);
+ ed->node_name = n->name_elog_string;
+ ed->vector_length = v;
+ ed->is_polling = 0;
+#endif
+ }
+ }
+ }
+ }
+
+ return t;
+}
+
+static u64
+dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
+ u64 last_time_stamp)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_frame_t *f;
+ vlib_next_frame_t *nf, nf_dummy;
+ vlib_node_runtime_t *n;
+ u32 restore_frame_index;
+ vlib_pending_frame_t *p;
+
+ /* See comment below about dangling references to nm->pending_frames */
+ p = nm->pending_frames + pending_frame_index;
+
+ n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ p->node_runtime_index);
+
+ f = vlib_get_frame (vm, p->frame_index);
+ if (p->next_frame_index == VLIB_PENDING_FRAME_NO_NEXT_FRAME)
+ {
+ /* No next frame: so use dummy on stack. */
+ nf = &nf_dummy;
+ nf->flags = f->flags & VLIB_NODE_FLAG_TRACE;
+ nf->frame_index = ~p->frame_index;
+ }
+ else
+ nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
+
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+
+ /* Force allocation of new frame while current frame is being
+ dispatched. */
+ restore_frame_index = ~0;
+ if (nf->frame_index == p->frame_index)
+ {
+ nf->frame_index = ~0;
+ nf->flags &= ~VLIB_FRAME_IS_ALLOCATED;
+ if (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH))
+ restore_frame_index = p->frame_index;
+ }
+
+ /* Frame must be pending. */
+ ASSERT (f->flags & VLIB_FRAME_PENDING);
+ ASSERT (f->n_vectors > 0);
+
+ /* Copy trace flag from next frame to node.
+ Trace flag indicates that at least one vector in the dispatched
+ frame is traced. */
+ n->flags &= ~VLIB_NODE_FLAG_TRACE;
+ n->flags |= (nf->flags & VLIB_FRAME_TRACE) ? VLIB_NODE_FLAG_TRACE : 0;
+ nf->flags &= ~VLIB_FRAME_TRACE;
+
+ last_time_stamp = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_INTERNAL,
+ VLIB_NODE_STATE_POLLING,
+ f, last_time_stamp);
+
+ f->flags &= ~VLIB_FRAME_PENDING;
+
+ /* Frame is ready to be used again, so restore it. */
+ if (restore_frame_index != ~0)
+ {
+      /*
+       * We mustn't restore a frame that is flagged to be freed. This
+       * shouldn't happen since frames to be freed post dispatch are
+       * those used when the to-node frame becomes full, i.e. they form a
+       * sort of queue of frames to a single node. If we get here then
+       * the to-node frame and the pending frame *were* the same, and so
+       * we removed the to-node frame. Therefore this frame is no
+       * longer part of the queue for that node and hence it cannot be
+       * its overspill.
+       */
+ ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH));
+
+ /*
+ * NB: dispatching node n can result in the creation and scheduling
+ * of new frames, and hence in the reallocation of nm->pending_frames.
+ * Recompute p, or no supper. This was broken for more than 10 years.
+ */
+ p = nm->pending_frames + pending_frame_index;
+
+ /*
+ * p->next_frame_index can change during node dispatch if node
+ * function decides to change graph hook up.
+ */
+ nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
+ nf->flags |= VLIB_FRAME_IS_ALLOCATED;
+
+ if (~0 == nf->frame_index)
+ {
+ /* no new frame has been assigned to this node, use the saved one */
+ nf->frame_index = restore_frame_index;
+ f->n_vectors = 0;
+ }
+ else
+ {
+ /* The node has gained a frame, implying packets from the current frame
+ were re-queued to this same node. we don't need the saved one
+ anymore */
+ vlib_frame_free (vm, n, f);
+ }
+ }
+ else
+ {
+ if (f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH)
+ {
+ ASSERT (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH));
+ vlib_frame_free (vm, n, f);
+ }
+ }
+
+ return last_time_stamp;
+}
+
+always_inline uword
+vlib_process_stack_is_valid (vlib_process_t * p)
+{
+ return p->stack[0] == VLIB_PROCESS_STACK_MAGIC;
+}
+
+typedef struct
+{
+ vlib_main_t *vm;
+ vlib_process_t *process;
+ vlib_frame_t *frame;
+} vlib_process_bootstrap_args_t;
+
+/* Called in process stack. */
+static uword
+vlib_process_bootstrap (uword _a)
+{
+ vlib_process_bootstrap_args_t *a;
+ vlib_main_t *vm;
+ vlib_node_runtime_t *node;
+ vlib_frame_t *f;
+ vlib_process_t *p;
+ uword n;
+
+ a = uword_to_pointer (_a, vlib_process_bootstrap_args_t *);
+
+ vm = a->vm;
+ p = a->process;
+ f = a->frame;
+ node = &p->node_runtime;
+
+ n = node->function (vm, node, f);
+
+ ASSERT (vlib_process_stack_is_valid (p));
+
+ clib_longjmp (&p->return_longjmp, n);
+
+ return n;
+}
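+
+/* Stack-switching handshake, for reference: vlib_process_startup()
+   records a return point with clib_setjmp, then clib_calljmp runs
+   vlib_process_bootstrap on the process's private stack.  Control comes
+   back to that return point either when the node function returns (the
+   clib_longjmp above) or when it suspends from within; the uword handed
+   to clib_longjmp becomes the startup/resume return value, which the
+   dispatcher compares against VLIB_PROCESS_RETURN_LONGJMP_SUSPEND. */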
+
+/* Called in main stack. */
+static_always_inline uword
+vlib_process_startup (vlib_main_t * vm, vlib_process_t * p, vlib_frame_t * f)
+{
+ vlib_process_bootstrap_args_t a;
+ uword r;
+
+ a.vm = vm;
+ a.process = p;
+ a.frame = f;
+
+ r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+ if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
+ r = clib_calljmp (vlib_process_bootstrap, pointer_to_uword (&a),
+ (void *) p->stack + (1 << p->log2_n_stack_bytes));
+
+ return r;
+}
+
+static_always_inline uword
+vlib_process_resume (vlib_process_t * p)
+{
+ uword r;
+ p->flags &= ~(VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+ | VLIB_PROCESS_RESUME_PENDING);
+ r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+ if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
+ clib_longjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_RESUME);
+ return r;
+}
+
+static u64
+dispatch_process (vlib_main_t * vm,
+ vlib_process_t * p, vlib_frame_t * f, u64 last_time_stamp)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_runtime_t *node_runtime = &p->node_runtime;
+ vlib_node_t *node = vlib_get_node (vm, node_runtime->node_index);
+ u64 t;
+ uword n_vectors, is_suspend;
+
+ if (node->state != VLIB_NODE_STATE_POLLING
+ || (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT)))
+ return last_time_stamp;
+
+ p->flags |= VLIB_PROCESS_IS_RUNNING;
+
+ t = last_time_stamp;
+ vlib_elog_main_loop_event (vm, node_runtime->node_index, t,
+ f ? f->n_vectors : 0, /* is_after */ 0);
+
+ /* Save away current process for suspend. */
+ nm->current_process_index = node->runtime_index;
+
+ n_vectors = vlib_process_startup (vm, p, f);
+
+ nm->current_process_index = ~0;
+
+ ASSERT (n_vectors != VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+ is_suspend = n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND;
+ if (is_suspend)
+ {
+ vlib_pending_frame_t *pf;
+
+ n_vectors = 0;
+ pool_get (nm->suspended_process_frames, pf);
+ pf->node_runtime_index = node->runtime_index;
+ pf->frame_index = f ? vlib_frame_index (vm, f) : ~0;
+ pf->next_frame_index = ~0;
+
+ p->n_suspends += 1;
+ p->suspended_process_frame_index = pf - nm->suspended_process_frames;
+
+ if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+ {
+ TWT (tw_timer_wheel) * tw =
+ (TWT (tw_timer_wheel) *) nm->timing_wheel;
+ p->stop_timer_handle =
+ TW (tw_timer_start) (tw,
+ vlib_timing_wheel_data_set_suspended_process
+				 (node->runtime_index) /* pool index */ ,
+ 0 /* timer_id */ ,
+ p->resume_clock_interval);
+ }
+ }
+ else
+ p->flags &= ~VLIB_PROCESS_IS_RUNNING;
+
+ t = clib_cpu_time_now ();
+
+ vlib_elog_main_loop_event (vm, node_runtime->node_index, t, is_suspend,
+ /* is_after */ 1);
+
+ vlib_process_update_stats (vm, p,
+ /* n_calls */ !is_suspend,
+ /* n_vectors */ n_vectors,
+ /* n_clocks */ t - last_time_stamp);
+
+ return t;
+}
+
+void
+vlib_start_process (vlib_main_t * vm, uword process_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p = vec_elt (nm->processes, process_index);
+ dispatch_process (vm, p, /* frame */ 0, /* cpu_time_now */ 0);
+}
+
+static u64
+dispatch_suspended_process (vlib_main_t * vm,
+ uword process_index, u64 last_time_stamp)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_runtime_t *node_runtime;
+ vlib_node_t *node;
+ vlib_frame_t *f;
+ vlib_process_t *p;
+ vlib_pending_frame_t *pf;
+ u64 t, n_vectors, is_suspend;
+
+ t = last_time_stamp;
+
+ p = vec_elt (nm->processes, process_index);
+ if (PREDICT_FALSE (!(p->flags & VLIB_PROCESS_IS_RUNNING)))
+ return last_time_stamp;
+
+ ASSERT (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT));
+
+ pf =
+ pool_elt_at_index (nm->suspended_process_frames,
+ p->suspended_process_frame_index);
+
+ node_runtime = &p->node_runtime;
+ node = vlib_get_node (vm, node_runtime->node_index);
+ f = pf->frame_index != ~0 ? vlib_get_frame (vm, pf->frame_index) : 0;
+
+ vlib_elog_main_loop_event (vm, node_runtime->node_index, t,
+ f ? f->n_vectors : 0, /* is_after */ 0);
+
+ /* Save away current process for suspend. */
+ nm->current_process_index = node->runtime_index;
+
+ n_vectors = vlib_process_resume (p);
+ t = clib_cpu_time_now ();
+
+ nm->current_process_index = ~0;
+
+ is_suspend = n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND;
+ if (is_suspend)
+ {
+ /* Suspend it again. */
+ n_vectors = 0;
+ p->n_suspends += 1;
+ if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+ {
+ p->stop_timer_handle =
+ TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+ vlib_timing_wheel_data_set_suspended_process
+				    (node->runtime_index) /* pool index */ ,
+ 0 /* timer_id */ ,
+ p->resume_clock_interval);
+ }
+ }
+ else
+ {
+ p->flags &= ~VLIB_PROCESS_IS_RUNNING;
+ p->suspended_process_frame_index = ~0;
+ pool_put (nm->suspended_process_frames, pf);
+ }
+
+ t = clib_cpu_time_now ();
+ vlib_elog_main_loop_event (vm, node_runtime->node_index, t, !is_suspend,
+ /* is_after */ 1);
+
+ vlib_process_update_stats (vm, p,
+ /* n_calls */ !is_suspend,
+ /* n_vectors */ n_vectors,
+ /* n_clocks */ t - last_time_stamp);
+
+ return t;
+}
+
+static_always_inline void
+vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ uword i;
+ u64 cpu_time_now;
+ vlib_frame_queue_main_t *fqm;
+ u32 *last_node_runtime_indices = 0;
+
+ /* Initialize pending node vector. */
+ if (is_main)
+ {
+ vec_resize (nm->pending_frames, 32);
+ _vec_len (nm->pending_frames) = 0;
+ }
+
+ /* Mark time of main loop start. */
+ if (is_main)
+ {
+ cpu_time_now = vm->clib_time.last_cpu_time;
+ vm->cpu_time_main_loop_start = cpu_time_now;
+ }
+ else
+ cpu_time_now = clib_cpu_time_now ();
+
+  /* Pre-allocate interrupt runtime indices and lock. */
+ vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
+ vec_alloc (last_node_runtime_indices, 32);
+ if (!is_main)
+ clib_spinlock_init (&nm->pending_interrupt_lock);
+
+  /* Default thresholds for switching between interrupt and polling mode. */
+ if (!nm->polling_threshold_vector_length)
+ nm->polling_threshold_vector_length = 10;
+ if (!nm->interrupt_threshold_vector_length)
+ nm->interrupt_threshold_vector_length = 5;
+
+ /* Start all processes. */
+ if (is_main)
+ {
+ uword i;
+ nm->current_process_index = ~0;
+ for (i = 0; i < vec_len (nm->processes); i++)
+ cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0,
+ cpu_time_now);
+ }
+
+ while (1)
+ {
+ vlib_node_runtime_t *n;
+
+ if (!is_main)
+ {
+ vlib_worker_thread_barrier_check ();
+ vec_foreach (fqm, tm->frame_queue_mains)
+ vlib_frame_queue_dequeue (vm, fqm);
+ }
+
+ /* Process pre-input nodes. */
+ if (is_main)
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_PRE_INPUT,
+ VLIB_NODE_STATE_POLLING,
+ /* frame */ 0,
+ cpu_time_now);
+
+ /* Next process input nodes. */
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_STATE_POLLING,
+ /* frame */ 0,
+ cpu_time_now);
+
+ if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0))
+ vm->queue_signal_callback (vm);
+
+ /* Next handle interrupts. */
+ {
+ uword l = _vec_len (nm->pending_interrupt_node_runtime_indices);
+ uword i;
+ if (l > 0)
+ {
+ u32 *tmp;
+ if (!is_main)
+ clib_spinlock_lock (&nm->pending_interrupt_lock);
+ tmp = nm->pending_interrupt_node_runtime_indices;
+ nm->pending_interrupt_node_runtime_indices =
+ last_node_runtime_indices;
+ last_node_runtime_indices = tmp;
+ _vec_len (last_node_runtime_indices) = 0;
+ if (!is_main)
+ clib_spinlock_unlock (&nm->pending_interrupt_lock);
+ for (i = 0; i < l; i++)
+ {
+ n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
+ last_node_runtime_indices[i]);
+ cpu_time_now =
+ dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_STATE_INTERRUPT,
+ /* frame */ 0,
+ cpu_time_now);
+ }
+ }
+ }
+
+ if (is_main)
+ {
+ /* Check if process nodes have expired from timing wheel. */
+ ASSERT (nm->data_from_advancing_timing_wheel != 0);
+
+ nm->data_from_advancing_timing_wheel =
+ TW (tw_timer_expire_timers_vec)
+ ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm),
+ nm->data_from_advancing_timing_wheel);
+
+ ASSERT (nm->data_from_advancing_timing_wheel != 0);
+
+ if (PREDICT_FALSE
+ (_vec_len (nm->data_from_advancing_timing_wheel) > 0))
+ {
+ uword i;
+
+ processes_timing_wheel_data:
+ for (i = 0; i < _vec_len (nm->data_from_advancing_timing_wheel);
+ i++)
+ {
+ u32 d = nm->data_from_advancing_timing_wheel[i];
+ u32 di = vlib_timing_wheel_data_get_index (d);
+
+ if (vlib_timing_wheel_data_is_timed_event (d))
+ {
+ vlib_signal_timed_event_data_t *te =
+ pool_elt_at_index (nm->signal_timed_event_data_pool,
+ di);
+ vlib_node_t *n =
+ vlib_get_node (vm, te->process_node_index);
+ vlib_process_t *p =
+ vec_elt (nm->processes, n->runtime_index);
+ void *data;
+ data =
+ vlib_process_signal_event_helper (nm, n, p,
+ te->event_type_index,
+ te->n_data_elts,
+ te->n_data_elt_bytes);
+ if (te->n_data_bytes < sizeof (te->inline_event_data))
+ clib_memcpy (data, te->inline_event_data,
+ te->n_data_bytes);
+ else
+ {
+ clib_memcpy (data, te->event_data_as_vector,
+ te->n_data_bytes);
+ vec_free (te->event_data_as_vector);
+ }
+ pool_put (nm->signal_timed_event_data_pool, te);
+ }
+ else
+ {
+ cpu_time_now = clib_cpu_time_now ();
+ cpu_time_now =
+ dispatch_suspended_process (vm, di, cpu_time_now);
+ }
+ }
+ _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+ }
+ }
+
+ /* Input nodes may have added work to the pending vector.
+ Process pending vector until there is nothing left.
+ All pending vectors will be processed from input -> output. */
+ for (i = 0; i < _vec_len (nm->pending_frames); i++)
+ cpu_time_now = dispatch_pending_node (vm, i, cpu_time_now);
+ /* Reset pending vector for next iteration. */
+ _vec_len (nm->pending_frames) = 0;
+
+ /* Pending internal nodes may resume processes. */
+ if (is_main && _vec_len (nm->data_from_advancing_timing_wheel) > 0)
+ goto processes_timing_wheel_data;
+
+ vlib_increment_main_loop_counter (vm);
+
+ /* Record time stamp in case there are no enabled nodes and above
+ calls do not update time stamp. */
+ cpu_time_now = clib_cpu_time_now ();
+ }
+}
+
+static void
+vlib_main_loop (vlib_main_t * vm)
+{
+ vlib_main_or_worker_loop (vm, /* is_main */ 1);
+}
+
+void
+vlib_worker_loop (vlib_main_t * vm)
+{
+ vlib_main_or_worker_loop (vm, /* is_main */ 0);
+}
+
+vlib_main_t vlib_global_main;
+
+static clib_error_t *
+vlib_main_configure (vlib_main_t * vm, unformat_input_t * input)
+{
+ int turn_on_mem_trace = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "memory-trace"))
+ turn_on_mem_trace = 1;
+
+ else if (unformat (input, "elog-events %d",
+ &vm->elog_main.event_ring_size))
+ ;
+ else if (unformat (input, "elog-post-mortem-dump"))
+ vm->elog_post_mortem_dump = 1;
+ else
+ return unformat_parse_error (input);
+ }
+
+ unformat_free (input);
+
+ /* Enable memory trace as early as possible. */
+ if (turn_on_mem_trace)
+ clib_mem_trace (1);
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (vlib_main_configure, "vlib");
+
+static void
+dummy_queue_signal_callback (vlib_main_t * vm)
+{
+}
+
+/* Main function. */
+int
+vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
+{
+ clib_error_t *volatile error;
+ vlib_node_main_t *nm = &vm->node_main;
+
+ vm->queue_signal_callback = dummy_queue_signal_callback;
+
+ clib_time_init (&vm->clib_time);
+
+ /* Turn on event log. */
+ if (!vm->elog_main.event_ring_size)
+ vm->elog_main.event_ring_size = 128 << 10;
+ elog_init (&vm->elog_main, vm->elog_main.event_ring_size);
+ elog_enable_disable (&vm->elog_main, 1);
+
+ /* Default name. */
+ if (!vm->name)
+ vm->name = "VLIB";
+
+ if ((error = unix_physmem_init (vm)))
+ {
+ clib_error_report (error);
+ goto done;
+ }
+
+ if ((error = vlib_buffer_main_init (vm)))
+ {
+ clib_error_report (error);
+ goto done;
+ }
+
+ if ((error = vlib_thread_init (vm)))
+ {
+ clib_error_report (error);
+ goto done;
+ }
+
+ /* Register static nodes so that init functions may use them. */
+ vlib_register_all_static_nodes (vm);
+
+ /* Set seed for random number generator.
+ Allow user to specify seed to make random sequence deterministic. */
+ if (!unformat (input, "seed %wd", &vm->random_seed))
+ vm->random_seed = clib_cpu_time_now ();
+ clib_random_buffer_init (&vm->random_buffer, vm->random_seed);
+
+ /* Initialize node graph. */
+ if ((error = vlib_node_main_init (vm)))
+ {
+ /* Arrange for graph hook up error to not be fatal when debugging. */
+ if (CLIB_DEBUG > 0)
+ clib_error_report (error);
+ else
+ goto done;
+ }
+
+ /* See unix/main.c; most likely already set up */
+ if (vm->init_functions_called == 0)
+ vm->init_functions_called = hash_create (0, /* value bytes */ 0);
+ if ((error = vlib_call_all_init_functions (vm)))
+ goto done;
+
+ /* Create default buffer free list. */
+ vlib_buffer_get_or_create_free_list (vm,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
+ "default");
+
+ nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)),
+ CLIB_CACHE_LINE_BYTES);
+
+ vec_validate (nm->data_from_advancing_timing_wheel, 10);
+ _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+
+ /* Create the process timing wheel */
+ TW (tw_timer_wheel_init) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+ 0 /* no callback */ ,
+ 10e-6 /* timer period 10us */ ,
+ ~0 /* max expirations per call */ );
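+
+  /* With a 10 us tick, a process that suspends for e.g. 1 ms is parked
+     on this wheel for roughly 100 ticks before
+     dispatch_suspended_process() resumes it. */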
+
+ switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE))
+ {
+ case VLIB_MAIN_LOOP_EXIT_NONE:
+ vm->main_loop_exit_set = 1;
+ break;
+
+ case VLIB_MAIN_LOOP_EXIT_CLI:
+ goto done;
+
+ default:
+ error = vm->main_loop_error;
+ goto done;
+ }
+
+ if ((error = vlib_call_all_config_functions (vm, input, 0 /* is_early */ )))
+ goto done;
+
+ /* Call all main loop enter functions. */
+ {
+ clib_error_t *sub_error;
+ sub_error = vlib_call_all_main_loop_enter_functions (vm);
+ if (sub_error)
+ clib_error_report (sub_error);
+ }
+
+ vlib_main_loop (vm);
+
+done:
+ /* Call all exit functions. */
+ {
+ clib_error_t *sub_error;
+ sub_error = vlib_call_all_main_loop_exit_functions (vm);
+ if (sub_error)
+ clib_error_report (sub_error);
+ }
+
+ if (error)
+ clib_error_report (error);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/main.h b/src/vlib/main.h
new file mode 100644
index 00000000..4288d6f0
--- /dev/null
+++ b/src/vlib/main.h
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.h: VLIB main data structure
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_main_h
+#define included_vlib_main_h
+
+#include <vppinfra/elog.h>
+#include <vppinfra/format.h>
+#include <vppinfra/longjmp.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/random_buffer.h>
+#include <vppinfra/time.h>
+
+#include <pthread.h>
+
+
+/* By default turn off node/error event logging.
+ Override with -DVLIB_ELOG_MAIN_LOOP */
+#ifndef VLIB_ELOG_MAIN_LOOP
+#define VLIB_ELOG_MAIN_LOOP 0
+#endif
+
+typedef struct vlib_main_t
+{
+ /* Instruction level timing state. */
+ clib_time_t clib_time;
+
+ /* Time stamp of last node dispatch. */
+ u64 cpu_time_last_node_dispatch;
+
+ /* Time stamp when main loop was entered (time 0). */
+ u64 cpu_time_main_loop_start;
+
+ /* Incremented once for each main loop. */
+ u32 main_loop_count;
+
+ /* Count of vectors processed this main loop. */
+ u32 main_loop_vectors_processed;
+ u32 main_loop_nodes_processed;
+
+ /* Circular buffer of input node vector counts.
+ Indexed by low bits of
+ (main_loop_count >> VLIB_LOG2_INPUT_VECTORS_PER_MAIN_LOOP). */
+ u32 vector_counts_per_main_loop[2];
+ u32 node_counts_per_main_loop[2];
+
+ /* Every so often we switch to the next counter. */
+#define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
+
+ /* Jump target to exit main loop with given code. */
+ u32 main_loop_exit_set;
+ /* Set e.g. in the SIGTERM signal handler, checked in a safe place... */
+ volatile u32 main_loop_exit_now;
+ clib_longjmp_t main_loop_exit;
+#define VLIB_MAIN_LOOP_EXIT_NONE 0
+#define VLIB_MAIN_LOOP_EXIT_PANIC 1
+ /* Exit via CLI. */
+#define VLIB_MAIN_LOOP_EXIT_CLI 2
+
+ /* Error marker to use when exiting main loop. */
+ clib_error_t *main_loop_error;
+
+ /* Name for e.g. syslog. */
+ char *name;
+
+ /* Start and size of CLIB heap. */
+ void *heap_base;
+ uword heap_size;
+
+ vlib_buffer_main_t *buffer_main;
+
+ vlib_physmem_main_t physmem_main;
+
+ /* Allocate/free buffer memory for DMA transfers, descriptor rings, etc.
+ buffer memory is guaranteed to be cache-aligned. */
+
+ clib_error_t *(*os_physmem_region_alloc) (struct vlib_main_t * vm,
+ char *name, u32 size,
+ u8 numa_node, u32 flags,
+ vlib_physmem_region_index_t *
+ idx);
+
+ void (*os_physmem_region_free) (struct vlib_main_t * vm,
+ vlib_physmem_region_index_t idx);
+
+ void *(*os_physmem_alloc_aligned) (struct vlib_main_t * vm,
+ vlib_physmem_region_index_t idx,
+ uword n_bytes, uword alignment);
+ void (*os_physmem_free) (struct vlib_main_t * vm,
+ vlib_physmem_region_index_t idx, void *x);
+
+ /* Node graph main structure. */
+ vlib_node_main_t node_main;
+
+ /* Command line interface. */
+ vlib_cli_main_t cli_main;
+
+ /* Packet trace buffer. */
+ vlib_trace_main_t trace_main;
+
+ /* Error handling. */
+ vlib_error_main_t error_main;
+
+ /* Punt packets to underlying operating system for when fast switching
+ code does not know what to do. */
+ void (*os_punt_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t * node,
+ vlib_frame_t * frame);
+
+ /* Multicast distribution. Set to zero for MC disabled. */
+ mc_main_t *mc_main;
+
+ /* Stream index to use for distribution when MC is enabled. */
+ u32 mc_stream_index;
+
+ vlib_one_time_waiting_process_t *procs_waiting_for_mc_stream_join;
+
+ /* Event logger. */
+ elog_main_t elog_main;
+
+ /* Node call and return event types. */
+ elog_event_type_t *node_call_elog_event_types;
+ elog_event_type_t *node_return_elog_event_types;
+
+ elog_event_type_t *error_elog_event_types;
+
+ /* Seed for random number generator. */
+ uword random_seed;
+
+ /* Buffer of random data for various uses. */
+ clib_random_buffer_t random_buffer;
+
+ /* Hash table to record which init functions have been called. */
+ uword *init_functions_called;
+
+  /* Index of this thread; compared against node runtime thread indices. */
+ u32 thread_index;
+
+ void **mbuf_alloc_list;
+
+ /* List of init functions to call, setup by constructors */
+ _vlib_init_function_list_elt_t *init_function_registrations;
+ _vlib_init_function_list_elt_t *worker_init_function_registrations;
+ _vlib_init_function_list_elt_t *main_loop_enter_function_registrations;
+ _vlib_init_function_list_elt_t *main_loop_exit_function_registrations;
+ _vlib_init_function_list_elt_t *api_init_function_registrations;
+ vlib_config_function_runtime_t *config_function_registrations;
+ mc_serialize_msg_t *mc_msg_registrations; /* mc_main is a pointer... */
+
+ /* control-plane API queue signal pending, length indication */
+ volatile u32 queue_signal_pending;
+ volatile u32 api_queue_nonempty;
+ void (*queue_signal_callback) (struct vlib_main_t *);
+ u8 **argv;
+
+ /* debugging */
+ volatile int parked_at_barrier;
+
+ /* Attempt to do a post-mortem elog dump */
+ int elog_post_mortem_dump;
+
+ /*
+ * Need to call vlib_worker_thread_node_runtime_update before
+ * releasing worker thread barrier. Only valid in vlib_global_main.
+ */
+ int need_vlib_worker_thread_node_runtime_update;
+
+ /*
+ * Barrier epoch - Set to current time, each time barrier_sync or
+ * barrier_release is called with zero recursion.
+ */
+ f64 barrier_epoch;
+
+ /* Earliest barrier can be closed again */
+ f64 barrier_no_close_before;
+
+} vlib_main_t;
+
+/* Global main structure. */
+extern vlib_main_t vlib_global_main;
+
+void vlib_worker_loop (vlib_main_t * vm);
+
+always_inline f64
+vlib_time_now (vlib_main_t * vm)
+{
+ return clib_time_now (&vm->clib_time);
+}
+
+always_inline f64
+vlib_time_now_ticks (vlib_main_t * vm, u64 n)
+{
+ return clib_time_now_internal (&vm->clib_time, n);
+}
+
+/* Busy wait for specified time. */
+always_inline void
+vlib_time_wait (vlib_main_t * vm, f64 wait)
+{
+ f64 t = vlib_time_now (vm);
+ f64 limit = t + wait;
+ while (t < limit)
+ t = vlib_time_now (vm);
+}
+
+/* Time a piece of code. */
+#define vlib_time_code(vm,body) \
+do { \
+ f64 _t[2]; \
+ _t[0] = vlib_time_now (vm); \
+ do { body; } while (0); \
+ _t[1] = vlib_time_now (vm); \
+ clib_warning ("%.7e", _t[1] - _t[0]); \
+} while (0)
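+
+/* Example (sketch): time a hypothetical call and log the delta:
+
+     vlib_time_code (vm, my_expensive_call ());
+*/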
+
+#define vlib_wait_with_timeout(vm,suspend_time,timeout_time,test) \
+({ \
+ uword __vlib_wait_with_timeout = 0; \
+ f64 __vlib_wait_time = 0; \
+ while (! (__vlib_wait_with_timeout = (test)) \
+ && __vlib_wait_time < (timeout_time)) \
+ { \
+ vlib_process_suspend (vm, suspend_time); \
+ __vlib_wait_time += suspend_time; \
+ } \
+ __vlib_wait_with_timeout; \
+})
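+
+/* Example (sketch): poll a hypothetical flag every 10 ms for up to
+   5 seconds; the macro evaluates to 0 on timeout:
+
+     if (!vlib_wait_with_timeout (vm, 10e-3, 5.0, *flag != 0))
+       ... handle timeout ...
+*/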
+
+always_inline void
+vlib_panic_with_error (vlib_main_t * vm, clib_error_t * error)
+{
+ vm->main_loop_error = error;
+ clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_PANIC);
+}
+
+#define vlib_panic_with_msg(vm,args...) \
+ vlib_panic_with_error (vm, clib_error_return (0, args))
+
+always_inline void
+vlib_panic (vlib_main_t * vm)
+{
+ vlib_panic_with_error (vm, 0);
+}
+
+always_inline u32
+vlib_vector_input_stats_index (vlib_main_t * vm, word delta)
+{
+ u32 i;
+ i = vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+ ASSERT (is_pow2 (ARRAY_LEN (vm->vector_counts_per_main_loop)));
+ return (i + delta) & (ARRAY_LEN (vm->vector_counts_per_main_loop) - 1);
+}
+
+/* Estimate input rate over the previous
+   2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE
+   main loop iterations. */
+always_inline u32
+vlib_last_vectors_per_main_loop (vlib_main_t * vm)
+{
+ u32 i = vlib_vector_input_stats_index (vm, -1);
+ u32 n = vm->vector_counts_per_main_loop[i];
+ return n >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+}
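+
+/* Worked example (assuming VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE is 7,
+   i.e. 128 main loops per sample): if the last completed sample counted
+   512 vectors, vlib_last_vectors_per_main_loop returns 512 >> 7 = 4. */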
+
+/* Average vector count per main loop iteration, as an f64. */
+always_inline f64
+vlib_last_vectors_per_main_loop_as_f64 (vlib_main_t * vm)
+{
+ u32 i = vlib_vector_input_stats_index (vm, -1);
+ u32 v = vm->vector_counts_per_main_loop[i];
+ return (f64) v / (f64) (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
+}
+
+/* Average vector length (vectors per node call) over the last sample. */
+always_inline f64
+vlib_last_vector_length_per_node (vlib_main_t * vm)
+{
+ u32 i = vlib_vector_input_stats_index (vm, -1);
+ u32 v = vm->vector_counts_per_main_loop[i];
+ u32 n = vm->node_counts_per_main_loop[i];
+ return n == 0 ? 0 : (f64) v / (f64) n;
+}
+
+extern u32 wraps;
+
+always_inline void
+vlib_increment_main_loop_counter (vlib_main_t * vm)
+{
+ u32 i, c, n, v, is_wrap;
+
+ c = vm->main_loop_count++;
+
+ is_wrap = (c & pow2_mask (VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)) == 0;
+
+ if (is_wrap)
+ wraps++;
+
+ i = vlib_vector_input_stats_index (vm, /* delta */ is_wrap);
+
+ v = is_wrap ? 0 : vm->vector_counts_per_main_loop[i];
+ n = is_wrap ? 0 : vm->node_counts_per_main_loop[i];
+
+ v += vm->main_loop_vectors_processed;
+ n += vm->main_loop_nodes_processed;
+ vm->main_loop_vectors_processed = 0;
+ vm->main_loop_nodes_processed = 0;
+ vm->vector_counts_per_main_loop[i] = v;
+ vm->node_counts_per_main_loop[i] = n;
+
+ if (PREDICT_FALSE (vm->main_loop_exit_now))
+ clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI);
+}
+
+always_inline void vlib_set_queue_signal_callback
+ (vlib_main_t * vm, void (*fp) (vlib_main_t *))
+{
+ vm->queue_signal_callback = fp;
+}
+
+/* Main routine. */
+int vlib_main (vlib_main_t * vm, unformat_input_t * input);
+
+/* Thread stacks, for os_get_thread_index */
+extern u8 **vlib_thread_stacks;
+
+/* Number of thread stacks that the application needs */
+u32 vlib_app_num_thread_stacks_needed (void) __attribute__ ((weak));
+
+extern void vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n);
+
+#endif /* included_vlib_main_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/mc.c b/src/vlib/mc.c
new file mode 100644
index 00000000..8fde0913
--- /dev/null
+++ b/src/vlib/mc.c
@@ -0,0 +1,2609 @@
+/*
+ * mc.c: vlib reliable sequenced multicast distributed applications
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+/*
+ * Set to 1 to enable msg id training wheels, which are useful for
+ * tracking down catchup and/or partitioned network problems.
+ */
+#define MSG_ID_DEBUG 0
+
+static format_function_t format_mc_stream_state;
+
+static u32
+elog_id_for_peer_id (mc_main_t * m, u64 peer_id)
+{
+ uword *p, r;
+ mhash_t *h = &m->elog_id_by_peer_id;
+
+ if (!m->elog_id_by_peer_id.hash)
+ mhash_init (h, sizeof (uword), sizeof (mc_peer_id_t));
+
+ p = mhash_get (h, &peer_id);
+ if (p)
+ return p[0];
+ r = elog_string (m->elog_main, "%U", m->transport.format_peer_id, peer_id);
+ mhash_set (h, &peer_id, r, /* old_value */ 0);
+ return r;
+}
+
+static u32
+elog_id_for_msg_name (mc_main_t * m, char *msg_name)
+{
+ uword *p, r;
+ uword *h = m->elog_id_by_msg_name;
+ u8 *name_copy;
+
+ if (!h)
+ h = m->elog_id_by_msg_name = hash_create_string (0, sizeof (uword));
+
+ p = hash_get_mem (h, msg_name);
+ if (p)
+ return p[0];
+ r = elog_string (m->elog_main, "%s", msg_name);
+
+ name_copy = format (0, "%s%c", msg_name, 0);
+
+ hash_set_mem (h, name_copy, r);
+ m->elog_id_by_msg_name = h;
+
+ return r;
+}
+
+static void
+elog_tx_msg (mc_main_t * m, u32 stream_id, u32 local_sequence,
+ u32 retry_count)
+{
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "tx-msg: stream %d local seq %d attempt %d",
+ .format_args = "i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 stream_id, local_sequence, retry_count;
+ } *ed;
+ ed = ELOG_DATA (m->elog_main, e);
+ ed->stream_id = stream_id;
+ ed->local_sequence = local_sequence;
+ ed->retry_count = retry_count;
+ }
+}
+
+/*
+ * seq_cmp
+ * correctly compare two unsigned sequence numbers.
+ * This function works so long as x and y are within 2**(n-1) of each
+ * other, where n = bits(x, y).
+ *
+ * Magic decoder ring:
+ * seq_cmp == 0 => x and y are equal
+ * seq_cmp < 0 => x is "in the past" with respect to y
+ * seq_cmp > 0 => x is "in the future" with respect to y
+ */
+always_inline i32
+mc_seq_cmp (u32 x, u32 y)
+{
+ return (i32) x - (i32) y;
+}
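+
+/* Worked examples:
+     mc_seq_cmp (5, 5)                   =>  0  (equal)
+     mc_seq_cmp (4, 5)                   => -1  (x in the past)
+     mc_seq_cmp (0x00000002, 0xfffffffe) =>  4  (x in the future;
+                                                 correct across u32 wrap) */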
+
+void *
+mc_get_vlib_buffer (vlib_main_t * vm, u32 n_bytes, u32 * bi_return)
+{
+ u32 n_alloc, bi;
+ vlib_buffer_t *b;
+
+ n_alloc = vlib_buffer_alloc (vm, &bi, 1);
+ ASSERT (n_alloc == 1);
+
+ b = vlib_get_buffer (vm, bi);
+ b->current_length = n_bytes;
+ *bi_return = bi;
+ return (void *) b->data;
+}
+
+static void
+delete_peer_with_index (mc_main_t * mcm, mc_stream_t * s,
+ uword index, int notify_application)
+{
+ mc_stream_peer_t *p = pool_elt_at_index (s->peers, index);
+ ASSERT (p != 0);
+ if (s->config.peer_died && notify_application)
+ s->config.peer_died (mcm, s, p->id);
+
+ s->all_peer_bitmap = clib_bitmap_andnoti (s->all_peer_bitmap, p - s->peers);
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "delete peer %s from all_peer_bitmap",
+ .format_args = "T4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 peer;
+ } *ed = 0;
+
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+ }
+ /* Do not delete the pool / hash table entries, or we lose sequence number state */
+}
+
+static mc_stream_peer_t *
+get_or_create_peer_with_id (mc_main_t * mcm,
+ mc_stream_t * s, mc_peer_id_t id, int *created)
+{
+ uword *q = mhash_get (&s->peer_index_by_id, &id);
+ mc_stream_peer_t *p;
+
+ if (q)
+ {
+ p = pool_elt_at_index (s->peers, q[0]);
+ goto done;
+ }
+
+ pool_get (s->peers, p);
+ memset (p, 0, sizeof (p[0]));
+ p->id = id;
+ p->last_sequence_received = ~0;
+ mhash_set (&s->peer_index_by_id, &id, p - s->peers, /* old_value */ 0);
+ if (created)
+ *created = 1;
+
+done:
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "get_or_create %s peer %s stream %d seq %d",
+ .format_args = "t4T4i4i4",
+ .n_enum_strings = 2,
+ .enum_strings = {
+ "old", "new",
+ },
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 is_new, peer, stream_index, rx_sequence;
+ } *ed = 0;
+
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->is_new = q ? 0 : 1;
+ ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+ ed->stream_index = s->index;
+ ed->rx_sequence = p->last_sequence_received;
+ }
+ /* $$$$ Enable or reenable this peer */
+ s->all_peer_bitmap = clib_bitmap_ori (s->all_peer_bitmap, p - s->peers);
+ return p;
+}
+
+static void
+maybe_send_window_open_event (vlib_main_t * vm, mc_stream_t * stream)
+{
+ vlib_one_time_waiting_process_t *p;
+
+ if (pool_elts (stream->retry_pool) >= stream->config.window_size)
+ return;
+
+ vec_foreach (p, stream->procs_waiting_for_open_window)
+ vlib_signal_one_time_waiting_process (vm, p);
+
+ if (stream->procs_waiting_for_open_window)
+ _vec_len (stream->procs_waiting_for_open_window) = 0;
+}
+
+static void
+mc_retry_free (mc_main_t * mcm, mc_stream_t * s, mc_retry_t * r)
+{
+ mc_retry_t record, *retp;
+
+ if (r->unacked_by_peer_bitmap)
+ _vec_len (r->unacked_by_peer_bitmap) = 0;
+
+ if (clib_fifo_elts (s->retired_fifo) >= 2 * s->config.window_size)
+ {
+ clib_fifo_sub1 (s->retired_fifo, record);
+ vlib_buffer_free_one (mcm->vlib_main, record.buffer_index);
+ }
+
+ clib_fifo_add2 (s->retired_fifo, retp);
+
+ retp->buffer_index = r->buffer_index;
+ retp->local_sequence = r->local_sequence;
+
+ r->buffer_index = ~0; /* poison buffer index in this retry */
+}
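+
+/* Retired retries are parked in s->retired_fifo (capped at twice the
+   window size) so that mc_resend_retired below can replay a message
+   which a peer reports as missing even after all known peers acked it. */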
+
+static void
+mc_resend_retired (mc_main_t * mcm, mc_stream_t * s, u32 local_sequence)
+{
+ mc_retry_t *retry;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "resend-retired: search for local seq %d",
+ .format_args = "i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 local_sequence;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->local_sequence = local_sequence;
+ }
+
+ /* *INDENT-OFF* */
+ clib_fifo_foreach (retry, s->retired_fifo,
+ ({
+ if (retry->local_sequence == local_sequence)
+ {
+        elog_tx_msg (mcm, s->index, retry->local_sequence, -13);
+ mcm->transport.tx_buffer (mcm->transport.opaque,
+ MC_TRANSPORT_USER_REQUEST_TO_RELAY,
+ retry->buffer_index);
+ return;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "resend-retired: FAILED search for local seq %d",
+ .format_args = "i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 local_sequence;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->local_sequence = local_sequence;
+ }
+}
+
+static uword *
+delete_retry_fifo_elt (mc_main_t * mcm,
+ mc_stream_t * stream,
+ mc_retry_t * r, uword * dead_peer_bitmap)
+{
+ mc_stream_peer_t *p;
+
+ /* *INDENT-OFF* */
+ pool_foreach (p, stream->peers, ({
+ uword pi = p - stream->peers;
+ uword is_alive = 0 == clib_bitmap_get (r->unacked_by_peer_bitmap, pi);
+
+ if (! is_alive)
+ dead_peer_bitmap = clib_bitmap_ori (dead_peer_bitmap, pi);
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "delete_retry_fifo_elt: peer %s is %s",
+ .format_args = "T4t4",
+ .n_enum_strings = 2,
+ .enum_strings = { "alive", "dead", },
+ };
+ struct { u32 peer, is_alive; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+ ed->is_alive = is_alive;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ hash_unset (stream->retry_index_by_local_sequence, r->local_sequence);
+ mc_retry_free (mcm, stream, r);
+
+ return dead_peer_bitmap;
+}
+
+always_inline mc_retry_t *
+prev_retry (mc_stream_t * s, mc_retry_t * r)
+{
+ return (r->prev_index != ~0
+ ? pool_elt_at_index (s->retry_pool, r->prev_index) : 0);
+}
+
+always_inline mc_retry_t *
+next_retry (mc_stream_t * s, mc_retry_t * r)
+{
+ return (r->next_index != ~0
+ ? pool_elt_at_index (s->retry_pool, r->next_index) : 0);
+}
+
+always_inline void
+remove_retry_from_pool (mc_stream_t * s, mc_retry_t * r)
+{
+ mc_retry_t *p = prev_retry (s, r);
+ mc_retry_t *n = next_retry (s, r);
+
+ if (p)
+ p->next_index = r->next_index;
+ else
+ s->retry_head_index = r->next_index;
+ if (n)
+ n->prev_index = r->prev_index;
+ else
+ s->retry_tail_index = r->prev_index;
+
+ pool_put_index (s->retry_pool, r - s->retry_pool);
+}
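+
+/* The retry pool is threaded as a doubly-linked list through
+   prev_index / next_index (~0 terminates either end), anchored by
+   s->retry_head_index and s->retry_tail_index; remove_retry_from_pool
+   unlinks an element in O(1), and mc_stream_send appends at the tail. */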
+
+static void
+check_retry (mc_main_t * mcm, mc_stream_t * s)
+{
+ mc_retry_t *r;
+ vlib_main_t *vm = mcm->vlib_main;
+ f64 now = vlib_time_now (vm);
+ uword *dead_peer_bitmap = 0;
+ u32 ri, ri_next;
+
+ for (ri = s->retry_head_index; ri != ~0; ri = ri_next)
+ {
+ r = pool_elt_at_index (s->retry_pool, ri);
+ ri_next = r->next_index;
+
+ if (now < r->sent_at + s->config.retry_interval)
+ continue;
+
+ r->n_retries += 1;
+ if (r->n_retries > s->config.retry_limit)
+ {
+ dead_peer_bitmap =
+ delete_retry_fifo_elt (mcm, s, r, dead_peer_bitmap);
+ remove_retry_from_pool (s, r);
+ }
+ else
+ {
+ if (MC_EVENT_LOGGING > 0)
+ {
+ mc_stream_peer_t *p;
+
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "resend local seq %d attempt %d",
+ .format_args = "i4i4",
+ };
+ /* *INDENT-ON* */
+
+ /* *INDENT-OFF* */
+ pool_foreach (p, s->peers, ({
+ if (clib_bitmap_get (r->unacked_by_peer_bitmap, p - s->peers))
+ {
+ ELOG_TYPE_DECLARE (ev) = {
+ .format = "resend: needed by peer %s local seq %d",
+ .format_args = "T4i4",
+ };
+ struct { u32 peer, rx_sequence; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, ev);
+ ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+ ed->rx_sequence = r->local_sequence;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ struct
+ {
+ u32 sequence;
+ u32 trail;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->sequence = r->local_sequence;
+ ed->trail = r->n_retries;
+ }
+
+ r->sent_at = vlib_time_now (vm);
+ s->stats.n_retries += 1;
+
+ elog_tx_msg (mcm, s->index, r->local_sequence, r->n_retries);
+
+ mcm->transport.tx_buffer
+ (mcm->transport.opaque,
+ MC_TRANSPORT_USER_REQUEST_TO_RELAY, r->buffer_index);
+ }
+ }
+
+ maybe_send_window_open_event (mcm->vlib_main, s);
+
+ /* Delete any dead peers we've found. */
+ if (!clib_bitmap_is_zero (dead_peer_bitmap))
+ {
+ uword i;
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, dead_peer_bitmap, ({
+ delete_peer_with_index (mcm, s, i, /* notify_application */ 1);
+
+ /* Delete any references to just deleted peer in retry pool. */
+ pool_foreach (r, s->retry_pool, ({
+ r->unacked_by_peer_bitmap =
+ clib_bitmap_andnoti (r->unacked_by_peer_bitmap, i);
+ }));
+ }));
+/* *INDENT-ON* */
+ clib_bitmap_free (dead_peer_bitmap);
+ }
+}
+
+always_inline mc_main_t *
+mc_node_get_main (vlib_node_runtime_t * node)
+{
+ mc_main_t **p = (void *) node->runtime_data;
+ return p[0];
+}
+
+static uword
+mc_retry_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ mc_main_t *mcm = mc_node_get_main (node);
+ mc_stream_t *s;
+
+ while (1)
+ {
+ vlib_process_suspend (vm, 1.0);
+ vec_foreach (s, mcm->stream_vector)
+ {
+ if (s->state != MC_STREAM_STATE_invalid)
+ check_retry (mcm, s);
+ }
+ }
+ return 0; /* not likely */
+}
+
+static void
+send_join_or_leave_request (mc_main_t * mcm, u32 stream_index, u32 is_join)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_msg_join_or_leave_request_t *mp;
+ u32 bi;
+
+ mp = mc_get_vlib_buffer (vm, sizeof (mp[0]), &bi);
+ memset (mp, 0, sizeof (*mp));
+ mp->type = MC_MSG_TYPE_join_or_leave_request;
+ mp->peer_id = mcm->transport.our_ack_peer_id;
+ mp->stream_index = stream_index;
+ mp->is_join = is_join;
+
+ mc_byte_swap_msg_join_or_leave_request (mp);
+
+ /*
+   * These msgs are unnumbered and unordered, so send them on the
+   * from-relay channel.
+ */
+ mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_JOIN, bi);
+}
+
+static uword
+mc_join_ager_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ mc_main_t *mcm = mc_node_get_main (node);
+
+ while (1)
+ {
+ if (mcm->joins_in_progress)
+ {
+ mc_stream_t *s;
+ vlib_one_time_waiting_process_t *p;
+ f64 now = vlib_time_now (vm);
+
+ vec_foreach (s, mcm->stream_vector)
+ {
+ if (s->state != MC_STREAM_STATE_join_in_progress)
+ continue;
+
+ if (now > s->join_timeout)
+ {
+ s->state = MC_STREAM_STATE_ready;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "stream %d join timeout",
+ };
+ /* *INDENT-ON* */
+ ELOG (mcm->elog_main, e, s->index);
+ }
+ /* Make sure that this app instance exists as a stream peer,
+ or we may answer a catchup request with a NULL
+ all_peer_bitmap... */
+ (void) get_or_create_peer_with_id
+ (mcm, s, mcm->transport.our_ack_peer_id, /* created */ 0);
+
+ vec_foreach (p, s->procs_waiting_for_join_done)
+ vlib_signal_one_time_waiting_process (vm, p);
+ if (s->procs_waiting_for_join_done)
+ _vec_len (s->procs_waiting_for_join_done) = 0;
+
+ mcm->joins_in_progress--;
+ ASSERT (mcm->joins_in_progress >= 0);
+ }
+ else
+ {
+		  /* Resend join request which may have been lost. */
+ send_join_or_leave_request (mcm, s->index, 1 /* is_join */ );
+
+ /* We're *not* alone, retry for as long as it takes */
+ if (mcm->relay_state == MC_RELAY_STATE_SLAVE)
+ s->join_timeout = vlib_time_now (vm) + 2.0;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "stream %d resend join request",
+ };
+ /* *INDENT-ON* */
+ ELOG (mcm->elog_main, e, s->index);
+ }
+ }
+ }
+ }
+
+ vlib_process_suspend (vm, .5);
+ }
+
+ return 0; /* not likely */
+}
+
+static void
+serialize_mc_register_stream_name (serialize_main_t * m, va_list * va)
+{
+ char *name = va_arg (*va, char *);
+ serialize_cstring (m, name);
+}
+
+static void
+elog_stream_name (char *buf, int n_buf_bytes, char *v)
+{
+ clib_memcpy (buf, v, clib_min (n_buf_bytes - 1, vec_len (v)));
+ buf[n_buf_bytes - 1] = 0;
+}
+
+static void
+unserialize_mc_register_stream_name (serialize_main_t * m, va_list * va)
+{
+ mc_main_t *mcm = va_arg (*va, mc_main_t *);
+ char *name;
+ mc_stream_t *s;
+ uword *p;
+
+ unserialize_cstring (m, &name);
+
+ if ((p = hash_get_mem (mcm->stream_index_by_name, name)))
+ {
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "stream index %d already named %s",
+ .format_args = "i4s16",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 stream_index;
+ char name[16];
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->stream_index = p[0];
+ elog_stream_name (ed->name, sizeof (ed->name), name);
+ }
+
+ vec_free (name);
+ return;
+ }
+
+ vec_add2 (mcm->stream_vector, s, 1);
+ mc_stream_init (s);
+ s->state = MC_STREAM_STATE_name_known;
+ s->index = s - mcm->stream_vector;
+ s->config.name = name;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "stream index %d named %s",
+ .format_args = "i4s16",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 stream_index;
+ char name[16];
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->stream_index = s->index;
+ elog_stream_name (ed->name, sizeof (ed->name), name);
+ }
+
+ hash_set_mem (mcm->stream_index_by_name, name, s->index);
+
+ p = hash_get (mcm->procs_waiting_for_stream_name_by_name, name);
+ if (p)
+ {
+ vlib_one_time_waiting_process_t *wp, **w;
+ w = pool_elt_at_index (mcm->procs_waiting_for_stream_name_pool, p[0]);
+ vec_foreach (wp, w[0])
+ vlib_signal_one_time_waiting_process (mcm->vlib_main, wp);
+ pool_put (mcm->procs_waiting_for_stream_name_pool, w);
+ hash_unset_mem (mcm->procs_waiting_for_stream_name_by_name, name);
+ }
+}
+
+/* *INDENT-OFF* */
+MC_SERIALIZE_MSG (mc_register_stream_name_msg, static) =
+{
+ .name = "mc_register_stream_name",
+ .serialize = serialize_mc_register_stream_name,
+ .unserialize = unserialize_mc_register_stream_name,
+};
+/* *INDENT-ON* */
+
+void
+mc_rx_buffer_unserialize (mc_main_t * mcm,
+ mc_stream_t * stream,
+ mc_peer_id_t peer_id, u32 buffer_index)
+{
+ return mc_unserialize (mcm, stream, buffer_index);
+}
+
+static u8 *
+mc_internal_catchup_snapshot (mc_main_t * mcm,
+ u8 * data_vector,
+ u32 last_global_sequence_processed)
+{
+ serialize_main_t m;
+
+ /* Append serialized data to data vector. */
+ serialize_open_vector (&m, data_vector);
+ m.stream.current_buffer_index = vec_len (data_vector);
+
+ serialize (&m, serialize_mc_main, mcm);
+ return serialize_close_vector (&m);
+}
+
+static void
+mc_internal_catchup (mc_main_t * mcm, u8 * data, u32 n_data_bytes)
+{
+ serialize_main_t s;
+
+ unserialize_open_data (&s, data, n_data_bytes);
+
+ unserialize (&s, unserialize_mc_main, mcm);
+}
+
+/* Overridden from the application layer, not actually used here */
+void mc_stream_join_process_hold (void) __attribute__ ((weak));
+void
+mc_stream_join_process_hold (void)
+{
+}
+
+static u32
+mc_stream_join_helper (mc_main_t * mcm,
+ mc_stream_config_t * config, u32 is_internal)
+{
+ mc_stream_t *s;
+ vlib_main_t *vm = mcm->vlib_main;
+
+ s = 0;
+ if (!is_internal)
+ {
+ uword *p;
+
+ /* Already have a stream with given name? */
+ if ((s = mc_stream_by_name (mcm, config->name)))
+ {
+ /* Already joined and ready? */
+ if (s->state == MC_STREAM_STATE_ready)
+ return s->index;
+ }
+
+ /* First join MC internal stream. */
+ if (!mcm->stream_vector
+ || (mcm->stream_vector[MC_STREAM_INDEX_INTERNAL].state
+ == MC_STREAM_STATE_invalid))
+ {
+ static mc_stream_config_t c = {
+ .name = "mc-internal",
+ .rx_buffer = mc_rx_buffer_unserialize,
+ .catchup = mc_internal_catchup,
+ .catchup_snapshot = mc_internal_catchup_snapshot,
+ };
+
+ c.save_snapshot = config->save_snapshot;
+
+ mc_stream_join_helper (mcm, &c, /* is_internal */ 1);
+ }
+
+ /* If stream is still unknown register this name and wait for
+ sequenced message to name stream. This way all peers agree
+ on stream name to index mappings. */
+ s = mc_stream_by_name (mcm, config->name);
+ if (!s)
+ {
+ vlib_one_time_waiting_process_t *wp, **w;
+ u8 *name_copy = format (0, "%s", config->name);
+
+ mc_serialize_stream (mcm,
+ MC_STREAM_INDEX_INTERNAL,
+ &mc_register_stream_name_msg, config->name);
+
+ /* Wait for this stream to be named. */
+ p =
+ hash_get_mem (mcm->procs_waiting_for_stream_name_by_name,
+ name_copy);
+ if (p)
+ w =
+ pool_elt_at_index (mcm->procs_waiting_for_stream_name_pool,
+ p[0]);
+ else
+ {
+ pool_get (mcm->procs_waiting_for_stream_name_pool, w);
+ if (!mcm->procs_waiting_for_stream_name_by_name)
+		mcm->procs_waiting_for_stream_name_by_name
+		  = hash_create_string (/* elts */ 0,
+					/* value size */ sizeof (uword));
+ hash_set_mem (mcm->procs_waiting_for_stream_name_by_name,
+ name_copy,
+ w - mcm->procs_waiting_for_stream_name_pool);
+ w[0] = 0;
+ }
+
+ vec_add2 (w[0], wp, 1);
+ vlib_current_process_wait_for_one_time_event (vm, wp);
+ vec_free (name_copy);
+ }
+
+ /* Name should be known now. */
+ s = mc_stream_by_name (mcm, config->name);
+ ASSERT (s != 0);
+ ASSERT (s->state == MC_STREAM_STATE_name_known);
+ }
+
+ if (!s)
+ {
+ vec_add2 (mcm->stream_vector, s, 1);
+ mc_stream_init (s);
+ s->index = s - mcm->stream_vector;
+ }
+
+ {
+ /* Save name since we could have already used it as hash key. */
+ char *name_save = s->config.name;
+
+ s->config = config[0];
+
+ if (name_save)
+ s->config.name = name_save;
+ }
+
+ if (s->config.window_size == 0)
+ s->config.window_size = 8;
+
+ if (s->config.retry_interval == 0.0)
+ s->config.retry_interval = 1.0;
+
+ /* Sanity. */
+ ASSERT (s->config.retry_interval < 30);
+
+ if (s->config.retry_limit == 0)
+ s->config.retry_limit = 7;
+
+ s->state = MC_STREAM_STATE_join_in_progress;
+ if (!s->peer_index_by_id.hash)
+ mhash_init (&s->peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t));
+
+ /* If we don't hear from someone in 5 seconds, we're alone */
+ s->join_timeout = vlib_time_now (vm) + 5.0;
+ mcm->joins_in_progress++;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "stream index %d join request %s",
+ .format_args = "i4s16",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 stream_index;
+ char name[16];
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->stream_index = s->index;
+ elog_stream_name (ed->name, sizeof (ed->name), s->config.name);
+ }
+
+ send_join_or_leave_request (mcm, s->index, 1 /* join */ );
+
+ vlib_current_process_wait_for_one_time_event_vector
+ (vm, &s->procs_waiting_for_join_done);
+
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE (e, "join complete stream %d");
+ ELOG (mcm->elog_main, e, s->index);
+ }
+
+ return s->index;
+}
+
+u32
+mc_stream_join (mc_main_t * mcm, mc_stream_config_t * config)
+{
+ return mc_stream_join_helper (mcm, config, /* is_internal */ 0);
+}
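+
+/* Usage sketch (illustrative; the stream name and callbacks are
+   hypothetical). Must be called from process context, since the join
+   blocks until the stream is ready or the join times out; a zero
+   window_size, retry_interval or retry_limit defaults to 8, 1.0 and 7:
+
+     mc_stream_config_t config = {
+       .name = "my-stream",
+       .rx_buffer = my_rx_buffer,
+       .catchup = my_catchup,
+       .catchup_snapshot = my_catchup_snapshot,
+     };
+     u32 stream_index = mc_stream_join (mcm, &config);  */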
+
+void
+mc_stream_leave (mc_main_t * mcm, u32 stream_index)
+{
+ mc_stream_t *s = mc_stream_by_index (mcm, stream_index);
+
+ if (!s)
+ return;
+
+ if (MC_EVENT_LOGGING)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+      .format = "leave-stream: %d",
+      .format_args = "i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 index;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->index = stream_index;
+ }
+
+ send_join_or_leave_request (mcm, stream_index, 0 /* is_join */ );
+ mc_stream_free (s);
+ s->state = MC_STREAM_STATE_name_known;
+}
+
+void
+mc_msg_join_or_leave_request_handler (mc_main_t * mcm,
+ mc_msg_join_or_leave_request_t * req,
+ u32 buffer_index)
+{
+ mc_stream_t *s;
+ mc_msg_join_reply_t *rep;
+ u32 bi;
+
+ mc_byte_swap_msg_join_or_leave_request (req);
+
+ s = mc_stream_by_index (mcm, req->stream_index);
+ if (!s || s->state != MC_STREAM_STATE_ready)
+ return;
+
+ /* If the peer is joining, create it */
+ if (req->is_join)
+ {
+ mc_stream_t *this_s;
+
+ /* We're not in a position to catch up a peer until all
+ stream joins are complete. */
+ if (0)
+ {
+	      /* XXX This is hard to test, so it is disabled for now. */
+ vec_foreach (this_s, mcm->stream_vector)
+ {
+ if (this_s->state != MC_STREAM_STATE_ready
+ && this_s->state != MC_STREAM_STATE_name_known)
+ return;
+ }
+ }
+ else if (mcm->joins_in_progress > 0)
+ return;
+
+ (void) get_or_create_peer_with_id (mcm, s, req->peer_id,
+ /* created */ 0);
+
+ rep = mc_get_vlib_buffer (mcm->vlib_main, sizeof (rep[0]), &bi);
+ memset (rep, 0, sizeof (rep[0]));
+ rep->type = MC_MSG_TYPE_join_reply;
+ rep->stream_index = req->stream_index;
+
+ mc_byte_swap_msg_join_reply (rep);
+ /* These two are already in network byte order... */
+ rep->peer_id = mcm->transport.our_ack_peer_id;
+ rep->catchup_peer_id = mcm->transport.our_catchup_peer_id;
+
+ mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_JOIN, bi);
+ }
+ else
+ {
+ if (s->config.peer_died)
+ s->config.peer_died (mcm, s, req->peer_id);
+ }
+}
+
+void
+mc_msg_join_reply_handler (mc_main_t * mcm,
+ mc_msg_join_reply_t * mp, u32 buffer_index)
+{
+ mc_stream_t *s;
+
+ mc_byte_swap_msg_join_reply (mp);
+
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ if (!s || s->state != MC_STREAM_STATE_join_in_progress)
+ return;
+
+ /* Switch to catchup state; next join reply
+ for this stream will be ignored. */
+ s->state = MC_STREAM_STATE_catchup;
+
+ mcm->joins_in_progress--;
+ mcm->transport.catchup_request_fun (mcm->transport.opaque,
+ mp->stream_index, mp->catchup_peer_id);
+}
+
+void
+mc_wait_for_stream_ready (mc_main_t * m, char *stream_name)
+{
+ mc_stream_t *s;
+
+ while (1)
+ {
+ s = mc_stream_by_name (m, stream_name);
+ if (s)
+ break;
+ vlib_process_suspend (m->vlib_main, .1);
+ }
+
+ /* It's OK to send a message in catchup and ready states. */
+ if (s->state == MC_STREAM_STATE_catchup
+ || s->state == MC_STREAM_STATE_ready)
+ return;
+
+ /* Otherwise we are waiting for a join to finish. */
+ vlib_current_process_wait_for_one_time_event_vector
+ (m->vlib_main, &s->procs_waiting_for_join_done);
+}
+
+u32
+mc_stream_send (mc_main_t * mcm, u32 stream_index, u32 buffer_index)
+{
+ mc_stream_t *s = mc_stream_by_index (mcm, stream_index);
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_retry_t *r;
+ mc_msg_user_request_t *mp;
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+ u32 ri;
+
+ if (!s)
+ return 0;
+
+ if (s->state != MC_STREAM_STATE_ready)
+ vlib_current_process_wait_for_one_time_event_vector
+ (vm, &s->procs_waiting_for_join_done);
+
+ while (pool_elts (s->retry_pool) >= s->config.window_size)
+ {
+ vlib_current_process_wait_for_one_time_event_vector
+ (vm, &s->procs_waiting_for_open_window);
+ }
+
+ pool_get (s->retry_pool, r);
+ ri = r - s->retry_pool;
+
+ r->prev_index = s->retry_tail_index;
+ r->next_index = ~0;
+ s->retry_tail_index = ri;
+
+ if (r->prev_index == ~0)
+ s->retry_head_index = ri;
+ else
+ {
+ mc_retry_t *p = pool_elt_at_index (s->retry_pool, r->prev_index);
+ p->next_index = ri;
+ }
+
+ vlib_buffer_advance (b, -sizeof (mp[0]));
+ mp = vlib_buffer_get_current (b);
+
+ mp->peer_id = mcm->transport.our_ack_peer_id;
+  /* mp->global_sequence is set by the relay agent. */
+ mp->global_sequence = 0xdeadbeef;
+ mp->stream_index = s->index;
+ mp->local_sequence = s->our_local_sequence++;
+ mp->n_data_bytes =
+ vlib_buffer_index_length_in_chain (vm, buffer_index) - sizeof (mp[0]);
+
+ r->buffer_index = buffer_index;
+ r->local_sequence = mp->local_sequence;
+ r->sent_at = vlib_time_now (vm);
+ r->n_retries = 0;
+
+ /* Retry will be freed when all currently known peers have acked. */
+ vec_validate (r->unacked_by_peer_bitmap, vec_len (s->all_peer_bitmap) - 1);
+ vec_copy (r->unacked_by_peer_bitmap, s->all_peer_bitmap);
+
+ hash_set (s->retry_index_by_local_sequence, r->local_sequence,
+ r - s->retry_pool);
+
+ elog_tx_msg (mcm, s->index, mp->local_sequence, r->n_retries);
+
+ mc_byte_swap_msg_user_request (mp);
+
+ mcm->transport.tx_buffer (mcm->transport.opaque,
+ MC_TRANSPORT_USER_REQUEST_TO_RELAY, buffer_index);
+
+ s->user_requests_sent++;
+
+ /* return amount of window remaining */
+ return s->config.window_size - pool_elts (s->retry_pool);
+}
+
+void
+mc_msg_user_request_handler (mc_main_t * mcm, mc_msg_user_request_t * mp,
+ u32 buffer_index)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_stream_t *s;
+ mc_stream_peer_t *peer;
+ i32 seq_cmp_result;
+ static int once = 0;
+
+ mc_byte_swap_msg_user_request (mp);
+
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ /* Not signed up for this stream? Turf-o-matic */
+ if (!s || s->state != MC_STREAM_STATE_ready)
+ {
+ vlib_buffer_free_one (vm, buffer_index);
+ return;
+ }
+
+ /* Find peer, including ourselves. */
+ peer = get_or_create_peer_with_id (mcm, s, mp->peer_id,
+ /* created */ 0);
+
+ seq_cmp_result = mc_seq_cmp (mp->local_sequence,
+ peer->last_sequence_received + 1);
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "rx-msg: peer %s stream %d rx seq %d seq_cmp %d",
+ .format_args = "T4i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 peer, stream_index, rx_sequence;
+ i32 seq_cmp_result;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+ ed->stream_index = mp->stream_index;
+ ed->rx_sequence = mp->local_sequence;
+ ed->seq_cmp_result = seq_cmp_result;
+ }
+
+ if (0 && mp->stream_index == 1 && once == 0)
+ {
+ once = 1;
+ ELOG_TYPE (e, "FAKE lost msg on stream 1");
+ ELOG (mcm->elog_main, e, 0);
+ return;
+ }
+
+ peer->last_sequence_received += seq_cmp_result == 0;
+ s->user_requests_received++;
+
+ if (seq_cmp_result > 0)
+ peer->stats.n_msgs_from_future += 1;
+
+ /* Send ack even if msg from future */
+ if (1)
+ {
+ mc_msg_user_ack_t *rp;
+ u32 bi;
+
+ rp = mc_get_vlib_buffer (vm, sizeof (rp[0]), &bi);
+ rp->peer_id = mcm->transport.our_ack_peer_id;
+ rp->stream_index = s->index;
+ rp->local_sequence = mp->local_sequence;
+ rp->seq_cmp_result = seq_cmp_result;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "tx-ack: stream %d local seq %d",
+ .format_args = "i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 stream_index;
+ u32 local_sequence;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->stream_index = rp->stream_index;
+ ed->local_sequence = rp->local_sequence;
+ }
+
+ mc_byte_swap_msg_user_ack (rp);
+
+ mcm->transport.tx_ack (mcm->transport.opaque, mp->peer_id, bi);
+ /* Msg from past? If so, free the buffer... */
+ if (seq_cmp_result < 0)
+ {
+ vlib_buffer_free_one (vm, buffer_index);
+ peer->stats.n_msgs_from_past += 1;
+ }
+ }
+
+ if (seq_cmp_result == 0)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+ switch (s->state)
+ {
+ case MC_STREAM_STATE_ready:
+ vlib_buffer_advance (b, sizeof (mp[0]));
+ s->config.rx_buffer (mcm, s, mp->peer_id, buffer_index);
+
+ /* Stream vector can change address via rx callback for mc-internal
+ stream. */
+ s = mc_stream_by_index (mcm, mp->stream_index);
+ ASSERT (s != 0);
+ s->last_global_sequence_processed = mp->global_sequence;
+ break;
+
+ case MC_STREAM_STATE_catchup:
+ clib_fifo_add1 (s->catchup_fifo, buffer_index);
+ break;
+
+ default:
+ clib_warning ("stream in unknown state %U",
+ format_mc_stream_state, s->state);
+ break;
+ }
+ }
+}
+
+void
+mc_msg_user_ack_handler (mc_main_t * mcm, mc_msg_user_ack_t * mp,
+ u32 buffer_index)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ uword *p;
+ mc_stream_t *s;
+ mc_stream_peer_t *peer;
+ mc_retry_t *r;
+ int peer_created = 0;
+
+ mc_byte_swap_msg_user_ack (mp);
+
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "rx-ack: local seq %d peer %s seq_cmp_result %d",
+ .format_args = "i4T4i4",
+ };
+ /* *INDENT-ON* */
+
+ struct
+ {
+ u32 local_sequence;
+ u32 peer;
+ i32 seq_cmp_result;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->local_sequence = mp->local_sequence;
+ ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+ ed->seq_cmp_result = mp->seq_cmp_result;
+ }
+
+ /* Unknown stream? */
+ if (!s)
+ return;
+
+ /* Find the peer which just ack'ed. */
+ peer = get_or_create_peer_with_id (mcm, s, mp->peer_id,
+ /* created */ &peer_created);
+
+ /*
+ * Peer reports message from the future. If it's not in the retry
+ * fifo, look for a retired message.
+ */
+ if (mp->seq_cmp_result > 0)
+ {
+ p = hash_get (s->retry_index_by_local_sequence, mp->local_sequence -
+ mp->seq_cmp_result);
+ if (p == 0)
+ mc_resend_retired (mcm, s, mp->local_sequence - mp->seq_cmp_result);
+
+ /* Normal retry should fix it... */
+ return;
+ }
+
+ /*
+ * Pointer to the indicated retry fifo entry.
+ * Worth hashing because we could use a window size of 100 or 1000.
+ */
+ p = hash_get (s->retry_index_by_local_sequence, mp->local_sequence);
+
+ /*
+   * Is this a duplicate ACK, received after we've retired the
+   * fifo entry? This can happen when learning about new
+   * peers.
+ */
+ if (p == 0)
+ {
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "ack: for seq %d from peer %s no fifo elt",
+ .format_args = "i4T4",
+ };
+ /* *INDENT-ON* */
+
+ struct
+ {
+ u32 seq;
+ u32 peer;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->seq = mp->local_sequence;
+ ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+ }
+
+ return;
+ }
+
+ r = pool_elt_at_index (s->retry_pool, p[0]);
+
+ /* Make sure that this new peer ACKs our msgs from now on */
+ if (peer_created)
+ {
+ mc_retry_t *later_retry = next_retry (s, r);
+
+ while (later_retry)
+ {
+ later_retry->unacked_by_peer_bitmap =
+ clib_bitmap_ori (later_retry->unacked_by_peer_bitmap,
+ peer - s->peers);
+ later_retry = next_retry (s, later_retry);
+ }
+ }
+
+ ASSERT (mp->local_sequence == r->local_sequence);
+
+ /* If we weren't expecting to hear from this peer */
+ if (!peer_created &&
+ !clib_bitmap_get (r->unacked_by_peer_bitmap, peer - s->peers))
+ {
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "dup-ack: for seq %d from peer %s",
+ .format_args = "i4T4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 seq;
+ u32 peer;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->seq = r->local_sequence;
+ ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+ }
+ if (!clib_bitmap_is_zero (r->unacked_by_peer_bitmap))
+ return;
+ }
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "ack: for seq %d from peer %s",
+ .format_args = "i4T4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 seq;
+ u32 peer;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->seq = mp->local_sequence;
+ ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+ }
+
+ r->unacked_by_peer_bitmap =
+ clib_bitmap_andnoti (r->unacked_by_peer_bitmap, peer - s->peers);
+
+ /* Not all clients have ack'ed */
+ if (!clib_bitmap_is_zero (r->unacked_by_peer_bitmap))
+ {
+ return;
+ }
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "ack: retire fifo elt loc seq %d after %d acks",
+ .format_args = "i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 seq;
+ u32 npeers;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->seq = r->local_sequence;
+ ed->npeers = pool_elts (s->peers);
+ }
+
+ hash_unset (s->retry_index_by_local_sequence, mp->local_sequence);
+ mc_retry_free (mcm, s, r);
+ remove_retry_from_pool (s, r);
+ maybe_send_window_open_event (vm, s);
+}
+
+#define EVENT_MC_SEND_CATCHUP_DATA 0
+
+static uword
+mc_catchup_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ mc_main_t *mcm = mc_node_get_main (node);
+ uword *event_data = 0;
+ mc_catchup_process_arg_t *args;
+ int i;
+
+ while (1)
+ {
+ if (event_data)
+ _vec_len (event_data) = 0;
+ vlib_process_wait_for_event_with_type (vm, &event_data,
+ EVENT_MC_SEND_CATCHUP_DATA);
+
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ args = pool_elt_at_index (mcm->catchup_process_args, event_data[i]);
+
+ mcm->transport.catchup_send_fun (mcm->transport.opaque,
+ args->catchup_opaque,
+ args->catchup_snapshot);
+
+ /* Send function will free snapshot data vector. */
+ pool_put (mcm->catchup_process_args, args);
+ }
+ }
+
+ return 0; /* not likely */
+}
+
+static void
+serialize_mc_stream (serialize_main_t * m, va_list * va)
+{
+ mc_stream_t *s = va_arg (*va, mc_stream_t *);
+ mc_stream_peer_t *p;
+
+ serialize_integer (m, pool_elts (s->peers), sizeof (u32));
+ /* *INDENT-OFF* */
+ pool_foreach (p, s->peers, ({
+ u8 * x = serialize_get (m, sizeof (p->id));
+ clib_memcpy (x, p->id.as_u8, sizeof (p->id));
+ serialize_integer (m, p->last_sequence_received,
+ sizeof (p->last_sequence_received));
+ }));
+/* *INDENT-ON* */
+ serialize_bitmap (m, s->all_peer_bitmap);
+}
+
+void
+unserialize_mc_stream (serialize_main_t * m, va_list * va)
+{
+ mc_stream_t *s = va_arg (*va, mc_stream_t *);
+ u32 i, n_peers;
+ mc_stream_peer_t *p;
+
+ unserialize_integer (m, &n_peers, sizeof (u32));
+ mhash_init (&s->peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t));
+ for (i = 0; i < n_peers; i++)
+ {
+ u8 *x;
+ pool_get (s->peers, p);
+ x = unserialize_get (m, sizeof (p->id));
+ clib_memcpy (p->id.as_u8, x, sizeof (p->id));
+ unserialize_integer (m, &p->last_sequence_received,
+ sizeof (p->last_sequence_received));
+ mhash_set (&s->peer_index_by_id, &p->id, p - s->peers, /* old_value */
+ 0);
+ }
+ s->all_peer_bitmap = unserialize_bitmap (m);
+
+ /* This is really bad. */
+ if (!s->all_peer_bitmap)
+ clib_warning ("BUG: stream %s all_peer_bitmap NULL", s->config.name);
+}
+
+void
+mc_msg_catchup_request_handler (mc_main_t * mcm,
+ mc_msg_catchup_request_t * req,
+ u32 catchup_opaque)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_stream_t *s;
+ mc_catchup_process_arg_t *args;
+
+ mc_byte_swap_msg_catchup_request (req);
+
+ s = mc_stream_by_index (mcm, req->stream_index);
+ if (!s || s->state != MC_STREAM_STATE_ready)
+ return;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "catchup-request: from %s stream %d",
+ .format_args = "T4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 peer, stream;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->peer = elog_id_for_peer_id (mcm, req->peer_id.as_u64);
+ ed->stream = req->stream_index;
+ }
+
+ /*
+   * The application has to snapshot its data structures right
+   * here, right now. If we process any messages after
+   * noting the last global sequence we've processed, the client
+   * won't be able to accurately reconstruct our data structures.
+   *
+   * Once the data structures are e.g. vec_dup()'ed, we
+   * send the resulting messages from a separate process, to
+   * make sure that we don't cause a bunch of message retransmissions.
+ */
+ pool_get (mcm->catchup_process_args, args);
+
+ args->stream_index = s - mcm->stream_vector;
+ args->catchup_opaque = catchup_opaque;
+ args->catchup_snapshot = 0;
+
+ /* Construct catchup reply and snapshot state for stream to send as
+ catchup reply payload. */
+ {
+ mc_msg_catchup_reply_t *rep;
+ serialize_main_t m;
+
+ vec_resize (args->catchup_snapshot, sizeof (rep[0]));
+
+ rep = (void *) args->catchup_snapshot;
+
+ rep->peer_id = req->peer_id;
+ rep->stream_index = req->stream_index;
+ rep->last_global_sequence_included = s->last_global_sequence_processed;
+
+ /* Setup for serialize to append to catchup snapshot. */
+ serialize_open_vector (&m, args->catchup_snapshot);
+ m.stream.current_buffer_index = vec_len (m.stream.buffer);
+
+ serialize (&m, serialize_mc_stream, s);
+
+ args->catchup_snapshot = serialize_close_vector (&m);
+
+ /* Actually copy internal state */
+ args->catchup_snapshot = s->config.catchup_snapshot
+ (mcm, args->catchup_snapshot, rep->last_global_sequence_included);
+
+ rep = (void *) args->catchup_snapshot;
+ rep->n_data_bytes = vec_len (args->catchup_snapshot) - sizeof (rep[0]);
+
+ mc_byte_swap_msg_catchup_reply (rep);
+ }
+
+ /* now go send it... */
+ vlib_process_signal_event (vm, mcm->catchup_process,
+ EVENT_MC_SEND_CATCHUP_DATA,
+ args - mcm->catchup_process_args);
+}
+
+#define EVENT_MC_UNSERIALIZE_BUFFER 0
+#define EVENT_MC_UNSERIALIZE_CATCHUP 1
+
+void
+mc_msg_catchup_reply_handler (mc_main_t * mcm, mc_msg_catchup_reply_t * mp,
+ u32 catchup_opaque)
+{
+ vlib_process_signal_event (mcm->vlib_main,
+ mcm->unserialize_process,
+ EVENT_MC_UNSERIALIZE_CATCHUP,
+ pointer_to_uword (mp));
+}
+
+static void
+perform_catchup (mc_main_t * mcm, mc_msg_catchup_reply_t * mp)
+{
+ mc_stream_t *s;
+ i32 seq_cmp_result;
+
+ mc_byte_swap_msg_catchup_reply (mp);
+
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ /* Never heard of this stream or already caught up. */
+ if (!s || s->state == MC_STREAM_STATE_ready)
+ return;
+
+ {
+ serialize_main_t m;
+ mc_stream_peer_t *p;
+ u32 n_stream_bytes;
+
+ /* For offline sim replay: save the entire catchup snapshot... */
+ if (s->config.save_snapshot)
+ s->config.save_snapshot (mcm, /* is_catchup */ 1, mp->data,
+ mp->n_data_bytes);
+
+ unserialize_open_data (&m, mp->data, mp->n_data_bytes);
+ unserialize (&m, unserialize_mc_stream, s);
+
+ /* Make sure we start numbering our messages as expected */
+ /* *INDENT-OFF* */
+ pool_foreach (p, s->peers, ({
+ if (p->id.as_u64 == mcm->transport.our_ack_peer_id.as_u64)
+ s->our_local_sequence = p->last_sequence_received + 1;
+ }));
+/* *INDENT-ON* */
+
+ n_stream_bytes = m.stream.current_buffer_index;
+
+ /* No need to unserialize close; nothing to free. */
+
+ /* After serialized stream is user's catchup data. */
+ s->config.catchup (mcm, mp->data + n_stream_bytes,
+ mp->n_data_bytes - n_stream_bytes);
+ }
+
+ /* Vector could have been moved by catchup.
+ This can only happen for mc-internal stream. */
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ s->last_global_sequence_processed = mp->last_global_sequence_included;
+
+ while (clib_fifo_elts (s->catchup_fifo))
+ {
+ mc_msg_user_request_t *gp;
+ u32 bi;
+ vlib_buffer_t *b;
+
+ clib_fifo_sub1 (s->catchup_fifo, bi);
+
+ b = vlib_get_buffer (mcm->vlib_main, bi);
+ gp = vlib_buffer_get_current (b);
+
+ /* Make sure we're replaying "new" news */
+ seq_cmp_result = mc_seq_cmp (gp->global_sequence,
+ mp->last_global_sequence_included);
+
+ if (seq_cmp_result > 0)
+ {
+ vlib_buffer_advance (b, sizeof (gp[0]));
+ s->config.rx_buffer (mcm, s, gp->peer_id, bi);
+ s->last_global_sequence_processed = gp->global_sequence;
+
+ if (MC_EVENT_LOGGING)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "catchup replay local sequence 0x%x",
+ .format_args = "i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 local_sequence;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->local_sequence = gp->local_sequence;
+ }
+ }
+ else
+ {
+ if (MC_EVENT_LOGGING)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "catchup discard local sequence 0x%x",
+ .format_args = "i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 local_sequence;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->local_sequence = gp->local_sequence;
+ }
+
+ vlib_buffer_free_one (mcm->vlib_main, bi);
+ }
+ }
+
+ s->state = MC_STREAM_STATE_ready;
+
+ /* Now that we are caught up wake up joining process. */
+ {
+ vlib_one_time_waiting_process_t *wp;
+ vec_foreach (wp, s->procs_waiting_for_join_done)
+ vlib_signal_one_time_waiting_process (mcm->vlib_main, wp);
+ if (s->procs_waiting_for_join_done)
+ _vec_len (s->procs_waiting_for_join_done) = 0;
+ }
+}
+
+static void
+this_node_maybe_master (mc_main_t * mcm)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_msg_master_assert_t *mp;
+ uword event_type;
+ int timeouts = 0;
+ int is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
+ clib_error_t *error;
+ f64 now, time_last_master_assert = -1;
+ u32 bi;
+
+ while (1)
+ {
+ if (!mcm->we_can_be_relay_master)
+ {
+ mcm->relay_state = MC_RELAY_STATE_SLAVE;
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE (e, "become slave (config)");
+ ELOG (mcm->elog_main, e, 0);
+ }
+ return;
+ }
+
+ now = vlib_time_now (vm);
+ if (now >= time_last_master_assert + 1)
+ {
+ time_last_master_assert = now;
+ mp = mc_get_vlib_buffer (mcm->vlib_main, sizeof (mp[0]), &bi);
+
+ mp->peer_id = mcm->transport.our_ack_peer_id;
+ mp->global_sequence = mcm->relay_global_sequence;
+
+ /*
+ * these messages clog the event log, set MC_EVENT_LOGGING higher
+ * if you want them
+ */
+ if (MC_EVENT_LOGGING > 1)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "tx-massert: peer %s global seq %u",
+ .format_args = "T4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 peer, global_sequence;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+ ed->global_sequence = mp->global_sequence;
+ }
+
+ mc_byte_swap_msg_master_assert (mp);
+
+ error =
+ mcm->transport.tx_buffer (mcm->transport.opaque,
+ MC_TRANSPORT_MASTERSHIP, bi);
+ if (error)
+ clib_error_report (error);
+ }
+
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ event_type = vlib_process_get_events (vm, /* no event data */ 0);
+
+ switch (event_type)
+ {
+ case ~0:
+ if (!is_master && timeouts++ > 2)
+ {
+ mcm->relay_state = MC_RELAY_STATE_MASTER;
+ mcm->relay_master_peer_id =
+ mcm->transport.our_ack_peer_id.as_u64;
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE (e, "become master (was maybe_master)");
+ ELOG (mcm->elog_main, e, 0);
+ }
+ return;
+ }
+ break;
+
+	case MC_RELAY_STATE_SLAVE:
+	  /* Log the transition before updating relay_state; comparing
+	     after the assignment could never be true. */
+	  if (MC_EVENT_LOGGING && mcm->relay_state != MC_RELAY_STATE_SLAVE)
+	    {
+	      ELOG_TYPE (e, "become slave (was maybe_master)");
+	      ELOG (mcm->elog_main, e, 0);
+	    }
+	  mcm->relay_state = MC_RELAY_STATE_SLAVE;
+	  return;
+ }
+ }
+}
+
+static void
+this_node_slave (mc_main_t * mcm)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ uword event_type;
+ int timeouts = 0;
+
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE (e, "become slave");
+ ELOG (mcm->elog_main, e, 0);
+ }
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ event_type = vlib_process_get_events (vm, /* no event data */ 0);
+
+ switch (event_type)
+ {
+ case ~0:
+ if (timeouts++ > 2)
+ {
+ mcm->relay_state = MC_RELAY_STATE_NEGOTIATE;
+ mcm->relay_master_peer_id = ~0ULL;
+ if (MC_EVENT_LOGGING)
+ {
+		    ELOG_TYPE (e, "timeouts; negotiate mastership");
+ ELOG (mcm->elog_main, e, 0);
+ }
+ return;
+ }
+ break;
+
+ case MC_RELAY_STATE_SLAVE:
+ mcm->relay_state = MC_RELAY_STATE_SLAVE;
+ timeouts = 0;
+ break;
+ }
+ }
+}
+
+static uword
+mc_mastership_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ mc_main_t *mcm = mc_node_get_main (node);
+
+ while (1)
+ {
+ switch (mcm->relay_state)
+ {
+ case MC_RELAY_STATE_NEGOTIATE:
+ case MC_RELAY_STATE_MASTER:
+ this_node_maybe_master (mcm);
+ break;
+
+ case MC_RELAY_STATE_SLAVE:
+ this_node_slave (mcm);
+ break;
+ }
+ }
+ return 0; /* not likely */
+}
+
+void
+mc_enable_disable_mastership (mc_main_t * mcm, int we_can_be_master)
+{
+ if (we_can_be_master != mcm->we_can_be_relay_master)
+ {
+ mcm->we_can_be_relay_master = we_can_be_master;
+ vlib_process_signal_event (mcm->vlib_main,
+ mcm->mastership_process,
+ MC_RELAY_STATE_NEGOTIATE, 0);
+ }
+}
+
+void
+mc_msg_master_assert_handler (mc_main_t * mcm, mc_msg_master_assert_t * mp,
+ u32 buffer_index)
+{
+ mc_peer_id_t his_peer_id, our_peer_id;
+ i32 seq_cmp_result;
+ u8 signal_slave = 0;
+ u8 update_global_sequence = 0;
+
+ mc_byte_swap_msg_master_assert (mp);
+
+ his_peer_id = mp->peer_id;
+ our_peer_id = mcm->transport.our_ack_peer_id;
+
+ /* compare the incoming global sequence with ours */
+ seq_cmp_result = mc_seq_cmp (mp->global_sequence,
+ mcm->relay_global_sequence);
+
+ /* If the sender has a lower peer id and the sender's sequence >=
+ our global sequence, we become a slave. Otherwise we are master. */
+ if (mc_peer_id_compare (his_peer_id, our_peer_id) < 0
+ && seq_cmp_result >= 0)
+ {
+ vlib_process_signal_event (mcm->vlib_main,
+ mcm->mastership_process,
+ MC_RELAY_STATE_SLAVE, 0);
+ signal_slave = 1;
+ }
+
+ /* Update our global sequence. */
+ if (seq_cmp_result > 0)
+ {
+ mcm->relay_global_sequence = mp->global_sequence;
+ update_global_sequence = 1;
+ }
+
+ {
+ uword *q = mhash_get (&mcm->mastership_peer_index_by_id, &his_peer_id);
+ mc_mastership_peer_t *p;
+
+ if (q)
+ p = vec_elt_at_index (mcm->mastership_peers, q[0]);
+ else
+ {
+ vec_add2 (mcm->mastership_peers, p, 1);
+ p->peer_id = his_peer_id;
+ mhash_set (&mcm->mastership_peer_index_by_id, &p->peer_id,
+ p - mcm->mastership_peers,
+ /* old_value */ 0);
+ }
+ p->time_last_master_assert_received = vlib_time_now (mcm->vlib_main);
+ }
+
+ /*
+ * these messages clog the event log, set MC_EVENT_LOGGING higher
+ * if you want them.
+ */
+ if (MC_EVENT_LOGGING > 1)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "rx-massert: peer %s global seq %u upd %d slave %d",
+ .format_args = "T4i4i1i1",
+ };
+ /* *INDENT-ON* */
+
+ struct
+ {
+ u32 peer;
+ u32 global_sequence;
+ u8 update_sequence;
+ u8 slave;
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, his_peer_id.as_u64);
+ ed->global_sequence = mp->global_sequence;
+ ed->update_sequence = update_global_sequence;
+ ed->slave = signal_slave;
+ }
+}
+
+static void
+mc_serialize_init (mc_main_t * mcm)
+{
+ mc_serialize_msg_t *m;
+ vlib_main_t *vm = vlib_get_main ();
+
+ mcm->global_msg_index_by_name
+ = hash_create_string ( /* elts */ 0, sizeof (uword));
+
+ m = vm->mc_msg_registrations;
+
+ while (m)
+ {
+ m->global_index = vec_len (mcm->global_msgs);
+ hash_set_mem (mcm->global_msg_index_by_name, m->name, m->global_index);
+ vec_add1 (mcm->global_msgs, m);
+ m = m->next_registration;
+ }
+}
+
+clib_error_t *
+mc_serialize_va (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg, va_list * va)
+{
+ mc_stream_t *s;
+ clib_error_t *error;
+ serialize_main_t *m = &mc->serialize_mains[VLIB_TX];
+ vlib_serialize_buffer_main_t *sbm = &mc->serialize_buffer_mains[VLIB_TX];
+ u32 bi, n_before, n_after, n_total, n_this_msg;
+ u32 si, gi;
+
+ if (!sbm->vlib_main)
+ {
+ sbm->tx.max_n_data_bytes_per_chain = 4096;
+ sbm->tx.free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX;
+ }
+
+ if (sbm->first_buffer == 0)
+ serialize_open_vlib_buffer (m, mc->vlib_main, sbm);
+
+ n_before = serialize_vlib_buffer_n_bytes (m);
+
+ s = mc_stream_by_index (mc, stream_index);
+ gi = msg->global_index;
+ ASSERT (msg == vec_elt (mc->global_msgs, gi));
+
+ si = ~0;
+ if (gi < vec_len (s->stream_msg_index_by_global_index))
+ si = s->stream_msg_index_by_global_index[gi];
+
+ serialize_likely_small_unsigned_integer (m, si);
+
+ /* For first time message is sent, use name to identify message. */
+ if (si == ~0 || MSG_ID_DEBUG)
+ serialize_cstring (m, msg->name);
+
+ if (MSG_ID_DEBUG && MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "serialize-msg: %s index %d",
+ .format_args = "T4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 c[2];
+ } *ed;
+ ed = ELOG_DATA (mc->elog_main, e);
+ ed->c[0] = elog_id_for_msg_name (mc, msg->name);
+ ed->c[1] = si;
+ }
+
+ error = va_serialize (m, va);
+
+ n_after = serialize_vlib_buffer_n_bytes (m);
+ n_this_msg = n_after - n_before;
+ n_total = n_after + sizeof (mc_msg_user_request_t);
+
+ /* For max message size ignore first message where string name is sent. */
+ if (si != ~0)
+ msg->max_n_bytes_serialized =
+ clib_max (msg->max_n_bytes_serialized, n_this_msg);
+
+ if (!multiple_messages_per_vlib_buffer
+ || si == ~0
+ || n_total + msg->max_n_bytes_serialized >
+ mc->transport.max_packet_size)
+ {
+ bi = serialize_close_vlib_buffer (m);
+ sbm->first_buffer = 0;
+ if (!error)
+ mc_stream_send (mc, stream_index, bi);
+ else if (bi != ~0)
+ vlib_buffer_free_one (mc->vlib_main, bi);
+ }
+
+ return error;
+}
+
+clib_error_t *
+mc_serialize_internal (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg, ...)
+{
+ vlib_main_t *vm = mc->vlib_main;
+ va_list va;
+ clib_error_t *error;
+
+ if (stream_index == ~0)
+ {
+ if (vm->mc_main && vm->mc_stream_index == ~0)
+ vlib_current_process_wait_for_one_time_event_vector
+ (vm, &vm->procs_waiting_for_mc_stream_join);
+ stream_index = vm->mc_stream_index;
+ }
+
+ va_start (va, msg);
+ error = mc_serialize_va (mc, stream_index,
+ multiple_messages_per_vlib_buffer, msg, &va);
+ va_end (va);
+ return error;
+}
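+
+/* Usage sketch (illustrative; my_msg and its handlers are hypothetical).
+   A message is registered once; its first transmission on a stream
+   carries the name so peers can bind it to a stream-local index, and
+   subsequent sends carry only the small integer index:
+
+     MC_SERIALIZE_MSG (my_msg, static) = {
+       .name = "my_msg",
+       .serialize = serialize_my_msg,
+       .unserialize = unserialize_my_msg,
+     };
+
+     error = mc_serialize_stream (mcm, stream_index, &my_msg, arg);  */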
+
+uword
+mc_unserialize_message (mc_main_t * mcm,
+ mc_stream_t * s, serialize_main_t * m)
+{
+ mc_serialize_stream_msg_t *sm;
+ u32 gi, si;
+
+ si = unserialize_likely_small_unsigned_integer (m);
+
+ if (!(si == ~0 || MSG_ID_DEBUG))
+ {
+ sm = vec_elt_at_index (s->stream_msgs, si);
+ gi = sm->global_index;
+ }
+ else
+ {
+ char *name;
+
+ unserialize_cstring (m, &name);
+
+ if (MSG_ID_DEBUG && MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "unserialize-msg: %s rx index %d",
+ .format_args = "T4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 c[2];
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->c[0] = elog_id_for_msg_name (mcm, name);
+ ed->c[1] = si;
+ }
+
+ {
+ uword *p = hash_get_mem (mcm->global_msg_index_by_name, name);
+ gi = p ? p[0] : ~0;
+ }
+
+ /* Unknown message? */
+ if (gi == ~0)
+ {
+ vec_free (name);
+ goto done;
+ }
+
+ vec_validate_init_empty (s->stream_msg_index_by_global_index, gi, ~0);
+ si = s->stream_msg_index_by_global_index[gi];
+
+ /* Stream local index unknown? Create it. */
+ if (si == ~0)
+ {
+ vec_add2 (s->stream_msgs, sm, 1);
+
+ si = sm - s->stream_msgs;
+ sm->global_index = gi;
+ s->stream_msg_index_by_global_index[gi] = si;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "msg-bind: stream %d %s to index %d",
+ .format_args = "i4T4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 c[3];
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->c[0] = s->index;
+ ed->c[1] = elog_id_for_msg_name (mcm, name);
+ ed->c[2] = si;
+ }
+ }
+ else
+ {
+ sm = vec_elt_at_index (s->stream_msgs, si);
+ if (gi != sm->global_index && MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "msg-id-ERROR: %s index %d expected %d",
+ .format_args = "T4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 c[3];
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->c[0] = elog_id_for_msg_name (mcm, name);
+ ed->c[1] = si;
+ ed->c[2] = ~0;
+ if (sm->global_index <
+ vec_len (s->stream_msg_index_by_global_index))
+ ed->c[2] =
+ s->stream_msg_index_by_global_index[sm->global_index];
+ }
+ }
+
+ vec_free (name);
+ }
+
+ if (gi != ~0)
+ {
+ mc_serialize_msg_t *msg;
+ msg = vec_elt (mcm->global_msgs, gi);
+ unserialize (m, msg->unserialize, mcm);
+ }
+
+done:
+ return gi != ~0;
+}
+
+void
+mc_unserialize_internal (mc_main_t * mcm, u32 stream_and_buffer_index)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ serialize_main_t *m = &mcm->serialize_mains[VLIB_RX];
+ vlib_serialize_buffer_main_t *sbm = &mcm->serialize_buffer_mains[VLIB_RX];
+ mc_stream_and_buffer_t *sb;
+ mc_stream_t *stream;
+ u32 buffer_index;
+
+ sb =
+ pool_elt_at_index (mcm->mc_unserialize_stream_and_buffers,
+ stream_and_buffer_index);
+ buffer_index = sb->buffer_index;
+ stream = vec_elt_at_index (mcm->stream_vector, sb->stream_index);
+ pool_put (mcm->mc_unserialize_stream_and_buffers, sb);
+
+ if (stream->config.save_snapshot)
+ {
+ u32 n_bytes = vlib_buffer_index_length_in_chain (vm, buffer_index);
+ static u8 *contents;
+ vec_reset_length (contents);
+ vec_validate (contents, n_bytes - 1);
+ vlib_buffer_contents (vm, buffer_index, contents);
+ stream->config.save_snapshot (mcm, /* is_catchup */ 0, contents,
+ n_bytes);
+ }
+
+ ASSERT (vlib_in_process_context (vm));
+
+ unserialize_open_vlib_buffer (m, vm, sbm);
+
+ clib_fifo_add1 (sbm->rx.buffer_fifo, buffer_index);
+
+ while (unserialize_vlib_buffer_n_bytes (m) > 0)
+ mc_unserialize_message (mcm, stream, m);
+
+ /* Frees buffer. */
+ unserialize_close_vlib_buffer (m);
+}
+
+void
+mc_unserialize (mc_main_t * mcm, mc_stream_t * s, u32 buffer_index)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_stream_and_buffer_t *sb;
+ pool_get (mcm->mc_unserialize_stream_and_buffers, sb);
+ sb->stream_index = s->index;
+ sb->buffer_index = buffer_index;
+ vlib_process_signal_event (vm, mcm->unserialize_process,
+ EVENT_MC_UNSERIALIZE_BUFFER,
+ sb - mcm->mc_unserialize_stream_and_buffers);
+}
+
+static uword
+mc_unserialize_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ mc_main_t *mcm = mc_node_get_main (node);
+ uword event_type, *event_data = 0;
+ int i;
+
+ while (1)
+ {
+ if (event_data)
+ _vec_len (event_data) = 0;
+
+ vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case EVENT_MC_UNSERIALIZE_BUFFER:
+ for (i = 0; i < vec_len (event_data); i++)
+ mc_unserialize_internal (mcm, event_data[i]);
+ break;
+
+ case EVENT_MC_UNSERIALIZE_CATCHUP:
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ u8 *mp = uword_to_pointer (event_data[i], u8 *);
+ perform_catchup (mcm, (void *) mp);
+ vec_free (mp);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return 0; /* not likely */
+}
+
+void
+serialize_mc_main (serialize_main_t * m, va_list * va)
+{
+ mc_main_t *mcm = va_arg (*va, mc_main_t *);
+ mc_stream_t *s;
+ mc_serialize_stream_msg_t *sm;
+ mc_serialize_msg_t *msg;
+
+ serialize_integer (m, vec_len (mcm->stream_vector), sizeof (u32));
+ vec_foreach (s, mcm->stream_vector)
+ {
+ /* Stream name. */
+ serialize_cstring (m, s->config.name);
+
+ /* Serialize global names for all sent messages. */
+ serialize_integer (m, vec_len (s->stream_msgs), sizeof (u32));
+ vec_foreach (sm, s->stream_msgs)
+ {
+ msg = vec_elt (mcm->global_msgs, sm->global_index);
+ serialize_cstring (m, msg->name);
+ }
+ }
+}
+
+void
+unserialize_mc_main (serialize_main_t * m, va_list * va)
+{
+ mc_main_t *mcm = va_arg (*va, mc_main_t *);
+ u32 i, n_streams, n_stream_msgs;
+ char *name;
+ mc_stream_t *s;
+ mc_serialize_stream_msg_t *sm;
+
+ unserialize_integer (m, &n_streams, sizeof (u32));
+ for (i = 0; i < n_streams; i++)
+ {
+ unserialize_cstring (m, &name);
+ if (i != MC_STREAM_INDEX_INTERNAL && !mc_stream_by_name (mcm, name))
+ {
+ vec_validate (mcm->stream_vector, i);
+ s = vec_elt_at_index (mcm->stream_vector, i);
+ mc_stream_init (s);
+ s->index = s - mcm->stream_vector;
+ s->config.name = name;
+ s->state = MC_STREAM_STATE_name_known;
+ hash_set_mem (mcm->stream_index_by_name, s->config.name, s->index);
+ }
+ else
+ vec_free (name);
+
+ s = vec_elt_at_index (mcm->stream_vector, i);
+
+ vec_free (s->stream_msgs);
+ vec_free (s->stream_msg_index_by_global_index);
+
+ unserialize_integer (m, &n_stream_msgs, sizeof (u32));
+ vec_resize (s->stream_msgs, n_stream_msgs);
+ vec_foreach (sm, s->stream_msgs)
+ {
+ uword *p;
+ u32 si, gi;
+
+ unserialize_cstring (m, &name);
+ p = hash_get (mcm->global_msg_index_by_name, name);
+ gi = p ? p[0] : ~0;
+ si = sm - s->stream_msgs;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "catchup-bind: %s to %d global index %d stream %d",
+ .format_args = "T4i4i4i4",
+ };
+ /* *INDENT-ON* */
+
+ struct
+ {
+ u32 c[4];
+ } *ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->c[0] = elog_id_for_msg_name (mcm, name);
+ ed->c[1] = si;
+ ed->c[2] = gi;
+ ed->c[3] = s->index;
+ }
+
+ vec_free (name);
+
+ sm->global_index = gi;
+ if (gi != ~0)
+ {
+ vec_validate_init_empty (s->stream_msg_index_by_global_index,
+ gi, ~0);
+ s->stream_msg_index_by_global_index[gi] = si;
+ }
+ }
+ }
+}
+
+void
+mc_main_init (mc_main_t * mcm, char *tag)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ mcm->vlib_main = vm;
+ mcm->elog_main = &vm->elog_main;
+
+ mcm->relay_master_peer_id = ~0ULL;
+ mcm->relay_state = MC_RELAY_STATE_NEGOTIATE;
+
+ mcm->stream_index_by_name
+ = hash_create_string ( /* elts */ 0, /* value size */ sizeof (uword));
+
+ {
+ vlib_node_registration_t r;
+
+ memset (&r, 0, sizeof (r));
+
+ r.type = VLIB_NODE_TYPE_PROCESS;
+
+ /* Point runtime data to main instance. */
+ r.runtime_data = &mcm;
+ r.runtime_data_bytes = sizeof (&mcm);
+
+ r.name = (char *) format (0, "mc-mastership-%s", tag);
+ r.function = mc_mastership_process;
+ mcm->mastership_process = vlib_register_node (vm, &r);
+
+ r.name = (char *) format (0, "mc-join-ager-%s", tag);
+ r.function = mc_join_ager_process;
+ mcm->join_ager_process = vlib_register_node (vm, &r);
+
+ r.name = (char *) format (0, "mc-retry-%s", tag);
+ r.function = mc_retry_process;
+ mcm->retry_process = vlib_register_node (vm, &r);
+
+ r.name = (char *) format (0, "mc-catchup-%s", tag);
+ r.function = mc_catchup_process;
+ mcm->catchup_process = vlib_register_node (vm, &r);
+
+ r.name = (char *) format (0, "mc-unserialize-%s", tag);
+ r.function = mc_unserialize_process;
+ mcm->unserialize_process = vlib_register_node (vm, &r);
+ }
+
+ if (MC_EVENT_LOGGING > 0)
+ mhash_init (&mcm->elog_id_by_peer_id, sizeof (uword),
+ sizeof (mc_peer_id_t));
+
+ mhash_init (&mcm->mastership_peer_index_by_id, sizeof (uword),
+ sizeof (mc_peer_id_t));
+ mc_serialize_init (mcm);
+}
+
+static u8 *
+format_mc_relay_state (u8 * s, va_list * args)
+{
+ mc_relay_state_t state = va_arg (*args, mc_relay_state_t);
+ char *t = 0;
+ switch (state)
+ {
+ case MC_RELAY_STATE_NEGOTIATE:
+ t = "negotiate";
+ break;
+ case MC_RELAY_STATE_MASTER:
+ t = "master";
+ break;
+ case MC_RELAY_STATE_SLAVE:
+ t = "slave";
+ break;
+ default:
+ return format (s, "unknown 0x%x", state);
+ }
+
+ return format (s, "%s", t);
+}
+
+static u8 *
+format_mc_stream_state (u8 * s, va_list * args)
+{
+ mc_stream_state_t state = va_arg (*args, mc_stream_state_t);
+ char *t = 0;
+ switch (state)
+ {
+#define _(f) case MC_STREAM_STATE_##f: t = #f; break;
+ foreach_mc_stream_state
+#undef _
+ default:
+ return format (s, "unknown 0x%x", state);
+ }
+
+ return format (s, "%s", t);
+}
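
[editor's note] The formatter above and the mc_stream_state_t enum are both generated from the foreach_mc_stream_state X-macro (defined in mc.h below), so the name table cannot drift out of sync with the enum. A minimal, self-contained sketch of the same pattern, with illustrative names:

#include <stdio.h>

/* One list drives both the enum and the name formatter. */
#define foreach_demo_state _ (invalid) _ (ready)

typedef enum
{
#define _(f) DEMO_STATE_##f,
  foreach_demo_state
#undef _
} demo_state_t;

static const char *
demo_state_name (demo_state_t s)
{
  switch (s)
    {
#define _(f) case DEMO_STATE_##f: return #f;
      foreach_demo_state
#undef _
    }
  return "unknown";
}

int
main (void)
{
  printf ("%s\n", demo_state_name (DEMO_STATE_ready)); /* prints "ready" */
  return 0;
}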
+
+static int
+mc_peer_comp (void *a1, void *a2)
+{
+ mc_stream_peer_t *p1 = a1;
+ mc_stream_peer_t *p2 = a2;
+
+ return mc_peer_id_compare (p1->id, p2->id);
+}
+
+u8 *
+format_mc_main (u8 * s, va_list * args)
+{
+ mc_main_t *mcm = va_arg (*args, mc_main_t *);
+ mc_stream_t *t;
+ mc_stream_peer_t *p, *ps;
+ uword indent = format_get_indent (s);
+
+ s = format (s, "MC state %U, %d streams joined, global sequence 0x%x",
+ format_mc_relay_state, mcm->relay_state,
+ vec_len (mcm->stream_vector), mcm->relay_global_sequence);
+
+ {
+ mc_mastership_peer_t *mp;
+ f64 now = vlib_time_now (mcm->vlib_main);
+ s = format (s, "\n%UMost recent mastership peers:",
+ format_white_space, indent + 2);
+ vec_foreach (mp, mcm->mastership_peers)
+ {
+ s = format (s, "\n%U%-30U%.4e",
+ format_white_space, indent + 4,
+ mcm->transport.format_peer_id, mp->peer_id,
+ now - mp->time_last_master_assert_received);
+ }
+ }
+
+ vec_foreach (t, mcm->stream_vector)
+ {
+ s = format (s, "\n%Ustream `%s' index %d",
+ format_white_space, indent + 2, t->config.name, t->index);
+
+ s = format (s, "\n%Ustate %U",
+ format_white_space, indent + 4,
+ format_mc_stream_state, t->state);
+
+ s =
+ format (s,
+ "\n%Uretries: interval %.0f sec, limit %d, pool elts %d, %Ld sent",
+ format_white_space, indent + 4, t->config.retry_interval,
+ t->config.retry_limit, pool_elts (t->retry_pool),
+ t->stats.n_retries - t->stats_last_clear.n_retries);
+
+ s = format (s, "\n%U%Ld/%Ld user requests sent/received",
+ format_white_space, indent + 4,
+ t->user_requests_sent, t->user_requests_received);
+
+ s = format (s, "\n%U%d peers, local/global sequence 0x%x/0x%x",
+ format_white_space, indent + 4,
+ pool_elts (t->peers),
+ t->our_local_sequence, t->last_global_sequence_processed);
+
+ ps = 0;
+ /* *INDENT-OFF* */
+ pool_foreach (p, t->peers,
+ ({
+ if (clib_bitmap_get (t->all_peer_bitmap, p - t->peers))
+ vec_add1 (ps, p[0]);
+ }));
+ /* *INDENT-ON* */
+ vec_sort_with_function (ps, mc_peer_comp);
+ s = format (s, "\n%U%=30s%10s%16s%16s",
+ format_white_space, indent + 6,
+ "Peer", "Last seq", "Retries", "Future");
+
+ vec_foreach (p, ps)
+ {
+ s = format (s, "\n%U%-30U0x%08x%16Ld%16Ld%s",
+ format_white_space, indent + 6,
+ mcm->transport.format_peer_id, p->id.as_u64,
+ p->last_sequence_received,
+ p->stats.n_msgs_from_past -
+ p->stats_last_clear.n_msgs_from_past,
+ p->stats.n_msgs_from_future -
+ p->stats_last_clear.n_msgs_from_future,
+ (mcm->transport.our_ack_peer_id.as_u64 ==
+ p->id.as_u64 ? " (self)" : ""));
+ }
+ vec_free (ps);
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/mc.h b/src/vlib/mc.h
new file mode 100644
index 00000000..dc95b0e9
--- /dev/null
+++ b/src/vlib/mc.h
@@ -0,0 +1,687 @@
+/*
+ * mc.h: vlib reliable sequenced multicast distributed applications
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vlib_mc_h
+#define included_vlib_mc_h
+
+#include <vppinfra/elog.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/mhash.h>
+#include <vlib/node.h>
+
+#ifndef MC_EVENT_LOGGING
+#define MC_EVENT_LOGGING 1
+#endif
+
+always_inline uword
+mc_need_byte_swap (void)
+{
+ return CLIB_ARCH_IS_LITTLE_ENDIAN;
+}
+
+/*
+ * Used to uniquely identify hosts.
+ * For IP4 this would be ip4_address plus tcp/udp port.
+ */
+typedef union
+{
+ u8 as_u8[8];
+ u64 as_u64;
+} mc_peer_id_t;
+
+always_inline mc_peer_id_t
+mc_byte_swap_peer_id (mc_peer_id_t i)
+{
+ /* Peer id is already in network byte order. */
+ return i;
+}
+
+always_inline int
+mc_peer_id_compare (mc_peer_id_t a, mc_peer_id_t b)
+{
+ return memcmp (a.as_u8, b.as_u8, sizeof (a.as_u8));
+}
+
+/* Assert mastership. Lowest peer_id among all peers wins mastership.
+ Only sent/received over mastership channel (MC_TRANSPORT_MASTERSHIP).
+ So, we don't need a message opcode. */
+typedef CLIB_PACKED (struct
+ {
+ /* Peer id asserting mastership. */
+ mc_peer_id_t peer_id;
+ /* Global sequence number asserted. */
+ u32 global_sequence;}) mc_msg_master_assert_t;
+
+always_inline void
+mc_byte_swap_msg_master_assert (mc_msg_master_assert_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+ }
+}
+
+#define foreach_mc_msg_type \
+ _ (master_assert) \
+ _ (join_or_leave_request) \
+ _ (join_reply) \
+ _ (user_request) \
+ _ (user_ack) \
+ _ (catchup_request) \
+ _ (catchup_reply)
+
+typedef enum
+{
+#define _(f) MC_MSG_TYPE_##f,
+ foreach_mc_msg_type
+#undef _
+} mc_relay_msg_type_t;
+
+/* Request to join a given stream. Multicast over MC_TRANSPORT_JOIN. */
+typedef CLIB_PACKED (struct
+ {
+mc_peer_id_t peer_id; mc_relay_msg_type_t type:32;
+ /* MC_MSG_TYPE_join_or_leave_request */
+ /* Stream to join or leave. */
+ u32 stream_index;
+ /* join = 1, leave = 0 */
+ u8 is_join;}) mc_msg_join_or_leave_request_t;
+
+always_inline void
+mc_byte_swap_msg_join_or_leave_request (mc_msg_join_or_leave_request_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->type = clib_byte_swap_u32 (r->type);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ }
+}
+
+/* Join reply. Multicast over MC_TRANSPORT_JOIN. */
+typedef CLIB_PACKED (struct
+ {
+mc_peer_id_t peer_id; mc_relay_msg_type_t type:32;
+ /* MC_MSG_TYPE_join_reply */
+ u32 stream_index;
+ /* Peer ID to contact to catchup with this stream. */
+ mc_peer_id_t catchup_peer_id;}) mc_msg_join_reply_t;
+
+always_inline void
+mc_byte_swap_msg_join_reply (mc_msg_join_reply_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->type = clib_byte_swap_u32 (r->type);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ r->catchup_peer_id = mc_byte_swap_peer_id (r->catchup_peer_id);
+ }
+}
+
+/* Generic (application) request. Multicast over MC_TRANSPORT_USER_REQUEST_TO_RELAY and then
+ relayed by relay master after filling in global sequence number. */
+typedef CLIB_PACKED (struct
+ {
+ mc_peer_id_t peer_id; u32 stream_index;
+ /* Global sequence number as filled in by relay master. */
+ u32 global_sequence;
+ /* Local sequence number as filled in by peer sending message. */
+ u32 local_sequence;
+ /* Size of request data. */
+ u32 n_data_bytes;
+ /* Opaque request data. */
+ u8 data[0];}) mc_msg_user_request_t;
+
+always_inline void
+mc_byte_swap_msg_user_request (mc_msg_user_request_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+ r->local_sequence = clib_byte_swap_u32 (r->local_sequence);
+ r->n_data_bytes = clib_byte_swap_u32 (r->n_data_bytes);
+ }
+}
+
+/* Sent unicast over ACK channel. */
+typedef CLIB_PACKED (struct
+ {
+ mc_peer_id_t peer_id;
+ u32 global_sequence; u32 stream_index;
+ u32 local_sequence;
+ i32 seq_cmp_result;}) mc_msg_user_ack_t;
+
+always_inline void
+mc_byte_swap_msg_user_ack (mc_msg_user_ack_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+ r->local_sequence = clib_byte_swap_u32 (r->local_sequence);
+ r->seq_cmp_result = clib_byte_swap_i32 (r->seq_cmp_result);
+ }
+}
+
+/* Sent/received unicast over catchup channel (e.g. using TCP). */
+typedef CLIB_PACKED (struct
+ {
+ mc_peer_id_t peer_id;
+ u32 stream_index;}) mc_msg_catchup_request_t;
+
+always_inline void
+mc_byte_swap_msg_catchup_request (mc_msg_catchup_request_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ }
+}
+
+/* Sent/received unicast over catchup channel. */
+typedef CLIB_PACKED (struct
+ {
+ mc_peer_id_t peer_id; u32 stream_index;
+ /* Last global sequence number included in catchup data. */
+ u32 last_global_sequence_included;
+ /* Size of catchup data. */
+ u32 n_data_bytes;
+ /* Catchup data. */
+ u8 data[0];}) mc_msg_catchup_reply_t;
+
+always_inline void
+mc_byte_swap_msg_catchup_reply (mc_msg_catchup_reply_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ r->last_global_sequence_included =
+ clib_byte_swap_u32 (r->last_global_sequence_included);
+ r->n_data_bytes = clib_byte_swap_u32 (r->n_data_bytes);
+ }
+}
+
+typedef struct _mc_serialize_msg
+{
+ /* Name for this type. */
+ char *name;
+
+ /* Functions to serialize/unserialize data. */
+ serialize_function_t *serialize;
+ serialize_function_t *unserialize;
+
+ /* Maximum message size in bytes when serialized.
+ If zero then this will be set to the largest sent message. */
+ u32 max_n_bytes_serialized;
+
+ /* Opaque to use for first argument to serialize/unserialize function. */
+ u32 opaque;
+
+ /* Index in global message vector. */
+ u32 global_index;
+
+ /* Registration list */
+ struct _mc_serialize_msg *next_registration;
+} mc_serialize_msg_t;
+
+typedef struct
+{
+ /* Index into global message vector. */
+ u32 global_index;
+} mc_serialize_stream_msg_t;
+
+#define MC_SERIALIZE_MSG(x,...) \
+ __VA_ARGS__ mc_serialize_msg_t x; \
+static void __mc_serialize_msg_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __mc_serialize_msg_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ x.next_registration = vm->mc_msg_registrations; \
+ vm->mc_msg_registrations = &x; \
+} \
+__VA_ARGS__ mc_serialize_msg_t x
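
[editor's note] Applications declare messages with this macro; the constructor chains each registration onto vm->mc_msg_registrations, and the list is presumably folded into the global message vector when mc_serialize_init runs (see mc_main_init above). A hedged usage sketch; the message name and callbacks are hypothetical:

static void serialize_my_counter (serialize_main_t * m, va_list * va);
static void unserialize_my_counter (serialize_main_t * m, va_list * va);

MC_SERIALIZE_MSG (my_counter_msg, static) = {
  .name = "my_counter_msg",
  .serialize = serialize_my_counter,
  .unserialize = unserialize_my_counter,
};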
+
+typedef enum
+{
+ MC_TRANSPORT_MASTERSHIP,
+ MC_TRANSPORT_JOIN,
+ MC_TRANSPORT_USER_REQUEST_TO_RELAY,
+ MC_TRANSPORT_USER_REQUEST_FROM_RELAY,
+ MC_N_TRANSPORT_TYPE,
+} mc_transport_type_t;
+
+typedef struct
+{
+ clib_error_t *(*tx_buffer) (void *opaque, mc_transport_type_t type,
+ u32 buffer_index);
+
+ clib_error_t *(*tx_ack) (void *opaque, mc_peer_id_t peer_id,
+ u32 buffer_index);
+
+ /* Returns catchup opaque. */
+ uword (*catchup_request_fun) (void *opaque, u32 stream_index,
+ mc_peer_id_t catchup_peer_id);
+
+ void (*catchup_send_fun) (void *opaque, uword catchup_opaque,
+ u8 * data_vector);
+
+ /* Opaque passed to callbacks. */
+ void *opaque;
+
+ mc_peer_id_t our_ack_peer_id;
+ mc_peer_id_t our_catchup_peer_id;
+
+ /* Max packet size (MTU) for this transport.
+ For IP this is interface MTU less IP + UDP header size. */
+ u32 max_packet_size;
+
+ format_function_t *format_peer_id;
+} mc_transport_t;
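
[editor's note] An application plugs in a concrete transport by filling in these callbacks before using the mc layer. A minimal sketch assuming a hypothetical my_tx_buffer; a real transport would also supply tx_ack, the catchup functions, both peer ids, and format_peer_id:

static clib_error_t *
my_tx_buffer (void *opaque, mc_transport_type_t type, u32 buffer_index)
{
  /* Hand the vlib buffer chain to the wire on the channel `type'. */
  return 0;
}

static void
my_setup_transport (mc_main_t * mcm)
{
  mcm->transport.tx_buffer = my_tx_buffer;
  mcm->transport.opaque = mcm;           /* passed back to callbacks */
  mcm->transport.max_packet_size = 1400; /* MTU less IP + UDP headers */
}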
+
+typedef struct
+{
+ /* Count of messages received from this peer from the past/future
+ (with seq_cmp != 0). */
+ u64 n_msgs_from_past;
+ u64 n_msgs_from_future;
+} mc_stream_peer_stats_t;
+
+typedef struct
+{
+ /* ID of this peer. */
+ mc_peer_id_t id;
+
+ /* The last sequence we received from this peer. */
+ u32 last_sequence_received;
+
+ mc_stream_peer_stats_t stats, stats_last_clear;
+} mc_stream_peer_t;
+
+typedef struct
+{
+ u32 buffer_index;
+
+ /* Cached copy of local sequence number from buffer. */
+ u32 local_sequence;
+
+ /* Number of times this buffer has been sent (retried). */
+ u32 n_retries;
+
+ /* Previous/next retries in doubly-linked list. */
+ u32 prev_index, next_index;
+
+ /* Bitmap of all peers which have acked this msg */
+ uword *unacked_by_peer_bitmap;
+
+ /* Message send or resend time */
+ f64 sent_at;
+} mc_retry_t;
+
+typedef struct
+{
+ /* Number of retries sent for this stream. */
+ u64 n_retries;
+} mc_stream_stats_t;
+
+struct mc_main_t;
+struct mc_stream_t;
+
+typedef struct
+{
+ /* Stream name. */
+ char *name;
+
+ /* Number of outstanding messages. */
+ u32 window_size;
+
+ /* Retry interval, in seconds */
+ f64 retry_interval;
+
+ /* Retry limit */
+ u32 retry_limit;
+
+ /* User rx buffer callback */
+ void (*rx_buffer) (struct mc_main_t * mc_main,
+ struct mc_stream_t * stream,
+ mc_peer_id_t peer_id, u32 buffer_index);
+
+ /* User callback to create a snapshot */
+ u8 *(*catchup_snapshot) (struct mc_main_t * mc_main,
+ u8 * snapshot_vector,
+ u32 last_global_sequence_included);
+
+ /* User callback to replay a snapshot */
+ void (*catchup) (struct mc_main_t * mc_main,
+ u8 * snapshot_data, u32 n_snapshot_data_bytes);
+
+ /* Callback to save a snapshot for offline replay */
+ void (*save_snapshot) (struct mc_main_t * mc_main,
+ u32 is_catchup,
+ u8 * snapshot_data, u32 n_snapshot_data_bytes);
+
+ /* Called when a peer dies */
+ void (*peer_died) (struct mc_main_t * mc_main,
+ struct mc_stream_t * stream, mc_peer_id_t peer_id);
+} mc_stream_config_t;
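
[editor's note] To participate in a stream, an application fills in a config and calls mc_stream_join (declared below), which returns the stream index; since the join waits for completion, it likely must run in vlib process context. A hedged sketch with hypothetical names; freeing the buffer in the rx callback assumes the callback owns it:

static void
my_rx_buffer (mc_main_t * mcm, mc_stream_t * stream,
              mc_peer_id_t peer_id, u32 buffer_index)
{
  /* Consume the reliably-sequenced buffer here. */
  vlib_buffer_free_one (mcm->vlib_main, buffer_index);
}

static u32
my_join (mc_main_t * mcm)
{
  mc_stream_config_t config = {
    .name = "my-stream",
    .window_size = 8,
    .retry_interval = 1.0, /* seconds */
    .retry_limit = 7,
    .rx_buffer = my_rx_buffer,
  };
  return mc_stream_join (mcm, &config);
}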
+
+#define foreach_mc_stream_state \
+ _ (invalid) \
+ _ (name_known) \
+ _ (join_in_progress) \
+ _ (catchup) \
+ _ (ready)
+
+typedef enum
+{
+#define _(f) MC_STREAM_STATE_##f,
+ foreach_mc_stream_state
+#undef _
+} mc_stream_state_t;
+
+typedef struct mc_stream_t
+{
+ mc_stream_config_t config;
+
+ mc_stream_state_t state;
+
+ /* Index in stream pool. */
+ u32 index;
+
+ /* Stream index 0 is always for MC internal use. */
+#define MC_STREAM_INDEX_INTERNAL 0
+
+ mc_retry_t *retry_pool;
+
+ /* Head and tail index of retry pool. */
+ u32 retry_head_index, retry_tail_index;
+
+ /*
+ * Country club for recently retired messages
+ * If the set of peers is expanding and a new peer
+ * misses a message, we can easily retire the FIFO
+ * element before we even know about the new peer
+ */
+ mc_retry_t *retired_fifo;
+
+ /* Hash mapping local sequence to retry pool index. */
+ uword *retry_index_by_local_sequence;
+
+ /* Catch-up FIFO of VLIB buffer indices;
+ we start recording into it when catching up. */
+ u32 *catchup_fifo;
+
+ mc_stream_stats_t stats, stats_last_clear;
+
+ /* Peer pool. */
+ mc_stream_peer_t *peers;
+
+ /* Bitmap with ones for all peers in peer pool. */
+ uword *all_peer_bitmap;
+
+ /* Map of 64 bit peer id to index in peer pool. */
+ mhash_t peer_index_by_id;
+
+ /* Timeout, in case we're alone in the world */
+ f64 join_timeout;
+
+ vlib_one_time_waiting_process_t *procs_waiting_for_join_done;
+
+ vlib_one_time_waiting_process_t *procs_waiting_for_open_window;
+
+ /* Next sequence number to use */
+ u32 our_local_sequence;
+
+ /*
+ * Last global sequence we processed.
+ * When supplying catchup data, we need to tell
+ * the client precisely where to start replaying
+ */
+ u32 last_global_sequence_processed;
+
+ /* Vector of unique messages we've sent on this stream. */
+ mc_serialize_stream_msg_t *stream_msgs;
+
+ /* Vector mapping global message index to per-stream message index. */
+ u32 *stream_msg_index_by_global_index;
+
+ /* Hashed by message name. */
+ uword *stream_msg_index_by_name;
+
+ u64 user_requests_sent;
+ u64 user_requests_received;
+} mc_stream_t;
+
+always_inline void
+mc_stream_free (mc_stream_t * s)
+{
+ pool_free (s->retry_pool);
+ hash_free (s->retry_index_by_local_sequence);
+ clib_fifo_free (s->catchup_fifo);
+ pool_free (s->peers);
+ mhash_free (&s->peer_index_by_id);
+ vec_free (s->procs_waiting_for_join_done);
+ vec_free (s->procs_waiting_for_open_window);
+}
+
+always_inline void
+mc_stream_init (mc_stream_t * s)
+{
+ memset (s, 0, sizeof (s[0]));
+ s->retry_head_index = s->retry_tail_index = ~0;
+}
+
+typedef struct
+{
+ u32 stream_index;
+ u32 catchup_opaque;
+ u8 *catchup_snapshot;
+} mc_catchup_process_arg_t;
+
+typedef enum
+{
+ MC_RELAY_STATE_NEGOTIATE,
+ MC_RELAY_STATE_MASTER,
+ MC_RELAY_STATE_SLAVE,
+} mc_relay_state_t;
+
+typedef struct
+{
+ mc_peer_id_t peer_id;
+
+ f64 time_last_master_assert_received;
+} mc_mastership_peer_t;
+
+typedef struct
+{
+ u32 stream_index;
+ u32 buffer_index;
+} mc_stream_and_buffer_t;
+
+typedef struct mc_main_t
+{
+ mc_relay_state_t relay_state;
+
+ /* Mastership */
+ u32 we_can_be_relay_master;
+
+ u64 relay_master_peer_id;
+
+ mc_mastership_peer_t *mastership_peers;
+
+ /* Map of 64 bit peer id to index in mastership peer vector. */
+ mhash_t mastership_peer_index_by_id;
+
+ /* The transport we're using. */
+ mc_transport_t transport;
+
+ /* Last-used global sequence number. */
+ u32 relay_global_sequence;
+
+ /* Vector of streams. */
+ mc_stream_t *stream_vector;
+
+ /* Hash table mapping stream name to pool index. */
+ uword *stream_index_by_name;
+
+ uword *procs_waiting_for_stream_name_by_name;
+
+ vlib_one_time_waiting_process_t **procs_waiting_for_stream_name_pool;
+
+ int joins_in_progress;
+
+ mc_catchup_process_arg_t *catchup_process_args;
+
+ /* Node indices for mastership, join ager,
+ retry and catchup processes. */
+ u32 mastership_process;
+ u32 join_ager_process;
+ u32 retry_process;
+ u32 catchup_process;
+ u32 unserialize_process;
+
+ /* Global vector of messages. */
+ mc_serialize_msg_t **global_msgs;
+
+ /* Hash table mapping message name to index. */
+ uword *global_msg_index_by_name;
+
+ /* Shared serialize/unserialize main. */
+ serialize_main_t serialize_mains[VLIB_N_RX_TX];
+
+ vlib_serialize_buffer_main_t serialize_buffer_mains[VLIB_N_RX_TX];
+
+ /* Convenience variables */
+ struct vlib_main_t *vlib_main;
+ elog_main_t *elog_main;
+
+ /* Maps 64 bit peer id to elog string table offset for this formatted peer id. */
+ mhash_t elog_id_by_peer_id;
+
+ uword *elog_id_by_msg_name;
+
+ /* For mc_unserialize. */
+ mc_stream_and_buffer_t *mc_unserialize_stream_and_buffers;
+} mc_main_t;
+
+always_inline mc_stream_t *
+mc_stream_by_name (mc_main_t * m, char *name)
+{
+ uword *p = hash_get (m->stream_index_by_name, name);
+ return p ? vec_elt_at_index (m->stream_vector, p[0]) : 0;
+}
+
+always_inline mc_stream_t *
+mc_stream_by_index (mc_main_t * m, u32 i)
+{
+ return i < vec_len (m->stream_vector) ? m->stream_vector + i : 0;
+}
+
+always_inline void
+mc_clear_stream_stats (mc_main_t * m)
+{
+ mc_stream_t *s;
+ mc_stream_peer_t *p;
+ vec_foreach (s, m->stream_vector)
+ {
+ s->stats_last_clear = s->stats;
+ /* *INDENT-OFF* */
+ pool_foreach (p, s->peers, ({
+ p->stats_last_clear = p->stats;
+ }));
+ /* *INDENT-ON* */
+ }
+}
+
+/* Declare all message handlers. */
+#define _(f) void mc_msg_##f##_handler (mc_main_t * mcm, mc_msg_##f##_t * msg, u32 buffer_index);
+foreach_mc_msg_type
+#undef _
+ u32 mc_stream_join (mc_main_t * mcm, mc_stream_config_t *);
+
+void mc_stream_leave (mc_main_t * mcm, u32 stream_index);
+
+void mc_wait_for_stream_ready (mc_main_t * m, char *stream_name);
+
+u32 mc_stream_send (mc_main_t * mcm, u32 stream_index, u32 buffer_index);
+
+void mc_main_init (mc_main_t * mcm, char *tag);
+
+void mc_enable_disable_mastership (mc_main_t * mcm, int we_can_be_master);
+
+void *mc_get_vlib_buffer (struct vlib_main_t *vm, u32 n_bytes,
+ u32 * bi_return);
+
+format_function_t format_mc_main;
+
+clib_error_t *mc_serialize_internal (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg, ...);
+
+clib_error_t *mc_serialize_va (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg, va_list * va);
+
+#define mc_serialize_stream(mc,si,msg,args...) \
+ mc_serialize_internal((mc),(si),(0),(msg),(msg)->serialize,args)
+
+#define mc_serialize(mc,msg,args...) \
+ mc_serialize_internal((mc),(~0),(0),(msg),(msg)->serialize,args)
+
+#define mc_serialize2(mc,add,msg,args...) \
+ mc_serialize_internal((mc),(~0),(add),(msg),(msg)->serialize,args)
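
[editor's note] The macros pass msg->serialize as the first variadic argument to mc_serialize_internal; any further arguments appear to reach the message's serialize callback through its va_list. A usage sketch, reusing the hypothetical my_counter_msg from the earlier note:

static clib_error_t *
my_send (mc_main_t * mcm, u32 stream_index, u32 value)
{
  /* Serializes my_counter_msg with `value' and multicasts it on the
     given stream; mc_serialize would use the default stream (~0). */
  return mc_serialize_stream (mcm, stream_index, &my_counter_msg, value);
}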
+
+void mc_unserialize (mc_main_t * mcm, mc_stream_t * s, u32 buffer_index);
+uword mc_unserialize_message (mc_main_t * mcm, mc_stream_t * s,
+ serialize_main_t * m);
+
+serialize_function_t serialize_mc_main, unserialize_mc_main;
+
+always_inline uword
+mc_max_message_size_in_bytes (mc_main_t * mcm)
+{
+ return mcm->transport.max_packet_size - sizeof (mc_msg_user_request_t);
+}
+
+always_inline word
+mc_serialize_n_bytes_left (mc_main_t * mcm, serialize_main_t * m)
+{
+ return mc_max_message_size_in_bytes (mcm) -
+ serialize_vlib_buffer_n_bytes (m);
+}
+
+void unserialize_mc_stream (serialize_main_t * m, va_list * va);
+void mc_stream_join_process_hold (void);
+
+#endif /* included_vlib_mc_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node.c b/src/vlib/node.c
new file mode 100644
index 00000000..e6739dc7
--- /dev/null
+++ b/src/vlib/node.c
@@ -0,0 +1,679 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node.c: VLIB processing nodes
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+/* Query node given name. */
+vlib_node_t *
+vlib_get_node_by_name (vlib_main_t * vm, u8 * name)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ uword *p;
+ u8 *key = name;
+ if (!clib_mem_is_heap_object (key))
+ key = format (0, "%s", key);
+ p = hash_get (nm->node_by_name, key);
+ if (key != name)
+ vec_free (key);
+ return p ? vec_elt (nm->nodes, p[0]) : 0;
+}
+
+static void
+node_set_elog_name (vlib_main_t * vm, uword node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ elog_event_type_t *t;
+
+ t = vec_elt_at_index (vm->node_call_elog_event_types, node_index);
+ vec_free (t->format);
+ t->format = (char *) format (0, "%v-call: %%d%c", n->name, 0);
+
+ t = vec_elt_at_index (vm->node_return_elog_event_types, node_index);
+ vec_free (t->format);
+ t->format = (char *) format (0, "%v-return: %%d%c", n->name, 0);
+
+ n->name_elog_string = elog_string (&vm->elog_main, "%v%c", n->name, 0);
+}
+
+static void
+vlib_worker_thread_node_rename (u32 node_index)
+{
+ int i;
+ vlib_main_t *vm;
+ vlib_node_t *n;
+
+ if (vec_len (vlib_mains) == 1)
+ return;
+
+ vm = vlib_mains[0];
+ n = vlib_get_node (vm, node_index);
+
+ ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ vlib_main_t *vm_worker = vlib_mains[i];
+ vlib_node_t *n_worker = vlib_get_node (vm_worker, node_index);
+
+ n_worker->name = n->name;
+ n_worker->name_elog_string = n->name_elog_string;
+ }
+}
+
+void
+vlib_node_rename (vlib_main_t * vm, u32 node_index, char *fmt, ...)
+{
+ va_list va;
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+
+ va_start (va, fmt);
+ hash_unset (nm->node_by_name, n->name);
+ vec_free (n->name);
+ n->name = va_format (0, fmt, &va);
+ va_end (va);
+ hash_set (nm->node_by_name, n->name, n->index);
+
+ node_set_elog_name (vm, node_index);
+
+ /* Propagate the change to all worker threads */
+ vlib_worker_thread_node_rename (node_index);
+}
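
[editor's note] For example, a component that instantiates several copies of one process node might disambiguate them with a call like the following (names hypothetical):

vlib_node_rename (vm, node_index, "my-worker-%d", instance_id);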
+
+static void
+vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_runtime_t *r, *s;
+ vlib_node_t *node, *next_node;
+ vlib_next_frame_t *nf;
+ vlib_pending_frame_t *pf;
+ i32 i, j, n_insert;
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ node = vec_elt (nm->nodes, node_index);
+ r = vlib_node_get_runtime (vm, node_index);
+
+ n_insert = vec_len (node->next_nodes) - r->n_next_nodes;
+ if (n_insert > 0)
+ {
+ i = r->next_frame_index + r->n_next_nodes;
+ vec_insert (nm->next_frames, n_insert, i);
+
+ /* Initialize newly inserted next frames. */
+ for (j = 0; j < n_insert; j++)
+ vlib_next_frame_init (nm->next_frames + i + j);
+
+ /* Relocate other next frames at higher indices. */
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ s = vlib_node_get_runtime (vm, j);
+ if (j != node_index && s->next_frame_index >= i)
+ s->next_frame_index += n_insert;
+ }
+
+ /* Pending frames may need to be relocated also. */
+ vec_foreach (pf, nm->pending_frames)
+ {
+ if (pf->next_frame_index != VLIB_PENDING_FRAME_NO_NEXT_FRAME
+ && pf->next_frame_index >= i)
+ pf->next_frame_index += n_insert;
+ }
+ /* *INDENT-OFF* */
+ pool_foreach (pf, nm->suspended_process_frames, ({
+ if (pf->next_frame_index != ~0 && pf->next_frame_index >= i)
+ pf->next_frame_index += n_insert;
+ }));
+ /* *INDENT-ON* */
+
+ r->n_next_nodes = vec_len (node->next_nodes);
+ }
+
+ /* Set frame's node runtime index. */
+ next_node = vlib_get_node (vm, node->next_nodes[next_index]);
+ nf = nm->next_frames + r->next_frame_index + next_index;
+ nf->node_runtime_index = next_node->runtime_index;
+
+ vlib_worker_thread_node_runtime_update ();
+
+ vlib_worker_thread_barrier_release (vm);
+}
+
+uword
+vlib_node_get_next (vlib_main_t * vm, uword node_index, uword next_node_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *node;
+ uword *p;
+
+ node = vec_elt (nm->nodes, node_index);
+
+ /* Runtime has to be initialized. */
+ ASSERT (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED);
+
+ if ((p = hash_get (node->next_slot_by_node, next_node_index)))
+ {
+ return p[0];
+ }
+
+ return (~0);
+}
+
+/* Add next node to given node in given slot. */
+uword
+vlib_node_add_next_with_slot (vlib_main_t * vm,
+ uword node_index,
+ uword next_node_index, uword slot)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *node, *next;
+ uword *p;
+
+ node = vec_elt (nm->nodes, node_index);
+ next = vec_elt (nm->nodes, next_node_index);
+
+ /* Runtime has to be initialized. */
+ ASSERT (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED);
+
+ if ((p = hash_get (node->next_slot_by_node, next_node_index)))
+ {
+ /* Next already exists: slot must match. */
+ if (slot != ~0)
+ ASSERT (slot == p[0]);
+ return p[0];
+ }
+
+ if (slot == ~0)
+ slot = vec_len (node->next_nodes);
+
+ vec_validate_init_empty (node->next_nodes, slot, ~0);
+ vec_validate (node->n_vectors_by_next_node, slot);
+
+ node->next_nodes[slot] = next_node_index;
+ hash_set (node->next_slot_by_node, next_node_index, slot);
+
+ vlib_node_runtime_update (vm, node_index, slot);
+
+ next->prev_node_bitmap = clib_bitmap_ori (next->prev_node_bitmap,
+ node_index);
+
+ /* Siblings all get the same next-node structure. */
+ {
+ uword sib_node_index, sib_slot;
+ vlib_node_t *sib_node;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (sib_node_index, node->sibling_bitmap, ({
+ sib_node = vec_elt (nm->nodes, sib_node_index);
+ if (sib_node != node)
+ {
+ sib_slot = vlib_node_add_next_with_slot (vm, sib_node_index, next_node_index, slot);
+ ASSERT (sib_slot == slot);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return slot;
+}
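
[editor's note] Passing ~0 as the slot asks the function to allocate the next free slot and return it; vlib's vlib_node_add_next helper is, as far as we can tell, just this call with slot = ~0. An illustrative runtime arc creation, with hypothetical node indices:

/* Connect my-node to error-drop at runtime. */
u32 slot = vlib_node_add_next_with_slot (vm, my_node_index,
                                         error_drop_node_index, ~0);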
+
+/* Add named next node to given node in given slot. */
+uword
+vlib_node_add_named_next_with_slot (vlib_main_t * vm,
+ uword node, char *name, uword slot)
+{
+ vlib_node_main_t *nm;
+ vlib_node_t *n, *n_next;
+
+ nm = &vm->node_main;
+ n = vlib_get_node (vm, node);
+
+ n_next = vlib_get_node_by_name (vm, (u8 *) name);
+ if (!n_next)
+ {
+ if (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED)
+ return ~0;
+
+ if (slot == ~0)
+ slot = clib_max (vec_len (n->next_node_names),
+ vec_len (n->next_nodes));
+ vec_validate (n->next_node_names, slot);
+ n->next_node_names[slot] = name;
+ return slot;
+ }
+
+ return vlib_node_add_next_with_slot (vm, node, n_next->index, slot);
+}
+
+static void
+node_elog_init (vlib_main_t * vm, uword ni)
+{
+ elog_event_type_t t;
+
+ memset (&t, 0, sizeof (t));
+
+ /* Two event types for this node: one for when the node function is
+ called, one for when it returns. */
+ vec_validate (vm->node_call_elog_event_types, ni);
+ vm->node_call_elog_event_types[ni] = t;
+
+ vec_validate (vm->node_return_elog_event_types, ni);
+ vm->node_return_elog_event_types[ni] = t;
+
+ node_set_elog_name (vm, ni);
+}
+
+#ifdef CLIB_UNIX
+#define STACK_ALIGN (clib_mem_get_page_size())
+#else
+#define STACK_ALIGN CLIB_CACHE_LINE_BYTES
+#endif
+
+static void
+register_node (vlib_main_t * vm, vlib_node_registration_t * r)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+ u32 page_size = clib_mem_get_page_size ();
+ int i;
+
+ if (CLIB_DEBUG > 0)
+ {
+ /* Default (0) type should match INTERNAL. */
+ vlib_node_t zero = { 0 };
+ ASSERT (VLIB_NODE_TYPE_INTERNAL == zero.type);
+ }
+
+ ASSERT (r->function != 0);
+
+ n = clib_mem_alloc_no_fail (sizeof (n[0]));
+ memset (n, 0, sizeof (n[0]));
+ n->index = vec_len (nm->nodes);
+
+ vec_add1 (nm->nodes, n);
+
+ /* Name is always a vector so it can be formatted with %v. */
+ if (clib_mem_is_heap_object (vec_header (r->name, 0)))
+ n->name = vec_dup ((u8 *) r->name);
+ else
+ n->name = format (0, "%s", r->name);
+
+ if (!nm->node_by_name)
+ nm->node_by_name = hash_create_vec ( /* size */ 32,
+ sizeof (n->name[0]), sizeof (uword));
+
+ /* Node names must be unique. */
+ {
+ vlib_node_t *o = vlib_get_node_by_name (vm, n->name);
+ if (o)
+ clib_error ("more than one node named `%v'", n->name);
+ }
+
+ hash_set (nm->node_by_name, n->name, n->index);
+
+ r->index = n->index; /* save index in registration */
+ n->function = r->function;
+
+ /* Node index of next sibling will be filled in by vlib_node_main_init. */
+ n->sibling_of = r->sibling_of;
+ if (r->sibling_of && r->n_next_nodes > 0)
+ clib_error ("sibling node should not have any next nodes `%v'", n->name);
+
+ if (r->type == VLIB_NODE_TYPE_INTERNAL)
+ ASSERT (r->vector_size > 0);
+
+#define _(f) n->f = r->f
+
+ _(type);
+ _(flags);
+ _(state);
+ _(scalar_size);
+ _(vector_size);
+ _(format_buffer);
+ _(unformat_buffer);
+ _(format_trace);
+ _(validate_frame);
+
+ /* Register error counters. */
+ vlib_register_errors (vm, n->index, r->n_errors, r->error_strings);
+ node_elog_init (vm, n->index);
+
+ _(runtime_data_bytes);
+ if (r->runtime_data_bytes > 0)
+ {
+ vec_resize (n->runtime_data, r->runtime_data_bytes);
+ if (r->runtime_data)
+ clib_memcpy (n->runtime_data, r->runtime_data, r->runtime_data_bytes);
+ }
+
+ vec_resize (n->next_node_names, r->n_next_nodes);
+ for (i = 0; i < r->n_next_nodes; i++)
+ n->next_node_names[i] = r->next_nodes[i];
+
+ vec_validate_init_empty (n->next_nodes, r->n_next_nodes - 1, ~0);
+ vec_validate (n->n_vectors_by_next_node, r->n_next_nodes - 1);
+
+ n->owner_node_index = n->owner_next_index = ~0;
+
+ /* Initialize node runtime. */
+ {
+ vlib_node_runtime_t *rt;
+ u32 i;
+
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ vlib_process_t *p;
+ uword log2_n_stack_bytes;
+
+ log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes, 15);
+
+#ifdef CLIB_UNIX
+ /*
+ * Bump the stack size if running over a kernel with a large page size,
+ * and the stack isn't too big to begin with. Otherwise, we'll
+ * trip over the stack guard page for sure.
+ */
+ if ((page_size > (4 << 10)) && log2_n_stack_bytes < 19)
+ {
+ if ((1 << log2_n_stack_bytes) <= page_size)
+ log2_n_stack_bytes = min_log2 (page_size) + 1;
+ else
+ log2_n_stack_bytes++;
+ }
+#endif
+
+ p = clib_mem_alloc_aligned_at_offset
+ (sizeof (p[0]) + (1 << log2_n_stack_bytes),
+ STACK_ALIGN, STRUCT_OFFSET_OF (vlib_process_t, stack),
+ 0 /* no, don't call os_out_of_memory */ );
+ if (p == 0)
+ clib_panic ("failed to allocate process stack (%d bytes)",
+ 1 << log2_n_stack_bytes);
+
+ memset (p, 0, sizeof (p[0]));
+ p->log2_n_stack_bytes = log2_n_stack_bytes;
+
+ /* Process node's runtime index is really index into process
+ pointer vector. */
+ n->runtime_index = vec_len (nm->processes);
+
+ vec_add1 (nm->processes, p);
+
+ /* Paint first stack word with magic number so we can at least
+ detect process stack overruns. */
+ p->stack[0] = VLIB_PROCESS_STACK_MAGIC;
+
+ /* Node runtime is stored inside of process. */
+ rt = &p->node_runtime;
+
+#ifdef CLIB_UNIX
+ /*
+ * Disallow writes to the bottom page of the stack, to
+ * catch stack overflows.
+ */
+ if (mprotect (p->stack, page_size, PROT_READ) < 0)
+ clib_unix_warning ("process stack");
+#endif
+
+ }
+ else
+ {
+ vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1,
+ /* align */ CLIB_CACHE_LINE_BYTES);
+ n->runtime_index = rt - nm->nodes_by_type[n->type];
+ }
+
+ if (n->type == VLIB_NODE_TYPE_INPUT)
+ nm->input_node_counts_by_state[n->state] += 1;
+
+ rt->function = n->function;
+ rt->flags = n->flags;
+ rt->state = n->state;
+ rt->node_index = n->index;
+
+ rt->n_next_nodes = r->n_next_nodes;
+ rt->next_frame_index = vec_len (nm->next_frames);
+
+ vec_resize (nm->next_frames, rt->n_next_nodes);
+ for (i = 0; i < rt->n_next_nodes; i++)
+ vlib_next_frame_init (nm->next_frames + rt->next_frame_index + i);
+
+ vec_resize (rt->errors, r->n_errors);
+ for (i = 0; i < vec_len (rt->errors); i++)
+ rt->errors[i] = vlib_error_set (n->index, i);
+
+ STATIC_ASSERT_SIZEOF (vlib_node_runtime_t, 128);
+ ASSERT (vec_len (n->runtime_data) <= VLIB_NODE_RUNTIME_DATA_SIZE);
+
+ if (vec_len (n->runtime_data) > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ vec_len (n->runtime_data));
+
+ vec_free (n->runtime_data);
+ }
+}
+
+/* Register new packet processing node. */
+u32
+vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r)
+{
+ register_node (vm, r);
+ return r->index;
+}
+
+static uword
+null_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u16 n_vectors = frame->n_vectors;
+
+ vlib_node_increment_counter (vm, node->node_index, 0, n_vectors);
+ vlib_buffer_free (vm, vlib_frame_args (frame), n_vectors);
+ vlib_frame_free (vm, node, frame);
+
+ return n_vectors;
+}
+
+void
+vlib_register_all_static_nodes (vlib_main_t * vm)
+{
+ vlib_node_registration_t *r;
+
+ static char *null_node_error_strings[] = {
+ "blackholed packets",
+ };
+
+ static vlib_node_registration_t null_node_reg = {
+ .function = null_node_fn,
+ .vector_size = sizeof (u32),
+ .name = "null-node",
+ .n_errors = 1,
+ .error_strings = null_node_error_strings,
+ };
+
+ /* Make sure that node index 0 is not used by
+ a real node. */
+ register_node (vm, &null_node_reg);
+
+ r = vm->node_main.node_registrations;
+ while (r)
+ {
+ register_node (vm, r);
+ r = r->next_registration;
+ }
+}
+
+clib_error_t *
+vlib_node_main_init (vlib_main_t * vm)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ clib_error_t *error = 0;
+ vlib_node_t *n;
+ uword ni;
+
+ nm->frame_size_hash = hash_create (0, sizeof (uword));
+ nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED;
+
+ /* Generate sibling relationships */
+ {
+ vlib_node_t *n, *sib;
+ uword si;
+
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ n = vec_elt (nm->nodes, ni);
+
+ if (!n->sibling_of)
+ continue;
+
+ sib = vlib_get_node_by_name (vm, (u8 *) n->sibling_of);
+ if (!sib)
+ {
+ error = clib_error_create ("sibling `%s' not found for node `%v'",
+ n->sibling_of, n->name);
+ goto done;
+ }
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (si, sib->sibling_bitmap, ({
+ vlib_node_t * m = vec_elt (nm->nodes, si);
+
+ /* Connect all of sibling's siblings to us. */
+ m->sibling_bitmap = clib_bitmap_ori (m->sibling_bitmap, n->index);
+
+ /* Connect us to all of sibling's siblings. */
+ n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, si);
+ }));
+ /* *INDENT-ON* */
+
+ /* Connect sibling to us. */
+ sib->sibling_bitmap = clib_bitmap_ori (sib->sibling_bitmap, n->index);
+
+ /* Connect us to sibling. */
+ n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, sib->index);
+ }
+ }
+
+ /* Resolve next names into next indices. */
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ uword i;
+
+ n = vec_elt (nm->nodes, ni);
+
+ for (i = 0; i < vec_len (n->next_node_names); i++)
+ {
+ char *a = n->next_node_names[i];
+
+ if (!a)
+ continue;
+
+ if (~0 == vlib_node_add_named_next_with_slot (vm, n->index, a, i))
+ {
+ error = clib_error_create
+ ("node `%v' refers to unknown node `%s'", n->name, a);
+ goto done;
+ }
+ }
+
+ vec_free (n->next_node_names);
+ }
+
+ /* Set previous node pointers. */
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ vlib_node_t *n_next;
+ uword i;
+
+ n = vec_elt (nm->nodes, ni);
+
+ for (i = 0; i < vec_len (n->next_nodes); i++)
+ {
+ if (n->next_nodes[i] >= vec_len (nm->nodes))
+ continue;
+
+ n_next = vec_elt (nm->nodes, n->next_nodes[i]);
+ n_next->prev_node_bitmap =
+ clib_bitmap_ori (n_next->prev_node_bitmap, n->index);
+ }
+ }
+
+ {
+ vlib_next_frame_t *nf;
+ vlib_node_runtime_t *r;
+ vlib_node_t *next;
+ uword i;
+
+ vec_foreach (r, nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
+ {
+ if (r->n_next_nodes == 0)
+ continue;
+
+ n = vlib_get_node (vm, r->node_index);
+ nf = vec_elt_at_index (nm->next_frames, r->next_frame_index);
+
+ for (i = 0; i < vec_len (n->next_nodes); i++)
+ {
+ next = vlib_get_node (vm, n->next_nodes[i]);
+
+ /* Validate node runtime indices are correctly initialized. */
+ ASSERT (nf[i].node_runtime_index == next->runtime_index);
+
+ nf[i].flags = 0;
+ if (next->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH)
+ nf[i].flags |= VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
+ }
+ }
+ }
+
+done:
+ return error;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node.h b/src/vlib/node.h
new file mode 100644
index 00000000..2acd61ce
--- /dev/null
+++ b/src/vlib/node.h
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node.h: VLIB processing nodes
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_node_h
+#define included_vlib_node_h
+
+#include <vppinfra/cpu.h>
+#include <vppinfra/longjmp.h>
+#include <vppinfra/lock.h>
+#include <vlib/trace.h> /* for vlib_trace_filter_t */
+
+/* Forward declaration. */
+struct vlib_node_runtime_t;
+struct vlib_frame_t;
+
+/* Internal nodes (including output nodes) move data from node to
+ node (or out of the graph for output nodes). */
+typedef uword (vlib_node_function_t) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t * node,
+ struct vlib_frame_t * frame);
+
+typedef enum
+{
+ /* An internal node on the call graph (could be output). */
+ VLIB_NODE_TYPE_INTERNAL,
+
+ /* Nodes which input data into the processing graph.
+ Input nodes are called for each iteration of main loop. */
+ VLIB_NODE_TYPE_INPUT,
+
+ /* Nodes to be called before all input nodes.
+ Used, for example, to clean out driver TX rings before
+ processing input. */
+ VLIB_NODE_TYPE_PRE_INPUT,
+
+ /* "Process" nodes which can be suspended and later resumed. */
+ VLIB_NODE_TYPE_PROCESS,
+
+ VLIB_N_NODE_TYPE,
+} vlib_node_type_t;
+
+typedef struct _vlib_node_registration
+{
+ /* Vector processing function for this node. */
+ vlib_node_function_t *function;
+
+ /* Node name. */
+ char *name;
+
+ /* Name of sibling (if applicable). */
+ char *sibling_of;
+
+ /* Node index filled in by registration. */
+ u32 index;
+
+ /* Type of this node. */
+ vlib_node_type_t type;
+
+ /* Error strings indexed by error code for this node. */
+ char **error_strings;
+
+ /* Buffer format/unformat for this node. */
+ format_function_t *format_buffer;
+ unformat_function_t *unformat_buffer;
+
+ /* Trace format/unformat for this node. */
+ format_function_t *format_trace;
+ unformat_function_t *unformat_trace;
+
+ /* Function to validate incoming frames. */
+ u8 *(*validate_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t *,
+ struct vlib_frame_t * f);
+
+ /* Per-node runtime data. */
+ void *runtime_data;
+
+ /* Process stack size. */
+ u16 process_log2_n_stack_bytes;
+
+ /* Number of bytes of per-node run time data. */
+ u8 runtime_data_bytes;
+
+ /* State for input nodes. */
+ u8 state;
+
+ /* Node flags. */
+ u16 flags;
+
+ /* Size of scalar and vector arguments in bytes. */
+ u16 scalar_size, vector_size;
+
+ /* Number of error codes used by this node. */
+ u16 n_errors;
+
+ /* Number of next node names that follow. */
+ u16 n_next_nodes;
+
+ /* Constructor link-list, don't ask... */
+ struct _vlib_node_registration *next_registration;
+
+ /* Names of next nodes which this node feeds into. */
+ char *next_nodes[];
+
+} vlib_node_registration_t;
+
+#define VLIB_REGISTER_NODE(x,...) \
+ __VA_ARGS__ vlib_node_registration_t x; \
+static void __vlib_add_node_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_node_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ x.next_registration = vm->node_main.node_registrations; \
+ vm->node_main.node_registrations = &x; \
+} \
+__VA_ARGS__ vlib_node_registration_t x
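
[editor's note] The usual static registration pattern pairs this macro with a node function and designated initializers, much as null_node_reg does inside node.c above. A minimal sketch of a hypothetical internal node that simply drops everything it receives:

static uword
my_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
{
  /* A real node would inspect and enqueue buffers; this one drops them. */
  vlib_buffer_free (vm, vlib_frame_args (f), f->n_vectors);
  return f->n_vectors;
}

VLIB_REGISTER_NODE (my_node) = {
  .function = my_node_fn,
  .name = "my-node",
  .vector_size = sizeof (u32),
  .type = VLIB_NODE_TYPE_INTERNAL,
};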
+
+#if CLIB_DEBUG > 0
+#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn)
+#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)
+#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn)
+#else
+#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) \
+ uword \
+ __attribute__ ((flatten)) \
+ __attribute__ ((target (tgt))) \
+ CLIB_CPU_OPTIMIZED \
+ fn ## _ ## arch ( struct vlib_main_t * vm, \
+ struct vlib_node_runtime_t * node, \
+ struct vlib_frame_t * frame) \
+ { return fn (vm, node, frame); }
+
+#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) \
+ foreach_march_variant(VLIB_NODE_FUNCTION_CLONE_TEMPLATE, fn)
+
+#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn) \
+ VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) \
+ CLIB_MULTIARCH_SELECT_FN(fn, static inline) \
+ static void __attribute__((__constructor__)) \
+ __vlib_node_function_multiarch_select_##node (void) \
+ { node.function = fn ## _multiarch_select(); }
+#endif
+
+always_inline vlib_node_registration_t *
+vlib_node_next_registered (vlib_node_registration_t * c)
+{
+ c =
+ clib_elf_section_data_next (c,
+ c->n_next_nodes * sizeof (c->next_nodes[0]));
+ return c;
+}
+
+typedef struct
+{
+ /* Total calls, clock ticks and vector elements processed for this node. */
+ u64 calls, vectors, clocks, suspends;
+ u64 max_clock;
+ u64 max_clock_n;
+} vlib_node_stats_t;
+
+#define foreach_vlib_node_state \
+ /* Input node is called each iteration of main loop. \
+ This is the default (zero). */ \
+ _ (POLLING) \
+ /* Input node is called when device signals an interrupt. */ \
+ _ (INTERRUPT) \
+ /* Input node is never called. */ \
+ _ (DISABLED)
+
+typedef enum
+{
+#define _(f) VLIB_NODE_STATE_##f,
+ foreach_vlib_node_state
+#undef _
+ VLIB_N_NODE_STATE,
+} vlib_node_state_t;
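
[editor's note] Input nodes move between these states at runtime; for instance, a driver might switch itself from polling to interrupt mode with vlib_node_set_state, declared alongside these definitions in vlib (node index hypothetical):

vlib_node_set_state (vm, my_input_node.index, VLIB_NODE_STATE_INTERRUPT);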
+
+typedef struct vlib_node_t
+{
+ /* Vector processing function for this node. */
+ vlib_node_function_t *function;
+
+ /* Node name. */
+ u8 *name;
+
+ /* Node name index in elog string table. */
+ u32 name_elog_string;
+
+ /* Total statistics for this node. */
+ vlib_node_stats_t stats_total;
+
+ /* Saved values as of last clear (or zero if never cleared).
+ Current values are always stats_total - stats_last_clear. */
+ vlib_node_stats_t stats_last_clear;
+
+ /* Type of this node. */
+ vlib_node_type_t type;
+
+ /* Node index. */
+ u32 index;
+
+ /* Index of corresponding node runtime. */
+ u32 runtime_index;
+
+ /* Runtime data for this node. */
+ void *runtime_data;
+
+ /* Node flags. */
+ u16 flags;
+
+ /* Processing function keeps frame. Tells node dispatching code not
+ to free frame after dispatch is done. */
+#define VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH (1 << 0)
+
+ /* Node counts as output/drop/punt node for stats purposes. */
+#define VLIB_NODE_FLAG_IS_OUTPUT (1 << 1)
+#define VLIB_NODE_FLAG_IS_DROP (1 << 2)
+#define VLIB_NODE_FLAG_IS_PUNT (1 << 3)
+#define VLIB_NODE_FLAG_IS_HANDOFF (1 << 4)
+
+ /* Set if current node runtime has traced vectors. */
+#define VLIB_NODE_FLAG_TRACE (1 << 5)
+
+#define VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE (1 << 6)
+#define VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE (1 << 7)
+
+ /* State for input nodes. */
+ u8 state;
+
+ /* Number of bytes of run time data. */
+ u8 runtime_data_bytes;
+
+ /* Number of error codes used by this node. */
+ u16 n_errors;
+
+ /* Size of scalar and vector arguments in bytes. */
+ u16 scalar_size, vector_size;
+
+ /* Handle/index in error heap for this node. */
+ u32 error_heap_handle;
+ u32 error_heap_index;
+
+ /* Error strings indexed by error code for this node. */
+ char **error_strings;
+
+ /* Vector of next node names.
+ Only used before next_nodes array is initialized. */
+ char **next_node_names;
+
+ /* Next node indices for this node. */
+ u32 *next_nodes;
+
+ /* Name of node that we are sibling of. */
+ char *sibling_of;
+
+ /* Bitmap of all of this node's siblings. */
+ uword *sibling_bitmap;
+
+ /* Total number of vectors sent to each next node. */
+ u64 *n_vectors_by_next_node;
+
+ /* Hash table mapping next node index into slot in
+ next_nodes vector. Quickly determines whether this node
+ is connected to given next node and, if so, with which slot. */
+ uword *next_slot_by_node;
+
+ /* Bitmap of node indices which feed this node. */
+ uword *prev_node_bitmap;
+
+ /* Node/next-index which owns enqueue rights to this node. */
+ u32 owner_node_index, owner_next_index;
+
+ /* Buffer format/unformat for this node. */
+ format_function_t *format_buffer;
+ unformat_function_t *unformat_buffer;
+
+ /* Trace buffer format/unformat for this node. */
+ format_function_t *format_trace;
+
+ /* Function to validate incoming frames. */
+ u8 *(*validate_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t *,
+ struct vlib_frame_t * f);
+ /* for pretty-printing, not typically valid */
+ u8 *state_string;
+} vlib_node_t;
+
+#define VLIB_INVALID_NODE_INDEX ((u32) ~0)
+
+/* Max number of vector elements to process at once per node. */
+#define VLIB_FRAME_SIZE 256
+#define VLIB_FRAME_ALIGN CLIB_CACHE_LINE_BYTES
+
+/* Calling frame (think stack frame) for a node. */
+typedef struct vlib_frame_t
+{
+ /* Frame flags. */
+ u16 flags;
+
+ /* Number of scalar bytes in arguments. */
+ u8 scalar_size;
+
+ /* Number of bytes per vector argument. */
+ u8 vector_size;
+
+ /* Number of vector elements currently in frame. */
+ u16 n_vectors;
+
+ /* Scalar and vector arguments to next node. */
+ u8 arguments[0];
+} vlib_frame_t;
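
[editor's note] Vector elements live in `arguments' after the scalar bytes; node functions normally reach them through an accessor rather than indexing the flexible array directly, as null_node_fn in node.c above does with vlib_frame_args. A typical sketch:

u32 *from = vlib_frame_args (frame); /* buffer indices in this frame */
u16 n_left = frame->n_vectors;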
+
+typedef struct
+{
+ /* Frame index. */
+ u32 frame_index;
+
+ /* Node runtime for this next. */
+ u32 node_runtime_index;
+
+ /* Next frame flags. */
+ u32 flags;
+
+ /* Reflects node frame-used flag for this next. */
+#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH \
+ VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH
+
+ /* This next frame owns enqueue to node
+ corresponding to node_runtime_index. */
+#define VLIB_FRAME_OWNER (1 << 15)
+
+ /* Set when frame has been allocated for this next. */
+#define VLIB_FRAME_IS_ALLOCATED VLIB_NODE_FLAG_IS_OUTPUT
+
+ /* Set when frame has been added to pending vector. */
+#define VLIB_FRAME_PENDING VLIB_NODE_FLAG_IS_DROP
+
+ /* Set when frame is to be freed after dispatch. */
+#define VLIB_FRAME_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_IS_PUNT
+
+ /* Set when frame has traced packets. */
+#define VLIB_FRAME_TRACE VLIB_NODE_FLAG_TRACE
+
+ /* Number of vectors enqueued to this next since last overflow. */
+ u32 vectors_since_last_overflow;
+} vlib_next_frame_t;
+
+always_inline void
+vlib_next_frame_init (vlib_next_frame_t * nf)
+{
+ memset (nf, 0, sizeof (nf[0]));
+ nf->frame_index = ~0;
+ nf->node_runtime_index = ~0;
+}
+
+/* A frame pending dispatch by main loop. */
+typedef struct
+{
+ /* Node and runtime for this frame. */
+ u32 node_runtime_index;
+
+ /* Frame index (in the heap). */
+ u32 frame_index;
+
+ /* Start of next frames for this node. */
+ u32 next_frame_index;
+
+ /* Special value for next_frame_index when there is no next frame. */
+#define VLIB_PENDING_FRAME_NO_NEXT_FRAME ((u32) ~0)
+} vlib_pending_frame_t;
+
+typedef struct vlib_node_runtime_t
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); /**< cacheline mark */
+
+ vlib_node_function_t *function; /**< Node function to call. */
+
+ vlib_error_t *errors; /**< Vector of errors for this node. */
+
+#if __SIZEOF_POINTER__ == 4
+ u8 pad[8];
+#endif
+
+ u32 clocks_since_last_overflow; /**< Number of clock cycles. */
+
+ u32 max_clock; /**< Maximum clock cycle for an
+ invocation. */
+
+ u32 max_clock_n; /**< Number of vectors in the recorded
+ max_clock. */
+
+ u32 calls_since_last_overflow; /**< Number of calls. */
+
+ u32 vectors_since_last_overflow; /**< Number of vector elements
+ processed by this node. */
+
+ u32 next_frame_index; /**< Start of next frames for this
+ node. */
+
+ u32 node_index; /**< Node index. */
+
+ u32 input_main_loops_per_call; /**< For input nodes: decremented
+ on each main loop iteration until
+ it reaches zero and the function is
+ called. Allows some input nodes to
+ be called more often than others. */
+
+ u32 main_loop_count_last_dispatch; /**< Saved main loop counter of last
+ dispatch of this node. */
+
+ u32 main_loop_vector_stats[2];
+
+ u16 flags; /**< Copy of main node flags. */
+
+ u16 state; /**< Input node state. */
+
+ u16 n_next_nodes;
+
+ u16 cached_next_index; /**< Next frame index to which vector
+ arguments were enqueued the last
+ time this node ran. Set to zero
+ before the first run of this
+ node. */
+
+ u16 thread_index; /**< thread this node runs on */
+
+ u8 runtime_data[0]; /**< Function dependent
+ node-runtime data. This data is
+ thread local, and it is not
+ cloned from main thread. It needs
+ to be initialized for each thread
+ before it is used unless
+ runtime_data template exists in
+ vlib_node_t. */
+}
+vlib_node_runtime_t;
+
+#define VLIB_NODE_RUNTIME_DATA_SIZE (sizeof (vlib_node_runtime_t) - STRUCT_OFFSET_OF (vlib_node_runtime_t, runtime_data))
+
+typedef struct
+{
+ /* Number of allocated frames for this scalar/vector size. */
+ u32 n_alloc_frames;
+
+ /* Vector of free frame indices for this scalar/vector size. */
+ u32 *free_frame_indices;
+} vlib_frame_size_t;
+
+typedef struct
+{
+ /* User's opaque value for event type. */
+ uword opaque;
+} vlib_process_event_type_t;
+
+typedef struct
+{
+ /* Node runtime for this process. */
+ vlib_node_runtime_t node_runtime;
+
+ /* Where to longjmp when process is done. */
+ clib_longjmp_t return_longjmp;
+
+#define VLIB_PROCESS_RETURN_LONGJMP_RETURN ((uword) ~0 - 0)
+#define VLIB_PROCESS_RETURN_LONGJMP_SUSPEND ((uword) ~0 - 1)
+
+ /* Where to longjmp to resume node after suspend. */
+ clib_longjmp_t resume_longjmp;
+#define VLIB_PROCESS_RESUME_LONGJMP_SUSPEND 0
+#define VLIB_PROCESS_RESUME_LONGJMP_RESUME 1
+
+ u16 flags;
+#define VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK (1 << 0)
+#define VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT (1 << 1)
+ /* Set to indicate that this process has been added to resume vector. */
+#define VLIB_PROCESS_RESUME_PENDING (1 << 2)
+
+ /* Process function is currently running. */
+#define VLIB_PROCESS_IS_RUNNING (1 << 3)
+
+ /* Size of process stack. */
+ u16 log2_n_stack_bytes;
+
+ u32 suspended_process_frame_index;
+
+ /* Number of times this process was suspended. */
+ u32 n_suspends;
+
+ /* Vectors of pending event data indexed by event type index. */
+ void **pending_event_data_by_type_index;
+
+ /* Bitmap of event type-indices with non-empty vectors. */
+ uword *non_empty_event_type_bitmap;
+
+ /* Bitmap of event type-indices which are one time events. */
+ uword *one_time_event_type_bitmap;
+
+ /* The event type is an opaque pointer -- typically a pointer to an
+ event handler function. Hash table mapping the opaque value to a
+ type index. */
+ uword *event_type_index_by_type_opaque;
+
+ /* Pool of currently valid event types. */
+ vlib_process_event_type_t *event_type_pool;
+
+ /*
+ * Saved when suspending: the clock interval (in 10us ticks) after
+ * which the process is to be resumed.
+ */
+ u64 resume_clock_interval;
+
+ /* Handle from timer code, to cancel an unexpired timer */
+ u32 stop_timer_handle;
+
+ /* Default output function and its argument for any CLI outputs
+ within the process. */
+ vlib_cli_output_function_t *output_function;
+ uword output_function_arg;
+
+#ifdef CLIB_UNIX
+ /* Pad to a multiple of the page size so we can mprotect process stacks */
+#define PAGE_SIZE_MULTIPLE 0x1000
+#define ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT __attribute__ ((aligned (PAGE_SIZE_MULTIPLE)))
+#else
+#define ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT
+#endif
+
+ /* Process stack. Starts here and extends 2^log2_n_stack_bytes
+ bytes. */
+
+#define VLIB_PROCESS_STACK_MAGIC (0xdead7ead)
+ u32 stack[0] ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT;
+} vlib_process_t __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)));
+
+#ifdef CLIB_UNIX
+ /* Ensure that the stack is aligned on a multiple of the page size */
+typedef char
+ assert_process_stack_must_be_aligned_exactly_to_page_size_multiple
+ [(sizeof (vlib_process_t) - PAGE_SIZE_MULTIPLE) == 0 ? 0 : -1];
+#endif
+
+typedef struct
+{
+ u32 node_index;
+
+ u32 one_time_event;
+} vlib_one_time_waiting_process_t;
+
+typedef struct
+{
+ u16 n_data_elts;
+
+ u16 n_data_elt_bytes;
+
+ /* n_data_elts * n_data_elt_bytes */
+ u32 n_data_bytes;
+
+ /* Process node & event type to be used to signal event. */
+ u32 process_node_index;
+
+ u32 event_type_index;
+
+ union
+ {
+ u8 inline_event_data[64 - 3 * sizeof (u32) - 2 * sizeof (u16)];
+
+ /* Vector of event data used only when data does not fit inline. */
+ u8 *event_data_as_vector;
+ };
+}
+vlib_signal_timed_event_data_t;
+
+always_inline uword
+vlib_timing_wheel_data_is_timed_event (u32 d)
+{
+ return d & 1;
+}
+
+always_inline u32
+vlib_timing_wheel_data_set_suspended_process (u32 i)
+{
+ return 0 + 2 * i;
+}
+
+always_inline u32
+vlib_timing_wheel_data_set_timed_event (u32 i)
+{
+ return 1 + 2 * i;
+}
+
+always_inline uword
+vlib_timing_wheel_data_get_index (u32 d)
+{
+ return d / 2;
+}
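+
+/* Example of the encoding (a sketch): the low bit tags the entry kind,
+   the remaining bits carry an index.  For suspended-process index 7:
+
+     d = vlib_timing_wheel_data_set_suspended_process (7);   -- d == 14
+     vlib_timing_wheel_data_is_timed_event (d)               -- 0 (a process)
+     vlib_timing_wheel_data_get_index (d)                    -- 7
+*/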
+
+typedef struct
+{
+ /* Public nodes. */
+ vlib_node_t **nodes;
+
+ /* Node index hashed by node name. */
+ uword *node_by_name;
+
+ u32 flags;
+#define VLIB_NODE_MAIN_RUNTIME_STARTED (1 << 0)
+
+ /* Nodes segregated by type for cache locality.
+ Does not apply to nodes of type VLIB_NODE_TYPE_INTERNAL. */
+ vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE];
+
+ /* Node runtime indices for input nodes with pending interrupts. */
+ u32 *pending_interrupt_node_runtime_indices;
+ clib_spinlock_t pending_interrupt_lock;
+
+ /* Input nodes are switched from/to interrupt to/from polling mode
+ when average vector length goes above/below polling/interrupt
+ thresholds. */
+ u32 polling_threshold_vector_length;
+ u32 interrupt_threshold_vector_length;
+
+ /* Vector of next frames. */
+ vlib_next_frame_t *next_frames;
+
+ /* Vector of internal nodes' frames waiting to be dispatched. */
+ vlib_pending_frame_t *pending_frames;
+
+ /* Timing wheel for scheduling time-based node dispatch. */
+ void *timing_wheel;
+
+ vlib_signal_timed_event_data_t *signal_timed_event_data_pool;
+
+ /* Opaque data vector added via timing_wheel_advance. */
+ u32 *data_from_advancing_timing_wheel;
+
+ /* CPU time of next process to be ready on timing wheel. */
+ f64 time_next_process_ready;
+
+ /* Vector of process nodes.
+ One for each node of type VLIB_NODE_TYPE_PROCESS. */
+ vlib_process_t **processes;
+
+ /* Current running process or ~0 if no process running. */
+ u32 current_process_index;
+
+ /* Pool of pending process frames. */
+ vlib_pending_frame_t *suspended_process_frames;
+
+ /* Vector of event data vectors pending recycle. */
+ void **recycled_event_data_vectors;
+
+ /* Current counts of nodes in each state. */
+ u32 input_node_counts_by_state[VLIB_N_NODE_STATE];
+
+ /* Hash of (scalar_size,vector_size) to frame_sizes index. */
+ uword *frame_size_hash;
+
+ /* Per-size frame allocation information. */
+ vlib_frame_size_t *frame_sizes;
+
+ /* Time of last node runtime stats clear. */
+ f64 time_last_runtime_stats_clear;
+
+ /* Node registrations added by constructors */
+ vlib_node_registration_t *node_registrations;
+} vlib_node_main_t;
+
+
+#define FRAME_QUEUE_MAX_NELTS 32
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u64 head;
+ u64 head_hint;
+ u64 tail;
+ u32 n_in_use;
+ u32 nelts;
+ u32 written;
+ u32 threshold;
+ i32 n_vectors[FRAME_QUEUE_MAX_NELTS];
+} frame_queue_trace_t;
+
+typedef struct
+{
+ u64 count[FRAME_QUEUE_MAX_NELTS];
+} frame_queue_nelt_counter_t;
+
+#endif /* included_vlib_node_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
new file mode 100644
index 00000000..62ab2e64
--- /dev/null
+++ b/src/vlib/node_cli.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_cli.c: node CLI
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+static int
+node_cmp (void *a1, void *a2)
+{
+ vlib_node_t **n1 = a1;
+ vlib_node_t **n2 = a2;
+
+ return vec_cmp (n1[0]->name, n2[0]->name);
+}
+
+static clib_error_t *
+show_node_graph (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+ u32 node_index;
+
+ vlib_cli_output (vm, "%U\n", format_vlib_node_graph, nm, 0);
+
+ if (unformat (input, "%U", unformat_vlib_node, vm, &node_index))
+ {
+ n = vlib_get_node (vm, node_index);
+ vlib_cli_output (vm, "%U\n", format_vlib_node_graph, nm, n);
+ }
+ else
+ {
+ vlib_node_t **nodes = vec_dup (nm->nodes);
+ uword i;
+
+ vec_sort_with_function (nodes, node_cmp);
+
+ for (i = 0; i < vec_len (nodes); i++)
+ vlib_cli_output (vm, "%U\n\n", format_vlib_node_graph, nm, nodes[i]);
+
+ vec_free (nodes);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_node_graph_command, static) = {
+ .path = "show vlib graph",
+ .short_help = "Show packet processing node graph",
+ .function = show_node_graph,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_vlib_node_stats (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ vlib_node_t *n = va_arg (*va, vlib_node_t *);
+ int max = va_arg (*va, int);
+ f64 v;
+ char *state;
+ u8 *ns;
+ u8 *misc_info = 0;
+ u64 c, p, l, d;
+ f64 x;
+ f64 maxc, maxcn;
+ u32 maxn;
+ uword indent;
+
+ if (!n)
+ {
+ if (max)
+ return format (s,
+ "%=30s%=17s%=16s%=16s%=16s%=16s",
+ "Name", "Max Node Clocks", "Vectors at Max",
+ "Max Clocks", "Avg Clocks", "Avg Vectors/Call");
+ else
+ return format (s,
+ "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
+ "Name", "State", "Calls", "Vectors", "Suspends",
+ "Clocks", "Vectors/Call");
+ }
+
+ indent = format_get_indent (s);
+
+ l = n->stats_total.clocks - n->stats_last_clear.clocks;
+ c = n->stats_total.calls - n->stats_last_clear.calls;
+ p = n->stats_total.vectors - n->stats_last_clear.vectors;
+ d = n->stats_total.suspends - n->stats_last_clear.suspends;
+ maxc = (f64) n->stats_total.max_clock;
+ maxn = n->stats_total.max_clock_n;
+ if (n->stats_total.max_clock_n)
+ maxcn = (f64) n->stats_total.max_clock / (f64) maxn;
+ else
+ maxcn = 0.0;
+
+ /* Clocks per packet, per call or per suspend. */
+ x = 0;
+ if (p > 0)
+ x = (f64) l / (f64) p;
+ else if (c > 0)
+ x = (f64) l / (f64) c;
+ else if (d > 0)
+ x = (f64) l / (f64) d;
+
+ if (c > 0)
+ v = (double) p / (double) c;
+ else
+ v = 0;
+
+ state = "active";
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ vlib_process_t *p = vlib_get_process_from_node (vm, n);
+
+ /* Show processes with events pending. This helps spot bugs where events are not
+ being handled. */
+ if (!clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
+ misc_info = format (misc_info, "events pending, ");
+
+ switch (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT))
+ {
+ default:
+ if (!(p->flags & VLIB_PROCESS_IS_RUNNING))
+ state = "done";
+ break;
+
+ case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK:
+ state = "time wait";
+ break;
+
+ case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT:
+ state = "event wait";
+ break;
+
+ case (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+       | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK):
+   state = "any wait";
+   break;
+ }
+ }
+ else if (n->type != VLIB_NODE_TYPE_INTERNAL)
+ {
+ state = "polling";
+ if (n->state == VLIB_NODE_STATE_DISABLED)
+ state = "disabled";
+ else if (n->state == VLIB_NODE_STATE_INTERRUPT)
+ state = "interrupt wait";
+ }
+
+ ns = n->name;
+
+ if (max)
+ s = format (s, "%-30v%=17.2e%=16d%=16.2e%=16.2e%=16.2e",
+ ns, maxc, maxn, maxcn, x, v);
+ else
+ s = format (s, "%-30v%=12s%16Ld%16Ld%16Ld%16.2e%16.2f", ns, state,
+ c, p, d, x, v);
+
+ if (ns != n->name)
+ vec_free (ns);
+
+ if (misc_info)
+ {
+ s = format (s, "\n%U%v", format_white_space, indent + 4, misc_info);
+ vec_free (misc_info);
+ }
+
+ return s;
+}
+
+static clib_error_t *
+show_node_runtime (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+ f64 time_now;
+ u32 node_index;
+ vlib_node_t ***node_dups = 0;
+ f64 *vectors_per_main_loop = 0;
+ f64 *last_vector_length_per_node = 0;
+
+ time_now = vlib_time_now (vm);
+
+ if (unformat (input, "%U", unformat_vlib_node, vm, &node_index))
+ {
+ n = vlib_get_node (vm, node_index);
+ vlib_node_sync_stats (vm, n);
+ vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, 0, 0);
+ vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, n, 0);
+ }
+ else
+ {
+ vlib_node_t **nodes;
+ uword i, j;
+ f64 dt;
+ u64 n_input, n_output, n_drop, n_punt;
+ u64 n_internal_vectors, n_internal_calls;
+ u64 n_clocks, l, v, c, d;
+ int brief = 1;
+ int max = 0;
+ vlib_main_t **stat_vms = 0, *stat_vm;
+
+ /* Suppress nodes with zero calls since last clear */
+ if (unformat (input, "brief") || unformat (input, "b"))
+ brief = 1;
+ if (unformat (input, "verbose") || unformat (input, "v"))
+ brief = 0;
+ if (unformat (input, "max") || unformat (input, "m"))
+ max = 1;
+
+ for (i = 0; i < vec_len (vlib_mains); i++)
+ {
+ stat_vm = vlib_mains[i];
+ if (stat_vm)
+ vec_add1 (stat_vms, stat_vm);
+ }
+
+ /*
+ * Barrier sync across stats scraping.
+ * Otherwise, the counts will be grossly inaccurate.
+ */
+ vlib_worker_thread_barrier_sync (vm);
+
+ for (j = 0; j < vec_len (stat_vms); j++)
+ {
+ stat_vm = stat_vms[j];
+ nm = &stat_vm->node_main;
+
+ for (i = 0; i < vec_len (nm->nodes); i++)
+ {
+ n = nm->nodes[i];
+ vlib_node_sync_stats (stat_vm, n);
+ }
+
+ nodes = vec_dup (nm->nodes);
+
+ vec_add1 (node_dups, nodes);
+ vec_add1 (vectors_per_main_loop,
+ vlib_last_vectors_per_main_loop_as_f64 (stat_vm));
+ vec_add1 (last_vector_length_per_node,
+ vlib_last_vector_length_per_node (stat_vm));
+ }
+ vlib_worker_thread_barrier_release (vm);
+
+
+ for (j = 0; j < vec_len (stat_vms); j++)
+ {
+ stat_vm = stat_vms[j];
+ nodes = node_dups[j];
+
+ vec_sort_with_function (nodes, node_cmp);
+
+ n_input = n_output = n_drop = n_punt = n_clocks = 0;
+ n_internal_vectors = n_internal_calls = 0;
+ for (i = 0; i < vec_len (nodes); i++)
+ {
+ n = nodes[i];
+
+ l = n->stats_total.clocks - n->stats_last_clear.clocks;
+ n_clocks += l;
+
+ v = n->stats_total.vectors - n->stats_last_clear.vectors;
+ c = n->stats_total.calls - n->stats_last_clear.calls;
+
+ switch (n->type)
+ {
+ default:
+ continue;
+
+ case VLIB_NODE_TYPE_INTERNAL:
+ n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0;
+ n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0;
+ n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? v : 0;
+ if (!(n->flags & VLIB_NODE_FLAG_IS_OUTPUT))
+ {
+ n_internal_vectors += v;
+ n_internal_calls += c;
+ }
+ if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF)
+ n_input += v;
+ break;
+
+ case VLIB_NODE_TYPE_INPUT:
+ n_input += v;
+ break;
+ }
+ }
+
+ if (vec_len (vlib_mains) > 1)
+ {
+ vlib_worker_thread_t *w = vlib_worker_threads + j;
+ if (j > 0)
+ vlib_cli_output (vm, "---------------");
+
+ if (w->lcore_id > -1)
+ vlib_cli_output (vm, "Thread %d %s (lcore %u)", j, w->name,
+ w->lcore_id);
+ else
+ vlib_cli_output (vm, "Thread %d %s", j, w->name);
+ }
+
+ dt = time_now - nm->time_last_runtime_stats_clear;
+ vlib_cli_output
+ (vm,
+ "Time %.1f, average vectors/node %.2f, last %d main loops %.2f per node %.2f"
+ "\n vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
+ dt,
+ (n_internal_calls > 0
+ ? (f64) n_internal_vectors / (f64) n_internal_calls
+ : 0),
+ 1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE,
+ vectors_per_main_loop[j],
+ last_vector_length_per_node[j],
+ (f64) n_input / dt,
+ (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);
+
+ vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, 0, max);
+ for (i = 0; i < vec_len (nodes); i++)
+ {
+ c =
+ nodes[i]->stats_total.calls -
+ nodes[i]->stats_last_clear.calls;
+ d =
+ nodes[i]->stats_total.suspends -
+ nodes[i]->stats_last_clear.suspends;
+ if (c || d || !brief)
+ {
+ vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm,
+ nodes[i], max);
+ }
+ }
+ vec_free (nodes);
+ }
+ vec_free (stat_vms);
+ vec_free (node_dups);
+ vec_free (vectors_per_main_loop);
+ vec_free (last_vector_length_per_node);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_node_runtime_command, static) = {
+ .path = "show runtime",
+ .short_help = "Show packet processing runtime",
+ .function = show_node_runtime,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+clear_node_runtime (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t *nm;
+ vlib_node_t *n;
+ int i, j;
+ vlib_main_t **stat_vms = 0, *stat_vm;
+ vlib_node_runtime_t *r;
+
+ for (i = 0; i < vec_len (vlib_mains); i++)
+ {
+ stat_vm = vlib_mains[i];
+ if (stat_vm)
+ vec_add1 (stat_vms, stat_vm);
+ }
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ for (j = 0; j < vec_len (stat_vms); j++)
+ {
+ stat_vm = stat_vms[j];
+ nm = &stat_vm->node_main;
+
+ for (i = 0; i < vec_len (nm->nodes); i++)
+ {
+ n = nm->nodes[i];
+ vlib_node_sync_stats (stat_vm, n);
+ n->stats_last_clear = n->stats_total;
+
+ r = vlib_node_get_runtime (stat_vm, n->index);
+ r->max_clock = 0;
+ }
+ /* Note: input/output rates computed using vlib_global_main */
+ nm->time_last_runtime_stats_clear = vlib_time_now (vm);
+ }
+
+ vlib_worker_thread_barrier_release (vm);
+
+ vec_free (stat_vms);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_node_runtime_command, static) = {
+ .path = "clear runtime",
+ .short_help = "Clear packet processing runtime statistics",
+ .function = clear_node_runtime,
+};
+/* *INDENT-ON* */
+
+/* Dummy function to get us linked in. */
+void
+vlib_node_cli_reference (void)
+{
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node_format.c b/src/vlib/node_format.c
new file mode 100644
index 00000000..e9dde40f
--- /dev/null
+++ b/src/vlib/node_format.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_format.c: node formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+u8 *
+format_vlib_node_graph (u8 * s, va_list * va)
+{
+ vlib_node_main_t *nm = va_arg (*va, vlib_node_main_t *);
+ vlib_node_t *n = va_arg (*va, vlib_node_t *);
+ int i, j;
+ uword indent;
+ typedef struct
+ {
+ u32 next_node;
+ u32 next_slot;
+ u32 prev_node;
+ } tmp_t;
+ tmp_t *tmps = 0;
+ tmp_t empty = { .next_node = ~0, .prev_node = ~0 };
+
+ if (!n)
+ return format (s, "%=26s%=26s%=26s", "Name", "Next", "Previous");
+
+ s = format (s, "%-26v", n->name);
+
+ indent = format_get_indent (s);
+
+ for (i = j = 0; i < vec_len (n->next_nodes); i++)
+ {
+ if (n->next_nodes[i] == VLIB_INVALID_NODE_INDEX)
+ continue;
+ vec_validate_init_empty (tmps, j, empty);
+ tmps[j].next_node = n->next_nodes[i];
+ tmps[j].next_slot = i;
+ j++;
+ }
+
+ j = 0;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, n->prev_node_bitmap, ({
+ vec_validate_init_empty (tmps, j, empty);
+ tmps[j].prev_node = i;
+ j++;
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (tmps); i++)
+ {
+ if (i > 0)
+ s = format (s, "\n%U", format_white_space, indent);
+
+ if (tmps[i].next_node != ~0)
+ {
+ vlib_node_t *x;
+ u8 *t = 0;
+
+ x = vec_elt (nm->nodes, tmps[i].next_node);
+ t = format (t, "%v [%d]", x->name, tmps[i].next_slot);
+ s = format (s, "%=26v", t);
+ vec_free (t);
+ }
+ else
+ s = format (s, "%26s", "");
+
+ if (tmps[i].prev_node != ~0)
+ {
+ vlib_node_t *x;
+ x = vec_elt (nm->nodes, tmps[i].prev_node);
+ s = format (s, "%=26v", x->name);
+ }
+ }
+
+ vec_free (tmps);
+
+ return s;
+}
+
+u8 *
+format_vlib_node_and_next (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ vlib_node_t *n = va_arg (*va, vlib_node_t *);
+ u32 next_index = va_arg (*va, u32);
+ vlib_node_t *n_next;
+ u32 *ni;
+
+ ni = vec_elt_at_index (n->next_nodes, next_index);
+ n_next = vlib_get_node (vm, ni[0]);
+ return format (s, "%v -> %v", n->name, n_next->name);
+}
+
+u8 *
+format_vlib_node_name (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ u32 node_index = va_arg (*va, u32);
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+
+ return format (s, "%v", n->name);
+}
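+
+/* Usage sketch for the format helpers above (illustrative):
+
+     u8 *s = format (0, "node: %U", format_vlib_node_name, vm, node_index);
+     vlib_cli_output (vm, "%v", s);
+     vec_free (s);
+*/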
+
+u8 *
+format_vlib_next_node_name (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ u32 node_index = va_arg (*va, u32);
+ u32 next_index = va_arg (*va, u32);
+ vlib_node_t *next = vlib_get_next_node (vm, node_index, next_index);
+ return format (s, "%v", next->name);
+}
+
+/* Parse node name -> node index. */
+uword
+unformat_vlib_node (unformat_input_t * input, va_list * args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ u32 *result = va_arg (*args, u32 *);
+
+ return unformat_user (input, unformat_hash_vec_string,
+ vm->node_main.node_by_name, result);
+}
+
+u8 *
+format_vlib_time (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ f64 time = va_arg (*va, f64);
+ return format (s, "%12.4f", time);
+}
+
+u8 *
+format_vlib_cpu_time (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ u64 cpu_time = va_arg (*va, u64);
+ f64 dt;
+
+ dt =
+ (cpu_time -
+ vm->clib_time.init_cpu_time) * vm->clib_time.seconds_per_clock;
+ return format (s, "%U", format_vlib_time, vm, dt);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
new file mode 100644
index 00000000..0734476c
--- /dev/null
+++ b/src/vlib/node_funcs.h
@@ -0,0 +1,1175 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_funcs.h: processing nodes global functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/** \file
+ vlib node functions
+*/
+
+
+#ifndef included_vlib_node_funcs_h
+#define included_vlib_node_funcs_h
+
+#include <vppinfra/fifo.h>
+#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
+
+/** \brief Get vlib node by index.
+ @warning This function will ASSERT if @c i is out of range.
+ @param vm vlib_main_t pointer, varies by thread
+ @param i node index.
+ @return pointer to the requested vlib_node_t.
+*/
+
+always_inline vlib_node_t *
+vlib_get_node (vlib_main_t * vm, u32 i)
+{
+ return vec_elt (vm->node_main.nodes, i);
+}
+
+/** \brief Get vlib node by graph arc (next) index.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of original node
+ @param next_index graph arc index
+ @return pointer to the vlib_node_t at the end of the indicated arc
+*/
+
+always_inline vlib_node_t *
+vlib_get_next_node (vlib_main_t * vm, u32 node_index, u32 next_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+
+ n = vec_elt (nm->nodes, node_index);
+ ASSERT (next_index < vec_len (n->next_nodes));
+ return vlib_get_node (vm, n->next_nodes[next_index]);
+}
+
+/** \brief Get node runtime by node index.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of node
+ @return pointer to the indicated vlib_node_runtime_t
+*/
+
+always_inline vlib_node_runtime_t *
+vlib_node_get_runtime (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vec_elt (nm->nodes, node_index);
+ vlib_process_t *p;
+ if (n->type != VLIB_NODE_TYPE_PROCESS)
+ return vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
+ else
+ {
+ p = vec_elt (nm->processes, n->runtime_index);
+ return &p->node_runtime;
+ }
+}
+
+/** \brief Get node runtime private data by node index.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @return pointer to the indicated vlib_node_runtime_t private data
+*/
+
+always_inline void *
+vlib_node_get_runtime_data (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_runtime_t *r = vlib_node_get_runtime (vm, node_index);
+ return r->runtime_data;
+}
+
+/** \brief Set node runtime private data.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @param runtime_data arbitrary runtime private data
+ @param n_runtime_data_bytes size of runtime private data
+*/
+
+always_inline void
+vlib_node_set_runtime_data (vlib_main_t * vm, u32 node_index,
+ void *runtime_data, u32 n_runtime_data_bytes)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_node_runtime_t *r = vlib_node_get_runtime (vm, node_index);
+
+ n->runtime_data_bytes = n_runtime_data_bytes;
+ vec_free (n->runtime_data);
+ vec_add (n->runtime_data, runtime_data, n_runtime_data_bytes);
+
+ ASSERT (vec_len (n->runtime_data) <= sizeof (vlib_node_runtime_t) -
+ STRUCT_OFFSET_OF (vlib_node_runtime_t, runtime_data));
+
+ if (vec_len (n->runtime_data) > 0)
+ clib_memcpy (r->runtime_data, n->runtime_data, vec_len (n->runtime_data));
+}
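+
+/* Usage sketch (my_runtime_t is a hypothetical per-node structure; it
+   must fit within VLIB_NODE_RUNTIME_DATA_SIZE, as the ASSERT above
+   enforces):
+
+     typedef struct { u32 limit; } my_runtime_t;
+     my_runtime_t rt = { .limit = 100 };
+     vlib_node_set_runtime_data (vm, node_index, &rt, sizeof (rt));
+*/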
+
+/** \brief Set node dispatch state.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @param new_state new state for node, see vlib_node_state_t
+*/
+always_inline void
+vlib_node_set_state (vlib_main_t * vm, u32 node_index,
+ vlib_node_state_t new_state)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+ vlib_node_runtime_t *r;
+
+ n = vec_elt (nm->nodes, node_index);
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
+ r = &p->node_runtime;
+
+ /* When disabling make sure flags are cleared. */
+ p->flags &= ~(VLIB_PROCESS_RESUME_PENDING
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT);
+ }
+ else
+ r = vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
+
+ ASSERT (new_state < VLIB_N_NODE_STATE);
+
+ if (n->type == VLIB_NODE_TYPE_INPUT)
+ {
+ ASSERT (nm->input_node_counts_by_state[n->state] > 0);
+ nm->input_node_counts_by_state[n->state] -= 1;
+ nm->input_node_counts_by_state[new_state] += 1;
+ }
+
+ n->state = new_state;
+ r->state = new_state;
+}
+
+/** \brief Get node dispatch state.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @return state for node, see vlib_node_state_t
+*/
+always_inline vlib_node_state_t
+vlib_node_get_state (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+ n = vec_elt (nm->nodes, node_index);
+ return n->state;
+}
+
+always_inline void
+vlib_node_set_interrupt_pending (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vec_elt (nm->nodes, node_index);
+ ASSERT (n->type == VLIB_NODE_TYPE_INPUT);
+ clib_spinlock_lock_if_init (&nm->pending_interrupt_lock);
+ vec_add1 (nm->pending_interrupt_node_runtime_indices, n->runtime_index);
+ clib_spinlock_unlock_if_init (&nm->pending_interrupt_lock);
+}
+
+always_inline vlib_process_t *
+vlib_get_process_from_node (vlib_main_t * vm, vlib_node_t * node)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ ASSERT (node->type == VLIB_NODE_TYPE_PROCESS);
+ return vec_elt (nm->processes, node->runtime_index);
+}
+
+/* Fetches frame with given handle. */
+always_inline vlib_frame_t *
+vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index)
+{
+ vlib_frame_t *f;
+ f = vm->heap_base + (frame_index * VLIB_FRAME_ALIGN);
+ return f;
+}
+
+always_inline u32
+vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f)
+{
+ uword i;
+
+ ASSERT (((uword) f & (VLIB_FRAME_ALIGN - 1)) == 0);
+
+ i = ((u8 *) f - (u8 *) vm->heap_base);
+ ASSERT ((i / VLIB_FRAME_ALIGN) <= 0xFFFFFFFFULL);
+
+ return i / VLIB_FRAME_ALIGN;
+}
+
+always_inline vlib_frame_t *
+vlib_get_frame (vlib_main_t * vm, uword frame_index)
+{
+ vlib_frame_t *f = vlib_get_frame_no_check (vm, frame_index);
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+ return f;
+}
+
+always_inline u32
+vlib_frame_index (vlib_main_t * vm, vlib_frame_t * f)
+{
+ uword i = vlib_frame_index_no_check (vm, f);
+ ASSERT (vlib_get_frame (vm, i) == f);
+ return i;
+}
+
+/* Byte alignment for vector arguments. */
+#define VLIB_FRAME_VECTOR_ALIGN (1 << 4)
+
+always_inline u32
+vlib_frame_vector_byte_offset (u32 scalar_size)
+{
+ return round_pow2 (sizeof (vlib_frame_t) + scalar_size,
+ VLIB_FRAME_VECTOR_ALIGN);
+}
+
+/** \brief Get pointer to frame vector data.
+ @param f vlib_frame_t pointer
+ @return pointer to first vector element in frame
+*/
+always_inline void *
+vlib_frame_vector_args (vlib_frame_t * f)
+{
+ return (void *) f + vlib_frame_vector_byte_offset (f->scalar_size);
+}
+
+/** \brief Get pointer to frame scalar data.
+
+ @warning This is almost certainly not the function you wish to call.
+ See @ref vlib_frame_vector_args instead.
+
+ @param f vlib_frame_t pointer
+
+ @return arbitrary node scalar data
+
+ @sa vlib_frame_vector_args
+*/
+always_inline void *
+vlib_frame_args (vlib_frame_t * f)
+{
+ return vlib_frame_vector_args (f) - f->scalar_size;
+}
+
+always_inline vlib_next_frame_t *
+vlib_node_runtime_get_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * n, u32 next_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *nf;
+
+ ASSERT (next_index < n->n_next_nodes);
+ nf = vec_elt_at_index (nm->next_frames, n->next_frame_index + next_index);
+
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t *node, *next;
+ node = vec_elt (nm->nodes, n->node_index);
+ next = vec_elt (nm->nodes, node->next_nodes[next_index]);
+ ASSERT (nf->node_runtime_index == next->runtime_index);
+ }
+
+ return nf;
+}
+
+/** \brief Get pointer to frame by (@c node_index, @c next_index).
+
+ @warning This is not a function that you should call directly.
+ See @ref vlib_get_next_frame instead.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @param next_index graph arc index
+
+ @return pointer to the requested vlib_next_frame_t
+
+ @sa vlib_get_next_frame
+*/
+
+always_inline vlib_next_frame_t *
+vlib_node_get_next_frame (vlib_main_t * vm, u32 node_index, u32 next_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+ vlib_node_runtime_t *r;
+
+ n = vec_elt (nm->nodes, node_index);
+ r = vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
+ return vlib_node_runtime_get_next_frame (vm, r, next_index);
+}
+
+vlib_frame_t *vlib_get_next_frame_internal (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 next_index,
+ u32 alloc_new_frame);
+
+#define vlib_get_next_frame_macro(vm,node,next_index,vectors,n_vectors_left,alloc_new_frame) \
+do { \
+ vlib_frame_t * _f \
+ = vlib_get_next_frame_internal ((vm), (node), (next_index), \
+ (alloc_new_frame)); \
+ u32 _n = _f->n_vectors; \
+ (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \
+ (n_vectors_left) = VLIB_FRAME_SIZE - _n; \
+} while (0)
+
+
+/** \brief Get pointer to next frame vector data by
+ (@c vlib_node_runtime_t, @c next_index).
+ Standard single/dual loop boilerplate element.
+ @attention This is a MACRO, with SIDE EFFECTS.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index requested graph arc index
+
+ @return @c vectors -- pointer to next available vector slot
+ @return @c n_vectors_left -- number of vector slots available
+*/
+#define vlib_get_next_frame(vm,node,next_index,vectors,n_vectors_left) \
+ vlib_get_next_frame_macro (vm, node, next_index, \
+ vectors, n_vectors_left, \
+ /* alloc new frame */ 0)
+
+#define vlib_get_new_next_frame(vm,node,next_index,vectors,n_vectors_left) \
+ vlib_get_next_frame_macro (vm, node, next_index, \
+ vectors, n_vectors_left, \
+ /* alloc new frame */ 1)
+
+/** \brief Release pointer to next frame vector data.
+ Standard single/dual loop boilerplate element.
+ @param vm vlib_main_t pointer, varies by thread
+ @param r current node vlib_node_runtime_t pointer
+ @param next_index graph arc index
+ @param n_packets_left number of slots still available in vector
+*/
+void
+vlib_put_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index, u32 n_packets_left);
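+
+/* Canonical single-loop dispatch sketch using the get/put pair above
+   (illustrative; my_node_fn is hypothetical, and tracing and error
+   handling are elided):
+
+     static uword
+     my_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+                 vlib_frame_t * frame)
+     {
+       u32 n_left_from, *from, next_index;
+
+       from = vlib_frame_vector_args (frame);
+       n_left_from = frame->n_vectors;
+       next_index = node->cached_next_index;
+
+       while (n_left_from > 0)
+         {
+           u32 *to_next, n_left_to_next;
+           vlib_get_next_frame (vm, node, next_index, to_next,
+                                n_left_to_next);
+           while (n_left_from > 0 && n_left_to_next > 0)
+             {
+               to_next[0] = from[0];        -- copy the buffer index
+               to_next += 1; n_left_to_next -= 1;
+               from += 1;    n_left_from -= 1;
+             }
+           vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+         }
+       return frame->n_vectors;
+     }
+*/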
+
+/* Combination get plus put. Returns vector argument just added. */
+#define vlib_set_next_frame(vm,node,next_index,v) \
+({ \
+ uword _n_left; \
+ vlib_get_next_frame ((vm), (node), (next_index), (v), _n_left); \
+ ASSERT (_n_left > 0); \
+ vlib_put_next_frame ((vm), (node), (next_index), _n_left - 1); \
+ (v); \
+})
+
+always_inline void
+vlib_set_next_frame_buffer (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 next_index, u32 buffer_index)
+{
+ u32 *p;
+ p = vlib_set_next_frame (vm, node, next_index, p);
+ p[0] = buffer_index;
+}
+
+vlib_frame_t *vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index);
+void vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index,
+ vlib_frame_t * f);
+
+always_inline uword
+vlib_in_process_context (vlib_main_t * vm)
+{
+ return vm->node_main.current_process_index != ~0;
+}
+
+always_inline vlib_process_t *
+vlib_get_current_process (vlib_main_t * vm)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ if (vlib_in_process_context (vm))
+ return vec_elt (nm->processes, nm->current_process_index);
+ return 0;
+}
+
+always_inline uword
+vlib_current_process (vlib_main_t * vm)
+{
+ return vlib_get_current_process (vm)->node_runtime.node_index;
+}
+
+/** Returns TRUE if a process suspend time is less than 10us
+ @param dt - suspend interval in seconds
+ @returns 1 if dt < 10e-6, 0 otherwise
+*/
+always_inline uword
+vlib_process_suspend_time_is_zero (f64 dt)
+{
+ return dt < 10e-6;
+}
+
+/** Suspend a vlib cooperative multi-tasking thread for a period of time
+ @param vm - vlib_main_t *
+ @param dt - suspend interval in seconds
+ @returns VLIB_PROCESS_RESUME_LONGJMP_RESUME, routinely ignored
+*/
+
+always_inline uword
+vlib_process_suspend (vlib_main_t * vm, f64 dt)
+{
+ uword r;
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p = vec_elt (nm->processes, nm->current_process_index);
+
+ if (vlib_process_suspend_time_is_zero (dt))
+ return VLIB_PROCESS_RESUME_LONGJMP_RESUME;
+
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK;
+ r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ {
+ /* expiration time in 10us ticks */
+ p->resume_clock_interval = dt * 1e5;
+ clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ return r;
+}
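+
+/* Usage sketch: a process node body that wakes every 100ms
+   (my_poll_process and the periodic work are hypothetical):
+
+     static uword
+     my_poll_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+                      vlib_frame_t * f)
+     {
+       while (1)
+         {
+           vlib_process_suspend (vm, 0.1);
+           ... do periodic work ...
+         }
+       return 0;
+     }
+*/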
+
+always_inline void
+vlib_process_free_event_type (vlib_process_t * p, uword t,
+ uword is_one_time_event)
+{
+ ASSERT (!pool_is_free_index (p->event_type_pool, t));
+ pool_put_index (p->event_type_pool, t);
+ if (is_one_time_event)
+ p->one_time_event_type_bitmap =
+ clib_bitmap_andnoti (p->one_time_event_type_bitmap, t);
+}
+
+always_inline void
+vlib_process_maybe_free_event_type (vlib_process_t * p, uword t)
+{
+ ASSERT (!pool_is_free_index (p->event_type_pool, t));
+ if (clib_bitmap_get (p->one_time_event_type_bitmap, t))
+ vlib_process_free_event_type (p, t, /* is_one_time_event */ 1);
+}
+
+always_inline void *
+vlib_process_get_event_data (vlib_main_t * vm,
+ uword * return_event_type_opaque)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ vlib_process_event_type_t *et;
+ uword t;
+ void *event_data_vector;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ /* Find first type with events ready.
+ Return invalid type when there's nothing there. */
+ t = clib_bitmap_first_set (p->non_empty_event_type_bitmap);
+ if (t == ~0)
+ return 0;
+
+ p->non_empty_event_type_bitmap =
+ clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+ ASSERT (_vec_len (p->pending_event_data_by_type_index[t]) > 0);
+ event_data_vector = p->pending_event_data_by_type_index[t];
+ p->pending_event_data_by_type_index[t] = 0;
+
+ et = pool_elt_at_index (p->event_type_pool, t);
+
+ /* Return user's opaque value and possibly index. */
+ *return_event_type_opaque = et->opaque;
+
+ vlib_process_maybe_free_event_type (p, t);
+
+ return event_data_vector;
+}
+
+/* Return event data vector for later reuse. We reuse event data to avoid
+ repeatedly allocating event vectors in cases where we care about speed. */
+always_inline void
+vlib_process_put_event_data (vlib_main_t * vm, void *event_data)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vec_add1 (nm->recycled_event_data_vectors, event_data);
+}
+
+/** Return the first event type which has occurred and a vector of per-event
+ data of that type, or a timeout indication
+
+ @param vm - vlib_main_t pointer
+ @param data_vector - pointer to a (uword *) vector to receive event data
+ @returns either an event type and a vector of per-event instance data,
+ or ~0 to indicate a timeout.
+*/
+
+always_inline uword
+vlib_process_get_events (vlib_main_t * vm, uword ** data_vector)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ vlib_process_event_type_t *et;
+ uword r, t, l;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ /* Find first type with events ready.
+ Return invalid type when there's nothing there. */
+ t = clib_bitmap_first_set (p->non_empty_event_type_bitmap);
+ if (t == ~0)
+ return t;
+
+ p->non_empty_event_type_bitmap =
+ clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+ l = _vec_len (p->pending_event_data_by_type_index[t]);
+ if (data_vector)
+ vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
+ _vec_len (p->pending_event_data_by_type_index[t]) = 0;
+
+ et = pool_elt_at_index (p->event_type_pool, t);
+
+ /* Return user's opaque value. */
+ r = et->opaque;
+
+ vlib_process_maybe_free_event_type (p, t);
+
+ return r;
+}
+
+always_inline uword
+vlib_process_get_events_helper (vlib_process_t * p, uword t,
+ uword ** data_vector)
+{
+ uword l;
+
+ p->non_empty_event_type_bitmap =
+ clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+ l = _vec_len (p->pending_event_data_by_type_index[t]);
+ if (data_vector)
+ vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
+ _vec_len (p->pending_event_data_by_type_index[t]) = 0;
+
+ vlib_process_maybe_free_event_type (p, t);
+
+ return l;
+}
+
+/* As above, but queries a specific type of event. Returns the number
+ of events found. */
+always_inline uword
+vlib_process_get_events_with_type (vlib_main_t * vm, uword ** data_vector,
+ uword with_type_opaque)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ uword t, *h;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+ if (!h)
+ /* This can happen when an event has not yet been
+ signaled with given opaque type. */
+ return 0;
+
+ t = h[0];
+ if (!clib_bitmap_get (p->non_empty_event_type_bitmap, t))
+ return 0;
+
+ return vlib_process_get_events_helper (p, t, data_vector);
+}
+
+always_inline uword *
+vlib_process_wait_for_event (vlib_main_t * vm)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ if (clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ r =
+ clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp,
+ VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ return p->non_empty_event_type_bitmap;
+}
+
+always_inline uword
+vlib_process_wait_for_one_time_event (vlib_main_t * vm,
+ uword ** data_vector,
+ uword with_type_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ ASSERT (!pool_is_free_index (p->event_type_pool, with_type_index));
+ while (!clib_bitmap_get (p->non_empty_event_type_bitmap, with_type_index))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ r =
+ clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp,
+ VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ return vlib_process_get_events_helper (p, with_type_index, data_vector);
+}
+
+always_inline uword
+vlib_process_wait_for_event_with_type (vlib_main_t * vm,
+ uword ** data_vector,
+ uword with_type_opaque)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ uword r, *h;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+ while (!h || !clib_bitmap_get (p->non_empty_event_type_bitmap, h[0]))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ r =
+ clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp,
+ VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+
+ /* See if unknown event type has been signaled now. */
+ if (!h)
+ h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+ }
+
+ return vlib_process_get_events_helper (p, h[0], data_vector);
+}
+
+/** Suspend a cooperative multi-tasking thread
+ Waits for an event, or for the indicated number of seconds to elapse
+ @param vm - vlib_main_t pointer
+ @param dt - timeout, in seconds.
+ @returns the remaining time interval
+*/
+
+always_inline f64
+vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ f64 wakeup_time;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ if (vlib_process_suspend_time_is_zero (dt)
+ || !clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
+ return dt;
+
+ wakeup_time = vlib_time_now (vm) + dt;
+
+ /* Suspend waiting for both clock and event to occur. */
+ p->flags |= (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK);
+
+ r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ {
+ p->resume_clock_interval = dt * 1e5;
+ clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ /* Return amount of time still left to sleep.
+ If <= 0 then we've been woken up by the clock (and not an event). */
+ return wakeup_time - vlib_time_now (vm);
+}
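+
+/* Typical process main-loop sketch built on the wait/get calls above
+   (illustrative; MY_EVENT is a hypothetical opaque type):
+
+     while (1)
+       {
+         uword event_type, *event_data = 0;
+
+         vlib_process_wait_for_event_or_clock (vm, 10.0);
+         event_type = vlib_process_get_events (vm, &event_data);
+         switch (event_type)
+           {
+           case ~0:            -- timeout, no event arrived
+             break;
+           case MY_EVENT:      -- event_data[i] holds per-event data
+             break;
+           }
+         vec_reset_length (event_data);
+       }
+*/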
+
+always_inline vlib_process_event_type_t *
+vlib_process_new_event_type (vlib_process_t * p, uword with_type_opaque)
+{
+ vlib_process_event_type_t *et;
+ pool_get (p->event_type_pool, et);
+ et->opaque = with_type_opaque;
+ return et;
+}
+
+always_inline uword
+vlib_process_create_one_time_event (vlib_main_t * vm, uword node_index,
+ uword with_type_opaque)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
+ vlib_process_event_type_t *et;
+ uword t;
+
+ et = vlib_process_new_event_type (p, with_type_opaque);
+ t = et - p->event_type_pool;
+ p->one_time_event_type_bitmap =
+ clib_bitmap_ori (p->one_time_event_type_bitmap, t);
+ return t;
+}
+
+always_inline void
+vlib_process_delete_one_time_event (vlib_main_t * vm, uword node_index,
+ uword t)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
+
+ ASSERT (clib_bitmap_get (p->one_time_event_type_bitmap, t));
+ vlib_process_free_event_type (p, t, /* is_one_time_event */ 1);
+}
+
+always_inline void *
+vlib_process_signal_event_helper (vlib_node_main_t * nm,
+ vlib_node_t * n,
+ vlib_process_t * p,
+ uword t,
+ uword n_data_elts, uword n_data_elt_bytes)
+{
+ uword p_flags, add_to_pending, delete_from_wheel;
+ void *data_to_be_written_by_caller;
+
+ ASSERT (!pool_is_free_index (p->event_type_pool, t));
+
+ vec_validate (p->pending_event_data_by_type_index, t);
+
+ /* Resize data vector and return caller's data to be written. */
+ {
+ void *data_vec = p->pending_event_data_by_type_index[t];
+ uword l;
+
+ if (!data_vec && vec_len (nm->recycled_event_data_vectors))
+ {
+ data_vec = vec_pop (nm->recycled_event_data_vectors);
+ _vec_len (data_vec) = 0;
+ }
+
+ l = vec_len (data_vec);
+
+ data_vec = _vec_resize (data_vec,
+ /* length_increment */ n_data_elts,
+ /* total size after increment */
+ (l + n_data_elts) * n_data_elt_bytes,
+ /* header_bytes */ 0, /* data_align */ 0);
+
+ p->pending_event_data_by_type_index[t] = data_vec;
+ data_to_be_written_by_caller = data_vec + l * n_data_elt_bytes;
+ }
+
+ p->non_empty_event_type_bitmap =
+ clib_bitmap_ori (p->non_empty_event_type_bitmap, t);
+
+ p_flags = p->flags;
+
+ /* Event was already signaled? */
+ add_to_pending = (p_flags & VLIB_PROCESS_RESUME_PENDING) == 0;
+
+ /* Process will resume when suspend time elapses? */
+ delete_from_wheel = 0;
+ if (p_flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+ {
+ /* Waiting for both event and clock? */
+ if (p_flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT)
+ delete_from_wheel = 1;
+ else
+ /* Waiting only for clock. Event will be queued and may be
+ handled when the timer expires. */
+ add_to_pending = 0;
+ }
+
+ /* Never add current process to pending vector since current process is
+ already running. */
+ add_to_pending &= nm->current_process_index != n->runtime_index;
+
+ if (add_to_pending)
+ {
+ u32 x = vlib_timing_wheel_data_set_suspended_process (n->runtime_index);
+ p->flags = p_flags | VLIB_PROCESS_RESUME_PENDING;
+ vec_add1 (nm->data_from_advancing_timing_wheel, x);
+ if (delete_from_wheel)
+ TW (tw_timer_stop) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+ p->stop_timer_handle);
+ }
+
+ return data_to_be_written_by_caller;
+}
+
+always_inline void *
+vlib_process_signal_event_data (vlib_main_t * vm,
+ uword node_index,
+ uword type_opaque,
+ uword n_data_elts, uword n_data_elt_bytes)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
+ uword *h, t;
+
+ /* Must be in main thread */
+ ASSERT (vlib_get_thread_index () == 0);
+
+ h = hash_get (p->event_type_index_by_type_opaque, type_opaque);
+ if (!h)
+ {
+ vlib_process_event_type_t *et =
+ vlib_process_new_event_type (p, type_opaque);
+ t = et - p->event_type_pool;
+ hash_set (p->event_type_index_by_type_opaque, type_opaque, t);
+ }
+ else
+ t = h[0];
+
+ return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts,
+ n_data_elt_bytes);
+}
+
+always_inline void *
+vlib_process_signal_event_at_time (vlib_main_t * vm,
+ f64 dt,
+ uword node_index,
+ uword type_opaque,
+ uword n_data_elts, uword n_data_elt_bytes)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
+ uword *h, t;
+
+ h = hash_get (p->event_type_index_by_type_opaque, type_opaque);
+ if (!h)
+ {
+ vlib_process_event_type_t *et =
+ vlib_process_new_event_type (p, type_opaque);
+ t = et - p->event_type_pool;
+ hash_set (p->event_type_index_by_type_opaque, type_opaque, t);
+ }
+ else
+ t = h[0];
+
+ if (vlib_process_suspend_time_is_zero (dt))
+ return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts,
+ n_data_elt_bytes);
+ else
+ {
+ vlib_signal_timed_event_data_t *te;
+
+ pool_get_aligned (nm->signal_timed_event_data_pool, te, sizeof (te[0]));
+
+ te->n_data_elts = n_data_elts;
+ te->n_data_elt_bytes = n_data_elt_bytes;
+ te->n_data_bytes = n_data_elts * n_data_elt_bytes;
+
+ /* Assert that structure fields are big enough. */
+ ASSERT (te->n_data_elts == n_data_elts);
+ ASSERT (te->n_data_elt_bytes == n_data_elt_bytes);
+ ASSERT (te->n_data_bytes == n_data_elts * n_data_elt_bytes);
+
+ te->process_node_index = n->runtime_index;
+ te->event_type_index = t;
+
+ p->stop_timer_handle =
+ TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+ vlib_timing_wheel_data_set_timed_event
+ (te - nm->signal_timed_event_data_pool),
+ 0 /* timer_id */ ,
+ (vlib_time_now (vm) + dt) * 1e5);
+
+ /* Inline data big enough to hold event? */
+ if (te->n_data_bytes < sizeof (te->inline_event_data))
+ return te->inline_event_data;
+ else
+ {
+ te->event_data_as_vector = 0;
+ vec_resize (te->event_data_as_vector, te->n_data_bytes);
+ return te->event_data_as_vector;
+ }
+ }
+}
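+
+/* Usage sketch: deliver an event in 5 seconds; the returned pointer is
+   where the caller writes the event data (MY_TIMEOUT_EVENT is a
+   hypothetical opaque type):
+
+     u32 *d = vlib_process_signal_event_at_time (vm, 5.0, node_index,
+                                                 MY_TIMEOUT_EVENT,
+                                                 1, sizeof (u32));
+     d[0] = 42;
+*/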
+
+always_inline void *
+vlib_process_signal_one_time_event_data (vlib_main_t * vm,
+ uword node_index,
+ uword type_index,
+ uword n_data_elts,
+ uword n_data_elt_bytes)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
+ return vlib_process_signal_event_helper (nm, n, p, type_index, n_data_elts,
+ n_data_elt_bytes);
+}
+
+always_inline void
+vlib_process_signal_event (vlib_main_t * vm,
+ uword node_index, uword type_opaque, uword data)
+{
+ uword *d = vlib_process_signal_event_data (vm, node_index, type_opaque,
+ 1 /* elts */ , sizeof (uword));
+ d[0] = data;
+}
+
+always_inline void
+vlib_process_signal_event_pointer (vlib_main_t * vm,
+ uword node_index,
+ uword type_opaque, void *data)
+{
+ void **d = vlib_process_signal_event_data (vm, node_index, type_opaque,
+ 1 /* elts */ , sizeof (data));
+ d[0] = data;
+}
+
+/**
+ * Signal event to process from any thread.
+ *
+ * When in doubt, use this.
+ */
+always_inline void
+vlib_process_signal_event_mt (vlib_main_t * vm,
+ uword node_index, uword type_opaque, uword data)
+{
+ if (vlib_get_thread_index () != 0)
+ {
+ vlib_process_signal_event_mt_args_t args = {
+ .node_index = node_index,
+ .type_opaque = type_opaque,
+ .data = data,
+ };
+ vlib_rpc_call_main_thread (vlib_process_signal_event_mt_helper,
+ (u8 *) & args, sizeof (args));
+ }
+ else
+ vlib_process_signal_event (vm, node_index, type_opaque, data);
+}
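+
+/* Usage sketch (my_process_node and MY_EVENT are hypothetical).  Safe
+   to call from worker threads, since the call is relayed to the main
+   thread via RPC when necessary:
+
+     vlib_process_signal_event_mt (vm, my_process_node.index,
+                                   MY_EVENT, 0);
+*/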
+
+always_inline void
+vlib_process_signal_one_time_event (vlib_main_t * vm,
+ uword node_index,
+ uword type_index, uword data)
+{
+ uword *d =
+ vlib_process_signal_one_time_event_data (vm, node_index, type_index,
+ 1 /* elts */ , sizeof (uword));
+ d[0] = data;
+}
+
+always_inline void
+vlib_signal_one_time_waiting_process (vlib_main_t * vm,
+ vlib_one_time_waiting_process_t * p)
+{
+ vlib_process_signal_one_time_event (vm, p->node_index, p->one_time_event,
+ /* data */ ~0);
+ memset (p, ~0, sizeof (p[0]));
+}
+
+always_inline void
+vlib_signal_one_time_waiting_process_vector (vlib_main_t * vm,
+ vlib_one_time_waiting_process_t
+ ** wps)
+{
+ vlib_one_time_waiting_process_t *wp;
+ vec_foreach (wp, *wps) vlib_signal_one_time_waiting_process (vm, wp);
+ vec_free (*wps);
+}
+
+always_inline void
+vlib_current_process_wait_for_one_time_event (vlib_main_t * vm,
+ vlib_one_time_waiting_process_t
+ * p)
+{
+ p->node_index = vlib_current_process (vm);
+ p->one_time_event = vlib_process_create_one_time_event (vm, p->node_index, /* type opaque */
+ ~0);
+ vlib_process_wait_for_one_time_event (vm,
+ /* don't care about data */ 0,
+ p->one_time_event);
+}
+
+always_inline void
+vlib_current_process_wait_for_one_time_event_vector (vlib_main_t * vm,
+ vlib_one_time_waiting_process_t
+ ** wps)
+{
+ vlib_one_time_waiting_process_t *wp;
+ vec_add2 (*wps, wp, 1);
+ vlib_current_process_wait_for_one_time_event (vm, wp);
+}
+
+always_inline u32
+vlib_node_runtime_update_main_loop_vector_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ uword n_vectors)
+{
+ u32 i, d, vi0, vi1;
+ u32 i0, i1;
+
+ ASSERT (is_pow2 (ARRAY_LEN (node->main_loop_vector_stats)));
+ i = ((vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)
+ & (ARRAY_LEN (node->main_loop_vector_stats) - 1));
+ i0 = i ^ 0;
+ i1 = i ^ 1;
+ d = ((vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)
+ -
+ (node->main_loop_count_last_dispatch >>
+ VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE));
+ vi0 = node->main_loop_vector_stats[i0];
+ vi1 = node->main_loop_vector_stats[i1];
+ vi0 = d == 0 ? vi0 : 0;
+ vi1 = d <= 1 ? vi1 : 0;
+ vi0 += n_vectors;
+ node->main_loop_vector_stats[i0] = vi0;
+ node->main_loop_vector_stats[i1] = vi1;
+ node->main_loop_count_last_dispatch = vm->main_loop_count;
+ /* Return previous counter. */
+ return node->main_loop_vector_stats[i1];
+}
+
+always_inline f64
+vlib_node_vectors_per_main_loop_as_float (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index);
+ u32 v;
+
+ v = vlib_node_runtime_update_main_loop_vector_stats (vm, rt, /* n_vectors */
+ 0);
+ return (f64) v / (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
+}
+
+always_inline u32
+vlib_node_vectors_per_main_loop_as_integer (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index);
+ u32 v;
+
+ v = vlib_node_runtime_update_main_loop_vector_stats (vm, rt, /* n_vectors */
+ 0);
+ return v >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+}
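+
+/*
+ * Worked example of the two-bucket scheme above (the shift value is
+ * illustrative): main_loop_vector_stats[] has two accumulators, and the
+ * active index flips every 2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE main
+ * loops, so the other bucket always holds a complete interval. If the
+ * shift is 7 (128 loops per interval) and a node handled 512 vectors in
+ * the previous interval, vlib_node_vectors_per_main_loop_as_float()
+ * reports 512 / 128 = 4.0 and the _as_integer variant reports
+ * 512 >> 7 = 4.
+ */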
+
+void
+vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f);
+
+/* Return the edge index if present, ~0 otherwise */
+uword vlib_node_get_next (vlib_main_t * vm, uword node, uword next_node);
+
+/* Add next node to given node in given slot. */
+uword
+vlib_node_add_next_with_slot (vlib_main_t * vm,
+ uword node, uword next_node, uword slot);
+
+/* As above but adds to end of node's next vector. */
+always_inline uword
+vlib_node_add_next (vlib_main_t * vm, uword node, uword next_node)
+{
+ return vlib_node_add_next_with_slot (vm, node, next_node, ~0);
+}
+
+/* Add a named next node to given node in given slot. */
+uword
+vlib_node_add_named_next_with_slot (vlib_main_t * vm,
+ uword node, char *next_name, uword slot);
+
+/* As above but adds to end of node's next vector. */
+always_inline uword
+vlib_node_add_named_next (vlib_main_t * vm, uword node, char *name)
+{
+ return vlib_node_add_named_next_with_slot (vm, node, name, ~0);
+}
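+
+/*
+ * Usage sketch (node and arc names hypothetical): next-node arcs are
+ * normally declared in VLIB_REGISTER_NODE's .next_nodes table, but can
+ * also be added at runtime:
+ *
+ *   u32 slot = vlib_node_add_named_next (vm, my_node.index, "ip4-lookup");
+ *   // enqueue buffers to `slot' via vlib_set_next_frame_buffer, etc.
+ */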
+
+/* Query node given name. */
+vlib_node_t *vlib_get_node_by_name (vlib_main_t * vm, u8 * name);
+
+/* Rename a node. */
+void vlib_node_rename (vlib_main_t * vm, u32 node_index, char *fmt, ...);
+
+/* Register new packet processing node. Nodes can be registered
+ dynamically via this call or statically via the VLIB_REGISTER_NODE
+ macro. */
+u32 vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r);
+
+/* Register all static nodes registered via VLIB_REGISTER_NODE. */
+void vlib_register_all_static_nodes (vlib_main_t * vm);
+
+/* Start a process. */
+void vlib_start_process (vlib_main_t * vm, uword process_index);
+
+/* Sync up runtime and main node stats. */
+void vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n);
+
+/* Node graph initialization function. */
+clib_error_t *vlib_node_main_init (vlib_main_t * vm);
+
+format_function_t format_vlib_node_graph;
+format_function_t format_vlib_node_name;
+format_function_t format_vlib_next_node_name;
+format_function_t format_vlib_node_and_next;
+format_function_t format_vlib_cpu_time;
+format_function_t format_vlib_time;
+/* Parse node name -> node index. */
+unformat_function_t unformat_vlib_node;
+
+always_inline void
+vlib_node_increment_counter (vlib_main_t * vm, u32 node_index,
+ u32 counter_index, u64 increment)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_error_main_t *em = &vm->error_main;
+ u32 node_counter_base_index = n->error_heap_index;
+ em->counters[node_counter_base_index + counter_index] += increment;
+}
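+
+/*
+ * Usage sketch (error enum hypothetical): counter_index is an offset
+ * into the node's error heap, typically an enum matching the node's
+ * error strings:
+ *
+ *   typedef enum { MY_ERROR_DROPPED, MY_N_ERROR } my_error_t;
+ *   ...
+ *   vlib_node_increment_counter (vm, my_node.index, MY_ERROR_DROPPED,
+ *                                n_dropped);
+ */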
+
+#endif /* included_vlib_node_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/parse.c b/src/vlib/parse.c
new file mode 100644
index 00000000..1c4500ce
--- /dev/null
+++ b/src/vlib/parse.c
@@ -0,0 +1,1007 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/parse.h>
+
+#define PARSE_DEBUG 0
+
+u16 word_type_index, number_type_index, eof_type_index, rule_eof_type_index,
+ plus_type_index, minus_type_index, star_type_index, slash_type_index,
+ lpar_type_index, rpar_type_index;
+
+u8 *
+format_vlib_parse_value (u8 * s, va_list * args)
+{
+ vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+ vlib_parse_type_t *type;
+ vlib_parse_value_t *v;
+ u16 type_index;
+
+ s = format (s, "%d items:\n", vec_len (pm->parse_value));
+ vec_foreach (v, pm->parse_value)
+ {
+ type_index = v->type;
+ type = pool_elt_at_index (pm->parse_types, type_index);
+ if (type->format_value)
+ s = format (s, "[%d]: %U\n", v - pm->parse_value,
+ type->format_value, v);
+ else
+ s = format (s, "[%d]: (nofun)\n", v - pm->parse_value);
+ }
+ return s;
+}
+
+static u8 *
+format_vlib_parse_match (u8 * s, va_list * args)
+{
+ vlib_parse_match_t m = va_arg (*args, vlib_parse_match_t);
+ char *t = 0;
+ switch (m)
+ {
+#define _(a) case VLIB_PARSE_##a: t = #a; break;
+ foreach_parse_match_type
+#undef _
+ default:
+ t = 0;
+ break;
+ }
+
+ if (t)
+ return format (s, "%s", t);
+ else
+ return format (s, "unknown 0x%x", m);
+}
+
+static u8 *
+format_vlib_parse_item (u8 * s, va_list * args)
+{
+ vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+ vlib_parse_item_t *item = va_arg (*args, vlib_parse_item_t *);
+ vlib_parse_type_t *type = pool_elt_at_index (pm->parse_types, item->type);
+
+ if (item->type == word_type_index)
+ s = format (s, "%s", item->value.as_pointer);
+ else
+ s = format (s, "<%s>", type->name);
+ return s;
+}
+
+static u8 *
+format_vlib_parse_graph (u8 * s, va_list * args)
+{
+ vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+ vlib_parse_graph_t *node = va_arg (*args, vlib_parse_graph_t *);
+ vlib_parse_item_t *item;
+ vlib_parse_type_t *type;
+
+ /* $$$ hash table */
+ /* *INDENT-OFF* */
+ pool_foreach (type, pm->parse_types,
+ ({
+ if (type->rule_index == node - pm->parse_graph)
+ s = format (s, "\n<%s>\n", type->name);
+ }));
+/* *INDENT-ON* */
+
+ if (pm->root_index == (node - pm->parse_graph))
+ s = format (s, "\n<root>\n");
+
+ item = pool_elt_at_index (pm->parse_items, node->item);
+
+ s = format (s, "[%d] %U ", node - pm->parse_graph,
+ format_vlib_parse_item, pm, item);
+
+ if (node->peer == (u32) ~ 0)
+ s = format (s, "peer nil ");
+ else
+ s = format (s, "peer %4u ", node->peer);
+
+ if (node->deeper == (u32) ~ 0)
+ s = format (s, "deeper nil ");
+ else
+ s = format (s, "deeper %4u ", node->deeper);
+
+ return s;
+}
+
+void
+dump_parse_graph (void)
+{
+ vlib_parse_main_t *pm = &vlib_parse_main;
+ vlib_parse_graph_t *node;
+
+ /* *INDENT-OFF* */
+ pool_foreach (node, pm->parse_graph, ({
+ fformat(stdout, "%U\n", format_vlib_parse_graph, pm, node);
+ }));
+/* *INDENT-ON* */
+}
+
+always_inline void
+parse_cleanup_value (vlib_parse_main_t * pm, vlib_parse_value_t * pv)
+{
+ vlib_parse_type_t *type = pool_elt_at_index (pm->parse_types, pv->type);
+ if (type->value_cleanup_function)
+ type->value_cleanup_function (pv);
+}
+
+static void
+parse_reset (vlib_parse_main_t * pm, u8 * input)
+{
+ vlib_lex_token_t *t;
+ vlib_parse_value_t *pv;
+
+ vlib_lex_reset (pm->lex_main, input);
+
+ vec_foreach (t, pm->tokens) vlib_lex_cleanup_token (t);
+
+ vec_foreach (pv, pm->parse_value) parse_cleanup_value (pm, pv);
+
+ _vec_len (pm->parse_value) = 0;
+ _vec_len (pm->tokens) = 0;
+ pm->current_token_index = 0;
+}
+
+static void
+parse_help (vlib_parse_main_t * pm, u32 index)
+{
+ vlib_parse_graph_t *node;
+ vlib_parse_item_t *item;
+ vlib_parse_type_t *type;
+ vlib_main_t *vm = pm->vlib_main;
+ u8 *help_input;
+ int i;
+
+ help_input = vec_dup (pm->lex_main->input_vector);
+
+ for (i = vec_len (help_input) - 1; i >= 0; i--)
+ if (help_input[i] == '?')
+ {
+ help_input[i] = 0;
+ _vec_len (help_input) = i;
+ break;
+ }
+
+  /* Trim all trailing whitespace */
+  for (i = vec_len (help_input) - 1; i >= 0; i--)
+    {
+      if (help_input[i] != ' ' && help_input[i] != '\t')
+	break;
+      help_input[i] = 0;
+    }
+ _vec_len (help_input) = i + 1;
+
+ while (index != (u32) ~ 0)
+ {
+ node = pool_elt_at_index (pm->parse_graph, index);
+ item = pool_elt_at_index (pm->parse_items, node->item);
+ type = pool_elt_at_index (pm->parse_types, item->type);
+
+ if (item->type == eof_type_index && vec_len (pm->match_items) == 0)
+ /* do nothing */ ;
+ else if (item->type == word_type_index)
+ vlib_cli_output (vm, "%s %s\n", help_input, item->value.as_pointer);
+ else
+ vlib_cli_output (vm, "%s <%s>\n", help_input, type->name);
+ index = node->peer;
+ }
+ vec_free (help_input);
+}
+
+static vlib_parse_match_t
+parse_eval_internal (vlib_parse_main_t * pm, u32 index)
+{
+ vlib_parse_graph_t *node;
+ vlib_parse_item_t *item;
+ vlib_parse_type_t *type;
+ vlib_parse_value_t value, *pv;
+ vlib_parse_match_t rv;
+ u32 *partial_matches = 0;
+ vlib_lex_token_t *t;
+ u32 save_token_index = (u32) ~ 0, save_match_items = 0;
+ int had_value = 0;
+
+ if (pm->current_token_index >= vec_len (pm->tokens))
+ return VLIB_PARSE_MATCH_FAIL;
+
+ /* current token */
+ t = vec_elt_at_index (pm->tokens, pm->current_token_index);
+
+ /* Help ? */
+ if (PREDICT_FALSE (t->token == VLIB_LEX_qmark))
+ {
+ parse_help (pm, index);
+ _vec_len (pm->match_items) = 0;
+ return VLIB_PARSE_MATCH_DONE;
+ }
+
+ /* Across all peers at this level of the parse graph */
+ while (index != (u32) ~ 0)
+ {
+ node = pool_elt_at_index (pm->parse_graph, index);
+ item = pool_elt_at_index (pm->parse_items, node->item);
+ type = pool_elt_at_index (pm->parse_types, item->type);
+
+ /*
+ * Save the token index. We may have to back up several
+ * trie plies. Type-specific match functions can consume
+       * multiple tokens, and they may not be optimally careful.
+ */
+ save_token_index = pm->current_token_index;
+ save_match_items = vec_len (pm->match_items);
+ vec_add1 (pm->match_items, node->item);
+
+ if (PARSE_DEBUG > 1)
+ clib_warning ("Try to match token %U against node %d",
+ format_vlib_lex_token, pm->lex_main, t, index);
+
+ /* Call the type-specific match function */
+ rv = type->match_function (pm, type, t, &value);
+
+ if (PARSE_DEBUG > 1)
+ clib_warning ("returned %U", format_vlib_parse_match, rv);
+
+ switch (rv)
+ {
+ case VLIB_PARSE_MATCH_VALUE:
+ /*
+ * Matched, and returned a value to append to the
+ * set of args passed to the action function
+ */
+ value.type = item->type;
+ vec_add1 (pm->parse_value, value);
+ had_value = 1;
+ /* fallthrough */
+
+ case VLIB_PARSE_MATCH_FULL:
+ unambiguous_partial_match:
+ /* Consume the matched token */
+ pm->current_token_index++;
+
+ /* continue matching along this path */
+ rv = parse_eval_internal (pm, node->deeper);
+
+ /* this is not the right path */
+ if (rv == VLIB_PARSE_MATCH_FAIL)
+ {
+ if (had_value)
+ {
+ /* Delete the value */
+ value = pm->parse_value[vec_len (pm->parse_value) - 1];
+ parse_cleanup_value (pm, &value);
+ _vec_len (pm->parse_value) -= 1;
+ }
+ /* Continue with the next sibling */
+ pm->current_token_index = save_token_index;
+ _vec_len (pm->match_items) = save_match_items;
+ index = node->peer;
+ break;
+ }
+ return rv;
+
+ case VLIB_PARSE_MATCH_PARTIAL:
+ /* Partial (substring) match, remember it but keep going */
+ vec_add1 (partial_matches, node - pm->parse_graph);
+ index = node->peer;
+ break;
+
+ case VLIB_PARSE_MATCH_FAIL:
+ /* Continue with the next sibling */
+ index = node->peer;
+ _vec_len (pm->match_items) = save_match_items;
+ break;
+
+ case VLIB_PARSE_MATCH_DONE:
+ /* Parse complete, invoke the action function */
+ if (PARSE_DEBUG > 0)
+ clib_warning ("parse_value: %U", format_vlib_parse_value, pm);
+
+ {
+ vlib_parse_eval_function_t *f = item->value.as_pointer;
+ if (f)
+ rv = f (pm, item, pm->parse_value);
+ }
+
+ vec_foreach (pv, pm->parse_value) parse_cleanup_value (pm, pv);
+ _vec_len (pm->parse_value) = 0;
+ _vec_len (pm->match_items) = 0;
+ return rv;
+
+ case VLIB_PARSE_MATCH_AMBIGUOUS:
+ case VLIB_PARSE_MATCH_EVAL_FAIL:
+ case VLIB_PARSE_MATCH_RULE:
+ _vec_len (pm->match_items) = save_match_items;
+ return rv;
+ }
+ }
+
+ /*
+ * Out of siblings. If we have exactly one partial match
+   * we win.
+ */
+ if (vec_len (partial_matches) == 1)
+ {
+ index = partial_matches[0];
+ node = pool_elt_at_index (pm->parse_graph, index);
+ vec_free (partial_matches);
+ goto unambiguous_partial_match;
+ }
+
+ /* Ordinary loser */
+ rv = VLIB_PARSE_MATCH_FAIL;
+
+ /* Ambiguous loser */
+ if (vec_len (partial_matches) > 1)
+ {
+ vec_free (partial_matches);
+ rv = VLIB_PARSE_MATCH_AMBIGUOUS;
+ }
+
+ _vec_len (pm->match_items) = save_match_items;
+ return rv;
+}
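+
+/*
+ * Worked example of the backtracking above, with hypothetical rules
+ * "show fish" and "show fowl <number>": for the input "show fowl 42",
+ * "show" matches at the first ply; at the next ply "fish" returns
+ * MATCH_FAIL against "fowl", so we advance to its peer "fowl", which
+ * matches; <number> then consumes "42" and appends a MATCH_VALUE; the
+ * trailing eof item returns MATCH_DONE and the rule's eval function
+ * runs with the collected parse_value vector.
+ */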
+
+vlib_parse_match_t
+rule_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+ vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+ vlib_parse_match_t rv;
+ static int recursion_level;
+
+ if (PARSE_DEBUG > 1)
+ clib_warning ("[%d]: try to match type %s graph index %d",
+ recursion_level, type->name, type->rule_index);
+ recursion_level++;
+ rv = parse_eval_internal (pm, type->rule_index);
+ recursion_level--;
+
+  /* Break the recursive unwind here... */
+ if (rv == VLIB_PARSE_MATCH_RULE)
+ {
+ if (PARSE_DEBUG > 1)
+ clib_warning ("[%d]: type %s matched", recursion_level, type->name);
+
+ return VLIB_PARSE_MATCH_FULL;
+ }
+ else
+ {
+ if (PARSE_DEBUG > 1)
+ clib_warning ("[%d]: type %s returns %U", recursion_level, type->name,
+ format_vlib_parse_match, rv);
+ }
+ return rv;
+}
+
+static int
+parse_eval (vlib_parse_main_t * pm, u8 * input)
+{
+ vlib_lex_token_t *t;
+
+ parse_reset (pm, input);
+
+ /* Tokenize the entire input vector */
+ do
+ {
+ vec_add2 (pm->tokens, t, 1);
+ vlib_lex_get_token (pm->lex_main, t);
+ }
+ while (t->token != VLIB_LEX_eof);
+
+ /* Feed it to the parser */
+ return parse_eval_internal (pm, pm->root_index);
+}
+
+/* Temporary vlib stub */
+vlib_parse_match_t
+vlib_parse_eval (u8 * input)
+{
+ return parse_eval (&vlib_parse_main, input);
+}
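+
+/*
+ * Illustrative call (the command string is arbitrary and must match a
+ * registered rule; the input must be a vector, not a string literal):
+ *
+ *   u8 *cmd = format (0, "show fish 42");
+ *   vlib_parse_match_t m = vlib_parse_eval (cmd);
+ *   if (m != VLIB_PARSE_MATCH_DONE)
+ *     clib_warning ("no match: %U", format_vlib_parse_match, m);
+ *   vec_free (cmd);
+ */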
+
+u16
+parse_type_find_or_create (vlib_parse_main_t * pm, vlib_parse_type_t * t)
+{
+ uword *p;
+ vlib_parse_type_t *n;
+ u8 *name_copy;
+
+ p = hash_get_mem (pm->parse_type_by_name_hash, t->name);
+ if (p)
+ return p[0];
+
+ pool_get (pm->parse_types, n);
+ *n = *t;
+ n->rule_index = (u32) ~ 0;
+
+ name_copy = format (0, "%s%c", n->name, 0);
+
+ hash_set_mem (pm->parse_type_by_name_hash, name_copy, n - pm->parse_types);
+ return n - pm->parse_types;
+}
+
+u16
+parse_type_find_by_name (vlib_parse_main_t * pm, char *name)
+{
+ uword *p;
+
+ p = hash_get_mem (pm->parse_type_by_name_hash, name);
+ if (p)
+ return p[0];
+
+ return (u16) ~ 0;
+}
+
+u32
+parse_item_find_or_create (vlib_parse_main_t * pm, vlib_parse_item_t * item)
+{
+ uword *p;
+ vlib_parse_item_t *i;
+
+ /* Exact match the entire item */
+ p = mhash_get (&pm->parse_item_hash, item);
+ if (p)
+ return p[0];
+
+ pool_get (pm->parse_items, i);
+ *i = *item;
+
+ mhash_set (&pm->parse_item_hash, i, i - pm->parse_items, 0);
+ return i - pm->parse_items;
+}
+
+static void
+parse_type_and_graph_init (vlib_parse_main_t * pm)
+{
+ u32 eof_index;
+ vlib_parse_type_t type;
+ vlib_parse_item_t item;
+
+ memset (&type, 0, sizeof (type));
+
+#define foreach_token_type \
+ _ (eof) \
+ _ (rule_eof) \
+ _ (word) \
+ _ (number) \
+ _ (plus) \
+ _ (minus) \
+ _ (star) \
+ _ (slash) \
+ _ (lpar) \
+ _ (rpar)
+
+#define _(a) a##_type_index = parse_type_find_by_name (pm, #a);
+ foreach_token_type
+#undef _
+ memset (&item, 0, sizeof (item));
+ item.type = eof_type_index;
+
+ eof_index = parse_item_find_or_create (pm, &item);
+ pm->root_index = (u32) ~ 0;
+
+#if 0
+ pool_get (pm->parse_graph, g);
+ memset (g, 0xff, sizeof (*g));
+ g->item = eof_index;
+ pm->root_index = 0;
+#endif
+}
+
+static void
+tokenize (vlib_parse_main_t * pm, parse_registration_t * pr)
+{
+ vlib_lex_token_t *t;
+ pm->register_input = format (pm->register_input,
+ "%s%c", pr->initializer, 0);
+
+ parse_reset (pm, pm->register_input);
+
+ do
+ {
+ vec_add2 (pm->tokens, t, 1);
+ vlib_lex_get_token (pm->lex_main, t);
+ }
+ while (t->token != VLIB_LEX_eof);
+ _vec_len (pm->register_input) = 0;
+}
+
+static int
+is_typed_rule (vlib_parse_main_t * pm)
+{
+ vlib_lex_token_t *t = vec_elt_at_index (pm->tokens, 0);
+
+ /* <mytype> = blah blah blah */
+ if (vec_len (pm->tokens) >= 4
+ && t[0].token == VLIB_LEX_lt
+ && t[1].token == VLIB_LEX_word
+ && t[2].token == VLIB_LEX_gt && t[3].token == VLIB_LEX_equals)
+ return 1;
+ return 0;
+}
+
+static int
+token_matches_graph_node (vlib_parse_main_t * pm,
+ vlib_lex_token_t * t,
+ vlib_parse_graph_t * node,
+ vlib_parse_item_t * item,
+ vlib_parse_type_t * type, u32 * token_increment)
+{
+ /* EOFs don't match */
+ if (t->token == VLIB_LEX_eof)
+ return 0;
+
+ /* New chain element is a word */
+ if (t->token == VLIB_LEX_word)
+ {
+ /* but the item in hand is not a word */
+ if (item->type != word_type_index)
+ return 0;
+
+ /* Or it's not this particular word */
+ if (strcmp (t->value.as_pointer, item->value.as_pointer))
+ return 0;
+ *token_increment = 1;
+ return 1;
+ }
+ /* New chain element is a type-name: < TYPE-NAME > */
+ if (t->token == VLIB_LEX_lt)
+ {
+ u16 token_type_index;
+
+ /* < TYPE > */
+ if (t[1].token != VLIB_LEX_word || t[2].token != VLIB_LEX_gt)
+ {
+ clib_warning (0, "broken type name in '%s'", pm->register_input);
+ return 0;
+ }
+
+ token_type_index = parse_type_find_by_name (pm, t[1].value.as_pointer);
+ if (token_type_index == (u16) ~ 0)
+ {
+ clib_warning (0, "unknown type '%s'", t[1].value.as_pointer);
+ return 0;
+ }
+
+      /* It's a known type, but it does not match. */
+ if (item->type != token_type_index)
+ return 0;
+
+ *token_increment = 3;
+ return 1;
+ }
+ clib_warning ("BUG: t->token = %d", t->token);
+ return 0;
+}
+
+u32
+generate_subgraph_from_tokens (vlib_parse_main_t * pm,
+ vlib_lex_token_t * t,
+ u32 * new_subgraph_depth,
+ parse_registration_t * pr, int not_a_rule)
+{
+ vlib_parse_graph_t *g, *last_g;
+ vlib_parse_item_t new_item;
+ u32 rv = (u32) ~ 0, new_item_index, last_index = (u32) ~ 0;
+ u16 token_type_index;
+ u32 depth = 0;
+
+ while (t < pm->tokens + vec_len (pm->tokens))
+ {
+ memset (&new_item, 0, sizeof (new_item));
+
+ if (t->token == VLIB_LEX_word)
+ {
+ new_item.type = word_type_index;
+ new_item.value.as_pointer = vec_dup ((u8 *) t->value.as_pointer);
+ new_item_index = parse_item_find_or_create (pm, &new_item);
+ t++;
+ }
+ else if (t->token == VLIB_LEX_lt)
+ {
+ if (t[1].token != VLIB_LEX_word || t[2].token != VLIB_LEX_gt)
+ {
+ clib_warning ("broken type name in '%s'", pm->register_input);
+ goto screwed;
+ }
+ token_type_index = parse_type_find_by_name (pm,
+ t[1].value.as_pointer);
+ if (token_type_index == (u16) ~ 0)
+ {
+ clib_warning ("unknown type 2 '%s'", t[1].value.as_pointer);
+ goto screwed;
+ }
+
+ new_item.type = token_type_index;
+ new_item.value.as_pointer = 0;
+ new_item_index = parse_item_find_or_create (pm, &new_item);
+ t += 3; /* skip < <type-name> and > */
+ }
+ else if (t->token == VLIB_LEX_eof)
+ {
+ screwed:
+ new_item.type = not_a_rule ? eof_type_index : rule_eof_type_index;
+ new_item.value.as_pointer = pr->eof_match;
+ new_item_index = parse_item_find_or_create (pm, &new_item);
+ t++;
+ }
+ else
+ {
+ clib_warning ("unexpected token %U index %d in '%s'",
+ format_vlib_lex_token, pm->lex_main, t,
+ t - pm->tokens, pm->register_input);
+ goto screwed;
+ }
+
+ pool_get (pm->parse_graph, g);
+ memset (g, 0xff, sizeof (*g));
+ g->item = new_item_index;
+ depth++;
+
+ if (rv == (u32) ~ 0)
+ {
+ rv = g - pm->parse_graph;
+ last_index = rv;
+ }
+ else
+ {
+ last_g = pool_elt_at_index (pm->parse_graph, last_index);
+ last_index = last_g->deeper = g - pm->parse_graph;
+ }
+ }
+ *new_subgraph_depth = depth;
+ return rv;
+}
+
+static u32
+measure_depth (vlib_parse_main_t * pm, u32 index)
+{
+ vlib_parse_graph_t *node;
+ vlib_parse_item_t *item;
+ u32 max = 0;
+ u32 depth;
+
+ if (index == (u32) ~ 0)
+ return 0;
+
+ node = pool_elt_at_index (pm->parse_graph, index);
+ item = pool_elt_at_index (pm->parse_items, node->item);
+
+ if (item->type == eof_type_index)
+ return 1;
+
+ while (index != (u32) ~ 0)
+ {
+ node = pool_elt_at_index (pm->parse_graph, index);
+ depth = measure_depth (pm, node->deeper);
+ if (max < depth)
+ max = depth;
+ index = node->peer;
+ }
+
+ return max + 1;
+}
+
+static void
+add_subgraph_to_graph (vlib_parse_main_t * pm,
+ u32 last_matching_index,
+ u32 graph_root_index,
+ u32 new_subgraph_index, u32 new_subgraph_depth)
+{
+ vlib_parse_graph_t *parent_node;
+ int new_subgraph_longest = 1;
+ u32 current_peer_index;
+ u32 current_depth;
+ vlib_parse_graph_t *current_peer = 0;
+ vlib_parse_graph_t *new_subgraph_node =
+ pool_elt_at_index (pm->parse_graph, new_subgraph_index);
+
+ /*
+ * Case 1: top-level peer. Splice into the top-level
+ * peer chain according to rule depth
+ */
+ if (last_matching_index == (u32) ~ 0)
+ {
+ u32 index = graph_root_index;
+ while (1)
+ {
+ current_peer = pool_elt_at_index (pm->parse_graph, index);
+ current_depth = measure_depth (pm, index);
+ if (current_depth < new_subgraph_depth
+ || current_peer->peer == (u32) ~ 0)
+ break;
+ index = current_peer->peer;
+ }
+ new_subgraph_node->peer = current_peer->peer;
+ current_peer->peer = new_subgraph_index;
+ return;
+ }
+
+ parent_node = pool_elt_at_index (pm->parse_graph, last_matching_index);
+ current_peer_index = parent_node->deeper;
+
+ while (current_peer_index != (u32) ~ 0)
+ {
+ current_peer = pool_elt_at_index (pm->parse_graph, current_peer_index);
+ current_depth = measure_depth (pm, current_peer_index);
+ if (current_depth < new_subgraph_depth)
+ break;
+ new_subgraph_longest = 0;
+ current_peer_index = current_peer->peer;
+ }
+
+ ASSERT (current_peer);
+
+ if (new_subgraph_longest)
+ {
+ new_subgraph_node->peer = parent_node->deeper;
+ parent_node->deeper = new_subgraph_index;
+ }
+ else
+ {
+ new_subgraph_node->peer = current_peer->peer;
+ current_peer->peer = new_subgraph_index;
+ }
+}
+
+static clib_error_t *
+parse_register_one (vlib_parse_main_t * pm, parse_registration_t * pr)
+{
+ u32 graph_root_index;
+ u16 subgraph_type_index = (u16) ~ 0;
+ vlib_parse_type_t *subgraph_type = 0;
+ vlib_lex_token_t *t;
+ vlib_parse_graph_t *node;
+ u32 node_index, last_index, token_increment, new_subgraph_index;
+ u32 new_subgraph_depth, last_matching_index;
+ vlib_parse_item_t *item;
+ vlib_parse_type_t *type;
+
+ int use_main_graph = 1;
+
+ tokenize (pm, pr);
+
+ /* A typed rule? */
+ if (is_typed_rule (pm))
+ {
+ /* Get the type and its current subgraph root, if any */
+ t = vec_elt_at_index (pm->tokens, 1);
+ subgraph_type_index = parse_type_find_by_name (pm, t->value.as_pointer);
+ if (subgraph_type_index == (u16) ~ 0)
+ return clib_error_return (0, "undeclared type '%s'",
+ t->value.as_pointer);
+ subgraph_type =
+ pool_elt_at_index (pm->parse_types, subgraph_type_index);
+ graph_root_index = subgraph_type->rule_index;
+ /* Skip "mytype> = */
+ t += 3;
+ use_main_graph = 0;
+ }
+ else
+ {
+ /* top-level graph */
+ graph_root_index = pm->root_index;
+ t = vec_elt_at_index (pm->tokens, 0);
+ }
+
+ last_matching_index = (u32) ~ 0;
+ last_index = node_index = graph_root_index;
+
+ /* Find the first token which isn't already being parsed */
+ while (t < pm->tokens + vec_len (pm->tokens) && node_index != (u32) ~ 0)
+ {
+ node = pool_elt_at_index (pm->parse_graph, node_index);
+ item = pool_elt_at_index (pm->parse_items, node->item);
+ type = pool_elt_at_index (pm->parse_types, item->type);
+ last_index = node_index;
+
+ if (token_matches_graph_node
+ (pm, t, node, item, type, &token_increment))
+ {
+ t += token_increment;
+ last_matching_index = node_index;
+ node_index = node->deeper;
+ }
+ else
+ node_index = node->peer;
+ }
+
+ new_subgraph_index =
+ generate_subgraph_from_tokens (pm, t, &new_subgraph_depth, pr,
+ use_main_graph);
+
+ /* trivial cases: first graph node or first type rule */
+ if (graph_root_index == (u32) ~ 0)
+ {
+ if (use_main_graph)
+ pm->root_index = new_subgraph_index;
+ else
+ subgraph_type->rule_index = new_subgraph_index;
+ return 0;
+ }
+
+ add_subgraph_to_graph (pm, last_matching_index, graph_root_index,
+ new_subgraph_index, new_subgraph_depth);
+ return 0;
+}
+
+static clib_error_t *
+parse_register (vlib_main_t * vm,
+ parse_registration_t * lo,
+ parse_registration_t * hi, vlib_parse_main_t * pm)
+{
+ parse_registration_t *pr;
+
+ for (pr = lo; pr < hi; pr = vlib_elf_section_data_next (pr, 0))
+ vec_add1 (pm->parse_registrations, pr);
+
+ return 0;
+}
+
+static clib_error_t *
+parse_register_one_type (vlib_parse_main_t * pm, vlib_parse_type_t * rp)
+{
+ (void) parse_type_find_or_create (pm, (vlib_parse_type_t *) rp);
+ return 0;
+}
+
+static clib_error_t *
+parse_type_register (vlib_main_t * vm,
+ vlib_parse_type_t * lo,
+ vlib_parse_type_t * hi, vlib_parse_main_t * pm)
+{
+ clib_error_t *error = 0;
+ vlib_parse_type_t *ptr;
+
+ for (ptr = lo; ptr < hi; ptr = vlib_elf_section_data_next (ptr, 0))
+ {
+ error = parse_register_one_type (pm, ptr);
+ if (error)
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+clib_error_t *vlib_stdlex_init (vlib_main_t * vm) __attribute__ ((weak));
+clib_error_t *
+vlib_stdlex_init (vlib_main_t * vm)
+{
+ (void) vlib_lex_add_table ("ignore_everything");
+ return 0;
+}
+
+static int
+compute_rule_length (parse_registration_t * r)
+{
+ int length, i;
+ vlib_parse_main_t *pm = &vlib_parse_main;
+
+ if (r->rule_length)
+ return r->rule_length;
+
+ length = 0;
+
+ tokenize (pm, r);
+ length = vec_len (pm->tokens);
+
+ /* Account for "<foo> = " in "<foo> = bar" etc. */
+ if (is_typed_rule (pm))
+ length -= 2;
+
+ for (i = 0; i < vec_len (pm->tokens); i++)
+ {
+ switch (pm->tokens[i].token)
+ {
+ case VLIB_LEX_lt:
+ case VLIB_LEX_gt:
+	  length -= 1;
+	  /* fall through */
+
+ default:
+ break;
+ }
+ }
+
+ ASSERT (length > 0);
+ r->rule_length = length;
+ return length;
+}
+
+static int
+rule_length_compare (parse_registration_t * r1, parse_registration_t * r2)
+{
+ compute_rule_length (r1);
+ compute_rule_length (r2);
+ /* Descending sort */
+ return r2->rule_length - r1->rule_length;
+}
+
+
+static clib_error_t *
+parse_init (vlib_main_t * vm)
+{
+ vlib_parse_main_t *pm = &vlib_parse_main;
+ vlib_lex_main_t *lm = &vlib_lex_main;
+ vlib_elf_section_bounds_t *b, *bounds;
+ clib_error_t *error = 0;
+ parse_registration_t *rule;
+ int i;
+
+ if ((error = vlib_call_init_function (vm, lex_onetime_init)))
+ return error;
+
+ if ((error = vlib_stdlex_init (vm)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, parse_builtin_init)))
+ return error;
+
+ pm->vlib_main = vm;
+ pm->lex_main = lm;
+
+ mhash_init (&pm->parse_item_hash, sizeof (u32), sizeof (vlib_parse_item_t));
+ pm->parse_type_by_name_hash = hash_create_string (0, sizeof (u32));
+
+ vec_validate (pm->parse_value, 16);
+ vec_validate (pm->tokens, 16);
+ vec_validate (pm->register_input, 32);
+ vec_validate (pm->match_items, 16);
+
+ _vec_len (pm->parse_value) = 0;
+ _vec_len (pm->tokens) = 0;
+ _vec_len (pm->register_input) = 0;
+ _vec_len (pm->match_items) = 0;
+
+ bounds = vlib_get_elf_section_bounds (vm, "parse_type_registrations");
+ vec_foreach (b, bounds)
+ {
+ error = parse_type_register (vm, b->lo, b->hi, pm);
+ if (error)
+ break;
+ }
+ vec_free (bounds);
+
+ parse_type_and_graph_init (pm);
+
+ bounds = vlib_get_elf_section_bounds (vm, "parse_registrations");
+ vec_foreach (b, bounds)
+ {
+ error = parse_register (vm, b->lo, b->hi, pm);
+ if (error)
+ break;
+ }
+ vec_free (bounds);
+
+ vec_sort_with_function (pm->parse_registrations, rule_length_compare);
+
+ for (i = 0; i < vec_len (pm->parse_registrations); i++)
+ {
+ rule = pm->parse_registrations[i];
+ parse_register_one (pm, rule);
+ }
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (parse_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/parse.h b/src/vlib/parse.h
new file mode 100644
index 00000000..036e7447
--- /dev/null
+++ b/src/vlib/parse.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_parse_h
+#define included_vlib_parse_h
+
+#include <vlib/vlib.h>
+#include <vlib/lex.h>
+#include <vppinfra/mhash.h>
+
+typedef struct
+{
+ /* Word aligned value. */
+ union
+ {
+ u8 as_u8[32 - 1 * sizeof (u16)];
+ void *as_pointer;
+ uword as_uword;
+ word as_word;
+ u64 as_u64;
+ } value;
+
+ /* 16 bit type at end so that 30 bytes of value are aligned. */
+ u16 type;
+} __attribute__ ((packed)) vlib_parse_value_t;
+
+/* Instance of a type. */
+typedef struct
+{
+  u32 type;
+
+  u32 origin;
+
+  u32 help_index;
+
+  union
+  {
+    void *as_pointer;
+    uword as_uword;
+  } value;
+} vlib_parse_item_t;
+
+typedef struct
+{
+  /* Index of item for this node. */
+  u32 item;
+
+  /* Graph index of peer (sibling) node (linked list of peers). */
+  u32 peer;
+
+  /* Graph index of deeper (child) node (linked list of children). */
+  u32 deeper;
+} vlib_parse_graph_t;
+
+#define foreach_parse_match_type \
+  _(MATCH_DONE) \
+  _(MATCH_RULE) \
+  _(MATCH_FAIL) \
+  _(MATCH_FULL) \
+  _(MATCH_VALUE) \
+  _(MATCH_PARTIAL) \
+  _(MATCH_AMBIGUOUS) \
+  _(MATCH_EVAL_FAIL)
+
+typedef enum
+{
+#define _(a) VLIB_PARSE_##a,
+  foreach_parse_match_type
+#undef _
+} vlib_parse_match_t;
+
+struct vlib_parse_type;
+struct vlib_parse_main;
+
+typedef vlib_parse_match_t (vlib_parse_match_function_t)
+  (struct vlib_parse_main *, struct vlib_parse_type *,
+   vlib_lex_token_t *, vlib_parse_value_t *);
+typedef void (vlib_parse_value_cleanup_function_t) (vlib_parse_value_t *);
+
+typedef struct vlib_parse_type
+{
+  /* Type name. */
+  char *name;
+
+  vlib_parse_match_function_t *match_function;
+
+  vlib_parse_value_cleanup_function_t *value_cleanup_function;
+
+  format_function_t *format_value;
+
+  u32 rule_index;
+} vlib_parse_type_t;
+
+typedef struct
+{
+  char *initializer;
+  void *eof_match;
+  int rule_length;
+} parse_registration_t;
+
+typedef struct vlib_parse_main
+{
+  /* (type, origin, help, value) tuples */
+  vlib_parse_item_t *parse_items;
+  mhash_t parse_item_hash;
+
+  /* (item, peer, deeper) tuples */
+  vlib_parse_graph_t *parse_graph;
+  u32 root_index;
+
+  u8 *register_input;
+
+  /* parser types */
+  vlib_parse_type_t *parse_types;
+  uword *parse_type_by_name_hash;
+
+  /* Vector of MATCH_VALUEs */
+  vlib_parse_value_t *parse_value;
+  u32 *match_items;
+
+  /* Parse registrations */
+  parse_registration_t **parse_registrations;
+
+  /* Token vector */
+  vlib_lex_token_t *tokens;
+  u32 current_token_index;
+
+  vlib_lex_main_t *lex_main;
+  vlib_main_t *vlib_main;
+} vlib_parse_main_t;
+
+vlib_parse_main_t vlib_parse_main;
+
+typedef vlib_parse_match_t (vlib_parse_eval_function_t)
+  (vlib_parse_main_t *, vlib_parse_item_t *, vlib_parse_value_t *);
+
+vlib_parse_match_t
+vlib_parse_eval (u8 * input);
+
+format_function_t format_vlib_parse_value;
+
+/* FIXME need these to be global? */
+vlib_parse_match_function_t rule_match, eof_match, word_match, number_match;
+
+#define _PARSE_REGISTRATION_DATA(x) \
+VLIB_ELF_SECTION_DATA(x##_registration,parse_registration_t,parse_registrations)
+
+#define PARSE_INIT(x, s, e) \
+static _PARSE_REGISTRATION_DATA(x) = { \
+ .initializer = s, \
+ .eof_match = e, \
+};
+
+#define _PARSE_TYPE_REGISTRATION_DATA(x) \
+VLIB_ELF_SECTION_DATA(x##_type_registration,vlib_parse_type_t, \
+parse_type_registrations)
+
+#define PARSE_TYPE_INIT(n, m, c, f) \
+static _PARSE_TYPE_REGISTRATION_DATA(n) = { \
+ .name = #n, \
+ .match_function = m, \
+ .value_cleanup_function = c, \
+ .format_value = f, \
+};
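+
+/*
+ * Registration sketch (rule text and names hypothetical): PARSE_INIT
+ * registers a command rule whose eof_match function runs when the whole
+ * line matches; PARSE_TYPE_INIT registers the <number>-style types it
+ * references:
+ *
+ *   static vlib_parse_match_t
+ *   fish_eval (vlib_parse_main_t * pm, vlib_parse_item_t * item,
+ *              vlib_parse_value_t * value)
+ *   {
+ *     uword count = value[0].value.as_uword;
+ *     // ... act on `count' ...
+ *     return VLIB_PARSE_MATCH_DONE;
+ *   }
+ *
+ *   PARSE_INIT (fish_cmd, "show fish <number>", fish_eval);
+ */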
+
+#endif /* included_vlib_parse_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/parse_builtin.c b/src/vlib/parse_builtin.c
new file mode 100644
index 00000000..0ce716b5
--- /dev/null
+++ b/src/vlib/parse_builtin.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/parse.h>
+
+always_inline void *
+parse_last_match_value (vlib_parse_main_t * pm)
+{
+ vlib_parse_item_t *i;
+ i = pool_elt_at_index (pm->parse_items,
+ vec_elt (pm->match_items,
+ vec_len (pm->match_items) - 1));
+ return i->value.as_pointer;
+}
+
+vlib_parse_match_t
+eof_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+ vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+  return t->token == VLIB_LEX_eof
+    ? VLIB_PARSE_MATCH_DONE : VLIB_PARSE_MATCH_FAIL;
+}
+
+PARSE_TYPE_INIT (eof, eof_match, 0 /* cleanup value */ ,
+ 0 /* format value */ );
+
+vlib_parse_match_t
+rule_eof_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+ vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+ vlib_parse_match_function_t *fp = parse_last_match_value (pm);
+ pm->current_token_index--;
+ return fp ? fp (pm, type, t, valuep) : VLIB_PARSE_MATCH_RULE;
+}
+
+PARSE_TYPE_INIT (rule_eof, rule_eof_match, 0, 0);
+
+vlib_parse_match_t
+word_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+ vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+ u8 *tv, *iv;
+ int i;
+
+ if (t->token != VLIB_LEX_word)
+ return VLIB_PARSE_MATCH_FAIL;
+
+ tv = t->value.as_pointer;
+ iv = parse_last_match_value (pm);
+
+ for (i = 0; tv[i]; i++)
+ {
+ if (tv[i] != iv[i])
+ return VLIB_PARSE_MATCH_FAIL;
+ }
+
+ return iv[i] == 0 ? VLIB_PARSE_MATCH_FULL : VLIB_PARSE_MATCH_PARTIAL;
+}
+
+PARSE_TYPE_INIT (word, word_match, 0 /* cleanup value */ ,
+ 0 /* format value */ );
+
+vlib_parse_match_t
+number_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+ vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+ if (t->token == VLIB_LEX_number)
+ {
+ valuep->value.as_uword = t->value.as_uword;
+ return VLIB_PARSE_MATCH_VALUE;
+ }
+ return VLIB_PARSE_MATCH_FAIL;
+}
+
+static u8 *
+format_value_number (u8 * s, va_list * args)
+{
+ vlib_parse_value_t *v = va_arg (*args, vlib_parse_value_t *);
+ uword a = v->value.as_uword;
+
+ if (BITS (uword) == 64)
+ s = format (s, "%lld(0x%llx)", a, a);
+ else
+ s = format (s, "%ld(0x%lx)", a, a);
+ return s;
+}
+
+PARSE_TYPE_INIT (number, number_match, 0 /* cleanup value */ ,
+		 format_value_number /* format value */ );
+
+
+#define foreach_vanilla_lex_match_function \
+ _(plus) \
+ _(minus) \
+ _(star) \
+ _(slash) \
+ _(lpar) \
+ _(rpar)
+
+#define LEX_MATCH_DEBUG 0
+
+#define _(name) \
+vlib_parse_match_t name##_match (vlib_parse_main_t *pm, \
+ vlib_parse_type_t *type, \
+ vlib_lex_token_t *t, \
+ vlib_parse_value_t *valuep) \
+{ \
+ if (LEX_MATCH_DEBUG > 0) \
+ clib_warning ("against %U returns %s", \
+ format_vlib_lex_token, pm->lex_main, t, \
+ (t->token == VLIB_LEX_##name) \
+ ? "VLIB_PARSE_MATCH_FULL" : \
+ "VLIB_PARSE_MATCH_FAIL"); \
+ if (t->token == VLIB_LEX_##name) \
+ return VLIB_PARSE_MATCH_FULL; \
+ return VLIB_PARSE_MATCH_FAIL; \
+} \
+ \
+PARSE_TYPE_INIT (name, name##_match, 0 /* cleanup value */, \
+                 0 /* format value */);
+
+foreach_vanilla_lex_match_function
+#undef _
+/* Empty init function, present so that this file gets linked in. */
+static clib_error_t *
+parse_builtin_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (parse_builtin_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/pci/pci.c b/src/vlib/pci/pci.c
new file mode 100644
index 00000000..7100064d
--- /dev/null
+++ b/src/vlib/pci/pci.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.c: Linux user space PCI bus management.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+vlib_pci_main_t pci_main;
+
+vlib_pci_device_t *
+vlib_get_pci_device (vlib_pci_addr_t * addr)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ uword *p;
+ p = hash_get (pm->pci_dev_index_by_pci_addr, addr->as_u32);
+
+ if (p == 0)
+ return 0;
+
+ return vec_elt_at_index (pm->pci_devs, p[0]);
+}
+
+static clib_error_t *
+show_pci_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ vlib_pci_device_t *d;
+ int show_all = 0;
+ u8 *s = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "all"))
+ show_all = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ vlib_cli_output (vm, "%-13s%-5s%-12s%-13s%-16s%-32s%s",
+ "Address", "Sock", "VID:PID", "Link Speed", "Driver",
+ "Product Name", "Vital Product Data");
+
+ /* *INDENT-OFF* */
+ pool_foreach (d, pm->pci_devs, ({
+
+ if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && !show_all)
+ continue;
+
+ vec_reset_length (s);
+
+ if (d->numa_node >= 0)
+ s = format (s, " %d", d->numa_node);
+
+ vlib_cli_output (vm, "%-13U%-5v%04x:%04x %-13U%-16s%-32v%U",
+ format_vlib_pci_addr, &d->bus_address, s,
+ d->vendor_id, d->device_id,
+ format_vlib_pci_link_speed, d,
+ d->driver_name ? (char *) d->driver_name : "",
+ d->product_name,
+ format_vlib_pci_vpd, d->vpd_r, 0);
+ }));
+/* *INDENT-ON* */
+
+ vec_free (s);
+ return 0;
+}
+
+uword
+unformat_vlib_pci_addr (unformat_input_t * input, va_list * args)
+{
+ vlib_pci_addr_t *addr = va_arg (*args, vlib_pci_addr_t *);
+ u32 x[4];
+
+ if (!unformat (input, "%x:%x:%x.%x", &x[0], &x[1], &x[2], &x[3]))
+ return 0;
+
+ addr->domain = x[0];
+ addr->bus = x[1];
+ addr->slot = x[2];
+ addr->function = x[3];
+
+ return 1;
+}
+
+u8 *
+format_vlib_pci_addr (u8 * s, va_list * va)
+{
+ vlib_pci_addr_t *addr = va_arg (*va, vlib_pci_addr_t *);
+ return format (s, "%04x:%02x:%02x.%x", addr->domain, addr->bus,
+ addr->slot, addr->function);
+}
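+
+/*
+ * Round-trip sketch (the address string is arbitrary): parse
+ * "0000:03:00.0" into a vlib_pci_addr_t, then format it back:
+ *
+ *   vlib_pci_addr_t addr;
+ *   unformat_input_t in;
+ *   unformat_init_string (&in, "0000:03:00.0", 12);
+ *   if (unformat_user (&in, unformat_vlib_pci_addr, &addr))
+ *     fformat (stdout, "%U\n", format_vlib_pci_addr, &addr);
+ *   unformat_free (&in);
+ */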
+
+u8 *
+format_vlib_pci_handle (u8 * s, va_list * va)
+{
+ vlib_pci_addr_t *addr = va_arg (*va, vlib_pci_addr_t *);
+ return format (s, "%x/%x/%x", addr->bus, addr->slot, addr->function);
+}
+
+u8 *
+format_vlib_pci_link_speed (u8 * s, va_list * va)
+{
+ vlib_pci_device_t *d = va_arg (*va, vlib_pci_device_t *);
+ pcie_config_regs_t *r =
+ pci_config_find_capability (&d->config0, PCI_CAP_ID_PCIE);
+ int width;
+
+ if (!r)
+ return format (s, "unknown");
+
+ width = (r->link_status >> 4) & 0x3f;
+
+ if ((r->link_status & 0xf) == 1)
+ return format (s, "2.5 GT/s x%u", width);
+ if ((r->link_status & 0xf) == 2)
+ return format (s, "5.0 GT/s x%u", width);
+ if ((r->link_status & 0xf) == 3)
+ return format (s, "8.0 GT/s x%u", width);
+ return format (s, "unknown");
+}
+
+u8 *
+format_vlib_pci_vpd (u8 * s, va_list * args)
+{
+ u8 *data = va_arg (*args, u8 *);
+ u8 *id = va_arg (*args, u8 *);
+ uword indent = format_get_indent (s);
+ char *string_types[] = { "PN", "EC", "SN", "MN", 0 };
+ uword p = 0;
+ int first_line = 1;
+
+ if (vec_len (data) < 3)
+ return s;
+
+ while (p + 3 < vec_len (data))
+ {
+
+ if (data[p] == 0 && data[p + 1] == 0)
+ return s;
+
+ if (p + data[p + 2] > vec_len (data))
+ return s;
+
+ if (id == 0)
+ {
+ int is_string = 0;
+ char **c = string_types;
+
+ while (c[0])
+ {
+ if (*(u16 *) & data[p] == *(u16 *) c[0])
+ is_string = 1;
+ c++;
+ }
+
+ if (data[p + 2])
+ {
+ if (!first_line)
+ s = format (s, "\n%U", format_white_space, indent);
+ else
+ {
+ first_line = 0;
+ s = format (s, " ");
+ }
+
+ s = format (s, "%c%c: ", data[p], data[p + 1]);
+ if (is_string)
+ vec_add (s, data + p + 3, data[p + 2]);
+ else
+ {
+ int i;
+ const int max_bytes = 8;
+ s = format (s, "0x");
+ for (i = 0; i < clib_min (data[p + 2], max_bytes); i++)
+ s = format (s, " %02x", data[p + 3 + i]);
+
+ if (data[p + 2] > max_bytes)
+ s = format (s, " ...");
+ }
+ }
+ }
+ else if (*(u16 *) & data[p] == *(u16 *) id)
+ {
+ vec_add (s, data + p + 3, data[p + 2]);
+ return s;
+ }
+
+ p += 3 + data[p + 2];
+ }
+
+ return s;
+}
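+
+/*
+ * Layout assumed by the formatter above: VPD data is a sequence of
+ * (key0, key1, length, payload...) records. E.g. the bytes
+ * { 'P', 'N', 4, 'X', '1', '2', '3' } print as "PN: X123"; keys other
+ * than PN/EC/SN/MN are hex-dumped, truncated after 8 bytes.
+ */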
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_pci_command, static) = {
+ .path = "show pci",
+ .short_help = "show pci [all]",
+ .function = show_pci_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+pci_bus_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (pci_bus_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/pci/pci.h b/src/vlib/pci/pci.h
new file mode 100644
index 00000000..21410809
--- /dev/null
+++ b/src/vlib/pci/pci.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.h: PCI definitions.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_pci_h
+#define included_vlib_pci_h
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci_config.h>
+
+typedef CLIB_PACKED (union
+{
+  struct
+  {
+    u16 domain;
+    u8 bus;
+    u8 slot:5;
+    u8 function:3;
+  };
+  u32 as_u32;
+}) vlib_pci_addr_t;
+
+typedef struct vlib_pci_device
+{
+ /* Operating system handle for this device. */
+ uword os_handle;
+
+ vlib_pci_addr_t bus_address;
+
+  /* First 256 bytes of configuration space; the typed views cover the
+     standard first 64 bytes. */
+ union
+ {
+ pci_config_type0_regs_t config0;
+ pci_config_type1_regs_t config1;
+ u8 config_data[256];
+ };
+
+ /* Interrupt handler */
+ void (*interrupt_handler) (struct vlib_pci_device * dev);
+
+ /* Driver name */
+ u8 *driver_name;
+
+ /* Numa Node */
+ int numa_node;
+
+ /* Device data */
+ u16 device_class;
+ u16 vendor_id;
+ u16 device_id;
+
+ /* Vital Product Data */
+ u8 *product_name;
+ u8 *vpd_r;
+ u8 *vpd_w;
+
+ /* Private data */
+ uword private_data;
+
+} vlib_pci_device_t;
+
+typedef struct
+{
+ u16 vendor_id, device_id;
+} pci_device_id_t;
+
+typedef struct _pci_device_registration
+{
+ /* Driver init function. */
+ clib_error_t *(*init_function) (vlib_main_t * vm, vlib_pci_device_t * dev);
+
+ /* Interrupt handler */
+ void (*interrupt_handler) (vlib_pci_device_t * dev);
+
+ /* List of registrations */
+ struct _pci_device_registration *next_registration;
+
+ /* Vendor/device ids supported by this driver. */
+ pci_device_id_t supported_devices[];
+} pci_device_registration_t;
+
+/* Pool of PCI devices. */
+typedef struct
+{
+ vlib_main_t *vlib_main;
+ vlib_pci_device_t *pci_devs;
+ pci_device_registration_t *pci_device_registrations;
+ uword *pci_dev_index_by_pci_addr;
+} vlib_pci_main_t;
+
+extern vlib_pci_main_t pci_main;
+
+#define PCI_REGISTER_DEVICE(x,...) \
+ __VA_ARGS__ pci_device_registration_t x; \
+static void __vlib_add_pci_device_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_pci_device_registration_##x (void) \
+{ \
+ vlib_pci_main_t * pm = &pci_main; \
+ x.next_registration = pm->pci_device_registrations; \
+ pm->pci_device_registrations = &x; \
+} \
+__VA_ARGS__ pci_device_registration_t x
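+
+/*
+ * Registration sketch (driver name and IDs hypothetical): a driver
+ * supplies an init function and the vendor/device IDs it claims; the
+ * supported_devices list is zero-terminated:
+ *
+ *   static clib_error_t *
+ *   my_nic_init (vlib_main_t * vm, vlib_pci_device_t * dev)
+ *   {
+ *     return vlib_pci_bus_master_enable (dev);
+ *   }
+ *
+ *   PCI_REGISTER_DEVICE (my_nic_pci_registration, static) = {
+ *     .init_function = my_nic_init,
+ *     .supported_devices = {
+ *       {.vendor_id = 0x8086,.device_id = 0x10fb},
+ *       {0},
+ *     },
+ *   };
+ */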
+
+clib_error_t *vlib_pci_bind_to_uio (vlib_pci_device_t * d,
+ char *uio_driver_name);
+
+/* Configuration space read/write. */
+clib_error_t *vlib_pci_read_write_config (vlib_pci_device_t * dev,
+ vlib_read_or_write_t read_or_write,
+ uword address,
+ void *data, u32 n_bytes);
+
+#define _(t) \
+static inline clib_error_t * \
+vlib_pci_read_config_##t (vlib_pci_device_t * dev, \
+ uword address, t * data) \
+{ \
+  return vlib_pci_read_write_config (dev, VLIB_READ, address, data, \
+ sizeof (data[0])); \
+}
+
+_(u32);
+_(u16);
+_(u8);
+
+#undef _
+
+#define _(t) \
+static inline clib_error_t * \
+vlib_pci_write_config_##t (vlib_pci_device_t * dev, uword address, \
+ t * data) \
+{ \
+ return vlib_pci_read_write_config (dev, VLIB_WRITE, \
+ address, data, sizeof (data[0])); \
+}
+
+_(u32);
+_(u16);
+_(u8);
+
+#undef _
+
+static inline clib_error_t *
+vlib_pci_intr_enable (vlib_pci_device_t * dev)
+{
+ u16 command;
+ clib_error_t *err;
+
+ err = vlib_pci_read_config_u16 (dev, 4, &command);
+
+ if (err)
+ return err;
+
+ command &= ~PCI_COMMAND_INTX_DISABLE;
+
+ return vlib_pci_write_config_u16 (dev, 4, &command);
+}
+
+static inline clib_error_t *
+vlib_pci_intr_disable (vlib_pci_device_t * dev)
+{
+ u16 command;
+ clib_error_t *err;
+
+ err = vlib_pci_read_config_u16 (dev, 4, &command);
+
+ if (err)
+ return err;
+
+ command |= PCI_COMMAND_INTX_DISABLE;
+
+ return vlib_pci_write_config_u16 (dev, 4, &command);
+}
+
+static inline clib_error_t *
+vlib_pci_bus_master_enable (vlib_pci_device_t * dev)
+{
+ clib_error_t *err;
+ u16 command;
+
+ /* Set bus master enable (BME) */
+ err = vlib_pci_read_config_u16 (dev, 4, &command);
+
+ if (err)
+ return err;
+
+ if (command & PCI_COMMAND_BUS_MASTER)
+ return 0;
+
+ command |= PCI_COMMAND_BUS_MASTER;
+
+ return vlib_pci_write_config_u16 (dev, 4, &command);
+}
+
+clib_error_t *vlib_pci_map_resource (vlib_pci_device_t * dev, u32 resource,
+ void **result);
+
+clib_error_t *vlib_pci_map_resource_fixed (vlib_pci_device_t * dev,
+ u32 resource, u8 * addr,
+ void **result);
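+
+/*
+ * Bring-up sketch from a driver init function (the BAR number is
+ * device-specific; 0 here is illustrative):
+ *
+ *   void *regs;
+ *   clib_error_t *err;
+ *   if ((err = vlib_pci_map_resource (dev, 0, &regs)))
+ *     return err;
+ *   if ((err = vlib_pci_bus_master_enable (dev)))
+ *     return err;
+ *   // program the device through `regs'
+ */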
+
+vlib_pci_device_t *vlib_get_pci_device (vlib_pci_addr_t * addr);
+/* Frees the device. */
+void vlib_pci_free_device (vlib_pci_device_t * dev);
+
+unformat_function_t unformat_vlib_pci_addr;
+format_function_t format_vlib_pci_addr;
+format_function_t format_vlib_pci_handle;
+format_function_t format_vlib_pci_link_speed;
+format_function_t format_vlib_pci_vpd;
+
+#endif /* included_vlib_pci_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/pci/pci_config.h b/src/vlib/pci/pci_config.h
new file mode 100644
index 00000000..92e56af6
--- /dev/null
+++ b/src/vlib/pci/pci_config.h
@@ -0,0 +1,731 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.h: PCI definitions.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_pci_config_h
+#define included_vlib_pci_config_h
+
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+
+typedef enum
+{
+ PCI_CLASS_NOT_DEFINED = 0x0000,
+ PCI_CLASS_NOT_DEFINED_VGA = 0x0001,
+
+ PCI_CLASS_STORAGE_SCSI = 0x0100,
+ PCI_CLASS_STORAGE_IDE = 0x0101,
+ PCI_CLASS_STORAGE_FLOPPY = 0x0102,
+ PCI_CLASS_STORAGE_IPI = 0x0103,
+ PCI_CLASS_STORAGE_RAID = 0x0104,
+ PCI_CLASS_STORAGE_OTHER = 0x0180,
+ PCI_CLASS_STORAGE = 0x0100,
+
+ PCI_CLASS_NETWORK_ETHERNET = 0x0200,
+ PCI_CLASS_NETWORK_TOKEN_RING = 0x0201,
+ PCI_CLASS_NETWORK_FDDI = 0x0202,
+ PCI_CLASS_NETWORK_ATM = 0x0203,
+ PCI_CLASS_NETWORK_OTHER = 0x0280,
+ PCI_CLASS_NETWORK = 0x0200,
+
+ PCI_CLASS_DISPLAY_VGA = 0x0300,
+ PCI_CLASS_DISPLAY_XGA = 0x0301,
+ PCI_CLASS_DISPLAY_3D = 0x0302,
+ PCI_CLASS_DISPLAY_OTHER = 0x0380,
+ PCI_CLASS_DISPLAY = 0x0300,
+
+ PCI_CLASS_MULTIMEDIA_VIDEO = 0x0400,
+ PCI_CLASS_MULTIMEDIA_AUDIO = 0x0401,
+ PCI_CLASS_MULTIMEDIA_PHONE = 0x0402,
+ PCI_CLASS_MULTIMEDIA_OTHER = 0x0480,
+ PCI_CLASS_MULTIMEDIA = 0x0400,
+
+ PCI_CLASS_MEMORY_RAM = 0x0500,
+ PCI_CLASS_MEMORY_FLASH = 0x0501,
+ PCI_CLASS_MEMORY_OTHER = 0x0580,
+ PCI_CLASS_MEMORY = 0x0500,
+
+ PCI_CLASS_BRIDGE_HOST = 0x0600,
+ PCI_CLASS_BRIDGE_ISA = 0x0601,
+ PCI_CLASS_BRIDGE_EISA = 0x0602,
+ PCI_CLASS_BRIDGE_MC = 0x0603,
+ PCI_CLASS_BRIDGE_PCI = 0x0604,
+ PCI_CLASS_BRIDGE_PCMCIA = 0x0605,
+ PCI_CLASS_BRIDGE_NUBUS = 0x0606,
+ PCI_CLASS_BRIDGE_CARDBUS = 0x0607,
+ PCI_CLASS_BRIDGE_RACEWAY = 0x0608,
+ PCI_CLASS_BRIDGE_OTHER = 0x0680,
+ PCI_CLASS_BRIDGE = 0x0600,
+
+ PCI_CLASS_COMMUNICATION_SERIAL = 0x0700,
+ PCI_CLASS_COMMUNICATION_PARALLEL = 0x0701,
+ PCI_CLASS_COMMUNICATION_MULTISERIAL = 0x0702,
+ PCI_CLASS_COMMUNICATION_MODEM = 0x0703,
+ PCI_CLASS_COMMUNICATION_OTHER = 0x0780,
+ PCI_CLASS_COMMUNICATION = 0x0700,
+
+ PCI_CLASS_SYSTEM_PIC = 0x0800,
+ PCI_CLASS_SYSTEM_DMA = 0x0801,
+ PCI_CLASS_SYSTEM_TIMER = 0x0802,
+ PCI_CLASS_SYSTEM_RTC = 0x0803,
+ PCI_CLASS_SYSTEM_PCI_HOTPLUG = 0x0804,
+ PCI_CLASS_SYSTEM_OTHER = 0x0880,
+ PCI_CLASS_SYSTEM = 0x0800,
+
+ PCI_CLASS_INPUT_KEYBOARD = 0x0900,
+ PCI_CLASS_INPUT_PEN = 0x0901,
+ PCI_CLASS_INPUT_MOUSE = 0x0902,
+ PCI_CLASS_INPUT_SCANNER = 0x0903,
+ PCI_CLASS_INPUT_GAMEPORT = 0x0904,
+ PCI_CLASS_INPUT_OTHER = 0x0980,
+ PCI_CLASS_INPUT = 0x0900,
+
+ PCI_CLASS_DOCKING_GENERIC = 0x0a00,
+ PCI_CLASS_DOCKING_OTHER = 0x0a80,
+ PCI_CLASS_DOCKING = 0x0a00,
+
+ PCI_CLASS_PROCESSOR_386 = 0x0b00,
+ PCI_CLASS_PROCESSOR_486 = 0x0b01,
+ PCI_CLASS_PROCESSOR_PENTIUM = 0x0b02,
+ PCI_CLASS_PROCESSOR_ALPHA = 0x0b10,
+ PCI_CLASS_PROCESSOR_POWERPC = 0x0b20,
+ PCI_CLASS_PROCESSOR_MIPS = 0x0b30,
+ PCI_CLASS_PROCESSOR_CO = 0x0b40,
+ PCI_CLASS_PROCESSOR = 0x0b00,
+
+ PCI_CLASS_SERIAL_FIREWIRE = 0x0c00,
+ PCI_CLASS_SERIAL_ACCESS = 0x0c01,
+ PCI_CLASS_SERIAL_SSA = 0x0c02,
+ PCI_CLASS_SERIAL_USB = 0x0c03,
+ PCI_CLASS_SERIAL_FIBER = 0x0c04,
+ PCI_CLASS_SERIAL_SMBUS = 0x0c05,
+ PCI_CLASS_SERIAL = 0x0c00,
+
+ PCI_CLASS_INTELLIGENT_I2O = 0x0e00,
+ PCI_CLASS_INTELLIGENT = 0x0e00,
+
+ PCI_CLASS_SATELLITE_TV = 0x0f00,
+ PCI_CLASS_SATELLITE_AUDIO = 0x0f01,
+ PCI_CLASS_SATELLITE_VOICE = 0x0f03,
+ PCI_CLASS_SATELLITE_DATA = 0x0f04,
+ PCI_CLASS_SATELLITE = 0x0f00,
+
+ PCI_CLASS_CRYPT_NETWORK = 0x1000,
+ PCI_CLASS_CRYPT_ENTERTAINMENT = 0x1001,
+ PCI_CLASS_CRYPT_OTHER = 0x1080,
+ PCI_CLASS_CRYPT = 0x1000,
+
+ PCI_CLASS_SP_DPIO = 0x1100,
+ PCI_CLASS_SP_OTHER = 0x1180,
+ PCI_CLASS_SP = 0x1100,
+} pci_device_class_t;
+
+static inline pci_device_class_t
+pci_device_class_base (pci_device_class_t c)
+{
+ return c & ~0xff;
+}
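+
+/*
+ * E.g. pci_device_class_base (PCI_CLASS_NETWORK_ATM) yields
+ * PCI_CLASS_NETWORK (0x0200), since the low byte holds the subclass.
+ */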
+
+/*
+ * Under PCI, each device has 256 bytes of configuration address space,
+ * of which the first 64 bytes are standardized as follows:
+ */
+typedef struct
+{
+ u16 vendor_id;
+ u16 device_id;
+
+ u16 command;
+#define PCI_COMMAND_IO (1 << 0) /* Enable response in I/O space */
+#define PCI_COMMAND_MEMORY (1 << 1) /* Enable response in Memory space */
+#define PCI_COMMAND_BUS_MASTER (1 << 2) /* Enable bus mastering */
+#define PCI_COMMAND_SPECIAL (1 << 3) /* Enable response to special cycles */
+#define PCI_COMMAND_WRITE_INVALIDATE (1 << 4) /* Use memory write and invalidate */
+#define PCI_COMMAND_VGA_PALETTE_SNOOP (1 << 5)
+#define PCI_COMMAND_PARITY (1 << 6)
+#define PCI_COMMAND_WAIT (1 << 7) /* Enable address/data stepping */
+#define PCI_COMMAND_SERR (1 << 8) /* Enable SERR */
+#define PCI_COMMAND_BACK_TO_BACK_WRITE (1 << 9)
+#define PCI_COMMAND_INTX_DISABLE (1 << 10) /* INTx Emulation Disable */
+
+ u16 status;
+#define PCI_STATUS_INTX_PENDING (1 << 3)
+#define PCI_STATUS_CAPABILITY_LIST (1 << 4)
+#define PCI_STATUS_66MHZ (1 << 5)	/* Supports 66 MHz PCI 2.1 bus */
+#define PCI_STATUS_UDF (1 << 6) /* Support User Definable Features (obsolete) */
+#define PCI_STATUS_BACK_TO_BACK_WRITE (1 << 7) /* Accept fast-back to back */
+#define PCI_STATUS_PARITY_ERROR (1 << 8) /* Detected parity error */
+#define PCI_STATUS_DEVSEL_GET(x) ((x >> 9) & 3) /* DEVSEL timing */
+#define PCI_STATUS_DEVSEL_FAST (0 << 9)
+#define PCI_STATUS_DEVSEL_MEDIUM (1 << 9)
+#define PCI_STATUS_DEVSEL_SLOW (2 << 9)
+#define PCI_STATUS_SIG_TARGET_ABORT (1 << 11) /* Set on target abort */
+#define PCI_STATUS_REC_TARGET_ABORT (1 << 12) /* Master ack of target abort */
+#define PCI_STATUS_REC_MASTER_ABORT (1 << 13) /* Set on master abort */
+#define PCI_STATUS_SIG_SYSTEM_ERROR (1 << 14) /* Set when we drive SERR */
+#define PCI_STATUS_DETECTED_PARITY_ERROR (1 << 15)
+
+ u8 revision_id;
+ u8 programming_interface_class; /* Reg. Level Programming Interface */
+
+ pci_device_class_t device_class:16;
+
+ u8 cache_size;
+ u8 latency_timer;
+
+ u8 header_type;
+#define PCI_HEADER_TYPE_NORMAL 0
+#define PCI_HEADER_TYPE_BRIDGE 1
+#define PCI_HEADER_TYPE_CARDBUS 2
+
+ u8 bist;
+#define PCI_BIST_CODE_MASK 0x0f /* Return result */
+#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */
+#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */
+} pci_config_header_t;
+
+/* Byte swap config header. */
+always_inline void
+pci_config_header_little_to_host (pci_config_header_t * r)
+{
+ if (!CLIB_ARCH_IS_BIG_ENDIAN)
+ return;
+#define _(f,t) r->f = clib_byte_swap_##t (r->f)
+ _(vendor_id, u16);
+ _(device_id, u16);
+ _(command, u16);
+ _(status, u16);
+ _(device_class, u16);
+#undef _
+}
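+
+/*
+ * Note (illustrative): the temporary "_" macro above expands, e.g.,
+ * _(vendor_id, u16) into r->vendor_id = clib_byte_swap_u16 (r->vendor_id),
+ * and is #undef'd immediately after the field list.
+ */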
+
+/* Header type 0 (normal devices) */
+typedef struct
+{
+ pci_config_header_t header;
+
+ /*
+ * Base addresses specify locations in memory or I/O space.
+ * The decoded size can be determined by writing 0xffffffff
+ * to the register and reading it back: only the bits that
+ * read back as 1 are decoded (see the sizing sketch after
+ * this struct).
+ */
+ u32 base_address[6];
+
+ u16 cardbus_cis;
+
+ u16 subsystem_vendor_id;
+ u16 subsystem_id;
+
+ u32 rom_address;
+#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */
+#define PCI_ROM_ADDRESS_ENABLE 0x01
+#define PCI_ROM_ADDRESS_MASK (~0x7ffUL)
+
+ u8 first_capability_offset;
+ CLIB_PAD_FROM_TO (0x35, 0x3c);
+
+ u8 interrupt_line;
+ u8 interrupt_pin;
+ u8 min_grant;
+ u8 max_latency;
+
+ u8 capability_data[0];
+} pci_config_type0_regs_t;
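+
+/*
+ * Sizing sketch (illustration only, not part of the original file):
+ * after writing 0xffffffff to a memory BAR and reading it back, mask
+ * off the low flag bits; the decoded window size is the two's
+ * complement of what remains.
+ */
+static inline u32
+pci_memory_bar_decoded_size (u32 readback_after_all_ones)
+{
+  /* Low 4 bits of a memory BAR are type/prefetch flags, not address bits. */
+  u32 mask = readback_after_all_ones & ~0xfU;
+  return ~mask + 1;
+}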
+
+always_inline void
+pci_config_type0_little_to_host (pci_config_type0_regs_t * r)
+{
+ int i;
+ if (!CLIB_ARCH_IS_BIG_ENDIAN)
+ return;
+ pci_config_header_little_to_host (&r->header);
+#define _(f,t) r->f = clib_byte_swap_##t (r->f)
+ for (i = 0; i < ARRAY_LEN (r->base_address); i++)
+ _(base_address[i], u32);
+ _(cardbus_cis, u16);
+ _(subsystem_vendor_id, u16);
+ _(subsystem_id, u16);
+ _(rom_address, u32);
+#undef _
+}
+
+/* Header type 1 (PCI-to-PCI bridges) */
+typedef struct
+{
+ pci_config_header_t header;
+
+ u32 base_address[2];
+
+ /* Primary/secondary bus number. */
+ u8 primary_bus;
+ u8 secondary_bus;
+
+ /* Highest bus number behind the bridge */
+ u8 subordinate_bus;
+
+ u8 secondary_bus_latency_timer;
+
+ /* I/O range behind bridge. */
+ u8 io_base, io_limit;
+
+ /* Secondary status register, only bit 14 used */
+ u16 secondary_status;
+
+ /* Memory range behind bridge in units of 64k bytes. */
+ u16 memory_base, memory_limit;
+#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
+#define PCI_MEMORY_RANGE_MASK (~0x0fUL)
+
+ u16 prefetchable_memory_base, prefetchable_memory_limit;
+#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL
+#define PCI_PREF_RANGE_TYPE_32 0x00
+#define PCI_PREF_RANGE_TYPE_64 0x01
+#define PCI_PREF_RANGE_MASK (~0x0fUL)
+
+ u32 prefetchable_memory_base_upper_32bits;
+ u32 prefetchable_memory_limit_upper_32bits;
+ u16 io_base_upper_16bits;
+ u16 io_limit_upper_16bits;
+
+ /* Same as for type 0. */
+ u8 capability_list_offset;
+ CLIB_PAD_FROM_TO (0x35, 0x37);
+
+ u32 rom_address;
+ CLIB_PAD_FROM_TO (0x3c, 0x3e);
+
+ u16 bridge_control;
+#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */
+#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */
+#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */
+#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */
+#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
+#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
+#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
+
+ u8 capability_data[0];
+} pci_config_type1_regs_t;
+
+always_inline void
+pci_config_type1_little_to_host (pci_config_type1_regs_t * r)
+{
+ int i;
+ if (!CLIB_ARCH_IS_BIG_ENDIAN)
+ return;
+ pci_config_header_little_to_host (&r->header);
+#define _(f,t) r->f = clib_byte_swap_##t (r->f)
+ for (i = 0; i < ARRAY_LEN (r->base_address); i++)
+ _(base_address[i], u32);
+ _(secondary_status, u16);
+ _(memory_base, u16);
+ _(memory_limit, u16);
+ _(prefetchable_memory_base, u16);
+ _(prefetchable_memory_limit, u16);
+ _(prefetchable_memory_base_upper_32bits, u32);
+ _(prefetchable_memory_limit_upper_32bits, u32);
+ _(io_base_upper_16bits, u16);
+ _(io_limit_upper_16bits, u16);
+ _(rom_address, u32);
+ _(bridge_control, u16);
+#undef _
+}
+
+/* Capabilities. */
+typedef enum pci_capability_type
+{
+ /* Power Management */
+ PCI_CAP_ID_PM = 1,
+
+ /* Accelerated Graphics Port */
+ PCI_CAP_ID_AGP = 2,
+
+ /* Vital Product Data */
+ PCI_CAP_ID_VPD = 3,
+
+ /* Slot Identification */
+ PCI_CAP_ID_SLOTID = 4,
+
+ /* Message Signalled Interrupts */
+ PCI_CAP_ID_MSI = 5,
+
+ /* CompactPCI HotSwap */
+ PCI_CAP_ID_CHSWP = 6,
+
+ /* PCI-X */
+ PCI_CAP_ID_PCIX = 7,
+
+ /* Hypertransport. */
+ PCI_CAP_ID_HYPERTRANSPORT = 8,
+
+ /* PCI Standard Hot-Plug Controller */
+ PCI_CAP_ID_SHPC = 0xc,
+
+ /* PCI Express */
+ PCI_CAP_ID_PCIE = 0x10,
+
+ /* MSI-X */
+ PCI_CAP_ID_MSIX = 0x11,
+} pci_capability_type_t;
+
+/* Common header for capabilities. */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ enum pci_capability_type type:8;
+ u8 next_offset;}) pci_capability_regs_t;
+/* *INDENT-ON* */
+
+always_inline void *
+pci_config_find_capability (pci_config_type0_regs_t * t, int cap_type)
+{
+ pci_capability_regs_t *c;
+ u32 next_offset;
+ u32 ttl = 48;
+
+ if (!(t->header.status & PCI_STATUS_CAPABILITY_LIST))
+ return 0;
+
+ next_offset = t->first_capability_offset;
+ while (ttl-- && next_offset >= 0x40)
+ {
+ c = (void *) t + (next_offset & ~3);
+ if ((u8) c->type == 0xff)
+ break;
+ if (c->type == cap_type)
+ return c;
+ next_offset = c->next_offset;
+ }
+ return 0;
+}
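+
+/*
+ * Usage sketch (illustrative only): check whether a device advertises
+ * the MSI capability.
+ */
+static inline int
+pci_config_has_msi (pci_config_type0_regs_t * t)
+{
+  return pci_config_find_capability (t, PCI_CAP_ID_MSI) != 0;
+}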
+
+/* Power Management Registers */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 capabilities;
+#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */
+#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */
+#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */
+#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */
+#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */
+#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */
+#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */
+#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */
+#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */
+#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */
+#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */
+#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */
+#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */
+#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
+ u16 control;
+#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
+#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
+#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
+#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
+#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */
+ u8 extensions;
+#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */
+#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */
+ u8 data;}) pci_power_management_regs_t;
+/* *INDENT-ON* */
+
+/* AGP registers */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u8 version;
+ u8 rest_of_capability_flags; u32 status; u32 command;
+ /* Command & status common bits. */
+#define PCI_AGP_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */
+#define PCI_AGP_SBA 0x0200 /* Sideband addressing supported */
+#define PCI_AGP_64BIT 0x0020 /* 64-bit addressing supported */
+#define PCI_AGP_ALLOW_TRANSACTIONS 0x0100 /* Allow processing of AGP transactions */
+#define PCI_AGP_FW 0x0010 /* FW transfers supported/forced */
+#define PCI_AGP_RATE4 0x0004 /* 4x transfer rate supported */
+#define PCI_AGP_RATE2 0x0002 /* 2x transfer rate supported */
+#define PCI_AGP_RATE1 0x0001 /* 1x transfer rate supported */
+ }) pci_agp_regs_t;
+/* *INDENT-ON* */
+
+/* Vital Product Data */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 address;
+#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */
+#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */
+ u32 data;}) pci_vpd_regs_t;
+/* *INDENT-ON* */
+
+/* Slot Identification */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u8 esr;
+#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */
+#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */
+ u8 chassis;}) pci_sid_regs_t;
+/* *INDENT-ON* */
+
+/* Message Signalled Interrupts registers */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 flags;
+#define PCI_MSI_FLAGS_ENABLE (1 << 0) /* MSI feature enabled */
+#define PCI_MSI_FLAGS_GET_MAX_QUEUE_SIZE(x) ((x >> 1) & 0x7)
+#define PCI_MSI_FLAGS_MAX_QUEUE_SIZE(x) (((x) & 0x7) << 1)
+#define PCI_MSI_FLAGS_GET_QUEUE_SIZE(x) ((x >> 4) & 0x7)
+#define PCI_MSI_FLAGS_QUEUE_SIZE(x) (((x) & 0x7) << 4)
+#define PCI_MSI_FLAGS_64BIT (1 << 7) /* 64-bit addresses allowed */
+#define PCI_MSI_FLAGS_MASKBIT (1 << 8) /* 64-bit mask bits allowed */
+ u32 address; u32 data; u32 mask_bits;}) pci_msi32_regs_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 flags;
+ u32 address[2];
+ u32 data; u32 mask_bits;}) pci_msi64_regs_t;
+/* *INDENT-ON* */
+
+/* CompactPCI Hotswap Register */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 control_status;
+#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */
+#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */
+#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */
+#define PCI_CHSWP_LOO 0x08 /* LED On / Off */
+#define PCI_CHSWP_PI 0x30 /* Programming Interface */
+#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */
+#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */
+ }) pci_chswp_regs_t;
+/* *INDENT-ON* */
+
+/* PCIX registers */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 command;
+#define PCIX_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */
+#define PCIX_CMD_ERO 0x0002 /* Enable Relaxed Ordering */
+#define PCIX_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */
+#define PCIX_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */
+#define PCIX_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */
+ u32 status;
+#define PCIX_STATUS_DEVFN 0x000000ff /* A copy of devfn */
+#define PCIX_STATUS_BUS 0x0000ff00 /* A copy of bus nr */
+#define PCIX_STATUS_64BIT 0x00010000 /* 64-bit device */
+#define PCIX_STATUS_133MHZ 0x00020000 /* 133 MHz capable */
+#define PCIX_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */
+#define PCIX_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */
+#define PCIX_STATUS_COMPLEX 0x00100000 /* Device Complexity */
+#define PCIX_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */
+#define PCIX_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */
+#define PCIX_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */
+#define PCIX_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */
+#define PCIX_STATUS_266MHZ 0x40000000 /* 266 MHz capable */
+#define PCIX_STATUS_533MHZ 0x80000000 /* 533 MHz capable */
+ }) pcix_config_regs_t;
+/* *INDENT-ON* */
+
+static inline int
+pcie_size_to_code (int bytes)
+{
+ ASSERT (is_pow2 (bytes));
+ ASSERT (bytes <= 4096);
+ return min_log2 (bytes) - 7;
+}
+
+static inline int
+pcie_code_to_size (int code)
+{
+ int size = 1 << (code + 7);
+ ASSERT (size <= 4096);
+ return size;
+}
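+
+/*
+ * Round-trip sketch (illustration only): the PCIe max-payload and
+ * max-read-request fields store log2(bytes) - 7, so 128 bytes <-> code
+ * 0, 256 <-> code 1, ... 4096 <-> code 5.
+ */
+static inline void
+pcie_size_code_round_trip_check (void)
+{
+  ASSERT (pcie_size_to_code (256) == 1);
+  ASSERT (pcie_code_to_size (1) == 256);
+}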
+
+/* PCI Express capability registers */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 pcie_capabilities;
+#define PCIE_CAP_VERSION(x) (((x) >> 0) & 0xf)
+#define PCIE_CAP_DEVICE_TYPE(x) (((x) >> 4) & 0xf)
+#define PCIE_DEVICE_TYPE_ENDPOINT 0
+#define PCIE_DEVICE_TYPE_LEGACY_ENDPOINT 1
+#define PCIE_DEVICE_TYPE_ROOT_PORT 4
+ /* Upstream/downstream port of PCI Express switch. */
+#define PCIE_DEVICE_TYPE_SWITCH_UPSTREAM 5
+#define PCIE_DEVICE_TYPE_SWITCH_DOWNSTREAM 6
+#define PCIE_DEVICE_TYPE_PCIE_TO_PCI_BRIDGE 7
+#define PCIE_DEVICE_TYPE_PCI_TO_PCIE_BRIDGE 8
+ /* Root complex integrated endpoint. */
+#define PCIE_DEVICE_TYPE_ROOT_COMPLEX_ENDPOINT 9
+#define PCIE_DEVICE_TYPE_ROOT_COMPLEX_EVENT_COLLECTOR 10
+#define PCIE_CAP_SLOT_IMPLEMENTED (1 << 8)
+#define PCIE_CAP_MSI_IRQ(x) (((x) >> 9) & 0x1f)
+ u32 dev_capabilities;
+#define PCIE_DEVCAP_MAX_PAYLOAD(x) (128 << (((x) >> 0) & 0x7))
+#define PCIE_DEVCAP_PHANTOM_BITS(x) (((x) >> 3) & 0x3)
+#define PCIE_DEVCAP_EXTENDED_TAG (1 << 5)
+#define PCIE_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */
+#define PCIE_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */
+#define PCIE_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */
+#define PCIE_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */
+#define PCIE_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */
+#define PCIE_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */
+#define PCIE_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */
+ u16 dev_control;
+#define PCIE_CTRL_CERE 0x0001 /* Correctable Error Reporting En. */
+#define PCIE_CTRL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */
+#define PCIE_CTRL_FERE 0x0004 /* Fatal Error Reporting Enable */
+#define PCIE_CTRL_URRE 0x0008 /* Unsupported Request Reporting En. */
+#define PCIE_CTRL_RELAX_EN 0x0010 /* Enable relaxed ordering */
+#define PCIE_CTRL_MAX_PAYLOAD(n) (((n) & 7) << 5)
+#define PCIE_CTRL_EXT_TAG 0x0100 /* Extended Tag Field Enable */
+#define PCIE_CTRL_PHANTOM 0x0200 /* Phantom Functions Enable */
+#define PCIE_CTRL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */
+#define PCIE_CTRL_NOSNOOP_EN 0x0800 /* Enable No Snoop */
+#define PCIE_CTRL_MAX_READ_REQUEST(n) (((n) & 7) << 12)
+ u16 dev_status;
+#define PCIE_DEVSTA_AUXPD 0x10 /* AUX Power Detected */
+#define PCIE_DEVSTA_TRPND 0x20 /* Transactions Pending */
+ u32 link_capabilities; u16 link_control; u16 link_status;
+ u32 slot_capabilities;
+ u16 slot_control; u16 slot_status; u16 root_control;
+#define PCIE_RTCTL_SECEE 0x01 /* System Error on Correctable Error */
+#define PCIE_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */
+#define PCIE_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */
+#define PCIE_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */
+#define PCIE_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */
+ u16 root_capabilities;
+ u32 root_status;
+ u32 dev_capabilities2;
+ u16 dev_control2;
+ u16 dev_status2;
+ u32 link_capabilities2;
+ u16 link_control2;
+ u16 link_status2;
+ u32 slot_capabilities2; u16 slot_control2;
+ u16 slot_status2;}) pcie_config_regs_t;
+/* *INDENT-ON* */
+
+/* PCI express extended capabilities. */
+typedef enum pcie_capability_type
+{
+ PCIE_CAP_ADVANCED_ERROR = 1,
+ PCIE_CAP_VC = 2,
+ PCIE_CAP_DSN = 3,
+ PCIE_CAP_PWR = 4,
+} pcie_capability_type_t;
+
+/* Common header for capabilities. */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+enum pcie_capability_type type:16; u16 version: 4; u16 next_capability:12;})
+ /* *INDENT-ON* */
+pcie_capability_regs_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pcie_capability_regs_t header; u32 uncorrectable_status;
+#define PCIE_ERROR_UNC_LINK_TRAINING (1 << 0)
+#define PCIE_ERROR_UNC_DATA_LINK_PROTOCOL (1 << 4)
+#define PCIE_ERROR_UNC_SURPRISE_DOWN (1 << 5)
+#define PCIE_ERROR_UNC_POISONED_TLP (1 << 12)
+#define PCIE_ERROR_UNC_FLOW_CONTROL (1 << 13)
+#define PCIE_ERROR_UNC_COMPLETION_TIMEOUT (1 << 14)
+#define PCIE_ERROR_UNC_COMPLETER_ABORT (1 << 15)
+#define PCIE_ERROR_UNC_UNEXPECTED_COMPLETION (1 << 16)
+#define PCIE_ERROR_UNC_RX_OVERFLOW (1 << 17)
+#define PCIE_ERROR_UNC_MALFORMED_TLP (1 << 18)
+#define PCIE_ERROR_UNC_CRC_ERROR (1 << 19)
+#define PCIE_ERROR_UNC_UNSUPPORTED_REQUEST (1 << 20)
+ u32 uncorrectable_mask;
+ u32 uncorrectable_severity; u32 correctable_status;
+#define PCIE_ERROR_COR_RX_ERROR (1 << 0)
+#define PCIE_ERROR_COR_BAD_TLP (1 << 6)
+#define PCIE_ERROR_COR_BAD_DLLP (1 << 7)
+#define PCIE_ERROR_COR_REPLAY_ROLLOVER (1 << 8)
+#define PCIE_ERROR_COR_REPLAY_TIMER (1 << 12)
+#define PCIE_ERROR_COR_ADVISORY (1 << 13)
+ u32 correctable_mask;
+ u32 control;
+ u32 log[4];
+ u32 root_command;
+ u32 root_status; u16 correctable_error_source;
+ u16 error_source;}) pcie_advanced_error_regs_t;
+/* *INDENT-ON* */
+
+/* Virtual Channel */
+#define PCI_VC_PORT_REG1 4
+#define PCI_VC_PORT_REG2 8
+#define PCI_VC_PORT_CTRL 12
+#define PCI_VC_PORT_STATUS 14
+#define PCI_VC_RES_CAP 16
+#define PCI_VC_RES_CTRL 20
+#define PCI_VC_RES_STATUS 26
+
+/* Power Budgeting */
+#define PCI_PWR_DSR 4 /* Data Select Register */
+#define PCI_PWR_DATA 8 /* Data Register */
+#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */
+#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */
+#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */
+#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
+#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */
+#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */
+#define PCI_PWR_CAP 12 /* Capability */
+#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */
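+
+/*
+ * Decode sketch (illustration only, not part of the original file):
+ * combine the base power and data scale fields of a raw power
+ * budgeting data register value; the scale field selects a multiplier
+ * of 1.0, 0.1, 0.01 or 0.001.
+ */
+static inline f64
+pci_pwr_decode_watts (u32 d)
+{
+  static const f64 scale[4] = { 1.0, 0.1, 0.01, 0.001 };
+  return (f64) PCI_PWR_DATA_BASE (d) * scale[PCI_PWR_DATA_SCALE (d)];
+}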
+
+#endif /* included_vlib_pci_config_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/physmem.h b/src/vlib/physmem.h
new file mode 100644
index 00000000..a7fed124
--- /dev/null
+++ b/src/vlib/physmem.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * physmem.h: virtual <-> physical memory mapping for VLIB buffers
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_physmem_h
+#define included_vlib_physmem_h
+
+typedef u8 vlib_physmem_region_index_t;
+
+typedef struct
+{
+ vlib_physmem_region_index_t index;
+ void *mem;
+ uword size;
+ int fd;
+ u8 log2_page_size;
+ u16 n_pages;
+ u32 page_mask;
+
+ void *heap;
+ u32 flags;
+#define VLIB_PHYSMEM_F_INIT_MHEAP (1<<0)
+#define VLIB_PHYSMEM_F_HAVE_BUFFERS (1<<1)
+#define VLIB_PHYSMEM_F_FAKE (1<<2)
+
+ u8 numa_node;
+ u64 *page_table;
+ u8 *name;
+} vlib_physmem_region_t;
+
+
+
+typedef struct
+{
+ vlib_physmem_region_t *regions;
+} vlib_physmem_main_t;
+
+#endif /* included_vlib_physmem_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/physmem_funcs.h b/src/vlib/physmem_funcs.h
new file mode 100644
index 00000000..dbb8d9de
--- /dev/null
+++ b/src/vlib/physmem_funcs.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * physmem_funcs.h: virtual <-> physical memory mapping for VLIB buffers
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_physmem_funcs_h
+#define included_vlib_physmem_funcs_h
+
+always_inline vlib_physmem_region_t *
+vlib_physmem_get_region (vlib_main_t * vm, u8 index)
+{
+ vlib_physmem_main_t *vpm = &vm->physmem_main;
+ return pool_elt_at_index (vpm->regions, index);
+}
+
+always_inline u64
+vlib_physmem_offset_to_physical (vlib_main_t * vm,
+ vlib_physmem_region_index_t idx, uword o)
+{
+ vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
+ uword page_index = o >> pr->log2_page_size;
+ ASSERT (o < pr->size);
+ ASSERT (pr->page_table[page_index] != 0);
+ return (vec_elt (pr->page_table, page_index) + (o & pr->page_mask));
+}
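+
+/*
+ * Example (illustrative): with 2 MB pages (log2_page_size = 21,
+ * page_mask = 0x1fffff), offset 0x203000 resolves to
+ * page_table[1] + 0x3000.
+ */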
+
+always_inline int
+vlib_physmem_is_virtual (vlib_main_t * vm, vlib_physmem_region_index_t idx,
+ uword p)
+{
+ vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
+ return p >= pointer_to_uword (pr->mem)
+ && p < (pointer_to_uword (pr->mem) + pr->size);
+}
+
+always_inline uword
+vlib_physmem_offset_of (vlib_main_t * vm, vlib_physmem_region_index_t idx,
+ void *p)
+{
+ vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
+ uword a = pointer_to_uword (p);
+ uword o;
+
+ ASSERT (vlib_physmem_is_virtual (vm, idx, a));
+ o = a - pointer_to_uword (pr->mem);
+
+ /* Offset must fit in 32 bits. */
+ ASSERT ((u32) o == a - pointer_to_uword (pr->mem));
+
+ return o;
+}
+
+always_inline void *
+vlib_physmem_at_offset (vlib_main_t * vm, vlib_physmem_region_index_t idx,
+ uword offset)
+{
+ vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
+ ASSERT (offset < pr->size);
+ return uword_to_pointer (pointer_to_uword (pr->mem) + offset, void *);
+}
+
+always_inline void *
+vlib_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx,
+ clib_error_t ** error,
+ uword n_bytes, uword alignment)
+{
+ void *r = vm->os_physmem_alloc_aligned (vm, idx, n_bytes, alignment);
+ if (!r)
+ *error =
+ clib_error_return (0, "failed to allocate %wd bytes of I/O memory",
+ n_bytes);
+ else
+ *error = 0;
+ return r;
+}
+
+/* By default allocate I/O memory with cache line alignment. */
+always_inline void *
+vlib_physmem_alloc (vlib_main_t * vm, vlib_physmem_region_index_t idx,
+ clib_error_t ** error, uword n_bytes)
+{
+ return vlib_physmem_alloc_aligned (vm, idx, error, n_bytes,
+ CLIB_CACHE_LINE_BYTES);
+}
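+
+/*
+ * Usage sketch (illustrative only, not part of the original API):
+ * allocate from a region and report any failure in one step.
+ */
+always_inline void *
+vlib_physmem_alloc_or_report (vlib_main_t * vm,
+ vlib_physmem_region_index_t idx, uword n_bytes)
+{
+  clib_error_t *error;
+  void *p = vlib_physmem_alloc (vm, idx, &error, n_bytes);
+  if (error)
+    clib_error_report (error);
+  return p;
+}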
+
+always_inline void
+vlib_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx,
+ void *mem)
+{
+ return vm->os_physmem_free (vm, idx, mem);
+}
+
+always_inline u64
+vlib_physmem_virtual_to_physical (vlib_main_t * vm,
+ vlib_physmem_region_index_t idx, void *mem)
+{
+ vlib_physmem_main_t *vpm = &vm->physmem_main;
+ vlib_physmem_region_t *pr = pool_elt_at_index (vpm->regions, idx);
+ uword o = mem - pr->mem;
+ return vlib_physmem_offset_to_physical (vm, idx, o);
+}
+
+
+always_inline clib_error_t *
+vlib_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size,
+ u8 numa_node, u32 flags,
+ vlib_physmem_region_index_t * idx)
+{
+ return vm->os_physmem_region_alloc (vm, name, size, numa_node, flags, idx);
+}
+
+always_inline void
+vlib_physmem_region_free (struct vlib_main_t *vm,
+ vlib_physmem_region_index_t idx)
+{
+ vm->os_physmem_region_free (vm, idx);
+}
+
+#endif /* included_vlib_physmem_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
new file mode 100644
index 00000000..be8daa64
--- /dev/null
+++ b/src/vlib/threads.c
@@ -0,0 +1,1820 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _GNU_SOURCE
+
+#include <signal.h>
+#include <math.h>
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+
+#include <vlib/threads.h>
+#include <vlib/unix/cj.h>
+
+DECLARE_CJ_GLOBAL_LOG;
+
+#define FRAME_QUEUE_NELTS 32
+
+u32
+vl (void *p)
+{
+ return vec_len (p);
+}
+
+vlib_worker_thread_t *vlib_worker_threads;
+vlib_thread_main_t vlib_thread_main;
+
+/*
+ * Barrier tracing can be enabled on a normal build to collect information
+ * on barrier use, including timings and call stacks. Deliberately not
+ * keyed off CLIB_DEBUG, because that can add significant overhead which
+ * impacts observed timings.
+ */
+
+#ifdef BARRIER_TRACING
+ /*
+ * Barrier trace output can go to syslog or to elog, as desired.
+ */
+#ifdef BARRIER_TRACING_ELOG
+static u32
+elog_id_for_msg_name (const char *msg_name)
+{
+ uword *p, r;
+ static uword *h;
+ u8 *name_copy;
+
+ if (!h)
+ h = hash_create_string (0, sizeof (uword));
+
+ p = hash_get_mem (h, msg_name);
+ if (p)
+ return p[0];
+ r = elog_string (&vlib_global_main.elog_main, "%s", msg_name);
+
+ name_copy = format (0, "%s%c", msg_name, 0);
+
+ hash_set_mem (h, name_copy, r);
+
+ return r;
+}
+
+ /*
+ * elog Barrier trace functions, which are nulled out if BARRIER_TRACING isn't
+ * defined
+ */
+
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier <%d#%s(O:%dus:%dus)(%dus)",
+ .format_args = "i4T4i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 count, caller, t_entry, t_open, t_closed;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
+ ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->t_open = (int) (1000000.0 * t_open);
+ ed->t_closed = (int) (1000000.0 * t_closed);
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier <%d(%dus)%s",
+ .format_args = "i4i4T4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 depth, t_entry, caller;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->depth = (int) vlib_worker_threads[0].recursion_level - 1;
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller);
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier (%dus)%d>",
+ .format_args = "i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 t_entry, depth;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->depth = (int) vlib_worker_threads[0].recursion_level;
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier (%dus){%d}(C:%dus)#%d>",
+ .format_args = "i4i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 t_entry, t_update_main, t_closed_total, count;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->t_update_main = (int) (1000000.0 * t_update_main);
+ ed->t_closed_total = (int) (1000000.0 * t_closed_total);
+ ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
+
+ /* Reset context for next trace */
+ vlib_worker_threads[0].barrier_context = NULL;
+}
+#else
+char barrier_trace[65536];
+char *btp = barrier_trace;
+
+ /*
+ * syslog Barrier trace functions, which are nulled out if BARRIER_TRACING
+ * isn't defined
+ */
+
+
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+ btp += sprintf (btp, "<%u#%s",
+ (unsigned int) vlib_worker_threads[0].barrier_sync_count,
+ vlib_worker_threads[0].barrier_caller);
+
+ if (vlib_worker_threads[0].barrier_context)
+ {
+ btp += sprintf (btp, "[%s]", vlib_worker_threads[0].barrier_context);
+
+ }
+
+ btp += sprintf (btp, "(O:%dus:%dus)(%dus):",
+ (int) (1000000.0 * t_entry),
+ (int) (1000000.0 * t_open), (int) (1000000.0 * t_closed));
+
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+ btp += sprintf (btp, "<%u(%dus)%s:",
+ (int) vlib_worker_threads[0].recursion_level - 1,
+ (int) (1000000.0 * t_entry),
+ vlib_worker_threads[0].barrier_caller);
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+ btp += sprintf (btp, ":(%dus)%u>", (int) (1000000.0 * t_entry),
+ (int) vlib_worker_threads[0].recursion_level);
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+
+ btp += sprintf (btp, ":(%dus)", (int) (1000000.0 * t_entry));
+ if (t_update_main > 0)
+ {
+ btp += sprintf (btp, "{%dus}", (int) (1000000.0 * t_update_main));
+ }
+
+ btp += sprintf (btp, "(C:%dus)#%u>",
+ (int) (1000000.0 * t_closed_total),
+ (int) vlib_worker_threads[0].barrier_sync_count);
+
+ /* Dump buffer to syslog, and reset for next trace */
+ fformat (stderr, "BTRC %s\n", barrier_trace);
+ btp = barrier_trace;
+ vlib_worker_threads[0].barrier_context = NULL;
+}
+#endif
+#else
+
+ /* Null functions for default case where barrier tracing isn't used */
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+}
+#endif
+
+uword
+os_get_nthreads (void)
+{
+ u32 len;
+
+ len = vec_len (vlib_thread_stacks);
+ if (len == 0)
+ return 1;
+ else
+ return len;
+}
+
+void
+vlib_set_thread_name (char *name)
+{
+ int pthread_setname_np (pthread_t __target_thread, const char *__name);
+ int rv;
+ pthread_t thread = pthread_self ();
+
+ if (thread)
+ {
+ rv = pthread_setname_np (thread, name);
+ if (rv)
+ clib_warning ("pthread_setname_np returned %d", rv);
+ }
+}
+
+static int
+sort_registrations_by_no_clone (void *a0, void *a1)
+{
+ vlib_thread_registration_t **tr0 = a0;
+ vlib_thread_registration_t **tr1 = a1;
+
+ return ((i32) ((*tr0)->no_data_structure_clone)
+ - ((i32) ((*tr1)->no_data_structure_clone)));
+}
+
+static uword *
+clib_sysfs_list_to_bitmap (char *filename)
+{
+ FILE *fp;
+ uword *r = 0;
+
+ fp = fopen (filename, "r");
+
+ if (fp != NULL)
+ {
+ u8 *buffer = 0;
+ vec_validate (buffer, 256 - 1);
+ if (fgets ((char *) buffer, 256, fp))
+ {
+ unformat_input_t in;
+ unformat_init_string (&in, (char *) buffer,
+ strlen ((char *) buffer));
+ if (unformat (&in, "%U", unformat_bitmap_list, &r) != 1)
+ clib_warning ("unformat_bitmap_list failed");
+ unformat_free (&in);
+ }
+ vec_free (buffer);
+ fclose (fp);
+ }
+ return r;
+}
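+
+/*
+ * Example (illustrative): /sys/devices/system/cpu/online typically
+ * holds a list such as "0-3,8"; unformat_bitmap_list turns that into
+ * a bitmap with bits 0..3 and 8 set.
+ */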
+
+
+/* Called early in the init sequence */
+
+clib_error_t *
+vlib_thread_init (vlib_main_t * vm)
+{
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ vlib_worker_thread_t *w;
+ vlib_thread_registration_t *tr;
+ u32 n_vlib_mains = 1;
+ u32 first_index = 1;
+ u32 i;
+ uword *avail_cpu;
+
+ /* get bitmaps of active cpu cores and sockets */
+ tm->cpu_core_bitmap =
+ clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online");
+ tm->cpu_socket_bitmap =
+ clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online");
+
+ avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap);
+
+ /* skip cores */
+ for (i = 0; i < tm->skip_cores; i++)
+ {
+ uword c = clib_bitmap_first_set (avail_cpu);
+ if (c == ~0)
+ return clib_error_return (0, "no available cpus to skip");
+
+ avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
+ }
+
+ /* grab cpu for main thread */
+ if (!tm->main_lcore)
+ {
+ tm->main_lcore = clib_bitmap_first_set (avail_cpu);
+ if (tm->main_lcore == (u8) ~ 0)
+ return clib_error_return (0, "no available cpus to be used for the"
+ " main thread");
+ }
+ else
+ {
+ if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0)
+ return clib_error_return (0, "cpu %u is not available to be used"
+ " for the main thread", tm->main_lcore);
+ }
+ avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
+
+ /* if sysfs reports no data, assume that only socket 0 exists */
+ if (!tm->cpu_socket_bitmap)
+ tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1);
+
+ /* pin main thread to main_lcore */
+ if (tm->cb.vlib_thread_set_lcore_cb)
+ {
+ tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore);
+ }
+ else
+ {
+ cpu_set_t cpuset;
+ CPU_ZERO (&cpuset);
+ CPU_SET (tm->main_lcore, &cpuset);
+ pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset);
+ }
+
+ /* as many threads as stacks... */
+ vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ /* Preallocate thread 0 */
+ _vec_len (vlib_worker_threads) = 1;
+ w = vlib_worker_threads;
+ w->thread_mheap = clib_mem_get_heap ();
+ w->thread_stack = vlib_thread_stacks[0];
+ w->lcore_id = tm->main_lcore;
+ w->lwp = syscall (SYS_gettid);
+ w->thread_id = pthread_self ();
+ tm->n_vlib_mains = 1;
+
+ if (tm->sched_policy != ~0)
+ {
+ struct sched_param sched_param;
+ if (!sched_getparam (w->lwp, &sched_param))
+ {
+ if (tm->sched_priority != ~0)
+ sched_param.sched_priority = tm->sched_priority;
+ sched_setscheduler (w->lwp, tm->sched_policy, &sched_param);
+ }
+ }
+
+ /* assign threads to cores and set n_vlib_mains */
+ tr = tm->next;
+
+ while (tr)
+ {
+ vec_add1 (tm->registrations, tr);
+ tr = tr->next;
+ }
+
+ vec_sort_with_function (tm->registrations, sort_registrations_by_no_clone);
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ int j;
+ tr = tm->registrations[i];
+ tr->first_index = first_index;
+ first_index += tr->count;
+ n_vlib_mains += (tr->no_data_structure_clone == 0) ? tr->count : 0;
+
+ /* construct coremask */
+ if (tr->use_pthreads || !tr->count)
+ continue;
+
+ if (tr->coremask)
+ {
+ uword c;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (c, tr->coremask, ({
+ if (clib_bitmap_get(avail_cpu, c) == 0)
+ return clib_error_return (0, "cpu %u is not available to be used"
+ " for the '%s' thread",c, tr->name);
+
+ avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
+ }));
+/* *INDENT-ON* */
+
+ }
+ else
+ {
+ for (j = 0; j < tr->count; j++)
+ {
+ uword c = clib_bitmap_first_set (avail_cpu);
+ if (c == ~0)
+ return clib_error_return (0,
+ "no available cpus to be used for"
+ " the '%s' thread", tr->name);
+
+ avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
+ tr->coremask = clib_bitmap_set (tr->coremask, c, 1);
+ }
+ }
+ }
+
+ clib_bitmap_free (avail_cpu);
+
+ tm->n_vlib_mains = n_vlib_mains;
+
+ vec_validate_aligned (vlib_worker_threads, first_index - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+vlib_frame_queue_t *
+vlib_frame_queue_alloc (int nelts)
+{
+ vlib_frame_queue_t *fq;
+
+ fq = clib_mem_alloc_aligned (sizeof (*fq), CLIB_CACHE_LINE_BYTES);
+ memset (fq, 0, sizeof (*fq));
+ fq->nelts = nelts;
+ fq->vector_threshold = 128; // packets
+ vec_validate_aligned (fq->elts, nelts - 1, CLIB_CACHE_LINE_BYTES);
+
+ if (1)
+ {
+ if (((uword) & fq->tail) & (CLIB_CACHE_LINE_BYTES - 1))
+ fformat (stderr, "WARNING: fq->tail unaligned\n");
+ if (((uword) & fq->head) & (CLIB_CACHE_LINE_BYTES - 1))
+ fformat (stderr, "WARNING: fq->head unaligned\n");
+ if (((uword) fq->elts) & (CLIB_CACHE_LINE_BYTES - 1))
+ fformat (stderr, "WARNING: fq->elts unaligned\n");
+
+ if (sizeof (fq->elts[0]) % CLIB_CACHE_LINE_BYTES)
+ fformat (stderr, "WARNING: fq->elts[0] size %d\n",
+ sizeof (fq->elts[0]));
+ if (nelts & (nelts - 1))
+ {
+ fformat (stderr, "FATAL: nelts MUST be a power of 2\n");
+ abort ();
+ }
+ }
+
+ return (fq);
+}
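+
+/*
+ * Note (illustrative): the power-of-2 check above lets ring positions
+ * be computed with a cheap mask instead of a modulo, e.g.
+ *
+ *   elt = fq->elts + (fq->head & (fq->nelts - 1));
+ */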
+
+void vl_msg_api_handler_no_free (void *) __attribute__ ((weak));
+void
+vl_msg_api_handler_no_free (void *v)
+{
+}
+
+/* Turned off; saved as reference material... */
+#if 0
+static inline int
+vlib_frame_queue_dequeue_internal (int thread_id,
+ vlib_main_t * vm, vlib_node_main_t * nm)
+{
+ vlib_frame_queue_t *fq = vlib_frame_queues[thread_id];
+ vlib_frame_queue_elt_t *elt;
+ vlib_frame_t *f;
+ vlib_pending_frame_t *p;
+ vlib_node_runtime_t *r;
+ u32 node_runtime_index;
+ int msg_type;
+ u64 before;
+ int processed = 0;
+
+ ASSERT (vm == vlib_mains[thread_id]);
+
+ while (1)
+ {
+ if (fq->head == fq->tail)
+ return processed;
+
+ elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
+
+ if (!elt->valid)
+ return processed;
+
+ before = clib_cpu_time_now ();
+
+ f = elt->frame;
+ node_runtime_index = elt->node_runtime_index;
+ msg_type = elt->msg_type;
+
+ switch (msg_type)
+ {
+ case VLIB_FRAME_QUEUE_ELT_FREE_BUFFERS:
+ vlib_buffer_free (vm, vlib_frame_vector_args (f), f->n_vectors);
+ /* note fallthrough... */
+ case VLIB_FRAME_QUEUE_ELT_FREE_FRAME:
+ r = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ node_runtime_index);
+ vlib_frame_free (vm, r, f);
+ break;
+ case VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME:
+ vec_add2 (vm->node_main.pending_frames, p, 1);
+ f->flags |= (VLIB_FRAME_PENDING | VLIB_FRAME_FREE_AFTER_DISPATCH);
+ p->node_runtime_index = elt->node_runtime_index;
+ p->frame_index = vlib_frame_index (vm, f);
+ p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
+ fq->dequeue_vectors += (u64) f->n_vectors;
+ break;
+ case VLIB_FRAME_QUEUE_ELT_API_MSG:
+ vl_msg_api_handler_no_free (f);
+ break;
+ default:
+ clib_warning ("bogus frame queue message, type %d", msg_type);
+ break;
+ }
+ elt->valid = 0;
+ fq->dequeues++;
+ fq->dequeue_ticks += clib_cpu_time_now () - before;
+ CLIB_MEMORY_BARRIER ();
+ fq->head++;
+ processed++;
+ }
+ ASSERT (0);
+ return processed;
+}
+
+int
+vlib_frame_queue_dequeue (int thread_id,
+ vlib_main_t * vm, vlib_node_main_t * nm)
+{
+ return vlib_frame_queue_dequeue_internal (thread_id, vm, nm);
+}
+
+int
+vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
+ u32 frame_queue_index, vlib_frame_t * frame,
+ vlib_frame_queue_msg_type_t type)
+{
+ vlib_frame_queue_t *fq = vlib_frame_queues[frame_queue_index];
+ vlib_frame_queue_elt_t *elt;
+ u32 save_count;
+ u64 new_tail;
+ u64 before = clib_cpu_time_now ();
+
+ ASSERT (fq);
+
+ new_tail = __sync_add_and_fetch (&fq->tail, 1);
+
+ /* Wait until a ring slot is available */
+ while (new_tail >= fq->head + fq->nelts)
+ {
+ f64 b4 = vlib_time_now_ticks (vm, before);
+ vlib_worker_thread_barrier_check (vm, b4);
+ /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
+ // vlib_frame_queue_dequeue (vm->thread_index, vm, nm);
+ }
+
+ elt = fq->elts + (new_tail & (fq->nelts - 1));
+
+ /* this would be very bad... */
+ while (elt->valid)
+ {
+ }
+
+ /* Once we enqueue the frame, frame->n_vectors is owned elsewhere... */
+ save_count = frame->n_vectors;
+
+ elt->frame = frame;
+ elt->node_runtime_index = node_runtime_index;
+ elt->msg_type = type;
+ CLIB_MEMORY_BARRIER ();
+ elt->valid = 1;
+
+ return save_count;
+}
+#endif /* 0 */
+
+/* To be called by vlib worker threads upon startup */
+void
+vlib_worker_thread_init (vlib_worker_thread_t * w)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ /*
+ * Note: disabling signals in worker threads as follows
+ * prevents the api post-mortem dump scheme from working
+ * {
+ * sigset_t s;
+ * sigfillset (&s);
+ * pthread_sigmask (SIG_SETMASK, &s, 0);
+ * }
+ */
+
+ clib_mem_set_heap (w->thread_mheap);
+
+ if (vec_len (tm->thread_prefix) && w->registration->short_name)
+ {
+ w->name = format (0, "%v_%s_%d%c", tm->thread_prefix,
+ w->registration->short_name, w->instance_id, '\0');
+ vlib_set_thread_name ((char *) w->name);
+ }
+
+ if (!w->registration->use_pthreads)
+ {
+
+ /* Initial barrier sync, for both worker and i/o threads */
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
+
+ while (*vlib_worker_threads->wait_at_barrier)
+ ;
+
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
+ }
+}
+
+void *
+vlib_worker_thread_bootstrap_fn (void *arg)
+{
+ void *rv;
+ vlib_worker_thread_t *w = arg;
+
+ w->lwp = syscall (SYS_gettid);
+ w->thread_id = pthread_self ();
+
+ __os_thread_index = w - vlib_worker_threads;
+
+ rv = (void *) clib_calljmp
+ ((uword (*)(uword)) w->thread_function,
+ (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE);
+ /* NOTREACHED, we hope */
+ return rv;
+}
+
+static clib_error_t *
+vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
+{
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ void *(*fp_arg) (void *) = fp;
+
+ w->lcore_id = lcore_id;
+ if (tm->cb.vlib_launch_thread_cb && !w->registration->use_pthreads)
+ return tm->cb.vlib_launch_thread_cb (fp, (void *) w, lcore_id);
+ else
+ {
+ pthread_t worker;
+ cpu_set_t cpuset;
+ CPU_ZERO (&cpuset);
+ CPU_SET (lcore_id, &cpuset);
+
+ if (pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w))
+ return clib_error_return_unix (0, "pthread_create");
+
+ if (pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset))
+ return clib_error_return_unix (0, "pthread_setaffinity_np");
+
+ return 0;
+ }
+}
+
+static clib_error_t *
+start_workers (vlib_main_t * vm)
+{
+ int i, j;
+ vlib_worker_thread_t *w;
+ vlib_main_t *vm_clone;
+ void *oldheap;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ vlib_thread_registration_t *tr;
+ vlib_node_runtime_t *rt;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+ u32 worker_thread_index;
+ u8 *main_heap = clib_mem_get_per_cpu_heap ();
+ mheap_t *main_heap_header = mheap_header (main_heap);
+
+ vec_reset_length (vlib_worker_threads);
+
+ /* Set up the main thread */
+ vec_add2_aligned (vlib_worker_threads, w, 1, CLIB_CACHE_LINE_BYTES);
+ w->elog_track.name = "main thread";
+ elog_track_register (&vm->elog_main, &w->elog_track);
+
+ if (vec_len (tm->thread_prefix))
+ {
+ w->name = format (0, "%v_main%c", tm->thread_prefix, '\0');
+ vlib_set_thread_name ((char *) w->name);
+ }
+
+ /*
+ * Truth of the matter: we always use at least two
+ * threads. So, make the main heap thread-safe
+ * and make the event log thread-safe.
+ */
+ main_heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
+ vm->elog_main.lock =
+ clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ vm->elog_main.lock[0] = 0;
+
+ if (n_vlib_mains > 1)
+ {
+ /* Replace hand-crafted length-1 vector with a real vector */
+ vlib_mains = 0;
+
+ vec_validate_aligned (vlib_mains, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ _vec_len (vlib_mains) = 0;
+ vec_add1_aligned (vlib_mains, vm, CLIB_CACHE_LINE_BYTES);
+
+ vlib_worker_threads->wait_at_barrier =
+ clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+ vlib_worker_threads->workers_at_barrier =
+ clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+
+ vlib_worker_threads->node_reforks_required =
+ clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+
+ /* Ask for an initial barrier sync */
+ *vlib_worker_threads->workers_at_barrier = 0;
+ *vlib_worker_threads->wait_at_barrier = 1;
+
+ /* Without update or refork */
+ *vlib_worker_threads->node_reforks_required = 0;
+ vm->need_vlib_worker_thread_node_runtime_update = 0;
+
+ /* init timing */
+ vm->barrier_epoch = 0;
+ vm->barrier_no_close_before = 0;
+
+ worker_thread_index = 1;
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ vlib_node_main_t *nm, *nm_clone;
+ vlib_buffer_main_t *bm_clone;
+ vlib_buffer_free_list_t *fl_clone, *fl_orig;
+ vlib_buffer_free_list_t *orig_freelist_pool;
+ int k;
+
+ tr = tm->registrations[i];
+
+ if (tr->count == 0)
+ continue;
+
+ for (k = 0; k < tr->count; k++)
+ {
+ vlib_node_t *n;
+
+ vec_add2 (vlib_worker_threads, w, 1);
+ if (tr->mheap_size)
+ w->thread_mheap =
+ mheap_alloc (0 /* use VM */ , tr->mheap_size);
+ else
+ w->thread_mheap = main_heap;
+
+ w->thread_stack =
+ vlib_thread_stack_init (w - vlib_worker_threads);
+ w->thread_function = tr->function;
+ w->thread_function_arg = w;
+ w->instance_id = k;
+ w->registration = tr;
+
+ w->elog_track.name =
+ (char *) format (0, "%s %d", tr->name, k + 1);
+ vec_add1 (w->elog_track.name, 0);
+ elog_track_register (&vm->elog_main, &w->elog_track);
+
+ if (tr->no_data_structure_clone)
+ continue;
+
+ /* Fork vlib_global_main et al. Look for bugs here */
+ oldheap = clib_mem_set_heap (w->thread_mheap);
+
+ vm_clone = clib_mem_alloc (sizeof (*vm_clone));
+ clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
+
+ vm_clone->thread_index = worker_thread_index;
+ vm_clone->heap_base = w->thread_mheap;
+ vm_clone->mbuf_alloc_list = 0;
+ vm_clone->init_functions_called =
+ hash_create (0, /* value bytes */ 0);
+ memset (&vm_clone->random_buffer, 0,
+ sizeof (vm_clone->random_buffer));
+
+ nm = &vlib_mains[0]->node_main;
+ nm_clone = &vm_clone->node_main;
+ /* fork next frames array, preserving node runtime indices */
+ nm_clone->next_frames = vec_dup (nm->next_frames);
+ for (j = 0; j < vec_len (nm_clone->next_frames); j++)
+ {
+ vlib_next_frame_t *nf = &nm_clone->next_frames[j];
+ u32 save_node_runtime_index;
+ u32 save_flags;
+
+ save_node_runtime_index = nf->node_runtime_index;
+ save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
+ vlib_next_frame_init (nf);
+ nf->node_runtime_index = save_node_runtime_index;
+ nf->flags = save_flags;
+ }
+
+ /* fork the frame dispatch queue */
+ nm_clone->pending_frames = 0;
+ vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? */
+ _vec_len (nm_clone->pending_frames) = 0;
+
+ /* fork nodes */
+ nm_clone->nodes = 0;
+
+ /* Allocate all nodes in single block for speed */
+ n = clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*n));
+
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ clib_memcpy (n, nm->nodes[j], sizeof (*n));
+ /* none of the copied nodes have enqueue rights given out */
+ n->owner_node_index = VLIB_INVALID_NODE_INDEX;
+ memset (&n->stats_total, 0, sizeof (n->stats_total));
+ memset (&n->stats_last_clear, 0,
+ sizeof (n->stats_last_clear));
+ vec_add1 (nm_clone->nodes, n);
+ n++;
+ }
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+ vec_foreach (rt,
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy initial runtime_data from node */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+ vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy initial runtime_data from node */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
+ nm_clone->processes = vec_dup (nm->processes);
+
+ /* zap the (per worker) frame freelists, etc */
+ nm_clone->frame_sizes = 0;
+ nm_clone->frame_size_hash = hash_create (0, sizeof (uword));
+
+ /* Packet trace buffers are guaranteed to be empty, nothing to do here */
+
+ clib_mem_set_heap (oldheap);
+ vec_add1_aligned (vlib_mains, vm_clone, CLIB_CACHE_LINE_BYTES);
+
+ vm_clone->error_main.counters =
+ vec_dup (vlib_mains[0]->error_main.counters);
+ vm_clone->error_main.counters_last_clear =
+ vec_dup (vlib_mains[0]->error_main.counters_last_clear);
+
+ /* Fork the vlib_buffer_main_t free lists, etc. */
+ bm_clone = vec_dup (vm_clone->buffer_main);
+ vm_clone->buffer_main = bm_clone;
+
+ orig_freelist_pool = bm_clone->buffer_free_list_pool;
+ bm_clone->buffer_free_list_pool = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (fl_orig, orig_freelist_pool,
+ ({
+ pool_get_aligned (bm_clone->buffer_free_list_pool,
+ fl_clone, CLIB_CACHE_LINE_BYTES);
+ ASSERT (fl_orig - orig_freelist_pool
+ == fl_clone - bm_clone->buffer_free_list_pool);
+
+ fl_clone[0] = fl_orig[0];
+ fl_clone->buffers = 0;
+ fl_clone->n_alloc = 0;
+ }));
+/* *INDENT-ON* */
+
+ worker_thread_index++;
+ }
+ }
+ }
+ else
+ {
+ /* only have non-data-structure copy threads to create... */
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ tr = tm->registrations[i];
+
+ for (j = 0; j < tr->count; j++)
+ {
+ vec_add2 (vlib_worker_threads, w, 1);
+ if (tr->mheap_size)
+ w->thread_mheap =
+ mheap_alloc (0 /* use VM */ , tr->mheap_size);
+ else
+ w->thread_mheap = main_heap;
+ w->thread_stack =
+ vlib_thread_stack_init (w - vlib_worker_threads);
+ w->thread_function = tr->function;
+ w->thread_function_arg = w;
+ w->instance_id = j;
+ w->elog_track.name =
+ (char *) format (0, "%s %d", tr->name, j + 1);
+ w->registration = tr;
+ vec_add1 (w->elog_track.name, 0);
+ elog_track_register (&vm->elog_main, &w->elog_track);
+ }
+ }
+ }
+
+ worker_thread_index = 1;
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ clib_error_t *err;
+ int j;
+
+ tr = tm->registrations[i];
+
+ if (tr->use_pthreads || tm->use_pthreads)
+ {
+ for (j = 0; j < tr->count; j++)
+ {
+ w = vlib_worker_threads + worker_thread_index++;
+ err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn,
+ w, 0);
+ if (err)
+ clib_error_report (err);
+ }
+ }
+ else
+ {
+ uword c;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (c, tr->coremask, ({
+ w = vlib_worker_threads + worker_thread_index++;
+ err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn,
+ w, c);
+ if (err)
+ clib_error_report (err);
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ vlib_worker_thread_barrier_sync (vm);
+ vlib_worker_thread_barrier_release (vm);
+ return 0;
+}
+
+VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);
+
+
+static inline void
+worker_thread_node_runtime_update_internal (void)
+{
+ int i, j;
+ vlib_main_t *vm;
+ vlib_node_main_t *nm, *nm_clone;
+ vlib_main_t *vm_clone;
+ vlib_node_runtime_t *rt;
+ never_inline void
+ vlib_node_runtime_sync_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ uword n_calls,
+ uword n_vectors, uword n_clocks);
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ vm = vlib_mains[0];
+ nm = &vm->node_main;
+
+ ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
+
+ /*
+ * Scrape all runtime stats, so we don't lose node runtime(s) with
+ * pending counts, or throw away worker / io thread counts.
+ */
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ vlib_node_t *n;
+ n = nm->nodes[j];
+ vlib_node_sync_stats (vm, n);
+ }
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ vlib_node_t *n;
+
+ vm_clone = vlib_mains[i];
+ nm_clone = &vm_clone->node_main;
+
+ for (j = 0; j < vec_len (nm_clone->nodes); j++)
+ {
+ n = nm_clone->nodes[j];
+
+ rt = vlib_node_get_runtime (vm_clone, n->index);
+ vlib_node_runtime_sync_stats (vm_clone, rt, 0, 0, 0);
+ }
+ }
+
+ /* Per-worker clone rebuilds are now done on each thread */
+}
+
+
+void
+vlib_worker_thread_node_refork (void)
+{
+ vlib_main_t *vm, *vm_clone;
+ vlib_node_main_t *nm, *nm_clone;
+ vlib_node_t **old_nodes_clone;
+ vlib_node_runtime_t *rt, *old_rt;
+
+ vlib_node_t *new_n_clone;
+
+ int j;
+
+ vm = vlib_mains[0];
+ nm = &vm->node_main;
+ vm_clone = vlib_get_main ();
+ nm_clone = &vm_clone->node_main;
+
+ /* Re-clone error heap */
+ u64 *old_counters = vm_clone->error_main.counters;
+ u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear;
+
+ clib_memcpy (&vm_clone->error_main, &vm->error_main,
+ sizeof (vm->error_main));
+ j = vec_len (vm->error_main.counters) - 1;
+ vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES);
+ vm_clone->error_main.counters = old_counters;
+ vm_clone->error_main.counters_last_clear = old_counters_all_clear;
+
+ nm_clone = &vm_clone->node_main;
+ vec_free (nm_clone->next_frames);
+ nm_clone->next_frames = vec_dup (nm->next_frames);
+
+ for (j = 0; j < vec_len (nm_clone->next_frames); j++)
+ {
+ vlib_next_frame_t *nf = &nm_clone->next_frames[j];
+ u32 save_node_runtime_index;
+ u32 save_flags;
+
+ save_node_runtime_index = nf->node_runtime_index;
+ save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
+ vlib_next_frame_init (nf);
+ nf->node_runtime_index = save_node_runtime_index;
+ nf->flags = save_flags;
+ }
+
+ old_nodes_clone = nm_clone->nodes;
+ nm_clone->nodes = 0;
+
+ /* re-fork nodes */
+
+ /* Allocate all nodes in single block for speed */
+ new_n_clone =
+ clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*new_n_clone));
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ vlib_node_t *old_n_clone;
+ vlib_node_t *new_n;
+
+ new_n = nm->nodes[j];
+ old_n_clone = old_nodes_clone[j];
+
+ clib_memcpy (new_n_clone, new_n, sizeof (*new_n));
+ /* none of the copied nodes have enqueue rights given out */
+ new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX;
+
+ if (j >= vec_len (old_nodes_clone))
+ {
+ /* new node, set to zero */
+ memset (&new_n_clone->stats_total, 0,
+ sizeof (new_n_clone->stats_total));
+ memset (&new_n_clone->stats_last_clear, 0,
+ sizeof (new_n_clone->stats_last_clear));
+ }
+ else
+ {
+ /* Copy stats if the old data is valid */
+ clib_memcpy (&new_n_clone->stats_total,
+ &old_n_clone->stats_total,
+ sizeof (new_n_clone->stats_total));
+ clib_memcpy (&new_n_clone->stats_last_clear,
+ &old_n_clone->stats_last_clear,
+ sizeof (new_n_clone->stats_last_clear));
+
+ /* keep previous node state */
+ new_n_clone->state = old_n_clone->state;
+ }
+ vec_add1 (nm_clone->nodes, new_n_clone);
+ new_n_clone++;
+ }
+ /* Free the old node clones */
+ clib_mem_free (old_nodes_clone[0]);
+
+ vec_free (old_nodes_clone);
+
+
+ /* re-clone internal nodes */
+ old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL];
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+
+ vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy runtime_data, will be overwritten later for existing rt */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
+ for (j = 0; j < vec_len (old_rt); j++)
+ {
+ rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+ rt->state = old_rt[j].state;
+ clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
+ VLIB_NODE_RUNTIME_DATA_SIZE);
+ }
+
+ vec_free (old_rt);
+
+ /* re-clone input nodes */
+ old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+
+ vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy runtime_data, will be overwritten later for existing rt */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
+ for (j = 0; j < vec_len (old_rt); j++)
+ {
+ rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+ rt->state = old_rt[j].state;
+ clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
+ VLIB_NODE_RUNTIME_DATA_SIZE);
+ }
+
+ vec_free (old_rt);
+
+ nm_clone->processes = vec_dup (nm->processes);
+}
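+
+/*
+ * In short, the refork above re-copies the main thread's node graph,
+ * error counters and runtimes into this worker's clone, while
+ * preserving the worker's own statistics and per-node state.
+ */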
+
+void
+vlib_worker_thread_node_runtime_update (void)
+{
+ /*
+ * Make a note that we need to do a node runtime update
+ * prior to releasing the barrier.
+ */
+ vlib_global_main.need_vlib_worker_thread_node_runtime_update = 1;
+}
+
+u32
+unformat_sched_policy (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = SCHED_POLICY_##f;
+ foreach_sched_policy
+#undef _
+ else
+ return 0;
+ return 1;
+}
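+
+/*
+ * For reference, a sketch of what the foreach_sched_policy expansion
+ * above produces (the actual list lives in threads.h):
+ *
+ *   else if (unformat (input, "other")) *r = SCHED_POLICY_OTHER;
+ *   else if (unformat (input, "batch")) *r = SCHED_POLICY_BATCH;
+ *   ...
+ */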
+
+static clib_error_t *
+cpu_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ vlib_thread_registration_t *tr;
+ uword *p;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ u8 *name;
+ u64 coremask;
+ uword *bitmap;
+ u32 count;
+
+ tm->thread_registrations_by_name = hash_create_string (0, sizeof (uword));
+
+ tm->n_thread_stacks = 1; /* account for main thread */
+ tm->sched_policy = ~0;
+ tm->sched_priority = ~0;
+
+ tr = tm->next;
+
+ while (tr)
+ {
+ hash_set_mem (tm->thread_registrations_by_name, tr->name, (uword) tr);
+ tr = tr->next;
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "use-pthreads"))
+ tm->use_pthreads = 1;
+ else if (unformat (input, "thread-prefix %v", &tm->thread_prefix))
+ ;
+ else if (unformat (input, "main-core %u", &tm->main_lcore))
+ ;
+ else if (unformat (input, "skip-cores %u", &tm->skip_cores))
+ ;
+ else if (unformat (input, "coremask-%s %llx", &name, &coremask))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type '%s'", name);
+
+ tr = (vlib_thread_registration_t *) p[0];
+
+ if (tr->use_pthreads)
+ return clib_error_return (0,
+ "coremask cannot be set for '%s' threads",
+ name);
+
+ tr->coremask = clib_bitmap_set_multiple
+ (tr->coremask, 0, coremask, BITS (coremask));
+ tr->count = clib_bitmap_count_set_bits (tr->coremask);
+ }
+ else if (unformat (input, "corelist-%s %U", &name, unformat_bitmap_list,
+ &bitmap))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type '%s'", name);
+
+ tr = (vlib_thread_registration_t *) p[0];
+
+ if (tr->use_pthreads)
+ return clib_error_return (0,
+ "corelist cannot be set for '%s' threads",
+ name);
+
+ tr->coremask = bitmap;
+ tr->count = clib_bitmap_count_set_bits (tr->coremask);
+ }
+ else
+ if (unformat
+ (input, "scheduler-policy %U", unformat_sched_policy,
+ &tm->sched_policy))
+ ;
+ else if (unformat (input, "scheduler-priority %u", &tm->sched_priority))
+ ;
+ else if (unformat (input, "%s %u", &name, &count))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type 3 '%s'", name);
+
+ tr = (vlib_thread_registration_t *) p[0];
+ if (tr->fixed_count)
+ return clib_error_return
+ (0, "number of %s threads not configurable", tr->name);
+ tr->count = count;
+ }
+ else
+ break;
+ }
+
+ if (tm->sched_priority != ~0)
+ {
+ if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR)
+ {
+ u32 prio_max = sched_get_priority_max (tm->sched_policy);
+ u32 prio_min = sched_get_priority_min (tm->sched_policy);
+ if (tm->sched_priority > prio_max)
+ tm->sched_priority = prio_max;
+ if (tm->sched_priority < prio_min)
+ tm->sched_priority = prio_min;
+ }
+ else
+ {
+ return clib_error_return
+ (0,
+ "scheduling priority (%d) is not allowed for `normal` scheduling policy",
+ tm->sched_priority);
+ }
+ }
+ tr = tm->next;
+
+ if (!tm->thread_prefix)
+ tm->thread_prefix = format (0, "vpp");
+
+ while (tr)
+ {
+ tm->n_thread_stacks += tr->count;
+ tm->n_pthreads += tr->count * tr->use_pthreads;
+ tm->n_threads += tr->count * (tr->use_pthreads == 0);
+ tr = tr->next;
+ }
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (cpu_config, "cpu");
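+
+/*
+ * An illustrative startup.conf stanza accepted by the parser above
+ * (the core numbers are placeholders, not a recommendation):
+ *
+ *   cpu {
+ *     main-core 1
+ *     corelist-workers 2-3,18-19
+ *     scheduler-policy fifo
+ *     scheduler-priority 50
+ *   }
+ *
+ * "workers N" is the generic "<thread-type> <count>" shorthand, and
+ * "coremask-workers <hex>" takes a bitmask instead of a core list.
+ */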
+
+#if !defined (__x86_64__) && !defined (__i386__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__)
+void
+__sync_fetch_and_add_8 (void)
+{
+ fformat (stderr, "%s called\n", __FUNCTION__);
+ abort ();
+}
+
+void
+__sync_add_and_fetch_8 (void)
+{
+ fformat (stderr, "%s called\n", __FUNCTION__);
+ abort ();
+}
+#endif
+
+void vnet_main_fixup (vlib_fork_fixup_t which) __attribute__ ((weak));
+void
+vnet_main_fixup (vlib_fork_fixup_t which)
+{
+}
+
+void
+vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ if (vlib_mains == 0)
+ return;
+
+ ASSERT (vlib_get_thread_index () == 0);
+ vlib_worker_thread_barrier_sync (vm);
+
+ switch (which)
+ {
+ case VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX:
+ vnet_main_fixup (VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX);
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ vlib_worker_thread_barrier_release (vm);
+}
+
+/*
+ * Enforce a minimum barrier open time to minimize packet loss due to Rx
+ * overflow, based on a test-based heuristic: the barrier should be open
+ * for at least three times as long as it was closed (with an upper bound
+ * of 1ms, because by that point it is probably too late to make a
+ * difference).
+ */
+
+#ifndef BARRIER_MINIMUM_OPEN_LIMIT
+#define BARRIER_MINIMUM_OPEN_LIMIT 0.001
+#endif
+
+#ifndef BARRIER_MINIMUM_OPEN_FACTOR
+#define BARRIER_MINIMUM_OPEN_FACTOR 3
+#endif
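+
+/*
+ * Worked example of the heuristic, using the defaults above: a barrier
+ * closed for 100us must stay open for at least 300us; one closed for
+ * 500us would want 1.5ms, so the 1ms BARRIER_MINIMUM_OPEN_LIMIT cap
+ * applies instead.
+ */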
+
+void
+vlib_worker_thread_barrier_sync_int (vlib_main_t * vm)
+{
+ f64 deadline;
+ f64 now;
+ f64 t_entry;
+ f64 t_open;
+ f64 t_closed;
+ u32 count;
+
+ if (vec_len (vlib_mains) < 2)
+ return;
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ count = vec_len (vlib_mains) - 1;
+
+ /* Record entry relative to last close */
+ now = vlib_time_now (vm);
+ t_entry = now - vm->barrier_epoch;
+
+ /* Tolerate recursive calls */
+ if (++vlib_worker_threads[0].recursion_level > 1)
+ {
+ barrier_trace_sync_rec (t_entry);
+ return;
+ }
+
+ vlib_worker_threads[0].barrier_sync_count++;
+
+ /* Enforce minimum barrier open time to minimize packet loss */
+ ASSERT (vm->barrier_no_close_before <= (now + BARRIER_MINIMUM_OPEN_LIMIT));
+ while ((now = vlib_time_now (vm)) < vm->barrier_no_close_before)
+ ;
+
+ /* Record time of closure */
+ t_open = now - vm->barrier_epoch;
+ vm->barrier_epoch = now;
+
+ deadline = now + BARRIER_SYNC_TIMEOUT;
+
+ *vlib_worker_threads->wait_at_barrier = 1;
+ while (*vlib_worker_threads->workers_at_barrier != count)
+ {
+ if ((now = vlib_time_now (vm)) > deadline)
+ {
+ fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ os_panic ();
+ }
+ }
+
+ t_closed = now - vm->barrier_epoch;
+
+ barrier_trace_sync (t_entry, t_open, t_closed);
+}
+
+void
+vlib_worker_thread_barrier_release (vlib_main_t * vm)
+{
+ f64 deadline;
+ f64 now;
+ f64 minimum_open;
+ f64 t_entry;
+ f64 t_closed_total;
+ f64 t_update_main = 0.0;
+ int refork_needed = 0;
+
+ if (vec_len (vlib_mains) < 2)
+ return;
+
+ ASSERT (vlib_get_thread_index () == 0);
+
+ now = vlib_time_now (vm);
+ t_entry = now - vm->barrier_epoch;
+
+ if (--vlib_worker_threads[0].recursion_level > 0)
+ {
+ barrier_trace_release_rec (t_entry);
+ return;
+ }
+
+ /* Update (all) node runtimes before releasing the barrier, if needed */
+ if (vm->need_vlib_worker_thread_node_runtime_update)
+ {
+ /* Do stats elements on main thread */
+ worker_thread_node_runtime_update_internal ();
+ vm->need_vlib_worker_thread_node_runtime_update = 0;
+
+ /* Do per thread rebuilds in parallel */
+ refork_needed = 1;
+ clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
+ (vec_len (vlib_mains) - 1));
+ now = vlib_time_now (vm);
+ t_update_main = now - vm->barrier_epoch;
+ }
+
+ deadline = now + BARRIER_SYNC_TIMEOUT;
+
+ *vlib_worker_threads->wait_at_barrier = 0;
+
+ while (*vlib_worker_threads->workers_at_barrier > 0)
+ {
+ if ((now = vlib_time_now (vm)) > deadline)
+ {
+ fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ os_panic ();
+ }
+ }
+
+ /* Wait for reforks before continuing */
+ if (refork_needed)
+ {
+ now = vlib_time_now (vm);
+
+ deadline = now + BARRIER_SYNC_TIMEOUT;
+
+ while (*vlib_worker_threads->node_reforks_required > 0)
+ {
+ if ((now = vlib_time_now (vm)) > deadline)
+ {
+ fformat (stderr, "%s: worker thread refork deadlock\n",
+ __FUNCTION__);
+ os_panic ();
+ }
+ }
+ }
+
+ t_closed_total = now - vm->barrier_epoch;
+
+ minimum_open = t_closed_total * BARRIER_MINIMUM_OPEN_FACTOR;
+
+ if (minimum_open > BARRIER_MINIMUM_OPEN_LIMIT)
+ {
+ minimum_open = BARRIER_MINIMUM_OPEN_LIMIT;
+ }
+
+ vm->barrier_no_close_before = now + minimum_open;
+
+ /* Record barrier epoch (used to enforce minimum open time) */
+ vm->barrier_epoch = now;
+
+ barrier_trace_release (t_entry, t_closed_total, t_update_main);
+}
+
+/*
+ * Check the frame queue to see if any frames are available.
+ * If so, pull the packets off the frames and put them to
+ * the handoff node.
+ */
+int
+vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm)
+{
+ u32 thread_id = vm->thread_index;
+ vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
+ vlib_frame_queue_elt_t *elt;
+ u32 *from, *to;
+ vlib_frame_t *f;
+ int msg_type;
+ int processed = 0;
+ u32 n_left_to_node;
+ u32 vectors = 0;
+
+ ASSERT (fq);
+ ASSERT (vm == vlib_mains[thread_id]);
+
+ if (PREDICT_FALSE (fqm->node_index == ~0))
+ return 0;
+ /*
+ * Gather trace data for frame queues
+ */
+ if (PREDICT_FALSE (fq->trace))
+ {
+ frame_queue_trace_t *fqt;
+ frame_queue_nelt_counter_t *fqh;
+ u32 elix;
+
+ fqt = &fqm->frame_queue_traces[thread_id];
+
+ fqt->nelts = fq->nelts;
+ fqt->head = fq->head;
+ fqt->head_hint = fq->head_hint;
+ fqt->tail = fq->tail;
+ fqt->threshold = fq->vector_threshold;
+ fqt->n_in_use = fqt->tail - fqt->head;
+ if (fqt->n_in_use >= fqt->nelts)
+ {
+ /* if beyond max, use max */
+ fqt->n_in_use = fqt->nelts - 1;
+ }
+
+ /* Record the number of elements in use in the histogram */
+ fqh = &fqm->frame_queue_histogram[thread_id];
+ fqh->count[fqt->n_in_use]++;
+
+ /* Record a snapshot of the ring elements */
+ for (elix = 0; elix < fqt->nelts; elix++)
+ {
+ elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
+ if (1 || elt->valid) /* deliberately sample every slot, valid or not */
+ {
+ fqt->n_vectors[elix] = elt->n_vectors;
+ }
+ }
+ fqt->written = 1;
+ }
+
+ while (1)
+ {
+ if (fq->head == fq->tail)
+ {
+ fq->head_hint = fq->head;
+ return processed;
+ }
+
+ elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
+
+ if (!elt->valid)
+ {
+ fq->head_hint = fq->head;
+ return processed;
+ }
+
+ from = elt->buffer_index;
+ msg_type = elt->msg_type;
+
+ ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
+ ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
+
+ f = vlib_get_frame_to_node (vm, fqm->node_index);
+
+ to = vlib_frame_vector_args (f);
+
+ n_left_to_node = elt->n_vectors;
+
+ while (n_left_to_node >= 4)
+ {
+ to[0] = from[0];
+ to[1] = from[1];
+ to[2] = from[2];
+ to[3] = from[3];
+ to += 4;
+ from += 4;
+ n_left_to_node -= 4;
+ }
+
+ while (n_left_to_node > 0)
+ {
+ to[0] = from[0];
+ to++;
+ from++;
+ n_left_to_node--;
+ }
+
+ vectors += elt->n_vectors;
+ f->n_vectors = elt->n_vectors;
+ vlib_put_frame_to_node (vm, fqm->node_index, f);
+
+ elt->valid = 0;
+ elt->n_vectors = 0;
+ elt->msg_type = 0xfefefefe;
+ CLIB_MEMORY_BARRIER ();
+ fq->head++;
+ processed++;
+
+ /*
+ * Limit the number of packets pushed into the graph
+ */
+ if (vectors >= fq->vector_threshold)
+ {
+ fq->head_hint = fq->head;
+ return processed;
+ }
+ }
+ ASSERT (0);
+ return processed;
+}
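+
+/*
+ * Note on the ring arithmetic above: head and tail increase
+ * monotonically and are masked with (nelts - 1) to locate a slot, so
+ * nelts must be a power of two. E.g. with nelts = 32 and head = 63,
+ * the next element is at index (63 + 1) & 31 = 0.
+ */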
+
+void
+vlib_worker_thread_fn (void *arg)
+{
+ vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *e;
+
+ ASSERT (vm->thread_index == vlib_get_thread_index ());
+
+ vlib_worker_thread_init (w);
+ clib_time_init (&vm->clib_time);
+ clib_mem_set_heap (w->thread_mheap);
+
+ /* Wait until the dpdk init sequence is complete */
+ while (tm->extern_thread_mgmt && tm->worker_thread_release == 0)
+ vlib_worker_thread_barrier_check ();
+
+ e = vlib_call_init_exit_functions
+ (vm, vm->worker_init_function_registrations, 1 /* call_once */ );
+ if (e)
+ clib_error_report (e);
+
+ vlib_worker_loop (vm);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
+ .name = "workers",
+ .short_name = "wk",
+ .function = vlib_worker_thread_fn,
+};
+/* *INDENT-ON* */
+
+u32
+vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_frame_queue_main_t *fqm;
+ vlib_frame_queue_t *fq;
+ int i;
+
+ if (frame_queue_nelts == 0)
+ frame_queue_nelts = FRAME_QUEUE_NELTS;
+
+ vec_add2 (tm->frame_queue_mains, fqm, 1);
+
+ fqm->node_index = node_index;
+
+ vec_validate (fqm->vlib_frame_queues, tm->n_vlib_mains - 1);
+ _vec_len (fqm->vlib_frame_queues) = 0;
+ for (i = 0; i < tm->n_vlib_mains; i++)
+ {
+ fq = vlib_frame_queue_alloc (frame_queue_nelts);
+ vec_add1 (fqm->vlib_frame_queues, fq);
+ }
+
+ return (fqm - tm->frame_queue_mains);
+}
+
+int
+vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ if (tm->extern_thread_mgmt)
+ return -1;
+
+ tm->cb.vlib_launch_thread_cb = cb->vlib_launch_thread_cb;
+ tm->extern_thread_mgmt = 1;
+ return 0;
+}
+
+void
+vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t *
+ args)
+{
+ ASSERT (vlib_get_thread_index () == 0);
+ vlib_process_signal_event (vlib_get_main (), args->node_index,
+ args->type_opaque, args->data);
+}
+
+void *rpc_call_main_thread_cb_fn;
+
+void
+vlib_rpc_call_main_thread (void *callback, u8 * args, u32 arg_size)
+{
+ if (rpc_call_main_thread_cb_fn)
+ {
+ void (*fp) (void *, u8 *, u32) = rpc_call_main_thread_cb_fn;
+ (*fp) (callback, args, arg_size);
+ }
+ else
+ clib_warning ("BUG: rpc_call_main_thread_cb_fn NULL!");
+}
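+
+/*
+ * Usage sketch (the names are hypothetical, for illustration only):
+ * a worker passes a callback plus serialized arguments, and the
+ * registered rpc_call_main_thread_cb_fn is responsible for running
+ * the callback on the main thread:
+ *
+ *   my_args_t a = { ... };
+ *   vlib_rpc_call_main_thread (my_main_thread_fn, (u8 *) &a,
+ *                              sizeof (a));
+ */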
+
+clib_error_t *
+threads_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (threads_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
new file mode 100644
index 00000000..8931584b
--- /dev/null
+++ b/src/vlib/threads.h
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_threads_h
+#define included_vlib_threads_h
+
+#include <vlib/main.h>
+#include <linux/sched.h>
+
+/*
+ * To enable detailed tracing of barrier usage, including call stacks and
+ * timings, define BARRIER_TRACING here or in relevant TAGS. If also used
+ * with CLIB_DEBUG, timing will _not_ be representative of normal code
+ * execution.
+ */
+
+// #define BARRIER_TRACING 1
+
+/*
+ * Two options for barrier tracing output: syslog & elog.
+ */
+
+// #define BARRIER_TRACING_ELOG 1
+
+extern vlib_main_t **vlib_mains;
+
+void vlib_set_thread_name (char *name);
+
+/* arg is actually a vlib_worker_thread_t * */
+typedef void (vlib_thread_function_t) (void *arg);
+
+typedef struct vlib_thread_registration_
+{
+ /* constructor generated list of thread registrations */
+ struct vlib_thread_registration_ *next;
+
+ /* config parameters */
+ char *name;
+ char *short_name;
+ vlib_thread_function_t *function;
+ uword mheap_size;
+ int fixed_count;
+ u32 count;
+ int no_data_structure_clone;
+ u32 frame_queue_nelts;
+
+ /* All threads of this type run on pthreads */
+ int use_pthreads;
+ u32 first_index;
+ uword *coremask;
+} vlib_thread_registration_t;
+
+/*
+ * Frames have their cpu / vlib_main_t index in the low-order N bits
+ * Make VLIB_MAX_CPUS a power-of-two, please...
+ */
+
+#ifndef VLIB_MAX_CPUS
+#define VLIB_MAX_CPUS 256
+#endif
+
+#if VLIB_MAX_CPUS > CLIB_MAX_MHEAPS
+#error Please increase number of per-cpu mheaps
+#endif
+
+#define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1)	/* 0xff with the default of 256 */
+#define VLIB_OFFSET_MASK (~VLIB_CPU_MASK)
+
+#define VLIB_LOG2_THREAD_STACK_SIZE (21)
+#define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE)
+
+typedef enum
+{
+ VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME,
+} vlib_frame_queue_msg_type_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 valid;
+ u32 msg_type;
+ u32 n_vectors;
+ u32 last_n_vectors;
+
+ /* 256 * 4 = 1024 bytes, an even multiple of the cache line size */
+ u32 buffer_index[VLIB_FRAME_SIZE];
+}
+vlib_frame_queue_elt_t;
+
+typedef struct
+{
+ /* First cache line */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 *wait_at_barrier;
+ volatile u32 *workers_at_barrier;
+
+ /* Second Cache Line */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ void *thread_mheap;
+ u8 *thread_stack;
+ void (*thread_function) (void *);
+ void *thread_function_arg;
+ i64 recursion_level;
+ elog_track_t elog_track;
+ u32 instance_id;
+ vlib_thread_registration_t *registration;
+ u8 *name;
+ u64 barrier_sync_count;
+#ifdef BARRIER_TRACING
+ const char *barrier_caller;
+ const char *barrier_context;
+#endif
+ volatile u32 *node_reforks_required;
+
+ long lwp;
+ int lcore_id;
+ pthread_t thread_id;
+} vlib_worker_thread_t;
+
+extern vlib_worker_thread_t *vlib_worker_threads;
+
+typedef struct
+{
+ /* enqueue side */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u64 tail;
+ u64 enqueues;
+ u64 enqueue_ticks;
+ u64 enqueue_vectors;
+ u32 enqueue_full_events;
+
+ /* dequeue side */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ volatile u64 head;
+ u64 dequeues;
+ u64 dequeue_ticks;
+ u64 dequeue_vectors;
+ u64 trace;
+ u64 vector_threshold;
+
+ /* dequeue hint to enqueue side */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
+ volatile u64 head_hint;
+
+ /* read-only, constant, shared */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline3);
+ vlib_frame_queue_elt_t *elts;
+ u32 nelts;
+}
+vlib_frame_queue_t;
+
+typedef struct
+{
+ u32 node_index;
+ vlib_frame_queue_t **vlib_frame_queues;
+
+ /* for frame queue tracing */
+ frame_queue_trace_t *frame_queue_traces;
+ frame_queue_nelt_counter_t *frame_queue_histogram;
+} vlib_frame_queue_main_t;
+
+typedef struct
+{
+ uword node_index;
+ uword type_opaque;
+ uword data;
+} vlib_process_signal_event_mt_args_t;
+
+/* Called early, in thread 0's context */
+clib_error_t *vlib_thread_init (vlib_main_t * vm);
+
+int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
+ u32 frame_queue_index, vlib_frame_t * frame,
+ vlib_frame_queue_msg_type_t type);
+
+int
+vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm);
+
+void vlib_worker_thread_node_runtime_update (void);
+
+void vlib_create_worker_threads (vlib_main_t * vm, int n,
+ void (*thread_function) (void *));
+
+void vlib_worker_thread_init (vlib_worker_thread_t * w);
+u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
+
+/* Check for a barrier sync request every 30ms */
+#define BARRIER_SYNC_DELAY (0.030000)
+
+#if CLIB_DEBUG > 0
+/* long barrier timeout, for gdb... */
+#define BARRIER_SYNC_TIMEOUT (600.1)
+#else
+#define BARRIER_SYNC_TIMEOUT (1.0)
+#endif
+
+#ifdef BARRIER_TRACING
+#define vlib_worker_thread_barrier_sync(X) {vlib_worker_threads[0].barrier_caller=__FUNCTION__;vlib_worker_thread_barrier_sync_int(X);}
+#else
+#define vlib_worker_thread_barrier_sync(X) vlib_worker_thread_barrier_sync_int(X)
+#endif
+
+
+void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm);
+void vlib_worker_thread_barrier_release (vlib_main_t * vm);
+void vlib_worker_thread_node_refork (void);
+
+static_always_inline uword
+vlib_get_thread_index (void)
+{
+ return __os_thread_index;
+}
+
+always_inline void
+vlib_smp_unsafe_warning (void)
+{
+ if (CLIB_DEBUG > 0)
+ {
+ if (vlib_get_thread_index ())
+ fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
+ }
+}
+
+typedef enum
+{
+ VLIB_WORKER_THREAD_FORK_FIXUP_ILLEGAL = 0,
+ VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX,
+} vlib_fork_fixup_t;
+
+void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which);
+
+#define foreach_vlib_main(body) \
+do { \
+ vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \
+ int ii; \
+ \
+ for (ii = 0; ii < vec_len (vlib_mains); ii++) \
+ { \
+ this_vlib_main = vlib_mains[ii]; \
+ ASSERT (ii == 0 || \
+ this_vlib_main->parked_at_barrier == 1); \
+ if (this_vlib_main) \
+ vec_add1 (__vlib_mains, this_vlib_main); \
+ } \
+ \
+ for (ii = 0; ii < vec_len (__vlib_mains); ii++) \
+ { \
+ this_vlib_main = __vlib_mains[ii]; \
+ /* body uses this_vlib_main... */ \
+ (body); \
+ } \
+ vec_free (__vlib_mains); \
+} while (0);
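+
+/*
+ * Typical use, as in trace.c: the body runs once per vlib_main_t with
+ * this_vlib_main pointing at the current one. Note the double parens
+ * required to pass a statement block through the macro:
+ *
+ *   foreach_vlib_main (({
+ *     tm = &this_vlib_main->trace_main;
+ *     ...
+ *   }));
+ */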
+
+#define foreach_sched_policy \
+ _(SCHED_OTHER, OTHER, "other") \
+ _(SCHED_BATCH, BATCH, "batch") \
+ _(SCHED_IDLE, IDLE, "idle") \
+ _(SCHED_FIFO, FIFO, "fifo") \
+ _(SCHED_RR, RR, "rr")
+
+typedef enum
+{
+#define _(v,f,s) SCHED_POLICY_##f = v,
+ foreach_sched_policy
+#undef _
+ SCHED_POLICY_N,
+} sched_policy_t;
+
+typedef struct
+{
+ clib_error_t *(*vlib_launch_thread_cb) (void *fp, vlib_worker_thread_t * w,
+ unsigned lcore_id);
+ clib_error_t *(*vlib_thread_set_lcore_cb) (u32 thread, u16 lcore);
+} vlib_thread_callbacks_t;
+
+typedef struct
+{
+ /* Link list of registrations, built by constructors */
+ vlib_thread_registration_t *next;
+
+ /* Vector of registrations, w/ non-data-structure clones at the top */
+ vlib_thread_registration_t **registrations;
+
+ uword *thread_registrations_by_name;
+
+ vlib_worker_thread_t *worker_threads;
+
+ /*
+ * Launch all threads as pthreads,
+ * not eal_rte_launch (strict affinity) threads
+ */
+ int use_pthreads;
+
+ /* Number of vlib_main / vnet_main clones */
+ u32 n_vlib_mains;
+
+ /* Number of thread stacks to create */
+ u32 n_thread_stacks;
+
+ /* Number of pthreads */
+ u32 n_pthreads;
+
+ /* Number of threads */
+ u32 n_threads;
+
+ /* Number of cores to skip, must match the core mask */
+ u32 skip_cores;
+
+ /* Thread prefix name */
+ u8 *thread_prefix;
+
+ /* main thread lcore */
+ u8 main_lcore;
+
+ /* Bitmap of available CPU cores */
+ uword *cpu_core_bitmap;
+
+ /* Bitmap of available CPU sockets (NUMA nodes) */
+ uword *cpu_socket_bitmap;
+
+ /* Worker handoff queues */
+ vlib_frame_queue_main_t *frame_queue_mains;
+
+ /* worker thread initialization barrier */
+ volatile u32 worker_thread_release;
+
+ /* scheduling policy */
+ u32 sched_policy;
+
+ /* scheduling policy priority */
+ u32 sched_priority;
+
+ /* callbacks */
+ vlib_thread_callbacks_t cb;
+ int extern_thread_mgmt;
+} vlib_thread_main_t;
+
+extern vlib_thread_main_t vlib_thread_main;
+
+#include <vlib/global_funcs.h>
+
+#define VLIB_REGISTER_THREAD(x,...) \
+ __VA_ARGS__ vlib_thread_registration_t x; \
+static void __vlib_add_thread_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_thread_registration_##x (void) \
+{ \
+ vlib_thread_main_t * tm = &vlib_thread_main; \
+ x.next = tm->next; \
+ tm->next = &x; \
+} \
+__VA_ARGS__ vlib_thread_registration_t x
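+
+/*
+ * Example registration, as used for the worker threads in threads.c:
+ *
+ *   VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
+ *     .name = "workers",
+ *     .short_name = "wk",
+ *     .function = vlib_worker_thread_fn,
+ *   };
+ *
+ * The constructor links the registration onto tm->next before main()
+ * runs, which is how cpu_config can look thread types up by name.
+ */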
+
+always_inline u32
+vlib_num_workers ()
+{
+ return vlib_thread_main.n_vlib_mains - 1;
+}
+
+always_inline u32
+vlib_get_worker_thread_index (u32 worker_index)
+{
+ return worker_index + 1;
+}
+
+always_inline u32
+vlib_get_worker_index (u32 thread_index)
+{
+ return thread_index - 1;
+}
+
+always_inline u32
+vlib_get_current_worker_index ()
+{
+ return vlib_get_thread_index () - 1;
+}
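+
+/*
+ * Index mapping used by the helpers above: thread 0 is always the main
+ * thread and workers occupy thread indices 1..N, hence worker_index ==
+ * thread_index - 1 (e.g. worker 0 runs as thread 1).
+ */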
+
+static inline void
+vlib_worker_thread_barrier_check (void)
+{
+ if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier))
+ {
+ vlib_main_t *vm;
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
+ if (CLIB_DEBUG > 0)
+ {
+ vm = vlib_get_main ();
+ vm->parked_at_barrier = 1;
+ }
+ while (*vlib_worker_threads->wait_at_barrier)
+ ;
+ if (CLIB_DEBUG > 0)
+ vm->parked_at_barrier = 0;
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
+
+ if (PREDICT_FALSE (*vlib_worker_threads->node_reforks_required))
+ {
+ vlib_worker_thread_node_refork ();
+ clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
+ -1);
+ while (*vlib_worker_threads->node_reforks_required)
+ ;
+ }
+ }
+}
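+
+/*
+ * Sketch of the barrier handshake implemented by this function and the
+ * sync/release pair in threads.c:
+ *
+ *   main:   *wait_at_barrier = 1; spin until workers_at_barrier == N
+ *   worker: atomic increment of workers_at_barrier;
+ *           spin while *wait_at_barrier
+ *   main:   ... critical section ...; *wait_at_barrier = 0;
+ *           spin until workers_at_barrier == 0
+ *   worker: atomic decrement of workers_at_barrier; resume, doing a
+ *           node refork first if one was requested
+ */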
+
+always_inline vlib_main_t *
+vlib_get_worker_vlib_main (u32 worker_index)
+{
+ vlib_main_t *vm;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ ASSERT (worker_index < tm->n_vlib_mains - 1);
+ vm = vlib_mains[worker_index + 1];
+ ASSERT (vm);
+ return vm;
+}
+
+static inline void
+vlib_put_frame_queue_elt (vlib_frame_queue_elt_t * hf)
+{
+ CLIB_MEMORY_BARRIER ();
+ hf->valid = 1;
+}
+
+static inline vlib_frame_queue_elt_t *
+vlib_get_frame_queue_elt (u32 frame_queue_index, u32 index)
+{
+ vlib_frame_queue_t *fq;
+ vlib_frame_queue_elt_t *elt;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ vlib_frame_queue_main_t *fqm =
+ vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+ u64 new_tail;
+
+ fq = fqm->vlib_frame_queues[index];
+ ASSERT (fq);
+
+ new_tail = __sync_add_and_fetch (&fq->tail, 1);
+
+ /* Wait until a ring slot is available */
+ while (new_tail >= fq->head_hint + fq->nelts)
+ vlib_worker_thread_barrier_check ();
+
+ elt = fq->elts + (new_tail & (fq->nelts - 1));
+
+ /* This would be very bad: the slot we just claimed is still marked
+ valid, i.e. the consumer has not drained it yet. Spin. */
+ while (elt->valid)
+ ;
+
+ elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME;
+ elt->last_n_vectors = elt->n_vectors = 0;
+
+ return elt;
+}
+
+static inline vlib_frame_queue_t *
+is_vlib_frame_queue_congested (u32 frame_queue_index,
+ u32 index,
+ u32 queue_hi_thresh,
+ vlib_frame_queue_t **
+ handoff_queue_by_worker_index)
+{
+ vlib_frame_queue_t *fq;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ vlib_frame_queue_main_t *fqm =
+ vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+
+ fq = handoff_queue_by_worker_index[index];
+ if (fq != (vlib_frame_queue_t *) (~0))
+ return fq;
+
+ fq = fqm->vlib_frame_queues[index];
+ ASSERT (fq);
+
+ if (PREDICT_FALSE (fq->tail >= (fq->head_hint + queue_hi_thresh)))
+ {
+ /* a valid entry in the array will indicate the queue has reached
+ * the specified threshold and is congested
+ */
+ handoff_queue_by_worker_index[index] = fq;
+ fq->enqueue_full_events++;
+ return fq;
+ }
+
+ return NULL;
+}
+
+static inline vlib_frame_queue_elt_t *
+vlib_get_worker_handoff_queue_elt (u32 frame_queue_index,
+ u32 vlib_worker_index,
+ vlib_frame_queue_elt_t **
+ handoff_queue_elt_by_worker_index)
+{
+ vlib_frame_queue_elt_t *elt;
+
+ if (handoff_queue_elt_by_worker_index[vlib_worker_index])
+ return handoff_queue_elt_by_worker_index[vlib_worker_index];
+
+ elt = vlib_get_frame_queue_elt (frame_queue_index, vlib_worker_index);
+
+ handoff_queue_elt_by_worker_index[vlib_worker_index] = elt;
+
+ return elt;
+}
+
+u8 *vlib_thread_stack_init (uword thread_index);
+int vlib_thread_cb_register (struct vlib_main_t *vm,
+ vlib_thread_callbacks_t * cb);
+extern void *rpc_call_main_thread_cb_fn;
+
+void
+vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t *
+ args);
+void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size);
+
+#endif /* included_vlib_threads_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c
new file mode 100644
index 00000000..02bdea5c
--- /dev/null
+++ b/src/vlib/threads_cli.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _GNU_SOURCE
+
+#include <vppinfra/format.h>
+#include <vppinfra/linux/sysfs.h>
+#include <vlib/vlib.h>
+
+#include <vlib/threads.h>
+#include <vlib/unix/unix.h>
+
+static u8 *
+format_sched_policy_and_priority (u8 * s, va_list * args)
+{
+ long i = va_arg (*args, long);
+ struct sched_param sched_param;
+ u8 *t = 0;
+
+ switch (sched_getscheduler (i))
+ {
+#define _(v,f,str) case SCHED_POLICY_##f: t = (u8 *) str; break;
+ foreach_sched_policy
+#undef _
+ }
+ if (sched_getparam (i, &sched_param) == 0)
+ return format (s, "%s (%d)", t, sched_param.sched_priority);
+ else
+ return format (s, "%s (n/a)", t);
+}
+
+static clib_error_t *
+show_threads_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_worker_thread_t *w;
+ int i;
+
+ vlib_cli_output (vm, "%-7s%-20s%-12s%-8s%-25s%-7s%-7s%-7s%-10s",
+ "ID", "Name", "Type", "LWP", "Sched Policy (Priority)",
+ "lcore", "Core", "Socket", "State");
+
+#if !defined(__powerpc64__)
+ for (i = 0; i < vec_len (vlib_worker_threads); i++)
+ {
+ w = vlib_worker_threads + i;
+ u8 *line = NULL;
+
+ line = format (line, "%-7d%-20s%-12s%-8d",
+ i,
+ w->name ? w->name : (u8 *) "",
+ w->registration ? w->registration->name : "", w->lwp);
+
+ line = format (line, "%-25U", format_sched_policy_and_priority, w->lwp);
+
+ int lcore = -1;
+ cpu_set_t cpuset;
+ CPU_ZERO (&cpuset);
+ int ret = -1;
+
+ ret =
+ pthread_getaffinity_np (w->thread_id, sizeof (cpu_set_t), &cpuset);
+ if (!ret)
+ {
+ int c;
+ for (c = 0; c < CPU_SETSIZE; c++)
+ if (CPU_ISSET (c, &cpuset))
+ {
+ if (lcore > -1)
+ {
+ lcore = -2;
+ break;
+ }
+ lcore = c;
+ }
+ }
+ else
+ {
+ lcore = w->lcore_id;
+ }
+
+ if (lcore > -1)
+ {
+ const char *sys_cpu_path = "/sys/devices/system/cpu/cpu";
+ int socket_id = -1;
+ int core_id = -1;
+ u8 *p = 0;
+
+ p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, lcore, 0);
+ clib_sysfs_read ((char *) p, "%d", &core_id);
+
+ vec_reset_length (p);
+ p =
+ format (p,
+ "%s%u/topology/physical_package_id%c",
+ sys_cpu_path, lcore, 0);
+ clib_sysfs_read ((char *) p, "%d", &socket_id);
+ vec_free (p);
+
+ line = format (line, "%-7u%-7u%-7u%", lcore, core_id, socket_id);
+ }
+ else
+ {
+ line =
+ format (line, "%-7s%-7s%-7s%", (lcore == -2) ? "M" : "n/a", "n/a",
+ "n/a");
+ }
+
+ vlib_cli_output (vm, "%v", line);
+ vec_free (line);
+ }
+#endif
+
+ return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_threads_command, static) = {
+ .path = "show threads",
+ .short_help = "Show threads",
+ .function = show_threads_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Trigger threads to grab frame queue trace data
+ */
+static clib_error_t *
+trace_frame_queue (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ frame_queue_trace_t *fqt;
+ frame_queue_nelt_counter_t *fqh;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_frame_queue_main_t *fqm;
+ u32 num_fq;
+ u32 fqix;
+ u32 enable = 2;
+ u32 index = ~(u32) 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ enable = 1;
+ else if (unformat (line_input, "off"))
+ enable = 0;
+ else if (unformat (line_input, "index %u", &index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (enable > 1)
+ {
+ error = clib_error_return (0, "expecting on or off");
+ goto done;
+ }
+
+ if (vec_len (tm->frame_queue_mains) == 0)
+ {
+ error = clib_error_return (0, "no worker handoffs exist");
+ goto done;
+ }
+
+ if (index > vec_len (tm->frame_queue_mains) - 1)
+ {
+ error = clib_error_return (0,
+ "expecting valid worker handoff queue index");
+ goto done;
+ }
+
+ fqm = vec_elt_at_index (tm->frame_queue_mains, index);
+
+ num_fq = vec_len (fqm->vlib_frame_queues);
+ if (num_fq == 0)
+ {
+ vlib_cli_output (vm, "No frame queues exist\n");
+ goto done;
+ }
+
+ /* Allocate storage for trace if necessary */
+ vec_validate_aligned (fqm->frame_queue_traces, num_fq - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (fqm->frame_queue_histogram, num_fq - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ for (fqix = 0; fqix < num_fq; fqix++)
+ {
+ fqt = &fqm->frame_queue_traces[fqix];
+ fqh = &fqm->frame_queue_histogram[fqix];
+
+ memset (fqt->n_vectors, 0xff, sizeof (fqt->n_vectors));
+ fqt->written = 0;
+ memset (fqh, 0, sizeof (*fqh));
+ fqm->vlib_frame_queues[fqix]->trace = enable;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_trace_frame_queue,static) = {
+ .path = "trace frame-queue",
+ .short_help = "trace frame-queue (on|off)",
+ .function = trace_frame_queue,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Add two counters and compute their percentage of the total.
+ * Round up, e.g. 0.000001 => 1%
+ */
+static u32
+compute_percent (u64 * two_counters, u64 total)
+{
+ if (total == 0)
+ {
+ return 0;
+ }
+ else
+ {
+ return (((two_counters[0] + two_counters[1]) * 100) +
+ (total - 1)) / total;
+ }
+}
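+
+/*
+ * E.g. with total = 200 and counts {1, 0}: ((1 + 0) * 100 + 199) / 200
+ * = 1, so even a single event displays as 1%.
+ */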
+
+/*
+ * Display frame queue trace data gathered by threads.
+ */
+static clib_error_t *
+show_frame_queue_internal (vlib_main_t * vm,
+ vlib_frame_queue_main_t * fqm, u32 histogram)
+{
+ clib_error_t *error = NULL;
+ frame_queue_trace_t *fqt;
+ frame_queue_nelt_counter_t *fqh;
+ u32 num_fq;
+ u32 fqix;
+
+ num_fq = vec_len (fqm->frame_queue_traces);
+ if (num_fq == 0)
+ {
+ vlib_cli_output (vm, "No trace data for frame queues\n");
+ return error;
+ }
+
+ if (histogram)
+ {
+ vlib_cli_output (vm, "0-1 2-3 4-5 6-7 8-9 10-11 12-13 14-15 "
+ "16-17 18-19 20-21 22-23 24-25 26-27 28-29 30-31\n");
+ }
+
+ for (fqix = 0; fqix < num_fq; fqix++)
+ {
+ fqt = &(fqm->frame_queue_traces[fqix]);
+
+ vlib_cli_output (vm, "Thread %d %v\n", fqix,
+ vlib_worker_threads[fqix].name);
+
+ if (fqt->written == 0)
+ {
+ vlib_cli_output (vm, " no trace data\n");
+ continue;
+ }
+
+ if (histogram)
+ {
+ fqh = &(fqm->frame_queue_histogram[fqix]);
+ u32 nelt;
+ u64 total = 0;
+
+ for (nelt = 0; nelt < FRAME_QUEUE_MAX_NELTS; nelt++)
+ {
+ total += fqh->count[nelt];
+ }
+
+ /*
+ * Print in pairs to condense the output.
+ * Allow entries with 0 counts to be clearly identified, by rounding up.
+ * Any non-zero value will be displayed as at least one percent. This
+ * also means the sum of percentages can be > 100, but that is fine. The
+ * histogram is counted from the last time "trace frame on" was issued.
+ */
+ vlib_cli_output (vm,
+ "%3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% "
+ "%3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%%\n",
+ compute_percent (&fqh->count[0], total),
+ compute_percent (&fqh->count[2], total),
+ compute_percent (&fqh->count[4], total),
+ compute_percent (&fqh->count[6], total),
+ compute_percent (&fqh->count[8], total),
+ compute_percent (&fqh->count[10], total),
+ compute_percent (&fqh->count[12], total),
+ compute_percent (&fqh->count[14], total),
+ compute_percent (&fqh->count[16], total),
+ compute_percent (&fqh->count[18], total),
+ compute_percent (&fqh->count[20], total),
+ compute_percent (&fqh->count[22], total),
+ compute_percent (&fqh->count[24], total),
+ compute_percent (&fqh->count[26], total),
+ compute_percent (&fqh->count[28], total),
+ compute_percent (&fqh->count[30], total));
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ " vector-threshold %d ring size %d in use %d\n",
+ fqt->threshold, fqt->nelts, fqt->n_in_use);
+ vlib_cli_output (vm, " head %12d head_hint %12d tail %12d\n",
+ fqt->head, fqt->head_hint, fqt->tail);
+ vlib_cli_output (vm,
+ " %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n",
+ fqt->n_vectors[0], fqt->n_vectors[1],
+ fqt->n_vectors[2], fqt->n_vectors[3],
+ fqt->n_vectors[4], fqt->n_vectors[5],
+ fqt->n_vectors[6], fqt->n_vectors[7],
+ fqt->n_vectors[8], fqt->n_vectors[9],
+ fqt->n_vectors[10], fqt->n_vectors[11],
+ fqt->n_vectors[12], fqt->n_vectors[13],
+ fqt->n_vectors[14], fqt->n_vectors[15]);
+
+ if (fqt->nelts > 16)
+ {
+ vlib_cli_output (vm,
+ " %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n",
+ fqt->n_vectors[16], fqt->n_vectors[17],
+ fqt->n_vectors[18], fqt->n_vectors[19],
+ fqt->n_vectors[20], fqt->n_vectors[21],
+ fqt->n_vectors[22], fqt->n_vectors[23],
+ fqt->n_vectors[24], fqt->n_vectors[25],
+ fqt->n_vectors[26], fqt->n_vectors[27],
+ fqt->n_vectors[28], fqt->n_vectors[29],
+ fqt->n_vectors[30], fqt->n_vectors[31]);
+ }
+ }
+
+ }
+ return error;
+}
+
+static clib_error_t *
+show_frame_queue_trace (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_frame_queue_main_t *fqm;
+ clib_error_t *error;
+
+ vec_foreach (fqm, tm->frame_queue_mains)
+ {
+ vlib_cli_output (vm, "Worker handoff queue index %u (next node '%U'):",
+ fqm - tm->frame_queue_mains,
+ format_vlib_node_name, vm, fqm->node_index);
+ error = show_frame_queue_internal (vm, fqm, 0);
+ if (error)
+ return error;
+ }
+ return 0;
+}
+
+static clib_error_t *
+show_frame_queue_histogram (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_frame_queue_main_t *fqm;
+ clib_error_t *error;
+
+ vec_foreach (fqm, tm->frame_queue_mains)
+ {
+ vlib_cli_output (vm, "Worker handoff queue index %u (next node '%U'):",
+ fqm - tm->frame_queue_mains,
+ format_vlib_node_name, vm, fqm->node_index);
+ error = show_frame_queue_internal (vm, fqm, 1);
+ if (error)
+ return error;
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_frame_queue_trace,static) = {
+ .path = "show frame-queue",
+ .short_help = "show frame-queue trace",
+ .function = show_frame_queue_trace,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_frame_queue_histogram,static) = {
+ .path = "show frame-queue histogram",
+ .short_help = "show frame-queue histogram",
+ .function = show_frame_queue_histogram,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Modify the number of elements on the frame_queues
+ */
+static clib_error_t *
+test_frame_queue_nelts (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_frame_queue_main_t *fqm;
+ clib_error_t *error = NULL;
+ u32 num_fq;
+ u32 fqix;
+ u32 nelts = 0;
+ u32 index = ~(u32) 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "nelts %u", &nelts))
+ ;
+ else if (unformat (line_input, "index %u", &index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (index > vec_len (tm->frame_queue_mains) - 1)
+ {
+ error = clib_error_return (0,
+ "expecting valid worker handoff queue index");
+ goto done;
+ }
+
+ fqm = vec_elt_at_index (tm->frame_queue_mains, index);
+
+ if ((nelts != 4) && (nelts != 8) && (nelts != 16) && (nelts != 32))
+ {
+ error = clib_error_return (0, "expecting 4,8,16,32");
+ goto done;
+ }
+
+ num_fq = vec_len (fqm->vlib_frame_queues);
+ if (num_fq == 0)
+ {
+ vlib_cli_output (vm, "No frame queues exist\n");
+ goto done;
+ }
+
+ for (fqix = 0; fqix < num_fq; fqix++)
+ {
+ fqm->vlib_frame_queues[fqix]->nelts = nelts;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_test_frame_queue_nelts,static) = {
+ .path = "test frame-queue nelts",
+ .short_help = "test frame-queue nelts (4,8,16,32)",
+ .function = test_frame_queue_nelts,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Modify the max number of packets pulled off the frame queues
+ */
+static clib_error_t *
+test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_frame_queue_main_t *fqm;
+ clib_error_t *error = NULL;
+ u32 num_fq;
+ u32 fqix;
+ u32 threshold = ~(u32) 0;
+ u32 index = ~(u32) 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "threshold %u", &threshold))
+ ;
+ else if (unformat (line_input, "index %u", &index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (index > vec_len (tm->frame_queue_mains) - 1)
+ {
+ error = clib_error_return (0,
+ "expecting valid worker handoff queue index");
+ goto done;
+ }
+
+ fqm = vec_elt_at_index (tm->frame_queue_mains, index);
+
+ if (threshold == ~(u32) 0)
+ {
+ vlib_cli_output (vm, "expecting threshold value\n");
+ goto done;
+ }
+
+ if (threshold == 0)
+ threshold = ~0;
+
+ num_fq = vec_len (fqm->vlib_frame_queues);
+ if (num_fq == 0)
+ {
+ vlib_cli_output (vm, "No frame queues exist\n");
+ goto done;
+ }
+
+ for (fqix = 0; fqix < num_fq; fqix++)
+ {
+ fqm->vlib_frame_queues[fqix]->vector_threshold = threshold;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_test_frame_queue_threshold,static) = {
+ .path = "test frame-queue threshold",
+ .short_help = "test frame-queue threshold N (0=no limit)",
+ .function = test_frame_queue_threshold,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/trace.c b/src/vlib/trace.c
new file mode 100644
index 00000000..6d487ae1
--- /dev/null
+++ b/src/vlib/trace.c
@@ -0,0 +1,552 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace.c: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+/* Helper function for nodes which only trace buffer data. */
+void
+vlib_trace_frame_buffers_only (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ uword n_buffers,
+ uword next_buffer_stride,
+ uword n_buffer_data_bytes_in_trace)
+{
+ u32 n_left, *from;
+
+ n_left = n_buffers;
+ from = buffers;
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u8 *t0, *t1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, n_buffer_data_bytes_in_trace);
+ clib_memcpy (t0, b0->data + b0->current_data,
+ n_buffer_data_bytes_in_trace);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, n_buffer_data_bytes_in_trace);
+ clib_memcpy (t1, b1->data + b1->current_data,
+ n_buffer_data_bytes_in_trace);
+ }
+ from += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u8 *t0;
+
+ bi0 = from[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, n_buffer_data_bytes_in_trace);
+ clib_memcpy (t0, b0->data + b0->current_data,
+ n_buffer_data_bytes_in_trace);
+ }
+ from += 1;
+ n_left -= 1;
+ }
+}
+
+/* Free up all trace buffer memory. */
+always_inline void
+clear_trace_buffer (void)
+{
+ int i;
+ vlib_trace_main_t *tm;
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main (
+ ({
+ void *mainheap;
+
+ tm = &this_vlib_main->trace_main;
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+
+ tm->trace_active_hint = 0;
+
+ for (i = 0; i < vec_len (tm->trace_buffer_pool); i++)
+ if (! pool_is_free_index (tm->trace_buffer_pool, i))
+ vec_free (tm->trace_buffer_pool[i]);
+ pool_free (tm->trace_buffer_pool);
+ clib_mem_set_heap (mainheap);
+ }));
+ /* *INDENT-ON* */
+}
+
+static u8 *
+format_vlib_trace (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ vlib_trace_header_t *h = va_arg (*va, vlib_trace_header_t *);
+ vlib_trace_header_t *e = vec_end (h);
+ vlib_node_t *node, *prev_node;
+ clib_time_t *ct = &vm->clib_time;
+ f64 t;
+
+ prev_node = 0;
+ while (h < e)
+ {
+ node = vlib_get_node (vm, h->node_index);
+
+ if (node != prev_node)
+ {
+ t =
+ (h->time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock;
+ s =
+ format (s, "\n%U: %v", format_time_interval, "h:m:s:u", t,
+ node->name);
+ }
+ prev_node = node;
+
+ if (node->format_trace)
+ s = format (s, "\n %U", node->format_trace, vm, node, h->data);
+ else
+ s = format (s, "\n %U", node->format_buffer, h->data);
+
+ h = vlib_trace_header_next (h);
+ }
+
+ return s;
+}
+
+/* Root of all trace cli commands. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (trace_cli_command,static) = {
+ .path = "trace",
+ .short_help = "Packet tracer commands",
+};
+/* *INDENT-ON* */
+
+static int
+trace_cmp (void *a1, void *a2)
+{
+ vlib_trace_header_t **t1 = a1;
+ vlib_trace_header_t **t2 = a2;
+ i64 dt = t1[0]->time - t2[0]->time;
+ return dt < 0 ? -1 : (dt > 0 ? +1 : 0);
+}
+
+/*
+ * Return 1 if this packet passes the trace filter, or 0 otherwise
+ */
+u32
+filter_accept (vlib_trace_main_t * tm, vlib_trace_header_t * h)
+{
+ vlib_trace_header_t *e = vec_end (h);
+
+ if (tm->filter_flag == 0)
+ return 1;
+
+ if (tm->filter_flag == FILTER_FLAG_INCLUDE)
+ {
+ while (h < e)
+ {
+ if (h->node_index == tm->filter_node_index)
+ return 1;
+ h = vlib_trace_header_next (h);
+ }
+ return 0;
+ }
+ else /* FILTER_FLAG_EXCLUDE */
+ {
+ while (h < e)
+ {
+ if (h->node_index == tm->filter_node_index)
+ return 0;
+ h = vlib_trace_header_next (h);
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Remove traces from the trace buffer pool that don't pass the filter
+ */
+void
+trace_apply_filter (vlib_main_t * vm)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_trace_header_t **h;
+ vlib_trace_header_t ***traces_to_remove = 0;
+ u32 index;
+ u32 trace_index;
+ u32 n_accepted;
+
+ u32 accept;
+
+ if (tm->filter_flag == FILTER_FLAG_NONE)
+ return;
+
+ /*
+ * Ideally we would retain the first N traces that pass the filter instead
+ * of any N traces.
+ */
+ n_accepted = 0;
+ /* *INDENT-OFF* */
+ pool_foreach (h, tm->trace_buffer_pool,
+ ({
+ accept = filter_accept(tm, h[0]);
+
+ if ((n_accepted == tm->filter_count) || !accept)
+ vec_add1 (traces_to_remove, h);
+ else
+ n_accepted++;
+ }));
+ /* *INDENT-ON* */
+
+ /* remove all traces that we don't want to keep */
+ for (index = 0; index < vec_len (traces_to_remove); index++)
+ {
+ trace_index = traces_to_remove[index] - tm->trace_buffer_pool;
+ _vec_len (tm->trace_buffer_pool[trace_index]) = 0;
+ pool_put_index (tm->trace_buffer_pool, trace_index);
+ }
+
+ vec_free (traces_to_remove);
+}
+
+static clib_error_t *
+cli_show_trace_buffer (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_trace_main_t *tm;
+ vlib_trace_header_t **h, **traces;
+ u32 i, index = 0;
+ char *fmt;
+ u8 *s = 0;
+ u32 max;
+
+ /*
+ * By default display only this many traces. To display more, explicitly
+ * specify a max. This prevents unexpectedly huge outputs.
+ */
+ max = 50;
+ while (unformat_check_input (input) != (uword) UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "max %d", &max))
+ ;
+ else
+ return clib_error_create ("expected 'max COUNT', got `%U'",
+ format_unformat_error, input);
+ }
+
+ /* Get active traces from pool. */
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main (
+ ({
+ void *mainheap;
+
+ fmt = "------------------- Start of thread %d %s -------------------\n";
+ s = format (s, fmt, index, vlib_worker_threads[index].name);
+
+ tm = &this_vlib_main->trace_main;
+
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+
+ trace_apply_filter(this_vlib_main);
+
+ traces = 0;
+ pool_foreach (h, tm->trace_buffer_pool,
+ ({
+ vec_add1 (traces, h[0]);
+ }));
+
+ if (vec_len (traces) == 0)
+ {
+ clib_mem_set_heap (mainheap);
+ s = format (s, "No packets in trace buffer\n");
+ goto done;
+ }
+
+ /* Sort them by increasing time. */
+ vec_sort_with_function (traces, trace_cmp);
+
+ for (i = 0; i < vec_len (traces); i++)
+ {
+ if (i == max)
+ {
+ vlib_cli_output (vm, "Limiting display to %d packets."
+ " To display more specify max.", max);
+ goto done;
+ }
+
+ clib_mem_set_heap (mainheap);
+
+ s = format (s, "Packet %d\n%U\n\n", i + 1,
+ format_vlib_trace, vm, traces[i]);
+
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+ }
+
+ done:
+ vec_free (traces);
+ clib_mem_set_heap (mainheap);
+
+ index++;
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_trace_cli,static) = {
+ .path = "show trace",
+ .short_help = "Show trace buffer [max COUNT]",
+ .function = cli_show_trace_buffer,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+cli_add_trace_buffer (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_trace_main_t *tm;
+ vlib_trace_node_t *tn;
+ u32 node_index, add;
+ u8 verbose = 0;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != (uword) UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U %d",
+ unformat_vlib_node, vm, &node_index, &add))
+ ;
+ else if (unformat (line_input, "verbose"))
+ verbose = 1;
+ else
+ {
+ error = clib_error_create ("expected NODE COUNT, got `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main ((
+ {
+ void *oldheap;
+ tm = &this_vlib_main->trace_main;
+ tm->trace_active_hint = 1;
+ tm->verbose = verbose;
+ oldheap =
+ clib_mem_set_heap (this_vlib_main->heap_base);
+ vec_validate (tm->nodes, node_index);
+ tn = tm->nodes + node_index;
+ tn->limit += add; clib_mem_set_heap (oldheap);
+ }));
+ /* *INDENT-ON* */
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (add_trace_cli,static) = {
+ .path = "trace add",
+ .short_help = "Trace given number of packets",
+ .function = cli_add_trace_buffer,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Configure a filter for packet traces.
+ *
+ * This supplements the packet trace feature so that only packets matching
+ * the filter are included in the trace. Currently the only filter is to
+ * keep packets that include a certain node in the trace or exclude a certain
+ * node in the trace.
+ *
+ * The count of traced packets in the "trace add" command is still used to
+ * create a certain number of traces. The "trace filter" command specifies
+ * how many of those packets should be retained in the trace.
+ *
+ * For example, 1Mpps of traffic is arriving and one of those packets is being
+ * dropped. To capture the trace for only that dropped packet, you can do:
+ * trace filter include error-drop 1
+ * trace add dpdk-input 1000000
+ * <wait one second>
+ * show trace
+ *
+ * Note that the filter could be implemented by capturing all traces and just
+ * reducing traces displayed by the "show trace" function. But that would
+ * require a lot of memory for storing the traces, making that infeasible.
+ *
+ * To remove traces from the trace pool that do not include a certain node
+ * requires that the trace be "complete" before applying the filter. To
+ * accomplish this, the trace pool is filtered upon each iteration of the
+ * main vlib loop. Doing so keeps the number of allocated traces down to a
+ * reasonably low number. This requires that tracing for a buffer is not
+ * performed after the vlib main loop iteration completes, i.e. you can't
+ * save away a buffer temporarily then inject it back into the graph and
+ * expect that the trace_index is still valid (such as a traffic manager might
+ * do). A new trace buffer should be allocated for those types of packets.
+ *
+ * The filter can be extended to support multiple nodes and other match
+ * criteria (e.g. input sw_if_index, mac address) but for now just checks if
+ * a specified node is in the trace or not in the trace.
+ */
+static clib_error_t *
+cli_filter_trace (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ u32 filter_node_index;
+ u32 filter_flag;
+ u32 filter_count;
+ void *mainheap;
+
+ if (unformat (input, "include %U %d",
+ unformat_vlib_node, vm, &filter_node_index, &filter_count))
+ {
+ filter_flag = FILTER_FLAG_INCLUDE;
+ }
+ else if (unformat (input, "exclude %U %d",
+ unformat_vlib_node, vm, &filter_node_index,
+ &filter_count))
+ {
+ filter_flag = FILTER_FLAG_EXCLUDE;
+ }
+ else if (unformat (input, "none"))
+ {
+ filter_flag = FILTER_FLAG_NONE;
+ filter_node_index = 0;
+ filter_count = 0;
+ }
+ else
+ return
+ clib_error_create
+ ("expected 'include NODE COUNT' or 'exclude NODE COUNT' or 'none', got `%U'",
+ format_unformat_error, input);
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main (
+ ({
+ tm = &this_vlib_main->trace_main;
+ tm->filter_node_index = filter_node_index;
+ tm->filter_flag = filter_flag;
+ tm->filter_count = filter_count;
+
+ /*
+ * Clear the trace limits to stop any in-progress tracing
+ * Prevents runaway trace allocations when the filter changes (or is removed)
+ */
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+ vec_free (tm->nodes);
+ clib_mem_set_heap (mainheap);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (filter_trace_cli,static) = {
+ .path = "trace filter",
+ .short_help = "filter trace output - include NODE COUNT | exclude NODE COUNT | none",
+ .function = cli_filter_trace,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+cli_clear_trace_buffer (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ clear_trace_buffer ();
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_trace_cli,static) = {
+ .path = "clear trace",
+ .short_help = "Clear trace buffer and free memory",
+ .function = cli_clear_trace_buffer,
+};
+/* *INDENT-ON* */
+
+/* Dummy function to get us linked in. */
+void
+vlib_trace_cli_reference (void)
+{
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/trace.h b/src/vlib/trace.h
new file mode 100644
index 00000000..fc0fc5c8
--- /dev/null
+++ b/src/vlib/trace.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace.h: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_trace_h
+#define included_vlib_trace_h
+
+#include <vppinfra/pool.h>
+
+typedef struct
+{
+ /* CPU time stamp trace was made. */
+ u64 time;
+
+ /* Node which generated this trace. */
+ u32 node_index;
+
+ /* Number of data words in this trace. */
+ u32 n_data;
+
+ /* Trace data follows. */
+ u8 data[0];
+} vlib_trace_header_t;
+
+typedef struct
+{
+ /* Current number of traces in buffer. */
+ u32 count;
+
+ /* Max. number of traces to be added to buffer. */
+ u32 limit;
+} vlib_trace_node_t;
+
+typedef struct
+{
+ /* Pool of trace buffers. */
+ vlib_trace_header_t **trace_buffer_pool;
+
+ u32 last_main_loop_count;
+ u32 filter_node_index;
+ u32 filter_flag;
+#define FILTER_FLAG_NONE 0
+#define FILTER_FLAG_INCLUDE 1
+#define FILTER_FLAG_EXCLUDE 2
+ u32 filter_count;
+
+ /* set on trace add, cleared on clear trace */
+ u32 trace_active_hint;
+
+ /* Per node trace counts. */
+ vlib_trace_node_t *nodes;
+
+ /* verbosity */
+ int verbose;
+} vlib_trace_main_t;
+
+#endif /* included_vlib_trace_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/trace_funcs.h b/src/vlib/trace_funcs.h
new file mode 100644
index 00000000..5280eae9
--- /dev/null
+++ b/src/vlib/trace_funcs.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace_funcs.h: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_trace_funcs_h
+#define included_vlib_trace_funcs_h
+
+always_inline void
+vlib_validate_trace (vlib_trace_main_t * tm, vlib_buffer_t * b)
+{
+ /*
+ * This assert seems right, but it fires constantly;
+ * disabling it appears to make the pain go away.
+ */
+ ASSERT (1 || b->flags & VLIB_BUFFER_IS_TRACED);
+ ASSERT (!pool_is_free_index (tm->trace_buffer_pool, b->trace_index));
+}
+
+always_inline void *
+vlib_add_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * r, vlib_buffer_t * b, u32 n_data_bytes)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_trace_header_t *h;
+ u32 n_data_words;
+
+ vlib_validate_trace (tm, b);
+
+ n_data_bytes = round_pow2 (n_data_bytes, sizeof (h[0]));
+ n_data_words = n_data_bytes / sizeof (h[0]);
+ vec_add2_aligned (tm->trace_buffer_pool[b->trace_index], h,
+ 1 + n_data_words, sizeof (h[0]));
+
+ h->time = vm->cpu_time_last_node_dispatch;
+ h->n_data = n_data_words;
+ h->node_index = r->node_index;
+
+ return h->data;
+}
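+
+/*
+ * Typical usage from a node dispatch function (a sketch; `my_trace_t' and
+ * the fields recorded are hypothetical per-node details):
+ *
+ *   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ *     {
+ *       my_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t));
+ *       t->next_index = next0;
+ *     }
+ */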
+
+always_inline vlib_trace_header_t *
+vlib_trace_header_next (vlib_trace_header_t * h)
+{
+ return h + 1 + h->n_data;
+}
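+
+/*
+ * Sketch of walking one trace buffer's records with the accessor above
+ * (`ti' stands for a trace_index taken from a traced buffer):
+ *
+ *   vlib_trace_header_t *h = tm->trace_buffer_pool[ti];
+ *   vlib_trace_header_t *e = vec_end (h);
+ *   while (h < e)
+ *     {
+ *       .. h->node_index, h->time and h->n_data words of data are valid
+ *       h = vlib_trace_header_next (h);
+ *     }
+ */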
+
+always_inline void
+vlib_free_trace (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_validate_trace (tm, b);
+ _vec_len (tm->trace_buffer_pool[b->trace_index]) = 0;
+ pool_put_index (tm->trace_buffer_pool, b->trace_index);
+}
+
+always_inline void
+vlib_trace_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * r, u32 next_index)
+{
+ vlib_next_frame_t *nf;
+ nf = vlib_node_runtime_get_next_frame (vm, r, next_index);
+ nf->flags |= VLIB_FRAME_TRACE;
+}
+
+void trace_apply_filter (vlib_main_t * vm);
+
+/* Mark buffer as traced and allocate trace buffer. */
+always_inline void
+vlib_trace_buffer (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index, vlib_buffer_t * b, int follow_chain)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_trace_header_t **h;
+
+ /*
+ * Apply filter to existing traces to keep number of allocated traces low.
+ * Performed each time around the main loop.
+ */
+ if (tm->last_main_loop_count != vm->main_loop_count)
+ {
+ tm->last_main_loop_count = vm->main_loop_count;
+ trace_apply_filter (vm);
+ }
+
+ vlib_trace_next_frame (vm, r, next_index);
+
+ pool_get (tm->trace_buffer_pool, h);
+
+ do
+ {
+ b->flags |= VLIB_BUFFER_IS_TRACED;
+ b->trace_index = h - tm->trace_buffer_pool;
+ }
+ while (follow_chain && (b = vlib_get_next_buffer (vm, b)));
+}
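+
+/*
+ * Sketch of how an input node might arm tracing for a received packet,
+ * using the trace-count helpers defined later in this file (b0 and next0
+ * are hypothetical locals; follow_chain is 0 here):
+ *
+ *   u32 n_trace = vlib_get_trace_count (vm, node);
+ *   if (PREDICT_FALSE (n_trace))
+ *     {
+ *       vlib_trace_buffer (vm, node, next0, b0, 0);
+ *       vlib_set_trace_count (vm, node, n_trace - 1);
+ *     }
+ */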
+
+always_inline void
+vlib_buffer_copy_trace_flag (vlib_main_t * vm, vlib_buffer_t * b,
+ u32 bi_target)
+{
+ vlib_buffer_t *b_target = vlib_get_buffer (vm, bi_target);
+ b_target->flags |= b->flags & VLIB_BUFFER_IS_TRACED;
+ b_target->trace_index = b->trace_index;
+}
+
+always_inline u32
+vlib_get_trace_count (vlib_main_t * vm, vlib_node_runtime_t * rt)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_trace_node_t *tn;
+ int n;
+
+ if (rt->node_index >= vec_len (tm->nodes))
+ return 0;
+ tn = tm->nodes + rt->node_index;
+ n = tn->limit - tn->count;
+ ASSERT (n >= 0);
+
+ return n;
+}
+
+always_inline void
+vlib_set_trace_count (vlib_main_t * vm, vlib_node_runtime_t * rt, u32 count)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_trace_node_t *tn = vec_elt_at_index (tm->nodes, rt->node_index);
+
+ ASSERT (count <= tn->limit);
+ tn->count = tn->limit - count;
+}
+
+/* Helper function for nodes which only trace buffer data. */
+void
+vlib_trace_frame_buffers_only (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ uword n_buffers,
+ uword next_buffer_stride,
+ uword n_buffer_data_bytes_in_trace);
+
+#endif /* included_vlib_trace_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/cj.c b/src/vlib/unix/cj.c
new file mode 100644
index 00000000..7c1e9475
--- /dev/null
+++ b/src/vlib/unix/cj.c
@@ -0,0 +1,272 @@
+/*
+ *------------------------------------------------------------------
+ * cj.c
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/**
+ * @file
+ * Circular journal diagnostic mechanism.
+ *
+ * The @c cj thread-safe circular log buffer scheme is occasionally useful
+ * when chasing bugs. Calls to it should not be checked in.
+ */
+/*? %%clicmd:group_label Circular Journal %% ?*/
+/*? %%syscfg:group_label Circular Journal %% ?*/
+
+#include <stdio.h>
+#include <vlib/vlib.h>
+
+#include <vlib/unix/cj.h>
+
+cj_main_t cj_main;
+
+void
+cj_log (u32 type, void *data0, void *data1)
+{
+ u64 new_tail;
+ cj_main_t *cjm = &cj_main;
+ cj_record_t *r;
+
+ if (cjm->enable == 0)
+ return;
+
+ new_tail = __sync_add_and_fetch (&cjm->tail, 1);
+
+ r = (cj_record_t *) & (cjm->records[new_tail & (cjm->num_records - 1)]);
+ r->time = vlib_time_now (cjm->vlib_main);
+ r->thread_index = vlib_get_thread_index ();
+ r->type = type;
+ r->data[0] = pointer_to_uword (data0);
+ r->data[1] = pointer_to_uword (data1);
+}
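+
+/*
+ * Example of temporary instrumentation (a sketch; the type code and the
+ * values logged are arbitrary). As noted above, such calls should not be
+ * checked in:
+ *
+ *   cj_log (3, b0, uword_to_pointer (sw_if_index0, void *));
+ */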
+
+void
+cj_stop (void)
+{
+ cj_main_t *cjm = &cj_main;
+
+ cjm->enable = 0;
+}
+
+
+clib_error_t *
+cj_init (vlib_main_t * vm)
+{
+ cj_main_t *cjm = &cj_main;
+
+ cjm->vlib_main = vm;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (cj_init);
+
+static clib_error_t *
+cj_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ cj_main_t *cjm = &cj_main;
+ int matched = 0;
+ int enable = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "records %d", &cjm->num_records))
+ matched = 1;
+ else if (unformat (input, "on"))
+ enable = 1;
+ else
+ return clib_error_return (0, "cj_config: unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ if (matched == 0)
+ return 0;
+
+ cjm->num_records = max_pow2 (cjm->num_records);
+ vec_validate (cjm->records, cjm->num_records - 1);
+ memset (cjm->records, 0xff, cjm->num_records * sizeof (cj_record_t));
+ cjm->tail = ~0;
+ cjm->enable = enable;
+
+ return 0;
+}
+
+/*?
+ * Configure the circular journal diagnostic mechanism. This is only useful
+ * if you, the developer, have written code to make use of the circular
+ * journal.
+ *
+ * @cfgcmd{records, &lt;number&gt;}
+ * Configure the number of records to allocate for the circular journal.
+ *
+ * @cfgcmd{on}
+ * Enable the collection of records in the circular journal at the
+ * earliest opportunity.
+?*/
+VLIB_CONFIG_FUNCTION (cj_config, "cj");
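+
+/*
+ * Example startup.conf stanza (a sketch): allocate journal records and
+ * start collecting at init. The record count is rounded up to the next
+ * power of two.
+ *
+ *   cj {
+ *     records 65536
+ *     on
+ *   }
+ */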
+
+void
+cj_enable_disable (int is_enable)
+{
+ cj_main_t *cjm = &cj_main;
+
+ if (cjm->num_records)
+ cjm->enable = is_enable;
+ else
+ vlib_cli_output (cjm->vlib_main, "CJ not configured...");
+}
+
+static inline void
+cj_dump_one_record (cj_record_t * r)
+{
+ fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n",
+ r->thread_index, r->time, r->type,
+ (long long unsigned int) r->data[0],
+ (long long unsigned int) r->data[1]);
+}
+
+static void
+cj_dump_internal (u8 filter0_enable, u64 filter0,
+ u8 filter1_enable, u64 filter1)
+{
+ cj_main_t *cjm = &cj_main;
+ cj_record_t *r;
+ u32 i, index;
+
+ if (cjm->num_records == 0)
+ {
+ fprintf (stderr, "CJ not configured...\n");
+ return;
+ }
+
+ if (cjm->tail == (u64) ~ 0)
+ {
+ fprintf (stderr, "No data collected...\n");
+ return;
+ }
+
+ /* Has the trace wrapped? */
+ index = (cjm->tail + 1) & (cjm->num_records - 1);
+ r = &(cjm->records[index]);
+
+ if (r->thread_index != (u32) ~ 0)
+ {
+ /* Yes, dump from tail + 1 to the end */
+ for (i = index; i < cjm->num_records; i++)
+ {
+ if (filter0_enable && (r->data[0] != filter0))
+ goto skip;
+ if (filter1_enable && (r->data[1] != filter1))
+ goto skip;
+ cj_dump_one_record (r);
+ skip:
+ r++;
+ }
+ }
+ /* dump from the beginning through the final tail */
+ r = cjm->records;
+ for (i = 0; i <= cjm->tail; i++)
+ {
+ if (filter0_enable && (r->data[0] != filter0))
+ goto skip2;
+ if (filter1_enable && (r->data[1] != filter1))
+ goto skip2;
+ cj_dump_one_record (r);
+ skip2:
+ r++;
+ }
+}
+
+void
+cj_dump (void)
+{
+ cj_dump_internal (0, 0, 0, 0);
+}
+
+void
+cj_dump_filter_data0 (u64 filter0)
+{
+ cj_dump_internal (1 /* enable f0 */ , filter0, 0, 0);
+}
+
+void
+cj_dump_filter_data1 (u64 filter1)
+{
+ cj_dump_internal (0, 0, 1 /* enable f1 */ , filter1);
+}
+
+void
+cj_dump_filter_data12 (u64 filter0, u64 filter1)
+{
+ cj_dump_internal (1, filter0, 1, filter1);
+}
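+
+/*
+ * A convenient way to use these entry points is from a debugger, e.g.
+ * (a sketch, using gdb):
+ *
+ *   (gdb) call cj_dump ()
+ *   (gdb) call cj_dump_filter_data0 (0x2a)
+ */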
+
+static clib_error_t *
+cj_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ int is_enable = -1;
+ int is_dump = -1;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable") || unformat (input, "on"))
+ is_enable = 1;
+ else if (unformat (input, "disable") || unformat (input, "off"))
+ is_enable = 0;
+ else if (unformat (input, "dump"))
+ is_dump = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (is_enable >= 0)
+ cj_enable_disable (is_enable);
+
+ if (is_dump > 0)
+ cj_dump ();
+
+ return 0;
+}
+
+/*?
+ * Enable or disable the collection of diagnostic data into a
+ * circular journal, or dump the circular journal diagnostic data.
+ * This is only useful if you, the developer, have written code to make
+ * use of the circular journal.
+ *
+ * When dumping the data it is formatted and sent to @c stderr of the
+ * VPP process; when running VPP in <code>unix interactive</code> mode
+ * this is typically the same place as the Debug CLI.
+?*/
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cj_command,static) = {
+ .path = "cj",
+ .short_help = "cj <enable | disable | dump>",
+ .function = cj_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/cj.h b/src/vlib/unix/cj.h
new file mode 100644
index 00000000..d0a1d46e
--- /dev/null
+++ b/src/vlib/unix/cj.h
@@ -0,0 +1,79 @@
+/*
+ *------------------------------------------------------------------
+ * cj.h
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __included_cj_h__
+#define __included_cj_h__
+
+typedef struct
+{
+ f64 time;
+ u32 thread_index;
+ u32 type;
+ u64 data[2];
+} cj_record_t;
+
+typedef struct
+{
+ volatile u64 tail;
+ cj_record_t *records;
+ u32 num_records;
+ volatile u32 enable;
+
+ vlib_main_t *vlib_main;
+} cj_main_t;
+
+void cj_log (u32 type, void *data0, void *data1);
+
+/*
+ * Supply this in the application main so we can log from any library:
+ * declare a weak reference in the library, and off you go.
+ */
+
+#define DECLARE_CJ_GLOBAL_LOG \
+void cj_global_log (unsigned type, void * data0, void * data1) \
+ __attribute__ ((weak)); \
+ \
+unsigned __cj_type; \
+void * __cj_data0; \
+void * __cj_data1; \
+ \
+void \
+cj_global_log (unsigned type, void * data0, void * data1) \
+{ \
+ __cj_type = type; \
+ __cj_data0 = data0; \
+ __cj_data1 = data1; \
+}
+
+#define CJ_GLOBAL_LOG_PROTOTYPE
+void
+cj_global_log (unsigned type, void *data0, void *data1)
+__attribute__ ((weak));
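+
+/*
+ * Sketch of the pattern described above: the application supplies the
+ * definition once via DECLARE_CJ_GLOBAL_LOG; a library relies on the weak
+ * prototype above and guards the call in case no definition was linked in:
+ *
+ *   DECLARE_CJ_GLOBAL_LOG;                  .. once, in the application
+ *
+ *   if (cj_global_log)                      .. in library code
+ *     cj_global_log (type, data0, data1);
+ */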
+
+void cj_stop (void);
+
+#endif /* __included_cj_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c
new file mode 100644
index 00000000..be3c813a
--- /dev/null
+++ b/src/vlib/unix/cli.c
@@ -0,0 +1,3468 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.c: Unix stdin/socket CLI.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+/**
+ * @file
+ * @brief Unix stdin/socket command line interface.
+ * Provides a command line interface so humans can interact with VPP.
+ * This is predominantly a debugging and testing mechanism.
+ */
+/*? %%clicmd:group_label Command line session %% ?*/
+/*? %%syscfg:group_label Command line session %% ?*/
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/timer.h>
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <termios.h>
+#include <signal.h>
+#include <unistd.h>
+#include <arpa/telnet.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/** ANSI escape code. */
+#define ESC "\x1b"
+
+/** ANSI Control Sequence Introducer. */
+#define CSI ESC "["
+
+/** ANSI clear screen. */
+#define ANSI_CLEAR CSI "2J" CSI "1;1H"
+/** ANSI reset color settings. */
+#define ANSI_RESET CSI "0m"
+/** ANSI Start bold text. */
+#define ANSI_BOLD CSI "1m"
+/** ANSI Stop bold text. */
+#define ANSI_DIM CSI "2m"
+/** ANSI Start dark red text. */
+#define ANSI_DRED ANSI_DIM CSI "31m"
+/** ANSI Start bright red text. */
+#define ANSI_BRED ANSI_BOLD CSI "31m"
+/** ANSI clear line cursor is on. */
+#define ANSI_CLEARLINE CSI "2K"
+/** ANSI scroll screen down one line. */
+#define ANSI_SCROLLDN CSI "1T"
+/** ANSI save cursor position. */
+#define ANSI_SAVECURSOR CSI "s"
+/** ANSI restore cursor position if previously saved. */
+#define ANSI_RESTCURSOR CSI "u"
+
+/** Maximum depth into a byte stream from which to compile a Telnet
+ * protocol message. This is a safety measure. */
+#define UNIX_CLI_MAX_DEPTH_TELNET 24
+
+/** Minimum terminal width we will accept */
+#define UNIX_CLI_MIN_TERMINAL_WIDTH 1
+/** Maximum terminal width we will accept */
+#define UNIX_CLI_MAX_TERMINAL_WIDTH 512
+/** Minimum terminal height we will accept */
+#define UNIX_CLI_MIN_TERMINAL_HEIGHT 1
+/** Maximum terminal height we will accept */
+#define UNIX_CLI_MAX_TERMINAL_HEIGHT 512
+
+
+/** A CLI banner line. */
+typedef struct
+{
+ u8 *line; /**< The line to print. */
+ u32 length; /**< The length of the line without terminating NUL. */
+} unix_cli_banner_t;
+
+#define _(a) { .line = (u8 *)(a), .length = sizeof(a) - 1 }
+/** Plain welcome banner. */
+static unix_cli_banner_t unix_cli_banner[] = {
+ _(" _______ _ _ _____ ___ \n"),
+ _(" __/ __/ _ \\ (_)__ | | / / _ \\/ _ \\\n"),
+ _(" _/ _// // / / / _ \\ | |/ / ___/ ___/\n"),
+ _(" /_/ /____(_)_/\\___/ |___/_/ /_/ \n"),
+ _("\n")
+};
+
+/** ANSI color welcome banner. */
+static unix_cli_banner_t unix_cli_banner_color[] = {
+ _(ANSI_BRED " _______ _ " ANSI_RESET " _ _____ ___ \n"),
+ _(ANSI_BRED " __/ __/ _ \\ (_)__ " ANSI_RESET " | | / / _ \\/ _ \\\n"),
+ _(ANSI_BRED " _/ _// // / / / _ \\" ANSI_RESET " | |/ / ___/ ___/\n"),
+ _(ANSI_BRED " /_/ /____(_)_/\\___/" ANSI_RESET " |___/_/ /_/ \n"),
+ _("\n")
+};
+
+#undef _
+
+/** Pager line index */
+typedef struct
+{
+ /** Index into pager_vector */
+ u32 line;
+
+ /** Offset of the string in the line */
+ u32 offset;
+
+ /** Length of the string in the line */
+ u32 length;
+} unix_cli_pager_index_t;
+
+
+/** Unix CLI session. */
+typedef struct
+{
+ /** The file index held by unix.c */
+ u32 clib_file_index;
+
+ /** Vector of output pending write to file descriptor. */
+ u8 *output_vector;
+
+ /** Vector of input saved by Unix input node to be processed by
+ CLI process. */
+ u8 *input_vector;
+
+ /** This session has command history. */
+ u8 has_history;
+ /** Array of vectors of commands in the history. */
+ u8 **command_history;
+ /** The command currently pointed at by the history cursor. */
+ u8 *current_command;
+ /** How far from the end of the history array the user has browsed. */
+ i32 excursion;
+
+ /** Maximum number of history entries this session will store. */
+ u32 history_limit;
+
+ /** Current command line counter */
+ u32 command_number;
+
+ /** The string being searched for in the history. */
+ u8 *search_key;
+ /** If non-zero then the CLI is searching in the history array.
+ * - @c -1 means search backwards.
+ * - @c 1 means search forwards.
+ */
+ int search_mode;
+
+ /** Position of the insert cursor on the current input line */
+ u32 cursor;
+
+ /** Line mode or char mode */
+ u8 line_mode;
+
+ /** Set if the CRLF mode wants CR + LF */
+ u8 crlf_mode;
+
+ /** Can we do ANSI output? */
+ u8 ansi_capable;
+
+ /** Has the session started? */
+ u8 started;
+
+ /** Disable the pager? */
+ u8 no_pager;
+
+ /** Whether the session is interactive or not.
+ * Controls things like initial banner, the CLI prompt etc. */
+ u8 is_interactive;
+
+ /** Whether the session is attached to a socket. */
+ u8 is_socket;
+
+ /** If EPIPE has been detected, prevent further write-related
+ * activity on the descriptor.
+ */
+ u8 has_epipe;
+
+ /** Pager buffer */
+ u8 **pager_vector;
+
+ /** Index of line fragments in the pager buffer */
+ unix_cli_pager_index_t *pager_index;
+
+ /** Line number of top of page */
+ u32 pager_start;
+
+ /** Terminal width */
+ u32 width;
+
+ /** Terminal height */
+ u32 height;
+
+ /** Process node identifier */
+ u32 process_node_index;
+} unix_cli_file_t;
+
+/** Resets the pager buffer and other data.
+ * @param f The CLI session whose pager needs to be reset.
+ */
+always_inline void
+unix_cli_pager_reset (unix_cli_file_t * f)
+{
+ u8 **p;
+
+ f->pager_start = 0;
+
+ vec_free (f->pager_index);
+ f->pager_index = 0;
+
+ vec_foreach (p, f->pager_vector)
+ {
+ vec_free (*p);
+ }
+ vec_free (f->pager_vector);
+ f->pager_vector = 0;
+}
+
+/** Release storage used by a CLI session.
+ * @param f The CLI session whose storage needs to be released.
+ */
+always_inline void
+unix_cli_file_free (unix_cli_file_t * f)
+{
+ vec_free (f->output_vector);
+ vec_free (f->input_vector);
+ unix_cli_pager_reset (f);
+}
+
+/** CLI actions */
+typedef enum
+{
+ UNIX_CLI_PARSE_ACTION_NOACTION = 0, /**< No action */
+ UNIX_CLI_PARSE_ACTION_CRLF, /**< Carriage return, newline or enter */
+ UNIX_CLI_PARSE_ACTION_TAB, /**< Tab key */
+ UNIX_CLI_PARSE_ACTION_ERASE, /**< Erase cursor left */
+ UNIX_CLI_PARSE_ACTION_ERASERIGHT, /**< Erase cursor right */
+ UNIX_CLI_PARSE_ACTION_UP, /**< Up arrow */
+ UNIX_CLI_PARSE_ACTION_DOWN, /**< Down arrow */
+ UNIX_CLI_PARSE_ACTION_LEFT, /**< Left arrow */
+ UNIX_CLI_PARSE_ACTION_RIGHT, /**< Right arrow */
+ UNIX_CLI_PARSE_ACTION_HOME, /**< Home key (jump to start of line) */
+ UNIX_CLI_PARSE_ACTION_END, /**< End key (jump to end of line) */
+ UNIX_CLI_PARSE_ACTION_WORDLEFT, /**< Jump cursor to start of left word */
+ UNIX_CLI_PARSE_ACTION_WORDRIGHT, /**< Jump cursor to start of right word */
+ UNIX_CLI_PARSE_ACTION_ERASELINELEFT, /**< Erase line to left of cursor */
+ UNIX_CLI_PARSE_ACTION_ERASELINERIGHT, /**< Erase line to right & including cursor */
+ UNIX_CLI_PARSE_ACTION_CLEAR, /**< Clear the terminal */
+ UNIX_CLI_PARSE_ACTION_REVSEARCH, /**< Search backwards in command history */
+ UNIX_CLI_PARSE_ACTION_FWDSEARCH, /**< Search forwards in command history */
+ UNIX_CLI_PARSE_ACTION_YANK, /**< Undo last erase action */
+ UNIX_CLI_PARSE_ACTION_TELNETIAC, /**< Telnet control code */
+
+ UNIX_CLI_PARSE_ACTION_PAGER_CRLF, /**< Enter pressed (CR, CRLF, LF, etc) */
+ UNIX_CLI_PARSE_ACTION_PAGER_QUIT, /**< Exit the pager session */
+ UNIX_CLI_PARSE_ACTION_PAGER_NEXT, /**< Scroll to next page */
+ UNIX_CLI_PARSE_ACTION_PAGER_DN, /**< Scroll to next line */
+ UNIX_CLI_PARSE_ACTION_PAGER_UP, /**< Scroll to previous line */
+ UNIX_CLI_PARSE_ACTION_PAGER_TOP, /**< Scroll to first line */
+ UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM, /**< Scroll to last line */
+ UNIX_CLI_PARSE_ACTION_PAGER_PGDN, /**< Scroll to next page */
+ UNIX_CLI_PARSE_ACTION_PAGER_PGUP, /**< Scroll to previous page */
+ UNIX_CLI_PARSE_ACTION_PAGER_REDRAW, /**< Clear and redraw the page on the terminal */
+ UNIX_CLI_PARSE_ACTION_PAGER_SEARCH, /**< Search the pager buffer */
+
+ UNIX_CLI_PARSE_ACTION_PARTIALMATCH, /**< Action parser found a partial match */
+ UNIX_CLI_PARSE_ACTION_NOMATCH /**< Action parser did not find any match */
+} unix_cli_parse_action_t;
+
+/** @brief Mapping of input buffer strings to action values.
+ * @note This won't work as a hash since we need to be able to do
+ * partial matches on the string.
+ */
+typedef struct
+{
+ u8 *input; /**< Input string to match. */
+ u32 len; /**< Length of input without final NUL. */
+ unix_cli_parse_action_t action; /**< Action to take when matched. */
+} unix_cli_parse_actions_t;
+
+/** @brief Given a capital ASCII letter character return a @c NUL terminated
+ * string with the control code for that letter.
+ *
+ * @param c An ASCII character.
+ * @return A @c NUL terminated string of type @c u8[].
+ *
+ * @par Example
+ * @c CTL('A') returns <code>{ 0x01, 0x00 }</code> as a @c u8[].
+ */
+#define CTL(c) (u8[]){ (c) - '@', 0 }
+
+#define _(a,b) { .input = (u8 *)(a), .len = sizeof(a) - 1, .action = (b) }
+/**
+ * Patterns to match on a CLI input stream.
+ * @showinitializer
+ */
+static unix_cli_parse_actions_t unix_cli_parse_strings[] = {
+ /* Line handling */
+ _("\r\n", UNIX_CLI_PARSE_ACTION_CRLF), /* Must be before '\r' */
+ _("\n", UNIX_CLI_PARSE_ACTION_CRLF),
+ _("\r\0", UNIX_CLI_PARSE_ACTION_CRLF), /* Telnet does this */
+ _("\r", UNIX_CLI_PARSE_ACTION_CRLF),
+
+ /* Unix shell control codes */
+ _(CTL ('B'), UNIX_CLI_PARSE_ACTION_LEFT),
+ _(CTL ('F'), UNIX_CLI_PARSE_ACTION_RIGHT),
+ _(CTL ('P'), UNIX_CLI_PARSE_ACTION_UP),
+ _(CTL ('N'), UNIX_CLI_PARSE_ACTION_DOWN),
+ _(CTL ('A'), UNIX_CLI_PARSE_ACTION_HOME),
+ _(CTL ('E'), UNIX_CLI_PARSE_ACTION_END),
+ _(CTL ('D'), UNIX_CLI_PARSE_ACTION_ERASERIGHT),
+ _(CTL ('U'), UNIX_CLI_PARSE_ACTION_ERASELINELEFT),
+ _(CTL ('K'), UNIX_CLI_PARSE_ACTION_ERASELINERIGHT),
+ _(CTL ('Y'), UNIX_CLI_PARSE_ACTION_YANK),
+ _(CTL ('L'), UNIX_CLI_PARSE_ACTION_CLEAR),
+ _(ESC "b", UNIX_CLI_PARSE_ACTION_WORDLEFT), /* Alt-B */
+ _(ESC "f", UNIX_CLI_PARSE_ACTION_WORDRIGHT), /* Alt-F */
+ _("\b", UNIX_CLI_PARSE_ACTION_ERASE), /* ^H */
+ _("\x7f", UNIX_CLI_PARSE_ACTION_ERASE), /* Backspace */
+ _("\t", UNIX_CLI_PARSE_ACTION_TAB), /* ^I */
+
+ /* VT100 Normal mode - Broadest support */
+ _(CSI "A", UNIX_CLI_PARSE_ACTION_UP),
+ _(CSI "B", UNIX_CLI_PARSE_ACTION_DOWN),
+ _(CSI "C", UNIX_CLI_PARSE_ACTION_RIGHT),
+ _(CSI "D", UNIX_CLI_PARSE_ACTION_LEFT),
+ _(CSI "H", UNIX_CLI_PARSE_ACTION_HOME),
+ _(CSI "F", UNIX_CLI_PARSE_ACTION_END),
+ _(CSI "3~", UNIX_CLI_PARSE_ACTION_ERASERIGHT), /* Delete */
+ _(CSI "1;5D", UNIX_CLI_PARSE_ACTION_WORDLEFT), /* C-Left */
+ _(CSI "1;5C", UNIX_CLI_PARSE_ACTION_WORDRIGHT), /* C-Right */
+
+ /* VT100 Application mode - Some Gnome Terminal functions use these */
+ _(ESC "OA", UNIX_CLI_PARSE_ACTION_UP),
+ _(ESC "OB", UNIX_CLI_PARSE_ACTION_DOWN),
+ _(ESC "OC", UNIX_CLI_PARSE_ACTION_RIGHT),
+ _(ESC "OD", UNIX_CLI_PARSE_ACTION_LEFT),
+ _(ESC "OH", UNIX_CLI_PARSE_ACTION_HOME),
+ _(ESC "OF", UNIX_CLI_PARSE_ACTION_END),
+
+ /* ANSI X3.41-1974 - sent by Microsoft Telnet and PuTTY */
+ _(CSI "1~", UNIX_CLI_PARSE_ACTION_HOME),
+ _(CSI "4~", UNIX_CLI_PARSE_ACTION_END),
+
+ /* Emacs-ish history search */
+ _(CTL ('S'), UNIX_CLI_PARSE_ACTION_FWDSEARCH),
+ _(CTL ('R'), UNIX_CLI_PARSE_ACTION_REVSEARCH),
+
+ /* Other protocol things */
+ _("\xff", UNIX_CLI_PARSE_ACTION_TELNETIAC), /* IAC */
+ _("\0", UNIX_CLI_PARSE_ACTION_NOACTION), /* NUL */
+ _(NULL, UNIX_CLI_PARSE_ACTION_NOMATCH)
+};
+
+/**
+ * Patterns to match when a CLI session is in the pager.
+ * @showinitializer
+ */
+static unix_cli_parse_actions_t unix_cli_parse_pager[] = {
+ /* Line handling */
+ _("\r\n", UNIX_CLI_PARSE_ACTION_PAGER_CRLF), /* Must be before '\r' */
+ _("\n", UNIX_CLI_PARSE_ACTION_PAGER_CRLF),
+ _("\r\0", UNIX_CLI_PARSE_ACTION_PAGER_CRLF), /* Telnet does this */
+ _("\r", UNIX_CLI_PARSE_ACTION_PAGER_CRLF),
+
+ /* Pager commands */
+ _(" ", UNIX_CLI_PARSE_ACTION_PAGER_NEXT),
+ _("q", UNIX_CLI_PARSE_ACTION_PAGER_QUIT),
+ _(CTL ('L'), UNIX_CLI_PARSE_ACTION_PAGER_REDRAW),
+ _(CTL ('R'), UNIX_CLI_PARSE_ACTION_PAGER_REDRAW),
+ _("/", UNIX_CLI_PARSE_ACTION_PAGER_SEARCH),
+
+ /* VT100 */
+ _(CSI "A", UNIX_CLI_PARSE_ACTION_PAGER_UP),
+ _(CSI "B", UNIX_CLI_PARSE_ACTION_PAGER_DN),
+ _(CSI "H", UNIX_CLI_PARSE_ACTION_PAGER_TOP),
+ _(CSI "F", UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM),
+
+ /* VT100 Application mode */
+ _(ESC "OA", UNIX_CLI_PARSE_ACTION_PAGER_UP),
+ _(ESC "OB", UNIX_CLI_PARSE_ACTION_PAGER_DN),
+ _(ESC "OH", UNIX_CLI_PARSE_ACTION_PAGER_TOP),
+ _(ESC "OF", UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM),
+
+ /* ANSI X3.41-1974 */
+ _(CSI "1~", UNIX_CLI_PARSE_ACTION_PAGER_TOP),
+ _(CSI "4~", UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM),
+ _(CSI "5~", UNIX_CLI_PARSE_ACTION_PAGER_PGUP),
+ _(CSI "6~", UNIX_CLI_PARSE_ACTION_PAGER_PGDN),
+
+ /* Other protocol things */
+ _("\xff", UNIX_CLI_PARSE_ACTION_TELNETIAC), /* IAC */
+ _("\0", UNIX_CLI_PARSE_ACTION_NOACTION), /* NUL */
+ _(NULL, UNIX_CLI_PARSE_ACTION_NOMATCH)
+};
+
+#undef _
+
+/** CLI session events. */
+typedef enum
+{
+ UNIX_CLI_PROCESS_EVENT_READ_READY, /**< A file descriptor has data to be read. */
+ UNIX_CLI_PROCESS_EVENT_QUIT, /**< A CLI session wants to close. */
+} unix_cli_process_event_type_t;
+
+/** CLI global state. */
+typedef struct
+{
+ /** Prompt string for CLI. */
+ u8 *cli_prompt;
+
+ /** Vec pool of CLI sessions. */
+ unix_cli_file_t *cli_file_pool;
+
+ /** Vec pool of unused session indices. */
+ u32 *unused_cli_process_node_indices;
+
+ /** The session index of the stdin cli */
+ u32 stdin_cli_file_index;
+
+ /** File pool index of current input. */
+ u32 current_input_file_index;
+} unix_cli_main_t;
+
+/** CLI global state */
+static unix_cli_main_t unix_cli_main;
+
+/**
+ * @brief Search for a byte sequence in the action list.
+ *
+ * Searches the @ref unix_cli_parse_actions_t list in @a a for a match with
+ * the bytes in @a input of maximum length @a ilen bytes.
+ * When a match is made @a *matched indicates how many bytes were matched.
+ * Returns a value from the enum @ref unix_cli_parse_action_t to indicate
+ * whether no match was found, a partial match was found or a complete
+ * match was found and what action, if any, should be taken.
+ *
+ * @param[in] a Actions list to search within.
+ * @param[in] input String fragment to search for.
+ * @param[in] ilen Length of the string in 'input'.
+ * @param[out] matched Pointer to an integer that will contain the number
+ * of bytes matched when a complete match is found.
+ *
+ * @return Action from @ref unix_cli_parse_action_t that the string fragment
+ * matches.
+ * @ref UNIX_CLI_PARSE_ACTION_PARTIALMATCH is returned when the
+ * whole input string matches the start of at least one action.
+ * @ref UNIX_CLI_PARSE_ACTION_NOMATCH is returned when there is no
+ * match at all.
+ */
+static unix_cli_parse_action_t
+unix_cli_match_action (unix_cli_parse_actions_t * a,
+ u8 * input, u32 ilen, i32 * matched)
+{
+ u8 partial = 0;
+
+ while (a->input)
+ {
+ if (ilen >= a->len)
+ {
+ /* see if the start of the input buffer exactly matches the current
+ * action string. */
+ if (memcmp (input, a->input, a->len) == 0)
+ {
+ *matched = a->len;
+ return a->action;
+ }
+ }
+ else
+ {
+ /* if the first ilen characters match, flag this as a partial -
+ * meaning keep collecting bytes in case of a future match */
+ if (memcmp (input, a->input, ilen) == 0)
+ partial = 1;
+ }
+
+ /* check next action */
+ a++;
+ }
+
+ return partial ?
+ UNIX_CLI_PARSE_ACTION_PARTIALMATCH : UNIX_CLI_PARSE_ACTION_NOMATCH;
+}
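+
+/*
+ * Example call (a sketch; `cf' is a CLI session with pending input):
+ *
+ *   i32 matched = 0;
+ *   unix_cli_parse_action_t a =
+ *     unix_cli_match_action (unix_cli_parse_strings, cf->input_vector,
+ *                            vec_len (cf->input_vector), &matched);
+ *
+ * On a full match, `matched' bytes are consumed from the input vector; on
+ * UNIX_CLI_PARSE_ACTION_PARTIALMATCH the caller waits for more input.
+ */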
+
+
+/** Add bytes to the output vector and then flag the I/O system that bytes
+ * are available to be sent.
+ */
+static void
+unix_cli_add_pending_output (clib_file_t * uf,
+ unix_cli_file_t * cf,
+ u8 * buffer, uword buffer_bytes)
+{
+ clib_file_main_t *fm = &file_main;
+
+ vec_add (cf->output_vector, buffer, buffer_bytes);
+ if (vec_len (cf->output_vector) > 0)
+ {
+ int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ if (!skip_update)
+ fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+}
+
+/** Delete all bytes from the output vector and flag the I/O system
+ * that no more bytes are available to be sent.
+ */
+static void
+unix_cli_del_pending_output (clib_file_t * uf,
+ unix_cli_file_t * cf, uword n_bytes)
+{
+ clib_file_main_t *fm = &file_main;
+
+ vec_delete (cf->output_vector, n_bytes, 0);
+ if (vec_len (cf->output_vector) <= 0)
+ {
+ int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ if (!skip_update)
+ fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+}
+
+/** @brief A bit like strchr with a buffer length limit.
+ * Search a buffer for the first instance of a character up to the limit of
+ * the buffer length. If found then return the position of that character.
+ *
+ * The key departure from strchr is that if the character is not found then
+ * return the buffer length.
+ *
+ * @param chr The byte value to search for.
+ * @param str The buffer in which to search for the value.
+ * @param len The depth into the buffer to search.
+ *
+ * @return The index of the first occurrence of \c chr. If \c chr is not
+ * found then \c len instead.
+ */
+always_inline word
+unix_vlib_findchr (u8 chr, u8 * str, word len)
+{
+ word i = 0;
+ for (i = 0; i < len; i++, str++)
+ {
+ if (*str == chr)
+ return i;
+ }
+ return len;
+}
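+
+/*
+ * For example, unix_vlib_findchr ('\n', (u8 *) "ab\ncd", 5) returns 2,
+ * while unix_vlib_findchr ('x', (u8 *) "ab\ncd", 5) returns 5 (the length).
+ */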
+
+/** @brief Send a buffer to the CLI stream if possible, enqueue it otherwise.
+ * Attempts to write given buffer to the file descriptor of the given
+ * Unix CLI session. If that session already has data in the output buffer
+ * or if the write attempt tells us to try again later then the given buffer
+ * is appended to the pending output buffer instead.
+ *
+ * This is typically called only from \c unix_vlib_cli_output_cooked since
+ * that is where CRLF handling occurs or from places where we explicitly do
+ * not want cooked handling.
+ *
+ * @param cf Unix CLI session of the desired stream to write to.
+ * @param uf The Unix file structure of the desired stream to write to.
+ * @param buffer Pointer to the buffer that needs to be written.
+ * @param buffer_bytes The number of bytes from \c buffer to write.
+ */
+static void
+unix_vlib_cli_output_raw (unix_cli_file_t * cf,
+ clib_file_t * uf, u8 * buffer, uword buffer_bytes)
+{
+ int n = 0;
+
+ if (cf->has_epipe) /* don't try writing anything */
+ return;
+
+ if (vec_len (cf->output_vector) == 0)
+ {
+ if (cf->is_socket)
+ /* If it's a socket we use MSG_NOSIGNAL to prevent SIGPIPE */
+ n = send (uf->file_descriptor, buffer, buffer_bytes, MSG_NOSIGNAL);
+ else
+ n = write (uf->file_descriptor, buffer, buffer_bytes);
+ }
+
+ if (n < 0 && errno != EAGAIN)
+ {
+ if (errno == EPIPE)
+ {
+ /* connection closed on us */
+ unix_main_t *um = &unix_main;
+ cf->has_epipe = 1;
+ vlib_process_signal_event (um->vlib_main, cf->process_node_index,
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+ uf->private_data);
+ }
+ else
+ {
+ clib_unix_warning ("write");
+ }
+ }
+ else if ((word) n < (word) buffer_bytes)
+ {
+ /* We got EAGAIN or we already have stuff in the buffer;
+ * queue up whatever didn't get sent for later. */
+ if (n < 0)
+ n = 0;
+ unix_cli_add_pending_output (uf, cf, buffer + n, buffer_bytes - n);
+ }
+}
+
+/** @brief Process a buffer for CRLF handling before outputting it to the CLI.
+ *
+ * @param cf Unix CLI session of the desired stream to write to.
+ * @param uf The Unix file structure of the desired stream to write to.
+ * @param buffer Pointer to the buffer that needs to be written.
+ * @param buffer_bytes The number of bytes from \c buffer to write.
+ */
+static void
+unix_vlib_cli_output_cooked (unix_cli_file_t * cf,
+ clib_file_t * uf,
+ u8 * buffer, uword buffer_bytes)
+{
+ word end = 0, start = 0;
+
+ while (end < buffer_bytes)
+ {
+ if (cf->crlf_mode)
+ {
+ /* iterate the line on \n's so we can insert a \r before it */
+ end = unix_vlib_findchr ('\n',
+ buffer + start,
+ buffer_bytes - start) + start;
+ }
+ else
+ {
+ /* otherwise just send the whole buffer */
+ end = buffer_bytes;
+ }
+
+ unix_vlib_cli_output_raw (cf, uf, buffer + start, end - start);
+
+ if (cf->crlf_mode)
+ {
+ if (end < buffer_bytes)
+ {
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) "\r\n", 2);
+ end++; /* skip the \n that we already sent */
+ }
+ start = end;
+ }
+ }
+}
+
+/** @brief Output the CLI prompt */
+static void
+unix_cli_cli_prompt (unix_cli_file_t * cf, clib_file_t * uf)
+{
+ unix_cli_main_t *cm = &unix_cli_main;
+
+ if (cf->is_interactive) /* Only interactive sessions get a prompt */
+ unix_vlib_cli_output_raw (cf, uf, cm->cli_prompt,
+ vec_len (cm->cli_prompt));
+}
+
+/** @brief Output a pager prompt and show number of buffered lines */
+static void
+unix_cli_pager_prompt (unix_cli_file_t * cf, clib_file_t * uf)
+{
+ u8 *prompt;
+ u32 h;
+
+ h = cf->pager_start + (cf->height - 1);
+ if (h > vec_len (cf->pager_index))
+ h = vec_len (cf->pager_index);
+
+ prompt = format (0, "\r%s-- more -- (%d-%d/%d)%s",
+ cf->ansi_capable ? ANSI_BOLD : "",
+ cf->pager_start + 1,
+ h,
+ vec_len (cf->pager_index),
+ cf->ansi_capable ? ANSI_RESET : "");
+
+ unix_vlib_cli_output_cooked (cf, uf, prompt, vec_len (prompt));
+
+ vec_free (prompt);
+}
+
+/** @brief Output a pager "skipping" message */
+static void
+unix_cli_pager_message (unix_cli_file_t * cf, clib_file_t * uf,
+ char *message, char *postfix)
+{
+ u8 *prompt;
+
+ prompt = format (0, "\r%s-- %s --%s%s",
+ cf->ansi_capable ? ANSI_BOLD : "",
+ message, cf->ansi_capable ? ANSI_RESET : "", postfix);
+
+ unix_vlib_cli_output_cooked (cf, uf, prompt, vec_len (prompt));
+
+ vec_free (prompt);
+}
+
+/** @brief Erase the printed pager prompt */
+static void
+unix_cli_pager_prompt_erase (unix_cli_file_t * cf, clib_file_t * uf)
+{
+ if (cf->ansi_capable)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\r", 1);
+ unix_vlib_cli_output_cooked (cf, uf,
+ (u8 *) ANSI_CLEARLINE,
+ sizeof (ANSI_CLEARLINE) - 1);
+ }
+ else
+ {
+ int i;
+
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\r", 1);
+ for (i = 0; i < cf->width - 1; i++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\r", 1);
+ }
+}
+
+/** @brief Uses an ANSI escape sequence to move the cursor */
+static void
+unix_cli_ansi_cursor (unix_cli_file_t * cf, clib_file_t * uf, u16 x, u16 y)
+{
+ u8 *str;
+
+ str = format (0, "%s%d;%dH", CSI, y, x);
+
+ unix_vlib_cli_output_cooked (cf, uf, str, vec_len (str));
+
+ vec_free (str);
+}
+
+/** Redraw the currently displayed page of text.
+ * @param cf CLI session to redraw the pager buffer of.
+ * @param uf Unix file of the CLI session.
+ */
+static void
+unix_cli_pager_redraw (unix_cli_file_t * cf, clib_file_t * uf)
+{
+ unix_cli_pager_index_t *pi = NULL;
+ u8 *line = NULL;
+ word i;
+
+ /* No active pager? Do nothing. */
+ if (!vec_len (cf->pager_index))
+ return;
+
+ if (cf->ansi_capable)
+ {
+ /* If we have ANSI, send the clear screen sequence */
+ unix_vlib_cli_output_cooked (cf, uf,
+ (u8 *) ANSI_CLEAR,
+ sizeof (ANSI_CLEAR) - 1);
+ }
+ else
+ {
+ /* Otherwise make sure we're on a blank line */
+ unix_cli_pager_prompt_erase (cf, uf);
+ }
+
+ /* (Re-)send the current page of content */
+ for (i = 0; i < cf->height - 1 &&
+ i + cf->pager_start < vec_len (cf->pager_index); i++)
+ {
+ pi = &cf->pager_index[cf->pager_start + i];
+ line = cf->pager_vector[pi->line] + pi->offset;
+
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+
+ unix_cli_pager_prompt (cf, uf);
+}
+
+/** @brief Process and add a line to the pager index.
+ * In normal operation this function takes the given character string
+ * found in @c line, of length @c len_or_index, and iterates over its
+ * contents, adding each line of text discovered within it to the
+ * pager index. Lines are identified by newlines ("<code>\\n</code>") and by
+ * strings longer than the width of the terminal.
+ *
+ * If instead @c line is @c NULL then @c len_or_index is taken to mean the
+ * index of an existing line in the pager buffer; this simply means that the
+ * input line does not need to be cloned since we already have it. This is
+ * typical if we are reindexing the pager buffer.
+ *
+ * @param cf The CLI session whose pager we are adding to.
+ * @param line The string of text to be indexed into the pager buffer.
+ * If @c line is @c NULL then the mode of operation
+ * changes slightly; see the description above.
+ * @param len_or_index If @c line is a pointer to a string then this parameter
+ * indicates the length of that string; Otherwise this
+ * value provides the index in the pager buffer of an
+ * existing string to be indexed.
+ */
+static void
+unix_cli_pager_add_line (unix_cli_file_t * cf, u8 * line, word len_or_index)
+{
+ u8 *p;
+ word i, j, k;
+ word line_index, len;
+ u32 width = cf->width;
+ unix_cli_pager_index_t *pi;
+
+ if (line == NULL)
+ {
+ /* Use a line already in the pager buffer */
+ line_index = len_or_index;
+ p = cf->pager_vector[line_index];
+ len = vec_len (p);
+ }
+ else
+ {
+ len = len_or_index;
+ /* Add a copy of the raw string to the pager buffer */
+ p = vec_new (u8, len);
+ clib_memcpy (p, line, len);
+
+ /* store in pager buffer */
+ line_index = vec_len (cf->pager_vector);
+ vec_add1 (cf->pager_vector, p);
+ }
+
+ i = 0;
+ while (i < len)
+ {
+ /* Find the next line, or run to terminal width, or run to EOL */
+ int l = len - i;
+ j = unix_vlib_findchr ((u8) '\n', p, l < width ? l : width);
+
+ if (j < l && p[j] == '\n') /* incl \n */
+ j++;
+
+ /* Add the line to the index */
+ k = vec_len (cf->pager_index);
+ vec_validate (cf->pager_index, k);
+ pi = &cf->pager_index[k];
+
+ pi->line = line_index;
+ pi->offset = i;
+ pi->length = j;
+
+ i += j;
+ p += j;
+ }
+}
+
+/** @brief Reindex entire pager buffer.
+ * Resets the current pager index and then re-adds the lines in the pager
+ * buffer to the index.
+ *
+ * Additionally this function attempts to retain the current page start
+ * line offset by searching for the same top-of-screen line in the new index.
+ *
+ * @param cf The CLI session whose pager buffer should be reindexed.
+ */
+static void
+unix_cli_pager_reindex (unix_cli_file_t * cf)
+{
+ word i, old_line, old_offset;
+ unix_cli_pager_index_t *pi;
+
+ /* If there is nothing in the pager buffer then make sure the index
+ * is empty and move on.
+ */
+ if (cf->pager_vector == 0)
+ {
+ vec_reset_length (cf->pager_index);
+ return;
+ }
+
+ /* Retain a pointer to the current page start line so we can
+ * find it later
+ */
+ pi = &cf->pager_index[cf->pager_start];
+ old_line = pi->line;
+ old_offset = pi->offset;
+
+ /* Re-add the buffered lines to the index */
+ vec_reset_length (cf->pager_index);
+ vec_foreach_index (i, cf->pager_vector)
+ {
+ unix_cli_pager_add_line (cf, NULL, i);
+ }
+
+ /* Attempt to re-locate the previously stored page start line */
+ vec_foreach_index (i, cf->pager_index)
+ {
+ pi = &cf->pager_index[i];
+
+ if (pi->line == old_line &&
+ (pi->offset <= old_offset || pi->offset + pi->length > old_offset))
+ {
+ /* Found it! */
+ cf->pager_start = i;
+ break;
+ }
+ }
+
+ /* In case the start line was not found (rare), ensure the pager start
+ * index is within bounds
+ */
+ if (cf->pager_start >= vec_len (cf->pager_index))
+ {
+ if (!cf->height || vec_len (cf->pager_index) < (cf->height - 1))
+ cf->pager_start = 0;
+ else
+ cf->pager_start = vec_len (cf->pager_index) - (cf->height - 1);
+ }
+}
+
+/** VLIB CLI output function.
+ *
+ * If the terminal has a pager configured then this function takes care
+ * of collating output into the pager buffer; ensuring only the first page
+ * is displayed and any lines in excess of the first page are buffered.
+ *
+ * If the maximum number of index lines in the buffer is exceeded then the
+ * pager is cancelled and the contents of the current buffer are sent to the
+ * terminal.
+ *
+ * If there is no pager configured then the output is sent directly to the
+ * terminal.
+ *
+ * @param cli_file_index Index of the CLI session where this output is
+ * directed.
+ * @param buffer String of printable bytes to be output.
+ * @param buffer_bytes The number of bytes in @c buffer to be output.
+ */
+static void
+unix_vlib_cli_output (uword cli_file_index, u8 * buffer, uword buffer_bytes)
+{
+ unix_main_t *um = &unix_main;
+ clib_file_main_t *fm = &file_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ clib_file_t *uf;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+
+ if (cf->no_pager || um->cli_pager_buffer_limit == 0 || cf->height == 0)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, buffer, buffer_bytes);
+ }
+ else
+ {
+ word row = vec_len (cf->pager_index);
+ u8 *line;
+ unix_cli_pager_index_t *pi;
+
+ /* Index and add the output lines to the pager buffer. */
+ unix_cli_pager_add_line (cf, buffer, buffer_bytes);
+
+ /* Now iterate what was added to display the lines.
+ * If we reach the bottom of the page, display a prompt.
+ */
+ while (row < vec_len (cf->pager_index))
+ {
+ if (row < cf->height - 1)
+ {
+ /* output this line */
+ pi = &cf->pager_index[row];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+
+ /* if the last line didn't end in newline, and we're at the
+ * bottom of the page, add a newline */
+ if (line[pi->length - 1] != '\n' && row == cf->height - 2)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ }
+ else
+ {
+ /* Display the pager prompt every 10 lines */
+ if (!(row % 10))
+ unix_cli_pager_prompt (cf, uf);
+ }
+ row++;
+ }
+
+ /* Check if we went over the pager buffer limit */
+ if (vec_len (cf->pager_index) > um->cli_pager_buffer_limit)
+ {
+ /* Stop using the pager for the remainder of this CLI command */
+ cf->no_pager = 2;
+
+ /* If we likely printed the prompt, erase it */
+ if (vec_len (cf->pager_index) > cf->height - 1)
+ unix_cli_pager_prompt_erase (cf, uf);
+
+ /* Dump out the contents of the buffer */
+ for (row = cf->pager_start + (cf->height - 1);
+ row < vec_len (cf->pager_index); row++)
+ {
+ pi = &cf->pager_index[row];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+
+ unix_cli_pager_reset (cf);
+ }
+ }
+}
+
+/** Identify whether a terminal type is ANSI capable.
+ *
+ * Compares the string given in @c term with a list of terminal types known
+ * to support ANSI escape sequences.
+ *
+ * This list contains, for example, @c xterm, @c screen and @c ansi.
+ *
+ * @param term A string with a terminal type in it.
+ * @param len The length of the string in @c term.
+ *
+ * @return @c 1 if the terminal type is recognized as supporting ANSI
+ * terminal sequences; @c 0 otherwise.
+ */
+static u8
+unix_cli_terminal_type_ansi (u8 * term, uword len)
+{
+ /* This may later be better done as a hash of some sort. */
+#define _(a) do { \
+ if (strncasecmp(a, (char *)term, (size_t)len) == 0) return 1; \
+ } while(0)
+
+ _("xterm");
+ _("xterm-color");
+ _("xterm-256color"); /* iTerm on Mac */
+ _("screen");
+ _("screen-256color"); /* Screen and tmux */
+ _("ansi"); /* Microsoft Telnet */
+#undef _
+
+ return 0;
+}
+
+/** Identify whether a terminal type is non-interactive.
+ *
+ * Compares the string given in @c term with a list of terminal types known
+ * to be non-interactive, as sent by tools such as @c vppctl .
+ *
+ * This list contains, for example, @c vppctl.
+ *
+ * @param term A string with a terminal type in it.
+ * @param len The length of the string in @c term.
+ *
+ * @return @c 1 if the terminal type is recognized as being non-interactive;
+ * @c 0 otherwise.
+ */
+static u8
+unix_cli_terminal_type_noninteractive (u8 * term, uword len)
+{
+ /* This may later be better done as a hash of some sort. */
+#define _(a) do { \
+ if (strncasecmp(a, (char *)term, (size_t)len) == 0) return 1; \
+ } while(0)
+
+ _("vppctl");
+#undef _
+
+ return 0;
+}
+
+/** Set a session to be non-interactive. */
+static void
+unix_cli_set_session_noninteractive (unix_cli_file_t * cf)
+{
+ /* Non-interactive sessions don't get these */
+ cf->is_interactive = 0;
+ cf->no_pager = 1;
+ cf->history_limit = 0;
+ cf->has_history = 0;
+ cf->line_mode = 1;
+}
+
+/** @brief Emit initial welcome banner and prompt on a connection. */
+static void
+unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf)
+{
+ unix_main_t *um = &unix_main;
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ unix_cli_banner_t *banner;
+ int i, len;
+
+ /* Mark the session as started if we get here */
+ cf->started = 1;
+
+ if (!(cf->is_interactive)) /* No banner for non-interactive sessions */
+ return;
+
+ /*
+ * Put the first bytes directly into the buffer so that further output is
+ * queued until everything is ready (otherwise the initial prompt can
+ * appear midway through VPP initialization).
+ */
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\r", 1);
+
+ if (!um->cli_no_banner)
+ {
+ if (cf->ansi_capable)
+ {
+ banner = unix_cli_banner_color;
+ len = ARRAY_LEN (unix_cli_banner_color);
+ }
+ else
+ {
+ banner = unix_cli_banner;
+ len = ARRAY_LEN (unix_cli_banner);
+ }
+
+ for (i = 0; i < len; i++)
+ {
+ unix_vlib_cli_output_cooked (cf, uf,
+ banner[i].line, banner[i].length);
+ }
+ }
+
+ /* Prompt. */
+ unix_cli_cli_prompt (cf, uf);
+
+}
+
+/** @brief A failsafe triggered on a timer to ensure we send the prompt
+ * to telnet sessions that fail to negotiate the terminal type. */
+static void
+unix_cli_file_welcome_timer (any arg, f64 delay)
+{
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ (void) delay;
+
+ /* Check the connection didn't close already */
+ if (pool_is_free_index (cm->cli_file_pool, (uword) arg))
+ return;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, (uword) arg);
+
+ if (!cf->started)
+ unix_cli_file_welcome (cm, cf);
+}
+
+/** @brief A mostly no-op Telnet state machine.
+ * Process Telnet command bytes in a way that ensures we're mostly
+ * transparent to the Telnet protocol. That is, it's mostly a no-op.
+ *
+ * @return -1 if we need more bytes, otherwise the number of bytes to
+ * consume from the input_vector, not including the initial
+ * IAC byte.
+ */
+static i32
+unix_cli_process_telnet (unix_main_t * um,
+ unix_cli_file_t * cf,
+ clib_file_t * uf, u8 * input_vector, uword len)
+{
+ /* Input_vector starts at IAC byte.
+ * See if we have a complete message; if not, return -1 so we wait for more.
+ * if we have a complete message, consume those bytes from the vector.
+ */
+ i32 consume = 0;
+
+ if (len == 1)
+ return -1; /* want more bytes */
+
+ switch (input_vector[1])
+ {
+ case IAC:
+ /* two IAC's in a row means to pass through 0xff.
+ * since that makes no sense here, just consume it.
+ */
+ consume = 1;
+ break;
+
+ case WILL:
+ case WONT:
+ case DO:
+ case DONT:
+ /* Expect 3 bytes */
+ if (vec_len (input_vector) < 3)
+ return -1; /* want more bytes */
+
+ consume = 2;
+ break;
+
+ case SB:
+ {
+ /* Sub option - search ahead for IAC SE to end it */
+ i32 i;
+ for (i = 3; i < len && i < UNIX_CLI_MAX_DEPTH_TELNET; i++)
+ {
+ if (input_vector[i - 1] == IAC && input_vector[i] == SE)
+ {
+ /* We have a complete message; see if we care about it */
+ switch (input_vector[2])
+ {
+ case TELOPT_TTYPE:
+ if (input_vector[3] != 0)
+ break;
+ {
+ /* See if the terminal type is recognized */
+ u8 *term = input_vector + 4;
+ uword len = i - 5;
+
+ /* See if the terminal type is ANSI capable */
+ cf->ansi_capable =
+ unix_cli_terminal_type_ansi (term, len);
+
+ /* See if the terminal type indicates non-interactive */
+ if (unix_cli_terminal_type_noninteractive (term, len))
+ unix_cli_set_session_noninteractive (cf);
+ }
+
+ /* If session not started, we can release the pause */
+ if (!cf->started)
+ /* Send the welcome banner and initial prompt */
+ unix_cli_file_welcome (&unix_cli_main, cf);
+ break;
+
+ case TELOPT_NAWS:
+ /* Window size */
+ if (i != 8) /* check message is correct size */
+ break;
+
+ cf->width =
+ clib_net_to_host_u16 (*((u16 *) (input_vector + 3)));
+ if (cf->width > UNIX_CLI_MAX_TERMINAL_WIDTH)
+ cf->width = UNIX_CLI_MAX_TERMINAL_WIDTH;
+ if (cf->width < UNIX_CLI_MIN_TERMINAL_WIDTH)
+ cf->width = UNIX_CLI_MIN_TERMINAL_WIDTH;
+
+ cf->height =
+ clib_net_to_host_u16 (*((u16 *) (input_vector + 5)));
+ if (cf->height > UNIX_CLI_MAX_TERMINAL_HEIGHT)
+ cf->height = UNIX_CLI_MAX_TERMINAL_HEIGHT;
+ if (cf->height < UNIX_CLI_MIN_TERMINAL_HEIGHT)
+ cf->height = UNIX_CLI_MIN_TERMINAL_HEIGHT;
+
+ /* reindex pager buffer */
+ unix_cli_pager_reindex (cf);
+ /* redraw page */
+ unix_cli_pager_redraw (cf, uf);
+ break;
+
+ default:
+ break;
+ }
+ /* Consume it all */
+ consume = i;
+ break;
+ }
+ }
+
+ if (i == UNIX_CLI_MAX_DEPTH_TELNET)
+ consume = 1; /* hit max search depth, advance one byte */
+
+ if (consume == 0)
+ return -1; /* want more bytes */
+
+ break;
+ }
+
+ case GA:
+ case EL:
+ case EC:
+ case AO:
+ case IP:
+ case BREAK:
+ case DM:
+ case NOP:
+ case SE:
+ case EOR:
+ case ABORT:
+ case SUSP:
+ case xEOF:
+ /* Simple one-byte messages */
+ consume = 1;
+ break;
+
+ case AYT:
+ /* Are You There - trigger a visible response */
+ consume = 1;
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "fd.io VPP\n", 10);
+ break;
+
+ default:
+ /* Unknown command! Eat the IAC byte */
+ break;
+ }
+
+ return consume;
+}
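+
+/* Worked example (illustrative only, not part of the state machine):
+ * given the input bytes
+ *   IAC SB TELOPT_NAWS 0x00 0x50 0x00 0x18 IAC SE
+ * the SB handler above finds IAC SE at i == 8, decodes an 80x24 window
+ * size, and returns 8; the caller consumes those 8 bytes plus the
+ * leading IAC separately.
+ */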
+
+/** @brief Process actionable input.
+ * Based on the \c action process the input; this typically involves
+ * searching the command history or editing the current command line.
+ */
+static int
+unix_cli_line_process_one (unix_cli_main_t * cm,
+ unix_main_t * um,
+ unix_cli_file_t * cf,
+ clib_file_t * uf,
+ u8 input, unix_cli_parse_action_t action)
+{
+ u8 *prev;
+ u8 *save = 0;
+ u8 **possible_commands;
+ int j, delta;
+
+ switch (action)
+ {
+ case UNIX_CLI_PARSE_ACTION_NOACTION:
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_REVSEARCH:
+ case UNIX_CLI_PARSE_ACTION_FWDSEARCH:
+ if (!cf->has_history || !cf->history_limit)
+ break;
+ if (cf->search_mode == 0)
+ {
+ /* Erase the current command (if any) */
+ for (j = 0; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b \b", 3);
+
+ vec_reset_length (cf->search_key);
+ vec_reset_length (cf->current_command);
+ if (action == UNIX_CLI_PARSE_ACTION_REVSEARCH)
+ cf->search_mode = -1;
+ else
+ cf->search_mode = 1;
+ cf->cursor = 0;
+ }
+ else
+ {
+ if (action == UNIX_CLI_PARSE_ACTION_REVSEARCH)
+ cf->search_mode = -1;
+ else
+ cf->search_mode = 1;
+
+ cf->excursion += cf->search_mode;
+ goto search_again;
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_ERASELINELEFT:
+ /* Erase the command from the cursor to the start */
+
+ /* Shimmy forwards to the new end of line position */
+ delta = vec_len (cf->current_command) - cf->cursor;
+ for (j = cf->cursor; j > delta; j--)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ /* Zap from here to the end of what is currently displayed */
+ for (; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
+ /* Get back to the start of the line */
+ for (j = 0; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+
+ j = vec_len (cf->current_command) - cf->cursor;
+ memmove (cf->current_command, cf->current_command + cf->cursor, j);
+ _vec_len (cf->current_command) = j;
+
+ /* Print the new contents */
+ unix_vlib_cli_output_cooked (cf, uf, cf->current_command, j);
+ /* Shimmy back to the start */
+ for (j = 0; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ cf->cursor = 0;
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_ERASELINERIGHT:
+ /* Erase the command from the cursor to the end */
+
+ /* Zap from cursor to end of what is currently displayed */
+ for (j = cf->cursor; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
+ /* Get back to where we were */
+ for (j = cf->cursor; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+
+ /* Truncate the line at the cursor */
+ _vec_len (cf->current_command) = cf->cursor;
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_LEFT:
+ if (cf->cursor > 0)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ cf->cursor--;
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_RIGHT:
+ if (cf->cursor < vec_len (cf->current_command))
+ {
+ /* have to emit the character under the cursor */
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor, 1);
+ cf->cursor++;
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_UP:
+ case UNIX_CLI_PARSE_ACTION_DOWN:
+ if (!cf->has_history || !cf->history_limit)
+ break;
+ cf->search_mode = 0;
+ /* Erase the command */
+ for (j = cf->cursor; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
+ for (j = 0; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b \b", 3);
+ vec_reset_length (cf->current_command);
+ if (vec_len (cf->command_history))
+ {
+ if (action == UNIX_CLI_PARSE_ACTION_UP)
+ delta = -1;
+ else
+ delta = 1;
+
+ cf->excursion += delta;
+
+ if (cf->excursion == vec_len (cf->command_history))
+ {
+ /* down-arrowed to last entry - want a blank line */
+ _vec_len (cf->current_command) = 0;
+ }
+ else if (cf->excursion < 0)
+ {
+ /* up-arrowed past the start; wrap to the end and show a blank line */
+ cf->excursion = vec_len (cf->command_history);
+ _vec_len (cf->current_command) = 0;
+ }
+ else
+ {
+ if (cf->excursion > (i32) vec_len (cf->command_history) - 1)
+ /* down-arrowed past end - wrap to start */
+ cf->excursion = 0;
+
+ /* Print the command at the current position */
+ prev = cf->command_history[cf->excursion];
+ vec_validate (cf->current_command, vec_len (prev) - 1);
+
+ clib_memcpy (cf->current_command, prev, vec_len (prev));
+ _vec_len (cf->current_command) = vec_len (prev);
+ unix_vlib_cli_output_cooked (cf, uf, cf->current_command,
+ vec_len (cf->current_command));
+ }
+ }
+ cf->cursor = vec_len (cf->current_command);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_HOME:
+ if (vec_len (cf->current_command) && cf->cursor > 0)
+ {
+ while (cf->cursor)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ cf->cursor--;
+ }
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_END:
+ if (vec_len (cf->current_command) &&
+ cf->cursor < vec_len (cf->current_command))
+ {
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor,
+ vec_len (cf->current_command) -
+ cf->cursor);
+ cf->cursor = vec_len (cf->current_command);
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_WORDLEFT:
+ if (vec_len (cf->current_command) && cf->cursor > 0)
+ {
+ j = cf->cursor;
+
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ j--;
+
+ while (j && isspace (cf->current_command[j]))
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ j--;
+ }
+ while (j && !isspace (cf->current_command[j]))
+ {
+ if (isspace (cf->current_command[j - 1]))
+ break;
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ j--;
+ }
+
+ cf->cursor = j;
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_WORDRIGHT:
+ if (vec_len (cf->current_command) &&
+ cf->cursor < vec_len (cf->current_command))
+ {
+ int e = vec_len (cf->current_command);
+ j = cf->cursor;
+ while (j < e && !isspace (cf->current_command[j]))
+ j++;
+ while (j < e && isspace (cf->current_command[j]))
+ j++;
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor,
+ j - cf->cursor);
+ cf->cursor = j;
+ }
+
+ cf->search_mode = 0;
+ break;
+
+
+ case UNIX_CLI_PARSE_ACTION_ERASE:
+ if (vec_len (cf->current_command))
+ {
+ if (cf->cursor == vec_len (cf->current_command))
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b \b", 3);
+ _vec_len (cf->current_command)--;
+ cf->cursor--;
+ }
+ else if (cf->cursor > 0)
+ {
+ /* shift everything at & to the right of the cursor left by 1 */
+ j = vec_len (cf->current_command) - cf->cursor;
+ memmove (cf->current_command + cf->cursor - 1,
+ cf->current_command + cf->cursor, j);
+ _vec_len (cf->current_command)--;
+ cf->cursor--;
+ /* redraw the rest of the line */
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor,
+ j);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " \b\b", 3);
+ /* and shift the terminal cursor back where it should be */
+ while (--j)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ }
+ }
+ cf->search_mode = 0;
+ cf->excursion = 0;
+ vec_reset_length (cf->search_key);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_ERASERIGHT:
+ if (vec_len (cf->current_command))
+ {
+ if (cf->cursor < vec_len (cf->current_command))
+ {
+ /* shift everything to the right of the cursor left by 1 */
+ j = vec_len (cf->current_command) - cf->cursor - 1;
+ memmove (cf->current_command + cf->cursor,
+ cf->current_command + cf->cursor + 1, j);
+ _vec_len (cf->current_command)--;
+ /* redraw the rest of the line */
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor,
+ j);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " \b", 2);
+ /* and shift the terminal cursor back where it should be */
+ if (j)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ while (--j)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ }
+ }
+ }
+ else if (input == 'D' - '@')
+ {
+ /* ^D with no command entered = quit */
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "quit\n", 5);
+ vlib_process_signal_event (um->vlib_main,
+ vlib_current_process (um->vlib_main),
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+ cf - cm->cli_file_pool);
+ }
+ cf->search_mode = 0;
+ cf->excursion = 0;
+ vec_reset_length (cf->search_key);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_CLEAR:
+ /* If we're in ANSI mode, clear the screen.
+ * Then redraw the prompt and any existing command input, then put
+ * the cursor back where it was in that line.
+ */
+ if (cf->ansi_capable)
+ unix_vlib_cli_output_cooked (cf, uf,
+ (u8 *) ANSI_CLEAR,
+ sizeof (ANSI_CLEAR) - 1);
+ else
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+
+ unix_vlib_cli_output_raw (cf, uf,
+ cm->cli_prompt, vec_len (cm->cli_prompt));
+ unix_vlib_cli_output_raw (cf, uf,
+ cf->current_command,
+ vec_len (cf->current_command));
+ for (j = cf->cursor; j < vec_len (cf->current_command); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_TAB:
+ if (cf->cursor < vec_len (cf->current_command))
+ {
+ /* if we are in the middle of a line, complete only if
+ * the cursor points to whitespace */
+ if (isspace (cf->current_command[cf->cursor]))
+ {
+ /* save and clear any input that is after the cursor */
+ vec_resize (save, vec_len (cf->current_command) - cf->cursor);
+ clib_memcpy (save, cf->current_command + cf->cursor,
+ vec_len (cf->current_command) - cf->cursor);
+ _vec_len (cf->current_command) = cf->cursor;
+ }
+ else
+ {
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) "\a", 1);
+ break;
+ }
+ }
+ possible_commands =
+ vlib_cli_get_possible_completions (cf->current_command);
+ if (vec_len (possible_commands) == 1)
+ {
+ u32 j = cf->cursor;
+ u8 *completed = possible_commands[0];
+
+ /* find the last word of current_command */
+ while (j >= 1 && !isspace (cf->current_command[j - 1]))
+ {
+ j--;
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) "\b", 1);
+ }
+ _vec_len (cf->current_command) = j;
+
+ /* replace it with the newly expanded command */
+ vec_append (cf->current_command, completed);
+
+ /* echo to the terminal */
+ unix_vlib_cli_output_raw (cf, uf, completed, vec_len (completed));
+
+ /* add one trailing space if needed */
+ if (vec_len (save) == 0)
+ {
+ vec_add1 (cf->current_command, ' ');
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) " ", 1);
+ }
+
+ cf->cursor = vec_len (cf->current_command);
+
+ }
+ else if (vec_len (possible_commands) >= 2)
+ {
+ u8 **possible_command;
+ uword max_command_len = 0, min_command_len = ~0;
+ u32 i, j;
+
+ vec_foreach (possible_command, possible_commands)
+ {
+ if (vec_len (*possible_command) > max_command_len)
+ {
+ max_command_len = vec_len (*possible_command);
+ }
+ if (vec_len (*possible_command) < min_command_len)
+ {
+ min_command_len = vec_len (*possible_command);
+ }
+ }
+
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+
+ i = 0;
+ vec_foreach (possible_command, possible_commands)
+ {
+ if (i + max_command_len >= cf->width)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ i = 0;
+ }
+ unix_vlib_cli_output_raw (cf, uf, *possible_command,
+ vec_len (*possible_command));
+ for (j = vec_len (*possible_command); j < max_command_len + 2;
+ j++)
+ {
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) " ", 1);
+ }
+ i += max_command_len + 2;
+ }
+
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+
+ /* rewrite prompt */
+ unix_cli_cli_prompt (cf, uf);
+ unix_vlib_cli_output_raw (cf, uf, cf->current_command,
+ vec_len (cf->current_command));
+
+ /* count length of last word */
+ j = cf->cursor;
+ i = 0;
+ while (j >= 1 && !isspace (cf->current_command[j - 1]))
+ {
+ j--;
+ i++;
+ }
+
+ /* extend the line with the longest common prefix of the candidates */
+ for (; i < min_command_len; i++)
+ {
+ u8 common = '\0';
+ int stop = 0;
+ vec_foreach (possible_command, possible_commands)
+ {
+ if (common == '\0')
+ {
+ common = (*possible_command)[i];
+ }
+ else if (common != (*possible_command)[i])
+ {
+ stop = 1;
+ break;
+ }
+ }
+ if (!stop)
+ {
+ vec_add1 (cf->current_command, common);
+ cf->cursor++;
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) & common, 1);
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+ else
+ {
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) "\a", 1);
+ }
+
+ if (vec_len (save) > 0)
+ {
+ /* restore remaining input if tab was hit in the middle of a line */
+ unix_vlib_cli_output_raw (cf, uf, save, vec_len (save));
+ for (j = 0; j < vec_len (save); j++)
+ {
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) "\b", 1);
+ }
+ vec_append (cf->current_command, save);
+ vec_free (save);
+ }
+ vec_free (possible_commands);
+
+ break;
+ case UNIX_CLI_PARSE_ACTION_YANK:
+ /* TODO */
+ break;
+
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_QUIT:
+ pager_quit:
+ unix_cli_pager_prompt_erase (cf, uf);
+ unix_cli_pager_reset (cf);
+ unix_cli_cli_prompt (cf, uf);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_NEXT:
+ case UNIX_CLI_PARSE_ACTION_PAGER_PGDN:
+ /* show next page of the buffer */
+ if (cf->height + cf->pager_start < vec_len (cf->pager_index))
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+
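+ /* A "page" is height - 1 lines; the bottom terminal row is reserved
+ * for the pager prompt. */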
+ int m = cf->pager_start + (cf->height - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ for (j = m;
+ j < vec_len (cf->pager_index) && cf->pager_start < m;
+ j++, cf->pager_start++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ else
+ {
+ if (action == UNIX_CLI_PARSE_ACTION_PAGER_NEXT)
+ /* no more in buffer, exit, but only if it was <space> */
+ goto pager_quit;
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_DN:
+ case UNIX_CLI_PARSE_ACTION_PAGER_CRLF:
+ /* display the next line of the buffer */
+ if (cf->pager_start < vec_len (cf->pager_index) - (cf->height - 1))
+ {
+ u8 *line;
+ unix_cli_pager_index_t *pi;
+
+ unix_cli_pager_prompt_erase (cf, uf);
+ pi = &cf->pager_index[cf->pager_start + (cf->height - 1)];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ cf->pager_start++;
+ /* if the last line didn't end in newline, add a newline */
+ if (line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ else
+ {
+ if (action == UNIX_CLI_PARSE_ACTION_PAGER_CRLF)
+ /* no more in buffer, exit, but only if it was <enter> */
+ goto pager_quit;
+ }
+
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_UP:
+ /* scroll the page back one line */
+ if (cf->pager_start > 0)
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+
+ cf->pager_start--;
+ if (cf->ansi_capable)
+ {
+ pi = &cf->pager_index[cf->pager_start];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_cli_pager_prompt_erase (cf, uf);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) ANSI_SCROLLDN,
+ sizeof (ANSI_SCROLLDN) - 1);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) ANSI_SAVECURSOR,
+ sizeof (ANSI_SAVECURSOR) - 1);
+ unix_cli_ansi_cursor (cf, uf, 1, 1);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) ANSI_CLEARLINE,
+ sizeof (ANSI_CLEARLINE) - 1);
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) ANSI_RESTCURSOR,
+ sizeof (ANSI_RESTCURSOR) - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ else
+ {
+ int m = cf->pager_start + (cf->height - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ for (j = cf->pager_start;
+ j < vec_len (cf->pager_index) && j < m; j++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_TOP:
+ /* back to the first page of the buffer */
+ if (cf->pager_start > 0)
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+
+ cf->pager_start = 0;
+ int m = cf->pager_start + (cf->height - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ for (j = cf->pager_start; j < vec_len (cf->pager_index) && j < m;
+ j++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM:
+ /* skip to the last page of the buffer */
+ if (cf->pager_start < vec_len (cf->pager_index) - (cf->height - 1))
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+
+ cf->pager_start = vec_len (cf->pager_index) - (cf->height - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ unix_cli_pager_message (cf, uf, "skipping", "\n");
+ for (j = cf->pager_start; j < vec_len (cf->pager_index); j++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_PGUP:
+ /* wander back one page in the buffer */
+ if (cf->pager_start > 0)
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+ int m;
+
+ if (cf->pager_start >= cf->height)
+ cf->pager_start -= cf->height - 1;
+ else
+ cf->pager_start = 0;
+ m = cf->pager_start + cf->height - 1;
+ unix_cli_pager_prompt_erase (cf, uf);
+ for (j = cf->pager_start; j < vec_len (cf->pager_index) && j < m;
+ j++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_REDRAW:
+ /* Redraw the current pager screen */
+ unix_cli_pager_redraw (cf, uf);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_SEARCH:
+ /* search forwards in the buffer */
+ break;
+
+
+ case UNIX_CLI_PARSE_ACTION_CRLF:
+ crlf:
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+
+ if (cf->has_history && cf->history_limit)
+ {
+ if (cf->command_history
+ && vec_len (cf->command_history) >= cf->history_limit)
+ {
+ vec_free (cf->command_history[0]);
+ vec_delete (cf->command_history, 1, 0);
+ }
+ /* Don't add blank lines to the cmd history */
+ if (vec_len (cf->current_command))
+ {
+ /* Don't duplicate the previous command */
+ j = vec_len (cf->command_history);
+ if (j == 0 ||
+ (vec_len (cf->current_command) !=
+ vec_len (cf->command_history[j - 1])
+ || memcmp (cf->current_command, cf->command_history[j - 1],
+ vec_len (cf->current_command)) != 0))
+ {
+ /* copy the command to the history */
+ u8 *c = 0;
+ vec_append (c, cf->current_command);
+ vec_add1 (cf->command_history, c);
+ cf->command_number++;
+ }
+ }
+ cf->excursion = vec_len (cf->command_history);
+ }
+
+ cf->search_mode = 0;
+ vec_reset_length (cf->search_key);
+ cf->cursor = 0;
+
+ return 0;
+
+ case UNIX_CLI_PARSE_ACTION_PARTIALMATCH:
+ case UNIX_CLI_PARSE_ACTION_NOMATCH:
+ if (vec_len (cf->pager_index))
+ {
+ /* no-op for now */
+ }
+ else if (cf->has_history && cf->search_mode && isprint (input))
+ {
+ int k, limit, offset;
+ u8 *item;
+
+ vec_add1 (cf->search_key, input);
+
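+ /* Scan the whole history, starting from the current excursion and
+ * wrapping at either end, for the first entry that contains
+ * search_key as a substring. */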
+ search_again:
+ for (j = 0; j < vec_len (cf->command_history); j++)
+ {
+ if (cf->excursion > (i32) vec_len (cf->command_history) - 1)
+ cf->excursion = 0;
+ else if (cf->excursion < 0)
+ cf->excursion = vec_len (cf->command_history) - 1;
+
+ item = cf->command_history[cf->excursion];
+
+ limit = (vec_len (cf->search_key) > vec_len (item)) ?
+ vec_len (item) : vec_len (cf->search_key);
+
+ for (offset = 0; offset <= vec_len (item) - limit; offset++)
+ {
+ for (k = 0; k < limit; k++)
+ {
+ if (item[k + offset] != cf->search_key[k])
+ goto next_offset;
+ }
+ goto found_at_offset;
+
+ next_offset:
+ ;
+ }
+ goto next;
+
+ found_at_offset:
+ for (j = 0; j < vec_len (cf->current_command); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b \b", 3);
+
+ vec_validate (cf->current_command, vec_len (item) - 1);
+ clib_memcpy (cf->current_command, item, vec_len (item));
+ _vec_len (cf->current_command) = vec_len (item);
+
+ unix_vlib_cli_output_cooked (cf, uf, cf->current_command,
+ vec_len (cf->current_command));
+ cf->cursor = vec_len (cf->current_command);
+ goto found;
+
+ next:
+ cf->excursion += cf->search_mode;
+ }
+
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\nNo match...", 12);
+ vec_reset_length (cf->search_key);
+ vec_reset_length (cf->current_command);
+ cf->search_mode = 0;
+ cf->cursor = 0;
+ goto crlf;
+ }
+ else if (isprint (input)) /* skip any errant control codes */
+ {
+ if (cf->cursor == vec_len (cf->current_command))
+ {
+ /* Append to end */
+ vec_add1 (cf->current_command, input);
+ cf->cursor++;
+
+ /* Echo the character back to the client */
+ unix_vlib_cli_output_raw (cf, uf, &input, 1);
+ }
+ else
+ {
+ /* Insert at cursor: resize +1 byte, move everything over */
+ j = vec_len (cf->current_command) - cf->cursor;
+ vec_add1 (cf->current_command, (u8) 'A');
+ memmove (cf->current_command + cf->cursor + 1,
+ cf->current_command + cf->cursor, j);
+ cf->current_command[cf->cursor] = input;
+ /* Redraw the line */
+ j++;
+ unix_vlib_cli_output_raw (cf, uf,
+ cf->current_command + cf->cursor, j);
+ /* Put terminal cursor back */
+ while (--j)
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) "\b", 1);
+ cf->cursor++;
+ }
+ }
+ else
+ {
+ /* no-op - not printable or otherwise not actionable */
+ }
+
+ found:
+
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_TELNETIAC:
+ break;
+ }
+ return 1;
+}
+
+/** @brief Process input bytes on a stream to provide line editing and
+ * command history in the CLI. */
+static int
+unix_cli_line_edit (unix_cli_main_t * cm, unix_main_t * um,
+ clib_file_main_t * fm, unix_cli_file_t * cf)
+{
+ clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ int i;
+
+ for (i = 0; i < vec_len (cf->input_vector); i++)
+ {
+ unix_cli_parse_action_t action;
+ i32 matched = 0;
+ unix_cli_parse_actions_t *a;
+
+ /* If we're in the pager mode, search the pager actions */
+ a =
+ vec_len (cf->pager_index) ? unix_cli_parse_pager :
+ unix_cli_parse_strings;
+
+ /* See if the input buffer is some sort of control code */
+ action = unix_cli_match_action (a, &cf->input_vector[i],
+ vec_len (cf->input_vector) - i,
+ &matched);
+
+ switch (action)
+ {
+ case UNIX_CLI_PARSE_ACTION_PARTIALMATCH:
+ if (i)
+ {
+ /* There was a partial match which means we need more bytes
+ * than the input buffer currently has.
+ * Since the bytes before here have been processed, shift
+ * the remaining contents to the start of the input buffer.
+ */
+ vec_delete (cf->input_vector, i, 0);
+ }
+ return 1; /* wait for more */
+
+ case UNIX_CLI_PARSE_ACTION_TELNETIAC:
+ /* process telnet options */
+ matched = unix_cli_process_telnet (um, cf, uf,
+ cf->input_vector + i,
+ vec_len (cf->input_vector) - i);
+ if (matched < 0)
+ {
+ /* There was a partial match which means we need more bytes
+ * than the input buffer currently has.
+ */
+ if (i)
+ {
+ /*
+ * Since the bytes before here have been processed, shift
+ * the remaining contents to the start of the input buffer.
+ */
+ vec_delete (cf->input_vector, i, 0);
+ }
+ return 1; /* wait for more */
+ }
+ break;
+
+ default:
+ /* If telnet option processing switched us to line mode, get us
+ * out of here!
+ */
+ if (cf->line_mode)
+ {
+ vec_delete (cf->input_vector, i, 0);
+ cf->current_command = cf->input_vector;
+ return 0;
+ }
+
+ /* process the action */
+ if (!unix_cli_line_process_one (cm, um, cf, uf,
+ cf->input_vector[i], action))
+ {
+ /* CRLF found. Consume the bytes from the input_vector */
+ vec_delete (cf->input_vector, i + matched, 0);
+ /* And tell our caller to execute cf->input_command */
+ return 0;
+ }
+ }
+
+ i += matched;
+ }
+
+ vec_reset_length (cf->input_vector);
+ return 1;
+}
+
+/** @brief Process input to a CLI session. */
+static void
+unix_cli_process_input (unix_cli_main_t * cm, uword cli_file_index)
+{
+ unix_main_t *um = &unix_main;
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *uf;
+ unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ unformat_input_t input;
+ int vlib_parse_eval (u8 *);
+
+ cm->current_input_file_index = cli_file_index;
+
+more:
+ /* Try vlibplex first. Someday... */
+ if (0 && vlib_parse_eval (cf->input_vector) == 0)
+ goto done;
+
+
+ if (cf->line_mode)
+ {
+ /* just treat whatever we got as a complete line of input */
+ cf->current_command = cf->input_vector;
+ }
+ else
+ {
+ /* Line edit, echo, etc. */
+ if (unix_cli_line_edit (cm, um, fm, cf))
+ /* want more input */
+ return;
+ }
+
+ if (um->log_fd)
+ {
+ static u8 *lv;
+ vec_reset_length (lv);
+ lv = format (lv, "%U[%d]: %v",
+ format_timeval, 0 /* current bat-time */ ,
+ 0 /* current bat-format */ ,
+ cli_file_index, cf->current_command);
+ int rv __attribute__ ((unused)) = write (um->log_fd, lv, vec_len (lv));
+ }
+
+ /* Build an unformat structure around our command */
+ unformat_init_vector (&input, cf->current_command);
+
+ /* Remove leading white space from input. */
+ (void) unformat (&input, "");
+
+ cf->pager_start = 0; /* start a new pager session */
+
+ if (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ vlib_cli_input (um->vlib_main, &input, unix_vlib_cli_output,
+ cli_file_index);
+
+ /* Zero buffer since otherwise unformat_free will call vec_free on it. */
+ input.buffer = 0;
+
+ unformat_free (&input);
+
+ /* Re-fetch pointer since pool may have moved. */
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+
+done:
+ /* reset vector; we'll re-use it later */
+ if (cf->line_mode)
+ {
+ vec_reset_length (cf->input_vector);
+ cf->current_command = 0;
+ }
+ else
+ {
+ vec_reset_length (cf->current_command);
+ }
+
+ if (cf->no_pager == 2)
+ {
+ /* Pager was programmatically disabled */
+ unix_cli_pager_message (cf, uf, "pager buffer overflowed", "\n");
+ cf->no_pager = um->cli_no_pager;
+ }
+
+ if (vec_len (cf->pager_index) == 0
+ || vec_len (cf->pager_index) < cf->height)
+ {
+ /* There was no need for the pager */
+ unix_cli_pager_reset (cf);
+
+ /* Prompt. */
+ unix_cli_cli_prompt (cf, uf);
+ }
+ else
+ {
+ /* Display the pager prompt */
+ unix_cli_pager_prompt (cf, uf);
+ }
+
+ /* Any residual data in the input vector? */
+ if (vec_len (cf->input_vector))
+ goto more;
+
+ /* For non-interactive sessions send a NUL byte.
+ * Specifically this is because vppctl needs to see some traffic in
+ * order to move on to closing the session. Commands with no output
+ * would thus cause vppctl to hang indefinitely in non-interactive mode
+ * since there is also no prompt sent after the command completes.
+ */
+ if (!cf->is_interactive)
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) "\0", 1);
+}
+
+/** Destroy a CLI session.
+ * @note If we destroy the @c stdin session this additionally signals
+ * the shutdown of VPP.
+ */
+static void
+unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index)
+{
+ unix_main_t *um = &unix_main;
+ clib_file_main_t *fm = &file_main;
+ unix_cli_file_t *cf;
+ clib_file_t *uf;
+ int i;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+
+ /* Quit/EOF on stdin means quit program. */
+ if (uf->file_descriptor == STDIN_FILENO)
+ clib_longjmp (&um->vlib_main->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI);
+
+ vec_free (cf->current_command);
+ vec_free (cf->search_key);
+
+ for (i = 0; i < vec_len (cf->command_history); i++)
+ vec_free (cf->command_history[i]);
+
+ vec_free (cf->command_history);
+
+ clib_file_del (fm, uf);
+
+ unix_cli_file_free (cf);
+ pool_put (cm->cli_file_pool, cf);
+}
+
+/** Handle system events. */
+static uword
+unix_cli_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ unix_cli_main_t *cm = &unix_cli_main;
+ uword i, *data = 0;
+
+ while (1)
+ {
+ unix_cli_process_event_type_t event_type;
+ vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &data);
+
+ switch (event_type)
+ {
+ case UNIX_CLI_PROCESS_EVENT_READ_READY:
+ for (i = 0; i < vec_len (data); i++)
+ unix_cli_process_input (cm, data[i]);
+ break;
+
+ case UNIX_CLI_PROCESS_EVENT_QUIT:
+ /* Kill this process. */
+ for (i = 0; i < vec_len (data); i++)
+ unix_cli_kill (cm, data[i]);
+ goto done;
+ }
+
+ if (data)
+ _vec_len (data) = 0;
+ }
+
+done:
+ vec_free (data);
+
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+
+ /* Add node index so we can re-use this process later. */
+ vec_add1 (cm->unused_cli_process_node_indices, rt->node_index);
+
+ return 0;
+}
+
+/** Called when a CLI session file descriptor can be written to without
+ * blocking. */
+static clib_error_t *
+unix_cli_write_ready (clib_file_t * uf)
+{
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ int n;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data);
+
+ /* Flush output vector. */
+ if (cf->is_socket)
+ /* If it's a socket we use MSG_NOSIGNAL to prevent SIGPIPE */
+ n = send (uf->file_descriptor,
+ cf->output_vector, vec_len (cf->output_vector), MSG_NOSIGNAL);
+ else
+ n = write (uf->file_descriptor,
+ cf->output_vector, vec_len (cf->output_vector));
+
+ if (n < 0 && errno != EAGAIN)
+ {
+ if (errno == EPIPE)
+ {
+ /* connection closed on us */
+ unix_main_t *um = &unix_main;
+ cf->has_epipe = 1;
+ vlib_process_signal_event (um->vlib_main, cf->process_node_index,
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+ uf->private_data);
+ }
+ else
+ {
+ return clib_error_return_unix (0, "write");
+ }
+ }
+
+ else if (n > 0)
+ unix_cli_del_pending_output (uf, cf, n);
+
+ return /* no error */ 0;
+}
+
+/** Called when a CLI session file descriptor has data to be read. */
+static clib_error_t *
+unix_cli_read_ready (clib_file_t * uf)
+{
+ unix_main_t *um = &unix_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ uword l;
+ int n, n_read, n_try;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data);
+
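+ /* Read in 4096-byte chunks until a short read shows the descriptor
+ * is drained. */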
+ n = n_try = 4096;
+ while (n == n_try)
+ {
+ l = vec_len (cf->input_vector);
+ vec_resize (cf->input_vector, l + n_try);
+
+ n = read (uf->file_descriptor, cf->input_vector + l, n_try);
+
+ /* Error? */
+ if (n < 0 && errno != EAGAIN)
+ return clib_error_return_unix (0, "read");
+
+ n_read = n < 0 ? 0 : n;
+ _vec_len (cf->input_vector) = l + n_read;
+ }
+
+ if (!(n < 0))
+ vlib_process_signal_event (um->vlib_main,
+ cf->process_node_index,
+ (n_read == 0
+ ? UNIX_CLI_PROCESS_EVENT_QUIT
+ : UNIX_CLI_PROCESS_EVENT_READ_READY),
+ /* event data */ uf->private_data);
+
+ return /* no error */ 0;
+}
+
+/** Called when a CLI session file descriptor has an error condition. */
+static clib_error_t *
+unix_cli_error_detected (clib_file_t * uf)
+{
+ unix_main_t *um = &unix_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data);
+ cf->has_epipe = 1; /* prevent writes while the close is pending */
+ vlib_process_signal_event (um->vlib_main,
+ cf->process_node_index,
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+ /* event data */ uf->private_data);
+
+ return /* no error */ 0;
+}
+
+/** Store a new CLI session.
+ * @param name The name of the session.
+ * @param fd The file descriptor for the session I/O.
+ * @return The session ID.
+ */
+static u32
+unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd)
+{
+ unix_main_t *um = &unix_main;
+ clib_file_main_t *fm = &file_main;
+ unix_cli_file_t *cf;
+ clib_file_t template = { 0 };
+ vlib_main_t *vm = um->vlib_main;
+ vlib_node_t *n;
+
+ name = (char *) format (0, "unix-cli-%s", name);
+
+ if (vec_len (cm->unused_cli_process_node_indices) > 0)
+ {
+ uword l = vec_len (cm->unused_cli_process_node_indices);
+
+ /* Find node and give it new name. */
+ n = vlib_get_node (vm, cm->unused_cli_process_node_indices[l - 1]);
+ vec_free (n->name);
+ n->name = (u8 *) name;
+
+ vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+
+ _vec_len (cm->unused_cli_process_node_indices) = l - 1;
+ }
+ else
+ {
+ static vlib_node_registration_t r = {
+ .function = unix_cli_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 16,
+ };
+
+ r.name = name;
+ vlib_register_node (vm, &r);
+ vec_free (name);
+
+ n = vlib_get_node (vm, r.index);
+ }
+
+ pool_get (cm->cli_file_pool, cf);
+ memset (cf, 0, sizeof (*cf));
+
+ template.read_function = unix_cli_read_ready;
+ template.write_function = unix_cli_write_ready;
+ template.error_function = unix_cli_error_detected;
+ template.file_descriptor = fd;
+ template.private_data = cf - cm->cli_file_pool;
+
+ cf->process_node_index = n->index;
+ cf->clib_file_index = clib_file_add (fm, &template);
+ cf->output_vector = 0;
+ cf->input_vector = 0;
+
+ vlib_start_process (vm, n->runtime_index);
+
+ vlib_process_t *p = vlib_get_process_from_node (vm, n);
+ p->output_function = unix_vlib_cli_output;
+ p->output_function_arg = cf - cm->cli_file_pool;
+
+ return cf - cm->cli_file_pool;
+}
+
+/** Telnet listening socket has a new connection. */
+static clib_error_t *
+unix_cli_listen_read_ready (clib_file_t * uf)
+{
+ unix_main_t *um = &unix_main;
+ clib_file_main_t *fm = &file_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ clib_socket_t *s = &um->cli_listen_socket;
+ clib_socket_t client;
+ char *client_name;
+ clib_error_t *error;
+ unix_cli_file_t *cf;
+ clib_file_t *uf;
+ u32 cf_index;
+
+ error = clib_socket_accept (s, &client);
+ if (error)
+ return error;
+
+ client_name = (char *) format (0, "%U%c", format_sockaddr, &client.peer, 0);
+
+ cf_index = unix_cli_file_add (cm, client_name, client.fd);
+ cf = pool_elt_at_index (cm->cli_file_pool, cf_index);
+ cf->is_socket = 1;
+
+ /* No longer need CLIB version of socket. */
+ clib_socket_free (&client);
+ vec_free (client_name);
+
+ /* if we're supposed to run the telnet session in character mode (the default) */
+ if (um->cli_line_mode == 0)
+ {
+ /*
+ * Set telnet client character mode, echo on, suppress "go-ahead".
+ * Technically these should be negotiated, but this works.
+ */
+ u8 charmode_option[] = {
+ IAC, WONT, TELOPT_LINEMODE, /* server will do char-by-char */
+ IAC, DONT, TELOPT_LINEMODE, /* client should do char-by-char */
+ IAC, WILL, TELOPT_SGA, /* server will suppress GA */
+ IAC, DO, TELOPT_SGA, /* client should suppress Go Ahead */
+ IAC, WILL, TELOPT_ECHO, /* server will do echo */
+ IAC, DONT, TELOPT_ECHO, /* client should not echo */
+ IAC, DO, TELOPT_TTYPE, /* client should tell us its term type */
+ IAC, SB, TELOPT_TTYPE, 1, IAC, SE, /* now tell me ttype */
+ IAC, DO, TELOPT_NAWS, /* client should tell us its window sz */
+ IAC, SB, TELOPT_NAWS, 1, IAC, SE, /* now tell me window size */
+ };
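+
+ /* The client's TTYPE and NAWS replies arrive later as
+ * IAC SB ... IAC SE subnegotiations and are handled by
+ * unix_cli_process_telnet() above. */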
+
+ /* Enable history on this CLI */
+ cf->history_limit = um->cli_history_limit;
+ cf->has_history = cf->history_limit != 0;
+
+ /* This is an interactive session until we decide otherwise */
+ cf->is_interactive = 1;
+
+ /* Make sure this session is in char-by-char mode */
+ cf->line_mode = 0;
+
+ /* We need CRLF */
+ cf->crlf_mode = 1;
+
+ /* Setup the pager */
+ cf->no_pager = um->cli_no_pager;
+
+ /* Send the telnet options */
+ uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ unix_vlib_cli_output_raw (cf, uf, charmode_option,
+ ARRAY_LEN (charmode_option));
+
+ /* In case the client doesn't negotiate terminal type, use
+ * a timer to kick off the initial prompt. */
+ timer_call (unix_cli_file_welcome_timer, cf_index, 1);
+ }
+
+ return error;
+}
+
+/** The system terminal has informed us that the window size
+ * has changed.
+ */
+static void
+unix_cli_resize_interrupt (int signum)
+{
+ clib_file_main_t *fm = &file_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool,
+ cm->stdin_cli_file_index);
+ clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ struct winsize ws;
+ (void) signum;
+
+ /* Terminal resized, fetch the new size */
+ if (ioctl (STDIN_FILENO, TIOCGWINSZ, &ws) < 0)
+ {
+ /* "Should never happen..." */
+ clib_unix_warning ("TIOCGWINSZ");
+ /* We can't trust ws.XXX... */
+ return;
+ }
+
+ cf->width = ws.ws_col;
+ if (cf->width > UNIX_CLI_MAX_TERMINAL_WIDTH)
+ cf->width = UNIX_CLI_MAX_TERMINAL_WIDTH;
+ if (cf->width < UNIX_CLI_MIN_TERMINAL_WIDTH)
+ cf->width = UNIX_CLI_MIN_TERMINAL_WIDTH;
+
+ cf->height = ws.ws_row;
+ if (cf->height > UNIX_CLI_MAX_TERMINAL_HEIGHT)
+ cf->height = UNIX_CLI_MAX_TERMINAL_HEIGHT;
+ if (cf->height < UNIX_CLI_MIN_TERMINAL_HEIGHT)
+ cf->height = UNIX_CLI_MIN_TERMINAL_HEIGHT;
+
+ /* Reindex the pager buffer */
+ unix_cli_pager_reindex (cf);
+
+ /* Redraw the page */
+ unix_cli_pager_redraw (cf, uf);
+}
+
+/** Handle configuration directives in the @em unix section. */
+static clib_error_t *
+unix_cli_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ unix_main_t *um = &unix_main;
+ clib_file_main_t *fm = &file_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ int flags;
+ clib_error_t *error = 0;
+ unix_cli_file_t *cf;
+ u32 cf_index;
+ struct termios tio;
+ struct sigaction sa;
+ struct winsize ws;
+ u8 *term;
+
+ /* We depend on unix flags being set. */
+ if ((error = vlib_call_config_function (vm, unix_config)))
+ return error;
+
+ if (um->flags & UNIX_FLAG_INTERACTIVE)
+ {
+ /* Set stdin to be non-blocking. */
+ if ((flags = fcntl (STDIN_FILENO, F_GETFL, 0)) < 0)
+ flags = 0;
+ (void) fcntl (STDIN_FILENO, F_SETFL, flags | O_NONBLOCK);
+
+ cf_index = unix_cli_file_add (cm, "stdin", STDIN_FILENO);
+ cf = pool_elt_at_index (cm->cli_file_pool, cf_index);
+ cm->stdin_cli_file_index = cf_index;
+
+ /* If stdin is a tty and we are using character mode, enable
+ * history on the CLI and set the tty line discipline accordingly. */
+ if (isatty (STDIN_FILENO) && um->cli_line_mode == 0)
+ {
+ /* Capture terminal resize events */
+ memset (&sa, 0, sizeof (sa));
+ sa.sa_handler = unix_cli_resize_interrupt;
+ if (sigaction (SIGWINCH, &sa, 0) < 0)
+ clib_panic ("sigaction");
+
+ /* Retrieve the current terminal size */
+ ioctl (STDIN_FILENO, TIOCGWINSZ, &ws);
+ cf->width = ws.ws_col;
+ cf->height = ws.ws_row;
+
+ if (cf->width == 0 || cf->height == 0)
+ {
+ /*
+ * We have a tty, but no size. Use defaults.
+ * vpp "unix interactive" inside emacs + gdb ends up here.
+ */
+ cf->width = 80;
+ cf->height = 24;
+ }
+
+ /* Setup the history */
+ cf->history_limit = um->cli_history_limit;
+ cf->has_history = cf->history_limit != 0;
+
+ /* Setup the pager */
+ cf->no_pager = um->cli_no_pager;
+
+ /* This is an interactive session until we decide otherwise */
+ cf->is_interactive = 1;
+
+ /* We're going to be in char by char mode */
+ cf->line_mode = 0;
+
+ /* Save the original tty state so we can restore it later */
+ tcgetattr (STDIN_FILENO, &um->tio_stdin);
+ um->tio_isset = 1;
+
+ /* Tweak the tty settings */
+ tio = um->tio_stdin;
+ /* echo off, canonical mode off, ext'd input processing off */
+ tio.c_lflag &= ~(ECHO | ICANON | IEXTEN);
+ tio.c_cc[VMIN] = 1; /* 1 byte at a time */
+ tio.c_cc[VTIME] = 0; /* no timer */
+ tcsetattr (STDIN_FILENO, TCSAFLUSH, &tio);
+
+ /* See if we can do ANSI/VT100 output */
+ term = (u8 *) getenv ("TERM");
+ if (term != NULL)
+ {
+ int len = strlen ((char *) term);
+ cf->ansi_capable = unix_cli_terminal_type_ansi (term, len);
+ if (unix_cli_terminal_type_noninteractive (term, len))
+ unix_cli_set_session_noninteractive (cf);
+ }
+ }
+ else
+ {
+ /* No tty, so make sure the session doesn't have tty-like features */
+ unix_cli_set_session_noninteractive (cf);
+ }
+
+ /* Send banner and initial prompt */
+ unix_cli_file_welcome (cm, cf);
+ }
+
+ /* If we have socket config, LISTEN, otherwise, don't */
+ clib_socket_t *s = &um->cli_listen_socket;
+ if (s->config && s->config[0] != 0)
+ {
+ /* CLI listen. */
+ clib_file_t template = { 0 };
+
+ /* mkdir the socket's parent directory, but only under /run */
+ if (strncmp (s->config, "/run", 4) == 0)
+ {
+ u8 *tmp = format (0, "%s", s->config);
+ int i = vec_len (tmp);
+ while (i && tmp[--i] != '/')
+ ;
+
+ tmp[i] = 0;
+
+ if (i)
+ vlib_unix_recursive_mkdir ((char *) tmp);
+ vec_free (tmp);
+ }
+
+ s->flags = CLIB_SOCKET_F_IS_SERVER | /* listen, don't connect */
+ CLIB_SOCKET_F_ALLOW_GROUP_WRITE; /* PF_LOCAL socket only */
+ error = clib_socket_init (s);
+
+ if (error)
+ return error;
+
+ template.read_function = unix_cli_listen_read_ready;
+ template.file_descriptor = s->fd;
+
+ clib_file_add (fm, &template);
+ }
+
+ /* Set CLI prompt. */
+ if (!cm->cli_prompt)
+ cm->cli_prompt = format (0, "VLIB: ");
+
+ return 0;
+}
+
+/*?
+ * This module has no configurable parameters.
+?*/
+VLIB_CONFIG_FUNCTION (unix_cli_config, "unix-cli");
+
+/** Called when VPP is shutting down, this restores the system
+ * terminal state if previously saved.
+ */
+static clib_error_t *
+unix_cli_exit (vlib_main_t * vm)
+{
+ unix_main_t *um = &unix_main;
+
+ /* If stdin is a tty and we saved the tty state, reset the tty state */
+ if (isatty (STDIN_FILENO) && um->tio_isset)
+ tcsetattr (STDIN_FILENO, TCSAFLUSH, &um->tio_stdin);
+
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (unix_cli_exit);
+
+/** Set the CLI prompt.
+ * @param prompt The C string to set the prompt to.
+ * @note This setting is global; it impacts all current
+ * and future CLI sessions.
+ */
+void
+vlib_unix_cli_set_prompt (char *prompt)
+{
+ char *fmt = (prompt[strlen (prompt) - 1] == ' ') ? "%s" : "%s ";
+ unix_cli_main_t *cm = &unix_cli_main;
+ if (cm->cli_prompt)
+ vec_free (cm->cli_prompt);
+ cm->cli_prompt = format (0, fmt, prompt);
+}
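+
+/* Usage sketch (hypothetical caller): a plugin that rebrands the prompt
+ * at startup might call
+ *
+ *   vlib_unix_cli_set_prompt ("my-app# ");
+ *
+ * A trailing space is appended automatically when the caller omits one.
+ */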
+
+/** CLI command to quit the terminal session.
+ * @note If this is a stdin session then this will
+ * shutdown VPP also.
+ */
+static clib_error_t *
+unix_cli_quit (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool,
+ cm->current_input_file_index);
+
+ /* Cosmetic: suppress the final prompt from appearing before we die */
+ cf->is_interactive = 0;
+ cf->started = 1;
+
+ vlib_process_signal_event (vm,
+ vlib_current_process (vm),
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+ cm->current_input_file_index);
+ return 0;
+}
+
+/*?
+ * Terminates the current CLI session.
+ *
+ * If VPP is running in @em interactive mode and this is the console session
+ * (that is, the session on @c stdin) then this will also terminate VPP.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (unix_cli_quit_command, static) = {
+ .path = "quit",
+ .short_help = "Exit CLI",
+ .function = unix_cli_quit,
+};
+/* *INDENT-ON* */
+
+/** CLI command to execute a VPP command script. */
+static clib_error_t *
+unix_cli_exec (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ char *file_name;
+ int fd;
+ unformat_input_t sub_input;
+ clib_error_t *error;
+
+ file_name = 0;
+ fd = -1;
+ error = 0;
+
+ if (!unformat (input, "%s", &file_name))
+ {
+ error = clib_error_return (0, "expecting file name, got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ fd = open (file_name, O_RDONLY);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "failed to open `%s'", file_name);
+ goto done;
+ }
+
+ /* Make sure it's a regular file. */
+ {
+ struct stat s;
+
+ if (fstat (fd, &s) < 0)
+ {
+ error = clib_error_return_unix (0, "failed to stat `%s'", file_name);
+ goto done;
+ }
+
+ if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
+ {
+ error = clib_error_return (0, "not a regular file `%s'", file_name);
+ goto done;
+ }
+ }
+
+ unformat_init_unix_file (&sub_input, fd);
+
+ vlib_cli_input (vm, &sub_input, 0, 0);
+ unformat_free (&sub_input);
+
+done:
+ if (fd > 0)
+ close (fd);
+ vec_free (file_name);
+
+ return error;
+}
+
+/*?
+ * Executes a sequence of CLI commands which are read from a file.
+ *
+ * If a command is unrecognised or otherwise invalid then the usual CLI
+ * feedback will be generated, however execution of subsequent commands
+ * from the file will continue.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_exec, static) = {
+ .path = "exec",
+ .short_help = "Execute commands from file",
+ .function = unix_cli_exec,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
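+
+/* Usage sketch (hypothetical file path):
+ *
+ *   vpp# exec /tmp/extra-config.cli
+ *
+ * Each line of the file is parsed and dispatched as if it had been
+ * typed at the prompt; errors are reported but do not stop execution.
+ */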
+
+/** CLI command to show various unix error statistics. */
+static clib_error_t *
+unix_show_errors (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unix_main_t *um = &unix_main;
+ clib_error_t *error = 0;
+ int i, n_errors_to_show;
+ unix_error_history_t *unix_errors = 0;
+
+ n_errors_to_show = 1 << 30;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!unformat (input, "%d", &n_errors_to_show))
+ {
+ error =
+ clib_error_return (0,
+ "expecting integer number of errors to show, got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ n_errors_to_show =
+ clib_min (ARRAY_LEN (um->error_history), n_errors_to_show);
+
+ i =
+ um->error_history_index >
+ 0 ? um->error_history_index - 1 : ARRAY_LEN (um->error_history) - 1;
+
+ while (n_errors_to_show > 0)
+ {
+ unix_error_history_t *eh = um->error_history + i;
+
+ if (!eh->error)
+ break;
+
+ vec_add1 (unix_errors, eh[0]);
+ n_errors_to_show -= 1;
+ if (i == 0)
+ i = ARRAY_LEN (um->error_history) - 1;
+ else
+ i--;
+ }
+
+ if (vec_len (unix_errors) == 0)
+ vlib_cli_output (vm, "no Unix errors so far");
+ else
+ {
+ vlib_cli_output (vm, "%Ld total errors seen", um->n_total_errors);
+ for (i = vec_len (unix_errors) - 1; i >= 0; i--)
+ {
+ unix_error_history_t *eh = vec_elt_at_index (unix_errors, i);
+ vlib_cli_output (vm, "%U: %U",
+ format_time_interval, "h:m:s:u", eh->time,
+ format_clib_error, eh->error);
+ }
+ vlib_cli_output (vm, "%U: time now",
+ format_time_interval, "h:m:s:u", vlib_time_now (vm));
+ }
+
+done:
+ vec_free (unix_errors);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_show_errors, static) = {
+ .path = "show unix-errors",
+ .short_help = "Show Unix system call error history",
+ .function = unix_show_errors,
+};
+/* *INDENT-ON* */
+
+/** CLI command to show session command history. */
+static clib_error_t *
+unix_cli_show_history (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ int i, j;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+
+ if (!cf->is_interactive)
+ return clib_error_return (0, "invalid for non-interactive sessions");
+
+ if (cf->has_history && cf->history_limit)
+ {
+ i = 1 + cf->command_number - vec_len (cf->command_history);
+ for (j = 0; j < vec_len (cf->command_history); j++)
+ vlib_cli_output (vm, "%d %v\n", i + j, cf->command_history[j]);
+ }
+ else
+ {
+ vlib_cli_output (vm, "History not enabled.\n");
+ }
+
+ return 0;
+}
+
+/*?
+ * Displays the command history for the current session, if any.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_show_history, static) = {
+ .path = "history",
+ .short_help = "Show current session command history",
+ .function = unix_cli_show_history,
+};
+/* *INDENT-ON* */
+
+/** CLI command to show terminal status. */
+static clib_error_t *
+unix_cli_show_terminal (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unix_main_t *um = &unix_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ vlib_node_t *n;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+ n = vlib_get_node (vm, cf->process_node_index);
+
+ vlib_cli_output (vm, "Terminal name: %v\n", n->name);
+ vlib_cli_output (vm, "Terminal mode: %s\n", cf->line_mode ?
+ "line-by-line" : "char-by-char");
+ vlib_cli_output (vm, "Terminal width: %d\n", cf->width);
+ vlib_cli_output (vm, "Terminal height: %d\n", cf->height);
+ vlib_cli_output (vm, "ANSI capable: %s\n",
+ cf->ansi_capable ? "yes" : "no");
+ vlib_cli_output (vm, "Interactive: %s\n",
+ cf->is_interactive ? "yes" : "no");
+ vlib_cli_output (vm, "History enabled: %s%s\n",
+ cf->has_history ? "yes" : "no", !cf->has_history
+ || cf->history_limit ? "" :
+ " (disabled by history limit)");
+ if (cf->has_history)
+ vlib_cli_output (vm, "History limit: %d\n", cf->history_limit);
+ vlib_cli_output (vm, "Pager enabled: %s%s%s\n",
+ cf->no_pager ? "no" : "yes",
+ cf->no_pager
+ || cf->height ? "" : " (disabled by terminal height)",
+ cf->no_pager
+ || um->cli_pager_buffer_limit ? "" :
+ " (disabled by buffer limit)");
+ if (!cf->no_pager)
+ vlib_cli_output (vm, "Pager limit: %d\n", um->cli_pager_buffer_limit);
+ vlib_cli_output (vm, "CRLF mode: %s\n",
+ cf->crlf_mode ? "CR+LF" : "LF");
+
+ return 0;
+}
+
+/*?
+ * Displays various information about the state of the current terminal
+ * session.
+ *
+ * @cliexpar
+ * @cliexstart{show terminal}
+ * Terminal name: unix-cli-stdin
+ * Terminal mode: char-by-char
+ * Terminal width: 123
+ * Terminal height: 48
+ * ANSI capable: yes
+ * Interactive: yes
+ * History enabled: yes
+ * History limit: 50
+ * Pager enabled: yes
+ * Pager limit: 100000
+ * CRLF mode: LF
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_show_terminal, static) = {
+ .path = "show terminal",
+ .short_help = "Show current session terminal settings",
+ .function = unix_cli_show_terminal,
+};
+/* *INDENT-ON* */
+
+/** CLI command to display a list of CLI sessions. */
+static clib_error_t *
+unix_cli_show_cli_sessions (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ //unix_main_t *um = &unix_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ clib_file_main_t *fm = &file_main;
+ unix_cli_file_t *cf;
+ clib_file_t *uf;
+ vlib_node_t *n;
+
+ vlib_cli_output (vm, "%-5s %-5s %-20s %s", "PNI", "FD", "Name", "Flags");
+
+#define fl(x, y) ( (x) ? toupper((y)) : tolower((y)) )
+ /* *INDENT-OFF* */
+ pool_foreach (cf, cm->cli_file_pool, ({
+ uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ n = vlib_get_node (vm, cf->process_node_index);
+ vlib_cli_output (vm,
+ "%-5d %-5d %-20v %c%c%c%c%c\n",
+ cf->process_node_index,
+ uf->file_descriptor,
+ n->name,
+ fl (cf->is_interactive, 'i'),
+ fl (cf->is_socket, 's'),
+ fl (cf->line_mode, 'l'),
+ fl (cf->has_epipe, 'p'),
+ fl (cf->ansi_capable, 'a'));
+ }));
+ /* *INDENT-ON* */
+#undef fl
+
+ return 0;
+}
+
+/*?
+ * Displays a summary of all the current CLI sessions.
+ *
+ * Typically used to diagnose connection issues with the CLI
+ * socket.
+ *
+ * @cliexpar
+ * @cliexstart{show cli-sessions}
+ * PNI FD Name Flags
+ * 343 0 unix-cli-stdin IslpA
+ * 344 7 unix-cli-local:20 ISlpA
+ * 346 8 unix-cli-local:21 iSLpa
+ * @cliexend
+ *
+ * In this example we have the debug console of the running process
+ * on stdin/out, we have an interactive socket session and we also
+ * have a non-interactive socket session.
+ *
+ * Fields:
+ *
+ * - @em PNI: Process node index.
+ * - @em FD: Unix file descriptor.
+ * - @em Name: Name of the session.
+ * - @em Flags: Various flags that describe the state of the session.
+ *
+ * @em Flags have the following meanings; lower-case typically negates
+ * upper-case:
+ *
+ * - @em I Interactive session.
+ * - @em S Connected by socket.
+ * - @em s Not a socket, likely stdin.
+ * - @em L Line-by-line mode.
+ * - @em l Char-by-char mode.
+ * - @em P EPIPE detected on connection; it will close soon.
+ * - @em A ANSI-capable terminal.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_show_cli_sessions, static) = {
+ .path = "show cli-sessions",
+ .short_help = "Show current CLI sessions",
+ .function = unix_cli_show_cli_sessions,
+};
+/* *INDENT-ON* */
+
+/** CLI command to set terminal pager settings. */
+static clib_error_t *
+unix_cli_set_terminal_pager (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unix_main_t *um = &unix_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+
+ if (!cf->is_interactive)
+ return clib_error_return (0, "invalid for non-interactive sessions");
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ cf->no_pager = 0;
+ else if (unformat (line_input, "off"))
+ cf->no_pager = 1;
+ else if (unformat (line_input, "limit %u", &um->cli_pager_buffer_limit))
+ vlib_cli_output (vm,
+ "Pager limit set to %u lines; note, this is global.\n",
+ um->cli_pager_buffer_limit);
+ else
+ {
+ error = clib_error_return (0, "unknown parameter: `%U`",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Enables or disables the terminal pager for this session. Generally
+ * this defaults to enabled.
+ *
+ * Additionally allows the pager buffer size to be set; though note that
+ * this value is set globally and not per session.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_pager, static) = {
+ .path = "set terminal pager",
+ .short_help = "set terminal pager [on|off] [limit <lines>]",
+ .function = unix_cli_set_terminal_pager,
+};
+/* *INDENT-ON* */
+
+/** CLI command to set terminal history settings. */
+static clib_error_t *
+unix_cli_set_terminal_history (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 limit;
+ clib_error_t *error = 0;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+
+ if (!cf->is_interactive)
+ return clib_error_return (0, "invalid for non-interactive sessions");
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ cf->has_history = 1;
+ else if (unformat (line_input, "off"))
+ cf->has_history = 0;
+ else if (unformat (line_input, "limit %u", &cf->history_limit))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown parameter: `%U`",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ /* If we reduced history size, or turned it off, purge the history */
+ limit = cf->has_history ? cf->history_limit : 0;
+
+ while (cf->command_history && vec_len (cf->command_history) >= limit)
+ {
+ vec_free (cf->command_history[0]);
+ vec_delete (cf->command_history, 1, 0);
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Enables or disables the command history function of the current
+ * terminal. Generally this defaults to enabled.
+ *
+ * This command also allows the maximum size of the history buffer for
+ * this session to be altered.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_history, static) = {
+ .path = "set terminal history",
+ .short_help = "set terminal history [on|off] [limit <lines>]",
+ .function = unix_cli_set_terminal_history,
+};
+/* *INDENT-ON* */
+
+/** CLI command to set terminal ANSI settings. */
+static clib_error_t *
+unix_cli_set_terminal_ansi (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+
+ if (!cf->is_interactive)
+ return clib_error_return (0, "invalid for non-interactive sessions");
+
+ if (unformat (input, "on"))
+ cf->ansi_capable = 1;
+ else if (unformat (input, "off"))
+ cf->ansi_capable = 0;
+ else
+ return clib_error_return (0, "unknown parameter: `%U`",
+ format_unformat_error, input);
+
+ return 0;
+}
+
+/*?
+ * Enables or disables the use of ANSI control sequences by this terminal.
+ * The default will vary based on terminal detection at the start of the
+ * session.
+ *
+ * ANSI control sequences are used in a small number of places to provide,
+ * for example, color text output and to control the cursor in the pager.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_ansi, static) = {
+ .path = "set terminal ansi",
+ .short_help = "set terminal ansi [on|off]",
+ .function = unix_cli_set_terminal_ansi,
+};
+/* *INDENT-ON* */
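+
+/*
+ * Example usage (illustrative session; prompt shown as "vpp#"):
+ *   vpp# set terminal ansi off
+ */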
+
+static clib_error_t *
+unix_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (unix_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/dir.dox b/src/vlib/unix/dir.dox
new file mode 100644
index 00000000..1380fa56
--- /dev/null
+++ b/src/vlib/unix/dir.dox
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Comcast Cable Communications Management, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief VLIB Unix interface
+
+VLIB application library Unix interface layer.
+
+*/
+/*? %%clicmd:group_label Unix Interface %% ?*/
+/*? %%syscfg:group_label Unix Interface %% ?*/
+
diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c
new file mode 100644
index 00000000..ecd31791
--- /dev/null
+++ b/src/vlib/unix/input.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * input.c: Unix file input
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <signal.h>
+#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
+
+/* FIXME autoconf */
+#define HAVE_LINUX_EPOLL
+
+#ifdef HAVE_LINUX_EPOLL
+
+#include <sys/epoll.h>
+
+typedef struct
+{
+ int epoll_fd;
+ struct epoll_event *epoll_events;
+
+ /* Statistics. */
+ u64 epoll_files_ready;
+ u64 epoll_waits;
+} linux_epoll_main_t;
+
+static linux_epoll_main_t linux_epoll_main;
+
+static void
+linux_epoll_file_update (clib_file_t * f, unix_file_update_type_t update_type)
+{
+ clib_file_main_t *fm = &file_main;
+ linux_epoll_main_t *em = &linux_epoll_main;
+ struct epoll_event e;
+ int op;
+
+ memset (&e, 0, sizeof (e));
+
+ e.events = EPOLLIN;
+ if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
+ e.events |= EPOLLOUT;
+ if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
+ e.events |= EPOLLET;
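+  /* Store the file's pool index as the epoll user data; it is
+     recovered in linux_epoll_input() to locate the clib_file_t. */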
+ e.data.u32 = f - fm->file_pool;
+
+ op = -1;
+
+ switch (update_type)
+ {
+ case UNIX_FILE_UPDATE_ADD:
+ op = EPOLL_CTL_ADD;
+ break;
+
+ case UNIX_FILE_UPDATE_MODIFY:
+ op = EPOLL_CTL_MOD;
+ break;
+
+ case UNIX_FILE_UPDATE_DELETE:
+ op = EPOLL_CTL_DEL;
+ break;
+
+ default:
+ clib_warning ("unknown update_type %d", update_type);
+ return;
+ }
+
+ if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
+ clib_unix_warning ("epoll_ctl");
+}
+
+static uword
+linux_epoll_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ unix_main_t *um = &unix_main;
+ clib_file_main_t *fm = &file_main;
+ linux_epoll_main_t *em = &linux_epoll_main;
+ struct epoll_event *e;
+ int n_fds_ready;
+
+ {
+ vlib_node_main_t *nm = &vm->node_main;
+ u32 ticks_until_expiration;
+ f64 timeout;
+ int timeout_ms = 0, max_timeout_ms = 10;
+ f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
+
+ /* If we're not working very hard, decide how long to sleep */
+ if (vector_rate < 2 && vm->api_queue_nonempty == 0
+ && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
+ {
+ ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
+ ((TWT (tw_timer_wheel) *) nm->timing_wheel);
+
+ /* Nothing on the fast wheel, sleep 10ms */
+ if (ticks_until_expiration == TW_SLOTS_PER_RING)
+ {
+ timeout = 10e-3;
+ timeout_ms = max_timeout_ms;
+ }
+ else
+ {
+	    timeout = (f64) ticks_until_expiration * 1e-5;
+ if (timeout < 1e-3)
+ timeout_ms = 0;
+ else
+ {
+ timeout_ms = timeout * 1e3;
+ /* Must be between 1 and 10 ms. */
+ timeout_ms = clib_max (1, timeout_ms);
+ timeout_ms = clib_min (max_timeout_ms, timeout_ms);
+ }
+ }
+ node->input_main_loops_per_call = 0;
+ }
+ else /* busy */
+ {
+ /* Don't come back for a respectable number of dispatch cycles */
+ node->input_main_loops_per_call = 1024;
+ }
+
+ /* Allow any signal to wakeup our sleep. */
+ {
+ static sigset_t unblock_all_signals;
+ n_fds_ready = epoll_pwait (em->epoll_fd,
+ em->epoll_events,
+ vec_len (em->epoll_events),
+ timeout_ms, &unblock_all_signals);
+
+ /* This kludge is necessary to run over absurdly old kernels */
+ if (n_fds_ready < 0 && errno == ENOSYS)
+ {
+ n_fds_ready = epoll_wait (em->epoll_fd,
+ em->epoll_events,
+ vec_len (em->epoll_events), timeout_ms);
+ }
+ }
+ }
+
+ if (n_fds_ready < 0)
+ {
+ if (unix_error_is_fatal (errno))
+ vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
+
+ /* non fatal error (e.g. EINTR). */
+ return 0;
+ }
+
+ em->epoll_waits += 1;
+ em->epoll_files_ready += n_fds_ready;
+
+ for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
+ {
+ u32 i = e->data.u32;
+ clib_file_t *f = pool_elt_at_index (fm->file_pool, i);
+ clib_error_t *errors[4];
+ int n_errors = 0;
+
+ if (PREDICT_TRUE (!(e->events & EPOLLERR)))
+ {
+ if (e->events & EPOLLIN)
+ {
+ errors[n_errors] = f->read_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ if (e->events & EPOLLOUT)
+ {
+ errors[n_errors] = f->write_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ }
+ else
+ {
+ if (f->error_function)
+ {
+ errors[n_errors] = f->error_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ else
+ close (f->file_descriptor);
+ }
+
+ ASSERT (n_errors < ARRAY_LEN (errors));
+ for (i = 0; i < n_errors; i++)
+ {
+ unix_save_error (um, errors[i]);
+ }
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
+ .function = linux_epoll_input,
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .name = "unix-epoll-input",
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+linux_epoll_input_init (vlib_main_t * vm)
+{
+ linux_epoll_main_t *em = &linux_epoll_main;
+ clib_file_main_t *fm = &file_main;
+
+ /* Allocate some events. */
+ vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
+
+ em->epoll_fd = epoll_create (vec_len (em->epoll_events));
+ if (em->epoll_fd < 0)
+ return clib_error_return_unix (0, "epoll_create");
+
+ fm->file_update = linux_epoll_file_update;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (linux_epoll_input_init);
+
+#endif /* HAVE_LINUX_EPOLL */
+
+static clib_error_t *
+unix_input_init (vlib_main_t * vm)
+{
+ return vlib_call_init_function (vm, linux_epoll_input_init);
+}
+
+VLIB_INIT_FUNCTION (unix_input_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
new file mode 100644
index 00000000..f286c870
--- /dev/null
+++ b/src/vlib/unix/main.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.c: Unix main routine
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/unix/plugin.h>
+
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+/** Default CLI pager limit if not configured in startup.conf */
+#define UNIX_CLI_DEFAULT_PAGER_LIMIT 100000
+
+/** Default CLI history depth if not configured in startup.conf */
+#define UNIX_CLI_DEFAULT_HISTORY 50
+
+char *vlib_default_runtime_dir __attribute__ ((weak));
+char *vlib_default_runtime_dir = "vlib";
+
+unix_main_t unix_main;
+clib_file_main_t file_main;
+
+static clib_error_t *
+unix_main_init (vlib_main_t * vm)
+{
+ unix_main_t *um = &unix_main;
+ um->vlib_main = vm;
+ return vlib_call_init_function (vm, unix_input_init);
+}
+
+VLIB_INIT_FUNCTION (unix_main_init);
+
+static void
+unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
+{
+ uword fatal = 0;
+ u8 *msg = 0;
+
+ msg = format (msg, "received signal %U, PC %U",
+ format_signal, signum, format_ucontext_pc, uc);
+
+ if (signum == SIGSEGV)
+ msg = format (msg, ", faulting address %p", si->si_addr);
+
+ switch (signum)
+ {
+ /* these (caught) signals cause the application to exit */
+ case SIGTERM:
+ if (unix_main.vlib_main->main_loop_exit_set)
+ {
+ syslog (LOG_ERR | LOG_DAEMON, "received SIGTERM, exiting...");
+ unix_main.vlib_main->main_loop_exit_now = 1;
+ }
+ break;
+ case SIGQUIT:
+ case SIGINT:
+ case SIGILL:
+ case SIGBUS:
+ case SIGSEGV:
+ case SIGHUP:
+ case SIGFPE:
+ fatal = 1;
+ break;
+
+ /* by default, print a message and continue */
+ default:
+ fatal = 0;
+ break;
+ }
+
+ /* Null terminate. */
+ vec_add1 (msg, 0);
+
+ if (fatal)
+ {
+ syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+ os_exit (1);
+ }
+ else
+ clib_warning ("%s", msg);
+
+ vec_free (msg);
+}
+
+static clib_error_t *
+setup_signal_handlers (unix_main_t * um)
+{
+ uword i;
+ struct sigaction sa;
+
+ for (i = 1; i < 32; i++)
+ {
+ memset (&sa, 0, sizeof (sa));
+ sa.sa_sigaction = (void *) unix_signal_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ switch (i)
+ {
+ /* these signals take the default action */
+ case SIGABRT:
+ case SIGKILL:
+ case SIGSTOP:
+ case SIGUSR1:
+ case SIGUSR2:
+ continue;
+
+ /* ignore SIGPIPE, SIGCHLD */
+ case SIGPIPE:
+ case SIGCHLD:
+ sa.sa_sigaction = (void *) SIG_IGN;
+ break;
+
+ /* catch and handle all other signals */
+ default:
+ break;
+ }
+
+ if (sigaction (i, &sa, 0) < 0)
+ return clib_error_return_unix (0, "sigaction %U", format_signal, i);
+ }
+
+ return 0;
+}
+
+static void
+unix_error_handler (void *arg, u8 * msg, int msg_len)
+{
+ unix_main_t *um = arg;
+
+ /* Echo to stderr when interactive. */
+ if (um->flags & UNIX_FLAG_INTERACTIVE)
+ {
+ CLIB_UNUSED (int r) = write (2, msg, msg_len);
+ }
+ else
+ {
+ char save = msg[msg_len - 1];
+
+ /* Null Terminate. */
+ msg[msg_len - 1] = 0;
+
+ syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+
+ msg[msg_len - 1] = save;
+ }
+}
+
+void
+vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error)
+{
+ unix_main_t *um = &unix_main;
+
+ if (um->flags & UNIX_FLAG_INTERACTIVE || error == 0)
+ return;
+
+ {
+ char save;
+ u8 *msg;
+ u32 msg_len;
+
+ msg = error->what;
+ msg_len = vec_len (msg);
+
+ /* Null Terminate. */
+ save = msg[msg_len - 1];
+ msg[msg_len - 1] = 0;
+
+ syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+
+ msg[msg_len - 1] = save;
+ }
+}
+
+static uword
+startup_config_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ unix_main_t *um = &unix_main;
+ u8 *buf = 0;
+ uword l, n = 1;
+
+ vlib_process_suspend (vm, 2.0);
+
+ while (um->unix_config_complete == 0)
+ vlib_process_suspend (vm, 0.1);
+
+ if (um->startup_config_filename)
+ {
+ unformat_input_t sub_input;
+ int fd;
+ struct stat s;
+ char *fn = (char *) um->startup_config_filename;
+
+ fd = open (fn, O_RDONLY);
+ if (fd < 0)
+ {
+ clib_warning ("failed to open `%s'", fn);
+ return 0;
+ }
+
+ if (fstat (fd, &s) < 0)
+ {
+ clib_warning ("failed to stat `%s'", fn);
+ bail:
+ close (fd);
+ return 0;
+ }
+
+ if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
+ {
+ clib_warning ("not a regular file: `%s'", fn);
+ goto bail;
+ }
+
+ while (n > 0)
+ {
+ l = vec_len (buf);
+ vec_resize (buf, 4096);
+ n = read (fd, buf + l, 4096);
+ if (n > 0)
+ {
+ _vec_len (buf) = l + n;
+ if (n < 4096)
+ break;
+ }
+ else
+ break;
+ }
+ if (um->log_fd && vec_len (buf))
+ {
+ u8 *lv = 0;
+ lv = format (lv, "%U: ***** Startup Config *****\n%v",
+ format_timeval, 0 /* current bat-time */ ,
+ 0 /* current bat-format */ ,
+ buf);
+ {
+ int rv __attribute__ ((unused)) =
+ write (um->log_fd, lv, vec_len (lv));
+ }
+ vec_reset_length (lv);
+ lv = format (lv, "%U: ***** End Startup Config *****\n",
+ format_timeval, 0 /* current bat-time */ ,
+ 0 /* current bat-format */ );
+ {
+ int rv __attribute__ ((unused)) =
+ write (um->log_fd, lv, vec_len (lv));
+ }
+ vec_free (lv);
+ }
+
+ if (vec_len (buf))
+ {
+ unformat_init_vector (&sub_input, buf);
+ vlib_cli_input (vm, &sub_input, 0, 0);
+ /* frees buf for us */
+ unformat_free (&sub_input);
+ }
+ close (fd);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (startup_config_node,static) = {
+ .function = startup_config_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "startup-config-process",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+unix_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ unix_main_t *um = &unix_main;
+ clib_error_t *error = 0;
+ gid_t gid;
+ int pidfd = -1;
+
+ /* Defaults */
+ um->cli_pager_buffer_limit = UNIX_CLI_DEFAULT_PAGER_LIMIT;
+ um->cli_history_limit = UNIX_CLI_DEFAULT_HISTORY;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ char *cli_prompt;
+ if (unformat (input, "interactive"))
+ um->flags |= UNIX_FLAG_INTERACTIVE;
+ else if (unformat (input, "nodaemon"))
+ um->flags |= UNIX_FLAG_NODAEMON;
+ else if (unformat (input, "cli-prompt %s", &cli_prompt))
+ vlib_unix_cli_set_prompt (cli_prompt);
+ else
+ if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config))
+ ;
+ else if (unformat (input, "runtime-dir %s", &um->runtime_dir))
+ ;
+ else if (unformat (input, "cli-line-mode"))
+ um->cli_line_mode = 1;
+ else if (unformat (input, "cli-no-banner"))
+ um->cli_no_banner = 1;
+ else if (unformat (input, "cli-no-pager"))
+ um->cli_no_pager = 1;
+ else if (unformat (input, "cli-pager-buffer-limit %d",
+ &um->cli_pager_buffer_limit))
+ ;
+ else
+ if (unformat (input, "cli-history-limit %d", &um->cli_history_limit))
+ ;
+ else if (unformat (input, "coredump-size"))
+ {
+ uword coredump_size = 0;
+ if (unformat (input, "unlimited"))
+ {
+ coredump_size = RLIM_INFINITY;
+ }
+ else
+ if (!unformat (input, "%U", unformat_memory_size, &coredump_size))
+ {
+ return clib_error_return (0,
+ "invalid coredump-size parameter `%U'",
+ format_unformat_error, input);
+ }
+ const struct rlimit new_limit = { coredump_size, coredump_size };
+ if (0 != setrlimit (RLIMIT_CORE, &new_limit))
+ {
+	      clib_unix_warning ("setrlimit() failed");
+ }
+ }
+ else if (unformat (input, "full-coredump"))
+ {
+ int fd;
+
+ fd = open ("/proc/self/coredump_filter", O_WRONLY);
+ if (fd >= 0)
+ {
+ if (write (fd, "0x6f\n", 5) != 5)
+ clib_unix_warning ("coredump filter write failed!");
+ close (fd);
+ }
+ else
+ clib_unix_warning ("couldn't open /proc/self/coredump_filter");
+ }
+ else if (unformat (input, "startup-config %s",
+ &um->startup_config_filename))
+ ;
+ else if (unformat (input, "exec %s", &um->startup_config_filename))
+ ;
+ else if (unformat (input, "log %s", &um->log_filename))
+ {
+ um->log_fd = open ((char *) um->log_filename,
+ O_CREAT | O_WRONLY | O_APPEND, 0644);
+ if (um->log_fd < 0)
+ {
+ clib_warning ("couldn't open log '%s'\n", um->log_filename);
+ um->log_fd = 0;
+ }
+ else
+ {
+ u8 *lv = 0;
+ lv = format (0, "%U: ***** Start: PID %d *****\n",
+ format_timeval, 0 /* current bat-time */ ,
+ 0 /* current bat-format */ ,
+ getpid ());
+ {
+ int rv __attribute__ ((unused)) =
+ write (um->log_fd, lv, vec_len (lv));
+ }
+ vec_free (lv);
+ }
+ }
+ else if (unformat (input, "gid %U", unformat_unix_gid, &gid))
+ {
+ if (setegid (gid) == -1)
+ return clib_error_return_unix (0, "setegid");
+ }
+ else if (unformat (input, "pidfile %s", &um->pidfile))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (um->runtime_dir == 0)
+ {
+ uid_t uid = geteuid ();
+      if (uid == 0)
+ um->runtime_dir = format (0, "/run/%s%c",
+ vlib_default_runtime_dir, 0);
+ else
+ um->runtime_dir = format (0, "/run/user/%u/%s%c", uid,
+ vlib_default_runtime_dir, 0);
+ }
+
+ error = setup_signal_handlers (um);
+ if (error)
+ return error;
+
+ if (um->pidfile)
+ {
+ if ((error = vlib_unix_validate_runtime_file (um,
+ (char *) um->pidfile,
+ &um->pidfile)))
+ return error;
+
+ if (((pidfd = open ((char *) um->pidfile,
+ O_CREAT | O_WRONLY | O_TRUNC, 0644)) < 0))
+ {
+ return clib_error_return_unix (0, "open");
+ }
+ }
+
+ if (!(um->flags & UNIX_FLAG_INTERACTIVE))
+ {
+ openlog (vm->name, LOG_CONS | LOG_PERROR | LOG_PID, LOG_DAEMON);
+ clib_error_register_handler (unix_error_handler, um);
+
+ if (!(um->flags & UNIX_FLAG_NODAEMON) && daemon ( /* chdir to / */ 0,
+ /* stdin/stdout/stderr -> /dev/null */
+ 0) < 0)
+	return clib_error_return (0, "daemon () fails");
+ }
+
+ if (pidfd >= 0)
+ {
+ u8 *lv = format (0, "%d", getpid ());
+ if (write (pidfd, (char *) lv, vec_len (lv)) != vec_len (lv))
+ {
+ vec_free (lv);
+ close (pidfd);
+ return clib_error_return_unix (0, "write");
+ }
+ vec_free (lv);
+ close (pidfd);
+ }
+
+ um->unix_config_complete = 1;
+
+ return 0;
+}
+
+/* unix { ... } configuration. */
+/*?
+ *
+ * @cfgcmd{interactive}
+ * Attach CLI to stdin/out and provide a debugging command line interface.
+ * Implies @c nodaemon.
+ *
+ * @cfgcmd{nodaemon}
+ * Do not fork or background the VPP process. Typically used when invoking
+ * VPP applications from a process monitor.
+ *
+ * @cfgcmd{exec, &lt;filename&gt;}
+ * @par <code>startup-config &lt;filename&gt;</code>
+ * Read startup operational configuration from @c filename.
+ * The contents of the file will be performed as though entered at the CLI.
+ * The two keywords are aliases for the same function; if both are specified,
+ * only the last will have an effect.
+ *
+ * @cfgcmd{log, &lt;filename&gt;}
+ * Logs the startup configuration and all subsequent CLI commands in
+ * @c filename.
+ * Very useful in situations where folks don't remember or can't be bothered
+ * to include CLI commands in bug reports.
+ *
+ * @cfgcmd{pidfile, &lt;filename&gt;}
+ * Writes the pid of the main thread in @c filename.
+ *
+ * @cfgcmd{full-coredump}
+ * Ask the Linux kernel to dump all memory-mapped address regions, instead
+ * of just text+data+bss.
+ *
+ * @cfgcmd{runtime-dir}
+ * Define the directory where VPP stores all runtime files.
+ * Default is /run/vpp.
+ *
+ * @cfgcmd{cli-listen, &lt;address:port&gt;}
+ * Bind the CLI to listen at the address and port given. @c localhost
+ * on TCP port @c 5002, given as <tt>cli-listen localhost:5002</tt>,
+ * is typical.
+ *
+ * @cfgcmd{cli-line-mode}
+ * Disable character-by-character I/O on stdin. Useful when combined with,
+ * for example, <tt>emacs M-x gud-gdb</tt>.
+ *
+ * @cfgcmd{cli-prompt, &lt;string&gt;}
+ * Configure the CLI prompt to be @c string.
+ *
+ * @cfgcmd{cli-history-limit, &lt;nn&gt;}
+ * Limit command history to @c nn lines. A value of @c 0
+ * disables command history. Default value: @c 50
+ *
+ * @cfgcmd{cli-no-banner}
+ * Disable the login banner on stdin and Telnet connections.
+ *
+ * @cfgcmd{cli-no-pager}
+ * Disable the output pager.
+ *
+ * @cfgcmd{cli-pager-buffer-limit, &lt;nn&gt;}
+ * Limit pager buffer to @c nn lines of output.
+ * A value of @c 0 disables the pager. Default value: @c 100000
+?*/
+VLIB_EARLY_CONFIG_FUNCTION (unix_config, "unix");
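+
+/*
+ * Example startup.conf stanza exercising the options documented above
+ * (paths and values are illustrative only):
+ *
+ *   unix {
+ *     nodaemon
+ *     log /var/log/vpp.log
+ *     cli-listen localhost:5002
+ *     cli-history-limit 100
+ *     full-coredump
+ *     pidfile /run/vpp/vpp.pid
+ *   }
+ */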
+
+static clib_error_t *
+unix_exit (vlib_main_t * vm)
+{
+ /* Close syslog connection. */
+ closelog ();
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (unix_exit);
+
+u8 **vlib_thread_stacks;
+
+static uword
+thread0 (uword arg)
+{
+ vlib_main_t *vm = (vlib_main_t *) arg;
+ unformat_input_t input;
+ int i;
+
+ unformat_init_command_line (&input, (char **) vm->argv);
+ i = vlib_main (vm, &input);
+ unformat_free (&input);
+
+ return i;
+}
+
+u8 *
+vlib_thread_stack_init (uword thread_index)
+{
+ vec_validate (vlib_thread_stacks, thread_index);
+ vlib_thread_stacks[thread_index] = clib_mem_alloc_aligned
+ (VLIB_THREAD_STACK_SIZE, VLIB_THREAD_STACK_SIZE);
+
+ /*
+ * Disallow writes to the bottom page of the stack, to
+ * catch stack overflows.
+ */
+ if (mprotect (vlib_thread_stacks[thread_index],
+ clib_mem_get_page_size (), PROT_READ) < 0)
+ clib_unix_warning ("thread stack");
+ return vlib_thread_stacks[thread_index];
+}
+
+int
+vlib_unix_main (int argc, char *argv[])
+{
+ vlib_main_t *vm = &vlib_global_main; /* one and only time for this! */
+ unformat_input_t input;
+ clib_error_t *e;
+ int i;
+
+ vm->argv = (u8 **) argv;
+ vm->name = argv[0];
+ vm->heap_base = clib_mem_get_heap ();
+ ASSERT (vm->heap_base);
+
+ unformat_init_command_line (&input, (char **) vm->argv);
+ if ((e = vlib_plugin_config (vm, &input)))
+ {
+ clib_error_report (e);
+ return 1;
+ }
+ unformat_free (&input);
+
+ i = vlib_plugin_early_init (vm);
+ if (i)
+ return i;
+
+ unformat_init_command_line (&input, (char **) vm->argv);
+ if (vm->init_functions_called == 0)
+ vm->init_functions_called = hash_create (0, /* value bytes */ 0);
+ e = vlib_call_all_config_functions (vm, &input, 1 /* early */ );
+ if (e != 0)
+ {
+ clib_error_report (e);
+ return 1;
+ }
+ unformat_free (&input);
+
+ vlib_thread_stack_init (0);
+
+ __os_thread_index = 0;
+
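+  /* Switch to the newly allocated, guard-paged stack and run thread0(),
+     which parses the command line and enters vlib_main(). */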
+ i = clib_calljmp (thread0, (uword) vm,
+ (void *) (vlib_thread_stacks[0] +
+ VLIB_THREAD_STACK_SIZE));
+ return i;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c
new file mode 100644
index 00000000..3f1cd99d
--- /dev/null
+++ b/src/vlib/unix/mc_socket.c
@@ -0,0 +1,1050 @@
+/*
+ * mc_socket.c: socket based multicast for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/mc_socket.h>
+
+#include <sys/ioctl.h> /* for FIONBIO */
+#include <netinet/tcp.h> /* for TCP_NODELAY */
+#include <net/if.h> /* for struct ifreq */
+
+static u8 *
+format_socket_peer_id (u8 * s, va_list * args)
+{
+ u64 peer_id_as_u64 = va_arg (*args, u64);
+ mc_peer_id_t peer_id;
+ peer_id.as_u64 = peer_id_as_u64;
+ u32 a = mc_socket_peer_id_get_address (peer_id);
+ u32 p = mc_socket_peer_id_get_port (peer_id);
+
+ s = format (s, "%U:%04x", format_network_address, AF_INET, &a, ntohs (p));
+
+ return s;
+}
+
+typedef void (mc_msg_handler_t) (mc_main_t * mcm, void *msg,
+ u32 buffer_index);
+
+always_inline void
+msg_handler (mc_main_t * mcm,
+ u32 buffer_index, u32 handler_frees_buffer, void *_h)
+{
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_msg_handler_t *h = _h;
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+ void *the_msg = vlib_buffer_get_current (b);
+
+ h (mcm, the_msg, buffer_index);
+ if (!handler_frees_buffer)
+ vlib_buffer_free_one (vm, buffer_index);
+}
+
+static uword
+append_buffer_index_to_iovec (vlib_main_t * vm,
+ u32 buffer_index, struct iovec **iovs_return)
+{
+ struct iovec *i;
+ vlib_buffer_t *b;
+ u32 bi = buffer_index;
+ u32 l = 0;
+
+ while (1)
+ {
+ b = vlib_get_buffer (vm, bi);
+ vec_add2 (*iovs_return, i, 1);
+ i->iov_base = vlib_buffer_get_current (b);
+ i->iov_len = b->current_length;
+ l += i->iov_len;
+ if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ bi = b->next_buffer;
+ }
+
+ return l;
+}
+
+static clib_error_t *
+sendmsg_helper (mc_socket_main_t * msm,
+ int socket, struct sockaddr_in *tx_addr, u32 buffer_index)
+{
+ vlib_main_t *vm = msm->mc_main.vlib_main;
+ struct msghdr h;
+ word n_bytes, n_bytes_tx, n_retries;
+
+ memset (&h, 0, sizeof (h));
+ h.msg_name = tx_addr;
+ h.msg_namelen = sizeof (tx_addr[0]);
+
+ if (msm->iovecs)
+ _vec_len (msm->iovecs) = 0;
+
+ n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
+ ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
+ if (n_bytes > msm->mc_main.transport.max_packet_size)
+    clib_error ("sending packet larger than interface MTU %d bytes", n_bytes);
+
+ h.msg_iov = msm->iovecs;
+ h.msg_iovlen = vec_len (msm->iovecs);
+
+ n_retries = 0;
+ while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) != n_bytes
+ && errno == EAGAIN)
+ n_retries++;
+ if (n_bytes_tx != n_bytes)
+ {
+ clib_unix_warning ("sendmsg");
+ return 0;
+ }
+ if (n_retries)
+ {
+      ELOG_TYPE_DECLARE (e) = {
+	.format = "sendmsg-helper: %d retries",
+	.format_args = "i4",
+      };
+ struct
+ {
+ u32 retries;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->retries = n_retries;
+ }
+ return 0;
+}
+
+static clib_error_t *
+tx_buffer (void *transport, mc_transport_type_t type, u32 buffer_index)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) transport;
+ vlib_main_t *vm = msm->mc_main.vlib_main;
+ mc_multicast_socket_t *ms = &msm->multicast_sockets[type];
+ clib_error_t *error;
+ error = sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index);
+ if (type != MC_TRANSPORT_USER_REQUEST_TO_RELAY)
+ vlib_buffer_free_one (vm, buffer_index);
+ return error;
+}
+
+static clib_error_t *
+tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
+{
+ struct sockaddr_in tx_addr;
+ mc_socket_main_t *msm = (mc_socket_main_t *) transport;
+ vlib_main_t *vm = msm->mc_main.vlib_main;
+ clib_error_t *error;
+
+ memset (&tx_addr, 0, sizeof (tx_addr));
+ tx_addr.sin_family = AF_INET;
+ tx_addr.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id);
+ tx_addr.sin_port = mc_socket_peer_id_get_port (dest_peer_id);
+
+ error = sendmsg_helper (msm, msm->ack_socket, &tx_addr, buffer_index);
+ vlib_buffer_free_one (vm, buffer_index);
+ return error;
+}
+
+static clib_error_t *
+recvmsg_helper (mc_socket_main_t * msm,
+ int socket,
+ struct sockaddr_in *rx_addr,
+ u32 * buffer_index, u32 drop_message)
+{
+ vlib_main_t *vm = msm->mc_main.vlib_main;
+ vlib_buffer_t *b;
+ uword n_left, n_alloc, n_mtu, i, i_rx;
+ const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+ word n_bytes_left;
+
+ /* Make sure we have at least a MTU worth of buffers. */
+ n_mtu = msm->rx_mtu_n_buffers;
+ n_left = vec_len (msm->rx_buffers);
+ if (n_left < n_mtu)
+ {
+ uword max_alloc = 8 * n_mtu;
+ vec_validate (msm->rx_buffers, max_alloc - 1);
+ n_alloc =
+ vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
+ _vec_len (msm->rx_buffers) = n_left + n_alloc;
+ }
+
+ ASSERT (vec_len (msm->rx_buffers) >= n_mtu);
+ vec_validate (msm->iovecs, n_mtu - 1);
+
+ /* Allocate RX buffers from end of rx_buffers.
+ Turn them into iovecs to pass to readv. */
+ i_rx = vec_len (msm->rx_buffers) - 1;
+ for (i = 0; i < n_mtu; i++)
+ {
+ b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]);
+ msm->iovecs[i].iov_base = b->data;
+ msm->iovecs[i].iov_len = buffer_size;
+ }
+ _vec_len (msm->iovecs) = n_mtu;
+
+ {
+ struct msghdr h;
+
+ memset (&h, 0, sizeof (h));
+ if (rx_addr)
+ {
+ h.msg_name = rx_addr;
+ h.msg_namelen = sizeof (rx_addr[0]);
+ }
+ h.msg_iov = msm->iovecs;
+ h.msg_iovlen = vec_len (msm->iovecs);
+
+ n_bytes_left = recvmsg (socket, &h, 0);
+ if (n_bytes_left < 0)
+ return clib_error_return_unix (0, "recvmsg");
+ }
+
+ if (drop_message)
+ {
+ *buffer_index = ~0;
+ return 0;
+ }
+
+ *buffer_index = msm->rx_buffers[i_rx];
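+
+  /* Walk down rx_buffers, fixing up buffer lengths and chaining
+     buffers until all received bytes are accounted for. */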
+ while (1)
+ {
+ b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]);
+
+ b->flags = 0;
+ b->current_data = 0;
+ b->current_length =
+ n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
+
+ n_bytes_left -= buffer_size;
+
+ if (n_bytes_left <= 0)
+ break;
+
+ i_rx--;
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b->next_buffer = msm->rx_buffers[i_rx];
+ }
+
+ _vec_len (msm->rx_buffers) = i_rx;
+
+ return 0 /* no error */ ;
+}
+
+static clib_error_t *
+mastership_socket_read_ready (clib_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ mc_multicast_socket_t *ms =
+ &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP];
+ clib_error_t *error;
+ u32 bi;
+
+ error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
+ 0);
+ if (!error)
+ msg_handler (mcm, bi,
+ /* handler_frees_buffer */ 0,
+ mc_msg_master_assert_handler);
+
+ return error;
+}
+
+static clib_error_t *
+to_relay_socket_read_ready (clib_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ vlib_main_t *vm = msm->mc_main.vlib_main;
+ mc_multicast_socket_t *ms_to_relay =
+ &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY];
+ mc_multicast_socket_t *ms_from_relay =
+ &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
+ clib_error_t *error;
+ u32 bi;
+ u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
+
+ /* Not the ordering master? Turf the msg */
+ error = recvmsg_helper (msm, ms_to_relay->socket, /* rx_addr */ 0, &bi,
+ /* drop_message */ !is_master);
+
+ /* If we are the master, number and rebroadcast the msg. */
+ if (!error && is_master)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ mc_msg_user_request_t *mp = vlib_buffer_get_current (b);
+ mp->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence);
+ mcm->relay_global_sequence++;
+ error =
+ sendmsg_helper (msm, ms_from_relay->socket, &ms_from_relay->tx_addr,
+ bi);
+ vlib_buffer_free_one (vm, bi);
+ }
+
+ return error;
+}
+
+static clib_error_t *
+from_relay_socket_read_ready (clib_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ mc_multicast_socket_t *ms =
+ &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
+ clib_error_t *error;
+ u32 bi;
+
+ error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
+ 0);
+ if (!error)
+ {
+ msg_handler (mcm, bi, /* handler_frees_buffer */ 1,
+ mc_msg_user_request_handler);
+ }
+ return error;
+}
+
+static clib_error_t *
+join_socket_read_ready (clib_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_multicast_socket_t *ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
+ clib_error_t *error;
+ u32 bi;
+
+ error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
+ 0);
+ if (!error)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ mc_msg_join_or_leave_request_t *mp = vlib_buffer_get_current (b);
+
+ switch (clib_host_to_net_u32 (mp->type))
+ {
+ case MC_MSG_TYPE_join_or_leave_request:
+ msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+ mc_msg_join_or_leave_request_handler);
+ break;
+
+ case MC_MSG_TYPE_join_reply:
+ msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+ mc_msg_join_reply_handler);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+ }
+ return error;
+}
+
+static clib_error_t *
+ack_socket_read_ready (clib_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ clib_error_t *error;
+ u32 bi;
+
+ error = recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi,
+ /* drop_message */ 0);
+ if (!error)
+ msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+ mc_msg_user_ack_handler);
+ return error;
+}
+
+static void
+catchup_cleanup (mc_socket_main_t * msm,
+ mc_socket_catchup_t * c, clib_file_main_t * um,
+ clib_file_t * uf)
+{
+ hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor);
+ clib_file_del (um, uf);
+ vec_free (c->input_vector);
+ vec_free (c->output_vector);
+ pool_put (msm->catchups, c);
+}
+
+static mc_socket_catchup_t *
+find_catchup_from_file_descriptor (mc_socket_main_t * msm,
+ int file_descriptor)
+{
+ uword *p =
+ hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
+ return p ? pool_elt_at_index (msm->catchups, p[0]) : 0;
+}
+
+static clib_error_t *
+catchup_socket_read_ready (clib_file_t * uf, int is_server)
+{
+ clib_file_main_t *um = &file_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ mc_socket_catchup_t *c =
+ find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ word l, n, is_eof;
+
+ l = vec_len (c->input_vector);
+ vec_resize (c->input_vector, 4096);
+ n =
+ read (uf->file_descriptor, c->input_vector + l,
+ vec_len (c->input_vector) - l);
+ is_eof = n == 0;
+
+ if (n < 0)
+ {
+ if (errno == EAGAIN)
+ n = 0;
+ else
+ {
+ catchup_cleanup (msm, c, um, uf);
+ return clib_error_return_unix (0, "read");
+ }
+ }
+
+ _vec_len (c->input_vector) = l + n;
+
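+  /* A catchup message is delimited by EOF: the sender shuts down its
+     write side once the output vector drains (see
+     catchup_socket_write_ready), so a zero-byte read with buffered
+     input marks a complete request or reply. */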
+ if (is_eof && vec_len (c->input_vector) > 0)
+ {
+ if (is_server)
+ {
+ mc_msg_catchup_request_handler (mcm, (void *) c->input_vector,
+ c - msm->catchups);
+ _vec_len (c->input_vector) = 0;
+ }
+ else
+ {
+ mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector,
+ c - msm->catchups);
+ c->input_vector = 0; /* reply handler is responsible for freeing vector */
+ catchup_cleanup (msm, c, um, uf);
+ }
+ }
+
+ return 0 /* no error */ ;
+}
+
+static clib_error_t *
+catchup_server_read_ready (clib_file_t * uf)
+{
+ return catchup_socket_read_ready (uf, /* is_server */ 1);
+}
+
+static clib_error_t *
+catchup_client_read_ready (clib_file_t * uf)
+{
+ if (MC_EVENT_LOGGING)
+ {
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ vlib_main_t *vm = msm->mc_main.vlib_main;
+
+ ELOG_TYPE (e, "catchup_client_read_ready");
+ ELOG (&vm->elog_main, e, 0);
+ }
+ return catchup_socket_read_ready (uf, /* is_server */ 0);
+}
+
+static clib_error_t *
+catchup_socket_write_ready (clib_file_t * uf, int is_server)
+{
+ clib_file_main_t *um = &file_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_socket_catchup_t *c =
+ find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ clib_error_t *error = 0;
+ int n;
+
+ if (c->connect_in_progress)
+ {
+ u32 len, value;
+
+ c->connect_in_progress = 0;
+ len = sizeof (value);
+ if (getsockopt (c->socket, SOL_SOCKET, SO_ERROR, &value, &len) < 0)
+ {
+ error = clib_error_return_unix (0, "getsockopt SO_ERROR");
+ goto error_quit;
+ }
+ if (value != 0)
+ {
+ error =
+ clib_error_return_code (0, value, CLIB_ERROR_ERRNO_VALID,
+ "connect fails");
+ goto error_quit;
+ }
+ }
+
+ while (1)
+ {
+ u32 n_this_write;
+
+ n_this_write =
+ clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
+ msm->rx_mtu_n_bytes -
+ 64 /* ip + tcp + option allowance */ );
+
+ if (n_this_write <= 0)
+ break;
+
+ do
+ {
+ n = write (uf->file_descriptor,
+ c->output_vector + c->output_vector_n_written,
+ n_this_write);
+ }
+ while (n < 0 && errno == EAGAIN);
+
+ if (n < 0)
+ {
+ error = clib_error_return_unix (0, "write");
+ goto error_quit;
+ }
+ c->output_vector_n_written += n;
+ }
+
+ if (c->output_vector_n_written >= vec_len (c->output_vector))
+ {
+ if (!is_server)
+ {
+ uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ /* Send EOF to other side. */
+ shutdown (uf->file_descriptor, SHUT_WR);
+ return error;
+ }
+ else
+ {
+ error_quit:
+ catchup_cleanup (msm, c, um, uf);
+ }
+ }
+ return error;
+}
+
+static clib_error_t *
+catchup_server_write_ready (clib_file_t * uf)
+{
+ return catchup_socket_write_ready (uf, /* is_server */ 1);
+}
+
+static clib_error_t *
+catchup_client_write_ready (clib_file_t * uf)
+{
+ return catchup_socket_write_ready (uf, /* is_server */ 0);
+}
+
+static clib_error_t *
+catchup_socket_error_ready (clib_file_t * uf)
+{
+ clib_file_main_t *um = &file_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_socket_catchup_t *c =
+ find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ catchup_cleanup (msm, c, um, uf);
+ return clib_error_return (0, "error");
+}
+
+static clib_error_t *
+catchup_listen_read_ready (clib_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ struct sockaddr_in client_addr;
+ int client_len;
+ mc_socket_catchup_t *c;
+ clib_file_t template = { 0 };
+
+ pool_get (msm->catchups, c);
+ memset (c, 0, sizeof (c[0]));
+
+ client_len = sizeof (client_addr);
+
+  /* Acquires the non-blocking attribute from the server socket. */
+ c->socket = accept (uf->file_descriptor,
+ (struct sockaddr *) &client_addr,
+ (socklen_t *) & client_len);
+
+ if (c->socket < 0)
+ {
+ pool_put (msm->catchups, c);
+ return clib_error_return_unix (0, "accept");
+ }
+
+ if (MC_EVENT_LOGGING)
+ {
+ mc_main_t *mcm = &msm->mc_main;
+ vlib_main_t *vm = mcm->vlib_main;
+
+      ELOG_TYPE_DECLARE (e) = {
+	.format = "catchup accepted from 0x%lx",
+	.format_args = "i4",
+      };
+ struct
+ {
+ u32 addr;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->addr = ntohl (client_addr.sin_addr.s_addr);
+ }
+
+ /* Disable the Nagle algorithm, ship catchup pkts immediately */
+ {
+ int one = 1;
+ if ((setsockopt (c->socket, IPPROTO_TCP,
+ TCP_NODELAY, (void *) &one, sizeof (one))) < 0)
+ {
+ clib_unix_warning ("catchup socket: set TCP_NODELAY");
+ }
+ }
+
+ template.read_function = catchup_server_read_ready;
+ template.write_function = catchup_server_write_ready;
+ template.error_function = catchup_socket_error_ready;
+ template.file_descriptor = c->socket;
+ template.private_data = pointer_to_uword (msm);
+ c->clib_file_index = clib_file_add (&file_main, &template);
+ hash_set (msm->catchup_index_by_file_descriptor, c->socket,
+ c - msm->catchups);
+
+ return 0;
+}
+
+/* Bind to an unused port and return it. */
+static word
+find_and_bind_to_free_port (word sock, word port)
+{
+ for (; port < 1 << 16; port++)
+ {
+ struct sockaddr_in a;
+
+ memset (&a, 0, sizeof (a)); /* Warnings be gone */
+
+ a.sin_family = PF_INET;
+ a.sin_addr.s_addr = INADDR_ANY;
+ a.sin_port = htons (port);
+
+ if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
+ break;
+ }
+
+ return port < 1 << 16 ? port : -1;
+}
+
+static clib_error_t *
+setup_multicast_socket (mc_socket_main_t * msm,
+ mc_multicast_socket_t * ms,
+ char *type, uword udp_port)
+{
+ int one = 1;
+ struct ip_mreq mcast_req;
+
+ if (!msm->multicast_ttl)
+ msm->multicast_ttl = 1;
+
+  /* multicast TX socket for this transport type */
+ if ((ms->socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
+ return clib_error_return_unix (0, "%s socket", type);
+
+ {
+ u8 ttl = msm->multicast_ttl;
+
+ if ((setsockopt (ms->socket, IPPROTO_IP,
+ IP_MULTICAST_TTL, (void *) &ttl, sizeof (ttl))) < 0)
+ return clib_error_return_unix (0, "%s set multicast ttl", type);
+ }
+
+ if (setsockopt (ms->socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one)) <
+ 0)
+ return clib_error_return_unix (0, "%s setsockopt SO_REUSEADDR", type);
+
+ memset (&ms->tx_addr, 0, sizeof (ms->tx_addr));
+ ms->tx_addr.sin_family = AF_INET;
+ ms->tx_addr.sin_addr.s_addr =
+ htonl (msm->multicast_tx_ip4_address_host_byte_order);
+ ms->tx_addr.sin_port = htons (udp_port);
+
+ if (bind (ms->socket, (struct sockaddr *) &ms->tx_addr,
+ sizeof (ms->tx_addr)) < 0)
+ return clib_error_return_unix (0, "%s bind", type);
+
+ memset (&mcast_req, 0, sizeof (mcast_req));
+ mcast_req.imr_multiaddr.s_addr =
+ htonl (msm->multicast_tx_ip4_address_host_byte_order);
+ mcast_req.imr_interface.s_addr = msm->if_ip4_address_net_byte_order;
+
+ if ((setsockopt (ms->socket, IPPROTO_IP,
+ IP_ADD_MEMBERSHIP, (void *) &mcast_req,
+ sizeof (mcast_req))) < 0)
+ return clib_error_return_unix (0, "%s IP_ADD_MEMBERSHIP setsockopt",
+ type);
+
+ if (ioctl (ms->socket, FIONBIO, &one) < 0)
+ return clib_error_return_unix (0, "%s set FIONBIO", type);
+
+ /* FIXME remove this when we support tx_ready. */
+ {
+ u32 len = 1 << 20;
+ socklen_t sl = sizeof (len);
+ if (setsockopt (ms->socket, SOL_SOCKET, SO_SNDBUF, &len, sl) < 0)
+ clib_unix_error ("setsockopt");
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+socket_setup (mc_socket_main_t * msm)
+{
+ int one = 1;
+ clib_error_t *error;
+ u32 port;
+
+ if (!msm->base_multicast_udp_port_host_byte_order)
+ msm->base_multicast_udp_port_host_byte_order =
+ 0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */ )
+ - 1);
+
+ port = msm->base_multicast_udp_port_host_byte_order;
+
+  error = setup_multicast_socket (msm,
+ &msm->multicast_sockets
+ [MC_TRANSPORT_MASTERSHIP], "mastership",
+ port++);
+ if (error)
+ return error;
+
+  error = setup_multicast_socket (msm,
+ &msm->multicast_sockets[MC_TRANSPORT_JOIN],
+ "join", port++);
+ if (error)
+ return error;
+
+  error = setup_multicast_socket (msm,
+ &msm->multicast_sockets
+ [MC_TRANSPORT_USER_REQUEST_TO_RELAY],
+ "to relay", port++);
+ if (error)
+ return error;
+
+  error = setup_multicast_socket (msm,
+ &msm->multicast_sockets
+ [MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
+ "from relay", port++);
+ if (error)
+ return error;
+
+ /* ACK rx socket */
+ msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (msm->ack_socket < 0)
+ return clib_error_return_unix (0, "ack socket");
+
+ msm->ack_udp_port = find_and_bind_to_free_port (msm->ack_socket, port++);
+
+ if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
+ return clib_error_return_unix (0, "ack socket FIONBIO");
+
+ msm->catchup_server_socket = socket (AF_INET, SOCK_STREAM, 0);
+ if (msm->catchup_server_socket < 0)
+ return clib_error_return_unix (0, "catchup server socket");
+
+ msm->catchup_tcp_port =
+ find_and_bind_to_free_port (msm->catchup_server_socket, port++);
+
+ if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
+ return clib_error_return_unix (0, "catchup server socket FIONBIO");
+
+ if (listen (msm->catchup_server_socket, 5) < 0)
+ return clib_error_return_unix (0, "catchup server socket listen");
+
+ /* epoll setup for multicast mastership socket */
+ {
+ clib_file_t template = { 0 };
+
+ template.read_function = mastership_socket_read_ready;
+ template.file_descriptor =
+ msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
+ template.private_data = (uword) msm;
+ clib_file_add (&file_main, &template);
+
+ /* epoll setup for multicast to_relay socket */
+ template.read_function = to_relay_socket_read_ready;
+ template.file_descriptor =
+ msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
+ template.private_data = (uword) msm;
+ clib_file_add (&file_main, &template);
+
+ /* epoll setup for multicast from_relay socket */
+ template.read_function = from_relay_socket_read_ready;
+ template.file_descriptor =
+ msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
+ template.private_data = (uword) msm;
+ clib_file_add (&file_main, &template);
+
+ template.read_function = join_socket_read_ready;
+ template.file_descriptor =
+ msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
+ template.private_data = (uword) msm;
+ clib_file_add (&file_main, &template);
+
+ /* epoll setup for ack rx socket */
+ template.read_function = ack_socket_read_ready;
+ template.file_descriptor = msm->ack_socket;
+ template.private_data = (uword) msm;
+ clib_file_add (&file_main, &template);
+
+ /* epoll setup for TCP catchup server */
+ template.read_function = catchup_listen_read_ready;
+ template.file_descriptor = msm->catchup_server_socket;
+ template.private_data = (uword) msm;
+ clib_file_add (&file_main, &template);
+ }
+
+ return 0;
+}
+
+static void *
+catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes,
+ u8 * set_output_vector)
+{
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ c->clib_file_index);
+ u8 *result = 0;
+
+ if (set_output_vector)
+ c->output_vector = set_output_vector;
+ else
+ vec_add2 (c->output_vector, result, n_bytes);
+ if (vec_len (c->output_vector) > 0)
+ {
+ int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ if (!skip_update)
+ file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+ return result;
+}
+
+static uword
+catchup_request_fun (void *transport_main,
+ u32 stream_index, mc_peer_id_t catchup_peer_id)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
+ mc_main_t *mcm = &msm->mc_main;
+ vlib_main_t *vm = mcm->vlib_main;
+ mc_socket_catchup_t *c;
+ struct sockaddr_in addr;
+ clib_file_main_t *um = &file_main;
+ int one = 1;
+
+ pool_get (msm->catchups, c);
+ memset (c, 0, sizeof (*c));
+
+ c->socket = socket (AF_INET, SOCK_STREAM, 0);
+ if (c->socket < 0)
+ {
+ clib_unix_warning ("socket");
+ return 0;
+ }
+
+ if (ioctl (c->socket, FIONBIO, &one) < 0)
+ {
+ clib_unix_warning ("FIONBIO");
+ return 0;
+ }
+
+ memset (&addr, 0, sizeof (addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
+ addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);
+
+ c->connect_in_progress = 1;
+
+ if (MC_EVENT_LOGGING)
+ {
+      ELOG_TYPE_DECLARE (e) = {
+	.format = "connecting to peer 0x%Lx",
+	.format_args = "i8",
+      };
+ struct
+ {
+ u64 peer;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->peer = catchup_peer_id.as_u64;
+ }
+
+ if (connect (c->socket, (const void *) &addr, sizeof (addr))
+ < 0 && errno != EINPROGRESS)
+ {
+ clib_unix_warning ("connect to %U fails",
+ format_socket_peer_id, catchup_peer_id);
+ return 0;
+ }
+
+ {
+ clib_file_t template = { 0 };
+
+ template.read_function = catchup_client_read_ready;
+ template.write_function = catchup_client_write_ready;
+ template.error_function = catchup_socket_error_ready;
+ template.file_descriptor = c->socket;
+ template.private_data = (uword) msm;
+ c->clib_file_index = clib_file_add (um, &template);
+
+ hash_set (msm->catchup_index_by_file_descriptor, c->socket,
+ c - msm->catchups);
+ }
+
+ {
+ mc_msg_catchup_request_t *mp;
+ mp = catchup_add_pending_output (c, sizeof (mp[0]), /* set_output_vector */
+ 0);
+ mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
+ mp->stream_index = stream_index;
+ mc_byte_swap_msg_catchup_request (mp);
+ }
+
+ return c - msm->catchups;
+}
+
+static void
+catchup_send_fun (void *transport_main, uword opaque, u8 * data)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
+ mc_socket_catchup_t *c = pool_elt_at_index (msm->catchups, opaque);
+ catchup_add_pending_output (c, 0, data);
+}
+
+static int
+find_interface_ip4_address (char *if_name, u32 * ip4_address, u32 * mtu)
+{
+ int fd;
+ struct ifreq ifr;
+ struct sockaddr_in *sa;
+
+ /* Dig up our IP address */
+  fd = socket (PF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ {
+ clib_unix_error ("socket");
+ return -1;
+ }
+
+ ifr.ifr_addr.sa_family = AF_INET;
+ strncpy (ifr.ifr_name, if_name, sizeof (ifr.ifr_name) - 1);
+ if (ioctl (fd, SIOCGIFADDR, &ifr) < 0)
+ {
+      clib_unix_error ("ioctl(SIOCGIFADDR)");
+ close (fd);
+ return -1;
+ }
+
+ sa = (void *) &ifr.ifr_addr;
+ clib_memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));
+
+ if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
+ {
+ close (fd);
+ return -1;
+ }
+ if (mtu)
+ *mtu = ifr.ifr_mtu - ( /* IP4 header */ 20 + /* UDP header */ 8);
+
+ close (fd);
+
+ return 0;
+}
+
+clib_error_t *
+mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
+ int n_intfcs_to_probe)
+{
+ clib_error_t *error;
+ mc_main_t *mcm;
+ u32 mtu;
+
+ mcm = &msm->mc_main;
+
+ /* 239.255.0.7 */
+ if (!msm->multicast_tx_ip4_address_host_byte_order)
+ msm->multicast_tx_ip4_address_host_byte_order = 0xefff0007;
+
+ {
+ u32 i, a, win;
+
+ win = 0;
+ if (msm->multicast_interface_name)
+ {
+ win =
+ !find_interface_ip4_address (msm->multicast_interface_name, &a,
+ &mtu);
+ }
+ else
+ {
+ for (i = 0; i < n_intfcs_to_probe; i++)
+ if (!find_interface_ip4_address (intfc_probe_list[i], &a, &mtu))
+ {
+ win = 1;
+ msm->multicast_interface_name = intfc_probe_list[i];
+ break;
+ }
+ }
+
+ if (!win)
+ return clib_error_return (0, "can't find interface ip4 address");
+
+ msm->if_ip4_address_net_byte_order = a;
+ }
+
+ msm->rx_mtu_n_bytes = mtu;
+ msm->rx_mtu_n_buffers =
+ msm->rx_mtu_n_bytes / VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+ msm->rx_mtu_n_buffers +=
+ (msm->rx_mtu_n_bytes % VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES) != 0;
+
+ error = socket_setup (msm);
+ if (error)
+ return error;
+
+ mcm->transport.our_ack_peer_id =
+ mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
+ msm->ack_udp_port);
+
+ mcm->transport.our_catchup_peer_id =
+ mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
+ msm->catchup_tcp_port);
+
+ mcm->transport.tx_buffer = tx_buffer;
+ mcm->transport.tx_ack = tx_ack;
+ mcm->transport.catchup_request_fun = catchup_request_fun;
+ mcm->transport.catchup_send_fun = catchup_send_fun;
+ mcm->transport.format_peer_id = format_socket_peer_id;
+ mcm->transport.opaque = msm;
+ mcm->transport.max_packet_size = mtu;
+
+ mc_main_init (mcm, "socket");
+
+ return error;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/mc_socket.h b/src/vlib/unix/mc_socket.h
new file mode 100644
index 00000000..3686c824
--- /dev/null
+++ b/src/vlib/unix/mc_socket.h
@@ -0,0 +1,137 @@
+/*
+ * mc_socket.h: socket based multicast for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_mc_socket_h__
+#define __included_mc_socket_h__
+
+#include <vlib/unix/unix.h>
+#include <netinet/in.h>
+
+typedef struct
+{
+ int socket;
+ struct sockaddr_in tx_addr;
+} mc_multicast_socket_t;
+
+/* TCP catchup socket */
+typedef struct
+{
+ int socket;
+ u32 clib_file_index;
+
+ u8 *input_vector;
+ u8 *output_vector;
+ u32 output_vector_n_written;
+
+ u32 connect_in_progress;
+} mc_socket_catchup_t;
+
+typedef struct mc_socket_main_t
+{
+ mc_main_t mc_main;
+
+ /* Multicast mastership/to-relay/from-relay sockets. */
+ mc_multicast_socket_t multicast_sockets[MC_N_TRANSPORT_TYPE];
+
+  /* Unicast UDP ack socket */
+ int ack_socket;
+
+ /* TCP catchup server socket */
+ int catchup_server_socket;
+
+ /* Pool of stream-private catchup sockets */
+ mc_socket_catchup_t *catchups;
+
+ uword *catchup_index_by_file_descriptor;
+
+  /* Receive MTU in bytes and in VLIB buffers. */
+  u32 rx_mtu_n_bytes;
+  u32 rx_mtu_n_buffers;
+
+ /* Vector of RX VLIB buffers. */
+ u32 *rx_buffers;
+ /* Vector of scatter/gather descriptors for sending/receiving VLIB buffers
+ via kernel. */
+ struct iovec *iovecs;
+
+ /* IP address of interface to use for multicast. */
+ u32 if_ip4_address_net_byte_order;
+
+ u32 ack_udp_port;
+ u32 catchup_tcp_port;
+
+ /* Interface on which to listen for multicasts. */
+ char *multicast_interface_name;
+
+ /* Multicast address to use (e.g. 0xefff0000).
+ Host byte order. */
+ u32 multicast_tx_ip4_address_host_byte_order;
+
+ /* TTL to use for multicasts. */
+ u32 multicast_ttl;
+
+ /* Multicast ports for mastership, joins, etc. will be chosen
+ starting at the given port in host byte order.
+ A total of MC_N_TRANSPORT_TYPE ports will be used. */
+ u32 base_multicast_udp_port_host_byte_order;
+} mc_socket_main_t;
+
+always_inline u32
+mc_socket_peer_id_get_address (mc_peer_id_t i)
+{
+ u32 a = ((i.as_u8[0] << 24)
+ | (i.as_u8[1] << 16) | (i.as_u8[2] << 8) | (i.as_u8[3] << 0));
+ return clib_host_to_net_u32 (a);
+}
+
+always_inline u32
+mc_socket_peer_id_get_port (mc_peer_id_t i)
+{
+ return clib_host_to_net_u16 ((i.as_u8[4] << 8) | i.as_u8[5]);
+}
+
+static_always_inline mc_peer_id_t
+mc_socket_set_peer_id (u32 address_net_byte_order, u32 port_host_byte_order)
+{
+ mc_peer_id_t i;
+ u32 a = ntohl (address_net_byte_order);
+ u32 p = port_host_byte_order;
+ i.as_u8[0] = (a >> 24) & 0xff;
+ i.as_u8[1] = (a >> 16) & 0xff;
+ i.as_u8[2] = (a >> 8) & 0xff;
+ i.as_u8[3] = (a >> 0) & 0xff;
+ i.as_u8[4] = (p >> 8) & 0xff;
+ i.as_u8[5] = (p >> 0) & 0xff;
+ i.as_u8[6] = 0;
+ i.as_u8[7] = 0;
+ return i;
+}
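+
+/*
+ * Worked example (illustrative): for address 1.2.3.4 and port 5000
+ * (0x1388), mc_socket_set_peer_id() stores the bytes
+ * 01 02 03 04 13 88 00 00, from which the two accessors above recover
+ * the address and the port, each in network byte order.
+ */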
+
+clib_error_t *mc_socket_main_init (mc_socket_main_t * msm,
+ char **intfc_probe_list,
+ int n_intfcs_to_probe);
+#endif /* __included_mc_socket_h__ */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c
new file mode 100644
index 00000000..c2741aaa
--- /dev/null
+++ b/src/vlib/unix/plugin.c
@@ -0,0 +1,553 @@
+/*
+ * plugin.c: plugin handling
+ *
+ * Copyright (c) 2011 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/unix/plugin.h>
+#include <vppinfra/elf.h>
+#include <dlfcn.h>
+#include <dirent.h>
+
+plugin_main_t vlib_plugin_main;
+
+char *vlib_plugin_path __attribute__ ((weak));
+char *vlib_plugin_path = "";
+char *vlib_plugin_app_version __attribute__ ((weak));
+char *vlib_plugin_app_version = "";
+
+void *
+vlib_get_plugin_symbol (char *plugin_name, char *symbol_name)
+{
+ plugin_main_t *pm = &vlib_plugin_main;
+ uword *p;
+ plugin_info_t *pi;
+
+ if ((p = hash_get_mem (pm->plugin_by_name_hash, plugin_name)) == 0)
+ return 0;
+
+ pi = vec_elt_at_index (pm->plugin_info, p[0]);
+ return dlsym (pi->handle, symbol_name);
+}
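+
+/* Usage sketch; plugin and symbol names are illustrative:
+
+     void (*fp) (void) =
+       vlib_get_plugin_symbol ("example_plugin.so", "example_function");
+     if (fp)
+       (*fp) ();
+*/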
+
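+/* Copy a possibly non-NUL-terminated char array of at most len bytes
+   into a NUL-terminated vector. */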
+static char *
+str_array_to_vec (char *array, int len)
+{
+ char c, *r = 0;
+ int n = 0;
+
+ do
+ {
+ c = array[n];
+ vec_add1 (r, c);
+ }
+ while (c && ++n < len);
+
+ if (c)
+ vec_add1 (r, 0);
+
+ return r;
+}
+
+static int
+load_one_plugin (plugin_main_t * pm, plugin_info_t * pi, int from_early_init)
+{
+ void *handle;
+ clib_error_t *error;
+ elf_main_t em = { 0 };
+ elf_section_t *section;
+ u8 *data;
+ char *version_required;
+ vlib_plugin_registration_t *reg;
+ plugin_config_t *pc = 0;
+ uword *p;
+
+ if (elf_read_file (&em, (char *) pi->filename))
+ return -1;
+
+ error = elf_get_section_by_name (&em, ".vlib_plugin_registration",
+ &section);
+ if (error)
+ {
+ clib_warning ("Not a plugin: %s\n", (char *) pi->name);
+ return -1;
+ }
+
+ data = elf_get_section_contents (&em, section->index, 1);
+ reg = (vlib_plugin_registration_t *) data;
+
+ if (vec_len (data) != sizeof (*reg))
+ {
+ clib_warning ("vlib_plugin_registration size mismatch in plugin %s\n",
+ (char *) pi->name);
+ goto error;
+ }
+
+ p = hash_get_mem (pm->config_index_by_name, pi->name);
+ if (p)
+ {
+ pc = vec_elt_at_index (pm->configs, p[0]);
+ if (pc->is_disabled)
+ {
+ clib_warning ("Plugin disabled: %s", pi->name);
+ goto error;
+ }
+ if (reg->default_disabled && pc->is_enabled == 0)
+ {
+ clib_warning ("Plugin disabled (default): %s", pi->name);
+ goto error;
+ }
+ }
+ else if (reg->default_disabled)
+ {
+ clib_warning ("Plugin disabled (default): %s", pi->name);
+ goto error;
+ }
+
+ version_required = str_array_to_vec ((char *) &reg->version_required,
+ sizeof (reg->version_required));
+
+ if ((strlen (version_required) > 0) &&
+ (strncmp (vlib_plugin_app_version, version_required,
+ strlen (version_required))))
+ {
+ clib_warning ("Plugin %s version mismatch: %s != %s",
+ pi->name, vlib_plugin_app_version, reg->version_required);
+ if (!(pc && pc->skip_version_check == 1))
+ {
+ vec_free (version_required);
+ goto error;
+ }
+ }
+
+ vec_free (version_required);
+ vec_free (data);
+ elf_main_free (&em);
+
+ handle = dlopen ((char *) pi->filename, RTLD_LAZY);
+
+ if (handle == 0)
+ {
+ clib_warning ("%s", dlerror ());
+ clib_warning ("Failed to load plugin '%s'", pi->name);
+ os_exit (1);
+ }
+
+ pi->handle = handle;
+
+ reg = dlsym (pi->handle, "vlib_plugin_registration");
+
+ if (reg == 0)
+ {
+ /* This should never happen unless somebody changes the registration macro */
+ clib_warning ("Missing plugin registration in plugin '%s'", pi->name);
+ os_exit (1);
+ }
+
+ pi->reg = reg;
+ pi->version = str_array_to_vec ((char *) &reg->version,
+ sizeof (reg->version));
+
+ if (reg->early_init)
+ {
+ clib_error_t *(*ei) (vlib_main_t *);
+ void *h;
+
+ h = dlsym (pi->handle, reg->early_init);
+ if (h)
+ {
+ ei = h;
+ error = (*ei) (pm->vlib_main);
+ if (error)
+ {
+ clib_error_report (error);
+ os_exit (1);
+ }
+ }
+ else
+ clib_warning ("Plugin %s: early init function %s set but not found",
+ (char *) pi->name, reg->early_init);
+ }
+
+ if (reg->description)
+ clib_warning ("Loaded plugin: %s (%s)", pi->name, reg->description);
+ else
+ clib_warning ("Loaded plugin: %s", pi->name);
+
+ return 0;
+error:
+ vec_free (data);
+ elf_main_free (&em);
+ return -1;
+}
+
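+/* Split the ':'-separated plugin path into a vector of NUL-terminated
+   directory names, e.g. "/a:/b" yields ["/a", "/b"]. */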
+static u8 **
+split_plugin_path (plugin_main_t * pm)
+{
+ int i;
+ u8 **rv = 0;
+ u8 *path = pm->plugin_path;
+ u8 *this = 0;
+
+ for (i = 0; i < vec_len (pm->plugin_path); i++)
+ {
+ if (path[i] != ':')
+ {
+ vec_add1 (this, path[i]);
+ continue;
+ }
+ vec_add1 (this, 0);
+ vec_add1 (rv, this);
+ this = 0;
+ }
+ if (this)
+ {
+ vec_add1 (this, 0);
+ vec_add1 (rv, this);
+ }
+ return rv;
+}
+
+static int
+plugin_name_sort_cmp (void *a1, void *a2)
+{
+ plugin_info_t *p1 = a1;
+ plugin_info_t *p2 = a2;
+
+ return strcmp ((char *) p1->name, (char *) p2->name);
+}
+
+int
+vlib_load_new_plugins (plugin_main_t * pm, int from_early_init)
+{
+ DIR *dp;
+ struct dirent *entry;
+ struct stat statb;
+ uword *p;
+ plugin_info_t *pi;
+ u8 **plugin_path;
+ u32 *load_fail_indices = 0;
+ int i;
+
+ plugin_path = split_plugin_path (pm);
+
+ for (i = 0; i < vec_len (plugin_path); i++)
+ {
+ dp = opendir ((char *) plugin_path[i]);
+
+ if (dp == 0)
+ continue;
+
+ while ((entry = readdir (dp)))
+ {
+ u8 *plugin_name;
+ u8 *filename;
+
+ if (pm->plugin_name_filter)
+ {
+ int j;
+ for (j = 0; j < vec_len (pm->plugin_name_filter); j++)
+ if (entry->d_name[j] != pm->plugin_name_filter[j])
+ goto next;
+ }
+
+ filename = format (0, "%s/%s%c", plugin_path[i], entry->d_name, 0);
+
+ /* Accept only readable files ending in .so */
+ char *ext = strrchr ((const char *) filename, '.');
+ if (!ext || (strcmp (ext, ".so") != 0) ||
+ stat ((char *) filename, &statb) < 0)
+ {
+ ignore:
+ vec_free (filename);
+ continue;
+ }
+
+ /* a dir or other things which aren't plugins */
+ if (!S_ISREG (statb.st_mode))
+ goto ignore;
+
+ plugin_name = format (0, "%s%c", entry->d_name, 0);
+ /* Have we seen this plugin already? */
+ p = hash_get_mem (pm->plugin_by_name_hash, plugin_name);
+ if (p == 0)
+ {
+ /* No, add it to the plugin vector */
+ vec_add2 (pm->plugin_info, pi, 1);
+ pi->name = plugin_name;
+ pi->filename = filename;
+ pi->file_info = statb;
+ hash_set_mem (pm->plugin_by_name_hash, plugin_name,
+ pi - pm->plugin_info);
+ }
+ next:
+ ;
+ }
+ closedir (dp);
+ vec_free (plugin_path[i]);
+ }
+ vec_free (plugin_path);
+
+ /*
+ * Sort the plugins by name. This is important.
+ * API traces contain absolute message numbers.
+ * Loading plugins in directory (vs. alphabetical) order
+ * makes trace replay incredibly fragile.
+ */
+ vec_sort_with_function (pm->plugin_info, plugin_name_sort_cmp);
+
+ /*
+ * Attempt to load the plugins
+ */
+ for (i = 0; i < vec_len (pm->plugin_info); i++)
+ {
+ pi = vec_elt_at_index (pm->plugin_info, i);
+
+ if (load_one_plugin (pm, pi, from_early_init))
+ {
+ /* Make a note of any which fail to load */
+ vec_add1 (load_fail_indices, i);
+ hash_unset_mem (pm->plugin_by_name_hash, pi->name);
+ vec_free (pi->name);
+ vec_free (pi->filename);
+ }
+ }
+
+ /* Remove plugin info vector elements corresponding to load failures */
+ if (vec_len (load_fail_indices) > 0)
+ {
+ for (i = vec_len (load_fail_indices) - 1; i >= 0; i--)
+ vec_delete (pm->plugin_info, 1, load_fail_indices[i]);
+ vec_free (load_fail_indices);
+ }
+
+ /* Recreate the plugin name hash */
+ for (i = 0; i < vec_len (pm->plugin_info); i++)
+ {
+ pi = vec_elt_at_index (pm->plugin_info, i);
+ hash_unset_mem (pm->plugin_by_name_hash, pi->name);
+ hash_set_mem (pm->plugin_by_name_hash, pi->name, pi - pm->plugin_info);
+ }
+
+ return 0;
+}
+
+int
+vlib_plugin_early_init (vlib_main_t * vm)
+{
+ plugin_main_t *pm = &vlib_plugin_main;
+
+ if (pm->plugin_path == 0)
+ pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0);
+
+ clib_warning ("plugin path %s", pm->plugin_path);
+
+ pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword));
+ pm->vlib_main = vm;
+
+ return vlib_load_new_plugins (pm, 1 /* from_early_init */ );
+}
+
+static clib_error_t *
+vlib_plugins_show_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ plugin_main_t *pm = &vlib_plugin_main;
+ u8 *s = 0;
+ u8 *key = 0;
+ uword value = 0;
+ int index = 1;
+ plugin_info_t *pi;
+
+ s = format (s, " Plugin path is: %s\n\n", pm->plugin_path);
+ s = format (s, " %-41s%-33s%s\n", "Plugin", "Version", "Description");
+
+ /* *INDENT-OFF* */
+ hash_foreach_mem (key, value, pm->plugin_by_name_hash,
+ {
+ if (key != 0)
+ {
+ pi = vec_elt_at_index (pm->plugin_info, value);
+ s = format (s, "%3d. %-40s %-32s %s\n", index, key, pi->version,
+ pi->reg->description ? pi->reg->description : "");
+ index++;
+ }
+ });
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (plugins_show_cmd, static) =
+{
+ .path = "show plugins",
+ .short_help = "show loaded plugins",
+ .function = vlib_plugins_show_cmd_fn,
+};
+/* *INDENT-ON* */
+
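+/*
+ * The functions below consume a startup-config stanza of roughly this
+ * shape (plugin names are illustrative):
+ *
+ *   plugins {
+ *     path /usr/lib/vpp_plugins
+ *     plugin example_plugin.so { disable }
+ *     plugin other_plugin.so { enable skip-version-check }
+ *   }
+ */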
+static clib_error_t *
+config_one_plugin (vlib_main_t * vm, char *name, unformat_input_t * input)
+{
+ plugin_main_t *pm = &vlib_plugin_main;
+ plugin_config_t *pc;
+ clib_error_t *error = 0;
+ uword *p;
+ int is_enable = 0;
+ int is_disable = 0;
+ int skip_version_check = 0;
+
+ if (pm->config_index_by_name == 0)
+ pm->config_index_by_name = hash_create_string (0, sizeof (uword));
+
+ p = hash_get_mem (pm->config_index_by_name, name);
+
+ if (p)
+ {
+ error = clib_error_return (0, "plugin '%s' already configured", name);
+ goto done;
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ is_enable = 1;
+ else if (unformat (input, "disable"))
+ is_disable = 1;
+ else if (unformat (input, "skip-version-check"))
+ skip_version_check = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (is_enable && is_disable)
+ {
+ error = clib_error_return (0, "please specify either enable or disable"
+ " for plugin '%s'", name);
+ goto done;
+ }
+
+ vec_add2 (pm->configs, pc, 1);
+ hash_set_mem (pm->config_index_by_name, name, pc - pm->configs);
+ pc->is_enabled = is_enable;
+ pc->is_disabled = is_disable;
+ pc->skip_version_check = skip_version_check;
+ pc->name = name;
+
+done:
+ return error;
+}
+
+clib_error_t *
+vlib_plugin_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ plugin_main_t *pm = &vlib_plugin_main;
+ clib_error_t *error = 0;
+ unformat_input_t in;
+
+ unformat_init (&in, 0, 0);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u8 *s, *v;
+ if (unformat (input, "%s %v", &s, &v))
+ {
+ if (strncmp ((const char *) s, "plugins", 8) == 0)
+ {
+ if (vec_len (in.buffer) > 0)
+ vec_add1 (in.buffer, ' ');
+ vec_add (in.buffer, v, vec_len (v));
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ vec_free (v);
+ vec_free (s);
+ }
+done:
+ input = &in;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ unformat_input_t sub_input;
+ u8 *s = 0;
+ if (unformat (input, "path %s", &s))
+ pm->plugin_path = s;
+ else if (unformat (input, "plugin %s %U", &s,
+ unformat_vlib_cli_sub_input, &sub_input))
+ {
+ error = config_one_plugin (vm, (char *) s, &sub_input);
+ unformat_free (&sub_input);
+ if (error)
+ goto done2;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ vec_free (s);
+ goto done2;
+ }
+ }
+
+done2:
+ unformat_free (&in);
+ return error;
+}
+
+/* discard whole 'plugins' section, as it is already consumed prior to
+ plugin load */
+static clib_error_t *
+plugins_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ u8 *junk;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%s", &junk))
+ {
+ vec_free (junk);
+ return 0;
+ }
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (plugins_config, "plugins");
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/plugin.h b/src/vlib/unix/plugin.h
new file mode 100644
index 00000000..d9801ec4
--- /dev/null
+++ b/src/vlib/unix/plugin.h
@@ -0,0 +1,126 @@
+/*
+ * plugin.h: plugin handling
+ *
+ * Copyright (c) 2011 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_plugin_h__
+#define __included_plugin_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * vlib plugin scheme
+ *
+ * Almost anything which can be made to work in a vlib unix
+ * application will also work in a vlib plugin.
+ *
+ * The elf-section magic which registers static objects
+ * works so long as plugins are preset when the vlib unix process
+ * starts. But wait: there's more...
+ *
+ * If an application calls vlib_load_new_plugins() -- possibly after
+ * changing vlib_plugin_main.plugin_path / vlib_plugin_main.plugin_name_filter,
+ * -- new plugins will be loaded. That, in turn, allows considerable
+ * flexibility in terms of adding feature code or fixing bugs without
+ * requiring the data-plane process to restart.
+ *
+ * When the plugin mechanism loads a plugin, it uses dlsym to locate
+ * and call the plugin's function vlib_plugin_register() if it exists.
+ * A plugin which expects to be loaded after the vlib application
+ * starts uses this callback to modify the application. If vlib_plugin_register
+ * returns non-zero, the plugin mechanism dlclose()'s the plugin.
+ *
+ * Applications control the plugin search path and name filter by
+ * declaring the variables vlib_plugin_path and vlib_plugin_name_filter.
+ * libvlib.la supplies weak references for these symbols which
+ * effectively disable the scheme. In order for the elf-section magic to
+ * work, static plugins must be loaded at the earliest possible moment.
+ *
+ * An application can change these parameters at any time and call
+ * vlib_load_new_plugins().
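+ *
+ * A minimal plugin sketch, assuming the VLIB_PLUGIN_REGISTER macro
+ * declared below (version and description strings are illustrative):
+ *
+ *   VLIB_PLUGIN_REGISTER () = {
+ *     .version = "1.0",
+ *     .description = "Example plugin",
+ *   };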
+ */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct {
+ u8 default_disabled;
+ const char version[32];
+ const char version_required[32];
+ const char *early_init;
+ const char *description;
+}) vlib_plugin_registration_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ u8 *name;
+ u8 *filename;
+ struct stat file_info;
+ void *handle;
+
+ /* plugin registration */
+ vlib_plugin_registration_t *reg;
+ char *version;
+} plugin_info_t;
+
+typedef struct
+{
+ char *name;
+ u8 is_disabled;
+ u8 is_enabled;
+ u8 skip_version_check;
+} plugin_config_t;
+
+typedef struct
+{
+ /* loaded plugin info */
+ plugin_info_t *plugin_info;
+ uword *plugin_by_name_hash;
+
+ /* path and name filter */
+ u8 *plugin_path;
+ u8 *plugin_name_filter;
+
+ /* plugin configs and hash by name */
+ plugin_config_t *configs;
+ uword *config_index_by_name;
+
+ /* usual */
+ vlib_main_t *vlib_main;
+} plugin_main_t;
+
+extern plugin_main_t vlib_plugin_main;
+
+clib_error_t *vlib_plugin_config (vlib_main_t * vm, unformat_input_t * input);
+int vlib_plugin_early_init (vlib_main_t * vm);
+int vlib_load_new_plugins (plugin_main_t * pm, int from_early_init);
+void *vlib_get_plugin_symbol (char *plugin_name, char *symbol_name);
+
+#define VLIB_PLUGIN_REGISTER() \
+ vlib_plugin_registration_t vlib_plugin_registration \
+ __attribute__((__section__(".vlib_plugin_registration")))
+
+#endif /* __included_plugin_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h
new file mode 100644
index 00000000..4c8566b7
--- /dev/null
+++ b/src/vlib/unix/unix.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * unix.h: Unix specific main state
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_unix_unix_h
+#define included_unix_unix_h
+
+#include <vppinfra/file.h>
+#include <vppinfra/socket.h>
+#include <termios.h>
+
+typedef struct
+{
+ f64 time;
+ clib_error_t *error;
+} unix_error_history_t;
+
+typedef struct
+{
+ /* Back pointer to main structure. */
+ vlib_main_t *vlib_main;
+
+ u32 flags;
+ /* Run interactively or as daemon (background process). */
+#define UNIX_FLAG_INTERACTIVE (1 << 0)
+#define UNIX_FLAG_NODAEMON (1 << 1)
+
+ /* CLI listen socket. */
+ clib_socket_t cli_listen_socket;
+
+ /* Circular buffer of last unix errors. */
+ unix_error_history_t error_history[128];
+ u32 error_history_index;
+ u64 n_total_errors;
+
+ /* startup-config filename */
+ u8 *startup_config_filename;
+
+ /* runtime directory path */
+ u8 *runtime_dir;
+
+ /* pidfile filename */
+ u8 *pidfile;
+
+ /* unix config complete */
+ volatile int unix_config_complete;
+
+ /* CLI log file. GIGO. */
+ u8 *log_filename;
+ int log_fd;
+
+ /* Don't put CLI connections into character mode */
+ int cli_line_mode;
+
+ /* Maximum amount of command line history to keep per session */
+ u32 cli_history_limit;
+
+ /* Suppress the welcome banner at CLI session start */
+ int cli_no_banner;
+
+ /* Maximum pager buffer size */
+ u32 cli_pager_buffer_limit;
+
+ /* Suppress the pager */
+ int cli_no_pager;
+
+ /* Store the original state of stdin when it's a tty */
+ struct termios tio_stdin;
+ int tio_isset;
+} unix_main_t;
+
+/* Global main structure. */
+extern unix_main_t unix_main;
+extern clib_file_main_t file_main;
+
+always_inline void
+unix_save_error (unix_main_t * um, clib_error_t * error)
+{
+ unix_error_history_t *eh = um->error_history + um->error_history_index;
+ clib_error_free_vector (eh->error);
+ eh->error = error;
+ eh->time = vlib_time_now (um->vlib_main);
+ um->n_total_errors += 1;
+ if (++um->error_history_index >= ARRAY_LEN (um->error_history))
+ um->error_history_index = 0;
+}
+
+/* Main function for Unix VLIB. */
+int vlib_unix_main (int argc, char *argv[]);
+
+clib_error_t *unix_physmem_init (vlib_main_t * vm);
+
+/* Set prompt for CLI. */
+void vlib_unix_cli_set_prompt (char *prompt);
+
+static inline unix_main_t *
+vlib_unix_get_main (void)
+{
+ return &unix_main;
+}
+
+static inline char *
+vlib_unix_get_runtime_dir (void)
+{
+ return (char *) unix_main.runtime_dir;
+}
+
+/* thread stack array; vec_len = max number of threads */
+extern u8 **vlib_thread_stacks;
+
+/* utils */
+
+clib_error_t *foreach_directory_file (char *dir_name,
+ clib_error_t * (*f) (void *arg,
+ u8 * path_name,
+ u8 * file_name),
+ void *arg, int scan_dirs);
+
+clib_error_t *vlib_unix_recursive_mkdir (char *path);
+
+clib_error_t *vlib_unix_validate_runtime_file (unix_main_t * um,
+ const char *path,
+ u8 ** full_path);
+
+#endif /* included_unix_unix_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/util.c b/src/vlib/unix/util.c
new file mode 100644
index 00000000..5472751e
--- /dev/null
+++ b/src/vlib/unix/util.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.c: Linux user space PCI bus management.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+clib_error_t *
+foreach_directory_file (char *dir_name,
+ clib_error_t * (*f) (void *arg, u8 * path_name,
+ u8 * file_name), void *arg,
+ int scan_dirs)
+{
+ DIR *d;
+ struct dirent *e;
+ clib_error_t *error = 0;
+ u8 *s, *t;
+
+ d = opendir (dir_name);
+ if (!d)
+ {
+ if (errno == ENOENT)
+ return 0;
+ return clib_error_return_unix (0, "open `%s'", dir_name);
+ }
+
+ s = t = 0;
+ while (1)
+ {
+ e = readdir (d);
+ if (!e)
+ break;
+ if (scan_dirs)
+ {
+ if (e->d_type == DT_DIR
+ && (!strcmp (e->d_name, ".") || !strcmp (e->d_name, "..")))
+ continue;
+ }
+ else
+ {
+ if (e->d_type == DT_DIR)
+ continue;
+ }
+
+ s = format (s, "%s/%s", dir_name, e->d_name);
+ t = format (t, "%s", e->d_name);
+ error = f (arg, s, t);
+ _vec_len (s) = 0;
+ _vec_len (t) = 0;
+
+ if (error)
+ break;
+ }
+
+ vec_free (s);
+ closedir (d);
+
+ return error;
+}
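+
+/* Usage sketch with a hypothetical callback: count the files under a
+   directory, with scan_dirs = 0 so subdirectories are skipped.
+
+     static clib_error_t *
+     count_cb (void *arg, u8 * path_name, u8 * file_name)
+     {
+       int *count = arg;
+       *count += 1;
+       return 0;
+     }
+
+     int n = 0;
+     clib_error_t *error =
+       foreach_directory_file ("/tmp/example", count_cb, &n, 0);
+*/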
+
+clib_error_t *
+vlib_unix_recursive_mkdir (char *path)
+{
+ clib_error_t *error = 0;
+ char *c = 0;
+ int i = 0;
+
+ while (path[i] != 0)
+ {
+ if (c && path[i] == '/')
+ {
+ vec_add1 (c, 0);
+ if ((mkdir (c, 0755)) && (errno != EEXIST))
+ {
+ error = clib_error_return_unix (0, "mkdir '%s'", c);
+ goto done;
+ }
+ _vec_len (c)--;
+ }
+ vec_add1 (c, path[i]);
+ i++;
+ }
+
+ if ((mkdir (path, 0755)) && (errno != EEXIST))
+ {
+ error = clib_error_return_unix (0, "mkdir '%s'", path);
+ goto done;
+ }
+
+done:
+ vec_free (c);
+
+ return error;
+}
+
+clib_error_t *
+vlib_unix_validate_runtime_file (unix_main_t * um,
+ const char *path, u8 ** full_path)
+{
+ u8 *fp = 0;
+ char *last_slash = 0;
+
+ if (path[0] == '\0')
+ {
+ return clib_error_return (0, "path is an empty string");
+ }
+ else if (strncmp (path, "../", 3) == 0 || strstr (path, "/../"))
+ {
+ return clib_error_return (0, "'..' not allowed in runtime path");
+ }
+ else if (path[0] == '/')
+ {
+ /* Absolute path. Has to start with runtime directory */
+ if (strncmp ((char *) um->runtime_dir, path,
+ strlen ((char *) um->runtime_dir)))
+ {
+ return clib_error_return (0,
+ "file %s is not in runtime directory %s",
+ path, um->runtime_dir);
+ }
+ fp = format (0, "%s%c", path, '\0');
+ }
+ else
+ {
+ /* Relative path, just append to runtime */
+ fp = format (0, "%s/%s%c", um->runtime_dir, path, '\0');
+ }
+
+ /* We don't want to create a directory out of the last file */
+ if ((last_slash = strrchr ((char *) fp, '/')) != NULL)
+ *last_slash = '\0';
+
+ clib_error_t *error = vlib_unix_recursive_mkdir ((char *) fp);
+
+ if (last_slash != NULL)
+ *last_slash = '/';
+
+ if (error)
+ vec_free (fp);
+
+ *full_path = fp;
+ return error;
+}
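+
+/* Worked example, assuming um->runtime_dir is "/run/vpp": validating the
+   relative path "sock/cli.sock" creates /run/vpp/sock if needed and sets
+   *full_path to "/run/vpp/sock/cli.sock"; an absolute path outside
+   /run/vpp is rejected with an error. */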
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/vlib.h b/src/vlib/vlib.h
new file mode 100644
index 00000000..eed5c5bc
--- /dev/null
+++ b/src/vlib/vlib.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vlib.h: top-level include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_h
+#define included_vlib_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/elf_clib.h>
+
+/* Generic definitions. */
+#include <vlib/defs.h>
+
+/* Forward declarations of structs to avoid circular dependencies. */
+struct vlib_main_t;
+
+/* All includes in alphabetical order. */
+#include <vlib/physmem.h>
+#include <vlib/buffer.h>
+#include <vlib/cli.h>
+#include <vlib/counter.h>
+#include <vlib/error.h>
+#include <vlib/init.h>
+#include <vlib/mc.h>
+#include <vlib/node.h>
+#include <vlib/trace.h>
+
+/* Main include depends on other vlib/ includes so we put it last. */
+#include <vlib/main.h>
+
+/* Inline/extern function declarations. */
+#include <vlib/threads.h>
+#include <vlib/physmem_funcs.h>
+#include <vlib/buffer_funcs.h>
+#include <vlib/cli_funcs.h>
+#include <vlib/error_funcs.h>
+#include <vlib/format_funcs.h>
+#include <vlib/node_funcs.h>
+#include <vlib/trace_funcs.h>
+#include <vlib/global_funcs.h>
+
+#include <vlib/buffer_node.h>
+
+#endif /* included_vlib_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/vlib_process_doc.h b/src/vlib/vlib_process_doc.h
new file mode 100644
index 00000000..a47c5e4b
--- /dev/null
+++ b/src/vlib/vlib_process_doc.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#error do not #include this file!
+
+/** \file
+
+ Cooperative multi-tasking thread support.
+
+ Vlib provides a lightweight cooperative multi-tasking thread
+ model. Context switching costs a setjmp/longjump pair. It's not
+ unreasonable to put vlib threads to sleep for 10us.
+
+ The graph node scheduler invokes these processes in much the same
+ way as traditional vector-processing run-to-completion graph
+ nodes; plus-or-minus a setjmp/longjmp pair required to switch
+ stacks. Simply set the vlib_node_registration_t type field to
+ VLIB_NODE_TYPE_PROCESS. Process is a misnomer; these are threads.
+
+ As of this writing, the default stack size is 1<<15 bytes
+ (32kb). Initialize the node registration's
+ process_log2_n_stack_bytes member as needed. The graph node
+ dispatcher makes some effort to detect stack overrun. We map a
+ no-access page below each thread stack.
+
+ Process node dispatch functions are expected to be while(1) { }
+ loops which suspend when not otherwise occupied, and which must
+ not run for unreasonably long periods of time. Unreasonably long
+ is an application-dependent concept. Over the years, we have
+ constructed frame-size sensitive control-plane nodes which will
+ use a much higher fraction of the available CPU bandwidth when the
+ frame size is low. Classic example: modifying forwarding
+ tables. So long as the table-builder leaves the forwarding tables
+ in a valid state, one can suspend the table builder to avoid
+ dropping packets as a result of control-plane activity.
+
+ Process nodes can suspend for fixed amounts of time, or until another
+ entity signals an event, or both. See the example below.
+
+ When running in VLIB process context, one must pay strict attention to
+ loop invariant issues. If one walks a data structure and calls a
+ function which may suspend, one had best know by construction that it
+ cannot change. Often, it's best to simply make a snapshot copy of a
+ data structure, walk the copy at leisure, then free the copy.
+
+ Here's an example:
+
+ <code><pre>
+ \#define EXAMPLE_POLL_PERIOD 10.0
+
+ static uword
+ example_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+ {
+ f64 poll_time_remaining;
+ uword event_type, *event_data = 0;
+
+ poll_time_remaining = EXAMPLE_POLL_PERIOD;
+ while (1)
+ {
+ int i;
+
+ // Sleep until next periodic call due,
+ // or until we receive event(s)
+ //
+ poll_time_remaining =
+ vlib_process_wait_for_event_or_clock (vm, poll_time_remaining);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0: // no events => timeout
+ break;
+
+ case EVENT1:
+ for (i = 0; i < vec_len (event_data); i++)
+ handle_event1 (vm, event_data[i]);
+ break;
+
+ case EVENT2:
+ for (i = 0; i < vec_len (event_data); i++)
+ handle_event2 (vm, event_data[i]);
+ break;
+
+ // ... and so forth for each event type
+
+ default:
+ // This should never happen...
+ clib_warning ("BUG: unhandled event type %d",
+ event_type);
+ break;
+ }
+ vec_reset_length (event_data);
+
+ // Timer expired, call periodic function
+ if (vlib_process_suspend_time_is_zero (poll_time_remaining))
+ {
+ example_periodic (vm);
+ poll_time_remaining = EXAMPLE_POLL_PERIOD;
+ }
+ }
+ // NOTREACHED
+ return 0;
+ }
+
+ VLIB_REGISTER_NODE (example_node, static) = {
+ .function = example_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "example-process",
+ };
+ </pre></code>
+
+ In this example, the VLIB process node waits for an event to
+ occur, or for 10 seconds to elapse. The code demuxes on the event
+ type, calling the appropriate handler function.
+
+ Each call to vlib_process_get_events returns a vector of
+ per-event-type data passed to successive vlib_process_signal_event
+ calls; vec_len (event_data) >= 1. It is an error to process only
+ event_data[0].
+
+ Resetting the event_data vector-length to 0 by calling
+ vec_reset_length (event_data) - instead of calling vec_free (...)
+ - means that the event scheme doesn't burn cycles continuously
+ allocating and freeing the event data vector. This is a common
+ coding pattern, well worth using when appropriate.
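+
+ To deliver an event to the example process from another graph node
+ or thread (a sketch; event_datum is a placeholder, and the node
+ index comes from the registration shown above):
+
+ <code><pre>
+ vlib_process_signal_event (vm, example_node.index,
+                            EVENT1, (uword) event_datum);
+ </pre></code>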
+*/
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h
new file mode 100644
index 00000000..49f5d5fb
--- /dev/null
+++ b/src/vlibapi/api.h
@@ -0,0 +1,131 @@
+/*
+ *------------------------------------------------------------------
+ * api.h
+ *
+ * Copyright (c) 2009-2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef included_api_h
+#define included_api_h
+
+#include <stddef.h>
+#include <vppinfra/error.h>
+#include <svm/svm.h>
+#include <vlib/vlib.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api_common.h>
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED
+(struct
+ {
+ u8 endian; u8 wrapped;
+ u32 nitems;
+}) vl_api_trace_file_header_t;
+/* *INDENT-ON* */
+
+int vl_msg_api_trace_save (api_main_t * am,
+ vl_api_trace_which_t which, FILE * fp);
+
+#define VLIB_API_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,api_init)
+
+/* Call given init function: used for init function dependencies. */
+#define vlib_call_api_init_function(vm, x) \
+ ({ \
+ extern vlib_init_function_t * _VLIB_INIT_FUNCTION_SYMBOL (x,api_init); \
+ vlib_init_function_t * _f = _VLIB_INIT_FUNCTION_SYMBOL (x,api_init); \
+ clib_error_t * _error = 0; \
+ if (! hash_get (vm->init_functions_called, _f)) \
+ { \
+ hash_set1 (vm->init_functions_called, _f); \
+ _error = _f (vm); \
+ } \
+ _error; \
+ })
+
+
+#define _VL_MSG_API_FUNCTION_SYMBOL(x, type) \
+ _vl_msg_api_##type##_function_##x
+
+#define VL_MSG_API_FUNCTION_SYMBOL(x) \
+ _VL_MSG_API_FUNCTION_SYMBOL(x, reaper)
+
+#define VLIB_DECLARE_REAPER_FUNCTION(x, tag) \
+vl_msg_api_init_function_t * _VL_MSG_API_FUNCTION_SYMBOL (x, tag) = x; \
+static void __vl_msg_api_add_##tag##_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+ \
+static void __vl_msg_api_add_##tag##_function_##x (void) \
+{ \
+ api_main_t * am = &api_main; \
+ static _vl_msg_api_function_list_elt_t _vl_msg_api_function; \
+ _vl_msg_api_function.next_init_function \
+ = am->tag##_function_registrations; \
+ am->tag##_function_registrations = &_vl_msg_api_function; \
+ _vl_msg_api_function.f = &x; \
+}
+
+#define VL_MSG_API_REAPER_FUNCTION(x) VLIB_DECLARE_REAPER_FUNCTION(x,reaper)
+
+/* Call reaper function with client index */
+#define vl_msg_api_call_reaper_function(ci) \
+ ({ \
+ extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (reaper); \
+ vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (reaper); \
+ clib_error_t * _error = 0; \
+ _error = _f (ci); \
+ })
+
+static inline u32
+vl_msg_api_get_msg_length_inline (void *msg_arg)
+{
+ u8 *msg = (u8 *) msg_arg;
+
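+ /* The msgbuf_t header immediately precedes the message data; back up
+    from the message pointer to recover the stored length. */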
+ msgbuf_t *header = (msgbuf_t *) (msg - offsetof (msgbuf_t, data));
+
+ return clib_net_to_host_u32 (header->data_len);
+}
+
+int vl_msg_api_rx_trace_enabled (api_main_t * am);
+int vl_msg_api_tx_trace_enabled (api_main_t * am);
+void vl_msg_api_trace (api_main_t * am, vl_api_trace_t * tp, void *msg);
+int vl_msg_api_trace_onoff (api_main_t * am, vl_api_trace_which_t which,
+ int onoff);
+int vl_msg_api_trace_free (api_main_t * am, vl_api_trace_which_t which);
+int vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which,
+ u32 nitems);
+void vl_msg_api_handler_with_vm_node (api_main_t * am,
+ void *the_msg, vlib_main_t * vm,
+ vlib_node_runtime_t * node);
+vl_api_trace_t *vl_msg_api_trace_get (api_main_t * am,
+ vl_api_trace_which_t which);
+void vl_msg_api_add_msg_name_crc (api_main_t * am, const char *string,
+ u32 id);
+/* node_serialize.c prototypes */
+u8 *vlib_node_serialize (vlib_node_main_t * nm, u8 * vector,
+ u32 max_threads, int include_nexts,
+ int include_stats);
+vlib_node_t **vlib_node_unserialize (u8 * vector);
+u32 vl_msg_api_get_msg_length (void *msg_arg);
+
+#endif /* included_api_h */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h
new file mode 100644
index 00000000..8e6ab0ff
--- /dev/null
+++ b/src/vlibapi/api_common.h
@@ -0,0 +1,330 @@
+/*
+ *------------------------------------------------------------------
+ * api_common.h
+ *
+ * Copyright (c) 2009-2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef included_api_common_h
+#define included_api_common_h
+
+/** \file API common definitions
+ * See api_doc.md for more info
+ */
+
+#include <vppinfra/clib_error.h>
+#include <svm/svm_common.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+
+/** API registration types
+ */
+typedef enum
+{
+ REGISTRATION_TYPE_FREE = 0,
+ REGISTRATION_TYPE_SHMEM, /**< Shared memory connection */
+ REGISTRATION_TYPE_SOCKET_LISTEN, /**< Socket listener */
+ REGISTRATION_TYPE_SOCKET_SERVER, /**< Socket server */
+ REGISTRATION_TYPE_SOCKET_CLIENT, /**< Socket client */
+} vl_registration_type_t;
+
+/** An API client registration, only in vpp/vlib */
+
+typedef struct vl_api_registration_
+{
+ vl_registration_type_t registration_type; /**< type */
+
+ /** Index in VLIB's brain (not shared memory). */
+ u32 vl_api_registration_pool_index;
+
+ u8 *name; /**< Client name */
+
+ /** shared memory only: pointer to client input queue */
+ unix_shared_memory_queue_t *vl_input_queue;
+
+ /* socket server and client */
+ u32 clib_file_index; /**< Socket only: file index */
+ i8 *unprocessed_input; /**< Socket only: pending input */
+ u32 unprocessed_msg_length; /**< Socket only: unprocessed length */
+ u8 *output_vector; /**< Socket only: output vector */
+
+ /* socket client only */
+ u32 server_handle; /**< Socket client only: server handle */
+ u32 server_index; /**< Socket client only: server index */
+} vl_api_registration_t;
+
+
+/** Trace configuration for a single message */
+typedef struct
+{
+ int size; /**< for sanity checking */
+ int trace_enable; /**< trace this message */
+ int replay_enable; /**< This message can be replayed */
+} trace_cfg_t;
+
+/**
+ * API trace state
+ */
+typedef struct
+{
+ u8 endian; /**< trace endianness */
+ u8 enabled; /**< trace is enabled */
+ u8 wrapped; /**< trace has wrapped */
+ u8 pad;
+ u32 nitems; /**< Number of trace records */
+ u32 curindex; /**< Current index in circular buffer */
+ u8 **traces; /**< Trace ring */
+} vl_api_trace_t;
+
+/** Trace RX / TX enum */
+typedef enum
+{
+ VL_API_TRACE_TX,
+ VL_API_TRACE_RX,
+} vl_api_trace_which_t;
+
+#define VL_API_LITTLE_ENDIAN 0x00
+#define VL_API_BIG_ENDIAN 0x01
+
+/** Message range (belonging to a plugin) */
+typedef struct
+{
+ u8 *name; /**< name of the plugin */
+ u16 first_msg_id; /**< first assigned message ID */
+ u16 last_msg_id; /**< last assigned message ID */
+} vl_api_msg_range_t;
+
+/** Message configuration definition */
+typedef struct
+{
+ int id; /**< the message ID */
+ char *name; /**< the message name */
+ u32 crc; /**< message definition CRC */
+ void *handler; /**< the message handler */
+ void *cleanup; /**< non-default message cleanup handler */
+ void *endian; /**< message endian function */
+ void *print; /**< message print function */
+ int size; /**< message size */
+ int traced; /**< is this message to be traced? */
+ int replay; /**< is this message to be replayed? */
+ int message_bounce; /**< do not free message after processing */
+ int is_mp_safe; /**< worker thread barrier required? */
+} vl_msg_api_msg_config_t;
+
+/** Message header structure */
+typedef struct msgbuf_
+{
+ unix_shared_memory_queue_t *q; /**< message allocated in this shmem ring */
+ u32 data_len; /**< message length not including header */
+ u32 gc_mark_timestamp; /**< message garbage collector mark TS */
+ u8 data[0]; /**< actual message begins here */
+} msgbuf_t;
+
+/* api_shared.c prototypes */
+void vl_msg_api_handler (void *the_msg);
+void vl_msg_api_handler_no_free (void *the_msg);
+void vl_msg_api_handler_no_trace_no_free (void *the_msg);
+void vl_msg_api_trace_only (void *the_msg);
+void vl_msg_api_cleanup_handler (void *the_msg);
+void vl_msg_api_replay_handler (void *the_msg);
+void vl_msg_api_socket_handler (void *the_msg);
+void vl_msg_api_set_handlers (int msg_id, char *msg_name,
+ void *handler,
+ void *cleanup,
+ void *endian,
+ void *print, int msg_size, int traced);
+void vl_msg_api_clean_handlers (int msg_id);
+void vl_msg_api_config (vl_msg_api_msg_config_t *);
+void vl_msg_api_set_cleanup_handler (int msg_id, void *fp);
+void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q);
+
+void vl_msg_api_barrier_sync (void) __attribute__ ((weak));
+void vl_msg_api_barrier_release (void) __attribute__ ((weak));
+#ifdef BARRIER_TRACING
+void vl_msg_api_barrier_trace_context (const char *context)
+ __attribute__ ((weak));
+#else
+#define vl_msg_api_barrier_trace_context(X)
+#endif
+void vl_msg_api_free (void *);
+void vl_noop_handler (void *mp);
+void vl_msg_api_increment_missing_client_counter (void);
+void vl_msg_api_post_mortem_dump (void);
+void vl_msg_api_post_mortem_dump_enable_disable (int enable);
+void vl_msg_api_register_pd_handler (void *handler,
+ u16 msg_id_host_byte_order);
+int vl_msg_api_pd_handler (void *mp, int rv);
+
+void vl_msg_api_set_first_available_msg_id (u16 first_avail);
+u16 vl_msg_api_get_msg_ids (const char *name, int n);
+u32 vl_api_get_msg_index (u8 * name_and_crc);
+
+typedef clib_error_t *(vl_msg_api_init_function_t) (u32 client_index);
+
+typedef struct _vl_msg_api_init_function_list_elt
+{
+ struct _vl_msg_api_init_function_list_elt *next_init_function;
+ vl_msg_api_init_function_t *f;
+} _vl_msg_api_function_list_elt_t;
+
+/** API main structure, used by both vpp and binary API clients */
+typedef struct
+{
+ /** Message handler vector */
+ void (**msg_handlers) (void *);
+ /** Platform-dependent (aka hardware) message handler vector */
+ int (**pd_msg_handlers) (void *, int);
+
+ /** non-default message cleanup handler vector */
+ void (**msg_cleanup_handlers) (void *);
+
+ /** Message endian handler vector */
+ void (**msg_endian_handlers) (void *);
+
+ /** Message print function vector */
+ void (**msg_print_handlers) (void *, void *);
+
+ /** Message name vector */
+ const char **msg_names;
+
+ /** Don't automatically free message buffer vector */
+ u8 *message_bounce;
+
+ /** Message is mp safe vector */
+ u8 *is_mp_safe;
+
+ /** Allocator ring vectors (in shared memory) */
+ struct ring_alloc_ *arings;
+
+ /** Number of times that the ring allocator failed */
+ u32 ring_misses;
+
+ /** Number of garbage-collected message buffers */
+ u32 garbage_collects;
+
+ /** Number of missing clients / failed message sends */
+ u32 missing_clients;
+
+ /** Received message trace configuration */
+ vl_api_trace_t *rx_trace;
+
+ /** Sent message trace configuration */
+ vl_api_trace_t *tx_trace;
+
+ /** Print every received message */
+ int msg_print_flag;
+
+ /** Current trace configuration */
+ trace_cfg_t *api_trace_cfg;
+
+ /** Current process PID */
+ int our_pid;
+
+ /** Binary api segment descriptor */
+ svm_region_t *vlib_rp;
+
+ /** Vector of all mapped shared-VM segments */
+ svm_region_t **mapped_shmem_regions;
+
+ /** Binary API shared-memory segment header pointer */
+ struct vl_shmem_hdr_ *shmem_hdr;
+
+ /** vlib/vpp only: vector of client registrations */
+ vl_api_registration_t **vl_clients;
+
+ /** vlib/vpp only: serialized (message, name, crc) table */
+ u8 *serialized_message_table_in_shmem;
+
+ /** First available message ID, for the plugin msg allocator */
+ u16 first_available_msg_id;
+
+ /** Message range by name hash */
+ uword *msg_range_by_name;
+
+ /** vector of message ranges */
+ vl_api_msg_range_t *msg_ranges;
+
+ /** uid for the api shared memory region */
+ int api_uid;
+
+ /** gid for the api shared memory region */
+ int api_gid;
+
+ /** base virtual address for global VM region */
+ u64 global_baseva;
+
+ /** size of the global VM region */
+ u64 global_size;
+
+ /** size of the API region */
+ u64 api_size;
+
+ /** size of the global VM private mheap */
+ u64 global_pvt_heap_size;
+
+ /** size of the api private mheap */
+ u64 api_pvt_heap_size;
+
+ /** Peer input queue pointer */
+ unix_shared_memory_queue_t *vl_input_queue;
+
+ /**
+ * All VLIB-side message handlers use my_client_index to identify
+ * the queue / client. This works in sim replay.
+ */
+ int my_client_index;
+ /**
+ * This is the (shared VM) address of the registration,
+ * don't use it to id the connection since it can't possibly
+ * work in simulator replay.
+ */
+ vl_api_registration_t *my_registration;
+
+ /** (Historical) signal-based queue non-empty signal, to be removed */
+ i32 vlib_signal;
+
+ /** vpp/vlib input queue length */
+ u32 vlib_input_queue_length;
+
+ /** client message index hash table */
+ uword *msg_index_by_name_and_crc;
+
+ /** Shared VM binary API region name */
+ const char *region_name;
+
+ /** Chroot path to the shared memory API files */
+ const char *root_path;
+
+ /** Replay in progress? */
+ int replay_in_progress;
+
+ /** Dump (msg-name, crc) snapshot here at startup */
+ u8 *save_msg_table_filename;
+
+ /** List of API client reaper functions */
+ _vl_msg_api_function_list_elt_t *reaper_function_registrations;
+
+} api_main_t;
+
+extern api_main_t api_main;
+
+#endif /* included_api_common_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibapi/api_doc.md b/src/vlibapi/api_doc.md
new file mode 100644
index 00000000..2e7ae09a
--- /dev/null
+++ b/src/vlibapi/api_doc.md
@@ -0,0 +1,352 @@
+# Binary API support {#api_doc}
+
+VPP provides a binary API scheme that allows a wide variety of clients to
+program data-plane tables. As of this writing, there are hundreds of binary
+APIs.
+
+Messages are defined in `*.api` files. Today, there are about 50 api files,
+with more arriving as folks add programmable features. The API file compiler
+sources reside in @ref src/tools/vppapigen.
+
+From @ref src/vnet/interface.api, here's a typical request/response message
+definition:
+
+```{.c}
+ autoreply define sw_interface_set_flags
+ {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* 1 = up, 0 = down */
+ u8 admin_up_down;
+ };
+```
+
+To a first approximation, the API compiler renders this definition into
+`build-root/.../vpp/include/vnet/interface.api.h` as follows:
+
+```{.c}
+ /****** Message ID / handler enum ******/
+ #ifdef vl_msg_id
+ vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS, vl_api_sw_interface_set_flags_t_handler)
+ vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, vl_api_sw_interface_set_flags_reply_t_handler)
+ #endif
+
+ /****** Message names ******/
+ #ifdef vl_msg_name
+ vl_msg_name(vl_api_sw_interface_set_flags_t, 1)
+ vl_msg_name(vl_api_sw_interface_set_flags_reply_t, 1)
+ #endif
+
+ /****** Message name, crc list ******/
+ #ifdef vl_msg_name_crc_list
+ #define foreach_vl_msg_name_crc_interface \
+ _(VL_API_SW_INTERFACE_SET_FLAGS, sw_interface_set_flags, f890584a) \
+ _(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply, dfbf3afa) \
+ #endif
+
+ /****** Typedefs *****/
+ #ifdef vl_typedefs
+ typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags {
+ u16 _vl_msg_id;
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 admin_up_down;
+ }) vl_api_sw_interface_set_flags_t;
+
+ typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags_reply {
+ u16 _vl_msg_id;
+ u32 context;
+ i32 retval;
+ }) vl_api_sw_interface_set_flags_reply_t;
+
+ ...
+ #endif /* vl_typedefs */
+```
+
+To change the admin state of an interface, a binary api client sends a
+@ref vl_api_sw_interface_set_flags_t to VPP, which will respond with a
+@ref vl_api_sw_interface_set_flags_reply_t message.
+
+Multiple layers of software, transport types, and shared libraries
+implement a variety of features:
+
+* API message allocation, tracing, pretty-printing, and replay.
+* Message transport via global shared memory, pairwise/private shared
+ memory, and sockets.
+* Barrier synchronization of worker threads across thread-unsafe
+ message handlers.
+
+Correctly-coded message handlers know nothing about the transport used to
+deliver messages to/from VPP. It's reasonably straightforward to use multiple
+API message transport types simultaneously.
+
+For historical reasons, binary api messages are (putatively) sent in network
+byte order. As of this writing, we're seriously considering whether that
+choice makes sense.
+
+
+## Message Allocation
+
+Since binary API messages are always processed in order, we allocate messages
+using a ring allocator whenever possible. This scheme is extremely fast when
+compared with a traditional memory allocator, and doesn't cause heap
+fragmentation. See
+@ref src/vlibmemory/memory_shared.c @ref vl_msg_api_alloc_internal().
+
+Regardless of transport, binary api messages always follow a @ref msgbuf_t
+header:
+
+```{.c}
+ typedef struct msgbuf_
+ {
+ unix_shared_memory_queue_t *q;
+ u32 data_len;
+ u32 gc_mark_timestamp;
+ u8 data[0];
+ } msgbuf_t;
+```
+
+This structure makes it easy to trace messages without having to
+decode them - simply save data_len bytes - and allows
+@ref vl_msg_api_free() to rapidly dispose of message buffers:
+
+```{.c}
+ void
+ vl_msg_api_free (void *a)
+ {
+ msgbuf_t *rv;
+ api_main_t *am = &api_main;
+
+ rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data));
+
+ /*
+ * Here's the beauty of the scheme. Only one proc/thread has
+ * control of a given message buffer. To free a buffer, we just
+ * clear the queue field, and leave. No locks, no hits, no errors...
+ */
+ if (rv->q)
+ {
+ rv->q = 0;
+ rv->gc_mark_timestamp = 0;
+ return;
+ }
+ <snip>
+ }
+```
+
+## Message Tracing and Replay
+
+It's extremely important that VPP can capture and replay sizeable binary API
+traces. System-level issues involving hundreds of thousands of API
+transactions can be re-run in a second or less. Partial replay allows one to
+binary-search for the point where the wheels fall off. One can add scaffolding
+to the data plane, to trigger when complex conditions obtain.
+
+With binary API trace, print, and replay, system-level bug reports of the form
+"after 300,000 API transactions, the VPP data-plane stopped forwarding
+traffic, FIX IT!" can be solved offline.
+
+More often than not, one discovers that a control-plane client
+misprograms the data plane after a long time or under complex
+circumstances. Without direct evidence, "it's a data-plane problem!"
+
+See @ref src/vlibmemory/memory_vlib.c @ref vl_msg_api_process_file(),
+and @ref src/vlibapi/api_shared.c. See also the debug CLI command "api trace"
+
+## Client connection details
+
+Establishing a binary API connection to VPP from a C-language client
+is easy:
+
+```{.c}
+ int
+ connect_to_vpe (char *client_name, int client_message_queue_length)
+ {
+ vat_main_t *vam = &vat_main;
+ api_main_t *am = &api_main;
+
+ if (vl_client_connect_to_vlib ("/vpe-api", client_name,
+ client_message_queue_length) < 0)
+ return -1;
+
+ /* Memorize vpp's binary API message input queue address */
+ vam->vl_input_queue = am->shmem_hdr->vl_input_queue;
+ /* And our client index */
+ vam->my_client_index = am->my_client_index;
+ return 0;
+ }
+```
+
+32 is a typical value for client_message_queue_length. VPP cannot
+block when it needs to send an API message to a binary API client, and
+the VPP-side binary API message handlers are very fast. When sending
+asynchronous messages, make sure to scrape the binary API rx ring with
+some enthusiasm.
+
+### binary API message RX pthread
+
+Calling @ref vl_client_connect_to_vlib spins up a binary API message RX
+pthread:
+
+```{.c}
+ static void *
+ rx_thread_fn (void *arg)
+ {
+ unix_shared_memory_queue_t *q;
+ memory_client_main_t *mm = &memory_client_main;
+ api_main_t *am = &api_main;
+
+ q = am->vl_input_queue;
+
+ /* So we can make the rx thread terminate cleanly */
+ if (setjmp (mm->rx_thread_jmpbuf) == 0)
+ {
+ mm->rx_thread_jmpbuf_valid = 1;
+ while (1)
+ {
+ vl_msg_api_queue_handler (q);
+ }
+ }
+ pthread_exit (0);
+ }
+```
+
+To handle the binary API message queue yourself, use
+@ref vl_client_connect_to_vlib_no_rx_pthread.
+
+In turn, vl_msg_api_queue_handler(...) uses mutex/condvar signalling
+to wake up, process VPP -> client traffic, then sleep. VPP supplies a
+condvar broadcast when the VPP -> client API message queue transitions
+from empty to nonempty.
+
+VPP checks its own binary API input queue at a very high rate. VPP
+invokes message handlers in "process" context [aka cooperative
+multitasking thread context] at a variable rate, depending on
+data-plane packet processing requirements.
+
+## Client disconnection details
+
+To disconnect from VPP, call @ref vl_client_disconnect_from_vlib.
+Please arrange to call this function if the client application
+terminates abnormally. VPP makes every effort to hold a decent funeral
+for dead clients, but VPP can't guarantee to free leaked memory in the
+shared binary API segment.
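+
+A minimal disconnect sketch, typically wired into the client's exit
+path:
+
+```{.c}
+    vl_client_disconnect_from_vlib ();
+```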
+
+## Sending binary API messages to VPP
+
+The point of the exercise is to send binary API messages to VPP, and
+to receive replies from VPP. Many VPP binary APIs comprise a client
+request message, and a simple status reply. For example, to
+set the admin status of an interface, one codes:
+
+```{.c}
+ vl_api_sw_interface_set_flags_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_SW_INTERFACE_SET_FLAGS);
+ mp->client_index = api_main.my_client_index;
+ mp->sw_if_index = clib_host_to_net_u32 (<interface-sw-if-index>);
+ mp->admin_up_down = 1; /* bring the interface up */
+ vl_msg_api_send (api_main.shmem_hdr->vl_input_queue, (u8 *)mp);
+```
+
+Key points:
+
+* Use @ref vl_msg_api_alloc to allocate message buffers
+
+* Allocated message buffers are not initialized, and must be presumed
+ to contain trash.
+
+* Don't forget to set the _vl_msg_id field!
+
+* As of this writing, binary API message IDs and data are sent in
+ network byte order
+
+* The client-library global data structure @ref api_main keeps track
+  of the pointers and handles needed to communicate with VPP
+
+## Receiving binary API messages from VPP
+
+Unless you've made other arrangements (see @ref
+vl_client_connect_to_vlib_no_rx_pthread), *messages are received on a
+separate rx pthread*. Synchronization with the client application main
+thread is the responsibility of the application!
+
+Set up message handlers about as follows:
+
+```{.c}
+ #define vl_typedefs /* define message structures */
+ #include <vpp/api/vpe_all_api_h.h>
+ #undef vl_typedefs
+
+ /* declare message handlers for each api */
+
+    #define vl_endianfun		/* define endian-swap functions */
+ #include <vpp/api/vpe_all_api_h.h>
+ #undef vl_endianfun
+
+ /* instantiate all the print functions we know about */
+ #define vl_print(handle, ...)
+ #define vl_printfun
+ #include <vpp/api/vpe_all_api_h.h>
+ #undef vl_printfun
+
+    /* Define a list of all messages that the client handles */
+ #define foreach_vpe_api_reply_msg \
+ _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply)
+
+ static clib_error_t *
+ my_api_hookup (vlib_main_t * vm)
+ {
+ api_main_t *am = &api_main;
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+      foreach_vpe_api_reply_msg;
+ #undef _
+
+ return 0;
+ }
+```
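+
+Each reply message listed above needs a matching handler. A minimal
+handler might look like this (a sketch; the retval / result_ready
+convention follows the VPP API test client):
+
+```{.c}
+    static void
+    vl_api_sw_interface_set_flags_reply_t_handler
+      (vl_api_sw_interface_set_flags_reply_t * mp)
+    {
+      vat_main_t *vam = &vat_main;
+
+      /* The return value arrives in network byte order */
+      vam->retval = ntohl (mp->retval);
+      /* Tell the main thread the reply is in */
+      vam->result_ready = 1;
+    }
+```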
+
+The key API used to establish message handlers is @ref
+vl_msg_api_set_handlers, which sets values in multiple parallel
+vectors in the @ref api_main_t structure. As of this writing, not all
+vector element values can be set through that API. You'll see
+sporadic API message registrations followed by minor adjustments of
+this form:
+
+```{.c}
+ /*
+ * Thread-safe API messages
+ */
+ am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1;
+ am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1;
+```
diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h
new file mode 100644
index 00000000..052cc6e7
--- /dev/null
+++ b/src/vlibapi/api_helper_macros.h
@@ -0,0 +1,261 @@
+/*
+ *------------------------------------------------------------------
+ * api_helper_macros.h - message handler helper macros
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __api_helper_macros_h__
+#define __api_helper_macros_h__
+
+#define f64_endian(a)
+#define f64_print(a,b)
+
+#ifndef REPLY_MSG_ID_BASE
+#define REPLY_MSG_ID_BASE 0
+#endif
+
+#define REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
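+
+/*
+ * Typical use in a message handler (an illustrative sketch only;
+ * "foo" is a placeholder message name, not part of this header):
+ *
+ *   static void vl_api_foo_t_handler (vl_api_foo_t * mp)
+ *   {
+ *     vl_api_foo_reply_t *rmp;
+ *     int rv = 0;
+ *     ... set rv on failure ...
+ *     REPLY_MACRO (VL_API_FOO_REPLY);
+ *   }
+ */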
+
+#define REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+#define REPLY_MACRO3(t, n, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \
+ rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+#define REPLY_MACRO4(t, n, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ u8 is_error = 0; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc_or_null (sizeof (*rmp) + n); \
+ if (!rmp) \
+ { \
+ /* if there isn't enough memory, try to allocate */ \
+      /* at least enough to return an error */                 \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ if (!rmp) \
+ return; \
+ \
+ memset (rmp, 0, sizeof (*rmp)); \
+ rv = VNET_API_ERROR_TABLE_TOO_BIG; \
+ is_error = 1; \
+ } \
+ rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ if (!is_error) \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+/* "trust, but verify" */
+
+static inline uword
+vnet_sw_if_index_is_api_valid (u32 sw_if_index)
+{
+ return vnet_sw_interface_is_api_valid (vnet_get_main (), sw_if_index);
+}
+
+#define VALIDATE_SW_IF_INDEX(mp) \
+ do { u32 __sw_if_index = ntohl(mp->sw_if_index); \
+ if (!vnet_sw_if_index_is_api_valid(__sw_if_index)) { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_sw_if_index; \
+ } \
+} while(0);
+
+#define BAD_SW_IF_INDEX_LABEL \
+do { \
+bad_sw_if_index: \
+ ; \
+} while (0);
+
+#define VALIDATE_RX_SW_IF_INDEX(mp) \
+ do { u32 __rx_sw_if_index = ntohl(mp->rx_sw_if_index); \
+ if (!vnet_sw_if_index_is_api_valid(__rx_sw_if_index)) { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_rx_sw_if_index; \
+ } \
+} while(0);
+
+#define BAD_RX_SW_IF_INDEX_LABEL \
+do { \
+bad_rx_sw_if_index: \
+ ; \
+} while (0);
+
+#define VALIDATE_TX_SW_IF_INDEX(mp) \
+ do { u32 __tx_sw_if_index = ntohl(mp->tx_sw_if_index); \
+ if (!vnet_sw_if_index_is_api_valid(__tx_sw_if_index)) { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_tx_sw_if_index; \
+ } \
+} while(0);
+
+#define BAD_TX_SW_IF_INDEX_LABEL \
+do { \
+bad_tx_sw_if_index: \
+ ; \
+} while (0);
+
+#define VALIDATE_BD_ID(mp) \
+    do { u32 __bd_id = ntohl(mp->bd_id);                        \
+    if (__bd_id > L2_BD_ID_MAX) {                               \
+ rv = VNET_API_ERROR_BD_ID_EXCEED_MAX; \
+ goto bad_bd_id; \
+ } \
+} while(0);
+
+#define BAD_BD_ID_LABEL \
+do { \
+bad_bd_id: \
+ ; \
+} while (0);
+
+#define pub_sub_handler(lca,UCA) \
+static void vl_api_want_##lca##_t_handler ( \
+ vl_api_want_##lca##_t *mp) \
+{ \
+ vpe_api_main_t *vam = &vpe_api_main; \
+ vpe_client_registration_t *rp; \
+ vl_api_want_##lca##_reply_t *rmp; \
+ uword *p; \
+ i32 rv = 0; \
+ \
+ p = hash_get (vam->lca##_registration_hash, mp->client_index); \
+ if (p) { \
+ if (mp->enable_disable) { \
+ clib_warning ("pid %d: already enabled...", mp->pid); \
+ rv = VNET_API_ERROR_INVALID_REGISTRATION; \
+ goto reply; \
+ } else { \
+ rp = pool_elt_at_index (vam->lca##_registrations, p[0]); \
+ pool_put (vam->lca##_registrations, rp); \
+ hash_unset (vam->lca##_registration_hash, \
+ mp->client_index); \
+ goto reply; \
+ } \
+ } \
+ if (mp->enable_disable == 0) { \
+ clib_warning ("pid %d: already disabled...", mp->pid); \
+ rv = VNET_API_ERROR_INVALID_REGISTRATION; \
+ goto reply; \
+ } \
+ pool_get (vam->lca##_registrations, rp); \
+ rp->client_index = mp->client_index; \
+ rp->client_pid = mp->pid; \
+ hash_set (vam->lca##_registration_hash, rp->client_index, \
+ rp - vam->lca##_registrations); \
+ \
+reply: \
+ REPLY_MACRO (VL_API_WANT_##UCA##_REPLY); \
+}
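+
+/*
+ * Example (illustrative): pub_sub_handler (oam_events, OAM_EVENTS)
+ * expands to vl_api_want_oam_events_t_handler, which registers or
+ * deregisters the calling client in oam_events_registration_hash
+ * and replies with VL_API_WANT_OAM_EVENTS_REPLY.
+ */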
+
+#define foreach_registration_hash \
+_(interface_events) \
+_(to_netconf_server) \
+_(from_netconf_server) \
+_(to_netconf_client) \
+_(from_netconf_client) \
+_(oam_events) \
+_(bfd_events) \
+_(wc_ip6_nd_events) \
+_(wc_ip4_arp_events)
+
+typedef struct
+{
+ u32 client_index; /* in memclnt registration pool */
+ u32 client_pid;
+} vpe_client_registration_t;
+
+struct _vl_api_ip4_arp_event;
+struct _vl_api_ip6_nd_event;
+
+typedef struct
+{
+#define _(a) uword *a##_registration_hash; \
+ vpe_client_registration_t * a##_registrations;
+ foreach_registration_hash
+#undef _
+ /* notifications happen really early in the game */
+ u8 link_state_process_up;
+
+ /* ip4 arp event registration pool */
+ struct _vl_api_ip4_arp_event *arp_events;
+
+ /* ip6 nd event registration pool */
+ struct _vl_api_ip6_nd_event *nd_events;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} vpe_api_main_t;
+
+extern vpe_api_main_t vpe_api_main;
+
+#endif /* __api_helper_macros_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c
new file mode 100644
index 00000000..27898c3b
--- /dev/null
+++ b/src/vlibapi/api_shared.c
@@ -0,0 +1,925 @@
+/*
+ *------------------------------------------------------------------
+ * api_shared.c - API message handling, common code for both clients
+ * and the vlib process itself.
+ *
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <vppinfra/format.h>
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vppinfra/elog.h>
+
+/* *INDENT-OFF* */
+api_main_t api_main =
+ {
+ .region_name = "/unset",
+ .api_uid = -1,
+ .api_gid = -1,
+ };
+/* *INDENT-ON* */
+
+void
+vl_msg_api_increment_missing_client_counter (void)
+{
+ api_main_t *am = &api_main;
+ am->missing_clients++;
+}
+
+int
+vl_msg_api_rx_trace_enabled (api_main_t * am)
+{
+ return (am->rx_trace && am->rx_trace->enabled);
+}
+
+int
+vl_msg_api_tx_trace_enabled (api_main_t * am)
+{
+ return (am->tx_trace && am->tx_trace->enabled);
+}
+
+/*
+ * vl_msg_api_trace
+ */
+void
+vl_msg_api_trace (api_main_t * am, vl_api_trace_t * tp, void *msg)
+{
+ u8 **this_trace;
+ u8 **old_trace;
+ u8 *msg_copy;
+ u32 length;
+ trace_cfg_t *cfgp;
+ u16 msg_id = ntohs (*((u16 *) msg));
+ msgbuf_t *header = (msgbuf_t *) (((u8 *) msg) - offsetof (msgbuf_t, data));
+
+ cfgp = am->api_trace_cfg + msg_id;
+
+ if (!cfgp || !cfgp->trace_enable)
+ return;
+
+ msg_copy = 0;
+
+ if (tp->nitems == 0)
+ {
+ clib_warning ("tp->nitems is 0");
+ return;
+ }
+
+ if (vec_len (tp->traces) < tp->nitems)
+ {
+ vec_add1 (tp->traces, 0);
+ this_trace = tp->traces + vec_len (tp->traces) - 1;
+ }
+ else
+ {
+ tp->wrapped = 1;
+ old_trace = tp->traces + tp->curindex++;
+ if (tp->curindex == tp->nitems)
+ tp->curindex = 0;
+ vec_free (*old_trace);
+ this_trace = old_trace;
+ }
+
+ length = clib_net_to_host_u32 (header->data_len);
+
+ vec_validate (msg_copy, length - 1);
+ clib_memcpy (msg_copy, msg, length);
+ *this_trace = msg_copy;
+}
+
+int
+vl_msg_api_trace_onoff (api_main_t * am, vl_api_trace_which_t which,
+ int onoff)
+{
+ vl_api_trace_t *tp;
+ int rv;
+
+ switch (which)
+ {
+ case VL_API_TRACE_TX:
+ tp = am->tx_trace;
+ if (tp == 0)
+ {
+ vl_msg_api_trace_configure (am, which, 1024);
+ tp = am->tx_trace;
+ }
+ break;
+
+ case VL_API_TRACE_RX:
+ tp = am->rx_trace;
+ if (tp == 0)
+ {
+ vl_msg_api_trace_configure (am, which, 1024);
+ tp = am->rx_trace;
+ }
+ break;
+
+ default:
+ /* duh? */
+ return -1;
+ }
+
+ /* Configured? */
+ if (tp == 0 || tp->nitems == 0)
+ return -1;
+
+ rv = tp->enabled;
+ tp->enabled = onoff;
+
+ return rv;
+}
+
+int
+vl_msg_api_trace_free (api_main_t * am, vl_api_trace_which_t which)
+{
+ vl_api_trace_t *tp;
+ int i;
+
+ switch (which)
+ {
+ case VL_API_TRACE_TX:
+ tp = am->tx_trace;
+ break;
+
+ case VL_API_TRACE_RX:
+ tp = am->rx_trace;
+ break;
+
+ default:
+ /* duh? */
+ return -1;
+ }
+
+ /* Configured? */
+ if (!tp || tp->nitems == 0)
+ return -1;
+
+ tp->curindex = 0;
+ tp->wrapped = 0;
+
+ for (i = 0; i < vec_len (tp->traces); i++)
+ {
+ vec_free (tp->traces[i]);
+ }
+ vec_free (tp->traces);
+
+ return 0;
+}
+
+int
+vl_msg_api_trace_save (api_main_t * am, vl_api_trace_which_t which, FILE * fp)
+{
+ vl_api_trace_t *tp;
+ vl_api_trace_file_header_t fh;
+ int i;
+ u8 *msg;
+
+ switch (which)
+ {
+ case VL_API_TRACE_TX:
+ tp = am->tx_trace;
+ break;
+
+ case VL_API_TRACE_RX:
+ tp = am->rx_trace;
+ break;
+
+ default:
+ /* duh? */
+ return -1;
+ }
+
+ /* Configured, data present? */
+ if (tp == 0 || tp->nitems == 0 || vec_len (tp->traces) == 0)
+ return -1;
+
+ /* "Dare to be stupid" check */
+ if (fp == 0)
+ {
+ return -2;
+ }
+
+ /* Write the file header */
+ fh.nitems = vec_len (tp->traces);
+ fh.endian = tp->endian;
+ fh.wrapped = tp->wrapped;
+
+ if (fwrite (&fh, sizeof (fh), 1, fp) != 1)
+ {
+ return (-10);
+ }
+
+ /* No-wrap case */
+ if (tp->wrapped == 0)
+ {
+      /*
+       * Note: vec_len returns 0 when fed a NULL pointer.
+       * Unfortunately, the static analysis tool doesn't
+       * figure that out, hence the suppressed warnings.
+       * What a great use of my time.
+       */
+ for (i = 0; i < vec_len (tp->traces); i++)
+ {
+ u32 msg_length;
+ /*sa_ignore NO_NULL_CHK */
+ msg = tp->traces[i];
+	  /*
+	   * This redundant check is required to pass
+	   * SA-checking.
+	   */
+ if (!msg)
+ continue;
+
+ msg_length = clib_host_to_net_u32 (vec_len (msg));
+ if (fwrite (&msg_length, 1, sizeof (msg_length), fp)
+ != sizeof (msg_length))
+ {
+ return (-14);
+ }
+ if (fwrite (msg, 1, vec_len (msg), fp) != vec_len (msg))
+ {
+ return (-11);
+ }
+ }
+ }
+ else
+ {
+ /* Wrap case: write oldest -> end of buffer */
+ for (i = tp->curindex; i < vec_len (tp->traces); i++)
+ {
+ u32 msg_length;
+ msg = tp->traces[i];
+	  /*
+	   * This redundant check is required to pass
+	   * SA-checking.
+	   */
+ if (!msg)
+ continue;
+
+ msg_length = clib_host_to_net_u32 (vec_len (msg));
+ if (fwrite (&msg_length, 1, sizeof (msg_length), fp)
+ != sizeof (msg_length))
+ {
+ return (-14);
+ }
+
+ if (fwrite (msg, 1, vec_len (msg), fp) != vec_len (msg))
+ {
+ return (-12);
+ }
+ }
+ /* write beginning of buffer -> oldest-1 */
+ for (i = 0; i < tp->curindex; i++)
+ {
+ u32 msg_length;
+ /*sa_ignore NO_NULL_CHK */
+ msg = tp->traces[i];
+	  /*
+	   * This redundant check is required to pass
+	   * SA-checking.
+	   */
+ if (!msg)
+ continue;
+
+ msg_length = clib_host_to_net_u32 (vec_len (msg));
+ if (fwrite (&msg_length, 1, sizeof (msg_length), fp)
+ != sizeof (msg_length))
+ {
+ return (-14);
+ }
+
+ if (fwrite (msg, 1, vec_len (msg), fp) != vec_len (msg))
+ {
+ return (-13);
+ }
+ }
+ }
+ return 0;
+}
+
+int
+vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which,
+ u32 nitems)
+{
+ vl_api_trace_t *tp;
+ int was_on = 0;
+
+ switch (which)
+ {
+ case VL_API_TRACE_TX:
+ tp = am->tx_trace;
+ if (tp == 0)
+ {
+ vec_validate (am->tx_trace, 0);
+ tp = am->tx_trace;
+ }
+ break;
+
+ case VL_API_TRACE_RX:
+ tp = am->rx_trace;
+ if (tp == 0)
+ {
+ vec_validate (am->rx_trace, 0);
+ tp = am->rx_trace;
+ }
+
+ break;
+
+ default:
+ return -1;
+
+ }
+
+ if (tp->enabled)
+ {
+ was_on = vl_msg_api_trace_onoff (am, which, 0);
+ }
+ if (tp->traces)
+ {
+ vl_msg_api_trace_free (am, which);
+ }
+
+ memset (tp, 0, sizeof (*tp));
+
+ if (clib_arch_is_big_endian)
+ {
+ tp->endian = VL_API_BIG_ENDIAN;
+ }
+ else
+ {
+ tp->endian = VL_API_LITTLE_ENDIAN;
+ }
+
+ tp->nitems = nitems;
+ if (was_on)
+ {
+ (void) vl_msg_api_trace_onoff (am, which, was_on);
+ }
+ return 0;
+}
+
+void
+vl_msg_api_barrier_sync (void)
+{
+}
+
+void
+vl_msg_api_barrier_release (void)
+{
+}
+
+always_inline void
+msg_handler_internal (api_main_t * am,
+ void *the_msg, int trace_it, int do_it, int free_it)
+{
+ u16 id = ntohs (*((u16 *) the_msg));
+ u8 *(*print_fp) (void *, void *);
+
+ if (id < vec_len (am->msg_handlers) && am->msg_handlers[id])
+ {
+ if (trace_it)
+ vl_msg_api_trace (am, am->rx_trace, the_msg);
+
+ if (am->msg_print_flag)
+ {
+ fformat (stdout, "[%d]: %s\n", id, am->msg_names[id]);
+ print_fp = (void *) am->msg_print_handlers[id];
+ if (print_fp == 0)
+ {
+ fformat (stdout, " [no registered print fn]\n");
+ }
+ else
+ {
+ (*print_fp) (the_msg, stdout);
+ }
+ }
+
+ if (do_it)
+ {
+ if (!am->is_mp_safe[id])
+ {
+ vl_msg_api_barrier_trace_context (am->msg_names[id]);
+ vl_msg_api_barrier_sync ();
+ }
+ (*am->msg_handlers[id]) (the_msg);
+ if (!am->is_mp_safe[id])
+ vl_msg_api_barrier_release ();
+ }
+ }
+ else
+ {
+ clib_warning ("no handler for msg id %d", id);
+ }
+
+ if (free_it)
+ vl_msg_api_free (the_msg);
+}
+
+/* set to 1 if you want before/after message handler event logging */
+#define ELOG_API_MESSAGE_HANDLERS 0
+
+#if ELOG_API_MESSAGE_HANDLERS > 0
+static u32
+elog_id_for_msg_name (vlib_main_t * vm, char *msg_name)
+{
+ uword *p, r;
+ static uword *h;
+ u8 *name_copy;
+
+ if (!h)
+ h = hash_create_string (0, sizeof (uword));
+
+ p = hash_get_mem (h, msg_name);
+ if (p)
+ return p[0];
+ r = elog_string (&vm->elog_main, "%s", msg_name);
+
+ name_copy = format (0, "%s%c", msg_name, 0);
+
+ hash_set_mem (h, name_copy, r);
+
+ return r;
+}
+#endif
+
+/* This is only to be called from a vlib/vnet app */
+void
+vl_msg_api_handler_with_vm_node (api_main_t * am,
+ void *the_msg, vlib_main_t * vm,
+ vlib_node_runtime_t * node)
+{
+ u16 id = ntohs (*((u16 *) the_msg));
+ u8 *(*handler) (void *, void *, void *);
+
+#if ELOG_API_MESSAGE_HANDLERS > 0
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "api-msg: %s",
+ .format_args = "T4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 c;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ if (id < vec_len (am->msg_names))
+ ed->c = elog_id_for_msg_name (vm, am->msg_names[id]);
+ else
+ ed->c = elog_id_for_msg_name (vm, "BOGUS");
+ }
+#endif
+
+ if (id < vec_len (am->msg_handlers) && am->msg_handlers[id])
+ {
+ handler = (void *) am->msg_handlers[id];
+
+ if (am->rx_trace && am->rx_trace->enabled)
+ vl_msg_api_trace (am, am->rx_trace, the_msg);
+
+ if (!am->is_mp_safe[id])
+ {
+ vl_msg_api_barrier_trace_context (am->msg_names[id]);
+ vl_msg_api_barrier_sync ();
+ }
+ (*handler) (the_msg, vm, node);
+ if (!am->is_mp_safe[id])
+ vl_msg_api_barrier_release ();
+ }
+ else
+ {
+ clib_warning ("no handler for msg id %d", id);
+ }
+
+ /*
+ * Special-case, so we can e.g. bounce messages off the vnet
+ * main thread without copying them...
+ */
+ if (!(am->message_bounce[id]))
+ vl_msg_api_free (the_msg);
+
+#if ELOG_API_MESSAGE_HANDLERS > 0
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "api-msg-done: %s",
+ .format_args = "T4",
+ };
+ /* *INDENT-ON* */
+
+ struct
+ {
+ u32 c;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ if (id < vec_len (am->msg_names))
+ ed->c = elog_id_for_msg_name (vm, am->msg_names[id]);
+ else
+ ed->c = elog_id_for_msg_name (vm, "BOGUS");
+ }
+#endif
+}
+
+void
+vl_msg_api_handler (void *the_msg)
+{
+ api_main_t *am = &api_main;
+
+ msg_handler_internal (am, the_msg,
+ (am->rx_trace
+ && am->rx_trace->enabled) /* trace_it */ ,
+ 1 /* do_it */ , 1 /* free_it */ );
+}
+
+void
+vl_msg_api_handler_no_free (void *the_msg)
+{
+ api_main_t *am = &api_main;
+ msg_handler_internal (am, the_msg,
+ (am->rx_trace
+ && am->rx_trace->enabled) /* trace_it */ ,
+ 1 /* do_it */ , 0 /* free_it */ );
+}
+
+void
+vl_msg_api_handler_no_trace_no_free (void *the_msg)
+{
+ api_main_t *am = &api_main;
+ msg_handler_internal (am, the_msg, 0 /* trace_it */ , 1 /* do_it */ ,
+ 0 /* free_it */ );
+}
+
+/*
+ * Add a trace record to the API message trace buffer, if
+ * API message tracing is enabled. Handy for adding sufficient
+ * data to the trace to reproduce autonomous state, as opposed to
+ * state downloaded via control-plane API messages. Example: the NAT
+ * application creates database entries based on packet traffic, not
+ * control-plane messages.
+ *
+ */
+void
+vl_msg_api_trace_only (void *the_msg)
+{
+ api_main_t *am = &api_main;
+
+ msg_handler_internal (am, the_msg,
+ (am->rx_trace
+ && am->rx_trace->enabled) /* trace_it */ ,
+ 0 /* do_it */ , 0 /* free_it */ );
+}
+
+void
+vl_msg_api_cleanup_handler (void *the_msg)
+{
+ api_main_t *am = &api_main;
+ u16 id = ntohs (*((u16 *) the_msg));
+
+ if (PREDICT_FALSE (id >= vec_len (am->msg_cleanup_handlers)))
+ {
+ clib_warning ("_vl_msg_id too large: %d\n", id);
+ return;
+ }
+ if (am->msg_cleanup_handlers[id])
+ (*am->msg_cleanup_handlers[id]) (the_msg);
+
+ vl_msg_api_free (the_msg);
+}
+
+/*
+ * vl_msg_api_replay_handler
+ */
+void
+vl_msg_api_replay_handler (void *the_msg)
+{
+ api_main_t *am = &api_main;
+
+ u16 id = ntohs (*((u16 *) the_msg));
+
+ if (PREDICT_FALSE (id >= vec_len (am->msg_handlers)))
+ {
+ clib_warning ("_vl_msg_id too large: %d\n", id);
+ return;
+ }
+ /* do NOT trace the message... */
+ if (am->msg_handlers[id])
+ (*am->msg_handlers[id]) (the_msg);
+ /* do NOT free the message buffer... */
+}
+
+u32
+vl_msg_api_get_msg_length (void *msg_arg)
+{
+ return vl_msg_api_get_msg_length_inline (msg_arg);
+}
+
+/*
+ * vl_msg_api_socket_handler
+ */
+void
+vl_msg_api_socket_handler (void *the_msg)
+{
+ api_main_t *am = &api_main;
+
+ msg_handler_internal (am, the_msg,
+ (am->rx_trace
+ && am->rx_trace->enabled) /* trace_it */ ,
+ 1 /* do_it */ , 0 /* free_it */ );
+}
+
+#define foreach_msg_api_vector \
+_(msg_names) \
+_(msg_handlers) \
+_(msg_cleanup_handlers) \
+_(msg_endian_handlers) \
+_(msg_print_handlers) \
+_(api_trace_cfg) \
+_(message_bounce) \
+_(is_mp_safe)
+
+void
+vl_msg_api_config (vl_msg_api_msg_config_t * c)
+{
+ api_main_t *am = &api_main;
+
+ ASSERT (c->id > 0);
+
+#define _(a) vec_validate (am->a, c->id);
+ foreach_msg_api_vector;
+#undef _
+
+ if (am->msg_handlers[c->id] && am->msg_handlers[c->id] != c->handler)
+ clib_warning
+      ("BUG: re-registering 'vl_api_%s_t_handler'. "
+       "Handler was %llx, replaced by %llx",
+ c->name, am->msg_handlers[c->id], c->handler);
+
+ am->msg_names[c->id] = c->name;
+ am->msg_handlers[c->id] = c->handler;
+ am->msg_cleanup_handlers[c->id] = c->cleanup;
+ am->msg_endian_handlers[c->id] = c->endian;
+ am->msg_print_handlers[c->id] = c->print;
+ am->message_bounce[c->id] = c->message_bounce;
+ am->is_mp_safe[c->id] = c->is_mp_safe;
+
+ am->api_trace_cfg[c->id].size = c->size;
+ am->api_trace_cfg[c->id].trace_enable = c->traced;
+ am->api_trace_cfg[c->id].replay_enable = c->replay;
+}
+
+/*
+ * vl_msg_api_set_handlers
+ * preserve the old API for a while
+ */
+void
+vl_msg_api_set_handlers (int id, char *name, void *handler, void *cleanup,
+ void *endian, void *print, int size, int traced)
+{
+ vl_msg_api_msg_config_t cfg;
+ vl_msg_api_msg_config_t *c = &cfg;
+
+ memset (c, 0, sizeof (*c));
+
+ c->id = id;
+ c->name = name;
+ c->handler = handler;
+ c->cleanup = cleanup;
+ c->endian = endian;
+ c->print = print;
+ c->traced = traced;
+ c->replay = 1;
+ c->message_bounce = 0;
+ c->is_mp_safe = 0;
+ vl_msg_api_config (c);
+}
+
+void
+vl_msg_api_clean_handlers (int msg_id)
+{
+ vl_msg_api_msg_config_t cfg;
+ vl_msg_api_msg_config_t *c = &cfg;
+
+ memset (c, 0, sizeof (*c));
+
+ c->id = msg_id;
+ vl_msg_api_config (c);
+}
+
+void
+vl_msg_api_set_cleanup_handler (int msg_id, void *fp)
+{
+ api_main_t *am = &api_main;
+ ASSERT (msg_id > 0);
+
+ vec_validate (am->msg_cleanup_handlers, msg_id);
+ am->msg_cleanup_handlers[msg_id] = fp;
+}
+
+void
+vl_msg_api_queue_handler (unix_shared_memory_queue_t * q)
+{
+ uword msg;
+
+ while (!unix_shared_memory_queue_sub (q, (u8 *) & msg, 0))
+ vl_msg_api_handler ((void *) msg);
+}
+
+vl_api_trace_t *
+vl_msg_api_trace_get (api_main_t * am, vl_api_trace_which_t which)
+{
+ switch (which)
+ {
+ case VL_API_TRACE_RX:
+ return am->rx_trace;
+ case VL_API_TRACE_TX:
+ return am->tx_trace;
+ default:
+ return 0;
+ }
+}
+
+void
+vl_noop_handler (void *mp)
+{
+}
+
+
+static u8 post_mortem_dump_enabled;
+
+void
+vl_msg_api_post_mortem_dump_enable_disable (int enable)
+{
+ post_mortem_dump_enabled = enable;
+}
+
+void
+vl_msg_api_post_mortem_dump (void)
+{
+ api_main_t *am = &api_main;
+ FILE *fp;
+ char filename[64];
+ int rv;
+
+ if (post_mortem_dump_enabled == 0)
+ return;
+
+ snprintf (filename, sizeof (filename), "/tmp/api_post_mortem.%d",
+ getpid ());
+
+ fp = fopen (filename, "w");
+ if (fp == NULL)
+ {
+ rv = write (2, "Couldn't create ", 16);
+ rv = write (2, filename, strlen (filename));
+ rv = write (2, "\n", 1);
+ return;
+ }
+ rv = vl_msg_api_trace_save (am, VL_API_TRACE_RX, fp);
+ fclose (fp);
+ if (rv < 0)
+ {
+ rv = write (2, "Failed to save post-mortem API trace to ", 40);
+ rv = write (2, filename, strlen (filename));
+ rv = write (2, "\n", 1);
+ }
+
+}
+
+/* Layered message handling support */
+
+void
+vl_msg_api_register_pd_handler (void *fp, u16 msg_id_host_byte_order)
+{
+ api_main_t *am = &api_main;
+
+ /* Mild idiot proofing */
+ if (msg_id_host_byte_order > 10000)
+ clib_warning ("msg_id_host_byte_order endian issue? %d arg vs %d",
+ msg_id_host_byte_order,
+ clib_net_to_host_u16 (msg_id_host_byte_order));
+ vec_validate (am->pd_msg_handlers, msg_id_host_byte_order);
+ am->pd_msg_handlers[msg_id_host_byte_order] = fp;
+}
+
+int
+vl_msg_api_pd_handler (void *mp, int rv)
+{
+ api_main_t *am = &api_main;
+ int (*fp) (void *, int);
+ u16 msg_id;
+
+ if (clib_arch_is_little_endian)
+ msg_id = clib_net_to_host_u16 (*((u16 *) mp));
+ else
+ msg_id = *((u16 *) mp);
+
+ if (msg_id >= vec_len (am->pd_msg_handlers)
+ || am->pd_msg_handlers[msg_id] == 0)
+ return rv;
+
+ fp = am->pd_msg_handlers[msg_id];
+ rv = (*fp) (mp, rv);
+ return rv;
+}
+
+void
+vl_msg_api_set_first_available_msg_id (u16 first_avail)
+{
+ api_main_t *am = &api_main;
+
+ am->first_available_msg_id = first_avail;
+}
+
+u16
+vl_msg_api_get_msg_ids (const char *name, int n)
+{
+ api_main_t *am = &api_main;
+ u8 *name_copy;
+ vl_api_msg_range_t *rp;
+ uword *p;
+ u16 rv;
+
+ if (am->msg_range_by_name == 0)
+ am->msg_range_by_name = hash_create_string (0, sizeof (uword));
+
+ name_copy = format (0, "%s%c", name, 0);
+
+ p = hash_get_mem (am->msg_range_by_name, name_copy);
+ if (p)
+ {
+ clib_warning ("WARNING: duplicate message range registration for '%s'",
+ name_copy);
+ vec_free (name_copy);
+ return ((u16) ~ 0);
+ }
+
+ if (n < 0 || n > 1024)
+ {
+ clib_warning
+ ("WARNING: bad number of message-IDs (%d) requested by '%s'",
+ n, name_copy);
+ vec_free (name_copy);
+ return ((u16) ~ 0);
+ }
+
+ vec_add2 (am->msg_ranges, rp, 1);
+
+ rv = rp->first_msg_id = am->first_available_msg_id;
+ am->first_available_msg_id += n;
+ rp->last_msg_id = am->first_available_msg_id - 1;
+ rp->name = name_copy;
+
+ hash_set_mem (am->msg_range_by_name, name_copy, rp - am->msg_ranges);
+
+ return rv;
+}
+
+void
+vl_msg_api_add_msg_name_crc (api_main_t * am, const char *string, u32 id)
+{
+ uword *p;
+
+ if (am->msg_index_by_name_and_crc == 0)
+ am->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword));
+
+ p = hash_get_mem (am->msg_index_by_name_and_crc, string);
+ if (p)
+ {
+ clib_warning ("attempt to redefine '%s' ignored...", string);
+ return;
+ }
+
+ hash_set_mem (am->msg_index_by_name_and_crc, string, id);
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c
new file mode 100644
index 00000000..50e5c41c
--- /dev/null
+++ b/src/vlibapi/node_serialize.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+
+#include <vppinfra/serialize.h>
+
+extern void vl_msg_api_barrier_sync (void);
+extern void vl_msg_api_barrier_release (void);
+
+/* serialized representation of state strings */
+
+#define foreach_state_string_code \
+_(STATE_DONE, "done") \
+_(STATE_DISABLED, "disabled") \
+_(STATE_TIME_WAIT, "time wait") \
+_(STATE_EVENT_WAIT, "event wait") \
+_(STATE_ANY_WAIT, "any wait") \
+_(STATE_POLLING, "polling") \
+_(STATE_INTERRUPT_WAIT, "interrupt wait") \
+_(STATE_INTERNAL, "internal")
+
+typedef enum
+{
+#define _(a,b) a,
+ foreach_state_string_code
+#undef _
+} state_string_enum_t;
+
+static char *state_strings[] = {
+#define _(a,b) b,
+ foreach_state_string_code
+#undef _
+};
+
+/*
+ * Serialize a vlib_node_main_t. Appends the result to vector.
+ * Pass 0 to create a new vector, use vec_reset_length(vector)
+ * to recycle a vector / avoid memory allocation, etc.
+ * Switch heaps before/after to serialize into API client shared memory.
+ */
+
+u8 *
+vlib_node_serialize (vlib_node_main_t * nm, u8 * vector,
+ u32 max_threads, int include_nexts, int include_stats)
+{
+ serialize_main_t _sm, *sm = &_sm;
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_t *n;
+ static vlib_node_t ***node_dups;
+ vlib_node_t **nodes;
+ static vlib_main_t **stat_vms;
+ vlib_main_t *stat_vm;
+ u8 *namep;
+ u32 name_bytes;
+ uword i, j, k;
+ u64 l, v, c, d;
+ state_string_enum_t state_code;
+ u32 threads_to_serialize;
+
+ vec_reset_length (node_dups);
+
+ if (vec_len (stat_vms) == 0)
+ {
+ for (i = 0; i < vec_len (vlib_mains); i++)
+ {
+ stat_vm = vlib_mains[i];
+ if (stat_vm)
+ vec_add1 (stat_vms, stat_vm);
+ }
+ }
+
+ threads_to_serialize = clib_min (max_threads, vec_len (stat_vms));
+
+ /*
+ * Barrier sync across stats scraping.
+ * Otherwise, the counts will be grossly inaccurate.
+ */
+ vl_msg_api_barrier_sync ();
+
+ for (j = 0; j < threads_to_serialize; j++)
+ {
+ stat_vm = stat_vms[j];
+ nm = &stat_vm->node_main;
+
+ if (include_stats)
+ {
+ for (i = 0; i < vec_len (nm->nodes); i++)
+ {
+ n = nm->nodes[i];
+ vlib_node_sync_stats (stat_vm, n);
+ }
+ }
+
+ nodes = vec_dup (nm->nodes);
+
+ vec_add1 (node_dups, nodes);
+ }
+ vl_msg_api_barrier_release ();
+
+ serialize_open_vector (sm, vector);
+
+ serialize_likely_small_unsigned_integer (sm, vec_len (stat_vms));
+
+ for (j = 0; j < vec_len (stat_vms); j++)
+ {
+ stat_vm = stat_vms[j];
+ nodes = node_dups[j];
+
+ serialize_likely_small_unsigned_integer (sm, vec_len (nodes));
+
+ for (i = 0; i < vec_len (nodes); i++)
+ {
+ n = nodes[i];
+
+ l = n->stats_total.clocks - n->stats_last_clear.clocks;
+ v = n->stats_total.vectors - n->stats_last_clear.vectors;
+ c = n->stats_total.calls - n->stats_last_clear.calls;
+ d = n->stats_total.suspends - n->stats_last_clear.suspends;
+
+ state_code = STATE_INTERNAL;
+
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ vlib_process_t *p = vlib_get_process_from_node (vm, n);
+
+ switch (p->flags
+ & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT))
+ {
+ default:
+ if (!(p->flags & VLIB_PROCESS_IS_RUNNING))
+ state_code = STATE_DONE;
+ break;
+
+ case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK:
+ state_code = STATE_TIME_WAIT;
+ break;
+
+ case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT:
+ state_code = STATE_EVENT_WAIT;
+ break;
+
+		case (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+		      | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK):
+		  state_code = STATE_ANY_WAIT;
+		  break;
+ }
+ }
+ else if (n->type != VLIB_NODE_TYPE_INTERNAL)
+ {
+ state_code = STATE_POLLING;
+ if (n->state == VLIB_NODE_STATE_DISABLED)
+ state_code = STATE_DISABLED;
+ else if (n->state == VLIB_NODE_STATE_INTERRUPT)
+ state_code = STATE_INTERRUPT_WAIT;
+ }
+
+ /* See unserialize_cstring */
+ name_bytes = vec_len (n->name);
+ serialize_likely_small_unsigned_integer (sm, name_bytes);
+ namep = serialize_get (sm, name_bytes);
+ memcpy (namep, n->name, name_bytes);
+
+ serialize_likely_small_unsigned_integer (sm, (u64) state_code);
+ serialize_likely_small_unsigned_integer (sm, n->type);
+
+ if (include_nexts)
+ {
+ serialize_likely_small_unsigned_integer
+ (sm, vec_len (n->next_nodes));
+ for (k = 0; k < vec_len (n->next_nodes); k++)
+ serialize_likely_small_unsigned_integer (sm,
+ n->next_nodes[k]);
+ }
+ else
+ serialize_likely_small_unsigned_integer (sm, 0);
+
+ if (include_stats)
+ {
+ /* stats present */
+ serialize_likely_small_unsigned_integer (sm, 1);
+ /* total clocks */
+ serialize_integer (sm, l, 8);
+ /* Total calls */
+ serialize_integer (sm, c, 8);
+ /* Total vectors */
+ serialize_integer (sm, v, 8);
+ /* Total suspends */
+ serialize_integer (sm, d, 8);
+ }
+ else /* no stats */
+ serialize_likely_small_unsigned_integer (sm, 0);
+ }
+ vec_free (nodes);
+ }
+ return (serialize_close_vector (sm));
+}
+
+vlib_node_t ***
+vlib_node_unserialize (u8 * vector)
+{
+ serialize_main_t _sm, *sm = &_sm;
+ u32 nnodes, nnexts;
+ u32 nstat_vms;
+ vlib_node_t *node;
+ vlib_node_t **nodes;
+ vlib_node_t ***nodes_by_thread = 0;
+ int i, j, k;
+ u64 l, v, c, d;
+ state_string_enum_t state_code;
+ int stats_present;
+
+ serialize_open_vector (sm, vector);
+
+ nstat_vms = unserialize_likely_small_unsigned_integer (sm);
+
+ vec_validate (nodes_by_thread, nstat_vms - 1);
+ _vec_len (nodes_by_thread) = 0;
+
+ for (i = 0; i < nstat_vms; i++)
+ {
+ nnodes = unserialize_likely_small_unsigned_integer (sm);
+
+ nodes = 0;
+ vec_validate (nodes, nnodes - 1);
+ vec_add1 (nodes_by_thread, nodes);
+
+ for (j = 0; j < nnodes; j++)
+ {
+ node = 0;
+ vec_validate (node, 0);
+ nodes[j] = node;
+
+ unserialize_cstring (sm, (char **) &(node->name));
+ state_code = unserialize_likely_small_unsigned_integer (sm);
+ node->state_string = (u8 *) state_strings[state_code];
+
+ node->type = unserialize_likely_small_unsigned_integer (sm);
+ nnexts = unserialize_likely_small_unsigned_integer (sm);
+ if (nnexts > 0)
+ vec_validate (node->next_nodes, nnexts - 1);
+ for (k = 0; k < nnexts; k++)
+ node->next_nodes[k] =
+ unserialize_likely_small_unsigned_integer (sm);
+
+ stats_present = unserialize_likely_small_unsigned_integer (sm);
+
+ if (stats_present)
+ {
+ /* total clocks */
+ unserialize_integer (sm, &l, 8);
+ node->stats_total.clocks = l;
+ node->stats_last_clear.clocks = 0;
+
+ /* Total calls */
+ unserialize_integer (sm, &c, 8);
+ node->stats_total.calls = c;
+
+ /* Total vectors */
+ unserialize_integer (sm, &v, 8);
+ node->stats_total.vectors = v;
+
+ /* Total suspends */
+ unserialize_integer (sm, &d, 8);
+ node->stats_total.suspends = d;
+ }
+ }
+ }
+ return nodes_by_thread;
+}
+
+#if TEST_CODE
+
+static clib_error_t *
+test_node_serialize_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ u8 *vector = 0;
+ vlib_node_t ***nodes_by_thread;
+ vlib_node_t **nodes;
+ vlib_node_t *node;
+ vlib_node_t *next_node;
+ int i, j, k;
+ u32 max_threads = (u32) ~ 0;
+ int include_nexts = 0;
+ int include_stats = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "max-threads %d", &max_threads))
+ ;
+ else if (unformat (input, "stats"))
+ include_stats = 1;
+ else if (unformat (input, "nexts"))
+ include_nexts = 1;
+ else
+ break;
+ }
+
+ /*
+ * Keep the number of memcpy ops to a minimum (e.g. 1).
+ * The current size of the serialized vector is
+ * slightly under 4K.
+ */
+ vec_validate (vector, 16383);
+ vec_reset_length (vector);
+
+ vector = vlib_node_serialize (nm, vector, max_threads,
+ include_nexts, include_stats);
+
+ vlib_cli_output (vm, "result vector %d bytes", vec_len (vector));
+
+ nodes_by_thread = vlib_node_unserialize (vector);
+
+ vec_free (vector);
+
+ for (i = 0; i < vec_len (nodes_by_thread); i++)
+ {
+ nodes = nodes_by_thread[i];
+
+ vlib_cli_output (vm, "thread %d", i);
+
+ for (j = 0; j < vec_len (nodes); j++)
+ {
+ node = nodes[j];
+
+ vlib_cli_output (vm, "[%d] %s state %s", j, node->name,
+ node->state_string);
+
+ vlib_cli_output
+ (vm, " clocks %lld calls %lld suspends"
+ " %lld vectors %lld",
+ node->stats_total.clocks,
+ node->stats_total.calls,
+ node->stats_total.suspends, node->stats_total.vectors);
+
+ for (k = 0; k < vec_len (node->next_nodes); k++)
+ {
+ if (node->next_nodes[k] != ~0)
+ {
+ next_node = nodes[node->next_nodes[k]];
+ vlib_cli_output (vm, " [%d] %s", k, next_node->name);
+ }
+ }
+ }
+ }
+
+ for (j = 0; j < vec_len (nodes_by_thread); j++)
+ {
+ nodes = nodes_by_thread[j];
+
+ for (i = 0; i < vec_len (nodes); i++)
+ {
+ vec_free (nodes[i]->name);
+ vec_free (nodes[i]->next_nodes);
+ vec_free (nodes[i]);
+ }
+ vec_free (nodes);
+ }
+ vec_free (nodes_by_thread);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_node_serialize_node, static) = {
+ .path = "test node serialize",
+ .short_help = "test node serialize [max-threads NN] nexts stats",
+ .function = test_node_serialize_command_fn,
+};
+/* *INDENT-ON* */
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibapi/vat_helper_macros.h b/src/vlibapi/vat_helper_macros.h
new file mode 100644
index 00000000..57ad520b
--- /dev/null
+++ b/src/vlibapi/vat_helper_macros.h
@@ -0,0 +1,76 @@
+/*
+ *------------------------------------------------------------------
+ * vat_helper_macros.h - collect api client helper macros in one place
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#ifndef __vat_helper_macros_h__
+#define __vat_helper_macros_h__
+
+/* M: construct, but don't yet send a message */
+#define M(T, mp) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T+__plugin_msg_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
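+/* M2: construct (with n extra bytes of payload room), but don't yet send */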
+#define M2(T, mp, n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T+__plugin_msg_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S(mp) (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W(ret) \
+do { \
+ f64 timeout = vat_time_now (vam) + 1.0; \
+ ret = -99; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ ret = vam->retval; \
+ break; \
+ } \
+ vat_suspend (vam->vlib_main, 1e-5); \
+ } \
+} while(0);
+
+/* W2: wait for results, with timeout; evaluate 'body' on success */
+#define W2(ret, body) \
+do { \
+ f64 timeout = vat_time_now (vam) + 1.0; \
+ ret = -99; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ (body); \
+ ret = vam->retval; \
+ break; \
+ } \
+ vat_suspend (vam->vlib_main, 1e-5); \
+ } \
+} while(0);
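+
+/*
+ * Typical calling sequence in a VAT command function (an
+ * illustrative sketch):
+ *
+ *   int ret;
+ *   M (SW_INTERFACE_SET_FLAGS, mp);   // construct
+ *   mp->sw_if_index = clib_host_to_net_u32 (sw_if_index);
+ *   S (mp);                           // send
+ *   W (ret);                          // wait for the reply handler
+ *   return ret;
+ */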
+
+
+#endif /* __vat_helper_macros_h__ */
diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h
new file mode 100644
index 00000000..2a1438fd
--- /dev/null
+++ b/src/vlibmemory/api.h
@@ -0,0 +1,61 @@
+/*
+ *------------------------------------------------------------------
+ * api.h
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef included_vlibmemory_api_h
+#define included_vlibmemory_api_h
+
+#include <svm/svm.h>
+#include <vlib/vlib.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api_common.h>
+
+static inline u32
+vl_msg_api_handle_get_epoch (u32 index)
+{
+ return (index & VL_API_EPOCH_MASK);
+}
+
+static inline u32
+vl_msg_api_handle_get_index (u32 index)
+{
+ return (index >> VL_API_EPOCH_SHIFT);
+}
+
+static inline u32
+vl_msg_api_handle_from_index_and_epoch (u32 index, u32 epoch)
+{
+ u32 handle;
+ ASSERT (index < 0x00FFFFFF);
+
+ handle = (index << VL_API_EPOCH_SHIFT) | (epoch & VL_API_EPOCH_MASK);
+ return handle;
+}
+
+void vl_enable_disable_memory_api (vlib_main_t * vm, int yesno);
+#endif /* included_vlibmemory_api_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/api_common.h b/src/vlibmemory/api_common.h
new file mode 100644
index 00000000..19daecdf
--- /dev/null
+++ b/src/vlibmemory/api_common.h
@@ -0,0 +1,139 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef included_vlibmemory_api_common_h
+#define included_vlibmemory_api_common_h
+
+#include <svm/svm_common.h>
+#include <vlibapi/api_common.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+
+/* Allocated in shared memory */
+
+/*
+ * Ring-allocation scheme for client API messages
+ *
+ * Only one proc/thread has control of a given message buffer.
+ * To free a buffer allocated from one of these rings, we clear
+ * a field in the buffer (header), and leave.
+ *
+ * No locks, no hits, no errors...
+ */
+typedef struct ring_alloc_
+{
+ unix_shared_memory_queue_t *rp;
+ u16 size;
+ u16 nitems;
+ u32 hits;
+ u32 misses;
+} ring_alloc_t;
+
+/*
+ * Initializers for the (shared-memory) rings
+ * _(size, n). Note: each msg has an 8 byte header.
+ * Might want to change that to an index sometime.
+ */
+#define foreach_vl_aring_size \
+_(64+8, 1024) \
+_(256+8, 128) \
+_(1024+8, 64)
+
+#define foreach_clnt_aring_size \
+_(1024+8, 1024) \
+_(2048+8, 128) \
+_(4096+8, 8)
+
+typedef struct vl_shmem_hdr_
+{
+ int version;
+
+ /* getpid () for the VLIB client process */
+ volatile int vl_pid;
+
+ /* Client sends VLIB msgs here. */
+ unix_shared_memory_queue_t *vl_input_queue;
+
+ /* Vector of rings; one for each size. */
+
+ /* VLIB allocates buffers to send msgs to clients here. */
+ ring_alloc_t *vl_rings;
+
+ /* Clients allocate buffer to send msgs to VLIB here. */
+ ring_alloc_t *client_rings;
+
+ /* Number of detected application restarts */
+ u32 application_restarts;
+
+ /* Number of messages reclaimed during application restart */
+ u32 restart_reclaims;
+
+ /* Number of garbage-collected messages */
+ u32 garbage_collects;
+
+} vl_shmem_hdr_t;
+
+#define VL_SHM_VERSION 2
+
+#define VL_API_EPOCH_MASK 0xFF
+#define VL_API_EPOCH_SHIFT 8
+
+void *vl_msg_api_alloc (int nbytes);
+void *vl_msg_api_alloc_or_null (int nbytes);
+void *vl_msg_api_alloc_as_if_client (int nbytes);
+void *vl_msg_api_alloc_as_if_client_or_null (int nbytes);
+void vl_msg_api_free (void *a);
+int vl_map_shmem (const char *region_name, int is_vlib);
+void vl_register_mapped_shmem_region (svm_region_t * rp);
+void vl_unmap_shmem (void);
+void vl_msg_api_send_shmem (unix_shared_memory_queue_t * q, u8 * elem);
+void vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem);
+void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem);
+int vl_client_connect (const char *name, int ctx_quota, int input_queue_size);
+void vl_client_disconnect (void);
+unix_shared_memory_queue_t *vl_api_client_index_to_input_queue (u32 index);
+vl_api_registration_t *vl_api_client_index_to_registration (u32 index);
+int vl_client_api_map (const char *region_name);
+void vl_client_api_unmap (void);
+void vl_set_memory_region_name (const char *name);
+void vl_set_memory_root_path (const char *root_path);
+void vl_set_memory_uid (int uid);
+void vl_set_memory_gid (int gid);
+void vl_set_global_memory_baseva (u64 baseva);
+void vl_set_global_memory_size (u64 size);
+void vl_set_api_memory_size (u64 size);
+void vl_set_global_pvt_heap_size (u64 size);
+void vl_set_api_pvt_heap_size (u64 size);
+void vl_client_disconnect_from_vlib (void);
+int vl_client_connect_to_vlib (const char *svm_name, const char *client_name,
+ int rx_queue_size);
+int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name,
+ const char *client_name,
+ int rx_queue_size);
+u16 vl_client_get_first_plugin_msg_id (const char *plugin_name);
+
+void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+u32 vl_api_memclnt_create_internal (char *, unix_shared_memory_queue_t *);
+
+#endif /* included_vlibmemory_api_common_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api
new file mode 100644
index 00000000..32e51407
--- /dev/null
+++ b/src/vlibmemory/memclnt.api
@@ -0,0 +1,114 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Create a client registration
+ */
+manual_print
+define memclnt_create {
+ i32 ctx_quota; /* requested punt context quota */
+ u32 context; /* opaque value to be returned in the reply */
+ u64 input_queue; /* client's queue */
+ u8 name[64]; /* for show, find by name, whatever */
+ u32 api_versions[8]; /* client-server pairs use as desired */
+};
+
+define memclnt_create_reply {
+ i32 response; /* Non-negative = success */
+ u64 handle; /* handle by which vlib knows this client */
+ u32 index; /* index, used e.g. by API trace replay */
+ u32 context; /* opaque value from the create request */
+ u64 message_table; /* serialized message table in shmem */
+};
+
+/*
+ * Delete a client registration
+ */
+manual_print
+define memclnt_delete {
+ u32 index; /* index, used e.g. by API trace replay */
+ u64 handle; /* handle by which vlib knows this client */
+};
+
+define memclnt_delete_reply {
+ i32 response; /* Non-negative = success */
+ u64 handle; /* in case the client wonders */
+};
+
+/*
+ * Client RX thread exit
+ */
+define rx_thread_exit {
+ u8 dummy;
+};
+
+/*
+ * Client RX thread suspend
+ */
+define memclnt_rx_thread_suspend {
+ u8 dummy;
+};
+
+/*
+ * Client read timeout
+ */
+define memclnt_read_timeout {
+ u8 dummy;
+};
+
+/*
+ * RPC
+ */
+autoreply define rpc_call {
+ u32 client_index;
+ u32 context;
+ u64 function;
+ u8 multicast;
+ u8 need_barrier_sync;
+ u8 send_reply;
+ u8 data[0];
+};
+
+/*
+ * Lookup message-ID base by name
+ */
+define get_first_msg_id {
+ u32 client_index;
+ u32 context;
+ u8 name[64];
+};
+
+define get_first_msg_id_reply {
+ u32 client_index;
+ u32 context;
+ i32 retval;
+ u16 first_msg_id;
+};
+
+/*
+ * Trace the plugin message-id allocator
+ * so we stand a chance of dealing with different sets of plugins
+ * at api trace replay time
+ */
+
+manual_print define trace_plugin_msg_ids
+{
+ u32 client_index;
+ u32 context;
+ u8 plugin_name[128];
+ u16 first_msg_id;
+ u16 last_msg_id;
+};
diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c
new file mode 100644
index 00000000..8a60a322
--- /dev/null
+++ b/src/vlibmemory/memory_client.c
@@ -0,0 +1,536 @@
+/*
+ *------------------------------------------------------------------
+ * memory_client.c - API message handling, client code.
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibmemory/api.h>
+
+#include <vlibmemory/vl_memory_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun		/* define endian swap functions */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) clib_warning (__VA_ARGS__)
+#define vl_printfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_printfun
+
+typedef struct
+{
+ u8 rx_thread_jmpbuf_valid;
+ u8 connected_to_vlib;
+ jmp_buf rx_thread_jmpbuf;
+ pthread_t rx_thread_handle;
+ /* Plugin message base lookup scheme */
+ volatile u8 first_msg_id_reply_ready;
+ u16 first_msg_id_reply;
+} memory_client_main_t;
+
+memory_client_main_t memory_client_main;
+
+static void *
+rx_thread_fn (void *arg)
+{
+ unix_shared_memory_queue_t *q;
+ memory_client_main_t *mm = &memory_client_main;
+ api_main_t *am = &api_main;
+ int i;
+
+ q = am->vl_input_queue;
+
+ /* So we can make the rx thread terminate cleanly */
+ if (setjmp (mm->rx_thread_jmpbuf) == 0)
+ {
+ mm->rx_thread_jmpbuf_valid = 1;
+ /*
+ * Find an unused slot in the per-cpu-mheaps array,
+ * and grab it for this thread. We need to be able to
+ * push/pop the thread heap without affecting other thread(s).
+ */
+ if (__os_thread_index == 0)
+ {
+ for (i = 0; i < ARRAY_LEN (clib_per_cpu_mheaps); i++)
+ {
+ if (clib_per_cpu_mheaps[i] == 0)
+ {
+ /* Copy the main thread mheap pointer */
+ clib_per_cpu_mheaps[i] = clib_per_cpu_mheaps[0];
+ __os_thread_index = i;
+ break;
+ }
+ }
+ ASSERT (__os_thread_index > 0);
+ }
+ while (1)
+ vl_msg_api_queue_handler (q);
+ }
+ pthread_exit (0);
+}
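+
+/*
+ * Note: the per-cpu-mheap slot claimed above starts out pointing at the
+ * main heap; owning a private __os_thread_index lets message handlers
+ * push/pop a thread heap without disturbing any other thread's slot.
+ */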
+
+static void
+vl_api_rx_thread_exit_t_handler (vl_api_rx_thread_exit_t * mp)
+{
+ memory_client_main_t *mm = &memory_client_main;
+ vl_msg_api_free (mp);
+ longjmp (mm->rx_thread_jmpbuf, 1);
+}
+
+static void
+vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp)
+{
+ serialize_main_t _sm, *sm = &_sm;
+ api_main_t *am = &api_main;
+ u8 *tblv;
+ u32 nmsgs;
+ int i;
+ u8 *name_and_crc;
+ u32 msg_index;
+
+ am->my_client_index = mp->index;
+ am->my_registration = (vl_api_registration_t *) (uword) mp->handle;
+
+ /* Clean out any previous hash table (unlikely) */
+ if (am->msg_index_by_name_and_crc)
+ {
+ int i;
+ u8 **keys = 0;
+ hash_pair_t *hp;
+ /* *INDENT-OFF* */
+ hash_foreach_pair (hp, am->msg_index_by_name_and_crc,
+ ({
+ vec_add1 (keys, (u8 *) hp->key);
+ }));
+ /* *INDENT-ON* */
+ for (i = 0; i < vec_len (keys); i++)
+ vec_free (keys[i]);
+ vec_free (keys);
+ }
+
+ am->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword));
+
+ /* Recreate the vnet-side API message handler table */
+ tblv = uword_to_pointer (mp->message_table, u8 *);
+ unserialize_open_data (sm, tblv, vec_len (tblv));
+ unserialize_integer (sm, &nmsgs, sizeof (u32));
+
+ for (i = 0; i < nmsgs; i++)
+ {
+ msg_index = unserialize_likely_small_unsigned_integer (sm);
+ unserialize_cstring (sm, (char **) &name_and_crc);
+ hash_set_mem (am->msg_index_by_name_and_crc, name_and_crc, msg_index);
+ }
+}
+
+static void
+noop_handler (void *notused)
+{
+}
+
+int
+vl_client_connect (const char *name, int ctx_quota, int input_queue_size)
+{
+ svm_region_t *svm;
+ vl_api_memclnt_create_t *mp;
+ vl_api_memclnt_create_reply_t *rp;
+ unix_shared_memory_queue_t *vl_input_queue;
+ vl_shmem_hdr_t *shmem_hdr;
+ int rv = 0;
+ void *oldheap;
+ api_main_t *am = &api_main;
+
+ if (am->my_registration)
+ {
+ clib_warning ("client %s already connected...", name);
+ return -1;
+ }
+
+ if (am->vlib_rp == 0)
+ {
+ clib_warning ("am->vlib_rp NULL");
+ return -1;
+ }
+
+ svm = am->vlib_rp;
+ shmem_hdr = am->shmem_hdr;
+
+ if (shmem_hdr == 0 || shmem_hdr->vl_input_queue == 0)
+ {
+ clib_warning ("shmem_hdr / input queue NULL");
+ return -1;
+ }
+
+ pthread_mutex_lock (&svm->mutex);
+ oldheap = svm_push_data_heap (svm);
+ vl_input_queue =
+ unix_shared_memory_queue_init (input_queue_size, sizeof (uword),
+ getpid (), 0);
+ pthread_mutex_unlock (&svm->mutex);
+ svm_pop_heap (oldheap);
+
+ am->my_client_index = ~0;
+ am->my_registration = 0;
+ am->vl_input_queue = vl_input_queue;
+
+ mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_create_t));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE);
+ mp->ctx_quota = ctx_quota;
+ mp->input_queue = (uword) vl_input_queue;
+ strncpy ((char *) mp->name, name, sizeof (mp->name) - 1);
+
+ vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp);
+
+ while (1)
+ {
+ int qstatus;
+ struct timespec ts, tsrem;
+ int i;
+
+ /* Wait up to 10 seconds */
+ for (i = 0; i < 1000; i++)
+ {
+ qstatus = unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp,
+ 1 /* nowait */ );
+ if (qstatus == 0)
+ goto read_one_msg;
+ ts.tv_sec = 0;
+ ts.tv_nsec = 10000 * 1000; /* 10 ms */
+ while (nanosleep (&ts, &tsrem) < 0)
+ ts = tsrem;
+ }
+ /* Timeout... */
+ clib_warning ("memclnt_create_reply timeout");
+ return -1;
+
+ read_one_msg:
+ if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_CREATE_REPLY)
+ {
+ clib_warning ("unexpected reply: id %d", ntohs (rp->_vl_msg_id));
+ continue;
+ }
+ rv = clib_net_to_host_u32 (rp->response);
+
+ vl_msg_api_handler ((void *) rp);
+ break;
+ }
+ return (rv);
+}
+
+static void
+vl_api_memclnt_delete_reply_t_handler (vl_api_memclnt_delete_reply_t * mp)
+{
+ void *oldheap;
+ api_main_t *am = &api_main;
+
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+ unix_shared_memory_queue_free (am->vl_input_queue);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+ svm_pop_heap (oldheap);
+
+ am->my_client_index = ~0;
+ am->my_registration = 0;
+ am->vl_input_queue = 0;
+}
+
+void
+vl_client_disconnect (void)
+{
+ vl_api_memclnt_delete_t *mp;
+ vl_api_memclnt_delete_reply_t *rp;
+ unix_shared_memory_queue_t *vl_input_queue;
+ vl_shmem_hdr_t *shmem_hdr;
+ time_t begin;
+ api_main_t *am = &api_main;
+
+ ASSERT (am->vlib_rp);
+ shmem_hdr = am->shmem_hdr;
+ ASSERT (shmem_hdr && shmem_hdr->vl_input_queue);
+
+ vl_input_queue = am->vl_input_queue;
+
+ mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_delete_t));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE);
+ mp->index = am->my_client_index;
+ mp->handle = (uword) am->my_registration;
+
+ vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp);
+
+ /*
+ * Have to be careful here, in case the client is disconnecting
+ * because e.g. the vlib process died, or is unresponsive.
+ */
+
+ begin = time (0);
+ while (1)
+ {
+ time_t now;
+
+ now = time (0);
+
+ if (now >= (begin + 2))
+ {
+ clib_warning ("peer unresponsive, give up");
+ am->my_client_index = ~0;
+ am->my_registration = 0;
+ am->shmem_hdr = 0;
+ break;
+ }
+ if (unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, 1) < 0)
+ continue;
+
+ /* drain the queue */
+ if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY)
+ {
+ clib_warning ("queue drain: %d", ntohs (rp->_vl_msg_id));
+ vl_msg_api_handler ((void *) rp);
+ continue;
+ }
+ vl_msg_api_handler ((void *) rp);
+ break;
+ }
+}
+
+#define foreach_api_msg \
+_(RX_THREAD_EXIT, rx_thread_exit) \
+_(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \
+_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply)
+
+
+int
+vl_client_api_map (const char *region_name)
+{
+ int rv;
+
+ if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0)
+ {
+ return rv;
+ }
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_msg;
+#undef _
+ return 0;
+}
+
+void
+vl_client_api_unmap (void)
+{
+ vl_unmap_shmem ();
+}
+
+static int
+connect_to_vlib_internal (const char *svm_name,
+ const char *client_name,
+ int rx_queue_size, int want_pthread)
+{
+ int rv = 0;
+ memory_client_main_t *mm = &memory_client_main;
+
+ if ((rv = vl_client_api_map (svm_name)))
+ {
+ clib_warning ("vl_client_api map rv %d", rv);
+ return rv;
+ }
+
+ if (vl_client_connect (client_name, 0 /* punt quota */ ,
+ rx_queue_size /* input queue */ ) < 0)
+ {
+ vl_client_api_unmap ();
+ return -1;
+ }
+
+ /* Start the rx queue thread */
+
+ if (want_pthread)
+ {
+ rv = pthread_create (&mm->rx_thread_handle,
+ NULL /*attr */ , rx_thread_fn, 0);
+ if (rv)
+ clib_warning ("pthread_create returned %d", rv);
+ }
+
+ mm->connected_to_vlib = 1;
+ return 0;
+}
+
+int
+vl_client_connect_to_vlib (const char *svm_name,
+ const char *client_name, int rx_queue_size)
+{
+ return connect_to_vlib_internal (svm_name, client_name, rx_queue_size,
+ 1 /* want pthread */ );
+}
+
+int
+vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name,
+ const char *client_name,
+ int rx_queue_size)
+{
+ return connect_to_vlib_internal (svm_name, client_name, rx_queue_size,
+ 0 /* want pthread */ );
+}
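+
+/*
+ * Usage sketch (hedged: the client name and rx queue size below are
+ * arbitrary example values, not taken from this file):
+ *
+ *   if (vl_client_connect_to_vlib ("/vpe-api", "example-client", 32) < 0)
+ *     return -1;
+ *   ... exchange binary API messages ...
+ *   vl_client_disconnect_from_vlib ();
+ */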
+
+void
+vl_client_disconnect_from_vlib (void)
+{
+ memory_client_main_t *mm = &memory_client_main;
+ api_main_t *am = &api_main;
+ uword junk;
+
+ if (mm->rx_thread_jmpbuf_valid)
+ {
+ vl_api_rx_thread_exit_t *ep;
+ ep = vl_msg_api_alloc (sizeof (*ep));
+ ep->_vl_msg_id = ntohs (VL_API_RX_THREAD_EXIT);
+ vl_msg_api_send_shmem (am->vl_input_queue, (u8 *) & ep);
+ pthread_join (mm->rx_thread_handle, (void **) &junk);
+ }
+ if (mm->connected_to_vlib)
+ {
+ vl_client_disconnect ();
+ vl_client_api_unmap ();
+ }
+ memset (mm, 0, sizeof (*mm));
+}
+
+static void vl_api_get_first_msg_id_reply_t_handler
+ (vl_api_get_first_msg_id_reply_t * mp)
+{
+ memory_client_main_t *mm = &memory_client_main;
+ i32 retval = ntohl (mp->retval);
+
+ mm->first_msg_id_reply = (retval >= 0) ? ntohs (mp->first_msg_id) : ~0;
+ mm->first_msg_id_reply_ready = 1;
+}
+
+u16
+vl_client_get_first_plugin_msg_id (const char *plugin_name)
+{
+ vl_api_get_first_msg_id_t *mp;
+ api_main_t *am = &api_main;
+ memory_client_main_t *mm = &memory_client_main;
+ f64 timeout;
+ void *old_handler;
+ clib_time_t clib_time;
+ u16 rv = ~0;
+
+ if (strlen (plugin_name) + 1 > sizeof (mp->name))
+ return (rv);
+
+ memset (&clib_time, 0, sizeof (clib_time));
+ clib_time_init (&clib_time);
+
+ /* Push this plugin's first_msg_id_reply handler */
+ old_handler = am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY];
+ am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = (void *)
+ vl_api_get_first_msg_id_reply_t_handler;
+
+ /* Ask the data-plane for the message-ID base of the indicated plugin */
+ mm->first_msg_id_reply_ready = 0;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_GET_FIRST_MSG_ID);
+ mp->client_index = am->my_client_index;
+ strncpy ((char *) mp->name, plugin_name, sizeof (mp->name) - 1);
+
+ vl_msg_api_send_shmem (am->shmem_hdr->vl_input_queue, (u8 *) & mp);
+
+ /* Synchronously wait for the answer */
+ do
+ {
+ timeout = clib_time_now (&clib_time) + 1.0;
+
+ while (clib_time_now (&clib_time) < timeout)
+ {
+ if (mm->first_msg_id_reply_ready == 1)
+ {
+ rv = mm->first_msg_id_reply;
+ goto result;
+ }
+ }
+ /* Restore old handler */
+ am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler;
+
+ return rv;
+ }
+ while (0);
+
+result:
+
+ /* Restore the old handler */
+ am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler;
+
+ if (rv == (u16) ~ 0)
+ clib_warning ("plugin '%s' not registered", plugin_name);
+
+ return rv;
+}
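+
+/*
+ * Usage sketch (hedged; "example_plugin" and the relative-ID constant
+ * are placeholders):
+ *
+ *   u16 base = vl_client_get_first_plugin_msg_id ("example_plugin");
+ *   if (base != (u16) ~0)
+ *     mp->_vl_msg_id = ntohs (base + MSG_ID_RELATIVE_TO_PLUGIN_BASE);
+ *
+ * Plugin message IDs are only meaningful relative to the base returned
+ * by the get_first_msg_id exchange above.
+ */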
+
+void
+vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
+{
+ clib_warning ("STUB called...");
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c
new file mode 100644
index 00000000..8c646908
--- /dev/null
+++ b/src/vlibmemory/memory_shared.c
@@ -0,0 +1,666 @@
+/*
+ *------------------------------------------------------------------
+ * memclnt_shared.c - API message handling, common code for both clients
+ * and the vlib process itself.
+ *
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <vppinfra/format.h>
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibmemory/api.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+
+#include <vlibmemory/vl_memory_msg_enum.h>
+
+#define vl_typedefs
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_typedefs
+
+static inline void *
+vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null)
+{
+ int i;
+ msgbuf_t *rv;
+ ring_alloc_t *ap;
+ unix_shared_memory_queue_t *q;
+ void *oldheap;
+ vl_shmem_hdr_t *shmem_hdr;
+ api_main_t *am = &api_main;
+
+ shmem_hdr = am->shmem_hdr;
+
+ if (shmem_hdr == 0)
+ {
+ clib_warning ("shared memory header NULL");
+ return 0;
+ }
+
+ /* account for the msgbuf_t header */
+ nbytes += sizeof (msgbuf_t);
+
+ if (shmem_hdr->vl_rings == 0)
+ {
+ clib_warning ("vl_rings NULL");
+ ASSERT (0);
+ abort ();
+ }
+
+ if (shmem_hdr->client_rings == 0)
+ {
+ clib_warning ("client_rings NULL");
+ ASSERT (0);
+ abort ();
+ }
+
+ ap = pool ? shmem_hdr->vl_rings : shmem_hdr->client_rings;
+ for (i = 0; i < vec_len (ap); i++)
+ {
+ /* Too big? */
+ if (nbytes > ap[i].size)
+ {
+ continue;
+ }
+
+ q = ap[i].rp;
+ if (pool == 0)
+ {
+ pthread_mutex_lock (&q->mutex);
+ }
+ rv = (msgbuf_t *) (&q->data[0] + q->head * q->elsize);
+ /*
+ * Is this item still in use?
+ */
+ if (rv->q)
+ {
+ u32 now = (u32) time (0);
+
+ if (PREDICT_TRUE (rv->gc_mark_timestamp == 0))
+ rv->gc_mark_timestamp = now;
+ else
+ {
+ if (now - rv->gc_mark_timestamp > 10)
+ {
+ if (CLIB_DEBUG > 0)
+ {
+ u16 *msg_idp, msg_id;
+ clib_warning
+ ("garbage collect pool %d ring %d index %d", pool, i,
+ q->head);
+ msg_idp = (u16 *) (rv->data);
+ msg_id = clib_net_to_host_u16 (*msg_idp);
+ if (msg_id < vec_len (api_main.msg_names))
+ clib_warning ("msg id %d name %s", (u32) msg_id,
+ api_main.msg_names[msg_id]);
+ }
+ shmem_hdr->garbage_collects++;
+ goto collected;
+ }
+ }
+
+
+ /* yes, loser; try next larger pool */
+ ap[i].misses++;
+ if (pool == 0)
+ pthread_mutex_unlock (&q->mutex);
+ continue;
+ }
+ collected:
+
+ /* OK, we have a winner */
+ ap[i].hits++;
+ /*
+ * Remember the source queue, although we
+ * don't need to know the queue to free the item.
+ */
+ rv->q = q;
+ rv->gc_mark_timestamp = 0;
+ q->head++;
+ if (q->head == q->maxsize)
+ q->head = 0;
+
+ if (pool == 0)
+ pthread_mutex_unlock (&q->mutex);
+ goto out;
+ }
+
+ /*
+ * Request too big, or head element of all size-compatible rings
+ * still in use. Fall back to shared-memory malloc.
+ */
+ am->ring_misses++;
+
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+ if (may_return_null)
+ {
+ rv = clib_mem_alloc_or_null (nbytes);
+ if (PREDICT_FALSE (rv == 0))
+ {
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+ return 0;
+ }
+ }
+ else
+ rv = clib_mem_alloc (nbytes);
+
+ rv->q = 0;
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+
+out:
+ rv->data_len = htonl (nbytes - sizeof (msgbuf_t));
+ return (rv->data);
+}
+
+void *
+vl_msg_api_alloc (int nbytes)
+{
+ int pool;
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+
+ /*
+   * Clients use pool 0, the vlib process uses pool 1.
+ */
+ pool = (am->our_pid == shmem_hdr->vl_pid);
+ return vl_msg_api_alloc_internal (nbytes, pool, 0 /* may_return_null */ );
+}
+
+void *
+vl_msg_api_alloc_or_null (int nbytes)
+{
+ int pool;
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+
+ pool = (am->our_pid == shmem_hdr->vl_pid);
+ return vl_msg_api_alloc_internal (nbytes, pool, 1 /* may_return_null */ );
+}
+
+void *
+vl_msg_api_alloc_as_if_client (int nbytes)
+{
+ return vl_msg_api_alloc_internal (nbytes, 0, 0 /* may_return_null */ );
+}
+
+void *
+vl_msg_api_alloc_as_if_client_or_null (int nbytes)
+{
+ return vl_msg_api_alloc_internal (nbytes, 0, 1 /* may_return_null */ );
+}
+
+void
+vl_msg_api_free (void *a)
+{
+ msgbuf_t *rv;
+ void *oldheap;
+ api_main_t *am = &api_main;
+
+ rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data));
+
+ /*
+ * Here's the beauty of the scheme. Only one proc/thread has
+ * control of a given message buffer. To free a buffer, we just clear the
+ * queue field, and leave. No locks, no hits, no errors...
+ */
+ if (rv->q)
+ {
+ rv->q = 0;
+ rv->gc_mark_timestamp = 0;
+ return;
+ }
+
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+ clib_mem_free (rv);
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+}
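+
+/*
+ * Alloc/free pairing sketch (hedged example; the message type is
+ * arbitrary):
+ *
+ *   vl_api_memclnt_delete_t *mp = vl_msg_api_alloc (sizeof (*mp));
+ *   memset (mp, 0, sizeof (*mp));
+ *   ...
+ *   vl_msg_api_send_shmem (q, (u8 *) &mp);   [receiver frees the msg]
+ *
+ * Ring-allocated messages are reclaimed simply by clearing rv->q; only
+ * messages that fell back to shared-memory malloc take the mutex path
+ * above.
+ */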
+
+static void
+vl_msg_api_free_nolock (void *a)
+{
+ msgbuf_t *rv;
+ void *oldheap;
+ api_main_t *am = &api_main;
+
+ rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data));
+ /*
+ * Here's the beauty of the scheme. Only one proc/thread has
+ * control of a given message buffer. To free a buffer, we just clear the
+ * queue field, and leave. No locks, no hits, no errors...
+ */
+ if (rv->q)
+ {
+ rv->q = 0;
+ return;
+ }
+
+ oldheap = svm_push_data_heap (am->vlib_rp);
+ clib_mem_free (rv);
+ svm_pop_heap (oldheap);
+}
+
+void
+vl_set_memory_root_path (const char *name)
+{
+ api_main_t *am = &api_main;
+
+ am->root_path = name;
+}
+
+void
+vl_set_memory_uid (int uid)
+{
+ api_main_t *am = &api_main;
+
+ am->api_uid = uid;
+}
+
+void
+vl_set_memory_gid (int gid)
+{
+ api_main_t *am = &api_main;
+
+ am->api_gid = gid;
+}
+
+void
+vl_set_global_memory_baseva (u64 baseva)
+{
+ api_main_t *am = &api_main;
+
+ am->global_baseva = baseva;
+}
+
+void
+vl_set_global_memory_size (u64 size)
+{
+ api_main_t *am = &api_main;
+
+ am->global_size = size;
+}
+
+void
+vl_set_api_memory_size (u64 size)
+{
+ api_main_t *am = &api_main;
+
+ am->api_size = size;
+}
+
+void
+vl_set_global_pvt_heap_size (u64 size)
+{
+ api_main_t *am = &api_main;
+
+ am->global_pvt_heap_size = size;
+}
+
+void
+vl_set_api_pvt_heap_size (u64 size)
+{
+ api_main_t *am = &api_main;
+
+ am->api_pvt_heap_size = size;
+}
+
+int
+vl_map_shmem (const char *region_name, int is_vlib)
+{
+ svm_map_region_args_t _a, *a = &_a;
+ svm_region_t *vlib_rp, *root_rp;
+ void *oldheap;
+ vl_shmem_hdr_t *shmem_hdr = 0;
+ api_main_t *am = &api_main;
+ int i, rv;
+ struct timespec ts, tsrem;
+ u32 vlib_input_queue_length;
+ char *vpe_api_region_suffix = "-vpe-api";
+
+ memset (a, 0, sizeof (*a));
+
+ if (strstr (region_name, vpe_api_region_suffix))
+ {
+ u8 *root_path = format (0, "%s", region_name);
+ _vec_len (root_path) = (vec_len (root_path) -
+ strlen (vpe_api_region_suffix));
+ vec_terminate_c_string (root_path);
+ a->root_path = (const char *) root_path;
+ am->root_path = (const char *) root_path;
+ }
+
+ if (is_vlib == 0)
+ {
+ rv = svm_region_init_chroot (am->root_path);
+ if (rv)
+ return rv;
+ }
+
+ if (a->root_path != NULL)
+ {
+ a->name = "/vpe-api";
+ }
+ else
+ a->name = region_name;
+ a->size = am->api_size ? am->api_size : (16 << 20);
+ a->flags = SVM_FLAGS_MHEAP;
+ a->uid = am->api_uid;
+ a->gid = am->api_gid;
+ a->pvt_heap_size = am->api_pvt_heap_size;
+
+ vlib_rp = svm_region_find_or_create (a);
+
+ if (vlib_rp == 0)
+ return (-2);
+
+ pthread_mutex_lock (&vlib_rp->mutex);
+ /* Has someone else set up the shared-memory variable table? */
+ if (vlib_rp->user_ctx)
+ {
+ am->shmem_hdr = (void *) vlib_rp->user_ctx;
+ am->our_pid = getpid ();
+ if (is_vlib)
+ {
+ unix_shared_memory_queue_t *q;
+ uword old_msg;
+ /*
+	   * Application restart. Reset cached pids, API message
+	   * rings, and the list of clients; otherwise various
+	   * things fail (e.g. the queue non-empty notification).
+ */
+
+ /* ghosts keep the region from disappearing properly */
+ svm_client_scan_this_region_nolock (vlib_rp);
+ am->shmem_hdr->application_restarts++;
+ q = am->shmem_hdr->vl_input_queue;
+ am->shmem_hdr->vl_pid = getpid ();
+ q->consumer_pid = am->shmem_hdr->vl_pid;
+ /* Drain the input queue, freeing msgs */
+ for (i = 0; i < 10; i++)
+ {
+ if (pthread_mutex_trylock (&q->mutex) == 0)
+ {
+ pthread_mutex_unlock (&q->mutex);
+ goto mutex_ok;
+ }
+ ts.tv_sec = 0;
+ ts.tv_nsec = 10000 * 1000; /* 10 ms */
+ while (nanosleep (&ts, &tsrem) < 0)
+ ts = tsrem;
+ }
+ /* Mutex buggered, "fix" it */
+ memset (&q->mutex, 0, sizeof (q->mutex));
+ clib_warning ("forcibly release main input queue mutex");
+
+ mutex_ok:
+ am->vlib_rp = vlib_rp;
+ while (unix_shared_memory_queue_sub (q,
+ (u8 *) & old_msg,
+ 1 /* nowait */ )
+ != -2 /* queue underflow */ )
+ {
+ vl_msg_api_free_nolock ((void *) old_msg);
+ am->shmem_hdr->restart_reclaims++;
+ }
+ pthread_mutex_unlock (&vlib_rp->mutex);
+ root_rp = svm_get_root_rp ();
+ ASSERT (root_rp);
+ /* Clean up the root region client list */
+ pthread_mutex_lock (&root_rp->mutex);
+ svm_client_scan_this_region_nolock (root_rp);
+ pthread_mutex_unlock (&root_rp->mutex);
+ }
+ else
+ {
+ pthread_mutex_unlock (&vlib_rp->mutex);
+ }
+ am->vlib_rp = vlib_rp;
+ vec_add1 (am->mapped_shmem_regions, vlib_rp);
+ return 0;
+ }
+ /* Clients simply have to wait... */
+ if (!is_vlib)
+ {
+ pthread_mutex_unlock (&vlib_rp->mutex);
+
+ /* Wait up to 100 seconds... */
+ for (i = 0; i < 10000; i++)
+ {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 10000 * 1000; /* 10 ms */
+ while (nanosleep (&ts, &tsrem) < 0)
+ ts = tsrem;
+ if (vlib_rp->user_ctx)
+ goto ready;
+ }
+ /* Clean up and leave... */
+ svm_region_unmap (vlib_rp);
+ clib_warning ("region init fail");
+ return (-2);
+
+ ready:
+ am->shmem_hdr = (void *) vlib_rp->user_ctx;
+ am->our_pid = getpid ();
+ am->vlib_rp = vlib_rp;
+ vec_add1 (am->mapped_shmem_regions, vlib_rp);
+ return 0;
+ }
+
+ /* Nope, it's our problem... */
+
+ oldheap = svm_push_data_heap (vlib_rp);
+
+ vec_validate (shmem_hdr, 0);
+ shmem_hdr->version = VL_SHM_VERSION;
+
+ /* vlib main input queue */
+ vlib_input_queue_length = 1024;
+ if (am->vlib_input_queue_length)
+ vlib_input_queue_length = am->vlib_input_queue_length;
+
+ shmem_hdr->vl_input_queue =
+ unix_shared_memory_queue_init (vlib_input_queue_length, sizeof (uword),
+ getpid (), am->vlib_signal);
+
+ /* Set up the msg ring allocator */
+#define _(sz,n) \
+ do { \
+ ring_alloc_t _rp; \
+ _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \
+ _rp.size = (sz); \
+ _rp.nitems = n; \
+ _rp.hits = 0; \
+ _rp.misses = 0; \
+ vec_add1(shmem_hdr->vl_rings, _rp); \
+ } while (0);
+
+ foreach_vl_aring_size;
+#undef _
+
+#define _(sz,n) \
+ do { \
+ ring_alloc_t _rp; \
+ _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \
+ _rp.size = (sz); \
+ _rp.nitems = n; \
+ _rp.hits = 0; \
+ _rp.misses = 0; \
+ vec_add1(shmem_hdr->client_rings, _rp); \
+ } while (0);
+
+ foreach_clnt_aring_size;
+#undef _
+
+ am->shmem_hdr = shmem_hdr;
+ am->vlib_rp = vlib_rp;
+ am->our_pid = getpid ();
+ if (is_vlib)
+ am->shmem_hdr->vl_pid = am->our_pid;
+
+ svm_pop_heap (oldheap);
+
+ /*
+ * After absolutely everything that a client might see is set up,
+ * declare the shmem region valid
+ */
+ vlib_rp->user_ctx = shmem_hdr;
+
+ pthread_mutex_unlock (&vlib_rp->mutex);
+ vec_add1 (am->mapped_shmem_regions, vlib_rp);
+ return 0;
+}
+
+void
+vl_register_mapped_shmem_region (svm_region_t * rp)
+{
+ api_main_t *am = &api_main;
+
+ vec_add1 (am->mapped_shmem_regions, rp);
+}
+
+void
+vl_unmap_shmem (void)
+{
+ svm_region_t *rp;
+ int i;
+ api_main_t *am = &api_main;
+
+ if (!svm_get_root_rp ())
+ return;
+
+ for (i = 0; i < vec_len (am->mapped_shmem_regions); i++)
+ {
+ rp = am->mapped_shmem_regions[i];
+ svm_region_unmap (rp);
+ }
+
+ vec_free (am->mapped_shmem_regions);
+ am->shmem_hdr = 0;
+
+ svm_region_exit ();
+ /* $$$ more careful cleanup, valgrind run... */
+ vec_free (am->msg_handlers);
+ vec_free (am->msg_endian_handlers);
+ vec_free (am->msg_print_handlers);
+}
+
+void
+vl_msg_api_send_shmem (unix_shared_memory_queue_t * q, u8 * elem)
+{
+ api_main_t *am = &api_main;
+ uword *trace = (uword *) elem;
+
+ if (am->tx_trace && am->tx_trace->enabled)
+ vl_msg_api_trace (am, am->tx_trace, (void *) trace[0]);
+
+ (void) unix_shared_memory_queue_add (q, elem, 0 /* nowait */ );
+}
+
+void
+vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem)
+{
+ api_main_t *am = &api_main;
+ uword *trace = (uword *) elem;
+
+ if (am->tx_trace && am->tx_trace->enabled)
+ vl_msg_api_trace (am, am->tx_trace, (void *) trace[0]);
+
+ (void) unix_shared_memory_queue_add_nolock (q, elem);
+}
+
+u32
+vl_api_get_msg_index (u8 * name_and_crc)
+{
+ api_main_t *am = &api_main;
+ uword *p;
+
+ if (am->msg_index_by_name_and_crc)
+ {
+ p = hash_get_mem (am->msg_index_by_name_and_crc, name_and_crc);
+ if (p)
+ return p[0];
+ }
+ return ~0;
+}
+
+static inline vl_api_registration_t *
+vl_api_client_index_to_registration_internal (u32 handle)
+{
+ vl_api_registration_t **regpp;
+ vl_api_registration_t *regp;
+ api_main_t *am = &api_main;
+ u32 index;
+
+ index = vl_msg_api_handle_get_index (handle);
+ if ((am->shmem_hdr->application_restarts & VL_API_EPOCH_MASK)
+ != vl_msg_api_handle_get_epoch (handle))
+ {
+ vl_msg_api_increment_missing_client_counter ();
+ return 0;
+ }
+
+ regpp = am->vl_clients + index;
+
+ if (pool_is_free (am->vl_clients, regpp))
+ {
+ vl_msg_api_increment_missing_client_counter ();
+ return 0;
+ }
+ regp = *regpp;
+ return (regp);
+}
+
+vl_api_registration_t *
+vl_api_client_index_to_registration (u32 index)
+{
+ return (vl_api_client_index_to_registration_internal (index));
+}
+
+unix_shared_memory_queue_t *
+vl_api_client_index_to_input_queue (u32 index)
+{
+ vl_api_registration_t *regp;
+ api_main_t *am = &api_main;
+
+ /* Special case: vlib trying to send itself a message */
+ if (index == (u32) ~ 0)
+ return (am->shmem_hdr->vl_input_queue);
+
+ regp = vl_api_client_index_to_registration_internal (index);
+ if (!regp)
+ return 0;
+ return (regp->vl_input_queue);
+}
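+
+/*
+ * Handle layout note: a client handle packs the registration pool index
+ * together with the region's application-restart epoch, roughly
+ *
+ *   handle = ((application_restarts & VL_API_EPOCH_MASK) << shift) | index
+ *
+ * (see the vl_msg_api_handle_* accessors for the exact encoding), so
+ * handles minted before a vlib restart fail the epoch check above.
+ */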
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
new file mode 100644
index 00000000..d305ea61
--- /dev/null
+++ b/src/vlibmemory/memory_vlib.c
@@ -0,0 +1,2405 @@
+/*
+ *------------------------------------------------------------------
+ * memory_vlib.c
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <pthread.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/byte_order.h>
+#include <vppinfra/elog.h>
+#include <stdarg.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/**
+ * @file
+ * @brief Binary API messaging via shared memory
+ * Low-level, primary provisioning interface
+ */
+/*? %%clicmd:group_label Binary API CLI %% ?*/
+/*? %%syscfg:group_label Binary API configuration %% ?*/
+
+#define TRACE_VLIB_MEMORY_QUEUE 0
+
+#include <vlibmemory/vl_memory_msg_enum.h> /* enumerate all vlib messages */
+
+#define vl_typedefs /* define message structures */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_typedefs
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_printfun
+
+static inline void *
+vl_api_memclnt_create_t_print (vl_api_memclnt_create_t * a, void *handle)
+{
+ vl_print (handle, "vl_api_memclnt_create_t:\n");
+ vl_print (handle, "name: %s\n", a->name);
+ vl_print (handle, "input_queue: 0x%wx\n", a->input_queue);
+ vl_print (handle, "context: %u\n", (unsigned) a->context);
+ vl_print (handle, "ctx_quota: %ld\n", (long) a->ctx_quota);
+ return handle;
+}
+
+static inline void *
+vl_api_memclnt_delete_t_print (vl_api_memclnt_delete_t * a, void *handle)
+{
+ vl_print (handle, "vl_api_memclnt_delete_t:\n");
+ vl_print (handle, "index: %u\n", (unsigned) a->index);
+ vl_print (handle, "handle: 0x%wx\n", a->handle);
+ return handle;
+}
+
+static inline void *
+vl_api_trace_plugin_msg_ids_t_print (vl_api_trace_plugin_msg_ids_t * a,
+ void *handle)
+{
+ vl_print (handle, "vl_api_trace_plugin_msg_ids: %s first %u last %u\n",
+ a->plugin_name,
+ clib_host_to_net_u16 (a->first_msg_id),
+ clib_host_to_net_u16 (a->last_msg_id));
+ return handle;
+}
+
+/* instantiate all the endian swap functions we know about */
+#define vl_endianfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_endianfun
+
+void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
+ __attribute__ ((weak));
+
+void
+vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
+{
+ static int count;
+
+ if (count++ < 5)
+ clib_warning ("need to link against -lvlibsocket, msg not sent!");
+}
+
+void
+vl_msg_api_send (vl_api_registration_t * rp, u8 * elem)
+{
+ if (PREDICT_FALSE (rp->registration_type > REGISTRATION_TYPE_SHMEM))
+ {
+ vl_socket_api_send (rp, elem);
+ }
+ else
+ {
+ vl_msg_api_send_shmem (rp->vl_input_queue, elem);
+ }
+}
+
+u8 *
+vl_api_serialize_message_table (api_main_t * am, u8 * vector)
+{
+ serialize_main_t _sm, *sm = &_sm;
+ hash_pair_t *hp;
+ u32 nmsg = hash_elts (am->msg_index_by_name_and_crc);
+
+ serialize_open_vector (sm, vector);
+
+ /* serialize the count */
+ serialize_integer (sm, nmsg, sizeof (u32));
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (hp, am->msg_index_by_name_and_crc,
+ ({
+ serialize_likely_small_unsigned_integer (sm, hp->value[0]);
+ serialize_cstring (sm, (char *) hp->key);
+ }));
+ /* *INDENT-ON* */
+
+ return serialize_close_vector (sm);
+}
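+
+/*
+ * Serialized table layout, as produced above:
+ *
+ *   u32 count
+ *   count x { likely-small-int msg_index, cstring "name_crc" }
+ *
+ * Clients walk exactly this layout in
+ * vl_api_memclnt_create_reply_t_handler to rebuild their
+ * msg_index_by_name_and_crc hash.
+ */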
+
+/*
+ * vl_api_memclnt_create_internal
+ */
+
+u32
+vl_api_memclnt_create_internal (char *name, unix_shared_memory_queue_t * q)
+{
+ vl_api_registration_t **regpp;
+ vl_api_registration_t *regp;
+ svm_region_t *svm;
+ void *oldheap;
+ api_main_t *am = &api_main;
+
+ ASSERT (vlib_get_thread_index () == 0);
+ pool_get (am->vl_clients, regpp);
+
+ svm = am->vlib_rp;
+
+ pthread_mutex_lock (&svm->mutex);
+ oldheap = svm_push_data_heap (svm);
+ *regpp = clib_mem_alloc (sizeof (vl_api_registration_t));
+
+ regp = *regpp;
+ memset (regp, 0, sizeof (*regp));
+ regp->registration_type = REGISTRATION_TYPE_SHMEM;
+ regp->vl_api_registration_pool_index = regpp - am->vl_clients;
+
+ regp->vl_input_queue = q;
+ regp->name = format (0, "%s%c", name, 0);
+
+ pthread_mutex_unlock (&svm->mutex);
+ svm_pop_heap (oldheap);
+ return vl_msg_api_handle_from_index_and_epoch
+ (regp->vl_api_registration_pool_index,
+ am->shmem_hdr->application_restarts);
+}
+
+
+/*
+ * vl_api_memclnt_create_t_handler
+ */
+void
+vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp)
+{
+ vl_api_registration_t **regpp;
+ vl_api_registration_t *regp;
+ vl_api_memclnt_create_reply_t *rp;
+ svm_region_t *svm;
+ unix_shared_memory_queue_t *q;
+ int rv = 0;
+ void *oldheap;
+ api_main_t *am = &api_main;
+
+ /*
+ * This is tortured. Maintain a vlib-address-space private
+ * pool of client registrations. We use the shared-memory virtual
+ * address of the client structure as a handle, to allow direct
+ * manipulation of context quota variables from the client library.
+ *
+ * This scheme causes trouble w/ API message trace replay, since
+ * some random VA from clib_mem_alloc() certainly won't
+ * occur in the Linux sim. The (very) few places
+ * that care need to use the pool index.
+ *
+ * Putting the registration object(s) into a pool in shared memory and
+ * using the pool index as a handle seems like a great idea.
+ * Unfortunately, each and every reference to that pool would need
+ * to be protected by a mutex:
+ *
+ * Client VLIB
+ * ------ ----
+ * convert pool index to
+ * pointer.
+ * <deschedule>
+ * expand pool
+ * <deschedule>
+ * kaboom!
+ */
+
+ pool_get (am->vl_clients, regpp);
+
+ svm = am->vlib_rp;
+
+ pthread_mutex_lock (&svm->mutex);
+ oldheap = svm_push_data_heap (svm);
+ *regpp = clib_mem_alloc (sizeof (vl_api_registration_t));
+
+ regp = *regpp;
+ memset (regp, 0, sizeof (*regp));
+ regp->registration_type = REGISTRATION_TYPE_SHMEM;
+ regp->vl_api_registration_pool_index = regpp - am->vl_clients;
+
+ q = regp->vl_input_queue = (unix_shared_memory_queue_t *) (uword)
+ mp->input_queue;
+
+ regp->name = format (0, "%s", mp->name);
+ vec_add1 (regp->name, 0);
+
+ pthread_mutex_unlock (&svm->mutex);
+ svm_pop_heap (oldheap);
+
+ ASSERT (am->serialized_message_table_in_shmem);
+
+ rp = vl_msg_api_alloc (sizeof (*rp));
+ rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_REPLY);
+ rp->handle = (uword) regp;
+ rp->index = vl_msg_api_handle_from_index_and_epoch
+ (regp->vl_api_registration_pool_index,
+ am->shmem_hdr->application_restarts);
+ rp->context = mp->context;
+ rp->response = ntohl (rv);
+ rp->message_table =
+ pointer_to_uword (am->serialized_message_table_in_shmem);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rp);
+}
+
+static int
+call_reaper_functions (u32 client_index)
+{
+ clib_error_t *error = 0;
+ _vl_msg_api_function_list_elt_t *i;
+
+ i = api_main.reaper_function_registrations;
+ while (i)
+ {
+ error = i->f (client_index);
+ if (error)
+ clib_error_report (error);
+ i = i->next_init_function;
+ }
+ return 0;
+}
+
+/*
+ * vl_api_memclnt_delete_t_handler
+ */
+void
+vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp)
+{
+ vl_api_registration_t **regpp;
+ vl_api_registration_t *regp;
+ vl_api_memclnt_delete_reply_t *rp;
+ svm_region_t *svm;
+ void *oldheap;
+ api_main_t *am = &api_main;
+ u32 handle, client_index, epoch;
+
+ handle = mp->index;
+
+ if (call_reaper_functions (handle))
+ return;
+
+ epoch = vl_msg_api_handle_get_epoch (handle);
+ client_index = vl_msg_api_handle_get_index (handle);
+
+ if (epoch != (am->shmem_hdr->application_restarts & VL_API_EPOCH_MASK))
+ {
+ clib_warning
+ ("Stale clnt delete index %d old epoch %d cur epoch %d",
+ client_index, epoch,
+ (am->shmem_hdr->application_restarts & VL_API_EPOCH_MASK));
+ return;
+ }
+
+ regpp = am->vl_clients + client_index;
+
+ if (!pool_is_free (am->vl_clients, regpp))
+ {
+ regp = *regpp;
+ svm = am->vlib_rp;
+
+ /* $$$ check the input queue for e.g. punted sf's */
+
+ rp = vl_msg_api_alloc (sizeof (*rp));
+ rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE_REPLY);
+ rp->handle = mp->handle;
+ rp->response = 1;
+
+ vl_msg_api_send_shmem (regp->vl_input_queue, (u8 *) & rp);
+
+ if (client_index != regp->vl_api_registration_pool_index)
+ {
+ clib_warning ("mismatch client_index %d pool_index %d",
+ client_index, regp->vl_api_registration_pool_index);
+ vl_msg_api_free (rp);
+ return;
+ }
+
+ /* No dangling references, please */
+ *regpp = 0;
+
+ pool_put_index (am->vl_clients, regp->vl_api_registration_pool_index);
+
+ pthread_mutex_lock (&svm->mutex);
+ oldheap = svm_push_data_heap (svm);
+ /* Poison the old registration */
+ memset (regp, 0xF1, sizeof (*regp));
+ clib_mem_free (regp);
+ pthread_mutex_unlock (&svm->mutex);
+ svm_pop_heap (oldheap);
+ }
+ else
+ {
+ clib_warning ("unknown client ID %d", mp->index);
+ }
+}
+
+void
+vl_api_get_first_msg_id_t_handler (vl_api_get_first_msg_id_t * mp)
+{
+ vl_api_get_first_msg_id_reply_t *rmp;
+ unix_shared_memory_queue_t *q;
+ uword *p;
+ api_main_t *am = &api_main;
+ vl_api_msg_range_t *rp;
+ u8 name[64];
+ u16 first_msg_id = ~0;
+ int rv = -7; /* VNET_API_ERROR_INVALID_VALUE */
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ if (am->msg_range_by_name == 0)
+ goto out;
+
+ strncpy ((char *) name, (char *) mp->name, ARRAY_LEN (name) - 1);
+
+ p = hash_get_mem (am->msg_range_by_name, name);
+ if (p == 0)
+ goto out;
+
+ rp = vec_elt_at_index (am->msg_ranges, p[0]);
+
+ first_msg_id = rp->first_msg_id;
+ rv = 0;
+
+out:
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_GET_FIRST_MSG_ID_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = ntohl (rv);
+ rmp->first_msg_id = ntohs (first_msg_id);
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+#define foreach_vlib_api_msg \
+_(MEMCLNT_CREATE, memclnt_create) \
+_(MEMCLNT_DELETE, memclnt_delete) \
+_(GET_FIRST_MSG_ID, get_first_msg_id)
+
+/*
+ * vl_api_init
+ */
+static int
+memory_api_init (const char *region_name)
+{
+ int rv;
+ vl_msg_api_msg_config_t cfg;
+ vl_msg_api_msg_config_t *c = &cfg;
+
+ memset (c, 0, sizeof (*c));
+
+ if ((rv = vl_map_shmem (region_name, 1 /* is_vlib */ )) < 0)
+ return rv;
+
+#define _(N,n) do { \
+ c->id = VL_API_##N; \
+ c->name = #n; \
+ c->handler = vl_api_##n##_t_handler; \
+ c->cleanup = vl_noop_handler; \
+ c->endian = vl_api_##n##_t_endian; \
+ c->print = vl_api_##n##_t_print; \
+ c->size = sizeof(vl_api_##n##_t); \
+ c->traced = 1; /* trace, so these msgs print */ \
+ c->replay = 0; /* don't replay client create/delete msgs */ \
+ c->message_bounce = 0; /* don't bounce this message */ \
+ vl_msg_api_config(c);} while (0);
+
+ foreach_vlib_api_msg;
+#undef _
+
+ return 0;
+}
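+
+/*
+ * For reference, the _(N,n) expansion above configures one message per
+ * entry in foreach_vlib_api_msg; e.g. MEMCLNT_CREATE wires
+ * VL_API_MEMCLNT_CREATE to vl_api_memclnt_create_t_handler,
+ * vl_api_memclnt_create_t_endian and vl_api_memclnt_create_t_print,
+ * with size sizeof (vl_api_memclnt_create_t), traced but not replayed.
+ */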
+
+#define foreach_histogram_bucket \
+_(400) \
+_(200) \
+_(100) \
+_(10)
+
+typedef enum
+{
+#define _(n) SLEEP_##n##_US,
+ foreach_histogram_bucket
+#undef _
+ SLEEP_N_BUCKETS,
+} histogram_index_t;
+
+static u64 vector_rate_histogram[SLEEP_N_BUCKETS];
+
+static void memclnt_queue_callback (vlib_main_t * vm);
+
+/*
+ * Callback to send ourselves a plugin numbering-space trace msg
+ */
+static void
+send_one_plugin_msg_ids_msg (u8 * name, u16 first_msg_id, u16 last_msg_id)
+{
+ vl_api_trace_plugin_msg_ids_t *mp;
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q;
+
+ mp = vl_msg_api_alloc_as_if_client (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_TRACE_PLUGIN_MSG_IDS);
+ strncpy ((char *) mp->plugin_name, (char *) name,
+ sizeof (mp->plugin_name) - 1);
+ mp->first_msg_id = clib_host_to_net_u16 (first_msg_id);
+ mp->last_msg_id = clib_host_to_net_u16 (last_msg_id);
+
+ q = shmem_hdr->vl_input_queue;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static uword
+memclnt_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ uword mp;
+ vl_shmem_hdr_t *shm;
+ unix_shared_memory_queue_t *q;
+ clib_error_t *e;
+ int rv;
+ api_main_t *am = &api_main;
+ f64 dead_client_scan_time;
+ f64 sleep_time, start_time;
+ f64 vector_rate;
+ int i;
+ u8 *serialized_message_table = 0;
+ svm_region_t *svm;
+ void *oldheap;
+
+ vlib_set_queue_signal_callback (vm, memclnt_queue_callback);
+
+ if ((rv = memory_api_init (am->region_name)) < 0)
+ {
+ clib_warning ("memory_api_init returned %d, wait for godot...", rv);
+ vlib_process_suspend (vm, 1e70);
+ }
+
+ shm = am->shmem_hdr;
+ ASSERT (shm);
+ q = shm->vl_input_queue;
+ ASSERT (q);
+
+ e = vlib_call_init_exit_functions
+ (vm, vm->api_init_function_registrations, 1 /* call_once */ );
+ if (e)
+ clib_error_report (e);
+
+ sleep_time = 20.0;
+ dead_client_scan_time = vlib_time_now (vm) + 20.0;
+
+ /*
+ * Send plugin message range messages for each plugin we loaded
+ */
+ for (i = 0; i < vec_len (am->msg_ranges); i++)
+ {
+ vl_api_msg_range_t *rp = am->msg_ranges + i;
+ send_one_plugin_msg_ids_msg (rp->name, rp->first_msg_id,
+ rp->last_msg_id);
+ }
+
+ /*
+   * Snapshot the API message table.
+ */
+ serialized_message_table = vl_api_serialize_message_table (am, 0);
+
+ svm = am->vlib_rp;
+ pthread_mutex_lock (&svm->mutex);
+ oldheap = svm_push_data_heap (svm);
+
+ am->serialized_message_table_in_shmem = vec_dup (serialized_message_table);
+
+ pthread_mutex_unlock (&svm->mutex);
+ svm_pop_heap (oldheap);
+
+ /*
+ * Save the api message table snapshot, if configured
+ */
+ if (am->save_msg_table_filename)
+ {
+ int fd, rv;
+ u8 *chroot_file;
+ if (strstr ((char *) am->save_msg_table_filename, "..")
+ || index ((char *) am->save_msg_table_filename, '/'))
+ {
+ clib_warning ("illegal save-message-table filename '%s'",
+ am->save_msg_table_filename);
+ goto skip_save;
+ }
+
+ chroot_file = format (0, "/tmp/%s%c", am->save_msg_table_filename, 0);
+
+ fd = creat ((char *) chroot_file, 0644);
+
+ if (fd < 0)
+ {
+ clib_unix_warning ("creat");
+ goto skip_save;
+ }
+ rv = write (fd, serialized_message_table,
+ vec_len (serialized_message_table));
+
+ if (rv != vec_len (serialized_message_table))
+ clib_unix_warning ("write");
+
+ rv = close (fd);
+ if (rv < 0)
+ clib_unix_warning ("close");
+
+ vec_free (chroot_file);
+ }
+
+skip_save:
+ vec_free (serialized_message_table);
+
+ /* $$$ pay attention to frame size, control CPU usage */
+ while (1)
+ {
+ uword event_type __attribute__ ((unused));
+ i8 *headp;
+ int need_broadcast;
+
+ /*
+ * There's a reason for checking the queue before
+ * sleeping. If the vlib application crashes, it's entirely
+ * possible for a client to enqueue a connect request
+ * during the process restart interval.
+ *
+ * Unless some force of physics causes the new incarnation
+ * of the application to process the request, the client will
+ * sit and wait for Godot...
+ */
+ vector_rate = vlib_last_vector_length_per_node (vm);
+ start_time = vlib_time_now (vm);
+ while (1)
+ {
+ pthread_mutex_lock (&q->mutex);
+ if (q->cursize == 0)
+ {
+ vm->api_queue_nonempty = 0;
+ pthread_mutex_unlock (&q->mutex);
+
+ if (TRACE_VLIB_MEMORY_QUEUE)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "q-underflow: len %d",
+ .format_args = "i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 len;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->len = 0;
+ }
+ sleep_time = 20.0;
+ break;
+ }
+
+ headp = (i8 *) (q->data + sizeof (uword) * q->head);
+ clib_memcpy (&mp, headp, sizeof (uword));
+
+ q->head++;
+ need_broadcast = (q->cursize == q->maxsize / 2);
+ q->cursize--;
+
+ if (PREDICT_FALSE (q->head == q->maxsize))
+ q->head = 0;
+ pthread_mutex_unlock (&q->mutex);
+ if (need_broadcast)
+ (void) pthread_cond_broadcast (&q->condvar);
+
+ vl_msg_api_handler_with_vm_node (am, (void *) mp, vm, node);
+
+ /* Allow no more than 10us without a pause */
+ if (vlib_time_now (vm) > start_time + 10e-6)
+ {
+ int index = SLEEP_400_US;
+ if (vector_rate > 40.0)
+ sleep_time = 400e-6;
+ else if (vector_rate > 20.0)
+ {
+ index = SLEEP_200_US;
+ sleep_time = 200e-6;
+ }
+ else if (vector_rate >= 1.0)
+ {
+ index = SLEEP_100_US;
+ sleep_time = 100e-6;
+ }
+ else
+ {
+ index = SLEEP_10_US;
+ sleep_time = 10e-6;
+ }
+ vector_rate_histogram[index] += 1;
+ break;
+ }
+ }
+
+ event_type = vlib_process_wait_for_event_or_clock (vm, sleep_time);
+ vm->queue_signal_pending = 0;
+ vlib_process_get_events (vm, 0 /* event_data */ );
+
+ if (vlib_time_now (vm) > dead_client_scan_time)
+ {
+ vl_api_registration_t **regpp;
+ vl_api_registration_t *regp;
+ unix_shared_memory_queue_t *q;
+ static u32 *dead_indices;
+ static u32 *confused_indices;
+
+ vec_reset_length (dead_indices);
+ vec_reset_length (confused_indices);
+
+ /* *INDENT-OFF* */
+ pool_foreach (regpp, am->vl_clients,
+ ({
+ regp = *regpp;
+ if (regp)
+ {
+ q = regp->vl_input_queue;
+ if (kill (q->consumer_pid, 0) < 0)
+ {
+ vec_add1(dead_indices, regpp - am->vl_clients);
+ }
+ }
+ else
+ {
+ clib_warning ("NULL client registration index %d",
+ regpp - am->vl_clients);
+ vec_add1 (confused_indices, regpp - am->vl_clients);
+ }
+ }));
+ /* *INDENT-ON* */
+ /* This should "never happen," but if it does, fix it... */
+ if (PREDICT_FALSE (vec_len (confused_indices) > 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (confused_indices); i++)
+ {
+ pool_put_index (am->vl_clients, confused_indices[i]);
+ }
+ }
+
+ if (PREDICT_FALSE (vec_len (dead_indices) > 0))
+ {
+ int i;
+ svm_region_t *svm;
+ void *oldheap;
+
+ /* Allow the application to clean up its registrations */
+ for (i = 0; i < vec_len (dead_indices); i++)
+ {
+ regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]);
+ if (regpp)
+ {
+ u32 handle;
+
+ handle = vl_msg_api_handle_from_index_and_epoch
+ (dead_indices[i], shm->application_restarts);
+ (void) call_reaper_functions (handle);
+ }
+ }
+
+ svm = am->vlib_rp;
+ pthread_mutex_lock (&svm->mutex);
+ oldheap = svm_push_data_heap (svm);
+
+ for (i = 0; i < vec_len (dead_indices); i++)
+ {
+ regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]);
+ if (regpp)
+ {
+ /* Poison the old registration */
+ memset (*regpp, 0xF3, sizeof (**regpp));
+ clib_mem_free (*regpp);
+ /* no dangling references, please */
+ *regpp = 0;
+ }
+ else
+ {
+ svm_pop_heap (oldheap);
+ clib_warning ("Duplicate free, client index %d",
+ regpp - am->vl_clients);
+ oldheap = svm_push_data_heap (svm);
+ }
+ }
+
+ svm_client_scan_this_region_nolock (am->vlib_rp);
+
+ pthread_mutex_unlock (&svm->mutex);
+ svm_pop_heap (oldheap);
+ for (i = 0; i < vec_len (dead_indices); i++)
+ pool_put_index (am->vl_clients, dead_indices[i]);
+ }
+
+ dead_client_scan_time = vlib_time_now (vm) + 20.0;
+ }
+
+ if (TRACE_VLIB_MEMORY_QUEUE)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "q-awake: len %d",
+ .format_args = "i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 len;
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->len = q->cursize;
+ }
+ }
+
+ return 0;
+}
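+
+/*
+ * Adaptive sleep policy applied above after each 10 us work quantum:
+ *
+ *   vector rate    sleep
+ *   > 40.0         400 us
+ *   > 20.0         200 us
+ *   >= 1.0         100 us
+ *   < 1.0           10 us
+ *
+ * A busy data plane pushes the API process to back off; an idle one is
+ * polled frequently. The histogram above records the distribution.
+ */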
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (memclnt_node,static) = {
+ .function = memclnt_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "api-rx-from-ring",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+vl_api_show_histogram_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cli_cmd)
+{
+ u64 total_counts = 0;
+ int i;
+
+ for (i = 0; i < SLEEP_N_BUCKETS; i++)
+ {
+ total_counts += vector_rate_histogram[i];
+ }
+
+ if (total_counts == 0)
+ {
+ vlib_cli_output (vm, "No control-plane activity.");
+ return 0;
+ }
+
+#define _(n) \
+ do { \
+ f64 percent; \
+ percent = ((f64) vector_rate_histogram[SLEEP_##n##_US]) \
+ / (f64) total_counts; \
+ percent *= 100.0; \
+ vlib_cli_output (vm, "Sleep %3d us: %llu, %.2f%%",n, \
+ vector_rate_histogram[SLEEP_##n##_US], \
+ percent); \
+ } while (0);
+ foreach_histogram_bucket;
+#undef _
+
+ return 0;
+}
+
+/*?
+ * Display the binary api sleep-time histogram
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_api_histogram_command, static) =
+{
+ .path = "show api histogram",
+ .short_help = "show api histogram",
+ .function = vl_api_show_histogram_command,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vl_api_clear_histogram_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cli_cmd)
+{
+ int i;
+
+ for (i = 0; i < SLEEP_N_BUCKETS; i++)
+ vector_rate_histogram[i] = 0;
+ return 0;
+}
+
+/*?
+ * Clear the binary api sleep-time histogram
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) =
+{
+ .path = "clear api histogram",
+ .short_help = "clear api histogram",
+ .function = vl_api_clear_histogram_command,
+};
+/* *INDENT-ON* */
+
+static void
+memclnt_queue_callback (vlib_main_t * vm)
+{
+ static volatile int *cursizep;
+
+ if (PREDICT_FALSE (cursizep == 0))
+ {
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q;
+
+ if (shmem_hdr == 0)
+ return;
+
+ q = shmem_hdr->vl_input_queue;
+ if (q == 0)
+ return;
+ cursizep = &q->cursize;
+ }
+
+ if (*cursizep >= 1)
+ {
+ vm->queue_signal_pending = 1;
+ vm->api_queue_nonempty = 1;
+ vlib_process_signal_event (vm, memclnt_node.index,
+ /* event_type */ 0, /* event_data */ 0);
+ }
+}
+
+void
+vl_enable_disable_memory_api (vlib_main_t * vm, int enable)
+{
+ vlib_node_set_state (vm, memclnt_node.index,
+ (enable
+ ? VLIB_NODE_STATE_POLLING
+ : VLIB_NODE_STATE_DISABLED));
+}
+
+static uword
+api_rx_from_node (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ uword n_packets = frame->n_vectors;
+ uword n_left_from;
+ u32 *from;
+ static u8 *long_msg;
+
+ vec_validate (long_msg, 4095);
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_args (frame);
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ void *msg;
+ uword msg_len;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ from += 1;
+ n_left_from -= 1;
+
+ msg = b0->data + b0->current_data;
+ msg_len = b0->current_length;
+ if (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ ASSERT (long_msg != 0);
+ _vec_len (long_msg) = 0;
+ vec_add (long_msg, msg, msg_len);
+ while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b0 = vlib_get_buffer (vm, b0->next_buffer);
+ msg = b0->data + b0->current_data;
+ msg_len = b0->current_length;
+ vec_add (long_msg, msg, msg_len);
+ }
+ msg = long_msg;
+ }
+ vl_msg_api_handler_no_trace_no_free (msg);
+ }
+
+ /* Free what we've been given. */
+ vlib_buffer_free (vm, vlib_frame_args (frame), n_packets);
+
+ return n_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (api_rx_from_node_node,static) = {
+ .function = api_rx_from_node,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .vector_size = 4,
+ .name = "api-rx-from-node",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+setup_memclnt_exit (vlib_main_t * vm)
+{
+ atexit (vl_unmap_shmem);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (setup_memclnt_exit);
+
+
+static clib_error_t *
+vl_api_ring_command (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cli_cmd)
+{
+ int i;
+ ring_alloc_t *ap;
+ vl_shmem_hdr_t *shmem_hdr;
+ api_main_t *am = &api_main;
+
+ shmem_hdr = am->shmem_hdr;
+
+ if (shmem_hdr == 0)
+ {
+ vlib_cli_output (vm, "Shared memory segment not initialized...\n");
+ return 0;
+ }
+
+ vlib_cli_output (vm, "%8s %8s %8s %8s %8s\n",
+ "Owner", "Size", "Nitems", "Hits", "Misses");
+
+ ap = shmem_hdr->vl_rings;
+
+ for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++)
+ {
+ vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n",
+ "vlib", ap->size, ap->nitems, ap->hits, ap->misses);
+ ap++;
+ }
+
+ ap = shmem_hdr->client_rings;
+
+ for (i = 0; i < vec_len (shmem_hdr->client_rings); i++)
+ {
+ vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n",
+ "clnt", ap->size, ap->nitems, ap->hits, ap->misses);
+ ap++;
+ }
+
+ vlib_cli_output (vm, "%d ring miss fallback allocations\n",
+ am->ring_misses);
+
+ vlib_cli_output
+ (vm, "%d application restarts, %d reclaimed msgs, %d garbage collects\n",
+ shmem_hdr->application_restarts,
+ shmem_hdr->restart_reclaims, shmem_hdr->garbage_collects);
+ return 0;
+}
+
+void dump_socket_clients (vlib_main_t * vm, api_main_t * am)
+ __attribute__ ((weak));
+
+void
+dump_socket_clients (vlib_main_t * vm, api_main_t * am)
+{
+}
+
+static clib_error_t *
+vl_api_client_command (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cli_cmd)
+{
+ vl_api_registration_t **regpp, *regp;
+ unix_shared_memory_queue_t *q;
+ char *health;
+ api_main_t *am = &api_main;
+ u32 *confused_indices = 0;
+
+ if (!pool_elts (am->vl_clients))
+ goto socket_clients;
+ vlib_cli_output (vm, "Shared memory clients");
+ vlib_cli_output (vm, "%16s %8s %14s %18s %s",
+ "Name", "PID", "Queue Length", "Queue VA", "Health");
+
+ /* *INDENT-OFF* */
+ pool_foreach (regpp, am->vl_clients,
+ ({
+ regp = *regpp;
+
+ if (regp)
+ {
+ q = regp->vl_input_queue;
+ if (kill (q->consumer_pid, 0) < 0)
+ {
+ health = "DEAD";
+ }
+ else
+ {
+ health = "alive";
+ }
+ vlib_cli_output (vm, "%16s %8d %14d 0x%016llx %s\n",
+ regp->name, q->consumer_pid, q->cursize,
+ q, health);
+ }
+ else
+ {
+ clib_warning ("NULL client registration index %d",
+ regpp - am->vl_clients);
+ vec_add1 (confused_indices, regpp - am->vl_clients);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /* This should "never happen," but if it does, fix it... */
+ if (PREDICT_FALSE (vec_len (confused_indices) > 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (confused_indices); i++)
+ {
+ pool_put_index (am->vl_clients, confused_indices[i]);
+ }
+ }
+ vec_free (confused_indices);
+
+ if (am->missing_clients)
+ vlib_cli_output (vm, "%u messages with missing clients",
+ am->missing_clients);
+socket_clients:
+ dump_socket_clients (vm, am);
+
+ return 0;
+}
+
+static clib_error_t *
+vl_api_status_command (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cli_cmd)
+{
+ api_main_t *am = &api_main;
+
+  /* Guard against null rx_trace / tx_trace before dereferencing */
+
+ if (am->rx_trace == 0)
+ {
+ vlib_cli_output (vm, "RX Trace disabled\n");
+ }
+ else
+ {
+ if (am->rx_trace->enabled == 0)
+ vlib_cli_output (vm, "RX Trace disabled\n");
+ else
+ vlib_cli_output (vm, "RX Trace enabled\n");
+ }
+
+ if (am->tx_trace == 0)
+ {
+ vlib_cli_output (vm, "TX Trace disabled\n");
+ }
+ else
+ {
+ if (am->tx_trace->enabled == 0)
+ vlib_cli_output (vm, "TX Trace disabled\n");
+ else
+ vlib_cli_output (vm, "TX Trace enabled\n");
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_api_command, static) =
+{
+ .path = "show api",
+ .short_help = "Show API information",
+};
+/* *INDENT-ON* */
+
+/*?
+ * Display binary api message allocation ring statistics
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_api_ring_command, static) =
+{
+ .path = "show api ring-stats",
+ .short_help = "Message ring statistics",
+ .function = vl_api_ring_command,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Display current api client connections
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_api_clients_command, static) =
+{
+ .path = "show api clients",
+ .short_help = "Client information",
+ .function = vl_api_client_command,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Display the current api message tracing status
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_api_status_command, static) =
+{
+ .path = "show api trace-status",
+ .short_help = "Display API trace status",
+ .function = vl_api_status_command,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vl_api_message_table_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cli_cmd)
+{
+ api_main_t *am = &api_main;
+ int i;
+ int verbose = 0;
+
+ if (unformat (input, "verbose"))
+ verbose = 1;
+
+
+ if (verbose == 0)
+ vlib_cli_output (vm, "%-4s %s", "ID", "Name");
+ else
+ vlib_cli_output (vm, "%-4s %-40s %6s %7s", "ID", "Name", "Bounce",
+ "MP-safe");
+
+ for (i = 1; i < vec_len (am->msg_names); i++)
+ {
+ if (verbose == 0)
+ {
+ vlib_cli_output (vm, "%-4d %s", i,
+ am->msg_names[i] ? am->msg_names[i] :
+ " [no handler]");
+ }
+ else
+ {
+ vlib_cli_output (vm, "%-4d %-40s %6d %7d", i,
+ am->msg_names[i] ? am->msg_names[i] :
+ " [no handler]", am->message_bounce[i],
+ am->is_mp_safe[i]);
+ }
+ }
+
+ return 0;
+}
+
+/*?
+ * Display the current api message decode tables
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_api_message_table_command, static) =
+{
+ .path = "show api message-table",
+ .short_help = "Message Table",
+ .function = vl_api_message_table_command,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vl_api_trace_command (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cli_cmd)
+{
+ u32 nitems = 1024;
+ vl_api_trace_which_t which = VL_API_TRACE_RX;
+ api_main_t *am = &api_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "rx nitems %u", &nitems) || unformat (input, "rx"))
+ goto configure;
+ else if (unformat (input, "tx nitems %u", &nitems)
+ || unformat (input, "tx"))
+ {
+ which = VL_API_TRACE_RX;
+ goto configure;
+ }
+ else if (unformat (input, "on rx"))
+ {
+ vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 1);
+ }
+ else if (unformat (input, "on tx"))
+ {
+ vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 1);
+ }
+ else if (unformat (input, "on"))
+ {
+ vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 1);
+ }
+ else if (unformat (input, "off"))
+ {
+ vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 0);
+ vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 0);
+ }
+ else if (unformat (input, "free"))
+ {
+ vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 0);
+ vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 0);
+ vl_msg_api_trace_free (am, VL_API_TRACE_RX);
+ vl_msg_api_trace_free (am, VL_API_TRACE_TX);
+ }
+ else if (unformat (input, "debug on"))
+ {
+ am->msg_print_flag = 1;
+ }
+ else if (unformat (input, "debug off"))
+ {
+ am->msg_print_flag = 0;
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+
+configure:
+ if (vl_msg_api_trace_configure (am, which, nitems))
+ {
+ vlib_cli_output (vm, "warning: trace configure error (%d, %d)",
+ which, nitems);
+ }
+
+ return 0;
+}
+
+/*?
+ * Control the binary API trace mechanism
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (trace, static) =
+{
+ .path = "set api-trace [on][on tx][on rx][off][free][debug on][debug off]",
+ .short_help = "API trace",
+ .function = vl_api_trace_command,
+};
+/* *INDENT-ON* */
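+
+/*
+ * Example (illustrative CLI session):
+ *   vpp# set api-trace rx nitems 4096
+ *   vpp# set api-trace on
+ *   vpp# show api trace-status
+ *   vpp# set api-trace off
+ */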
+
+clib_error_t *
+vlibmemory_init (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+ svm_map_region_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (*a));
+ a->root_path = am->root_path;
+ a->name = SVM_GLOBAL_REGION_NAME;
+ a->baseva = (am->global_baseva != 0) ?
+ am->global_baseva : SVM_GLOBAL_REGION_BASEVA;
+ a->size = (am->global_size != 0) ? am->global_size : SVM_GLOBAL_REGION_SIZE;
+ a->flags = SVM_FLAGS_NODATA;
+ a->uid = am->api_uid;
+ a->gid = am->api_gid;
+ a->pvt_heap_size =
+ (am->global_pvt_heap_size !=
+ 0) ? am->global_pvt_heap_size : SVM_PVT_MHEAP_SIZE;
+
+ svm_region_init_args (a);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vlibmemory_init);
+
+void
+vl_set_memory_region_name (const char *name)
+{
+ api_main_t *am = &api_main;
+
+ am->region_name = name;
+}
+
+static int
+range_compare (vl_api_msg_range_t * a0, vl_api_msg_range_t * a1)
+{
+ int len0, len1, clen;
+
+ len0 = vec_len (a0->name);
+ len1 = vec_len (a1->name);
+ clen = len0 < len1 ? len0 : len1;
+ return (strncmp ((char *) a0->name, (char *) a1->name, clen));
+}
+
+static u8 *
+format_api_msg_range (u8 * s, va_list * args)
+{
+ vl_api_msg_range_t *rp = va_arg (*args, vl_api_msg_range_t *);
+
+ if (rp == 0)
+ s = format (s, "%-50s%9s%9s", "Name", "First-ID", "Last-ID");
+ else
+ s = format (s, "%-50s%9d%9d", rp->name, rp->first_msg_id,
+ rp->last_msg_id);
+
+ return s;
+}
+
+static clib_error_t *
+vl_api_show_plugin_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cli_cmd)
+{
+ api_main_t *am = &api_main;
+ vl_api_msg_range_t *rp = 0;
+ int i;
+
+ if (vec_len (am->msg_ranges) == 0)
+ {
+ vlib_cli_output (vm, "No plugin API message ranges configured...");
+ return 0;
+ }
+
+ rp = vec_dup (am->msg_ranges);
+
+ vec_sort_with_function (rp, range_compare);
+
+ vlib_cli_output (vm, "Plugin API message ID ranges...\n");
+ vlib_cli_output (vm, "%U", format_api_msg_range, 0 /* header */ );
+
+ for (i = 0; i < vec_len (rp); i++)
+ vlib_cli_output (vm, "%U", format_api_msg_range, rp + i);
+
+ vec_free (rp);
+
+ return 0;
+}
+
+/*?
+ * Display the plugin binary API message range table
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) =
+{
+ .path = "show api plugin",
+ .short_help = "show api plugin",
+ .function = vl_api_show_plugin_command,
+};
+/* *INDENT-ON* */
+
+static void
+vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp)
+{
+ vl_api_rpc_call_reply_t *rmp;
+ int (*fp) (void *);
+ i32 rv = 0;
+ vlib_main_t *vm = vlib_get_main ();
+
+ if (mp->function == 0)
+ {
+ rv = -1;
+ clib_warning ("rpc NULL function pointer");
+ }
+ else
+ {
+ if (mp->need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+
+ fp = uword_to_pointer (mp->function, int (*)(void *));
+ rv = fp (mp->data);
+
+ if (mp->need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+ }
+
+ if (mp->send_reply)
+ {
+ unix_shared_memory_queue_t *q =
+ vl_api_client_index_to_input_queue (mp->client_index);
+ if (q)
+ {
+ rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = rv;
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+ }
+ }
+ if (mp->multicast)
+ {
+ clib_warning ("multicast not yet implemented...");
+ }
+}
+
+static void
+vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp)
+{
+ clib_warning ("unimplemented");
+}
+
+always_inline void
+vl_api_rpc_call_main_thread_inline (void *fp, u8 * data, u32 data_length,
+ u8 force_rpc)
+{
+ vl_api_rpc_call_t *mp;
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q;
+
+ /* Main thread: call the function directly */
+ if ((force_rpc == 0) && (vlib_get_thread_index () == 0))
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ void (*call_fp) (void *);
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ call_fp = fp;
+ call_fp (data);
+
+ vlib_worker_thread_barrier_release (vm);
+ return;
+ }
+
+ /* Any other thread, actually do an RPC call... */
+ mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + data_length);
+
+ memset (mp, 0, sizeof (*mp));
+ clib_memcpy (mp->data, data, data_length);
+ mp->_vl_msg_id = ntohs (VL_API_RPC_CALL);
+ mp->function = pointer_to_uword (fp);
+ mp->need_barrier_sync = 1;
+
+ /*
+ * Use the "normal" control-plane mechanism for the main thread.
+ * Well, almost: if the main input queue is full, we must not simply
+ * block; if we did, we could expect a barrier sync timeout.
+ */
+ q = shmem_hdr->vl_input_queue;
+
+ while (pthread_mutex_trylock (&q->mutex))
+ vlib_worker_thread_barrier_check ();
+
+ while (PREDICT_FALSE (unix_shared_memory_queue_is_full (q)))
+ {
+ pthread_mutex_unlock (&q->mutex);
+ vlib_worker_thread_barrier_check ();
+ while (pthread_mutex_trylock (&q->mutex))
+ vlib_worker_thread_barrier_check ();
+ }
+
+ vl_msg_api_send_shmem_nolock (q, (u8 *) & mp);
+
+ pthread_mutex_unlock (&q->mutex);
+}
+
+/*
+ * Check if called from worker threads.
+ * If so, make rpc call of fp through shmem.
+ * Otherwise, call fp directly
+ */
+void
+vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length)
+{
+ vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */
+ 0);
+}
+
+/*
+ * Always make rpc call of fp through shmem, useful for calling from threads
+ * not setup as worker threads, such as DPDK callback thread
+ */
+void
+vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length)
+{
+ vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */
+ 1);
+}
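+
+/*
+ * Usage sketch (hypothetical function and argument names, for
+ * illustration only): from a worker thread,
+ *
+ *   static int my_fn (void *arg) { ... return 0; }
+ *
+ *   vl_api_rpc_call_main_thread (my_fn, (u8 *) &my_args,
+ *                                sizeof (my_args));
+ *
+ * queues my_fn for execution on the main thread under barrier sync.
+ */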
+
+static void
+vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp)
+{
+ api_main_t *am = &api_main;
+ vl_api_msg_range_t *rp;
+ uword *p;
+
+ /* Noop (except for tracing) during normal operation */
+ if (am->replay_in_progress == 0)
+ return;
+
+ p = hash_get_mem (am->msg_range_by_name, mp->plugin_name);
+ if (p == 0)
+ {
+ clib_warning ("WARNING: traced plugin '%s' not in current image",
+ mp->plugin_name);
+ return;
+ }
+
+ rp = vec_elt_at_index (am->msg_ranges, p[0]);
+ if (rp->first_msg_id != clib_net_to_host_u16 (mp->first_msg_id))
+ {
+ clib_warning ("WARNING: traced plugin '%s' first message id %d not %d",
+ mp->plugin_name, clib_net_to_host_u16 (mp->first_msg_id),
+ rp->first_msg_id);
+ }
+
+ if (rp->last_msg_id != clib_net_to_host_u16 (mp->last_msg_id))
+ {
+ clib_warning ("WARNING: traced plugin '%s' last message id %d not %d",
+ mp->plugin_name, clib_net_to_host_u16 (mp->last_msg_id),
+ rp->last_msg_id);
+ }
+}
+
+#define foreach_rpc_api_msg \
+_(RPC_CALL,rpc_call) \
+_(RPC_CALL_REPLY,rpc_call_reply)
+
+#define foreach_plugin_trace_msg \
+_(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids)
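+
+/*
+ * Each (N,n) pair above expands, via the _() macro in rpc_api_hookup()
+ * below, into a vl_msg_api_set_handlers() call; e.g. _(RPC_CALL,rpc_call)
+ * registers vl_api_rpc_call_t_handler for VL_API_RPC_CALL.
+ */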
+
+/*
+ * Set the rpc callback at our earliest possible convenience.
+ * This avoids ordering issues between thread_init() -> start_workers and
+ * an init function which we could define here. If we ever intend to use
+ * vlib all by itself, we can't create a link-time dependency on
+ * an init function here and a typical "call foo_init first"
+ * guitar lick.
+ */
+
+extern void *rpc_call_main_thread_cb_fn;
+
+static clib_error_t *
+rpc_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 0 /* do not trace */);
+ foreach_rpc_api_msg;
+#undef _
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1 /* do trace */);
+ foreach_plugin_trace_msg;
+#undef _
+
+ /* No reason to halt the parade to create a trace record... */
+ am->is_mp_safe[VL_API_TRACE_PLUGIN_MSG_IDS] = 1;
+ rpc_call_main_thread_cb_fn = vl_api_rpc_call_main_thread;
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (rpc_api_hookup);
+
+typedef enum
+{
+ DUMP,
+ CUSTOM_DUMP,
+ REPLAY,
+ INITIALIZERS,
+} vl_api_replay_t;
+
+u8 *
+format_vl_msg_api_trace_status (u8 * s, va_list * args)
+{
+ api_main_t *am = va_arg (*args, api_main_t *);
+ vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t);
+ vl_api_trace_t *tp;
+ char *trace_name;
+
+ switch (which)
+ {
+ case VL_API_TRACE_TX:
+ tp = am->tx_trace;
+ trace_name = "TX trace";
+ break;
+
+ case VL_API_TRACE_RX:
+ tp = am->rx_trace;
+ trace_name = "RX trace";
+ break;
+
+ default:
+ abort ();
+ }
+
+ if (tp == 0)
+ {
+ s = format (s, "%s: not yet configured.\n", trace_name);
+ return s;
+ }
+
+ s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n",
+ trace_name, vec_len (tp->traces), tp->nitems,
+ tp->enabled ? "is" : "is not", tp->wrapped ? "has" : "has not");
+ return s;
+}
+
+void vl_msg_api_custom_dump_configure (api_main_t * am)
+ __attribute__ ((weak));
+void
+vl_msg_api_custom_dump_configure (api_main_t * am)
+{
+}
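+
+/*
+ * An image can override the weak no-op above to install custom print
+ * handlers before a custom-dump run; e.g. (hypothetical handler and
+ * message names):
+ *
+ *   void vl_msg_api_custom_dump_configure (api_main_t * am)
+ *   { am->msg_print_handlers[VL_API_FOO] = my_custom_print_fn; }
+ */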
+
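+/*
+ * Trace file layout, as consumed below: a vl_api_trace_file_header_t
+ * (nitems, endian, wrapped), followed by nitems records. Each record
+ * is a u32 length in network byte order, then the message itself; the
+ * first u16 of each message is its id, also in network byte order.
+ */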
+static void
+vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
+ u32 first_index, u32 last_index,
+ vl_api_replay_t which)
+{
+ vl_api_trace_file_header_t *hp;
+ int i, fd;
+ struct stat statb;
+ size_t file_size;
+ u8 *msg;
+ u8 endian_swap_needed = 0;
+ api_main_t *am = &api_main;
+ u8 *tmpbuf = 0;
+ u32 nitems;
+ void **saved_print_handlers = 0;
+
+ fd = open ((char *) filename, O_RDONLY);
+
+ if (fd < 0)
+ {
+ vlib_cli_output (vm, "Couldn't open %s\n", filename);
+ return;
+ }
+
+ if (fstat (fd, &statb) < 0)
+ {
+ vlib_cli_output (vm, "Couldn't stat %s\n", filename);
+ close (fd);
+ return;
+ }
+
+ if (!S_ISREG (statb.st_mode) || (statb.st_size < sizeof (*hp)))
+ {
+ vlib_cli_output (vm, "File not plausible: %s\n", filename);
+ close (fd);
+ return;
+ }
+
+ file_size = statb.st_size;
+ /* round the mapping length up to a 4 KiB boundary */
+ file_size = (file_size + 4095) & ~(4095);
+
+ hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
+
+ if (hp == (vl_api_trace_file_header_t *) MAP_FAILED)
+ {
+ vlib_cli_output (vm, "mmap failed: %s\n", filename);
+ close (fd);
+ return;
+ }
+ close (fd);
+
+ if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN)
+ || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN))
+ endian_swap_needed = 1;
+
+ if (endian_swap_needed)
+ nitems = ntohl (hp->nitems);
+ else
+ nitems = hp->nitems;
+
+ if (last_index == (u32) ~ 0)
+ {
+ last_index = nitems - 1;
+ }
+
+ if (first_index >= nitems || last_index >= nitems)
+ {
+ vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n",
+ first_index, last_index, nitems - 1);
+ munmap (hp, file_size);
+ return;
+ }
+ if (hp->wrapped)
+ vlib_cli_output (vm,
+ "Note: wrapped/incomplete trace, results may vary\n");
+
+ if (which == CUSTOM_DUMP)
+ {
+ saved_print_handlers = (void **) vec_dup (am->msg_print_handlers);
+ vl_msg_api_custom_dump_configure (am);
+ }
+
+ msg = (u8 *) (hp + 1);
+
+ for (i = 0; i < first_index; i++)
+ {
+ trace_cfg_t *cfgp;
+ int size;
+ u16 msg_id;
+
+ size = clib_host_to_net_u32 (*(u32 *) msg);
+ msg += sizeof (u32);
+
+ if (clib_arch_is_little_endian)
+ msg_id = ntohs (*((u16 *) msg));
+ else
+ msg_id = *((u16 *) msg);
+
+ cfgp = am->api_trace_cfg + msg_id;
+ if (!cfgp)
+ {
+ vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id);
+ munmap (hp, file_size);
+ return;
+ }
+ msg += size;
+ }
+
+ if (which == REPLAY)
+ am->replay_in_progress = 1;
+
+ for (; i <= last_index; i++)
+ {
+ trace_cfg_t *cfgp;
+ u16 *msg_idp;
+ u16 msg_id;
+ int size;
+
+ if (which == DUMP)
+ vlib_cli_output (vm, "---------- trace %d -----------\n", i);
+
+ size = clib_host_to_net_u32 (*(u32 *) msg);
+ msg += sizeof (u32);
+
+ if (clib_arch_is_little_endian)
+ msg_id = ntohs (*((u16 *) msg));
+ else
+ msg_id = *((u16 *) msg);
+
+ cfgp = am->api_trace_cfg + msg_id;
+ if (!cfgp)
+ {
+ vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id);
+ munmap (hp, file_size);
+ vec_free (tmpbuf);
+ am->replay_in_progress = 0;
+ return;
+ }
+
+ /* Copy the buffer (from the read-only mmap'ed file) */
+ vec_validate (tmpbuf, size - 1 + sizeof (uword));
+ clib_memcpy (tmpbuf + sizeof (uword), msg, size);
+ memset (tmpbuf, 0xf, sizeof (uword));
+
+ /*
+ * Endian swap if needed. All msg data is supposed to be
+ * in network byte order. All msg handlers are supposed to
+ * know that. The generic message dumpers don't know that.
+ * One could fix apigen, I suppose.
+ */
+ if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed)
+ {
+ void (*endian_fp) (void *);
+ if (msg_id >= vec_len (am->msg_endian_handlers)
+ || (am->msg_endian_handlers[msg_id] == 0))
+ {
+ vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id);
+ munmap (hp, file_size);
+ vec_free (tmpbuf);
+ am->replay_in_progress = 0;
+ return;
+ }
+ endian_fp = am->msg_endian_handlers[msg_id];
+ (*endian_fp) (tmpbuf + sizeof (uword));
+ }
+
+ /* msg_id always in network byte order */
+ if (clib_arch_is_little_endian)
+ {
+ msg_idp = (u16 *) (tmpbuf + sizeof (uword));
+ *msg_idp = msg_id;
+ }
+
+ switch (which)
+ {
+ case CUSTOM_DUMP:
+ case DUMP:
+ if (msg_id < vec_len (am->msg_print_handlers) &&
+ am->msg_print_handlers[msg_id])
+ {
+ u8 *(*print_fp) (void *, void *);
+
+ print_fp = (void *) am->msg_print_handlers[msg_id];
+ (*print_fp) (tmpbuf + sizeof (uword), vm);
+ }
+ else
+ {
+ vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n",
+ msg_id);
+ break;
+ }
+ break;
+
+ case INITIALIZERS:
+ if (msg_id < vec_len (am->msg_print_handlers) &&
+ am->msg_print_handlers[msg_id])
+ {
+ u8 *s;
+ int j;
+ u8 *(*print_fp) (void *, void *);
+
+ print_fp = (void *) am->msg_print_handlers[msg_id];
+
+ vlib_cli_output (vm, "/*");
+
+ (*print_fp) (tmpbuf + sizeof (uword), vm);
+ vlib_cli_output (vm, "*/\n");
+
+ s = format (0, "static u8 * vl_api_%s_%d[%d] = {",
+ am->msg_names[msg_id], i,
+ am->api_trace_cfg[msg_id].size);
+
+ for (j = 0; j < am->api_trace_cfg[msg_id].size; j++)
+ {
+ if ((j & 7) == 0)
+ s = format (s, "\n ");
+ s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]);
+ }
+ s = format (s, "\n};\n%c", 0);
+ vlib_cli_output (vm, (char *) s);
+ vec_free (s);
+ }
+ break;
+
+ case REPLAY:
+ if (msg_id < vec_len (am->msg_handlers) &&
+ am->msg_handlers[msg_id] && cfgp->replay_enable)
+ {
+ void (*handler) (void *);
+
+ handler = (void *) am->msg_handlers[msg_id];
+
+ if (!am->is_mp_safe[msg_id])
+ vl_msg_api_barrier_sync ();
+ (*handler) (tmpbuf + sizeof (uword));
+ if (!am->is_mp_safe[msg_id])
+ vl_msg_api_barrier_release ();
+ }
+ else
+ {
+ if (cfgp->replay_enable)
+ vlib_cli_output (vm, "Skipping msg id %d: no handler\n",
+ msg_id);
+ break;
+ }
+ break;
+ }
+
+ _vec_len (tmpbuf) = 0;
+ msg += size;
+ }
+
+ if (saved_print_handlers)
+ {
+ clib_memcpy (am->msg_print_handlers, saved_print_handlers,
+ vec_len (am->msg_print_handlers) * sizeof (void *));
+ vec_free (saved_print_handlers);
+ }
+
+ munmap (hp, file_size);
+ vec_free (tmpbuf);
+ am->replay_in_progress = 0;
+}
+
+static clib_error_t *
+api_trace_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 nitems = 256 << 10;
+ api_main_t *am = &api_main;
+ vl_api_trace_which_t which = VL_API_TRACE_RX;
+ u8 *filename;
+ u32 first = 0;
+ u32 last = (u32) ~ 0;
+ FILE *fp;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "on") || unformat (input, "enable"))
+ {
+ if (unformat (input, "nitems %d", &nitems))
+ ;
+ vl_msg_api_trace_configure (am, which, nitems);
+ vl_msg_api_trace_onoff (am, which, 1 /* on */ );
+ }
+ else if (unformat (input, "off"))
+ {
+ vl_msg_api_trace_onoff (am, which, 0);
+ }
+ else if (unformat (input, "save %s", &filename))
+ {
+ u8 *chroot_filename;
+ if (strstr ((char *) filename, "..")
+ || strchr ((char *) filename, '/'))
+ {
+ vlib_cli_output (vm, "illegal characters in filename '%s'",
+ filename);
+ vec_free (filename);
+ return 0;
+ }
+
+ chroot_filename = format (0, "/tmp/%s%c", filename, 0);
+
+ vec_free (filename);
+
+ fp = fopen ((char *) chroot_filename, "w");
+ if (fp == NULL)
+ {
+ vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename);
+ return 0;
+ }
+ rv = vl_msg_api_trace_save (am, which, fp);
+ fclose (fp);
+ if (rv == -1)
+ vlib_cli_output (vm, "API Trace data not present\n");
+ else if (rv == -2)
+ vlib_cli_output (vm, "File for writing is closed\n");
+ else if (rv == -10)
+ vlib_cli_output (vm, "Error while writing header to file\n");
+ else if (rv == -11)
+ vlib_cli_output (vm, "Error while writing trace to file\n");
+ else if (rv == -12)
+ vlib_cli_output (vm,
+ "Error while writing end of buffer trace to file\n");
+ else if (rv == -13)
+ vlib_cli_output (vm,
+ "Error while writing start of buffer trace to file\n");
+ else if (rv < 0)
+ vlib_cli_output (vm, "Unkown error while saving: %d", rv);
+ else
+ vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename);
+ vec_free (chroot_filename);
+ }
+ else if (unformat (input, "dump %s", &filename))
+ {
+ vl_msg_api_process_file (vm, filename, first, last, DUMP);
+ }
+ else if (unformat (input, "custom-dump %s", &filename))
+ {
+ vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP);
+ }
+ else if (unformat (input, "replay %s", &filename))
+ {
+ vl_msg_api_process_file (vm, filename, first, last, REPLAY);
+ }
+ else if (unformat (input, "initializers %s", &filename))
+ {
+ vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS);
+ }
+ else if (unformat (input, "tx"))
+ {
+ which = VL_API_TRACE_TX;
+ }
+ else if (unformat (input, "first %d", &first))
+ {
+ ;
+ }
+ else if (unformat (input, "last %d", &last))
+ {
+ ;
+ }
+ else if (unformat (input, "status"))
+ {
+ vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status,
+ am, which);
+ }
+ else if (unformat (input, "free"))
+ {
+ vl_msg_api_trace_onoff (am, which, 0);
+ vl_msg_api_trace_free (am, which);
+ }
+ else if (unformat (input, "post-mortem-on"))
+ vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ );
+ else if (unformat (input, "post-mortem-off"))
+ vl_msg_api_post_mortem_dump_enable_disable (0 /* enable */ );
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+/*?
+ * Display, replay, or save a binary API trace
+?*/
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (api_trace_command, static) =
+{
+ .path = "api trace",
+ .short_help =
+ "api trace [on|off][dump|save|replay <file>][status][free][post-mortem-on]",
+ .function = api_trace_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+api_config_fn (vlib_main_t * vm, unformat_input_t * input)
+{
+ u32 nitems = 256 << 10;
+ vl_api_trace_which_t which = VL_API_TRACE_RX;
+ api_main_t *am = &api_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "on") || unformat (input, "enable"))
+ {
+ if (unformat (input, "nitems %d", &nitems))
+ ;
+ vl_msg_api_trace_configure (am, which, nitems);
+ vl_msg_api_trace_onoff (am, which, 1 /* on */ );
+ vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ );
+ }
+ else if (unformat (input, "save-api-table %s",
+ &am->save_msg_table_filename))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+/*?
+ * This module has three configuration parameters:
+ * "on" or "enable" - enables binary api tracing
+ * "nitems <nnn>" - sets the size of the circular buffer to <nnn>
+ * "save-api-table <filename>" - dumps the API message table to /tmp/<filename>
+?*/
+VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace");
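+
+/*
+ * Example startup.conf stanza (illustrative values):
+ *
+ *   api-trace {
+ *     on
+ *     nitems 65536
+ *     save-api-table my-msg-table
+ *   }
+ */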
+
+static clib_error_t *
+api_queue_config_fn (vlib_main_t * vm, unformat_input_t * input)
+{
+ api_main_t *am = &api_main;
+ u32 nitems;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "length %d", &nitems) ||
+ (unformat (input, "len %d", &nitems)))
+ {
+ if (nitems >= 1024)
+ am->vlib_input_queue_length = nitems;
+ else
+ clib_warning ("vlib input queue length %d too small, ignored",
+ nitems);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (api_queue_config_fn, "api-queue");
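+
+/*
+ * Example startup.conf stanza (illustrative; lengths below 1024 are
+ * ignored with a warning):
+ *
+ *   api-queue { length 2048 }
+ */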
+
+static u8 *
+extract_name (u8 * s)
+{
+ u8 *rv;
+
+ rv = vec_dup (s);
+
+ /* trim from the tail to the '_' that precedes the crc */
+ while (vec_len (rv) && rv[vec_len (rv) - 1] != '_')
+ _vec_len (rv)--;
+
+ /* drop the '_' itself, then terminate */
+ if (vec_len (rv))
+ _vec_len (rv)--;
+ rv[vec_len (rv)] = 0;
+
+ return rv;
+}
+
+static u8 *
+extract_crc (u8 * s)
+{
+ int i;
+ u8 *rv;
+
+ rv = vec_dup (s);
+
+ for (i = vec_len (rv) - 1; i >= 0; i--)
+ {
+ if (rv[i] == '_')
+ {
+ vec_delete (rv, i + 1, 0);
+ break;
+ }
+ }
+ return rv;
+}
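+
+/*
+ * Example (hypothetical input): for name_and_crc "foo_12345678",
+ * extract_name() returns "foo" and extract_crc() returns "12345678".
+ */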
+
+typedef struct
+{
+ u8 *name_and_crc;
+ u8 *name;
+ u8 *crc;
+ u32 msg_index;
+ int which;
+} msg_table_unserialize_t;
+
+static int
+table_id_cmp (void *a1, void *a2)
+{
+ msg_table_unserialize_t *n1 = a1;
+ msg_table_unserialize_t *n2 = a2;
+
+ return (n1->msg_index - n2->msg_index);
+}
+
+static int
+table_name_and_crc_cmp (void *a1, void *a2)
+{
+ msg_table_unserialize_t *n1 = a1;
+ msg_table_unserialize_t *n2 = a2;
+
+ return strcmp ((char *) n1->name_and_crc, (char *) n2->name_and_crc);
+}
+
+static clib_error_t *
+dump_api_table_file_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *filename = 0;
+ api_main_t *am = &api_main;
+ serialize_main_t _sm, *sm = &_sm;
+ clib_error_t *error;
+ u32 nmsgs;
+ u32 msg_index;
+ u8 *name_and_crc;
+ int compare_current = 0;
+ int numeric_sort = 0;
+ msg_table_unserialize_t *table = 0, *item;
+ u32 i;
+ u32 ndifferences = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "file %s", &filename))
+ ;
+ else if (unformat (input, "compare-current")
+ || unformat (input, "compare"))
+ compare_current = 1;
+ else if (unformat (input, "numeric"))
+ numeric_sort = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (numeric_sort && compare_current)
+ return clib_error_return
+ (0, "Comparison and numeric sorting are incompatible");
+
+ if (filename == 0)
+ return clib_error_return (0, "File not specified");
+
+ /* Load the serialized message table from the table dump */
+
+ error = unserialize_open_unix_file (sm, (char *) filename);
+
+ if (error)
+ return error;
+
+ unserialize_integer (sm, &nmsgs, sizeof (u32));
+
+ for (i = 0; i < nmsgs; i++)
+ {
+ msg_index = unserialize_likely_small_unsigned_integer (sm);
+ unserialize_cstring (sm, (char **) &name_and_crc);
+ vec_add2 (table, item, 1);
+ item->msg_index = msg_index;
+ item->name_and_crc = name_and_crc;
+ item->name = extract_name (name_and_crc);
+ item->crc = extract_crc (name_and_crc);
+ item->which = 0; /* file */
+ }
+ serialize_close (sm);
+
+ /* Compare with the current image? */
+ if (compare_current)
+ {
+ /* Append the current message table */
+ u8 *tblv = vec_dup (am->serialized_message_table_in_shmem);
+
+ serialize_open_vector (sm, tblv);
+ unserialize_integer (sm, &nmsgs, sizeof (u32));
+
+ for (i = 0; i < nmsgs; i++)
+ {
+ msg_index = unserialize_likely_small_unsigned_integer (sm);
+ unserialize_cstring (sm, (char **) &name_and_crc);
+
+ vec_add2 (table, item, 1);
+ item->msg_index = msg_index;
+ item->name_and_crc = name_and_crc;
+ item->name = extract_name (name_and_crc);
+ item->crc = extract_crc (name_and_crc);
+ item->which = 1; /* current_image */
+ }
+ }
+
+ /* Sort the table. */
+ if (numeric_sort)
+ vec_sort_with_function (table, table_id_cmp);
+ else
+ vec_sort_with_function (table, table_name_and_crc_cmp);
+
+ if (compare_current)
+ {
+ ndifferences = 0;
+
+ /*
+ * In this case, the recovered table will have two entries per
+ * API message. So, if entries i and i+1 match, the message definitions
+ * are identical. Otherwise, the crc is different, or a message is
+ * present in only one of the tables.
+ */
+ vlib_cli_output (vm, "%=60s %s", "Message Name", "Result");
+
+ for (i = 0; i < vec_len (table);)
+ {
+ /* Last message lonely? */
+ if (i == vec_len (table) - 1)
+ {
+ ndifferences++;
+ goto last_unique;
+ }
+
+ /* Identical pair? */
+ if (!strncmp
+ ((char *) table[i].name_and_crc,
+ (char *) table[i + 1].name_and_crc,
+ vec_len (table[i].name_and_crc)))
+ {
+ i += 2;
+ continue;
+ }
+
+ ndifferences++;
+
+ /* Only in one of two tables? */
+ if (strncmp ((char *) table[i].name, (char *) table[i + 1].name,
+ vec_len (table[i].name)))
+ {
+ last_unique:
+ vlib_cli_output (vm, "%-60s only in %s",
+ table[i].name, table[i].which ?
+ "image" : "file");
+ i++;
+ continue;
+ }
+ /* In both tables, but with different signatures */
+ vlib_cli_output (vm, "%-60s definition changed", table[i].name);
+ i += 2;
+ }
+ if (ndifferences == 0)
+ vlib_cli_output (vm, "No api message signature differences found.");
+ else
+ vlib_cli_output (vm, "Found %u api message signature differences",
+ ndifferences);
+ goto cleanup;
+ }
+
+ /* Dump the table, sorted as shown above */
+ vlib_cli_output (vm, "%=60s %=8s %=10s", "Message name", "MsgID", "CRC");
+
+ for (i = 0; i < vec_len (table); i++)
+ {
+ item = table + i;
+ vlib_cli_output (vm, "%-60s %8u %10s", item->name,
+ item->msg_index, item->crc);
+ }
+
+cleanup:
+ for (i = 0; i < vec_len (table); i++)
+ {
+ vec_free (table[i].name_and_crc);
+ vec_free (table[i].name);
+ vec_free (table[i].crc);
+ }
+
+ vec_free (table);
+
+ return 0;
+}
+
+/*?
+ * Displays a serialized API message decode table, sorted by message name
+ *
+ * @cliexpar
+ * @cliexstart{show api dump file <filename>}
+ * Message name MsgID CRC
+ * accept_session 407 8e2a127e
+ * accept_session_reply 408 67d8c22a
+ * add_node_next 549 e4202993
+ * add_node_next_reply 550 e89d6eed
+ * etc.
+ * @cliexend
+?*/
+
+/*?
+ * Compares a serialized API message decode table with the current image
+ *
+ * @cliexpar
+ * @cliexstart{show api dump file <filename> compare}
+ * ip_add_del_route definition changed
+ * ip_table_add_del definition changed
+ * l2_macs_event only in image
+ * vnet_ip4_fib_counters only in file
+ * vnet_ip4_nbr_counters only in file
+ * @cliexend
+?*/
+
+/*?
+ * Display a serialized API message decode table, or compare a saved
+ * decode table with the current image to establish API differences.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (dump_api_table_file, static) =
+{
+ .path = "show api dump",
+ .short_help = "show api dump file <filename> [numeric | compare-current]",
+ .function = dump_api_table_file_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/unix_shared_memory_queue.c b/src/vlibmemory/unix_shared_memory_queue.c
new file mode 100644
index 00000000..4db4851c
--- /dev/null
+++ b/src/vlibmemory/unix_shared_memory_queue.c
@@ -0,0 +1,385 @@
+/*
+ *------------------------------------------------------------------
+ * unix_shared_memory_queue.c - unidirectional shared-memory queues
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/format.h>
+#include <vppinfra/cache.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+#include <signal.h>
+
+/*
+ * unix_shared_memory_queue_init
+ *
+ * nels = number of elements on the queue
+ * elsize = element size, presumably 4 and cacheline-size will
+ * be popular choices.
+ * pid = consumer pid
+ *
+ * The idea is to call this function in the queue consumer,
+ * and e-mail the queue pointer to the producer(s).
+ *
+ * The vpp process / main thread allocates one of these
+ * at startup; its main input queue. The vpp main input queue
+ * has a pointer to it in the shared memory segment header.
+ *
+ * You probably want to be on an svm data heap before calling this
+ * function.
+ */
+unix_shared_memory_queue_t *
+unix_shared_memory_queue_init (int nels,
+ int elsize,
+ int consumer_pid,
+ int signal_when_queue_non_empty)
+{
+ unix_shared_memory_queue_t *q;
+ pthread_mutexattr_t attr;
+ pthread_condattr_t cattr;
+
+ q = clib_mem_alloc_aligned (sizeof (unix_shared_memory_queue_t)
+ + nels * elsize, CLIB_CACHE_LINE_BYTES);
+ memset (q, 0, sizeof (*q));
+
+ q->elsize = elsize;
+ q->maxsize = nels;
+ q->consumer_pid = consumer_pid;
+ q->signal_when_queue_non_empty = signal_when_queue_non_empty;
+
+ memset (&attr, 0, sizeof (attr));
+ memset (&cattr, 0, sizeof (cattr));
+
+ if (pthread_mutexattr_init (&attr))
+ clib_unix_warning ("mutexattr_init");
+ if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
+ clib_unix_warning ("pthread_mutexattr_setpshared");
+ if (pthread_mutex_init (&q->mutex, &attr))
+ clib_unix_warning ("mutex_init");
+ if (pthread_mutexattr_destroy (&attr))
+ clib_unix_warning ("mutexattr_destroy");
+ if (pthread_condattr_init (&cattr))
+ clib_unix_warning ("condattr_init");
+ /* prints funny-looking messages in the Linux target */
+ if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
+ clib_unix_warning ("condattr_setpshared");
+ if (pthread_cond_init (&q->condvar, &cattr))
+ clib_unix_warning ("cond_init");
+ if (pthread_condattr_destroy (&cattr))
+ clib_unix_warning ("condattr_destroy");
+
+ return (q);
+}
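+
+/*
+ * Usage sketch (illustrative): the consumer creates the queue, then
+ * dequeues elements; producers enqueue into it from other processes.
+ *
+ *   unix_shared_memory_queue_t *q =
+ *     unix_shared_memory_queue_init (1024, sizeof (uword), getpid (), 0);
+ *   uword elem;
+ *   unix_shared_memory_queue_sub (q, (u8 *) &elem, 0);   (0 => block)
+ */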
+
+/*
+ * unix_shared_memory_queue_free
+ */
+void
+unix_shared_memory_queue_free (unix_shared_memory_queue_t * q)
+{
+ (void) pthread_mutex_destroy (&q->mutex);
+ (void) pthread_cond_destroy (&q->condvar);
+ clib_mem_free (q);
+}
+
+void
+unix_shared_memory_queue_lock (unix_shared_memory_queue_t * q)
+{
+ pthread_mutex_lock (&q->mutex);
+}
+
+void
+unix_shared_memory_queue_unlock (unix_shared_memory_queue_t * q)
+{
+ pthread_mutex_unlock (&q->mutex);
+}
+
+int
+unix_shared_memory_queue_is_full (unix_shared_memory_queue_t * q)
+{
+ return q->cursize == q->maxsize;
+}
+
+/*
+ * unix_shared_memory_queue_add_nolock
+ *
+ * Add an element; the caller must already hold q->mutex.
+ */
+int
+unix_shared_memory_queue_add_nolock (unix_shared_memory_queue_t * q,
+ u8 * elem)
+{
+ i8 *tailp;
+ int need_broadcast = 0;
+
+ if (PREDICT_FALSE (q->cursize == q->maxsize))
+ {
+ while (q->cursize == q->maxsize)
+ {
+ (void) pthread_cond_wait (&q->condvar, &q->mutex);
+ }
+ }
+
+ tailp = (i8 *) (&q->data[0] + q->elsize * q->tail);
+ clib_memcpy (tailp, elem, q->elsize);
+
+ q->tail++;
+ q->cursize++;
+
+ need_broadcast = (q->cursize == 1);
+
+ if (q->tail == q->maxsize)
+ q->tail = 0;
+
+ if (need_broadcast)
+ {
+ (void) pthread_cond_broadcast (&q->condvar);
+ if (q->signal_when_queue_non_empty)
+ kill (q->consumer_pid, q->signal_when_queue_non_empty);
+ }
+ return 0;
+}
+
+int
+unix_shared_memory_queue_add_raw (unix_shared_memory_queue_t * q, u8 * elem)
+{
+ i8 *tailp;
+
+ if (PREDICT_FALSE (q->cursize == q->maxsize))
+ {
+ while (q->cursize == q->maxsize)
+ ;
+ }
+
+ tailp = (i8 *) (&q->data[0] + q->elsize * q->tail);
+ clib_memcpy (tailp, elem, q->elsize);
+
+ q->tail++;
+ q->cursize++;
+
+ if (q->tail == q->maxsize)
+ q->tail = 0;
+ return 0;
+}
+
+/*
+ * unix_shared_memory_queue_add
+ */
+int
+unix_shared_memory_queue_add (unix_shared_memory_queue_t * q,
+ u8 * elem, int nowait)
+{
+ i8 *tailp;
+ int need_broadcast = 0;
+
+ if (nowait)
+ {
+ /* zero on success */
+ if (pthread_mutex_trylock (&q->mutex))
+ {
+ return (-1);
+ }
+ }
+ else
+ pthread_mutex_lock (&q->mutex);
+
+ if (PREDICT_FALSE (q->cursize == q->maxsize))
+ {
+ if (nowait)
+ {
+ pthread_mutex_unlock (&q->mutex);
+ return (-2);
+ }
+ while (q->cursize == q->maxsize)
+ {
+ (void) pthread_cond_wait (&q->condvar, &q->mutex);
+ }
+ }
+
+ tailp = (i8 *) (&q->data[0] + q->elsize * q->tail);
+ clib_memcpy (tailp, elem, q->elsize);
+
+ q->tail++;
+ q->cursize++;
+
+ need_broadcast = (q->cursize == 1);
+
+ if (q->tail == q->maxsize)
+ q->tail = 0;
+
+ if (need_broadcast)
+ {
+ (void) pthread_cond_broadcast (&q->condvar);
+ if (q->signal_when_queue_non_empty)
+ kill (q->consumer_pid, q->signal_when_queue_non_empty);
+ }
+ pthread_mutex_unlock (&q->mutex);
+
+ return 0;
+}
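+
+/*
+ * Illustrative non-blocking producer (sketch): with nowait set, the
+ * add returns -1 if the mutex is busy and -2 if the queue is full:
+ *
+ *   while (unix_shared_memory_queue_add (q, (u8 *) &elem, 1) < 0)
+ *     ;   (spin, or do other useful work, then retry)
+ */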
+
+/*
+ * unix_shared_memory_queue_add2
+ */
+int
+unix_shared_memory_queue_add2 (unix_shared_memory_queue_t * q, u8 * elem,
+ u8 * elem2, int nowait)
+{
+ i8 *tailp;
+ int need_broadcast = 0;
+
+ if (nowait)
+ {
+ /* zero on success */
+ if (pthread_mutex_trylock (&q->mutex))
+ {
+ return (-1);
+ }
+ }
+ else
+ pthread_mutex_lock (&q->mutex);
+
+ /* need room for two elements */
+ if (PREDICT_FALSE (q->cursize + 1 >= q->maxsize))
+ {
+ if (nowait)
+ {
+ pthread_mutex_unlock (&q->mutex);
+ return (-2);
+ }
+ while (q->cursize + 1 >= q->maxsize)
+ {
+ (void) pthread_cond_wait (&q->condvar, &q->mutex);
+ }
+ }
+
+ tailp = (i8 *) (&q->data[0] + q->elsize * q->tail);
+ clib_memcpy (tailp, elem, q->elsize);
+
+ q->tail++;
+ q->cursize++;
+
+ if (q->tail == q->maxsize)
+ q->tail = 0;
+
+ need_broadcast = (q->cursize == 1);
+
+ tailp = (i8 *) (&q->data[0] + q->elsize * q->tail);
+ clib_memcpy (tailp, elem2, q->elsize);
+
+ q->tail++;
+ q->cursize++;
+
+ if (q->tail == q->maxsize)
+ q->tail = 0;
+
+ if (need_broadcast)
+ {
+ (void) pthread_cond_broadcast (&q->condvar);
+ if (q->signal_when_queue_non_empty)
+ kill (q->consumer_pid, q->signal_when_queue_non_empty);
+ }
+ pthread_mutex_unlock (&q->mutex);
+
+ return 0;
+}
+
+/*
+ * unix_shared_memory_queue_sub
+ */
+int
+unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q,
+ u8 * elem, int nowait)
+{
+ i8 *headp;
+ int need_broadcast = 0;
+
+ if (nowait)
+ {
+ /* zero on success */
+ if (pthread_mutex_trylock (&q->mutex))
+ {
+ return (-1);
+ }
+ }
+ else
+ pthread_mutex_lock (&q->mutex);
+
+ if (PREDICT_FALSE (q->cursize == 0))
+ {
+ if (nowait)
+ {
+ pthread_mutex_unlock (&q->mutex);
+ return (-2);
+ }
+ while (q->cursize == 0)
+ {
+ (void) pthread_cond_wait (&q->condvar, &q->mutex);
+ }
+ }
+
+ headp = (i8 *) (&q->data[0] + q->elsize * q->head);
+ clib_memcpy (elem, headp, q->elsize);
+
+ q->head++;
+ /* wake any blocked producers iff the queue was full before this dequeue */
+ if (q->cursize == q->maxsize)
+ need_broadcast = 1;
+
+ q->cursize--;
+
+ if (q->head == q->maxsize)
+ q->head = 0;
+
+ if (need_broadcast)
+ (void) pthread_cond_broadcast (&q->condvar);
+
+ pthread_mutex_unlock (&q->mutex);
+
+ return 0;
+}
+
+int
+unix_shared_memory_queue_sub_raw (unix_shared_memory_queue_t * q, u8 * elem)
+{
+ i8 *headp;
+
+ if (PREDICT_FALSE (q->cursize == 0))
+ {
+ while (q->cursize == 0)
+ ;
+ }
+
+ headp = (i8 *) (&q->data[0] + q->elsize * q->head);
+ clib_memcpy (elem, headp, q->elsize);
+
+ q->head++;
+ q->cursize--;
+
+ if (q->head == q->maxsize)
+ q->head = 0;
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/unix_shared_memory_queue.h b/src/vlibmemory/unix_shared_memory_queue.h
new file mode 100644
index 00000000..27de3218
--- /dev/null
+++ b/src/vlibmemory/unix_shared_memory_queue.h
@@ -0,0 +1,70 @@
+/*
+ *------------------------------------------------------------------
+ * unix_shared_memory_queue.h - shared-memory queues
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef included_unix_shared_memory_queue_h
+#define included_unix_shared_memory_queue_h
+
+#include <pthread.h>
+
+typedef struct _unix_shared_memory_queue
+{
+ pthread_mutex_t mutex; /* 8 bytes */
+ pthread_cond_t condvar; /* 8 bytes */
+ int head;
+ int tail;
+ volatile int cursize;
+ int maxsize;
+ int elsize;
+ int consumer_pid;
+ int signal_when_queue_non_empty;
+ char data[0];
+} unix_shared_memory_queue_t;
+
+unix_shared_memory_queue_t *unix_shared_memory_queue_init (int nels,
+ int elsize,
+ int consumer_pid,
+ int
+ signal_when_queue_non_empty);
+void unix_shared_memory_queue_free (unix_shared_memory_queue_t * q);
+int unix_shared_memory_queue_add (unix_shared_memory_queue_t * q, u8 * elem,
+ int nowait);
+int unix_shared_memory_queue_add2 (unix_shared_memory_queue_t * q, u8 * elem,
+ u8 * elem2, int nowait);
+int unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, u8 * elem,
+ int nowait);
+void unix_shared_memory_queue_lock (unix_shared_memory_queue_t * q);
+void unix_shared_memory_queue_unlock (unix_shared_memory_queue_t * q);
+int unix_shared_memory_queue_is_full (unix_shared_memory_queue_t * q);
+int unix_shared_memory_queue_add_nolock (unix_shared_memory_queue_t * q,
+ u8 * elem);
+
+int unix_shared_memory_queue_sub_raw (unix_shared_memory_queue_t * q,
+ u8 * elem);
+int unix_shared_memory_queue_add_raw (unix_shared_memory_queue_t * q,
+ u8 * elem);
+
+#endif /* included_unix_shared_memory_queue_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/vl_memory_api_h.h b/src/vlibmemory/vl_memory_api_h.h
new file mode 100644
index 00000000..c1ae79b1
--- /dev/null
+++ b/src/vlibmemory/vl_memory_api_h.h
@@ -0,0 +1,32 @@
+/*
+ *------------------------------------------------------------------
+ * vl_memory_api_h.h - memory API headers, in a specific order.
+ *
+ * Copyright (c) 2009-2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/*
+ * Add to the bottom of the #include list, or elves will steal your
+ * keyboard in the middle of the night!
+ */
+#include <vlibmemory/memclnt.api.h>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/vl_memory_msg_enum.h b/src/vlibmemory/vl_memory_msg_enum.h
new file mode 100644
index 00000000..974c0c21
--- /dev/null
+++ b/src/vlibmemory/vl_memory_msg_enum.h
@@ -0,0 +1,42 @@
+/*
+ *------------------------------------------------------------------
+ * vl_memory_msg_enum.h - Our view of how to number API messages
+ * Clients have their own view, which has to agree with ours.
+ *
+ * Copyright (c) 2009-2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __VL_MSG_ENUM_H__
+#define __VL_MSG_ENUM_H__
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+ VL_ILLEGAL_MESSAGE_ID = 0,
+#include <vlibmemory/vl_memory_api_h.h>
+} vl_msg_id_t;
+#undef vl_msg_id
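+
+/*
+ * The vl_msg_id() x-macro turns each message pulled in via
+ * vl_memory_api_h.h into one enum entry, so message ids are assigned
+ * in #include order, starting after VL_ILLEGAL_MESSAGE_ID (0).
+ */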
+
+#endif /* __VL_MSG_ENUM_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibsocket/api.h b/src/vlibsocket/api.h
new file mode 100644
index 00000000..d7b7055d
--- /dev/null
+++ b/src/vlibsocket/api.h
@@ -0,0 +1,87 @@
+/*
+ *------------------------------------------------------------------
+ * api.h
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef included_vlibsocket_api_h
+#define included_vlibsocket_api_h
+
+#include <vlibapi/api.h>
+
+typedef struct
+{
+ /* Server port number */
+ int portno;
+
+ /* By default, localhost... */
+ u32 bind_address;
+
+ /*
+ * (listen, server, client) registrations. Shared memory
+ * registrations are in shared memory
+ */
+ vl_api_registration_t *registration_pool;
+ /*
+ * Chain-drag variables, so message API handlers
+ * (generally) don't know whether they're talking to a socket
+ * or to a shared-memory connection.
+ */
+ vl_api_registration_t *current_rp;
+ clib_file_t *current_uf;
+ /* One input buffer, shared across all sockets */
+ i8 *input_buffer;
+} socket_main_t;
+
+extern socket_main_t socket_main;
+
+void socksvr_add_pending_output (clib_file_t * uf,
+ struct vl_api_registration_ *cf,
+ u8 * buffer, uword buffer_bytes);
+
+#define SOCKSVR_DEFAULT_PORT 32741 /* whatever */
+
+void vl_free_socket_registration_index (u32 pool_index);
+void vl_socket_process_msg (clib_file_t * uf,
+ struct vl_api_registration_ *rp, i8 * input_v);
+clib_error_t *vl_socket_read_ready (clib_file_t * uf);
+void vl_socket_add_pending_output (clib_file_t * uf,
+ struct vl_api_registration_ *rp,
+ u8 * buffer, uword buffer_bytes);
+clib_error_t *vl_socket_write_ready (clib_file_t * uf);
+void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem);
+void vl_socket_api_send_with_data (vl_api_registration_t * rp,
+ u8 * elem, u8 * data_vector);
+void vl_socket_api_send_with_length (vl_api_registration_t * rp,
+ u8 * elem, u32 msg_length);
+void vl_socket_api_send_with_length_no_free (vl_api_registration_t * rp,
+ u8 * elem, u32 msg_length);
+u32 sockclnt_open_index (char *client_name, char *hostname, int port);
+void sockclnt_close_index (u32 index);
+void vl_client_msg_api_send (vl_api_registration_t * cm, u8 * elem);
+vl_api_registration_t *sockclnt_get_registration (u32 index);
+void socksvr_set_port (u16 port);
+void socksvr_set_bind_address (u32 bind_address);
+
+#endif /* included_vlibsocket_api_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibsocket/sock_test.c b/src/vlibsocket/sock_test.c
new file mode 100644
index 00000000..3bded08f
--- /dev/null
+++ b/src/vlibsocket/sock_test.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#define SOCKCLNT_SERVER_PORT 32741 /* whatever */
+
+typedef signed char i8;
+typedef signed short i16;
+typedef signed int i32;
+typedef signed long long i64;
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef unsigned long uword;
+
+#define VL_API_PACKED(x) x __attribute__ ((packed))
+
+typedef VL_API_PACKED (struct _vl_api_sockclnt_create
+ {
+ u16 _vl_msg_id; u8 name[64];
+ u32 context;
+ }) vl_api_sockclnt_create_t;
+
+typedef VL_API_PACKED (struct _vl_api_sockclnt_create_reply
+ {
+ u16 _vl_msg_id;
+ i32 response; u64 handle; u32 index; u32 context;
+ }) vl_api_sockclnt_create_reply_t;
+
+typedef VL_API_PACKED (struct _vl_api_sockclnt_delete
+ {
+ u16 _vl_msg_id; u32 index;
+ u64 handle;
+ }) vl_api_sockclnt_delete_t;
+
+typedef VL_API_PACKED (struct _vl_api_sockclnt_delete_reply
+ {
+ u16 _vl_msg_id; i32 response; u64 handle;
+ }) vl_api_sockclnt_delete_reply_t;
+
+void
+error (char *msg)
+{
+ perror (msg);
+ exit (0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ int sockfd, portno, n;
+ struct sockaddr_in serv_addr;
+ struct hostent *server;
+ char buffer[256];
+ int i;
+ u32 nbytes;
+ vl_api_sockclnt_create_t *mp;
+ vl_api_sockclnt_create_reply_t *rp;
+ char *rdptr;
+ int total_bytes;
+
+ for (i = 0; i < 1; i++)
+ {
+ portno = SOCKCLNT_SERVER_PORT;
+ sockfd = socket (AF_INET, SOCK_STREAM, 0);
+ if (sockfd < 0)
+ error ("ERROR opening socket");
+ server = gethostbyname ("localhost");
+ if (server == NULL)
+ {
+ fprintf (stderr, "ERROR, no such host\n");
+ exit (0);
+ }
+ bzero ((char *) &serv_addr, sizeof (serv_addr));
+ serv_addr.sin_family = AF_INET;
+ bcopy ((char *) server->h_addr,
+ (char *) &serv_addr.sin_addr.s_addr, server->h_length);
+ serv_addr.sin_port = htons (portno);
+ if (connect (sockfd, (const void *) &serv_addr, sizeof (serv_addr)) < 0)
+ error ("ERROR connecting");
+
+ memset (buffer, 0, sizeof (buffer));
+
+ mp = (vl_api_sockclnt_create_t *) buffer;
+ mp->_vl_msg_id = ntohs (8); /* VL_API_SOCKCLNT_CREATE */
+ strncpy ((char *) mp->name, "socket-test", sizeof (mp->name) - 1);
+ mp->name[sizeof (mp->name) - 1] = 0;
+ mp->context = 0xfeedface;
+ /* length of the message, including the length itself */
+ nbytes = sizeof (*mp) + sizeof (nbytes);
+ nbytes = ntohl (nbytes);
+ n = write (sockfd, &nbytes, sizeof (nbytes));
+ if (n < 0)
+ error ("ERROR writing len to socket");
+ n = write (sockfd, mp, sizeof (*mp));
+ if (n < 0)
+ error ("ERROR writing msg to socket");
+
+ memset (buffer, 0, sizeof (buffer));
+
+ total_bytes = 0;
+ rdptr = buffer;
+ do
+ {
+ n = read (sockfd, rdptr, sizeof (buffer) - (rdptr - buffer));
+ if (n < 0)
+ error ("ERROR reading from socket");
+ printf ("read %d bytes\n", n);
+ total_bytes += n;
+ rdptr += n;
+ }
+ while (total_bytes < sizeof (vl_api_sockclnt_create_reply_t) + 4);
+
+ rp = (vl_api_sockclnt_create_reply_t *) (buffer + 4);
+ /* VL_API_SOCKCLNT_CREATE_REPLY */
+ if (ntohs (rp->_vl_msg_id) != 9)
+ {
+ printf ("WARNING: msg id %d\n", ntohs (rp->_vl_msg_id));
+ }
+
+ printf ("response %d, handle 0x%llx, index %d, context 0x%x\n",
+ ntohl (rp->response), rp->handle, rp->index, rp->context);
+ close (sockfd);
+ }
+ return 0;
+}
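+
+/*
+ * Standalone test client; e.g. (illustrative):
+ *   $ cc -o sock_test sock_test.c && ./sock_test
+ * run against a vpp image with the socket API server listening on
+ * SOCKCLNT_SERVER_PORT.
+ */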
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibsocket/sockclnt.api b/src/vlibsocket/sockclnt.api
new file mode 100644
index 00000000..e0a7505f
--- /dev/null
+++ b/src/vlibsocket/sockclnt.api
@@ -0,0 +1,50 @@
+/* Hey Emacs use -*- mode: C -*- */
+
+/*
+ *------------------------------------------------------------------
+ * sockclnt.api - API message(s) to hook up clients, pass traffic
+ * to client processes via TCP sockets
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/*
+ * Create a socket client registration
+ */
+define sockclnt_create {
+ u8 name[64]; /* for show, find by name, whatever */
+ u32 context; /* opaque value to be returned in the reply */
+};
+
+define sockclnt_create_reply {
+ i32 response; /* Non-negative = success */
+ u64 handle; /* handle by which vlib knows this client */
+ u32 index; /* index, used e.g. by API trace replay */
+ u32 context; /* opaque value from the create request */
+};
+
+/*
+ * Delete a client registration
+ */
+define sockclnt_delete {
+ u32 index; /* index, used e.g. by API trace replay */
+ u64 handle; /* handle by which vlib knows this client */
+};
+
+define sockclnt_delete_reply {
+ i32 response; /* Non-negative = success */
+ u64 handle; /* in case the client wonders */
+};
+
diff --git a/src/vlibsocket/sockclnt_vlib.c b/src/vlibsocket/sockclnt_vlib.c
new file mode 100644
index 00000000..760ad944
--- /dev/null
+++ b/src/vlibsocket/sockclnt_vlib.c
@@ -0,0 +1,209 @@
+/*
+ *------------------------------------------------------------------
+ * sockclnt_vlib.c
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <vppinfra/byte_order.h>
+#include <netdb.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+#include <vlibsocket/vl_socket_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vlibsocket/vl_socket_api_h.h>
+#undef vl_typedefs
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vlibsocket/vl_socket_api_h.h>
+#undef vl_printfun
+
+/* instantiate all the endian swap functions we know about */
+#define vl_endianfun
+#include <vlibsocket/vl_socket_api_h.h>
+#undef vl_endianfun
+
+static void
+vl_api_sockclnt_create_reply_t_handler (vl_api_sockclnt_create_reply_t * mp)
+{
+ vl_api_registration_t *rp = socket_main.current_rp;
+
+ rp->server_handle = mp->handle;
+ rp->server_index = mp->index;
+}
+
+static void
+vl_api_sockclnt_delete_reply_t_handler (vl_api_sockclnt_delete_reply_t * mp)
+{
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *uf = socket_main.current_uf;
+ vl_api_registration_t *rp = socket_main.current_rp;
+
+ clib_file_del (fm, uf);
+ vl_free_socket_registration_index (rp->vl_api_registration_pool_index);
+}
+
+u32
+sockclnt_open_index (char *client_name, char *hostname, int port)
+{
+ vl_api_registration_t *rp;
+ clib_file_main_t *fm = &file_main;
+ clib_file_t template = { 0 };
+ int sockfd;
+ int one = 1;
+ int rv;
+ struct sockaddr_in serv_addr;
+ struct hostent *server;
+ vl_api_sockclnt_create_t *mp;
+ char my_hostname[64];
+
+ server = gethostbyname (hostname);
+ if (server == NULL)
+ {
+ clib_warning ("Couldn't translate server name %s", hostname);
+ return ~0;
+ }
+
+ /* Set up non-blocking server socket on CLIENT_API_SERVER_PORT */
+ sockfd = socket (AF_INET, SOCK_STREAM, 0);
+
+ if (sockfd < 0)
+ {
+ clib_unix_warning ("socket");
+ return ~0;
+ }
+
+ bzero ((char *) &serv_addr, sizeof (serv_addr));
+ serv_addr.sin_family = AF_INET;
+ bcopy ((char *) server->h_addr,
+ (char *) &serv_addr.sin_addr.s_addr, server->h_length);
+ serv_addr.sin_port = htons (port);
+
+ if (connect (sockfd, (const void *) &serv_addr, sizeof (serv_addr)) < 0)
+ {
+ clib_unix_warning ("Connect failure to (%s, %d)", hostname, port);
+ close (sockfd);
+ return ~0;
+ }
+
+ rv = ioctl (sockfd, FIONBIO, &one);
+ if (rv < 0)
+ {
+ clib_unix_warning ("FIONBIO");
+ close (sockfd);
+ return ~0;
+ }
+
+ pool_get (socket_main.registration_pool, rp);
+ memset (rp, 0, sizeof (*rp));
+ rp->registration_type = REGISTRATION_TYPE_SOCKET_CLIENT;
+ rp->vl_api_registration_pool_index = rp - socket_main.registration_pool;
+
+ template.read_function = vl_socket_read_ready;
+ template.write_function = vl_socket_write_ready;
+ template.file_descriptor = sockfd;
+ template.private_data = rp - socket_main.registration_pool;
+
+ rp->clib_file_index = clib_file_add (fm, &template);
+ rp->name = format (0, "%s:%d", hostname, port);
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SOCKCLNT_CREATE);
+ mp->context = rp - socket_main.registration_pool;
+
+ if (gethostname (my_hostname, sizeof (my_hostname)) < 0)
+ {
+ clib_unix_warning ("gethostname");
+ strncpy (my_hostname, "unknown!", sizeof (my_hostname) - 1);
+ }
+ strncpy ((char *) mp->name, my_hostname, sizeof (mp->name) - 1);
+
+ vl_msg_api_send (rp, (u8 *) mp);
+ return rp - socket_main.registration_pool;
+}
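+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   u32 ci = sockclnt_open_index ("my-client", "localhost",
+ *                                 SOCKSVR_DEFAULT_PORT);
+ *   if (ci != ~0)
+ *     ... exchange messages, then: sockclnt_close_index (ci);
+ */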
+
+void
+sockclnt_close_index (u32 index)
+{
+ vl_api_sockclnt_delete_t *mp;
+ vl_api_registration_t *rp;
+
+ /* Don't crash / assert if fed garbage */
+ if (pool_is_free_index (socket_main.registration_pool, index))
+ {
+ clib_warning ("registration_pool index %d already free", index);
+ return;
+ }
+ rp = pool_elt_at_index (socket_main.registration_pool, index);
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SOCKCLNT_DELETE);
+ mp->handle = rp->server_handle;
+ mp->index = rp->server_index;
+ vl_msg_api_send (rp, (u8 *) mp);
+}
+
+vl_api_registration_t *
+sockclnt_get_registration (u32 index)
+{
+ return pool_elt_at_index (socket_main.registration_pool, index);
+}
+
+/*
+ * Both rx and tx msgs MUST be initialized, or we'll have
+ * precisely no idea how many bytes to write into the API trace...
+ */
+#define foreach_sockclnt_api_msg \
+_(SOCKCLNT_CREATE_REPLY, sockclnt_create_reply) \
+_(SOCKCLNT_DELETE_REPLY, sockclnt_delete_reply)
+
+static clib_error_t *
+sockclnt_vlib_api_init (vlib_main_t * vm)
+{
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_sockclnt_api_msg;
+#undef _
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sockclnt_vlib_api_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibsocket/socksvr_vlib.c b/src/vlibsocket/socksvr_vlib.c
new file mode 100644
index 00000000..31b33df5
--- /dev/null
+++ b/src/vlibsocket/socksvr_vlib.c
@@ -0,0 +1,706 @@
+/*
+ *------------------------------------------------------------------
+ * socksvr_vlib.c
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <vppinfra/byte_order.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include <vlibsocket/api.h>
+#include <vlibmemory/api.h>
+
+#include <vlibsocket/vl_socket_msg_enum.h> /* enumerate all vlib messages */
+
+#define vl_typedefs /* define message structures */
+#include <vlibsocket/vl_socket_api_h.h>
+#undef vl_typedefs
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vlibsocket/vl_socket_api_h.h>
+#undef vl_printfun
+
+/* instantiate all the endian swap functions we know about */
+#define vl_endianfun
+#include <vlibsocket/vl_socket_api_h.h>
+#undef vl_endianfun
+
+socket_main_t socket_main;
+
+void
+dump_socket_clients (vlib_main_t * vm, api_main_t * am)
+{
+ vl_api_registration_t *reg;
+ socket_main_t *sm = &socket_main;
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *f;
+
+ /*
+ * Must have at least one active client, not counting the
+ * REGISTRATION_TYPE_SOCKET_LISTEN bind/accept socket
+ */
+ if (pool_elts (sm->registration_pool) < 2)
+ return;
+
+ vlib_cli_output (vm, "TCP socket clients");
+ vlib_cli_output (vm, "%16s %8s", "Name", "Fildesc");
+ /* *INDENT-OFF* */
+ pool_foreach (reg, sm->registration_pool,
+ ({
+ if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) {
+ f = pool_elt_at_index (fm->file_pool, reg->clib_file_index);
+ vlib_cli_output (vm, "%16s %8d",
+ reg->name, f->file_descriptor);
+ }
+ }));
+/* *INDENT-ON* */
+}
+
+void
+vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
+{
+ u32 nbytes = 4; /* for the length... */
+ u16 msg_id = ntohs (*(u16 *) elem);
+ u32 msg_length;
+ u32 tmp;
+ api_main_t *am = &api_main;
+
+ ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
+
+ if (msg_id >= vec_len (am->api_trace_cfg))
+ {
+ clib_warning ("id out of range: %d", msg_id);
+ vl_msg_api_free ((void *) elem);
+ return;
+ }
+
+ msg_length = am->api_trace_cfg[msg_id].size;
+ nbytes += msg_length;
+ tmp = clib_host_to_net_u32 (nbytes);
+
+ vl_socket_add_pending_output (file_main.file_pool
+ + rp->clib_file_index,
+ socket_main.registration_pool
+ + rp->vl_api_registration_pool_index,
+ (u8 *) & tmp, sizeof (tmp));
+ vl_socket_add_pending_output (file_main.file_pool
+ + rp->clib_file_index,
+ socket_main.registration_pool
+ + rp->vl_api_registration_pool_index,
+ elem, msg_length);
+ vl_msg_api_free ((void *) elem);
+}
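On the wire, each message is therefore a 4-byte length in network byte order,
which counts itself, followed by the message body. A minimal standalone sketch
of producing that framing (illustrative helper, not part of the VPP API; short
writes are ignored for brevity):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <unistd.h>

    /* Illustrative only: write one length-prefixed message to fd, using
     * the same framing vl_socket_api_send produces (prefix counts itself). */
    static int
    send_framed (int fd, const void *msg, uint32_t msg_len)
    {
      uint32_t total = htonl (msg_len + 4);
      if (write (fd, &total, sizeof (total)) != sizeof (total))
        return -1;
      if (write (fd, msg, msg_len) != (ssize_t) msg_len)
        return -1;
      return 0;
    }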
+
+void
+vl_socket_api_send_with_data (vl_api_registration_t * rp,
+ u8 * elem, u8 * data_vector)
+{
+ u32 nbytes = 4; /* for the length... */
+ u16 msg_id = ntohs (*(u16 *) elem);
+ u32 msg_length;
+ u32 tmp;
+ api_main_t *am = &api_main;
+
+ ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
+
+ if (msg_id >= vec_len (am->api_trace_cfg))
+ {
+ clib_warning ("id out of range: %d", msg_id);
+ vec_free (data_vector);
+ vl_msg_api_free ((void *) elem);
+ return;
+ }
+
+ msg_length = am->api_trace_cfg[msg_id].size;
+ nbytes += msg_length;
+ nbytes += vec_len (data_vector);
+
+ /* Length in network byte order */
+ tmp = clib_host_to_net_u32 (nbytes);
+
+ vl_socket_add_pending_output (file_main.file_pool
+ + rp->clib_file_index,
+ socket_main.registration_pool
+ + rp->vl_api_registration_pool_index,
+ (u8 *) & tmp, sizeof (tmp));
+ vl_socket_add_pending_output (file_main.file_pool
+ + rp->clib_file_index,
+ socket_main.registration_pool
+ + rp->vl_api_registration_pool_index,
+ elem, msg_length);
+ vl_socket_add_pending_output (file_main.file_pool
+ + rp->clib_file_index,
+ socket_main.registration_pool
+ + rp->vl_api_registration_pool_index,
+ data_vector, vec_len (data_vector));
+ vl_msg_api_free ((void *) elem);
+}
+
+static inline void
+vl_socket_api_send_with_length_internal (vl_api_registration_t * rp,
+ u8 * elem, u32 msg_length, int free)
+{
+ u32 nbytes = 4; /* for the length... */
+ u16 msg_id = ntohs (*(u16 *) elem);
+ u32 tmp;
+ api_main_t *am = &api_main;
+
+ ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
+
+ if (msg_id >= vec_len (am->api_trace_cfg))
+ {
+ clib_warning ("id out of range: %d", msg_id);
+ if (free)
+ vl_msg_api_free ((void *) elem);
+ return;
+ }
+
+ nbytes += msg_length;
+
+ /* Length in network byte order */
+ tmp = clib_host_to_net_u32 (nbytes);
+
+ vl_socket_add_pending_output (file_main.file_pool
+ + rp->clib_file_index,
+ socket_main.registration_pool
+ + rp->vl_api_registration_pool_index,
+ (u8 *) & tmp, sizeof (tmp));
+ vl_socket_add_pending_output (file_main.file_pool
+ + rp->clib_file_index,
+ socket_main.registration_pool
+ + rp->vl_api_registration_pool_index,
+ elem, msg_length);
+ if (free)
+ vl_msg_api_free ((void *) elem);
+}
+
+void
+vl_socket_api_send_with_length (vl_api_registration_t * rp,
+ u8 * elem, u32 msg_length)
+{
+ vl_socket_api_send_with_length_internal (rp, elem, msg_length,
+ 1 /* free */ );
+}
+
+void
+vl_socket_api_send_with_length_no_free (vl_api_registration_t * rp,
+ u8 * elem, u32 msg_length)
+{
+ vl_socket_api_send_with_length_internal (rp, elem, msg_length,
+ 0 /* free */ );
+}
+
+void
+vl_free_socket_registration_index (u32 pool_index)
+{
+ vl_api_registration_t *rp;
+ if (pool_is_free_index (socket_main.registration_pool, pool_index))
+ {
+ clib_warning ("main pool index %d already free", pool_index);
+ return;
+ }
+ rp = pool_elt_at_index (socket_main.registration_pool, pool_index);
+
+ ASSERT (rp->registration_type != REGISTRATION_TYPE_FREE);
+ vec_free (rp->name);
+ vec_free (rp->unprocessed_input);
+ vec_free (rp->output_vector);
+ rp->registration_type = REGISTRATION_TYPE_FREE;
+ pool_put (socket_main.registration_pool, rp);
+}
+
+static inline void
+socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp,
+ i8 * input_v)
+{
+ u8 *the_msg = (u8 *) (input_v + sizeof (u32));
+ socket_main.current_uf = uf;
+ socket_main.current_rp = rp;
+ vl_msg_api_socket_handler (the_msg);
+ socket_main.current_uf = 0;
+ socket_main.current_rp = 0;
+}
+
+clib_error_t *
+vl_socket_read_ready (clib_file_t * uf)
+{
+ clib_file_main_t *fm = &file_main;
+ vl_api_registration_t *rp;
+ int n;
+ i8 *msg_buffer = 0;
+ u32 msg_len;
+ u32 save_input_buffer_length = vec_len (socket_main.input_buffer);
+
+ rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
+
+ n = read (uf->file_descriptor, socket_main.input_buffer,
+ vec_len (socket_main.input_buffer));
+
+ if (n <= 0 && errno != EAGAIN)
+ {
+ clib_file_del (fm, uf);
+
+ if (!pool_is_free (socket_main.registration_pool, rp))
+ {
+ u32 index = rp - socket_main.registration_pool;
+ vl_free_socket_registration_index (index);
+ }
+ else
+ {
+ clib_warning ("client index %d already free?",
+ rp->vl_api_registration_pool_index);
+ }
+ return 0;
+ }
+
+ _vec_len (socket_main.input_buffer) = n;
+
+ /*
+ * Look for bugs here. This code is tricky because
+ * data read from a stream socket does not honor message
+ * boundaries. In the case of a long message (>4K bytes)
+ * we have to do (at least) 2 reads, etc.
+ */
+ do
+ {
+ if (vec_len (rp->unprocessed_input))
+ {
+ vec_append (rp->unprocessed_input, socket_main.input_buffer);
+ msg_buffer = rp->unprocessed_input;
+ msg_len = rp->unprocessed_msg_length;
+ }
+ else
+ {
+ msg_buffer = socket_main.input_buffer;
+ msg_len = 0;
+ }
+
+ if (msg_len == 0)
+ {
+ /* Length may be split across two reads */
+ if (vec_len (msg_buffer) < sizeof (u32))
+ goto save_and_split;
+
+ /* total length, including msg_len itself, in network byte order */
+ msg_len = clib_net_to_host_u32 (*((u32 *) msg_buffer));
+ }
+
+ /* Happens if the client sent msg_len == 0 */
+ if (msg_len == 0)
+ {
+ clib_warning ("msg_len == 0");
+ goto turf_it;
+ }
+
+ /* We don't have the entire message yet. */
+ if (msg_len > vec_len (msg_buffer))
+ {
+ save_and_split:
+ /*
+ * if we were using the shared input buffer,
+ * save the fragment.
+ */
+ if (msg_buffer == socket_main.input_buffer)
+ {
+ ASSERT (vec_len (rp->unprocessed_input) == 0);
+ vec_validate (rp->unprocessed_input, vec_len (msg_buffer) - 1);
+ clib_memcpy (rp->unprocessed_input, msg_buffer,
+ vec_len (msg_buffer));
+ _vec_len (rp->unprocessed_input) = vec_len (msg_buffer);
+ }
+ _vec_len (socket_main.input_buffer) = save_input_buffer_length;
+ rp->unprocessed_msg_length = msg_len;
+ return 0;
+ }
+
+ socket_process_msg (uf, rp, msg_buffer);
+ if (n > msg_len)
+ vec_delete (msg_buffer, msg_len, 0);
+ else
+ _vec_len (msg_buffer) = 0;
+ n -= msg_len;
+ msg_len = 0;
+ rp->unprocessed_msg_length = 0;
+ }
+ while (n > 0);
+
+turf_it:
+ _vec_len (socket_main.input_buffer) = save_input_buffer_length;
+
+ return 0;
+}
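Since neither the length prefix nor the body is guaranteed to arrive in a
single read, the loop above buffers partial input per registration. The same
idea as a standalone sketch, with illustrative names (not the VPP API):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <string.h>

    /* Illustrative per-connection reassembly state. */
    typedef struct
    {
      uint8_t buf[65536];       /* accumulated, unconsumed bytes */
      uint32_t len;             /* bytes currently buffered */
    } reasm_t;

    /* Feed newly read bytes; invoke cb once per complete message. */
    static void
    reasm_feed (reasm_t * r, const uint8_t * data, uint32_t n,
                void (*cb) (const uint8_t * msg, uint32_t msg_len))
    {
      if (r->len + n > sizeof (r->buf))
        return;                 /* overflow handling elided for brevity */
      memcpy (r->buf + r->len, data, n);
      r->len += n;

      for (;;)
        {
          uint32_t total;
          if (r->len < 4)
            return;             /* length itself split across reads */
          memcpy (&total, r->buf, 4);
          total = ntohl (total);        /* includes the 4 length bytes */
          if (total < 4 || r->len < total)
            return;             /* wait for the rest of the message */
          cb (r->buf + 4, total - 4);
          memmove (r->buf, r->buf + total, r->len - total);
          r->len -= total;
        }
    }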
+
+void
+vl_socket_add_pending_output (clib_file_t * uf,
+ vl_api_registration_t * rp,
+ u8 * buffer, uword buffer_bytes)
+{
+ clib_file_main_t *fm = &file_main;
+
+ vec_add (rp->output_vector, buffer, buffer_bytes);
+ if (vec_len (rp->output_vector) > 0)
+ {
+ int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ if (!skip_update)
+ fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+}
+
+static void
+socket_del_pending_output (clib_file_t * uf,
+ vl_api_registration_t * rp, uword n_bytes)
+{
+ clib_file_main_t *fm = &file_main;
+
+ vec_delete (rp->output_vector, n_bytes, 0);
+ if (vec_len (rp->output_vector) <= 0)
+ {
+ int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ if (!skip_update)
+ fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+}
+
+clib_error_t *
+vl_socket_write_ready (clib_file_t * uf)
+{
+ clib_file_main_t *fm = &file_main;
+ vl_api_registration_t *rp;
+ int n;
+
+ rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
+
+ /* Flush output vector. */
+ n = write (uf->file_descriptor,
+ rp->output_vector, vec_len (rp->output_vector));
+
+ if (n < 0)
+ {
+#if DEBUG > 2
+ clib_warning ("write error, close the file...\n");
+#endif
+ clib_file_del (fm, uf);
+
+ vl_free_socket_registration_index (rp - socket_main.registration_pool);
+ return 0;
+ }
+
+ else if (n > 0)
+ socket_del_pending_output (uf, rp, n);
+
+ return 0;
+}
+
+clib_error_t *
+vl_socket_error_ready (clib_file_t * uf)
+{
+ vl_api_registration_t *rp;
+ clib_file_main_t *fm = &file_main;
+
+ rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
+ clib_file_del (fm, uf);
+ vl_free_socket_registration_index (rp - socket_main.registration_pool);
+
+ return 0;
+}
+
+void
+socksvr_file_add (clib_file_main_t * fm, int fd)
+{
+ vl_api_registration_t *rp;
+ clib_file_t template = { 0 };
+
+ pool_get (socket_main.registration_pool, rp);
+ memset (rp, 0, sizeof (*rp));
+
+ template.read_function = vl_socket_read_ready;
+ template.write_function = vl_socket_write_ready;
+ template.error_function = vl_socket_error_ready;
+ template.file_descriptor = fd;
+ template.private_data = rp - socket_main.registration_pool;
+
+ rp->registration_type = REGISTRATION_TYPE_SOCKET_SERVER;
+ rp->vl_api_registration_pool_index = rp - socket_main.registration_pool;
+ rp->clib_file_index = clib_file_add (fm, &template);
+}
+
+static clib_error_t *
+socksvr_accept_ready (clib_file_t * uf)
+{
+ clib_file_main_t *fm = &file_main;
+ struct sockaddr_in client_addr;
+ int client_fd;
+ int client_len;
+
+ client_len = sizeof (client_addr);
+
+ /*
+ * The accepted socket supposedly inherits the non-blocking
+ * attribute from the listening socket; that inheritance is not
+ * guaranteed on all platforms (see the note below).
+ */
+ client_fd = accept (uf->file_descriptor,
+ (struct sockaddr *) &client_addr,
+ (socklen_t *) & client_len);
+
+ if (client_fd < 0)
+ return clib_error_return_unix (0, "socksvr_accept_ready: accept");
+
+ socksvr_file_add (fm, client_fd);
+ return 0;
+}
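Whether the accepted fd actually inherits non-blocking mode is
platform-dependent; on Linux, accept(2) does not inherit file status flags
such as O_NONBLOCK. A defensive caller could set the flag explicitly
(hypothetical helper):

    #include <fcntl.h>

    /* Hypothetical helper: force O_NONBLOCK on an accepted fd instead of
     * relying on inheritance from the listening socket. */
    static int
    set_nonblocking (int fd)
    {
      int flags = fcntl (fd, F_GETFL, 0);
      if (flags < 0)
        return -1;
      return fcntl (fd, F_SETFL, flags | O_NONBLOCK);
    }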
+
+static clib_error_t *
+socksvr_bogus_write (clib_file_t * uf)
+{
+ clib_warning ("why am I here?");
+ return 0;
+}
+
+/*
+ * vl_api_sockclnt_create_t_handler
+ */
+void
+vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp)
+{
+ vl_api_registration_t *regp;
+ vl_api_sockclnt_create_reply_t *rp;
+ int rv = 1;
+
+ regp = socket_main.current_rp;
+
+ ASSERT (regp->registration_type == REGISTRATION_TYPE_SOCKET_SERVER);
+
+ regp->name = format (0, "%s%c", mp->name, 0);
+
+ rp = vl_msg_api_alloc (sizeof (*rp));
+ rp->_vl_msg_id = htons (VL_API_SOCKCLNT_CREATE_REPLY);
+ rp->handle = (uword) regp;
+ rp->index = (uword) regp->vl_api_registration_pool_index;
+ rp->context = mp->context;
+ rp->response = htonl (rv);
+
+ vl_msg_api_send (regp, (u8 *) rp);
+}
+
+/*
+ * vl_api_sockclnt_delete_t_handler
+ */
+void
+vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp)
+{
+ vl_api_registration_t *regp;
+ vl_api_sockclnt_delete_reply_t *rp;
+
+ if (!pool_is_free_index (socket_main.registration_pool, mp->index))
+ {
+ regp = pool_elt_at_index (socket_main.registration_pool, mp->index);
+
+ rp = vl_msg_api_alloc (sizeof (*rp));
+ rp->_vl_msg_id = htons (VL_API_SOCKCLNT_DELETE_REPLY);
+ rp->handle = mp->handle;
+ rp->response = htonl (1);
+
+ vl_msg_api_send (regp, (u8 *) rp);
+
+ clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index);
+
+ vl_free_socket_registration_index (mp->index);
+ }
+ else
+ {
+ clib_warning ("unknown client ID %d", mp->index);
+ }
+}
+
+#define foreach_vlib_api_msg \
+_(SOCKCLNT_CREATE, sockclnt_create) \
+_(SOCKCLNT_DELETE, sockclnt_delete)
+
+static clib_error_t *
+socksvr_api_init (vlib_main_t * vm)
+{
+ clib_file_main_t *fm = &file_main;
+ clib_file_t template = { 0 };
+ int sockfd;
+ int one = 1;
+ int rv;
+ struct sockaddr_in serv_addr;
+ vl_api_registration_t *rp;
+ u16 portno;
+ u32 bind_address;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vlib_api_msg;
+#undef _
+
+ vec_resize (socket_main.input_buffer, 4096);
+
+ /* Set up non-blocking server socket on CLIENT_API_SERVER_PORT */
+ sockfd = socket (AF_INET, SOCK_STREAM, 0);
+
+ if (sockfd < 0)
+ {
+ return clib_error_return_unix (0, "socket");
+ }
+
+ rv = ioctl (sockfd, FIONBIO, &one);
+ if (rv < 0)
+ {
+ close (sockfd);
+ return clib_error_return_unix (0, "FIONBIO");
+ }
+
+ rv = setsockopt (sockfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one));
+ if (rv < 0)
+ {
+ close (sockfd);
+ return clib_error_return_unix (0, "SO_REUSEADDR");
+ }
+
+ bzero ((char *) &serv_addr, sizeof (serv_addr));
+ serv_addr.sin_family = AF_INET;
+
+ if (socket_main.bind_address)
+ bind_address = socket_main.bind_address;
+ else
+ bind_address = INADDR_LOOPBACK;
+
+ if (socket_main.portno)
+ portno = socket_main.portno;
+ else
+ portno = SOCKSVR_DEFAULT_PORT;
+
+ serv_addr.sin_port = clib_host_to_net_u16 (portno);
+ serv_addr.sin_addr.s_addr = clib_host_to_net_u32 (bind_address);
+
+ if (bind (sockfd, (struct sockaddr *) &serv_addr, sizeof (serv_addr)) < 0)
+ {
+ close (sockfd);
+ return clib_error_return_unix (0, "bind");
+ }
+
+ rv = listen (sockfd, 5);
+ if (rv < 0)
+ {
+ close (sockfd);
+ return clib_error_return_unix (0, "listen");
+ }
+
+ pool_get (socket_main.registration_pool, rp);
+ memset (rp, 0, sizeof (*rp));
+
+ rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN;
+
+ template.read_function = socksvr_accept_ready;
+ template.write_function = socksvr_bogus_write;
+ template.file_descriptor = sockfd;
+ template.private_data = rp - socket_main.registration_pool;
+
+ rp->clib_file_index = clib_file_add (fm, &template);
+ return 0;
+}
+
+static clib_error_t *
+socket_exit (vlib_main_t * vm)
+{
+ clib_file_main_t *fm = &file_main;
+ vl_api_registration_t *rp;
+
+ /* Defensive driving in case something wipes out early */
+ if (socket_main.registration_pool)
+ {
+ u32 index;
+ /* *INDENT-OFF* */
+ pool_foreach (rp, socket_main.registration_pool, ({
+ clib_file_del (fm, fm->file_pool + rp->clib_file_index);
+ index = rp->vl_api_registration_pool_index;
+ vl_free_socket_registration_index (index);
+ }));
+/* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (socket_exit);
+
+static clib_error_t *
+socksvr_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ int portno;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "port %d", &portno))
+ {
+ socket_main.portno = portno;
+ }
+ else
+ {
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ }
+ return socksvr_api_init (vm);
+}
+
+VLIB_CONFIG_FUNCTION (socksvr_config, "socksvr");
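With the config hook registered, the socket server can be enabled from the
startup configuration, e.g. (the port number here is illustrative;
SOCKSVR_DEFAULT_PORT is used when none is given):

    socksvr { port 32741 }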
+
+/* argument in host byte order */
+void
+socksvr_set_port (u16 port)
+{
+ socket_main.portno = port;
+}
+
+/* argument in host byte order */
+void
+socksvr_set_bind_address (u32 bind_address)
+{
+ socket_main.bind_address = bind_address;
+}
+
+clib_error_t *
+vlibsocket_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vlibsocket_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibsocket/vl_socket_api_h.h b/src/vlibsocket/vl_socket_api_h.h
new file mode 100644
index 00000000..7fc53cef
--- /dev/null
+++ b/src/vlibsocket/vl_socket_api_h.h
@@ -0,0 +1,33 @@
+/*
+ *------------------------------------------------------------------
+ * vl_socket_api_h.h - all API headers, in a specific order.
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/*
+ * Add to the bottom of the #include list, or elves will steal your
+ * keyboard in the middle of the night!
+ */
+#include <vlibmemory/vl_memory_api_h.h>
+#include <vlibsocket/sockclnt.api.h>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibsocket/vl_socket_msg_enum.h b/src/vlibsocket/vl_socket_msg_enum.h
new file mode 100644
index 00000000..cdc61a5b
--- /dev/null
+++ b/src/vlibsocket/vl_socket_msg_enum.h
@@ -0,0 +1,42 @@
+/*
+ *------------------------------------------------------------------
+ * vl_socket_msg_enum.h - Our view of how to number API messages
+ * Clients have their own view, which has to agree with ours.
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __VL_MSG_ENUM_H__
+#define __VL_MSG_ENUM_H__
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+ VL_ILLEGAL_MESSAGE_ID = 0,
+#include <vlibsocket/vl_socket_api_h.h>
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* __VL_MSG_ENUM_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
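For reference, expanding vl_msg_id() over the included API headers yields one
enum member per message; with the sockclnt messages it looks roughly like this
(the vlibmemory message ids from vl_memory_api_h.h come first):

    typedef enum
    {
      VL_ILLEGAL_MESSAGE_ID = 0,
      /* ... message ids from vl_memory_api_h.h ... */
      VL_API_SOCKCLNT_CREATE,
      VL_API_SOCKCLNT_CREATE_REPLY,
      VL_API_SOCKCLNT_DELETE,
      VL_API_SOCKCLNT_DELETE_REPLY,
    } vl_msg_id_t;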
diff --git a/src/vnet.am b/src/vnet.am
new file mode 100644
index 00000000..1cec3a75
--- /dev/null
+++ b/src/vnet.am
@@ -0,0 +1,1074 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+lib_LTLIBRARIES += libvnet.la
+
+libvnet_la_SOURCES =
+libvnet_la_DEPENDENCIES = \
+ libvppinfra.la \
+ libvlib.la \
+ libsvmdb.la \
+ libsvm.la \
+ libvlibmemory.la
+
+libvnet_la_LIBADD = $(libvnet_la_DEPENDENCIES) -lm -lpthread -ldl -lrt
+
+if WITH_LIBSSL
+libvnet_la_LIBADD += -lcrypto
+endif
+
+########################################
+# Generic stuff
+########################################
+libvnet_la_SOURCES += \
+ vnet/config.c \
+ vnet/devices/devices.c \
+ vnet/handoff.c \
+ vnet/interface.c \
+ vnet/interface_api.c \
+ vnet/interface_cli.c \
+ vnet/interface_format.c \
+ vnet/interface_output.c \
+ vnet/misc.c \
+ vnet/replication.c
+
+nobase_include_HEADERS += \
+ vnet/api_errno.h \
+ vnet/buffer.h \
+ vnet/config.h \
+ vnet/devices/devices.h \
+ vnet/global_funcs.h \
+ vnet/handoff.h \
+ vnet/interface.h \
+ vnet/interface.api.h \
+ vnet/interface_funcs.h \
+ vnet/ip/ip4_to_ip6.h \
+ vnet/ip/ip6_to_ip4.h \
+ vnet/l3_types.h \
+ vnet/pipeline.h \
+ vnet/replication.h \
+ vnet/vnet.h \
+ vnet/vnet_all_api_h.h \
+ vnet/vnet_msg_enum.h \
+ vnet/util/radix.h
+
+API_FILES += vnet/interface.api
+
+########################################
+# Policer infra
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/policer/node_funcs.c \
+ vnet/policer/policer.c \
+ vnet/policer/xlate.c \
+ vnet/policer/policer_api.c
+
+nobase_include_HEADERS += \
+ vnet/policer/police.h \
+ vnet/policer/policer.h \
+ vnet/policer/xlate.h \
+ vnet/policer/policer.api.h
+
+API_FILES += vnet/policer/policer.api
+
+########################################
+# Cop - junk filter
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/cop/cop.c \
+ vnet/cop/node1.c \
+ vnet/cop/ip4_whitelist.c \
+ vnet/cop/ip6_whitelist.c \
+ vnet/cop/cop_api.c
+
+nobase_include_HEADERS += \
+ vnet/cop/cop.h \
+ vnet/cop/cop.api.h
+
+API_FILES += vnet/cop/cop.api
+
+########################################
+# Layer 2 protocols go here
+########################################
+
+########################################
+# Layer 2 protocol: Ethernet
+########################################
+libvnet_la_SOURCES += \
+ vnet/ethernet/arp.c \
+ vnet/ethernet/format.c \
+ vnet/ethernet/init.c \
+ vnet/ethernet/interface.c \
+ vnet/ethernet/node.c \
+ vnet/ethernet/pg.c \
+ vnet/ethernet/sfp.c \
+ vnet/ethernet/p2p_ethernet.c \
+ vnet/ethernet/p2p_ethernet_input.c \
+ vnet/ethernet/p2p_ethernet_api.c
+
+nobase_include_HEADERS += \
+ vnet/ethernet/arp_packet.h \
+ vnet/ethernet/error.def \
+ vnet/ethernet/ethernet.h \
+ vnet/ethernet/packet.h \
+ vnet/ethernet/types.def \
+ vnet/ethernet/sfp.h \
+ vnet/ethernet/p2p_ethernet.api.h \
+ vnet/ethernet/p2p_ethernet.h
+
+API_FILES += vnet/ethernet/p2p_ethernet.api
+
+########################################
+# Layer 2 protocol: Ethernet bridging
+########################################
+libvnet_la_SOURCES += \
+ vnet/l2/feat_bitmap.c \
+ vnet/l2/l2_api.c \
+ vnet/l2/l2_bd.c \
+ vnet/l2/l2_bvi.c \
+ vnet/l2/l2_input_classify.c \
+ vnet/l2/l2_output_classify.c \
+ vnet/l2/l2_efp_filter.c \
+ vnet/l2/l2_fib.c \
+ vnet/l2/l2_flood.c \
+ vnet/l2/l2_fwd.c \
+ vnet/l2/l2_input_acl.c \
+ vnet/l2/l2_input.c \
+ vnet/l2/l2_input_vtr.c \
+ vnet/l2/l2_learn.c \
+ vnet/l2/l2_output_acl.c \
+ vnet/l2/l2_output.c \
+ vnet/l2/l2_patch.c \
+ vnet/l2/l2_rw.c \
+ vnet/l2/l2_vtr.c \
+ vnet/l2/l2_xcrw.c
+
+nobase_include_HEADERS += \
+ vnet/l2/feat_bitmap.h \
+ vnet/l2/l2_input.h \
+ vnet/l2/l2_output.h \
+ vnet/l2/l2_vtr.h \
+ vnet/l2/l2_input_vtr.h \
+ vnet/l2/l2_efp_filter.h \
+ vnet/l2/l2_fwd.h \
+ vnet/l2/l2_bd.h \
+ vnet/l2/l2_bvi.h \
+ vnet/l2/l2_flood.h \
+ vnet/l2/l2_fib.h \
+ vnet/l2/l2_rw.h \
+ vnet/l2/l2_xcrw.h \
+ vnet/l2/l2_classify.h \
+ vnet/l2/l2.api.h
+
+API_FILES += vnet/l2/l2.api
+
+########################################
+# Layer 2 protocol: SRP
+########################################
+libvnet_la_SOURCES += \
+ vnet/srp/format.c \
+ vnet/srp/interface.c \
+ vnet/srp/node.c \
+ vnet/srp/pg.c
+
+nobase_include_HEADERS += \
+ vnet/srp/packet.h \
+ vnet/srp/srp.h
+
+########################################
+# Layer 2 protocol: PPP
+########################################
+libvnet_la_SOURCES += \
+ vnet/ppp/node.c \
+ vnet/ppp/pg.c \
+ vnet/ppp/ppp.c
+
+nobase_include_HEADERS += \
+ vnet/ppp/error.def \
+ vnet/ppp/ppp.h \
+ vnet/ppp/packet.h
+
+########################################
+# Layer 2 protocol: HDLC
+########################################
+libvnet_la_SOURCES += \
+ vnet/hdlc/node.c \
+ vnet/hdlc/pg.c \
+ vnet/hdlc/hdlc.c
+
+nobase_include_HEADERS += \
+ vnet/hdlc/error.def \
+ vnet/hdlc/hdlc.h \
+ vnet/hdlc/packet.h
+
+########################################
+# Layer 2 protocol: LLC
+########################################
+libvnet_la_SOURCES += \
+ vnet/llc/llc.c \
+ vnet/llc/node.c \
+ vnet/llc/pg.c
+
+nobase_include_HEADERS += \
+ vnet/llc/llc.h
+
+########################################
+# Layer 2 protocol: SNAP
+########################################
+libvnet_la_SOURCES += \
+ vnet/snap/snap.c \
+ vnet/snap/node.c \
+ vnet/snap/pg.c
+
+nobase_include_HEADERS += \
+ vnet/snap/snap.h
+
+########################################
+# Layer 2 / vxlan
+########################################
+libvnet_la_SOURCES += \
+ vnet/vxlan/vxlan.c \
+ vnet/vxlan/encap.c \
+ vnet/vxlan/decap.c \
+ vnet/vxlan/vxlan_api.c
+
+nobase_include_HEADERS += \
+ vnet/vxlan/vxlan.h \
+ vnet/vxlan/vxlan_packet.h \
+ vnet/vxlan/vxlan_error.def \
+ vnet/vxlan/vxlan.api.h
+
+API_FILES += vnet/vxlan/vxlan.api
+
+########################################
+# Layer 2 / CDP
+########################################
+libvnet_la_SOURCES += \
+ vnet/cdp/cdp_input.c \
+ vnet/cdp/cdp_node.c \
+ vnet/cdp/cdp_periodic.c
+
+nobase_include_HEADERS += \
+ vnet/cdp/cdp_protocol.h
+
+########################################
+# Layer 2 / LLDP
+########################################
+libvnet_la_SOURCES += \
+ vnet/lldp/lldp_input.c \
+ vnet/lldp/lldp_node.c \
+ vnet/lldp/lldp_output.c \
+ vnet/lldp/lldp_cli.c \
+ vnet/lldp/lldp_api.c
+
+nobase_include_HEADERS += \
+ vnet/lldp/lldp_protocol.h \
+ vnet/lldp/lldp.h \
+ vnet/lldp/lldp.api.h
+
+API_FILES += vnet/lldp/lldp.api
+
+########################################
+# Layer 2/3 "classify"
+########################################
+libvnet_la_SOURCES += \
+ vnet/classify/vnet_classify.c \
+ vnet/classify/ip_classify.c \
+ vnet/classify/input_acl.c \
+ vnet/classify/policer_classify.c \
+ vnet/classify/flow_classify.c \
+ vnet/classify/flow_classify_node.c \
+ vnet/classify/vnet_classify.h \
+ vnet/classify/classify_api.c
+
+nobase_include_HEADERS += \
+ vnet/classify/vnet_classify.h \
+ vnet/classify/input_acl.h \
+ vnet/classify/policer_classify.h \
+ vnet/classify/flow_classify.h \
+ vnet/classify/classify.api.h
+
+API_FILES += vnet/classify/classify.api
+
+########################################
+# Layer 3 protocols go here
+########################################
+
+########################################
+# Layer 3 protocol: IP v4/v6
+########################################
+libvnet_la_SOURCES += \
+ vnet/ip/format.c \
+ vnet/ip/icmp4.c \
+ vnet/ip/icmp6.c \
+ vnet/ip/ip46_cli.c \
+ vnet/ip/ip4_format.c \
+ vnet/ip/ip4_forward.c \
+ vnet/ip/ip4_input.c \
+ vnet/ip/ip4_mtrie.c \
+ vnet/ip/ip4_pg.c \
+ vnet/ip/ip4_source_and_port_range_check.c \
+ vnet/ip/ip4_source_check.c \
+ vnet/ip/ip6_format.c \
+ vnet/ip/ip6_forward.c \
+ vnet/ip/ip6_hop_by_hop.c \
+ vnet/ip/ip6_input.c \
+ vnet/ip/ip6_neighbor.c \
+ vnet/ip/ip6_pg.c \
+ vnet/ip/ip_api.c \
+ vnet/ip/ip_checksum.c \
+ vnet/ip/ip_frag.c \
+ vnet/ip/ip.h \
+ vnet/ip/ip_init.c \
+ vnet/ip/ip_input_acl.c \
+ vnet/ip/lookup.c \
+ vnet/ip/ping.c \
+ vnet/ip/punt.c
+
+nobase_include_HEADERS += \
+ vnet/ip/format.h \
+ vnet/ip/icmp46_packet.h \
+ vnet/ip/icmp4.h \
+ vnet/ip/icmp6.h \
+ vnet/ip/igmp_packet.h \
+ vnet/ip/ip.api.h \
+ vnet/ip/ip4_error.h \
+ vnet/ip/ip4.h \
+ vnet/ip/ip4_mtrie.h \
+ vnet/ip/ip4_packet.h \
+ vnet/ip/ip6_error.h \
+ vnet/ip/ip6.h \
+ vnet/ip/ip6_hop_by_hop.h \
+ vnet/ip/ip6_hop_by_hop_packet.h \
+ vnet/ip/ip6_packet.h \
+ vnet/ip/ip6_neighbor.h \
+ vnet/ip/ip.h \
+ vnet/ip/ip_packet.h \
+ vnet/ip/ip_source_and_port_range_check.h \
+ vnet/ip/lookup.h \
+ vnet/ip/ports.def \
+ vnet/ip/protocols.def \
+ vnet/ip/punt_error.def \
+ vnet/ip/punt.h
+
+API_FILES += vnet/ip/ip.api
+
+########################################
+# Bidirectional Forwarding Detection
+########################################
+
+nobase_include_HEADERS += \
+ vnet/bfd/bfd_protocol.h \
+ vnet/bfd/bfd_main.h \
+ vnet/bfd/bfd_api.h \
+ vnet/bfd/bfd_udp.h \
+ vnet/bfd/bfd.api.h
+
+libvnet_la_SOURCES += \
+ vnet/bfd/bfd_api.h \
+ vnet/bfd/bfd_udp.c \
+ vnet/bfd/bfd_main.c \
+ vnet/bfd/bfd_protocol.c \
+ vnet/bfd/bfd_cli.c \
+ vnet/bfd/bfd_api.c
+
+API_FILES += vnet/bfd/bfd.api
+
+########################################
+# Layer 3 protocol: IPSec
+########################################
+if WITH_LIBSSL
+libvnet_la_SOURCES += \
+ vnet/ipsec/ipsec.c \
+ vnet/ipsec/ipsec_cli.c \
+ vnet/ipsec/ipsec_format.c \
+ vnet/ipsec/ipsec_input.c \
+ vnet/ipsec/ipsec_if.c \
+ vnet/ipsec/ipsec_if_in.c \
+ vnet/ipsec/ipsec_if_out.c \
+ vnet/ipsec/esp_encrypt.c \
+ vnet/ipsec/esp_decrypt.c \
+ vnet/ipsec/ikev2.c \
+ vnet/ipsec/ikev2_crypto.c \
+ vnet/ipsec/ikev2_cli.c \
+ vnet/ipsec/ikev2_payload.c \
+ vnet/ipsec/ikev2_format.c \
+ vnet/ipsec/ipsec_api.c
+
+API_FILES += vnet/ipsec/ipsec.api
+endif
+
+libvnet_la_SOURCES += \
+ vnet/ipsec/ipsec_output.c
+
+nobase_include_HEADERS += \
+ vnet/ipsec/ipsec.h \
+ vnet/ipsec/esp.h \
+ vnet/ipsec/ikev2.h \
+ vnet/ipsec/ikev2_priv.h \
+ vnet/ipsec/ipsec.api.h
+
+########################################
+# Layer 3 protocol: osi
+########################################
+libvnet_la_SOURCES += \
+ vnet/osi/node.c \
+ vnet/osi/osi.c \
+ vnet/osi/pg.c
+
+nobase_include_HEADERS += \
+ vnet/osi/osi.h
+
+########################################
+# Layer 3 protocol: MAP
+########################################
+libvnet_la_SOURCES += \
+ vnet/map/map.c \
+ vnet/map/map_dpo.c \
+ vnet/map/ip4_map.c \
+ vnet/map/ip6_map.c \
+ vnet/map/ip4_map_t.c \
+ vnet/map/ip6_map_t.c \
+ vnet/map/map_api.c
+
+nobase_include_HEADERS += \
+ vnet/map/map.h \
+ vnet/map/map_dpo.h \
+ vnet/map/map.api.h
+
+API_FILES += vnet/map/map.api
+
+if ENABLE_TESTS
+TESTS += test_map
+test_map_SOURCES = \
+ vnet/map/test.c
+test_map_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+
+test_map_LDADD = libvnet.la libvppinfra.la libvlib.la \
+ -lpthread -lvlibmemory -ldl -lsvm -lrt
+
+test_map_LDFLAGS = -static
+endif
+
+########################################
+# Layer 4 protocol: tcp
+########################################
+libvnet_la_SOURCES += \
+ vnet/tcp/tcp_api.c \
+ vnet/tcp/tcp_format.c \
+ vnet/tcp/tcp_pg.c \
+ vnet/tcp/tcp_syn_filter4.c \
+ vnet/tcp/tcp_output.c \
+ vnet/tcp/tcp_input.c \
+ vnet/tcp/tcp_newreno.c \
+ vnet/tcp/builtin_client.c \
+ vnet/tcp/builtin_server.c \
+ vnet/tcp/builtin_http_server.c \
+ vnet/tcp/builtin_proxy.c \
+ vnet/tcp/tcp_test.c \
+ vnet/tcp/tcp.c
+
+nobase_include_HEADERS += \
+ vnet/tcp/tcp_packet.h \
+ vnet/tcp/tcp_timer.h \
+ vnet/tcp/tcp_debug.h \
+ vnet/tcp/tcp.h \
+ vnet/tcp/tcp.api.h
+
+API_FILES += vnet/tcp/tcp.api
+
+########################################
+# Layer 4 protocol: udp
+########################################
+libvnet_la_SOURCES += \
+ vnet/udp/udp.c \
+ vnet/udp/udp_input.c \
+ vnet/udp/builtin_server.c \
+ vnet/udp/udp_format.c \
+ vnet/udp/udp_local.c \
+ vnet/udp/udp_pg.c
+
+nobase_include_HEADERS += \
+ vnet/udp/udp_error.def \
+ vnet/udp/udp.h \
+ vnet/udp/udp_packet.h
+
+########################################
+# Tunnel protocol: gre
+########################################
+libvnet_la_SOURCES += \
+ vnet/gre/gre.c \
+ vnet/gre/node.c \
+ vnet/gre/interface.c \
+ vnet/gre/pg.c \
+ vnet/gre/gre_api.c
+
+nobase_include_HEADERS += \
+ vnet/gre/gre.h \
+ vnet/gre/packet.h \
+ vnet/gre/error.def \
+ vnet/gre/gre.api.h
+
+API_FILES += vnet/gre/gre.api
+
+########################################
+# Tunnel protocol: l2tpv3
+########################################
+libvnet_la_SOURCES += \
+ vnet/l2tp/l2tp.c \
+ vnet/l2tp/encap.c \
+ vnet/l2tp/decap.c \
+ vnet/l2tp/pg.c \
+ vnet/l2tp/l2tp_api.c
+
+nobase_include_HEADERS += \
+ vnet/l2tp/l2tp.h \
+ vnet/l2tp/packet.h \
+ vnet/l2tp/l2tp.api.h
+
+API_FILES += vnet/l2tp/l2tp.api
+
+########################################
+# Tunnel protocol: gre+mpls
+########################################
+libvnet_la_SOURCES += \
+ vnet/mpls/mpls.c \
+ vnet/mpls/mpls_lookup.c \
+ vnet/mpls/mpls_output.c \
+ vnet/mpls/mpls_features.c \
+ vnet/mpls/mpls_input.c \
+ vnet/mpls/interface.c \
+ vnet/mpls/mpls_tunnel.c \
+ vnet/mpls/pg.c \
+ vnet/mpls/mpls_api.c
+
+nobase_include_HEADERS += \
+ vnet/mpls/mpls.h \
+ vnet/mpls/mpls_types.h \
+ vnet/mpls/mpls_tunnel.h \
+ vnet/mpls/packet.h \
+ vnet/mpls/error.def \
+ vnet/mpls/mpls.api.h
+
+API_FILES += vnet/mpls/mpls.api
+
+########################################
+# Tunnel protocol: vxlan-gpe
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/vxlan-gpe/vxlan_gpe.c \
+ vnet/vxlan-gpe/encap.c \
+ vnet/vxlan-gpe/decap.c \
+ vnet/vxlan-gpe/vxlan_gpe_api.c
+
+nobase_include_HEADERS += \
+ vnet/vxlan-gpe/vxlan_gpe.h \
+ vnet/vxlan-gpe/vxlan_gpe_packet.h \
+ vnet/vxlan-gpe/vxlan_gpe_error.def \
+ vnet/vxlan-gpe/vxlan_gpe.api.h
+
+API_FILES += vnet/vxlan-gpe/vxlan_gpe.api
+
+########################################
+# Tunnel protocol: ipsec+gre
+########################################
+libvnet_la_SOURCES += \
+ vnet/ipsec-gre/ipsec_gre.c \
+ vnet/ipsec-gre/node.c \
+ vnet/ipsec-gre/interface.c \
+ vnet/ipsec-gre/ipsec_gre_api.c
+
+nobase_include_HEADERS += \
+ vnet/ipsec-gre/ipsec_gre.h \
+ vnet/ipsec-gre/error.def \
+ vnet/ipsec-gre/ipsec_gre.api.h
+
+API_FILES += vnet/ipsec-gre/ipsec_gre.api
+
+########################################
+# LISP control plane: lisp-cp
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/lisp-cp/lisp_types.c \
+ vnet/lisp-cp/lisp_cp_dpo.c \
+ vnet/lisp-cp/control.c \
+ vnet/lisp-cp/gid_dictionary.c \
+ vnet/lisp-cp/lisp_msg_serdes.c \
+ vnet/lisp-cp/packets.c \
+ vnet/lisp-cp/one_cli.c \
+ vnet/lisp-cp/lisp_cli.c \
+ vnet/lisp-cp/one_api.c \
+ vnet/lisp-cp/lisp_api.c
+
+nobase_include_HEADERS += \
+ vnet/lisp-cp/lisp_types.h \
+ vnet/lisp-cp/packets.h \
+ vnet/lisp-cp/gid_dictionary.h \
+ vnet/lisp-cp/lisp_cp_messages.h \
+ vnet/lisp-cp/lisp_msg_serdes.h \
+ vnet/lisp-cp/control.h \
+ vnet/lisp-cp/one.api.h \
+ vnet/lisp-cp/lisp.api.h
+
+API_FILES += vnet/lisp-cp/lisp.api
+API_FILES += vnet/lisp-cp/one.api
+
+if ENABLE_TESTS
+LDS = \
+ libvppinfra.la \
+ libvnet.la \
+ libvlib.la \
+ libsvm.la \
+ libsvmdb.la \
+ libvlibmemory.la \
+ -lpthread -ldl -lrt -lm
+
+TESTS += test_cp_serdes test_lisp_types
+
+test_cp_serdes_SOURCES = \
+ tests/vnet/lisp-cp/test_cp_serdes.c \
+ vnet/lisp-cp/lisp_msg_serdes.c \
+ vnet/lisp-cp/lisp_types.c \
+ vnet/lisp-cp/packets.c \
+ vnet/ip/ip_checksum.c
+
+test_lisp_types_SOURCES = \
+ tests/vnet/lisp-cp/test_lisp_types.c \
+ vnet/lisp-cp/lisp_types.c
+
+test_cp_serdes_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_lisp_types_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+
+test_cp_serdes_LDADD = $(LDS)
+test_lisp_types_LDADD = $(LDS)
+endif
+
+########################################
+# Tunnel protocol: lisp-gpe
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/lisp-gpe/lisp_gpe.c \
+ vnet/lisp-gpe/lisp_gpe_sub_interface.c \
+ vnet/lisp-gpe/lisp_gpe_adjacency.c \
+ vnet/lisp-gpe/lisp_gpe_tunnel.c \
+ vnet/lisp-gpe/lisp_gpe_fwd_entry.c \
+ vnet/lisp-gpe/lisp_gpe_tenant.c \
+ vnet/lisp-gpe/interface.c \
+ vnet/lisp-gpe/decap.c \
+ vnet/lisp-gpe/lisp_gpe_api.c
+
+nobase_include_HEADERS += \
+ vnet/lisp-gpe/lisp_gpe.h \
+ vnet/lisp-gpe/lisp_gpe_fwd_entry.h \
+ vnet/lisp-gpe/lisp_gpe_tenant.h \
+ vnet/lisp-gpe/lisp_gpe_packet.h \
+ vnet/lisp-gpe/lisp_gpe_error.def \
+ vnet/lisp-gpe/lisp_gpe.api.h
+
+API_FILES += vnet/lisp-gpe/lisp_gpe.api
+
+########################################
+# DHCP client
+########################################
+libvnet_la_SOURCES += \
+ vnet/dhcp/client.c \
+ vnet/dhcp/client.h \
+ vnet/dhcp/dhcp_api.c
+
+nobase_include_HEADERS += \
+ vnet/dhcp/client.h \
+ vnet/dhcp/dhcp.api.h
+
+API_FILES += vnet/dhcp/dhcp.api
+
+########################################
+# DHCP proxy
+########################################
+libvnet_la_SOURCES += \
+ vnet/dhcp/dhcp6_proxy_node.c \
+ vnet/dhcp/dhcp4_proxy_node.c \
+ vnet/dhcp/dhcp_proxy.c
+
+nobase_include_HEADERS += \
+ vnet/dhcp/dhcp4_packet.h \
+ vnet/dhcp/dhcp6_packet.h \
+ vnet/dhcp/dhcp_proxy.h \
+ vnet/dhcp/dhcp6_proxy_error.def \
+ vnet/dhcp/dhcp4_proxy_error.def
+
+########################################
+# ipv6 segment routing
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/srv6/sr.c \
+ vnet/srv6/sr_localsid.c \
+ vnet/srv6/sr_policy_rewrite.c \
+ vnet/srv6/sr_steering.c \
+ vnet/srv6/sr_api.c
+
+nobase_include_HEADERS += \
+ vnet/srv6/sr_packet.h \
+ vnet/srv6/sr.h \
+ vnet/srv6/sr.api.h
+
+API_FILES += vnet/srv6/sr.api
+
+########################################
+# mpls segment routing
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/srmpls/sr_mpls_policy.c \
+ vnet/srmpls/sr_mpls_steering.c
+
+
+nobase_include_HEADERS += \
+ vnet/srmpls/sr.h
+
+########################################
+# IPFIX / netflow v10
+########################################
+libvnet_la_SOURCES += \
+ vnet/flow/flow_report.c \
+ vnet/flow/flow_api.c
+
+nobase_include_HEADERS += \
+ vnet/flow/flow_report.h \
+ vnet/flow/ipfix_info_elements.h \
+ vnet/flow/ipfix_packet.h \
+ vnet/flow/flow.api.h
+
+API_FILES += vnet/flow/flow.api
+
+########################################
+# IPFIX classify code
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/flow/flow_report_classify.c
+
+nobase_include_HEADERS += \
+ vnet/flow/flow_report_classify.h
+
+########################################
+# lawful intercept
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/lawful-intercept/lawful_intercept.c \
+ vnet/lawful-intercept/node.c
+
+nobase_include_HEADERS += \
+ vnet/lawful-intercept/lawful_intercept.h
+
+########################################
+# SPAN (port mirroring)
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/span/span_api.c \
+ vnet/span/span.c \
+ vnet/span/node.c
+
+nobase_include_HEADERS += \
+ vnet/span/span.api.h \
+ vnet/span/span.h
+
+API_FILES += vnet/span/span.api
+
+########################################
+# Packet generator
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/pg/cli.c \
+ vnet/pg/edit.c \
+ vnet/pg/init.c \
+ vnet/pg/input.c \
+ vnet/pg/output.c \
+ vnet/pg/stream.c
+
+nobase_include_HEADERS += \
+ vnet/pg/pg.h \
+ vnet/pg/edit.h
+
+########################################
+# virtio
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/devices/virtio/vhost-user.c \
+ vnet/devices/virtio/vhost_user_api.c
+
+nobase_include_HEADERS += \
+ vnet/devices/virtio/vhost-user.h \
+ vnet/devices/virtio/vhost_user.api.h
+
+API_FILES += vnet/devices/virtio/vhost_user.api
+
+########################################
+# ssvm ethernet
+########################################
+libvnet_la_SOURCES += \
+ vnet/devices/ssvm/ssvm_eth.c \
+ vnet/devices/ssvm/node.c
+
+nobase_include_HEADERS += \
+ vnet/devices/ssvm/ssvm_eth.h
+
+########################################
+# session management
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/session/session.c \
+ vnet/session/session_lookup.c \
+ vnet/session/session_node.c \
+ vnet/session/transport_interface.c \
+ vnet/session/application.c \
+ vnet/session/session_cli.c \
+ vnet/session/application_interface.c \
+ vnet/session/segment_manager.c \
+ vnet/session/session_api.c
+
+nobase_include_HEADERS += \
+ vnet/session/session.h \
+ vnet/session/stream_session.h \
+ vnet/session/session_lookup.h \
+ vnet/session/application.h \
+ vnet/session/transport.h \
+ vnet/session/transport_interface.h \
+ vnet/session/application_interface.h \
+ vnet/session/session_debug.h \
+ vnet/session/segment_manager.h \
+ vnet/session/session.api.h
+
+API_FILES += vnet/session/session.api
+
+########################################
+# Linux packet interface
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/devices/af_packet/af_packet.c \
+ vnet/devices/af_packet/device.c \
+ vnet/devices/af_packet/node.c \
+ vnet/devices/af_packet/cli.c \
+ vnet/devices/af_packet/af_packet_api.c
+
+nobase_include_HEADERS += \
+ vnet/devices/af_packet/af_packet.h \
+ vnet/devices/af_packet/af_packet.api.h
+
+API_FILES += vnet/devices/af_packet/af_packet.api
+
+########################################
+# NETMAP interface
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/devices/netmap/netmap.c \
+ vnet/devices/netmap/device.c \
+ vnet/devices/netmap/node.c \
+ vnet/devices/netmap/cli.c \
+ vnet/devices/netmap/netmap_api.c
+
+nobase_include_HEADERS += \
+ vnet/devices/netmap/netmap.h \
+ vnet/devices/netmap/netmap.api.h
+
+API_FILES += vnet/devices/netmap/netmap.api
+
+########################################
+# Driver feature graph arc support
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/feature/feature.c \
+ vnet/feature/registration.c
+
+nobase_include_HEADERS += \
+ vnet/feature/feature.h
+
+########################################
+# Unix kernel related
+########################################
+
+# FIXME: vnet/unix/hgshm.c
+
+libvnet_la_SOURCES += \
+ vnet/unix/gdb_funcs.c \
+ vnet/unix/pcap.c \
+ vnet/unix/tap_api.c \
+ vnet/unix/tapcli.c \
+ vnet/unix/tuntap.c
+
+nobase_include_HEADERS += \
+ vnet/unix/pcap.h \
+ vnet/unix/tuntap.h \
+ vnet/unix/tap.api.h \
+ vnet/unix/tapcli.h
+
+API_FILES += vnet/unix/tap.api
+
+########################################
+# FIB
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/fib/fib.c \
+ vnet/fib/fib_test.c \
+ vnet/fib/ip4_fib.c \
+ vnet/fib/ip6_fib.c \
+ vnet/fib/mpls_fib.c \
+ vnet/fib/fib_table.c \
+ vnet/fib/fib_walk.c \
+ vnet/fib/fib_types.c \
+ vnet/fib/fib_node.c \
+ vnet/fib/fib_node_list.c \
+ vnet/fib/fib_entry.c \
+ vnet/fib/fib_entry_src.c \
+ vnet/fib/fib_entry_src_rr.c \
+ vnet/fib/fib_entry_src_interface.c \
+ vnet/fib/fib_entry_src_default_route.c \
+ vnet/fib/fib_entry_src_special.c \
+ vnet/fib/fib_entry_src_api.c \
+ vnet/fib/fib_entry_src_adj.c \
+ vnet/fib/fib_entry_src_mpls.c \
+ vnet/fib/fib_entry_src_lisp.c \
+ vnet/fib/fib_entry_cover.c \
+ vnet/fib/fib_entry_delegate.c \
+ vnet/fib/fib_path_list.c \
+ vnet/fib/fib_path.c \
+ vnet/fib/fib_path_ext.c \
+ vnet/fib/fib_urpf_list.c \
+ vnet/fib/fib_attached_export.c \
+ vnet/fib/fib_bfd.c
+
+nobase_include_HEADERS += \
+ vnet/fib/fib.h \
+ vnet/fib/fib_api.h \
+ vnet/fib/ip4_fib.h \
+ vnet/fib/ip6_fib.h \
+ vnet/fib/fib_types.h \
+ vnet/fib/fib_table.h \
+ vnet/fib/fib_node.h \
+ vnet/fib/fib_node_list.h \
+ vnet/fib/fib_entry.h \
+ vnet/fib/fib_entry_delegate.h
+
+########################################
+# ADJ
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/adj/adj_nbr.c \
+ vnet/adj/adj_glean.c \
+ vnet/adj/adj_midchain.c \
+ vnet/adj/adj_mcast.c \
+ vnet/adj/adj_l2.c \
+ vnet/adj/adj_nsh.c \
+ vnet/adj/adj.c \
+ vnet/adj/rewrite.c \
+ vnet/adj/adj_bfd.c \
+ vnet/adj/adj_delegate.c
+
+nobase_include_HEADERS += \
+ vnet/adj/adj.h \
+ vnet/adj/adj_types.h \
+ vnet/adj/adj_glean.h \
+ vnet/adj/adj_nsh.h \
+ vnet/adj/adj_nbr.h \
+ vnet/adj/rewrite.h
+
+########################################
+# Data-Plane Objects
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/dpo/dpo.c \
+ vnet/dpo/drop_dpo.c \
+ vnet/dpo/ip_null_dpo.c \
+ vnet/dpo/punt_dpo.c \
+ vnet/dpo/receive_dpo.c \
+ vnet/dpo/load_balance.c \
+ vnet/dpo/load_balance_map.c \
+ vnet/dpo/lookup_dpo.c \
+ vnet/dpo/classify_dpo.c \
+ vnet/dpo/replicate_dpo.c \
+ vnet/dpo/interface_rx_dpo.c \
+ vnet/dpo/interface_tx_dpo.c \
+ vnet/dpo/mpls_disposition.c \
+ vnet/dpo/mpls_label_dpo.c
+
+nobase_include_HEADERS += \
+ vnet/dpo/load_balance.h \
+ vnet/dpo/drop_dpo.h \
+ vnet/dpo/lookup_dpo.h \
+ vnet/dpo/punt_dpo.h \
+ vnet/dpo/classify_dpo.h \
+ vnet/dpo/receive_dpo.h \
+ vnet/dpo/ip_null_dpo.h \
+ vnet/dpo/replicate_dpo.h \
+ vnet/dpo/dpo.h
+
+########################################
+# Multicast FIB
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/mfib/mfib_test.c \
+ vnet/mfib/mfib_forward.c \
+ vnet/mfib/ip4_mfib.c \
+ vnet/mfib/ip6_mfib.c \
+ vnet/mfib/mfib_types.c \
+ vnet/mfib/mfib_signal.c \
+ vnet/mfib/mfib_itf.c \
+ vnet/mfib/mfib_entry.c \
+ vnet/mfib/mfib_table.c
+
+nobase_include_HEADERS += \
+ vnet/mfib/ip4_mfib.h \
+ vnet/mfib/mfib_types.h \
+ vnet/mfib/mfib_table.h
+
+########################################
+# Utilities
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/util/radix.c
+
+########################################
+# Plugin client library
+########################################
+
+nobase_include_HEADERS += \
+ vnet/plugin/plugin.h
+
+pcap2pg_SOURCES = \
+ vnet/unix/pcap2pg.c \
+ vnet/unix/pcap.h
+
+pcap2pg_LDFLAGS = -static
+pcap2pg_LDADD = libvnet.la libvppinfra.la -lpthread libvlibmemory.la -lm -ldl
+
+noinst_PROGRAMS += pcap2pg
+
+# vi:syntax=automake
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
new file mode 100644
index 00000000..f8496913
--- /dev/null
+++ b/src/vnet/adj/adj.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/adj/adj_glean.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/adj/adj_delegate.h>
+#include <vnet/fib/fib_node_list.h>
+
+/* Adjacency packet/byte counters indexed by adjacency index. */
+vlib_combined_counter_main_t adjacency_counters;
+
+/*
+ * the single adj pool
+ */
+ip_adjacency_t *adj_pool;
+
+/**
+ * @brief Global Config for enabling per-adjacency counters.
+ * By default these are disabled.
+ */
+int adj_per_adj_counters;
+
+always_inline void
+adj_poison (ip_adjacency_t * adj)
+{
+ if (CLIB_DEBUG > 0)
+ {
+ memset (adj, 0xfe, sizeof (adj[0]));
+ }
+}
+
+ip_adjacency_t *
+adj_alloc (fib_protocol_t proto)
+{
+ ip_adjacency_t *adj;
+
+ pool_get_aligned(adj_pool, adj, CLIB_CACHE_LINE_BYTES);
+
+ adj_poison(adj);
+
+ /* Make sure certain fields are always initialized. */
+ /* Validate adjacency counters. */
+ vlib_validate_combined_counter(&adjacency_counters,
+ adj_get_index(adj));
+
+ fib_node_init(&adj->ia_node,
+ FIB_NODE_TYPE_ADJ);
+
+ adj->ia_nh_proto = proto;
+ adj->ia_flags = 0;
+ adj->rewrite_header.sw_if_index = ~0;
+ adj->rewrite_header.flags = 0;
+ adj->lookup_next_index = 0;
+ adj->ia_delegates = NULL;
+
+ /* lest it become a midchain in the future */
+ memset(&adj->sub_type.midchain.next_dpo, 0,
+ sizeof(adj->sub_type.midchain.next_dpo));
+
+ return (adj);
+}
+
+static int
+adj_index_is_special (adj_index_t adj_index)
+{
+ if (ADJ_INDEX_INVALID == adj_index)
+ return (!0);
+
+ return (0);
+}
+
+/**
+ * @brief Pretty print helper function for formatting specific adjacencies.
+ * @param s - input string to format
+ * @param args - other args passed to format function such as:
+ * - vnet_main_t
+ * - ip_lookup_main_t
+ * - adj_index
+ */
+u8 *
+format_ip_adjacency (u8 * s, va_list * args)
+{
+ format_ip_adjacency_flags_t fiaf;
+ ip_adjacency_t * adj;
+ u32 adj_index;
+
+ adj_index = va_arg (*args, u32);
+ fiaf = va_arg (*args, format_ip_adjacency_flags_t);
+ adj = adj_get(adj_index);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_REWRITE:
+ s = format (s, "%U", format_adj_nbr, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_ARP:
+ s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ s = format (s, "%U", format_adj_glean, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ s = format (s, "%U", format_adj_midchain, adj_index, 2);
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ s = format (s, "%U", format_adj_mcast, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ s = format (s, "%U", format_adj_mcast_midchain, adj_index, 0);
+ break;
+ default:
+ break;
+ }
+
+ if (fiaf & FORMAT_IP_ADJACENCY_DETAIL)
+ {
+ adj_delegate_type_t adt;
+ adj_delegate_t *aed;
+ vlib_counter_t counts;
+
+ vlib_get_combined_counter(&adjacency_counters, adj_index, &counts);
+ s = format (s, "\n counts:[%Ld:%Ld]", counts.packets, counts.bytes);
+ s = format (s, "\n locks:%d", adj->ia_node.fn_locks);
+ s = format(s, "\n delegates:\n ");
+ FOR_EACH_ADJ_DELEGATE(adj, adt, aed,
+ {
+ s = format(s, " %U\n", format_adj_deletegate, aed);
+ });
+
+ s = format(s, "\n children:\n ");
+ s = fib_node_children_format(adj->ia_node.fn_children, s);
+ }
+
+ return s;
+}
+
+/*
+ * adj_last_lock_gone
+ *
+ * last lock/reference to the adj has gone, we no longer need it.
+ */
+static void
+adj_last_lock_gone (ip_adjacency_t *adj)
+{
+ vlib_main_t * vm = vlib_get_main();
+
+ ASSERT(0 == fib_node_list_get_size(adj->ia_node.fn_children));
+ ADJ_DBG(adj, "last-lock-gone");
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ dpo_reset(&adj->sub_type.midchain.next_dpo);
+ /* FALL THROUGH */
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_REWRITE:
+ /*
+ * complete and incomplete nbr adjs
+ */
+ adj_nbr_remove(adj_get_index(adj),
+ adj->ia_nh_proto,
+ adj->ia_link,
+ &adj->sub_type.nbr.next_hop,
+ adj->rewrite_header.sw_if_index);
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ adj_glean_remove(adj->ia_nh_proto,
+ adj->rewrite_header.sw_if_index);
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ adj_mcast_remove(adj->ia_nh_proto,
+ adj->rewrite_header.sw_if_index);
+ break;
+ case IP_LOOKUP_NEXT_DROP:
+ case IP_LOOKUP_NEXT_PUNT:
+ case IP_LOOKUP_NEXT_LOCAL:
+ case IP_LOOKUP_NEXT_ICMP_ERROR:
+ case IP_LOOKUP_N_NEXT:
+ /*
+ * type not stored in any DB from which we need to remove it
+ */
+ break;
+ }
+
+ vlib_worker_thread_barrier_release(vm);
+
+ fib_node_deinit(&adj->ia_node);
+ ASSERT(0 == vec_len(adj->ia_delegates));
+ vec_free(adj->ia_delegates);
+ pool_put(adj_pool, adj);
+}
+
+void
+adj_lock (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ adj = adj_get(adj_index);
+ ASSERT(adj);
+
+ ADJ_DBG(adj, "lock");
+ fib_node_lock(&adj->ia_node);
+}
+
+void
+adj_unlock (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ adj = adj_get(adj_index);
+ ASSERT(adj);
+
+ ADJ_DBG(adj, "unlock");
+ ASSERT(adj);
+
+ fib_node_unlock(&adj->ia_node);
+}
+
+u32
+adj_child_add (adj_index_t adj_index,
+ fib_node_type_t child_type,
+ fib_node_index_t child_index)
+{
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+ if (adj_index_is_special(adj_index))
+ {
+ return (~0);
+ }
+
+ return (fib_node_child_add(FIB_NODE_TYPE_ADJ,
+ adj_index,
+ child_type,
+ child_index));
+}
+
+void
+adj_child_remove (adj_index_t adj_index,
+ u32 sibling_index)
+{
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ fib_node_child_remove(FIB_NODE_TYPE_ADJ,
+ adj_index,
+ sibling_index);
+}
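Together, lock/unlock and child add/remove give consumers a reference-counted
attachment point. A hypothetical consumer could pair them like this
(my_node_type and my_index are assumed to identify a FIB node type registered
elsewhere):

    /* Sketch: hold a reference to an adjacency and register as a child
     * so FIB updates reach the consumer. */
    static u32
    my_track_adj (adj_index_t ai, fib_node_type_t my_node_type,
                  fib_node_index_t my_index)
    {
      adj_lock (ai);            /* take a reference */
      return (adj_child_add (ai, my_node_type, my_index));
    }

    static void
    my_untrack_adj (adj_index_t ai, u32 sibling_index)
    {
      adj_child_remove (ai, sibling_index);
      adj_unlock (ai);          /* drop the reference; may free the adj */
    }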
+
+/*
+ * Context for the walk to update the cached feature flags.
+ */
+typedef struct adj_feature_update_t_
+{
+ u8 arc;
+ u8 enable;
+} adj_feature_update_ctx_t;
+
+static adj_walk_rc_t
+adj_feature_update_walk_cb (adj_index_t ai,
+ void *arg)
+{
+ adj_feature_update_ctx_t *ctx = arg;
+ ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ /*
+ * this ugly mess matches the feature arc that is changing against
+ * the adjacencies it affects
+ */
+ if (((ctx->arc == ip6_main.lookup_main.output_feature_arc_index) &&
+ (VNET_LINK_IP6 == adj->ia_link)) ||
+ ((ctx->arc == ip4_main.lookup_main.output_feature_arc_index) &&
+ (VNET_LINK_IP4 == adj->ia_link)) ||
+ ((ctx->arc == mpls_main.output_feature_arc_index) &&
+ (VNET_LINK_MPLS == adj->ia_link)))
+ {
+ if (ctx->enable)
+ adj->rewrite_header.flags |= VNET_REWRITE_HAS_FEATURES;
+ else
+ adj->rewrite_header.flags &= ~VNET_REWRITE_HAS_FEATURES;
+ }
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+void
+adj_feature_update (u32 sw_if_index,
+ u8 arc_index,
+ u8 is_enable)
+{
+ /*
+ * Walk all the adjacencies on the interface to update the cached
+ * 'has-features' flag
+ */
+ adj_feature_update_ctx_t ctx = {
+ .arc = arc_index,
+ .enable = is_enable,
+ };
+ adj_walk (sw_if_index, adj_feature_update_walk_cb, &ctx);
+}
+
+/**
+ * @brief Walk the Adjacencies on a given interface
+ */
+void
+adj_walk (u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ /*
+ * walk all the neighbor adjacencies
+ */
+ fib_protocol_t proto;
+
+ FOR_EACH_FIB_IP_PROTOCOL(proto)
+ {
+ adj_nbr_walk(sw_if_index, proto, cb, ctx);
+ adj_mcast_walk(sw_if_index, proto, cb, ctx);
+ }
+}
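As a usage sketch, counting the adjacencies on an interface with adj_walk
(the helper names are illustrative):

    /* Sketch: adj_walk callback that counts adjacencies. */
    static adj_walk_rc_t
    count_adj_cb (adj_index_t ai, void *arg)
    {
      u32 *count = arg;
      (*count)++;
      return (ADJ_WALK_RC_CONTINUE);
    }

    static u32
    count_interface_adjacencies (u32 sw_if_index)
    {
      u32 count = 0;
      adj_walk (sw_if_index, count_adj_cb, &count);
      return (count);
    }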
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+vnet_link_t
+adj_get_link_type (adj_index_t ai)
+{
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ return (adj->ia_link);
+}
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+u32
+adj_get_sw_if_index (adj_index_t ai)
+{
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ return (adj->rewrite_header.sw_if_index);
+}
+
+/**
+ * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding
+ * 0 is down, !0 is up.
+ */
+int
+adj_is_up (adj_index_t ai)
+{
+ const adj_delegate_t *aed;
+
+ aed = adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD);
+
+ if (NULL == aed)
+ {
+ /*
+ * no BFD tracking - resolved
+ */
+ return (!0);
+ }
+ else
+ {
+ /*
+ * defer to the state of the BFD tracking
+ */
+ return (ADJ_BFD_STATE_UP == aed->ad_bfd_state);
+ }
+}
+
+/**
+ * @brief Return the rewrite string of the adjacency
+ */
+const u8*
+adj_get_rewrite (adj_index_t ai)
+{
+ vnet_rewrite_header_t *rw;
+ ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+ rw = &adj->rewrite_header;
+
+ ASSERT (rw->data_bytes != 0xfefe);
+
+ return (rw->data - rw->data_bytes);
+}
+
+static fib_node_t *
+adj_get_node (fib_node_index_t index)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_get(index);
+
+ return (&adj->ia_node);
+}
+
+#define ADJ_FROM_NODE(_node) \
+ ((ip_adjacency_t*)((char*)_node - STRUCT_OFFSET_OF(ip_adjacency_t, ia_node)))
+
+static void
+adj_node_last_lock_gone (fib_node_t *node)
+{
+ adj_last_lock_gone(ADJ_FROM_NODE(node));
+}
+
+static fib_node_back_walk_rc_t
+adj_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ /*
+ * What's going on? I am the end of the line! An adjacency is never
+ * a child of another FIB node, so a back-walk should never arrive here.
+ */
+ ASSERT(0);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * Adjacency's graph node virtual function table
+ */
+static const fib_node_vft_t adj_vft = {
+ .fnv_get = adj_get_node,
+ .fnv_last_lock = adj_node_last_lock_gone,
+ .fnv_back_walk = adj_back_walk_notify,
+};
+
+static clib_error_t *
+adj_module_init (vlib_main_t * vm)
+{
+ fib_node_register_type(FIB_NODE_TYPE_ADJ, &adj_vft);
+
+ adj_nbr_module_init();
+ adj_glean_module_init();
+ adj_midchain_module_init();
+ adj_mcast_module_init();
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (adj_module_init);
+
+static clib_error_t *
+adj_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ adj_index_t ai = ADJ_INDEX_INVALID;
+ u32 sw_if_index = ~0;
+ int summary = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ai))
+ ;
+ else if (unformat (input, "sum"))
+ summary = 1;
+ else if (unformat (input, "summary"))
+ summary = 1;
+ else if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (summary)
+ {
+      vlib_cli_output (vm, "Number of adjacencies: %d", pool_elts(adj_pool));
+ vlib_cli_output (vm, "Per-adjacency counters: %s",
+ (adj_are_counters_enabled() ?
+ "enabled":
+ "disabled"));
+ }
+ else
+ {
+ if (ADJ_INDEX_INVALID != ai)
+ {
+ if (pool_is_free_index(adj_pool, ai))
+ {
+ vlib_cli_output (vm, "adjacency %d invalid", ai);
+ return 0;
+ }
+
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_DETAIL);
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ pool_foreach_index(ai, adj_pool,
+ ({
+ if (~0 != sw_if_index &&
+ sw_if_index != adj_get_sw_if_index(ai))
+ {
+ }
+ else
+ {
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_NONE);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ return 0;
+}
+
+/*?
+ * Show all adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj}
+ * [@0]
+ * [@1] glean: loop0
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (adj_show_command, static) = {
+ .path = "show adj",
+ .short_help = "show adj [<adj_index>] [interface] [summary]",
+ .function = adj_show,
+};
+
+/**
+ * @brief CLI invoked function to enable/disable per-adj counters
+ */
+static clib_error_t *
+adj_cli_counters_set (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = NULL;
+ int enable = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ enable = 1;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (enable != ~0)
+ {
+ /* user requested something sensible */
+ adj_per_adj_counters = enable;
+ }
+ else
+ {
+ error = clib_error_return (0, "specify 'enable' or 'disable'");
+ }
+
+ return (error);
+}
+
+/*?
+ * Enable/disable per-adjacency counters. This is optional because they come
+ * with a non-negligible performance cost.
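+ * @cliexpar
+ * @cliexcmd{adjacency counters enable}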
+ ?*/
+VLIB_CLI_COMMAND (adj_cli_counters_set_command, static) = {
+ .path = "adjacency counters",
+ .short_help = "adjacency counters [enable|disable]",
+ .function = adj_cli_counters_set,
+};
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
new file mode 100644
index 00000000..ed5eb1f1
--- /dev/null
+++ b/src/vnet/adj/adj.h
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * An adjacency is a representation of an attached L3 peer.
+ *
+ * Adjacency Sub-types:
+ * - neighbour: a representation of an attached L3 peer.
+ * Key:{addr,interface,link/ether-type}
+ * SHARED
+ * - glean: used to drive ARP/ND for packets destined to a local sub-net.
+ *     'glean' means use the packet's destination address as the target
+ *     address in the ARP packet.
+ *     UNSHARED. Only one per-interface.
+ * - midchain: a neighbour adj on a virtual/tunnel interface.
+ *
+ * The API to create and update the adjacency is very sub-type specific. This
+ * is intentional as it encourages the user to carefully consider which adjacency
+ * sub-type they are really using, and hence assign it data in the appropriate
+ * sub-type space in the union of sub-types. This prevents the adj becoming a
+ * disorganised dumping ground for 'my feature needs a u16 somewhere' data. It
+ * is important to enforce this approach as space in the adjacency is at a premium,
+ * as we need it to fit in 1 cache line.
+ *
+ * The API is also based around an index to an adjacency, not a raw pointer. This
+ * is so the user doesn't suffer the same limp-inducing firearm injuries that
+ * the author suffered, as the adjacencies can realloc.
+ */
+
+#ifndef __ADJ_H__
+#define __ADJ_H__
+
+#include <vnet/adj/adj_types.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_glean.h>
+#include <vnet/adj/rewrite.h>
+
+/** @brief Common (IP4/IP6) next index stored in adjacency. */
+typedef enum
+{
+ /** Adjacency to drop this packet. */
+ IP_LOOKUP_NEXT_DROP,
+ /** Adjacency to punt this packet. */
+ IP_LOOKUP_NEXT_PUNT,
+
+ /** This packet is for one of our own IP addresses. */
+ IP_LOOKUP_NEXT_LOCAL,
+
+ /** This packet matches an "incomplete adjacency" and packets
+ need to be passed to ARP to find rewrite string for
+ this destination. */
+ IP_LOOKUP_NEXT_ARP,
+
+ /** This packet matches an "interface route" and packets
+ need to be passed to ARP to find rewrite string for
+ this destination. */
+ IP_LOOKUP_NEXT_GLEAN,
+
+ /** This packet is to be rewritten and forwarded to the next
+ processing node. This is typically the output interface but
+ might be another node for further output processing. */
+ IP_LOOKUP_NEXT_REWRITE,
+
+  /** These packets follow a mid-chain adjacency */
+ IP_LOOKUP_NEXT_MIDCHAIN,
+
+  /** This packet needs to go to ICMP error */
+ IP_LOOKUP_NEXT_ICMP_ERROR,
+
+ /** Multicast Adjacency. */
+ IP_LOOKUP_NEXT_MCAST,
+
+  /** Multicast Midchain Adjacency. An Adjacency for sending mcast packets
+   *  on a tunnel/virtual interface */
+ IP_LOOKUP_NEXT_MCAST_MIDCHAIN,
+
+ IP_LOOKUP_N_NEXT,
+} __attribute__ ((packed)) ip_lookup_next_t;
+
+typedef enum
+{
+ IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT,
+} ip4_lookup_next_t;
+
+typedef enum
+{
+ /* Hop-by-hop header handling */
+ IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
+ IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
+ IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
+ IP6_LOOKUP_N_NEXT,
+} ip6_lookup_next_t;
+
+#define IP4_LOOKUP_NEXT_NODES { \
+ [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \
+ [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \
+ [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \
+ [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \
+ [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \
+ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \
+ [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast", \
+ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \
+ [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip4-mcast-midchain", \
+ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \
+}
+
+#define IP6_LOOKUP_NEXT_NODES { \
+ [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \
+ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \
+ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \
+ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \
+ [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \
+ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \
+ [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast", \
+ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \
+ [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip6-mcast-midchain", \
+ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \
+ [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \
+ [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \
+ [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", \
+}
+
+/**
+ * Forward declaration
+ */
+struct ip_adjacency_t_;
+
+/**
+ * @brief A function type for post-rewrite fixups on midchain adjacency
+ */
+typedef void (*adj_midchain_fixup_t) (vlib_main_t * vm,
+ struct ip_adjacency_t_ * adj,
+ vlib_buffer_t * b0);
+
+/**
+ * @brief Flags on an IP adjacency
+ */
+typedef enum ip_adjacency_flags_t_
+{
+ ADJ_FLAG_NONE = 0,
+
+ /**
+ * Currently a sync walk is active. Used to prevent re-entrant walking
+ */
+ ADJ_FLAG_SYNC_WALK_ACTIVE = (1 << 0),
+
+ /**
+ * Packets TX through the midchain do not increment the interface
+ * counters. This should be used when the adj is associated with an L2
+     * interface and that L2 interface is in a bridge domain. In that case
+     * the packet will have traversed the interface's TX node, and hence have
+     * been counted, before it traverses the midchain
+ */
+ ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << 1),
+} __attribute__ ((packed)) adj_flags_t;
+
+/**
+ * @brief IP unicast adjacency.
+ * @note cache aligned.
+ *
+ * An adjacency is a representation of a peer on a particular link.
+ */
+typedef struct ip_adjacency_t_
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /**
+   * Linkage into the FIB node graph. First member since this type
+ * has 8 byte alignment requirements.
+ */
+ fib_node_t ia_node;
+
+ /**
+ * Next hop after ip4-lookup.
+ * This is not accessed in the rewrite nodes.
+   * 1 byte
+ */
+ ip_lookup_next_t lookup_next_index;
+
+ /**
+ * link/ether-type
+   * 1 byte
+ */
+ vnet_link_t ia_link;
+
+ /**
+   * The protocol of the neighbour/peer, i.e. the protocol with
+   * which to interpret the 'next-hop' attributes of the sub-types.
+   * 1 byte
+ */
+ fib_protocol_t ia_nh_proto;
+
+ /**
+ * Flags on the adjacency
+   * 1 byte
+ */
+ adj_flags_t ia_flags;
+
+ union
+ {
+ /**
+ * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE
+ *
+ * neighbour adjacency sub-type;
+ */
+ struct
+ {
+ ip46_address_t next_hop;
+ } nbr;
+ /**
+ * IP_LOOKUP_NEXT_MIDCHAIN
+ *
+ * A nbr adj that is also recursive. Think tunnels.
+     * A nbr adj can transition to be of type MIDCHAIN
+ * so be sure to leave the two structs with the next_hop
+ * fields aligned.
+ */
+ struct
+ {
+ /**
+ * The recursive next-hop.
+ * This field MUST be at the same memory location as
+ * sub_type.nbr.next_hop
+ */
+ ip46_address_t next_hop;
+ /**
+ * The next DPO to use
+ */
+ dpo_id_t next_dpo;
+ /**
+ * A function to perform the post-rewrite fixup
+ */
+ adj_midchain_fixup_t fixup_func;
+ } midchain;
+ /**
+ * IP_LOOKUP_NEXT_GLEAN
+ *
+ * Glean the address to ARP for from the packet's destination.
+ * Technically these aren't adjacencies, i.e. they are not a
+ * representation of a peer. One day we might untangle this coupling
+ * and use a new Glean DPO.
+ */
+ struct
+ {
+ ip46_address_t receive_addr;
+ } glean;
+ } sub_type;
+
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+
+ /* Rewrite in second/third cache lines */
+ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
+
+ /**
+ * more control plane members that do not fit on the first cacheline
+ */
+ /**
+ * A sorted vector of delegates
+ */
+ struct adj_delegate_t_ *ia_delegates;
+
+} ip_adjacency_t;
+
+STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0),
+	       "IP adjacency cacheline 0 is not offset");
+STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) ==
+		CLIB_CACHE_LINE_BYTES),
+	       "IP adjacency cacheline 1 is more than one cacheline size offset");
+
+/**
+ * @brief
+ * Take a reference counting lock on the adjacency
+ */
+extern void adj_lock(adj_index_t adj_index);
+/**
+ * @brief
+ * Release a reference counting lock on the adjacency
+ */
+extern void adj_unlock(adj_index_t adj_index);
+
+/**
+ * @brief
+ * Add a child dependent to an adjacency. The child will
+ * thus be informed via its registered back-walk function
+ * when the adjacency state changes.
+ */
+extern u32 adj_child_add(adj_index_t adj_index,
+ fib_node_type_t type,
+ fib_node_index_t child_index);
+/**
+ * @brief
+ * Remove a child dependent
+ */
+extern void adj_child_remove(adj_index_t adj_index,
+ u32 sibling_index);
+
+/**
+ * @brief Walk the Adjacencies on a given interface
+ */
+extern void adj_walk (u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+extern vnet_link_t adj_get_link_type (adj_index_t ai);
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+extern u32 adj_get_sw_if_index (adj_index_t ai);
+
+/**
+ * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding.
+ * 0 is down, !0 is up.
+ */
+extern int adj_is_up (adj_index_t ai);
+
+/**
+ * @brief Return the rewrite string of the adjacency
+ */
+extern const u8* adj_get_rewrite (adj_index_t ai);
+
+/**
+ * @brief Notify the adjacency subsystem that the features settings for
+ * an interface have changed
+ */
+extern void adj_feature_update (u32 sw_if_index, u8 arc_index, u8 is_enable);
+
+/**
+ * @brief
+ * The global adjacency pool. Exposed for fast/inline data-plane access
+ */
+extern ip_adjacency_t *adj_pool;
+
+/**
+ * @brief
+ * Adjacency packet counters
+ */
+extern vlib_combined_counter_main_t adjacency_counters;
+
+/**
+ * @brief Global Config for enabling per-adjacency counters
+ * This is configurable because it comes with a non-negligible
+ * performance cost. */
+extern int adj_per_adj_counters;
+
+/**
+ * @brief
+ * Get a pointer to an adjacency object from its index
+ */
+static inline ip_adjacency_t *
+adj_get (adj_index_t adj_index)
+{
+ return (vec_elt_at_index(adj_pool, adj_index));
+}
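+
+/*
+ * Illustrative sketch: since adj_pool can realloc, hold indices, not
+ * pointers, across any call that may allocate an adjacency:
+ *
+ *   adj_index_t ai = adj_nbr_add_or_lock (...); // may realloc adj_pool
+ *   ip_adjacency_t *adj = adj_get (ai);         // re-resolve afterwards
+ */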
+
+/**
+ * @brief Get the global configuration option for enabling per-adj counters
+ */
+static inline int
+adj_are_counters_enabled (void)
+{
+ return (adj_per_adj_counters);
+}
+
+#endif
diff --git a/src/vnet/adj/adj_bfd.c b/src/vnet/adj/adj_bfd.c
new file mode 100644
index 00000000..3d294c46
--- /dev/null
+++ b/src/vnet/adj/adj_bfd.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/bfd/bfd_main.h>
+
+#include <vnet/adj/adj_delegate.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/fib/fib_walk.h>
+
+static adj_bfd_state_t
+adj_bfd_bfd_state_to_fib (bfd_state_e bstate)
+{
+ switch (bstate)
+ {
+ case BFD_STATE_up:
+ return (ADJ_BFD_STATE_UP);
+ case BFD_STATE_down:
+ case BFD_STATE_admin_down:
+ case BFD_STATE_init:
+ return (ADJ_BFD_STATE_DOWN);
+ }
+ return (ADJ_BFD_STATE_DOWN);
+}
+
+static void
+adj_bfd_update_walk (adj_index_t ai)
+{
+ /*
+ * initiate a backwalk of dependent children
+ * to notify of the state change of this adj.
+ */
+ fib_node_back_walk_ctx_t ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+ };
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &ctx);
+}
+
+/**
+ * @brief Callback function registered with the BFD module to receive
+ * notifications of the CRUD of BFD sessions.
+ * This would be static but for the fact it's called from the unit-tests
+ */
+void
+adj_bfd_notify (bfd_listen_event_e event,
+ const bfd_session_t *session)
+{
+ const bfd_udp_key_t *key;
+ fib_protocol_t fproto;
+ adj_delegate_t *aed;
+ adj_index_t ai;
+
+ if (BFD_HOP_TYPE_SINGLE != session->hop_type)
+ {
+ /*
+	 * multi-hop BFD sessions attach directly to the FIB entry,
+	 * single-hop sessions to the associated adjacency.
+ */
+ return;
+ }
+
+ key = &session->udp.key;
+
+ fproto = (ip46_address_is_ip4 (&key->peer_addr) ?
+ FIB_PROTOCOL_IP4:
+ FIB_PROTOCOL_IP6);
+
+ /*
+ * find the adj that corresponds to the BFD session.
+ */
+ ai = adj_nbr_add_or_lock(fproto,
+ fib_proto_to_link(fproto),
+ &key->peer_addr,
+ key->sw_if_index);
+
+ switch (event)
+ {
+ case BFD_LISTEN_EVENT_CREATE:
+ /*
+ * The creation of a new session
+ */
+ if ((ADJ_INDEX_INVALID != ai) &&
+ (aed = adj_delegate_get(adj_get(ai),
+ ADJ_DELEGATE_BFD)))
+ {
+ /*
+ * already got state for this adj
+ */
+ }
+ else
+ {
+ /*
+ * lock the adj. add the delegate.
+	     * Locking the adj prevents it being removed and thus maintains
+ * the BFD derived states
+ */
+ adj_lock(ai);
+
+ aed = adj_delegate_find_or_add(adj_get(ai), ADJ_DELEGATE_BFD);
+
+ /*
+ * pretend the session is up and skip the walk.
+ * If we set it down then we get traffic loss on new children.
+ * if we walk then we lose traffic for existing children. Wait
+ * for the first BFD UP/DOWN before we let the session's state
+ * influence forwarding.
+ */
+ aed->ad_bfd_state = ADJ_BFD_STATE_UP;
+ aed->ad_bfd_index = session->bs_idx;
+ }
+ break;
+
+ case BFD_LISTEN_EVENT_UPDATE:
+ /*
+	 * session state change up/down
+ */
+ aed = adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD);
+
+ if (NULL != aed)
+ {
+ aed->ad_bfd_state = adj_bfd_bfd_state_to_fib(session->local_state);
+ adj_bfd_update_walk(ai);
+ }
+ /*
+ * else
+ * not an adj with BFD state
+ */
+ break;
+
+ case BFD_LISTEN_EVENT_DELETE:
+ /*
+ * session has been removed.
+ */
+
+ if (adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD))
+ {
+ /*
+ * has an associated BFD tracking delegate
+	     * remove the BFD tracking delegate, update children, then
+ * unlock the adj
+ */
+ adj_delegate_remove(adj_get(ai), ADJ_DELEGATE_BFD);
+
+ adj_bfd_update_walk(ai);
+ adj_unlock(ai);
+ }
+ /*
+ * else
+ * no BFD associated state
+ */
+ break;
+ }
+
+ /*
+ * unlock match of the add-or-lock at the start
+     * unlock to match the add-or-lock at the start
+ adj_unlock(ai);
+}
+
+static clib_error_t *
+adj_bfd_main_init (vlib_main_t * vm)
+{
+ clib_error_t * error = NULL;
+
+ if ((error = vlib_call_init_function (vm, bfd_main_init)))
+ return (error);
+
+ bfd_register_listener(adj_bfd_notify);
+
+ return (error);
+}
+
+VLIB_INIT_FUNCTION (adj_bfd_main_init);
diff --git a/src/vnet/adj/adj_delegate.c b/src/vnet/adj/adj_delegate.c
new file mode 100644
index 00000000..701b36e2
--- /dev/null
+++ b/src/vnet/adj/adj_delegate.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_delegate.h>
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+
+static adj_delegate_t *
+adj_delegate_find_i (const ip_adjacency_t *adj,
+ adj_delegate_type_t type,
+ u32 *index)
+{
+ adj_delegate_t *delegate;
+ int ii;
+
+ ii = 0;
+ vec_foreach(delegate, adj->ia_delegates)
+ {
+ if (delegate->ad_type == type)
+ {
+ if (NULL != index)
+ *index = ii;
+
+ return (delegate);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+
+ return (NULL);
+}
+
+adj_delegate_t *
+adj_delegate_get (const ip_adjacency_t *adj,
+ adj_delegate_type_t type)
+{
+ return (adj_delegate_find_i(adj, type, NULL));
+}
+
+void
+adj_delegate_remove (ip_adjacency_t *adj,
+ adj_delegate_type_t type)
+{
+ adj_delegate_t *aed;
+ u32 index = ~0;
+
+ aed = adj_delegate_find_i(adj, type, &index);
+
+ ASSERT(NULL != aed);
+
+ vec_del1(adj->ia_delegates, index);
+}
+
+static int
+adj_delegate_cmp_for_sort (void * v1,
+ void * v2)
+{
+ adj_delegate_t *delegate1 = v1, *delegate2 = v2;
+
+ return (delegate1->ad_type - delegate2->ad_type);
+}
+
+static void
+adj_delegate_init (ip_adjacency_t *adj,
+ adj_delegate_type_t type)
+
+{
+ adj_delegate_t delegate = {
+ .ad_adj_index = adj_get_index(adj),
+ .ad_type = type,
+ };
+
+ vec_add1(adj->ia_delegates, delegate);
+ vec_sort_with_function(adj->ia_delegates,
+ adj_delegate_cmp_for_sort);
+}
+
+adj_delegate_t *
+adj_delegate_find_or_add (ip_adjacency_t *adj,
+ adj_delegate_type_t adt)
+{
+ adj_delegate_t *delegate;
+
+ delegate = adj_delegate_get(adj, adt);
+
+ if (NULL == delegate)
+ {
+ adj_delegate_init(adj, adt);
+ }
+
+ return (adj_delegate_get(adj, adt));
+}
+
+/**
+ * typedef for printing a delegate
+ */
+typedef u8 * (*adj_delegate_format_t)(const adj_delegate_t *aed,
+ u8 *s);
+
+/**
+ * Print a delegate that represents BFD tracking
+ */
+static u8 *
+adj_delegate_fmt_bfd (const adj_delegate_t *aed,
+ u8 *s)
+{
+ s = format(s, "BFD:[state:%d index:%d]",
+ aed->ad_bfd_state,
+ aed->ad_bfd_index);
+
+ return (s);
+}
+
+/**
+ * A delegate type to formatter map
+ */
+static adj_delegate_format_t aed_formatters[] =
+{
+ [ADJ_DELEGATE_BFD] = adj_delegate_fmt_bfd,
+};
+
+u8 *
+format_adj_deletegate (u8 * s, va_list * args)
+{
+ adj_delegate_t *aed;
+
+ aed = va_arg (*args, adj_delegate_t *);
+
+ return (aed_formatters[aed->ad_type](aed, s));
+}
diff --git a/src/vnet/adj/adj_delegate.h b/src/vnet/adj/adj_delegate.h
new file mode 100644
index 00000000..17651203
--- /dev/null
+++ b/src/vnet/adj/adj_delegate.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_DELEGATE_T__
+#define __ADJ_DELEGATE_T__
+
+#include <vnet/adj/adj.h>
+
+/**
+ * Delegate types
+ */
+typedef enum adj_delegate_type_t_ {
+ /**
+ * BFD session state
+ */
+ ADJ_DELEGATE_BFD,
+} adj_delegate_type_t;
+
+#define FOR_EACH_ADJ_DELEGATE(_adj, _adt, _aed, _body) \
+{ \
+ for (_adt = ADJ_DELEGATE_BFD; \
+ _adt <= ADJ_DELEGATE_BFD; \
+ _adt++) \
+ { \
+ _aed = adj_delegate_get(_adj, _adt); \
+ if (NULL != _aed) { \
+ _body; \
+ } \
+ } \
+}
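+
+/*
+ * Illustrative usage (the loop variables are declared by the caller):
+ *
+ *   adj_delegate_type_t adt;
+ *   adj_delegate_t *aed;
+ *
+ *   FOR_EACH_ADJ_DELEGATE(adj, adt, aed,
+ *   {
+ *       s = format(s, "%U", format_adj_deletegate, aed);
+ *   });
+ */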
+
+/**
+ * Distillation of the BFD session states into a go/no-go for using
+ * the associated tracked adjacency
+ */
+typedef enum adj_bfd_state_t_
+{
+ ADJ_BFD_STATE_DOWN,
+ ADJ_BFD_STATE_UP,
+} adj_bfd_state_t;
+
+/**
+ * A Delegate is a means to implement the Delegation design pattern;
+ * the extension of an object's functionality through the composition of,
+ * and delegation to, other objects.
+ * These 'other' objects are delegates. Delegates are thus attached to
+ * ADJ objects to extend their functionality.
+ */
+typedef struct adj_delegate_t_
+{
+ /**
+     * The ADJ entry object to which the delegate is attached
+ */
+ adj_index_t ad_adj_index;
+
+ /**
+     * The delegate type
+ */
+ adj_delegate_type_t ad_type;
+
+ /**
+ * A union of data for the different delegate types
+ */
+ union
+ {
+ /**
+	 * BFD delegate data
+ */
+ struct {
+ /**
+ * BFD session state
+ */
+ adj_bfd_state_t ad_bfd_state;
+ /**
+ * BFD session index
+ */
+ u32 ad_bfd_index;
+ };
+ };
+} adj_delegate_t;
+
+extern void adj_delegate_remove(ip_adjacency_t *adj,
+ adj_delegate_type_t type);
+
+extern adj_delegate_t *adj_delegate_find_or_add(ip_adjacency_t *adj,
+						adj_delegate_type_t adt);
+extern adj_delegate_t *adj_delegate_get(const ip_adjacency_t *adj,
+ adj_delegate_type_t type);
+
+extern u8 *format_adj_deletegate(u8 * s, va_list * args);
+
+#endif
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
new file mode 100644
index 00000000..8d86e2a9
--- /dev/null
+++ b/src/vnet/adj/adj_glean.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_walk.h>
+
+/*
+ * The 'DB' of all glean adjs.
+ * There is only one glean per-interface per-protocol, so this is a per-interface
+ * vector
+ */
+static adj_index_t *adj_gleans[FIB_PROTOCOL_MAX];
+
+static inline vlib_node_registration_t*
+adj_get_glean_node (fib_protocol_t proto)
+{
+ switch (proto) {
+ case FIB_PROTOCOL_IP4:
+ return (&ip4_glean_node);
+ case FIB_PROTOCOL_IP6:
+ return (&ip6_glean_node);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (NULL);
+}
+
+/*
+ * adj_glean_add_or_lock
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The glean adj is added for an interface's connected prefix; the next-hop
+ * passed here is the local address on the same interface.
+ */
+adj_index_t
+adj_glean_add_or_lock (fib_protocol_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr)
+{
+ ip_adjacency_t * adj;
+
+ vec_validate_init_empty(adj_gleans[proto], sw_if_index, ADJ_INDEX_INVALID);
+
+ if (ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ {
+ adj = adj_alloc(proto);
+
+ adj->lookup_next_index = IP_LOOKUP_NEXT_GLEAN;
+ adj->ia_nh_proto = proto;
+ adj_gleans[proto][sw_if_index] = adj_get_index(adj);
+
+ if (NULL != nh_addr)
+ {
+ adj->sub_type.glean.receive_addr = *nh_addr;
+ }
+
+ adj->rewrite_header.data_bytes = 0;
+
+ vnet_rewrite_for_sw_interface(vnet_get_main(),
+ adj_fib_proto_2_nd(proto),
+ sw_if_index,
+ adj_get_glean_node(proto)->index,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+ &adj->rewrite_header,
+ sizeof (adj->rewrite_data));
+ }
+ else
+ {
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+ }
+
+ adj_lock(adj_get_index(adj));
+
+ return (adj_get_index(adj));
+}
+
+void
+adj_glean_remove (fib_protocol_t proto,
+ u32 sw_if_index)
+{
+ ASSERT(sw_if_index < vec_len(adj_gleans[proto]));
+
+ adj_gleans[proto][sw_if_index] = ADJ_INDEX_INVALID;
+}
+
+static clib_error_t *
+adj_glean_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ /*
+ * for each glean on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_gleans[proto]) ||
+ ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ?
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_glean_interface_state_change);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes
+ */
+static void
+adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ void *arg)
+{
+ adj_glean_interface_state_change(vnm, sw_if_index, (uword) arg);
+}
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+adj_glean_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ /*
+ * walk SW interfaces on the HW
+ */
+ uword sw_flags;
+
+ sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP :
+ 0);
+
+ vnet_hw_interface_walk_sw(vnm, hw_if_index,
+ adj_nbr_hw_sw_interface_state_change,
+ (void*) sw_flags);
+
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+ adj_glean_hw_interface_state_change);
+
+static clib_error_t *
+adj_glean_interface_delete (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ /*
+ * for each glean on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+ if (is_add)
+ {
+ /*
+ * not interested in interface additions. we will not back walk
+ * to resolve paths through newly added interfaces. Why? The control
+ * plane should have the brains to add interfaces first, then routes.
+	 * So the case where there are paths with an interface that matches
+	 * one just created is the case where the path resolved through an
+	 * interface that was deleted, and still has not been removed. The
+	 * newly added interface is NO GUARANTEE that the interface being
+ * added now, even though it may have the same sw_if_index, is the
+ * same interface that the path needs. So tough!
+ * If the control plane wants these routes to resolve it needs to
+ * remove and add them again.
+ */
+ return (NULL);
+ }
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_gleans[proto]) ||
+ ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_glean_interface_delete);
+
+u8*
+format_adj_glean (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ return (format(s, "%U-glean: %U",
+ format_fib_protocol, adj->ia_nh_proto,
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm,
+ adj->rewrite_header.sw_if_index)));
+}
+
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+const static dpo_vft_t adj_glean_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_glean,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a glean
+ * object.
+ *
+ * these are the graph nodes for which a glean is the parent object
+ * in the DPO-graph.
+ */
+const static char* const glean_ip4_nodes[] =
+{
+ "ip4-glean",
+ NULL,
+};
+const static char* const glean_ip6_nodes[] =
+{
+ "ip6-glean",
+ NULL,
+};
+
+const static char* const * const glean_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = glean_ip4_nodes,
+ [DPO_PROTO_IP6] = glean_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+adj_glean_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY_GLEAN, &adj_glean_dpo_vft, glean_nodes);
+}
diff --git a/src/vnet/adj/adj_glean.h b/src/vnet/adj/adj_glean.h
new file mode 100644
index 00000000..640bd2f9
--- /dev/null
+++ b/src/vnet/adj/adj_glean.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief Glean Adjacency
+ *
+ * A glean adjacency represents the need to discover new peers on an
+ * attached link. Packets that hit a glean adjacency will generate an
+ * ARP/ND packet addressed to the packet's destination address.
+ * Note this is different to an incomplete neighbour adjacency, which
+ * does not send ARP/ND requests to the packet's destination address,
+ * but instead to the next-hop address of the adjacency itself.
+ */
+
+#ifndef __ADJ_GLEAN_H__
+#define __ADJ_GLEAN_H__
+
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing glean adjacency
+ *
+ * @param proto
+ * The protocol for the neighbours that we wish to glean
+ *
+ * @param sw_if_index
+ * The interface on which to glean
+ *
+ * @param nh_addr
+ *  the address applied to the interface on which to glean. This is
+ *  used as the source address in the ARP/ND packets sent.
+ */
+extern adj_index_t adj_glean_add_or_lock(fib_protocol_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr);
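+
+/*
+ * Illustrative usage (sw_if_index and local_addr are hypothetical
+ * values), e.g. when a connected prefix is added to an interface:
+ *
+ *   adj_index_t ai;
+ *
+ *   ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &local_addr);
+ *   ...
+ *   adj_unlock (ai);  // when the prefix is removed
+ */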
+
+/**
+ * @brief Format/display a glean adjacency.
+ */
+extern u8* format_adj_glean(u8* s, va_list *ap);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_glean_module_init(void);
+
+#endif
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
new file mode 100644
index 00000000..2c123c54
--- /dev/null
+++ b/src/vnet/adj/adj_internal.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_INTERNAL_H__
+#define __ADJ_INTERNAL_H__
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/adj/adj_l2.h>
+#include <vnet/adj/adj_nsh.h>
+
+/**
+ * big switch to turn on Adjacency debugging
+ */
+#undef ADJ_DEBUG
+
+/*
+ * Debug macro
+ */
+#ifdef ADJ_DEBUG
+#define ADJ_DBG(_adj, _fmt, _args...) \
+{ \
+ clib_warning("adj:[%d:%p]:" _fmt, \
+ _adj - adj_pool, _adj, \
+ ##_args); \
+}
+#else
+#define ADJ_DBG(_e, _fmt, _args...)
+#endif
+
+static inline u32
+adj_get_rewrite_node (vnet_link_t linkt)
+{
+ switch (linkt) {
+ case VNET_LINK_IP4:
+ return (ip4_rewrite_node.index);
+ case VNET_LINK_IP6:
+ return (ip6_rewrite_node.index);
+ case VNET_LINK_MPLS:
+ return (mpls_output_node.index);
+ case VNET_LINK_ETHERNET:
+ return (adj_l2_rewrite_node.index);
+ case VNET_LINK_NSH:
+ return (adj_nsh_rewrite_node.index);
+ case VNET_LINK_ARP:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+static inline vnet_link_t
+adj_fib_proto_2_nd (fib_protocol_t fp)
+{
+ switch (fp)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (VNET_LINK_ARP);
+ case FIB_PROTOCOL_IP6:
+ return (VNET_LINK_IP6);
+ case FIB_PROTOCOL_MPLS:
+ return (VNET_LINK_MPLS);
+ }
+ return (0);
+}
+
+/**
+ * @brief
+ * Get a pointer to an adjacency object from its index
+ */
+static inline adj_index_t
+adj_get_index (ip_adjacency_t *adj)
+{
+ return (adj - adj_pool);
+}
+
+extern void adj_nbr_update_rewrite_internal(ip_adjacency_t *adj,
+ ip_lookup_next_t adj_next_index,
+ u32 complete_next_index,
+ u32 next_index,
+ u8 *rewrite);
+extern void adj_midchain_setup(adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags);
+
+extern ip_adjacency_t * adj_alloc(fib_protocol_t proto);
+
+extern void adj_nbr_remove(adj_index_t ai,
+ fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index);
+extern void adj_glean_remove(fib_protocol_t proto,
+ u32 sw_if_index);
+extern void adj_mcast_remove(fib_protocol_t proto,
+ u32 sw_if_index);
+
+#endif
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
new file mode 100644
index 00000000..20d70dd4
--- /dev/null
+++ b/src/vnet/adj/adj_l2.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_l2.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+
+/**
+ * @brief Trace data for a L2 Midchain
+ */
+typedef struct adj_l2_trace_t_ {
+ /** Adjacency index taken. */
+ u32 adj_index;
+} adj_l2_trace_t;
+
+static u8 *
+format_adj_l2_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ adj_l2_trace_t * t = va_arg (*args, adj_l2_trace_t *);
+
+ s = format (s, "adj-idx %d : %U",
+ t->adj_index,
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
+ return s;
+}
+
+typedef enum adj_l2_rewrite_next_t_
+{
+ ADJ_L2_REWRITE_NEXT_DROP,
+} adj_l2_rewrite_next_t;
+
+always_inline uword
+adj_l2_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_midchain)
+{
+ u32 * from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, * to_next, next_index;
+ u32 thread_index = vlib_get_thread_index();
+ ethernet_main_t * em = &ethernet_main;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+ char *h0;
+ u32 pi0, rw_len0, adj_index0, next0 = 0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ h0 = vlib_buffer_get_current (p0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get (adj_index0);
+
+	  /* Guess we are only writing a simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], h0,
+ sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
+ vnet_buffer(p0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index;
+
+ vlib_increment_combined_counter(&adjacency_counters,
+ thread_index,
+ adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0);
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes)))
+ {
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+	       * to see the IP header */
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ /*
+	       * Follow the feature arc. This will eventually result in
+ * the midchain-tx node
+ */
+ vnet_feature_arc_start(em->output_feature_arc_index, tx_sw_if_index0, &next0, p0);
+ }
+ else
+ {
+ /* can't fragment L2 */
+ next0 = ADJ_L2_REWRITE_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_l2_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+adj_l2_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_l2_rewrite_inline (vm, node, frame, 0);
+}
+
+static uword
+adj_l2_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_l2_rewrite_inline (vm, node, frame, 1);
+}
+
+VLIB_REGISTER_NODE (adj_l2_rewrite_node) = {
+ .function = adj_l2_rewrite,
+ .name = "adj-l2-rewrite",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_l2_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_L2_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_l2_rewrite_node, adj_l2_rewrite)
+
+VLIB_REGISTER_NODE (adj_l2_midchain_node) = {
+ .function = adj_l2_midchain,
+ .name = "adj-l2-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_l2_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_L2_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_l2_midchain_node, adj_l2_midchain)
diff --git a/src/vnet/adj/adj_l2.h b/src/vnet/adj/adj_l2.h
new file mode 100644
index 00000000..3aa1c74b
--- /dev/null
+++ b/src/vnet/adj/adj_l2.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_L2_H__
+#define __ADJ_L2_H__
+
+#include <vnet/adj/adj.h>
+
+extern vlib_node_registration_t adj_l2_midchain_node;
+extern vlib_node_registration_t adj_l2_rewrite_node;
+
+#endif
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
new file mode 100644
index 00000000..da06cd00
--- /dev/null
+++ b/src/vnet/adj/adj_mcast.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/ip/ip.h>
+
+/*
+ * The 'DB' of all mcast adjs.
+ * There is only one mcast per-interface per-protocol, so this is a per-interface
+ * vector
+ */
+static adj_index_t *adj_mcasts[FIB_PROTOCOL_MAX];
+
+static u32
+adj_get_mcast_node (fib_protocol_t proto)
+{
+ switch (proto) {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_rewrite_mcast_node.index);
+ case FIB_PROTOCOL_IP6:
+ return (ip6_rewrite_mcast_node.index);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+/*
+ * adj_mcast_add_or_lock
+ *
+ * There is only one mcast adj per-interface per-protocol; lock and return
+ * the existing adj if present, otherwise create it.
+ */
+adj_index_t
+adj_mcast_add_or_lock (fib_protocol_t proto,
+ vnet_link_t link_type,
+ u32 sw_if_index)
+{
+ ip_adjacency_t * adj;
+
+ vec_validate_init_empty(adj_mcasts[proto], sw_if_index, ADJ_INDEX_INVALID);
+
+ if (ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index])
+ {
+ vnet_main_t *vnm;
+
+ vnm = vnet_get_main();
+ adj = adj_alloc(proto);
+
+ adj->lookup_next_index = IP_LOOKUP_NEXT_MCAST;
+ adj->ia_nh_proto = proto;
+ adj->ia_link = link_type;
+ adj_mcasts[proto][sw_if_index] = adj_get_index(adj);
+ adj_lock(adj_get_index(adj));
+
+ vnet_rewrite_init(vnm, sw_if_index,
+ adj_get_mcast_node(proto),
+ vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
+ &adj->rewrite_header);
+
+ /*
+ * we need a rewrite where the destination IP address is converted
+ * to the appropriate link-layer address. This is interface specific.
+ * So ask the interface to do it.
+ */
+ vnet_update_adjacency_for_sw_interface(vnm, sw_if_index,
+ adj_get_index(adj));
+ }
+ else
+ {
+ adj = adj_get(adj_mcasts[proto][sw_if_index]);
+ adj_lock(adj_get_index(adj));
+ }
+
+ return (adj_get_index(adj));
+}
+
+/**
+ * adj_mcast_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when the ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_mcast_update_rewrite (adj_index_t adj_index,
+ u8 *rewrite,
+ u8 offset,
+ u32 mask)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * update the adj's rewrite string and build the arc
+ * from the rewrite node to the interface's TX node
+ */
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST,
+ adj_get_mcast_node(adj->ia_nh_proto),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ /*
+ * set the fields corresponding to the mcast IP address rewrite
+ * The mask must be stored in network byte order, since the packet's
+ * IP address will also be in network order.
+ */
+ adj->rewrite_header.dst_mcast_offset = offset;
+ adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask);
+}
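+
+/*
+ * Illustrative example (the 23-bit mapping is per RFC 1112; the offset
+ * value is hypothetical): for IPv4 multicast over Ethernet the group
+ * address maps into the low 23 bits of the 01:00:5e:00:00:00 destination
+ * MAC, so a caller would pass mask = 0x007fffff and an offset locating
+ * the last 4 bytes of the destination MAC within the rewrite string.
+ */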
+
+/**
+ * adj_mcast_midchain_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when the ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_mcast_midchain_update_rewrite (adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags,
+ u8 *rewrite,
+ u8 offset,
+ u32 mask)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+     * one time only update. since we don't support changing the tunnel
+ * src,dst, this is all we need.
+ */
+ ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_MCAST);
+ /*
+ * tunnels can always provide a rewrite.
+ */
+ ASSERT(NULL != rewrite);
+
+ adj_midchain_setup(adj_index, fixup, flags);
+
+ /*
+ * update the adj's rewrite string and build the arc
+ * from the rewrite node to the interface's TX node
+ */
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST_MIDCHAIN,
+ adj_get_mcast_node(adj->ia_nh_proto),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+
+ /*
+ * set the fields corresponding to the mcast IP address rewrite
+ * The mask must be stored in network byte order, since the packet's
+ * IP address will also be in network order.
+ */
+ adj->rewrite_header.dst_mcast_offset = offset;
+ adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask);
+}
+
+void
+adj_mcast_remove (fib_protocol_t proto,
+ u32 sw_if_index)
+{
+ ASSERT(sw_if_index < vec_len(adj_mcasts[proto]));
+
+ adj_mcasts[proto][sw_if_index] = ADJ_INDEX_INVALID;
+}
+
+static clib_error_t *
+adj_mcast_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ /*
+ * for each mcast on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_mcasts[proto]) ||
+ ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_mcasts[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ?
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_mcast_interface_state_change);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes
+ */
+static void
+adj_mcast_hw_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ void *arg)
+{
+ adj_mcast_interface_state_change(vnm, sw_if_index, (uword) arg);
+}
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+adj_mcast_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ /*
+ * walk SW interfaces on the HW
+ */
+ uword sw_flags;
+
+ sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP :
+ 0);
+
+ vnet_hw_interface_walk_sw(vnm, hw_if_index,
+ adj_mcast_hw_sw_interface_state_change,
+ (void*) sw_flags);
+
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+ adj_mcast_hw_interface_state_change);
+
+static clib_error_t *
+adj_mcast_interface_delete (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ /*
+ * for each mcast on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+ if (is_add)
+ {
+ /*
+ * not interested in interface additions. we will not back walk
+ * to resolve paths through newly added interfaces. Why? The control
+ * plane should have the brains to add interfaces first, then routes.
+	 * So the case where there are paths with an interface that matches
+	 * one just created is the case where the path resolved through an
+	 * interface that was deleted, and still has not been removed. The
+	 * newly added interface is NO GUARANTEE that the interface being
+ * added now, even though it may have the same sw_if_index, is the
+ * same interface that the path needs. So tough!
+ * If the control plane wants these routes to resolve it needs to
+ * remove and add them again.
+ */
+ return (NULL);
+ }
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_mcasts[proto]) ||
+ ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_mcasts[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete);
+
+/**
+ * @brief Walk the multicast Adjacencies on a given interface
+ */
+void
+adj_mcast_walk (u32 sw_if_index,
+ fib_protocol_t proto,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (vec_len(adj_mcasts[proto]) > sw_if_index)
+ {
+ if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index])
+ {
+ cb(adj_mcasts[proto][sw_if_index], ctx);
+ }
+ }
+}
+
+u8*
+format_adj_mcast (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format(s, "%U-mcast: ",
+ format_fib_protocol, adj->ia_nh_proto);
+ if (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)
+ s = format(s, "[features] ");
+ s = format (s, "%U",
+ format_vnet_rewrite,
+ &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
+
+ return (s);
+}
+
+u8*
+format_adj_mcast_midchain (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format(s, "%U-mcast-midchain: ",
+ format_fib_protocol, adj->ia_nh_proto);
+ s = format (s, "%U",
+ format_vnet_rewrite,
+ vnm->vlib_main, &adj->rewrite_header,
+ sizeof (adj->rewrite_data), 0);
+ s = format (s, "\n%Ustacked-on:\n%U%U",
+ format_white_space, indent,
+ format_white_space, indent+2,
+ format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+
+ return (s);
+}
+
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+const static dpo_vft_t adj_mcast_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_mcast,
+};
+const static dpo_vft_t adj_mcast_midchain_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_mcast_midchain,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a mcast
+ * object.
+ *
+ * these are the graph nodes for which a mcast is the parent object
+ * in the DPO-graph.
+ */
+const static char* const adj_mcast_ip4_nodes[] =
+{
+ "ip4-rewrite-mcast",
+ NULL,
+};
+const static char* const adj_mcast_ip6_nodes[] =
+{
+ "ip6-rewrite-mcast",
+ NULL,
+};
+
+const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = adj_mcast_ip4_nodes,
+ [DPO_PROTO_IP6] = adj_mcast_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a mcast
+ * midchain object.
+ *
+ * these are the graph nodes for which a mcast midchain is the parent
+ * object in the DPO-graph.
+ */
+const static char* const adj_mcast_midchain_ip4_nodes[] =
+{
+ "ip4-mcast-midchain",
+ NULL,
+};
+const static char* const adj_mcast_midchain_ip6_nodes[] =
+{
+ "ip6-mcast-midchain",
+ NULL,
+};
+
+const static char* const * const adj_mcast_midchain_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = adj_mcast_midchain_ip4_nodes,
+ [DPO_PROTO_IP6] = adj_mcast_midchain_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+/**
+ * @brief Return the size of the adj DB.
+ * This is only for testing purposes so an efficient implementation is not needed
+ */
+u32
+adj_mcast_db_size (void)
+{
+ u32 n_adjs, sw_if_index;
+ fib_protocol_t proto;
+
+ n_adjs = 0;
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ for (sw_if_index = 0;
+ sw_if_index < vec_len(adj_mcasts[proto]);
+ sw_if_index++)
+ {
+ if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index])
+ {
+ n_adjs++;
+ }
+ }
+ }
+
+ return (n_adjs);
+}
+
+void
+adj_mcast_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY_MCAST,
+ &adj_mcast_dpo_vft,
+ adj_mcast_nodes);
+ dpo_register(DPO_ADJACENCY_MCAST_MIDCHAIN,
+ &adj_mcast_midchain_dpo_vft,
+ adj_mcast_midchain_nodes);
+}
diff --git a/src/vnet/adj/adj_mcast.h b/src/vnet/adj/adj_mcast.h
new file mode 100644
index 00000000..bfb0d6f6
--- /dev/null
+++ b/src/vnet/adj/adj_mcast.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief Mcast Adjacency
+ *
+ * The multicast adjacency forwards IP traffic on an interface toward a multicast
+ * group address. This is a different type of adjacency to a unicast adjacency
+ * since the application of the MAC header is different, and so the VLIB node
+ * visited is also different. DPO types have different VLIB nodes.
+ */
+
+#ifndef __ADJ_MCAST_H__
+#define __ADJ_MCAST_H__
+
+#include <vnet/adj/adj_types.h>
+#include <vnet/adj/adj_midchain.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing mcast adjacency
+ *
+ * @param proto
+ * The protocol for the neighbours that we wish to mcast
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param sw_if_index
+ * The interface on which to mcast
+ */
+extern adj_index_t adj_mcast_add_or_lock(fib_protocol_t proto,
+ vnet_link_t link_type,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Update the rewrite string for an existing adjacency.
+ *
+ * @param
+ * The index of the adj to update
+ *
+ * @param
+ * The new rewrite
+ *
+ * @param
+ *  The offset in the rewrite at which to write the packet's
+ *  IP Address
+ *
+ * @param
+ *  The mask to apply to the packet before the rewrite.
+ */
+extern void adj_mcast_update_rewrite(adj_index_t adj_index,
+ u8 *rewrite,
+ u8 offset,
+ u32 mask);
+
+/**
+ * @brief
+ * Update the rewrite string for an existing adjacency and
+ * convert the adjacency into a midchain
+ *
+ * @param
+ * The index of the adj to update
+ *
+ * @param
+ * The new rewrite
+ */
+extern void adj_mcast_midchain_update_rewrite(adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags,
+ u8 *rewrite,
+ u8 offset,
+ u32 mask);
+/**
+ * @brief Walk the multicast Adjacencies on a given interface
+ */
+extern void adj_mcast_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Format/display a mcast adjacency.
+ */
+extern u8* format_adj_mcast(u8* s, va_list *ap);
+extern u8* format_adj_mcast_midchain(u8* s, va_list *ap);
+
+/**
+ * @brief Get the size of the mcast adj DB. Test purposes only.
+ */
+extern u32 adj_mcast_db_size(void);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_mcast_module_init(void);
+
+#endif
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
new file mode 100644
index 00000000..e9a510b0
--- /dev/null
+++ b/src/vnet/adj/adj_midchain.c
@@ -0,0 +1,666 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/adj/adj_l2.h>
+#include <vnet/adj/adj_nsh.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/fib/fib_walk.h>
+
+/**
+ * The two midchain tx feature node indices
+ */
+static u32 adj_midchain_tx_feature_node[VNET_LINK_NUM];
+static u32 adj_midchain_tx_no_count_feature_node[VNET_LINK_NUM];
+
+/**
+ * @brief Trace data for packets traversing the midchain tx node
+ */
+typedef struct adj_midchain_tx_trace_t_
+{
+ /**
+ * @brief the midchain adj we are traversing
+ */
+ adj_index_t ai;
+} adj_midchain_tx_trace_t;
+
+always_inline uword
+adj_midchain_tx_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int interface_count)
+{
+ u32 * from, * to_next, n_left_from, n_left_to_next;
+ u32 next_index;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u32 thread_index = vm->thread_index;
+
+ /* Vector of buffer / pkt indices we're supposed to process */
+ from = vlib_frame_vector_args (frame);
+
+ /* Number of buffers / pkts */
+ n_left_from = frame->n_vectors;
+
+ /* Speculatively send the first buffer to the last disposition we used */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ /* set up to enqueue to our disposition with index = next_index */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
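+	/*
+	 * Quad-loop: handle 4 packets per iteration and prefetch the
+	 * next 4 buffer headers; the single-packet loop below mops up
+	 * the remainder.
+	 */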
+ while (n_left_from >= 8 && n_left_to_next > 4)
+ {
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t * adj0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+ u32 bi1, adj_index1, next1;
+ const ip_adjacency_t * adj1;
+ const dpo_id_t *dpo1;
+ vlib_buffer_t * b1;
+ u32 bi2, adj_index2, next2;
+ const ip_adjacency_t * adj2;
+ const dpo_id_t *dpo2;
+ vlib_buffer_t * b2;
+ u32 bi3, adj_index3, next3;
+ const ip_adjacency_t * adj3;
+ const dpo_id_t *dpo3;
+ vlib_buffer_t * b3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p4, * p5;
+ vlib_buffer_t * p6, * p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+ }
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ bi1 = from[1];
+ to_next[1] = bi1;
+ bi2 = from[2];
+ to_next[2] = bi2;
+ bi3 = from[3];
+ to_next[3] = bi3;
+
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer(vm, bi0);
+ b1 = vlib_get_buffer(vm, bi1);
+ b2 = vlib_get_buffer(vm, bi2);
+ b3 = vlib_get_buffer(vm, bi3);
+
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+ adj_index2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX];
+ adj_index3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+ adj1 = adj_get(adj_index1);
+ adj2 = adj_get(adj_index2);
+ adj3 = adj_get(adj_index3);
+
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ dpo1 = &adj1->sub_type.midchain.next_dpo;
+ dpo2 = &adj2->sub_type.midchain.next_dpo;
+ dpo3 = &adj3->sub_type.midchain.next_dpo;
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+ next2 = dpo2->dpoi_next_node;
+ next3 = dpo3->dpoi_next_node;
+
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer(b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+ vnet_buffer(b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+ if (interface_count)
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj0->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj1->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b1));
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj2->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b2));
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj3->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b3));
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->ai = adj_index0;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->ai = adj_index1;
+ }
+ if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b2, sizeof (*tr));
+ tr->ai = adj_index2;
+ }
+ if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b3, sizeof (*tr));
+ tr->ai = adj_index3;
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t * adj0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer(vm, bi0);
+
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get(adj_index0);
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (interface_count)
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj0->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->ai = adj_index0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static u8 *
+format_adj_midchain_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*);
+
+ s = format(s, "adj-midchain:[%d]:%U", tr->ai,
+ format_ip_adjacency, tr->ai,
+ FORMAT_IP_ADJACENCY_NONE);
+
+ return (s);
+}
+
+static uword
+adj_midchain_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (adj_midchain_tx_inline(vm, node, frame, 1));
+}
+
+VLIB_REGISTER_NODE (adj_midchain_tx_node, static) = {
+ .function = adj_midchain_tx,
+ .name = "adj-midchain-tx",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_midchain_tx_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+static uword
+adj_midchain_tx_no_count (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (adj_midchain_tx_inline(vm, node, frame, 0));
+}
+
+VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node, static) = {
+ .function = adj_midchain_tx_no_count,
+ .name = "adj-midchain-tx-no-count",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_midchain_tx_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VNET_FEATURE_INIT (adj_midchain_tx_ip4, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP4],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip4, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP4],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_ip6, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP6],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip6, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP6],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_mpls, static) = {
+ .arc_name = "mpls-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_MPLS],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_mpls, static) = {
+ .arc_name = "mpls-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_MPLS],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_ethernet, static) = {
+ .arc_name = "ethernet-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_ETHERNET],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ethernet, static) = {
+ .arc_name = "ethernet-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_ETHERNET],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_nsh, static) = {
+ .arc_name = "nsh-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_NSH],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_nsh, static) = {
+ .arc_name = "nsh-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_NSH],
+};
+
+static inline u32
+adj_get_midchain_node (vnet_link_t link)
+{
+ switch (link) {
+ case VNET_LINK_IP4:
+ return (ip4_midchain_node.index);
+ case VNET_LINK_IP6:
+ return (ip6_midchain_node.index);
+ case VNET_LINK_MPLS:
+ return (mpls_midchain_node.index);
+ case VNET_LINK_ETHERNET:
+ return (adj_l2_midchain_node.index);
+ case VNET_LINK_NSH:
+ return (adj_nsh_midchain_node.index);
+ case VNET_LINK_ARP:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+static u8
+adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
+{
+ u8 arc = (u8) ~0;
+ switch (adj->ia_link)
+ {
+ case VNET_LINK_IP4:
+ {
+ arc = ip4_main.lookup_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_IP6:
+ {
+ arc = ip6_main.lookup_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_MPLS:
+ {
+ arc = mpls_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_ETHERNET:
+ {
+ arc = ethernet_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_NSH:
+ {
+ arc = nsh_main_dummy.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_ARP:
+ ASSERT(0);
+ break;
+ }
+
+ ASSERT (arc != (u8) ~0);
+
+ return (arc);
+}
+
+static u32
+adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
+{
+ return ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ?
+ adj_midchain_tx_no_count_node.index :
+ adj_midchain_tx_node.index);
+}
+
+static u32
+adj_nbr_midchain_get_feature_node (ip_adjacency_t *adj)
+{
+ if (adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT)
+ {
+ return (adj_midchain_tx_no_count_feature_node[adj->ia_link]);
+ }
+
+ return (adj_midchain_tx_feature_node[adj->ia_link]);
+}
+
+/**
+ * adj_midchain_setup
+ *
+ * Setup the adj as a mid-chain
+ */
+void
+adj_midchain_setup (adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags)
+{
+ u32 feature_index, tx_node;
+ ip_adjacency_t *adj;
+ u8 arc_index;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ adj->sub_type.midchain.fixup_func = fixup;
+ adj->ia_flags |= flags;
+
+ arc_index = adj_midchain_get_feature_arc_index_for_link_type (adj);
+ feature_index = adj_nbr_midchain_get_feature_node(adj);
+ tx_node = adj_nbr_midchain_get_tx_node(adj);
+
+ vnet_feature_enable_disable_with_index (arc_index, feature_index,
+ adj->rewrite_header.sw_if_index,
+ 1 /* enable */, 0, 0);
+
+ /*
+ * stack the midchain on the drop so it's ready to forward in the
+ * adj-midchain-tx node. The graph arc used/created here is from the
+ * midchain-tx node to the child's registered node. This is because,
+ * post adj processing, the next nodes are any output features, then
+ * the midchain-tx; from there we need to get to the stacked child's
+ * node.
+ */
+ dpo_stack_from_node(tx_node,
+ &adj->sub_type.midchain.next_dpo,
+ drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+}
+
+/**
+ * adj_nbr_midchain_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when the ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags,
+ u8 *rewrite)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * one-time-only update. since we don't support changing the tunnel
+ * src,dst, this is all we need.
+ */
+ ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP);
+ /*
+ * tunnels can always provide a rewrite.
+ */
+ ASSERT(NULL != rewrite);
+
+ adj_midchain_setup(adj_index, fixup, flags);
+
+ /*
+ * update the rewrite with the workers paused.
+ */
+ adj_nbr_update_rewrite_internal(adj,
+ IP_LOOKUP_NEXT_MIDCHAIN,
+ adj_get_midchain_node(adj->ia_link),
+ adj_nbr_midchain_get_tx_node(adj),
+ rewrite);
+}
+
+/**
+ * adj_nbr_midchain_unstack
+ *
+ * Unstack the adj. stack it on drop
+ */
+void
+adj_nbr_midchain_unstack (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * stack on the drop
+ */
+ dpo_stack(DPO_ADJACENCY_MIDCHAIN,
+ vnet_link_to_dpo_proto(adj->ia_link),
+ &adj->sub_type.midchain.next_dpo,
+ drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+ CLIB_MEMORY_BARRIER();
+}
+
+/**
+ * adj_nbr_midchain_stack
+ */
+void
+adj_nbr_midchain_stack (adj_index_t adj_index,
+ const dpo_id_t *next)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ ASSERT((IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index) ||
+ (IP_LOOKUP_NEXT_MCAST_MIDCHAIN == adj->lookup_next_index));
+
+ dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj),
+ &adj->sub_type.midchain.next_dpo,
+ next);
+}
+
+u8*
+format_adj_midchain (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ u32 indent = va_arg(*ap, u32);
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "%U", format_vnet_link, adj->ia_link);
+ s = format (s, " via %U ",
+ format_ip46_address, &adj->sub_type.nbr.next_hop, IP46_TYPE_ANY);
+ s = format (s, " %U",
+ format_vnet_rewrite,
+ &adj->rewrite_header, sizeof (adj->rewrite_data), indent);
+ s = format (s, "\n%Ustacked-on:\n%U%U",
+ format_white_space, indent,
+ format_white_space, indent+2,
+ format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+
+ return (s);
+}
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+const static dpo_vft_t adj_midchain_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_midchain,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a midchain
+ * object.
+ *
+ * this means that these graph nodes are ones from which a midchain is the
+ * parent object in the DPO-graph.
+ */
+const static char* const midchain_ip4_nodes[] =
+{
+ "ip4-midchain",
+ NULL,
+};
+const static char* const midchain_ip6_nodes[] =
+{
+ "ip6-midchain",
+ NULL,
+};
+const static char* const midchain_mpls_nodes[] =
+{
+ "mpls-midchain",
+ NULL,
+};
+const static char* const midchain_ethernet_nodes[] =
+{
+ "adj-l2-midchain",
+ NULL,
+};
+const static char* const midchain_nsh_nodes[] =
+{
+ "adj-nsh-midchain",
+ NULL,
+};
+
+const static char* const * const midchain_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = midchain_ip4_nodes,
+ [DPO_PROTO_IP6] = midchain_ip6_nodes,
+ [DPO_PROTO_MPLS] = midchain_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = midchain_ethernet_nodes,
+ [DPO_PROTO_NSH] = midchain_nsh_nodes,
+};
+
+void
+adj_midchain_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes);
+}
diff --git a/src/vnet/adj/adj_midchain.h b/src/vnet/adj/adj_midchain.h
new file mode 100644
index 00000000..27ca1d33
--- /dev/null
+++ b/src/vnet/adj/adj_midchain.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Midchain Adjacency sub-type. These adjs represent an L3 peer on a
+ * tunnel interface. The tunnel's adjacency is thus not the end of the chain,
+ * and needs to stack on/link to another chain (or portion of the graph) to
+ * reach the tunnel's destination.
+ */
+
+#ifndef __ADJ_MIDCHAIN_H__
+#define __ADJ_MIDCHAIN_H__
+
+#include <vnet/adj/adj.h>
+
+/**
+ * @brief
+ * Convert an existing neighbour adjacency into a midchain
+ *
+ * @param adj_index
+ * The index of the neighbour adjacency.
+ *
+ * @param fixup
+ *  The function applied to packets post-encap to provide the fixup,
+ *  e.g. correcting checksum, length, etc.
+ *
+ * @param flags
+ *  Flags controlling the midchain behaviour
+ *
+ * @param rewrite
+ * The rewrite.
+ */
+extern void adj_nbr_midchain_update_rewrite(adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags,
+ u8 *rewrite);
+
+/**
+ * @brief
+ * [re]stack a midchain. 'Stacking' is the act of forming parent-child
+ * relationships in the data-plane graph.
+ *
+ * @param adj_index
+ * The index of the midchain to stack
+ *
+ * @param dpo
+ * The parent DPO to stack onto (i.e. become a child of).
+ */
+extern void adj_nbr_midchain_stack(adj_index_t adj_index,
+ const dpo_id_t *dpo);
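+
+/**
+ * A minimal lifecycle sketch, assuming 'ai' is the adj for a tunnel's
+ * peer, 'tunnel_fixup' is a tunnel-specific post-encap callback, 'dpo'
+ * resolves the tunnel destination, and ADJ_FLAG_NONE is the no-op flag
+ * value from adj.h:
+ *
+ * @code
+ * adj_nbr_midchain_update_rewrite (ai, tunnel_fixup,
+ *                                  ADJ_FLAG_NONE, rewrite);
+ * adj_nbr_midchain_stack (ai, &dpo);
+ * ...
+ * adj_nbr_midchain_unstack (ai);  // traffic drops until re-stacked
+ * @endcode
+ */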
+
+/**
+ * @brief
+ * unstack a midchain. This will break the chain between the midchain and
+ * the next graph section. This is implemented as a stack-on-drop.
+ *
+ * @param adj_index
+ *  The index of the midchain to unstack
+ */
+extern void adj_nbr_midchain_unstack(adj_index_t adj_index);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_midchain_module_init(void);
+
+/**
+ * @brief
+ * Format a midchain adjacency
+ */
+extern u8* format_adj_midchain(u8* s, va_list *ap);
+
+#endif
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
new file mode 100644
index 00000000..3d450d1f
--- /dev/null
+++ b/src/vnet/adj/adj_nbr.c
@@ -0,0 +1,1124 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/fib/fib_walk.h>
+
+/*
+ * Vector of hash tables of neighbour (traditional) adjacencies.
+ * Key: interface (for the vector index), address (and its proto),
+ * link-type/ether-type.
+ */
+static BVT(clib_bihash) **adj_nbr_tables[FIB_PROTOCOL_MAX];
+
+// FIXME SIZE APPROPRIATELY. ASK DAVEB.
+#define ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (64 * 64)
+#define ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+
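+/*
+ * The bihash key packs the 128-bit next-hop address into key[0..1] and
+ * the link-type into key[2]; the interface is implicit in the choice of
+ * per-interface table.
+ */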
+#define ADJ_NBR_SET_KEY(_key, _lt, _nh) \
+{ \
+ _key.key[0] = (_nh)->as_u64[0]; \
+ _key.key[1] = (_nh)->as_u64[1]; \
+ _key.key[2] = (_lt); \
+}
+
+#define ADJ_NBR_ITF_OK(_proto, _itf) \
+ (((_itf) < vec_len(adj_nbr_tables[_proto])) && \
+ (NULL != adj_nbr_tables[_proto][(_itf)]))
+
+static void
+adj_nbr_insert (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ adj_index_t adj_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto]))
+ {
+ vec_validate(adj_nbr_tables[nh_proto], sw_if_index);
+ }
+ if (NULL == adj_nbr_tables[nh_proto][sw_if_index])
+ {
+ adj_nbr_tables[nh_proto][sw_if_index] =
+ clib_mem_alloc_aligned(sizeof(BVT(clib_bihash)),
+ CLIB_CACHE_LINE_BYTES);
+ memset(adj_nbr_tables[nh_proto][sw_if_index],
+ 0,
+ sizeof(BVT(clib_bihash)));
+
+ BV(clib_bihash_init) (adj_nbr_tables[nh_proto][sw_if_index],
+ "Adjacency Neighbour table",
+ ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
+ ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
+ }
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+ kv.value = adj_index;
+
+ BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 1);
+}
+
+void
+adj_nbr_remove (adj_index_t ai,
+ fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
+ return;
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+ kv.value = ai;
+
+ BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 0);
+}
+
+static adj_index_t
+adj_nbr_find (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+
+ if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
+ return (ADJ_INDEX_INVALID);
+
+ if (BV(clib_bihash_search)(adj_nbr_tables[nh_proto][sw_if_index],
+ &kv, &kv) < 0)
+ {
+ return (ADJ_INDEX_INVALID);
+ }
+ else
+ {
+ return (kv.value);
+ }
+}
+
+static inline u32
+adj_get_nd_node (fib_protocol_t proto)
+{
+ switch (proto) {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_arp_node.index);
+ case FIB_PROTOCOL_IP6:
+ return (ip6_discover_neighbor_node.index);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (ip4_arp_node.index);
+}
+
+/**
+ * @brief Check and set feature flags if o/p interface has any o/p features.
+ */
+static void
+adj_nbr_evaluate_feature (adj_index_t ai)
+{
+ ip_adjacency_t *adj;
+ vnet_feature_main_t *fm = &feature_main;
+ i16 feature_count;
+ u8 arc_index;
+ u32 sw_if_index;
+
+ adj = adj_get(ai);
+
+ switch (adj->ia_link)
+ {
+ case VNET_LINK_IP4:
+ arc_index = ip4_main.lookup_main.output_feature_arc_index;
+ break;
+ case VNET_LINK_IP6:
+ arc_index = ip6_main.lookup_main.output_feature_arc_index;
+ break;
+ case VNET_LINK_MPLS:
+ arc_index = mpls_main.output_feature_arc_index;
+ break;
+ default:
+ return;
+ }
+
+ sw_if_index = adj->rewrite_header.sw_if_index;
+ if (vec_len(fm->feature_count_by_sw_if_index[arc_index]) > sw_if_index)
+ {
+ feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
+ if (feature_count > 0)
+ adj->rewrite_header.flags |= VNET_REWRITE_HAS_FEATURES;
+ }
+
+ return;
+}
+
+static ip_adjacency_t*
+adj_nbr_alloc (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_alloc(nh_proto);
+
+ adj_nbr_insert(nh_proto, link_type, nh_addr,
+ sw_if_index,
+ adj_get_index(adj));
+
+ /*
+ * since we just added the ADJ we have no rewrite string for it,
+ * so it's for ARP
+ */
+ adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
+ adj->sub_type.nbr.next_hop = *nh_addr;
+ adj->ia_link = link_type;
+ adj->ia_nh_proto = nh_proto;
+ adj->rewrite_header.sw_if_index = sw_if_index;
+
+ adj_nbr_evaluate_feature (adj_get_index(adj));
+ return (adj);
+}
+
+/*
+ * adj_nbr_add_or_lock
+ *
+ * Add an adjacency for the neighbour requested.
+ *
+ * The key for an adj is:
+ * - the Next-hops protocol (i.e. v4 or v6)
+ * - the address of the next-hop
+ * - the interface the next-hop is reachable through
+ */
+adj_index_t
+adj_nbr_add_or_lock (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ adj_index_t adj_index;
+ ip_adjacency_t *adj;
+
+ adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
+
+ if (ADJ_INDEX_INVALID == adj_index)
+ {
+ vnet_main_t *vnm;
+
+ vnm = vnet_get_main();
+ adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+ adj_index = adj_get_index(adj);
+ adj_lock(adj_index);
+
+ vnet_rewrite_init(vnm, sw_if_index,
+ adj_get_nd_node(nh_proto),
+ vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
+ &adj->rewrite_header);
+
+ /*
+ * we need a rewrite where the destination IP address is converted
+ * to the appropriate link-layer address. This is interface specific.
+ * So ask the interface to do it.
+ */
+ vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_index);
+ }
+ else
+ {
+ adj_lock(adj_index);
+ }
+
+ return (adj_index);
+}
+
+adj_index_t
+adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ u8 *rewrite)
+{
+ adj_index_t adj_index;
+ ip_adjacency_t *adj;
+
+ adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
+
+ if (ADJ_INDEX_INVALID == adj_index)
+ {
+ adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+ adj->rewrite_header.sw_if_index = sw_if_index;
+ }
+ else
+ {
+ adj = adj_get(adj_index);
+ }
+
+ adj_lock(adj_get_index(adj));
+ adj_nbr_update_rewrite(adj_get_index(adj),
+ ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ rewrite);
+
+ return (adj_get_index(adj));
+}
+
+/**
+ * adj_nbr_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when the ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_update_rewrite (adj_index_t adj_index,
+ adj_nbr_rewrite_flag_t flags,
+ u8 *rewrite)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ if (flags & ADJ_NBR_REWRITE_FLAG_COMPLETE)
+ {
+ /*
+ * update the adj's rewrite string and build the arc
+ * from the rewrite node to the interface's TX node
+ */
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_REWRITE,
+ adj_get_rewrite_node(adj->ia_link),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ }
+ else
+ {
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_ARP,
+ adj_get_nd_node(adj->ia_nh_proto),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ }
+}
+
+/**
+ * adj_nbr_update_rewrite_internal
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when the ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
+ ip_lookup_next_t adj_next_index,
+ u32 this_node,
+ u32 next_node,
+ u8 *rewrite)
+{
+ ip_adjacency_t *walk_adj;
+ adj_index_t walk_ai;
+ vlib_main_t * vm;
+ u32 old_next;
+ int do_walk;
+
+ vm = vlib_get_main();
+ old_next = adj->lookup_next_index;
+
+ walk_ai = adj_get_index(adj);
+ if (VNET_LINK_MPLS == adj->ia_link)
+ {
+ /*
+ * The link type MPLS has no children in the control plane graph, it only
+ * has children in the data-plane graph. The backwalk is up the former.
+ * So we need to walk from its IP cousin.
+ */
+ walk_ai = adj_nbr_find(adj->ia_nh_proto,
+ fib_proto_to_link(adj->ia_nh_proto),
+ &adj->sub_type.nbr.next_hop,
+ adj->rewrite_header.sw_if_index);
+ }
+
+ /*
+ * Don't call the walk re-entrantly
+ */
+ if (ADJ_INDEX_INVALID != walk_ai)
+ {
+ walk_adj = adj_get(walk_ai);
+ if (ADJ_FLAG_SYNC_WALK_ACTIVE & walk_adj->ia_flags)
+ {
+ do_walk = 0;
+ }
+ else
+ {
+ /*
+ * Prevent re-entrant walk of the same adj
+ */
+ walk_adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
+ do_walk = 1;
+ }
+ }
+ else
+ {
+ do_walk = 0;
+ }
+
+ /*
+ * lock the adjacencies that are affected by updates this walk will provoke.
+ * Since the aim of the walk is to update children to link to a different
+ * DPO, this adj will no longer be in use and its lock count will drop to 0.
+ * We don't want it to be deleted as part of this endeavour.
+ */
+ adj_lock(adj_get_index(adj));
+ adj_lock(walk_ai);
+
+ /*
+ * Updating a rewrite string is not atomic;
+ * - the rewrite string is too long to write in one instruction
+ * - when swapping from incomplete to complete, we also need to update
+ * the VLIB graph next-index of the adj.
+ * ideally we would only want to suspend forwarding via this adj whilst we
+ * do this, but we do not have that level of granularity - it's suspend all
+ * worker threads or nothing.
+ * The other choices are:
+ * - to mark the adj down and back walk so child load-balances drop this adj
+ * from the set.
+ * - update the next_node index of this adj to point to error-drop
+ * both of which will mean for MAC change we will drop for this adj
+ * which is not acceptable. However, when the adj changes type (from
+ * complete to incomplete and vice-versa) the child DPOs, which have the
+ * VLIB graph next node index, will be sending packets to the wrong graph
+ * node. So from the options above, updating the next_node of the adj to
+ * be drop will work, but it relies on each graph node v4/v6/mpls, rewrite/
+ * arp/midchain always being valid w.r.t. a mismatch of adj type and node type
+ * (i.e. a rewrite adj in the arp node). This is not enforceable. Getting it
+ * wrong will lead to hard-to-find bugs since it's a race condition. So we
+ * choose the more reliable method of updating the children to use the drop,
+ * then switching adj's type, then updating the children again. Did I mention
+ * that this doesn't happen often...
+ * So we need to distinguish between the two cases:
+ * 1 - mac change
+ * 2 - adj type change
+ */
+ if (do_walk &&
+ old_next != adj_next_index &&
+ ADJ_INDEX_INVALID != walk_ai)
+ {
+ /*
+ * the adj is changing type. we need to fix all children so that they
+ * stack momentarily on a drop, while the adj changes. If we don't do
+ * this the children will send packets to a VLIB graph node that does
+ * not correspond to the adj's type - and it goes downhill from there.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_DOWN,
+ /*
+ * force this walk to be synchronous. if we don't, and a node in the graph
+ * (a heavily shared path-list) chooses to back-ground the walk (make it
+ * async) then it will pause and we will do the adj update below, before
+ * all the children are updated. not good.
+ */
+ .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
+ }
+
+ /*
+ * If we are just updating the MAC string of the adj (which we also can't
+ * do atomically), then we need to stop packets switching through the adj.
+ * We can't do that on a per-adj basis, so it's all the packets.
+ * If we are updating the type, and we walked back to the children above,
+ * then this barrier serves to flush the queues/frames.
+ */
+ vlib_worker_thread_barrier_sync(vm);
+
+ adj->lookup_next_index = adj_next_index;
+
+ if (NULL != rewrite)
+ {
+ /*
+ * new rewrite provided.
+ * fill in the adj's rewrite string, and build the VLIB graph arc.
+ */
+ vnet_rewrite_set_data_internal(&adj->rewrite_header,
+ sizeof(adj->rewrite_data),
+ rewrite,
+ vec_len(rewrite));
+ vec_free(rewrite);
+ }
+ else
+ {
+ vnet_rewrite_clear_data_internal(&adj->rewrite_header,
+ sizeof(adj->rewrite_data));
+ }
+ adj->rewrite_header.next_index = vlib_node_add_next(vlib_get_main(),
+ this_node,
+ next_node);
+
+ /*
+ * done with the rewrite update - let the workers loose.
+ */
+ vlib_worker_thread_barrier_release(vm);
+
+ if (do_walk &&
+ (old_next != adj->lookup_next_index) &&
+ (ADJ_INDEX_INVALID != walk_ai))
+ {
+ /*
+ * backwalk to the children so they can stack on the now updated
+ * adjacency
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
+ }
+ /*
+ * Prevent re-entrant walk of the same adj
+ */
+ if (do_walk)
+ {
+ walk_adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
+ }
+
+ adj_unlock(adj_get_index(adj));
+ adj_unlock(walk_ai);
+}
+
+typedef struct adj_db_count_ctx_t_ {
+ u64 count;
+} adj_db_count_ctx_t;
+
+static void
+adj_db_count (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ adj_db_count_ctx_t * ctx = arg;
+ ctx->count++;
+}
+
+u32
+adj_nbr_db_size (void)
+{
+ adj_db_count_ctx_t ctx = {
+ .count = 0,
+ };
+ fib_protocol_t proto;
+ u32 sw_if_index = 0;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
+ {
+ if (NULL != adj_nbr_tables[proto][sw_if_index])
+ {
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[proto][sw_if_index],
+ adj_db_count,
+ &ctx);
+ }
+ }
+ }
+ return (ctx.count);
+}
+
+/**
+ * @brief Context for a walk of the adjacency neighbour DB
+ */
+typedef struct adj_walk_ctx_t_
+{
+ adj_walk_cb_t awc_cb;
+ void *awc_ctx;
+} adj_walk_ctx_t;
+
+static void
+adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ adj_walk_ctx_t *ctx = arg;
+
+ // FIXME: can't stop early...
+ ctx->awc_cb(kvp->value, ctx->awc_ctx);
+}
+
+void
+adj_nbr_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+ return;
+
+ adj_walk_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[adj_nh_proto][sw_if_index],
+ adj_nbr_walk_cb,
+ &awc);
+}
+
+/**
+ * @brief Context for a walk of the adjacency neighbour DB
+ */
+typedef struct adj_walk_nh_ctx_t_
+{
+ adj_walk_cb_t awc_cb;
+ void *awc_ctx;
+ const ip46_address_t *awc_nh;
+} adj_walk_nh_ctx_t;
+
+static void
+adj_nbr_walk_nh_cb (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ ip_adjacency_t *adj;
+ adj_walk_nh_ctx_t *ctx = arg;
+
+ adj = adj_get(kvp->value);
+
+ if (!ip46_address_cmp(&adj->sub_type.nbr.next_hop, ctx->awc_nh))
+ ctx->awc_cb(kvp->value, ctx->awc_ctx);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * That is, visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+ const ip4_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP4, sw_if_index))
+ return;
+
+ ip46_address_t nh = {
+ .ip4 = *addr,
+ };
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = &nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[FIB_PROTOCOL_IP4][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * That is, visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+ const ip6_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP6, sw_if_index))
+ return;
+
+ ip46_address_t nh = {
+ .ip6 = *addr,
+ };
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = &nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[FIB_PROTOCOL_IP6][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given next-hop.
+ * That is, visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ const ip46_address_t *nh,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+ return;
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[adj_nh_proto][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * Flags associated with the interface state walks
+ */
+typedef enum adj_nbr_interface_flags_t_
+{
+ ADJ_NBR_INTERFACE_UP = (1 << 0),
+} adj_nbr_interface_flags_t;
+
+/**
+ * Context for the state change walk of the DB
+ */
+typedef struct adj_nbr_interface_state_change_ctx_t_
+{
+ /**
+ * Flags on the interface
+ */
+ adj_nbr_interface_flags_t flags;
+} adj_nbr_interface_state_change_ctx_t;
+
+static adj_walk_rc_t
+adj_nbr_interface_state_change_one (adj_index_t ai,
+ void *arg)
+{
+ /*
+ * Back walk the graph to inform the forwarding entries
+ * that this interface state has changed. Do this synchronously
+ * since this is the walk that provides convergence
+ */
+ adj_nbr_interface_state_change_ctx_t *ctx = arg;
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = ((ctx->flags & ADJ_NBR_INTERFACE_UP) ?
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+ /*
+ * the force sync applies only as far as the first fib_entry.
+ * And it's the fib_entries we need to converge away from
+ * the adjacencies on the now-down link
+ */
+ .fnbw_flags = (!(ctx->flags & ADJ_NBR_INTERFACE_UP) ?
+ FIB_NODE_BW_FLAG_FORCE_SYNC :
+ 0),
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * @brief Registered function for SW interface state changes
+ */
+static clib_error_t *
+adj_nbr_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ fib_protocol_t proto;
+
+ /*
+ * walk each adj on the interface and trigger a walk from that adj
+ */
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_interface_state_change_ctx_t ctx = {
+ .flags = ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ ADJ_NBR_INTERFACE_UP :
+ 0),
+ };
+
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_state_change_one,
+ &ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION_PRIO(
+ adj_nbr_sw_interface_state_change,
+ VNET_ITF_FUNC_PRIORITY_HIGH);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes
+ */
+static void
+adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ void *arg)
+{
+ adj_nbr_interface_state_change_ctx_t *ctx = arg;
+ fib_protocol_t proto;
+
+ /*
+ * walk each adj on the interface and trigger a walk from that adj
+ */
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_state_change_one,
+ ctx);
+ }
+}
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+adj_nbr_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ /*
+ * walk SW interface on the HW
+ */
+ adj_nbr_interface_state_change_ctx_t ctx = {
+ .flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+ ADJ_NBR_INTERFACE_UP :
+ 0),
+ };
+
+ vnet_hw_interface_walk_sw(vnm, hw_if_index,
+ adj_nbr_hw_sw_interface_state_change,
+ &ctx);
+
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(
+ adj_nbr_hw_interface_state_change,
+ VNET_ITF_FUNC_PRIORITY_HIGH);
+
+static adj_walk_rc_t
+adj_nbr_interface_delete_one (adj_index_t ai,
+ void *arg)
+{
+ /*
+ * Back walk the graph to inform the forwarding entries
+ * that this interface has been deleted.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * adj_nbr_interface_add_del
+ *
+ * Registered to receive interface Add and delete notifications
+ */
+static clib_error_t *
+adj_nbr_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ fib_protocol_t proto;
+
+ if (is_add)
+ {
+ /*
+ * not interested in interface additions. we will not back walk
+ * to resolve paths through newly added interfaces. Why? The control
+ * plane should have the brains to add interfaces first, then routes.
+ * So the case where there are paths with an interface that matches
+ * one just created is the case where the path resolved through an
+ * interface that was deleted, and still has not been removed. The
+ * addition of a new interface is NO GUARANTEE that the interface being
+ * added now, even though it may have the same sw_if_index, is the
+ * same interface that the path needs. So tough!
+ * If the control plane wants these routes to resolve it needs to
+ * remove and add them again.
+ */
+ return (NULL);
+ }
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_delete_one,
+ NULL);
+ }
+
+ return (NULL);
+
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
+
+
+static adj_walk_rc_t
+adj_nbr_show_one (adj_index_t ai,
+ void *arg)
+{
+ vlib_cli_output (arg, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_NONE);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static clib_error_t *
+adj_nbr_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ adj_index_t ai = ADJ_INDEX_INVALID;
+ u32 sw_if_index = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ai))
+ ;
+ else if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (ADJ_INDEX_INVALID != ai)
+ {
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_DETAIL);
+ }
+ else if (~0 != sw_if_index)
+ {
+ fib_protocol_t proto;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
+ }
+ }
+ else
+ {
+ fib_protocol_t proto;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*?
+ * Show all neighbour adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj nbr}
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (adj_nbr_show_command, static) = {
+ .path = "show adj nbr",
+ .short_help = "show adj nbr [<adj_index>] [interface]",
+ .function = adj_nbr_show,
+};
+
+static ip46_type_t
+adj_proto_to_46 (fib_protocol_t proto)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (IP46_TYPE_IP4);
+ case FIB_PROTOCOL_IP6:
+ return (IP46_TYPE_IP6);
+ default:
+ return (IP46_TYPE_IP4);
+ }
+}
+
+u8*
+format_adj_nbr_incomplete (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "arp-%U", format_vnet_link, adj->ia_link);
+ s = format (s, ": via %U",
+ format_ip46_address, &adj->sub_type.nbr.next_hop,
+ adj_proto_to_46(adj->ia_nh_proto));
+ s = format (s, " %U",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm,
+ adj->rewrite_header.sw_if_index));
+
+ return (s);
+}
+
+u8*
+format_adj_nbr (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "%U", format_vnet_link, adj->ia_link);
+ s = format (s, " via %U ",
+ format_ip46_address, &adj->sub_type.nbr.next_hop,
+ adj_proto_to_46(adj->ia_nh_proto));
+ s = format (s, "%U",
+ format_vnet_rewrite,
+ &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
+
+ return (s);
+}
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+static void
+adj_mem_show (void)
+{
+ fib_show_memory_usage("Adjacency",
+ pool_elts(adj_pool),
+ pool_len(adj_pool),
+ sizeof(ip_adjacency_t));
+}
+
+const static dpo_vft_t adj_nbr_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_nbr,
+ .dv_mem_show = adj_mem_show,
+};
+const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_nbr_incomplete,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an adjacency
+ * object.
+ *
+ * this means that these graph nodes are ones from which a nbr is the
+ * parent object in the DPO-graph.
+ */
+const static char* const nbr_ip4_nodes[] =
+{
+ "ip4-rewrite",
+ NULL,
+};
+const static char* const nbr_ip6_nodes[] =
+{
+ "ip6-rewrite",
+ NULL,
+};
+const static char* const nbr_mpls_nodes[] =
+{
+ "mpls-output",
+ NULL,
+};
+const static char* const nbr_ethernet_nodes[] =
+{
+ "adj-l2-rewrite",
+ NULL,
+};
+const static char* const * const nbr_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = nbr_ip4_nodes,
+ [DPO_PROTO_IP6] = nbr_ip6_nodes,
+ [DPO_PROTO_MPLS] = nbr_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = nbr_ethernet_nodes,
+};
+
+const static char* const nbr_incomplete_ip4_nodes[] =
+{
+ "ip4-arp",
+ NULL,
+};
+const static char* const nbr_incomplete_ip6_nodes[] =
+{
+ "ip6-discover-neighbor",
+ NULL,
+};
+const static char* const nbr_incomplete_mpls_nodes[] =
+{
+ "mpls-adj-incomplete",
+ NULL,
+};
+
+const static char* const * const nbr_incomplete_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = nbr_incomplete_ip4_nodes,
+ [DPO_PROTO_IP6] = nbr_incomplete_ip6_nodes,
+ [DPO_PROTO_MPLS] = nbr_incomplete_mpls_nodes,
+};
+
+void
+adj_nbr_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY,
+ &adj_nbr_dpo_vft,
+ nbr_nodes);
+ dpo_register(DPO_ADJACENCY_INCOMPLETE,
+ &adj_nbr_incompl_dpo_vft,
+ nbr_incomplete_nodes);
+}
diff --git a/src/vnet/adj/adj_nbr.h b/src/vnet/adj/adj_nbr.h
new file mode 100644
index 00000000..293766b8
--- /dev/null
+++ b/src/vnet/adj/adj_nbr.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * Neighbour Adjacency sub-type. These adjs represent an L3 peer on a
+ * connected link.
+ */
+
+#ifndef __ADJ_NBR_H__
+#define __ADJ_NBR_H__
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param nh_addr
+ * The address of the next-hop/peer to send the packet to
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ */
+extern adj_index_t adj_nbr_add_or_lock(fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index);
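+
+/**
+ * A minimal usage sketch (the next-hop address and 'sw_if_index' are
+ * assumed values): lock an IPv4 neighbour adj carrying IPv4 traffic, and
+ * release it with adj_unlock() when no longer needed.
+ *
+ * @code
+ * ip46_address_t nh = {
+ *     .ip4.as_u32 = clib_host_to_net_u32 (0x0a000001), // 10.0.0.1, assumed
+ * };
+ * adj_index_t ai;
+ *
+ * ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
+ *                           &nh, sw_if_index);
+ * ...
+ * adj_unlock (ai);
+ * @endcode
+ */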
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param nh_addr
+ * The address of the next-hop/peer to send the packet to
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ *
+ * @param rewrite
+ * The rewrite to prepend to packets
+ */
+extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ u8 *rewrite);
+/**
+ * @brief When adding a rewrite to an adjacency these are flags that
+ * apply to that rewrite
+ */
+typedef enum adj_nbr_rewrite_flag_t_
+{
+ ADJ_NBR_REWRITE_FLAG_NONE,
+
+ /**
+ * An indication that the rewrite is incomplete, i.e. that it describes the
+ * ARP/ND rewrite when probing.
+ */
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE = ADJ_NBR_REWRITE_FLAG_NONE,
+
+ /**
+ * An indication that the rewrite is complete, i.e. that it fully describes
+ * the link-layer addressing for the destination.
+ * The opposite of this is an incomplete rewrite that describes the ARP/ND
+ * rewrite when probing.
+ */
+ ADJ_NBR_REWRITE_FLAG_COMPLETE = (1 << 0),
+} adj_nbr_rewrite_flag_t;
+
+/**
+ * @brief
+ * Update the rewrite string for an existing adjacency.
+ *
+ * @param adj_index
+ *  The index of the adj to update
+ *
+ * @param flags
+ *  Flags indicating whether the rewrite is complete or incomplete
+ *
+ * @param rewrite
+ *  The new rewrite
+ */
+extern void adj_nbr_update_rewrite(adj_index_t adj_index,
+ adj_nbr_rewrite_flag_t flags,
+ u8 *rewrite);
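+
+/**
+ * A sketch of completing an incomplete (ARP) adj once the peer's MAC is
+ * learned; 'rewrite' is assumed to be a vector built by the interface
+ * type (e.g. via ethernet_build_rewrite):
+ *
+ * @code
+ * adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, rewrite);
+ * @endcode
+ */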
+
+/**
+ * @brief
+ * Format an incomplete neighbour (ARP) adjacency
+ */
+extern u8* format_adj_nbr_incomplete(u8* s, va_list *ap);
+
+/**
+ * @brief
+ * Format a neighbour (REWRITE) adjacency
+ */
+extern u8* format_adj_nbr(u8* s, va_list *ap);
+
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface
+ */
+extern void adj_nbr_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx);
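+
+/**
+ * A sketch of a walk callback (names and body are illustrative):
+ *
+ * @code
+ * static adj_walk_rc_t
+ * count_one_adj (adj_index_t ai, void *ctx)
+ * {
+ *     u32 *count = ctx;
+ *
+ *     (*count)++;
+ *     return (ADJ_WALK_RC_CONTINUE);
+ * }
+ *
+ * u32 count = 0;
+ * adj_nbr_walk (sw_if_index, FIB_PROTOCOL_IP4, count_one_adj, &count);
+ * @endcode
+ */
+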
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface with a given next-hop
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ const ip46_address_t *nh,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * That is, visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+ const ip4_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * That is, visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+ const ip6_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_nbr_module_init(void);
+
+/**
+ * @brief
+ * Return the size of the adjacency database. For testing purposes only.
+ */
+extern u32 adj_nbr_db_size(void);
+
+#endif
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
new file mode 100644
index 00000000..128570b0
--- /dev/null
+++ b/src/vnet/adj/adj_nsh.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_nsh.h>
+#include <vnet/ip/ip.h>
+
+nsh_main_dummy_t nsh_main_dummy;
+
+/**
+ * @brief Trace data for a NSH Midchain
+ */
+typedef struct adj_nsh_trace_t_ {
+ /** Adjacency index taken. */
+ u32 adj_index;
+} adj_nsh_trace_t;
+
+static u8 *
+format_adj_nsh_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ adj_nsh_trace_t * t = va_arg (*args, adj_nsh_trace_t *);
+
+ s = format (s, "adj-idx %d : %U",
+ t->adj_index,
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
+ return s;
+}
+
+typedef enum adj_nsh_rewrite_next_t_
+{
+ ADJ_NSH_REWRITE_NEXT_DROP,
+} adj_nsh_rewrite_next_t;
+
+always_inline uword
+adj_nsh_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_midchain)
+{
+ u32 * from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, * to_next, next_index;
+ u32 thread_index = vlib_get_thread_index();
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+ char *h0;
+ u32 pi0, rw_len0, adj_index0, next0 = 0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ h0 = vlib_buffer_get_current (p0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ /* We should never rewrite a pkt using the MISS adjacency */
+ ASSERT(adj_index0);
+
+ adj0 = adj_get (adj_index0);
+
+ /* We assume we are only writing a simple IP4-sized header. */
+ vnet_rewrite_one_header(adj0[0], h0, sizeof(ip4_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
+
+ vlib_increment_combined_counter(&adjacency_counters,
+ thread_index,
+ adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0);
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes)))
+ {
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP header */
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ /*
+ * Follow the feature ARC. this will result eventually in
+ * the midchain-tx node
+ */
+ vnet_feature_arc_start (nsh_main_dummy.output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+ }
+ else
+ {
+ /* can't fragment NSH */
+ next0 = ADJ_NSH_REWRITE_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_nsh_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+adj_nsh_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_nsh_rewrite_inline (vm, node, frame, 0);
+}
+
+static uword
+adj_nsh_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_nsh_rewrite_inline (vm, node, frame, 1);
+}
+
+VLIB_REGISTER_NODE (adj_nsh_rewrite_node) = {
+ .function = adj_nsh_rewrite,
+ .name = "adj-nsh-rewrite",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_nsh_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_NSH_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_nsh_rewrite_node, adj_nsh_rewrite)
+
+VLIB_REGISTER_NODE (adj_nsh_midchain_node) = {
+ .function = adj_nsh_midchain,
+ .name = "adj-nsh-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_nsh_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_NSH_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_nsh_midchain_node, adj_nsh_midchain)
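+
+/*
+ * Note (general VPP behaviour, stated here for context): the MULTIARCH
+ * macros above emit CPU-variant builds of the node functions and select
+ * the best match at runtime.
+ */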
+
+/* Built-in ip4 tx feature path definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (nsh_output, static) =
+{
+ .arc_name = "nsh-output",
+ .start_nodes = VNET_FEATURES ("adj-nsh-midchain"),
+ .arc_index_ptr = &nsh_main_dummy.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (nsh_tx_drop, static) =
+{
+ .arc_name = "nsh-output",
+ .node_name = "error-drop",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/adj/adj_nsh.h b/src/vnet/adj/adj_nsh.h
new file mode 100644
index 00000000..5501fbb9
--- /dev/null
+++ b/src/vnet/adj/adj_nsh.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_NSH_H__
+#define __ADJ_NSH_H__
+
+#include <vnet/adj/adj.h>
+
+extern vlib_node_registration_t adj_nsh_midchain_node;
+extern vlib_node_registration_t adj_nsh_rewrite_node;
+
+typedef struct _nsh_main_dummy
+{
+ u8 output_feature_arc_index;
+} nsh_main_dummy_t;
+
+extern nsh_main_dummy_t nsh_main_dummy;
+
+#endif
diff --git a/src/vnet/adj/adj_types.h b/src/vnet/adj/adj_types.h
new file mode 100644
index 00000000..cf90c084
--- /dev/null
+++ b/src/vnet/adj/adj_types.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_TYPES_H__
+#define __ADJ_TYPES_H__
+
+#include <vnet/vnet.h>
+
+/**
+ * @brief An index for adjacencies.
+ * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of
+ * an adj_index_t. However, for us humans, we can glean much more intent
+ * from the declaration
+ * foo bar(adj_index_t t);
+ * than we can from
+ * foo bar(u32 t);
+ */
+typedef u32 adj_index_t;
+
+/**
+ * @brief Invalid ADJ index - used when no adj is known
+ * Likewise, blazoned capitals INVALID speak volumes where ~0 does not.
+ */
+#define ADJ_INDEX_INVALID ((u32)~0)
+
+/**
+ * @brief Return codes from an adjacency walker callback function
+ */
+typedef enum adj_walk_rc_t_
+{
+ ADJ_WALK_RC_STOP,
+ ADJ_WALK_RC_CONTINUE,
+} adj_walk_rc_t;
+
+/**
+ * @brief Callback function invoked when walking adjacencies
+ */
+typedef adj_walk_rc_t (*adj_walk_cb_t)(adj_index_t ai,
+ void *ctx);
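+
+/*
+ * Illustrative callback (an assumption for documentation, not part of
+ * this header): count adjacencies during a walk.
+ *
+ *   static adj_walk_rc_t
+ *   count_adjs_cb (adj_index_t ai, void *ctx)
+ *   {
+ *     (*(u32 *) ctx) += 1;
+ *     return (ADJ_WALK_RC_CONTINUE);
+ *   }
+ */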
+
+#endif
diff --git a/src/vnet/adj/rewrite.c b/src/vnet/adj/rewrite.c
new file mode 100644
index 00000000..47fb74df
--- /dev/null
+++ b/src/vnet/adj/rewrite.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * rewrite.c: packet rewrite
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/lookup.h>
+
+void
+vnet_rewrite_copy_slow_path (vnet_rewrite_data_t * p0,
+ vnet_rewrite_data_t * rw0,
+ word n_left, uword most_likely_size)
+{
+ uword n_done =
+ round_pow2 (most_likely_size, sizeof (rw0[0])) / sizeof (rw0[0]);
+
+ p0 -= n_done;
+ rw0 -= n_done;
+
+  /* As we enter the cleanup loop, p0 and rw0 point to the last chunk written
+     by the fast path. Hence the constant 1 below, which
+     vnet_rewrite_copy_one renders as p0[-1] = rw0[-1]. */
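+
+  /* Worked example (assuming 8-byte vnet_rewrite_data_t words): a
+     most_likely_size of 14 rounds up to 16, so n_done = 2 and the fast
+     path has already copied rw0[-1] and rw0[-2]; the loop below copies
+     any remaining words one at a time. */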
+
+ while (n_left > 0)
+ {
+ vnet_rewrite_copy_one (p0, rw0, 1);
+ p0--;
+ rw0--;
+ n_left--;
+ }
+}
+
+u8 *
+format_vnet_rewrite (u8 * s, va_list * args)
+{
+ vnet_rewrite_header_t *rw = va_arg (*args, vnet_rewrite_header_t *);
+ u32 max_data_bytes = va_arg (*args, u32);
+ CLIB_UNUSED (uword indent) = va_arg (*args, u32);
+ vnet_main_t *vnm = vnet_get_main ();
+
+ if (rw->sw_if_index != ~0)
+ {
+ vnet_sw_interface_t *si;
+ si = vnet_get_sw_interface_safe (vnm, rw->sw_if_index);
+ if (NULL != si)
+ s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si);
+ else
+ s = format (s, "DELETED:%d", rw->sw_if_index);
+ }
+
+ /* Format rewrite string. */
+ if (rw->data_bytes > 0)
+ s = format (s, "%U",
+ format_hex_bytes,
+ rw->data + max_data_bytes - rw->data_bytes, rw->data_bytes);
+
+ return s;
+}
+
+u32
+vnet_tx_node_index_for_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ return (hw->output_node_index);
+}
+
+void
+vnet_rewrite_init (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 this_node, u32 next_node, vnet_rewrite_header_t * rw)
+{
+ rw->sw_if_index = sw_if_index;
+ rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node);
+ rw->max_l3_packet_bytes =
+ vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX);
+}
+
+void
+vnet_rewrite_for_sw_interface (vnet_main_t * vnm,
+ vnet_link_t link_type,
+ u32 sw_if_index,
+ u32 node_index,
+ void *dst_address,
+ vnet_rewrite_header_t * rw,
+ u32 max_rewrite_bytes)
+{
+
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+ u8 *rewrite = NULL;
+
+ vnet_rewrite_init (vnm, sw_if_index, node_index,
+ vnet_tx_node_index_for_sw_interface (vnm, sw_if_index),
+ rw);
+
+ ASSERT (hc->build_rewrite);
+ rewrite = hc->build_rewrite (vnm, sw_if_index, link_type, dst_address);
+
+ ASSERT (vec_len (rewrite) < max_rewrite_bytes);
+ vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rewrite,
+ vec_len (rewrite));
+ vec_free (rewrite);
+}
+
+void
+vnet_rewrite_for_tunnel (vnet_main_t * vnm,
+ u32 tx_sw_if_index,
+ u32 rewrite_node_index,
+ u32 post_rewrite_node_index,
+ vnet_rewrite_header_t * rw,
+ u8 * rewrite_data, u32 rewrite_length)
+{
+ ip_adjacency_t *adj = 0;
+ /*
+ * Installed into vnet_buffer(b)->sw_if_index[VLIB_TX] e.g.
+ * by ip4_rewrite_inline. If the post-rewrite node injects into
+ * ipX-forward, this will be interpreted as a FIB number.
+ */
+ rw->sw_if_index = tx_sw_if_index;
+ rw->next_index = vlib_node_add_next (vnm->vlib_main, rewrite_node_index,
+ post_rewrite_node_index);
+ rw->max_l3_packet_bytes = (u16) ~ 0; /* we can't know at this point */
+
+ ASSERT (rewrite_length < sizeof (adj->rewrite_data));
+ /* Leave room for ethernet + VLAN tag */
+ vnet_rewrite_set_data_internal (rw, sizeof (adj->rewrite_data),
+ rewrite_data, rewrite_length);
+}
+
+void
+serialize_vnet_rewrite (serialize_main_t * m, va_list * va)
+{
+ vnet_rewrite_header_t *rw = va_arg (*va, vnet_rewrite_header_t *);
+ u32 max_data_bytes = va_arg (*va, u32);
+ u8 *p;
+
+ serialize_integer (m, rw->sw_if_index, sizeof (rw->sw_if_index));
+ serialize_integer (m, rw->data_bytes, sizeof (rw->data_bytes));
+ serialize_integer (m, rw->max_l3_packet_bytes,
+ sizeof (rw->max_l3_packet_bytes));
+ p = serialize_get (m, rw->data_bytes);
+ clib_memcpy (p, vnet_rewrite_get_data_internal (rw, max_data_bytes),
+ rw->data_bytes);
+}
+
+void
+unserialize_vnet_rewrite (serialize_main_t * m, va_list * va)
+{
+ vnet_rewrite_header_t *rw = va_arg (*va, vnet_rewrite_header_t *);
+ u32 max_data_bytes = va_arg (*va, u32);
+ u8 *p;
+
+  /* It is up to the user to fill these in. */
+ rw->next_index = ~0;
+
+ unserialize_integer (m, &rw->sw_if_index, sizeof (rw->sw_if_index));
+ unserialize_integer (m, &rw->data_bytes, sizeof (rw->data_bytes));
+ unserialize_integer (m, &rw->max_l3_packet_bytes,
+ sizeof (rw->max_l3_packet_bytes));
+ p = unserialize_get (m, rw->data_bytes);
+ clib_memcpy (vnet_rewrite_get_data_internal (rw, max_data_bytes), p,
+ rw->data_bytes);
+}
+
+u8 *
+vnet_build_rewrite_for_sw_interface (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+ ASSERT (hc->build_rewrite);
+ return (hc->build_rewrite (vnm, sw_if_index, link_type, dst_address));
+}
+
+
+void
+vnet_update_adjacency_for_sw_interface (vnet_main_t * vnm,
+ u32 sw_if_index, u32 ai)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+ ASSERT (hc->update_adjacency);
+ hc->update_adjacency (vnm, sw_if_index, ai);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h
new file mode 100644
index 00000000..1dea72f5
--- /dev/null
+++ b/src/vnet/adj/rewrite.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * rewrite.h: packet rewrite
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_rewrite_h
+#define included_vnet_rewrite_h
+
+#include <vlib/vlib.h>
+#include <vnet/l3_types.h>
+
+/* Consider using vector types for speed? */
+typedef uword vnet_rewrite_data_t;
+
+/**
+ * Flags associated with the rewrite/adjacency
+ */
+typedef enum vnet_rewrite_flags_t_
+{
+ /**
+ * This adjacency/interface has output features configured
+ */
+ VNET_REWRITE_HAS_FEATURES = (1 << 0),
+} __attribute__ ((packed)) vnet_rewrite_flags_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ /* Interface to mark re-written packets with. */
+ u32 sw_if_index;
+
+ /* Next node to feed after packet rewrite is done. */
+ u16 next_index;
+
+ /* Number of bytes in rewrite data. */
+ u16 data_bytes;
+
+ /* Max packet size layer 3 (MTU) for output interface.
+ Used for MTU check after packet rewrite. */
+ u16 max_l3_packet_bytes;
+
+ /* Data-plane flags on the adjacency/rewrite */
+ vnet_rewrite_flags_t flags;
+
+  /* When dynamically writing a multicast destination L2 address,
+ * this is the offset from the IP address at which to write in the
+ * IP->MAC address translation.
+ */
+ u8 dst_mcast_offset;
+
+ /* The mask to apply to the lower 4 bytes of the IP address before ORing
+   * into the destination MAC address */
+ u32 dst_mcast_mask;
+
+ /* Rewrite string starting at end and going backwards. */
+ u8 data[0];
+}) vnet_rewrite_header_t;
+/* *INDENT-ON* */
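+
+/*
+ * Illustration (not normative): with a 32-byte rewrite_data area and a
+ * 14-byte Ethernet rewrite, data_bytes == 14 and the string occupies
+ * data[18..31]; it is right-justified so that it ends immediately before
+ * the start of the packet's current data.
+ */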
+
+/**
+ * At 16 bytes of rewrite header we have enough space left for an IPv6
+ * header (40 bytes) + LISP-GPE (8 bytes) in the cache line
+ */
+STATIC_ASSERT (sizeof (vnet_rewrite_header_t) <= 16,
+ "Rewrite header too big");
+
+/*
+ Helper macro for declaring rewrite string w/ given max-size.
+
+ Typical usage:
+ typedef struct {
+ //
+ int a, b;
+
+ // Total adjacency is 64 bytes.
+    vnet_declare_rewrite (64 - 2*sizeof(int)) rw;
+ } my_adjacency_t;
+*/
+#define vnet_declare_rewrite(total_bytes) \
+struct { \
+ vnet_rewrite_header_t rewrite_header; \
+ \
+ u8 rewrite_data[(total_bytes) - sizeof (vnet_rewrite_header_t)]; \
+}
+
+always_inline void
+vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size)
+{
+ /* Sanity check values carefully for this memset operation */
+ ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
+
+ rw->data_bytes = 0;
+ memset (rw->data, 0xfe, max_size);
+}
+
+always_inline void
+vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw,
+ int max_size, void *data, int data_bytes)
+{
+ /* Sanity check values carefully for this memset operation */
+ ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
+ ASSERT ((data_bytes >= 0) && (data_bytes < max_size));
+
+ rw->data_bytes = data_bytes;
+ clib_memcpy (rw->data + max_size - data_bytes, data, data_bytes);
+ memset (rw->data, 0xfe, max_size - data_bytes);
+}
+
+#define vnet_rewrite_set_data(rw,data,data_bytes) \
+ vnet_rewrite_set_data_internal (&((rw).rewrite_header), \
+ sizeof ((rw).rewrite_data), \
+ (data), \
+ (data_bytes))
+
+always_inline void *
+vnet_rewrite_get_data_internal (vnet_rewrite_header_t * rw, int max_size)
+{
+ ASSERT (rw->data_bytes <= max_size);
+ return rw->data + max_size - rw->data_bytes;
+}
+
+#define vnet_rewrite_get_data(rw) \
+ vnet_rewrite_get_data_internal (&((rw).rewrite_header), sizeof ((rw).rewrite_data))
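+
+/* Sketch of typical use (declaration shape only, names assumed):
+
+     struct { vnet_declare_rewrite (64) rw; } a;
+     vnet_rewrite_set_data (a.rw, eth_hdr, 14);
+     u8 *p = vnet_rewrite_get_data (a.rw);  the 14 bytes, right-justified
+ */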
+
+always_inline void
+vnet_rewrite_copy_one (vnet_rewrite_data_t * p0, vnet_rewrite_data_t * rw0,
+ int i)
+{
+ p0[-i] = rw0[-i];
+}
+
+void vnet_rewrite_copy_slow_path (vnet_rewrite_data_t * p0,
+ vnet_rewrite_data_t * rw0,
+ word n_left, uword most_likely_size);
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u64 a;
+ u32 b;
+ u16 c;
+}) eh_copy_t;
+/* *INDENT-ON* */
+
+always_inline void
+_vnet_rewrite_one_header (vnet_rewrite_header_t * h0,
+ void *packet0, int max_size, int most_likely_size)
+{
+ vnet_rewrite_data_t *p0 = packet0;
+ vnet_rewrite_data_t *rw0 = (vnet_rewrite_data_t *) (h0->data + max_size);
+ word n_left0;
+
+ /* 0xfefe => poisoned adjacency => crash */
+ ASSERT (h0->data_bytes != 0xfefe);
+
+ if (PREDICT_TRUE (h0->data_bytes == sizeof (eh_copy_t)))
+ {
+ eh_copy_t *s, *d;
+ s = (eh_copy_t *) (h0->data + max_size - sizeof (eh_copy_t));
+ d = (eh_copy_t *) (((u8 *) packet0) - sizeof (eh_copy_t));
+ clib_memcpy (d, s, sizeof (eh_copy_t));
+ return;
+ }
+
+#define _(i) \
+ do { \
+ if (most_likely_size > ((i)-1)*sizeof (vnet_rewrite_data_t)) \
+ vnet_rewrite_copy_one (p0, rw0, (i)); \
+ } while (0)
+
+ _(4);
+ _(3);
+ _(2);
+ _(1);
+
+#undef _
+
+ n_left0 = (int)
+ (((int) h0->data_bytes - most_likely_size) + (sizeof (rw0[0]) - 1))
+ / (int) sizeof (rw0[0]);
+ if (PREDICT_FALSE (n_left0 > 0))
+ vnet_rewrite_copy_slow_path (p0, rw0, n_left0, most_likely_size);
+}
+
+always_inline void
+_vnet_rewrite_two_headers (vnet_rewrite_header_t * h0,
+ vnet_rewrite_header_t * h1,
+ void *packet0,
+ void *packet1, int max_size, int most_likely_size)
+{
+ vnet_rewrite_data_t *p0 = packet0;
+ vnet_rewrite_data_t *p1 = packet1;
+ vnet_rewrite_data_t *rw0 = (vnet_rewrite_data_t *) (h0->data + max_size);
+ vnet_rewrite_data_t *rw1 = (vnet_rewrite_data_t *) (h1->data + max_size);
+ word n_left0, n_left1;
+ int slow_path;
+
+ /* 0xfefe => poisoned adjacency => crash */
+ ASSERT (h0->data_bytes != 0xfefe);
+ ASSERT (h1->data_bytes != 0xfefe);
+
+  /* slow_path == 0 iff both rewrites are the same length and that length
+     is sizeof (eh_copy_t), i.e. 14 bytes == an untagged Ethernet header. */
+ slow_path = h0->data_bytes ^ h1->data_bytes;
+ slow_path += h0->data_bytes ^ sizeof (eh_copy_t);
+
+ if (PREDICT_TRUE (slow_path == 0))
+ {
+ eh_copy_t *s0, *d0, *s1, *d1;
+ s0 = (eh_copy_t *) (h0->data + max_size - sizeof (eh_copy_t));
+ d0 = (eh_copy_t *) (((u8 *) packet0) - sizeof (eh_copy_t));
+ clib_memcpy (d0, s0, sizeof (eh_copy_t));
+ s1 = (eh_copy_t *) (h1->data + max_size - sizeof (eh_copy_t));
+ d1 = (eh_copy_t *) (((u8 *) packet1) - sizeof (eh_copy_t));
+ clib_memcpy (d1, s1, sizeof (eh_copy_t));
+ return;
+ }
+
+#define _(i) \
+ do { \
+ if (most_likely_size > ((i)-1)*sizeof (vnet_rewrite_data_t)) \
+ { \
+ vnet_rewrite_copy_one (p0, rw0, (i)); \
+ vnet_rewrite_copy_one (p1, rw1, (i)); \
+ } \
+ } while (0)
+
+ _(4);
+ _(3);
+ _(2);
+ _(1);
+
+#undef _
+
+ n_left0 = (int)
+ (((int) h0->data_bytes - most_likely_size) + (sizeof (rw0[0]) - 1))
+ / (int) sizeof (rw0[0]);
+ n_left1 = (int)
+ (((int) h1->data_bytes - most_likely_size) + (sizeof (rw1[0]) - 1))
+ / (int) sizeof (rw1[0]);
+
+ if (PREDICT_FALSE (n_left0 > 0 || n_left1 > 0))
+ {
+ vnet_rewrite_copy_slow_path (p0, rw0, n_left0, most_likely_size);
+ vnet_rewrite_copy_slow_path (p1, rw1, n_left1, most_likely_size);
+ }
+}
+
+#define vnet_rewrite_one_header(rw0,p0,most_likely_size) \
+ _vnet_rewrite_one_header (&((rw0).rewrite_header), (p0), \
+ sizeof ((rw0).rewrite_data), \
+ (most_likely_size))
+
+#define vnet_rewrite_two_headers(rw0,rw1,p0,p1,most_likely_size) \
+ _vnet_rewrite_two_headers (&((rw0).rewrite_header), &((rw1).rewrite_header), \
+ (p0), (p1), \
+ sizeof ((rw0).rewrite_data), \
+ (most_likely_size))
+
+always_inline void
+_vnet_fixup_one_header (vnet_rewrite_header_t * h0,
+ u8 * addr, u32 addr_len, u8 * packet0)
+{
+ if (PREDICT_TRUE (h0->dst_mcast_mask))
+ {
+ /* location to write to in the packet */
+ u8 *p0 = packet0 - h0->dst_mcast_offset;
+ u32 *p1 = (u32 *) p0;
+ /* location to copy from in the L3 dest address */
+ u32 *a0 = (u32 *) (addr + addr_len - sizeof (h0->dst_mcast_mask));
+
+ *p1 |= (*a0 & h0->dst_mcast_mask);
+ }
+}
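+
+/* Illustration (an assumed typical use, not stated in this file): for
+   IPv4 multicast the rewrite can carry the template MAC
+   01:00:5e:00:00:00 with dst_mcast_mask covering the low 23 bits of the
+   group address, which are OR'd into the destination MAC's low bytes. */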
+
+#define vnet_fixup_one_header(rw0,addr,p0) \
+ _vnet_fixup_one_header (&((rw0).rewrite_header), \
+ (u8*)(addr), sizeof((*addr)), \
+ (u8*)(p0))
+
+#define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0)
+/** Deprecated */
+void vnet_rewrite_for_sw_interface (struct vnet_main_t *vnm,
+ vnet_link_t packet_type,
+ u32 sw_if_index,
+ u32 node_index,
+ void *dst_address,
+ vnet_rewrite_header_t * rw,
+ u32 max_rewrite_bytes);
+
+u32 vnet_tx_node_index_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index);
+
+void vnet_rewrite_init (struct vnet_main_t *vnm,
+ u32 sw_if_index,
+ u32 this_node,
+ u32 next_node, vnet_rewrite_header_t * rw);
+
+u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index,
+ vnet_link_t packet_type,
+ const void *dst_address);
+void vnet_update_adjacency_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index, u32 ai);
+
+format_function_t format_vnet_rewrite;
+
+serialize_function_t serialize_vnet_rewrite, unserialize_vnet_rewrite;
+
+#endif /* included_vnet_rewrite_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h
new file mode 100644
index 00000000..22cfaee4
--- /dev/null
+++ b/src/vnet/api_errno.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_api_errno_h
+#define included_vnet_api_errno_h
+
+#define foreach_vnet_api_error \
+_(UNSPECIFIED, -1, "Unspecified Error") \
+_(INVALID_SW_IF_INDEX, -2, "Invalid sw_if_index") \
+_(NO_SUCH_FIB, -3, "No such FIB / VRF") \
+_(NO_SUCH_INNER_FIB, -4, "No such inner FIB / VRF") \
+_(NO_SUCH_LABEL, -5, "No such label") \
+_(NO_SUCH_ENTRY, -6, "No such entry") \
+_(INVALID_VALUE, -7, "Invalid value") \
+_(INVALID_VALUE_2, -8, "Invalid value #2") \
+_(UNIMPLEMENTED, -9, "Unimplemented") \
+_(INVALID_SW_IF_INDEX_2, -10, "Invalid sw_if_index #2") \
+_(SYSCALL_ERROR_1, -11, "System call error #1") \
+_(SYSCALL_ERROR_2, -12, "System call error #2") \
+_(SYSCALL_ERROR_3, -13, "System call error #3") \
+_(SYSCALL_ERROR_4, -14, "System call error #4") \
+_(SYSCALL_ERROR_5, -15, "System call error #5") \
+_(SYSCALL_ERROR_6, -16, "System call error #6") \
+_(SYSCALL_ERROR_7, -17, "System call error #7") \
+_(SYSCALL_ERROR_8, -18, "System call error #8") \
+_(SYSCALL_ERROR_9, -19, "System call error #9") \
+_(SYSCALL_ERROR_10, -20, "System call error #10") \
+_(FEATURE_DISABLED, -30, "Feature disabled by configuration") \
+_(INVALID_REGISTRATION, -31, "Invalid registration") \
+_(NEXT_HOP_NOT_IN_FIB, -50, "Next hop not in FIB") \
+_(UNKNOWN_DESTINATION, -51, "Unknown destination") \
+_(PREFIX_MATCHES_NEXT_HOP, -52, "Prefix matches next hop") \
+_(NEXT_HOP_NOT_FOUND_MP, -53, "Next hop not found (multipath)") \
+_(NO_MATCHING_INTERFACE, -54, "No matching interface for probe") \
+_(INVALID_VLAN, -55, "Invalid VLAN") \
+_(VLAN_ALREADY_EXISTS, -56, "VLAN subif already exists") \
+_(INVALID_SRC_ADDRESS, -57, "Invalid src address") \
+_(INVALID_DST_ADDRESS, -58, "Invalid dst address") \
+_(ADDRESS_LENGTH_MISMATCH, -59, "Address length mismatch") \
+_(ADDRESS_NOT_FOUND_FOR_INTERFACE, -60, "Address not found for interface") \
+_(ADDRESS_NOT_LINK_LOCAL, -61, "Address not link-local") \
+_(IP6_NOT_ENABLED, -62, "ip6 not enabled") \
+_(IN_PROGRESS, 10, "Operation in progress") \
+_(NO_SUCH_NODE, -63, "No such graph node") \
+_(NO_SUCH_NODE2, -64, "No such graph node #2") \
+_(NO_SUCH_TABLE, -65, "No such table") \
+_(NO_SUCH_TABLE2, -66, "No such table #2") \
+_(NO_SUCH_TABLE3, -67, "No such table #3") \
+_(SUBIF_ALREADY_EXISTS, -68, "Subinterface already exists") \
+_(SUBIF_CREATE_FAILED, -69, "Subinterface creation failed") \
+_(INVALID_MEMORY_SIZE, -70, "Invalid memory size requested") \
+_(INVALID_INTERFACE, -71, "Invalid interface") \
+_(INVALID_VLAN_TAG_COUNT, -72, "Invalid number of tags for requested operation") \
+_(INVALID_ARGUMENT, -73, "Invalid argument") \
+_(UNEXPECTED_INTF_STATE, -74, "Unexpected interface state") \
+_(TUNNEL_EXIST, -75, "Tunnel already exists") \
+_(INVALID_DECAP_NEXT, -76, "Invalid decap-next") \
+_(RESPONSE_NOT_READY, -77, "Response not ready") \
+_(NOT_CONNECTED, -78, "Not connected to the data plane") \
+_(IF_ALREADY_EXISTS, -79, "Interface already exists") \
+_(BOND_SLAVE_NOT_ALLOWED, -80, "Operation not allowed on slave of BondEthernet") \
+_(VALUE_EXIST, -81, "Value already exists") \
+_(SAME_SRC_DST, -82, "Source and destination are the same") \
+_(IP6_MULTICAST_ADDRESS_NOT_PRESENT, -83, "IP6 multicast address required") \
+_(SR_POLICY_NAME_NOT_PRESENT, -84, "Segment routing policy name required") \
+_(NOT_RUNNING_AS_ROOT, -85, "Not running as root") \
+_(ALREADY_CONNECTED, -86, "Connection to the data plane already exists") \
+_(UNSUPPORTED_JNI_VERSION, -87, "Unsupported JNI version") \
+_(FAILED_TO_ATTACH_TO_JAVA_THREAD, -88, "Failed to attach to Java thread") \
+_(INVALID_WORKER, -89, "Invalid worker thread") \
+_(LISP_DISABLED, -90, "LISP is disabled") \
+_(CLASSIFY_TABLE_NOT_FOUND, -91, "Classify table not found") \
+_(INVALID_EID_TYPE, -92, "Unsupported LISP EID type") \
+_(CANNOT_CREATE_PCAP_FILE, -93, "Cannot create pcap file") \
+_(INCORRECT_ADJACENCY_TYPE, -94, "Invalid adjacency type for this operation") \
+_(EXCEEDED_NUMBER_OF_RANGES_CAPACITY, -95, "Operation would exceed configured capacity of ranges") \
+_(EXCEEDED_NUMBER_OF_PORTS_CAPACITY, -96, "Operation would exceed capacity of number of ports") \
+_(INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \
+_(INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \
+_(TABLE_TOO_BIG, -99, "Table too big") \
+_(CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \
+_(BFD_EEXIST, -101, "Duplicate BFD object") \
+_(BFD_ENOENT, -102, "No such BFD object") \
+_(BFD_EINUSE, -103, "BFD object in use") \
+_(BFD_NOTSUPP, -104, "BFD feature not supported") \
+_(ADDRESS_IN_USE, -105, "Address in use") \
+_(ADDRESS_NOT_IN_USE, -106, "Address not in use") \
+_(QUEUE_FULL, -107, "Queue full") \
+_(UNKNOWN_URI_TYPE, -108, "Unknown URI type") \
+_(URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \
+_(LISP_RLOC_LOCAL, -110, "RLOC address is local") \
+_(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \
+_(INVALID_GPE_MODE, -112, "Invalid GPE mode") \
+_(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") \
+_(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface") \
+_(SESSION_CONNECT_FAIL, -115, "Session failed to connect") \
+_(ENTRY_ALREADY_EXISTS, -116, "Entry already exists") \
+_(SVM_SEGMENT_CREATE_FAIL, -117, "svm segment create fail") \
+_(APPLICATION_NOT_ATTACHED, -118, "application not attached") \
+_(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \
+_(BD_IN_USE, -120, "Bridge domain has member interfaces") \
+_(BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \
+_(BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceeds 16M limit") \
+_(SUBIF_DOESNT_EXIST, -123, "Subinterface doesn't exist") \
+_(L2_MACS_EVENT_CLINET_PRESENT, -124, "Client already exists for L2 MACs events") \
+_(INVALID_QUEUE, -125, "Invalid queue") \
+_(UNSUPPORTED, -126, "Unsupported") \
+_(DUPLICATE_IF_ADDRESS, -127, "Address already present on another interface")
+
+typedef enum
+{
+#define _(a,b,c) VNET_API_ERROR_##a = (b),
+ foreach_vnet_api_error
+#undef _
+ VNET_API_N_ERROR,
+} vnet_api_error_t;
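+
+/* Example expansion (illustrative): the list above yields, e.g.,
+   VNET_API_ERROR_INVALID_SW_IF_INDEX = -2, terminated by the
+   VNET_API_N_ERROR member. */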
+
+#endif /* included_vnet_api_errno_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api
new file mode 100644
index 00000000..7bcaa4c3
--- /dev/null
+++ b/src/vnet/bfd/bfd.api
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Set BFD echo source
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to use as echo source
+*/
+autoreply define bfd_udp_set_echo_source
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Delete BFD echo source
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoreply define bfd_udp_del_echo_source
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Add UDP BFD session on interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface
+ @param desired_min_tx - desired min transmit interval (microseconds)
+ @param required_min_rx - required min receive interval (microseconds)
+ @param local_addr - local address
+ @param peer_addr - peer address
+ @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+ @param detect_mult - detect multiplier (# of packets missed before connection goes down)
+ @param is_authenticated - non-zero if authentication is required
+ @param bfd_key_id - key id sent out in BFD packets (if is_authenticated)
+ @param conf_key_id - id of already configured key (if is_authenticated)
+*/
+autoreply define bfd_udp_add
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 desired_min_tx;
+ u32 required_min_rx;
+ u8 local_addr[16];
+ u8 peer_addr[16];
+ u8 is_ipv6;
+ u8 detect_mult;
+ u8 is_authenticated;
+ u8 bfd_key_id;
+ u32 conf_key_id;
+};
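+
+/* Example (illustrative values, not defaults): a session using 100 ms
+   timers would set desired_min_tx = required_min_rx = 100000 with
+   detect_mult = 3, declaring the session down after three missed
+   packets. */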
+
+/** \brief Modify UDP BFD session on interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface
+ @param desired_min_tx - desired min transmit interval (microseconds)
+ @param required_min_rx - required min receive interval (microseconds)
+ @param local_addr - local address
+ @param peer_addr - peer address
+ @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+ @param detect_mult - detect multiplier (# of packets missed before connection goes down)
+*/
+autoreply define bfd_udp_mod
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 desired_min_tx;
+ u32 required_min_rx;
+ u8 local_addr[16];
+ u8 peer_addr[16];
+ u8 is_ipv6;
+ u8 detect_mult;
+};
+
+/** \brief Delete UDP BFD session on interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface
+ @param local_addr - local address
+ @param peer_addr - peer address
+ @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+*/
+autoreply define bfd_udp_del
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 local_addr[16];
+ u8 peer_addr[16];
+ u8 is_ipv6;
+};
+
+/** \brief Get all BFD sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define bfd_udp_session_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief BFD session details structure
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface
+ @param local_addr - local address
+ @param peer_addr - peer address
+ @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+ @param state - session state
+ @param is_authenticated - non-zero if authentication in-use, zero otherwise
+ @param bfd_key_id - ID of key currently in-use if auth is on
+ @param conf_key_id - configured key ID for this session
+ @param required_min_rx - required min receive interval (microseconds)
+ @param desired_min_tx - desired min transmit interval (microseconds)
+ @param detect_mult - detect multiplier (# of packets missed before connection goes down)
+*/
+define bfd_udp_session_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 local_addr[16];
+ u8 peer_addr[16];
+ u8 is_ipv6;
+ u8 state;
+ u8 is_authenticated;
+ u8 bfd_key_id;
+ u32 conf_key_id;
+ u32 required_min_rx;
+ u32 desired_min_tx;
+ u8 detect_mult;
+};
+
+/** \brief Set flags of BFD UDP session
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface
+ @param local_addr - local address
+ @param peer_addr - peer address
+ @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+ @param admin_up_down - set the admin state, 1 = up, 0 = down
+*/
+autoreply define bfd_udp_session_set_flags
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 local_addr[16];
+ u8 peer_addr[16];
+ u8 is_ipv6;
+ u8 admin_up_down;
+};
+
+/** \brief Register for BFD events
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 => register for events, 0 => cancel registration
+ @param pid - sender's pid
+*/
+autoreply define want_bfd_events
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief BFD UDP - add/replace key to configuration
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param conf_key_id - key ID to add/replace
+ @param key_len - length of key (must be non-zero)
+ @param auth_type - authentication type (RFC 5880/4.1/Auth Type)
+ @param key - key data
+*/
+autoreply define bfd_auth_set_key
+{
+ u32 client_index;
+ u32 context;
+ u32 conf_key_id;
+ u8 key_len;
+ u8 auth_type;
+ u8 key[20];
+};
+
+/** \brief BFD UDP - delete key from configuration
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param conf_key_id - key ID to delete
+*/
+autoreply define bfd_auth_del_key
+{
+ u32 client_index;
+ u32 context;
+ u32 conf_key_id;
+};
+
+/** \brief Get a list of configured authentication keys
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define bfd_auth_keys_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief BFD authentication key details
+ @param context - sender context, to match reply w/ request
+ @param conf_key_id - configured key ID
+ @param use_count - how many BFD sessions currently use this key
+ @param auth_type - authentication type (RFC 5880/4.1/Auth Type)
+*/
+define bfd_auth_keys_details
+{
+ u32 context;
+ u32 conf_key_id;
+ u32 use_count;
+ u8 auth_type;
+};
+
+/** \brief BFD UDP - activate/change authentication
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface
+ @param local_addr - local address
+ @param peer_addr - peer address
+ @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+ @param is_delayed - change is applied once peer applies the change (on first received packet with this auth)
+ @param bfd_key_id - key id sent out in BFD packets
+ @param conf_key_id - id of already configured key
+*/
+autoreply define bfd_udp_auth_activate
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 local_addr[16];
+ u8 peer_addr[16];
+ u8 is_ipv6;
+ u8 is_delayed;
+ u8 bfd_key_id;
+ u32 conf_key_id;
+};
+
+/** \brief BFD UDP - deactivate authentication
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface
+ @param local_addr - local address
+ @param peer_addr - peer address
+ @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+ @param is_delayed - change is applied once peer applies the change (on first received non-authenticated packet)
+*/
+autoreply define bfd_udp_auth_deactivate
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 local_addr[16];
+ u8 peer_addr[16];
+ u8 is_ipv6;
+ u8 is_delayed;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c
new file mode 100644
index 00000000..185c03cf
--- /dev/null
+++ b/src/vnet/bfd/bfd_api.c
@@ -0,0 +1,411 @@
+/*
+ *------------------------------------------------------------------
+ * bfd_api.c - bfd api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+/**
+ * @file
+ * @brief BFD binary API implementation
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/bfd/bfd_main.h>
+#include <vnet/bfd/bfd_api.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+ _ (BFD_UDP_ADD, bfd_udp_add) \
+ _ (BFD_UDP_MOD, bfd_udp_mod) \
+ _ (BFD_UDP_DEL, bfd_udp_del) \
+ _ (BFD_UDP_SESSION_DUMP, bfd_udp_session_dump) \
+ _ (BFD_UDP_SESSION_SET_FLAGS, bfd_udp_session_set_flags) \
+ _ (WANT_BFD_EVENTS, want_bfd_events) \
+ _ (BFD_AUTH_SET_KEY, bfd_auth_set_key) \
+ _ (BFD_AUTH_DEL_KEY, bfd_auth_del_key) \
+ _ (BFD_AUTH_KEYS_DUMP, bfd_auth_keys_dump) \
+ _ (BFD_UDP_AUTH_ACTIVATE, bfd_udp_auth_activate) \
+ _ (BFD_UDP_AUTH_DEACTIVATE, bfd_udp_auth_deactivate) \
+ _ (BFD_UDP_SET_ECHO_SOURCE, bfd_udp_set_echo_source) \
+ _ (BFD_UDP_DEL_ECHO_SOURCE, bfd_udp_del_echo_source)
+
+pub_sub_handler (bfd_events, BFD_EVENTS);
+
+#define BFD_UDP_API_PARAM_COMMON_CODE \
+ ip46_address_t local_addr; \
+ memset (&local_addr, 0, sizeof (local_addr)); \
+ ip46_address_t peer_addr; \
+ memset (&peer_addr, 0, sizeof (peer_addr)); \
+ if (mp->is_ipv6) \
+ { \
+ clib_memcpy (&local_addr.ip6, mp->local_addr, sizeof (local_addr.ip6)); \
+ clib_memcpy (&peer_addr.ip6, mp->peer_addr, sizeof (peer_addr.ip6)); \
+ } \
+ else \
+ { \
+ clib_memcpy (&local_addr.ip4, mp->local_addr, sizeof (local_addr.ip4)); \
+ clib_memcpy (&peer_addr.ip4, mp->peer_addr, sizeof (peer_addr.ip4)); \
+ }
+
+#define BFD_UDP_API_PARAM_FROM_MP(mp) \
+ clib_net_to_host_u32 (mp->sw_if_index), &local_addr, &peer_addr
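+
+/* Together these two macros expand, inside each handler, to declarations
+   of local_addr/peer_addr filled from the message plus the leading
+   argument triple (sw_if_index converted from network order, &local_addr,
+   &peer_addr) passed to the bfd_udp_* session functions. */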
+
+static void
+vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp)
+{
+ vl_api_bfd_udp_add_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ BFD_UDP_API_PARAM_COMMON_CODE;
+
+ rv = bfd_udp_add_session (BFD_UDP_API_PARAM_FROM_MP (mp),
+ clib_net_to_host_u32 (mp->desired_min_tx),
+ clib_net_to_host_u32 (mp->required_min_rx),
+ mp->detect_mult, mp->is_authenticated,
+ clib_net_to_host_u32 (mp->conf_key_id),
+ mp->bfd_key_id);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_BFD_UDP_ADD_REPLY);
+}
+
+static void
+vl_api_bfd_udp_mod_t_handler (vl_api_bfd_udp_mod_t * mp)
+{
+ vl_api_bfd_udp_mod_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ BFD_UDP_API_PARAM_COMMON_CODE;
+
+ rv = bfd_udp_mod_session (BFD_UDP_API_PARAM_FROM_MP (mp),
+ clib_net_to_host_u32 (mp->desired_min_tx),
+ clib_net_to_host_u32 (mp->required_min_rx),
+ mp->detect_mult);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_BFD_UDP_MOD_REPLY);
+}
+
+static void
+vl_api_bfd_udp_del_t_handler (vl_api_bfd_udp_del_t * mp)
+{
+ vl_api_bfd_udp_del_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ BFD_UDP_API_PARAM_COMMON_CODE;
+
+ rv = bfd_udp_del_session (BFD_UDP_API_PARAM_FROM_MP (mp));
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_BFD_UDP_DEL_REPLY);
+}
+
+void
+send_bfd_udp_session_details (unix_shared_memory_queue_t * q, u32 context,
+ bfd_session_t * bs)
+{
+ if (bs->transport != BFD_TRANSPORT_UDP4 &&
+ bs->transport != BFD_TRANSPORT_UDP6)
+ {
+ return;
+ }
+
+ vl_api_bfd_udp_session_details_t *mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_BFD_UDP_SESSION_DETAILS);
+ mp->context = context;
+ mp->state = bs->local_state;
+ bfd_udp_session_t *bus = &bs->udp;
+ bfd_udp_key_t *key = &bus->key;
+ mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index);
+ mp->is_ipv6 = !(ip46_address_is_ip4 (&key->local_addr));
+ if ((!bs->auth.is_delayed && bs->auth.curr_key) ||
+ (bs->auth.is_delayed && bs->auth.next_key))
+ {
+ mp->is_authenticated = 1;
+ }
+ if (bs->auth.is_delayed && bs->auth.next_key)
+ {
+ mp->bfd_key_id = bs->auth.next_bfd_key_id;
+ mp->conf_key_id = clib_host_to_net_u32 (bs->auth.next_key->conf_key_id);
+ }
+ else if (!bs->auth.is_delayed && bs->auth.curr_key)
+ {
+ mp->bfd_key_id = bs->auth.curr_bfd_key_id;
+ mp->conf_key_id = clib_host_to_net_u32 (bs->auth.curr_key->conf_key_id);
+ }
+ if (mp->is_ipv6)
+ {
+ clib_memcpy (mp->local_addr, &key->local_addr,
+ sizeof (key->local_addr));
+ clib_memcpy (mp->peer_addr, &key->peer_addr, sizeof (key->peer_addr));
+ }
+ else
+ {
+ clib_memcpy (mp->local_addr, key->local_addr.ip4.data,
+ sizeof (key->local_addr.ip4.data));
+ clib_memcpy (mp->peer_addr, key->peer_addr.ip4.data,
+ sizeof (key->peer_addr.ip4.data));
+ }
+
+ mp->required_min_rx =
+ clib_host_to_net_u32 (bs->config_required_min_rx_usec);
+ mp->desired_min_tx = clib_host_to_net_u32 (bs->config_desired_min_tx_usec);
+ mp->detect_mult = bs->local_detect_mult;
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+void
+bfd_event (bfd_main_t * bm, bfd_session_t * bs)
+{
+ vpe_api_main_t *vam = &vpe_api_main;
+ vpe_client_registration_t *reg;
+ unix_shared_memory_queue_t *q;
+ /* *INDENT-OFF* */
+ pool_foreach (reg, vam->bfd_events_registrations, ({
+ q = vl_api_client_index_to_input_queue (reg->client_index);
+ if (q)
+ {
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ /* fallthrough */
+ case BFD_TRANSPORT_UDP6:
+ send_bfd_udp_session_details (q, 0, bs);
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_bfd_udp_session_dump_t_handler (vl_api_bfd_udp_session_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (q == 0)
+ return;
+
+ bfd_session_t *bs = NULL;
+ /* *INDENT-OFF* */
+ pool_foreach (bs, bfd_main.sessions, ({
+ if (bs->transport == BFD_TRANSPORT_UDP4 ||
+ bs->transport == BFD_TRANSPORT_UDP6)
+ send_bfd_udp_session_details (q, mp->context, bs);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_bfd_udp_session_set_flags_t_handler (vl_api_bfd_udp_session_set_flags_t
+ * mp)
+{
+ vl_api_bfd_udp_session_set_flags_reply_t *rmp;
+ int rv;
+
+ BFD_UDP_API_PARAM_COMMON_CODE;
+
+ rv = bfd_udp_session_set_flags (BFD_UDP_API_PARAM_FROM_MP (mp),
+ mp->admin_up_down);
+
+ REPLY_MACRO (VL_API_BFD_UDP_SESSION_SET_FLAGS_REPLY);
+}
+
+static void
+vl_api_bfd_auth_set_key_t_handler (vl_api_bfd_auth_set_key_t * mp)
+{
+ vl_api_bfd_auth_set_key_reply_t *rmp;
+ int rv = bfd_auth_set_key (clib_net_to_host_u32 (mp->conf_key_id),
+ mp->auth_type, mp->key_len, mp->key);
+
+ REPLY_MACRO (VL_API_BFD_AUTH_SET_KEY_REPLY);
+}
+
+static void
+vl_api_bfd_auth_del_key_t_handler (vl_api_bfd_auth_del_key_t * mp)
+{
+ vl_api_bfd_auth_del_key_reply_t *rmp;
+ int rv = bfd_auth_del_key (clib_net_to_host_u32 (mp->conf_key_id));
+
+ REPLY_MACRO (VL_API_BFD_AUTH_DEL_KEY_REPLY);
+}
+
+static void
+vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (q == 0)
+ return;
+
+ bfd_auth_key_t *key = NULL;
+ vl_api_bfd_auth_keys_details_t *rmp = NULL;
+
+ /* *INDENT-OFF* */
+ pool_foreach (key, bfd_main.auth_keys, ({
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_BFD_AUTH_KEYS_DETAILS);
+ rmp->context = mp->context;
+ rmp->conf_key_id = clib_host_to_net_u32 (key->conf_key_id);
+ rmp->auth_type = key->auth_type;
+ rmp->use_count = clib_host_to_net_u32 (key->use_count);
+ vl_msg_api_send_shmem (q, (u8 *)&rmp);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_bfd_udp_auth_activate_t_handler (vl_api_bfd_udp_auth_activate_t * mp)
+{
+ vl_api_bfd_udp_auth_activate_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ BFD_UDP_API_PARAM_COMMON_CODE;
+
+ rv = bfd_udp_auth_activate (BFD_UDP_API_PARAM_FROM_MP (mp),
+ clib_net_to_host_u32 (mp->conf_key_id),
+ mp->bfd_key_id, mp->is_delayed);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_BFD_UDP_AUTH_ACTIVATE_REPLY);
+}
+
+static void
+vl_api_bfd_udp_auth_deactivate_t_handler (vl_api_bfd_udp_auth_deactivate_t *
+ mp)
+{
+ vl_api_bfd_udp_auth_deactivate_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ BFD_UDP_API_PARAM_COMMON_CODE;
+
+ rv =
+ bfd_udp_auth_deactivate (BFD_UDP_API_PARAM_FROM_MP (mp), mp->is_delayed);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_BFD_UDP_AUTH_DEACTIVATE_REPLY);
+}
+
+static void
+vl_api_bfd_udp_set_echo_source_t_handler (vl_api_bfd_udp_set_echo_source_t *
+ mp)
+{
+ vl_api_bfd_udp_set_echo_source_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = bfd_udp_set_echo_source (clib_net_to_host_u32 (mp->sw_if_index));
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_BFD_UDP_SET_ECHO_SOURCE_REPLY);
+}
+
+static void
+vl_api_bfd_udp_del_echo_source_t_handler (vl_api_bfd_udp_del_echo_source_t *
+ mp)
+{
+ vl_api_bfd_udp_del_echo_source_reply_t *rmp;
+ int rv;
+
+ rv = bfd_udp_del_echo_source ();
+
+ REPLY_MACRO (VL_API_BFD_UDP_DEL_ECHO_SOURCE_REPLY);
+}
+
+/*
+ * bfd_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id, n, crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_bfd;
+#undef _
+}
+
+static clib_error_t *
+bfd_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N, n) \
+ vl_msg_api_set_handlers (VL_API_##N, #n, vl_api_##n##_t_handler, \
+ vl_noop_handler, vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, sizeof (vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
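+
+  /* For each (N, n) pair above, this registers e.g. VL_API_BFD_UDP_ADD
+     with vl_api_bfd_udp_add_t_handler and its endian/print helpers. */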
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (bfd_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h
new file mode 100644
index 00000000..9f0509d5
--- /dev/null
+++ b/src/vnet/bfd/bfd_api.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD API declarations
+ */
+#ifndef __included_bfd_api_h__
+#define __included_bfd_api_h__
+
+#include <vnet/api_errno.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#define foreach_bfd_transport(F) \
+ F (UDP4, "ip4-rewrite") \
+ F (UDP6, "ip6-rewrite")
+
+typedef enum
+{
+#define F(t, n) BFD_TRANSPORT_##t,
+ foreach_bfd_transport (F)
+#undef F
+} bfd_transport_e;
+
+/**
+ * @brief create a new bfd session
+ */
+vnet_api_error_t
+bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ u32 desired_min_tx_usec, u32 required_min_rx_usec,
+ u8 detect_mult, u8 is_authenticated, u32 conf_key_id,
+ u8 bfd_key_id);
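+
+/* Illustrative call (values assumed): an unauthenticated session with
+   100 ms timers and detect_mult 3 on sw_if_index 1:
+
+     bfd_udp_add_session (1, &local, &peer, 100000, 100000, 3, 0, 0, 0);
+ */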
+
+/**
+ * @brief modify existing session
+ */
+vnet_api_error_t
+bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ u32 desired_min_tx_usec, u32 required_min_rx_usec,
+ u8 detect_mult);
+
+/**
+ * @brief delete existing session
+ */
+vnet_api_error_t bfd_udp_del_session (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr);
+
+/**
+ * @brief set session admin down/up
+ */
+vnet_api_error_t bfd_udp_session_set_flags (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ u8 admin_up_down);
+
+/**
+ * @brief create or modify bfd authentication key
+ */
+vnet_api_error_t bfd_auth_set_key (u32 conf_key_id, u8 auth_type, u8 key_len,
+ const u8 * key);
+
+/**
+ * @brief delete existing authentication key
+ */
+vnet_api_error_t bfd_auth_del_key (u32 conf_key_id);
+
+/**
+ * @brief activate authentication for existing session
+ */
+vnet_api_error_t bfd_udp_auth_activate (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ u32 conf_key_id, u8 bfd_key_id,
+ u8 is_delayed);
+
+/**
+ * @brief deactivate authentication for existing session
+ */
+vnet_api_error_t bfd_udp_auth_deactivate (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ u8 is_delayed);
+
+/**
+ * @brief set echo-source interface
+ */
+vnet_api_error_t bfd_udp_set_echo_source (u32 loopback_sw_if_index);
+
+/**
+ * @brief unset echo-source interface
+ */
+vnet_api_error_t bfd_udp_del_echo_source ();
+
+#endif /* __included_bfd_api_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c
new file mode 100644
index 00000000..b2cd8df2
--- /dev/null
+++ b/src/vnet/bfd/bfd_cli.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD CLI implementation
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/cli.h>
+#include <vppinfra/format.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/format.h>
+#include <vnet/bfd/bfd_api.h>
+#include <vnet/bfd/bfd_main.h>
+
+static u8 *
+format_bfd_session_cli (u8 * s, va_list * args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ bfd_main_t *bm = va_arg (*args, bfd_main_t *);
+ bfd_session_t *bs = va_arg (*args, bfd_session_t *);
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ s = format (s, "%=10u %-32s %20U %20U\n", bs->bs_idx, "IPv4 address",
+ format_ip4_address, bs->udp.key.local_addr.ip4.as_u8,
+ format_ip4_address, bs->udp.key.peer_addr.ip4.as_u8);
+ break;
+ case BFD_TRANSPORT_UDP6:
+ s = format (s, "%=10u %-32s %20U %20U\n", bs->bs_idx, "IPv6 address",
+ format_ip6_address, &bs->udp.key.local_addr.ip6,
+ format_ip6_address, &bs->udp.key.peer_addr.ip6);
+ break;
+ }
+ s = format (s, "%10s %-32s %20s %20s\n", "", "Session state",
+ bfd_state_string (bs->local_state),
+ bfd_state_string (bs->remote_state));
+ s = format (s, "%10s %-32s %20s %20s\n", "", "Diagnostic code",
+ bfd_diag_code_string (bs->local_diag),
+ bfd_diag_code_string (bs->remote_diag));
+ s = format (s, "%10s %-32s %20u %20u\n", "", "Detect multiplier",
+ bs->local_detect_mult, bs->remote_detect_mult);
+ s = format (s, "%10s %-32s %20u %20llu\n", "",
+ "Required Min Rx Interval (usec)",
+ bs->config_required_min_rx_usec, bs->remote_min_rx_usec);
+ s = format (s, "%10s %-32s %20u %20u\n", "",
+ "Desired Min Tx Interval (usec)",
+ bs->config_desired_min_tx_usec, bfd_clocks_to_usec (bm,
+ bs->remote_desired_min_tx_clocks));
+ s =
+ format (s, "%10s %-32s %20u\n", "", "Transmit interval",
+ bfd_clocks_to_usec (bm, bs->transmit_interval_clocks));
+ u64 now = clib_cpu_time_now ();
+ u8 *tmp = NULL;
+ if (bs->last_tx_clocks > 0)
+ {
+ tmp = format (tmp, "%.2fs ago", (now - bs->last_tx_clocks) *
+ vm->clib_time.seconds_per_clock);
+ s = format (s, "%10s %-32s %20v\n", "", "Last control frame tx", tmp);
+ vec_reset_length (tmp);
+ }
+ if (bs->last_rx_clocks)
+ {
+ tmp = format (tmp, "%.2fs ago", (now - bs->last_rx_clocks) *
+ vm->clib_time.seconds_per_clock);
+ s = format (s, "%10s %-32s %20v\n", "", "Last control frame rx", tmp);
+ vec_reset_length (tmp);
+ }
+ s =
+ format (s, "%10s %-32s %20u %20llu\n", "", "Min Echo Rx Interval (usec)",
+ 1, bs->remote_min_echo_rx_usec);
+ if (bs->echo)
+ {
+ s = format (s, "%10s %-32s %20u\n", "", "Echo transmit interval",
+ bfd_clocks_to_usec (bm, bs->echo_transmit_interval_clocks));
+ tmp = format (tmp, "%.2fs ago", (now - bs->echo_last_tx_clocks) *
+ vm->clib_time.seconds_per_clock);
+ s = format (s, "%10s %-32s %20v\n", "", "Last echo frame tx", tmp);
+ vec_reset_length (tmp);
+ tmp = format (tmp, "%.6fs",
+ (bs->echo_last_rx_clocks - bs->echo_last_tx_clocks) *
+ vm->clib_time.seconds_per_clock);
+ s =
+ format (s, "%10s %-32s %20v\n", "", "Last echo frame roundtrip time",
+ tmp);
+ }
+ vec_free (tmp);
+ tmp = NULL;
+ s = format (s, "%10s %-32s %20s %20s\n", "", "Demand mode", "no",
+ bs->remote_demand ? "yes" : "no");
+ s = format (s, "%10s %-32s %20s\n", "", "Poll state",
+ bfd_poll_state_string (bs->poll_state));
+ if (bs->auth.curr_key)
+ {
+ s = format (s, "%10s %-32s %20u\n", "", "Authentication config key ID",
+ bs->auth.curr_key->conf_key_id);
+ s = format (s, "%10s %-32s %20u\n", "", "Authentication BFD key ID",
+ bs->auth.curr_bfd_key_id);
+ s = format (s, "%10s %-32s %20u %20u\n", "", "Sequence number",
+ bs->auth.local_seq_number, bs->auth.remote_seq_number);
+ }
+ return s;
+}
+
+static clib_error_t *
+show_bfd (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ bfd_main_t *bm = &bfd_main;
+ bfd_session_t *bs = NULL;
+
+ if (unformat (input, "keys"))
+ {
+ bfd_auth_key_t *key = NULL;
+ u8 *s = format (NULL, "%=10s %=25s %=10s\n", "Configuration Key ID",
+ "Type", "Use Count");
+ /* *INDENT-OFF* */
+ pool_foreach (key, bm->auth_keys, {
+ s = format (s, "%10u %-25s %10u\n", key->conf_key_id,
+ bfd_auth_type_str (key->auth_type), key->use_count);
+ });
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "%v\n", s);
+ vec_free (s);
+ vlib_cli_output (vm, "Number of configured BFD keys: %lu\n",
+ (u64) pool_elts (bm->auth_keys));
+ }
+ else if (unformat (input, "sessions"))
+ {
+ u8 *s = format (NULL, "%=10s %=32s %=20s %=20s\n", "Index", "Property",
+ "Local value", "Remote value");
+ /* *INDENT-OFF* */
+ pool_foreach (bs, bm->sessions, {
+ s = format (s, "%U", format_bfd_session_cli, vm, bm, bs);
+ });
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ vlib_cli_output (vm, "Number of configured BFD sessions: %lu\n",
+ (u64) pool_elts (bm->sessions));
+ }
+ else if (unformat (input, "echo-source"))
+ {
+ int is_set;
+ u32 sw_if_index;
+ int have_usable_ip4;
+ ip4_address_t ip4;
+ int have_usable_ip6;
+ ip6_address_t ip6;
+ bfd_udp_get_echo_source (&is_set, &sw_if_index, &have_usable_ip4, &ip4,
+ &have_usable_ip6, &ip6);
+ if (is_set)
+ {
+ vnet_sw_interface_t *sw_if =
+ vnet_get_sw_interface_safe (&vnet_main, sw_if_index);
+ vnet_hw_interface_t *hw_if =
+ vnet_get_hw_interface (&vnet_main, sw_if->hw_if_index);
+ u8 *s = format (NULL, "UDP echo source is: %v\n", hw_if->name);
+ s = format (s, "IPv4 address usable as echo source: ");
+ if (have_usable_ip4)
+ {
+ s = format (s, "%U\n", format_ip4_address, &ip4);
+ }
+ else
+ {
+ s = format (s, "none\n");
+ }
+ s = format (s, "IPv6 address usable as echo source: ");
+ if (have_usable_ip6)
+ {
+ s = format (s, "%U\n", format_ip6_address, &ip6);
+ }
+ else
+ {
+ s = format (s, "none\n");
+ }
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ }
+ else
+ {
+ vlib_cli_output (vm, "UDP echo source is not set.\n");
+ }
+ }
+ else
+ {
+ vlib_cli_output (vm, "Number of configured BFD sessions: %lu\n",
+ (u64) pool_elts (bm->sessions));
+ vlib_cli_output (vm, "Number of configured BFD keys: %lu\n",
+ (u64) pool_elts (bm->auth_keys));
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_bfd_command, static) = {
+ .path = "show bfd",
+ .short_help = "show bfd [keys|sessions|echo-source]",
+ .function = show_bfd,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_vnet_api_errno (u8 * s, va_list * args)
+{
+ vnet_api_error_t api_error = va_arg (*args, vnet_api_error_t);
+#define _(a, b, c) \
+ case b: \
+ s = format (s, "%s", c); \
+ break;
+  switch (api_error)
+    {
+      foreach_vnet_api_error
+    default:
+      s = format (s, "UNKNOWN");
+      break;
+    }
+#undef _
+ return s;
+}
+
+static clib_error_t *
+bfd_cli_key_add (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ clib_error_t *ret = NULL;
+ int have_key_id = 0;
+ u32 key_id = 0;
+ u8 *vec_auth_type = NULL;
+ bfd_auth_type_e auth_type = BFD_AUTH_TYPE_reserved;
+ u8 *secret = NULL;
+ static const u8 keyed_sha1[] = "keyed-sha1";
+ static const u8 meticulous_keyed_sha1[] = "meticulous-keyed-sha1";
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "conf-key-id %u", &key_id))
+ {
+ have_key_id = 1;
+ }
+ else if (unformat (input, "type %U", unformat_token, "a-zA-Z0-9-",
+ &vec_auth_type))
+ {
+ if (vec_len (vec_auth_type) == sizeof (keyed_sha1) - 1 &&
+ 0 == memcmp (vec_auth_type, keyed_sha1,
+ sizeof (keyed_sha1) - 1))
+ {
+ auth_type = BFD_AUTH_TYPE_keyed_sha1;
+ }
+ else if (vec_len (vec_auth_type) ==
+ sizeof (meticulous_keyed_sha1) - 1 &&
+ 0 == memcmp (vec_auth_type, meticulous_keyed_sha1,
+ sizeof (meticulous_keyed_sha1) - 1))
+ {
+ auth_type = BFD_AUTH_TYPE_meticulous_keyed_sha1;
+ }
+ else
+ {
+ ret = clib_error_return (0, "invalid type `%v'", vec_auth_type);
+ goto out;
+ }
+ }
+ else if (unformat (input, "secret %U", unformat_hex_string, &secret))
+ {
+ /* nothing to do here */
+ }
+ else
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ if (!have_key_id)
+ {
+ ret =
+ clib_error_return (0, "required parameter missing: `conf-key-id'");
+ goto out;
+ }
+ if (!vec_auth_type)
+ {
+ ret = clib_error_return (0, "required parameter missing: `type'");
+ goto out;
+ }
+ if (!secret)
+ {
+ ret = clib_error_return (0, "required parameter missing: `secret'");
+ goto out;
+ }
+
+ vnet_api_error_t rv =
+ bfd_auth_set_key (key_id, auth_type, vec_len (secret), secret);
+ if (rv)
+ {
+ ret =
+ clib_error_return (0, "`bfd_auth_set_key' API call failed, rv=%d:%U",
+ (int) rv, format_vnet_api_errno, rv);
+ }
+
+out:
+ vec_free (vec_auth_type);
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = {
+  .path = "bfd key set",
+  .short_help = "bfd key set"
+                " conf-key-id <id>"
+                " type <keyed-sha1|meticulous-keyed-sha1>"
+                " secret <secret>",
+ .function = bfd_cli_key_add,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_cli_key_del (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ clib_error_t *ret = NULL;
+ u32 key_id = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!unformat (input, "conf-key-id %u", &key_id))
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ vnet_api_error_t rv = bfd_auth_del_key (key_id);
+ if (rv)
+ {
+ ret =
+ clib_error_return (0, "`bfd_auth_del_key' API call failed, rv=%d:%U",
+ (int) rv, format_vnet_api_errno, rv);
+ }
+
+out:
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_key_del_command, static) = {
+ .path = "bfd key del",
+ .short_help = "bfd key del conf-key-id <id>",
+ .function = bfd_cli_key_del,
+};
+/* *INDENT-ON* */
+
+#define INTERFACE_STR "interface"
+#define LOCAL_ADDR_STR "local-addr"
+#define PEER_ADDR_STR "peer-addr"
+#define CONF_KEY_ID_STR "conf-key-id"
+#define BFD_KEY_ID_STR "bfd-key-id"
+#define DESIRED_MIN_TX_STR "desired-min-tx"
+#define REQUIRED_MIN_RX_STR "required-min-rx"
+#define DETECT_MULT_STR "detect-mult"
+#define ADMIN_STR "admin"
+#define DELAYED_STR "delayed"
+
+static const unsigned mandatory = 1;
+static const unsigned optional = 0;
+
+#define DECLARE(t, n, s, r, ...) \
+ int have_##n = 0; \
+ t n;
+
+#define UNFORMAT(t, n, s, r, ...) \
+ if (unformat (input, s " " __VA_ARGS__, &n)) \
+ { \
+ something_parsed = 1; \
+ have_##n = 1; \
+ }
+
+#if __GNUC__ >= 6
+#define PRAGMA_STR1 \
+ _Pragma ("GCC diagnostic ignored \"-Wtautological-compare\"");
+#define PRAGMA_STR2 _Pragma ("GCC diagnostic pop");
+#else
+#define PRAGMA_STR1
+#define PRAGMA_STR2
+#endif
+
+#define CHECK_MANDATORY(t, n, s, r, ...) \
+ PRAGMA_STR1 \
+ if (mandatory == r && !have_##n) \
+ PRAGMA_STR2 \
+ { \
+ ret = clib_error_return (0, "Required parameter `%s' missing.", s); \
+ goto out; \
+ }
+
+static clib_error_t *
+bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ clib_error_t *ret = NULL;
+#define foreach_bfd_cli_udp_session_add_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
+ F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
+ F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \
+ F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \
+ F (u32, bfd_key_id, BFD_KEY_ID_STR, optional, "%u")
+
+ foreach_bfd_cli_udp_session_add_cli_param (DECLARE);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ int something_parsed = 0;
+ foreach_bfd_cli_udp_session_add_cli_param (UNFORMAT);
+
+ if (!something_parsed)
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ foreach_bfd_cli_udp_session_add_cli_param (CHECK_MANDATORY);
+
+ if (1 == have_conf_key_id + have_bfd_key_id)
+ {
+ ret = clib_error_return (0, "Incompatible parameter combination, `%s' "
+ "and `%s' must be either both specified or none",
+ CONF_KEY_ID_STR, BFD_KEY_ID_STR);
+ goto out;
+ }
+
+  if (detect_mult < 1 || detect_mult > 255)
+ {
+ ret = clib_error_return (0, "%s value `%u' out of range <1,255>",
+ DETECT_MULT_STR, detect_mult);
+ goto out;
+ }
+
+ if (have_bfd_key_id && bfd_key_id > 255)
+ {
+ ret = clib_error_return (0, "%s value `%u' out of range <1,255>",
+ BFD_KEY_ID_STR, bfd_key_id);
+ goto out;
+ }
+
+ vnet_api_error_t rv =
+ bfd_udp_add_session (sw_if_index, &local_addr, &peer_addr, desired_min_tx,
+ required_min_rx,
+ detect_mult, have_conf_key_id, conf_key_id,
+ bfd_key_id);
+ if (rv)
+ {
+      ret =
+        clib_error_return (0,
+                           "`bfd_udp_add_session' API call failed, rv=%d:%U",
+                           (int) rv, format_vnet_api_errno, rv);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = {
+ .path = "bfd udp session add",
+ .short_help = "bfd udp session add"
+ " interface <interface>"
+ " local-addr <local-address>"
+ " peer-addr <peer-address>"
+ " desired-min-tx <desired min tx interval>"
+ " required-min-rx <required min rx interval>"
+ " detect-mult <detect multiplier> "
+ "["
+ " conf-key-id <config key ID>"
+ " bfd-key-id <BFD key ID>"
+ "]",
+ .function = bfd_cli_udp_session_add,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ clib_error_t *ret = NULL;
+#define foreach_bfd_cli_udp_session_mod_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
+ F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
+ F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u")
+
+ foreach_bfd_cli_udp_session_mod_cli_param (DECLARE);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ int something_parsed = 0;
+ foreach_bfd_cli_udp_session_mod_cli_param (UNFORMAT);
+
+ if (!something_parsed)
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ foreach_bfd_cli_udp_session_mod_cli_param (CHECK_MANDATORY);
+
+  if (detect_mult < 1 || detect_mult > 255)
+ {
+ ret = clib_error_return (0, "%s value `%u' out of range <1,255>",
+ DETECT_MULT_STR, detect_mult);
+ goto out;
+ }
+
+ vnet_api_error_t rv =
+ bfd_udp_mod_session (sw_if_index, &local_addr, &peer_addr,
+ desired_min_tx, required_min_rx, detect_mult);
+ if (rv)
+ {
+ ret =
+ clib_error_return (0,
+ "`bfd_udp_mod_session' API call failed, rv=%d:%U",
+ (int) rv, format_vnet_api_errno, rv);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = {
+  .path = "bfd udp session mod",
+  .short_help = "bfd udp session mod"
+                " interface <interface>"
+                " local-addr <local-address>"
+                " peer-addr <peer-address>"
+                " desired-min-tx <desired min tx interval>"
+                " required-min-rx <required min rx interval>"
+                " detect-mult <detect multiplier>",
+ .function = bfd_cli_udp_session_mod,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ clib_error_t *ret = NULL;
+#define foreach_bfd_cli_udp_session_del_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address)
+
+ foreach_bfd_cli_udp_session_del_cli_param (DECLARE);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ int something_parsed = 0;
+ foreach_bfd_cli_udp_session_del_cli_param (UNFORMAT);
+
+ if (!something_parsed)
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ foreach_bfd_cli_udp_session_del_cli_param (CHECK_MANDATORY);
+
+ vnet_api_error_t rv =
+ bfd_udp_del_session (sw_if_index, &local_addr, &peer_addr);
+ if (rv)
+ {
+ ret =
+ clib_error_return (0,
+ "`bfd_udp_del_session' API call failed, rv=%d:%U",
+ (int) rv, format_vnet_api_errno, rv);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = {
+  .path = "bfd udp session del",
+  .short_help = "bfd udp session del"
+                " interface <interface>"
+                " local-addr <local-address>"
+                " peer-addr <peer-address>",
+ .function = bfd_cli_udp_session_del,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ clib_error_t *ret = NULL;
+#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \
+ &admin_up_down_token)
+
+ foreach_bfd_cli_udp_session_set_flags_cli_param (DECLARE);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ int something_parsed = 0;
+ foreach_bfd_cli_udp_session_set_flags_cli_param (UNFORMAT);
+
+ if (!something_parsed)
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ foreach_bfd_cli_udp_session_set_flags_cli_param (CHECK_MANDATORY);
+
+ u8 admin_up_down;
+ static const char up[] = "up";
+ static const char down[] = "down";
+ if (!memcmp (admin_up_down_token, up, sizeof (up) - 1))
+ {
+ admin_up_down = 1;
+ }
+ else if (!memcmp (admin_up_down_token, down, sizeof (down) - 1))
+ {
+ admin_up_down = 0;
+ }
+ else
+ {
+ ret =
+ clib_error_return (0, "Unrecognized value for `%s' parameter: `%v'",
+ ADMIN_STR, admin_up_down_token);
+ goto out;
+ }
+ vnet_api_error_t rv = bfd_udp_session_set_flags (sw_if_index, &local_addr,
+ &peer_addr, admin_up_down);
+ if (rv)
+ {
+ ret =
+ clib_error_return (0,
+ "`bfd_udp_session_set_flags' API call failed, rv=%d:%U",
+ (int) rv, format_vnet_api_errno, rv);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = {
+ .path = "bfd udp session set-flags",
+ .short_help = "bfd udp session set-flags"
+ " interface <interface>"
+ " local-addr <local-address>"
+ " peer-addr <peer-address>"
+ " admin <up|down>",
+ .function = bfd_cli_udp_session_set_flags,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_cli_udp_session_auth_activate (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ clib_error_t *ret = NULL;
+#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \
+ F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \
+ F (u32, bfd_key_id, BFD_KEY_ID_STR, mandatory, "%u")
+
+ foreach_bfd_cli_udp_session_auth_activate_cli_param (DECLARE);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ int something_parsed = 0;
+ foreach_bfd_cli_udp_session_auth_activate_cli_param (UNFORMAT);
+
+ if (!something_parsed)
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ foreach_bfd_cli_udp_session_auth_activate_cli_param (CHECK_MANDATORY);
+
+ u8 is_delayed = 0;
+ if (have_delayed_token)
+ {
+ static const char yes[] = "yes";
+ static const char no[] = "no";
+ if (!memcmp (delayed_token, yes, sizeof (yes) - 1))
+ {
+ is_delayed = 1;
+ }
+ else if (!memcmp (delayed_token, no, sizeof (no) - 1))
+ {
+ is_delayed = 0;
+ }
+ else
+ {
+ ret =
+ clib_error_return (0,
+ "Unrecognized value for `%s' parameter: `%v'",
+ DELAYED_STR, delayed_token);
+ goto out;
+ }
+ }
+
+ if (have_bfd_key_id && bfd_key_id > 255)
+ {
+ ret = clib_error_return (0, "%s value `%u' out of range <1,255>",
+ BFD_KEY_ID_STR, bfd_key_id);
+ goto out;
+ }
+
+ vnet_api_error_t rv =
+ bfd_udp_auth_activate (sw_if_index, &local_addr, &peer_addr, conf_key_id,
+ bfd_key_id, is_delayed);
+ if (rv)
+ {
+ ret =
+ clib_error_return (0,
+ "`bfd_udp_auth_activate' API call failed, rv=%d:%U",
+ (int) rv, format_vnet_api_errno, rv);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = {
+ .path = "bfd udp session auth activate",
+ .short_help = "bfd udp session auth activate"
+ " interface <interface>"
+ " local-addr <local-address>"
+ " peer-addr <peer-address>"
+ " conf-key-id <config key ID>"
+ " bfd-key-id <BFD key ID>"
+ " [ delayed <yes|no> ]",
+ .function = bfd_cli_udp_session_auth_activate,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input,
+ CLIB_UNUSED (vlib_cli_command_t *lmd))
+{
+ clib_error_t *ret = NULL;
+#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ unformat_ip46_address) \
+ F (u8 *, delayed_token, DELAYED_STR, optional, "%v")
+
+ foreach_bfd_cli_udp_session_auth_deactivate_cli_param (DECLARE);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ int something_parsed = 0;
+ foreach_bfd_cli_udp_session_auth_deactivate_cli_param (UNFORMAT);
+
+ if (!something_parsed)
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ foreach_bfd_cli_udp_session_auth_deactivate_cli_param (CHECK_MANDATORY);
+
+ u8 is_delayed = 0;
+ if (have_delayed_token)
+ {
+ static const char yes[] = "yes";
+ static const char no[] = "no";
+ if (!memcmp (delayed_token, yes, sizeof (yes) - 1))
+ {
+ is_delayed = 1;
+ }
+ else if (!memcmp (delayed_token, no, sizeof (no) - 1))
+ {
+ is_delayed = 0;
+ }
+ else
+ {
+ ret = clib_error_return (
+ 0, "Unrecognized value for `%s' parameter: `%v'", DELAYED_STR,
+ delayed_token);
+ goto out;
+ }
+ }
+
+ vnet_api_error_t rv = bfd_udp_auth_deactivate (sw_if_index, &local_addr,
+ &peer_addr, is_delayed);
+ if (rv)
+ {
+ ret = clib_error_return (
+ 0, "`bfd_udp_auth_deactivate' API call failed, rv=%d:%U", (int)rv,
+ format_vnet_api_errno, rv);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = {
+  .path = "bfd udp session auth deactivate",
+  .short_help = "bfd udp session auth deactivate"
+                " interface <interface>"
+                " local-addr <local-address>"
+                " peer-addr <peer-address>"
+                " [ delayed <yes|no> ]",
+ .function = bfd_cli_udp_session_auth_deactivate,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_cli_udp_set_echo_source (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ clib_error_t *ret = NULL;
+#define foreach_bfd_cli_udp_set_echo_source_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main)
+
+ foreach_bfd_cli_udp_set_echo_source_cli_param (DECLARE);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ int something_parsed = 0;
+ foreach_bfd_cli_udp_set_echo_source_cli_param (UNFORMAT);
+
+ if (!something_parsed)
+ {
+ ret = clib_error_return (0, "Unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ foreach_bfd_cli_udp_set_echo_source_cli_param (CHECK_MANDATORY);
+
+ vnet_api_error_t rv = bfd_udp_set_echo_source (sw_if_index);
+ if (rv)
+ {
+ ret =
+ clib_error_return (0,
+ "`bfd_udp_set_echo_source' API call failed, rv=%d:%U",
+ (int) rv, format_vnet_api_errno, rv);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_udp_set_echo_source_cmd, static) = {
+ .path = "bfd udp echo-source set",
+ .short_help = "bfd udp echo-source set interface <interface>",
+ .function = bfd_cli_udp_set_echo_source,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ vnet_api_error_t rv = bfd_udp_del_echo_source ();
+ if (rv)
+ {
+ return clib_error_return (0,
+ "`bfd_udp_del_echo_source' API call failed, rv=%d:%U",
+ (int) rv, format_vnet_api_errno, rv);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bfd_cli_udp_del_echo_source_cmd, static) = {
+ .path = "bfd udp echo-source del",
+ .short_help = "bfd udp echo-source del",
+ .function = bfd_cli_udp_del_echo_source,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_debug.h b/src/vnet/bfd/bfd_debug.h
new file mode 100644
index 00000000..a06e934f
--- /dev/null
+++ b/src/vnet/bfd/bfd_debug.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD debugging declarations
+ */
+#ifndef __included_bfd_debug_h__
+#define __included_bfd_debug_h__
+
+/* controls debug prints */
+#define BFD_DEBUG (0)
+
+#if BFD_DEBUG
+#define BFD_DEBUG_FILE_DEF \
+ static const char *__file = NULL; \
+ { \
+ __file = strrchr (__FILE__, '/'); \
+ if (__file) \
+ { \
+ ++__file; \
+ } \
+ else \
+ { \
+ __file = __FILE__; \
+ } \
+ }
+
+#define BFD_DBG(fmt, ...) \
+ do \
+ { \
+ BFD_DEBUG_FILE_DEF \
+ static u8 *_s = NULL; \
+ vlib_main_t *vm = vlib_get_main (); \
+ _s = format (_s, "%6.02f:DBG:%s:%d:%s():" fmt, vlib_time_now (vm), \
+ __file, __LINE__, __func__, ##__VA_ARGS__); \
+ printf ("%.*s\n", vec_len (_s), _s); \
+ vec_reset_length (_s); \
+ } \
+  while (0)
+
+#define BFD_ERR(fmt, ...) \
+ do \
+ { \
+ BFD_DEBUG_FILE_DEF \
+ static u8 *_s = NULL; \
+ vlib_main_t *vm = vlib_get_main (); \
+ _s = format (_s, "%6.02f:ERR:%s:%d:%s():" fmt, vlib_time_now (vm), \
+ __file, __LINE__, __func__, ##__VA_ARGS__); \
+ printf ("%.*s\n", vec_len (_s), _s); \
+ vec_reset_length (_s); \
+ } \
+  while (0)
+
+#define BFD_CLK_FMT "%luus/%lu clocks/%.2fs"
+#define BFD_CLK_PRN(clocks) \
+ (u64) ((((f64)clocks) / vlib_get_main ()->clib_time.clocks_per_second) * \
+ USEC_PER_SECOND), \
+ (clocks), \
+ (((f64)clocks) / vlib_get_main ()->clib_time.clocks_per_second)
+
+#else
+#define BFD_DBG(...)
+#define BFD_ERR(...)
+#endif
+
+#endif /* __included_bfd_debug_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_doc.md b/src/vnet/bfd/bfd_doc.md
new file mode 100644
index 00000000..7d7606e4
--- /dev/null
+++ b/src/vnet/bfd/bfd_doc.md
@@ -0,0 +1,374 @@
+# BFD module {#bfd_doc}
+
+## Overview
+
+Bidirectional Forwarding Detection in VPP currently supports single-hop UDP
+transport based on RFC 5880 and RFC 5881.
+
+## Usage
+
+### General usage
+
+BFD sessions are created using APIs only. The following CLI commands are
+implemented, which call the APIs to manipulate BFD:
+
+#### Show commands:
+
+> show bfd [keys|sessions|echo-source]
+
+Show the existing keys, sessions or echo-source.
+
+#### Key manipulation
+
+##### Create a new key or modify an existing key
+
+> bfd key set conf-key-id <id> type <keyed-sha1|meticulous-keyed-sha1> secret <secret>
+
+Parameters:
+
+* conf-key-id - local configuration key ID, used to uniquely identify this key
+* type - type of the key
+* secret - shared secret (hex data)
+
+Example:
+
+> bfd key set conf-key-id 2368880803 type meticulous-keyed-sha1 secret 69d685b0d990cdba46872706dc
+
+Notes:
+
+* in-use key cannot be modified
+
+##### Delete an existing key
+
+> bfd key del conf-key-id <id>
+
+Parameters:
+
+* conf-key-id - local configuration key ID, used to uniquely identify this key
+
+Example:
+
+> bfd key del conf-key-id 2368880803
+
+Notes:
+
+* in-use key cannot be deleted
+
+##### Create a new (plain or authenticated) BFD session
+
+> bfd udp session add interface <interface> local-addr <address> peer-addr <address> desired-min-tx <interval> required-min-rx <interval> detect-mult <multiplier> [ conf-key-id <ID> bfd-key-id <ID> ]
+
+Parameters:
+
+* interface - interface to which this session is tied
+* local-addr - local address (ipv4 or ipv6)
+* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+* desired-min-tx - desired minimum tx interval (microseconds)
+* required-min-rx - required minimum rx interval (microseconds)
+* detect-mult - detect multiplier (must be non-zero)
+* conf-key-id - local configuration key ID
+* bfd-key-id - BFD key ID, as carried in BFD control frames
+
+Example:
+
+> bfd udp session add interface pg0 local-addr fd01:1::1 peer-addr fd01:1::2 desired-min-tx 100000 required-min-rx 100000 detect-mult 3 conf-key-id 1029559112 bfd-key-id 13
+
+Notes:
+
+* if conf-key-id and bfd-key-id are not specified, the session is non-authenticated
+* desired-min-tx controls the desired transmission rate of both control frames and echo packets
+
+##### Modify BFD session
+
+> bfd udp session mod interface <interface> local-addr <address> peer-addr <address> desired-min-tx <interval> required-min-rx <interval> detect-mult <multiplier>
+
+Parameters:
+
+* interface - interface to which this session is tied
+* local-addr - local address (ipv4 or ipv6)
+* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+* desired-min-tx - desired minimum tx interval (microseconds)
+* required-min-rx - required minimum rx interval (microseconds)
+* detect-mult - detect multiplier (must be non-zero)
+
+Example:
+
+> bfd udp session mod interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 desired-min-tx 300000 required-min-rx 200000 detect-mult 12
+
+Notes:
+
+* desired-min-tx controls the desired transmission rate of both control frames and echo packets
+
+##### Delete an existing BFD session
+
+> bfd udp session del interface <interface> local-addr <address> peer-addr <address>
+
+Parameters:
+
+* interface - interface to which this session is tied
+* local-addr - local address (ipv4 or ipv6)
+* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+
+Example:
+
+> bfd udp session del interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2
+
+##### Set session admin-up or admin-down
+
+> bfd udp session set-flags interface <interface> local-addr <address> peer-addr <address> admin <up|down>
+
+Parameters:
+
+* interface - interface to which this session is tied
+* local-addr - local address (ipv4 or ipv6)
+* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+* admin - up/down based on desired action
+
+Example:
+
+> bfd udp session set-flags admin down interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2
+
+##### Activate/change authentication for existing session
+
+> bfd udp session auth activate interface <interface> local-addr <address> peer-addr <address> conf-key-id <ID> bfd-key-id <ID> [ delayed <yes|no> ]
+
+Parameters:
+
+* interface - interface to which this session is tied
+* local-addr - local address (ipv4 or ipv6)
+* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+* conf-key-id - local configuration key ID
+* bfd-key-id - BFD key ID, as carried in BFD control frames
+* delayed - if yes, this action is delayed until the peer performs the same action
+
+Example:
+
+> bfd udp session auth activate interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 conf-key-id 540928695 bfd-key-id 239 delayed yes
+
+Notes:
+
+* see [Delayed option] for more information
+
+##### Deactivate authentication for existing session
+
+> bfd udp session auth deactivate interface <interface> local-addr <address> peer-addr <address> [ delayed <yes|no> ]
+
+Parameters:
+
+* interface - interface to which this session is tied
+* local-addr - local address (ipv4 or ipv6)
+* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+* delayed - if yes, this action is delayed until the peer performs the same action
+
+Example:
+
+> bfd udp session auth deactivate interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2
+
+Notes:
+
+* see [Delayed option] for more information
+
+##### Set echo-source interface
+
+> bfd udp echo-source set interface <interface>
+
+Parameters:
+
+* interface - interface used for getting source address for echo packets
+
+Example:
+
+> bfd udp echo-source set interface loop0
+
+##### Delete echo-source interface
+
+> bfd udp echo-source del
+
+Example:
+
+> bfd udp echo-source del
+
+### Authentication
+
+BFD sessions should be authenticated for security purposes. SHA1 and
+meticulous SHA1 authentication are supported by VPP. Authentication keys are
+configured in VPP first; afterwards they can be used by sessions.
+
+There are two key IDs in the scope of BFD session:
+
+* configuration key ID is the internal unique key ID inside VPP and is never
+  communicated to any peer; it serves only to identify the key
+* BFD key ID is the key ID carried in BFD control frames and is used for
+ verifying authentication
+
+#### Turning auth on/off
+
+Authentication can be turned on or off at any time. Care must be taken,
+however, to synchronize the authentication changes with the peer's actions,
+otherwise the session might go down.
+
+##### Delayed option
+
+The delayed option is useful for synchronizing authentication changes with a
+peer. If it is specified, the authentication change is not performed
+immediately. Instead, VPP continues to transmit packets using the old
+authentication method (unauthenticated or using the old sha1 key). When a
+packet is received which does not pass the current authentication, VPP tries
+to authenticate it using the new method (which might be none, if deactivating
+authentication) and, if it passes, puts the new authentication method in use.
+
+The recommended procedure for enabling/changing/disabling session
+authentication is:
+
+1. perform authentication change on vpp's side with delayed option set to yes
+2. perform authentication change on peer's side (without delayed option)
+
+Notes:
+
+* if both peers use the delayed option at the same time, the change will
+  never be carried out, since neither peer will see any packet with the new
+  authentication which could trigger the change
+* the remote peer does not need to support or even be aware of this mechanism
+  for it to work properly
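+
+For illustration, activating authentication on an established session between
+two peers might look like this (the interface names, addresses and key values
+below are made up). On the first peer:
+
+> bfd udp session auth activate interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 conf-key-id 540928695 bfd-key-id 239 delayed yes
+
+Then on the second peer:
+
+> bfd udp session auth activate interface pg0 local-addr 172.16.1.2 peer-addr 172.16.1.1 conf-key-id 540928695 bfd-key-id 239
+
+Once the first peer receives a frame which passes the new authentication, it
+switches to the new method as well, so the session never goes down.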
+
+
+### Echo function
+
+The echo function is used by VPP whenever a peer declares the willingness to
+support it, echo-source is set and it contains a usable subnet (see below).
+When the echo function is switched on, the required min rx interval advertised
+to the peer is set to 1 second (or the configured value, if it is higher).
+
+#### Echo source address
+
+Because echo packets are only looped back (and not processed in any way)
+by a peer, it's necessary to set the source address in a way which avoids
+packet drop due to spoofing protection by VPP. Per RFC, the source address
+should not be in the subnet set on the interface over which the echo packets
+are sent. Also, it must not be any VPP-local address, otherwise the packet
+gets dropped on receipt by VPP. The solution is to create a loopback interface
+with a (private) IPv4/IPv6 subnet assigned as echo-source. BFD then picks
+an unused address from the subnet by flipping the last bit and uses that as
+the source address in the echo packets, thus meeting the RFC recommendation
+while avoiding spoofing protection.
+
+Example: if 10.10.10.3/31 is the subnet, then 10.10.10.2 will be used as
+ source address in (IPv4) echo packets
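+
+To make the derivation concrete, here is a minimal standalone sketch (not
+VPP's actual implementation, which operates on vnet address types) of the
+last-bit flip described above:
+
+```c
+#include <stdio.h>
+#include <stdint.h>
+
+/* derive an unused echo source address from an address within the
+ * echo-source subnet by flipping its least significant bit */
+static uint32_t
+echo_src_addr (uint32_t addr_host_order)
+{
+  return addr_host_order ^ 1;
+}
+
+int
+main (void)
+{
+  uint32_t addr = (10 << 24) | (10 << 16) | (10 << 8) | 3; /* 10.10.10.3 */
+  uint32_t src = echo_src_addr (addr);
+  printf ("%u.%u.%u.%u\n", src >> 24, (src >> 16) & 0xff, (src >> 8) & 0xff,
+          src & 0xff); /* prints 10.10.10.2 */
+  return 0;
+}
+```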
+
+### Demand mode
+
+Demand mode is respected by VPP, but not used locally. The only scenario when
+demand mode could make sense currently is when echo is active. Because echo
+packets are inherently insecure against an adversary looping them back, a
+poll sequence would be required for slow periodic connectivity verification
+anyway.
+It's more efficient to just ask the remote peer to send slow periodic control
+frames without VPP initiating periodic poll sequences.
+
+### Admin-down
+
+A session may be put admin-down at any time. This immediately changes the
+state to AdminDown, where it remains until the session is put admin-up.
+
+## BFD implementation notes
+
+Because BFD can work over different transport layers, the BFD code is separated
+into core BFD functionality - main module implemented in bfd_main.c
+and transport-specific code implemented in bfd_udp.c.
+
+### Main module
+
+Main module is responsible for handling all the BFD functionality defined
+in RFC 5880.
+
+#### Internal API
+
+Internal APIs defined in bfd_main.h are called from the transport-specific
+code to create/modify/delete BFD sessions.
+
+#### Packet receipt
+
+When a packet is received by the transport layer, it is forwarded to the main
+module (to the main thread) via an RPC call. At this point, the authentication
+has already been verified, so the packet is consumed, session parameters are
+updated accordingly and the state is changed (if applicable). Based on these,
+the timeouts are adjusted if required and an event is sent to the process node
+to wake up and recalculate its sleep time.
+
+#### Packet transmit
+
+The main module allocates a vlib_buffer_t, creates the required BFD frame
+(control or echo) in it, then calls the transport-specific code to add the
+transport layer. Finally, a frame containing the buffer is created and
+enqueued to the appropriate node.
+
+#### Process node
+
+The main module implements one process node which is a simple loop. The
+process node gets the next timeout from the timer wheel, sleeps until the
+timeout expires and then calls a timeout routine which drives the state
+machine for each session which timed out. The sleep is interrupted externally
+via a vlib event when a session is added or modified in a way which might
+require timer wheel manipulation. In this case the caller inserts the
+necessary timeout into the timer wheel and then signals the process node to
+wake up early, handle possible timeouts and recalculate the sleep time.
+
+#### State machine
+
+The default state of a newly created BFD session is Down, per RFC 5880. The
+state changes to Init, Up or Down based on events like the state received from
+the peer and timeouts. The session state can be set to AdminDown using a
+binary API, which prevents it from going to any other state until this
+limitation is removed. While admin-down, this state is advertised to the peer
+in slow periodic control frames.
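+
+The receive-side transitions can be sketched in standalone C as follows (the
+names are hypothetical and the real code in bfd_main.c also handles timeouts,
+poll sequences and diagnostic codes):
+
+```c
+#include <assert.h>
+
+typedef enum { ADMIN_DOWN, DOWN, INIT, UP } state_t;
+
+/* next local state after receiving a control frame carrying the
+ * remote state, per the RFC 5880 state machine */
+static state_t
+on_rx_state (state_t local, state_t remote)
+{
+  if (local == ADMIN_DOWN)
+    return ADMIN_DOWN;
+  if (remote == ADMIN_DOWN)
+    return DOWN;
+  switch (local)
+    {
+    case DOWN:
+      return remote == DOWN ? INIT : remote == INIT ? UP : DOWN;
+    case INIT:
+      return remote == DOWN ? INIT : UP;
+    case UP:
+      return remote == DOWN ? DOWN : UP;
+    default:
+      return local;
+    }
+}
+
+int
+main (void)
+{
+  assert (on_rx_state (DOWN, DOWN) == INIT); /* handshake starts */
+  assert (on_rx_state (INIT, INIT) == UP);   /* session comes up */
+  assert (on_rx_state (UP, DOWN) == DOWN);   /* peer lost the session */
+  return 0;
+}
+```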
+
+For each session, the following timeouts are maintained:
+
+1. tx timeout - used for sending out control frames
+2. rx timeout - used for detecting session timeout
+3. echo tx timeout - used for sending out echo frames
+4. echo rx timeout - used for detecting session timeout based on echo
+
+These timeouts are maintained in CPU clocks and recalculated when appropriate
+(e.g. the rx timeout is bumped when a packet is received, keeping the session
+alive). Only the earliest timeout is inserted into the timer wheel at a time,
+and timer wheel events are never deleted; spurious events are ignored instead.
+This allows efficient operation, such as not inserting an event into the timer
+wheel for each packet received, or ignoring left-over events when a BFD
+session is removed and a new one is created with the same session index.
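+
+A minimal standalone sketch of the "ignore left-over events" idea (assuming,
+for illustration, that a session remembers only the single earliest timeout it
+expects):
+
+```c
+#include <assert.h>
+#include <stdint.h>
+
+typedef struct { uint64_t wheel_time; } session_t;
+
+/* act on a wheel event only if it matches the timeout the session
+ * currently expects (within the wheel's inaccuracy); anything else
+ * is a stale left-over event and is ignored */
+static int
+event_is_relevant (const session_t *s, uint64_t now, uint64_t inaccuracy)
+{
+  return s->wheel_time && now + inaccuracy >= s->wheel_time;
+}
+
+int
+main (void)
+{
+  session_t s = { .wheel_time = 1000 };
+  assert (event_is_relevant (&s, 995, 10));   /* fired a bit early - ok */
+  s.wheel_time = 5000;                        /* timeout was pushed back */
+  assert (!event_is_relevant (&s, 1000, 10)); /* left-over event ignored */
+  return 0;
+}
+```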
+
+#### Authentication keys management
+
+Authentication keys are managed internally in a pool, with each key tracking
+its use count. Removal/modification is only allowed if the key is not in use.
+
+### UDP module
+
+The UDP module is responsible for:
+
+1. public APIs/CLIs to configure BFD over UDP
+2. support code called by the main module to encapsulate/decapsulate BFD packets
+
+This module implements two graph nodes - one consuming ipv4 packets and one
+consuming ipv6 packets targeted at BFD ports 3784 and 3785.
+
+#### Packet receipt
+
+BFD packet receipt starts in the bfd udp graph nodes. Since the code needs to
+verify IP/UDP header data, it relies on the ip4-local (and ip6-local) nodes to
+store pointers to the appropriate headers. First, the your-discriminator field
+is extracted from the BFD packet and used to look up the existing session. In
+case it's zero, the pair of IP addresses and the sw_if_index is used to look
+up the session. Then the main module is called to verify the authentication,
+if present. Afterwards a check is made whether the IP/UDP headers are correct.
+If yes, an RPC call is made to the main thread to consume the packet and take
+action upon it.
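+
+The lookup order can be sketched in standalone C (the lookup helpers below are
+trivial stand-ins; VPP uses its own hash tables and types):
+
+```c
+#include <stdio.h>
+#include <stdint.h>
+
+/* pretend session 0 has discriminator 42 and lives on interface 1 */
+static int
+lookup_by_discriminator (uint32_t your_disc)
+{
+  return your_disc == 42 ? 0 : -1;
+}
+
+static int
+lookup_by_key (uint32_t local, uint32_t peer, uint32_t sw_if_index)
+{
+  return (local && peer && sw_if_index == 1) ? 0 : -1;
+}
+
+/* your-discriminator takes precedence; the address pair and receiving
+ * interface are used only when the peer does not know us yet */
+static int
+find_session (uint32_t your_disc, uint32_t local, uint32_t peer,
+              uint32_t sw_if_index)
+{
+  if (your_disc)
+    return lookup_by_discriminator (your_disc);
+  return lookup_by_key (local, peer, sw_if_index);
+}
+
+int
+main (void)
+{
+  printf ("%d %d\n", find_session (42, 0, 0, 0),
+          find_session (0, 0xac100101, 0xac100102, 1));
+  return 0;
+}
+```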
+
+#### Packet transmission
+
+When the process node decides that a packet needs to be transmitted, it
+creates a buffer, fills in the BFD frame data and calls the UDP module to add
+the transport layer. For control frames this is a simple operation, consisting
+of just adding UDP/IP headers based on session data. For echo frames an
+additional step is performed: looking at the echo-source interface and picking
+an address. If this fails, the packet cannot be transmitted and an error is
+returned to the main thread.
diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c
new file mode 100644
index 00000000..b58a5132
--- /dev/null
+++ b/src/vnet/bfd/bfd_main.c
@@ -0,0 +1,2058 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD nodes implementation
+ */
+
+#if WITH_LIBSSL > 0
+#include <openssl/sha.h>
+#endif
+
+#if __SSE4_2__
+#include <x86intrin.h>
+#endif
+
+#include <vppinfra/random.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/xxhash.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/bfd/bfd_debug.h>
+#include <vnet/bfd/bfd_protocol.h>
+#include <vnet/bfd/bfd_main.h>
+
+static u64
+bfd_calc_echo_checksum (u32 discriminator, u64 expire_time, u32 secret)
+{
+ u64 checksum = 0;
+#if __SSE4_2__ && !defined (__i386__)
+ checksum = _mm_crc32_u64 (0, discriminator);
+ checksum = _mm_crc32_u64 (checksum, expire_time);
+ checksum = _mm_crc32_u64 (checksum, secret);
+#else
+ checksum = clib_xxhash (discriminator ^ expire_time ^ secret);
+#endif
+ return checksum;
+}
+
+static u64
+bfd_usec_to_clocks (const bfd_main_t * bm, u64 us)
+{
+ return bm->cpu_cps * ((f64) us / USEC_PER_SECOND);
+}
+
+u32
+bfd_clocks_to_usec (const bfd_main_t * bm, u64 clocks)
+{
+ return (clocks / bm->cpu_cps) * USEC_PER_SECOND;
+}
+
+static vlib_node_registration_t bfd_process_node;
+
+u8 *
+format_bfd_auth_key (u8 * s, va_list * args)
+{
+ const bfd_auth_key_t *key = va_arg (*args, bfd_auth_key_t *);
+ if (key)
+ {
+ s = format (s, "{auth-type=%u:%s, conf-key-id=%u, use-count=%u}, ",
+ key->auth_type, bfd_auth_type_str (key->auth_type),
+ key->conf_key_id, key->use_count);
+ }
+ else
+ {
+ s = format (s, "{none}");
+ }
+ return s;
+}
+
+/*
+ * We actually send all bfd pkts to the "error" node after scanning
+ * them, so the graph node has only one next-index. The "error-drop"
+ * node automatically bumps our per-node packet counters for us.
+ */
+typedef enum
+{
+ BFD_INPUT_NEXT_NORMAL,
+ BFD_INPUT_N_NEXT,
+} bfd_input_next_t;
+
+static void bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
+ int handling_wakeup);
+
+static void
+bfd_set_defaults (bfd_main_t * bm, bfd_session_t * bs)
+{
+ bs->local_state = BFD_STATE_down;
+ bs->local_diag = BFD_DIAG_CODE_no_diag;
+ bs->remote_state = BFD_STATE_down;
+ bs->remote_discr = 0;
+ bs->hop_type = BFD_HOP_TYPE_SINGLE;
+ bs->config_desired_min_tx_usec = BFD_DEFAULT_DESIRED_MIN_TX_USEC;
+ bs->config_desired_min_tx_clocks = bm->default_desired_min_tx_clocks;
+ bs->effective_desired_min_tx_clocks = bm->default_desired_min_tx_clocks;
+ bs->remote_min_rx_usec = 1;
+ bs->remote_min_rx_clocks = bfd_usec_to_clocks (bm, bs->remote_min_rx_usec);
+ bs->remote_min_echo_rx_usec = 0;
+ bs->remote_min_echo_rx_clocks = 0;
+ bs->remote_demand = 0;
+ bs->auth.remote_seq_number = 0;
+ bs->auth.remote_seq_number_known = 0;
+ bs->auth.local_seq_number = random_u32 (&bm->random_seed);
+ bs->echo_secret = random_u32 (&bm->random_seed);
+}
+
+static void
+bfd_set_diag (bfd_session_t * bs, bfd_diag_code_e code)
+{
+ if (bs->local_diag != code)
+ {
+ BFD_DBG ("set local_diag, bs_idx=%d: '%d:%s'", bs->bs_idx, code,
+ bfd_diag_code_string (code));
+ bs->local_diag = code;
+ }
+}
+
+static void
+bfd_set_state (bfd_main_t * bm, bfd_session_t * bs,
+ bfd_state_e new_state, int handling_wakeup)
+{
+ if (bs->local_state != new_state)
+ {
+ BFD_DBG ("Change state, bs_idx=%d: %s->%s", bs->bs_idx,
+ bfd_state_string (bs->local_state),
+ bfd_state_string (new_state));
+ bs->local_state = new_state;
+ bfd_on_state_change (bm, bs, clib_cpu_time_now (), handling_wakeup);
+ }
+}
+
+const char *
+bfd_poll_state_string (bfd_poll_state_e state)
+{
+ switch (state)
+ {
+#define F(x) \
+ case BFD_POLL_##x: \
+ return "BFD_POLL_" #x;
+ foreach_bfd_poll_state (F)
+#undef F
+ }
+ return "UNKNOWN";
+}
+
+static void
+bfd_set_poll_state (bfd_session_t * bs, bfd_poll_state_e state)
+{
+ if (bs->poll_state != state)
+ {
+ BFD_DBG ("Setting poll state=%s, bs_idx=%u",
+ bfd_poll_state_string (state), bs->bs_idx);
+ bs->poll_state = state;
+ }
+}
+
+static void
+bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs)
+{
+ bs->transmit_interval_clocks =
+ clib_max (bs->effective_desired_min_tx_clocks, bs->remote_min_rx_clocks);
+ BFD_DBG ("Recalculated transmit interval " BFD_CLK_FMT,
+ BFD_CLK_PRN (bs->transmit_interval_clocks));
+}
+
+static void
+bfd_recalc_echo_tx_interval (bfd_main_t * bm, bfd_session_t * bs)
+{
+ bs->echo_transmit_interval_clocks =
+ clib_max (bs->effective_desired_min_tx_clocks,
+ bs->remote_min_echo_rx_clocks);
+ BFD_DBG ("Recalculated echo transmit interval " BFD_CLK_FMT,
+ BFD_CLK_PRN (bs->echo_transmit_interval_clocks));
+}
+
+static void
+bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now)
+{
+ if (bs->local_detect_mult > 1)
+ {
+ /* common case - 75-100% of transmit interval */
+ bs->tx_timeout_clocks = bs->last_tx_clocks +
+ (1 - .25 * (random_f64 (&bm->random_seed))) *
+ bs->transmit_interval_clocks;
+ if (bs->tx_timeout_clocks < now)
+ {
+ /*
+ * the timeout is in the past, which means that either remote
+ * demand mode was set or performance/clock issues ...
+ */
+ BFD_DBG ("Missed %lu transmit events (now is %lu, calc "
+ "tx_timeout is %lu)",
+ (now - bs->tx_timeout_clocks) /
+ bs->transmit_interval_clocks, now, bs->tx_timeout_clocks);
+ bs->tx_timeout_clocks = now;
+ }
+ }
+ else
+ {
+ /* special case - 75-90% of transmit interval */
+ bs->tx_timeout_clocks = bs->last_tx_clocks +
+ (.9 - .15 * (random_f64 (&bm->random_seed))) *
+ bs->transmit_interval_clocks;
+ if (bs->tx_timeout_clocks < now)
+ {
+ /*
+ * the timeout is in the past, which means that either remote
+ * demand mode was set or performance/clock issues ...
+ */
+ BFD_DBG ("Missed %lu transmit events (now is %lu, calc "
+ "tx_timeout is %lu)",
+ (now - bs->tx_timeout_clocks) /
+ bs->transmit_interval_clocks, now, bs->tx_timeout_clocks);
+ bs->tx_timeout_clocks = now;
+ }
+ }
+ if (bs->tx_timeout_clocks)
+ {
+ BFD_DBG ("Next transmit in %lu clocks/%.02fs@%lu",
+ bs->tx_timeout_clocks - now,
+ (bs->tx_timeout_clocks - now) / bm->cpu_cps,
+ bs->tx_timeout_clocks);
+ }
+}
+
+static void
+bfd_calc_next_echo_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now)
+{
+ bs->echo_tx_timeout_clocks =
+ bs->echo_last_tx_clocks + bs->echo_transmit_interval_clocks;
+ if (bs->echo_tx_timeout_clocks < now)
+ {
+ /* huh, we've missed it already, transmit now */
+ BFD_DBG ("Missed %lu echo transmit events (now is %lu, calc tx_timeout "
+ "is %lu)",
+ (now - bs->echo_tx_timeout_clocks) /
+ bs->echo_transmit_interval_clocks,
+ now, bs->echo_tx_timeout_clocks);
+ bs->echo_tx_timeout_clocks = now;
+ }
+ BFD_DBG ("Next echo transmit in %lu clocks/%.02fs@%lu",
+ bs->echo_tx_timeout_clocks - now,
+ (bs->echo_tx_timeout_clocks - now) / bm->cpu_cps,
+ bs->echo_tx_timeout_clocks);
+}
+
+static void
+bfd_recalc_detection_time (bfd_main_t * bm, bfd_session_t * bs)
+{
+ if (bs->local_state == BFD_STATE_init || bs->local_state == BFD_STATE_up)
+ {
+ bs->detection_time_clocks =
+ bs->remote_detect_mult *
+ clib_max (bs->effective_required_min_rx_clocks,
+ bs->remote_desired_min_tx_clocks);
+ BFD_DBG ("Recalculated detection time %lu clocks/%.2fs",
+ bs->detection_time_clocks,
+ bs->detection_time_clocks / bm->cpu_cps);
+ }
+}
+
+static void
+bfd_set_timer (bfd_main_t * bm, bfd_session_t * bs, u64 now,
+ int handling_wakeup)
+{
+ u64 next = 0;
+ u64 rx_timeout = 0;
+ u64 tx_timeout = 0;
+ if (BFD_STATE_up == bs->local_state)
+ {
+ rx_timeout = bs->last_rx_clocks + bs->detection_time_clocks;
+ }
+ if (BFD_STATE_up != bs->local_state ||
+ (!bs->remote_demand && bs->remote_min_rx_usec) ||
+ BFD_POLL_NOT_NEEDED != bs->poll_state)
+ {
+ tx_timeout = bs->tx_timeout_clocks;
+ }
+ if (tx_timeout && rx_timeout)
+ {
+ next = clib_min (tx_timeout, rx_timeout);
+ }
+ else if (tx_timeout)
+ {
+ next = tx_timeout;
+ }
+ else if (rx_timeout)
+ {
+ next = rx_timeout;
+ }
+ if (bs->echo && next > bs->echo_tx_timeout_clocks)
+ {
+ next = bs->echo_tx_timeout_clocks;
+ }
+ BFD_DBG ("bs_idx=%u, tx_timeout=%lu, echo_tx_timeout=%lu, rx_timeout=%lu, "
+ "next=%s",
+ bs->bs_idx, tx_timeout, bs->echo_tx_timeout_clocks, rx_timeout,
+ next == tx_timeout
+ ? "tx" : (next == bs->echo_tx_timeout_clocks ? "echo tx" : "rx"));
+ /* sometimes the wheel expires an event a bit sooner than requested, account
+ for that here */
+ if (next && (now + bm->wheel_inaccuracy > bs->wheel_time_clocks ||
+ next < bs->wheel_time_clocks || !bs->wheel_time_clocks))
+ {
+ bs->wheel_time_clocks = next;
+ BFD_DBG ("timing_wheel_insert(%p, %lu (%ld clocks/%.2fs in the "
+ "future), %u);",
+ &bm->wheel, bs->wheel_time_clocks,
+ (i64) bs->wheel_time_clocks - clib_cpu_time_now (),
+ (i64) (bs->wheel_time_clocks - clib_cpu_time_now ()) /
+ bm->cpu_cps, bs->bs_idx);
+ timing_wheel_insert (&bm->wheel, bs->wheel_time_clocks, bs->bs_idx);
+ if (!handling_wakeup)
+ {
+ vlib_process_signal_event (bm->vlib_main,
+ bm->bfd_process_node_index,
+ BFD_EVENT_RESCHEDULE, bs->bs_idx);
+ }
+ }
+}
+
+static void
+bfd_set_effective_desired_min_tx (bfd_main_t * bm,
+ bfd_session_t * bs, u64 now,
+ u64 desired_min_tx_clocks)
+{
+ bs->effective_desired_min_tx_clocks = desired_min_tx_clocks;
+ BFD_DBG ("Set effective desired min tx to " BFD_CLK_FMT,
+ BFD_CLK_PRN (bs->effective_desired_min_tx_clocks));
+ bfd_recalc_detection_time (bm, bs);
+ bfd_recalc_tx_interval (bm, bs);
+ bfd_recalc_echo_tx_interval (bm, bs);
+ bfd_calc_next_tx (bm, bs, now);
+}
+
+static void
+bfd_set_effective_required_min_rx (bfd_main_t * bm,
+ bfd_session_t * bs,
+ u64 required_min_rx_clocks)
+{
+ bs->effective_required_min_rx_clocks = required_min_rx_clocks;
+ BFD_DBG ("Set effective required min rx to " BFD_CLK_FMT,
+ BFD_CLK_PRN (bs->effective_required_min_rx_clocks));
+ bfd_recalc_detection_time (bm, bs);
+}
+
+static void
+bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs,
+ u64 now, u32 remote_required_min_rx_usec)
+{
+ if (bs->remote_min_rx_usec != remote_required_min_rx_usec)
+ {
+ bs->remote_min_rx_usec = remote_required_min_rx_usec;
+ bs->remote_min_rx_clocks =
+ bfd_usec_to_clocks (bm, remote_required_min_rx_usec);
+ BFD_DBG ("Set remote min rx to " BFD_CLK_FMT,
+ BFD_CLK_PRN (bs->remote_min_rx_clocks));
+ bfd_recalc_detection_time (bm, bs);
+ bfd_recalc_tx_interval (bm, bs);
+ }
+}
+
+static void
+bfd_set_remote_required_min_echo_rx (bfd_main_t * bm, bfd_session_t * bs,
+ u64 now,
+ u32 remote_required_min_echo_rx_usec)
+{
+ if (bs->remote_min_echo_rx_usec != remote_required_min_echo_rx_usec)
+ {
+ bs->remote_min_echo_rx_usec = remote_required_min_echo_rx_usec;
+ bs->remote_min_echo_rx_clocks =
+ bfd_usec_to_clocks (bm, bs->remote_min_echo_rx_usec);
+ BFD_DBG ("Set remote min echo rx to " BFD_CLK_FMT,
+ BFD_CLK_PRN (bs->remote_min_echo_rx_clocks));
+ bfd_recalc_echo_tx_interval (bm, bs);
+ }
+}
+
+static void
+bfd_notify_listeners (bfd_main_t * bm,
+ bfd_listen_event_e event, const bfd_session_t * bs)
+{
+ bfd_notify_fn_t *fn;
+ vec_foreach (fn, bm->listeners)
+ {
+ (*fn) (event, bs);
+ }
+}
+
+void
+bfd_session_start (bfd_main_t * bm, bfd_session_t * bs)
+{
+ BFD_DBG ("\nStarting session: %U", format_bfd_session, bs);
+ bfd_set_effective_required_min_rx (bm, bs,
+ bs->config_required_min_rx_clocks);
+ bfd_recalc_tx_interval (bm, bs);
+ vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index,
+ BFD_EVENT_NEW_SESSION, bs->bs_idx);
+ bfd_notify_listeners (bm, BFD_LISTEN_EVENT_CREATE, bs);
+}
+
+void
+bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down)
+{
+ bfd_main_t *bm = &bfd_main;
+ u64 now = clib_cpu_time_now ();
+ if (admin_up_down)
+ {
+ BFD_DBG ("Session set admin-up, bs-idx=%u", bs->bs_idx);
+ bfd_set_state (bm, bs, BFD_STATE_down, 0);
+ bfd_set_diag (bs, BFD_DIAG_CODE_no_diag);
+ bfd_calc_next_tx (bm, bs, now);
+ bfd_set_timer (bm, bs, now, 0);
+ }
+ else
+ {
+ BFD_DBG ("Session set admin-down, bs-idx=%u", bs->bs_idx);
+ bfd_set_diag (bs, BFD_DIAG_CODE_admin_down);
+ bfd_set_state (bm, bs, BFD_STATE_admin_down, 0);
+ bfd_calc_next_tx (bm, bs, now);
+ bfd_set_timer (bm, bs, now, 0);
+ }
+}
+
+u8 *
+bfd_input_format_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *);
+ const bfd_pkt_t *pkt = (bfd_pkt_t *) t->data;
+ if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head))
+ {
+ s = format (s, "BFD v%u, diag=%u(%s), state=%u(%s),\n"
+ " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), "
+ "detect_mult=%u, length=%u\n",
+ bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt),
+ bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)),
+ bfd_pkt_get_state (pkt),
+ bfd_state_string (bfd_pkt_get_state (pkt)),
+ bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt),
+ bfd_pkt_get_control_plane_independent (pkt),
+ bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt),
+ bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult,
+ pkt->head.length);
+ if (t->len >= sizeof (bfd_pkt_t) &&
+ pkt->head.length >= sizeof (bfd_pkt_t))
+ {
+ s = format (s, " my discriminator: %u\n",
+ clib_net_to_host_u32 (pkt->my_disc));
+ s = format (s, " your discriminator: %u\n",
+ clib_net_to_host_u32 (pkt->your_disc));
+ s = format (s, " desired min tx interval: %u\n",
+ clib_net_to_host_u32 (pkt->des_min_tx));
+ s = format (s, " required min rx interval: %u\n",
+ clib_net_to_host_u32 (pkt->req_min_rx));
+ s = format (s, " required min echo rx interval: %u",
+ clib_net_to_host_u32 (pkt->req_min_echo_rx));
+ }
+ if (t->len >= sizeof (bfd_pkt_with_common_auth_t) &&
+ pkt->head.length >= sizeof (bfd_pkt_with_common_auth_t) &&
+ bfd_pkt_get_auth_present (pkt))
+ {
+ const bfd_pkt_with_common_auth_t *with_auth = (void *) pkt;
+ const bfd_auth_common_t *common = &with_auth->common_auth;
+ s = format (s, "\n auth len: %u\n", common->len);
+ s = format (s, " auth type: %u:%s\n", common->type,
+ bfd_auth_type_str (common->type));
+ if (t->len >= sizeof (bfd_pkt_with_sha1_auth_t) &&
+ pkt->head.length >= sizeof (bfd_pkt_with_sha1_auth_t) &&
+ (BFD_AUTH_TYPE_keyed_sha1 == common->type ||
+ BFD_AUTH_TYPE_meticulous_keyed_sha1 == common->type))
+ {
+ const bfd_pkt_with_sha1_auth_t *with_sha1 = (void *) pkt;
+ const bfd_auth_sha1_t *sha1 = &with_sha1->sha1_auth;
+ s = format (s, " seq num: %u\n",
+ clib_net_to_host_u32 (sha1->seq_num));
+ s = format (s, " key id: %u\n", sha1->key_id);
+ s = format (s, " hash: %U", format_hex_bytes, sha1->hash,
+ sizeof (sha1->hash));
+ }
+ }
+ else
+ {
+ s = format (s, "\n");
+ }
+ }
+
+ return s;
+}
+
+static void
+bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
+ int handling_wakeup)
+{
+ BFD_DBG ("\nState changed: %U", format_bfd_session, bs);
+ bfd_event (bm, bs);
+ switch (bs->local_state)
+ {
+ case BFD_STATE_admin_down:
+ bs->echo = 0;
+ bfd_set_effective_desired_min_tx (bm, bs, now,
+ clib_max
+ (bs->config_desired_min_tx_clocks,
+ bm->default_desired_min_tx_clocks));
+ bfd_set_effective_required_min_rx (bm, bs,
+ bs->config_required_min_rx_clocks);
+ bfd_set_timer (bm, bs, now, handling_wakeup);
+ break;
+ case BFD_STATE_down:
+ bs->echo = 0;
+ bfd_set_effective_desired_min_tx (bm, bs, now,
+ clib_max
+ (bs->config_desired_min_tx_clocks,
+ bm->default_desired_min_tx_clocks));
+ bfd_set_effective_required_min_rx (bm, bs,
+ bs->config_required_min_rx_clocks);
+ bfd_set_timer (bm, bs, now, handling_wakeup);
+ break;
+ case BFD_STATE_init:
+ bs->echo = 0;
+ bfd_set_effective_desired_min_tx (bm, bs, now,
+ bs->config_desired_min_tx_clocks);
+ bfd_set_timer (bm, bs, now, handling_wakeup);
+ break;
+ case BFD_STATE_up:
+ bfd_set_effective_desired_min_tx (bm, bs, now,
+ bs->config_desired_min_tx_clocks);
+ if (BFD_POLL_NOT_NEEDED == bs->poll_state)
+ {
+ bfd_set_effective_required_min_rx (bm, bs,
+ bs->config_required_min_rx_clocks);
+ }
+ bfd_set_timer (bm, bs, now, handling_wakeup);
+ break;
+ }
+ bfd_notify_listeners (bm, BFD_LISTEN_EVENT_UPDATE, bs);
+}
+
+static void
+bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ bfd_main_t * bm, bfd_session_t * bs, u64 now)
+{
+ /*
+ * if remote demand mode is set and we need to do a poll, set the next
+ * timeout so that the session wakes up immediately
+ */
+ if (bs->remote_demand && BFD_POLL_NEEDED == bs->poll_state &&
+ bs->poll_state_start_or_timeout_clocks < now)
+ {
+ bs->tx_timeout_clocks = now;
+ }
+ bfd_recalc_detection_time (bm, bs);
+ bfd_set_timer (bm, bs, now, 0);
+}
+
+static void
+bfd_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+{
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx);
+ bfd_add_udp4_transport (vm, bi, bs, 0 /* is_echo */ );
+ break;
+ case BFD_TRANSPORT_UDP6:
+ BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx);
+ bfd_add_udp6_transport (vm, bi, bs, 0 /* is_echo */ );
+ break;
+ }
+}
+
+static int
+bfd_transport_control_frame (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+{
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx);
+ return bfd_transport_udp4 (vm, bi, bs);
+ case BFD_TRANSPORT_UDP6:
+ BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx);
+ return bfd_transport_udp6 (vm, bi, bs);
+ }
+ return 0;
+}
+
+static int
+bfd_echo_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+{
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx);
+ return bfd_add_udp4_transport (vm, bi, bs, 1 /* is_echo */ );
+ case BFD_TRANSPORT_UDP6:
+ BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx);
+ return bfd_add_udp6_transport (vm, bi, bs, 1 /* is_echo */ );
+ }
+ return 0;
+}
+
+static int
+bfd_transport_echo (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+{
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx);
+ return bfd_transport_udp4 (vm, bi, bs);
+ case BFD_TRANSPORT_UDP6:
+ BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx);
+ return bfd_transport_udp6 (vm, bi, bs);
+ }
+ return 0;
+}
+
+#if WITH_LIBSSL > 0
+static void
+bfd_add_sha1_auth_section (vlib_buffer_t * b, bfd_session_t * bs)
+{
+ bfd_pkt_with_sha1_auth_t *pkt = vlib_buffer_get_current (b);
+ bfd_auth_sha1_t *auth = &pkt->sha1_auth;
+ b->current_length += sizeof (*auth);
+ pkt->pkt.head.length += sizeof (*auth);
+ bfd_pkt_set_auth_present (&pkt->pkt);
+ memset (auth, 0, sizeof (*auth));
+ auth->type_len.type = bs->auth.curr_key->auth_type;
+ /*
+ * only meticulous authentication types require incrementing seq number
+ * for every message, but doing so doesn't violate the RFC
+ */
+ ++bs->auth.local_seq_number;
+ auth->type_len.len = sizeof (bfd_auth_sha1_t);
+ auth->key_id = bs->auth.curr_bfd_key_id;
+ auth->seq_num = clib_host_to_net_u32 (bs->auth.local_seq_number);
+ /*
+ * first copy the password into the packet, then calculate the hash
+ * and finally replace the password with the calculated hash
+ */
+ clib_memcpy (auth->hash, bs->auth.curr_key->key,
+ sizeof (bs->auth.curr_key->key));
+ unsigned char hash[sizeof (auth->hash)];
+ SHA1 ((unsigned char *) pkt, sizeof (*pkt), hash);
+ BFD_DBG ("hashing: %U", format_hex_bytes, pkt, sizeof (*pkt));
+ clib_memcpy (auth->hash, hash, sizeof (hash));
+}
+#endif
+
+static void
+bfd_add_auth_section (vlib_buffer_t * b, bfd_session_t * bs)
+{
+ if (bs->auth.curr_key)
+ {
+ const bfd_auth_type_e auth_type = bs->auth.curr_key->auth_type;
+ switch (auth_type)
+ {
+ case BFD_AUTH_TYPE_reserved:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_simple_password:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_keyed_md5:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_meticulous_keyed_md5:
+ clib_warning ("Internal error, unexpected BFD auth type '%d'",
+ auth_type);
+ break;
+#if WITH_LIBSSL > 0
+ case BFD_AUTH_TYPE_keyed_sha1:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_meticulous_keyed_sha1:
+ bfd_add_sha1_auth_section (b, bs);
+ break;
+#else
+ case BFD_AUTH_TYPE_keyed_sha1:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_meticulous_keyed_sha1:
+ clib_warning ("Internal error, unexpected BFD auth type '%d'",
+ auth_type);
+ break;
+#endif
+ }
+ }
+}
+
+static int
+bfd_is_echo_possible (bfd_session_t * bs)
+{
+ if (BFD_STATE_up == bs->local_state && BFD_STATE_up == bs->remote_state &&
+ bs->remote_min_echo_rx_usec > 0)
+ {
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ return bfd_udp_is_echo_available (BFD_TRANSPORT_UDP4);
+ case BFD_TRANSPORT_UDP6:
+ return bfd_udp_is_echo_available (BFD_TRANSPORT_UDP6);
+ }
+ }
+ return 0;
+}
+
+static void
+bfd_init_control_frame (bfd_main_t * bm, bfd_session_t * bs,
+ vlib_buffer_t * b)
+{
+ bfd_pkt_t *pkt = vlib_buffer_get_current (b);
+ u32 bfd_length = sizeof (bfd_pkt_t);
+ memset (pkt, 0, sizeof (*pkt));
+ bfd_pkt_set_version (pkt, 1);
+ bfd_pkt_set_diag_code (pkt, bs->local_diag);
+ bfd_pkt_set_state (pkt, bs->local_state);
+ pkt->head.detect_mult = bs->local_detect_mult;
+ pkt->head.length = bfd_length;
+ pkt->my_disc = bs->local_discr;
+ pkt->your_disc = bs->remote_discr;
+ pkt->des_min_tx = clib_host_to_net_u32 (bs->config_desired_min_tx_usec);
+ if (bs->echo)
+ {
+ pkt->req_min_rx =
+ clib_host_to_net_u32 (bfd_clocks_to_usec
+ (bm, bs->effective_required_min_rx_clocks));
+ }
+ else
+ {
+ pkt->req_min_rx =
+ clib_host_to_net_u32 (bs->config_required_min_rx_usec);
+ }
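+ /*
+ * per RFC 5880, a zero Required Min Echo RX Interval would advertise that
+ * we cannot receive echo packets; send the minimum non-zero value instead
+ */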
+ pkt->req_min_echo_rx = clib_host_to_net_u32 (1);
+ b->current_length = bfd_length;
+}
+
+static void
+bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ bfd_main_t * bm, bfd_session_t * bs, u64 now)
+{
+ if (!bfd_is_echo_possible (bs))
+ {
+ BFD_DBG ("\nSwitching off echo function: %U", format_bfd_session, bs);
+ bs->echo = 0;
+ return;
+ }
+ /*
+ * sometimes the wheel expires an event a bit sooner than requested, account
+ * for that here
+ */
+ if (now + bm->wheel_inaccuracy >= bs->echo_tx_timeout_clocks)
+ {
+ BFD_DBG ("\nSending echo packet: %U", format_bfd_session, bs);
+ u32 bi;
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ {
+ clib_warning ("buffer allocation failure");
+ return;
+ }
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ ASSERT (b->current_data == 0);
+ memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b)));
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+ bfd_echo_pkt_t *pkt = vlib_buffer_get_current (b);
+ memset (pkt, 0, sizeof (*pkt));
+ pkt->discriminator = bs->local_discr;
+ pkt->expire_time_clocks =
+ now + bs->echo_transmit_interval_clocks * bs->local_detect_mult;
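+ /*
+ * echo packets are looped back verbatim by the peer; the checksum over the
+ * discriminator, expire time and local secret lets bfd_consume_echo_pkt
+ * verify that a received echo originated here and was not modified
+ */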
+ pkt->checksum =
+ bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_clocks,
+ bs->echo_secret);
+ b->current_length = sizeof (*pkt);
+ if (!bfd_echo_add_transport_layer (vm, bi, bs))
+ {
+ BFD_ERR ("cannot send echo packet out, turning echo off");
+ bs->echo = 0;
+ vlib_buffer_free_one (vm, bi);
+ return;
+ }
+ if (!bfd_transport_echo (vm, bi, bs))
+ {
+ BFD_ERR ("cannot send echo packet out, turning echo off");
+ bs->echo = 0;
+ vlib_buffer_free_one (vm, bi);
+ return;
+ }
+ bs->echo_last_tx_clocks = now;
+ bfd_calc_next_echo_tx (bm, bs, now);
+ }
+ else
+ {
+ BFD_DBG
+ ("No need to send echo packet now, now is %lu, tx_timeout is %lu",
+ now, bs->echo_tx_timeout_clocks);
+ }
+}
+
+static void
+bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ bfd_main_t * bm, bfd_session_t * bs, u64 now)
+{
+ if (!bs->remote_min_rx_usec && BFD_POLL_NOT_NEEDED == bs->poll_state)
+ {
+ BFD_DBG ("Remote min rx interval is zero, not sending periodic control "
+ "frame");
+ return;
+ }
+ if (BFD_POLL_NOT_NEEDED == bs->poll_state && bs->remote_demand &&
+ BFD_STATE_up == bs->local_state && BFD_STATE_up == bs->remote_state)
+ {
+ /*
+ * A system MUST NOT periodically transmit BFD Control packets if Demand
+ * mode is active on the remote system (bfd.RemoteDemandMode is 1,
+ * bfd.SessionState is Up, and bfd.RemoteSessionState is Up) and a Poll
+ * Sequence is not being transmitted.
+ */
+ BFD_DBG ("Remote demand is set, not sending periodic control frame");
+ return;
+ }
+ /*
+ * sometimes the wheel expires an event a bit sooner than requested, account
+ * for that here
+ */
+ if (now + bm->wheel_inaccuracy >= bs->tx_timeout_clocks)
+ {
+ BFD_DBG ("\nSending periodic control frame: %U", format_bfd_session,
+ bs);
+ u32 bi;
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ {
+ clib_warning ("buffer allocation failure");
+ return;
+ }
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ ASSERT (b->current_data == 0);
+ memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b)));
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+ bfd_init_control_frame (bm, bs, b);
+ switch (bs->poll_state)
+ {
+ case BFD_POLL_NEEDED:
+ if (now < bs->poll_state_start_or_timeout_clocks)
+ {
+ BFD_DBG ("Cannot start a poll sequence yet, need to wait "
+ "for " BFD_CLK_FMT,
+ BFD_CLK_PRN (bs->poll_state_start_or_timeout_clocks -
+ now));
+ break;
+ }
+ bs->poll_state_start_or_timeout_clocks = now;
+ bfd_set_poll_state (bs, BFD_POLL_IN_PROGRESS);
+ /* fallthrough */
+ case BFD_POLL_IN_PROGRESS:
+ case BFD_POLL_IN_PROGRESS_AND_QUEUED:
+ bfd_pkt_set_poll (vlib_buffer_get_current (b));
+ BFD_DBG ("Setting poll bit in packet, bs_idx=%u", bs->bs_idx);
+ break;
+ case BFD_POLL_NOT_NEEDED:
+ /* fallthrough */
+ break;
+ }
+ bfd_add_auth_section (b, bs);
+ bfd_add_transport_layer (vm, bi, bs);
+ if (!bfd_transport_control_frame (vm, bi, bs))
+ {
+ vlib_buffer_free_one (vm, bi);
+ }
+ bs->last_tx_clocks = now;
+ bfd_calc_next_tx (bm, bs, now);
+ }
+ else
+ {
+ BFD_DBG
+ ("No need to send control frame now, now is %lu, tx_timeout is %lu",
+ now, bs->tx_timeout_clocks);
+ }
+}
+
+void
+bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b,
+ bfd_main_t * bm, bfd_session_t * bs,
+ int is_local)
+{
+ BFD_DBG ("Send final control frame for bs_idx=%lu", bs->bs_idx);
+ bfd_init_control_frame (bm, bs, b);
+ bfd_pkt_set_final (vlib_buffer_get_current (b));
+ bfd_add_auth_section (b, bs);
+ u32 bi = vlib_get_buffer_index (vm, b);
+ bfd_add_transport_layer (vm, bi, bs);
+ bs->last_tx_clocks = clib_cpu_time_now ();
+ /*
+ * the RFC allows parameter changes to be included in the final frame; any
+ * pending changes were applied above, so pending poll needs can be cleared
+ */
+ bfd_set_poll_state (bs, BFD_POLL_NOT_NEEDED);
+}
+
+static void
+bfd_check_rx_timeout (bfd_main_t * bm, bfd_session_t * bs, u64 now,
+ int handling_wakeup)
+{
+ /*
+ * sometimes the wheel expires an event a bit sooner than requested, account
+ * for that here
+ */
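+ /*
+ * detection_time_clocks is maintained by bfd_recalc_detection_time; per
+ * RFC 5880 it amounts to the remote detect multiplier times the agreed
+ * receive interval
+ */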
+ if (bs->last_rx_clocks + bs->detection_time_clocks <=
+ now + bm->wheel_inaccuracy)
+ {
+ BFD_DBG ("Rx timeout, session goes down");
+ bfd_set_diag (bs, BFD_DIAG_CODE_det_time_exp);
+ bfd_set_state (bm, bs, BFD_STATE_down, handling_wakeup);
+ /*
+ * If the remote system does not receive any
+ * BFD Control packets for a Detection Time, it SHOULD reset
+ * bfd.RemoteMinRxInterval to its initial value of 1 (per section 6.8.1,
+ * since it is no longer required to maintain previous session state)
+ * and then can transmit at its own rate.
+ */
+ bfd_set_remote_required_min_rx (bm, bs, now, 1);
+ }
+ else if (bs->echo &&
+ bs->echo_last_rx_clocks +
+ bs->echo_transmit_interval_clocks * bs->local_detect_mult <=
+ now + bm->wheel_inaccuracy)
+ {
+ BFD_DBG ("Echo rx timeout, session goes down");
+ bfd_set_diag (bs, BFD_DIAG_CODE_echo_failed);
+ bfd_set_state (bm, bs, BFD_STATE_down, handling_wakeup);
+ }
+}
+
+void
+bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm,
+ bfd_session_t * bs, u64 now)
+{
+ BFD_DBG ("Timeout for bs_idx=%lu", bs->bs_idx);
+ switch (bs->local_state)
+ {
+ case BFD_STATE_admin_down:
+ bfd_send_periodic (vm, rt, bm, bs, now);
+ break;
+ case BFD_STATE_down:
+ bfd_send_periodic (vm, rt, bm, bs, now);
+ break;
+ case BFD_STATE_init:
+ bfd_check_rx_timeout (bm, bs, now, 1);
+ bfd_send_periodic (vm, rt, bm, bs, now);
+ break;
+ case BFD_STATE_up:
+ bfd_check_rx_timeout (bm, bs, now, 1);
+ if (BFD_POLL_NOT_NEEDED == bs->poll_state && !bs->echo &&
+ bfd_is_echo_possible (bs))
+ {
+ /* switch on echo function as main detection method now */
+ BFD_DBG ("Switching on echo function, bs_idx=%u", bs->bs_idx);
+ bs->echo = 1;
+ bs->echo_last_rx_clocks = now;
+ bs->echo_tx_timeout_clocks = now;
+ bfd_set_effective_required_min_rx (bm, bs,
+ clib_max
+ (bm->min_required_min_rx_while_echo_clocks,
+ bs->config_required_min_rx_clocks));
+ bfd_set_poll_state (bs, BFD_POLL_NEEDED);
+ }
+ bfd_send_periodic (vm, rt, bm, bs, now);
+ if (bs->echo)
+ {
+ bfd_send_echo (vm, rt, bm, bs, now);
+ }
+ break;
+ }
+}
+
+/*
+ * bfd process node function
+ */
+static uword
+bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ bfd_main_t *bm = &bfd_main;
+ u32 *expired = 0;
+ uword event_type, *event_data = 0;
+
+ /* So we can send events to the bfd process */
+ bm->bfd_process_node_index = bfd_process_node.index;
+
+ while (1)
+ {
+ u64 now = clib_cpu_time_now ();
+ u64 next_expire = timing_wheel_next_expiring_elt_time (&bm->wheel);
+ BFD_DBG ("timing_wheel_next_expiring_elt_time(%p) returns %lu",
+ &bm->wheel, next_expire);
+ if ((i64) next_expire < 0)
+ {
+ BFD_DBG ("wait for event without timeout");
+ (void) vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &event_data);
+ }
+ else
+ {
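+ /* convert the clock delta to seconds for the process timeout */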
+ f64 timeout = ((i64) next_expire - (i64) now) / bm->cpu_cps;
+ BFD_DBG ("wait for event with timeout %.02f", timeout);
+ if (timeout < 0)
+ {
+ BFD_DBG ("negative timeout, already expired, skipping wait");
+ event_type = ~0;
+ }
+ else
+ {
+ (void) vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ }
+ }
+ now = clib_cpu_time_now ();
+ switch (event_type)
+ {
+ case ~0: /* no events => timeout */
+ /* nothing to do here */
+ break;
+ case BFD_EVENT_RESCHEDULE:
+ /* nothing to do here - reschedule is done automatically after
+ * each event or timeout */
+ break;
+ case BFD_EVENT_NEW_SESSION:
+ if (!pool_is_free_index (bm->sessions, *event_data))
+ {
+ bfd_session_t *bs =
+ pool_elt_at_index (bm->sessions, *event_data);
+ bfd_send_periodic (vm, rt, bm, bs, now);
+ bfd_set_timer (bm, bs, now, 1);
+ }
+ else
+ {
+ BFD_DBG ("Ignoring event for non-existent session index %u",
+ (u32) * event_data);
+ }
+ break;
+ case BFD_EVENT_CONFIG_CHANGED:
+ if (!pool_is_free_index (bm->sessions, *event_data))
+ {
+ bfd_session_t *bs =
+ pool_elt_at_index (bm->sessions, *event_data);
+ bfd_on_config_change (vm, rt, bm, bs, now);
+ }
+ else
+ {
+ BFD_DBG ("Ignoring event for non-existent session index %u",
+ (u32) * event_data);
+ }
+ break;
+ default:
+ clib_warning ("BUG: event type 0x%wx", event_type);
+ break;
+ }
+ BFD_DBG ("advancing wheel, now is %lu", now);
+ BFD_DBG ("timing_wheel_advance (%p, %lu, %p, 0);", &bm->wheel, now,
+ expired);
+ expired = timing_wheel_advance (&bm->wheel, now, expired, 0);
+ BFD_DBG ("Expired %d elements", vec_len (expired));
+ u32 *p = NULL;
+ vec_foreach (p, expired)
+ {
+ const u32 bs_idx = *p;
+ if (!pool_is_free_index (bm->sessions, bs_idx))
+ {
+ bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx);
+ bfd_on_timeout (vm, rt, bm, bs, now);
+ bfd_set_timer (bm, bs, now, 1);
+ }
+ }
+ if (expired)
+ {
+ _vec_len (expired) = 0;
+ }
+ if (event_data)
+ {
+ _vec_len (event_data) = 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * bfd process node declaration
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (bfd_process_node, static) = {
+ .function = bfd_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "bfd-process",
+ .n_next_nodes = 0,
+ .next_nodes = {},
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+ // bfd_main_t *bm = &bfd_main;
+ // vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ /* TODO */
+ }
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bfd_sw_interface_up_down);
+
+static clib_error_t *
+bfd_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ // bfd_main_t *bm = &bfd_main;
+ if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+ {
+ /* TODO */
+ }
+ return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bfd_hw_interface_up_down);
+
+void
+bfd_register_listener (bfd_notify_fn_t fn)
+{
+ bfd_main_t *bm = &bfd_main;
+
+ vec_add1 (bm->listeners, fn);
+}
+
+/*
+ * setup function
+ */
+static clib_error_t *
+bfd_main_init (vlib_main_t * vm)
+{
+#if BFD_DEBUG
+ setbuf (stdout, NULL);
+#endif
+ bfd_main_t *bm = &bfd_main;
+ bm->random_seed = random_default_seed ();
+ bm->vlib_main = vm;
+ bm->vnet_main = vnet_get_main ();
+ memset (&bm->wheel, 0, sizeof (bm->wheel));
+ bm->cpu_cps = vm->clib_time.clocks_per_second;
+ BFD_DBG ("cps is %.2f", bm->cpu_cps);
+ bm->default_desired_min_tx_clocks =
+ bfd_usec_to_clocks (bm, BFD_DEFAULT_DESIRED_MIN_TX_USEC);
+ bm->min_required_min_rx_while_echo_clocks =
+ bfd_usec_to_clocks (bm, BFD_REQUIRED_MIN_RX_USEC_WHILE_ECHO);
+ const u64 now = clib_cpu_time_now ();
+ timing_wheel_init (&bm->wheel, now, bm->cpu_cps);
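+ /* allow scheduling slop of two wheel bins (2 * 2^log2_clocks_per_bin) */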
+ bm->wheel_inaccuracy = 2 << bm->wheel.log2_clocks_per_bin;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (bfd_main_init);
+
+bfd_session_t *
+bfd_get_session (bfd_main_t * bm, bfd_transport_e t)
+{
+ bfd_session_t *result;
+ pool_get (bm->sessions, result);
+ memset (result, 0, sizeof (*result));
+ result->bs_idx = result - bm->sessions;
+ result->transport = t;
+ const unsigned limit = 1000;
+ unsigned counter = 0;
+ do
+ {
+ result->local_discr = random_u32 (&bm->random_seed);
+ if (counter > limit)
+ {
+ clib_warning ("Couldn't allocate unused session discriminator even "
+ "after %u tries!", limit);
+ pool_put (bm->sessions, result);
+ return NULL;
+ }
+ ++counter;
+ }
+ while (hash_get (bm->session_by_disc, result->local_discr));
+ bfd_set_defaults (bm, result);
+ hash_set (bm->session_by_disc, result->local_discr, result->bs_idx);
+ return result;
+}
+
+void
+bfd_put_session (bfd_main_t * bm, bfd_session_t * bs)
+{
+ bfd_notify_listeners (bm, BFD_LISTEN_EVENT_DELETE, bs);
+ if (bs->auth.curr_key)
+ {
+ --bs->auth.curr_key->use_count;
+ }
+ if (bs->auth.next_key)
+ {
+ --bs->auth.next_key->use_count;
+ }
+ hash_unset (bm->session_by_disc, bs->local_discr);
+ pool_put (bm->sessions, bs);
+}
+
+bfd_session_t *
+bfd_find_session_by_idx (bfd_main_t * bm, uword bs_idx)
+{
+ if (!pool_is_free_index (bm->sessions, bs_idx))
+ {
+ return pool_elt_at_index (bm->sessions, bs_idx);
+ }
+ return NULL;
+}
+
+bfd_session_t *
+bfd_find_session_by_disc (bfd_main_t * bm, u32 disc)
+{
+ uword *p = hash_get (bfd_main.session_by_disc, disc);
+ if (p)
+ {
+ return pool_elt_at_index (bfd_main.sessions, *p);
+ }
+ return NULL;
+}
+
+/**
+ * @brief verify bfd packet - common checks
+ *
+ * @param pkt
+ *
+ * @return 1 if bfd packet is valid
+ */
+int
+bfd_verify_pkt_common (const bfd_pkt_t * pkt)
+{
+ if (1 != bfd_pkt_get_version (pkt))
+ {
+ BFD_ERR ("BFD verification failed - unexpected version: '%d'",
+ bfd_pkt_get_version (pkt));
+ return 0;
+ }
+ if (pkt->head.length < sizeof (bfd_pkt_t) ||
+ (bfd_pkt_get_auth_present (pkt) &&
+ pkt->head.length < sizeof (bfd_pkt_with_common_auth_t)))
+ {
+ BFD_ERR ("BFD verification failed - unexpected length: '%d' (auth "
+ "present: %d)",
+ pkt->head.length, bfd_pkt_get_auth_present (pkt));
+ return 0;
+ }
+ if (!pkt->head.detect_mult)
+ {
+ BFD_ERR ("BFD verification failed - unexpected detect-mult: '%d'",
+ pkt->head.detect_mult);
+ return 0;
+ }
+ if (bfd_pkt_get_multipoint (pkt))
+ {
+ BFD_ERR ("BFD verification failed - unexpected multipoint: '%d'",
+ bfd_pkt_get_multipoint (pkt));
+ return 0;
+ }
+ if (!pkt->my_disc)
+ {
+ BFD_ERR ("BFD verification failed - unexpected my-disc: '%d'",
+ pkt->my_disc);
+ return 0;
+ }
+ if (!pkt->your_disc)
+ {
+ const u8 pkt_state = bfd_pkt_get_state (pkt);
+ if (pkt_state != BFD_STATE_down && pkt_state != BFD_STATE_admin_down)
+ {
+ BFD_ERR ("BFD verification failed - unexpected state: '%s' "
+ "(your-disc is zero)", bfd_state_string (pkt_state));
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static void
+bfd_session_switch_auth_to_next (bfd_session_t * bs)
+{
+ BFD_DBG ("Switching authentication key from %U to %U for bs_idx=%u",
+ format_bfd_auth_key, bs->auth.curr_key, format_bfd_auth_key,
+ bs->auth.next_key, bs->bs_idx);
+ bs->auth.is_delayed = 0;
+ if (bs->auth.curr_key)
+ {
+ --bs->auth.curr_key->use_count;
+ }
+ bs->auth.curr_key = bs->auth.next_key;
+ bs->auth.next_key = NULL;
+ bs->auth.curr_bfd_key_id = bs->auth.next_bfd_key_id;
+}
+
+static int
+bfd_auth_type_is_meticulous (bfd_auth_type_e auth_type)
+{
+ if (BFD_AUTH_TYPE_meticulous_keyed_md5 == auth_type ||
+ BFD_AUTH_TYPE_meticulous_keyed_sha1 == auth_type)
+ {
+ return 1;
+ }
+ return 0;
+}
+
+static int
+bfd_verify_pkt_auth_seq_num (bfd_session_t * bs,
+ u32 received_seq_num, int is_meticulous)
+{
+ /*
+ * RFC 5880 6.8.1:
+ *
+ * This variable MUST be set to zero after no packets have been
+ * received on this session for at least twice the Detection Time.
+ */
+ u64 now = clib_cpu_time_now ();
+ if (now - bs->last_rx_clocks > bs->detection_time_clocks * 2)
+ {
+ BFD_DBG ("BFD peer unresponsive for %lu clocks, which is > 2 * "
+ "detection_time=%u clocks, resetting remote_seq_number_known "
+ "flag",
+ now - bs->last_rx_clocks, bs->detection_time_clocks * 2);
+ bs->auth.remote_seq_number_known = 0;
+ }
+ if (bs->auth.remote_seq_number_known)
+ {
+ /* remote sequence number is known, verify its validity */
+ const u32 max_u32 = 0xffffffff;
+ /* the calculation might wrap, account for the special case... */
+ if (bs->auth.remote_seq_number > max_u32 - 3 * bs->local_detect_mult)
+ {
+ /*
+ * special case
+ *
+ * x y z
+ * |----------+----------------------------+-----------|
+ * 0 ^ ^ 0xffffffff
+ * | remote_seq_num------+
+ * |
+ * +-----(remote_seq_num + 3*detect_mult) % 0xffffffff
+ *
+ * x + y + z = 0xffffffff
+ * x + z = 3 * detect_mult
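+ *
+ * worked example: with detect_mult = 5, remote_seq_number = 0xfffffff8
+ * and is_meticulous = 1, z = 0xffffffff - 0xfffffff8 = 7 and
+ * x = 3 * 5 - 7 = 8, so sequence numbers 0..8 and
+ * 0xfffffff9..0xffffffff are accepted and anything in between rejected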
+ */
+ const u32 z = max_u32 - bs->auth.remote_seq_number;
+ const u32 x = 3 * bs->local_detect_mult - z;
+ if (received_seq_num > x &&
+ received_seq_num < bs->auth.remote_seq_number + is_meticulous)
+ {
+ BFD_ERR
+ ("Recvd sequence number=%u out of ranges <0, %u>, <%u, %u>",
+ received_seq_num, x,
+ bs->auth.remote_seq_number + is_meticulous, max_u32);
+ return 0;
+ }
+ }
+ else
+ {
+ /* regular case */
+ const u32 min = bs->auth.remote_seq_number + is_meticulous;
+ const u32 max =
+ bs->auth.remote_seq_number + 3 * bs->local_detect_mult;
+ if (received_seq_num < min || received_seq_num > max)
+ {
+ BFD_ERR ("Recvd sequence number=%u out of range <%u, %u>",
+ received_seq_num, min, max);
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+static int
+bfd_verify_pkt_auth_key_sha1 (const bfd_pkt_t * pkt, u32 pkt_size,
+ bfd_session_t * bs, u8 bfd_key_id,
+ bfd_auth_key_t * auth_key)
+{
+ ASSERT (auth_key->auth_type == BFD_AUTH_TYPE_keyed_sha1 ||
+ auth_key->auth_type == BFD_AUTH_TYPE_meticulous_keyed_sha1);
+
+ u8 result[SHA_DIGEST_LENGTH];
+ bfd_pkt_with_common_auth_t *with_common = (void *) pkt;
+ if (pkt_size < sizeof (*with_common))
+ {
+ BFD_ERR ("Packet size too small to hold authentication common header");
+ return 0;
+ }
+ if (with_common->common_auth.type != auth_key->auth_type)
+ {
+ BFD_ERR ("BFD auth type mismatch, packet auth=%d:%s doesn't match "
+ "in-use auth=%d:%s",
+ with_common->common_auth.type,
+ bfd_auth_type_str (with_common->common_auth.type),
+ auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type));
+ return 0;
+ }
+ bfd_pkt_with_sha1_auth_t *with_sha1 = (void *) pkt;
+ if (pkt_size < sizeof (*with_sha1) ||
+ with_sha1->sha1_auth.type_len.len < sizeof (with_sha1->sha1_auth))
+ {
+ BFD_ERR
+ ("BFD size mismatch, payload size=%u, expected=%u, auth_len=%u, "
+ "expected=%u", pkt_size, sizeof (*with_sha1),
+ with_sha1->sha1_auth.type_len.len, sizeof (with_sha1->sha1_auth));
+ return 0;
+ }
+ if (with_sha1->sha1_auth.key_id != bfd_key_id)
+ {
+ BFD_ERR
+ ("BFD key ID mismatch, packet key ID=%u doesn't match key ID=%u%s",
+ with_sha1->sha1_auth.key_id, bfd_key_id,
+ bs->auth.is_delayed ? " (but a delayed auth change is scheduled)" : "");
+ return 0;
+ }
+ SHA_CTX ctx;
+ if (!SHA1_Init (&ctx))
+ {
+ BFD_ERR ("SHA1_Init failed");
+ return 0;
+ }
+ /* hash the packet excluding the final 20-byte hash field, then hash the
+ * actual key data in its place */
+ if (!SHA1_Update (&ctx, with_sha1,
+ sizeof (*with_sha1) - sizeof (with_sha1->sha1_auth.hash)))
+ {
+ BFD_ERR ("SHA1_Update failed");
+ return 0;
+ }
+ if (!SHA1_Update (&ctx, auth_key->key, sizeof (auth_key->key)))
+ {
+ BFD_ERR ("SHA1_Update failed");
+ return 0;
+ }
+ if (!SHA1_Final (result, &ctx))
+ {
+ BFD_ERR ("SHA1_Final failed");
+ return 0;
+ }
+ if (0 == memcmp (result, with_sha1->sha1_auth.hash, SHA_DIGEST_LENGTH))
+ {
+ return 1;
+ }
+ BFD_ERR ("SHA1 hash: %U doesn't match the expected value: %U",
+ format_hex_bytes, with_sha1->sha1_auth.hash, SHA_DIGEST_LENGTH,
+ format_hex_bytes, result, SHA_DIGEST_LENGTH);
+ return 0;
+}
+
+static int
+bfd_verify_pkt_auth_key (const bfd_pkt_t * pkt, u32 pkt_size,
+ bfd_session_t * bs, u8 bfd_key_id,
+ bfd_auth_key_t * auth_key)
+{
+ switch (auth_key->auth_type)
+ {
+ case BFD_AUTH_TYPE_reserved:
+ clib_warning ("Internal error, unexpected auth_type=%d:%s",
+ auth_key->auth_type,
+ bfd_auth_type_str (auth_key->auth_type));
+ return 0;
+ case BFD_AUTH_TYPE_simple_password:
+ clib_warning
+ ("Internal error, not implemented, unexpected auth_type=%d:%s",
+ auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type));
+ return 0;
+ case BFD_AUTH_TYPE_keyed_md5:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_meticulous_keyed_md5:
+ clib_warning
+ ("Internal error, not implemented, unexpected auth_type=%d:%s",
+ auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type));
+ return 0;
+ case BFD_AUTH_TYPE_keyed_sha1:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_meticulous_keyed_sha1:
+#if WITH_LIBSSL > 0
+ do
+ {
+ const u32 seq_num = clib_net_to_host_u32
+ (((bfd_pkt_with_sha1_auth_t *) pkt)->sha1_auth.seq_num);
+ return bfd_verify_pkt_auth_seq_num (bs, seq_num,
+ bfd_auth_type_is_meticulous
+ (auth_key->auth_type))
+ && bfd_verify_pkt_auth_key_sha1 (pkt, pkt_size, bs, bfd_key_id,
+ auth_key);
+ }
+ while (0);
+#else
+ clib_warning
+ ("Internal error, attempt to use SHA1 without SSL support");
+ return 0;
+#endif
+ }
+ return 0;
+}
+
+/**
+ * @brief verify bfd packet - authentication
+ *
+ * @param pkt
+ *
+ * @return 1 if bfd packet is valid
+ */
+int
+bfd_verify_pkt_auth (const bfd_pkt_t * pkt, u16 pkt_size, bfd_session_t * bs)
+{
+ if (bfd_pkt_get_auth_present (pkt))
+ {
+ /* authentication present in packet */
+ if (!bs->auth.curr_key)
+ {
+ /* currently not using authentication - can we turn it on? */
+ if (bs->auth.is_delayed && bs->auth.next_key)
+ {
+ /* yes, switch is scheduled - make sure the auth is valid */
+ if (bfd_verify_pkt_auth_key (pkt, pkt_size, bs,
+ bs->auth.next_bfd_key_id,
+ bs->auth.next_key))
+ {
+ /* auth matches next key, do the switch, packet is valid */
+ bfd_session_switch_auth_to_next (bs);
+ return 1;
+ }
+ }
+ }
+ else
+ {
+ /* yes, using authentication, verify the key */
+ if (bfd_verify_pkt_auth_key (pkt, pkt_size, bs,
+ bs->auth.curr_bfd_key_id,
+ bs->auth.curr_key))
+ {
+ /* verification passed, packet is valid */
+ return 1;
+ }
+ else
+ {
+ /* verification failed - but maybe we need to switch key */
+ if (bs->auth.is_delayed && bs->auth.next_key)
+ {
+ /* delayed switch present, verify if that key works */
+ if (bfd_verify_pkt_auth_key (pkt, pkt_size, bs,
+ bs->auth.next_bfd_key_id,
+ bs->auth.next_key))
+ {
+ /* auth matches next key, switch key, packet is valid */
+ bfd_session_switch_auth_to_next (bs);
+ return 1;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ /* authentication in packet not present */
+ if (pkt_size > sizeof (*pkt))
+ {
+ BFD_ERR ("BFD verification failed - unexpected packet size '%d' "
+ "(auth not present)", pkt_size);
+ return 0;
+ }
+ if (bs->auth.curr_key)
+ {
+ /* currently authenticating - could we turn it off? */
+ if (bs->auth.is_delayed && !bs->auth.next_key)
+ {
+ /* yes, delayed switch to NULL key is scheduled */
+ bfd_session_switch_auth_to_next (bs);
+ return 1;
+ }
+ }
+ else
+ {
+ /* no auth in packet, no auth in use - packet is valid */
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void
+bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx)
+{
+ bfd_session_t *bs = bfd_find_session_by_idx (bm, bs_idx);
+ if (!bs || (pkt->your_disc && pkt->your_disc != bs->local_discr))
+ {
+ return;
+ }
+ BFD_DBG ("Scanning bfd packet, bs_idx=%d", bs->bs_idx);
+ bs->remote_discr = pkt->my_disc;
+ bs->remote_state = bfd_pkt_get_state (pkt);
+ bs->remote_demand = bfd_pkt_get_demand (pkt);
+ bs->remote_diag = bfd_pkt_get_diag_code (pkt);
+ u64 now = clib_cpu_time_now ();
+ bs->last_rx_clocks = now;
+ if (bfd_pkt_get_auth_present (pkt))
+ {
+ bfd_auth_type_e auth_type =
+ ((bfd_pkt_with_common_auth_t *) (pkt))->common_auth.type;
+ switch (auth_type)
+ {
+ case BFD_AUTH_TYPE_reserved:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_simple_password:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_keyed_md5:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_meticulous_keyed_md5:
+ clib_warning ("Internal error, unexpected auth_type=%d:%s",
+ auth_type, bfd_auth_type_str (auth_type));
+ break;
+ case BFD_AUTH_TYPE_keyed_sha1:
+ /* fallthrough */
+ case BFD_AUTH_TYPE_meticulous_keyed_sha1:
+ do
+ {
+ bfd_pkt_with_sha1_auth_t *with_sha1 =
+ (bfd_pkt_with_sha1_auth_t *) pkt;
+ bs->auth.remote_seq_number =
+ clib_net_to_host_u32 (with_sha1->sha1_auth.seq_num);
+ bs->auth.remote_seq_number_known = 1;
+ BFD_DBG ("Received sequence number %u",
+ bs->auth.remote_seq_number);
+ }
+ while (0);
+ }
+ }
+ bs->remote_desired_min_tx_clocks =
+ bfd_usec_to_clocks (bm, clib_net_to_host_u32 (pkt->des_min_tx));
+ bs->remote_detect_mult = pkt->head.detect_mult;
+ bfd_set_remote_required_min_rx (bm, bs, now,
+ clib_net_to_host_u32 (pkt->req_min_rx));
+ bfd_set_remote_required_min_echo_rx (bm, bs, now,
+ clib_net_to_host_u32
+ (pkt->req_min_echo_rx));
+ if (bfd_pkt_get_final (pkt))
+ {
+ if (BFD_POLL_IN_PROGRESS == bs->poll_state)
+ {
+ BFD_DBG ("Poll sequence terminated, bs_idx=%u", bs->bs_idx);
+ bfd_set_poll_state (bs, BFD_POLL_NOT_NEEDED);
+ if (BFD_STATE_up == bs->local_state)
+ {
+ bfd_set_effective_required_min_rx (bm, bs,
+ clib_max (bs->echo *
+ bm->min_required_min_rx_while_echo_clocks,
+ bs->config_required_min_rx_clocks));
+ }
+ }
+ else if (BFD_POLL_IN_PROGRESS_AND_QUEUED == bs->poll_state)
+ {
+ /*
+ * next poll sequence must be delayed by at least the round trip
+ * time, so calculate that here
+ */
+ BFD_DBG ("Next poll sequence can commence in " BFD_CLK_FMT,
+ BFD_CLK_PRN (now -
+ bs->poll_state_start_or_timeout_clocks));
+ bs->poll_state_start_or_timeout_clocks =
+ now + (now - bs->poll_state_start_or_timeout_clocks);
+ BFD_DBG
+ ("Poll sequence terminated, but another is needed, bs_idx=%u",
+ bs->bs_idx);
+ bfd_set_poll_state (bs, BFD_POLL_NEEDED);
+ }
+ }
+ bfd_calc_next_tx (bm, bs, now);
+ bfd_set_timer (bm, bs, now, 0);
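+ /*
+ * the state machine below follows RFC 5880:
+ * - remote AdminDown signals the neighbor went down -> local Down
+ * - local Down + remote Down -> Init; local Down + remote Init -> Up
+ * - local Init + remote Init or Up -> Up
+ * - local Up + remote Down -> Down
+ */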
+ if (BFD_STATE_admin_down == bs->local_state)
+ {
+ BFD_DBG ("Session is admin-down, ignoring packet, bs_idx=%u",
+ bs->bs_idx);
+ return;
+ }
+ if (BFD_STATE_admin_down == bs->remote_state)
+ {
+ bfd_set_diag (bs, BFD_DIAG_CODE_neighbor_sig_down);
+ bfd_set_state (bm, bs, BFD_STATE_down, 0);
+ }
+ else if (BFD_STATE_down == bs->local_state)
+ {
+ if (BFD_STATE_down == bs->remote_state)
+ {
+ bfd_set_diag (bs, BFD_DIAG_CODE_no_diag);
+ bfd_set_state (bm, bs, BFD_STATE_init, 0);
+ }
+ else if (BFD_STATE_init == bs->remote_state)
+ {
+ bfd_set_diag (bs, BFD_DIAG_CODE_no_diag);
+ bfd_set_state (bm, bs, BFD_STATE_up, 0);
+ }
+ }
+ else if (BFD_STATE_init == bs->local_state)
+ {
+ if (BFD_STATE_up == bs->remote_state ||
+ BFD_STATE_init == bs->remote_state)
+ {
+ bfd_set_diag (bs, BFD_DIAG_CODE_no_diag);
+ bfd_set_state (bm, bs, BFD_STATE_up, 0);
+ }
+ }
+ else /* BFD_STATE_up == bs->local_state */
+ {
+ if (BFD_STATE_down == bs->remote_state)
+ {
+ bfd_set_diag (bs, BFD_DIAG_CODE_neighbor_sig_down);
+ bfd_set_state (bm, bs, BFD_STATE_down, 0);
+ }
+ }
+}
+
+int
+bfd_consume_echo_pkt (bfd_main_t * bm, vlib_buffer_t * b)
+{
+ bfd_echo_pkt_t *pkt = NULL;
+ if (b->current_length != sizeof (*pkt))
+ {
+ return 0;
+ }
+ pkt = vlib_buffer_get_current (b);
+ bfd_session_t *bs = bfd_find_session_by_disc (bm, pkt->discriminator);
+ if (!bs)
+ {
+ return 0;
+ }
+ BFD_DBG ("Scanning bfd echo packet, bs_idx=%d", bs->bs_idx);
+ u64 checksum =
+ bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_clocks,
+ bs->echo_secret);
+ if (checksum != pkt->checksum)
+ {
+ BFD_DBG ("Invalid echo packet, checksum mismatch");
+ return 1;
+ }
+ u64 now = clib_cpu_time_now ();
+ if (pkt->expire_time_clocks < now)
+ {
+ BFD_DBG ("Stale packet received, expire time %lu < now %lu",
+ pkt->expire_time_clocks, now);
+ }
+ else
+ {
+ bs->echo_last_rx_clocks = now;
+ }
+ return 1;
+}
+
+u8 *
+format_bfd_session (u8 * s, va_list * args)
+{
+ const bfd_session_t *bs = va_arg (*args, bfd_session_t *);
+ uword indent = format_get_indent (s);
+ s = format (s, "bs_idx=%u local-state=%s remote-state=%s\n"
+ "%Ulocal-discriminator=%u remote-discriminator=%u\n"
+ "%Ulocal-diag=%s echo-active=%s\n"
+ "%Udesired-min-tx=%u required-min-rx=%u\n"
+ "%Urequired-min-echo-rx=%u detect-mult=%u\n"
+ "%Uremote-min-rx=%u remote-min-echo-rx=%u\n"
+ "%Uremote-demand=%s poll-state=%s\n"
+ "%Uauth: local-seq-num=%u remote-seq-num=%u\n"
+ "%U is-delayed=%s\n"
+ "%U curr-key=%U\n"
+ "%U next-key=%U",
+ bs->bs_idx, bfd_state_string (bs->local_state),
+ bfd_state_string (bs->remote_state), format_white_space, indent,
+ bs->local_discr, bs->remote_discr, format_white_space, indent,
+ bfd_diag_code_string (bs->local_diag),
+ (bs->echo ? "yes" : "no"), format_white_space, indent,
+ bs->config_desired_min_tx_usec, bs->config_required_min_rx_usec,
+ format_white_space, indent, 1, bs->local_detect_mult,
+ format_white_space, indent, bs->remote_min_rx_usec,
+ bs->remote_min_echo_rx_usec, format_white_space, indent,
+ (bs->remote_demand ? "yes" : "no"),
+ bfd_poll_state_string (bs->poll_state), format_white_space,
+ indent, bs->auth.local_seq_number, bs->auth.remote_seq_number,
+ format_white_space, indent,
+ (bs->auth.is_delayed ? "yes" : "no"), format_white_space,
+ indent, format_bfd_auth_key, bs->auth.curr_key,
+ format_white_space, indent, format_bfd_auth_key,
+ bs->auth.next_key);
+ return s;
+}
+
+unsigned
+bfd_auth_type_supported (bfd_auth_type_e auth_type)
+{
+ if (auth_type == BFD_AUTH_TYPE_keyed_sha1 ||
+ auth_type == BFD_AUTH_TYPE_meticulous_keyed_sha1)
+ {
+ return 1;
+ }
+ return 0;
+}
+
+vnet_api_error_t
+bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id,
+ u8 bfd_key_id, u8 is_delayed)
+{
+ bfd_main_t *bm = &bfd_main;
+ const uword *key_idx_p =
+ hash_get (bm->auth_key_by_conf_key_id, conf_key_id);
+ if (!key_idx_p)
+ {
+ clib_warning ("Authentication key with config ID %u doesn't exist)",
+ conf_key_id);
+ return VNET_API_ERROR_BFD_ENOENT;
+ }
+ const uword key_idx = *key_idx_p;
+ bfd_auth_key_t *key = pool_elt_at_index (bm->auth_keys, key_idx);
+ if (is_delayed)
+ {
+ if (bs->auth.next_key == key)
+ {
+ /* already using this key, no changes required */
+ return 0;
+ }
+ bs->auth.next_key = key;
+ bs->auth.next_bfd_key_id = bfd_key_id;
+ bs->auth.is_delayed = 1;
+ }
+ else
+ {
+ if (bs->auth.curr_key == key)
+ {
+ /* already using this key, no changes required */
+ return 0;
+ }
+ if (bs->auth.curr_key)
+ {
+ --bs->auth.curr_key->use_count;
+ }
+ bs->auth.curr_key = key;
+ bs->auth.curr_bfd_key_id = bfd_key_id;
+ bs->auth.is_delayed = 0;
+ }
+ ++key->use_count;
+ BFD_DBG ("\nSession auth modified: %U", format_bfd_session, bs);
+ return 0;
+}
+
+vnet_api_error_t
+bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed)
+{
+#if WITH_LIBSSL > 0
+ if (!is_delayed)
+ {
+ /* not delayed - deactivate the current key right now */
+ if (bs->auth.curr_key)
+ {
+ --bs->auth.curr_key->use_count;
+ bs->auth.curr_key = NULL;
+ }
+ bs->auth.is_delayed = 0;
+ }
+ else
+ {
+ /* delayed - mark as so */
+ bs->auth.is_delayed = 1;
+ }
+ /*
+ * clear the next key unconditionally - either the auth change is not
+ * delayed, in which case the caller expects the session to stop using
+ * authentication from this point on, or it is delayed, in which case
+ * next_key must be NULL for that to happen in the future
+ */
+ if (bs->auth.next_key)
+ {
+ --bs->auth.next_key->use_count;
+ bs->auth.next_key = NULL;
+ }
+ BFD_DBG ("\nSession auth modified: %U", format_bfd_session, bs);
+ return 0;
+#else
+ clib_warning ("SSL missing, cannot deactivate BFD authentication");
+ return VNET_API_ERROR_BFD_NOTSUPP;
+#endif
+}
+
+vnet_api_error_t
+bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs,
+ u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult)
+{
+ if (bs->local_detect_mult != detect_mult ||
+ bs->config_desired_min_tx_usec != desired_min_tx_usec ||
+ bs->config_required_min_rx_usec != required_min_rx_usec)
+ {
+ BFD_DBG ("\nChanging session params: %U", format_bfd_session, bs);
+ switch (bs->poll_state)
+ {
+ case BFD_POLL_NOT_NEEDED:
+ if (BFD_STATE_up == bs->local_state ||
+ BFD_STATE_init == bs->local_state)
+ {
+ /* poll sequence is not needed for detect multiplier change */
+ if (bs->config_desired_min_tx_usec != desired_min_tx_usec ||
+ bs->config_required_min_rx_usec != required_min_rx_usec)
+ {
+ bfd_set_poll_state (bs, BFD_POLL_NEEDED);
+ }
+ }
+ break;
+ case BFD_POLL_NEEDED:
+ case BFD_POLL_IN_PROGRESS_AND_QUEUED:
+ /*
+ * nothing to do - will be handled in the future poll which is
+ * already scheduled for execution
+ */
+ break;
+ case BFD_POLL_IN_PROGRESS:
+ /* poll sequence is not needed for detect multiplier change */
+ if (bs->config_desired_min_tx_usec != desired_min_tx_usec ||
+ bs->config_required_min_rx_usec != required_min_rx_usec)
+ {
+ BFD_DBG ("Poll in progress, queueing extra poll, bs_idx=%u",
+ bs->bs_idx);
+ bfd_set_poll_state (bs, BFD_POLL_IN_PROGRESS_AND_QUEUED);
+ }
+ }
+
+ bs->local_detect_mult = detect_mult;
+ bs->config_desired_min_tx_usec = desired_min_tx_usec;
+ bs->config_desired_min_tx_clocks =
+ bfd_usec_to_clocks (bm, desired_min_tx_usec);
+ bs->config_required_min_rx_usec = required_min_rx_usec;
+ bs->config_required_min_rx_clocks =
+ bfd_usec_to_clocks (bm, required_min_rx_usec);
+ BFD_DBG ("\nChanged session params: %U", format_bfd_session, bs);
+
+ vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index,
+ BFD_EVENT_CONFIG_CHANGED, bs->bs_idx);
+ }
+ else
+ {
+ BFD_DBG ("Ignore parameter change - no change, bs_idx=%u", bs->bs_idx);
+ }
+ return 0;
+}
+
+vnet_api_error_t
+bfd_auth_set_key (u32 conf_key_id, u8 auth_type, u8 key_len,
+ const u8 * key_data)
+{
+#if WITH_LIBSSL > 0
+ bfd_auth_key_t *auth_key = NULL;
+ if (!key_len || key_len > bfd_max_key_len_for_auth_type (auth_type))
+ {
+ clib_warning ("Invalid authentication key length for auth_type=%d:%s "
+ "(key_len=%u, must be "
+ "non-zero, expected max=%u)",
+ auth_type, bfd_auth_type_str (auth_type), key_len,
+ (u32) bfd_max_key_len_for_auth_type (auth_type));
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ if (!bfd_auth_type_supported (auth_type))
+ {
+ clib_warning ("Unsupported auth type=%d:%s", auth_type,
+ bfd_auth_type_str (auth_type));
+ return VNET_API_ERROR_BFD_NOTSUPP;
+ }
+ bfd_main_t *bm = &bfd_main;
+ uword *key_idx_p = hash_get (bm->auth_key_by_conf_key_id, conf_key_id);
+ if (key_idx_p)
+ {
+ /* modifying existing key - must not be used */
+ const uword key_idx = *key_idx_p;
+ auth_key = pool_elt_at_index (bm->auth_keys, key_idx);
+ if (auth_key->use_count > 0)
+ {
+ clib_warning ("Authentication key with conf ID %u in use by %u BFD "
+ "session(s) - cannot modify",
+ conf_key_id, auth_key->use_count);
+ return VNET_API_ERROR_BFD_EINUSE;
+ }
+ }
+ else
+ {
+ /* adding new key */
+ pool_get (bm->auth_keys, auth_key);
+ auth_key->conf_key_id = conf_key_id;
+ hash_set (bm->auth_key_by_conf_key_id, conf_key_id,
+ auth_key - bm->auth_keys);
+ }
+ auth_key->auth_type = auth_type;
+ memset (auth_key->key, 0, sizeof (auth_key->key));
+ clib_memcpy (auth_key->key, key_data, key_len);
+ return 0;
+#else
+ clib_warning ("SSL missing, cannot manipulate authentication keys");
+ return VNET_API_ERROR_BFD_NOTSUPP;
+#endif
+}
+
+vnet_api_error_t
+bfd_auth_del_key (u32 conf_key_id)
+{
+#if WITH_LIBSSL > 0
+ bfd_auth_key_t *auth_key = NULL;
+ bfd_main_t *bm = &bfd_main;
+ uword *key_idx_p = hash_get (bm->auth_key_by_conf_key_id, conf_key_id);
+ if (key_idx_p)
+ {
+ /* deleting existing key - must not be used */
+ const uword key_idx = *key_idx_p;
+ auth_key = pool_elt_at_index (bm->auth_keys, key_idx);
+ if (auth_key->use_count > 0)
+ {
+ clib_warning ("Authentication key with conf ID %u in use by %u BFD "
+ "session(s) - cannot delete",
+ conf_key_id, auth_key->use_count);
+ return VNET_API_ERROR_BFD_EINUSE;
+ }
+ hash_unset (bm->auth_key_by_conf_key_id, conf_key_id);
+ memset (auth_key, 0, sizeof (*auth_key));
+ pool_put (bm->auth_keys, auth_key);
+ }
+ else
+ {
+ /* no such key */
+ clib_warning ("Authentication key with conf ID %u does not exist",
+ conf_key_id);
+ return VNET_API_ERROR_BFD_ENOENT;
+ }
+ return 0;
+#else
+ clib_warning ("SSL missing, cannot manipulate authentication keys");
+ return VNET_API_ERROR_BFD_NOTSUPP;
+#endif
+}
+
+bfd_main_t bfd_main;
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h
new file mode 100644
index 00000000..93adac3d
--- /dev/null
+++ b/src/vnet/bfd/bfd_main.h
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD global declarations
+ */
+#ifndef __included_bfd_main_h__
+#define __included_bfd_main_h__
+
+#include <vppinfra/timing_wheel.h>
+#include <vnet/vnet.h>
+#include <vnet/bfd/bfd_protocol.h>
+#include <vnet/bfd/bfd_udp.h>
+
+#define foreach_bfd_mode(F) \
+ F (asynchronous) \
+ F (demand)
+
+typedef enum
+{
+#define F(x) BFD_MODE_##x,
+ foreach_bfd_mode (F)
+#undef F
+} bfd_mode_e;
+
+typedef struct
+{
+ /** global configuration key ID */
+ u32 conf_key_id;
+
+ /** keeps track of how many sessions reference this key */
+ u32 use_count;
+
+ /**
+ * key data directly usable for bfd purposes - already padded with zeroes
+ * (so we don't need the actual length)
+ */
+ u8 key[20];
+
+ /** authentication type for this key */
+ bfd_auth_type_e auth_type;
+} bfd_auth_key_t;
+
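+/**
+ * poll sequence bookkeeping: NOT_NEEDED is the steady state; a parameter
+ * change moves a session to NEEDED, sending the first control frame with the
+ * poll bit set moves it to IN_PROGRESS, and a received final frame returns
+ * it to NOT_NEEDED; a parameter change made while a poll is running yields
+ * IN_PROGRESS_AND_QUEUED, which becomes NEEDED once that poll terminates
+ */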
+#define foreach_bfd_poll_state(F) \
+ F (NOT_NEEDED) \
+ F (NEEDED) \
+ F (IN_PROGRESS) \
+ F (IN_PROGRESS_AND_QUEUED)
+
+typedef enum
+{
+#define F(x) BFD_POLL_##x,
+ foreach_bfd_poll_state (F)
+#undef F
+} bfd_poll_state_e;
+
+/**
+ * hop types
+ */
+#define foreach_bfd_hop(F) \
+ F (SINGLE, "single") \
+ F (MULTI, "multi") \
+
+typedef enum
+{
+#define F(sym, str) BFD_HOP_TYPE_##sym,
+ foreach_bfd_hop (F)
+#undef F
+} bfd_hop_type_e;
+
+typedef struct bfd_session_s
+{
+ /** index in bfd_main.sessions pool */
+ u32 bs_idx;
+
+ /** session state */
+ bfd_state_e local_state;
+
+ /** remote session state */
+ bfd_state_e remote_state;
+
+ /** BFD hop type */
+ bfd_hop_type_e hop_type;
+
+ /** local diagnostics */
+ bfd_diag_code_e local_diag;
+
+ /** remote diagnostics */
+ bfd_diag_code_e remote_diag;
+
+ /** local discriminator */
+ u32 local_discr;
+
+ /** remote discriminator */
+ u32 remote_discr;
+
+ /** configured desired min tx interval (microseconds) */
+ u32 config_desired_min_tx_usec;
+
+ /** configured desired min tx interval (clocks) */
+ u64 config_desired_min_tx_clocks;
+
+ /** effective desired min tx interval (clocks) */
+ u64 effective_desired_min_tx_clocks;
+
+ /** configured required min rx interval (microseconds) */
+ u32 config_required_min_rx_usec;
+
+ /** configured required min rx interval (clocks) */
+ u64 config_required_min_rx_clocks;
+
+ /** effective required min rx interval (clocks) */
+ u64 effective_required_min_rx_clocks;
+
+ /** remote min rx interval (microseconds) */
+ u64 remote_min_rx_usec;
+
+ /** remote min rx interval (clocks) */
+ u64 remote_min_rx_clocks;
+
+ /** remote min echo rx interval (microseconds) */
+ u64 remote_min_echo_rx_usec;
+
+ /** remote min echo rx interval (clocks) */
+ u64 remote_min_echo_rx_clocks;
+
+ /** remote desired min tx interval (clocks) */
+ u64 remote_desired_min_tx_clocks;
+
+ /** configured detect multiplier */
+ u8 local_detect_mult;
+
+ /** 1 if remote system sets demand mode, 0 otherwise */
+ u8 remote_demand;
+
+ /** remote detect multiplier */
+ u8 remote_detect_mult;
+
+ /** 1 if echo function is active, 0 otherwise */
+ u8 echo;
+
+ /** set to value of timer in timing wheel, 0 if never set */
+ u64 wheel_time_clocks;
+
+ /** transmit interval */
+ u64 transmit_interval_clocks;
+
+ /** next time at which to transmit a packet */
+ u64 tx_timeout_clocks;
+
+ /** timestamp of last packet transmitted */
+ u64 last_tx_clocks;
+
+ /** timestamp of last packet received */
+ u64 last_rx_clocks;
+
+ /** transmit interval for echo packets */
+ u64 echo_transmit_interval_clocks;
+
+ /** next time at which to transmit echo packet */
+ u64 echo_tx_timeout_clocks;
+
+ /** timestamp of last echo packet transmitted */
+ u64 echo_last_tx_clocks;
+
+ /** timestamp of last echo packet received */
+ u64 echo_last_rx_clocks;
+
+ /** secret used for calculating/checking checksum of echo packets */
+ u32 echo_secret;
+
+ /** detection time */
+ u64 detection_time_clocks;
+
+ /** state info regarding poll sequence */
+ bfd_poll_state_e poll_state;
+
+ /**
+ * helper for delayed poll sequence - marks either start of running poll
+ * sequence or timeout, after which we can start the next poll sequence
+ */
+ u64 poll_state_start_or_timeout_clocks;
+
+ /** authentication information */
+ struct
+ {
+ /** current key in use */
+ bfd_auth_key_t *curr_key;
+
+ /**
+ * set to next key to use if delayed switch is enabled - in that case
+ * the key is switched when first incoming packet is signed with next_key
+ */
+ bfd_auth_key_t *next_key;
+
+ /** sequence number incremented occasionally or always (if meticulous) */
+ u32 local_seq_number;
+
+ /** remote sequence number */
+ u32 remote_seq_number;
+
+ /** set to 1 if remote sequence number is known */
+ u8 remote_seq_number_known;
+
+ /** current key ID sent out in bfd packet */
+ u8 curr_bfd_key_id;
+
+ /** key ID to use when switched to next_key */
+ u8 next_bfd_key_id;
+
+ /**
+ * set to 1 if delayed action is pending, which might be activation
+ * of authentication, change of key or deactivation
+ */
+ u8 is_delayed;
+ } auth;
+
+ /** transport type for this session */
+ bfd_transport_e transport;
+
+ /** union of transport-specific data */
+ union
+ {
+ bfd_udp_session_t udp;
+ };
+} bfd_session_t;
+
+/**
+ * listener events
+ */
+#define foreach_bfd_listen_event(F) \
+ F (CREATE, "sesion-created") \
+ F (UPDATE, "session-updated") \
+ F (DELETE, "session-deleted")
+
+typedef enum
+{
+#define F(sym, str) BFD_LISTEN_EVENT_##sym,
+ foreach_bfd_listen_event (F)
+#undef F
+} bfd_listen_event_e;
+
+/**
+ * session notification callback function type
+ */
+typedef void (*bfd_notify_fn_t) (bfd_listen_event_e, const bfd_session_t *);
+
+typedef struct
+{
+ /** pool of bfd sessions context data */
+ bfd_session_t *sessions;
+
+ /** timing wheel for scheduling timeouts */
+ timing_wheel_t wheel;
+
+ /** timing wheel inaccuracy, in clocks */
+ u64 wheel_inaccuracy;
+
+ /** hashmap - bfd session by discriminator */
+ u32 *session_by_disc;
+
+ /** background process node index */
+ u32 bfd_process_node_index;
+
+ /** convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /** cpu clocks per second */
+ f64 cpu_cps;
+
+ /** default desired min tx in clocks */
+ u64 default_desired_min_tx_clocks;
+
+ /** minimum required min rx while echo function is active - clocks */
+ u64 min_required_min_rx_while_echo_clocks;
+
+ /** for generating random numbers */
+ u32 random_seed;
+
+ /** pool of authentication keys */
+ bfd_auth_key_t *auth_keys;
+
+ /** hashmap - index in pool auth_keys by conf_key_id */
+ u32 *auth_key_by_conf_key_id;
+
+ /** A vector of callback notification functions */
+ bfd_notify_fn_t *listeners;
+} bfd_main_t;
+
+extern bfd_main_t bfd_main;
+
+/** Packet counters */
+#define foreach_bfd_error(F) \
+ F (NONE, "good bfd packets (processed)") \
+ F (BAD, "invalid bfd packets") \
+ F (DISABLED, "bfd packets received on disabled interfaces")
+
+typedef enum
+{
+#define F(sym, str) BFD_ERROR_##sym,
+ foreach_bfd_error (F)
+#undef F
+ BFD_N_ERROR,
+} bfd_error_t;
+
+/** bfd packet trace capture */
+typedef struct
+{
+ u32 len;
+ u8 data[400];
+} bfd_input_trace_t;
+
+typedef enum
+{
+ BFD_EVENT_RESCHEDULE = 1,
+ BFD_EVENT_NEW_SESSION,
+ BFD_EVENT_CONFIG_CHANGED,
+} bfd_process_event_e;
+
+/* *INDENT-OFF* */
+/** echo packet structure */
+typedef CLIB_PACKED (struct {
+ /** local discriminator */
+ u32 discriminator;
+ /** expire time of this packet - clocks */
+ u64 expire_time_clocks;
+ /** checksum - based on discriminator, local secret and expire time */
+ u64 checksum;
+}) bfd_echo_pkt_t;
+/* *INDENT-ON* */
+
+u8 *bfd_input_format_trace (u8 * s, va_list * args);
+bfd_session_t *bfd_get_session (bfd_main_t * bm, bfd_transport_e t);
+void bfd_put_session (bfd_main_t * bm, bfd_session_t * bs);
+bfd_session_t *bfd_find_session_by_idx (bfd_main_t * bm, uword bs_idx);
+bfd_session_t *bfd_find_session_by_disc (bfd_main_t * bm, u32 disc);
+void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs);
+void bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * bfd, u32 bs_idx);
+int bfd_consume_echo_pkt (bfd_main_t * bm, vlib_buffer_t * b);
+int bfd_verify_pkt_common (const bfd_pkt_t * pkt);
+int bfd_verify_pkt_auth (const bfd_pkt_t * pkt, u16 pkt_size,
+ bfd_session_t * bs);
+void bfd_event (bfd_main_t * bm, bfd_session_t * bs);
+void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b,
+ bfd_main_t * bm, bfd_session_t * bs,
+ int is_local);
+u8 *format_bfd_session (u8 * s, va_list * args);
+u8 *format_bfd_auth_key (u8 * s, va_list * args);
+void bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down);
+unsigned bfd_auth_type_supported (bfd_auth_type_e auth_type);
+vnet_api_error_t bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id,
+ u8 bfd_key_id, u8 is_delayed);
+vnet_api_error_t bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed);
+vnet_api_error_t bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs,
+ u32 desired_min_tx_usec,
+ u32 required_min_rx_usec,
+ u8 detect_mult);
+
+u32 bfd_clocks_to_usec (const bfd_main_t * bm, u64 clocks);
+const char *bfd_poll_state_string (bfd_poll_state_e state);
+
+#define USEC_PER_MS 1000LL
+#define USEC_PER_SECOND (1000 * USEC_PER_MS)
+
+/** default, slow transmission interval for BFD packets, per spec at least 1s */
+#define BFD_DEFAULT_DESIRED_MIN_TX_USEC USEC_PER_SECOND
+
+/**
+ * minimum required min rx set locally when echo function is used, per spec
+ * should be set to at least 1s
+ */
+#define BFD_REQUIRED_MIN_RX_USEC_WHILE_ECHO USEC_PER_SECOND
+
+/**
+ * Register a callback function to receive session notifications.
+ */
+void bfd_register_listener (bfd_notify_fn_t fn);
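+
+/*
+ * a minimal example listener (hypothetical, for illustration only):
+ *
+ *   static void
+ *   my_bfd_listener (bfd_listen_event_e event, const bfd_session_t * bs)
+ *   {
+ *     clib_warning ("BFD event %d on session %u", event, bs->bs_idx);
+ *   }
+ *
+ *   bfd_register_listener (my_bfd_listener);
+ */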
+
+#endif /* __included_bfd_main_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_protocol.c b/src/vnet/bfd/bfd_protocol.c
new file mode 100644
index 00000000..cd51e91a
--- /dev/null
+++ b/src/vnet/bfd/bfd_protocol.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD protocol implementation
+ */
+#include <vnet/bfd/bfd_protocol.h>
+
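+/*
+ * the mandatory section head packs two bit fields per RFC 5880:
+ *   vers_diag: |Vers (3 bits)|Diag (5 bits)|
+ *   sta_flags: |Sta (2 bits)|P|F|C|A|D|M|
+ * the getters/setters below shift and mask accordingly
+ */
+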
+u8
+bfd_pkt_get_version (const bfd_pkt_t * pkt)
+{
+ return pkt->head.vers_diag >> 5;
+}
+
+void
+bfd_pkt_set_version (bfd_pkt_t * pkt, int version)
+{
+ pkt->head.vers_diag =
+ (version << 5) | (pkt->head.vers_diag & ((1 << 5) - 1));
+}
+
+u8
+bfd_pkt_get_diag_code (const bfd_pkt_t * pkt)
+{
+ return pkt->head.vers_diag & ((1 << 5) - 1);
+}
+
+void
+bfd_pkt_set_diag_code (bfd_pkt_t * pkt, int value)
+{
+ pkt->head.vers_diag =
+ (pkt->head.vers_diag & ~((1 << 5) - 1)) | (value & ((1 << 5) - 1));
+}
+
+u8
+bfd_pkt_get_state (const bfd_pkt_t * pkt)
+{
+ return pkt->head.sta_flags >> 6;
+}
+
+void
+bfd_pkt_set_state (bfd_pkt_t * pkt, int value)
+{
+ pkt->head.sta_flags = (value << 6) | (pkt->head.sta_flags & ((1 << 6) - 1));
+}
+
+u8
+bfd_pkt_get_poll (const bfd_pkt_t * pkt)
+{
+ return (pkt->head.sta_flags >> 5) & 1;
+}
+
+void
+bfd_pkt_set_poll (bfd_pkt_t * pkt)
+{
+ pkt->head.sta_flags |= 1 << 5;
+}
+
+u8
+bfd_pkt_get_final (const bfd_pkt_t * pkt)
+{
+ return (pkt->head.sta_flags >> 4) & 1;
+}
+
+void
+bfd_pkt_set_final (bfd_pkt_t * pkt)
+{
+ pkt->head.sta_flags |= 1 << 4;
+}
+
+u8
+bfd_pkt_get_control_plane_independent (const bfd_pkt_t * pkt)
+{
+ return (pkt->head.sta_flags >> 3) & 1;
+}
+
+#if 0
+void
+bfd_pkt_set_control_plane_independent (bfd_pkt_t * pkt)
+{
+ pkt->head.sta_flags |= 1 << 3;
+}
+#endif
+
+u8
+bfd_pkt_get_auth_present (const bfd_pkt_t * pkt)
+{
+ return (pkt->head.sta_flags >> 2) & 1;
+}
+
+void
+bfd_pkt_set_auth_present (bfd_pkt_t * pkt)
+{
+ pkt->head.sta_flags |= 1 << 2;
+}
+
+u8
+bfd_pkt_get_demand (const bfd_pkt_t * pkt)
+{
+ return (pkt->head.sta_flags >> 1) & 1;
+}
+
+#if 0
+void
+bfd_pkt_set_demand (bfd_pkt_t * pkt)
+{
+ pkt->head.sta_flags |= 1 << 1;
+}
+#endif
+
+u8
+bfd_pkt_get_multipoint (const bfd_pkt_t * pkt)
+{
+ return (pkt->head.sta_flags >> 0) & 1;
+}
+
+#if 0
+void
+bfd_pkt_set_multipoint (bfd_pkt_t * pkt)
+{
+ pkt->head.sta_flags |= 1 << 0;
+}
+#endif
+
+u32
+bfd_max_key_len_for_auth_type (bfd_auth_type_e auth_type)
+{
+#define F(t, l, n, s) \
+ if (auth_type == t) \
+ { \
+ return l; \
+ }
+ foreach_bfd_auth_type (F);
+#undef F
+ return 0;
+}
+
+const char *
+bfd_auth_type_str (bfd_auth_type_e auth_type)
+{
+#define F(t, l, n, s) \
+ if (auth_type == t) \
+ { \
+ return s; \
+ }
+ foreach_bfd_auth_type (F);
+#undef F
+ return "UNKNOWN";
+}
+
+const char *
+bfd_diag_code_string (bfd_diag_code_e diag)
+{
+#define F(n, t, s) \
+ case BFD_DIAG_CODE_NAME (t): \
+ return s;
+ switch (diag)
+ {
+ foreach_bfd_diag_code (F)}
+ return "UNKNOWN";
+#undef F
+}
+
+const char *
+bfd_state_string (bfd_state_e state)
+{
+#define F(n, t, s) \
+ case BFD_STATE_NAME (t): \
+ return s;
+ switch (state)
+ {
+ foreach_bfd_state (F)}
+ return "UNKNOWN";
+#undef F
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_protocol.h b/src/vnet/bfd/bfd_protocol.h
new file mode 100644
index 00000000..210c561b
--- /dev/null
+++ b/src/vnet/bfd/bfd_protocol.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_bfd_protocol_h__
+#define __included_bfd_protocol_h__
+/**
+ * @file
+ * @brief BFD protocol declarations
+ */
+
+#include <vppinfra/types.h>
+#include <vppinfra/clib.h>
+
+/* auth type value, max key length, name, description */
+#define foreach_bfd_auth_type(F) \
+ F (0, 0, reserved, "Reserved") \
+ F (1, 16, simple_password, "Simple Password") \
+ F (2, 16, keyed_md5, "Keyed MD5") \
+ F (3, 16, meticulous_keyed_md5, "Meticulous Keyed MD5") \
+ F (4, 20, keyed_sha1, "Keyed SHA1") \
+ F (5, 20, meticulous_keyed_sha1, "Meticulous Keyed SHA1")
+
+#define BFD_AUTH_TYPE_NAME(t) BFD_AUTH_TYPE_##t
+
+typedef enum
+{
+#define F(n, l, t, s) BFD_AUTH_TYPE_NAME (t) = n,
+ foreach_bfd_auth_type (F)
+#undef F
+} bfd_auth_type_e;
+
+/**
+ * @brief get the maximum length of key data for given auth type
+ */
+u32 bfd_max_key_len_for_auth_type (bfd_auth_type_e auth_type);
+const char *bfd_auth_type_str (bfd_auth_type_e auth_type);
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 type;
+ u8 len;
+}) bfd_auth_common_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ /*
+ * 4.4. Keyed SHA1 and Meticulous Keyed SHA1 Authentication Section Format
+
+ * If the Authentication Present (A) bit is set in the header, and the
+ * Authentication Type field contains 4 (Keyed SHA1) or 5 (Meticulous
+ * Keyed SHA1), the Authentication Section has the following format:
+
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Auth Type | Auth Len | Auth Key ID | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Sequence Number |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Auth Key/Hash... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+ bfd_auth_common_t type_len;
+ u8 key_id;
+ u8 reserved;
+ u32 seq_num;
+ /*
+ * Auth Key/Hash
+
+ * This field carries the 20-byte SHA1 hash for the packet. When the
+ * hash is calculated, the shared SHA1 key is stored in this field,
+ * padded to a length of 20 bytes with trailing zero bytes if needed.
+ * The shared key MUST be encoded and configured according to section 6.7.4.
+ */
+ u8 hash[20];
+}) bfd_auth_sha1_t;
+/* *INDENT-ON* */
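+
+/*
+ * Editor's illustrative check (assumes STATIC_ASSERT from vppinfra is
+ * visible here): the packed SHA1 auth section must match the 28-byte
+ * on-wire layout from RFC 5880 (2 + 1 + 1 + 4 + 20 bytes).
+ */
+STATIC_ASSERT (sizeof (bfd_auth_sha1_t) == 28, "bad bfd_auth_sha1_t size");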
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ /*
+ * The Mandatory Section of a BFD Control packet has the following
+ * format:
+
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Vers | Diag |Sta|P|F|C|A|D|M| Detect Mult | Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | My Discriminator |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Your Discriminator |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Desired Min TX Interval |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Required Min RX Interval |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Required Min Echo RX Interval |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+ struct
+ {
+ u8 vers_diag;
+ u8 sta_flags;
+ u8 detect_mult;
+ u8 length;
+ } head;
+ u32 my_disc;
+ u32 your_disc;
+ u32 des_min_tx;
+ u32 req_min_rx;
+ u32 req_min_echo_rx;
+}) bfd_pkt_t;
+/* *INDENT-ON* */
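+
+/*
+ * Editor's illustrative check (same assumption as above): the mandatory
+ * section is exactly 24 bytes on the wire - a 4-byte head plus five u32
+ * fields.
+ */
+STATIC_ASSERT (sizeof (bfd_pkt_t) == 24, "bad bfd_pkt_t size");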
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ bfd_pkt_t pkt;
+ bfd_auth_common_t common_auth;
+}) bfd_pkt_with_common_auth_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ bfd_pkt_t pkt;
+ bfd_auth_sha1_t sha1_auth;
+}) bfd_pkt_with_sha1_auth_t;
+/* *INDENT-ON* */
+
+u8 bfd_pkt_get_version (const bfd_pkt_t * pkt);
+void bfd_pkt_set_version (bfd_pkt_t * pkt, int version);
+u8 bfd_pkt_get_diag_code (const bfd_pkt_t * pkt);
+void bfd_pkt_set_diag_code (bfd_pkt_t * pkt, int value);
+u8 bfd_pkt_get_state (const bfd_pkt_t * pkt);
+void bfd_pkt_set_state (bfd_pkt_t * pkt, int value);
+u8 bfd_pkt_get_poll (const bfd_pkt_t * pkt);
+void bfd_pkt_set_final (bfd_pkt_t * pkt);
+u8 bfd_pkt_get_final (const bfd_pkt_t * pkt);
+void bfd_pkt_set_poll (bfd_pkt_t * pkt);
+u8 bfd_pkt_get_control_plane_independent (const bfd_pkt_t * pkt);
+void bfd_pkt_set_control_plane_independent (bfd_pkt_t * pkt);
+u8 bfd_pkt_get_auth_present (const bfd_pkt_t * pkt);
+void bfd_pkt_set_auth_present (bfd_pkt_t * pkt);
+u8 bfd_pkt_get_demand (const bfd_pkt_t * pkt);
+void bfd_pkt_set_demand (bfd_pkt_t * pkt);
+u8 bfd_pkt_get_multipoint (const bfd_pkt_t * pkt);
+void bfd_pkt_set_multipoint (bfd_pkt_t * pkt);
+
+/* BFD diagnostic codes */
+#define foreach_bfd_diag_code(F) \
+ F (0, no_diag, "No Diagnostic") \
+ F (1, det_time_exp, "Control Detection Time Expired") \
+ F (2, echo_failed, "Echo Function Failed") \
+ F (3, neighbor_sig_down, "Neighbor Signaled Session Down") \
+ F (4, fwd_plain_reset, "Forwarding Plane Reset") \
+ F (5, path_down, "Path Down") \
+ F (6, concat_path_down, "Concatenated Path Down") \
+ F (7, admin_down, "Administratively Down") \
+ F (8, reverse_concat_path_down, "Reverse Concatenated Path Down")
+
+#define BFD_DIAG_CODE_NAME(t) BFD_DIAG_CODE_##t
+
+typedef enum
+{
+#define F(n, t, s) BFD_DIAG_CODE_NAME (t) = n,
+ foreach_bfd_diag_code (F)
+#undef F
+} bfd_diag_code_e;
+
+const char *bfd_diag_code_string (bfd_diag_code_e diag);
+
+/* BFD state values */
+#define foreach_bfd_state(F) \
+ F (0, admin_down, "AdminDown") \
+ F (1, down, "Down") \
+ F (2, init, "Init") \
+ F (3, up, "Up")
+
+#define BFD_STATE_NAME(t) BFD_STATE_##t
+
+typedef enum
+{
+#define F(n, t, s) BFD_STATE_NAME (t) = n,
+ foreach_bfd_state (F)
+#undef F
+} bfd_state_e;
+
+const char *bfd_state_string (bfd_state_e state);
+
+#endif /* __included_bfd_protocol_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c
new file mode 100644
index 00000000..533d98d6
--- /dev/null
+++ b/src/vnet/bfd/bfd_udp.c
@@ -0,0 +1,1516 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD UDP transport layer implementation
+ */
+#include <vppinfra/types.h>
+#include <vlibmemory/api.h>
+#include <vlib/vlib.h>
+#include <vlib/buffer.h>
+#include <vnet/ip/format.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/icmp46_packet.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/bfd/bfd_debug.h>
+#include <vnet/bfd/bfd_udp.h>
+#include <vnet/bfd/bfd_main.h>
+#include <vnet/bfd/bfd_api.h>
+
+typedef struct
+{
+ bfd_main_t *bfd_main;
+ /* hashmap - bfd session index by bfd key - used for CLI/API lookup, where
+ * discriminator is unknown */
+ mhash_t bfd_session_idx_by_bfd_key;
+ /* convenience variable */
+ vnet_main_t *vnet_main;
+ /* flag indicating whether echo_source_sw_if_index holds a valid value */
+ int echo_source_is_set;
+ /* loopback interface used to get echo source ip */
+ u32 echo_source_sw_if_index;
+ /* node index of "ip4-arp" node */
+ u32 ip4_arp_idx;
+ /* node index of "ip6-discover-neighbor" node */
+ u32 ip6_ndp_idx;
+ /* node index of "ip4-rewrite" node */
+ u32 ip4_rewrite_idx;
+ /* node index of "ip6-rewrite" node */
+ u32 ip6_rewrite_idx;
+} bfd_udp_main_t;
+
+static vlib_node_registration_t bfd_udp4_input_node;
+static vlib_node_registration_t bfd_udp6_input_node;
+static vlib_node_registration_t bfd_udp_echo4_input_node;
+static vlib_node_registration_t bfd_udp_echo6_input_node;
+
+bfd_udp_main_t bfd_udp_main;
+
+vnet_api_error_t
+bfd_udp_set_echo_source (u32 sw_if_index)
+{
+ vnet_sw_interface_t *sw_if =
+ vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, sw_if_index);
+ if (sw_if)
+ {
+ bfd_udp_main.echo_source_sw_if_index = sw_if_index;
+ bfd_udp_main.echo_source_is_set = 1;
+ return 0;
+ }
+ return VNET_API_ERROR_BFD_ENOENT;
+}
+
+vnet_api_error_t
+bfd_udp_del_echo_source (u32 sw_if_index)
+{
+ bfd_udp_main.echo_source_sw_if_index = ~0;
+ bfd_udp_main.echo_source_is_set = 0;
+ return 0;
+}
+
+int
+bfd_udp_is_echo_available (bfd_transport_e transport)
+{
+ if (!bfd_udp_main.echo_source_is_set)
+ {
+ BFD_DBG ("UDP echo source not set - echo not available");
+ return 0;
+ }
+ /*
+ * for the echo to work, we need a loopback interface with at least one
+ * address with netmask length at most 31 (ip4) or 127 (ip6) so that we can
+ * pick an unused address from that subnet
+ */
+ vnet_sw_interface_t *sw_if =
+ vnet_get_sw_interface_safe (bfd_udp_main.vnet_main,
+ bfd_udp_main.echo_source_sw_if_index);
+ if (sw_if && sw_if->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ {
+ if (BFD_TRANSPORT_UDP4 == transport)
+ {
+ ip4_main_t *im = &ip4_main;
+ ip_interface_address_t *ia = NULL;
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im->lookup_main, ia,
+ bfd_udp_main.echo_source_sw_if_index,
+ 0 /* honor unnumbered */, ({
+ if (ia->address_length <= 31)
+ {
+ return 1;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ else if (BFD_TRANSPORT_UDP6 == transport)
+ {
+ ip6_main_t *im = &ip6_main;
+ ip_interface_address_t *ia = NULL;
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im->lookup_main, ia,
+ bfd_udp_main.echo_source_sw_if_index,
+ 0 /* honor unnumbered */, ({
+ if (ia->address_length <= 127)
+ {
+ return 1;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ BFD_DBG ("No usable IP address for UDP echo - echo not available");
+ return 0;
+}
+
+static u16
+bfd_udp_bs_idx_to_sport (u32 bs_idx)
+{
+ /* The source port MUST be in the range 49152 through 65535. The same UDP
+ * source port number MUST be used for all BFD Control packets associated
+ * with a particular session. The source port number SHOULD be unique among
+ * all BFD sessions on the system. If more than 16384 BFD sessions are
+ * simultaneously active, UDP source port numbers MAY be reused on
+ * multiple sessions, but the number of distinct uses of the same UDP
+ * source port number SHOULD be minimized.
+ */
+ return 49152 + bs_idx % (65535 - 49152 + 1);
+}
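+
+/*
+ * Editor's worked example: the modulo above maps session indices onto the
+ * RFC 5881 source port range and wraps after 16384 sessions:
+ *
+ * bfd_udp_bs_idx_to_sport (0) == 49152
+ * bfd_udp_bs_idx_to_sport (16383) == 65535
+ * bfd_udp_bs_idx_to_sport (16384) == 49152
+ */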
+
+int
+bfd_udp_get_echo_src_ip4 (ip4_address_t * addr)
+{
+ if (!bfd_udp_main.echo_source_is_set)
+ {
+ BFD_ERR ("cannot find ip4 address, echo source not set");
+ return 0;
+ }
+ ip_interface_address_t *ia = NULL;
+ ip4_main_t *im = &ip4_main;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (
+ &im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index,
+ 0 /* honor unnumbered */, ({
+ ip4_address_t *x =
+ ip_interface_address_get_address (&im->lookup_main, ia);
+ if (ia->address_length <= 31)
+ {
+ addr->as_u32 = clib_host_to_net_u32 (x->as_u32);
+ /*
+ * flip the last bit to get a different address, might be network,
+ * we don't care ...
+ */
+ addr->as_u32 ^= 1;
+ addr->as_u32 = clib_net_to_host_u32 (addr->as_u32);
+ return 1;
+ }
+ }));
+ /* *INDENT-ON* */
+ BFD_ERR ("cannot find ip4 address, no usable address found");
+ return 0;
+}
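+
+/*
+ * Editor's worked example (hypothetical address): with 10.10.10.10/24 on
+ * the echo source interface, the last bit is flipped and 10.10.10.11 is
+ * used as the (possibly unowned) echo source address.
+ */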
+
+int
+bfd_udp_get_echo_src_ip6 (ip6_address_t * addr)
+{
+ if (!bfd_udp_main.echo_source_is_set)
+ {
+ BFD_ERR ("cannot find ip6 address, echo source not set");
+ return 0;
+ }
+ ip_interface_address_t *ia = NULL;
+ ip6_main_t *im = &ip6_main;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (
+ &im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index,
+ 0 /* honor unnumbered */, ({
+ ip6_address_t *x =
+ ip_interface_address_get_address (&im->lookup_main, ia);
+ if (ia->address_length <= 127)
+ {
+ *addr = *x;
+ addr->as_u8[15] ^= 1; /* flip the last bit of the address */
+ return 1;
+ }
+ }));
+ /* *INDENT-ON* */
+ BFD_ERR ("cannot find ip6 address, no usable address found");
+ return 0;
+}
+
+void
+bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index,
+ int *have_usable_ip4, ip4_address_t * ip4,
+ int *have_usable_ip6, ip6_address_t * ip6)
+{
+ if (bfd_udp_main.echo_source_is_set)
+ {
+ *is_set = 1;
+ *sw_if_index = bfd_udp_main.echo_source_sw_if_index;
+ *have_usable_ip4 = bfd_udp_get_echo_src_ip4 (ip4);
+ *have_usable_ip6 = bfd_udp_get_echo_src_ip6 (ip6);
+ }
+ else
+ {
+ *is_set = 0;
+ }
+}
+
+int
+bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs,
+ int is_echo)
+{
+ const bfd_udp_session_t *bus = &bs->udp;
+ const bfd_udp_key_t *key = &bus->key;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index;
+ vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index;
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+ typedef struct
+ {
+ ip4_header_t ip4;
+ udp_header_t udp;
+ } ip4_udp_headers;
+ ip4_udp_headers *headers = NULL;
+ vlib_buffer_advance (b, -sizeof (*headers));
+ headers = vlib_buffer_get_current (b);
+ memset (headers, 0, sizeof (*headers));
+ headers->ip4.ip_version_and_header_length = 0x45;
+ headers->ip4.ttl = 255;
+ headers->ip4.protocol = IP_PROTOCOL_UDP;
+ headers->udp.src_port =
+ clib_host_to_net_u16 (bfd_udp_bs_idx_to_sport (bs->bs_idx));
+ if (is_echo)
+ {
+ int rv;
+ if (!(rv = bfd_udp_get_echo_src_ip4 (&headers->ip4.src_address)))
+ {
+ return rv;
+ }
+ headers->ip4.dst_address.as_u32 = key->local_addr.ip4.as_u32;
+ headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd_echo4);
+ }
+ else
+ {
+ headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32;
+ headers->ip4.dst_address.as_u32 = key->peer_addr.ip4.as_u32;
+ headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4);
+ }
+
+ /* fix ip length, checksum and udp length */
+ const u16 ip_length = vlib_buffer_length_in_chain (vm, b);
+
+ headers->ip4.length = clib_host_to_net_u16 (ip_length);
+ headers->ip4.checksum = ip4_header_checksum (&headers->ip4);
+
+ const u16 udp_length = ip_length - (sizeof (headers->ip4));
+ headers->udp.length = clib_host_to_net_u16 (udp_length);
+ return 1;
+}
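+
+/*
+ * Editor's worked example: for a minimal 24-byte BFD control packet the
+ * buffer chain after prepending the headers is 20 (ip4) + 8 (udp) + 24 =
+ * 52 bytes, so ip4.length is set to 52 and udp.length to 52 - 20 = 32.
+ */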
+
+int
+bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs,
+ int is_echo)
+{
+ const bfd_udp_session_t *bus = &bs->udp;
+ const bfd_udp_key_t *key = &bus->key;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index;
+ vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index;
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+ typedef struct
+ {
+ ip6_header_t ip6;
+ udp_header_t udp;
+ } ip6_udp_headers;
+ ip6_udp_headers *headers = NULL;
+ vlib_buffer_advance (b, -sizeof (*headers));
+ headers = vlib_buffer_get_current (b);
+ memset (headers, 0, sizeof (*headers));
+ headers->ip6.ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+ headers->ip6.hop_limit = 255;
+ headers->ip6.protocol = IP_PROTOCOL_UDP;
+ headers->udp.src_port =
+ clib_host_to_net_u16 (bfd_udp_bs_idx_to_sport (bs->bs_idx));
+ if (is_echo)
+ {
+ int rv;
+ if (!(rv = bfd_udp_get_echo_src_ip6 (&headers->ip6.src_address)))
+ {
+ return rv;
+ }
+ clib_memcpy (&headers->ip6.dst_address, &key->local_addr.ip6,
+ sizeof (headers->ip6.dst_address));
+
+ headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd_echo6);
+ }
+ else
+ {
+ clib_memcpy (&headers->ip6.src_address, &key->local_addr.ip6,
+ sizeof (headers->ip6.src_address));
+ clib_memcpy (&headers->ip6.dst_address, &key->peer_addr.ip6,
+ sizeof (headers->ip6.dst_address));
+ headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6);
+ }
+
+ /* fix ip payload length and udp length */
+ const u16 udp_length =
+ vlib_buffer_length_in_chain (vm, b) - (sizeof (headers->ip6));
+ headers->udp.length = clib_host_to_net_u16 (udp_length);
+ headers->ip6.payload_length = headers->udp.length;
+
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ headers->udp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b, &headers->ip6, &bogus);
+ ASSERT (bogus == 0);
+ if (headers->udp.checksum == 0)
+ {
+ headers->udp.checksum = 0xffff;
+ }
+ return 1;
+}
+
+static void
+bfd_create_frame_to_next_node (vlib_main_t * vm, u32 bi, u32 next_node)
+{
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, next_node);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, next_node, f);
+}
+
+int
+bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
+{
+ const bfd_udp_session_t *bus = &bs->udp;
+ ip_adjacency_t *adj = adj_get (bus->adj_index);
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ *next_node = bfd_udp_main.ip4_arp_idx;
+ return 1;
+ case BFD_TRANSPORT_UDP6:
+ *next_node = bfd_udp_main.ip6_ndp_idx;
+ return 1;
+ }
+ break;
+ case IP_LOOKUP_NEXT_REWRITE:
+ switch (bs->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ *next_node = bfd_udp_main.ip4_rewrite_idx;
+ return 1;
+ case BFD_TRANSPORT_UDP6:
+ *next_node = bfd_udp_main.ip6_rewrite_idx;
+ return 1;
+ }
+ break;
+ default:
+ /* drop */
+ break;
+ }
+ return 0;
+}
+
+int
+bfd_transport_udp4 (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs)
+{
+ u32 next_node;
+ int rv = bfd_udp_calc_next_node (bs, &next_node);
+ if (rv)
+ {
+ bfd_create_frame_to_next_node (vm, bi, next_node);
+ }
+ return rv;
+}
+
+int
+bfd_transport_udp6 (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs)
+{
+ u32 next_node;
+ int rv = bfd_udp_calc_next_node (bs, &next_node);
+ if (rv)
+ {
+ bfd_create_frame_to_next_node (vm, bi, next_node);
+ }
+ return rv;
+}
+
+static bfd_session_t *
+bfd_lookup_session (bfd_udp_main_t * bum, const bfd_udp_key_t * key)
+{
+ uword *p = mhash_get (&bum->bfd_session_idx_by_bfd_key, key);
+ if (p)
+ {
+ return bfd_find_session_by_idx (bum->bfd_main, *p);
+ }
+ return 0;
+}
+
+static void
+bfd_udp_key_init (bfd_udp_key_t * key, u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr)
+{
+ memset (key, 0, sizeof (*key));
+ key->sw_if_index = sw_if_index;
+ key->local_addr.as_u64[0] = local_addr->as_u64[0];
+ key->local_addr.as_u64[1] = local_addr->as_u64[1];
+ key->peer_addr.as_u64[0] = peer_addr->as_u64[0];
+ key->peer_addr.as_u64[1] = peer_addr->as_u64[1];
+}
+
+static vnet_api_error_t
+bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index,
+ u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ bfd_session_t ** bs_out)
+{
+ /* get a pool entry and if we end up not needing it, give it back */
+ bfd_transport_e t = BFD_TRANSPORT_UDP4;
+ if (!ip46_address_is_ip4 (local_addr))
+ {
+ t = BFD_TRANSPORT_UDP6;
+ }
+ bfd_session_t *bs = bfd_get_session (bum->bfd_main, t);
+ if (!bs)
+ {
+ return VNET_API_ERROR_BFD_EAGAIN;
+ }
+ bfd_udp_session_t *bus = &bs->udp;
+ memset (bus, 0, sizeof (*bus));
+ bfd_udp_key_t *key = &bus->key;
+ bfd_udp_key_init (key, sw_if_index, local_addr, peer_addr);
+ const bfd_session_t *tmp = bfd_lookup_session (bum, key);
+ if (tmp)
+ {
+ clib_warning ("duplicate bfd-udp session, existing bs_idx=%d",
+ tmp->bs_idx);
+ bfd_put_session (bum->bfd_main, bs);
+ return VNET_API_ERROR_BFD_EEXIST;
+ }
+ mhash_set (&bum->bfd_session_idx_by_bfd_key, key, bs->bs_idx, NULL);
+ BFD_DBG ("session created, bs_idx=%u, sw_if_index=%d, local=%U, peer=%U",
+ bs->bs_idx, key->sw_if_index, format_ip46_address,
+ &key->local_addr, IP46_TYPE_ANY, format_ip46_address,
+ &key->peer_addr, IP46_TYPE_ANY);
+ if (BFD_TRANSPORT_UDP4 == t)
+ {
+ bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
+ &key->peer_addr,
+ key->sw_if_index);
+ BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) "
+ "returns %d", format_ip46_address, &key->peer_addr,
+ IP46_TYPE_ANY, key->sw_if_index, bus->adj_index);
+ }
+ else
+ {
+ bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
+ &key->peer_addr,
+ key->sw_if_index);
+ BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, %U, %d) "
+ "returns %d", format_ip46_address, &key->peer_addr,
+ IP46_TYPE_ANY, key->sw_if_index, bus->adj_index);
+ }
+ *bs_out = bs;
+ return bfd_session_set_params (bum->bfd_main, bs, desired_min_tx_usec,
+ required_min_rx_usec, detect_mult);
+}
+
+static vnet_api_error_t
+bfd_udp_validate_api_input (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr)
+{
+ vnet_sw_interface_t *sw_if =
+ vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, sw_if_index);
+ u8 local_ip_valid = 0;
+ ip_interface_address_t *ia = NULL;
+ if (!sw_if)
+ {
+ clib_warning ("got NULL sw_if");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+ if (ip46_address_is_ip4 (local_addr))
+ {
+ if (!ip46_address_is_ip4 (peer_addr))
+ {
+ clib_warning ("IP family mismatch");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ ip4_main_t *im = &ip4_main;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (
+ &im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({
+ ip4_address_t *x =
+ ip_interface_address_get_address (&im->lookup_main, ia);
+ if (x->as_u32 == local_addr->ip4.as_u32)
+ {
+ /* valid address for this interface */
+ local_ip_valid = 1;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if (ip46_address_is_ip4 (peer_addr))
+ {
+ clib_warning ("IP family mismatch");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ ip6_main_t *im = &ip6_main;
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (
+ &im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({
+ ip6_address_t *x =
+ ip_interface_address_get_address (&im->lookup_main, ia);
+ if (local_addr->ip6.as_u64[0] == x->as_u64[0] &&
+ local_addr->ip6.as_u64[1] == x->as_u64[1])
+ {
+ /* valid address for this interface */
+ local_ip_valid = 1;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ if (!local_ip_valid)
+ {
+ clib_warning ("address not found on interface");
+ return VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
+ }
+
+ return 0;
+}
+
+static vnet_api_error_t
+bfd_udp_find_session_by_api_input (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ bfd_session_t ** bs_out)
+{
+ vnet_api_error_t rv =
+ bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr);
+ if (!rv)
+ {
+ bfd_udp_main_t *bum = &bfd_udp_main;
+ bfd_udp_key_t key;
+ bfd_udp_key_init (&key, sw_if_index, local_addr, peer_addr);
+ bfd_session_t *bs = bfd_lookup_session (bum, &key);
+ if (bs)
+ {
+ *bs_out = bs;
+ }
+ else
+ {
+ clib_warning
+ ("BFD session not found (sw_if_index=%u, local=%U, peer=%U",
+ sw_if_index, format_ip46_address, local_addr, IP46_TYPE_ANY,
+ format_ip46_address, peer_addr, IP46_TYPE_ANY);
+ return VNET_API_ERROR_BFD_ENOENT;
+ }
+ }
+ return rv;
+}
+
+static vnet_api_error_t
+bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr)
+{
+ vnet_api_error_t rv =
+ bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr);
+ if (rv)
+ {
+ return rv;
+ }
+ if (detect_mult < 1)
+ {
+ clib_warning ("detect_mult < 1");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ if (desired_min_tx_usec < 1)
+ {
+ clib_warning ("desired_min_tx_usec < 1");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ return 0;
+}
+
+static void
+bfd_udp_del_session_internal (bfd_session_t * bs)
+{
+ bfd_udp_main_t *bum = &bfd_udp_main;
+ BFD_DBG ("free bfd-udp session, bs_idx=%d", bs->bs_idx);
+ mhash_unset (&bum->bfd_session_idx_by_bfd_key, &bs->udp.key, NULL);
+ adj_unlock (bs->udp.adj_index);
+ bfd_put_session (bum->bfd_main, bs);
+}
+
+vnet_api_error_t
+bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ u32 desired_min_tx_usec, u32 required_min_rx_usec,
+ u8 detect_mult, u8 is_authenticated, u32 conf_key_id,
+ u8 bfd_key_id)
+{
+ vnet_api_error_t rv =
+ bfd_api_verify_common (sw_if_index, desired_min_tx_usec,
+ required_min_rx_usec, detect_mult,
+ local_addr, peer_addr);
+ bfd_session_t *bs = NULL;
+ if (!rv)
+ {
+ rv =
+ bfd_udp_add_session_internal (&bfd_udp_main, sw_if_index,
+ desired_min_tx_usec,
+ required_min_rx_usec, detect_mult,
+ local_addr, peer_addr, &bs);
+ }
+ if (!rv && is_authenticated)
+ {
+#if WITH_LIBSSL > 0
+ rv = bfd_auth_activate (bs, conf_key_id, bfd_key_id,
+ 0 /* is not delayed */ );
+#else
+ clib_warning ("SSL missing, cannot add authenticated BFD session");
+ rv = VNET_API_ERROR_BFD_NOTSUPP;
+#endif
+ if (rv)
+ {
+ bfd_udp_del_session_internal (bs);
+ }
+ }
+ if (!rv)
+ {
+ bfd_session_start (bfd_udp_main.bfd_main, bs);
+ }
+
+ return rv;
+}
+
+vnet_api_error_t
+bfd_udp_mod_session (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult)
+{
+ bfd_session_t *bs = NULL;
+ vnet_api_error_t rv =
+ bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr,
+ &bs);
+ if (rv)
+ {
+ return rv;
+ }
+
+ return bfd_session_set_params (bfd_udp_main.bfd_main, bs,
+ desired_min_tx_usec, required_min_rx_usec,
+ detect_mult);
+}
+
+vnet_api_error_t
+bfd_udp_del_session (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr)
+{
+ bfd_session_t *bs = NULL;
+ vnet_api_error_t rv =
+ bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr,
+ &bs);
+ if (rv)
+ {
+ return rv;
+ }
+ bfd_udp_del_session_internal (bs);
+ return 0;
+}
+
+vnet_api_error_t
+bfd_udp_session_set_flags (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr, u8 admin_up_down)
+{
+ bfd_session_t *bs = NULL;
+ vnet_api_error_t rv =
+ bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr,
+ &bs);
+ if (rv)
+ {
+ return rv;
+ }
+ bfd_session_set_flags (bs, admin_up_down);
+ return 0;
+}
+
+vnet_api_error_t
+bfd_udp_auth_activate (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr,
+ u32 conf_key_id, u8 key_id, u8 is_delayed)
+{
+#if WITH_LIBSSL > 0
+ bfd_session_t *bs = NULL;
+ vnet_api_error_t rv =
+ bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr,
+ &bs);
+ if (rv)
+ {
+ return rv;
+ }
+ return bfd_auth_activate (bs, conf_key_id, key_id, is_delayed);
+#else
+ clib_warning ("SSL missing, cannot activate BFD authentication");
+ return VNET_API_ERROR_BFD_NOTSUPP;
+#endif
+}
+
+vnet_api_error_t
+bfd_udp_auth_deactivate (u32 sw_if_index,
+ const ip46_address_t * local_addr,
+ const ip46_address_t * peer_addr, u8 is_delayed)
+{
+ bfd_session_t *bs = NULL;
+ vnet_api_error_t rv =
+ bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr,
+ &bs);
+ if (rv)
+ {
+ return rv;
+ }
+ return bfd_auth_deactivate (bs, is_delayed);
+}
+
+typedef enum
+{
+ BFD_UDP_INPUT_NEXT_NORMAL,
+ BFD_UDP_INPUT_NEXT_REPLY_ARP,
+ BFD_UDP_INPUT_NEXT_REPLY_REWRITE,
+ BFD_UDP_INPUT_N_NEXT,
+} bfd_udp_input_next_t;
+
+/* Packet counters - BFD control frames */
+#define foreach_bfd_udp_error(F) \
+ F (NONE, "good bfd packets (processed)") \
+ F (BAD, "invalid bfd packets")
+
+#define F(sym, string) static char BFD_UDP_ERR_##sym##_STR[] = string;
+foreach_bfd_udp_error (F);
+#undef F
+
+static char *bfd_udp_error_strings[] = {
+#define F(sym, string) BFD_UDP_ERR_##sym##_STR,
+ foreach_bfd_udp_error (F)
+#undef F
+};
+
+typedef enum
+{
+#define F(sym, str) BFD_UDP_ERROR_##sym,
+ foreach_bfd_udp_error (F)
+#undef F
+ BFD_UDP_N_ERROR,
+} bfd_udp_error_t;
+
+/* Packet counters - BFD ECHO packets */
+#define foreach_bfd_udp_echo_error(F) \
+ F (NONE, "good bfd echo packets (processed)") \
+ F (BAD, "invalid bfd echo packets")
+
+#define F(sym, string) static char BFD_UDP_ECHO_ERR_##sym##_STR[] = string;
+foreach_bfd_udp_echo_error (F);
+#undef F
+
+static char *bfd_udp_echo_error_strings[] = {
+#define F(sym, string) BFD_UDP_ECHO_ERR_##sym##_STR,
+ foreach_bfd_udp_echo_error (F)
+#undef F
+};
+
+typedef enum
+{
+#define F(sym, str) BFD_UDP_ECHO_ERROR_##sym,
+ foreach_bfd_udp_echo_error (F)
+#undef F
+ BFD_UDP_ECHO_N_ERROR,
+} bfd_udp_echo_error_t;
+
+static void
+bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4,
+ udp_header_t ** udp)
+{
+ /* sanity check first */
+ const i32 start = vnet_buffer (b)->l3_hdr_offset;
+ if (start < -(i32) sizeof (b->pre_data))
+ {
+ BFD_ERR ("Start of ip header is before pre_data, ignoring");
+ *ip4 = NULL;
+ *udp = NULL;
+ return;
+ }
+ *ip4 = (ip4_header_t *) (b->data + start);
+ if ((u8 *) * ip4 > (u8 *) vlib_buffer_get_current (b))
+ {
+ BFD_ERR ("Start of ip header is beyond current data, ignoring");
+ *ip4 = NULL;
+ *udp = NULL;
+ return;
+ }
+ *udp = (udp_header_t *) ((*ip4) + 1);
+}
+
+static bfd_udp_error_t
+bfd_udp4_verify_transport (const ip4_header_t * ip4,
+ const udp_header_t * udp, const bfd_session_t * bs)
+{
+ const bfd_udp_session_t *bus = &bs->udp;
+ const bfd_udp_key_t *key = &bus->key;
+ if (ip4->src_address.as_u32 != key->peer_addr.ip4.as_u32)
+ {
+ BFD_ERR ("IPv4 src addr mismatch, got %U, expected %U",
+ format_ip4_address, ip4->src_address.as_u8, format_ip4_address,
+ key->peer_addr.ip4.as_u8);
+ return BFD_UDP_ERROR_BAD;
+ }
+ if (ip4->dst_address.as_u32 != key->local_addr.ip4.as_u32)
+ {
+ BFD_ERR ("IPv4 dst addr mismatch, got %U, expected %U",
+ format_ip4_address, ip4->dst_address.as_u8, format_ip4_address,
+ key->local_addr.ip4.as_u8);
+ return BFD_UDP_ERROR_BAD;
+ }
+ const u8 expected_ttl = 255;
+ if (ip4->ttl != expected_ttl)
+ {
+ BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl,
+ expected_ttl);
+ return BFD_UDP_ERROR_BAD;
+ }
+ if (clib_net_to_host_u16 (udp->src_port) < 49152)
+ {
+ BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>",
+ clib_net_to_host_u16 (udp->src_port));
+ return BFD_UDP_ERROR_BAD;
+ }
+ return BFD_UDP_ERROR_NONE;
+}
+
+typedef struct
+{
+ u32 bs_idx;
+ bfd_pkt_t pkt;
+} bfd_rpc_update_t;
+
+static void
+bfd_rpc_update_session_cb (const bfd_rpc_update_t * a)
+{
+ bfd_consume_pkt (bfd_udp_main.bfd_main, &a->pkt, a->bs_idx);
+}
+
+static void
+bfd_rpc_update_session (u32 bs_idx, const bfd_pkt_t * pkt)
+{
+ /* packet length was already verified to be correct by the caller */
+ const u32 data_size = sizeof (bfd_rpc_update_t) -
+ STRUCT_SIZE_OF (bfd_rpc_update_t, pkt) + pkt->head.length;
+ u8 data[data_size];
+ bfd_rpc_update_t *update = (bfd_rpc_update_t *) data;
+ update->bs_idx = bs_idx;
+ clib_memcpy (&update->pkt, pkt, pkt->head.length);
+ vl_api_rpc_call_main_thread (bfd_rpc_update_session_cb, data, data_size);
+}
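+
+/*
+ * Editor's illustrative note: data_size above trims the RPC payload to the
+ * actual BFD packet length; for a 24-byte control packet without an auth
+ * section it equals
+ *
+ * sizeof (bfd_rpc_update_t) - sizeof (bfd_pkt_t) + 24
+ *
+ * so exactly pkt->head.length bytes are copied after the bs_idx member.
+ */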
+
+static bfd_udp_error_t
+bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_buffer_t * b, bfd_session_t ** bs_out)
+{
+ const bfd_pkt_t *pkt = vlib_buffer_get_current (b);
+ if (sizeof (*pkt) > b->current_length)
+ {
+ BFD_ERR
+ ("Payload size %d too small to hold bfd packet of minimum size %d",
+ b->current_length, sizeof (*pkt));
+ return BFD_UDP_ERROR_BAD;
+ }
+ ip4_header_t *ip4;
+ udp_header_t *udp;
+ bfd_udp4_find_headers (b, &ip4, &udp);
+ if (!ip4 || !udp)
+ {
+ BFD_ERR ("Couldn't find ip4 or udp header");
+ return BFD_UDP_ERROR_BAD;
+ }
+ const u32 udp_payload_length =
+ clib_net_to_host_u16 (udp->length) - sizeof (*udp);
+ if (pkt->head.length > udp_payload_length)
+ {
+ BFD_ERR
+ ("BFD packet length is larger than udp payload length (%u > %u)",
+ pkt->head.length, udp_payload_length);
+ return BFD_UDP_ERROR_BAD;
+ }
+ if (!bfd_verify_pkt_common (pkt))
+ {
+ return BFD_UDP_ERROR_BAD;
+ }
+ bfd_session_t *bs = NULL;
+ if (pkt->your_disc)
+ {
+ BFD_DBG ("Looking up BFD session using discriminator %u",
+ pkt->your_disc);
+ bs = bfd_find_session_by_disc (bfd_udp_main.bfd_main, pkt->your_disc);
+ }
+ else
+ {
+ bfd_udp_key_t key;
+ memset (&key, 0, sizeof (key));
+ key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ key.local_addr.ip4.as_u32 = ip4->dst_address.as_u32;
+ key.peer_addr.ip4.as_u32 = ip4->src_address.as_u32;
+ BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, "
+ "peer=%U)",
+ key.sw_if_index, format_ip4_address, key.local_addr.ip4.as_u8,
+ format_ip4_address, key.peer_addr.ip4.as_u8);
+ bs = bfd_lookup_session (&bfd_udp_main, &key);
+ }
+ if (!bs)
+ {
+ BFD_ERR ("BFD session lookup failed - no session matches BFD pkt");
+ return BFD_UDP_ERROR_BAD;
+ }
+ BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx);
+ if (!bfd_verify_pkt_auth (pkt, b->current_length, bs))
+ {
+ BFD_ERR ("Packet verification failed, dropping packet");
+ return BFD_UDP_ERROR_BAD;
+ }
+ bfd_udp_error_t err;
+ if (BFD_UDP_ERROR_NONE != (err = bfd_udp4_verify_transport (ip4, udp, bs)))
+ {
+ return err;
+ }
+ bfd_rpc_update_session (bs->bs_idx, pkt);
+ *bs_out = bs;
+ return BFD_UDP_ERROR_NONE;
+}
+
+static void
+bfd_udp6_find_headers (vlib_buffer_t * b, ip6_header_t ** ip6,
+ udp_header_t ** udp)
+{
+ /* sanity check first */
+ const i32 start = vnet_buffer (b)->l3_hdr_offset;
+ if (start < -(i32) sizeof (b->pre_data))
+ {
+ BFD_ERR ("Start of ip header is before pre_data, ignoring");
+ *ip6 = NULL;
+ *udp = NULL;
+ return;
+ }
+ *ip6 = (ip6_header_t *) (b->data + start);
+ if ((u8 *) * ip6 > (u8 *) vlib_buffer_get_current (b))
+ {
+ BFD_ERR ("Start of ip header is beyond current data, ignoring");
+ *ip6 = NULL;
+ *udp = NULL;
+ return;
+ }
+ if ((*ip6)->protocol != IP_PROTOCOL_UDP)
+ {
+ BFD_ERR ("Unexpected protocol in IPv6 header '%u', expected '%u' (== "
+ "IP_PROTOCOL_UDP)", (*ip6)->protocol, IP_PROTOCOL_UDP);
+ *ip6 = NULL;
+ *udp = NULL;
+ return;
+ }
+ *udp = (udp_header_t *) ((*ip6) + 1);
+}
+
+static bfd_udp_error_t
+bfd_udp6_verify_transport (const ip6_header_t * ip6,
+ const udp_header_t * udp, const bfd_session_t * bs)
+{
+ const bfd_udp_session_t *bus = &bs->udp;
+ const bfd_udp_key_t *key = &bus->key;
+ if (ip6->src_address.as_u64[0] != key->peer_addr.ip6.as_u64[0] ||
+ ip6->src_address.as_u64[1] != key->peer_addr.ip6.as_u64[1])
+ {
+ BFD_ERR ("IP src addr mismatch, got %U, expected %U",
+ format_ip6_address, &ip6->src_address, format_ip6_address,
+ &key->peer_addr.ip6);
+ return BFD_UDP_ERROR_BAD;
+ }
+ if (ip6->dst_address.as_u64[0] != key->local_addr.ip6.as_u64[0] ||
+ ip6->dst_address.as_u64[1] != key->local_addr.ip6.as_u64[1])
+ {
+ BFD_ERR ("IP dst addr mismatch, got %U, expected %U",
+ format_ip6_address, &ip6->dst_address, format_ip6_address,
+ &key->local_addr.ip6);
+ return BFD_UDP_ERROR_BAD;
+ }
+ const u8 expected_hop_limit = 255;
+ if (ip6->hop_limit != expected_hop_limit)
+ {
+ BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u",
+ ip6->hop_limit, expected_hop_limit);
+ return BFD_UDP_ERROR_BAD;
+ }
+ if (clib_net_to_host_u16 (udp->src_port) < 49152)
+ {
+ BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>",
+ clib_net_to_host_u16 (udp->src_port));
+ return BFD_UDP_ERROR_BAD;
+ }
+ return BFD_UDP_ERROR_NONE;
+}
+
+static bfd_udp_error_t
+bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_buffer_t * b, bfd_session_t ** bs_out)
+{
+ const bfd_pkt_t *pkt = vlib_buffer_get_current (b);
+ if (sizeof (*pkt) > b->current_length)
+ {
+ BFD_ERR
+ ("Payload size %d too small to hold bfd packet of minimum size %d",
+ b->current_length, sizeof (*pkt));
+ return BFD_UDP_ERROR_BAD;
+ }
+ ip6_header_t *ip6;
+ udp_header_t *udp;
+ bfd_udp6_find_headers (b, &ip6, &udp);
+ if (!ip6 || !udp)
+ {
+ BFD_ERR ("Couldn't find ip6 or udp header");
+ return BFD_UDP_ERROR_BAD;
+ }
+ const u32 udp_payload_length =
+ clib_net_to_host_u16 (udp->length) - sizeof (*udp);
+ if (pkt->head.length > udp_payload_length)
+ {
+ BFD_ERR
+ ("BFD packet length is larger than udp payload length (%u > %u)",
+ pkt->head.length, udp_payload_length);
+ return BFD_UDP_ERROR_BAD;
+ }
+ if (!bfd_verify_pkt_common (pkt))
+ {
+ return BFD_UDP_ERROR_BAD;
+ }
+ bfd_session_t *bs = NULL;
+ if (pkt->your_disc)
+ {
+ BFD_DBG ("Looking up BFD session using discriminator %u",
+ pkt->your_disc);
+ bs = bfd_find_session_by_disc (bfd_udp_main.bfd_main, pkt->your_disc);
+ }
+ else
+ {
+ bfd_udp_key_t key;
+ memset (&key, 0, sizeof (key));
+ key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ key.local_addr.ip6.as_u64[0] = ip6->dst_address.as_u64[0];
+ key.local_addr.ip6.as_u64[1] = ip6->dst_address.as_u64[1];
+ key.peer_addr.ip6.as_u64[0] = ip6->src_address.as_u64[0];
+ key.peer_addr.ip6.as_u64[1] = ip6->src_address.as_u64[1];
+ BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, "
+ "peer=%U)",
+ key.sw_if_index, format_ip6_address, &key.local_addr,
+ format_ip6_address, &key.peer_addr);
+ bs = bfd_lookup_session (&bfd_udp_main, &key);
+ }
+ if (!bs)
+ {
+ BFD_ERR ("BFD session lookup failed - no session matches BFD pkt");
+ return BFD_UDP_ERROR_BAD;
+ }
+ BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx);
+ if (!bfd_verify_pkt_auth (pkt, b->current_length, bs))
+ {
+ BFD_ERR ("Packet verification failed, dropping packet");
+ return BFD_UDP_ERROR_BAD;
+ }
+ bfd_udp_error_t err;
+ if (BFD_UDP_ERROR_NONE != (err = bfd_udp6_verify_transport (ip6, udp, bs)))
+ {
+ return err;
+ }
+ bfd_rpc_update_session (bs->bs_idx, pkt);
+ *bs_out = bs;
+ return BFD_UDP_ERROR_NONE;
+}
+
+/*
+ * Process a frame of bfd packets
+ * Expect 1 packet / frame
+ */
+static uword
+bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f, int is_ipv6)
+{
+ u32 n_left_from, *from;
+ bfd_input_trace_t *t0;
+
+ from = vlib_frame_vector_args (f); /* array of buffer indices */
+ n_left_from = f->n_vectors; /* number of buffer indices */
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0, error0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ bfd_session_t *bs = NULL;
+
+ /* If this pkt is traced, snapshot the data */
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ int len;
+ t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0));
+ len = (b0->current_length < sizeof (t0->data)) ? b0->current_length
+ : sizeof (t0->data);
+ t0->len = len;
+ clib_memcpy (t0->data, vlib_buffer_get_current (b0), len);
+ }
+
+ /* scan this bfd pkt. error0 is the counter index to bmp */
+ if (is_ipv6)
+ {
+ error0 = bfd_udp6_scan (vm, rt, b0, &bs);
+ }
+ else
+ {
+ error0 = bfd_udp4_scan (vm, rt, b0, &bs);
+ }
+ b0->error = rt->errors[error0];
+
+ next0 = BFD_UDP_INPUT_NEXT_NORMAL;
+ if (BFD_UDP_ERROR_NONE == error0)
+ {
+ /*
+ * if everything went fine, check for poll bit, if present, re-use
+ * the buffer and based on (now updated) session parameters, send
+ * the final packet back
+ */
+ const bfd_pkt_t *pkt = vlib_buffer_get_current (b0);
+ if (bfd_pkt_get_poll (pkt))
+ {
+ b0->current_data = 0;
+ b0->current_length = 0;
+ memset (vnet_buffer (b0), 0, sizeof (*vnet_buffer (b0)));
+ bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, bs,
+ 0);
+ if (is_ipv6)
+ {
+ vlib_node_increment_counter (vm, bfd_udp6_input_node.index,
+ b0->error, 1);
+ }
+ else
+ {
+ vlib_node_increment_counter (vm, bfd_udp4_input_node.index,
+ b0->error, 1);
+ }
+ const bfd_udp_session_t *bus = &bs->udp;
+ ip_adjacency_t *adj = adj_get (bus->adj_index);
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ next0 = BFD_UDP_INPUT_NEXT_REPLY_ARP;
+ break;
+ case IP_LOOKUP_NEXT_REWRITE:
+ next0 = BFD_UDP_INPUT_NEXT_REPLY_REWRITE;
+ break;
+ default:
+ /* drop */
+ break;
+ }
+ }
+ }
+ vlib_set_next_frame_buffer (vm, rt, next0, bi0);
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ return f->n_vectors;
+}
+
+static uword
+bfd_udp4_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ return bfd_udp_input (vm, rt, f, 0);
+}
+
+/*
+ * bfd input graph node declaration
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
+ .function = bfd_udp4_input,
+ .name = "bfd-udp4-input",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = BFD_UDP_N_ERROR,
+ .error_strings = bfd_udp_error_strings,
+
+ .format_trace = bfd_input_format_trace,
+
+ .n_next_nodes = BFD_UDP_INPUT_N_NEXT,
+ .next_nodes =
+ {
+ [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop",
+ [BFD_UDP_INPUT_NEXT_REPLY_ARP] = "ip4-arp",
+ [BFD_UDP_INPUT_NEXT_REPLY_REWRITE] = "ip4-lookup",
+ },
+};
+/* *INDENT-ON* */
+
+static uword
+bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ return bfd_udp_input (vm, rt, f, 1);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
+ .function = bfd_udp6_input,
+ .name = "bfd-udp6-input",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = BFD_UDP_N_ERROR,
+ .error_strings = bfd_udp_error_strings,
+
+ .format_trace = bfd_input_format_trace,
+
+ .n_next_nodes = BFD_UDP_INPUT_N_NEXT,
+ .next_nodes =
+ {
+ [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop",
+ [BFD_UDP_INPUT_NEXT_REPLY_ARP] = "ip6-discover-neighbor",
+ [BFD_UDP_INPUT_NEXT_REPLY_REWRITE] = "ip6-lookup",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * Process a frame of bfd echo packets
+ * Expect 1 packet / frame
+ */
+static uword
+bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f, int is_ipv6)
+{
+ u32 n_left_from, *from;
+ bfd_input_trace_t *t0;
+
+ from = vlib_frame_vector_args (f); /* array of buffer indices */
+ n_left_from = f->n_vectors; /* number of buffer indices */
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* If this pkt is traced, snapshot the data */
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ int len;
+ t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0));
+ len = (b0->current_length < sizeof (t0->data)) ? b0->current_length
+ : sizeof (t0->data);
+ t0->len = len;
+ clib_memcpy (t0->data, vlib_buffer_get_current (b0), len);
+ }
+
+ if (bfd_consume_echo_pkt (bfd_udp_main.bfd_main, b0))
+ {
+ b0->error = rt->errors[BFD_UDP_ECHO_ERROR_NONE];
+ next0 = BFD_UDP_INPUT_NEXT_NORMAL;
+ }
+ else
+ {
+ /* loop back the packet */
+ b0->error = rt->errors[BFD_UDP_ECHO_ERROR_NONE];
+ if (is_ipv6)
+ {
+ vlib_node_increment_counter (vm, bfd_udp_echo6_input_node.index,
+ b0->error, 1);
+ }
+ else
+ {
+ vlib_node_increment_counter (vm, bfd_udp_echo4_input_node.index,
+ b0->error, 1);
+ }
+ next0 = BFD_UDP_INPUT_NEXT_REPLY_REWRITE;
+ }
+
+ vlib_set_next_frame_buffer (vm, rt, next0, bi0);
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ return f->n_vectors;
+}
+
+static uword
+bfd_udp_echo4_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ return bfd_udp_echo_input (vm, rt, f, 0);
+}
+
+u8 *
+bfd_echo_input_format_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ const bfd_udp_echo_input_trace_t *t =
+ va_arg (*args, bfd_udp_echo_input_trace_t *);
+ if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head))
+ {
+ s = format (s, "BFD ECHO:\n");
+ s = format (s, " data: %U", format_hexdump, t->data, t->len);
+ }
+
+ return s;
+}
+
+/*
+ * bfd input graph node declaration
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = {
+ .function = bfd_udp_echo4_input,
+ .name = "bfd-udp-echo4-input",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = BFD_UDP_ECHO_N_ERROR,
+ .error_strings = bfd_udp_echo_error_strings,
+
+ .format_trace = bfd_echo_input_format_trace,
+
+ .n_next_nodes = BFD_UDP_INPUT_N_NEXT,
+ .next_nodes =
+ {
+ [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop",
+ [BFD_UDP_INPUT_NEXT_REPLY_ARP] = "ip4-arp",
+ [BFD_UDP_INPUT_NEXT_REPLY_REWRITE] = "ip4-lookup",
+ },
+};
+/* *INDENT-ON* */
+
+static uword
+bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ return bfd_udp_echo_input (vm, rt, f, 1);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = {
+ .function = bfd_udp_echo6_input,
+ .name = "bfd-udp-echo6-input",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = BFD_UDP_ECHO_N_ERROR,
+ .error_strings = bfd_udp_echo_error_strings,
+
+ .format_trace = bfd_echo_input_format_trace,
+
+ .n_next_nodes = BFD_UDP_INPUT_N_NEXT,
+ .next_nodes =
+ {
+ [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop",
+ [BFD_UDP_INPUT_NEXT_REPLY_ARP] = "ip6-discover-neighbor",
+ [BFD_UDP_INPUT_NEXT_REPLY_REWRITE] = "ip6-lookup",
+ },
+};
+
+/* *INDENT-ON* */
+
+static clib_error_t *
+bfd_udp_sw_if_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_create)
+{
+ bfd_session_t **to_be_freed = NULL;
+ BFD_DBG ("sw_if_add_del called, sw_if_index=%u, is_create=%u", sw_if_index,
+ is_create);
+ if (!is_create)
+ {
+ bfd_session_t *bs;
+ pool_foreach (bs, bfd_udp_main.bfd_main->sessions, {
+ if (bs->transport != BFD_TRANSPORT_UDP4 &&
+ bs->transport != BFD_TRANSPORT_UDP6)
+ {
+ continue;
+ }
+ if (bs->udp.key.sw_if_index != sw_if_index)
+ {
+ continue;
+ }
+ vec_add1 (to_be_freed, bs);
+ });
+ }
+ bfd_session_t **bs;
+ vec_foreach (bs, to_be_freed)
+ {
+ clib_warning ("removal of sw_if_index=%u forces removal of bfd session "
+ "with bs_idx=%u", sw_if_index, (*bs)->bs_idx);
+ bfd_session_set_flags (*bs, 0);
+ bfd_udp_del_session_internal (*bs);
+ }
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (bfd_udp_sw_if_add_del);
+
+/*
+ * setup function
+ */
+static clib_error_t *
+bfd_udp_init (vlib_main_t * vm)
+{
+ mhash_init (&bfd_udp_main.bfd_session_idx_by_bfd_key, sizeof (uword),
+ sizeof (bfd_udp_key_t));
+ bfd_udp_main.bfd_main = &bfd_main;
+ bfd_udp_main.vnet_main = vnet_get_main ();
+ udp_register_dst_port (vm, UDP_DST_PORT_bfd4, bfd_udp4_input_node.index, 1);
+ udp_register_dst_port (vm, UDP_DST_PORT_bfd6, bfd_udp6_input_node.index, 0);
+ udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo4,
+ bfd_udp_echo4_input_node.index, 1);
+ udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo6,
+ bfd_udp_echo6_input_node.index, 0);
+ vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip4-arp");
+ ASSERT (node);
+ bfd_udp_main.ip4_arp_idx = node->index;
+ node = vlib_get_node_by_name (vm, (u8 *) "ip6-discover-neighbor");
+ ASSERT (node);
+ bfd_udp_main.ip6_ndp_idx = node->index;
+ node = vlib_get_node_by_name (vm, (u8 *) "ip4-rewrite");
+ ASSERT (node);
+ bfd_udp_main.ip4_rewrite_idx = node->index;
+ node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
+ ASSERT (node);
+ bfd_udp_main.ip6_rewrite_idx = node->index;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (bfd_udp_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h
new file mode 100644
index 00000000..87868104
--- /dev/null
+++ b/src/vnet/bfd/bfd_udp.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD UDP transport layer declarations
+ */
+
+#ifndef __included_bfd_udp_h__
+#define __included_bfd_udp_h__
+
+#include <vppinfra/clib.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/bfd/bfd_api.h>
+
+/* *INDENT-OFF* */
+/** identifier of BFD session based on UDP transport only */
+typedef CLIB_PACKED (struct {
+ union {
+ /** interface to which the session is tied - single-hop */
+ u32 sw_if_index;
+ /** the FIB index the peer is in - multi-hop*/
+ u32 fib_index;
+ };
+ /** local address */
+ ip46_address_t local_addr;
+ /** peer address */
+ ip46_address_t peer_addr;
+}) bfd_udp_key_t;
+/* *INDENT-ON* */
+
+/** UDP transport specific data embedded in bfd_session's union */
+typedef struct
+{
+ /** key identifying this session */
+ bfd_udp_key_t key;
+ /** adjacency index returned from adj lock call */
+ adj_index_t adj_index;
+} bfd_udp_session_t;
+
+/** bfd udp echo packet trace capture */
+typedef struct
+{
+ u32 len;
+ u8 data[400];
+} bfd_udp_echo_input_trace_t;
+
+struct bfd_session_s;
+
+/**
+ * @brief add the necessary transport layer by prepending it to existing data
+ *
+ * @param is_echo 1 if this is echo packet, 0 if control frame
+ *
+ * @return 1 on success, 0 on failure
+ */
+int bfd_add_udp4_transport (vlib_main_t * vm, u32 bi,
+ const struct bfd_session_s *bs, int is_echo);
+
+/**
+ * @brief add the necessary transport layer by prepending it to existing data
+ *
+ * @param is_echo 1 if this is echo packet, 0 if control frame
+ *
+ * @return 1 on success, 0 on failure
+ */
+int bfd_add_udp6_transport (vlib_main_t * vm, u32 bi,
+ const struct bfd_session_s *bs, int is_echo);
+
+/**
+ * @brief transport packet over udpv4
+ *
+ * @return 1 on success, 0 on failure
+ */
+int bfd_transport_udp4 (vlib_main_t * vm, u32 bi,
+ const struct bfd_session_s *bs);
+
+/**
+ * @brief transport packet over udpv6
+ *
+ * @return 1 on success, 0 on failure
+ */
+int bfd_transport_udp6 (vlib_main_t * vm, u32 bi,
+ const struct bfd_session_s *bs);
+
+/**
+ * @brief check if the bfd udp layer is echo-capable at this time
+ *
+ * @return 1 if available, 0 otherwise
+ */
+int bfd_udp_is_echo_available (bfd_transport_e transport);
+
+/**
+ * @brief get echo source information - used by CLI
+ */
+void bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index,
+ int *have_usable_ip4, ip4_address_t * ip4,
+ int *have_usable_ip6, ip6_address_t * ip6);
+
+#endif /* __included_bfd_udp_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/dir.dox b/src/vnet/bfd/dir.dox
new file mode 100644
index 00000000..b9a5978f
--- /dev/null
+++ b/src/vnet/bfd/dir.dox
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Bidirectional Forwarding Detection (BFD) implementation
+*/
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
new file mode 100644
index 00000000..fbefe7c2
--- /dev/null
+++ b/src/vnet/buffer.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vnet/buffer.h: vnet buffer flags
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_buffer_h
+#define included_vnet_buffer_h
+
+#include <vlib/vlib.h>
+
+#define foreach_vnet_buffer_field \
+ _( 1, L4_CHECKSUM_COMPUTED) \
+ _( 2, L4_CHECKSUM_CORRECT) \
+ _( 3, VLAN_2_DEEP) \
+ _( 4, VLAN_1_DEEP) \
+ _( 5, SPAN_CLONE) \
+ _( 6, HANDOFF_NEXT_VALID) \
+ _( 7, LOCALLY_ORIGINATED) \
+ _( 8, IS_IP4) \
+ _( 9, IS_IP6) \
+ _(10, OFFLOAD_IP_CKSUM) \
+ _(11, OFFLOAD_TCP_CKSUM) \
+ _(12, OFFLOAD_UDP_CKSUM)
+
+#define VNET_BUFFER_FLAGS_VLAN_BITS \
+ (VNET_BUFFER_F_VLAN_1_DEEP | VNET_BUFFER_F_VLAN_2_DEEP)
+
+enum
+{
+#define _(bit, name) VNET_BUFFER_F_##name = (1 << LOG2_VLIB_BUFFER_FLAG_USER(bit)),
+ foreach_vnet_buffer_field
+#undef _
+};
+
+enum
+{
+#define _(bit, name) VNET_BUFFER_F_LOG2_##name = LOG2_VLIB_BUFFER_FLAG_USER(bit),
+ foreach_vnet_buffer_field
+#undef _
+};
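+
+/*
+ * Illustrative expansion (editor's note): each field above becomes one
+ * flag bit in the user area of the vlib buffer flags, e.g.
+ *
+ * VNET_BUFFER_F_IS_IP4 = 1 << LOG2_VLIB_BUFFER_FLAG_USER (8)
+ *
+ * so tests read as (b->flags & VNET_BUFFER_F_IS_IP4).
+ */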
+
+
+#define foreach_buffer_opaque_union_subtype \
+_(ip) \
+_(swt) \
+_(l2) \
+_(l2t) \
+_(gre) \
+_(l2_classify) \
+_(handoff) \
+_(policer) \
+_(ipsec) \
+_(map) \
+_(map_t) \
+_(ip_frag) \
+_(tcp)
+
+/*
+ * vnet stack buffer opaque array overlay structure.
+ * The vnet_buffer_opaque_t *must* be the same size as the
+ * vlib_buffer_t "opaque" structure member, 32 bytes.
+ *
+ * When adding a union type, please add a stanza to
+ * foreach_buffer_opaque_union_subtype (directly above).
+ * Code in vnet_interface_init(...) verifies the size
+ * of the union, and will announce any deviations in an
+ * impossible-to-miss manner.
+ */
+typedef struct
+{
+ u32 sw_if_index[VLIB_N_RX_TX];
+ i16 l2_hdr_offset;
+ i16 l3_hdr_offset;
+ i16 l4_hdr_offset;
+
+ union
+ {
+ /* IP4/6 buffer opaque. */
+ struct
+ {
+ /* Adjacency from destination IP address lookup [VLIB_TX].
+ Adjacency from source IP address lookup [VLIB_RX].
+ This gets set to ~0 until source lookup is performed. */
+ u32 adj_index[VLIB_N_RX_TX];
+
+ union
+ {
+ struct
+ {
+ /* Flow hash value for this packet computed from IP src/dst address
+ protocol and ports. */
+ u32 flow_hash;
+
+ /* next protocol */
+ u32 save_protocol;
+
+ /* Rewrite length */
+ u32 save_rewrite_length;
+
+ /* MFIB RPF ID */
+ u32 rpf_id;
+ };
+
+ /* ICMP */
+ struct
+ {
+ u8 type;
+ u8 code;
+ u32 data;
+ } icmp;
+ };
+
+ } ip;
+
+ /*
+ * MPLS:
+ * data copied from the MPLS header that was popped from the packet
+ * during the look-up.
+ */
+ struct
+ {
+ u8 ttl;
+ u8 exp;
+ u8 first;
+ } mpls;
+
+ /* ip4-in-ip6 softwire termination, only valid there */
+ struct
+ {
+ u8 swt_disable;
+ u32 mapping_index;
+ } swt;
+
+ /* l2 bridging path, only valid there */
+ struct opaque_l2
+ {
+ u32 feature_bitmap;
+ u16 bd_index; /* bridge-domain index */
+ u8 l2_len; /* ethernet header length */
+ u8 shg; /* split-horizon group */
+ u16 l2fib_sn; /* l2fib bd/int seq_num */
+ u8 bd_age; /* aging enabled */
+ } l2;
+
+ /* l2tpv3 softwire encap, only valid there */
+ struct
+ {
+ u32 pad[4]; /* do not overlay w/ ip.adj_index[0,1] */
+ u8 next_index;
+ u32 session_index;
+ } l2t;
+
+ struct
+ {
+ u32 src, dst;
+ } gre;
+
+ /* L2 classify */
+ struct
+ {
+ struct opaque_l2 pad;
+ union
+ {
+ u32 table_index;
+ u32 opaque_index;
+ };
+ u64 hash;
+ } l2_classify;
+
+ /* IO - worker thread handoff */
+ struct
+ {
+ u32 next_index;
+ } handoff;
+
+ /* vnet policer */
+ struct
+ {
+ u32 pad[8 - VLIB_N_RX_TX - 1]; /* to end of opaque */
+ u32 index;
+ } policer;
+
+ /* interface output features */
+ struct
+ {
+ u32 flags;
+ u32 sad_index;
+ } ipsec;
+
+ /* MAP */
+ struct
+ {
+ u16 mtu;
+ } map;
+
+ /* MAP-T */
+ struct
+ {
+ u32 map_domain_index;
+ struct
+ {
+ u32 saddr, daddr;
+ u16 frag_offset; //Fragmentation header offset
+ u16 l4_offset; //L4 header overall offset
+ u8 l4_protocol; //The final protocol number
+ } v6; //Used by ip6_map_t only
+ u16 checksum_offset; //L4 checksum overall offset
+ u16 mtu; //Exit MTU
+ } map_t;
+
+ /* IP Fragmentation */
+ struct
+ {
+ u16 header_offset;
+ u16 mtu;
+ u8 next_index;
+ u8 flags; //See ip_frag.h
+ } ip_frag;
+
+ /* COP - configurable junk filter(s) */
+ struct
+ {
+ /* Current configuration index. */
+ u32 current_config_index;
+ } cop;
+
+ /* LISP */
+ struct
+ {
+ /* overlay address family */
+ u16 overlay_afi;
+ } lisp;
+
+ /* Driver rx feature */
+ struct
+ {
+ u32 saved_next_index; /**< saved by drivers for short-cut */
+ u16 buffer_advance;
+ } device_input_feat;
+
+ /* TCP */
+ struct
+ {
+ u32 connection_index;
+ u32 seq_number;
+ u32 seq_end;
+ u32 ack_number;
+ u16 hdr_offset; /**< offset relative to ip hdr */
+ u16 data_offset; /**< offset relative to ip hdr */
+ u16 data_len; /**< data len */
+ u8 flags;
+ } tcp;
+
+ /* SNAT */
+ struct
+ {
+ u32 flags;
+ } snat;
+
+ u32 unused[6];
+ };
+} vnet_buffer_opaque_t;
+
+/*
+ * The opaque field of the vlib_buffer_t is interpreted as a
+ * vnet_buffer_opaque_t. Hence it should be big enough to accommodate one.
+ */
+STATIC_ASSERT (sizeof (vnet_buffer_opaque_t) <=
+ STRUCT_SIZE_OF (vlib_buffer_t, opaque),
+ "VNET buffer meta-data too large for vlib_buffer");
+
+#define vnet_buffer(b) ((vnet_buffer_opaque_t *) (b)->opaque)
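+
+/*
+ * Typical accessor usage in a graph node -- an illustrative sketch
+ * only, using the fields defined above:
+ *
+ *   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ *   u32 rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ *   vnet_buffer (b)->ip.adj_index[VLIB_TX] = adj_index;
+ */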
+
+/* Full cache line (64 bytes) of additional space; the union is
+ * deliberately empty for now, leaving room for future metadata. */
+typedef struct
+{
+ union
+ {
+ };
+} vnet_buffer_opaque2_t;
+
+
+
+#endif /* included_vnet_buffer_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/cdp/cdp.pg b/src/vnet/cdp/cdp.pg
new file mode 100644
index 00000000..b6ba1865
--- /dev/null
+++ b/src/vnet/cdp/cdp.pg
@@ -0,0 +1,7 @@
+packet-generator new {
+ name cdp
+ limit 1
+ node cdp-input
+ size 374-374
+ data { hex 0x02b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2043333735304520536f66747761726520284333373530452d554e4956455253414c2d4d292c2056657273696f6e2031322e32283335295345352c2052454c4541534520534f4654574152452028666331290a436f707972696768742028632920313938362d3230303720627920436973636f2053797374656d732c20496e632e0a436f6d70696c6564205468752031392d4a756c2d30372031363a3137206279206e616368656e00060018636973636f2057532d4333373530452d3234544400020011000000010101cc0004000000000003001b54656e4769676162697445746865726e6574312f302f3100040008000000280008002400000c011200000000ffffffff010221ff000000000000001e7a50f000ff000000090004000a00060001000b0005010012000500001300050000160011000000010101cc000400000000001a00100000000100000000ffffffff }
+}
diff --git a/src/vnet/cdp/cdp_input.c b/src/vnet/cdp/cdp_input.c
new file mode 100644
index 00000000..3574de68
--- /dev/null
+++ b/src/vnet/cdp/cdp_input.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/cdp/cdp_node.h>
+
+cdp_main_t cdp_main;
+
+#define DEBUG_TLV_DUMP 0 /* 1 => dump TLVs to stdout while processing them */
+
+/* Reliable multicast messages we use to keep peers updated */
+mc_serialize_msg_t serialize_cdp_neighbor_msg;
+mc_serialize_msg_t serialize_cdp_keepalive_msg;
+
+/*
+ * ported from an unspecified Cisco cdp implementation.
+ * Compute / return in HOST byte order. 0 => good checksum.
+ */
+u16
+cdp_checksum (void *p, int count)
+{
+ u32 sum;
+ u16 i, *data;
+
+ data = p;
+ sum = 0;
+ while (count > 1)
+ {
+ sum += ntohs (*data);
+ data++;
+ count -= 2;
+ }
+
+ if (count > 0)
+ sum += *(char *) data;
+
+ while (sum >> 16)
+ {
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ }
+
+ i = (u16) sum;
+ return (~i);
+}
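+
+/*
+ * Worked example (illustrative numbers, not from a captured packet):
+ * summing the 16-bit words 0x01b4 and 0xfe4b gives 0xffff; there is
+ * no carry to fold, and ~0xffff == 0x0000, so a packet whose words
+ * sum to 0xffff carries a good checksum.
+ */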
+
+/* TLV handler table */
+typedef struct
+{
+ char *name;
+ u32 tlv_id;
+ void *format;
+ void *process;
+} tlv_handler_t;
+
+static tlv_handler_t tlv_handlers[];
+
+/* Display a generic TLV as a set of hex bytes */
+static u8 *
+format_generic_tlv (u8 * s, va_list * va)
+{
+ cdp_tlv_t *t = va_arg (*va, cdp_tlv_t *);
+ tlv_handler_t *h = &tlv_handlers[t->t];
+
+ s = format (s, "%s(%d): %U\n", h->name,
+ t->t, format_hex_bytes, t->v, t->l - sizeof (*t));
+ return s;
+}
+
+/* Ignore / skip a TLV we don't support */
+static cdp_error_t
+process_generic_tlv (cdp_main_t * cm, cdp_neighbor_t * n, cdp_tlv_t * t)
+{
+#if DEBUG_TLV_DUMP > 0
+ fformat (stdout, "%U", format_generic_tlv, t);
+#endif
+
+ return CDP_ERROR_NONE;
+}
+
+/* print a text tlv */
+static u8 *
+format_text_tlv (u8 * s, va_list * va)
+{
+ cdp_tlv_t *t = va_arg (*va, cdp_tlv_t *);
+ tlv_handler_t *h = &tlv_handlers[t->t];
+ int i;
+
+ s = format (s, "%s(%d): ", h->name, t->t);
+
+ for (i = 0; i < (t->l - sizeof (*t)); i++)
+ vec_add1 (s, t->v[i]);
+
+ vec_add1 (s, '\n');
+ return s;
+}
+
+#if DEBUG_TLV_DUMP == 0
+/* gcc warning be gone */
+CLIB_UNUSED (static cdp_error_t
+ process_text_tlv (cdp_main_t * cm, cdp_neighbor_t * n,
+ cdp_tlv_t * t));
+#endif
+
+/* process / skip a generic text TLV that we don't support */
+static cdp_error_t
+process_text_tlv (cdp_main_t * cm, cdp_neighbor_t * n, cdp_tlv_t * t)
+{
+#if DEBUG_TLV_DUMP > 0
+ fformat (stdout, "%U\n", format_text_tlv, t);
+#endif
+
+ return CDP_ERROR_NONE;
+}
+
+/* per-TLV format function definitions */
+#define format_unused_tlv format_generic_tlv
+#define format_device_name_tlv format_text_tlv
+#define format_address_tlv format_generic_tlv
+#define format_port_id_tlv format_text_tlv
+#define format_capabilities_tlv format_generic_tlv
+#define format_version_tlv format_text_tlv
+#define format_platform_tlv format_text_tlv
+#define format_ipprefix_tlv format_generic_tlv
+#define format_hello_tlv format_generic_tlv
+#define format_vtp_domain_tlv format_generic_tlv
+#define format_native_vlan_tlv format_generic_tlv
+#define format_duplex_tlv format_generic_tlv
+#define format_appl_vlan_tlv format_generic_tlv
+#define format_trigger_tlv format_generic_tlv
+#define format_power_tlv format_generic_tlv
+#define format_mtu_tlv format_generic_tlv
+#define format_trust_tlv format_generic_tlv
+#define format_cos_tlv format_generic_tlv
+#define format_sysname_tlv format_generic_tlv
+#define format_sysobject_tlv format_generic_tlv
+#define format_mgmt_addr_tlv format_generic_tlv
+#define format_physical_loc_tlv format_generic_tlv
+#define format_mgmt_addr2_tlv format_generic_tlv
+#define format_power_requested_tlv format_generic_tlv
+#define format_power_available_tlv format_generic_tlv
+#define format_port_unidirectional_tlv format_generic_tlv
+#define format_unknown_28_tlv format_generic_tlv
+#define format_energywise_tlv format_generic_tlv
+#define format_unknown_30_tlv format_generic_tlv
+#define format_spare_poe_tlv format_generic_tlv
+
+/* tlv ID=0 is a mistake */
+static cdp_error_t
+process_unused_tlv (cdp_main_t * cm, cdp_neighbor_t * n, cdp_tlv_t * t)
+{
+ return CDP_ERROR_BAD_TLV;
+}
+
+/* list of text TLVs that we snapshot */
+#define foreach_text_to_struct_tlv \
+_(device_name,DEBUG_TLV_DUMP) \
+_(version,DEBUG_TLV_DUMP) \
+_(platform,DEBUG_TLV_DUMP) \
+_(port_id,DEBUG_TLV_DUMP)
+
+#define _(z,dbg) \
+static \
+cdp_error_t process_##z##_tlv (cdp_main_t *cm, cdp_neighbor_t *n, \
+ cdp_tlv_t *t) \
+{ \
+ int i; \
+ if (dbg) \
+ fformat(stdout, "%U\n", format_text_tlv, t); \
+ \
+ if (n->z) \
+ _vec_len(n->z) = 0; \
+ \
+ for (i = 0; i < (t->l - sizeof (*t)); i++) \
+ vec_add1(n->z, t->v[i]); \
+ \
+ vec_add1(n->z, 0); \
+ \
+ return CDP_ERROR_NONE; \
+}
+
+foreach_text_to_struct_tlv
+#undef _
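+
+/*
+ * For reference, the macro above expands to one handler per listed
+ * TLV. For device_name, the generated function is (sketch only):
+ *
+ *   static cdp_error_t
+ *   process_device_name_tlv (cdp_main_t *cm, cdp_neighbor_t *n,
+ *                            cdp_tlv_t *t)
+ *   {
+ *     ... reset and refill the n->device_name vector from t->v ...
+ *   }
+ */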
+#define process_address_tlv process_generic_tlv
+#define process_capabilities_tlv process_generic_tlv
+#define process_ipprefix_tlv process_generic_tlv
+#define process_hello_tlv process_generic_tlv
+#define process_vtp_domain_tlv process_generic_tlv
+#define process_native_vlan_tlv process_generic_tlv
+#define process_duplex_tlv process_generic_tlv
+#define process_appl_vlan_tlv process_generic_tlv
+#define process_trigger_tlv process_generic_tlv
+#define process_power_tlv process_generic_tlv
+#define process_mtu_tlv process_generic_tlv
+#define process_trust_tlv process_generic_tlv
+#define process_cos_tlv process_generic_tlv
+#define process_sysname_tlv process_generic_tlv
+#define process_sysobject_tlv process_generic_tlv
+#define process_mgmt_addr_tlv process_generic_tlv
+#define process_physical_loc_tlv process_generic_tlv
+#define process_mgmt_addr2_tlv process_generic_tlv
+#define process_power_requested_tlv process_generic_tlv
+#define process_power_available_tlv process_generic_tlv
+#define process_port_unidirectional_tlv process_generic_tlv
+#define process_unknown_28_tlv process_generic_tlv
+#define process_energywise_tlv process_generic_tlv
+#define process_unknown_30_tlv process_generic_tlv
+#define process_spare_poe_tlv process_generic_tlv
+static tlv_handler_t tlv_handlers[] = {
+#define _(a) {#a, CDP_TLV_##a, format_##a##_tlv, process_##a##_tlv},
+ foreach_cdp_tlv_type
+#undef _
+};
+
+#if DEBUG_TLV_DUMP == 0
+CLIB_UNUSED (static u8 * format_cdp_hdr (u8 * s, va_list * va));
+#endif
+
+static u8 *
+format_cdp_hdr (u8 * s, va_list * va)
+{
+ cdp_hdr_t *h = va_arg (*va, cdp_hdr_t *);
+
+ s = format (s, "version %d, ttl %d(secs), cksum 0x%04x\n",
+ h->version, h->ttl, h->checksum);
+ return s;
+}
+
+static cdp_error_t
+process_cdp_hdr (cdp_main_t * cm, cdp_neighbor_t * n, cdp_hdr_t * h)
+{
+#if DEBUG_TLV_DUMP > 0
+ fformat (stdout, "%U", format_cdp_hdr, h);
+#endif
+
+ if (h->version != 1 && h->version != 2)
+ return CDP_ERROR_PROTOCOL_VERSION;
+
+ n->ttl_in_seconds = h->ttl;
+
+ return CDP_ERROR_NONE;
+}
+
+/* scan a cdp packet; header, then tlv's */
+static int
+cdp_packet_scan (cdp_main_t * cm, cdp_neighbor_t * n)
+{
+ u8 *cur = n->last_rx_pkt;
+ cdp_hdr_t *h;
+ cdp_tlv_t *tlv;
+ cdp_error_t e = CDP_ERROR_NONE;
+ tlv_handler_t *handler;
+ cdp_error_t (*fp) (cdp_main_t *, cdp_neighbor_t *, cdp_tlv_t *);
+ u16 computed_checksum;
+
+ computed_checksum = cdp_checksum (cur, vec_len (cur));
+
+ if (computed_checksum)
+ return CDP_ERROR_CHECKSUM;
+
+ h = (cdp_hdr_t *) cur;
+
+ e = process_cdp_hdr (cm, n, h);
+ if (e)
+ return e;
+
+ cur = (u8 *) (h + 1);
+
+ while (cur < n->last_rx_pkt + vec_len (n->last_rx_pkt) - 1)
+ {
+ tlv = (cdp_tlv_t *) cur;
+ tlv->t = ntohs (tlv->t);
+ tlv->l = ntohs (tlv->l);
+ if (tlv->t >= ARRAY_LEN (tlv_handlers))
+ return CDP_ERROR_BAD_TLV;
+ handler = &tlv_handlers[tlv->t];
+ fp = handler->process;
+ e = (*fp) (cm, n, tlv);
+ if (e)
+ return e;
+ /* tlv length includes (t, l) */
+ cur += tlv->l;
+ }
+
+ return CDP_ERROR_NONE;
+}
+
+/*
+ * cdp input routine
+ */
+cdp_error_t
+cdp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
+{
+ cdp_main_t *cm = &cdp_main;
+ cdp_neighbor_t *n;
+ uword *p, nbytes;
+ cdp_error_t e;
+ uword last_packet_signature;
+
+ /* find or create a neighbor pool entry for the (sw) interface
+ upon which we received this pkt */
+ p = hash_get (cm->neighbor_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+
+ if (p == 0)
+ {
+ pool_get (cm->neighbors, n);
+ memset (n, 0, sizeof (*n));
+ n->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ n->packet_template_index = (u8) ~ 0;
+ hash_set (cm->neighbor_by_sw_if_index, n->sw_if_index,
+ n - cm->neighbors);
+ }
+ else
+ {
+ n = pool_elt_at_index (cm->neighbors, p[0]);
+ }
+
+ /*
+ * typical clib idiom. Don't repeatedly allocate and free
+ * the per-neighbor rx buffer. Reset its apparent length to zero
+ * and reuse it.
+ */
+
+ if (n->last_rx_pkt)
+ _vec_len (n->last_rx_pkt) = 0;
+
+ /* cdp disabled on this interface, we're done */
+ if (n->disabled)
+ return CDP_ERROR_DISABLED;
+
+ /*
+ * Make sure the per-neighbor rx buffer is big enough to hold
+ * the data we're about to copy
+ */
+ vec_validate (n->last_rx_pkt, vlib_buffer_length_in_chain (vm, b0) - 1);
+
+ /*
+ * Coalesce / copy the buffer chain into the per-neighbor
+ * rx buffer
+ */
+ nbytes = vlib_buffer_contents (vm, bi0, n->last_rx_pkt);
+ ASSERT (nbytes <= vec_len (n->last_rx_pkt));
+
+ /*
+ * Compute Jenkins hash of the new packet, decide if we need to
+ * actually parse through the TLV's. CDP packets are all identical,
+ * so unless we time out the peer, we don't need to process the packet.
+ */
+ last_packet_signature =
+ hash_memory (n->last_rx_pkt, vec_len (n->last_rx_pkt), 0xd00b);
+
+ if (n->last_packet_signature_valid &&
+ n->last_packet_signature == last_packet_signature)
+ {
+ e = CDP_ERROR_CACHE_HIT;
+ }
+ else
+ {
+ /* Actually scan the packet */
+ e = cdp_packet_scan (cm, n);
+ n->last_packet_signature_valid = 1;
+ n->last_packet_signature = last_packet_signature;
+ }
+
+ if (e == CDP_ERROR_NONE)
+ {
+ n->last_heard = vlib_time_now (vm);
+ }
+
+ return e;
+}
+
+/*
+ * setup neighbor hash table
+ */
+static clib_error_t *
+cdp_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+ cdp_main_t *cm = &cdp_main;
+ void vnet_cdp_node_reference (void);
+
+ vnet_cdp_node_reference ();
+
+ if ((error = vlib_call_init_function (vm, cdp_periodic_init)))
+ return error;
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main ();
+ cm->neighbor_by_sw_if_index = hash_create (0, sizeof (uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (cdp_init);
+
+
+static u8 *
+format_cdp_neighbors (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ cdp_main_t *cm = va_arg (*va, cdp_main_t *);
+ vnet_main_t *vnm = &vnet_main;
+ cdp_neighbor_t *n;
+ vnet_hw_interface_t *hw;
+
+ s = format (s,
+ "%=25s %=15s %=25s %=10s\n",
+ "Our Port", "Peer System", "Peer Port", "Last Heard");
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, cm->neighbors,
+ ({
+ hw = vnet_get_sup_hw_interface (vnm, n->sw_if_index);
+
+ if (n->disabled == 0)
+ s = format (s, "%=25s %=15s %=25s %=10.1f\n",
+ hw->name, n->device_name, n->port_id,
+ n->last_heard);
+ }));
+ /* *INDENT-ON* */
+ return s;
+}
+
+
+static clib_error_t *
+show_cdp (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ cdp_main_t *cm = &cdp_main;
+
+ vlib_cli_output (vm, "%U\n", format_cdp_neighbors, vm, cm);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_cdp_command, static) = {
+ .path = "show cdp",
+ .short_help = "Show cdp neighbors",
+ .function = show_cdp,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * packet trace format function, very similar to
+ * cdp_packet_scan except that we call the per TLV format
+ * functions instead of the per TLV processing functions
+ */
+u8 *
+cdp_input_format_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ cdp_input_trace_t *t = va_arg (*args, cdp_input_trace_t *);
+ u8 *cur;
+ cdp_hdr_t *h;
+ cdp_tlv_t *tlv;
+ tlv_handler_t *handler;
+ u8 *(*fp) (cdp_tlv_t *);
+
+ cur = t->data;
+
+ h = (cdp_hdr_t *) cur;
+ s = format (s, "%U", format_cdp_hdr, h);
+
+ cur = (u8 *) (h + 1);
+
+ while (cur < t->data + t->len)
+ {
+ tlv = (cdp_tlv_t *) cur;
+ tlv->t = ntohs (tlv->t);
+ tlv->l = ntohs (tlv->l);
+ if (tlv->t >= ARRAY_LEN (tlv_handlers))
+ {
+ s = format (s, "BAD_TLV\n");
+ break;
+ }
+ handler = &tlv_handlers[tlv->t];
+ fp = handler->format;
+ s = format (s, " %U", fp, tlv);
+ /* tlv length includes (t, l) */
+ cur += tlv->l;
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/cdp/cdp_node.c b/src/vnet/cdp/cdp_node.c
new file mode 100644
index 00000000..39ac4a90
--- /dev/null
+++ b/src/vnet/cdp/cdp_node.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/cdp/cdp_node.h>
+#include <vnet/ethernet/packet.h>
+
+static vlib_node_registration_t cdp_process_node;
+
+/** \file
+
+ 2 x CDP graph nodes: an "interior" node to process
+ incoming announcements, and a "process" node to periodically
+ send announcements.
+
+ The interior node is neither pipelined nor dual-looped, because
+ it would be very unusual to see more than one CDP packet in
+ a given input frame. So, it's a very simple / straightforward
+ example.
+*/
+
+/*
+ * packet counter strings
+ * Dump these counters via the "show error" CLI command
+ */
+static char *cdp_error_strings[] = {
+#define _(sym,string) string,
+ foreach_cdp_error
+#undef _
+};
+
+/*
+ * We actually send all cdp pkts to the "error" node after scanning
+ * them, so the graph node has only one next-index. The "error-drop"
+ * node automatically bumps our per-node packet counters for us.
+ */
+typedef enum
+{
+ CDP_INPUT_NEXT_NORMAL,
+ CDP_INPUT_N_NEXT,
+} cdp_next_t;
+
+/*
+ * Process a frame of cdp packets
+ * Expect 1 packet / frame
+ */
+static uword
+cdp_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from;
+ cdp_input_trace_t *t0;
+
+ from = vlib_frame_vector_args (frame); /* array of buffer indices */
+ n_left_from = frame->n_vectors; /* number of buffer indices */
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0, error0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ next0 = CDP_INPUT_NEXT_NORMAL;
+
+ /* scan this cdp pkt. error0 is the counter index to bump */
+ error0 = cdp_input (vm, b0, bi0);
+ b0->error = node->errors[error0];
+
+ /* If this pkt is traced, snapshot the data */
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ int len;
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ len = (b0->current_length < sizeof (t0->data))
+ ? b0->current_length : sizeof (t0->data);
+ t0->len = len;
+ clib_memcpy (t0->data, vlib_buffer_get_current (b0), len);
+ }
+ /* push this pkt to the next graph node, always error-drop */
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ return frame->n_vectors;
+}
+
+/*
+ * cdp input graph node declaration
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (cdp_input_node, static) = {
+ .function = cdp_node_fn,
+ .name = "cdp-input",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = CDP_N_ERROR,
+ .error_strings = cdp_error_strings,
+
+ .format_trace = cdp_input_format_trace,
+
+ .n_next_nodes = CDP_INPUT_N_NEXT,
+ .next_nodes = {
+ [CDP_INPUT_NEXT_NORMAL] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * cdp periodic function
+ */
+static uword
+cdp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ cdp_main_t *cm = &cdp_main;
+ f64 poll_time_remaining;
+ uword event_type, *event_data = 0;
+
+ /* So we can send events to the cdp process */
+ cm->cdp_process_node_index = cdp_process_node.index;
+
+ /* Dynamically register the cdp input node with the snap classifier */
+ snap_register_input_protocol (vm, "cdp-input", 0xC /* ieee_oui, Cisco */ ,
+ 0x2000 /* protocol CDP */ ,
+ cdp_input_node.index);
+
+ snap_register_input_protocol (vm, "cdp-input", 0xC /* ieee_oui, Cisco */ ,
+ 0x2004 /* protocol CDP */ ,
+ cdp_input_node.index);
+
+#if 0 /* retain for reference */
+ /* with the hdlc classifier */
+ hdlc_register_input_protocol (vm, HDLC_PROTOCOL_cdp, cdp_input_node.index);
+#endif
+
+ /* with ethernet input (for SRP) */
+ ethernet_register_input_type (vm, ETHERNET_TYPE_CDP /* CDP */ ,
+ cdp_input_node.index);
+
+ poll_time_remaining = 10.0 /* seconds */ ;
+ while (1)
+ {
+ /* sleep until next poll time, or msg serialize event occurs */
+ poll_time_remaining =
+ vlib_process_wait_for_event_or_clock (vm, poll_time_remaining);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0: /* no events => timeout */
+ break;
+
+ default:
+ clib_warning ("BUG: event type 0x%wx", event_type);
+ break;
+ }
+ if (event_data)
+ _vec_len (event_data) = 0;
+
+ /* peer timeout scan, send announcements */
+ if (vlib_process_suspend_time_is_zero (poll_time_remaining))
+ {
+ cdp_periodic (vm);
+ poll_time_remaining = 10.0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * cdp periodic node declaration
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (cdp_process_node, static) = {
+ .function = cdp_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "cdp-process",
+};
+/* *INDENT-ON* */
+
+void
+vnet_cdp_node_reference (void)
+{
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/cdp/cdp_node.h b/src/vnet/cdp/cdp_node.h
new file mode 100644
index 00000000..7028ddca
--- /dev/null
+++ b/src/vnet/cdp/cdp_node.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_cdp_node_h__
+#define __included_cdp_node_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <vnet/snap/snap.h>
+#include <vnet/hdlc/hdlc.h>
+#include <vnet/hdlc/packet.h>
+
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+
+#include <vnet/cdp/cdp_protocol.h>
+
+typedef enum
+{
+ CDP_PACKET_TEMPLATE_ETHERNET,
+ CDP_PACKET_TEMPLATE_HDLC,
+ CDP_PACKET_TEMPLATE_SRP,
+ CDP_N_PACKET_TEMPLATES,
+} cdp_packet_template_id_t;
+
+typedef struct
+{
+ /* neighbor's vlib software interface index */
+ u32 sw_if_index;
+
+ /* Timers */
+ f64 last_heard;
+ f64 last_sent;
+
+ /* Neighbor time-to-live (usually 180s) */
+ u8 ttl_in_seconds;
+
+ /* "no cdp run" or similar */
+ u8 disabled;
+
+ /* tx packet template id for this neighbor */
+ u8 packet_template_index;
+
+ /* Jenkins hash optimization: avoid tlv scan, send short keepalive msg */
+ u8 last_packet_signature_valid;
+ uword last_packet_signature;
+
+ /* Info we actually keep about each neighbor */
+ u8 *device_name;
+ u8 *version;
+ u8 *port_id;
+ u8 *platform;
+
+ /* last received packet, for the J-hash optimization */
+ u8 *last_rx_pkt;
+} cdp_neighbor_t;
+
+#define foreach_neighbor_string_field \
+_(device_name) \
+_(version) \
+_(port_id) \
+_(platform)
+
+typedef struct
+{
+ /* pool of cdp neighbors */
+ cdp_neighbor_t *neighbors;
+
+ /* tx pcap debug enable */
+ u8 tx_pcap_debug;
+
+ /* rapidly find a neighbor by vlib software interface index */
+ uword *neighbor_by_sw_if_index;
+
+ /* Background process node index */
+ u32 cdp_process_node_index;
+
+ /* Packet templates for different encap types */
+ vlib_packet_template_t packet_templates[CDP_N_PACKET_TEMPLATES];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} cdp_main_t;
+
+cdp_main_t cdp_main;
+
+/* Packet counters */
+#define foreach_cdp_error \
+_ (NONE, "good cdp packets (processed)") \
+_ (CACHE_HIT, "good cdp packets (cache hit)") \
+_ (BAD_TLV, "cdp packets with bad TLVs") \
+_ (PROTOCOL_VERSION, "cdp packets with bad protocol versions") \
+_ (CHECKSUM, "cdp packets with bad checksums") \
+_ (DISABLED, "cdp packets received on disabled interfaces")
+
+typedef enum
+{
+#define _(sym,str) CDP_ERROR_##sym,
+ foreach_cdp_error
+#undef _
+ CDP_N_ERROR,
+} cdp_error_t;
+
+/* cdp packet trace capture */
+typedef struct
+{
+ u32 len;
+ u8 data[400];
+} cdp_input_trace_t;
+
+typedef enum
+{
+ CDP_EVENT_SEND_NEIGHBOR,
+ CDP_EVENT_SEND_KEEPALIVE,
+} cdp_process_event_t;
+
+
+cdp_error_t cdp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0);
+void cdp_periodic (vlib_main_t * vm);
+void cdp_keepalive (cdp_main_t * cm, cdp_neighbor_t * n);
+u16 cdp_checksum (void *p, int count);
+u8 *cdp_input_format_trace (u8 * s, va_list * args);
+
+serialize_function_t serialize_cdp_main, unserialize_cdp_main;
+
+#endif /* __included_cdp_node_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/cdp/cdp_periodic.c b/src/vnet/cdp/cdp_periodic.c
new file mode 100644
index 00000000..8899c49c
--- /dev/null
+++ b/src/vnet/cdp/cdp_periodic.c
@@ -0,0 +1,515 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/cdp/cdp_node.h>
+#include <vppinfra/hash.h>
+#include <vnet/unix/pcap.h>
+#include <vnet/srp/srp.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+#include <vnet/srp/packet.h>
+
+/*
+ * Generate a set of specific CDP TLVs.
+ *
+ * $$$ eventually these need to fish better data from
+ * other data structures; e.g. the hostname, software version info
+ * etc.
+ */
+
+static void
+add_device_name_tlv (vnet_hw_interface_t * hw, u8 ** t0p)
+{
+ cdp_tlv_t *t = (cdp_tlv_t *) * t0p;
+
+ t->t = htons (CDP_TLV_device_name);
+ t->l = htons (3 + sizeof (*t));
+ clib_memcpy (&t->v, "VPP", 3);
+
+ *t0p += ntohs (t->l);
+}
+
+static void
+add_port_id_tlv (vnet_hw_interface_t * hw, u8 ** t0p)
+{
+ cdp_tlv_t *t = (cdp_tlv_t *) * t0p;
+
+ t->t = htons (CDP_TLV_port_id);
+ t->l = htons (vec_len (hw->name) + sizeof (*t));
+ clib_memcpy (&t->v, hw->name, vec_len (hw->name));
+ *t0p += ntohs (t->l);
+}
+
+static void
+add_version_tlv (vnet_hw_interface_t * hw, u8 ** t0p)
+{
+ cdp_tlv_t *t = (cdp_tlv_t *) * t0p;
+
+ t->t = htons (CDP_TLV_version);
+ t->l = htons (12 + sizeof (*t));
+ clib_memcpy (&t->v, "VPP Software", 12);
+ *t0p += ntohs (t->l);
+}
+
+static void
+add_platform_tlv (vnet_hw_interface_t * hw, u8 ** t0p)
+{
+ cdp_tlv_t *t = (cdp_tlv_t *) * t0p;
+
+ t->t = htons (CDP_TLV_platform);
+ t->l = htons (2 + sizeof (*t));
+ clib_memcpy (&t->v, "SW", 2);
+ *t0p += ntohs (t->l);
+}
+
+static void
+add_capability_tlv (vnet_hw_interface_t * hw, u8 ** t0p)
+{
+ cdp_tlv_t *t = (cdp_tlv_t *) * t0p;
+ u32 capabilities;
+
+ t->t = htons (CDP_TLV_capabilities);
+ t->l = htons (4 + sizeof (*t));
+ capabilities = CDP_ROUTER_DEVICE;
+ capabilities = htonl (capabilities);
+ clib_memcpy (&t->v, &capabilities, sizeof (capabilities));
+ *t0p += ntohs (t->l);
+}
+
+static void
+add_tlvs (cdp_main_t * cm, vnet_hw_interface_t * hw, u8 ** t0p)
+{
+ add_device_name_tlv (hw, t0p);
+ add_port_id_tlv (hw, t0p);
+ add_version_tlv (hw, t0p);
+ add_platform_tlv (hw, t0p);
+ add_capability_tlv (hw, t0p);
+}
+
+/*
+ * send a cdp pkt on an ethernet interface
+ */
+static void
+send_ethernet_hello (cdp_main_t * cm, cdp_neighbor_t * n, int count)
+{
+ u32 *to_next;
+ ethernet_llc_snap_and_cdp_header_t *h0;
+ vnet_hw_interface_t *hw;
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u8 *t0;
+ u16 checksum;
+ int nbytes_to_checksum;
+ int i;
+ vlib_frame_t *f;
+ vlib_main_t *vm = cm->vlib_main;
+ vnet_main_t *vnm = cm->vnet_main;
+
+ for (i = 0; i < count; i++)
+ {
+ /*
+ * see cdp_periodic_init() to understand what's already painted
+ * into the buffer by the packet template mechanism
+ */
+ h0 = vlib_packet_template_get_packet
+ (vm, &cm->packet_templates[n->packet_template_index], &bi0);
+
+ if (!h0)
+ break;
+
+ /* Add the interface's ethernet source address */
+ hw = vnet_get_sup_hw_interface (vnm, n->sw_if_index);
+
+ clib_memcpy (h0->ethernet.src_address, hw->hw_address,
+ vec_len (hw->hw_address));
+
+ t0 = (u8 *) & h0->cdp.data;
+
+ /* add TLVs */
+ add_tlvs (cm, hw, &t0);
+
+ /* add the cdp packet checksum */
+ nbytes_to_checksum = t0 - (u8 *) & h0->cdp;
+ checksum = cdp_checksum (&h0->cdp, nbytes_to_checksum);
+ h0->cdp.checksum = htons (checksum);
+
+ /* Set the outbound packet length */
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->current_length = nbytes_to_checksum + sizeof (*h0)
+ - sizeof (cdp_hdr_t);
+
+ /* And the outbound interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = hw->sw_if_index;
+
+ /* Set the 802.3 ethernet length */
+ h0->ethernet.len = htons (b0->current_length
+ - sizeof (ethernet_802_3_header_t));
+
+ /* And output the packet on the correct interface */
+ f = vlib_get_frame_to_node (vm, hw->output_node_index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+
+ vlib_put_frame_to_node (vm, hw->output_node_index, f);
+ n->last_sent = vlib_time_now (vm);
+ }
+}
+
+/*
+ * send a cdp pkt on an hdlc interface
+ */
+static void
+send_hdlc_hello (cdp_main_t * cm, cdp_neighbor_t * n, int count)
+{
+ u32 *to_next;
+ hdlc_and_cdp_header_t *h0;
+ vnet_hw_interface_t *hw;
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u8 *t0;
+ u16 checksum;
+ int nbytes_to_checksum;
+ int i;
+ vlib_frame_t *f;
+ vlib_main_t *vm = cm->vlib_main;
+ vnet_main_t *vnm = cm->vnet_main;
+
+ for (i = 0; i < count; i++)
+ {
+ /*
+ * see cdp_periodic_init() to understand what's already painted
+ * into the buffer by the packet template mechanism
+ */
+ h0 = vlib_packet_template_get_packet
+ (vm, &cm->packet_templates[n->packet_template_index], &bi0);
+
+ if (!h0)
+ break;
+
+ hw = vnet_get_sup_hw_interface (vnm, n->sw_if_index);
+
+ t0 = (u8 *) & h0->cdp.data;
+
+ /* add TLVs */
+ add_tlvs (cm, hw, &t0);
+
+ /* add the cdp packet checksum */
+ nbytes_to_checksum = t0 - (u8 *) & h0->cdp;
+ checksum = cdp_checksum (&h0->cdp, nbytes_to_checksum);
+ h0->cdp.checksum = htons (checksum);
+
+ /* Set the outbound packet length */
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->current_length = nbytes_to_checksum + sizeof (*h0)
+ - sizeof (cdp_hdr_t);
+
+ /* And output the packet on the correct interface */
+ f = vlib_get_frame_to_node (vm, hw->output_node_index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+
+ vlib_put_frame_to_node (vm, hw->output_node_index, f);
+ n->last_sent = vlib_time_now (vm);
+ }
+}
+
+/*
+ * send a cdp pkt on an srp interface
+ */
+static void
+send_srp_hello (cdp_main_t * cm, cdp_neighbor_t * n, int count)
+{
+ u32 *to_next;
+ srp_and_cdp_header_t *h0;
+ vnet_hw_interface_t *hw;
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u8 *t0;
+ u16 checksum;
+ int nbytes_to_checksum;
+ int i;
+ vlib_frame_t *f;
+ vlib_main_t *vm = cm->vlib_main;
+ vnet_main_t *vnm = cm->vnet_main;
+
+ for (i = 0; i < count; i++)
+ {
+ /*
+ * see cdp_periodic_init() to understand what's already painted
+ * into the buffer by the packet template mechanism
+ */
+ h0 = vlib_packet_template_get_packet
+ (vm, &cm->packet_templates[n->packet_template_index], &bi0);
+
+ if (!h0)
+ break;
+
+ hw = vnet_get_sup_hw_interface (vnm, n->sw_if_index);
+
+ t0 = (u8 *) & h0->cdp.data;
+
+ /* add TLVs */
+ add_tlvs (cm, hw, &t0);
+
+ /* Add the interface's ethernet source address */
+ clib_memcpy (h0->ethernet.src_address, hw->hw_address,
+ vec_len (hw->hw_address));
+
+ /* add the cdp packet checksum */
+ nbytes_to_checksum = t0 - (u8 *) & h0->cdp;
+ checksum = cdp_checksum (&h0->cdp, nbytes_to_checksum);
+ h0->cdp.checksum = htons (checksum);
+
+ /* Set the outbound packet length */
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->current_length = nbytes_to_checksum + sizeof (*h0)
+ - sizeof (cdp_hdr_t);
+
+ /* And output the packet on the correct interface */
+ f = vlib_get_frame_to_node (vm, hw->output_node_index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+
+ vlib_put_frame_to_node (vm, hw->output_node_index, f);
+ n->last_sent = vlib_time_now (vm);
+ }
+}
+
+/*
+ * Decide which cdp packet template to use
+ */
+static int
+pick_packet_template (cdp_main_t * cm, cdp_neighbor_t * n)
+{
+ n->packet_template_index = CDP_PACKET_TEMPLATE_ETHERNET;
+
+ return 0;
+}
+
+/* Send a cdp neighbor announcement */
+static void
+send_hello (cdp_main_t * cm, cdp_neighbor_t * n, int count)
+{
+ if (n->packet_template_index == (u8) ~ 0)
+ {
+ /* If we don't know how to talk to this peer, don't try again */
+ if (pick_packet_template (cm, n))
+ {
+ n->last_sent = 1e70;
+ return;
+ }
+ }
+
+ switch (n->packet_template_index)
+ {
+ case CDP_PACKET_TEMPLATE_ETHERNET:
+ send_ethernet_hello (cm, n, count);
+ break;
+
+ case CDP_PACKET_TEMPLATE_HDLC:
+ send_hdlc_hello (cm, n, count);
+ break;
+
+ case CDP_PACKET_TEMPLATE_SRP:
+ send_srp_hello (cm, n, count);
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ n->last_sent = vlib_time_now (cm->vlib_main);
+}
+
+static void
+delete_neighbor (cdp_main_t * cm, cdp_neighbor_t * n, int want_broadcast)
+{
+ hash_unset (cm->neighbor_by_sw_if_index, n->sw_if_index);
+ vec_free (n->device_name);
+ vec_free (n->version);
+ vec_free (n->port_id);
+ vec_free (n->platform);
+ vec_free (n->last_rx_pkt);
+ pool_put (cm->neighbors, n);
+}
+
+void
+cdp_periodic (vlib_main_t * vm)
+{
+ cdp_main_t *cm = &cdp_main;
+ cdp_neighbor_t *n;
+ f64 now = vlib_time_now (vm);
+ vnet_sw_interface_t *sw;
+ static u32 *delete_list = 0;
+ int i;
+ static cdp_neighbor_t **n_list = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, cm->neighbors,
+ ({
+ vec_add1 (n_list, n);
+ }));
+ /* *INDENT-ON* */
+
+ /* Across all cdp neighbors known to the system */
+ for (i = 0; i < vec_len (n_list); i++)
+ {
+ n = n_list[i];
+
+ /* "no cdp run" provisioned on the interface? */
+ if (n->disabled == 1)
+ continue;
+
+ sw = vnet_get_sw_interface (cm->vnet_main, n->sw_if_index);
+
+ /* Interface shutdown or rx timeout? */
+ if (!(sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ || (now > (n->last_heard + (f64) n->ttl_in_seconds)))
+ /* add to list of neighbors to delete */
+ vec_add1 (delete_list, n - cm->neighbors);
+ else if (n->last_sent == 0.0)
+ /* First time, send 3 hellos */
+ send_hello (cm, n, 3 /* three to begin with */ );
+ else if (now > (n->last_sent + (((f64) n->ttl_in_seconds) / 6.0)))
+ /* Normal keepalive, send one */
+ send_hello (cm, n, 1 /* one as a keepalive */ );
+ }
+
+ for (i = 0; i < vec_len (delete_list); i++)
+ {
+ n = vec_elt_at_index (cm->neighbors, delete_list[i]);
+ delete_neighbor (cm, n, 1);
+ }
+ if (delete_list)
+ _vec_len (delete_list) = 0;
+ if (n_list)
+ _vec_len (n_list) = 0;
+}
+
+static clib_error_t *
+cdp_periodic_init (vlib_main_t * vm)
+{
+ cdp_main_t *cm = &cdp_main;
+
+ /* Create the ethernet cdp hello packet template */
+ {
+ ethernet_llc_snap_and_cdp_header_t h;
+
+ memset (&h, 0, sizeof (h));
+
+ /* Send to 01:00:0c:cc:cc */
+ h.ethernet.dst_address[0] = 0x01;
+ /* h.ethernet.dst_address[1] = 0x00; (memset) */
+ h.ethernet.dst_address[2] = 0x0C;
+ h.ethernet.dst_address[3] = 0xCC;
+ h.ethernet.dst_address[4] = 0xCC;
+ h.ethernet.dst_address[5] = 0xCC;
+
+ /* leave src address blank (fill in at send time) */
+
+ /* leave length blank (fill in at send time) */
+
+ /* LLC */
+ h.llc.dst_sap = h.llc.src_sap = 0xAA; /* SNAP */
+ h.llc.control = 0x03; /* UI (no extended control bytes) */
+
+ /* SNAP */
+ /* h.snap.oui[0] = 0x00; (memset) */
+ /* h.snap.oui[1] = 0x00; (memset) */
+ h.snap.oui[2] = 0x0C; /* Cisco = 0x00000C */
+ h.snap.protocol = htons (0x2000); /* CDP = 0x2000 */
+
+ /* CDP */
+ h.cdp.version = 2;
+ h.cdp.ttl = 180;
+
+ vlib_packet_template_init
+ (vm, &cm->packet_templates[CDP_PACKET_TEMPLATE_ETHERNET],
+ /* data */ &h,
+ sizeof (h),
+ /* alloc chunk size */ 8,
+ "cdp-ethernet");
+ }
+
+#if 0 /* retain for reference */
+
+ /* Create the hdlc cdp hello packet template */
+ {
+ hdlc_and_cdp_header_t h;
+
+ memset (&h, 0, sizeof (h));
+
+ h.hdlc.address = 0x0f;
+ /* h.hdlc.control = 0; (memset) */
+ h.hdlc.protocol = htons (0x2000); /* CDP = 0x2000 */
+
+ /* CDP */
+ h.cdp.version = 2;
+ h.cdp.ttl = 180;
+
+ vlib_packet_template_init
+ (vm, &cm->packet_templates[CDP_PACKET_TEMPLATE_HDLC],
+ /* data */ &h,
+ sizeof (h),
+ /* alloc chunk size */ 8,
+ "cdp-hdlc");
+ }
+
+ /* Create the srp cdp hello packet template */
+ {
+ srp_and_cdp_header_t h;
+
+ memset (&h, 0, sizeof (h));
+
+ /* Send to 01:00:0c:cc:cc */
+ h.ethernet.dst_address[0] = 0x01;
+ /* h.ethernet.dst_address[1] = 0x00; (memset) */
+ h.ethernet.dst_address[2] = 0x0C;
+ h.ethernet.dst_address[3] = 0xCC;
+ h.ethernet.dst_address[4] = 0xCC;
+ h.ethernet.dst_address[5] = 0xCC;
+
+ /* leave src address blank (fill in at send time) */
+
+ /* The srp header is filled in at xmt */
+ h.srp.ttl = 1;
+ h.srp.priority = 7;
+ h.srp.mode = SRP_MODE_data;
+ srp_header_compute_parity (&h.srp);
+
+ /* Inner ring and parity will be set at send time */
+
+ h.ethernet.type = htons (0x2000); /* CDP = 0x2000 */
+
+ /* CDP */
+ h.cdp.version = 2;
+ h.cdp.ttl = 180;
+
+ vlib_packet_template_init
+ (vm, &cm->packet_templates[CDP_PACKET_TEMPLATE_SRP],
+ /* data */ &h,
+ sizeof (h),
+ /* alloc chunk size */ 8,
+ "cdp-srp");
+ }
+#endif
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (cdp_periodic_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/cdp/cdp_protocol.h b/src/vnet/cdp/cdp_protocol.h
new file mode 100644
index 00000000..dc6c66d5
--- /dev/null
+++ b/src/vnet/cdp/cdp_protocol.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_cdp_protocol_h__
+#define __included_cdp_protocol_h__
+
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/llc/llc.h>
+#include <vnet/snap/snap.h>
+#include <vnet/srp/packet.h>
+
+typedef CLIB_PACKED (struct
+ {
+ u8 version;
+ u8 ttl;
+ u16 checksum; /* 1's complement of the 1's complement sum */
+ u8 data[0];
+ }) cdp_hdr_t;
+
+typedef struct
+{
+ u8 dst_address[6];
+ u8 src_address[6];
+ u16 len;
+} ethernet_802_3_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ ethernet_802_3_header_t ethernet;
+ llc_header_t llc; snap_header_t snap; cdp_hdr_t cdp;
+ }) ethernet_llc_snap_and_cdp_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ hdlc_header_t hdlc; cdp_hdr_t cdp;
+ }) hdlc_and_cdp_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ srp_header_t srp;
+ ethernet_header_t ethernet; cdp_hdr_t cdp;
+ }) srp_and_cdp_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ u16 t;
+ u16 l;
+ u8 v[0];
+ }) cdp_tlv_t;
+
+/*
+ * TLV codes.
+ */
+#define foreach_cdp_tlv_type \
+_(unused) \
+_(device_name) /* uniquely identifies the device */ \
+_(address) /* list of addresses this device has */ \
+_(port_id) /* port CDP packet was sent out on */ \
+_(capabilities) /* funct. capabilities of the device */ \
+_(version) /* version */ \
+_(platform) /* hardware platform of this device */ \
+_(ipprefix) /* An IP network prefix */ \
+_(hello) /* Protocol piggyback hello msg */ \
+_(vtp_domain) /* VTP management domain */ \
+_(native_vlan) /* Native VLAN number */ \
+_(duplex) /* The interface duplex mode */ \
+_(appl_vlan) /* Appliance VLAN-ID TLV */ \
+_(trigger) /* For sending trigger TLV msgs. */ \
+_(power) /* Power consumption of that device */ \
+_(mtu) /* MTU defined for sending intf. */ \
+_(trust) /* Extended trust TLV */ \
+_(cos) /* COS for Untrusted Port TLV */ \
+_(sysname) /* System name (FQDN of device) */ \
+_(sysobject) /* OID of sysObjectID MIB object */ \
+_(mgmt_addr) /* SNMP manageable addrs. of device */ \
+_(physical_loc) /* Physical Location of the device */ \
+_(mgmt_addr2) /* External Port-ID */ \
+_(power_requested) \
+_(power_available) \
+_(port_unidirectional) \
+_(unknown_28) \
+_(energywise) \
+_(unknown_30) \
+_(spare_poe)
+
+typedef enum
+{
+#define _(t) CDP_TLV_##t,
+ foreach_cdp_tlv_type
+#undef _
+} cdp_tlv_code_t;
+
+/*
+ The address TLV looks as follows:
+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Number of addresses |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | IDRP encoded address |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ An address is encoded in IDRP format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | PT | PT Length | Protocol (variable) ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Address length | Address (variable) ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ PT: Protocol type
+ 1 = NLPID format
+ 2 = 802.2 format
+
+ PT Length:
+ Length of protocol field, 1 for PT = 1, and either 3 or 8 for
+ 802.2 format depending if SNAP is used for PT = 2.
+
+ The encodings for the other protocols have the following format:
+
+ field:  <SSAP><DSAP><CTRL><-------OUI------><protocol_TYPE>
+           |     |     |     |     |     |      |    |    |
+ bytes:    0     1     2     3     4     5      6    7    8
+
+ where the first 3 bytes are 0xAAAA03 for SNAP encoded addresses.
+ The OUI is 000000 for ethernet and <protocol_TYPE>
+ is the assigned Ethernet type code for the particular protocol,
+ e.g. for DECnet the encoding is AAAA03 000000 6003,
+ and for IPv6 the encoding is AAAA03 000000 86DD.
+*/
+
+/*
+ * Capabilities.
+ */
+
+#define CDP_ROUTER_DEVICE 0x0001
+#define CDP_TB_DEVICE 0x0002
+#define CDP_SRB_DEVICE 0x0004
+#define CDP_SWITCH_DEVICE 0x0008
+#define CDP_HOST_DEVICE 0x0010
+#define CDP_IGMP_DEVICE 0x0020
+#define CDP_REPEATER_DEVICE 0x0040
+
+/*
+ The protocol-hello TLV looks as follows:
+
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | OUI |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Protocol ID |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | up to 27 bytes of message |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ * These macros define the valid values for the Duplex TLV.
+ */
+#define CDP_DUPLEX_TLV_HALF 0x0
+#define CDP_DUPLEX_TLV_FULL 0x1
+
+#endif /* __included_cdp_protocol_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/classify/README b/src/vnet/classify/README
new file mode 100644
index 00000000..1ef5ab5a
--- /dev/null
+++ b/src/vnet/classify/README
@@ -0,0 +1,180 @@
+=== vnet classifier theory of operation ===
+
+The vnet classifier trades off simplicity and perf / scale
+characteristics. At a certain level, it's a dumb robot. Given an
+incoming packet, search an ordered list of (mask, match) tables. If
+the classifier finds a matching entry, take the indicated action. If
+not, take a last-resort action.
+
+We use 128-bit SIMD instructions to match or hash 16 octets at a
+time. For hardware backward compatibility, the code does not
+[currently] use 256-bit (32-octet) vector instructions.
+
+Effective use of the classifier centers around building table lists
+which "hit" as soon as practicable. In many cases, established
+sessions hit in the first table. In this mode of operation, the
+classifier easily processes multiple MPPS / core - even with millions
+of sessions in the data base. Searching 357 tables on a regular basis
+will neatly solve the halting problem.
+
+==== Basic operation ====
+
+The classifier mask-and-match operation proceeds as follows. Given a
+starting classifier table index, lay hands on the indicated mask
+vector. When building tables, we arrange for the mask to obey
+16-octet SIMD-vector alignment.
+
+We know that the first octet of packet data starts on a cache-line
+boundary. Further, it's reasonably likely that folks won't want to use
+the generalized classifier on the L2 header; preferring to decode the
+Ethertype manually. That scheme makes it easy to select among ip4 /
+ip6 / MPLS, etc. classifier table sets.
+
+A no-vlan-tag L2 header is 14 octets long. A typical ipv4 header
+begins with the octets 0x4500: version=4, header_length=5, DSCP=0,
+ECN=0. If one doesn't intend to classify on (DSCP, ECN) - the typical
+case - we program the classifier to skip the first 16-octet vector.
+
+To classify untagged ipv4 packets on source address, we program the
+classifier to skip one vector, and mask-and-match one vector.
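+
+As a concrete illustration (octet arithmetic only, not lifted from
+the code): with an untagged L2 header, the ipv4 source address
+occupies absolute octets 26..29, i.e. octets 10..13 of the second
+16-octet vector. So skip_n_vectors = 1, match_n_vectors = 1, and the
+mask vector is:
+
+  00 00 00 00 00 00 00 00 00 00 ff ff ff ff 00 00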
+
+The basic mask-and-match operation looks like this:
+
+ switch (t->match_n_vectors)
+ {
+ case 1:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ break;
+
+ case 2:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+ break;
+
+ <etc>
+ }
+
+ result_mask = u32x4_zero_byte_mask (result);
+ if (result_mask == 0xffff)
+ return (v);
+
+Net of setup, it costs a couple of clock cycles to mask-and-match 16
+octets.
+
+At the risk of belaboring an obvious point, the control-plane
+'''must''' pay attention to detail. When skipping one (or more)
+vectors, masks and matches must reflect that decision. See
+.../vnet/vnet/classify/vnet_classify.c:unformat_classify_[mask|match]. Note
+that vec_validate (xxx, 13) creates a 14-element vector.
+
+==== Creating a classifier table ====
+
+To create a new classifier table via the control-plane API, send a
+"classify_add_del_table" message. The underlying action routine,
+vnet_classify_add_del_table(...), is located in
+.../vnet/vnet/classify/vnet_classify.c, and has the following
+prototype:
+
+ int vnet_classify_add_del_table (vnet_classify_main_t * cm,
+ u8 * mask,
+ u32 nbuckets,
+ u32 memory_size,
+ u32 skip,
+ u32 match,
+ u32 next_table_index,
+ u32 miss_next_index,
+ u32 * table_index,
+ int is_add)
+
+Pass cm = &vnet_classify_main if calling this routine directly. Mask,
+skip(_n_vectors) and match(_n_vectors) are as described above. Mask
+need not be aligned, but it must be match*16 octets in length. To
+avoid having your head explode, be absolutely certain that '''only'''
+the bits you intend to match on are set.
+
+The classifier uses thread-safe, no-reader-locking-required
+bounded-index extensible hashing. Nbuckets is the [fixed] size of the
+hash bucket vector. The algorithm works in constant time regardless of
+hash collisions, but wastes space when the bucket array is too
+small. A good rule of thumb: let nbuckets = approximate number of
+entries expected.
+
+At a significant cost in complexity, it would be possible to resize the
+bucket array dynamically. We have no plans to implement that function.
+
+Each classifier table has its own clib mheap memory allocation
+arena. To pick the memory_size parameter, note that each classifier
+table entry needs 16*(1 + match_n_vectors) bytes. Within reason, aim a
+bit high. Clib mheap memory uses o/s level virtual memory - not wired
+or hugetlb memory - so it's best not to scrimp on size.
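+
+A quick sizing sketch (illustrative arithmetic): with match_n_vectors
+= 2, each entry needs 16*(1 + 2) = 48 bytes, so a table expected to
+hold ~1 million sessions wants a memory_size comfortably north of
+48 MB.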
+
+The "next_table_index" parameter is as described: the pool index in
+vnet_classify_main.tables of the next table to search. Code ~0 to
+indicate the end of the table list. 0 is a valid table index!
+
+We often create classification tables in reverse order -
+last-table-searched to first-table-searched - so we can easily set
+this parameter. Of course, one can manually adjust the data structure
+after-the-fact.
+
+Specific classifier client nodes - for example,
+.../vnet/vnet/classify/ip_classify.c - interpret the "miss_next_index"
+parameter as a vpp graph-node next index. When packet classification
+fails to produce a match, ip_classify_inline sends packets to the
+indicated disposition. A classifier application might program this
+parameter to send packets which don't match an existing session to a
+"first-sign-of-life, create-new-session" node.
+
+Finally, the is_add parameter indicates whether to add or delete the
+indicated table. The delete case implicitly terminates all sessions
+with extreme prejudice, by freeing the specified clib mheap.
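+
+A minimal table-creation sketch - values purely illustrative, with
+"mask" and "miss_next_index" assumed to be set up elsewhere:
+
+  u32 table_index = ~0;
+  int rv;
+
+  rv = vnet_classify_add_del_table
+    (&vnet_classify_main, mask /* match*16 octets */,
+     /* nbuckets */ 32768,
+     /* memory_size */ 20 << 20,
+     /* skip */ 1,
+     /* match */ 1,
+     /* next_table_index */ ~0,
+     miss_next_index,
+     &table_index, 1 /* is_add */);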
+
+==== Creating a classifier session ====
+
+To create a new classifier session via the control-plane API, send a
+"classify_add_del_session" message. The underlying action routine,
+vnet_classify_add_del_session(...), is located in
+.../vnet/vnet/classify/vnet_classify.c, and has the following
+prototype:
+
+int vnet_classify_add_del_session (vnet_classify_main_t * cm,
+ u32 table_index,
+ u8 * match,
+ u32 hit_next_index,
+ u32 opaque_index,
+ i32 advance,
+ int is_add)
+
+Pass cm = &vnet_classify_main if calling this routine directly. Table
+index specifies the table which receives the new session / contains
+the session to delete depending on is_add.
+
+Match is the key for the indicated session. It need not be aligned,
+but it must be table->match_n_vectors*16 octets in length. As a
+courtesy, vnet_classify_add_del_session applies the table's mask to
+the stored key-value. In this way, one can create a session by passing
+unmasked (packet_data + offset) as the "match" parameter, and end up
+with unconfusing session keys.
+
+Specific classifier client nodes - for example,
+.../vnet/vnet/classify/ip_classify.c - interpret the per-session
+hit_next_index parameter as a vpp graph-node next index. When packet
+classification produces a match, ip_classify_inline sends packets to
+the indicated disposition.
+
+ip4/6_classify place the per-session opaque_index parameter into
+vnet_buffer(b)->l2_classify.opaque_index; a slight misnomer, but
+anyhow classifier applications can send session-hit packets to
+specific graph nodes, with useful values in buffer metadata. Depending
+on the required semantics, we send known-session traffic to a certain
+node, with e.g. a session pool index in buffer metadata. It's totally
+up to the control-plane and the specific use-case.
+
+Finally, nodes such as ip4/6-classify apply the advance parameter as a
+[signed!] argument to vlib_buffer_advance(...); to "consume" a
+networking layer. Example: if we classify incoming tunneled IP packets
+by (inner) source/dest address and source/dest port, we might choose
+to decapsulate and reencapsulate the inner packet. In such a case,
+program the advance parameter to perform the tunnel decapsulation, and
+program next_index to send traffic to a node which uses
+e.g. opaque_index to output traffic on a specific tunnel interface.
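+
+A corresponding session-creation sketch, matching the prototype
+above (values again illustrative):
+
+  rv = vnet_classify_add_del_session
+    (&vnet_classify_main, table_index,
+     match /* unmasked packet_data + offset */,
+     hit_next_index, opaque_index,
+     /* advance */ 0, 1 /* is_add */);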
diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api
new file mode 100644
index 00000000..cacb9bed
--- /dev/null
+++ b/src/vnet/classify/classify.api
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Add/Delete classification table request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - if non-zero add the table, else delete it
+ @param del_chain - if non-zero delete the whole chain of tables
+ @param table_index - if add, returns index of the created table, else specifies the table to delete
+ @param nbuckets - number of buckets when adding a table
+ @param memory_size - memory size when adding a table
+ @param match_n_vectors - number of match vectors
+ @param next_table_index - index of next table
+ @param miss_next_index - index of miss table
+ @param current_data_flag - option to use current node's packet payload
+ as the starting point from where packets are classified.
+ This option is only valid for L2/L3 input ACL for now.
+ 0: by default, classify data from the buffer's start location
+ 1: classify packets from VPP node’s current data pointer
+ @param current_data_offset - a signed value to shift the start location of
+ the packet to be classified
+ For example, if input IP ACL node is used, L2 header’s first byte
+ can be accessible by configuring current_data_offset to -14
+ if there is no vlan tag.
+ This is valid only if current_data_flag is set to 1.
+ @param mask[] - match mask
+*/
+define classify_add_del_table
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 del_chain;
+ u32 table_index;
+ u32 nbuckets;
+ u32 memory_size;
+ u32 skip_n_vectors;
+ u32 match_n_vectors;
+ u32 next_table_index;
+ u32 miss_next_index;
+ u32 current_data_flag;
+ i32 current_data_offset;
+ u8 mask[0];
+};
+
+/** \brief Add/Delete classification table response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the table add/del request
+ @param new_table_index - for add, returned index of the new table
+ @param skip_n_vectors - for add, returned value of skip_n_vectors in table
+ @param match_n_vectors - for add, returned value of match_n_vectors in table
+*/
+define classify_add_del_table_reply
+{
+ u32 context;
+ i32 retval;
+ u32 new_table_index;
+ u32 skip_n_vectors;
+ u32 match_n_vectors;
+};
+
+/** \brief Classify add / del session request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add session if non-zero, else delete
+ @param table_index - index of the table to add/del the session, required
+ @param hit_next_index - for add, hit_next_index of new session, required
+ @param opaque_index - for add, opaque_index of new session
+ @param advance - for add, advance value for session
+ @param action -
+ 0: no action (by default)
+ metadata is not used.
+ 1: Classified IP packets will be looked up from the
+ specified ipv4 fib table (configured by metadata as VRF id).
+ Only valid for L3 input ACL node
+ 2: Classified IP packets will be looked up from the
+ specified ipv6 fib table (configured by metadata as VRF id).
+ Only valid for L3 input ACL node
+ @param metadata - valid only if action != 0
+ VRF id if action is 1 or 2.
+ @param match[] - for add, match value for session, required
+*/
+autoreply define classify_add_del_session
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 table_index;
+ u32 hit_next_index;
+ u32 opaque_index;
+ i32 advance;
+ u8 action;
+ u32 metadata;
+ u8 match[0];
+};
+
+/** \brief Set/unset policer classify interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to set/unset policer classify
+ @param ip4_table_index - ip4 classify table index (~0 for skip)
+ @param ip6_table_index - ip6 classify table index (~0 for skip)
+ @param l2_table_index - l2 classify table index (~0 for skip)
+ @param is_add - Set if non-zero, else unset
+ Note: User is recommended to use just one valid table_index per call.
+ (ip4_table_index, ip6_table_index, or l2_table_index)
+*/
+autoreply define policer_classify_set_interface
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 ip4_table_index;
+ u32 ip6_table_index;
+ u32 l2_table_index;
+ u8 is_add;
+};
+
+/** \brief Get list of policer classify interfaces and tables
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param type - classify table type
+*/
+define policer_classify_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 type;
+};
+
+/** \brief Policer classify operational state response.
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software interface index
+ @param table_index - classify table index
+*/
+define policer_classify_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u32 table_index;
+};
+
+/** \brief Classify get table IDs request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define classify_table_ids
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for classify get table IDs request
+ @param context - sender context which was passed in the request
+ @param retval - return code for the request
+ @param count - number of ids returned in response
+ @param ids - array of classify table ids
+*/
+define classify_table_ids_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ u32 ids[count];
+};
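+
+/* Client-side sketch (process_table_id is a hypothetical callback): the
+   ids array is carried inline after the fixed fields, one network-order
+   u32 per classify table:
+
+     u32 i, n = ntohl (rmp->count);
+     for (i = 0; i < n; i++)
+       process_table_id (ntohl (rmp->ids[i]));
+*/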
+
+/** \brief Classify table ids by interface index request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface
+*/
+define classify_table_by_interface
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Reply for classify table id by interface index request
+ @param context - sender context which was passed in the request
+ @param retval - return code for the request
+ @param sw_if_index - index of the interface
+ @param l2_table_id - l2 classify table index
+ @param ip4_table_id - ip4 classify table index
+ @param ip6_table_id - ip6 classify table index
+*/
+define classify_table_by_interface_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+ u32 l2_table_id;
+ u32 ip4_table_id;
+ u32 ip6_table_id;
+};
+
+/** \brief Classify table info
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_id - classify table index
+*/
+define classify_table_info
+{
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+};
+
+/** \brief Reply for classify table info request
+ @param context - sender context which was passed in the request
+ @param retval - return code for the request
+ @param table_id - classify table index
+ @param nbuckets - number of buckets when adding a table
+ @param match_n_vectors - number of match vectors
+ @param skip_n_vectors - number of skip vectors
+ @param active_sessions - number of sessions (active entries)
+ @param next_table_index - index of next table
+ @param miss_next_index - index of the miss next node
+ @param mask_length - length of mask in bytes
+ @param mask[] - match mask
+*/
+define classify_table_info_reply
+{
+ u32 context;
+ i32 retval;
+ u32 table_id;
+ u32 nbuckets;
+ u32 match_n_vectors;
+ u32 skip_n_vectors;
+ u32 active_sessions;
+ u32 next_table_index;
+ u32 miss_next_index;
+ u32 mask_length;
+ u8 mask[mask_length];
+};
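+
+/* Note for consumers: in the current handler mask_length is always
+   match_n_vectors * sizeof (u32x4), i.e. 16 bytes per match vector, so
+   a client can copy the mask with:
+
+     u32 len = ntohl (rmp->mask_length);
+     u8 *mask = clib_mem_alloc (len);
+     clib_memcpy (mask, rmp->mask, len);
+*/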
+
+/** \brief Classify sessions dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_id - classify table index
+*/
+define classify_session_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+};
+
+/** \brief Reply for classify table session dump request
+ @param context - sender context which was passed in the request
+ @param retval - return code for the request
+ @param table_id - classify table index
+ @param hit_next_index - hit_next_index of session
+ @param opaque_index - opaque_index of session
+ @param advance - advance value of session
+ @param match_length - length of match value in bytes
+ @param match[] - match value for session
+*/
+define classify_session_details
+{
+ u32 context;
+ i32 retval;
+ u32 table_id;
+ u32 hit_next_index;
+ i32 advance;
+ u32 opaque_index;
+ u32 match_length;
+ u8 match[match_length];
+};
+
+/** \brief Set/unset flow classify interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to set/unset flow classify
+ @param ip4_table_index - ip4 classify table index (~0 for skip)
+ @param ip6_table_index - ip6 classify table index (~0 for skip)
+ @param is_add - Set if non-zero, else unset
+ Note: it is recommended to pass just one valid table_index per call
+ (ip4_table_index or ip6_table_index).
+*/
+autoreply define flow_classify_set_interface {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 ip4_table_index;
+ u32 ip6_table_index;
+ u8 is_add;
+};
+
+/** \brief Get list of flow classify interfaces and tables
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param type - classify table type
+*/
+define flow_classify_dump {
+ u32 client_index;
+ u32 context;
+ u8 type;
+};
+
+/** \brief Flow classify operational state response.
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software interface index
+ @param table_index - classify table index
+*/
+define flow_classify_details {
+ u32 context;
+ u32 sw_if_index;
+ u32 table_index;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c
new file mode 100644
index 00000000..24c7a2b9
--- /dev/null
+++ b/src/vnet/classify/classify_api.c
@@ -0,0 +1,547 @@
+/*
+ *------------------------------------------------------------------
+ * classify_api.c - classify api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+#include <vnet/classify/policer_classify.h>
+#include <vnet/classify/flow_classify.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun		/* define endian swap functions */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \
+_(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \
+_(CLASSIFY_TABLE_IDS, classify_table_ids)                               \
+_(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface)             \
+_(CLASSIFY_TABLE_INFO, classify_table_info)                             \
+_(CLASSIFY_SESSION_DUMP, classify_session_dump)                         \
+_(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \
+_(POLICER_CLASSIFY_DUMP, policer_classify_dump) \
+_(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \
+_(FLOW_CLASSIFY_DUMP, flow_classify_dump)
+
+#define foreach_classify_add_del_table_field \
+_(table_index) \
+_(nbuckets) \
+_(memory_size) \
+_(skip_n_vectors) \
+_(match_n_vectors) \
+_(next_table_index) \
+_(miss_next_index) \
+_(current_data_flag) \
+_(current_data_offset)
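+
+/*
+ * The field list above is an X-macro: each handler redefines _() and
+ * expands the list to stamp out one statement per field. For example
+ *
+ *   #define _(a) u32 a;
+ *   foreach_classify_add_del_table_field;
+ *
+ * declares "u32 table_index; u32 nbuckets; ..." and a second expansion
+ * with "a = ntohl(mp->a);" byte-swaps every field in the same order.
+ */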
+
+static void vl_api_classify_add_del_table_t_handler
+ (vl_api_classify_add_del_table_t * mp)
+{
+ vl_api_classify_add_del_table_reply_t *rmp;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ vnet_classify_table_t *t;
+ int rv;
+
+#define _(a) u32 a;
+ foreach_classify_add_del_table_field;
+#undef _
+
+#define _(a) a = ntohl(mp->a);
+ foreach_classify_add_del_table_field;
+#undef _
+
+ /* The underlying API fails silently, on purpose, so check here */
+ if (mp->is_add == 0) /* delete */
+ {
+ if (pool_is_free_index (cm->tables, table_index))
+ {
+ rv = VNET_API_ERROR_NO_SUCH_TABLE;
+ goto out;
+ }
+ }
+ else /* add or update */
+ {
+ if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
+ table_index = ~0;
+ }
+
+ rv = vnet_classify_add_del_table
+ (cm, mp->mask, nbuckets, memory_size,
+ skip_n_vectors, match_n_vectors,
+ next_table_index, miss_next_index, &table_index,
+ current_data_flag, current_data_offset, mp->is_add, mp->del_chain);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_CLASSIFY_ADD_DEL_TABLE_REPLY,
+ ({
+ if (rv == 0 && mp->is_add)
+ {
+ t = pool_elt_at_index (cm->tables, table_index);
+ rmp->skip_n_vectors = ntohl(t->skip_n_vectors);
+ rmp->match_n_vectors = ntohl(t->match_n_vectors);
+ rmp->new_table_index = ntohl(table_index);
+ }
+ else
+ {
+ rmp->skip_n_vectors = ~0;
+ rmp->match_n_vectors = ~0;
+ rmp->new_table_index = ~0;
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+static void vl_api_classify_add_del_session_t_handler
+ (vl_api_classify_add_del_session_t * mp)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ vl_api_classify_add_del_session_reply_t *rmp;
+ int rv;
+ u32 table_index, hit_next_index, opaque_index, metadata;
+ i32 advance;
+ u8 action;
+
+ table_index = ntohl (mp->table_index);
+ hit_next_index = ntohl (mp->hit_next_index);
+ opaque_index = ntohl (mp->opaque_index);
+ advance = ntohl (mp->advance);
+ action = mp->action;
+ metadata = ntohl (mp->metadata);
+
+ rv = vnet_classify_add_del_session
+ (cm, table_index, mp->match, hit_next_index, opaque_index,
+ advance, action, metadata, mp->is_add);
+
+ REPLY_MACRO (VL_API_CLASSIFY_ADD_DEL_SESSION_REPLY);
+}
+
+static void
+ vl_api_policer_classify_set_interface_t_handler
+ (vl_api_policer_classify_set_interface_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_classify_set_interface_reply_t *rmp;
+ int rv;
+ u32 sw_if_index, ip4_table_index, ip6_table_index, l2_table_index;
+
+ ip4_table_index = ntohl (mp->ip4_table_index);
+ ip6_table_index = ntohl (mp->ip6_table_index);
+ l2_table_index = ntohl (mp->l2_table_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = vnet_set_policer_classify_intfc (vm, sw_if_index, ip4_table_index,
+ ip6_table_index, l2_table_index,
+ mp->is_add);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_POLICER_CLASSIFY_SET_INTERFACE_REPLY);
+}
+
+static void
+send_policer_classify_details (u32 sw_if_index,
+ u32 table_index,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_policer_classify_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_POLICER_CLASSIFY_DETAILS);
+ mp->context = context;
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->table_index = htonl (table_index);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_policer_classify_dump_t_handler (vl_api_policer_classify_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ policer_classify_main_t *pcm = &policer_classify_main;
+ u32 *vec_tbl;
+ int i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ vec_tbl = pcm->classify_table_index_by_sw_if_index[mp->type];
+
+ if (vec_len (vec_tbl))
+ {
+ for (i = 0; i < vec_len (vec_tbl); i++)
+ {
+ if (vec_elt (vec_tbl, i) == ~0)
+ continue;
+
+ send_policer_classify_details (i, vec_elt (vec_tbl, i), q,
+ mp->context);
+ }
+ }
+}
+
+static void
+vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ vnet_classify_table_t *t;
+ u32 *table_ids = 0;
+ u32 count;
+
+ /* *INDENT-OFF* */
+ pool_foreach (t, cm->tables,
+ ({
+ vec_add1 (table_ids, ntohl(t - cm->tables));
+ }));
+ /* *INDENT-ON* */
+ count = vec_len (table_ids);
+
+ vl_api_classify_table_ids_reply_t *rmp;
+ rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp) + count * sizeof (u32));
+ rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_IDS_REPLY);
+ rmp->context = mp->context;
+ rmp->count = ntohl (count);
+ clib_memcpy (rmp->ids, table_ids, count * sizeof (u32));
+ rmp->retval = 0;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+
+ vec_free (table_ids);
+}
+
+static void
+ vl_api_classify_table_by_interface_t_handler
+ (vl_api_classify_table_by_interface_t * mp)
+{
+ vl_api_classify_table_by_interface_reply_t *rmp;
+ int rv = 0;
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ u32 *acl = 0;
+
+ vec_validate (acl, INPUT_ACL_N_TABLES - 1);
+ vec_set (acl, ~0);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ input_acl_main_t *am = &input_acl_main;
+
+ int if_idx;
+ u32 type;
+
+ for (type = 0; type < INPUT_ACL_N_TABLES; type++)
+ {
+ u32 *vec_tbl = am->classify_table_index_by_sw_if_index[type];
+ if (vec_len (vec_tbl))
+ {
+ for (if_idx = 0; if_idx < vec_len (vec_tbl); if_idx++)
+ {
+ if (vec_elt (vec_tbl, if_idx) == ~0 || sw_if_index != if_idx)
+ {
+ continue;
+ }
+ acl[type] = vec_elt (vec_tbl, if_idx);
+ }
+ }
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_CLASSIFY_TABLE_BY_INTERFACE_REPLY,
+ ({
+ rmp->sw_if_index = ntohl(sw_if_index);
+ rmp->l2_table_id = ntohl(acl[INPUT_ACL_TABLE_L2]);
+ rmp->ip4_table_id = ntohl(acl[INPUT_ACL_TABLE_IP4]);
+ rmp->ip6_table_id = ntohl(acl[INPUT_ACL_TABLE_IP6]);
+ }));
+ /* *INDENT-ON* */
+ vec_free (acl);
+}
+
+static void
+vl_api_classify_table_info_t_handler (vl_api_classify_table_info_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ vl_api_classify_table_info_reply_t *rmp = 0;
+
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 table_id = ntohl (mp->table_id);
+ vnet_classify_table_t *t;
+
+ /* *INDENT-OFF* */
+ pool_foreach (t, cm->tables,
+ ({
+ if (table_id == t - cm->tables)
+ {
+ rmp = vl_msg_api_alloc_as_if_client
+ (sizeof (*rmp) + t->match_n_vectors * sizeof (u32x4));
+ rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_INFO_REPLY);
+ rmp->context = mp->context;
+ rmp->table_id = ntohl(table_id);
+ rmp->nbuckets = ntohl(t->nbuckets);
+ rmp->match_n_vectors = ntohl(t->match_n_vectors);
+ rmp->skip_n_vectors = ntohl(t->skip_n_vectors);
+ rmp->active_sessions = ntohl(t->active_elements);
+ rmp->next_table_index = ntohl(t->next_table_index);
+ rmp->miss_next_index = ntohl(t->miss_next_index);
+ rmp->mask_length = ntohl(t->match_n_vectors * sizeof (u32x4));
+ clib_memcpy(rmp->mask, t->mask, t->match_n_vectors * sizeof(u32x4));
+ rmp->retval = 0;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (rmp == 0)
+ {
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+      rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_INFO_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = ntohl (VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND);
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+send_classify_session_details (unix_shared_memory_queue_t * q,
+ u32 table_id,
+ u32 match_length,
+ vnet_classify_entry_t * e, u32 context)
+{
+ vl_api_classify_session_details_t *rmp;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_SESSION_DETAILS);
+ rmp->context = context;
+ rmp->table_id = ntohl (table_id);
+ rmp->hit_next_index = ntohl (e->next_index);
+ rmp->advance = ntohl (e->advance);
+ rmp->opaque_index = ntohl (e->opaque_index);
+ rmp->match_length = ntohl (match_length);
+ clib_memcpy (rmp->match, e->key, match_length);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ unix_shared_memory_queue_t *q;
+
+ u32 table_id = ntohl (mp->table_id);
+ vnet_classify_table_t *t;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (t, cm->tables,
+ ({
+ if (table_id == t - cm->tables)
+ {
+ vnet_classify_bucket_t * b;
+ vnet_classify_entry_t * v, * save_v;
+ int i, j, k;
+
+ for (i = 0; i < t->nbuckets; i++)
+ {
+ b = &t->buckets [i];
+ if (b->offset == 0)
+ continue;
+
+ save_v = vnet_classify_get_entry (t, b->offset);
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < t->entries_per_page; k++)
+ {
+ v = vnet_classify_entry_at_index
+ (t, save_v, j*t->entries_per_page + k);
+ if (vnet_classify_entry_is_free (v))
+ continue;
+
+ send_classify_session_details
+ (q, table_id, t->match_n_vectors * sizeof (u32x4),
+ v, mp->context);
+ }
+ }
+ }
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+}
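+
+/*
+ * Layout note for the walk above: each bucket points at 2^log2_pages
+ * pages of t->entries_per_page entries, so entry (j, k) lives at linear
+ * index j * entries_per_page + k relative to the bucket's first entry.
+ */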
+
+static void
+ vl_api_flow_classify_set_interface_t_handler
+ (vl_api_flow_classify_set_interface_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_flow_classify_set_interface_reply_t *rmp;
+ int rv;
+ u32 sw_if_index, ip4_table_index, ip6_table_index;
+
+ ip4_table_index = ntohl (mp->ip4_table_index);
+ ip6_table_index = ntohl (mp->ip6_table_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = vnet_set_flow_classify_intfc (vm, sw_if_index, ip4_table_index,
+ ip6_table_index, mp->is_add);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_FLOW_CLASSIFY_SET_INTERFACE_REPLY);
+}
+
+static void
+send_flow_classify_details (u32 sw_if_index,
+ u32 table_index,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_flow_classify_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_FLOW_CLASSIFY_DETAILS);
+ mp->context = context;
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->table_index = htonl (table_index);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_flow_classify_dump_t_handler (vl_api_flow_classify_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ flow_classify_main_t *pcm = &flow_classify_main;
+ u32 *vec_tbl;
+ int i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ vec_tbl = pcm->classify_table_index_by_sw_if_index[mp->type];
+
+ if (vec_len (vec_tbl))
+ {
+ for (i = 0; i < vec_len (vec_tbl); i++)
+ {
+ if (vec_elt (vec_tbl, i) == ~0)
+ continue;
+
+ send_flow_classify_details (i, vec_elt (vec_tbl, i), q,
+ mp->context);
+ }
+ }
+}
+
+/*
+ * classify_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_classify;
+#undef _
+}
+
+static clib_error_t *
+classify_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (classify_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/classify/flow_classify.c b/src/vnet/classify/flow_classify.c
new file mode 100644
index 00000000..0a624204
--- /dev/null
+++ b/src/vnet/classify/flow_classify.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/classify/flow_classify.h>
+
+static void
+vnet_flow_classify_feature_enable (vlib_main_t * vnm,
+ flow_classify_main_t * fcm,
+ u32 sw_if_index,
+ flow_classify_table_id_t tid,
+ int feature_enable)
+{
+ vnet_feature_config_main_t *vfcm;
+ u8 arc;
+
+ if (tid == FLOW_CLASSIFY_TABLE_IP4)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-flow-classify",
+ sw_if_index, feature_enable, 0, 0);
+ arc = vnet_get_feature_arc_index ("ip4-unicast");
+ }
+ else
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-flow-classify",
+ sw_if_index, feature_enable, 0, 0);
+ arc = vnet_get_feature_arc_index ("ip6-unicast");
+ }
+
+ vfcm = vnet_get_feature_arc_config_main (arc);
+ fcm->vnet_config_main[tid] = &vfcm->config_main;
+}
+
+int vnet_set_flow_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 ip4_table_index, u32 ip6_table_index,
+ u32 is_add)
+{
+ flow_classify_main_t * fcm = &flow_classify_main;
+ vnet_classify_main_t * vcm = fcm->vnet_classify_main;
+ u32 pct[FLOW_CLASSIFY_N_TABLES] = {ip4_table_index, ip6_table_index};
+ u32 ti;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ for (ti = 0; ti < FLOW_CLASSIFY_N_TABLES; ti++)
+ {
+ if (pct[ti] == ~0)
+ continue;
+
+ if (pool_is_free_index (vcm->tables, pct[ti]))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ vec_validate_init_empty
+ (fcm->classify_table_index_by_sw_if_index[ti], sw_if_index, ~0);
+
+ /* Reject any DEL operation with wrong sw_if_index */
+ if (!is_add &&
+ (pct[ti] != fcm->classify_table_index_by_sw_if_index[ti][sw_if_index]))
+ {
+ clib_warning ("Non-existent intf_idx=%d with table_index=%d for delete",
+ sw_if_index, pct[ti]);
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+      /* Return ok on ADD operation if feature is already enabled */
+ if (is_add &&
+ fcm->classify_table_index_by_sw_if_index[ti][sw_if_index] != ~0)
+ return 0;
+
+ vnet_flow_classify_feature_enable (vm, fcm, sw_if_index, ti, is_add);
+
+ if (is_add)
+ fcm->classify_table_index_by_sw_if_index[ti][sw_if_index] = pct[ti];
+ else
+ fcm->classify_table_index_by_sw_if_index[ti][sw_if_index] = ~0;
+ }
+
+ return 0;
+}
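+
+/*
+ * Minimal usage sketch (the index values are assumptions): enable ip4
+ * flow classification on sw_if_index 1 with classify table 0, passing
+ * ~0 to leave the ip6 binding untouched:
+ *
+ *   int rv = vnet_set_flow_classify_intfc (vm, 1, 0, ~0, 1);
+ *   if (rv)
+ *     clib_warning ("flow classify enable failed: %d", rv);
+ */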
+
+static clib_error_t *
+set_flow_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 is_add = 1;
+ u32 idx_cnt = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "interface %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ idx_cnt++;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ idx_cnt++;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Interface must be specified.");
+
+ if (!idx_cnt)
+ return clib_error_return (0, "Table index should be specified.");
+
+ if (idx_cnt > 1)
+ return clib_error_return (0, "Only one table index per API is allowed.");
+
+ rv = vnet_set_flow_classify_intfc(vm, sw_if_index, ip4_table_index,
+ ip6_table_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+ return clib_error_return (0, "No such interface");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "No such classifier table");
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_flow_classify_command, static) = {
+ .path = "set flow classify",
+ .short_help =
+ "set flow classify interface <int> [ip4-table <index>]\n"
+ " [ip6-table <index>] [del]",
+ .function = set_flow_classify_command_fn,
+};
+
+static uword
+unformat_table_type (unformat_input_t * input, va_list * va)
+{
+ u32 * r = va_arg (*va, u32 *);
+ u32 tid;
+
+ if (unformat (input, "ip4"))
+ tid = FLOW_CLASSIFY_TABLE_IP4;
+ else if (unformat (input, "ip6"))
+ tid = FLOW_CLASSIFY_TABLE_IP6;
+ else
+ return 0;
+
+ *r = tid;
+ return 1;
+}
+static clib_error_t *
+show_flow_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ flow_classify_main_t * fcm = &flow_classify_main;
+ u32 type = FLOW_CLASSIFY_N_TABLES;
+ u32 * vec_tbl;
+ int i;
+
+ if (unformat (input, "type %U", unformat_table_type, &type))
+ ;
+ else
+ return clib_error_return (0, "Type must be specified.");;
+
+ if (type == FLOW_CLASSIFY_N_TABLES)
+ return clib_error_return (0, "Invalid table type.");
+
+ vec_tbl = fcm->classify_table_index_by_sw_if_index[type];
+
+ if (vec_len(vec_tbl))
+ vlib_cli_output (vm, "%10s%20s\t\t%s", "Intfc idx", "Classify table",
+ "Interface name");
+ else
+ vlib_cli_output (vm, "No tables configured.");
+
+ for (i = 0; i < vec_len (vec_tbl); i++)
+ {
+ if (vec_elt(vec_tbl, i) == ~0)
+ continue;
+
+ vlib_cli_output (vm, "%10d%20d\t\t%U", i, vec_elt(vec_tbl, i),
+ format_vnet_sw_if_index_name, fcm->vnet_main, i);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_flow_classify_command, static) = {
+ .path = "show classify flow",
+ .short_help = "show classify flow type [ip4|ip6]",
+ .function = show_flow_classify_command_fn,
+};
diff --git a/src/vnet/classify/flow_classify.h b/src/vnet/classify/flow_classify.h
new file mode 100644
index 00000000..3ae04cd7
--- /dev/null
+++ b/src/vnet/classify/flow_classify.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_flow_classify_h__
+#define __included_vnet_flow_classify_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum {
+ FLOW_CLASSIFY_TABLE_IP4,
+ FLOW_CLASSIFY_TABLE_IP6,
+ FLOW_CLASSIFY_N_TABLES,
+} flow_classify_table_id_t;
+
+typedef enum {
+ FLOW_CLASSIFY_NEXT_INDEX_DROP,
+ FLOW_CLASSIFY_NEXT_INDEX_N_NEXT,
+} flow_classify_next_index_t;
+
+typedef struct {
+ /* Classifier table vectors */
+ u32 * classify_table_index_by_sw_if_index [FLOW_CLASSIFY_N_TABLES];
+
+ /* Convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ vnet_classify_main_t * vnet_classify_main;
+ vnet_config_main_t * vnet_config_main [FLOW_CLASSIFY_N_TABLES];
+} flow_classify_main_t;
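+
+/*
+ * Sketch (sw_if_index is an assumption): the binding is a vector indexed
+ * by sw_if_index, one vector per table id, with ~0 meaning "no table":
+ *
+ *   u32 ti = flow_classify_main.classify_table_index_by_sw_if_index
+ *     [FLOW_CLASSIFY_TABLE_IP4][sw_if_index];
+ *   if (ti != ~0)
+ *     ; (interface has an ip4 flow classify table bound)
+ */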
+
+flow_classify_main_t flow_classify_main;
+
+int vnet_set_flow_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 ip4_table_index, u32 ip6_table_index,
+ u32 is_add);
+
+#endif /* __included_vnet_flow_classify_h__ */
diff --git a/src/vnet/classify/flow_classify_node.c b/src/vnet/classify/flow_classify_node.c
new file mode 100644
index 00000000..d3261d33
--- /dev/null
+++ b/src/vnet/classify/flow_classify_node.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/classify/flow_classify.h>
+#include <vnet/classify/vnet_classify.h>
+
+typedef struct {
+ u32 sw_if_index;
+ u32 next_index;
+ u32 table_index;
+ u32 offset;
+} flow_classify_trace_t;
+
+static u8 *
+format_flow_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ flow_classify_trace_t * t = va_arg (*args, flow_classify_trace_t *);
+
+ s = format (s, "FLOW_CLASSIFY: sw_if_index %d next %d table %d offset %d",
+ t->sw_if_index, t->next_index, t->table_index, t->offset);
+ return s;
+}
+
+#define foreach_flow_classify_error \
+_(MISS, "Flow classify misses") \
+_(HIT, "Flow classify hits") \
+_(CHAIN_HIT, "Flow classify hits after chain walk") \
+_(DROP, "Flow classify action drop")
+
+typedef enum {
+#define _(sym,str) FLOW_CLASSIFY_ERROR_##sym,
+ foreach_flow_classify_error
+#undef _
+ FLOW_CLASSIFY_N_ERROR,
+} flow_classify_error_t;
+
+static char * flow_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_flow_classify_error
+#undef _
+};
+
+static inline uword
+flow_classify_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ flow_classify_table_id_t tid)
+{
+ u32 n_left_from, * from, * to_next;
+ flow_classify_next_index_t next_index;
+ flow_classify_main_t * fcm = &flow_classify_main;
+ vnet_classify_main_t * vcm = fcm->vnet_classify_main;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ u32 drop = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ /* First pass: compute hashes */
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ u32 bi0, bi1;
+ u8 * h0, * h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t * t0, * t1;
+
+ /* Prefetch next iteration */
+ {
+ vlib_buffer_t * p1, * p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = b1->data;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 = fcm->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 = fcm->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ vnet_buffer(b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer(b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+ u8 * h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 = fcm->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
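+
+  /*
+   * Two-pass structure: the loops above only compute and stash each
+   * packet's hash, prefetching the owning bucket; the dispatch loop
+   * below then performs the lookups with those buckets already warm
+   * in cache.
+   */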
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = FLOW_CLASSIFY_NEXT_INDEX_DROP;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+ vnet_classify_entry_t * e0;
+ u64 hash0;
+ u8 * h0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]);
+ vnet_classify_table_t * tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer(p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer(p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* Speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+ table_index0 = vnet_buffer(b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+
+ vnet_get_config_data (fcm->vnet_config_main[tid],
+ &b0->current_config_index,
+ &next0,
+ /* # bytes of config data */ 0);
+
+ if (PREDICT_TRUE(table_index0 != ~0))
+ {
+ hash0 = vnet_buffer(b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ hits++;
+ }
+ else
+ {
+ misses++;
+ vnet_classify_add_del_session (vcm, table_index0,
+ h0, ~0, 0, 0, 0, 0, 1);
+ /* increment counter */
+ vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ }
+ }
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ flow_classify_trace_t * t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = (t0 && e0) ? vnet_classify_get_offset (t0, e0): ~0;
+ }
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ FLOW_CLASSIFY_ERROR_MISS,
+ misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ FLOW_CLASSIFY_ERROR_HIT,
+ hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ FLOW_CLASSIFY_ERROR_CHAIN_HIT,
+ chain_hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ FLOW_CLASSIFY_ERROR_DROP,
+ drop);
+
+ return frame->n_vectors;
+}
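+
+/*
+ * Note the learning behavior above: on a miss the node adds a session
+ * for the packet's key (with hit_next_index ~0) and immediately re-runs
+ * the lookup so the new entry's counters are updated; later packets of
+ * the same flow then count as hits.
+ */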
+
+static uword
+ip4_flow_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return flow_classify_inline(vm, node, frame, FLOW_CLASSIFY_TABLE_IP4);
+}
+
+VLIB_REGISTER_NODE (ip4_flow_classify_node) = {
+ .function = ip4_flow_classify,
+ .name = "ip4-flow-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flow_classify_trace,
+ .n_errors = ARRAY_LEN(flow_classify_error_strings),
+ .error_strings = flow_classify_error_strings,
+ .n_next_nodes = FLOW_CLASSIFY_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [FLOW_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_flow_classify_node, ip4_flow_classify);
+
+static uword
+ip6_flow_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return flow_classify_inline(vm, node, frame, FLOW_CLASSIFY_TABLE_IP6);
+}
+
+VLIB_REGISTER_NODE (ip6_flow_classify_node) = {
+ .function = ip6_flow_classify,
+ .name = "ip6-flow-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flow_classify_trace,
+ .n_errors = ARRAY_LEN(flow_classify_error_strings),
+ .error_strings = flow_classify_error_strings,
+ .n_next_nodes = FLOW_CLASSIFY_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [FLOW_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_flow_classify_node, ip6_flow_classify);
+
+
+static clib_error_t *
+flow_classify_init (vlib_main_t *vm)
+{
+ flow_classify_main_t * fcm = &flow_classify_main;
+
+ fcm->vlib_main = vm;
+ fcm->vnet_main = vnet_get_main();
+ fcm->vnet_classify_main = &vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (flow_classify_init);
diff --git a/src/vnet/classify/input_acl.c b/src/vnet/classify/input_acl.c
new file mode 100644
index 00000000..c446f2d6
--- /dev/null
+++ b/src/vnet/classify/input_acl.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+input_acl_main_t input_acl_main;
+
+static int
+vnet_inacl_ip_feature_enable (vlib_main_t * vnm,
+ input_acl_main_t *am,
+ u32 sw_if_index,
+ input_acl_table_id_t tid,
+ int feature_enable)
+{
+
+ if (tid == INPUT_ACL_TABLE_L2)
+ {
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ACL,
+ feature_enable);
+ }
+ else
+ { /* IP[46] */
+ vnet_feature_config_main_t *fcm;
+ u8 arc;
+
+ if (tid == INPUT_ACL_TABLE_IP4)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-inacl",
+ sw_if_index, feature_enable, 0, 0);
+ arc = vnet_get_feature_arc_index ("ip4-unicast");
+ }
+ else
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-inacl",
+ sw_if_index, feature_enable, 0, 0);
+ arc = vnet_get_feature_arc_index ("ip6-unicast");
+ }
+
+ fcm = vnet_get_feature_arc_config_main (arc);
+ am->vnet_config_main[tid] = &fcm->config_main;
+ }
+
+ return 0;
+}
+
+int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 l2_table_index, u32 is_add)
+{
+ input_acl_main_t * am = &input_acl_main;
+ vnet_classify_main_t * vcm = am->vnet_classify_main;
+ u32 acl[INPUT_ACL_N_TABLES] = {ip4_table_index, ip6_table_index,
+ l2_table_index};
+ u32 ti;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ for (ti = 0; ti < INPUT_ACL_N_TABLES; ti++)
+ {
+ if (acl[ti] == ~0)
+ continue;
+
+ if (pool_is_free_index (vcm->tables, acl[ti]))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ vec_validate_init_empty
+ (am->classify_table_index_by_sw_if_index[ti], sw_if_index, ~0);
+
+ /* Reject any DEL operation with wrong sw_if_index */
+ if (!is_add &&
+ (acl[ti] != am->classify_table_index_by_sw_if_index[ti][sw_if_index]))
+ {
+ clib_warning ("Non-existent intf_idx=%d with table_index=%d for delete",
+ sw_if_index, acl[ti]);
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+      /* Return ok on ADD operation if feature is already enabled */
+ if (is_add &&
+ am->classify_table_index_by_sw_if_index[ti][sw_if_index] != ~0)
+ return 0;
+
+ vnet_inacl_ip_feature_enable (vm, am, sw_if_index, ti, is_add);
+
+ if (is_add)
+ am->classify_table_index_by_sw_if_index[ti][sw_if_index] = acl[ti];
+ else
+ am->classify_table_index_by_sw_if_index[ti][sw_if_index] = ~0;
+ }
+
+ return 0;
+}
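+
+/*
+ * Usage sketch (index values are assumptions): attach ip4 classify
+ * table 5 as an input ACL on sw_if_index 2, passing ~0 for the table
+ * slots that should be left alone:
+ *
+ *   int rv = vnet_set_input_acl_intfc (vm, 2, 5, ~0, ~0, 1);
+ */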
+
+static clib_error_t *
+set_input_acl_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 l2_table_index = ~0;
+ u32 is_add = 1;
+ u32 idx_cnt = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ idx_cnt++;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ idx_cnt++;
+ else if (unformat (input, "l2-table %d", &l2_table_index))
+ idx_cnt++;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Interface must be specified.");
+
+ if (!idx_cnt)
+ return clib_error_return (0, "Table index should be specified.");
+
+ if (idx_cnt > 1)
+ return clib_error_return (0, "Only one table index per API is allowed.");
+
+ rv = vnet_set_input_acl_intfc (vm, sw_if_index, ip4_table_index,
+ ip6_table_index, l2_table_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+ return clib_error_return (0, "No such interface");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "No such classifier table");
+ }
+ return 0;
+}
+
+/*
+ * Configure an interface to enable/disable the input ACL feature:
+ *  intfc - interface name to be configured for input ACL
+ *  ip4-table <index> [del] - enable/disable the IP4 input ACL
+ *  ip6-table <index> [del] - enable/disable the IP6 input ACL
+ *  l2-table <index> [del] - enable/disable the Layer2 input ACL
+ *
+ * Note: Only one table index per API call is allowed.
+ *
+ */
+VLIB_CLI_COMMAND (set_input_acl_command, static) = {
+ .path = "set interface input acl",
+ .short_help =
+ "set interface input acl intfc <int> [ip4-table <index>]\n"
+ " [ip6-table <index>] [l2-table <index>] [del]",
+ .function = set_input_acl_command_fn,
+};
+
+clib_error_t *input_acl_init (vlib_main_t *vm)
+{
+ input_acl_main_t * am = &input_acl_main;
+ clib_error_t * error = 0;
+
+ if ((error = vlib_call_init_function (vm, ip_inacl_init)))
+ return error;
+
+ am->vlib_main = vm;
+ am->vnet_main = vnet_get_main();
+ am->vnet_classify_main = &vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (input_acl_init);
+
+uword unformat_acl_type (unformat_input_t * input, va_list * args)
+{
+ u32 * acl_type = va_arg (*args, u32 *);
+ u32 tid = INPUT_ACL_N_TABLES;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "ip4"))
+ tid = INPUT_ACL_TABLE_IP4;
+ else if (unformat (input, "ip6"))
+ tid = INPUT_ACL_TABLE_IP6;
+ else if (unformat (input, "l2"))
+ tid = INPUT_ACL_TABLE_L2;
+ else
+ break;
+ }
+
+ *acl_type = tid;
+ return 1;
+}
+
+u8 * format_vnet_inacl_info (u8 * s, va_list * va)
+{
+ input_acl_main_t * am = va_arg (*va, input_acl_main_t *);
+ int sw_if_idx = va_arg (*va, int);
+ u32 tid = va_arg (*va, u32);
+
+ if (tid == ~0)
+ {
+ s = format (s, "%10s%20s\t\t%s", "Intfc idx", "Classify table",
+ "Interface name");
+ return s;
+ }
+
+ s = format (s, "%10d%20d\t\t%U", sw_if_idx, tid,
+ format_vnet_sw_if_index_name, am->vnet_main, sw_if_idx);
+
+ return s;
+}
+
+static clib_error_t *
+show_inacl_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ input_acl_main_t * am = &input_acl_main;
+ u32 type = INPUT_ACL_N_TABLES;
+ int i;
+ u32 * vec_tbl;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "type %U", unformat_acl_type, &type))
+ ;
+ else
+ break;
+ }
+
+ if (type == INPUT_ACL_N_TABLES)
+ return clib_error_return (0, "Invalid input ACL table type.");
+
+ vec_tbl = am->classify_table_index_by_sw_if_index[type];
+
+ if (vec_len(vec_tbl))
+ vlib_cli_output (vm, "%U", format_vnet_inacl_info, am, ~0 /* hdr */, ~0);
+ else
+ vlib_cli_output (vm, "No input ACL tables configured");
+
+ for (i = 0; i < vec_len (vec_tbl); i++)
+ {
+ if (vec_elt(vec_tbl, i) == ~0)
+ continue;
+
+ vlib_cli_output (vm, "%U", format_vnet_inacl_info,
+ am, i, vec_elt(vec_tbl, i));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_inacl_command, static) = {
+ .path = "show inacl",
+ .short_help = "show inacl type [ip4|ip6|l2]",
+ .function = show_inacl_command_fn,
+};
diff --git a/src/vnet/classify/input_acl.h b/src/vnet/classify/input_acl.h
new file mode 100644
index 00000000..7ffc189f
--- /dev/null
+++ b/src/vnet/classify/input_acl.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_input_acl_h__
+#define __included_vnet_input_acl_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum {
+ INPUT_ACL_TABLE_IP4,
+ INPUT_ACL_TABLE_IP6,
+ INPUT_ACL_TABLE_L2,
+ INPUT_ACL_N_TABLES,
+} input_acl_table_id_t;
+
+typedef enum {
+ ACL_NEXT_INDEX_DENY,
+ ACL_NEXT_INDEX_N_NEXT,
+} acl_next_index_t;
+
+typedef struct {
+
+ /* classifier table vectors */
+ u32 * classify_table_index_by_sw_if_index [INPUT_ACL_N_TABLES];
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ vnet_classify_main_t * vnet_classify_main;
+ vnet_config_main_t * vnet_config_main [INPUT_ACL_N_TABLES];
+} input_acl_main_t;
+
+extern input_acl_main_t input_acl_main;
+
+int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 l2_table_index, u32 is_add);
+
+#endif /* __included_vnet_input_acl_h__ */
diff --git a/src/vnet/classify/ip_classify.c b/src/vnet/classify/ip_classify.c
new file mode 100644
index 00000000..44973ae5
--- /dev/null
+++ b/src/vnet/classify/ip_classify.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/dpo/classify_dpo.h>
+
+typedef struct {
+ u32 next_index;
+ u32 table_index;
+ u32 entry_index;
+} ip_classify_trace_t;
+
+/* packet trace format function */
+static u8 * format_ip_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip_classify_trace_t * t = va_arg (*args, ip_classify_trace_t *);
+
+ s = format (s, "IP_CLASSIFY: next_index %d, table %d, entry %d",
+ t->next_index, t->table_index, t->entry_index);
+ return s;
+}
+
+vlib_node_registration_t ip4_classify_node;
+vlib_node_registration_t ip6_classify_node;
+
+#define foreach_ip_classify_error \
+_(MISS, "Classify misses") \
+_(HIT, "Classify hits") \
+_(CHAIN_HIT, "Classify hits after chain walk")
+
+typedef enum {
+#define _(sym,str) IP_CLASSIFY_ERROR_##sym,
+ foreach_ip_classify_error
+#undef _
+ IP_CLASSIFY_N_ERROR,
+} ip_classify_error_t;
+
+static char * ip_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip_classify_error
+#undef _
+};
+
+static inline uword
+ip_classify_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_ip4)
+{
+ u32 n_left_from, * from, * to_next;
+ ip_lookup_next_t next_index;
+ vnet_classify_main_t * vcm = &vnet_classify_main;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ u32 n_next;
+
+ if (is_ip4) {
+ n_next = IP4_LOOKUP_N_NEXT;
+ } else {
+ n_next = IP6_LOOKUP_N_NEXT;
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ /* First pass: compute hashes */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ u32 bi0, bi1;
+ u8 * h0, * h1;
+ u32 cd_index0, cd_index1;
+ classify_dpo_t *cd0, * cd1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t * t0, * t1;
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t * p1, * p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = (void *)vlib_buffer_get_current(b0) -
+ ethernet_buffer_header_size(b0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = (void *)vlib_buffer_get_current(b1) -
+ ethernet_buffer_header_size(b1);
+
+ cd_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ cd0 = classify_dpo_get(cd_index0);
+ table_index0 = cd0->cd_table_index;
+
+ cd_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ cd1 = classify_dpo_get(cd_index1);
+ table_index1 = cd1->cd_table_index;
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ vnet_buffer(b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer(b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+ u8 * h0;
+ u32 cd_index0;
+ classify_dpo_t *cd0;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = (void *)vlib_buffer_get_current(b0) -
+ ethernet_buffer_header_size(b0);
+
+ cd_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ cd0 = classify_dpo_get(cd_index0);
+ table_index0 = cd0->cd_table_index;
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = IP_LOOKUP_NEXT_DROP;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+ vnet_classify_entry_t * e0;
+ u64 hash0;
+ u8 * h0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]);
+ vnet_classify_table_t * tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer(p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer(p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+ table_index0 = vnet_buffer(b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+ vnet_buffer(b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE(table_index0 != ~0))
+ {
+ hash0 = vnet_buffer(b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0,
+ now);
+ if (e0)
+ {
+ vnet_buffer(b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < node->n_next_nodes)?
+ e0->next_index:next0;
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (t0->next_table_index != ~0)
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < n_next) ?
+ t0->miss_next_index : next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer(b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < node->n_next_nodes)?
+ e0->next_index:next0;
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->entry_index = e0 ? e0 - t0->entries : ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_CLASSIFY_ERROR_MISS,
+ misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_CLASSIFY_ERROR_HIT,
+ hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_CLASSIFY_ERROR_CHAIN_HIT,
+ chain_hits);
+ return frame->n_vectors;
+}
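+
+/*
+ * Chain walk: when a lookup misses and the table has a next_table_index,
+ * the loop above re-hashes the packet against each successor table in
+ * turn; a hit anywhere in the chain counts as both a hit and a chain
+ * hit, while exhausting the chain falls back to the last table's
+ * miss_next_index.
+ */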
+
+static uword
+ip4_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_classify_inline (vm, node, frame, 1 /* is_ip4 */);
+}
+
+
+VLIB_REGISTER_NODE (ip4_classify_node) = {
+ .function = ip4_classify,
+ .name = "ip4-classify",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_ip_classify_trace,
+ .n_errors = ARRAY_LEN(ip_classify_error_strings),
+ .error_strings = ip_classify_error_strings,
+
+ .n_next_nodes = 0,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_classify_node, ip4_classify)
+
+static uword
+ip6_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_classify_inline (vm, node, frame, 0 /* is_ip4 */);
+}
+
+
+VLIB_REGISTER_NODE (ip6_classify_node) = {
+ .function = ip6_classify,
+ .name = "ip6-classify",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip6-lookup",
+ .format_trace = format_ip_classify_trace,
+ .n_errors = ARRAY_LEN(ip_classify_error_strings),
+ .error_strings = ip_classify_error_strings,
+
+ .n_next_nodes = 0,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_classify_node, ip6_classify)
+
+static clib_error_t *
+ip_classify_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip_classify_init);
diff --git a/src/vnet/classify/policer_classify.c b/src/vnet/classify/policer_classify.c
new file mode 100644
index 00000000..569234fb
--- /dev/null
+++ b/src/vnet/classify/policer_classify.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/classify/policer_classify.h>
+
+static void
+vnet_policer_classify_feature_enable (vlib_main_t * vnm,
+ policer_classify_main_t * pcm,
+ u32 sw_if_index,
+ policer_classify_table_id_t tid,
+ int feature_enable)
+{
+ if (tid == POLICER_CLASSIFY_TABLE_L2)
+ {
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_POLICER_CLAS,
+ feature_enable);
+ }
+ else
+ {
+ vnet_feature_config_main_t * fcm;
+ u8 arc;
+
+ if (tid == POLICER_CLASSIFY_TABLE_IP4)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-policer-classify",
+ sw_if_index, feature_enable, 0, 0);
+ arc = vnet_get_feature_arc_index ("ip4-unicast");
+ }
+      else
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-policer-classify",
+ sw_if_index, feature_enable, 0, 0);
+ arc = vnet_get_feature_arc_index ("ip6-unicast");
+ }
+
+ fcm = vnet_get_feature_arc_config_main (arc);
+ pcm->vnet_config_main[tid] = &fcm->config_main;
+ }
+}
+
+int vnet_set_policer_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 ip4_table_index, u32 ip6_table_index,
+ u32 l2_table_index, u32 is_add)
+{
+ policer_classify_main_t * pcm = &policer_classify_main;
+ vnet_classify_main_t * vcm = pcm->vnet_classify_main;
+ u32 pct[POLICER_CLASSIFY_N_TABLES] = {ip4_table_index, ip6_table_index,
+ l2_table_index};
+ u32 ti;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ for (ti = 0; ti < POLICER_CLASSIFY_N_TABLES; ti++)
+ {
+ if (pct[ti] == ~0)
+ continue;
+
+ if (pool_is_free_index (vcm->tables, pct[ti]))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ vec_validate_init_empty
+ (pcm->classify_table_index_by_sw_if_index[ti], sw_if_index, ~0);
+
+ /* Reject any DEL operation with wrong sw_if_index */
+ if (!is_add &&
+ (pct[ti] != pcm->classify_table_index_by_sw_if_index[ti][sw_if_index]))
+ {
+ clib_warning ("Non-existent intf_idx=%d with table_index=%d for delete",
+ sw_if_index, pct[ti]);
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+ /* Return ok on ADD operation if feature is already enabled */
+ if (is_add &&
+ pcm->classify_table_index_by_sw_if_index[ti][sw_if_index] != ~0)
+ return 0;
+
+ vnet_policer_classify_feature_enable (vm, pcm, sw_if_index, ti, is_add);
+
+ if (is_add)
+ pcm->classify_table_index_by_sw_if_index[ti][sw_if_index] = pct[ti];
+ else
+ pcm->classify_table_index_by_sw_if_index[ti][sw_if_index] = ~0;
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_policer_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 l2_table_index = ~0;
+ u32 is_add = 1;
+ u32 idx_cnt = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "interface %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ idx_cnt++;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ idx_cnt++;
+ else if (unformat (input, "l2-table %d", &l2_table_index))
+ idx_cnt++;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Interface must be specified.");
+
+ if (!idx_cnt)
+ return clib_error_return (0, "Table index must be specified.");
+
+ if (idx_cnt > 1)
+ return clib_error_return (0, "Only one table index per call is allowed.");
+
+ rv = vnet_set_policer_classify_intfc(vm, sw_if_index, ip4_table_index,
+ ip6_table_index, l2_table_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+ return clib_error_return (0, "No such interface");
+
+ case VNET_API_ERROR_NO_SUCH_TABLE:
+ return clib_error_return (0, "No such classifier table");
+
+ default:
+ return clib_error_return (0, "vnet_set_policer_classify_intfc returned %d",
+ rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_policer_classify_command, static) = {
+ .path = "set policer classify",
+ .short_help =
+ "set policer classify interface <int> [ip4-table <index>]\n"
+ " [ip6-table <index>] [l2-table <index>] [del]",
+ .function = set_policer_classify_command_fn,
+};
+
+static uword
+unformat_table_type (unformat_input_t * input, va_list * va)
+{
+ u32 * r = va_arg (*va, u32 *);
+ u32 tid;
+
+ if (unformat (input, "ip4"))
+ tid = POLICER_CLASSIFY_TABLE_IP4;
+ else if (unformat (input, "ip6"))
+ tid = POLICER_CLASSIFY_TABLE_IP6;
+ else if (unformat (input, "l2"))
+ tid = POLICER_CLASSIFY_TABLE_L2;
+ else
+ return 0;
+
+ *r = tid;
+ return 1;
+}
+
+static clib_error_t *
+show_policer_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ policer_classify_main_t * pcm = &policer_classify_main;
+ u32 type = POLICER_CLASSIFY_N_TABLES;
+ u32 * vec_tbl;
+ int i;
+
+ if (unformat (input, "type %U", unformat_table_type, &type))
+ ;
+ else
+ return clib_error_return (0, "Type must be specified.");;
+
+ if (type == POLICER_CLASSIFY_N_TABLES)
+ return clib_error_return (0, "Invalid table type.");
+
+ vec_tbl = pcm->classify_table_index_by_sw_if_index[type];
+
+ if (vec_len(vec_tbl))
+ vlib_cli_output (vm, "%10s%20s\t\t%s", "Intfc idx", "Classify table",
+ "Interface name");
+ else
+ vlib_cli_output (vm, "No tables configured.");
+
+ for (i = 0; i < vec_len (vec_tbl); i++)
+ {
+ if (vec_elt(vec_tbl, i) == ~0)
+ continue;
+
+ vlib_cli_output (vm, "%10d%20d\t\t%U", i, vec_elt(vec_tbl, i),
+ format_vnet_sw_if_index_name, pcm->vnet_main, i);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_policer_classify_command, static) = {
+ .path = "show classify policer",
+ .short_help = "show classify policer type [ip4|ip6|l2]",
+ .function = show_policer_classify_command_fn,
+};
diff --git a/src/vnet/classify/policer_classify.h b/src/vnet/classify/policer_classify.h
new file mode 100644
index 00000000..30656444
--- /dev/null
+++ b/src/vnet/classify/policer_classify.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_policer_classify_h__
+#define __included_vnet_policer_classify_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum {
+ POLICER_CLASSIFY_TABLE_IP4,
+ POLICER_CLASSIFY_TABLE_IP6,
+ POLICER_CLASSIFY_TABLE_L2,
+ POLICER_CLASSIFY_N_TABLES,
+} policer_classify_table_id_t;
+
+typedef enum {
+ POLICER_CLASSIFY_NEXT_INDEX_DROP,
+ POLICER_CLASSIFY_NEXT_INDEX_N_NEXT,
+} policer_classify_next_index_t;
+
+typedef struct {
+ /* Classifier table vectors */
+ u32 * classify_table_index_by_sw_if_index [POLICER_CLASSIFY_N_TABLES];
+
+ /* L2 next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* Convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ vnet_classify_main_t * vnet_classify_main;
+ vnet_config_main_t * vnet_config_main [POLICER_CLASSIFY_N_TABLES];
+} policer_classify_main_t;
+
+policer_classify_main_t policer_classify_main;
+
+int vnet_set_policer_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 ip4_table_index, u32 ip6_table_index,
+ u32 l2_table_index, u32 is_add);
+
+#endif /* __included_vnet_policer_classify_h__ */
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
new file mode 100644
index 00000000..57d86748
--- /dev/null
+++ b/src/vnet/classify/vnet_classify.c
@@ -0,0 +1,2642 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h> /* for API error numbers */
+#include <vnet/l2/l2_classify.h> /* for L2_INPUT_CLASSIFY_NEXT_xxx */
+#include <vnet/fib/fib_table.h>
+
+vnet_classify_main_t vnet_classify_main;
+
+#if VALIDATION_SCAFFOLDING
+/* Validation scaffolding */
+void mv (vnet_classify_table_t * t)
+{
+ void * oldheap;
+
+ oldheap = clib_mem_set_heap (t->mheap);
+ clib_mem_validate();
+ clib_mem_set_heap (oldheap);
+}
+
+void rogue (vnet_classify_table_t * t)
+{
+ int i, j, k;
+ vnet_classify_entry_t * v, * save_v;
+ u32 active_elements = 0;
+ vnet_classify_bucket_t * b;
+
+ for (i = 0; i < t->nbuckets; i++)
+ {
+ b = &t->buckets [i];
+ if (b->offset == 0)
+ continue;
+ save_v = vnet_classify_get_entry (t, b->offset);
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < t->entries_per_page; k++)
+ {
+ v = vnet_classify_entry_at_index
+ (t, save_v, j*t->entries_per_page + k);
+
+ if (vnet_classify_entry_is_busy (v))
+ active_elements++;
+ }
+ }
+ }
+
+ if (active_elements != t->active_elements)
+ clib_warning ("found %u expected %u elts", active_elements,
+ t->active_elements);
+}
+#else
+void mv (vnet_classify_table_t * t) { }
+void rogue (vnet_classify_table_t * t) { }
+#endif
+
+void vnet_classify_register_unformat_l2_next_index_fn (unformat_function_t * fn)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+
+ vec_add1 (cm->unformat_l2_next_index_fns, fn);
+}
+
+void vnet_classify_register_unformat_ip_next_index_fn (unformat_function_t * fn)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+
+ vec_add1 (cm->unformat_ip_next_index_fns, fn);
+}
+
+void
+vnet_classify_register_unformat_acl_next_index_fn (unformat_function_t * fn)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+
+ vec_add1 (cm->unformat_acl_next_index_fns, fn);
+}
+
+void
+vnet_classify_register_unformat_policer_next_index_fn (unformat_function_t * fn)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+
+ vec_add1 (cm->unformat_policer_next_index_fns, fn);
+}
+
+void vnet_classify_register_unformat_opaque_index_fn (unformat_function_t * fn)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+
+ vec_add1 (cm->unformat_opaque_index_fns, fn);
+}
+
+vnet_classify_table_t *
+vnet_classify_new_table (vnet_classify_main_t *cm,
+ u8 * mask, u32 nbuckets, u32 memory_size,
+ u32 skip_n_vectors,
+ u32 match_n_vectors)
+{
+ vnet_classify_table_t * t;
+ void * oldheap;
+
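+ /* Round the bucket count up to a power of two so bucket selection
+ can be done with a simple mask of the hash (see log2_nbuckets). */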
+ nbuckets = 1 << (max_log2 (nbuckets));
+
+ pool_get_aligned (cm->tables, t, CLIB_CACHE_LINE_BYTES);
+ memset(t, 0, sizeof (*t));
+
+ vec_validate_aligned (t->mask, match_n_vectors - 1, sizeof(u32x4));
+ clib_memcpy (t->mask, mask, match_n_vectors * sizeof (u32x4));
+
+ t->next_table_index = ~0;
+ t->nbuckets = nbuckets;
+ t->log2_nbuckets = max_log2 (nbuckets);
+ t->match_n_vectors = match_n_vectors;
+ t->skip_n_vectors = skip_n_vectors;
+ t->entries_per_page = 2;
+
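+ /* Each table owns a private mheap; deleting the table later frees
+ all of its entries with a single mheap_free(). */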
+ t->mheap = mheap_alloc (0 /* use VM */, memory_size);
+
+ vec_validate_aligned (t->buckets, nbuckets - 1, CLIB_CACHE_LINE_BYTES);
+ oldheap = clib_mem_set_heap (t->mheap);
+
+ t->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ t->writer_lock[0] = 0;
+
+ clib_mem_set_heap (oldheap);
+ return (t);
+}
+
+void vnet_classify_delete_table_index (vnet_classify_main_t *cm,
+ u32 table_index, int del_chain)
+{
+ vnet_classify_table_t * t;
+
+ /* Tolerate multiple frees, up to a point */
+ if (pool_is_free_index (cm->tables, table_index))
+ return;
+
+ t = pool_elt_at_index (cm->tables, table_index);
+ if (del_chain && t->next_table_index != ~0)
+ /* Recursively delete the entire chain */
+ vnet_classify_delete_table_index (cm, t->next_table_index, del_chain);
+
+ vec_free (t->mask);
+ vec_free (t->buckets);
+ mheap_free (t->mheap);
+
+ pool_put (cm->tables, t);
+}
+
+static vnet_classify_entry_t *
+vnet_classify_entry_alloc (vnet_classify_table_t * t, u32 log2_pages)
+{
+ vnet_classify_entry_t * rv = 0;
+ u32 required_length;
+ void * oldheap;
+
+ ASSERT (t->writer_lock[0]);
+ required_length =
+ (sizeof(vnet_classify_entry_t) + (t->match_n_vectors*sizeof(u32x4)))
+ * t->entries_per_page * (1<<log2_pages);
+
+ if (log2_pages >= vec_len (t->freelists) || t->freelists [log2_pages] == 0)
+ {
+ oldheap = clib_mem_set_heap (t->mheap);
+
+ vec_validate (t->freelists, log2_pages);
+
+ rv = clib_mem_alloc_aligned (required_length, CLIB_CACHE_LINE_BYTES);
+ clib_mem_set_heap (oldheap);
+ goto initialize;
+ }
+ rv = t->freelists[log2_pages];
+ t->freelists[log2_pages] = rv->next_free;
+
+initialize:
+ ASSERT(rv);
+
+ memset (rv, 0xff, required_length);
+ return rv;
+}
+
+static void
+vnet_classify_entry_free (vnet_classify_table_t * t,
+ vnet_classify_entry_t * v, u32 log2_pages)
+{
+ ASSERT (t->writer_lock[0]);
+
+ ASSERT(vec_len (t->freelists) > log2_pages);
+
+ v->next_free = t->freelists[log2_pages];
+ t->freelists[log2_pages] = v;
+}
+
+static inline void make_working_copy
+(vnet_classify_table_t * t, vnet_classify_bucket_t * b)
+{
+ vnet_classify_entry_t * v;
+ vnet_classify_bucket_t working_bucket __attribute__((aligned (8)));
+ void * oldheap;
+ vnet_classify_entry_t * working_copy;
+ u32 thread_index = vlib_get_thread_index();
+ int working_copy_length, required_length;
+
+ if (thread_index >= vec_len (t->working_copies))
+ {
+ oldheap = clib_mem_set_heap (t->mheap);
+ vec_validate (t->working_copies, thread_index);
+ vec_validate (t->working_copy_lengths, thread_index);
+ t->working_copy_lengths[thread_index] = -1;
+ clib_mem_set_heap (oldheap);
+ }
+
+ /*
+ * working_copies are per-cpu so that near-simultaneous
+ * updates from multiple threads will not result in sporadic, spurious
+ * lookup failures.
+ */
+ working_copy = t->working_copies[thread_index];
+ working_copy_length = t->working_copy_lengths[thread_index];
+ required_length =
+ (sizeof(vnet_classify_entry_t) + (t->match_n_vectors*sizeof(u32x4)))
+ * t->entries_per_page * (1<<b->log2_pages);
+
+ t->saved_bucket.as_u64 = b->as_u64;
+ oldheap = clib_mem_set_heap (t->mheap);
+
+ if (required_length > working_copy_length)
+ {
+ if (working_copy)
+ clib_mem_free (working_copy);
+ working_copy =
+ clib_mem_alloc_aligned (required_length, CLIB_CACHE_LINE_BYTES);
+ t->working_copies[thread_index] = working_copy;
+ }
+
+ clib_mem_set_heap (oldheap);
+
+ v = vnet_classify_get_entry (t, b->offset);
+
+ clib_memcpy (working_copy, v, required_length);
+
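+ /* Repoint the bucket at the working copy (offset swap behind a
+ memory barrier) so concurrent readers never observe a
+ half-updated page while it is being edited. */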
+ working_bucket.as_u64 = b->as_u64;
+ working_bucket.offset = vnet_classify_get_offset (t, working_copy);
+ CLIB_MEMORY_BARRIER();
+ b->as_u64 = working_bucket.as_u64;
+ t->working_copies[thread_index] = working_copy;
+}
+
+static vnet_classify_entry_t *
+split_and_rehash (vnet_classify_table_t * t,
+ vnet_classify_entry_t * old_values, u32 old_log2_pages,
+ u32 new_log2_pages)
+{
+ vnet_classify_entry_t * new_values, * v, * new_v;
+ int i, j, length_in_entries;
+
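+ /* Allocate a larger block (new_log2_pages) and rehash every busy
+ entry into it; returns 0 if collisions still overflow a page, so
+ the caller can retry with more pages or fall back to linear
+ search. */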
+ new_values = vnet_classify_entry_alloc (t, new_log2_pages);
+ length_in_entries = (1<<old_log2_pages) * t->entries_per_page;
+
+ for (i = 0; i < length_in_entries; i++)
+ {
+ u64 new_hash;
+
+ v = vnet_classify_entry_at_index (t, old_values, i);
+
+ if (vnet_classify_entry_is_busy (v))
+ {
+ /* Hack so we can use the packet hash routine */
+ u8 * key_minus_skip;
+ key_minus_skip = (u8 *) v->key;
+ key_minus_skip -= t->skip_n_vectors * sizeof (u32x4);
+
+ new_hash = vnet_classify_hash_packet (t, key_minus_skip);
+ new_hash >>= t->log2_nbuckets;
+ new_hash &= (1<<new_log2_pages) - 1;
+
+ for (j = 0; j < t->entries_per_page; j++)
+ {
+ new_v = vnet_classify_entry_at_index (t, new_values,
+ new_hash + j);
+
+ if (vnet_classify_entry_is_free (new_v))
+ {
+ clib_memcpy (new_v, v, sizeof (vnet_classify_entry_t)
+ + (t->match_n_vectors * sizeof (u32x4)));
+ new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ goto doublebreak;
+ }
+ }
+ /* Crap. Tell caller to try again */
+ vnet_classify_entry_free (t, new_values, new_log2_pages);
+ return 0;
+ doublebreak:
+ ;
+ }
+ }
+ return new_values;
+}
+
+static vnet_classify_entry_t *
+split_and_rehash_linear (vnet_classify_table_t * t,
+ vnet_classify_entry_t * old_values,
+ u32 old_log2_pages,
+ u32 new_log2_pages)
+{
+ vnet_classify_entry_t * new_values, * v, * new_v;
+ int i, j, new_length_in_entries, old_length_in_entries;
+
+ new_values = vnet_classify_entry_alloc (t, new_log2_pages);
+ new_length_in_entries = (1<<new_log2_pages) * t->entries_per_page;
+ old_length_in_entries = (1<<old_log2_pages) * t->entries_per_page;
+
+ j = 0;
+ for (i = 0; i < old_length_in_entries; i++)
+ {
+ v = vnet_classify_entry_at_index (t, old_values, i);
+
+ if (vnet_classify_entry_is_busy (v))
+ {
+ for (; j < new_length_in_entries; j++)
+ {
+ new_v = vnet_classify_entry_at_index (t, new_values, j);
+
+ if (vnet_classify_entry_is_busy (new_v))
+ {
+ clib_warning ("BUG: linear rehash new entry not free!");
+ continue;
+ }
+ clib_memcpy (new_v, v, sizeof (vnet_classify_entry_t)
+ + (t->match_n_vectors * sizeof (u32x4)));
+ new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ j++;
+ goto doublebreak;
+ }
+ /*
+ * Crap. Tell caller to try again.
+ * This should never happen...
+ */
+ clib_warning ("BUG: linear rehash failed!");
+ vnet_classify_entry_free (t, new_values, new_log2_pages);
+ return 0;
+ }
+ doublebreak:
+ ;
+ }
+
+ return new_values;
+}
+
+static void
+vnet_classify_entry_claim_resource (vnet_classify_entry_t *e)
+{
+ switch (e->action)
+ {
+ case CLASSIFY_ACTION_SET_IP4_FIB_INDEX:
+ fib_table_lock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY);
+ break;
+ case CLASSIFY_ACTION_SET_IP6_FIB_INDEX:
+ fib_table_lock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY);
+ break;
+ }
+}
+
+static void
+vnet_classify_entry_release_resource (vnet_classify_entry_t *e)
+{
+ switch (e->action)
+ {
+ case CLASSIFY_ACTION_SET_IP4_FIB_INDEX:
+ fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY);
+ break;
+ case CLASSIFY_ACTION_SET_IP6_FIB_INDEX:
+ fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY);
+ break;
+ }
+}
+
+int vnet_classify_add_del (vnet_classify_table_t * t,
+ vnet_classify_entry_t * add_v,
+ int is_add)
+{
+ u32 bucket_index;
+ vnet_classify_bucket_t * b, tmp_b;
+ vnet_classify_entry_t * v, * new_v, * save_new_v, * working_copy, * save_v;
+ u32 value_index;
+ int rv = 0;
+ int i;
+ u64 hash, new_hash;
+ u32 limit;
+ u32 old_log2_pages, new_log2_pages;
+ u32 thread_index = vlib_get_thread_index();
+ u8 * key_minus_skip;
+ int resplit_once = 0;
+ int mark_bucket_linear;
+
+ ASSERT ((add_v->flags & VNET_CLASSIFY_ENTRY_FREE) == 0);
+
+ key_minus_skip = (u8 *) add_v->key;
+ key_minus_skip -= t->skip_n_vectors * sizeof (u32x4);
+
+ hash = vnet_classify_hash_packet (t, key_minus_skip);
+
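+ /* Low-order hash bits select the bucket; the remaining bits (after
+ the shift below) select the page within the bucket. */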
+ bucket_index = hash & (t->nbuckets-1);
+ b = &t->buckets[bucket_index];
+
+ hash >>= t->log2_nbuckets;
+
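+ /* Single-writer spinlock: lookups are lockless, so only concurrent
+ writers need to serialize here. */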
+ while (__sync_lock_test_and_set (t->writer_lock, 1))
+ ;
+
+ /* First elt in the bucket? */
+ if (b->offset == 0)
+ {
+ if (is_add == 0)
+ {
+ rv = -1;
+ goto unlock;
+ }
+
+ v = vnet_classify_entry_alloc (t, 0 /* new_log2_pages */);
+ clib_memcpy (v, add_v, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof (u32x4));
+ v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ vnet_classify_entry_claim_resource (v);
+
+ tmp_b.as_u64 = 0;
+ tmp_b.offset = vnet_classify_get_offset (t, v);
+
+ b->as_u64 = tmp_b.as_u64;
+ t->active_elements ++;
+
+ goto unlock;
+ }
+
+ make_working_copy (t, b);
+
+ save_v = vnet_classify_get_entry (t, t->saved_bucket.offset);
+ value_index = hash & ((1<<t->saved_bucket.log2_pages)-1);
+ limit = t->entries_per_page;
+ if (PREDICT_FALSE (b->linear_search))
+ {
+ value_index = 0;
+ limit *= (1<<b->log2_pages);
+ }
+
+ if (is_add)
+ {
+ /*
+ * For obvious (in hindsight) reasons, see if we're supposed to
+ * replace an existing key, then look for an empty slot.
+ */
+
+ for (i = 0; i < limit; i++)
+ {
+ v = vnet_classify_entry_at_index (t, save_v, value_index + i);
+
+ if (!memcmp (v->key, add_v->key, t->match_n_vectors * sizeof (u32x4)))
+ {
+ clib_memcpy (v, add_v, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof(u32x4));
+ v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ vnet_classify_entry_claim_resource (v);
+
+ CLIB_MEMORY_BARRIER();
+ /* Restore the previous (k,v) pairs */
+ b->as_u64 = t->saved_bucket.as_u64;
+ goto unlock;
+ }
+ }
+ for (i = 0; i < limit; i++)
+ {
+ v = vnet_classify_entry_at_index (t, save_v, value_index + i);
+
+ if (vnet_classify_entry_is_free (v))
+ {
+ clib_memcpy (v, add_v, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof(u32x4));
+ v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ vnet_classify_entry_claim_resource (v);
+
+ CLIB_MEMORY_BARRIER();
+ b->as_u64 = t->saved_bucket.as_u64;
+ t->active_elements ++;
+ goto unlock;
+ }
+ }
+ /* no room at the inn... split case... */
+ }
+ else
+ {
+ for (i = 0; i < limit; i++)
+ {
+ v = vnet_classify_entry_at_index (t, save_v, value_index + i);
+
+ if (!memcmp (v->key, add_v->key, t->match_n_vectors * sizeof (u32x4)))
+ {
+ vnet_classify_entry_release_resource (v);
+ memset (v, 0xff, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof(u32x4));
+ v->flags |= VNET_CLASSIFY_ENTRY_FREE;
+
+ CLIB_MEMORY_BARRIER();
+ b->as_u64 = t->saved_bucket.as_u64;
+ t->active_elements --;
+ goto unlock;
+ }
+ }
+ rv = -3;
+ b->as_u64 = t->saved_bucket.as_u64;
+ goto unlock;
+ }
+
+ old_log2_pages = t->saved_bucket.log2_pages;
+ new_log2_pages = old_log2_pages + 1;
+ working_copy = t->working_copies[thread_index];
+
+ if (t->saved_bucket.linear_search)
+ goto linear_resplit;
+
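+ /* Split strategy: try doubling the bucket once, then once more; if
+ entries are still pinned to the same page, fall back to a
+ linear-search bucket. */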
+ mark_bucket_linear = 0;
+
+ new_v = split_and_rehash (t, working_copy, old_log2_pages, new_log2_pages);
+
+ if (new_v == 0)
+ {
+ try_resplit:
+ resplit_once = 1;
+ new_log2_pages++;
+
+ new_v = split_and_rehash (t, working_copy, old_log2_pages,
+ new_log2_pages);
+ if (new_v == 0)
+ {
+ mark_linear:
+ new_log2_pages--;
+
+ linear_resplit:
+ /* pinned collisions, use linear search */
+ new_v = split_and_rehash_linear (t, working_copy, old_log2_pages,
+ new_log2_pages);
+ /* A new linear-search bucket? */
+ if (!t->saved_bucket.linear_search)
+ t->linear_buckets ++;
+ mark_bucket_linear = 1;
+ }
+ }
+
+ /* Try to add the new entry */
+ save_new_v = new_v;
+
+ key_minus_skip = (u8 *) add_v->key;
+ key_minus_skip -= t->skip_n_vectors * sizeof (u32x4);
+
+ new_hash = vnet_classify_hash_packet_inline (t, key_minus_skip);
+ new_hash >>= t->log2_nbuckets;
+ new_hash &= (1<<new_log2_pages) - 1;
+
+ limit = t->entries_per_page;
+ if (mark_bucket_linear)
+ {
+ limit *= (1<<new_log2_pages);
+ new_hash = 0;
+ }
+
+ for (i = 0; i < limit; i++)
+ {
+ new_v = vnet_classify_entry_at_index (t, save_new_v, new_hash + i);
+
+ if (vnet_classify_entry_is_free (new_v))
+ {
+ clib_memcpy (new_v, add_v, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof(u32x4));
+ new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ vnet_classify_entry_claim_resource (new_v);
+
+ goto expand_ok;
+ }
+ }
+ /* Crap. Try again */
+ vnet_classify_entry_free (t, save_new_v, new_log2_pages);
+ new_log2_pages++;
+
+ if (resplit_once)
+ goto mark_linear;
+ else
+ goto try_resplit;
+
+ expand_ok:
+ tmp_b.log2_pages = new_log2_pages;
+ tmp_b.offset = vnet_classify_get_offset (t, save_new_v);
+ tmp_b.linear_search = mark_bucket_linear;
+
+ CLIB_MEMORY_BARRIER();
+ b->as_u64 = tmp_b.as_u64;
+ t->active_elements ++;
+ v = vnet_classify_get_entry (t, t->saved_bucket.offset);
+ vnet_classify_entry_free (t, v, old_log2_pages);
+
+ unlock:
+ CLIB_MEMORY_BARRIER();
+ t->writer_lock[0] = 0;
+ return rv;
+}
+
+typedef CLIB_PACKED(struct {
+ ethernet_header_t eh;
+ ip4_header_t ip;
+}) classify_data_or_mask_t;
+
+u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h)
+{
+ return vnet_classify_hash_packet_inline (t, h);
+}
+
+vnet_classify_entry_t *
+vnet_classify_find_entry (vnet_classify_table_t * t,
+ u8 * h, u64 hash, f64 now)
+{
+ return vnet_classify_find_entry_inline (t, h, hash, now);
+}
+
+static u8 * format_classify_entry (u8 * s, va_list * args)
+{
+ vnet_classify_table_t * t = va_arg (*args, vnet_classify_table_t *);
+ vnet_classify_entry_t * e = va_arg (*args, vnet_classify_entry_t *);
+
+ s = format
+ (s, "[%u]: next_index %d advance %d opaque %d action %d metadata %d\n",
+ vnet_classify_get_offset (t, e), e->next_index, e->advance,
+ e->opaque_index, e->action, e->metadata);
+
+ s = format (s, " k: %U\n", format_hex_bytes, e->key,
+ t->match_n_vectors * sizeof(u32x4));
+
+ if (vnet_classify_entry_is_busy (e))
+ s = format (s, " hits %lld, last_heard %.2f\n",
+ e->hits, e->last_heard);
+ else
+ s = format (s, " entry is free\n");
+ return s;
+}
+
+u8 * format_classify_table (u8 * s, va_list * args)
+{
+ vnet_classify_table_t * t = va_arg (*args, vnet_classify_table_t *);
+ int verbose = va_arg (*args, int);
+ vnet_classify_bucket_t * b;
+ vnet_classify_entry_t * v, * save_v;
+ int i, j, k;
+ u64 active_elements = 0;
+
+ for (i = 0; i < t->nbuckets; i++)
+ {
+ b = &t->buckets [i];
+ if (b->offset == 0)
+ {
+ if (verbose > 1)
+ s = format (s, "[%d]: empty\n", i);
+ continue;
+ }
+
+ if (verbose)
+ {
+ s = format (s, "[%d]: heap offset %d, elts %d, %s\n", i,
+ b->offset, (1<<b->log2_pages)*t->entries_per_page,
+ b->linear_search ? "LINEAR" : "normal");
+ }
+
+ save_v = vnet_classify_get_entry (t, b->offset);
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < t->entries_per_page; k++)
+ {
+
+ v = vnet_classify_entry_at_index (t, save_v,
+ j*t->entries_per_page + k);
+
+ if (vnet_classify_entry_is_free (v))
+ {
+ if (verbose > 1)
+ s = format (s, " %d: empty\n",
+ j * t->entries_per_page + k);
+ continue;
+ }
+ if (verbose)
+ {
+ s = format (s, " %d: %U\n",
+ j * t->entries_per_page + k,
+ format_classify_entry, t, v);
+ }
+ active_elements++;
+ }
+ }
+ }
+
+ s = format (s, " %lld active elements\n", active_elements);
+ s = format (s, " %d free lists\n", vec_len (t->freelists));
+ s = format (s, " %d linear-search buckets\n", t->linear_buckets);
+ return s;
+}
+
+int vnet_classify_add_del_table (vnet_classify_main_t * cm,
+ u8 * mask,
+ u32 nbuckets,
+ u32 memory_size,
+ u32 skip,
+ u32 match,
+ u32 next_table_index,
+ u32 miss_next_index,
+ u32 * table_index,
+ u8 current_data_flag,
+ i16 current_data_offset,
+ int is_add,
+ int del_chain)
+{
+ vnet_classify_table_t * t;
+
+ if (is_add)
+ {
+ if (*table_index == ~0) /* add */
+ {
+ if (memory_size == 0)
+ return VNET_API_ERROR_INVALID_MEMORY_SIZE;
+
+ if (nbuckets == 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ t = vnet_classify_new_table (cm, mask, nbuckets, memory_size,
+ skip, match);
+ t->next_table_index = next_table_index;
+ t->miss_next_index = miss_next_index;
+ t->current_data_flag = current_data_flag;
+ t->current_data_offset = current_data_offset;
+ *table_index = t - cm->tables;
+ }
+ else /* update */
+ {
+ t = pool_elt_at_index (cm->tables, *table_index);
+ t->next_table_index = next_table_index;
+ }
+ return 0;
+ }
+
+ vnet_classify_delete_table_index (cm, *table_index, del_chain);
+ return 0;
+}
+
+#define foreach_tcp_proto_field \
+_(src) \
+_(dst)
+
+#define foreach_udp_proto_field \
+_(src_port) \
+_(dst_port)
+
+#define foreach_ip4_proto_field \
+_(src_address) \
+_(dst_address) \
+_(tos) \
+_(length) \
+_(fragment_id) \
+_(ttl) \
+_(protocol) \
+_(checksum)
+
+uword unformat_tcp_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u8 * mask = 0;
+ u8 found_something = 0;
+ tcp_header_t * tcp;
+
+#define _(a) u8 a=0;
+ foreach_tcp_proto_field;
+#undef _
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0) ;
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_tcp_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_tcp_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*tcp) - 1);
+
+ tcp = (tcp_header_t *) mask;
+
+#define _(a) if (a) memset (&tcp->a, 0xff, sizeof (tcp->a));
+ foreach_tcp_proto_field;
+#undef _
+
+ *maskp = mask;
+ return 1;
+}
+
+uword unformat_udp_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u8 * mask = 0;
+ u8 found_something = 0;
+ udp_header_t * udp;
+
+#define _(a) u8 a=0;
+ foreach_udp_proto_field;
+#undef _
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0) ;
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_udp_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_udp_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*udp) - 1);
+
+ udp = (udp_header_t *) mask;
+
+#define _(a) if (a) memset (&udp->a, 0xff, sizeof (udp->a));
+ foreach_udp_proto_field;
+#undef _
+
+ *maskp = mask;
+ return 1;
+}
+
+typedef struct {
+ u16 src_port, dst_port;
+} tcpudp_header_t;
+
+uword unformat_l4_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u16 src_port = 0, dst_port = 0;
+ tcpudp_header_t * tcpudp;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "tcp %U", unformat_tcp_mask, maskp))
+ return 1;
+ else if (unformat (input, "udp %U", unformat_udp_mask, maskp))
+ return 1;
+ else if (unformat (input, "src_port"))
+ src_port = 0xFFFF;
+ else if (unformat (input, "dst_port"))
+ dst_port = 0xFFFF;
+ else
+ return 0;
+ }
+
+ if (!src_port && !dst_port)
+ return 0;
+
+ u8 * mask = 0;
+ vec_validate (mask, sizeof (tcpudp_header_t) - 1);
+
+ tcpudp = (tcpudp_header_t *) mask;
+ tcpudp->src_port = src_port;
+ tcpudp->dst_port = dst_port;
+
+ *maskp = mask;
+
+ return 1;
+}
+
+uword unformat_ip4_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u8 * mask = 0;
+ u8 found_something = 0;
+ ip4_header_t * ip;
+
+#define _(a) u8 a=0;
+ foreach_ip4_proto_field;
+#undef _
+ u8 version = 0;
+ u8 hdr_length = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version"))
+ version = 1;
+ else if (unformat (input, "hdr_length"))
+ hdr_length = 1;
+ else if (unformat (input, "src"))
+ src_address = 1;
+ else if (unformat (input, "dst"))
+ dst_address = 1;
+ else if (unformat (input, "proto"))
+ protocol = 1;
+
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_ip4_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_ip4_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*ip) - 1);
+
+ ip = (ip4_header_t *) mask;
+
+#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a));
+ foreach_ip4_proto_field;
+#undef _
+
+ ip->ip_version_and_header_length = 0;
+
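+ /* version and header length share one octet: version in the high
+ nibble, header length in the low nibble. */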
+ if (version)
+ ip->ip_version_and_header_length |= 0xF0;
+
+ if (hdr_length)
+ ip->ip_version_and_header_length |= 0x0F;
+
+ *maskp = mask;
+ return 1;
+}
+
+#define foreach_ip6_proto_field \
+_(src_address) \
+_(dst_address) \
+_(payload_length) \
+_(hop_limit) \
+_(protocol)
+
+uword unformat_ip6_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u8 * mask = 0;
+ u8 found_something = 0;
+ ip6_header_t * ip;
+ u32 ip_version_traffic_class_and_flow_label;
+
+#define _(a) u8 a=0;
+ foreach_ip6_proto_field;
+#undef _
+ u8 version = 0;
+ u8 traffic_class = 0;
+ u8 flow_label = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version"))
+ version = 1;
+ else if (unformat (input, "traffic-class"))
+ traffic_class = 1;
+ else if (unformat (input, "flow-label"))
+ flow_label = 1;
+ else if (unformat (input, "src"))
+ src_address = 1;
+ else if (unformat (input, "dst"))
+ dst_address = 1;
+ else if (unformat (input, "proto"))
+ protocol = 1;
+
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_ip6_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_ip6_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*ip) - 1);
+
+ ip = (ip6_header_t *) mask;
+
+#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a));
+ foreach_ip6_proto_field;
+#undef _
+
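+ /* First 32 bits of the IPv6 header: version [31:28],
+ traffic class [27:20], flow label [19:0]. */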
+ ip_version_traffic_class_and_flow_label = 0;
+
+ if (version)
+ ip_version_traffic_class_and_flow_label |= 0xF0000000;
+
+ if (traffic_class)
+ ip_version_traffic_class_and_flow_label |= 0x0FF00000;
+
+ if (flow_label)
+ ip_version_traffic_class_and_flow_label |= 0x000FFFFF;
+
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label);
+
+ *maskp = mask;
+ return 1;
+}
+
+uword unformat_l3_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "ip4 %U", unformat_ip4_mask, maskp))
+ return 1;
+ else if (unformat (input, "ip6 %U", unformat_ip6_mask, maskp))
+ return 1;
+ else
+ break;
+ }
+ return 0;
+}
+
+uword unformat_l2_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u8 * mask = 0;
+ u8 src = 0;
+ u8 dst = 0;
+ u8 proto = 0;
+ u8 tag1 = 0;
+ u8 tag2 = 0;
+ u8 ignore_tag1 = 0;
+ u8 ignore_tag2 = 0;
+ u8 cos1 = 0;
+ u8 cos2 = 0;
+ u8 dot1q = 0;
+ u8 dot1ad = 0;
+ int len = 14;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "src"))
+ src = 1;
+ else if (unformat (input, "dst"))
+ dst = 1;
+ else if (unformat (input, "proto"))
+ proto = 1;
+ else if (unformat (input, "tag1"))
+ tag1 = 1;
+ else if (unformat (input, "tag2"))
+ tag2 = 1;
+ else if (unformat (input, "ignore-tag1"))
+ ignore_tag1 = 1;
+ else if (unformat (input, "ignore-tag2"))
+ ignore_tag2 = 1;
+ else if (unformat (input, "cos1"))
+ cos1 = 1;
+ else if (unformat (input, "cos2"))
+ cos2 = 1;
+ else if (unformat (input, "dot1q"))
+ dot1q = 1;
+ else if (unformat (input, "dot1ad"))
+ dot1ad = 1;
+ else
+ break;
+ }
+ if ((src + dst + proto + tag1 + tag2 + dot1q + dot1ad +
+ ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0)
+ return 0;
+
+ if (tag1 || ignore_tag1 || cos1 || dot1q)
+ len = 18;
+ if (tag2 || ignore_tag2 || cos2 || dot1ad)
+ len = 22;
+
+ vec_validate (mask, len-1);
+
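+ /* Ethernet layout assumed below: dst MAC at bytes 0-5, src MAC at
+ bytes 6-11, then 0, 1 or 2 VLAN tags ahead of the ethertype. */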
+ if (dst)
+ memset (mask, 0xff, 6);
+
+ if (src)
+ memset (mask + 6, 0xff, 6);
+
+ if (tag2 || dot1ad)
+ {
+ /* inner vlan tag */
+ if (tag2)
+ {
+ mask[19] = 0xff;
+ mask[18] = 0x0f;
+ }
+ if (cos2)
+ mask[18] |= 0xe0;
+ if (proto)
+ mask[21] = mask [20] = 0xff;
+ if (tag1)
+ {
+ mask [15] = 0xff;
+ mask [14] = 0x0f;
+ }
+ if (cos1)
+ mask[14] |= 0xe0;
+ *maskp = mask;
+ return 1;
+ }
+ if (tag1 || dot1q)
+ {
+ if (tag1)
+ {
+ mask [15] = 0xff;
+ mask [14] = 0x0f;
+ }
+ if (cos1)
+ mask[14] |= 0xe0;
+ if (proto)
+ mask[16] = mask [17] = 0xff;
+ *maskp = mask;
+ return 1;
+ }
+ if (cos2)
+ mask[18] |= 0xe0;
+ if (cos1)
+ mask[14] |= 0xe0;
+ if (proto)
+ mask[12] = mask [13] = 0xff;
+
+ *maskp = mask;
+ return 1;
+}
+
+uword unformat_classify_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u32 * skipp = va_arg (*args, u32 *);
+ u32 * matchp = va_arg (*args, u32 *);
+ u32 match;
+ u8 * mask = 0;
+ u8 * l2 = 0;
+ u8 * l3 = 0;
+ u8 * l4 = 0;
+ int i;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "hex %U", unformat_hex_string, &mask))
+ ;
+ else if (unformat (input, "l2 %U", unformat_l2_mask, &l2))
+ ;
+ else if (unformat (input, "l3 %U", unformat_l3_mask, &l3))
+ ;
+ else if (unformat (input, "l4 %U", unformat_l4_mask, &l4))
+ ;
+ else
+ break;
+ }
+
+ if (l4 && !l3) {
+ vec_free (mask);
+ vec_free (l2);
+ vec_free (l4);
+ return 0;
+ }
+
+ if (mask || l2 || l3 || l4)
+ {
+ if (l2 || l3 || l4)
+ {
+ /* "With a free Ethernet header in every package" */
+ if (l2 == 0)
+ vec_validate (l2, 13);
+ mask = l2;
+ if (l3)
+ {
+ vec_append (mask, l3);
+ vec_free (l3);
+ }
+ if (l4)
+ {
+ vec_append (mask, l4);
+ vec_free (l4);
+ }
+ }
+
+ /* Scan forward looking for the first significant mask octet */
+ for (i = 0; i < vec_len (mask); i++)
+ if (mask[i])
+ break;
+
+ /* compute (skip, match) params */
+ *skipp = i / sizeof(u32x4);
+ vec_delete (mask, *skipp * sizeof(u32x4), 0);
+
+ /* Pad mask to an even multiple of the vector size */
+ while (vec_len (mask) % sizeof (u32x4))
+ vec_add1 (mask, 0);
+
+ match = vec_len (mask) / sizeof (u32x4);
+
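+ /* Drop trailing all-zero vectors from the mask; they can never
+ affect a match and would only slow down the compare. */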
+ for (i = match*sizeof(u32x4); i > 0; i-= sizeof(u32x4))
+ {
+ u64 *tmp = (u64 *)(mask + (i-sizeof(u32x4)));
+ if (*tmp || *(tmp+1))
+ break;
+ match--;
+ }
+ if (match == 0)
+ clib_warning ("BUG: match 0");
+
+ _vec_len (mask) = match * sizeof(u32x4);
+
+ *matchp = match;
+ *maskp = mask;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+#define foreach_l2_input_next \
+_(drop, DROP) \
+_(ethernet, ETHERNET_INPUT) \
+_(ip4, IP4_INPUT) \
+_(ip6, IP6_INPUT) \
+_(li, LI)
+
+uword unformat_l2_input_next_index (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 * miss_next_indexp = va_arg (*args, u32 *);
+ u32 next_index = 0;
+ u32 tmp;
+ int i;
+
+ /* First try registered unformat fns, allowing override... */
+ for (i = 0; i < vec_len (cm->unformat_l2_next_index_fns); i++)
+ {
+ if (unformat (input, "%U", cm->unformat_l2_next_index_fns[i], &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+ }
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = L2_INPUT_CLASSIFY_NEXT_##N; goto out;}
+ foreach_l2_input_next;
+#undef _
+
+ if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+ out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+#define foreach_l2_output_next \
+_(drop, DROP)
+
+uword unformat_l2_output_next_index (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 * miss_next_indexp = va_arg (*args, u32 *);
+ u32 next_index = 0;
+ u32 tmp;
+ int i;
+
+ /* First try registered unformat fns, allowing override... */
+ for (i = 0; i < vec_len (cm->unformat_l2_next_index_fns); i++)
+ {
+ if (unformat (input, "%U", cm->unformat_l2_next_index_fns[i], &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+ }
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = L2_OUTPUT_CLASSIFY_NEXT_##N; goto out;}
+ foreach_l2_output_next;
+#undef _
+
+ if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+ out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+#define foreach_ip_next \
+_(drop, DROP) \
+_(rewrite, REWRITE)
+
+uword unformat_ip_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 * miss_next_indexp = va_arg (*args, u32 *);
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 next_index = 0;
+ u32 tmp;
+ int i;
+
+ /* First try registered unformat fns, allowing override... */
+ for (i = 0; i < vec_len (cm->unformat_ip_next_index_fns); i++)
+ {
+ if (unformat (input, "%U", cm->unformat_ip_next_index_fns[i], &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+ }
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = IP_LOOKUP_NEXT_##N; goto out;}
+ foreach_ip_next;
+#undef _
+
+ if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+ out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+#define foreach_acl_next \
+_(deny, DENY)
+
+uword unformat_acl_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 * next_indexp = va_arg (*args, u32 *);
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 next_index = 0;
+ u32 tmp;
+ int i;
+
+ /* First try registered unformat fns, allowing override... */
+ for (i = 0; i < vec_len (cm->unformat_acl_next_index_fns); i++)
+ {
+ if (unformat (input, "%U", cm->unformat_acl_next_index_fns[i], &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+ }
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = ACL_NEXT_INDEX_##N; goto out;}
+ foreach_acl_next;
+#undef _
+
+ if (unformat (input, "permit"))
+ {
+ next_index = ~0;
+ goto out;
+ }
+ else if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+ out:
+ *next_indexp = next_index;
+ return 1;
+}
+
+uword unformat_policer_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 * next_indexp = va_arg (*args, u32 *);
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 next_index = 0;
+ u32 tmp;
+ int i;
+
+ /* First try registered unformat fns, allowing override... */
+ for (i = 0; i < vec_len (cm->unformat_policer_next_index_fns); i++)
+ {
+ if (unformat (input, "%U", cm->unformat_policer_next_index_fns[i], &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+ }
+
+ if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+ out:
+ *next_indexp = next_index;
+ return 1;
+}
+
+static clib_error_t *
+classify_table_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 nbuckets = 2;
+ u32 skip = ~0;
+ u32 match = ~0;
+ int is_add = 1;
+ int del_chain = 0;
+ u32 table_index = ~0;
+ u32 next_table_index = ~0;
+ u32 miss_next_index = ~0;
+ u32 memory_size = 2<<20;
+ u32 tmp;
+ u32 current_data_flag = 0;
+ int current_data_offset = 0;
+
+ u8 * mask = 0;
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "del-chain"))
+ {
+ is_add = 0;
+ del_chain = 1;
+ }
+ else if (unformat (input, "buckets %d", &nbuckets))
+ ;
+ else if (unformat (input, "skip %d", &skip))
+ ;
+ else if (unformat (input, "match %d", &match))
+ ;
+ else if (unformat (input, "table %d", &table_index))
+ ;
+ else if (unformat (input, "mask %U", unformat_classify_mask,
+ &mask, &skip, &match))
+ ;
+ else if (unformat (input, "memory-size %uM", &tmp))
+ memory_size = tmp<<20;
+ else if (unformat (input, "memory-size %uG", &tmp))
+ memory_size = tmp<<30;
+ else if (unformat (input, "next-table %d", &next_table_index))
+ ;
+ else if (unformat (input, "miss-next %U", unformat_ip_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (input, "l2-input-miss-next %U", unformat_l2_input_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (input, "l2-output-miss-next %U", unformat_l2_output_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (input, "acl-miss-next %U", unformat_acl_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (input, "current-data-flag %d", &current_data_flag))
+ ;
+ else if (unformat (input, "current-data-offset %d", &current_data_offset))
+ ;
+
+ else
+ break;
+ }
+
+ if (is_add && mask == 0 && table_index == ~0)
+ return clib_error_return (0, "Mask required");
+
+ if (is_add && skip == ~0 && table_index == ~0)
+ return clib_error_return (0, "skip count required");
+
+ if (is_add && match == ~0 && table_index == ~0)
+ return clib_error_return (0, "match count required");
+
+ if (!is_add && table_index == ~0)
+ return clib_error_return (0, "table index required for delete");
+
+ rv = vnet_classify_add_del_table (cm, mask, nbuckets, memory_size,
+ skip, match, next_table_index, miss_next_index, &table_index,
+ current_data_flag, current_data_offset, is_add, del_chain);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_classify_add_del_table returned %d",
+ rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (classify_table, static) = {
+ .path = "classify table",
+ .short_help =
+ "classify table [miss-next|l2-miss_next|acl-miss-next <next_index>]"
+ "\n mask <mask-value> buckets <nn> [skip <n>] [match <n>]"
+ "\n [current-data-flag <n>] [current-data-offset <n>] [table <n>]"
+ "\n [del] [del-chain]",
+ .function = classify_table_command_fn,
+};
+
+static u8 * format_vnet_classify_table (u8 * s, va_list * args)
+{
+ vnet_classify_main_t * cm = va_arg (*args, vnet_classify_main_t *);
+ int verbose = va_arg (*args, int);
+ u32 index = va_arg (*args, u32);
+ vnet_classify_table_t * t;
+
+ if (index == ~0)
+ {
+ s = format (s, "%10s%10s%10s%10s", "TableIdx", "Sessions", "NextTbl",
+ "NextNode", verbose ? "Details" : "");
+ return s;
+ }
+
+ t = pool_elt_at_index (cm->tables, index);
+ s = format (s, "%10u%10d%10d%10d", index, t->active_elements,
+ t->next_table_index, t->miss_next_index);
+
+ s = format (s, "\n Heap: %U", format_mheap, t->mheap, 0 /*verbose*/);
+
+ s = format (s, "\n nbuckets %d, skip %d match %d flag %d offset %d",
+ t->nbuckets, t->skip_n_vectors, t->match_n_vectors,
+ t->current_data_flag, t->current_data_offset);
+ s = format (s, "\n mask %U", format_hex_bytes, t->mask,
+ t->match_n_vectors * sizeof (u32x4));
+ s = format (s, "\n linear-search buckets %d\n", t->linear_buckets);
+
+ if (verbose == 0)
+ return s;
+
+ s = format (s, "\n%U", format_classify_table, t, verbose);
+
+ return s;
+}
+
+static clib_error_t *
+show_classify_tables_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ vnet_classify_table_t * t;
+ u32 match_index = ~0;
+ u32 * indices = 0;
+ int verbose = 0;
+ int i;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "index %d", &match_index))
+ ;
+ else if (unformat (input, "verbose %d", &verbose))
+ ;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ break;
+ }
+
+ pool_foreach (t, cm->tables,
+ ({
+ if (match_index == ~0 || (match_index == t - cm->tables))
+ vec_add1 (indices, t - cm->tables);
+ }));
+
+ if (vec_len(indices))
+ {
+ vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, verbose,
+ ~0 /* hdr */);
+ for (i = 0; i < vec_len (indices); i++)
+ vlib_cli_output (vm, "%U", format_vnet_classify_table, cm,
+ verbose, indices[i]);
+ }
+ else
+ vlib_cli_output (vm, "No classifier tables configured");
+
+ vec_free (indices);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_classify_table_command, static) = {
+ .path = "show classify tables",
+ .short_help = "show classify tables [index <nn>]",
+ .function = show_classify_tables_command_fn,
+};
+
+uword unformat_l4_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+
+ u8 * proto_header = 0;
+ int src_port = 0;
+ int dst_port = 0;
+
+ tcpudp_header_t h;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "src_port %d", &src_port))
+ ;
+ else if (unformat (input, "dst_port %d", &dst_port))
+ ;
+ else
+ return 0;
+ }
+
+ h.src_port = clib_host_to_net_u16(src_port);
+ h.dst_port = clib_host_to_net_u16(dst_port);
+ vec_validate(proto_header, sizeof(h)-1);
+ memcpy(proto_header, &h, sizeof(h));
+
+ *matchp = proto_header;
+
+ return 1;
+}
+
+uword unformat_ip4_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+ u8 * match = 0;
+ ip4_header_t * ip;
+ int version = 0;
+ u32 version_val;
+ int hdr_length = 0;
+ u32 hdr_length_val;
+ int src = 0, dst = 0;
+ ip4_address_t src_val, dst_val;
+ int proto = 0;
+ u32 proto_val;
+ int tos = 0;
+ u32 tos_val;
+ int length = 0;
+ u32 length_val;
+ int fragment_id = 0;
+ u32 fragment_id_val;
+ int ttl = 0;
+ int ttl_val;
+ int checksum = 0;
+ u32 checksum_val;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version %d", &version_val))
+ version = 1;
+ else if (unformat (input, "hdr_length %d", &hdr_length_val))
+ hdr_length = 1;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src_val))
+ src = 1;
+ else if (unformat (input, "dst %U", unformat_ip4_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %d", &proto_val))
+ proto = 1;
+ else if (unformat (input, "tos %d", &tos_val))
+ tos = 1;
+ else if (unformat (input, "length %d", &length_val))
+ length = 1;
+ else if (unformat (input, "fragment_id %d", &fragment_id_val))
+ fragment_id = 1;
+ else if (unformat (input, "ttl %d", &ttl_val))
+ ttl = 1;
+ else if (unformat (input, "checksum %d", &checksum_val))
+ checksum = 1;
+ else
+ break;
+ }
+
+ if (version + hdr_length + src + dst + proto + tos + length + fragment_id
+ + ttl + checksum == 0)
+ return 0;
+
+ /*
+ * Aligned because we use the real comparison functions
+ */
+ vec_validate_aligned (match, sizeof (*ip) - 1, sizeof(u32x4));
+
+ ip = (ip4_header_t *) match;
+
+ /* These are realistically matched in practice */
+ if (src)
+ ip->src_address.as_u32 = src_val.as_u32;
+
+ if (dst)
+ ip->dst_address.as_u32 = dst_val.as_u32;
+
+ if (proto)
+ ip->protocol = proto_val;
+
+ /* These are not, but they're included for completeness */
+ if (version)
+ ip->ip_version_and_header_length |= (version_val & 0xF)<<4;
+
+ if (hdr_length)
+ ip->ip_version_and_header_length |= (hdr_length_val & 0xF);
+
+ if (tos)
+ ip->tos = tos_val;
+
+ if (length)
+ ip->length = clib_host_to_net_u16 (length_val);
+
+ if (ttl)
+ ip->ttl = ttl_val;
+
+ if (checksum)
+ ip->checksum = clib_host_to_net_u16 (checksum_val);
+
+ *matchp = match;
+ return 1;
+}
+
+uword unformat_ip6_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+ u8 * match = 0;
+ ip6_header_t * ip;
+ int version = 0;
+ u32 version_val;
+ u8 traffic_class = 0;
+ u32 traffic_class_val;
+ u8 flow_label = 0;
+ u32 flow_label_val;
+ int src = 0, dst = 0;
+ ip6_address_t src_val, dst_val;
+ int proto = 0;
+ u32 proto_val;
+ int payload_length = 0;
+ u32 payload_length_val;
+ int hop_limit = 0;
+ int hop_limit_val;
+ u32 ip_version_traffic_class_and_flow_label;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version %d", &version_val))
+ version = 1;
+ else if (unformat (input, "traffic_class %d", &traffic_class_val))
+ traffic_class = 1;
+ else if (unformat (input, "flow_label %d", &flow_label_val))
+ flow_label = 1;
+ else if (unformat (input, "src %U", unformat_ip6_address, &src_val))
+ src = 1;
+ else if (unformat (input, "dst %U", unformat_ip6_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %d", &proto_val))
+ proto = 1;
+ else if (unformat (input, "payload_length %d", &payload_length_val))
+ payload_length = 1;
+ else if (unformat (input, "hop_limit %d", &hop_limit_val))
+ hop_limit = 1;
+ else
+ break;
+ }
+
+ if (version + traffic_class + flow_label + src + dst + proto +
+ payload_length + hop_limit == 0)
+ return 0;
+
+ /*
+ * Aligned because we use the real comparison functions
+ */
+ vec_validate_aligned (match, sizeof (*ip) - 1, sizeof(u32x4));
+
+ ip = (ip6_header_t *) match;
+
+ if (src)
+ clib_memcpy (&ip->src_address, &src_val, sizeof (ip->src_address));
+
+ if (dst)
+ clib_memcpy (&ip->dst_address, &dst_val, sizeof (ip->dst_address));
+
+ if (proto)
+ ip->protocol = proto_val;
+
+ ip_version_traffic_class_and_flow_label = 0;
+
+ if (version)
+ ip_version_traffic_class_and_flow_label |= (version_val & 0xF) << 28;
+
+ if (traffic_class)
+ ip_version_traffic_class_and_flow_label |= (traffic_class_val & 0xFF) << 20;
+
+ if (flow_label)
+ ip_version_traffic_class_and_flow_label |= (flow_label_val & 0xFFFFF);
+
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label);
+
+ if (payload_length)
+ ip->payload_length = clib_host_to_net_u16 (payload_length_val);
+
+ if (hop_limit)
+ ip->hop_limit = hop_limit_val;
+
+ *matchp = match;
+ return 1;
+}
+
+uword unformat_l3_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "ip4 %U", unformat_ip4_match, matchp))
+ return 1;
+ else if (unformat (input, "ip6 %U", unformat_ip6_match, matchp))
+ return 1;
+ /* $$$$ add mpls */
+ else
+ break;
+ }
+ return 0;
+}
+
+uword unformat_vlan_tag (unformat_input_t * input, va_list * args)
+{
+ u8 * tagp = va_arg (*args, u8 *);
+ u32 tag;
+
+ if (unformat(input, "%d", &tag))
+ {
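+ /* Keep only the 12-bit VLAN ID; the PCP/DEI bits are handled by
+ the separate cos1/cos2 keywords. */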
+ tagp[0] = (tag>>8) & 0x0F;
+ tagp[1] = tag & 0xFF;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword unformat_l2_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+ u8 * match = 0;
+ u8 src = 0;
+ u8 src_val[6];
+ u8 dst = 0;
+ u8 dst_val[6];
+ u8 proto = 0;
+ u16 proto_val;
+ u8 tag1 = 0;
+ u8 tag1_val [2];
+ u8 tag2 = 0;
+ u8 tag2_val [2];
+ int len = 14;
+ u8 ignore_tag1 = 0;
+ u8 ignore_tag2 = 0;
+ u8 cos1 = 0;
+ u8 cos2 = 0;
+ u32 cos1_val = 0;
+ u32 cos2_val = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "src %U", unformat_ethernet_address, &src_val))
+ src = 1;
+ else if (unformat (input, "dst %U", unformat_ethernet_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %U",
+ unformat_ethernet_type_host_byte_order, &proto_val))
+ proto = 1;
+ else if (unformat (input, "tag1 %U", unformat_vlan_tag, tag1_val))
+ tag1 = 1;
+ else if (unformat (input, "tag2 %U", unformat_vlan_tag, tag2_val))
+ tag2 = 1;
+ else if (unformat (input, "ignore-tag1"))
+ ignore_tag1 = 1;
+ else if (unformat (input, "ignore-tag2"))
+ ignore_tag2 = 1;
+ else if (unformat (input, "cos1 %d", &cos1_val))
+ cos1 = 1;
+ else if (unformat (input, "cos2 %d", &cos2_val))
+ cos2 = 1;
+ else
+ break;
+ }
+ if ((src + dst + proto + tag1 + tag2 +
+ ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0)
+ return 0;
+
+ if (tag1 || ignore_tag1 || cos1)
+ len = 18;
+ if (tag2 || ignore_tag2 || cos2)
+ len = 22;
+
+ vec_validate_aligned (match, len-1, sizeof(u32x4));
+
+ if (dst)
+ clib_memcpy (match, dst_val, 6);
+
+ if (src)
+ clib_memcpy (match + 6, src_val, 6);
+
+ if (tag2)
+ {
+ /* inner vlan tag */
+ match[19] = tag2_val[1];
+ match[18] = tag2_val[0];
+ if (cos2)
+ match [18] |= (cos2_val & 0x7) << 5;
+ if (proto)
+ {
+ match[21] = proto_val & 0xff;
+ match[20] = proto_val >> 8;
+ }
+ if (tag1)
+ {
+ match [15] = tag1_val[1];
+ match [14] = tag1_val[0];
+ }
+ if (cos1)
+ match [14] |= (cos1_val & 0x7) << 5;
+ *matchp = match;
+ return 1;
+ }
+ if (tag1)
+ {
+ match [15] = tag1_val[1];
+ match [14] = tag1_val[0];
+ if (proto)
+ {
+ match[17] = proto_val & 0xff;
+ match[16] = proto_val >> 8;
+ }
+ if (cos1)
+ match [14] |= (cos1_val & 0x7) << 5;
+
+ *matchp = match;
+ return 1;
+ }
+ if (cos2)
+ match [18] |= (cos2_val & 0x7) << 5;
+ if (cos1)
+ match [14] |= (cos1_val & 0x7) << 5;
+ if (proto)
+ {
+ match[13] = proto_val & 0xff;
+ match[12] = proto_val >> 8;
+ }
+
+ *matchp = match;
+ return 1;
+}
+
+
+uword unformat_classify_match (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * cm = va_arg (*args, vnet_classify_main_t *);
+ u8 ** matchp = va_arg (*args, u8 **);
+ u32 table_index = va_arg (*args, u32);
+ vnet_classify_table_t * t;
+
+ u8 * match = 0;
+ u8 * l2 = 0;
+ u8 * l3 = 0;
+ u8 * l4 = 0;
+
+ if (pool_is_free_index (cm->tables, table_index))
+ return 0;
+
+ t = pool_elt_at_index (cm->tables, table_index);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "hex %U", unformat_hex_string, &match))
+ ;
+ else if (unformat (input, "l2 %U", unformat_l2_match, &l2))
+ ;
+ else if (unformat (input, "l3 %U", unformat_l3_match, &l3))
+ ;
+ else if (unformat (input, "l4 %U", unformat_l4_match, &l4))
+ ;
+ else
+ break;
+ }
+
+ if (l4 && !l3) {
+ vec_free (match);
+ vec_free (l2);
+ vec_free (l4);
+ return 0;
+ }
+
+ if (match || l2 || l3 || l4)
+ {
+ if (l2 || l3 || l4)
+ {
+ /* "Win a free Ethernet header in every packet" */
+ if (l2 == 0)
+ vec_validate_aligned (l2, 13, sizeof(u32x4));
+ match = l2;
+ if (l3)
+ {
+ vec_append_aligned (match, l3, sizeof(u32x4));
+ vec_free (l3);
+ }
+ if (l4)
+ {
+ vec_append_aligned (match, l4, sizeof(u32x4));
+ vec_free (l4);
+ }
+ }
+
+ /* Make sure the vector is big enough even if key is all 0's */
+ vec_validate_aligned
+ (match, ((t->match_n_vectors + t->skip_n_vectors) * sizeof(u32x4)) - 1,
+ sizeof(u32x4));
+
+ /* Set size, include skipped vectors*/
+ _vec_len (match) = (t->match_n_vectors+t->skip_n_vectors) * sizeof(u32x4);
+
+ *matchp = match;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int vnet_classify_add_del_session (vnet_classify_main_t * cm,
+ u32 table_index,
+ u8 * match,
+ u32 hit_next_index,
+ u32 opaque_index,
+ i32 advance,
+ u8 action,
+ u32 metadata,
+ int is_add)
+{
+ vnet_classify_table_t * t;
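+ /* Assemble the new entry in a stack buffer sized for the largest
+ (5-vector) key; only match_n_vectors of it are actually used. */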
+ vnet_classify_entry_5_t _max_e __attribute__((aligned (16)));
+ vnet_classify_entry_t * e;
+ int i, rv;
+
+ if (pool_is_free_index (cm->tables, table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ t = pool_elt_at_index (cm->tables, table_index);
+
+ e = (vnet_classify_entry_t *)&_max_e;
+ e->next_index = hit_next_index;
+ e->opaque_index = opaque_index;
+ e->advance = advance;
+ e->hits = 0;
+ e->last_heard = 0;
+ e->flags = 0;
+ e->action = action;
+ if (e->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX)
+ e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+ metadata,
+ FIB_SOURCE_CLASSIFY);
+ else if (e->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
+ e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
+ metadata,
+ FIB_SOURCE_CLASSIFY);
+ else
+ e->metadata = 0;
+
+ /* Copy key data, honoring skip_n_vectors */
+ clib_memcpy (&e->key, match + t->skip_n_vectors * sizeof (u32x4),
+ t->match_n_vectors * sizeof (u32x4));
+
+ /* Clear don't-care bits; likely when dynamically creating sessions */
+ for (i = 0; i < t->match_n_vectors; i++)
+ e->key[i] &= t->mask[i];
+
+ rv = vnet_classify_add_del (t, e, is_add);
+
+ vnet_classify_entry_release_resource(e);
+
+ if (rv)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return 0;
+}
+
+static clib_error_t *
+classify_session_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ int is_add = 1;
+ u32 table_index = ~0;
+ u32 hit_next_index = ~0;
+ u64 opaque_index = ~0;
+ u8 * match = 0;
+ i32 advance = 0;
+ u32 action = 0;
+ u32 metadata = 0;
+ int i, rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "hit-next %U", unformat_ip_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (input, "l2-input-hit-next %U", unformat_l2_input_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (input, "l2-output-hit-next %U", unformat_l2_output_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (input, "acl-hit-next %U", unformat_acl_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (input, "policer-hit-next %U",
+ unformat_policer_next_index, &hit_next_index))
+ ;
+ else if (unformat (input, "opaque-index %lld", &opaque_index))
+ ;
+ else if (unformat (input, "match %U", unformat_classify_match,
+ cm, &match, table_index))
+ ;
+ else if (unformat (input, "advance %d", &advance))
+ ;
+ else if (unformat (input, "table-index %d", &table_index))
+ ;
+ else if (unformat (input, "action set-ip4-fib-id %d", &metadata))
+ action = 1;
+ else if (unformat (input, "action set-ip6-fib-id %d", &metadata))
+ action = 2;
+ else
+ {
+ /* Try registered opaque-index unformat fns */
+ for (i = 0; i < vec_len (cm->unformat_opaque_index_fns); i++)
+ {
+ if (unformat (input, "%U", cm->unformat_opaque_index_fns[i],
+ &opaque_index))
+ goto found_opaque;
+ }
+ break;
+ }
+ found_opaque:
+ ;
+ }
+
+ if (table_index == ~0)
+ return clib_error_return (0, "Table index required");
+
+ if (is_add && match == 0)
+ return clib_error_return (0, "Match value required");
+
+ rv = vnet_classify_add_del_session (cm, table_index, match,
+ hit_next_index,
+ opaque_index, advance,
+ action, metadata, is_add);
+
+ switch(rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_classify_add_del_session returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (classify_session_command, static) = {
+ .path = "classify session",
+ .short_help =
+ "classify session [hit-next|l2-hit-next|"
+ "acl-hit-next <next_index>|policer-hit-next <policer_name>]"
+ "\n table-index <nn> match [hex] [l2] [l3 ip4] [opaque-index <index>]"
+ "\n [action set-ip4-fib-id <n>] [action set-ip6-fib-id <n>] [del]",
+ .function = classify_session_command_fn,
+};
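+/*
+ * Example (illustrative; assumes table 0 was created with a matching
+ * l3 ip4 src mask):
+ *
+ *   classify session hit-next <next_index> table-index 0
+ *     match l3 ip4 src 10.0.0.1
+ *
+ * Adding "del" deletes the session instead.
+ */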
+
+static uword
+unformat_opaque_sw_if_index (unformat_input_t * input, va_list * args)
+{
+ u64 * opaquep = va_arg (*args, u64 *);
+ u32 sw_if_index;
+
+ if (unformat (input, "opaque-sw_if_index %U", unformat_vnet_sw_interface,
+ vnet_get_main(), &sw_if_index))
+ {
+ *opaquep = sw_if_index;
+ return 1;
+ }
+ return 0;
+}
+
+static uword
+unformat_ip_next_node (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 * next_indexp = va_arg (*args, u32 *);
+ u32 node_index;
+ u32 next_index = ~0;
+
+ if (unformat (input, "ip6-node %U", unformat_vlib_node,
+ cm->vlib_main, &node_index))
+ {
+ next_index = vlib_node_add_next (cm->vlib_main,
+ ip6_classify_node.index, node_index);
+ }
+ else if (unformat (input, "ip4-node %U", unformat_vlib_node,
+ cm->vlib_main, &node_index))
+ {
+ next_index = vlib_node_add_next (cm->vlib_main,
+ ip4_classify_node.index, node_index);
+ }
+ else
+ return 0;
+
+ *next_indexp = next_index;
+ return 1;
+}
+
+static uword
+unformat_acl_next_node (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 * next_indexp = va_arg (*args, u32 *);
+ u32 node_index;
+ u32 next_index;
+
+ if (unformat (input, "ip6-node %U", unformat_vlib_node,
+ cm->vlib_main, &node_index))
+ {
+ next_index = vlib_node_add_next (cm->vlib_main,
+ ip6_inacl_node.index, node_index);
+ }
+ else if (unformat (input, "ip4-node %U", unformat_vlib_node,
+ cm->vlib_main, &node_index))
+ {
+ next_index = vlib_node_add_next (cm->vlib_main,
+ ip4_inacl_node.index, node_index);
+ }
+ else
+ return 0;
+
+ *next_indexp = next_index;
+ return 1;
+}
+
+static uword
+unformat_l2_input_next_node (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 * next_indexp = va_arg (*args, u32 *);
+ u32 node_index;
+ u32 next_index;
+
+ if (unformat (input, "input-node %U", unformat_vlib_node,
+ cm->vlib_main, &node_index))
+ {
+ next_index = vlib_node_add_next
+ (cm->vlib_main, l2_input_classify_node.index, node_index);
+
+ *next_indexp = next_index;
+ return 1;
+ }
+ return 0;
+}
+
+static uword
+unformat_l2_output_next_node (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 * next_indexp = va_arg (*args, u32 *);
+ u32 node_index;
+ u32 next_index;
+
+ if (unformat (input, "output-node %U", unformat_vlib_node,
+ cm->vlib_main, &node_index))
+ {
+ next_index = vlib_node_add_next
+ (cm->vlib_main, l2_output_classify_node.index, node_index);
+
+ *next_indexp = next_index;
+ return 1;
+ }
+ return 0;
+}
+
+static clib_error_t *
+vnet_classify_init (vlib_main_t * vm)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main();
+
+ vnet_classify_register_unformat_opaque_index_fn
+ (unformat_opaque_sw_if_index);
+
+ vnet_classify_register_unformat_ip_next_index_fn
+ (unformat_ip_next_node);
+
+ vnet_classify_register_unformat_l2_next_index_fn
+ (unformat_l2_input_next_node);
+
+ vnet_classify_register_unformat_l2_next_index_fn
+ (unformat_l2_output_next_node);
+
+ vnet_classify_register_unformat_acl_next_index_fn
+ (unformat_acl_next_node);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_classify_init);
+
+#define TEST_CODE 1
+
+#if TEST_CODE > 0
+
+typedef struct
+{
+ ip4_address_t addr;
+ int in_table;
+} test_entry_t;
+
+typedef struct
+{
+ test_entry_t *entries;
+
+ /* test parameters */
+ u32 buckets;
+ u32 sessions;
+ u32 iterations;
+ u32 memory_size;
+ ip4_address_t src;
+ vnet_classify_table_t *table;
+ u32 table_index;
+ int verbose;
+
+ /* Random seed */
+ u32 seed;
+
+ /* Test data */
+ classify_data_or_mask_t * mask;
+ classify_data_or_mask_t * data;
+
+ /* convenience */
+ vnet_classify_main_t *classify_main;
+ vlib_main_t *vlib_main;
+
+} test_classify_main_t;
+
+static test_classify_main_t test_classify_main;
+
+static clib_error_t *
+test_classify_churn (test_classify_main_t *tm)
+{
+ classify_data_or_mask_t *mask, *data;
+ vlib_main_t *vm = tm->vlib_main;
+ test_entry_t *ep;
+ u8 *mp = 0, *dp = 0;
+ u32 tmp;
+ int i, rv;
+
+ vec_validate_aligned (mp, 3 * sizeof(u32x4), sizeof(u32x4));
+ vec_validate_aligned (dp, 3 * sizeof(u32x4), sizeof(u32x4));
+
+ mask = (classify_data_or_mask_t *) mp;
+ data = (classify_data_or_mask_t *) dp;
+
+ /* Mask on src address */
+ memset (&mask->ip.src_address, 0xff, 4);
+
+ tmp = clib_host_to_net_u32 (tm->src.as_u32);
+
+ for (i = 0; i < tm->sessions; i++)
+ {
+ vec_add2 (tm->entries, ep, 1);
+ ep->addr.as_u32 = clib_host_to_net_u32 (tmp);
+ ep->in_table = 0;
+ tmp++;
+ }
+
+ tm->table = vnet_classify_new_table (tm->classify_main,
+ (u8 *)mask,
+ tm->buckets,
+ tm->memory_size,
+ 0 /* skip */,
+ 3 /* vectors to match */);
+ tm->table->miss_next_index = IP_LOOKUP_NEXT_DROP;
+ tm->table_index = tm->table - tm->classify_main->tables;
+ vlib_cli_output (vm, "Created table %d, buckets %d",
+ tm->table_index, tm->buckets);
+
+ vlib_cli_output (vm, "Initialize: add %d (approx. half of %d sessions)...",
+ tm->sessions/2, tm->sessions);
+
+ for (i = 0; i < tm->sessions/2; i++)
+ {
+ ep = vec_elt_at_index (tm->entries, i);
+
+ data->ip.src_address.as_u32 = ep->addr.as_u32;
+ ep->in_table = 1;
+
+ rv = vnet_classify_add_del_session (tm->classify_main,
+ tm->table_index,
+ (u8 *) data,
+ IP_LOOKUP_NEXT_DROP,
+ i /* opaque_index */,
+ 0 /* advance */,
+ 0 /* action */,
+ 0 /* metadata */,
+ 1 /* is_add */);
+
+ if (rv != 0)
+ clib_warning ("add: returned %d", rv);
+
+ if (tm->verbose)
+ vlib_cli_output (vm, "add: %U", format_ip4_address,
+ &ep->addr.as_u32);
+ }
+
+ vlib_cli_output (vm, "Execute %d random add/delete operations",
+ tm->iterations);
+
+ for (i = 0; i < tm->iterations; i++)
+ {
+ int index, is_add;
+
+ /* Pick a random entry */
+ index = random_u32 (&tm->seed) % tm->sessions;
+
+ ep = vec_elt_at_index (tm->entries, index);
+
+ data->ip.src_address.as_u32 = ep->addr.as_u32;
+
+ /* If it's in the table, remove it. Else, add it */
+ is_add = !ep->in_table;
+
+ if (tm->verbose)
+ vlib_cli_output (vm, "%s: %U",
+ is_add ? "add" : "del",
+ format_ip4_address,
+ &ep->addr.as_u32);
+
+ rv = vnet_classify_add_del_session (tm->classify_main,
+ tm->table_index,
+ (u8 *) data,
+ IP_LOOKUP_NEXT_DROP,
+ i /* opaque_index */,
+ 0 /* advance */,
+ 0 /* action */,
+ 0 /* metadata */,
+ is_add);
+ if (rv != 0)
+ vlib_cli_output (vm,
+ "%s[%d]: %U returned %d", is_add ? "add" : "del",
+ index,
+ format_ip4_address,
+ &ep->addr.as_u32, rv);
+ else
+ ep->in_table = is_add;
+ }
+
+ vlib_cli_output (vm, "Remove remaining %d entries from the table",
+ tm->table->active_elements);
+
+ for (i = 0; i < tm->sessions; i++)
+ {
+ u8 * key_minus_skip;
+ u64 hash;
+ vnet_classify_entry_t * e;
+
+ ep = tm->entries + i;
+ if (ep->in_table == 0)
+ continue;
+
+ data->ip.src_address.as_u32 = ep->addr.as_u32;
+
+ hash = vnet_classify_hash_packet (tm->table, (u8 *) data);
+
+ e = vnet_classify_find_entry (tm->table,
+ (u8 *) data, hash, 0 /* time_now */);
+ if (e == 0)
+ {
+ clib_warning ("Couldn't find %U index %d which should be present",
+ format_ip4_address, ep->addr, i);
+ continue;
+ }
+
+ key_minus_skip = (u8 *)e->key;
+ key_minus_skip -= tm->table->skip_n_vectors * sizeof (u32x4);
+
+ rv = vnet_classify_add_del_session
+ (tm->classify_main,
+ tm->table_index,
+ key_minus_skip,
+ IP_LOOKUP_NEXT_DROP,
+ i /* opaque_index */,
+ 0 /* advance */, 0, 0,
+ 0 /* is_add */);
+
+ if (rv != 0)
+ clib_warning ("del: returned %d", rv);
+
+ if (tm->verbose)
+ vlib_cli_output (vm, "del: %U", format_ip4_address,
+ &ep->addr.as_u32);
+ }
+
+ vlib_cli_output (vm, "%d entries remain, MUST be zero",
+ tm->table->active_elements);
+
+ vlib_cli_output (vm, "Table after cleanup: \n%U\n",
+ format_classify_table, tm->table, 0 /* verbose */);
+
+ vec_free (mp);
+ vec_free (dp);
+
+ vnet_classify_delete_table_index (tm->classify_main,
+ tm->table_index, 1 /* del_chain */);
+ tm->table = 0;
+ tm->table_index = ~0;
+ vec_free(tm->entries);
+
+ return 0;
+}
+
+static clib_error_t *
+test_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ test_classify_main_t *tm = &test_classify_main;
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ u32 tmp;
+ int which = 0;
+ clib_error_t * error = 0;
+
+ tm->buckets = 1024;
+ tm->sessions = 8192;
+ tm->iterations = 8192;
+ tm->memory_size = 64<<20;
+ tm->src.as_u32 = clib_net_to_host_u32 (0x0100000A);
+ tm->table = 0;
+ tm->seed = 0xDEADDABE;
+ tm->classify_main = cm;
+ tm->vlib_main = vm;
+ tm->verbose = 0;
+
+ /* Default starting address 1.0.0.10 */
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "sessions %d", &tmp))
+ tm->sessions = tmp;
+ else if (unformat (input, "src %U", unformat_ip4_address, &tm->src.as_u32))
+ ;
+ else if (unformat (input, "buckets %d", &tm->buckets))
+ ;
+ else if (unformat (input, "memory-size %uM", &tmp))
+ tm->memory_size = tmp<<20;
+ else if (unformat (input, "memory-size %uG", &tmp))
+ tm->memory_size = tmp<<30;
+ else if (unformat (input, "seed %d", &tm->seed))
+ ;
+ else if (unformat (input, "verbose"))
+ tm->verbose = 1;
+
+ else if (unformat (input, "iterations %d", &tm->iterations))
+ ;
+ else if (unformat (input, "churn-test"))
+ which = 0;
+ else
+ break;
+ }
+
+ switch (which)
+ {
+ case 0:
+ error = test_classify_churn (tm);
+ break;
+ default:
+ error = clib_error_return (0, "No such test");
+ break;
+ }
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (test_classify_command, static) = {
+ .path = "test classify",
+ .short_help =
+ "test classify [src <ip>] [sessions <nn>] [buckets <nn>] [seed <nnn>]\n"
+ " [memory-size <nn>[M|G]]\n"
+ " [churn-test]",
+ .function = test_classify_command_fn,
+};
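+
+/*
+ * Example (illustrative): exercise the churn test with verbose output:
+ *
+ *   test classify sessions 1024 iterations 4096 seed 1234 verbose
+ */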
+#endif /* TEST_CODE */
diff --git a/src/vnet/classify/vnet_classify.h b/src/vnet/classify/vnet_classify.h
new file mode 100644
index 00000000..1eb5b14d
--- /dev/null
+++ b/src/vnet/classify/vnet_classify.h
@@ -0,0 +1,538 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vnet_classify_h__
+#define __included_vnet_classify_h__
+
+#include <stdarg.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/api_errno.h> /* for API error numbers */
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/xxhash.h>
+
+extern vlib_node_registration_t ip4_classify_node;
+extern vlib_node_registration_t ip6_classify_node;
+
+#define CLASSIFY_TRACE 0
+
+#if !defined(__aarch64__) && !defined(__arm__)
+#define CLASSIFY_USE_SSE /* Allow use of SSE operations */
+#endif
+
+#define U32X4_ALIGNED(p) PREDICT_TRUE((((intptr_t)p) & 0xf) == 0)
+
+/*
+ * Classify table option to process packets
+ * CLASSIFY_FLAG_USE_CURR_DATA:
+ * - classify packets starting from VPP node’s current data pointer
+ */
+#define CLASSIFY_FLAG_USE_CURR_DATA 1
+
+/*
+ * Classify session action
+ * CLASSIFY_ACTION_SET_IP4_FIB_INDEX:
+ * - Classified IP packets will be looked up
+ * from the specified ipv4 fib table
+ * CLASSIFY_ACTION_SET_IP6_FIB_INDEX:
+ * - Classified IP packets will be looked up
+ * from the specified ipv6 fib table
+ */
+typedef enum vnet_classify_action_t_
+{
+ CLASSIFY_ACTION_SET_IP4_FIB_INDEX = 1,
+ CLASSIFY_ACTION_SET_IP6_FIB_INDEX = 2,
+} __attribute__ ((packed)) vnet_classify_action_t;
+
+struct _vnet_classify_main;
+typedef struct _vnet_classify_main vnet_classify_main_t;
+
+#define foreach_size_in_u32x4 \
+_(1) \
+_(2) \
+_(3) \
+_(4) \
+_(5)
+
+typedef CLIB_PACKED(struct _vnet_classify_entry {
+ /* Graph node next index */
+ u32 next_index;
+
+ /* put into vnet_buffer(b)->l2_classify.opaque_index */
+ union {
+ struct {
+ u32 opaque_index;
+ /* advance on hit, note it's a signed quantity... */
+ i32 advance;
+ };
+ u64 opaque_count;
+ };
+
+ /* Really only need 1 bit */
+ u8 flags;
+#define VNET_CLASSIFY_ENTRY_FREE (1<<0)
+
+ vnet_classify_action_t action;
+ u16 metadata;
+
+ /* Hit counter, last heard time */
+ union {
+ u64 hits;
+ struct _vnet_classify_entry * next_free;
+ };
+
+ f64 last_heard;
+
+ /* Must be aligned to a 16-octet boundary */
+ u32x4 key[0];
+}) vnet_classify_entry_t;
+
+static inline int vnet_classify_entry_is_free (vnet_classify_entry_t * e)
+{
+ return e->flags & VNET_CLASSIFY_ENTRY_FREE;
+}
+
+static inline int vnet_classify_entry_is_busy (vnet_classify_entry_t * e)
+{
+ return ((e->flags & VNET_CLASSIFY_ENTRY_FREE) == 0);
+}
+
+/* Need these to con the vector allocator */
+#define _(size) \
+typedef CLIB_PACKED(struct { \
+ u32 pad0[4]; \
+ u64 pad1[2]; \
+ u32x4 key[size]; \
+}) vnet_classify_entry_##size##_t;
+foreach_size_in_u32x4;
+#undef _
+
+typedef struct {
+ union {
+ struct {
+ u32 offset;
+ u8 linear_search;
+ u8 pad[2];
+ u8 log2_pages;
+ };
+ u64 as_u64;
+ };
+} vnet_classify_bucket_t;
+
+typedef struct {
+ /* Mask to apply after skipping N vectors */
+ u32x4 *mask;
+ /* Buckets and entries */
+ vnet_classify_bucket_t * buckets;
+ vnet_classify_entry_t * entries;
+
+ /* Config parameters */
+ u32 match_n_vectors;
+ u32 skip_n_vectors;
+ u32 nbuckets;
+ u32 log2_nbuckets;
+ u32 linear_buckets;
+ int entries_per_page;
+ u32 active_elements;
+ u32 current_data_flag;
+ int current_data_offset;
+ u32 data_offset;
+ /* Index of next table to try */
+ u32 next_table_index;
+
+ /* Miss next index, used when next_table_index is ~0 */
+ u32 miss_next_index;
+
+ /* Per-bucket working copies, one per thread */
+ vnet_classify_entry_t ** working_copies;
+ int *working_copy_lengths;
+ vnet_classify_bucket_t saved_bucket;
+
+ /* Free entry freelists */
+ vnet_classify_entry_t **freelists;
+
+ u8 * name;
+
+ /* Private allocation arena, protected by the writer lock */
+ void * mheap;
+
+ /* Writer (only) lock for this table */
+ volatile u32 * writer_lock;
+
+} vnet_classify_table_t;
+
+struct _vnet_classify_main {
+ /* Table pool */
+ vnet_classify_table_t * tables;
+
+ /* Registered next-index, opaque unformat fcns */
+ unformat_function_t ** unformat_l2_next_index_fns;
+ unformat_function_t ** unformat_ip_next_index_fns;
+ unformat_function_t ** unformat_acl_next_index_fns;
+ unformat_function_t ** unformat_policer_next_index_fns;
+ unformat_function_t ** unformat_opaque_index_fns;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+};
+
+extern vnet_classify_main_t vnet_classify_main;
+
+u8 * format_classify_table (u8 * s, va_list * args);
+
+u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h);
+
+static inline u64
+vnet_classify_hash_packet_inline (vnet_classify_table_t * t,
+ u8 * h)
+{
+ u32x4 *mask;
+
+ union {
+ u32x4 as_u32x4;
+ u64 as_u64[2];
+ } xor_sum __attribute__((aligned(sizeof(u32x4))));
+
+ ASSERT(t);
+ mask = t->mask;
+#ifdef CLASSIFY_USE_SSE
+ if (U32X4_ALIGNED(h)) { /* SSE can't handle unaligned data */
+ u32x4 *data = (u32x4 *)h;
+ xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0];
+ switch (t->match_n_vectors)
+ {
+ case 5:
+ xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4];
+ /* FALLTHROUGH */
+ case 4:
+ xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3];
+ /* FALLTHROUGH */
+ case 3:
+ xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2];
+ /* FALLTHROUGH */
+ case 2:
+ xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+ default:
+ abort();
+ }
+ } else
+#endif /* CLASSIFY_USE_SSE */
+ {
+ u32 skip_u64 = t->skip_n_vectors * 2;
+ u64 *data64 = (u64 *)h;
+ xor_sum.as_u64[0] = data64[0 + skip_u64] & ((u64 *)mask)[0];
+ xor_sum.as_u64[1] = data64[1 + skip_u64] & ((u64 *)mask)[1];
+ switch (t->match_n_vectors)
+ {
+ case 5:
+ xor_sum.as_u64[0] ^= data64[8 + skip_u64] & ((u64 *)mask)[8];
+ xor_sum.as_u64[1] ^= data64[9 + skip_u64] & ((u64 *)mask)[9];
+ /* FALLTHROUGH */
+ case 4:
+ xor_sum.as_u64[0] ^= data64[6 + skip_u64] & ((u64 *)mask)[6];
+ xor_sum.as_u64[1] ^= data64[7 + skip_u64] & ((u64 *)mask)[7];
+ /* FALLTHROUGH */
+ case 3:
+ xor_sum.as_u64[0] ^= data64[4 + skip_u64] & ((u64 *)mask)[4];
+ xor_sum.as_u64[1] ^= data64[5 + skip_u64] & ((u64 *)mask)[5];
+ /* FALLTHROUGH */
+ case 2:
+ xor_sum.as_u64[0] ^= data64[2 + skip_u64] & ((u64 *)mask)[2];
+ xor_sum.as_u64[1] ^= data64[3 + skip_u64] & ((u64 *)mask)[3];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+
+ default:
+ abort();
+ }
+ }
+
+ return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]);
+}
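+/*
+ * Sketch of the computation above for a single-vector table
+ * (match_n_vectors = 1, skip_n_vectors = 0), illustrative only:
+ *
+ *   u64 *d = (u64 *) h, *m = (u64 *) t->mask;
+ *   u64 lo = d[0] & m[0], hi = d[1] & m[1];
+ *   return clib_xxhash (lo ^ hi);
+ *
+ * i.e. the masked 16-byte key is xor-folded to 64 bits and fed to
+ * xxhash; additional match vectors are xor-accumulated first.
+ */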
+
+static inline void
+vnet_classify_prefetch_bucket (vnet_classify_table_t * t, u64 hash)
+{
+ u32 bucket_index;
+
+ ASSERT (is_pow2(t->nbuckets));
+
+ bucket_index = hash & (t->nbuckets - 1);
+
+ CLIB_PREFETCH(&t->buckets[bucket_index], CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+static inline vnet_classify_entry_t *
+vnet_classify_get_entry (vnet_classify_table_t * t, uword offset)
+{
+ u8 * hp = t->mheap;
+ u8 * vp = hp + offset;
+
+ return (void *) vp;
+}
+
+static inline uword vnet_classify_get_offset (vnet_classify_table_t * t,
+ vnet_classify_entry_t * v)
+{
+ u8 * hp, * vp;
+
+ hp = (u8 *) t->mheap;
+ vp = (u8 *) v;
+
+ ASSERT((vp - hp) < 0x100000000ULL);
+ return vp - hp;
+}
+
+static inline vnet_classify_entry_t *
+vnet_classify_entry_at_index (vnet_classify_table_t * t,
+ vnet_classify_entry_t * e,
+ u32 index)
+{
+ u8 * eu8;
+
+ eu8 = (u8 *)e;
+
+ eu8 += index * (sizeof (vnet_classify_entry_t) +
+ (t->match_n_vectors * sizeof (u32x4)));
+
+ return (vnet_classify_entry_t *) eu8;
+}
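+/*
+ * Entries are variable-sized: each occupies sizeof (vnet_classify_entry_t)
+ * plus match_n_vectors * sizeof (u32x4) bytes of key.  For example
+ * (illustrative), a 2-vector table places entry i at byte offset
+ * i * (sizeof (vnet_classify_entry_t) + 2 * sizeof (u32x4)), which is
+ * exactly the stride computed above.
+ */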
+
+static inline void
+vnet_classify_prefetch_entry (vnet_classify_table_t * t,
+ u64 hash)
+{
+ u32 bucket_index;
+ u32 value_index;
+ vnet_classify_bucket_t * b;
+ vnet_classify_entry_t * e;
+
+ bucket_index = hash & (t->nbuckets - 1);
+
+ b = &t->buckets[bucket_index];
+
+ if (b->offset == 0)
+ return;
+
+ hash >>= t->log2_nbuckets;
+
+ e = vnet_classify_get_entry (t, b->offset);
+ value_index = hash & ((1<<b->log2_pages)-1);
+
+ e = vnet_classify_entry_at_index (t, e, value_index);
+
+ CLIB_PREFETCH(e, CLIB_CACHE_LINE_BYTES, LOAD);
+}
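+/*
+ * Typical use in a node dispatch loop (sketch, illustrative): compute
+ * the hash for packet N+2, prefetch for packet N+1, look up packet N:
+ *
+ *   hash2 = vnet_classify_hash_packet_inline (t, h2);
+ *   vnet_classify_prefetch_bucket (t, hash2);   // or _prefetch_entry
+ *   e = vnet_classify_find_entry_inline (t, h0, hash0, now);
+ */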
+
+vnet_classify_entry_t *
+vnet_classify_find_entry (vnet_classify_table_t * t,
+ u8 * h, u64 hash, f64 now);
+
+static inline vnet_classify_entry_t *
+vnet_classify_find_entry_inline (vnet_classify_table_t * t,
+ u8 * h, u64 hash, f64 now)
+{
+ vnet_classify_entry_t * v;
+ u32x4 *mask, *key;
+ union {
+ u32x4 as_u32x4;
+ u64 as_u64[2];
+ } result __attribute__((aligned(sizeof(u32x4))));
+ vnet_classify_bucket_t * b;
+ u32 value_index;
+ u32 bucket_index;
+ u32 limit;
+ int i;
+
+ bucket_index = hash & (t->nbuckets-1);
+ b = &t->buckets[bucket_index];
+ mask = t->mask;
+
+ if (b->offset == 0)
+ return 0;
+
+ hash >>= t->log2_nbuckets;
+
+ v = vnet_classify_get_entry (t, b->offset);
+ value_index = hash & ((1<<b->log2_pages)-1);
+ limit = t->entries_per_page;
+ if (PREDICT_FALSE (b->linear_search))
+ {
+ value_index = 0;
+ limit *= (1<<b->log2_pages);
+ }
+
+ v = vnet_classify_entry_at_index (t, v, value_index);
+
+#ifdef CLASSIFY_USE_SSE
+ if (U32X4_ALIGNED(h)) {
+ u32x4 *data = (u32x4 *) h;
+ for (i = 0; i < limit; i++) {
+ key = v->key;
+ result.as_u32x4 = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ switch (t->match_n_vectors)
+ {
+ case 5:
+ result.as_u32x4 |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4];
+ /* FALLTHROUGH */
+ case 4:
+ result.as_u32x4 |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
+ /* FALLTHROUGH */
+ case 3:
+ result.as_u32x4 |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
+ /* FALLTHROUGH */
+ case 2:
+ result.as_u32x4 |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+ default:
+ abort();
+ }
+
+ if (u32x4_zero_byte_mask (result.as_u32x4) == 0xffff) {
+ if (PREDICT_TRUE(now)) {
+ v->hits++;
+ v->last_heard = now;
+ }
+ return (v);
+ }
+ v = vnet_classify_entry_at_index (t, v, 1);
+ }
+ } else
+#endif /* CLASSIFY_USE_SSE */
+ {
+ u32 skip_u64 = t->skip_n_vectors * 2;
+ u64 *data64 = (u64 *)h;
+ for (i = 0; i < limit; i++) {
+ key = v->key;
+
+ result.as_u64[0] = (data64[0 + skip_u64] & ((u64 *)mask)[0]) ^ ((u64 *)key)[0];
+ result.as_u64[1] = (data64[1 + skip_u64] & ((u64 *)mask)[1]) ^ ((u64 *)key)[1];
+ switch (t->match_n_vectors)
+ {
+ case 5:
+ result.as_u64[0] |= (data64[8 + skip_u64] & ((u64 *)mask)[8]) ^ ((u64 *)key)[8];
+ result.as_u64[1] |= (data64[9 + skip_u64] & ((u64 *)mask)[9]) ^ ((u64 *)key)[9];
+ /* FALLTHROUGH */
+ case 4:
+ result.as_u64[0] |= (data64[6 + skip_u64] & ((u64 *)mask)[6]) ^ ((u64 *)key)[6];
+ result.as_u64[1] |= (data64[7 + skip_u64] & ((u64 *)mask)[7]) ^ ((u64 *)key)[7];
+ /* FALLTHROUGH */
+ case 3:
+ result.as_u64[0] |= (data64[4 + skip_u64] & ((u64 *)mask)[4]) ^ ((u64 *)key)[4];
+ result.as_u64[1] |= (data64[5 + skip_u64] & ((u64 *)mask)[5]) ^ ((u64 *)key)[5];
+ /* FALLTHROUGH */
+ case 2:
+ result.as_u64[0] |= (data64[2 + skip_u64] & ((u64 *)mask)[2]) ^ ((u64 *)key)[2];
+ result.as_u64[1] |= (data64[3 + skip_u64] & ((u64 *)mask)[3]) ^ ((u64 *)key)[3];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+ default:
+ abort();
+ }
+
+ if (result.as_u64[0] == 0 && result.as_u64[1] == 0) {
+ if (PREDICT_TRUE(now)) {
+ v->hits++;
+ v->last_heard = now;
+ }
+ return (v);
+ }
+
+ v = vnet_classify_entry_at_index (t, v, 1);
+ }
+ }
+ return 0;
+}
+
+vnet_classify_table_t *
+vnet_classify_new_table (vnet_classify_main_t *cm,
+ u8 * mask, u32 nbuckets, u32 memory_size,
+ u32 skip_n_vectors,
+ u32 match_n_vectors);
+
+int vnet_classify_add_del_session (vnet_classify_main_t * cm,
+ u32 table_index,
+ u8 * match,
+ u32 hit_next_index,
+ u32 opaque_index,
+ i32 advance,
+ u8 action,
+ u32 metadata,
+ int is_add);
+
+int vnet_classify_add_del_table (vnet_classify_main_t * cm,
+ u8 * mask,
+ u32 nbuckets,
+ u32 memory_size,
+ u32 skip,
+ u32 match,
+ u32 next_table_index,
+ u32 miss_next_index,
+ u32 * table_index,
+ u8 current_data_flag,
+ i16 current_data_offset,
+ int is_add,
+ int del_chain);
+
+unformat_function_t unformat_ip4_mask;
+unformat_function_t unformat_ip6_mask;
+unformat_function_t unformat_l3_mask;
+unformat_function_t unformat_l2_mask;
+unformat_function_t unformat_classify_mask;
+unformat_function_t unformat_l2_next_index;
+unformat_function_t unformat_ip_next_index;
+unformat_function_t unformat_ip4_match;
+unformat_function_t unformat_ip6_match;
+unformat_function_t unformat_l3_match;
+unformat_function_t unformat_l4_match;
+unformat_function_t unformat_vlan_tag;
+unformat_function_t unformat_l2_match;
+unformat_function_t unformat_classify_match;
+
+void vnet_classify_register_unformat_ip_next_index_fn
+(unformat_function_t * fn);
+
+void vnet_classify_register_unformat_l2_next_index_fn
+(unformat_function_t * fn);
+
+void vnet_classify_register_unformat_acl_next_index_fn
+(unformat_function_t * fn);
+
+void vnet_classify_register_unformat_policer_next_index_fn
+(unformat_function_t * fn);
+
+void vnet_classify_register_unformat_opaque_index_fn (unformat_function_t * fn);
+
+#endif /* __included_vnet_classify_h__ */
diff --git a/src/vnet/config.c b/src/vnet/config.c
new file mode 100644
index 00000000..03189d77
--- /dev/null
+++ b/src/vnet/config.c
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * config.c: feature configuration
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+
+static vnet_config_feature_t *
+duplicate_feature_vector (vnet_config_feature_t * feature_vector)
+{
+ vnet_config_feature_t *result, *f;
+
+ result = vec_dup (feature_vector);
+ vec_foreach (f, result) f->feature_config = vec_dup (f->feature_config);
+
+ return result;
+}
+
+static void
+free_feature_vector (vnet_config_feature_t * feature_vector)
+{
+ vnet_config_feature_t *f;
+
+ vec_foreach (f, feature_vector) vnet_config_feature_free (f);
+ vec_free (feature_vector);
+}
+
+static u32
+add_next (vlib_main_t * vm,
+ vnet_config_main_t * cm, u32 last_node_index, u32 this_node_index)
+{
+ u32 i, ni = ~0;
+
+ if (last_node_index != ~0)
+ return vlib_node_add_next (vm, last_node_index, this_node_index);
+
+ for (i = 0; i < vec_len (cm->start_node_indices); i++)
+ {
+ u32 tmp;
+ tmp =
+ vlib_node_add_next (vm, cm->start_node_indices[i], this_node_index);
+ if (ni == ~0)
+ ni = tmp;
+ /* All start nodes must agree on the next index of the first feature node. */
+ ASSERT (ni == tmp);
+ }
+
+ return ni;
+}
+
+static vnet_config_t *
+find_config_with_features (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ vnet_config_feature_t * feature_vector)
+{
+ u32 last_node_index = ~0;
+ vnet_config_feature_t *f;
+ u32 *config_string;
+ uword *p;
+ vnet_config_t *c;
+
+ config_string = cm->config_string_temp;
+ cm->config_string_temp = 0;
+ if (config_string)
+ _vec_len (config_string) = 0;
+
+ vec_foreach (f, feature_vector)
+ {
+ /* Connect node graph. */
+ f->next_index = add_next (vm, cm, last_node_index, f->node_index);
+ last_node_index = f->node_index;
+
+ /* Store next index in config string. */
+ vec_add1 (config_string, f->next_index);
+
+ /* Store feature config. */
+ vec_add (config_string, f->feature_config, vec_len (f->feature_config));
+ }
+
+ /* Terminate config string with next for end node. */
+ if (last_node_index == ~0 || last_node_index != cm->end_node_index)
+ {
+ u32 next_index = add_next (vm, cm, last_node_index, cm->end_node_index);
+ vec_add1 (config_string, next_index);
+ }
+
+ /* See if config string is unique. */
+ p = hash_get_mem (cm->config_string_hash, config_string);
+ if (p)
+ {
+ /* Not unique. Share existing config. */
+ cm->config_string_temp = config_string; /* we'll use it again later. */
+ free_feature_vector (feature_vector);
+ c = pool_elt_at_index (cm->config_pool, p[0]);
+ }
+ else
+ {
+ u32 *d;
+
+ pool_get (cm->config_pool, c);
+ c->index = c - cm->config_pool;
+ c->features = feature_vector;
+ c->config_string_vector = config_string;
+
+ /* Allocate copy of config string in heap.
+ VLIB buffers will maintain pointers to heap as they read out
+ configuration data. */
+ c->config_string_heap_index
+ = heap_alloc (cm->config_string_heap, vec_len (config_string) + 1,
+ c->config_string_heap_handle);
+
+ /* First element in heap points back to pool index. */
+ d =
+ vec_elt_at_index (cm->config_string_heap,
+ c->config_string_heap_index);
+ d[0] = c->index;
+ clib_memcpy (d + 1, config_string, vec_bytes (config_string));
+ hash_set_mem (cm->config_string_hash, config_string, c->index);
+
+ c->reference_count = 0; /* will be incremented by caller. */
+ }
+
+ return c;
+}
+
+void
+vnet_config_init (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ char *start_node_names[],
+ int n_start_node_names,
+ char *feature_node_names[], int n_feature_node_names)
+{
+ vlib_node_t *n;
+ u32 i;
+
+ memset (cm, 0, sizeof (cm[0]));
+
+ cm->config_string_hash =
+ hash_create_vec (0,
+ STRUCT_SIZE_OF (vnet_config_t, config_string_vector[0]),
+ sizeof (uword));
+
+ ASSERT (n_feature_node_names >= 1);
+
+ vec_resize (cm->start_node_indices, n_start_node_names);
+ for (i = 0; i < n_start_node_names; i++)
+ {
+ n = vlib_get_node_by_name (vm, (u8 *) start_node_names[i]);
+ /* Given node name must exist. */
+ ASSERT (n != 0);
+ cm->start_node_indices[i] = n->index;
+ }
+
+ vec_resize (cm->node_index_by_feature_index, n_feature_node_names);
+ for (i = 0; i < n_feature_node_names; i++)
+ {
+ if (!feature_node_names[i])
+ cm->node_index_by_feature_index[i] = ~0;
+ else
+ {
+ n = vlib_get_node_by_name (vm, (u8 *) feature_node_names[i]);
+ /* Given node may exist in plug-in library which is not present */
+ if (n)
+ {
+ if (i + 1 == n_feature_node_names)
+ cm->end_node_index = n->index;
+ cm->node_index_by_feature_index[i] = n->index;
+ }
+ else
+ cm->node_index_by_feature_index[i] = ~0;
+ }
+ }
+}
+
+static void
+remove_reference (vnet_config_main_t * cm, vnet_config_t * c)
+{
+ ASSERT (c->reference_count > 0);
+ c->reference_count -= 1;
+ if (c->reference_count == 0)
+ {
+ hash_unset (cm->config_string_hash, c->config_string_vector);
+ vnet_config_free (cm, c);
+ pool_put (cm->config_pool, c);
+ }
+}
+
+static int
+feature_cmp (void *a1, void *a2)
+{
+ vnet_config_feature_t *f1 = a1;
+ vnet_config_feature_t *f2 = a2;
+
+ return (int) f1->feature_index - f2->feature_index;
+}
+
+always_inline u32 *
+vnet_get_config_heap (vnet_config_main_t * cm, u32 ci)
+{
+ return heap_elt_at_index (cm->config_string_heap, ci);
+}
+
+u32
+vnet_config_add_feature (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ u32 config_string_heap_index,
+ u32 feature_index,
+ void *feature_config, u32 n_feature_config_bytes)
+{
+ vnet_config_t *old, *new;
+ vnet_config_feature_t *new_features, *f;
+ u32 n_feature_config_u32s;
+ u32 node_index = vec_elt (cm->node_index_by_feature_index, feature_index);
+
+ if (node_index == ~0) // feature node does not exist
+ return config_string_heap_index; // return original config index
+
+ if (config_string_heap_index == ~0)
+ {
+ old = 0;
+ new_features = 0;
+ }
+ else
+ {
+ u32 *p = vnet_get_config_heap (cm, config_string_heap_index);
+ old = pool_elt_at_index (cm->config_pool, p[-1]);
+ new_features = old->features;
+ if (new_features)
+ new_features = duplicate_feature_vector (new_features);
+ }
+
+ vec_add2 (new_features, f, 1);
+ f->feature_index = feature_index;
+ f->node_index = node_index;
+
+ n_feature_config_u32s =
+ round_pow2 (n_feature_config_bytes,
+ sizeof (f->feature_config[0])) /
+ sizeof (f->feature_config[0]);
+ vec_add (f->feature_config, feature_config, n_feature_config_u32s);
+
+ /* Sort (prioritize) features. */
+ if (vec_len (new_features) > 1)
+ vec_sort_with_function (new_features, feature_cmp);
+
+ if (old)
+ remove_reference (cm, old);
+
+ new = find_config_with_features (vm, cm, new_features);
+ new->reference_count += 1;
+
+ /*
+ * The user index is config_string_heap_index + 1: element 0 of the
+ * heap block stores the owning config pool index, so a reader can
+ * recover it via p[-1] (see vnet_get_config_heap usage above).
+ */
+ vec_validate (cm->config_pool_index_by_user_index,
+ new->config_string_heap_index + 1);
+ cm->config_pool_index_by_user_index[new->config_string_heap_index + 1]
+ = new - cm->config_pool;
+ return new->config_string_heap_index + 1;
+}
+
+u32
+vnet_config_del_feature (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ u32 config_string_heap_index,
+ u32 feature_index,
+ void *feature_config, u32 n_feature_config_bytes)
+{
+ vnet_config_t *old, *new;
+ vnet_config_feature_t *new_features, *f;
+ u32 n_feature_config_u32s;
+
+ {
+ u32 *p = vnet_get_config_heap (cm, config_string_heap_index);
+
+ old = pool_elt_at_index (cm->config_pool, p[-1]);
+ }
+
+ n_feature_config_u32s =
+ round_pow2 (n_feature_config_bytes,
+ sizeof (f->feature_config[0])) /
+ sizeof (f->feature_config[0]);
+
+ /* Find feature with same index and opaque data. */
+ vec_foreach (f, old->features)
+ {
+ if (f->feature_index == feature_index
+ && vec_len (f->feature_config) == n_feature_config_u32s
+ && (n_feature_config_u32s == 0
+ || !memcmp (f->feature_config, feature_config,
+ n_feature_config_bytes)))
+ break;
+ }
+
+ /* Feature not found. */
+ if (f >= vec_end (old->features))
+ return config_string_heap_index; // return original config index
+
+ new_features = duplicate_feature_vector (old->features);
+ f = new_features + (f - old->features);
+ vnet_config_feature_free (f);
+ vec_delete (new_features, 1, f - new_features);
+
+ /* Remove old from the config pool now: find_config_with_features()
+ may expand the pool (changing its memory location) if it must add a
+ new config because no existing config has a matching feature set. */
+ remove_reference (cm, old);
+ new = find_config_with_features (vm, cm, new_features);
+ new->reference_count += 1;
+
+ vec_validate (cm->config_pool_index_by_user_index,
+ new->config_string_heap_index + 1);
+ cm->config_pool_index_by_user_index[new->config_string_heap_index + 1]
+ = new - cm->config_pool;
+ return new->config_string_heap_index + 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/config.h b/src/vnet/config.h
new file mode 100644
index 00000000..b77a7794
--- /dev/null
+++ b/src/vnet/config.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * config.h: feature configuration
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_config_h
+#define included_vnet_config_h
+
+#include <vlib/vlib.h>
+#include <vppinfra/heap.h>
+
+typedef struct
+{
+ /* Features are prioritized by index. Smaller indices get
+ performed first. */
+ u32 feature_index;
+
+ /* VLIB node which performs feature. */
+ u32 node_index;
+
+ /* Next index relative to previous node or main node. */
+ u32 next_index;
+
+ /* Opaque per feature configuration data. */
+ u32 *feature_config;
+} vnet_config_feature_t;
+
+always_inline void
+vnet_config_feature_free (vnet_config_feature_t * f)
+{
+ vec_free (f->feature_config);
+}
+
+typedef struct
+{
+ /* Sorted vector of features for this configuration. */
+ vnet_config_feature_t *features;
+
+ /* Config string as vector for hashing. */
+ u32 *config_string_vector;
+
+ /* Config string including all next indices and feature data as a vector. */
+ u32 config_string_heap_index, config_string_heap_handle;
+
+ /* Index in main pool. */
+ u32 index;
+
+ /* Number of interfaces/traffic classes that reference this config. */
+ u32 reference_count;
+} vnet_config_t;
+
+typedef struct
+{
+ /* Pool of configs. Index 0 is always null config and is never deleted. */
+ vnet_config_t *config_pool;
+
+ /* Hash table mapping vector config string to config pool index. */
+ uword *config_string_hash;
+
+ /* Global heap of configuration data. */
+ u32 *config_string_heap;
+
+ /* Node index which starts/ends feature processing. */
+ u32 *start_node_indices, end_node_index;
+
+ /* Interior feature processing nodes (not including start and end nodes). */
+ u32 *node_index_by_feature_index;
+
+ /* vnet_config pool index by user index */
+ u32 *config_pool_index_by_user_index;
+
+ /* Temporary vector for holding config strings. Used to avoid continually
+ allocating vectors. */
+ u32 *config_string_temp;
+} vnet_config_main_t;
+
+always_inline void
+vnet_config_free (vnet_config_main_t * cm, vnet_config_t * c)
+{
+ vnet_config_feature_t *f;
+ vec_foreach (f, c->features) vnet_config_feature_free (f);
+ vec_free (c->features);
+ heap_dealloc (cm->config_string_heap, c->config_string_heap_handle);
+ vec_free (c->config_string_vector);
+}
+
+always_inline void *
+vnet_get_config_data (vnet_config_main_t * cm,
+ u32 * config_index, u32 * next_index, u32 n_data_bytes)
+{
+ u32 i, n, *d;
+
+ i = *config_index;
+
+ d = heap_elt_at_index (cm->config_string_heap, i);
+
+ n = round_pow2 (n_data_bytes, sizeof (d[0])) / sizeof (d[0]);
+
+ /* Last 32 bits are next index. */
+ *next_index = d[n];
+
+ /* Advance config index to next config. */
+ *config_index = (i + n + 1);
+
+ /* Return config data to user for this feature. */
+ return (void *) d;
+}
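+/*
+ * Illustrative walk-through: the heap block is laid out as
+ * [next_0][data_0][next_1][data_1]...[next_end], and each node
+ * consumes its own data plus the next-index that follows it.  The
+ * start node fetches with 0 data bytes to pick up next_0; feature N
+ * then does, e.g. for 8 bytes of opaque data:
+ *
+ *   my_data = vnet_get_config_data (cm, &config_index, &next, 8);
+ *
+ * which returns the data pointer, reads the following next index, and
+ * advances config_index to the next feature's data.
+ */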
+
+void vnet_config_init (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ char *start_node_names[],
+ int n_start_node_names,
+ char *feature_node_names[], int n_feature_node_names);
+
+/* Calls to add/delete features from configurations. */
+u32 vnet_config_add_feature (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ u32 config_id,
+ u32 feature_index,
+ void *feature_config,
+ u32 n_feature_config_bytes);
+
+u32 vnet_config_del_feature (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ u32 config_id,
+ u32 feature_index,
+ void *feature_config,
+ u32 n_feature_config_bytes);
+
+u8 *vnet_config_format_features (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ u32 config_index, u8 * s);
+
+#endif /* included_vnet_config_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/cop/cop.api b/src/vnet/cop/cop.api
new file mode 100644
index 00000000..69316001
--- /dev/null
+++ b/src/vnet/cop/cop.api
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /** \brief cop: enable/disable junk filtration features on an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - desired interface
+ @param enable_disable - 1 => enable, 0 => disable
+*/
+
+autoreply define cop_interface_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 enable_disable;
+};
+
+/** \brief cop: enable/disable whitelist filtration features on an interface
+ Note: the supplied fib_id must match in order to remove the feature!
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface handle, physical interfaces only
+ @param fib_id - fib identifier for the whitelist / blacklist fib
+ @param ip4 - 1 => enable ip4 filtration, 0 => disable ip4 filtration
+ @param ip6 - 1 => enable ip6 filtration, 0 => disable ip6 filtration
+ @param default_cop - 1 => enable non-ip4, non-ip6 filtration, 0 => disable it
+*/
+
+autoreply define cop_whitelist_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 fib_id;
+ u8 ip4;
+ u8 ip6;
+ u8 default_cop;
+};
+
+ /*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
diff --git a/src/vnet/cop/cop.c b/src/vnet/cop/cop.c
new file mode 100644
index 00000000..465d6c97
--- /dev/null
+++ b/src/vnet/cop/cop.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/cop/cop.h>
+
+cop_main_t cop_main;
+
+static clib_error_t *
+cop_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+ cop_main_t * cm = &cop_main;
+ cop_config_data_t _data, *data = &_data;
+ vlib_main_t * vm = cm->vlib_main;
+ vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ cop_config_main_t * ccm;
+ int address_family;
+ u32 ci, default_next;
+
+ memset (data, 0, sizeof(*data));
+
+ /*
+ * Ignore local interface, pg interfaces. $$$ need a #define for the
+ * first "real" interface. The answer is 5 at the moment.
+ */
+ if (hi->dev_class_index == vnet_local_interface_device_class.index)
+ return 0;
+
+ for (address_family = VNET_COP_IP4; address_family < VNET_N_COPS;
+ address_family++)
+ {
+ ccm = &cm->cop_config_mains[address_family];
+
+ /*
+ * Once-only code to initialize the per-address-family
+ * cop feature subgraphs.
+ * Since the (single) start-node, cop-input, must be able
+ * to push pkts into three separate subgraphs, we
+ * use a unified cop_feature_type_t enumeration.
+ */
+
+ if (!(ccm->config_main.node_index_by_feature_index))
+ {
+ switch (address_family)
+ {
+ case VNET_COP_IP4:
+ {
+ static char * start_nodes[] = { "cop-input" };
+ static char * feature_nodes[] = {
+ [IP4_RX_COP_WHITELIST] = "ip4-cop-whitelist",
+ [IP4_RX_COP_INPUT] = "ip4-input",
+ };
+
+ vnet_config_init (vm, &ccm->config_main,
+ start_nodes, ARRAY_LEN(start_nodes),
+ feature_nodes, ARRAY_LEN(feature_nodes));
+ }
+ break;
+ case VNET_COP_IP6:
+ {
+ static char * start_nodes[] = { "cop-input" };
+ static char * feature_nodes[] = {
+ [IP6_RX_COP_WHITELIST] = "ip6-cop-whitelist",
+ [IP6_RX_COP_INPUT] = "ip6-input",
+ };
+ vnet_config_init (vm, &ccm->config_main,
+ start_nodes, ARRAY_LEN(start_nodes),
+ feature_nodes, ARRAY_LEN(feature_nodes));
+ }
+ break;
+
+ case VNET_COP_DEFAULT:
+ {
+ static char * start_nodes[] = { "cop-input" };
+ static char * feature_nodes[] = {
+ [DEFAULT_RX_COP_WHITELIST] = "default-cop-whitelist",
+ [DEFAULT_RX_COP_INPUT] = "ethernet-input",
+ };
+ vnet_config_init (vm, &ccm->config_main,
+ start_nodes, ARRAY_LEN(start_nodes),
+ feature_nodes, ARRAY_LEN(feature_nodes));
+ }
+ break;
+
+ default:
+ clib_warning ("bug");
+ break;
+ }
+ }
+ vec_validate_init_empty (ccm->config_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ci = ccm->config_index_by_sw_if_index[sw_if_index];
+
+ /* Create a sensible initial config: send pkts to xxx-input */
+ if (address_family == VNET_COP_IP4)
+ default_next = IP4_RX_COP_INPUT;
+ else if (address_family == VNET_COP_IP6)
+ default_next = IP6_RX_COP_INPUT;
+ else
+ default_next = DEFAULT_RX_COP_INPUT;
+
+ if (is_add)
+ ci = vnet_config_add_feature (vm, &ccm->config_main,
+ ci,
+ default_next,
+ data, sizeof(*data));
+ else
+ ci = vnet_config_del_feature (vm, &ccm->config_main,
+ ci,
+ default_next,
+ data, sizeof(*data));
+
+ ccm->config_index_by_sw_if_index[sw_if_index] = ci;
+ }
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (cop_sw_interface_add_del);
+
+static clib_error_t *
+cop_init (vlib_main_t *vm)
+{
+ cop_main_t * cm = &cop_main;
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, ip4_whitelist_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_whitelist_init)))
+ return error;
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main();
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (cop_init);
+
+int cop_interface_enable_disable (u32 sw_if_index, int enable_disable)
+{
+ cop_main_t * cm = &cop_main;
+ vnet_sw_interface_t * sw;
+ int rv;
+ u32 node_index = enable_disable ? cop_input_node.index : ~0;
+
+ /* Not a physical port? */
+ sw = vnet_get_sw_interface (cm->vnet_main, sw_if_index);
+ if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ /*
+ * Redirect pkts from the driver to the cop node.
+ * Returns VNET_API_ERROR_UNIMPLEMENTED if the h/w driver
+ * doesn't implement the API.
+ *
+ * Node_index = ~0 => shut off redirection
+ */
+ rv = vnet_hw_interface_rx_redirect_to_node (cm->vnet_main, sw_if_index,
+ node_index);
+ return rv;
+}
+
+static clib_error_t *
+cop_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ cop_main_t * cm = &cop_main;
+ u32 sw_if_index = ~0;
+ int enable_disable = 1;
+
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "disable"))
+ enable_disable = 0;
+ else if (unformat (input, "%U", unformat_vnet_sw_interface,
+ cm->vnet_main, &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Please specify an interface...");
+
+ rv = cop_interface_enable_disable (sw_if_index, enable_disable);
+
+ switch(rv) {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return
+ (0, "Invalid interface, only works on physical ports");
+ break;
+
+ case VNET_API_ERROR_UNIMPLEMENTED:
+ return clib_error_return (0, "Device driver doesn't support redirection");
+ break;
+
+ default:
+ return clib_error_return (0, "cop_interface_enable_disable returned %d",
+ rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (cop_interface_command, static) = {
+ .path = "cop interface",
+ .short_help =
+ "cop interface <interface-name> [disable]",
+ .function = cop_enable_disable_command_fn,
+};
+
+
+int cop_whitelist_enable_disable (cop_whitelist_enable_disable_args_t *a)
+{
+ cop_main_t * cm = &cop_main;
+ vlib_main_t * vm = cm->vlib_main;
+ ip4_main_t * im4 = &ip4_main;
+ ip6_main_t * im6 = &ip6_main;
+ int address_family;
+ int is_add;
+ cop_config_main_t * ccm;
+ u32 next_to_add_del = 0;
+ uword * p;
+ u32 fib_index = 0;
+ u32 ci;
+ cop_config_data_t _data, *data=&_data;
+
+ /*
+ * Enable / disable whitelist processing on the specified interface
+ */
+
+ for (address_family = VNET_COP_IP4; address_family < VNET_N_COPS;
+ address_family++)
+ {
+ ccm = &cm->cop_config_mains[address_family];
+
+ switch(address_family)
+ {
+ case VNET_COP_IP4:
+ is_add = (a->ip4 != 0);
+ next_to_add_del = IP4_RX_COP_WHITELIST;
+ /* configured opaque data (the fib_id) must match, or the feature is not removed */
+ p = hash_get (im4->fib_index_by_table_id, a->fib_id);
+ if (p)
+ fib_index = p[0];
+ else
+ {
+ if (is_add)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ else
+ continue;
+ }
+ break;
+
+ case VNET_COP_IP6:
+ is_add = (a->ip6 != 0);
+ next_to_add_del = IP6_RX_COP_WHITELIST;
+ p = hash_get (im6->fib_index_by_table_id, a->fib_id);
+ if (p)
+ fib_index = p[0];
+ else
+ {
+ if (is_add)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ else
+ continue;
+ }
+ break;
+
+ case VNET_COP_DEFAULT:
+ is_add = (a->default_cop != 0);
+ next_to_add_del = DEFAULT_RX_COP_WHITELIST;
+ break;
+
+ default:
+ clib_warning ("BUG");
+ }
+
+ ci = ccm->config_index_by_sw_if_index[a->sw_if_index];
+ data->fib_index = fib_index;
+
+ if (is_add)
+ ci = vnet_config_add_feature (vm, &ccm->config_main,
+ ci,
+ next_to_add_del,
+ data, sizeof (*data));
+ else
+ ci = vnet_config_del_feature (vm, &ccm->config_main,
+ ci,
+ next_to_add_del,
+ data, sizeof (*data));
+
+ ccm->config_index_by_sw_if_index[a->sw_if_index] = ci;
+ }
+ return 0;
+}
+
+static clib_error_t *
+cop_whitelist_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ cop_main_t * cm = &cop_main;
+ u32 sw_if_index = ~0;
+ u8 ip4 = 0;
+ u8 ip6 = 0;
+ u8 default_cop = 0;
+ u32 fib_id = 0;
+ int rv;
+ cop_whitelist_enable_disable_args_t _a, * a = &_a;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "ip4"))
+ ip4 = 1;
+ else if (unformat (input, "ip6"))
+ ip6 = 1;
+ else if (unformat (input, "default"))
+ default_cop = 1;
+ else if (unformat (input, "%U", unformat_vnet_sw_interface,
+ cm->vnet_main, &sw_if_index))
+ ;
+ else if (unformat (input, "fib-id %d", &fib_id))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Please specify an interface...");
+
+ a->sw_if_index = sw_if_index;
+ a->ip4 = ip4;
+ a->ip6 = ip6;
+ a->default_cop = default_cop;
+ a->fib_id = fib_id;
+
+ rv = cop_whitelist_enable_disable (a);
+
+ switch(rv) {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return
+ (0, "Invalid interface, only works on physical ports");
+ break;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return
+ (0, "Invalid fib");
+ break;
+
+ case VNET_API_ERROR_UNIMPLEMENTED:
+ return clib_error_return (0, "Device driver doesn't support redirection");
+ break;
+
+ default:
+ return clib_error_return (0, "cop_whitelist_enable_disable returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (cop_whitelist_command, static) = {
+ .path = "cop whitelist",
+ .short_help =
+ "cop whitelist <interface-name> [ip4][ip6][default][fib-id <NN>][disable]",
+ .function = cop_whitelist_enable_disable_command_fn,
+};
+
diff --git a/src/vnet/cop/cop.h b/src/vnet/cop/cop.h
new file mode 100644
index 00000000..eb5f1dfd
--- /dev/null
+++ b/src/vnet/cop/cop.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __vnet_cop_h__
+#define __vnet_cop_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+typedef enum {
+ VNET_COP_IP4,
+ VNET_COP_IP6,
+ VNET_COP_DEFAULT,
+ VNET_N_COPS,
+} vnet_cop_t;
+
+typedef enum {
+ /* First check src address against whitelist */
+ IP4_RX_COP_WHITELIST,
+ IP6_RX_COP_WHITELIST,
+ DEFAULT_RX_COP_WHITELIST,
+
+ /* Pkts not otherwise dropped go to xxx-input */
+ IP4_RX_COP_INPUT,
+ IP6_RX_COP_INPUT,
+ DEFAULT_RX_COP_INPUT,
+
+ /* Going, going, gone... */
+ RX_COP_DROP,
+
+ COP_RX_N_FEATURES,
+} cop_feature_type_t;
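+/*
+ * Note (illustrative): a single cop-input start node feeds three
+ * per-address-family subgraphs, so one unified feature enumeration is
+ * used; e.g. the ip4 config main registers
+ * [IP4_RX_COP_WHITELIST] = "ip4-cop-whitelist" and
+ * [IP4_RX_COP_INPUT] = "ip4-input" as its feature nodes (see cop.c).
+ */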
+
+typedef struct {
+ vnet_config_main_t config_main;
+ u32 * config_index_by_sw_if_index;
+} cop_config_main_t;
+
+typedef struct {
+ u32 fib_index;
+} cop_config_data_t;
+
+typedef struct {
+ cop_config_main_t cop_config_mains[VNET_N_COPS];
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} cop_main_t;
+
+extern cop_main_t cop_main;
+
+extern vlib_node_registration_t cop_input_node;
+
+int cop_interface_enable_disable (u32 sw_if_index, int enable_disable);
+
+typedef struct {
+ u32 sw_if_index;
+ u8 ip4;
+ u8 ip6;
+ u8 default_cop;
+ u32 fib_id;
+} cop_whitelist_enable_disable_args_t;
+
+int cop_whitelist_enable_disable (cop_whitelist_enable_disable_args_t *a);
+
+#endif /* __vnet_cop_h__ */
diff --git a/src/vnet/cop/cop_api.c b/src/vnet/cop/cop_api.c
new file mode 100644
index 00000000..95d5a9fe
--- /dev/null
+++ b/src/vnet/cop/cop_api.c
@@ -0,0 +1,141 @@
+/*
+ *------------------------------------------------------------------
+ * cop_api.c - cop api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/cop/cop.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define endian-swap functions */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(COP_INTERFACE_ENABLE_DISABLE, cop_interface_enable_disable) \
+_(COP_WHITELIST_ENABLE_DISABLE, cop_whitelist_enable_disable)
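+
+/*
+ * Each (ID, name) pair above is expanded by cop_api_hookup() below into
+ * a vl_msg_api_set_handlers() call, wiring the shared-memory message to
+ * its handler, endian-swap and print functions.
+ */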
+
+static void vl_api_cop_interface_enable_disable_t_handler
+ (vl_api_cop_interface_enable_disable_t * mp)
+{
+ vl_api_cop_interface_enable_disable_reply_t *rmp;
+ int rv;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int enable_disable;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ enable_disable = (int) mp->enable_disable;
+
+ rv = cop_interface_enable_disable (sw_if_index, enable_disable);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_COP_INTERFACE_ENABLE_DISABLE_REPLY);
+}
+
+static void vl_api_cop_whitelist_enable_disable_t_handler
+ (vl_api_cop_whitelist_enable_disable_t * mp)
+{
+ vl_api_cop_whitelist_enable_disable_reply_t *rmp;
+ cop_whitelist_enable_disable_args_t _a, *a = &_a;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ a->sw_if_index = sw_if_index;
+ a->ip4 = mp->ip4;
+ a->ip6 = mp->ip6;
+ a->default_cop = mp->default_cop;
+ a->fib_id = ntohl (mp->fib_id);
+
+ rv = cop_whitelist_enable_disable (a);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_COP_WHITELIST_ENABLE_DISABLE_REPLY);
+}
+
+/*
+ * cop_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_cop;
+#undef _
+}
+
+static clib_error_t *
+cop_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (cop_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c
new file mode 100644
index 00000000..1b5e336b
--- /dev/null
+++ b/src/vnet/cop/ip4_whitelist.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/cop/cop.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/load_balance.h>
+
+typedef struct {
+ u32 next_index;
+ u32 sw_if_index;
+} ip4_cop_whitelist_trace_t;
+
+/* packet trace format function */
+static u8 * format_ip4_cop_whitelist_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip4_cop_whitelist_trace_t * t = va_arg (*args, ip4_cop_whitelist_trace_t *);
+
+ s = format (s, "IP4_COP_WHITELIST: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+vlib_node_registration_t ip4_cop_whitelist_node;
+
+#define foreach_ip4_cop_whitelist_error \
+_(DROPPED, "ip4 cop whitelist packets dropped")
+
+typedef enum {
+#define _(sym,str) IP4_COP_WHITELIST_ERROR_##sym,
+ foreach_ip4_cop_whitelist_error
+#undef _
+ IP4_COP_WHITELIST_N_ERROR,
+} ip4_cop_whitelist_error_t;
+
+static char * ip4_cop_whitelist_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip4_cop_whitelist_error
+#undef _
+};
+
+static uword
+ip4_cop_whitelist_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ cop_feature_type_t next_index;
+ cop_main_t *cm = &cop_main;
+ vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
+ u32 thread_index = vm->thread_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ip4_header_t * ip0, * ip1;
+ cop_config_main_t * ccm0, * ccm1;
+ cop_config_data_t * c0, * c1;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ u32 lb_index0, lb_index1;
+ const load_balance_t * lb0, *lb1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ ccm0 = cm->cop_config_mains + VNET_COP_IP4;
+
+ c0 = vnet_get_config_data
+ (&ccm0->config_main,
+ &vnet_buffer (b0)->cop.current_config_index,
+ &next0,
+ sizeof (c0[0]));
+
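+ /* The whitelist is a set of receive routes in the configured FIB:
+ walk the 16-8-8 mtrie on the *source* address; the packet is
+ admitted only if the best match resolves to a receive DPO. */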
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
+ &ip0->src_address, 2);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
+ &ip0->src_address, 3);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ ASSERT (lb_index0
+ == ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index),
+ &ip0->src_address));
+ lb0 = load_balance_get (lb_index0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE))
+ {
+ b0->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED];
+ next0 = RX_COP_DROP;
+ }
+
+ b1 = vlib_get_buffer (vm, bi1);
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ ip1 = vlib_buffer_get_current (b1);
+
+ ccm1 = cm->cop_config_mains + VNET_COP_IP4;
+
+ c1 = vnet_get_config_data
+ (&ccm1->config_main,
+ &vnet_buffer (b1)->cop.current_config_index,
+ &next1,
+ sizeof (c1[0]));
+ mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
+
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
+
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
+ &ip1->src_address, 2);
+
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
+ &ip1->src_address, 3);
+
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ ASSERT (lb_index1
+ == ip4_fib_table_lookup_lb (ip4_fib_get(c1->fib_index),
+ &ip1->src_address));
+ lb1 = load_balance_get (lb_index1);
+ dpo1 = load_balance_get_bucket_i(lb1, 0);
+
+ vlib_increment_combined_counter
+ (vcm, thread_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, b0)
+ + sizeof(ethernet_header_t));
+
+ vlib_increment_combined_counter
+ (vcm, thread_index, lb_index1, 1,
+ vlib_buffer_length_in_chain (vm, b1)
+ + sizeof(ethernet_header_t));
+
+
+ if (PREDICT_FALSE(dpo1->dpoi_type != DPO_RECEIVE))
+ {
+ b1->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED];
+ next1 = RX_COP_DROP;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip4_cop_whitelist_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip4_cop_whitelist_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ cop_config_main_t *ccm0;
+ cop_config_data_t *c0;
+ ip4_fib_mtrie_t * mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ u32 lb_index0;
+ const load_balance_t * lb0;
+ const dpo_id_t *dpo0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ ccm0 = cm->cop_config_mains + VNET_COP_IP4;
+
+ c0 = vnet_get_config_data
+ (&ccm0->config_main,
+ &vnet_buffer (b0)->cop.current_config_index,
+ &next0,
+ sizeof (c0[0]));
+
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
+ &ip0->src_address, 2);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
+ &ip0->src_address, 3);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ ASSERT (lb_index0
+ == ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index),
+ &ip0->src_address));
+
+ lb0 = load_balance_get (lb_index0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ vlib_increment_combined_counter
+ (vcm, thread_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, b0)
+ + sizeof(ethernet_header_t));
+
+ if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE))
+ {
+ b0->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED];
+ next0 = RX_COP_DROP;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip4_cop_whitelist_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip4_cop_whitelist_node) = {
+ .function = ip4_cop_whitelist_node_fn,
+ .name = "ip4-cop-whitelist",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_cop_whitelist_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ip4_cop_whitelist_error_strings),
+ .error_strings = ip4_cop_whitelist_error_strings,
+
+ .n_next_nodes = COP_RX_N_FEATURES,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [IP4_RX_COP_WHITELIST] = "ip4-cop-whitelist",
+ [IP6_RX_COP_WHITELIST] = "ip6-cop-whitelist",
+ [DEFAULT_RX_COP_WHITELIST] = "default-cop-whitelist",
+ [IP4_RX_COP_INPUT] = "ip4-input",
+ [IP6_RX_COP_INPUT] = "ip6-input",
+ [DEFAULT_RX_COP_INPUT] = "ethernet-input",
+ [RX_COP_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_cop_whitelist_node, ip4_cop_whitelist_node_fn)
+
+static clib_error_t *
+ip4_whitelist_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_whitelist_init);
diff --git a/src/vnet/cop/ip6_whitelist.c b/src/vnet/cop/ip6_whitelist.c
new file mode 100644
index 00000000..f3fe62e3
--- /dev/null
+++ b/src/vnet/cop/ip6_whitelist.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/cop/cop.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/load_balance.h>
+
+typedef struct {
+ u32 next_index;
+ u32 sw_if_index;
+} ip6_cop_whitelist_trace_t;
+
+/* packet trace format function */
+static u8 * format_ip6_cop_whitelist_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_cop_whitelist_trace_t * t = va_arg (*args, ip6_cop_whitelist_trace_t *);
+
+ s = format (s, "IP6_COP_WHITELIST: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+vlib_node_registration_t ip6_cop_whitelist_node;
+
+#define foreach_ip6_cop_whitelist_error \
+_(DROPPED, "ip6 cop whitelist packets dropped")
+
+typedef enum {
+#define _(sym,str) IP6_COP_WHITELIST_ERROR_##sym,
+ foreach_ip6_cop_whitelist_error
+#undef _
+ IP6_COP_WHITELIST_N_ERROR,
+} ip6_cop_whitelist_error_t;
+
+static char * ip6_cop_whitelist_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_cop_whitelist_error
+#undef _
+};
+
+static uword
+ip6_cop_whitelist_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ cop_feature_type_t next_index;
+ cop_main_t *cm = &cop_main;
+ ip6_main_t * im6 = &ip6_main;
+ vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
+ u32 thread_index = vm->thread_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ip6_header_t * ip0, * ip1;
+ cop_config_main_t * ccm0, * ccm1;
+ cop_config_data_t * c0, * c1;
+ u32 lb_index0, lb_index1;
+ const load_balance_t * lb0, *lb1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ ccm0 = cm->cop_config_mains + VNET_COP_IP6;
+
+ c0 = vnet_get_config_data
+ (&ccm0->config_main,
+ &vnet_buffer (b0)->cop.current_config_index,
+ &next0,
+ sizeof (c0[0]));
+
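+ /* Same idea as the ip4 node: whitelisting is a source-address
+ lookup in the configured FIB, here via the full ip6 forwarding
+ lookup rather than an inline mtrie walk. */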
+ lb_index0 = ip6_fib_table_fwding_lookup (im6, c0->fib_index,
+ &ip0->src_address);
+ lb0 = load_balance_get (lb_index0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE))
+ {
+ b0->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED];
+ next0 = RX_COP_DROP;
+ }
+
+ b1 = vlib_get_buffer (vm, bi1);
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ ip1 = vlib_buffer_get_current (b1);
+
+ ccm1 = cm->cop_config_mains + VNET_COP_IP6;
+
+ c1 = vnet_get_config_data
+ (&ccm1->config_main,
+ &vnet_buffer (b1)->cop.current_config_index,
+ &next1,
+ sizeof (c1[0]));
+
+ lb_index1 = ip6_fib_table_fwding_lookup (im6, c1->fib_index,
+ &ip1->src_address);
+
+ lb1 = load_balance_get (lb_index1);
+ dpo1 = load_balance_get_bucket_i(lb1, 0);
+
+ vlib_increment_combined_counter
+ (vcm, thread_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, b0)
+ + sizeof(ethernet_header_t));
+
+ vlib_increment_combined_counter
+ (vcm, thread_index, lb_index1, 1,
+ vlib_buffer_length_in_chain (vm, b1)
+ + sizeof(ethernet_header_t));
+
+ if (PREDICT_FALSE(dpo1->dpoi_type != DPO_RECEIVE))
+ {
+ b1->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED];
+ next1 = RX_COP_DROP;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_cop_whitelist_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_cop_whitelist_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ip6_header_t * ip0;
+ cop_config_main_t *ccm0;
+ cop_config_data_t *c0;
+ u32 lb_index0;
+ const load_balance_t * lb0;
+ const dpo_id_t *dpo0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ ccm0 = cm->cop_config_mains + VNET_COP_IP6;
+
+ c0 = vnet_get_config_data
+ (&ccm0->config_main,
+ &vnet_buffer (b0)->cop.current_config_index,
+ &next0,
+ sizeof (c0[0]));
+
+ lb_index0 = ip6_fib_table_fwding_lookup (im6, c0->fib_index,
+ &ip0->src_address);
+
+ lb0 = load_balance_get (lb_index0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ vlib_increment_combined_counter
+ (vcm, thread_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, b0)
+ + sizeof(ethernet_header_t));
+
+ if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE))
+ {
+ b0->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED];
+ next0 = RX_COP_DROP;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_cop_whitelist_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip6_cop_whitelist_node) = {
+ .function = ip6_cop_whitelist_node_fn,
+ .name = "ip6-cop-whitelist",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_cop_whitelist_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ip6_cop_whitelist_error_strings),
+ .error_strings = ip6_cop_whitelist_error_strings,
+
+ .n_next_nodes = COP_RX_N_FEATURES,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [IP4_RX_COP_WHITELIST] = "ip4-cop-whitelist",
+ [IP6_RX_COP_WHITELIST] = "ip6-cop-whitelist",
+ [DEFAULT_RX_COP_WHITELIST] = "default-cop-whitelist",
+ [IP4_RX_COP_INPUT] = "ip4-input",
+ [IP6_RX_COP_INPUT] = "ip6-input",
+ [DEFAULT_RX_COP_INPUT] = "ethernet-input",
+ [RX_COP_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_cop_whitelist_node, ip6_cop_whitelist_node_fn)
+
+static clib_error_t *
+ip6_whitelist_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_whitelist_init);
diff --git a/src/vnet/cop/node1.c b/src/vnet/cop/node1.c
new file mode 100644
index 00000000..b448b531
--- /dev/null
+++ b/src/vnet/cop/node1.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/cop/cop.h>
+
+typedef struct {
+ u32 next_index;
+ u32 sw_if_index;
+} cop_input_trace_t;
+
+/* packet trace format function */
+static u8 * format_cop_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ cop_input_trace_t * t = va_arg (*args, cop_input_trace_t *);
+
+ s = format (s, "COP_INPUT: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+vlib_node_registration_t cop_input_node;
+
+#define foreach_cop_input_error \
+_(PROCESSED, "COP input packets processed")
+
+typedef enum {
+#define _(sym,str) COP_INPUT_ERROR_##sym,
+ foreach_cop_input_error
+#undef _
+ COP_INPUT_N_ERROR,
+} cop_input_error_t;
+
+static char * cop_input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_cop_input_error
+#undef _
+};
+
+static uword
+cop_input_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ cop_feature_type_t next_index;
+ cop_main_t *cm = &cop_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t * en0, * en1;
+ cop_config_main_t * ccm0, * ccm1;
+ u32 advance0, advance1;
+ int proto0, proto1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ en0 = vlib_buffer_get_current (b0);
+ en1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ proto0 = VNET_COP_DEFAULT;
+ proto1 = VNET_COP_DEFAULT;
+ advance0 = 0;
+ advance1 = 0;
+
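+ /* Classify by ethertype: ip4/ip6 packets are advanced past the
+ ethernet header so the whitelist nodes see the IP header;
+ everything else stays at the ethernet header on the default arc. */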
+ if (en0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))
+ {
+ proto0 = VNET_COP_IP4;
+ advance0 = sizeof(ethernet_header_t);
+ }
+ else if (en0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP6))
+ {
+ proto0 = VNET_COP_IP6;
+ advance0 = sizeof(ethernet_header_t);
+ }
+
+ if (en1->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))
+ {
+ proto1 = VNET_COP_IP4;
+ advance1 = sizeof(ethernet_header_t);
+ }
+ else if (en1->type == clib_host_to_net_u16(ETHERNET_TYPE_IP6))
+ {
+ proto1 = VNET_COP_IP6;
+ advance1 = sizeof(ethernet_header_t);
+ }
+
+ ccm0 = cm->cop_config_mains + proto0;
+ ccm1 = cm->cop_config_mains + proto1;
+ vnet_buffer(b0)->cop.current_config_index =
+ ccm0->config_index_by_sw_if_index [sw_if_index0];
+
+ vnet_buffer(b1)->cop.current_config_index =
+ ccm1->config_index_by_sw_if_index [sw_if_index1];
+
+ vlib_buffer_advance (b0, advance0);
+ vlib_buffer_advance (b1, advance1);
+
+ vnet_get_config_data (&ccm0->config_main,
+ &vnet_buffer(b0)->cop.current_config_index,
+ &next0, 0 /* bytes of config data */);
+
+ vnet_get_config_data (&ccm1->config_main,
+ &vnet_buffer(b1)->cop.current_config_index,
+ &next1, 0 /* bytes of config data */);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ cop_input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ cop_input_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ }
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *en0;
+ cop_config_main_t *ccm0;
+ u32 advance0;
+ int proto0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /*
+ * Direct from the driver, we should be at offset 0
+ * aka at &b0->data[0]
+ */
+ ASSERT (b0->current_data == 0);
+
+ en0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ proto0 = VNET_COP_DEFAULT;
+ advance0 = 0;
+
+ if (en0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))
+ {
+ proto0 = VNET_COP_IP4;
+ advance0 = sizeof(ethernet_header_t);
+ }
+ else if (en0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP6))
+ {
+ proto0 = VNET_COP_IP6;
+ advance0 = sizeof(ethernet_header_t);
+ }
+
+ ccm0 = cm->cop_config_mains + proto0;
+ vnet_buffer(b0)->cop.current_config_index =
+ ccm0->config_index_by_sw_if_index [sw_if_index0];
+
+ vlib_buffer_advance (b0, advance0);
+
+ vnet_get_config_data (&ccm0->config_main,
+ &vnet_buffer(b0)->cop.current_config_index,
+ &next0, 0 /* bytes of config data */);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ cop_input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, cop_input_node.index,
+ COP_INPUT_ERROR_PROCESSED, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (cop_input_node) = {
+ .function = cop_input_node_fn,
+ .name = "cop-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_cop_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(cop_input_error_strings),
+ .error_strings = cop_input_error_strings,
+
+ .n_next_nodes = COP_RX_N_FEATURES,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [IP4_RX_COP_WHITELIST] = "ip4-cop-whitelist",
+ [IP6_RX_COP_WHITELIST] = "ip6-cop-whitelist",
+ [DEFAULT_RX_COP_WHITELIST] = "default-cop-whitelist",
+ [IP4_RX_COP_INPUT] = "ip4-input",
+ [IP6_RX_COP_INPUT] = "ip6-input",
+ [DEFAULT_RX_COP_INPUT] = "ethernet-input",
+ [RX_COP_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (cop_input_node, cop_input_node_fn)
+
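+/*
+ * "default-cop-whitelist" is referenced as a next node above but has no
+ * real implementation yet; this stub exists only so the node graph
+ * resolves, and warns if it is ever dispatched.
+ */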
+#define foreach_cop_stub \
+_(default-cop-whitelist, default_cop_whitelist)
+
+#define _(n,f) \
+ \
+static uword \
+f##_node_fn (vlib_main_t * vm, \
+ vlib_node_runtime_t * node, \
+ vlib_frame_t * frame) \
+{ \
+ clib_warning ("BUG: stub function called"); \
+ return 0; \
+} \
+ \
+VLIB_REGISTER_NODE (f##_input_node) = { \
+ .function = f##_node_fn, \
+ .name = #n, \
+ .vector_size = sizeof (u32), \
+ .type = VLIB_NODE_TYPE_INTERNAL, \
+ \
+ .n_errors = 0, \
+ .error_strings = 0, \
+ \
+ .n_next_nodes = 0, \
+};
+
+foreach_cop_stub;
+
diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api
new file mode 100644
index 00000000..8d40ad60
--- /dev/null
+++ b/src/vnet/devices/af_packet/af_packet.api
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Create host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if_name - interface name
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+*/
+define af_packet_create
+{
+ u32 client_index;
+ u32 context;
+
+ u8 host_if_name[64];
+ u8 hw_addr[6];
+ u8 use_random_hw_addr;
+};
+
+/** \brief Create host-interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software interface index of the new host interface
+*/
+define af_packet_create_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Delete host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if_name - interface name
+*/
+autoreply define af_packet_delete
+{
+ u32 client_index;
+ u32 context;
+
+ u8 host_if_name[64];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
new file mode 100644
index 00000000..32696014
--- /dev/null
+++ b/src/vnet/devices/af_packet/af_packet.c
@@ -0,0 +1,433 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet.c - linux kernel packet interface
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <vppinfra/linux/sysfs.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/devices/af_packet/af_packet.h>
+
+#define AF_PACKET_DEBUG_SOCKET 0
+
+#define AF_PACKET_TX_FRAMES_PER_BLOCK 1024
+#define AF_PACKET_TX_FRAME_SIZE (2048 * 5)
+#define AF_PACKET_TX_BLOCK_NR 1
+#define AF_PACKET_TX_FRAME_NR (AF_PACKET_TX_BLOCK_NR * \
+ AF_PACKET_TX_FRAMES_PER_BLOCK)
+#define AF_PACKET_TX_BLOCK_SIZE (AF_PACKET_TX_FRAME_SIZE * \
+ AF_PACKET_TX_FRAMES_PER_BLOCK)
+
+#define AF_PACKET_RX_FRAMES_PER_BLOCK 1024
+#define AF_PACKET_RX_FRAME_SIZE (2048 * 5)
+#define AF_PACKET_RX_BLOCK_NR 1
+#define AF_PACKET_RX_FRAME_NR (AF_PACKET_RX_BLOCK_NR * \
+ AF_PACKET_RX_FRAMES_PER_BLOCK)
+#define AF_PACKET_RX_BLOCK_SIZE (AF_PACKET_RX_FRAME_SIZE * \
+ AF_PACKET_RX_FRAMES_PER_BLOCK)
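+
+/*
+ * Ring geometry: one block of 1024 frames of 10240 bytes for each of
+ * rx and tx. Both rings are mmapped as a single region; the tx ring
+ * starts immediately after the rx ring.
+ */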
+
+#if AF_PACKET_DEBUG_SOCKET == 1
+#define DBG_SOCK(args...) clib_warning(args);
+#else
+#define DBG_SOCK(args...)
+#endif
+
+/* defined in net/if.h but clashes with dpdk headers */
+unsigned int if_nametoindex (const char *ifname);
+
+typedef struct tpacket_req tpacket_req_t;
+
+static u32
+af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
+ u32 flags)
+{
+ clib_error_t *error;
+ u8 *s;
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hi->dev_instance);
+
+ if (ETHERNET_INTERFACE_FLAG_MTU == (flags & ETHERNET_INTERFACE_FLAG_MTU))
+ {
+ s = format (0, "/sys/class/net/%s/mtu%c", apif->host_if_name, 0);
+
+ error = clib_sysfs_write ((char *) s, "%d", hi->max_packet_bytes);
+ vec_free (s);
+
+ if (error)
+ {
+ clib_error_report (error);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+af_packet_fd_read_ready (clib_file_t * uf)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 idx = uf->private_data;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx);
+
+ apm->pending_input_bitmap =
+ clib_bitmap_set (apm->pending_input_bitmap, idx, 1);
+
+ /* Schedule the rx node */
+ vnet_device_input_set_interrupt_pending (vnm, apif->hw_if_index, 0);
+
+ return 0;
+}
+
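+/*
+ * Returns 0 if the host interface is a linux bridge, i.e. if
+ * /sys/class/net/<ifname>/bridge exists, -1 otherwise.
+ */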
+static int
+is_bridge (const u8 * host_if_name)
+{
+ u8 *s;
+ DIR *dir = NULL;
+
+ s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0);
+ dir = opendir ((char *) s);
+ vec_free (s);
+
+ if (dir)
+ {
+ closedir (dir);
+ return 0;
+ }
+
+ return -1;
+}
+
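+/*
+ * Set up a TPACKET_V2 rx/tx ring pair on a raw AF_PACKET socket:
+ * create the socket, select TPACKET_V2, enable PACKET_LOSS so a bad
+ * tx frame is dropped rather than wedging the ring, size both rings,
+ * mmap them as one contiguous region and bind to the host interface.
+ */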
+static int
+create_packet_v2_sock (int host_if_index, tpacket_req_t * rx_req,
+ tpacket_req_t * tx_req, int *fd, u8 ** ring)
+{
+ int ret, err;
+ struct sockaddr_ll sll;
+ int ver = TPACKET_V2;
+ socklen_t req_sz = sizeof (struct tpacket_req);
+ u32 ring_sz = rx_req->tp_block_size * rx_req->tp_block_nr +
+ tx_req->tp_block_size * tx_req->tp_block_nr;
+
+ if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
+ {
+ DBG_SOCK ("Failed to create socket");
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if ((err =
+ setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver))) < 0)
+ {
+ DBG_SOCK ("Failed to set rx packet interface version");
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ int opt = 1;
+ if ((err =
+ setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt))) < 0)
+ {
+ DBG_SOCK ("Failed to set packet tx ring error handling option");
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if ((err =
+ setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz)) < 0)
+ {
+ DBG_SOCK ("Failed to set packet rx ring options");
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if ((err =
+ setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz)) < 0)
+ {
+ DBG_SOCK ("Failed to set packet rx ring options");
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ *ring =
+ mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, *fd,
+ 0);
+ if (*ring == MAP_FAILED)
+ {
+ DBG_SOCK ("mmap failure");
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ memset (&sll, 0, sizeof (sll));
+ sll.sll_family = PF_PACKET;
+ sll.sll_protocol = htons (ETH_P_ALL);
+ sll.sll_ifindex = host_if_index;
+
+ if ((err = bind (*fd, (struct sockaddr *) &sll, sizeof (sll))) < 0)
+ {
+ DBG_SOCK ("Failed to bind rx packet socket (error %d)", err);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ return 0;
+error:
+ if (*fd >= 0)
+ close (*fd);
+ *fd = -1;
+ return ret;
+}
+
+int
+af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
+ u32 * sw_if_index)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ int ret, fd = -1;
+ struct tpacket_req *rx_req = 0;
+ struct tpacket_req *tx_req = 0;
+ u8 *ring = 0;
+ af_packet_if_t *apif = 0;
+ u8 hw_addr[6];
+ clib_error_t *error;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ uword *p;
+ uword if_index;
+ u8 *host_if_name_dup = vec_dup (host_if_name);
+ int host_if_index = -1;
+
+ p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
+ if (p)
+ {
+ return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ }
+
+ vec_validate (rx_req, 0);
+ rx_req->tp_block_size = AF_PACKET_RX_BLOCK_SIZE;
+ rx_req->tp_frame_size = AF_PACKET_RX_FRAME_SIZE;
+ rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
+ rx_req->tp_frame_nr = AF_PACKET_RX_FRAME_NR;
+
+ vec_validate (tx_req, 0);
+ tx_req->tp_block_size = AF_PACKET_TX_BLOCK_SIZE;
+ tx_req->tp_frame_size = AF_PACKET_TX_FRAME_SIZE;
+ tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
+ tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR;
+
+ host_if_index = if_nametoindex ((const char *) host_if_name);
+
+ if (!host_if_index)
+ {
+ DBG_SOCK ("Wrong host interface name");
+ return VNET_API_ERROR_INVALID_INTERFACE;
+ }
+
+ ret = create_packet_v2_sock (host_if_index, rx_req, tx_req, &fd, &ring);
+
+ if (ret != 0)
+ goto error;
+
+ ret = is_bridge (host_if_name);
+
+ if (ret == 0) /* is a bridge, ignore state */
+ host_if_index = -1;
+
+ /* So far everything looks good, let's create interface */
+ pool_get (apm->interfaces, apif);
+ if_index = apif - apm->interfaces;
+
+ apif->host_if_index = host_if_index;
+ apif->fd = fd;
+ apif->rx_ring = ring;
+ apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr;
+ apif->rx_req = rx_req;
+ apif->tx_req = tx_req;
+ apif->host_if_name = host_if_name_dup;
+ apif->per_interface_next_index = ~0;
+ apif->next_tx_frame = 0;
+ apif->next_rx_frame = 0;
+
+ if (tm->n_vlib_mains > 1)
+ clib_spinlock_init (&apif->lockp);
+
+ {
+ clib_file_t template = { 0 };
+ template.read_function = af_packet_fd_read_ready;
+ template.file_descriptor = fd;
+ template.private_data = if_index;
+ template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
+ apif->clib_file_index = clib_file_add (&file_main, &template);
+ }
+
+ /* use configured MAC or generate a random, locally administered one
+ (02:fe:xx:xx:xx:xx) */
+ if (hw_addr_set)
+ clib_memcpy (hw_addr, hw_addr_set, 6);
+ else
+ {
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+
+ clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+ hw_addr[0] = 2;
+ hw_addr[1] = 0xfe;
+ }
+
+ error = ethernet_register_interface (vnm, af_packet_device_class.index,
+ if_index, hw_addr, &apif->hw_if_index,
+ af_packet_eth_flag_change);
+
+ if (error)
+ {
+ memset (apif, 0, sizeof (*apif));
+ pool_put (apm->interfaces, apif);
+ clib_error_report (error);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
+ hw = vnet_get_hw_interface (vnm, apif->hw_if_index);
+ apif->sw_if_index = sw->sw_if_index;
+ vnet_hw_interface_set_input_node (vnm, apif->hw_if_index,
+ af_packet_input_node.index);
+
+ vnet_hw_interface_assign_rx_thread (vnm, apif->hw_if_index, 0, /* queue */
+ ~0 /* any cpu */ );
+
+ hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
+ vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ vnet_hw_interface_set_rx_mode (vnm, apif->hw_if_index, 0,
+ VNET_HW_INTERFACE_RX_MODE_INTERRUPT);
+
+ mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
+ 0);
+ if (sw_if_index)
+ *sw_if_index = apif->sw_if_index;
+
+ return 0;
+
+error:
+ vec_free (host_if_name_dup);
+ vec_free (rx_req);
+ vec_free (tx_req);
+ return ret;
+}
+
+int
+af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif;
+ uword *p;
+ uword if_index;
+ u32 ring_sz;
+
+ p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
+ if (p == NULL)
+ {
+ clib_warning ("Host interface %s does not exist", host_if_name);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ apif = pool_elt_at_index (apm->interfaces, p[0]);
+ if_index = apif - apm->interfaces;
+
+ /* bring down the interface */
+ vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
+ vnet_hw_interface_unassign_rx_thread (vnm, apif->hw_if_index, 0);
+
+ /* clean up */
+ if (apif->clib_file_index != ~0)
+ {
+ clib_file_del (&file_main, file_main.file_pool + apif->clib_file_index);
+ apif->clib_file_index = ~0;
+ }
+ else
+ close (apif->fd);
+
+ ring_sz = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr +
+ apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr;
+ if (munmap (apif->rx_ring, ring_sz))
+ clib_warning ("Host interface %s could not free rx/tx ring",
+ host_if_name);
+ apif->rx_ring = NULL;
+ apif->tx_ring = NULL;
+ apif->fd = -1;
+
+ vec_free (apif->rx_req);
+ apif->rx_req = NULL;
+ vec_free (apif->tx_req);
+ apif->tx_req = NULL;
+
+ vec_free (apif->host_if_name);
+ apif->host_if_name = NULL;
+ apif->host_if_index = -1;
+
+ mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index);
+
+ ethernet_delete_interface (vnm, apif->hw_if_index);
+
+ pool_put (apm->interfaces, apif);
+
+ return 0;
+}
+
+static clib_error_t *
+af_packet_init (vlib_main_t * vm)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ memset (apm, 0, sizeof (af_packet_main_t));
+
+ mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
+
+ vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (af_packet_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h
new file mode 100644
index 00000000..95c7e7cf
--- /dev/null
+++ b/src/vnet/devices/af_packet/af_packet.h
@@ -0,0 +1,73 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet.h - linux kernel packet interface header file
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vppinfra/lock.h>
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ clib_spinlock_t lockp;
+ u8 *host_if_name;
+ int host_if_index;
+ int fd;
+ struct tpacket_req *rx_req;
+ struct tpacket_req *tx_req;
+ u8 *rx_ring;
+ u8 *tx_ring;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u32 clib_file_index;
+
+ u32 next_rx_frame;
+ u32 next_tx_frame;
+
+ u32 per_interface_next_index;
+ u8 is_admin_up;
+} af_packet_if_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ af_packet_if_t *interfaces;
+
+ /* bitmap of pending rx interfaces */
+ uword *pending_input_bitmap;
+
+ /* rx buffer cache */
+ u32 **rx_buffers;
+
+ /* hash of host interface names */
+ mhash_t if_index_by_host_if_name;
+} af_packet_main_t;
+
+af_packet_main_t af_packet_main;
+extern vnet_device_class_t af_packet_device_class;
+extern vlib_node_registration_t af_packet_input_node;
+
+int af_packet_create_if (vlib_main_t * vm, u8 * host_if_name,
+ u8 * hw_addr_set, u32 * sw_if_index);
+int af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/af_packet/af_packet_api.c b/src/vnet/devices/af_packet/af_packet_api.c
new file mode 100644
index 00000000..414c838c
--- /dev/null
+++ b/src/vnet/devices/af_packet/af_packet_api.c
@@ -0,0 +1,143 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet_api.c - af-packet api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/devices/af_packet/af_packet.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define endian-swap functions */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(AF_PACKET_CREATE, af_packet_create) \
+_(AF_PACKET_DELETE, af_packet_delete)
+
+static void
+vl_api_af_packet_create_t_handler (vl_api_af_packet_create_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_af_packet_create_reply_t *rmp;
+ int rv = 0;
+ u8 *host_if_name = NULL;
+ u32 sw_if_index;
+
+ host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (host_if_name, 0);
+
+ rv = af_packet_create_if (vm, host_if_name,
+ mp->use_random_hw_addr ? 0 : mp->hw_addr,
+ &sw_if_index);
+
+ vec_free (host_if_name);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_AF_PACKET_CREATE_REPLY,
+ ({
+ rmp->sw_if_index = clib_host_to_net_u32(sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_af_packet_delete_t_handler (vl_api_af_packet_delete_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_af_packet_delete_reply_t *rmp;
+ int rv = 0;
+ u8 *host_if_name = NULL;
+
+ host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (host_if_name, 0);
+
+ rv = af_packet_delete_if (vm, host_if_name);
+
+ vec_free (host_if_name);
+
+ REPLY_MACRO (VL_API_AF_PACKET_DELETE_REPLY);
+}
+
+/*
+ * af_packet_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_af_packet;
+#undef _
+}
+
+static clib_error_t *
+af_packet_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (af_packet_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c
new file mode 100644
index 00000000..44dc5179
--- /dev/null
+++ b/src/vnet/devices/af_packet/cli.c
@@ -0,0 +1,211 @@
+/*
+ *------------------------------------------------------------------
+ * cli.c - linux kernel packet interface CLI
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <fcntl.h> /* for open */
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* for iovec */
+#include <netinet/in.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/devices/af_packet/af_packet.h>
+
+/**
+ * @file
+ * @brief CLI for Host Interface Device Driver.
+ *
+ * This file contains the source code for CLI for the host interface.
+ */
+
+static clib_error_t *
+af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ u8 hwaddr[6];
+ u8 *hw_addr_ptr = 0;
+ u32 sw_if_index;
+ int r;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+ if (unformat
+ (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr))
+ hw_addr_ptr = hwaddr;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ r = af_packet_create_if (vm, host_if_name, hw_addr_ptr, &sw_if_index);
+
+ if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
+ {
+ error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_INVALID_INTERFACE)
+ {
+ error = clib_error_return (0, "Invalid interface name");
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
+ {
+ error = clib_error_return (0, "Interface elready exists");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+
+done:
+ vec_free (host_if_name);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Create a host interface that will attach to a linux AF_PACKET
+ * interface, one side of a veth pair. The veth pair must already
+ * exist. Once created, a new host interface will exist in VPP
+ * with the name '<em>host-<ifname></em>', where '<em><ifname></em>'
+ * is the name of the specified veth pair. Use the
+ * '<em>show interface</em>' command to display host interface details.
+ *
+ * This command has the following optional parameters:
+ *
+ * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either
+ * X:X:X:X:X:X unix or X.X.X cisco format.
+ *
+ * @cliexpar
+ * Example of how to create a host interface tied to one side of an
+ * existing linux veth pair named vpp1:
+ * @cliexstart{create host-interface name vpp1}
+ * host-vpp1
+ * @cliexend
+ * Once the host interface is created, enable the interface using:
+ * @cliexcmd{set interface state host-vpp1 up}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (af_packet_create_command, static) = {
+ .path = "create host-interface",
+ .short_help = "create host-interface name <ifname> [hw-addr <mac-addr>]",
+ .function = af_packet_create_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ af_packet_delete_if (vm, host_if_name);
+
+done:
+ vec_free (host_if_name);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Delete a host interface. Use the linux interface name to identify
+ * the host interface to be deleted. In VPP, host interfaces are
+ * named as '<em>host-<ifname></em>', where '<em><ifname></em>'
+ * is the name of the linux interface.
+ *
+ * @cliexpar
+ * Example of how to delete a host interface named host-vpp1:
+ * @cliexcmd{delete host-interface name vpp1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (af_packet_delete_command, static) = {
+ .path = "delete host-interface",
+ .short_help = "delete host-interface name <ifname>",
+ .function = af_packet_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+af_packet_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (af_packet_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c
new file mode 100644
index 00000000..e01b1c71
--- /dev/null
+++ b/src/vnet/devices/af_packet/device.c
@@ -0,0 +1,354 @@
+/*
+ *------------------------------------------------------------------
+ * device.c - linux kernel packet interface tx path
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/devices/af_packet/af_packet.h>
+
+#define foreach_af_packet_tx_func_error \
+_(FRAME_NOT_READY, "tx frame not ready") \
+_(TXRING_EAGAIN, "tx sendto temporary failure") \
+_(TXRING_FATAL, "tx sendto fatal failure") \
+_(TXRING_OVERRUN, "tx ring overrun")
+
+typedef enum
+{
+#define _(f,s) AF_PACKET_TX_ERROR_##f,
+ foreach_af_packet_tx_func_error
+#undef _
+ AF_PACKET_TX_N_ERROR,
+} af_packet_tx_func_error_t;
+
+static char *af_packet_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_af_packet_tx_func_error
+#undef _
+};
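+
+/*
+ * Example (illustrative only): the X-macro list above expands into a
+ * matching enum and string table, roughly
+ *
+ *   enum { AF_PACKET_TX_ERROR_FRAME_NOT_READY, ...,
+ *          AF_PACKET_TX_N_ERROR };
+ *   static char *af_packet_tx_func_error_strings[] =
+ *     { "tx frame not ready", ... };
+ *
+ * so error ids and their counter names are kept in sync from a single
+ * list and cannot drift apart.
+ */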
+
+
+static u8 *
+format_af_packet_device_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, i);
+
+ s = format (s, "host-%s", apif->host_if_name);
+ return s;
+}
+
+static u8 *
+format_af_packet_device (u8 * s, va_list * args)
+{
+ s = format (s, "Linux PACKET socket interface");
+ return s;
+}
+
+static u8 *
+format_af_packet_tx_trace (u8 * s, va_list * args)
+{
+ s = format (s, "Unimplemented...");
+ return s;
+}
+
+static uword
+af_packet_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ u32 *buffers = vlib_frame_args (frame);
+ u32 n_left = frame->n_vectors;
+ u32 n_sent = 0;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, rd->dev_instance);
+ int block = 0;
+ u32 block_size = apif->tx_req->tp_block_size;
+ u32 frame_size = apif->tx_req->tp_frame_size;
+ u32 frame_num = apif->tx_req->tp_frame_nr;
+ u8 *block_start = apif->tx_ring + block * block_size;
+ u32 tx_frame = apif->next_tx_frame;
+ struct tpacket2_hdr *tph;
+ u32 frame_not_ready = 0;
+
+ clib_spinlock_lock_if_init (&apif->lockp);
+
+ while (n_left > 0)
+ {
+ u32 len;
+ u32 offset = 0;
+ vlib_buffer_t *b0;
+ n_left--;
+ u32 bi = buffers[0];
+ buffers++;
+
+ tph = (struct tpacket2_hdr *) (block_start + tx_frame * frame_size);
+
+ if (PREDICT_FALSE
+ (tph->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
+ {
+ frame_not_ready++;
+ goto next;
+ }
+
+ do
+ {
+ b0 = vlib_get_buffer (vm, bi);
+ len = b0->current_length;
+ clib_memcpy ((u8 *) tph +
+ TPACKET_ALIGN (sizeof (struct tpacket2_hdr)) + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+ }
+ while ((bi =
+ (b0->flags & VLIB_BUFFER_NEXT_PRESENT) ? b0->next_buffer : 0));
+
+ tph->tp_len = tph->tp_snaplen = offset;
+ tph->tp_status = TP_STATUS_SEND_REQUEST;
+ n_sent++;
+ next:
+ /* check if we've exhausted the ring */
+ if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+ break;
+
+ tx_frame = (tx_frame + 1) % frame_num;
+ }
+
+ CLIB_MEMORY_BARRIER ();
+
+ if (PREDICT_TRUE (n_sent))
+ {
+ apif->next_tx_frame = tx_frame;
+
+ if (PREDICT_FALSE (sendto (apif->fd, NULL, 0,
+ MSG_DONTWAIT, NULL, 0) == -1))
+ {
+ /* Uh-oh, drop & move on, but count whether it was fatal or not.
+ * Note that we have no reliable way to properly determine the
+ * disposition of the packets we just enqueued for delivery.
+ */
+ vlib_error_count (vm, node->node_index,
+ unix_error_is_fatal (errno) ?
+ AF_PACKET_TX_ERROR_TXRING_FATAL :
+ AF_PACKET_TX_ERROR_TXRING_EAGAIN, n_sent);
+ }
+ }
+
+ clib_spinlock_unlock_if_init (&apif->lockp);
+
+ if (PREDICT_FALSE (frame_not_ready))
+ vlib_error_count (vm, node->node_index,
+ AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready);
+
+ if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+ vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_TXRING_OVERRUN,
+ n_left);
+
+ vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
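+
+/*
+ * Illustrative note on the PACKET_MMAP tx protocol used above (see
+ * linux/if_packet.h; this comment is explanatory only): each tx ring
+ * frame's status word cycles
+ *
+ *   TP_STATUS_AVAILABLE -> TP_STATUS_SEND_REQUEST   (set by VPP above)
+ *     -> TP_STATUS_SENDING -> TP_STATUS_AVAILABLE   (set by the kernel)
+ *
+ * and the zero-length sendto (fd, NULL, 0, MSG_DONTWAIT, NULL, 0) only
+ * kicks the kernel into draining every frame currently marked
+ * TP_STATUS_SEND_REQUEST; it does not pass any packet data itself.
+ */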
+
+static void
+af_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ apif->per_interface_next_index = node_index;
+ return;
+ }
+
+ apif->per_interface_next_index =
+ vlib_node_add_next (vlib_get_main (), af_packet_input_node.index,
+ node_index);
+}
+
+static void
+af_packet_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+static clib_error_t *
+af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hw->dev_instance);
+ u32 hw_flags;
+ int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0);
+ struct ifreq ifr;
+
+ if (0 > fd)
+ {
+ clib_unix_warning ("af_packet_%s could not open socket",
+ apif->host_if_name);
+ return 0;
+ }
+
+ /* if interface is a bridge ignore */
+ if (apif->host_if_index < 0)
+ goto error; /* no error */
+
+ /* use host_if_index in case host name has changed */
+ ifr.ifr_ifindex = apif->host_if_index;
+ if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
+ {
+ clib_unix_warning ("af_packet_%s ioctl could not retrieve eth name",
+ apif->host_if_name);
+ goto error;
+ }
+
+ apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if ((rv = ioctl (fd, SIOCGIFFLAGS, &ifr)) < 0)
+ {
+ clib_unix_warning ("af_packet_%s error: %d",
+ apif->is_admin_up ? "up" : "down", rv);
+ goto error;
+ }
+
+ if (apif->is_admin_up)
+ {
+ hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+ ifr.ifr_flags |= IFF_UP;
+ }
+ else
+ {
+ hw_flags = 0;
+ ifr.ifr_flags &= ~IFF_UP;
+ }
+
+ if ((rv = ioctl (fd, SIOCSIFFLAGS, &ifr)) < 0)
+ {
+ clib_unix_warning ("af_packet_%s error: %d",
+ apif->is_admin_up ? "up" : "down", rv);
+ goto error;
+ }
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+error:
+ if (0 <= fd)
+ close (fd);
+
+ return 0; /* no error */
+}
+
+static clib_error_t *
+af_packet_subif_add_del_function (vnet_main_t * vnm,
+ u32 hw_if_index,
+ struct vnet_sw_interface_t *st, int is_add)
+{
+ /* Nothing for now */
+ return 0;
+}
+
+static clib_error_t *af_packet_set_mac_address_function
+ (struct vnet_hw_interface_t *hi, char *address)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hi->dev_instance);
+ int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0);
+ struct ifreq ifr;
+
+ if (0 > fd)
+ {
+ clib_unix_warning ("af_packet_%s could not open socket",
+ apif->host_if_name);
+ return 0;
+ }
+
+ /* if interface is a bridge ignore */
+ if (apif->host_if_index < 0)
+ goto error; /* no error */
+
+ /* use host_if_index in case host name has changed */
+ ifr.ifr_ifindex = apif->host_if_index;
+ if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
+ {
+ clib_unix_warning
+ ("af_packet_%s ioctl could not retrieve eth name, error: %d",
+ apif->host_if_name, rv);
+ goto error;
+ }
+
+ clib_memcpy (ifr.ifr_hwaddr.sa_data, address, 6);
+ ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+
+ if ((rv = ioctl (fd, SIOCSIFHWADDR, &ifr)) < 0)
+ {
+ clib_unix_warning ("af_packet_%s ioctl could not set mac, error: %d",
+ apif->host_if_name, rv);
+ goto error;
+ }
+
+error:
+
+ if (0 <= fd)
+ close (fd);
+
+ return 0; /* no error */
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (af_packet_device_class) = {
+ .name = "af-packet",
+ .tx_function = af_packet_interface_tx,
+ .format_device_name = format_af_packet_device_name,
+ .format_device = format_af_packet_device,
+ .format_tx_trace = format_af_packet_tx_trace,
+ .tx_function_n_errors = AF_PACKET_TX_N_ERROR,
+ .tx_function_error_strings = af_packet_tx_func_error_strings,
+ .rx_redirect_to_node = af_packet_set_interface_next_node,
+ .clear_counters = af_packet_clear_hw_interface_counters,
+ .admin_up_down_function = af_packet_interface_admin_up_down,
+ .subif_add_del_function = af_packet_subif_add_del_function,
+ .mac_addr_change_function = af_packet_set_mac_address_function,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (af_packet_device_class,
+ af_packet_interface_tx)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/af_packet/dir.dox b/src/vnet/devices/af_packet/dir.dox
new file mode 100644
index 00000000..78991c6d
--- /dev/null
+++ b/src/vnet/devices/af_packet/dir.dox
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief Host Interface Implementation.
+
+This directory contains the source code for the Host Interface driver.
+The Host Interface driver is implemented directly on top of the Linux
+AF_PACKET (PACKET_MMAP) socket interface.
+
+*/
+/*? %%clicmd:group_label Host Interface %% ?*/
+/*? %%syscfg:group_label Host Interface %% ?*/
diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
new file mode 100644
index 00000000..99c91f38
--- /dev/null
+++ b/src/vnet/devices/af_packet/node.c
@@ -0,0 +1,310 @@
+/*
+ *------------------------------------------------------------------
+ * node.c - linux kernel packet interface (rx input node)
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_packet.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+
+#include <vnet/devices/af_packet/af_packet.h>
+
+#define foreach_af_packet_input_error
+
+typedef enum
+{
+#define _(f,s) AF_PACKET_INPUT_ERROR_##f,
+ foreach_af_packet_input_error
+#undef _
+ AF_PACKET_INPUT_N_ERROR,
+} af_packet_input_error_t;
+
+static char *af_packet_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_af_packet_input_error
+#undef _
+};
+
+typedef struct
+{
+ u32 next_index;
+ u32 hw_if_index;
+ int block;
+ struct tpacket2_hdr tph;
+} af_packet_input_trace_t;
+
+static u8 *
+format_af_packet_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "af_packet: hw_if_index %d next-index %d",
+ t->hw_if_index, t->next_index);
+
+ s =
+ format (s,
+ "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2,
+ format_white_space, indent + 4,
+ t->tph.tp_status,
+ t->tph.tp_len,
+ t->tph.tp_snaplen,
+ t->tph.tp_mac,
+ t->tph.tp_net,
+ format_white_space, indent + 4,
+ t->tph.tp_sec,
+ t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ , t->tph.tp_vlan_tpid
+#endif
+ );
+ return s;
+}
+
+always_inline void
+buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi);
+ vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi);
+
+ /* update first buffer */
+ first_b->total_length_not_including_first_buffer += b->current_length;
+
+ /* update previous buffer */
+ prev_b->next_buffer = bi;
+ prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ /* update current buffer */
+ b->next_buffer = 0;
+}
+
+always_inline uword
+af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, af_packet_if_t * apif)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ struct tpacket2_hdr *tph;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ u32 block = 0;
+ u32 rx_frame;
+ u32 n_free_bufs;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 *to_next = 0;
+ u32 block_size = apif->rx_req->tp_block_size;
+ u32 frame_size = apif->rx_req->tp_frame_size;
+ u32 frame_num = apif->rx_req->tp_frame_nr;
+ u8 *block_start = apif->rx_ring + block * block_size;
+ uword n_trace = vlib_get_trace_count (vm, node);
+ u32 thread_index = vlib_get_thread_index ();
+ u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes;
+
+ if (apif->per_interface_next_index != ~0)
+ next_index = apif->per_interface_next_index;
+
+ n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
+ {
+ vec_validate (apm->rx_buffers[thread_index],
+ VLIB_FRAME_SIZE + n_free_bufs - 1);
+ n_free_bufs +=
+ vlib_buffer_alloc (vm, &apm->rx_buffers[thread_index][n_free_bufs],
+ VLIB_FRAME_SIZE);
+ _vec_len (apm->rx_buffers[thread_index]) = n_free_bufs;
+ }
+
+ rx_frame = apif->next_rx_frame;
+ tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
+ while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs))
+ {
+ vlib_buffer_t *b0 = 0, *first_b0 = 0;
+ u32 next0 = next_index;
+
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
+ n_left_to_next)
+ {
+ u32 data_len = tph->tp_snaplen;
+ u32 offset = 0;
+ u32 bi0 = 0, first_bi0 = 0, prev_bi0;
+
+ while (data_len)
+ {
+ /* grab free buffer */
+ u32 last_empty_buffer =
+ vec_len (apm->rx_buffers[thread_index]) - 1;
+ prev_bi0 = bi0;
+ bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
+ b0 = vlib_get_buffer (vm, bi0);
+ _vec_len (apm->rx_buffers[thread_index]) = last_empty_buffer;
+ n_free_bufs--;
+
+ /* copy data */
+ u32 bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ u32 vlan_len = 0;
+ u32 bytes_copied = 0;
+ b0->current_data = 0;
+ /* Kernel removes VLAN headers, so reconstruct VLAN */
+ if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
+ {
+ if (PREDICT_TRUE (offset == 0))
+ {
+ clib_memcpy (vlib_buffer_get_current (b0),
+ (u8 *) tph + tph->tp_mac,
+ sizeof (ethernet_header_t));
+ ethernet_header_t *eth = vlib_buffer_get_current (b0);
+ ethernet_vlan_header_t *vlan =
+ (ethernet_vlan_header_t *) (eth + 1);
+ vlan->priority_cfi_and_id =
+ clib_host_to_net_u16 (tph->tp_vlan_tci);
+ vlan->type = eth->type;
+ eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ vlan_len = sizeof (ethernet_vlan_header_t);
+ bytes_copied = sizeof (ethernet_header_t);
+ }
+ }
+ clib_memcpy (((u8 *) vlib_buffer_get_current (b0)) +
+ bytes_copied + vlan_len,
+ (u8 *) tph + tph->tp_mac + offset + bytes_copied,
+ (bytes_to_copy - bytes_copied));
+
+ /* fill buffer header */
+ b0->current_length = bytes_to_copy + vlan_len;
+
+ if (offset == 0)
+ {
+ b0->total_length_not_including_first_buffer = 0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = apif->sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ first_bi0 = bi0;
+ first_b0 = vlib_get_buffer (vm, first_bi0);
+ }
+ else
+ buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0);
+
+ offset += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+ n_rx_packets++;
+ n_rx_bytes += tph->tp_snaplen;
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+
+ /* trace */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0);
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ af_packet_input_trace_t *tr;
+ vlib_trace_buffer (vm, node, next0, first_b0, /* follow_chain */
+ 0);
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->hw_if_index = apif->hw_if_index;
+ clib_memcpy (&tr->tph, tph, sizeof (struct tpacket2_hdr));
+ }
+
+ /* redirect if feature path enabled */
+	  vnet_feature_start_device_input_x1 (apif->sw_if_index, &next0,
+					      first_b0);
+
+	  /* enqueue and take next packet */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0, next0);
+
+ /* next packet */
+ tph->tp_status = TP_STATUS_KERNEL;
+ rx_frame = (rx_frame + 1) % frame_num;
+ tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ apif->next_rx_frame = rx_frame;
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+ return n_rx_packets;
+}
+
+static uword
+af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_rx_packets = 0;
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+ vnet_device_and_queue_t *dq;
+
+ foreach_device_and_queue (dq, rt->devices_and_queues)
+ {
+ af_packet_if_t *apif;
+ apif = vec_elt_at_index (apm->interfaces, dq->dev_instance);
+ if (apif->is_admin_up)
+ n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif);
+ }
+
+ return n_rx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (af_packet_input_node) = {
+ .function = af_packet_input_fn,
+ .name = "af-packet-input",
+ .sibling_of = "device-input",
+ .format_trace = format_af_packet_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .n_errors = AF_PACKET_INPUT_N_ERROR,
+ .error_strings = af_packet_input_error_strings,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (af_packet_input_node, af_packet_input_fn)
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
new file mode 100644
index 00000000..a38ecd2d
--- /dev/null
+++ b/src/vnet/devices/devices.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+vnet_device_main_t vnet_device_main;
+
+static uword
+device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (device_input_node) = {
+ .function = device_input_fn,
+ .name = "device-input",
+ .runtime_data_bytes = sizeof (vnet_device_input_runtime_t),
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .n_next_nodes = VNET_DEVICE_INPUT_N_NEXT_NODES,
+ .next_nodes = VNET_DEVICE_INPUT_NEXT_NODES,
+};
+
+/* Table defines how much we need to advance current data pointer
+ in the buffer if we shortcut to l3 nodes */
+
+const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES)))
+device_input_next_node_advance[((VNET_DEVICE_INPUT_N_NEXT_NODES /
+ CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] =
+{
+ [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = sizeof (ethernet_header_t),
+ [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = sizeof (ethernet_header_t),
+ [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = sizeof (ethernet_header_t),
+ [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = sizeof (ethernet_header_t),
+};
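+
+/*
+ * Illustrative usage sketch (assumes 'b0' is a freshly received buffer
+ * and 'next0' one of the l3 next indices above): a driver that bypasses
+ * ethernet-input consumes this table as
+ *
+ *   vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
+ *
+ * so current_data already points at the l3 header when the packet
+ * reaches ip4-input / ip6-input / mpls-input.
+ */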
+
+VNET_FEATURE_ARC_INIT (device_input, static) =
+{
+ .arc_name = "device-input",
+ .start_nodes = VNET_FEATURES ("device-input"),
+ .arc_index_ptr = &feature_main.device_input_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (l2_patch, static) = {
+ .arc_name = "device-input",
+ .node_name = "l2-patch",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (worker_handoff, static) = {
+ .arc_name = "device-input",
+ .node_name = "worker-handoff",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (span_input, static) = {
+ .arc_name = "device-input",
+ .node_name = "span-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (p2p_ethernet_node, static) = {
+ .arc_name = "device-input",
+ .node_name = "p2p-ethernet-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (ethernet_input, static) = {
+ .arc_name = "device-input",
+ .node_name = "ethernet-input",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
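+
+/*
+ * Illustrative note: each VNET_FEATURE_INIT above only states a
+ * partial-order constraint ("runs_before"), so at init time the
+ * device-input arc is topologically sorted into something like
+ *
+ *   l2-patch, worker-handoff, span-input, p2p-ethernet-input
+ *     -> ethernet-input   (always last, runs_before = 0)
+ *
+ * rather than following declaration order.
+ */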
+
+static int
+vnet_device_queue_sort (void *a1, void *a2)
+{
+ vnet_device_and_queue_t *dq1 = a1;
+ vnet_device_and_queue_t *dq2 = a2;
+
+ if (dq1->dev_instance > dq2->dev_instance)
+ return 1;
+ else if (dq1->dev_instance < dq2->dev_instance)
+ return -1;
+ else if (dq1->queue_id > dq2->queue_id)
+ return 1;
+ else if (dq1->queue_id < dq2->queue_id)
+ return -1;
+ else
+ return 0;
+}
+
+static void
+vnet_device_queue_update (vnet_main_t * vnm, vnet_device_input_runtime_t * rt)
+{
+ vnet_device_and_queue_t *dq;
+ vnet_hw_interface_t *hw;
+
+ vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort);
+
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ hw = vnet_get_hw_interface (vnm, dq->hw_if_index);
+ vec_validate (hw->dq_runtime_index_by_queue, dq->queue_id);
+ hw->dq_runtime_index_by_queue[dq->queue_id] = dq - rt->devices_and_queues;
+ }
+}
+
+void
+vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id, uword thread_index)
+{
+ vnet_device_main_t *vdm = &vnet_device_main;
+ vlib_main_t *vm, *vm0;
+ vnet_device_input_runtime_t *rt;
+ vnet_device_and_queue_t *dq;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+ ASSERT (hw->input_node_index > 0);
+
+ if (vdm->first_worker_thread_index == 0)
+ thread_index = 0;
+
+ if (thread_index != 0 &&
+ (thread_index < vdm->first_worker_thread_index ||
+ thread_index > vdm->last_worker_thread_index))
+ {
+ thread_index = vdm->next_worker_thread_index++;
+ if (vdm->next_worker_thread_index > vdm->last_worker_thread_index)
+ vdm->next_worker_thread_index = vdm->first_worker_thread_index;
+ }
+
+ vm = vlib_mains[thread_index];
+ vm0 = vlib_get_main ();
+
+ vlib_worker_thread_barrier_sync (vm0);
+
+ rt = vlib_node_get_runtime_data (vm, hw->input_node_index);
+
+ vec_add2 (rt->devices_and_queues, dq, 1);
+ dq->hw_if_index = hw_if_index;
+ dq->dev_instance = hw->dev_instance;
+ dq->queue_id = queue_id;
+ dq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+ rt->enabled_node_state = VLIB_NODE_STATE_POLLING;
+
+ vnet_device_queue_update (vnm, rt);
+ vec_validate (hw->input_node_thread_index_by_queue, queue_id);
+ vec_validate (hw->rx_mode_by_queue, queue_id);
+ hw->input_node_thread_index_by_queue[queue_id] = thread_index;
+ hw->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_POLLING;
+
+ vlib_worker_thread_barrier_release (vm0);
+
+ vlib_node_set_state (vm, hw->input_node_index, rt->enabled_node_state);
+}
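+
+/*
+ * Illustrative driver-side sketch (hypothetical names; assumes a
+ * registered input node 'my_input_node' and a created hw interface):
+ *
+ *   vnet_hw_interface_set_input_node (vnm, hw_if_index,
+ *                                     my_input_node.index);
+ *   vnet_hw_interface_assign_rx_thread (vnm, hw_if_index, 0, ~0);
+ *
+ * Passing ~0 as thread_index falls into the round-robin branch above,
+ * so queue 0 is spread across the configured worker threads.
+ */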
+
+int
+vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id)
+{
+ vlib_main_t *vm, *vm0;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_device_input_runtime_t *rt;
+ vnet_device_and_queue_t *dq;
+ uword old_thread_index;
+ vnet_hw_interface_rx_mode mode;
+
+ if (hw->input_node_thread_index_by_queue == 0)
+ return VNET_API_ERROR_INVALID_INTERFACE;
+
+ if (vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1)
+ return VNET_API_ERROR_INVALID_INTERFACE;
+
+ old_thread_index = hw->input_node_thread_index_by_queue[queue_id];
+
+ vm = vlib_mains[old_thread_index];
+
+ rt = vlib_node_get_runtime_data (vm, hw->input_node_index);
+
+ vec_foreach (dq, rt->devices_and_queues)
+ if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id)
+ {
+ mode = dq->mode;
+ goto delete;
+ }
+
+ return VNET_API_ERROR_INVALID_INTERFACE;
+
+delete:
+
+ vm0 = vlib_get_main ();
+ vlib_worker_thread_barrier_sync (vm0);
+ vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues);
+ vnet_device_queue_update (vnm, rt);
+ hw->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_UNKNOWN;
+ vlib_worker_thread_barrier_release (vm0);
+
+ if (vec_len (rt->devices_and_queues) == 0)
+ vlib_node_set_state (vm, hw->input_node_index, VLIB_NODE_STATE_DISABLED);
+ else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ {
+ /*
+ * if the deleted interface is polling, we may need to set the node state
+ * to interrupt if there is no more polling interface for this device's
+ * corresponding thread. This is because mixed interfaces
+ * (polling and interrupt), assigned to the same thread, set the
+ * thread to polling prior to the deletion.
+ */
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ if (dq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ return 0;
+ }
+ rt->enabled_node_state = VLIB_NODE_STATE_INTERRUPT;
+ vlib_node_set_state (vm, hw->input_node_index, rt->enabled_node_state);
+ }
+
+ return 0;
+}
+
+
+int
+vnet_hw_interface_set_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id, vnet_hw_interface_rx_mode mode)
+{
+ vlib_main_t *vm;
+ uword thread_index;
+ vnet_device_and_queue_t *dq;
+ vlib_node_state_t enabled_node_state;
+ ASSERT (mode < VNET_HW_INTERFACE_NUM_RX_MODES);
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_device_input_runtime_t *rt;
+ int is_polling = 0;
+
+ if (mode == VNET_HW_INTERFACE_RX_MODE_DEFAULT)
+ mode = hw->default_rx_mode;
+
+ if (hw->input_node_thread_index_by_queue == 0 || hw->rx_mode_by_queue == 0)
+ return VNET_API_ERROR_INVALID_INTERFACE;
+
+ if (hw->rx_mode_by_queue[queue_id] == mode)
+ return 0;
+
+ if (mode != VNET_HW_INTERFACE_RX_MODE_POLLING &&
+ (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE) == 0)
+ return VNET_API_ERROR_UNSUPPORTED;
+
+ if ((vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1) ||
+ (vec_len (hw->rx_mode_by_queue) < queue_id + 1))
+ return VNET_API_ERROR_INVALID_QUEUE;
+
+ hw->rx_mode_by_queue[queue_id] = mode;
+ thread_index = hw->input_node_thread_index_by_queue[queue_id];
+ vm = vlib_mains[thread_index];
+
+ rt = vlib_node_get_runtime_data (vm, hw->input_node_index);
+
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id)
+ dq->mode = mode;
+ if (dq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ is_polling = 1;
+ }
+
+ if (is_polling)
+ enabled_node_state = VLIB_NODE_STATE_POLLING;
+ else
+ enabled_node_state = VLIB_NODE_STATE_INTERRUPT;
+
+ if (rt->enabled_node_state != enabled_node_state)
+ {
+ rt->enabled_node_state = enabled_node_state;
+ if (vlib_node_get_state (vm, hw->input_node_index) !=
+ VLIB_NODE_STATE_DISABLED)
+ vlib_node_set_state (vm, hw->input_node_index, enabled_node_state);
+ }
+
+ return 0;
+}
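+
+/*
+ * Illustrative usage sketch: switching a queue to interrupt mode (this
+ * only succeeds if the device set VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE):
+ *
+ *   vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id,
+ *                                  VNET_HW_INTERFACE_RX_MODE_INTERRUPT);
+ *
+ * Note that a polling queue elsewhere on the same thread keeps the node
+ * in VLIB_NODE_STATE_POLLING, as computed by the is_polling scan above.
+ */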
+
+int
+vnet_hw_interface_get_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id, vnet_hw_interface_rx_mode * mode)
+{
+ vlib_main_t *vm;
+ uword thread_index;
+ vnet_device_and_queue_t *dq;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_device_input_runtime_t *rt;
+
+ if (hw->input_node_thread_index_by_queue == 0)
+ return VNET_API_ERROR_INVALID_INTERFACE;
+
+ if ((vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1) ||
+ (vec_len (hw->rx_mode_by_queue) < queue_id + 1))
+ return VNET_API_ERROR_INVALID_QUEUE;
+
+ thread_index = hw->input_node_thread_index_by_queue[queue_id];
+ vm = vlib_mains[thread_index];
+
+ rt = vlib_node_get_runtime_data (vm, hw->input_node_index);
+
+ vec_foreach (dq, rt->devices_and_queues)
+ if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id)
+ {
+ *mode = dq->mode;
+ return 0;
+ }
+
+ return VNET_API_ERROR_INVALID_INTERFACE;
+}
+
+
+
+static clib_error_t *
+vnet_device_init (vlib_main_t * vm)
+{
+ vnet_device_main_t *vdm = &vnet_device_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_thread_registration_t *tr;
+ uword *p;
+
+ vec_validate_aligned (vdm->workers, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ tr = p ? (vlib_thread_registration_t *) p[0] : 0;
+ if (tr && tr->count > 0)
+ {
+ vdm->first_worker_thread_index = tr->first_index;
+ vdm->next_worker_thread_index = tr->first_index;
+ vdm->last_worker_thread_index = tr->first_index + tr->count - 1;
+ }
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_device_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h
new file mode 100644
index 00000000..b74e3713
--- /dev/null
+++ b/src/vnet/devices/devices.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_vnet_device_h
+#define included_vnet_vnet_device_h
+
+#include <vnet/unix/pcap.h>
+#include <vnet/l3_types.h>
+
+typedef enum
+{
+ VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT,
+ VNET_DEVICE_INPUT_NEXT_IP4_INPUT,
+ VNET_DEVICE_INPUT_NEXT_IP6_INPUT,
+ VNET_DEVICE_INPUT_NEXT_MPLS_INPUT,
+ VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT,
+ VNET_DEVICE_INPUT_NEXT_DROP,
+ VNET_DEVICE_INPUT_N_NEXT_NODES,
+} vnet_device_input_next_t;
+
+#define VNET_DEVICE_INPUT_NEXT_NODES { \
+ [VNET_DEVICE_INPUT_NEXT_DROP] = "error-drop", \
+ [VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input", \
+ [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = "ip4-input-no-checksum", \
+ [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = "ip4-input", \
+ [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = "ip6-input", \
+ [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = "mpls-input", \
+}
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /* total input packet counter */
+ u64 aggregate_rx_packets;
+} vnet_device_per_worker_data_t;
+
+typedef struct
+{
+ vnet_device_per_worker_data_t *workers;
+ uword first_worker_thread_index;
+ uword last_worker_thread_index;
+ uword next_worker_thread_index;
+} vnet_device_main_t;
+
+typedef struct
+{
+ u32 hw_if_index;
+ u32 dev_instance;
+ u16 queue_id;
+ vnet_hw_interface_rx_mode mode;
+ u32 interrupt_pending;
+} vnet_device_and_queue_t;
+
+typedef struct
+{
+ vnet_device_and_queue_t *devices_and_queues;
+ vlib_node_state_t enabled_node_state;
+} vnet_device_input_runtime_t;
+
+extern vnet_device_main_t vnet_device_main;
+extern vlib_node_registration_t device_input_node;
+extern const u32 device_input_next_node_advance[];
+
+static inline void
+vnet_hw_interface_set_input_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ hw->input_node_index = node_index;
+}
+
+void vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id, uword thread_index);
+int vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id);
+int vnet_hw_interface_set_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id,
+ vnet_hw_interface_rx_mode mode);
+int vnet_hw_interface_get_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id,
+ vnet_hw_interface_rx_mode * mode);
+
+static inline u64
+vnet_get_aggregate_rx_packets (void)
+{
+ vnet_device_main_t *vdm = &vnet_device_main;
+ u64 sum = 0;
+ vnet_device_per_worker_data_t *pwd;
+
+ vec_foreach (pwd, vdm->workers) sum += pwd->aggregate_rx_packets;
+
+ return sum;
+}
+
+static inline void
+vnet_device_increment_rx_packets (u32 thread_index, u64 count)
+{
+ vnet_device_main_t *vdm = &vnet_device_main;
+ vnet_device_per_worker_data_t *pwd;
+
+ pwd = vec_elt_at_index (vdm->workers, thread_index);
+ pwd->aggregate_rx_packets += count;
+}
+
+static_always_inline vnet_device_and_queue_t *
+vnet_get_device_and_queue (vlib_main_t * vm, vlib_node_runtime_t * node)
+{
+ vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+ return rt->devices_and_queues;
+}
+
+static_always_inline uword
+vnet_get_device_input_thread_index (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id)
+{
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ ASSERT (queue_id < vec_len (hw->input_node_thread_index_by_queue));
+ return hw->input_node_thread_index_by_queue[queue_id];
+}
+
+static_always_inline void
+vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index,
+ u16 queue_id)
+{
+ vlib_main_t *vm;
+ vnet_hw_interface_t *hw;
+ vnet_device_input_runtime_t *rt;
+ vnet_device_and_queue_t *dq;
+ uword idx;
+
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+ idx = vnet_get_device_input_thread_index (vnm, hw_if_index, queue_id);
+ vm = vlib_mains[idx];
+ rt = vlib_node_get_runtime_data (vm, hw->input_node_index);
+ idx = hw->dq_runtime_index_by_queue[queue_id];
+ dq = vec_elt_at_index (rt->devices_and_queues, idx);
+ dq->interrupt_pending = 1;
+
+ vlib_node_set_interrupt_pending (vm, hw->input_node_index);
+}
+
+#define foreach_device_and_queue(var,vec) \
+ for (var = (vec); var < vec_end (vec); var++) \
+ if (clib_smp_swap (&((var)->interrupt_pending), 0) || \
+ var->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
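+
+/*
+ * Illustrative usage sketch (cf. af_packet_input_fn in this patch): a
+ * device input node walks its assigned (device, queue) pairs with
+ *
+ *   vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+ *   vnet_device_and_queue_t *dq;
+ *
+ *   foreach_device_and_queue (dq, rt->devices_and_queues)
+ *     n_rx += my_device_rx_fn (vm, node, dq->dev_instance, dq->queue_id);
+ *
+ * where 'my_device_rx_fn' is hypothetical; the clib_smp_swap above also
+ * clears a pending interrupt as each pair is visited.
+ */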
+
+#endif /* included_vnet_vnet_device_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/cli.c b/src/vnet/devices/netmap/cli.c
new file mode 100644
index 00000000..71363294
--- /dev/null
+++ b/src/vnet/devices/netmap/cli.c
@@ -0,0 +1,236 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/devices/netmap/net_netmap.h>
+#include <vnet/devices/netmap/netmap.h>
+
+static clib_error_t *
+netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ u8 hwaddr[6];
+ u8 *hw_addr_ptr = 0;
+ int r;
+ u8 is_pipe = 0;
+ u8 is_master = 0;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+      else if (unformat (line_input, "hw-addr %U",
+			 unformat_ethernet_address, hwaddr))
+ else if (unformat (line_input, "pipe"))
+ is_pipe = 1;
+ else if (unformat (line_input, "master"))
+ is_master = 1;
+ else if (unformat (line_input, "slave"))
+ is_master = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ r =
+ netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master,
+ &sw_if_index);
+
+ if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
+ {
+ error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_INVALID_INTERFACE)
+ {
+ error = clib_error_return (0, "Invalid interface name");
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
+ {
+ error = clib_error_return (0, "Interface already exists");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * '<em>netmap</em>' is a framework for very fast packet I/O from userspace.
+ * '<em>VALE</em>' is an equally fast in-kernel software switch using the
+ * netmap API. '<em>netmap</em>' includes '<em>netmap pipes</em>', a shared
+ * memory packet transport channel. Together, they provide a high speed
+ * user-space interface that allows VPP to patch into a linux namespace, a
+ * linux container, or a physical NIC without the use of DPDK. Building
+ * Netmap/VALE produces the '<em>netmap.ko</em>' kernel module, which must
+ * be loaded before netmap interfaces can be created.
+ * - https://github.com/luigirizzo/netmap - Netmap/VALE repo.
+ * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE,
+ * which is a snapshot of the Netmap/VALE repo with minor changes to work
+ * with containers and modified kernel drivers to work with NICs.
+ *
+ * Create a netmap interface that will attach to a linux interface.
+ * The interface must already exist. Once created, a new netmap interface
+ * will exist in VPP with the name '<em>netmap-<ifname></em>', where
+ * '<em><ifname></em>' takes one of two forms:
+ * - <b>ifname</b> - Linux interface to bind to.
+ * - <b>valeXXX:YYY</b> -
+ * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE
+ * interface; it must start with '<em>vale</em>' and be less
+ * than 16 characters long.
+ * - Where '<em>YYY</em>' is an existing linux namespace.
+ *
+ * This command has the following optional parameters:
+ *
+ * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either
+ * X:X:X:X:X:X unix or X.X.X cisco format.
+ *
+ * - <b>pipe</b> - Optional flag to indicate that a '<em>netmap pipe</em>'
+ * instance should be created.
+ *
+ * - <b>master | slave</b> - Optional flag to indicate whether VPP should
+ * be the master or slave of the '<em>netmap pipe</em>'. Only considered
+ * if '<em>pipe</em>' is entered. Defaults to '<em>slave</em>' if not entered.
+ *
+ * @cliexpar
+ * Example of how to create a netmap interface tied to the linux
+ * namespace '<em>vpp1</em>':
+ * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master}
+ * netmap-vale00:vpp1
+ * @cliexend
+ * Once the netmap interface is created, enable the interface using:
+ * @cliexcmd{set interface state netmap-vale00:vpp1 up}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (netmap_create_command, static) = {
+ .path = "create netmap",
+ .short_help = "create netmap name <ifname>|valeXXX:YYY "
+ "[hw-addr <mac-addr>] [pipe] [master|slave]",
+ .function = netmap_create_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ netmap_delete_if (vm, host_if_name);
+
+done:
+  vec_free (host_if_name);
+  unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Delete a netmap interface. Use '<em><ifname></em>' to identify
+ * the netmap interface to be deleted. In VPP, netmap interfaces are
+ * named '<em>netmap-<ifname></em>', where '<em><ifname></em>'
+ * takes one of two forms:
+ * - <b>ifname</b> - Linux interface to bind to.
+ * - <b>valeXXX:YYY</b> -
+ * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE
+ * interface; it must start with '<em>vale</em>' and be less
+ * than 16 characters long.
+ * - Where '<em>YYY</em>' is an existing linux namespace.
+ *
+ * @cliexpar
+ * Example of how to delete a netmap interface named '<em>netmap-vale00:vpp1</em>':
+ * @cliexcmd{delete netmap name vale00:vpp1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (netmap_delete_command, static) = {
+ .path = "delete netmap",
+ .short_help = "delete netmap name <ifname>|valeXXX:YYY",
+ .function = netmap_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+netmap_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (netmap_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/device.c b/src/vnet/devices/netmap/device.c
new file mode 100644
index 00000000..aea9ddf4
--- /dev/null
+++ b/src/vnet/devices/netmap/device.c
@@ -0,0 +1,256 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/devices/netmap/net_netmap.h>
+#include <vnet/devices/netmap/netmap.h>
+
+#define foreach_netmap_tx_func_error \
+_(NO_FREE_SLOTS, "no free tx slots") \
+_(PENDING_MSGS, "pending msgs in tx ring")
+
+typedef enum
+{
+#define _(f,s) NETMAP_TX_ERROR_##f,
+ foreach_netmap_tx_func_error
+#undef _
+ NETMAP_TX_N_ERROR,
+} netmap_tx_func_error_t;
+
+static char *netmap_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_netmap_tx_func_error
+#undef _
+};
+
+
+static u8 *
+format_netmap_device_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ netmap_main_t *apm = &netmap_main;
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i);
+
+ s = format (s, "netmap-%s", nif->host_if_name);
+ return s;
+}
+
+static u8 *
+format_netmap_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ int verbose = va_arg (*args, int);
+ netmap_main_t *nm = &netmap_main;
+ netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "NETMAP interface");
+ if (verbose)
+ {
+ s = format (s, "\n%U version %d flags 0x%x"
+ "\n%U region %u memsize 0x%x offset 0x%x"
+ "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u",
+ format_white_space, indent + 2,
+ nif->req->nr_version,
+ nif->req->nr_flags,
+ format_white_space, indent + 2,
+ nif->mem_region,
+ nif->req->nr_memsize,
+ nif->req->nr_offset,
+ format_white_space, indent + 2,
+ nif->req->nr_tx_slots,
+ nif->req->nr_rx_slots,
+ nif->req->nr_tx_rings, nif->req->nr_rx_rings);
+ }
+ return s;
+}
+
+static u8 *
+format_netmap_tx_trace (u8 * s, va_list * args)
+{
+ s = format (s, "Unimplemented...");
+ return s;
+}
+
+static uword
+netmap_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ netmap_main_t *nm = &netmap_main;
+ u32 *buffers = vlib_frame_args (frame);
+ u32 n_left = frame->n_vectors;
+ f64 const time_constant = 1e3;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance);
+ int cur_ring;
+
+ clib_spinlock_lock_if_init (&nif->lockp);
+
+ cur_ring = nif->first_tx_ring;
+
+ while (n_left && cur_ring <= nif->last_tx_ring)
+ {
+ struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring);
+ int n_free_slots = nm_ring_space (ring);
+ uint cur = ring->cur;
+
+ if (nm_tx_pending (ring))
+ {
+ if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0)
+ clib_unix_warning ("NIOCTXSYNC");
+ clib_cpu_time_wait (time_constant);
+
+ if (nm_tx_pending (ring) && !n_free_slots)
+ {
+ cur_ring++;
+ continue;
+ }
+ }
+
+ while (n_left && n_free_slots)
+ {
+ vlib_buffer_t *b0 = 0;
+ u32 bi = buffers[0];
+ u32 len;
+ u32 offset = 0;
+ buffers++;
+
+ struct netmap_slot *slot = &ring->slot[cur];
+
+ do
+ {
+ b0 = vlib_get_buffer (vm, bi);
+ len = b0->current_length;
+		  /* copy this buffer segment into the netmap slot */
+ clib_memcpy ((u8 *) NETMAP_BUF (ring, slot->buf_idx) + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+ }
+	      while ((bi = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) ?
+		      b0->next_buffer : 0));
+
+ slot->len = offset;
+ cur = (cur + 1) % ring->num_slots;
+ n_free_slots--;
+ n_left--;
+ }
+ CLIB_MEMORY_BARRIER ();
+ ring->head = ring->cur = cur;
+ }
+
+ if (n_left < frame->n_vectors)
+ ioctl (nif->fd, NIOCTXSYNC, NULL);
+
+ clib_spinlock_unlock_if_init (&nif->lockp);
+
+ if (n_left)
+ vlib_error_count (vm, node->node_index,
+ (n_left ==
+ frame->n_vectors ? NETMAP_TX_ERROR_PENDING_MSGS :
+ NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left);
+
+ vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
+
+static void
+netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ netmap_main_t *apm = &netmap_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ nif->per_interface_next_index = node_index;
+ return;
+ }
+
+ nif->per_interface_next_index =
+ vlib_node_add_next (vlib_get_main (), netmap_input_node.index,
+ node_index);
+}
+
+static void
+netmap_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+static clib_error_t *
+netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ netmap_main_t *apm = &netmap_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+ u32 hw_flags;
+
+ nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (nif->is_admin_up)
+ hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+ else
+ hw_flags = 0;
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return 0;
+}
+
+static clib_error_t *
+netmap_subif_add_del_function (vnet_main_t * vnm,
+ u32 hw_if_index,
+ struct vnet_sw_interface_t *st, int is_add)
+{
+ /* Nothing for now */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (netmap_device_class) = {
+ .name = "netmap",
+ .tx_function = netmap_interface_tx,
+ .format_device_name = format_netmap_device_name,
+ .format_device = format_netmap_device,
+ .format_tx_trace = format_netmap_tx_trace,
+ .tx_function_n_errors = NETMAP_TX_N_ERROR,
+ .tx_function_error_strings = netmap_tx_func_error_strings,
+ .rx_redirect_to_node = netmap_set_interface_next_node,
+ .clear_counters = netmap_clear_hw_interface_counters,
+ .admin_up_down_function = netmap_interface_admin_up_down,
+ .subif_add_del_function = netmap_subif_add_del_function,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (netmap_device_class,
+ netmap_interface_tx)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/dir.dox b/src/vnet/devices/netmap/dir.dox
new file mode 100644
index 00000000..7ddbf947
--- /dev/null
+++ b/src/vnet/devices/netmap/dir.dox
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief netmap Interface Implementation.
+
+This directory contains the source code for the netmap driver.
+
+*/
+/*? %%clicmd:group_label netmap %% ?*/
+/*? %%syscfg:group_label netmap %% ?*/
diff --git a/src/vnet/devices/netmap/net_netmap.h b/src/vnet/devices/netmap/net_netmap.h
new file mode 100644
index 00000000..fd4253b7
--- /dev/null
+++ b/src/vnet/devices/netmap/net_netmap.h
@@ -0,0 +1,650 @@
+/*
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $
+ *
+ * Definitions of constants and the structures used by the netmap
+ * framework, for the part visible to both kernel and userspace.
+ * Detailed info on netmap is available with "man netmap" or at
+ *
+ * http://info.iet.unipi.it/~luigi/netmap/
+ *
+ * This API is also used to communicate with the VALE software switch
+ */
+
+#ifndef _NET_NETMAP_H_
+#define _NET_NETMAP_H_
+
+#define NETMAP_API 11 /* current API version */
+
+#define NETMAP_MIN_API 11 /* min and max versions accepted */
+#define NETMAP_MAX_API 15
+/*
+ * Some fields should be cache-aligned to reduce contention.
+ * The alignment is architecture and OS dependent, but rather than
+ * digging into OS headers to find the exact value we use an estimate
+ * that should cover most architectures.
+ */
+#define NM_CACHE_ALIGN 128
+
+/*
+ * --- Netmap data structures ---
+ *
+ * The userspace data structures used by netmap are shown below.
+ * They are allocated by the kernel and mmap()ed by userspace threads.
+ * Pointers are implemented as memory offsets or indexes,
+ * so that they can be easily dereferenced in kernel and userspace.
+
+ KERNEL (opaque, obviously)
+
+ ====================================================================
+ |
+ USERSPACE | struct netmap_ring
+ +---->+---------------+
+ / | head,cur,tail |
+ struct netmap_if (nifp, 1 per fd) / | buf_ofs |
+ +---------------+ / | other fields |
+ | ni_tx_rings | / +===============+
+ | ni_rx_rings | / | buf_idx, len | slot[0]
+ | | / | flags, ptr |
+ | | / +---------------+
+ +===============+ / | buf_idx, len | slot[1]
+ | txring_ofs[0] | (rel.to nifp)--' | flags, ptr |
+ | txring_ofs[1] | +---------------+
+ (tx+1 entries) (num_slots entries)
+ | txring_ofs[t] | | buf_idx, len | slot[n-1]
+ +---------------+ | flags, ptr |
+ | rxring_ofs[0] | +---------------+
+ | rxring_ofs[1] |
+ (rx+1 entries)
+ | rxring_ofs[r] |
+ +---------------+
+
+ * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
+ * a file descriptor, the mmap()ed region contains a (logically readonly)
+ * struct netmap_if pointing to struct netmap_ring's.
+ *
+ * There is one netmap_ring per physical NIC ring, plus one tx/rx ring
+ * pair attached to the host stack (this pair is unused for non-NIC ports).
+ *
+ * All physical/host stack ports share the same memory region,
+ * so that zero-copy can be implemented between them.
+ * VALE switch ports instead have separate memory regions.
+ *
+ * The netmap_ring is the userspace-visible replica of the NIC ring.
+ * Each slot has the index of a buffer (MTU-sized and residing in the
+ * mmapped region), its length and some flags. An extra 64-bit pointer
+ * is provided for user-supplied buffers in the tx path.
+ *
+ * In user space, the buffer address is computed as
+ * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
+ *
+ * Added in NETMAP_API 11:
+ *
+ * + NIOCREGIF can request the allocation of extra spare buffers from
+ * the same memory pool. The desired number of buffers must be in
+ * nr_arg3. The ioctl may return fewer buffers, depending on memory
+ * availability. nr_arg3 will return the actual value, and, once
+ * mapped, nifp->ni_bufs_head will be the index of the first buffer.
+ *
+ * The buffers are linked to each other using the first uint32_t
+ * as the index. On close, ni_bufs_head must point to the list of
+ * buffers to be released.
+ *
+ * + NIOCREGIF can request space for extra rings (and buffers)
+ * allocated in the same memory space. The number of extra rings
+ * is in nr_arg1, and is advisory. This is a no-op on NICs where
+ * the size of the memory space is fixed.
+ *
+ * + NIOCREGIF can attach to PIPE rings sharing the same memory
+ * space with a parent device. The ifname indicates the parent device,
+ * which must already exist. Flags in nr_flags indicate if we want to
+ * bind the master or slave side, the index (from nr_ringid)
+ * is just a cookie and does not need to be sequential.
+ *
+ * + NIOCREGIF can also attach to 'monitor' rings that replicate
+ * the content of specific rings, also from the same memory space.
+ *
+ * Extra flags in nr_flags support the above functions.
+ * Application libraries may use the following naming scheme:
+ * netmap:foo all NIC ring pairs
+ * netmap:foo^ only host ring pair
+ * netmap:foo+ all NIC ring + host ring pairs
+ * netmap:foo-k the k-th NIC ring pair
+ * netmap:foo{k PIPE ring pair k, master side
+ * netmap:foo}k PIPE ring pair k, slave side
+ */
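+
+/*
+ * Illustrative userspace sketch (assumptions: 'fd' was bound with
+ * NIOCREGIF using 'struct nmreq req', and the shared region was
+ * mmap()ed as 'mem'); pointers are recovered from offsets as
+ *
+ *	struct netmap_if *nifp = NETMAP_IF (mem, req.nr_offset);
+ *	struct netmap_ring *txr = NETMAP_TXRING (nifp, 0);
+ *	struct netmap_slot *slot = &txr->slot[txr->cur];
+ *	char *buf = NETMAP_BUF (txr, slot->buf_idx);
+ *
+ * then fill buf, set slot->len, advance txr->head and txr->cur, and
+ * issue ioctl (fd, NIOCTXSYNC, NULL) to hand the slot to the kernel.
+ */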
+
+/*
+ * struct netmap_slot is a buffer descriptor
+ */
+struct netmap_slot {
+ uint32_t buf_idx; /* buffer index */
+ uint16_t len; /* length for this slot */
+ uint16_t flags; /* buf changed, etc. */
+ uint64_t ptr; /* pointer for indirect buffers */
+};
+
+/*
+ * The following flags control how the slot is used
+ */
+
+#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */
+ /*
+ * must be set whenever buf_idx is changed (as it might be
+ * necessary to recompute the physical address and mapping)
+ *
+ * It is also set by the kernel whenever the buf_idx is
+ * changed internally (e.g., by pipes). Applications may
+ * use this information to know when they can reuse the
+ * contents of previously prepared buffers.
+ */
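+
+ /*
+ * A minimal sketch of the zero-copy idiom this flag supports
+ * (rx_slot and tx_slot are assumed to point into two rings that
+ * share a memory region): swap the buffer indices instead of
+ * copying the payload, and mark both slots so the kernel can
+ * revalidate the buffer mappings:
+ *
+ * uint32_t tmp = rx_slot->buf_idx;
+ * rx_slot->buf_idx = tx_slot->buf_idx;
+ * tx_slot->buf_idx = tmp;
+ * tx_slot->len = rx_slot->len;
+ * rx_slot->flags |= NS_BUF_CHANGED;
+ * tx_slot->flags |= NS_BUF_CHANGED;
+ */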
+
+#define NS_REPORT 0x0002 /* ask the hardware to report results */
+ /*
+ * Request notification when slot is used by the hardware.
+ * Normally transmit completions are handled lazily and
+ * may be unreported. This flag lets us know when a slot
+ * has been sent (e.g. to terminate the sender).
+ */
+
+#define NS_FORWARD 0x0004 /* pass packet 'forward' */
+ /*
+ * (Only for physical ports, rx rings with NR_FORWARD set).
+ * Slots released to the kernel (i.e. before ring->head) with
+ * this flag set are passed to the peer ring (host/NIC),
+ * thus restoring the host-NIC connection for these slots.
+ * This supports efficient traffic monitoring or firewalling.
+ */
+
+#define NS_NO_LEARN 0x0008 /* disable bridge learning */
+ /*
+ * On a VALE switch, do not 'learn' the source port for
+ * this buffer.
+ */
+
+#define NS_INDIRECT 0x0010 /* userspace buffer */
+ /*
+ * (VALE tx rings only) data is in a userspace buffer,
+ * whose address is in the 'ptr' field in the slot.
+ */
+
+#define NS_MOREFRAG 0x0020 /* packet has more fragments */
+ /*
+ * (VALE ports only)
+ * Set on all but the last slot of a multi-segment packet.
+ * The 'len' field refers to the individual fragment.
+ */
+
+#define NS_PORT_SHIFT 8
+#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
+ /*
+ * The high 8 bits of the flag, if not zero, indicate the
+ * destination port for the VALE switch, overriding
+ * the lookup table.
+ */
+
+#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff)
+ /*
+ * (VALE rx rings only) the high 8 bits
+ * are the number of fragments.
+ */
+
+
+/*
+ * struct netmap_ring
+ *
+ * Netmap representation of a TX or RX ring (also known as "queue").
+ * This is a queue implemented as a fixed-size circular array.
+ * At the software level the important fields are: head, cur, tail.
+ *
+ * In TX rings:
+ *
+ * head first slot available for transmission.
+ * cur wakeup point. select() and poll() will unblock
+ * when 'tail' moves past 'cur'
+ * tail (readonly) first slot reserved to the kernel
+ *
+ * [head .. tail-1] can be used for new packets to send;
+ * 'head' and 'cur' must be incremented as slots are filled
+ * with new packets to be sent;
+ * 'cur' can be moved further ahead if we need more space
+ * for new transmissions. XXX todo (2014-03-12)
+ *
+ * In RX rings:
+ *
+ * head first valid received packet
+ * cur wakeup point. select() and poll() will unblock
+ * when 'tail' moves past 'cur'
+ * tail (readonly) first slot reserved to the kernel
+ *
+ * [head .. tail-1] contain received packets;
+ * 'head' and 'cur' must be incremented as slots are consumed
+ * and can be returned to the kernel;
+ * 'cur' can be moved further ahead if we want to wait for
+ * new packets without returning the previous ones.
+ *
+ * DATA OWNERSHIP/LOCKING:
+ * The netmap_ring, and all slots and buffers in the range
+ * [head .. tail-1] are owned by the user program;
+ * the kernel only accesses them during a netmap system call
+ * and in the user thread context.
+ *
+ * Other slots and buffers are reserved for use by the kernel
+ */
+struct netmap_ring {
+ /*
+ * buf_ofs is meant to be used through macros.
+ * It contains the offset of the buffer region from this
+ * descriptor.
+ */
+ const int64_t buf_ofs;
+ const uint32_t num_slots; /* number of slots in the ring. */
+ const uint32_t nr_buf_size;
+ const uint16_t ringid;
+ const uint16_t dir; /* 0: tx, 1: rx */
+
+ uint32_t head; /* (u) first user slot */
+ uint32_t cur; /* (u) wakeup point */
+ uint32_t tail; /* (k) first kernel slot */
+
+ uint32_t flags;
+
+ struct timeval ts; /* (k) time of last *sync() */
+
+ /* opaque room for a mutex or similar object */
+#if !defined(_WIN32) || defined(__CYGWIN__)
+ uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128];
+#else
+ uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128];
+#endif
+
+ /* the slots follow. This struct has variable size */
+ struct netmap_slot slot[0]; /* array of slots. */
+};
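+
+/*
+ * A minimal TX sketch using the fields above ('fd' is assumed to be
+ * a file descriptor bound with NIOCREGIF, 'ring' one of its tx rings,
+ * and build_frame() a hypothetical helper that writes a frame into
+ * the buffer and returns its length):
+ *
+ * uint32_t i = ring->head;
+ * while (i != ring->tail) {
+ * struct netmap_slot *slot = &ring->slot[i];
+ * char *buf = (char *)ring + ring->buf_ofs
+ * + slot->buf_idx * (uint64_t)ring->nr_buf_size;
+ * slot->len = build_frame(buf);
+ * i = (i + 1 == ring->num_slots) ? 0 : i + 1;
+ * }
+ * ring->head = ring->cur = i;
+ * ioctl(fd, NIOCTXSYNC, NULL);
+ */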
+
+
+/*
+ * RING FLAGS
+ */
+#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
+ /*
+ * updates the 'ts' field on each netmap syscall. This saves
+ * a separate gettimeofday(), and is not much worse than
+ * software timestamps generated in the interrupt handler.
+ */
+
+#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */
+ /*
+ * Enables the NS_FORWARD slot flag for the ring.
+ */
+
+
+/*
+ * Netmap representation of an interface and its queue(s).
+ * This is initialized by the kernel when binding a file
+ * descriptor to a port, and should be considered as readonly
+ * by user programs. The kernel never uses it.
+ *
+ * There is one netmap_if for each file descriptor on which we want
+ * to select/poll.
+ * select/poll operates on one or all pairs depending on the value of
+ * nr_ringid passed on the ioctl.
+ */
+struct netmap_if {
+ char ni_name[IFNAMSIZ]; /* name of the interface. */
+ const uint32_t ni_version; /* API version, currently unused */
+ const uint32_t ni_flags; /* properties */
+#define NI_PRIV_MEM 0x1 /* private memory region */
+
+ /*
+ * The number of packet rings available in netmap mode.
+ * Physical NICs can have different numbers of tx and rx rings.
+ * Physical NICs also have a 'host' ring pair.
+ * Additionally, clients can request additional ring pairs to
+ * be used for internal communication.
+ */
+ const uint32_t ni_tx_rings; /* number of HW tx rings */
+ const uint32_t ni_rx_rings; /* number of HW rx rings */
+
+ uint32_t ni_bufs_head; /* head index for extra bufs */
+ uint32_t ni_spare1[5];
+ /*
+ * The following array contains the offset of each netmap ring
+ * from this structure, in the following order:
+ * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings;
+ * NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings.
+ *
+ * The area is filled up by the kernel on NIOCREGIF,
+ * and then only read by userspace code.
+ */
+ const ssize_t ring_ofs[0];
+};
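+
+/*
+ * A sketch of how userspace resolves ring_ofs[] into ring pointers
+ * (this is what the NETMAP_TXRING()/NETMAP_RXRING() macros in
+ * netmap_user.h do); 'nifp' is the struct netmap_if pointer obtained
+ * from the mmap()ed region:
+ *
+ * struct netmap_ring *txr = (struct netmap_ring *)
+ * ((char *)nifp + nifp->ring_ofs[i]);
+ * struct netmap_ring *rxr = (struct netmap_ring *)
+ * ((char *)nifp + nifp->ring_ofs[j + nifp->ni_tx_rings + 1]);
+ *
+ * where 0 <= i <= ni_tx_rings (index ni_tx_rings is the host tx
+ * ring) and 0 <= j <= ni_rx_rings.
+ */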
+
+
+#ifndef NIOCREGIF
+/*
+ * ioctl names and related fields
+ *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ * whose identity is set in NIOCREGIF through nr_ringid.
+ * These are non-blocking and take no argument.
+ *
+ * NIOCGINFO takes a struct ifreq, the interface name is the input,
+ * the outputs are the number of queues and the number of
+ * descriptors for each queue (useful to set number of threads etc.).
+ * The info returned is only advisory and may change before
+ * the interface is bound to a file descriptor.
+ *
+ * NIOCREGIF takes an interface name within a struct nmreq,
+ * and activates netmap mode on the interface (if possible).
+ *
+ * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we
+ * can pass it down to other NIC-related ioctls.
+ *
+ * The actual argument (struct nmreq) has a number of options to request
+ * different functions.
+ * The following are used in NIOCREGIF when nr_cmd == 0:
+ *
+ * nr_name (in)
+ * The name of the port (em0, valeXXX:YYY, etc.)
+ * limited to IFNAMSIZ for backward compatibility.
+ *
+ * nr_version (in/out)
+ * Must match NETMAP_API as used in the kernel, error otherwise.
+ * Always returns the desired value on output.
+ *
+ * nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out)
+ * On input, non-zero values may be used to reconfigure the port
+ * according to the requested values, but this is not guaranteed.
+ * On output the actual values in use are reported.
+ *
+ * nr_ringid (in)
+ * Indicates how rings should be bound to the file descriptors.
+ * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
+ * are used to indicate the ring number, and nr_flags specifies
+ * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
+ *
+ * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED:
+ * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control
+ * the binding as follows:
+ * 0 (default) binds all physical rings
+ * NETMAP_HW_RING | ring number binds a single ring pair
+ * NETMAP_SW_RING binds only the host tx/rx rings
+ *
+ * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push
+ * packets on tx rings only if POLLOUT is set.
+ * The default is to push any pending packet.
+ *
+ * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release
+ * packets on rx rings also when POLLIN is NOT set.
+ * The default is to touch the rx ring only with POLLIN.
+ * Note that this is the opposite of TX because it
+ * reflects the common usage.
+ *
+ * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead.
+ * NETMAP_PRIV_MEM is set on return for ports that do not use
+ * the global memory allocator.
+ * This information is not significant and applications
+ * should look at the region id in nr_arg2.
+ *
+ * nr_flags is the recommended mode to indicate which rings should
+ * be bound to a file descriptor. Values are NR_REG_*
+ *
+ * nr_arg1 (in) The number of extra rings to be reserved.
+ * Especially when allocating a VALE port the system only
+ * allocates the amount of memory needed for the port.
+ * If more shared memory rings are desired (e.g. for pipes),
+ * the first invocation for the same basename/allocator
+ * should specify a suitable number. Memory cannot be
+ * extended after the first allocation without closing
+ * all ports on the same region.
+ *
+ * nr_arg2 (in/out) The identity of the memory region used.
+ * On input, 0 means the system decides autonomously,
+ * other values may try to select a specific region.
+ * On return the actual value is reported.
+ * Region '1' is the global allocator, normally shared
+ * by all interfaces. Other values are private regions.
+ * If two ports use the same region, zero-copy is possible.
+ *
+ * nr_arg3 (in/out) number of extra buffers to be allocated.
+ *
+ * nr_cmd (in) if non-zero indicates a special command:
+ * NETMAP_BDG_ATTACH and nr_name = vale*:ifname
+ * attaches the NIC to the switch; nr_ringid specifies
+ * which rings to use. Used by vale-ctl -a ...
+ * nr_arg1 = NETMAP_BDG_HOST also attaches the host port
+ * as in vale-ctl -h ...
+ *
+ * NETMAP_BDG_DETACH and nr_name = vale*:ifname
+ * disconnects a previously attached NIC.
+ * Used by vale-ctl -d ...
+ *
+ * NETMAP_BDG_LIST
+ * list the configuration of VALE switches.
+ *
+ * NETMAP_BDG_VNET_HDR
+ * Set the virtio-net header length used by the client
+ * of a VALE switch port.
+ *
+ * NETMAP_BDG_NEWIF
+ * create a persistent VALE port with name nr_name.
+ * Used by vale-ctl -n ...
+ *
+ * NETMAP_BDG_DELIF
+ * delete a persistent VALE port. Used by vale-ctl -d ...
+ *
+ * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific
+ *
+ */
+
+
+/*
+ * struct nmreq overlays a struct ifreq (just the name)
+ */
+struct nmreq {
+ char nr_name[IFNAMSIZ];
+ uint32_t nr_version; /* API version */
+ uint32_t nr_offset; /* nifp offset in the shared region */
+ uint32_t nr_memsize; /* size of the shared region */
+ uint32_t nr_tx_slots; /* slots in tx rings */
+ uint32_t nr_rx_slots; /* slots in rx rings */
+ uint16_t nr_tx_rings; /* number of tx rings */
+ uint16_t nr_rx_rings; /* number of rx rings */
+
+ uint16_t nr_ringid; /* ring(s) we care about */
+#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */
+#define NETMAP_SW_RING 0x2000 /* only host ring pair */
+
+#define NETMAP_RING_MASK 0x0fff /* the ring number */
+
+#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
+
+#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */
+
+ uint16_t nr_cmd;
+#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
+#define NETMAP_BDG_DETACH 2 /* detach the NIC */
+#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */
+#define NETMAP_BDG_LIST 4 /* get bridge's info */
+#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */
+#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */
+#define NETMAP_BDG_NEWIF 6 /* create a virtual port */
+#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */
+#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */
+#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */
+#define NETMAP_BDG_POLLING_ON 10 /* start polling kthread */
+#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */
+#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */
+ uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */
+#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
+
+ uint16_t nr_arg2;
+ uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */
+ uint32_t nr_flags;
+ /* various modes, extends nr_ringid */
+ uint32_t spare2[1];
+};
+
+#define NR_REG_MASK 0xf /* values for nr_flags */
+enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
+ NR_REG_ALL_NIC = 1,
+ NR_REG_SW = 2,
+ NR_REG_NIC_SW = 3,
+ NR_REG_ONE_NIC = 4,
+ NR_REG_PIPE_MASTER = 5,
+ NR_REG_PIPE_SLAVE = 6,
+};
+/* monitor uses the NR_REG to select the rings to monitor */
+#define NR_MONITOR_TX 0x100
+#define NR_MONITOR_RX 0x200
+#define NR_ZCOPY_MON 0x400
+/* request exclusive access to the selected rings */
+#define NR_EXCLUSIVE 0x800
+/* request ptnetmap host support */
+#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */
+#define NR_PTNETMAP_HOST 0x1000
+#define NR_RX_RINGS_ONLY 0x2000
+#define NR_TX_RINGS_ONLY 0x4000
+/* Applications set this flag if they are able to deal with virtio-net headers,
+ * that is send/receive frames that start with a virtio-net header.
+ * If not set, NIOCREGIF will fail with netmap ports that require applications
+ * to use those headers. If the flag is set, the application can use the
+ * NETMAP_VNET_HDR_GET command to figure out the header length. */
+#define NR_ACCEPT_VNET_HDR 0x8000
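+
+/*
+ * A minimal registration sketch tying the pieces above together
+ * (error checks omitted; "em0" is a placeholder port name):
+ *
+ * struct nmreq req;
+ * int fd = open("/dev/netmap", O_RDWR);
+ * memset(&req, 0, sizeof(req));
+ * req.nr_version = NETMAP_API;
+ * strncpy(req.nr_name, "em0", sizeof(req.nr_name) - 1);
+ * req.nr_flags = NR_REG_ALL_NIC;
+ * ioctl(fd, NIOCREGIF, &req);
+ * void *mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
+ * MAP_SHARED, fd, 0);
+ * struct netmap_if *nifp =
+ * (struct netmap_if *)((char *)mem + req.nr_offset);
+ */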
+
+
+/*
+ * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined
+ * in ws2def.h, but it is not clear whether they are in the form
+ * we need, so we redefine them in a form convenient to use for
+ * DeviceIoControl signatures.
+ */
+#ifdef _WIN32
+#undef _IO // ws2def.h
+#define _WIN_NM_IOCTL_TYPE 40000
+#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \
+ METHOD_BUFFERED, FILE_ANY_ACCESS )
+#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \
+ METHOD_OUT_DIRECT, FILE_ANY_ACCESS )
+
+#define _IOWR(_c, _n, _s) _IO(_c, _n)
+
+/* We have some internal sysctls in addition to the externally visible ones */
+#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT
+#define NETMAP_POLL _IO('i', 162)
+
+/* and also two setsockopt for sysctl emulation */
+#define NETMAP_SETSOCKOPT _IO('i', 140)
+#define NETMAP_GETSOCKOPT _IO('i', 141)
+
+
+//These linknames are for the Netmap Core Driver
+#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP"
+#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap"
+
+//Definition of a structure used to pass a virtual address within an IOCTL
+typedef struct _MEMORY_ENTRY {
+ PVOID pUsermodeVirtualAddress;
+} MEMORY_ENTRY, *PMEMORY_ENTRY;
+
+typedef struct _POLL_REQUEST_DATA {
+ int events;
+ int timeout;
+ int revents;
+} POLL_REQUEST_DATA;
+
+#endif /* _WIN32 */
+
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions
+ */
+#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
+#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
+#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
+#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
+#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */
+#endif /* !NIOCREGIF */
+
+
+/*
+ * Helper functions for kernel and userspace
+ */
+
+/*
+ * check if space is available in the ring.
+ */
+static inline int
+nm_ring_empty(struct netmap_ring *ring)
+{
+ return (ring->cur == ring->tail);
+}
+
+/*
+ * Opaque structure that is passed to an external kernel
+ * module via ioctl(fd, NIOCCONFIG, req) for a user-owned
+ * bridge port (at this point ephemeral VALE interface).
+ */
+#define NM_IFRDATA_LEN 256
+struct nm_ifreq {
+ char nifr_name[IFNAMSIZ];
+ char data[NM_IFRDATA_LEN];
+};
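+
+/*
+ * A usage sketch ('fd' is the bound netmap descriptor; the payload
+ * layout is defined by the external module, and "vale0:p0" is a
+ * placeholder port name):
+ *
+ * struct nm_ifreq r;
+ * memset(&r, 0, sizeof(r));
+ * strncpy(r.nifr_name, "vale0:p0", sizeof(r.nifr_name) - 1);
+ * // fill r.data as the module expects
+ * ioctl(fd, NIOCCONFIG, &r);
+ */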
+
+/*
+ * netmap kernel thread configuration
+ */
+/* bhyve/vmm.ko MSIX parameters for IOCTL */
+struct ptn_vmm_ioctl_msix {
+ uint64_t msg;
+ uint64_t addr;
+};
+
+/* IOCTL parameters */
+struct nm_kth_ioctl {
+ u_long com;
+ union {
+ struct ptn_vmm_ioctl_msix msix;
+ } data;
+};
+
+/* Configuration of a ptnetmap ring */
+struct ptnet_ring_cfg {
+ uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */
+ uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */
+ struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */
+};
+#endif /* _NET_NETMAP_H_ */
diff --git a/src/vnet/devices/netmap/netmap.api b/src/vnet/devices/netmap/netmap.api
new file mode 100644
index 00000000..8dc698b9
--- /dev/null
+++ b/src/vnet/devices/netmap/netmap.api
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Create netmap
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param netmap_if_name - interface name
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+ @param is_pipe - is pipe
+ @param is_master - 0=slave, 1=master
+*/
+autoreply define netmap_create
+{
+ u32 client_index;
+ u32 context;
+
+ u8 netmap_if_name[64];
+ u8 hw_addr[6];
+ u8 use_random_hw_addr;
+ u8 is_pipe;
+ u8 is_master;
+};
+
+/** \brief Delete netmap
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param netmap_if_name - interface name
+*/
+autoreply define netmap_delete
+{
+ u32 client_index;
+ u32 context;
+
+ u8 netmap_if_name[64];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/netmap.c b/src/vnet/devices/netmap/netmap.c
new file mode 100644
index 00000000..fc49ed62
--- /dev/null
+++ b/src/vnet/devices/netmap/netmap.c
@@ -0,0 +1,312 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <vnet/devices/netmap/net_netmap.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/netmap/netmap.h>
+
+static u32
+netmap_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
+ u32 flags)
+{
+ /* nothing for now */
+ return 0;
+}
+
+static clib_error_t *
+netmap_fd_read_ready (clib_file_t * uf)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ netmap_main_t *nm = &netmap_main;
+ u32 idx = uf->private_data;
+
+ nm->pending_input_bitmap =
+ clib_bitmap_set (nm->pending_input_bitmap, idx, 1);
+
+ /* Schedule the rx node */
+ vlib_node_set_interrupt_pending (vm, netmap_input_node.index);
+
+ return 0;
+}
+
+static void
+close_netmap_if (netmap_main_t * nm, netmap_if_t * nif)
+{
+ if (nif->clib_file_index != ~0)
+ {
+ clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index);
+ nif->clib_file_index = ~0;
+ }
+ else if (nif->fd > -1)
+ close (nif->fd);
+
+ if (nif->mem_region)
+ {
+ netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region];
+ if (--reg->refcnt == 0)
+ {
+ munmap (reg->mem, reg->region_size);
+ reg->region_size = 0;
+ }
+ }
+
+ mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name,
+ &nif->if_index);
+ vec_free (nif->host_if_name);
+ vec_free (nif->req);
+
+ memset (nif, 0, sizeof (*nif));
+ pool_put (nm->interfaces, nif);
+}
+
+int
+netmap_worker_thread_enable ()
+{
+ /* if worker threads are enabled, switch to polling mode */
+ foreach_vlib_main ((
+ {
+ vlib_node_set_state (this_vlib_main,
+ netmap_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+ }));
+
+ return 0;
+}
+
+int
+netmap_worker_thread_disable ()
+{
+ foreach_vlib_main ((
+ {
+ vlib_node_set_state (this_vlib_main,
+ netmap_input_node.index,
+ VLIB_NODE_STATE_INTERRUPT);
+ }));
+
+ return 0;
+}
+
+int
+netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set,
+ u8 is_pipe, u8 is_master, u32 * sw_if_index)
+{
+ netmap_main_t *nm = &netmap_main;
+ int ret = 0;
+ netmap_if_t *nif = 0;
+ u8 hw_addr[6];
+ clib_error_t *error = 0;
+ vnet_sw_interface_t *sw;
+ vnet_main_t *vnm = vnet_get_main ();
+ uword *p;
+ struct nmreq *req = 0;
+ netmap_mem_region_t *reg;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ int fd;
+
+ p = mhash_get (&nm->if_index_by_host_if_name, if_name);
+ if (p)
+ return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+
+ fd = open ("/dev/netmap", O_RDWR);
+ if (fd < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ pool_get (nm->interfaces, nif);
+ nif->if_index = nif - nm->interfaces;
+ nif->fd = fd;
+ nif->clib_file_index = ~0;
+
+ vec_validate (req, 0);
+ nif->req = req;
+ req->nr_version = NETMAP_API;
+
+ if (is_pipe)
+ req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE;
+ else
+ req->nr_flags = NR_REG_ALL_NIC;
+
+ req->nr_flags |= NR_ACCEPT_VNET_HDR;
+ snprintf (req->nr_name, IFNAMSIZ, "%s", if_name);
+ req->nr_name[IFNAMSIZ - 1] = 0;
+
+ if (ioctl (nif->fd, NIOCREGIF, req))
+ {
+ ret = VNET_API_ERROR_NOT_CONNECTED;
+ goto error;
+ }
+
+ nif->mem_region = req->nr_arg2;
+ vec_validate (nm->mem_regions, nif->mem_region);
+ reg = &nm->mem_regions[nif->mem_region];
+ if (reg->region_size == 0)
+ {
+ reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ clib_warning ("mem %p", reg->mem);
+ if (reg->mem == MAP_FAILED)
+ {
+ ret = VNET_API_ERROR_NOT_CONNECTED;
+ goto error;
+ }
+ reg->region_size = req->nr_memsize;
+ }
+ reg->refcnt++;
+
+ nif->nifp = NETMAP_IF (reg->mem, req->nr_offset);
+ nif->first_rx_ring = 0;
+ nif->last_rx_ring = 0;
+ nif->first_tx_ring = 0;
+ nif->last_tx_ring = 0;
+ nif->host_if_name = if_name;
+ nif->per_interface_next_index = ~0;
+
+ if (tm->n_vlib_mains > 1)
+ clib_spinlock_init (&nif->lockp);
+
+ {
+ clib_file_t template = { 0 };
+ template.read_function = netmap_fd_read_ready;
+ template.file_descriptor = nif->fd;
+ template.private_data = nif->if_index;
+ nif->clib_file_index = clib_file_add (&file_main, &template);
+ }
+
+ /* use the configured MAC address or generate a random one */
+ if (hw_addr_set)
+ memcpy (hw_addr, hw_addr_set, 6);
+ else
+ {
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+
+ memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+ hw_addr[0] = 2;
+ hw_addr[1] = 0xfe;
+ }
+
+ error = ethernet_register_interface (vnm, netmap_device_class.index,
+ nif->if_index, hw_addr,
+ &nif->hw_if_index,
+ netmap_eth_flag_change);
+
+ if (error)
+ {
+ clib_error_report (error);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index);
+ nif->sw_if_index = sw->sw_if_index;
+
+ mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0);
+
+ if (sw_if_index)
+ *sw_if_index = nif->sw_if_index;
+
+ if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1)
+ netmap_worker_thread_enable ();
+
+ return 0;
+
+error:
+ close_netmap_if (nm, nif);
+ return ret;
+}
+
+int
+netmap_delete_if (vlib_main_t * vm, u8 * host_if_name)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ netmap_main_t *nm = &netmap_main;
+ netmap_if_t *nif;
+ uword *p;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ p = mhash_get (&nm->if_index_by_host_if_name, host_if_name);
+ if (p == NULL)
+ {
+ clib_warning ("Host interface %s does not exist", host_if_name);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ nif = pool_elt_at_index (nm->interfaces, p[0]);
+
+ /* bring down the interface */
+ vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0);
+
+ ethernet_delete_interface (vnm, nif->hw_if_index);
+
+ close_netmap_if (nm, nif);
+
+ if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0)
+ netmap_worker_thread_disable ();
+
+ return 0;
+}
+
+static clib_error_t *
+netmap_init (vlib_main_t * vm)
+{
+ netmap_main_t *nm = &netmap_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_thread_registration_t *tr;
+ uword *p;
+
+ memset (nm, 0, sizeof (netmap_main_t));
+
+ nm->input_cpu_first_index = 0;
+ nm->input_cpu_count = 1;
+
+ /* find out which cpus will be used for input */
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ tr = p ? (vlib_thread_registration_t *) p[0] : 0;
+
+ if (tr && tr->count > 0)
+ {
+ nm->input_cpu_first_index = tr->first_index;
+ nm->input_cpu_count = tr->count;
+ }
+
+ mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword));
+
+ vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (netmap_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/netmap.h b/src/vnet/devices/netmap/netmap.h
new file mode 100644
index 00000000..04731890
--- /dev/null
+++ b/src/vnet/devices/netmap/netmap.h
@@ -0,0 +1,166 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+/*
+ * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <vppinfra/lock.h>
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ clib_spinlock_t lockp;
+ u8 *host_if_name;
+ uword if_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u32 clib_file_index;
+
+ u32 per_interface_next_index;
+ u8 is_admin_up;
+
+ /* netmap */
+ struct nmreq *req;
+ u16 mem_region;
+ int fd;
+ struct netmap_if *nifp;
+ u16 first_tx_ring;
+ u16 last_tx_ring;
+ u16 first_rx_ring;
+ u16 last_rx_ring;
+
+} netmap_if_t;
+
+typedef struct
+{
+ char *mem;
+ u32 region_size;
+ int refcnt;
+} netmap_mem_region_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ netmap_if_t *interfaces;
+
+ /* bitmap of pending rx interfaces */
+ uword *pending_input_bitmap;
+
+ /* rx buffer cache */
+ u32 **rx_buffers;
+
+ /* hash of host interface names */
+ mhash_t if_index_by_host_if_name;
+
+ /* vector of memory regions */
+ netmap_mem_region_t *mem_regions;
+
+ /* first cpu index */
+ u32 input_cpu_first_index;
+
+ /* total cpu count */
+ u32 input_cpu_count;
+} netmap_main_t;
+
+netmap_main_t netmap_main;
+extern vnet_device_class_t netmap_device_class;
+extern vlib_node_registration_t netmap_input_node;
+
+int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
+ u8 is_pipe, u8 is_master, u32 * sw_if_index);
+int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name);
+
+
+/* Macros and helper functions from sys/net/netmap_user.h */
+
+#ifdef _NET_NETMAP_H_
+
+#define _NETMAP_OFFSET(type, ptr, offset) \
+ ((type)(void *)((char *)(ptr) + (offset)))
+
+#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs)
+
+#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
+ nifp, (nifp)->ring_ofs[index] )
+
+#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
+ nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )
+
+#define NETMAP_BUF(ring, index) \
+ ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
+
+#define NETMAP_BUF_IDX(ring, buf) \
+ ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
+ (ring)->nr_buf_size )
+
+static inline uint32_t
+nm_ring_next (struct netmap_ring *ring, uint32_t i)
+{
+ return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1);
+}
+
+
+/*
+ * Return 1 if we have pending transmissions in the tx ring.
+ * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
+ */
+static inline int
+nm_tx_pending (struct netmap_ring *ring)
+{
+ return nm_ring_next (ring, ring->tail) != ring->head;
+}
+
+static inline uint32_t
+nm_ring_space (struct netmap_ring *ring)
+{
+ int ret = ring->tail - ring->cur;
+ if (ret < 0)
+ ret += ring->num_slots;
+ return ret;
+}
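+
+/*
+ * A minimal RX drain sketch using the helpers above ('nifp' and a
+ * bound netmap 'fd' as set up in netmap_create_if(); consume_frame()
+ * is a hypothetical callback):
+ *
+ * struct netmap_ring *ring = NETMAP_RXRING (nifp, 0);
+ * u32 n = nm_ring_space (ring), cur = ring->cur;
+ * while (n--)
+ * {
+ * struct netmap_slot *slot = &ring->slot[cur];
+ * consume_frame (NETMAP_BUF (ring, slot->buf_idx), slot->len);
+ * cur = nm_ring_next (ring, cur);
+ * }
+ * ring->head = ring->cur = cur;
+ * ioctl (fd, NIOCRXSYNC, NULL);
+ */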
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/netmap_api.c b/src/vnet/devices/netmap/netmap_api.c
new file mode 100644
index 00000000..9a393b1f
--- /dev/null
+++ b/src/vnet/devices/netmap/netmap_api.c
@@ -0,0 +1,137 @@
+/*
+ *------------------------------------------------------------------
+ * netmap_api.c - netmap api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/devices/netmap/netmap.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(NETMAP_CREATE, netmap_create) \
+_(NETMAP_DELETE, netmap_delete)
+
+static void
+vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_netmap_create_reply_t *rmp;
+ int rv = 0;
+ u8 *if_name = NULL;
+
+ if_name = format (0, "%s", mp->netmap_if_name);
+ vec_add1 (if_name, 0);
+
+ rv =
+ netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr,
+ mp->is_pipe, mp->is_master, 0);
+
+ vec_free (if_name);
+
+ REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY);
+}
+
+static void
+vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_netmap_delete_reply_t *rmp;
+ int rv = 0;
+ u8 *if_name = NULL;
+
+ if_name = format (0, "%s", mp->netmap_if_name);
+ vec_add1 (if_name, 0);
+
+ rv = netmap_delete_if (vm, if_name);
+
+ vec_free (if_name);
+
+ REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY);
+}
+
+/*
+ * netmap_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_netmap;
+#undef _
+}
+
+static clib_error_t *
+netmap_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (netmap_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c
new file mode 100644
index 00000000..e120eeae
--- /dev/null
+++ b/src/vnet/devices/netmap/node.c
@@ -0,0 +1,302 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+
+#include <vnet/devices/netmap/net_netmap.h>
+#include <vnet/devices/netmap/netmap.h>
+
+#define foreach_netmap_input_error
+
+typedef enum
+{
+#define _(f,s) NETMAP_INPUT_ERROR_##f,
+ foreach_netmap_input_error
+#undef _
+ NETMAP_INPUT_N_ERROR,
+} netmap_input_error_t;
+
+static char *netmap_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_netmap_input_error
+#undef _
+};
+
+typedef struct
+{
+ u32 next_index;
+ u32 hw_if_index;
+ struct netmap_slot slot;
+} netmap_input_trace_t;
+
+static u8 *
+format_netmap_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "netmap: hw_if_index %d next-index %d",
+ t->hw_if_index, t->next_index);
+ s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u",
+ format_white_space, indent + 2,
+ t->slot.flags, t->slot.len, t->slot.buf_idx);
+ return s;
+}
+
+always_inline void
+buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi);
+ vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi);
+
+ /* update first buffer */
+ first_b->total_length_not_including_first_buffer += b->current_length;
+
+ /* update previous buffer */
+ prev_b->next_buffer = bi;
+ prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ /* update current buffer */
+ b->next_buffer = 0;
+}
+
+always_inline uword
+netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, netmap_if_t * nif)
+{
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ uword n_trace = vlib_get_trace_count (vm, node);
+ netmap_main_t *nm = &netmap_main;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 *to_next = 0;
+ u32 n_free_bufs;
+ struct netmap_ring *ring;
+ int cur_ring;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ if (nif->per_interface_next_index != ~0)
+ next_index = nif->per_interface_next_index;
+
+ n_free_bufs = vec_len (nm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
+ {
+ vec_validate (nm->rx_buffers[thread_index],
+ VLIB_FRAME_SIZE + n_free_bufs - 1);
+ n_free_bufs +=
+ vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs],
+ VLIB_FRAME_SIZE);
+ _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs;
+ }
+
+ cur_ring = nif->first_rx_ring;
+ while (cur_ring <= nif->last_rx_ring && n_free_bufs)
+ {
+ int r = 0;
+ u32 cur_slot_index;
+ ring = NETMAP_RXRING (nif->nifp, cur_ring);
+ r = nm_ring_space (ring);
+
+ if (!r)
+ {
+ cur_ring++;
+ continue;
+ }
+
+ if (r > n_free_bufs)
+ r = n_free_bufs;
+
+ cur_slot_index = ring->cur;
+ while (r)
+ {
+ u32 n_left_to_next;
+ u32 next0 = next_index;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (r && n_left_to_next)
+ {
+ vlib_buffer_t *first_b0 = 0;
+ u32 offset = 0;
+ u32 bi0 = 0, first_bi0 = 0, prev_bi0;
+ u32 next_slot_index = (cur_slot_index + 1) % ring->num_slots;
+ u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots;
+ struct netmap_slot *slot = &ring->slot[cur_slot_index];
+ u32 data_len = slot->len;
+
+ /* prefetch 2 slots in advance */
+ CLIB_PREFETCH (&ring->slot[next2_slot_index],
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ /* prefetch start of next packet */
+ CLIB_PREFETCH (NETMAP_BUF
+ (ring, ring->slot[next_slot_index].buf_idx),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+
+ while (data_len && n_free_bufs)
+ {
+ vlib_buffer_t *b0;
+ /* grab free buffer */
+ u32 last_empty_buffer =
+ vec_len (nm->rx_buffers[thread_index]) - 1;
+ prev_bi0 = bi0;
+ bi0 = nm->rx_buffers[thread_index][last_empty_buffer];
+ b0 = vlib_get_buffer (vm, bi0);
+ _vec_len (nm->rx_buffers[thread_index]) = last_empty_buffer;
+ n_free_bufs--;
+
+ /* copy data */
+ u32 bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ b0->current_data = 0;
+ clib_memcpy (vlib_buffer_get_current (b0),
+ (u8 *) NETMAP_BUF (ring,
+ slot->buf_idx) + offset,
+ bytes_to_copy);
+
+ /* fill buffer header */
+ b0->current_length = bytes_to_copy;
+
+ if (offset == 0)
+ {
+ b0->total_length_not_including_first_buffer = 0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ nif->sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ first_bi0 = bi0;
+ first_b0 = vlib_get_buffer (vm, first_bi0);
+ }
+ else
+ buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0);
+
+ offset += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+
+ /* trace */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0);
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ if (PREDICT_TRUE (first_b0 != 0))
+ {
+ netmap_input_trace_t *tr;
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0);
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->hw_if_index = nif->hw_if_index;
+ memcpy (&tr->slot, slot, sizeof (struct netmap_slot));
+ }
+ }
+
+ /* redirect if feature path enabled */
+ vnet_feature_start_device_input_x1 (nif->sw_if_index, &next0,
+ first_b0);
+
+ /* enqueue and take next packet */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0,
+ next0);
+
+ /* next packet */
+ n_rx_packets++;
+ n_rx_bytes += slot->len;
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+ cur_slot_index = next_slot_index;
+
+ r--;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ ring->head = ring->cur = cur_slot_index;
+ cur_ring++;
+ }
+
+ if (n_rx_packets)
+ ioctl (nif->fd, NIOCRXSYNC, NULL);
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+
+ return n_rx_packets;
+}
+
+static uword
+netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ int i;
+ u32 n_rx_packets = 0;
+ u32 thread_index = vlib_get_thread_index ();
+ netmap_main_t *nm = &netmap_main;
+ netmap_if_t *nmi;
+
+ for (i = 0; i < vec_len (nm->interfaces); i++)
+ {
+ nmi = vec_elt_at_index (nm->interfaces, i);
+ if (nmi->is_admin_up &&
+ (i % nm->input_cpu_count) ==
+ (thread_index - nm->input_cpu_first_index))
+ n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi);
+ }
+
+ return n_rx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (netmap_input_node) = {
+ .function = netmap_input_fn,
+ .name = "netmap-input",
+ .sibling_of = "device-input",
+ .format_trace = format_netmap_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .n_errors = NETMAP_INPUT_N_ERROR,
+ .error_strings = netmap_input_error_strings,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (netmap_input_node, netmap_input_fn)
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c
new file mode 100644
index 00000000..b7a8db05
--- /dev/null
+++ b/src/vnet/devices/ssvm/node.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ssvm_eth.h"
+
+vlib_node_registration_t ssvm_eth_input_node;
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+} ssvm_eth_input_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ssvm_eth_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ssvm_eth_input_trace_t *t = va_arg (*args, ssvm_eth_input_trace_t *);
+
+ s = format (s, "SSVM_ETH_INPUT: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+#define foreach_ssvm_eth_input_error \
+_(NO_BUFFERS, "Rx packet drops (no buffers)")
+
+typedef enum
+{
+#define _(sym,str) SSVM_ETH_INPUT_ERROR_##sym,
+ foreach_ssvm_eth_input_error
+#undef _
+ SSVM_ETH_INPUT_N_ERROR,
+} ssvm_eth_input_error_t;
+
+static char *ssvm_eth_input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ssvm_eth_input_error
+#undef _
+};
+
+typedef enum
+{
+ SSVM_ETH_INPUT_NEXT_DROP,
+ SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT,
+ SSVM_ETH_INPUT_NEXT_IP4_INPUT,
+ SSVM_ETH_INPUT_NEXT_IP6_INPUT,
+ SSVM_ETH_INPUT_NEXT_MPLS_INPUT,
+ SSVM_ETH_INPUT_N_NEXT,
+} ssvm_eth_input_next_t;
+
+static inline uword
+ssvm_eth_device_input (ssvm_eth_main_t * em,
+ ssvm_private_t * intfc, vlib_node_runtime_t * node)
+{
+ ssvm_shared_header_t *sh = intfc->sh;
+ vlib_main_t *vm = em->vlib_main;
+ unix_shared_memory_queue_t *q;
+ ssvm_eth_queue_elt_t *elt, *elts;
+ u32 elt_index;
+ u32 my_pid = intfc->my_pid;
+ int rx_queue_index;
+ u32 n_to_alloc = VLIB_FRAME_SIZE * 2;
+ u32 n_allocated, n_present_in_cache;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ vlib_buffer_free_list_t *fl;
+ u32 n_left_to_next, *to_next;
+ u32 next0;
+ u32 n_buffers;
+ u32 n_available;
+ u32 bi0, saved_bi0;
+ vlib_buffer_t *b0, *prev;
+ u32 saved_cache_size = 0;
+ ethernet_header_t *eh0;
+ u16 type0;
+ u32 n_rx_bytes = 0, l3_offset0;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 trace_cnt __attribute__ ((unused)) = vlib_get_trace_count (vm, node);
+ volatile u32 *lock;
+ u32 *elt_indices;
+ uword n_trace = vlib_get_trace_count (vm, node);
+
+ /* Either side down? buh-bye... */
+ if (pointer_to_uword (sh->opaque[MASTER_ADMIN_STATE_INDEX]) == 0 ||
+ pointer_to_uword (sh->opaque[SLAVE_ADMIN_STATE_INDEX]) == 0)
+ return 0;
+
+ if (intfc->i_am_master)
+ q = (unix_shared_memory_queue_t *) (sh->opaque[TO_MASTER_Q_INDEX]);
+ else
+ q = (unix_shared_memory_queue_t *) (sh->opaque[TO_SLAVE_Q_INDEX]);
+
+ /* Nothing to do? */
+ if (q->cursize == 0)
+ return 0;
+
+ fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ vec_reset_length (intfc->rx_queue);
+
+ lock = (u32 *) q;
+ while (__sync_lock_test_and_set (lock, 1))
+ ;
+ while (q->cursize > 0)
+ {
+ unix_shared_memory_queue_sub_raw (q, (u8 *) & elt_index);
+ ASSERT (elt_index < 2048);
+ vec_add1 (intfc->rx_queue, elt_index);
+ }
+ CLIB_MEMORY_BARRIER ();
+ *lock = 0;
+
+ n_present_in_cache = vec_len (em->buffer_cache);
+
+ if (vec_len (em->buffer_cache) < vec_len (intfc->rx_queue) * 2)
+ {
+ vec_validate (em->buffer_cache,
+ n_to_alloc + vec_len (em->buffer_cache) - 1);
+ n_allocated =
+ vlib_buffer_alloc (vm, &em->buffer_cache[n_present_in_cache],
+ n_to_alloc);
+
+ n_present_in_cache += n_allocated;
+ _vec_len (em->buffer_cache) = n_present_in_cache;
+ }
+
+ elts = (ssvm_eth_queue_elt_t *) (sh->opaque[CHUNK_POOL_INDEX]);
+
+ n_buffers = vec_len (intfc->rx_queue);
+ rx_queue_index = 0;
+
+ while (n_buffers > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_buffers > 0 && n_left_to_next > 0)
+ {
+ elt = elts + intfc->rx_queue[rx_queue_index];
+
+ saved_cache_size = n_present_in_cache;
+ if (PREDICT_FALSE (saved_cache_size == 0))
+ {
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ goto out;
+ }
+ saved_bi0 = bi0 = em->buffer_cache[--n_present_in_cache];
+ b0 = vlib_get_buffer (vm, bi0);
+ prev = 0;
+
+ while (1)
+ {
+ vlib_buffer_init_for_free_list (b0, fl);
+
+ b0->current_data = elt->current_data_hint;
+ b0->current_length = elt->length_this_buffer;
+ b0->total_length_not_including_first_buffer =
+ elt->total_length_not_including_first_buffer;
+
+ clib_memcpy (b0->data + b0->current_data, elt->data,
+ b0->current_length);
+
+ if (PREDICT_FALSE (prev != 0))
+ prev->next_buffer = bi0;
+
+ if (PREDICT_FALSE (elt->flags & SSVM_BUFFER_NEXT_PRESENT))
+ {
+ prev = b0;
+ if (PREDICT_FALSE (n_present_in_cache == 0))
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ goto out;
+ }
+ bi0 = em->buffer_cache[--n_present_in_cache];
+ b0 = vlib_get_buffer (vm, bi0);
+ }
+ else
+ break;
+ }
+
+ saved_cache_size = n_present_in_cache;
+
+ to_next[0] = saved_bi0;
+ to_next++;
+ n_left_to_next--;
+
+ b0 = vlib_get_buffer (vm, saved_bi0);
+ eh0 = vlib_buffer_get_current (b0);
+
+ type0 = clib_net_to_host_u16 (eh0->type);
+
+ next0 = SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT;
+
+ if (type0 == ETHERNET_TYPE_IP4)
+ next0 = SSVM_ETH_INPUT_NEXT_IP4_INPUT;
+ else if (type0 == ETHERNET_TYPE_IP6)
+ next0 = SSVM_ETH_INPUT_NEXT_IP6_INPUT;
+ else if (type0 == ETHERNET_TYPE_MPLS)
+ next0 = SSVM_ETH_INPUT_NEXT_MPLS_INPUT;
+
+ l3_offset0 = ((next0 == SSVM_ETH_INPUT_NEXT_IP4_INPUT ||
+ next0 == SSVM_ETH_INPUT_NEXT_IP6_INPUT ||
+ next0 == SSVM_ETH_INPUT_NEXT_MPLS_INPUT) ?
+ sizeof (ethernet_header_t) : 0);
+
+ n_rx_bytes += b0->current_length
+ + b0->total_length_not_including_first_buffer;
+
+ b0->current_data += l3_offset0;
+ b0->current_length -= l3_offset0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = intfc->vlib_hw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited... See main.c...
+ */
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ ssvm_eth_input_trace_t *tr;
+
+ vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 1);
+ vlib_set_trace_count (vm, node, --n_trace);
+
+ tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+
+ tr->next_index = next0;
+ tr->sw_if_index = intfc->vlib_hw_if_index;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ n_buffers--;
+ rx_queue_index++;
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+out:
+ if (em->buffer_cache)
+ _vec_len (em->buffer_cache) = saved_cache_size;
+ else
+ ASSERT (saved_cache_size == 0);
+
+ ssvm_lock (sh, my_pid, 2);
+
+ ASSERT (vec_len (intfc->rx_queue) > 0);
+
+ n_available = (u32) pointer_to_uword (sh->opaque[CHUNK_POOL_NFREE]);
+ elt_indices = (u32 *) (sh->opaque[CHUNK_POOL_FREELIST_INDEX]);
+
+ clib_memcpy (&elt_indices[n_available], intfc->rx_queue,
+ vec_len (intfc->rx_queue) * sizeof (u32));
+
+ n_available += vec_len (intfc->rx_queue);
+ sh->opaque[CHUNK_POOL_NFREE] = uword_to_pointer (n_available, void *);
+
+ ssvm_unlock (sh);
+
+ vlib_error_count (vm, node->node_index, SSVM_ETH_INPUT_ERROR_NO_BUFFERS,
+ n_buffers);
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX, thread_index,
+ intfc->vlib_hw_if_index, rx_queue_index, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, rx_queue_index);
+
+ return rx_queue_index;
+}
+
+static uword
+ssvm_eth_input_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ssvm_eth_main_t *em = &ssvm_eth_main;
+ ssvm_private_t *intfc;
+ uword n_rx_packets = 0;
+
+ vec_foreach (intfc, em->intfcs)
+ {
+ n_rx_packets += ssvm_eth_device_input (em, intfc, node);
+ }
+
+ return n_rx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ssvm_eth_input_node) = {
+ .function = ssvm_eth_input_node_fn,
+ .name = "ssvm_eth_input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ssvm_eth_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+
+ .n_errors = ARRAY_LEN(ssvm_eth_input_error_strings),
+ .error_strings = ssvm_eth_input_error_strings,
+
+ .n_next_nodes = SSVM_ETH_INPUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SSVM_ETH_INPUT_NEXT_DROP] = "error-drop",
+ [SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input",
+ [SSVM_ETH_INPUT_NEXT_IP4_INPUT] = "ip4-input",
+ [SSVM_ETH_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [SSVM_ETH_INPUT_NEXT_MPLS_INPUT] = "mpls-input",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ssvm_eth_input_node, ssvm_eth_input_node_fn)
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/ssvm/ssvm_eth.c b/src/vnet/devices/ssvm/ssvm_eth.c
new file mode 100644
index 00000000..db4fafa9
--- /dev/null
+++ b/src/vnet/devices/ssvm/ssvm_eth.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ssvm_eth.h"
+
+ssvm_eth_main_t ssvm_eth_main;
+
+#define foreach_ssvm_eth_tx_func_error \
+_(RING_FULL, "Tx packet drops (ring full)") \
+_(NO_BUFFERS, "Tx packet drops (no buffers)") \
+_(ADMIN_DOWN, "Tx packet drops (admin down)")
+
+typedef enum
+{
+#define _(f,s) SSVM_ETH_TX_ERROR_##f,
+ foreach_ssvm_eth_tx_func_error
+#undef _
+ SSVM_ETH_TX_N_ERROR,
+} ssvm_eth_tx_func_error_t;
+
+static u32 ssvm_eth_flag_change (vnet_main_t * vnm,
+ vnet_hw_interface_t * hi, u32 flags);
+
+int
+ssvm_eth_create (ssvm_eth_main_t * em, u8 * name, int is_master)
+{
+ ssvm_private_t *intfc;
+ void *oldheap;
+ clib_error_t *e;
+ unix_shared_memory_queue_t *q;
+ ssvm_shared_header_t *sh;
+ ssvm_eth_queue_elt_t *elts;
+ u32 *elt_indices;
+ u8 enet_addr[6];
+ int i, rv;
+
+ vec_add2 (em->intfcs, intfc, 1);
+
+ intfc->ssvm_size = em->segment_size;
+ intfc->i_am_master = 1;
+ intfc->name = name;
+ intfc->my_pid = getpid ();
+ if (is_master == 0)
+ {
+ rv = ssvm_slave_init (intfc, 20 /* timeout in seconds */ );
+ if (rv < 0)
+ return rv;
+ goto create_vnet_interface;
+ }
+
+ intfc->requested_va = em->next_base_va;
+ em->next_base_va += em->segment_size;
+ rv = ssvm_master_init (intfc, intfc - em->intfcs /* master index */ );
+
+ if (rv < 0)
+ return rv;
+
+ /* OK, segment created, set up queues and so forth. */
+
+ sh = intfc->sh;
+ oldheap = ssvm_push_heap (sh);
+
+ q = unix_shared_memory_queue_init (em->queue_elts, sizeof (u32),
+ 0 /* consumer pid not interesting */ ,
+ 0 /* signal not sent */ );
+ sh->opaque[TO_MASTER_Q_INDEX] = (void *) q;
+ q = unix_shared_memory_queue_init (em->queue_elts, sizeof (u32),
+ 0 /* consumer pid not interesting */ ,
+ 0 /* signal not sent */ );
+ sh->opaque[TO_SLAVE_Q_INDEX] = (void *) q;
+
+ /*
+ * Preallocate the requested number of buffer chunks
+ * There must be a better way to do this, etc.
+ * Add some slop to avoid pool reallocation, which will not go well
+ */
+ elts = 0;
+ elt_indices = 0;
+
+ vec_validate_aligned (elts, em->nbuffers - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (elt_indices, em->nbuffers - 1, CLIB_CACHE_LINE_BYTES);
+
+ for (i = 0; i < em->nbuffers; i++)
+ elt_indices[i] = i;
+
+ sh->opaque[CHUNK_POOL_INDEX] = (void *) elts;
+ sh->opaque[CHUNK_POOL_FREELIST_INDEX] = (void *) elt_indices;
+ sh->opaque[CHUNK_POOL_NFREE] = (void *) (uword) em->nbuffers;
+
+ ssvm_pop_heap (oldheap);
+
+create_vnet_interface:
+
+ sh = intfc->sh;
+
+ memset (enet_addr, 0, sizeof (enet_addr));
+ enet_addr[0] = 2;
+ enet_addr[1] = 0xFE;
+ enet_addr[2] = is_master;
+ enet_addr[5] = sh->master_index;
+
+ e = ethernet_register_interface
+ (em->vnet_main, ssvm_eth_device_class.index, intfc - em->intfcs,
+ /* ethernet address */ enet_addr,
+ &intfc->vlib_hw_if_index, ssvm_eth_flag_change);
+
+ if (e)
+ {
+ clib_error_report (e);
+ /* $$$$ unmap offending region? */
+ return VNET_API_ERROR_INVALID_INTERFACE;
+ }
+
+ /* Declare link up */
+ vnet_hw_interface_set_flags (em->vnet_main, intfc->vlib_hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ /* Let the games begin... */
+ if (is_master)
+ sh->ready = 1;
+ return 0;
+}
+
+static clib_error_t *
+ssvm_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ u8 *name;
+ int is_master = 1;
+ int i, rv;
+ ssvm_eth_main_t *em = &ssvm_eth_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "base-va %llx", &em->next_base_va))
+ ;
+ else if (unformat (input, "segment-size %lld", &em->segment_size))
+ em->segment_size = 1ULL << (max_log2 (em->segment_size));
+ else if (unformat (input, "nbuffers %lld", &em->nbuffers))
+ ;
+ else if (unformat (input, "queue-elts %lld", &em->queue_elts))
+ ;
+ else if (unformat (input, "slave"))
+ is_master = 0;
+ else if (unformat (input, "%s", &name))
+ vec_add1 (em->names, name);
+ else
+ break;
+ }
+
+ /* No configured instances, we're done... */
+ if (vec_len (em->names) == 0)
+ return 0;
+
+ for (i = 0; i < vec_len (em->names); i++)
+ {
+ rv = ssvm_eth_create (em, em->names[i], is_master);
+ if (rv < 0)
+ return clib_error_return (0, "ssvm_eth_create '%s' failed, error %d",
+ em->names[i], rv);
+ }
+
+ vlib_node_set_state (vm, ssvm_eth_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (ssvm_config, "ssvm_eth");
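+/*
+ * A minimal startup.conf sketch matching the unformat strings above
+ * (segment name and values are illustrative, not defaults from this
+ * file):
+ *
+ *   ssvm_eth {
+ *     base-va 600000000
+ *     segment-size 8388608
+ *     nbuffers 1024
+ *     queue-elts 512
+ *     eth0
+ *   }
+ *
+ * base-va is parsed as hex (%llx), segment-size as decimal (%lld) and
+ * rounded up to a power of two. Any bare token ("eth0" here) is taken
+ * as a segment name; adding "slave" makes every instance in the
+ * stanza attach as a slave.
+ */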
+
+
+static clib_error_t *
+ssvm_eth_init (vlib_main_t * vm)
+{
+ ssvm_eth_main_t *em = &ssvm_eth_main;
+
+ if (((sizeof (ssvm_eth_queue_elt_t) / CLIB_CACHE_LINE_BYTES)
+ * CLIB_CACHE_LINE_BYTES) != sizeof (ssvm_eth_queue_elt_t))
+ clib_warning ("ssvm_eth_queue_elt_t size %d not a multiple of %d",
+ sizeof (ssvm_eth_queue_elt_t), CLIB_CACHE_LINE_BYTES);
+
+ em->vlib_main = vm;
+ em->vnet_main = vnet_get_main ();
+ em->elog_main = &vm->elog_main;
+
+ /* default config param values... */
+
+ em->next_base_va = 0x600000000ULL;
+ /*
+   * Allocate 2 full superframes in each direction (256 x 2 x 2 x 2048
+   * bytes = 2 MB); quadruple that so we have plenty of headroom: 8 MB.
+ */
+ em->segment_size = 8 << 20;
+ em->nbuffers = 1024;
+ em->queue_elts = 512;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ssvm_eth_init);
+
+static char *ssvm_eth_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_ssvm_eth_tx_func_error
+#undef _
+};
+
+static u8 *
+format_ssvm_eth_device_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+
+ s = format (s, "ssvmEthernet%d", i);
+ return s;
+}
+
+static u8 *
+format_ssvm_eth_device (u8 * s, va_list * args)
+{
+ s = format (s, "SSVM Ethernet");
+ return s;
+}
+
+static u8 *
+format_ssvm_eth_tx_trace (u8 * s, va_list * args)
+{
+ s = format (s, "Unimplemented...");
+ return s;
+}
+
+
+static uword
+ssvm_eth_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ ssvm_eth_main_t *em = &ssvm_eth_main;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ ssvm_private_t *intfc = vec_elt_at_index (em->intfcs, rd->dev_instance);
+ ssvm_shared_header_t *sh = intfc->sh;
+ unix_shared_memory_queue_t *q;
+ u32 *from;
+ u32 n_left;
+ ssvm_eth_queue_elt_t *elts, *elt, *prev_elt;
+ u32 my_pid = intfc->my_pid;
+ vlib_buffer_t *b0;
+ u32 bi0;
+ u32 size_this_buffer;
+ u32 chunks_this_buffer;
+ u8 i_am_master = intfc->i_am_master;
+ u32 elt_index;
+ int is_ring_full, interface_down;
+ int i;
+ volatile u32 *queue_lock;
+ u32 n_to_alloc = VLIB_FRAME_SIZE;
+ u32 n_allocated, n_present_in_cache, n_available;
+ u32 *elt_indices;
+
+ if (i_am_master)
+ q = (unix_shared_memory_queue_t *) sh->opaque[TO_SLAVE_Q_INDEX];
+ else
+ q = (unix_shared_memory_queue_t *) sh->opaque[TO_MASTER_Q_INDEX];
+
+ queue_lock = (u32 *) q;
+
+ from = vlib_frame_vector_args (f);
+ n_left = f->n_vectors;
+ is_ring_full = 0;
+ interface_down = 0;
+
+ n_present_in_cache = vec_len (em->chunk_cache);
+
+ /* admin / link up/down check */
+ if (sh->opaque[MASTER_ADMIN_STATE_INDEX] == 0 ||
+ sh->opaque[SLAVE_ADMIN_STATE_INDEX] == 0)
+ {
+ interface_down = 1;
+ goto out;
+ }
+
+ ssvm_lock (sh, my_pid, 1);
+
+ elts = (ssvm_eth_queue_elt_t *) (sh->opaque[CHUNK_POOL_INDEX]);
+ elt_indices = (u32 *) (sh->opaque[CHUNK_POOL_FREELIST_INDEX]);
+ n_available = (u32) pointer_to_uword (sh->opaque[CHUNK_POOL_NFREE]);
+
+ if (n_present_in_cache < n_left * 2)
+ {
+ vec_validate (em->chunk_cache, n_to_alloc + n_present_in_cache - 1);
+
+ n_allocated = n_to_alloc < n_available ? n_to_alloc : n_available;
+
+ if (PREDICT_TRUE (n_allocated > 0))
+ {
+ clib_memcpy (&em->chunk_cache[n_present_in_cache],
+ &elt_indices[n_available - n_allocated],
+ sizeof (u32) * n_allocated);
+ }
+
+ n_present_in_cache += n_allocated;
+ n_available -= n_allocated;
+ sh->opaque[CHUNK_POOL_NFREE] = uword_to_pointer (n_available, void *);
+ _vec_len (em->chunk_cache) = n_present_in_cache;
+ }
+
+ ssvm_unlock (sh);
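+
+  /*
+   * Note the refill above is the only place the shared freelist is
+   * touched, at most once per frame and under the segment lock; the
+   * per-packet loop below allocates from the local chunk_cache without
+   * taking that lock again.
+   */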
+
+ while (n_left)
+ {
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ size_this_buffer = vlib_buffer_length_in_chain (vm, b0);
+ chunks_this_buffer = (size_this_buffer + (SSVM_BUFFER_SIZE - 1))
+ / SSVM_BUFFER_SIZE;
+
+ /* If we're not going to be able to enqueue the buffer, tail drop. */
+ if (q->cursize >= q->maxsize)
+ {
+ is_ring_full = 1;
+ break;
+ }
+
+ prev_elt = 0;
+ elt_index = ~0;
+ for (i = 0; i < chunks_this_buffer; i++)
+ {
+ if (PREDICT_FALSE (n_present_in_cache == 0))
+ goto out;
+
+ elt_index = em->chunk_cache[--n_present_in_cache];
+ elt = elts + elt_index;
+
+ elt->type = SSVM_PACKET_TYPE;
+ elt->flags = 0;
+ elt->total_length_not_including_first_buffer =
+ b0->total_length_not_including_first_buffer;
+ elt->length_this_buffer = b0->current_length;
+ elt->current_data_hint = b0->current_data;
+ elt->owner = !i_am_master;
+ elt->tag = 1;
+
+ clib_memcpy (elt->data, b0->data + b0->current_data,
+ b0->current_length);
+
+ if (PREDICT_FALSE (prev_elt != 0))
+ prev_elt->next_index = elt - elts;
+
+ if (PREDICT_FALSE (i < (chunks_this_buffer - 1)))
+ {
+ elt->flags = SSVM_BUFFER_NEXT_PRESENT;
+ ASSERT (b0->flags & VLIB_BUFFER_NEXT_PRESENT);
+ b0 = vlib_get_buffer (vm, b0->next_buffer);
+ }
+ prev_elt = elt;
+ }
+
+ while (__sync_lock_test_and_set (queue_lock, 1))
+ ;
+
+ unix_shared_memory_queue_add_raw (q, (u8 *) & elt_index);
+ CLIB_MEMORY_BARRIER ();
+ *queue_lock = 0;
+
+ from++;
+ n_left--;
+ }
+
+out:
+ if (PREDICT_FALSE (n_left))
+ {
+ if (is_ring_full)
+ vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_RING_FULL,
+ n_left);
+ else if (interface_down)
+ vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_ADMIN_DOWN,
+ n_left);
+ else
+ vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_NO_BUFFERS,
+ n_left);
+
+ vlib_buffer_free (vm, from, n_left);
+ }
+ else
+ vlib_buffer_free (vm, vlib_frame_vector_args (f), f->n_vectors);
+
+ if (PREDICT_TRUE (vec_len (em->chunk_cache)))
+ _vec_len (em->chunk_cache) = n_present_in_cache;
+
+ return f->n_vectors;
+}
+
+static void
+ssvm_eth_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+static clib_error_t *
+ssvm_eth_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags)
+{
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+ ssvm_eth_main_t *em = &ssvm_eth_main;
+ ssvm_private_t *intfc = vec_elt_at_index (em->intfcs, hif->dev_instance);
+ ssvm_shared_header_t *sh;
+
+ /* publish link-state in shared-memory, to discourage buffer-wasting */
+ sh = intfc->sh;
+ if (intfc->i_am_master)
+ sh->opaque[MASTER_ADMIN_STATE_INDEX] = (void *) is_up;
+ else
+ sh->opaque[SLAVE_ADMIN_STATE_INDEX] = (void *) is_up;
+
+ return 0;
+}
+
+static clib_error_t *
+ssvm_eth_subif_add_del_function (vnet_main_t * vnm,
+ u32 hw_if_index,
+ struct vnet_sw_interface_t *st, int is_add)
+{
+ /* Nothing for now */
+ return 0;
+}
+
+/*
+ * Dynamically redirect all pkts from a specific interface
+ * to the specified node
+ */
+static void
+ssvm_eth_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ ssvm_eth_main_t *em = &ssvm_eth_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ ssvm_private_t *intfc = pool_elt_at_index (em->intfcs, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ intfc->per_interface_next_index = node_index;
+ return;
+ }
+
+ intfc->per_interface_next_index =
+ vlib_node_add_next (em->vlib_main, ssvm_eth_input_node.index, node_index);
+}
+
+static u32
+ssvm_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
+{
+ /* nothing for now */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (ssvm_eth_device_class) = {
+ .name = "ssvm-eth",
+ .tx_function = ssvm_eth_interface_tx,
+ .tx_function_n_errors = SSVM_ETH_TX_N_ERROR,
+ .tx_function_error_strings = ssvm_eth_tx_func_error_strings,
+ .format_device_name = format_ssvm_eth_device_name,
+ .format_device = format_ssvm_eth_device,
+ .format_tx_trace = format_ssvm_eth_tx_trace,
+ .clear_counters = ssvm_eth_clear_hw_interface_counters,
+ .admin_up_down_function = ssvm_eth_interface_admin_up_down,
+ .subif_add_del_function = ssvm_eth_subif_add_del_function,
+ .rx_redirect_to_node = ssvm_eth_set_interface_next_node,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (ssvm_eth_device_class,
+ ssvm_eth_interface_tx)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/ssvm/ssvm_eth.h b/src/vnet/devices/ssvm/ssvm_eth.h
new file mode 100644
index 00000000..f877df3c
--- /dev/null
+++ b/src/vnet/devices/ssvm/ssvm_eth.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ssvm_eth_h__
+#define __included_ssvm_eth_h__
+
+#include <vnet/vnet.h>
+
+#include <vppinfra/elog.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/elog.h>
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+
+#include <svm/ssvm.h>
+
+extern vnet_device_class_t ssvm_eth_device_class;
+extern vlib_node_registration_t ssvm_eth_input_node;
+
+#define SSVM_BUFFER_SIZE \
+ (VLIB_BUFFER_DATA_SIZE + VLIB_BUFFER_PRE_DATA_SIZE)
+#define SSVM_PACKET_TYPE 1
+
+typedef struct
+{
+ /* Type of queue element */
+ u8 type;
+ u8 flags;
+#define SSVM_BUFFER_NEXT_PRESENT (1<<0)
+ u8 owner;
+ u8 tag;
+ i16 current_data_hint;
+ u16 length_this_buffer;
+ u16 total_length_not_including_first_buffer;
+ u16 pad;
+ u32 next_index;
+ /* offset 16 */
+ u8 data[SSVM_BUFFER_SIZE];
+ /* pad to an even multiple of 64 octets */
+ u8 pad2[CLIB_CACHE_LINE_BYTES - 16];
+} ssvm_eth_queue_elt_t;
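+
+/*
+ * ssvm_eth_init() only warns at runtime when this element is not a
+ * whole number of cache lines. A compile-time equivalent (a sketch
+ * using the C11 keyword rather than a vppinfra wrapper) would be:
+ *
+ *   _Static_assert (sizeof (ssvm_eth_queue_elt_t) % CLIB_CACHE_LINE_BYTES
+ *                   == 0, "queue elt must be a multiple of a cache line");
+ */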
+
+typedef struct
+{
+ /* vector of point-to-point connections */
+ ssvm_private_t *intfcs;
+
+ u32 *buffer_cache;
+ u32 *chunk_cache;
+
+ /* Configurable parameters */
+ /* base address for next placement */
+ u64 next_base_va;
+ u64 segment_size;
+ u64 nbuffers;
+ u64 queue_elts;
+
+ /* Segment names */
+ u8 **names;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ elog_main_t *elog_main;
+} ssvm_eth_main_t;
+
+extern ssvm_eth_main_t ssvm_eth_main;
+
+typedef enum
+{
+ CHUNK_POOL_FREELIST_INDEX = 0,
+ CHUNK_POOL_INDEX,
+ CHUNK_POOL_NFREE,
+ TO_MASTER_Q_INDEX,
+ TO_SLAVE_Q_INDEX,
+ MASTER_ADMIN_STATE_INDEX,
+ SLAVE_ADMIN_STATE_INDEX,
+} ssvm_eth_opaque_index_t;
+
+/*
+ * debug scaffolding.
+ */
+static inline void
+ssvm_eth_validate_freelists (int need_lock)
+{
+#if CLIB_DEBUG > 0
+ ssvm_eth_main_t *em = &ssvm_eth_main;
+ ssvm_private_t *intfc;
+ ssvm_shared_header_t *sh;
+ u32 *elt_indices;
+ u32 n_available;
+ int i;
+
+ for (i = 0; i < vec_len (em->intfcs); i++)
+ {
+ intfc = em->intfcs + i;
+ sh = intfc->sh;
+ u32 my_pid = intfc->my_pid;
+
+ if (need_lock)
+ ssvm_lock (sh, my_pid, 15);
+
+ elt_indices = (u32 *) (sh->opaque[CHUNK_POOL_FREELIST_INDEX]);
+ n_available = (u32) (uword) (sh->opaque[CHUNK_POOL_NFREE]);
+
+      /* use a separate index: reusing i would clobber the outer loop */
+      u32 j;
+      for (j = 0; j < n_available; j++)
+	ASSERT (elt_indices[j] < 2048);
+
+ if (need_lock)
+ ssvm_unlock (sh);
+ }
+#endif
+}
+
+#endif /* __included_ssvm_eth_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/dir.dox b/src/vnet/devices/virtio/dir.dox
new file mode 100644
index 00000000..50150799
--- /dev/null
+++ b/src/vnet/devices/virtio/dir.dox
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief vHost User Interface Implementation.
+
+This directory contains the source code for the vHost User driver.
+
+*/
+/*? %%clicmd:group_label vHost User %% ?*/
+/*? %%syscfg:group_label vHost User %% ?*/
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
new file mode 100644
index 00000000..19ad9ab1
--- /dev/null
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -0,0 +1,3671 @@
+/*
+ *------------------------------------------------------------------
+ * vhost.c - vhost-user
+ *
+ * Copyright (c) 2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <fcntl.h> /* for open */
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* for iovec */
+#include <netinet/in.h>
+#include <sys/vfs.h>
+
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+
+#include <vnet/devices/virtio/vhost-user.h>
+
+/**
+ * @file
+ * @brief vHost User Device Driver.
+ *
+ * This file contains the source code for the vHost User interface.
+ */
+
+
+#define VHOST_DEBUG_VQ 0
+
+#define DBG_SOCK(args...) \
+ { \
+ vhost_user_main_t *_vum = &vhost_user_main; \
+ if (_vum->debug) \
+ clib_warning(args); \
+ };
+
+#if VHOST_DEBUG_VQ == 1
+#define DBG_VQ(args...) clib_warning(args);
+#else
+#define DBG_VQ(args...)
+#endif
+
+/*
+ * When an RX queue is down but active, received packets
+ * must be discarded. This value bounds how many packets
+ * will be discarded during each round.
+ */
+#define VHOST_USER_DOWN_DISCARD_COUNT 256
+
+/*
+ * When the number of available buffers gets under this threshold,
+ * RX node will start discarding packets.
+ */
+#define VHOST_USER_RX_BUFFER_STARVATION 32
+
+/*
+ * On the receive side, the host should free descriptors as soon
+ * as possible in order to avoid TX drops in the VM.
+ * This value controls how many copy operations are stacked up
+ * before they are all executed and the descriptors are handed
+ * back to the guest.
+ * The value 64 was obtained by testing (48 and 128 were not as good).
+ */
+#define VHOST_USER_RX_COPY_THRESHOLD 64
+/*
+ * On the transmit side, we keep processing the buffers from vlib in the while
+ * loop and prepare the copy order to be executed later. However, the static
+ * array which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
+ * entries. In order to not corrupt memory, we have to do the copy when the
+ * static array reaches the copy threshold. We subtract 40 in case the code
+ * goes into the inner loop for a maximum of 64k frames which may require
+ * more array entries.
+ */
+#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 40)
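+/*
+ * For example, if VHOST_USER_COPY_ARRAY_N were 4096 (the real value is
+ * defined in vhost-user.h), copies would be flushed once 4056 entries
+ * are queued, leaving 40 entries of headroom for the inner loop.
+ */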
+
+#define UNIX_GET_FD(unixfd_idx) \
+    (((unixfd_idx) != ~0) ? \
+     pool_elt_at_index (file_main.file_pool, \
+			(unixfd_idx))->file_descriptor : -1)
+
+#define foreach_virtio_trace_flags \
+ _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
+ _ (SINGLE_DESC, 1, "Single descriptor packet") \
+ _ (INDIRECT, 2, "Indirect descriptor") \
+ _ (MAP_ERROR, 4, "Memory mapping error")
+
+typedef enum
+{
+#define _(n,i,s) VIRTIO_TRACE_F_##n,
+ foreach_virtio_trace_flags
+#undef _
+} virtio_trace_flag_t;
+
+vlib_node_registration_t vhost_user_input_node;
+
+#define foreach_vhost_user_tx_func_error \
+ _(NONE, "no error") \
+ _(NOT_READY, "vhost vring not ready") \
+ _(DOWN, "vhost interface is down") \
+ _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
+ _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \
+ _(MMAP_FAIL, "mmap failure") \
+ _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
+
+typedef enum
+{
+#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
+ foreach_vhost_user_tx_func_error
+#undef _
+ VHOST_USER_TX_FUNC_N_ERROR,
+} vhost_user_tx_func_error_t;
+
+static char *vhost_user_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_vhost_user_tx_func_error
+#undef _
+};
+
+#define foreach_vhost_user_input_func_error \
+ _(NO_ERROR, "no error") \
+ _(NO_BUFFER, "no available buffer") \
+ _(MMAP_FAIL, "mmap failure") \
+ _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
+ _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
+ _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
+
+typedef enum
+{
+#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
+ foreach_vhost_user_input_func_error
+#undef _
+ VHOST_USER_INPUT_FUNC_N_ERROR,
+} vhost_user_input_func_error_t;
+
+static char *vhost_user_input_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_vhost_user_input_func_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+static vhost_user_main_t vhost_user_main = {
+ .mtu_bytes = 1518,
+};
+
+VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
+ .name = "vhost-user",
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_vhost_user_interface_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u32 show_dev_instance = ~0;
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
+ show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
+
+ if (show_dev_instance != ~0)
+ i = show_dev_instance;
+
+ s = format (s, "VirtualEthernet0/0/%d", i);
+ return s;
+}
+
+static int
+vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
+{
+ // FIXME: check if the new dev instance is already used
+ vhost_user_main_t *vum = &vhost_user_main;
+ vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
+ hi->dev_instance, ~0);
+
+ vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
+ new_dev_instance;
+
+ DBG_SOCK ("renumbered vhost-user interface dev_instance %d to %d",
+ hi->dev_instance, new_dev_instance);
+
+ return 0;
+}
+
+static_always_inline void *
+map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
+{
+ int i = *hint;
+ if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
+ ((vui->regions[i].guest_phys_addr +
+ vui->regions[i].memory_size) > addr)))
+ {
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
+#if __SSE4_2__
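+  /*
+   * The cached hint missed: scan all 8 possible regions in parallel.
+   * For each region we want (lo <= addr < hi); with signed 64-bit
+   * compares this is phrased as (addr + 1 > lo) && (hi > addr). Each
+   * pair of compares below covers two regions, the blends merge the
+   * results, the shuffle packs one result byte per region, and ctzll
+   * picks the first matching region (or 8 when none match).
+   */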
+ __m128i rl, rh, al, ah, r;
+ al = _mm_set1_epi64x (addr + 1);
+ ah = _mm_set1_epi64x (addr);
+
+ rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
+ rl = _mm_cmpgt_epi64 (al, rl);
+ rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
+ rh = _mm_cmpgt_epi64 (rh, ah);
+ r = _mm_and_si128 (rl, rh);
+
+ rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
+ rl = _mm_cmpgt_epi64 (al, rl);
+ rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
+ rh = _mm_cmpgt_epi64 (rh, ah);
+ r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);
+
+ rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
+ rl = _mm_cmpgt_epi64 (al, rl);
+ rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
+ rh = _mm_cmpgt_epi64 (rh, ah);
+ r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);
+
+ rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
+ rl = _mm_cmpgt_epi64 (al, rl);
+ rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
+ rh = _mm_cmpgt_epi64 (rh, ah);
+ r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
+
+ r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
+ i = __builtin_ctzll (_mm_movemask_epi8 (r) |
+ (1 << VHOST_MEMORY_MAX_NREGIONS));
+
+ if (i < vui->nregions)
+ {
+ *hint = i;
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
+
+#else
+ for (i = 0; i < vui->nregions; i++)
+ {
+ if ((vui->regions[i].guest_phys_addr <= addr) &&
+ ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
+ addr))
+ {
+ *hint = i;
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
+ }
+#endif
+ DBG_VQ ("failed to map guest mem addr %llx", addr);
+ *hint = 0;
+ return 0;
+}
+
+static inline void *
+map_user_mem (vhost_user_intf_t * vui, uword addr)
+{
+ int i;
+ for (i = 0; i < vui->nregions; i++)
+ {
+ if ((vui->regions[i].userspace_addr <= addr) &&
+ ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) >
+ addr))
+ {
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].userspace_addr);
+ }
+ }
+ return 0;
+}
+
+static long
+get_huge_page_size (int fd)
+{
+ struct statfs s;
+ fstatfs (fd, &s);
+ return s.f_bsize;
+}
+
+static void
+unmap_all_mem_regions (vhost_user_intf_t * vui)
+{
+ int i, r;
+ for (i = 0; i < vui->nregions; i++)
+ {
+ if (vui->region_mmap_addr[i] != (void *) -1)
+ {
+
+ long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
+
+ ssize_t map_sz = (vui->regions[i].memory_size +
+ vui->regions[i].mmap_offset +
+ page_sz - 1) & ~(page_sz - 1);
+
+ r =
+ munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
+ map_sz);
+
+ DBG_SOCK
+ ("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
+ vui->region_mmap_addr[i], map_sz, page_sz);
+
+ vui->region_mmap_addr[i] = (void *) -1;
+
+ if (r == -1)
+ {
+ clib_warning ("failed to unmap memory region (errno %d)",
+ errno);
+ }
+ close (vui->region_mmap_fd[i]);
+ }
+ }
+ vui->nregions = 0;
+}
+
+static void
+vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
+{
+ //Let's try to assign one queue to each thread
+ u32 qid = 0;
+ u32 thread_index = 0;
+ vui->use_tx_spinlock = 0;
+ while (1)
+ {
+ for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
+ {
+ vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
+ if (!rxvq->started || !rxvq->enabled)
+ continue;
+
+ vui->per_cpu_tx_qid[thread_index] = qid;
+ thread_index++;
+ if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
+ return;
+ }
+ //We need to loop, meaning the spinlock has to be used
+ vui->use_tx_spinlock = 1;
+ if (thread_index == 0)
+ {
+ //Could not find a single valid one
+ for (thread_index = 0;
+ thread_index < vlib_get_thread_main ()->n_vlib_mains;
+ thread_index++)
+ {
+ vui->per_cpu_tx_qid[thread_index] = 0;
+ }
+ return;
+ }
+ }
+}
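+
+/*
+ * A worked example of the loop above (numbers are illustrative): with 3
+ * vlib mains and 2 started+enabled RX vrings, the first pass assigns
+ * qid 0 to thread 0 and qid 1 to thread 1; since not every thread got a
+ * queue, use_tx_spinlock is set and the second pass wraps, giving qid 0
+ * to thread 2 as well. Two threads now share queue 0, hence the lock.
+ */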
+
+/**
+ * @brief Unassign all existing interface/queue-to-thread mappings and
+ * assign a fresh set of mappings
+ */
+static void
+vhost_user_rx_thread_placement ()
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ vhost_user_vring_t *txvq;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 qid;
+ int rv;
+ u16 *queue;
+
+ // Scrap all existing mappings for all interfaces/queues
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ vec_foreach (queue, vui->rx_queues)
+ {
+ rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index,
+ *queue);
+ if (rv)
+ clib_warning ("Warning: unable to unassign interface %d, "
+ "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
+ }
+ vec_reset_length (vui->rx_queues);
+ });
+ /* *INDENT-ON* */
+
+ // Create the rx_queues for all interfaces
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
+ {
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+ if (txvq->started)
+ {
+ if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN)
+ /* Set polling as the default */
+ txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+ vec_add1 (vui->rx_queues, qid);
+ }
+ }
+ });
+ /* *INDENT-ON* */
+
+ // Assign new mappings for all interfaces/queues
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ vnet_hw_interface_set_input_node (vnm, vui->hw_if_index,
+ vhost_user_input_node.index);
+ vec_foreach (queue, vui->rx_queues)
+ {
+ vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue,
+ ~0);
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+ rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue,
+ txvq->mode);
+ if (rv)
+ clib_warning ("Warning: unable to set rx mode for interface %d, "
+ "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
+ }
+ });
+ /* *INDENT-ON* */
+}
+
+/** @brief Returns whether at least one TX and one RX vring are enabled */
+int
+vhost_user_intf_ready (vhost_user_intf_t * vui)
+{
+ int i, found[2] = { }; //RX + TX
+
+ for (i = 0; i < VHOST_VRING_MAX_N; i++)
+ if (vui->vrings[i].started && vui->vrings[i].enabled)
+ found[i & 1] = 1;
+
+ return found[0] && found[1];
+}
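+
+/*
+ * The found[i & 1] trick above works because vring indices alternate
+ * between the two directions (the VHOST_VRING_IDX_RX/TX macros map a
+ * queue pair qid to adjacent even/odd indices), so the parity of i
+ * identifies the direction.
+ */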
+
+static void
+vhost_user_update_iface_state (vhost_user_intf_t * vui)
+{
+ /* if we have pointers to descriptor table, go up */
+ int is_up = vhost_user_intf_ready (vui);
+ if (is_up != vui->is_up)
+ {
+ DBG_SOCK ("interface %d %s", vui->sw_if_index,
+ is_up ? "ready" : "down");
+ vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index,
+ is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP :
+ 0);
+ vui->is_up = is_up;
+ }
+ vhost_user_rx_thread_placement ();
+ vhost_user_tx_thread_placement (vui);
+}
+
+static void
+vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
+{
+ u32 qid;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ qid = ifq & 0xff;
+ if ((qid & 1) == 0)
+ /* Only care about the odd number, or TX, virtqueue */
+ return;
+
+ if (vhost_user_intf_ready (vui))
+ // qid >> 1 is to convert virtqueue number to vring queue index
+ vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1);
+}
+
+static clib_error_t *
+vhost_user_callfd_read_ready (clib_file_t * uf)
+{
+ __attribute__ ((unused)) int n;
+ u8 buff[8];
+
+ n = read (uf->file_descriptor, ((char *) &buff), 8);
+
+ return 0;
+}
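+
+/*
+ * callfd and kickfd are eventfds handed over by the master: a read of
+ * exactly 8 bytes drains the counter, which is all the ready handlers
+ * here need to do to re-arm the notification.
+ */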
+
+static clib_error_t *
+vhost_user_kickfd_read_ready (clib_file_t * uf)
+{
+ __attribute__ ((unused)) int n;
+ u8 buff[8];
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
+ uf->private_data >> 8);
+ u32 qid = uf->private_data & 0xff;
+
+ n = read (uf->file_descriptor, ((char *) &buff), 8);
+ DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid);
+ if (!vui->vrings[qid].started ||
+ (vhost_user_intf_ready (vui) != vui->is_up))
+ {
+ vlib_worker_thread_barrier_sync (vlib_get_main ());
+ vui->vrings[qid].started = 1;
+ vhost_user_update_iface_state (vui);
+ vlib_worker_thread_barrier_release (vlib_get_main ());
+ }
+
+ vhost_user_set_interrupt_pending (vui, uf->private_data);
+ return 0;
+}
+
+/**
+ * @brief Try once to lock the vring
+ * @return 0 on success, non-zero on failure.
+ */
+static inline int
+vhost_user_vring_try_lock (vhost_user_intf_t * vui, u32 qid)
+{
+ return __sync_lock_test_and_set (vui->vring_locks[qid], 1);
+}
+
+/**
+ * @brief Spin until the vring is successfully locked
+ */
+static inline void
+vhost_user_vring_lock (vhost_user_intf_t * vui, u32 qid)
+{
+ while (vhost_user_vring_try_lock (vui, qid))
+ ;
+}
+
+/**
+ * @brief Unlock the vring lock
+ */
+static inline void
+vhost_user_vring_unlock (vhost_user_intf_t * vui, u32 qid)
+{
+ *vui->vring_locks[qid] = 0;
+}
+
+static inline void
+vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
+{
+ vhost_user_vring_t *vring = &vui->vrings[qid];
+ memset (vring, 0, sizeof (*vring));
+ vring->kickfd_idx = ~0;
+ vring->callfd_idx = ~0;
+ vring->errfd = -1;
+
+ /*
+ * We have a bug with some qemu 2.5, and this may be a fix.
+   * This feels like interpreting holy text, but vhost-user.txt says:
+   * "
+   * One queue pair is enabled initially. More queues are enabled
+   * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE.
+   * "
+   * It is not clear who is right, but this is what DPDK does.
+ */
+ if (qid == 0 || qid == 1)
+ vring->enabled = 1;
+}
+
+static inline void
+vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
+{
+ vhost_user_vring_t *vring = &vui->vrings[qid];
+ if (vring->kickfd_idx != ~0)
+ {
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vring->kickfd_idx);
+ clib_file_del (&file_main, uf);
+ vring->kickfd_idx = ~0;
+ }
+ if (vring->callfd_idx != ~0)
+ {
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vring->callfd_idx);
+ clib_file_del (&file_main, uf);
+ vring->callfd_idx = ~0;
+ }
+ if (vring->errfd != -1)
+ {
+ close (vring->errfd);
+ vring->errfd = -1;
+ }
+ vhost_user_vring_init (vui, qid);
+}
+
+static inline void
+vhost_user_if_disconnect (vhost_user_intf_t * vui)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ int q;
+
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
+
+ if (vui->clib_file_index != ~0)
+ {
+ clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
+ vui->clib_file_index = ~0;
+ }
+
+ vui->is_up = 0;
+
+ for (q = 0; q < VHOST_VRING_MAX_N; q++)
+ vhost_user_vring_close (vui, q);
+
+ unmap_all_mem_regions (vui);
+ DBG_SOCK ("interface ifindex %d disconnected", vui->sw_if_index);
+}
+
+#define VHOST_LOG_PAGE 0x1000
+static_always_inline void
+vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui,
+ u64 addr, u64 len, u8 is_host_address)
+{
+ if (PREDICT_TRUE (vui->log_base_addr == 0
+ || !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL))))
+ {
+ return;
+ }
+ if (is_host_address)
+ {
+ addr = pointer_to_uword (map_user_mem (vui, (uword) addr));
+ }
+ if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
+ {
+ DBG_SOCK ("vhost_user_log_dirty_pages(): out of range\n");
+ return;
+ }
+
+ CLIB_MEMORY_BARRIER ();
+ u64 page = addr / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < addr + len)
+ {
+ ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
+ page++;
+ }
+}
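+
+/*
+ * Worked example (illustrative numbers): with VHOST_LOG_PAGE 0x1000,
+ * logging addr=0x5800 len=0x1000 touches pages 5 and 6, i.e. it sets
+ * bit (5 % 8) of byte (5 / 8) and bit (6 % 8) of byte (6 / 8) in the
+ * shared dirty-log bitmap, which the master typically reads during
+ * live migration.
+ */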
+
+static_always_inline void
+vhost_user_log_dirty_pages (vhost_user_intf_t * vui, u64 addr, u64 len)
+{
+ vhost_user_log_dirty_pages_2 (vui, addr, len, 0);
+}
+
+#define vhost_user_log_dirty_ring(vui, vq, member) \
+ if (PREDICT_FALSE(vq->log_used)) { \
+ vhost_user_log_dirty_pages(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \
+ sizeof(vq->used->member)); \
+ }
+
+static clib_error_t *
+vhost_user_socket_read (clib_file_t * uf)
+{
+ int n, i;
+ int fd, number_of_fds = 0;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+ vhost_user_msg_t msg;
+ struct msghdr mh;
+ struct iovec iov[1];
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ struct cmsghdr *cmsg;
+ u8 q;
+ clib_file_t template = { 0 };
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
+
+ char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
+
+ memset (&mh, 0, sizeof (mh));
+ memset (control, 0, sizeof (control));
+
+ for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
+ fds[i] = -1;
+
+ /* set the payload */
+ iov[0].iov_base = (void *) &msg;
+ iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
+
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+ mh.msg_control = control;
+ mh.msg_controllen = sizeof (control);
+
+ n = recvmsg (uf->file_descriptor, &mh, 0);
+
+ /* Stop workers to avoid end of the world */
+ vlib_worker_thread_barrier_sync (vlib_get_main ());
+
+ if (n != VHOST_USER_MSG_HDR_SZ)
+ {
+ if (n == -1)
+ {
+ DBG_SOCK ("recvmsg returned error %d %s", errno, strerror (errno));
+ }
+ else
+ {
+ DBG_SOCK ("n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
+ n, VHOST_USER_MSG_HDR_SZ);
+ }
+ goto close_socket;
+ }
+
+ if (mh.msg_flags & MSG_CTRUNC)
+ {
+ DBG_SOCK ("MSG_CTRUNC is set");
+ goto close_socket;
+ }
+
+ cmsg = CMSG_FIRSTHDR (&mh);
+
+ if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
+ (cmsg->cmsg_type == SCM_RIGHTS) &&
+ (cmsg->cmsg_len - CMSG_LEN (0) <=
+ VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
+ {
+ number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
+ clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
+ }
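+
+  /*
+   * fds[] now holds local descriptor numbers: the vhost-user protocol
+   * passes memory-region and eventfd descriptors as SCM_RIGHTS
+   * ancillary data, and the kernel has already installed them in this
+   * process by the time recvmsg() returns.
+   */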
+
+ /* version 1, no reply bit set */
+ if ((msg.flags & 7) != 1)
+ {
+ DBG_SOCK ("malformed message received. closing socket");
+ goto close_socket;
+ }
+
+ {
+ int rv;
+ rv =
+ read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ,
+ msg.size);
+ if (rv < 0)
+ {
+ DBG_SOCK ("read failed %s", strerror (errno));
+ goto close_socket;
+ }
+ else if (rv != msg.size)
+ {
+ DBG_SOCK ("message too short (read %dB should be %dB)", rv, msg.size);
+ goto close_socket;
+ }
+ }
+
+ switch (msg.request)
+ {
+ case VHOST_USER_GET_FEATURES:
+ msg.flags |= 4;
+ msg.u64 = (1ULL << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
+ (1ULL << FEAT_VIRTIO_NET_F_CTRL_VQ) |
+ (1ULL << FEAT_VIRTIO_F_ANY_LAYOUT) |
+ (1ULL << FEAT_VIRTIO_F_INDIRECT_DESC) |
+ (1ULL << FEAT_VHOST_F_LOG_ALL) |
+ (1ULL << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
+ (1ULL << FEAT_VIRTIO_NET_F_MQ) |
+ (1ULL << FEAT_VHOST_USER_F_PROTOCOL_FEATURES) |
+ (1ULL << FEAT_VIRTIO_F_VERSION_1);
+ msg.u64 &= vui->feature_mask;
+ msg.size = sizeof (msg.u64);
+ DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES - reply 0x%016llx",
+ vui->hw_if_index, msg.u64);
+ break;
+
+ case VHOST_USER_SET_FEATURES:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx",
+ vui->hw_if_index, msg.u64);
+
+ vui->features = msg.u64;
+
+ if (vui->features &
+ ((1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
+ (1ULL << FEAT_VIRTIO_F_VERSION_1)))
+ vui->virtio_net_hdr_sz = 12;
+ else
+ vui->virtio_net_hdr_sz = 10;
+
+ vui->is_any_layout =
+ (vui->features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
+
+ ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
+ vui->is_up = 0;
+
+ /*for (q = 0; q < VHOST_VRING_MAX_N; q++)
+ vhost_user_vring_close(&vui->vrings[q]); */
+
+ break;
+
+ case VHOST_USER_SET_MEM_TABLE:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
+ vui->hw_if_index, msg.memory.nregions);
+
+ if ((msg.memory.nregions < 1) ||
+ (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
+ {
+
+ DBG_SOCK ("number of mem regions must be between 1 and %i",
+ VHOST_MEMORY_MAX_NREGIONS);
+
+ goto close_socket;
+ }
+
+ if (msg.memory.nregions != number_of_fds)
+ {
+	  DBG_SOCK ("each memory region must have an FD");
+ goto close_socket;
+ }
+ unmap_all_mem_regions (vui);
+ for (i = 0; i < msg.memory.nregions; i++)
+ {
+ clib_memcpy (&(vui->regions[i]), &msg.memory.regions[i],
+ sizeof (vhost_user_memory_region_t));
+
+ long page_sz = get_huge_page_size (fds[i]);
+
+ /* align size to 2M page */
+ ssize_t map_sz = (vui->regions[i].memory_size +
+ vui->regions[i].mmap_offset +
+ page_sz - 1) & ~(page_sz - 1);
+
+ vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fds[i], 0);
+ vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
+ vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
+ vui->regions[i].memory_size;
+
+ DBG_SOCK
+ ("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx "
+ "page_sz 0x%x", i, map_sz, fds[i], vui->region_mmap_addr[i],
+ page_sz);
+
+ if (vui->region_mmap_addr[i] == MAP_FAILED)
+ {
+ clib_warning ("failed to map memory. errno is %d", errno);
+ goto close_socket;
+ }
+ vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
+ vui->region_mmap_fd[i] = fds[i];
+ }
+ vui->nregions = msg.memory.nregions;
+ break;
+
+ case VHOST_USER_SET_VRING_NUM:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
+ vui->hw_if_index, msg.state.index, msg.state.num);
+
+ if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
+ (msg.state.num == 0) || /* it cannot be zero */
+ ((msg.state.num - 1) & msg.state.num)) /* must be power of 2 */
+ goto close_socket;
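+      /*
+       * Storing size - 1 as a mask lets ring indices wrap with an AND
+       * instead of a modulo, e.g. (a sketch):
+       *   desc = txvq->avail->ring[last_avail_idx & qsz_mask];
+       * which is valid only because num is checked to be a power of 2
+       * just above.
+       */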
+ vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1;
+ break;
+
+ case VHOST_USER_SET_VRING_ADDR:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
+ vui->hw_if_index, msg.state.index);
+
+ if (msg.state.index >= VHOST_VRING_MAX_N)
+ {
+ DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ADDR:"
+ " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
+ goto close_socket;
+ }
+
+ if (msg.size < sizeof (msg.addr))
+ {
+ DBG_SOCK ("vhost message is too short (%d < %d)",
+ msg.size, sizeof (msg.addr));
+ goto close_socket;
+ }
+
+ vui->vrings[msg.state.index].desc = (vring_desc_t *)
+ map_user_mem (vui, msg.addr.desc_user_addr);
+ vui->vrings[msg.state.index].used = (vring_used_t *)
+ map_user_mem (vui, msg.addr.used_user_addr);
+ vui->vrings[msg.state.index].avail = (vring_avail_t *)
+ map_user_mem (vui, msg.addr.avail_user_addr);
+
+ if ((vui->vrings[msg.state.index].desc == NULL) ||
+ (vui->vrings[msg.state.index].used == NULL) ||
+ (vui->vrings[msg.state.index].avail == NULL))
+ {
+ DBG_SOCK ("failed to map user memory for hw_if_index %d",
+ vui->hw_if_index);
+ goto close_socket;
+ }
+
+ vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
+ vui->vrings[msg.state.index].log_used =
+ (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
+
+ /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
+ the ring is initialized in an enabled state. */
+ if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES)))
+ {
+ vui->vrings[msg.state.index].enabled = 1;
+ }
+
+ vui->vrings[msg.state.index].last_used_idx =
+ vui->vrings[msg.state.index].last_avail_idx =
+ vui->vrings[msg.state.index].used->idx;
+
+ /* tell driver that we don't want interrupts */
+ vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
+ break;
+
+ case VHOST_USER_SET_OWNER:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
+ break;
+
+ case VHOST_USER_RESET_OWNER:
+ DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", vui->hw_if_index);
+ break;
+
+ case VHOST_USER_SET_VRING_CALL:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL %d",
+ vui->hw_if_index, msg.u64);
+
+ q = (u8) (msg.u64 & 0xFF);
+
+ /* if there is old fd, delete and close it */
+ if (vui->vrings[q].callfd_idx != ~0)
+ {
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vui->vrings[q].callfd_idx);
+ clib_file_del (&file_main, uf);
+ vui->vrings[q].callfd_idx = ~0;
+ }
+
+ if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
+ {
+ if (number_of_fds != 1)
+ {
+	      DBG_SOCK ("More than one fd received!");
+ goto close_socket;
+ }
+
+ template.read_function = vhost_user_callfd_read_ready;
+ template.file_descriptor = fds[0];
+ template.private_data =
+ ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
+ vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template);
+ }
+ else
+ vui->vrings[q].callfd_idx = ~0;
+ break;
+
+ case VHOST_USER_SET_VRING_KICK:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK %d",
+ vui->hw_if_index, msg.u64);
+
+ q = (u8) (msg.u64 & 0xFF);
+
+ if (vui->vrings[q].kickfd_idx != ~0)
+ {
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vui->vrings[q].kickfd_idx);
+ clib_file_del (&file_main, uf);
+ vui->vrings[q].kickfd_idx = ~0;
+ }
+
+ if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
+ {
+ if (number_of_fds != 1)
+ {
+	      DBG_SOCK ("More than one fd received!");
+ goto close_socket;
+ }
+
+ template.read_function = vhost_user_kickfd_read_ready;
+ template.file_descriptor = fds[0];
+ template.private_data =
+ (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
+ q;
+ vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template);
+ }
+ else
+ {
+ //When no kickfd is set, the queue is initialized as started
+ vui->vrings[q].kickfd_idx = ~0;
+ vui->vrings[q].started = 1;
+ }
+
+ break;
+
+ case VHOST_USER_SET_VRING_ERR:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR %d",
+ vui->hw_if_index, msg.u64);
+
+ q = (u8) (msg.u64 & 0xFF);
+
+ if (vui->vrings[q].errfd != -1)
+ close (vui->vrings[q].errfd);
+
+ if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
+ {
+ if (number_of_fds != 1)
+ goto close_socket;
+
+ vui->vrings[q].errfd = fds[0];
+ }
+ else
+ vui->vrings[q].errfd = -1;
+
+ break;
+
+ case VHOST_USER_SET_VRING_BASE:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
+ vui->hw_if_index, msg.state.index, msg.state.num);
+
+ vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
+ break;
+
+ case VHOST_USER_GET_VRING_BASE:
+ if (msg.state.index >= VHOST_VRING_MAX_N)
+ {
+ DBG_SOCK ("invalid vring index VHOST_USER_GET_VRING_BASE:"
+ " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
+ goto close_socket;
+ }
+
+ /*
+ * Copy last_avail_idx from the vring before closing it because
+ * closing the vring also initializes the vring last_avail_idx
+ */
+ msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
+ msg.flags |= 4;
+ msg.size = sizeof (msg.state);
+
+ /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */
+ vhost_user_vring_close (vui, msg.state.index);
+ DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
+ vui->hw_if_index, msg.state.index, msg.state.num);
+ break;
+
+ case VHOST_USER_NONE:
+ DBG_SOCK ("if %d msg VHOST_USER_NONE", vui->hw_if_index);
+
+ break;
+
+ case VHOST_USER_SET_LOG_BASE:
+ {
+ DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", vui->hw_if_index);
+
+ if (msg.size != sizeof (msg.log))
+ {
+ DBG_SOCK
+ ("invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d",
+ msg.size, sizeof (msg.log));
+ goto close_socket;
+ }
+
+ if (!
+ (vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
+ {
+ DBG_SOCK
+ ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
+ goto close_socket;
+ }
+
+ fd = fds[0];
+ /* align size to 2M page */
+ long page_sz = get_huge_page_size (fd);
+ ssize_t map_sz =
+ (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);
+
+ vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+
+ DBG_SOCK
+ ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx",
+ map_sz, msg.log.offset, fd, vui->log_base_addr);
+
+ if (vui->log_base_addr == MAP_FAILED)
+ {
+ clib_warning ("failed to map memory. errno is %d", errno);
+ goto close_socket;
+ }
+
+ vui->log_base_addr += msg.log.offset;
+ vui->log_size = msg.log.size;
+
+ msg.flags |= 4;
+ msg.size = sizeof (msg.u64);
+
+ break;
+ }
+
+ case VHOST_USER_SET_LOG_FD:
+ DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);
+
+ break;
+
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ msg.flags |= 4;
+ msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |
+ (1 << VHOST_USER_PROTOCOL_F_MQ);
+ msg.size = sizeof (msg.u64);
+ DBG_SOCK
+ ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - reply 0x%016llx",
+ vui->hw_if_index, msg.u64);
+ break;
+
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ DBG_SOCK
+ ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%016llx",
+ vui->hw_if_index, msg.u64);
+
+ vui->protocol_features = msg.u64;
+
+ break;
+
+ case VHOST_USER_GET_QUEUE_NUM:
+ msg.flags |= 4;
+ msg.u64 = VHOST_VRING_MAX_N;
+ msg.size = sizeof (msg.u64);
+ DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d",
+ vui->hw_if_index, msg.u64);
+ break;
+
+ case VHOST_USER_SET_VRING_ENABLE:
+ DBG_SOCK ("if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
+ vui->hw_if_index, msg.state.num ? "enable" : "disable",
+ msg.state.index);
+ if (msg.state.index >= VHOST_VRING_MAX_N)
+ {
+ DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ENABLE:"
+ " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
+ goto close_socket;
+ }
+
+ vui->vrings[msg.state.index].enabled = msg.state.num;
+ break;
+
+ default:
+ DBG_SOCK ("unknown vhost-user message %d received. closing socket",
+ msg.request);
+ goto close_socket;
+ }
+
+ /* if we need to reply */
+ if (msg.flags & 4)
+ {
+ n =
+ send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
+ if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
+ {
+ DBG_SOCK ("could not send message response");
+ goto close_socket;
+ }
+ }
+
+ vhost_user_update_iface_state (vui);
+ vlib_worker_thread_barrier_release (vlib_get_main ());
+ return 0;
+
+close_socket:
+ vhost_user_if_disconnect (vui);
+ vhost_user_update_iface_state (vui);
+ vlib_worker_thread_barrier_release (vlib_get_main ());
+ return 0;
+}
+
+static clib_error_t *
+vhost_user_socket_error (clib_file_t * uf)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
+
+ DBG_SOCK ("socket error on if %d", vui->sw_if_index);
+ vlib_worker_thread_barrier_sync (vm);
+ vhost_user_if_disconnect (vui);
+ vhost_user_rx_thread_placement ();
+ vlib_worker_thread_barrier_release (vm);
+ return 0;
+}
+
+static clib_error_t *
+vhost_user_socksvr_accept_ready (clib_file_t * uf)
+{
+ int client_fd, client_len;
+ struct sockaddr_un client;
+ clib_file_t template = { 0 };
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
+
+ client_len = sizeof (client);
+ client_fd = accept (uf->file_descriptor,
+ (struct sockaddr *) &client,
+ (socklen_t *) & client_len);
+
+ if (client_fd < 0)
+ return clib_error_return_unix (0, "accept");
+
+ DBG_SOCK ("New client socket for vhost interface %d", vui->sw_if_index);
+ template.read_function = vhost_user_socket_read;
+ template.error_function = vhost_user_socket_error;
+ template.file_descriptor = client_fd;
+ template.private_data = vui - vhost_user_main.vhost_user_interfaces;
+ vui->clib_file_index = clib_file_add (&file_main, &template);
+ return 0;
+}
+
+static clib_error_t *
+vhost_user_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+ vhost_user_main_t *vum = &vhost_user_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ error = vlib_call_init_function (vm, ip4_init);
+ if (error)
+ return error;
+
+ vum->coalesce_frames = 32;
+ vum->coalesce_time = 1e-3;
+
+ vec_validate (vum->cpus, tm->n_vlib_mains - 1);
+
+ vhost_cpu_t *cpu;
+ vec_foreach (cpu, vum->cpus)
+ {
+ /* This is actually not necessary as validate already zeroes it
+ * Just keeping the loop here for later because I am lazy. */
+ cpu->rx_buffers_len = 0;
+ }
+
+ vum->random = random_default_seed ();
+
+ mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vhost_user_init);
+
+static u8 *
+format_vhost_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_trace_t *t = va_arg (*va, vhost_trace_t *);
+ vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
+ t->device_index);
+
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
+
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%U %U queue %d\n", format_white_space, indent,
+ format_vnet_sw_interface_name, vnm, sw, t->qid);
+
+ s = format (s, "%U virtio flags:\n", format_white_space, indent);
+#define _(n,i,st) \
+ if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \
+ s = format (s, "%U %s %s\n", format_white_space, indent, #n, st);
+ foreach_virtio_trace_flags
+#undef _
+ s = format (s, "%U virtio_net_hdr first_desc_len %u\n",
+ format_white_space, indent, t->first_desc_len);
+
+ s = format (s, "%U flags 0x%02x gso_type %u\n",
+ format_white_space, indent,
+ t->hdr.hdr.flags, t->hdr.hdr.gso_type);
+
+ if (vui->virtio_net_hdr_sz == 12)
+ s = format (s, "%U num_buff %u",
+ format_white_space, indent, t->hdr.num_buffers);
+
+ return s;
+}
+
+void
+vhost_user_rx_trace (vhost_trace_t * t,
+ vhost_user_intf_t * vui, u16 qid,
+ vlib_buffer_t * b, vhost_user_vring_t * txvq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u32 last_avail_idx = txvq->last_avail_idx;
+ u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
+ vring_desc_t *hdr_desc = 0;
+ virtio_net_hdr_mrg_rxbuf_t *hdr;
+ u32 hint = 0;
+
+ memset (t, 0, sizeof (*t));
+ t->device_index = vui - vum->vhost_user_interfaces;
+ t->qid = qid;
+
+ hdr_desc = &txvq->desc[desc_current];
+ if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
+ /* Header is the first here */
+ hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
+ }
+ if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
+ }
+ if (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
+ !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
+ }
+
+ t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
+
+ if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
+ }
+ else
+ {
+ u32 len = vui->virtio_net_hdr_sz;
+ memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
+ }
+}
+
+static inline void
+vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u64 x = 1;
+ int fd = UNIX_GET_FD (vq->callfd_idx);
+ int rv;
+
+ rv = write (fd, &x, sizeof (x));
+ if (rv <= 0)
+ {
+ clib_unix_warning
+ ("Error: Could not write to unix socket for callfd %d", fd);
+ return;
+ }
+
+ vq->n_since_last_int = 0;
+ vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
+}
+
+static_always_inline u32
+vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
+ u16 copy_len, u32 * map_hint)
+{
+ void *src0, *src1, *src2, *src3;
+ if (PREDICT_TRUE (copy_len >= 4))
+ {
+ if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint))))
+ return 1;
+ if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint))))
+ return 1;
+
+ while (PREDICT_TRUE (copy_len >= 4))
+ {
+ src0 = src2;
+ src1 = src3;
+
+ if (PREDICT_FALSE
+ (!(src2 = map_guest_mem (vui, cpy[2].src, map_hint))))
+ return 1;
+ if (PREDICT_FALSE
+ (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint))))
+ return 1;
+
+ CLIB_PREFETCH (src2, 64, LOAD);
+ CLIB_PREFETCH (src3, 64, LOAD);
+
+ clib_memcpy ((void *) cpy[0].dst, src0, cpy[0].len);
+ clib_memcpy ((void *) cpy[1].dst, src1, cpy[1].len);
+ copy_len -= 2;
+ cpy += 2;
+ }
+ }
+ while (copy_len)
+ {
+ if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
+ return 1;
+ clib_memcpy ((void *) cpy->dst, src0, cpy->len);
+ copy_len -= 1;
+ cpy += 1;
+ }
+ return 0;
+}
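+
+/*
+ * The copy loop above is software-pipelined: each iteration copies two
+ * entries whose guest sources were mapped and prefetched on the
+ * previous iteration, while mapping and prefetching the next two, so
+ * clib_memcpy rarely has to wait on a cold cache line.
+ */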
+
+/**
+ * Try to discard packets from the tx ring (VPP RX path).
+ * Returns the number of discarded packets.
+ */
+u32
+vhost_user_rx_discard_packet (vlib_main_t * vm,
+ vhost_user_intf_t * vui,
+ vhost_user_vring_t * txvq, u32 discard_max)
+{
+ /*
+ * On the RX side, each packet corresponds to one descriptor
+ * (it is the same whether it is a shallow descriptor, chained, or indirect).
+ * Therefore, discarding a packet is like discarding a descriptor.
+ */
+ u32 discarded_packets = 0;
+ u32 avail_idx = txvq->avail->idx;
+ while (discarded_packets != discard_max)
+ {
+ if (avail_idx == txvq->last_avail_idx)
+ goto out;
+
+ u16 desc_chain_head =
+ txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
+ txvq->last_avail_idx++;
+ txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
+ desc_chain_head;
+ txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
+ vhost_user_log_dirty_ring (vui, txvq,
+ ring[txvq->last_used_idx & txvq->qsz_mask]);
+ txvq->last_used_idx++;
+ discarded_packets++;
+ }
+
+out:
+ CLIB_MEMORY_BARRIER ();
+ txvq->used->idx = txvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, txvq, idx);
+ return discarded_packets;
+}
+
+/*
+ * In case of overflow, we need to rewind the array of allocated buffers.
+ */
+static void
+vhost_user_input_rewind_buffers (vlib_main_t * vm,
+ vhost_cpu_t * cpu, vlib_buffer_t * b_head)
+{
+ u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
+ vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current);
+ b_current->current_length = 0;
+ b_current->flags = 0;
+ while (b_current != b_head)
+ {
+ cpu->rx_buffers_len++;
+ bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
+ b_current = vlib_get_buffer (vm, bi_current);
+ b_current->current_length = 0;
+ b_current->flags = 0;
+ }
+ cpu->rx_buffers_len++;
+}
+
+static u32
+vhost_user_if_input (vlib_main_t * vm,
+ vhost_user_main_t * vum,
+ vhost_user_intf_t * vui,
+ u16 qid, vlib_node_runtime_t * node,
+ vnet_hw_interface_rx_mode mode)
+{
+ vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+ u16 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u16 n_left;
+ u32 n_left_to_next, *to_next;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ u32 map_hint = 0;
+ u16 thread_index = vlib_get_thread_index ();
+ u16 copy_len = 0;
+
+ {
+ /* do we have pending interrupts ? */
+ vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
+ f64 now = vlib_time_now (vm);
+
+ if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
+ vhost_user_send_call (vm, txvq);
+
+ if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
+ vhost_user_send_call (vm, rxvq);
+ }
+
+ /*
+   * In adaptive mode, the goal is to reduce the number of interrupts.
+   * If the scheduler switches the input node to polling due
+   * to a burst of traffic, we tell the driver not to interrupt.
+   * When the traffic subsides, the scheduler switches the node back to
+   * interrupt mode, and we must tell the driver we want interrupts again.
+ */
+ if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+ {
+ if ((node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
+ !(node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
+ /* Tell driver we want notification */
+ txvq->used->flags = 0;
+ else
+ /* Tell driver we don't want notification */
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ }
+
+ if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
+ return 0;
+
+ n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
+
+ /* nothing to do */
+ if (PREDICT_FALSE (n_left == 0))
+ return 0;
+
+ if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled)))
+ {
+ /*
+ * Discard input packet if interface is admin down or vring is not
+ * enabled.
+ * "For example, for a networking device, in the disabled state
+ * client must not supply any new RX packets, but must process
+ * and discard any TX packets."
+ */
+ vhost_user_rx_discard_packet (vm, vui, txvq,
+ VHOST_USER_DOWN_DISCARD_COUNT);
+ return 0;
+ }
+
+ if (PREDICT_FALSE (n_left == (txvq->qsz_mask + 1)))
+ {
+ /*
+ * Informational error logging when VPP is not
+ * receiving packets fast enough.
+ */
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
+ }
+
+ if (n_left > VLIB_FRAME_SIZE)
+ n_left = VLIB_FRAME_SIZE;
+
+ /*
+ * For small packets (<2kB), we will not need more than one vlib buffer
+   * per packet. In case packets are bigger, we will just yield at some point
+   * in the loop and come back later. This is not an issue because for big
+   * packets the processing cost really comes from the memory copy.
+ * The assumption is that big packets will fit in 40 buffers.
+ */
+ if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 ||
+ vum->cpus[thread_index].rx_buffers_len < 40))
+ {
+ u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
+ vum->cpus[thread_index].rx_buffers_len +=
+ vlib_buffer_alloc_from_free_list (vm,
+ vum->cpus[thread_index].rx_buffers +
+ curr_len,
+ VHOST_USER_RX_BUFFERS_N - curr_len,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ if (PREDICT_FALSE
+ (vum->cpus[thread_index].rx_buffers_len <
+ VHOST_USER_RX_BUFFER_STARVATION))
+ {
+	  /* In case of buffer starvation, discard some packets from the queue
+	   * and log the event.
+	   * We continue on a best-effort basis for the remaining packets. */
+ u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
+ n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
+ flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
+
+ n_left -= flush;
+ vlib_increment_simple_counter (vnet_main.
+ interface_main.sw_if_counters +
+ VNET_INTERFACE_COUNTER_DROP,
+ vlib_get_thread_index (),
+ vui->sw_if_index, flush);
+
+ vlib_error_count (vm, vhost_user_input_node.index,
+ VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
+ }
+ }
+
+ while (n_left > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b_head, *b_current;
+ u32 bi_current;
+ u16 desc_current;
+ u32 desc_data_offset;
+ vring_desc_t *desc_table = txvq->desc;
+
+ if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
+ {
+	      /* Not enough rx_buffers.
+	       * Note: We yield on 1 so we don't need to do an additional
+	       * check for the next buffer prefetch.
+	       */
+ n_left = 0;
+ break;
+ }
+
+ desc_current =
+ txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
+ vum->cpus[thread_index].rx_buffers_len--;
+ bi_current = (vum->cpus[thread_index].rx_buffers)
+ [vum->cpus[thread_index].rx_buffers_len];
+ b_head = b_current = vlib_get_buffer (vm, bi_current);
+ to_next[0] = bi_current; //We do that now so we can forget about bi_current
+ to_next++;
+ n_left_to_next--;
+
+ vlib_prefetch_buffer_with_index (vm,
+ (vum->
+ cpus[thread_index].rx_buffers)
+ [vum->cpus[thread_index].
+ rx_buffers_len - 1], LOAD);
+
+ /* Just preset the used descriptor id and length for later */
+ txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
+ desc_current;
+ txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
+ vhost_user_log_dirty_ring (vui, txvq,
+ ring[txvq->last_used_idx &
+ txvq->qsz_mask]);
+
+ /* The buffer should already be initialized */
+ b_head->total_length_not_including_first_buffer = 0;
+ b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ if (PREDICT_FALSE (n_trace))
+ {
+ //TODO: next_index is not exactly known at that point
+ vlib_trace_buffer (vm, node, next_index, b_head,
+ /* follow_chain */ 0);
+ vhost_trace_t *t0 =
+ vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
+ vhost_user_rx_trace (t0, vui, qid, b_head, txvq);
+ n_trace--;
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+
+	  /* This depends on the setup but is very consistent,
+	   * so the CPU branch predictor should do a pretty good job
+	   * of optimizing the decision. */
+ if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+ {
+ desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
+ &map_hint);
+ desc_current = 0;
+ if (PREDICT_FALSE (desc_table == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+ goto out;
+ }
+ }
+
+ if (PREDICT_TRUE (vui->is_any_layout) ||
+ (!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT)))
+ {
+ /* ANYLAYOUT or single buffer */
+ desc_data_offset = vui->virtio_net_hdr_sz;
+ }
+ else
+ {
+ /* CSR case without ANYLAYOUT, skip 1st buffer */
+ desc_data_offset = desc_table[desc_current].len;
+ }
+
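+	  /* Walk the descriptor chain, scheduling one deferred copy per
+	   * overlap of a descriptor segment with a vlib buffer segment. */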
+ while (1)
+ {
+ /* Get more input if necessary. Or end of packet. */
+ if (desc_data_offset == desc_table[desc_current].len)
+ {
+ if (PREDICT_FALSE (desc_table[desc_current].flags &
+ VIRTQ_DESC_F_NEXT))
+ {
+ desc_current = desc_table[desc_current].next;
+ desc_data_offset = 0;
+ }
+ else
+ {
+ goto out;
+ }
+ }
+
+ /* Get more output if necessary. Or end of packet. */
+ if (PREDICT_FALSE
+ (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
+ {
+ if (PREDICT_FALSE
+ (vum->cpus[thread_index].rx_buffers_len == 0))
+ {
+ /* Cancel speculation */
+ to_next--;
+ n_left_to_next++;
+
+		      /*
+		       * No free buffers are left, so rewind the used
+		       * buffers and stop.
+		       * Note: Scheduled copies are not cancelled. This is
+		       * not an issue as they are still valid. Useless,
+		       * but valid.
+		       */
+ vhost_user_input_rewind_buffers (vm,
+ &vum->cpus
+ [thread_index],
+ b_head);
+ n_left = 0;
+ goto stop;
+ }
+
+ /* Get next output */
+ vum->cpus[thread_index].rx_buffers_len--;
+ u32 bi_next =
+ (vum->cpus[thread_index].rx_buffers)[vum->cpus
+ [thread_index].rx_buffers_len];
+ b_current->next_buffer = bi_next;
+ b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ bi_current = bi_next;
+ b_current = vlib_get_buffer (vm, bi_current);
+ }
+
+ /* Prepare a copy order executed later for the data */
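+	  /* The copy length is bounded by both the space left in the
+	   * current vlib buffer and the data left in the descriptor. */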
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
+ copy_len++;
+ u32 desc_data_l =
+ desc_table[desc_current].len - desc_data_offset;
+ cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length;
+ cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
+ cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
+ b_current->current_length);
+ cpy->src = desc_table[desc_current].addr + desc_data_offset;
+
+ desc_data_offset += cpy->len;
+
+ b_current->current_length += cpy->len;
+ b_head->total_length_not_including_first_buffer += cpy->len;
+ }
+
+ out:
+ CLIB_PREFETCH (&n_left, sizeof (n_left), LOAD);
+
+ n_rx_bytes += b_head->total_length_not_including_first_buffer;
+ n_rx_packets++;
+
+ b_head->total_length_not_including_first_buffer -=
+ b_head->current_length;
+
+ /* consume the descriptor and return it as used */
+ txvq->last_avail_idx++;
+ txvq->last_used_idx++;
+
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
+
+ vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
+ vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ b_head->error = 0;
+
+ {
+ u32 next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+ /* redirect if feature path enabled */
+ vnet_feature_start_device_input_x1 (vui->sw_if_index, &next0,
+ b_head);
+
+ u32 bi = to_next[-1]; //Cannot use to_next[-1] in the macro
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi, next0);
+ }
+
+ n_left--;
+
+	  /*
+	   * Although separating memory copies from virtio ring parsing
+	   * is beneficial, we still perform the copies from time to time
+	   * in order to free some space in the ring.
+	   */
+ if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
+ {
+ if (PREDICT_FALSE
+ (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
+ copy_len, &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+ copy_len = 0;
+
+ /* give buffers back to driver */
+ CLIB_MEMORY_BARRIER ();
+ txvq->used->idx = txvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, txvq, idx);
+ }
+ }
+ stop:
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Do the memory copies */
+ if (PREDICT_FALSE
+ (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
+ copy_len, &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+
+ /* give buffers back to driver */
+ CLIB_MEMORY_BARRIER ();
+ txvq->used->idx = txvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, txvq, idx);
+
+ /* interrupt (call) handling */
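+  /* Packets are counted since the last interrupt; a call is sent right
+   * away once the count exceeds coalesce_frames, otherwise the
+   * send-interrupt process flushes it when coalesce_time expires. */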
+ if ((txvq->callfd_idx != ~0) &&
+ !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+ {
+ txvq->n_since_last_int += n_rx_packets;
+
+ if (txvq->n_since_last_int > vum->coalesce_frames)
+ vhost_user_send_call (vm, txvq);
+ }
+
+ /* increase rx counters */
+ vlib_increment_combined_counter
+ (vnet_main.interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+
+ return n_rx_packets;
+}
+
+static uword
+vhost_user_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ uword n_rx_packets = 0;
+ vhost_user_intf_t *vui;
+ vnet_device_input_runtime_t *rt =
+ (vnet_device_input_runtime_t *) node->runtime_data;
+ vnet_device_and_queue_t *dq;
+
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ if (clib_smp_swap (&dq->interrupt_pending, 0) ||
+ (node->state == VLIB_NODE_STATE_POLLING))
+ {
+ vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
+ n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node,
+ dq->mode);
+ }
+ }
+
+ return n_rx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vhost_user_input_node) = {
+ .function = vhost_user_input,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .name = "vhost-user-input",
+ .sibling_of = "device-input",
+
+ /* Will be enabled if/when hardware is detected. */
+ .state = VLIB_NODE_STATE_DISABLED,
+
+ .format_buffer = format_ethernet_header_with_length,
+ .format_trace = format_vhost_trace,
+
+ .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
+ .error_strings = vhost_user_input_func_error_strings,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vhost_user_input_node, vhost_user_input)
+/* *INDENT-ON* */
+
+
+void
+vhost_user_tx_trace (vhost_trace_t * t,
+ vhost_user_intf_t * vui, u16 qid,
+ vlib_buffer_t * b, vhost_user_vring_t * rxvq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u32 last_avail_idx = rxvq->last_avail_idx;
+ u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
+ vring_desc_t *hdr_desc = 0;
+ u32 hint = 0;
+
+ memset (t, 0, sizeof (*t));
+ t->device_index = vui - vum->vhost_user_interfaces;
+ t->qid = qid;
+
+ hdr_desc = &rxvq->desc[desc_current];
+ if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
+ /* Header is the first here */
+ hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
+ }
+ if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
+ }
+ if (!(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
+ !(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
+ }
+
+ t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
+}
+
+static_always_inline u32
+vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
+ u16 copy_len, u32 * map_hint)
+{
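+  /* Software pipeline: destinations are mapped two entries ahead and
+   * sources prefetched, so that mapping guest memory for cpy[2]/cpy[3]
+   * overlaps with copying cpy[0]/cpy[1]. */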
+ void *dst0, *dst1, *dst2, *dst3;
+ if (PREDICT_TRUE (copy_len >= 4))
+ {
+ if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
+ return 1;
+ if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
+ return 1;
+ while (PREDICT_TRUE (copy_len >= 4))
+ {
+ dst0 = dst2;
+ dst1 = dst3;
+
+ if (PREDICT_FALSE
+ (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
+ return 1;
+ if (PREDICT_FALSE
+ (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
+ return 1;
+
+ CLIB_PREFETCH ((void *) cpy[2].src, 64, LOAD);
+ CLIB_PREFETCH ((void *) cpy[3].src, 64, LOAD);
+
+ clib_memcpy (dst0, (void *) cpy[0].src, cpy[0].len);
+ clib_memcpy (dst1, (void *) cpy[1].src, cpy[1].len);
+
+ vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
+ vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
+ copy_len -= 2;
+ cpy += 2;
+ }
+ }
+ while (copy_len)
+ {
+ if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
+ return 1;
+ clib_memcpy (dst0, (void *) cpy->src, cpy->len);
+ vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
+ copy_len -= 1;
+ cpy += 1;
+ }
+ return 0;
+}
+
+
+static uword
+vhost_user_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *buffers = vlib_frame_args (frame);
+ u32 n_left = frame->n_vectors;
+ vhost_user_main_t *vum = &vhost_user_main;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
+ u32 qid = ~0;
+ vhost_user_vring_t *rxvq;
+ u8 error;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 map_hint = 0;
+ u8 retry = 8;
+ u16 copy_len;
+ u16 tx_headers_len;
+
+ if (PREDICT_FALSE (!vui->admin_up))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_DOWN;
+ goto done3;
+ }
+
+ if (PREDICT_FALSE (!vui->is_up))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
+ goto done3;
+ }
+
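+  /* Each worker thread is assigned a guest RX vring via per_cpu_tx_qid;
+   * the spinlock is only needed when vrings are shared between threads. */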
+ qid =
+ VHOST_VRING_IDX_RX (*vec_elt_at_index
+ (vui->per_cpu_tx_qid, thread_index));
+ rxvq = &vui->vrings[qid];
+ if (PREDICT_FALSE (vui->use_tx_spinlock))
+ vhost_user_vring_lock (vui, qid);
+
+retry:
+ error = VHOST_USER_TX_FUNC_ERROR_NONE;
+ tx_headers_len = 0;
+ copy_len = 0;
+ while (n_left > 0)
+ {
+ vlib_buffer_t *b0, *current_b0;
+ u16 desc_head, desc_index, desc_len;
+ vring_desc_t *desc_table;
+ uword buffer_map_addr;
+ u32 buffer_len;
+ u16 bytes_left;
+
+ if (PREDICT_TRUE (n_left > 1))
+ vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vum->cpus[thread_index].current_trace =
+ vlib_add_trace (vm, node, b0,
+ sizeof (*vum->cpus[thread_index].current_trace));
+ vhost_user_tx_trace (vum->cpus[thread_index].current_trace,
+ vui, qid / 2, b0, rxvq);
+ }
+
+ if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
+ goto done;
+ }
+
+ desc_table = rxvq->desc;
+ desc_head = desc_index =
+ rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
+
+      /* Go deeper in case of indirect descriptor.
+       * No driver is known to provide indirect descriptors for RX. */
+ if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
+ {
+ if (PREDICT_FALSE
+ (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
+ goto done;
+ }
+ if (PREDICT_FALSE
+ (!(desc_table =
+ map_guest_mem (vui, rxvq->desc[desc_index].addr,
+ &map_hint))))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
+ goto done;
+ }
+ desc_index = 0;
+ }
+
+ desc_len = vui->virtio_net_hdr_sz;
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+
+ {
+ // Get a header from the header array
+ virtio_net_hdr_mrg_rxbuf_t *hdr =
+ &vum->cpus[thread_index].tx_headers[tx_headers_len];
+ tx_headers_len++;
+ hdr->hdr.flags = 0;
+ hdr->hdr.gso_type = 0;
+ hdr->num_buffers = 1; //This is local, no need to check
+
+ // Prepare a copy order executed later for the header
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
+ copy_len++;
+ cpy->len = vui->virtio_net_hdr_sz;
+ cpy->dst = buffer_map_addr;
+ cpy->src = (uword) hdr;
+ }
+
+ buffer_map_addr += vui->virtio_net_hdr_sz;
+ buffer_len -= vui->virtio_net_hdr_sz;
+ bytes_left = b0->current_length;
+ current_b0 = b0;
+ while (1)
+ {
+ if (buffer_len == 0)
+ { //Get new output
+ if (desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT)
+ {
+ //Next one is chained
+ desc_index = desc_table[desc_index].next;
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+ }
+ else if (vui->virtio_net_hdr_sz == 12) //MRG is available
+ {
+ virtio_net_hdr_mrg_rxbuf_t *hdr =
+ &vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
+
+ //Move from available to used buffer
+ rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
+ desc_head;
+ rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
+ desc_len;
+ vhost_user_log_dirty_ring (vui, rxvq,
+ ring[rxvq->last_used_idx &
+ rxvq->qsz_mask]);
+
+ rxvq->last_avail_idx++;
+ rxvq->last_used_idx++;
+ hdr->num_buffers++;
+ desc_len = 0;
+
+ if (PREDICT_FALSE
+ (rxvq->last_avail_idx == rxvq->avail->idx))
+ {
+ //Dequeue queued descriptors for this packet
+ rxvq->last_used_idx -= hdr->num_buffers - 1;
+ rxvq->last_avail_idx -= hdr->num_buffers - 1;
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
+ goto done;
+ }
+
+ desc_table = rxvq->desc;
+ desc_head = desc_index =
+ rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
+ if (PREDICT_FALSE
+ (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
+ {
+		      //It is seriously unlikely that a driver will put an
+		      //indirect descriptor after a non-indirect one.
+ if (PREDICT_FALSE
+ (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
+ goto done;
+ }
+ if (PREDICT_FALSE
+ (!(desc_table =
+ map_guest_mem (vui,
+ rxvq->desc[desc_index].addr,
+ &map_hint))))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
+ goto done;
+ }
+ desc_index = 0;
+ }
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+ }
+ else
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
+ goto done;
+ }
+ }
+
+ {
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
+ copy_len++;
+ cpy->len = bytes_left;
+ cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
+ cpy->dst = buffer_map_addr;
+ cpy->src = (uword) vlib_buffer_get_current (current_b0) +
+ current_b0->current_length - bytes_left;
+
+ bytes_left -= cpy->len;
+ buffer_len -= cpy->len;
+ buffer_map_addr += cpy->len;
+ desc_len += cpy->len;
+
+ CLIB_PREFETCH (&rxvq->desc, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ // Check if vlib buffer has more data. If not, get more or break.
+ if (PREDICT_TRUE (!bytes_left))
+ {
+ if (PREDICT_FALSE
+ (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
+ bytes_left = current_b0->current_length;
+ }
+ else
+ {
+ //End of packet
+ break;
+ }
+ }
+ }
+
+ //Move from available to used ring
+ rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
+ rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
+ vhost_user_log_dirty_ring (vui, rxvq,
+ ring[rxvq->last_used_idx & rxvq->qsz_mask]);
+ rxvq->last_avail_idx++;
+ rxvq->last_used_idx++;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vum->cpus[thread_index].current_trace->hdr =
+ vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
+ }
+
+ n_left--; //At the end for error counting when 'goto done' is invoked
+
+ /*
+ * Do the copy periodically to prevent
+ * vum->cpus[thread_index].copy array overflow and corrupt memory
+ */
+ if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
+ {
+ if (PREDICT_FALSE
+ (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
+ copy_len, &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+ copy_len = 0;
+
+ /* give buffers back to driver */
+ CLIB_MEMORY_BARRIER ();
+ rxvq->used->idx = rxvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, rxvq, idx);
+ }
+ buffers++;
+ }
+
+done:
+ //Do the memory copies
+ if (PREDICT_FALSE
+ (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
+ copy_len, &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+
+ CLIB_MEMORY_BARRIER ();
+ rxvq->used->idx = rxvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, rxvq, idx);
+
+  /*
+   * When n_left is set, error is always set to something too.
+   * In case the error is due to a lack of remaining buffers, we go back up
+   * and retry.
+   * The idea is that it is better to waste some time on packets
+   * that have already been processed than to drop them and fetch
+   * more fresh packets with a good likelihood that they will be dropped too.
+   * This technique also gives the VM driver more time to pick up packets.
+   * In case the traffic flows from physical to virtual interfaces, this
+   * technique ends up leveraging the physical NIC buffer in order to
+   * absorb the VM's CPU jitter.
+   */
+ if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
+ {
+ retry--;
+ goto retry;
+ }
+
+ /* interrupt (call) handling */
+ if ((rxvq->callfd_idx != ~0) &&
+ !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+ {
+ rxvq->n_since_last_int += frame->n_vectors - n_left;
+
+ if (rxvq->n_since_last_int > vum->coalesce_frames)
+ vhost_user_send_call (vm, rxvq);
+ }
+
+ vhost_user_vring_unlock (vui, qid);
+
+done3:
+ if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
+ {
+ vlib_error_count (vm, node->node_index, error, n_left);
+ vlib_increment_simple_counter
+ (vnet_main.interface_main.sw_if_counters
+ + VNET_INTERFACE_COUNTER_DROP,
+ thread_index, vui->sw_if_index, n_left);
+ }
+
+ vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
+
+static uword
+vhost_user_send_interrupt_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vhost_user_intf_t *vui;
+ f64 timeout = 3153600000.0 /* 100 years */ ;
+ uword event_type, *event_data = 0;
+ vhost_user_main_t *vum = &vhost_user_main;
+ u16 *queue;
+ f64 now, poll_time_remaining;
+ f64 next_timeout;
+ u8 stop_timer = 0;
+
+ while (1)
+ {
+ poll_time_remaining =
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ /*
+ * Use the remaining timeout if it is less than coalesce time to avoid
+ * resetting the existing timer in the middle of expiration
+ */
+ timeout = poll_time_remaining;
+ if (vlib_process_suspend_time_is_zero (timeout) ||
+ (timeout > vum->coalesce_time))
+ timeout = vum->coalesce_time;
+
+ now = vlib_time_now (vm);
+ switch (event_type)
+ {
+ case VHOST_USER_EVENT_STOP_TIMER:
+ stop_timer = 1;
+ break;
+
+ case VHOST_USER_EVENT_START_TIMER:
+ stop_timer = 0;
+ if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
+ break;
+ /* fall through */
+
+ case ~0:
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ next_timeout = timeout;
+ vec_foreach (queue, vui->rx_queues)
+ {
+ vhost_user_vring_t *rxvq =
+ &vui->vrings[VHOST_VRING_IDX_RX (*queue)];
+ vhost_user_vring_t *txvq =
+ &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+
+ if (txvq->n_since_last_int)
+ {
+ if (now >= txvq->int_deadline)
+ vhost_user_send_call (vm, txvq);
+ else
+ next_timeout = txvq->int_deadline - now;
+ }
+
+ if (rxvq->n_since_last_int)
+ {
+ if (now >= rxvq->int_deadline)
+ vhost_user_send_call (vm, rxvq);
+ else
+ next_timeout = rxvq->int_deadline - now;
+ }
+
+ if ((next_timeout < timeout) && (next_timeout > 0.0))
+ timeout = next_timeout;
+ }
+ });
+ /* *INDENT-ON* */
+ break;
+
+ default:
+ clib_warning ("BUG: unhandled event type %d", event_type);
+ break;
+ }
+ /* No less than 1 millisecond */
+ if (timeout < 1e-3)
+ timeout = 1e-3;
+ if (stop_timer)
+ timeout = 3153600000.0;
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
+ .function = vhost_user_send_interrupt_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vhost-user-send-interrupt-process",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
+ u32 qid, vnet_hw_interface_rx_mode mode)
+{
+ vlib_main_t *vm = vnm->vlib_main;
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
+ vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+
+ if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+ {
+ if (txvq->kickfd_idx == ~0)
+ {
+ // We cannot support interrupt mode if the driver opts out
+ return clib_error_return (0, "Driver does not support interrupt");
+ }
+ if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ {
+ vum->ifq_count++;
+	  // Start the timer when the first interface/queue switches to
+	  // interrupt mode
+ if ((vum->ifq_count == 1) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_START_TIMER, 0);
+ }
+ }
+ else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ {
+ if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) &&
+ vum->ifq_count)
+ {
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if ((vum->ifq_count == 0) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ }
+ }
+
+ txvq->mode = mode;
+ if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) ||
+ (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT))
+ txvq->used->flags = 0;
+ else
+ {
+ clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode,
+ hw_if_index, qid);
+ return clib_error_return (0, "unsupported");
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags)
+{
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
+
+ vui->admin_up = is_up;
+
+ if (is_up && vui->is_up)
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ return /* no error */ 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (vhost_user_dev_class,static) = {
+ .name = "vhost-user",
+ .tx_function = vhost_user_tx,
+ .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
+ .tx_function_error_strings = vhost_user_tx_func_error_strings,
+ .format_device_name = format_vhost_user_interface_name,
+ .name_renumber = vhost_user_name_renumber,
+ .admin_up_down_function = vhost_user_interface_admin_up_down,
+ .rx_mode_change_function = vhost_user_interface_rx_mode_change,
+ .format_tx_trace = format_vhost_trace,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (vhost_user_dev_class,
+ vhost_user_tx)
+/* *INDENT-ON* */
+
+static uword
+vhost_user_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ struct sockaddr_un sun;
+ int sockfd;
+ clib_file_t template = { 0 };
+ f64 timeout = 3153600000.0 /* 100 years */ ;
+ uword *event_data = 0;
+
+ sockfd = -1;
+ sun.sun_family = AF_UNIX;
+ template.read_function = vhost_user_socket_read;
+ template.error_function = vhost_user_socket_error;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
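+      /* Poll every 3 seconds to retry client sockets that have not
+       * connected yet */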
+ timeout = 3.0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+
+ if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
+ if (vui->clib_file_index == ~0)
+ {
+ if ((sockfd < 0) &&
+ ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
+ {
+ /*
+ * 1st time error or new error for this interface,
+ * spit out the message and record the error
+ */
+ if (!vui->sock_errno || (vui->sock_errno != errno))
+ {
+ clib_unix_warning
+ ("Error: Could not open unix socket for %s",
+ vui->sock_filename);
+ vui->sock_errno = errno;
+ }
+ continue;
+ }
+
+ /* try to connect */
+ strncpy (sun.sun_path, (char *) vui->sock_filename,
+ sizeof (sun.sun_path) - 1);
+
+ /* Avoid hanging VPP if the other end does not accept */
+ if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0)
+ clib_unix_warning ("fcntl");
+
+ if (connect (sockfd, (struct sockaddr *) &sun,
+ sizeof (struct sockaddr_un)) == 0)
+ {
+ /* Set the socket to blocking as it was before */
+ if (fcntl(sockfd, F_SETFL, 0) < 0)
+ clib_unix_warning ("fcntl2");
+
+ vui->sock_errno = 0;
+ template.file_descriptor = sockfd;
+ template.private_data =
+ vui - vhost_user_main.vhost_user_interfaces;
+ vui->clib_file_index = clib_file_add (&file_main, &template);
+
+ /* This sockfd is considered consumed */
+ sockfd = -1;
+ }
+ else
+ {
+ vui->sock_errno = errno;
+ }
+ }
+ else
+ {
+ /* check if socket is alive */
+ int error = 0;
+ socklen_t len = sizeof (error);
+ int fd = UNIX_GET_FD(vui->clib_file_index);
+ int retval =
+ getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
+
+ if (retval)
+ {
+ DBG_SOCK ("getsockopt returned %d", retval);
+ vhost_user_if_disconnect (vui);
+ }
+ }
+ }
+ });
+ /* *INDENT-ON* */
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
+ .function = vhost_user_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vhost-user-process",
+};
+/* *INDENT-ON* */
+
+/**
+ * Disable and reset the interface structure.
+ * It can then either be initialized again, or removed from the pool of
+ * used interfaces.
+ */
+static void
+vhost_user_term_if (vhost_user_intf_t * vui)
+{
+ int q;
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ // disconnect interface sockets
+ vhost_user_if_disconnect (vui);
+ vhost_user_update_iface_state (vui);
+
+ for (q = 0; q < VHOST_VRING_MAX_N; q++)
+ {
+ clib_mem_free ((void *) vui->vring_locks[q]);
+ }
+
+ if (vui->unix_server_index != ~0)
+ {
+ //Close server socket
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vui->unix_server_index);
+ clib_file_del (&file_main, uf);
+ vui->unix_server_index = ~0;
+ unlink (vui->sock_filename);
+ }
+
+ mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename,
+ &vui->if_index);
+}
+
+int
+vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ int rv = 0;
+ vnet_hw_interface_t *hwif;
+ u16 *queue;
+
+ if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
+ hwif->dev_class_index != vhost_user_dev_class.index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ DBG_SOCK ("Deleting vhost-user interface %s (instance %d)",
+ hwif->name, hwif->dev_instance);
+
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
+
+ vec_foreach (queue, vui->rx_queues)
+ {
+ vhost_user_vring_t *txvq;
+
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+ if ((vum->ifq_count > 0) &&
+ ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)))
+ {
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if ((vum->ifq_count == 0) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ {
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ break;
+ }
+ }
+ }
+
+ // Disable and reset interface
+ vhost_user_term_if (vui);
+
+ // Reset renumbered iface
+ if (hwif->dev_instance <
+ vec_len (vum->show_dev_instance_by_real_dev_instance))
+ vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0;
+
+ // Delete ethernet interface
+ ethernet_delete_interface (vnm, vui->hw_if_index);
+
+ // Back to pool
+ pool_put (vum->vhost_user_interfaces, vui);
+
+ return rv;
+}
+
+static clib_error_t *
+vhost_user_exit (vlib_main_t * vm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+
+ vlib_worker_thread_barrier_sync (vlib_get_main ());
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ vhost_user_delete_if (vnm, vm, vui->sw_if_index);
+ });
+ /* *INDENT-ON* */
+ vlib_worker_thread_barrier_release (vlib_get_main ());
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
+
+/**
+ * Open server unix socket on specified sock_filename.
+ */
+static int
+vhost_user_init_server_sock (const char *sock_filename, int *sock_fd)
+{
+ int rv = 0;
+ struct sockaddr_un un = { };
+ int fd;
+ /* create listening socket */
+ if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ un.sun_family = AF_UNIX;
+ strncpy ((char *) un.sun_path, (char *) sock_filename,
+ sizeof (un.sun_path) - 1);
+
+ /* remove if exists */
+ unlink ((char *) sock_filename);
+
+ if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_2;
+ goto error;
+ }
+
+ if (listen (fd, 1) == -1)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+ goto error;
+ }
+
+ *sock_fd = fd;
+ return 0;
+
+error:
+ close (fd);
+ return rv;
+}
+
+/**
+ * Create ethernet interface for vhost user interface.
+ */
+static void
+vhost_user_create_ethernet (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_intf_t * vui, u8 * hwaddress)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u8 hwaddr[6];
+ clib_error_t *error;
+
+ /* create hw and sw interface */
+ if (hwaddress)
+ {
+ clib_memcpy (hwaddr, hwaddress, 6);
+ }
+ else
+ {
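+      /* Generate a random, locally administered unicast MAC (02:fe:...) */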
+ random_u32 (&vum->random);
+ clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random));
+ hwaddr[0] = 2;
+ hwaddr[1] = 0xfe;
+ }
+
+ error = ethernet_register_interface
+ (vnm,
+ vhost_user_dev_class.index,
+ vui - vum->vhost_user_interfaces /* device instance */ ,
+ hwaddr /* ethernet address */ ,
+ &vui->hw_if_index, 0 /* flag change */ );
+
+ if (error)
+ clib_error_report (error);
+
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
+ hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+}
+
+/*
+ * Initialize vui with specified attributes
+ */
+static void
+vhost_user_vui_init (vnet_main_t * vnm,
+ vhost_user_intf_t * vui,
+ int server_sock_fd,
+ const char *sock_filename,
+ u64 feature_mask, u32 * sw_if_index)
+{
+ vnet_sw_interface_t *sw;
+ int q;
+ vhost_user_main_t *vum = &vhost_user_main;
+ vnet_hw_interface_t *hw;
+
+ hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
+ sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
+ if (server_sock_fd != -1)
+ {
+ clib_file_t template = { 0 };
+ template.read_function = vhost_user_socksvr_accept_ready;
+ template.file_descriptor = server_sock_fd;
+ template.private_data = vui - vum->vhost_user_interfaces; //hw index
+ vui->unix_server_index = clib_file_add (&file_main, &template);
+ }
+ else
+ {
+ vui->unix_server_index = ~0;
+ }
+
+ vui->sw_if_index = sw->sw_if_index;
+ strncpy (vui->sock_filename, sock_filename,
+ ARRAY_LEN (vui->sock_filename) - 1);
+ vui->sock_errno = 0;
+ vui->is_up = 0;
+ vui->feature_mask = feature_mask;
+ vui->clib_file_index = ~0;
+ vui->log_base_addr = 0;
+ vui->if_index = vui - vum->vhost_user_interfaces;
+ mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
+ &vui->if_index, 0);
+
+ for (q = 0; q < VHOST_VRING_MAX_N; q++)
+ vhost_user_vring_init (vui, q);
+
+ hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
+
+ if (sw_if_index)
+ *sw_if_index = vui->sw_if_index;
+
+ for (q = 0; q < VHOST_VRING_MAX_N; q++)
+ {
+ vui->vring_locks[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ memset ((void *) vui->vring_locks[q], 0, CLIB_CACHE_LINE_BYTES);
+ }
+
+ vec_validate (vui->per_cpu_tx_qid,
+ vlib_get_thread_main ()->n_vlib_mains - 1);
+ vhost_user_tx_thread_placement (vui);
+}
+
+int
+vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
+ const char *sock_filename,
+ u8 is_server,
+ u32 * sw_if_index,
+ u64 feature_mask,
+ u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
+{
+ vhost_user_intf_t *vui = NULL;
+ u32 sw_if_idx = ~0;
+ int rv = 0;
+ int server_sock_fd = -1;
+ vhost_user_main_t *vum = &vhost_user_main;
+ uword *if_index;
+
+ if (sock_filename == NULL || !(strlen (sock_filename) > 0))
+ {
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename);
+ if (if_index)
+ {
+ if (sw_if_index)
+ {
+ vui = &vum->vhost_user_interfaces[*if_index];
+ *sw_if_index = vui->sw_if_index;
+ }
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+ }
+
+ if (is_server)
+ {
+ if ((rv =
+ vhost_user_init_server_sock (sock_filename, &server_sock_fd)) != 0)
+ {
+ return rv;
+ }
+ }
+
+ pool_get (vhost_user_main.vhost_user_interfaces, vui);
+
+ vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
+ vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename,
+ feature_mask, &sw_if_idx);
+
+ if (renumber)
+ vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
+
+ if (sw_if_index)
+ *sw_if_index = sw_if_idx;
+
+ // Process node must connect
+ vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
+
+ return rv;
+}
+
+int
+vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
+ const char *sock_filename,
+ u8 is_server,
+ u32 sw_if_index,
+ u64 feature_mask, u8 renumber, u32 custom_dev_instance)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui = NULL;
+ u32 sw_if_idx = ~0;
+ int server_sock_fd = -1;
+ int rv = 0;
+ vnet_hw_interface_t *hwif;
+ uword *if_index;
+
+ if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
+ hwif->dev_class_index != vhost_user_dev_class.index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (sock_filename == NULL || !(strlen (sock_filename) > 0))
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+
+ vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
+
+  /*
+   * Disallow changing the interface to have the same path name
+   * as another interface
+   */
+ if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename);
+ if (if_index && (*if_index != vui->if_index))
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+
+ // First try to open server socket
+ if (is_server)
+ if ((rv = vhost_user_init_server_sock (sock_filename,
+ &server_sock_fd)) != 0)
+ return rv;
+
+ vhost_user_term_if (vui);
+ vhost_user_vui_init (vnm, vui, server_sock_fd,
+ sock_filename, feature_mask, &sw_if_idx);
+
+ if (renumber)
+ vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
+
+ // Process node must connect
+ vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
+
+ return rv;
+}
+
+clib_error_t *
+vhost_user_connect_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *sock_filename = NULL;
+ u32 sw_if_index;
+ u8 is_server = 0;
+ u64 feature_mask = (u64) ~ (0ULL);
+ u8 renumber = 0;
+ u32 custom_dev_instance = ~0;
+ u8 hwaddr[6];
+ u8 *hw = NULL;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "socket %s", &sock_filename))
+ ;
+ else if (unformat (line_input, "server"))
+ is_server = 1;
+ else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
+ ;
+ else
+ if (unformat
+ (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr))
+ hw = hwaddr;
+ else if (unformat (line_input, "renumber %d", &custom_dev_instance))
+ {
+ renumber = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vnet_main_t *vnm = vnet_get_main ();
+
+ int rv;
+ if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
+ is_server, &sw_if_index, feature_mask,
+ renumber, custom_dev_instance, hw)))
+ {
+ error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+
+done:
+ vec_free (sock_filename);
+ unformat_free (line_input);
+
+ return error;
+}
+
+clib_error_t *
+vhost_user_delete_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 sw_if_index = ~0;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat
+ (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ {
+ vnet_hw_interface_t *hwif =
+ vnet_get_sup_hw_interface (vnm, sw_if_index);
+ if (hwif == NULL ||
+ vhost_user_dev_class.index != hwif->dev_class_index)
+ {
+ error = clib_error_return (0, "Not a vhost interface");
+ goto done;
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vhost_user_delete_if (vnm, vm, sw_if_index);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+int
+vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_intf_details_t ** out_vuids)
+{
+ int rv = 0;
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ vhost_user_intf_details_t *r_vuids = NULL;
+ vhost_user_intf_details_t *vuid = NULL;
+ u32 *hw_if_indices = 0;
+ vnet_hw_interface_t *hi;
+ u8 *s = NULL;
+ int i;
+
+ if (!out_vuids)
+ return -1;
+
+ pool_foreach (vui, vum->vhost_user_interfaces,
+ vec_add1 (hw_if_indices, vui->hw_if_index);
+ );
+
+ for (i = 0; i < vec_len (hw_if_indices); i++)
+ {
+ hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
+
+ vec_add2 (r_vuids, vuid, 1);
+ vuid->sw_if_index = vui->sw_if_index;
+ vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
+ vuid->features = vui->features;
+ vuid->num_regions = vui->nregions;
+ vuid->is_server = vui->unix_server_index != ~0;
+ vuid->sock_errno = vui->sock_errno;
+ strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename,
+ ARRAY_LEN (vuid->sock_filename) - 1);
+
+ s = format (s, "%v%c", hi->name, 0);
+
+ strncpy ((char *) vuid->if_name, (char *) s,
+ ARRAY_LEN (vuid->if_name) - 1);
+ _vec_len (s) = 0;
+ }
+
+ vec_free (s);
+ vec_free (hw_if_indices);
+
+ *out_vuids = r_vuids;
+
+ return rv;
+}
+
+clib_error_t *
+show_vhost_user_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ u32 hw_if_index, *hw_if_indices = 0;
+ vnet_hw_interface_t *hi;
+ u16 *queue;
+ u32 ci;
+ int i, j, q;
+ int show_descr = 0;
+ struct feat_struct
+ {
+ u8 bit;
+ char *str;
+ };
+ struct feat_struct *feat_entry;
+
+ static struct feat_struct feat_array[] = {
+#define _(s,b) { .str = #s, .bit = b, },
+ foreach_virtio_net_feature
+#undef _
+ {.str = NULL}
+ };
+
+#define foreach_protocol_feature \
+ _(VHOST_USER_PROTOCOL_F_MQ) \
+ _(VHOST_USER_PROTOCOL_F_LOG_SHMFD)
+
+ static struct feat_struct proto_feat_array[] = {
+#define _(s) { .str = #s, .bit = s},
+ foreach_protocol_feature
+#undef _
+ {.str = NULL}
+ };
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ {
+ vec_add1 (hw_if_indices, hw_if_index);
+ }
+ else if (unformat (input, "descriptors") || unformat (input, "desc"))
+ show_descr = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ if (vec_len (hw_if_indices) == 0)
+ {
+ pool_foreach (vui, vum->vhost_user_interfaces,
+ vec_add1 (hw_if_indices, vui->hw_if_index);
+ );
+ }
+ vlib_cli_output (vm, "Virtio vhost-user interfaces");
+ vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
+ vum->coalesce_frames, vum->coalesce_time);
+ vlib_cli_output (vm, " number of rx virtqueues in interrupt mode: %d",
+ vum->ifq_count);
+
+ for (i = 0; i < vec_len (hw_if_indices); i++)
+ {
+ hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
+ vlib_cli_output (vm, "Interface: %s (ifindex %d)",
+ hi->name, hw_if_indices[i]);
+
+ vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
+ " features mask (0x%llx): \n"
+ " features (0x%llx): \n",
+ vui->virtio_net_hdr_sz, vui->feature_mask,
+ vui->features);
+
+ feat_entry = (struct feat_struct *) &feat_array;
+ while (feat_entry->str)
+ {
+ if (vui->features & (1ULL << feat_entry->bit))
+ vlib_cli_output (vm, " %s (%d)", feat_entry->str,
+ feat_entry->bit);
+ feat_entry++;
+ }
+
+ vlib_cli_output (vm, " protocol features (0x%llx)",
+ vui->protocol_features);
+ feat_entry = (struct feat_struct *) &proto_feat_array;
+ while (feat_entry->str)
+ {
+ if (vui->protocol_features & (1ULL << feat_entry->bit))
+ vlib_cli_output (vm, " %s (%d)", feat_entry->str,
+ feat_entry->bit);
+ feat_entry++;
+ }
+
+ vlib_cli_output (vm, "\n");
+
+ vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
+ vui->sock_filename,
+ (vui->unix_server_index != ~0) ? "server" : "client",
+ strerror (vui->sock_errno));
+
+ vlib_cli_output (vm, " rx placement: ");
+
+ vec_foreach (queue, vui->rx_queues)
+ {
+ vnet_main_t *vnm = vnet_get_main ();
+ uword thread_index;
+ vnet_hw_interface_rx_mode mode;
+
+ thread_index = vnet_get_device_input_thread_index (vnm,
+ vui->hw_if_index,
+ *queue);
+ vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode);
+ vlib_cli_output (vm, " thread %d on vring %d, %U\n",
+ thread_index, VHOST_VRING_IDX_TX (*queue),
+ format_vnet_hw_interface_rx_mode, mode);
+ }
+
+ vlib_cli_output (vm, " tx placement: %s\n",
+ vui->use_tx_spinlock ? "spin-lock" : "lock-free");
+
+ vec_foreach_index (ci, vui->per_cpu_tx_qid)
+ {
+ vlib_cli_output (vm, " thread %d on vring %d\n", ci,
+ VHOST_VRING_IDX_RX (vui->per_cpu_tx_qid[ci]));
+ }
+
+ vlib_cli_output (vm, "\n");
+
+ vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
+
+ if (vui->nregions)
+ {
+ vlib_cli_output (vm,
+ " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
+ vlib_cli_output (vm,
+ " ====== ===== ================== ================== ================== ================== ==================\n");
+ }
+ for (j = 0; j < vui->nregions; j++)
+ {
+ vlib_cli_output (vm,
+ " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
+ j, vui->region_mmap_fd[j],
+ vui->regions[j].guest_phys_addr,
+ vui->regions[j].memory_size,
+ vui->regions[j].userspace_addr,
+ vui->regions[j].mmap_offset,
+ pointer_to_uword (vui->region_mmap_addr[j]));
+ }
+ for (q = 0; q < VHOST_VRING_MAX_N; q++)
+ {
+ if (!vui->vrings[q].started)
+ continue;
+
+ vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
+ (q & 1) ? "RX" : "TX",
+ vui->vrings[q].enabled ? "" : " disabled");
+
+ vlib_cli_output (vm,
+ " qsz %d last_avail_idx %d last_used_idx %d\n",
+ vui->vrings[q].qsz_mask + 1,
+ vui->vrings[q].last_avail_idx,
+ vui->vrings[q].last_used_idx);
+
+ if (vui->vrings[q].avail && vui->vrings[q].used)
+ vlib_cli_output (vm,
+ " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
+ vui->vrings[q].avail->flags,
+ vui->vrings[q].avail->idx,
+ vui->vrings[q].used->flags,
+ vui->vrings[q].used->idx);
+
+ int kickfd = UNIX_GET_FD (vui->vrings[q].kickfd_idx);
+ int callfd = UNIX_GET_FD (vui->vrings[q].callfd_idx);
+ vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n",
+ kickfd, callfd, vui->vrings[q].errfd);
+
+ if (show_descr)
+ {
+ vlib_cli_output (vm, "\n descriptor table:\n");
+ vlib_cli_output (vm,
+ " id addr len flags next user_addr\n");
+ vlib_cli_output (vm,
+ " ===== ================== ===== ====== ===== ==================\n");
+ for (j = 0; j < vui->vrings[q].qsz_mask + 1; j++)
+ {
+ u32 mem_hint = 0;
+ vlib_cli_output (vm,
+ " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
+ j, vui->vrings[q].desc[j].addr,
+ vui->vrings[q].desc[j].len,
+ vui->vrings[q].desc[j].flags,
+ vui->vrings[q].desc[j].next,
+ pointer_to_uword (map_guest_mem
+ (vui,
+ vui->vrings[q].desc[j].
+ addr, &mem_hint)));
+ }
+ }
+ }
+ vlib_cli_output (vm, "\n");
+ }
+done:
+ vec_free (hw_if_indices);
+ return error;
+}
+
+/*
+ * CLI functions
+ */
+
+/*?
+ * Create a vHost User interface. Once created, a new virtual interface
+ * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>'
+ * is the next free index.
+ *
+ * There are several parameters associated with a vHost interface:
+ *
+ * - <b>socket <socket-filename></b> - Name of the linux socket used by QEMU/VM and
+ * VPP to manage the vHost interface. If the socket does not already exist, VPP
+ * will create it.
+ *
+ * - <b>server</b> - Optional flag to indicate that VPP should be the server for the
+ * linux socket. If not provided, VPP will be the client.
+ *
+ * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated at
+ * startup. By default, all supported features will be advertised. Otherwise,
+ * provide the set of features desired.
+ * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
+ * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
+ * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
+ * - 0x000400000 (22) - VIRTIO_NET_F_MQ
+ * - 0x004000000 (26) - VHOST_F_LOG_ALL
+ * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT
+ * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC
+ * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES
+ * - 0x100000000 (32) - VIRTIO_F_VERSION_1
+ *
+ * - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either
+ * X:X:X:X:X:X unix or X.X.X cisco format.
+ *
+ * - <b>renumber <dev_instance></b> - Optional parameter which allows the instance
+ * in the name to be specified. If the instance already exists, the name will be
+ * used anyway and multiple instances will have the same name. Use with caution.
+ *
+ * - <b>mode [interrupt | polling]</b> - Optional parameter specifying
+ * the input thread polling policy.
+ *
+ * @cliexpar
+ * Example of how to create a vhost interface with VPP as the client and all features enabled:
+ * @cliexstart{create vhost-user socket /tmp/vhost1.sock}
+ * VirtualEthernet0/0/0
+ * @cliexend
+ * Example of how to create a vhost interface with VPP as the server and with just
+ * multiple queues enabled:
+ * @cliexstart{create vhost-user socket /tmp/vhost2.sock server feature-mask 0x40400000}
+ * VirtualEthernet0/0/1
+ * @cliexend
+ * Once the vHost interface is created, enable the interface using:
+ * @cliexcmd{set interface state VirtualEthernet0/0/0 up}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
+ .path = "create vhost-user",
+ .short_help = "create vhost-user socket <socket-filename> [server] "
+ "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] ",
+ .function = vhost_user_connect_command_fn,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Delete a vHost User interface using the interface name or the
+ * software interface index. Use the '<em>show interface</em>'
+ * command to determine the software interface index. On deletion,
+ * the linux socket will not be deleted.
+ *
+ * @cliexpar
+ * Example of how to delete a vhost interface by name:
+ * @cliexcmd{delete vhost-user VirtualEthernet0/0/1}
+ * Example of how to delete a vhost interface by software interface index:
+ * @cliexcmd{delete vhost-user sw_if_index 1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
+ .path = "delete vhost-user",
+ .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
+ .function = vhost_user_delete_command_fn,
+};
+
+/*?
+ * Display the attributes of a single vHost User interface (provide the
+ * interface name), multiple vHost User interfaces (provide a list of
+ * interface names separated by spaces) or all vHost User interfaces (omit
+ * the interface name to display all vHost interfaces).
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to display a vhost interface:
+ * @cliexstart{show vhost-user VirtualEthernet0/0/0}
+ * Virtio vhost-user interfaces
+ * Global:
+ * coalesce frames 32 time 1e-3
+ * Interface: VirtualEthernet0/0/0 (ifindex 1)
+ * virtio_net_hdr_sz 12
+ * features mask (0xffffffffffffffff):
+ * features (0x50408000):
+ * VIRTIO_NET_F_MRG_RXBUF (15)
+ * VIRTIO_NET_F_MQ (22)
+ * VIRTIO_F_INDIRECT_DESC (28)
+ * VHOST_USER_F_PROTOCOL_FEATURES (30)
+ * protocol features (0x3)
+ * VHOST_USER_PROTOCOL_F_MQ (0)
+ * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
+ *
+ * socket filename /tmp/vhost1.sock type client errno "Success"
+ *
+ * rx placement:
+ * thread 1 on vring 1
+ * thread 1 on vring 5
+ * thread 2 on vring 3
+ * thread 2 on vring 7
+ * tx placement: spin-lock
+ * thread 0 on vring 0
+ * thread 1 on vring 2
+ * thread 2 on vring 0
+ *
+ * Memory regions (total 2)
+ * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
+ * ====== ===== ================== ================== ================== ================== ==================
+ * 0 60 0x0000000000000000 0x00000000000a0000 0x00002aaaaac00000 0x0000000000000000 0x00002aab2b400000
+ * 1 61 0x00000000000c0000 0x000000003ff40000 0x00002aaaaacc0000 0x00000000000c0000 0x00002aababcc0000
+ *
+ * Virtqueue 0 (TX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
+ * kickfd 62 callfd 64 errfd -1
+ *
+ * Virtqueue 1 (RX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 65 callfd 66 errfd -1
+ *
+ * Virtqueue 2 (TX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
+ * kickfd 63 callfd 70 errfd -1
+ *
+ * Virtqueue 3 (RX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 72 callfd 74 errfd -1
+ *
+ * Virtqueue 4 (TX disabled)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 76 callfd 78 errfd -1
+ *
+ * Virtqueue 5 (RX disabled)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 80 callfd 82 errfd -1
+ *
+ * Virtqueue 6 (TX disabled)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 84 callfd 86 errfd -1
+ *
+ * Virtqueue 7 (RX disabled)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 88 callfd 90 errfd -1
+ *
+ * @cliexend
+ *
+ * The optional '<em>descriptors</em>' parameter will display the same output as
+ * the previous example but will include the descriptor table for each queue.
+ * The output is truncated below:
+ * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
+ * Virtio vhost-user interfaces
+ * Global:
+ * coalesce frames 32 time 1e-3
+ * Interface: VirtualEthernet0/0/0 (ifindex 1)
+ * virtio_net_hdr_sz 12
+ * features mask (0xffffffffffffffff):
+ * features (0x50408000):
+ * VIRTIO_NET_F_MRG_RXBUF (15)
+ * VIRTIO_NET_F_MQ (22)
+ * :
+ * Virtqueue 0 (TX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
+ * kickfd 62 callfd 64 errfd -1
+ *
+ * descriptor table:
+ * id addr len flags next user_addr
+ * ===== ================== ===== ====== ===== ==================
+ * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974
+ * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034
+ * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4
+ * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4
+ * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474
+ * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34
+ * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4
+ * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4
+ * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74
+ * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634
+ * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4
+ * :
+ * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000
+ * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000
+ * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000
+ * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000
+ * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000
+ * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000
+ * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000
+ *
+ * Virtqueue 1 (RX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * :
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
+ .path = "show vhost-user",
+ .short_help = "show vhost-user [<interface> [<interface> [..]]] [descriptors]",
+ .function = show_vhost_user_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+debug_vhost_user_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ vhost_user_main_t *vum = &vhost_user_main;
+ u8 onoff = 0;
+ u8 input_found = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "missing argument");
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (input_found)
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ if (unformat (line_input, "on"))
+ {
+ input_found = 1;
+ onoff = 1;
+ }
+ else if (unformat (line_input, "off"))
+ {
+ input_found = 1;
+ onoff = 0;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vum->debug = onoff;
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (debug_vhost_user_command, static) = {
+ .path = "debug vhost-user",
+ .short_help = "debug vhost-user <on | off>",
+ .function = debug_vhost_user_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vhost_user_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
+ ;
+ else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
+ ;
+ else if (unformat (input, "dont-dump-memory"))
+ vum->dont_dump_vhost_user_memory = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return 0;
+}
+
+/* vhost-user { ... } configuration. */
+VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
+
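For reference, the config handler above corresponds to a startup.conf stanza along these lines (the parameter names come from the unformat strings; the values are illustrative, not recommendations):

    vhost-user {
      coalesce-frames 64
      coalesce-time 1e-3
      dont-dump-memory
    }
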
+void
+vhost_user_unmap_all (void)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+
+ if (vum->dont_dump_vhost_user_memory)
+ {
+ pool_foreach (vui, vum->vhost_user_interfaces,
+ unmap_all_mem_regions (vui);
+ );
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h
new file mode 100644
index 00000000..105b92b7
--- /dev/null
+++ b/src/vnet/devices/virtio/vhost-user.h
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __VIRTIO_VHOST_USER_H__
+#define __VIRTIO_VHOST_USER_H__
+/* vhost-user data structures */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+#define VHOST_USER_MSG_HDR_SZ 12
+#define VHOST_VRING_MAX_SIZE 32768
+#define VHOST_VRING_MAX_N 16 // 8 TX + 8 RX
+#define VHOST_VRING_IDX_RX(qid) (2*qid)
+#define VHOST_VRING_IDX_TX(qid) (2*qid + 1)
+
+#define VHOST_USER_VRING_NOFD_MASK 0x100
+#define VIRTQ_DESC_F_NEXT 1
+#define VIRTQ_DESC_F_INDIRECT 4
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+
+#define VHOST_USER_PROTOCOL_F_MQ 0
+#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
+#define VHOST_VRING_F_LOG 0
+
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD))
+
+/* If multiqueue is provided by the host, then we support it. */
+#define VIRTIO_NET_CTRL_MQ 4
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000
+
+#define VRING_USED_F_NO_NOTIFY 1
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
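The vring index macros above encode the usual vhost queue-pair layout: queue pair qid owns vrings 2*qid and 2*qid+1, so VHOST_VRING_MAX_N (16) bounds an interface at 8 queue pairs. A minimal sketch of the mapping; note the macro names follow the guest's perspective, which is why the show output earlier labels even-numbered vrings "TX": VPP transmits into the guest's RX ring.

    int rx1 = VHOST_VRING_IDX_RX (1);   /* vring 2 */
    int tx1 = VHOST_VRING_IDX_TX (1);   /* vring 3 */
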
+#define foreach_virtio_net_feature \
+ _ (VIRTIO_NET_F_MRG_RXBUF, 15) \
+ _ (VIRTIO_NET_F_CTRL_VQ, 17) \
+ _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21) \
+ _ (VIRTIO_NET_F_MQ, 22) \
+ _ (VHOST_F_LOG_ALL, 26) \
+ _ (VIRTIO_F_ANY_LAYOUT, 27) \
+ _ (VIRTIO_F_INDIRECT_DESC, 28) \
+ _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \
+ _ (VIRTIO_F_VERSION_1, 32)
+
+
+typedef enum
+{
+#define _(f,n) FEAT_##f = (n),
+ foreach_virtio_net_feature
+#undef _
+} virtio_net_feature_t;
+
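The foreach X-macro drives the enum above and can drive other expansions the same way; a minimal sketch of printing negotiated feature names from a 64-bit mask, assuming clib's fformat is available (bit 32, VIRTIO_F_VERSION_1, is why the ULL suffix matters):

    static inline void
    example_print_features (u64 features)
    {
    #define _(f, n) \
      if (features & (1ULL << (n))) fformat (stdout, "%s (%d)\n", #f, (n));
      foreach_virtio_net_feature
    #undef _
    }
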
+int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
+ const char *sock_filename, u8 is_server,
+ u32 * sw_if_index, u64 feature_mask,
+ u8 renumber, u32 custom_dev_instance, u8 * hwaddr);
+int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
+ const char *sock_filename, u8 is_server,
+ u32 sw_if_index, u64 feature_mask,
+ u8 renumber, u32 custom_dev_instance);
+int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
+ u32 sw_if_index);
+
+/* *INDENT-OFF* */
+typedef struct vhost_user_memory_region
+{
+ u64 guest_phys_addr;
+ u64 memory_size;
+ u64 userspace_addr;
+ u64 mmap_offset;
+} __attribute ((packed)) vhost_user_memory_region_t;
+
+typedef struct vhost_user_memory
+{
+ u32 nregions;
+ u32 padding;
+ vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute ((packed)) vhost_user_memory_t;
+
+typedef struct
+{
+ u32 index, num;
+} __attribute ((packed)) vhost_vring_state_t;
+
+typedef struct
+{
+ u32 index, flags;
+ u64 desc_user_addr, used_user_addr, avail_user_addr, log_guest_addr;
+} __attribute ((packed)) vhost_vring_addr_t;
+
+typedef struct vhost_user_log
+{
+ u64 size;
+ u64 offset;
+} __attribute ((packed)) vhost_user_log_t;
+
+typedef enum vhost_user_req
+{
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_MAX
+} vhost_user_req_t;
+
+// vring_desc I/O buffer descriptor
+typedef struct
+{
+ uint64_t addr; // packet data buffer address
+ uint32_t len; // packet data buffer size
+  uint16_t flags; // VIRTQ_DESC_F_* (defined above)
+  uint16_t next; // optional index of next descriptor in chain
+} __attribute ((packed)) vring_desc_t;
+
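Descriptors chain through the next field for as long as VIRTQ_DESC_F_NEXT is set in flags; a minimal walk, as a sketch only (indirect descriptors and full bounds checking omitted):

    static inline u32
    example_chain_bytes (vring_desc_t * table, u16 head, u16 qsz_mask)
    {
      u32 bytes = 0;
      u16 i = head;
      while (1)
        {
          bytes += table[i].len;
          if (!(table[i].flags & VIRTQ_DESC_F_NEXT))
            break;
          i = table[i].next & qsz_mask; /* stay within the ring */
        }
      return bytes;
    }
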
+typedef struct
+{
+ uint16_t flags;
+ volatile uint16_t idx;
+ uint16_t ring[VHOST_VRING_MAX_SIZE];
+} __attribute ((packed)) vring_avail_t;
+
+typedef struct
+{
+ uint16_t flags;
+ uint16_t idx;
+ struct /* vring_used_elem */
+ {
+ uint32_t id;
+ uint32_t len;
+ } ring[VHOST_VRING_MAX_SIZE];
+} __attribute ((packed)) vring_used_t;
+
+typedef struct
+{
+ u8 flags;
+ u8 gso_type;
+ u16 hdr_len;
+ u16 gso_size;
+ u16 csum_start;
+ u16 csum_offset;
+} __attribute ((packed)) virtio_net_hdr_t;
+
+typedef struct {
+ virtio_net_hdr_t hdr;
+ u16 num_buffers;
+} __attribute ((packed)) virtio_net_hdr_mrg_rxbuf_t;
+
+typedef struct vhost_user_msg {
+ vhost_user_req_t request;
+ u32 flags;
+ u32 size;
+ union
+ {
+ u64 u64;
+ vhost_vring_state_t state;
+ vhost_vring_addr_t addr;
+ vhost_user_memory_t memory;
+ vhost_user_log_t log;
+ };
+} __attribute ((packed)) vhost_user_msg_t;
+/* *INDENT-ON* */
+
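Note that VHOST_USER_MSG_HDR_SZ (12) is exactly the fixed prefix of vhost_user_msg_t: request (4 bytes) + flags (4) + size (4), with the union payload of size bytes following on the wire. A sanity-check sketch, assuming clib's STATIC_ASSERT and STRUCT_OFFSET_OF helpers:

    STATIC_ASSERT (STRUCT_OFFSET_OF (vhost_user_msg_t, u64) ==
                   VHOST_USER_MSG_HDR_SZ, "vhost-user msg header size");
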
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u16 qsz_mask;
+ u16 last_avail_idx;
+ u16 last_used_idx;
+ u16 n_since_last_int;
+ vring_desc_t *desc;
+ vring_avail_t *avail;
+ vring_used_t *used;
+ f64 int_deadline;
+ u8 started;
+ u8 enabled;
+ u8 log_used;
+ //Put non-runtime in a different cache line
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ int errfd;
+ u32 callfd_idx;
+ u32 kickfd_idx;
+ u64 log_guest_addr;
+
+ /* The rx queue policy (interrupt/adaptive/polling) for this queue */
+ u32 mode;
+} vhost_user_vring_t;
+
+#define VHOST_USER_EVENT_START_TIMER 1
+#define VHOST_USER_EVENT_STOP_TIMER 2
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 is_up;
+ u32 admin_up;
+ u32 unix_server_index;
+ u32 clib_file_index;
+ char sock_filename[256];
+ int sock_errno;
+ uword if_index;
+ u32 hw_if_index, sw_if_index;
+
+ //Feature negotiation
+ u64 features;
+ u64 feature_mask;
+ u64 protocol_features;
+
+ //Memory region information
+ u32 nregions;
+ vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
+ void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
+ u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS];
+ u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS];
+ u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS];
+
+ //Virtual rings
+ vhost_user_vring_t vrings[VHOST_VRING_MAX_N];
+ volatile u32 *vring_locks[VHOST_VRING_MAX_N];
+
+ int virtio_net_hdr_sz;
+ int is_any_layout;
+
+ void *log_base_addr;
+ u64 log_size;
+
+ /* Whether to use spinlock or per_cpu_tx_qid assignment */
+ u8 use_tx_spinlock;
+ u16 *per_cpu_tx_qid;
+
+ /* Vector of active rx queues for this interface */
+ u16 *rx_queues;
+} vhost_user_intf_t;
+
+typedef struct
+{
+ uword dst;
+ uword src;
+ u32 len;
+} vhost_copy_t;
+
+typedef struct
+{
+  u16 qid; /**< The interface queue index (Not the virtio vring idx) */
+  u16 device_index; /**< The device index */
+  u32 virtio_ring_flags; /**< Runtime queue flags */
+  u16 first_desc_len; /**< Length of the first data descriptor */
+  virtio_net_hdr_mrg_rxbuf_t hdr; /**< Virtio header */
+} vhost_trace_t;
+
+
+#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2)
+#define VHOST_USER_COPY_ARRAY_N (4 * VLIB_FRAME_SIZE)
+
+typedef struct
+{
+ u32 rx_buffers_len;
+ u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
+
+ virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE];
+ vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N];
+
+  /* This is here so it doesn't end up
+   * using stack or registers. */
+ vhost_trace_t *current_trace;
+} vhost_cpu_t;
+
+typedef struct
+{
+ mhash_t if_index_by_sock_name;
+ u32 mtu_bytes;
+ vhost_user_intf_t *vhost_user_interfaces;
+ u32 *show_dev_instance_by_real_dev_instance;
+ u32 coalesce_frames;
+ f64 coalesce_time;
+ int dont_dump_vhost_user_memory;
+
+ /** Per-CPU data for vhost-user */
+ vhost_cpu_t *cpus;
+
+ /** Pseudo random iterator */
+ u32 random;
+
+ /* The number of rx interface/queue pairs in interrupt mode */
+ u32 ifq_count;
+
+ /* debug on or off */
+ u8 debug;
+} vhost_user_main_t;
+
+typedef struct
+{
+ u8 if_name[64];
+ u32 sw_if_index;
+ u32 virtio_net_hdr_sz;
+ u64 features;
+ u8 is_server;
+ u8 sock_filename[256];
+ u32 num_regions;
+ int sock_errno;
+} vhost_user_intf_details_t;
+
+int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_intf_details_t ** out_vuids);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api
new file mode 100644
index 00000000..28d5e891
--- /dev/null
+++ b/src/vnet/devices/virtio/vhost_user.api
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief vhost-user interface create request
+    @param client_index - opaque cookie to identify the sender
+    @param is_server - our side is socket server
+    @param sock_filename - unix socket filename, used to speak with frontend
+    @param renumber - if non-zero, use custom_dev_instance to renumber the interface
+    @param custom_dev_instance - custom device instance number, used when renumber is set
+    @param use_custom_mac - enable or disable the use of the provided hardware address
+    @param mac_address - hardware address to use if 'use_custom_mac' is set
+    @param tag - opaque tag to attach to the new interface
+*/
+define create_vhost_user_if
+{
+ u32 client_index;
+ u32 context;
+ u8 is_server;
+ u8 sock_filename[256];
+ u8 renumber;
+ u32 custom_dev_instance;
+ u8 use_custom_mac;
+ u8 mac_address[6];
+ u8 tag[64];
+};
+
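A minimal client-side sketch of populating this request; allocation uses vl_msg_api_alloc as elsewhere in this patch, while message-id assignment and the shared-memory send are client-library plumbing omitted here, and the MAC is a made-up locally-administered address:

    vl_api_create_vhost_user_if_t *mp = vl_msg_api_alloc (sizeof (*mp));
    memset (mp, 0, sizeof (*mp));
    mp->is_server = 1;
    strncpy ((char *) mp->sock_filename, "/tmp/vhost1.sock",
             sizeof (mp->sock_filename) - 1);
    mp->use_custom_mac = 1;
    memcpy (mp->mac_address, "\x02\xfe\x00\x00\x00\x01", 6);
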
+/** \brief vhost-user interface create response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param sw_if_index - interface the operation is applied to
+*/
+define create_vhost_user_if_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief vhost-user interface modify request
+    @param client_index - opaque cookie to identify the sender
+    @param sw_if_index - interface index of the interface to modify
+    @param is_server - our side is socket server
+    @param sock_filename - unix socket filename, used to speak with frontend
+    @param renumber - if non-zero, use custom_dev_instance to renumber the interface
+    @param custom_dev_instance - custom device instance number, used when renumber is set
+*/
+autoreply define modify_vhost_user_if
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_server;
+ u8 sock_filename[256];
+ u8 renumber;
+ u32 custom_dev_instance;
+};
+
+/** \brief vhost-user interface delete request
+    @param client_index - opaque cookie to identify the sender
+    @param sw_if_index - interface index of the interface to delete
+*/
+autoreply define delete_vhost_user_if
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Vhost-user interface details structure (fix this)
+ @param sw_if_index - index of the interface
+ @param interface_name - name of interface
+ @param virtio_net_hdr_sz - net header size
+ @param features - interface features
+ @param is_server - vhost-user server socket
+ @param sock_filename - socket filename
+ @param num_regions - number of used memory regions
+*/
+define sw_interface_vhost_user_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 interface_name[64];
+ u32 virtio_net_hdr_sz;
+ u64 features;
+ u8 is_server;
+ u8 sock_filename[256];
+ u32 num_regions;
+ i32 sock_errno;
+};
+
+define sw_interface_vhost_user_dump
+{
+ u32 client_index;
+ u32 context;
+};
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c
new file mode 100644
index 00000000..78599241
--- /dev/null
+++ b/src/vnet/devices/virtio/vhost_user_api.c
@@ -0,0 +1,254 @@
+/*
+ *------------------------------------------------------------------
+ * vhost_user_api.c - vhost-user api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/devices/virtio/vhost-user.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(CREATE_VHOST_USER_IF, create_vhost_user_if) \
+_(MODIFY_VHOST_USER_IF, modify_vhost_user_if) \
+_(DELETE_VHOST_USER_IF, delete_vhost_user_if) \
+_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump)
+
+/*
+ * WARNING: replicated pending api refactor completion
+ */
+static void
+send_sw_interface_event_deleted (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 sw_if_index)
+{
+ vl_api_sw_interface_event_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ mp->admin_up_down = 0;
+ mp->link_up_down = 0;
+ mp->deleted = 1;
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp)
+{
+ int rv = 0;
+ vl_api_create_vhost_user_if_reply_t *rmp;
+ u32 sw_if_index = (u32) ~ 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ rv = vhost_user_create_if (vnm, vm, (char *) mp->sock_filename,
+ mp->is_server, &sw_if_index, (u64) ~ 0,
+ mp->renumber, ntohl (mp->custom_dev_instance),
+ (mp->use_custom_mac) ? mp->mac_address : NULL);
+
+ /* Remember an interface tag for the new interface */
+ if (rv == 0)
+ {
+ /* If a tag was supplied... */
+ if (mp->tag[0])
+ {
+ /* Make sure it's a proper C-string */
+ mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
+ u8 *tag = format (0, "%s%c", mp->tag, 0);
+ vnet_set_sw_interface_tag (vnm, tag, sw_if_index);
+ }
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp)
+{
+ int rv = 0;
+ vl_api_modify_vhost_user_if_reply_t *rmp;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ rv = vhost_user_modify_if (vnm, vm, (char *) mp->sock_filename,
+ mp->is_server, sw_if_index, (u64) ~ 0,
+ mp->renumber, ntohl (mp->custom_dev_instance));
+
+ REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY);
+}
+
+static void
+vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp)
+{
+ int rv = 0;
+ vl_api_delete_vhost_user_if_reply_t *rmp;
+ vpe_api_main_t *vam = &vpe_api_main;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ rv = vhost_user_delete_if (vnm, vm, sw_if_index);
+
+ REPLY_MACRO (VL_API_DELETE_VHOST_USER_IF_REPLY);
+ if (!rv)
+ {
+ unix_shared_memory_queue_t *q =
+ vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ vnet_clear_sw_interface_tag (vnm, sw_if_index);
+ send_sw_interface_event_deleted (vam, q, sw_if_index);
+ }
+}
+
+static void
+send_sw_interface_vhost_user_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ vhost_user_intf_details_t * vui,
+ u32 context)
+{
+ vl_api_sw_interface_vhost_user_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_VHOST_USER_DETAILS);
+ mp->sw_if_index = ntohl (vui->sw_if_index);
+ mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz);
+ mp->features = clib_net_to_host_u64 (vui->features);
+ mp->is_server = vui->is_server;
+ mp->num_regions = ntohl (vui->num_regions);
+ mp->sock_errno = ntohl (vui->sock_errno);
+ mp->context = context;
+
+ strncpy ((char *) mp->sock_filename,
+ (char *) vui->sock_filename, ARRAY_LEN (mp->sock_filename) - 1);
+ strncpy ((char *) mp->interface_name,
+ (char *) vui->if_name, ARRAY_LEN (mp->interface_name) - 1);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+ vl_api_sw_interface_vhost_user_dump_t_handler
+ (vl_api_sw_interface_vhost_user_dump_t * mp)
+{
+ int rv = 0;
+ vpe_api_main_t *am = &vpe_api_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ vhost_user_intf_details_t *ifaces = NULL;
+ vhost_user_intf_details_t *vuid = NULL;
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ rv = vhost_user_dump_ifs (vnm, vm, &ifaces);
+ if (rv)
+ return;
+
+ vec_foreach (vuid, ifaces)
+ {
+ send_sw_interface_vhost_user_details (am, q, vuid, mp->context);
+ }
+ vec_free (ifaces);
+}
+
+/*
+ * vhost_user_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_vhost_user;
+#undef _
+}
+
+static clib_error_t *
+vhost_user_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vhost_user_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c
new file mode 100644
index 00000000..5986438b
--- /dev/null
+++ b/src/vnet/dhcp/client.c
@@ -0,0 +1,1135 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/dhcp/client.h>
+#include <vnet/dhcp/dhcp_proxy.h>
+#include <vnet/fib/fib_table.h>
+
+dhcp_client_main_t dhcp_client_main;
+static u8 * format_dhcp_client_state (u8 * s, va_list * va);
+static vlib_node_registration_t dhcp_client_process_node;
+
+static void
+dhcp_client_add_rx_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
+{
+ /* Install a local entry for the offered address */
+ fib_prefix_t rx =
+ {
+ .fp_len = 32,
+ .fp_addr.ip4 = c->leased_address,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+
+ fib_table_entry_special_add(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &rx,
+ FIB_SOURCE_DHCP,
+ (FIB_ENTRY_FLAG_LOCAL));
+
+ /* And add the server's address as uRPF exempt so we can accept
+ * local packets from it */
+ fib_prefix_t server =
+ {
+ .fp_len = 32,
+ .fp_addr.ip4 = c->dhcp_server,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+
+ fib_table_entry_special_add(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &server,
+ FIB_SOURCE_URPF_EXEMPT,
+ (FIB_ENTRY_FLAG_DROP));
+}
+
+static void
+dhcp_client_remove_rx_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
+{
+ fib_prefix_t rx =
+ {
+ .fp_len = 32,
+ .fp_addr.ip4 = c->leased_address,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+
+ fib_table_entry_special_remove(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &rx,
+ FIB_SOURCE_DHCP);
+ fib_prefix_t server =
+ {
+ .fp_len = 32,
+ .fp_addr.ip4 = c->dhcp_server,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+
+ fib_table_entry_special_remove(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &server,
+ FIB_SOURCE_URPF_EXEMPT);
+}
+
+static void
+dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
+{
+ /*
+ * Install any/all info gleaned from dhcp, right here
+ */
+ ip4_add_del_interface_address (dcm->vlib_main, c->sw_if_index,
+ (void *) &c->leased_address,
+ c->subnet_mask_width, 0 /*is_del*/);
+}
+
+static void
+dhcp_client_release_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
+{
+ /*
+ * Remove any/all info gleaned from dhcp, right here. Caller(s)
+ * have not wiped out the info yet.
+ */
+
+ ip4_add_del_interface_address (dcm->vlib_main, c->sw_if_index,
+ (void *) &c->leased_address,
+ c->subnet_mask_width, 1 /*is_del*/);
+}
+
+static void
+set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c)
+{
+ /* Acquire the L2 rewrite string for the indicated sw_if_index */
+ c->l2_rewrite = vnet_build_rewrite_for_sw_interface(
+ dcm->vnet_main,
+ c->sw_if_index,
+ VNET_LINK_IP4,
+ 0 /* broadcast */);
+}
+
+void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
+static void
+dhcp_client_proc_callback (uword * client_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ASSERT (vlib_get_thread_index () == 0);
+ vlib_process_signal_event (vm, dhcp_client_process_node.index,
+ EVENT_DHCP_CLIENT_WAKEUP, *client_index);
+}
+
+/*
+ * dhcp_client_for_us - server-to-client callback.
+ * Called from proxy_node.c:dhcp_proxy_to_client_input().
+ * This function first decides whether the packet in question is
+ * actually for the dhcp client code, in case we're also acting as
+ * a dhcp proxy. Ay caramba, what a folly!
+ */
+int dhcp_client_for_us (u32 bi, vlib_buffer_t * b,
+ ip4_header_t * ip,
+ udp_header_t * udp,
+ dhcp_header_t * dhcp)
+{
+ dhcp_client_main_t * dcm = &dhcp_client_main;
+ vlib_main_t * vm = dcm->vlib_main;
+ dhcp_client_t * c;
+ uword * p;
+ f64 now = vlib_time_now (dcm->vlib_main);
+ u8 dhcp_message_type = 0;
+ dhcp_option_t * o;
+
+ /*
+ * Doing dhcp client on this interface?
+   * Presumably we will always receive dhcp client for-us pkts on
+ * the interface that's asking for an address.
+ */
+ p = hash_get (dcm->client_by_sw_if_index,
+ vnet_buffer(b)->sw_if_index [VLIB_RX]);
+ if (p == 0)
+ return 0; /* no */
+
+ c = pool_elt_at_index (dcm->clients, p[0]);
+
+ /* Mixing dhcp relay and dhcp proxy? DGMS... */
+ if (c->state == DHCP_BOUND && c->retry_count == 0)
+ return 0;
+
+ /* parse through the packet, learn what we can */
+ if (dhcp->your_ip_address.as_u32)
+ c->leased_address.as_u32 = dhcp->your_ip_address.as_u32;
+
+ c->dhcp_server.as_u32 = dhcp->server_ip_address.as_u32;
+
+ o = (dhcp_option_t *) dhcp->options;
+
+ while (o->option != 0xFF /* end of options */ &&
+ (u8 *) o < (b->data + b->current_data + b->current_length))
+ {
+ switch (o->option)
+ {
+ case 53: /* dhcp message type */
+ dhcp_message_type = o->data[0];
+ break;
+
+ case 51: /* lease time */
+ {
+ u32 lease_time_in_seconds =
+ clib_host_to_net_u32 (o->data_as_u32[0]);
+ c->lease_expires = now + (f64) lease_time_in_seconds;
+ c->lease_lifetime = lease_time_in_seconds;
+ /* Set a sensible default, in case we don't get opt 58 */
+ c->lease_renewal_interval = lease_time_in_seconds / 2;
+ }
+ break;
+
+ case 58: /* lease renew time in seconds */
+ {
+ u32 lease_renew_time_in_seconds =
+ clib_host_to_net_u32 (o->data_as_u32[0]);
+ c->lease_renewal_interval = lease_renew_time_in_seconds;
+ }
+ break;
+
+ case 54: /* dhcp server address */
+ c->dhcp_server.as_u32 = o->data_as_u32[0];
+ break;
+
+ case 1: /* subnet mask */
+ {
+ u32 subnet_mask =
+ clib_host_to_net_u32 (o->data_as_u32[0]);
+ c->subnet_mask_width = count_set_bits (subnet_mask);
+ }
+ break;
+ case 3: /* router address */
+ {
+ u32 router_address = o->data_as_u32[0];
+ c->router_address.as_u32 = router_address;
+ }
+ break;
+
+ case 12: /* hostname */
+ {
+ /* Replace the existing hostname if necessary */
+ vec_free (c->hostname);
+ vec_validate (c->hostname, o->length - 1);
+ clib_memcpy (c->hostname, o->data, o->length);
+ }
+ break;
+
+ /* $$$$ Your message in this space, parse more options */
+ default:
+ break;
+ }
+
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+ }
+
+ switch (c->state)
+ {
+ case DHCP_DISCOVER:
+ if (dhcp_message_type != DHCP_PACKET_OFFER)
+ {
+ clib_warning ("sw_if_index %d state %U message type %d",
+ c->sw_if_index, format_dhcp_client_state,
+ c->state, dhcp_message_type);
+ c->next_transmit = now + 5.0;
+ break;
+ }
+ /*
+ * in order to accept unicasted ACKs we need to configure the offered
+ * address on the interface. However, at this point we may not know the
+   * subnet-mask (an OFFER may not contain it). So add a temporary receive
+   * and uRPF-exempt entry.
+ */
+ dhcp_client_add_rx_address (dcm, c);
+
+ /* Received an offer, go send a request */
+ c->state = DHCP_REQUEST;
+ c->retry_count = 0;
+ c->next_transmit = 0; /* send right now... */
+ /* Poke the client process, which will send the request */
+ uword client_id = c - dcm->clients;
+ vl_api_rpc_call_main_thread (dhcp_client_proc_callback,
+ (u8 *) &client_id, sizeof (uword));
+ break;
+
+ case DHCP_BOUND:
+ case DHCP_REQUEST:
+ if (dhcp_message_type != DHCP_PACKET_ACK)
+ {
+ clib_warning ("sw_if_index %d state %U message type %d",
+ c->sw_if_index, format_dhcp_client_state,
+ c->state, dhcp_message_type);
+ c->next_transmit = now + 5.0;
+ break;
+ }
+ /* OK, we own the address (etc), add to the routing table(s) */
+ if (c->state == DHCP_REQUEST)
+ {
+ void (*fp)(u32, u32, u8 *, u8, u8, u8 *, u8 *, u8 *) = c->event_callback;
+
+ /* replace the temporary RX address with the correct subnet */
+ dhcp_client_remove_rx_address (dcm, c);
+ dhcp_client_acquire_address (dcm, c);
+
+ /*
+ * Configure default IP route:
+ */
+ if (c->router_address.as_u32)
+ {
+ fib_prefix_t all_0s =
+ {
+ .fp_len = 0,
+ .fp_addr.ip4.as_u32 = 0x0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ ip46_address_t nh =
+ {
+ .ip4 = c->router_address,
+ };
+
+ fib_table_entry_path_add (fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_0s,
+ FIB_SOURCE_DHCP,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh,
+ c->sw_if_index,
+ ~0,
+ 1,
+ NULL, // no label stack
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+
+ /*
+ * Call the user's event callback to report DHCP information
+ */
+ if (fp)
+	    (*fp) (c->client_index, /* client index */
+ c->pid,
+ c->hostname,
+ c->subnet_mask_width,
+ 0, /* is_ipv6 */
+ (u8 *)&c->leased_address, /* host IP address */
+ (u8 *)&c->router_address, /* router IP address */
+ (u8 *)(c->l2_rewrite + 6));/* host MAC address */
+ }
+
+ c->state = DHCP_BOUND;
+ c->retry_count = 0;
+ c->next_transmit = now + (f64) c->lease_renewal_interval;
+ c->lease_expires = now + (f64) c->lease_lifetime;
+ break;
+
+ default:
+ clib_warning ("client %d bogus state %d",
+ c - dcm->clients, c->state);
+ break;
+ }
+
+ /* drop the pkt, return 1 */
+ vlib_buffer_free (vm, &bi, 1);
+ return 1;
+}
+
+static void
+send_dhcp_pkt (dhcp_client_main_t * dcm, dhcp_client_t * c,
+ dhcp_packet_type_t type, int is_broadcast)
+{
+ vlib_main_t * vm = dcm->vlib_main;
+ vnet_main_t * vnm = dcm->vnet_main;
+ vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, c->sw_if_index);
+ vnet_sw_interface_t * sup_sw
+ = vnet_get_sup_sw_interface (vnm, c->sw_if_index);
+ vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, c->sw_if_index);
+ vlib_buffer_t * b;
+ u32 bi;
+ ip4_header_t * ip;
+ udp_header_t * udp;
+ dhcp_header_t * dhcp;
+ u32 * to_next;
+ vlib_frame_t * f;
+ dhcp_option_t * o;
+ u16 udp_length, ip_length;
+
+ /* Interface(s) down? */
+ if ((hw->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
+ return;
+ if ((sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0)
+ return;
+ if ((sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0)
+ return;
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1) {
+ clib_warning ("buffer allocation failure");
+ c->next_transmit = 0;
+ return;
+ }
+
+ /* Build a dhcpv4 pkt from whole cloth */
+ b = vlib_get_buffer (vm, bi);
+
+ ASSERT (b->current_data == 0);
+
+ vnet_buffer(b)->sw_if_index[VLIB_RX] = c->sw_if_index;
+ if (is_broadcast)
+ {
+ f = vlib_get_frame_to_node (vm, hw->output_node_index);
+ vnet_buffer(b)->sw_if_index[VLIB_TX] = c->sw_if_index;
+ clib_memcpy (b->data, c->l2_rewrite, vec_len(c->l2_rewrite));
+ ip = (void *)
+ (((u8 *)vlib_buffer_get_current (b)) + vec_len (c->l2_rewrite));
+ }
+ else
+ {
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */
+ ip = vlib_buffer_get_current (b);
+ }
+
+ /* Enqueue the packet right now */
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+
+ if (is_broadcast)
+ vlib_put_frame_to_node (vm, hw->output_node_index, f);
+ else
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+
+ udp = (udp_header_t *)(ip+1);
+ dhcp = (dhcp_header_t *)(udp+1);
+
+ /* $$$ optimize, maybe */
+ memset (ip, 0, sizeof (*ip) + sizeof (*udp) + sizeof (*dhcp));
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 128;
+ ip->protocol = IP_PROTOCOL_UDP;
+
+ if (is_broadcast)
+ {
+ /* src = 0.0.0.0, dst = 255.255.255.255 */
+ ip->dst_address.as_u32 = ~0;
+ }
+ else
+ {
+ /* Renewing an active lease, plain old ip4 src/dst */
+ ip->src_address.as_u32 = c->leased_address.as_u32;
+ ip->dst_address.as_u32 = c->dhcp_server.as_u32;
+ }
+
+ udp->src_port = clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client);
+ udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server);
+
+ /* Send the interface MAC address */
+ clib_memcpy (dhcp->client_hardware_address, c->l2_rewrite + 6, 6);
+
+ /* Lease renewal, set up client_ip_address */
+ if (is_broadcast == 0)
+ dhcp->client_ip_address.as_u32 = c->leased_address.as_u32;
+
+ dhcp->opcode = 1; /* request, all we send */
+ dhcp->hardware_type = 1; /* ethernet */
+ dhcp->hardware_address_length = 6;
+ dhcp->transaction_identifier = c->transaction_id;
+ dhcp->flags = clib_host_to_net_u16(is_broadcast ? DHCP_FLAG_BROADCAST : 0);
+ dhcp->magic_cookie.as_u32 = DHCP_MAGIC;
+
+ o = (dhcp_option_t * )dhcp->options;
+
+ /* Send option 53, the DHCP message type */
+ o->option = DHCP_PACKET_OPTION_MSG_TYPE;
+ o->length = 1;
+ o->data[0] = type;
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+
+ /* Send option 57, max msg length */
+ if (0 /* not needed, apparently */)
+ {
+ o->option = 57;
+ o->length = 2;
+ {
+ u16 *o2 = (u16 *) o->data;
+ *o2 = clib_host_to_net_u16 (1152);
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+ }
+ }
+
+ /*
+ * If server ip address is available with non-zero value,
+ * option 54 (DHCP Server Identifier) is sent.
+ */
+ if (c->dhcp_server.as_u32)
+ {
+ o->option = 54;
+ o->length = 4;
+ clib_memcpy (o->data, &c->dhcp_server.as_u32, 4);
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+ }
+
+ /* send option 50, requested IP address */
+ if (c->leased_address.as_u32)
+ {
+ o->option = 50;
+ o->length = 4;
+ clib_memcpy (o->data, &c->leased_address.as_u32, 4);
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+ }
+
+ /* send option 12, host name */
+ if (vec_len (c->hostname))
+ {
+ o->option = 12;
+ o->length = vec_len (c->hostname);
+ clib_memcpy (o->data, c->hostname, vec_len (c->hostname));
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+ }
+
+ /* send option 61, client_id */
+ if (vec_len (c->client_identifier))
+ {
+ o->option = 61;
+ o->length = vec_len (c->client_identifier);
+ clib_memcpy (o->data, c->client_identifier,
+ vec_len (c->client_identifier));
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+ }
+
+ /* $$ maybe send the client s/w version if anyone cares */
+
+ /*
+ * send option 55, parameter request list
+   * The current list (see below) matches the Linux dhcp client's list.
+   * Any given dhcp server and/or server configuration may or may
+   * not return all of the requested options.
+ */
+ o->option = 55;
+ o->length = vec_len (c->option_55_data);
+ clib_memcpy (o->data, c->option_55_data, vec_len(c->option_55_data));
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+
+ /* End of list */
+ o->option = 0xff;
+ o->length = 0;
+ o++;
+
+ b->current_length = ((u8 *)o) - b->data;
+
+ /* fix ip length, checksum and udp length */
+ ip_length = vlib_buffer_length_in_chain (vm, b);
+ if (is_broadcast)
+ ip_length -= vec_len (c->l2_rewrite);
+
+ ip->length = clib_host_to_net_u16(ip_length);
+ ip->checksum = ip4_header_checksum(ip);
+
+ udp_length = ip_length - (sizeof (*ip));
+ udp->length = clib_host_to_net_u16 (udp_length);
+}
+
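Both the option parser in dhcp_client_for_us() and the builder above step through options with the same (code, length, payload) stride; written out as a helper, the advance is simply this (a sketch; the patch open-codes the arithmetic):

    static inline dhcp_option_t *
    dhcp_option_next (dhcp_option_t * o)
    {
      /* 1 code byte + 1 length byte + 'length' payload bytes */
      return (dhcp_option_t *) (((u8 *) o) + o->length + 2);
    }
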
+static int
+dhcp_discover_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now)
+{
+ /*
+ * State machine "DISCOVER" state. Send a dhcp discover packet,
+ * eventually back off the retry rate.
+ */
+ send_dhcp_pkt (dcm, c, DHCP_PACKET_DISCOVER, 1 /* is_broadcast */);
+
+ c->retry_count++;
+ if (c->retry_count > 10)
+ c->next_transmit = now + 5.0;
+ else
+ c->next_transmit = now + 1.0;
+ return 0;
+}
+
+static int
+dhcp_request_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now)
+{
+ /*
+ * State machine "REQUEST" state. Send a dhcp request packet,
+ * eventually drop back to the discover state.
+ */
+ send_dhcp_pkt (dcm, c, DHCP_PACKET_REQUEST, 1 /* is_broadcast */);
+
+ c->retry_count++;
+ if (c->retry_count > 7 /* lucky you */)
+ {
+ c->state = DHCP_DISCOVER;
+ c->next_transmit = now;
+ c->retry_count = 0;
+ return 1;
+ }
+ c->next_transmit = now + 1.0;
+ return 0;
+}
+
+static int
+dhcp_bound_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now)
+{
+ /*
+ * State machine "BOUND" state. Send a dhcp request packet,
+ * eventually, when the lease expires, forget the dhcp data
+ * and go back to the stone age.
+ */
+ send_dhcp_pkt (dcm, c, DHCP_PACKET_REQUEST, 0 /* is_broadcast */);
+
+ c->retry_count++;
+ if (c->retry_count > 10)
+ c->next_transmit = now + 5.0;
+ else
+ c->next_transmit = now + 1.0;
+
+ if (now > c->lease_expires)
+ {
+ if (c->router_address.as_u32)
+ {
+ fib_prefix_t all_0s =
+ {
+ .fp_len = 0,
+ .fp_addr.ip4.as_u32 = 0x0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ ip46_address_t nh = {
+ .ip4 = c->router_address,
+ };
+
+ fib_table_entry_path_remove(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_0s,
+ FIB_SOURCE_DHCP,
+ DPO_PROTO_IP4,
+ &nh,
+ c->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+
+ dhcp_client_release_address (dcm, c);
+ c->state = DHCP_DISCOVER;
+ c->next_transmit = now;
+ c->retry_count = 0;
+ /* Wipe out any memory of the address we had... */
+ c->leased_address.as_u32 = 0;
+ c->subnet_mask_width = 0;
+ c->router_address.as_u32 = 0;
+ c->lease_renewal_interval = 0;
+ c->dhcp_server.as_u32 = 0;
+ return 1;
+ }
+ return 0;
+}
+
+static f64 dhcp_client_sm (f64 now, f64 timeout, uword pool_index)
+{
+ dhcp_client_main_t * dcm = &dhcp_client_main;
+ dhcp_client_t * c;
+
+ /* deleted, pooched, yadda yadda yadda */
+ if (pool_is_free_index (dcm->clients, pool_index))
+ return timeout;
+
+ c = pool_elt_at_index (dcm->clients, pool_index);
+
+ /* Time for us to do something with this client? */
+ if (now < c->next_transmit)
+ return timeout;
+
+ again:
+ switch (c->state)
+ {
+ case DHCP_DISCOVER: /* send a discover */
+ if (dhcp_discover_state (dcm, c, now))
+ goto again;
+ break;
+
+ case DHCP_REQUEST: /* send a request */
+ if (dhcp_request_state (dcm, c, now))
+ goto again;
+ break;
+
+ case DHCP_BOUND: /* bound, renew needed? */
+ if (dhcp_bound_state (dcm, c, now))
+ goto again;
+ break;
+
+ default:
+ clib_warning ("dhcp client %d bogus state %d",
+ c - dcm->clients, c->state);
+ break;
+ }
+
+ if (c->next_transmit < now + timeout)
+ return c->next_transmit - now;
+
+ return timeout;
+}
+
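Summarizing the retransmit machine implemented by the three state handlers above (derived from the code, not new behavior):

    /*
     * DISCOVER --OFFER--> REQUEST --ACK--> BOUND
     *     ^                  |               |
     *     +--- 8 retries ----+               |
     *     +----------- lease expiry ---------+
     *
     * Unanswered DISCOVER/BOUND retransmits back off from 1s to 5s
     * after 10 retries; REQUEST falls back to DISCOVER after 8 tries.
     */
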
+static uword
+dhcp_client_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ f64 timeout = 100.0;
+ f64 now;
+ uword event_type;
+ uword * event_data = 0;
+ dhcp_client_main_t * dcm = &dhcp_client_main;
+ dhcp_client_t * c;
+ int i;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+
+ now = vlib_time_now (vm);
+
+ switch (event_type)
+ {
+ case EVENT_DHCP_CLIENT_WAKEUP:
+ for (i = 0; i < vec_len (event_data); i++)
+ timeout = dhcp_client_sm (now, timeout, event_data[i]);
+ break;
+
+ case ~0:
+ pool_foreach (c, dcm->clients,
+ ({
+ timeout = dhcp_client_sm (now, timeout,
+ (uword)(c - dcm->clients));
+ }));
+ if (pool_elts (dcm->clients) == 0)
+ timeout = 100.0;
+ break;
+ }
+
+ vec_reset_length (event_data);
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+
+VLIB_REGISTER_NODE (dhcp_client_process_node,static) = {
+ .function = dhcp_client_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "dhcp-client-process",
+ .process_log2_n_stack_bytes = 16,
+};
+
+static u8 * format_dhcp_client_state (u8 * s, va_list * va)
+{
+ dhcp_client_state_t state = va_arg (*va, dhcp_client_state_t);
+ char * str = "BOGUS!";
+
+ switch (state)
+ {
+#define _(a) \
+ case a: \
+ str = #a; \
+ break;
+ foreach_dhcp_client_state;
+#undef _
+ default:
+ break;
+ }
+
+ s = format (s, "%s", str);
+ return s;
+}
+
+static u8 * format_dhcp_client (u8 * s, va_list * va)
+{
+ dhcp_client_main_t * dcm = va_arg (*va, dhcp_client_main_t *);
+ dhcp_client_t * c = va_arg (*va, dhcp_client_t *);
+ int verbose = va_arg (*va, int);
+
+ s = format (s, "[%d] %U state %U ", c - dcm->clients,
+ format_vnet_sw_if_index_name, dcm->vnet_main, c->sw_if_index,
+ format_dhcp_client_state, c->state);
+
+ if (c->leased_address.as_u32)
+ s = format (s, "addr %U/%d gw %U\n",
+ format_ip4_address, &c->leased_address,
+ c->subnet_mask_width, format_ip4_address, &c->router_address);
+ else
+ s = format (s, "no address\n");
+
+ if (verbose)
+ {
+ s = format (s, "retry count %d, next xmt %.2f",
+ c->retry_count, c->next_transmit);
+ }
+ return s;
+}
+
+static clib_error_t *
+show_dhcp_client_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ dhcp_client_main_t * dcm = &dhcp_client_main;
+ dhcp_client_t * c;
+ int verbose = 0;
+ u32 sw_if_index = ~0;
+ uword * p;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U",
+ unformat_vnet_sw_interface, dcm->vnet_main,
+ &sw_if_index))
+ ;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index != ~0)
+ {
+ p = hash_get (dcm->client_by_sw_if_index, sw_if_index);
+ if (p == 0)
+ return clib_error_return (0, "dhcp client not configured");
+ c = pool_elt_at_index (dcm->clients, p[0]);
+ vlib_cli_output (vm, "%U", format_dhcp_client, dcm, c, verbose);
+ return 0;
+ }
+
+ pool_foreach (c, dcm->clients,
+ ({
+ vlib_cli_output (vm, "%U", format_dhcp_client, dcm, c, verbose);
+ }));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_dhcp_client_command, static) = {
+ .path = "show dhcp client",
+  .short_help = "show dhcp client [intfc <intfc>] [verbose]",
+ .function = show_dhcp_client_command_fn,
+};
+
+
+int dhcp_client_add_del (dhcp_client_add_del_args_t * a)
+{
+ dhcp_client_main_t * dcm = &dhcp_client_main;
+ vlib_main_t * vm = dcm->vlib_main;
+ dhcp_client_t * c;
+ uword * p;
+ fib_prefix_t all_1s =
+ {
+ .fp_len = 32,
+ .fp_addr.ip4.as_u32 = 0xffffffff,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ fib_prefix_t all_0s =
+ {
+ .fp_len = 0,
+ .fp_addr.ip4.as_u32 = 0x0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+
+ p = hash_get (dcm->client_by_sw_if_index, a->sw_if_index);
+
+ if ((p && a->is_add) || (!p && a->is_add == 0))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (a->is_add)
+ {
+ pool_get (dcm->clients, c);
+ memset (c, 0, sizeof (*c));
+ c->state = DHCP_DISCOVER;
+ c->sw_if_index = a->sw_if_index;
+ c->client_index = a->client_index;
+ c->pid = a->pid;
+ c->event_callback = a->event_callback;
+ c->option_55_data = a->option_55_data;
+ c->hostname = a->hostname;
+ c->client_identifier = a->client_identifier;
+ do {
+ c->transaction_id = random_u32 (&dcm->seed);
+ } while (c->transaction_id == 0);
+ set_l2_rewrite (dcm, c);
+ hash_set (dcm->client_by_sw_if_index, a->sw_if_index, c - dcm->clients);
+
+ /* this add is ref counted by FIB so we can add for each itf */
+ fib_table_entry_special_add(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_1s,
+ FIB_SOURCE_DHCP,
+ FIB_ENTRY_FLAG_LOCAL);
+
+ /*
+ * enable the interface to RX IPv4 packets
+ * this is also ref counted
+ */
+ ip4_sw_interface_enable_disable (c->sw_if_index, 1);
+
+ vlib_process_signal_event (vm, dhcp_client_process_node.index,
+ EVENT_DHCP_CLIENT_WAKEUP, c - dcm->clients);
+ }
+ else
+ {
+ c = pool_elt_at_index (dcm->clients, p[0]);
+
+ fib_table_entry_special_remove(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_1s,
+ FIB_SOURCE_DHCP);
+
+ if (c->router_address.as_u32)
+ {
+ ip46_address_t nh = {
+ .ip4 = c->router_address,
+ };
+
+ fib_table_entry_path_remove(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_0s,
+ FIB_SOURCE_DHCP,
+ DPO_PROTO_IP4,
+ &nh,
+ c->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ dhcp_client_remove_rx_address (dcm, c);
+ dhcp_client_release_address (dcm, c);
+ ip4_sw_interface_enable_disable (c->sw_if_index, 0);
+
+ vec_free (c->option_55_data);
+ vec_free (c->hostname);
+ vec_free (c->client_identifier);
+ vec_free (c->l2_rewrite);
+ hash_unset (dcm->client_by_sw_if_index, c->sw_if_index);
+ pool_put (dcm->clients, c);
+ }
+ return 0;
+}
+
+int
+dhcp_client_config (vlib_main_t * vm,
+ u32 sw_if_index,
+ u8 * hostname,
+ u8 * client_id,
+ u32 is_add,
+ u32 client_index,
+ void * event_callback,
+ u32 pid)
+{
+ dhcp_client_add_del_args_t _a, *a = &_a;
+ int rv;
+
+ memset (a, 0, sizeof (*a));
+ a->is_add = is_add;
+ a->sw_if_index = sw_if_index;
+ a->client_index = client_index;
+ a->pid = pid;
+ a->event_callback = event_callback;
+ vec_validate(a->hostname, strlen((char *)hostname) - 1);
+ strncpy((char *)a->hostname, (char *)hostname, vec_len(a->hostname));
+ vec_validate(a->client_identifier, strlen((char *)client_id) - 1);
+ strncpy((char *)a->client_identifier, (char *)client_id, vec_len(a->client_identifier));
+
+ /*
+ * Option 55 request list. These data precisely match
+ * the Ubuntu dhcp client. YMMV.
+ */
+
+ /* Subnet Mask */
+ vec_add1 (a->option_55_data, 1);
+ /* Broadcast address */
+ vec_add1 (a->option_55_data, 28);
+ /* time offset */
+ vec_add1 (a->option_55_data, 2);
+ /* Router */
+ vec_add1 (a->option_55_data, 3);
+ /* Domain Name */
+ vec_add1 (a->option_55_data, 15);
+ /* DNS */
+ vec_add1 (a->option_55_data, 6);
+ /* Domain search */
+ vec_add1 (a->option_55_data, 119);
+ /* Host name */
+ vec_add1 (a->option_55_data, 12);
+ /* NetBIOS name server */
+ vec_add1 (a->option_55_data, 44);
+ /* NetBIOS Scope */
+ vec_add1 (a->option_55_data, 47);
+ /* MTU */
+ vec_add1 (a->option_55_data, 26);
+ /* Classless static route */
+ vec_add1 (a->option_55_data, 121);
+ /* NTP servers */
+ vec_add1 (a->option_55_data, 42);
+
+ rv = dhcp_client_add_del (a);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_VALUE:
+
+ vec_free (a->hostname);
+ vec_free (a->client_identifier);
+ vec_free (a->option_55_data);
+
+ if (is_add)
+ clib_warning ("dhcp client already enabled on intf_idx %d",
+ sw_if_index);
+ else
+	clib_warning ("dhcp client not enabled on intf_idx %d",
+ sw_if_index);
+ break;
+
+ default:
+ clib_warning ("dhcp_client_add_del returned %d", rv);
+ }
+
+ return rv;
+}
+
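For reference, the request list built above (and duplicated in the CLI path below) serializes to this 13-byte option-55 payload:

    /* codes:  1 28  2  3 15  6 119 12 44 47 26 121 42
     * bytes: 01 1c 02 03 0f 06  77 0c 2c 2f 1a  79 2a */
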
+static clib_error_t *
+dhcp_client_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+
+ dhcp_client_main_t * dcm = &dhcp_client_main;
+ u32 sw_if_index;
+ u8 * hostname = 0;
+ u8 sw_if_index_set = 0;
+ int is_add = 1;
+ dhcp_client_add_del_args_t _a, *a = &_a;
+ int rv;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U",
+ unformat_vnet_sw_interface, dcm->vnet_main,
+ &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (input, "hostname %v", &hostname))
+ ;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ return clib_error_return (0, "interface not specified");
+
+ memset (a, 0, sizeof (*a));
+ a->is_add = is_add;
+ a->sw_if_index = sw_if_index;
+ a->hostname = hostname;
+ a->client_identifier = format (0, "vpe 1.0%c", 0);
+
+ /*
+ * Option 55 request list. These data precisely match
+ * the Ubuntu dhcp client. YMMV.
+ */
+
+ /* Subnet Mask */
+ vec_add1 (a->option_55_data, 1);
+ /* Broadcast address */
+ vec_add1 (a->option_55_data, 28);
+ /* time offset */
+ vec_add1 (a->option_55_data, 2);
+ /* Router */
+ vec_add1 (a->option_55_data, 3);
+ /* Domain Name */
+ vec_add1 (a->option_55_data, 15);
+ /* DNS */
+ vec_add1 (a->option_55_data, 6);
+ /* Domain search */
+ vec_add1 (a->option_55_data, 119);
+ /* Host name */
+ vec_add1 (a->option_55_data, 12);
+ /* NetBIOS name server */
+ vec_add1 (a->option_55_data, 44);
+ /* NetBIOS Scope */
+ vec_add1 (a->option_55_data, 47);
+ /* MTU */
+ vec_add1 (a->option_55_data, 26);
+ /* Classless static route */
+ vec_add1 (a->option_55_data, 121);
+ /* NTP servers */
+ vec_add1 (a->option_55_data, 42);
+
+ rv = dhcp_client_add_del (a);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_VALUE:
+
+ vec_free (a->hostname);
+ vec_free (a->client_identifier);
+ vec_free (a->option_55_data);
+ if (is_add)
+ return clib_error_return (0, "dhcp client already enabled on %U",
+ format_vnet_sw_if_index_name,
+ dcm->vnet_main, sw_if_index);
+ else
+ return clib_error_return (0, "dhcp client not enabled on %U",
+ format_vnet_sw_if_index_name,
+ dcm->vnet_main, sw_if_index);
+ break;
+
+ default:
+ vlib_cli_output (vm, "dhcp_client_add_del returned %d", rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (dhcp_client_set_command, static) = {
+ .path = "set dhcp client",
+ .short_help = "set dhcp client [del] intfc <interface> [hostname <name>]",
+ .function = dhcp_client_set_command_fn,
+};
+
+static clib_error_t *
+dhcp_client_init (vlib_main_t * vm)
+{
+ dhcp_client_main_t * dcm = &dhcp_client_main;
+
+ dcm->vlib_main = vm;
+ dcm->vnet_main = vnet_get_main();
+ dcm->seed = 0xdeaddabe;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (dhcp_client_init);
diff --git a/src/vnet/dhcp/client.h b/src/vnet/dhcp/client.h
new file mode 100644
index 00000000..509d5d4c
--- /dev/null
+++ b/src/vnet/dhcp/client.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * client.h: dhcp client
+ */
+
+#ifndef included_dhcp_client_h
+#define included_dhcp_client_h
+
+#include <vnet/ip/ip.h>
+#include <vnet/dhcp/dhcp4_packet.h>
+
+#define foreach_dhcp_client_state \
+_(DHCP_DISCOVER) \
+_(DHCP_REQUEST) \
+_(DHCP_BOUND)
+
+typedef enum {
+#define _(a) a,
+ foreach_dhcp_client_state
+#undef _
+} dhcp_client_state_t;
+
+typedef struct {
+ dhcp_client_state_t state;
+
+ /* the interface in question */
+ u32 sw_if_index;
+
+ /* State machine retry counter */
+ u32 retry_count;
+
+ /* Send next pkt at this time */
+ f64 next_transmit;
+ f64 lease_expires;
+
+ /* DHCP transaction ID, a random number */
+ u32 transaction_id;
+
+ /* leased address, other learned info DHCP */
+ ip4_address_t leased_address; /* from your_ip_address field */
+ ip4_address_t dhcp_server;
+ u32 subnet_mask_width; /* option 1 */
+ ip4_address_t router_address; /* option 3 */
+  u32 lease_renewal_interval;  /* option 58 */
+  u32 lease_lifetime;          /* option 51 */
+
+ /* Requested data (option 55) */
+ u8 * option_55_data;
+
+ u8 * l2_rewrite;
+
+ /* hostname and software client identifiers */
+ u8 * hostname;
+ u8 * client_identifier; /* software version, e.g. vpe 1.0*/
+
+ /* Information used for event callback */
+ u32 client_index;
+ u32 pid;
+ void * event_callback;
+} dhcp_client_t;
+
+typedef struct {
+ /* DHCP client pool */
+ dhcp_client_t * clients;
+ uword * client_by_sw_if_index;
+ u32 seed;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} dhcp_client_main_t;
+
+typedef struct {
+ int is_add;
+ u32 sw_if_index;
+
+ /* vectors, consumed by dhcp client code */
+ u8 * hostname;
+ u8 * client_identifier;
+
+ /* Bytes containing requested option numbers */
+ u8 * option_55_data;
+
+ /* Information used for event callback */
+ u32 client_index;
+ u32 pid;
+ void * event_callback;
+} dhcp_client_add_del_args_t;
+
+extern dhcp_client_main_t dhcp_client_main;
+
+#define EVENT_DHCP_CLIENT_WAKEUP 1
+
+int dhcp_client_for_us (u32 bi0,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ udp_header_t * u0,
+ dhcp_header_t * dh0);
+
+int dhcp_client_config (vlib_main_t * vm,
+ u32 sw_if_index,
+ u8 * hostname,
+ u8 * client_id,
+ u32 is_add,
+ u32 client_index,
+ void *event_callback,
+ u32 pid);
+
+#endif /* included_dhcp_client_h */
diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api
new file mode 100644
index 00000000..c632c087
--- /dev/null
+++ b/src/vnet/dhcp/dhcp.api
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief DHCP Proxy config add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param rx_vrf_id - Rx/interface vrf id
+ @param server_vrf_id - server vrf id
+    @param is_ipv6 - ipv6 if non-zero, else ipv4
+    @param is_add - add the config if non-zero, else delete
+ @param dhcp_server[] - server address
+ @param dhcp_src_address[] - <fix this, need details>
+*/
+autoreply define dhcp_proxy_config
+{
+ u32 client_index;
+ u32 context;
+ u32 rx_vrf_id;
+ u32 server_vrf_id;
+ u8 is_ipv6;
+ u8 is_add;
+ u8 dhcp_server[16];
+ u8 dhcp_src_address[16];
+};
+
+/** \brief DHCP Proxy set / unset vss request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param tbl_id - table id
+ @param oui - first part of vpn id
+ @param fib_id - second part of vpn id
+ @param is_ipv6 - ip6 if non-zero, else ip4
+ @param is_add - set vss if non-zero, else delete
+*/
+autoreply define dhcp_proxy_set_vss
+{
+ u32 client_index;
+ u32 context;
+ u32 tbl_id;
+ u32 oui;
+ u32 fib_id;
+ u8 is_ipv6;
+ u8 is_add;
+};
+
+/** \brief DHCP Client config add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface for DHCP client
+ @param hostname - hostname
+ @param client_id - Client ID - option 61
+ @param is_add - add the config if non-zero, else delete
+ @param want_dhcp_event - DHCP event sent to the sender
+ via dhcp_compl_event API message if non-zero
+ @param pid - sender's pid
+*/
+autoreply define dhcp_client_config
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 hostname[64];
+ u8 client_id[64];
+ u8 is_add;
+ u8 want_dhcp_event;
+ u32 pid;
+};
+
+/** \brief Tell client about a DHCP completion event
+ @param client_index - opaque cookie to identify the sender
+ @param pid - client pid registered to receive notification
+ @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+ @param mask_width - The length of the subnet mask assigned
+ @param host_address - Host IP address
+ @param router_address - Router IP address
+ @param host_mac - Host MAC address
+*/
+define dhcp_compl_event
+{
+ u32 client_index;
+ u32 pid;
+ u8 hostname[64];
+ u8 is_ipv6;
+ u8 mask_width;
+ u8 host_address[16];
+ u8 router_address[16];
+ u8 host_mac[6];
+};
+
+/** \brief Dump DHCP proxy table
+ @param client_index - opaque cookie to identify the sender
+    @param is_ip6 - True for the IPv6 proxy table
+*/
+define dhcp_proxy_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ip6;
+};
+
+typeonly manual_print manual_endian define dhcp_server
+{
+ u32 server_vrf_id;
+ u8 dhcp_server[16];
+};
+
+/** \brief DHCP proxy details; returned in response to dhcp_proxy_dump
+    @param context - sender context, to match reply w/ request
+*/
+manual_endian manual_print define dhcp_proxy_details
+{
+ u32 context;
+ u32 rx_vrf_id;
+ u32 vss_oui;
+ u32 vss_fib_id;
+ u8 is_ipv6;
+ u8 dhcp_src_address[16];
+ u8 count;
+ vl_api_dhcp_server_t servers[count];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/dhcp/dhcp4_packet.h b/src/vnet/dhcp/dhcp4_packet.h
new file mode 100644
index 00000000..07829f48
--- /dev/null
+++ b/src/vnet/dhcp/dhcp4_packet.h
@@ -0,0 +1,66 @@
+#ifndef included_vnet_dhcp4_packet_h
+#define included_vnet_dhcp4_packet_h
+
+/*
+ * DHCP packet format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip4_packet.h>
+
+typedef struct {
+ u8 opcode; /* 1 = request, 2 = reply */
+ u8 hardware_type; /* 1 = ethernet */
+ u8 hardware_address_length;
+ u8 hops;
+ u32 transaction_identifier;
+ u16 seconds;
+ u16 flags;
+#define DHCP_FLAG_BROADCAST (1<<15)
+ ip4_address_t client_ip_address;
+ ip4_address_t your_ip_address; /* use this one */
+ ip4_address_t server_ip_address;
+ ip4_address_t gateway_ip_address; /* use option 3, not this one */
+ u8 client_hardware_address[16];
+ u8 server_name[64];
+ u8 boot_filename[128];
+ ip4_address_t magic_cookie;
+ u8 options[0];
+} dhcp_header_t;
+
+typedef struct {
+ u8 option;
+ u8 length;
+ union {
+ u8 data[0];
+ u32 data_as_u32[0];
+ };
+} __attribute__((packed)) dhcp_option_t;
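+
+/*
+ * Minimal sketch of the option walk the proxy nodes below perform;
+ * `end' (one past the last packet byte) is an assumption of this
+ * sketch:
+ *
+ *   dhcp_option_t *o = (dhcp_option_t *) h->options;
+ *   while ((u8 *) o < end && o->option != 0xFF)
+ *     {
+ *       // each TLV is 2 header octets plus o->length octets of data
+ *       o = (dhcp_option_t *) ((u8 *) o + o->length + 2);
+ *     }
+ */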
+
+typedef enum {
+ DHCP_PACKET_DISCOVER=1,
+ DHCP_PACKET_OFFER,
+ DHCP_PACKET_REQUEST,
+ /* 4 is DHCPDECLINE, which the proxy does not handle */
+ DHCP_PACKET_ACK=5,
+} dhcp_packet_type_t;
+
+typedef enum dhcp_packet_option_t_
+{
+ DHCP_PACKET_OPTION_MSG_TYPE = 53,
+} dhcp_packet_option_t;
+
+/* charming antique: 99.130.83.99 is the dhcp magic cookie */
+#define DHCP_MAGIC (clib_host_to_net_u32(0x63825363))
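+
+/*
+ * e.g. a receiver could sanity-check a packet with:
+ *
+ *   if (h->magic_cookie.as_u32 != DHCP_MAGIC)
+ *     ...drop...;
+ *
+ * (illustrative only; the proxy nodes in this patch do not validate
+ * the cookie)
+ */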
+
+#endif /* included_vnet_dhcp4_packet_h */
diff --git a/src/vnet/dhcp/dhcp4_proxy_error.def b/src/vnet/dhcp/dhcp4_proxy_error.def
new file mode 100644
index 00000000..adf04808
--- /dev/null
+++ b/src/vnet/dhcp/dhcp4_proxy_error.def
@@ -0,0 +1,32 @@
+/*
+ * dhcp_proxy_error.def: dhcp proxy errors
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+dhcp_proxy_error (NONE, "no error")
+dhcp_proxy_error (NO_SERVER, "no dhcp server configured")
+dhcp_proxy_error (RELAY_TO_SERVER, "DHCP packets relayed to the server")
+dhcp_proxy_error (RELAY_TO_CLIENT, "DHCP packets relayed to clients")
+dhcp_proxy_error (OPTION_82_ERROR, "DHCP failed to insert option 82")
+dhcp_proxy_error (NO_OPTION_82, "DHCP option 82 missing")
+dhcp_proxy_error (BAD_OPTION_82_ITF, "Bad DHCP option 82 interface value")
+dhcp_proxy_error (BAD_OPTION_82_ADDR, "Bad DHCP option 82 address value")
+dhcp_proxy_error (BAD_FIB_ID, "DHCP option 82 fib-id to fib-index map failure")
+dhcp_proxy_error (NO_INTERFACE_ADDRESS, "DHCP no interface address")
+dhcp_proxy_error (OPTION_82_VSS_NOT_PROCESSED, "DHCP VSS not processed by DHCP server")
+dhcp_proxy_error (BAD_YIADDR, "DHCP packets with bad your_ip_address fields")
+dhcp_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCP packets not from DHCP server or server FIB")
+dhcp_proxy_error (PKT_TOO_BIG, "DHCP packets which are too big")
+
diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c
new file mode 100644
index 00000000..339a7885
--- /dev/null
+++ b/src/vnet/dhcp/dhcp4_proxy_node.c
@@ -0,0 +1,1068 @@
+/*
+ * proxy_node.c: dhcp proxy node processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/dhcp/dhcp_proxy.h>
+#include <vnet/dhcp/client.h>
+#include <vnet/fib/ip4_fib.h>
+
+static char * dhcp_proxy_error_strings[] = {
+#define dhcp_proxy_error(n,s) s,
+#include <vnet/dhcp/dhcp4_proxy_error.def>
+#undef dhcp_proxy_error
+};
+
+#define foreach_dhcp_proxy_to_server_input_next \
+ _ (DROP, "error-drop") \
+ _ (LOOKUP, "ip4-lookup") \
+ _ (SEND_TO_CLIENT, "dhcp-proxy-to-client")
+
+typedef enum {
+#define _(s,n) DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s,
+ foreach_dhcp_proxy_to_server_input_next
+#undef _
+ DHCP_PROXY_TO_SERVER_INPUT_N_NEXT,
+} dhcp_proxy_to_server_input_next_t;
+
+typedef struct {
+ /* 0 => to server, 1 => to client */
+ int which;
+ ip4_address_t trace_ip4_address;
+ u32 error;
+ u32 sw_if_index;
+ u32 original_sw_if_index;
+} dhcp_proxy_trace_t;
+
+#define VPP_DHCP_OPTION82_SUB1_SIZE 6
+#define VPP_DHCP_OPTION82_SUB5_SIZE 6
+#define VPP_DHCP_OPTION82_VSS_SIZE 12
+#define VPP_DHCP_OPTION82_SIZE (VPP_DHCP_OPTION82_SUB1_SIZE + \
+ VPP_DHCP_OPTION82_SUB5_SIZE + \
+ VPP_DHCP_OPTION82_VSS_SIZE +3)
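+
+/*
+ * Worst-case layout of the option 82 bytes written below; the "+3"
+ * covers the 2-octet option header plus the trailing end-of-options
+ * octet:
+ *
+ *   82, len,
+ *   1, 4, <rx sw_if_index>        (suboption 1, circuit ID)
+ *   5, 4, <rx interface address>  (suboption 5)
+ *   151, 8, <type, oui, fib id>   (VSS suboption)
+ *   152, 0                        (VSS control suboption)
+ *   0xFF                          (end of options)
+ */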
+
+static vlib_node_registration_t dhcp_proxy_to_server_node;
+static vlib_node_registration_t dhcp_proxy_to_client_node;
+
+static u8 *
+format_dhcp_proxy_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ dhcp_proxy_trace_t * t = va_arg (*args, dhcp_proxy_trace_t *);
+
+ if (t->which == 0)
+ s = format (s, "DHCP proxy: sent to server %U\n",
+ format_ip4_address, &t->trace_ip4_address);
+ else
+ s = format (s, "DHCP proxy: broadcast to client from %U\n",
+ format_ip4_address, &t->trace_ip4_address);
+
+ if (t->error != (u32)~0)
+ s = format (s, " error: %s\n", dhcp_proxy_error_strings[t->error]);
+
+ s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n",
+ t->original_sw_if_index, t->sw_if_index);
+
+ return s;
+}
+
+static u8 *
+format_dhcp_proxy_header_with_length (u8 * s, va_list * args)
+{
+ dhcp_header_t * h = va_arg (*args, dhcp_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "dhcp header truncated");
+
+ s = format (s, "DHCP Proxy");
+
+ return s;
+}
+
+static uword
+dhcp_proxy_to_server_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0;
+ u32 pkts_no_interface_address=0;
+ u32 pkts_too_big=0;
+ ip4_main_t * im = &ip4_main;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ udp_header_t * u0;
+ dhcp_header_t * h0;
+ ip4_header_t * ip0;
+ u32 next0;
+ u32 old0, new0;
+ ip_csum_t sum0;
+ u32 error0 = (u32) ~0;
+ u32 sw_if_index = 0;
+ u32 original_sw_if_index = 0;
+ u8 *end = NULL;
+ u32 fib_index;
+ dhcp_proxy_t *proxy;
+ dhcp_server_t *server;
+ u32 rx_sw_if_index;
+ dhcp_option_t *o;
+ u32 len = 0;
+ vlib_buffer_free_list_t *fl;
+ u8 is_discover = 0;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ h0 = vlib_buffer_get_current (b0);
+
+ /*
+ * udp_local hands us the DHCP header, need udp hdr,
+ * ip hdr to relay to server
+ */
+ vlib_buffer_advance (b0, -(sizeof(*u0)));
+ u0 = vlib_buffer_get_current (b0);
+
+ /* Awkward: server-to-relay return traffic arrives with
+ src_port = dst_port = 67 */
+ if (u0->src_port == clib_net_to_host_u16(UDP_DST_PORT_dhcp_to_server))
+ {
+ vlib_buffer_advance (b0, sizeof(*u0));
+ next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT;
+ error0 = 0;
+ pkts_to_client++;
+ goto do_enqueue;
+ }
+
+ rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index];
+ proxy = dhcp_get_proxy(dpm, fib_index, FIB_PROTOCOL_IP4);
+
+ if (PREDICT_FALSE (NULL == proxy))
+ {
+ error0 = DHCP_PROXY_ERROR_NO_SERVER;
+ next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ pkts_no_server++;
+ goto do_trace;
+ }
+
+ server = &proxy->dhcp_servers[0];
+ vlib_buffer_advance (b0, -(sizeof(*ip0)));
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* disable UDP checksum */
+ u0->checksum = 0;
+ sum0 = ip0->checksum;
+ old0 = ip0->dst_address.as_u32;
+ new0 = server->dhcp_server.ip4.as_u32;
+ ip0->dst_address.as_u32 = server->dhcp_server.ip4.as_u32;
+ sum0 = ip_csum_update (sum0, old0, new0,
+ ip4_header_t /* structure */,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ sum0 = ip0->checksum;
+ old0 = ip0->src_address.as_u32;
+ new0 = proxy->dhcp_src_address.ip4.as_u32;
+ ip0->src_address.as_u32 = new0;
+ sum0 = ip_csum_update (sum0, old0, new0,
+ ip4_header_t /* structure */,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ /* Send to DHCP server via the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] =
+ server->server_fib_index;
+
+ h0->gateway_ip_address.as_u32 = proxy->dhcp_src_address.ip4.as_u32;
+ pkts_to_server++;
+
+ o = (dhcp_option_t *) h0->options;
+
+ fib_index = im->fib_index_by_sw_if_index
+ [vnet_buffer(b0)->sw_if_index[VLIB_RX]];
+
+ end = b0->data + b0->current_data + b0->current_length;
+ /* TLVs are not performance-friendly... check bounds before
+ dereferencing */
+ while ((u8 *) o < end && o->option != 0xFF /* end of options */)
+ {
+ if (DHCP_PACKET_OPTION_MSG_TYPE == o->option)
+ {
+ if (DHCP_PACKET_DISCOVER == o->data[0])
+ {
+ is_discover = 1;
+ }
+ }
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+ }
+
+ fl = vlib_buffer_get_free_list (vm, vlib_buffer_get_free_list_index (b0));
+ /* start writing at (dhcp_option_t *) o; some packets have padding */
+ if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes)
+ {
+ next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ pkts_too_big++;
+ goto do_trace;
+ }
+
+ if (((u8 *) o < end) && (o->option == 0xFF))
+ {
+ vnet_main_t *vnm = vnet_get_main();
+ u16 old_l0, new_l0;
+ ip4_address_t _ia0, * ia0 = &_ia0;
+ dhcp_vss_t *vss;
+ vnet_sw_interface_t *swif;
+
+ original_sw_if_index = sw_if_index =
+ vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ swif = vnet_get_sw_interface (vnm, sw_if_index);
+ if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
+ sw_if_index = swif->unnumbered_sw_if_index;
+
+ /*
+ * Get the first ip4 address on the [client-side]
+ * RX interface, if not unnumbered. otherwise use
+ * the loopback interface's ip address.
+ */
+ ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0);
+
+ if (ia0 == 0)
+ {
+ error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS;
+ next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ pkts_no_interface_address++;
+ goto do_trace;
+ }
+
+ /* Add option 82 */
+ o->option = 82; /* option 82 */
+ o->length = 12; /* 12 octets to follow */
+ o->data[0] = 1; /* suboption 1, circuit ID (=FIB id) */
+ o->data[1] = 4; /* length of suboption */
+ o->data[2] = (original_sw_if_index >> 24) & 0xFF;
+ o->data[3] = (original_sw_if_index >> 16) & 0xFF;
+ o->data[4] = (original_sw_if_index >> 8) & 0xFF;
+ o->data[5] = (original_sw_if_index >> 0) & 0xFF;
+ o->data[6] = 5; /* suboption 5 (client RX intfc address) */
+ o->data[7] = 4; /* length 4 */
+ o->data[8] = ia0->as_u8[0];
+ o->data[9] = ia0->as_u8[1];
+ o->data[10] = ia0->as_u8[2];
+ o->data[11] = ia0->as_u8[3];
+ o->data[12] = 0xFF;
+
+ vss = dhcp_get_vss_info (dpm, fib_index, FIB_PROTOCOL_IP4);
+ if (NULL != vss)
+ {
+ u32 opt82_fib_id=0, opt82_oui=0;
+
+ opt82_oui = vss->oui;
+ opt82_fib_id = vss->fib_id;
+
+ o->data[12] = 151; /* vss suboption */
+ if (255 == opt82_fib_id) {
+ o->data[13] = 1; /* length */
+ o->data[14] = 255; /* vss option type */
+ o->data[15] = 152; /* vss control suboption */
+ o->data[16] = 0; /* length */
+ /* and a new "end-of-options" option (0xff) */
+ o->data[17] = 0xFF;
+ o->length += 5;
+ } else {
+ o->data[13] = 8; /* length */
+ o->data[14] = 1; /* vss option type */
+ o->data[15] = (opt82_oui >> 16) & 0xff;
+ o->data[16] = (opt82_oui >> 8) & 0xff;
+ o->data[17] = (opt82_oui ) & 0xff;
+ o->data[18] = (opt82_fib_id >> 24) & 0xff;
+ o->data[19] = (opt82_fib_id >> 16) & 0xff;
+ o->data[20] = (opt82_fib_id >> 8) & 0xff;
+ o->data[21] = (opt82_fib_id) & 0xff;
+ o->data[22] = 152; /* vss control suboption */
+ o->data[23] = 0; /* length */
+
+ /* and a new "end-of-options" option (0xff) */
+ o->data[24] = 0xFF;
+ o->length += 12;
+ }
+ }
+
+ len = o->length + 3;
+ b0->current_length += len;
+ /* Fix IP header length and checksum */
+ old_l0 = ip0->length;
+ new_l0 = clib_net_to_host_u16 (old_l0);
+ new_l0 += len;
+ new_l0 = clib_host_to_net_u16 (new_l0);
+ ip0->length = new_l0;
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ /* Fix UDP length */
+ new_l0 = clib_net_to_host_u16 (u0->length);
+ new_l0 += len;
+ u0->length = clib_host_to_net_u16 (new_l0);
+ } else {
+ vlib_node_increment_counter
+ (vm, dhcp_proxy_to_server_node.index,
+ DHCP_PROXY_ERROR_OPTION_82_ERROR, 1);
+ }
+
+ next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP;
+
+ /*
+ * If we have multiple servers configured and this is the
+ * client's discover message, then send copies to each of
+ * those servers
+ */
+ if (is_discover && vec_len(proxy->dhcp_servers) > 1)
+ {
+ u32 ii;
+
+ for (ii = 1; ii < vec_len(proxy->dhcp_servers); ii++)
+ {
+ vlib_buffer_t *c0;
+ u32 ci0;
+
+ c0 = vlib_buffer_copy(vm, b0);
+ ci0 = vlib_get_buffer_index(vm, c0);
+ server = &proxy->dhcp_servers[ii];
+
+ ip0 = vlib_buffer_get_current (c0);
+
+ sum0 = ip0->checksum;
+ old0 = ip0->dst_address.as_u32;
+ new0 = server->dhcp_server.ip4.as_u32;
+ ip0->dst_address.as_u32 = server->dhcp_server.ip4.as_u32;
+ sum0 = ip_csum_update (sum0, old0, new0,
+ ip4_header_t /* structure */,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ to_next[0] = ci0;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ ci0, next0);
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ dhcp_proxy_trace_t *tr;
+
+ tr = vlib_add_trace (vm, node, c0, sizeof (*tr));
+ tr->which = 0; /* to server */
+ tr->error = error0;
+ tr->original_sw_if_index = original_sw_if_index;
+ tr->sw_if_index = sw_if_index;
+ if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP)
+ tr->trace_ip4_address.as_u32 = server->dhcp_server.ip4.as_u32;
+ }
+
+ if (PREDICT_FALSE(0 == n_left_to_next))
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+ }
+ }
+ }
+ do_trace:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->which = 0; /* to server */
+ tr->error = error0;
+ tr->original_sw_if_index = original_sw_if_index;
+ tr->sw_if_index = sw_if_index;
+ if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP)
+ tr->trace_ip4_address.as_u32 =
+ proxy->dhcp_servers[0].dhcp_server.ip4.as_u32;
+ }
+
+ do_enqueue:
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index,
+ DHCP_PROXY_ERROR_RELAY_TO_CLIENT,
+ pkts_to_client);
+ vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index,
+ DHCP_PROXY_ERROR_RELAY_TO_SERVER,
+ pkts_to_server);
+ vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index,
+ DHCP_PROXY_ERROR_NO_SERVER,
+ pkts_no_server);
+ vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index,
+ DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS,
+ pkts_no_interface_address);
+ vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index,
+ DHCP_PROXY_ERROR_PKT_TOO_BIG,
+ pkts_too_big);
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (dhcp_proxy_to_server_node, static) = {
+ .function = dhcp_proxy_to_server_input,
+ .name = "dhcp-proxy-to-server",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = DHCP_PROXY_N_ERROR,
+ .error_strings = dhcp_proxy_error_strings,
+
+ .n_next_nodes = DHCP_PROXY_TO_SERVER_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s] = n,
+ foreach_dhcp_proxy_to_server_input_next
+#undef _
+ },
+
+ .format_buffer = format_dhcp_proxy_header_with_length,
+ .format_trace = format_dhcp_proxy_trace,
+#if 0
+ .unformat_buffer = unformat_dhcp_proxy_header,
+#endif
+};
+
+static uword
+dhcp_proxy_to_client_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, * from;
+ ethernet_main_t *em = ethernet_get_main (vm);
+ dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
+ vnet_main_t * vnm = vnet_get_main();
+ ip4_main_t * im = &ip4_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ udp_header_t * u0;
+ dhcp_header_t * h0;
+ ip4_header_t * ip0 = 0;
+ ip4_address_t * ia0 = 0;
+ u32 old0, new0;
+ ip_csum_t sum0;
+ ethernet_interface_t *ei0;
+ ethernet_header_t *mac0;
+ vnet_hw_interface_t *hi0;
+ vlib_frame_t *f0;
+ u32 * to_next0;
+ u32 sw_if_index = ~0;
+ vnet_sw_interface_t *si0;
+ u32 error0 = (u32)~0;
+ vnet_sw_interface_t *swif;
+ u32 fib_index;
+ dhcp_proxy_t *proxy;
+ dhcp_server_t *server;
+ u32 original_sw_if_index = (u32) ~0;
+ ip4_address_t relay_addr = {
+ .as_u32 = 0,
+ };
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ /*
+ * udp_local hands us the DHCP header, need udp hdr,
+ * ip hdr to relay to client
+ */
+ vlib_buffer_advance (b0, -(sizeof(*u0)));
+ u0 = vlib_buffer_get_current (b0);
+
+ vlib_buffer_advance (b0, -(sizeof(*ip0)));
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Consumed by dhcp client code? */
+ if (dhcp_client_for_us (bi0, b0, ip0, u0, h0))
+ continue;
+
+ if (1 /* dpm->insert_option_82 */)
+ {
+ dhcp_option_t *o = (dhcp_option_t *) h0->options;
+ dhcp_option_t *sub;
+
+ /* Parse through TLVs looking for option 82.
+ The circuit-ID suboption carries the RX sw_if_index
+ we need to track down the client-facing interface */
+
+ while (o->option != 0xFF /* end of options */ &&
+ (u8 *) o < (b0->data + b0->current_data + b0->current_length))
+ {
+ if (o->option == 82)
+ {
+ u32 vss_exist = 0;
+ u32 vss_ctrl = 0;
+ sub = (dhcp_option_t *) &o->data[0];
+ /* bound the walk to option 82's payload: o->data + o->length */
+ while (sub->option != 0xFF /* end of options */ &&
+ (u8 *) sub < o->data + o->length) {
+ /* If this is one of ours, it will have
+ total length 12, circuit-id suboption type,
+ and the sw_if_index */
+ if (sub->option == 1 && sub->length == 4)
+ {
+ sw_if_index = ((sub->data[0] << 24) |
+ (sub->data[1] << 16) |
+ (sub->data[2] << 8) |
+ (sub->data[3]));
+ }
+ else if (sub->option == 5 && sub->length == 4)
+ {
+ relay_addr.as_u8[0] = sub->data[0];
+ relay_addr.as_u8[1] = sub->data[1];
+ relay_addr.as_u8[2] = sub->data[2];
+ relay_addr.as_u8[3] = sub->data[3];
+ }
+ else if (sub->option == 151 &&
+ sub->length == 7 &&
+ sub->data[0] == 1)
+ vss_exist = 1;
+ else if (sub->option == 152 && sub->length == 0)
+ vss_ctrl = 1;
+ sub = (dhcp_option_t *)
+ (((uword) sub) + (sub->length + 2));
+ }
+ if (vss_ctrl && vss_exist)
+ vlib_node_increment_counter
+ (vm, dhcp_proxy_to_client_node.index,
+ DHCP_PROXY_ERROR_OPTION_82_VSS_NOT_PROCESSED, 1);
+
+ }
+ o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+ }
+ }
+
+ if (sw_if_index == (u32)~0)
+ {
+ error0 = DHCP_PROXY_ERROR_NO_OPTION_82;
+
+ drop_packet:
+ vlib_node_increment_counter (vm, dhcp_proxy_to_client_node.index,
+ error0, 1);
+ f0 = vlib_get_frame_to_node (vm, dpm->error_drop_node_index);
+ to_next0 = vlib_frame_vector_args (f0);
+ to_next0[0] = bi0;
+ f0->n_vectors = 1;
+ vlib_put_frame_to_node (vm, dpm->error_drop_node_index, f0);
+ goto do_trace;
+ }
+
+ if (relay_addr.as_u32 == 0)
+ {
+ error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ADDR;
+ goto drop_packet;
+ }
+
+ if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index))
+ {
+ error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ITF;
+ goto drop_packet;
+ }
+
+ fib_index = im->fib_index_by_sw_if_index [sw_if_index];
+ proxy = dhcp_get_proxy(dpm, fib_index, FIB_PROTOCOL_IP4);
+
+ if (PREDICT_FALSE (NULL == proxy))
+ {
+ error0 = DHCP_PROXY_ERROR_NO_SERVER;
+ goto drop_packet;
+ }
+
+ vec_foreach(server, proxy->dhcp_servers)
+ {
+ if (ip0->src_address.as_u32 == server->dhcp_server.ip4.as_u32)
+ {
+ goto server_found;
+ }
+ }
+
+ error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS;
+ goto drop_packet;
+
+ server_found:
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index;
+
+ swif = vnet_get_sw_interface (vnm, sw_if_index);
+ original_sw_if_index = sw_if_index;
+ if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
+ sw_if_index = swif->unnumbered_sw_if_index;
+
+ ia0 = ip4_interface_first_address (&ip4_main, sw_if_index, 0);
+ if (ia0 == 0)
+ {
+ error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS;
+ goto drop_packet;
+ }
+
+ if (relay_addr.as_u32 != ia0->as_u32)
+ {
+ error0 = DHCP_PROXY_ERROR_BAD_YIADDR;
+ goto drop_packet;
+ }
+
+ u0->checksum = 0;
+ u0->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcp_to_client);
+ sum0 = ip0->checksum;
+ old0 = ip0->dst_address.as_u32;
+ new0 = 0xFFFFFFFF;
+ ip0->dst_address.as_u32 = new0;
+ sum0 = ip_csum_update (sum0, old0, new0,
+ ip4_header_t /* structure */,
+ dst_address /* offset of changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ sum0 = ip0->checksum;
+ old0 = ip0->src_address.as_u32;
+ new0 = ia0->as_u32;
+ ip0->src_address.as_u32 = new0;
+ sum0 = ip_csum_update (sum0, old0, new0,
+ ip4_header_t /* structure */,
+ src_address /* offset of changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ vlib_buffer_advance (b0, -(sizeof(ethernet_header_t)));
+ si0 = vnet_get_sw_interface (vnm, original_sw_if_index);
+ if (si0->type == VNET_SW_INTERFACE_TYPE_SUB)
+ vlib_buffer_advance (b0, -4 /* space for VLAN tag */);
+
+ mac0 = vlib_buffer_get_current (b0);
+
+ hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index);
+ ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance);
+ clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address));
+ memset (mac0->dst_address, 0xff, sizeof (mac0->dst_address));
+ mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ?
+ clib_host_to_net_u16 (0x8100) : clib_host_to_net_u16 (0x0800);
+
+ if (si0->type == VNET_SW_INTERFACE_TYPE_SUB)
+ {
+ u32 * vlan_tag = (u32 *)(mac0+1);
+ u32 tmp;
+ tmp = (si0->sub.id << 16) | 0x0800;
+ *vlan_tag = clib_host_to_net_u32 (tmp);
+ }
+
+ /* $$$ This needs to be rewritten, for sure */
+ f0 = vlib_get_frame_to_node (vm, hi0->output_node_index);
+ to_next0 = vlib_frame_vector_args (f0);
+ to_next0[0] = bi0;
+ f0->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi0->output_node_index, f0);
+
+ do_trace:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->which = 1; /* to client */
+ tr->trace_ip4_address.as_u32 = ia0 ? ia0->as_u32 : 0;
+ tr->error = error0;
+ tr->original_sw_if_index = original_sw_if_index;
+ tr->sw_if_index = sw_if_index;
+ }
+ }
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (dhcp_proxy_to_client_node, static) = {
+ .function = dhcp_proxy_to_client_input,
+ .name = "dhcp-proxy-to-client",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = DHCP_PROXY_N_ERROR,
+ .error_strings = dhcp_proxy_error_strings,
+ .format_buffer = format_dhcp_proxy_header_with_length,
+ .format_trace = format_dhcp_proxy_trace,
+#if 0
+ .unformat_buffer = unformat_dhcp_proxy_header,
+#endif
+};
+
+static clib_error_t *
+dhcp4_proxy_init (vlib_main_t * vm)
+{
+ dhcp_proxy_main_t * dm = &dhcp_proxy_main;
+ vlib_node_t * error_drop_node;
+
+ error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
+ dm->error_drop_node_index = error_drop_node->index;
+
+ udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_client,
+ dhcp_proxy_to_client_node.index, 1 /* is_ip4 */);
+
+ udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_server,
+ dhcp_proxy_to_server_node.index, 1 /* is_ip4 */);
+
+ return 0;
+}
+
+
+VLIB_INIT_FUNCTION (dhcp4_proxy_init);
+
+int
+dhcp4_proxy_set_server (ip46_address_t *addr,
+ ip46_address_t *src_addr,
+ u32 rx_table_id,
+ u32 server_table_id,
+ int is_del)
+{
+ u32 rx_fib_index = 0;
+ int rc = 0;
+
+ const fib_prefix_t all_1s =
+ {
+ .fp_len = 32,
+ .fp_addr.ip4.as_u32 = 0xffffffff,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+
+ if (ip46_address_is_zero(addr))
+ return VNET_API_ERROR_INVALID_DST_ADDRESS;
+
+ if (ip46_address_is_zero(src_addr))
+ return VNET_API_ERROR_INVALID_SRC_ADDRESS;
+
+ rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+ rx_table_id,
+ FIB_SOURCE_DHCP);
+
+ if (is_del)
+ {
+ if (dhcp_proxy_server_del (FIB_PROTOCOL_IP4, rx_fib_index,
+ addr, server_table_id))
+ {
+ fib_table_entry_special_remove(rx_fib_index,
+ &all_1s,
+ FIB_SOURCE_DHCP);
+ fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP);
+ }
+ }
+ else
+ {
+ if (dhcp_proxy_server_add (FIB_PROTOCOL_IP4,
+ addr, src_addr,
+ rx_fib_index, server_table_id))
+ {
+ fib_table_entry_special_add(rx_fib_index,
+ &all_1s,
+ FIB_SOURCE_DHCP,
+ FIB_ENTRY_FLAG_LOCAL);
+ fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP);
+ }
+ }
+ fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP);
+
+ return (rc);
+}
+
+static clib_error_t *
+dhcp4_proxy_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip46_address_t server_addr, src_addr;
+ u32 server_table_id = 0, rx_table_id = 0;
+ int is_del = 0;
+ int set_src = 0, set_server = 0;
+
+ memset(&server_addr, 0, sizeof(server_addr));
+ memset(&src_addr, 0, sizeof(src_addr));
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "server %U",
+ unformat_ip4_address, &server_addr.ip4))
+ set_server = 1;
+ else if (unformat (input, "server-fib-id %d", &server_table_id))
+ ;
+ else if (unformat (input, "rx-fib-id %d", &rx_table_id))
+ ;
+ else if (unformat(input, "src-address %U",
+ unformat_ip4_address, &src_addr.ip4))
+ set_src = 1;
+ else if (unformat (input, "delete") ||
+ unformat (input, "del"))
+ is_del = 1;
+ else
+ break;
+ }
+
+ if (is_del || (set_server && set_src))
+ {
+ int rv;
+
+ rv = dhcp4_proxy_set_server (&server_addr, &src_addr, rx_table_id,
+ server_table_id, is_del);
+ switch (rv)
+ {
+ case 0:
+ return 0;
+
+ case VNET_API_ERROR_INVALID_DST_ADDRESS:
+ return clib_error_return (0, "Invalid server address");
+
+ case VNET_API_ERROR_INVALID_SRC_ADDRESS:
+ return clib_error_return (0, "Invalid src address");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return
+ (0, "Fib id %d: no per-fib DHCP server configured", rx_table_id);
+
+ default:
+ return clib_error_return (0, "BUG: rv %d", rv);
+ }
+ }
+ else
+ return clib_error_return (0, "parse error`%U'",
+ format_unformat_error, input);
+}
+
+VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = {
+ .path = "set dhcp proxy",
+ .short_help = "set dhcp proxy [del] server <ip-addr> src-address <ip-addr> [server-fib-id <n>] [rx-fib-id <n>]",
+ .function = dhcp4_proxy_set_command_fn,
+};
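+
+/*
+ * Example (addresses and table ids are placeholders):
+ *   set dhcp proxy server 10.0.100.1 src-address 10.0.99.99
+ *       rx-fib-id 0 server-fib-id 0
+ */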
+
+static u8 *
+format_dhcp4_proxy_server (u8 * s, va_list * args)
+{
+ dhcp_proxy_t *proxy = va_arg (*args, dhcp_proxy_t *);
+ ip4_fib_t * rx_fib, * server_fib;
+ dhcp_server_t *server;
+
+ if (proxy == 0)
+ {
+ s = format (s, "%=14s%=16s%s", "RX FIB", "Src Address",
+ "Servers FIB,Address");
+ return s;
+ }
+
+ rx_fib = ip4_fib_get(proxy->rx_fib_index);
+
+ s = format (s, "%=14u%=16U",
+ rx_fib->table_id,
+ format_ip46_address, &proxy->dhcp_src_address, IP46_TYPE_ANY);
+
+ vec_foreach(server, proxy->dhcp_servers)
+ {
+ server_fib = ip4_fib_get(server->server_fib_index);
+ s = format (s, "%u,%U ",
+ server_fib->table_id,
+ format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY);
+ }
+ return s;
+}
+
+static int
+dhcp4_proxy_show_walk (dhcp_proxy_t *server,
+ void *ctx)
+{
+ vlib_main_t * vm = ctx;
+
+ vlib_cli_output (vm, "%U", format_dhcp4_proxy_server, server);
+
+ return (1);
+}
+
+static clib_error_t *
+dhcp4_proxy_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_cli_output (vm, "%U", format_dhcp4_proxy_server, NULL /* header line */);
+
+ dhcp_proxy_walk(FIB_PROTOCOL_IP4, dhcp4_proxy_show_walk, vm);
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = {
+ .path = "show dhcp proxy",
+ .short_help = "Display dhcp proxy server info",
+ .function = dhcp4_proxy_show_command_fn,
+};
+
+static clib_error_t *
+dhcp_option_82_vss_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0, got_new_vpn_id=0;
+ u32 oui=0, fib_id=0, tbl_id=~0;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+
+ if (unformat(input, "delete") || unformat(input, "del"))
+ is_del = 1;
+ else if (unformat (input, "oui %d", &oui))
+ got_new_vpn_id = 1;
+ else if (unformat (input, "vpn-id %d", &fib_id))
+ got_new_vpn_id = 1;
+ else if (unformat (input, "table %d", &tbl_id))
+ got_new_vpn_id = 1;
+ else
+ break;
+ }
+ if (tbl_id == ~0)
+ return clib_error_return (0, "no table ID specified.");
+
+ if (is_del || got_new_vpn_id)
+ {
+ int rv;
+ rv = dhcp_proxy_set_vss(FIB_PROTOCOL_IP4, tbl_id, oui, fib_id, is_del);
+ switch (rv)
+ {
+ case 0:
+ return 0;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "option 82 vss(oui:%d, vpn-id:%d) not found in table %d",
+ oui, fib_id, tbl_id);
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "option 82 vss for table %d not found in in pool.",
+ tbl_id);
+ default:
+ return clib_error_return (0, "BUG: rv %d", rv);
+ }
+ }
+ else
+ return clib_error_return (0, "parse error`%U'",
+ format_unformat_error, input);
+}
+
+VLIB_CLI_COMMAND (dhcp_proxy_vss_command,static) = {
+ .path = "set dhcp option-82 vss",
+ .short_help = "set dhcp option-82 vss [del] table <table id> oui <oui> vpn-id <vpn-id>",
+ .function = dhcp_option_82_vss_fn,
+};
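+
+/*
+ * Example (values are placeholders):
+ *   set dhcp option-82 vss table 0 oui 9 vpn-id 13
+ */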
+
+static clib_error_t *
+dhcp_vss_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ dhcp_vss_walk(FIB_PROTOCOL_IP4, dhcp_vss_show_walk, vm);
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = {
+ .path = "show dhcp vss",
+ .short_help = "show dhcp VSS",
+ .function = dhcp_vss_show_command_fn,
+};
+
+static clib_error_t *
+dhcp_option_82_address_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main();
+ u32 sw_if_index0=0, sw_if_index;
+ vnet_sw_interface_t *swif;
+ ip4_address_t *ia0;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+
+ if (unformat(input, "%U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index0))
+ {
+ swif = vnet_get_sw_interface (vnm, sw_if_index0);
+ sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ?
+ swif->unnumbered_sw_if_index : sw_if_index0;
+ ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0);
+ if (ia0)
+ {
+ vlib_cli_output (vm, "%=20s%=20s", "interface",
+ "source IP address");
+
+ vlib_cli_output (vm, "%=20U%=20U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip4_address, ia0);
+ }
+ else
+ vlib_cli_output (vm, "%=34s %=20U",
+ "No IPv4 address configured on",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index);
+ }
+ else
+ break;
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = {
+ .path = "show dhcp option-82-address interface",
+ .short_help = "show dhcp option-82-address interface <interface>",
+ .function = dhcp_option_82_address_show_command_fn,
+};
diff --git a/src/vnet/dhcp/dhcp6_packet.h b/src/vnet/dhcp/dhcp6_packet.h
new file mode 100644
index 00000000..ddcde7a0
--- /dev/null
+++ b/src/vnet/dhcp/dhcp6_packet.h
@@ -0,0 +1,183 @@
+#ifndef included_vnet_dhcp6_packet_h
+#define included_vnet_dhcp6_packet_h
+
+/*
+ * DHCP packet format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip6_packet.h>
+
+// #define DHCP_VRF_NAME_MAX_LEN L3VM_MAX_NAME_STR_LEN
+// #define DHCPV6_MAX_VRF_NAME_LEN L3VM_MAX_NAME_STR_LEN
+#define DHCP_MAX_RELAY_ADDR 16
+#define PROTO_UDP 17
+#define DHCPV6_CLIENT_PORT 546
+#define DHCPV6_SERVER_PORT 547
+#define HOP_COUNT_LIMIT 32
+#define DHCPV6_CISCO_ENT_NUM 9
+
+/*
+ * DHCPv6 message types
+ */
+typedef enum dhcpv6_msg_type_{
+ DHCPV6_MSG_SOLICIT = 1,
+ DHCPV6_MSG_ADVERTISE = 2,
+ DHCPV6_MSG_REQUEST = 3,
+ DHCPV6_MSG_CONFIRM = 4,
+ DHCPV6_MSG_RENEW = 5,
+ DHCPV6_MSG_REBIND = 6,
+ DHCPV6_MSG_REPLY = 7,
+ DHCPV6_MSG_RELEASE = 8,
+ DHCPV6_MSG_DECLINE = 9,
+ DHCPV6_MSG_RECONFIGURE = 10,
+ DHCPV6_MSG_INFORMATION_REQUEST = 11,
+ DHCPV6_MSG_RELAY_FORW = 12,
+ DHCPV6_MSG_RELAY_REPL = 13,
+} dhcpv6_msg_type_t;
+
+/*
+ * DHCPv6 options types
+ */
+enum {
+ DHCPV6_OPTION_CLIENTID = 1,
+ DHCPV6_OPTION_SERVERID = 2,
+ DHCPV6_OPTION_IA_NA = 3,
+ DHCPV6_OPTION_IA_TA = 4,
+ DHCPV6_OPTION_IAADDR = 5,
+ DHCPV6_OPTION_ORO = 6,
+ DHCPV6_OPTION_PREFERENCE = 7,
+ DHCPV6_OPTION_ELAPSED_TIME = 8,
+ DHCPV6_OPTION_RELAY_MSG = 9,
+ DHCPV6_OPTION_AUTH = 11,
+ DHCPV6_OPTION_UNICAST = 12,
+ DHCPV6_OPTION_STATUS_CODE = 13,
+ DHCPV6_OPTION_RAPID_COMMIT = 14,
+ DHCPV6_OPTION_USER_CLASS = 15,
+ DHCPV6_OPTION_VENDOR_CLASS = 16,
+ DHCPV6_OPTION_VENDOR_OPTS = 17,
+ DHCPV6_OPTION_INTERFACE_ID = 18, // relay agent fills this
+ DHCPV6_OPTION_RECONF_MSG = 19,
+ DHCPV6_OPTION_RECONF_ACCEPT = 20,
+ DHCPV6_OPTION_REMOTEID = 37, // relay agent fills this
+ DHCPV6_OPTION_VSS = 68, // relay agent fills this
+ DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS = 79,
+ DHCPV6_OPTION_MAX
+};
+
+/*
+ * DHCPv6 status codes
+ */
+enum {
+ DHCPV6_STATUS_SUCCESS = 0,
+ DHCPV6_STATUS_UNSPEC_FAIL = 1,
+ DHCPV6_STATUS_NOADDRS_AVAIL = 2,
+ DHCPV6_STATUS_NO_BINDING = 3,
+ DHCPV6_STATUS_NOT_ONLINK = 4,
+ DHCPV6_STATUS_USE_MULTICAST = 5,
+};
+
+/*
+ * DHCPv6 DUID types
+ */
+enum {
+ DHCPV6_DUID_LLT = 1, /* DUID Based on Link-layer Address Plus Time */
+ DHCPV6_DUID_EN = 2, /* DUID Based on Enterprise Number */
+ DHCPV6_DUID_LL = 3, /* DUID Based on Link-layer Address */
+};
+
+// Structure for the DHCPv6 payload from the client
+typedef struct dhcpv6_hdr_ {
+ union {
+ u8 msg_type; // DHCP msg type
+ u32 xid; // transaction id; its first octet overlays msg_type
+ }u;
+ u8 data[0];
+} dhcpv6_header_t;
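+
+/*
+ * Illustrative only: since msg_type overlays the first octet of the
+ * union, the 3-byte transaction id can be extracted as
+ *
+ *   u32 xid = clib_net_to_host_u32 (h->u.xid) & 0x00ffffff;
+ */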
+
+
+
+typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ {
+ dhcpv6_header_t *pkt;
+ u32 pkt_len;
+ u32 dhcpv6_len; // DHCPv6 payload length
+// if_ordinal iod;
+ u32 if_index;
+ u32 ctx_id;
+ char ctx_name[32+1];
+ u8 dhcp_msg_type;
+}) dhcpv6_relay_ctx_t;
+
+//Structure for DHCPv6 RELAY-FORWARD and DHCPv6 RELAY-REPLY pkts
+typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ {
+ u8 msg_type;
+ u8 hop_count;
+ ip6_address_t link_addr;
+ ip6_address_t peer_addr;
+ u8 data[0];
+}) dhcpv6_relay_hdr_t;
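+
+/*
+ * Sketch of a relayed packet as built by the proxy below (the relay
+ * options follow the encapsulated client message):
+ *
+ *   ip6 | udp | relay hdr | OPTION_RELAY_MSG hdr | client message
+ *       | OPTION_INTERFACE_ID | [client link-layer addr] | [VSS]
+ */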
+
+typedef enum dhcp_stats_action_type_ {
+ DHCP_STATS_ACTION_FORWARDED=1,
+ DHCP_STATS_ACTION_RECEIVED,
+ DHCP_STATS_ACTION_DROPPED
+} dhcp_stats_action_type_t;
+//Generic counters for a packet
+typedef struct dhcp_stats_counters_ {
+ u64 rx_pkts; //counter for received pkts
+ u64 tx_pkts; //counter for forwarded pkts
+ u64 drops; //counter for dropped pkts
+} dhcp_stats_counters_t;
+
+
+typedef enum dhcpv6_stats_drop_reason_ {
+ DHCPV6_RELAY_PKT_DROP_RELAYDISABLE = 1,
+ DHCPV6_RELAY_PKT_DROP_MAX_HOPS,
+ DHCPV6_RELAY_PKT_DROP_VALIDATION_FAIL,
+ DHCPV6_RELAY_PKT_DROP_UNKNOWN_OP_INTF,
+ DHCPV6_RELAY_PKT_DROP_BAD_CONTEXT,
+ DHCPV6_RELAY_PKT_DROP_OPT_INSERT_FAIL,
+ DHCPV6_RELAY_PKT_DROP_REPLY_FROM_CLIENT,
+} dhcpv6_stats_drop_reason_t;
+
+typedef CLIB_PACKED (struct {
+ u16 option;
+ u16 length;
+ u8 data[0];
+}) dhcpv6_option_t;
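+
+/*
+ * Sketch of the 16-bit TLV walk used by the to-client node below;
+ * option and length are in network byte order and `end' is an
+ * assumption of this sketch:
+ *
+ *   dhcpv6_option_t *o = ...;
+ *   while ((u8 *) o < end)
+ *     o = (dhcpv6_option_t *) ((u8 *) o + sizeof (*o) +
+ *                              clib_net_to_host_u16 (o->length));
+ */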
+
+typedef CLIB_PACKED (struct {
+ dhcpv6_option_t opt;
+ u32 int_idx;
+}) dhcpv6_int_id_t;
+
+typedef CLIB_PACKED (struct {
+ dhcpv6_option_t opt;
+ u8 data[8]; // data[0]:type, data[1..7]: VPN ID
+}) dhcpv6_vss_t;
+
+typedef CLIB_PACKED (struct {
+ dhcpv6_option_t opt;
+ u32 ent_num;
+ u32 rmt_id;
+}) dhcpv6_rmt_id_t;
+
+typedef CLIB_PACKED (struct {
+ dhcpv6_option_t opt;
+ u16 link_type;
+ u8 data[6]; // data[0]:data[5]: MAC address
+}) dhcpv6_client_mac_t;
+
+
+#endif /* included_vnet_dhcp6_packet_h */
diff --git a/src/vnet/dhcp/dhcp6_proxy_error.def b/src/vnet/dhcp/dhcp6_proxy_error.def
new file mode 100644
index 00000000..55fa7317
--- /dev/null
+++ b/src/vnet/dhcp/dhcp6_proxy_error.def
@@ -0,0 +1,29 @@
+/*
+ * dhcp_proxy_error.def: dhcp proxy errors
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+dhcpv6_proxy_error (NONE, "no error")
+dhcpv6_proxy_error (NO_SERVER, "no dhcpv6 server configured")
+dhcpv6_proxy_error (RELAY_TO_SERVER, "DHCPV6 packets relayed to the server")
+dhcpv6_proxy_error (RELAY_TO_CLIENT, "DHCPV6 packets relayed to clients")
+dhcpv6_proxy_error (NO_INTERFACE_ADDRESS, "DHCPV6 no interface address")
+dhcpv6_proxy_error (WRONG_MESSAGE_TYPE, "DHCPV6 wrong message type")
+dhcpv6_proxy_error (NO_SRC_ADDRESS, "DHCPV6 no source IPv6 address configured")
+dhcpv6_proxy_error (NO_CIRCUIT_ID_OPTION, "DHCPv6 reply packets without circuit ID option")
+dhcpv6_proxy_error (NO_RELAY_MESSAGE_OPTION, "DHCPv6 reply packets without relay message option")
+dhcpv6_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCPv6 packets not from DHCPv6 server or server FIB")
+dhcpv6_proxy_error (PKT_TOO_BIG, "DHCPv6 packets which are too big")
+dhcpv6_proxy_error (WRONG_INTERFACE_ID_OPTION, "DHCPv6 reply to invalid interface")
+dhcpv6_proxy_error (MAX_HOPS_EXCEEDED, "DHCPv6 packets exceeding the relay hop count limit")
diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c
new file mode 100644
index 00000000..ce7a8fca
--- /dev/null
+++ b/src/vnet/dhcp/dhcp6_proxy_node.c
@@ -0,0 +1,1147 @@
+/*
+ * dhcp6_proxy_node.c: dhcpv6 proxy node processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/dhcp/dhcp_proxy.h>
+#include <vnet/dhcp/dhcp6_packet.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/fib/fib.h>
+
+static char * dhcpv6_proxy_error_strings[] = {
+#define dhcpv6_proxy_error(n,s) s,
+#include <vnet/dhcp/dhcp6_proxy_error.def>
+#undef dhcpv6_proxy_error
+};
+
+#define foreach_dhcpv6_proxy_to_server_input_next \
+ _ (DROP, "error-drop") \
+ _ (LOOKUP, "ip6-lookup") \
+ _ (SEND_TO_CLIENT, "dhcpv6-proxy-to-client")
+
+
+typedef enum {
+#define _(s,n) DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s,
+ foreach_dhcpv6_proxy_to_server_input_next
+#undef _
+ DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT,
+} dhcpv6_proxy_to_server_input_next_t;
+
+typedef struct {
+ /* 0 => to server, 1 => to client */
+ int which;
+ u8 packet_data[64];
+ u32 error;
+ u32 sw_if_index;
+ u32 original_sw_if_index;
+} dhcpv6_proxy_trace_t;
+
+static vlib_node_registration_t dhcpv6_proxy_to_server_node;
+static vlib_node_registration_t dhcpv6_proxy_to_client_node;
+
+/* the "all DHCPv6 servers" address, used when no unicast server address is configured */
+static ip6_address_t all_dhcpv6_server_address;
+static ip6_address_t all_dhcpv6_server_relay_agent_address;
+
+static u8 *
+format_dhcpv6_proxy_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ dhcpv6_proxy_trace_t * t = va_arg (*args, dhcpv6_proxy_trace_t *);
+
+ if (t->which == 0)
+ s = format (s, "DHCPV6 proxy: sent to server %U",
+ format_ip6_address, (ip6_address_t *) &t->packet_data[0]);
+ else
+ s = format (s, "DHCPV6 proxy: sent to client from %U",
+ format_ip6_address, (ip6_address_t *) &t->packet_data[0]);
+ if (t->error != (u32)~0)
+ s = format (s, " error: %s\n", dhcpv6_proxy_error_strings[t->error]);
+
+ s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n",
+ t->original_sw_if_index, t->sw_if_index);
+
+ return s;
+}
+
+static u8 *
+format_dhcpv6_proxy_header_with_length (u8 * s, va_list * args)
+{
+ dhcpv6_header_t * h = va_arg (*args, dhcpv6_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "dhcpv6 header truncated");
+
+ s = format (s, "DHCPV6 Proxy");
+
+ return s;
+}
+/* get first interface address */
+static ip6_address_t *
+ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index)
+{
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip_interface_address_t * ia = 0;
+ ip6_address_t * result = 0;
+
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip6_address_t * a = ip_interface_address_get_address (lm, ia);
+ if ((a->as_u8[0] & 0xe0) == 0x20 ||
+ (a->as_u8[0] & 0xfe) == 0xfc) {
+ result = a;
+ break;
+ }
+ }));
+ return result;
+}
+
+static inline void copy_ip6_address (ip6_address_t *dst,
+ ip6_address_t *src)
+{
+ dst->as_u64[0] = src->as_u64[0];
+ dst->as_u64[1] = src->as_u64[1];
+}
+
+static uword
+dhcpv6_proxy_to_server_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0;
+ u32 pkts_no_interface_address=0, pkts_exceeding_max_hop=0;
+ u32 pkts_no_src_address=0;
+ u32 pkts_wrong_msg_type=0;
+ u32 pkts_too_big=0;
+ ip6_main_t * im = &ip6_main;
+ ip6_address_t * src;
+ int bogus_length;
+ dhcp_proxy_t *proxy;
+ dhcp_server_t *server;
+ u32 rx_fib_idx = 0, server_fib_idx = 0;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vnet_main_t *vnm = vnet_get_main();
+ u32 sw_if_index = 0;
+ u32 rx_sw_if_index = 0;
+ vnet_sw_interface_t *swif;
+ u32 bi0;
+ vlib_buffer_t * b0;
+ udp_header_t * u0, *u1;
+ dhcpv6_header_t * h0; // client msg hdr
+ ip6_header_t * ip0, *ip1;
+ ip6_address_t _ia0, *ia0=&_ia0;
+ u32 next0;
+ u32 error0 = (u32) ~0;
+ dhcpv6_option_t *fwd_opt;
+ dhcpv6_relay_hdr_t *r1;
+ u16 len;
+ dhcpv6_int_id_t *id1;
+ dhcpv6_vss_t *vss1;
+ dhcpv6_client_mac_t *cmac; // client mac
+ ethernet_header_t * e_h0;
+ u8 client_src_mac[6];
+ vlib_buffer_free_list_t *fl;
+ dhcp_vss_t *vss;
+ u8 is_solicit = 0;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ h0 = vlib_buffer_get_current (b0);
+
+ /*
+ * udp_local hands us the DHCPV6 header.
+ */
+ u0 = (void *)h0 -(sizeof(*u0));
+ ip0 = (void *)u0 -(sizeof(*ip0));
+ e_h0 = (void *)ip0 - ethernet_buffer_header_size(b0);
+
+ clib_memcpy(client_src_mac, e_h0->src_address, 6);
+
+ switch (h0->u.msg_type) {
+ case DHCPV6_MSG_SOLICIT:
+ case DHCPV6_MSG_REQUEST:
+ case DHCPV6_MSG_CONFIRM:
+ case DHCPV6_MSG_RENEW:
+ case DHCPV6_MSG_REBIND:
+ case DHCPV6_MSG_RELEASE:
+ case DHCPV6_MSG_DECLINE:
+ case DHCPV6_MSG_INFORMATION_REQUEST:
+ case DHCPV6_MSG_RELAY_FORW:
+ /* send to server */
+ break;
+ case DHCPV6_MSG_RELAY_REPL:
+ /* send to client */
+ next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT;
+ error0 = 0;
+ pkts_to_client++;
+ goto do_enqueue;
+ default:
+ /* drop the packet */
+ pkts_wrong_msg_type++;
+ error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE;
+ next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ goto do_trace;
+
+ }
+
+ /* Send to DHCPV6 server via the configured FIB */
+ rx_sw_if_index = sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_idx = im->mfib_index_by_sw_if_index [rx_sw_if_index];
+ proxy = dhcp_get_proxy(dpm, rx_fib_idx, FIB_PROTOCOL_IP6);
+
+ if (PREDICT_FALSE (NULL == proxy))
+ {
+ error0 = DHCPV6_PROXY_ERROR_NO_SERVER;
+ next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ pkts_no_server++;
+ goto do_trace;
+ }
+
+ server = &proxy->dhcp_servers[0];
+ server_fib_idx = server->server_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = server_fib_idx;
+
+
+ /* relay-option header pointer */
+ vlib_buffer_advance(b0, -(sizeof(*fwd_opt)));
+ fwd_opt = vlib_buffer_get_current(b0);
+ /* relay message header pointer */
+ vlib_buffer_advance(b0, -(sizeof(*r1)));
+ r1 = vlib_buffer_get_current(b0);
+
+ vlib_buffer_advance(b0, -(sizeof(*u1)));
+ u1 = vlib_buffer_get_current(b0);
+
+ vlib_buffer_advance(b0, -(sizeof(*ip1)));
+ ip1 = vlib_buffer_get_current(b0);
+
+ /* fill in all that rubbish... */
+ len = clib_net_to_host_u16(u0->length) - sizeof(udp_header_t);
+ copy_ip6_address(&r1->peer_addr, &ip0->src_address);
+
+ r1->msg_type = DHCPV6_MSG_RELAY_FORW;
+ fwd_opt->length = clib_host_to_net_u16(len);
+ fwd_opt->option = clib_host_to_net_u16(DHCPV6_OPTION_RELAY_MSG);
+
+ /* The prepended relay header starts out uninitialized: seed the
+ * hop count from the inner relay header when re-relaying a
+ * relay-forward, otherwise start at zero */
+ r1->hop_count = (h0->u.msg_type == DHCPV6_MSG_RELAY_FORW) ?
+ ((dhcpv6_relay_hdr_t *) h0)->hop_count + 1 : 0;
+
+ if (PREDICT_FALSE(r1->hop_count >= HOP_COUNT_LIMIT))
+ {
+ error0 = DHCPV6_PROXY_ERROR_MAX_HOPS_EXCEEDED;
+ next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ pkts_exceeding_max_hop++;
+ goto do_trace;
+ }
+
+
+ /* If relay-fwd and src address is site or global unicast address */
+ if (h0->u.msg_type == DHCPV6_MSG_RELAY_FORW &&
+ ((ip0->src_address.as_u8[0] & 0xe0) == 0x20 ||
+ (ip0->src_address.as_u8[0] & 0xfe) == 0xfc))
+ {
+ /* Set link address to zero */
+ r1->link_addr.as_u64[0] = 0;
+ r1->link_addr.as_u64[1] = 0;
+ goto link_address_set;
+ }
+
+ /* if receiving interface is unnumbered, use receiving interface
+ * IP address as link address, otherwise use the loopback interface
+ * IP address as link address.
+ */
+
+ swif = vnet_get_sw_interface (vnm, rx_sw_if_index);
+ if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
+ sw_if_index = swif->unnumbered_sw_if_index;
+
+ ia0 = ip6_interface_first_global_or_site_address(&ip6_main, sw_if_index);
+ if (ia0 == 0)
+ {
+ error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS;
+ next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ pkts_no_interface_address++;
+ goto do_trace;
+ }
+
+ copy_ip6_address(&r1->link_addr, ia0);
+
+ link_address_set:
+ fl = vlib_buffer_get_free_list (vm, vlib_buffer_get_free_list_index (b0));
+
+ if ((b0->current_length+sizeof(*id1)+sizeof(*vss1)+sizeof(*cmac))
+ > fl->n_data_bytes)
+ {
+ error0 = DHCPV6_PROXY_ERROR_PKT_TOO_BIG;
+ next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ pkts_too_big++;
+ goto do_trace;
+ }
+
+ id1 = (dhcpv6_int_id_t *) (((uword) ip1) + b0->current_length);
+ b0->current_length += (sizeof (*id1));
+
+ id1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_INTERFACE_ID);
+ id1->opt.length = clib_host_to_net_u16(sizeof(rx_sw_if_index));
+ id1->int_idx = clib_host_to_net_u32(rx_sw_if_index);
+
+ u1->length =0;
+ if (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW)
+ {
+ cmac = (dhcpv6_client_mac_t *) (((uword) ip1) + b0->current_length);
+ b0->current_length += (sizeof (*cmac));
+ cmac->opt.length =clib_host_to_net_u16(sizeof(*cmac) -
+ sizeof(cmac->opt));
+ cmac->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS);
+ cmac->link_type = clib_host_to_net_u16(1); // ethernet
+ clib_memcpy(cmac->data, client_src_mac, 6);
+ u1->length += sizeof(*cmac);
+ }
+
+ vss = dhcp_get_vss_info(dpm, rx_fib_idx, FIB_PROTOCOL_IP6);
+
+ if (NULL != vss) {
+ vss1 = (dhcpv6_vss_t *) (((uword) ip1) + b0->current_length);
+ b0->current_length += (sizeof (*vss1));
+ vss1->opt.length =clib_host_to_net_u16(sizeof(*vss1) -
+ sizeof(vss1->opt));
+ vss1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_VSS);
+ vss1->data[0] = 1; // type
+ vss1->data[1] = vss->oui >>16 & 0xff;
+ vss1->data[2] = vss->oui >>8 & 0xff;
+ vss1->data[3] = vss->oui & 0xff;
+ vss1->data[4] = vss->fib_id >> 24 & 0xff;
+ vss1->data[5] = vss->fib_id >> 16 & 0xff;
+ vss1->data[6] = vss->fib_id >> 8 & 0xff;
+ vss1->data[7] = vss->fib_id & 0xff;
+ u1->length += sizeof(*vss1);
+ }
+
+ pkts_to_server++;
+ u1->checksum = 0;
+ u1->src_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_client);
+ u1->dst_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_server);
+
+ u1->length =
+ clib_host_to_net_u16( clib_net_to_host_u16(fwd_opt->length) +
+ sizeof(*r1) + sizeof(*fwd_opt) +
+ sizeof(*u1) + sizeof(*id1) + u1->length);
+
+ memset(ip1, 0, sizeof(*ip1));
+ ip1->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+ ip1->payload_length = u1->length;
+ ip1->protocol = PROTO_UDP;
+ ip1->hop_limit = HOP_COUNT_LIMIT;
+ src = ((server->dhcp_server.ip6.as_u64[0] ||
+ server->dhcp_server.ip6.as_u64[1]) ?
+ &server->dhcp_server.ip6 :
+ &all_dhcpv6_server_address);
+ copy_ip6_address(&ip1->dst_address, src);
+
+
+ ia0 = ip6_interface_first_global_or_site_address
+ (&ip6_main, vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+
+ src = (proxy->dhcp_src_address.ip6.as_u64[0] ||
+ proxy->dhcp_src_address.ip6.as_u64[1]) ?
+ &proxy->dhcp_src_address.ip6 : ia0;
+ if (ia0 == 0)
+ {
+ error0 = DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS;
+ next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+ pkts_no_src_address++;
+ goto do_trace;
+ }
+
+ copy_ip6_address (&ip1->src_address, src);
+
+
+ u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1,
+ &bogus_length);
+ ASSERT(bogus_length == 0);
+
+ next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP;
+
+ is_solicit = (DHCPV6_MSG_SOLICIT == h0->u.msg_type);
+
+ /*
+ * If we have multiple servers configured and this is the
+ * client's discover message, then send copies to each of
+ * those servers
+ */
+ if (is_solicit && vec_len(proxy->dhcp_servers) > 1)
+ {
+ u32 ii;
+
+ for (ii = 1; ii < vec_len(proxy->dhcp_servers); ii++)
+ {
+ vlib_buffer_t *c0;
+ u32 ci0;
+
+ c0 = vlib_buffer_copy(vm, b0);
+ ci0 = vlib_get_buffer_index(vm, c0);
+ server = &proxy->dhcp_servers[ii];
+
+ /* rewrite the copy's header (ip1 still points into b0) and
+ * recompute the UDP checksum over the new pseudo-header */
+ ip1 = vlib_buffer_get_current (c0);
+
+ src = ((server->dhcp_server.ip6.as_u64[0] ||
+ server->dhcp_server.ip6.as_u64[1]) ?
+ &server->dhcp_server.ip6 :
+ &all_dhcpv6_server_address);
+ copy_ip6_address(&ip1->dst_address, src);
+
+ u1 = (udp_header_t *) (ip1 + 1);
+ u1->checksum = 0;
+ u1->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, c0, ip1,
+ &bogus_length);
+
+ to_next[0] = ci0;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ ci0, next0);
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ dhcpv6_proxy_trace_t *tr;
+
+ tr = vlib_add_trace (vm, node, c0, sizeof (*tr));
+ tr->which = 0; /* to server */
+ tr->error = error0;
+ tr->original_sw_if_index = rx_sw_if_index;
+ tr->sw_if_index = sw_if_index;
+ if (next0 == DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP)
+ copy_ip6_address((ip6_address_t *)&tr->packet_data[0],
+ &server->dhcp_server.ip6);
+ }
+
+ if (PREDICT_FALSE(0 == n_left_to_next))
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+ }
+ }
+ }
+
+ do_trace:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->which = 0; /* to server */
+ tr->error = error0;
+ tr->original_sw_if_index = rx_sw_if_index;
+ tr->sw_if_index = sw_if_index;
+ if (DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP == next0)
+ copy_ip6_address((ip6_address_t *)&tr->packet_data[0],
+ &proxy->dhcp_servers[0].dhcp_server.ip6);
+ }
+
+ do_enqueue:
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index,
+ DHCPV6_PROXY_ERROR_RELAY_TO_CLIENT,
+ pkts_to_client);
+ vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index,
+ DHCPV6_PROXY_ERROR_RELAY_TO_SERVER,
+ pkts_to_server);
+ vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index,
+ DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS,
+ pkts_no_interface_address);
+ vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index,
+ DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE,
+ pkts_wrong_msg_type);
+ vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index,
+ DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS,
+ pkts_no_src_address);
+ vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index,
+ DHCPV6_PROXY_ERROR_PKT_TOO_BIG,
+ pkts_too_big);
+ vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index,
+ DHCPV6_PROXY_ERROR_MAX_HOPS_EXCEEDED,
+ pkts_exceeding_max_hop);
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node, static) = {
+ .function = dhcpv6_proxy_to_server_input,
+ .name = "dhcpv6-proxy-to-server",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = DHCPV6_PROXY_N_ERROR,
+ .error_strings = dhcpv6_proxy_error_strings,
+
+ .n_next_nodes = DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s] = n,
+ foreach_dhcpv6_proxy_to_server_input_next
+#undef _
+ },
+
+ .format_buffer = format_dhcpv6_proxy_header_with_length,
+ .format_trace = format_dhcpv6_proxy_trace,
+#if 0
+ .unformat_buffer = unformat_dhcpv6_proxy_header,
+#endif
+};
+
+static uword
+dhcpv6_proxy_to_client_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+
+ u32 n_left_from, * from;
+ ethernet_main_t *em = ethernet_get_main (vm);
+ dhcp_proxy_main_t * dm = &dhcp_proxy_main;
+ dhcp_proxy_t *proxy;
+ dhcp_server_t *server;
+ vnet_main_t * vnm = vnet_get_main();
+ int bogus_length;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ udp_header_t * u0, *u1=0;
+ dhcpv6_relay_hdr_t * h0;
+ ip6_header_t * ip1 = 0, *ip0;
+ ip6_address_t _ia0, * ia0 = &_ia0;
+ ip6_address_t client_address;
+ ethernet_interface_t *ei0;
+ ethernet_header_t *mac0;
+ vnet_hw_interface_t *hi0;
+ vlib_frame_t *f0;
+ u32 * to_next0;
+ u32 sw_if_index = ~0;
+ u32 original_sw_if_index = ~0;
+ vnet_sw_interface_t *si0;
+ u32 error0 = (u32)~0;
+ vnet_sw_interface_t *swif;
+ dhcpv6_option_t *r0 = 0, *o;
+ u16 len = 0;
+ u8 interface_opt_flag = 0;
+ u8 relay_msg_opt_flag = 0;
+ ip6_main_t * im = &ip6_main;
+ u32 server_fib_idx, client_fib_idx;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ if (DHCPV6_MSG_RELAY_REPL != h0->msg_type)
+ {
+ error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE;
+
+ drop_packet:
+ vlib_node_increment_counter (vm, dhcpv6_proxy_to_client_node.index,
+ error0, 1);
+
+ f0 = vlib_get_frame_to_node (vm, dm->error_drop_node_index);
+ to_next0 = vlib_frame_vector_args (f0);
+ to_next0[0] = bi0;
+ f0->n_vectors = 1;
+ vlib_put_frame_to_node (vm, dm->error_drop_node_index, f0);
+ goto do_trace;
+ }
+      /* drop the packet if the relay hop count limit has been exceeded */
+ if (HOP_COUNT_LIMIT < h0->hop_count)
+ {
+ error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS;
+ goto drop_packet;
+ }
+      u0 = (void *) h0 - sizeof (*u0);
+      ip0 = (void *) u0 - sizeof (*ip0);
+
+ vlib_buffer_advance (b0, sizeof(*h0));
+ o = vlib_buffer_get_current (b0);
+
+      /* Parse through the TLVs looking for option 18 (DHCPV6_OPTION_INTERFACE_ID)
+         _and_ option 9 (DHCPV6_OPTION_RELAY_MSG), both of which must be present.
+         We currently assume no other options need to be processed.
+         The interface-ID carries the sw_if_index we need
+         to track down the client-facing interface */
+
+ while ((u8 *) o < (b0->data + b0->current_data + b0->current_length))
+ {
+ if (DHCPV6_OPTION_INTERFACE_ID == clib_net_to_host_u16(o->option))
+ {
+ interface_opt_flag = 1;
+ if (clib_net_to_host_u16(o->length) == sizeof(sw_if_index))
+ sw_if_index = clib_net_to_host_u32(((dhcpv6_int_id_t*)o)->int_idx);
+ if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index))
+ {
+ error0 = DHCPV6_PROXY_ERROR_WRONG_INTERFACE_ID_OPTION;
+ goto drop_packet;
+ }
+ }
+ if (DHCPV6_OPTION_RELAY_MSG == clib_net_to_host_u16(o->option))
+ {
+ relay_msg_opt_flag = 1;
+ r0 = vlib_buffer_get_current (b0);
+ }
+ if ((relay_msg_opt_flag == 1) && (interface_opt_flag == 1))
+ break;
+ vlib_buffer_advance (b0, sizeof(*o) + clib_net_to_host_u16(o->length));
+ o = (dhcpv6_option_t *) (((uword) o) + clib_net_to_host_u16(o->length) + sizeof(*o));
+ }
+
+ if ((relay_msg_opt_flag == 0) || (r0 == 0))
+ {
+ error0 = DHCPV6_PROXY_ERROR_NO_RELAY_MESSAGE_OPTION;
+ goto drop_packet;
+ }
+
+ if ((u32)~0 == sw_if_index)
+ {
+ error0 = DHCPV6_PROXY_ERROR_NO_CIRCUIT_ID_OPTION;
+ goto drop_packet;
+ }
+
+ //Advance buffer to start of encapsulated DHCPv6 message
+ vlib_buffer_advance (b0, sizeof(*r0));
+
+ client_fib_idx = im->mfib_index_by_sw_if_index[sw_if_index];
+ proxy = dhcp_get_proxy(dm, client_fib_idx, FIB_PROTOCOL_IP6);
+
+ if (NULL == proxy)
+ {
+ error0 = DHCPV6_PROXY_ERROR_NO_SERVER;
+ goto drop_packet;
+ }
+
+ server_fib_idx = im->fib_index_by_sw_if_index
+ [vnet_buffer(b0)->sw_if_index[VLIB_RX]];
+
+ vec_foreach(server, proxy->dhcp_servers)
+ {
+ if (server_fib_idx == server->server_fib_index &&
+ ip0->src_address.as_u64[0] == server->dhcp_server.ip6.as_u64[0] &&
+ ip0->src_address.as_u64[1] == server->dhcp_server.ip6.as_u64[1])
+ {
+ goto server_found;
+ }
+ }
+
+ //drop packet if not from server with configured address or FIB
+ error0 = DHCPV6_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS;
+ goto drop_packet;
+
+ server_found:
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = original_sw_if_index
+ = sw_if_index;
+
+ swif = vnet_get_sw_interface (vnm, original_sw_if_index);
+ if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
+ sw_if_index = swif->unnumbered_sw_if_index;
+
+
+ /*
+ * udp_local hands us the DHCPV6 header, need udp hdr,
+ * ip hdr to relay to client
+ */
+ vlib_buffer_advance (b0, -(sizeof(*u1)));
+ u1 = vlib_buffer_get_current (b0);
+
+ vlib_buffer_advance (b0, -(sizeof(*ip1)));
+ ip1 = vlib_buffer_get_current (b0);
+
+ copy_ip6_address(&client_address, &h0->peer_addr);
+
+ ia0 = ip6_interface_first_address (&ip6_main, sw_if_index);
+ if (ia0 == 0)
+ {
+ error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS;
+ goto drop_packet;
+ }
+
+ len = clib_net_to_host_u16(r0->length);
+ memset(ip1, 0, sizeof(*ip1));
+ copy_ip6_address(&ip1->dst_address, &client_address);
+ u1->checksum = 0;
+      u1->src_port = clib_host_to_net_u16 (UDP_DST_PORT_dhcpv6_to_server);
+      u1->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_dhcpv6_to_client);
+ u1->length = clib_host_to_net_u16 (len + sizeof(udp_header_t));
+
+ ip1->ip_version_traffic_class_and_flow_label =
+ ip0->ip_version_traffic_class_and_flow_label &
+ 0x00000fff;
+ ip1->payload_length = u1->length;
+ ip1->protocol = PROTO_UDP;
+ ip1->hop_limit = HOP_COUNT_LIMIT;
+ copy_ip6_address(&ip1->src_address, ia0);
+
+ u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1,
+ &bogus_length);
+ ASSERT(bogus_length == 0);
+
+ vlib_buffer_advance (b0, -(sizeof(ethernet_header_t)));
+ si0 = vnet_get_sw_interface (vnm, original_sw_if_index);
+ if (si0->type == VNET_SW_INTERFACE_TYPE_SUB)
+ vlib_buffer_advance (b0, -4 /* space for VLAN tag */);
+
+ mac0 = vlib_buffer_get_current (b0);
+
+ hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index);
+ ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance);
+ clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address));
+ memset (&mac0->dst_address, 0xff, sizeof (mac0->dst_address));
+ mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ?
+        clib_host_to_net_u16 (0x8100) : clib_host_to_net_u16 (0x86dd);
+
+ if (si0->type == VNET_SW_INTERFACE_TYPE_SUB)
+ {
+ u32 * vlan_tag = (u32 *)(mac0+1);
+ u32 tmp;
+          tmp = (si0->sub.id << 16) | 0x86dd; /* VLAN TCI + IPv6 inner ethertype */
+ *vlan_tag = clib_host_to_net_u32 (tmp);
+ }
+
+ /* $$$ consider adding a dynamic next to the graph node, for performance */
+ f0 = vlib_get_frame_to_node (vm, hi0->output_node_index);
+ to_next0 = vlib_frame_vector_args (f0);
+ to_next0[0] = bi0;
+ f0->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi0->output_node_index, f0);
+
+ do_trace:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->which = 1; /* to client */
+ if (ia0)
+ copy_ip6_address((ip6_address_t*)tr->packet_data, ia0);
+ tr->error = error0;
+ tr->original_sw_if_index = original_sw_if_index;
+ tr->sw_if_index = sw_if_index;
+ }
+ }
+ return from_frame->n_vectors;
+
+}
+
+VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node, static) = {
+ .function = dhcpv6_proxy_to_client_input,
+ .name = "dhcpv6-proxy-to-client",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = DHCPV6_PROXY_N_ERROR,
+ .error_strings = dhcpv6_proxy_error_strings,
+ .format_buffer = format_dhcpv6_proxy_header_with_length,
+ .format_trace = format_dhcpv6_proxy_trace,
+#if 0
+ .unformat_buffer = unformat_dhcpv6_proxy_header,
+#endif
+};
+
+static clib_error_t *
+dhcp6_proxy_init (vlib_main_t * vm)
+{
+ dhcp_proxy_main_t * dm = &dhcp_proxy_main;
+ vlib_node_t * error_drop_node;
+
+ error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
+ dm->error_drop_node_index = error_drop_node->index;
+
+  /* RFC 3315: the All_DHCP_Servers multicast address (ff05::1:3) */
+ all_dhcpv6_server_address.as_u64[0] = clib_host_to_net_u64 (0xFF05000000000000);
+ all_dhcpv6_server_address.as_u64[1] = clib_host_to_net_u64 (0x00010003);
+
+  /* RFC 3315: the All_DHCP_Relay_Agents_and_Servers address (ff02::1:2) */
+ all_dhcpv6_server_relay_agent_address.as_u64[0] = clib_host_to_net_u64 (0xFF02000000000000);
+ all_dhcpv6_server_relay_agent_address.as_u64[1] = clib_host_to_net_u64 (0x00010002);
+
+ udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_client,
+                         dhcpv6_proxy_to_client_node.index, 0 /* is_ip4 */);
+
+ udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_server,
+                         dhcpv6_proxy_to_server_node.index, 0 /* is_ip4 */);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (dhcp6_proxy_init);
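+
+/*
+ * Illustrative note: the two u64 halves above encode the RFC 3315
+ * multicast groups. For example the All_DHCP_Servers address ff05::1:3
+ * splits as:
+ *
+ *   as_u64[0] = 0xFF05000000000000  ->  ff05:0000:0000:0000
+ *   as_u64[1] = 0x0000000000010003  ->  0000:0000:0001:0003
+ *
+ * clib_host_to_net_u64() stores each half in big-endian byte order, so
+ * the address lands in wire order regardless of host endianness.
+ */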
+
+int
+dhcp6_proxy_set_server (ip46_address_t *addr,
+ ip46_address_t *src_addr,
+ u32 rx_table_id,
+ u32 server_table_id,
+ int is_del)
+{
+ u32 rx_fib_index = 0;
+ int rc = 0;
+
+ const mfib_prefix_t all_dhcp_servers = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_grp_addr = {
+ .ip6 = all_dhcpv6_server_relay_agent_address,
+ }
+ };
+
+ if (ip46_address_is_zero(addr))
+ return VNET_API_ERROR_INVALID_DST_ADDRESS;
+
+ if (ip46_address_is_zero(src_addr))
+ return VNET_API_ERROR_INVALID_SRC_ADDRESS;
+
+ rx_fib_index = mfib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6,
+ rx_table_id,
+ MFIB_SOURCE_DHCP);
+
+ if (is_del)
+ {
+ if (dhcp_proxy_server_del (FIB_PROTOCOL_IP6, rx_fib_index,
+ addr, server_table_id))
+ {
+ mfib_table_entry_delete(rx_fib_index,
+ &all_dhcp_servers,
+ MFIB_SOURCE_DHCP);
+ mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP);
+ }
+ }
+ else
+ {
+ const fib_route_path_t path_for_us = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ if (dhcp_proxy_server_add (FIB_PROTOCOL_IP6, addr, src_addr,
+ rx_fib_index, server_table_id))
+ {
+ mfib_table_entry_path_update(rx_fib_index,
+ &all_dhcp_servers,
+ MFIB_SOURCE_DHCP,
+ &path_for_us,
+ MFIB_ITF_FLAG_FORWARD);
+ /*
+           * Each interface that is enabled in this table needs to be added
+           * as an accepting interface, but this is not easily doable in VPP.
+           * So we cheat: add a flag to the entry that indicates accept from
+           * any interface.
+           * We will still only accept on v6-enabled interfaces, since the
+ * input feature ensures this.
+ */
+ mfib_table_entry_update(rx_fib_index,
+ &all_dhcp_servers,
+ MFIB_SOURCE_DHCP,
+ MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
+ mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP);
+ }
+ }
+
+ mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP);
+
+ return (rc);
+}
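+
+/*
+ * Usage sketch (illustrative only; documentation-prefix addresses):
+ */
+#if 0
+  ip46_address_t server, src;
+
+  ip46_address_reset (&server);
+  ip46_address_reset (&src);
+  /* server 2001:db8::1, source 2001:db8::2 */
+  server.ip6.as_u64[0] = clib_host_to_net_u64 (0x20010db800000000);
+  server.ip6.as_u64[1] = clib_host_to_net_u64 (0x0000000000000001);
+  src.ip6.as_u64[0] = clib_host_to_net_u64 (0x20010db800000000);
+  src.ip6.as_u64[1] = clib_host_to_net_u64 (0x0000000000000002);
+
+  /* relay clients in RX table 0 to a server reachable in table 1 */
+  dhcp6_proxy_set_server (&server, &src, 0 /* rx_table_id */,
+                          1 /* server_table_id */, 0 /* is_del */);
+
+  /* ... and remove the same configuration */
+  dhcp6_proxy_set_server (&server, &src, 0, 1, 1 /* is_del */);
+#endif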
+
+static clib_error_t *
+dhcpv6_proxy_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip46_address_t addr, src_addr;
+ int set_server = 0, set_src_address = 0;
+ u32 rx_table_id = 0, server_table_id = 0;
+ int is_del = 0;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "server %U",
+ unformat_ip6_address, &addr.ip6))
+ set_server = 1;
+ else if (unformat(input, "src-address %U",
+ unformat_ip6_address, &src_addr.ip6))
+ set_src_address =1;
+ else if (unformat (input, "server-fib-id %d", &server_table_id))
+ ;
+ else if (unformat (input, "rx-fib-id %d", &rx_table_id))
+ ;
+ else if (unformat (input, "delete") ||
+ unformat (input, "del"))
+ is_del = 1;
+ else
+ break;
+ }
+
+ if (is_del || (set_server && set_src_address))
+ {
+ int rv;
+
+ rv = dhcp6_proxy_set_server (&addr, &src_addr, rx_table_id,
+ server_table_id, is_del);
+
+ //TODO: Complete the errors
+ switch (rv)
+ {
+ case 0:
+ return 0;
+
+ case VNET_API_ERROR_INVALID_DST_ADDRESS:
+ return clib_error_return (0, "Invalid server address");
+
+ case VNET_API_ERROR_INVALID_SRC_ADDRESS:
+ return clib_error_return (0, "Invalid src address");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return
+ (0, "Fib id %d: no per-fib DHCP server configured", rx_table_id);
+
+ default:
+ return clib_error_return (0, "BUG: rv %d", rv);
+ }
+ }
+ else
+ return clib_error_return (0, "parse error`%U'",
+ format_unformat_error, input);
+}
+
+VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = {
+ .path = "set dhcpv6 proxy",
+ .short_help = "set dhcpv6 proxy [del] server <ipv6-addr> src-address <ipv6-addr> "
+ "[server-fib-id <fib-id>] [rx-fib-id <fib-id>] ",
+ .function = dhcpv6_proxy_set_command_fn,
+};
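+
+/*
+ * Example CLI usage (addresses illustrative):
+ *   set dhcpv6 proxy server 2001:db8::1 src-address 2001:db8::2
+ *       server-fib-id 1 rx-fib-id 0
+ *   set dhcpv6 proxy del server 2001:db8::1 src-address 2001:db8::2
+ */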
+
+static u8 *
+format_dhcp6_proxy_server (u8 * s, va_list * args)
+{
+ dhcp_proxy_t * proxy = va_arg (*args, dhcp_proxy_t *);
+ fib_table_t *server_fib;
+ dhcp_server_t *server;
+ ip6_mfib_t *rx_fib;
+
+ if (proxy == 0)
+ {
+ s = format (s, "%=14s%=16s%s", "RX FIB", "Src Address",
+ "Servers FIB,Address");
+ return s;
+ }
+
+ rx_fib = ip6_mfib_get(proxy->rx_fib_index);
+
+ s = format (s, "%=14u%=16U",
+ rx_fib->table_id,
+ format_ip46_address, &proxy->dhcp_src_address, IP46_TYPE_ANY);
+
+ vec_foreach(server, proxy->dhcp_servers)
+ {
+ server_fib = fib_table_get(server->server_fib_index,
+ FIB_PROTOCOL_IP6);
+ s = format (s, "%u,%U ",
+ server_fib->ft_table_id,
+ format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY);
+ }
+
+ return s;
+}
+
+static int
+dhcp6_proxy_show_walk (dhcp_proxy_t *proxy,
+ void *ctx)
+{
+ vlib_main_t * vm = ctx;
+
+ vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, proxy);
+
+ return (1);
+}
+
+static clib_error_t *
+dhcpv6_proxy_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, NULL /* header line */);
+
+ dhcp_proxy_walk(FIB_PROTOCOL_IP6, dhcp6_proxy_show_walk, vm);
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = {
+ .path = "show dhcpv6 proxy",
+ .short_help = "Display dhcpv6 proxy info",
+ .function = dhcpv6_proxy_show_command_fn,
+};
+
+static clib_error_t *
+dhcpv6_vss_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0, got_new_vss=0;
+ u32 oui=0;
+ u32 fib_id=0, tbl_id=~0;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "oui %d", &oui))
+ got_new_vss = 1;
+ else if (unformat (input, "vpn-id %d", &fib_id))
+ got_new_vss = 1;
+ else if (unformat (input, "table %d", &tbl_id))
+ got_new_vss = 1;
+ else if (unformat(input, "delete") || unformat(input, "del"))
+ is_del = 1;
+ else
+ break;
+ }
+
+  if (tbl_id == ~0)
+ return clib_error_return (0, "no table ID specified.");
+
+ if (is_del || got_new_vss)
+ {
+ int rv;
+
+ rv = dhcp_proxy_set_vss(FIB_PROTOCOL_IP6, tbl_id, oui, fib_id, is_del);
+ switch (rv)
+ {
+ case 0:
+ return 0;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "vss info (oui:%d, vpn-id:%d) not found in table %d.",
+ oui, fib_id, tbl_id);
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "vss for table %d not found in pool.",
+ tbl_id);
+
+ default:
+ return clib_error_return (0, "BUG: rv %d", rv);
+ }
+ }
+ else
+ return clib_error_return (0, "parse error`%U'",
+ format_unformat_error, input);
+
+}
+
+VLIB_CLI_COMMAND (dhcpv6_proxy_vss_command, static) = {
+ .path = "set dhcpv6 vss",
+ .short_help = "set dhcpv6 vss table <table-id> oui <oui> vpn-idx <vpn-idx>",
+ .function = dhcpv6_vss_command_fn,
+};
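+
+/*
+ * Example CLI usage (values illustrative): attach VSS information
+ * (oui 9, vpn-id 13) to relayed traffic for table 7, then remove it:
+ *   set dhcpv6 vss table 7 oui 9 vpn-id 13
+ *   set dhcpv6 vss table 7 del
+ */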
+
+static clib_error_t *
+dhcpv6_vss_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ dhcp_vss_walk(FIB_PROTOCOL_IP6, dhcp_vss_show_walk, vm);
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (dhcpv6_proxy_vss_show_command, static) = {
+ .path = "show dhcpv6 vss",
+ .short_help = "show dhcpv6 VSS",
+ .function = dhcpv6_vss_show_command_fn,
+};
+
+static clib_error_t *
+dhcpv6_link_address_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main();
+ u32 sw_if_index0=0, sw_if_index;
+ vnet_sw_interface_t *swif;
+ ip6_address_t *ia0;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+
+ if (unformat(input, "%U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index0))
+ {
+ swif = vnet_get_sw_interface (vnm, sw_if_index0);
+ sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ?
+ swif->unnumbered_sw_if_index : sw_if_index0;
+ ia0 = ip6_interface_first_address(&ip6_main, sw_if_index);
+ if (ia0)
+ {
+ vlib_cli_output (vm, "%=20s%=48s", "interface", "link-address");
+
+ vlib_cli_output (vm, "%=20U%=48U",
+ format_vnet_sw_if_index_name, vnm, sw_if_index0,
+ format_ip6_address, ia0);
+ } else
+ vlib_cli_output (vm, "%=34s%=20U", "No IPv6 address configured on",
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ } else
+ break;
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (dhcpv6_proxy_address_show_command, static) = {
+ .path = "show dhcpv6 link-address interface",
+ .short_help = "show dhcpv6 link-address interface <interface>",
+ .function = dhcpv6_link_address_show_command_fn,
+};
diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c
new file mode 100644
index 00000000..d6984f2d
--- /dev/null
+++ b/src/vnet/dhcp/dhcp_api.c
@@ -0,0 +1,290 @@
+/*
+ *------------------------------------------------------------------
+ * dhcp_api.c - dhcp api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/dhcp/dhcp_proxy.h>
+#include <vnet/dhcp/client.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(DHCP_PROXY_CONFIG,dhcp_proxy_config) \
+_(DHCP_PROXY_DUMP,dhcp_proxy_dump) \
+_(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \
+_(DHCP_CLIENT_CONFIG, dhcp_client_config)
+
+
+static void
+vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp)
+{
+ vl_api_dhcp_proxy_set_vss_reply_t *rmp;
+ int rv;
+
+ rv = dhcp_proxy_set_vss ((mp->is_ipv6 ?
+ FIB_PROTOCOL_IP6 :
+ FIB_PROTOCOL_IP4),
+ ntohl (mp->tbl_id),
+ ntohl (mp->oui),
+			   ntohl (mp->fib_id), (int) (mp->is_add == 0));
+
+ REPLY_MACRO (VL_API_DHCP_PROXY_SET_VSS_REPLY);
+}
+
+
+static void vl_api_dhcp_proxy_config_t_handler
+ (vl_api_dhcp_proxy_config_t * mp)
+{
+  vl_api_dhcp_proxy_config_reply_t *rmp;
+ ip46_address_t src, server;
+ int rv = -1;
+
+ if (mp->is_ipv6)
+ {
+ clib_memcpy (&src.ip6, mp->dhcp_src_address, sizeof (src.ip6));
+ clib_memcpy (&server.ip6, mp->dhcp_server, sizeof (server.ip6));
+
+ rv = dhcp6_proxy_set_server (&server,
+ &src,
+ (u32) ntohl (mp->rx_vrf_id),
+ (u32) ntohl (mp->server_vrf_id),
+ (int) (mp->is_add == 0));
+ }
+ else
+ {
+ ip46_address_reset (&src);
+ ip46_address_reset (&server);
+
+ clib_memcpy (&src.ip4, mp->dhcp_src_address, sizeof (src.ip4));
+ clib_memcpy (&server.ip4, mp->dhcp_server, sizeof (server.ip4));
+
+ rv = dhcp4_proxy_set_server (&server,
+ &src,
+ (u32) ntohl (mp->rx_vrf_id),
+ (u32) ntohl (mp->server_vrf_id),
+ (int) (mp->is_add == 0));
+ }
+
+ REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY);
+}
+
+static void
+vl_api_dhcp_proxy_dump_t_handler (vl_api_dhcp_proxy_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ dhcp_proxy_dump ((mp->is_ip6 == 1 ?
+ FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4), q, mp->context);
+}
+
+void
+dhcp_send_details (fib_protocol_t proto,
+ void *opaque, u32 context, dhcp_proxy_t * proxy)
+{
+ vl_api_dhcp_proxy_details_t *mp;
+ unix_shared_memory_queue_t *q = opaque;
+ vl_api_dhcp_server_t *v_server;
+ dhcp_server_t *server;
+ fib_table_t *s_fib;
+ dhcp_vss_t *vss;
+ u32 count;
+ size_t n;
+
+ count = vec_len (proxy->dhcp_servers);
+ n = sizeof (*mp) + (count * sizeof (vl_api_dhcp_server_t));
+ mp = vl_msg_api_alloc (n);
+ if (!mp)
+ return;
+ memset (mp, 0, n);
+ mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_DETAILS);
+ mp->context = context;
+ mp->count = count;
+
+ mp->is_ipv6 = (proto == FIB_PROTOCOL_IP6);
+ mp->rx_vrf_id =
+ htonl (dhcp_proxy_rx_table_get_table_id (proto, proxy->rx_fib_index));
+
+ vss = dhcp_get_vss_info (&dhcp_proxy_main, proxy->rx_fib_index, proto);
+
+ if (NULL != vss)
+ {
+ mp->vss_oui = htonl (vss->oui);
+ mp->vss_fib_id = htonl (vss->fib_id);
+ }
+
+ vec_foreach_index (count, proxy->dhcp_servers)
+ {
+ server = &proxy->dhcp_servers[count];
+ v_server = &mp->servers[count];
+
+ s_fib = fib_table_get (server->server_fib_index, proto);
+
+ v_server->server_vrf_id = htonl (s_fib->ft_table_id);
+
+ if (mp->is_ipv6)
+ {
+ memcpy (v_server->dhcp_server, &server->dhcp_server.ip6, 16);
+ }
+ else
+ {
+ /* put the address in the first bytes */
+ memcpy (v_server->dhcp_server, &server->dhcp_server.ip4, 4);
+ }
+ }
+
+ if (mp->is_ipv6)
+ {
+ memcpy (mp->dhcp_src_address, &proxy->dhcp_src_address.ip6, 16);
+ }
+ else
+ {
+ /* put the address in the first bytes */
+ memcpy (mp->dhcp_src_address, &proxy->dhcp_src_address.ip4, 4);
+ }
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
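+
+/*
+ * Note: the details message is variable length; "count"
+ * vl_api_dhcp_server_t records follow the fixed header in mp->servers[],
+ * which is why the allocation above is
+ * sizeof (*mp) + count * sizeof (vl_api_dhcp_server_t).
+ */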
+
+void
+dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname,
+ u8 mask_width, u8 is_ipv6, u8 * host_address,
+ u8 * router_address, u8 * host_mac)
+{
+ unix_shared_memory_queue_t *q;
+ vl_api_dhcp_compl_event_t *mp;
+ u32 len;
+
+ q = vl_api_client_index_to_input_queue (client_index);
+ if (!q)
+ return;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ mp->client_index = client_index;
+ mp->pid = pid;
+ mp->is_ipv6 = is_ipv6;
+ len = (vec_len (hostname) < 63) ? vec_len (hostname) : 63;
+ clib_memcpy (&mp->hostname, hostname, len);
+ mp->hostname[len] = 0;
+ mp->mask_width = mask_width;
+ clib_memcpy (&mp->host_address[0], host_address, 16);
+ clib_memcpy (&mp->router_address[0], router_address, 16);
+
+ if (NULL != host_mac)
+ clib_memcpy (&mp->host_mac[0], host_mac, 6);
+
+ mp->_vl_msg_id = ntohs (VL_API_DHCP_COMPL_EVENT);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void vl_api_dhcp_client_config_t_handler
+ (vl_api_dhcp_client_config_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dhcp_client_config_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = dhcp_client_config (vm, ntohl (mp->sw_if_index),
+ mp->hostname, mp->client_id,
+ mp->is_add, mp->client_index,
+ mp->want_dhcp_event ? dhcp_compl_event_callback :
+ NULL, mp->pid);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_DHCP_CLIENT_CONFIG_REPLY);
+}
+
+/*
+ * dhcp_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_dhcp;
+#undef _
+}
+
+static clib_error_t *
+dhcp_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (dhcp_api_hookup);
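+
+/*
+ * Illustrative expansion: for one table entry, e.g. DHCP_PROXY_SET_VSS,
+ * the handler-registration macro above expands (roughly) to:
+ *
+ *   vl_msg_api_set_handlers (VL_API_DHCP_PROXY_SET_VSS,
+ *                            "dhcp_proxy_set_vss",
+ *                            vl_api_dhcp_proxy_set_vss_t_handler,
+ *                            vl_noop_handler,
+ *                            vl_api_dhcp_proxy_set_vss_t_endian,
+ *                            vl_api_dhcp_proxy_set_vss_t_print,
+ *                            sizeof (vl_api_dhcp_proxy_set_vss_t), 1);
+ */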
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c
new file mode 100644
index 00000000..1784906b
--- /dev/null
+++ b/src/vnet/dhcp/dhcp_proxy.c
@@ -0,0 +1,351 @@
+/*
+ * dhcp_proxy.c: common dhcp v4 and v6 proxy processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dhcp/dhcp_proxy.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+
+/**
+ * @brief Shared v4/v6 instance of the DHCP proxy main
+ */
+dhcp_proxy_main_t dhcp_proxy_main;
+
+static void
+dhcp_proxy_rx_table_lock (fib_protocol_t proto,
+ u32 fib_index)
+{
+ if (FIB_PROTOCOL_IP4 == proto)
+ fib_table_lock(fib_index, proto, FIB_SOURCE_DHCP);
+ else
+ mfib_table_lock(fib_index, proto, MFIB_SOURCE_DHCP);
+}
+
+static void
+dhcp_proxy_rx_table_unlock (fib_protocol_t proto,
+ u32 fib_index)
+{
+ if (FIB_PROTOCOL_IP4 == proto)
+ fib_table_unlock(fib_index, proto, FIB_SOURCE_DHCP);
+ else
+ mfib_table_unlock(fib_index, proto, MFIB_SOURCE_DHCP);
+}
+
+u32
+dhcp_proxy_rx_table_get_table_id (fib_protocol_t proto,
+ u32 fib_index)
+{
+ if (FIB_PROTOCOL_IP4 == proto)
+ {
+ fib_table_t *fib;
+
+ fib = fib_table_get(fib_index, proto);
+
+ return (fib->ft_table_id);
+ }
+ else
+ {
+ mfib_table_t *mfib;
+
+ mfib = mfib_table_get(fib_index, proto);
+
+ return (mfib->mft_table_id);
+ }
+}
+
+void
+dhcp_proxy_walk (fib_protocol_t proto,
+ dhcp_proxy_walk_fn_t fn,
+ void *ctx)
+{
+ dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
+ dhcp_proxy_t * server;
+ u32 server_index, i;
+
+ vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index[proto])
+ {
+ server_index = dpm->dhcp_server_index_by_rx_fib_index[proto][i];
+ if (~0 == server_index)
+ continue;
+
+ server = pool_elt_at_index (dpm->dhcp_servers[proto], server_index);
+
+ if (!fn(server, ctx))
+ break;
+ }
+}
+
+void
+dhcp_vss_walk (fib_protocol_t proto,
+ dhcp_vss_walk_fn_t fn,
+ void *ctx)
+{
+ dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
+ mfib_table_t *mfib;
+ dhcp_vss_t * vss;
+ u32 vss_index, i;
+ fib_table_t *fib;
+
+ vec_foreach_index (i, dpm->vss_index_by_rx_fib_index[proto])
+ {
+ vss_index = dpm->vss_index_by_rx_fib_index[proto][i];
+ if (~0 == vss_index)
+ continue;
+
+ vss = pool_elt_at_index (dpm->vss[proto], vss_index);
+
+ if (FIB_PROTOCOL_IP4 == proto)
+ {
+ fib = fib_table_get(i, proto);
+
+ if (!fn(vss, fib->ft_table_id, ctx))
+ break;
+ }
+ else
+ {
+ mfib = mfib_table_get(i, proto);
+
+ if (!fn(vss, mfib->mft_table_id, ctx))
+ break;
+ }
+ }
+}
+
+static u32
+dhcp_proxy_server_find (dhcp_proxy_t *proxy,
+ fib_protocol_t proto,
+ ip46_address_t *addr,
+ u32 server_table_id)
+{
+ dhcp_server_t *server;
+ u32 ii, fib_index;
+
+ vec_foreach_index(ii, proxy->dhcp_servers)
+ {
+ server = &proxy->dhcp_servers[ii];
+ fib_index = fib_table_find(proto, server_table_id);
+
+ if (ip46_address_is_equal(&server->dhcp_server,
+ addr) &&
+ (server->server_fib_index == fib_index))
+ {
+ return (ii);
+ }
+ }
+ return (~0);
+}
+
+int
+dhcp_proxy_server_del (fib_protocol_t proto,
+ u32 rx_fib_index,
+ ip46_address_t *addr,
+ u32 server_table_id)
+{
+ dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
+ dhcp_proxy_t *proxy = 0;
+
+ proxy = dhcp_get_proxy(dpm, rx_fib_index, proto);
+
+ if (NULL != proxy)
+ {
+ dhcp_server_t *server;
+ u32 index;
+
+ index = dhcp_proxy_server_find(proxy, proto, addr, server_table_id);
+
+ if (~0 != index)
+ {
+ server = &proxy->dhcp_servers[index];
+ fib_table_unlock (server->server_fib_index, proto, FIB_SOURCE_DHCP);
+
+ vec_del1(proxy->dhcp_servers, index);
+
+ if (0 == vec_len(proxy->dhcp_servers))
+ {
+ /* no servers left, delete the proxy config */
+ dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] = ~0;
+ vec_free(proxy->dhcp_servers);
+ pool_put (dpm->dhcp_servers[proto], proxy);
+ return (1);
+ }
+ }
+ }
+
+ /* the proxy still exists */
+ return (0);
+}
+
+int
+dhcp_proxy_server_add (fib_protocol_t proto,
+ ip46_address_t *addr,
+ ip46_address_t *src_address,
+ u32 rx_fib_index,
+ u32 server_table_id)
+{
+ dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
+ dhcp_proxy_t * proxy = 0;
+ int new = 0;
+
+ proxy = dhcp_get_proxy(dpm, rx_fib_index, proto);
+
+ if (NULL == proxy)
+ {
+ vec_validate_init_empty(dpm->dhcp_server_index_by_rx_fib_index[proto],
+ rx_fib_index,
+ ~0);
+
+ pool_get (dpm->dhcp_servers[proto], proxy);
+ memset (proxy, 0, sizeof (*proxy));
+ new = 1;
+
+ dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] =
+ proxy - dpm->dhcp_servers[proto];
+
+ proxy->dhcp_src_address = *src_address;
+ proxy->rx_fib_index = rx_fib_index;
+ }
+ else
+ {
+ if (~0 != dhcp_proxy_server_find(proxy, proto, addr, server_table_id))
+ {
+ return (new);
+ }
+ }
+
+ dhcp_server_t server = {
+ .dhcp_server = *addr,
+ .server_fib_index = fib_table_find_or_create_and_lock(proto,
+ server_table_id,
+ FIB_SOURCE_DHCP),
+ };
+
+ vec_add1(proxy->dhcp_servers, server);
+
+ return (new);
+}
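+
+/*
+ * Usage sketch (illustrative): the add/del return values let a caller
+ * manage per-RX-FIB state. Add returns 1 only when it creates the proxy
+ * object for the RX FIB; del returns 1 only when the last server is
+ * removed and the proxy object is freed.
+ */
+#if 0
+  if (dhcp_proxy_server_add (FIB_PROTOCOL_IP6, &addr, &src,
+                             rx_fib_index, server_table_id))
+    {
+      /* first server in this RX FIB: install the 'for-us' entries */
+    }
+
+  if (dhcp_proxy_server_del (FIB_PROTOCOL_IP6, rx_fib_index,
+                             &addr, server_table_id))
+    {
+      /* last server gone: remove the 'for-us' entries and table locks */
+    }
+#endif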
+
+typedef struct dhcp_proxy_dump_walk_ctx_t_
+{
+ fib_protocol_t proto;
+ void *opaque;
+ u32 context;
+} dhcp_proxy_dump_walk_ctx_t;
+
+static int
+dhcp_proxy_dump_walk (dhcp_proxy_t *proxy,
+ void *arg)
+{
+  dhcp_proxy_dump_walk_ctx_t *ctx = arg;
+
+ dhcp_send_details(ctx->proto,
+ ctx->opaque,
+ ctx->context,
+ proxy);
+
+ return (1);
+}
+
+void
+dhcp_proxy_dump (fib_protocol_t proto,
+ void *opaque,
+ u32 context)
+{
+  dhcp_proxy_dump_walk_ctx_t ctx = {
+ .proto = proto,
+ .opaque = opaque,
+ .context = context,
+ };
+ dhcp_proxy_walk(proto, dhcp_proxy_dump_walk, &ctx);
+}
+
+int
+dhcp_vss_show_walk (dhcp_vss_t *vss,
+ u32 rx_table_id,
+ void *ctx)
+{
+ vlib_main_t * vm = ctx;
+
+ vlib_cli_output (vm, "%=6d%=6d%=12d",
+ rx_table_id,
+ vss->oui,
+ vss->fib_id);
+
+ return (1);
+}
+
+int dhcp_proxy_set_vss (fib_protocol_t proto,
+ u32 tbl_id,
+ u32 oui,
+ u32 fib_id,
+ int is_del)
+{
+ dhcp_proxy_main_t *dm = &dhcp_proxy_main;
+ dhcp_vss_t *v = NULL;
+ u32 rx_fib_index;
+ int rc = 0;
+
+ if (proto == FIB_PROTOCOL_IP4)
+ rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id,
+ FIB_SOURCE_DHCP);
+ else
+ rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id,
+ MFIB_SOURCE_DHCP);
+ v = dhcp_get_vss_info(dm, rx_fib_index, proto);
+
+ if (NULL != v)
+ {
+ if (is_del)
+ {
+ /* release the lock held on the table when the VSS
+ * info was created */
+ dhcp_proxy_rx_table_unlock (proto, rx_fib_index);
+
+ pool_put (dm->vss[proto], v);
+ dm->vss_index_by_rx_fib_index[proto][rx_fib_index] = ~0;
+ }
+ else
+ {
+ /* this is a modify */
+ v->fib_id = fib_id;
+ v->oui = oui;
+ }
+ }
+ else
+ {
+ if (is_del)
+ rc = VNET_API_ERROR_NO_SUCH_ENTRY;
+ else
+ {
+ /* create a new entry */
+ vec_validate_init_empty(dm->vss_index_by_rx_fib_index[proto],
+ rx_fib_index, ~0);
+
+          /* hold a lock on the table whilst the VSS info exists */
+ pool_get (dm->vss[proto], v);
+ v->fib_id = fib_id;
+ v->oui = oui;
+
+ dm->vss_index_by_rx_fib_index[proto][rx_fib_index] =
+ v - dm->vss[proto];
+ dhcp_proxy_rx_table_lock (proto, rx_fib_index);
+ }
+ }
+
+ /* Release the lock taken during the create_or_lock at the start */
+ dhcp_proxy_rx_table_unlock (proto, rx_fib_index);
+
+ return (rc);
+}
diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h
new file mode 100644
index 00000000..ef2bc0a1
--- /dev/null
+++ b/src/vnet/dhcp/dhcp_proxy.h
@@ -0,0 +1,286 @@
+/*
+ * dhcp_proxy.h: DHCP v4 & v6 proxy common functions/types
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_dhcp_proxy_h
+#define included_dhcp_proxy_h
+
+#include <vnet/vnet.h>
+#include <vnet/dhcp/dhcp4_packet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/format.h>
+#include <vnet/udp/udp.h>
+
+typedef enum {
+#define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n,
+#include <vnet/dhcp/dhcp4_proxy_error.def>
+#undef dhcp_proxy_error
+ DHCP_PROXY_N_ERROR,
+} dhcp_proxy_error_t;
+
+typedef enum {
+#define dhcpv6_proxy_error(n,s) DHCPV6_PROXY_ERROR_##n,
+#include <vnet/dhcp/dhcp6_proxy_error.def>
+#undef dhcpv6_proxy_error
+ DHCPV6_PROXY_N_ERROR,
+} dhcpv6_proxy_error_t;
+
+
+/**
+ * @brief The Virtual Sub-net Selection information for a given RX FIB
+ */
+typedef struct dhcp_vss_t_ {
+ /**
+     * @brief The OUI for the VSS suboption (the RFC does not elaborate)
+ */
+ u32 oui;
+ /**
+ * @brief VPN-ID
+ */
+ u32 fib_id;
+} dhcp_vss_t;
+
+/**
+ * @brief A representation of a single DHCP Server within a given VRF config
+ */
+typedef struct dhcp_server_t_
+{
+ /**
+ * @brief The address of the DHCP server to which to relay the client's
+ * messages
+ */
+ ip46_address_t dhcp_server;
+
+ /**
+ * @brief The FIB index (not the external Table-ID) in which the server
+ * is reachable.
+ */
+ u32 server_fib_index;
+} dhcp_server_t;
+
+/**
+ * @brief A DHCP proxy representation for a per-client VRF config
+ */
+typedef struct dhcp_proxy_t_ {
+ /**
+ * @brief The set of DHCP servers to which messages are relayed.
+     * If multiple servers are configured then discover/solicit messages
+     * are relayed to each. A cookie is maintained for the relay, and only
+     * one reply is relayed back to the client, based on the presence of
+     * the cookie.
+ * The expectation is there are only 1 or 2 servers, hence no fancy DB.
+ */
+ dhcp_server_t *dhcp_servers;
+
+ /**
+     * @brief Hash table of pending requests keyed on the client's MAC address
+ */
+ uword *dhcp_pending;
+
+ /**
+ * @brief A lock for the pending request DB.
+ */
+ int lock;
+
+ /**
+     * @brief The source address to use in relayed messages
+ */
+ ip46_address_t dhcp_src_address;
+
+ /**
+ * @brief The FIB index (not the external Table-ID) in which the client
+     * resides.
+ */
+ u32 rx_fib_index;
+} dhcp_proxy_t;
+
+#define DHCP_N_PROTOS (FIB_PROTOCOL_IP6 + 1)
+
+/**
+ * @brief Collection of global DHCP proxy data
+ */
+typedef struct {
+ /* Pool of DHCP servers */
+ dhcp_proxy_t *dhcp_servers[DHCP_N_PROTOS];
+
+ /* Pool of selected DHCP server. Zero is the default server */
+ u32 * dhcp_server_index_by_rx_fib_index[DHCP_N_PROTOS];
+
+ /* to drop pkts in server-to-client direction */
+ u32 error_drop_node_index;
+
+ dhcp_vss_t *vss[DHCP_N_PROTOS];
+
+ /* hash lookup specific vrf_id -> option 82 vss suboption */
+ u32 *vss_index_by_rx_fib_index[DHCP_N_PROTOS];
+} dhcp_proxy_main_t;
+
+extern dhcp_proxy_main_t dhcp_proxy_main;
+
+/**
+ * @brief Send the details of a proxy session to the API client during a dump
+ */
+void dhcp_send_details (fib_protocol_t proto,
+ void *opaque,
+ u32 context,
+ dhcp_proxy_t *proxy);
+
+/**
+ * @brief Show (on CLI) a VSS config during a show walk
+ */
+int dhcp_vss_show_walk (dhcp_vss_t *vss,
+ u32 rx_table_id,
+ void *ctx);
+
+/**
+ * @brief Configure/set a new VSS info
+ */
+int dhcp_proxy_set_vss(fib_protocol_t proto,
+ u32 vrf_id,
+ u32 oui,
+ u32 fib_id,
+ int is_del);
+
+/**
+ * @brief Dump the proxy configs to the API
+ */
+void dhcp_proxy_dump(fib_protocol_t proto,
+ void *opaque,
+ u32 context);
+
+/**
+ * @brief Add a new DHCP proxy server configuration.
+ * @return 1 if the config is new,
+ *         0 otherwise (implying a modify of an existing one)
+ */
+int dhcp_proxy_server_add(fib_protocol_t proto,
+ ip46_address_t *addr,
+ ip46_address_t *src_address,
+                          u32 rx_fib_index,
+ u32 server_table_id);
+
+/**
+ * @brief Delete a DHCP proxy config
+ * @return 1 if the proxy is deleted, 0 otherwise
+ */
+int dhcp_proxy_server_del(fib_protocol_t proto,
+ u32 rx_fib_index,
+ ip46_address_t *addr,
+ u32 server_table_id);
+
+u32
+dhcp_proxy_rx_table_get_table_id (fib_protocol_t proto,
+ u32 fib_index);
+
+/**
+ * @brief Callback function invoked for each DHCP proxy entry
+ * return 0 to break the walk, non-zero otherwise.
+ */
+typedef int (*dhcp_proxy_walk_fn_t)(dhcp_proxy_t *server,
+ void *ctx);
+
+/**
+ * @brief Walk/Visit each DHCP proxy server
+ */
+void dhcp_proxy_walk(fib_protocol_t proto,
+ dhcp_proxy_walk_fn_t fn,
+ void *ctx);
+
+/**
+ * @brief Callback function invoked for each DHCP VSS entry
+ * return 0 to break the walk, non-zero otherwise.
+ */
+typedef int (*dhcp_vss_walk_fn_t)(dhcp_vss_t *server,
+ u32 rx_table_id,
+ void *ctx);
+
+/**
+ * @brief Walk/Visit each DHCP proxy VSS
+ */
+void dhcp_vss_walk(fib_protocol_t proto,
+ dhcp_vss_walk_fn_t fn,
+ void *ctx);
+
+/**
+ * @brief Lock a proxy object to prevent simultaneous access of its
+ * pending store
+ */
+void dhcp_proxy_lock (dhcp_proxy_t *server);
+
+/**
+ * @brief Unlock a proxy object's pending store
+ */
+void dhcp_proxy_unlock (dhcp_proxy_t *server);
+
+/**
+ * @brief Get the VSS data for the FIB index
+ */
+static inline dhcp_vss_t *
+dhcp_get_vss_info (dhcp_proxy_main_t *dm,
+ u32 rx_fib_index,
+ fib_protocol_t proto)
+{
+ dhcp_vss_t *v = NULL;
+
+ if (vec_len(dm->vss_index_by_rx_fib_index[proto]) > rx_fib_index &&
+ dm->vss_index_by_rx_fib_index[proto][rx_fib_index] != ~0)
+ {
+ v = pool_elt_at_index (
+ dm->vss[proto],
+ dm->vss_index_by_rx_fib_index[proto][rx_fib_index]);
+ }
+
+ return (v);
+}
+
+/**
+ * @brief Get the DHCP proxy server data for the FIB index
+ */
+static inline dhcp_proxy_t *
+dhcp_get_proxy (dhcp_proxy_main_t *dm,
+ u32 rx_fib_index,
+ fib_protocol_t proto)
+{
+ dhcp_proxy_t *s = NULL;
+
+ if (vec_len(dm->dhcp_server_index_by_rx_fib_index[proto]) > rx_fib_index &&
+ dm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] != ~0)
+ {
+ s = pool_elt_at_index (
+ dm->dhcp_servers[proto],
+ dm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index]);
+ }
+
+ return (s);
+}
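+
+/*
+ * Usage sketch (illustrative): typical data-path use of the inline
+ * getters for a packet received in rx_fib_index.
+ */
+#if 0
+  dhcp_proxy_t *proxy;
+  dhcp_vss_t *vss;
+
+  proxy = dhcp_get_proxy (&dhcp_proxy_main, rx_fib_index, FIB_PROTOCOL_IP6);
+  if (NULL == proxy)
+    return; /* no relay configured in this table */
+
+  vss = dhcp_get_vss_info (&dhcp_proxy_main, rx_fib_index, FIB_PROTOCOL_IP6);
+  if (NULL != vss)
+    {
+      /* append the VSS suboption using vss->oui / vss->fib_id */
+    }
+#endif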
+
+int dhcp6_proxy_set_server (ip46_address_t *addr,
+ ip46_address_t *src_addr,
+ u32 rx_table_id,
+ u32 server_table_id,
+ int is_del);
+int dhcp4_proxy_set_server (ip46_address_t *addr,
+ ip46_address_t *src_addr,
+ u32 rx_table_id,
+ u32 server_table_id,
+ int is_del);
+
+#endif /* included_dhcp_proxy_h */
diff --git a/src/vnet/dir.dox b/src/vnet/dir.dox
new file mode 100644
index 00000000..8bc2c4cf
--- /dev/null
+++ b/src/vnet/dir.dox
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017 Red Hat and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief Interface.
+
+This is a high level directory which contains sub-directories for all the
+networking protocols. Also contained within this directory are more generic
+CLI command files such as interface management.
+
+*/
+/*? %%clicmd:group_label Interface %% ?*/
+/*? %%syscfg:group_label Interface %% ?*/
diff --git a/src/vnet/dpo/classify_dpo.c b/src/vnet/dpo/classify_dpo.c
new file mode 100644
index 00000000..9e7886c9
--- /dev/null
+++ b/src/vnet/dpo/classify_dpo.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all classify DPOs
+ */
+classify_dpo_t *classify_dpo_pool;
+
+static classify_dpo_t *
+classify_dpo_alloc (void)
+{
+ classify_dpo_t *cd;
+
+ pool_get_aligned(classify_dpo_pool, cd, CLIB_CACHE_LINE_BYTES);
+ memset(cd, 0, sizeof(*cd));
+
+ return (cd);
+}
+
+static index_t
+classify_dpo_get_index (classify_dpo_t *cd)
+{
+ return (cd - classify_dpo_pool);
+}
+
+index_t
+classify_dpo_create (dpo_proto_t proto,
+ u32 classify_table_index)
+{
+ classify_dpo_t *cd;
+
+ cd = classify_dpo_alloc();
+ cd->cd_proto = proto;
+ cd->cd_table_index = classify_table_index;
+
+ return (classify_dpo_get_index(cd));
+}
+
+u8*
+format_classify_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+ classify_dpo_t *cd;
+
+ cd = classify_dpo_get(index);
+
+ return (format(s, "%U-classify:[%d]:table:%d",
+ format_dpo_proto, cd->cd_proto,
+ index, cd->cd_table_index));
+}
+
+static void
+classify_dpo_lock (dpo_id_t *dpo)
+{
+ classify_dpo_t *cd;
+
+ cd = classify_dpo_get(dpo->dpoi_index);
+
+ cd->cd_locks++;
+}
+
+static void
+classify_dpo_unlock (dpo_id_t *dpo)
+{
+ classify_dpo_t *cd;
+
+ cd = classify_dpo_get(dpo->dpoi_index);
+
+ cd->cd_locks--;
+
+ if (0 == cd->cd_locks)
+ {
+ pool_put(classify_dpo_pool, cd);
+ }
+}
+
+static void
+classify_dpo_mem_show (void)
+{
+ fib_show_memory_usage("Classify",
+ pool_elts(classify_dpo_pool),
+ pool_len(classify_dpo_pool),
+ sizeof(classify_dpo_t));
+}
+
+const static dpo_vft_t cd_vft = {
+ .dv_lock = classify_dpo_lock,
+ .dv_unlock = classify_dpo_unlock,
+ .dv_format = format_classify_dpo,
+ .dv_mem_show = classify_dpo_mem_show,
+};
+
+const static char* const classify_ip4_nodes[] =
+{
+ "ip4-classify",
+ NULL,
+};
+const static char* const classify_ip6_nodes[] =
+{
+ "ip6-classify",
+ NULL,
+};
+const static char* const * const classify_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = classify_ip4_nodes,
+ [DPO_PROTO_IP6] = classify_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+classify_dpo_module_init (void)
+{
+ dpo_register(DPO_CLASSIFY, &cd_vft, classify_nodes);
+}
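+
+/*
+ * Usage sketch (illustrative): create a classify DPO and hand it to the
+ * generic DPO layer; the classify table index here is hypothetical.
+ */
+#if 0
+  dpo_id_t dpo = DPO_INVALID;
+  index_t cdi;
+
+  cdi = classify_dpo_create (DPO_PROTO_IP4, 0 /* classify table index */);
+  dpo_set (&dpo, DPO_CLASSIFY, DPO_PROTO_IP4, cdi);
+#endif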
diff --git a/src/vnet/dpo/classify_dpo.h b/src/vnet/dpo/classify_dpo.h
new file mode 100644
index 00000000..48f4b2bf
--- /dev/null
+++ b/src/vnet/dpo/classify_dpo.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CLASSIFY_DPO_H__
+#define __CLASSIFY_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of a classify table in the data-path
+ */
+typedef struct classify_dpo_t
+{
+ dpo_proto_t cd_proto;
+
+ u32 cd_table_index;
+
+ /**
+     * Number of locks/users of the DPO
+ */
+ u16 cd_locks;
+} classify_dpo_t;
+
+extern index_t classify_dpo_create(dpo_proto_t proto,
+ u32 classify_table_index);
+
+extern u8* format_classify_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern classify_dpo_t *classify_dpo_pool;
+
+static inline classify_dpo_t *
+classify_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(classify_dpo_pool, index));
+}
+
+extern void classify_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
new file mode 100644
index 00000000..bd18b66b
--- /dev/null
+++ b/src/vnet/dpo/dpo.c
@@ -0,0 +1,574 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets as they are switched through VPP.
+ *
+ * The DPO is a base class that is specialised by other objects to provide
+ * concrete actions.
+ *
+ * The VLIB graph nodes form a graph of types; the DPO graph is a graph of
+ * instances.
+ */
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/format.h>
+#include <vnet/adj/adj.h>
+
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/dpo/interface_rx_dpo.h>
+#include <vnet/dpo/interface_tx_dpo.h>
+#include <vnet/dpo/mpls_disposition.h>
+
+/**
+ * Array of char* names for the DPO types and protos
+ */
+static const char* dpo_type_names[] = DPO_TYPES;
+static const char* dpo_proto_names[] = DPO_PROTOS;
+
+/**
+ * @brief Vector of virtual function tables for the DPO types
+ *
+ * This is a vector so we can dynamically register new DPO types in plugins.
+ */
+static dpo_vft_t *dpo_vfts;
+
+/**
+ * @brief vector of graph node names associated with each DPO type and protocol.
+ *
+ * dpo_nodes[child_type][child_proto][node_X] = node_name;
+ * i.e.
+ * dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][0] = "ip4-lookup"
+ * dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][1] = "ip4-load-balance"
+ *
+ * This is a vector so we can dynamically register new DPO types in plugins.
+ */
+static const char* const * const ** dpo_nodes;
+
+/**
+ * @brief Vector of edge indices from parent DPO nodes to child
+ *
+ * dpo_edges[child_type][child_proto][parent_type][parent_proto] = edge_index
+ *
+ * This array is derived at init time from the dpo_nodes above. Note that
+ * the third dimension in dpo_nodes is lost, hence, the edge index from each
+ * node MUST be the same.
+ * Including both the child and parent protocol is required to support the
+ * case where it changes as the graph is traversed, most notably when an
+ * MPLS label is popped.
+ *
+ * Note that this array is child type specific, not child instance specific.
+ */
+static u32 ****dpo_edges;
+
+/**
+ * @brief The DPO type value that can be assigned to the next dynamic
+ * type registration.
+ */
+static dpo_type_t dpo_dynamic = DPO_LAST;
+
+dpo_proto_t
+vnet_link_to_dpo_proto (vnet_link_t linkt)
+{
+ switch (linkt)
+ {
+ case VNET_LINK_IP6:
+ return (DPO_PROTO_IP6);
+ case VNET_LINK_IP4:
+ return (DPO_PROTO_IP4);
+ case VNET_LINK_MPLS:
+ return (DPO_PROTO_MPLS);
+ case VNET_LINK_ETHERNET:
+ return (DPO_PROTO_ETHERNET);
+ case VNET_LINK_NSH:
+ return (DPO_PROTO_NSH);
+ case VNET_LINK_ARP:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+vnet_link_t
+dpo_proto_to_link (dpo_proto_t dp)
+{
+ switch (dp)
+ {
+ case DPO_PROTO_IP6:
+ return (VNET_LINK_IP6);
+ case DPO_PROTO_IP4:
+ return (VNET_LINK_IP4);
+ case DPO_PROTO_MPLS:
+ return (VNET_LINK_MPLS);
+ case DPO_PROTO_ETHERNET:
+ return (VNET_LINK_ETHERNET);
+ case DPO_PROTO_NSH:
+ return (VNET_LINK_NSH);
+ }
+ return (~0);
+}
+
+u8 *
+format_dpo_type (u8 * s, va_list * args)
+{
+ dpo_type_t type = va_arg (*args, int);
+
+ s = format(s, "%s", dpo_type_names[type]);
+
+ return (s);
+}
+
+u8 *
+format_dpo_id (u8 * s, va_list * args)
+{
+ dpo_id_t *dpo = va_arg (*args, dpo_id_t*);
+ u32 indent = va_arg (*args, u32);
+
+ s = format(s, "[@%d]: ", dpo->dpoi_next_node);
+
+ if (NULL != dpo_vfts[dpo->dpoi_type].dv_format)
+ {
+ return (format(s, "%U",
+ dpo_vfts[dpo->dpoi_type].dv_format,
+ dpo->dpoi_index,
+ indent));
+ }
+
+ switch (dpo->dpoi_type)
+ {
+ case DPO_FIRST:
+ s = format(s, "unset");
+ break;
+ default:
+ s = format(s, "unknown");
+ break;
+ }
+ return (s);
+}
+
+u8 *
+format_dpo_proto (u8 * s, va_list * args)
+{
+ dpo_proto_t proto = va_arg (*args, int);
+
+ return (format(s, "%s", dpo_proto_names[proto]));
+}
+
+void
+dpo_set (dpo_id_t *dpo,
+ dpo_type_t type,
+ dpo_proto_t proto,
+ index_t index)
+{
+ dpo_id_t tmp = *dpo;
+
+ dpo->dpoi_type = type;
+    dpo->dpoi_proto = proto;
+ dpo->dpoi_index = index;
+
+ if (DPO_ADJACENCY == type)
+ {
+ /*
+ * set the adj subtype
+ */
+ ip_adjacency_t *adj;
+
+ adj = adj_get(index);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ dpo->dpoi_type = DPO_ADJACENCY_INCOMPLETE;
+ break;
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN;
+ break;
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ dpo->dpoi_type = DPO_ADJACENCY_MCAST_MIDCHAIN;
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ dpo->dpoi_type = DPO_ADJACENCY_MCAST;
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ dpo->dpoi_type = DPO_ADJACENCY_GLEAN;
+ break;
+ default:
+ break;
+ }
+ }
+ dpo_lock(dpo);
+ dpo_unlock(&tmp);
+}
+
+void
+dpo_reset (dpo_id_t *dpo)
+{
+ dpo_id_t tmp = DPO_INVALID;
+
+ /*
+ * use the atomic copy operation.
+ */
+ dpo_copy(dpo, &tmp);
+}
+
+/**
+ * @brief
+ * Compare two Data-path objects
+ *
+ * like memcmp, returns 0 if matching, !0 otherwise.
+ */
+int
+dpo_cmp (const dpo_id_t *dpo1,
+ const dpo_id_t *dpo2)
+{
+ int res;
+
+ res = dpo1->dpoi_type - dpo2->dpoi_type;
+
+ if (0 != res) return (res);
+
+ return (dpo1->dpoi_index - dpo2->dpoi_index);
+}
+
+void
+dpo_copy (dpo_id_t *dst,
+ const dpo_id_t *src)
+{
+ dpo_id_t tmp = *dst;
+
+ /*
+     * the destination is written in a single u64 write - hence atomic w.r.t.
+     * any packets in flight.
+ */
+ *((u64*)dst) = *(u64*)src;
+
+ dpo_lock(dst);
+ dpo_unlock(&tmp);
+}
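+
+/*
+ * Note: the single-u64 store above is only atomic if dpo_id_t fits in
+ * (and is aligned to) a u64. A compile-time guard to that effect would
+ * look like this sketch (STATIC_ASSERT is from vppinfra):
+ */
+#if 0
+STATIC_ASSERT (sizeof (dpo_id_t) <= sizeof (u64),
+               "dpo_id_t must fit in a u64 for the atomic copy");
+#endif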
+
+int
+dpo_is_adj (const dpo_id_t *dpo)
+{
+ return ((dpo->dpoi_type == DPO_ADJACENCY) ||
+ (dpo->dpoi_type == DPO_ADJACENCY_INCOMPLETE) ||
+ (dpo->dpoi_type == DPO_ADJACENCY_MIDCHAIN) ||
+ (dpo->dpoi_type == DPO_ADJACENCY_GLEAN));
+}
+
+static u32 *
+dpo_default_get_next_node (const dpo_id_t *dpo)
+{
+ u32 *node_indices = NULL;
+ const char *node_name;
+ u32 ii = 0;
+
+ node_name = dpo_nodes[dpo->dpoi_type][dpo->dpoi_proto][ii];
+ while (NULL != node_name)
+ {
+ vlib_node_t *node;
+
+ node = vlib_get_node_by_name(vlib_get_main(), (u8*) node_name);
+ ASSERT(NULL != node);
+ vec_add1(node_indices, node->index);
+
+ ++ii;
+ node_name = dpo_nodes[dpo->dpoi_type][dpo->dpoi_proto][ii];
+ }
+
+ return (node_indices);
+}
+
+void
+dpo_register (dpo_type_t type,
+ const dpo_vft_t *vft,
+ const char * const * const * nodes)
+{
+ vec_validate(dpo_vfts, type);
+ dpo_vfts[type] = *vft;
+ if (NULL == dpo_vfts[type].dv_get_next_node)
+ {
+ dpo_vfts[type].dv_get_next_node = dpo_default_get_next_node;
+ }
+
+ vec_validate(dpo_nodes, type);
+ dpo_nodes[type] = nodes;
+}
+
+dpo_type_t
+dpo_register_new_type (const dpo_vft_t *vft,
+ const char * const * const * nodes)
+{
+ dpo_type_t type = dpo_dynamic++;
+
+ dpo_register(type, vft, nodes);
+
+ return (type);
+}
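+
+/*
+ * Usage sketch (illustrative): how a plugin might register its own DPO
+ * type. The vft, node names and type variable are hypothetical.
+ */
+#if 0
+static const char *const my_dpo_ip4_nodes[] = { "my-dpo-ip4", NULL };
+static const char *const *const my_dpo_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP4] = my_dpo_ip4_nodes,
+};
+
+static dpo_type_t my_dpo_type;
+
+static void
+my_dpo_module_init (void)
+{
+  /* my_dpo_vft supplies dv_lock/dv_unlock/dv_format, as above */
+  my_dpo_type = dpo_register_new_type (&my_dpo_vft, my_dpo_nodes);
+}
+#endif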
+
+void
+dpo_lock (dpo_id_t *dpo)
+{
+ if (!dpo_id_is_valid(dpo))
+ return;
+
+ dpo_vfts[dpo->dpoi_type].dv_lock(dpo);
+}
+
+void
+dpo_unlock (dpo_id_t *dpo)
+{
+ if (!dpo_id_is_valid(dpo))
+ return;
+
+ dpo_vfts[dpo->dpoi_type].dv_unlock(dpo);
+}
+
+
+static u32
+dpo_get_next_node (dpo_type_t child_type,
+ dpo_proto_t child_proto,
+ const dpo_id_t *parent_dpo)
+{
+ dpo_proto_t parent_proto;
+ dpo_type_t parent_type;
+
+ parent_type = parent_dpo->dpoi_type;
+ parent_proto = parent_dpo->dpoi_proto;
+
+ vec_validate(dpo_edges, child_type);
+ vec_validate(dpo_edges[child_type], child_proto);
+ vec_validate(dpo_edges[child_type][child_proto], parent_type);
+ vec_validate_init_empty(
+ dpo_edges[child_type][child_proto][parent_type],
+ parent_proto, ~0);
+
+ /*
+     * if the edge index has not yet been created for this node-to-node transition
+ */
+ if (~0 == dpo_edges[child_type][child_proto][parent_type][parent_proto])
+ {
+ vlib_node_t *child_node;
+ u32 *parent_indices;
+ vlib_main_t *vm;
+ u32 edge, *pi, cc;
+
+ vm = vlib_get_main();
+
+ ASSERT(NULL != dpo_vfts[parent_type].dv_get_next_node);
+ ASSERT(NULL != dpo_nodes[child_type]);
+ ASSERT(NULL != dpo_nodes[child_type][child_proto]);
+
+ cc = 0;
+ parent_indices = dpo_vfts[parent_type].dv_get_next_node(parent_dpo);
+
+ vlib_worker_thread_barrier_sync(vm);
+
+ /*
+ * create a graph arc from each of the child's registered node types,
+ * to each of the parent's.
+ */
+ while (NULL != dpo_nodes[child_type][child_proto][cc])
+ {
+ child_node =
+ vlib_get_node_by_name(vm,
+ (u8*) dpo_nodes[child_type][child_proto][cc]);
+
+ vec_foreach(pi, parent_indices)
+ {
+ edge = vlib_node_add_next(vm, child_node->index, *pi);
+
+ if (~0 == dpo_edges[child_type][child_proto][parent_type][parent_proto])
+ {
+ dpo_edges[child_type][child_proto][parent_type][parent_proto] = edge;
+ }
+ else
+ {
+ ASSERT(dpo_edges[child_type][child_proto][parent_type][parent_proto] == edge);
+ }
+ }
+ cc++;
+ }
+
+ vlib_worker_thread_barrier_release(vm);
+ vec_free(parent_indices);
+ }
+
+ return (dpo_edges[child_type][child_proto][parent_type][parent_proto]);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child parent
+ * relationship. The VLIB graph arc used is taken from the parent and child types
+ * passed.
+ */
+static void
+dpo_stack_i (u32 edge,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent)
+{
+ /*
+     * in order to get an atomic update of the child we create a temporary
+     * from a copy of the parent, add the next_node, then copy the temporary
+     * into the child
+ */
+ dpo_id_t tmp = DPO_INVALID;
+ dpo_copy(&tmp, parent);
+
+ /*
+     * set the edge index for the child to parent VLIB graph transition
+ */
+ tmp.dpoi_next_node = edge;
+
+ /*
+ * this update is atomic.
+ */
+ dpo_copy(dpo, &tmp);
+
+ dpo_reset(&tmp);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child-parent
+ * relationship. The VLIB graph arc used is taken from the parent and child types
+ * passed.
+ */
+void
+dpo_stack (dpo_type_t child_type,
+ dpo_proto_t child_proto,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent)
+{
+ dpo_stack_i(dpo_get_next_node(child_type, child_proto, parent), dpo, parent);
+}
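+
+/*
+ * Usage sketch (illustrative): stack a load-balance bucket (the child)
+ * on an adjacency previously dpo_set() into via_dpo (the parent);
+ * dpo_stack() resolves the VLIB edge and updates the child atomically.
+ * The variables here are hypothetical.
+ */
+#if 0
+  dpo_stack (DPO_LOAD_BALANCE, DPO_PROTO_IP4, &bucket_dpo, &via_dpo);
+#endif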
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child parent
+ * relationship. A new VLIB graph arc is created from the child node passed
+ * to the nodes registered by the parent. The VLIB infra will ensure this arc
+ * is added only once.
+ */
+void
+dpo_stack_from_node (u32 child_node_index,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent)
+{
+ dpo_type_t parent_type;
+ u32 *parent_indices;
+ vlib_main_t *vm;
+ u32 edge, *pi;
+
+ edge = 0;
+ parent_type = parent->dpoi_type;
+ vm = vlib_get_main();
+
+ ASSERT(NULL != dpo_vfts[parent_type].dv_get_next_node);
+ parent_indices = dpo_vfts[parent_type].dv_get_next_node(parent);
+ ASSERT(parent_indices);
+
+ /*
+   * This loop is purposefully written with the worker thread lock taken in
+   * the inner loop because:
+   *  1) the likelihood that the edge does not exist is small, and
+   *  2) the likelihood there is more than one node is even smaller;
+   * so we are optimising for not needing to take the lock.
+ */
+ vec_foreach(pi, parent_indices)
+ {
+ edge = vlib_node_get_next(vm, child_node_index, *pi);
+
+ if (~0 == edge)
+ {
+ vlib_worker_thread_barrier_sync(vm);
+
+ edge = vlib_node_add_next(vm, child_node_index, *pi);
+
+ vlib_worker_thread_barrier_release(vm);
+ }
+ }
+ dpo_stack_i(edge, dpo, parent);
+}
+
+static clib_error_t *
+dpo_module_init (vlib_main_t * vm)
+{
+ drop_dpo_module_init();
+ punt_dpo_module_init();
+ receive_dpo_module_init();
+ load_balance_module_init();
+ mpls_label_dpo_module_init();
+ classify_dpo_module_init();
+ lookup_dpo_module_init();
+ ip_null_dpo_module_init();
+ replicate_module_init();
+ interface_rx_dpo_module_init();
+ interface_tx_dpo_module_init();
+ mpls_disp_dpo_module_init();
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION(dpo_module_init);
+
+static clib_error_t *
+dpo_memory_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ dpo_vft_t *vft;
+
+ vlib_cli_output (vm, "DPO memory");
+ vlib_cli_output (vm, "%=30s %=5s %=8s/%=9s totals",
+ "Name","Size", "in-use", "allocated");
+
+ vec_foreach(vft, dpo_vfts)
+ {
+ if (NULL != vft->dv_mem_show)
+ vft->dv_mem_show();
+ }
+
+ return (NULL);
+}
+
+/* *INDENT-OFF* */
+/*?
+ * The '<em>show dpo memory</em>' command displays the memory usage for each
+ * data-plane object type.
+ *
+ * @cliexpar
+ * @cliexstart{show dpo memory}
+ * DPO memory
+ * Name Size in-use /allocated totals
+ * load-balance 64 12 / 12 768/768
+ * Adjacency 256 1 / 1 256/256
+ * Receive 24 5 / 5 120/120
+ * Lookup 12 0 / 0 0/0
+ * Classify 12 0 / 0 0/0
+ * MPLS label 24 0 / 0 0/0
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_dpo_memory, static) = {
+ .path = "show dpo memory",
+ .function = dpo_memory_show,
+ .short_help = "show dpo memory",
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
new file mode 100644
index 00000000..33562968
--- /dev/null
+++ b/src/vnet/dpo/dpo.h
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets as they are switched through VPP's data-path.
+ *
+ * The DPO can be considered a base class that is specialised
+ * by other objects to provide concrete actions.
+ *
+ * The VLIB graph is a graph of DPO types; the DPO graph is a graph of
+ * instances.
+ */
+
+#ifndef __DPO_H__
+#define __DPO_H__
+
+#include <vnet/vnet.h>
+
+/**
+ * @brief An index for adjacencies.
+ * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of
+ * an index_t. However, for us humans, we can glean much more intent
+ * from the declaration
+ * foo bar(index_t t);
+ * than we can from
+ * foo bar(u32 t);
+ */
+typedef u32 index_t;
+
+/**
+ * @brief Invalid index - used when no index is known
+ * blazoned capitals INVALID speak volumes where ~0 does not.
+ */
+#define INDEX_INVALID ((index_t)(~0))
+
+/**
+ * @brief Data path protocol.
+ * Actions performed on packets in the data-plane can be described and represented
+ * by protocol-independent objects, i.e. ADJACENCY, but the specific actions
+ * required during ADJACENCY processing can be protocol dependent. For example,
+ * the adjacency rewrite node performs an ip4 checksum calculation; ip6 and MPLS
+ * do not, though all 3 perform a TTL decrement. The VLIB graph nodes are thus protocol
+ * dependent, and thus each graph edge/arc is too.
+ * When programming a DPO's next node arc from child to parent it is thus required
+ * to know the parent's data-path protocol so the correct arc index can be used.
+ */
+typedef enum dpo_proto_t_
+{
+ DPO_PROTO_IP4 = 0,
+ DPO_PROTO_IP6,
+ DPO_PROTO_MPLS,
+ DPO_PROTO_ETHERNET,
+ DPO_PROTO_NSH,
+} __attribute__((packed)) dpo_proto_t;
+
+#define DPO_PROTO_NUM ((dpo_proto_t)(DPO_PROTO_NSH+1))
+#define DPO_PROTO_NONE ((dpo_proto_t)(DPO_PROTO_NUM+1))
+
+#define DPO_PROTOS { \
+ [DPO_PROTO_IP4] = "ip4", \
+ [DPO_PROTO_IP6] = "ip6", \
+ [DPO_PROTO_ETHERNET] = "ethernet", \
+ [DPO_PROTO_MPLS] = "mpls", \
+ [DPO_PROTO_NSH] = "nsh", \
+}
+
+#define FOR_EACH_DPO_PROTO(_proto) \
+ for (_proto = DPO_PROTO_IP4; \
+ _proto <= DPO_PROTO_NSH; \
+ _proto++)
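+/*
+ * e.g. (illustrative) walk every data-path protocol, assuming a
+ * vlib_main_t *vm is in scope:
+ *
+ *   dpo_proto_t proto;
+ *   FOR_EACH_DPO_PROTO(proto)
+ *   {
+ *       vlib_cli_output(vm, "%U", format_dpo_proto, proto);
+ *   }
+ */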
+
+/**
+ * @brief Common types of data-path objects
+ * New types can be dynamically added using dpo_register_new_type()
+ */
+typedef enum dpo_type_t_ {
+ /**
+ * An invalid value first so we can spot uninitialised (zeroed) DPOs
+ */
+ DPO_FIRST,
+ DPO_DROP,
+ DPO_IP_NULL,
+ DPO_PUNT,
+ /**
+ * @brief load-balancing over a choice of [un]equal cost paths
+ */
+ DPO_LOAD_BALANCE,
+ DPO_REPLICATE,
+ DPO_ADJACENCY,
+ DPO_ADJACENCY_INCOMPLETE,
+ DPO_ADJACENCY_MIDCHAIN,
+ DPO_ADJACENCY_GLEAN,
+ DPO_ADJACENCY_MCAST,
+ DPO_ADJACENCY_MCAST_MIDCHAIN,
+ DPO_RECEIVE,
+ DPO_LOOKUP,
+ DPO_LISP_CP,
+ DPO_CLASSIFY,
+ DPO_MPLS_LABEL,
+ DPO_MPLS_DISPOSITION,
+ DPO_MFIB_ENTRY,
+ DPO_INTERFACE_RX,
+ DPO_INTERFACE_TX,
+ DPO_LAST,
+} __attribute__((packed)) dpo_type_t;
+
+#define DPO_TYPE_NUM DPO_LAST
+
+#define DPO_TYPES { \
+ [DPO_FIRST] = "dpo-invalid", \
+ [DPO_DROP] = "dpo-drop", \
+ [DPO_IP_NULL] = "dpo-ip-null", \
+ [DPO_PUNT] = "dpo-punt", \
+ [DPO_ADJACENCY] = "dpo-adjacency", \
+ [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete", \
+ [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midchain", \
+ [DPO_ADJACENCY_GLEAN] = "dpo-glean", \
+ [DPO_ADJACENCY_MCAST] = "dpo-adj-mcast", \
+ [DPO_ADJACENCY_MCAST_MIDCHAIN] = "dpo-adj-mcast-midchain", \
+ [DPO_RECEIVE] = "dpo-receive", \
+ [DPO_LOOKUP] = "dpo-lookup", \
+ [DPO_LOAD_BALANCE] = "dpo-load-balance", \
+ [DPO_REPLICATE] = "dpo-replicate", \
+ [DPO_LISP_CP] = "dpo-lisp-cp", \
+ [DPO_CLASSIFY] = "dpo-classify", \
+ [DPO_MPLS_LABEL] = "dpo-mpls-label", \
+ [DPO_MPLS_DISPOSITION] = "dpo-mpls-disposition", \
+ [DPO_MFIB_ENTRY] = "dpo-mfib-entry", \
+ [DPO_INTERFACE_RX] = "dpo-interface-rx", \
+ [DPO_INTERFACE_TX] = "dpo-interface-tx" \
+}
+
+/**
+ * @brief The identity of a DPO is a combination of its type and its
+ * instance number/index of objects of that type
+ */
+typedef struct dpo_id_t_ {
+ /**
+ * the type
+ */
+ dpo_type_t dpoi_type;
+ /**
+ * the data-path protocol of the type.
+ */
+ dpo_proto_t dpoi_proto;
+ /**
+ * The next VLIB node to follow.
+ */
+ u16 dpoi_next_node;
+ /**
+ * the index of objects of that type
+ */
+ index_t dpoi_index;
+} __attribute__ ((aligned(sizeof(u64)))) dpo_id_t;
+
+STATIC_ASSERT(sizeof(dpo_id_t) <= sizeof(u64),
+ "DPO ID is greater than sizeof u64 "
+ "atomic updates need to be revisited");
+
+/**
+ * @brief An initialiser for DPOs declared on the stack.
+ * The next node is set to 0 since VLIB graph nodes should set the 0 index to drop.
+ */
+#define DPO_INVALID \
+{ \
+ .dpoi_type = DPO_FIRST, \
+ .dpoi_proto = DPO_PROTO_NONE, \
+ .dpoi_index = INDEX_INVALID, \
+ .dpoi_next_node = 0, \
+}
+
+/**
+ * @brief Return true if the DPO object is valid, i.e. has been initialised.
+ */
+static inline int
+dpo_id_is_valid (const dpo_id_t *dpoi)
+{
+ return (dpoi->dpoi_type != DPO_FIRST &&
+ dpoi->dpoi_index != INDEX_INVALID);
+}
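+/*
+ * e.g. (illustrative):
+ *
+ *   dpo_id_t tmp = DPO_INVALID;
+ *   ASSERT(!dpo_id_is_valid(&tmp));
+ */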
+
+extern dpo_proto_t vnet_link_to_dpo_proto(vnet_link_t linkt);
+
+/**
+ * @brief
+ * Take a reference counting lock on the DPO
+ */
+extern void dpo_lock(dpo_id_t *dpo);
+
+/**
+ * @brief
+ * Release a reference counting lock on the DPO
+ */
+extern void dpo_unlock(dpo_id_t *dpo);
+
+/**
+ * @brief Set/create a DPO ID
+ * The DPO will be locked.
+ *
+ * @param dpo
+ * The DPO object to configure
+ *
+ * @param type
+ * The dpo_type_t of the DPO
+ *
+ * @param proto
+ * The dpo_proto_t of the DPO
+ *
+ * @param index
+ * The type specific index of the DPO
+ */
+extern void dpo_set(dpo_id_t *dpo,
+ dpo_type_t type,
+ dpo_proto_t proto,
+ index_t index);
+
+/**
+ * @brief reset a DPO ID
+ * The DPO will be unlocked.
+ *
+ * @param dpo
+ * The DPO object to reset
+ */
+extern void dpo_reset(dpo_id_t *dpo);
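+/*
+ * Illustrative set/reset lifecycle (a sketch; 42 stands in for a real
+ * type-specific index):
+ *
+ *   dpo_id_t dpo = DPO_INVALID;
+ *   dpo_set(&dpo, DPO_ADJACENCY, DPO_PROTO_IP4, 42);    <- takes the lock
+ *   ... use the DPO ...
+ *   dpo_reset(&dpo);                                    <- releases it
+ */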
+
+/**
+ * @brief compare two DPOs for equality
+ */
+extern int dpo_cmp(const dpo_id_t *dpo1,
+ const dpo_id_t *dpo2);
+
+/**
+ * @brief
+ * Atomically copy a data-plane object.
+ * This is safe to use even when the dst DPO is currently switching packets
+ */
+extern void dpo_copy(dpo_id_t *dst,
+ const dpo_id_t *src);
+
+/**
+ * @brief Return TRUE if the DPO is any type of adjacency
+ */
+extern int dpo_is_adj(const dpo_id_t *dpo);
+
+/**
+ * @brief Format a dpo_id_t object
+ */
+extern u8 *format_dpo_id(u8 * s, va_list * args);
+
+/**
+ * @brief Format a DPO type
+ */
+extern u8 *format_dpo_type(u8 * s, va_list * args);
+
+/**
+ * @brief Format a DPO protocol
+ */
+extern u8 *format_dpo_proto(u8 * s, va_list * args);
+
+/**
+ * @brief Convert a DPO protocol to the corresponding link type
+ */
+extern vnet_link_t dpo_proto_to_link(dpo_proto_t dp);
+
+/**
+ * @brief
+ * Set and stack a DPO.
+ * The DPO passed is set to the parent DPO and the necessary
+ * VLIB graph arcs are created. The child_type and child_proto
+ * are used to get the VLIB nodes from which the arcs are added.
+ *
+ * @param child_type
+ * Child DPO type.
+ *
+ * @param child_proto
+ * Child DPO proto
+ *
+ * @param dpo
+ * This is the DPO to stack and set.
+ *
+ * @param parent_dpo
+ * The parent DPO to stack onto.
+ */
+extern void dpo_stack(dpo_type_t child_type,
+ dpo_proto_t child_proto,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent_dpo);
+
+/**
+ * @brief
+ * Set and stack a DPO.
+ * The DPO passed is set to the parent DPO and the necessary
+ * VLIB graph arcs are created, from the child_node passed.
+ *
+ * @param child_node
+ * The VLIB graph node index from which to create an arc to the parent
+ *
+ * @param dpo
+ * This is the DPO to stack and set.
+ *
+ * @param parent_dpo
+ * The parent DPO to stack onto.
+ */
+extern void dpo_stack_from_node(u32 child_node,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent);
+
+/**
+ * @brief A lock function registered for a DPO type
+ */
+typedef void (*dpo_lock_fn_t)(dpo_id_t *dpo);
+
+/**
+ * @brief An unlock function registered for a DPO type
+ */
+typedef void (*dpo_unlock_fn_t)(dpo_id_t *dpo);
+
+/**
+ * @brief A memory usage show function
+ */
+typedef void (*dpo_mem_show_t)(void);
+
+/**
+ * @brief Given a DPO instance return a vector of node indices that
+ * the type/instance will use.
+ */
+typedef u32* (*dpo_get_next_node_t)(const dpo_id_t *dpo);
+
+/**
+ * @brief A virtual function table registered for a DPO type
+ */
+typedef struct dpo_vft_t_
+{
+ /**
+ * A reference counting lock function
+ */
+ dpo_lock_fn_t dv_lock;
+ /**
+ * A reference counting unlock function
+ */
+ dpo_lock_fn_t dv_unlock;
+ /**
+ * A format function
+ */
+ format_function_t *dv_format;
+ /**
+ * A show memory usage function
+ */
+ dpo_mem_show_t dv_mem_show;
+ /**
+ * A function to get the next VLIB node given an instance
+ * of the DPO. If this is NULL, then the node's name MUST be
+ * retrievable from the node names array passed to the register
+ * function
+ */
+ dpo_get_next_node_t dv_get_next_node;
+} dpo_vft_t;
+
+
+/**
+ * @brief For a given DPO type register:
+ * - a virtual function table
+ * - a NULL-terminated array of graph nodes from which that object type
+ * will originate packets, i.e. the nodes in which the object type will be
+ * the parent DPO in the DP graph. The nodes are per-data-path protocol
+ * (see above).
+ *
+ * @param type
+ * The type being registered.
+ *
+ * @param vft
+ * The virtual function table to register for the type.
+ *
+ * @param nodes
+ * The string description of the per-protocol VLIB graph nodes.
+ */
+extern void dpo_register(dpo_type_t type,
+ const dpo_vft_t *vft,
+ const char * const * const * nodes);
+
+/**
+ * @brief Create and register a new DPO type.
+ *
+ * This can be used by plugins to create new DPO types that are not listed
+ * in the dpo_type_t enum.
+ *
+ * @param vft
+ * The virtual function table to register for the type.
+ *
+ * @param nodes
+ * The string description of the per-protocol VLIB graph nodes.
+ *
+ * @return The new dpo_type_t
+ */
+extern dpo_type_t dpo_register_new_type(const dpo_vft_t *vft,
+ const char * const * const * nodes);
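+/*
+ * Illustrative sketch of a plugin registering its own DPO type; all names
+ * here are hypothetical:
+ *
+ *   static const char * const my_ip4_nodes[] = { "my-dpo-ip4", NULL };
+ *   static const char * const * const my_nodes[DPO_PROTO_NUM] = {
+ *       [DPO_PROTO_IP4] = my_ip4_nodes,
+ *   };
+ *   my_dpo_type = dpo_register_new_type(&my_vft, my_nodes);
+ */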
+
+#endif
diff --git a/src/vnet/dpo/drop_dpo.c b/src/vnet/dpo/drop_dpo.c
new file mode 100644
index 00000000..a1821ddd
--- /dev/null
+++ b/src/vnet/dpo/drop_dpo.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing dropping the packet
+ */
+
+#include <vnet/dpo/dpo.h>
+
+static dpo_id_t drop_dpos[DPO_PROTO_NUM];
+
+const dpo_id_t *
+drop_dpo_get (dpo_proto_t proto)
+{
+ dpo_set(&drop_dpos[proto], DPO_DROP, proto, proto);
+
+ return (&drop_dpos[proto]);
+}
+
+int
+dpo_is_drop (const dpo_id_t *dpo)
+{
+ return (dpo->dpoi_type == DPO_DROP);
+}
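+/*
+ * Illustrative use (a sketch): defaulting an unresolved IPv4 path to the
+ * drop. This mirrors load_balance_multipath_next_hop_fixup() in
+ * load_balance.c later in this patch.
+ *
+ *   dpo_copy(&nh->path_dpo, drop_dpo_get(DPO_PROTO_IP4));
+ */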
+
+static void
+drop_dpo_lock (dpo_id_t *dpo)
+{
+ /*
+ * not maintaining a lock count on the drop;
+ * it's more trouble than it's worth.
+ * There always needs to be one around, so there's no point in managing its lifetime
+ */
+}
+static void
+drop_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+static u8*
+format_drop_dpo (u8 *s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+
+ return (format(s, "dpo-drop %U", format_dpo_proto, index));
+}
+
+const static dpo_vft_t drop_vft = {
+ .dv_lock = drop_dpo_lock,
+ .dv_unlock = drop_dpo_unlock,
+ .dv_format = format_drop_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a drop
+ * object.
+ *
+ * This means that these graph nodes are ones from which a drop is the
+ * parent object in the DPO-graph.
+ */
+const static char* const drop_ip4_nodes[] =
+{
+ "ip4-drop",
+ NULL,
+};
+const static char* const drop_ip6_nodes[] =
+{
+ "ip6-drop",
+ NULL,
+};
+const static char* const drop_mpls_nodes[] =
+{
+ "mpls-drop",
+ NULL,
+};
+const static char* const drop_ethernet_nodes[] =
+{
+ "error-drop",
+ NULL,
+};
+const static char* const drop_nsh_nodes[] =
+{
+ "error-drop",
+ NULL,
+};
+const static char* const * const drop_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = drop_ip4_nodes,
+ [DPO_PROTO_IP6] = drop_ip6_nodes,
+ [DPO_PROTO_MPLS] = drop_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = drop_ethernet_nodes,
+ [DPO_PROTO_NSH] = drop_nsh_nodes,
+};
+
+void
+drop_dpo_module_init (void)
+{
+ dpo_register(DPO_DROP, &drop_vft, drop_nodes);
+}
diff --git a/src/vnet/dpo/drop_dpo.h b/src/vnet/dpo/drop_dpo.h
new file mode 100644
index 00000000..436df36c
--- /dev/null
+++ b/src/vnet/dpo/drop_dpo.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The Drop DPO will drop all packets, no questions asked. It is valid
+ * for any packet protocol.
+ */
+
+#ifndef __DROP_DPO_H__
+#define __DROP_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern int dpo_is_drop(const dpo_id_t *dpo);
+
+extern const dpo_id_t *drop_dpo_get(dpo_proto_t proto);
+
+extern void drop_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c
new file mode 100644
index 00000000..a624f514
--- /dev/null
+++ b/src/vnet/dpo/interface_rx_dpo.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/interface_rx_dpo.h>
+#include <vnet/fib/fib_node.h>
+
+/*
+ * The 'DB' of interface DPOs.
+ * There is only one per interface per protocol, so this is a
+ * per-protocol vector indexed by sw_if_index
+ */
+static index_t *interface_rx_dpo_db[DPO_PROTO_NUM];
+
+static interface_rx_dpo_t *
+interface_rx_dpo_alloc (void)
+{
+ interface_rx_dpo_t *ido;
+
+ pool_get(interface_rx_dpo_pool, ido);
+
+ return (ido);
+}
+
+static inline interface_rx_dpo_t *
+interface_rx_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+ ASSERT(DPO_INTERFACE_RX == dpo->dpoi_type);
+
+ return (interface_rx_dpo_get(dpo->dpoi_index));
+}
+
+static inline index_t
+interface_rx_dpo_get_index (interface_rx_dpo_t *ido)
+{
+ return (ido - interface_rx_dpo_pool);
+}
+
+static void
+interface_rx_dpo_lock (dpo_id_t *dpo)
+{
+ interface_rx_dpo_t *ido;
+
+ ido = interface_rx_dpo_get_from_dpo(dpo);
+ ido->ido_locks++;
+}
+
+static void
+interface_rx_dpo_unlock (dpo_id_t *dpo)
+{
+ interface_rx_dpo_t *ido;
+
+ ido = interface_rx_dpo_get_from_dpo(dpo);
+ ido->ido_locks--;
+
+ if (0 == ido->ido_locks)
+ {
+ interface_rx_dpo_db[ido->ido_proto][ido->ido_sw_if_index] =
+ INDEX_INVALID;
+ pool_put(interface_rx_dpo_pool, ido);
+ }
+}
+
+/*
+ * interface_rx_dpo_add_or_lock
+ *
+ * Add/create and lock a new, or lock an existing, interface DPO
+ * for the interface and protocol given
+ */
+void
+interface_rx_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ dpo_id_t *dpo)
+{
+ interface_rx_dpo_t *ido;
+
+ vec_validate_init_empty(interface_rx_dpo_db[proto],
+ sw_if_index,
+ INDEX_INVALID);
+
+ if (INDEX_INVALID == interface_rx_dpo_db[proto][sw_if_index])
+ {
+ ido = interface_rx_dpo_alloc();
+
+ ido->ido_sw_if_index = sw_if_index;
+ ido->ido_proto = proto;
+
+ interface_rx_dpo_db[proto][sw_if_index] =
+ interface_rx_dpo_get_index(ido);
+ }
+ else
+ {
+ ido = interface_rx_dpo_get(interface_rx_dpo_db[proto][sw_if_index]);
+ }
+
+ dpo_set(dpo, DPO_INTERFACE_RX, proto, interface_rx_dpo_get_index(ido));
+}
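+/*
+ * Illustrative caller (a sketch, e.g. a tunnel terminating into another
+ * VRF's interface); 'sw_if_index' is a hypothetical interface index:
+ *
+ *   dpo_id_t dpo = DPO_INVALID;
+ *   interface_rx_dpo_add_or_lock(DPO_PROTO_IP4, sw_if_index, &dpo);
+ *   ... stack a child on it, or install it in a FIB entry ...
+ *   dpo_reset(&dpo);
+ */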
+
+
+static clib_error_t *
+interface_rx_dpo_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(
+ interface_rx_dpo_interface_state_change);
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+interface_rx_dpo_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+ interface_rx_dpo_hw_interface_state_change);
+
+static clib_error_t *
+interface_rx_dpo_interface_delete (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(
+ interface_rx_dpo_interface_delete);
+
+u8*
+format_interface_rx_dpo (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ interface_rx_dpo_t *ido = interface_rx_dpo_get(index);
+
+ return (format(s, "%U-dpo: %U",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm, ido->ido_sw_if_index),
+ format_dpo_proto, ido->ido_proto));
+}
+
+static void
+interface_rx_dpo_mem_show (void)
+{
+ fib_show_memory_usage("Interface",
+ pool_elts(interface_rx_dpo_pool),
+ pool_len(interface_rx_dpo_pool),
+ sizeof(interface_rx_dpo_t));
+}
+
+
+const static dpo_vft_t interface_rx_dpo_vft = {
+ .dv_lock = interface_rx_dpo_lock,
+ .dv_unlock = interface_rx_dpo_unlock,
+ .dv_format = format_interface_rx_dpo,
+ .dv_mem_show = interface_rx_dpo_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an
+ * interface-rx object.
+ *
+ * This means that these graph nodes are ones from which an interface-rx
+ * DPO is the parent object in the DPO-graph.
+ */
+const static char* const interface_rx_dpo_ip4_nodes[] =
+{
+ "interface-rx-dpo-ip4",
+ NULL,
+};
+const static char* const interface_rx_dpo_ip6_nodes[] =
+{
+ "interface-rx-dpo-ip6",
+ NULL,
+};
+const static char* const interface_rx_dpo_l2_nodes[] =
+{
+ "interface-rx-dpo-l2",
+ NULL,
+};
+
+const static char* const * const interface_rx_dpo_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = interface_rx_dpo_ip4_nodes,
+ [DPO_PROTO_IP6] = interface_rx_dpo_ip6_nodes,
+ [DPO_PROTO_ETHERNET] = interface_rx_dpo_l2_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+interface_rx_dpo_module_init (void)
+{
+ dpo_register(DPO_INTERFACE_RX,
+ &interface_rx_dpo_vft,
+ interface_rx_dpo_nodes);
+}
+
+/**
+ * @brief Interface DPO trace data
+ */
+typedef struct interface_rx_dpo_trace_t_
+{
+ u32 sw_if_index;
+} interface_rx_dpo_trace_t;
+
+typedef enum interface_rx_dpo_next_t_
+{
+ INTERFACE_RX_DPO_DROP = 0,
+ INTERFACE_RX_DPO_INPUT = 1,
+} interface_rx_dpo_next_t;
+
+always_inline uword
+interface_rx_dpo_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 thread_index = vlib_get_thread_index ();
+ vnet_interface_main_t *im;
+
+ im = &vnet_get_main ()->interface_main;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next > 2)
+ {
+ const interface_rx_dpo_t *ido0, *ido1;
+ u32 bi0, idoi0, bi1, idoi1;
+ vlib_buffer_t *b0, *b1;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ bi1 = from[1];
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ idoi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+ ido0 = interface_rx_dpo_get(idoi0);
+ ido1 = interface_rx_dpo_get(idoi1);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index;
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = ido1->ido_sw_if_index;
+
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ ido0->ido_sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ ido1->ido_sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b1));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ interface_rx_dpo_trace_t *tr0;
+
+ tr0 = vlib_add_trace (vm, node, b0, sizeof (*tr0));
+ tr0->sw_if_index = ido0->ido_sw_if_index;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ interface_rx_dpo_trace_t *tr1;
+
+ tr1 = vlib_add_trace (vm, node, b1, sizeof (*tr1));
+ tr1->sw_if_index = ido1->ido_sw_if_index;
+ }
+
+ vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1,
+ INTERFACE_RX_DPO_INPUT,
+ INTERFACE_RX_DPO_INPUT);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ const interface_rx_dpo_t * ido0;
+ vlib_buffer_t * b0;
+ u32 bi0, idoi0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ ido0 = interface_rx_dpo_get(idoi0);
+
+ /* Swap the RX interface of the packet to the one the
+ * interface DPO represents */
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index;
+
+ /* Bump the interface's RX counters */
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ ido0->ido_sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ interface_rx_dpo_trace_t *tr;
+
+ tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->sw_if_index = ido0->ido_sw_if_index;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0,
+ INTERFACE_RX_DPO_INPUT);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_interface_rx_dpo_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ interface_rx_dpo_trace_t * t = va_arg (*args, interface_rx_dpo_trace_t *);
+ uword indent = format_get_indent (s);
+ s = format (s, "%U sw_if_index:%d",
+ format_white_space, indent,
+ t->sw_if_index);
+ return s;
+}
+
+static uword
+interface_rx_dpo_ip4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (interface_rx_dpo_inline(vm, node, from_frame));
+}
+
+static uword
+interface_rx_dpo_ip6 (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (interface_rx_dpo_inline(vm, node, from_frame));
+}
+
+static uword
+interface_rx_dpo_l2 (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (interface_rx_dpo_inline(vm, node, from_frame));
+}
+
+VLIB_REGISTER_NODE (interface_rx_dpo_ip4_node) = {
+ .function = interface_rx_dpo_ip4,
+ .name = "interface-rx-dpo-ip4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_interface_rx_dpo_trace,
+
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [INTERFACE_RX_DPO_DROP] = "ip4-drop",
+ [INTERFACE_RX_DPO_INPUT] = "ip4-input",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_rx_dpo_ip4_node,
+ interface_rx_dpo_ip4)
+
+VLIB_REGISTER_NODE (interface_rx_dpo_ip6_node) = {
+ .function = interface_rx_dpo_ip6,
+ .name = "interface-rx-dpo-ip6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_interface_rx_dpo_trace,
+
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [INTERFACE_RX_DPO_DROP] = "ip6-drop",
+ [INTERFACE_RX_DPO_INPUT] = "ip6-input",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_rx_dpo_ip6_node,
+ interface_rx_dpo_ip6)
+
+VLIB_REGISTER_NODE (interface_rx_dpo_l2_node) = {
+ .function = interface_rx_dpo_l2,
+ .name = "interface-rx-dpo-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_interface_rx_dpo_trace,
+
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [INTERFACE_RX_DPO_DROP] = "error-drop",
+ [INTERFACE_RX_DPO_INPUT] = "l2-input",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_rx_dpo_l2_node,
+ interface_rx_dpo_l2)
diff --git a/src/vnet/dpo/interface_rx_dpo.h b/src/vnet/dpo/interface_rx_dpo.h
new file mode 100644
index 00000000..edecce08
--- /dev/null
+++ b/src/vnet/dpo/interface_rx_dpo.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERFACE_RX_DPO_H__
+#define __INTERFACE_RX_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief
+ * The data-path object representing a change of receive interface.
+ * If a packet encounters an object of this type in the data-path, its
+ * RX interface is changed.
+ */
+typedef struct interface_rx_dpo_t_
+{
+ /**
+ * The Software interface index that the packets will be given
+ * as the ingress/rx interface
+ */
+ u32 ido_sw_if_index;
+
+ /**
+ * next VLIB node. A '<proto>-input' node.
+ */
+ u32 ido_next_node;
+
+ /**
+ * DPO protocol that the packets will have as they 'ingress'
+ * on this interface
+ */
+ dpo_proto_t ido_proto;
+
+ /**
+ * number of locks.
+ */
+ u16 ido_locks;
+} interface_rx_dpo_t;
+
+extern void interface_rx_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ dpo_id_t *dpo);
+
+extern void interface_rx_dpo_module_init(void);
+
+/**
+ * @brief pool of all interface DPOs
+ */
+interface_rx_dpo_t *interface_rx_dpo_pool;
+
+static inline interface_rx_dpo_t *
+interface_rx_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(interface_rx_dpo_pool, index));
+}
+
+#endif
diff --git a/src/vnet/dpo/interface_tx_dpo.c b/src/vnet/dpo/interface_tx_dpo.c
new file mode 100644
index 00000000..f7c8bfda
--- /dev/null
+++ b/src/vnet/dpo/interface_tx_dpo.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/interface_tx_dpo.h>
+#include <vnet/adj/rewrite.h>
+
+/*
+ * We neither lock nor unlock these DPOs, since there is nothing to lock;
+ * all we do is construct DPO object wrappers around a sw_if_index
+ */
+static void
+interface_tx_dpo_lock (dpo_id_t *dpo)
+{
+}
+
+static void
+interface_tx_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+/*
+ * interface_tx_dpo_add_or_lock
+ *
+ * construct DPO object wrappers around a sw_if_index
+ */
+void
+interface_tx_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ dpo_id_t *dpo)
+{
+ dpo_set(dpo, DPO_INTERFACE_TX, proto, sw_if_index);
+}
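+/*
+ * Note the dpoi_index stored is the sw_if_index itself, so no pool or DB
+ * is needed. e.g. (illustrative; 'sw_if_index' is hypothetical):
+ *
+ *   dpo_id_t dpo = DPO_INVALID;
+ *   interface_tx_dpo_add_or_lock(DPO_PROTO_IP4, sw_if_index, &dpo);
+ */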
+
+u8*
+format_interface_tx_dpo (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+
+ return (format(s, "%U-dpo:",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm, index)));
+}
+
+static void
+interface_tx_dpo_mem_show (void)
+{
+}
+
+u32*
+interface_tx_dpo_get_next_node (const dpo_id_t *dpo)
+{
+ u32 *node_indices = NULL;
+
+ /*
+ * return the interface's TX node for the wrapped sw_if_index
+ */
+ vec_add1(node_indices,
+ vnet_tx_node_index_for_sw_interface(vnet_get_main(),
+ dpo->dpoi_index));
+
+ return (node_indices);
+}
+
+const static dpo_vft_t interface_tx_dpo_vft = {
+ .dv_lock = interface_tx_dpo_lock,
+ .dv_unlock = interface_tx_dpo_unlock,
+ .dv_format = format_interface_tx_dpo,
+ .dv_mem_show = interface_tx_dpo_mem_show,
+ .dv_get_next_node = interface_tx_dpo_get_next_node,
+};
+
+void
+interface_tx_dpo_module_init (void)
+{
+ dpo_register(DPO_INTERFACE_TX, &interface_tx_dpo_vft, NULL);
+}
+
diff --git a/src/vnet/dpo/interface_tx_dpo.h b/src/vnet/dpo/interface_tx_dpo.h
new file mode 100644
index 00000000..0c560ada
--- /dev/null
+++ b/src/vnet/dpo/interface_tx_dpo.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing transmitting the packet on an interface.
+ * This is a convenient DPO wrapper around a simple interface transmit and thus
+ * allows us to represent direct interface transmit in the DPO model.
+ */
+
+#ifndef __INTERFACE_TX_DPO_H__
+#define __INTERFACE_TX_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern void interface_tx_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ dpo_id_t *dpo);
+
+extern void interface_tx_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/ip_null_dpo.c b/src/vnet/dpo/ip_null_dpo.c
new file mode 100644
index 00000000..22682e4e
--- /dev/null
+++ b/src/vnet/dpo/ip_null_dpo.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing the IP NULL action: drop the packet and optionally reply to the sender
+ */
+
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/ip/ip.h>
+
+/**
+ * @brief A representation of the IP_NULL DPO
+ */
+typedef struct ip_null_dpo_t_
+{
+ /**
+ * @brief The action to take on a packet
+ */
+ ip_null_dpo_action_t ind_action;
+ /**
+ * @brief The next VLIB node
+ */
+ u32 ind_next_index;
+ /**
+ * rate limits
+ */
+} ip_null_dpo_t;
+
+/**
+ * @brief The IP_NULL DPOs are shared by all routes, hence they are global.
+ * As the name implies this is only for IP, hence 2 protocols' worth.
+ */
+static ip_null_dpo_t ip_null_dpos[2 * IP_NULL_DPO_ACTION_NUM] = {
+ [0] = {
+ /* proto ip4, no action */
+ .ind_action = IP_NULL_ACTION_NONE,
+ },
+ [1] = {
+ /* proto ip4, action send unreach */
+ .ind_action = IP_NULL_ACTION_SEND_ICMP_UNREACH,
+ },
+ [2] = {
+ /* proto ip4, action send prohibit */
+ .ind_action = IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+ },
+ [3] = {
+ /* proto ip6, no action */
+ .ind_action = IP_NULL_ACTION_NONE,
+ },
+ [4] = {
+ /* proto ip6, action send unreach */
+ .ind_action = IP_NULL_ACTION_SEND_ICMP_UNREACH,
+ },
+ [5] = {
+ /* proto ip6, action send prohibit */
+ .ind_action = IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+ },
+};
+
+/**
+ * @brief Action strings
+ */
+const char *ip_null_action_strings[] = IP_NULL_ACTIONS;
+
+void
+ip_null_dpo_add_and_lock (dpo_proto_t proto,
+ ip_null_dpo_action_t action,
+ dpo_id_t *dpo)
+{
+ int i;
+
+ ASSERT((proto == DPO_PROTO_IP4) ||
+ (proto == DPO_PROTO_IP6));
+ ASSERT(action < IP_NULL_DPO_ACTION_NUM);
+
+ i = (proto == DPO_PROTO_IP4 ? 0 : 1);
+
+ dpo_set(dpo, DPO_IP_NULL, proto, (i*IP_NULL_DPO_ACTION_NUM) + action);
+}
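+/*
+ * Worked example: proto=DPO_PROTO_IP6, action=IP_NULL_ACTION_SEND_ICMP_PROHIBIT
+ * gives i=1 and action=2, so the index is (1 * 3) + 2 = 5, i.e.
+ * ip_null_dpos[5] above.
+ */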
+
+always_inline const ip_null_dpo_t*
+ip_null_dpo_get (index_t indi)
+{
+ return (&ip_null_dpos[indi]);
+}
+
+static void
+ip_null_dpo_lock (dpo_id_t *dpo)
+{
+ /*
+ * not maintaining a lock count on the ip_null, they are const global and
+ * never die.
+ */
+}
+static void
+ip_null_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+static u8*
+format_ip_null_dpo (u8 *s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ const ip_null_dpo_t *ind;
+ dpo_proto_t proto;
+
+ ind = ip_null_dpo_get(index);
+ proto = (index < IP_NULL_DPO_ACTION_NUM ? DPO_PROTO_IP4 : DPO_PROTO_IP6);
+
+ return (format(s, "%U-null action:%s",
+ format_dpo_proto, proto,
+ ip_null_action_strings[ind->ind_action]));
+}
+
+const static dpo_vft_t ip_null_vft = {
+ .dv_lock = ip_null_dpo_lock,
+ .dv_unlock = ip_null_dpo_unlock,
+ .dv_format = format_ip_null_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an ip_null
+ * object.
+ *
+ * This means that these graph nodes are ones from which an ip_null is the
+ * parent object in the DPO-graph.
+ */
+const static char* const ip4_null_nodes[] =
+{
+ "ip4-null",
+ NULL,
+};
+const static char* const ip6_null_nodes[] =
+{
+ "ip6-null",
+ NULL,
+};
+
+const static char* const * const ip_null_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = ip4_null_nodes,
+ [DPO_PROTO_IP6] = ip6_null_nodes,
+};
+
+typedef struct ip_null_dpo_trace_t_
+{
+ index_t ind_index;
+} ip_null_dpo_trace_t;
+
+/**
+ * @brief Exit nodes from a IP_NULL
+ */
+typedef enum ip_null_next_t_
+{
+ IP_NULL_NEXT_DROP,
+ IP_NULL_NEXT_ICMP,
+ IP_NULL_NEXT_NUM,
+} ip_null_next_t;
+
+always_inline uword
+ip_null_dpo_switch (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ u8 is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ static f64 time_last_seed_change = -1e100;
+ static u32 hash_seeds[3];
+ static uword hash_bitmap[256 / BITS (uword)];
+ f64 time_now;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ time_now = vlib_time_now (vm);
+ if (time_now - time_last_seed_change > 1e-1)
+ {
+ uword i;
+ u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
+ sizeof (hash_seeds));
+ for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+ hash_seeds[i] = r[i];
+
+ /* Mark all hash keys as not seen before. */
+ for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+ hash_bitmap[i] = 0;
+
+ time_last_seed_change = time_now;
+ }
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 a0, b0, c0, m0, drop0;
+ vlib_buffer_t *p0;
+ u32 bi0, indi0, next0;
+ const ip_null_dpo_t *ind0;
+ uword bm0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+
+ /* fetch the IP NULL DPO via the buffer's TX adjacency index */
+ indi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ ind0 = ip_null_dpo_get(indi0);
+ next0 = IP_NULL_NEXT_DROP;
+
+ /*
+ * rate limit - don't DoS the sender.
+ */
+ a0 = hash_seeds[0];
+ b0 = hash_seeds[1];
+ c0 = hash_seeds[2];
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip0 = vlib_buffer_get_current (p0);
+
+ a0 ^= ip0->dst_address.data_u32;
+ b0 ^= ip0->src_address.data_u32;
+
+ hash_v3_finalize32 (a0, b0, c0);
+ }
+ else
+ {
+ ip6_header_t *ip0 = vlib_buffer_get_current (p0);
+
+ a0 ^= ip0->dst_address.as_u32[0];
+ b0 ^= ip0->src_address.as_u32[0];
+ c0 ^= ip0->src_address.as_u32[1];
+
+ hash_v3_mix32 (a0, b0, c0);
+
+ a0 ^= ip0->dst_address.as_u32[1];
+ b0 ^= ip0->src_address.as_u32[2];
+ c0 ^= ip0->src_address.as_u32[3];
+
+ hash_v3_finalize32 (a0, b0, c0);
+ }
+
+ c0 &= BITS (hash_bitmap) - 1;
+ c0 = c0 / BITS (uword);
+ m0 = (uword) 1 << (c0 % BITS (uword));
+
+ bm0 = hash_bitmap[c0];
+ drop0 = (bm0 & m0) != 0;
+
+ /* Mark it as seen. */
+ hash_bitmap[c0] = bm0 | m0;
+
+ if (PREDICT_FALSE(!drop0))
+ {
+ if (is_ip4)
+ {
+ /*
+ * There's a trade-off here: this conditional statement
+ * versus a graph node per condition. Given that the expected
+ * number of packets to reach a null route is ~0,
+ * we favour the run-time cost over the graph complexity
+ */
+ if (IP_NULL_ACTION_SEND_ICMP_UNREACH == ind0->ind_action)
+ {
+ next0 = IP_NULL_NEXT_ICMP;
+ icmp4_error_set_vnet_buffer(
+ p0,
+ ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_destination_unreachable_host,
+ 0);
+ }
+ else if (IP_NULL_ACTION_SEND_ICMP_PROHIBIT == ind0->ind_action)
+ {
+ next0 = IP_NULL_NEXT_ICMP;
+ icmp4_error_set_vnet_buffer(
+ p0,
+ ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_host_administratively_prohibited,
+ 0);
+ }
+ }
+ else
+ {
+ if (IP_NULL_ACTION_SEND_ICMP_UNREACH == ind0->ind_action)
+ {
+ next0 = IP_NULL_NEXT_ICMP;
+ icmp6_error_set_vnet_buffer(
+ p0,
+ ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_no_route_to_destination,
+ 0);
+ }
+ else if (IP_NULL_ACTION_SEND_ICMP_PROHIBIT == ind0->ind_action)
+ {
+ next0 = IP_NULL_NEXT_ICMP;
+ icmp6_error_set_vnet_buffer(
+ p0,
+ ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_destination_administratively_prohibited,
+ 0);
+ }
+ }
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip_null_dpo_trace_t *tr = vlib_add_trace (vm, node, p0,
+ sizeof (*tr));
+ tr->ind_index = indi0;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static u8 *
+format_ip_null_dpo_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip_null_dpo_trace_t *t = va_arg (*args, ip_null_dpo_trace_t *);
+
+ s = format (s, "%U", format_ip_null_dpo, t->ind_index, 0);
+ return s;
+}
+
+static uword
+ip4_null_dpo_switch (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (ip_null_dpo_switch(vm, node, frame, 1));
+}
+
+/**
+ * @brief
+ */
+VLIB_REGISTER_NODE (ip4_null_dpo_node) = {
+ .function = ip4_null_dpo_switch,
+ .name = "ip4-null",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip_null_dpo_trace,
+ .n_next_nodes = IP_NULL_NEXT_NUM,
+ .next_nodes = {
+ [IP_NULL_NEXT_DROP] = "ip4-drop",
+ [IP_NULL_NEXT_ICMP] = "ip4-icmp-error",
+ },
+};
+
+static uword
+ip6_null_dpo_switch (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (ip_null_dpo_switch(vm, node, frame, 0));
+}
+
+/**
+ * @brief
+ */
+VLIB_REGISTER_NODE (ip6_null_dpo_node) = {
+ .function = ip6_null_dpo_switch,
+ .name = "ip6-null",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip_null_dpo_trace,
+ .n_next_nodes = IP_NULL_NEXT_NUM,
+ .next_nodes = {
+ [IP_NULL_NEXT_DROP] = "ip6-drop",
+ [IP_NULL_NEXT_ICMP] = "ip6-icmp-error",
+ },
+};
+
+void
+ip_null_dpo_module_init (void)
+{
+ dpo_register(DPO_IP_NULL, &ip_null_vft, ip_null_nodes);
+}
diff --git a/src/vnet/dpo/ip_null_dpo.h b/src/vnet/dpo/ip_null_dpo.h
new file mode 100644
index 00000000..002a2a70
--- /dev/null
+++ b/src/vnet/dpo/ip_null_dpo.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The IP NULL DPO represents the rubbish bin for IP traffic. Without specifying an
+ * action (i.e. send ICMP type X to the sender) it is equivalent to using a drop DPO.
+ * However, in contrast to the drop DPO, any route that resolves via an IP NULL is
+ * considered 'resolved' by FIB, i.e. an IP NULL is used when the control plane
+ * is explicitly expressing the desire to drop packets. Drop DPOs are used
+ * internally by FIB when resolution is not possible.
+ *
+ * Any replies to sender are rate limited.
+ */
+
+#ifndef __IP_NULL_DPO_H__
+#define __IP_NULL_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief Actions to take when a packet encounters the NULL DPO
+ */
+typedef enum ip_null_dpo_action_t_
+{
+ IP_NULL_ACTION_NONE,
+ IP_NULL_ACTION_SEND_ICMP_UNREACH,
+ IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+} ip_null_dpo_action_t;
+
+#define IP_NULL_ACTIONS { \
+ [IP_NULL_ACTION_NONE] = "discard", \
+ [IP_NULL_ACTION_SEND_ICMP_UNREACH] = "send-unreachable", \
+ [IP_NULL_ACTION_SEND_ICMP_PROHIBIT] = "send-prohibited", \
+}
+
+#define IP_NULL_DPO_ACTION_NUM (IP_NULL_ACTION_SEND_ICMP_PROHIBIT+1)
+
+extern void ip_null_dpo_add_and_lock (dpo_proto_t proto,
+ ip_null_dpo_action_t action,
+ dpo_id_t *dpo);
+
+extern void ip_null_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
new file mode 100644
index 00000000..af054f1c
--- /dev/null
+++ b/src/vnet/dpo/load_balance.c
@@ -0,0 +1,1115 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vppinfra/math.h> /* for fabs */
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_urpf_list.h>
+
+/*
+ * distribution error tolerance for load-balancing
+ */
+const f64 multipath_next_hop_error_tolerance = 0.1;
+
+#undef LB_DEBUG
+
+#ifdef LB_DEBUG
+#define LB_DBG(_lb, _fmt, _args...) \
+{ \
+ u8* _tmp =NULL; \
+ clib_warning("lb:[%s]:" _fmt, \
+ load_balance_format(load_balance_get_index((_lb)), \
+ 0, _tmp), \
+ ##_args); \
+ vec_free(_tmp); \
+}
+#else
+#define LB_DBG(_p, _fmt, _args...)
+#endif
+
+
+/**
+ * Pool of all load-balance DPOs. It's not static so the DP can have fast access
+ */
+load_balance_t *load_balance_pool;
+
+/**
+ * The one instance of load-balance main
+ */
+load_balance_main_t load_balance_main;
+
+f64
+load_balance_get_multipath_tolerance (void)
+{
+ return (multipath_next_hop_error_tolerance);
+}
+
+static inline index_t
+load_balance_get_index (const load_balance_t *lb)
+{
+ return (lb - load_balance_pool);
+}
+
+static inline dpo_id_t*
+load_balance_get_buckets (load_balance_t *lb)
+{
+ if (LB_HAS_INLINE_BUCKETS(lb))
+ {
+ return (lb->lb_buckets_inline);
+ }
+ else
+ {
+ return (lb->lb_buckets);
+ }
+}
+
+static load_balance_t *
+load_balance_alloc_i (void)
+{
+ load_balance_t *lb;
+
+ pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
+ memset(lb, 0, sizeof(*lb));
+
+ lb->lb_map = INDEX_INVALID;
+ lb->lb_urpf = INDEX_INVALID;
+ vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
+ load_balance_get_index(lb));
+ vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
+ load_balance_get_index(lb));
+ vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters),
+ load_balance_get_index(lb));
+ vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
+ load_balance_get_index(lb));
+
+ return (lb);
+}
+
+static u8*
+load_balance_format (index_t lbi,
+ load_balance_format_flags_t flags,
+ u32 indent,
+ u8 *s)
+{
+ vlib_counter_t to, via;
+ load_balance_t *lb;
+ dpo_id_t *buckets;
+ u32 i;
+
+ lb = load_balance_get(lbi);
+ vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
+ vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
+ buckets = load_balance_get_buckets(lb);
+
+ s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
+ s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
+ s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
+ s = format(s, "uRPF:%d ", lb->lb_urpf);
+ s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
+ if (0 != via.packets)
+ {
+ s = format(s, " via:[%Ld:%Ld]",
+ via.packets, via.bytes);
+ }
+ s = format(s, "]");
+
+ if (INDEX_INVALID != lb->lb_map)
+ {
+ s = format(s, "\n%U%U",
+ format_white_space, indent+4,
+ format_load_balance_map, lb->lb_map, indent+4);
+ }
+ for (i = 0; i < lb->lb_n_buckets; i++)
+ {
+ s = format(s, "\n%U[%d] %U",
+ format_white_space, indent+2,
+ i,
+ format_dpo_id,
+ &buckets[i], indent+6);
+ }
+ return (s);
+}
+
+u8*
+format_load_balance (u8 * s, va_list * args)
+{
+ index_t lbi = va_arg(*args, index_t);
+ load_balance_format_flags_t flags = va_arg(*args, load_balance_format_flags_t);
+
+ return (load_balance_format(lbi, flags, 0, s));
+}
+static u8*
+format_load_balance_dpo (u8 * s, va_list * args)
+{
+ index_t lbi = va_arg(*args, index_t);
+ u32 indent = va_arg(*args, u32);
+
+ return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
+}
+
+
+static load_balance_t *
+load_balance_create_i (u32 num_buckets,
+ dpo_proto_t lb_proto,
+ flow_hash_config_t fhc)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_alloc_i();
+ lb->lb_hash_config = fhc;
+ lb->lb_n_buckets = num_buckets;
+ lb->lb_n_buckets_minus_1 = num_buckets-1;
+ lb->lb_proto = lb_proto;
+
+ if (!LB_HAS_INLINE_BUCKETS(lb))
+ {
+ vec_validate_aligned(lb->lb_buckets,
+ lb->lb_n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+ }
+
+ LB_DBG(lb, "create");
+
+ return (lb);
+}
+
+index_t
+load_balance_create (u32 n_buckets,
+ dpo_proto_t lb_proto,
+ flow_hash_config_t fhc)
+{
+ return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc)));
+}
+
+static inline void
+load_balance_set_bucket_i (load_balance_t *lb,
+ u32 bucket,
+ dpo_id_t *buckets,
+ const dpo_id_t *next)
+{
+ dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
+}
+
+void
+load_balance_set_bucket (index_t lbi,
+ u32 bucket,
+ const dpo_id_t *next)
+{
+ load_balance_t *lb;
+ dpo_id_t *buckets;
+
+ lb = load_balance_get(lbi);
+ buckets = load_balance_get_buckets(lb);
+
+ ASSERT(bucket < lb->lb_n_buckets);
+
+ load_balance_set_bucket_i(lb, bucket, buckets, next);
+}
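+/*
+ * Illustrative sketch: build a 2-bucket ECMP IPv4 load-balance.
+ * 'via0_dpo' and 'via1_dpo' are hypothetical, already-set DPOs:
+ *
+ *   index_t lbi = load_balance_create(2, DPO_PROTO_IP4,
+ *                                     IP_FLOW_HASH_DEFAULT);
+ *   load_balance_set_bucket(lbi, 0, &via0_dpo);
+ *   load_balance_set_bucket(lbi, 1, &via1_dpo);
+ */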
+
+int
+load_balance_is_drop (const dpo_id_t *dpo)
+{
+ load_balance_t *lb;
+
+ if (DPO_LOAD_BALANCE != dpo->dpoi_type)
+ return (0);
+
+ lb = load_balance_get(dpo->dpoi_index);
+
+ if (1 == lb->lb_n_buckets)
+ {
+ return (dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
+ }
+ return (0);
+}
+
+void
+load_balance_set_fib_entry_flags (index_t lbi,
+ fib_entry_flag_t flags)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_get(lbi);
+ lb->lb_fib_entry_flags = flags;
+}
+
+
+void
+load_balance_set_urpf (index_t lbi,
+ index_t urpf)
+{
+ load_balance_t *lb;
+ index_t old;
+
+ lb = load_balance_get(lbi);
+
+ /*
+ * packets in flight may see this change, but the write is atomic, so that's fine
+ */
+ old = lb->lb_urpf;
+ lb->lb_urpf = urpf;
+
+ fib_urpf_list_unlock(old);
+ fib_urpf_list_lock(urpf);
+}
+
+index_t
+load_balance_get_urpf (index_t lbi)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_get(lbi);
+
+ return (lb->lb_urpf);
+}
+
+const dpo_id_t *
+load_balance_get_bucket (index_t lbi,
+ u32 bucket)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_get(lbi);
+
+ return (load_balance_get_bucket_i(lb, bucket));
+}
+
+static int
+next_hop_sort_by_weight (const load_balance_path_t * n1,
+ const load_balance_path_t * n2)
+{
+ return ((int) n1->path_weight - (int) n2->path_weight);
+}
+
+/* The given next-hop vector is over-written with a normalized one, sorted by
+ weight, where each weight corresponds to the number of adjacencies for that
+ next hop. Returns the number of adjacencies in the block. */
+u32
+ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
+ load_balance_path_t ** normalized_next_hops,
+ u32 *sum_weight_in,
+ f64 multipath_next_hop_error_tolerance)
+{
+ load_balance_path_t * nhs;
+ uword n_nhs, n_adj, n_adj_left, i, sum_weight;
+ f64 norm, error;
+
+ n_nhs = vec_len (raw_next_hops);
+ ASSERT (n_nhs > 0);
+ if (n_nhs == 0)
+ return 0;
+
+ /* Allocate enough space for 2 copies; we'll use the second copy to save original weights. */
+ nhs = *normalized_next_hops;
+ vec_validate (nhs, 2*n_nhs - 1);
+
+ /* Fast path: 1 next hop in block. */
+ n_adj = n_nhs;
+ if (n_nhs == 1)
+ {
+ nhs[0] = raw_next_hops[0];
+ nhs[0].path_weight = 1;
+ _vec_len (nhs) = 1;
+ sum_weight = 1;
+ goto done;
+ }
+
+ else if (n_nhs == 2)
+ {
+ int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
+
+ /* Fast sort. */
+ nhs[0] = raw_next_hops[cmp];
+ nhs[1] = raw_next_hops[cmp ^ 1];
+
+ /* Fast path: equal cost multipath with 2 next hops. */
+ if (nhs[0].path_weight == nhs[1].path_weight)
+ {
+ nhs[0].path_weight = nhs[1].path_weight = 1;
+ _vec_len (nhs) = 2;
+ sum_weight = 2;
+ goto done;
+ }
+ }
+ else
+ {
+ clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
+ qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
+ }
+
+ /* Find total weight to normalize weights. */
+ sum_weight = 0;
+ for (i = 0; i < n_nhs; i++)
+ sum_weight += nhs[i].path_weight;
+
+ /* In the unlikely case that all weights are given as 0, set them all to 1. */
+ if (sum_weight == 0)
+ {
+ for (i = 0; i < n_nhs; i++)
+ nhs[i].path_weight = 1;
+ sum_weight = n_nhs;
+ }
+
+ /* Save copies of all next hop weights to avoid being overwritten in loop below. */
+ for (i = 0; i < n_nhs; i++)
+ nhs[n_nhs + i].path_weight = nhs[i].path_weight;
+
+ /* Try larger and larger power of 2 sized adjacency blocks until we
+ find one where traffic flows to within the given error tolerance of the
+ specified weights. */
+ for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+ {
+ error = 0;
+
+ norm = n_adj / ((f64) sum_weight);
+ n_adj_left = n_adj;
+ for (i = 0; i < n_nhs; i++)
+ {
+ f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
+ word n = flt_round_nearest (nf);
+
+ n = n > n_adj_left ? n_adj_left : n;
+ n_adj_left -= n;
+ error += fabs (nf - n);
+ nhs[i].path_weight = n;
+
+ if (0 == nhs[i].path_weight)
+ {
+ /*
+ * This happens when the weight skew is high (norm is small) and the
+ * rounded share is 0. Without this correction the path with a low
+ * weight would have no representation in the load-balance - we don't
+ * want that. If the weight skew is high, the load-balance needs many
+ * buckets to allow for it: you pays your money and takes your choice.
+ */
+ error = n_adj;
+ break;
+ }
+ }
+
+ nhs[0].path_weight += n_adj_left;
+
+ /* Average error per adjacency within tolerance for this size adjacency block? */
+ if (error <= multipath_next_hop_error_tolerance*n_adj)
+ {
+ /* Truncate any next hops with zero weight. */
+ _vec_len (nhs) = i;
+ break;
+ }
+ }
+
+done:
+ /* Save vector for next call. */
+ *normalized_next_hops = nhs;
+ *sum_weight_in = sum_weight;
+ return n_adj;
+}
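+/*
+ * Worked example: raw weights {1, 3} with tolerance 0.1.
+ * sum_weight = 4. The first block tried, n_adj = max_pow2(2) = 2, fails:
+ * with norm = 0.5 the weight-1 path rounds to zero buckets, which triggers
+ * the zero-weight correction above. The block then doubles to n_adj = 4,
+ * where norm = 1 and the paths get exactly 3 and 1 buckets respectively.
+ */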
+
+static load_balance_path_t *
+load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs,
+ dpo_proto_t drop_proto)
+{
+ if (0 == vec_len(nhs))
+ {
+ load_balance_path_t *new_nhs = NULL, *nh;
+
+ /*
+ * we need something for the load-balance. so use the drop
+ */
+ vec_add2(new_nhs, nh, 1);
+
+ nh->path_weight = 1;
+ dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
+
+ return (new_nhs);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Fill in adjacencies in block based on corresponding
+ * next hop adjacencies.
+ */
+static void
+load_balance_fill_buckets (load_balance_t *lb,
+ load_balance_path_t *nhs,
+ dpo_id_t *buckets,
+ u32 n_buckets)
+{
+ load_balance_path_t * nh;
+ u16 ii, bucket;
+
+ bucket = 0;
+
+ /*
+ * the next-hops have normalised weights. that means their sum is the number
+ * of buckets we need to fill.
+ */
+ vec_foreach (nh, nhs)
+ {
+ for (ii = 0; ii < nh->path_weight; ii++)
+ {
+ ASSERT(bucket < n_buckets);
+ load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
+ }
+ }
+}
+
+static inline void
+load_balance_set_n_buckets (load_balance_t *lb,
+ u32 n_buckets)
+{
+ lb->lb_n_buckets = n_buckets;
+ lb->lb_n_buckets_minus_1 = n_buckets-1;
+}
+
+void
+load_balance_multipath_update (const dpo_id_t *dpo,
+ const load_balance_path_t * raw_nhs,
+ load_balance_flags_t flags)
+{
+ load_balance_path_t *nh, *nhs, *fixed_nhs;
+ u32 sum_of_weights, n_buckets, ii;
+ index_t lbmi, old_lbmi;
+ load_balance_t *lb;
+ dpo_id_t *tmp_dpo;
+
+ nhs = NULL;
+
+ ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
+ lb = load_balance_get(dpo->dpoi_index);
+ fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
+ n_buckets =
+ ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
+ raw_nhs :
+ fixed_nhs),
+ &nhs,
+ &sum_of_weights,
+ multipath_next_hop_error_tolerance);
+
+ ASSERT (n_buckets >= vec_len (raw_nhs));
+
+ /*
+ * Save the old load-balance map used, and get a new one if required.
+ */
+ old_lbmi = lb->lb_map;
+ if (flags & LOAD_BALANCE_FLAG_USES_MAP)
+ {
+ lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
+ }
+ else
+ {
+ lbmi = INDEX_INVALID;
+ }
+
+ if (0 == lb->lb_n_buckets)
+ {
+ /*
+ * first time initialisation. No packets in flight, so we can write
+ * at leisure.
+ */
+ load_balance_set_n_buckets(lb, n_buckets);
+
+ if (!LB_HAS_INLINE_BUCKETS(lb))
+ vec_validate_aligned(lb->lb_buckets,
+ lb->lb_n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ load_balance_fill_buckets(lb, nhs,
+ load_balance_get_buckets(lb),
+ n_buckets);
+ lb->lb_map = lbmi;
+ }
+ else
+ {
+ /*
+ * This is a modification of an existing load-balance.
+ * We need to ensure that packets in flight see a consistent state; that
+ * is, the number of reported buckets the LB has (read from
+ * lb_n_buckets_minus_1) is not more than it actually has. So if the
+ * number of buckets is increasing, we must update the bucket array first,
+ * then the reported number; vice-versa if the number of buckets goes down.
+ */
+ if (n_buckets == lb->lb_n_buckets)
+ {
+ /*
+ * no change in the number of buckets. we can simply fill what
+ * is new over what is old.
+ */
+ load_balance_fill_buckets(lb, nhs,
+ load_balance_get_buckets(lb),
+ n_buckets);
+ lb->lb_map = lbmi;
+ }
+ else if (n_buckets > lb->lb_n_buckets)
+ {
+ /*
+ * we have more buckets. the old load-balance map (if there is one)
+ * will remain valid, i.e. mapping to indices within range, so we
+ * update it last.
+ */
+ if (n_buckets > LB_NUM_INLINE_BUCKETS &&
+ lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
+ {
+ /*
+	     * the new increased number of buckets is crossing the threshold
+	     * from inline storage to out-line. Alloc the out-line buckets
+	     * first, then fix up the number, then reset the inlines.
+ */
+ ASSERT(NULL == lb->lb_buckets);
+ vec_validate_aligned(lb->lb_buckets,
+ n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ load_balance_fill_buckets(lb, nhs,
+ lb->lb_buckets,
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ load_balance_set_n_buckets(lb, n_buckets);
+
+ CLIB_MEMORY_BARRIER();
+
+ for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
+ {
+ dpo_reset(&lb->lb_buckets_inline[ii]);
+ }
+ }
+ else
+ {
+ if (n_buckets <= LB_NUM_INLINE_BUCKETS)
+ {
+ /*
+		  * we are not crossing the threshold and it's still inline buckets;
+		  * we can write the new over the old.
+ */
+ load_balance_fill_buckets(lb, nhs,
+ load_balance_get_buckets(lb),
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ load_balance_set_n_buckets(lb, n_buckets);
+ }
+ else
+ {
+ /*
+ * we are not crossing the threshold. We need a new bucket array to
+ * hold the increased number of choices.
+ */
+ dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
+
+ new_buckets = NULL;
+ old_buckets = load_balance_get_buckets(lb);
+
+ vec_validate_aligned(new_buckets,
+ n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ load_balance_fill_buckets(lb, nhs, new_buckets, n_buckets);
+ CLIB_MEMORY_BARRIER();
+ lb->lb_buckets = new_buckets;
+ CLIB_MEMORY_BARRIER();
+ load_balance_set_n_buckets(lb, n_buckets);
+
+ vec_foreach(tmp_dpo, old_buckets)
+ {
+ dpo_reset(tmp_dpo);
+ }
+ vec_free(old_buckets);
+ }
+ }
+
+ /*
+ * buckets fixed. ready for the MAP update.
+ */
+ lb->lb_map = lbmi;
+ }
+ else
+ {
+ /*
+ * bucket size shrinkage.
+ * Any map we have will be based on the old
+ * larger number of buckets, so will be translating to indices
+ * out of range. So the new MAP must be installed first.
+ */
+ lb->lb_map = lbmi;
+ CLIB_MEMORY_BARRIER();
+
+
+ if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
+ lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS)
+ {
+ /*
+ * the new decreased number of buckets is crossing the threshold
+ * from out-line storage to inline:
+ * 1 - Fill the inline buckets,
+	     *  2 - fix up the number (at this point the inline buckets are
+	     *      in use),
+	     *  3 - free the out-line buckets.
+ */
+ load_balance_fill_buckets(lb, nhs,
+ lb->lb_buckets_inline,
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ load_balance_set_n_buckets(lb, n_buckets);
+ CLIB_MEMORY_BARRIER();
+
+ vec_foreach(tmp_dpo, lb->lb_buckets)
+ {
+ dpo_reset(tmp_dpo);
+ }
+ vec_free(lb->lb_buckets);
+ }
+ else
+ {
+ /*
+ * not crossing the threshold.
+ * 1 - update the number to the smaller size
+ * 2 - write the new buckets
+ * 3 - reset those no longer used.
+ */
+ dpo_id_t *buckets;
+ u32 old_n_buckets;
+
+ old_n_buckets = lb->lb_n_buckets;
+ buckets = load_balance_get_buckets(lb);
+
+ load_balance_set_n_buckets(lb, n_buckets);
+ CLIB_MEMORY_BARRIER();
+
+ load_balance_fill_buckets(lb, nhs,
+ buckets,
+ n_buckets);
+
+ for (ii = n_buckets; ii < old_n_buckets; ii++)
+ {
+ dpo_reset(&buckets[ii]);
+ }
+ }
+ }
+ }
+
+ vec_foreach (nh, nhs)
+ {
+ dpo_reset(&nh->path_dpo);
+ }
+ vec_free(nhs);
+ vec_free(fixed_nhs);
+
+ load_balance_map_unlock(old_lbmi);
+}
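+
+/*
+ * Usage sketch (illustrative, assuming 'dpo' identifies an existing
+ * DPO_LOAD_BALANCE and dpo_1/dpo_2 are resolved child DPOs):
+ *
+ * @code
+ *     load_balance_path_t *paths = NULL, *path;
+ *
+ *     vec_add2(paths, path, 1);
+ *     dpo_copy(&path->path_dpo, &dpo_1);
+ *     path->path_weight = 1;
+ *
+ *     vec_add2(paths, path, 1);
+ *     dpo_copy(&path->path_dpo, &dpo_2);
+ *     path->path_weight = 3;
+ *
+ *     load_balance_multipath_update(&dpo, paths, LOAD_BALANCE_FLAG_NONE);
+ *
+ *     vec_foreach (path, paths)
+ *         dpo_reset(&path->path_dpo);
+ *     vec_free(paths);
+ * @endcode
+ *
+ * Traffic is then split 1:3 over the two children.
+ */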
+
+static void
+load_balance_lock (dpo_id_t *dpo)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_get(dpo->dpoi_index);
+
+ lb->lb_locks++;
+}
+
+static void
+load_balance_destroy (load_balance_t *lb)
+{
+ dpo_id_t *buckets;
+ int i;
+
+ buckets = load_balance_get_buckets(lb);
+
+ for (i = 0; i < lb->lb_n_buckets; i++)
+ {
+ dpo_reset(&buckets[i]);
+ }
+
+ LB_DBG(lb, "destroy");
+ if (!LB_HAS_INLINE_BUCKETS(lb))
+ {
+ vec_free(lb->lb_buckets);
+ }
+
+ fib_urpf_list_unlock(lb->lb_urpf);
+ load_balance_map_unlock(lb->lb_map);
+
+ pool_put(load_balance_pool, lb);
+}
+
+static void
+load_balance_unlock (dpo_id_t *dpo)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_get(dpo->dpoi_index);
+
+ lb->lb_locks--;
+
+ if (0 == lb->lb_locks)
+ {
+ load_balance_destroy(lb);
+ }
+}
+
+static void
+load_balance_mem_show (void)
+{
+ fib_show_memory_usage("load-balance",
+ pool_elts(load_balance_pool),
+ pool_len(load_balance_pool),
+ sizeof(load_balance_t));
+ load_balance_map_show_mem();
+}
+
+const static dpo_vft_t lb_vft = {
+ .dv_lock = load_balance_lock,
+ .dv_unlock = load_balance_unlock,
+ .dv_format = format_load_balance_dpo,
+ .dv_mem_show = load_balance_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
+ * object.
+ *
+ * This means that these graph nodes are ones from which a load-balance is
+ * the parent object in the DPO-graph.
+ *
+ * We do not list all the load-balance nodes (such as the *-lookup nodes);
+ * instead we rely on the correct use of the .sibling_of field when setting
+ * up these sibling nodes.
+ */
+const static char* const load_balance_ip4_nodes[] =
+{
+ "ip4-load-balance",
+ NULL,
+};
+const static char* const load_balance_ip6_nodes[] =
+{
+ "ip6-load-balance",
+ NULL,
+};
+const static char* const load_balance_mpls_nodes[] =
+{
+ "mpls-load-balance",
+ NULL,
+};
+const static char* const load_balance_l2_nodes[] =
+{
+ "l2-load-balance",
+ NULL,
+};
+const static char* const load_balance_nsh_nodes[] =
+{
+ "nsh-load-balance",
+ NULL,
+};
+const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = load_balance_ip4_nodes,
+ [DPO_PROTO_IP6] = load_balance_ip6_nodes,
+ [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = load_balance_l2_nodes,
+ [DPO_PROTO_NSH] = load_balance_nsh_nodes,
+};
+
+void
+load_balance_module_init (void)
+{
+ index_t lbi;
+
+ dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
+
+ /*
+     * Special LB with index zero. We need to define this since the v4 mtrie
+     * assumes an index of 0 implies the ply is empty. Therefore all 'real'
+     * adjs need a non-zero index.
+ * This should never be used, but just in case, stack it on a drop.
+ */
+ lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
+ load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
+
+ load_balance_map_module_init();
+}
+
+static clib_error_t *
+load_balance_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ index_t lbi = INDEX_INVALID;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &lbi))
+ ;
+ else
+ break;
+ }
+
+ if (INDEX_INVALID != lbi)
+ {
+ vlib_cli_output (vm, "%U", format_load_balance, lbi,
+ LOAD_BALANCE_FORMAT_DETAIL);
+ }
+ else
+ {
+ load_balance_t *lb;
+
+ pool_foreach(lb, load_balance_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_load_balance,
+ load_balance_get_index(lb),
+ LOAD_BALANCE_FORMAT_NONE);
+ }));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (load_balance_show_command, static) = {
+ .path = "show load-balance",
+ .short_help = "show load-balance [<index>]",
+ .function = load_balance_show,
+};
+
+
+always_inline u32
+ip_flow_hash (void *data)
+{
+ ip4_header_t *iph = (ip4_header_t *) data;
+
+ if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
+ return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT);
+ else
+ return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT);
+}
+
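+/*
+ * mac_to_u64 below reads the 6-byte MAC address as a u64, masking off the
+ * two high-order bytes. Note it loads 8 bytes; both addresses lie within
+ * the 14-byte Ethernet header, so the over-read stays inside the buffer.
+ */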
+always_inline u64
+mac_to_u64 (u8 * m)
+{
+ return (*((u64 *) m) & 0xffffffffffff);
+}
+
+always_inline u32
+l2_flow_hash (vlib_buffer_t * b0)
+{
+ ethernet_header_t *eh;
+ u64 a, b, c;
+ uword is_ip, eh_size;
+ u16 eh_type;
+
+ eh = vlib_buffer_get_current (b0);
+ eh_type = clib_net_to_host_u16 (eh->type);
+ eh_size = ethernet_buffer_header_size (b0);
+
+ is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);
+
+ /* since we have 2 cache lines, use them */
+ if (is_ip)
+ a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
+ else
+ a = eh->type;
+
+ b = mac_to_u64 ((u8 *) eh->dst_address);
+ c = mac_to_u64 ((u8 *) eh->src_address);
+ hash_mix64 (a, b, c);
+
+ return (u32) c;
+}
+
+typedef struct load_balance_trace_t_
+{
+ index_t lb_index;
+} load_balance_trace_t;
+
+static uword
+l2_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0, lbi0, next0;
+ const dpo_id_t *dpo0;
+ const load_balance_t *lb0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* lookup dst + src mac */
+ lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ lb0 = load_balance_get(lbi0);
+
+ vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ vnet_buffer(b0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ tr->lb_index = lbi0;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static u8 *
+format_l2_load_balance_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
+
+ s = format (s, "L2-load-balance: index %d", t->lb_index);
+ return s;
+}
+
+/**
+ * @brief The L2 load-balance node: selects an output bucket from a hash
+ * of the Ethernet header.
+ */
+VLIB_REGISTER_NODE (l2_load_balance_node) = {
+ .function = l2_load_balance,
+ .name = "l2-load-balance",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_l2_load_balance_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+static uword
+nsh_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0, lbi0, next0, *nsh0;
+ const dpo_id_t *dpo0;
+ const load_balance_t *lb0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ lb0 = load_balance_get(lbi0);
+
+ /* SPI + SI are the second word of the NSH header */
+ nsh0 = vlib_buffer_get_current (b0);
+ vnet_buffer(b0)->ip.flow_hash = nsh0[1] % lb0->lb_n_buckets;
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ vnet_buffer(b0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ tr->lb_index = lbi0;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static u8 *
+format_nsh_load_balance_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
+
+ s = format (s, "NSH-load-balance: index %d", t->lb_index);
+ return s;
+}
+
+/**
+ * @brief The NSH load-balance node: selects an output bucket from the
+ * NSH SPI+SI word.
+ */
+VLIB_REGISTER_NODE (nsh_load_balance_node) = {
+ .function = nsh_load_balance,
+ .name = "nsh-load-balance",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_nsh_load_balance_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
new file mode 100644
index 00000000..b901c5be
--- /dev/null
+++ b/src/vnet/dpo/load_balance.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The load-balance object represents an ECMP choice. The buckets of a
+ * load-balance object point to the sub-graph after the choice is made.
+ * The load-balance object is also the object type returned from a FIB table
+ * lookup. As such it needs to represent the case where there is only one
+ * choice. It may seem like overkill to use a load-balance object in this
+ * case, but the reason is performance. If the load-balance object were not
+ * the result of the FIB lookup, then some other object would be. In the
+ * case where there was ECMP, this other object would need a load-balance
+ * as a parent, adding an unnecessary indirection.
+ *
+ * It is also the object in the DP that represents a via-fib-entry in a recursive
+ * route.
+ *
+ */
+
+#ifndef __LOAD_BALANCE_H__
+#define __LOAD_BALANCE_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/fib/fib_entry.h>
+
+/**
+ * Load-balance main
+ */
+typedef struct load_balance_main_t_
+{
+ vlib_combined_counter_main_t lbm_to_counters;
+ vlib_combined_counter_main_t lbm_via_counters;
+} load_balance_main_t;
+
+extern load_balance_main_t load_balance_main;
+
+/**
+ * The number of buckets that a load-balance object can have and still
+ * fit in one cache-line
+ */
+#define LB_NUM_INLINE_BUCKETS 4
+
+/**
+ * @brief One path from an [EU]CMP set that the client wants to add to a
+ * load-balance object
+ */
+typedef struct load_balance_path_t_ {
+ /**
+ * ID of the Data-path object.
+ */
+ dpo_id_t path_dpo;
+
+ /**
+ * The index of the FIB path
+ */
+ fib_node_index_t path_index;
+
+ /**
+ * weight for the path.
+ */
+ u32 path_weight;
+} load_balance_path_t;
+
+/**
+ * The FIB DPO provides:
+ * - load-balancing over the next DPOs in the chain/graph
+ * - per-route counters
+ */
+typedef struct load_balance_t_ {
+ /**
+ * number of buckets in the load-balance. always a power of 2.
+ */
+ u16 lb_n_buckets;
+ /**
+ * number of buckets in the load-balance - 1. used in the switch path
+ * as part of the hash calculation.
+ */
+ u16 lb_n_buckets_minus_1;
+
+ /**
+ * The protocol of packets that traverse this LB.
+     * Needed in combination with the flow hash config to determine how to hash.
+     * It is a u8.
+ */
+ dpo_proto_t lb_proto;
+
+ /**
+ * Flags from the load-balance's associated fib_entry_t
+ */
+ fib_entry_flag_t lb_fib_entry_flags;
+
+ /**
+ * The number of locks, which is approximately the number of users,
+ * of this load-balance.
+ * Load-balance objects of via-entries are heavily shared by recursives,
+ * so the lock count is a u32.
+ */
+ u32 lb_locks;
+
+ /**
+ * index of the load-balance map, INVALID if this LB does not use one
+ */
+ index_t lb_map;
+
+ /**
+ * This is the index of the uRPF list for this LB
+ */
+ index_t lb_urpf;
+
+ /**
+ * the hash config to use when selecting a bucket. this is a u16
+ */
+ flow_hash_config_t lb_hash_config;
+
+ /**
+     * Vector of buckets containing the next DPOs, sized to lb_n_buckets
+ */
+ dpo_id_t *lb_buckets;
+
+ /**
+ * The rest of the cache line is used for buckets. In the common case
+     * where there are LB_NUM_INLINE_BUCKETS or fewer buckets, the buckets are
+     * on the same cache-line and we save ourselves a pointer dereference in
+ * the data-path.
+ */
+ dpo_id_t lb_buckets_inline[LB_NUM_INLINE_BUCKETS];
+} load_balance_t;
+
+STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
+	      "A load_balance object size exceeds one cache-line");
+
+/**
+ * Flags controlling load-balance formatting/display
+ */
+typedef enum load_balance_format_flags_t_ {
+ LOAD_BALANCE_FORMAT_NONE,
+ LOAD_BALANCE_FORMAT_DETAIL = (1 << 0),
+} load_balance_format_flags_t;
+
+/**
+ * Flags controlling load-balance creation and modification
+ */
+typedef enum load_balance_flags_t_ {
+ LOAD_BALANCE_FLAG_NONE = 0,
+ LOAD_BALANCE_FLAG_USES_MAP = (1 << 0),
+} load_balance_flags_t;
+
+extern index_t load_balance_create(u32 num_buckets,
+ dpo_proto_t lb_proto,
+ flow_hash_config_t fhc);
+extern void load_balance_multipath_update(
+ const dpo_id_t *dpo,
+ const load_balance_path_t * raw_next_hops,
+ load_balance_flags_t flags);
+
+extern void load_balance_set_bucket(index_t lbi,
+ u32 bucket,
+ const dpo_id_t *next);
+extern void load_balance_set_urpf(index_t lbi,
+ index_t urpf);
+extern void load_balance_set_fib_entry_flags(index_t lbi,
+ fib_entry_flag_t flags);
+extern index_t load_balance_get_urpf(index_t lbi);
+
+extern u8* format_load_balance(u8 * s, va_list * args);
+
+extern const dpo_id_t *load_balance_get_bucket(index_t lbi,
+ u32 bucket);
+extern int load_balance_is_drop(const dpo_id_t *dpo);
+
+extern f64 load_balance_get_multipath_tolerance(void);
+
+/**
+ * The encapsulation breakages are for fast DP access
+ */
+extern load_balance_t *load_balance_pool;
+static inline load_balance_t*
+load_balance_get (index_t lbi)
+{
+ return (pool_elt_at_index(load_balance_pool, lbi));
+}
+
+#define LB_HAS_INLINE_BUCKETS(_lb) \
+ ((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
+
+static inline const dpo_id_t *
+load_balance_get_bucket_i (const load_balance_t *lb,
+ u32 bucket)
+{
+ ASSERT(bucket < lb->lb_n_buckets);
+
+ if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb)))
+ {
+ return (&lb->lb_buckets_inline[bucket]);
+ }
+ else
+ {
+ return (&lb->lb_buckets[bucket]);
+ }
+}
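+
+/*
+ * Typical data-path usage (illustrative sketch, assuming 'lbi' is a
+ * load-balance index and 'hash' is the packet's flow hash):
+ *
+ * @code
+ *     const load_balance_t *lb = load_balance_get(lbi);
+ *     const dpo_id_t *dpo =
+ *         load_balance_get_bucket_i(lb, hash & lb->lb_n_buckets_minus_1);
+ * @endcode
+ *
+ * Masking with lb_n_buckets_minus_1 is valid because the bucket count is
+ * always a power of 2.
+ */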
+
+extern void load_balance_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/load_balance_map.c b/src/vnet/dpo/load_balance_map.c
new file mode 100644
index 00000000..4e27e5db
--- /dev/null
+++ b/src/vnet/dpo/load_balance_map.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief Load-balance maps: the bucket indirection used to implement
+ * PIC edge, re-mapping buckets away from unusable paths.
+ */
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_node_list.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/load_balance.h>
+
+/**
+ * A hash-table of load-balance maps by path index.
+ * this provides the fast lookup of the LB map when a path goes down
+ */
+static uword *lb_maps_by_path_index;
+
+/**
+ * A hash-table of load-balance maps by set of paths.
+ * This provides the LB map sharing.
+ * LB maps do not necessarily use all the paths in the list, since
+ * the entry that is requesting the map may not have an out-going
+ * label for each of the paths.
+ */
+static uword *load_balance_map_db;
+
+typedef enum load_balance_map_path_flags_t_
+{
+ LOAD_BALANCE_MAP_PATH_UP = (1 << 0),
+ LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1),
+} __attribute__ ((packed)) load_balance_map_path_flags_t;
+
+typedef struct load_balance_map_path_t_ {
+ /**
+ * Index of the path
+ */
+ fib_node_index_t lbmp_index;
+
+ /**
+ * Sibling Index in the list of all maps with this path index
+ */
+ fib_node_index_t lbmp_sibling;
+
+ /**
+     * the normalised weight of the path
+ */
+ u32 lbmp_weight;
+
+ /**
+ * The sate of the path
+     * The state of the path
+ load_balance_map_path_flags_t lbmp_flags;
+} load_balance_map_path_t;
+
+/**
+ * The global pool of LB maps
+ */
+load_balance_map_t *load_balance_map_pool;
+
+/*
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...) \
+ { \
+ clib_warning("lbm: FIXME" _fmt, \
+ ##_args); \
+ }
+#else
+#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)
+#endif
+
+static index_t
+load_balance_map_get_index (load_balance_map_t *lbm)
+{
+ return (lbm - load_balance_map_pool);
+}
+
+u8*
+format_load_balance_map (u8 *s, va_list ap)
+{
+ index_t lbmi = va_arg(ap, index_t);
+ u32 indent = va_arg(ap, u32);
+ load_balance_map_t *lbm;
+ u32 n_buckets, ii;
+
+ lbm = load_balance_map_get(lbmi);
+ n_buckets = vec_len(lbm->lbm_buckets);
+
+ s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets);
+ s = format(s, "\n%U index:", format_white_space, indent+2);
+ for (ii = 0; ii < n_buckets; ii++)
+ {
+ s = format(s, "%5d", ii);
+ }
+ s = format(s, "\n%U map:", format_white_space, indent+2);
+ for (ii = 0; ii < n_buckets; ii++)
+ {
+ s = format(s, "%5d", lbm->lbm_buckets[ii]);
+ }
+
+ return (s);
+}
+
+
+static uword
+load_balance_map_hash (load_balance_map_t *lbm)
+{
+ u32 old_lbm_hash, new_lbm_hash, hash;
+ load_balance_map_path_t *lb_path;
+
+ new_lbm_hash = old_lbm_hash = vec_len(lbm->lbm_paths);
+
+ vec_foreach (lb_path, lbm->lbm_paths)
+ {
+ hash = lb_path->lbmp_index;
+ hash_mix32(hash, old_lbm_hash, new_lbm_hash);
+ }
+
+ return (new_lbm_hash);
+}
+
+always_inline uword
+load_balance_map_db_hash_key_from_index (uword index)
+{
+ return 1 + 2*index;
+}
+
+always_inline uword
+load_balance_map_db_hash_key_is_index (uword key)
+{
+ return key & 1;
+}
+
+always_inline uword
+load_balance_map_db_hash_key_2_index (uword key)
+{
+ ASSERT (load_balance_map_db_hash_key_is_index (key));
+ return key / 2;
+}
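+
+/*
+ * Example of the key tagging scheme above (illustrative): a map with pool
+ * index 7 is stored under key 1 + 2*7 = 15. The set low bit marks the key
+ * as an index; a key with the low bit clear is a (word-aligned, hence
+ * even) pointer to a temporary load_balance_map_t used during lookup.
+ */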
+
+static load_balance_map_t*
+load_balance_map_db_get_from_hash_key (uword key)
+{
+ load_balance_map_t *lbm;
+
+ if (load_balance_map_db_hash_key_is_index (key))
+ {
+ index_t lbm_index;
+
+ lbm_index = load_balance_map_db_hash_key_2_index(key);
+ lbm = load_balance_map_get(lbm_index);
+ }
+ else
+ {
+ lbm = uword_to_pointer (key, load_balance_map_t *);
+ }
+
+ return (lbm);
+}
+
+static uword
+load_balance_map_db_hash_key_sum (hash_t * h,
+ uword key)
+{
+ load_balance_map_t *lbm;
+
+ lbm = load_balance_map_db_get_from_hash_key(key);
+
+ return (load_balance_map_hash(lbm));
+}
+
+static uword
+load_balance_map_db_hash_key_equal (hash_t * h,
+ uword key1,
+ uword key2)
+{
+ load_balance_map_t *lbm1, *lbm2;
+
+ lbm1 = load_balance_map_db_get_from_hash_key(key1);
+ lbm2 = load_balance_map_db_get_from_hash_key(key2);
+
+ return (load_balance_map_hash(lbm1) ==
+ load_balance_map_hash(lbm2));
+}
+
+static index_t
+load_balance_map_db_find (load_balance_map_t *lbm)
+{
+ uword *p;
+
+ p = hash_get(load_balance_map_db, lbm);
+
+ if (NULL != p)
+ {
+ return p[0];
+ }
+
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+static void
+load_balance_map_db_insert (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ fib_node_list_t list;
+ uword *p;
+
+ ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm));
+
+ /*
+ * insert into the DB based on the set of paths.
+ */
+ hash_set (load_balance_map_db,
+ load_balance_map_db_hash_key_from_index(
+ load_balance_map_get_index(lbm)),
+ load_balance_map_get_index(lbm));
+
+ /*
+ * insert into each per-path list.
+ */
+ vec_foreach(lbmp, lbm->lbm_paths)
+ {
+ p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);
+
+ if (NULL == p)
+ {
+ list = fib_node_list_create();
+ hash_set(lb_maps_by_path_index, lbmp->lbmp_index, list);
+ }
+ else
+ {
+ list = p[0];
+ }
+
+ lbmp->lbmp_sibling =
+ fib_node_list_push_front(list,
+ 0, FIB_NODE_TYPE_FIRST,
+ load_balance_map_get_index(lbm));
+ }
+
+ LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted");
+}
+
+static void
+load_balance_map_db_remove (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ uword *p;
+
+ ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm));
+
+ hash_unset(load_balance_map_db,
+ load_balance_map_db_hash_key_from_index(
+ load_balance_map_get_index(lbm)));
+
+ /*
+ * remove from each per-path list.
+ */
+ vec_foreach(lbmp, lbm->lbm_paths)
+ {
+ p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);
+
+ ASSERT(NULL != p);
+
+ fib_node_list_remove(p[0], lbmp->lbmp_sibling);
+ }
+
+ LOAD_BALANCE_MAP_DBG(lbm, "DB-removed");
+}
+
+/**
+ * @brief from the paths that are usable, fill the Map.
+ */
+static void
+load_balance_map_fill (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ u32 n_buckets, bucket, ii, jj;
+ u16 *tmp_buckets;
+
+ tmp_buckets = NULL;
+ n_buckets = vec_len(lbm->lbm_buckets);
+
+ /*
+     * run through the set of paths once and build a vector of the
+     * indices that are usable. We do this in a scratch space, since we
+ * need to refer to it multiple times as we build the real buckets.
+ */
+ vec_validate(tmp_buckets, n_buckets-1);
+
+ bucket = jj = 0;
+ vec_foreach (lbmp, lbm->lbm_paths)
+ {
+ if (fib_path_is_resolved(lbmp->lbmp_index))
+ {
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ tmp_buckets[jj++] = bucket++;
+ }
+ }
+ else
+ {
+ bucket += lbmp->lbmp_weight;
+ }
+ }
+ _vec_len(tmp_buckets) = jj;
+
+ /*
+ * If the number of temporaries written is as many as we need, implying
+ * all paths were up, then we can simply copy the scratch area over the
+ * actual buckets' memory
+ */
+ if (jj == n_buckets)
+ {
+ memcpy(lbm->lbm_buckets,
+ tmp_buckets,
+ sizeof(lbm->lbm_buckets[0]) * n_buckets);
+ }
+ else
+ {
+ /*
+ * one or more paths are down.
+ */
+ if (0 == vec_len(tmp_buckets))
+ {
+ /*
+ * if the scratch area is empty, then no paths are usable.
+ * they will all drop. so use them all, lest we account drops
+ * against only one.
+ */
+ for (bucket = 0; bucket < n_buckets; bucket++)
+ {
+ lbm->lbm_buckets[bucket] = bucket;
+ }
+ }
+ else
+ {
+ bucket = jj = 0;
+ vec_foreach (lbmp, lbm->lbm_paths)
+ {
+ if (fib_path_is_resolved(lbmp->lbmp_index))
+ {
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ lbm->lbm_buckets[bucket] = bucket;
+ bucket++;
+ }
+ }
+ else
+ {
+ /*
+                 * path is unusable:
+                 * cycle through the scratch space selecting an index.
+                 * This means we load-balance, in the intended ratio,
+                 * over the paths that are still usable.
+ */
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ lbm->lbm_buckets[bucket] = tmp_buckets[jj];
+ jj = (jj + 1) % vec_len(tmp_buckets);
+ bucket++;
+ }
+ }
+ }
+ }
+ }
+
+ vec_free(tmp_buckets);
+}
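+
+/*
+ * Worked example (illustrative): two paths, each of normalised weight 2,
+ * give 4 buckets. With both paths up the map is the identity:
+ *
+ *     bucket: 0 1 2 3  ->  0 1 2 3
+ *
+ * If the second path becomes unusable, its buckets are re-striped over
+ * the indices collected in the scratch space:
+ *
+ *     bucket: 0 1 2 3  ->  0 1 0 1
+ *
+ * so traffic that hashed to the failed path is spread, in ratio, over the
+ * surviving path(s) without resizing the load-balance itself.
+ */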
+
+static load_balance_map_t*
+load_balance_map_alloc (const load_balance_path_t *paths)
+{
+ load_balance_map_t *lbm;
+ u32 ii;
+
+ pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES);
+ memset(lbm, 0, sizeof(*lbm));
+
+ vec_validate(lbm->lbm_paths, vec_len(paths)-1);
+
+ vec_foreach_index(ii, paths)
+ {
+ lbm->lbm_paths[ii].lbmp_index = paths[ii].path_index;
+ lbm->lbm_paths[ii].lbmp_weight = paths[ii].path_weight;
+ }
+
+ return (lbm);
+}
+
+static load_balance_map_t *
+load_balance_map_init (load_balance_map_t *lbm,
+ u32 n_buckets,
+ u32 sum_of_weights)
+{
+ lbm->lbm_sum_of_norm_weights = sum_of_weights;
+ vec_validate(lbm->lbm_buckets, n_buckets-1);
+
+ load_balance_map_db_insert(lbm);
+
+ load_balance_map_fill(lbm);
+
+ return (lbm);
+}
+
+static void
+load_balance_map_destroy (load_balance_map_t *lbm)
+{
+ vec_free(lbm->lbm_paths);
+ vec_free(lbm->lbm_buckets);
+ pool_put(load_balance_map_pool, lbm);
+}
+
+index_t
+load_balance_map_add_or_lock (u32 n_buckets,
+ u32 sum_of_weights,
+ const load_balance_path_t *paths)
+{
+ load_balance_map_t *tmp, *lbm;
+ index_t lbmi;
+
+ tmp = load_balance_map_alloc(paths);
+
+ lbmi = load_balance_map_db_find(tmp);
+
+ if (INDEX_INVALID == lbmi)
+ {
+ lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights);
+ }
+ else
+ {
+ lbm = load_balance_map_get(lbmi);
+ load_balance_map_destroy(tmp);
+ }
+
+ lbm->lbm_locks++;
+
+ return (load_balance_map_get_index(lbm));
+}
+
+void
+load_balance_map_lock (index_t lbmi)
+{
+ load_balance_map_t *lbm;
+
+ lbm = load_balance_map_get(lbmi);
+
+ lbm->lbm_locks++;
+}
+
+void
+load_balance_map_unlock (index_t lbmi)
+{
+ load_balance_map_t *lbm;
+
+ if (INDEX_INVALID == lbmi)
+ {
+ return;
+ }
+
+ lbm = load_balance_map_get(lbmi);
+
+ lbm->lbm_locks--;
+
+ if (0 == lbm->lbm_locks)
+ {
+ load_balance_map_db_remove(lbm);
+ load_balance_map_destroy(lbm);
+ }
+}
+
+static int
+load_balance_map_path_state_change_walk (fib_node_ptr_t *fptr,
+ void *ctx)
+{
+ load_balance_map_t *lbm;
+
+ lbm = load_balance_map_get(fptr->fnp_index);
+
+ load_balance_map_fill(lbm);
+
+ return (!0);
+}
+
+/**
+ * @brief the state of a path has changed (it has no doubt gone down).
+ * This is the trigger to perform a PIC edge cutover and update the maps
+ * to exclude this path.
+ */
+void
+load_balance_map_path_state_change (fib_node_index_t path_index)
+{
+ uword *p;
+
+ /*
+     * re-stripe the buckets of each affected MAP
+ */
+ p = hash_get(lb_maps_by_path_index, path_index);
+
+ if (NULL == p)
+ return;
+
+ fib_node_list_walk(p[0], load_balance_map_path_state_change_walk, NULL);
+}
+
+/**
+ * @brief Initialise the load-balance MAP module
+ */
+void
+load_balance_map_module_init (void)
+{
+ load_balance_map_db =
+ hash_create2 (/* elts */ 0,
+ /* user */ 0,
+ /* value_bytes */ sizeof (index_t),
+ load_balance_map_db_hash_key_sum,
+ load_balance_map_db_hash_key_equal,
+ /* format pair/arg */
+ 0, 0);
+
+ lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t));
+}
+
+void
+load_balance_map_show_mem (void)
+{
+ fib_show_memory_usage("Load-Balance Map",
+ pool_elts(load_balance_map_pool),
+ pool_len(load_balance_map_pool),
+ sizeof(load_balance_map_t));
+}
+
+static clib_error_t *
+load_balance_map_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ index_t lbmi = INDEX_INVALID;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &lbmi))
+ ;
+ else
+ break;
+ }
+
+ if (INDEX_INVALID != lbmi)
+ {
+ vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0);
+ }
+ else
+ {
+ load_balance_map_t *lbm;
+
+ pool_foreach(lbm, load_balance_map_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_load_balance_map,
+ load_balance_map_get_index(lbm), 0);
+ }));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (load_balance_map_show_command, static) = {
+ .path = "show load-balance-map",
+ .short_help = "show load-balance-map [<index>]",
+ .function = load_balance_map_show,
+};
diff --git a/src/vnet/dpo/load_balance_map.h b/src/vnet/dpo/load_balance_map.h
new file mode 100644
index 00000000..237f24b0
--- /dev/null
+++ b/src/vnet/dpo/load_balance_map.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief Load-balance MAP: bucket indirection for PIC edge
+ */
+
+#ifndef __LOAD_BALANCE_MAP_H__
+#define __LOAD_BALANCE_MAP_H__
+
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/load_balance.h>
+
+struct load_balance_map_path_t_;
+
+/**
+ * A load-balance MAP
+ */
+typedef struct load_balance_map_t_ {
+ /**
+ * The buckets of the map that provide the index to index translation.
+ * In the first cacheline.
+ */
+ u16 *lbm_buckets;
+
+ /**
+ * the vector of paths this MAP represents
+ */
+ struct load_balance_map_path_t_ *lbm_paths;
+
+ /**
+ * the sum of the normalised weights. cache for convenience
+ */
+ u32 lbm_sum_of_norm_weights;
+
+ /**
+     * Number of locks. Maps are shared by a large number of recursive fib_entry_t's
+ */
+ u32 lbm_locks;
+} load_balance_map_t;
+
+extern index_t load_balance_map_add_or_lock(u32 n_buckets,
+ u32 sum_of_weights,
+ const load_balance_path_t *norm_paths);
+
+extern void load_balance_map_lock(index_t lmbi);
+extern void load_balance_map_unlock(index_t lbmi);
+
+extern void load_balance_map_path_state_change(fib_node_index_t path_index);
+
+extern u8* format_load_balance_map(u8 *s, va_list ap);
+extern void load_balance_map_show_mem(void);
+
+/**
+ * The encapsulation breakages are for fast DP access
+ */
+extern load_balance_map_t *load_balance_map_pool;
+
+static inline load_balance_map_t*
+load_balance_map_get (index_t lbmi)
+{
+ return (pool_elt_at_index(load_balance_map_pool, lbmi));
+}
+
+static inline u16
+load_balance_map_translate (index_t lbmi,
+ u16 bucket)
+{
+    load_balance_map_t *lbm;
+
+ lbm = load_balance_map_get(lbmi);
+
+ return (lbm->lbm_buckets[bucket]);
+}
+
+static inline const dpo_id_t *
+load_balance_get_fwd_bucket (const load_balance_t *lb,
+ u16 bucket)
+{
+ ASSERT(bucket < lb->lb_n_buckets);
+
+ if (INDEX_INVALID != lb->lb_map)
+ {
+ bucket = load_balance_map_translate(lb->lb_map, bucket);
+ }
+
+ if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb)))
+ {
+ return (&lb->lb_buckets_inline[bucket]);
+ }
+ else
+ {
+ return (&lb->lb_buckets[bucket]);
+ }
+}
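+
+/*
+ * Illustrative data-path sketch: when a MAP is present the bucket chosen
+ * by the flow hash is first translated, then the DPO fetched (assuming
+ * 'lb' points to a load_balance_t and 'hash' is the packet's flow hash):
+ *
+ * @code
+ *     const dpo_id_t *dpo =
+ *         load_balance_get_fwd_bucket(lb, hash & lb->lb_n_buckets_minus_1);
+ * @endcode
+ */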
+
+extern void load_balance_map_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
new file mode 100644
index 00000000..af189eda
--- /dev/null
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -0,0 +1,1423 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/mpls/mpls_lookup.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/ip6_mfib.h>
+
+static const char *const lookup_input_names[] = LOOKUP_INPUTS;
+static const char *const lookup_cast_names[] = LOOKUP_CASTS;
+
+/**
+ * @brief Enumeration of the lookup subtypes
+ */
+typedef enum lookup_sub_type_t_
+{
+ LOOKUP_SUB_TYPE_SRC,
+ LOOKUP_SUB_TYPE_DST,
+ LOOKUP_SUB_TYPE_DST_MCAST,
+ LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE,
+} lookup_sub_type_t;
+#define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1)
+
+#define FOR_EACH_LOOKUP_SUB_TYPE(_st) \
+    for (_st = LOOKUP_SUB_TYPE_SRC; _st < LOOKUP_SUB_TYPE_NUM; _st++)
+
+/**
+ * @brief Pool of all lookup DPOs
+ */
+lookup_dpo_t *lookup_dpo_pool;
+
+/**
+ * @brief An array of registered DPO type values for the sub-types
+ */
+static dpo_type_t lookup_dpo_sub_types[LOOKUP_SUB_TYPE_NUM];
+
+static lookup_dpo_t *
+lookup_dpo_alloc (void)
+{
+ lookup_dpo_t *lkd;
+
+ pool_get_aligned(lookup_dpo_pool, lkd, CLIB_CACHE_LINE_BYTES);
+
+ return (lkd);
+}
+
+static index_t
+lookup_dpo_get_index (lookup_dpo_t *lkd)
+{
+ return (lkd - lookup_dpo_pool);
+}
+
+static void
+lookup_dpo_add_or_lock_i (fib_node_index_t fib_index,
+ dpo_proto_t proto,
+ lookup_cast_t cast,
+ lookup_input_t input,
+ lookup_table_t table_config,
+ dpo_id_t *dpo)
+{
+ lookup_dpo_t *lkd;
+ dpo_type_t type;
+
+ lkd = lookup_dpo_alloc();
+ lkd->lkd_fib_index = fib_index;
+ lkd->lkd_proto = proto;
+ lkd->lkd_input = input;
+ lkd->lkd_table = table_config;
+ lkd->lkd_cast = cast;
+
+ /*
+ * use the input type to select the lookup sub-type
+ */
+ type = 0;
+
+ switch (input)
+ {
+ case LOOKUP_INPUT_SRC_ADDR:
+ type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC];
+ break;
+ case LOOKUP_INPUT_DST_ADDR:
+ switch (table_config)
+ {
+ case LOOKUP_TABLE_FROM_INPUT_INTERFACE:
+ type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE];
+ break;
+ case LOOKUP_TABLE_FROM_CONFIG:
+ type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST];
+ break;
+ }
+ if (LOOKUP_MULTICAST == cast)
+ {
+ type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_MCAST];
+ }
+ }
+
+ if (0 == type)
+ {
+ dpo_reset(dpo);
+ }
+ else
+ {
+ dpo_set(dpo, type, proto, lookup_dpo_get_index(lkd));
+ }
+}
+
+void
+lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index,
+ dpo_proto_t proto,
+ lookup_cast_t cast,
+ lookup_input_t input,
+ lookup_table_t table_config,
+ dpo_id_t *dpo)
+{
+ if (LOOKUP_TABLE_FROM_CONFIG == table_config)
+ {
+ if (LOOKUP_UNICAST == cast)
+ {
+ fib_table_lock(fib_index,
+ dpo_proto_to_fib(proto),
+ FIB_SOURCE_RR);
+ }
+ else
+ {
+ mfib_table_lock(fib_index,
+ dpo_proto_to_fib(proto),
+ MFIB_SOURCE_RR);
+ }
+ }
+ lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo);
+}
+
+void
+lookup_dpo_add_or_lock_w_table_id (u32 table_id,
+ dpo_proto_t proto,
+ lookup_cast_t cast,
+ lookup_input_t input,
+ lookup_table_t table_config,
+ dpo_id_t *dpo)
+{
+ fib_node_index_t fib_index = FIB_NODE_INDEX_INVALID;
+
+ if (LOOKUP_TABLE_FROM_CONFIG == table_config)
+ {
+ if (LOOKUP_UNICAST == cast)
+ {
+ fib_index =
+ fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
+ table_id,
+ FIB_SOURCE_RR);
+ }
+ else
+ {
+ fib_index =
+ mfib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
+ table_id,
+ MFIB_SOURCE_RR);
+ }
+ }
+
+ ASSERT(FIB_NODE_INDEX_INVALID != fib_index);
+ lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo);
+}
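+
+/*
+ * Usage sketch (illustrative; table 100 and the variable name are
+ * assumptions): create a DPO that, when reached in the DPO-graph,
+ * performs an IPv4 unicast destination-address lookup in table 100:
+ *
+ * @code
+ *     dpo_id_t dpo = DPO_INVALID;
+ *
+ *     lookup_dpo_add_or_lock_w_table_id(100, DPO_PROTO_IP4,
+ *                                       LOOKUP_UNICAST,
+ *                                       LOOKUP_INPUT_DST_ADDR,
+ *                                       LOOKUP_TABLE_FROM_CONFIG,
+ *                                       &dpo);
+ * @endcode
+ */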
+
+u8*
+format_lookup_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ lookup_dpo_t *lkd;
+
+ lkd = lookup_dpo_get(index);
+
+ if (LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table)
+ {
+ s = format(s, "%s,%s lookup in interface's %U table",
+ lookup_input_names[lkd->lkd_input],
+ lookup_cast_names[lkd->lkd_cast],
+ format_dpo_proto, lkd->lkd_proto);
+ }
+ else
+ {
+ if (LOOKUP_UNICAST == lkd->lkd_cast)
+ {
+ s = format(s, "%s,%s lookup in %U",
+ lookup_input_names[lkd->lkd_input],
+ lookup_cast_names[lkd->lkd_cast],
+ format_fib_table_name, lkd->lkd_fib_index,
+ dpo_proto_to_fib(lkd->lkd_proto));
+ }
+ else
+ {
+ s = format(s, "%s,%s lookup in %U",
+ lookup_input_names[lkd->lkd_input],
+ lookup_cast_names[lkd->lkd_cast],
+ format_mfib_table_name, lkd->lkd_fib_index,
+ dpo_proto_to_fib(lkd->lkd_proto));
+ }
+ }
+ return (s);
+}
+
+static void
+lookup_dpo_lock (dpo_id_t *dpo)
+{
+ lookup_dpo_t *lkd;
+
+ lkd = lookup_dpo_get(dpo->dpoi_index);
+
+ lkd->lkd_locks++;
+}
+
+static void
+lookup_dpo_unlock (dpo_id_t *dpo)
+{
+ lookup_dpo_t *lkd;
+
+ lkd = lookup_dpo_get(dpo->dpoi_index);
+
+ lkd->lkd_locks--;
+
+ if (0 == lkd->lkd_locks)
+ {
+ if (LOOKUP_TABLE_FROM_CONFIG == lkd->lkd_table)
+ {
+ if (LOOKUP_UNICAST == lkd->lkd_cast)
+ {
+ fib_table_unlock(lkd->lkd_fib_index,
+ dpo_proto_to_fib(lkd->lkd_proto),
+ FIB_SOURCE_RR);
+ }
+ else
+ {
+ mfib_table_unlock(lkd->lkd_fib_index,
+ dpo_proto_to_fib(lkd->lkd_proto),
+ MFIB_SOURCE_RR);
+ }
+ }
+ pool_put(lookup_dpo_pool, lkd);
+ }
+}
+
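+/*
+ * The mtrie walks below mirror the ip4 unicast lookup: step one covers the
+ * top 16 bits of the address, then one step for each of the remaining two
+ * bytes (indices 2 and 3), yielding the leaf's load-balance index.
+ */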
+always_inline void
+ip4_src_fib_lookup_one (u32 src_fib_index0,
+ const ip4_address_t * addr0,
+ u32 * src_adj_index0)
+{
+ ip4_fib_mtrie_leaf_t leaf0;
+ ip4_fib_mtrie_t * mtrie0;
+
+ mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
+
+ src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+}
+
+always_inline void
+ip4_src_fib_lookup_two (u32 src_fib_index0,
+ u32 src_fib_index1,
+ const ip4_address_t * addr0,
+ const ip4_address_t * addr1,
+ u32 * src_adj_index0,
+ u32 * src_adj_index1)
+{
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+
+ mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3);
+
+ src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+}
+
+/**
+ * @brief Lookup trace data
+ */
+typedef struct lookup_trace_t_
+{
+ union {
+ ip46_address_t addr;
+ mpls_unicast_header_t hdr;
+ };
+ fib_node_index_t fib_index;
+ index_t lbi;
+} lookup_trace_t;
+
+
+always_inline uword
+lookup_dpo_ip4_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int input_src_addr,
+ int table_from_interface)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 thread_index = vlib_get_thread_index();
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next > 2)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0;
+ flow_hash_config_t flow_hash_config0;
+ const ip4_address_t *input_addr0;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const ip4_header_t * ip0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+ u32 bi1, lkdi1, lbi1, fib_index1, next1, hash_c1;
+ flow_hash_config_t flow_hash_config1;
+ const ip4_address_t *input_addr1;
+ const load_balance_t *lb1;
+ const lookup_dpo_t * lkd1;
+ const ip4_header_t * ip1;
+ const dpo_id_t *dpo1;
+ vlib_buffer_t * b1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ bi1 = from[1];
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ b1 = vlib_get_buffer (vm, bi1);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* dst lookup was done by ip4 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkdi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+ lkd1 = lookup_dpo_get(lkdi1);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ ip4_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ fib_index1 =
+ ip4_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ fib_index1 = lkd1->lkd_fib_index;
+ }
+
+ /*
+ * choose between a source or destination address lookup in the table
+ */
+ if (input_src_addr)
+ {
+ input_addr0 = &ip0->src_address;
+ input_addr1 = &ip1->src_address;
+ }
+ else
+ {
+ input_addr0 = &ip0->dst_address;
+ input_addr1 = &ip1->dst_address;
+ }
+
+ /* do lookup */
+ ip4_src_fib_lookup_two (fib_index0, fib_index1,
+ input_addr0, input_addr1,
+ &lbi0, &lbi1);
+ lb0 = load_balance_get(lbi0);
+ lb1 = load_balance_get(lbi1);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = fib_index1;
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0;
+ hash_c1 = vnet_buffer (b1)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ flow_hash_config1 = lb1->lb_hash_config;
+ hash_c1 = vnet_buffer (b1)->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, flow_hash_config1);
+ }
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+ dpo1 = load_balance_get_bucket_i(lb1,
+ (hash_c1 &
+ (lb1->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1,
+ vlib_buffer_length_in_chain (vm, b1));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->addr.ip4 = *input_addr0;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->fib_index = fib_index1;
+ tr->lbi = lbi1;
+ tr->addr.ip4 = *input_addr1;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0;
+ flow_hash_config_t flow_hash_config0;
+ const ip4_address_t *input_addr;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const ip4_header_t * ip0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* dst lookup was done by ip4 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ ip4_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ }
+
+ /*
+ * choose between a source or destination address lookup in the table
+ */
+ if (input_src_addr)
+ {
+ input_addr = &ip0->src_address;
+ }
+ else
+ {
+ input_addr = &ip0->dst_address;
+ }
+
+ /* do lookup */
+ ip4_src_fib_lookup_one (fib_index0, input_addr, &lbi0);
+ lb0 = load_balance_get(lbi0);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->addr.ip4 = *input_addr;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lookup_trace_t * t = va_arg (*args, lookup_trace_t *);
+ uword indent = format_get_indent (s);
+ s = format (s, "%U fib-index:%d addr:%U load-balance:%d",
+ format_white_space, indent,
+ t->fib_index,
+ format_ip46_address, &t->addr, IP46_TYPE_ANY,
+ t->lbi);
+ return s;
+}
+
+always_inline uword
+lookup_ip4_dst (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 0));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_dst_node) = {
+ .function = lookup_ip4_dst,
+ .name = "lookup-ip4-dst",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_node, lookup_ip4_dst)
+
+always_inline uword
+lookup_ip4_dst_itf (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 1));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_dst_itf_node) = {
+ .function = lookup_ip4_dst_itf,
+ .name = "lookup-ip4-dst-itf",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_itf_node, lookup_ip4_dst_itf)
+
+always_inline uword
+lookup_ip4_src (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip4_inline(vm, node, from_frame, 1, 0));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_src_node) = {
+ .function = lookup_ip4_src,
+ .name = "lookup-ip4-src",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lookup_trace,
+ .sibling_of = "ip4-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_src_node, lookup_ip4_src)
+
+always_inline uword
+lookup_dpo_ip6_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int input_src_addr,
+ int table_from_interface)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 thread_index = vlib_get_thread_index();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next > 2)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0;
+ flow_hash_config_t flow_hash_config0;
+ const ip6_address_t *input_addr0;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const ip6_header_t * ip0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+ u32 bi1, lkdi1, lbi1, fib_index1, next1, hash_c1;
+ flow_hash_config_t flow_hash_config1;
+ const ip6_address_t *input_addr1;
+ const load_balance_t *lb1;
+ const lookup_dpo_t * lkd1;
+ const ip6_header_t * ip1;
+ const dpo_id_t *dpo1;
+ vlib_buffer_t * b1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ bi1 = from[1];
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ b1 = vlib_get_buffer (vm, bi1);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* dst lookup was done by ip6 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkdi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+ lkd1 = lookup_dpo_get(lkdi1);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ ip6_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ fib_index1 =
+ ip6_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ fib_index1 = lkd1->lkd_fib_index;
+ }
+
+ /*
+ * choose between a source or destination address lookup in the table
+ */
+ if (input_src_addr)
+ {
+ input_addr0 = &ip0->src_address;
+ input_addr1 = &ip1->src_address;
+ }
+ else
+ {
+ input_addr0 = &ip0->dst_address;
+ input_addr1 = &ip1->dst_address;
+ }
+
+            /* do lookup */
+ lbi0 = ip6_fib_table_fwding_lookup(&ip6_main,
+ fib_index0,
+ input_addr0);
+ lbi1 = ip6_fib_table_fwding_lookup(&ip6_main,
+ fib_index1,
+ input_addr1);
+ lb0 = load_balance_get(lbi0);
+ lb1 = load_balance_get(lbi1);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = fib_index1;
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0;
+ hash_c1 = vnet_buffer (b1)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ flow_hash_config1 = lb1->lb_hash_config;
+ hash_c1 = vnet_buffer (b1)->ip.flow_hash =
+ ip6_compute_flow_hash (ip1, flow_hash_config1);
+ }
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+ dpo1 = load_balance_get_bucket_i(lb1,
+ (hash_c1 &
+ (lb1->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1,
+ vlib_buffer_length_in_chain (vm, b1));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->addr.ip6 = *input_addr0;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->fib_index = fib_index1;
+ tr->lbi = lbi1;
+ tr->addr.ip6 = *input_addr1;
+ }
+ vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1,
+ next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0;
+ flow_hash_config_t flow_hash_config0;
+ const ip6_address_t *input_addr0;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const ip6_header_t * ip0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* dst lookup was done by ip6 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ ip6_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ }
+
+ /*
+ * choose between a source or destination address lookup in the table
+ */
+ if (input_src_addr)
+ {
+ input_addr0 = &ip0->src_address;
+ }
+ else
+ {
+ input_addr0 = &ip0->dst_address;
+ }
+
+            /* do lookup */
+ lbi0 = ip6_fib_table_fwding_lookup(&ip6_main,
+ fib_index0,
+ input_addr0);
+ lb0 = load_balance_get(lbi0);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->addr.ip6 = *input_addr0;
+ }
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+always_inline uword
+lookup_ip6_dst (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip6_inline(vm, node, from_frame, 0 /* use dst address */, 0 /* table from config */));
+}
+
+VLIB_REGISTER_NODE (lookup_ip6_dst_node) = {
+ .function = lookup_ip6_dst,
+ .name = "lookup-ip6-dst",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lookup_trace,
+ .sibling_of = "ip6-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_node, lookup_ip6_dst)
+
+always_inline uword
+lookup_ip6_dst_itf (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip6_inline(vm, node, from_frame, 0 /* use dst address */, 1 /* table from interface */));
+}
+
+VLIB_REGISTER_NODE (lookup_ip6_dst_itf_node) = {
+ .function = lookup_ip6_dst_itf,
+ .name = "lookup-ip6-dst-itf",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lookup_trace,
+ .sibling_of = "ip6-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_itf_node, lookup_ip6_dst_itf)
+
+always_inline uword
+lookup_ip6_src (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip6_inline(vm, node, from_frame, 1 /* use src address */, 0 /* table from config */));
+}
+
+VLIB_REGISTER_NODE (lookup_ip6_src_node) = {
+ .function = lookup_ip6_src,
+ .name = "lookup-ip6-src",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lookup_trace,
+ .sibling_of = "ip6-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_src_node, lookup_ip6_src)
+
+always_inline uword
+lookup_dpo_mpls_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int table_from_interface)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 thread_index = vlib_get_thread_index();
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /* no dual-loop variant: packets are processed one at a time */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0, hash0;
+ const mpls_unicast_header_t * hdr0;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ hdr0 = vlib_buffer_get_current (b0);
+
+ /* dst lookup was done by mpls lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ mpls_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ }
+
+ /* do lookup */
+ lbi0 = mpls_fib_table_forwarding_lookup (fib_index0, hdr0);
+
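+ /*
+ * the MPLS lookup steals the top bit of the returned index: if
+ * MPLS_IS_REPLICATE is set the remainder is a replicate DPO index,
+ * otherwise it is a load-balance index
+ */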
+ if (MPLS_IS_REPLICATE & lbi0)
+ {
+ next0 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ (lbi0 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb0 = load_balance_get(lbi0);
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+ {
+ hash0 = vnet_buffer (b0)->ip.flow_hash =
+ mpls_compute_flow_hash(hdr0, lb0->lb_hash_config);
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0,
+ (hash0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ next0 = dpo0->dpoi_next_node;
+
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+
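+ /*
+ * stash the TTL and EXP from the popped label: label_exp_s_ttl is
+ * in network byte order, so byte 3 is the TTL and the low nibble
+ * of byte 2 carries the EXP bits and the S bit
+ */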
+ vnet_buffer (b0)->mpls.ttl = ((char*)hdr0)[3];
+ vnet_buffer (b0)->mpls.exp = (((char*)hdr0)[2] & 0xe) >> 1;
+ vnet_buffer (b0)->mpls.first = 1;
+ vlib_buffer_advance(b0, sizeof(*hdr0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->hdr = *hdr0;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_lookup_mpls_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lookup_trace_t * t = va_arg (*args, lookup_trace_t *);
+ uword indent = format_get_indent (s);
+ mpls_unicast_header_t hdr;
+
+ hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl);
+
+ s = format (s, "%U fib-index:%d hdr:%U load-balance:%d",
+ format_white_space, indent,
+ t->fib_index,
+ format_mpls_header, hdr,
+ t->lbi);
+ return s;
+}
+
+always_inline uword
+lookup_mpls_dst (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_mpls_inline(vm, node, from_frame, 0));
+}
+
+VLIB_REGISTER_NODE (lookup_mpls_dst_node) = {
+ .function = lookup_mpls_dst,
+ .name = "lookup-mpls-dst",
+ .vector_size = sizeof (u32),
+ .sibling_of = "mpls-lookup",
+ .format_trace = format_lookup_mpls_trace,
+ .n_next_nodes = 0,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_node, lookup_mpls_dst)
+
+always_inline uword
+lookup_mpls_dst_itf (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_mpls_inline(vm, node, from_frame, 1));
+}
+
+VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = {
+ .function = lookup_mpls_dst_itf,
+ .name = "lookup-mpls-dst-itf",
+ .vector_size = sizeof (u32),
+ .sibling_of = "mpls-lookup",
+ .format_trace = format_lookup_mpls_trace,
+ .n_next_nodes = 0,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf)
+
+typedef enum lookup_ip_dst_mcast_next_t_ {
+ LOOKUP_IP_DST_MCAST_NEXT_RPF,
+ LOOKUP_IP_DST_MCAST_N_NEXT,
+} lookup_ip_dst_mcast_next_t;
+
+always_inline uword
+lookup_dpo_ip_dst_mcast_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int is_v4)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = LOOKUP_IP_DST_MCAST_NEXT_RPF;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /* no dual-loop variant: packets are processed one at a time */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, fib_index0, next0;
+ const lookup_dpo_t * lkd0;
+ fib_node_index_t mfei0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* the lookup DPO index was stashed by the ip4/ip6 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+ fib_index0 = lkd0->lkd_fib_index;
+ next0 = LOOKUP_IP_DST_MCAST_NEXT_RPF;
+
+ if (is_v4)
+ {
+ ip4_header_t * ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ mfei0 = ip4_mfib_table_lookup(ip4_mfib_get(fib_index0),
+ &ip0->src_address,
+ &ip0->dst_address,
+ 64);
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = mfei0;
+ tr->addr.ip4 = ip0->dst_address;
+ }
+ }
+ else
+ {
+ ip6_header_t * ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ mfei0 = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index0),
+ &ip0->src_address,
+ &ip0->dst_address);
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = mfei0;
+ tr->addr.ip6 = ip0->dst_address;
+ }
+ }
+
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = mfei0;
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+always_inline uword
+lookup_ip4_dst_mcast (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip_dst_mcast_inline(vm, node, from_frame, 1));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_dst_mcast_node) = {
+ .function = lookup_ip4_dst_mcast,
+ .name = "lookup-ip4-dst-mcast",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_lookup_trace,
+ .n_next_nodes = LOOKUP_IP_DST_MCAST_N_NEXT,
+ .next_nodes = {
+ [LOOKUP_IP_DST_MCAST_NEXT_RPF] = "ip4-mfib-forward-rpf",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_mcast_node,
+ lookup_ip4_dst_mcast)
+
+always_inline uword
+lookup_ip6_dst_mcast (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip_dst_mcast_inline(vm, node, from_frame, 0));
+}
+
+VLIB_REGISTER_NODE (lookup_ip6_dst_mcast_node) = {
+ .function = lookup_ip6_dst_mcast,
+ .name = "lookup-ip6-dst-mcast",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_lookup_trace,
+ .n_next_nodes = LOOKUP_IP_DST_MCAST_N_NEXT,
+ .next_nodes = {
+ [LOOKUP_IP_DST_MCAST_NEXT_RPF] = "ip6-mfib-forward-rpf",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_mcast_node,
+ lookup_ip6_dst_mcast)
+
+static void
+lookup_dpo_mem_show (void)
+{
+ fib_show_memory_usage("Lookup",
+ pool_elts(lookup_dpo_pool),
+ pool_len(lookup_dpo_pool),
+ sizeof(lookup_dpo_t));
+}
+
+const static dpo_vft_t lkd_vft = {
+ .dv_lock = lookup_dpo_lock,
+ .dv_unlock = lookup_dpo_unlock,
+ .dv_format = format_lookup_dpo,
+};
+const static dpo_vft_t lkd_vft_w_mem_show = {
+ .dv_lock = lookup_dpo_lock,
+ .dv_unlock = lookup_dpo_unlock,
+ .dv_format = format_lookup_dpo,
+ .dv_mem_show = lookup_dpo_mem_show,
+};
+
+const static char* const lookup_src_ip4_nodes[] =
+{
+ "lookup-ip4-src",
+ NULL,
+};
+const static char* const lookup_src_ip6_nodes[] =
+{
+ "lookup-ip6-src",
+ NULL,
+};
+const static char* const * const lookup_src_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = lookup_src_ip4_nodes,
+ [DPO_PROTO_IP6] = lookup_src_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+const static char* const lookup_dst_ip4_nodes[] =
+{
+ "lookup-ip4-dst",
+ NULL,
+};
+const static char* const lookup_dst_ip6_nodes[] =
+{
+ "lookup-ip6-dst",
+ NULL,
+};
+const static char* const lookup_dst_mpls_nodes[] =
+{
+ "lookup-mpls-dst",
+ NULL,
+};
+const static char* const * const lookup_dst_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = lookup_dst_ip4_nodes,
+ [DPO_PROTO_IP6] = lookup_dst_ip6_nodes,
+ [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes,
+};
+
+const static char* const lookup_dst_mcast_ip4_nodes[] =
+{
+ "lookup-ip4-dst-mcast",
+ NULL,
+};
+const static char* const lookup_dst_mcast_ip6_nodes[] =
+{
+ "lookup-ip6-dst-mcast",
+ NULL,
+};
+const static char* const * const lookup_dst_mcast_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = lookup_dst_mcast_ip4_nodes,
+ [DPO_PROTO_IP6] = lookup_dst_mcast_ip6_nodes,
+};
+
+const static char* const lookup_dst_from_interface_ip4_nodes[] =
+{
+ "lookup-ip4-dst-itf",
+ NULL,
+};
+const static char* const lookup_dst_from_interface_ip6_nodes[] =
+{
+ "lookup-ip6-dst-itf",
+ NULL,
+};
+const static char* const lookup_dst_from_interface_mpls_nodes[] =
+{
+ "lookup-mpls-dst-itf",
+ NULL,
+};
+const static char* const * const lookup_dst_from_interface_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = lookup_dst_from_interface_ip4_nodes,
+ [DPO_PROTO_IP6] = lookup_dst_from_interface_ip6_nodes,
+ [DPO_PROTO_MPLS] = lookup_dst_from_interface_mpls_nodes,
+};
+
+
+void
+lookup_dpo_module_init (void)
+{
+ dpo_register(DPO_LOOKUP, &lkd_vft_w_mem_show, NULL);
+
+ /*
+ * There are various sorts of lookup: src or dst address, v4 or v6, etc.
+ * There isn't an object type for each (there is only the lookup_dpo_t),
+ * but, for performance reasons, there is a data-plane function, and hence
+ * a VLIB node, for each. VLIB graph node construction is based on DPO
+ * types, so we create sub-types.
+ */
+ lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC] =
+ dpo_register_new_type(&lkd_vft, lookup_src_nodes);
+ lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] =
+ dpo_register_new_type(&lkd_vft, lookup_dst_nodes);
+ lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_MCAST] =
+ dpo_register_new_type(&lkd_vft, lookup_dst_mcast_nodes);
+ lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] =
+ dpo_register_new_type(&lkd_vft, lookup_dst_from_interface_nodes);
+}
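+
+/*
+ * An illustrative sketch (not part of this file) of how the sub-types are
+ * consumed: a caller requests, say, a source-address lookup, and the
+ * resulting DPO carries the SRC sub-type so dpo_stack() wires the
+ * lookup-ipX-src nodes to the parent's nodes:
+ *
+ *   dpo_id_t dpo = DPO_INVALID;
+ *
+ *   lookup_dpo_add_or_lock_w_fib_index(0, DPO_PROTO_IP4,
+ *                                      LOOKUP_UNICAST,
+ *                                      LOOKUP_INPUT_SRC_ADDR,
+ *                                      LOOKUP_TABLE_FROM_CONFIG,
+ *                                      &dpo);
+ */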
diff --git a/src/vnet/dpo/lookup_dpo.h b/src/vnet/dpo/lookup_dpo.h
new file mode 100644
index 00000000..7dfd0385
--- /dev/null
+++ b/src/vnet/dpo/lookup_dpo.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOOKUP_DPO_H__
+#define __LOOKUP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * Switch to use the packet's source or destination address for lookup
+ */
+typedef enum lookup_input_t_ {
+ LOOKUP_INPUT_SRC_ADDR,
+ LOOKUP_INPUT_DST_ADDR,
+} __attribute__ ((packed)) lookup_input_t;
+
+#define LOOKUP_INPUTS { \
+ [LOOKUP_INPUT_SRC_ADDR] = "src-address", \
+ [LOOKUP_INPUT_DST_ADDR] = "dst-address", \
+}
+
+/**
+ * Switch to use the configured table or the table of the input interface
+ */
+typedef enum lookup_table_t_ {
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ LOOKUP_TABLE_FROM_CONFIG,
+} __attribute__ ((packed)) lookup_table_t;
+
+#define LOOKUP_TABLES { \
+ [LOOKUP_TABLE_FROM_INPUT_INTERFACE] = "table-input-interface", \
+ [LOOKUP_TABLE_FROM_CONFIG] = "table-configured", \
+}
+
+/**
+ * Switch between a unicast and a multicast FIB lookup
+ */
+typedef enum lookup_cast_t_ {
+ LOOKUP_UNICAST,
+ LOOKUP_MULTICAST,
+} __attribute__ ((packed)) lookup_cast_t;
+
+#define LOOKUP_CASTS { \
+ [LOOKUP_UNICAST] = "unicast", \
+ [LOOKUP_MULTICAST] = "multicast", \
+}
+
+/**
+ * A representation of a lookup, in a given table, to be performed in the data-path
+ */
+typedef struct lookup_dpo_t
+{
+ /**
+ * The FIB in which to perform the next lookup, or the interface
+ * from which to derive that FIB
+ */
+ fib_node_index_t lkd_fib_index;
+
+ /**
+ * The protocol of the FIB for the lookup, and hence
+ * the protocol of the packet
+ */
+ dpo_proto_t lkd_proto;
+
+ /**
+ * Switch to use src or dst address
+ */
+ lookup_input_t lkd_input;
+
+ /**
+ * Switch to use the table index passed, or the table of the input interface
+ */
+ lookup_table_t lkd_table;
+
+ /**
+ * Unicast or multicast FIB lookup
+ */
+ lookup_cast_t lkd_cast;
+
+ /**
+ * Number of locks
+ */
+ u16 lkd_locks;
+} lookup_dpo_t;
+
+extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index,
+ dpo_proto_t proto,
+ lookup_cast_t cast,
+ lookup_input_t input,
+ lookup_table_t table,
+ dpo_id_t *dpo);
+extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id,
+ dpo_proto_t proto,
+ lookup_cast_t cast,
+ lookup_input_t input,
+ lookup_table_t table,
+ dpo_id_t *dpo);
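+
+/*
+ * Usage sketch (illustrative): the table-id variant takes the user's
+ * table-id rather than an internal fib-index, e.g. to re-lookup IPv6
+ * destinations in table 100:
+ *
+ *   dpo_id_t dpo = DPO_INVALID;
+ *
+ *   lookup_dpo_add_or_lock_w_table_id(100, DPO_PROTO_IP6,
+ *                                     LOOKUP_UNICAST,
+ *                                     LOOKUP_INPUT_DST_ADDR,
+ *                                     LOOKUP_TABLE_FROM_CONFIG,
+ *                                     &dpo);
+ */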
+
+extern u8* format_lookup_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern lookup_dpo_t *lookup_dpo_pool;
+
+static inline lookup_dpo_t *
+lookup_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(lookup_dpo_pool, index));
+}
+
+extern void lookup_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/mpls_disposition.c b/src/vnet/dpo/mpls_disposition.c
new file mode 100644
index 00000000..5dc33fcf
--- /dev/null
+++ b/src/vnet/dpo/mpls_disposition.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/mpls_disposition.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all MPLS disposition DPOs
+ */
+mpls_disp_dpo_t *mpls_disp_dpo_pool;
+
+static mpls_disp_dpo_t *
+mpls_disp_dpo_alloc (void)
+{
+ mpls_disp_dpo_t *mdd;
+
+ pool_get_aligned(mpls_disp_dpo_pool, mdd, CLIB_CACHE_LINE_BYTES);
+ memset(mdd, 0, sizeof(*mdd));
+
+ dpo_reset(&mdd->mdd_dpo);
+
+ return (mdd);
+}
+
+static index_t
+mpls_disp_dpo_get_index (mpls_disp_dpo_t *mdd)
+{
+ return (mdd - mpls_disp_dpo_pool);
+}
+
+index_t
+mpls_disp_dpo_create (dpo_proto_t payload_proto,
+ fib_rpf_id_t rpf_id,
+ const dpo_id_t *dpo)
+{
+ mpls_disp_dpo_t *mdd;
+
+ mdd = mpls_disp_dpo_alloc();
+
+ mdd->mdd_payload_proto = payload_proto;
+ mdd->mdd_rpf_id = rpf_id;
+
+ dpo_stack(DPO_MPLS_DISPOSITION,
+ mdd->mdd_payload_proto,
+ &mdd->mdd_dpo,
+ dpo);
+
+ return (mpls_disp_dpo_get_index(mdd));
+}
+
+u8*
+format_mpls_disp_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ u32 indent = va_arg (*args, u32);
+ mpls_disp_dpo_t *mdd;
+
+ mdd = mpls_disp_dpo_get(index);
+
+ s = format(s, "mpls-disposition:[%d]:[%U]",
+ index,
+ format_dpo_proto, mdd->mdd_payload_proto);
+
+ s = format(s, "\n%U", format_white_space, indent);
+ s = format(s, "%U", format_dpo_id, &mdd->mdd_dpo, indent+2);
+
+ return (s);
+}
+
+static void
+mpls_disp_dpo_lock (dpo_id_t *dpo)
+{
+ mpls_disp_dpo_t *mdd;
+
+ mdd = mpls_disp_dpo_get(dpo->dpoi_index);
+
+ mdd->mdd_locks++;
+}
+
+static void
+mpls_disp_dpo_unlock (dpo_id_t *dpo)
+{
+ mpls_disp_dpo_t *mdd;
+
+ mdd = mpls_disp_dpo_get(dpo->dpoi_index);
+
+ mdd->mdd_locks--;
+
+ if (0 == mdd->mdd_locks)
+ {
+ dpo_reset(&mdd->mdd_dpo);
+ pool_put(mpls_disp_dpo_pool, mdd);
+ }
+}
+
+/**
+ * @brief A struct to hold tracing information for the MPLS label disposition
+ * node.
+ */
+typedef struct mpls_label_disposition_trace_t_
+{
+ index_t mdd;
+} mpls_label_disposition_trace_t;
+
+always_inline uword
+mpls_label_disposition_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u8 payload_is_ip4,
+ u8 payload_is_ip6)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ mpls_disp_dpo_t *mdd0, *mdd1;
+ u32 bi0, mddi0, bi1, mddi1;
+ vlib_buffer_t * b0, *b1;
+ u32 next0, next1;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip6_header_t), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip6_header_t), STORE);
+ }
+
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* the disposition DPO index was stashed by the previous lookup */
+ mddi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ mddi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+ mdd0 = mpls_disp_dpo_get(mddi0);
+ mdd1 = mpls_disp_dpo_get(mddi1);
+
+ if (payload_is_ip4)
+ {
+ /*
+ * TTL propagation from the popped label to the IP header
+ * would be applied here; this node does not yet do so
+ */
+ }
+ else if (payload_is_ip6)
+ {
+ /*
+ * hop-limit propagation from the popped label would be
+ * applied here; this node does not yet do so
+ */
+ }
+
+ next0 = mdd0->mdd_dpo.dpoi_next_node;
+ next1 = mdd1->mdd_dpo.dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mdd0->mdd_dpo.dpoi_index;
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mdd1->mdd_dpo.dpoi_index;
+ vnet_buffer(b0)->ip.rpf_id = mdd0->mdd_rpf_id;
+ vnet_buffer(b1)->ip.rpf_id = mdd1->mdd_rpf_id;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_disposition_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+
+ tr->mdd = mddi0;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_disposition_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->mdd = mddi1;
+ }
+
+ vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+ n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ mpls_disp_dpo_t *mdd0;
+ vlib_buffer_t * b0;
+ u32 bi0, mddi0;
+ u32 next0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* the disposition DPO index was stashed by the previous lookup */
+ mddi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ mdd0 = mpls_disp_dpo_get(mddi0);
+
+ if (payload_is_ip4)
+ {
+ /*
+ * TTL propagation from the popped label to the IP header
+ * would be applied here; this node does not yet do so
+ */
+ }
+ else if (payload_is_ip6)
+ {
+ /*
+ * hop-limit propagation from the popped label would be
+ * applied here; this node does not yet do so
+ */
+ }
+
+ next0 = mdd0->mdd_dpo.dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mdd0->mdd_dpo.dpoi_index;
+ vnet_buffer(b0)->ip.rpf_id = mdd0->mdd_rpf_id;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_disposition_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->mdd = mddi0;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_mpls_label_disposition_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_label_disposition_trace_t * t;
+
+ t = va_arg (*args, mpls_label_disposition_trace_t *);
+
+ s = format(s, "disp:%d", t->mdd);
+ return (s);
+}
+
+static uword
+ip4_mpls_label_disposition (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_label_disposition_inline(vm, node, frame, 1, 0));
+}
+
+VLIB_REGISTER_NODE (ip4_mpls_label_disposition_node) = {
+ .function = ip4_mpls_label_disposition,
+ .name = "ip4-mpls-label-disposition",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_disposition_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip4-drop",
+ }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_disposition_node,
+ ip4_mpls_label_disposition)
+
+static uword
+ip6_mpls_label_disposition (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_label_disposition_inline(vm, node, frame, 0, 1));
+}
+
+VLIB_REGISTER_NODE (ip6_mpls_label_disposition_node) = {
+ .function = ip6_mpls_label_disposition,
+ .name = "ip6-mpls-label-disposition",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_disposition_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip6-drop",
+ }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_disposition_node,
+ ip6_mpls_label_disposition)
+
+static void
+mpls_disp_dpo_mem_show (void)
+{
+ fib_show_memory_usage("MPLS label",
+ pool_elts(mpls_disp_dpo_pool),
+ pool_len(mpls_disp_dpo_pool),
+ sizeof(mpls_disp_dpo_t));
+}
+
+const static dpo_vft_t mdd_vft = {
+ .dv_lock = mpls_disp_dpo_lock,
+ .dv_unlock = mpls_disp_dpo_unlock,
+ .dv_format = format_mpls_disp_dpo,
+ .dv_mem_show = mpls_disp_dpo_mem_show,
+};
+
+const static char* const mpls_label_disp_ip4_nodes[] =
+{
+ "ip4-mpls-label-disposition",
+ NULL,
+};
+const static char* const mpls_label_disp_ip6_nodes[] =
+{
+ "ip6-mpls-label-disposition",
+ NULL,
+};
+const static char* const * const mpls_label_disp_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = mpls_label_disp_ip4_nodes,
+ [DPO_PROTO_IP6] = mpls_label_disp_ip6_nodes,
+};
+
+
+void
+mpls_disp_dpo_module_init (void)
+{
+ dpo_register(DPO_MPLS_DISPOSITION, &mdd_vft, mpls_label_disp_nodes);
+}
diff --git a/src/vnet/dpo/mpls_disposition.h b/src/vnet/dpo/mpls_disposition.h
new file mode 100644
index 00000000..9c015083
--- /dev/null
+++ b/src/vnet/dpo/mpls_disposition.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_DISP_DPO_H__
+#define __MPLS_DISP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/mfib/mfib_types.h>
+
+/**
+ * A representation of an MPLS label disposition (pop) in the data-path
+ */
+typedef struct mpls_disp_dpo_t
+{
+ /**
+ * Next DPO in the graph
+ */
+ dpo_id_t mdd_dpo;
+
+ /**
+ * The protocol of the payload/packets that are being encapped
+ */
+ dpo_proto_t mdd_payload_proto;
+
+ /**
+ * RPF-ID (if this is an mcast disposition)
+ */
+ fib_rpf_id_t mdd_rpf_id;
+
+ /**
+ * Number of locks/users of the label
+ */
+ u16 mdd_locks;
+} mpls_disp_dpo_t;
+
+/**
+ * @brief Assert that the MPLS disposition object is less than a cache line
+ * in size, since it is accessed in the data-path.
+ */
+STATIC_ASSERT((sizeof(mpls_disp_dpo_t) <= CLIB_CACHE_LINE_BYTES),
+ "MPLS Disposition DPO is larger than one cache line.");
+
+/**
+ * @brief Create an MPLS disposition object
+ *
+ * @param payload_proto The protocol of the payload packets that will
+ *                      be disposed of by this object.
+ * @param rpf_id The RPF-ID to apply (for multicast dispositions).
+ * @param dpo The parent of the created MPLS disposition object
+ */
+extern index_t mpls_disp_dpo_create(dpo_proto_t payload_proto,
+ fib_rpf_id_t rpf_id,
+ const dpo_id_t *dpo);
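+
+/*
+ * Usage sketch (illustrative; assumes MFIB_RPF_ID_NONE from
+ * mfib_types.h for the unicast case):
+ *
+ *   dpo_id_t parent = DPO_INVALID;
+ *   index_t mddi;
+ *
+ *   ... resolve 'parent' to the IP DPO the payload continues to ...
+ *   mddi = mpls_disp_dpo_create(DPO_PROTO_IP4, MFIB_RPF_ID_NONE, &parent);
+ */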
+
+extern u8* format_mpls_disp_dpo(u8 *s, va_list *args);
+
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern mpls_disp_dpo_t *mpls_disp_dpo_pool;
+
+static inline mpls_disp_dpo_t *
+mpls_disp_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(mpls_disp_dpo_pool, index));
+}
+
+extern void mpls_disp_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
new file mode 100644
index 00000000..2a6e7dd5
--- /dev/null
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -0,0 +1,703 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all MPLS Label DPOs
+ */
+mpls_label_dpo_t *mpls_label_dpo_pool;
+
+static mpls_label_dpo_t *
+mpls_label_dpo_alloc (void)
+{
+ mpls_label_dpo_t *mld;
+
+ pool_get_aligned(mpls_label_dpo_pool, mld, CLIB_CACHE_LINE_BYTES);
+ memset(mld, 0, sizeof(*mld));
+
+ dpo_reset(&mld->mld_dpo);
+
+ return (mld);
+}
+
+static index_t
+mpls_label_dpo_get_index (mpls_label_dpo_t *mld)
+{
+ return (mld - mpls_label_dpo_pool);
+}
+
+index_t
+mpls_label_dpo_create (mpls_label_t *label_stack,
+ mpls_eos_bit_t eos,
+ u8 ttl,
+ u8 exp,
+ dpo_proto_t payload_proto,
+ const dpo_id_t *dpo)
+{
+ mpls_label_dpo_t *mld;
+ u32 ii;
+
+ mld = mpls_label_dpo_alloc();
+ mld->mld_n_labels = vec_len(label_stack);
+ mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]);
+ mld->mld_payload_proto = payload_proto;
+
+ /*
+ * construct a label rewrite header for each value passed.
+ * get the header in network byte order since we will paint it
+ * on a packet in the data-plane
+ */
+
+ for (ii = 0; ii < mld->mld_n_labels-1; ii++)
+ {
+ vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]);
+ vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl, 255);
+ vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl, 0);
+ vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl, MPLS_NON_EOS);
+ mld->mld_hdr[ii].label_exp_s_ttl =
+ clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl);
+ }
+
+ /*
+ * the inner most label
+ */
+ ii = mld->mld_n_labels-1;
+
+ vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]);
+ vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl, ttl);
+ vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl, exp);
+ vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl, eos);
+ mld->mld_hdr[ii].label_exp_s_ttl =
+ clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl);
+
+ /*
+ * stack this label object on its parent.
+ */
+ dpo_stack(DPO_MPLS_LABEL,
+ mld->mld_payload_proto,
+ &mld->mld_dpo,
+ dpo);
+
+ return (mpls_label_dpo_get_index(mld));
+}
+
+u8*
+format_mpls_label_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ u32 indent = va_arg (*args, u32);
+ mpls_unicast_header_t hdr;
+ mpls_label_dpo_t *mld;
+ u32 ii;
+
+ s = format(s, "mpls-label:[%d]:", index);
+
+ if (pool_is_free_index(mpls_label_dpo_pool, index))
+ {
+ /*
+ * the packet trace can be printed after the DPO has been deleted
+ */
+ return (s);
+ }
+
+ mld = mpls_label_dpo_get(index);
+
+ for (ii = 0; ii < mld->mld_n_labels; ii++)
+ {
+ hdr.label_exp_s_ttl =
+ clib_net_to_host_u32(mld->mld_hdr[ii].label_exp_s_ttl);
+ s = format(s, "%U", format_mpls_header, hdr);
+ }
+
+ s = format(s, "\n%U", format_white_space, indent);
+ s = format(s, "%U", format_dpo_id, &mld->mld_dpo, indent+2);
+
+ return (s);
+}
+
+static void
+mpls_label_dpo_lock (dpo_id_t *dpo)
+{
+ mpls_label_dpo_t *mld;
+
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+
+ mld->mld_locks++;
+}
+
+static void
+mpls_label_dpo_unlock (dpo_id_t *dpo)
+{
+ mpls_label_dpo_t *mld;
+
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+
+ mld->mld_locks--;
+
+ if (0 == mld->mld_locks)
+ {
+ dpo_reset(&mld->mld_dpo);
+ pool_put(mpls_label_dpo_pool, mld);
+ }
+}
+
+/**
+ * @brief A struct to hold tracing information for the MPLS label imposition
+ * node.
+ */
+typedef struct mpls_label_imposition_trace_t_
+{
+ /**
+ * The MPLS header imposed
+ */
+ mpls_unicast_header_t hdr;
+} mpls_label_imposition_trace_t;
+
+always_inline mpls_unicast_header_t *
+mpls_label_paint (vlib_buffer_t * b0,
+ mpls_label_dpo_t *mld0,
+ u8 ttl0)
+{
+ mpls_unicast_header_t *hdr0;
+
+ vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
+
+ hdr0 = vlib_buffer_get_current(b0);
+
+ if (1 == mld0->mld_n_labels)
+ {
+ /* optimise for the common case of one label */
+ *hdr0 = mld0->mld_hdr[0];
+ }
+ else
+ {
+ clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
+ hdr0 = hdr0 + (mld0->mld_n_labels - 1);
+ }
+ /* fixup the TTL for the inner most label */
+ ((char*)hdr0)[3] = ttl0;
+
+ return (hdr0);
+}
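+
+/*
+ * Worked example: for a two-label stack all 8 header bytes are copied,
+ * hdr0 is then advanced to the inner (second) label, and byte 3 of that
+ * label - the TTL field in network byte order - is overwritten with ttl0.
+ */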
+
+always_inline uword
+mpls_label_imposition_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u8 payload_is_ip4,
+ u8 payload_is_ip6,
+ u8 payload_is_ethernet)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, mldi0, bi1, mldi1, bi2, mldi2, bi3, mldi3;
+ mpls_unicast_header_t *hdr0, *hdr1, *hdr2, *hdr3;
+ mpls_label_dpo_t *mld0, *mld1, *mld2, *mld3;
+ vlib_buffer_t * b0, *b1, * b2, *b3;
+ u32 next0, next1, next2, next3;
+ u8 ttl0, ttl1, ttl2, ttl3;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+ bi2 = to_next[2] = from[2];
+ bi3 = to_next[3] = from[3];
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3, *p4, *p5;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
+ }
+
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* the label DPO index was stashed by the previous lookup */
+ mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+ mldi2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX];
+ mldi3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX];
+ mld0 = mpls_label_dpo_get(mldi0);
+ mld1 = mpls_label_dpo_get(mldi1);
+ mld2 = mpls_label_dpo_get(mldi2);
+ mld3 = mpls_label_dpo_get(mldi3);
+
+ if (payload_is_ip4)
+ {
+ /*
+ * decrement the TTL on ingress to the LSP
+ */
+ ip4_header_t * ip0 = vlib_buffer_get_current(b0);
+ ip4_header_t * ip1 = vlib_buffer_get_current(b1);
+ ip4_header_t * ip2 = vlib_buffer_get_current(b2);
+ ip4_header_t * ip3 = vlib_buffer_get_current(b3);
+ u32 checksum0;
+ u32 checksum1;
+ u32 checksum2;
+ u32 checksum3;
+
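+ /*
+ * incremental checksum update (in the style of RFC 1624): the TTL
+ * decrement lowers the ttl/protocol 16-bit word by 0x0100, so the
+ * checksum is raised by htons(0x0100) with an end-around carry
+ */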
+ checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+ checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+ checksum2 = ip2->checksum + clib_host_to_net_u16 (0x0100);
+ checksum3 = ip3->checksum + clib_host_to_net_u16 (0x0100);
+
+ checksum0 += checksum0 >= 0xffff;
+ checksum1 += checksum1 >= 0xffff;
+ checksum2 += checksum2 >= 0xffff;
+ checksum3 += checksum3 >= 0xffff;
+
+ ip0->checksum = checksum0;
+ ip1->checksum = checksum1;
+ ip2->checksum = checksum2;
+ ip3->checksum = checksum3;
+
+ ip0->ttl -= 1;
+ ip1->ttl -= 1;
+ ip2->ttl -= 1;
+ ip3->ttl -= 1;
+
+ ttl1 = ip1->ttl;
+ ttl0 = ip0->ttl;
+ ttl3 = ip3->ttl;
+ ttl2 = ip2->ttl;
+ }
+ else if (payload_is_ip6)
+ {
+ /*
+ * decrement the TTL on ingress to the LSP
+ */
+ ip6_header_t * ip0 = vlib_buffer_get_current(b0);
+ ip6_header_t * ip1 = vlib_buffer_get_current(b1);
+ ip6_header_t * ip2 = vlib_buffer_get_current(b2);
+ ip6_header_t * ip3 = vlib_buffer_get_current(b3);
+
+ ip0->hop_limit -= 1;
+ ip1->hop_limit -= 1;
+ ip2->hop_limit -= 1;
+ ip3->hop_limit -= 1;
+
+ ttl0 = ip0->hop_limit;
+ ttl1 = ip1->hop_limit;
+ ttl2 = ip2->hop_limit;
+ ttl3 = ip3->hop_limit;
+ }
+ else if (payload_is_ethernet)
+ {
+ /*
+ * nothing to change in the ethernet header
+ */
+ ttl0 = ttl1 = ttl2 = ttl3 = 255;
+ }
+ else
+ {
+ /*
+ * else, the packet to be encapped is an MPLS packet
+ */
+ if (PREDICT_TRUE(vnet_buffer(b0)->mpls.first))
+ {
+ /*
+ * This is the first label to be imposed on the packet, i.e. a
+ * label swap, in which case the TTL and EXP bits were stashed
+ * in the packet by the lookup node
+ */
+ ASSERT(0 != vnet_buffer (b0)->mpls.ttl);
+
+ ttl0 = vnet_buffer(b0)->mpls.ttl - 1;
+ }
+ else
+ {
+ /*
+ * not the first label, implying we are recursing down a chain of
+ * output labels.
+ * Each layer is considered a new LSP - hence the TTL is reset.
+ */
+ ttl0 = 255;
+ }
+ if (PREDICT_TRUE(vnet_buffer(b1)->mpls.first))
+ {
+ ASSERT(0 != vnet_buffer (b1)->mpls.ttl);
+ ttl1 = vnet_buffer(b1)->mpls.ttl - 1;
+ }
+ else
+ {
+ ttl1 = 255;
+ }
+ if (PREDICT_TRUE(vnet_buffer(b2)->mpls.first))
+ {
+ ASSERT(0 != vnet_buffer (b2)->mpls.ttl);
+
+ ttl2 = vnet_buffer(b2)->mpls.ttl - 1;
+ }
+ else
+ {
+ ttl2 = 255;
+ }
+ if (PREDICT_TRUE(vnet_buffer(b3)->mpls.first))
+ {
+ ASSERT(0 != vnet_buffer (b3)->mpls.ttl);
+ ttl3 = vnet_buffer(b3)->mpls.ttl - 1;
+ }
+ else
+ {
+ ttl3 = 255;
+ }
+ }
+ vnet_buffer(b0)->mpls.first = 0;
+ vnet_buffer(b1)->mpls.first = 0;
+ vnet_buffer(b2)->mpls.first = 0;
+ vnet_buffer(b3)->mpls.first = 0;
+
+ /* Paint the MPLS header */
+ hdr0 = mpls_label_paint(b0, mld0, ttl0);
+ hdr1 = mpls_label_paint(b1, mld1, ttl1);
+ hdr2 = mpls_label_paint(b2, mld2, ttl2);
+ hdr3 = mpls_label_paint(b3, mld3, ttl3);
+
+ next0 = mld0->mld_dpo.dpoi_next_node;
+ next1 = mld1->mld_dpo.dpoi_next_node;
+ next2 = mld2->mld_dpo.dpoi_next_node;
+ next3 = mld3->mld_dpo.dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index;
+ vnet_buffer(b2)->ip.adj_index[VLIB_TX] = mld2->mld_dpo.dpoi_index;
+ vnet_buffer(b3)->ip.adj_index[VLIB_TX] = mld3->mld_dpo.dpoi_index;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->hdr = *hdr0;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->hdr = *hdr1;
+ }
+ if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ tr->hdr = *hdr2;
+ }
+ if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ tr->hdr = *hdr3;
+ }
+
+ vlib_validate_buffer_enqueue_x4(vm, node, next_index, to_next,
+ n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ mpls_unicast_header_t *hdr0;
+ mpls_label_dpo_t *mld0;
+ vlib_buffer_t * b0;
+ u32 bi0, mldi0;
+ u32 next0;
+ u8 ttl;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* the label DPO index was stashed by the previous lookup */
+ mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ mld0 = mpls_label_dpo_get(mldi0);
+
+ if (payload_is_ip4)
+ {
+ /*
+ * decrement the TTL on ingress to the LSP
+ */
+ ip4_header_t * ip0 = vlib_buffer_get_current(b0);
+ u32 checksum0;
+
+ checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+ checksum0 += checksum0 >= 0xffff;
+
+ ip0->checksum = checksum0;
+ ip0->ttl -= 1;
+ ttl = ip0->ttl;
+ }
+ else if (payload_is_ip6)
+ {
+ /*
+ * decrement the TTL on ingress to the LSP
+ */
+ ip6_header_t * ip0 = vlib_buffer_get_current(b0);
+
+ ip0->hop_limit -= 1;
+ ttl = ip0->hop_limit;
+ }
+ else
+ {
+ /*
+ * else, the packet to be encapped is an MPLS packet
+ */
+ if (vnet_buffer(b0)->mpls.first)
+ {
+ /*
+ * This is the first label to be imposed on the packet, i.e. a
+ * label swap, in which case the TTL and EXP bits were stashed
+ * in the packet by the lookup node
+ */
+ ASSERT(0 != vnet_buffer (b0)->mpls.ttl);
+
+ ttl = vnet_buffer(b0)->mpls.ttl - 1;
+ }
+ else
+ {
+ /*
+ * not the first label, implying we are recursing down a chain of
+ * output labels.
+ * Each layer is considered a new LSP - hence the TTL is reset.
+ */
+ ttl = 255;
+ }
+ }
+ vnet_buffer(b0)->mpls.first = 0;
+
+ /* Paint the MPLS header */
+ vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
+ hdr0 = vlib_buffer_get_current(b0);
+ clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
+
+ /* fixup the TTL for the inner most label */
+ hdr0 = hdr0 + (mld0->mld_n_labels - 1);
+ ((char*)hdr0)[3] = ttl;
+
+ next0 = mld0->mld_dpo.dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->hdr = *hdr0;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_mpls_label_imposition_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_label_imposition_trace_t * t;
+ mpls_unicast_header_t hdr;
+ uword indent;
+
+ t = va_arg (*args, mpls_label_imposition_trace_t *);
+ indent = format_get_indent (s);
+ hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl);
+
+ s = format (s, "%Umpls-header:%U",
+ format_white_space, indent,
+ format_mpls_header, hdr);
+ return (s);
+}
+
+static uword
+mpls_label_imposition (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 0));
+}
+
+VLIB_REGISTER_NODE (mpls_label_imposition_node) = {
+ .function = mpls_label_imposition,
+ .name = "mpls-label-imposition",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_imposition_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "mpls-drop",
+ }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node,
+ mpls_label_imposition)
+
+static uword
+ip4_mpls_label_imposition (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_label_imposition_inline(vm, node, frame, 1, 0, 0));
+}
+
+VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = {
+ .function = ip4_mpls_label_imposition,
+ .name = "ip4-mpls-label-imposition",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_imposition_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip4-drop",
+ }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_imposition_node,
+ ip4_mpls_label_imposition)
+
+static uword
+ip6_mpls_label_imposition (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_label_imposition_inline(vm, node, frame, 0, 1, 0));
+}
+
+VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = {
+ .function = ip6_mpls_label_imposition,
+ .name = "ip6-mpls-label-imposition",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_imposition_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip6-drop",
+ }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node,
+ ip6_mpls_label_imposition)
+
+static uword
+ethernet_mpls_label_imposition (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 1));
+}
+
+VLIB_REGISTER_NODE (ethernet_mpls_label_imposition_node) = {
+ .function = ethernet_mpls_label_imposition,
+ .name = "ethernet-mpls-label-imposition",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_imposition_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ethernet_mpls_label_imposition_node,
+ ethernet_mpls_label_imposition)
+
+static void
+mpls_label_dpo_mem_show (void)
+{
+ fib_show_memory_usage("MPLS label",
+ pool_elts(mpls_label_dpo_pool),
+ pool_len(mpls_label_dpo_pool),
+ sizeof(mpls_label_dpo_t));
+}
+
+const static dpo_vft_t mld_vft = {
+ .dv_lock = mpls_label_dpo_lock,
+ .dv_unlock = mpls_label_dpo_unlock,
+ .dv_format = format_mpls_label_dpo,
+ .dv_mem_show = mpls_label_dpo_mem_show,
+};
+
+const static char* const mpls_label_imp_ip4_nodes[] =
+{
+ "ip4-mpls-label-imposition",
+ NULL,
+};
+const static char* const mpls_label_imp_ip6_nodes[] =
+{
+ "ip6-mpls-label-imposition",
+ NULL,
+};
+const static char* const mpls_label_imp_mpls_nodes[] =
+{
+ "mpls-label-imposition",
+ NULL,
+};
+const static char* const mpls_label_imp_ethernet_nodes[] =
+{
+ "ethernet-mpls-label-imposition",
+ NULL,
+};
+
+const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = mpls_label_imp_ip4_nodes,
+ [DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes,
+ [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = mpls_label_imp_ethernet_nodes,
+};
+
+
+void
+mpls_label_dpo_module_init (void)
+{
+ dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes);
+}
diff --git a/src/vnet/dpo/mpls_label_dpo.h b/src/vnet/dpo/mpls_label_dpo.h
new file mode 100644
index 00000000..e23f3d26
--- /dev/null
+++ b/src/vnet/dpo/mpls_label_dpo.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_LABEL_DPO_H__
+#define __MPLS_LABEL_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of an MPLS label for imposition in the data-path
+ */
+typedef struct mpls_label_dpo_t
+{
+ /**
+ * The MPLS label header to impose. Outer most label first.
+ */
+ mpls_unicast_header_t mld_hdr[8];
+
+ /**
+ * Next DPO in the graph
+ */
+ dpo_id_t mld_dpo;
+
+ /**
+ * The protocol of the payload/packets that are being encapped
+ */
+ dpo_proto_t mld_payload_proto;
+
+ /**
+ * Size of the label stack
+ */
+ u16 mld_n_labels;
+
+ /**
+ * Cached amount of header bytes to paint
+ */
+ u16 mld_n_hdr_bytes;
+
+ /**
+ * Number of locks/users of the label
+ */
+ u16 mld_locks;
+} mpls_label_dpo_t;
+
+/**
+ * @brief Assert that the MPLS label object is less than a cache line in size.
+ * Should this get any bigger then we will need to reconsider how many labels
+ * can be pushed in one object.
+ */
+STATIC_ASSERT((sizeof(mpls_label_dpo_t) <= CLIB_CACHE_LINE_BYTES),
+ "MPLS label DPO is larger than one cache line.");
+
+/**
+ * @brief Create an MPLS label object
+ *
+ * @param label_stack The stack of labels to impose, outer-most label first
+ * @param eos The inner-most label's EOS bit
+ * @param ttl The inner-most label's TTL value
+ * @param exp The inner-most label's EXP bits
+ * @param payload_proto The protocol of the payload packets that will
+ * be imposed with this label header.
+ * @param dpo The parent of the created MPLS label object
+ */
+extern index_t mpls_label_dpo_create(mpls_label_t *label_stack,
+ mpls_eos_bit_t eos,
+ u8 ttl,
+ u8 exp,
+ dpo_proto_t payload_proto,
+ const dpo_id_t *dpo);
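+
+/*
+ * Usage sketch (illustrative): impose a single EOS label 42, TTL 64,
+ * EXP 0, on IPv4 payloads, stacked on an already-resolved parent:
+ *
+ *   mpls_label_t *stack = NULL;
+ *   index_t mldi;
+ *
+ *   vec_add1(stack, 42);
+ *   mldi = mpls_label_dpo_create(stack, MPLS_EOS, 64, 0,
+ *                                DPO_PROTO_IP4, &parent);
+ *   vec_free(stack);   // the labels are copied into the object
+ */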
+
+extern u8* format_mpls_label_dpo(u8 *s, va_list *args);
+
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern mpls_label_dpo_t *mpls_label_dpo_pool;
+
+static inline mpls_label_dpo_t *
+mpls_label_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(mpls_label_dpo_pool, index));
+}
+
+extern void mpls_label_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/punt_dpo.c b/src/vnet/dpo/punt_dpo.c
new file mode 100644
index 00000000..d1661dcc
--- /dev/null
+++ b/src/vnet/dpo/punt_dpo.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing punting the packet to the control-plane
+ */
+
+#include <vnet/dpo/dpo.h>
+
+static dpo_id_t punt_dpos[DPO_PROTO_NUM];
+
+const dpo_id_t *
+punt_dpo_get (dpo_proto_t proto)
+{
+ dpo_set(&punt_dpos[proto], DPO_PUNT, proto, 1);
+
+ return (&punt_dpos[proto]);
+}
+
+int
+dpo_is_punt (const dpo_id_t *dpo)
+{
+ return (dpo->dpoi_type == DPO_PUNT);
+}
+
+static void
+punt_dpo_lock (dpo_id_t *dpo)
+{
+ /*
+ * no lock count is maintained on the punt DPO; it is more trouble
+ * than it's worth. One always needs to exist, so there is no point
+ * in managing its lifetime
+ */
+}
+static void
+punt_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+static u8*
+format_punt_dpo (u8 *s, va_list *ap)
+{
+ CLIB_UNUSED(index_t index) = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+
+ return (format(s, "dpo-punt"));
+}
+
+const static dpo_vft_t punt_vft = {
+ .dv_lock = punt_dpo_lock,
+ .dv_unlock = punt_dpo_unlock,
+ .dv_format = format_punt_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a punt
+ * object.
+ *
+ * these are the graph nodes for which a punt DPO is the parent
+ * object in the DPO graph.
+ */
+const static char* const punt_ip4_nodes[] =
+{
+ "ip4-punt",
+ NULL,
+};
+const static char* const punt_ip6_nodes[] =
+{
+ "ip6-punt",
+ NULL,
+};
+const static char* const punt_mpls_nodes[] =
+{
+ "mpls-punt",
+ NULL,
+};
+const static char* const * const punt_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = punt_ip4_nodes,
+ [DPO_PROTO_IP6] = punt_ip6_nodes,
+ [DPO_PROTO_MPLS] = punt_mpls_nodes,
+};
+
+void
+punt_dpo_module_init (void)
+{
+ dpo_register(DPO_PUNT, &punt_vft, punt_nodes);
+}
diff --git a/src/vnet/dpo/punt_dpo.h b/src/vnet/dpo/punt_dpo.h
new file mode 100644
index 00000000..370547c1
--- /dev/null
+++ b/src/vnet/dpo/punt_dpo.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief A DPO to punt packets to the Control-plane
+ */
+
+#ifndef __PUNT_DPO_H__
+#define __PUNT_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern int dpo_is_punt(const dpo_id_t *dpo);
+
+extern const dpo_id_t *punt_dpo_get(dpo_proto_t proto);
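+
+/*
+ * Usage sketch (illustrative): the punt DPO is a per-protocol
+ * singleton; callers copy the shared instance rather than create one:
+ *
+ *   dpo_id_t dpo = DPO_INVALID;
+ *   dpo_copy(&dpo, punt_dpo_get(DPO_PROTO_IP4));
+ */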
+
+extern void punt_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/receive_dpo.c b/src/vnet/dpo/receive_dpo.c
new file mode 100644
index 00000000..83e33ed8
--- /dev/null
+++ b/src/vnet/dpo/receive_dpo.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing receiving the packet, i.e. it is for-us
+ */
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/receive_dpo.h>
+
+/**
+ * @brief pool of all receive DPOs
+ */
+receive_dpo_t *receive_dpo_pool;
+
+static receive_dpo_t *
+receive_dpo_alloc (void)
+{
+ receive_dpo_t *rd;
+
+ pool_get_aligned(receive_dpo_pool, rd, CLIB_CACHE_LINE_BYTES);
+ memset(rd, 0, sizeof(*rd));
+
+ return (rd);
+}
+
+static receive_dpo_t *
+receive_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+ ASSERT(DPO_RECEIVE == dpo->dpoi_type);
+
+ return (receive_dpo_get(dpo->dpoi_index));
+}
+
+
+/*
+ * receive_dpo_add_or_lock
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The local adj is added to an interface's receive prefix, the next-hop
+ * passed here is the local prefix on the same interface.
+ */
+void
+receive_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr,
+ dpo_id_t *dpo)
+{
+ receive_dpo_t *rd;
+
+ rd = receive_dpo_alloc();
+
+ rd->rd_sw_if_index = sw_if_index;
+ if (NULL != nh_addr)
+ {
+ rd->rd_addr = *nh_addr;
+ }
+
+ dpo_set(dpo, DPO_RECEIVE, proto, (rd - receive_dpo_pool));
+}
+
+static void
+receive_dpo_lock (dpo_id_t *dpo)
+{
+ receive_dpo_t *rd;
+
+ rd = receive_dpo_get_from_dpo(dpo);
+ rd->rd_locks++;
+}
+
+static void
+receive_dpo_unlock (dpo_id_t *dpo)
+{
+ receive_dpo_t *rd;
+
+ rd = receive_dpo_get_from_dpo(dpo);
+ rd->rd_locks--;
+
+ if (0 == rd->rd_locks)
+ {
+ pool_put(receive_dpo_pool, rd);
+ }
+}
+
+static u8*
+format_receive_dpo (u8 *s, va_list *ap)
+{
+ CLIB_UNUSED(index_t index) = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ receive_dpo_t *rd;
+
+ if (pool_is_free_index(receive_dpo_pool, index))
+ {
+ return (format(s, "dpo-receive DELETED"));
+ }
+
+ rd = receive_dpo_get(index);
+
+ if (~0 != rd->rd_sw_if_index)
+ {
+ return (format(s, "dpo-receive: %U on %U",
+ format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY,
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface(vnm, rd->rd_sw_if_index)));
+ }
+ else
+ {
+ return (format(s, "dpo-receive"));
+ }
+}
+
+static void
+receive_dpo_mem_show (void)
+{
+ fib_show_memory_usage("Receive",
+ pool_elts(receive_dpo_pool),
+ pool_len(receive_dpo_pool),
+ sizeof(receive_dpo_t));
+}
+
+const static dpo_vft_t receive_vft = {
+ .dv_lock = receive_dpo_lock,
+ .dv_unlock = receive_dpo_unlock,
+ .dv_format = format_receive_dpo,
+ .dv_mem_show = receive_dpo_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a receive
+ * object.
+ *
+ * these are the graph nodes for which a receive DPO is the parent
+ * object in the DPO graph.
+ */
+const static char* const receive_ip4_nodes[] =
+{
+ "ip4-local",
+ NULL,
+};
+const static char* const receive_ip6_nodes[] =
+{
+ "ip6-local",
+ NULL,
+};
+
+const static char* const * const receive_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = receive_ip4_nodes,
+ [DPO_PROTO_IP6] = receive_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+receive_dpo_module_init (void)
+{
+ dpo_register(DPO_RECEIVE, &receive_vft, receive_nodes);
+}
diff --git a/src/vnet/dpo/receive_dpo.h b/src/vnet/dpo/receive_dpo.h
new file mode 100644
index 00000000..2420fd78
--- /dev/null
+++ b/src/vnet/dpo/receive_dpo.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing receiving the packet, i.e. it's for us
+ */
+
+#ifndef __RECEIVE_DPO_H__
+#define __RECEIVE_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/ip6.h>
+
+typedef struct receive_dpo_t_
+{
+ /**
+ * The Software interface index on which traffic is received
+ */
+ u32 rd_sw_if_index;
+
+ /**
+ * The address on the receive interface. Packets are destined to this address.
+ */
+ ip46_address_t rd_addr;
+
+ /**
+ * The number of locks.
+ */
+ u16 rd_locks;
+} receive_dpo_t;
+
+extern void receive_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr,
+ dpo_id_t *dpo);
+
+extern void receive_dpo_module_init(void);
+
+/**
+ * @brief pool of all receive DPOs
+ */
+extern receive_dpo_t *receive_dpo_pool;
+
+static inline receive_dpo_t *
+receive_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(receive_dpo_pool, index));
+}
+
+#endif
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
new file mode 100644
index 00000000..9fdb9a05
--- /dev/null
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/adj/adj.h>
+#include <vnet/mpls/mpls_types.h>
+
+#undef REP_DEBUG
+
+#ifdef REP_DEBUG
+#define REP_DBG(_rep, _fmt, _args...) \
+{ \
+ u8* _tmp =NULL; \
+ clib_warning("rep:[%s]:" _fmt, \
+ replicate_format(replicate_get_index((_rep)), \
+ 0, _tmp), \
+ ##_args); \
+ vec_free(_tmp); \
+}
+#else
+#define REP_DBG(_p, _fmt, _args...)
+#endif
+
+#define foreach_replicate_dpo_error \
+_(BUFFER_ALLOCATION_FAILURE, "Buffer Allocation Failure")
+
+typedef enum {
+#define _(sym,str) REPLICATE_DPO_ERROR_##sym,
+ foreach_replicate_dpo_error
+#undef _
+ REPLICATE_DPO_N_ERROR,
+} replicate_dpo_error_t;
+
+static char * replicate_dpo_error_strings[] = {
+#define _(sym,string) string,
+ foreach_replicate_dpo_error
+#undef _
+};
+
+/**
+ * Pool of all replicate DPOs. It is not static so the DP has fast access.
+ */
+replicate_t *replicate_pool;
+
+/**
+ * The one instance of replicate main
+ */
+replicate_main_t replicate_main;
+
+static inline index_t
+replicate_get_index (const replicate_t *rep)
+{
+ return (rep - replicate_pool);
+}
+
+static inline dpo_id_t*
+replicate_get_buckets (replicate_t *rep)
+{
+ if (REP_HAS_INLINE_BUCKETS(rep))
+ {
+ return (rep->rep_buckets_inline);
+ }
+ else
+ {
+ return (rep->rep_buckets);
+ }
+}
+
+static replicate_t *
+replicate_alloc_i (void)
+{
+ replicate_t *rep;
+
+ pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES);
+ memset(rep, 0, sizeof(*rep));
+
+ vlib_validate_combined_counter(&(replicate_main.repm_counters),
+ replicate_get_index(rep));
+ vlib_zero_combined_counter(&(replicate_main.repm_counters),
+ replicate_get_index(rep));
+
+ return (rep);
+}
+
+static u8*
+replicate_format (index_t repi,
+ replicate_format_flags_t flags,
+ u32 indent,
+ u8 *s)
+{
+ vlib_counter_t to;
+ replicate_t *rep;
+ dpo_id_t *buckets;
+ u32 i;
+
+ repi &= ~MPLS_IS_REPLICATE;
+ rep = replicate_get(repi);
+ vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
+ buckets = replicate_get_buckets(rep);
+
+ s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE);
+ s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets);
+ s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes);
+
+ for (i = 0; i < rep->rep_n_buckets; i++)
+ {
+ s = format(s, "\n%U", format_white_space, indent+2);
+ s = format(s, "[%d]", i);
+ s = format(s, " %U", format_dpo_id, &buckets[i], indent+6);
+ }
+ return (s);
+}
+
+u8*
+format_replicate (u8 * s, va_list * args)
+{
+ index_t repi = va_arg(*args, index_t);
+ replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t);
+
+ return (replicate_format(repi, flags, 0, s));
+}
+
+static u8*
+format_replicate_dpo (u8 * s, va_list * args)
+{
+ index_t repi = va_arg(*args, index_t);
+ u32 indent = va_arg(*args, u32);
+
+ return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s));
+}
+
+
+static replicate_t *
+replicate_create_i (u32 num_buckets,
+ dpo_proto_t rep_proto)
+{
+ replicate_t *rep;
+
+ rep = replicate_alloc_i();
+ rep->rep_n_buckets = num_buckets;
+ rep->rep_proto = rep_proto;
+
+ if (!REP_HAS_INLINE_BUCKETS(rep))
+ {
+ vec_validate_aligned(rep->rep_buckets,
+ rep->rep_n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+ }
+
+ REP_DBG(rep, "create");
+
+ return (rep);
+}
+
+index_t
+replicate_create (u32 n_buckets,
+ dpo_proto_t rep_proto)
+{
+ return (replicate_get_index(replicate_create_i(n_buckets, rep_proto)));
+}
+
+static inline void
+replicate_set_bucket_i (replicate_t *rep,
+ u32 bucket,
+ dpo_id_t *buckets,
+ const dpo_id_t *next)
+{
+ dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next);
+}
+
+void
+replicate_set_bucket (index_t repi,
+ u32 bucket,
+ const dpo_id_t *next)
+{
+ replicate_t *rep;
+ dpo_id_t *buckets;
+
+ repi &= ~MPLS_IS_REPLICATE;
+ rep = replicate_get(repi);
+ buckets = replicate_get_buckets(rep);
+
+ ASSERT(bucket < rep->rep_n_buckets);
+
+ replicate_set_bucket_i(rep, bucket, buckets, next);
+}
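+
+/*
+ * Example (sketch): building a two-bucket replicate by hand. dpo_a and
+ * dpo_b are placeholder child DPOs.
+ *
+ *    index_t repi = replicate_create(2, DPO_PROTO_IP4);
+ *
+ *    replicate_set_bucket(repi, 0, &dpo_a);
+ *    replicate_set_bucket(repi, 1, &dpo_b);
+ */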
+
+int
+replicate_is_drop (const dpo_id_t *dpo)
+{
+ replicate_t *rep;
+ index_t repi;
+
+ if (DPO_REPLICATE != dpo->dpoi_type)
+ return (0);
+
+ repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
+ rep = replicate_get(repi);
+
+ if (1 == rep->rep_n_buckets)
+ {
+ return (dpo_is_drop(replicate_get_bucket_i(rep, 0)));
+ }
+ return (0);
+}
+
+const dpo_id_t *
+replicate_get_bucket (index_t repi,
+ u32 bucket)
+{
+ replicate_t *rep;
+
+ repi &= ~MPLS_IS_REPLICATE;
+ rep = replicate_get(repi);
+
+ return (replicate_get_bucket_i(rep, bucket));
+}
+
+
+static load_balance_path_t *
+replicate_multipath_next_hop_fixup (load_balance_path_t *nhs,
+ dpo_proto_t drop_proto)
+{
+ if (0 == vec_len(nhs))
+ {
+ load_balance_path_t *nh;
+
+ /*
+ * We need something for the replicate, so use the drop.
+ */
+ vec_add2(nhs, nh, 1);
+
+ nh->path_weight = 1;
+ dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
+ }
+
+ return (nhs);
+}
+
+/*
+ * Fill in adjacencies in block based on corresponding
+ * next hop adjacencies.
+ */
+static void
+replicate_fill_buckets (replicate_t *rep,
+ load_balance_path_t *nhs,
+ dpo_id_t *buckets,
+ u32 n_buckets)
+{
+ load_balance_path_t * nh;
+ u16 ii, bucket;
+
+ bucket = 0;
+
+ /*
+ * The next-hops have normalised weights, meaning their sum is the number
+ * of buckets we need to fill.
+ */
+ vec_foreach (nh, nhs)
+ {
+ for (ii = 0; ii < nh->path_weight; ii++)
+ {
+ ASSERT(bucket < n_buckets);
+ replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo);
+ }
+ }
+}
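+
+/*
+ * Worked example: with normalised path weights {2, 1} the sum, and hence
+ * the bucket count, is 3, and the loop above fills the buckets as
+ * [path0, path0, path1] - one bucket per unit of weight.
+ */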
+
+static inline void
+replicate_set_n_buckets (replicate_t *rep,
+ u32 n_buckets)
+{
+ rep->rep_n_buckets = n_buckets;
+}
+
+void
+replicate_multipath_update (const dpo_id_t *dpo,
+ load_balance_path_t * next_hops)
+{
+ load_balance_path_t * nh, * nhs;
+ dpo_id_t *tmp_dpo;
+ u32 ii, n_buckets;
+ replicate_t *rep;
+ index_t repi;
+
+ ASSERT(DPO_REPLICATE == dpo->dpoi_type);
+ repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
+ rep = replicate_get(repi);
+ nhs = replicate_multipath_next_hop_fixup(next_hops,
+ rep->rep_proto);
+ n_buckets = vec_len(nhs);
+
+ if (0 == rep->rep_n_buckets)
+ {
+ /*
+ * First-time initialisation: no packets in flight, so we can write
+ * at leisure.
+ */
+ replicate_set_n_buckets(rep, n_buckets);
+
+ if (!REP_HAS_INLINE_BUCKETS(rep))
+ vec_validate_aligned(rep->rep_buckets,
+ rep->rep_n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ replicate_fill_buckets(rep, nhs,
+ replicate_get_buckets(rep),
+ n_buckets);
+ }
+ else
+ {
+ /*
+ * This is a modification of an existing replicate.
+ * We need to ensure that packets in flight see a consistent state: the
+ * number of buckets the REP reports must not exceed the number it
+ * actually has. So if the number of buckets is increasing, we must
+ * update the bucket array first, then the reported number;
+ * vice-versa if the number of buckets goes down.
+ */
+ if (n_buckets == rep->rep_n_buckets)
+ {
+ /*
+ * No change in the number of buckets; we can simply write what
+ * is new over what is old.
+ */
+ replicate_fill_buckets(rep, nhs,
+ replicate_get_buckets(rep),
+ n_buckets);
+ }
+ else if (n_buckets > rep->rep_n_buckets)
+ {
+ /*
+ * We have more buckets. The old replicate map (if there is one)
+ * will remain valid, i.e. it will map to indices within range, so we
+ * update it last.
+ */
+ if (n_buckets > REP_NUM_INLINE_BUCKETS &&
+ rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
+ {
+ /*
+ * The new, increased number of buckets crosses the threshold
+ * from inline storage to out-of-line. Alloc the out-of-line buckets
+ * first, then fix up the number, then reset the inlines.
+ */
+ ASSERT(NULL == rep->rep_buckets);
+ vec_validate_aligned(rep->rep_buckets,
+ n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ replicate_fill_buckets(rep, nhs,
+ rep->rep_buckets,
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ replicate_set_n_buckets(rep, n_buckets);
+
+ CLIB_MEMORY_BARRIER();
+
+ for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++)
+ {
+ dpo_reset(&rep->rep_buckets_inline[ii]);
+ }
+ }
+ else
+ {
+ if (n_buckets <= REP_NUM_INLINE_BUCKETS)
+ {
+ /*
+ * We are not crossing the threshold and the buckets are still inline;
+ * we can write the new over the old.
+ */
+ replicate_fill_buckets(rep, nhs,
+ replicate_get_buckets(rep),
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ replicate_set_n_buckets(rep, n_buckets);
+ }
+ else
+ {
+ /*
+ * We are not crossing the threshold, but we need a new bucket array
+ * to hold the increased number of choices.
+ */
+ dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
+
+ new_buckets = NULL;
+ old_buckets = replicate_get_buckets(rep);
+
+ vec_validate_aligned(new_buckets,
+ n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ replicate_fill_buckets(rep, nhs, new_buckets, n_buckets);
+ CLIB_MEMORY_BARRIER();
+ rep->rep_buckets = new_buckets;
+ CLIB_MEMORY_BARRIER();
+ replicate_set_n_buckets(rep, n_buckets);
+
+ vec_foreach(tmp_dpo, old_buckets)
+ {
+ dpo_reset(tmp_dpo);
+ }
+ vec_free(old_buckets);
+ }
+ }
+ }
+ else
+ {
+ /*
+ * The number of buckets is shrinking.
+ */
+ if (n_buckets <= REP_NUM_INLINE_BUCKETS &&
+ rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS)
+ {
+ /*
+ * The new, decreased number of buckets crosses the threshold
+ * from out-of-line storage to inline:
+ * 1 - fill the inline buckets,
+ * 2 - fix up the number (at this point the inline buckets are
+ * in use),
+ * 3 - free the out-of-line buckets.
+ */
+ replicate_fill_buckets(rep, nhs,
+ rep->rep_buckets_inline,
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ replicate_set_n_buckets(rep, n_buckets);
+ CLIB_MEMORY_BARRIER();
+
+ vec_foreach(tmp_dpo, rep->rep_buckets)
+ {
+ dpo_reset(tmp_dpo);
+ }
+ vec_free(rep->rep_buckets);
+ }
+ else
+ {
+ /*
+ * Not crossing the threshold:
+ * 1 - update the number to the smaller size,
+ * 2 - write the new buckets,
+ * 3 - reset those no longer used.
+ */
+ dpo_id_t *buckets;
+ u32 old_n_buckets;
+
+ old_n_buckets = rep->rep_n_buckets;
+ buckets = replicate_get_buckets(rep);
+
+ replicate_set_n_buckets(rep, n_buckets);
+ CLIB_MEMORY_BARRIER();
+
+ replicate_fill_buckets(rep, nhs,
+ buckets,
+ n_buckets);
+
+ for (ii = n_buckets; ii < old_n_buckets; ii++)
+ {
+ dpo_reset(&buckets[ii]);
+ }
+ }
+ }
+ }
+
+ vec_foreach (nh, nhs)
+ {
+ dpo_reset(&nh->path_dpo);
+ }
+ vec_free(nhs);
+}
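+
+/*
+ * Summary of the ordering rules used above: data-path readers load
+ * rep_n_buckets and then index the bucket array without taking locks, so
+ * the published count must never describe buckets a reader cannot safely
+ * use. Roughly:
+ *
+ *    grow:   fill the destination buckets; CLIB_MEMORY_BARRIER();
+ *            then publish the larger count
+ *    shrink: publish the smaller count (or fill the inline buckets first
+ *            when crossing the inline threshold); CLIB_MEMORY_BARRIER();
+ *            then rewrite/reset the buckets no longer in use
+ */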
+
+static void
+replicate_lock (dpo_id_t *dpo)
+{
+ replicate_t *rep;
+
+ rep = replicate_get(dpo->dpoi_index);
+
+ rep->rep_locks++;
+}
+
+static void
+replicate_destroy (replicate_t *rep)
+{
+ dpo_id_t *buckets;
+ int i;
+
+ buckets = replicate_get_buckets(rep);
+
+ for (i = 0; i < rep->rep_n_buckets; i++)
+ {
+ dpo_reset(&buckets[i]);
+ }
+
+ REP_DBG(rep, "destroy");
+ if (!REP_HAS_INLINE_BUCKETS(rep))
+ {
+ vec_free(rep->rep_buckets);
+ }
+
+ pool_put(replicate_pool, rep);
+}
+
+static void
+replicate_unlock (dpo_id_t *dpo)
+{
+ replicate_t *rep;
+
+ rep = replicate_get(dpo->dpoi_index);
+
+ rep->rep_locks--;
+
+ if (0 == rep->rep_locks)
+ {
+ replicate_destroy(rep);
+ }
+}
+
+static void
+replicate_mem_show (void)
+{
+ fib_show_memory_usage("replicate",
+ pool_elts(replicate_pool),
+ pool_len(replicate_pool),
+ sizeof(replicate_t));
+}
+
+const static dpo_vft_t rep_vft = {
+ .dv_lock = replicate_lock,
+ .dv_unlock = replicate_unlock,
+ .dv_format = format_replicate_dpo,
+ .dv_mem_show = replicate_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a replicate
+ * object.
+ *
+ * this means that these graph nodes are ones from which a replicate is the
+ * parent object in the DPO-graph.
+ */
+const static char* const replicate_ip4_nodes[] =
+{
+ "ip4-replicate",
+ NULL,
+};
+const static char* const replicate_ip6_nodes[] =
+{
+ "ip6-replicate",
+ NULL,
+};
+const static char* const replicate_mpls_nodes[] =
+{
+ "mpls-replicate",
+ NULL,
+};
+
+const static char* const * const replicate_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = replicate_ip4_nodes,
+ [DPO_PROTO_IP6] = replicate_ip6_nodes,
+ [DPO_PROTO_MPLS] = replicate_mpls_nodes,
+};
+
+void
+replicate_module_init (void)
+{
+ dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes);
+}
+
+static clib_error_t *
+replicate_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ index_t repi = INDEX_INVALID;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &repi))
+ ;
+ else
+ break;
+ }
+
+ if (INDEX_INVALID != repi)
+ {
+ vlib_cli_output (vm, "%U", format_replicate, repi,
+ REPLICATE_FORMAT_DETAIL);
+ }
+ else
+ {
+ replicate_t *rep;
+
+ pool_foreach(rep, replicate_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_replicate,
+ replicate_get_index(rep),
+ REPLICATE_FORMAT_NONE);
+ }));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (replicate_show_command, static) = {
+ .path = "show replicate",
+ .short_help = "show replicate [<index>]",
+ .function = replicate_show,
+};
+
+typedef struct replicate_trace_t_
+{
+ index_t rep_index;
+ dpo_id_t dpo;
+} replicate_trace_t;
+
+static uword
+replicate_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
+ replicate_main_t * rm = &replicate_main;
+ u32 n_left_from, * from, * to_next, next_index;
+ u32 thread_index = vlib_get_thread_index();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 next0, ci0, bi0, bucket, repi0;
+ const replicate_t *rep0;
+ vlib_buffer_t * b0, *c0;
+ const dpo_id_t *dpo0;
+ u8 num_cloned;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ rep0 = replicate_get(repi0);
+
+ vlib_increment_combined_counter(
+ cm, thread_index, repi0, 1,
+ vlib_buffer_length_in_chain(vm, b0));
+
+ vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1);
+
+ num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index], rep0->rep_n_buckets, 128);
+
+ if (num_cloned != rep0->rep_n_buckets)
+ {
+ vlib_node_increment_counter
+ (vm, node->node_index,
+ REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
+ }
+
+ for (bucket = 0; bucket < num_cloned; bucket++)
+ {
+ ci0 = rm->clones[thread_index][bucket];
+ c0 = vlib_get_buffer(vm, ci0);
+
+ to_next[0] = ci0;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ dpo0 = replicate_get_bucket_i(rep0, bucket);
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE(c0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ replicate_trace_t *t = vlib_add_trace (vm, node, c0, sizeof (*t));
+ t->rep_index = repi0;
+ t->dpo = *dpo0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ ci0, next0);
+ if (PREDICT_FALSE (n_left_to_next == 0))
+ {
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ }
+ }
+ vec_reset_length (rm->clones[thread_index]);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
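+
+/*
+ * Note: vlib_buffer_clone() can return fewer clones than requested when
+ * buffer allocation fails. The loop above therefore walks num_cloned
+ * rather than rep_n_buckets, and the shortfall is accounted for via the
+ * BUFFER_ALLOCATION_FAILURE error counter instead of dropping the frame.
+ */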
+
+static u8 *
+format_replicate_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ replicate_trace_t *t = va_arg (*args, replicate_trace_t *);
+
+ s = format (s, "replicate: %d via %U",
+ t->rep_index,
+ format_dpo_id, &t->dpo, 0);
+ return s;
+}
+
+static uword
+ip4_replicate (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (replicate_inline (vm, node, frame));
+}
+
+/**
+ * @brief IP4 replication node
+ */
+VLIB_REGISTER_NODE (ip4_replicate_node) = {
+ .function = ip4_replicate,
+ .name = "ip4-replicate",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
+ .error_strings = replicate_dpo_error_strings,
+
+ .format_trace = format_replicate_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip4-drop",
+ },
+};
+
+static uword
+ip6_replicate (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (replicate_inline (vm, node, frame));
+}
+
+/**
+ * @brief IPv6 replication node
+ */
+VLIB_REGISTER_NODE (ip6_replicate_node) = {
+ .function = ip6_replicate,
+ .name = "ip6-replicate",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
+ .error_strings = replicate_dpo_error_strings,
+
+ .format_trace = format_replicate_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip6-drop",
+ },
+};
+
+static uword
+mpls_replicate (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (replicate_inline (vm, node, frame));
+}
+
+/**
+ * @brief MPLS replication node
+ */
+VLIB_REGISTER_NODE (mpls_replicate_node) = {
+ .function = mpls_replicate,
+ .name = "mpls-replicate",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
+ .error_strings = replicate_dpo_error_strings,
+
+ .format_trace = format_replicate_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "mpls-drop",
+ },
+};
+
+clib_error_t *
+replicate_dpo_init (vlib_main_t * vm)
+{
+ replicate_main_t * rm = &replicate_main;
+
+ vec_validate (rm->clones, vlib_num_workers());
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (replicate_dpo_init);
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
new file mode 100644
index 00000000..7383184a
--- /dev/null
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object that replicates a packet to each of its next-hops
+ */
+
+#ifndef __REPLICATE_DPO_H__
+#define __REPLICATE_DPO_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/mpls/mpls_types.h>
+
+/**
+ * replicate main
+ */
+typedef struct replicate_main_t_
+{
+ vlib_combined_counter_main_t repm_counters;
+
+ /* per-cpu vector of cloned packets */
+ u32 **clones;
+} replicate_main_t;
+
+extern replicate_main_t replicate_main;
+
+/**
+ * The number of buckets that a replicate object can have and still
+ * fit in one cache-line
+ */
+#define REP_NUM_INLINE_BUCKETS 4
+
+/**
+ * The replicate DPO provides:
+ * - replication of packets to each of the next DPOs in the chain/graph
+ * - per-route counters
+ */
+typedef struct replicate_t_ {
+ /**
+ * The number of buckets in the replicate.
+ */
+ u16 rep_n_buckets;
+
+ /**
+ * The protocol of packets that traverse this REP.
+ * Stored as a u8.
+ */
+ dpo_proto_t rep_proto;
+
+ /**
+ * The number of locks, which is approximately the number of users,
+ * of this replicate.
+ * Replicate objects can be heavily shared,
+ * so the lock count is a u32.
+ */
+ u32 rep_locks;
+
+ /**
+ * Vector of buckets containing the next DPOs, sized as rep_n_buckets
+ */
+ dpo_id_t *rep_buckets;
+
+ /**
+ * The rest of the cache line is used for buckets. In the common case
+ * where there are 4 buckets or fewer, the buckets are
+ * on the same cache-line and we save ourselves a pointer dereference in
+ * the data-path.
+ */
+ dpo_id_t rep_buckets_inline[REP_NUM_INLINE_BUCKETS];
+} replicate_t;
+
+STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES,
+ "A replicate object size exceeds one cachline");
+
+/**
+ * Flags controlling replicate formatting/display
+ */
+typedef enum replicate_format_flags_t_ {
+ REPLICATE_FORMAT_NONE,
+ REPLICATE_FORMAT_DETAIL = (1 << 0),
+} replicate_format_flags_t;
+
+extern index_t replicate_create(u32 num_buckets,
+ dpo_proto_t rep_proto);
+extern void replicate_multipath_update(
+ const dpo_id_t *dpo,
+ load_balance_path_t *next_hops);
+
+extern void replicate_set_bucket(index_t repi,
+ u32 bucket,
+ const dpo_id_t *next);
+
+extern u8* format_replicate(u8 * s, va_list * args);
+
+extern const dpo_id_t *replicate_get_bucket(index_t repi,
+ u32 bucket);
+extern int replicate_is_drop(const dpo_id_t *dpo);
+
+/**
+ * The encapsulation breakage (exposing the pool) is for fast DP access
+ */
+extern replicate_t *replicate_pool;
+static inline replicate_t*
+replicate_get (index_t repi)
+{
+ repi &= ~MPLS_IS_REPLICATE;
+ return (pool_elt_at_index(replicate_pool, repi));
+}
+
+#define REP_HAS_INLINE_BUCKETS(_rep) \
+ ((_rep)->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
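+
+/*
+ * e.g. a replicate with 3 buckets uses rep_buckets_inline; one with 5
+ * buckets spills to the heap-allocated rep_buckets vector.
+ */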
+
+static inline const dpo_id_t *
+replicate_get_bucket_i (const replicate_t *rep,
+ u32 bucket)
+{
+ ASSERT(bucket < rep->rep_n_buckets);
+
+ if (PREDICT_TRUE(REP_HAS_INLINE_BUCKETS(rep)))
+ {
+ return (&rep->rep_buckets_inline[bucket]);
+ }
+ else
+ {
+ return (&rep->rep_buckets[bucket]);
+ }
+}
+
+extern void replicate_module_init(void);
+
+#endif
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
new file mode 100644
index 00000000..52b13e04
--- /dev/null
+++ b/src/vnet/ethernet/arp.c
@@ -0,0 +1,2536 @@
+/*
+ * ethernet/arp.c: IP v4 ARP node
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vppinfra/mhash.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/mpls/mpls.h>
+
+/**
+ * @file
+ * @brief IPv4 ARP.
+ *
+ * This file contains code to manage the IPv4 ARP tables (IP Address
+ * to MAC Address lookup).
+ */
+
+
+void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
+/**
+ * @brief Per-interface ARP configuration and state
+ */
+typedef struct ethernet_arp_interface_t_
+{
+ /**
+ * Hash table of ARP entries.
+ * Since this hash table is per-interface, the key is only the IPv4 address.
+ */
+ uword *arp_entries;
+} ethernet_arp_interface_t;
+
+typedef struct
+{
+ u32 lo_addr;
+ u32 hi_addr;
+ u32 fib_index;
+} ethernet_proxy_arp_t;
+
+typedef struct
+{
+ u32 next_index;
+ uword node_index;
+ uword type_opaque;
+ uword data;
+ /* Used for arp event notification only */
+ void *data_callback;
+ u32 pid;
+} pending_resolution_t;
+
+typedef struct
+{
+ /* Hash tables mapping name to opcode. */
+ uword *opcode_by_name;
+
+ /* lightweight "glean" adjacency handling */
+ uword *pending_resolutions_by_address;
+ pending_resolution_t *pending_resolutions;
+
+ /* Mac address change notification */
+ uword *mac_changes_by_address;
+ pending_resolution_t *mac_changes;
+
+ ethernet_arp_ip4_entry_t *ip4_entry_pool;
+
+ /* ARP attack mitigation */
+ u32 arp_delete_rotor;
+ u32 limit_arp_cache_size;
+
+ /** Per interface state */
+ ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;
+
+ /* Proxy arp vector */
+ ethernet_proxy_arp_t *proxy_arps;
+
+ uword wc_ip4_arp_publisher_node;
+ uword wc_ip4_arp_publisher_et;
+} ethernet_arp_main_t;
+
+static ethernet_arp_main_t ethernet_arp_main;
+
+typedef struct
+{
+ u32 sw_if_index;
+ ethernet_arp_ip4_over_ethernet_address_t a;
+ int is_static;
+ int is_no_fib_entry;
+ int flags;
+#define ETHERNET_ARP_ARGS_REMOVE (1<<0)
+#define ETHERNET_ARP_ARGS_FLUSH (1<<1)
+#define ETHERNET_ARP_ARGS_POPULATE (1<<2)
+#define ETHERNET_ARP_ARGS_WC_PUB (1<<3)
+} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
+
+static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
+
+/* Node index for send_garp_na_process */
+u32 send_garp_na_process_node_index;
+
+static void
+set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * a);
+
+static u8 *
+format_ethernet_arp_hardware_type (u8 * s, va_list * va)
+{
+ ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
+ char *t = 0;
+ switch (h)
+ {
+#define _(n,f) case n: t = #f; break;
+ foreach_ethernet_arp_hardware_type;
+#undef _
+
+ default:
+ return format (s, "unknown 0x%x", h);
+ }
+
+ return format (s, "%s", t);
+}
+
+static u8 *
+format_ethernet_arp_opcode (u8 * s, va_list * va)
+{
+ ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
+ char *t = 0;
+ switch (o)
+ {
+#define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
+ foreach_ethernet_arp_opcode;
+#undef _
+
+ default:
+ return format (s, "unknown 0x%x", o);
+ }
+
+ return format (s, "%s", t);
+}
+
+static uword
+unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ int *result = va_arg (*args, int *);
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ int x, i;
+
+ /* Numeric opcode. */
+ if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
+ {
+ if (x >= (1 << 16))
+ return 0;
+ *result = x;
+ return 1;
+ }
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ am->opcode_by_name, &i))
+ {
+ *result = i;
+ return 1;
+ }
+
+ return 0;
+}
+
+static uword
+unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ int *result = va_arg (*args, int *);
+ if (!unformat_user
+ (input, unformat_ethernet_arp_opcode_host_byte_order, result))
+ return 0;
+
+ *result = clib_host_to_net_u16 ((u16) * result);
+ return 1;
+}
+
+static u8 *
+format_ethernet_arp_header (u8 * s, va_list * va)
+{
+ ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
+ u32 max_header_bytes = va_arg (*va, u32);
+ uword indent;
+ u16 l2_type, l3_type;
+
+ if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
+ return format (s, "ARP header truncated");
+
+ l2_type = clib_net_to_host_u16 (a->l2_type);
+ l3_type = clib_net_to_host_u16 (a->l3_type);
+
+ indent = format_get_indent (s);
+
+ s = format (s, "%U, type %U/%U, address size %d/%d",
+ format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
+ format_ethernet_arp_hardware_type, l2_type,
+ format_ethernet_type, l3_type,
+ a->n_l2_address_bytes, a->n_l3_address_bytes);
+
+ if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
+ && l3_type == ETHERNET_TYPE_IP4)
+ {
+ s = format (s, "\n%U%U/%U -> %U/%U",
+ format_white_space, indent,
+ format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
+ format_ip4_address, &a->ip4_over_ethernet[0].ip4,
+ format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
+ format_ip4_address, &a->ip4_over_ethernet[1].ip4);
+ }
+ else
+ {
+ uword n2 = a->n_l2_address_bytes;
+ uword n3 = a->n_l3_address_bytes;
+ s = format (s, "\n%U%U/%U -> %U/%U",
+ format_white_space, indent,
+ format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
+ format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
+ format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
+ format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
+ }
+
+ return s;
+}
+
+u8 *
+format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
+{
+ vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
+ ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
+ vnet_sw_interface_t *si;
+ u8 *flags = 0;
+
+ if (!e)
+ return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
+ "Flags", "Ethernet", "Interface");
+
+ si = vnet_get_sw_interface (vnm, e->sw_if_index);
+
+ if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
+ flags = format (flags, "S");
+
+ if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
+ flags = format (flags, "D");
+
+ if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY)
+ flags = format (flags, "N");
+
+ s = format (s, "%=12U%=16U%=6s%=20U%U",
+ format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
+ format_ip4_address, &e->ip4_address,
+ flags ? (char *) flags : "",
+ format_ethernet_address, e->ethernet_address,
+ format_vnet_sw_interface_name, vnm, si);
+
+ vec_free (flags);
+ return s;
+}
+
+typedef struct
+{
+ u8 packet_data[64];
+} ethernet_arp_input_trace_t;
+
+static u8 *
+format_ethernet_arp_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ethernet_arp_header,
+ t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+static u8 *
+format_arp_term_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
+
+ /* The arp-term trace data saved is either an arp or an ip6/icmp6 packet:
+ - for arp, the 1st 16-bit field is the hw type, with value 0x0001;
+ - for ip6, the first nibble has the value 6. */
+ s = format (s, "%U", t->packet_data[0] == 0 ?
+ format_ethernet_arp_header : format_ip6_header,
+ t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+static void
+arp_nbr_probe (ip_adjacency_t * adj)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ip_interface_address_t *ia;
+ ethernet_arp_header_t *h;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ ip4_address_t *src;
+ vlib_buffer_t *b;
+ vlib_main_t *vm;
+ u32 bi = 0;
+
+ vm = vlib_get_main ();
+
+ si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ return;
+ }
+
+ src =
+ ip4_interface_address_matching_destination (im,
+ &adj->sub_type.nbr.next_hop.
+ ip4,
+ adj->rewrite_header.
+ sw_if_index, &ia);
+ if (!src)
+ {
+ return;
+ }
+
+ h =
+ vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
+ &bi);
+
+ hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+ clib_memcpy (h->ip4_over_ethernet[0].ethernet,
+ hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
+
+ h->ip4_over_ethernet[0].ip4 = src[0];
+ h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+}
+
+static void
+arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
+{
+ adj_nbr_update_rewrite
+ (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ ethernet_build_rewrite (vnet_get_main (),
+ e->sw_if_index,
+ adj_get_link_type (ai), e->ethernet_address));
+}
+
+static void
+arp_mk_incomplete (adj_index_t ai)
+{
+ ip_adjacency_t *adj = adj_get (ai);
+
+ adj_nbr_update_rewrite
+ (ai,
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+ ethernet_build_rewrite (vnet_get_main (),
+ adj->rewrite_header.sw_if_index,
+ VNET_LINK_ARP,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+}
+
+static ethernet_arp_ip4_entry_t *
+arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e = NULL;
+ uword *p;
+
+ if (NULL != eai->arp_entries)
+ {
+ p = hash_get (eai->arp_entries, addr->as_u32);
+ if (!p)
+ return (NULL);
+
+ e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
+ }
+
+ return (e);
+}
+
+static adj_walk_rc_t
+arp_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+ ethernet_arp_ip4_entry_t *e = ctx;
+
+ arp_mk_complete (ai, e);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static adj_walk_rc_t
+arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+ arp_mk_incomplete (ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+void
+arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_interface_t *arp_int;
+ ethernet_arp_ip4_entry_t *e;
+ ip_adjacency_t *adj;
+
+ adj = adj_get (ai);
+
+ vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+ arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+ e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_GLEAN:
+ if (NULL != e)
+ {
+ adj_nbr_walk_nh4 (sw_if_index,
+ &e->ip4_address, arp_mk_complete_walk, e);
+ }
+ else
+ {
+ /*
+ * No matching ARP entry.
+ * Construct the rewrite required for an ARP packet, and stick
+ * that in the adj's pipe to smoke.
+ */
+ adj_nbr_update_rewrite
+ (ai,
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+ ethernet_build_rewrite
+ (vnm,
+ sw_if_index,
+ VNET_LINK_ARP,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+ /*
+ * Since the FIB has added this adj for a route, it makes sense it
+ * may want to forward traffic sometime soon. Let's send a
+ * speculative ARP - just one. Doing this periodically
+ * wouldn't be bad either, but that's more code than I'm prepared to
+ * write at this time for relatively little reward.
+ */
+ arp_nbr_probe (adj);
+ }
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ {
+ /*
+ * Construct a partial rewrite from the known ethernet mcast dest MAC
+ */
+ u8 *rewrite;
+ u8 offset;
+
+ rewrite = ethernet_build_rewrite (vnm,
+ sw_if_index,
+ adj->ia_link,
+ ethernet_ip4_mcast_dst_addr ());
+ offset = vec_len (rewrite) - 2;
+
+ /*
+ * Complete the remaining fields of the adj's rewrite to direct the
+ * completion of the rewrite at switch time by copying in the IP
+ * dst address's bytes.
+ * The offset is 2 bytes into the MAC destination address, and we copy
+ * 23 bits from the address.
+ */
+ adj_mcast_update_rewrite (ai, rewrite, offset, 0x007fffff);
+
+ break;
+ }
+ case IP_LOOKUP_NEXT_DROP:
+ case IP_LOOKUP_NEXT_PUNT:
+ case IP_LOOKUP_NEXT_LOCAL:
+ case IP_LOOKUP_NEXT_REWRITE:
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ case IP_LOOKUP_NEXT_ICMP_ERROR:
+ case IP_LOOKUP_N_NEXT:
+ ASSERT (0);
+ break;
+ }
+}
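+
+/*
+ * Worked example for the IP_LOOKUP_NEXT_MCAST case above: IPv4 multicast
+ * MAC addresses are the fixed prefix 01:00:5e plus the low 23 bits of the
+ * group address, hence the 0x007fffff mask. For a (hypothetical)
+ * destination of 224.1.2.3 the switch-time fix-up yields
+ * 01:00:5e:01:02:03.
+ */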
+
+static void
+arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, u32 fib_index)
+{
+ fib_prefix_t pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = e->ip4_address,
+ };
+
+ e->fib_entry_index =
+ fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4, &pfx.fp_addr,
+ e->sw_if_index, ~0, 1, NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
+}
+
+static int
+vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
+{
+ ethernet_arp_ip4_entry_t *e = 0;
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_over_ethernet_address_t *a = &args->a;
+ vlib_main_t *vm = vlib_get_main ();
+ int make_new_arp_cache_entry = 1;
+ uword *p;
+ pending_resolution_t *pr, *mc;
+ ethernet_arp_interface_t *arp_int;
+ int is_static = args->is_static;
+ u32 sw_if_index = args->sw_if_index;
+ int is_no_fib_entry = args->is_no_fib_entry;
+
+ vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+
+ arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+ if (NULL != arp_int->arp_entries)
+ {
+ p = hash_get (arp_int->arp_entries, a->ip4.as_u32);
+ if (p)
+ {
+ e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
+
+ /* Refuse to over-write static arp. */
+ if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
+ return -2;
+ make_new_arp_cache_entry = 0;
+ }
+ }
+
+ if (make_new_arp_cache_entry)
+ {
+ pool_get (am->ip4_entry_pool, e);
+
+ if (NULL == arp_int->arp_entries)
+ {
+ arp_int->arp_entries = hash_create (0, sizeof (u32));
+ }
+
+ hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
+
+ e->sw_if_index = sw_if_index;
+ e->ip4_address = a->ip4;
+ e->fib_entry_index = FIB_NODE_INDEX_INVALID;
+ clib_memcpy (e->ethernet_address,
+ a->ethernet, sizeof (e->ethernet_address));
+
+ if (!is_no_fib_entry)
+ {
+ arp_adj_fib_add (e,
+ ip4_fib_table_get_index_for_sw_if_index
+ (e->sw_if_index));
+ }
+ else
+ {
+ e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY;
+ }
+ }
+ else
+ {
+ /*
+ * prevent a DoS attack from the data-plane that
+ * spams us with no-op updates to the MAC address
+ */
+ if (0 == memcmp (e->ethernet_address,
+ a->ethernet, sizeof (e->ethernet_address)))
+ goto check_customers;
+
+ /* Update time stamp and ethernet address. */
+ clib_memcpy (e->ethernet_address, a->ethernet,
+ sizeof (e->ethernet_address));
+ }
+
+ e->cpu_time_last_updated = clib_cpu_time_now ();
+ if (is_static)
+ e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
+ else
+ e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
+
+ adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
+
+check_customers:
+ /* Customer(s) waiting for this address to be resolved? */
+ p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
+ if (p)
+ {
+ u32 next_index;
+ next_index = p[0];
+
+ while (next_index != (u32) ~ 0)
+ {
+ pr = pool_elt_at_index (am->pending_resolutions, next_index);
+ vlib_process_signal_event (vm, pr->node_index,
+ pr->type_opaque, pr->data);
+ next_index = pr->next_index;
+ pool_put (am->pending_resolutions, pr);
+ }
+
+ hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
+ }
+
+ /* Customer(s) requesting ARP event for this address? */
+ p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
+ if (p)
+ {
+ u32 next_index;
+ next_index = p[0];
+
+ while (next_index != (u32) ~ 0)
+ {
+ int (*fp) (u32, u8 *, u32, u32);
+ int rv = 1;
+ mc = pool_elt_at_index (am->mac_changes, next_index);
+ fp = mc->data_callback;
+
+ /* Call the user's data callback, return 1 to suppress dup events */
+ if (fp)
+ rv = (*fp) (mc->data, a->ethernet, sw_if_index, 0);
+
+ /*
+ * Signal the resolver process, as long as the user
+ * says they want to be notified
+ */
+ if (rv == 0)
+ vlib_process_signal_event (vm, mc->node_index,
+ mc->type_opaque, mc->data);
+ next_index = mc->next_index;
+ }
+ }
+
+ return 0;
+}
+
+void
+vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque, uword data)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ip4_address_t *address = address_arg;
+ uword *p;
+ pending_resolution_t *pr;
+
+ pool_get (am->pending_resolutions, pr);
+
+ pr->next_index = ~0;
+ pr->node_index = node_index;
+ pr->type_opaque = type_opaque;
+ pr->data = data;
+ pr->data_callback = 0;
+
+ p = hash_get (am->pending_resolutions_by_address, address->as_u32);
+ if (p)
+ {
+ /* Insert new resolution at the head of the list */
+ pr->next_index = p[0];
+ hash_unset (am->pending_resolutions_by_address, address->as_u32);
+ }
+
+ hash_set (am->pending_resolutions_by_address, address->as_u32,
+ pr - am->pending_resolutions);
+}
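+
+/*
+ * Example (sketch): a client process would pair the registration above
+ * with an event wait, along the lines of:
+ *
+ *    vnet_register_ip4_arp_resolution_event(vnm, &addr, my_node_index,
+ *                                           MY_EVENT_TYPE, my_data);
+ *    vlib_process_wait_for_event(vm);
+ *
+ * my_node_index, MY_EVENT_TYPE and my_data are placeholders; resolution
+ * is signalled via the vlib_process_signal_event() call in
+ * vnet_arp_set_ip4_over_ethernet_internal().
+ */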
+
+int
+vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
+ void *data_callback,
+ u32 pid,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque, uword data, int is_add)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ip4_address_t *address = address_arg;
+
+ /* Try to find an existing entry */
+ u32 *first = (u32 *) hash_get (am->mac_changes_by_address, address->as_u32);
+ u32 *p = first;
+ pending_resolution_t *mc;
+ while (p && *p != ~0)
+ {
+ mc = pool_elt_at_index (am->mac_changes, *p);
+ if (mc->node_index == node_index && mc->type_opaque == type_opaque
+ && mc->pid == pid)
+ break;
+ p = &mc->next_index;
+ }
+
+ int found = p && *p != ~0;
+ if (is_add)
+ {
+ if (found)
+ return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+
+ pool_get (am->mac_changes, mc);
+ *mc = (pending_resolution_t)
+ {
+ .next_index = ~0,
+ .node_index = node_index,
+ .type_opaque = type_opaque,
+ .data = data,
+ .data_callback = data_callback,
+ .pid = pid,
+ };
+
+ /* Insert new resolution at the end of the list */
+ u32 new_idx = mc - am->mac_changes;
+ if (p)
+ p[0] = new_idx;
+ else
+ hash_set (am->mac_changes_by_address, address->as_u32, new_idx);
+ }
+ else
+ {
+ if (!found)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ /* Clients may need to clean up pool entries, too */
+ void (*fp) (u32, u8 *) = data_callback;
+ if (fp)
+ (*fp) (mc->data, 0 /* no new mac addrs */ );
+
+ /* Remove the entry from the list and delete the entry */
+ *p = mc->next_index;
+ pool_put (am->mac_changes, mc);
+
+ /* Remove from hash if we deleted the last entry */
+ if (*p == ~0 && p == first)
+ hash_unset (am->mac_changes_by_address, address->as_u32);
+ }
+ return 0;
+}
+
+/* Either we drop the packet or we send a reply to the sender. */
+typedef enum
+{
+ ARP_INPUT_NEXT_DROP,
+ ARP_INPUT_NEXT_REPLY_TX,
+ ARP_INPUT_N_NEXT,
+} arp_input_next_t;
+
+#define foreach_ethernet_arp_error \
+ _ (replies_sent, "ARP replies sent") \
+ _ (l2_type_not_ethernet, "L2 type not ethernet") \
+ _ (l3_type_not_ip4, "L3 type not IP4") \
+ _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
+ _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
+ _ (l3_src_address_is_local, "IP4 source address matches local interface") \
+ _ (l3_src_address_learned, "ARP request IP4 source address learned") \
+ _ (replies_received, "ARP replies received") \
+ _ (opcode_not_request, "ARP opcode not request") \
+ _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \
+ _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
+ _ (gratuitous_arp, "ARP probe or announcement dropped") \
+ _ (interface_no_table, "Interface is not mapped to an IP table") \
+ _ (interface_not_ip_enabled, "Interface is not IP enabled") \
+
+typedef enum
+{
+#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
+ foreach_ethernet_arp_error
+#undef _
+ ETHERNET_ARP_N_ERROR,
+} ethernet_arp_input_error_t;
+
+
+static void
+unset_random_arp_entry (void)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ vnet_main_t *vnm = vnet_get_main ();
+ ethernet_arp_ip4_over_ethernet_address_t delme;
+ u32 index;
+
+ index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
+ am->arp_delete_rotor = index;
+
+ /* Try again from elt 0, could happen if an intfc goes down */
+ if (index == ~0)
+ {
+ index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
+ am->arp_delete_rotor = index;
+ }
+
+ /* Nothing left in the pool */
+ if (index == ~0)
+ return;
+
+ e = pool_elt_at_index (am->ip4_entry_pool, index);
+
+ clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
+ delme.ip4.as_u32 = e->ip4_address.as_u32;
+
+ vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
+}
+
+static int
+arp_unnumbered (vlib_buffer_t * p0,
+ u32 input_sw_if_index, u32 conn_sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *vim = &vnm->interface_main;
+ vnet_sw_interface_t *si;
+
+ /* Verify that the input interface is unnumbered to the connected
+ * interface, i.e. the interface on which the subnet is
+ * configured. */
+ si = &vim->sw_interfaces[input_sw_if_index];
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
+ (si->unnumbered_sw_if_index == conn_sw_if_index)))
+ {
+ /* The input interface is not unnumbered to the interface on which
+ * the sub-net covering the ARP request is configured,
+ * so this is not the unnumbered case. */
+ return 0;
+ }
+
+ return !0;
+}
+
+static u32
+arp_learn (vnet_main_t * vnm,
+ ethernet_arp_main_t * am, u32 sw_if_index, void *addr)
+{
+ if (am->limit_arp_cache_size &&
+ pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
+ unset_random_arp_entry ();
+
+ vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0, 0);
+ return (ETHERNET_ARP_ERROR_l3_src_address_learned);
+}
+
+static uword
+arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im4 = &ip4_main;
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ethernet_arp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ vnet_hw_interface_t *hw_if0;
+ ethernet_arp_header_t *arp0;
+ ethernet_header_t *eth_rx, *eth_tx;
+ ip4_address_t *if_addr0, proxy_src;
+ u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
+ u8 is_request0, dst_is_local0, is_unnum0, is_vrrp_reply0;
+ ethernet_proxy_arp_t *pa;
+ fib_node_index_t dst_fei, src_fei;
+ fib_prefix_t pfx0;
+ fib_entry_flag_t src_flags, dst_flags;
+ u8 *rewrite0, rewrite0_len;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ pa = 0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ arp0 = vlib_buffer_get_current (p0);
+ /* Get the ethernet header. */
+ eth_rx = ethernet_buffer_get_header (p0);
+
+ is_request0 = arp0->opcode
+ == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
+
+ error0 = ETHERNET_ARP_ERROR_replies_sent;
+
+ error0 =
+ (arp0->l2_type !=
+ clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
+ ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
+ error0 =
+ (arp0->l3_type !=
+ clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
+ ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* not playing the ARP game if the interface is not IPv4 enabled */
+ error0 =
+ (im4->ip_enabled_by_sw_if_index[sw_if_index0] == 0 ?
+ ETHERNET_ARP_ERROR_interface_not_ip_enabled : error0);
+
+ if (error0)
+ goto drop2;
+
+ /* Check that IP address is local and matches incoming interface. */
+ fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+ if (~0 == fib_index0)
+ {
+ error0 = ETHERNET_ARP_ERROR_interface_no_table;
+ goto drop2;
+
+ }
+ dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->ip4_over_ethernet[1].ip4,
+ 32);
+ dst_flags = fib_entry_get_flags (dst_fei);
+
+ conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei);
+
+ /* Honor unnumbered interface, if any */
+ is_unnum0 = sw_if_index0 != conn_sw_if_index0;
+
+ {
+ /*
+ * We're looking for FIB entries that indicate the source
+ * is attached. There may be more specific non-attached
+ * routes that match the source, but these do not influence
+ * whether we respond to an ARP request, i.e. they do not
+ * influence whether we are the correct way for the sender
+ * to reach us; they only affect how we reach the sender.
+ */
+ fib_entry_t *src_fib_entry;
+ fib_entry_src_t *src;
+ fib_source_t source;
+ fib_prefix_t pfx;
+ int attached;
+ int mask;
+
+ mask = 32;
+ attached = 0;
+
+ do
+ {
+ src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->
+ ip4_over_ethernet[0].ip4,
+ mask);
+ src_fib_entry = fib_entry_get (src_fei);
+
+ /*
+ * It's possible that the source that provides the
+ * flags we need, or the flags we must not have,
+ * is not the best source, so check them all.
+ */
+ /* *INDENT-OFF* */
+ FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
+ ({
+ src_flags = fib_entry_get_flags_for_source (src_fei, source);
+
+ /* Reject requests/replies with our local interface
+ address. */
+ if (FIB_ENTRY_FLAG_LOCAL & src_flags)
+ {
+ error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
+ /*
+ * When VPP has an interface whose address is also
+ * applied to a TAP interface on the host, then VPP's
+ * TAP interface will be unnumbered to the 'real'
+ * interface and do proxy ARP from the host.
+ * The curious aspect of this setup is that ARP requests
+ * from the host will come from the VPP's own address.
+ * So don't drop immediately here, instead go see if this
+ * is a proxy ARP case.
+ */
+ goto drop1;
+ }
+ /* The source must also be local to the subnet of the matching
+ * interface address. */
+ if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
+ (FIB_ENTRY_FLAG_CONNECTED & src_flags))
+ {
+ attached = 1;
+ break;
+ }
+ /*
+ * else
+ * The packet was sent from an address that is neither
+ * connected nor attached, i.e. it is not from an
+ * address that is covered by a link's sub-net,
+ * nor is it an already-learned host.
+ */
+ }));
+ /* *INDENT-ON* */
+
+ /*
+ * shorter mask lookup for the next iteration.
+ */
+ fib_entry_get_prefix (src_fei, &pfx);
+ mask = pfx.fp_len - 1;
+
+ /*
+ * Continue until we hit the default route or we find
+ * the attached entry we are looking for. The most likely
+ * outcome is that we find the attached entry with the first
+ * source on the first lookup.
+ */
+ }
+ while (!attached &&
+ !fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));
+
+ if (!attached)
+ {
+ /*
+ * The matching route is not attached, i.e. it was
+ * added as a result of routing rather than interface/ARP
+ * configuration.
+ */
+ error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
+ goto drop2;
+ }
+ }
+
+ if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
+ {
+ error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+ goto drop1;
+ }
+
+ if (sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
+ {
+ /*
+ * The interface the ARP was received on is not the interface
+ * on which the covering prefix is configured. Maybe this is a
+ * case for unnumbered.
+ */
+ is_unnum0 = 1;
+ }
+
+ dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
+ fib_entry_get_prefix (dst_fei, &pfx0);
+ if_addr0 = &pfx0.fp_addr.ip4;
+
+ is_vrrp_reply0 =
+ ((arp0->opcode ==
+ clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
+ &&
+ (!memcmp
+ (arp0->ip4_over_ethernet[0].ethernet, vrrp_prefix,
+ sizeof (vrrp_prefix))));
+
+ /* Trash ARP packets whose ARP-level source addresses do not
+ match their L2-frame-level source addresses, unless it's
+ a reply from a VRRP virtual router */
+ if (memcmp
+ (eth_rx->src_address, arp0->ip4_over_ethernet[0].ethernet,
+ sizeof (eth_rx->src_address)) && !is_vrrp_reply0)
+ {
+ error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
+ goto drop2;
+ }
+
+ /* Learn or update sender's mapping only for replies to addresses
+ * that are local to the subnet */
+ if (arp0->opcode ==
+ clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) &&
+ dst_is_local0)
+ {
+ error0 = arp_learn (vnm, am, sw_if_index0,
+ &arp0->ip4_over_ethernet[0]);
+ goto drop1;
+ }
+
+ send_reply:
+ /* Send a reply.
+ An adjacency to the sender is not always present,
+ so we use the interface to build us a rewrite string
+ which will contain all the necessary tags. */
+ rewrite0 = ethernet_build_rewrite (vnm, sw_if_index0,
+ VNET_LINK_ARP,
+ eth_rx->src_address);
+ rewrite0_len = vec_len (rewrite0);
+
+ /* Figure out how much to rewind current data from adjacency. */
+ vlib_buffer_advance (p0, -rewrite0_len);
+ eth_tx = vlib_buffer_get_current (p0);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ /* Send reply back through input interface */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ next0 = ARP_INPUT_NEXT_REPLY_TX;
+
+ arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
+
+ arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
+
+ clib_memcpy (arp0->ip4_over_ethernet[0].ethernet,
+ hw_if0->hw_address, 6);
+ clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
+ if_addr0->data_u32;
+
+ /* Hardware must be ethernet-like. */
+ ASSERT (vec_len (hw_if0->hw_address) == 6);
+
+ /* The rx and tx ethernet headers will overlap in the case
+ * when we received a tagged VLAN=0 packet, but we are sending
+ * back untagged */
+ clib_memcpy (eth_tx, rewrite0, vec_len (rewrite0));
+ vec_free (rewrite0);
+
+ if (NULL == pa)
+ {
+ if (is_unnum0)
+ {
+ if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
+ goto drop2;
+ }
+ }
+
+ /* We are going to reply to this request, so, in the absence of
+ errors, learn the sender */
+ if (!error0)
+ error0 = arp_learn (vnm, am, sw_if_index0,
+ &arp0->ip4_over_ethernet[1]);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+
+ n_replies_sent += 1;
+ continue;
+
+ drop1:
+ if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
+ (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
+ arp0->ip4_over_ethernet[1].ip4.as_u32))
+ {
+ error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
+ goto drop2;
+ }
+ /* See if proxy arp is configured for the address */
+ if (is_request0)
+ {
+ vnet_sw_interface_t *si;
+ u32 this_addr = clib_net_to_host_u32
+ (arp0->ip4_over_ethernet[1].ip4.as_u32);
+ u32 fib_index0;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index0);
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
+ goto drop2;
+
+ fib_index0 = vec_elt (im4->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ vec_foreach (pa, am->proxy_arps)
+ {
+ u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
+ u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
+
+ /* an ARP request hit in the proxy-arp table? */
+ if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
+ (fib_index0 == pa->fib_index))
+ {
+ proxy_src.as_u32 =
+ arp0->ip4_over_ethernet[1].ip4.data_u32;
+
+ /*
+ * change the interface address to the proxied one
+ */
+ if_addr0 = &proxy_src;
+ is_unnum0 = 0;
+ n_proxy_arp_replies_sent++;
+ goto send_reply;
+ }
+ }
+ }
+
+ drop2:
+
+ next0 = ARP_INPUT_NEXT_DROP;
+ p0->error = node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_error_count (vm, node->node_index,
+ ETHERNET_ARP_ERROR_replies_sent,
+ n_replies_sent - n_proxy_arp_replies_sent);
+
+ vlib_error_count (vm, node->node_index,
+ ETHERNET_ARP_ERROR_proxy_arp_replies_sent,
+ n_proxy_arp_replies_sent);
+ return frame->n_vectors;
+}
+
+static char *ethernet_arp_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ethernet_arp_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (arp_input_node, static) =
+{
+ .function = arp_input,
+ .name = "arp-input",
+ .vector_size = sizeof (u32),
+ .n_errors = ETHERNET_ARP_N_ERROR,
+ .error_strings = ethernet_arp_error_strings,
+ .n_next_nodes = ARP_INPUT_N_NEXT,
+ .next_nodes = {
+ [ARP_INPUT_NEXT_DROP] = "error-drop",
+ [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
+ },
+ .format_buffer = format_ethernet_arp_header,
+ .format_trace = format_ethernet_arp_input_trace,
+};
+/* *INDENT-ON* */
+
+static int
+ip4_arp_entry_sort (void *a1, void *a2)
+{
+ ethernet_arp_ip4_entry_t *e1 = a1;
+ ethernet_arp_ip4_entry_t *e2 = a2;
+
+ int cmp;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
+ if (!cmp)
+ cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
+ return cmp;
+}
+
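+/* Return a vector of ARP entries, optionally filtered by sw_if_index
+ * (~0 selects all interfaces). The caller must vec_free() the result. */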
+ethernet_arp_ip4_entry_t *
+ip4_neighbor_entries (u32 sw_if_index)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *n, *ns = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, am->ip4_entry_pool, ({
+ if (sw_if_index != ~0 && n->sw_if_index != sw_if_index)
+ continue;
+ vec_add1 (ns, n[0]);
+ }));
+ /* *INDENT-ON* */
+
+ if (ns)
+ vec_sort_with_function (ns, ip4_arp_entry_sort);
+ return ns;
+}
+
+static clib_error_t *
+show_ip4_arp (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e, *es;
+ ethernet_proxy_arp_t *pa;
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ /* Filter entries by interface if given. */
+ sw_if_index = ~0;
+ (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
+
+ es = ip4_neighbor_entries (sw_if_index);
+ if (es)
+ {
+ vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
+ vec_foreach (e, es)
+ {
+ vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
+ }
+ vec_free (es);
+ }
+
+ if (vec_len (am->proxy_arps))
+ {
+ vlib_cli_output (vm, "Proxy arps enabled for:");
+ vec_foreach (pa, am->proxy_arps)
+ {
+ vlib_cli_output (vm, "Fib_index %d %U - %U ",
+ pa->fib_index,
+ format_ip4_address, &pa->lo_addr,
+ format_ip4_address, &pa->hi_addr);
+ }
+ }
+
+ return error;
+}
+
+/*?
+ * Display all the IPv4 ARP entries.
+ *
+ * @cliexpar
+ * Example of how to display the IPv4 ARP table:
+ * @cliexstart{show ip arp}
+ * Time FIB IP4 Flags Ethernet Interface
+ * 346.3028 0 6.1.1.3 de:ad:be:ef:ba:be GigabitEthernet2/0/0
+ * 3077.4271 0 6.1.1.4 S de:ad:be:ef:ff:ff GigabitEthernet2/0/0
+ * 2998.6409 1 6.2.2.3 de:ad:be:ef:00:01 GigabitEthernet2/0/0
+ * Proxy arps enabled for:
+ * Fib_index 0 6.0.0.1 - 6.0.0.11
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
+ .path = "show ip arp",
+ .function = show_ip4_arp,
+ .short_help = "show ip arp",
+};
+/* *INDENT-ON* */
+
+typedef struct
+{
+ pg_edit_t l2_type, l3_type;
+ pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
+ pg_edit_t opcode;
+ struct
+ {
+ pg_edit_t ethernet;
+ pg_edit_t ip4;
+ } ip4_over_ethernet[2];
+} pg_ethernet_arp_header_t;
+
+static inline void
+pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
+{
+  /* Initialize fields that are not bit fields in the ARP header. */
+#define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
+ _(l2_type);
+ _(l3_type);
+ _(n_l2_address_bytes);
+ _(n_l3_address_bytes);
+ _(opcode);
+ _(ip4_over_ethernet[0].ethernet);
+ _(ip4_over_ethernet[0].ip4);
+ _(ip4_over_ethernet[1].ethernet);
+ _(ip4_over_ethernet[1].ip4);
+#undef _
+}
+
+uword
+unformat_pg_arp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ethernet_arp_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
+ &group_index);
+ pg_ethernet_arp_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
+ pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
+ pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
+ pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
+
+ if (!unformat (input, "%U: %U/%U -> %U/%U",
+ unformat_pg_edit,
+ unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
+ unformat_pg_edit,
+ unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
+ unformat_pg_edit,
+ unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
+ {
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+ }
+ return 1;
+}
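+
+/* Illustrative packet-generator stanza matched by the format above
+ * (opcode, then sender MAC/IP -> target MAC/IP):
+ *
+ *   request: 11:22:33:44:55:66/1.2.3.4 -> 00:00:00:00:00:00/1.2.3.5
+ */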
+
+clib_error_t *
+ip4_set_arp_limit (u32 arp_limit)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+
+ am->limit_arp_cache_size = arp_limit;
+ return 0;
+}
+
+/**
+ * @brief Control Plane hook to remove an ARP entry
+ */
+int
+vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+
+ args.sw_if_index = sw_if_index;
+ args.flags = ETHERNET_ARP_ARGS_REMOVE;
+ clib_memcpy (&args.a, a, sizeof (*a));
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+/**
+ * @brief Internally generated event to flush the ARP cache on an
+ * interface state change event.
+ * A flush will remove dynamic ARP entries; for static entries it removes
+ * the MAC address from the corresponding adjacencies.
+ */
+static int
+vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+
+ args.sw_if_index = sw_if_index;
+ args.flags = ETHERNET_ARP_ARGS_FLUSH;
+ clib_memcpy (&args.a, a, sizeof (*a));
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+/**
+ * @brief Internally generated event to populate the ARP cache on an
+ * interface state change event.
+ * For static entries this will re-source the adjacencies.
+ *
+ * @param sw_if_index The interface on which the ARP entries are acted on
+ */
+static int
+vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+
+ args.sw_if_index = sw_if_index;
+ args.flags = ETHERNET_ARP_ARGS_POPULATE;
+ clib_memcpy (&args.a, a, sizeof (*a));
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+/**
+ * @brief publish wildcard arp event
+ * @param sw_if_index The interface on which the ARP entries are acted on
+ */
+static int
+vnet_arp_wc_publish (u32 sw_if_index, void *a_arg)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
+ .flags = ETHERNET_ARP_ARGS_WC_PUB,
+ .sw_if_index = sw_if_index,
+ .a = *a
+ };
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+static void
+vnet_arp_wc_publish_internal (vnet_main_t * vnm,
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t *
+ args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ uword ni = am->wc_ip4_arp_publisher_node;
+ uword et = am->wc_ip4_arp_publisher_et;
+
+ if (ni == (uword) ~ 0)
+ return;
+ wc_arp_report_t *r =
+ vlib_process_signal_event_data (vm, ni, et, 1, sizeof *r);
+ r->ip4 = args->a.ip4.as_u32;
+ r->sw_if_index = args->sw_if_index;
+ memcpy (r->mac, args->a.ethernet, sizeof r->mac);
+}
+
+void
+wc_arp_set_publisher_node (uword node_index, uword event_type)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ am->wc_ip4_arp_publisher_node = node_index;
+ am->wc_ip4_arp_publisher_et = event_type;
+}
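+
+/* Illustrative registration by a consumer process (my_node and
+ * MY_EVENT_TYPE are hypothetical):
+ *
+ *   wc_arp_set_publisher_node (my_node.index, MY_EVENT_TYPE);
+ *
+ * vnet_arp_wc_publish_internal() then delivers a wc_arp_report_t to
+ * that process via vlib_process_signal_event_data(). */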
+
+/*
+ * arp_add_del_interface_address
+ *
+ * callback when an interface address is added or deleted
+ */
+static void
+arp_add_del_interface_address (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length,
+ u32 if_address_index, u32 is_del)
+{
+ /*
+ * Flush the ARP cache of all entries covered by the address
+ * that is being removed.
+ */
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+
+ if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
+ return;
+
+ if (is_del)
+ {
+ ethernet_arp_interface_t *eai;
+ u32 i, *to_delete = 0;
+ hash_pair_t *pair;
+
+ eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (pair, eai->arp_entries,
+ ({
+ e = pool_elt_at_index(am->ip4_entry_pool,
+ pair->value[0]);
+ if (ip4_destination_matches_route (im, &e->ip4_address,
+ address, address_length))
+ {
+ vec_add1 (to_delete, e - am->ip4_entry_pool);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (to_delete); i++)
+ {
+ ethernet_arp_ip4_over_ethernet_address_t delme;
+ e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
+
+ clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
+ delme.ip4.as_u32 = e->ip4_address.as_u32;
+
+ vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
+ e->sw_if_index, &delme);
+ }
+
+ vec_free (to_delete);
+ }
+}
+
+void
+arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, u32 fib_index)
+{
+ if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = e->ip4_address,
+ };
+      /* Use the caller-supplied fib_index: during a table re-bind the
+       * interface may already be bound to the new table, but the adj-fib
+       * must be removed from the table it was added to. */
+
+ fib_table_entry_path_remove (fib_index, &pfx,
+ FIB_SOURCE_ADJ,
+ DPO_PROTO_IP4,
+ &pfx.fp_addr,
+ e->sw_if_index, ~0, 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
+ }
+}
+
+static void
+arp_table_bind (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_interface_t *eai;
+ ethernet_arp_ip4_entry_t *e;
+ hash_pair_t *pair;
+
+ /*
+ * the IP table that the interface is bound to has changed.
+ * reinstall all the adj fibs.
+ */
+
+ if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
+ return;
+
+ eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (pair, eai->arp_entries,
+ ({
+ e = pool_elt_at_index(am->ip4_entry_pool,
+ pair->value[0]);
+ /*
+ * remove the adj-fib from the old table and add to the new
+ */
+ arp_adj_fib_remove(e, old_fib_index);
+ arp_adj_fib_add(e, new_fib_index);
+ }));
+ /* *INDENT-ON* */
+
+}
+
+static clib_error_t *
+ethernet_arp_init (vlib_main_t * vm)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ip4_main_t *im = &ip4_main;
+ clib_error_t *error;
+ pg_node_t *pn;
+
+ if ((error = vlib_call_init_function (vm, ethernet_init)))
+ return error;
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
+
+ pn = pg_get_node (arp_input_node.index);
+ pn->unformat_edit = unformat_pg_arp_header;
+
+ am->opcode_by_name = hash_create_string (0, sizeof (uword));
+#define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
+ foreach_ethernet_arp_opcode;
+#undef _
+
+ /* $$$ configurable */
+ am->limit_arp_cache_size = 50000;
+
+ am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
+ am->mac_changes_by_address = hash_create (0, sizeof (uword));
+ am->wc_ip4_arp_publisher_node = (uword) ~ 0;
+
+ /* don't trace ARP error packets */
+ {
+ vlib_node_runtime_t *rt =
+ vlib_node_get_runtime (vm, arp_input_node.index);
+
+#define _(a,b) \
+ vnet_pcap_drop_trace_filter_add_del \
+ (rt->errors[ETHERNET_ARP_ERROR_##a], \
+ 1 /* is_add */);
+ foreach_ethernet_arp_error
+#undef _
+ }
+
+ ip4_add_del_interface_address_callback_t cb;
+ cb.function = arp_add_del_interface_address;
+ cb.function_opaque = 0;
+ vec_add1 (im->add_del_interface_address_callbacks, cb);
+
+ ip4_table_bind_callback_t cbt;
+ cbt.function = arp_table_bind;
+ cbt.function_opaque = 0;
+ vec_add1 (im->table_bind_callbacks, cbt);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ethernet_arp_init);
+
+static void
+arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+
+ arp_adj_fib_remove (e,
+ ip4_fib_table_get_index_for_sw_if_index
+ (e->sw_if_index));
+ hash_unset (eai->arp_entries, e->ip4_address.as_u32);
+ pool_put (am->ip4_entry_pool, e);
+}
+
+static inline int
+vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ ethernet_arp_interface_t *eai;
+
+ if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
+ return 0;
+
+ eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
+
+ e = arp_entry_find (eai, &args->a.ip4);
+
+ if (NULL != e)
+ {
+      /* Walk the adjacencies before freeing: arp_entry_free() returns
+       * the entry to the pool, after which e must not be dereferenced. */
+      adj_nbr_walk_nh4 (e->sw_if_index,
+			&e->ip4_address, arp_mk_incomplete_walk, NULL);
+
+      arp_entry_free (eai, e);
+ }
+
+ return 0;
+}
+
+static int
+vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ ethernet_arp_interface_t *eai;
+
+ if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
+ return 0;
+
+ eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
+
+ e = arp_entry_find (eai, &args->a.ip4);
+
+ if (NULL != e)
+ {
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_incomplete_walk, e);
+
+ /*
+ * The difference between flush and unset, is that an unset
+ * means delete for static and dynamic entries. A flush
+ * means delete only for dynamic. Flushing is what the DP
+ * does in response to interface events. unset is only done
+ * by the control plane.
+ */
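+      /* Note: the mask below clears all flag bits except DYNAMIC, so a
+       * flushed static entry is retained with no flags set. */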
+ if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
+ {
+ e->flags &= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
+ }
+ else if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
+ {
+ arp_entry_free (eai, e);
+ }
+ }
+ return (0);
+}
+
+static int
+vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ ethernet_arp_interface_t *eai;
+
+ vec_validate (am->ethernet_arp_by_sw_if_index, args->sw_if_index);
+ eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
+
+ e = arp_entry_find (eai, &args->a.ip4);
+
+ if (NULL != e)
+ {
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_complete_walk, e);
+ }
+ return (0);
+}
+
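+/* All ARP table mutators above hand their work to the main thread via
+ * vl_api_rpc_call_main_thread(); this callback runs there (note the
+ * thread-index ASSERT) and dispatches on args->flags. */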
+static void
+set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * a)
+{
+ vnet_main_t *vm = vnet_get_main ();
+ ASSERT (vlib_get_thread_index () == 0);
+
+ if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
+ vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
+ else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
+ vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
+ else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
+ vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
+ else if (a->flags & ETHERNET_ARP_ARGS_WC_PUB)
+ vnet_arp_wc_publish_internal (vm, a);
+ else
+ vnet_arp_set_ip4_over_ethernet_internal (vm, a);
+}
+
+/**
+ * @brief Invoked when the interface's admin state changes
+ */
+static clib_error_t *
+ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
+ u32 sw_if_index, u32 flags)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ u32 i, *to_delete = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, am->ip4_entry_pool,
+ ({
+ if (e->sw_if_index == sw_if_index)
+ vec_add1 (to_delete,
+ e - am->ip4_entry_pool);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (to_delete); i++)
+ {
+ ethernet_arp_ip4_over_ethernet_address_t delme;
+ e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
+
+ clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
+ delme.ip4.as_u32 = e->ip4_address.as_u32;
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ {
+ vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
+ }
+ else
+ {
+ vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
+ }
+
+ }
+ vec_free (to_delete);
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
+
+static void
+increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
+{
+ u8 old;
+ int i;
+
+ for (i = 3; i >= 0; i--)
+ {
+ old = a->ip4.as_u8[i];
+ a->ip4.as_u8[i] += 1;
+ if (old < a->ip4.as_u8[i])
+ break;
+ }
+
+ for (i = 5; i >= 0; i--)
+ {
+ old = a->ethernet[i];
+ a->ethernet[i] += 1;
+ if (old < a->ethernet[i])
+ break;
+ }
+}
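+
+/* e.g. 1.2.3.255 increments to 1.2.4.0 and aa:bb:cc:dd:ee:ff to
+ * aa:bb:cc:dd:ef:00; the carry propagates from the least significant
+ * byte. */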
+
+int
+vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg,
+ int is_static, int is_no_fib_entry)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+
+ args.sw_if_index = sw_if_index;
+ args.is_static = is_static;
+ args.is_no_fib_entry = is_no_fib_entry;
+ args.flags = 0;
+ clib_memcpy (&args.a, a, sizeof (*a));
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+int
+vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
+ ip4_address_t * hi_addr, u32 fib_index, int is_del)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_proxy_arp_t *pa;
+ u32 found_at_index = ~0;
+
+ vec_foreach (pa, am->proxy_arps)
+ {
+ if (pa->lo_addr == lo_addr->as_u32
+ && pa->hi_addr == hi_addr->as_u32 && pa->fib_index == fib_index)
+ {
+ found_at_index = pa - am->proxy_arps;
+ break;
+ }
+ }
+
+ if (found_at_index != ~0)
+ {
+ /* Delete, otherwise it's already in the table */
+ if (is_del)
+ vec_delete (am->proxy_arps, 1, found_at_index);
+ return 0;
+ }
+ /* delete, no such entry */
+ if (is_del)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ /* add, not in table */
+ vec_add2 (am->proxy_arps, pa, 1);
+ pa->lo_addr = lo_addr->as_u32;
+ pa->hi_addr = hi_addr->as_u32;
+ pa->fib_index = fib_index;
+ return 0;
+}
+
+/*
+ * Remove any proxy ARP entries associated with the
+ * specified FIB.
+ */
+int
+vnet_proxy_arp_fib_reset (u32 fib_id)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_proxy_arp_t *pa;
+ u32 *entries_to_delete = 0;
+ u32 fib_index;
+ int i;
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
+ if (~0 == fib_index)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ vec_foreach (pa, am->proxy_arps)
+ {
+ if (pa->fib_index == fib_index)
+ {
+ vec_add1 (entries_to_delete, pa - am->proxy_arps);
+ }
+ }
+
+ for (i = 0; i < vec_len (entries_to_delete); i++)
+ {
+ vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
+ }
+
+ vec_free (entries_to_delete);
+
+ return 0;
+}
+
+static clib_error_t *
+ip_arp_add_del_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index;
+ ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
+ int addr_valid = 0;
+ int is_del = 0;
+ int count = 1;
+ u32 fib_index = 0;
+ u32 fib_id;
+ int is_static = 0;
+ int is_no_fib_entry = 0;
+ int is_proxy = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
+ if (unformat (input, "%U %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip4_address, &addr.ip4,
+ unformat_ethernet_address, &addr.ethernet))
+ addr_valid = 1;
+
+ else if (unformat (input, "delete") || unformat (input, "del"))
+ is_del = 1;
+
+ else if (unformat (input, "static"))
+ is_static = 1;
+
+ else if (unformat (input, "no-fib-entry"))
+ is_no_fib_entry = 1;
+
+ else if (unformat (input, "count %d", &count))
+ ;
+
+ else if (unformat (input, "fib-id %d", &fib_id))
+ {
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
+
+ if (~0 == fib_index)
+ return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
+ }
+
+ else if (unformat (input, "proxy %U - %U",
+ unformat_ip4_address, &lo_addr.ip4,
+ unformat_ip4_address, &hi_addr.ip4))
+ is_proxy = 1;
+ else
+ break;
+ }
+
+ if (is_proxy)
+ {
+ (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
+ fib_index, is_del);
+ return 0;
+ }
+
+ if (addr_valid)
+ {
+ int i;
+
+ for (i = 0; i < count; i++)
+ {
+ if (is_del == 0)
+ {
+ uword event_type, *event_data = 0;
+
+ /* Park the debug CLI until the arp entry is installed */
+ vnet_register_ip4_arp_resolution_event
+ (vnm, &addr.ip4, vlib_current_process (vm),
+ 1 /* type */ , 0 /* data */ );
+
+ vnet_arp_set_ip4_over_ethernet
+ (vnm, sw_if_index, &addr, is_static, is_no_fib_entry);
+
+ vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+ if (event_type != 1)
+ clib_warning ("event type %d unexpected", event_type);
+ }
+ else
+ vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
+
+ increment_ip4_and_mac_address (&addr);
+ }
+ }
+ else
+ {
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+/*?
+ * Add or delete IPv4 ARP cache entries.
+ *
+ * @note 'set ip arp' options (e.g. delete, static, 'fib-id <id>',
+ * 'count <number>', 'interface ip4_addr mac_addr') can be added in
+ * any order and combination.
+ *
+ * @cliexpar
+ * @parblock
+ * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
+ * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
+ * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
+ *
+ * To add or delete an IPv4 ARP cache entry to or from a specific fib
+ * table:
+ * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ *
+ * Add or delete IPv4 static ARP cache entries as follows:
+ * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ *
+ * For testing / debugging purposes, the 'set ip arp' command can add or
+ * delete multiple entries. Supply the 'count N' parameter:
+ * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @endparblock
+ ?*/
+VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
+ .path = "set ip arp",
+ .short_help =
+ "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ .function = ip_arp_add_del_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_int_proxy_arp_command_fn (vlib_main_t * vm,
+ unformat_input_t *
+ input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index;
+ vnet_sw_interface_t *si;
+ int enable = 0;
+ int intfc_set = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ intfc_set = 1;
+ else if (unformat (input, "enable") || unformat (input, "on"))
+ enable = 1;
+ else if (unformat (input, "disable") || unformat (input, "off"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (intfc_set == 0)
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ ASSERT (si);
+ if (enable)
+ si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
+ else
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+/*?
+ * Enable proxy-arp on an interface. The vpp stack will answer ARP
+ * requests for the indicated address range. Multiple proxy-arp
+ * ranges may be provisioned.
+ *
+ * @note Proxy ARP as a technology is infamous for blackholing traffic.
+ * Also, the underlying implementation has not been performance-tuned.
+ * Avoid creating an unnecessarily large set of ranges.
+ *
+ * @cliexpar
+ * To enable proxy arp on a range of addresses, use:
+ * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
+ * Append 'del' to delete a range of proxy ARP addresses:
+ * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
+ * You must then specifically enable proxy arp on individual interfaces:
+ * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
+ * To disable proxy arp on an individual interface:
+ * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
+ ?*/
+VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
+ .path = "set interface proxy-arp",
+ .short_help =
+ "set interface proxy-arp <intfc> [enable|disable]",
+ .function = set_int_proxy_arp_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
+ * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
+ */
+typedef enum
+{
+ ARP_TERM_NEXT_L2_OUTPUT,
+ ARP_TERM_NEXT_DROP,
+ ARP_TERM_N_NEXT,
+} arp_term_next_t;
+
+u32 arp_term_next_node_index[32];
+
+static uword
+arp_term_l2bd (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ l2input_main_t *l2im = &l2input_main;
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 n_replies_sent = 0;
+ u16 last_bd_index = ~0;
+ l2_bridge_domain_t *last_bd_config = 0;
+ l2_input_config_t *cfg0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ethernet_header_t *eth0;
+ ethernet_arp_header_t *arp0;
+ ip6_header_t *iph0;
+ u8 *l3h0;
+ u32 pi0, error0, next0, sw_if_index0;
+ u16 ethertype0;
+ u16 bd_index0;
+ u32 ip0;
+ u8 *macp0;
+ u8 is_vrrp_reply0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ // Terminate only local (SHG == 0) ARP
+ if (vnet_buffer (p0)->l2.shg != 0)
+ goto next_l2_feature;
+
+ eth0 = vlib_buffer_get_current (p0);
+ l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
+ ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
+ arp0 = (ethernet_arp_header_t *) l3h0;
+
+ if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) ||
+ (arp0->opcode !=
+ clib_host_to_net_u16
+ (ETHERNET_ARP_OPCODE_request))))
+ goto check_ip6_nd;
+
+ /* Must be ARP request packet here */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (p0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ u8 *t0 = vlib_add_trace (vm, node, p0,
+ sizeof (ethernet_arp_input_trace_t));
+ clib_memcpy (t0, l3h0, sizeof (ethernet_arp_input_trace_t));
+ }
+
+ error0 = ETHERNET_ARP_ERROR_replies_sent;
+ error0 =
+ (arp0->l2_type !=
+ clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
+ ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
+ error0 =
+ (arp0->l3_type !=
+ clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
+ ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ if (error0)
+ goto drop;
+
+ is_vrrp_reply0 =
+ ((arp0->opcode ==
+ clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
+ &&
+ (!memcmp
+ (arp0->ip4_over_ethernet[0].ethernet, vrrp_prefix,
+ sizeof (vrrp_prefix))));
+
+ /* Trash ARP packets whose ARP-level source addresses do not
+ match their L2-frame-level source addresses, unless it's
+ a reply from a VRRP virtual router */
+ if (PREDICT_FALSE
+ (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
+ sizeof (eth0->src_address)) && !is_vrrp_reply0))
+ {
+ error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
+ goto drop;
+ }
+
+	  /* Check if anyone wants ARP request events for L2 BDs */
+ {
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ if (am->wc_ip4_arp_publisher_node != (uword) ~ 0)
+ vnet_arp_wc_publish (sw_if_index0, &arp0->ip4_over_ethernet[0]);
+ }
+
+ /* lookup BD mac_by_ip4 hash table for MAC entry */
+ ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
+ bd_index0 = vnet_buffer (p0)->l2.bd_index;
+ if (PREDICT_FALSE ((bd_index0 != last_bd_index)
+ || (last_bd_index == (u16) ~ 0)))
+ {
+ last_bd_index = bd_index0;
+ last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
+ }
+ macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
+
+ if (PREDICT_FALSE (!macp0))
+ goto next_l2_feature; /* MAC not found */
+
+ /* MAC found, send ARP reply -
+ Convert ARP request packet to ARP reply */
+ arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
+ arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
+ arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
+ clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ clib_memcpy (eth0->src_address, macp0, 6);
+ n_replies_sent += 1;
+
+ output_response:
+	  /* For a BVI, the l2-fwd node must be used to send the ARP reply,
+	     as the l2-output node cannot deliver packets to a BVI properly */
+ cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
+ if (PREDICT_FALSE (cfg0->bvi))
+ {
+ vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
+ goto next_l2_feature;
+ }
+
+ /* Send ARP/ND reply back out input interface through l2-output */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ next0 = ARP_TERM_NEXT_L2_OUTPUT;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ continue;
+
+ check_ip6_nd:
+ /* IP6 ND event notification or solicitation handling to generate
+ local response instead of flooding */
+ iph0 = (ip6_header_t *) l3h0;
+ if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
+ iph0->protocol == IP_PROTOCOL_ICMP6 &&
+ !ip6_address_is_unspecified
+ (&iph0->src_address)))
+ {
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ if (vnet_ip6_nd_term
+ (vm, node, p0, eth0, iph0, sw_if_index0,
+ vnet_buffer (p0)->l2.bd_index))
+ goto output_response;
+ }
+
+ next_l2_feature:
+ {
+ next0 = vnet_l2_feature_next (p0, arp_term_next_node_index,
+ L2INPUT_FEAT_ARP_TERM);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ continue;
+ }
+
+ drop:
+ if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
+ (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
+ arp0->ip4_over_ethernet[1].ip4.as_u32))
+ {
+ error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
+ }
+ next0 = ARP_TERM_NEXT_DROP;
+ p0->error = node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_error_count (vm, node->node_index,
+ ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
+ .function = arp_term_l2bd,
+ .name = "arp-term-l2bd",
+ .vector_size = sizeof (u32),
+ .n_errors = ETHERNET_ARP_N_ERROR,
+ .error_strings = ethernet_arp_error_strings,
+ .n_next_nodes = ARP_TERM_N_NEXT,
+ .next_nodes = {
+ [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
+ [ARP_TERM_NEXT_DROP] = "error-drop",
+ },
+ .format_buffer = format_ethernet_arp_header,
+ .format_trace = format_arp_term_input_trace,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+arp_term_init (vlib_main_t * vm)
+{
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes (vm,
+ arp_term_l2bd_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ arp_term_next_node_index);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (arp_term_init);
+
+void
+change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
+{
+ if (e->sw_if_index == sw_if_index)
+ {
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_complete_walk, e);
+ }
+}
+
+void
+ethernet_arp_change_mac (u32 sw_if_index)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, am->ip4_entry_pool,
+ ({
+ change_arp_mac (sw_if_index, e);
+ }));
+ /* *INDENT-ON* */
+}
+
+void
+send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi)
+{
+ ip4_main_t *i4m = &ip4_main;
+ u32 sw_if_index = hi->sw_if_index;
+ ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
+
+ if (ip4_addr)
+ {
+ clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d",
+ format_ip4_address, ip4_addr, sw_if_index);
+
+      /* Form GARP packet for output - Gratuitous ARP is an ARP request packet
+         where the interface IP/MAC pair is used for both the sender and
+         target pairs in the request */
+ u32 bi = 0;
+ ethernet_arp_header_t *h = vlib_packet_template_get_packet
+ (vm, &i4m->ip4_arp_request_packet_template, &bi);
+ clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
+ sizeof (h->ip4_over_ethernet[0].ethernet));
+ clib_memcpy (h->ip4_over_ethernet[1].ethernet, hi->hw_address,
+ sizeof (h->ip4_over_ethernet[1].ethernet));
+ h->ip4_over_ethernet[0].ip4 = ip4_addr[0];
+ h->ip4_over_ethernet[1].ip4 = ip4_addr[0];
+
+ /* Setup MAC header with ARP Etype and broadcast DMAC */
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+ ethernet_header_t *e = vlib_buffer_get_current (b);
+ e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
+ clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
+ memset (e->dst_address, 0xff, sizeof (e->dst_address));
+
+ /* Send GARP packet out the specified interface */
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h
new file mode 100644
index 00000000..661f33f9
--- /dev/null
+++ b/src/vnet/ethernet/arp_packet.h
@@ -0,0 +1,180 @@
+/*
+ * ethernet/arp.c: IP v4 ARP node
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ethernet_arp_packet_h
+#define included_ethernet_arp_packet_h
+
+#define foreach_ethernet_arp_hardware_type \
+ _ (0, reserved) \
+ _ (1, ethernet) \
+ _ (2, experimental_ethernet) \
+ _ (3, ax_25) \
+ _ (4, proteon_pronet_token_ring) \
+ _ (5, chaos) \
+ _ (6, ieee_802) \
+ _ (7, arcnet) \
+ _ (8, hyperchannel) \
+ _ (9, lanstar) \
+ _ (10, autonet) \
+ _ (11, localtalk) \
+ _ (12, localnet) \
+ _ (13, ultra_link) \
+ _ (14, smds) \
+ _ (15, frame_relay) \
+ _ (16, atm) \
+ _ (17, hdlc) \
+ _ (18, fibre_channel) \
+ _ (19, atm19) \
+ _ (20, serial_line) \
+ _ (21, atm21) \
+ _ (22, mil_std_188_220) \
+ _ (23, metricom) \
+ _ (24, ieee_1394) \
+ _ (25, mapos) \
+ _ (26, twinaxial) \
+ _ (27, eui_64) \
+ _ (28, hiparp) \
+ _ (29, iso_7816_3) \
+ _ (30, arpsec) \
+ _ (31, ipsec_tunnel) \
+ _ (32, infiniband) \
+ _ (33, cai) \
+ _ (34, wiegand) \
+ _ (35, pure_ip) \
+ _ (36, hw_exp1) \
+ _ (256, hw_exp2)
+
+#define foreach_ethernet_arp_opcode \
+ _ (reserved) \
+ _ (request) \
+ _ (reply) \
+ _ (reverse_request) \
+ _ (reverse_reply) \
+ _ (drarp_request) \
+ _ (drarp_reply) \
+ _ (drarp_error) \
+ _ (inarp_request) \
+ _ (inarp_reply) \
+ _ (arp_nak) \
+ _ (mars_request) \
+ _ (mars_multi) \
+ _ (mars_mserv) \
+ _ (mars_join) \
+ _ (mars_leave) \
+ _ (mars_nak) \
+ _ (mars_unserv) \
+ _ (mars_sjoin) \
+ _ (mars_sleave) \
+ _ (mars_grouplist_request) \
+ _ (mars_grouplist_reply) \
+ _ (mars_redirect_map) \
+ _ (mapos_unarp) \
+ _ (exp1) \
+ _ (exp2)
+
+typedef enum
+{
+#define _(n,f) ETHERNET_ARP_HARDWARE_TYPE_##f = (n),
+ foreach_ethernet_arp_hardware_type
+#undef _
+} ethernet_arp_hardware_type_t;
+
+typedef enum
+{
+#define _(f) ETHERNET_ARP_OPCODE_##f,
+ foreach_ethernet_arp_opcode
+#undef _
+ ETHERNET_ARP_N_OPCODE,
+} ethernet_arp_opcode_t;
+
+typedef enum
+{
+ IP4_ARP_NEXT_DROP,
+ IP4_ARP_N_NEXT,
+} ip4_arp_next_t;
+
+typedef enum
+{
+ IP4_ARP_ERROR_DROP,
+ IP4_ARP_ERROR_REQUEST_SENT,
+ IP4_ARP_ERROR_NON_ARP_ADJ,
+ IP4_ARP_ERROR_REPLICATE_DROP,
+ IP4_ARP_ERROR_REPLICATE_FAIL,
+ IP4_ARP_ERROR_NO_SOURCE_ADDRESS,
+} ip4_arp_error_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 ethernet[6];
+ ip4_address_t ip4;
+}) ethernet_arp_ip4_over_ethernet_address_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ u16 l2_type;
+ u16 l3_type;
+ u8 n_l2_address_bytes;
+ u8 n_l3_address_bytes;
+ u16 opcode;
+ union
+ {
+ ethernet_arp_ip4_over_ethernet_address_t ip4_over_ethernet[2];
+
+ /* Others... */
+ u8 data[0];
+ };
+} ethernet_arp_header_t;
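+
+/* For IPv4-over-Ethernet this is the classic 28-byte ARP payload:
+ * hardware/protocol types, address lengths (6 and 4), opcode, then
+ * the sender and target (MAC, IPv4) pairs. */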
+
+typedef enum ethernet_arp_entry_flags_t_
+{
+ ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC = (1 << 0),
+ ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC = (1 << 1),
+ ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY = (1 << 2),
+} __attribute__ ((packed)) ethernet_arp_entry_flags_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ ip4_address_t ip4_address;
+
+ u8 ethernet_address[6];
+
+ ethernet_arp_entry_flags_t flags;
+
+ u64 cpu_time_last_updated;
+
+ /**
+ * The index of the adj-fib entry created
+ */
+ fib_node_index_t fib_entry_index;
+} ethernet_arp_ip4_entry_t;
+
+ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index);
+u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va);
+
+void send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi);
+
+#endif /* included_ethernet_arp_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/dir.dox b/src/vnet/ethernet/dir.dox
new file mode 100644
index 00000000..a55a73c0
--- /dev/null
+++ b/src/vnet/ethernet/dir.dox
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+@dir
+@brief Ethernet ARP and Loopback Code.
+
+This directory contains the source code for ARP and Loopback Interfaces.
+
+*/
+/*? %%clicmd:group_label ARP and Loopback CLI %% ?*/
diff --git a/src/vnet/ethernet/error.def b/src/vnet/ethernet/error.def
new file mode 100644
index 00000000..36679c0c
--- /dev/null
+++ b/src/vnet/ethernet/error.def
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_error.def: ethernet errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ethernet_error (NONE, PUNT, "no error")
+ethernet_error (BAD_LLC_LENGTH, DROP, "llc length > packet length")
+ethernet_error (UNKNOWN_TYPE, PUNT, "unknown ethernet type")
+ethernet_error (UNKNOWN_VLAN, DROP, "unknown vlan")
+ethernet_error (L3_MAC_MISMATCH, DROP, "l3 mac mismatch")
+ethernet_error (DOWN, DROP, "subinterface down")
+
diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h
new file mode 100644
index 00000000..a6846b13
--- /dev/null
+++ b/src/vnet/ethernet/ethernet.h
@@ -0,0 +1,577 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet.h: types/functions for ethernet.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ethernet_h
+#define included_ethernet_h
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/feature/feature.h>
+
+always_inline u64
+ethernet_mac_address_u64 (u8 * a)
+{
+ return (((u64) a[0] << (u64) (5 * 8))
+ | ((u64) a[1] << (u64) (4 * 8))
+ | ((u64) a[2] << (u64) (3 * 8))
+ | ((u64) a[3] << (u64) (2 * 8))
+ | ((u64) a[4] << (u64) (1 * 8)) | ((u64) a[5] << (u64) (0 * 8)));
+}
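+
+/* e.g. {0x01,0x00,0x5e,0x00,0x00,0x01} maps to 0x01005e000001; the
+ * multicast (I/G) bit of the first octet lands at bit 40, which is
+ * what the test below checks. */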
+
+static inline int
+ethernet_mac_address_is_multicast_u64 (u64 a)
+{
+ return (a & (1ULL << (5 * 8))) != 0;
+}
+
+static_always_inline int
+ethernet_frame_is_tagged (u16 type)
+{
+#if __SSE4_2__
+ const __m128i ethertype_mask = _mm_set_epi16 (ETHERNET_TYPE_VLAN,
+ ETHERNET_TYPE_DOT1AD,
+ ETHERNET_TYPE_VLAN_9100,
+ ETHERNET_TYPE_VLAN_9200,
+ /* duplicate last one to
+ fill register */
+ ETHERNET_TYPE_VLAN_9200,
+ ETHERNET_TYPE_VLAN_9200,
+ ETHERNET_TYPE_VLAN_9200,
+ ETHERNET_TYPE_VLAN_9200);
+
+ __m128i r = _mm_set1_epi16 (type);
+ r = _mm_cmpeq_epi16 (ethertype_mask, r);
+ return !_mm_test_all_zeros (r, r);
+#else
+ if ((type == ETHERNET_TYPE_VLAN) ||
+ (type == ETHERNET_TYPE_DOT1AD) ||
+ (type == ETHERNET_TYPE_VLAN_9100) || (type == ETHERNET_TYPE_VLAN_9200))
+ return 1;
+#endif
+ return 0;
+}
+
+/* Max. sized ethernet/vlan header for parsing. */
+typedef struct
+{
+ ethernet_header_t ethernet;
+
+ /* Allow up to 2 stacked vlan headers. */
+ ethernet_vlan_header_t vlan[2];
+} ethernet_max_header_t;
+
+struct vnet_hw_interface_t;
+/* Ethernet flag change callback. */
+typedef u32 (ethernet_flag_change_function_t)
+ (vnet_main_t * vnm, struct vnet_hw_interface_t * hi, u32 flags);
+
+#define ETHERNET_MIN_PACKET_BYTES 64
+#define ETHERNET_MAX_PACKET_BYTES 9216
+
+/* Ethernet interface instance. */
+typedef struct ethernet_interface
+{
+
+ /* Accept all packets (promiscuous mode). */
+#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL (1 << 0)
+#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags) \
+ (((flags) & ~ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) == 0)
+
+ /* Change MTU on interface from hw interface structure */
+#define ETHERNET_INTERFACE_FLAG_MTU (1 << 1)
+#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags) \
+ ((flags) & ETHERNET_INTERFACE_FLAG_MTU)
+
+ /* Callback, e.g. to turn on/off promiscuous mode */
+ ethernet_flag_change_function_t *flag_change;
+
+ u32 driver_instance;
+
+ /* Ethernet (MAC) address for this interface. */
+ u8 address[6];
+} ethernet_interface_t;
+
+extern vnet_hw_interface_class_t ethernet_hw_interface_class;
+
+typedef struct
+{
+ /* Name (a c string). */
+ char *name;
+
+ /* Ethernet type in host byte order. */
+ ethernet_type_t type;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} ethernet_type_info_t;
+
+typedef enum
+{
+#define ethernet_error(n,c,s) ETHERNET_ERROR_##n,
+#include <vnet/ethernet/error.def>
+#undef ethernet_error
+ ETHERNET_N_ERROR,
+} ethernet_error_t;
+
+
+// Structs used when parsing packet to find sw_if_index
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 flags;
+ // config entry is-valid flag
+ // exact match flags (valid if packet has 0/1/2/3 tags)
+ // L2 vs L3 forwarding mode
+#define SUBINT_CONFIG_MATCH_0_TAG (1<<0)
+#define SUBINT_CONFIG_MATCH_1_TAG (1<<1)
+#define SUBINT_CONFIG_MATCH_2_TAG (1<<2)
+#define SUBINT_CONFIG_MATCH_3_TAG (1<<3)
+#define SUBINT_CONFIG_VALID (1<<4)
+#define SUBINT_CONFIG_L2 (1<<5)
+#define SUBINT_CONFIG_P2P (1<<6)
+
+} subint_config_t;
+
+always_inline u32
+eth_create_valid_subint_match_flags (u32 num_tags)
+{
+ return SUBINT_CONFIG_VALID | (1 << num_tags);
+}
+
+
+typedef struct
+{
+ subint_config_t untagged_subint;
+ subint_config_t default_subint;
+ u16 dot1q_vlans; // pool id for vlan table
+ u16 dot1ad_vlans; // pool id for vlan table
+} main_intf_t;
+
+typedef struct
+{
+ subint_config_t single_tag_subint;
+ subint_config_t inner_any_subint;
+ u32 qinqs; // pool id for qinq table
+} vlan_intf_t;
+
+typedef struct
+{
+ vlan_intf_t vlans[ETHERNET_N_VLAN];
+} vlan_table_t;
+
+typedef struct
+{
+ subint_config_t subint;
+} qinq_intf_t;
+
+typedef struct
+{
+ qinq_intf_t vlans[ETHERNET_N_VLAN];
+} qinq_table_t;
+
+// Structure mapping to a next index based on ethertype.
+// Common ethertypes are stored explicitly, others are
+// stored in a sparse table.
+typedef struct
+{
+ /* Sparse vector mapping ethernet type in network byte order
+ to next index. */
+ u16 *input_next_by_type;
+ u32 *sparse_index_by_input_next_index;
+
+ /* cached next indexes for common ethertypes */
+ u32 input_next_ip4;
+ u32 input_next_ip6;
+ u32 input_next_mpls;
+} next_by_ethertype_t;
+
+typedef struct
+{
+ vlib_main_t *vlib_main;
+
+ /* next node index for the L3 input node of each ethertype */
+ next_by_ethertype_t l3_next;
+
+ /* next node index for L2 interfaces */
+ u32 l2_next;
+
+ /* flag and next node index for L3 redirect */
+ u32 redirect_l3;
+ u32 redirect_l3_next;
+
+ /* Pool of ethernet interface instances. */
+ ethernet_interface_t *interfaces;
+
+ ethernet_type_info_t *type_infos;
+
+ /* Hash tables mapping name/type to type info index. */
+ uword *type_info_by_name, *type_info_by_type;
+
+ // The root of the vlan parsing tables. A vector with one element
+ // for each main interface, indexed by hw_if_index.
+ main_intf_t *main_intfs;
+
+ // Pool of vlan tables
+ vlan_table_t *vlan_pool;
+
+ // Pool of qinq tables;
+ qinq_table_t *qinq_pool;
+
+ /* Set to one to use AB.CD.EF instead of A:B:C:D:E:F as ethernet format. */
+ int format_ethernet_address_16bit;
+
+ /* debug: make sure we don't wipe out an ethernet registration by mistake */
+ u8 next_by_ethertype_register_called;
+
+ /* Feature arc index */
+ u8 output_feature_arc_index;
+
+ /* Allocated loopback instances */
+ uword *bm_loopback_instances;
+} ethernet_main_t;
+
+ethernet_main_t ethernet_main;
+
+always_inline ethernet_type_info_t *
+ethernet_get_type_info (ethernet_main_t * em, ethernet_type_t type)
+{
+ uword *p = hash_get (em->type_info_by_type, type);
+ return p ? vec_elt_at_index (em->type_infos, p[0]) : 0;
+}
+
+ethernet_interface_t *ethernet_get_interface (ethernet_main_t * em,
+ u32 hw_if_index);
+
+clib_error_t *ethernet_register_interface (vnet_main_t * vnm,
+ u32 dev_class_index,
+ u32 dev_instance,
+ u8 * address,
+ u32 * hw_if_index_return,
+ ethernet_flag_change_function_t
+ flag_change);
+
+void ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index);
+
+/* Register given node index to take input for given ethernet type. */
+void
+ethernet_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index);
+
+/* Register given node index to take input for packet from L2 interfaces. */
+void ethernet_register_l2_input (vlib_main_t * vm, u32 node_index);
+
+/* Register given node index to take redirected L3 traffic, and enable L3 redirect */
+void ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index);
+
+/* Formats ethernet address X:X:X:X:X:X */
+u8 *format_ethernet_address (u8 * s, va_list * args);
+u8 *format_ethernet_type (u8 * s, va_list * args);
+u8 *format_ethernet_vlan_tci (u8 * s, va_list * va);
+u8 *format_ethernet_header (u8 * s, va_list * args);
+u8 *format_ethernet_header_with_length (u8 * s, va_list * args);
+
+/* Parse ethernet address in either X:X:X:X:X:X unix or X.X.X cisco format. */
+uword unformat_ethernet_address (unformat_input_t * input, va_list * args);
+
+/* Parse ethernet type as 0xXXXX or type name from ethernet/types.def.
+ In either host or network byte order. */
+uword
+unformat_ethernet_type_host_byte_order (unformat_input_t * input,
+ va_list * args);
+uword
+unformat_ethernet_type_net_byte_order (unformat_input_t * input,
+ va_list * args);
+
+/* Parse ethernet header. */
+uword unformat_ethernet_header (unformat_input_t * input, va_list * args);
+
+/* Parse ethernet interface name; return hw_if_index. */
+uword unformat_ethernet_interface (unformat_input_t * input, va_list * args);
+
+uword unformat_pg_ethernet_header (unformat_input_t * input, va_list * args);
+
+always_inline void
+ethernet_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_ethernet_header_with_length;
+ n->unformat_buffer = unformat_ethernet_header;
+ pn->unformat_edit = unformat_pg_ethernet_header;
+}
+
+always_inline ethernet_header_t *
+ethernet_buffer_get_header (vlib_buffer_t * b)
+{
+ return (void *) (b->data + vnet_buffer (b)->l2_hdr_offset);
+}
+
+/** Returns the number of VLAN headers in the current Ethernet frame in the
+ * buffer. Returns 0, 1, 2 for the known header count. The value 3 indicates
+ * the number of headers is not known.
+ */
+#define ethernet_buffer_get_vlan_count(b) ( \
+ ((b)->flags & VNET_BUFFER_FLAGS_VLAN_BITS) >> VNET_BUFFER_F_LOG2_VLAN_1_DEEP \
+)
+
+/** Sets the number of VLAN headers in the current Ethernet frame in the
+ * buffer. Values 0, 1, 2 indicate the header count. The value 3 indicates
+ * the number of headers is not known.
+ */
+#define ethernet_buffer_set_vlan_count(b, v) ( \
+ (b)->flags = ((b)->flags & ~VNET_BUFFER_FLAGS_VLAN_BITS) | \
+ (((v) << VNET_BUFFER_F_LOG2_VLAN_1_DEEP) & VNET_BUFFER_FLAGS_VLAN_BITS) \
+)
+
+/** Adjusts the vlan count by the delta in 'v' */
+#define ethernet_buffer_adjust_vlan_count(b, v) ( \
+ ethernet_buffer_set_vlan_count(b, \
+ (word)ethernet_buffer_get_vlan_count(b) + (word)(v)) \
+)
+
+/** Adjusts the vlan count by the header size byte delta in 'v' */
+#define ethernet_buffer_adjust_vlan_count_by_bytes(b, v) ( \
+ (b)->flags = ((b)->flags & ~VNET_BUFFER_FLAGS_VLAN_BITS) | (( \
+ ((b)->flags & VNET_BUFFER_FLAGS_VLAN_BITS) + \
+ ((v) << (VNET_BUFFER_F_LOG2_VLAN_1_DEEP - 2)) \
+ ) & VNET_BUFFER_FLAGS_VLAN_BITS) \
+)
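+
+/* (A vlan header is 4 bytes, so the byte delta is effectively divided
+ * by 4 by shifting it 2 fewer bits than the tag-count field offset.) */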
+
+/**
+ * Determine the size of the Ethernet headers of the current frame in
+ * the buffer. This uses the VLAN depth flags that are set by
+ * ethernet-input. Because these flags are stored in the vlib_buffer_t
+ * "flags" field this count is valid regardless of the node so long as it's
+ * checked downstream of ethernet-input; that is, the value is not stored in
+ * the opaque space.
+ */
+#define ethernet_buffer_header_size(b) ( \
+ ethernet_buffer_get_vlan_count((b)) * sizeof(ethernet_vlan_header_t) + \
+ sizeof(ethernet_header_t) \
+)
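+
+/* Example: a single-tagged frame yields sizeof (ethernet_header_t) +
+ * sizeof (ethernet_vlan_header_t) = 14 + 4 = 18 bytes of L2 header. */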
+
+ethernet_main_t *ethernet_get_main (vlib_main_t * vm);
+u32 ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags);
+void ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index,
+ u32 l2);
+void ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
+ u32 sw_if_index, u32 l2);
+void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi,
+ u32 enable);
+
+int
+vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg,
+ int is_static, int is_no_fib_entry);
+
+int
+vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg);
+
+int vnet_proxy_arp_fib_reset (u32 fib_id);
+
+clib_error_t *next_by_ethertype_init (next_by_ethertype_t * l3_next);
+clib_error_t *next_by_ethertype_register (next_by_ethertype_t * l3_next,
+ u32 ethertype, u32 next_index);
+
+int vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address,
+ u8 is_specified, u32 user_instance);
+int vnet_delete_loopback_interface (u32 sw_if_index);
+int vnet_delete_sub_interface (u32 sw_if_index);
+
+// Perform ethernet subinterface classification table lookups given
+// the port's sw_if_index and fields extracted from the ethernet header.
+// The resulting tables are used by identify_subint().
+always_inline void
+eth_vlan_table_lookups (ethernet_main_t * em,
+ vnet_main_t * vnm,
+ u32 port_sw_if_index0,
+ u16 first_ethertype,
+ u16 outer_id,
+ u16 inner_id,
+ vnet_hw_interface_t ** hi,
+ main_intf_t ** main_intf,
+ vlan_intf_t ** vlan_intf, qinq_intf_t ** qinq_intf)
+{
+ vlan_table_t *vlan_table;
+ qinq_table_t *qinq_table;
+ u32 vlan_table_id;
+
+ // Read the main, vlan, and qinq interface table entries
+ // TODO: Consider if/how to prefetch tables. Also consider
+ // single-entry cache to skip table lookups and identify_subint()
+ // processing.
+ *hi = vnet_get_sup_hw_interface (vnm, port_sw_if_index0);
+ *main_intf = vec_elt_at_index (em->main_intfs, (*hi)->hw_if_index);
+
+ // Always read the vlan and qinq tables, even if there are not that
+ // many tags on the packet. This makes the lookups and comparisons
+ // easier (and less branchy).
+ vlan_table_id = (first_ethertype == ETHERNET_TYPE_DOT1AD) ?
+ (*main_intf)->dot1ad_vlans : (*main_intf)->dot1q_vlans;
+ vlan_table = vec_elt_at_index (em->vlan_pool, vlan_table_id);
+ *vlan_intf = &vlan_table->vlans[outer_id];
+
+ qinq_table = vec_elt_at_index (em->qinq_pool, (*vlan_intf)->qinqs);
+ *qinq_intf = &qinq_table->vlans[inner_id];
+}
+
+
+// Determine the subinterface for this packet, given the result of the
+// vlan table lookups and vlan header parsing. Check the most specific
+// matches first.
+// Returns 1 if a matching subinterface was found, otherwise returns 0.
+always_inline u32
+eth_identify_subint (vnet_hw_interface_t * hi,
+ vlib_buffer_t * b0,
+ u32 match_flags,
+ main_intf_t * main_intf,
+ vlan_intf_t * vlan_intf,
+ qinq_intf_t * qinq_intf,
+ u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
+{
+ subint_config_t *subint;
+
+ // Each comparison is checking both the valid flag and the number of tags
+ // (incorporating exact-match/non-exact-match).
+
+ // check for specific double tag
+ subint = &qinq_intf->subint;
+ if ((subint->flags & match_flags) == match_flags)
+ goto matched;
+
+ // check for specific outer and 'any' inner
+ subint = &vlan_intf->inner_any_subint;
+ if ((subint->flags & match_flags) == match_flags)
+ goto matched;
+
+ // check for specific single tag
+ subint = &vlan_intf->single_tag_subint;
+ if ((subint->flags & match_flags) == match_flags)
+ goto matched;
+
+ // check for untagged interface
+ subint = &main_intf->untagged_subint;
+ if ((subint->flags & match_flags) == match_flags)
+ goto matched;
+
+ // check for default interface
+ subint = &main_intf->default_subint;
+ if ((subint->flags & match_flags) == match_flags)
+ goto matched;
+
+ // No matching subinterface
+ *new_sw_if_index = ~0;
+ *error0 = ETHERNET_ERROR_UNKNOWN_VLAN;
+ *is_l2 = 0;
+ return 0;
+
+matched:
+ *new_sw_if_index = subint->sw_if_index;
+ *is_l2 = subint->flags & SUBINT_CONFIG_L2;
+ return 1;
+}
+
+// Compare two ethernet macs. Return 1 if they are the same, 0 if different
+always_inline u32
+eth_mac_equal (u8 * mac1, u8 * mac2)
+{
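+  /* Overlapping unaligned 4-byte loads: bytes 0-3 and 2-5 together cover
+     all six octets, avoiding a separate 16-bit tail compare. */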
+ return (*((u32 *) (mac1 + 0)) == *((u32 *) (mac2 + 0)) &&
+ *((u32 *) (mac1 + 2)) == *((u32 *) (mac2 + 2)));
+}
+
+
+always_inline ethernet_main_t *
+vnet_get_ethernet_main (void)
+{
+ return &ethernet_main;
+}
+
+void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque, uword data);
+
+
+int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
+ void *data_callback,
+ u32 pid,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data, int is_add);
+
+void wc_arp_set_publisher_node (uword inode_index, uword event_type);
+
+void ethernet_arp_change_mac (u32 sw_if_index);
+void ethernet_ndp_change_mac (u32 sw_if_index);
+
+void arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
+
+void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
+u8 *ethernet_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address);
+const u8 *ethernet_ip4_mcast_dst_addr (void);
+const u8 *ethernet_ip6_mcast_dst_addr (void);
+
+extern vlib_node_registration_t ethernet_input_node;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 ip4;
+ u8 mac[6];
+} wc_arp_report_t;
+
+#endif /* included_ethernet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/format.c b/src/vnet/ethernet/format.c
new file mode 100644
index 00000000..5b589998
--- /dev/null
+++ b/src/vnet/ethernet/format.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_format.c: ethernet formatting/parsing.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+u8 *
+format_ethernet_address (u8 * s, va_list * args)
+{
+ ethernet_main_t *em = &ethernet_main;
+ u8 *a = va_arg (*args, u8 *);
+
+ if (em->format_ethernet_address_16bit)
+ return format (s, "%02x%02x.%02x%02x.%02x%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+ else
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
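+
+/*
+ * Illustrative use via the vppinfra %U convention ('mac' is a
+ * hypothetical u8[6]):
+ *
+ *   u8 *s = format (0, "%U", format_ethernet_address, mac);
+ *   // "00:11:22:33:44:55", or "0011.2233.4455" in 16-bit mode
+ */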
+
+u8 *
+format_ethernet_type (u8 * s, va_list * args)
+{
+ ethernet_type_t type = va_arg (*args, u32);
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_type_info_t *t = ethernet_get_type_info (em, type);
+
+ if (t)
+ s = format (s, "%s", t->name);
+ else
+ s = format (s, "0x%04x", type);
+
+ return s;
+}
+
+u8 *
+format_ethernet_vlan_tci (u8 * s, va_list * va)
+{
+ u32 vlan_tci = va_arg (*va, u32);
+
+ u32 vid = (vlan_tci & 0xfff);
+ u32 cfi = (vlan_tci >> 12) & 1;
+ u32 pri = (vlan_tci >> 13);
+
+ s = format (s, "%d", vid);
+ if (pri != 0)
+ s = format (s, " priority %d", pri);
+ if (cfi != 0)
+ s = format (s, " cfi");
+
+ return s;
+}
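+
+/* For reference, the TCI layout is PCP (3 bits) | CFI (1 bit) | VID
+   (12 bits); e.g. a tci of 0x6064 formats as "100 priority 3". */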
+
+u8 *
+format_ethernet_header_with_length (u8 * s, va_list * args)
+{
+ ethernet_pbb_header_packed_t *ph =
+ va_arg (*args, ethernet_pbb_header_packed_t *);
+ ethernet_max_header_t *m = (ethernet_max_header_t *) ph;
+ u32 max_header_bytes = va_arg (*args, u32);
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_header_t *e = &m->ethernet;
+ ethernet_vlan_header_t *v;
+ ethernet_type_t type = clib_net_to_host_u16 (e->type);
+ ethernet_type_t vlan_type[ARRAY_LEN (m->vlan)];
+ u32 n_vlan = 0, i, header_bytes;
+ uword indent;
+
+ while ((type == ETHERNET_TYPE_VLAN || type == ETHERNET_TYPE_DOT1AD
+ || type == ETHERNET_TYPE_DOT1AH) && n_vlan < ARRAY_LEN (m->vlan))
+ {
+ vlan_type[n_vlan] = type;
+ if (type != ETHERNET_TYPE_DOT1AH)
+ {
+ v = m->vlan + n_vlan;
+ type = clib_net_to_host_u16 (v->type);
+ }
+ n_vlan++;
+ }
+
+ header_bytes = sizeof (e[0]) + n_vlan * sizeof (v[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "ethernet header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "%U: %U -> %U",
+ format_ethernet_type, type,
+ format_ethernet_address, e->src_address,
+ format_ethernet_address, e->dst_address);
+
+ if (type != ETHERNET_TYPE_DOT1AH)
+ {
+ for (i = 0; i < n_vlan; i++)
+ {
+ u32 v = clib_net_to_host_u16 (m->vlan[i].priority_cfi_and_id);
+ if (vlan_type[i] == ETHERNET_TYPE_VLAN)
+ s = format (s, " 802.1q vlan %U", format_ethernet_vlan_tci, v);
+ else
+ s = format (s, " 802.1ad vlan %U", format_ethernet_vlan_tci, v);
+ }
+
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ ethernet_type_info_t *ti;
+ vlib_node_t *node = 0;
+
+ ti = ethernet_get_type_info (em, type);
+ if (ti && ti->node_index != ~0)
+ node = vlib_get_node (em->vlib_main, ti->node_index);
+ if (node && node->format_buffer)
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ node->format_buffer, (void *) m + header_bytes,
+ max_header_bytes - header_bytes);
+ }
+ }
+ else
+ {
+ s =
+ format (s, " %s b-tag %04X",
+ (clib_net_to_host_u16 (ph->b_type) ==
+ ETHERNET_TYPE_DOT1AD) ? "802.1ad" : "",
+ clib_net_to_host_u16 (ph->priority_dei_id));
+ s =
+ format (s, " %s i-tag %08X",
+ (clib_net_to_host_u16 (ph->i_type) ==
+ ETHERNET_TYPE_DOT1AH) ? "802.1ah" : "",
+ clib_net_to_host_u32 (ph->priority_dei_uca_res_sid));
+ }
+
+ return s;
+}
+
+u8 *
+format_ethernet_header (u8 * s, va_list * args)
+{
+ ethernet_max_header_t *m = va_arg (*args, ethernet_max_header_t *);
+ return format (s, "%U", format_ethernet_header_with_length, m, 0);
+}
+
+/* Parse X:X:X:X:X:X unix style ethernet address. */
+static uword
+unformat_ethernet_address_unix (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ u32 i, a[6];
+
+ if (!unformat (input, "%_%x:%x:%x:%x:%x:%x%_",
+ &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]))
+ return 0;
+
+ /* Check range. */
+ for (i = 0; i < ARRAY_LEN (a); i++)
+ if (a[i] >= (1 << 8))
+ return 0;
+
+ for (i = 0; i < ARRAY_LEN (a); i++)
+ result[i] = a[i];
+
+ return 1;
+}
+
+/* Parse X.X.X cisco style ethernet address. */
+static uword
+unformat_ethernet_address_cisco (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ u32 i, a[3];
+
+ if (!unformat (input, "%_%x.%x.%x%_", &a[0], &a[1], &a[2]))
+ return 0;
+
+ /* Check range. */
+ for (i = 0; i < ARRAY_LEN (a); i++)
+ if (a[i] >= (1 << 16))
+ return 0;
+
+ result[0] = (a[0] >> 8) & 0xff;
+ result[1] = (a[0] >> 0) & 0xff;
+ result[2] = (a[1] >> 8) & 0xff;
+ result[3] = (a[1] >> 0) & 0xff;
+ result[4] = (a[2] >> 8) & 0xff;
+ result[5] = (a[2] >> 0) & 0xff;
+
+ return 1;
+}
+
+/* Parse ethernet address; accept either unix or cisco style addresses. */
+uword
+unformat_ethernet_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ return (unformat_user (input, unformat_ethernet_address_unix, result)
+ || unformat_user (input, unformat_ethernet_address_cisco, result));
+}
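+
+/*
+ * Sketch of typical use from an unformat handler (the input text is
+ * hypothetical):
+ *
+ *   u8 mac[6];
+ *   if (unformat (input, "%U", unformat_ethernet_address, mac))
+ *     ;  // accepts "00:11:22:33:44:55" or "0011.2233.4455"
+ */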
+
+/* Returns ethernet type as an int in host byte order. */
+uword
+unformat_ethernet_type_host_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ ethernet_main_t *em = &ethernet_main;
+ int type, i;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x", &type) || unformat (input, "%d", &type))
+ {
+ if (type >= (1 << 16))
+ return 0;
+ *result = type;
+ return 1;
+ }
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ em->type_info_by_name, &i))
+ {
+ ethernet_type_info_t *ti = vec_elt_at_index (em->type_infos, i);
+ *result = ti->type;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword
+unformat_ethernet_type_net_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ if (!unformat_user (input, unformat_ethernet_type_host_byte_order, result))
+ return 0;
+
+ *result = clib_host_to_net_u16 ((u16) * result);
+ return 1;
+}
+
+uword
+unformat_ethernet_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ ethernet_max_header_t _m, *m = &_m;
+ ethernet_header_t *e = &m->ethernet;
+ u16 type;
+ u32 n_vlan;
+
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_ethernet_type_host_byte_order, &type,
+ unformat_ethernet_address, &e->src_address,
+ unformat_ethernet_address, &e->dst_address))
+ return 0;
+
+ n_vlan = 0;
+ while (unformat (input, "vlan"))
+ {
+ u32 id, priority;
+
+ if (!unformat_user (input, unformat_vlib_number, &id)
+ || id >= ETHERNET_N_VLAN)
+ return 0;
+
+ if (unformat (input, "priority %d", &priority))
+ {
+ if (priority >= 8)
+ return 0;
+ id |= priority << 13;
+ }
+
+ if (unformat (input, "cfi"))
+ id |= 1 << 12;
+
+ /* Too many vlans given. */
+ if (n_vlan >= ARRAY_LEN (m->vlan))
+ return 0;
+
+ m->vlan[n_vlan].priority_cfi_and_id = clib_host_to_net_u16 (id);
+ n_vlan++;
+ }
+
+ if (n_vlan == 0)
+ e->type = clib_host_to_net_u16 (type);
+ else
+ {
+ int i;
+
+ e->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ for (i = 0; i < n_vlan - 1; i++)
+ m->vlan[i].type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ m->vlan[n_vlan - 1].type = clib_host_to_net_u16 (type);
+ }
+
+ /* Add header to result. */
+ {
+ void *p;
+ u32 n_bytes = sizeof (e[0]) + n_vlan * sizeof (m->vlan[0]);
+
+ vec_add2 (*result, p, n_bytes);
+ clib_memcpy (p, m, n_bytes);
+ }
+
+ return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/init.c b/src/vnet/ethernet/init.c
new file mode 100644
index 00000000..2d20adc9
--- /dev/null
+++ b/src/vnet/ethernet/init.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_init.c: ethernet initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h> // for feature registration
+
+/* Global main structure. */
+ethernet_main_t ethernet_main;
+
+static void
+add_type (ethernet_main_t * em, ethernet_type_t type, char *type_name)
+{
+ ethernet_type_info_t *ti;
+ u32 i;
+
+ vec_add2 (em->type_infos, ti, 1);
+ i = ti - em->type_infos;
+
+ ti->name = type_name;
+ ti->type = type;
+ ti->next_index = ti->node_index = ~0;
+
+ hash_set (em->type_info_by_type, type, i);
+ hash_set_mem (em->type_info_by_name, ti->name, i);
+}
+
+/* Built-in ip4 tx feature path definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ethernet_output, static) =
+{
+ .arc_name = "ethernet-output",
+ .start_nodes = VNET_FEATURES ("adj-l2-midchain"),
+ .arc_index_ptr = &ethernet_main.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ethernet_tx_drop, static) =
+{
+ .arc_name = "ethernet-output",
+ .node_name = "error-drop",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ethernet_init (vlib_main_t * vm)
+{
+ ethernet_main_t *em = &ethernet_main;
+ clib_error_t *error;
+
+ /*
+ * Set up the L2 path now, or we'll wipe out the L2 ARP
+ * registration set up by ethernet_arp_init.
+ */
+ if ((error = vlib_call_init_function (vm, l2_init)))
+ return error;
+
+ em->vlib_main = vm;
+
+ em->type_info_by_name = hash_create_string (0, sizeof (uword));
+ em->type_info_by_type = hash_create (0, sizeof (uword));
+
+#define ethernet_type(n,s) add_type (em, ETHERNET_TYPE_##s, #s);
+#include "types.def"
+#undef ethernet_type
+
+ if ((error = vlib_call_init_function (vm, llc_init)))
+ return error;
+ if ((error = vlib_call_init_function (vm, ethernet_input_init)))
+ return error;
+ if ((error = vlib_call_init_function (vm, vnet_feature_init)))
+ return error;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ethernet_init);
+
+ethernet_main_t *
+ethernet_get_main (vlib_main_t * vm)
+{
+ vlib_call_init_function (vm, ethernet_init);
+ return &ethernet_main;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
new file mode 100644
index 00000000..3e78a49d
--- /dev/null
+++ b/src/vnet/ethernet/interface.c
@@ -0,0 +1,880 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_interface.c: ethernet interfaces
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/adj/adj.h>
+
+/**
+ * @file
+ * @brief Loopback Interfaces.
+ *
+ * This file contains code to manage loopback interfaces.
+ */
+
+const u8 *
+ethernet_ip4_mcast_dst_addr (void)
+{
+ const static u8 ethernet_mcast_dst_mac[] = {
+ 0x1, 0x0, 0x5e, 0x0, 0x0, 0x0,
+ };
+
+ return (ethernet_mcast_dst_mac);
+}
+
+const u8 *
+ethernet_ip6_mcast_dst_addr (void)
+{
+ const static u8 ethernet_mcast_dst_mac[] = {
+ 0x33, 0x33, 0x00, 0x0, 0x0, 0x0,
+ };
+
+ return (ethernet_mcast_dst_mac);
+}
+
+/**
+ * @brief build a rewrite string to use for sending packets of type 'link_type'
+ * to 'dst_address'
+ */
+u8 *
+ethernet_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ vnet_sw_interface_t *sub_sw = vnet_get_sw_interface (vnm, sw_if_index);
+ vnet_sw_interface_t *sup_sw = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *ei;
+ ethernet_header_t *h;
+ ethernet_type_t type;
+ uword n_bytes = sizeof (h[0]);
+ u8 *rewrite = NULL;
+ u8 is_p2p = 0;
+
+ if (sub_sw->type == VNET_SW_INTERFACE_TYPE_P2P)
+ is_p2p = 1;
+ if (sub_sw != sup_sw)
+ {
+ if (sub_sw->sub.eth.flags.one_tag)
+ {
+ n_bytes += sizeof (ethernet_vlan_header_t);
+ }
+ else if (sub_sw->sub.eth.flags.two_tags)
+ {
+ n_bytes += 2 * (sizeof (ethernet_vlan_header_t));
+ }
+ else if (PREDICT_FALSE (is_p2p))
+ {
+ n_bytes = sizeof (ethernet_header_t);
+ }
+ if (PREDICT_FALSE (!is_p2p))
+ {
+ // Check for encaps that are not supported for L3 interfaces
+ if (!(sub_sw->sub.eth.flags.exact_match) ||
+ (sub_sw->sub.eth.flags.default_sub) ||
+ (sub_sw->sub.eth.flags.outer_vlan_id_any) ||
+ (sub_sw->sub.eth.flags.inner_vlan_id_any))
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ n_bytes = sizeof (ethernet_header_t);
+ }
+ }
+
+ switch (link_type)
+ {
+#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
+ _(IP4, IP4);
+ _(IP6, IP6);
+ _(MPLS, MPLS);
+ _(ARP, ARP);
+#undef _
+ default:
+ return NULL;
+ }
+
+ vec_validate (rewrite, n_bytes - 1);
+ h = (ethernet_header_t *) rewrite;
+ ei = pool_elt_at_index (em->interfaces, hw->hw_instance);
+ clib_memcpy (h->src_address, ei->address, sizeof (h->src_address));
+ if (is_p2p)
+ {
+ clib_memcpy (h->dst_address, sub_sw->p2p.client_mac,
+ sizeof (h->dst_address));
+ }
+ else
+ {
+ if (dst_address)
+ clib_memcpy (h->dst_address, dst_address, sizeof (h->dst_address));
+ else
+ memset (h->dst_address, ~0, sizeof (h->dst_address)); /* broadcast */
+ }
+
+ if (PREDICT_FALSE (!is_p2p) && sub_sw->sub.eth.flags.one_tag)
+ {
+ ethernet_vlan_header_t *outer = (void *) (h + 1);
+
+ h->type = sub_sw->sub.eth.flags.dot1ad ?
+ clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
+ clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ outer->priority_cfi_and_id =
+ clib_host_to_net_u16 (sub_sw->sub.eth.outer_vlan_id);
+ outer->type = clib_host_to_net_u16 (type);
+
+ }
+ else if (PREDICT_FALSE (!is_p2p) && sub_sw->sub.eth.flags.two_tags)
+ {
+ ethernet_vlan_header_t *outer = (void *) (h + 1);
+ ethernet_vlan_header_t *inner = (void *) (outer + 1);
+
+ h->type = sub_sw->sub.eth.flags.dot1ad ?
+ clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
+ clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ outer->priority_cfi_and_id =
+ clib_host_to_net_u16 (sub_sw->sub.eth.outer_vlan_id);
+ outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ inner->priority_cfi_and_id =
+ clib_host_to_net_u16 (sub_sw->sub.eth.inner_vlan_id);
+ inner->type = clib_host_to_net_u16 (type);
+
+ }
+ else
+ {
+ h->type = clib_host_to_net_u16 (type);
+ }
+
+ return (rewrite);
+}
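+
+/*
+ * Sketch of a direct call (normally reached via the interface class;
+ * 'sw_if_index' and 'peer' are hypothetical):
+ *
+ *   u8 peer[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
+ *   u8 *rw = ethernet_build_rewrite (vnm, sw_if_index, VNET_LINK_IP4, peer);
+ *   // vec_len (rw) is 14 untagged, 18 with one tag, 22 with two
+ *   vec_free (rw);
+ */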
+
+void
+ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_get (ai);
+
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
+ {
+ default_update_adjacency (vnm, sw_if_index, ai);
+ }
+ else if (FIB_PROTOCOL_IP4 == adj->ia_nh_proto)
+ {
+ arp_update_adjacency (vnm, sw_if_index, ai);
+ }
+ else if (FIB_PROTOCOL_IP6 == adj->ia_nh_proto)
+ {
+ ip6_ethernet_update_adjacency (vnm, sw_if_index, ai);
+ }
+ else
+ {
+ ASSERT (0);
+ }
+}
+
+static clib_error_t *
+ethernet_mac_change (vnet_hw_interface_t * hi, char *mac_address)
+{
+ ethernet_interface_t *ei;
+ ethernet_main_t *em;
+
+ em = &ethernet_main;
+ ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
+
+ vec_validate (hi->hw_address,
+ STRUCT_SIZE_OF (ethernet_header_t, src_address) - 1);
+ clib_memcpy (hi->hw_address, mac_address, vec_len (hi->hw_address));
+
+ clib_memcpy (ei->address, (u8 *) mac_address, sizeof (ei->address));
+ ethernet_arp_change_mac (hi->sw_if_index);
+ ethernet_ndp_change_mac (hi->sw_if_index);
+
+ return (NULL);
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
+ .name = "Ethernet",
+ .format_address = format_ethernet_address,
+ .format_header = format_ethernet_header_with_length,
+ .unformat_hw_address = unformat_ethernet_address,
+ .unformat_header = unformat_ethernet_header,
+ .build_rewrite = ethernet_build_rewrite,
+ .update_adjacency = ethernet_update_adjacency,
+ .mac_addr_change_function = ethernet_mac_change,
+};
+/* *INDENT-ON* */
+
+uword
+unformat_ethernet_interface (unformat_input_t * input, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ u32 *result = va_arg (*args, u32 *);
+ u32 hw_if_index;
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *eif;
+
+ if (!unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index))
+ return 0;
+
+ eif = ethernet_get_interface (em, hw_if_index);
+ if (eif)
+ {
+ *result = hw_if_index;
+ return 1;
+ }
+ return 0;
+}
+
+clib_error_t *
+ethernet_register_interface (vnet_main_t * vnm,
+ u32 dev_class_index,
+ u32 dev_instance,
+ u8 * address,
+ u32 * hw_if_index_return,
+ ethernet_flag_change_function_t flag_change)
+{
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *ei;
+ vnet_hw_interface_t *hi;
+ clib_error_t *error = 0;
+ u32 hw_if_index;
+
+ pool_get (em->interfaces, ei);
+ ei->flag_change = flag_change;
+
+ hw_if_index = vnet_register_interface
+ (vnm,
+ dev_class_index, dev_instance,
+ ethernet_hw_interface_class.index, ei - em->interfaces);
+ *hw_if_index_return = hw_if_index;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ ethernet_setup_node (vnm->vlib_main, hi->output_node_index);
+
+ hi->min_packet_bytes = hi->min_supported_packet_bytes =
+ ETHERNET_MIN_PACKET_BYTES;
+ hi->max_packet_bytes = hi->max_supported_packet_bytes =
+ ETHERNET_MAX_PACKET_BYTES;
+ hi->per_packet_overhead_bytes =
+ /* preamble */ 8 + /* inter frame gap */ 12;
+
+ /* Default ethernet L3 MTU: 9000 bytes, i.e. jumbo frames are allowed. */
+ hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+
+ clib_memcpy (ei->address, address, sizeof (ei->address));
+ vec_free (hi->hw_address);
+ vec_add (hi->hw_address, address, sizeof (ei->address));
+
+ if (error)
+ {
+ pool_put (em->interfaces, ei);
+ return error;
+ }
+ return error;
+}
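+
+/*
+ * Sketch of driver-side use ('my_dev_class' and 'mac' are hypothetical;
+ * error handling elided):
+ *
+ *   u32 hw_if_index;
+ *   clib_error_t *err = ethernet_register_interface
+ *     (vnm, my_dev_class.index, 0, mac, &hw_if_index, 0);
+ *   // the 0s are dev_instance and the optional flag-change callback
+ */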
+
+void
+ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index)
+{
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *ei;
+ vnet_hw_interface_t *hi;
+ main_intf_t *main_intf;
+ vlan_table_t *vlan_table;
+ u32 idx;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
+
+ /* Delete vlan mapping table for dot1q and dot1ad. */
+ main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
+ if (main_intf->dot1q_vlans)
+ {
+ vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
+ for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
+ {
+ if (vlan_table->vlans[idx].qinqs)
+ {
+ pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
+ }
+ }
+ pool_put_index (em->vlan_pool, main_intf->dot1q_vlans);
+ }
+ if (main_intf->dot1ad_vlans)
+ {
+ vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
+ for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
+ {
+ if (vlan_table->vlans[idx].qinqs)
+ {
+ pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
+ }
+ }
+ pool_put_index (em->vlan_pool, main_intf->dot1ad_vlans);
+ }
+
+ vnet_delete_hw_interface (vnm, hw_if_index);
+ pool_put (em->interfaces, ei);
+}
+
+u32
+ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ ethernet_main_t *em = &ethernet_main;
+ vnet_hw_interface_t *hi;
+ ethernet_interface_t *ei;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ ASSERT (hi->hw_class_index == ethernet_hw_interface_class.index);
+
+ ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
+ if (ei->flag_change)
+ return ei->flag_change (vnm, hi, flags);
+ return (u32) ~ 0;
+}
+
+/* Echo packets back to ethernet/l2-input. */
+static uword
+simulated_ethernet_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, n_left_to_next, n_copy, *from, *to_next;
+ u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT;
+ u32 i, next_node_index, bvi_flag, sw_if_index;
+ u32 n_pkts = 0, n_bytes = 0;
+ u32 thread_index = vm->thread_index;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *loop_node;
+ vlib_buffer_t *b;
+
+ // Check the tx node's next index: it is ethernet-input when the
+ // loopback is created, but it can be changed to l2-input if the
+ // loopback is configured as the BVI of a BD (Bridge Domain).
+ loop_node = vec_elt (nm->nodes, node->node_index);
+ next_node_index = loop_node->next_nodes[next_index];
+ bvi_flag = (next_node_index == l2input_node.index) ? 1 : 0;
+
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_args (frame);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ n_copy = clib_min (n_left_from, n_left_to_next);
+
+ clib_memcpy (to_next, from, n_copy * sizeof (from[0]));
+ n_left_to_next -= n_copy;
+ n_left_from -= n_copy;
+ i = 0;
+ b = vlib_get_buffer (vm, from[i]);
+ sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+ while (1)
+ {
+ // Set up RX and TX indices as if received from a real driver,
+ // unless the loopback is used as a BVI. For the BVI case, set the
+ // TX index to L2INPUT_BVI and update l2_len in the packet as
+ // required by the l2 forwarding path.
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index;
+ if (bvi_flag)
+ {
+ vnet_update_l2_len (b);
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = L2INPUT_BVI;
+ }
+ else
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ i++;
+ n_pkts++;
+ n_bytes += vlib_buffer_length_in_chain (vm, b);
+
+ if (i < n_copy)
+ b = vlib_get_buffer (vm, from[i]);
+ else
+ break;
+ }
+ from += n_copy;
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+ /* increment TX interface stat */
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index, n_pkts,
+ n_bytes);
+ }
+
+ return n_left_from;
+}
+
+static u8 *
+format_simulated_ethernet_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "loop%d", dev_instance);
+}
+
+static clib_error_t *
+simulated_ethernet_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (ethernet_simulated_device_class) = {
+ .name = "Loopback",
+ .format_device_name = format_simulated_ethernet_name,
+ .tx_function = simulated_ethernet_interface_tx,
+ .admin_up_down_function = simulated_ethernet_admin_up_down,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Maintain a bitmap of allocated loopback instance numbers.
+ */
+#define LOOPBACK_MAX_INSTANCE (16 * 1024)
+
+static u32
+loopback_instance_alloc (u8 is_specified, u32 want)
+{
+ ethernet_main_t *em = &ethernet_main;
+
+ /*
+ * Check for a dynamically allocated instance number.
+ */
+ if (!is_specified)
+ {
+ u32 bit;
+
+ bit = clib_bitmap_first_clear (em->bm_loopback_instances);
+ if (bit >= LOOPBACK_MAX_INSTANCE)
+ {
+ return ~0;
+ }
+ em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances,
+ bit, 1);
+ return bit;
+ }
+
+ /*
+ * In range?
+ */
+ if (want >= LOOPBACK_MAX_INSTANCE)
+ {
+ return ~0;
+ }
+
+ /*
+ * Already in use?
+ */
+ if (clib_bitmap_get (em->bm_loopback_instances, want))
+ {
+ return ~0;
+ }
+
+ /*
+ * Grant allocation request.
+ */
+ em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances,
+ want, 1);
+
+ return want;
+}
+
+static int
+loopback_instance_free (u32 instance)
+{
+ ethernet_main_t *em = &ethernet_main;
+
+ if (instance >= LOOPBACK_MAX_INSTANCE)
+ {
+ return -1;
+ }
+
+ if (clib_bitmap_get (em->bm_loopback_instances, instance) == 0)
+ {
+ return -1;
+ }
+
+ em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances,
+ instance, 0);
+ return 0;
+}
+
+int
+vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address,
+ u8 is_specified, u32 user_instance)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+ u32 instance;
+ u8 address[6];
+ u32 hw_if_index;
+ vnet_hw_interface_t *hw_if;
+ u32 slot;
+ int rv = 0;
+
+ ASSERT (sw_if_indexp);
+
+ *sw_if_indexp = (u32) ~ 0;
+
+ memset (address, 0, sizeof (address));
+
+ /*
+ * Allocate a loopback instance. Either select one dynamically
+ * or try to use the requested user_instance number.
+ */
+ instance = loopback_instance_alloc (is_specified, user_instance);
+ if (instance == ~0)
+ {
+ return VNET_API_ERROR_INVALID_REGISTRATION;
+ }
+
+ /*
+ * If a zero mac_address is supplied, a default MAC address
+ * (de:ad:00:00:00:00 + instance) is used. Otherwise, the user-supplied
+ * MAC address is programmed on the loopback interface.
+ */
+ if (memcmp (address, mac_address, sizeof (address)))
+ clib_memcpy (address, mac_address, sizeof (address));
+ else
+ {
+ address[0] = 0xde;
+ address[1] = 0xad;
+ address[5] = instance;
+ }
+
+ error = ethernet_register_interface
+ (vnm,
+ ethernet_simulated_device_class.index, instance, address, &hw_if_index,
+ /* flag change */ 0);
+
+ if (error)
+ {
+ rv = VNET_API_ERROR_INVALID_REGISTRATION;
+ clib_error_report (error);
+ return rv;
+ }
+
+ hw_if = vnet_get_hw_interface (vnm, hw_if_index);
+ slot = vlib_node_add_named_next_with_slot
+ (vm, hw_if->tx_node_index,
+ "ethernet-input", VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+
+ {
+ vnet_sw_interface_t *si = vnet_get_hw_sw_interface (vnm, hw_if_index);
+ *sw_if_indexp = si->sw_if_index;
+ }
+
+ return 0;
+}
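+
+/*
+ * Programmatic use (sketch; a zero MAC selects the de:ad:... default):
+ *
+ *   u32 sw_if_index;
+ *   u8 mac[6] = { 0 };
+ *   int rv = vnet_create_loopback_interface (&sw_if_index, mac, 0, 0);
+ *   // rv != 0 on failure, e.g. VNET_API_ERROR_INVALID_REGISTRATION
+ */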
+
+static clib_error_t *
+create_simulated_ethernet_interfaces (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv;
+ u32 sw_if_index;
+ u8 mac_address[6];
+ u8 is_specified = 0;
+ u32 user_instance = 0;
+
+ memset (mac_address, 0, sizeof (mac_address));
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "mac %U", unformat_ethernet_address, mac_address))
+ ;
+ else if (unformat (input, "instance %d", &user_instance))
+ is_specified = 1;
+ else
+ break;
+ }
+
+ rv = vnet_create_loopback_interface (&sw_if_index, mac_address,
+ is_specified, user_instance);
+
+ if (rv)
+ return clib_error_return (0, "vnet_create_loopback_interface failed");
+
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ return 0;
+}
+
+/*?
+ * Create a loopback interface. Optionally, a MAC Address can be
+ * provided. If not provided, de:ad:00:00:00:<loopId> will be used.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback create-interface [mac <mac-addr>] [instance <instance>]}
+ * @cliexcmd{create loopback interface [mac <mac-addr>] [instance <instance>]}
+ * Example of how to create a loopback interface:
+ * @cliexcmd{loopback create-interface}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
+ .path = "loopback create-interface",
+ .short_help = "loopback create-interface [mac <mac-addr>] [instance <instance>]",
+ .function = create_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Create a loopback interface. Optionally, a MAC Address can be
+ * provided. If not provided, de:ad:00:00:00:<loopId> will be used.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback create-interface [mac <mac-addr>] [instance <instance>]}
+ * @cliexcmd{create loopback interface [mac <mac-addr>] [instance <instance>]}
+ * Example of how to create a loopback interface:
+ * @cliexcmd{create loopback interface}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_loopback_interface_command, static) = {
+ .path = "create loopback interface",
+ .short_help = "create loopback interface [mac <mac-addr>] [instance <instance>]",
+ .function = create_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+ethernet_interface_t *
+ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index)
+{
+ vnet_hw_interface_t *i =
+ vnet_get_hw_interface (vnet_get_main (), hw_if_index);
+ return (i->hw_class_index ==
+ ethernet_hw_interface_class.
+ index ? pool_elt_at_index (em->interfaces, i->hw_instance) : 0);
+}
+
+int
+vnet_delete_loopback_interface (u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *si;
+ u32 hw_if_index;
+ vnet_hw_interface_t *hw;
+ u32 instance;
+
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ hw_if_index = si->hw_if_index;
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+ instance = hw->dev_instance;
+
+ if (loopback_instance_free (instance) < 0)
+ {
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
+ ethernet_delete_interface (vnm, hw_if_index);
+
+ return 0;
+}
+
+int
+vnet_delete_sub_interface (u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv = 0;
+
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (si->type == VNET_SW_INTERFACE_TYPE_SUB ||
+ si->type == VNET_SW_INTERFACE_TYPE_P2P)
+ {
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ u64 sup_and_sub_key =
+ ((u64) (si->sup_sw_if_index) << 32) | (u64) si->sub.id;
+
+ hash_unset_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
+ vnet_delete_sw_interface (vnm, sw_if_index);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_INVALID_SUB_SW_IF_INDEX;
+ }
+ return rv;
+}
+
+static clib_error_t *
+delete_simulated_ethernet_interfaces (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv;
+ u32 sw_if_index = ~0;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface not specified");
+
+ rv = vnet_delete_loopback_interface (sw_if_index);
+
+ if (rv)
+ return clib_error_return (0, "vnet_delete_loopback_interface failed");
+
+ return 0;
+}
+
+static clib_error_t *
+delete_sub_interface (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ int rv = 0;
+ u32 sw_if_index = ~0;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else
+ break;
+ }
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface doesn't exist");
+
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ else
+ rv = vnet_delete_sub_interface (sw_if_index);
+ if (rv)
+ return clib_error_return (0, "delete_subinterface_interface failed");
+ return 0;
+}
+
+/*?
+ * Delete a loopback interface.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback delete-interface intfc <interface>}
+ * @cliexcmd{delete loopback interface intfc <interface>}
+ * Example of how to delete a loopback interface:
+ * @cliexcmd{loopback delete-interface intfc loop0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
+ .path = "loopback delete-interface",
+ .short_help = "loopback delete-interface intfc <interface>",
+ .function = delete_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Delete a loopback interface.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback delete-interface intfc <interface>}
+ * @cliexcmd{delete loopback interface intfc <interface>}
+ * Example of how to delete a loopback interface:
+ * @cliexcmd{delete loopback interface intfc loop0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
+ .path = "delete loopback interface",
+ .short_help = "delete loopback interface intfc <interface>",
+ .function = delete_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Delete a sub-interface.
+ *
+ * @cliexpar
+ * Example of how to delete a sub-interface:
+ * @cliexcmd{delete sub-interface GigabitEthernet0/8/0.200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_sub_interface_command, static) = {
+ .path = "delete sub-interface",
+ .short_help = "delete sub-interface <interface>",
+ .function = delete_sub_interface,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/mac_swap.c b/src/vnet/ethernet/mac_swap.c
new file mode 100644
index 00000000..c0fec12e
--- /dev/null
+++ b/src/vnet/ethernet/mac_swap.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/error.h>
+#include <vnet/devices/pci/ige.h>
+#include <vnet/devices/pci/ixge.h>
+#include <vnet/devices/pci/ixgev.h>
+
+typedef struct
+{
+ u32 cached_next_index;
+ u32 cached_sw_if_index;
+
+ /* Hash table to map sw_if_index to next node index */
+ uword *next_node_index_by_sw_if_index;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} mac_swap_main_t;
+
+typedef struct
+{
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u32 next_index;
+} swap_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_swap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ swap_trace_t *t = va_arg (*args, swap_trace_t *);
+
+ s = format (s, "SWAP: dst now %U src now %U sw_if_index %d next_index %d",
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src, t->sw_if_index, t->next_index);
+ return s;
+}
+
+#define foreach_hw_driver_next \
+ _(IP4) \
+ _(IP6) \
+ _(ETHERNET)
+
+mac_swap_main_t mac_swap_main;
+
+static vlib_node_registration_t mac_swap_node;
+
+#define foreach_mac_swap_error \
+_(SWAPS, "mac addresses swapped")
+
+typedef enum
+{
+#define _(sym,str) MAC_SWAP_ERROR_##sym,
+ foreach_mac_swap_error
+#undef _
+ MAC_SWAP_N_ERROR,
+} mac_swap_error_t;
+
+static char *mac_swap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_mac_swap_error
+#undef _
+};
+
+/*
+ * To drop a pkt and increment one of the counters above:
+ *
+ * set b0->error = error_node->errors[MAC_SWAP_ERROR_SWAPS];
+ * set next0 to a disposition index bound to "error-drop".
+ *
+ * To manually increment the specific counter MAC_SWAP_ERROR_SWAPS:
+ *
+ * vlib_node_t *n = vlib_get_node (vm, mac_swap_node.index);
+ * u32 node_counter_base_index = n->error_heap_index;
+ * vlib_error_main_t * em = &vm->error_main;
+ * em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 1;
+ *
+ */
+
+typedef enum
+{
+ MAC_SWAP_NEXT_DROP,
+ MAC_SWAP_N_NEXT,
+} mac_swap_next_t;
+
+static uword
+mac_swap_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ mac_swap_next_t next_index;
+ mac_swap_main_t *msm = &mac_swap_main;
+ vlib_node_t *n = vlib_get_node (vm, mac_swap_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ uword *p0, *p1;
+ u64 tmp0a, tmp0b;
+ u64 tmp1a, tmp1b;
+ ethernet_header_t *h0, *h1;
+
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ next0 = msm->cached_next_index;
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ next1 = msm->cached_next_index;
+
+ if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index0))
+ {
+ p0 =
+ hash_get (msm->next_node_index_by_sw_if_index, sw_if_index0);
+ if (p0 == 0)
+ {
+ vnet_hw_interface_t *hw0;
+
+ hw0 = vnet_get_sup_hw_interface (msm->vnet_main,
+ sw_if_index0);
+
+ next0 = vlib_node_add_next (msm->vlib_main,
+ mac_swap_node.index,
+ hw0->output_node_index);
+ hash_set (msm->next_node_index_by_sw_if_index,
+ sw_if_index0, next0);
+ }
+ else
+ next0 = p0[0];
+ msm->cached_sw_if_index = sw_if_index0;
+ msm->cached_next_index = next0;
+ next1 = next0;
+ }
+ if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index1))
+ {
+ p1 =
+ hash_get (msm->next_node_index_by_sw_if_index, sw_if_index1);
+ if (p1 == 0)
+ {
+ vnet_hw_interface_t *hw1;
+
+ hw1 = vnet_get_sup_hw_interface (msm->vnet_main,
+ sw_if_index1);
+
+ next1 = vlib_node_add_next (msm->vlib_main,
+ mac_swap_node.index,
+ hw1->output_node_index);
+ hash_set (msm->next_node_index_by_sw_if_index,
+ sw_if_index1, next1);
+ }
+ else
+ next1 = p1[0];
+ msm->cached_sw_if_index = sw_if_index1;
+ msm->cached_next_index = next1;
+ }
+
+ em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 2;
+
+ /* reset buffer so we always point at the MAC hdr */
+ vlib_buffer_reset (b0);
+ vlib_buffer_reset (b1);
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ /* Swap 2 x src and dst mac addresses using 8-byte load/stores */
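+ /* Each 8-byte load spans a MAC plus the 2 bytes that follow it:
+ * "a" = dst mac + first 2 src bytes, "b" = src mac + ethertype.
+ * The "a" store to src_address also covers the ethertype field, so
+ * the original ethertype is spliced from "b" into the low 16 bits
+ * of "a" before that store. */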
+ tmp0a = clib_net_to_host_u64 (((u64 *) (h0->dst_address))[0]);
+ tmp1a = clib_net_to_host_u64 (((u64 *) (h1->dst_address))[0]);
+ tmp0b = clib_net_to_host_u64 (((u64 *) (h0->src_address))[0]);
+ tmp1b = clib_net_to_host_u64 (((u64 *) (h1->src_address))[0]);
+ ((u64 *) (h0->dst_address))[0] = clib_host_to_net_u64 (tmp0b);
+ ((u64 *) (h1->dst_address))[0] = clib_host_to_net_u64 (tmp1b);
+ /* Move the ethertype from "b" to "a" */
+ tmp0a &= ~(0xFFFF);
+ tmp1a &= ~(0xFFFF);
+ tmp0a |= tmp0b & 0xFFFF;
+ ((u64 *) (h0->src_address))[0] = clib_host_to_net_u64 (tmp0a);
+ tmp1a |= tmp1b & 0xFFFF;
+ ((u64 *) (h1->src_address))[0] = clib_host_to_net_u64 (tmp1a);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ swap_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ swap_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ uword *p0;
+ u64 tmp0a, tmp0b;
+ ethernet_header_t *h0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ next0 = msm->cached_next_index;
+
+ if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index0))
+ {
+ p0 =
+ hash_get (msm->next_node_index_by_sw_if_index, sw_if_index0);
+ if (p0 == 0)
+ {
+ vnet_hw_interface_t *hw0;
+
+ hw0 = vnet_get_sup_hw_interface (msm->vnet_main,
+ sw_if_index0);
+
+ next0 = vlib_node_add_next (msm->vlib_main,
+ mac_swap_node.index,
+ hw0->output_node_index);
+ hash_set (msm->next_node_index_by_sw_if_index,
+ sw_if_index0, next0);
+ }
+ else
+ next0 = p0[0];
+ msm->cached_sw_if_index = sw_if_index0;
+ msm->cached_next_index = next0;
+ }
+
+ em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 1;
+
+ /* reset buffer so we always point at the MAC hdr */
+ vlib_buffer_reset (b0);
+ h0 = vlib_buffer_get_current (b0);
+
+ /* Exchange src and dst, preserve the ethertype */
+ tmp0a = clib_net_to_host_u64 (((u64 *) (h0->dst_address))[0]);
+ tmp0b = clib_net_to_host_u64 (((u64 *) (h0->src_address))[0]);
+ ((u64 *) (h0->dst_address))[0] = clib_host_to_net_u64 (tmp0b);
+ tmp0a &= ~(0xFFFF);
+ tmp0a |= tmp0b & 0xFFFF;
+ ((u64 *) (h0->src_address))[0] = clib_host_to_net_u64 (tmp0a);
+
+ /* ship it */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ swap_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (mac_swap_node,static) = {
+ .function = mac_swap_node_fn,
+ .name = "mac-swap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_swap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(mac_swap_error_strings),
+ .error_strings = mac_swap_error_strings,
+
+ .n_next_nodes = MAC_SWAP_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [MAC_SWAP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+mac_swap_init (vlib_main_t * vm)
+{
+ mac_swap_main_t *msm = &mac_swap_main;
+
+ msm->next_node_index_by_sw_if_index = hash_create (0, sizeof (uword));
+ msm->cached_next_index = (u32) ~ 0;
+ msm->cached_sw_if_index = (u32) ~ 0;
+ msm->vlib_main = vm;
+ msm->vnet_main = vnet_get_main ();
+
+ /* Driver RX nodes send pkts here... */
+#define _(a) ixge_set_next_node (IXGE_RX_NEXT_##a##_INPUT, "mac-swap");
+ foreach_hw_driver_next
+#undef _
+#define _(a) ixgev_set_next_node (IXGEV_RX_NEXT_##a##_INPUT, "mac-swap");
+ foreach_hw_driver_next
+#undef _
+#define _(a) ige_set_next_node (IGE_RX_NEXT_##a##_INPUT, "mac-swap");
+ foreach_hw_driver_next
+#undef _
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (mac_swap_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
new file mode 100755
index 00000000..f216216d
--- /dev/null
+++ b/src/vnet/ethernet/node.c
@@ -0,0 +1,1419 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_node.c: ethernet packet processing
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/p2p_ethernet.h>
+#include <vppinfra/sparse_vec.h>
+#include <vnet/l2/l2_bvi.h>
+
+
+#define foreach_ethernet_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop") \
+ _ (LLC, "llc-input")
+
+typedef enum
+{
+#define _(s,n) ETHERNET_INPUT_NEXT_##s,
+ foreach_ethernet_input_next
+#undef _
+ ETHERNET_INPUT_N_NEXT,
+} ethernet_input_next_t;
+
+typedef struct
+{
+ u8 packet_data[32];
+} ethernet_input_trace_t;
+
+static u8 *
+format_ethernet_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
+
+ s = format (s, "%U", format_ethernet_header, t->packet_data);
+
+ return s;
+}
+
+vlib_node_registration_t ethernet_input_node;
+
+typedef enum
+{
+ ETHERNET_INPUT_VARIANT_ETHERNET,
+ ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,
+ ETHERNET_INPUT_VARIANT_NOT_L2,
+} ethernet_input_variant_t;
+
+
+// Parse the ethernet header to extract vlan tags and innermost ethertype
+static_always_inline void
+parse_header (ethernet_input_variant_t variant,
+ vlib_buffer_t * b0,
+ u16 * type,
+ u16 * orig_type,
+ u16 * outer_id, u16 * inner_id, u32 * match_flags)
+{
+ u8 vlan_count;
+
+ if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
+ || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
+ {
+ ethernet_header_t *e0;
+
+ e0 = (void *) (b0->data + b0->current_data);
+
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
+
+ vlib_buffer_advance (b0, sizeof (e0[0]));
+
+ *type = clib_net_to_host_u16 (e0->type);
+ }
+ else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
+ {
+ // here when prior node was LLC/SNAP processing
+ u16 *e0;
+
+ e0 = (void *) (b0->data + b0->current_data);
+
+ vlib_buffer_advance (b0, sizeof (e0[0]));
+
+ *type = clib_net_to_host_u16 (e0[0]);
+ }
+
+ // save for distinguishing between dot1q and dot1ad later
+ *orig_type = *type;
+
+ // default the tags to 0 (used if there is no corresponding tag)
+ *outer_id = 0;
+ *inner_id = 0;
+
+ *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
+ vlan_count = 0;
+
+ // check for vlan encaps
+ if (ethernet_frame_is_tagged (*type))
+ {
+ ethernet_vlan_header_t *h0;
+ u16 tag;
+
+ *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
+
+ *outer_id = tag & 0xfff;
+ if (0 == *outer_id)
+ *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;
+
+ *type = clib_net_to_host_u16 (h0->type);
+
+ vlib_buffer_advance (b0, sizeof (h0[0]));
+ vlan_count = 1;
+
+ if (*type == ETHERNET_TYPE_VLAN)
+ {
+ // Double tagged packet
+ *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
+
+ *inner_id = tag & 0xfff;
+
+ *type = clib_net_to_host_u16 (h0->type);
+
+ vlib_buffer_advance (b0, sizeof (h0[0]));
+ vlan_count = 2;
+ if (*type == ETHERNET_TYPE_VLAN)
+ {
+ // More than double tagged packet
+ *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
+
+ vlib_buffer_advance (b0, sizeof (h0[0]));
+ vlan_count = 3; // "unknown" number, aka, 3-or-more
+ }
+ }
+ }
+ ethernet_buffer_set_vlan_count (b0, vlan_count);
+}
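+
+/*
+ * Illustration (added commentary, not part of the original change): the
+ * frame layouts parse_header handles. Each 802.1Q/802.1ad tag inserts
+ * 4 bytes between the source MAC and the payload ethertype:
+ *
+ *   untagged:  | dst mac | src mac | type | payload ...
+ *   1 tag:     | dst mac | src mac | TPID (e.g. 0x8100) | pri/cfi/vid | type | ...
+ *   2 tags:    | dst mac | src mac | 0x88a8 | pri/cfi/vid | 0x8100 | pri/cfi/vid | type | ...
+ *
+ * A tag carrying VID 0 is priority-tagged only, which is why a zero
+ * outer_id clears SUBINT_CONFIG_MATCH_1_TAG above.
+ */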
+
+// Determine the subinterface for this packet, given the result of the
+// vlan table lookups and vlan header parsing. Check the most specific
+// matches first.
+static_always_inline void
+identify_subint (vnet_hw_interface_t * hi,
+ vlib_buffer_t * b0,
+ u32 match_flags,
+ main_intf_t * main_intf,
+ vlan_intf_t * vlan_intf,
+ qinq_intf_t * qinq_intf,
+ u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
+{
+ u32 matched;
+
+ matched = eth_identify_subint (hi, b0, match_flags,
+ main_intf, vlan_intf, qinq_intf,
+ new_sw_if_index, error0, is_l2);
+
+ if (matched)
+ {
+
+      // Perform the L3 my-mac filter.
+      // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
+      // This is required when the interface is in promiscuous mode, otherwise we would forward packets we aren't supposed to.
+ if (!(*is_l2))
+ {
+ ethernet_header_t *e0;
+ e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
+
+ if (!(ethernet_address_cast (e0->dst_address)))
+ {
+ if (!eth_mac_equal ((u8 *) e0, hi->hw_address))
+ {
+ *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ }
+ }
+ }
+
+ // Check for down subinterface
+ *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
+ }
+}
+
+static_always_inline void
+determine_next_node (ethernet_main_t * em,
+ ethernet_input_variant_t variant,
+ u32 is_l20,
+ u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
+{
+ if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
+ {
+ // some error occurred
+ *next0 = ETHERNET_INPUT_NEXT_DROP;
+ }
+ else if (is_l20)
+ {
+ *next0 = em->l2_next;
+ // record the L2 len and reset the buffer so the L2 header is preserved
+ u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
+ vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
+ ASSERT (vnet_buffer (b0)->l2.l2_len ==
+ ethernet_buffer_header_size (b0));
+ vlib_buffer_advance (b0, -ethernet_buffer_header_size (b0));
+
+ // check for common IP/MPLS ethertypes
+ }
+ else if (type0 == ETHERNET_TYPE_IP4)
+ {
+ *next0 = em->l3_next.input_next_ip4;
+ }
+ else if (type0 == ETHERNET_TYPE_IP6)
+ {
+ *next0 = em->l3_next.input_next_ip6;
+ }
+ else if (type0 == ETHERNET_TYPE_MPLS)
+ {
+ *next0 = em->l3_next.input_next_mpls;
+
+ }
+ else if (em->redirect_l3)
+ {
+ // L3 Redirect is on, the cached common next nodes will be
+ // pointing to the redirect node, catch the uncommon types here
+ *next0 = em->redirect_l3_next;
+ }
+ else
+ {
+ // uncommon ethertype, check table
+ u32 i0;
+ i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
+ *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
+ *error0 =
+ i0 ==
+ SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
+
+      // The table is not populated with LLC values, so check that now.
+      // If the variant is not plain ethernet then we came from LLC
+      // processing; don't go back there, drop instead by keeping the
+      // drop/bad table result.
+ if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
+ {
+ *next0 = ETHERNET_INPUT_NEXT_LLC;
+ }
+ }
+}
+
+static_always_inline int
+ethernet_frame_is_any_tagged (u16 type0, u16 type1)
+{
+#if __SSE4_2__
+ const __m128i ethertype_mask = _mm_set_epi16 (ETHERNET_TYPE_VLAN,
+ ETHERNET_TYPE_DOT1AD,
+ ETHERNET_TYPE_VLAN_9100,
+ ETHERNET_TYPE_VLAN_9200,
+ /* duplicate for type1 */
+ ETHERNET_TYPE_VLAN,
+ ETHERNET_TYPE_DOT1AD,
+ ETHERNET_TYPE_VLAN_9100,
+ ETHERNET_TYPE_VLAN_9200);
+
+ __m128i r =
+ _mm_set_epi16 (type0, type0, type0, type0, type1, type1, type1, type1);
+ r = _mm_cmpeq_epi16 (ethertype_mask, r);
+ return !_mm_test_all_zeros (r, r);
+#else
+ return ethernet_frame_is_tagged (type0) || ethernet_frame_is_tagged (type1);
+#endif
+}
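+
+/*
+ * Added commentary: the SSE4.2 path above checks both ethertypes against
+ * all four tag TPIDs with one 8x16-bit vector compare. _mm_set_epi16
+ * fills lanes from most to least significant, so the vectors line up as:
+ *
+ *   lane:  7       6       5       4       3       2       1       0
+ *   mask:  0x8100  0x88a8  0x9100  0x9200  0x8100  0x88a8  0x9100  0x9200
+ *   r:     type0   type0   type0   type0   type1   type1   type1   type1
+ *
+ * _mm_cmpeq_epi16 sets a lane to all-ones on a match, so either frame is
+ * tagged iff the compare result is non-zero.
+ */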
+
+static_always_inline uword
+ethernet_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ ethernet_input_variant_t variant)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ethernet_main_t *em = &ethernet_main;
+ vlib_node_runtime_t *error_node;
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 cached_sw_if_index = ~0;
+ u32 cached_is_l2 = 0; /* shut up gcc */
+ vnet_hw_interface_t *hi = NULL; /* used for main interface only */
+
+ if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
+ error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
+ else
+ error_node = node;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (ethernet_input_trace_t));
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u8 next0, next1, error0, error1;
+ u16 type0, orig_type0, type1, orig_type1;
+ u16 outer_id0, inner_id0, outer_id1, inner_id1;
+ u32 match_flags0, match_flags1;
+ u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
+ new_sw_if_index1, len1;
+ vnet_hw_interface_t *hi0, *hi1;
+ main_intf_t *main_intf0, *main_intf1;
+ vlan_intf_t *vlan_intf0, *vlan_intf1;
+ qinq_intf_t *qinq_intf0, *qinq_intf1;
+ u32 is_l20, is_l21;
+ ethernet_header_t *e0, *e1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *b2, *b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, STORE);
+ vlib_prefetch_buffer_header (b3, STORE);
+
+ CLIB_PREFETCH (b2->data, sizeof (ethernet_header_t), LOAD);
+ CLIB_PREFETCH (b3->data, sizeof (ethernet_header_t), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ error0 = error1 = ETHERNET_ERROR_NONE;
+ e0 = vlib_buffer_get_current (b0);
+ type0 = clib_net_to_host_u16 (e0->type);
+ e1 = vlib_buffer_get_current (b1);
+ type1 = clib_net_to_host_u16 (e1->type);
+
+ /* Speed-path for the untagged case */
+ if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
+ && !ethernet_frame_is_any_tagged (type0, type1)))
+ {
+ main_intf_t *intf0;
+ subint_config_t *subint0;
+ u32 sw_if_index0, sw_if_index1;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ is_l20 = cached_is_l2;
+
+ /* This is probably wholly unnecessary */
+ if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
+ goto slowpath;
+
+ /* Now sw_if_index0 == sw_if_index1 */
+ if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
+ {
+ cached_sw_if_index = sw_if_index0;
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
+ subint0 = &intf0->untagged_subint;
+ cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
+ }
+
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
+ vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
+
+ if (PREDICT_TRUE (is_l20 != 0))
+ {
+ next0 = em->l2_next;
+ vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
+ next1 = em->l2_next;
+ vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
+ }
+ else
+ {
+ if (!ethernet_address_cast (e0->dst_address) &&
+ (hi->hw_address != 0) &&
+ !eth_mac_equal ((u8 *) e0, hi->hw_address))
+ error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ if (!ethernet_address_cast (e1->dst_address) &&
+ (hi->hw_address != 0) &&
+ !eth_mac_equal ((u8 *) e1, hi->hw_address))
+ error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ determine_next_node (em, variant, 0, type0, b0,
+ &error0, &next0);
+ vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+ determine_next_node (em, variant, 0, type1, b1,
+ &error1, &next1);
+ vlib_buffer_advance (b1, sizeof (ethernet_header_t));
+ }
+ goto ship_it01;
+ }
+
+ /* Slow-path for the tagged case */
+ slowpath:
+ parse_header (variant,
+ b0,
+ &type0,
+ &orig_type0, &outer_id0, &inner_id0, &match_flags0);
+
+ parse_header (variant,
+ b1,
+ &type1,
+ &orig_type1, &outer_id1, &inner_id1, &match_flags1);
+
+ old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ eth_vlan_table_lookups (em,
+ vnm,
+ old_sw_if_index0,
+ orig_type0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0, &vlan_intf0, &qinq_intf0);
+
+ eth_vlan_table_lookups (em,
+ vnm,
+ old_sw_if_index1,
+ orig_type1,
+ outer_id1,
+ inner_id1,
+ &hi1,
+ &main_intf1, &vlan_intf1, &qinq_intf1);
+
+ identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
+
+ identify_subint (hi1,
+ b1,
+ match_flags1,
+ main_intf1,
+ vlan_intf1,
+ qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
+
+ // Save RX sw_if_index for later nodes
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ error0 !=
+ ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] =
+ error1 !=
+ ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
+
+ // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
+ if (((new_sw_if_index0 != ~0)
+ && (new_sw_if_index0 != old_sw_if_index0))
+ || ((new_sw_if_index1 != ~0)
+ && (new_sw_if_index1 != old_sw_if_index1)))
+ {
+
+ len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
+ - vnet_buffer (b0)->l2_hdr_offset;
+ len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
+ - vnet_buffer (b1)->l2_hdr_offset;
+
+ stats_n_packets += 2;
+ stats_n_bytes += len0 + len1;
+
+ if (PREDICT_FALSE
+ (!(new_sw_if_index0 == stats_sw_if_index
+ && new_sw_if_index1 == stats_sw_if_index)))
+ {
+ stats_n_packets -= 2;
+ stats_n_bytes -= len0 + len1;
+
+ if (new_sw_if_index0 != old_sw_if_index0
+ && new_sw_if_index0 != ~0)
+		vlib_increment_combined_counter
+		  (vnm->interface_main.combined_sw_if_counters
+		   + VNET_INTERFACE_COUNTER_RX,
+		   thread_index, new_sw_if_index0, 1, len0);
+ if (new_sw_if_index1 != old_sw_if_index1
+ && new_sw_if_index1 != ~0)
+		vlib_increment_combined_counter
+		  (vnm->interface_main.combined_sw_if_counters
+		   + VNET_INTERFACE_COUNTER_RX,
+		   thread_index, new_sw_if_index1, 1, len1);
+
+ if (new_sw_if_index0 == new_sw_if_index1)
+ {
+ if (stats_n_packets > 0)
+ {
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = stats_n_bytes = 0;
+ }
+ stats_sw_if_index = new_sw_if_index0;
+ }
+ }
+ }
+
+ if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
+ is_l20 = is_l21 = 0;
+
+ determine_next_node (em, variant, is_l20, type0, b0, &error0,
+ &next0);
+ determine_next_node (em, variant, is_l21, type1, b1, &error1,
+ &next1);
+
+ ship_it01:
+ b0->error = error_node->errors[error0];
+ b1->error = error_node->errors[error1];
+
+ // verify speculative enqueue
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u8 error0, next0;
+ u16 type0, orig_type0;
+ u16 outer_id0, inner_id0;
+ u32 match_flags0;
+ u32 old_sw_if_index0, new_sw_if_index0, len0;
+ vnet_hw_interface_t *hi0;
+ main_intf_t *main_intf0;
+ vlan_intf_t *vlan_intf0;
+ qinq_intf_t *qinq_intf0;
+ ethernet_header_t *e0;
+ u32 is_l20;
+
+ // Prefetch next iteration
+ if (n_left_from > 1)
+ {
+ vlib_buffer_t *p2;
+
+ p2 = vlib_get_buffer (vm, from[1]);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ error0 = ETHERNET_ERROR_NONE;
+ e0 = vlib_buffer_get_current (b0);
+ type0 = clib_net_to_host_u16 (e0->type);
+
+ /* Speed-path for the untagged case */
+ if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
+ && !ethernet_frame_is_tagged (type0)))
+ {
+ main_intf_t *intf0;
+ subint_config_t *subint0;
+ u32 sw_if_index0;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ is_l20 = cached_is_l2;
+
+ if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
+ {
+ cached_sw_if_index = sw_if_index0;
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
+ subint0 = &intf0->untagged_subint;
+ cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
+ }
+
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
+
+ if (PREDICT_TRUE (is_l20 != 0))
+ {
+ next0 = em->l2_next;
+ vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
+ }
+ else
+ {
+ if (!ethernet_address_cast (e0->dst_address) &&
+ (hi->hw_address != 0) &&
+ !eth_mac_equal ((u8 *) e0, hi->hw_address))
+ error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ determine_next_node (em, variant, 0, type0, b0,
+ &error0, &next0);
+ vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+ }
+ goto ship_it0;
+ }
+
+ /* Slow-path for the tagged case */
+ parse_header (variant,
+ b0,
+ &type0,
+ &orig_type0, &outer_id0, &inner_id0, &match_flags0);
+
+ old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ eth_vlan_table_lookups (em,
+ vnm,
+ old_sw_if_index0,
+ orig_type0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0, &vlan_intf0, &qinq_intf0);
+
+ identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
+
+ // Save RX sw_if_index for later nodes
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ error0 !=
+ ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
+
+	  // Increment subinterface stats.
+	  // Note that interface-level counters have already been incremented
+	  // prior to calling this function, so only subinterface counters
+	  // are incremented here.
+	  //
+	  // Interface-level counters include packets received on the main
+	  // interface and all subinterfaces. Subinterface-level counters
+	  // include only those packets received on that subinterface.
+	  // Increment stats only if the subint is valid and is not the main intf.
+ if ((new_sw_if_index0 != ~0)
+ && (new_sw_if_index0 != old_sw_if_index0))
+ {
+
+ len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
+ - vnet_buffer (b0)->l2_hdr_offset;
+
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ // Batch stat increments from the same subinterface so counters
+ // don't need to be incremented for every packet.
+ if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+
+ if (new_sw_if_index0 != ~0)
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index, new_sw_if_index0, 1, len0);
+ if (stats_n_packets > 0)
+ {
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = stats_n_bytes = 0;
+ }
+ stats_sw_if_index = new_sw_if_index0;
+ }
+ }
+
+ if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
+ is_l20 = 0;
+
+ determine_next_node (em, variant, is_l20, type0, b0, &error0,
+ &next0);
+
+ ship_it0:
+ b0->error = error_node->errors[error0];
+
+ // verify speculative enqueue
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ // Increment any remaining batched stats
+ if (stats_n_packets > 0)
+ {
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+ethernet_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return ethernet_input_inline (vm, node, from_frame,
+ ETHERNET_INPUT_VARIANT_ETHERNET);
+}
+
+static uword
+ethernet_input_type (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return ethernet_input_inline (vm, node, from_frame,
+ ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
+}
+
+static uword
+ethernet_input_not_l2 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return ethernet_input_inline (vm, node, from_frame,
+ ETHERNET_INPUT_VARIANT_NOT_L2);
+}
+
+
+// Return the subinterface config struct for the given sw_if_index
+// Also return via parameter the appropriate match flags for the
+// configured number of tags.
+// On error (unsupported or not ethernet) return 0.
+static subint_config_t *
+ethernet_sw_interface_get_config (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 * flags, u32 * unsupported)
+{
+ ethernet_main_t *em = &ethernet_main;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ main_intf_t *main_intf;
+ vlan_table_t *vlan_table;
+ qinq_table_t *qinq_table;
+ subint_config_t *subint = 0;
+
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
+ {
+ *unsupported = 0;
+ goto done; // non-ethernet interface
+ }
+
+ // ensure there's an entry for the main intf (shouldn't really be necessary)
+ vec_validate (em->main_intfs, hi->hw_if_index);
+ main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
+
+ // Locate the subint for the given ethernet config
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
+ {
+ p2p_ethernet_main_t *p2pm = &p2p_main;
+ u32 p2pe_sw_if_index =
+ p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
+ if (p2pe_sw_if_index == ~0)
+ {
+ pool_get (p2pm->p2p_subif_pool, subint);
+ si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
+ }
+ else
+ subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
+ *flags = SUBINT_CONFIG_P2P;
+ }
+ else if (si->sub.eth.flags.default_sub)
+ {
+ subint = &main_intf->default_subint;
+ *flags = SUBINT_CONFIG_MATCH_0_TAG |
+ SUBINT_CONFIG_MATCH_1_TAG |
+ SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
+ }
+ else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
+ {
+ // if no flags are set then this is a main interface
+ // so treat as untagged
+ subint = &main_intf->untagged_subint;
+ *flags = SUBINT_CONFIG_MATCH_0_TAG;
+ }
+ else
+ {
+ // one or two tags
+ // first get the vlan table
+ if (si->sub.eth.flags.dot1ad)
+ {
+ if (main_intf->dot1ad_vlans == 0)
+ {
+ // Allocate a vlan table from the pool
+ pool_get (em->vlan_pool, vlan_table);
+ main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
+ }
+ else
+ {
+ // Get ptr to existing vlan table
+ vlan_table =
+ vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
+ }
+ }
+ else
+ { // dot1q
+ if (main_intf->dot1q_vlans == 0)
+ {
+ // Allocate a vlan table from the pool
+ pool_get (em->vlan_pool, vlan_table);
+ main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
+ }
+ else
+ {
+ // Get ptr to existing vlan table
+ vlan_table =
+ vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
+ }
+ }
+
+ if (si->sub.eth.flags.one_tag)
+ {
+ *flags = si->sub.eth.flags.exact_match ?
+ SUBINT_CONFIG_MATCH_1_TAG :
+ (SUBINT_CONFIG_MATCH_1_TAG |
+ SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
+
+ if (si->sub.eth.flags.outer_vlan_id_any)
+ {
+ // not implemented yet
+ *unsupported = 1;
+ goto done;
+ }
+ else
+ {
+ // a single vlan, a common case
+	      subint =
+		&vlan_table->vlans[si->sub.eth.outer_vlan_id].single_tag_subint;
+ }
+
+ }
+ else
+ {
+ // Two tags
+ *flags = si->sub.eth.flags.exact_match ?
+ SUBINT_CONFIG_MATCH_2_TAG :
+ (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
+
+ if (si->sub.eth.flags.outer_vlan_id_any
+ && si->sub.eth.flags.inner_vlan_id_any)
+ {
+ // not implemented yet
+ *unsupported = 1;
+ goto done;
+ }
+
+ if (si->sub.eth.flags.inner_vlan_id_any)
+ {
+ // a specific outer and "any" inner
+ // don't need a qinq table for this
+	      subint =
+		&vlan_table->vlans[si->sub.eth.outer_vlan_id].inner_any_subint;
+ if (si->sub.eth.flags.exact_match)
+ {
+ *flags = SUBINT_CONFIG_MATCH_2_TAG;
+ }
+ else
+ {
+ *flags = SUBINT_CONFIG_MATCH_2_TAG |
+ SUBINT_CONFIG_MATCH_3_TAG;
+ }
+ }
+ else
+ {
+	      // a specific outer + specific inner vlan id, a common case
+
+ // get the qinq table
+ if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
+ {
+ // Allocate a qinq table from the pool
+ pool_get (em->qinq_pool, qinq_table);
+ vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
+ qinq_table - em->qinq_pool;
+ }
+ else
+ {
+ // Get ptr to existing qinq table
+		  qinq_table =
+		    vec_elt_at_index (em->qinq_pool,
+				      vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs);
+ }
+ subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
+ }
+ }
+ }
+
+done:
+ return subint;
+}
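+
+/*
+ * Example (added, illustrative): for a dot1q 100 exact-match
+ * sub-interface this returns the single_tag_subint of VLAN 100 in the
+ * main interface's dot1q table with *flags = SUBINT_CONFIG_MATCH_1_TAG;
+ * without exact-match the same subint also matches frames carrying
+ * extra inner tags (MATCH_1_TAG | MATCH_2_TAG | MATCH_3_TAG).
+ */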
+
+clib_error_t *
+ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+ subint_config_t *subint;
+ u32 dummy_flags;
+ u32 dummy_unsup;
+ clib_error_t *error = 0;
+
+ // Find the config for this subinterface
+ subint =
+ ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
+ &dummy_unsup);
+
+ if (subint == 0)
+ {
+ // not implemented yet or not ethernet
+ goto done;
+ }
+
+ subint->sw_if_index =
+ ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
+
+done:
+ return error;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
+
+
+// Set the L2/L3 mode for the subinterface
+void
+ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
+{
+ subint_config_t *subint;
+ u32 dummy_flags;
+ u32 dummy_unsup;
+ int is_port;
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+
+ is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
+
+ // Find the config for this subinterface
+ subint =
+ ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
+ &dummy_unsup);
+
+ if (subint == 0)
+ {
+ // unimplemented or not ethernet
+ goto done;
+ }
+
+ // Double check that the config we found is for our interface (or the interface is down)
+ ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
+
+ if (l2)
+ {
+ subint->flags |= SUBINT_CONFIG_L2;
+ if (is_port)
+ subint->flags |=
+ SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
+ | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
+ }
+ else
+ {
+ subint->flags &= ~SUBINT_CONFIG_L2;
+ if (is_port)
+ subint->flags &=
+ ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
+ | SUBINT_CONFIG_MATCH_3_TAG);
+ }
+
+done:
+ return;
+}
+
+/*
+ * Set the L2/L3 mode for the subinterface regardless of port
+ */
+void
+ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
+ u32 sw_if_index, u32 l2)
+{
+ subint_config_t *subint;
+ u32 dummy_flags;
+ u32 dummy_unsup;
+
+ /* Find the config for this subinterface */
+ subint =
+ ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
+ &dummy_unsup);
+
+ if (subint == 0)
+ {
+ /* unimplemented or not ethernet */
+ goto done;
+ }
+
+ /*
+ * Double check that the config we found is for our interface (or the
+ * interface is down)
+ */
+ ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
+
+ if (l2)
+ {
+ subint->flags |= SUBINT_CONFIG_L2;
+ }
+ else
+ {
+ subint->flags &= ~SUBINT_CONFIG_L2;
+ }
+
+done:
+ return;
+}
+
+static clib_error_t *
+ethernet_sw_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index, u32 is_create)
+{
+ clib_error_t *error = 0;
+ subint_config_t *subint;
+ u32 match_flags;
+ u32 unsupported = 0;
+
+ // Find the config for this subinterface
+ subint =
+ ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
+ &unsupported);
+
+ if (subint == 0)
+ {
+ // not implemented yet or not ethernet
+ if (unsupported)
+ {
+ // this is the NYI case
+ error = clib_error_return (0, "not implemented yet");
+ }
+ goto done;
+ }
+
+ if (!is_create)
+ {
+ subint->flags = 0;
+ return error;
+ }
+
+ // Initialize the subint
+ if (subint->flags & SUBINT_CONFIG_VALID)
+ {
+ // Error vlan already in use
+ error = clib_error_return (0, "vlan is already in use");
+ }
+ else
+ {
+      // Note that config is L3 by default
+ subint->flags = SUBINT_CONFIG_VALID | match_flags;
+ subint->sw_if_index = ~0; // because interfaces are initially down
+ }
+
+done:
+ return error;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
+
+static char *ethernet_error_strings[] = {
+#define ethernet_error(n,c,s) s,
+#include "error.def"
+#undef ethernet_error
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ethernet_input_node) = {
+ .function = ethernet_input,
+ .name = "ethernet-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = ETHERNET_N_ERROR,
+ .error_strings = ethernet_error_strings,
+ .n_next_nodes = ETHERNET_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
+ foreach_ethernet_input_next
+#undef _
+ },
+ .format_buffer = format_ethernet_header_with_length,
+ .format_trace = format_ethernet_input_trace,
+ .unformat_buffer = unformat_ethernet_header,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_node, ethernet_input)
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ethernet_input_type_node, static) = {
+ .function = ethernet_input_type,
+ .name = "ethernet-input-type",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_next_nodes = ETHERNET_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
+ foreach_ethernet_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_type_node, ethernet_input_type)
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ethernet_input_not_l2_node, static) = {
+ .function = ethernet_input_not_l2,
+ .name = "ethernet-input-not-l2",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_next_nodes = ETHERNET_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
+ foreach_ethernet_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+
+/* *INDENT-OFF* */
+VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_not_l2_node,
+ ethernet_input_not_l2)
+/* *INDENT-ON* */
+
+
+void
+ethernet_set_rx_redirect (vnet_main_t * vnm,
+ vnet_hw_interface_t * hi, u32 enable)
+{
+  // Ensure all packets go to ethernet-input (i.e. untagged ipv4 packets
+  // don't go directly to ip4-input)
+ vnet_hw_interface_rx_redirect_to_node
+ (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
+}
+
+
+/*
+ * Initialization and registration for the next_by_ethernet structure
+ */
+
+clib_error_t *
+next_by_ethertype_init (next_by_ethertype_t * l3_next)
+{
+ l3_next->input_next_by_type = sparse_vec_new
+ ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
+ /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
+
+ vec_validate (l3_next->sparse_index_by_input_next_index,
+ ETHERNET_INPUT_NEXT_DROP);
+ vec_validate (l3_next->sparse_index_by_input_next_index,
+ ETHERNET_INPUT_NEXT_PUNT);
+ l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
+ SPARSE_VEC_INVALID_INDEX;
+ l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
+ SPARSE_VEC_INVALID_INDEX;
+
+ /*
+ * Make sure we don't wipe out an ethernet registration by mistake
+ * Can happen if init function ordering constraints are missing.
+ */
+ if (CLIB_DEBUG > 0)
+ {
+ ethernet_main_t *em = &ethernet_main;
+ ASSERT (em->next_by_ethertype_register_called == 0);
+ }
+
+ return 0;
+}
+
+// Add an ethertype -> next index mapping to the structure
+clib_error_t *
+next_by_ethertype_register (next_by_ethertype_t * l3_next,
+ u32 ethertype, u32 next_index)
+{
+ u32 i;
+ u16 *n;
+ ethernet_main_t *em = &ethernet_main;
+
+ if (CLIB_DEBUG > 0)
+ {
+ ethernet_main_t *em = &ethernet_main;
+ em->next_by_ethertype_register_called = 1;
+ }
+
+ /* Setup ethernet type -> next index sparse vector mapping. */
+ n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
+ n[0] = next_index;
+
+ /* Rebuild next index -> sparse index inverse mapping when sparse vector
+ is updated. */
+ vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
+ for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
+ l3_next->
+ sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
+
+  // do not allow the cached next indices to be updated if L3
+  // redirect is enabled, as it will have overwritten them
+ if (!em->redirect_l3)
+ {
+ // Cache common ethertypes directly
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ l3_next->input_next_ip4 = next_index;
+ }
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ {
+ l3_next->input_next_ip6 = next_index;
+ }
+ else if (ethertype == ETHERNET_TYPE_MPLS)
+ {
+ l3_next->input_next_mpls = next_index;
+ }
+ }
+ return 0;
+}
+
+
+static clib_error_t *
+ethernet_input_init (vlib_main_t * vm)
+{
+ ethernet_main_t *em = &ethernet_main;
+ __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
+ __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
+
+ ethernet_setup_node (vm, ethernet_input_node.index);
+ ethernet_setup_node (vm, ethernet_input_type_node.index);
+ ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
+
+ next_by_ethertype_init (&em->l3_next);
+
+ // Initialize pools and vector for vlan parsing
+ vec_validate (em->main_intfs, 10); // 10 main interfaces
+ pool_alloc (em->vlan_pool, 10);
+ pool_alloc (em->qinq_pool, 1);
+
+ // The first vlan pool will always be reserved for an invalid table
+ pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0
+ // The first qinq pool will always be reserved for an invalid table
+ pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ethernet_input_init);
+
+void
+ethernet_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index)
+{
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_type_info_t *ti;
+ u32 i;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ ti = ethernet_get_type_info (em, type);
+ ti->node_index = node_index;
+ ti->next_index = vlib_node_add_next (vm,
+ ethernet_input_node.index, node_index);
+ i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
+ ASSERT (i == ti->next_index);
+
+ i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
+ ASSERT (i == ti->next_index);
+
+ // Add the L3 node for this ethertype to the next nodes structure
+ next_by_ethertype_register (&em->l3_next, type, ti->next_index);
+
+ // Call the registration functions for other nodes that want a mapping
+ l2bvi_register_input_type (vm, type, node_index);
+}
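+
+/*
+ * Typical use (added, illustrative): an L3 protocol registers its input
+ * node for its ethertype at init time, e.g. ip4 does
+ *
+ *   ethernet_register_input_type (vm, ETHERNET_TYPE_IP4,
+ *                                 ip4_input_node.index);
+ *
+ * which feeds next_by_ethertype_register above so determine_next_node
+ * can dispatch IPv4 frames via the cached input_next_ip4 arc.
+ */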
+
+void
+ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
+{
+ ethernet_main_t *em = &ethernet_main;
+ u32 i;
+
+ em->l2_next =
+ vlib_node_add_next (vm, ethernet_input_node.index, node_index);
+
+ /*
+ * Even if we never use these arcs, we have to align the next indices...
+ */
+ i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
+
+ ASSERT (i == em->l2_next);
+
+ i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
+ ASSERT (i == em->l2_next);
+}
+
+// Register a next node for L3 redirect, and enable L3 redirect
+void
+ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
+{
+ ethernet_main_t *em = &ethernet_main;
+ u32 i;
+
+ em->redirect_l3 = 1;
+ em->redirect_l3_next = vlib_node_add_next (vm,
+ ethernet_input_node.index,
+ node_index);
+ /*
+ * Change the cached next nodes to the redirect node
+ */
+ em->l3_next.input_next_ip4 = em->redirect_l3_next;
+ em->l3_next.input_next_ip6 = em->redirect_l3_next;
+ em->l3_next.input_next_mpls = em->redirect_l3_next;
+
+ /*
+ * Even if we never use these arcs, we have to align the next indices...
+ */
+ i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
+
+ ASSERT (i == em->redirect_l3_next);
+
+ i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
+
+ ASSERT (i == em->redirect_l3_next);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/p2p_ethernet.api b/src/vnet/ethernet/p2p_ethernet.api
new file mode 100644
index 00000000..8fb66376
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet.api
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+define p2p_ethernet_add
+{
+ u32 client_index;
+ u32 context;
+ u32 parent_if_index;
+ u32 subif_id;
+ u8 remote_mac[6];
+};
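+
+/* Added note: parent_if_index is the sw_if_index of the parent ethernet
+   interface and remote_mac is the client MAC address; the reply carries
+   the sw_if_index of the created P2P sub-interface (~0 on failure). */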
+
+define p2p_ethernet_add_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+define p2p_ethernet_del
+{
+ u32 client_index;
+ u32 context;
+ u32 parent_if_index;
+ u8 remote_mac[6];
+};
+
+define p2p_ethernet_del_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c
new file mode 100644
index 00000000..cf3c56b5
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet.c
@@ -0,0 +1,276 @@
+/*
+ * p2p_ethernet.c: p2p ethernet
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/bihash_16_8.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/p2p_ethernet.h>
+#include <vnet/l2/l2_input.h>
+
+p2p_ethernet_main_t p2p_main;
+
+static void
+create_p2pe_key (p2p_key_t * p2pe_key, u32 parent_if_index, u8 * client_mac)
+{
+ clib_memcpy (p2pe_key->mac, client_mac, 6);
+ p2pe_key->pad1 = 0;
+ p2pe_key->hw_if_index = parent_if_index;
+ p2pe_key->pad2 = 0;
+}
+
+u32
+p2p_ethernet_lookup (u32 parent_if_index, u8 * client_mac)
+{
+ p2p_ethernet_main_t *p2pm = &p2p_main;
+ p2p_key_t p2pe_key;
+ uword *p;
+
+ create_p2pe_key (&p2pe_key, parent_if_index, client_mac);
+ p = hash_get_mem (p2pm->p2p_ethernet_by_key, &p2pe_key);
+ if (p)
+ return p[0];
+
+ return ~0;
+}
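+
+/*
+ * Added note: p2p_key_t is hashed as raw memory (hash_create_mem with
+ * sizeof (p2p_key_t) in p2p_ethernet_init below), so create_p2pe_key
+ * must zero the pad fields; otherwise keys with identical mac and
+ * hw_if_index but different padding garbage would hash differently.
+ */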
+
+int
+p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
+ u8 * client_mac, u32 p2pe_subif_id, int is_add,
+ u32 * p2pe_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ p2p_ethernet_main_t *p2pm = &p2p_main;
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ u32 p2pe_sw_if_index = ~0;
+ p2pe_sw_if_index = p2p_ethernet_lookup (parent_if_index, client_mac);
+
+ if (p2pe_if_index)
+ *p2pe_if_index = ~0;
+
+ if (is_add)
+ {
+ if (p2pe_sw_if_index == ~0)
+ {
+ vnet_hw_interface_t *hi;
+
+ hi = vnet_get_hw_interface (vnm, parent_if_index);
+ if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE)
+ return VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED;
+
+ u64 sup_and_sub_key =
+ ((u64) (hi->sw_if_index) << 32) | (u64) p2pe_subif_id;
+ uword *p;
+ p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
+ if (p)
+ {
+ if (CLIB_DEBUG > 0)
+ clib_warning
+ ("p2p ethernet sub-interface on sw_if_index %d with sub id %d already exists\n",
+ hi->sw_if_index, p2pe_subif_id);
+ return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ }
+ vnet_sw_interface_t template = {
+ .type = VNET_SW_INTERFACE_TYPE_P2P,
+ .flood_class = VNET_FLOOD_CLASS_NORMAL,
+ .sup_sw_if_index = hi->sw_if_index,
+ .sub.id = p2pe_subif_id
+ };
+
+ clib_memcpy (template.p2p.client_mac, client_mac,
+ sizeof (template.p2p.client_mac));
+
+ if (vnet_create_sw_interface (vnm, &template, &p2pe_sw_if_index))
+ return VNET_API_ERROR_SUBIF_CREATE_FAILED;
+
+ /* Allocate counters for this interface. */
+ {
+ u32 i;
+
+ vnet_interface_counter_lock (im);
+
+ for (i = 0; i < vec_len (im->sw_if_counters); i++)
+ {
+ vlib_validate_simple_counter (&im->sw_if_counters[i],
+ p2pe_sw_if_index);
+ vlib_zero_simple_counter (&im->sw_if_counters[i],
+ p2pe_sw_if_index);
+ }
+
+ for (i = 0; i < vec_len (im->combined_sw_if_counters); i++)
+ {
+ vlib_validate_combined_counter (&im->combined_sw_if_counters
+ [i], p2pe_sw_if_index);
+ vlib_zero_combined_counter (&im->combined_sw_if_counters[i],
+ p2pe_sw_if_index);
+ }
+
+ vnet_interface_counter_unlock (im);
+ }
+
+ sup_and_sub_key =
+ ((u64) (hi->sw_if_index) << 32) | (u64) p2pe_subif_id;
+ u64 *kp = clib_mem_alloc (sizeof (*kp));
+
+ *kp = sup_and_sub_key;
+ hash_set (hi->sub_interface_sw_if_index_by_id, p2pe_subif_id,
+ p2pe_sw_if_index);
+ hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, p2pe_sw_if_index);
+
+ p2p_key_t *p_p2pe_key;
+ p_p2pe_key = clib_mem_alloc (sizeof (*p_p2pe_key));
+ create_p2pe_key (p_p2pe_key, parent_if_index, client_mac);
+ hash_set_mem (p2pm->p2p_ethernet_by_key, p_p2pe_key,
+ p2pe_sw_if_index);
+
+ if (p2pe_if_index)
+ *p2pe_if_index = p2pe_sw_if_index;
+
+ vec_validate (p2pm->p2p_ethernet_by_sw_if_index, parent_if_index);
+ if (p2pm->p2p_ethernet_by_sw_if_index[parent_if_index] == 0)
+ {
+ vnet_feature_enable_disable ("device-input",
+ "p2p-ethernet-input",
+ parent_if_index, 1, 0, 0);
+ /* Set promiscuous mode on the l2 interface */
+ ethernet_set_flags (vnm, parent_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+
+ }
+ p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]++;
+	  /* put the new sub-interface in L3 mode */
+	  set_int_l2_mode (vm, vnm, MODE_L3, p2pe_sw_if_index, 0, 0, 0, 0);
+ return 0;
+ }
+ return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ }
+ else
+ {
+ if (p2pe_sw_if_index == ~0)
+ return VNET_API_ERROR_SUBIF_DOESNT_EXIST;
+ else
+ {
+ int rv = 0;
+ rv = vnet_delete_sub_interface (p2pe_sw_if_index);
+ if (!rv)
+ {
+ vec_validate (p2pm->p2p_ethernet_by_sw_if_index,
+ parent_if_index);
+ if (p2pm->p2p_ethernet_by_sw_if_index[parent_if_index] == 1)
+ {
+ vnet_feature_enable_disable ("device-input",
+ "p2p-ethernet-input",
+ parent_if_index, 0, 0, 0);
+ /* Disable promiscuous mode on the l2 interface */
+ ethernet_set_flags (vnm, parent_if_index, 0);
+ }
+ p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]--;
+
+ /* Remove p2p_ethernet from hash map */
+ p2p_key_t *p_p2pe_key;
+ p_p2pe_key = clib_mem_alloc (sizeof (*p_p2pe_key));
+ create_p2pe_key (p_p2pe_key, parent_if_index, client_mac);
+ hash_unset_mem (p2pm->p2p_ethernet_by_key, p_p2pe_key);
+ }
+ return rv;
+ }
+ }
+}
+
+static clib_error_t *
+vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ int is_add = 1;
+ int remote_mac = 0;
+ u32 hw_if_index = ~0;
+ u32 sub_id = ~0;
+ u8 client_mac[6];
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ ;
+ else if (unformat (input, "%U", unformat_ethernet_address, &client_mac))
+ remote_mac = 1;
+ else if (unformat (input, "sub-id %d", &sub_id))
+ ;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (hw_if_index == ~0)
+ return clib_error_return (0, "Please specify parent interface ...");
+ if (!remote_mac)
+ return clib_error_return (0, "Please specify client MAC address ...");
+ if (sub_id == ~0 && is_add)
+ return clib_error_return (0, "Please specify sub-interface id ...");
+
+  int rv;
+ rv = p2p_ethernet_add_del (vm, hw_if_index, client_mac, sub_id, is_add, 0);
+ switch (rv)
+ {
+ case VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED:
+ return clib_error_return (0,
+ "not allowed as parent interface belongs to a BondEthernet interface");
+    case VNET_API_ERROR_SUBIF_ALREADY_EXISTS:
+      return clib_error_return (0,
+				"p2p ethernet for given parent interface and client mac already exists");
+    case VNET_API_ERROR_SUBIF_CREATE_FAILED:
+      return clib_error_return (0,
+				"couldn't create p2p ethernet subinterface");
+    case VNET_API_ERROR_SUBIF_DOESNT_EXIST:
+      return clib_error_return (0,
+				"p2p ethernet for given parent interface and client mac doesn't exist");
+ default:
+ break;
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) =
+{
+.path = "p2p_ethernet ",.function = vnet_p2p_ethernet_add_del,.short_help =
+ "p2p_ethernet <intfc> <mac-address> [sub-id <id> | del]",};
+
+static clib_error_t *
+p2p_ethernet_init (vlib_main_t * vm)
+{
+ p2p_ethernet_main_t *p2pm = &p2p_main;
+
+ p2pm->vlib_main = vm;
+ p2pm->vnet_main = vnet_get_main ();
+ p2pm->p2p_ethernet_by_key =
+ hash_create_mem (0, sizeof (p2p_key_t), sizeof (uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (p2p_ethernet_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/p2p_ethernet.h b/src/vnet/ethernet/p2p_ethernet.h
new file mode 100644
index 00000000..bb1e2896
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_p2p_ethernet_h
+#define included_vnet_p2p_ethernet_h
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+typedef struct {
+ /**
+ * Hash mapping parent sw_if_index and client mac address to p2p_ethernet sub-interface
+ */
+ uword * p2p_ethernet_by_key;
+
+ u32 *p2p_ethernet_by_sw_if_index;
+
+  // Pool of p2p subinterfaces
+ subint_config_t *p2p_subif_pool;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} p2p_ethernet_main_t;
+
+extern p2p_ethernet_main_t p2p_main;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 p2pe_sw_if_index;
+ u8 client_mac[6];
+} p2p_ethernet_trace_t;
+
+/**
+ * @brief Key struct for P2P Ethernet
+ * Key fields: parent sw_if_index and client mac address
+ * all fields in NET byte order
+ */
+
+typedef struct {
+ u8 mac[6];
+ u16 pad1; // padding for u64 mac address
+ u32 hw_if_index;
+ u32 pad2; // padding for u64
+} p2p_key_t;
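+
+/* Added note: the pads round the key up to two u64s (mac + pad1 = 8
+   bytes, hw_if_index + pad2 = 8 bytes) so it can be compared and hashed
+   as plain 16-byte memory. */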
+
+u32 p2p_ethernet_lookup (u32 parent_sw_if_index, u8* client_mac);
+int p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, u8 * client_mac, u32 sub_id, int is_add, u32 *p2pe_if_index);
+
+#endif /* included_vnet_p2p_ethernet_h */
diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c
new file mode 100644
index 00000000..f2c730b4
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet_api.c
@@ -0,0 +1,137 @@
+/*
+ *------------------------------------------------------------------
+ * p2p_ethernet_api.c - p2p ethernet api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/vnet_msg_enum.h>
+#include <vnet/ethernet/p2p_ethernet.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(P2P_ETHERNET_ADD, p2p_ethernet_add) \
+_(P2P_ETHERNET_DEL, p2p_ethernet_del)
+
+void
+vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp)
+{
+ vl_api_p2p_ethernet_add_reply_t *rmp;
+ vlib_main_t *vm = vlib_get_main ();
+ int rv;
+
+ u32 parent_if_index = htonl (mp->parent_if_index);
+ u32 sub_id = htonl (mp->subif_id);
+ u32 p2pe_if_index;
+ u8 remote_mac[6];
+
+ clib_memcpy (remote_mac, mp->remote_mac, 6);
+ rv =
+ p2p_ethernet_add_del (vm, parent_if_index, remote_mac, sub_id, 1,
+ &p2pe_if_index);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_P2P_ETHERNET_ADD_REPLY,
+ ({
+ rmp->sw_if_index = htonl(p2pe_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+void
+vl_api_p2p_ethernet_del_t_handler (vl_api_p2p_ethernet_del_t * mp)
+{
+ vl_api_p2p_ethernet_del_reply_t *rmp;
+ vlib_main_t *vm = vlib_get_main ();
+ int rv;
+
+ u32 parent_if_index = htonl (mp->parent_if_index);
+ u8 remote_mac[6];
+
+ clib_memcpy (remote_mac, mp->remote_mac, 6);
+ rv = p2p_ethernet_add_del (vm, parent_if_index, remote_mac, ~0, 0, 0);
+
+ REPLY_MACRO (VL_API_P2P_ETHERNET_DEL_REPLY);
+}
+
+/*
+ * p2p_ethernet_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_p2p_ethernet;
+#undef _
+}
+
+static clib_error_t *
+p2p_ethernet_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (p2p_ethernet_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c
new file mode 100644
index 00000000..eeff4f06
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet_input.c
@@ -0,0 +1,262 @@
+/*
+ * node.c: p2p ethernet vpp node
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ethernet/p2p_ethernet.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+vlib_node_registration_t p2p_ethernet_input_node;
+
+/* packet trace format function */
+u8 *
+format_p2p_ethernet_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ p2p_ethernet_trace_t *t = va_arg (*args, p2p_ethernet_trace_t *);
+
+ vnet_main_t *vnm = &vnet_main;
+ s = format (s, "P2P ethernet: %U -> %U",
+ format_vnet_sw_if_index_name, vnm, t->sw_if_index,
+ format_vnet_sw_if_index_name, vnm, t->p2pe_sw_if_index);
+
+ return s;
+}
+
+#define foreach_p2p_ethernet_error \
+_(HITS, "P2P ethernet incoming packets processed")
+
+typedef enum
+{
+#define _(sym,str) P2PE_ERROR_##sym,
+ foreach_p2p_ethernet_error
+#undef _
+ P2PE_N_ERROR,
+} p2p_ethernet_error_t;
+
+static char *p2p_ethernet_error_strings[] = {
+#define _(sym,string) string,
+ foreach_p2p_ethernet_error
+#undef _
+};
+
+static uword
+p2p_ethernet_input_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 thread_index = vlib_get_thread_index ();
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ u32 n_left_from, *from, *to_next;
+ u32 next_index;
+ u32 n_p2p_ethernet_packets = 0;
+ vlib_combined_counter_main_t *cm =
+ vnet_get_main ()->interface_main.combined_sw_if_counters;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0 = 0, next1 = 0;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t *en0, *en1;
+ u32 rx0, rx1;
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ en0 = vlib_buffer_get_current (b0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ en1 = vlib_buffer_get_current (b1);
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ vnet_feature_next (sw_if_index0, &next0, b0);
+ vnet_feature_next (sw_if_index1, &next1, b1);
+
+ rx0 = p2p_ethernet_lookup (sw_if_index0, en0->src_address);
+ rx1 = p2p_ethernet_lookup (sw_if_index1, en1->src_address);
+
+ if (rx0 != ~0)
+ {
+ /* Send pkt to p2p_ethernet RX interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = rx0;
+ n_p2p_ethernet_packets += 1;
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ p2p_ethernet_trace_t *t0;
+ vlib_trace_buffer (vm, node, next_index, b0,
+ 1 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ t0->sw_if_index = sw_if_index0;
+ t0->p2pe_sw_if_index = rx0;
+ }
+
+ vlib_increment_combined_counter (cm, thread_index, rx0, 1,
+ vlib_buffer_length_in_chain
+ (vm, b0));
+ }
+ if (rx1 != ~0)
+ {
+ /* Send pkt to p2p_ethernet RX interface */
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] = rx1;
+ n_p2p_ethernet_packets += 1;
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ p2p_ethernet_trace_t *t1;
+ vlib_trace_buffer (vm, node, next_index, b1,
+ 1 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t1 = vlib_add_trace (vm, node, b1, sizeof (*t1));
+ t1->sw_if_index = sw_if_index1;
+ t1->p2pe_sw_if_index = rx1;
+ }
+
+ vlib_increment_combined_counter (cm, thread_index, rx1, 1,
+ vlib_buffer_length_in_chain
+ (vm, b1));
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi1, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = 0;
+ u32 sw_if_index0;
+ ethernet_header_t *en0;
+ u32 rx0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ en0 = vlib_buffer_get_current (b0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ vnet_feature_next (sw_if_index0, &next0, b0);
+
+ rx0 = p2p_ethernet_lookup (sw_if_index0, en0->src_address);
+ if (rx0 != ~0)
+ {
+ /* Send pkt to p2p_ethernet RX interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = rx0;
+ n_p2p_ethernet_packets += 1;
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ p2p_ethernet_trace_t *t0;
+ vlib_trace_buffer (vm, node, next_index, b0,
+ 1 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ t0->sw_if_index = sw_if_index0;
+ t0->p2pe_sw_if_index = rx0;
+ }
+
+ vlib_increment_combined_counter (cm, thread_index, rx0, 1,
+ vlib_buffer_length_in_chain
+ (vm, b0));
+ }
+ else
+ {
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ node->flags |= VLIB_NODE_FLAG_TRACE;
+ }
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, p2p_ethernet_input_node.index,
+ P2PE_ERROR_HITS, n_p2p_ethernet_packets);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (p2p_ethernet_input_node) = {
+ .function = p2p_ethernet_input_node_fn,
+ .name = "p2p-ethernet-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_p2p_ethernet_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(p2p_ethernet_error_strings),
+ .error_strings = p2p_ethernet_error_strings,
+
+ .n_next_nodes = 1,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (p2p_ethernet_input_node,
+ p2p_ethernet_input_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/packet.h b/src/vnet/ethernet/packet.h
new file mode 100644
index 00000000..964cf638
--- /dev/null
+++ b/src/vnet/ethernet/packet.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet/packet.h: ethernet packet format.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ethernet_packet_h
+#define included_ethernet_packet_h
+
+typedef enum
+{
+#define ethernet_type(n,s) ETHERNET_TYPE_##s = n,
+#include <vnet/ethernet/types.def>
+#undef ethernet_type
+} ethernet_type_t;
+
+typedef struct
+{
+ /* Source/destination address. */
+ u8 dst_address[6];
+ u8 src_address[6];
+
+ /* Ethernet type. */
+ u16 type;
+} ethernet_header_t;
+
+#define ETHERNET_ADDRESS_UNICAST 0
+#define ETHERNET_ADDRESS_MULTICAST 1
+
+/* I/G bit: individual (unicast)/group (broadcast/multicast). */
+always_inline uword
+ethernet_address_cast (u8 * a)
+{
+ return (a[0] >> 0) & 1;
+}
+
+always_inline uword
+ethernet_address_is_locally_administered (u8 * a)
+{
+ return (a[0] >> 1) & 1;
+}
+
+always_inline void
+ethernet_address_set_locally_administered (u8 * a)
+{
+ a[0] |= 1 << 1;
+}
+
+/* For VLAN ethernet type. */
+typedef struct
+{
+ /* 3 bit priority, 1 bit CFI and 12 bit vlan id. */
+ u16 priority_cfi_and_id;
+
+#define ETHERNET_N_VLAN (1 << 12)
+
+ /* Inner ethernet type. */
+ u16 type;
+} ethernet_vlan_header_t;
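+
+/*
+ * Editor's sketch (not part of the original patch): one way to unpack
+ * the tag-control field above. It is carried in network byte order;
+ * clib_net_to_host_u16 from vppinfra is assumed to be visible in the
+ * including translation unit, as the u8/u16 types already are.
+ */
+always_inline u16
+ethernet_vlan_header_get_id (ethernet_vlan_header_t * h)
+{
+  return clib_net_to_host_u16 (h->priority_cfi_and_id) & 0xfff;
+}
+
+always_inline u8
+ethernet_vlan_header_get_priority (ethernet_vlan_header_t * h)
+{
+  return (u8) (clib_net_to_host_u16 (h->priority_cfi_and_id) >> 13);
+}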
+
+
+/* VLAN with ethertype first and vlan id second */
+typedef struct
+{
+ /* vlan type */
+ u16 type;
+
+ /* 3 bit priority, 1 bit CFI and 12 bit vlan id. */
+ u16 priority_cfi_and_id;
+} ethernet_vlan_header_tv_t;
+
+/* PBB header with B-TAG - backbone VLAN indicator and I-TAG - service encapsulation */
+typedef struct
+{
+ /* Backbone source/destination address. */
+ u8 b_dst_address[6];
+ u8 b_src_address[6];
+
+ /* B-tag */
+ u16 b_type;
+ /* 3 bit priority, 1 bit DEI and 12 bit vlan id */
+ u16 priority_dei_id;
+
+ /* I-tag */
+ u16 i_type;
+ /* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */
+ u32 priority_dei_uca_res_sid;
+
+#define ETHERNET_N_PBB (1 << 24)
+} ethernet_pbb_header_t;
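+
+/*
+ * Editor's sketch (not part of the original patch): the 24-bit I-SID
+ * sits in the low bits of priority_dei_uca_res_sid, which is carried
+ * in network byte order.
+ */
+always_inline u32
+ethernet_pbb_header_get_isid (ethernet_pbb_header_t * h)
+{
+  return clib_net_to_host_u32 (h->priority_dei_uca_res_sid)
+    & (ETHERNET_N_PBB - 1);
+}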
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+{
+ /* Backbone source/destination address. */
+ u8 b_dst_address[6];
+ u8 b_src_address[6];
+
+ /* B-tag */
+ u16 b_type;
+ /* 3 bit priority, 1 bit DEI and 12 bit vlan id */
+ u16 priority_dei_id;
+
+ /* I-tag */
+ u16 i_type;
+ /* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */
+ u32 priority_dei_uca_res_sid;
+}) ethernet_pbb_header_packed_t;
+/* *INDENT-ON* */
+
+#endif /* included_ethernet_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/pg.c b/src/vnet/ethernet/pg.c
new file mode 100644
index 00000000..67ccfcf5
--- /dev/null
+++ b/src/vnet/ethernet/pg.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_pg.c: packet generator ethernet interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+
+typedef struct
+{
+ pg_edit_t type;
+ pg_edit_t src_address;
+ pg_edit_t dst_address;
+} pg_ethernet_header_t;
+
+static inline void
+pg_ethernet_header_init (pg_ethernet_header_t * e)
+{
+ pg_edit_init (&e->type, ethernet_header_t, type);
+ pg_edit_init (&e->src_address, ethernet_header_t, src_address);
+ pg_edit_init (&e->dst_address, ethernet_header_t, dst_address);
+}
+
+typedef struct
+{
+ pg_edit_t type;
+ pg_edit_t id;
+ pg_edit_t cfi;
+ pg_edit_t priority;
+} pg_ethernet_vlan_header_t;
+
+static inline void
+pg_ethernet_vlan_header_init (pg_ethernet_vlan_header_t * v, int vlan_index)
+{
+ ASSERT (vlan_index < ARRAY_LEN (((ethernet_max_header_t *) 0)->vlan));
+ pg_edit_init (&v->type, ethernet_max_header_t, vlan[vlan_index].type);
+
+ pg_edit_init_bitfield (&v->id, ethernet_max_header_t,
+ vlan[vlan_index].priority_cfi_and_id, 0, 12);
+ pg_edit_init_bitfield (&v->cfi, ethernet_max_header_t,
+ vlan[vlan_index].priority_cfi_and_id, 12, 1);
+ pg_edit_init_bitfield (&v->priority, ethernet_max_header_t,
+ vlan[vlan_index].priority_cfi_and_id, 13, 3);
+}
+
+uword
+unformat_pg_ethernet_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ethernet_header_t *e;
+ pg_ethernet_vlan_header_t *v;
+ pg_edit_t *ether_type_edit;
+ u32 n_vlan, error, group_index;
+
+ e = pg_create_edit_group (s, sizeof (e[0]), sizeof (ethernet_header_t),
+ &group_index);
+ pg_ethernet_header_init (e);
+ error = 1;
+
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ethernet_type_net_byte_order, &e->type,
+ unformat_pg_edit,
+ unformat_ethernet_address, &e->src_address,
+ unformat_pg_edit,
+ unformat_ethernet_address, &e->dst_address))
+ goto done;
+
+ n_vlan = 0;
+ while (unformat (input, "vlan"))
+ {
+ v = pg_add_edits (s, sizeof (v[0]), sizeof (ethernet_vlan_header_t),
+ group_index);
+ pg_ethernet_vlan_header_init (v, n_vlan);
+
+ if (!unformat_user (input, unformat_pg_edit,
+ unformat_pg_number, &v->id))
+ goto done;
+
+ if (!unformat (input, "priority %U", unformat_pg_edit,
+ unformat_pg_number, &v->priority))
+ pg_edit_set_fixed (&v->priority, 0);
+
+ if (!unformat (input, "cfi %U", unformat_pg_edit,
+ unformat_pg_number, &v->cfi))
+ pg_edit_set_fixed (&v->cfi, 0);
+
+ /* Too many vlans given. */
+ if (n_vlan >= 2)
+ goto done;
+
+ n_vlan++;
+ }
+
+ /* Address of e may have changed due to vlan edits being added */
+ e = pg_get_edit_group (s, group_index);
+ v = (void *) (e + 1);
+
+ /* Correct types for vlan packets. */
+ ether_type_edit = &e->type;
+ if (n_vlan > 0)
+ {
+ int i;
+
+ ether_type_edit = &v[n_vlan - 1].type;
+ pg_edit_copy_type_and_values (ether_type_edit, &e->type);
+ pg_edit_set_fixed (&e->type, ETHERNET_TYPE_VLAN);
+
+ for (i = 0; i < n_vlan - 1; i++)
+ pg_edit_set_fixed (&v[i].type, ETHERNET_TYPE_VLAN);
+ }
+
+ {
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_type_info_t *ti = 0;
+ pg_node_t *pg_node = 0;
+
+ if (ether_type_edit->type == PG_EDIT_FIXED)
+ {
+ u16 t = *(u16 *) ether_type_edit->values[PG_EDIT_LO];
+ ti = ethernet_get_type_info (em, clib_net_to_host_u16 (t));
+ if (ti && ti->node_index != ~0)
+ pg_node = pg_get_node (ti->node_index);
+ }
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto done;
+ }
+
+ error = 0;
+
+done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/sfp.c b/src/vnet/ethernet/sfp.c
new file mode 100644
index 00000000..624740e3
--- /dev/null
+++ b/src/vnet/ethernet/sfp.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ethernet/sfp.h>
+
+static u8 *
+format_space_terminated (u8 * s, va_list * args)
+{
+ u32 l = va_arg (*args, u32);
+ u8 *v = va_arg (*args, u8 *);
+ u8 *p;
+
+ for (p = v + l - 1; p >= v && p[0] == ' '; p--)
+ ;
+ vec_add (s, v, clib_min (p - v + 1, l));
+ return s;
+}
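+
+/*
+ * Example (editor's note): SFP EEPROM strings are fixed-width and
+ * space-padded; format_sfp_eeprom below relies on this to render e.g.
+ * a 16-byte vendor_name of "CISCO           " as "CISCO".
+ */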
+
+static u8 *
+format_sfp_id (u8 * s, va_list * args)
+{
+ u32 id = va_arg (*args, u32);
+ char *t = 0;
+ switch (id)
+ {
+#define _(f) case SFP_ID_##f: t = #f; break;
+ foreach_sfp_id
+#undef _
+ default:
+ return format (s, "unknown 0x%x", id);
+ }
+ return format (s, "%s", t);
+}
+
+static u8 *
+format_sfp_compatibility (u8 * s, va_list * args)
+{
+ u32 c = va_arg (*args, u32);
+ char *t = 0;
+ switch (c)
+ {
+#define _(a,b,f) case SFP_COMPATIBILITY_##f: t = #f; break;
+ foreach_sfp_compatibility
+#undef _
+ default:
+ return format (s, "unknown 0x%x", c);
+ }
+ return format (s, "%s", t);
+}
+
+u32
+sfp_is_compatible (sfp_eeprom_t * e, sfp_compatibility_t c)
+{
+ static struct
+ {
+ u8 byte, bit;
+ } t[] =
+ {
+#define _(a,b,f) { .byte = a, .bit = b, },
+ foreach_sfp_compatibility
+#undef _
+ };
+
+ ASSERT (c < ARRAY_LEN (t));
+ return (e->compatibility[t[c].byte] & (1 << t[c].bit)) != 0;
+}
+
+u8 *
+format_sfp_eeprom (u8 * s, va_list * args)
+{
+ sfp_eeprom_t *e = va_arg (*args, sfp_eeprom_t *);
+ uword indent = format_get_indent (s);
+ int i;
+
+ if (e->id != SFP_ID_sfp)
+ s = format (s, "id %U, ", format_sfp_id, e->id);
+
+ s = format (s, "compatibility:");
+ for (i = 0; i < SFP_N_COMPATIBILITY; i++)
+    if (sfp_is_compatible (e, i))
+ s = format (s, " %U", format_sfp_compatibility, i);
+
+ s = format (s, "\n%Uvendor: %U, part %U",
+ format_white_space, indent,
+ format_space_terminated, sizeof (e->vendor_name),
+ e->vendor_name, format_space_terminated,
+ sizeof (e->vendor_part_number), e->vendor_part_number);
+ s =
+ format (s, "\n%Urevision: %U, serial: %U, date code: %U",
+ format_white_space, indent, format_space_terminated,
+ sizeof (e->vendor_revision), e->vendor_revision,
+ format_space_terminated, sizeof (e->vendor_serial_number),
+ e->vendor_serial_number, format_space_terminated,
+ sizeof (e->vendor_date_code), e->vendor_date_code);
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/sfp.h b/src/vnet/ethernet/sfp.h
new file mode 100644
index 00000000..a1ac7997
--- /dev/null
+++ b/src/vnet/ethernet/sfp.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_optics_sfp_h
+#define included_vnet_optics_sfp_h
+
+#include <vppinfra/format.h>
+
+#define foreach_sfp_id \
+ _ (unknown) \
+ _ (gbic) \
+ _ (on_motherboard) \
+ _ (sfp)
+
+typedef enum
+{
+#define _(f) SFP_ID_##f,
+ foreach_sfp_id
+#undef _
+} sfp_id_t;
+
+typedef struct
+{
+ u8 id;
+ u8 extended_id;
+ u8 connector_type;
+ u8 compatibility[8];
+ u8 encoding;
+ u8 nominal_bit_rate_100mbits_per_sec;
+ u8 reserved13;
+ u8 link_length[5];
+ u8 reserved19;
+ u8 vendor_name[16];
+ u8 reserved36;
+ u8 vendor_oui[3];
+ u8 vendor_part_number[16];
+ u8 vendor_revision[4];
+ /* 16 bit value network byte order. */
+ u8 laser_wavelength_in_nm[2];
+ u8 reserved62;
+ u8 checksum_0_to_62;
+
+ u8 options[2];
+ u8 max_bit_rate_margin_percent;
+ u8 min_bit_rate_margin_percent;
+ u8 vendor_serial_number[16];
+ u8 vendor_date_code[8];
+ u8 reserved92[3];
+ u8 checksum_63_to_94;
+ u8 vendor_specific[32];
+ u8 reserved128[384];
+
+ /* Vendor specific data follows. */
+ u8 vendor_specific1[0];
+} sfp_eeprom_t;
+
+always_inline uword
+sfp_eeprom_is_valid (sfp_eeprom_t * e)
+{
+ int i;
+ u8 sum = 0;
+ for (i = 0; i < 63; i++)
+ sum += ((u8 *) e)[i];
+ return sum == e->checksum_0_to_62;
+}
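+
+/*
+ * Editor's sketch (not part of the original patch): the laser
+ * wavelength is a 16-bit network-byte-order value, per the field
+ * comment above.
+ */
+always_inline u16
+sfp_eeprom_laser_wavelength_nm (sfp_eeprom_t * e)
+{
+  return (u16) ((e->laser_wavelength_in_nm[0] << 8) |
+		e->laser_wavelength_in_nm[1]);
+}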
+
+/* _ (byte_index, bit_index, name) */
+#define foreach_sfp_compatibility \
+ _ (0, 4, 10g_base_sr) \
+ _ (0, 5, 10g_base_lr) \
+ _ (1, 2, oc48_long_reach) \
+ _ (1, 1, oc48_intermediate_reach) \
+ _ (1, 0, oc48_short_reach) \
+ _ (2, 6, oc12_long_reach) \
+ _ (2, 5, oc12_intermediate_reach) \
+ _ (2, 4, oc12_short_reach) \
+ _ (2, 2, oc3_long_reach) \
+ _ (2, 1, oc3_intermediate_reach) \
+ _ (2, 0, oc3_short_reach) \
+ _ (3, 3, 1g_base_t) \
+ _ (3, 2, 1g_base_cx) \
+ _ (3, 1, 1g_base_lx) \
+ _ (3, 0, 1g_base_sx)
+
+typedef enum
+{
+#define _(a,b,f) SFP_COMPATIBILITY_##f,
+ foreach_sfp_compatibility
+#undef _
+ SFP_N_COMPATIBILITY,
+} sfp_compatibility_t;
+
+u32 sfp_is_compatible (sfp_eeprom_t * e, sfp_compatibility_t c);
+
+format_function_t format_sfp_eeprom;
+
+#endif /* included_vnet_optics_sfp_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/types.def b/src/vnet/ethernet/types.def
new file mode 100644
index 00000000..c7a47221
--- /dev/null
+++ b/src/vnet/ethernet/types.def
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Emacs editing mode -*-C-*- Ethernet types. */
+
+/*
+ * ethernet types
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Types < 0x600 (1536) are LLC packet lengths. */
+ethernet_type (0x600, LLC_LENGTH)
+
+ethernet_type (0x600, XNS_IDP)
+ethernet_type (0x800, IP4)
+ethernet_type (0x806, ARP)
+ethernet_type (0x0BAD, VINES_IP)
+ethernet_type (0x0BAE, VINES_LOOPBACK)
+ethernet_type (0x0BAF, VINES_ECHO)
+ethernet_type (0x1984, TRAIN)
+ethernet_type (0x2000, CDP)
+ethernet_type (0x2001, CGMP)
+ethernet_type (0x2007, SRP_CONTROL)
+ethernet_type (0x2452, CENTRINO_PROMISC)
+ethernet_type (0x6000, DECNET)
+ethernet_type (0x6001, DECNET_DUMP_LOAD)
+ethernet_type (0x6002, DECNET_REMOTE_CONSOLE)
+ethernet_type (0x6003, DECNET_ROUTE)
+ethernet_type (0x6004, DEC_LAT)
+ethernet_type (0x6005, DEC_DIAGNOSTIC)
+ethernet_type (0x6006, DEC_CUSTOMER)
+ethernet_type (0x6007, DEC_SCA)
+ethernet_type (0x6558, TRANSPARENT_BRIDGING)
+ethernet_type (0x6559, RAW_FRAME_RELAY)
+ethernet_type (0x8035, REVERSE_ARP)
+ethernet_type (0x8038, DEC_LAN_BRIDGE)
+ethernet_type (0x803D, DEC_ETHERNET_ENCRYPTION)
+ethernet_type (0x803F, DEC_LAN_TRAFFIC_MONITOR)
+ethernet_type (0x8041, DEC_LAST)
+ethernet_type (0x809B, APPLETALK)
+ethernet_type (0x80D5, IBM_SNA)
+ethernet_type (0x80F3, APPLETALK_AARP)
+ethernet_type (0x80FF, WELLFLEET_COMPRESSION)
+ethernet_type (0x8100, VLAN)
+ethernet_type (0x8137, IPX)
+ethernet_type (0x814C, SNMP)
+ethernet_type (0x81FD, CABLETRON_ISMP)
+ethernet_type (0x81FF, CABLETRON_ISMP_TBFLOOD)
+ethernet_type (0x86DD, IP6)
+ethernet_type (0x86DF, ATOMIC)
+ethernet_type (0x876B, TCP_IP_COMPRESSION)
+ethernet_type (0x876C, IP_AUTONOMOUS_SYSTEMS)
+ethernet_type (0x876D, SECURE_DATA)
+ethernet_type (0x8808, MAC_CONTROL)
+ethernet_type (0x8809, SLOW_PROTOCOLS)
+ethernet_type (0x880B, PPP)
+ethernet_type (0x8847, MPLS)
+ethernet_type (0x8848, MPLS_UPSTREAM_ASSIGNED)
+ethernet_type (0x8863, PPPOE_DISCOVERY)
+ethernet_type (0x8864, PPPOE_SESSION)
+ethernet_type (0x886D, INTEL_ANS)
+ethernet_type (0x886F, MICROSOFT_NLB_HEARTBEAT)
+ethernet_type (0x8881, CDMA_2000)
+ethernet_type (0x888e, 802_1X_AUTHENTICATION)
+ethernet_type (0x8892, PROFINET)
+ethernet_type (0x889a, HYPERSCSI)
+ethernet_type (0x88a2, AOE)
+ethernet_type (0x88a8, DOT1AD)
+ethernet_type (0x88AE, BRDWALK)
+ethernet_type (0x88B7, 802_OUI_EXTENDED)
+ethernet_type (0x88c7, 802_11I_PRE_AUTHENTICATION)
+ethernet_type (0x88cc, 802_1_LLDP)
+ethernet_type (0x88e7, DOT1AH)
+ethernet_type (0x894f, NSH)
+ethernet_type (0x9000, LOOPBACK)
+ethernet_type (0x9021, RTNET_MAC)
+ethernet_type (0x9022, RTNET_CONFIG)
+ethernet_type (0x9100, VLAN_9100)
+ethernet_type (0x9200, VLAN_9200)
+ethernet_type (0x9999, PGLAN)
+ethernet_type (0xFEFE, SRP_ISIS)
+ethernet_type (0xFFFF, RESERVED)
diff --git a/src/vnet/feature/feature.c b/src/vnet/feature/feature.c
new file mode 100644
index 00000000..6756d0d4
--- /dev/null
+++ b/src/vnet/feature/feature.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/feature/feature.h>
+#include <vnet/adj/adj.h>
+
+vnet_feature_main_t feature_main;
+
+static clib_error_t *
+vnet_feature_init (vlib_main_t * vm)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_registration_t *freg;
+ vnet_feature_arc_registration_t *areg;
+ u32 arc_index = 0;
+
+ fm->arc_index_by_name = hash_create_string (0, sizeof (uword));
+ areg = fm->next_arc;
+
+ /* process feature arc registrations */
+ while (areg)
+ {
+ char *s;
+ int i = 0;
+ areg->feature_arc_index = arc_index;
+ if (areg->arc_index_ptr)
+ *areg->arc_index_ptr = arc_index;
+ hash_set_mem (fm->arc_index_by_name, areg->arc_name,
+ pointer_to_uword (areg));
+
+ /* process start nodes */
+ while ((s = areg->start_nodes[i]))
+ {
+ i++;
+ }
+ areg->n_start_nodes = i;
+
+ /* next */
+ areg = areg->next;
+ arc_index++;
+ }
+
+ vec_validate (fm->next_feature_by_arc, arc_index - 1);
+ vec_validate (fm->feature_nodes, arc_index - 1);
+ vec_validate (fm->feature_config_mains, arc_index - 1);
+ vec_validate (fm->next_feature_by_name, arc_index - 1);
+ vec_validate (fm->sw_if_index_has_features, arc_index - 1);
+ vec_validate (fm->feature_count_by_sw_if_index, arc_index - 1);
+
+ freg = fm->next_feature;
+ while (freg)
+ {
+ vnet_feature_registration_t *next;
+ uword *p = hash_get_mem (fm->arc_index_by_name, freg->arc_name);
+ if (p == 0)
+ {
+ /* Don't start vpp with broken features arcs */
+ clib_warning ("Unknown feature arc '%s'", freg->arc_name);
+ os_exit (1);
+ }
+
+ areg = uword_to_pointer (p[0], vnet_feature_arc_registration_t *);
+ arc_index = areg->feature_arc_index;
+
+ next = freg->next;
+ freg->next = fm->next_feature_by_arc[arc_index];
+ fm->next_feature_by_arc[arc_index] = freg;
+
+ /* next */
+ freg = next;
+ }
+
+ areg = fm->next_arc;
+ while (areg)
+ {
+ clib_error_t *error;
+ vnet_feature_config_main_t *cm;
+ vnet_config_main_t *vcm;
+
+ arc_index = areg->feature_arc_index;
+ cm = &fm->feature_config_mains[arc_index];
+ vcm = &cm->config_main;
+ if ((error = vnet_feature_arc_init (vm, vcm,
+ areg->start_nodes,
+ areg->n_start_nodes,
+ fm->next_feature_by_arc[arc_index],
+ &fm->feature_nodes[arc_index])))
+ {
+ clib_error_report (error);
+ os_exit (1);
+ }
+
+ fm->next_feature_by_name[arc_index] =
+ hash_create_string (0, sizeof (uword));
+ freg = fm->next_feature_by_arc[arc_index];
+
+ while (freg)
+ {
+ hash_set_mem (fm->next_feature_by_name[arc_index],
+ freg->node_name, pointer_to_uword (freg));
+ freg = freg->next;
+ }
+
+ /* next */
+ areg = areg->next;
+ arc_index++;
+ }
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_feature_init);
+
+u8
+vnet_get_feature_arc_index (const char *s)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_arc_registration_t *reg;
+ uword *p;
+
+ p = hash_get_mem (fm->arc_index_by_name, s);
+ if (p == 0)
+ return ~0;
+
+ reg = uword_to_pointer (p[0], vnet_feature_arc_registration_t *);
+ return reg->feature_arc_index;
+}
+
+vnet_feature_registration_t *
+vnet_get_feature_reg (const char *arc_name, const char *node_name)
+{
+ u8 arc_index;
+
+ arc_index = vnet_get_feature_arc_index (arc_name);
+ if (arc_index == (u8) ~ 0)
+ return 0;
+
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_registration_t *reg;
+ uword *p;
+
+ p = hash_get_mem (fm->next_feature_by_name[arc_index], node_name);
+ if (p == 0)
+ return 0;
+
+ reg = uword_to_pointer (p[0], vnet_feature_registration_t *);
+ return reg;
+}
+
+u32
+vnet_get_feature_index (u8 arc, const char *s)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_registration_t *reg;
+ uword *p;
+
+ if (s == 0)
+ return ~0;
+
+ p = hash_get_mem (fm->next_feature_by_name[arc], s);
+ if (p == 0)
+ return ~0;
+
+ reg = uword_to_pointer (p[0], vnet_feature_registration_t *);
+ return reg->feature_index;
+}
+
+int
+vnet_feature_enable_disable_with_index (u8 arc_index, u32 feature_index,
+ u32 sw_if_index, int enable_disable,
+ void *feature_config,
+ u32 n_feature_config_bytes)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ i16 feature_count;
+ u32 ci;
+
+ if (arc_index == (u8) ~ 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (feature_index == ~0)
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ cm = &fm->feature_config_mains[arc_index];
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
+
+ vec_validate (fm->feature_count_by_sw_if_index[arc_index], sw_if_index);
+ feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
+
+ if (!enable_disable && feature_count < 1)
+ return 0;
+
+ ci = (enable_disable
+ ? vnet_config_add_feature
+ : vnet_config_del_feature)
+ (vlib_get_main (), &cm->config_main, ci, feature_index, feature_config,
+ n_feature_config_bytes);
+ cm->config_index_by_sw_if_index[sw_if_index] = ci;
+
+ /* update feature count */
+ enable_disable = (enable_disable > 0);
+ feature_count += enable_disable ? 1 : -1;
+ ASSERT (feature_count >= 0);
+
+ fm->sw_if_index_has_features[arc_index] =
+ clib_bitmap_set (fm->sw_if_index_has_features[arc_index], sw_if_index,
+ (feature_count > 0));
+ adj_feature_update (sw_if_index, arc_index, (feature_count > 0));
+
+ fm->feature_count_by_sw_if_index[arc_index][sw_if_index] = feature_count;
+ return 0;
+}
+
+int
+vnet_feature_enable_disable (const char *arc_name, const char *node_name,
+ u32 sw_if_index, int enable_disable,
+ void *feature_config, u32 n_feature_config_bytes)
+{
+ u32 feature_index;
+ u8 arc_index;
+
+ arc_index = vnet_get_feature_arc_index (arc_name);
+
+ if (arc_index == (u8) ~ 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ feature_index = vnet_get_feature_index (arc_index, node_name);
+
+ return vnet_feature_enable_disable_with_index (arc_index, feature_index,
+ sw_if_index, enable_disable,
+ feature_config,
+ n_feature_config_bytes);
+}
+
+
+/** Display the set of available driver features.
+ Useful for verifying that expected features are present
+*/
+
+static clib_error_t *
+show_features_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_arc_registration_t *areg;
+ vnet_feature_registration_t *freg;
+
+ vlib_cli_output (vm, "Available feature paths");
+
+ areg = fm->next_arc;
+ while (areg)
+ {
+ vlib_cli_output (vm, "%s:", areg->arc_name);
+ freg = fm->next_feature_by_arc[areg->feature_arc_index];
+ while (freg)
+ {
+ vlib_cli_output (vm, " %s\n", freg->node_name);
+ freg = freg->next;
+ }
+
+
+ /* next */
+ areg = areg->next;
+ }
+
+ return 0;
+}
+
+/*?
+ * Display the set of available driver features
+ *
+ * @cliexpar
+ * Example:
+ * @cliexcmd{show features}
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_features_command, static) = {
+ .path = "show features",
+ .short_help = "show features",
+ .function = show_features_command_fn,
+};
+/* *INDENT-ON* */
+
+/** Display the set of driver features configured on a specific interface
+ * Called by "show interface" handler
+ */
+
+void
+vnet_interface_features_show (vlib_main_t * vm, u32 sw_if_index)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ u32 node_index, current_config_index;
+ u16 feature_arc;
+ vnet_feature_config_main_t *cm = fm->feature_config_mains;
+ vnet_feature_arc_registration_t *areg;
+ vnet_config_main_t *vcm;
+ vnet_config_t *cfg;
+ u32 cfg_index;
+ vnet_config_feature_t *feat;
+ vlib_node_t *n;
+ int i;
+
+ vlib_cli_output (vm, "Driver feature paths configured on %U...",
+ format_vnet_sw_if_index_name,
+ vnet_get_main (), sw_if_index);
+
+ areg = fm->next_arc;
+ while (areg)
+ {
+ feature_arc = areg->feature_arc_index;
+ vcm = &(cm[feature_arc].config_main);
+
+ vlib_cli_output (vm, "\n%s:", areg->arc_name);
+ areg = areg->next;
+
+ if (NULL == cm[feature_arc].config_index_by_sw_if_index ||
+ vec_len (cm[feature_arc].config_index_by_sw_if_index) <=
+ sw_if_index)
+ {
+ vlib_cli_output (vm, " none configured");
+ continue;
+ }
+
+ current_config_index =
+ vec_elt (cm[feature_arc].config_index_by_sw_if_index, sw_if_index);
+
+ if (current_config_index == ~0)
+ {
+ vlib_cli_output (vm, " none configured");
+ continue;
+ }
+
+ ASSERT (current_config_index
+ < vec_len (vcm->config_pool_index_by_user_index));
+
+ cfg_index = vcm->config_pool_index_by_user_index[current_config_index];
+ cfg = pool_elt_at_index (vcm->config_pool, cfg_index);
+
+ for (i = 0; i < vec_len (cfg->features); i++)
+ {
+ feat = cfg->features + i;
+ node_index = feat->node_index;
+ n = vlib_get_node (vm, node_index);
+ vlib_cli_output (vm, " %v", n->name);
+ }
+ }
+}
+
+static clib_error_t *
+set_interface_features_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+
+ u8 *arc_name = 0;
+ u8 *feature_name = 0;
+ u32 sw_if_index = ~0;
+ u8 enable = 1;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto done;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U %v", unformat_vnet_sw_interface, vnm, &sw_if_index,
+ &feature_name))
+ ;
+ else if (unformat (line_input, "arc %v", &arc_name))
+ ;
+ else if (unformat (line_input, "disable"))
+ enable = 0;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (sw_if_index == ~0)
+ {
+ error = clib_error_return (0, "Interface not specified...");
+ goto done;
+ }
+
+ vec_add1 (arc_name, 0);
+ vec_add1 (feature_name, 0);
+
+ vnet_feature_registration_t *reg;
+ reg =
+ vnet_get_feature_reg ((const char *) arc_name,
+ (const char *) feature_name);
+ if (reg == 0)
+ {
+ error = clib_error_return (0, "Unknown feature...");
+ goto done;
+ }
+ if (reg->enable_disable_cb)
+ error = reg->enable_disable_cb (sw_if_index, enable);
+ if (!error)
+ vnet_feature_enable_disable ((const char *) arc_name,
+ (const char *) feature_name, sw_if_index,
+ enable, 0, 0);
+
+done:
+ vec_free (feature_name);
+ vec_free (arc_name);
+ unformat_free (line_input);
+ return error;
+}
+
+/*?
+ * Enable or disable a feature on a given interface
+ *
+ * @cliexpar
+ * Example:
+ * @cliexcmd{set interface feature GigabitEthernet2/0/0 ip4-flow-classify arc ip4-unicast}
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_feature_command, static) = {
+ .path = "set interface feature",
+ .short_help = "set interface feature <intfc> <feature_name> arc <arc_name> "
+ "[disable]",
+ .function = set_interface_features_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/feature/feature.h b/src/vnet/feature/feature.h
new file mode 100644
index 00000000..7ec43ea8
--- /dev/null
+++ b/src/vnet/feature/feature.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_features_h
+#define included_features_h
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/devices/devices.h>
+
+/** feature arc registration object */
+typedef struct _vnet_feature_arc_registration
+{
+ /** next registration in list of all registrations*/
+ struct _vnet_feature_arc_registration *next;
+ /** Feature Arc name */
+ char *arc_name;
+ /** Start nodes */
+ char **start_nodes;
+ int n_start_nodes;
+ /* Feature arc index, assigned by init function */
+ u8 feature_arc_index;
+ u8 *arc_index_ptr;
+} vnet_feature_arc_registration_t;
+
+/* Enable feature callback. */
+typedef clib_error_t *(vnet_feature_enable_disable_function_t)
+ (u32 sw_if_index, int enable_disable);
+
+/** feature registration object */
+typedef struct _vnet_feature_registration
+{
+ /** next registration in list of all registrations*/
+ struct _vnet_feature_registration *next;
+ /** Feature arc name */
+ char *arc_name;
+ /** Graph node name */
+ char *node_name;
+ /** Pointer to this feature index, filled in by vnet_feature_arc_init */
+ u32 *feature_index_ptr;
+ u32 feature_index;
+ /** Constraints of the form "this feature runs before X" */
+ char **runs_before;
+ /** Constraints of the form "this feature runs after Y" */
+ char **runs_after;
+
+ /** Function to enable/disable feature **/
+ vnet_feature_enable_disable_function_t *enable_disable_cb;
+} vnet_feature_registration_t;
+
+typedef struct vnet_feature_config_main_t_
+{
+ vnet_config_main_t config_main;
+ u32 *config_index_by_sw_if_index;
+} vnet_feature_config_main_t;
+
+typedef struct
+{
+ /** feature arc configuration list */
+ vnet_feature_arc_registration_t *next_arc;
+ uword **arc_index_by_name;
+
+ /** feature path configuration lists */
+ vnet_feature_registration_t *next_feature;
+ vnet_feature_registration_t **next_feature_by_arc;
+ uword **next_feature_by_name;
+
+ /** feature config main objects */
+ vnet_feature_config_main_t *feature_config_mains;
+
+ /** Save partial order results for show command */
+ char ***feature_nodes;
+
+ /** bitmap of interfaces which have driver rx features configured */
+ uword **sw_if_index_has_features;
+
+ /** feature reference counts by interface */
+ i16 **feature_count_by_sw_if_index;
+
+ /** Feature arc index for device-input */
+ u8 device_input_feature_arc_index;
+
+ /** convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} vnet_feature_main_t;
+
+extern vnet_feature_main_t feature_main;
+
+#define VNET_FEATURE_ARC_INIT(x,...) \
+ __VA_ARGS__ vnet_feature_arc_registration_t vnet_feat_arc_##x;\
+static void __vnet_add_feature_arc_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vnet_add_feature_arc_registration_##x (void) \
+{ \
+ vnet_feature_main_t * fm = &feature_main; \
+ vnet_feat_arc_##x.next = fm->next_arc; \
+ fm->next_arc = & vnet_feat_arc_##x; \
+} \
+__VA_ARGS__ vnet_feature_arc_registration_t vnet_feat_arc_##x
+
+#define VNET_FEATURE_INIT(x,...) \
+ __VA_ARGS__ vnet_feature_registration_t vnet_feat_##x; \
+static void __vnet_add_feature_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vnet_add_feature_registration_##x (void) \
+{ \
+ vnet_feature_main_t * fm = &feature_main; \
+ vnet_feat_##x.next = fm->next_feature; \
+ fm->next_feature = & vnet_feat_##x; \
+} \
+__VA_ARGS__ vnet_feature_registration_t vnet_feat_##x
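+
+/*
+ * Usage sketch (editor's illustration; the feature and node names
+ * below are hypothetical):
+ *
+ *   VNET_FEATURE_INIT (my_feature, static) =
+ *   {
+ *     .arc_name = "ip4-unicast",
+ *     .node_name = "my-feature-node",
+ *     .runs_before = VNET_FEATURES ("ip4-lookup"),
+ *   };
+ *
+ * The constructor emitted by the macro links the registration onto
+ * feature_main.next_feature; vnet_feature_init() later files it under
+ * its arc, and vnet_feature_arc_init() sorts it against its
+ * runs_before / runs_after constraints.
+ */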
+
+void
+vnet_config_update_feature_count (vnet_feature_main_t * fm, u8 arc,
+ u32 sw_if_index, int is_add);
+
+u32 vnet_get_feature_index (u8 arc, const char *s);
+u8 vnet_get_feature_arc_index (const char *s);
+vnet_feature_registration_t *vnet_get_feature_reg (const char *arc_name,
+ const char *node_name);
+
+
+int
+vnet_feature_enable_disable_with_index (u8 arc_index, u32 feature_index,
+ u32 sw_if_index, int enable_disable,
+ void *feature_config,
+ u32 n_feature_config_bytes);
+
+int
+vnet_feature_enable_disable (const char *arc_name, const char *node_name,
+ u32 sw_if_index, int enable_disable,
+ void *feature_config,
+ u32 n_feature_config_bytes);
+
+static inline vnet_feature_config_main_t *
+vnet_get_feature_arc_config_main (u8 arc_index)
+{
+ vnet_feature_main_t *fm = &feature_main;
+
+ if (arc_index == (u8) ~ 0)
+ return 0;
+
+ return &fm->feature_config_mains[arc_index];
+}
+
+static_always_inline vnet_feature_config_main_t *
+vnet_feature_get_config_main (u16 arc)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ return &fm->feature_config_mains[arc];
+}
+
+static_always_inline int
+vnet_have_features (u8 arc, u32 sw_if_index)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ return clib_bitmap_get (fm->sw_if_index_has_features[arc], sw_if_index);
+}
+
+static_always_inline u32
+vnet_get_feature_config_index (u8 arc, u32 sw_if_index)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc];
+ return vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+}
+
+static_always_inline void *
+vnet_feature_arc_start_with_data (u8 arc, u32 sw_if_index, u32 * next,
+ vlib_buffer_t * b, u32 n_data_bytes)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ cm = &fm->feature_config_mains[arc];
+
+ if (PREDICT_FALSE (vnet_have_features (arc, sw_if_index)))
+ {
+ b->feature_arc_index = arc;
+ b->current_config_index =
+ vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+ return vnet_get_config_data (&cm->config_main, &b->current_config_index,
+ next, n_data_bytes);
+ }
+ return 0;
+}
+
+static_always_inline void
+vnet_feature_arc_start (u8 arc, u32 sw_if_index, u32 * next0,
+ vlib_buffer_t * b0)
+{
+ vnet_feature_arc_start_with_data (arc, sw_if_index, next0, b0, 0);
+}
+
+static_always_inline void *
+vnet_feature_next_with_data (u32 sw_if_index, u32 * next0,
+ vlib_buffer_t * b0, u32 n_data_bytes)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ u8 arc = b0->feature_arc_index;
+ vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc];
+
+ return vnet_get_config_data (&cm->config_main,
+ &b0->current_config_index, next0,
+ n_data_bytes);
+}
+
+static_always_inline void
+vnet_feature_next (u32 sw_if_index, u32 * next0, vlib_buffer_t * b0)
+{
+ vnet_feature_next_with_data (sw_if_index, next0, b0, 0);
+}
+
+static_always_inline void
+vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0,
+ vlib_buffer_t * b0)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ u8 feature_arc_index = fm->device_input_feature_arc_index;
+ cm = &fm->feature_config_mains[feature_arc_index];
+
+ if (PREDICT_FALSE
+ (clib_bitmap_get
+ (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
+ {
+ /*
+ * Save next0 so that the last feature in the chain
+ * can skip ethernet-input if indicated...
+ */
+ u16 adv;
+
+ vnet_buffer (b0)->device_input_feat.saved_next_index = *next0;
+ adv = device_input_next_node_advance[*next0];
+ vnet_buffer (b0)->device_input_feat.buffer_advance = adv;
+ vlib_buffer_advance (b0, -adv);
+
+ b0->feature_arc_index = feature_arc_index;
+ b0->current_config_index =
+ vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+ vnet_get_config_data (&cm->config_main, &b0->current_config_index,
+ next0, /* # bytes of config data */ 0);
+ }
+}
+
+static_always_inline void
+vnet_feature_start_device_input_x2 (u32 sw_if_index,
+ u32 * next0,
+ u32 * next1,
+ vlib_buffer_t * b0, vlib_buffer_t * b1)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ u8 feature_arc_index = fm->device_input_feature_arc_index;
+ cm = &fm->feature_config_mains[feature_arc_index];
+
+ if (PREDICT_FALSE
+ (clib_bitmap_get
+ (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
+ {
+ /*
+ * Save next0 so that the last feature in the chain
+ * can skip ethernet-input if indicated...
+ */
+ u16 adv;
+
+ vnet_buffer (b0)->device_input_feat.saved_next_index = *next0;
+ adv = device_input_next_node_advance[*next0];
+ vnet_buffer (b0)->device_input_feat.buffer_advance = adv;
+ vlib_buffer_advance (b0, -adv);
+
+ vnet_buffer (b1)->device_input_feat.saved_next_index = *next1;
+ adv = device_input_next_node_advance[*next1];
+ vnet_buffer (b1)->device_input_feat.buffer_advance = adv;
+ vlib_buffer_advance (b1, -adv);
+
+ b0->feature_arc_index = feature_arc_index;
+ b1->feature_arc_index = feature_arc_index;
+ b0->current_config_index =
+ vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+ b1->current_config_index = b0->current_config_index;
+ vnet_get_config_data (&cm->config_main, &b0->current_config_index,
+ next0, /* # bytes of config data */ 0);
+ vnet_get_config_data (&cm->config_main, &b1->current_config_index,
+ next1, /* # bytes of config data */ 0);
+ }
+}
+
+static_always_inline void
+vnet_feature_start_device_input_x4 (u32 sw_if_index,
+ u32 * next0,
+ u32 * next1,
+ u32 * next2,
+ u32 * next3,
+ vlib_buffer_t * b0,
+ vlib_buffer_t * b1,
+ vlib_buffer_t * b2, vlib_buffer_t * b3)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ u8 feature_arc_index = fm->device_input_feature_arc_index;
+ cm = &fm->feature_config_mains[feature_arc_index];
+
+ if (PREDICT_FALSE
+ (clib_bitmap_get
+ (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
+ {
+ /*
+ * Save next0 so that the last feature in the chain
+ * can skip ethernet-input if indicated...
+ */
+ u16 adv;
+
+ vnet_buffer (b0)->device_input_feat.saved_next_index = *next0;
+ adv = device_input_next_node_advance[*next0];
+ vnet_buffer (b0)->device_input_feat.buffer_advance = adv;
+ vlib_buffer_advance (b0, -adv);
+
+ vnet_buffer (b1)->device_input_feat.saved_next_index = *next1;
+ adv = device_input_next_node_advance[*next1];
+ vnet_buffer (b1)->device_input_feat.buffer_advance = adv;
+ vlib_buffer_advance (b1, -adv);
+
+ vnet_buffer (b2)->device_input_feat.saved_next_index = *next2;
+ adv = device_input_next_node_advance[*next2];
+ vnet_buffer (b2)->device_input_feat.buffer_advance = adv;
+ vlib_buffer_advance (b2, -adv);
+
+ vnet_buffer (b3)->device_input_feat.saved_next_index = *next3;
+ adv = device_input_next_node_advance[*next3];
+ vnet_buffer (b3)->device_input_feat.buffer_advance = adv;
+ vlib_buffer_advance (b3, -adv);
+
+ b0->feature_arc_index = feature_arc_index;
+ b1->feature_arc_index = feature_arc_index;
+ b2->feature_arc_index = feature_arc_index;
+ b3->feature_arc_index = feature_arc_index;
+
+ b0->current_config_index =
+ vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+ b1->current_config_index = b0->current_config_index;
+ b2->current_config_index = b0->current_config_index;
+ b3->current_config_index = b0->current_config_index;
+
+ vnet_get_config_data (&cm->config_main, &b0->current_config_index,
+ next0, /* # bytes of config data */ 0);
+ vnet_get_config_data (&cm->config_main, &b1->current_config_index,
+ next1, /* # bytes of config data */ 0);
+ vnet_get_config_data (&cm->config_main, &b2->current_config_index,
+ next2, /* # bytes of config data */ 0);
+ vnet_get_config_data (&cm->config_main, &b3->current_config_index,
+ next3, /* # bytes of config data */ 0);
+ }
+}
+
+#define VNET_FEATURES(...) (char*[]) { __VA_ARGS__, 0}
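+
+/*
+ * Editor's note: VNET_FEATURES ("ip4-lookup", "ip4-rewrite") builds a
+ * NULL-terminated (char *[]) compound literal; the trailing zero entry
+ * is what the runs_before / runs_after constraint walkers in
+ * registration.c stop on.
+ */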
+
+clib_error_t *vnet_feature_arc_init (vlib_main_t * vm,
+ vnet_config_main_t * vcm,
+ char **feature_start_nodes,
+ int num_feature_start_nodes,
+ vnet_feature_registration_t *
+ first_reg, char ***feature_nodes);
+
+void vnet_interface_features_show (vlib_main_t * vm, u32 sw_if_index);
+
+#endif /* included_features_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/feature/registration.c b/src/vnet/feature/registration.c
new file mode 100644
index 00000000..1deeeef9
--- /dev/null
+++ b/src/vnet/feature/registration.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
+
+/**
+ * @file
+ * @brief Feature Subgraph Ordering.
+
+ Dynamically compute feature subgraph ordering by performing a
+ topological sort across a set of "feature A before feature B" and
+ "feature C after feature B" constraints.
+
+ Use the topological sort result to set up vnet_config_main_t's for
+ use at runtime.
+
+ Feature subgraph arcs are simple enough. They start at specific
+ fixed nodes, and end at specific fixed nodes. In between, a
+ per-interface current feature configuration dictates which
+ additional nodes each packet visits. Each so-called feature node
+ can [of course] drop any specific packet.
+
+ See ip4_forward.c, ip6_forward.c in this directory to see the
+ current rx-unicast, rx-multicast, and tx feature subgraph arc
+ definitions.
+
+ Let's say that we wish to add a new feature to the ip4 unicast
+ feature subgraph arc, which needs to run before @c ip4-lookup. In
+ either base code or a plugin,
+ <CODE><PRE>
+ \#include <vnet/feature/feature.h>
+ </PRE></CODE>
+
+ and add the new feature as shown:
+
+ <CODE><PRE>
+  VNET_FEATURE_INIT (my_ip4_unicast_feature, static) =
+  {
+    .arc_name = "ip4-unicast",
+    .node_name = "my-ip4-unicast-feature",
+    .runs_before = VNET_FEATURES ("ip4-lookup"),
+ };
+ </PRE></CODE>
+
+ Here's the standard coding pattern to enable / disable
+ @c my-ip4-unicast-feature on an interface:
+
+ <CODE><PRE>
+
+ sw_if_index = <interface-handle>
+  vnet_feature_enable_disable ("ip4-unicast", "my-ip4-unicast-feature",
+                               sw_if_index, 1 /* enable */, 0, 0);
+ </PRE></CODE>
+
+ Here's how to obtain the correct next node index in packet
+ processing code, aka in the implementation of @c my-ip4-unicast-feature:
+
+ <CODE><PRE>
+ vnet_feature_next (sw_if_index0, &next0, b0);
+
+ </PRE></CODE>
+
+ Nodes are free to drop or otherwise redirect packets. Packets
+ which "pass" should be enqueued via the next0 arc computed by
+ vnet_feature_next.
+*/
+
+
+static int
+comma_split (u8 * s, u8 ** a, u8 ** b)
+{
+ *a = s;
+
+ while (*s && *s != ',')
+ s++;
+
+ if (*s == ',')
+ *s = 0;
+ else
+ return 1;
+
+ *b = (u8 *) (s + 1);
+ return 0;
+}
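+
+/*
+ * Example (editor's note): given s = "my-feature,ip4-lookup",
+ * comma_split (s, &a, &b) overwrites the comma with a NUL and returns
+ * 0, leaving a = "my-feature" and b = "ip4-lookup"; it returns 1 when
+ * no comma is present.
+ */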
+
+/**
+ * @brief Initialize a feature graph arc
+ * @param vm vlib main structure pointer
+ * @param vcm vnet config main structure pointer
+ * @param feature_start_nodes names of start-nodes which use this
+ * feature graph arc
+ * @param num_feature_start_nodes number of start-nodes
+ * @param first_reg first element in a singly-linked list of feature
+ *        registrations, typically built by __attribute__((constructor))
+ *        functions
+ * @param [out] in_feature_nodes returned vector of
+ * topologically-sorted feature node names, for use in
+ * show commands
+ * @returns 0 on success, otherwise an error message. Errors
+ * are fatal since they invariably involve mistyped node-names, or
+ * genuinely missing node-names
+ */
+clib_error_t *
+vnet_feature_arc_init (vlib_main_t * vm,
+ vnet_config_main_t * vcm,
+ char **feature_start_nodes,
+ int num_feature_start_nodes,
+ vnet_feature_registration_t * first_reg,
+ char ***in_feature_nodes)
+{
+ uword *index_by_name;
+ uword *reg_by_index;
+ u8 **node_names = 0;
+ u8 *node_name;
+ char **these_constraints;
+ char *this_constraint_c;
+ u8 **constraints = 0;
+ u8 *constraint_tuple;
+ u8 *this_constraint;
+ u8 **orig, **closure;
+ uword *p;
+ int i, j, k;
+ u8 *a_name, *b_name;
+ int a_index, b_index;
+ int n_features;
+ u32 *result = 0;
+ vnet_feature_registration_t *this_reg = 0;
+ char **feature_nodes = 0;
+ hash_pair_t *hp;
+ u8 **keys_to_delete = 0;
+
+ index_by_name = hash_create_string (0, sizeof (uword));
+ reg_by_index = hash_create (0, sizeof (uword));
+
+ this_reg = first_reg;
+
+ /* pass 1, collect feature node names, construct a before b pairs */
+ while (this_reg)
+ {
+ node_name = format (0, "%s%c", this_reg->node_name, 0);
+ hash_set (reg_by_index, vec_len (node_names), (uword) this_reg);
+
+ hash_set_mem (index_by_name, node_name, vec_len (node_names));
+
+ vec_add1 (node_names, node_name);
+
+ these_constraints = this_reg->runs_before;
+ while (these_constraints && these_constraints[0])
+ {
+ this_constraint_c = these_constraints[0];
+
+ constraint_tuple = format (0, "%s,%s%c", node_name,
+ this_constraint_c, 0);
+ vec_add1 (constraints, constraint_tuple);
+ these_constraints++;
+ }
+
+ these_constraints = this_reg->runs_after;
+ while (these_constraints && these_constraints[0])
+ {
+ this_constraint_c = these_constraints[0];
+
+ constraint_tuple = format (0, "%s,%s%c",
+ this_constraint_c, node_name, 0);
+ vec_add1 (constraints, constraint_tuple);
+ these_constraints++;
+ }
+
+ this_reg = this_reg->next;
+ }
+
+ n_features = vec_len (node_names);
+ orig = clib_ptclosure_alloc (n_features);
+
+ for (i = 0; i < vec_len (constraints); i++)
+ {
+ this_constraint = constraints[i];
+
+ if (comma_split (this_constraint, &a_name, &b_name))
+ return clib_error_return (0, "comma_split failed!");
+
+ p = hash_get_mem (index_by_name, a_name);
+ /*
+ * Note: the next two errors mean that something is
+ * b0rked. As in: if you code "A depends on B," and you forget
+ * to define a FEATURE_INIT macro for B, you lose.
+ * Nonexistent graph nodes are tolerated.
+ */
+ if (p == 0)
+ return clib_error_return (0, "feature node '%s' not found", a_name);
+ a_index = p[0];
+
+ p = hash_get_mem (index_by_name, b_name);
+ if (p == 0)
+ return clib_error_return (0, "feature node '%s' not found", b_name);
+ b_index = p[0];
+
+ /* add a before b to the original set of constraints */
+ orig[a_index][b_index] = 1;
+ vec_free (this_constraint);
+ }
+
+ /* Compute the positive transitive closure of the original constraints */
+ closure = clib_ptclosure (orig);
+
+ /* Compute a partial order across feature nodes, if one exists. */
+again:
+ for (i = 0; i < n_features; i++)
+ {
+ for (j = 0; j < n_features; j++)
+ {
+ if (closure[i][j])
+ goto item_constrained;
+ }
+ /* Item i can be output */
+ vec_add1 (result, i);
+ {
+ for (k = 0; k < n_features; k++)
+ closure[k][i] = 0;
+ /*
+ * Add a "Magic" a before a constraint.
+ * This means we'll never output it again
+ */
+ closure[i][i] = 1;
+ goto again;
+ }
+ item_constrained:
+ ;
+ }
+
+ /* see if we got a partial order... */
+ if (vec_len (result) != n_features)
+    return clib_error_return (0, "feature node constraints contain a cycle: no partial order!");
+
+ /*
+ * We win.
+ * Bind the index variables, and output the feature node name vector
+ * using the partial order we just computed. Result is in stack
+ * order, because the entry with the fewest constraints (e.g. none)
+ * is output first, etc.
+ */
+
+ for (i = n_features - 1; i >= 0; i--)
+ {
+ p = hash_get (reg_by_index, result[i]);
+ ASSERT (p != 0);
+ this_reg = (vnet_feature_registration_t *) p[0];
+ if (this_reg->feature_index_ptr)
+ *this_reg->feature_index_ptr = n_features - (i + 1);
+ this_reg->feature_index = n_features - (i + 1);
+ vec_add1 (feature_nodes, this_reg->node_name);
+ }
+
+ /* Set up the config infrastructure */
+ vnet_config_init (vm, vcm,
+ feature_start_nodes,
+ num_feature_start_nodes,
+ feature_nodes, vec_len (feature_nodes));
+
+ /* Save a copy for show command */
+ *in_feature_nodes = feature_nodes;
+
+ /* Finally, clean up all the shit we allocated */
+ /* *INDENT-OFF* */
+ hash_foreach_pair (hp, index_by_name,
+ ({
+ vec_add1 (keys_to_delete, (u8 *)hp->key);
+ }));
+ /* *INDENT-ON* */
+ hash_free (index_by_name);
+ for (i = 0; i < vec_len (keys_to_delete); i++)
+ vec_free (keys_to_delete[i]);
+ vec_free (keys_to_delete);
+ hash_free (reg_by_index);
+ vec_free (result);
+ clib_ptclosure_free (orig);
+ clib_ptclosure_free (closure);
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c
new file mode 100644
index 00000000..b430e113
--- /dev/null
+++ b/src/vnet/fib/fib.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_path_list.h>
+
+static clib_error_t *
+fib_module_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, dpo_module_init)))
+ return (error);
+ if ((error = vlib_call_init_function (vm, adj_module_init)))
+ return (error);
+ if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
+ return (error);
+
+ fib_entry_module_init();
+ fib_entry_src_module_init();
+ fib_path_module_init();
+ fib_path_list_module_init();
+ fib_walk_module_init();
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (fib_module_init);
diff --git a/src/vnet/fib/fib.h b/src/vnet/fib/fib.h
new file mode 100644
index 00000000..ec97c565
--- /dev/null
+++ b/src/vnet/fib/fib.h
@@ -0,0 +1,650 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * \brief
+ * An IP v4/6 independent FIB.
+ *
+ * The main functions provided by the FIB are as follows;
+ *
+ * - source priorities
+ *
+ * A route can be added to the FIB by more than one entity or source. Sources
+ * include, but are not limited to, API, CLI, LISP, MAP, etc (for the full list
+ * see fib_entry.h). Each source provides the forwarding information (FI) that
+ * it has determined is required for that route. Since each source determines the
+ * FI using different best-path and loop-prevention algorithms, it is not
+ * correct for the FI of multiple sources to be combined. Instead the FIB must
+ * choose to use the FI from only one source. This choice is based on a static
+ * priority assignment. For example;
+ * If a prefix is added as a result of interface configuration:
+ * set interface address 192.168.1.1/24 GigE0
+ * and then it is also added from the CLI
+ * ip route 192.168.1.1/32 via 2.2.2.2/32
+ * then the 'interface' source will prevail, and the route will remain as
+ * 'local'.
+ * The requirement of the FIB is to always install the FI from the winning
+ * source and thus to maintain the FI added by losing sources so it can be
+ * installed should the winning source be withdrawn.
+ *
+ * - adj-fib maintenance
+ *
+ * When ARP or ND discovers a neighbour on a link, an adjacency forms for the
+ * address of that neighbour. It is also required to insert, in the FIB table
+ * corresponding to the VRF of the link, an entry for that neighbour. This
+ * entry is often referred to as an adj-fib. Adj-fibs
+ * have a dedicated source; 'ADJ'.
+ * The priority of the ADJ source is lower than most. This is so the following
+ * config;
+ * set interface address 192.168.1.1/32 GigE0
+ * ip arp 192.168.1.2 GigE0 dead.dead.dead
+ * ip route add 192.168.1.2 via 10.10.10.10 GigE1
+ * will forward traffic for 192.168.1.2 via GigE1. That is the route added
+ * by the control plane is favoured over the adjacency discovered by ARP.
+ * The control plane, with its associated authentication, is considered the
+ * authoritative source.
+ * To counter the nefarious addition of adj-fibs through the injection of bogus
+ * adjacencies, the FIB is also required to ensure that only adj-fibs whose
+ * less specific covering prefix is connected are installed in forwarding. This
+ * requires the use of 'cover tracking', where a route maintains a dependency
+ * relationship with the route that is its less specific cover. When this cover
+ * changes (i.e. there is a new covering route) or the forwarding information
+ * of the cover changes, then the covered route is notified.
+ *
+ * Overlapping sub-nets are not supported, so no adj-fib has multiple paths.
+ * The control plane is expected to remove a prefix configured for an interface
+ * before the interface changes VRF.
+ * So while the following config is accepted:
+ * set interface address 192.168.1.1/32 GigE0
+ * ip arp 192.168.1.2 GigE0 dead.dead.dead
+ * set interface ip table GigE0 2
+ * it does not result in the desired behaviour.
+ *
+ * - attached export.
+ *
+ * Further to adj-fib maintenance above consider the following config:
+ * set interface address 192.168.1.1/24 GigE0
+ * ip route add table 2 192.168.1.0/24 GigE0
+ * Traffic destined for 192.168.1.2 in table 2 will generate an ARP request
+ * on GigE0. However, since GigE0 is in table 0, all adj-fibs will be added in
+ * FIB 0. Hence all hosts in the sub-net are unreachable from table 2. To resolve
+ * this, all adj-fib and local prefixes are exported (i.e. copied) from the
+ * 'export' table 0, to the 'import' table 2. There can be many import tables
+ * for a single export table.
+ *
+ * - recursive route resolution
+ *
+ * A recursive route is of the form:
+ * 1.1.1.1/32 via 10.10.10.10
+ * i.e. a route for which no egress interface is provided. In order to forward
+ * traffic to 1.1.1.1/32 the FIB must therefore first determine how to forward
+ * traffic to 10.10.10.10/32. This is recursive resolution.
+ * Recursive resolution, just like normal resolution, proceeds via a longest
+ * prefix match for the 'via-address' 10.10.10.10. Note that it is only possible
+ * to add routes via an address (i.e. a /32 or /128), not via a shorter mask
+ * prefix; there is no use case for the latter.
+ * Since recursive resolution proceeds via a longest prefix match, the entry
+ * in the FIB that will resolve the recursive route, termed the via-entry, may
+ * change as other routes are added to the FIB. Consider the recursive
+ * route shown above, and this non-recursive route:
+ * 10.10.10.0/24 via 192.168.16.1 GigE0
+ * The entry for 10.10.10.0/24 is thus the resolving via-entry. If this entry is
+ * modified to, say;
+ * 10.10.10.0/24 via 192.16.1.3 GigE0
+ * then packets for 1.1.1.1/32 must also be sent to the new next-hop.
+ * Now consider the addition of;
+ * 10.10.10.0/28 via 192.168.16.2 GigE0
+ * The more specific /28 is a better longest prefix match and thus becomes the
+ * via-entry. Removal of the /28 means the resolution will revert to the /24.
+ * Tracking these changes in recursive resolution is a requirement on the FIB.
+ * When the forwarding information of the via-entry changes, a back-walk
+ * is used to update dependent recursive routes. When new routes are added to
+ * the table, the cover tracking feature provides the necessary notifications to
+ * the via-entry routes.
+ * The adjacency constructed for 1.1.1.1/32 will be a recursive adjacency
+ * whose next adjacency will be contributed from the via-entry. Maintaining
+ * the validity of this recursive adjacency is a requirement of the FIB.
+ *
+ * - recursive loop avoidance
+ *
+ * Consider this set of routes:
+ * 1.1.1.1/32 via 2.2.2.2
+ * 2.2.2.2/32 via 3.3.3.3
+ * 3.3.3.3/32 via 1.1.1.1
+ * This is termed a recursion loop - all of the routes in the loop are
+ * unresolved insofar as they do not have a resolving adjacency, but each
+ * is resolved because the via-entry is known. It is important here to note
+ * the distinction between the control-plane objects and the data-plane objects
+ * (more details in the implementation section). The control plane objects must
+ * allow the loop to form (i.e. the graph becomes cyclic), however, the
+ * data-plane absolutely must not allow the loop to form, otherwise the packet
+ * would loop indefinitely and never egress the device - meltdown would follow.
+ * The control plane must allow the loop to form, because when the loop breaks,
+ * all members of the loop need to be updated. Forming the loop allows the
+ * dependencies to be correctly setup to allow this to happen.
+ * There is no limit to the depth of recursion supported by VPP so:
+ * 9.9.9.100/32 via 9.9.9.99
+ * 9.9.9.99/32 via 9.9.9.98
+ * 9.9.9.98/32 via 9.9.9.97
+ * ... turtles, turtles, turtles ...
+ * 9.9.9.1/32 via 10.10.10.10 Gig0
+ * is supported to as many layers of turtles as desired. However, when
+ * back-walking a graph (in this case from 9.9.9.1/32 up toward 9.9.9.100/32)
+ * a FIB needs to differentiate the case where the recursion is deep versus
+ * the case where the recursion is looped. A simple method, employed by VPP FIB,
+ * is to limit the number of steps; the VPP FIB limit is 16. Typical BGP scenarios
+ * in the wild do not exceed 3 (BGP Inter-AS option C).
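+ *
+ * A sketch of the step-limit check (hypothetical types; 16 is the VPP
+ * limit quoted above);
+ *
+ *   #define MAX_RECURSION_DEPTH 16
+ *
+ *   typedef struct entry_t_ {
+ *     const struct entry_t_ *via_entry; // NULL when non-recursive
+ *   } entry_t;
+ *
+ *   // returns 1 if the resolution chain exceeds the limit and is
+ *   // therefore treated as looped (and hence resolved via a drop)
+ *   static int
+ *   resolution_is_looped (const entry_t *e)
+ *   {
+ *     int depth = 0;
+ *
+ *     while (NULL != e->via_entry)
+ *     {
+ *       if (++depth > MAX_RECURSION_DEPTH)
+ *         return (1);
+ *       e = e->via_entry;
+ *     }
+ *     return (0);
+ *   }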
+ *
+ * - Fast Convergence
+ *
+ * After a network topology change, the 'convergence' time is the time taken
+ * for the router to complete a transition to forward traffic using the new
+ * topology. The convergence time is therefore a summation of the time to;
+ * - detect the failure.
+ * - calculate the new 'best path' information.
+ * - download the new best paths to the data-plane.
+ * - install those best paths in data-plane forwarding.
+ * The last two points are of relevance to the VPP architecture. The download API
+ * is binary and batched; details are not discussed here. There is no HW component
+ * to programme, so installation time is bounded by the memory allocation and
+ * table lookup and insert access times.
+ *
+ * 'Fast' convergence refers to a set of technologies that a FIB can employ to
+ * completely or partially restore forwarding whilst the convergence actions
+ * listed above are ongoing. Fast convergence technologies are further
+ * sub-divided into Prefix Independent Convergence (PIC) and Loop Free
+ * Alternate path Fast re-route (LFA-FRR or sometimes called IP-FRR) which
+ * affect recursive and non-recursive routes respectively.
+ *
+ * LFA-FRR
+ *
+ * Consider the network topology below:
+ *
+ * C
+ * / \
+ * X -- A --- B - Y
+ * | |
+ * D F
+ * \ /
+ * E
+ *
+ * All links are equal cost and traffic is passing from X to Y; the best path is
+ * X-A-B-Y. There are two alternative paths, one via C and one via E. An
+ * alternate path is considered to be loop free if no other router on that path
+ * would forward the traffic back to the sender. Consider router C, its best
+ * path to Y is via B, so if A were to send traffic destined to Y to C, then C
+ * would forward that traffic to B - this is a loop-free alternate path. In
+ * contrast consider router D. D's shortest path to Y is via A, so if A were to
+ * send traffic destined to Y via D, then D would send it back to A; this is
+ * not a loop-free alternate path. There are several points of note;
+ * - we are considering the pre-failure routing topology
+ * - any equal-cost multi-path between A and B is also a LFA path.
+ * - in order for A to calculate LFA paths it must be aware of the best-path
+ * to Y from the perspective of D. These calculations are thus limited to
+ * routing protocols that have a full view of the network topology, i.e.
+ * link-state DB protocols like OSPF or an SDN controller. LFA protected
+ * prefixes are thus non-recursive.
+ *
+ * LFA is specified as a 1 to 1 redundancy; a primary path has only one LFA
+ * (a.k.a. backup) path. To my knowledge this limitation stems from the
+ * complexity of calculating, and capacity planning for, a 1:n redundancy.
+ *
+ * In the event that the link A-B fails, the alternate path via C can be used.
+ * In order to provide 'fast' failover in the event of a failure, the control
+ * plane will download both the primary and the backup path to the FIB. It is
+ * then a requirement of the FIB to perform the failover (a.k.a cutover) from
+ * the primary to the backup path as quickly as possible, and particularly
+ * without any other control-plane intervention. The expectation is that cutover
+ * is less than 50 milliseconds - a value allegedly derived from VoIP QoS
+ * requirements. Note that cutover time still includes the fault detection time,
+ * which in a virtualised environment could be the dominant factor. Failure
+ * detection can be either a link down, which will affect multiple paths on a
+ * multi-access interface, or a specific per-path heartbeat (i.e. BFD).
+ * At this time VPP does not support LFA, that is, it does not support the
+ * installation of primary and backup path[s] for a route. It does, however,
+ * support ECMP, and VPP FIB is designed to quickly remove failed paths from
+ * the ECMP set. It does not insert shared objects specific to the
+ * protected resource into the forwarding object graph, since this would incur
+ * a forwarding/performance cost. Failover time is thus dependent on the number
+ * of routes. Details are provided in the implementation section below.
+ *
+ * PIC
+ *
+ * PIC refers to the concept that the convergence time should be independent of
+ * the number of prefixes/routes that are affected by the failure. PIC is
+ * therefore most appropriate when considering networks with a large number of
+ * prefixes, i.e. BGP networks and thus recursive prefixes. There are several
+ * flavours of PIC covering different locations of protection and failure
+ * scenarios. An outline is given below, see the literature for more details:
+ *
+ * Y/16 - CE1 -- PE1---\
+ * | \ P1---\
+ * | \ PE3 -- CE3 - X/16
+ * | - P2---/
+ * Y/16 - CE2 -- PE2---/
+ *
+ * CE = customer edge, PE = provider edge. external-BGP runs between customer
+ * and provider, internal-BGP runs between provider and provider.
+ *
+ * 1) iBGP PIC-core: consider traffic from CE1 to X/16 via CE3. On PE1 there
+ * are routes;
+ * X/16 (and hundreds of thousands of others like it)
+ * via PE3
+ * and
+ * PE3/32 (its loopback address)
+ * via 10.0.0.1 Link0 (this is P1)
+ * via 10.1.1.1 Link1 (this is P2)
+ * the failure is the loss of link0 or link1
+ * As in all PIC scenarios, in order to provide prefix independent convergence
+ * it must be that the route for X/16 (and all other routes via PE3) do not
+ * need to be updated in the FIB. The FIB therefore needs to update a single
+ * object that is shared by all routes - once this shared object is updated,
+ * then all routes using it will be instantly updated to use the new forwarding
+ * information. In this case the shared object is the resolving route via PE3.
+ * Once the route via PE3 is updated via IGP (OSPF) convergence, all
+ * recursive routes that resolve through it are also updated. VPP FIB
+ * implements this scenario via a recursive-adjacency. X/16 and its sibling
+ * routes share a recursive-adjacency that links to/points at/stacks on the
+ * normal adjacency contributed by the route for PE3. Once this shared
+ * recursive adj is re-linked, all routes are switched to using the new
+ * forwarding information. This is shown below;
+ *
+ * pre-failure;
+ * X/16 --> R-ADJ-1 --> ADJ-1-PE3 (multi-path via P1 and P2)
+ *
+ * post-failure:
+ * X/16 --> R-ADJ-1 --> ADJ-2-PE3 (single path via P1)
+ *
+ * note that R-ADJ-1 (the recursive adj) remains in the forwarding graph,
+ * therefore X/16 (and all its siblings) is not updated.
+ * X/16 and its siblings share the recursive adj since they share the same
+ * path-list. It is the path-list object that contributes the recursive-adj
+ * (see next section for more details)
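+ *
+ * In sketch form (hypothetical types), the prefix independence falls out
+ * of the indirection: each sibling route stores only the index of the
+ * shared recursive adj, so the cutover is one write, no matter how many
+ * routes resolve through it;
+ *
+ *   typedef struct {
+ *     u32 via_adj; // the adj contributed by the via-entry
+ *   } recursive_adj_t;
+ *
+ *   static recursive_adj_t *radj_pool;
+ *
+ *   static void
+ *   radj_relink (u32 radj_index, u32 new_via_adj)
+ *   {
+ *     // none of the routes using this radj need to be touched
+ *     pool_elt_at_index(radj_pool, radj_index)->via_adj = new_via_adj;
+ *   }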
+ *
+ *
+ * 2) iBGP PIC-edge; traffic from CE3 to Y/16. On PE3 there are routes;
+ * Y/16 (and hundreds of thousands of others like it)
+ * via PE1
+ * via PE2
+ * and
+ * PE1/32 (PE1's loopback address)
+ * via 10.0.2.2 Link0 (this is P1)
+ * PE2/32 (PE2's loopback address)
+ * via 10.0.3.3 Link1 (this is P2)
+ *
+ * The failure is the loss of reachability to PE2. This could be either the
+ * loss of the link P2-PE2 or the loss of the node PE2. This is detected either
+ * by the withdrawal of PE2's loopback route or by some form of failure
+ * detection (i.e. BFD).
+ * VPP FIB again provides PIC via the use of the shared recursive-adj. Y/16 and
+ * its siblings will again share a path-list for the list {PE1,PE2}, this
+ * path-list will contribute a multi-path-recursive-adj, i.e. a multi-path-adj
+ * with each choice therein being another adj;
+ *
+ * Y/16 -> RM-ADJ --> ADJ1 (for PE1)
+ * --> ADJ2 (for PE2)
+ *
+ * When the route for PE2 is withdrawn, the multi-path-recursive-adjacency
+ * is updated to be;
+ *
+ * Y/16 --> RM-ADJ --> ADJ1 (for PE1)
+ * --> ADJ1 (for PE1)
+ *
+ * that is both choices in the ECMP set are the same and thus all traffic is
+ * forwarded to PE1. Eventually the control plane will download a route update
+ * for Y/16 to be via PE1 only. At that time the situation will be:
+ *
+ * Y/16 -> R-ADJ --> ADJ1 (for PE1)
+ *
+ * In the scenario above we assumed that PE1 and PE2 are ECMP for Y/16. eBGP
+ * PIC core is also specified for the case where one PE is primary and the other
+ * backup - VPP FIB does not support that case at this time.
+ *
+ * 3) eBGP PIC Edge; traffic from CE3 to Y/16. On PE1 there are routes;
+ * Y/16 (and hundreds of thousands of others like it)
+ * via CE1 (primary)
+ * via PE2 (backup)
+ * and
+ * CE1 (this is an adj-fib)
+ * via 11.0.0.1 Link0 (this is CE1) << this is an adj-fib
+ * PE2 (PE2's loopback address)
+ * via 10.0.5.5 Link1 (this is link PE1-PE2)
+ * The failure is the loss of link0 to CE1. The failure can be detected by FIB
+ * either as a link down event or by the control plane withdrawing the connected
+ * prefix on link0 (say 11.0.0.0/30). The latter works because the resolving
+ * entry is an adj-fib, so removing the connected will withdraw the adj-fib, and
+ * hence the recursive path becomes unresolved. The former is faster,
+ * particularly in the case of Inter-AS option A where there are many VLAN
+ * sub-interfaces on the PE-CE link, one for each VRF, and so the control plane
+ * must remove the connected prefix for each sub-interface to trigger PIC in
+ * each VRF. Note though that total PIC cutover time will depend on VRF scale
+ * with either trigger.
+ * Primary and backup paths in this eBGP PIC-edge scenario are calculated by
+ * BGP. Each peer is configured to always advertise its best external path to
+ * its iBGP peers. Backup paths therefore send traffic from the PE back into the
+ * core to an alternate PE. A PE may have multiple external paths, i.e. multiple
+ * directly connected CEs; it may also have multiple backup PEs. However, there
+ * is no correlation between the two, so, unlike LFA-FRR, the redundancy model is
+ * N-M; N primary paths are backed-up by M backup paths - only when all primary
+ * paths fail is the cutover performed onto the M backup paths. Note that
+ * PE2 must be suitably configured to forward traffic on its external path that
+ * was received from PE1. VPP FIB does not support external-internal-BGP (eiBGP)
+ * load-balancing.
+ *
+ * As with LFA-FRR the use of primary and backup paths is not currently
+ * supported, however, the use of a recursive-multi-path-adj, and a suitably
+ * constrained hashing algorithm to choose from the primary or backup path sets,
+ * would again provide the necessary shared object and hence the prefix scale
+ * independent cutover.
+ *
+ * Astute readers will recognise that both of the eBGP PIC scenarios refer only
+ * to a BGP free core.
+ *
+ * Fast convergence implementation options come in two flavours:
+ * 1) Insert switches into the data-path. The switch represents the protected
+ * resource. If the switch is 'on' the primary path is taken, otherwise
+ * the backup path is taken. Testing the switch in the data-path comes with
+ * an associated performance cost. A given packet may encounter more than
+ * one protected resource as it is forwarded. This approach minimises
+ * cutover times as packets will be forwarded on the backup path as soon
+ * as the protected resource is detected to be down and the single switch
+ * is tripped. However, it comes at a performance cost, which increases
+ * with each shared resource a packet encounters in the data-path.
+ * This approach is thus best suited to LFA-FRR where the protected routes
+ * are non-recursive (i.e. encounter few shared resources) and the
+ * expectation on cutover times is more stringent (<50msecs).
+ * 2) Update shared objects. Identify objects in the data-path, that are
+ * required to be present whether or not fast convergence is required (i.e.
+ * adjacencies) that can be shared by multiple routes. Create a dependency
+ * between these objects at the protected resource. When the protected
+ * resource fails, each of the shared objects is updated in a way that all
+ * users of it see a consistent change. This approach incurs no performance
+ * penalty as the data-path structure is unchanged, however, the cutover
+ * times are longer as more work is required when the resource fails. This
+ * scheme is thus more appropriate to recursive prefixes (where the packet
+ * will encounter multiple protected resources) and to fast-convergence
+ * technologies where the cutover times are less stringent (i.e. PIC).
+ *
+ * Implementation:
+ * ---------------
+ *
+ * Due to the requirements outlined above, not all routes known to FIB
+ * (e.g. adj-fibs) are installed in forwarding. However, should circumstances
+ * change, those routes will need to be added. This adds the requirement that
+ * a FIB maintains two tables per-VRF, per-AF (where a 'table' is indexed by
+ * prefix): the forwarding and non-forwarding tables.
+ *
+ * For DP speed in VPP we want the lookup in the forwarding table to directly
+ * result in the ADJ. So of the two tables, one contains all the routes (a
+ * lookup therein yields a fib_entry_t), the other contains only the forwarding
+ * routes (a lookup therein yields an ip_adjacency_t). The latter is used by the
+ * DP.
+ * This trades memory for forwarding performance - a good trade-off in VPP's
+ * expected operating environments.
+ *
+ * Note these tables are keyed only by the prefix (and since there are two
+ * per-VRF, implicitly by the VRF too). The key for an adjacency is the
+ * tuple {next-hop address (and its AF), interface, link/ether-type}.
+ * Consider this curious, but allowed, config;
+ *
+ * set int ip addr 10.0.0.1/24 Gig0
+ * set ip arp Gig0 10.0.0.2 dead.dead.dead
+ * # a host in that sub-net is routed via a better next hop (say it avoids a
+ * # big L2 domain)
+ * ip route add 10.0.0.2 Gig1 192.168.1.1
+ * # this recursive should go via Gig1
+ * ip route add 1.1.1.1/32 via 10.0.0.2
+ * # this non-recursive should go via Gig0
+ * ip route add 2.2.2.2/32 via Gig0 10.0.0.2
+ *
+ * for the last route, the lookup for the path (via {Gig0, 10.0.0.2}) in the
+ * prefix table would not yield the correct result. To fix this we need a
+ * separate table for the adjacencies.
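+ *
+ * In sketch form (hypothetical types, not the VPP code), a FIB thus
+ * holds, per-VRF, per-AF;
+ *
+ *   typedef struct {
+ *     table_t *non_fwd; // keyed by prefix; yields a fib_entry_t (all routes)
+ *     table_t *fwd;     // keyed by prefix; yields an ip_adjacency_t (DP only)
+ *     table_t *adjs;    // keyed by the full adjacency tuple
+ *   } fib_sketch_t;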
+ *
+ * - FIB data structures;
+ *
+ * fib_entry_t:
+ * - a representation of a route.
+ * - has a prefix.
+ * - it maintains an array of path-lists that have been contributed by the
+ * different sources
+ * - it installs in the forwarding table the adjacency contributed by the best
+ * source's path-list.
+ *
+ * fib_path_list_t:
+ * - a list of paths
+ * - path-lists may be shared between FIB entries. The path-lists are thus
+ * kept in a DB. The key is the combined description of the paths. We share
+ * path-lists when it will aid convergence to do so. Adding path-lists to
+ * this DB that are never shared, or are shared only by prefixes that are
+ * not subject to PIC, will increase the size of the DB unnecessarily and
+ * may lead to increased search times due to hash collisions.
+ * - the path-list contributes the appropriate adj for the entry in the
+ * forwarding table. The adj can be 'normal', multi-path or recursive,
+ * depending on the number of paths and their types.
+ * - since path-lists are shared there is only one instance of the multi-path
+ * adj that they [may] create. As such multi-path adjacencies do not need a
+ * separate DB.
+ * The path-list with recursive paths and the recursive adjacency that it
+ * contributes forms the backbone of the fast convergence architecture (as
+ * described previously).
+ *
+ * fib_path_t:
+ * - a description of how to forward the traffic (i.e. via {Gig1, K}).
+ * - the path describes the intent on how to forward. This differs from how
+ * the path resolves. I.e. it might not be resolved at all (since the
+ * interface is deleted or down).
+ * - paths have different types, most notably recursive or non-recursive.
+ * - a fib_path_t will contribute the appropriate adjacency object. It is from
+ * these contributions that the DP graph/chain for the route is built.
+ * - if the path is recursive and a recursion loop is detected, then the path
+ * will contribute the special DROP adjacency. This way, whilst the control
+ * plane graph is looped, the data-plane graph does not.
+ *
+ * we build a graph of these objects;
+ *
+ * fib_entry_t -> fib_path_list_t -> fib_path_t -> ...
+ *
+ * for recursive paths:
+ *
+ * fib_path_t -> fib_entry_t -> ....
+ *
+ * for non-recursive paths
+ *
+ * fib_path_t -> ip_adjacency_t -> interface
+ *
+ * These objects, which constitute the 'control plane' part of the FIB, are used
+ * to represent the resolution of a route. As a whole this is referred to as the
+ * control plane graph. There is a separate DP graph to represent the forwarding
+ * of a packet. In the DP graph each object represents an action that is applied
+ * to a packet as it traverses the graph. For example, a lookup of an IP address
+ * in the forwarding table could result in the following graph:
+ *
+ * recursive-adj --> multi-path-adj --> interface_A
+ * --> interface_B
+ *
+ * A packet traversing this FIB DP graph would thus also traverse a VPP node
+ * graph of:
+ *
+ * ipX_recursive --> ipX_rewrite --> interface_A_tx --> etc
+ *
+ * The taxonomy of objects in a FIB graph is as follows. Consider;
+ *
+ * A -->
+ * B --> D
+ * C -->
+ *
+ * where A, B and C are (for example) routes that resolve through D.
+ * parent: D is the parent of A, B, and C.
+ * children: A, B, and C are children of D.
+ * sibling: A, B and C are siblings of one another.
+ *
+ * All shared objects in the FIB are reference counted. Users of these objects
+ * are thus expected to use the add_lock/unlock semantics (as one would
+ * normally use malloc/free).
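+ *
+ * The lock semantics, in sketch form (hypothetical object type);
+ *
+ *   static void
+ *   obj_lock (obj_t *obj)
+ *   {
+ *     obj->locks++;
+ *   }
+ *
+ *   static void
+ *   obj_unlock (obj_t *obj)
+ *   {
+ *     ASSERT(obj->locks > 0);
+ *     if (0 == --obj->locks)
+ *       obj_destroy(obj); // last reference gone; free the object
+ *   }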
+ *
+ * WALKS
+ *
+ * It is necessary to walk/traverse the graph forwards (entry to interface) to
+ * perform a collapse or build a recursive adj and backwards (interface
+ * to entry) to perform updates, i.e. when interface state changes or when
+ * recursive route resolution updates occur.
+ * A forward walk follows simply by navigating an object's parent pointer to
+ * access its parent object. For objects with multiple parents (e.g. a
+ * path-list), each parent is walked in turn.
+ * To support back-walks direct dependencies are maintained between objects,
+ * i.e. in the relationship, {A, B, C} --> D, then object D will maintain a list
+ * of 'pointers' to its children {A, B, C}. Bare C-language pointers are not
+ * allowed, so a pointer is described in terms of an object type (i.e. entry,
+ * path-list, etc) and index - this allows the object to be retrieved from the
+ * appropriate pool. A list is maintained to achieve fast convergence at scale.
+ * When there are millions of recursive prefixes, it is very inefficient to
+ * blindly walk the tables looking for entries that were affected by a given
+ * topology change. The lowest hanging fruit when optimising is to remove
+ * actions that are not required, so all back-walks only traverse objects that
+ * are directly affected by the change.
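+ *
+ * Such a child 'pointer' is, in sketch form, a typed pool index (along
+ * the lines of VPP's fib_node_ptr_t);
+ *
+ *   typedef struct {
+ *     int type;  // identifies the pool (entry, path-list, etc.)
+ *     u32 index; // the element within that pool
+ *   } node_ptr_t;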
+ *
+ * PIC Core and fast-reroute rely on FIB reacting quickly to an interface
+ * state change to update the multi-path-adjacencies that use this interface.
+ * An example graph is shown below:
+ *
+ * E_a -->
+ * E_b --> PL_2 --> P_a --> Interface_A
+ * ... --> P_c -\
+ * E_k --> \
+ * Interface_K
+ * /
+ * E_l --> /
+ * E_m --> PL_1 --> P_d -/
+ * ... --> P_f --> Interface_F
+ * E_z -->
+ *
+ * E = fib_entry_t
+ * PL = fib_path_list_t
+ * P = fib_path_t
+ * The subscripts are arbitrary and serve only to distinguish object instances.
+ * This CP graph results in the following DP graph:
+ *
+ * M-ADJ-2 --> Interface_A
+ * \
+ * -> Interface_K
+ * /
+ * M-ADJ-1 --> Interface_F
+ *
+ * M-ADJ = multi-path-adjacency.
+ *
+ * When interface K goes down a back-walk is started over its dependants in the
+ * control plane graph. This back-walk will reach PL_1 and PL_2 and result in
+ * the calculation of new adjacencies that have interface K removed. The walk
+ * will continue to the entry objects and thus the forwarding table is updated
+ * for each prefix with the new adjacency. The DP graph then becomes:
+ *
+ * ADJ-3 --> Interface_A
+ *
+ * ADJ-4 --> Interface_F
+ *
+ * The eBGP PIC scenarios described above relied on the update of a path-list's
+ * recursive-adjacency to provide the shared point of cutover. This is shown
+ * below
+ *
+ * E_a -->
+ * E_b --> PL_2 --> P_a --> E_44 --> PL_a --> P_b --> Interface_A
+ * ... --> P_c -\
+ * E_k --> \
+ * \
+ * E_1 --> PL_k -> P_k --> Interface_K
+ * /
+ * E_l --> /
+ * E_m --> PL_1 --> P_d -/
+ * ... --> P_f --> E_55 --> PL_e --> P_e --> Interface_E
+ * E_z -->
+ *
+ * The failure scenario is the removal of entry E_1 and thus the paths P_c and
+ * P_d become unresolved. To achieve PIC the two shared recursive path-lists,
+ * PL_1 and PL_2 must be updated to remove E_1 from the recursive-multi-path-
+ * adjacencies that they contribute, before any entry E_a to E_z is updated.
+ * This means that as the update propagates backwards (right to left) in the
+ * graph it must do so breadth first, not depth first. Note this approach leads
+ * to convergence times that are dependent on the number of path-lists, and so
+ * on the number of combinations of egress PEs - this is desirable as this
+ * scale is considerably lower than the number of prefixes.
+ *
+ * Consider another section of the graph, similar to the one
+ * shown above, where there is another prefix E_2 in a similar position to E_1
+ * that also has many dependent children. It is reasonable to expect that a
+ * particular network failure may simultaneously render E_1 and E_2 unreachable.
+ * This means that the update to withdraw E_2 is downloaded immediately after the
+ * update to withdraw E_1. It is a requirement on the FIB to not spend large
+ * amounts of time in a back-walk whilst processing the update for E_1, i.e. the
+ * back-walk must not reach as far as E_a and its siblings. Therefore, after the
+ * back-walk has traversed one generation (breadth first) to update all the
+ * path-lists, it should be suspended/backgrounded and further updates allowed
+ * to be handled. Once the update queue is empty, the suspended walks can be
+ * resumed. Note that in the case that multiple updates affect the same entry
+ * (say E_1) then this will trigger multiple similar walks, these are merged,
+ * so each child is updated only once.
+ * In the presence of more layers of recursion PIC is still a desirable
+ * feature. Consider an extension to the diagram above, where more recursive
+ * routes (E_100 -> E_199) are added as children of E_a:
+ *
+ * E_100 -->
+ * E_101 --> PL_3 --> P_j-\
+ * ... \
+ * E_199 --> E_a -->
+ * E_b --> PL_2 --> P_a --> E_44 --> ...etc..
+ * ... --> P_c -\
+ * E_k \
+ * E_1 --> ...etc..
+ * /
+ * E_l --> /
+ * E_m --> PL_1 --> P_d -/
+ * ... --> P_e --> E_55 --> ...etc..
+ * E_z -->
+ *
+ * To achieve PIC for the routes E_100->E_199, PL_3 needs to be updated before
+ * E_b -> E_z, a breadth first traversal at each level would not achieve this.
+ * Instead the walk must proceed intelligently. Children of PL_2 are sorted so
+ * those entry objects that themselves have children appear first in the list,
+ * those without later. When an entry object that has children is walked, a
+ * walk of its children is pushed to the front of the background queue. The
+ * background queue is a priority queue. As the breadth first traversal proceeds
+ * across the dependent entry objects E_a to E_k, when the first entry that does
+ * not have children is reached (E_b), the walk is suspended and placed at the
+ * back of the queue. Following this prioritisation method, shared path-list
+ * updates are performed before all non-resolving entry objects.
+ * The CPU/core/thread that handles the updates is the same thread that handles
+ * the back-walks. Handling updates has a higher priority than making walk
+ * progress, so a walk is required to be interruptable/suspendable when new
+ * updates are available.
+ * !!! TODO - this section describes how walks should be not how they are !!!
+ *
+ * In the diagram above E_100 is an IP route; however, VPP has no restrictions
+ * on the type of object that can be a dependent of a FIB entry. Children of
+ * a FIB entry can be (and are) GRE & VXLAN tunnel endpoints, L2VPN LSPs, etc.
+ * By including all object types into the graph and extending the back-walk, we
+ * can thus deliver fast convergence to technologies that overlay on an IP
+ * network.
+ *
+ * If, having read all the above carefully, you are still thinking; 'I don't need
+ * all this %&$*, I have a route only I know about and I just need to jam it in',
+ * then fib_table_entry_special_add() is your only friend.
+ */
+
+#ifndef __FIB_H__
+#define __FIB_H__
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+
+#endif
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
new file mode 100644
index 00000000..f5a107ca
--- /dev/null
+++ b/src/vnet/fib/fib_api.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_API_H__
+#define __FIB_API_H__
+
+
+int
+add_del_route_check (fib_protocol_t table_proto,
+ u32 table_id,
+ u32 next_hop_sw_if_index,
+ dpo_proto_t next_hop_table_proto,
+ u32 next_hop_table_id,
+ u8 is_rpf_id,
+ u32 * fib_index, u32 * next_hop_fib_index);
+
+int
+add_del_route_t_handler (u8 is_multipath,
+ u8 is_add,
+ u8 is_drop,
+ u8 is_unreach,
+ u8 is_prohibit,
+ u8 is_local,
+ u8 is_multicast,
+ u8 is_classify,
+ u32 classify_table_index,
+ u8 is_resolve_host,
+ u8 is_resolve_attached,
+ u8 is_interface_rx,
+ u8 is_rpf_id,
+ u32 fib_index,
+ const fib_prefix_t * prefix,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t * next_hop,
+ u32 next_hop_sw_if_index,
+ u8 next_hop_fib_index,
+ u16 next_hop_weight,
+ u16 next_hop_preference,
+ mpls_label_t next_hop_via_label,
+ mpls_label_t * next_hop_out_label_stack);
+
+void
+copy_fib_next_hop (fib_route_path_encode_t * api_rpath,
+ void * fp_arg);
+
+#endif /* __FIB_API_H__ */
diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c
new file mode 100644
index 00000000..cc8ebc86
--- /dev/null
+++ b/src/vnet/fib/fib_attached_export.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vnet/fib/fib_attached_export.h>
+#include <vnet/fib/fib_entry_cover.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_entry_delegate.h>
+
+/**
+ * A description of the need to import routes from the export table
+ */
+typedef struct fib_ae_import_t_
+{
+ /**
+ * The entry in the export table that this importer
+ * is importing covered prefixes from
+ */
+ fib_node_index_t faei_export_entry;
+
+ /**
+ * The attached entry in the import table
+ */
+ fib_node_index_t faei_import_entry;
+ /**
+ * the sibling index on the cover
+ */
+ u32 faei_export_sibling;
+
+ /**
+ * The index of the exporter tracker. Not set if the
+ * export entry is not valid for export
+ */
+ fib_node_index_t faei_exporter;
+
+ /**
+ * A vector/list of imported entry indices
+ */
+ fib_node_index_t *faei_importeds;
+
+ /**
+ * The FIB index and prefix we are tracking
+ */
+ fib_node_index_t faei_export_fib;
+ fib_prefix_t faei_prefix;
+
+ /**
+ * The FIB index we are importing into
+ */
+ fib_node_index_t faei_import_fib;
+} fib_ae_import_t;
+
+/**
+ * A description of the need to export routes to one or more export tables
+ */
+typedef struct fib_ae_export_t_ {
+ /**
+ * The vector/list of import tracker indices
+ */
+ fib_node_index_t *faee_importers;
+
+ /**
+ * The connected entry this export is acting on behalf of
+ */
+ fib_node_index_t faee_ei;
+
+ /**
+ * Reference counting locks
+ */
+ u32 faee_locks;
+} fib_ae_export_t;
+
+/*
+ * memory pools for the importers and exporters
+ */
+static fib_ae_import_t *fib_ae_import_pool;
+static fib_ae_export_t *fib_ae_export_pool;
+
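+/**
+ * Find, or create, the export tracker for the given connected entry,
+ * and take a reference (lock) on it.
+ */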
+static fib_ae_export_t *
+fib_entry_ae_add_or_lock (fib_node_index_t connected)
+{
+ fib_entry_delegate_t *fed;
+ fib_ae_export_t *export;
+ fib_entry_t *entry;
+
+ entry = fib_entry_get(connected);
+ fed = fib_entry_delegate_get(entry,
+ FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
+
+ if (NULL == fed)
+ {
+ fed = fib_entry_delegate_find_or_add(entry,
+ FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
+ pool_get(fib_ae_export_pool, export);
+ memset(export, 0, sizeof(*export));
+
+ fed->fd_index = (export - fib_ae_export_pool);
+ export->faee_ei = connected;
+ }
+ else
+ {
+ export = pool_elt_at_index(fib_ae_export_pool, fed->fd_index);
+ }
+
+ export->faee_locks++;
+
+ return (export);
+}
+
+static void
+fib_entry_import_remove (fib_ae_import_t *import,
+ fib_node_index_t entry_index)
+{
+ fib_prefix_t prefix;
+ u32 index;
+
+ /*
+ * find the index in the vector of the entry we are removing
+ */
+ index = vec_search(import->faei_importeds, entry_index);
+
+ if (index < vec_len(import->faei_importeds))
+ {
+ /*
+ * this is an entry that was previously imported
+ */
+ fib_entry_get_prefix(entry_index, &prefix);
+
+ fib_table_entry_special_remove(import->faei_import_fib,
+ &prefix,
+ FIB_SOURCE_AE);
+
+ fib_entry_unlock(entry_index);
+ vec_del1(import->faei_importeds, index);
+ }
+}
+
+static void
+fib_entry_import_add (fib_ae_import_t *import,
+ fib_node_index_t entry_index)
+{
+ fib_node_index_t *existing;
+ fib_prefix_t prefix;
+
+ /*
+ * ensure we only add the exported entry once, since
+ * sourcing prefixes in the table is reference counted
+ */
+ vec_foreach(existing, import->faei_importeds)
+ {
+ if (*existing == entry_index)
+ {
+ return;
+ }
+ }
+
+ /*
+ * this is the first time this export entry has been imported
+ * Add it to the import FIB and to the list of importeds
+ */
+ fib_entry_get_prefix(entry_index, &prefix);
+
+ /*
+ * don't import entries that have the same prefix as the import entry
+ */
+ if (0 != fib_prefix_cmp(&prefix,
+ &import->faei_prefix))
+ {
+ const dpo_id_t *dpo;
+
+ dpo = fib_entry_contribute_ip_forwarding(entry_index);
+
+ if (dpo_id_is_valid(dpo))
+ {
+ fib_table_entry_special_dpo_add(import->faei_import_fib,
+ &prefix,
+ FIB_SOURCE_AE,
+ (fib_entry_get_flags(entry_index) |
+ FIB_ENTRY_FLAG_EXCLUSIVE),
+ load_balance_get_bucket(dpo->dpoi_index, 0));
+
+ fib_entry_lock(entry_index);
+ vec_add1(import->faei_importeds, entry_index);
+ }
+ /*
+ * else
+ * the entry currently has no valid forwarding. when it
+ * does it will export itself
+ */
+ }
+}
+
+/**
+ * Call back when walking a connected prefix's covered prefixes for import
+ */
+static int
+fib_entry_covered_walk_import (fib_entry_t *cover,
+ fib_node_index_t covered,
+ void *ctx)
+{
+ fib_ae_import_t *import = ctx;
+
+ fib_entry_import_add(import, covered);
+
+ return (0);
+}
+
+/*
+ * fib_ae_export_import_add
+ *
+ * Add an importer to a connected entry
+ */
+static void
+fib_ae_export_import_add (fib_ae_export_t *export,
+ fib_ae_import_t *import)
+{
+ fib_entry_t *entry;
+
+ import->faei_exporter = (export - fib_ae_export_pool);
+ entry = fib_entry_get(export->faee_ei);
+
+ fib_entry_cover_walk(entry,
+ fib_entry_covered_walk_import,
+ import);
+}
+
+void
+fib_attached_export_import (fib_entry_t *fib_entry,
+ fib_node_index_t export_fib)
+{
+ fib_entry_delegate_t *fed;
+ fib_ae_import_t *import;
+ fib_node_index_t fei;
+
+ /*
+ * save index for later post-realloc retrieval
+ */
+ fei = fib_entry_get_index(fib_entry);
+
+ pool_get(fib_ae_import_pool, import);
+
+ import->faei_import_fib = fib_entry->fe_fib_index;
+ import->faei_export_fib = export_fib;
+ import->faei_prefix = fib_entry->fe_prefix;
+ import->faei_import_entry = fib_entry_get_index(fib_entry);
+ import->faei_export_sibling = ~0;
+
+ /*
+ * do an exact match in the export table
+ */
+ import->faei_export_entry =
+ fib_table_lookup_exact_match(import->faei_export_fib,
+ &import->faei_prefix);
+
+ if (FIB_NODE_INDEX_INVALID == import->faei_export_entry)
+ {
+ /*
+ * no exact matching entry in the export table. can't be good.
+ * track the next best thing
+ */
+ import->faei_export_entry =
+ fib_table_lookup(import->faei_export_fib,
+ &import->faei_prefix);
+ import->faei_exporter = FIB_NODE_INDEX_INVALID;
+ }
+ else
+ {
+ /*
+ * found the entry in the export table. Import the
+ * prefixes that it covers, but only if the prefix
+ * found in the export FIB really is attached do we
+ * want to import its covered prefixes.
+ */
+ if (FIB_ENTRY_FLAG_ATTACHED &
+ fib_entry_get_flags_i(fib_entry_get(import->faei_export_entry)))
+ {
+ fib_ae_export_t *export;
+
+ export = fib_entry_ae_add_or_lock(import->faei_export_entry);
+ vec_add1(export->faee_importers, (import - fib_ae_import_pool));
+ fib_ae_export_import_add(export, import);
+ }
+ }
+
+ /*
+ * track the entry in the export table so we can update appropriately
+ * when it changes.
+ * Exporting prefixes will have allocated new fib_entry_t objects, so the pool
+ * may have realloc'd.
+ */
+ fib_entry = fib_entry_get(fei);
+ import->faei_export_sibling =
+ fib_entry_cover_track(fib_entry_get(import->faei_export_entry), fei);
+
+ fed = fib_entry_delegate_find_or_add(fib_entry,
+ FIB_ENTRY_DELEGATE_ATTACHED_IMPORT);
+ fed->fd_index = (import - fib_ae_import_pool);
+}
+
+/**
+ * \brief All the imported entries need to be purged
+ */
+void
+fib_attached_export_purge (fib_entry_t *fib_entry)
+{
+ fib_entry_delegate_t *fed;
+
+ fed = fib_entry_delegate_get(fib_entry,
+ FIB_ENTRY_DELEGATE_ATTACHED_IMPORT);
+
+ if (NULL != fed)
+ {
+ fib_node_index_t *import_index;
+ fib_entry_t *export_entry;
+ fib_ae_import_t *import;
+ fib_ae_export_t *export;
+
+ import = pool_elt_at_index(fib_ae_import_pool, fed->fd_index);
+
+ /*
+ * remove each imported entry
+ */
+ vec_foreach(import_index, import->faei_importeds)
+ {
+ fib_prefix_t prefix;
+
+ fib_entry_get_prefix(*import_index, &prefix);
+
+ fib_table_entry_delete(import->faei_import_fib,
+ &prefix,
+ FIB_SOURCE_AE);
+ fib_entry_unlock(*import_index);
+ }
+ vec_free(import->faei_importeds);
+
+ /*
+ * stop tracking the export entry
+ */
+ if (~0 != import->faei_export_sibling)
+ {
+ fib_entry_cover_untrack(fib_entry_get(import->faei_export_entry),
+ import->faei_export_sibling);
+ }
+ import->faei_export_sibling = ~0;
+
+ /*
+ * remove this import tracker from the export's list,
+ * if it is attached to one. It won't be, in the case that the tracked
+ * export entry is not an attached exact match.
+ */
+ if (FIB_NODE_INDEX_INVALID != import->faei_exporter)
+ {
+ fib_entry_delegate_t *fed;
+
+ export_entry = fib_entry_get(import->faei_export_entry);
+
+ fed = fib_entry_delegate_get(export_entry,
+ FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
+ ASSERT(NULL != fed);
+
+ export = pool_elt_at_index(fib_ae_export_pool, fed->fd_index);
+
+ u32 index = vec_search(export->faee_importers,
+ (import - fib_ae_import_pool));
+
+ ASSERT(index < vec_len(export->faee_importers));
+ vec_del1(export->faee_importers, index);
+
+ /*
+ * free the exporter if there are no longer importers
+ */
+ if (0 == --export->faee_locks)
+ {
+ pool_put(fib_ae_export_pool, export);
+ fib_entry_delegate_remove(export_entry,
+ FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
+ }
+ }
+
+ /*
+ * free the import tracker
+ */
+ pool_put(fib_ae_import_pool, import);
+ fib_entry_delegate_remove(fib_entry,
+ FIB_ENTRY_DELEGATE_ATTACHED_IMPORT);
+ }
+}
+
+void
+fib_attached_export_covered_added (fib_entry_t *cover,
+ fib_node_index_t covered)
+{
+ fib_entry_delegate_t *fed;
+
+ fed = fib_entry_delegate_get(cover,
+ FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
+
+ if (NULL != fed)
+ {
+ /*
+ * the covering prefix is exporting to other tables
+ */
+ fib_node_index_t *import_index;
+ fib_ae_import_t *import;
+ fib_ae_export_t *export;
+
+ export = pool_elt_at_index(fib_ae_export_pool, fed->fd_index);
+
+ /*
+ * export the covered entry to each of the importers
+ */
+ vec_foreach(import_index, export->faee_importers)
+ {
+ import = pool_elt_at_index(fib_ae_import_pool, *import_index);
+
+ fib_entry_import_add(import, covered);
+ }
+ }
+}
+
+void
+fib_attached_export_covered_removed (fib_entry_t *cover,
+ fib_node_index_t covered)
+{
+ fib_entry_delegate_t *fed;
+
+ fed = fib_entry_delegate_get(cover,
+ FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
+
+ if (NULL != fed)
+ {
+ /*
+ * the covering prefix is exporting to other tables
+ */
+ fib_node_index_t *import_index;
+ fib_ae_import_t *import;
+ fib_ae_export_t *export;
+
+ export = pool_elt_at_index(fib_ae_export_pool, fed->fd_index);
+
+ /*
+ * remove the covered entry from each of the importers
+ */
+ vec_foreach(import_index, export->faee_importers)
+ {
+ import = pool_elt_at_index(fib_ae_import_pool, *import_index);
+
+ fib_entry_import_remove(import, covered);
+ }
+ }
+}
+
+static void
+fib_attached_export_cover_modified_i (fib_entry_t *fib_entry)
+{
+ fib_entry_delegate_t *fed;
+
+ fed = fib_entry_delegate_get(fib_entry,
+ FIB_ENTRY_DELEGATE_ATTACHED_IMPORT);
+
+ if (NULL != fed)
+ {
+ fib_ae_import_t *import;
+ u32 export_fib;
+
+ /*
+ * save the temporaries we need from the existing import
+ * since it will be toast after the purge.
+ */
+ import = pool_elt_at_index(fib_ae_import_pool, fed->fd_index);
+ export_fib = import->faei_export_fib;
+
+ /*
+ * keep it simple. purge anything that was previously imported.
+ * then re-evaluate the need to import.
+ */
+ fib_attached_export_purge(fib_entry);
+ fib_attached_export_import(fib_entry, export_fib);
+ }
+}
+
+/**
+ * \brief If this entry is tracking a cover (in another table)
+ * then that cover has changed. re-evaluate import.
+ */
+void
+fib_attached_export_cover_change (fib_entry_t *fib_entry)
+{
+ fib_attached_export_cover_modified_i(fib_entry);
+}
+
+/**
+ * \brief If this entry is tracking a cover (in another table)
+ * then that cover has been updated. re-evaluate import.
+ */
+void
+fib_attached_export_cover_update (fib_entry_t *fib_entry)
+{
+ fib_attached_export_cover_modified_i(fib_entry);
+}
+
+u8*
+fib_ae_import_format (fib_node_index_t impi,
+ u8* s)
+{
+ fib_node_index_t *index;
+ fib_ae_import_t *import;
+
+ import = pool_elt_at_index(fib_ae_import_pool, impi);
+
+ s = format(s, "\n Attached-Import:%d:[", (import - fib_ae_import_pool));
+ s = format(s, "export-prefix:%U ", format_fib_prefix, &import->faei_prefix);
+ s = format(s, "export-entry:%d ", import->faei_export_entry);
+ s = format(s, "export-sibling:%d ", import->faei_export_sibling);
+ s = format(s, "exporter:%d ", import->faei_exporter);
+ s = format(s, "export-fib:%d ", import->faei_export_fib);
+
+ s = format(s, "import-entry:%d ", import->faei_import_entry);
+ s = format(s, "import-fib:%d ", import->faei_import_fib);
+
+ s = format(s, "importeds:[");
+ vec_foreach(index, import->faei_importeds)
+ {
+ s = format(s, "%d, ", *index);
+ }
+ s = format(s, "]]");
+
+ return (s);
+}
+
+u8*
+fib_ae_export_format (fib_node_index_t expi,
+ u8* s)
+{
+ fib_node_index_t *index;
+ fib_ae_export_t *export;
+
+ export = pool_elt_at_index(fib_ae_export_pool, expi);
+
+ s = format(s, "\n Attached-Export:%d:[", (export - fib_ae_export_pool));
+ s = format(s, "export-entry:%d ", export->faee_ei);
+
+ s = format(s, "importers:[");
+ vec_foreach(index, export->faee_importers)
+ {
+ s = format(s, "%d, ", *index);
+ }
+ s = format(s, "]]");
+
+ return (s);
+}
diff --git a/src/vnet/fib/fib_attached_export.h b/src/vnet/fib/fib_attached_export.h
new file mode 100644
index 00000000..d4c2b57c
--- /dev/null
+++ b/src/vnet/fib/fib_attached_export.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * FIB attached export
+ *
+ * what's it all about?
+ * say one does this:
+ * set int ip table Gig0 2
+ * set int ip addr Gig0 10.0.0.1/24
+ * Gig0 is in table 2 with a connected address.
+ * Now we add a route matching said connected prefix in a different table
+ * ip route add table 3 10.0.0.0/24 via Gig0
+ * How do we expect traffic in table 3 to be forwarded? Clearly out of
+ * Gig0. It's an attached route, hence we are saying that we can ARP for
+ * hosts in the attached subnet. And we can. But any ARP replies will
+ * be received on Gig0, and since Gig0 is in table 2, they will install
+ * the adj-fibs in table 2. So traffic in table 3 will never hit an adj-fib
+ * and hence always hits the glean, and is thus effectively dropped.
+ * How do we fix this? Attached Export !! All more specific entries in table 2
+ * that track and are covered by the connected are automatically exported into
+ * table 3. Now table 3 also has adj-fibs (and the local) so traffic to hosts
+ * is restored.
+ */
+
+#ifndef __FIB_ATTACHED_EXPORT_H__
+#define __FIB_ATTACHED_EXPORT_H__
+
+#include <vnet/fib/fib_types.h>
+
+extern void fib_attached_export_import(fib_entry_t *fib_entry,
+ fib_node_index_t export_fib);
+
+extern void fib_attached_export_purge(fib_entry_t *fib_entry);
+
+extern void fib_attached_export_covered_added(fib_entry_t *cover,
+ fib_node_index_t covered);
+extern void fib_attached_export_covered_removed(fib_entry_t *cover,
+ fib_node_index_t covered);
+extern void fib_attached_export_cover_change(fib_entry_t *fib_entry);
+extern void fib_attached_export_cover_update(fib_entry_t *fib_entry);
+
+extern u8* fib_ae_import_format(fib_node_index_t impi, u8*s);
+extern u8* fib_ae_export_format(fib_node_index_t expi, u8*s);
+
+#endif
diff --git a/src/vnet/fib/fib_bfd.c b/src/vnet/fib/fib_bfd.c
new file mode 100644
index 00000000..734ee8cc
--- /dev/null
+++ b/src/vnet/fib/fib_bfd.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/bfd/bfd_main.h>
+
+#include <vnet/fib/fib_entry_delegate.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_walk.h>
+
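+/**
+ * Map a BFD session state to the FIB's simpler up/down view;
+ * anything other than an established (up) session is treated as down.
+ */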
+static fib_bfd_state_t
+fib_bfd_bfd_state_to_fib (bfd_state_e bstate)
+{
+ switch (bstate)
+ {
+ case BFD_STATE_up:
+ return (FIB_BFD_STATE_UP);
+ case BFD_STATE_down:
+ case BFD_STATE_admin_down:
+ case BFD_STATE_init:
+ return (FIB_BFD_STATE_DOWN);
+ }
+ return (FIB_BFD_STATE_DOWN);
+}
+
+static void
+fib_bfd_update_walk (fib_node_index_t fei)
+{
+ /*
+ * initiate a backwalk of dependent children
+ * to notify of the state change of this entry.
+ */
+ fib_node_back_walk_ctx_t ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+ };
+ fib_walk_sync(FIB_NODE_TYPE_ENTRY, fei, &ctx);
+}
+
+/**
+ * @brief Callback function registered with BFD module to receive notifications
+ * of the CRUD of BFD sessions
+ * would be static but for the fact it's called from the unit-tests
+ */
+void
+fib_bfd_notify (bfd_listen_event_e event,
+ const bfd_session_t *session)
+{
+ fib_entry_delegate_t *fed;
+ const bfd_udp_key_t *key;
+ fib_node_index_t fei;
+
+ if (BFD_HOP_TYPE_MULTI != session->hop_type)
+ {
+ /*
+ * multi-hop BFD sessions attach directly to the FIB entry;
+ * single-hop sessions attach to the associated adjacency.
+ */
+ return;
+ }
+
+ key = &session->udp.key;
+
+ fib_prefix_t pfx = {
+ .fp_addr = key->peer_addr,
+ .fp_proto = (ip46_address_is_ip4 (&key->peer_addr) ?
+ FIB_PROTOCOL_IP4:
+ FIB_PROTOCOL_IP6),
+ .fp_len = (ip46_address_is_ip4 (&key->peer_addr) ?
+ 32:
+ 128),
+ };
+
+ /*
+ * get the FIB entry
+ */
+ fei = fib_table_lookup_exact_match(key->fib_index, &pfx);
+
+ switch (event)
+ {
+ case BFD_LISTEN_EVENT_CREATE:
+ /*
+ * The creation of a new session
+ */
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (fed = fib_entry_delegate_get(fib_entry_get(fei),
+ FIB_ENTRY_DELEGATE_BFD)))
+ {
+ /*
+ * already got state for this entry
+ */
+ }
+ else
+ {
+ /*
+ * source and lock the entry. add the delegate
+ */
+ fei = fib_table_entry_special_add(key->fib_index,
+ &pfx,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+ fib_entry_lock(fei);
+
+ fed = fib_entry_delegate_find_or_add(fib_entry_get(fei),
+ FIB_ENTRY_DELEGATE_BFD);
+
+ /*
+ * pretend the session is up and skip the walk.
+ * If we set it down then we get traffic loss on new children.
+ * if we walk then we lose traffic for existing children. Wait
+ * for the first BFD UP/DOWN before we let the session's state
+ * influence forwarding.
+ */
+ fed->fd_bfd_state = FIB_BFD_STATE_UP;
+ }
+ break;
+
+ case BFD_LISTEN_EVENT_UPDATE:
+ /*
+ * state change up/down
+ */
+ ASSERT(FIB_NODE_INDEX_INVALID != fei);
+
+ fed = fib_entry_delegate_get(fib_entry_get(fei),
+ FIB_ENTRY_DELEGATE_BFD);
+
+ if (NULL != fed)
+ {
+ fed->fd_bfd_state = fib_bfd_bfd_state_to_fib(session->local_state);
+ fib_bfd_update_walk(fei);
+ }
+ /*
+ * else
+ * no BFD state
+ */
+ break;
+
+ case BFD_LISTEN_EVENT_DELETE:
+ /*
+ * session has been removed.
+ */
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /*
+ * no FIB entry
+ */
+ }
+ else if (fib_entry_delegate_get(fib_entry_get(fei),
+ FIB_ENTRY_DELEGATE_BFD))
+ {
+ /*
+ * it has an associated BFD tracking delegate;
+ * unsource the entry and remove the BFD tracking delegate
+ */
+ fib_entry_delegate_remove(fib_entry_get(fei),
+ FIB_ENTRY_DELEGATE_BFD);
+ fib_bfd_update_walk(fei);
+
+ fib_table_entry_special_remove(key->fib_index,
+ &pfx,
+ FIB_SOURCE_RR);
+ fib_entry_unlock(fei);
+ }
+ /*
+ * else
+ * no BFD associated state
+ */
+ break;
+ }
+}
+
+static clib_error_t *
+fib_bfd_main_init (vlib_main_t * vm)
+{
+ clib_error_t * error = NULL;
+
+ if ((error = vlib_call_init_function (vm, bfd_main_init)))
+ return (error);
+
+ bfd_register_listener(fib_bfd_notify);
+
+ return (error);
+}
+
+VLIB_INIT_FUNCTION (fib_bfd_main_init);
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
new file mode 100644
index 00000000..4c9b1abd
--- /dev/null
+++ b/src/vnet/fib/fib_entry.c
@@ -0,0 +1,1618 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_entry_cover.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/fib/fib_attached_export.h>
+#include <vnet/fib/fib_path_ext.h>
+
+/*
+ * Array of strings/names for the FIB sources
+ */
+static const char *fib_source_names[] = FIB_SOURCES;
+static const char *fib_attribute_names[] = FIB_ENTRY_ATTRIBUTES;
+
+/*
+ * Pool for all fib_entries
+ */
+static fib_entry_t *fib_entry_pool;
+
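+/**
+ * Get a FIB entry object from its index in the entry pool.
+ */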
+fib_entry_t *
+fib_entry_get (fib_node_index_t index)
+{
+ return (pool_elt_at_index(fib_entry_pool, index));
+}
+
+static fib_node_t *
+fib_entry_get_node (fib_node_index_t index)
+{
+ return ((fib_node_t*)fib_entry_get(index));
+}
+
+fib_node_index_t
+fib_entry_get_index (const fib_entry_t * fib_entry)
+{
+ return (fib_entry - fib_entry_pool);
+}
+
+fib_protocol_t
+fib_entry_get_proto (const fib_entry_t * fib_entry)
+{
+ return (fib_entry->fe_prefix.fp_proto);
+}
+
+dpo_proto_t
+fib_entry_get_dpo_proto (const fib_entry_t * fib_entry)
+{
+ return (fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto));
+}
+
+fib_forward_chain_type_t
+fib_entry_get_default_chain_type (const fib_entry_t *fib_entry)
+{
+ switch (fib_entry->fe_prefix.fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+ case FIB_PROTOCOL_IP6:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+ case FIB_PROTOCOL_MPLS:
+ if (MPLS_EOS == fib_entry->fe_prefix.fp_eos)
+ return (FIB_FORW_CHAIN_TYPE_MPLS_EOS);
+ else
+ return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
+ }
+
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+}
+
+u8 *
+format_fib_source (u8 * s, va_list * args)
+{
+ fib_source_t source = va_arg (*args, int);
+
+ s = format (s, "src:%s", fib_source_names[source]);
+
+ return (s);
+}
+
+u8 *
+format_fib_entry (u8 * s, va_list * args)
+{
+ fib_forward_chain_type_t fct;
+ fib_entry_attribute_t attr;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *src;
+ fib_node_index_t fei;
+ fib_source_t source;
+ int level;
+
+ fei = va_arg (*args, fib_node_index_t);
+ level = va_arg (*args, int);
+ fib_entry = fib_entry_get(fei);
+
+ s = format (s, "%U", format_fib_prefix, &fib_entry->fe_prefix);
+
+ if (level >= FIB_ENTRY_FORMAT_DETAIL)
+ {
+ s = format (s, " fib:%d", fib_entry->fe_fib_index);
+ s = format (s, " index:%d", fib_entry_get_index(fib_entry));
+ s = format (s, " locks:%d", fib_entry->fe_node.fn_locks);
+
+ FOR_EACH_SRC_ADDED(fib_entry, src, source,
+ ({
+ s = format (s, "\n %U", format_fib_source, source);
+ s = fib_entry_src_format(fib_entry, source, s);
+ s = format (s, " refs:%d ", src->fes_ref_count);
+ if (FIB_ENTRY_FLAG_NONE != src->fes_entry_flags) {
+ s = format(s, "flags:");
+ FOR_EACH_FIB_ATTRIBUTE(attr) {
+ if ((1<<attr) & src->fes_entry_flags) {
+ s = format (s, "%s,", fib_attribute_names[attr]);
+ }
+ }
+ }
+ s = format (s, "\n");
+ if (FIB_NODE_INDEX_INVALID != src->fes_pl)
+ {
+ s = fib_path_list_format(src->fes_pl, s);
+ }
+ s = format(s, "%U", format_fib_path_ext_list, &src->fes_path_exts);
+ }));
+
+ s = format (s, "\n forwarding: ");
+ }
+ else
+ {
+ s = format (s, "\n");
+ }
+
+ fct = fib_entry_get_default_chain_type(fib_entry);
+
+ if (!dpo_id_is_valid(&fib_entry->fe_lb))
+ {
+ s = format (s, " UNRESOLVED\n");
+ return (s);
+ }
+ else
+ {
+ s = format(s, " %U-chain\n %U",
+ format_fib_forw_chain_type, fct,
+ format_dpo_id,
+ &fib_entry->fe_lb,
+ 2);
+ s = format(s, "\n");
+
+ if (level >= FIB_ENTRY_FORMAT_DETAIL2)
+ {
+ fib_entry_delegate_type_t fdt;
+ fib_entry_delegate_t *fed;
+
+ s = format (s, " Delegates:\n");
+ FOR_EACH_DELEGATE(fib_entry, fdt, fed,
+ {
+ s = format(s, " %U\n", format_fib_entry_deletegate, fed);
+ });
+ }
+ }
+
+ if (level >= FIB_ENTRY_FORMAT_DETAIL2)
+ {
+ s = format(s, " Children:");
+ s = fib_node_children_format(fib_entry->fe_node.fn_children, s);
+ }
+
+ return (s);
+}
+
+static fib_entry_t*
+fib_entry_from_fib_node (fib_node_t *node)
+{
+#if CLIB_DEBUG > 0
+ ASSERT(FIB_NODE_TYPE_ENTRY == node->fn_type);
+#endif
+ return ((fib_entry_t*)node);
+}
+
+static void
+fib_entry_last_lock_gone (fib_node_t *node)
+{
+ fib_entry_delegate_type_t fdt;
+ fib_entry_delegate_t *fed;
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_from_fib_node(node);
+
+ FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed,
+ {
+ dpo_reset(&fed->fd_dpo);
+ fib_entry_delegate_remove(fib_entry, fdt);
+ });
+
+ FIB_ENTRY_DBG(fib_entry, "last-lock");
+
+ fib_node_deinit(&fib_entry->fe_node);
+ // FIXME -RR Backwalk
+
+ ASSERT(0 == vec_len(fib_entry->fe_delegates));
+ vec_free(fib_entry->fe_delegates);
+ vec_free(fib_entry->fe_srcs);
+ pool_put(fib_entry_pool, fib_entry);
+}
+
+static fib_entry_src_t*
+fib_entry_get_best_src_i (const fib_entry_t *fib_entry)
+{
+ fib_entry_src_t *bsrc;
+
+ /*
+ * the enum of sources is deliberately arranged in priority order
+ */
+ if (0 == vec_len(fib_entry->fe_srcs))
+ {
+ bsrc = NULL;
+ }
+ else
+ {
+ bsrc = vec_elt_at_index(fib_entry->fe_srcs, 0);
+ }
+
+ return (bsrc);
+}
+
+static fib_source_t
+fib_entry_src_get_source (const fib_entry_src_t *esrc)
+{
+ if (NULL != esrc)
+ {
+ return (esrc->fes_src);
+ }
+ return (FIB_SOURCE_MAX);
+}
+
+static fib_entry_flag_t
+fib_entry_src_get_flags (const fib_entry_src_t *esrc)
+{
+ if (NULL != esrc)
+ {
+ return (esrc->fes_entry_flags);
+ }
+ return (FIB_ENTRY_FLAG_NONE);
+}
+
+fib_entry_flag_t
+fib_entry_get_flags (fib_node_index_t fib_entry_index)
+{
+ return (fib_entry_get_flags_i(fib_entry_get(fib_entry_index)));
+}
+
+/*
+ * fib_entry_back_walk_notify
+ *
+ * A back walk has reached this entry.
+ */
+static fib_node_back_walk_rc_t
+fib_entry_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_from_fib_node(node);
+
+ if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
+ {
+ fib_entry_src_action_reactivate(fib_entry,
+ fib_entry_get_best_source(
+ fib_entry_get_index(fib_entry)));
+ }
+
+ /*
+ * all other walk types can be reclassified as a re-evaluate of
+ * all recursive dependents.
+ * By reclassifying we ensure that, should any of these walk types meet,
+ * they can be merged.
+ */
+ ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+
+ /*
+ * ... and nothing is forced sync from now on.
+ */
+ ctx->fnbw_flags &= ~FIB_NODE_BW_FLAG_FORCE_SYNC;
+
+ /*
+ * propagate the backwalk further if we haven't already reached the
+ * maximum depth.
+ */
+ fib_walk_sync(FIB_NODE_TYPE_ENTRY,
+ fib_entry_get_index(fib_entry),
+ ctx);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+static void
+fib_entry_show_memory (void)
+{
+ u32 n_srcs = 0, n_exts = 0;
+ fib_entry_src_t *esrc;
+ fib_entry_t *entry;
+
+ fib_show_memory_usage("Entry",
+ pool_elts(fib_entry_pool),
+ pool_len(fib_entry_pool),
+ sizeof(fib_entry_t));
+
+ pool_foreach(entry, fib_entry_pool,
+ ({
+ n_srcs += vec_len(entry->fe_srcs);
+ vec_foreach(esrc, entry->fe_srcs)
+ {
+ n_exts += fib_path_ext_list_length(&esrc->fes_path_exts);
+ }
+ }));
+
+ fib_show_memory_usage("Entry Source",
+ n_srcs, n_srcs, sizeof(fib_entry_src_t));
+ fib_show_memory_usage("Entry Path-Extensions",
+ n_exts, n_exts,
+ sizeof(fib_path_ext_t));
+}
+
+/*
+ * The FIB path-list's graph node virtual function table
+ */
+static const fib_node_vft_t fib_entry_vft = {
+ .fnv_get = fib_entry_get_node,
+ .fnv_last_lock = fib_entry_last_lock_gone,
+ .fnv_back_walk = fib_entry_back_walk_notify,
+ .fnv_mem_show = fib_entry_show_memory,
+};
+
+/**
+ * @brief Contribute the set of Adjacencies that this entry forwards with
+ * to build the uRPF list of its children
+ */
+void
+fib_entry_contribute_urpf (fib_node_index_t entry_index,
+ index_t urpf)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(entry_index);
+
+ fib_path_list_contribute_urpf(fib_entry->fe_parent, urpf);
+}
+
+/*
+ * If the client requests a chain for multicast forwarding then swap
+ * the chain type to one that can provide such transport.
+ */
+static fib_forward_chain_type_t
+fib_entry_chain_type_mcast_to_ucast (fib_forward_chain_type_t fct)
+{
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ /*
+ * we can only transport IP multicast packets if there is an
+ * LSP.
+ */
+ fct = FIB_FORW_CHAIN_TYPE_MPLS_EOS;
+ break;
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ break;
+ }
+
+ return (fct);
+}
+
+/*
+ * fib_entry_contribute_forwarding
+ *
+ * Get and lock the forwarding information (DPO) contributed by the FIB entry.
+ */
+void
+fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo)
+{
+ fib_entry_delegate_t *fed;
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ /*
+ * mfib children ask for mcast chains. fix these to the appropriate ucast types.
+ */
+ fct = fib_entry_chain_type_mcast_to_ucast(fct);
+
+ if (fct == fib_entry_get_default_chain_type(fib_entry))
+ {
+ dpo_copy(dpo, &fib_entry->fe_lb);
+ }
+ else
+ {
+ fed = fib_entry_delegate_get(fib_entry,
+ fib_entry_chain_type_to_delegate_type(fct));
+
+ if (NULL == fed)
+ {
+ fed = fib_entry_delegate_find_or_add(
+ fib_entry,
+ fib_entry_chain_type_to_delegate_type(fct));
+ /*
+ * on-demand create eos/non-eos.
+ * There is no on-demand delete because:
+ * - memory versus complexity & reliability:
+ * leaving unrequired [n]eos LBs around wastes memory; cleaning
+ * them up on the right trigger is more code. We favour the simpler
+ * option and accept the memory cost.
+ */
+ fib_entry_src_mk_lb(fib_entry,
+ fib_entry_get_best_src_i(fib_entry),
+ fct,
+ &fed->fd_dpo);
+ }
+
+ dpo_copy(dpo, &fed->fd_dpo);
+ }
+ /*
+ * don't allow the special index indicating replicate vs. load-balance
+ * to escape to the clients
+ */
+ dpo->dpoi_index &= ~MPLS_IS_REPLICATE;
+}
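+
+/*
+ * An illustrative (hypothetical) caller: an mfib child that needs to
+ * transport IP multicast over an MPLS tunnel would ask:
+ *
+ *   dpo_id_t dpo = DPO_INVALID;
+ *   fib_entry_contribute_forwarding(fei, FIB_FORW_CHAIN_TYPE_MCAST_IP4, &dpo);
+ *
+ * and, per fib_entry_chain_type_mcast_to_ucast() above, receive the
+ * MPLS EOS chain instead.
+ */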
+
+const dpo_id_t *
+fib_entry_contribute_ip_forwarding (fib_node_index_t fib_entry_index)
+{
+ fib_forward_chain_type_t fct;
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ fct = fib_entry_get_default_chain_type(fib_entry);
+
+ ASSERT((fct == FIB_FORW_CHAIN_TYPE_UNICAST_IP4 ||
+ fct == FIB_FORW_CHAIN_TYPE_UNICAST_IP6));
+
+ return (&fib_entry->fe_lb);
+}
+
+adj_index_t
+fib_entry_get_adj (fib_node_index_t fib_entry_index)
+{
+ const dpo_id_t *dpo;
+
+ dpo = fib_entry_contribute_ip_forwarding(fib_entry_index);
+
+ if (dpo_id_is_valid(dpo))
+ {
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+
+ if (dpo_is_adj(dpo))
+ {
+ return (dpo->dpoi_index);
+ }
+ }
+ return (ADJ_INDEX_INVALID);
+}
+
+fib_node_index_t
+fib_entry_get_path_list (fib_node_index_t fib_entry_index)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ return (fib_entry->fe_parent);
+}
+
+u32
+fib_entry_child_add (fib_node_index_t fib_entry_index,
+ fib_node_type_t child_type,
+ fib_node_index_t child_index)
+{
+ return (fib_node_child_add(FIB_NODE_TYPE_ENTRY,
+ fib_entry_index,
+ child_type,
+ child_index));
+}
+
+void
+fib_entry_child_remove (fib_node_index_t fib_entry_index,
+ u32 sibling_index)
+{
+ fib_node_child_remove(FIB_NODE_TYPE_ENTRY,
+ fib_entry_index,
+ sibling_index);
+
+ if (0 == fib_node_get_n_children(FIB_NODE_TYPE_ENTRY,
+ fib_entry_index))
+ {
+ /*
+ * if there are no children left then there is no reason to keep
+ * the non-default forwarding chains. those chains are built only
+ * because the children want them.
+ */
+ fib_entry_delegate_type_t fdt;
+ fib_entry_delegate_t *fed;
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed,
+ {
+ dpo_reset(&fed->fd_dpo);
+ fib_entry_delegate_remove(fib_entry, fdt);
+ });
+ }
+}
+
+static fib_entry_t *
+fib_entry_alloc (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_node_index_t *fib_entry_index)
+{
+ fib_entry_t *fib_entry;
+ fib_prefix_t *fep;
+
+ pool_get(fib_entry_pool, fib_entry);
+ memset(fib_entry, 0, sizeof(*fib_entry));
+
+ fib_node_init(&fib_entry->fe_node,
+ FIB_NODE_TYPE_ENTRY);
+
+ fib_entry->fe_fib_index = fib_index;
+
+ /*
+ * the one time we need to update the const prefix is when
+ * the entry is first created
+ */
+ fep = (fib_prefix_t*)&(fib_entry->fe_prefix);
+ *fep = *prefix;
+
+ if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto)
+ {
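+ /*
+ * an MPLS prefix is always full length: the 20 bit label plus the
+ * EOS bit gives 21
+ */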
+ fep->fp_len = 21;
+ if (MPLS_NON_EOS == fep->fp_eos)
+ {
+ fep->fp_payload_proto = DPO_PROTO_MPLS;
+ }
+ ASSERT(DPO_PROTO_NONE != fib_entry->fe_prefix.fp_payload_proto);
+ }
+
+ dpo_reset(&fib_entry->fe_lb);
+
+ *fib_entry_index = fib_entry_get_index(fib_entry);
+
+ FIB_ENTRY_DBG(fib_entry, "alloc");
+
+ return (fib_entry);
+}
+
+static fib_entry_t*
+fib_entry_post_flag_update_actions (fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t old_flags)
+{
+ fib_node_index_t fei;
+
+ /*
+ * save the index so we can recover from pool reallocs
+ */
+ fei = fib_entry_get_index(fib_entry);
+
+ /*
+ * handle changes to attached export for import entries
+ */
+ int is_import = (FIB_ENTRY_FLAG_IMPORT & fib_entry_get_flags_i(fib_entry));
+ int was_import = (FIB_ENTRY_FLAG_IMPORT & old_flags);
+
+ if (!was_import && is_import)
+ {
+ /*
+ * transition from not exported to exported
+ */
+
+ /*
+ * there is an assumption here that the entry resolves via only
+ * one interface and that it is the cross VRF interface.
+ */
+ u32 sw_if_index = fib_path_list_get_resolving_interface(fib_entry->fe_parent);
+
+ fib_attached_export_import(fib_entry,
+ fib_table_get_index_for_sw_if_index(
+ fib_entry_get_proto(fib_entry),
+ sw_if_index));
+ }
+ else if (was_import && !is_import)
+ {
+ /*
+ * transition from exported to not exported
+ */
+ fib_attached_export_purge(fib_entry);
+ }
+ /*
+ * else
+ * no change. nothing to do.
+ */
+
+ /*
+ * reload the entry address post possible pool realloc
+ */
+ fib_entry = fib_entry_get(fei);
+
+ /*
+ * handle changes to attached export for export entries
+ */
+ int is_attached = (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(fib_entry));
+ int was_attached = (FIB_ENTRY_FLAG_ATTACHED & old_flags);
+
+ if (!was_attached && is_attached)
+ {
+ /*
+ * transition to attached. time to export
+ */
+ // FIXME
+ }
+ // else FIXME
+
+ return (fib_entry);
+}
+
+static void
+fib_entry_post_install_actions (fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t old_flags)
+{
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry,
+ source,
+ old_flags);
+ fib_entry_src_action_installed(fib_entry, source);
+}
+
+fib_node_index_t
+fib_entry_create (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *paths)
+{
+ fib_node_index_t fib_entry_index;
+ fib_entry_t *fib_entry;
+
+ ASSERT(0 < vec_len(paths));
+
+ fib_entry = fib_entry_alloc(fib_index, prefix, &fib_entry_index);
+
+ /*
+ * since this is a new entry create, we don't need to check for winning
+ * sources - there is only one.
+ */
+ fib_entry = fib_entry_src_action_add(fib_entry, source, flags,
+ drop_dpo_get(
+ fib_proto_to_dpo(
+ fib_entry_get_proto(fib_entry))));
+ fib_entry_src_action_path_swap(fib_entry,
+ source,
+ flags,
+ paths);
+ /*
+ * handle possible realloc's by refetching the pointer
+ */
+ fib_entry = fib_entry_get(fib_entry_index);
+ fib_entry_src_action_activate(fib_entry, source);
+
+ fib_entry_post_install_actions(fib_entry, source, FIB_ENTRY_FLAG_NONE);
+
+ return (fib_entry_index);
+}
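+
+/*
+ * A sketch of a typical (hypothetical) caller: to add a route, build a
+ * vector of one or more fib_route_path_t and call, e.g.:
+ *
+ *   fib_node_index_t fei;
+ *   fei = fib_entry_create(fib_index, &pfx, FIB_SOURCE_API,
+ *                          FIB_ENTRY_FLAG_NONE, rpaths);
+ */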
+
+fib_node_index_t
+fib_entry_create_special (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_node_index_t fib_entry_index;
+ fib_entry_t *fib_entry;
+
+ /*
+ * create and initialise the new entry
+ */
+ fib_entry = fib_entry_alloc(fib_index, prefix, &fib_entry_index);
+
+ /*
+ * create the path-list
+ */
+ fib_entry = fib_entry_src_action_add(fib_entry, source, flags, dpo);
+ fib_entry_src_action_activate(fib_entry, source);
+
+ fib_entry_post_install_actions(fib_entry, source, FIB_ENTRY_FLAG_NONE);
+
+ return (fib_entry_index);
+}
+
+static void
+fib_entry_post_update_actions (fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t old_flags)
+{
+ /*
+ * backwalk to children to inform them of the change to forwarding.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_get_index(fib_entry), &bw_ctx);
+
+ /*
+ * then inform any covered prefixes
+ */
+ fib_entry_cover_update_notify(fib_entry);
+
+ fib_entry_post_install_actions(fib_entry, source, old_flags);
+}
+
+static void
+fib_entry_source_change (fib_entry_t *fib_entry,
+ fib_source_t best_source,
+ fib_source_t new_source,
+ fib_entry_flag_t old_flags)
+{
+ /*
+ * what happens next depends on how the changed source ranks
+ * against the entry's current best source.
+ */
+ if (new_source < best_source)
+ {
+ /*
+ * we have a new winning source.
+ */
+ fib_entry_src_action_deactivate(fib_entry, best_source);
+ fib_entry_src_action_activate(fib_entry, new_source);
+ }
+ else if (new_source > best_source)
+ {
+ /*
+ * the new source loses. nothing to do here.
+ * the data from the source is saved in the path-list created
+ */
+ return;
+ }
+ else
+ {
+ /*
+ * the new source is one this entry already has.
+ * But the path-list was updated, which will contribute new forwarding,
+ * so install it.
+ */
+ fib_entry_src_action_deactivate(fib_entry, new_source);
+ fib_entry_src_action_activate(fib_entry, new_source);
+ }
+
+ fib_entry_post_update_actions(fib_entry, new_source, old_flags);
+}
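+
+/*
+ * For example: if the entry's best source is FIB_SOURCE_ADJ and
+ * FIB_SOURCE_API is added, the API source wins (lower enum value) and its
+ * path-list is activated; were FIB_SOURCE_RR added instead, it would lose
+ * and its data would simply be recorded against the entry.
+ */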
+
+void
+fib_entry_special_add (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_source_t best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ fib_entry = fib_entry_src_action_add(fib_entry, source, flags, dpo);
+ fib_entry_source_change(fib_entry, best_source, source, bflags);
+}
+
+void
+fib_entry_special_update (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_source_t best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ fib_entry = fib_entry_src_action_update(fib_entry, source, flags, dpo);
+ fib_entry_source_change(fib_entry, best_source, source, bflags);
+}
+
+
+void
+fib_entry_path_add (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *rpath)
+{
+ fib_source_t best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ ASSERT(1 == vec_len(rpath));
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ ASSERT(NULL != fib_entry);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ fib_entry = fib_entry_src_action_path_add(fib_entry, source, flags, rpath);
+
+ /*
+ * what happens next depends on how the changed source ranks
+ * against the entry's current best source.
+ */
+ if (source < best_source)
+ {
+ /*
+ * we have a new winning source.
+ */
+ fib_entry_src_action_deactivate(fib_entry, best_source);
+ fib_entry_src_action_activate(fib_entry, source);
+ }
+ else if (source > best_source)
+ {
+ /*
+ * the new source loses. nothing to do here.
+ * the data from the source is saved in the path-list created
+ */
+ return;
+ }
+ else
+ {
+ /*
+ * the new source is one this entry already has.
+ * But the path-list was updated, which will contribute new forwarding,
+ * so install it.
+ */
+ fib_entry_src_action_deactivate(fib_entry, source);
+ fib_entry_src_action_activate(fib_entry, source);
+ }
+
+ fib_entry_post_update_actions(fib_entry, source, bflags);
+}
+
+/*
+ * fib_entry_path_remove
+ *
+ * remove a path from the entry.
+ * return FIB_ENTRY_SRC_FLAG_ADDED if the entry still has sources,
+ * FIB_ENTRY_SRC_FLAG_NONE otherwise.
+ */
+fib_entry_src_flag_t
+fib_entry_path_remove (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ const fib_route_path_t *rpath)
+{
+ fib_entry_src_flag_t sflag;
+ fib_source_t best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ ASSERT(1 == vec_len(rpath));
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ ASSERT(NULL != fib_entry);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ sflag = fib_entry_src_action_path_remove(fib_entry, source, rpath);
+
+ /*
+ * what happens next depends on how the source the path was removed
+ * from ranks against the entry's current best source.
+ */
+ if (source < best_source )
+ {
+ /*
+ * Que! removing a path from a source that is better than the
+ * one this entry is using.
+ */
+ ASSERT(0);
+ }
+ else if (source > best_source )
+ {
+ /*
+ * the source is not the best. nothing to do.
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+ }
+ else
+ {
+ /*
+ * removing a path from the path-list we were using.
+ */
+ if (!(FIB_ENTRY_SRC_FLAG_ADDED & sflag))
+ {
+ /*
+ * the last path from the source was removed.
+ * fallback to lower source
+ */
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+
+ if (FIB_SOURCE_MAX == best_source) {
+ /*
+ * no more sources left. this entry is toast.
+ */
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry,
+ source,
+ bflags);
+ fib_entry_src_action_uninstall(fib_entry);
+
+ return (FIB_ENTRY_SRC_FLAG_NONE);
+ }
+ else
+ {
+ fib_entry_src_action_activate(fib_entry, best_source);
+ source = best_source;
+ }
+ }
+ else
+ {
+ /*
+ * re-install the new forwarding information
+ */
+ fib_entry_src_action_deactivate(fib_entry, source);
+ fib_entry_src_action_activate(fib_entry, source);
+ }
+ }
+
+ fib_entry_post_update_actions(fib_entry, source, bflags);
+
+ /*
+ * still have sources
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+}
+
+/*
+ * fib_entry_special_remove
+ *
+ * remove a special source from the entry.
+ * return FIB_ENTRY_SRC_FLAG_ADDED if the entry still has sources,
+ * FIB_ENTRY_SRC_FLAG_NONE otherwise.
+ */
+fib_entry_src_flag_t
+fib_entry_special_remove (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_entry_src_flag_t sflag;
+ fib_source_t best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ ASSERT(NULL != fib_entry);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ sflag = fib_entry_src_action_remove(fib_entry, source);
+
+ /*
+ * what happens next depends on how the removed source ranks
+ * against the entry's current best source.
+ */
+ if (source < best_source )
+ {
+ /*
+ * Que! removing a path from a source that is better than the
+ * one this entry is using. This can only mean it is a source
+ * this prefix does not have.
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+ }
+ else if (source > best_source ) {
+ /*
+ * the source is not the best. nothing to do.
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+ }
+ else
+ {
+ if (!(FIB_ENTRY_SRC_FLAG_ADDED & sflag))
+ {
+ /*
+ * the source was removed. use the next best.
+ */
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+
+ if (FIB_SOURCE_MAX == best_source) {
+ /*
+ * no more sources left. this entry is toast.
+ */
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry,
+ source,
+ bflags);
+ fib_entry_src_action_uninstall(fib_entry);
+
+ return (FIB_ENTRY_SRC_FLAG_NONE);
+ }
+ else
+ {
+ fib_entry_src_action_activate(fib_entry, best_source);
+ source = best_source;
+ }
+ }
+ else
+ {
+ /*
+ * re-install the new forwarding information
+ */
+ fib_entry_src_action_reactivate(fib_entry, source);
+ }
+ }
+
+ fib_entry_post_update_actions(fib_entry, source, bflags);
+
+ /*
+ * still have sources
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+}
+
+/**
+ * fib_entry_delete
+ *
+ * The source is withdrawing all the paths it provided
+ */
+fib_entry_src_flag_t
+fib_entry_delete (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ return (fib_entry_special_remove(fib_entry_index, source));
+}
+
+/**
+ * fib_entry_update
+ *
+ * The source has provided a new set of paths that will replace the old.
+ */
+void
+fib_entry_update (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *paths)
+{
+ fib_source_t best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ ASSERT(NULL != fib_entry);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ fib_entry_src_action_path_swap(fib_entry,
+ source,
+ flags,
+ paths);
+ /*
+ * handle possible realloc's by refetching the pointer
+ */
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ /*
+ * what happens next depends on how the changed source ranks
+ * against the entry's current best source.
+ */
+ if (source < best_source)
+ {
+ /*
+ * we have a new winning source.
+ */
+ fib_entry_src_action_deactivate(fib_entry, best_source);
+ fib_entry_src_action_activate(fib_entry, source);
+ }
+ else if (source > best_source) {
+ /*
+ * the new source loses. nothing to do here.
+ * the data from the source is saved in the path-list created
+ */
+ return;
+ }
+ else
+ {
+ /*
+ * the new source is one this entry already has.
+ * But the path-list was updated, which will contribute new forwarding,
+ * so install it.
+ */
+ fib_entry_src_action_deactivate(fib_entry, source);
+ fib_entry_src_action_activate(fib_entry, source);
+ }
+
+ fib_entry_post_update_actions(fib_entry, source, bflags);
+}
+
+
+/*
+ * fib_entry_cover_changed
+ *
+ * this entry is tracking its cover and that cover has changed.
+ */
+void
+fib_entry_cover_changed (fib_node_index_t fib_entry_index)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+ fib_source_t source, best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+ u32 index;
+
+ bflags = FIB_ENTRY_FLAG_NONE;
+ best_source = FIB_SOURCE_FIRST;
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ fib_attached_export_cover_change(fib_entry);
+
+ /*
+ * propagate the notification to each of the added sources
+ */
+ index = 0;
+ FOR_EACH_SRC_ADDED(fib_entry, esrc, source,
+ ({
+ if (0 == index)
+ {
+ /*
+ * only the best source gets to set the back walk flags
+ */
+ res = fib_entry_src_action_cover_change(fib_entry, source);
+ bflags = fib_entry_src_get_flags(esrc);
+ best_source = fib_entry_src_get_source(esrc);
+ }
+ else
+ {
+ fib_entry_src_action_cover_change(fib_entry, source);
+ }
+ index++;
+ }));
+
+ if (res.install)
+ {
+ fib_entry_src_action_reactivate(fib_entry,
+ fib_entry_src_get_source(
+ fib_entry_get_best_src_i(fib_entry)));
+ fib_entry_post_install_actions(fib_entry, best_source, bflags);
+ }
+ else
+ {
+ fib_entry_src_action_uninstall(fib_entry);
+ }
+
+ if (FIB_NODE_BW_REASON_FLAG_NONE != res.bw_reason)
+ {
+ /*
+ * time for walkies fido.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = res.bw_reason,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx);
+ }
+}
+
+/*
+ * fib_entry_cover_updated
+ *
+ * this entry is tracking its cover and that cover has been updated
+ * (i.e. its forwarding information has changed).
+ */
+void
+fib_entry_cover_updated (fib_node_index_t fib_entry_index)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+ fib_source_t source, best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+ u32 index;
+
+ bflags = FIB_ENTRY_FLAG_NONE;
+ best_source = FIB_SOURCE_FIRST;
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ fib_attached_export_cover_update(fib_entry);
+
+ /*
+ * propagate the notification to each of the added sources
+ */
+ index = 0;
+ FOR_EACH_SRC_ADDED(fib_entry, esrc, source,
+ ({
+ if (0 == index)
+ {
+ /*
+ * only the best source gets to set the back walk flags
+ */
+ res = fib_entry_src_action_cover_update(fib_entry, source);
+ bflags = fib_entry_src_get_flags(esrc);
+ best_source = fib_entry_src_get_source(esrc);
+ }
+ else
+ {
+ fib_entry_src_action_cover_update(fib_entry, source);
+ }
+ index++;
+ }));
+
+ if (res.install)
+ {
+ fib_entry_src_action_reactivate(fib_entry,
+ fib_entry_src_get_source(
+ fib_entry_get_best_src_i(fib_entry)));
+ fib_entry_post_install_actions(fib_entry, best_source, bflags);
+ }
+ else
+ {
+ fib_entry_src_action_uninstall(fib_entry);
+ }
+
+ if (FIB_NODE_BW_REASON_FLAG_NONE != res.bw_reason)
+ {
+ /*
+ * time for walkies fido.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = res.bw_reason,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx);
+ }
+}
+
+int
+fib_entry_recursive_loop_detect (fib_node_index_t entry_index,
+ fib_node_index_t **entry_indicies)
+{
+ fib_entry_t *fib_entry;
+ int was_looped, is_looped;
+
+ fib_entry = fib_entry_get(entry_index);
+
+ if (FIB_NODE_INDEX_INVALID != fib_entry->fe_parent)
+ {
+ fib_node_index_t *entries = *entry_indicies;
+
+ vec_add1(entries, entry_index);
+ was_looped = fib_path_list_is_looped(fib_entry->fe_parent);
+ is_looped = fib_path_list_recursive_loop_detect(fib_entry->fe_parent,
+ &entries);
+
+ *entry_indicies = entries;
+
+ if (!!was_looped != !!is_looped)
+ {
+ /*
+ * re-evaluate all the entry's forwarding
+ * NOTE: this is an inplace modify
+ */
+ fib_entry_delegate_type_t fdt;
+ fib_entry_delegate_t *fed;
+
+ FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed,
+ {
+ fib_entry_src_mk_lb(fib_entry,
+ fib_entry_get_best_src_i(fib_entry),
+ fib_entry_delegate_type_to_chain_type(fdt),
+ &fed->fd_dpo);
+ });
+ }
+ }
+ else
+ {
+ /*
+ * the entry is currently not linked to a path-list. this happens
+ * when it is this entry that is re-linking path-lists and has thus
+ * broken the loop
+ */
+ is_looped = 0;
+ }
+
+ return (is_looped);
+}
+
+u32
+fib_entry_get_resolving_interface (fib_node_index_t entry_index)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(entry_index);
+
+ return (fib_path_list_get_resolving_interface(fib_entry->fe_parent));
+}
+
+fib_source_t
+fib_entry_get_best_source (fib_node_index_t entry_index)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ fib_entry = fib_entry_get(entry_index);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ return (fib_entry_src_get_source(bsrc));
+}
+
+/**
+ * Return !0 if the entry is resolved, i.e. it will return a valid forwarding
+ * chain
+ */
+int
+fib_entry_is_resolved (fib_node_index_t fib_entry_index)
+{
+ fib_entry_delegate_t *fed;
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ fed = fib_entry_delegate_get(fib_entry, FIB_ENTRY_DELEGATE_BFD);
+
+ if (NULL == fed)
+ {
+ /*
+ * no BFD tracking - consider it resolved.
+ */
+ return (!0);
+ }
+ else
+ {
+ /*
+ * defer to the state of the BFD tracking
+ */
+ return (FIB_BFD_STATE_UP == fed->fd_bfd_state);
+ }
+}
+
+void
+fib_entry_set_flow_hash_config (fib_node_index_t fib_entry_index,
+ flow_hash_config_t hash_config)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ /*
+ * pass the hash-config on to the load-balance object where it is cached.
+ * we can ignore LBs in the delegate chains, since they will not be of the
+ * correct protocol type (i.e. they are not IP)
+ * There's no way, nor need, to change the hash config for MPLS.
+ */
+ if (dpo_id_is_valid(&fib_entry->fe_lb))
+ {
+ load_balance_t *lb;
+
+ ASSERT(DPO_LOAD_BALANCE == fib_entry->fe_lb.dpoi_type);
+
+ lb = load_balance_get(fib_entry->fe_lb.dpoi_index);
+
+ /*
+ * atomic update for packets in flight
+ */
+ lb->lb_hash_config = hash_config;
+ }
+}
+
+static int
+fib_ip4_address_compare (const ip4_address_t * a1,
+ const ip4_address_t * a2)
+{
+ /*
+ * IP addresses are unsigned ints, so the return value here needs to be
+ * signed; a simple subtraction won't cut it.
+ * If the addresses are the same, the sort order is undefined, so phooey.
+ */
+ return ((clib_net_to_host_u32(a1->data_u32) >
+ clib_net_to_host_u32(a2->data_u32) ) ?
+ 1 : -1);
+}
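+
+/*
+ * e.g. comparing 1 with 0xffffffff by u32 subtraction gives 2 after
+ * wrap-around, a positive value, so 1 would wrongly sort after
+ * 0xffffffff. Hence the explicit > comparison above.
+ */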
+
+static int
+fib_ip6_address_compare (const ip6_address_t * a1,
+ const ip6_address_t * a2)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a1->as_u16); i++)
+ {
+ int cmp = (clib_net_to_host_u16 (a1->as_u16[i]) -
+ clib_net_to_host_u16 (a2->as_u16[i]));
+ if (cmp != 0)
+ return cmp;
+ }
+ return 0;
+}
+
+static int
+fib_entry_cmp (fib_node_index_t fib_entry_index1,
+ fib_node_index_t fib_entry_index2)
+{
+ fib_entry_t *fib_entry1, *fib_entry2;
+ int cmp = 0;
+
+ fib_entry1 = fib_entry_get(fib_entry_index1);
+ fib_entry2 = fib_entry_get(fib_entry_index2);
+
+ switch (fib_entry1->fe_prefix.fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ cmp = fib_ip4_address_compare(&fib_entry1->fe_prefix.fp_addr.ip4,
+ &fib_entry2->fe_prefix.fp_addr.ip4);
+ break;
+ case FIB_PROTOCOL_IP6:
+ cmp = fib_ip6_address_compare(&fib_entry1->fe_prefix.fp_addr.ip6,
+ &fib_entry2->fe_prefix.fp_addr.ip6);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ cmp = (fib_entry1->fe_prefix.fp_label - fib_entry2->fe_prefix.fp_label);
+
+ if (0 == cmp)
+ {
+ cmp = (fib_entry1->fe_prefix.fp_eos - fib_entry2->fe_prefix.fp_eos);
+ }
+ break;
+ }
+
+ if (0 == cmp) {
+ cmp = (fib_entry1->fe_prefix.fp_len - fib_entry2->fe_prefix.fp_len);
+ }
+ return (cmp);
+}
+
+int
+fib_entry_cmp_for_sort (void *i1, void *i2)
+{
+ fib_node_index_t *fib_entry_index1 = i1, *fib_entry_index2 = i2;
+
+ return (fib_entry_cmp(*fib_entry_index1,
+ *fib_entry_index2));
+}
+
+void
+fib_entry_lock (fib_node_index_t fib_entry_index)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ fib_node_lock(&fib_entry->fe_node);
+}
+
+void
+fib_entry_unlock (fib_node_index_t fib_entry_index)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ fib_node_unlock(&fib_entry->fe_node);
+}
+
+void
+fib_entry_module_init (void)
+{
+ fib_node_register_type (FIB_NODE_TYPE_ENTRY, &fib_entry_vft);
+}
+
+void
+fib_entry_encode (fib_node_index_t fib_entry_index,
+ fib_route_path_encode_t **api_rpaths)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ if (FIB_NODE_INDEX_INVALID != fib_entry->fe_parent)
+ {
+ fib_path_list_walk(fib_entry->fe_parent, fib_path_encode, api_rpaths);
+ }
+}
+
+void
+fib_entry_get_prefix (fib_node_index_t fib_entry_index,
+ fib_prefix_t *pfx)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ *pfx = fib_entry->fe_prefix;
+}
+
+u32
+fib_entry_get_fib_index (fib_node_index_t fib_entry_index)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ return (fib_entry->fe_fib_index);
+}
+
+u32
+fib_entry_pool_size (void)
+{
+ return (pool_elts(fib_entry_pool));
+}
+
+static clib_error_t *
+show_fib_entry_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ fib_node_index_t fei;
+
+ if (unformat (input, "%d", &fei))
+ {
+ /*
+ * show one in detail
+ */
+ if (!pool_is_free_index(fib_entry_pool, fei))
+ {
+ vlib_cli_output (vm, "%d@%U",
+ fei,
+ format_fib_entry, fei,
+ FIB_ENTRY_FORMAT_DETAIL2);
+ }
+ else
+ {
+ vlib_cli_output (vm, "entry %d invalid", fei);
+ }
+ }
+ else
+ {
+ /*
+ * show all
+ */
+ vlib_cli_output (vm, "FIB Entries:");
+ pool_foreach_index(fei, fib_entry_pool,
+ ({
+ vlib_cli_output (vm, "%d@%U",
+ fei,
+ format_fib_entry, fei,
+ FIB_ENTRY_FORMAT_BRIEF);
+ }));
+ }
+
+ return (NULL);
+}
+
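+/*
+ * Illustrative CLI usage:
+ *   show fib entry       - brief output for every entry in the pool
+ *   show fib entry <n>   - detailed output for entry <n>
+ */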
+VLIB_CLI_COMMAND (show_fib_entry, static) = {
+ .path = "show fib entry",
+ .function = show_fib_entry_command,
+ .short_help = "show fib entry",
+};
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
new file mode 100644
index 00000000..2f6e37fe
--- /dev/null
+++ b/src/vnet/fib/fib_entry.h
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_ENTRY_H__
+#define __FIB_ENTRY_H__
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/fib/fib_entry_delegate.h>
+#include <vnet/adj/adj.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * The different sources that can create a route.
+ * The sources are defined here in their relative priority order.
+ * The lower the value the higher the priority
+ */
+typedef enum fib_source_t_ {
+ /**
+ * Marker. Add new values after this one.
+ */
+ FIB_SOURCE_FIRST,
+ /**
+ * Special sources. These are for entries that are added to all
+ * FIBs by default, and should never be over-ridden (hence they
+ * are the highest priority)
+ */
+ FIB_SOURCE_SPECIAL = FIB_SOURCE_FIRST,
+ /**
+ * Classify. A route that links directly to a classify adj
+ */
+ FIB_SOURCE_CLASSIFY,
+ /**
+ * Route added as a result of interface configuration.
+ * This will also come from the API/CLI, but the distinction is
+ * that it is from configuration on an interface, not an 'ip route' command
+ */
+ FIB_SOURCE_INTERFACE,
+ /**
+ * SRv6 and SR-MPLS
+ */
+ FIB_SOURCE_SR,
+ /**
+ * A high priority source a plugin can use
+ */
+ FIB_SOURCE_PLUGIN_HI,
+ /**
+ * From the control plane API
+ */
+ FIB_SOURCE_API,
+ /**
+ * From the CLI.
+ */
+ FIB_SOURCE_CLI,
+ /**
+ * LISP
+ */
+ FIB_SOURCE_LISP,
+ /**
+ * IPv[46] Mapping
+ */
+ FIB_SOURCE_MAP,
+ /**
+ * SIXRD
+ */
+ FIB_SOURCE_SIXRD,
+ /**
+ * DHCP
+ */
+ FIB_SOURCE_DHCP,
+ /**
+ * IPv6 Proxy ND
+ */
+ FIB_SOURCE_IP6_ND_PROXY,
+ /**
+ * Adjacency source.
+ * routes created as a result of ARP/ND entries. This is lower priority
+ * than the API/CLI. This is on purpose. trust me.
+ */
+ FIB_SOURCE_ADJ,
+ /**
+ * MPLS label. The prefix has been assigned a local label. This source
+ * never provides forwarding information, instead it acts as a place-holder
+ * so the association of label to prefix can be maintained
+ */
+ FIB_SOURCE_MPLS,
+ /**
+ * Attached Export source.
+ * routes created as a result of attached export. routes thus sourced
+ * will be present in the export tables
+ */
+ FIB_SOURCE_AE,
+ /**
+ * Recursive resolution source.
+ * Used to install an entry that is the resolution target of another.
+ */
+ FIB_SOURCE_RR,
+ /**
+ * uRPF bypass/exemption.
+ * Used to install an entry that is exempt from the loose uRPF check
+ */
+ FIB_SOURCE_URPF_EXEMPT,
+ /**
+ * The default route source.
+ * The default route is always added to the FIB table (like the
+ * special sources) but we need to be able to over-ride it with
+ * 'ip route' sources when provided
+ */
+ FIB_SOURCE_DEFAULT_ROUTE,
+ /**
+ * Marker. add new entries before this one.
+ */
+ FIB_SOURCE_LAST = FIB_SOURCE_DEFAULT_ROUTE,
+} __attribute__ ((packed)) fib_source_t;
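+
+/*
+ * For example, since FIB_SOURCE_API < FIB_SOURCE_ADJ, a route configured
+ * through the API over-rides one derived from an ARP/ND entry for the
+ * same prefix.
+ */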
+
+STATIC_ASSERT (sizeof(fib_source_t) == 1,
+ "FIB too many sources");
+
+/**
+ * The maximum number of sources
+ */
+#define FIB_SOURCE_MAX (FIB_SOURCE_LAST+1)
+
+#define FIB_SOURCES { \
+ [FIB_SOURCE_SPECIAL] = "special", \
+ [FIB_SOURCE_INTERFACE] = "interface", \
+ [FIB_SOURCE_API] = "API", \
+ [FIB_SOURCE_CLI] = "CLI", \
+ [FIB_SOURCE_ADJ] = "adjacency", \
+ [FIB_SOURCE_MAP] = "MAP", \
+ [FIB_SOURCE_SR] = "SR", \
+ [FIB_SOURCE_SIXRD] = "SixRD", \
+ [FIB_SOURCE_LISP] = "LISP", \
+ [FIB_SOURCE_CLASSIFY] = "classify", \
+ [FIB_SOURCE_DHCP] = "DHCP", \
+ [FIB_SOURCE_IP6_ND_PROXY] = "IPv6-proxy-nd", \
+ [FIB_SOURCE_RR] = "recursive-resolution", \
+ [FIB_SOURCE_AE] = "attached_export", \
+ [FIB_SOURCE_MPLS] = "mpls", \
+ [FIB_SOURCE_URPF_EXEMPT] = "urpf-exempt", \
+ [FIB_SOURCE_DEFAULT_ROUTE] = "default-route", \
+}
+
+#define FOR_EACH_FIB_SOURCE(_item) \
+ for (_item = FIB_SOURCE_FIRST; _item < FIB_SOURCE_MAX; _item++)
+
+/**
+ * The different attributes a FIB entry can have.
+ * Each attribute has a corresponding flag (defined below) so that a set
+ * of attributes can be carried in a single flags word.
+ */
+typedef enum fib_entry_attribute_t_ {
+ /**
+ * Marker. Add new values after this one.
+ */
+ FIB_ENTRY_ATTRIBUTE_FIRST,
+ /**
+ * Connected. The prefix is configured on an interface.
+ */
+ FIB_ENTRY_ATTRIBUTE_CONNECTED = FIB_ENTRY_ATTRIBUTE_FIRST,
+ /**
+ * Attached. The prefix is attached to an interface.
+ */
+ FIB_ENTRY_ATTRIBUTE_ATTACHED,
+ /**
+ * The route is an explicit drop.
+ */
+ FIB_ENTRY_ATTRIBUTE_DROP,
+ /**
+ * The route is exclusive. The client creating the route is
+ * providing an exclusive adjacency.
+ */
+ FIB_ENTRY_ATTRIBUTE_EXCLUSIVE,
+ /**
+ * The route is attached cross tables and thus imports covered
+ * prefixes from the other table.
+ */
+ FIB_ENTRY_ATTRIBUTE_IMPORT,
+ /**
+ * The prefix/address is local to this device
+ */
+ FIB_ENTRY_ATTRIBUTE_LOCAL,
+ /**
+ * The prefix/address is a multicast prefix.
+ * this applies only to MPLS. IP multicast is handled by mfib
+ */
+ FIB_ENTRY_ATTRIBUTE_MULTICAST,
+ /**
+ * The prefix/address exempted from loose uRPF check
+ * To be used with caution
+ */
+ FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT,
+ /**
+ * Marker. add new entries before this one.
+ */
+ FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT,
+} fib_entry_attribute_t;
+
+#define FIB_ENTRY_ATTRIBUTES { \
+ [FIB_ENTRY_ATTRIBUTE_CONNECTED] = "connected", \
+ [FIB_ENTRY_ATTRIBUTE_ATTACHED] = "attached", \
+ [FIB_ENTRY_ATTRIBUTE_IMPORT] = "import", \
+ [FIB_ENTRY_ATTRIBUTE_DROP] = "drop", \
+ [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive", \
+ [FIB_ENTRY_ATTRIBUTE_LOCAL] = "local", \
+ [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt", \
+ [FIB_ENTRY_ATTRIBUTE_MULTICAST] = "multicast", \
+}
+
+#define FOR_EACH_FIB_ATTRIBUTE(_item) \
+ for (_item = FIB_ENTRY_ATTRIBUTE_FIRST; \
+ _item <= FIB_ENTRY_ATTRIBUTE_LAST; \
+ _item++)
+
+typedef enum fib_entry_flag_t_ {
+ FIB_ENTRY_FLAG_NONE = 0,
+ FIB_ENTRY_FLAG_CONNECTED = (1 << FIB_ENTRY_ATTRIBUTE_CONNECTED),
+ FIB_ENTRY_FLAG_ATTACHED = (1 << FIB_ENTRY_ATTRIBUTE_ATTACHED),
+ FIB_ENTRY_FLAG_DROP = (1 << FIB_ENTRY_ATTRIBUTE_DROP),
+ FIB_ENTRY_FLAG_EXCLUSIVE = (1 << FIB_ENTRY_ATTRIBUTE_EXCLUSIVE),
+ FIB_ENTRY_FLAG_LOCAL = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL),
+ FIB_ENTRY_FLAG_IMPORT = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT),
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT = (1 << FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT),
+ FIB_ENTRY_FLAG_MULTICAST = (1 << FIB_ENTRY_ATTRIBUTE_MULTICAST),
+} __attribute__((packed)) fib_entry_flag_t;
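+
+/*
+ * Each flag is one attribute's bit, so a set of attributes packs into a
+ * single flags word; a connected local prefix, for instance, carries
+ * (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL).
+ */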
+
+/**
+ * Flags for the source data
+ */
+typedef enum fib_entry_src_attribute_t_ {
+ /**
+ * Marker. Add new values after this one.
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_FIRST,
+ /**
+ * the source has been added to the entry
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_ADDED = FIB_ENTRY_SRC_ATTRIBUTE_FIRST,
+ /**
+ * the source is active/best
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE,
+ /**
+ * Marker. add new entries before this one.
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE,
+} fib_entry_src_attribute_t;
+
+#define FIB_ENTRY_SRC_ATTRIBUTE_MAX (FIB_ENTRY_SRC_ATTRIBUTE_LAST+1)
+
+#define FIB_ENTRY_SRC_ATTRIBUTES { \
+ [FIB_ENTRY_SRC_ATTRIBUTE_ADDED] = "added", \
+ [FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE] = "active", \
+}
+
+typedef enum fib_entry_src_flag_t_ {
+ FIB_ENTRY_SRC_FLAG_NONE = 0,
+ FIB_ENTRY_SRC_FLAG_ADDED = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ADDED),
+ FIB_ENTRY_SRC_FLAG_ACTIVE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE),
+} __attribute__ ((packed)) fib_entry_src_flag_t;
+
+/*
+ * Keep the size of the flags field to 2 bytes, so it
+ * can be placed next to the 1 byte reference count
+ */
+STATIC_ASSERT (sizeof(fib_entry_src_flag_t) <= 2,
+ "FIB entry flags field size too big");
+
+/**
+ * Information related to the source of a FIB entry
+ */
+typedef struct fib_entry_src_t_ {
+ /**
+ * A vector of path extensions
+ */
+ fib_path_ext_list_t fes_path_exts;
+
+ /**
+ * The path-list created by the source
+ */
+ fib_node_index_t fes_pl;
+ /**
+ * Which source this info block is for
+ */
+ fib_source_t fes_src;
+ /**
+ * Flags on the source
+ */
+ fib_entry_src_flag_t fes_flags;
+
+ /**
+ * 1 byte ref count. This is not the number of users of the Entry
+ * (which is itself not large, due to path-list sharing), but the number
+ * of times a given source has been added, which is even fewer.
+ */
+ u8 fes_ref_count;
+
+ /**
+ * Flags the source contributes to the entry
+ */
+ fib_entry_flag_t fes_entry_flags;
+
+ /**
+ * Source specific info
+ */
+ union {
+ struct {
+ /**
+ * the index of the FIB entry that is the covering entry
+ */
+ fib_node_index_t fesr_cover;
+ /**
+ * This source's index in the cover's list
+ */
+ u32 fesr_sibling;
+ } rr;
+ struct {
+ /**
+ * the index of the FIB entry that is the covering entry
+ */
+ fib_node_index_t fesa_cover;
+ /**
+ * This source's index in the cover's list
+ */
+ u32 fesa_sibling;
+ } adj;
+ struct {
+ /**
+ * the index of the FIB entry that is the covering entry
+ */
+ fib_node_index_t fesi_cover;
+ /**
+ * This source's index in the cover's list
+ */
+ u32 fesi_sibling;
+ } interface;
+ struct {
+ /**
+ * The MPLS local label associated with the prefix.
+ */
+ mpls_label_t fesm_label;
+
+ /**
+ * the indices of the LFIB entries created
+ */
+ fib_node_index_t fesm_lfes[2];
+ } mpls;
+ struct {
+ /**
+ * The source FIB index.
+ */
+ fib_node_index_t fesl_fib_index;
+ } lisp;
+ };
+} fib_entry_src_t;
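+
+/*
+ * Note: the union above is discriminated by fes_src; e.g. the 'rr'
+ * member is meaningful only when fes_src is FIB_SOURCE_RR.
+ */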
+
+/**
+ * An entry in a FIB table.
+ *
+ * This entry represents a route added to the FIB that is stored
+ * in one of the FIB tables.
+ */
+typedef struct fib_entry_t_ {
+ /**
+ * Base class. The entry's node representation in the graph.
+ */
+ fib_node_t fe_node;
+ /**
+ * The prefix of the route. This is const just to be sure.
+ * It is the entry's key/identity and so should never change.
+ */
+ const fib_prefix_t fe_prefix;
+ /**
+ * The index of the FIB table this entry is in
+ */
+ u32 fe_fib_index;
+ /**
+ * The load-balance used for forwarding.
+ *
+ * We don't share the EOS and non-EOS LBs even in cases where we could,
+ * because:
+ * - complexity & reliability v. memory
+ * determining the conditions where sharing is possible is non-trivial.
+ * - separate LBs mean we can get the EOS bit right in the MPLS label DPO
+ * and so save a few clock cycles in the DP imposition node since we can
+ * paint the header straight on without the need to check the packet
+ * type to derive the EOS bit value.
+ */
+ dpo_id_t fe_lb;
+ /**
+ * Vector of source infos.
+ * Most entries will only have 1 source. So we optimise for memory usage,
+ * which is preferable since we have many entries.
+ */
+ fib_entry_src_t *fe_srcs;
+ /**
+ * the path-list for which this entry is a child. This is also the path-list
+ * that is contributing forwarding for this entry.
+ */
+ fib_node_index_t fe_parent;
+ /**
+ * index of this entry in the parent's child list.
+ * This is set when this entry is added as a child, but can also
+ * be changed by the parent as it manages its list.
+ */
+ u32 fe_sibling;
+
+ /**
+ * A vector of delegates.
+ */
+ fib_entry_delegate_t *fe_delegates;
+} fib_entry_t;
+
+#define FOR_EACH_FIB_ENTRY_FLAG(_item) \
+ for (_item = FIB_ENTRY_FLAG_FIRST; _item < FIB_ENTRY_FLAG_MAX; _item++)
+
+#define FIB_ENTRY_FORMAT_BRIEF (0x0)
+#define FIB_ENTRY_FORMAT_DETAIL (0x1)
+#define FIB_ENTRY_FORMAT_DETAIL2 (0x2)
+
+extern u8 *format_fib_entry (u8 * s, va_list * args);
+extern u8 *format_fib_source (u8 * s, va_list * args);
+
+extern fib_node_index_t fib_entry_create_special(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo);
+
+extern fib_node_index_t fib_entry_create (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *paths);
+extern void fib_entry_update (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *paths);
+
+extern void fib_entry_path_add(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *rpath);
+extern void fib_entry_special_add(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo);
+extern void fib_entry_special_update(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo);
+extern fib_entry_src_flag_t fib_entry_special_remove(fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern fib_entry_src_flag_t fib_entry_path_remove(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ const fib_route_path_t *rpath);
+extern fib_entry_src_flag_t fib_entry_delete(fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern void fib_entry_contribute_urpf(fib_node_index_t path_index,
+ index_t urpf);
+extern void fib_entry_contribute_forwarding(
+ fib_node_index_t fib_entry_index,
+ fib_forward_chain_type_t type,
+ dpo_id_t *dpo);
+extern const dpo_id_t * fib_entry_contribute_ip_forwarding(
+ fib_node_index_t fib_entry_index);
+extern adj_index_t fib_entry_get_adj_for_source(
+ fib_node_index_t fib_entry_index,
+ fib_source_t source);
+extern const int fib_entry_get_dpo_for_source (
+ fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ dpo_id_t *dpo);
+
+extern adj_index_t fib_entry_get_adj(fib_node_index_t fib_entry_index);
+
+extern int fib_entry_cmp_for_sort(void *i1, void *i2);
+
+extern void fib_entry_cover_changed(fib_node_index_t fib_entry);
+extern void fib_entry_cover_updated(fib_node_index_t fib_entry);
+extern int fib_entry_recursive_loop_detect(fib_node_index_t entry_index,
+ fib_node_index_t **entry_indicies);
+
+extern void fib_entry_lock(fib_node_index_t fib_entry_index);
+extern void fib_entry_unlock(fib_node_index_t fib_entry_index);
+
+extern u32 fib_entry_child_add(fib_node_index_t fib_entry_index,
+ fib_node_type_t type,
+ fib_node_index_t child_index);
+extern void fib_entry_child_remove(fib_node_index_t fib_entry_index,
+ u32 sibling_index);
+extern u32 fib_entry_get_resolving_interface(fib_node_index_t fib_entry_index);
+extern u32 fib_entry_get_resolving_interface_for_source(
+ fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern void fib_entry_encode(fib_node_index_t fib_entry_index,
+ fib_route_path_encode_t **api_rpaths);
+extern void fib_entry_get_prefix(fib_node_index_t fib_entry_index,
+ fib_prefix_t *pfx);
+extern u32 fib_entry_get_fib_index(fib_node_index_t fib_entry_index);
+extern void fib_entry_set_source_data(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ const void *data);
+extern const void* fib_entry_get_source_data(fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern fib_entry_flag_t fib_entry_get_flags(fib_node_index_t fib_entry_index);
+extern fib_entry_flag_t fib_entry_get_flags_for_source(
+ fib_node_index_t fib_entry_index,
+ fib_source_t source);
+extern fib_source_t fib_entry_get_best_source(fib_node_index_t fib_entry_index);
+extern int fib_entry_is_sourced(fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern fib_node_index_t fib_entry_get_path_list(fib_node_index_t fib_entry_index);
+extern int fib_entry_is_resolved(fib_node_index_t fib_entry_index);
+extern void fib_entry_set_flow_hash_config(fib_node_index_t fib_entry_index,
+ flow_hash_config_t hash_config);
+
+extern void fib_entry_module_init(void);
+
+/*
+ * unsafe... beware the raw pointer.
+ */
+extern fib_node_index_t fib_entry_get_index(const fib_entry_t * fib_entry);
+extern fib_entry_t * fib_entry_get(fib_node_index_t fib_entry_index);
+
+/*
+ * for testing purposes.
+ */
+extern u32 fib_entry_pool_size(void);
+
+#endif
diff --git a/src/vnet/fib/fib_entry_cover.c b/src/vnet/fib/fib_entry_cover.c
new file mode 100644
index 00000000..814df578
--- /dev/null
+++ b/src/vnet/fib/fib_entry_cover.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_entry_cover.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_node_list.h>
+
+u32
+fib_entry_cover_track (fib_entry_t* cover,
+ fib_node_index_t covered)
+{
+ fib_entry_delegate_t *fed;
+
+ FIB_ENTRY_DBG(cover, "cover-track %d", covered);
+
+ ASSERT(fib_entry_get_index(cover) != covered);
+
+ fed = fib_entry_delegate_get(cover, FIB_ENTRY_DELEGATE_COVERED);
+
+ if (NULL == fed)
+ {
+ fed = fib_entry_delegate_find_or_add(cover, FIB_ENTRY_DELEGATE_COVERED);
+ fed->fd_list = fib_node_list_create();
+ }
+
+ return (fib_node_list_push_front(fed->fd_list,
+ 0, FIB_NODE_TYPE_ENTRY,
+ covered));
+}
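+
+/*
+ * A sketch of the expected (hypothetical) pairing: the covered entry
+ * stores the returned index and uses it to untrack later:
+ *
+ *   sibling = fib_entry_cover_track(cover, fib_entry_get_index(covered));
+ *   ...
+ *   fib_entry_cover_untrack(cover, sibling);
+ */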
+
+void
+fib_entry_cover_untrack (fib_entry_t* cover,
+ u32 tracked_index)
+{
+ fib_entry_delegate_t *fed;
+
+ FIB_ENTRY_DBG(cover, "cover-untrack @ %d", tracked_index);
+
+ fed = fib_entry_delegate_get(cover, FIB_ENTRY_DELEGATE_COVERED);
+
+ if (NULL == fed)
+ return;
+
+ fib_node_list_remove(fed->fd_list, tracked_index);
+
+ if (0 == fib_node_list_get_size(fed->fd_list))
+ {
+ fib_node_list_destroy(&fed->fd_list);
+ fib_entry_delegate_remove(cover, FIB_ENTRY_DELEGATE_COVERED);
+ }
+}
+
+/**
+ * Internal struct to hold the user supplied parameters for the cover walk
+ */
+typedef struct fib_entry_cover_walk_ctx_t_ {
+ fib_entry_t *cover;
+ fib_entry_covered_walk_t walk;
+ void *ctx;
+} fib_entry_cover_walk_ctx_t;
+
+static int
+fib_entry_cover_walk_node_ptr (fib_node_ptr_t *depend,
+ void *args)
+{
+ fib_entry_cover_walk_ctx_t *ctx = args;
+
+ ctx->walk(ctx->cover, depend->fnp_index, ctx->ctx);
+
+ /* continue */
+ return (1);
+}
+
+void
+fib_entry_cover_walk (fib_entry_t *cover,
+ fib_entry_covered_walk_t walk,
+ void *args)
+{
+ fib_entry_delegate_t *fed;
+
+ fed = fib_entry_delegate_get(cover, FIB_ENTRY_DELEGATE_COVERED);
+
+ if (NULL == fed)
+ return;
+
+ fib_entry_cover_walk_ctx_t ctx = {
+ .cover = cover,
+ .walk = walk,
+ .ctx = args,
+ };
+
+ fib_node_list_walk(fed->fd_list,
+ fib_entry_cover_walk_node_ptr,
+ &ctx);
+}
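+
+/*
+ * A minimal sketch of a walk callback (hypothetical), returning
+ * non-zero to continue per the fib_entry_covered_walk_t convention:
+ *
+ *   static int
+ *   count_covered (fib_entry_t *cover, fib_node_index_t covered, void *ctx)
+ *   {
+ *       (*(u32 *)ctx)++;
+ *       return (1);
+ *   }
+ */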
+
+static int
+fib_entry_cover_change_one (fib_entry_t *cover,
+ fib_node_index_t covered,
+ void *args)
+{
+ fib_node_index_t new_cover;
+
+ /*
+ * The 3 entries involved here are:
+ * cover - the least specific. It will cover both the others
+ * new_cover - the entry just inserted below the cover
+ * covered - the entry that was tracking the cover.
+ *
+ * The checks below are to determine if new_cover is a cover for covered.
+ */
+ new_cover = pointer_to_uword(args);
+
+ if (FIB_NODE_INDEX_INVALID == new_cover)
+ {
+ /*
+ * nothing has been inserted, which implies the cover was removed.
+ * 'cover' is thus the new cover.
+ */
+ fib_entry_cover_changed(covered);
+ }
+ else if (new_cover != covered)
+ {
+ fib_prefix_t pfx_covered, pfx_new_cover;
+
+ fib_entry_get_prefix(covered, &pfx_covered);
+ fib_entry_get_prefix(new_cover, &pfx_new_cover);
+
+ if (fib_prefix_is_cover(&pfx_new_cover, &pfx_covered))
+ {
+ fib_entry_cover_changed(covered);
+ }
+ }
+ /* continue */
+ return (1);
+}
+
+void
+fib_entry_cover_change_notify (fib_node_index_t cover_index,
+ fib_node_index_t covered)
+{
+ fib_entry_t *cover;
+
+ cover = fib_entry_get(cover_index);
+
+ fib_entry_cover_walk(cover,
+ fib_entry_cover_change_one,
+ uword_to_pointer(covered, void*));
+}
+
+static int
+fib_entry_cover_update_one (fib_entry_t *cover,
+ fib_node_index_t covered,
+ void *args)
+{
+ fib_entry_cover_updated(covered);
+
+ /* continue */
+ return (1);
+}
+
+void
+fib_entry_cover_update_notify (fib_entry_t *fib_entry)
+{
+ fib_entry_cover_walk(fib_entry,
+ fib_entry_cover_update_one,
+ NULL);
+}
diff --git a/src/vnet/fib/fib_entry_cover.h b/src/vnet/fib/fib_entry_cover.h
new file mode 100644
index 00000000..500d5b33
--- /dev/null
+++ b/src/vnet/fib/fib_entry_cover.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_ENTRY_COVER_H__
+#define __FIB_ENTRY_COVER_H__
+
+#include "fib_entry.h"
+
+/**
+ * callback function used when walking the covered entries
+ */
+typedef int (*fib_entry_covered_walk_t)(fib_entry_t *cover,
+ fib_node_index_t covered,
+ void *ctx);
+
+extern u32 fib_entry_cover_track(fib_entry_t *cover,
+ fib_node_index_t covered);
+
+extern void fib_entry_cover_untrack(fib_entry_t *cover,
+ u32 tracked_index);
+
+extern void fib_entry_cover_walk(fib_entry_t *cover,
+ fib_entry_covered_walk_t walk,
+ void *ctx);
+
+extern void fib_entry_cover_change_notify(fib_node_index_t cover_index,
+ fib_node_index_t covered_index);
+extern void fib_entry_cover_update_notify(fib_entry_t *cover);
+
+#endif
diff --git a/src/vnet/fib/fib_entry_delegate.c b/src/vnet/fib/fib_entry_delegate.c
new file mode 100644
index 00000000..41af14f2
--- /dev/null
+++ b/src/vnet/fib/fib_entry_delegate.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_entry_delegate.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_attached_export.h>
+
+static fib_entry_delegate_t *
+fib_entry_delegate_find_i (const fib_entry_t *fib_entry,
+ fib_entry_delegate_type_t type,
+ u32 *index)
+{
+ fib_entry_delegate_t *delegate;
+ int ii;
+
+ ii = 0;
+ vec_foreach(delegate, fib_entry->fe_delegates)
+ {
+ if (delegate->fd_type == type)
+ {
+ if (NULL != index)
+ *index = ii;
+
+ return (delegate);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+
+ return (NULL);
+}
+
+fib_entry_delegate_t *
+fib_entry_delegate_get (const fib_entry_t *fib_entry,
+ fib_entry_delegate_type_t type)
+{
+ return (fib_entry_delegate_find_i(fib_entry, type, NULL));
+}
+
+void
+fib_entry_delegate_remove (fib_entry_t *fib_entry,
+ fib_entry_delegate_type_t type)
+{
+ fib_entry_delegate_t *fed;
+ u32 index = ~0;
+
+ fed = fib_entry_delegate_find_i(fib_entry, type, &index);
+
+ ASSERT(NULL != fed);
+
+ vec_del1(fib_entry->fe_delegates, index);
+}
+
+static int
+fib_entry_delegate_cmp_for_sort (void * v1,
+ void * v2)
+{
+ fib_entry_delegate_t *delegate1 = v1, *delegate2 = v2;
+
+ return (delegate1->fd_type - delegate2->fd_type);
+}
+
+static void
+fib_entry_delegate_init (fib_entry_t *fib_entry,
+ fib_entry_delegate_type_t type)
+
+{
+ fib_entry_delegate_t delegate = {
+ .fd_entry_index = fib_entry_get_index(fib_entry),
+ .fd_type = type,
+ };
+
+ vec_add1(fib_entry->fe_delegates, delegate);
+ vec_sort_with_function(fib_entry->fe_delegates,
+ fib_entry_delegate_cmp_for_sort);
+}
+
+fib_entry_delegate_t *
+fib_entry_delegate_find_or_add (fib_entry_t *fib_entry,
+ fib_entry_delegate_type_t fdt)
+{
+ fib_entry_delegate_t *delegate;
+
+ delegate = fib_entry_delegate_get(fib_entry, fdt);
+
+ if (NULL == delegate)
+ {
+ fib_entry_delegate_init(fib_entry, fdt);
+ }
+
+ return (fib_entry_delegate_get(fib_entry, fdt));
+}
+
+fib_entry_delegate_type_t
+fib_entry_chain_type_to_delegate_type (fib_forward_chain_type_t fct)
+{
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ return (FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4);
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ return (FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP6);
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ return (FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS);
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ return (FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS);
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ return (FIB_ENTRY_DELEGATE_CHAIN_ETHERNET);
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ break;
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ return (FIB_ENTRY_DELEGATE_CHAIN_NSH);
+ }
+ ASSERT(0);
+ return (FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4);
+}
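
A minimal usage sketch (editorial, not part of the patch; `entry` is an assumed valid fib_entry_t pointer): the two mappings above let a caller locate, or lazily create, the delegate that stores a non-default forwarding chain.

    /* Sketch: find-or-create the delegate holding the MPLS non-EOS chain */
    fib_entry_delegate_type_t fdt;
    fib_entry_delegate_t *fed;

    fdt = fib_entry_chain_type_to_delegate_type(FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
    fed = fib_entry_delegate_find_or_add(entry, fdt);
    /* fed->fd_dpo is where that chain's load-balance is then stacked */
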
+
+fib_forward_chain_type_t
+fib_entry_delegate_type_to_chain_type (fib_entry_delegate_type_t fdt)
+{
+ switch (fdt)
+ {
+ case FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+ case FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP6:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+ case FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS:
+ return (FIB_FORW_CHAIN_TYPE_MPLS_EOS);
+ case FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS:
+ return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
+ case FIB_ENTRY_DELEGATE_CHAIN_ETHERNET:
+ return (FIB_FORW_CHAIN_TYPE_ETHERNET);
+ case FIB_ENTRY_DELEGATE_CHAIN_NSH:
+ return (FIB_FORW_CHAIN_TYPE_NSH);
+ case FIB_ENTRY_DELEGATE_COVERED:
+ case FIB_ENTRY_DELEGATE_ATTACHED_IMPORT:
+ case FIB_ENTRY_DELEGATE_ATTACHED_EXPORT:
+ case FIB_ENTRY_DELEGATE_BFD:
+ break;
+ }
+ ASSERT(0);
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+}
+
+/**
+ * typedef for printing a delegate
+ */
+typedef u8 * (*fib_entry_delegate_format_t)(const fib_entry_delegate_t *fed,
+ u8 *s);
+
+/**
+ * Print a delegate that represents a forwarding chain
+ */
+static u8 *
+fib_entry_delegate_fmt_fwd_chain (const fib_entry_delegate_t *fed,
+ u8 *s)
+{
+ s = format(s, "%U-chain\n %U",
+ format_fib_forw_chain_type,
+ fib_entry_delegate_type_to_chain_type(fed->fd_type),
+ format_dpo_id, &fed->fd_dpo, 2);
+
+ return (s);
+}
+
+/**
+ * Print a delegate that represents cover tracking
+ */
+static u8 *
+fib_entry_delegate_fmt_covered (const fib_entry_delegate_t *fed,
+ u8 *s)
+{
+ s = format(s, "covered:[");
+ s = fib_node_children_format(fed->fd_list, s);
+ s = format(s, "]");
+
+ return (s);
+}
+
+/**
+ * Print a delegate that represents attached-import tracking
+ */
+static u8 *
+fib_entry_delegate_fmt_import (const fib_entry_delegate_t *fed,
+ u8 *s)
+{
+ s = format(s, "import:%U", fib_ae_import_format, fed->fd_index);
+
+ return (s);
+}
+
+/**
+ * Print a delegate that represents attached-export tracking
+ */
+static u8 *
+fib_entry_delegate_fmt_export (const fib_entry_delegate_t *fed,
+ u8 *s)
+{
+ s = format(s, "export:%U", fib_ae_export_format, fed->fd_index);
+
+ return (s);
+}
+
+/**
+ * Print a delegate that represents BFD tracking
+ */
+static u8 *
+fib_entry_delegate_fmt_bfd (const fib_entry_delegate_t *fed,
+ u8 *s)
+{
+ s = format(s, "BFD:%d", fed->fd_bfd_state);
+
+ return (s);
+}
+
+/**
+ * A delegate type to formatter map
+ */
+static fib_entry_delegate_format_t fed_formatters[] =
+{
+ [FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4] = fib_entry_delegate_fmt_fwd_chain,
+ [FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP6] = fib_entry_delegate_fmt_fwd_chain,
+ [FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS] = fib_entry_delegate_fmt_fwd_chain,
+ [FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS] = fib_entry_delegate_fmt_fwd_chain,
+ [FIB_ENTRY_DELEGATE_CHAIN_ETHERNET] = fib_entry_delegate_fmt_fwd_chain,
+ [FIB_ENTRY_DELEGATE_CHAIN_NSH] = fib_entry_delegate_fmt_fwd_chain,
+ [FIB_ENTRY_DELEGATE_COVERED] = fib_entry_delegate_fmt_covered,
+ [FIB_ENTRY_DELEGATE_ATTACHED_IMPORT] = fib_entry_delegate_fmt_import,
+ [FIB_ENTRY_DELEGATE_ATTACHED_EXPORT] = fib_entry_delegate_fmt_export,
+ [FIB_ENTRY_DELEGATE_BFD] = fib_entry_delegate_fmt_bfd,
+};
+
+u8 *
+format_fib_entry_deletegate (u8 * s, va_list * args)
+{
+ fib_entry_delegate_t *fed;
+
+ fed = va_arg (*args, fib_entry_delegate_t *);
+
+ return (fed_formatters[fed->fd_type](fed, s));
+}
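
A usage sketch (assuming `fed` points at a delegate whose type has an entry in fed_formatters[]): the function plugs into the standard format() machinery via %U. The identifier spelling is kept exactly as declared above.

    u8 *s = NULL;

    s = format(s, "%U", format_fib_entry_deletegate, fed);
    /* ... log or print s ... */
    vec_free(s);
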
diff --git a/src/vnet/fib/fib_entry_delegate.h b/src/vnet/fib/fib_entry_delegate.h
new file mode 100644
index 00000000..333d357c
--- /dev/null
+++ b/src/vnet/fib/fib_entry_delegate.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_ENTRY_DELEGATE_T__
+#define __FIB_ENTRY_DELEGATE_T__
+
+#include <vnet/fib/fib_node.h>
+
+/**
+ * Delegate types
+ */
+typedef enum fib_entry_delegate_type_t_ {
+ /**
+ * Forwarding chain types:
+ * for the vast majority of FIB entries only one chain is required - the
+ * one that forwards traffic matching the fib_entry_t's fib_prefix_t. For those
+ * fib_entry_t that are a resolution target for other fib_entry_t's they will also
+ * need the chain to provide forwarding for those children. We store these additional
+ * chains in delegates to save memory in the common case.
+ */
+ FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4 = FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP6 = FIB_FORW_CHAIN_TYPE_UNICAST_IP6,
+ FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS = FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ FIB_ENTRY_DELEGATE_CHAIN_ETHERNET = FIB_FORW_CHAIN_TYPE_ETHERNET,
+ FIB_ENTRY_DELEGATE_CHAIN_NSH = FIB_FORW_CHAIN_TYPE_NSH,
+ /**
+ * Dependency list of covered entries.
+ * these are more specific entries that are interested in changes
+ * to their respective cover
+ */
+ FIB_ENTRY_DELEGATE_COVERED,
+ /**
+ * BFD session state
+ */
+ FIB_ENTRY_DELEGATE_BFD,
+ /**
+ * Attached import/export functionality
+ */
+ FIB_ENTRY_DELEGATE_ATTACHED_IMPORT,
+ FIB_ENTRY_DELEGATE_ATTACHED_EXPORT,
+} fib_entry_delegate_type_t;
+
+#define FOR_EACH_DELEGATE_CHAIN(_entry, _fdt, _fed, _body) \
+{ \
+ for (_fdt = FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4; \
+ _fdt <= FIB_ENTRY_DELEGATE_CHAIN_NSH; \
+ _fdt++) \
+ { \
+ _fed = fib_entry_delegate_get(_entry, _fdt); \
+ if (NULL != _fed) { \
+ _body; \
+ } \
+ } \
+}
+#define FOR_EACH_DELEGATE(_entry, _fdt, _fed, _body) \
+{ \
+ for (_fdt = FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4; \
+ _fdt <= FIB_ENTRY_DELEGATE_ATTACHED_EXPORT; \
+ _fdt++) \
+ { \
+ _fed = fib_entry_delegate_get(_entry, _fdt); \
+ if (NULL != _fed) { \
+ _body; \
+ } \
+ } \
+}
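
A sketch of the chain-walk macro in use (assuming `entry` is a valid fib_entry_t pointer), resetting every contributed chain's DPO:

    fib_entry_delegate_type_t fdt;
    fib_entry_delegate_t *fed;

    FOR_EACH_DELEGATE_CHAIN(entry, fdt, fed,
    {
        dpo_reset(&fed->fd_dpo);
    });
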
+
+/**
+ * Distillation of the BFD session states into a go/no-go for using
+ * the associated tracked FIB entry
+ */
+typedef enum fib_bfd_state_t_
+{
+ FIB_BFD_STATE_UP,
+ FIB_BFD_STATE_DOWN,
+} fib_bfd_state_t;
+
+/**
+ * A delegate is a means to implement the delegation design pattern: the
+ * extension of an object's functionality through the composition of, and
+ * delegation to, other objects. These 'other' objects are delegates and are
+ * thus attached to other FIB objects to extend their functionality.
+ */
+typedef struct fib_entry_delegate_t_
+{
+ /**
+ * The FIB entry object to which the delegate is attached
+ */
+ fib_node_index_t fd_entry_index;
+
+ /**
+ * The delegate type
+ */
+ fib_entry_delegate_type_t fd_type;
+
+ /**
+ * A union of data for the different delegate types.
+ * These delegates are stored in a sparse vector on the entry, so they
+ * must all be of the same size. We could use indirection here for all types,
+ * i.e. store an index; that is fine for large delegates, like the attached
+ * export, but for the chain delegates it is excessive.
+ */
+ union
+ {
+ /**
+ * Valid for the forwarding chain delegates. The LB that is built.
+ */
+ dpo_id_t fd_dpo;
+
+ /**
+ * Valid for the attached import cases. An index of the importer/exporter
+ */
+ fib_node_index_t fd_index;
+
+ /**
+ * For cover tracking: the node list.
+ */
+ fib_node_list_t fd_list;
+
+ /**
+ * BFD state
+ */
+ fib_bfd_state_t fd_bfd_state;
+ };
+} fib_entry_delegate_t;
+
+struct fib_entry_t_;
+
+extern void fib_entry_delegate_remove(struct fib_entry_t_ *fib_entry,
+ fib_entry_delegate_type_t type);
+
+extern fib_entry_delegate_t *fib_entry_delegate_find_or_add(struct fib_entry_t_ *fib_entry,
+ fib_entry_delegate_type_t fdt);
+extern fib_entry_delegate_t *fib_entry_delegate_get(const struct fib_entry_t_ *fib_entry,
+ fib_entry_delegate_type_t type);
+
+extern fib_forward_chain_type_t fib_entry_delegate_type_to_chain_type(
+ fib_entry_delegate_type_t type);
+
+extern fib_entry_delegate_type_t fib_entry_chain_type_to_delegate_type(
+ fib_forward_chain_type_t type);
+
+extern u8 *format_fib_entry_deletegate(u8 * s, va_list * args);
+
+#endif
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
new file mode 100644
index 00000000..173df74f
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src.c
@@ -0,0 +1,1423 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_path_ext.h>
+#include <vnet/fib/fib_urpf_list.h>
+
+/*
+ * per-source type vft
+ */
+static fib_entry_src_vft_t fib_entry_src_vft[FIB_SOURCE_MAX];
+
+void
+fib_entry_src_register (fib_source_t source,
+ const fib_entry_src_vft_t *vft)
+{
+ fib_entry_src_vft[source] = *vft;
+}
+
+static int
+fib_entry_src_cmp_for_sort (void * v1,
+ void * v2)
+{
+ fib_entry_src_t *esrc1 = v1, *esrc2 = v2;
+
+ return (esrc1->fes_src - esrc2->fes_src);
+}
+
+void
+fib_entry_src_action_init (fib_entry_t *fib_entry,
+ fib_source_t source)
+
+{
+ fib_entry_src_t esrc = {
+ .fes_pl = FIB_NODE_INDEX_INVALID,
+ .fes_flags = FIB_ENTRY_SRC_FLAG_NONE,
+ .fes_src = source,
+ };
+
+ if (NULL != fib_entry_src_vft[source].fesv_init)
+ {
+ fib_entry_src_vft[source].fesv_init(&esrc);
+ }
+
+ vec_add1(fib_entry->fe_srcs, esrc);
+ vec_sort_with_function(fib_entry->fe_srcs,
+ fib_entry_src_cmp_for_sort);
+}
+
+static fib_entry_src_t *
+fib_entry_src_find (const fib_entry_t *fib_entry,
+ fib_source_t source,
+ u32 *index)
+
+{
+ fib_entry_src_t *esrc;
+ int ii;
+
+ ii = 0;
+ vec_foreach(esrc, fib_entry->fe_srcs)
+ {
+ if (esrc->fes_src == source)
+ {
+ if (NULL != index)
+ {
+ *index = ii;
+ }
+ return (esrc);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+
+ return (NULL);
+}
+
+int
+fib_entry_is_sourced (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ return (NULL != fib_entry_src_find(fib_entry, source, NULL));
+}
+
+static fib_entry_src_t *
+fib_entry_src_find_or_create (fib_entry_t *fib_entry,
+ fib_source_t source,
+ u32 *index)
+{
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL == esrc)
+ {
+ fib_entry_src_action_init(fib_entry, source);
+ }
+
+ return (fib_entry_src_find(fib_entry, source, NULL));
+}
+
+void
+fib_entry_src_action_deinit (fib_entry_t *fib_entry,
+ fib_source_t source)
+
+{
+ fib_entry_src_t *esrc;
+ u32 index = ~0;
+
+ esrc = fib_entry_src_find(fib_entry, source, &index);
+
+ ASSERT(NULL != esrc);
+
+ if (NULL != fib_entry_src_vft[source].fesv_deinit)
+ {
+ fib_entry_src_vft[source].fesv_deinit(esrc);
+ }
+
+ fib_path_ext_list_flush(&esrc->fes_path_exts);
+ vec_del1(fib_entry->fe_srcs, index);
+}
+
+fib_entry_src_cover_res_t
+fib_entry_src_action_cover_change (fib_entry_t *fib_entry,
+ fib_source_t source)
+{
+ if (NULL != fib_entry_src_vft[source].fesv_cover_change)
+ {
+ return (fib_entry_src_vft[source].fesv_cover_change(
+ fib_entry_src_find(fib_entry, source, NULL),
+ fib_entry));
+ }
+
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+ return (res);
+}
+
+fib_entry_src_cover_res_t
+fib_entry_src_action_cover_update (fib_entry_t *fib_entry,
+ fib_source_t source)
+{
+ if (NULL != fib_entry_src_vft[source].fesv_cover_update)
+ {
+ return (fib_entry_src_vft[source].fesv_cover_update(
+ fib_entry_src_find(fib_entry, source, NULL),
+ fib_entry));
+ }
+
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+ return (res);
+}
+
+typedef struct fib_entry_src_collect_forwarding_ctx_t_
+{
+ load_balance_path_t *next_hops;
+ const fib_entry_t *fib_entry;
+ const fib_entry_src_t *esrc;
+ fib_forward_chain_type_t fct;
+ int n_recursive_constrained;
+ u16 preference;
+} fib_entry_src_collect_forwarding_ctx_t;
+
+/**
+ * @brief Determine whether this FIB entry should use a load-balance MAP
+ * to support PIC edge fast convergence
+ */
+load_balance_flags_t
+fib_entry_calc_lb_flags (fib_entry_src_collect_forwarding_ctx_t *ctx)
+{
+ /**
+ * We'll use a LB map if the path-list has multiple recursive paths.
+ * Recursive paths imply BGP, and hence scale.
+ */
+ if (ctx->n_recursive_constrained > 1 &&
+ fib_path_list_is_popular(ctx->esrc->fes_pl))
+ {
+ return (LOAD_BALANCE_FLAG_USES_MAP);
+ }
+ return (LOAD_BALANCE_FLAG_NONE);
+}
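
The decision in isolation, as a sketch (`esrc` is an assumed source whose path-list the path-list module reports as popular; the counter is normally filled in by the path-list walk below):

    fib_entry_src_collect_forwarding_ctx_t ctx = {
        .esrc = esrc,
        .n_recursive_constrained = 2,
    };

    /* both conditions hold, so this returns LOAD_BALANCE_FLAG_USES_MAP */
    load_balance_flags_t flags = fib_entry_calc_lb_flags(&ctx);
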
+
+static int
+fib_entry_src_valid_out_label (mpls_label_t label)
+{
+ return ((MPLS_LABEL_IS_REAL(label) ||
+ MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL == label ||
+ MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL == label ||
+ MPLS_IETF_IMPLICIT_NULL_LABEL == label));
+}
+
+/**
+ * @brief Turn the chain type requested by the client into the one they
+ * really wanted
+ */
+fib_forward_chain_type_t
+fib_entry_chain_type_fixup (const fib_entry_t *entry,
+ fib_forward_chain_type_t fct)
+{
+ /*
+ * The EOS chain is tricky since one cannot know the adjacency
+ * to link to without knowing what the packet's payload protocol
+ * will be once the label is popped.
+ */
+ fib_forward_chain_type_t dfct;
+
+ if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct)
+ {
+ return (fct);
+ }
+
+ dfct = fib_entry_get_default_chain_type(entry);
+
+ if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct)
+ {
+ /*
+ * If the entry being asked about is an EOS MPLS label entry,
+ * then use the payload-protocol field that we stashed there
+ * for just this purpose
+ */
+ return (fib_forw_chain_type_from_dpo_proto(
+ entry->fe_prefix.fp_payload_proto));
+ }
+ /*
+ * else give them what this entry would be by default, i.e. if it's a v6
+ * entry, then the label it is locally labelled with should be carrying v6
+ * traffic. If it's a non-EOS label entry, then there are more labels and we
+ * want a non-EOS chain.
+ */
+ return (dfct);
+}
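
A worked example of the fixup, as a sketch (`entry` is assumed to be a plain IPv4 entry): a request for the MPLS EOS chain cannot be honoured literally, so the entry's default chain is returned, since popping the last label exposes an IPv4 payload.

    fib_forward_chain_type_t fct;

    fct = fib_entry_chain_type_fixup(entry, FIB_FORW_CHAIN_TYPE_MPLS_EOS);
    /* fct is now FIB_FORW_CHAIN_TYPE_UNICAST_IP4, the entry's default */
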
+
+static void
+fib_entry_src_get_path_forwarding (fib_node_index_t path_index,
+ fib_entry_src_collect_forwarding_ctx_t *ctx)
+{
+ load_balance_path_t *nh;
+
+ /*
+ * no extension => no out-going label for this path. that's OK
+ * in the case of an IP or EOS chain, but not for non-EOS
+ */
+ switch (ctx->fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ /*
+ * EOS traffic with no label to stack, we need the IP Adj
+ */
+ vec_add2(ctx->next_hops, nh, 1);
+
+ nh->path_index = path_index;
+ nh->path_weight = fib_path_get_weight(path_index);
+ fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+
+ break;
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ if (fib_path_is_exclusive(path_index) ||
+ fib_path_is_deag(path_index))
+ {
+ vec_add2(ctx->next_hops, nh, 1);
+
+ nh->path_index = path_index;
+ nh->path_weight = fib_path_get_weight(path_index);
+ fib_path_contribute_forwarding(path_index,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &nh->path_dpo);
+ }
+ break;
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ {
+ /*
+ * no label. we need a chain based on the payload. fixup.
+ */
+ vec_add2(ctx->next_hops, nh, 1);
+
+ nh->path_index = path_index;
+ nh->path_weight = fib_path_get_weight(path_index);
+ fib_path_contribute_forwarding(path_index,
+ fib_entry_chain_type_fixup(ctx->fib_entry,
+ ctx->fct),
+ &nh->path_dpo);
+ fib_path_stack_mpls_disp(path_index,
+ ctx->fib_entry->fe_prefix.fp_payload_proto,
+ &nh->path_dpo);
+
+ break;
+ }
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ ASSERT(0);
+ break;
+ }
+}
+
+static fib_path_list_walk_rc_t
+fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
+ fib_node_index_t path_index,
+ void *arg)
+{
+ fib_entry_src_collect_forwarding_ctx_t *ctx;
+ fib_path_ext_t *path_ext;
+
+ ctx = arg;
+
+ /*
+ * if the path is not resolved, don't include it.
+ */
+ if (!fib_path_is_resolved(path_index))
+ {
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+ }
+
+ if (fib_path_is_recursive_constrained(path_index))
+ {
+ ctx->n_recursive_constrained += 1;
+ }
+ if (0xffff == ctx->preference)
+ {
+ /*
+ * we have not set a preference yet, so the first path we encounter
+ * sets the preference we are collecting.
+ */
+ ctx->preference = fib_path_get_preference(path_index);
+ }
+ else if (ctx->preference != fib_path_get_preference(path_index))
+ {
+ /*
+ * this path does not belong to the same preference as the
+ * previous paths encountered. we are done now.
+ */
+ return (FIB_PATH_LIST_WALK_STOP);
+ }
+
+ /*
+ * get the matching path-extension for the path being visited.
+ */
+ path_ext = fib_path_ext_list_find_by_path_index(&ctx->esrc->fes_path_exts,
+ path_index);
+
+ if (NULL != path_ext)
+ {
+ switch (path_ext->fpe_type)
+ {
+ case FIB_PATH_EXT_MPLS:
+ if (fib_entry_src_valid_out_label(path_ext->fpe_label_stack[0]))
+ {
+ /*
+ * found a matching extension. stack it to obtain the forwarding
+ * info for this path.
+ */
+ ctx->next_hops =
+ fib_path_ext_stack(path_ext,
+ ctx->fct,
+ fib_entry_chain_type_fixup(ctx->fib_entry,
+ ctx->fct),
+ ctx->next_hops);
+ }
+ else
+ {
+ fib_entry_src_get_path_forwarding(path_index, ctx);
+ }
+ break;
+ case FIB_PATH_EXT_ADJ:
+ if (FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER & path_ext->fpe_adj_flags)
+ {
+ fib_entry_src_get_path_forwarding(path_index, ctx);
+ }
+ /*
+ * else
+ * the path does not refine the cover, meaning that
+ * the adjacency does not match the sub-net on the link.
+ * So this path does not contribute forwarding.
+ */
+ break;
+ }
+ }
+ else
+ {
+ fib_entry_src_get_path_forwarding(path_index, ctx);
+ }
+
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+}
+
+void
+fib_entry_src_mk_lb (fib_entry_t *fib_entry,
+ const fib_entry_src_t *esrc,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo_lb)
+{
+ dpo_proto_t lb_proto;
+
+ /*
+ * If the entry has path extensions then we construct a load-balance
+ * by stacking the extensions on the forwarding chains of the paths.
+ * Otherwise we use the load-balance of the path-list
+ */
+ fib_entry_src_collect_forwarding_ctx_t ctx = {
+ .esrc = esrc,
+ .fib_entry = fib_entry,
+ .next_hops = NULL,
+ .n_recursive_constrained = 0,
+ .fct = fct,
+ .preference = 0xffff,
+ };
+
+ /*
+ * As an optimisation we allocate the vector of next-hops to be sized
+ * equal to the maximum number of paths we will need, which is also the
+ * most likely number we will need, since in most cases the paths are 'up'.
+ */
+ vec_validate(ctx.next_hops, fib_path_list_get_n_paths(esrc->fes_pl));
+ vec_reset_length(ctx.next_hops);
+
+ lb_proto = fib_forw_chain_type_to_dpo_proto(fct);
+
+ fib_path_list_walk(esrc->fes_pl,
+ fib_entry_src_collect_forwarding,
+ &ctx);
+
+ if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_EXCLUSIVE)
+ {
+ /*
+ * the client provided the DPO that the entry should link to.
+ * all entries must link to a LB, so if it is an LB already
+ * then we can use it.
+ */
+ if ((1 == vec_len(ctx.next_hops)) &&
+ (DPO_LOAD_BALANCE == ctx.next_hops[0].path_dpo.dpoi_type))
+ {
+ dpo_copy(dpo_lb, &ctx.next_hops[0].path_dpo);
+ dpo_reset(&ctx.next_hops[0].path_dpo);
+ return;
+ }
+ }
+
+ if (!dpo_id_is_valid(dpo_lb))
+ {
+ /*
+ * first time create
+ */
+ if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST)
+ {
+ dpo_set(dpo_lb,
+ DPO_REPLICATE,
+ lb_proto,
+ MPLS_IS_REPLICATE | replicate_create(0, lb_proto));
+ }
+ else
+ {
+ flow_hash_config_t fhc;
+ fib_protocol_t fp;
+
+ /*
+ * if the protocol for the LB we are building does not match that
+ * of the fib_entry (i.e. we are building the [n]EOS LB for an IPv[4|6]
+ * entry) then the fib_index is not an index that relates to the table
+ * type we need. So get the default flow-hash config instead.
+ */
+ fp = dpo_proto_to_fib(lb_proto);
+
+ if (fib_entry->fe_prefix.fp_proto != fp)
+ {
+ fhc = fib_table_get_default_flow_hash_config(fp);
+ }
+ else
+ {
+ fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, fp);
+ }
+ dpo_set(dpo_lb,
+ DPO_LOAD_BALANCE,
+ lb_proto,
+ load_balance_create(0, lb_proto, fhc));
+ }
+ }
+
+ if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST)
+ {
+ /*
+ * MPLS multicast
+ */
+ replicate_multipath_update(dpo_lb, ctx.next_hops);
+ }
+ else
+ {
+ load_balance_multipath_update(dpo_lb,
+ ctx.next_hops,
+ fib_entry_calc_lb_flags(&ctx));
+ vec_free(ctx.next_hops);
+
+ /*
+ * if this entry is sourced by the uRPF-exempt source then we
+ * append the always present local0 interface (index 0) to the
+ * uRPF list so it is not empty. That way packets pass the loose check.
+ */
+ index_t ui = fib_path_list_get_urpf(esrc->fes_pl);
+
+ if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry),
+ FIB_SOURCE_URPF_EXEMPT) ||
+ (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&&
+ (0 == fib_urpf_check_size(ui)))
+ {
+ /*
+ * The uRPF list we get from the path-list is shared by all
+ * other users of the list, but the uRPF exemption applies
+ * only to this prefix. So we need our own list.
+ */
+ ui = fib_urpf_list_alloc_and_lock();
+ fib_urpf_list_append(ui, 0);
+ fib_urpf_list_bake(ui);
+ load_balance_set_urpf(dpo_lb->dpoi_index, ui);
+ fib_urpf_list_unlock(ui);
+ }
+ else
+ {
+ load_balance_set_urpf(dpo_lb->dpoi_index, ui);
+ }
+ load_balance_set_fib_entry_flags(dpo_lb->dpoi_index,
+ fib_entry_get_flags_i(fib_entry));
+ }
+}
+
+void
+fib_entry_src_action_install (fib_entry_t *fib_entry,
+ fib_source_t source)
+{
+ /*
+ * Install the forwarding chain for the given source into the forwarding
+ * tables
+ */
+ fib_forward_chain_type_t fct;
+ fib_entry_src_t *esrc;
+ int insert;
+
+ fct = fib_entry_get_default_chain_type(fib_entry);
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ /*
+ * Every entry has its own load-balance object. All changes to the entry's
+ * forwarding result in an inplace modify of the load-balance. This means
+ * the load-balance object only needs to be added to the forwarding
+ * DB once, when it is created.
+ */
+ insert = !dpo_id_is_valid(&fib_entry->fe_lb);
+
+ fib_entry_src_mk_lb(fib_entry, esrc, fct, &fib_entry->fe_lb);
+
+ ASSERT(dpo_id_is_valid(&fib_entry->fe_lb));
+ FIB_ENTRY_DBG(fib_entry, "install: %d", fib_entry->fe_lb);
+
+ /*
+ * insert the adj into the data-plane forwarding trie
+ */
+ if (insert)
+ {
+ fib_table_fwding_dpo_update(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix,
+ &fib_entry->fe_lb);
+ }
+
+ /*
+ * if any of the other chain types are already created they will need
+ * updating too
+ */
+ fib_entry_delegate_type_t fdt;
+ fib_entry_delegate_t *fed;
+
+ FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed,
+ {
+ fib_entry_src_mk_lb(fib_entry, esrc,
+ fib_entry_delegate_type_to_chain_type(fdt),
+ &fed->fd_dpo);
+ });
+}
+
+void
+fib_entry_src_action_uninstall (fib_entry_t *fib_entry)
+{
+ /*
+ * uninstall the forwarding chain from the forwarding tables
+ */
+ FIB_ENTRY_DBG(fib_entry, "uninstall: %d",
+ fib_entry->fe_adj_index);
+
+ if (dpo_id_is_valid(&fib_entry->fe_lb))
+ {
+ fib_table_fwding_dpo_remove(
+ fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix,
+ &fib_entry->fe_lb);
+
+ dpo_reset(&fib_entry->fe_lb);
+ }
+}
+
+static void
+fib_entry_recursive_loop_detect_i (fib_node_index_t path_list_index)
+{
+ fib_node_index_t *entries = NULL;
+
+ fib_path_list_recursive_loop_detect(path_list_index, &entries);
+
+ vec_free(entries);
+}
+
+void
+fib_entry_src_action_activate (fib_entry_t *fib_entry,
+ fib_source_t source)
+
+{
+ int houston_we_are_go_for_install;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ ASSERT(!(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE));
+ ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED);
+
+ esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ACTIVE;
+
+ if (NULL != fib_entry_src_vft[source].fesv_activate)
+ {
+ houston_we_are_go_for_install =
+ fib_entry_src_vft[source].fesv_activate(esrc, fib_entry);
+ }
+ else
+ {
+ /*
+ * the source is not providing an activate function, we'll assume
+ * therefore it has no objection to installing the entry
+ */
+ houston_we_are_go_for_install = !0;
+ }
+
+ /*
+ * link to the path-list provided by the source, and go check
+ * if that forms any loops in the graph.
+ */
+ fib_entry->fe_parent = esrc->fes_pl;
+ fib_entry->fe_sibling =
+ fib_path_list_child_add(fib_entry->fe_parent,
+ FIB_NODE_TYPE_ENTRY,
+ fib_entry_get_index(fib_entry));
+
+ fib_entry_recursive_loop_detect_i(fib_entry->fe_parent);
+
+ FIB_ENTRY_DBG(fib_entry, "activate: %d",
+ fib_entry->fe_parent);
+
+ if (0 != houston_we_are_go_for_install)
+ {
+ fib_entry_src_action_install(fib_entry, source);
+ }
+ else
+ {
+ fib_entry_src_action_uninstall(fib_entry);
+ }
+}
+
+void
+fib_entry_src_action_deactivate (fib_entry_t *fib_entry,
+ fib_source_t source)
+
+{
+ fib_node_index_t path_list_index;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE);
+
+ if (NULL != fib_entry_src_vft[source].fesv_deactivate)
+ {
+ fib_entry_src_vft[source].fesv_deactivate(esrc, fib_entry);
+ }
+
+ esrc->fes_flags &= ~FIB_ENTRY_SRC_FLAG_ACTIVE;
+
+ FIB_ENTRY_DBG(fib_entry, "deactivate: %d", fib_entry->fe_parent);
+
+ /*
+ * un-link from an old path-list. Check for any loops this will clear
+ */
+ path_list_index = fib_entry->fe_parent;
+ fib_entry->fe_parent = FIB_NODE_INDEX_INVALID;
+
+ fib_entry_recursive_loop_detect_i(path_list_index);
+
+ /*
+ * this will unlock the path-list, so it may be invalid thereafter.
+ */
+ fib_path_list_child_remove(path_list_index, fib_entry->fe_sibling);
+ fib_entry->fe_sibling = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_action_fwd_update (const fib_entry_t *fib_entry,
+ fib_source_t source)
+{
+ fib_entry_src_t *esrc;
+
+ vec_foreach(esrc, fib_entry->fe_srcs)
+ {
+ if (NULL != fib_entry_src_vft[esrc->fes_src].fesv_fwd_update)
+ {
+ fib_entry_src_vft[esrc->fes_src].fesv_fwd_update(esrc,
+ fib_entry,
+ source);
+ }
+ }
+}
+
+void
+fib_entry_src_action_reactivate (fib_entry_t *fib_entry,
+ fib_source_t source)
+{
+ fib_node_index_t path_list_index;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE);
+
+ FIB_ENTRY_DBG(fib_entry, "reactivate: %d to %d",
+ fib_entry->fe_parent,
+ esrc->fes_pl);
+
+ if (fib_entry->fe_parent != esrc->fes_pl)
+ {
+ /*
+ * un-link from an old path-list. Check for any loops this will clear
+ */
+ path_list_index = fib_entry->fe_parent;
+ fib_entry->fe_parent = FIB_NODE_INDEX_INVALID;
+
+ /*
+ * temporary lock so it doesn't get deleted when this entry is no
+ * longer a child.
+ */
+ fib_path_list_lock(path_list_index);
+
+ /*
+ * this entry is no longer a child. after unlinking check if any loops
+ * were broken
+ */
+ fib_path_list_child_remove(path_list_index,
+ fib_entry->fe_sibling);
+
+ fib_entry_recursive_loop_detect_i(path_list_index);
+
+ /*
+ * link to the path-list provided by the source, and go check
+ * if that forms any loops in the graph.
+ */
+ fib_entry->fe_parent = esrc->fes_pl;
+ fib_entry->fe_sibling =
+ fib_path_list_child_add(fib_entry->fe_parent,
+ FIB_NODE_TYPE_ENTRY,
+ fib_entry_get_index(fib_entry));
+
+ fib_entry_recursive_loop_detect_i(fib_entry->fe_parent);
+ fib_path_list_unlock(path_list_index);
+ }
+ fib_entry_src_action_install(fib_entry, source);
+ fib_entry_src_action_fwd_update(fib_entry, source);
+}
+
+void
+fib_entry_src_action_installed (const fib_entry_t *fib_entry,
+ fib_source_t source)
+{
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL != fib_entry_src_vft[source].fesv_installed)
+ {
+ fib_entry_src_vft[source].fesv_installed(esrc,
+ fib_entry);
+ }
+
+ fib_entry_src_action_fwd_update(fib_entry, source);
+}
+
+/*
+ * fib_entry_src_action_add
+ *
+ * Adding a source can result in a new fib_entry being created, which
+ * can in turn mean the pool is realloc'd and thus the entry passed as
+ * an argument is also realloc'd
+ * @return the original entry
+ */
+fib_entry_t *
+fib_entry_src_action_add (fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_node_index_t fib_entry_index;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find_or_create(fib_entry, source, NULL);
+
+ esrc->fes_ref_count++;
+
+ if (1 != esrc->fes_ref_count)
+ {
+ /*
+ * we only want to add the source on the 0->1 transition
+ */
+ return (fib_entry);
+ }
+
+ esrc->fes_entry_flags = flags;
+
+ /*
+ * save variable so we can recover from a fib_entry realloc.
+ */
+ fib_entry_index = fib_entry_get_index(fib_entry);
+
+ if (NULL != fib_entry_src_vft[source].fesv_add)
+ {
+ fib_entry_src_vft[source].fesv_add(esrc,
+ fib_entry,
+ flags,
+ fib_entry_get_dpo_proto(fib_entry),
+ dpo);
+ }
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ADDED;
+
+ fib_path_list_lock(esrc->fes_pl);
+
+ /*
+ * the source owns a lock on the entry
+ */
+ fib_entry_lock(fib_entry_get_index(fib_entry));
+
+ return (fib_entry);
+}
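
A sketch of the reference-counting semantics (`flags` and `dpo` assumed supplied by the caller): two adds from the same source require two removes before the source is gone.

    fib_entry = fib_entry_src_action_add(fib_entry, FIB_SOURCE_API, flags, dpo);
    fib_entry = fib_entry_src_action_add(fib_entry, FIB_SOURCE_API, flags, dpo);

    fib_entry_src_action_remove(fib_entry, FIB_SOURCE_API); /* 2->1: still added */
    fib_entry_src_action_remove(fib_entry, FIB_SOURCE_API); /* 1->0: source removed */
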
+
+/*
+ * fib_entry_src_action_update
+ *
+ * Adding a source can result in a new fib_entry being created, which
+ * can in turn mean the pool is realloc'd and thus the entry passed as
+ * an argument is also realloc'd
+ * @return the original entry
+ */
+fib_entry_t *
+fib_entry_src_action_update (fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_node_index_t fib_entry_index, old_path_list_index;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find_or_create(fib_entry, source, NULL);
+
+ if (NULL == esrc)
+ return (fib_entry_src_action_add(fib_entry, source, flags, dpo));
+
+ old_path_list_index = esrc->fes_pl;
+ esrc->fes_entry_flags = flags;
+
+ /*
+ * save variable so we can recover from a fib_entry realloc.
+ */
+ fib_entry_index = fib_entry_get_index(fib_entry);
+
+ if (NULL != fib_entry_src_vft[source].fesv_add)
+ {
+ fib_entry_src_vft[source].fesv_add(esrc,
+ fib_entry,
+ flags,
+ fib_entry_get_dpo_proto(fib_entry),
+ dpo);
+ }
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ADDED;
+
+ fib_path_list_lock(esrc->fes_pl);
+ fib_path_list_unlock(old_path_list_index);
+
+ return (fib_entry);
+}
+
+
+fib_entry_src_flag_t
+fib_entry_src_action_remove (fib_entry_t *fib_entry,
+ fib_source_t source)
+
+{
+ fib_node_index_t old_path_list;
+ fib_entry_src_flag_t sflags;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL == esrc)
+ return (FIB_ENTRY_SRC_FLAG_ACTIVE);
+
+ esrc->fes_ref_count--;
+ sflags = esrc->fes_flags;
+
+ if (0 != esrc->fes_ref_count)
+ {
+ /*
+ * only remove the source on the 1->0 transition
+ */
+ return (sflags);
+ }
+
+ if (esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE)
+ {
+ fib_entry_src_action_deactivate(fib_entry, source);
+ }
+
+ old_path_list = esrc->fes_pl;
+
+ if (NULL != fib_entry_src_vft[source].fesv_remove)
+ {
+ fib_entry_src_vft[source].fesv_remove(esrc);
+ }
+
+ fib_path_list_unlock(old_path_list);
+ fib_entry_unlock(fib_entry_get_index(fib_entry));
+
+ sflags &= ~FIB_ENTRY_SRC_FLAG_ADDED;
+ fib_entry_src_action_deinit(fib_entry, source);
+
+ return (sflags);
+}
+
+/*
+ * fib_route_attached_cross_table
+ *
+ * Return true if the route is attached via an interface that
+ * is not in the same table as the route
+ */
+static inline int
+fib_route_attached_cross_table (const fib_entry_t *fib_entry,
+ const fib_route_path_t *rpath)
+{
+ /*
+ * - All zeros next-hop
+ * - a valid interface
+ * - entry's fib index not equal to the interface's fib index
+ */
+ if (ip46_address_is_zero(&rpath->frp_addr) &&
+ (~0 != rpath->frp_sw_if_index) &&
+ (fib_entry->fe_fib_index !=
+ fib_table_get_index_for_sw_if_index(fib_entry_get_proto(fib_entry),
+ rpath->frp_sw_if_index)))
+ {
+ return (!0);
+ }
+ return (0);
+}
+
+/*
+ * fib_path_is_attached
+ *
+ * Return true if the path is 'attached': an all-zeros next-hop via a
+ * valid interface, or a path explicitly flagged as attached
+ */
+static inline int
+fib_path_is_attached (const fib_route_path_t *rpath)
+{
+ /*
+ * - All zeros next-hop
+ * - a valid interface
+ */
+ if (ip46_address_is_zero(&rpath->frp_addr) &&
+ (~0 != rpath->frp_sw_if_index))
+ {
+ return (!0);
+ }
+ else if (rpath->frp_flags & FIB_ROUTE_PATH_ATTACHED)
+ {
+ return (!0);
+ }
+ return (0);
+}
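
For illustration, a sketch of a path that satisfies the check: an all-zeros next-hop through a valid interface (sw_if_index 1 is an assumed valid interface).

    fib_route_path_t rpath = {
        .frp_sw_if_index = 1,
        /* .frp_addr left all-zeros: no next-hop address */
    };

    ASSERT(fib_path_is_attached(&rpath));
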
+
+fib_path_list_flags_t
+fib_entry_src_flags_2_path_list_flags (fib_entry_flag_t eflags)
+{
+ fib_path_list_flags_t plf = FIB_PATH_LIST_FLAG_NONE;
+
+ if (eflags & FIB_ENTRY_FLAG_DROP)
+ {
+ plf |= FIB_PATH_LIST_FLAG_DROP;
+ }
+ if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE)
+ {
+ plf |= FIB_PATH_LIST_FLAG_EXCLUSIVE;
+ }
+ if (eflags & FIB_ENTRY_FLAG_LOCAL)
+ {
+ plf |= FIB_PATH_LIST_FLAG_LOCAL;
+ }
+
+ return (plf);
+}
+
+static void
+fib_entry_flags_update (const fib_entry_t *fib_entry,
+ const fib_route_path_t *rpath,
+ fib_path_list_flags_t *pl_flags,
+ fib_entry_src_t *esrc)
+{
+ if ((esrc->fes_src == FIB_SOURCE_API) ||
+ (esrc->fes_src == FIB_SOURCE_CLI))
+ {
+ if (fib_path_is_attached(rpath))
+ {
+ esrc->fes_entry_flags |= FIB_ENTRY_FLAG_ATTACHED;
+ }
+ else
+ {
+ esrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_ATTACHED;
+ }
+ }
+ if (fib_route_attached_cross_table(fib_entry, rpath))
+ {
+ esrc->fes_entry_flags |= FIB_ENTRY_FLAG_IMPORT;
+ }
+ else
+ {
+ esrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_IMPORT;
+ }
+}
+
+/*
+ * fib_entry_src_action_path_add
+ *
+ * Adding a path from a source can result in a new fib_entry being created,
+ * which can in turn mean the pool is realloc'd and thus the entry passed as
+ * an argument is also realloc'd
+ * @return the entry
+ */
+fib_entry_t*
+fib_entry_src_action_path_add (fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *rpath)
+{
+ fib_node_index_t old_path_list, fib_entry_index;
+ fib_path_list_flags_t pl_flags;
+ fib_entry_src_t *esrc;
+
+ /*
+ * save variable so we can recover from a fib_entry realloc.
+ */
+ fib_entry_index = fib_entry_get_index(fib_entry);
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+ if (NULL == esrc)
+ {
+ fib_entry =
+ fib_entry_src_action_add(fib_entry,
+ source,
+ flags,
+ drop_dpo_get(
+ fib_entry_get_dpo_proto(fib_entry)));
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+ }
+
+ /*
+ * we are no doubt modifying a path-list. If the path-list
+ * is shared, and hence not modifiable, then the index returned
+ * will be for a different path-list. This FIB entry thus needs
+ * to maintain its lock appropriately.
+ */
+ old_path_list = esrc->fes_pl;
+
+ ASSERT(NULL != fib_entry_src_vft[source].fesv_path_add);
+
+ pl_flags = fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry));
+ fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc);
+
+ fib_entry_src_vft[source].fesv_path_add(esrc, fib_entry, pl_flags, rpath);
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ fib_path_list_lock(esrc->fes_pl);
+ fib_path_list_unlock(old_path_list);
+
+ return (fib_entry);
+}
+
+/*
+ * fib_entry_src_action_path_swap
+ *
+ * The source is providing new paths to replace the old ones.
+ * Adding a source can result in a new fib_entry being created, which
+ * can in turn mean the pool is realloc'd and thus the entry passed as
+ * an argument is also realloc'd
+ * @return the entry
+ */
+fib_entry_t*
+fib_entry_src_action_path_swap (fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *rpaths)
+{
+ fib_node_index_t old_path_list, fib_entry_index;
+ fib_path_list_flags_t pl_flags;
+ const fib_route_path_t *rpath;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ /*
+ * save variable so we can recover from a fib_entry realloc.
+ */
+ fib_entry_index = fib_entry_get_index(fib_entry);
+
+ if (NULL == esrc)
+ {
+ fib_entry = fib_entry_src_action_add(fib_entry,
+ source,
+ flags,
+ drop_dpo_get(
+ fib_entry_get_dpo_proto(fib_entry)));
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+ }
+
+ /*
+ * swapping paths may create a new path-list (or may use an existing shared)
+ * but we are certainly getting a different one. This FIB entry thus needs
+ * to maintain its lock appropriately.
+ */
+ old_path_list = esrc->fes_pl;
+
+ ASSERT(NULL != fib_entry_src_vft[source].fesv_path_swap);
+
+ pl_flags = fib_entry_src_flags_2_path_list_flags(flags);
+
+ vec_foreach(rpath, rpaths)
+ {
+ fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc);
+ }
+
+ fib_entry_src_vft[source].fesv_path_swap(esrc,
+ fib_entry,
+ pl_flags,
+ rpaths);
+
+ fib_entry = fib_entry_get(fib_entry_index);
+
+ fib_path_list_lock(esrc->fes_pl);
+ fib_path_list_unlock(old_path_list);
+
+ return (fib_entry);
+}
+
+fib_entry_src_flag_t
+fib_entry_src_action_path_remove (fib_entry_t *fib_entry,
+ fib_source_t source,
+ const fib_route_path_t *rpath)
+{
+ fib_path_list_flags_t pl_flags;
+ fib_node_index_t old_path_list;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ ASSERT(NULL != esrc);
+ ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED);
+
+ /*
+ * we are no doubt modifying a path-list. If the path-list
+ * is shared, and hence not modifiable, then the index returned
+ * will be for a different path-list. This FIB entry thus needs
+ * to maintain its lock appropriately.
+ */
+ old_path_list = esrc->fes_pl;
+
+ ASSERT(NULL != fib_entry_src_vft[source].fesv_path_remove);
+
+ pl_flags = fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry));
+ fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc);
+
+ fib_entry_src_vft[source].fesv_path_remove(esrc, pl_flags, rpath);
+
+ /*
+ * lock the new path-list, unlock the old if it had one
+ */
+ fib_path_list_unlock(old_path_list);
+
+ if (FIB_NODE_INDEX_INVALID != esrc->fes_pl) {
+ fib_path_list_lock(esrc->fes_pl);
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+ }
+ else
+ {
+ /*
+ * no more paths left from this source
+ */
+ fib_entry_src_action_remove(fib_entry, source);
+ return (FIB_ENTRY_SRC_FLAG_NONE);
+ }
+}
+
+u8*
+fib_entry_src_format (fib_entry_t *fib_entry,
+ fib_source_t source,
+ u8* s)
+{
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL != fib_entry_src_vft[source].fesv_format)
+ {
+ return (fib_entry_src_vft[source].fesv_format(esrc, s));
+ }
+ return (s);
+}
+
+adj_index_t
+fib_entry_get_adj_for_source (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ return (ADJ_INDEX_INVALID);
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL != esrc)
+ {
+ if (FIB_NODE_INDEX_INVALID != esrc->fes_pl)
+ {
+ return (fib_path_list_get_adj(
+ esrc->fes_pl,
+ fib_entry_get_default_chain_type(fib_entry)));
+ }
+ }
+ return (ADJ_INDEX_INVALID);
+}
+
+const int
+fib_entry_get_dpo_for_source (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ dpo_id_t *dpo)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ return (0);
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL != esrc)
+ {
+ if (FIB_NODE_INDEX_INVALID != esrc->fes_pl)
+ {
+ fib_path_list_contribute_forwarding(
+ esrc->fes_pl,
+ fib_entry_get_default_chain_type(fib_entry),
+ dpo);
+
+ return (dpo_id_is_valid(dpo));
+ }
+ }
+ return (0);
+}
+
+u32
+fib_entry_get_resolving_interface_for_source (fib_node_index_t entry_index,
+ fib_source_t source)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ fib_entry = fib_entry_get(entry_index);
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL != esrc)
+ {
+ if (FIB_NODE_INDEX_INVALID != esrc->fes_pl)
+ {
+ return (fib_path_list_get_resolving_interface(esrc->fes_pl));
+ }
+ }
+ return (~0);
+}
+
+fib_entry_flag_t
+fib_entry_get_flags_for_source (fib_node_index_t entry_index,
+ fib_source_t source)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ fib_entry = fib_entry_get(entry_index);
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL != esrc)
+ {
+ return (esrc->fes_entry_flags);
+ }
+
+ return (FIB_ENTRY_FLAG_NONE);
+}
+
+fib_entry_flag_t
+fib_entry_get_flags_i (const fib_entry_t *fib_entry)
+{
+ fib_entry_flag_t flags;
+
+ /*
+ * the vector of sources is deliberately arranged in priority order
+ */
+ if (0 == vec_len(fib_entry->fe_srcs))
+ {
+ flags = FIB_ENTRY_FLAG_NONE;
+ }
+ else
+ {
+ fib_entry_src_t *esrc;
+
+ esrc = vec_elt_at_index(fib_entry->fe_srcs, 0);
+ flags = esrc->fes_entry_flags;
+ }
+
+ return (flags);
+}
+
+void
+fib_entry_set_source_data (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ const void *data)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL != esrc &&
+ NULL != fib_entry_src_vft[source].fesv_set_data)
+ {
+ fib_entry_src_vft[source].fesv_set_data(esrc, fib_entry, data);
+ }
+}
+
+const void*
+fib_entry_get_source_data (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ if (NULL != esrc &&
+ NULL != fib_entry_src_vft[source].fesv_get_data)
+ {
+ return (fib_entry_src_vft[source].fesv_get_data(esrc, fib_entry));
+ }
+ return (NULL);
+}
+
+void
+fib_entry_src_module_init (void)
+{
+ fib_entry_src_rr_register();
+ fib_entry_src_interface_register();
+ fib_entry_src_default_route_register();
+ fib_entry_src_special_register();
+ fib_entry_src_api_register();
+ fib_entry_src_adj_register();
+ fib_entry_src_mpls_register();
+ fib_entry_src_lisp_register();
+}
diff --git a/src/vnet/fib/fib_entry_src.h b/src/vnet/fib/fib_entry_src.h
new file mode 100644
index 00000000..35c43936
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src.h
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_ENTRY_SRC_H__
+#define __FIB_ENTRY_SRC_H__
+
+#include "fib_entry.h"
+#include "fib_path_list.h"
+#include "fib_internal.h"
+
+/**
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define FIB_ENTRY_DBG(_e, _fmt, _args...) \
+{ \
+ u8*__tmp = NULL; \
+ __tmp = format(__tmp, "e:[%d:%U", \
+ fib_entry_get_index(_e), \
+ format_ip46_address, \
+ &_e->fe_prefix.fp_addr, \
+ IP46_TYPE_ANY); \
+ __tmp = format(__tmp, "/%d]:", \
+ _e->fe_prefix.fp_len); \
+ __tmp = format(__tmp, _fmt, ##_args); \
+ clib_warning("%s", __tmp); \
+ vec_free(__tmp); \
+}
+#else
+#define FIB_ENTRY_DBG(_e, _fmt, _args...)
+#endif
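
A usage sketch: when FIB_DEBUG is defined at compile time, the macro prefixes the message with the entry's index and prefix before handing it to clib_warning; otherwise it compiles away entirely.

    /* emits e.g. "e:[12:10.0.0.0/24]:activate: 7" under FIB_DEBUG */
    FIB_ENTRY_DBG(fib_entry, "activate: %d", fib_entry->fe_parent);
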
+
+/**
+ * Source initialisation Function
+ */
+typedef void (*fib_entry_src_init_t)(fib_entry_src_t *src);
+
+/**
+ * Source deinitialisation Function
+ */
+typedef void (*fib_entry_src_deinit_t)(fib_entry_src_t *src);
+
+/**
+ * Source activation. Called when the source is the new best source on the entry.
+ * Return non-zero if the entry can now install, 0 otherwise
+ */
+typedef int (*fib_entry_src_activate_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+typedef void (*fib_entry_src_deactivate_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Source Add.
+ * Called when the source is added to the entry
+ */
+typedef void (*fib_entry_src_add_t)(fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ dpo_proto_t proto,
+ const dpo_id_t *dpo);
+
+/**
+ * Source Remove.
+ */
+typedef void (*fib_entry_src_remove_t)(fib_entry_src_t *src);
+
+/**
+ * Result from a cover update/change
+ */
+typedef struct fib_entry_src_cover_res_t_ {
+ u16 install;
+ fib_node_bw_reason_flag_t bw_reason;
+} fib_entry_src_cover_res_t;
+
+/**
+ * Cover changed. The source should re-evaluate its cover.
+ */
+typedef fib_entry_src_cover_res_t (*fib_entry_src_cover_change_t)(
+ fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Cover updated. The cover this source is tracking has updated (i.e. its
+ * forwarding has changed); the source may need to re-evaluate.
+ */
+typedef fib_entry_src_cover_res_t (*fib_entry_src_cover_update_t)(
+ fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Forwarding updated. Notification that the forwarding information for the
+ * entry has been updated. This notification is sent to all sources, not just
+ * the active best.
+ */
+typedef void (*fib_entry_src_fwd_update_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ fib_source_t best_source);
+
+/**
+ * Installed. Notification that the source is now installed as
+ * the entry's forwarding source.
+ */
+typedef void (*fib_entry_src_installed_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * format.
+ */
+typedef u8* (*fib_entry_src_format_t)(fib_entry_src_t *src,
+ u8* s);
+
+/**
+ * Source path add
+ * the source is adding a new path
+ */
+typedef void (*fib_entry_src_path_add_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *path);
+
+/**
+ * Source path remove
+ * the source is removing a path
+ */
+typedef void (*fib_entry_src_path_remove_t)(fib_entry_src_t *src,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *path);
+
+/**
+ * Source path replace/swap
+ * the source is providing a new set of paths
+ */
+typedef void (*fib_entry_src_path_swap_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *path);
+
+/**
+ * Set source specific opaque data
+ */
+typedef void (*fib_entry_src_set_data_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ const void *data);
+
+/**
+ * Get source specific opaque data
+ */
+typedef const void* (*fib_entry_src_get_data_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Virtual function table each FIB entry source will register
+ */
+typedef struct fib_entry_src_vft_t_ {
+ fib_entry_src_init_t fesv_init;
+ fib_entry_src_deinit_t fesv_deinit;
+ fib_entry_src_activate_t fesv_activate;
+ fib_entry_src_deactivate_t fesv_deactivate;
+ fib_entry_src_add_t fesv_add;
+ fib_entry_src_remove_t fesv_remove;
+ fib_entry_src_path_swap_t fesv_path_swap;
+ fib_entry_src_path_add_t fesv_path_add;
+ fib_entry_src_path_remove_t fesv_path_remove;
+ fib_entry_src_cover_change_t fesv_cover_change;
+ fib_entry_src_cover_update_t fesv_cover_update;
+ fib_entry_src_format_t fesv_format;
+ fib_entry_src_installed_t fesv_installed;
+ fib_entry_src_fwd_update_t fesv_fwd_update;
+ fib_entry_src_get_data_t fesv_get_data;
+ fib_entry_src_set_data_t fesv_set_data;
+} fib_entry_src_vft_t;
+
+#define FOR_EACH_SRC_ADDED(_entry, _src, _source, action) \
+{ \
+ vec_foreach(_src, _entry->fe_srcs) \
+ { \
+ if (_src->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED) { \
+ _source = _src->fes_src; \
+ do { \
+ action; \
+ } while(0); \
+ } \
+ } \
+}
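
A sketch of the macro in use (the statement-expression wrapper is one way to pass a multi-statement body as the single `action` argument):

    u8 *s = NULL;
    fib_entry_src_t *src;
    fib_source_t source;

    FOR_EACH_SRC_ADDED(fib_entry, src, source,
    ({
        s = format(s, " src:%d", source);
    }));
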
+
+extern u8* fib_entry_src_format(fib_entry_t *entry,
+ fib_source_t source,
+ u8* s);
+
+extern void fib_entry_src_register(fib_source_t source,
+ const fib_entry_src_vft_t *vft);
+
+extern void fib_entry_src_action_init(fib_entry_t *entry,
+ fib_source_t source);
+
+extern void fib_entry_src_action_deinit(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern fib_entry_src_cover_res_t fib_entry_src_action_cover_change(
+ fib_entry_t *entry,
+ fib_source_t source);
+
+extern fib_entry_src_cover_res_t fib_entry_src_action_cover_update(
+ fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern void fib_entry_src_action_activate(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern void fib_entry_src_action_deactivate(fib_entry_t *fib_entry,
+ fib_source_t source);
+extern void fib_entry_src_action_reactivate(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern fib_entry_t* fib_entry_src_action_add(fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo);
+extern fib_entry_t* fib_entry_src_action_update(fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo);
+
+extern fib_entry_src_flag_t fib_entry_src_action_remove(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern void fib_entry_src_action_install(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern void fib_entry_src_action_uninstall(fib_entry_t *fib_entry);
+
+extern fib_entry_t* fib_entry_src_action_path_add(fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *path);
+
+extern fib_entry_t* fib_entry_src_action_path_swap(fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *path);
+
+extern fib_entry_src_flag_t fib_entry_src_action_path_remove(fib_entry_t *fib_entry,
+ fib_source_t source,
+ const fib_route_path_t *path);
+
+extern void fib_entry_src_action_installed(const fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern fib_forward_chain_type_t fib_entry_get_default_chain_type(
+ const fib_entry_t *fib_entry);
+extern fib_entry_flag_t fib_entry_get_flags_i(const fib_entry_t *fib_entry);
+extern fib_path_list_flags_t fib_entry_src_flags_2_path_list_flags(
+ fib_entry_flag_t eflags);
+
+extern fib_forward_chain_type_t fib_entry_chain_type_fixup(const fib_entry_t *entry,
+ fib_forward_chain_type_t fct);
+
+extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry,
+ const fib_entry_src_t *esrc,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo_lb);
+
+extern fib_protocol_t fib_entry_get_proto(const fib_entry_t * fib_entry);
+extern dpo_proto_t fib_entry_get_dpo_proto(const fib_entry_t * fib_entry);
+
+/*
+ * Per-source registration. Declared here so we save a separate .h file for each
+ */
+extern void fib_entry_src_default_register(void);
+extern void fib_entry_src_rr_register(void);
+extern void fib_entry_src_interface_register(void);
+extern void fib_entry_src_default_route_register(void);
+extern void fib_entry_src_special_register(void);
+extern void fib_entry_src_api_register(void);
+extern void fib_entry_src_adj_register(void);
+extern void fib_entry_src_mpls_register(void);
+extern void fib_entry_src_lisp_register(void);
+
+extern void fib_entry_src_module_init(void);
+
+#endif
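
Finally, a sketch of how a source module ties into this header: it fills in only the callbacks it needs and registers the vft from its *_register() function. The names my_src_init, my_src_vft and fib_entry_src_my_register are hypothetical, and the fib_source_t value is only a placeholder.

    static void
    my_src_init (fib_entry_src_t *src)
    {
        /* hypothetical: per-source state setup */
    }

    static const fib_entry_src_vft_t my_src_vft = {
        .fesv_init = my_src_init,
    };

    void
    fib_entry_src_my_register (void)
    {
        fib_entry_src_register(FIB_SOURCE_SPECIAL, &my_src_vft);
    }
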
diff --git a/src/vnet/fib/fib_entry_src_adj.c b/src/vnet/fib/fib_entry_src_adj.c
new file mode 100644
index 00000000..9ea2b17e
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_adj.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+#include "fib_table.h"
+#include "fib_entry_cover.h"
+#include "fib_attached_export.h"
+#include "fib_path_ext.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_adj_init (fib_entry_src_t *src)
+{
+ src->adj.fesa_cover = FIB_NODE_INDEX_INVALID;
+ src->adj.fesa_sibling = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_adj_path_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ const fib_route_path_t *rpath;
+
+ if (FIB_NODE_INDEX_INVALID == src->fes_pl)
+ {
+ src->fes_pl = fib_path_list_create(pl_flags, paths);
+ }
+ else
+ {
+ src->fes_pl = fib_path_list_copy_and_path_add(src->fes_pl,
+ pl_flags,
+ paths);
+ }
+
+ /*
+ * resolve the existing extensions
+ */
+ fib_path_ext_list_resolve(&src->fes_path_exts, src->fes_pl);
+
+ /*
+ * and new extensions
+ */
+ vec_foreach(rpath, paths)
+ {
+ fib_path_ext_list_insert(&src->fes_path_exts,
+ src->fes_pl,
+ FIB_PATH_EXT_ADJ,
+ rpath);
+ }
+}
+
+static void
+fib_entry_src_adj_path_remove (fib_entry_src_t *src,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *rpaths)
+{
+ const fib_route_path_t *rpath;
+
+ if (FIB_NODE_INDEX_INVALID != src->fes_pl)
+ {
+ src->fes_pl = fib_path_list_copy_and_path_remove(src->fes_pl,
+ pl_flags,
+ rpaths);
+ }
+
+ /*
+ * remove the path-extension for the path
+ */
+ vec_foreach(rpath, rpaths)
+ {
+ fib_path_ext_list_remove(&src->fes_path_exts, FIB_PATH_EXT_ADJ, rpath);
+ };
+ /*
+ * resolve the remaining extensions
+ */
+ fib_path_ext_list_resolve(&src->fes_path_exts, src->fes_pl);
+}
+
+static void
+fib_entry_src_adj_path_swap (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ const fib_route_path_t *rpath;
+
+ /*
+ * flush all the old extensions before we create a brand new path-list
+ */
+ fib_path_ext_list_flush(&src->fes_path_exts);
+
+ src->fes_pl = fib_path_list_create(pl_flags, paths);
+
+ /*
+ * and new extensions
+ */
+ vec_foreach(rpath, paths)
+ {
+ fib_path_ext_list_push_back(&src->fes_path_exts,
+ src->fes_pl,
+ FIB_PATH_EXT_ADJ,
+ rpath);
+ }
+}
+
+static void
+fib_entry_src_adj_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+/*
+ * Add a path-extension indicating whether this path refines its cover,
+ * i.e. whether it passed the refinement check
+ */
+static void
+fib_enty_src_adj_update_path_ext (fib_entry_src_t *src,
+ fib_node_index_t path_index,
+ fib_path_ext_adj_flags_t flags)
+{
+ fib_path_ext_t *path_ext;
+
+ path_ext = fib_path_ext_list_find_by_path_index(&src->fes_path_exts,
+ path_index);
+
+ if (NULL != path_ext)
+ {
+ path_ext->fpe_adj_flags = flags;
+ }
+ else
+ {
+ ASSERT(!"no path extension");
+ }
+}
+
+typedef struct fib_entry_src_path_list_walk_cxt_t_
+{
+ fib_entry_src_t *src;
+ u32 cover_itf;
+ fib_path_ext_adj_flags_t flags;
+} fib_entry_src_path_list_walk_cxt_t;
+
+static fib_path_list_walk_rc_t
+fib_entry_src_adj_path_list_walk (fib_node_index_t pl_index,
+ fib_node_index_t path_index,
+ void *arg)
+{
+ fib_entry_src_path_list_walk_cxt_t *ctx;
+ u32 adj_itf;
+
+ ctx = arg;
+ adj_itf = fib_path_get_resolving_interface(path_index);
+
+ if (ctx->cover_itf == adj_itf)
+ {
+ fib_enty_src_adj_update_path_ext(ctx->src, path_index,
+ FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER);
+ ctx->flags |= FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER;
+ }
+ else
+ {
+ /*
+ * if the interface the adj is on is unnumbered to the
+ * cover's, then allow that too.
+ */
+ vnet_sw_interface_t *swif;
+
+ swif = vnet_get_sw_interface (vnet_get_main(), adj_itf);
+
+ if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
+ ctx->cover_itf == swif->unnumbered_sw_if_index)
+ {
+ fib_enty_src_adj_update_path_ext(ctx->src, path_index,
+ FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER);
+ ctx->flags |= FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER;
+ }
+ else
+ {
+ fib_enty_src_adj_update_path_ext(ctx->src, path_index,
+ FIB_PATH_EXT_ADJ_FLAG_NONE);
+ }
+ }
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+}
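+
+/*
+ * Illustrative example (editorial sketch, not part of this commit):
+ * given a cover 10.0.0.0/24 attached on interface GigE0, an adj-fib
+ * for 10.0.0.5/32 whose path is on GigE0 refines the cover and is
+ * marked FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER; the same /32 learned on
+ * GigE1 does not refine it (unless GigE1 is unnumbered to GigE0) and
+ * so gets FIB_PATH_EXT_ADJ_FLAG_NONE. The addresses and interface
+ * names are hypothetical.
+ */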
+
+/*
+ * Source activate.
+ * Called when the source becomes the new best source on the entry
+ */
+static int
+fib_entry_src_adj_activate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * find the covering prefix. become a dependent thereof.
+ * there should always be a cover, though it may be the default route.
+ */
+ src->adj.fesa_cover = fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover);
+ ASSERT(fib_entry_get_index(fib_entry) != src->adj.fesa_cover);
+
+ cover = fib_entry_get(src->adj.fesa_cover);
+
+ ASSERT(cover != fib_entry);
+
+ src->adj.fesa_sibling =
+ fib_entry_cover_track(cover,
+ fib_entry_get_index(fib_entry));
+
+ /*
+ * if the cover is attached on the same interface as this adj source then
+ * install the FIB entry via the adj. otherwise install a drop.
+ * This prevents ARP/ND entries learned on interface X, but not
+ * belonging to X's subnet, from being added to the FIB. Allowing that
+ * would let nefarious gratuitous ARP requests attract traffic to the
+ * sender.
+ *
+ * and yes, I really do mean attached and not connected.
+ * this abomination:
+ * ip route add 10.0.0.0/24 Eth0
+ * is attached. and we want adj-fibs to install on Eth0.
+ */
+ if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover))
+ {
+ fib_entry_src_path_list_walk_cxt_t ctx = {
+ .cover_itf = fib_entry_get_resolving_interface(src->adj.fesa_cover),
+ .flags = FIB_PATH_EXT_ADJ_FLAG_NONE,
+ .src = src,
+ };
+
+ fib_path_list_walk(src->fes_pl,
+ fib_entry_src_adj_path_list_walk,
+ &ctx);
+
+ /*
+ * activate the entry if one of the paths refines the cover.
+ */
+ return (FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER & ctx.flags);
+ }
+ return (0);
+}
+
+/*
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+static void
+fib_entry_src_adj_deactivate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * remove the dependency on the covering entry
+ */
+ ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover);
+ cover = fib_entry_get(src->adj.fesa_cover);
+
+ fib_entry_cover_untrack(cover, src->adj.fesa_sibling);
+
+ /*
+ * tell the cover this entry no longer needs exporting
+ */
+ fib_attached_export_covered_removed(cover, fib_entry_get_index(fib_entry));
+
+ src->adj.fesa_cover = FIB_NODE_INDEX_INVALID;
+}
+
+static u8*
+fib_entry_src_adj_format (fib_entry_src_t *src,
+ u8* s)
+{
+ return (format(s, "cover:%d", src->adj.fesa_cover));
+}
+
+static void
+fib_entry_src_adj_installed (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ /*
+ * The adj source now rules! poke our cover to get exported
+ */
+ fib_entry_t *cover;
+
+ ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover);
+ cover = fib_entry_get(src->adj.fesa_cover);
+
+ fib_attached_export_covered_added(cover,
+ fib_entry_get_index(fib_entry));
+}
+
+static fib_entry_src_cover_res_t
+fib_entry_src_adj_cover_change (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+
+ fib_entry_src_adj_deactivate(src, fib_entry);
+
+ res.install = fib_entry_src_adj_activate(src, fib_entry);
+
+ if (res.install) {
+ /*
+ * ADJ fib can install
+ */
+ res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+ }
+
+ return (res);
+}
+
+/*
+ * fib_entry_src_adj_cover_update
+ */
+static fib_entry_src_cover_res_t
+fib_entry_src_adj_cover_update (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ /*
+ * the cover has updated, i.e. its forwarding or flags
+ * have changed. don't deactivate/activate here, since this
+ * prefix is updated during the cover's walk.
+ */
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+ fib_entry_t *cover;
+
+ ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover);
+
+ cover = fib_entry_get(src->adj.fesa_cover);
+
+ res.install = (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover));
+
+ return (res);
+}
+
+const static fib_entry_src_vft_t adj_src_vft = {
+ .fesv_init = fib_entry_src_adj_init,
+ .fesv_path_swap = fib_entry_src_adj_path_swap,
+ .fesv_path_add = fib_entry_src_adj_path_add,
+ .fesv_path_remove = fib_entry_src_adj_path_remove,
+ .fesv_remove = fib_entry_src_adj_remove,
+ .fesv_activate = fib_entry_src_adj_activate,
+ .fesv_deactivate = fib_entry_src_adj_deactivate,
+ .fesv_format = fib_entry_src_adj_format,
+ .fesv_installed = fib_entry_src_adj_installed,
+ .fesv_cover_change = fib_entry_src_adj_cover_change,
+ .fesv_cover_update = fib_entry_src_adj_cover_update,
+};
+
+void
+fib_entry_src_adj_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_ADJ, &adj_src_vft);
+}
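+
+/*
+ * Usage sketch (editorial, hedged): the ARP/ND code is what typically
+ * adds adj-fib routes via this source, with a call along the lines of:
+ *
+ *   fib_table_entry_path_add(fib_index, &pfx,   /* e.g. 10.0.0.5/32 */
+ *                            FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED,
+ *                            ...);
+ *
+ * which lands in fib_entry_src_adj_path_add() above. The prefix value
+ * is hypothetical and the full argument list of
+ * fib_table_entry_path_add() is elided here.
+ */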
diff --git a/src/vnet/fib/fib_entry_src_api.c b/src/vnet/fib/fib_entry_src_api.c
new file mode 100644
index 00000000..1cdcfbde
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_api.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_path_ext.h>
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_api_init (fib_entry_src_t *src)
+{
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_api_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_api_path_swap (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *rpaths)
+{
+ const fib_route_path_t *rpath;
+
+ fib_path_ext_list_flush(&src->fes_path_exts);
+
+ src->fes_pl = fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ rpaths);
+
+ vec_foreach(rpath, rpaths)
+ {
+ if (NULL != rpath->frp_label_stack)
+ {
+ fib_path_ext_list_push_back(&src->fes_path_exts,
+ src->fes_pl,
+ FIB_PATH_EXT_MPLS,
+ rpath);
+ }
+ }
+}
+
+static void
+fib_entry_src_api_path_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *rpaths)
+{
+ const fib_route_path_t *rpath;
+
+ if (FIB_NODE_INDEX_INVALID == src->fes_pl)
+ {
+ src->fes_pl =
+ fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), rpaths);
+ }
+ else
+ {
+ src->fes_pl =
+ fib_path_list_copy_and_path_add(src->fes_pl,
+ (FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ rpaths);
+ }
+
+ /*
+ * re-resolve all the path-extensions with the new path-list
+ */
+ fib_path_ext_list_resolve(&src->fes_path_exts, src->fes_pl);
+
+ /*
+ * if the path has a label we need to add a path extension
+ */
+ vec_foreach(rpath, rpaths)
+ {
+ if (NULL != rpath->frp_label_stack)
+ {
+ fib_path_ext_list_insert(&src->fes_path_exts,
+ src->fes_pl,
+ FIB_PATH_EXT_MPLS,
+ rpath);
+ }
+ }
+}
+
+static void
+fib_entry_src_api_path_remove (fib_entry_src_t *src,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *rpaths)
+{
+ const fib_route_path_t *rpath;
+
+ if (FIB_NODE_INDEX_INVALID != src->fes_pl)
+ {
+ src->fes_pl =
+ fib_path_list_copy_and_path_remove(src->fes_pl,
+ (FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ rpaths);
+ /*
+ * remove the path-extension for the path
+ */
+ vec_foreach(rpath, rpaths)
+ {
+ fib_path_ext_list_remove(&src->fes_path_exts, FIB_PATH_EXT_MPLS, rpath);
+ };
+ /*
+ * resolve the remaining extensions
+ */
+ fib_path_ext_list_resolve(&src->fes_path_exts, src->fes_pl);
+ }
+}
+
+static void
+fib_entry_src_api_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ dpo_proto_t proto,
+ const dpo_id_t *dpo)
+{
+ if (FIB_ENTRY_FLAG_NONE != flags)
+ {
+ src->fes_pl = fib_path_list_create_special(
+ proto,
+ fib_entry_src_flags_2_path_list_flags(flags),
+ dpo);
+ }
+}
+
+static void
+fib_entry_src_api_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+const static fib_entry_src_vft_t api_src_vft = {
+ .fesv_init = fib_entry_src_api_init,
+ .fesv_deinit = fib_entry_src_api_deinit,
+ .fesv_add = fib_entry_src_api_add,
+ .fesv_remove = fib_entry_src_api_remove,
+ .fesv_path_add = fib_entry_src_api_path_add,
+ .fesv_path_swap = fib_entry_src_api_path_swap,
+ .fesv_path_remove = fib_entry_src_api_path_remove,
+};
+
+void
+fib_entry_src_api_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_PLUGIN_HI, &api_src_vft);
+ fib_entry_src_register(FIB_SOURCE_API, &api_src_vft);
+ fib_entry_src_register(FIB_SOURCE_CLI, &api_src_vft);
+ fib_entry_src_register(FIB_SOURCE_DHCP, &api_src_vft);
+ fib_entry_src_register(FIB_SOURCE_IP6_ND_PROXY, &api_src_vft);
+ fib_entry_src_register(FIB_SOURCE_SR, &api_src_vft);
+}
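+
+/*
+ * Usage sketch (editorial, hedged): a CLI route such as
+ *
+ *   ip route add 1.1.1.0/24 via 10.0.0.1 GigabitEthernet0/8/0
+ *
+ * arrives here as FIB_SOURCE_CLI with a single fib_route_path_t;
+ * if the path carried an out-label, fib_entry_src_api_path_add()
+ * above would also add an MPLS path-extension for it. The addresses
+ * and interface name are hypothetical.
+ */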
diff --git a/src/vnet/fib/fib_entry_src_default.c b/src/vnet/fib/fib_entry_src_default.c
new file mode 100644
index 00000000..9846cf56
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_default.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_default_init (fib_entry_src_t *src)
+{
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_default_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_cover_change (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_default_path_add (fib_entry_src_t *src,
+ fib_protocol_t proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight)
+{
+}
+
+static void
+fib_entry_src_default_path_remove (fib_entry_src_t *src,
+ fib_protocol_t proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight)
+{
+}
+
+
+/*
+ * Source activate.
+ * Called when the source becomes the new best source on the entry
+ */
+static void
+fib_entry_src_default_activate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+}
+
+/*
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+static void
+fib_entry_src_default_deactivate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+}
+
+static void
+fib_entry_src_default_add (fib_entry_src_t *src,
+ fib_entry_flag_t flags,
+ fib_protocol_t proto)
+{
+}
+
+static void
+fib_entry_src_default_remove (fib_entry_src_t *src)
+{
+}
+
+const static fib_entry_src_vft_t default_src_vft = {
+ .fesv_init = fib_entry_src_default_init,
+ .fesv_deinit = fib_entry_src_default_deinit,
+ .fesv_add = fib_entry_src_default_add,
+ .fesv_remove = fib_entry_src_default_remove,
+ .fesv_path_add = fib_entry_src_default_path_add,
+ .fesv_path_remove = fib_entry_src_default_path_remove,
+ .fesv_activate = fib_entry_src_default_activate,
+ .fesv_deactivate = fib_entry_src_default_deactivate,
+};
+
+void
+fib_entry_src_default_register (void)
+{
+ fib_source_t source;
+
+ FOR_EACH_FIB_SOURCE(source) {
+ fib_entry_src_register(source, &default_src_vft);
+ }
+}
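+
+/*
+ * Editorial note: this registers the no-op vft for every source, so
+ * each source starts with a complete, if empty, function table; the
+ * specific source files (adj, api, interface, mpls, rr, ...) then
+ * re-register their own vfts, assuming fib_entry_src_register()
+ * permits a later registration to overwrite an earlier one.
+ */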
diff --git a/src/vnet/fib/fib_entry_src_default_route.c b/src/vnet/fib/fib_entry_src_default_route.c
new file mode 100644
index 00000000..431abb66
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_default_route.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_default_route_init (fib_entry_src_t *src)
+{
+ src->fes_flags = FIB_ENTRY_SRC_FLAG_NONE;
+}
+
+static void
+fib_entry_src_default_route_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_default_route_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ dpo_proto_t proto,
+ const dpo_id_t *dpo)
+{
+ src->fes_pl = fib_path_list_create_special(proto,
+ FIB_PATH_LIST_FLAG_DROP,
+ dpo);
+}
+
+const static fib_entry_src_vft_t interface_src_vft = {
+ .fesv_init = fib_entry_src_default_route_init,
+ .fesv_add = fib_entry_src_default_route_add,
+ .fesv_remove = fib_entry_src_default_route_remove,
+};
+
+void
+fib_entry_src_default_route_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_DEFAULT_ROUTE, &interface_src_vft);
+}
+
+
diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c
new file mode 100644
index 00000000..6c087f34
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_interface.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+#include "fib_internal.h"
+#include "fib_table.h"
+#include "fib_entry_cover.h"
+#include "fib_attached_export.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_interface_init (fib_entry_src_t *src)
+{
+ src->interface.fesi_cover = FIB_NODE_INDEX_INVALID;
+ src->interface.fesi_sibling = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_interface_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ dpo_proto_t proto,
+ const dpo_id_t *dpo)
+{
+ src->fes_pl = fib_path_list_create_special(
+ proto,
+ fib_entry_src_flags_2_path_list_flags(flags),
+ dpo);
+}
+
+static void
+fib_entry_src_interface_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_interface_path_swap (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ ip_adjacency_t *adj;
+
+ src->fes_pl = fib_path_list_create(pl_flags, paths);
+
+ /*
+ * this is a hack to get the entry's prefix into the glean adjacency
+ * so that it is available for fast retrieval in the switch path.
+ */
+ if (!(FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags))
+ {
+ adj = adj_get(fib_path_list_get_adj(
+ src->fes_pl,
+ fib_entry_get_default_chain_type(entry)));
+
+ if (IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index)
+ {
+ /*
+ * the connected prefix will link to a glean on a non-p2p
+ * interface.
+ */
+ adj->sub_type.glean.receive_addr = entry->fe_prefix.fp_addr;
+ }
+ }
+}
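+
+/*
+ * Illustrative example (editorial): for a connected prefix such as
+ * 10.0.0.1/24, the glean adjacency's receive_addr is set to 10.0.0.1
+ * so the switch path can read the interface's address directly from
+ * the adj, without a FIB lookup. The address is hypothetical.
+ */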
+
+/*
+ * Source activate.
+ * Called when the source becomes the new best source on the entry
+ */
+static int
+fib_entry_src_interface_activate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ if (FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags)
+ {
+ /*
+ * Track the covering attached/connected cover. This is so that
+ * during an attached export of the cover, this local prefix is
+ * also exported
+ */
+ src->interface.fesi_cover =
+ fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != src->interface.fesi_cover);
+
+ cover = fib_entry_get(src->interface.fesi_cover);
+
+ src->interface.fesi_sibling =
+ fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
+ }
+
+ return (!0);
+}
+
+
+/*
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+static void
+fib_entry_src_interface_deactivate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * remove the dependency on the covering entry
+ */
+ if (FIB_NODE_INDEX_INVALID != src->interface.fesi_cover)
+ {
+ cover = fib_entry_get(src->interface.fesi_cover);
+
+ fib_entry_cover_untrack(cover, src->interface.fesi_sibling);
+
+ src->interface.fesi_cover = FIB_NODE_INDEX_INVALID;
+ }
+}
+
+static fib_entry_src_cover_res_t
+fib_entry_src_interface_cover_change (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+
+ if (FIB_NODE_INDEX_INVALID == src->interface.fesi_cover)
+ {
+ /*
+ * not tracking the cover. surprised we got poked?
+ */
+ return (res);
+ }
+
+ /*
+ * this function is called when this entry's cover has a more specific
+ * entry inserted beneath it. That does not necessarily mean that this
+ * entry is covered by the new prefix, so check that it is.
+ */
+ if (src->interface.fesi_cover != fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix))
+ {
+ fib_entry_src_interface_deactivate(src, fib_entry);
+ fib_entry_src_interface_activate(src, fib_entry);
+ }
+ return (res);
+}
+
+static void
+fib_entry_src_interface_installed (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ /*
+ * The interface source now rules! poke our cover to get exported
+ */
+ fib_entry_t *cover;
+
+ if (FIB_NODE_INDEX_INVALID != src->interface.fesi_cover)
+ {
+ cover = fib_entry_get(src->interface.fesi_cover);
+
+ fib_attached_export_covered_added(cover,
+ fib_entry_get_index(fib_entry));
+ }
+}
+
+static u8*
+fib_entry_src_interface_format (fib_entry_src_t *src,
+ u8* s)
+{
+ return (format(s, "cover:%d", src->interface.fesi_cover));
+}
+
+const static fib_entry_src_vft_t interface_src_vft = {
+ .fesv_init = fib_entry_src_interface_init,
+ .fesv_add = fib_entry_src_interface_add,
+ .fesv_remove = fib_entry_src_interface_remove,
+ .fesv_path_swap = fib_entry_src_interface_path_swap,
+ .fesv_activate = fib_entry_src_interface_activate,
+ .fesv_deactivate = fib_entry_src_interface_deactivate,
+ .fesv_format = fib_entry_src_interface_format,
+ .fesv_installed = fib_entry_src_interface_installed,
+ .fesv_cover_change = fib_entry_src_interface_cover_change,
+ /*
+ * not concerned about updates to the cover. the cover will
+ * decide to export or not
+ */
+};
+
+void
+fib_entry_src_interface_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_INTERFACE, &interface_src_vft);
+}
diff --git a/src/vnet/fib/fib_entry_src_lisp.c b/src/vnet/fib/fib_entry_src_lisp.c
new file mode 100644
index 00000000..e72dce63
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_lisp.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_lisp_init (fib_entry_src_t *src)
+{
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_lisp_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_lisp_path_swap (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ src->fes_pl = fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+}
+
+static void
+fib_entry_src_lisp_path_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ if (FIB_NODE_INDEX_INVALID == src->fes_pl)
+ {
+ src->fes_pl =
+ fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), paths);
+ }
+ else
+ {
+ src->fes_pl =
+ fib_path_list_copy_and_path_add(src->fes_pl,
+ (FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+ }
+}
+
+static void
+fib_entry_src_lisp_path_remove (fib_entry_src_t *src,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ if (FIB_NODE_INDEX_INVALID != src->fes_pl)
+ {
+ src->fes_pl =
+ fib_path_list_copy_and_path_remove(src->fes_pl,
+ (FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+ }
+}
+
+static void
+fib_entry_src_lisp_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ dpo_proto_t proto,
+ const dpo_id_t *dpo)
+{
+ if (FIB_ENTRY_FLAG_NONE != flags)
+ {
+ src->fes_pl = fib_path_list_create_special(
+ proto,
+ fib_entry_src_flags_2_path_list_flags(flags),
+ dpo);
+ }
+}
+
+static void
+fib_entry_src_lisp_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_lisp_set_data (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ const void *data)
+{
+ src->lisp.fesl_fib_index = *(u32*)data;
+}
+
+static const void*
+fib_entry_src_lisp_get_data (fib_entry_src_t *src,
+ const fib_entry_t *entry)
+{
+ return (&(src->lisp.fesl_fib_index));
+}
+
+const static fib_entry_src_vft_t api_src_vft = {
+ .fesv_init = fib_entry_src_lisp_init,
+ .fesv_deinit = fib_entry_src_lisp_deinit,
+ .fesv_add = fib_entry_src_lisp_add,
+ .fesv_remove = fib_entry_src_lisp_remove,
+ .fesv_path_add = fib_entry_src_lisp_path_add,
+ .fesv_path_swap = fib_entry_src_lisp_path_swap,
+ .fesv_path_remove = fib_entry_src_lisp_path_remove,
+ .fesv_set_data = fib_entry_src_lisp_set_data,
+ .fesv_get_data = fib_entry_src_lisp_get_data,
+};
+
+void
+fib_entry_src_lisp_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_LISP, &api_src_vft);
+}
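+
+/*
+ * Usage sketch (editorial, hedged): LISP stores the index of its
+ * src-fib against the entry via the fesv_set_data/fesv_get_data
+ * callbacks above, presumably through accessors of the form:
+ *
+ *   u32 fib_index = 1;                               // hypothetical
+ *   fib_entry_set_source_data(fei, FIB_SOURCE_LISP, &fib_index);
+ *   const u32 *p = fib_entry_get_source_data(fei, FIB_SOURCE_LISP);
+ *
+ * The accessor names are assumptions inferred from the vft.
+ */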
diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c
new file mode 100644
index 00000000..6fdd5c0a
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_mpls.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls_types.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/mpls_fib.h>
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_mpls_init (fib_entry_src_t *src)
+{
+ mpls_eos_bit_t eos;
+
+ src->fes_flags = FIB_ENTRY_SRC_FLAG_NONE;
+ src->mpls.fesm_label = MPLS_LABEL_INVALID;
+
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ src->mpls.fesm_lfes[eos] = FIB_NODE_INDEX_INVALID;
+ }
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_mpls_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_mpls_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+ src->mpls.fesm_label = MPLS_LABEL_INVALID;
+}
+
+static void
+fib_entry_src_mpls_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ dpo_proto_t proto,
+ const dpo_id_t *dpo)
+{
+ src->fes_pl =
+ fib_path_list_create_special(proto,
+ FIB_PATH_LIST_FLAG_DROP,
+ drop_dpo_get(proto));
+}
+
+static void
+fib_entry_src_mpls_set_data (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ const void *data)
+{
+ fib_protocol_t payload_proto;
+ fib_node_index_t fei;
+ mpls_label_t label;
+ mpls_eos_bit_t eos;
+
+ /*
+ * after the MPLS table alloc, and the possible re-alloc of fib entries,
+ * the entry pointer may no longer be valid, so save its index.
+ */
+ payload_proto = entry->fe_prefix.fp_proto;
+ fei = fib_entry_get_index(entry);
+ label = *(mpls_label_t*)data;
+
+ if (MPLS_LABEL_INVALID == label)
+ {
+ /*
+ * removing the local label
+ */
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ fib_table_entry_delete_index(src->mpls.fesm_lfes[eos],
+ FIB_SOURCE_SPECIAL);
+ }
+ fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID,
+ FIB_PROTOCOL_MPLS,
+ FIB_SOURCE_MPLS);
+ src->mpls.fesm_label = label;
+ }
+ else
+ {
+ fib_prefix_t prefix = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = label,
+ };
+ fib_node_index_t fib_index;
+ dpo_id_t dpo = DPO_INVALID;
+
+ /*
+ * adding a new local label. make sure the MPLS fib exists.
+ */
+ if (MPLS_LABEL_INVALID == src->mpls.fesm_label)
+ {
+ fib_index =
+ fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS,
+ MPLS_FIB_DEFAULT_TABLE_ID,
+ FIB_SOURCE_MPLS);
+ }
+ else
+ {
+ fib_index = mpls_fib_index_from_table_id(MPLS_FIB_DEFAULT_TABLE_ID);
+
+ /*
+ * if this is a change in label, remove the old one first
+ */
+ if (src->mpls.fesm_label != label)
+ {
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ ASSERT(FIB_NODE_INDEX_INVALID != src->mpls.fesm_lfes[eos]);
+ fib_table_entry_delete_index(src->mpls.fesm_lfes[eos],
+ FIB_SOURCE_SPECIAL);
+ }
+ }
+ }
+
+ src->mpls.fesm_label = label;
+
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ prefix.fp_eos = eos;
+ prefix.fp_payload_proto = fib_proto_to_dpo(payload_proto);
+
+ fib_entry_contribute_forwarding(fei,
+ (eos ?
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS :
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS),
+ &dpo);
+ src->mpls.fesm_lfes[eos] =
+ fib_table_entry_special_dpo_add(fib_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+ dpo_reset(&dpo);
+ }
+ }
+}
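+
+/*
+ * Usage sketch (editorial, hedged): binding a local label to an IP
+ * entry, e.g. via a CLI along the lines of
+ * "mpls local-label add 33 1.1.1.0/24", reaches this function with
+ * 'data' pointing at the label, and results in two MPLS FIB entries
+ * (one per EOS bit) whose forwarding is inherited from the IP entry.
+ * The label value and exact CLI form are hypothetical.
+ */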
+
+static const void *
+fib_entry_src_mpls_get_data (fib_entry_src_t *src,
+ const fib_entry_t *entry)
+{
+ return (&(src->mpls.fesm_label));
+}
+
+static u8*
+fib_entry_src_mpls_format (fib_entry_src_t *src,
+ u8* s)
+{
+ return (format(s, "MPLS local-label:%d", src->mpls.fesm_label));
+}
+
+const static fib_entry_src_vft_t mpls_src_vft = {
+ .fesv_init = fib_entry_src_mpls_init,
+ .fesv_deinit = fib_entry_src_mpls_deinit,
+ .fesv_add = fib_entry_src_mpls_add,
+ .fesv_remove = fib_entry_src_mpls_remove,
+ .fesv_format = fib_entry_src_mpls_format,
+ .fesv_set_data = fib_entry_src_mpls_set_data,
+ .fesv_get_data = fib_entry_src_mpls_get_data,
+ /*
+ * .fesv_fwd_update = fib_entry_src_mpls_fwd_update,
+ * When the forwarding for the IP entry is updated, any MPLS chains
+ * it has created are also updated. Since the MPLS entry will have already
+ * installed that chain/load-balance, there is no need to update the entry.
+ * FIXME: later: propagate any walk to the children of the MPLS entry. for SR
+ */
+};
+
+void
+fib_entry_src_mpls_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_MPLS, &mpls_src_vft);
+}
+
+
diff --git a/src/vnet/fib/fib_entry_src_rr.c b/src/vnet/fib/fib_entry_src_rr.c
new file mode 100644
index 00000000..1153f3f1
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_rr.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include "fib_entry_src.h"
+#include "fib_entry_cover.h"
+#include "fib_entry.h"
+#include "fib_table.h"
+
+/*
+ * fib_entry_src_rr_resolve_via_connected
+ *
+ * Resolve via a connected cover.
+ */
+static void
+fib_entry_src_rr_resolve_via_connected (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ const fib_entry_t *cover)
+{
+ const fib_route_path_t path = {
+ .frp_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto),
+ .frp_addr = fib_entry->fe_prefix.fp_addr,
+ .frp_sw_if_index = fib_entry_get_resolving_interface(
+ fib_entry_get_index(cover)),
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ };
+ fib_route_path_t *paths = NULL;
+ vec_add1(paths, path);
+
+ /*
+ * since the cover is connected, the address this entry corresponds
+ * to is a peer (i.e. ARP-able) on the interface to which the cover is
+ * connected. The fact we resolve via the cover just means this RR
+ * source is the first source to use said peer. The ARP source will be
+ * along shortly to overrule this RR source.
+ */
+ src->fes_pl = fib_path_list_create(FIB_PATH_LIST_FLAG_NONE, paths);
+ src->fes_entry_flags = fib_entry_get_flags(fib_entry_get_index(cover));
+
+ vec_free(paths);
+}
+
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_rr_init (fib_entry_src_t *src)
+{
+ src->rr.fesr_cover = FIB_NODE_INDEX_INVALID;
+ src->rr.fesr_sibling = FIB_NODE_INDEX_INVALID;
+}
+
+
+/*
+ * use the path-list of the cover, unless it would form a loop.
+ * that is unless the cover is via this entry.
+ * If a loop were to form it would be a 1 level loop (i.e. X via X),
+ * and there would be 2 locks on the path-list; one since its used
+ * by the cover, and 1 from here. The first lock will go when the
+ * cover is removed, the second, and last, when the covered walk
+ * occurs during the cover's removal - this is not a place where
+ * we can handle last lock gone.
+ * In short, don't let the loop form. The usual rules of 'we must
+ * let it form so we know when it breaks' don't apply here, since
+ * the loop will break when the cover changes, and this function
+ * will be called again when that happens.
+ */
+static void
+fib_entry_src_rr_use_covers_pl (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ const fib_entry_t *cover)
+{
+ fib_node_index_t *entries = NULL;
+ dpo_proto_t proto;
+
+ proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto);
+ vec_add1(entries, fib_entry_get_index(fib_entry));
+
+ if (fib_path_list_recursive_loop_detect(cover->fe_parent,
+ &entries))
+ {
+ src->fes_pl = fib_path_list_create_special(proto,
+ FIB_PATH_LIST_FLAG_DROP,
+ drop_dpo_get(proto));
+ }
+ else
+ {
+ src->fes_pl = cover->fe_parent;
+ }
+ vec_free(entries);
+}
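+
+/*
+ * Illustrative example (editorial) of the loop guarded against above:
+ * with a cover 0.0.0.0/0 via 10.0.0.1, the RR-sourced entry for
+ * 10.0.0.1/32 would inherit the cover's path-list, which resolves via
+ * 10.0.0.1 itself (a one-level loop), so a drop path-list is used
+ * instead. The addresses are hypothetical.
+ */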
+
+/*
+ * Source activation. Called when the source is the new best source on the entry
+ */
+static int
+fib_entry_src_rr_activate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * find the covering prefix. become a dependent thereof.
+ * for IP there should always be a cover, though it may be the default route.
+ * For MPLS there is never a cover.
+ */
+ if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto)
+ {
+ src->fes_pl = fib_path_list_create_special(DPO_PROTO_MPLS,
+ FIB_PATH_LIST_FLAG_DROP,
+ NULL);
+ fib_path_list_lock(src->fes_pl);
+ return (!0);
+ }
+
+ src->rr.fesr_cover = fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != src->rr.fesr_cover);
+
+ cover = fib_entry_get(src->rr.fesr_cover);
+
+ src->rr.fesr_sibling =
+ fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
+
+ /*
+ * if the cover is attached then install an attached-host path
+ * (like an adj-fib). Otherwise inherit the forwarding from the cover
+ */
+ if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover))
+ {
+ fib_entry_src_rr_resolve_via_connected(src, fib_entry, cover);
+ }
+ else
+ {
+ fib_entry_src_rr_use_covers_pl(src, fib_entry, cover);
+ }
+ fib_path_list_lock(src->fes_pl);
+
+ /*
+ * return go for install
+ */
+ return (!0);
+}
+
+/**
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+static void
+fib_entry_src_rr_deactivate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * remove the dependency on the covering entry
+ */
+ if (FIB_NODE_INDEX_INVALID != src->rr.fesr_cover)
+ {
+ cover = fib_entry_get(src->rr.fesr_cover);
+ fib_entry_cover_untrack(cover, src->rr.fesr_sibling);
+ src->rr.fesr_cover = FIB_NODE_INDEX_INVALID;
+ }
+
+ fib_path_list_unlock(src->fes_pl);
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+ src->fes_entry_flags = FIB_ENTRY_FLAG_NONE;
+}
+
+static fib_entry_src_cover_res_t
+fib_entry_src_rr_cover_change (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+
+ if (FIB_NODE_INDEX_INVALID == src->rr.fesr_cover)
+ {
+ /*
+ * the source may be added, but it is not active
+ * if it is not tracking the cover.
+ */
+ return (res);
+ }
+
+ /*
+ * this function is called when this entry's cover has a more specific
+ * entry inserted beneath it. That does not necessarily mean that this
+ * entry is covered by the new prefix, so check that it is.
+ */
+ if (src->rr.fesr_cover != fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix))
+ {
+ fib_entry_src_rr_deactivate(src, fib_entry);
+ fib_entry_src_rr_activate(src, fib_entry);
+
+ /*
+ * dependent children need to re-resolve to the new forwarding info
+ */
+ res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+ }
+ return (res);
+}
+
+/*
+ * fib_entry_src_rr_cover_update
+ *
+ * This entry's cover has updated its forwarding info. This entry
+ * will need to re-inherit.
+ */
+static fib_entry_src_cover_res_t
+fib_entry_src_rr_cover_update (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+ fib_node_index_t old_path_list;
+ fib_entry_t *cover;
+
+ if (FIB_NODE_INDEX_INVALID == src->rr.fesr_cover)
+ {
+ /*
+ * the source may be added, but it is not active
+ * if it is not tracking the cover.
+ */
+ return (res);
+ }
+
+ cover = fib_entry_get(src->rr.fesr_cover);
+ old_path_list = src->fes_pl;
+
+ /*
+ * if the cover is attached then install an attached-host path
+ * (like an adj-fib). Otherwise inherit the forwarding from the cover
+ */
+ if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover))
+ {
+ fib_entry_src_rr_resolve_via_connected(src, fib_entry, cover);
+ }
+ else
+ {
+ fib_entry_src_rr_use_covers_pl(src, fib_entry, cover);
+ }
+ fib_path_list_lock(src->fes_pl);
+ fib_path_list_unlock(old_path_list);
+
+ /*
+ * dependent children need to re-resolve to the new forwarding info
+ */
+ res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+
+ return (res);
+}
+
+static u8*
+fib_entry_src_rr_format (fib_entry_src_t *src,
+ u8* s)
+{
+ return (format(s, "cover:%d", src->rr.fesr_cover));
+}
+
+const static fib_entry_src_vft_t rr_src_vft = {
+ .fesv_init = fib_entry_src_rr_init,
+ .fesv_activate = fib_entry_src_rr_activate,
+ .fesv_deactivate = fib_entry_src_rr_deactivate,
+ .fesv_cover_change = fib_entry_src_rr_cover_change,
+ .fesv_cover_update = fib_entry_src_rr_cover_update,
+ .fesv_format = fib_entry_src_rr_format,
+};
+
+void
+fib_entry_src_rr_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_RR, &rr_src_vft);
+ fib_entry_src_register(FIB_SOURCE_URPF_EXEMPT, &rr_src_vft);
+}
diff --git a/src/vnet/fib/fib_entry_src_special.c b/src/vnet/fib/fib_entry_src_special.c
new file mode 100644
index 00000000..e979e18f
--- /dev/null
+++ b/src/vnet/fib/fib_entry_src_special.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_special_init (fib_entry_src_t *src)
+{
+ src->fes_flags = FIB_ENTRY_SRC_FLAG_NONE;
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_special_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_special_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_special_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ dpo_proto_t proto,
+ const dpo_id_t *dpo)
+{
+ src->fes_pl =
+ fib_path_list_create_special(proto,
+ fib_entry_src_flags_2_path_list_flags(flags),
+ dpo);
+}
+
+const static fib_entry_src_vft_t special_src_vft = {
+ .fesv_init = fib_entry_src_special_init,
+ .fesv_deinit = fib_entry_src_special_deinit,
+ .fesv_add = fib_entry_src_special_add,
+ .fesv_remove = fib_entry_src_special_remove,
+};
+
+void
+fib_entry_src_special_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_SPECIAL, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_MAP, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_SIXRD, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_CLASSIFY, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_AE, &special_src_vft);
+}
diff --git a/src/vnet/fib/fib_internal.h b/src/vnet/fib/fib_internal.h
new file mode 100644
index 00000000..8abc0e07
--- /dev/null
+++ b/src/vnet/fib/fib_internal.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_INTERNAL_H__
+#define __FIB_INTERNAL_H__
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * Big train switch; FIB debugs on or off
+ */
+#undef FIB_DEBUG
+
+extern void fib_prefix_from_mpls_label(mpls_label_t label,
+ mpls_eos_bit_t eos,
+ fib_prefix_t *prf);
+
+extern int fib_route_path_cmp(const fib_route_path_t *rpath1,
+ const fib_route_path_t *rpath2);
+
+/**
+ * @brief
+ * Add or update an entry in the FIB's forwarding table.
+ * This is called from the fib_entry code. It is not meant to be used
+ * by the client/source.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add/update
+ *
+ * @param dpo
+ * The data-path object to use for forwarding
+ */
+extern void fib_table_fwding_dpo_update(u32 fib_index,
+ const fib_prefix_t *prefix,
+ const dpo_id_t *dpo);
+/**
+ * @brief
+ * remove an entry in the FIB's forwarding table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add/update
+ *
+ * @param dpo
+ * The data-path object to use for forwarding
+ */
+extern void fib_table_fwding_dpo_remove(u32 fib_index,
+ const fib_prefix_t *prefix,
+ const dpo_id_t *dpo);
+
+
+#endif
diff --git a/src/vnet/fib/fib_node.c b/src/vnet/fib/fib_node.c
new file mode 100644
index 00000000..db3e22bb
--- /dev/null
+++ b/src/vnet/fib/fib_node.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/fib/fib_node_list.h>
+
+/*
+ * The per-type vector of virtual function tables
+ */
+static fib_node_vft_t *fn_vfts;
+
+/**
+ * The last registered new type
+ */
+static fib_node_type_t last_new_type = FIB_NODE_TYPE_LAST;
+
+/*
+ * the node type names
+ */
+static const char *fn_type_names[] = FIB_NODE_TYPES;
+
+const char*
+fib_node_type_get_name (fib_node_type_t type)
+{
+ if (type < FIB_NODE_TYPE_LAST)
+ return (fn_type_names[type]);
+ else
+ {
+ if (NULL != fn_vfts[type].fnv_format)
+ {
+ return ("fixme");
+ }
+ else
+ {
+ return ("unknown");
+ }
+ }
+}
+
+/**
+ * fib_node_register_type
+ *
+ * Register the function table for a given type
+ */
+void
+fib_node_register_type (fib_node_type_t type,
+ const fib_node_vft_t *vft)
+{
+ /*
+ * assert that only one registration is made per node type
+ */
+ if (vec_len(fn_vfts) > type)
+ ASSERT(NULL == fn_vfts[type].fnv_get);
+
+ /*
+ * Assert that we are getting each of the required functions
+ */
+ ASSERT(NULL != vft->fnv_get);
+ ASSERT(NULL != vft->fnv_last_lock);
+
+ vec_validate(fn_vfts, type);
+ fn_vfts[type] = *vft;
+}
+
+fib_node_type_t
+fib_node_register_new_type (const fib_node_vft_t *vft)
+{
+ fib_node_type_t new_type;
+
+ new_type = ++last_new_type;
+
+ fib_node_register_type(new_type, vft);
+
+ return (new_type);
+}
+
+static u8*
+fib_node_format (fib_node_ptr_t *fnp, u8*s)
+{
+ return (format(s, "{%s:%d}", fn_type_names[fnp->fnp_type], fnp->fnp_index));
+}
+
+u32
+fib_node_child_add (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_type_t type,
+ fib_node_index_t index)
+{
+ fib_node_t *parent;
+
+ parent = fn_vfts[parent_type].fnv_get(parent_index);
+
+ /*
+ * return the index of the sibling in the child list
+ */
+ fib_node_lock(parent);
+
+ if (FIB_NODE_INDEX_INVALID == parent->fn_children)
+ {
+ parent->fn_children = fib_node_list_create();
+ }
+
+ return (fib_node_list_push_front(parent->fn_children,
+ 0, type,
+ index));
+}
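+
+/*
+ * Usage sketch (editorial): an object that resolves via a FIB entry
+ * registers as a child so it is back-walked when the entry changes:
+ *
+ *   u32 sibling = fib_node_child_add(FIB_NODE_TYPE_ENTRY, fei,
+ *                                    FIB_NODE_TYPE_GRE_TUNNEL, ti);
+ *   ...
+ *   fib_node_child_remove(FIB_NODE_TYPE_ENTRY, fei, sibling);
+ *
+ * 'fei' and 'ti' are hypothetical entry/tunnel indices.
+ */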
+
+void
+fib_node_child_remove (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_index_t sibling_index)
+{
+ fib_node_t *parent;
+
+ parent = fn_vfts[parent_type].fnv_get(parent_index);
+
+ fib_node_list_remove(parent->fn_children, sibling_index);
+
+ if (0 == fib_node_list_get_size(parent->fn_children))
+ {
+ fib_node_list_destroy(&parent->fn_children);
+ }
+
+ fib_node_unlock(parent);
+}
+
+u32
+fib_node_get_n_children (fib_node_type_t parent_type,
+ fib_node_index_t parent_index)
+{
+ fib_node_t *parent;
+
+ parent = fn_vfts[parent_type].fnv_get(parent_index);
+
+ return (fib_node_list_get_size(parent->fn_children));
+}
+
+
+fib_node_back_walk_rc_t
+fib_node_back_walk_one (fib_node_ptr_t *ptr,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_node_t *node;
+
+ node = fn_vfts[ptr->fnp_type].fnv_get(ptr->fnp_index);
+
+ return (fn_vfts[ptr->fnp_type].fnv_back_walk(node, ctx));
+}
+
+static int
+fib_node_ptr_format_one_child (fib_node_ptr_t *ptr,
+ void *arg)
+{
+ u8 **s = (u8**) arg;
+
+ *s = fib_node_format(ptr, *s);
+
+ return (1);
+}
+
+u8*
+fib_node_children_format (fib_node_list_t list,
+ u8 *s)
+{
+ fib_node_list_walk(list, fib_node_ptr_format_one_child, (void*)&s);
+
+ return (s);
+}
+
+void
+fib_node_init (fib_node_t *node,
+ fib_node_type_t type)
+{
+#if CLIB_DEBUG > 0
+ /**
+ * The node's type. make sure we are dynamic/down casting correctly
+ */
+ node->fn_type = type;
+#endif
+ node->fn_locks = 0;
+ node->fn_vft = &fn_vfts[type];
+ node->fn_children = FIB_NODE_INDEX_INVALID;
+}
+
+void
+fib_node_deinit (fib_node_t *node)
+{
+ fib_node_list_destroy(&node->fn_children);
+}
+
+void
+fib_node_lock (fib_node_t *node)
+{
+ node->fn_locks++;
+}
+
+void
+fib_node_unlock (fib_node_t *node)
+{
+ node->fn_locks--;
+
+ if (0 == node->fn_locks)
+ {
+ node->fn_vft->fnv_last_lock(node);
+ }
+}
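+
+/*
+ * Usage sketch (editorial): a type embeds a fib_node_t, initialises
+ * it, and tears itself down from its fnv_last_lock callback:
+ *
+ *   fib_node_init(&obj->node, FIB_NODE_TYPE_TEST);
+ *   fib_node_lock(&obj->node);    // the creator's reference
+ *   ...
+ *   fib_node_unlock(&obj->node);  // last unlock invokes fnv_last_lock
+ *
+ * 'obj' is a hypothetical object embedding a fib_node_t.
+ */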
+
+void
+fib_show_memory_usage (const char *name,
+ u32 in_use_elts,
+ u32 allocd_elts,
+ size_t size_elt)
+{
+ vlib_cli_output (vlib_get_main(), "%=30s %=5d %=8d/%=9d %d/%d ",
+ name, size_elt,
+ in_use_elts, allocd_elts,
+ in_use_elts*size_elt, allocd_elts*size_elt);
+}
+
+static clib_error_t *
+fib_memory_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ fib_node_vft_t *vft;
+
+ vlib_cli_output (vm, "FIB memory");
+ vlib_cli_output (vm, "%=30s %=5s %=8s/%=9s totals",
+ "Name","Size", "in-use", "allocated");
+
+ vec_foreach(vft, fn_vfts)
+ {
+ if (NULL != vft->fnv_mem_show)
+ vft->fnv_mem_show();
+ }
+
+ fib_node_list_memory_show();
+
+ return (NULL);
+}
+
+/* *INDENT-OFF* */
+/*?
+ * The '<em>sh fib memory </em>' command displays the memory usage for each
+ * FIB object type.
+ *
+ * @cliexpar
+ * @cliexstart{show fib memory}
+ * FIB memory
+ * Name Size in-use /allocated totals
+ * Entry 120 11 / 11 1320/1320
+ * Entry Source 32 11 / 11 352/352
+ * Entry Path-Extensions 44 0 / 0 0/0
+ * Path-list 40 11 / 11 440/440
+ * Path 88 11 / 11 968/968
+ * Node-list elements 20 11 / 11 220/220
+ * Node-list heads 8 13 / 13 104/104
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_fib_memory, static) = {
+ .path = "show fib memory",
+ .function = fib_memory_show,
+ .short_help = "show fib memory",
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h
new file mode 100644
index 00000000..ec517e15
--- /dev/null
+++ b/src/vnet/fib/fib_node.h
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_NODE_H__
+#define __FIB_NODE_H__
+
+#include <vnet/fib/fib_types.h>
+
+/**
+ * The types of nodes in a FIB graph
+ */
+typedef enum fib_node_type_t_ {
+ /**
+ * Marker. New types after this one.
+ */
+ FIB_NODE_TYPE_FIRST = 0,
+ /**
+ * See the respective fib_*.h files for descriptions of these objects.
+ */
+ FIB_NODE_TYPE_WALK,
+ FIB_NODE_TYPE_ENTRY,
+ FIB_NODE_TYPE_MFIB_ENTRY,
+ FIB_NODE_TYPE_PATH_LIST,
+ FIB_NODE_TYPE_PATH,
+ FIB_NODE_TYPE_ADJ,
+ FIB_NODE_TYPE_MPLS_ENTRY,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY,
+ FIB_NODE_TYPE_LISP_ADJ,
+ FIB_NODE_TYPE_GRE_TUNNEL,
+ FIB_NODE_TYPE_VXLAN_TUNNEL,
+ FIB_NODE_TYPE_MAP_E,
+ FIB_NODE_TYPE_VXLAN_GPE_TUNNEL,
+ /**
+ * Marker. New types before this one. leave the test last.
+ */
+ FIB_NODE_TYPE_TEST,
+ FIB_NODE_TYPE_LAST = FIB_NODE_TYPE_TEST,
+} fib_node_type_t;
+
+#define FIB_NODE_TYPE_MAX (FIB_NODE_TYPE_LAST + 1)
+
+#define FIB_NODE_TYPES { \
+ [FIB_NODE_TYPE_ENTRY] = "entry", \
+ [FIB_NODE_TYPE_MFIB_ENTRY] = "mfib-entry", \
+ [FIB_NODE_TYPE_WALK] = "walk", \
+ [FIB_NODE_TYPE_PATH_LIST] = "path-list", \
+ [FIB_NODE_TYPE_PATH] = "path", \
+ [FIB_NODE_TYPE_MPLS_ENTRY] = "mpls-entry", \
+ [FIB_NODE_TYPE_MPLS_TUNNEL] = "mpls-tunnel", \
+ [FIB_NODE_TYPE_ADJ] = "adj", \
+ [FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY] = "lisp-gpe-fwd-entry", \
+ [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj", \
+ [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel", \
+ [FIB_NODE_TYPE_VXLAN_TUNNEL] = "vxlan-tunnel", \
+ [FIB_NODE_TYPE_MAP_E] = "map-e", \
+ [FIB_NODE_TYPE_VXLAN_GPE_TUNNEL] = "vxlan-gpe-tunnel", \
+}
+
+/**
+ * Reasons for backwalking the FIB object graph
+ */
+typedef enum fib_node_back_walk_reason_t_ {
+ /**
+ * Marker. Add new ones after.
+ */
+ FIB_NODE_BW_REASON_FIRST = 0,
+ /**
+ * Walk to re-resolve the child.
+ * Used when the parent is no longer a valid resolution target
+ */
+ FIB_NODE_BW_REASON_RESOLVE = FIB_NODE_BW_REASON_FIRST,
+ /**
+ * Walk to re-evaluate the forwarding contributed by the parent.
+ * Used when a parent's forwarding changes and the child needs to
+ * incorporate this change in its forwarding.
+ */
+ FIB_NODE_BW_REASON_EVALUATE,
+ /**
+ * A resolving interface has come up
+ */
+ FIB_NODE_BW_REASON_INTERFACE_UP,
+ /**
+ * A resolving interface has gone down
+ */
+ FIB_NODE_BW_REASON_INTERFACE_DOWN,
+ /**
+ * A resolving interface has been deleted.
+ */
+ FIB_NODE_BW_REASON_INTERFACE_DELETE,
+ /**
+ * Walk to re-collapse the multipath adjs when the rewrite of
+ * a unipath adjacency changes
+ */
+ FIB_NODE_BW_REASON_ADJ_UPDATE,
+ /**
+ * Walk to update children to inform them the adjacency is now down.
+ */
+ FIB_NODE_BW_REASON_ADJ_DOWN,
+ /**
+ * Marker. Add new before and update
+ */
+ FIB_NODE_BW_REASON_LAST = FIB_NODE_BW_REASON_ADJ_DOWN,
+} fib_node_back_walk_reason_t;
+
+#define FIB_NODE_BW_REASONS { \
+ [FIB_NODE_BW_REASON_RESOLVE] = "resolve", \
+ [FIB_NODE_BW_REASON_EVALUATE] = "evaluate", \
+ [FIB_NODE_BW_REASON_INTERFACE_UP] = "if-up", \
+ [FIB_NODE_BW_REASON_INTERFACE_DOWN] = "if-down", \
+ [FIB_NODE_BW_REASON_INTERFACE_DELETE] = "if-delete", \
+ [FIB_NODE_BW_REASON_ADJ_UPDATE] = "adj-update", \
+ [FIB_NODE_BW_REASON_ADJ_DOWN] = "adj-down", \
+}
+
+#define FOR_EACH_FIB_NODE_BW_REASON(_item) \
+ for (_item = FIB_NODE_BW_REASON_FIRST; \
+ _item <= FIB_NODE_BW_REASON_LAST; \
+ _item++)
+
+/**
+ * Flags enum constructed from the reasons
+ */
+typedef enum fib_node_bw_reason_flag_t_ {
+ FIB_NODE_BW_REASON_FLAG_NONE = 0,
+ FIB_NODE_BW_REASON_FLAG_RESOLVE = (1 << FIB_NODE_BW_REASON_RESOLVE),
+ FIB_NODE_BW_REASON_FLAG_EVALUATE = (1 << FIB_NODE_BW_REASON_EVALUATE),
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP = (1 << FIB_NODE_BW_REASON_INTERFACE_UP),
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN = (1 << FIB_NODE_BW_REASON_INTERFACE_DOWN),
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE = (1 << FIB_NODE_BW_REASON_INTERFACE_DELETE),
+ FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE = (1 << FIB_NODE_BW_REASON_ADJ_UPDATE),
+ FIB_NODE_BW_REASON_FLAG_ADJ_DOWN = (1 << FIB_NODE_BW_REASON_ADJ_DOWN),
+} __attribute__ ((packed)) fib_node_bw_reason_flag_t;
+
+STATIC_ASSERT(sizeof(fib_node_bw_reason_flag_t) < 2,
+ "BW Reason enum < 2 byte. Consequences for cover_upd_res_t");
+
+/**
+ * Flags on the walk
+ */
+typedef enum fib_node_bw_flags_t_
+{
+ /**
+ * Force the walk to be synchronous
+ */
+ FIB_NODE_BW_FLAG_FORCE_SYNC = (1 << 0),
+} fib_node_bw_flags_t;
+
+/**
+ * Forward declarations
+ */
+struct fib_node_t_;
+
+/**
+ * A representation of one pointer to another node.
+ * To fully qualify a node, one must know its type and its index so it
+ * can be retrieved from the appropriate pool. Direct pointers to nodes
+ * are forbidden, since all nodes are allocated from pools, which are vectors,
+ * and thus subject to realloc at any time.
+ */
+typedef struct fib_node_ptr_t_ {
+ /**
+ * node type
+ */
+ fib_node_type_t fnp_type;
+ /**
+ * node's index
+ */
+ fib_node_index_t fnp_index;
+} fib_node_ptr_t;
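+
+/*
+ * Illustrative sketch (editorial): a fib_node_ptr_t is followed by
+ * looking the node up through its type's registered vft, never by
+ * caching a pointer, e.g.:
+ *
+ *   fib_node_t *n = fn_vfts[ptr->fnp_type].fnv_get(ptr->fnp_index);
+ *
+ * (fn_vfts is private to fib_node.c; it is shown here only to make
+ * the {type,index} indirection concrete.)
+ */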
+
+/**
+ * @brief A list of FIB nodes.
+ */
+typedef u32 fib_node_list_t;
+
+/**
+ * Context passed between object during a back walk.
+ */
+typedef struct fib_node_back_walk_ctx_t_ {
+ /**
+ * The reason/trigger for the backwalk
+ */
+ fib_node_bw_reason_flag_t fnbw_reason;
+
+ /**
+ * additional flags for the walk
+ */
+ fib_node_bw_flags_t fnbw_flags;
+
+ /**
+ * the number of levels the walk has already traversed.
+ * this value is maintained by the walk infra, to limit the depth of
+ * a walk so it does not run indefinitely in the presence of a loop/cycle
+ * in the graph.
+ */
+ u32 fnbw_depth;
+} fib_node_back_walk_ctx_t;
+
+/**
+ * We consider a depth of 32 to be sufficient to cover all sane
+ * network topologies. Anything more is then an indication that
+ * there is a loop/cycle in the FIB graph.
+ * Note that all object types contribute 1 to the depth.
+ */
+#define FIB_NODE_GRAPH_MAX_DEPTH ((u32)32)
+
+/**
+ * A callback function for walking a node dependency list
+ */
+typedef int (*fib_node_ptr_walk_t)(fib_node_ptr_t *depend,
+ void *ctx);
+
+/**
+ * A list of dependent nodes.
+ * This is currently implemented as a hash_table of fib_node_ptr_t
+ */
+typedef fib_node_ptr_t fib_node_ptr_list_t;
+
+/**
+ * Return code from a back walk function
+ */
+typedef enum fib_node_back_walk_rc_t_ {
+ FIB_NODE_BACK_WALK_MERGE,
+ FIB_NODE_BACK_WALK_CONTINUE,
+} fib_node_back_walk_rc_t;
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+typedef fib_node_back_walk_rc_t (*fib_node_back_walk_t)(
+ struct fib_node_t_ *node,
+ fib_node_back_walk_ctx_t *ctx);
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+typedef struct fib_node_t_* (*fib_node_get_t)(fib_node_index_t index);
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+typedef void (*fib_node_last_lock_gone_t)(struct fib_node_t_ *node);
+
+/**
+ * Function definition to display the amount of memory used by a type.
+ * Implementations should call fib_show_memory_usage()
+ */
+typedef void (*fib_node_memory_show_t)(void);
+
+/**
+ * A FIB graph nodes virtual function table
+ */
+typedef struct fib_node_vft_t_ {
+ fib_node_get_t fnv_get;
+ fib_node_last_lock_gone_t fnv_last_lock;
+ fib_node_back_walk_t fnv_back_walk;
+ format_function_t *fnv_format;
+ fib_node_memory_show_t fnv_mem_show;
+} fib_node_vft_t;
+
+/**
+ * A node in the FIB graph
+ *
+ * Objects in the FIB form a graph.
+ */
+typedef struct fib_node_t_ {
+#if CLIB_DEBUG > 0
+ /**
+ * The node's type. make sure we are dynamic/down casting correctly
+ */
+ fib_node_type_t fn_type;
+#endif
+ /**
+ * The node's VFT.
+ * we could store the type here instead, and lookup the VFT using that. But
+ * I like this better,
+ */
+ const fib_node_vft_t *fn_vft;
+
+ /**
+ * Vector of nodes that depend upon/use/share this node
+ */
+ fib_node_list_t fn_children;
+
+ /**
+ * Number of dependents on this node. This number includes the number
+ * of children
+ */
+ u32 fn_locks;
+} fib_node_t;
+
+/**
+ * @brief
+ * Register the function table for a given type
+ *
+ * @param ft
+ * FIB node type
+ *
+ * @param vft
+ * virtual function table
+ */
+extern void fib_node_register_type (fib_node_type_t ft,
+ const fib_node_vft_t *vft);
+
+/**
+ * @brief
+ * Create a new FIB node type and Register the function table for it.
+ *
+ * @param vft
+ * virtual function table
+ *
+ * @return new FIB node type
+ */
+extern fib_node_type_t fib_node_register_new_type (const fib_node_vft_t *vft);
+
+/**
+ * @brief Show the memory usage for a type
+ *
+ * This should be invoked by the type in response to the infra calling
+ * its registered memory show function
+ *
+ * @param name the name of the type
+ * @param in_use_elts The number of elements in use
+ * @param allocd_elts The number of allocated pool elements
+ * @param size_elt The size of one element
+ */
+extern void fib_show_memory_usage(const char *name,
+ u32 in_use_elts,
+ u32 allocd_elts,
+ size_t size_elt);
+
+extern void fib_node_init(fib_node_t *node,
+ fib_node_type_t ft);
+extern void fib_node_deinit(fib_node_t *node);
+
+extern void fib_node_lock(fib_node_t *node);
+extern void fib_node_unlock(fib_node_t *node);
+
+extern u32 fib_node_get_n_children(fib_node_type_t parent_type,
+ fib_node_index_t parent_index);
+extern u32 fib_node_child_add(fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_type_t child_type,
+ fib_node_index_t child_index);
+extern void fib_node_child_remove(fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_index_t sibling_index);
+
+extern fib_node_back_walk_rc_t fib_node_back_walk_one(fib_node_ptr_t *ptr,
+ fib_node_back_walk_ctx_t *ctx);
+
+extern u8* fib_node_children_format(fib_node_list_t list,
+ u8 *s);
+
+extern const char* fib_node_type_get_name(fib_node_type_t type);
+
+static inline int
+fib_node_index_is_valid (fib_node_index_t ni)
+{
+ return (FIB_NODE_INDEX_INVALID != ni);
+}
+
+#endif
+
diff --git a/src/vnet/fib/fib_node_list.c b/src/vnet/fib/fib_node_list.c
new file mode 100644
index 00000000..ceb951b4
--- /dev/null
+++ b/src/vnet/fib/fib_node_list.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief A list of FIB nodes that is heterogeneous w.r.t. FIB node type.
+ * Since we cannot use C pointers, due to memory reallocs, the next/prev
+ * are described as an index to an element; each element holds a
+ * key:{type,index} pointer to its owner.
+ */
+
+#include <vnet/fib/fib_node_list.h>
+
+/**
+ * @brief An element in the list
+ */
+typedef struct fib_node_list_elt_t_
+{
+ /**
+ * The index of the list this element is in
+ */
+ fib_node_list_t fnle_list;
+
+ /**
+ * The owner of this element
+ */
+ fib_node_ptr_t fnle_owner;
+
+ /**
+ * The next element in the list
+ */
+ u32 fnle_next;
+
+ /**
+ * The previous element in the list
+ */
+ u32 fnle_prev;
+} fib_node_list_elt_t;
+
+/**
+ * @brief A list of FIB nodes
+ */
+typedef struct fib_node_list_head_t_
+{
+ /**
+ * The head element
+ */
+ u32 fnlh_head;
+
+ /**
+ * Number of elements in the list
+ */
+ u32 fnlh_n_elts;
+} fib_node_list_head_t;
+
+/**
+ * Pools of list elements and heads
+ */
+static fib_node_list_elt_t *fib_node_list_elt_pool;
+static fib_node_list_head_t *fib_node_list_head_pool;
+
+static index_t
+fib_node_list_elt_get_index (fib_node_list_elt_t *elt)
+{
+ return (elt - fib_node_list_elt_pool);
+}
+
+static fib_node_list_elt_t *
+fib_node_list_elt_get (index_t fi)
+{
+ return (pool_elt_at_index(fib_node_list_elt_pool, fi));
+}
+
+static index_t
+fib_node_list_head_get_index (fib_node_list_head_t *head)
+{
+ return (head - fib_node_list_head_pool);
+}
+
+static fib_node_list_head_t *
+fib_node_list_head_get (fib_node_list_t fi)
+{
+ return (pool_elt_at_index(fib_node_list_head_pool, fi));
+}
+
+static fib_node_list_elt_t *
+fib_node_list_elt_create (fib_node_list_head_t *head,
+ int id,
+ fib_node_type_t type,
+ fib_node_index_t index)
+{
+ fib_node_list_elt_t *elt;
+
+ pool_get(fib_node_list_elt_pool, elt);
+
+ elt->fnle_list = fib_node_list_head_get_index(head);
+ elt->fnle_owner.fnp_type = type;
+ elt->fnle_owner.fnp_index = index;
+
+ elt->fnle_next = FIB_NODE_INDEX_INVALID;
+ elt->fnle_prev = FIB_NODE_INDEX_INVALID;
+
+ return (elt);
+}
+
+static void
+fib_node_list_head_init (fib_node_list_head_t *head)
+{
+ head->fnlh_n_elts = 0;
+ head->fnlh_head = FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * @brief Create a new node list.
+ */
+fib_node_list_t
+fib_node_list_create (void)
+{
+ fib_node_list_head_t *head;
+
+ pool_get(fib_node_list_head_pool, head);
+
+ fib_node_list_head_init(head);
+
+ return (fib_node_list_head_get_index(head));
+}
+
+void
+fib_node_list_destroy (fib_node_list_t *list)
+{
+ fib_node_list_head_t *head;
+
+ if (FIB_NODE_INDEX_INVALID == *list)
+ return;
+
+ head = fib_node_list_head_get(*list);
+ ASSERT(0 == head->fnlh_n_elts);
+
+ pool_put(fib_node_list_head_pool, head);
+ *list = FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * @brief Insert an element at the front of the list.
+ */
+u32
+fib_node_list_push_front (fib_node_list_t list,
+ int owner_id,
+ fib_node_type_t type,
+ fib_node_index_t index)
+{
+ fib_node_list_elt_t *elt, *next;
+ fib_node_list_head_t *head;
+
+ head = fib_node_list_head_get(list);
+ elt = fib_node_list_elt_create(head, owner_id, type, index);
+
+ elt->fnle_prev = FIB_NODE_INDEX_INVALID;
+ elt->fnle_next = head->fnlh_head;
+
+ if (FIB_NODE_INDEX_INVALID != head->fnlh_head)
+ {
+ next = fib_node_list_elt_get(head->fnlh_head);
+ next->fnle_prev = fib_node_list_elt_get_index(elt);
+ }
+ head->fnlh_head = fib_node_list_elt_get_index(elt);
+
+ head->fnlh_n_elts++;
+
+ return (fib_node_list_elt_get_index(elt));
+}
+
+u32
+fib_node_list_push_back (fib_node_list_t list,
+ int owner_id,
+ fib_node_type_t type,
+ fib_node_index_t index)
+{
+ /*
+ * push_back is not yet implemented
+ */
+ ASSERT(0);
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+static void
+fib_node_list_extract (fib_node_list_head_t *head,
+ fib_node_list_elt_t *elt)
+{
+ fib_node_list_elt_t *next, *prev;
+
+ if (FIB_NODE_INDEX_INVALID != elt->fnle_next)
+ {
+ next = fib_node_list_elt_get(elt->fnle_next);
+ next->fnle_prev = elt->fnle_prev;
+ }
+
+ if (FIB_NODE_INDEX_INVALID != elt->fnle_prev)
+ {
+ prev = fib_node_list_elt_get(elt->fnle_prev);
+ prev->fnle_next = elt->fnle_next;
+ }
+ else
+ {
+ ASSERT (fib_node_list_elt_get_index(elt) == head->fnlh_head);
+ head->fnlh_head = elt->fnle_next;
+ }
+}
+
+static void
+fib_node_list_insert_after (fib_node_list_head_t *head,
+ fib_node_list_elt_t *prev,
+ fib_node_list_elt_t *elt)
+{
+ fib_node_list_elt_t *next;
+
+ elt->fnle_next = prev->fnle_next;
+ if (FIB_NODE_INDEX_INVALID != prev->fnle_next)
+ {
+ next = fib_node_list_elt_get(prev->fnle_next);
+ next->fnle_prev = fib_node_list_elt_get_index(elt);
+ }
+ prev->fnle_next = fib_node_list_elt_get_index(elt);
+ elt->fnle_prev = fib_node_list_elt_get_index(prev);
+}
+
+void
+fib_node_list_remove (fib_node_list_t list,
+ u32 sibling)
+{
+ fib_node_list_head_t *head;
+ fib_node_list_elt_t *elt;
+
+ head = fib_node_list_head_get(list);
+ elt = fib_node_list_elt_get(sibling);
+
+ fib_node_list_extract(head, elt);
+
+ head->fnlh_n_elts--;
+ pool_put(fib_node_list_elt_pool, elt);
+}
+
+void
+fib_node_list_elt_remove (u32 sibling)
+{
+ fib_node_list_elt_t *elt;
+
+ elt = fib_node_list_elt_get(sibling);
+
+ fib_node_list_remove(elt->fnle_list, sibling);
+}
+
+/**
+ * @brief Advance the sibling one step (toward the tail) in the list.
+ * Return 0 if at the end of the list, 1 otherwise.
+ */
+int
+fib_node_list_advance (u32 sibling)
+{
+ fib_node_list_elt_t *elt, *next;
+ fib_node_list_head_t *head;
+
+ elt = fib_node_list_elt_get(sibling);
+ head = fib_node_list_head_get(elt->fnle_list);
+
+ if (FIB_NODE_INDEX_INVALID != elt->fnle_next)
+ {
+ /*
+ * not at the end of the list
+ */
+ next = fib_node_list_elt_get(elt->fnle_next);
+
+ fib_node_list_extract(head, elt);
+ fib_node_list_insert_after(head, next, elt);
+
+ return (1);
+ }
+ else
+ {
+ return (0);
+ }
+}
+
+int
+fib_node_list_elt_get_next (u32 sibling,
+ fib_node_ptr_t *ptr)
+{
+ fib_node_list_elt_t *elt, *next;
+
+ elt = fib_node_list_elt_get(sibling);
+
+ if (FIB_NODE_INDEX_INVALID != elt->fnle_next)
+ {
+ next = fib_node_list_elt_get(elt->fnle_next);
+
+ *ptr = next->fnle_owner;
+ return (1);
+ }
+ else
+ {
+ ptr->fnp_index = FIB_NODE_INDEX_INVALID;
+ return (0);
+ }
+}
+
+u32
+fib_node_list_get_size (fib_node_list_t list)
+{
+ fib_node_list_head_t *head;
+
+ if (FIB_NODE_INDEX_INVALID == list)
+ {
+ return (0);
+ }
+
+ head = fib_node_list_head_get(list);
+
+ return (head->fnlh_n_elts);
+}
+
+int
+fib_node_list_get_front (fib_node_list_t list,
+ fib_node_ptr_t *ptr)
+{
+ fib_node_list_head_t *head;
+ fib_node_list_elt_t *elt;
+
+ if (0 == fib_node_list_get_size(list))
+ {
+ ptr->fnp_index = FIB_NODE_INDEX_INVALID;
+ return (0);
+ }
+
+ head = fib_node_list_head_get(list);
+ elt = fib_node_list_elt_get(head->fnlh_head);
+
+ *ptr = elt->fnle_owner;
+
+ return (1);
+}
+
+/**
+ * @brief Walk the list of nodes. This must be safe w.r.t. the removal
+ * of nodes during the walk.
+ */
+void
+fib_node_list_walk (fib_node_list_t list,
+ fib_node_list_walk_cb_t fn,
+ void *args)
+{
+ fib_node_list_elt_t *elt;
+ fib_node_list_head_t *head;
+ u32 sibling;
+
+ if (FIB_NODE_INDEX_INVALID == list)
+ {
+ return;
+ }
+
+ head = fib_node_list_head_get(list);
+ sibling = head->fnlh_head;
+
+ while (FIB_NODE_INDEX_INVALID != sibling)
+ {
+ elt = fib_node_list_elt_get(sibling);
+ sibling = elt->fnle_next;
+
+ fn(&elt->fnle_owner, args);
+ }
+}
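+
+/*
+ * Illustrative sketch (not part of this patch): a minimal walk callback;
+ * my_walk_cb is hypothetical. The walk is removal-safe because the next
+ * index is saved before the callback runs:
+ *
+ *   static int
+ *   my_walk_cb (fib_node_ptr_t *owner, void *args)
+ *   {
+ *       clib_warning("%d:%d", owner->fnp_type, owner->fnp_index);
+ *       return (1);
+ *   }
+ */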
+
+void
+fib_node_list_memory_show (void)
+{
+ fib_show_memory_usage("Node-list elements",
+ pool_elts(fib_node_list_elt_pool),
+ pool_len(fib_node_list_elt_pool),
+ sizeof(fib_node_list_elt_t));
+ fib_show_memory_usage("Node-list heads",
+ pool_elts(fib_node_list_head_pool),
+ pool_len(fib_node_list_head_pool),
+ sizeof(fib_node_list_head_t));
+}
diff --git a/src/vnet/fib/fib_node_list.h b/src/vnet/fib/fib_node_list.h
new file mode 100644
index 00000000..9567b966
--- /dev/null
+++ b/src/vnet/fib/fib_node_list.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief A list of FIB nodes that is heterogeneous w.r.t. FIB node type.
+ * Since we cannot use C pointers, due to memory reallocs, the next/prev
+ * are described as an index to an element. Each element contains a pointer
+ * (key:{type, index}) to a FIB node.
+ */
+
+#ifndef __FIB_NODE_LIST_H__
+#define __FIB_NODE_LIST_H__
+
+#include <vnet/fib/fib_node.h>
+
+extern fib_node_list_t fib_node_list_create(void);
+extern void fib_node_list_destroy(fib_node_list_t *list);
+
+extern u32 fib_node_list_push_front(fib_node_list_t head,
+ int owner_id,
+ fib_node_type_t type,
+ fib_node_index_t index);
+extern u32 fib_node_list_push_back(fib_node_list_t head,
+ int owner_id,
+ fib_node_type_t type,
+ fib_node_index_t index);
+extern void fib_node_list_remove(fib_node_list_t head,
+ u32 sibling);
+extern void fib_node_list_elt_remove(u32 sibling);
+
+extern int fib_node_list_advance(u32 sibling);
+
+extern int fib_node_list_get_front(fib_node_list_t head,
+ fib_node_ptr_t *ptr);
+
+extern int fib_node_list_elt_get_next(u32 elt,
+ fib_node_ptr_t *ptr);
+
+extern u32 fib_node_list_get_size(fib_node_list_t head);
+
+/**
+ * @brief Callback function invoked during a list walk
+ */
+typedef int (*fib_node_list_walk_cb_t)(fib_node_ptr_t *owner,
+ void *args);
+
+extern void fib_node_list_walk(fib_node_list_t head,
+ fib_node_list_walk_cb_t fn,
+ void *args);
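+
+/*
+ * Illustrative usage sketch (not part of this patch); my_walk_cb and
+ * index are hypothetical:
+ *
+ *   fib_node_list_t list = fib_node_list_create();
+ *   u32 sibling = fib_node_list_push_front(list, 0,
+ *                                          FIB_NODE_TYPE_PATH, index);
+ *   fib_node_list_walk(list, my_walk_cb, NULL);
+ *   fib_node_list_remove(list, sibling);
+ *   fib_node_list_destroy(&list);
+ */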
+
+extern void fib_node_list_memory_show(void);
+
+#endif
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
new file mode 100644
index 00000000..f1263334
--- /dev/null
+++ b/src/vnet/fib/fib_path.c
@@ -0,0 +1,2242 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/interface_rx_dpo.h>
+#include <vnet/dpo/mpls_disposition.h>
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
+
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/fib/fib_urpf_list.h>
+#include <vnet/fib/mpls_fib.h>
+
+/**
+ * Enumeration of path types
+ */
+typedef enum fib_path_type_t_ {
+ /**
+ * Marker. Add new types after this one.
+ */
+ FIB_PATH_TYPE_FIRST = 0,
+ /**
+ * Attached-nexthop. An interface and a nexthop are known.
+ */
+ FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
+ /**
+ * attached. Only the interface is known.
+ */
+ FIB_PATH_TYPE_ATTACHED,
+ /**
+ * recursive. Only the next-hop is known.
+ */
+ FIB_PATH_TYPE_RECURSIVE,
+ /**
+ * special. Nothing is known, so we drop.
+ */
+ FIB_PATH_TYPE_SPECIAL,
+ /**
+ * exclusive. user provided adj.
+ */
+ FIB_PATH_TYPE_EXCLUSIVE,
+ /**
+ * deag. Link to a lookup adj in the next table
+ */
+ FIB_PATH_TYPE_DEAG,
+ /**
+ * interface receive.
+ */
+ FIB_PATH_TYPE_INTF_RX,
+ /**
+ * receive. it's for-us.
+ */
+ FIB_PATH_TYPE_RECEIVE,
+ /**
+ * Marker. Add new types before this one, then update it.
+ */
+ FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
+} __attribute__ ((packed)) fib_path_type_t;
+
+/**
+ * The maximum number of path_types
+ */
+#define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)
+
+#define FIB_PATH_TYPES { \
+ [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop", \
+ [FIB_PATH_TYPE_ATTACHED] = "attached", \
+ [FIB_PATH_TYPE_RECURSIVE] = "recursive", \
+ [FIB_PATH_TYPE_SPECIAL] = "special", \
+ [FIB_PATH_TYPE_EXCLUSIVE] = "exclusive", \
+ [FIB_PATH_TYPE_DEAG] = "deag", \
+ [FIB_PATH_TYPE_INTF_RX] = "intf-rx", \
+ [FIB_PATH_TYPE_RECEIVE] = "receive", \
+}
+
+#define FOR_EACH_FIB_PATH_TYPE(_item) \
+ for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
+
+/**
+ * Enumeration of path operational (i.e. derived) attributes
+ */
+typedef enum fib_path_oper_attribute_t_ {
+ /**
+ * Marker. Add new types after this one.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
+ /**
+ * The path forms part of a recursive loop.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
+ /**
+ * The path is resolved
+ */
+ FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
+ /**
+ * The path is attached, despite what the next-hop may say.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_ATTACHED,
+ /**
+ * The path has become a permanent drop.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_DROP,
+ /**
+ * Marker. Add new types before this one, then update it.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
+} __attribute__ ((packed)) fib_path_oper_attribute_t;
+
+/**
+ * The maximum number of path operational attributes
+ */
+#define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)
+
+#define FIB_PATH_OPER_ATTRIBUTES { \
+ [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop", \
+ [FIB_PATH_OPER_ATTRIBUTE_RESOLVED] = "resolved", \
+ [FIB_PATH_OPER_ATTRIBUTE_DROP] = "drop", \
+}
+
+#define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
+ for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
+ _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
+ _item++)
+
+/**
+ * Path flags from the attributes
+ */
+typedef enum fib_path_oper_flags_t_ {
+ FIB_PATH_OPER_FLAG_NONE = 0,
+ FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
+ FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
+ FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
+ FIB_PATH_OPER_FLAG_ATTACHED = (1 << FIB_PATH_OPER_ATTRIBUTE_ATTACHED),
+} __attribute__ ((packed)) fib_path_oper_flags_t;
+
+/**
+ * A FIB path
+ */
+typedef struct fib_path_t_ {
+ /**
+ * A path is a node in the FIB graph.
+ */
+ fib_node_t fp_node;
+
+ /**
+ * The index of the path-list to which this path belongs
+ */
+ u32 fp_pl_index;
+
+ /**
+ * This marks the start of the memory area used to hash
+ * the path
+ */
+ STRUCT_MARK(path_hash_start);
+
+ /**
+ * Configuration Flags
+ */
+ fib_path_cfg_flags_t fp_cfg_flags;
+
+ /**
+ * The type of the path. This is the selector for the union
+ */
+ fib_path_type_t fp_type;
+
+ /**
+ * The protocol of the next-hop, i.e. the address family of the
+ * next-hop's address. We can't derive this from the address itself
+ * since the address can be all zeros
+ */
+ dpo_proto_t fp_nh_proto;
+
+ /**
+ * UCMP [unnormalised] weight
+ */
+ u8 fp_weight;
+
+ /**
+ * A path preference. 0 is the best.
+ * Only paths of the best preference, that are 'up', are considered
+ * for forwarding.
+ */
+ u8 fp_preference;
+
+ /**
+ * per-type union of the data required to resolve the path
+ */
+ union {
+ struct {
+ /**
+ * The next-hop
+ */
+ ip46_address_t fp_nh;
+ /**
+ * The interface
+ */
+ u32 fp_interface;
+ } attached_next_hop;
+ struct {
+ /**
+ * The interface
+ */
+ u32 fp_interface;
+ } attached;
+ struct {
+ union
+ {
+ /**
+ * The next-hop
+ */
+ ip46_address_t fp_ip;
+ struct {
+ /**
+ * The local label to resolve through.
+ */
+ mpls_label_t fp_local_label;
+ /**
+ * The EOS bit of the resolving label
+ */
+ mpls_eos_bit_t fp_eos;
+ };
+ } fp_nh;
+ /**
+ * The FIB table index in which to find the next-hop.
+ */
+ fib_node_index_t fp_tbl_id;
+ } recursive;
+ struct {
+ /**
+ * The FIB index in which to perform the next lookup
+ */
+ fib_node_index_t fp_tbl_id;
+ /**
+ * The RPF-ID to tag the packets with
+ */
+ fib_rpf_id_t fp_rpf_id;
+ } deag;
+ struct {
+ } special;
+ struct {
+ /**
+ * The user provided 'exclusive' DPO
+ */
+ dpo_id_t fp_ex_dpo;
+ } exclusive;
+ struct {
+ /**
+ * The interface on which the local address is configured
+ */
+ u32 fp_interface;
+ /**
+ * The next-hop
+ */
+ ip46_address_t fp_addr;
+ } receive;
+ struct {
+ /**
+ * The interface on which the packets will be input.
+ */
+ u32 fp_interface;
+ } intf_rx;
+ };
+ STRUCT_MARK(path_hash_end);
+
+ /**
+ * Members in this last section represent information that is
+ * derived during resolution. It should not be copied to new paths
+ * nor compared.
+ */
+
+ /**
+ * Operational Flags
+ */
+ fib_path_oper_flags_t fp_oper_flags;
+
+ /**
+ * the resolving via fib. Not part of the union, since it is not part
+ * of the path's hash.
+ */
+ fib_node_index_t fp_via_fib;
+
+ /**
+ * The Data-path objects through which this path resolves for IP.
+ */
+ dpo_id_t fp_dpo;
+
+ /**
+ * the index of this path in the parent's child list.
+ */
+ u32 fp_sibling;
+} fib_path_t;
+
+/*
+ * Array of strings/names for the path types and attributes
+ */
+static const char *fib_path_type_names[] = FIB_PATH_TYPES;
+static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
+static const char *fib_path_cfg_attribute_names[] = FIB_PATH_CFG_ATTRIBUTES;
+
+/*
+ * The memory pool from which we allocate all the paths
+ */
+static fib_path_t *fib_path_pool;
+
+/*
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define FIB_PATH_DBG(_p, _fmt, _args...) \
+{ \
+ u8 *_tmp = NULL; \
+ _tmp = fib_path_format(fib_path_get_index(_p), _tmp); \
+ clib_warning("path:[%d:%s]:" _fmt, \
+ fib_path_get_index(_p), _tmp, \
+ ##_args); \
+ vec_free(_tmp); \
+}
+#else
+#define FIB_PATH_DBG(_p, _fmt, _args...)
+#endif
+
+static fib_path_t *
+fib_path_get (fib_node_index_t index)
+{
+ return (pool_elt_at_index(fib_path_pool, index));
+}
+
+static fib_node_index_t
+fib_path_get_index (fib_path_t *path)
+{
+ return (path - fib_path_pool);
+}
+
+static fib_node_t *
+fib_path_get_node (fib_node_index_t index)
+{
+ return ((fib_node_t*)fib_path_get(index));
+}
+
+static fib_path_t*
+fib_path_from_fib_node (fib_node_t *node)
+{
+#if CLIB_DEBUG > 0
+ ASSERT(FIB_NODE_TYPE_PATH == node->fn_type);
+#endif
+ return ((fib_path_t*)node);
+}
+
+u8 *
+format_fib_path (u8 * s, va_list * args)
+{
+ fib_path_t *path = va_arg (*args, fib_path_t *);
+ vnet_main_t * vnm = vnet_get_main();
+ fib_path_oper_attribute_t oattr;
+ fib_path_cfg_attribute_t cattr;
+
+ s = format (s, " index:%d ", fib_path_get_index(path));
+ s = format (s, "pl-index:%d ", path->fp_pl_index);
+ s = format (s, "%U ", format_dpo_proto, path->fp_nh_proto);
+ s = format (s, "weight=%d ", path->fp_weight);
+ s = format (s, "pref=%d ", path->fp_preference);
+ s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
+ if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
+ s = format(s, " oper-flags:");
+ FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) {
+ if ((1<<oattr) & path->fp_oper_flags) {
+ s = format (s, "%s,", fib_path_oper_attribute_names[oattr]);
+ }
+ }
+ }
+ if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) {
+ s = format(s, " cfg-flags:");
+ FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) {
+ if ((1<<cattr) & path->fp_cfg_flags) {
+ s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]);
+ }
+ }
+ }
+ s = format(s, "\n ");
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ s = format (s, "%U", format_ip46_address,
+ &path->attached_next_hop.fp_nh,
+ IP46_TYPE_ANY);
+ if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
+ {
+ s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
+ }
+ else
+ {
+ s = format (s, " %U",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(
+ vnm,
+ path->attached_next_hop.fp_interface));
+ if (vnet_sw_interface_is_p2p(vnet_get_main(),
+ path->attached_next_hop.fp_interface))
+ {
+ s = format (s, " (p2p)");
+ }
+ }
+ if (!dpo_id_is_valid(&path->fp_dpo))
+ {
+ s = format(s, "\n unresolved");
+ }
+ else
+ {
+ s = format(s, "\n %U",
+ format_dpo_id,
+ &path->fp_dpo, 13);
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
+ {
+ s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
+ }
+ else
+ {
+ s = format (s, " %U",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(
+ vnm,
+ path->attached.fp_interface));
+ }
+ break;
+ case FIB_PATH_TYPE_RECURSIVE:
+ if (DPO_PROTO_MPLS == path->fp_nh_proto)
+ {
+ s = format (s, "via %U %U",
+ format_mpls_unicast_label,
+ path->recursive.fp_nh.fp_local_label,
+ format_mpls_eos_bit,
+ path->recursive.fp_nh.fp_eos);
+ }
+ else
+ {
+ s = format (s, "via %U",
+ format_ip46_address,
+ &path->recursive.fp_nh.fp_ip,
+ IP46_TYPE_ANY);
+ }
+ s = format (s, " in fib:%d",
+ path->recursive.fp_tbl_id,
+ path->fp_via_fib);
+ s = format (s, " via-fib:%d", path->fp_via_fib);
+ s = format (s, " via-dpo:[%U:%d]",
+ format_dpo_type, path->fp_dpo.dpoi_type,
+ path->fp_dpo.dpoi_index);
+
+ break;
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_INTF_RX:
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_DEAG:
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ if (dpo_id_is_valid(&path->fp_dpo))
+ {
+ s = format(s, "%U", format_dpo_id,
+ &path->fp_dpo, 2);
+ }
+ break;
+ }
+ return (s);
+}
+
+u8 *
+fib_path_format (fib_node_index_t pi, u8 *s)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(pi);
+ ASSERT(NULL != path);
+
+ return (format (s, "%U", format_fib_path, path));
+}
+
+u8 *
+fib_path_adj_format (fib_node_index_t pi,
+ u32 indent,
+ u8 *s)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(pi);
+ ASSERT(NULL != path);
+
+ if (!dpo_id_is_valid(&path->fp_dpo))
+ {
+ s = format(s, " unresolved");
+ }
+ else
+ {
+ s = format(s, "%U", format_dpo_id,
+ &path->fp_dpo, 2);
+ }
+
+ return (s);
+}
+
+/*
+ * fib_path_last_lock_gone
+ *
+ * We don't share paths, we share path lists, so the [un]lock functions
+ * are no-ops
+ */
+static void
+fib_path_last_lock_gone (fib_node_t *node)
+{
+ ASSERT(0);
+}
+
+static adj_index_t
+fib_path_attached_next_hop_get_adj (fib_path_t *path,
+ vnet_link_t link)
+{
+ if (vnet_sw_interface_is_p2p(vnet_get_main(),
+ path->attached_next_hop.fp_interface))
+ {
+ /*
+ * if the interface is p2p then the adj for the specific
+ * neighbour on that link will never exist. On p2p links
+ * the subnet address (the attached route) links to the
+ * auto-adj (see below); we want that adj here too.
+ */
+ return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
+ link,
+ &zero_addr,
+ path->attached_next_hop.fp_interface));
+ }
+ else
+ {
+ return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
+ link,
+ &path->attached_next_hop.fp_nh,
+ path->attached_next_hop.fp_interface));
+ }
+}
+
+static void
+fib_path_attached_next_hop_set (fib_path_t *path)
+{
+ /*
+ * resolve directly via the adjacency described by the
+ * interface and next-hop
+ */
+ dpo_set(&path->fp_dpo,
+ DPO_ADJACENCY,
+ path->fp_nh_proto,
+ fib_path_attached_next_hop_get_adj(
+ path,
+ dpo_proto_to_link(path->fp_nh_proto)));
+
+ /*
+ * become a child of the adjacency so we receive updates
+ * when its rewrite changes
+ */
+ path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
+ FIB_NODE_TYPE_PATH,
+ fib_path_get_index(path));
+
+ if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
+ path->attached_next_hop.fp_interface) ||
+ !adj_is_up(path->fp_dpo.dpoi_index))
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+}
+
+static adj_index_t
+fib_path_attached_get_adj (fib_path_t *path,
+ vnet_link_t link)
+{
+ if (vnet_sw_interface_is_p2p(vnet_get_main(),
+ path->attached.fp_interface))
+ {
+ /*
+ * point-2-point interfaces do not require a glean, since
+ * there is nothing to ARP. Install a rewrite/nbr adj instead
+ */
+ return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
+ link,
+ &zero_addr,
+ path->attached.fp_interface));
+ }
+ else
+ {
+ return (adj_glean_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
+ path->attached.fp_interface,
+ NULL));
+ }
+}
+
+/*
+ * create or update the path's recursive adj
+ */
+static void
+fib_path_recursive_adj_update (fib_path_t *path,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo)
+{
+ dpo_id_t via_dpo = DPO_INVALID;
+
+ /*
+ * get the DPO to resolve through from the via-entry
+ */
+ fib_entry_contribute_forwarding(path->fp_via_fib,
+ fct,
+ &via_dpo);
+
+ /*
+ * hope for the best - clear if restrictions apply.
+ */
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+
+ /*
+ * Validate any recursion constraints and over-ride the via
+ * adj if not met
+ */
+ if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto));
+ }
+ else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
+ {
+ /*
+ * the via FIB must be a host route.
+ * note the via FIB just added will always be a host route
+ * since it is an RR source added host route. So what we need to
+ * check is whether the route has other sources. If it does then
+ * some other source has added it as a host route. If it doesn't
+ * then it was added only here and inherits forwarding from a cover.
+ * the cover is not a host route.
+ * The RR source is the lowest priority source, so we check if it
+ * is the best. If it is, there are no other sources.
+ */
+ if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto));
+
+ /*
+ * PIC edge trigger. let the load-balance maps know
+ */
+ load_balance_map_path_state_change(fib_path_get_index(path));
+ }
+ }
+ else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED)
+ {
+ /*
+ * RR source entries inherit the flags from the cover, so
+ * we can check the via directly
+ */
+ if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto));
+
+ /*
+ * PIC edge trigger. let the load-balance maps know
+ */
+ load_balance_map_path_state_change(fib_path_get_index(path));
+ }
+ }
+ /*
+ * check for over-riding factors on the FIB entry itself
+ */
+ if (!fib_entry_is_resolved(path->fp_via_fib))
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto));
+
+ /*
+ * PIC edge trigger. let the load-balance maps know
+ */
+ load_balance_map_path_state_change(fib_path_get_index(path));
+ }
+
+ /*
+ * If this path is contributing a drop, then it's not resolved
+ */
+ if (dpo_is_drop(&via_dpo) || load_balance_is_drop(&via_dpo))
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+
+ /*
+ * update the path's contributed DPO
+ */
+ dpo_copy(dpo, &via_dpo);
+
+ FIB_PATH_DBG(path, "recursive update:");
+
+ dpo_reset(&via_dpo);
+}
+
+/*
+ * fib_path_is_permanent_drop
+ *
+ * Return !0 if the path is configured to permanently drop,
+ * despite other attributes.
+ */
+static int
+fib_path_is_permanent_drop (fib_path_t *path)
+{
+ return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) ||
+ (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP));
+}
+
+/*
+ * fib_path_unresolve
+ *
+ * Remove our dependency on the resolution target
+ */
+static void
+fib_path_unresolve (fib_path_t *path)
+{
+ /*
+ * the forced drop path does not need unresolving
+ */
+ if (fib_path_is_permanent_drop(path))
+ {
+ return;
+ }
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_RECURSIVE:
+ if (FIB_NODE_INDEX_INVALID != path->fp_via_fib)
+ {
+ fib_prefix_t pfx;
+
+ fib_entry_get_prefix(path->fp_via_fib, &pfx);
+ fib_entry_child_remove(path->fp_via_fib,
+ path->fp_sibling);
+ fib_table_entry_special_remove(path->recursive.fp_tbl_id,
+ &pfx,
+ FIB_SOURCE_RR);
+ path->fp_via_fib = FIB_NODE_INDEX_INVALID;
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ case FIB_PATH_TYPE_ATTACHED:
+ adj_child_remove(path->fp_dpo.dpoi_index,
+ path->fp_sibling);
+ adj_unlock(path->fp_dpo.dpoi_index);
+ break;
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ dpo_reset(&path->exclusive.fp_ex_dpo);
+ break;
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_INTF_RX:
+ case FIB_PATH_TYPE_DEAG:
+ /*
+ * these hold only the path's DPO, which is reset below.
+ */
+ break;
+ }
+
+ /*
+ * release the adj we were holding and pick up the
+ * drop just in case.
+ */
+ dpo_reset(&path->fp_dpo);
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+
+ return;
+}
+
+static fib_forward_chain_type_t
+fib_path_to_chain_type (const fib_path_t *path)
+{
+ if (DPO_PROTO_MPLS == path->fp_nh_proto)
+ {
+ if (FIB_PATH_TYPE_RECURSIVE == path->fp_type &&
+ MPLS_EOS == path->recursive.fp_nh.fp_eos)
+ {
+ return (FIB_FORW_CHAIN_TYPE_MPLS_EOS);
+ }
+ else
+ {
+ return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
+ }
+ }
+ else
+ {
+ return (fib_forw_chain_type_from_dpo_proto(path->fp_nh_proto));
+ }
+}
+
+/*
+ * fib_path_back_walk_notify
+ *
+ * A back walk has reached this path.
+ */
+static fib_node_back_walk_rc_t
+fib_path_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_path_t *path;
+
+ path = fib_path_from_fib_node(node);
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_RECURSIVE:
+ if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason)
+ {
+ /*
+ * modify the recursive adjacency to use the new forwarding
+ * of the via-fib.
+ * this update is visible to packets in flight in the DP.
+ */
+ fib_path_recursive_adj_update(
+ path,
+ fib_path_to_chain_type(path),
+ &path->fp_dpo);
+ }
+ if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) ||
+ (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason))
+ {
+ /*
+ * ADJ updates (complete<->incomplete) do not need to propagate to
+ * recursive entries.
+ * The only reason it's needed as far back as here, is that the adj
+ * and the incomplete adj are a different DPO type, so the LBs need
+ * to re-stack.
+ * If this walk was quashed in the fib_entry, then any non-fib_path
+ * children (like tunnels that collapse out the LB when they stack)
+ * would not see the update.
+ */
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ /*
+ * FIXME comment
+ * ADJ_UPDATE backwalks pass silently through here and up to
+ * the path-list when the multipath adj collapse occurs.
+ * The reason we do this is the assumption that VPP
+ * runs in an environment where the Control-Plane is remote
+ * and hence reacts slowly to link up/down. In order to remove
+ * this down link from the ECMP set quickly, we back-walk.
+ * VPP also has dedicated CPUs, so we are not stealing resources
+ * from the CP to do so.
+ */
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
+ {
+ if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED)
+ {
+ /*
+ * already resolved. no need to walk back again
+ */
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+ }
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
+ {
+ if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
+ {
+ /*
+ * already unresolved. no need to walk back again
+ */
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+ }
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
+ {
+ /*
+ * The interface this path resolves through has been deleted.
+ * This will leave the path in a permanent drop state. The route
+ * needs to be removed and readded (and hence the path-list deleted)
+ * before it can forward again.
+ */
+ fib_path_unresolve(path);
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
+ {
+ /*
+ * restack the DPO to pick up the correct DPO sub-type
+ */
+ uword if_is_up;
+ adj_index_t ai;
+
+ if_is_up = vnet_sw_interface_is_admin_up(
+ vnet_get_main(),
+ path->attached_next_hop.fp_interface);
+
+ ai = fib_path_attached_next_hop_get_adj(
+ path,
+ dpo_proto_to_link(path->fp_nh_proto));
+
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ if (if_is_up && adj_is_up(ai))
+ {
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+
+ dpo_set(&path->fp_dpo, DPO_ADJACENCY, path->fp_nh_proto, ai);
+ adj_unlock(ai);
+
+ if (!if_is_up)
+ {
+ /*
+ * If the interface is not up there is no reason to walk
+ * back to children. If we did they would only evaluate
+ * that this path is unresolved and hence it would
+ * not contribute the adjacency - so it would be wasted
+ * CPU time.
+ */
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+ }
+ }
+ if (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason)
+ {
+ if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
+ {
+ /*
+ * already unresolved. no need to walk back again
+ */
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+ }
+ /*
+ * the adj has gone down. the path is no longer resolved.
+ */
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ /*
+ * FIXME; this could schedule a lower priority walk, since attached
+ * routes are not usually in ECMP configurations so the backwalk to
+ * the FIB entry does not need to be high priority
+ */
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
+ {
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
+ {
+ fib_path_unresolve(path);
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
+ }
+ break;
+ case FIB_PATH_TYPE_INTF_RX:
+ ASSERT(0);
+ case FIB_PATH_TYPE_DEAG:
+ /*
+ * FIXME When VRF delete is allowed this will need a poke.
+ */
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ /*
+ * these path types have no parents. so to be
+ * walked from one is unexpected.
+ */
+ ASSERT(0);
+ break;
+ }
+
+ /*
+ * propagate the backwalk further to the path-list
+ */
+ fib_path_list_back_walk(path->fp_pl_index, ctx);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+static void
+fib_path_memory_show (void)
+{
+ fib_show_memory_usage("Path",
+ pool_elts(fib_path_pool),
+ pool_len(fib_path_pool),
+ sizeof(fib_path_t));
+}
+
+/*
+ * The FIB path's graph node virtual function table
+ */
+static const fib_node_vft_t fib_path_vft = {
+ .fnv_get = fib_path_get_node,
+ .fnv_last_lock = fib_path_last_lock_gone,
+ .fnv_back_walk = fib_path_back_walk_notify,
+ .fnv_mem_show = fib_path_memory_show,
+};
+
+static fib_path_cfg_flags_t
+fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
+{
+ fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE;
+
+ if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST)
+ cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
+ if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
+ cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
+ if (rpath->frp_flags & FIB_ROUTE_PATH_LOCAL)
+ cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL;
+ if (rpath->frp_flags & FIB_ROUTE_PATH_ATTACHED)
+ cfg_flags |= FIB_PATH_CFG_FLAG_ATTACHED;
+ if (rpath->frp_flags & FIB_ROUTE_PATH_INTF_RX)
+ cfg_flags |= FIB_PATH_CFG_FLAG_INTF_RX;
+ if (rpath->frp_flags & FIB_ROUTE_PATH_RPF_ID)
+ cfg_flags |= FIB_PATH_CFG_FLAG_RPF_ID;
+ if (rpath->frp_flags & FIB_ROUTE_PATH_EXCLUSIVE)
+ cfg_flags |= FIB_PATH_CFG_FLAG_EXCLUSIVE;
+ if (rpath->frp_flags & FIB_ROUTE_PATH_DROP)
+ cfg_flags |= FIB_PATH_CFG_FLAG_DROP;
+
+ return (cfg_flags);
+}
+
+/*
+ * fib_path_create
+ *
+ * Create and initialise a new path object.
+ * return the index of the path.
+ */
+fib_node_index_t
+fib_path_create (fib_node_index_t pl_index,
+ const fib_route_path_t *rpath)
+{
+ fib_path_t *path;
+
+ pool_get(fib_path_pool, path);
+ memset(path, 0, sizeof(*path));
+
+ fib_node_init(&path->fp_node,
+ FIB_NODE_TYPE_PATH);
+
+ dpo_reset(&path->fp_dpo);
+ path->fp_pl_index = pl_index;
+ path->fp_nh_proto = rpath->frp_proto;
+ path->fp_via_fib = FIB_NODE_INDEX_INVALID;
+ path->fp_weight = rpath->frp_weight;
+ if (0 == path->fp_weight)
+ {
+ /*
+ * a weight of 0 is a meaningless value. We could either reject it, and thus
+ * force clients to always use 1, or we can accept it and fix it up
+ * appropriately.
+ */
+ path->fp_weight = 1;
+ }
+ path->fp_preference = rpath->frp_preference;
+ path->fp_cfg_flags = fib_path_route_flags_to_cfg_flags(rpath);
+
+ /*
+ * deduce the path's type from the parameters and save what is needed.
+ */
+ if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_LOCAL)
+ {
+ path->fp_type = FIB_PATH_TYPE_RECEIVE;
+ path->receive.fp_interface = rpath->frp_sw_if_index;
+ path->receive.fp_addr = rpath->frp_addr;
+ }
+ else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_INTF_RX)
+ {
+ path->fp_type = FIB_PATH_TYPE_INTF_RX;
+ path->intf_rx.fp_interface = rpath->frp_sw_if_index;
+ }
+ else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RPF_ID)
+ {
+ path->fp_type = FIB_PATH_TYPE_DEAG;
+ path->deag.fp_tbl_id = rpath->frp_fib_index;
+ path->deag.fp_rpf_id = rpath->frp_rpf_id;
+ }
+ else if (~0 != rpath->frp_sw_if_index)
+ {
+ if (ip46_address_is_zero(&rpath->frp_addr))
+ {
+ path->fp_type = FIB_PATH_TYPE_ATTACHED;
+ path->attached.fp_interface = rpath->frp_sw_if_index;
+ }
+ else
+ {
+ path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
+ path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
+ path->attached_next_hop.fp_nh = rpath->frp_addr;
+ }
+ }
+ else
+ {
+ if (ip46_address_is_zero(&rpath->frp_addr))
+ {
+ if (~0 == rpath->frp_fib_index)
+ {
+ path->fp_type = FIB_PATH_TYPE_SPECIAL;
+ }
+ else
+ {
+ path->fp_type = FIB_PATH_TYPE_DEAG;
+ path->deag.fp_tbl_id = rpath->frp_fib_index;
+ }
+ }
+ else
+ {
+ path->fp_type = FIB_PATH_TYPE_RECURSIVE;
+ if (DPO_PROTO_MPLS == path->fp_nh_proto)
+ {
+ path->recursive.fp_nh.fp_local_label = rpath->frp_local_label;
+ path->recursive.fp_nh.fp_eos = rpath->frp_eos;
+ }
+ else
+ {
+ path->recursive.fp_nh.fp_ip = rpath->frp_addr;
+ }
+ path->recursive.fp_tbl_id = rpath->frp_fib_index;
+ }
+ }
+
+ FIB_PATH_DBG(path, "create");
+
+ return (fib_path_get_index(path));
+}
+
+/*
+ * fib_path_create_special
+ *
+ * Create and initialise a new path object.
+ * return the index of the path.
+ */
+fib_node_index_t
+fib_path_create_special (fib_node_index_t pl_index,
+ dpo_proto_t nh_proto,
+ fib_path_cfg_flags_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_path_t *path;
+
+ pool_get(fib_path_pool, path);
+ memset(path, 0, sizeof(*path));
+
+ fib_node_init(&path->fp_node,
+ FIB_NODE_TYPE_PATH);
+ dpo_reset(&path->fp_dpo);
+
+ path->fp_pl_index = pl_index;
+ path->fp_weight = 1;
+ path->fp_preference = 0;
+ path->fp_nh_proto = nh_proto;
+ path->fp_via_fib = FIB_NODE_INDEX_INVALID;
+ path->fp_cfg_flags = flags;
+
+ if (FIB_PATH_CFG_FLAG_DROP & flags)
+ {
+ path->fp_type = FIB_PATH_TYPE_SPECIAL;
+ }
+ else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
+ {
+ path->fp_type = FIB_PATH_TYPE_RECEIVE;
+ path->attached.fp_interface = FIB_NODE_INDEX_INVALID;
+ }
+ else
+ {
+ path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
+ ASSERT(NULL != dpo);
+ dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
+ }
+
+ return (fib_path_get_index(path));
+}
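+
+/*
+ * Illustrative sketch (not part of this patch): a drop path needs no
+ * DPO; pl_index here is hypothetical:
+ *
+ *   fib_node_index_t pi;
+ *   pi = fib_path_create_special(pl_index, DPO_PROTO_IP4,
+ *                                FIB_PATH_CFG_FLAG_DROP, NULL);
+ */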
+
+/*
+ * fib_path_copy
+ *
+ * Copy a path. return index of new path.
+ */
+fib_node_index_t
+fib_path_copy (fib_node_index_t path_index,
+ fib_node_index_t path_list_index)
+{
+ fib_path_t *path, *orig_path;
+
+ pool_get(fib_path_pool, path);
+
+ orig_path = fib_path_get(path_index);
+ ASSERT(NULL != orig_path);
+
+ memcpy(path, orig_path, sizeof(*path));
+
+ FIB_PATH_DBG(path, "create-copy:%d", path_index);
+
+ /*
+ * reset the dynamic section
+ */
+ fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH);
+ path->fp_oper_flags = FIB_PATH_OPER_FLAG_NONE;
+ path->fp_pl_index = path_list_index;
+ path->fp_via_fib = FIB_NODE_INDEX_INVALID;
+ memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
+ dpo_reset(&path->fp_dpo);
+
+ return (fib_path_get_index(path));
+}
+
+/*
+ * fib_path_destroy
+ *
+ * destroy a path that is no longer required
+ */
+void
+fib_path_destroy (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ ASSERT(NULL != path);
+ FIB_PATH_DBG(path, "destroy");
+
+ fib_path_unresolve(path);
+
+ fib_node_deinit(&path->fp_node);
+ pool_put(fib_path_pool, path);
+}
+
+/*
+ * fib_path_hash
+ *
+ * compute the path's hash over the section of the struct between the
+ * path_hash_start and path_hash_end markers
+ */
+uword
+fib_path_hash (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start),
+ (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) -
+ STRUCT_OFFSET_OF(fib_path_t, path_hash_start)),
+ 0));
+}
+
+/*
+ * fib_path_cmp_i
+ *
+ * Compare two paths for equivalence.
+ */
+static int
+fib_path_cmp_i (const fib_path_t *path1,
+ const fib_path_t *path2)
+{
+ int res;
+
+ res = 1;
+
+ /*
+ * paths of different types and protocols are not equal.
+ * paths that differ only in weight and/or preference are the same path.
+ */
+ if (path1->fp_type != path2->fp_type)
+ {
+ res = (path1->fp_type - path2->fp_type);
+ }
+ else if (path1->fp_nh_proto != path2->fp_nh_proto)
+ {
+ res = (path1->fp_nh_proto - path2->fp_nh_proto);
+ }
+ else
+ {
+ /*
+ * both paths are of the same type.
+ * consider each type and its attributes in turn.
+ */
+ switch (path1->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
+ &path2->attached_next_hop.fp_nh);
+ if (0 == res) {
+ res = (path1->attached_next_hop.fp_interface -
+ path2->attached_next_hop.fp_interface);
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ res = (path1->attached.fp_interface -
+ path2->attached.fp_interface);
+ break;
+ case FIB_PATH_TYPE_RECURSIVE:
+ res = ip46_address_cmp(&path1->recursive.fp_nh,
+ &path2->recursive.fp_nh);
+
+ if (0 == res)
+ {
+ res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id);
+ }
+ break;
+ case FIB_PATH_TYPE_DEAG:
+ res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
+ if (0 == res)
+ {
+ res = (path1->deag.fp_rpf_id - path2->deag.fp_rpf_id);
+ }
+ break;
+ case FIB_PATH_TYPE_INTF_RX:
+ res = (path1->intf_rx.fp_interface - path2->intf_rx.fp_interface);
+ break;
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ res = 0;
+ break;
+ }
+ }
+ return (res);
+}
+
+/*
+ * fib_path_cmp_for_sort
+ *
+ * Compare two paths for equivalence. Used during path sorting.
+ * As usual 0 means equal.
+ */
+int
+fib_path_cmp_for_sort (void * v1,
+ void * v2)
+{
+ fib_node_index_t *pi1 = v1, *pi2 = v2;
+ fib_path_t *path1, *path2;
+
+ path1 = fib_path_get(*pi1);
+ path2 = fib_path_get(*pi2);
+
+ /*
+ * when sorting paths we want the highest preference paths
+ * first, so that the choice set built is in preference order
+ */
+ if (path1->fp_preference != path2->fp_preference)
+ {
+ return (path1->fp_preference - path2->fp_preference);
+ }
+
+ return (fib_path_cmp_i(path1, path2));
+}
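+
+/*
+ * Illustrative sketch (not part of this patch): the comparator is
+ * intended for sorting a vector of path indices, e.g. with the
+ * vppinfra vector sort; path_indices is hypothetical:
+ *
+ *   vec_sort_with_function(path_indices, fib_path_cmp_for_sort);
+ */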
+
+/*
+ * fib_path_cmp
+ *
+ * Compare two paths for equivalence.
+ */
+int
+fib_path_cmp (fib_node_index_t pi1,
+ fib_node_index_t pi2)
+{
+ fib_path_t *path1, *path2;
+
+ path1 = fib_path_get(pi1);
+ path2 = fib_path_get(pi2);
+
+ return (fib_path_cmp_i(path1, path2));
+}
+
+int
+fib_path_cmp_w_route_path (fib_node_index_t path_index,
+ const fib_route_path_t *rpath)
+{
+ fib_path_t *path;
+ int res;
+
+ path = fib_path_get(path_index);
+
+ res = 1;
+
+ if (path->fp_weight != rpath->frp_weight)
+ {
+ res = (path->fp_weight - rpath->frp_weight);
+ }
+ else
+ {
+ /*
+ * both paths are of the same type.
+ * consider each type and its attributes in turn.
+ */
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ res = ip46_address_cmp(&path->attached_next_hop.fp_nh,
+ &rpath->frp_addr);
+ if (0 == res)
+ {
+ res = (path->attached_next_hop.fp_interface -
+ rpath->frp_sw_if_index);
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ res = (path->attached.fp_interface - rpath->frp_sw_if_index);
+ break;
+ case FIB_PATH_TYPE_RECURSIVE:
+ if (DPO_PROTO_MPLS == path->fp_nh_proto)
+ {
+ res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label;
+
+ if (res == 0)
+ {
+ res = path->recursive.fp_nh.fp_eos - rpath->frp_eos;
+ }
+ }
+ else
+ {
+ res = ip46_address_cmp(&path->recursive.fp_nh.fp_ip,
+ &rpath->frp_addr);
+ }
+
+ if (0 == res)
+ {
+ res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
+ }
+ break;
+ case FIB_PATH_TYPE_INTF_RX:
+ res = (path->intf_rx.fp_interface - rpath->frp_sw_if_index);
+ break;
+ case FIB_PATH_TYPE_DEAG:
+ res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
+ if (0 == res)
+ {
+ res = (path->deag.fp_rpf_id - rpath->frp_rpf_id);
+ }
+ break;
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ res = 0;
+ break;
+ }
+ }
+ return (res);
+}
+
+/*
+ * fib_path_recursive_loop_detect
+ *
+ * A forward walk of the FIB object graph to detect a cycle/loop. This
+ * walk is initiated when an entry links to a new path-list or unlinks
+ * from an old one.
+ * The entry vector passed contains all the FIB entries that are children of
+ * this path (i.e. all the entries encountered on the walk so far). If this
+ * vector contains the entry this path resolves via, then a loop is about
+ * to form.
+ * The loop must be allowed to form, since we need the dependencies in place
+ * so that we can track when the loop breaks.
+ * However, we MUST not produce a loop in the forwarding graph (else packets
+ * would loop around the switch path until the loop breaks), so we mark recursive
+ * paths as looped so that they do not contribute forwarding information.
+ * By marking the path as looped, an entry such as:
+ * X/Y
+ * via a.a.a.a (looped)
+ * via b.b.b.b (not looped)
+ * can still forward using the info provided by b.b.b.b only
+ */
+int
+fib_path_recursive_loop_detect (fib_node_index_t path_index,
+ fib_node_index_t **entry_indicies)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ /*
+ * the forced drop path is never looped, since it is never resolved.
+ */
+ if (fib_path_is_permanent_drop(path))
+ {
+ return (0);
+ }
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_RECURSIVE:
+ {
+ fib_node_index_t *entry_index, *entries;
+ int looped = 0;
+ entries = *entry_indicies;
+
+ vec_foreach(entry_index, entries) {
+ if (*entry_index == path->fp_via_fib)
+ {
+ /*
+ * the entry that is about to link to this path-list (or
+ * one of this path-list's children) is the same entry that
+ * this recursive path resolves through. this is a cycle.
+ * abort the walk.
+ */
+ looped = 1;
+ break;
+ }
+ }
+
+ if (looped)
+ {
+ FIB_PATH_DBG(path, "recursive loop formed");
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
+
+ dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto));
+ }
+ else
+ {
+ /*
+ * no loop here yet. keep forward walking the graph.
+ */
+ if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies))
+ {
+ FIB_PATH_DBG(path, "recursive loop formed");
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
+ }
+ else
+ {
+ FIB_PATH_DBG(path, "recursive loop cleared");
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
+ }
+ }
+ break;
+ }
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ case FIB_PATH_TYPE_ATTACHED:
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_DEAG:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_INTF_RX:
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ /*
+ * these path types cannot be part of a loop, since they are the leaves
+ * of the graph.
+ */
+ break;
+ }
+
+ return (fib_path_is_looped(path_index));
+}
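+
+/*
+ * Illustrative sketch (not part of this patch): the caller seeds the
+ * entry vector with the entries already visited on the walk;
+ * entry_index is hypothetical:
+ *
+ *   fib_node_index_t *entries = NULL;
+ *   vec_add1(entries, entry_index);
+ *   if (fib_path_recursive_loop_detect(path_index, &entries))
+ *       clib_warning("path %d is looped", path_index);
+ *   vec_free(entries);
+ */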
+
+int
+fib_path_resolve (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ /*
+ * hope for the best.
+ */
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+
+ /*
+ * the forced drop path resolves via the drop adj
+ */
+ if (fib_path_is_permanent_drop(path))
+ {
+ dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto));
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ return (fib_path_is_resolved(path_index));
+ }
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ fib_path_attached_next_hop_set(path);
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ /*
+ * path->attached.fp_interface
+ */
+ if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
+ path->attached.fp_interface))
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ dpo_set(&path->fp_dpo,
+ DPO_ADJACENCY,
+ path->fp_nh_proto,
+ fib_path_attached_get_adj(path,
+ dpo_proto_to_link(path->fp_nh_proto)));
+
+ /*
+ * become a child of the adjacency so we receive updates
+ * when the interface state changes
+ */
+ path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
+ FIB_NODE_TYPE_PATH,
+ fib_path_get_index(path));
+
+ break;
+ case FIB_PATH_TYPE_RECURSIVE:
+ {
+ /*
+ * Create a RR source entry in the table for the address
+ * that this path recurses through.
+ * This resolve action is recursive, hence we may create
+ * more paths in the process. More creates mean the pool may
+ * realloc, invalidating this path pointer; hence it is
+ * re-fetched after the call below.
+ */
+ fib_node_index_t fei;
+ fib_prefix_t pfx;
+
+ ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);
+
+ if (DPO_PROTO_MPLS == path->fp_nh_proto)
+ {
+ fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label,
+ path->recursive.fp_nh.fp_eos,
+ &pfx);
+ }
+ else
+ {
+ fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
+ }
+
+ fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
+ &pfx,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+
+ path = fib_path_get(path_index);
+ path->fp_via_fib = fei;
+
+ /*
+ * become a dependent child of the entry so the path is
+ * informed when the forwarding for the entry changes.
+ */
+ path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
+ FIB_NODE_TYPE_PATH,
+ fib_path_get_index(path));
+
+ /*
+ * create and configure the IP DPO
+ */
+ fib_path_recursive_adj_update(
+ path,
+ fib_path_to_chain_type(path),
+ &path->fp_dpo);
+
+ break;
+ }
+ case FIB_PATH_TYPE_SPECIAL:
+ /*
+ * Resolve via the drop
+ */
+ dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto));
+ break;
+ case FIB_PATH_TYPE_DEAG:
+ {
+ /*
+ * Resolve via a lookup DPO.
+ * FIXME. control plane should add routes with a table ID
+ */
+ lookup_cast_t cast;
+
+ cast = (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RPF_ID ?
+ LOOKUP_MULTICAST :
+ LOOKUP_UNICAST);
+
+ lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
+ path->fp_nh_proto,
+ cast,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ &path->fp_dpo);
+ break;
+ }
+ case FIB_PATH_TYPE_RECEIVE:
+ /*
+ * Resolve via a receive DPO.
+ */
+ receive_dpo_add_or_lock(path->fp_nh_proto,
+ path->receive.fp_interface,
+ &path->receive.fp_addr,
+ &path->fp_dpo);
+ break;
+ case FIB_PATH_TYPE_INTF_RX: {
+ /*
+ * Resolve via a receive DPO.
+ */
+ interface_rx_dpo_add_or_lock(path->fp_nh_proto,
+ path->intf_rx.fp_interface,
+ &path->fp_dpo);
+ break;
+ }
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ /*
+ * Resolve via the user provided DPO
+ */
+ dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
+ break;
+ }
+
+ return (fib_path_is_resolved(path_index));
+}
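+
+/*
+ * Illustrative sketch (not part of this patch): typical lifecycle from
+ * a path-list's perspective; rpath and pl_index are hypothetical:
+ *
+ *   fib_node_index_t pi = fib_path_create(pl_index, rpath);
+ *   if (!fib_path_resolve(pi))
+ *       clib_warning("path %d is unresolved", pi);
+ */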
+
+u32
+fib_path_get_resolving_interface (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ return (path->attached_next_hop.fp_interface);
+ case FIB_PATH_TYPE_ATTACHED:
+ return (path->attached.fp_interface);
+ case FIB_PATH_TYPE_RECEIVE:
+ return (path->receive.fp_interface);
+ case FIB_PATH_TYPE_RECURSIVE:
+ if (fib_path_is_resolved(path_index))
+ {
+ return (fib_entry_get_resolving_interface(path->fp_via_fib));
+ }
+ break;
+ case FIB_PATH_TYPE_INTF_RX:
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_DEAG:
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ break;
+ }
+ return (~0);
+}
+
+adj_index_t
+fib_path_get_adj (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ ASSERT(dpo_is_adj(&path->fp_dpo));
+ if (dpo_is_adj(&path->fp_dpo))
+ {
+ return (path->fp_dpo.dpoi_index);
+ }
+ return (ADJ_INDEX_INVALID);
+}
+
+u16
+fib_path_get_weight (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ ASSERT(path);
+
+ return (path->fp_weight);
+}
+
+u16
+fib_path_get_preference (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ ASSERT(path);
+
+ return (path->fp_preference);
+}
+
+/**
+ * @brief Contribute the path's adjacency to the list passed.
+ * By calling this function over all paths, recursively, a child
+ * can construct its full set of forwarding adjacencies, and hence its
+ * uRPF list.
+ */
+void
+fib_path_contribute_urpf (fib_node_index_t path_index,
+ index_t urpf)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ /*
+ * resolved and unresolved paths contribute to the RPF list.
+ */
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ fib_urpf_list_append(urpf, path->attached_next_hop.fp_interface);
+ break;
+
+ case FIB_PATH_TYPE_ATTACHED:
+ fib_urpf_list_append(urpf, path->attached.fp_interface);
+ break;
+
+ case FIB_PATH_TYPE_RECURSIVE:
+ if (FIB_NODE_INDEX_INVALID != path->fp_via_fib &&
+ !fib_path_is_looped(path_index))
+ {
+ /*
+ * there's unresolved due to constraints, and there's unresolved
+ * due to ain't got no via. can't do nowt w'out via.
+ */
+ fib_entry_contribute_urpf(path->fp_via_fib, urpf);
+ }
+ break;
+
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ case FIB_PATH_TYPE_SPECIAL:
+ /*
+ * these path types may link to an adj, if that's what
+ * the client gave
+ */
+ if (dpo_is_adj(&path->fp_dpo))
+ {
+ ip_adjacency_t *adj;
+
+ adj = adj_get(path->fp_dpo.dpoi_index);
+
+ fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index);
+ }
+ break;
+
+ case FIB_PATH_TYPE_DEAG:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_INTF_RX:
+ /*
+ * these path types don't link to an adj
+ */
+ break;
+ }
+}
+
+void
+fib_path_stack_mpls_disp (fib_node_index_t path_index,
+ dpo_proto_t payload_proto,
+ dpo_id_t *dpo)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ ASSERT(path);
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_DEAG:
+ {
+ dpo_id_t tmp = DPO_INVALID;
+
+ dpo_copy(&tmp, dpo);
+ dpo_set(dpo,
+ DPO_MPLS_DISPOSITION,
+ payload_proto,
+ mpls_disp_dpo_create(payload_proto,
+ path->deag.fp_rpf_id,
+ &tmp));
+ dpo_reset(&tmp);
+ break;
+ }
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_ATTACHED:
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ case FIB_PATH_TYPE_RECURSIVE:
+ case FIB_PATH_TYPE_INTF_RX:
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ case FIB_PATH_TYPE_SPECIAL:
+ break;
+ }
+}
+
+void
+fib_path_contribute_forwarding (fib_node_index_t path_index,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ ASSERT(path);
+ ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);
+
+ FIB_PATH_DBG(path, "contribute");
+
+ /*
+ * The DPO stored in the path was created when the path was resolved.
+ * This then represents the path's 'native' protocol, i.e. IP.
+ * For all other chain types we will need to go find something else.
+ */
+ if (fib_path_to_chain_type(path) == fct)
+ {
+ dpo_copy(dpo, &path->fp_dpo);
+ }
+ else
+ {
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ {
+ adj_index_t ai;
+
+ /*
+ * get an appropriate link-type adj.
+ */
+ ai = fib_path_attached_next_hop_get_adj(
+ path,
+ fib_forw_chain_type_to_link_type(fct));
+ dpo_set(dpo, DPO_ADJACENCY,
+ fib_forw_chain_type_to_dpo_proto(fct), ai);
+ adj_unlock(ai);
+
+ break;
+ }
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ break;
+ }
+ break;
+ case FIB_PATH_TYPE_RECURSIVE:
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ fib_path_recursive_adj_update(path, fct, dpo);
+ break;
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ ASSERT(0);
+ break;
+ }
+ break;
+ case FIB_PATH_TYPE_DEAG:
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
+ DPO_PROTO_MPLS,
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ dpo);
+ break;
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ dpo_copy(dpo, &path->fp_dpo);
+ break;
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ ASSERT(0);
+ break;
+ }
+ break;
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ {
+ adj_index_t ai;
+
+ /*
+ * get an appropriate link-type adj.
+ */
+ ai = fib_path_attached_get_adj(
+ path,
+ fib_forw_chain_type_to_link_type(fct));
+ dpo_set(dpo, DPO_ADJACENCY,
+ fib_forw_chain_type_to_dpo_proto(fct), ai);
+ adj_unlock(ai);
+ break;
+ }
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ {
+ adj_index_t ai;
+
+ /*
+ * Create the adj needed for sending IP multicast traffic
+ */
+ ai = adj_mcast_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
+ fib_forw_chain_type_to_link_type(fct),
+ path->attached.fp_interface);
+ dpo_set(dpo, DPO_ADJACENCY,
+ fib_forw_chain_type_to_dpo_proto(fct),
+ ai);
+ adj_unlock(ai);
+ }
+ break;
+ }
+ break;
+ case FIB_PATH_TYPE_INTF_RX:
+ /*
+ * Create the DPO that makes packets appear as received on this interface
+ */
+ interface_rx_dpo_add_or_lock(fib_forw_chain_type_to_dpo_proto(fct),
+ path->attached.fp_interface,
+ dpo);
+ break;
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_SPECIAL:
+ dpo_copy(dpo, &path->fp_dpo);
+ break;
+ }
+ }
+}
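+
+/*
+ * Illustrative example (hypothetical): an attached-next-hop path whose
+ * stored DPO is IPv4 unicast, when asked for an MPLS_NON_EOS chain,
+ * does not copy that stored DPO; it fetches an adjacency of link-type
+ * MPLS on the same interface/next-hop and contributes that instead.
+ */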
+
+load_balance_path_t *
+fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
+ fib_forward_chain_type_t fct,
+ load_balance_path_t *hash_key)
+{
+ load_balance_path_t *mnh;
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ ASSERT(path);
+
+ if (fib_path_is_resolved(path_index))
+ {
+ vec_add2(hash_key, mnh, 1);
+
+ mnh->path_weight = path->fp_weight;
+ mnh->path_index = path_index;
+ fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
+ }
+
+ return (hash_key);
+}
+
+int
+fib_path_is_recursive_constrained (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ return ((FIB_PATH_TYPE_RECURSIVE == path->fp_type) &&
+ ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED) ||
+ (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)));
+}
+
+int
+fib_path_is_exclusive (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type);
+}
+
+int
+fib_path_is_deag (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ return (FIB_PATH_TYPE_DEAG == path->fp_type);
+}
+
+int
+fib_path_is_resolved (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ return (dpo_id_is_valid(&path->fp_dpo) &&
+ (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) &&
+ !fib_path_is_looped(path_index) &&
+ !fib_path_is_permanent_drop(path));
+}
+
+int
+fib_path_is_looped (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP);
+}
+
+fib_path_list_walk_rc_t
+fib_path_encode (fib_node_index_t path_list_index,
+ fib_node_index_t path_index,
+ void *ctx)
+{
+ fib_route_path_encode_t **api_rpaths = ctx;
+ fib_route_path_encode_t *api_rpath;
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+ if (!path)
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+ vec_add2(*api_rpaths, api_rpath, 1);
+ api_rpath->rpath.frp_weight = path->fp_weight;
+ api_rpath->rpath.frp_preference = path->fp_preference;
+ api_rpath->rpath.frp_proto = path->fp_nh_proto;
+ api_rpath->rpath.frp_sw_if_index = ~0;
+ api_rpath->dpo = path->exclusive.fp_ex_dpo;
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_RECEIVE:
+ api_rpath->rpath.frp_addr = path->receive.fp_addr;
+ api_rpath->rpath.frp_sw_if_index = path->receive.fp_interface;
+ api_rpath->dpo = path->fp_dpo;
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ api_rpath->rpath.frp_sw_if_index = path->attached.fp_interface;
+ api_rpath->dpo = path->fp_dpo;
+ break;
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ api_rpath->rpath.frp_sw_if_index = path->attached_next_hop.fp_interface;
+ api_rpath->rpath.frp_addr = path->attached_next_hop.fp_nh;
+ break;
+ case FIB_PATH_TYPE_SPECIAL:
+ break;
+ case FIB_PATH_TYPE_DEAG:
+ api_rpath->rpath.frp_fib_index = path->deag.fp_tbl_id;
+ api_rpath->dpo = path->fp_dpo;
+ break;
+ case FIB_PATH_TYPE_RECURSIVE:
+ api_rpath->rpath.frp_addr = path->recursive.fp_nh.fp_ip;
+ break;
+ default:
+ break;
+ }
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+}
+
+dpo_proto_t
+fib_path_get_proto (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ return (path->fp_nh_proto);
+}
+
+void
+fib_path_module_init (void)
+{
+ fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft);
+}
+
+static clib_error_t *
+show_fib_path_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ fib_node_index_t pi;
+ fib_path_t *path;
+
+ if (unformat (input, "%d", &pi))
+ {
+ /*
+ * show one in detail
+ */
+ if (!pool_is_free_index(fib_path_pool, pi))
+ {
+ path = fib_path_get(pi);
+ u8 *s = fib_path_format(pi, NULL);
+ s = format(s, "children:");
+ s = fib_node_children_format(path->fp_node.fn_children, s);
+ vlib_cli_output (vm, "%s", s);
+ vec_free(s);
+ }
+ else
+ {
+ vlib_cli_output (vm, "path %d invalid", pi);
+ }
+ }
+ else
+ {
+ vlib_cli_output (vm, "FIB Paths");
+ pool_foreach(path, fib_path_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_fib_path, path);
+ }));
+ }
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (show_fib_path, static) = {
+ .path = "show fib paths",
+ .function = show_fib_path_command,
+ .short_help = "show fib paths",
+};
diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h
new file mode 100644
index 00000000..f986e437
--- /dev/null
+++ b/src/vnet/fib/fib_path.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Given a route of the form:
+ *   q.r.s.t/Y
+ *      via <interface> <next-hop>
+ *
+ * The prefix is: q.r.s.t/Y
+ * the path is: 'via <interface> <next-hop>'
+ *
+ * The path is the description of where to send the traffic, and the
+ * prefix is a description of which traffic to send.
+ * It is the aim of the FIB to resolve the path, i.e. to find the corresponding
+ * adjacency to match the path's description.
+ */
+
+#ifndef __FIB_PATH_H__
+#define __FIB_PATH_H__
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/load_balance.h>
+
+#include <vnet/fib/fib_types.h>
+#include <vnet/adj/adj_types.h>
+
+/**
+ * Enumeration of path configuration attributes
+ */
+typedef enum fib_path_cfg_attribute_t_ {
+ /**
+ * Marker. Add new types after this one.
+ */
+ FIB_PATH_CFG_ATTRIBUTE_FIRST = 0,
+ /**
+ * The path is forced to a drop, whatever the next-hop info says.
+ * something somewhere knows better...
+ */
+ FIB_PATH_CFG_ATTRIBUTE_DROP = FIB_PATH_CFG_ATTRIBUTE_FIRST,
+ /**
+ * The path uses an adj that is exclusive. I.e. it is known only by
+ * the source of the route.
+ */
+ FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE,
+ /**
+ * Recursion constraint via host
+ */
+ FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST,
+ /**
+ * Recursion constraint via attached
+ */
+ FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED,
+ /**
+ * The path is attached
+ */
+ FIB_PATH_CFG_ATTRIBUTE_ATTACHED,
+ /**
+ * The path is an interface receive; packets are made to appear
+ * as if received on the interface
+ */
+ FIB_PATH_CFG_ATTRIBUTE_INTF_RX,
+ /**
+ * The path is a deag with rpf-id
+ */
+ FIB_PATH_CFG_ATTRIBUTE_RPF_ID,
+ /**
+ * The path is a for-us (local receive) path
+ */
+ FIB_PATH_CFG_ATTRIBUTE_LOCAL,
+ /**
+ * The path is L2. i.e. the parameters therein are to be interpreted as
+ * pertaining to L2 config.
+ */
+ FIB_PATH_CFG_ATTRIBUTE_L2,
+ /**
+ * Marker. Add new types before this one, then update it.
+ */
+ FIB_PATH_CFG_ATTRIBUTE_LAST = FIB_PATH_CFG_ATTRIBUTE_L2,
+} __attribute__ ((packed)) fib_path_cfg_attribute_t;
+
+/**
+ * The maximum number of path attributes
+ */
+#define FIB_PATH_CFG_ATTRIBUTE_MAX (FIB_PATH_CFG_ATTRIBUTE_LAST + 1)
+
+#define FIB_PATH_CFG_ATTRIBUTES { \
+ [FIB_PATH_CFG_ATTRIBUTE_DROP] = "drop", \
+ [FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE] = "exclusive", \
+ [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST] = "resolve-host", \
+ [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED] = "resolve-attached", \
+ [FIB_PATH_CFG_ATTRIBUTE_LOCAL] = "local", \
+ [FIB_PATH_CFG_ATTRIBUTE_ATTACHED] = "attached", \
+ [FIB_PATH_CFG_ATTRIBUTE_INTF_RX] = "interface-rx", \
+ [FIB_PATH_CFG_ATTRIBUTE_RPF_ID] = "rpf-id", \
+ [FIB_PATH_CFG_ATTRIBUTE_L2] = "l2", \
+}
+
+#define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \
+ for (_item = FIB_PATH_CFG_ATTRIBUTE_FIRST; \
+ _item <= FIB_PATH_CFG_ATTRIBUTE_LAST; \
+ _item++)
+
+/**
+ * Path config flags from the attributes
+ */
+typedef enum fib_path_cfg_flags_t_ {
+ FIB_PATH_CFG_FLAG_NONE = 0,
+ FIB_PATH_CFG_FLAG_DROP = (1 << FIB_PATH_CFG_ATTRIBUTE_DROP),
+ FIB_PATH_CFG_FLAG_EXCLUSIVE = (1 << FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE),
+ FIB_PATH_CFG_FLAG_RESOLVE_HOST = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST),
+ FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED),
+ FIB_PATH_CFG_FLAG_LOCAL = (1 << FIB_PATH_CFG_ATTRIBUTE_LOCAL),
+ FIB_PATH_CFG_FLAG_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_ATTACHED),
+ FIB_PATH_CFG_FLAG_INTF_RX = (1 << FIB_PATH_CFG_ATTRIBUTE_INTF_RX),
+ FIB_PATH_CFG_FLAG_RPF_ID = (1 << FIB_PATH_CFG_ATTRIBUTE_RPF_ID),
+ FIB_PATH_CFG_FLAG_L2 = (1 << FIB_PATH_CFG_ATTRIBUTE_L2),
+} __attribute__ ((packed)) fib_path_cfg_flags_t;
+
+
+extern u8 *fib_path_format(fib_node_index_t pi, u8 *s);
+extern u8 *fib_path_adj_format(fib_node_index_t pi,
+ u32 indent,
+ u8 *s);
+
+extern u8 * format_fib_path(u8 * s, va_list * args);
+
+extern fib_node_index_t fib_path_create(fib_node_index_t pl_index,
+ const fib_route_path_t *path);
+extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index,
+ dpo_proto_t nh_proto,
+ fib_path_cfg_flags_t flags,
+ const dpo_id_t *dpo);
+
+extern int fib_path_cmp(fib_node_index_t path_index1,
+ fib_node_index_t path_index2);
+extern int fib_path_cmp_for_sort(void * a1, void * a2);
+extern int fib_path_cmp_w_route_path(fib_node_index_t path_index,
+ const fib_route_path_t *rpath);
+extern fib_node_index_t fib_path_copy(fib_node_index_t path_index,
+ fib_node_index_t path_list_index);
+extern int fib_path_resolve(fib_node_index_t path_index);
+extern int fib_path_is_resolved(fib_node_index_t path_index);
+extern int fib_path_is_recursive_constrained(fib_node_index_t path_index);
+extern int fib_path_is_exclusive(fib_node_index_t path_index);
+extern int fib_path_is_deag(fib_node_index_t path_index);
+extern int fib_path_is_looped(fib_node_index_t path_index);
+extern dpo_proto_t fib_path_get_proto(fib_node_index_t path_index);
+extern void fib_path_destroy(fib_node_index_t path_index);
+extern uword fib_path_hash(fib_node_index_t path_index);
+extern load_balance_path_t * fib_path_append_nh_for_multipath_hash(
+ fib_node_index_t path_index,
+ fib_forward_chain_type_t fct,
+ load_balance_path_t *hash_key);
+extern void fib_path_stack_mpls_disp(fib_node_index_t path_index,
+ dpo_proto_t payload_proto,
+ dpo_id_t *dpo);
+extern void fib_path_contribute_forwarding(fib_node_index_t path_index,
+ fib_forward_chain_type_t type,
+ dpo_id_t *dpo);
+extern void fib_path_contribute_urpf(fib_node_index_t path_index,
+ index_t urpf);
+extern adj_index_t fib_path_get_adj(fib_node_index_t path_index);
+extern int fib_path_recursive_loop_detect(fib_node_index_t path_index,
+ fib_node_index_t **entry_indicies);
+extern u32 fib_path_get_resolving_interface(fib_node_index_t fib_entry_index);
+extern u16 fib_path_get_weight(fib_node_index_t path_index);
+extern u16 fib_path_get_preference(fib_node_index_t path_index);
+
+extern void fib_path_module_init(void);
+extern fib_path_list_walk_rc_t fib_path_encode(fib_node_index_t path_list_index,
+ fib_node_index_t path_index,
+ void *ctx);
+
+#endif
diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c
new file mode 100644
index 00000000..4438671b
--- /dev/null
+++ b/src/vnet/fib/fib_path_ext.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/fib/fib_path_ext.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_internal.h>
+
+const char *fib_path_ext_adj_flags_names[] = FIB_PATH_EXT_ADJ_ATTR_NAMES;
+
+u8 *
+format_fib_path_ext (u8 * s, va_list * args)
+{
+ fib_path_ext_t *path_ext;
+ u32 ii;
+
+ path_ext = va_arg (*args, fib_path_ext_t *);
+
+ s = format(s, "path:%d ", path_ext->fpe_path_index);
+
+ switch (path_ext->fpe_type)
+ {
+ case FIB_PATH_EXT_MPLS:
+ s = format(s, "labels:",
+ path_ext->fpe_path_index);
+ for (ii = 0; ii < vec_len(path_ext->fpe_path.frp_label_stack); ii++)
+ {
+ s = format(s, "%U ",
+ format_mpls_unicast_label,
+ path_ext->fpe_path.frp_label_stack[ii]);
+ }
+ break;
+ case FIB_PATH_EXT_ADJ: {
+ fib_path_ext_adj_attr_t attr;
+
+ s = format(s, "adj-flags:");
+ if (path_ext->fpe_adj_flags)
+ {
+ FOR_EACH_PATH_EXT_ADJ_ATTR(attr)
+ {
+ if ((1 << attr) & path_ext->fpe_adj_flags)
+ {
+ s = format(s, "%s", fib_path_ext_adj_flags_names[attr]);
+ }
+ }
+ }
+ else
+ {
+ s = format(s, "None");
+ }
+ break;
+ }
+ }
+ return (s);
+}
+
+int
+fib_path_ext_cmp (fib_path_ext_t *path_ext,
+ const fib_route_path_t *rpath)
+{
+ return (fib_route_path_cmp(&path_ext->fpe_path, rpath));
+}
+
+static fib_path_list_walk_rc_t
+fib_path_ext_match (fib_node_index_t pl_index,
+ fib_node_index_t path_index,
+ void *ctx)
+{
+ fib_path_ext_t *path_ext = ctx;
+
+ if (!fib_path_cmp_w_route_path(path_index,
+ &path_ext->fpe_path))
+ {
+ path_ext->fpe_path_index = path_index;
+ return (FIB_PATH_LIST_WALK_STOP);
+ }
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+}
+
+void
+fib_path_ext_resolve (fib_path_ext_t *path_ext,
+ fib_node_index_t path_list_index)
+{
+ /*
+ * Find the path on the path list that this is an extension for
+ */
+ path_ext->fpe_path_index = FIB_NODE_INDEX_INVALID;
+ fib_path_list_walk(path_list_index,
+ fib_path_ext_match,
+ path_ext);
+}
+
+static void
+fib_path_ext_init (fib_path_ext_t *path_ext,
+ fib_node_index_t path_list_index,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath)
+{
+ path_ext->fpe_path = *rpath;
+ path_ext->fpe_path_index = FIB_NODE_INDEX_INVALID;
+ path_ext->fpe_adj_flags = FIB_PATH_EXT_ADJ_FLAG_NONE;
+ path_ext->fpe_type = ext_type;
+
+ fib_path_ext_resolve(path_ext, path_list_index);
+}
+
+/**
+ * @brief Return true if the label stack is implicit null
+ */
+static int
+fib_path_ext_is_imp_null (fib_path_ext_t *path_ext)
+{
+ return ((1 == vec_len(path_ext->fpe_label_stack)) &&
+ (MPLS_IETF_IMPLICIT_NULL_LABEL == path_ext->fpe_label_stack[0]));
+}
+
+load_balance_path_t *
+fib_path_ext_stack (fib_path_ext_t *path_ext,
+ fib_forward_chain_type_t child_fct,
+ fib_forward_chain_type_t imp_null_fct,
+ load_balance_path_t *nhs)
+{
+ fib_forward_chain_type_t parent_fct;
+ load_balance_path_t *nh;
+
+ if (!fib_path_is_resolved(path_ext->fpe_path_index))
+ return (nhs);
+
+ /*
+ * Since we are stacking this path-extension, it must have a valid out
+ * label. From the chain type requested by the child, determine what
+ * chain type we will request from the parent.
+ */
+ switch (child_fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ {
+ /*
+ * The EOS chain is tricky since, when the path has an imp-null
+ * label, one cannot know the adjacency to link to without knowing
+ * what the packet's payload protocol will be once the label is popped.
+ */
+ if (fib_path_ext_is_imp_null(path_ext))
+ {
+ parent_fct = imp_null_fct;
+ }
+ else
+ {
+ /*
+ * we have a label to stack. packets will thus be labelled when
+ * they encounter the child, ergo, non-eos.
+ */
+ parent_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS;
+ }
+ break;
+ }
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ if (fib_path_ext_is_imp_null(path_ext))
+ {
+ /*
+ * implicit-null label for the eos or IP chain, need to pick up
+ * the IP adj
+ */
+ parent_fct = child_fct;
+ }
+ else
+ {
+ /*
+ * we have a label to stack. packets will thus be labelled when
+ * they encounter the child, ergo, non-eos.
+ */
+ parent_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS;
+ }
+ break;
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ parent_fct = child_fct;
+ break;
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ parent_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS;
+ break;
+ default:
+ return (nhs);
+ break;
+ }
+
+ dpo_id_t via_dpo = DPO_INVALID;
+
+ /*
+ * The next object in the graph after the imposition of the label
+ * will be the DPO contributed by the path through which the packets
+ * are to be sent. We stack the MPLS Label DPO on this path DPO
+ */
+ fib_path_contribute_forwarding(path_ext->fpe_path_index,
+ parent_fct,
+ &via_dpo);
+
+ if (dpo_is_drop(&via_dpo) ||
+ load_balance_is_drop(&via_dpo))
+ {
+ /*
+ * don't stack a path extension on a drop. doing so will create
+ * a LB bucket entry on drop, and we will lose a percentage of traffic.
+ */
+ }
+ else
+ {
+ vec_add2(nhs, nh, 1);
+ nh->path_weight = fib_path_get_weight(path_ext->fpe_path_index);
+ nh->path_index = path_ext->fpe_path_index;
+ dpo_copy(&nh->path_dpo, &via_dpo);
+
+ /*
+ * The label is stackable for this chain type
+ * construct the mpls header that will be imposed in the data-path
+ */
+ if (!fib_path_ext_is_imp_null(path_ext))
+ {
+ /*
+ * we use the parent protocol for the label so that
+ * we pickup the correct MPLS imposition nodes to do
+ * ip[46] processing.
+ */
+ dpo_proto_t chain_proto;
+ mpls_eos_bit_t eos;
+ index_t mldi;
+
+ eos = (child_fct == FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS ?
+ MPLS_NON_EOS :
+ MPLS_EOS);
+ chain_proto = fib_forw_chain_type_to_dpo_proto(child_fct);
+
+ mldi = mpls_label_dpo_create(path_ext->fpe_label_stack,
+ eos, 255, 0,
+ chain_proto,
+ &nh->path_dpo);
+
+ dpo_set(&nh->path_dpo,
+ DPO_MPLS_LABEL,
+ chain_proto,
+ mldi);
+ }
+ }
+ dpo_reset(&via_dpo);
+
+ return (nhs);
+}
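+
+/*
+ * Worked example of the chain-type selection above (hypothetical
+ * labels): for a child requesting MPLS_EOS, a path-extension whose
+ * label stack is {implicit-null} requests imp_null_fct from the
+ * parent, since no label is imposed on the wire; a stack of {100}
+ * requests MPLS_NON_EOS, since packets leaving the child are still
+ * labelled.
+ */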
+
+fib_path_ext_t *
+fib_path_ext_list_find (const fib_path_ext_list_t *list,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath)
+{
+ fib_path_ext_t *path_ext;
+
+ vec_foreach(path_ext, list->fpel_exts)
+ {
+ if ((path_ext->fpe_type == ext_type) &&
+ !fib_path_ext_cmp(path_ext, rpath) )
+ {
+ return (path_ext);
+ }
+ }
+ return (NULL);
+}
+
+fib_path_ext_t *
+fib_path_ext_list_find_by_path_index (const fib_path_ext_list_t *list,
+ fib_node_index_t path_index)
+{
+ fib_path_ext_t *path_ext;
+
+ vec_foreach(path_ext, list->fpel_exts)
+ {
+ if (path_ext->fpe_path_index == path_index)
+ {
+ return (path_ext);
+ }
+ }
+ return (NULL);
+}
+
+
+fib_path_ext_t *
+fib_path_ext_list_push_back (fib_path_ext_list_t *list,
+ fib_node_index_t path_list_index,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath)
+{
+ fib_path_ext_t *path_ext;
+
+ path_ext = fib_path_ext_list_find(list, ext_type, rpath);
+
+ if (NULL == path_ext)
+ {
+ vec_add2(list->fpel_exts, path_ext, 1);
+ fib_path_ext_init(path_ext, path_list_index, ext_type, rpath);
+ }
+
+ return (path_ext);
+}
+
+/*
+ * Insert, sorted, a path extension into the entry's list.
+ * It's not strictly necessary to sort the path extensions, since each
+ * extension has the path index to which it resolves. However, by being
+ * sorted the load-balance produced has a deterministic order, not an order
+ * based on the sequence of extension additions. this is a considerable benefit.
+ */
+fib_path_ext_t *
+fib_path_ext_list_insert (fib_path_ext_list_t *list,
+ fib_node_index_t path_list_index,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath)
+{
+ fib_path_ext_t new_path_ext, *path_ext;
+ int i = 0;
+
+ if (0 == fib_path_ext_list_length(list))
+ {
+ return (fib_path_ext_list_push_back(list, path_list_index,
+ ext_type, rpath));
+ }
+
+ fib_path_ext_init(&new_path_ext, path_list_index, ext_type, rpath);
+
+ vec_foreach(path_ext, list->fpel_exts)
+ {
+ int res = fib_path_ext_cmp(path_ext, rpath);
+
+ if (0 == res)
+ {
+ /*
+ * don't add duplicate extensions. modify instead
+ */
+ vec_free(path_ext->fpe_label_stack);
+ *path_ext = new_path_ext;
+ goto done;
+ }
+ else if (res < 0)
+ {
+ i++;
+ }
+ else
+ {
+ break;
+ }
+ }
+ vec_insert_elts(list->fpel_exts, &new_path_ext, 1, i);
+done:
+ return (&(list->fpel_exts[i]));
+}
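+
+/*
+ * Illustrative usage (hypothetical rpaths): inserting extensions for
+ * paths B, A then C yields the list {A, B, C}; inserting A again does
+ * not grow the list, the existing extension is modified in place.
+ */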
+
+void
+fib_path_ext_list_resolve (fib_path_ext_list_t *list,
+ fib_node_index_t path_list_index)
+{
+ fib_path_ext_t *path_ext;
+
+ vec_foreach(path_ext, list->fpel_exts)
+ {
+ fib_path_ext_resolve(path_ext, path_list_index);
+ };
+}
+
+void
+fib_path_ext_list_remove (fib_path_ext_list_t *list,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath)
+{
+ fib_path_ext_t *path_ext;
+
+ path_ext = fib_path_ext_list_find(list, ext_type, rpath);
+
+ if (NULL != path_ext)
+ {
+ /*
+ * delete the element moving the remaining elements down 1 position.
+ * this preserves the sorted order.
+ */
+ vec_free(path_ext->fpe_label_stack);
+ vec_delete(list->fpel_exts, 1, (path_ext - list->fpel_exts));
+ }
+}
+
+void
+fib_path_ext_list_flush (fib_path_ext_list_t *list)
+{
+ fib_path_ext_t *path_ext;
+
+ vec_foreach(path_ext, list->fpel_exts)
+ {
+ vec_free(path_ext->fpe_label_stack);
+ };
+ vec_free(list->fpel_exts);
+ list->fpel_exts = NULL;
+}
+
+u8*
+format_fib_path_ext_list (u8 * s, va_list * args)
+{
+ fib_path_ext_list_t *list;
+ fib_path_ext_t *path_ext;
+
+ list = va_arg (*args, fib_path_ext_list_t *);
+
+ if (fib_path_ext_list_length(list))
+ {
+ s = format(s, " Extensions:");
+ vec_foreach(path_ext, list->fpel_exts)
+ {
+ s = format(s, "\n %U", format_fib_path_ext, path_ext);
+ };
+ }
+
+ return (s);
+}
+
+int
+fib_path_ext_list_length (const fib_path_ext_list_t *list)
+{
+ return (vec_len(list->fpel_exts));
+}
diff --git a/src/vnet/fib/fib_path_ext.h b/src/vnet/fib/fib_path_ext.h
new file mode 100644
index 00000000..d07941c1
--- /dev/null
+++ b/src/vnet/fib/fib_path_ext.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_PATH_EXT_H__
+#define __FIB_PATH_EXT_H__
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/load_balance.h>
+
+/**
+ * A description of the type of path extension
+ */
+typedef enum fib_path_ext_type_t_
+{
+ /**
+ * An MPLS extension that maintains the path's outgoing labels.
+ */
+ FIB_PATH_EXT_MPLS,
+ /**
+ * An adj-source extension indicating the path's refinement criteria
+ * result
+ */
+ FIB_PATH_EXT_ADJ,
+} fib_path_ext_type_t;
+
+/**
+ * Flags present on an ADJ sourced path-extension
+ */
+typedef enum fib_path_ext_adj_attr_t_
+{
+ FIB_PATH_EXT_ADJ_ATTR_REFINES_COVER,
+} fib_path_ext_adj_attr_t;
+
+typedef enum fib_path_ext_adj_flags_t_
+{
+ FIB_PATH_EXT_ADJ_FLAG_NONE = 0,
+ FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER = (1 << FIB_PATH_EXT_ADJ_ATTR_REFINES_COVER),
+} fib_path_ext_adj_flags_t;
+
+#define FIB_PATH_EXT_ADJ_ATTR_NAMES { \
+ [FIB_PATH_EXT_ADJ_ATTR_REFINES_COVER] = "refines-cover", \
+}
+
+#define FOR_EACH_PATH_EXT_ADJ_ATTR(_item) \
+ for (_item = FIB_PATH_EXT_ADJ_ATTR_REFINES_COVER; \
+ _item <= FIB_PATH_EXT_ADJ_ATTR_REFINES_COVER; \
+ _item++)
+
+/**
+ * A path extension is a per-entry addition to the forwarding information
+ * when packets are sent for that entry over that path.
+ *
+ * For example:
+ * ip route add 1.1.1.1/32 via 10.10.10.10 out-label 100
+ *
+ * The out-going MPLS label value 100 is a path-extension. It is a value specific
+ * to the entry 1.1.1.1/32 and valid only when packets are sent via 10.10.10.10.
+ */
+typedef struct fib_path_ext_t_
+{
+ /**
+ * A description of the path that is being extended.
+ * This description is used to match this extension with the [changing]
+ * instance of a fib_path_t that is extended
+ */
+ fib_route_path_t fpe_path;
+#define fpe_label_stack fpe_path.frp_label_stack
+
+ union {
+ /**
+ * For an ADJ type extension
+ *
+ * Flags describing the adj state
+ */
+ fib_path_ext_adj_flags_t fpe_adj_flags;
+ };
+
+ /**
+ * The type of path extension
+ */
+ fib_path_ext_type_t fpe_type;
+
+ /**
+ * The index of the path. This is the global index, not the path's
+ * position in the path-list.
+ */
+ fib_node_index_t fpe_path_index;
+} __attribute__ ((packed)) fib_path_ext_t;
+
+extern u8 * format_fib_path_ext(u8 * s, va_list * args);
+
+extern int fib_path_ext_cmp(fib_path_ext_t *path_ext,
+ const fib_route_path_t *rpath);
+
+extern void fib_path_ext_resolve(fib_path_ext_t *path_ext,
+ fib_node_index_t path_list_index);
+
+extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext,
+ fib_forward_chain_type_t fct,
+ fib_forward_chain_type_t imp_null_fct,
+ load_balance_path_t *nhs);
+
+extern fib_path_ext_t * fib_path_ext_list_push_back (fib_path_ext_list_t *list,
+ fib_node_index_t path_list_index,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath);
+
+extern fib_path_ext_t * fib_path_ext_list_insert (fib_path_ext_list_t *list,
+ fib_node_index_t path_list_index,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath);
+
+extern u8* format_fib_path_ext_list (u8 * s, va_list * args);
+
+extern void fib_path_ext_list_remove (fib_path_ext_list_t *list,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath);
+
+extern fib_path_ext_t * fib_path_ext_list_find (const fib_path_ext_list_t *list,
+ fib_path_ext_type_t ext_type,
+ const fib_route_path_t *rpath);
+extern fib_path_ext_t * fib_path_ext_list_find_by_path_index (const fib_path_ext_list_t *list,
+ fib_node_index_t path_index);
+extern void fib_path_ext_list_resolve(fib_path_ext_list_t *list,
+ fib_node_index_t path_list_index);
+
+extern int fib_path_ext_list_length(const fib_path_ext_list_t *list);
+extern void fib_path_ext_list_flush(fib_path_ext_list_t *list);
+
+#endif
+
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
new file mode 100644
index 00000000..f30fd7ea
--- /dev/null
+++ b/src/vnet/fib/fib_path_list.c
@@ -0,0 +1,1380 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/mhash.h>
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/fib/fib_node_list.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_urpf_list.h>
+
+/**
+ * The magic number of child entries that make a path-list popular.
+ * There's a trade-off here between convergence and forwarding speed.
+ * Popular path-lists generate load-balance maps for the entries that
+ * use them. If the map is present there is a switch-path cost to indirect
+ * through the map - this indirection provides the fast convergence - so
+ * without the map convergence is slower.
+ */
+#define FIB_PATH_LIST_POPULAR 64
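+
+/*
+ * Illustrative consequence: the 64th child added to a path-list marks
+ * it popular; thereafter entries using it build load-balance maps,
+ * trading a per-packet indirection for faster convergence when a path
+ * fails.
+ */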
+
+/**
+ * FIB path-list
+ * A representation of the list/set of paths through which a prefix is reachable
+ */
+typedef struct fib_path_list_t_ {
+ /**
+ * A path-list is a node in the FIB graph.
+ */
+ fib_node_t fpl_node;
+
+ /**
+ * Flags on the path-list
+ */
+ fib_path_list_flags_t fpl_flags;
+
+ /**
+ * Vector of path indices for all configured paths.
+ * For shareable path-lists this list MUST not change.
+ */
+ fib_node_index_t *fpl_paths;
+
+ /**
+ * the RPF list calculated for this path list
+ */
+ fib_node_index_t fpl_urpf;
+
+ /**
+ * Hash table of paths. valid only with INDEXED flag
+ */
+ uword *fpl_db;
+} fib_path_list_t;
+
+/*
+ * Array of strings/names for the FIB sources
+ */
+static const char *fib_path_list_attr_names[] = FIB_PATH_LIST_ATTRIBUTES;
+
+/*
+ * The memory pool from which we allocate all the path-lists
+ */
+static fib_path_list_t * fib_path_list_pool;
+
+/*
+ * The data-base of shared path-lists
+ */
+static uword *fib_path_list_db;
+
+/*
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...) \
+{ \
+ u8 *_tmp = 0; \
+ _tmp = fib_path_list_format( \
+ fib_path_list_get_index(_pl), _tmp); \
+ clib_warning("pl:[%d:%p:%p:%s]:" _fmt, \
+ fib_path_list_get_index(_pl), \
+ _pl, _pl->fpl_paths, _tmp, \
+ ##_args); \
+ vec_free(_tmp); \
+}
+#else
+#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...)
+#endif
+
+static fib_path_list_t *
+fib_path_list_get (fib_node_index_t index)
+{
+ return (pool_elt_at_index(fib_path_list_pool, index));
+}
+
+static fib_node_t *
+fib_path_list_get_node (fib_node_index_t index)
+{
+ return ((fib_node_t*)fib_path_list_get(index));
+}
+
+static fib_path_list_t*
+fib_path_list_from_fib_node (fib_node_t *node)
+{
+#if CLIB_DEBUG > 0
+ ASSERT(FIB_NODE_TYPE_PATH_LIST == node->fn_type);
+#endif
+ return ((fib_path_list_t*)node);
+}
+
+static fib_node_index_t
+fib_path_list_get_index (fib_path_list_t *path_list)
+{
+ return (path_list - fib_path_list_pool);
+}
+
+static u8 *
+format_fib_path_list (u8 * s, va_list * args)
+{
+ fib_path_list_attribute_t attr;
+ fib_node_index_t *path_index;
+ fib_path_list_t *path_list;
+
+ path_list = va_arg (*args, fib_path_list_t *);
+
+ s = format (s, " index:%u", fib_path_list_get_index(path_list));
+ s = format (s, " locks:%u", path_list->fpl_node.fn_locks);
+
+ if (FIB_PATH_LIST_FLAG_NONE != path_list->fpl_flags)
+ {
+ s = format (s, " flags:");
+ FOR_EACH_PATH_LIST_ATTRIBUTE(attr)
+ {
+ if ((1<<attr) & path_list->fpl_flags)
+ {
+ s = format (s, "%s,", fib_path_list_attr_names[attr]);
+ }
+ }
+ }
+ s = format (s, " %U\n", format_fib_urpf_list, path_list->fpl_urpf);
+
+ vec_foreach (path_index, path_list->fpl_paths)
+ {
+ s = fib_path_format(*path_index, s);
+ s = format(s, "\n");
+ }
+
+ return (s);
+}
+
+u8 *
+fib_path_list_format (fib_node_index_t path_list_index,
+ u8 * s)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ return (format(s, "%U", format_fib_path_list, path_list));
+}
+
+static uword
+fib_path_list_hash (fib_path_list_t *path_list)
+{
+ uword old_path_list_hash, new_path_list_hash, path_hash;
+ fib_node_index_t *path_index;
+
+ ASSERT(path_list);
+
+ new_path_list_hash = old_path_list_hash = vec_len(path_list->fpl_paths);
+
+ vec_foreach (path_index, path_list->fpl_paths)
+ {
+ path_hash = fib_path_hash(*path_index);
+#if uword_bits == 64
+ hash_mix64(path_hash, old_path_list_hash, new_path_list_hash);
+#else
+ hash_mix32(path_hash, old_path_list_hash, new_path_list_hash);
+#endif
+ }
+
+ return (new_path_list_hash);
+}
+
+always_inline uword
+fib_path_list_db_hash_key_from_index (uword index)
+{
+ return 1 + 2*index;
+}
+
+always_inline uword
+fib_path_list_db_hash_key_is_index (uword key)
+{
+ return key & 1;
+}
+
+always_inline uword
+fib_path_list_db_hash_key_2_index (uword key)
+{
+ ASSERT (fib_path_list_db_hash_key_is_index (key));
+ return key / 2;
+}
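+
+/*
+ * Worked example of the key encoding above (hypothetical index): a
+ * path-list at pool index 7 is stored under key 1 + 2*7 = 15; the set
+ * low bit marks it as an index. A raw path-list pointer, being
+ * aligned, has a clear low bit and is stored as-is, so the two key
+ * kinds can share one hash table.
+ */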
+
+static fib_path_list_t*
+fib_path_list_db_get_from_hash_key (uword key)
+{
+ fib_path_list_t *path_list;
+
+ if (fib_path_list_db_hash_key_is_index (key))
+ {
+ fib_node_index_t path_list_index;
+
+ path_list_index = fib_path_list_db_hash_key_2_index(key);
+ path_list = fib_path_list_get(path_list_index);
+ }
+ else
+ {
+ path_list = uword_to_pointer (key, fib_path_list_t *);
+ }
+
+ return (path_list);
+}
+
+static uword
+fib_path_list_db_hash_key_sum (hash_t * h,
+ uword key)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_db_get_from_hash_key(key);
+
+ return (fib_path_list_hash(path_list));
+}
+
+static uword
+fib_path_list_db_hash_key_equal (hash_t * h,
+ uword key1,
+ uword key2)
+{
+ fib_path_list_t *path_list1, *path_list2;
+
+ path_list1 = fib_path_list_db_get_from_hash_key(key1);
+ path_list2 = fib_path_list_db_get_from_hash_key(key2);
+
+ return (fib_path_list_hash(path_list1) ==
+ fib_path_list_hash(path_list2));
+}
+
+static fib_node_index_t
+fib_path_list_db_find (fib_path_list_t *path_list)
+{
+ uword *p;
+
+ p = hash_get(fib_path_list_db, path_list);
+
+ if (NULL != p)
+ {
+ return p[0];
+ }
+
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+static void
+fib_path_list_db_insert (fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ ASSERT(FIB_NODE_INDEX_INVALID == fib_path_list_db_find(path_list));
+
+ hash_set (fib_path_list_db,
+ fib_path_list_db_hash_key_from_index(path_list_index),
+ path_list_index);
+
+ FIB_PATH_LIST_DBG(path_list, "DB-inserted");
+}
+
+static void
+fib_path_list_db_remove (fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != fib_path_list_db_find(path_list));
+
+ hash_unset(fib_path_list_db,
+ fib_path_list_db_hash_key_from_index(path_list_index));
+
+ FIB_PATH_LIST_DBG(path_list, "DB-removed");
+}
+
+static void
+fib_path_list_destroy (fib_path_list_t *path_list)
+{
+ fib_node_index_t *path_index;
+
+ FIB_PATH_LIST_DBG(path_list, "destroy");
+
+ vec_foreach (path_index, path_list->fpl_paths)
+ {
+ fib_path_destroy(*path_index);
+ }
+
+ vec_free(path_list->fpl_paths);
+ fib_urpf_list_unlock(path_list->fpl_urpf);
+
+ fib_node_deinit(&path_list->fpl_node);
+ pool_put(fib_path_list_pool, path_list);
+}
+
+static void
+fib_path_list_last_lock_gone (fib_node_t *node)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_from_fib_node(node);
+
+ FIB_PATH_LIST_DBG(path_list, "last-lock");
+
+ if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)
+ {
+ fib_path_list_db_remove(fib_path_list_get_index(path_list));
+ }
+ fib_path_list_destroy(path_list);
+}
+
+/*
+ * fib_path_mk_lb
+ *
+ * update the multipath adj this path-list will contribute to its
+ * children's forwarding.
+ */
+static void
+fib_path_list_mk_lb (fib_path_list_t *path_list,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo)
+{
+ load_balance_path_t *nhs;
+ fib_node_index_t *path_index;
+
+ nhs = NULL;
+
+ if (!dpo_id_is_valid(dpo))
+ {
+ /*
+ * first time create
+ */
+ dpo_set(dpo,
+ DPO_LOAD_BALANCE,
+ fib_forw_chain_type_to_dpo_proto(fct),
+ load_balance_create(0,
+ fib_forw_chain_type_to_dpo_proto(fct),
+ 0 /* FIXME FLOW HASH */));
+ }
+
+ /*
+ * We gather the DPOs from resolved paths.
+ */
+ vec_foreach (path_index, path_list->fpl_paths)
+ {
+ nhs = fib_path_append_nh_for_multipath_hash(*path_index,
+ fct,
+ nhs);
+ }
+
+ /*
+ * Path-list load-balances, which if used, would be shared and hence
+ * never need a load-balance map.
+ */
+ load_balance_multipath_update(dpo, nhs, LOAD_BALANCE_FLAG_NONE);
+
+ FIB_PATH_LIST_DBG(path_list, "mk lb: %d", dpo->dpoi_index);
+
+ vec_free(nhs);
+}
+
+/**
+ * @brief [re]build the path list's uRPF list
+ */
+static void
+fib_path_list_mk_urpf (fib_path_list_t *path_list)
+{
+ fib_node_index_t *path_index;
+
+ /*
+ * ditch the old one. by iterating through all paths we are going
+ * to re-find all the adjs that were in the old one anyway. If we
+ * keep the old one, then the |sort|uniq requires more work.
+ * All users of the RPF list have their own lock, so we can release
+ * immediately.
+ */
+ fib_urpf_list_unlock(path_list->fpl_urpf);
+ path_list->fpl_urpf = fib_urpf_list_alloc_and_lock();
+
+ vec_foreach (path_index, path_list->fpl_paths)
+ {
+ fib_path_contribute_urpf(*path_index, path_list->fpl_urpf);
+ }
+
+ fib_urpf_list_bake(path_list->fpl_urpf);
+}
+
+/**
+ * @brief Contribute (add) this path list's uRPF list. This allows the child
+ * to construct an aggregate list.
+ */
+void
+fib_path_list_contribute_urpf (fib_node_index_t path_list_index,
+ index_t urpf)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ fib_urpf_list_combine(urpf, path_list->fpl_urpf);
+}
+
+/**
+ * @brief Return the the child the RPF list pre-built for this path list
+ */
+index_t
+fib_path_list_get_urpf (fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ return (path_list->fpl_urpf);
+}
+
+/*
+ * fib_path_list_back_walk
+ *
+ * Called from one of this path-list's paths to propagate
+ * a back walk
+ */
+void
+fib_path_list_back_walk (fib_node_index_t path_list_index,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ fib_path_list_mk_urpf(path_list);
+
+ /*
+ * propagate the backwalk further
+ */
+ if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR)
+ {
+ /*
+ * many children. schedule an async walk
+ */
+ fib_walk_async(FIB_NODE_TYPE_PATH_LIST,
+ path_list_index,
+ FIB_WALK_PRIORITY_LOW,
+ ctx);
+ }
+ else
+ {
+ /*
+ * only a few children. continue the walk synchronously
+ */
+ fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx);
+ }
+}
+
+/*
+ * fib_path_list_back_walk_notify
+ *
+ * A back walk has reached this path-list.
+ */
+static fib_node_back_walk_rc_t
+fib_path_list_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ /*
+ * the path-list is not a direct child of any other node type.
+ * paths, which do not change their to-list mapping, save the
+ * list they are a member of and invoke the BW function directly.
+ */
+ ASSERT(0);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * Display the path-list memory usage
+ */
+static void
+fib_path_list_memory_show (void)
+{
+ fib_show_memory_usage("Path-list",
+ pool_elts(fib_path_list_pool),
+ pool_len(fib_path_list_pool),
+ sizeof(fib_path_list_t));
+ fib_urpf_list_show_mem();
+}
+
+/*
+ * The FIB path-list's graph node virtual function table
+ */
+static const fib_node_vft_t fib_path_list_vft = {
+ .fnv_get = fib_path_list_get_node,
+ .fnv_last_lock = fib_path_list_last_lock_gone,
+ .fnv_back_walk = fib_path_list_back_walk_notify,
+ .fnv_mem_show = fib_path_list_memory_show,
+};
+
+static inline fib_path_list_t *
+fib_path_list_alloc (fib_node_index_t *path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ pool_get(fib_path_list_pool, path_list);
+ memset(path_list, 0, sizeof(*path_list));
+
+ fib_node_init(&path_list->fpl_node,
+ FIB_NODE_TYPE_PATH_LIST);
+ path_list->fpl_urpf = INDEX_INVALID;
+ path_list->fpl_paths = NULL;
+
+ *path_list_index = fib_path_list_get_index(path_list);
+
+ FIB_PATH_LIST_DBG(path_list, "alloc");
+
+ return (path_list);
+}
+
+static fib_path_list_t *
+fib_path_list_resolve (fib_path_list_t *path_list)
+{
+ fib_node_index_t *path_index, *paths, path_list_index;
+
+ ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_RESOLVED));
+
+ /*
+ * resolving a path-list is a recursive action. this means more path
+ * lists can be created during this call, and hence this path-list
+ * can be realloc'd. so we work with copies.
+ * this function is called only once per path-list, so it's no great overhead.
+ */
+ path_list_index = fib_path_list_get_index(path_list);
+ paths = vec_dup(path_list->fpl_paths);
+
+ vec_foreach (path_index, paths)
+ {
+ fib_path_resolve(*path_index);
+ }
+
+ vec_free(paths);
+ path_list = fib_path_list_get(path_list_index);
+
+ FIB_PATH_LIST_DBG(path_list, "resovled");
+
+ if (!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_NO_URPF))
+ {
+ fib_path_list_mk_urpf(path_list);
+ }
+ return (path_list);
+}
+
+u32
+fib_path_list_get_n_paths (fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ return (vec_len(path_list->fpl_paths));
+}
+
+
+u32
+fib_path_list_get_resolving_interface (fib_node_index_t path_list_index)
+{
+ fib_node_index_t *path_index;
+ fib_path_list_t *path_list;
+ u32 sw_if_index;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ sw_if_index = ~0;
+ vec_foreach (path_index, path_list->fpl_paths)
+ {
+ sw_if_index = fib_path_get_resolving_interface(*path_index);
+ if (~0 != sw_if_index)
+ {
+ return (sw_if_index);
+ }
+ }
+
+ return (sw_if_index);
+}
+
+dpo_proto_t
+fib_path_list_get_proto (fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ /*
+ * we don't support a mix of path protocols, so we can return the proto
+ * of the first
+ */
+ return (fib_path_get_proto(path_list->fpl_paths[0]));
+}
+
+int
+fib_path_list_is_looped (fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED);
+}
+
+int
+fib_path_list_is_popular (fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR);
+}
+
+static fib_path_list_flags_t
+fib_path_list_flags_fixup (fib_path_list_flags_t flags)
+{
+ /*
+ * we do not share drop or exclusive path-lists
+ */
+ if (flags & FIB_PATH_LIST_FLAG_DROP ||
+ flags & FIB_PATH_LIST_FLAG_EXCLUSIVE)
+ {
+ flags &= ~FIB_PATH_LIST_FLAG_SHARED;
+ }
+
+ return (flags);
+}
+
+fib_node_index_t
+fib_path_list_create (fib_path_list_flags_t flags,
+ const fib_route_path_t *rpaths)
+{
+ fib_node_index_t path_list_index, old_path_list_index;
+ fib_path_list_t *path_list;
+ int i;
+
+ flags = fib_path_list_flags_fixup(flags);
+ path_list = fib_path_list_alloc(&path_list_index);
+ path_list->fpl_flags = flags;
+
+ if (NULL != rpaths)
+ {
+ vec_foreach_index(i, rpaths)
+ {
+ vec_add1(path_list->fpl_paths,
+ fib_path_create(path_list_index,
+ &rpaths[i]));
+ }
+ /*
+ * we sort the paths since the key for the path-list is
+ * the description of the paths it contains. The paths need to
+ * be sorted else this description will differ.
+ */
+ if (vec_len(path_list->fpl_paths) > 1)
+ {
+ vec_sort_with_function(path_list->fpl_paths,
+ fib_path_cmp_for_sort);
+ }
+ }
+
+ /*
+ * If a shared path list is requested, consult the DB for a match
+ */
+ if (flags & FIB_PATH_LIST_FLAG_SHARED)
+ {
+ /*
+ * check for a matching path-list in the DB.
+ * If we find one then we can return the existing one and destroy the
+ * new one just created.
+ */
+ old_path_list_index = fib_path_list_db_find(path_list);
+ if (FIB_NODE_INDEX_INVALID != old_path_list_index)
+ {
+ fib_path_list_destroy(path_list);
+
+ path_list_index = old_path_list_index;
+ }
+ else
+ {
+ /*
+ * if there was not a matching path-list, then this
+ * new one will need inserting into the DB and resolving.
+ */
+ fib_path_list_db_insert(path_list_index);
+ path_list = fib_path_list_resolve(path_list);
+ }
+ }
+ else
+ {
+ /*
+ * no shared path list requested. resolve and use the one
+ * just created.
+ */
+ path_list = fib_path_list_resolve(path_list);
+ }
+
+ return (path_list_index);
+}
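+
+/*
+ * Illustrative usage (hypothetical): two routes created with
+ * FIB_PATH_LIST_FLAG_SHARED and the same set of rpaths receive the
+ * same path-list index; the second create finds the first list in the
+ * DB and destroys the copy it had just built.
+ */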
+
+static fib_path_cfg_flags_t
+fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf)
+{
+ fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE;
+
+ if (plf & FIB_PATH_LIST_FLAG_DROP)
+ {
+ pf |= FIB_PATH_CFG_FLAG_DROP;
+ }
+ if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE)
+ {
+ pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE;
+ }
+ if (plf & FIB_PATH_LIST_FLAG_LOCAL)
+ {
+ pf |= FIB_PATH_CFG_FLAG_LOCAL;
+ }
+
+ return (pf);
+}
+
+fib_node_index_t
+fib_path_list_create_special (dpo_proto_t nh_proto,
+ fib_path_list_flags_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_node_index_t path_index, path_list_index;
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_alloc(&path_list_index);
+ path_list->fpl_flags = flags;
+
+ path_index =
+ fib_path_create_special(path_list_index,
+ nh_proto,
+ fib_path_list_flags_2_path_flags(flags),
+ dpo);
+ vec_add1(path_list->fpl_paths, path_index);
+
+ /*
+ * we don't share path-lists. we can do PIC on them so why bother.
+ */
+ path_list = fib_path_list_resolve(path_list);
+
+ return (path_list_index);
+}
+
+/*
+ * return the index into the path-list's vector of paths of the
+ * matching path; ~0 if not found
+ */
+u32
+fib_path_list_find_rpath (fib_node_index_t path_list_index,
+ const fib_route_path_t *rpath)
+{
+ fib_path_list_t *path_list;
+ u32 ii;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ vec_foreach_index (ii, path_list->fpl_paths)
+ {
+ if (!fib_path_cmp_w_route_path(path_list->fpl_paths[ii], rpath))
+ {
+ return (ii);
+ }
+ }
+ return (~0);
+}
+
+
+/*
+ * fib_path_list_path_add
+ *
+ * Append one more path to a non-shared path-list, modifying it in place.
+ * Returns the index of the path added, or, if the path is a duplicate,
+ * the index of the already-present path.
+ */
+fib_node_index_t
+fib_path_list_path_add (fib_node_index_t path_list_index,
+ const fib_route_path_t *rpaths)
+{
+ fib_node_index_t new_path_index, *orig_path_index;
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ ASSERT(1 == vec_len(rpaths));
+ ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED));
+
+ FIB_PATH_LIST_DBG(orig_path_list, "path-add");
+
+ new_path_index = fib_path_create(path_list_index,
+ rpaths);
+
+ vec_foreach (orig_path_index, path_list->fpl_paths)
+ {
+ /*
+ * don't add duplicate paths
+ */
+ if (0 == fib_path_cmp(new_path_index, *orig_path_index))
+ {
+ fib_path_destroy(new_path_index);
+ return (*orig_path_index);
+ }
+ }
+
+ /*
+ * Add the new path - no sort, no sharing, no key..
+ */
+ vec_add1(path_list->fpl_paths, new_path_index);
+
+ FIB_PATH_LIST_DBG(path_list, "path-added");
+
+ /*
+ * resolve the path that was just added
+ */
+ fib_path_resolve(new_path_index);
+
+ return (new_path_index);
+}
+
+fib_node_index_t
+fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
+ fib_path_list_flags_t flags,
+ const fib_route_path_t *rpaths)
+{
+ fib_node_index_t path_index, new_path_index, *orig_path_index;
+ fib_path_list_t *path_list, *orig_path_list;
+ fib_node_index_t exist_path_list_index;
+ fib_node_index_t path_list_index;
+ fib_node_index_t pi;
+
+ ASSERT(1 == vec_len(rpaths));
+
+ /*
+ * alloc the new list before we retrieve the old one, lest
+ * the alloc result in a realloc
+ */
+ path_list = fib_path_list_alloc(&path_list_index);
+
+ orig_path_list = fib_path_list_get(orig_path_list_index);
+
+ FIB_PATH_LIST_DBG(orig_path_list, "copy-add");
+
+ flags = fib_path_list_flags_fixup(flags);
+ path_list->fpl_flags = flags;
+
+ vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths));
+ pi = 0;
+
+ new_path_index = fib_path_create(path_list_index,
+ rpaths);
+
+ vec_foreach (orig_path_index, orig_path_list->fpl_paths)
+ {
+ /*
+ * don't add duplicate paths
+ * In the unlikely event the path is a duplicate, then we'll
+ * find a matching path-list later and this one will be toast.
+ */
+ if (0 != fib_path_cmp(new_path_index, *orig_path_index))
+ {
+ path_index = fib_path_copy(*orig_path_index, path_list_index);
+ path_list->fpl_paths[pi++] = path_index;
+ }
+ else
+ {
+ _vec_len(path_list->fpl_paths) = vec_len(orig_path_list->fpl_paths);
+ }
+ }
+
+ path_list->fpl_paths[pi] = new_path_index;
+
+ /*
+ * we sort the paths since the key for the path-list is
+ * the description of the paths it contains. The paths need to
+ * be sorted else this description will differ.
+ */
+ vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort);
+
+ FIB_PATH_LIST_DBG(path_list, "path-added");
+
+ /*
+ * check for a matching path-list in the DB.
+ * If we find one then we can return the existing one and destroy the
+ * new one just created.
+ */
+ if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)
+ {
+ exist_path_list_index = fib_path_list_db_find(path_list);
+ if (FIB_NODE_INDEX_INVALID != exist_path_list_index)
+ {
+ fib_path_list_destroy(path_list);
+
+ path_list_index = exist_path_list_index;
+ }
+ else
+ {
+ /*
+ * if there was not a matching path-list, then this
+ * new one will need inserting into the DB and resolving.
+ */
+ fib_path_list_db_insert(path_list_index);
+
+ path_list = fib_path_list_resolve(path_list);
+ }
+ }
+ else
+ {
+ /*
+ * no shared path list requested. resolve and use the one
+ * just created.
+ */
+ path_list = fib_path_list_resolve(path_list);
+ }
+
+ return (path_list_index);
+}
+
+/*
+ * fib_path_list_path_remove
+ */
+fib_node_index_t
+fib_path_list_path_remove (fib_node_index_t path_list_index,
+ const fib_route_path_t *rpaths)
+{
+ fib_node_index_t match_path_index, tmp_path_index;
+ fib_path_list_t *path_list;
+ fib_node_index_t pi;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ ASSERT(1 == vec_len(rpaths));
+ ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED));
+
+ FIB_PATH_LIST_DBG(orig_path_list, "path-remove");
+
+ /*
+ * create a representation of the path to be removed, so it
+ * can be used as a comparison object during the copy.
+ */
+ tmp_path_index = fib_path_create(path_list_index,
+ rpaths);
+ match_path_index = FIB_NODE_INDEX_INVALID;
+
+ vec_foreach_index (pi, path_list->fpl_paths)
+ {
+ if (0 == fib_path_cmp(tmp_path_index,
+ path_list->fpl_paths[pi]))
+ {
+ /*
+ * match - remove it
+ */
+ match_path_index = path_list->fpl_paths[pi];
+ fib_path_destroy(match_path_index);
+ vec_del1(path_list->fpl_paths, pi);
+ }
+ }
+
+ /*
+ * done with the temporary now
+ */
+ fib_path_destroy(tmp_path_index);
+
+ return (match_path_index);
+}
+
+/*
+ * fib_path_list_copy_and_path_remove
+ *
+ * Copy the path-list excluding the path passed.
+ * If the path is the last one, then the index returned will be invalid.
+ * i.e. the path-list is toast.
+ */
+fib_node_index_t
+fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index,
+ fib_path_list_flags_t flags,
+ const fib_route_path_t *rpaths)
+{
+ fib_node_index_t path_index, *orig_path_index, path_list_index, tmp_path_index;
+ fib_path_list_t *path_list, *orig_path_list;
+ fib_node_index_t pi;
+
+ ASSERT(1 == vec_len(rpaths));
+
+ path_list = fib_path_list_alloc(&path_list_index);
+
+ flags = fib_path_list_flags_fixup(flags);
+ orig_path_list = fib_path_list_get(orig_path_list_index);
+
+ FIB_PATH_LIST_DBG(orig_path_list, "copy-remove");
+
+ path_list->fpl_flags = flags;
+ /*
+ * allocate as many paths as we might need in one go, rather than
+ * using vec_add to do a few at a time.
+ */
+ if (vec_len(orig_path_list->fpl_paths) > 1)
+ {
+ vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths) - 2);
+ }
+ pi = 0;
+
+ /*
+ * create a representation of the path to be removed, so it
+ * can be used as a comparison object during the copy.
+ */
+ tmp_path_index = fib_path_create(path_list_index,
+ rpaths);
+
+ vec_foreach (orig_path_index, orig_path_list->fpl_paths)
+ {
+ if (0 != fib_path_cmp(tmp_path_index, *orig_path_index)) {
+ path_index = fib_path_copy(*orig_path_index, path_list_index);
+ if (pi < vec_len(path_list->fpl_paths))
+ {
+ path_list->fpl_paths[pi++] = path_index;
+ }
+ else
+ {
+ /*
+ * this is the unlikely case that the path being
+ * removed does not match one in the path-list, so
+ * we end up with as many paths as we started with.
+ * the paths vector was sized above with the expectation
+ * that we would have 1 less.
+ */
+ vec_add1(path_list->fpl_paths, path_index);
+ }
+ }
+ }
+
+ /*
+ * done with the temporary now
+ */
+ fib_path_destroy(tmp_path_index);
+
+ /*
+ * if there are no paths, then the new path-list is aborted
+ */
+ if (0 == vec_len(path_list->fpl_paths)) {
+ FIB_PATH_LIST_DBG(path_list, "last-path-removed");
+
+ fib_path_list_destroy(path_list);
+
+ path_list_index = FIB_NODE_INDEX_INVALID;
+ } else {
+ /*
+ * we sort the paths since the key for the path-list is
+ * the description of the paths it contains. The paths need to
+ * be sorted else this description will differ.
+ */
+ vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort);
+
+ /*
+ * If a shared path list is requested, consult the DB for a match
+ */
+ if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)
+ {
+ fib_node_index_t exist_path_list_index;
+
+ /*
+ * check for a matching path-list in the DB.
+ * If we find one then we can return the existing one and destroy the
+ * new one just created.
+ */
+ exist_path_list_index = fib_path_list_db_find(path_list);
+ if (FIB_NODE_INDEX_INVALID != exist_path_list_index)
+ {
+ fib_path_list_destroy(path_list);
+
+ path_list_index = exist_path_list_index;
+ }
+ else
+ {
+ /*
+ * if there was not a matching path-list, then this
+ * new one will need inserting into the DB and resolving.
+ */
+ fib_path_list_db_insert(path_list_index);
+
+ path_list = fib_path_list_resolve(path_list);
+ }
+ }
+ else
+ {
+ /*
+ * no shared path list requested. resolve and use the one
+ * just created.
+ */
+ path_list = fib_path_list_resolve(path_list);
+ }
+ }
+
+ return (path_list_index);
+}
+
+/*
+ * fib_path_list_contribute_forwarding
+ *
+ * Return the index of a load-balance that users of this path-list should
+ * use for forwarding
+ */
+void
+fib_path_list_contribute_forwarding (fib_node_index_t path_list_index,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ fib_path_list_mk_lb(path_list, fct, dpo);
+}
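+
+/*
+ * A minimal usage sketch for the function above; illustrative only and
+ * not part of the patch's API. It assumes the caller holds a valid
+ * path-list index and wants the IPv4 unicast load-balance copied into a
+ * DPO. FIB_FORW_CHAIN_TYPE_UNICAST_IP4 is the assumed chain-type name.
+ */
+static void
+example_stack_on_path_list (fib_node_index_t path_list_index,
+                            dpo_id_t *result)
+{
+    dpo_id_t tmp = DPO_INVALID;
+
+    /* ask the path-list for its forwarding contribution */
+    fib_path_list_contribute_forwarding(path_list_index,
+                                        FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                        &tmp);
+    dpo_copy(result, &tmp);
+    dpo_reset(&tmp);
+}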
+
+/*
+ * fib_path_list_get_adj
+ *
+ * Return the index of the adjacency for the first path that users of
+ * this path-list should use for forwarding
+ */
+adj_index_t
+fib_path_list_get_adj (fib_node_index_t path_list_index,
+ fib_forward_chain_type_t type)
+{
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+ return (fib_path_get_adj(path_list->fpl_paths[0]));
+}
+
+int
+fib_path_list_recursive_loop_detect (fib_node_index_t path_list_index,
+ fib_node_index_t **entry_indicies)
+{
+ fib_node_index_t *path_index;
+ int is_looped, list_looped;
+ fib_path_list_t *path_list;
+
+ list_looped = 0;
+ path_list = fib_path_list_get(path_list_index);
+
+ vec_foreach (path_index, path_list->fpl_paths)
+ {
+ fib_node_index_t *copy, **copy_ptr;
+
+ /*
+ * we need a copy of the nodes visited so that entries added while
+ * exploring the nth path, on which a loop is detected, are not
+ * searched again for the (n+1)th path, which would find a loop that
+ * does not exist.
+ */
+ copy = vec_dup(*entry_indicies);
+ copy_ptr = &copy;
+
+ is_looped = fib_path_recursive_loop_detect(*path_index, copy_ptr);
+ list_looped += is_looped;
+ }
+
+ FIB_PATH_LIST_DBG(path_list, "loop-detect: eval:%d", list_looped);
+
+ if (list_looped)
+ {
+ path_list->fpl_flags |= FIB_PATH_LIST_FLAG_LOOPED;
+ }
+ else
+ {
+ path_list->fpl_flags &= ~FIB_PATH_LIST_FLAG_LOOPED;
+ }
+
+ return (list_looped);
+}
+
+u32
+fib_path_list_child_add (fib_node_index_t path_list_index,
+ fib_node_type_t child_type,
+ fib_node_index_t child_index)
+{
+ u32 sibling;
+
+ sibling = fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
+ path_list_index,
+ child_type,
+ child_index);
+
+ if (FIB_PATH_LIST_POPULAR == fib_node_get_n_children(FIB_NODE_TYPE_PATH_LIST,
+ path_list_index))
+ {
+ /*
+ * Set the popular flag on the path-list once we pass the magic
+ * threshold, then walk the children to update them.
+ * We don't undo this action. The rationale is that the number
+ * of entries using this prefix is large enough that it is a
+ * non-trivial amount of effort to converge them. If we get into the
+ * situation where we are adding and removing entries such that we
+ * flip-flop over the threshold, then this non-trivial work is added
+ * to each of those route adds/deletes - not a situation we want.
+ */
+ fib_node_back_walk_ctx_t ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+ };
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+ path_list->fpl_flags |= FIB_PATH_LIST_FLAG_POPULAR;
+
+ fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, &ctx);
+ }
+
+ return (sibling);
+}
+
+void
+fib_path_list_child_remove (fib_node_index_t path_list_index,
+ u32 si)
+{
+ fib_node_child_remove(FIB_NODE_TYPE_PATH_LIST,
+ path_list_index,
+ si);
+}
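+
+/*
+ * Sketch of the expected child add/remove pairing; illustrative only.
+ * A child saves the sibling index returned by the add so it can detach
+ * later. FIB_NODE_TYPE_ENTRY is an assumed child type for the example.
+ */
+static u32
+example_attach (fib_node_index_t pl_index, fib_node_index_t entry_index)
+{
+    return (fib_path_list_child_add(pl_index,
+                                    FIB_NODE_TYPE_ENTRY,
+                                    entry_index));
+}
+
+static void
+example_detach (fib_node_index_t pl_index, u32 sibling)
+{
+    fib_path_list_child_remove(pl_index, sibling);
+}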
+
+void
+fib_path_list_lock(fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ if (FIB_NODE_INDEX_INVALID != path_list_index)
+ {
+ path_list = fib_path_list_get(path_list_index);
+
+ fib_node_lock(&path_list->fpl_node);
+ FIB_PATH_LIST_DBG(path_list, "lock");
+ }
+}
+
+void
+fib_path_list_unlock (fib_node_index_t path_list_index)
+{
+ fib_path_list_t *path_list;
+
+ if (FIB_NODE_INDEX_INVALID != path_list_index)
+ {
+ path_list = fib_path_list_get(path_list_index);
+ FIB_PATH_LIST_DBG(path_list, "unlock");
+
+ fib_node_unlock(&path_list->fpl_node);
+ }
+}
+
+u32
+fib_path_list_pool_size (void)
+{
+ return (pool_elts(fib_path_list_pool));
+}
+
+u32
+fib_path_list_db_size (void)
+{
+ return (hash_elts(fib_path_list_db));
+}
+
+void
+fib_path_list_walk (fib_node_index_t path_list_index,
+ fib_path_list_walk_fn_t func,
+ void *ctx)
+{
+ fib_node_index_t *path_index;
+ fib_path_list_t *path_list;
+
+ path_list = fib_path_list_get(path_list_index);
+
+ vec_foreach(path_index, path_list->fpl_paths)
+ {
+ if (FIB_PATH_LIST_WALK_STOP == func(path_list_index,
+ *path_index,
+ ctx))
+ break;
+ }
+}
+
+void
+fib_path_list_module_init (void)
+{
+ fib_node_register_type (FIB_NODE_TYPE_PATH_LIST, &fib_path_list_vft);
+
+ fib_path_list_db = hash_create2 (/* elts */ 0,
+ /* user */ 0,
+ /* value_bytes */ sizeof (fib_node_index_t),
+ fib_path_list_db_hash_key_sum,
+ fib_path_list_db_hash_key_equal,
+ /* format pair/arg */
+ 0, 0);
+}
+
+static clib_error_t *
+show_fib_path_list_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ fib_path_list_t *path_list;
+ fib_node_index_t pli;
+
+ if (unformat (input, "%d", &pli))
+ {
+ /*
+ * show one in detail
+ */
+ if (!pool_is_free_index(fib_path_list_pool, pli))
+ {
+ path_list = fib_path_list_get(pli);
+ u8 *s = fib_path_list_format(pli, NULL);
+ s = format(s, "children:");
+ s = fib_node_children_format(path_list->fpl_node.fn_children, s);
+ vlib_cli_output (vm, "%s", s);
+ vec_free(s);
+ }
+ else
+ {
+ vlib_cli_output (vm, "path list %d invalid", pli);
+ }
+ }
+ else
+ {
+ /*
+ * show all
+ */
+ vlib_cli_output (vm, "FIB Path Lists");
+ pool_foreach(path_list, fib_path_list_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_fib_path_list, path_list);
+ }));
+ }
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (show_fib_path_list, static) = {
+ .path = "show fib path-lists",
+ .function = show_fib_path_list_command,
+ .short_help = "show fib path-lists",
+};
diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h
new file mode 100644
index 00000000..a54b79e2
--- /dev/null
+++ b/src/vnet/fib/fib_path_list.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_PATH_LIST_H__
+#define __FIB_PATH_LIST_H__
+
+#include <vlib/vlib.h>
+#include <vnet/adj/adj.h>
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/fib/fib_path.h>
+
+/**
+ * Enumeration of path-list flags.
+ */
+typedef enum fib_path_list_attribute_t_ {
+ /**
+ * Marker. Add new flags after this one.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_FIRST = 0,
+ /**
+ * This path list is shareable. Shareable path-lists
+ * are inserted into the path-list data-base.
+ * All path-lists are inherently shareable; the reason we share some and
+ * not others is to limit the size of the path-list database. This DB must
+ * be searched for each route update.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST,
+ /**
+ * explicit drop path-list. Used when the entry source needs to
+ * force a drop, despite the fact the path info is present.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_DROP,
+ /**
+ * explicit local path-list.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_LOCAL,
+ /**
+ * exclusive path-list. Exclusive means the path will resolve via the
+ * exclusive (user provided) adj.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE,
+ /**
+ * resolved path-list
+ */
+ FIB_PATH_LIST_ATTRIBUTE_RESOLVED,
+ /**
+ * looped path-list. one path looped implies the whole list is
+ */
+ FIB_PATH_LIST_ATTRIBUTE_LOOPED,
+ /**
+ * a popular path-list is one that is shared amongst many entries.
+ * Path-lists become popular as they gain more children, but they
+ * don't become unpopular as they lose them.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_POPULAR,
+ /**
+ * no uRPF - do not generate unicast RPF list for this path-list
+ */
+ FIB_PATH_LIST_ATTRIBUTE_NO_URPF,
+ /**
+ * Marker. Add new flags before this one, and then update it.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_NO_URPF,
+} fib_path_list_attribute_t;
+
+typedef enum fib_path_list_flags_t_ {
+ FIB_PATH_LIST_FLAG_NONE = 0,
+ FIB_PATH_LIST_FLAG_SHARED = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED),
+ FIB_PATH_LIST_FLAG_DROP = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP),
+ FIB_PATH_LIST_FLAG_LOCAL = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL),
+ FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE),
+ FIB_PATH_LIST_FLAG_RESOLVED = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED),
+ FIB_PATH_LIST_FLAG_LOOPED = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED),
+ FIB_PATH_LIST_FLAG_POPULAR = (1 << FIB_PATH_LIST_ATTRIBUTE_POPULAR),
+ FIB_PATH_LIST_FLAG_NO_URPF = (1 << FIB_PATH_LIST_ATTRIBUTE_NO_URPF),
+} fib_path_list_flags_t;
+
+#define FIB_PATH_LIST_ATTRIBUTES { \
+ [FIB_PATH_LIST_ATTRIBUTE_SHARED] = "shared", \
+ [FIB_PATH_LIST_ATTRIBUTE_RESOLVED] = "resolved", \
+ [FIB_PATH_LIST_ATTRIBUTE_DROP] = "drop", \
+ [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive", \
+ [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \
+ [FIB_PATH_LIST_ATTRIBUTE_LOOPED] = "looped", \
+ [FIB_PATH_LIST_ATTRIBUTE_POPULAR] = "popular", \
+ [FIB_PATH_LIST_ATTRIBUTE_NO_URPF] = "no-uRPF", \
+}
+
+#define FOR_EACH_PATH_LIST_ATTRIBUTE(_item) \
+ for (_item = FIB_PATH_LIST_ATTRIBUTE_FIRST; \
+ _item <= FIB_PATH_LIST_ATTRIBUTE_LAST; \
+ _item++)
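+
+/*
+ * A hedged sketch of how the attribute names and the iteration macro
+ * above combine, e.g. to format a path-list's flags; illustrative only.
+ */
+static inline u8 *
+example_format_path_list_flags (u8 *s, fib_path_list_flags_t flags)
+{
+    const char *attr_names[] = FIB_PATH_LIST_ATTRIBUTES;
+    fib_path_list_attribute_t attr;
+
+    FOR_EACH_PATH_LIST_ATTRIBUTE(attr)
+    {
+        if (flags & (1 << attr))
+            s = format(s, "%s,", attr_names[attr]);
+    }
+    return (s);
+}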
+
+extern fib_node_index_t fib_path_list_create(fib_path_list_flags_t flags,
+ const fib_route_path_t *paths);
+extern fib_node_index_t fib_path_list_create_special(dpo_proto_t nh_proto,
+ fib_path_list_flags_t flags,
+ const dpo_id_t *dpo);
+
+extern fib_node_index_t fib_path_list_copy_and_path_add(
+ fib_node_index_t pl_index,
+ fib_path_list_flags_t flags,
+ const fib_route_path_t *path);
+extern fib_node_index_t fib_path_list_copy_and_path_remove(
+ fib_node_index_t pl_index,
+ fib_path_list_flags_t flags,
+ const fib_route_path_t *path);
+extern fib_node_index_t fib_path_list_path_add (
+ fib_node_index_t path_list_index,
+ const fib_route_path_t *rpaths);
+extern fib_node_index_t fib_path_list_path_remove (
+ fib_node_index_t path_list_index,
+ const fib_route_path_t *rpaths);
+
+extern u32 fib_path_list_get_n_paths(fib_node_index_t pl_index);
+
+extern void fib_path_list_contribute_forwarding(fib_node_index_t path_list_index,
+ fib_forward_chain_type_t type,
+ dpo_id_t *dpo);
+extern void fib_path_list_contribute_urpf(fib_node_index_t path_index,
+ index_t urpf);
+extern index_t fib_path_list_get_urpf(fib_node_index_t path_list_index);
+extern index_t fib_path_list_get_adj(fib_node_index_t path_list_index,
+ fib_forward_chain_type_t type);
+
+extern u32 fib_path_list_child_add(fib_node_index_t pl_index,
+ fib_node_type_t type,
+ fib_node_index_t child_index);
+extern void fib_path_list_child_remove(fib_node_index_t pl_index,
+ fib_node_index_t sibling_index);
+extern void fib_path_list_back_walk(fib_node_index_t pl_index,
+ fib_node_back_walk_ctx_t *ctx);
+extern void fib_path_list_lock(fib_node_index_t pl_index);
+extern void fib_path_list_unlock(fib_node_index_t pl_index);
+extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index,
+ fib_node_index_t **entry_indicies);
+extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index);
+extern int fib_path_list_is_looped(fib_node_index_t path_list_index);
+extern int fib_path_list_is_popular(fib_node_index_t path_list_index);
+extern dpo_proto_t fib_path_list_get_proto(fib_node_index_t path_list_index);
+extern u8 * fib_path_list_format(fib_node_index_t pl_index,
+ u8 * s);
+extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index,
+ const fib_node_index_t *pis);
+extern u32 fib_path_list_find_rpath (fib_node_index_t path_list_index,
+ const fib_route_path_t *rpath);
+
+/**
+ * A callback function type for walking a path-list's paths
+ */
+typedef fib_path_list_walk_rc_t (*fib_path_list_walk_fn_t)(
+ fib_node_index_t pl_index,
+ fib_node_index_t path_index,
+ void *ctx);
+
+extern void fib_path_list_walk(fib_node_index_t pl_index,
+ fib_path_list_walk_fn_t func,
+ void *ctx);
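+
+/*
+ * Sketch of a walk callback counting a path-list's paths; illustrative
+ * only. FIB_PATH_LIST_WALK_CONTINUE is the assumed 'keep going' return
+ * value (FIB_PATH_LIST_WALK_STOP ends the walk early).
+ */
+static fib_path_list_walk_rc_t
+example_count_paths (fib_node_index_t pl_index,
+                     fib_node_index_t path_index,
+                     void *ctx)
+{
+    u32 *count = ctx;
+
+    (*count)++;
+    return (FIB_PATH_LIST_WALK_CONTINUE);
+}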
+
+extern void fib_path_list_module_init(void);
+
+/*
+ * functions for testing.
+ */
+u32 fib_path_list_pool_size(void);
+u32 fib_path_list_db_size(void);
+
+#endif
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
new file mode 100644
index 00000000..ba1e2720
--- /dev/null
+++ b/src/vnet/fib/fib_table.c
@@ -0,0 +1,1295 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry_cover.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+fib_table_t *
+fib_table_get (fib_node_index_t index,
+ fib_protocol_t proto)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (pool_elt_at_index(ip4_main.fibs, index));
+ case FIB_PROTOCOL_IP6:
+ return (pool_elt_at_index(ip6_main.fibs, index));
+ case FIB_PROTOCOL_MPLS:
+ return (pool_elt_at_index(mpls_main.fibs, index));
+ }
+ ASSERT(0);
+ return (NULL);
+}
+
+static inline fib_node_index_t
+fib_table_lookup_i (fib_table_t *fib_table,
+ const fib_prefix_t *prefix)
+{
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_fib_table_lookup(ip4_fib_get(fib_table->ft_index),
+ &prefix->fp_addr.ip4,
+ prefix->fp_len));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_fib_table_lookup(fib_table->ft_index,
+ &prefix->fp_addr.ip6,
+ prefix->fp_len));
+ case FIB_PROTOCOL_MPLS:
+ return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index),
+ prefix->fp_label,
+ prefix->fp_eos));
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+fib_node_index_t
+fib_table_lookup (u32 fib_index,
+ const fib_prefix_t *prefix)
+{
+ return (fib_table_lookup_i(fib_table_get(fib_index, prefix->fp_proto), prefix));
+}
+
+static inline fib_node_index_t
+fib_table_lookup_exact_match_i (const fib_table_t *fib_table,
+ const fib_prefix_t *prefix)
+{
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_fib_table_lookup_exact_match(ip4_fib_get(fib_table->ft_index),
+ &prefix->fp_addr.ip4,
+ prefix->fp_len));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_fib_table_lookup_exact_match(fib_table->ft_index,
+ &prefix->fp_addr.ip6,
+ prefix->fp_len));
+ case FIB_PROTOCOL_MPLS:
+ return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index),
+ prefix->fp_label,
+ prefix->fp_eos));
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+fib_node_index_t
+fib_table_lookup_exact_match (u32 fib_index,
+ const fib_prefix_t *prefix)
+{
+ return (fib_table_lookup_exact_match_i(fib_table_get(fib_index,
+ prefix->fp_proto),
+ prefix));
+}
+
+static fib_node_index_t
+fib_table_get_less_specific_i (fib_table_t *fib_table,
+ const fib_prefix_t *prefix)
+{
+ fib_prefix_t pfx;
+
+ pfx = *prefix;
+
+ if (FIB_PROTOCOL_MPLS == pfx.fp_proto)
+ {
+ return (FIB_NODE_INDEX_INVALID);
+ }
+
+ /*
+ * in the absence of a tree structure for the table that allows for an O(1)
+ * parent get, a cheeky way to find the cover is to LPM for the prefix with
+ * mask-1.
+ * there should always be a cover, though it may be the default route. the
+ * default route's cover is the default route.
+ */
+ if (pfx.fp_len != 0) {
+ pfx.fp_len -= 1;
+ }
+
+ return (fib_table_lookup_i(fib_table, &pfx));
+}
+
+fib_node_index_t
+fib_table_get_less_specific (u32 fib_index,
+ const fib_prefix_t *prefix)
+{
+ return (fib_table_get_less_specific_i(fib_table_get(fib_index,
+ prefix->fp_proto),
+ prefix));
+}
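+
+/*
+ * Worked example of the cover lookup above; illustrative only, with
+ * hypothetical addresses. For 10.0.0.0/24 the cover is found by an LPM
+ * on 10.0.0.0/23; if the table holds only 10.0.0.0/16 and 0.0.0.0/0,
+ * the /16 is returned.
+ */
+static fib_node_index_t
+example_find_cover (u32 fib_index)
+{
+    fib_prefix_t pfx = {
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_len = 24,
+        .fp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0x0a000000), /* 10.0.0.0 */
+        },
+    };
+
+    return (fib_table_get_less_specific(fib_index, &pfx));
+}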
+
+static void
+fib_table_entry_remove (fib_table_t *fib_table,
+ const fib_prefix_t *prefix,
+ fib_node_index_t fib_entry_index)
+{
+ vlib_smp_unsafe_warning();
+
+ fib_table->ft_total_route_counts--;
+
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip4_fib_table_entry_remove(ip4_fib_get(fib_table->ft_index),
+ &prefix->fp_addr.ip4,
+ prefix->fp_len);
+ break;
+ case FIB_PROTOCOL_IP6:
+ ip6_fib_table_entry_remove(fib_table->ft_index,
+ &prefix->fp_addr.ip6,
+ prefix->fp_len);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ mpls_fib_table_entry_remove(mpls_fib_get(fib_table->ft_index),
+ prefix->fp_label,
+ prefix->fp_eos);
+ break;
+ }
+
+ fib_entry_unlock(fib_entry_index);
+}
+
+static void
+fib_table_post_insert_actions (fib_table_t *fib_table,
+ const fib_prefix_t *prefix,
+ fib_node_index_t fib_entry_index)
+{
+ fib_node_index_t fib_entry_cover_index;
+
+ /*
+ * no cover relationships in the MPLS FIB
+ */
+ if (FIB_PROTOCOL_MPLS == prefix->fp_proto)
+ return;
+
+ /*
+ * find and inform the covering entry that a new more specific
+ * has been inserted beneath it
+ */
+ fib_entry_cover_index = fib_table_get_less_specific_i(fib_table, prefix);
+ /*
+ * the indices are the same when the default route is first added
+ */
+ if (fib_entry_cover_index != fib_entry_index)
+ {
+ fib_entry_cover_change_notify(fib_entry_cover_index,
+ fib_entry_index);
+ }
+}
+
+static void
+fib_table_entry_insert (fib_table_t *fib_table,
+ const fib_prefix_t *prefix,
+ fib_node_index_t fib_entry_index)
+{
+ vlib_smp_unsafe_warning();
+
+ fib_entry_lock(fib_entry_index);
+ fib_table->ft_total_route_counts++;
+
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip4_fib_table_entry_insert(ip4_fib_get(fib_table->ft_index),
+ &prefix->fp_addr.ip4,
+ prefix->fp_len,
+ fib_entry_index);
+ break;
+ case FIB_PROTOCOL_IP6:
+ ip6_fib_table_entry_insert(fib_table->ft_index,
+ &prefix->fp_addr.ip6,
+ prefix->fp_len,
+ fib_entry_index);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ mpls_fib_table_entry_insert(mpls_fib_get(fib_table->ft_index),
+ prefix->fp_label,
+ prefix->fp_eos,
+ fib_entry_index);
+ break;
+ }
+
+ fib_table_post_insert_actions(fib_table, prefix, fib_entry_index);
+}
+
+void
+fib_table_fwding_dpo_update (u32 fib_index,
+ const fib_prefix_t *prefix,
+ const dpo_id_t *dpo)
+{
+ vlib_smp_unsafe_warning();
+
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_fib_table_fwding_dpo_update(ip4_fib_get(fib_index),
+ &prefix->fp_addr.ip4,
+ prefix->fp_len,
+ dpo));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_fib_table_fwding_dpo_update(fib_index,
+ &prefix->fp_addr.ip6,
+ prefix->fp_len,
+ dpo));
+ case FIB_PROTOCOL_MPLS:
+ return (mpls_fib_forwarding_table_update(mpls_fib_get(fib_index),
+ prefix->fp_label,
+ prefix->fp_eos,
+ dpo));
+ }
+}
+
+void
+fib_table_fwding_dpo_remove (u32 fib_index,
+ const fib_prefix_t *prefix,
+ const dpo_id_t *dpo)
+{
+ vlib_smp_unsafe_warning();
+
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index),
+ &prefix->fp_addr.ip4,
+ prefix->fp_len,
+ dpo,
+ fib_table_get_less_specific(fib_index,
+ prefix)));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_fib_table_fwding_dpo_remove(fib_index,
+ &prefix->fp_addr.ip6,
+ prefix->fp_len,
+ dpo));
+ case FIB_PROTOCOL_MPLS:
+ return (mpls_fib_forwarding_table_reset(mpls_fib_get(fib_index),
+ prefix->fp_label,
+ prefix->fp_eos));
+ }
+}
+
+fib_node_index_t
+fib_table_entry_special_dpo_add (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_node_index_t fib_entry_index;
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_index, prefix->fp_proto);
+ fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ {
+ fib_entry_index = fib_entry_create_special(fib_index, prefix,
+ source, flags,
+ dpo);
+
+ fib_table_entry_insert(fib_table, prefix, fib_entry_index);
+ fib_table->ft_src_route_counts[source]++;
+ }
+ else
+ {
+ int was_sourced;
+
+ was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+ fib_entry_special_add(fib_entry_index, source, flags, dpo);
+
+ if (was_sourced != fib_entry_is_sourced(fib_entry_index, source))
+ {
+ fib_table->ft_src_route_counts[source]++;
+ }
+ }
+
+ return (fib_entry_index);
+}
+
+fib_node_index_t
+fib_table_entry_special_dpo_update (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo)
+{
+ fib_node_index_t fib_entry_index;
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_index, prefix->fp_proto);
+ fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ {
+ fib_entry_index = fib_entry_create_special(fib_index, prefix,
+ source, flags,
+ dpo);
+
+ fib_table_entry_insert(fib_table, prefix, fib_entry_index);
+ fib_table->ft_src_route_counts[source]++;
+ }
+ else
+ {
+ int was_sourced;
+
+ was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+
+ if (was_sourced)
+ fib_entry_special_update(fib_entry_index, source, flags, dpo);
+ else
+ fib_entry_special_add(fib_entry_index, source, flags, dpo);
+
+ if (was_sourced != fib_entry_is_sourced(fib_entry_index, source))
+ {
+ fib_table->ft_src_route_counts[source]++;
+ }
+ }
+
+ return (fib_entry_index);
+}
+
+fib_node_index_t
+fib_table_entry_special_add (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags)
+{
+ fib_node_index_t fib_entry_index;
+ dpo_id_t tmp_dpo = DPO_INVALID;
+
+ dpo_copy(&tmp_dpo, drop_dpo_get(fib_proto_to_dpo(prefix->fp_proto)));
+
+ fib_entry_index = fib_table_entry_special_dpo_add(fib_index, prefix, source,
+ flags, &tmp_dpo);
+
+ dpo_unlock(&tmp_dpo);
+
+ return (fib_entry_index);
+}
+
+void
+fib_table_entry_special_remove (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source)
+{
+ /*
+ * 1 is it present
+ * yes => remove source
+ * 2 - is it still sourced?
+ * no => cover walk
+ */
+ fib_node_index_t fib_entry_index;
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_index, prefix->fp_proto);
+ fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ {
+ /*
+ * removing an entry that does not exist. i'll allow it.
+ */
+ }
+ else
+ {
+ fib_entry_src_flag_t src_flag;
+ int was_sourced;
+
+ /*
+ * don't nobody go nowhere
+ */
+ fib_entry_lock(fib_entry_index);
+ was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+
+ src_flag = fib_entry_special_remove(fib_entry_index, source);
+
+ if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag))
+ {
+ /*
+ * last source gone. remove from the table
+ */
+ fib_table_entry_remove(fib_table, prefix, fib_entry_index);
+
+ /*
+ * now the entry is no longer in the table, we can
+ * inform the entries that it covers to re-calculate their cover
+ */
+ fib_entry_cover_change_notify(fib_entry_index,
+ FIB_NODE_INDEX_INVALID);
+ }
+ /*
+ * else
+ * still has sources, leave it be.
+ */
+ if (was_sourced != fib_entry_is_sourced(fib_entry_index, source))
+ {
+ fib_table->ft_src_route_counts[source]--;
+ }
+
+ fib_entry_unlock(fib_entry_index);
+ }
+}
+
+/**
+ * fib_table_route_path_fixup
+ *
+ * Convert attached hosts to attached next-hops.
+ *
+ * This special case is required because an attached path will link to a
+ * glean, and the FIB entry will have the interface or API/CLI source. When
+ * the ARP/ND process completes, that source (which will provide a
+ * complete adjacency) will be lower priority and so the FIB entry will
+ * remain linked to a glean and traffic will never reach the hosts. For
+ * an ATTACHED_HOST path we can link the path directly to the [incomplete]
+ * adjacency.
+ */
+static void
+fib_table_route_path_fixup (const fib_prefix_t *prefix,
+ fib_entry_flag_t eflags,
+ fib_route_path_t *path)
+{
+ /*
+ * not all zeros next hop &&
+ * is recursive path &&
+ * nexthop is same as the route's address
+ */
+ if ((!ip46_address_is_zero(&path->frp_addr)) &&
+ (~0 == path->frp_sw_if_index) &&
+ (0 == ip46_address_cmp(&path->frp_addr, &prefix->fp_addr)))
+ {
+ /* Prefix recurses via itself */
+ path->frp_flags |= FIB_ROUTE_PATH_DROP;
+ }
+ if (fib_prefix_is_host(prefix) &&
+ ip46_address_is_zero(&path->frp_addr) &&
+ path->frp_sw_if_index != ~0)
+ {
+ path->frp_addr = prefix->fp_addr;
+ path->frp_flags |= FIB_ROUTE_PATH_ATTACHED;
+ }
+ if (eflags & FIB_ENTRY_FLAG_DROP)
+ {
+ path->frp_flags |= FIB_ROUTE_PATH_DROP;
+ }
+ if (eflags & FIB_ENTRY_FLAG_LOCAL)
+ {
+ path->frp_flags |= FIB_ROUTE_PATH_LOCAL;
+ }
+ if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE)
+ {
+ path->frp_flags |= FIB_ROUTE_PATH_EXCLUSIVE;
+ }
+}
+
+fib_node_index_t
+fib_table_entry_path_add (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ mpls_label_t *next_hop_labels,
+ fib_route_path_flags_t path_flags)
+{
+ fib_route_path_t path = {
+ .frp_proto = next_hop_proto,
+ .frp_addr = (NULL == next_hop? zero_addr : *next_hop),
+ .frp_sw_if_index = next_hop_sw_if_index,
+ .frp_fib_index = next_hop_fib_index,
+ .frp_weight = next_hop_weight,
+ .frp_flags = path_flags,
+ .frp_label_stack = next_hop_labels,
+ };
+ fib_node_index_t fib_entry_index;
+ fib_route_path_t *paths = NULL;
+
+ vec_add1(paths, path);
+
+ fib_entry_index = fib_table_entry_path_add2(fib_index, prefix,
+ source, flags, paths);
+
+ vec_free(paths);
+ return (fib_entry_index);
+}
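+
+/*
+ * A hedged usage sketch for the single-path add above: install
+ * 10.0.0.0/24 via next-hop 10.0.0.1 on a given interface. The source,
+ * addresses and flag values are assumptions for illustration.
+ */
+static fib_node_index_t
+example_add_route (u32 fib_index, u32 sw_if_index)
+{
+    fib_prefix_t pfx = {
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_len = 24,
+        .fp_addr.ip4.as_u32 = clib_host_to_net_u32(0x0a000000),
+    };
+    ip46_address_t nh = {
+        .ip4.as_u32 = clib_host_to_net_u32(0x0a000001),
+    };
+
+    return (fib_table_entry_path_add(fib_index, &pfx,
+                                     FIB_SOURCE_API,
+                                     FIB_ENTRY_FLAG_NONE,
+                                     DPO_PROTO_IP4,
+                                     &nh,
+                                     sw_if_index,
+                                     ~0,   /* no next-hop fib */
+                                     1,    /* weight */
+                                     NULL, /* no label stack */
+                                     FIB_ROUTE_PATH_FLAG_NONE));
+}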
+
+fib_node_index_t
+fib_table_entry_path_add2 (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ fib_route_path_t *rpath)
+{
+ fib_node_index_t fib_entry_index;
+ fib_table_t *fib_table;
+ u32 ii;
+
+ fib_table = fib_table_get(fib_index, prefix->fp_proto);
+ fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
+
+ for (ii = 0; ii < vec_len(rpath); ii++)
+ {
+ fib_table_route_path_fixup(prefix, flags, &rpath[ii]);
+ }
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ {
+ fib_entry_index = fib_entry_create(fib_index, prefix,
+ source, flags,
+ rpath);
+
+ fib_table_entry_insert(fib_table, prefix, fib_entry_index);
+ fib_table->ft_src_route_counts[source]++;
+ }
+ else
+ {
+ int was_sourced;
+
+ was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+ fib_entry_path_add(fib_entry_index, source, flags, rpath);
+
+ if (was_sourced != fib_entry_is_sourced(fib_entry_index, source))
+ {
+ fib_table->ft_src_route_counts[source]++;
+ }
+ }
+
+ return (fib_entry_index);
+}
+
+void
+fib_table_entry_path_remove2 (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_route_path_t *rpath)
+{
+ /*
+ * 1 is it present
+ * yes => remove source
+ * 2 - is it still sourced?
+ * no => cover walk
+ */
+ fib_node_index_t fib_entry_index;
+ fib_table_t *fib_table;
+ u32 ii;
+
+ fib_table = fib_table_get(fib_index, prefix->fp_proto);
+ fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ {
+ /*
+ * removing an entry that does not exist. i'll allow it.
+ */
+ }
+ else
+ {
+ fib_entry_src_flag_t src_flag;
+ int was_sourced;
+
+ /*
+ * if it's not sourced, then there's nowt to remove
+ */
+ was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+ if (!was_sourced)
+ {
+ return;
+ }
+
+ /*
+ * don't nobody go nowhere
+ */
+ fib_entry_lock(fib_entry_index);
+
+ for (ii = 0; ii < vec_len(rpath); ii++)
+ {
+ fib_table_route_path_fixup(
+ prefix,
+ fib_entry_get_flags_for_source(fib_entry_index,
+ source),
+ &rpath[ii]);
+ }
+
+ src_flag = fib_entry_path_remove(fib_entry_index, source, rpath);
+
+ if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag))
+ {
+ /*
+ * last source gone. remove from the table
+ */
+ fib_table_entry_remove(fib_table, prefix, fib_entry_index);
+
+ /*
+ * now the entry is no longer in the table, we can
+ * inform the entries that it covers to re-calculate their cover
+ */
+ fib_entry_cover_change_notify(fib_entry_index,
+ FIB_NODE_INDEX_INVALID);
+ }
+ /*
+ * else
+ * still has sources, leave it be.
+ */
+ if (was_sourced != fib_entry_is_sourced(fib_entry_index, source))
+ {
+ fib_table->ft_src_route_counts[source]--;
+ }
+
+ fib_entry_unlock(fib_entry_index);
+ }
+}
+
+void
+fib_table_entry_path_remove (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ fib_route_path_flags_t path_flags)
+{
+ /*
+ * 1 is it present
+ * yes => remove source
+ * 2 - is it still sourced?
+ * no => cover walk
+ */
+ fib_route_path_t path = {
+ .frp_proto = next_hop_proto,
+ .frp_addr = (NULL == next_hop? zero_addr : *next_hop),
+ .frp_sw_if_index = next_hop_sw_if_index,
+ .frp_fib_index = next_hop_fib_index,
+ .frp_weight = next_hop_weight,
+ .frp_flags = path_flags,
+ };
+ fib_route_path_t *paths = NULL;
+
+ vec_add1(paths, path);
+
+ fib_table_entry_path_remove2(fib_index, prefix, source, paths);
+
+ vec_free(paths);
+}
+
+static int
+fib_route_path_cmp_for_sort (void * v1,
+ void * v2)
+{
+ return (fib_route_path_cmp(v1, v2));
+}
+
+fib_node_index_t
+fib_table_entry_update (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ fib_route_path_t *paths)
+{
+ fib_node_index_t fib_entry_index;
+ fib_table_t *fib_table;
+ u32 ii;
+
+ fib_table = fib_table_get(fib_index, prefix->fp_proto);
+ fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
+
+ for (ii = 0; ii < vec_len(paths); ii++)
+ {
+ fib_table_route_path_fixup(prefix, flags, &paths[ii]);
+ }
+ /*
+ * sort the paths provided by the control plane. this means
+ * the paths and the extension on the entry will be sorted.
+ */
+ vec_sort_with_function(paths, fib_route_path_cmp_for_sort);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ {
+ fib_entry_index = fib_entry_create(fib_index, prefix,
+ source, flags,
+ paths);
+
+ fib_table_entry_insert(fib_table, prefix, fib_entry_index);
+ fib_table->ft_src_route_counts[source]++;
+ }
+ else
+ {
+ int was_sourced;
+
+ was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+ fib_entry_update(fib_entry_index, source, flags, paths);
+
+ if (was_sourced != fib_entry_is_sourced(fib_entry_index, source))
+ {
+ fib_table->ft_src_route_counts[source]++;
+ }
+ }
+
+ return (fib_entry_index);
+}
+
+fib_node_index_t
+fib_table_entry_update_one_path (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ mpls_label_t *next_hop_labels,
+ fib_route_path_flags_t path_flags)
+{
+ fib_node_index_t fib_entry_index;
+ fib_route_path_t path = {
+ .frp_proto = next_hop_proto,
+ .frp_addr = (NULL == next_hop? zero_addr : *next_hop),
+ .frp_sw_if_index = next_hop_sw_if_index,
+ .frp_fib_index = next_hop_fib_index,
+ .frp_weight = next_hop_weight,
+ .frp_flags = path_flags,
+ .frp_label_stack = next_hop_labels,
+ };
+ fib_route_path_t *paths = NULL;
+
+ vec_add1(paths, path);
+
+ fib_entry_index =
+ fib_table_entry_update(fib_index, prefix, source, flags, paths);
+
+ vec_free(paths);
+
+ return (fib_entry_index);
+}
+
+static void
+fib_table_entry_delete_i (u32 fib_index,
+ fib_node_index_t fib_entry_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source)
+{
+ fib_entry_src_flag_t src_flag;
+ fib_table_t *fib_table;
+ int was_sourced;
+
+ fib_table = fib_table_get(fib_index, prefix->fp_proto);
+ was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+
+ /*
+ * don't nobody go nowhere
+ */
+ fib_entry_lock(fib_entry_index);
+
+ src_flag = fib_entry_delete(fib_entry_index, source);
+
+ if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag))
+ {
+ /*
+ * last source gone. remove from the table
+ */
+ fib_table_entry_remove(fib_table, prefix, fib_entry_index);
+
+ /*
+ * now the entry is no longer in the table, we can
+ * inform the entries that it covers to re-calculate their cover
+ */
+ fib_entry_cover_change_notify(fib_entry_index,
+ FIB_NODE_INDEX_INVALID);
+ }
+ /*
+ * else
+ * still has sources, leave it be.
+ */
+ if (was_sourced != fib_entry_is_sourced(fib_entry_index, source))
+ {
+ fib_table->ft_src_route_counts[source]--;
+ }
+
+ fib_entry_unlock(fib_entry_index);
+}
+
+void
+fib_table_entry_delete (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source)
+{
+ fib_node_index_t fib_entry_index;
+
+ fib_entry_index = fib_table_lookup_exact_match(fib_index, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ {
+ /*
+ * removing an entry that does not exist.
+ * i'll allow it, but i won't like it.
+ */
+ clib_warning("%U not in FIB", format_fib_prefix, prefix);
+ }
+ else
+ {
+ fib_table_entry_delete_i(fib_index, fib_entry_index, prefix, source);
+ }
+}
+
+void
+fib_table_entry_delete_index (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_prefix_t prefix;
+
+ fib_entry_get_prefix(fib_entry_index, &prefix);
+
+ fib_table_entry_delete_i(fib_entry_get_fib_index(fib_entry_index),
+ fib_entry_index, &prefix, source);
+}
+
+fib_node_index_t
+fib_table_entry_local_label_add (u32 fib_index,
+ const fib_prefix_t *prefix,
+ mpls_label_t label)
+{
+ fib_node_index_t fib_entry_index;
+
+ fib_entry_index = fib_table_lookup_exact_match(fib_index, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index ||
+ !fib_entry_is_sourced(fib_entry_index, FIB_SOURCE_MPLS))
+ {
+ /*
+ * only source the prefix once. this allows the label change
+ * operation to work
+ */
+ fib_entry_index = fib_table_entry_special_dpo_add(fib_index, prefix,
+ FIB_SOURCE_MPLS,
+ FIB_ENTRY_FLAG_NONE,
+ NULL);
+ }
+
+ fib_entry_set_source_data(fib_entry_index, FIB_SOURCE_MPLS, &label);
+
+ return (fib_entry_index);
+}
+
+void
+fib_table_entry_local_label_remove (u32 fib_index,
+ const fib_prefix_t *prefix,
+ mpls_label_t label)
+{
+ fib_node_index_t fib_entry_index;
+ const void *data;
+ mpls_label_t pl;
+
+ fib_entry_index = fib_table_lookup_exact_match(fib_index, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ return;
+
+ data = fib_entry_get_source_data(fib_entry_index, FIB_SOURCE_MPLS);
+
+ if (NULL == data)
+ return;
+
+ pl = *(mpls_label_t*)data;
+
+ if (pl != label)
+ return;
+
+ pl = MPLS_LABEL_INVALID;
+
+ fib_entry_set_source_data(fib_entry_index, FIB_SOURCE_MPLS, &pl);
+ fib_table_entry_special_remove(fib_index,
+ prefix,
+ FIB_SOURCE_MPLS);
+}
+
+u32
+fib_table_get_index_for_sw_if_index (fib_protocol_t proto,
+ u32 sw_if_index)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_fib_table_get_index_for_sw_if_index(sw_if_index));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_fib_table_get_index_for_sw_if_index(sw_if_index));
+ case FIB_PROTOCOL_MPLS:
+ return (mpls_fib_table_get_index_for_sw_if_index(sw_if_index));
+ }
+ return (~0);
+}
+
+flow_hash_config_t
+fib_table_get_flow_hash_config (u32 fib_index,
+ fib_protocol_t proto)
+{
+ fib_table_t *fib;
+
+ fib = fib_table_get(fib_index, proto);
+
+ return (fib->ft_flow_hash_config);
+}
+flow_hash_config_t
+fib_table_get_default_flow_hash_config (fib_protocol_t proto)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ case FIB_PROTOCOL_IP6:
+ return (IP_FLOW_HASH_DEFAULT);
+
+ case FIB_PROTOCOL_MPLS:
+ return (MPLS_FLOW_HASH_DEFAULT);
+ }
+
+ ASSERT(0);
+ return (IP_FLOW_HASH_DEFAULT);
+}
+
+/**
+ * @brief Table set flow hash config context.
+ */
+typedef struct fib_table_set_flow_hash_config_ctx_t_
+{
+ /**
+ * the flow hash config to set
+ */
+ flow_hash_config_t hash_config;
+} fib_table_set_flow_hash_config_ctx_t;
+
+static int
+fib_table_set_flow_hash_config_cb (fib_node_index_t fib_entry_index,
+ void *arg)
+{
+ fib_table_set_flow_hash_config_ctx_t *ctx = arg;
+
+ fib_entry_set_flow_hash_config(fib_entry_index, ctx->hash_config);
+
+ return (1);
+}
+
+void
+fib_table_set_flow_hash_config (u32 fib_index,
+ fib_protocol_t proto,
+ flow_hash_config_t hash_config)
+{
+ fib_table_set_flow_hash_config_ctx_t ctx = {
+ .hash_config = hash_config,
+ };
+ fib_table_t *fib;
+
+ fib = fib_table_get(fib_index, proto);
+ fib->ft_flow_hash_config = hash_config;
+
+ fib_table_walk(fib_index, proto,
+ fib_table_set_flow_hash_config_cb,
+ &ctx);
+}
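+
+/*
+ * Usage sketch, illustrative only: reset a table's flow hash back to
+ * the protocol default using the two accessors above.
+ */
+static void
+example_reset_flow_hash (u32 fib_index, fib_protocol_t proto)
+{
+    fib_table_set_flow_hash_config(
+        fib_index, proto,
+        fib_table_get_default_flow_hash_config(proto));
+}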
+
+u32
+fib_table_get_table_id_for_sw_if_index (fib_protocol_t proto,
+ u32 sw_if_index)
+{
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_table_get_index_for_sw_if_index(
+ proto, sw_if_index),
+ proto);
+
+ return ((NULL != fib_table ? fib_table->ft_table_id : ~0));
+}
+
+u32
+fib_table_find (fib_protocol_t proto,
+ u32 table_id)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_fib_index_from_table_id(table_id));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_fib_index_from_table_id(table_id));
+ case FIB_PROTOCOL_MPLS:
+ return (mpls_fib_index_from_table_id(table_id));
+ }
+ return (~0);
+}
+
+static u32
+fib_table_find_or_create_and_lock_i (fib_protocol_t proto,
+ u32 table_id,
+ fib_source_t src,
+ const u8 *name)
+{
+ fib_table_t *fib_table;
+ fib_node_index_t fi;
+
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ fi = ip4_fib_table_find_or_create_and_lock(table_id, src);
+ break;
+ case FIB_PROTOCOL_IP6:
+ fi = ip6_fib_table_find_or_create_and_lock(table_id, src);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ fi = mpls_fib_table_find_or_create_and_lock(table_id, src);
+ break;
+ default:
+ return (~0);
+ }
+
+ fib_table = fib_table_get(fi, proto);
+
+ if (NULL == fib_table->ft_desc)
+ {
+ if (name && name[0])
+ {
+ fib_table->ft_desc = format(NULL, "%s", name);
+ }
+ else
+ {
+ fib_table->ft_desc = format(NULL, "%U-VRF:%d",
+ format_fib_protocol, proto,
+ table_id);
+ }
+ }
+
+ return (fi);
+}
+
+u32
+fib_table_find_or_create_and_lock (fib_protocol_t proto,
+ u32 table_id,
+ fib_source_t src)
+{
+ return (fib_table_find_or_create_and_lock_i(proto, table_id,
+ src, NULL));
+}
+
+u32
+fib_table_find_or_create_and_lock_w_name (fib_protocol_t proto,
+ u32 table_id,
+ fib_source_t src,
+ const u8 *name)
+{
+ return (fib_table_find_or_create_and_lock_i(proto, table_id,
+ src, name));
+}
+
+u32
+fib_table_create_and_lock (fib_protocol_t proto,
+ fib_source_t src,
+ const char *const fmt,
+ ...)
+{
+ fib_table_t *fib_table;
+ fib_node_index_t fi;
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ fi = ip4_fib_table_create_and_lock(src);
+ break;
+ case FIB_PROTOCOL_IP6:
+ fi = ip6_fib_table_create_and_lock(src);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ fi = mpls_fib_table_create_and_lock(src);
+ break;
+ default:
+ return (~0);
+ }
+
+ fib_table = fib_table_get(fi, proto);
+
+ fib_table->ft_desc = va_format(fib_table->ft_desc, fmt, &ap);
+
+ va_end(ap);
+ return (fi);
+}
+
+static void
+fib_table_destroy (fib_table_t *fib_table)
+{
+ vec_free(fib_table->ft_desc);
+
+ switch (fib_table->ft_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip4_fib_table_destroy(fib_table->ft_index);
+ break;
+ case FIB_PROTOCOL_IP6:
+ ip6_fib_table_destroy(fib_table->ft_index);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ mpls_fib_table_destroy(fib_table->ft_index);
+ break;
+ }
+}
+
+void
+fib_table_walk (u32 fib_index,
+ fib_protocol_t proto,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip4_fib_table_walk(ip4_fib_get(fib_index), fn, ctx);
+ break;
+ case FIB_PROTOCOL_IP6:
+ ip6_fib_table_walk(fib_index, fn, ctx);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ mpls_fib_table_walk(mpls_fib_get(fib_index), fn, ctx);
+ break;
+ }
+}
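+
+/*
+ * Sketch of a table-walk callback; illustrative only. It mirrors the
+ * flush callback below: collect every visited entry into a vector
+ * supplied by the caller.
+ */
+static int
+example_collect_cb (fib_node_index_t fib_entry_index, void *arg)
+{
+    fib_node_index_t **entries = arg;
+
+    vec_add1(*entries, fib_entry_index);
+    return (1); /* conventionally 'keep walking', as in fib_table_flush_cb */
+}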
+
+void
+fib_table_unlock (u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source)
+{
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_index, proto);
+ fib_table->ft_locks[source]--;
+ fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]--;
+
+ if (0 == fib_table->ft_locks[source])
+ {
+ /*
+ * The source no longer needs the table. flush any routes
+ * from it just in case
+ */
+ fib_table_flush(fib_index, proto, source);
+ }
+
+ if (0 == fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS])
+ {
+ /*
+ * no more locks from any source - kill it
+ */
+ fib_table_destroy(fib_table);
+ }
+}
+
+void
+fib_table_lock (u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source)
+{
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_index, proto);
+ fib_table->ft_locks[source]++;
+ fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]++;
+}
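+
+/*
+ * Lifecycle sketch; illustrative only: a client finds-or-creates a
+ * table (taking a lock), uses it, then unlocks. The final unlock
+ * flushes the source's routes and destroys the table. The table-id and
+ * source values are assumptions.
+ */
+static void
+example_table_lifecycle (void)
+{
+    u32 fib_index;
+
+    fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+                                                  100, /* table-id */
+                                                  FIB_SOURCE_API);
+    /* ... add and remove routes ... */
+    fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
+}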
+
+u32
+fib_table_get_num_entries (u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source)
+{
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_index, proto);
+
+ return (fib_table->ft_src_route_counts[source]);
+}
+
+u8*
+format_fib_table_name (u8* s, va_list ap)
+{
+ fib_node_index_t fib_index = va_arg(ap, fib_node_index_t);
+ fib_protocol_t proto = va_arg(ap, int); // int promotion
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_index, proto);
+
+ s = format(s, "%v", fib_table->ft_desc);
+
+ return (s);
+}
+
+/**
+ * @brief Table flush context. Store the indices of matching FIB entries
+ * that need to be removed.
+ */
+typedef struct fib_table_flush_ctx_t_
+{
+ /**
+ * The list of entries to flush
+ */
+ fib_node_index_t *ftf_entries;
+
+ /**
+ * The source we are flushing
+ */
+ fib_source_t ftf_source;
+} fib_table_flush_ctx_t;
+
+static int
+fib_table_flush_cb (fib_node_index_t fib_entry_index,
+ void *arg)
+{
+ fib_table_flush_ctx_t *ctx = arg;
+
+ if (fib_entry_is_sourced(fib_entry_index, ctx->ftf_source))
+ {
+ vec_add1(ctx->ftf_entries, fib_entry_index);
+ }
+ return (1);
+}
+
+void
+fib_table_flush (u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source)
+{
+ fib_node_index_t *fib_entry_index;
+ fib_table_flush_ctx_t ctx = {
+ .ftf_entries = NULL,
+ .ftf_source = source,
+ };
+
+ fib_table_walk(fib_index, proto,
+ fib_table_flush_cb,
+ &ctx);
+
+ vec_foreach(fib_entry_index, ctx.ftf_entries)
+ {
+ fib_table_entry_delete_index(*fib_entry_index, source);
+ }
+
+ vec_free(ctx.ftf_entries);
+}
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
new file mode 100644
index 00000000..923d7aff
--- /dev/null
+++ b/src/vnet/fib/fib_table.h
@@ -0,0 +1,811 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_TABLE_H__
+#define __FIB_TABLE_H__
+
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/mpls/packet.h>
+
+/**
+ * Keep a lock per-source and a total
+ */
+#define FIB_TABLE_N_LOCKS (FIB_SOURCE_MAX+1)
+#define FIB_TABLE_TOTAL_LOCKS FIB_SOURCE_MAX
+
+/**
+ * @brief
+ * A protocol Independent FIB table
+ */
+typedef struct fib_table_t_
+{
+ /**
+ * Which protocol this table serves.
+ */
+ fib_protocol_t ft_proto;
+
+ /**
+ * per-source number of locks on the table
+ */
+ u16 ft_locks[FIB_TABLE_N_LOCKS];
+
+ /**
+ * Table ID (hash key) for this FIB.
+ */
+ u32 ft_table_id;
+
+ /**
+ * Index into FIB vector.
+ */
+ fib_node_index_t ft_index;
+
+ /**
+ * flow hash configuration
+ */
+ u32 ft_flow_hash_config;
+
+ /**
+ * Per-source route counters
+ */
+ u32 ft_src_route_counts[FIB_SOURCE_MAX];
+
+ /**
+ * Total route counters
+ */
+ u32 ft_total_route_counts;
+
+ /**
+ * Table description
+ */
+ u8* ft_desc;
+} fib_table_t;
+
+/**
+ * @brief
+ * Format the description/name of the table
+ */
+extern u8* format_fib_table_name(u8* s, va_list ap);
+
+/**
+ * @brief
+ * Perform a longest prefix match in the non-forwarding table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to lookup
+ *
+ * @return
+ * The index of the fib_entry_t for the best match, which may be the default route
+ */
+extern fib_node_index_t fib_table_lookup(u32 fib_index,
+ const fib_prefix_t *prefix);
+
+/**
+ * @brief
+ * Perform an exact match in the non-forwarding table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to lookup
+ *
+ * @return
+ * The index of the fib_entry_t for the exact match, or INVALID
+ * if there is no match.
+ */
+extern fib_node_index_t fib_table_lookup_exact_match(u32 fib_index,
+ const fib_prefix_t *prefix);
+
+/**
+ * @brief
+ * Get the less specific (covering) prefix
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to lookup
+ *
+ * @return
+ * The index of the less specific fib_entry_t.
+ */
+extern fib_node_index_t fib_table_get_less_specific(u32 fib_index,
+ const fib_prefix_t *prefix);
+
+/**
+ * @brief
+ * Add a 'special' entry to the FIB.
+ * A special entry is an entry that the FIB is not expected to resolve
+ * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup).
+ * Instead it will link to a DPO valid for the source and/or the flags.
+ * This add is reference counted per-source, so n 'removes' are required
+ * for n 'adds', if the entry is no longer required.
+ * If the source needs to provide non-default forwarding use:
+ * fib_table_entry_special_dpo_add()
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or exists already).
+ */
+extern fib_node_index_t fib_table_entry_special_add(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags);
+
+/**
+ * @brief
+ * Add a 'special' entry to the FIB that links to the DPO passed
+ * A special entry is an entry that the FIB is not expected to resolve
+ * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup).
+ * Instead the client/source provides the DPO to link to.
+ * This add is reference counted per-source, so n 'removes' are required
+ * for n 'adds', if the entry is no longer required.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param dpo
+ * The DPO to link to.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_special_dpo_add(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t stype,
+ const dpo_id_t *dpo);
+
+/**
+ * @brief
+ * Update a 'special' entry to the FIB that links to the DPO passed
+ * A special entry is an entry that the FIB is not expected to resolve
+ * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup).
+ * Instead the client/source provides the DPO to link to.
+ * Special entries are add/remove reference counted per-source. So n
+ * 'removes' are required for n 'adds', if the entry is no longer required.
+ * An 'update' acts as an 'add' if no 'add' has yet been made for the
+ * source; otherwise it replaces the DPO of the source's existing add.
+ *
+ * @param fib_entry_index
+ * The index of the FIB entry to update
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param dpo
+ * The DPO to link to.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_special_dpo_update (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t stype,
+ const dpo_id_t *dpo);
+
+/**
+ * @brief
+ * Remove a 'special' entry from the FIB.
+ * The add is reference counted per-source, so n 'removes' are required
+ * for n 'adds', if the entry is no longer required.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to remove
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ */
+extern void fib_table_entry_special_remove(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source);
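+
+/*
+ * Illustrative add/remove pairing for the two functions above; the
+ * FIB_SOURCE_API source is an assumption for the example.
+ */
+static inline void
+example_blackhole_toggle (u32 fib_index, const fib_prefix_t *pfx, int on)
+{
+    if (on)
+        fib_table_entry_special_add(fib_index, pfx, FIB_SOURCE_API,
+                                    FIB_ENTRY_FLAG_DROP);
+    else
+        fib_table_entry_special_remove(fib_index, pfx, FIB_SOURCE_API);
+}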
+
+/**
+ * @brief
+ * Add one path to an entry (aka route) in the FIB. If the entry does not
+ * exist, it will be created.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param next_hop_proto
+ * The protocol of the next hop. This cannot be derived in the event that
+ * the next hop is all zeros.
+ *
+ * @param next_hop
+ * The address of the next-hop.
+ *
+ * @param sw_if_index
+ * The index of the interface.
+ *
+ * @param next_hop_fib_index,
+ * The fib index of the next-hop for recursive resolution
+ *
+ * @param next_hop_weight
+ * [un]equal cost path weight
+ *
+ * @param next_hop_label_stack
+ * The path's out-going label stack. NULL if there is none.
+ *
+ * @param pf
+ * Flags for the path
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_path_add(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ mpls_label_t *next_hop_label_stack,
+ fib_route_path_flags_t pf);
+/**
+ * @brief
+ * Add n paths to an entry (aka route) in the FIB. If the entry does not
+ * exist, it will be created.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param rpaths
+ * A vector of paths. Not const since they may be modified.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_path_add2(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ fib_route_path_t *rpath);
+
+/**
+ * @brief
+ * Remove one path from an entry (aka route) in the FIB. If this is the
+ * entry's last path, then the entry will be removed, unless it has other
+ * sources. See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param next_hop_proto
+ * The protocol of the next hop. This cannot be derived in the event that
+ * the next hop is all zeros.
+ *
+ * @param next_hop
+ * The address of the next-hop.
+ *
+ * @param sw_if_index
+ * The index of the interface.
+ *
+ * @param next_hop_fib_index,
+ * The fib index of the next-hop for recursive resolution
+ *
+ * @param next_hop_weight
+ * [un]equal cost path weight
+ *
+ * @param pf
+ * Flags for the path
+ */
+extern void fib_table_entry_path_remove(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ fib_route_path_flags_t pf);
+
+/**
+ * @brief
+ * Remove n paths from an entry (aka route) in the FIB. If this is the
+ * entry's last path, then the entry will be removed, unless it has other
+ * sources. See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param rpaths
+ * A vector of paths.
+ */
+extern void fib_table_entry_path_remove2(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_route_path_t *paths);
+
+/**
+ * @brief
+ * Update an entry to have a new set of paths. If the entry does not
+ * exist, it will be created.
+ * The difference between a 'path-add' and an update is that path-add is
+ * an incremental addition of paths, whereas an update is a wholesale swap.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param rpaths
+ * A vector of paths. Not const since they may be modified.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_update(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ fib_route_path_t *paths);
+
+/**
+ * @brief
+ * Update the entry to have just one path. If the entry does not
+ * exist, it will be created.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param next_hop_proto
+ * The protocol of the next hop. This cannot be derived in the event that
+ * the next hop is all zeros.
+ *
+ * @param next_hop
+ * The address of the next-hop.
+ *
+ * @param sw_if_index
+ * The index of the interface.
+ *
+ * @param next_hop_fib_index,
+ * The fib index of the next-hop for recursive resolution
+ *
+ * @param next_hop_weight
+ * [un]equal cost path weight
+ *
+ * @param next_hop_label_stack
+ * The path's out-going label stack. NULL if there is none.
+ *
+ * @param pf
+ * Flags for the path
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_update_one_path(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ mpls_label_t *next_hop_label_stack,
+ fib_route_path_flags_t pf);
+
+/**
+ * @brief
+ * Add an MPLS local label for the prefix/route. If the entry does not
+ * exist, it will be created. In theory more than one local label can be
+ * added, but this is not yet supported.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to which to add the label
+ *
+ * @param label
+ * The MPLS label to add
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_local_label_add(u32 fib_index,
+ const fib_prefix_t *prefix,
+ mpls_label_t label);
+/**
+ * @brief
+ * Remove an MPLS local label for the prefix/route.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry from which to remove the label
+ *
+ * @param label
+ * The MPLS label to remove
+ */
+extern void fib_table_entry_local_label_remove(u32 fib_index,
+ const fib_prefix_t *prefix,
+ mpls_label_t label);
+
+/**
+ * @brief
+ * Delete a FIB entry. If the entry has no more sources, then it is
+ * removed from the table.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to remove
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ */
+extern void fib_table_entry_delete(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Delete a FIB entry. If the entry has no more sources, then it is
+ * removed from the table.
+ *
+ * @param entry_index
+ * The index of the FIB entry
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ */
+extern void fib_table_entry_delete_index(fib_node_index_t entry_index,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Flush all entries from a table for the source
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the entries in the table
+ *
+ * @param source
+ * the source to flush
+ */
+extern void fib_table_flush(u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Get the index of the FIB bound to the interface
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param sw_if_index
+ * The interface index
+ *
+ * @return fib_index
+ * The index of the FIB
+ */
+extern u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Get the Table-ID of the FIB bound to the interface
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param sw_if_index
+ * The interface index
+ *
+ * @return table_id
+ * The Table-ID of the FIB
+ */
+extern u32 fib_table_get_table_id_for_sw_if_index(fib_protocol_t proto,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Get the index of the FIB for a Table-ID. This DOES NOT create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table_id
+ * The Table-ID
+ *
+ * @return fib_index
+ * The index of the FIB, which may be INVALID.
+ */
+extern u32 fib_table_find(fib_protocol_t proto, u32 table_id);
+
+
+/**
+ * @brief
+ * Get the index of the FIB for a Table-ID. This DOES create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table_id
+ * The Table-ID
+ *
+ * @param source
+ * The ID of the client/source.
+ *
+ * @return fib_index
+ * The index of the FIB
+ */
+extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto,
+ u32 table_id,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Get the index of the FIB for a Table-ID. This DOES create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table_id
+ * The Table-ID
+ *
+ * @param source
+ * The ID of the client/source.
+ *
+ * @param name
+ * The name the client wants the table to have
+ *
+ * @return fib_index
+ * The index of the FIB
+ */
+extern u32 fib_table_find_or_create_and_lock_w_name(fib_protocol_t proto,
+ u32 table_id,
+ fib_source_t source,
+ const u8 *name);
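+
+/*
+ * Usage sketch (illustrative only; table-id 100 is an arbitrary example):
+ * a client claims a table, uses it, then drops its reference. The table
+ * is deleted when the last lock is released (see fib_table_unlock below).
+ *
+ *   u32 fib_index;
+ *
+ *   fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+ *                                                 100, FIB_SOURCE_API);
+ *   ... add/remove entries in the table ...
+ *   fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
+ */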
+
+/**
+ * @brief
+ * Create a new table with no table ID. This means it does not get
+ * added to the hash-table and so can only be found by using the index returned.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ * The ID of the client/source.
+ *
+ * @param fmt
+ * A format string to describe the table
+ *
+ * @return fib_index
+ * The index of the FIB
+ */
+extern u32 fib_table_create_and_lock(fib_protocol_t proto,
+ fib_source_t source,
+ const char *const fmt,
+ ...);
+
+/**
+ * @brief
+ * Get the flow hash config used by the table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @return The flow hash config
+ */
+extern flow_hash_config_t fib_table_get_flow_hash_config(u32 fib_index,
+ fib_protocol_t proto);
+
+/**
+ * @brief
+ * Get the flow hash config used by the protocol
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @return The flow hash config
+ */
+extern flow_hash_config_t fib_table_get_default_flow_hash_config(fib_protocol_t proto);
+
+/**
+ * @brief
+ * Set the flow hash config used by the table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param hash_config
+ * The flow-hash config to set
+ *
+ * @return none
+ */
+extern void fib_table_set_flow_hash_config(u32 fib_index,
+ fib_protocol_t proto,
+ flow_hash_config_t hash_config);
+
+/**
+ * @brief
+ * Release a reference counting lock on the table. When the last lock
+ * has gone, the FIB is deleted.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ * The ID of the client/source.
+ */
+extern void fib_table_unlock(u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Take a reference counting lock on the table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ * The ID of the client/source.
+ */
+extern void fib_table_lock(u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Return the number of entries in the FIB added by a given source.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ * The ID of the client/source.
+ *
+ * @return number of sourced entries.
+ */
+extern u32 fib_table_get_num_entries(u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Get a pointer to a FIB table
+ */
+extern fib_table_t *fib_table_get(fib_node_index_t index,
+ fib_protocol_t proto);
+
+/**
+ * @brief Callback function invoked when walking entries in a FIB table
+ */
+typedef int (*fib_table_walk_fn_t)(fib_node_index_t fei,
+ void *ctx);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B.: This is NOT safe with respect to deletes. If you need to delete,
+ * walk the whole table storing the entry indices in a vector, then delete
+ * the elements via the vector (see the sketch below).
+ */
+extern void fib_table_walk(u32 fib_index,
+ fib_protocol_t proto,
+ fib_table_walk_fn_t fn,
+ void *ctx);
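+
+/*
+ * Usage sketch (illustrative only; FIB_SOURCE_API is just an example
+ * source): collect entry indices during the walk, then delete them
+ * afterwards, as the N.B. above requires.
+ *
+ *   static int
+ *   collect_fei (fib_node_index_t fei, void *ctx)
+ *   {
+ *       fib_node_index_t **feis = ctx;
+ *       vec_add1(*feis, fei);
+ *       return (1);
+ *   }
+ *
+ *   fib_node_index_t *feis = NULL, *fei;
+ *
+ *   fib_table_walk(fib_index, FIB_PROTOCOL_IP4, collect_fei, &feis);
+ *   vec_foreach(fei, feis)
+ *   {
+ *       fib_table_entry_delete_index(*fei, FIB_SOURCE_API);
+ *   }
+ *   vec_free(feis);
+ */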
+
+#endif
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
new file mode 100644
index 00000000..540289ce
--- /dev/null
+++ b/src/vnet/fib/fib_test.c
@@ -0,0 +1,8768 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/bfd/bfd_main.h>
+#include <vnet/dpo/interface_rx_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <vnet/mpls/mpls.h>
+
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_node_list.h>
+#include <vnet/fib/fib_urpf_list.h>
+
+/*
+ * Add debugs for passing tests
+ */
+static int fib_test_do_debug;
+
+#define FIB_TEST_I(_cond, _comment, _args...) \
+({ \
+ int _evald = (_cond); \
+ if (!(_evald)) { \
+ fformat(stderr, "FAIL:%d: " _comment "\n", \
+ __LINE__, ##_args); \
+ } else { \
+ if (fib_test_do_debug) \
+ fformat(stderr, "PASS:%d: " _comment "\n", \
+ __LINE__, ##_args); \
+ } \
+ _evald; \
+})
+#define FIB_TEST(_cond, _comment, _args...) \
+{ \
+ if (!FIB_TEST_I(_cond, _comment, ##_args)) { \
+ return 1; \
+ ASSERT(!("FAIL: " _comment)); \
+ } \
+}
+
+/**
+ * An 'I'm not fussed if this is not efficient' store of test data
+ */
+typedef struct test_main_t_ {
+ /**
+ * HW interface indices
+ */
+ u32 hw_if_indicies[4];
+ /**
+ * HW interfaces
+ */
+ vnet_hw_interface_t * hw[4];
+
+} test_main_t;
+static test_main_t test_main;
+
+/* fake ethernet device class, distinct from "fake-ethX" */
+static u8 * format_test_interface_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "test-eth%d", dev_instance);
+}
+
+static uword dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static clib_error_t *
+test_interface_admin_up_down (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+ return 0;
+}
+
+VNET_DEVICE_CLASS (test_interface_device_class,static) = {
+ .name = "Test interface",
+ .format_device_name = format_test_interface_name,
+ .tx_function = dummy_interface_tx,
+ .admin_up_down_function = test_interface_admin_up_down,
+};
+
+static u8 *hw_address;
+
+static int
+fib_test_mk_intf (u32 ninterfaces)
+{
+ clib_error_t * error = NULL;
+ test_main_t *tm = &test_main;
+ u8 byte;
+ u32 i;
+
+ ASSERT(ninterfaces <= ARRAY_LEN(tm->hw_if_indicies));
+
+ for (i=0; i<6; i++)
+ {
+ byte = 0xd0+i;
+ vec_add1(hw_address, byte);
+ }
+
+ for (i = 0; i < ninterfaces; i++)
+ {
+ hw_address[5] = i;
+
+ error = ethernet_register_interface(vnet_get_main(),
+ test_interface_device_class.index,
+ i /* instance */,
+ hw_address,
+ &tm->hw_if_indicies[i],
+ /* flag change */ 0);
+
+ FIB_TEST((NULL == error), "ADD interface %d", i);
+
+ error = vnet_hw_interface_set_flags(vnet_get_main(),
+ tm->hw_if_indicies[i],
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
+ tm->hw_if_indicies[i]);
+ vec_validate (ip4_main.fib_index_by_sw_if_index,
+ tm->hw[i]->sw_if_index);
+ vec_validate (ip6_main.fib_index_by_sw_if_index,
+ tm->hw[i]->sw_if_index);
+ ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[i]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ FIB_TEST((NULL == error), "UP interface %d", i);
+ }
+ /*
+ * re-eval after the inevitable realloc
+ */
+ for (i = 0; i < ninterfaces; i++)
+ {
+ tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
+ tm->hw_if_indicies[i]);
+ }
+
+ return (0);
+}
+
+#define FIB_TEST_REC_FORW(_rec_prefix, _via_prefix, _bucket) \
+{ \
+ const dpo_id_t *_rec_dpo = fib_entry_contribute_ip_forwarding( \
+ fib_table_lookup_exact_match(fib_index, (_rec_prefix))); \
+ const dpo_id_t *_via_dpo = fib_entry_contribute_ip_forwarding( \
+ fib_table_lookup(fib_index, (_via_prefix))); \
+ FIB_TEST(!dpo_cmp(_via_dpo, \
+ load_balance_get_bucket(_rec_dpo->dpoi_index, \
+ _bucket)), \
+ "%U is recursive via %U", \
+ format_fib_prefix, (_rec_prefix), \
+ format_fib_prefix, _via_prefix); \
+}
+
+#define FIB_TEST_LB_BUCKET_VIA_ADJ(_prefix, _bucket, _ai) \
+{ \
+ const dpo_id_t *_dpo = fib_entry_contribute_ip_forwarding( \
+ fib_table_lookup_exact_match(fib_index, (_prefix))); \
+ const dpo_id_t *_dpo1 = \
+ load_balance_get_bucket(_dpo->dpoi_index, _bucket); \
+ FIB_TEST(DPO_ADJACENCY == _dpo1->dpoi_type, "type is %U", \
+ format_dpo_type, _dpo1->dpoi_type); \
+ FIB_TEST((_ai == _dpo1->dpoi_index), \
+ "%U bucket %d resolves via %U", \
+ format_fib_prefix, (_prefix), \
+ _bucket, \
+ format_dpo_id, _dpo1, 0); \
+}
+
+#define FIB_TEST_RPF(_cond, _comment, _args...) \
+{ \
+ if (!FIB_TEST_I(_cond, _comment, ##_args)) { \
+ return (0); \
+ } \
+}
+
+static int
+fib_test_urpf_is_equal (fib_node_index_t fei,
+ fib_forward_chain_type_t fct,
+ u32 num, ...)
+{
+ dpo_id_t dpo = DPO_INVALID;
+ fib_urpf_list_t *urpf;
+ index_t ui;
+ va_list ap;
+ int ii;
+
+ va_start(ap, num);
+
+ fib_entry_contribute_forwarding(fei, fct, &dpo);
+ ui = load_balance_get_urpf(dpo.dpoi_index);
+
+ urpf = fib_urpf_list_get(ui);
+
+ FIB_TEST_RPF(num == vec_len(urpf->furpf_itfs),
+ "RPF:%U len %d == %d",
+ format_fib_urpf_list, ui,
+ num, vec_len(urpf->furpf_itfs));
+ FIB_TEST_RPF(num == fib_urpf_check_size(ui),
+ "RPF:%U check-size %d == %d",
+ format_fib_urpf_list, ui,
+ num, vec_len(urpf->furpf_itfs));
+
+ for (ii = 0; ii < num; ii++)
+ {
+ adj_index_t ai = va_arg(ap, adj_index_t);
+
+ FIB_TEST_RPF(ai == urpf->furpf_itfs[ii],
+ "RPF:%d item:%d - %d == %d",
+ ui, ii, ai, urpf->furpf_itfs[ii]);
+ FIB_TEST_RPF(fib_urpf_check(ui, ai),
+ "RPF:%d %d found",
+ ui, ai);
+ }
+
+ dpo_reset(&dpo);
+
+ va_end(ap);
+
+ return (1);
+}
+
+static u8*
+fib_test_build_rewrite (u8 *eth_addr)
+{
+ u8* rewrite = NULL;
+
+ vec_validate(rewrite, 13);
+
+ memcpy(rewrite, eth_addr, 6);
+ memcpy(rewrite+6, eth_addr, 6);
+
+ return (rewrite);
+}
+
+typedef enum fib_test_lb_bucket_type_t_ {
+ FT_LB_LABEL_O_ADJ,
+ FT_LB_LABEL_STACK_O_ADJ,
+ FT_LB_LABEL_O_LB,
+ FT_LB_O_LB,
+ FT_LB_SPECIAL,
+ FT_LB_ADJ,
+ FT_LB_INTF,
+} fib_test_lb_bucket_type_t;
+
+typedef struct fib_test_lb_bucket_t_ {
+ fib_test_lb_bucket_type_t type;
+
+ union
+ {
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label;
+ u8 ttl;
+ adj_index_t adj;
+ } label_o_adj;
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label_stack[8];
+ u8 label_stack_size;
+ u8 ttl;
+ adj_index_t adj;
+ } label_stack_o_adj;
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label;
+ u8 ttl;
+ index_t lb;
+ } label_o_lb;
+ struct
+ {
+ index_t adj;
+ } adj;
+ struct
+ {
+ index_t lb;
+ } lb;
+ struct
+ {
+ index_t adj;
+ } special;
+ };
+} fib_test_lb_bucket_t;
+
+typedef enum fib_test_rep_bucket_type_t_ {
+ FT_REP_LABEL_O_ADJ,
+ FT_REP_DISP_MFIB_LOOKUP,
+ FT_REP_INTF,
+} fib_test_rep_bucket_type_t;
+
+typedef struct fib_test_rep_bucket_t_ {
+ fib_test_rep_bucket_type_t type;
+
+ union
+ {
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label;
+ u8 ttl;
+ adj_index_t adj;
+ } label_o_adj;
+ struct
+ {
+ adj_index_t adj;
+ } adj;
+ };
+} fib_test_rep_bucket_t;
+
+#define FIB_TEST_LB(_cond, _comment, _args...) \
+{ \
+ if (!FIB_TEST_I(_cond, _comment, ##_args)) { \
+ return (0); \
+ } \
+}
+
+int
+fib_test_validate_rep_v (const replicate_t *rep,
+ u16 n_buckets,
+ va_list ap)
+{
+ const fib_test_rep_bucket_t *exp;
+ const dpo_id_t *dpo;
+ int bucket;
+
+ FIB_TEST_LB((n_buckets == rep->rep_n_buckets),
+ "n_buckets = %d", rep->rep_n_buckets);
+
+ for (bucket = 0; bucket < n_buckets; bucket++)
+ {
+ exp = va_arg(ap, fib_test_rep_bucket_t*);
+
+ dpo = replicate_get_bucket_i(rep, bucket);
+
+ switch (exp->type)
+ {
+ case FT_REP_LABEL_O_ADJ:
+ {
+ const mpls_label_dpo_t *mld;
+ mpls_label_t hdr;
+ FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+ hdr = clib_net_to_host_u32(mld->mld_hdr[0].label_exp_s_ttl);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) ==
+ exp->label_o_adj.label),
+ "bucket %d stacks on label %d",
+ bucket,
+ exp->label_o_adj.label);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) ==
+ exp->label_o_adj.eos),
+ "bucket %d stacks on label %d %U",
+ bucket,
+ exp->label_o_adj.label,
+ format_mpls_eos_bit, exp->label_o_adj.eos);
+
+ FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type),
+ "bucket %d label stacks on %U",
+ bucket,
+ format_dpo_type, mld->mld_dpo.dpoi_type);
+
+ FIB_TEST_LB((exp->label_o_adj.adj == mld->mld_dpo.dpoi_index),
+ "bucket %d label stacks on adj %d",
+ bucket,
+ exp->label_o_adj.adj);
+ }
+ break;
+ case FT_REP_INTF:
+ FIB_TEST_LB((DPO_INTERFACE_RX == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+
+ FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
+ "bucket %d stacks on adj %d",
+ bucket,
+ exp->adj.adj);
+ break;
+ case FT_REP_DISP_MFIB_LOOKUP:
+// ASSERT(0);
+ break;
+ }
+ }
+
+ return (!0);
+}
+
+int
+fib_test_validate_lb_v (const load_balance_t *lb,
+ u16 n_buckets,
+ va_list ap)
+{
+ const dpo_id_t *dpo;
+ int bucket;
+
+ FIB_TEST_LB((n_buckets == lb->lb_n_buckets), "n_buckets = %d", lb->lb_n_buckets);
+
+ for (bucket = 0; bucket < n_buckets; bucket++)
+ {
+ const fib_test_lb_bucket_t *exp;
+
+ exp = va_arg(ap, fib_test_lb_bucket_t*);
+ dpo = load_balance_get_bucket_i(lb, bucket);
+
+ switch (exp->type)
+ {
+ case FT_LB_LABEL_STACK_O_ADJ:
+ {
+ const mpls_label_dpo_t *mld;
+ mpls_label_t hdr;
+ u32 ii;
+
+ FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+
+ FIB_TEST_LB(exp->label_stack_o_adj.label_stack_size == mld->mld_n_labels,
+ "label stack size %d",
+ mld->mld_n_labels);
+
+ for (ii = 0; ii < mld->mld_n_labels; ii++)
+ {
+ hdr = clib_net_to_host_u32(mld->mld_hdr[ii].label_exp_s_ttl);
+ FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) ==
+ exp->label_stack_o_adj.label_stack[ii]),
+ "bucket %d stacks on label %d",
+ bucket,
+ exp->label_stack_o_adj.label_stack[ii]);
+
+ if (ii == mld->mld_n_labels-1)
+ {
+ FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) ==
+ exp->label_o_adj.eos),
+ "bucket %d stacks on label %d %U!=%U",
+ bucket,
+ exp->label_stack_o_adj.label_stack[ii],
+ format_mpls_eos_bit, exp->label_o_adj.eos,
+ format_mpls_eos_bit, vnet_mpls_uc_get_s(hdr));
+ }
+ else
+ {
+ FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == MPLS_NON_EOS),
+ "bucket %d stacks on label %d %U",
+ bucket,
+ exp->label_stack_o_adj.label_stack[ii],
+ format_mpls_eos_bit, vnet_mpls_uc_get_s(hdr));
+ }
+ }
+
+ FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type),
+ "bucket %d label stacks on %U",
+ bucket,
+ format_dpo_type, mld->mld_dpo.dpoi_type);
+
+ FIB_TEST_LB((exp->label_stack_o_adj.adj == mld->mld_dpo.dpoi_index),
+ "bucket %d label stacks on adj %d",
+ bucket,
+ exp->label_stack_o_adj.adj);
+ }
+ break;
+ case FT_LB_LABEL_O_ADJ:
+ {
+ const mpls_label_dpo_t *mld;
+ mpls_label_t hdr;
+ FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+ hdr = clib_net_to_host_u32(mld->mld_hdr[0].label_exp_s_ttl);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) ==
+ exp->label_o_adj.label),
+ "bucket %d stacks on label %d",
+ bucket,
+ exp->label_o_adj.label);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) ==
+ exp->label_o_adj.eos),
+ "bucket %d stacks on label %d %U",
+ bucket,
+ exp->label_o_adj.label,
+ format_mpls_eos_bit, exp->label_o_adj.eos);
+
+ FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type),
+ "bucket %d label stacks on %U",
+ bucket,
+ format_dpo_type, mld->mld_dpo.dpoi_type);
+
+ FIB_TEST_LB((exp->label_o_adj.adj == mld->mld_dpo.dpoi_index),
+ "bucket %d label stacks on adj %d",
+ bucket,
+ exp->label_o_adj.adj);
+ }
+ break;
+ case FT_LB_LABEL_O_LB:
+ {
+ const mpls_label_dpo_t *mld;
+ mpls_label_t hdr;
+
+ FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+ hdr = clib_net_to_host_u32(mld->mld_hdr[0].label_exp_s_ttl);
+
+ FIB_TEST_LB(1 == mld->mld_n_labels, "label stack size %d",
+ mld->mld_n_labels);
+ FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) ==
+ exp->label_o_lb.label),
+ "bucket %d stacks on label %d",
+ bucket,
+ exp->label_o_lb.label);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) ==
+ exp->label_o_lb.eos),
+ "bucket %d stacks on label %d %U",
+ bucket,
+ exp->label_o_lb.label,
+ format_mpls_eos_bit, exp->label_o_lb.eos);
+
+ FIB_TEST_LB((DPO_LOAD_BALANCE == mld->mld_dpo.dpoi_type),
+ "bucket %d label stacks on %U",
+ bucket,
+ format_dpo_type, mld->mld_dpo.dpoi_type);
+
+ FIB_TEST_LB((exp->label_o_lb.lb == mld->mld_dpo.dpoi_index),
+ "bucket %d label stacks on LB %d",
+ bucket,
+ exp->label_o_lb.lb);
+ }
+ break;
+ case FT_LB_ADJ:
+ FIB_TEST_I(((DPO_ADJACENCY == dpo->dpoi_type) ||
+ (DPO_ADJACENCY_INCOMPLETE == dpo->dpoi_type)),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+ FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
+ "bucket %d stacks on adj %d",
+ bucket,
+ exp->adj.adj);
+ break;
+ case FT_LB_INTF:
+ FIB_TEST_I((DPO_INTERFACE_RX == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+ FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
+ "bucket %d stacks on adj %d",
+ bucket,
+ exp->adj.adj);
+ break;
+ case FT_LB_O_LB:
+ FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+ FIB_TEST_LB((exp->lb.lb == dpo->dpoi_index),
+ "bucket %d stacks on lb %d not %d",
+ bucket,
+ dpo->dpoi_index,
+ exp->lb.lb);
+ break;
+ case FT_LB_SPECIAL:
+ FIB_TEST_I((DPO_DROP == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+ FIB_TEST_LB((exp->special.adj == dpo->dpoi_index),
+ "bucket %d stacks on drop %d",
+ bucket,
+ exp->special.adj);
+ break;
+ }
+ }
+ return (!0);
+}
+
+int
+fib_test_validate_entry (fib_node_index_t fei,
+ fib_forward_chain_type_t fct,
+ u16 n_buckets,
+ ...)
+{
+ dpo_id_t dpo = DPO_INVALID;
+ fib_prefix_t pfx;
+ index_t fw_lbi;
+ u32 fib_index;
+ va_list ap;
+ int res;
+
+ va_start(ap, n_buckets);
+
+ fib_entry_get_prefix(fei, &pfx);
+ fib_index = fib_entry_get_fib_index(fei);
+ fib_entry_contribute_forwarding(fei, fct, &dpo);
+
+ if (DPO_REPLICATE == dpo.dpoi_type)
+ {
+ const replicate_t *rep;
+
+ rep = replicate_get(dpo.dpoi_index);
+ res = fib_test_validate_rep_v(rep, n_buckets, ap);
+ }
+ else
+ {
+ const load_balance_t *lb;
+
+ FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type),
+ "Entry links to %U",
+ format_dpo_type, dpo.dpoi_type);
+
+ lb = load_balance_get(dpo.dpoi_index);
+ res = fib_test_validate_lb_v(lb, n_buckets, ap);
+
+ /*
+ * ensure that the LB contributed by the entry is the
+ * same as the LB in the forwarding tables
+ */
+ if (fct == fib_entry_get_default_chain_type(fib_entry_get(fei)))
+ {
+ switch (pfx.fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ fw_lbi = ip4_fib_forwarding_lookup(fib_index, &pfx.fp_addr.ip4);
+ break;
+ case FIB_PROTOCOL_IP6:
+ fw_lbi = ip6_fib_table_fwding_lookup(&ip6_main, fib_index, &pfx.fp_addr.ip6);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ {
+ mpls_unicast_header_t hdr = {
+ .label_exp_s_ttl = 0,
+ };
+
+ vnet_mpls_uc_set_label(&hdr.label_exp_s_ttl, pfx.fp_label);
+ vnet_mpls_uc_set_s(&hdr.label_exp_s_ttl, pfx.fp_eos);
+ hdr.label_exp_s_ttl = clib_host_to_net_u32(hdr.label_exp_s_ttl);
+
+ fw_lbi = mpls_fib_table_forwarding_lookup(fib_index, &hdr);
+ break;
+ }
+ default:
+ fw_lbi = 0;
+ }
+ FIB_TEST_LB((fw_lbi == dpo.dpoi_index),
+ "Contributed LB = FW LB: %U\n %U",
+ format_load_balance, fw_lbi, 0,
+ format_load_balance, dpo.dpoi_index, 0);
+ }
+ }
+
+ dpo_reset(&dpo);
+
+ va_end(ap);
+
+ return (res);
+}
+
+static int
+fib_test_v4 (void)
+{
+ /*
+ * In the default table check for the presence and correct forwarding
+ * of the special entries
+ */
+ fib_node_index_t dfrt, fei, ai, ai2, locked_ai, ai_01, ai_02, ai_03;
+ const dpo_id_t *dpo, *dpo1, *dpo2, *dpo_drop;
+ const ip_adjacency_t *adj;
+ const load_balance_t *lb;
+ test_main_t *tm;
+ u32 fib_index;
+ int lb_count;
+ int ii;
+
+ /* via 10.10.10.1 */
+ ip46_address_t nh_10_10_10_1 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ };
+ /* via 10.10.10.2 */
+ ip46_address_t nh_10_10_10_2 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+ };
+
+ FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d",
+ pool_elts(load_balance_map_pool));
+
+ tm = &test_main;
+
+ /* record the number of load-balances in use before we start */
+ lb_count = pool_elts(load_balance_pool);
+
+ /* Find or create FIB table 11 */
+ fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11,
+ FIB_SOURCE_API);
+
+ for (ii = 0; ii < 4; ii++)
+ {
+ ip4_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index;
+ }
+
+ fib_prefix_t pfx_0_0_0_0_s_0 = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ {0}
+ },
+ },
+ };
+
+ fib_prefix_t pfx = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ {0}
+ },
+ },
+ };
+
+ dpo_drop = drop_dpo_get(DPO_PROTO_IP4);
+
+ dfrt = fib_table_lookup(fib_index, &pfx_0_0_0_0_s_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)),
+ "Default route is DROP");
+
+ pfx.fp_len = 32;
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all zeros route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "all 0s route is DROP");
+
+ pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xffffffff);
+ pfx.fp_len = 32;
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all ones route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "all 1s route is DROP");
+
+ pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xe0000000);
+ pfx.fp_len = 8;
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all-mcast route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "all-mcast route is DROP");
+
+ pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xf0000000);
+ pfx.fp_len = 8;
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "class-e route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "class-e route is DROP");
+
+ /*
+ * at this stage there are 5 entries in the test FIB (plus 5 in the default),
+ * all of which are special sourced and so none of which share path-lists.
+ * There are also 2 entries, and 2 non-shared path-lists, in the v6 default
+ * table, and 4 path-lists in the v6 MFIB table
+ */
+#define ENBR (5+5+2)
+#define PNBR (5+5+6)
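+/* i.e. ENBR = 12 entries and PNBR = 16 path-lists; the '6' is the 2
+ * non-shared v6 path-lists plus the 4 v6 MFIB path-lists noted above */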
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * add interface routes.
+ * validate presence of /24 attached and /32 receive.
+ * test for the presence of the receive address in the glean and local adj
+ */
+ fib_prefix_t local_pfx = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a0a),
+ },
+ },
+ };
+
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1, // weight
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present");
+ FIB_TEST(((FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED) ==
+ fib_entry_get_flags(fei)),
+ "Flags set on attached interface");
+
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai),
+ "attached interface route adj present %d", ai);
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index),
+ "attached interface adj is glean");
+ FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr,
+ &adj->sub_type.glean.receive_addr)),
+ "attached interface adj is receive ok");
+
+ local_pfx.fp_len = 32;
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1, // weight
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &local_pfx);
+ FIB_TEST(((FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED) ==
+ fib_entry_get_flags(fei)),
+ "Flags set on local interface");
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 0),
+ "RPF list for local length 0");
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST((DPO_RECEIVE == dpo->dpoi_type),
+ "local interface adj is local");
+ receive_dpo_t *rd = receive_dpo_get(dpo->dpoi_index);
+
+ FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr,
+ &rd->rd_addr)),
+ "local interface adj is receive ok");
+
+ FIB_TEST((2 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_INTERFACE)),
+ "2 Interface Source'd prefixes");
+
+ /*
+ * +2 interface routes +2 non-shared path-lists
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Modify the default route to be via an adj not yet known.
+ * This sources the default route with the API source, which is
+ * a higher preference than the DEFAULT_ROUTE source.
+ */
+ pfx.fp_addr.ip4.as_u32 = 0;
+ pfx.fp_len = 0;
+ fib_table_entry_path_add(fib_index, &pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)),
+ "Flags set on API route");
+
+ FIB_TEST((fei == dfrt), "default route same index");
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "default route adj present");
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&nh_10_10_10_1, &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ FIB_TEST((1 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_API)),
+ "1 API Source'd prefixes");
+
+ /*
+ * find the adj in the shared db
+ */
+ locked_ai = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((locked_ai == ai), "ADJ NBR DB find");
+ adj_unlock(locked_ai);
+
+ /*
+ * +1 shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+3 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove the API source from the default route. We expect
+ * the route to remain, sourced by DEFAULT_ROUTE, and hence a DROP
+ */
+ pfx.fp_addr.ip4.as_u32 = 0;
+ pfx.fp_len = 0;
+ fib_table_entry_path_remove(fib_index, &pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // non-recursive path, so no FIB index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx);
+
+ FIB_TEST((fei == dfrt), "default route same index");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "Default route is DROP");
+
+ /*
+ * -1 shared-path-list
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 ARP entries => a complete ADJ plus adj-fib for each.
+ */
+ fib_prefix_t pfx_10_10_10_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.1 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ },
+ };
+ fib_prefix_t pfx_10_10_10_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.2 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+ },
+ };
+ fib_prefix_t pfx_11_11_11_11_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 11.11.11.11 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0b0b0b0b),
+ },
+ };
+ u8 eth_addr[] = {
+ 0xde, 0xde, 0xde, 0xba, 0xba, 0xba,
+ };
+
+ ip46_address_t nh_12_12_12_12 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0c0c0c0c),
+ };
+ adj_index_t ai_12_12_12_12;
+
+ /*
+ * Add a route via an incomplete ADJ, then complete the ADJ.
+ * Expect the route's LB to be updated to use the complete adj type.
+ */
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_11_11_11_11_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY_INCOMPLETE == dpo1->dpoi_type,
+ "11.11.11.11/32 via incomplete adj");
+
+ ai_01 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_01), "adj created");
+ adj = adj_get(ai_01);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type,
+ "11.11.11.11/32 via complete adj");
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1,
+ tm->hw[0]->sw_if_index),
+ "RPF list for adj-fib contains adj");
+
+ ai_12_12_12_12 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_12_12_12_12,
+ tm->hw[1]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_12_12_12_12), "adj created");
+ adj = adj_get(ai_12_12_12_12);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&nh_12_12_12_12,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ adj_nbr_update_rewrite(ai_12_12_12_12, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+
+ /*
+ * add the adj fib
+ */
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST((FIB_ENTRY_FLAG_ATTACHED == fib_entry_get_flags(fei)),
+ "Flags set on adj-fib");
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj, %d", ai);
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_11_11_11_11_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ eth_addr[5] = 0xb2;
+
+ ai_02 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &pfx_10_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_02), "adj created");
+ adj = adj_get(ai_02);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ FIB_TEST((ai_01 != ai_02), "ADJs are different");
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_10_10_10_2_s_32);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj");
+
+ /*
+ * +2 adj-fibs, and their non-shared path-lists
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 routes via the first ADJ; ensure path-list sharing
+ */
+ fib_prefix_t pfx_1_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 1.1.1.1/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010101),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.1.1 resolves via 10.10.10.1");
+
+ /*
+ * +1 entry and a shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population is 1");
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+5 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /* 1.1.2.0/24 */
+ fib_prefix_t pfx_1_1_2_0_s_24 = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010200),
+ }
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_2_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.2.0/24 resolves via 10.10.10.1");
+
+ /*
+ * +1 entry only
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population is 1");
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * modify 1.1.2.0/24 to use multipath.
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_2_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1, tm->hw[0]->sw_if_index),
+ "RPF list for 1.1.2.0/24 contains both adjs");
+
+ dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, "type is %d", dpo1->dpoi_type);
+ FIB_TEST((ai_01 == dpo1->dpoi_index),
+ "1.1.2.0/24 bucket 0 resolves via 10.10.10.1 (%d=%d)",
+ ai_01, dpo1->dpoi_index);
+
+ dpo1 = load_balance_get_bucket(dpo->dpoi_index, 1);
+ FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, "type is %d", dpo1->dpoi_type);
+ FIB_TEST((ai_02 == dpo1->dpoi_index),
+ "1.1.2.0/24 bucket 1 resolves via 10.10.10.2");
+
+ /*
+ * +1 shared-pathlist
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population is 2");
+ FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * revert the modify
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_2_0_s_24,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1, tm->hw[0]->sw_if_index),
+ "RPF list for 1.1.2.0/24 contains one adj");
+
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.2.0/24 resolves via 10.10.10.1");
+
+ /*
+ * -1 shared-pathlist
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB is %d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 recursive routes:
+ * 100.100.100.100/32 via 1.1.1.1/32 => the via entry is installed.
+ * 100.100.100.101/32 via 1.1.1.1/32 => the via entry is installed.
+ */
+ fib_prefix_t bgp_100_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 100.100.100.100/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x64646464),
+ },
+ };
+ /* via 1.1.1.1 */
+ ip46_address_t nh_1_1_1_1 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010101),
+ };
+
+ fei = fib_table_entry_path_add(fib_index,
+ &bgp_100_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_1_1_1_1,
+ ~0, // no index provided.
+ fib_index, // nexthop in same fib as route
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST_REC_FORW(&bgp_100_pfx, &pfx_1_1_1_1_s_32, 0);
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1,
+ tm->hw[0]->sw_if_index),
+ "RPF list for adj-fib contains adj");
+
+ /*
+ * +1 entry and +1 shared-path-list
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ fib_prefix_t bgp_101_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 100.100.100.101/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x64646465),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_101_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_1_1_1_1,
+ ~0, // no index provided.
+ fib_index, // nexthop in same fib as route
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST_REC_FORW(&bgp_101_pfx, &pfx_1_1_1_1_s_32, 0);
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1,
+ tm->hw[0]->sw_if_index),
+ "RPF list for adj-fib contains adj");
+
+ /*
+ * +1 entry, but the recursive path-list is shared.
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+8 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A special route; one where the user (me) provides the
+ * adjacency through which the route will resolve by setting the flags
+ */
+ fib_prefix_t ex_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 4.4.4.4/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x04040404),
+ },
+ };
+
+ fib_table_entry_special_add(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_LOCAL);
+ fei = fib_table_lookup_exact_match(fib_index, &ex_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST((DPO_RECEIVE == dpo->dpoi_type),
+ "local interface adj is local");
+
+ fib_table_entry_special_remove(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &ex_pfx),
+ "Exclusive route removed");
+
+ /*
+ * An EXCLUSIVE route; one where the user (me) provides the exclusive
+ * adjacency through which the route will resolve
+ */
+ dpo_id_t ex_dpo = DPO_INVALID;
+
+ lookup_dpo_add_or_lock_w_fib_index(fib_index,
+ DPO_PROTO_IP4,
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ &ex_dpo);
+
+ fib_table_entry_special_dpo_add(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &ex_dpo);
+ fei = fib_table_lookup_exact_match(fib_index, &ex_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(&ex_dpo, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "exclusive route uses lookup DPO");
+
+ /*
+ * update the exclusive to use a different DPO
+ */
+ ip_null_dpo_add_and_lock(DPO_PROTO_IP4,
+ IP_NULL_ACTION_SEND_ICMP_UNREACH,
+ &ex_dpo);
+ fib_table_entry_special_dpo_update(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &ex_dpo);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(&ex_dpo, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "exclusive route now uses NULL DPO");
+
+ fib_table_entry_special_remove(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &ex_pfx),
+ "Exclusive route removed");
+ dpo_reset(&ex_dpo);
+
+ /*
+ * Add a recursive route:
+ * 200.200.200.200/32 via 1.1.1.2/32 => the via entry is NOT installed.
+ */
+ fib_prefix_t bgp_200_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 200.200.200.200/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0xc8c8c8c8),
+ },
+ };
+ /* via 1.1.1.2 */
+ fib_prefix_t pfx_1_1_1_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010102),
+ },
+ };
+
+ fei = fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // nexthop in same fib as route
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "Recursive via unresolved is drop");
+
+ /*
+ * the adj should be recursive via drop, since the route resolves via
+ * the default route, which is itself a DROP
+ */
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(load_balance_is_drop(dpo1), "1.1.1.2/32 is drop");
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 0),
+ "RPF list for 1.1.1.2/32 contains 0 adjs");
+
+ /*
+ * +2 entries and +1 shared-path-list
+ */
+ FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+7 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Unequal Cost load-balance. 3:1 ratio. Fits in a 4 bucket LB.
+ * The paths are sorted by NH first; in this case the path with the greater
+ * weight is first in the set. This ordering tests the RPF sort|uniq logic.
+ */
+ fib_prefix_t pfx_1_2_3_4_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01020304),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_2_3_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_2_3_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_12_12_12_12,
+ tm->hw[1]->sw_if_index,
+ ~0,
+ 3,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.2.3.4/32 present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 4),
+ "1.2.3.4/32 LB has %d bucket",
+ lb->lb_n_buckets);
+
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 0, ai_12_12_12_12);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 1, ai_12_12_12_12);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 2, ai_12_12_12_12);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 3, ai_01);
+
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 2,
+ tm->hw[0]->sw_if_index,
+ tm->hw[1]->sw_if_index),
+ "RPF list for 1.2.3.4/32 contains both adjs");
+
+
+ /*
+ * Unequal Cost load-balance. 4:1 ratio.
+ * Fits in a 16 bucket LB with ratio 13:3.
+ */
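+ /*
+ * (Bucket arithmetic: 16 buckets split 4:1 gives 16*4/5 = 12.8 and
+ * 16*1/5 = 3.2 buckets, which normalise to the 13:3 ratio checked below.)
+ */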
+ fib_prefix_t pfx_1_2_3_5_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01020305),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_2_3_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_12_12_12_12,
+ tm->hw[1]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_2_3_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 4,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.2.3.5/32 present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 16),
+ "1.2.3.5/32 LB has %d bucket",
+ lb->lb_n_buckets);
+
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 0, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 1, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 2, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 3, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 4, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 5, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 6, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 7, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 8, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 9, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 10, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 11, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 12, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 13, ai_12_12_12_12);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 14, ai_12_12_12_12);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 15, ai_12_12_12_12);
+
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 2,
+ tm->hw[0]->sw_if_index,
+ tm->hw[1]->sw_if_index),
+ "RPF list for 1.2.3.5/32 contains both adjs");
+
+ /*
+ * Test UCMP with a large weight skew - this produces load-balance objects with large
+ * numbers of buckets to accommodate the skew. By updating said load-balances we are
+ * also testing the LB in-place modify code when the number of buckets is large.
+ */
+ fib_prefix_t pfx_6_6_6_6_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 6.6.6.6/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x06060606),
+ },
+ };
+ fib_test_lb_bucket_t ip_o_10_10_10_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_01,
+ },
+ };
+ fib_test_lb_bucket_t ip_o_10_10_10_2 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_02,
+ },
+ };
+ fib_test_lb_bucket_t ip_6_6_6_6_o_12_12_12_12 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_12_12_12_12,
+ },
+ };
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_6_6_6_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 0, // zero weight
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_10_10_10_1),
+ "6.6.6.6/32 via 10.10.10.1");
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_6_6_6_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 100,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 64,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_1),
+ "6.6.6.6/32 via 10.10.10.1 and 10.10.10.2 in 63:1 ratio");
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_6_6_6_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_12_12_12_12,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 100,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 128,
+ &ip_o_10_10_10_1,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12,
+ &ip_6_6_6_6_o_12_12_12_12),
+ "6.6.6.6/32 via 10.10.10.1, 10.10.10.2 and 12.12.12.12");
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_6_6_6_6_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_12_12_12_12,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 100,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 64,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_2,
+ &ip_o_10_10_10_1),
+ "6.6.6.6/32 via 10.10.10.1 and 10.10.10.2 in 63:1 ratio");
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_6_6_6_6_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 100,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_10_10_10_1),
+ "6.6.6.6/32 via 10.10.10.1");
+
+ fib_table_entry_delete(fib_index, &pfx_6_6_6_6_s_32, FIB_SOURCE_API);
+
+ /*
+ * A recursive route via the two unequal-cost entries
+ */
+ fib_prefix_t bgp_44_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 68.68.68.68/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x44444444),
+ },
+ };
+ fei = fib_table_entry_path_add(fib_index,
+ &bgp_44_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_2_3_4_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_entry_path_add(fib_index,
+ &bgp_44_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_2_3_5_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST_REC_FORW(&bgp_44_s_32, &pfx_1_2_3_4_s_32, 0);
+ FIB_TEST_REC_FORW(&bgp_44_s_32, &pfx_1_2_3_5_s_32, 1);
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 2,
+ tm->hw[0]->sw_if_index,
+ tm->hw[1]->sw_if_index),
+ "RPF list for 1.2.3.4/32 contains both adjs");
+
+ /*
+ * test the uRPF check functions
+ */
+ dpo_id_t dpo_44 = DPO_INVALID;
+ index_t urpfi;
+
+ fib_entry_contribute_forwarding(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, &dpo_44);
+ urpfi = load_balance_get_urpf(dpo_44.dpoi_index);
+
+ FIB_TEST(fib_urpf_check(urpfi, tm->hw[0]->sw_if_index),
+ "uRPF check for 68.68.68.68/32 on %d OK",
+ tm->hw[0]->sw_if_index);
+ FIB_TEST(fib_urpf_check(urpfi, tm->hw[1]->sw_if_index),
+ "uRPF check for 68.68.68.68/32 on %d OK",
+ tm->hw[1]->sw_if_index);
+ FIB_TEST(!fib_urpf_check(urpfi, 99),
+ "uRPF check for 68.68.68.68/32 on 99 not-OK");
+ dpo_reset(&dpo_44);
+
+ fib_table_entry_delete(fib_index,
+ &bgp_44_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_1_2_3_5_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_1_2_3_4_s_32,
+ FIB_SOURCE_API);
+
+ /*
+ * Add a recursive route:
+ * 200.200.200.201/32 via 1.1.1.200/32 => the via entry is NOT installed.
+ */
+ fib_prefix_t bgp_201_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 200.200.200.201/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0xc8c8c8c9),
+ },
+ };
+ /* via 1.1.1.200 */
+ fib_prefix_t pfx_1_1_1_200_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x010101c8),
+ },
+ };
+
+ fei = fib_table_entry_path_add(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_200_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // nexthop in same fib as route
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "Recursive via unresolved is drop");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32);
+ FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)),
+ "Flags set on RR via non-attached");
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 0),
+ "RPF list for BGP route empty");
+
+ /*
+ * +2 entry (BGP & RR) and +1 shared-path-list
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
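+ /*
+ * Note: "RR" (recursive-resolution) is the source FIB uses for the host
+ * entry it auto-creates to track an unresolved recursive via-address.
+ * Such an entry inherits forwarding from its covering prefix, which is
+ * why the recursive route above is drop until a cover is added.
+ */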
+
+ /*
+ * insert a route that covers the missing 1.1.1.2/32. we expect
+ * 200.200.200.200/32 and 200.200.200.201/32 to resolve through it.
+ */
+ fib_prefix_t pfx_1_1_1_0_s_24 = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 1.1.1.0/24 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010100),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_0_s_24);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.1.0/24 resolves via 10.10.10.1");
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.1.2/32 resolves via 10.10.10.1");
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_200_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.1.200/24 resolves via 10.10.10.1");
+
+ /*
+ * +1 entry. 1.1.1.1/32 already uses 10.10.10.1 so no new path-list
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+13 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * the recursive adj for 200.200.200.200 should be updated.
+ */
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0);
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0);
+ fei = fib_table_lookup(fib_index, &bgp_200_pfx);
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1,
+ tm->hw[0]->sw_if_index),
+ "RPF list for BGP route has itf index 0");
+
+ /*
+ * insert a more specific route than 1.1.1.0/24 that also covers the
+ * missing 1.1.1.2/32, but not 1.1.1.200/32. we expect
+ * 200.200.200.200 to resolve through it.
+ */
+ fib_prefix_t pfx_1_1_1_0_s_28 = {
+ .fp_len = 28,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 1.1.1.0/28 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010100),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_0_s_28,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_0_s_28);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "1.1.1.0/24 resolves via 10.10.10.2");
+
+ /*
+ * +1 entry. +1 shared path-list
+ */
+ FIB_TEST((5 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+9 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+14 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * the recursive adj for 200.200.200.200 should be updated.
+ * 200.200.200.201 remains unchanged.
+ */
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0);
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0);
+
+ /*
+ * remove this /28. 200.200.200.200/32 should revert back to via 1.1.1.0/24
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_0_s_28,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28) ==
+ FIB_NODE_INDEX_INVALID),
+ "1.1.1.0/28 removed");
+ FIB_TEST((fib_table_lookup(fib_index, &pfx_1_1_1_0_s_28) ==
+ fib_table_lookup(fib_index, &pfx_1_1_1_0_s_24)),
+ "1.1.1.0/28 lookup via /24");
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0);
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0);
+
+ /*
+ * -1 entry. -1 shared path-list
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+13 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove 1.1.1.0/24. 200.200.200.200/32 should revert back to via 0.0.0.0/0
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_0_s_24,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_24) ==
+ FIB_NODE_INDEX_INVALID),
+ "1.1.1.0/24 removed");
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "1.1.1.2/32 route is DROP");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "1.1.1.200/32 route is DROP");
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_201_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "201 is drop");
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "200 is drop");
+
+ /*
+ * -1 entry
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * insert the missing 1.1.1.2/32
+ */
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai == ai_01), "1.1.1.2/32 resolves via 10.10.10.1");
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_201_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "201 is drop");
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0);
+
+ /*
+ * no change. 1.1.1.2/32 was already present, RR-sourced.
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * give 201 a resolved path.
+ * it now has the unresolved 1.1.1.200 and the resolved 1.1.1.2,
+ * only the latter contributes forwarding.
+ */
+ fei = fib_table_entry_path_add(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_2_s_32, 0);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * remove 200.200.200.201/32 which does not have a valid via FIB
+ */
+ fib_table_entry_path_remove(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_200_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * -2 entries (BGP and RR). -1 shared path-list;
+ */
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_201_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "200.200.200.201/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32) ==
+ FIB_NODE_INDEX_INVALID),
+ "1.1.1.200/32 removed");
+
+ FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+7 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove 200.200.200.200/32 which does have a valid via FIB
+ */
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_200_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "200.200.200.200/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32) !=
+ FIB_NODE_INDEX_INVALID),
+ "1.1.1.2/32 still present");
+
+ /*
+ * -1 entry (BGP, the RR source is also API sourced). -1 shared path-list;
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+9 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A recursive prefix that has a 2-path load-balance.
+ * It also shares a next-hop with other BGP prefixes and hence
+ * tests the ref counting of RR-sourced prefixes and 2-level LB.
+ */
+ const fib_prefix_t bgp_102 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 100.100.100.102/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x64646466),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &bgp_102,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &bgp_102,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_102);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "100.100.100.102/32 presnet");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 2), "Recursive LB has %d bucket", lb->lb_n_buckets);
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "First via 10.10.10.1");
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo->dpoi_index, 1)),
+ "Second via 10.10.10.1");
+
+ fib_table_entry_path_remove(fib_index,
+ &bgp_102,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_102,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_102);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "100.100.100.102/32 removed");
+
+ /*
+ * remove the remaining recursives
+ */
+ fib_table_entry_path_remove(fib_index,
+ &bgp_100_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_101_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_100_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "100.100.100.100/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_101_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "100.100.100.101/32 removed");
+
+ /*
+ * -2 entries (2*BGP, the RR source is also API sourced). -1 shared path-list;
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add a recursive route via a connected cover, using an adj-fib that does exist
+ */
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ ~0, // no index provided.
+ fib_index, // Same as route's FIB
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * +1 entry. +1 shared path-list (recursive via 10.10.10.1)
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+8 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "200.200.200.200/32 is recursive via adj for 10.10.10.1");
+
+ FIB_TEST((FIB_ENTRY_FLAG_ATTACHED == fib_entry_get_flags(fei)),
+ "Flags set on RR via existing attached");
+
+ /*
+ * Add a recursive route via a connected cover, using an adj-fib that does
+ * not exist
+ */
+ ip46_address_t nh_10_10_10_3 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03),
+ };
+ fib_prefix_t pfx_10_10_10_3 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = nh_10_10_10_3,
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_3,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * +2 entries (BGP and RR). +1 shared path-list (recursive via 10.10.10.3) and
+ * one unshared non-recursive via 10.10.10.3
+ */
+ FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ ai_03 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_201_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai == ai_03), "adj for 10.10.10.3/32 is via adj for 10.10.10.3");
+ FIB_TEST(((FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED) ==
+ fib_entry_get_flags(fei)),
+ "Flags set on RR via non-existing attached");
+
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for 10.10.10.3");
+
+ adj_unlock(ai_03);
+
+ /*
+ * remove the recursives
+ */
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_3,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_201_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "200.200.200.201/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_200_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "200.200.200.200/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3) ==
+ FIB_NODE_INDEX_INVALID),
+ "10.10.10.3/32 removed");
+
+ /*
+ * -3 entries (2*BGP and RR). -2 shared path-list (recursive via 10.10.10.3 &
+ * 10.10.10.1) and one unshared non-recursive via 10.10.10.3
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * RECURSION LOOPS
+ * Add 5.5.5.5/32 -> 5.5.5.6/32 -> 5.5.5.7/32 -> 5.5.5.5/32
+ */
+ fib_prefix_t pfx_5_5_5_5_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x05050505),
+ },
+ };
+ fib_prefix_t pfx_5_5_5_6_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x05050506),
+ },
+ };
+ fib_prefix_t pfx_5_5_5_7_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x05050507),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_7_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_7_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_5_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ /*
+ * +3 entries, +3 shared path-list
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * All the entries have only looped paths, so they are all drop
+ */
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.7/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.5/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.6/32 is via adj for DROP");
+
+ /*
+ * provide 5.5.5.6/32 with alternate path.
+ * this will allow only 5.5.5.6/32 to forward with this path, the others
+ * are still drop since the loop is still present.
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo1->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 1), "5.5.5.6 LB has %d bucket", lb->lb_n_buckets);
+
+ dpo2 = load_balance_get_bucket(dpo1->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY == dpo2->dpoi_type, "type is %d", dpo2->dpoi_type);
+ FIB_TEST((ai_01 == dpo2->dpoi_index),
+ "5.5.5.6 bucket 0 resolves via 10.10.10.2");
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.7/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.5/32 is via adj for DROP");
+
+ /*
+ * remove the alternate path for 5.5.5.6/32
+ * back to all drop
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.7/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.5/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.6/32 is via adj for DROP");
+
+ /*
+ * break the loop by giving 5.5.5.5/32 a new set of paths
+ * expect all to forward via this new path.
+ */
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ lb = load_balance_get(dpo1->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 1), "5.5.5.5 LB has %d bucket", lb->lb_n_buckets);
+
+ dpo2 = load_balance_get_bucket(dpo1->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY == dpo2->dpoi_type, "type is %d", dpo2->dpoi_type);
+ FIB_TEST((ai_01 == dpo2->dpoi_index),
+ "5.5.5.5 bucket 0 resolves via 10.10.10.2");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_7_s_32);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo2->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 1), "Recursive LB has %d bucket", lb->lb_n_buckets);
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo2->dpoi_index, 0)),
+ "5.5.5.5.7 via 5.5.5.5");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_6_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo1->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 1), "Recursive LB has %d bucket", lb->lb_n_buckets);
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)),
+ "5.5.5.5.6 via 5.5.5.7");
+
+ /*
+ * revert back to the loop. so we can remove the prefixes with
+ * the loop intact
+ */
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.7/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.5/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.6/32 is via adj for DROP");
+
+ /*
+ * remove all the 5.5.5.x/32 prefixes
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_7_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_7_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_5_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * -3 entries, -3 shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Single level loop 5.5.5.6/32 via 5.5.5.6/32
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "1-level 5.5.5.6/32 loop is via adj for DROP");
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_6_s_32),
+ "1-level 5.5.5.6/32 loop is removed");
+
+ /*
+ * A recursive route whose next-hop is covered by the prefix.
+ * This would mean the via-fib, which inherits forwarding from its
+ * cover, thus picks up forwarding from the prefix, which is via the
+ * via-fib, and we have a loop.
+ */
+ fib_prefix_t pfx_23_23_23_0_s_24 = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x17171700),
+ },
+ };
+ fib_prefix_t pfx_23_23_23_23_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x17171717),
+ },
+ };
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_23_23_23_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_23_23_23_23_s_32.fp_addr,
+ ~0, // recursive
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(load_balance_is_drop(dpo),
+ "23.23.23.0/24 via covered is DROP");
+ fib_table_entry_delete_index(fei, FIB_SOURCE_API);
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Make the default route recursive via an unknown next-hop. Thus the
+ * next hop's cover would be the default route
+ */
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_0_0_0_0_s_0,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_23_23_23_23_s_32.fp_addr,
+ ~0, // recursive
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(load_balance_is_drop(dpo),
+ "0.0.0.0.0/0 via is DROP");
+ FIB_TEST((fib_entry_get_resolving_interface(fei) == ~0),
+ "no resolving interface for looped 0.0.0.0/0");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_23_23_23_23_s_32);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(load_balance_is_drop(dpo),
+ "23.23.23.23/32 via is DROP");
+ FIB_TEST((fib_entry_get_resolving_interface(fei) == ~0),
+ "no resolving interface for looped 23.23.23.23/32");
+
+ fib_table_entry_delete(fib_index, &pfx_0_0_0_0_s_0, FIB_SOURCE_API);
+
+ /*
+ * A recursive route with recursion constraints.
+ * 200.200.200.200/32 via 1.1.1.1 is recurse via host constrained
+ */
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_1_1_1_1,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
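+ /*
+ * FIB_ROUTE_PATH_RESOLVE_VIA_HOST constrains the recursion: the
+ * next-hop may only resolve through a host (/32) entry, never through
+ * a covering prefix - the usual BGP requirement, exercised below by
+ * removing the host and checking the route drops rather than falling
+ * back to the /28 cover.
+ */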
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1");
+
+ /*
+ * save the load-balance. we expect it to be inplace modified
+ */
+ lb = load_balance_get(dpo1->dpoi_index);
+
+ /*
+ * add a covering prefix for the via fib that would otherwise serve
+ * as the resolving route when the host is removed
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_0_s_28,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai == ai_01),
+ "adj for 1.1.1.0/28 is via adj for 1.1.1.1");
+
+ /*
+ * remove the host via FIB - expect the BGP prefix to be drop
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo1->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for DROP");
+
+ /*
+ * add the via-entry host route back. expect it to resolve again
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1");
+
+ /*
+ * add another path for the recursive. it will then have 2.
+ */
+ fib_prefix_t pfx_1_1_1_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010103),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_3_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_3_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+
+ /*
+ * add a load more entries using this path combo so that we get
+ * an LB-map created.
+ */
+#define N_P 128
+ fib_prefix_t bgp_78s[N_P];
+ for (ii = 0; ii < N_P; ii++)
+ {
+ bgp_78s[ii].fp_len = 32;
+ bgp_78s[ii].fp_proto = FIB_PROTOCOL_IP4;
+ bgp_78s[ii].fp_addr.ip4.as_u32 = clib_host_to_net_u32(0x4e000000+ii);
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_78s[ii],
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_3_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+ fib_table_entry_path_add(fib_index,
+ &bgp_78s[ii],
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_1_1_1_1,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+ }
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_3_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 1)),
+ "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.3");
+
+ /*
+ * expect the lb-map used by the recursive's load-balance to use both buckets
+ */
+ load_balance_map_t *lbm;
+ index_t lbmi;
+
+ lb = load_balance_get(dpo->dpoi_index);
+ lbmi = lb->lb_map;
+ load_balance_map_lock(lbmi);
+ lbm = load_balance_map_get(lbmi);
+
+ FIB_TEST(lbm->lbm_buckets[0] == 0,
+ "LB maps's bucket 0 is %d",
+ lbm->lbm_buckets[0]);
+ FIB_TEST(lbm->lbm_buckets[1] == 1,
+ "LB maps's bucket 1 is %d",
+ lbm->lbm_buckets[1]);
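+ /*
+ * The LB-map is a level of indirection used for PIC: the flow hash
+ * picks a bucket index that is first translated through lbm_buckets[]
+ * before the bucket's DPO is fetched. With all paths up the map is the
+ * identity, as asserted above.
+ */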
+
+ /*
+ * withdraw one of the /32 via-entries.
+ * that ECMP path will be unresolved and forwarding should continue on the
+ * other available path. this is an iBGP PIC edge failover.
+ * Test the forwarding changes without re-fetching the adj from the
+ * recursive entry. this ensures it's the same one that is updated; i.e. an
+ * inplace-modify.
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /* suspend so the update walk kicks in */
+ vlib_process_suspend(vlib_get_main(), 1e-5);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ FIB_TEST(!dpo_cmp(dpo, fib_entry_contribute_ip_forwarding(fei)),
+ "post PIC 200.200.200.200/32 was inplace modified");
+
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket_i(lb, 0)),
+ "post PIC adj for 200.200.200.200/32 is recursive"
+ " via adj for 1.1.1.3");
+
+ /*
+ * the LB map that was locked above should have been modified to remove
+ * the path that was down, and thus its bucket points to a path that is
+ * still up.
+ */
+ FIB_TEST(lbm->lbm_buckets[0] == 1,
+ "LB maps's bucket 0 is %d",
+ lbm->lbm_buckets[0]);
+ FIB_TEST(lbm->lbm_buckets[1] == 1,
+ "LB maps's bucket 1 is %d",
+ lbm->lbm_buckets[1]);
+
+ load_balance_map_unlock(lbmi);
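+ /*
+ * i.e. the identity map [0,1] became [1,1]: flows that hashed to the
+ * failed path's bucket are steered to the surviving one without
+ * resizing the load-balance itself - the PIC edge property.
+ */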
+
+ /*
+ * add it back. again
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /* suspend so the update walk kicks in */
+ vlib_process_suspend(vlib_get_main(), 1e-5);
+
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket_i(lb, 0)),
+ "post PIC recovery adj for 200.200.200.200/32 is recursive "
+ "via adj for 1.1.1.1");
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket_i(lb, 1)),
+ "post PIC recovery adj for 200.200.200.200/32 is recursive "
+ "via adj for 1.1.1.3");
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(lb == load_balance_get(dpo->dpoi_index),
+ "post PIC 200.200.200.200/32 was inplace modified");
+
+ /*
+ * add a 3rd path. this makes the LB 16 buckets.
+ */
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+ for (ii = 0; ii < N_P; ii++)
+ {
+ fib_table_entry_path_add(fib_index,
+ &bgp_78s[ii],
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+ }
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(lb == load_balance_get(dpo->dpoi_index),
+ "200.200.200.200/32 was inplace modified for 3rd path");
+ FIB_TEST(16 == lb->lb_n_buckets,
+ "200.200.200.200/32 was inplace modified for 3rd path to 16 buckets");
+
+ lbmi = lb->lb_map;
+ load_balance_map_lock(lbmi);
+ lbm = load_balance_map_get(lbmi);
+
+ for (ii = 0; ii < 16; ii++)
+ {
+ FIB_TEST(lbm->lbm_buckets[ii] == ii,
+ "LB Map for 200.200.200.200/32 at %d is %d",
+ ii, lbm->lbm_buckets[ii]);
+ }
+
+ /*
+ * trigger PIC by removing the first via-entry
+ * the first 6 buckets of the map should map to the next 6
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ /* suspend so the update walk kicks in */
+ vlib_process_suspend(vlib_get_main(), 1e-5);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(lb == load_balance_get(dpo->dpoi_index),
+ "200.200.200.200/32 was inplace modified for 3rd path");
+ FIB_TEST(2 == lb->lb_n_buckets,
+ "200.200.200.200/32 was inplace modified for 3rd path remove to 2 buckets");
+
+ for (ii = 0; ii < 6; ii++)
+ {
+ FIB_TEST(lbm->lbm_buckets[ii] == ii+6,
+ "LB Map for 200.200.200.200/32 at %d is %d",
+ ii, lbm->lbm_buckets[ii]);
+ }
+ for (ii = 6; ii < 16; ii++)
+ {
+ FIB_TEST(lbm->lbm_buckets[ii] == ii,
+ "LB Map for 200.200.200.200/32 at %d is %d",
+ ii, lbm->lbm_buckets[ii]);
+ }
+ load_balance_map_unlock(lbmi);
+
+ /*
+ * tidy up
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ for (ii = 0; ii < N_P; ii++)
+ {
+ fib_table_entry_delete(fib_index,
+ &bgp_78s[ii],
+ FIB_SOURCE_API);
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &bgp_78s[ii])),
+ "%U removed",
+ format_fib_prefix, &bgp_78s[ii]);
+ }
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_1_1_1_1,
+ ~0,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_3_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+ fib_table_entry_delete(fib_index,
+ &pfx_1_1_1_3_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_1_1_1_0_s_28,
+ FIB_SOURCE_API);
+ /* suspend so the update walk kicks in */
+ vlib_process_suspend(vlib_get_main(), 1e-5);
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28)),
+ "1.1.1.1/28 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_3_s_32)),
+ "1.1.1.3/32 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &bgp_200_pfx)),
+ "200.200.200.200/32 removed");
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A route whose paths are built up iteratively and then removed
+ * all at once
+ */
+ fib_prefix_t pfx_4_4_4_4_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 4.4.4.4/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x04040404),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(FIB_NODE_INDEX_INVALID !=
+ fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
+ "4.4.4.4/32 present");
+
+ fib_table_entry_delete(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
+ "4.4.4.4/32 removed");
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A route with multiple paths at once
+ */
+ fib_route_path_t *r_paths = NULL;
+
+ for (ii = 0; ii < 4; ii++)
+ {
+ fib_route_path_t r_path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii),
+ },
+ .frp_sw_if_index = tm->hw[0]->sw_if_index,
+ .frp_weight = 1,
+ .frp_fib_index = ~0,
+ };
+ vec_add1(r_paths, r_path);
+ }
+
+ fib_table_entry_update(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ r_paths);
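+ /*
+ * Note: unlike the incremental path_add/path_remove calls used above,
+ * fib_table_entry_update() replaces the source's entire path set with
+ * the supplied vector in a single operation.
+ */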
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets);
+
+ fib_table_entry_delete(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
+ "4.4.4.4/32 removed");
+ vec_free(r_paths);
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A deag route
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &zero_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ lookup_dpo_t *lkd = lookup_dpo_get(dpo->dpoi_index);
+
+ FIB_TEST((fib_index == lkd->lkd_fib_index),
+ "4.4.4.4/32 is deag in %d %U",
+ lkd->lkd_fib_index,
+ format_dpo_id, dpo, 0);
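+ /*
+ * A path with the zero address and a valid next-hop FIB index yields a
+ * lookup DPO: packets matching 4.4.4.4/32 are looked up again (deag)
+ * in the table identified by lkd_fib_index, as checked above.
+ */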
+
+ fib_table_entry_delete(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
+ "4.4.4.4/32 removed");
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Duplicate paths:
+ * add a recursive route with duplicate paths. Expect the duplicate to be ignored.
+ */
+ fib_prefix_t pfx_34_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x22010101),
+ },
+ };
+ fib_prefix_t pfx_34_34_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x22220101),
+ },
+ };
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_34_34_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ 0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_34_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_34_34_1_1_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_34_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_34_34_1_1_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST_REC_FORW(&pfx_34_1_1_1_s_32, &pfx_34_34_1_1_s_32, 0);
+ fib_table_entry_delete_index(fei, FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_34_34_1_1_s_32,
+ FIB_SOURCE_API);
+
+ /*
+ * CLEANUP
+ * remove: 1.1.1.2/32, 1.1.2.0/24 and 1.1.1.1/32
+ * all of which are via 10.10.10.1, Itf1
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_2_0_s_24,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32),
+ "1.1.1.1/32 removed");
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32),
+ "1.1.1.2/32 removed");
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_2_0_s_24),
+ "1.1.2.0/24 removed");
+
+ /*
+ * -3 entries and -1 shared path-list
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * An attached-host route. Expect to link to the incomplete adj
+ */
+ fib_prefix_t pfx_4_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 4.1.1.1/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x04010101),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &zero_addr,
+ tm->hw[0]->sw_if_index,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_4_1_1_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.1.1.1/32 present");
+ ai = fib_entry_get_adj(fei);
+
+ ai2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &pfx_4_1_1_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((ai == ai2), "Attached-host link to incomplete ADJ");
+ adj_unlock(ai2);
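+ /*
+ * adj_nbr_add_or_lock() handed back the existing adjacency - still
+ * incomplete, since no ARP reply has completed it - so the
+ * attached-host route and the neighbour share one adj, hence the index
+ * equality above.
+ */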
+
+ /*
+ * +1 entry and +1 shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+5 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ fib_table_entry_delete(fib_index,
+ &pfx_4_1_1_1_s_32,
+ FIB_SOURCE_API);
+
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * add a v6 prefix via v4 next-hops
+ */
+ fib_prefix_t pfx_2001_s_64 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0x2001000000000000),
+ },
+ };
+ fei = fib_table_entry_path_add(0, //default v6 table
+ &pfx_2001_s_64,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(0, &pfx_2001_s_64);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "2001::/64 present");
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_ARP),
+ "2001::/64 via ARP-adj");
+ FIB_TEST((adj->ia_link == VNET_LINK_IP6),
+ "2001::/64 is link type v6");
+ FIB_TEST((adj->ia_nh_proto == FIB_PROTOCOL_IP4),
+ "2001::/64 ADJ-adj is NH proto v4");
+ fib_table_entry_delete(0, &pfx_2001_s_64, FIB_SOURCE_API);
+
+ /*
+ * add a uRPF exempt prefix:
+ * test:
+ * - its forwarding is drop
+ * - its uRPF list is not empty
+ * - the uRPF list for the default route (its cover) is empty
+ */
+ fei = fib_table_entry_special_add(fib_index,
+ &pfx_4_1_1_1_s_32,
+ FIB_SOURCE_URPF_EXEMPT,
+ FIB_ENTRY_FLAG_DROP);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(load_balance_is_drop(dpo),
+ "uRPF exempt 4.1.1.1/32 DROP");
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1, 0),
+ "uRPF list for exempt prefix has itf index 0");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_0_0_0_0_s_0);
+ FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 0),
+ "uRPF list for 0.0.0.0/0 empty");
+
+ fib_table_entry_delete(fib_index, &pfx_4_1_1_1_s_32, FIB_SOURCE_URPF_EXEMPT);
+
+ /*
+ * An adj-fib that fails the refinement criteria - no connected cover
+ */
+ fib_prefix_t pfx_12_10_10_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 12.10.10.2 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0c0a0a02),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_12_10_10_2_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &pfx_12_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_12_10_10_2_s_32);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_id_is_valid(dpo),
+ "no connected cover adj-fib fails refinement");
+
+ fib_table_entry_delete(fib_index,
+ &pfx_12_10_10_2_s_32,
+ FIB_SOURCE_ADJ);
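+ /*
+ * Refinement, in short: an adj-fib contributes forwarding only if its
+ * covering prefix is connected AND on the same interface as the
+ * adj-fib itself. The case above fails the first condition; the next
+ * one fails the second.
+ */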
+
+ /*
+ * An adj-fib that fails the refinement criteria - cover is connected
+ * but on a different interface
+ */
+ fib_prefix_t pfx_10_10_10_127_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.127 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a7f),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_127_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_127_s_32.fp_addr,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_127_s_32);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_id_is_valid(dpo),
+ "wrong interface adj-fib fails refinement");
+
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_127_s_32,
+ FIB_SOURCE_ADJ);
+
+ /*
+ * add a second path to an adj-fib
+ * this simulates another ARP entry created
+ * on an interface on which the connected prefix does not exist.
+ * The second path fails refinement. Expect to forward through the
+ * first.
+ */
+ fib_prefix_t pfx_10_10_10_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.3 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03),
+ },
+ };
+
+ ai_03 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index);
+
+ fib_test_lb_bucket_t ip_o_10_10_10_3 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_03,
+ },
+ };
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_12_12_12_12,
+ tm->hw[1]->sw_if_index,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_10_10_10_3),
+ "10.10.10.3 via 10.10.10.3/Eth0 only");
+
+ /*
+ * remove the path that refines the cover, should go unresolved
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_id_is_valid(dpo),
+ "wrong interface adj-fib fails refinement");
+
+ /*
+ * add back the path that refines the cover
+ */
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_10_10_10_3),
+ "10.10.10.3 via 10.10.10.3/Eth0 only");
+
+ /*
+ * remove the path that does not refine the cover
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ,
+ DPO_PROTO_IP4,
+ &nh_12_12_12_12,
+ tm->hw[1]->sw_if_index,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_10_10_10_3),
+ "10.10.10.3 via 10.10.10.3/Eth0 only");
+
+ /*
+ * remove the path that does refine, it's the last path, so
+ * the entry should be gone
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32);
+ FIB_TEST((fei == FIB_NODE_INDEX_INVALID), "10.10.10.3 gone");
+
+ adj_unlock(ai_03);
+
+ /*
+ * change the table's flow-hash config - expect the update to propagate to
+ * the entries' load-balance objects
+ */
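+ /*
+ * (sketch of the expectation: the table owns the flow-hash config and
+ * each entry's load-balance caches a copy in lb_hash_config, so the
+ * set operation must walk the entries and refresh the cached value)
+ */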
+ flow_hash_config_t old_hash_config, new_hash_config;
+
+ old_hash_config = fib_table_get_flow_hash_config(fib_index,
+ FIB_PROTOCOL_IP4);
+ new_hash_config = (IP_FLOW_HASH_SRC_ADDR |
+ IP_FLOW_HASH_DST_ADDR);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_hash_config == old_hash_config),
+ "Table and LB hash config match: %U",
+ format_ip_flow_hash_config, lb->lb_hash_config);
+
+ fib_table_set_flow_hash_config(fib_index, FIB_PROTOCOL_IP4, new_hash_config);
+
+ FIB_TEST((lb->lb_hash_config == new_hash_config),
+ "Table and LB newhash config match: %U",
+ format_ip_flow_hash_config, lb->lb_hash_config);
+
+ /*
+ * CLEANUP
+ * remove adj-fibs:
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ);
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32),
+ "10.10.10.1/32 adj-fib removed");
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32),
+ "10.10.10.2/32 adj-fib removed");
+
+ /*
+ * -2 entries and -2 non-shared path-lists
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * unlock the adjacencies for which this test provided a rewrite.
+ * These are the last locks on these adjs. they should thus go away.
+ */
+ adj_unlock(ai_02);
+ adj_unlock(ai_01);
+ adj_unlock(ai_12_12_12_12);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ /*
+ * CLEANUP
+ * remove the interface prefixes
+ */
+ local_pfx.fp_len = 32;
+ fib_table_entry_special_remove(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ fei = fib_table_lookup(fib_index, &local_pfx);
+
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &local_pfx),
+ "10.10.10.10/32 adj-fib removed");
+
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE);
+
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &local_pfx),
+ "10.10.10.10/24 adj-fib removed");
+
+ /*
+ * -2 entries and -2 non-shared path-lists
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Last but not least, remove the VRF
+ */
+ FIB_TEST((0 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_API)),
+ "NO API Source'd prefixes");
+ FIB_TEST((0 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_RR)),
+ "NO RR Source'd prefixes");
+ FIB_TEST((0 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_INTERFACE)),
+ "NO INterface Source'd prefixes");
+
+ fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
+
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNBR-5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENBR-5 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+ FIB_TEST((ENBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d",
+ pool_elts(fib_urpf_list_pool));
+ FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d",
+ pool_elts(load_balance_map_pool));
+ FIB_TEST((lb_count == pool_elts(load_balance_pool)), "LB pool size is %d",
+ pool_elts(load_balance_pool));
+
+ return 0;
+}
+
+static int
+fib_test_v6 (void)
+{
+ /*
+ * In the default table check for the presence and correct forwarding
+ * of the special entries
+ */
+ fib_node_index_t dfrt, fei, ai, locked_ai, ai_01, ai_02;
+ const dpo_id_t *dpo, *dpo_drop;
+ const ip_adjacency_t *adj;
+ const receive_dpo_t *rd;
+ test_main_t *tm;
+ u32 fib_index;
+ int ii;
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ /* via 2001:0:0:1::2 */
+ ip46_address_t nh_2001_2 = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000001),
+ [1] = clib_host_to_net_u64(0x0000000000000002),
+ },
+ },
+ };
+
+ tm = &test_main;
+
+ dpo_drop = drop_dpo_get(DPO_PROTO_IP6);
+
+ /* Find or create FIB table 11 */
+ fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11,
+ FIB_SOURCE_API);
+
+ for (ii = 0; ii < 4; ii++)
+ {
+ ip6_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index;
+ }
+
+ fib_prefix_t pfx_0_0 = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ {0, 0},
+ },
+ },
+ };
+
+ dfrt = fib_table_lookup(fib_index, &pfx_0_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)),
+ "Default route is DROP");
+
+ dpo = fib_entry_contribute_ip_forwarding(dfrt);
+ FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup(
+ &ip6_main,
+ 1,
+ &pfx_0_0.fp_addr.ip6)),
+ "default-route; fwd and non-fwd tables match");
+
+ // FIXME - check specials.
+
+ /*
+ * At this stage there is one v4 FIB with 5 routes, two v6 FIBs
+ * each with 2 entries, and a v6 mfib with 4 path-lists.
+ * All entries are special so no path-list sharing.
+ */
+#define ENPS (5+4)
+#define PNPS (5+4+4)
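+ /*
+ * (ENPS counts the 5 v4 and 4 v6 unicast entries; PNPS counts their
+ * unshared path-lists plus the 4 v6 mfib path-lists, hence 5+4+4)
+ */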
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((PNPS == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * add interface routes.
+ * validate presence of /64 attached and /128 receive.
+ * test for the presence of the receive address in the glean and local adj
+ *
+ * receive on 2001:0:0:1::1/128
+ */
+ fib_prefix_t local_pfx = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000001),
+ [1] = clib_host_to_net_u64(0x0000000000000001),
+ },
+ },
+ }
+ };
+
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present");
+
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "attached interface route adj present");
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index),
+ "attached interface adj is glean");
+ FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr,
+ &adj->sub_type.glean.receive_addr)),
+ "attached interface adj is receive ok");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup(
+ &ip6_main,
+ 1,
+ &local_pfx.fp_addr.ip6)),
+ "attached-route; fwd and non-fwd tables match");
+
+ local_pfx.fp_len = 128;
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &local_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST((DPO_RECEIVE == dpo->dpoi_type),
+ "local interface adj is local");
+ rd = receive_dpo_get(dpo->dpoi_index);
+
+ FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr,
+ &rd->rd_addr)),
+ "local interface adj is receive ok");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup(
+ &ip6_main,
+ 1,
+ &local_pfx.fp_addr.ip6)),
+ "local-route; fwd and non-fwd tables match");
+
+ /*
+ * +2 entries. +2 unshared path-lists
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((PNPS+2 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Modify the default route to be via an adj not yet known.
+ * this sources the default route with the API source, which is
+ * a higher preference than the DEFAULT_ROUTE source
+ */
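+ /*
+ * (fib sources are priority ordered and API outranks DEFAULT_ROUTE,
+ * so the API path now determines forwarding; the DEFAULT_ROUTE drop
+ * remains underneath as the lower-priority backing)
+ */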
+ fib_table_entry_path_add(fib_index, &pfx_0_0,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_0_0);
+
+ FIB_TEST((fei == dfrt), "default route same index");
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "default route adj present");
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&nh_2001_2, &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ /*
+ * find the adj in the shared db
+ */
+ locked_ai = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6,
+ VNET_LINK_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((locked_ai == ai), "ADJ NBR DB find");
+ adj_unlock(locked_ai);
+
+ /*
+ * no more entries. +1 shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS+3 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove the API source from the default route. We expect
+ * the route to remain, sourced by DEFAULT_ROUTE, and hence a DROP
+ */
+ fib_table_entry_path_remove(fib_index, &pfx_0_0,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_0_0);
+
+ FIB_TEST((fei == dfrt), "default route same index");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)),
+ "Default route is DROP");
+
+ /*
+ * no more entries. -1 shared path-list
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS+2 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 ARP entries => complete ADJs plus adj-fibs.
+ */
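+ /*
+ * (the sequence below mirrors neighbour resolution: adj_nbr_add_or_lock()
+ * creates an incomplete adj, then adj_nbr_update_rewrite() supplies the
+ * MAC rewrite and flips it from ARP to REWRITE)
+ */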
+ fib_prefix_t pfx_2001_1_2_s_128 = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000001),
+ [1] = clib_host_to_net_u64(0x0000000000000002),
+ },
+ },
+ }
+ };
+ fib_prefix_t pfx_2001_1_3_s_128 = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000001),
+ [1] = clib_host_to_net_u64(0x0000000000000003),
+ },
+ },
+ }
+ };
+ u8 eth_addr[] = {
+ 0xde, 0xde, 0xde, 0xba, 0xba, 0xba,
+ };
+
+ ai_01 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6,
+ VNET_LINK_IP6,
+ &pfx_2001_1_2_s_128.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_01), "adj created");
+ adj = adj_get(ai_01);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_2001_1_2_s_128,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP6,
+ &pfx_2001_1_2_s_128.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_2001_1_2_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj");
+
+ eth_addr[5] = 0xb2;
+
+ ai_02 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6,
+ VNET_LINK_IP6,
+ &pfx_2001_1_3_s_128.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_02), "adj created");
+ adj = adj_get(ai_02);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ FIB_TEST((ai_01 != ai_02), "ADJs are different");
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_2001_1_3_s_128,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP6,
+ &pfx_2001_1_3_s_128.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_2001_1_3_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj");
+
+ /*
+ * +2 entries, +2 unshared path-lists.
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS+4 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS+4 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 routes via the first ADJ. Ensure path-list sharing.
+ */
+ fib_prefix_t pfx_2001_a_s_64 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x200100000000000a),
+ [1] = clib_host_to_net_u64(0x0000000000000000),
+ },
+ },
+ }
+ };
+ fib_prefix_t pfx_2001_b_s_64 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x200100000000000b),
+ [1] = clib_host_to_net_u64(0x0000000000000000),
+ },
+ },
+ }
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_2001_a_s_64,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_2001_a_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1");
+ fib_table_entry_path_add(fib_index,
+ &pfx_2001_b_s_64,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_2001_b_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1");
+
+ /*
+ * +2 entries, +1 shared path-list.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * add a v4 prefix via a v6 next-hop
+ */
+ fib_prefix_t pfx_1_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = 0x01010101,
+ },
+ };
+ fei = fib_table_entry_path_add(0, // default table
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(fei == fib_table_lookup_exact_match(0, &pfx_1_1_1_1_s_32),
+ "1.1.1.1/32 o v6 route present");
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_ARP),
+ "1.1.1.1/32 via ARP-adj");
+ FIB_TEST((adj->ia_link == VNET_LINK_IP4),
+ "1.1.1.1/32 ADJ-adj is link type v4");
+ FIB_TEST((adj->ia_nh_proto == FIB_PROTOCOL_IP6),
+ "1.1.1.1/32 ADJ-adj is NH proto v6");
+ fib_table_entry_delete(0, &pfx_1_1_1_1_s_32, FIB_SOURCE_API);
+
+ /*
+ * An attached route
+ */
+ fib_prefix_t pfx_2001_c_s_64 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x200100000000000c),
+ [1] = clib_host_to_net_u64(0x0000000000000000),
+ },
+ },
+ }
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_2001_c_s_64,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_c_s_64);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached route present");
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_GLEAN),
+ "2001:0:0:c/64 attached resolves via glean");
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_2001_c_s_64,
+ FIB_SOURCE_CLI,
+ DPO_PROTO_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_c_s_64);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "attached route removed");
+
+ /*
+ * Shutdown the interface on which we have a connected and through
+ * which the routes are reachable.
+ * This will result in the connected, adj-fibs, and routes linking to drop.
+ * The local/for-us prefix continues to receive.
+ */
+ clib_error_t * error;
+
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ ~VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ FIB_TEST((NULL == error), "Interface shutdown OK");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001::b/64 resolves via drop");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001::a/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::3/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::2/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::1/128 not drop");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1/64 resolves via drop");
+
+ /*
+ * no change
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * shutdown one of the other interfaces, then add a connected
+ * prefix and swap one of the routes to it.
+ */
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[1]->sw_if_index,
+ ~VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ FIB_TEST((NULL == error), "Interface 1 shutdown OK");
+
+ fib_prefix_t connected_pfx = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ /* 2001:0:0:2::1/64 */
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000002),
+ [1] = clib_host_to_net_u64(0x0000000000000001),
+ },
+ },
+ }
+ };
+ fib_table_entry_update_one_path(fib_index, &connected_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP6,
+ NULL,
+ tm->hw[1]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &connected_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST(!dpo_cmp(dpo, dpo_drop),
+ "2001:0:0:2/64 not resolves via drop");
+
+ connected_pfx.fp_len = 128;
+ fib_table_entry_update_one_path(fib_index, &connected_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &connected_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST((DPO_RECEIVE == dpo->dpoi_type),
+ "local interface adj is local");
+ rd = receive_dpo_get(dpo->dpoi_index);
+ FIB_TEST((0 == ip46_address_cmp(&connected_pfx.fp_addr,
+ &rd->rd_addr)),
+ "local interface adj is receive ok");
+
+ /*
+ * +2 entries, +2 unshared path-lists
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS+7 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS+8 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+
+ /*
+ * bring the interface back up. We expect the routes to return
+ * to normal forwarding.
+ */
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ FIB_TEST((NULL == error), "Interface bring-up OK");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index),
+ "attached interface adj is glean");
+
+ /*
+ * Same test as above, but this time the HW interface goes down
+ */
+ error = vnet_hw_interface_set_flags(vnet_get_main(),
+ tm->hw_if_indicies[0],
+ ~VNET_HW_INTERFACE_FLAG_LINK_UP);
+ FIB_TEST((NULL == error), "Interface shutdown OK");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001::a/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::3/128 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::2/128 resolves via drop");
+ local_pfx.fp_len = 128;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::1/128 not drop");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1/64 resolves via drop");
+
+ error = vnet_hw_interface_set_flags(vnet_get_main(),
+ tm->hw_if_indicies[0],
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ FIB_TEST((NULL == error), "Interface bring-up OK");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index),
+ "attached interface adj is glean");
+
+ /*
+ * Delete the interface that the routes resolve through.
+ * Again no routes are removed. They all point to drop.
+ *
+ * This is considered an error case. The control plane should
+ * not remove interfaces through which routes resolve, but
+ * such things can happen. ALL affected routes will drop.
+ */
+ vnet_delete_hw_interface(vnet_get_main(), tm->hw_if_indicies[0]);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::3/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::2/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::1/128 is drop");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1/64 resolves via drop");
+
+ /*
+ * no change
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS+7 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS+8 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add the interface back. routes stay unresolved.
+ */
+ error = ethernet_register_interface(vnet_get_main(),
+ test_interface_device_class.index,
+ 0 /* instance */,
+ hw_address,
+ &tm->hw_if_indicies[0],
+ /* flag change */ 0);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::3/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::2/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::1/128 is drop");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1/64 resolves via drop");
+
+ /*
+ * CLEANUP ALL the routes
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_c_s_64,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_a_s_64,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_b_s_64,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_1_3_s_128,
+ FIB_SOURCE_ADJ);
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_1_2_s_128,
+ FIB_SOURCE_ADJ);
+ local_pfx.fp_len = 64;
+ fib_table_entry_delete(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ local_pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ connected_pfx.fp_len = 64;
+ fib_table_entry_delete(fib_index, &connected_pfx,
+ FIB_SOURCE_INTERFACE);
+ connected_pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index, &connected_pfx,
+ FIB_SOURCE_INTERFACE);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64)),
+ "2001::a/64 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64)),
+ "2001::b/64 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128)),
+ "2001:0:0:1::3/128 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128)),
+ "2001:0:0:1::3/128 removed");
+ local_pfx.fp_len = 64;
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &local_pfx)),
+ "2001:0:0:1/64 removed");
+ local_pfx.fp_len = 128;
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &local_pfx)),
+ "2001:0:0:1::1/128 removed");
+ connected_pfx.fp_len = 64;
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &connected_pfx)),
+ "2001:0:0:2/64 removed");
+ connected_pfx.fp_len = 128;
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &connected_pfx)),
+ "2001:0:0:2::1/128 removed");
+
+ /*
+ * -8 entries. -7 path-lists (1 was shared).
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * now remove the VRF
+ */
+ fib_table_unlock(fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_API);
+
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((PNPS-2 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((ENPS-2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ adj_unlock(ai_02);
+ adj_unlock(ai_01);
+
+ /*
+ * return the interfaces to up state
+ */
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[1]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ return (0);
+}
+
+/*
+ * Test Attached Exports
+ */
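+ /*
+ * Attached export: when an import table carries the same attached
+ * prefix as an export table, the adj-fibs and local address covered
+ * by that prefix in the exporter are mirrored into the importer.
+ * The cases below exercise add, remove, and flag changes of the cover.
+ */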
+static int
+fib_test_ae (void)
+{
+ const dpo_id_t *dpo, *dpo_drop;
+ const u32 fib_index = 0;
+ fib_node_index_t fei;
+ test_main_t *tm;
+ ip4_main_t *im;
+
+ tm = &test_main;
+ im = &ip4_main;
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ /*
+ * add interface routes. We'll assume this works. It's more rigorously
+ * tested elsewhere.
+ */
+ fib_prefix_t local_pfx = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ /* 10.10.10.10 */
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a0a),
+ },
+ },
+ };
+
+ vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index);
+ im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index;
+
+ dpo_drop = drop_dpo_get(DPO_PROTO_IP4);
+
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "attached interface route present");
+
+ local_pfx.fp_len = 32;
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "local interface route present");
+
+ /*
+ * Add an ARP entry => a complete ADJ plus adj-fib.
+ */
+ fib_prefix_t pfx_10_10_10_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.1 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ },
+ };
+ fib_node_index_t ai;
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 created");
+ ai = fib_entry_get_adj(fei);
+
+ /*
+ * create another FIB table into which routes will be imported
+ */
+ u32 import_fib_index1;
+
+ import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+ 11,
+ FIB_SOURCE_CLI);
+
+ /*
+ * Add an attached route in the import FIB
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(import_fib_index1,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached export created");
+
+ /*
+ * check for the presence of the adj-fibs in the import table
+ */
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported");
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "adj-fib1 Import uses same adj as export");
+
+ /*
+ * check for the presence of the local in the import table
+ */
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported");
+
+ /*
+ * Add another adj-fib in the export table. Expect this
+ * to get magically exported.
+ */
+ fib_prefix_t pfx_10_10_10_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.2 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 present");
+ ai = fib_entry_get_adj(fei);
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported");
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "Import uses same adj as export");
+ FIB_TEST((FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(fei)),
+ "ADJ-fib2 imported flags %d",
+ fib_entry_get_flags(fei));
+
+ /*
+ * create a 2nd FIB table into which routes will be imported
+ */
+ u32 import_fib_index2;
+
+ import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12,
+ FIB_SOURCE_CLI);
+
+ /*
+ * Add an attached route in the import FIB
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(import_fib_index2,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached export created");
+
+ /*
+ * check for the presence of all the adj-fibs and local in the import table
+ */
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported");
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported");
+
+ /*
+ * add a 3rd adj-fib. expect it to be exported to both tables.
+ */
+ fib_prefix_t pfx_10_10_10_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.3 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_3_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 present");
+ ai = fib_entry_get_adj(fei);
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 imported to FIB1");
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "Import uses same adj as export");
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 imported to FIB2");
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "Import uses same adj as export");
+
+ /*
+ * remove the 3rd adj fib. we expect it to be removed from both FIBs
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 remved");
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 removed from FIB1");
+
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 removed from FIB2");
+
+ /*
+ * remove the attached route from the 2nd FIB. expect the imported
+ * entries to be removed
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(import_fib_index2,
+ &local_pfx,
+ FIB_SOURCE_API);
+ fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "attached export removed");
+
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB2");
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB2");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB2");
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 still in FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 still in FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local still in FIB1");
+
+ /*
+ * modify the route in FIB1 so it is no longer attached. expect the imported
+ * entries to be removed
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(import_fib_index1,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1");
+
+ /*
+ * modify it back to attached. expect the adj-fibs back
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(import_fib_index1,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported in FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported in FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported in FIB1");
+
+ /*
+ * add a covering attached next-hop for the interface address, so we have
+ * a valid adj to find when we check the forwarding tables
+ */
+ fib_prefix_t pfx_10_0_0_0_s_8 = {
+ .fp_len = 8,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.0.0.0 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a000000),
+ },
+ };
+
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_10_0_0_0_s_8,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_3_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ /*
+ * remove the route in the export fib. expect the adj-fibs to be removed
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index,
+ &local_pfx,
+ FIB_SOURCE_INTERFACE);
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "Delete export: ADJ-fib1 removed from FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1");
+
+ /*
+ * the adj-fibs in the export VRF are present in the FIB table,
+ * but not installed in forwarding, since they have no attached cover.
+ * Consequently a lookup in the MTRIE gives the adj for the covering
+ * route 10.0.0.0/8.
+ */
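+ /*
+ * (ip4_fib_forwarding_lookup() walks the mtrie and returns the
+ * load-balance index of the longest match actually installed in
+ * forwarding - here that is the 10.0.0.0/8 cover)
+ */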
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 in export");
+
+ index_t lbi;
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_1_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.1 forwards on \n%U not \n%U",
+ format_load_balance, lbi, 0,
+ format_dpo_id, dpo, 0);
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_2_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.2 forwards on %U", format_dpo_id, dpo, 0);
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_3_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.3 forwards on %U", format_dpo_id, dpo, 0);
+
+ /*
+ * add the export prefix back, but not as attached.
+ * No adj-fibs in either the export or import tables
+ */
+ local_pfx.fp_len = 24;
+ fei = fib_table_entry_update_one_path(fib_index,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "non-attached in export: ADJ-fib1 in export");
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_1_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.1 forwards on %U", format_dpo_id, dpo, 0);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 in export");
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_2_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.2 forwards on %U", format_dpo_id, dpo, 0);
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1");
+
+ /*
+ * modify the export prefix so it is attached. expect all covered prefixes to return
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(fib_index,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 reinstalled in export");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "Adj-fib1 is not drop in export");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 reinstalled in export");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local reinstalled in export");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached in export: ADJ-fib1 imported");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "Adj-fib1 is not drop in export");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported");
+
+ /*
+ * modify the export prefix so it is connected. no change.
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 reinstalled in export");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "Adj-fib1 is not drop in export");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 reinstalled in export");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local reinstalled in export");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached in export: ADJ-fib1 imported");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "Adj-fib1 is not drop in export");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported");
+
+ /*
+ * CLEANUP
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_10_0_0_0_s_8,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ);
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ);
+ local_pfx.fp_len = 32;
+ fib_table_entry_delete(fib_index,
+ &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index,
+ &local_pfx,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(import_fib_index1,
+ &local_pfx,
+ FIB_SOURCE_API);
+
+ fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI);
+ fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ return (0);
+}
+
+/*
+ * Test Path Preference
+ */
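+ /*
+ * Semantics under test: 0 is the best (highest) preference, and only
+ * resolved paths of the best preference present contribute buckets;
+ * lower-preference paths take over when all better paths are down.
+ */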
+static int
+fib_test_pref (void)
+{
+ test_main_t *tm = &test_main;
+
+ const fib_prefix_t pfx_1_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x01010101),
+ },
+ },
+ };
+
+ /*
+ * 2 high, 2 medium and 2 low preference non-recursive paths
+ */
+ fib_route_path_t nr_path_hi_1 = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = tm->hw[0]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_preference = 0,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ },
+ };
+ fib_route_path_t nr_path_hi_2 = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = tm->hw[0]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_preference = 0,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+ },
+ };
+ fib_route_path_t nr_path_med_1 = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = tm->hw[1]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_preference = 1,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0c01),
+ },
+ };
+ fib_route_path_t nr_path_med_2 = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = tm->hw[1]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_preference = 1,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0c02),
+ },
+ };
+ fib_route_path_t nr_path_low_1 = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = tm->hw[2]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_preference = 2,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0b01),
+ },
+ };
+ fib_route_path_t nr_path_low_2 = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = tm->hw[2]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_preference = 2,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0b02),
+ },
+ };
+ fib_route_path_t *nr_paths = NULL;
+
+ vec_add1(nr_paths, nr_path_hi_1);
+ vec_add1(nr_paths, nr_path_hi_2);
+ vec_add1(nr_paths, nr_path_med_1);
+ vec_add1(nr_paths, nr_path_med_2);
+ vec_add1(nr_paths, nr_path_low_1);
+ vec_add1(nr_paths, nr_path_low_2);
+
+ adj_index_t ai_hi_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nr_path_hi_1.frp_addr,
+ nr_path_hi_1.frp_sw_if_index);
+ adj_index_t ai_hi_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nr_path_hi_2.frp_addr,
+ nr_path_hi_2.frp_sw_if_index);
+ adj_index_t ai_med_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nr_path_med_1.frp_addr,
+ nr_path_med_1.frp_sw_if_index);
+ adj_index_t ai_med_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nr_path_med_2.frp_addr,
+ nr_path_med_2.frp_sw_if_index);
+ adj_index_t ai_low_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nr_path_low_1.frp_addr,
+ nr_path_low_1.frp_sw_if_index);
+ adj_index_t ai_low_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nr_path_low_2.frp_addr,
+ nr_path_low_2.frp_sw_if_index);
+
+ fib_test_lb_bucket_t ip_hi_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_hi_1,
+ },
+ };
+ fib_test_lb_bucket_t ip_hi_2 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_hi_2,
+ },
+ };
+ fib_test_lb_bucket_t ip_med_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_med_1,
+ },
+ };
+ fib_test_lb_bucket_t ip_med_2 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_med_2,
+ },
+ };
+ fib_test_lb_bucket_t ip_low_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_low_1,
+ },
+ };
+ fib_test_lb_bucket_t ip_low_2 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_low_2,
+ },
+ };
+
+ fib_node_index_t fei;
+
+ fei = fib_table_entry_path_add2(0,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ nr_paths);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &ip_hi_1,
+ &ip_hi_2),
+ "1.1.1.1/32 via high preference paths");
+
+ /*
+ * bring down the interface on which the high preference paths lie
+ */
+ vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ 0);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &ip_med_1,
+ &ip_med_2),
+ "1.1.1.1/32 via medium preference paths");
+
+ /*
+ * bring down the interface on which the medium preference paths lie
+ */
+ vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[1]->sw_if_index,
+ 0);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &ip_low_1,
+ &ip_low_2),
+ "1.1.1.1/32 via low preference paths");
+
+ /*
+ * bring up the interface on which the high preference paths lie
+ */
+ vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &ip_hi_1,
+ &ip_hi_2),
+ "1.1.1.1/32 via high preference paths");
+
+ /*
+ * bring up the interface on which the medium preference paths lie
+ */
+ vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[1]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &ip_hi_1,
+ &ip_hi_2),
+ "1.1.1.1/32 via high preference paths");
+
+ dpo_id_t ip_1_1_1_1 = DPO_INVALID;
+ fib_entry_contribute_forwarding(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ &ip_1_1_1_1);
+
+ /*
+ * 3 recursive paths of different preference
+ */
+ const fib_prefix_t pfx_1_1_1_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x01010102),
+ },
+ },
+ };
+ const fib_prefix_t pfx_1_1_1_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x01010103),
+ },
+ },
+ };
+ fei = fib_table_entry_path_add2(0,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ nr_paths);
+ dpo_id_t ip_1_1_1_2 = DPO_INVALID;
+ fib_entry_contribute_forwarding(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ &ip_1_1_1_2);
+ fei = fib_table_entry_path_add2(0,
+ &pfx_1_1_1_3_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ nr_paths);
+ dpo_id_t ip_1_1_1_3 = DPO_INVALID;
+ fib_entry_contribute_forwarding(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ &ip_1_1_1_3);
+
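+ /*
+ * each recursive route resolves via the load-balance contributed by
+ * its via-entry, so the expected buckets are LBs through LBs
+ * (FT_LB_O_LB) rather than adjacencies.
+ */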
+ fib_test_lb_bucket_t ip_o_1_1_1_1 = {
+ .type = FT_LB_O_LB,
+ .lb = {
+ .lb = ip_1_1_1_1.dpoi_index,
+ },
+ };
+ fib_test_lb_bucket_t ip_o_1_1_1_2 = {
+ .type = FT_LB_O_LB,
+ .lb = {
+ .lb = ip_1_1_1_2.dpoi_index,
+ },
+ };
+ fib_test_lb_bucket_t ip_o_1_1_1_3 = {
+ .type = FT_LB_O_LB,
+ .lb = {
+ .lb = ip_1_1_1_3.dpoi_index,
+ },
+ };
+ fib_route_path_t r_path_hi = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = 1,
+ .frp_preference = 0,
+ .frp_flags = FIB_ROUTE_PATH_RESOLVE_VIA_HOST,
+ .frp_addr = pfx_1_1_1_1_s_32.fp_addr,
+ };
+ fib_route_path_t r_path_med = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = 1,
+ .frp_preference = 10,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_addr = pfx_1_1_1_2_s_32.fp_addr,
+ };
+ fib_route_path_t r_path_low = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = 1,
+ .frp_preference = 255,
+ .frp_flags = FIB_ROUTE_PATH_RESOLVE_VIA_HOST,
+ .frp_addr = pfx_1_1_1_3_s_32.fp_addr,
+ };
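+ /*
+ * the high and low preference paths are flagged RESOLVE_VIA_HOST, so
+ * they must resolve via a host (/32) entry; the medium path has no
+ * such constraint.
+ */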
+ fib_route_path_t *r_paths = NULL;
+
+ vec_add1(r_paths, r_path_hi);
+ vec_add1(r_paths, r_path_low);
+ vec_add1(r_paths, r_path_med);
+
+ /*
+ * add many recursive entries so that the LB map is created
+ */
+ #define N_PFXS 64
+ fib_prefix_t pfx_r[N_PFXS];
+ unsigned int n_pfxs;
+ for (n_pfxs = 0; n_pfxs < N_PFXS; n_pfxs++)
+ {
+ pfx_r[n_pfxs].fp_len = 32;
+ pfx_r[n_pfxs].fp_proto = FIB_PROTOCOL_IP4;
+ pfx_r[n_pfxs].fp_addr.ip4.as_u32 =
+ clib_host_to_net_u32(0x02000000 + n_pfxs);
+
+ fei = fib_table_entry_path_add2(0,
+ &pfx_r[n_pfxs],
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ r_paths);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_1),
+ "recursive via high preference paths");
+
+ /*
+ * withdraw the high preference resolving entry
+ */
+ fib_table_entry_delete(0,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API);
+
+ /* suspend so the update walk kicks in */
+ vlib_process_suspend(vlib_get_main(), 1e-5);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_2),
+ "recursive via medium preference paths");
+
+ /*
+ * withdraw medium pref resolving entry
+ */
+ fib_table_entry_delete(0,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API);
+
+ /* suspend so the update walk kicks in */
+ vlib_process_suspend(vlib_get_main(), 1e-5);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_3),
+ "recursive via low preference paths");
+
+ /*
+ * add back paths for next iteration
+ */
+ fei = fib_table_entry_update(0,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ nr_paths);
+ fei = fib_table_entry_update(0,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ nr_paths);
+
+ /* suspend so the update walk kicks in */
+ vlib_process_suspend(vlib_get_main(), 1e-5);
+
+ fei = fib_table_lookup_exact_match(0, &pfx_r[n_pfxs]);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_1),
+ "recursive via high preference paths");
+ }
+
+
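+ /*
+ * with all N_PFXS recursives in place, a single withdrawal of the
+ * high preference via-entry must switch every recursive (and its
+ * LB map) to the medium preference path.
+ */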
+ fib_table_entry_delete(0,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API);
+
+ /* suspend so the update walk kicks in */
+ vlib_process_suspend(vlib_get_main(), 1e-5);
+
+ for (n_pfxs = 0; n_pfxs < N_PFXS; n_pfxs++)
+ {
+ fei = fib_table_lookup_exact_match(0, &pfx_r[n_pfxs]);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_2),
+ "recursive via medium preference paths");
+ }
+ for (n_pfxs = 0; n_pfxs < N_PFXS; n_pfxs++)
+ {
+ fib_table_entry_delete(0,
+ &pfx_r[n_pfxs],
+ FIB_SOURCE_API);
+ }
+
+ /*
+ * Cleanup
+ */
+ fib_table_entry_delete(0,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(0,
+ &pfx_1_1_1_3_s_32,
+ FIB_SOURCE_API);
+
+ dpo_reset(&ip_1_1_1_1);
+ dpo_reset(&ip_1_1_1_2);
+ dpo_reset(&ip_1_1_1_3);
+ adj_unlock(ai_low_2);
+ adj_unlock(ai_low_1);
+ adj_unlock(ai_med_2);
+ adj_unlock(ai_med_1);
+ adj_unlock(ai_hi_2);
+ adj_unlock(ai_hi_1);
+ return (0);
+}
+
+/*
+ * Test the recursive route handling with MPLS labels
+ */
+static int
+fib_test_label (void)
+{
+ fib_node_index_t fei, ai_mpls_10_10_10_1, ai_v4_10_10_11_1, ai_v4_10_10_11_2, ai_mpls_10_10_11_2, ai_mpls_10_10_11_1;
+ const u32 fib_index = 0;
+ test_main_t *tm;
+ ip4_main_t *im;
+ int lb_count, ii;
+
+ lb_count = pool_elts(load_balance_pool);
+ tm = &test_main;
+ im = &ip4_main;
+
+ /*
+ * add interface routes. We'll assume this works. It's more rigorously
+ * tested elsewhere.
+ */
+ fib_prefix_t local0_pfx = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ /* 10.10.10.10 */
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a0a),
+ },
+ },
+ };
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index);
+ im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index;
+
+ fib_table_entry_update_one_path(fib_index, &local0_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local0_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "attached interface route present");
+
+ local0_pfx.fp_len = 32;
+ fib_table_entry_update_one_path(fib_index, &local0_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local0_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "local interface route present");
+
+ fib_prefix_t local1_pfx = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ /* 10.10.11.10 */
+ .as_u32 = clib_host_to_net_u32(0x0a0a0b0a),
+ },
+ },
+ };
+
+ vec_validate(im->fib_index_by_sw_if_index, tm->hw[1]->sw_if_index);
+ im->fib_index_by_sw_if_index[tm->hw[1]->sw_if_index] = fib_index;
+
+ fib_table_entry_update_one_path(fib_index, &local1_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[1]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local1_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "attached interface route present");
+
+ local1_pfx.fp_len = 32;
+ fib_table_entry_update_one_path(fib_index, &local1_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local1_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "local interface route present");
+
+ ip46_address_t nh_10_10_10_1 = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ },
+ };
+ ip46_address_t nh_10_10_11_1 = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0b01),
+ },
+ };
+ ip46_address_t nh_10_10_11_2 = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0b02),
+ },
+ };
+
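+ /*
+ * adjacencies of both IP4 and MPLS link types are needed: EOS chains
+ * that pop the label stack resolve via the IP adj, while non-EOS
+ * chains that keep an MPLS payload resolve via the MPLS adj.
+ */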
+ ai_v4_10_10_11_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index);
+ ai_v4_10_10_11_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_10_10_11_2,
+ tm->hw[1]->sw_if_index);
+ ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_MPLS,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index);
+ ai_mpls_10_10_11_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_MPLS,
+ &nh_10_10_11_2,
+ tm->hw[1]->sw_if_index);
+ ai_mpls_10_10_11_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_MPLS,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index);
+
+ /*
+ * Add an entry with one path that has a real out-going label
+ */
+ fib_prefix_t pfx_1_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010101),
+ },
+ };
+ fib_test_lb_bucket_t l99_eos_o_10_10_10_1 = {
+ .type = FT_LB_LABEL_O_ADJ,
+ .label_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label = 99,
+ .eos = MPLS_EOS,
+ },
+ };
+ fib_test_lb_bucket_t l99_neos_o_10_10_10_1 = {
+ .type = FT_LB_LABEL_O_ADJ,
+ .label_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label = 99,
+ .eos = MPLS_NON_EOS,
+ },
+ };
+ mpls_label_t *l99 = NULL;
+ vec_add1(l99, 99);
+
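+ /*
+ * the label vector is consumed by the path-add call, so a fresh
+ * vector is built each time (note l99 is rebuilt below before the
+ * path is re-added).
+ */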
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ l99,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.1.1.1/32 created");
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l99_eos_o_10_10_10_1),
+ "1.1.1.1/32 LB 1 bucket via label 99 over 10.10.10.1");
+
+ /*
+ * add a path with an implicit NULL label
+ */
+ fib_test_lb_bucket_t a_o_10_10_11_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_v4_10_10_11_1,
+ },
+ };
+ fib_test_lb_bucket_t a_mpls_o_10_10_11_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_mpls_10_10_11_1,
+ },
+ };
+ mpls_label_t *l_imp_null = NULL;
+ vec_add1(l_imp_null, MPLS_IETF_IMPLICIT_NULL_LABEL);
+
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ l_imp_null,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1");
+
+ /*
+ * assign the route a local label
+ */
+ fib_table_entry_local_label_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ 24001);
+
+ fib_prefix_t pfx_24001_eos = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 24001,
+ .fp_eos = MPLS_EOS,
+ };
+ fib_prefix_t pfx_24001_neos = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 24001,
+ .fp_eos = MPLS_NON_EOS,
+ };
+
+ /*
+ * The EOS entry should link to both the paths,
+ * and use an ip adj for the imp-null
+ * The NON-EOS entry should link to both the paths,
+ * and use an mpls adj for the imp-null
+ */
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1),
+ "24001/eos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1");
+
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 2,
+ &l99_neos_o_10_10_10_1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 1 bucket via: "
+ "label 99 over 10.10.10.1 ",
+ "mpls-adj via 10.10.11.1");
+
+ /*
+ * add an unlabelled path; this is excluded from the non-EOS chains
+ */
+ fib_test_lb_bucket_t adj_o_10_10_11_2 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_v4_10_10_11_2,
+ },
+ };
+
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_11_2,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 16, // 3 choices spread over 16 buckets
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 16 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1",
+ "adj over 10.10.11.2");
+
+ /*
+ * get and lock a reference to the non-eos of the via entry 1.1.1.1/32
+ */
+ dpo_id_t non_eos_1_1_1_1 = DPO_INVALID;
+ fib_entry_contribute_forwarding(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &non_eos_1_1_1_1);
+
+ /*
+ * n-eos has only the 2 labelled paths
+ */
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 2,
+ &l99_neos_o_10_10_10_1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj-mpls over 10.10.11.2");
+
+ /*
+ * A labelled recursive
+ */
+ fib_prefix_t pfx_2_2_2_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020202),
+ },
+ };
+ fib_test_lb_bucket_t l1600_eos_o_1_1_1_1 = {
+ .type = FT_LB_LABEL_O_LB,
+ .label_o_lb = {
+ .lb = non_eos_1_1_1_1.dpoi_index,
+ .label = 1600,
+ .eos = MPLS_EOS,
+ },
+ };
+ mpls_label_t *l1600 = NULL;
+ vec_add1(l1600, 1600);
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_2_2_2_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ l1600,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l1600_eos_o_1_1_1_1),
+ "2.2.2.2.2/32 LB 1 buckets via: "
+ "label 1600 over 1.1.1.1");
+
+ dpo_id_t dpo_44 = DPO_INVALID;
+ index_t urpfi;
+
+ fib_entry_contribute_forwarding(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, &dpo_44);
+ urpfi = load_balance_get_urpf(dpo_44.dpoi_index);
+
+ FIB_TEST(fib_urpf_check(urpfi, tm->hw[0]->sw_if_index),
+ "uRPF check for 2.2.2.2/32 on %d OK",
+ tm->hw[0]->sw_if_index);
+ FIB_TEST(fib_urpf_check(urpfi, tm->hw[1]->sw_if_index),
+ "uRPF check for 2.2.2.2/32 on %d OK",
+ tm->hw[1]->sw_if_index);
+ FIB_TEST(!fib_urpf_check(urpfi, 99),
+ "uRPF check for 2.2.2.2/32 on 99 not-OK",
+ 99);
+
+ fib_entry_contribute_forwarding(fei, FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, &dpo_44);
+ FIB_TEST(urpfi == load_balance_get_urpf(dpo_44.dpoi_index),
+ "Shared uRPF on IP and non-EOS chain");
+
+ dpo_reset(&dpo_44);
+
+ /*
+ * we are holding a lock on the non-eos LB of the via-entry.
+ * do a PIC-core failover by shutting the link of the via-entry.
+ *
+ * shut down the link with the valid label
+ */
+ vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ 0);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "adj over 10.10.11.1, ",
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 2 buckets via: "
+ "adj over 10.10.11.1, ",
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 1 buckets via: "
+ "adj-mpls over 10.10.11.2");
+
+ /*
+ * test that the pre-failover load-balance has been in-place
+ * modified
+ */
+ dpo_id_t current = DPO_INVALID;
+ fib_entry_contribute_forwarding(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &current);
+
+ FIB_TEST(!dpo_cmp(&non_eos_1_1_1_1,
+ &current),
+ "PIC-core LB inplace modified %U %U",
+ format_dpo_id, &non_eos_1_1_1_1, 0,
+ format_dpo_id, &current, 0);
+
+ dpo_reset(&non_eos_1_1_1_1);
+ dpo_reset(&current);
+
+ /*
+ * no-shut the link with the valid label
+ */
+ vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 16, // 3 choices spread over 16 buckets
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 16 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1",
+ "adj-v4 over 10.10.11.2");
+
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 16, // 3 choices spread over 16 buckets
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 16 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1",
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 2,
+ &l99_neos_o_10_10_10_1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj-mpls over 10.10.11.2");
+
+ /*
+ * remove the first path with the valid label
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "adj over 10.10.11.1, "
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 2 buckets via: "
+ "adj over 10.10.11.1, "
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 1 buckets via: "
+ "adj-mpls over 10.10.11.2");
+
+ /*
+ * remove the other path with a valid label
+ */
+ fib_test_lb_bucket_t bucket_drop = {
+ .type = FT_LB_SPECIAL,
+ .special = {
+ .adj = DPO_PROTO_IP4,
+ },
+ };
+ fib_test_lb_bucket_t mpls_bucket_drop = {
+ .type = FT_LB_SPECIAL,
+ .special = {
+ .adj = DPO_PROTO_MPLS,
+ },
+ };
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ DPO_PROTO_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 1 buckets via: "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 1 buckets via: "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &mpls_bucket_drop),
+ "24001/neos LB 1 buckets via: DROP");
+
+ /*
+ * add back the path with the valid label
+ */
+ l99 = NULL;
+ vec_add1(l99, 99);
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ l99,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &l99_neos_o_10_10_10_1),
+ "24001/neos LB 1 buckets via: "
+ "label 99 over 10.10.10.1");
+
+ /*
+ * change the local label
+ */
+ fib_table_entry_local_label_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ 25005);
+
+ fib_prefix_t pfx_25005_eos = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 25005,
+ .fp_eos = MPLS_EOS,
+ };
+ fib_prefix_t pfx_25005_neos = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 25005,
+ .fp_eos = MPLS_NON_EOS,
+ };
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup(fib_index, &pfx_24001_eos)),
+ "24001/eos removed after label change");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup(fib_index, &pfx_24001_neos)),
+ "24001/eos removed after label change");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_25005_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &adj_o_10_10_11_2),
+ "25005/eos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_25005_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &l99_neos_o_10_10_10_1),
+ "25005/neos LB 1 buckets via: "
+ "label 99 over 10.10.10.1");
+
+ /*
+ * remove the local label.
+ * the check that the MPLS entries are gone is done by the fact that the
+ * MPLS table is no longer present.
+ */
+ fib_table_entry_local_label_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ 25005);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.2");
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ mpls_fib_index_from_table_id(MPLS_FIB_DEFAULT_TABLE_ID)),
+ "No more MPLS FIB entries => table removed");
+
+ /*
+ * add another via-entry for the recursive
+ */
+ fib_prefix_t pfx_1_1_1_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010102),
+ },
+ };
+ fib_test_lb_bucket_t l101_eos_o_10_10_10_1 = {
+ .type = FT_LB_LABEL_O_ADJ,
+ .label_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label = 101,
+ .eos = MPLS_EOS,
+ },
+ };
+ mpls_label_t *l101 = NULL;
+ vec_add1(l101, 101);
+
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ l101,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l101_eos_o_10_10_10_1),
+ "1.1.1.2/32 LB 1 buckets via: "
+ "label 101 over 10.10.10.1");
+
+ dpo_id_t non_eos_1_1_1_2 = DPO_INVALID;
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index,
+ &pfx_1_1_1_1_s_32),
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &non_eos_1_1_1_1);
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index,
+ &pfx_1_1_1_2_s_32),
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &non_eos_1_1_1_2);
+
+ fib_test_lb_bucket_t l1601_eos_o_1_1_1_2 = {
+ .type = FT_LB_LABEL_O_LB,
+ .label_o_lb = {
+ .lb = non_eos_1_1_1_2.dpoi_index,
+ .label = 1601,
+ .eos = MPLS_EOS,
+ },
+ };
+ mpls_label_t *l1601 = NULL;
+ vec_add1(l1601, 1601);
+
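+ /*
+ * the non-eos LB contributed by 1.1.1.1/32 may have been replaced
+ * since it was first captured, so refresh the expected bucket's
+ * LB index before validating.
+ */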
+ l1600_eos_o_1_1_1_1.label_o_lb.lb = non_eos_1_1_1_1.dpoi_index;
+
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_2_2_2_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ l1601,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l1600_eos_o_1_1_1_1,
+ &l1601_eos_o_1_1_1_2),
+ "2.2.2.2/32 LB 2 buckets via: "
+ "label 1600 via 1.1,1.1, "
+ "label 16001 via 1.1.1.2");
+
+ /*
+ * update the via-entry so its path now has an imp-null out-label.
+ * the LB for the recursive can still use an imp-null path
+ */
+ l_imp_null = NULL;
+ vec_add1(l_imp_null, MPLS_IETF_IMPLICIT_NULL_LABEL);
+
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ l_imp_null,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &a_o_10_10_11_1),
+ "1.1.1.2/32 LB 1 buckets via: "
+ "adj 10.10.11.1");
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l1600_eos_o_1_1_1_1,
+ &l1601_eos_o_1_1_1_2),
+ "2.2.2.2/32 LB 2 buckets via: "
+ "label 1600 via 1.1,1.1, "
+ "label 16001 via 1.1.1.2");
+
+ /*
+ * update the via-entry so it no longer has labelled paths.
+ * the LB for the recursive should exclude this via from its LB
+ */
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &a_o_10_10_11_1),
+ "1.1.1.2/32 LB 1 buckets via: "
+ "adj 10.10.11.1");
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l1600_eos_o_1_1_1_1),
+ "2.2.2.2/32 LB 1 buckets via: "
+ "label 1600 via 1.1,1.1");
+
+ dpo_reset(&non_eos_1_1_1_1);
+ dpo_reset(&non_eos_1_1_1_2);
+
+ /*
+ * Add a recursive with no out-labels. We expect to use the IP of the via
+ */
+ fib_prefix_t pfx_2_2_2_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020203),
+ },
+ };
+ dpo_id_t ip_1_1_1_1 = DPO_INVALID;
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_2_2_2_3_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index,
+ &pfx_1_1_1_1_s_32),
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ &ip_1_1_1_1);
+
+ fib_test_lb_bucket_t ip_o_1_1_1_1 = {
+ .type = FT_LB_O_LB,
+ .lb = {
+ .lb = ip_1_1_1_1.dpoi_index,
+ },
+ };
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_3_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_1),
+ "2.2.2.2.3/32 LB 1 buckets via: "
+ "ip 1.1.1.1");
+
+ /*
+ * Add a recursive with an imp-null out-label.
+ * We expect to use the IP of the via
+ */
+ fib_prefix_t pfx_2_2_2_4_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020204),
+ },
+ };
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_2_2_2_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_4_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_1),
+ "2.2.2.2.4/32 LB 1 buckets via: "
+ "ip 1.1.1.1");
+
+ dpo_reset(&ip_1_1_1_1);
+
+ /*
+ * Create an entry with a deep label stack
+ */
+ fib_prefix_t pfx_2_2_5_5_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020505),
+ },
+ };
+ fib_test_lb_bucket_t ls_eos_o_10_10_10_1 = {
+ .type = FT_LB_LABEL_STACK_O_ADJ,
+ .label_stack_o_adj = {
+ .adj = ai_mpls_10_10_11_1,
+ .label_stack_size = 8,
+ .label_stack = {
+ 200, 201, 202, 203, 204, 205, 206, 207
+ },
+ .eos = MPLS_EOS,
+ },
+ };
+ mpls_label_t *label_stack = NULL;
+ vec_validate(label_stack, 7);
+ for (ii = 0; ii < 8; ii++)
+ {
+ label_stack[ii] = ii + 200;
+ }
+
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_2_2_5_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ label_stack,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ls_eos_o_10_10_10_1),
+ "2.2.5.5/32 LB 1 buckets via: "
+ "adj 10.10.11.1");
+ fib_table_entry_delete_index(fei, FIB_SOURCE_API);
+
+ /*
+ * cleanup
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API);
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l1600_eos_o_1_1_1_1),
+ "2.2.2.2/32 LB 1 buckets via: "
+ "label 1600 via 1.1,1.1");
+
+ fib_table_entry_delete(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &bucket_drop),
+ "2.2.2.2/32 LB 1 buckets via: DROP");
+
+ fib_table_entry_delete(fib_index,
+ &pfx_2_2_2_2_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2_2_2_3_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2_2_2_4_s_32,
+ FIB_SOURCE_API);
+
+ adj_unlock(ai_mpls_10_10_10_1);
+ adj_unlock(ai_mpls_10_10_11_2);
+ adj_unlock(ai_v4_10_10_11_1);
+ adj_unlock(ai_v4_10_10_11_2);
+ adj_unlock(ai_mpls_10_10_11_1);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ local0_pfx.fp_len = 32;
+ fib_table_entry_delete(fib_index,
+ &local0_pfx,
+ FIB_SOURCE_INTERFACE);
+ local0_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index,
+ &local0_pfx,
+ FIB_SOURCE_INTERFACE);
+ local1_pfx.fp_len = 32;
+ fib_table_entry_delete(fib_index,
+ &local1_pfx,
+ FIB_SOURCE_INTERFACE);
+ local1_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index,
+ &local1_pfx,
+ FIB_SOURCE_INTERFACE);
+
+ /*
+ * +1 for the drop LB in the MPLS tables.
+ */
+ FIB_TEST(lb_count+1 == pool_elts(load_balance_pool),
+ "Load-balance resources freed %d of %d",
+ lb_count+1, pool_elts(load_balance_pool));
+
+ return (0);
+}
+
+#define N_TEST_CHILDREN 4
+#define PARENT_INDEX 0
+
+typedef struct fib_node_test_t_
+{
+ fib_node_t node;
+ u32 sibling;
+ u32 index;
+ fib_node_back_walk_ctx_t *ctxs;
+ u32 destroyed;
+} fib_node_test_t;
+
+static fib_node_test_t fib_test_nodes[N_TEST_CHILDREN+1];
+
+#define PARENT() (&fib_test_nodes[PARENT_INDEX].node)
+
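+ /*
+ * iterate the children only: slot 0 (PARENT_INDEX) is the parent,
+ * the children occupy slots 1..N_TEST_CHILDREN.
+ */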
+#define FOR_EACH_TEST_CHILD(_tc) \
+ for (ii = 1, (_tc) = &fib_test_nodes[1]; \
+ ii < N_TEST_CHILDREN+1; \
+ ii++, (_tc) = &fib_test_nodes[ii])
+
+static fib_node_t *
+fib_test_child_get_node (fib_node_index_t index)
+{
+ return (&fib_test_nodes[index].node);
+}
+
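+ /*
+ * 0: visited children spawn no walks; 1: each visited child spawns
+ * a sync walk; 2: each spawns an async walk. see the back-walk
+ * callback below.
+ */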
+static int fib_test_walk_spawns_walks;
+
+static fib_node_back_walk_rc_t
+fib_test_child_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_node_test_t *tc = (fib_node_test_t*) node;
+
+ vec_add1(tc->ctxs, *ctx);
+
+ if (1 == fib_test_walk_spawns_walks)
+ fib_walk_sync(FIB_NODE_TYPE_TEST, tc->index, ctx);
+ if (2 == fib_test_walk_spawns_walks)
+ fib_walk_async(FIB_NODE_TYPE_TEST, tc->index,
+ FIB_WALK_PRIORITY_HIGH, ctx);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+static void
+fib_test_child_last_lock_gone (fib_node_t *node)
+{
+ fib_node_test_t *tc = (fib_node_test_t *)node;
+
+ tc->destroyed = 1;
+}
+
+/**
+ * The FIB walk's graph node virtual function table
+ */
+static const fib_node_vft_t fib_test_child_vft = {
+ .fnv_get = fib_test_child_get_node,
+ .fnv_last_lock = fib_test_child_last_lock_gone,
+ .fnv_back_walk = fib_test_child_back_walk_notify,
+};
+
+/*
+ * the functions (that should have been static, but aren't so we can
+ * call them here) that process the walks from the async queue,
+ */
+f64 fib_walk_process_queues(vlib_main_t * vm,
+ const f64 quota);
+u32 fib_walk_queue_get_size(fib_walk_priority_t prio);
+
+static int
+fib_test_walk (void)
+{
+ fib_node_back_walk_ctx_t high_ctx = {}, low_ctx = {};
+ fib_node_test_t *tc;
+ vlib_main_t *vm;
+ u32 ii;
+
+ vm = vlib_get_main();
+ fib_node_register_type(FIB_NODE_TYPE_TEST, &fib_test_child_vft);
+
+ /*
+ * init a fake node on which we will add children
+ */
+ fib_node_init(&fib_test_nodes[PARENT_INDEX].node,
+ FIB_NODE_TYPE_TEST);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ fib_node_init(&tc->node, FIB_NODE_TYPE_TEST);
+ fib_node_lock(&tc->node);
+ tc->ctxs = NULL;
+ tc->index = ii;
+ tc->sibling = fib_node_child_add(FIB_NODE_TYPE_TEST,
+ PARENT_INDEX,
+ FIB_NODE_TYPE_TEST, ii);
+ }
+
+ /*
+ * enqueue a walk across the parent's children.
+ */
+ high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+ FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children pre-walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * give the walk a large amount of time so it gets to the end
+ */
+ fib_walk_process_queues(vm, 1);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ FIB_TEST(1 == vec_len(tc->ctxs),
+ "%d child visitsed %d times",
+ ii, vec_len(tc->ctxs));
+ vec_free(tc->ctxs);
+ }
+ FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+ "Queue is empty post walk");
+ FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * walk again. should be no increase in the number of visits, since
+ * the walk will have terminated.
+ */
+ fib_walk_process_queues(vm, 1);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ FIB_TEST(0 == vec_len(tc->ctxs),
+ "%d child visitsed %d times",
+ ii, vec_len(tc->ctxs));
+ }
+
+ /*
+ * schedule a low and high priority walk. expect the high to be performed
+ * before the low.
+ * schedule the high prio walk first so that it is further from the head
+ * of the dependency list. that way it won't merge with the low one.
+ */
+ high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+ low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE;
+
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_LOW, &low_ctx);
+
+ fib_walk_process_queues(vm, 1);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ FIB_TEST(high_ctx.fnbw_reason == tc->ctxs[0].fnbw_reason,
+ "%d child visitsed by high prio walk", ii);
+ FIB_TEST(low_ctx.fnbw_reason == tc->ctxs[1].fnbw_reason,
+ "%d child visitsed by low prio walk", ii);
+ vec_free(tc->ctxs);
+ }
+ FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+ "Queue is empty post prio walk");
+ FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post prio walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * schedule 2 walks of the same priority that can be merged.
+ * expect that each child is thus visited only once.
+ */
+ high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+ low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &low_ctx);
+
+ fib_walk_process_queues(vm, 1);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ FIB_TEST(1 == vec_len(tc->ctxs),
+ "%d child visitsed %d times during merge walk",
+ ii, vec_len(tc->ctxs));
+ vec_free(tc->ctxs);
+ }
+ FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+ "Queue is empty post merge walk");
+ FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post merge walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * schedule 2 walks of the same priority that cannot be merged.
+ * expect that each child is thus visited twice and in the order
+ * in which the walks were scheduled.
+ */
+ high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+ low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE;
+
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &low_ctx);
+
+ fib_walk_process_queues(vm, 1);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ FIB_TEST(high_ctx.fnbw_reason == tc->ctxs[0].fnbw_reason,
+ "%d child visitsed by high prio walk", ii);
+ FIB_TEST(low_ctx.fnbw_reason == tc->ctxs[1].fnbw_reason,
+ "%d child visitsed by low prio walk", ii);
+ vec_free(tc->ctxs);
+ }
+ FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+ "Queue is empty post no-merge walk");
+ FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post no-merge walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * schedule a walk that makes progress on only one child.
+ * we do this by giving the queue draining process zero
+ * time quanta. it's a do..while loop, so it does something.
+ */
+ high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+ fib_walk_process_queues(vm, 0);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ if (ii == N_TEST_CHILDREN)
+ {
+ FIB_TEST(1 == vec_len(tc->ctxs),
+ "%d child visitsed %d times in zero quanta walk",
+ ii, vec_len(tc->ctxs));
+ }
+ else
+ {
+ FIB_TEST(0 == vec_len(tc->ctxs),
+ "%d child visitsed %d times in 0 quanta walk",
+ ii, vec_len(tc->ctxs));
+ }
+ }
+ FIB_TEST(1 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+ "Queue is not empty post zero quanta walk");
+ FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post zero qunta walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * another one step
+ */
+ fib_walk_process_queues(vm, 0);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ if (ii >= N_TEST_CHILDREN-1)
+ {
+ FIB_TEST(1 == vec_len(tc->ctxs),
+ "%d child visitsed %d times in 2nd zero quanta walk",
+ ii, vec_len(tc->ctxs));
+ }
+ else
+ {
+ FIB_TEST(0 == vec_len(tc->ctxs),
+ "%d child visitsed %d times in 2nd 0 quanta walk",
+ ii, vec_len(tc->ctxs));
+ }
+ }
+ FIB_TEST(1 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+ "Queue is not empty post zero quanta walk");
+ FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post zero qunta walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * schedule another walk that will catch-up and merge.
+ */
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+ fib_walk_process_queues(vm, 1);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ if (ii >= N_TEST_CHILDREN-1)
+ {
+ FIB_TEST(2 == vec_len(tc->ctxs),
+ "%d child visitsed %d times in 2nd zero quanta merge walk",
+ ii, vec_len(tc->ctxs));
+ vec_free(tc->ctxs);
+ }
+ else
+ {
+ FIB_TEST(1 == vec_len(tc->ctxs),
+ "%d child visitsed %d times in 2nd 0 quanta merge walk",
+ ii, vec_len(tc->ctxs));
+ vec_free(tc->ctxs);
+ }
+ }
+ FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+ "Queue is not empty post 2nd zero quanta merge walk");
+ FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post 2nd zero qunta merge walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * park an async walk in the middle of the list, then have a sync walk catch
+ * it. same expectations as when async catches async.
+ */
+ high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+
+ fib_walk_process_queues(vm, 0);
+ fib_walk_process_queues(vm, 0);
+
+ fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ if (ii >= N_TEST_CHILDREN-1)
+ {
+ FIB_TEST(2 == vec_len(tc->ctxs),
+ "%d child visitsed %d times in sync catches async walk",
+ ii, vec_len(tc->ctxs));
+ vec_free(tc->ctxs);
+ }
+ else
+ {
+ FIB_TEST(1 == vec_len(tc->ctxs),
+ "%d child visitsed %d times in sync catches async walk",
+ ii, vec_len(tc->ctxs));
+ vec_free(tc->ctxs);
+ }
+ }
+ FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+ "Queue is not empty post 2nd zero quanta merge walk");
+ FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post 2nd zero qunta merge walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * make the parent a child of one of its children, thus inducing a routing loop.
+ */
+ fib_test_nodes[PARENT_INDEX].sibling =
+ fib_node_child_add(FIB_NODE_TYPE_TEST,
+ 1, // the first child
+ FIB_NODE_TYPE_TEST,
+ PARENT_INDEX);
+
+ /*
+ * execute a sync walk from the parent. each child visited spawns more sync
+ * walks. we expect the walk to terminate.
+ */
+ fib_test_walk_spawns_walks = 1;
+
+ fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ /*
+ * child 1 - which is last in the list - has the loop.
+ * the other children are thus visited first. then we meet
+ * child 1. we go round the loop again, visiting the other children.
+ * then we meet the walk in the dependency list and bail. child 1 is
+ * not visited again.
+ */
+ if (1 == ii)
+ {
+ FIB_TEST(1 == vec_len(tc->ctxs),
+ "child %d visitsed %d times during looped sync walk",
+ ii, vec_len(tc->ctxs));
+ }
+ else
+ {
+ FIB_TEST(2 == vec_len(tc->ctxs),
+ "child %d visitsed %d times during looped sync walk",
+ ii, vec_len(tc->ctxs));
+ }
+ vec_free(tc->ctxs);
+ }
+ FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+ "Parent has %d children post sync loop walk",
+ fib_node_list_get_size(PARENT()->fn_children));
+
+ /*
+ * the walk doesn't reach the max depth because the infra knows that sync
+ * meets sync implies a loop and bails early.
+ */
+ FIB_TEST(high_ctx.fnbw_depth == 9,
+ "Walk context depth %d post sync loop walk",
+ high_ctx.fnbw_depth);
+
+ /*
+ * execute an async walk of the graph loop, with each child spawning sync walks
+ */
+ high_ctx.fnbw_depth = 0;
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+
+ fib_walk_process_queues(vm, 1);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ /*
+ * we don't really care how many times the children are visited, as
+ * long as each is visited at least once.
+ */
+ FIB_TEST(1 <= vec_len(tc->ctxs),
+ "child %d visitsed %d times during looped aync spawns sync walk",
+ ii, vec_len(tc->ctxs));
+ vec_free(tc->ctxs);
+ }
+
+ /*
+ * execute an async walk of the graph loop, with each child spawning async walks
+ */
+ fib_test_walk_spawns_walks = 2;
+ high_ctx.fnbw_depth = 0;
+ fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ FIB_WALK_PRIORITY_HIGH, &high_ctx);
+
+ fib_walk_process_queues(vm, 1);
+
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ /*
+ * we don't really care how many times the children are visited, as
+ * long as each is visited at least once.
+ */
+ FIB_TEST(1 <= vec_len(tc->ctxs),
+ "child %d visitsed %d times during looped async spawns async walk",
+ ii, vec_len(tc->ctxs));
+ vec_free(tc->ctxs);
+ }
+
+
+ fib_node_child_remove(FIB_NODE_TYPE_TEST,
+ 1, // the first child
+ fib_test_nodes[PARENT_INDEX].sibling);
+
+ /*
+ * cleanup
+ */
+ FOR_EACH_TEST_CHILD(tc)
+ {
+ fib_node_child_remove(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ tc->sibling);
+ fib_node_deinit(&tc->node);
+ fib_node_unlock(&tc->node);
+ }
+ fib_node_deinit(PARENT());
+
+ /*
+ * The parent will be destroyed when the last lock on it goes.
+ * this test ensures all the walk objects are unlocking it.
+ */
+ FIB_TEST((1 == fib_test_nodes[PARENT_INDEX].destroyed),
+ "Parent was destroyed");
+
+ return (0);
+}
+
+/*
+ * declaration of the otherwise static callback functions
+ */
+void fib_bfd_notify (bfd_listen_event_e event,
+ const bfd_session_t *session);
+void adj_bfd_notify (bfd_listen_event_e event,
+ const bfd_session_t *session);
+
+/**
+ * Test BFD session interaction with FIB
+ */
+static int
+fib_test_bfd (void)
+{
+ fib_node_index_t fei;
+ test_main_t *tm;
+ int n_feis;
+
+ /* via 10.10.10.1 */
+ ip46_address_t nh_10_10_10_1 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ };
+ /* via 10.10.10.2 */
+ ip46_address_t nh_10_10_10_2 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+ };
+ /* via 10.10.10.10 */
+ ip46_address_t nh_10_10_10_10 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a0a),
+ };
+ n_feis = fib_entry_pool_size();
+
+ tm = &test_main;
+
+ /*
+ * add interface routes. we'll assume this works. it's tested elsewhere
+ */
+ fib_prefix_t pfx_10_10_10_10_s_24 = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = nh_10_10_10_10,
+ };
+
+ fib_table_entry_update_one_path(0, &pfx_10_10_10_10_s_24,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1, // weight
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fib_prefix_t pfx_10_10_10_10_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = nh_10_10_10_10,
+ };
+ fib_table_entry_update_one_path(0, &pfx_10_10_10_10_s_32,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1, // weight
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * A BFD session via a neighbour we do not yet know
+ */
+ bfd_session_t bfd_10_10_10_1 = {
+ .udp = {
+ .key = {
+ .fib_index = 0,
+ .peer_addr = nh_10_10_10_1,
+ },
+ },
+ .hop_type = BFD_HOP_TYPE_MULTI,
+ .local_state = BFD_STATE_init,
+ };
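+ /*
+ * a minimal hand-rolled session: only the fields the FIB listener
+ * reads (key, hop-type, state) are populated.
+ */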
+
+ fib_bfd_notify (BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1);
+
+ /*
+ * A new entry will be created that forwards via the adj
+ */
+ adj_index_t ai_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index);
+ fib_prefix_t pfx_10_10_10_1_s_32 = {
+ .fp_addr = nh_10_10_10_1,
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ fib_test_lb_bucket_t adj_o_10_10_10_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_10_10_10_1,
+ },
+ };
+
+ fei = fib_table_lookup_exact_match(0, &pfx_10_10_10_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &adj_o_10_10_10_1),
+ "BFD sourced %U via %U",
+ format_fib_prefix, &pfx_10_10_10_1_s_32,
+ format_ip_adjacency, ai_10_10_10_1, FORMAT_IP_ADJACENCY_NONE);
+
+ /*
+ * Delete the BFD session. Expect the fib_entry to be removed
+ */
+ fib_bfd_notify (BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_1);
+
+ fei = fib_table_lookup_exact_match(0, &pfx_10_10_10_1_s_32);
+ FIB_TEST(FIB_NODE_INDEX_INVALID == fei,
+ "BFD sourced %U removed",
+ format_fib_prefix, &pfx_10_10_10_1_s_32);
+
+ /*
+ * Add the BFD source back
+ */
+ fib_bfd_notify (BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1);
+
+ /*
+ * source the entry via the ADJ fib
+ */
+ fei = fib_table_entry_path_add(0,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * Delete the BFD session. Expect the fib_entry to remain
+ */
+ fib_bfd_notify (BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_1);
+
+ fei = fib_table_lookup_exact_match(0, &pfx_10_10_10_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &adj_o_10_10_10_1),
+ "BFD sourced %U remains via %U",
+ format_fib_prefix, &pfx_10_10_10_1_s_32,
+ format_ip_adjacency, ai_10_10_10_1, FORMAT_IP_ADJACENCY_NONE);
+
+ /*
+ * Add the BFD source back
+ */
+ fib_bfd_notify (BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1);
+
+ /*
+ * Create another ADJ FIB
+ */
+ fib_prefix_t pfx_10_10_10_2_s_32 = {
+ .fp_addr = nh_10_10_10_2,
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ fib_table_entry_path_add(0,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ /*
+ * A BFD session for the new ADJ FIB
+ */
+ bfd_session_t bfd_10_10_10_2 = {
+ .udp = {
+ .key = {
+ .fib_index = 0,
+ .peer_addr = nh_10_10_10_2,
+ },
+ },
+ .hop_type = BFD_HOP_TYPE_MULTI,
+ .local_state = BFD_STATE_init,
+ };
+
+ fib_bfd_notify (BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_2);
+
+ /*
+ * remove the adj-fib source whilst the session is present
+ * then add it back
+ */
+ fib_table_entry_delete(0, &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ);
+ fib_table_entry_path_add(0,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * Before adding a recursive via the BFD tracked ADJ-FIBs,
+ * bring one of the sessions UP, leave the other down
+ */
+ bfd_10_10_10_1.local_state = BFD_STATE_up;
+ fib_bfd_notify (BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_1);
+ bfd_10_10_10_2.local_state = BFD_STATE_down;
+ fib_bfd_notify (BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_2);
+
+ /*
+ * A recursive prefix via both of the ADJ FIBs
+ */
+ fib_prefix_t pfx_200_0_0_0_s_24 = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0xc8000000),
+ },
+ };
+ const dpo_id_t *dpo_10_10_10_1, *dpo_10_10_10_2;
+
+ dpo_10_10_10_1 =
+ fib_entry_contribute_ip_forwarding(
+ fib_table_lookup_exact_match(0, &pfx_10_10_10_1_s_32));
+ dpo_10_10_10_2 =
+ fib_entry_contribute_ip_forwarding(
+ fib_table_lookup_exact_match(0, &pfx_10_10_10_2_s_32));
+
+ fib_test_lb_bucket_t lb_o_10_10_10_1 = {
+ .type = FT_LB_O_LB,
+ .lb = {
+ .lb = dpo_10_10_10_1->dpoi_index,
+ },
+ };
+ fib_test_lb_bucket_t lb_o_10_10_10_2 = {
+ .type = FT_LB_O_LB,
+ .lb = {
+ .lb = dpo_10_10_10_2->dpoi_index,
+ },
+ };
+
+ /*
+ * A prefix via the adj-fib that is BFD down => DROP
+ */
+ fei = fib_table_entry_path_add(0,
+ &pfx_200_0_0_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ ~0, // recursive
+ 0, // default fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "%U resolves via drop",
+ format_fib_prefix, &pfx_200_0_0_0_s_24);
+
+ /*
+ * add a path via the UP BFD adj-fib.
+ * we expect that the DOWN BFD ADJ FIB is not used.
+ */
+ fei = fib_table_entry_path_add(0,
+ &pfx_200_0_0_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ ~0, // recursive
+ 0, // default fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &lb_o_10_10_10_1),
+ "Recursive %U only UP BFD adj-fibs",
+ format_fib_prefix, &pfx_200_0_0_0_s_24);
+
+ /*
+ * Send a BFD state change to UP - both sessions are now up
+ * the recursive prefix should LB over both
+ */
+ bfd_10_10_10_2.local_state = BFD_STATE_up;
+ fib_bfd_notify (BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_2);
+
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &lb_o_10_10_10_1,
+ &lb_o_10_10_10_2),
+ "Recursive %U via both UP BFD adj-fibs",
+ format_fib_prefix, &pfx_200_0_0_0_s_24);
+
+ /*
+ * Send a BFD state change to DOWN
+ * the recursive prefix should exclude the down
+ */
+ bfd_10_10_10_2.local_state = BFD_STATE_down;
+ fib_bfd_notify (BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_2);
+
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &lb_o_10_10_10_1),
+ "Recursive %U via only UP",
+ format_fib_prefix, &pfx_200_0_0_0_s_24);
+
+ /*
+ * Delete the BFD session while it is in the DOWN state.
+ * FIB should consider the entry's state as back up
+ */
+ fib_bfd_notify (BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_2);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &lb_o_10_10_10_1,
+ &lb_o_10_10_10_2),
+ "Recursive %U via both UP BFD adj-fibs post down session delete",
+ format_fib_prefix, &pfx_200_0_0_0_s_24);
+
+ /*
+ * Delete the other BFD session while it is in the UP state.
+ */
+ fib_bfd_notify (BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_1);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &lb_o_10_10_10_1,
+ &lb_o_10_10_10_2),
+ "Recursive %U via both UP BFD adj-fibs post up session delete",
+ format_fib_prefix, &pfx_200_0_0_0_s_24);
+
+ /*
+ * clean-up
+ */
+ fib_table_entry_delete(0, &pfx_200_0_0_0_s_24, FIB_SOURCE_API);
+ fib_table_entry_delete(0, &pfx_10_10_10_1_s_32, FIB_SOURCE_ADJ);
+ fib_table_entry_delete(0, &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ);
+
+ fib_table_entry_delete(0, &pfx_10_10_10_10_s_32, FIB_SOURCE_INTERFACE);
+ fib_table_entry_delete(0, &pfx_10_10_10_10_s_24, FIB_SOURCE_INTERFACE);
+
+ adj_unlock(ai_10_10_10_1);
+ /*
+ * test no-one left behind
+ */
+ FIB_TEST((n_feis == fib_entry_pool_size()), "Entries gone");
+ FIB_TEST(0 == adj_nbr_db_size(), "All adjacencies removed");
+
+ /*
+ * Single-hop BFD tests
+ */
+ bfd_10_10_10_1.hop_type = BFD_HOP_TYPE_SINGLE;
+ bfd_10_10_10_1.udp.key.sw_if_index = tm->hw[0]->sw_if_index;
+
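+ /*
+ * single-hop sessions are keyed on the interface and delivered to
+ * the adjacency layer; they gate the adjacency's up state rather
+ * than sourcing a fib_entry.
+ */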
+ adj_bfd_notify(BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1);
+
+ ai_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index);
+ /*
+ * whilst the BFD session is not signalled, the adj is up
+ */
+ FIB_TEST(adj_is_up(ai_10_10_10_1), "Adj state up on uninit session");
+
+ /*
+ * bring the BFD session up
+ */
+ bfd_10_10_10_1.local_state = BFD_STATE_up;
+ adj_bfd_notify(BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_1);
+ FIB_TEST(adj_is_up(ai_10_10_10_1), "Adj state up on UP session");
+
+ /*
+ * bring the BFD session down
+ */
+ bfd_10_10_10_1.local_state = BFD_STATE_down;
+ adj_bfd_notify(BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_1);
+ FIB_TEST(!adj_is_up(ai_10_10_10_1), "Adj state down on DOWN session");
+
+
+ /*
+ * add an attached next hop FIB entry via the down adj
+ */
+ fib_prefix_t pfx_5_5_5_5_s_32 = {
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x05050505),
+ },
+ },
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+
+ fei = fib_table_entry_path_add(0,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "%U resolves via drop",
+ format_fib_prefix, &pfx_5_5_5_5_s_32);
+
+ /*
+ * Add a path via an ADJ that is up
+ */
+ adj_index_t ai_10_10_10_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index);
+
+ fib_test_lb_bucket_t adj_o_10_10_10_2 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_10_10_10_2,
+ },
+ };
+ adj_o_10_10_10_1.adj.adj = ai_10_10_10_1;
+
+ fei = fib_table_entry_path_add(0,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &adj_o_10_10_10_2),
+ "BFD sourced %U via %U",
+ format_fib_prefix, &pfx_5_5_5_5_s_32,
+ format_ip_adjacency, ai_10_10_10_2, FORMAT_IP_ADJACENCY_NONE);
+
+ /*
+ * Bring up the down session - should now LB
+ */
+ bfd_10_10_10_1.local_state = BFD_STATE_up;
+ adj_bfd_notify(BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_1);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &adj_o_10_10_10_1,
+ &adj_o_10_10_10_2),
+ "BFD sourced %U via noth adjs",
+ format_fib_prefix, &pfx_5_5_5_5_s_32);
+
+ /*
+ * remove the BFD session state from the adj
+ */
+ adj_bfd_notify(BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_1);
+
+ /*
+ * clean-up
+ */
+ fib_table_entry_delete(0, &pfx_5_5_5_5_s_32, FIB_SOURCE_CLI);
+ adj_unlock(ai_10_10_10_1);
+ adj_unlock(ai_10_10_10_2);
+
+ /*
+ * test no-one left behind
+ */
+ FIB_TEST((n_feis == fib_entry_pool_size()), "Entries gone");
+ FIB_TEST(0 == adj_nbr_db_size(), "All adjacencies removed");
+ return (0);
+}
+
+static int
+lfib_test (void)
+{
+ const mpls_label_t deag_label = 50;
+ const u32 lfib_index = 0;
+ const u32 fib_index = 0;
+ dpo_id_t dpo = DPO_INVALID;
+ const dpo_id_t *dpo1;
+ fib_node_index_t lfe;
+ lookup_dpo_t *lkd;
+ test_main_t *tm;
+ int lb_count;
+ adj_index_t ai_mpls_10_10_10_1;
+
+ tm = &test_main;
+ lb_count = pool_elts(load_balance_pool);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ /*
+ * MPLS enable an interface so we get the MPLS table created
+ */
+ mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API, NULL);
+ mpls_sw_interface_enable_disable(&mpls_main,
+ tm->hw[0]->sw_if_index,
+ 1, 1);
+
+ ip46_address_t nh_10_10_10_1 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ };
+ ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_MPLS,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index);
+
+ /*
+ * Test that the specials stack properly.
+ */
+ fib_prefix_t exp_null_v6_pfx = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_eos = MPLS_EOS,
+ .fp_label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+ .fp_payload_proto = DPO_PROTO_IP6,
+ };
+ lfe = fib_table_lookup(lfib_index, &exp_null_v6_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != lfe),
+ "%U/%U present",
+ format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+ format_mpls_eos_bit, MPLS_EOS);
+ fib_entry_contribute_forwarding(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ &dpo);
+ dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0);
+ lkd = lookup_dpo_get(dpo1->dpoi_index);
+
+ FIB_TEST((fib_index == lkd->lkd_fib_index),
+ "%U/%U is deag in %d %U",
+ format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+ format_mpls_eos_bit, MPLS_EOS,
+ lkd->lkd_fib_index,
+ format_dpo_id, &dpo, 0);
+ FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input),
+ "%U/%U is dst deag",
+ format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+ format_mpls_eos_bit, MPLS_EOS);
+ FIB_TEST((LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table),
+ "%U/%U is lookup in interface's table",
+ format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+ format_mpls_eos_bit, MPLS_EOS);
+ FIB_TEST((DPO_PROTO_IP6 == lkd->lkd_proto),
+ "%U/%U is %U dst deag",
+ format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+ format_mpls_eos_bit, MPLS_EOS,
+ format_dpo_proto, lkd->lkd_proto);
+
+
+ /*
+ * A deag route for EOS
+ */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_eos = MPLS_EOS,
+ .fp_label = deag_label,
+ .fp_payload_proto = DPO_PROTO_IP4,
+ };
+ lfe = fib_table_entry_path_add(lfib_index,
+ &pfx,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &zero_addr,
+ ~0,
+ fib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((lfe == fib_table_lookup(lfib_index, &pfx)),
+ "%U/%U present",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_EOS);
+
+ fib_entry_contribute_forwarding(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ &dpo);
+ dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0);
+ lkd = lookup_dpo_get(dpo1->dpoi_index);
+
+ FIB_TEST((fib_index == lkd->lkd_fib_index),
+ "%U/%U is deag in %d %U",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_EOS,
+ lkd->lkd_fib_index,
+ format_dpo_id, &dpo, 0);
+ FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input),
+ "%U/%U is dst deag",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_EOS);
+ FIB_TEST((DPO_PROTO_IP4 == lkd->lkd_proto),
+ "%U/%U is %U dst deag",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_EOS,
+ format_dpo_proto, lkd->lkd_proto);
+
+ fib_table_entry_delete_index(lfe, FIB_SOURCE_CLI);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup(lfib_index,
+ &pfx)),
+ "%U/%U not present",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_EOS);
+
+ /*
+ * A deag route for non-EOS
+ */
+ pfx.fp_eos = MPLS_NON_EOS;
+ lfe = fib_table_entry_path_add(lfib_index,
+ &pfx,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &zero_addr,
+ ~0,
+ lfib_index,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((lfe == fib_table_lookup(lfib_index, &pfx)),
+ "%U/%U present",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_NON_EOS);
+
+ fib_entry_contribute_forwarding(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &dpo);
+ dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0);
+ lkd = lookup_dpo_get(dpo1->dpoi_index);
+
+ FIB_TEST((fib_index == lkd->lkd_fib_index),
+ "%U/%U is deag in %d %U",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_NON_EOS,
+ lkd->lkd_fib_index,
+ format_dpo_id, &dpo, 0);
+ FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input),
+ "%U/%U is dst deag",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_NON_EOS);
+
+ FIB_TEST((DPO_PROTO_MPLS == lkd->lkd_proto),
+ "%U/%U is %U dst deag",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_NON_EOS,
+ format_dpo_proto, lkd->lkd_proto);
+
+ fib_table_entry_delete_index(lfe, FIB_SOURCE_CLI);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup(lfib_index,
+ &pfx)),
+ "%U/%U not present",
+ format_mpls_unicast_label, deag_label,
+ format_mpls_eos_bit, MPLS_NON_EOS);
+
+ dpo_reset(&dpo);
+
+ /*
+ * An MPLS x-connect
+ */
+ fib_prefix_t pfx_1200 = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 1200,
+ .fp_eos = MPLS_NON_EOS,
+ };
+ fib_test_lb_bucket_t neos_o_10_10_10_1 = {
+ .type = FT_LB_LABEL_STACK_O_ADJ,
+ .label_stack_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label_stack_size = 4,
+ .label_stack = {
+ 200, 300, 400, 500,
+ },
+ .eos = MPLS_NON_EOS,
+ },
+ };
+ dpo_id_t neos_1200 = DPO_INVALID;
+ dpo_id_t ip_1200 = DPO_INVALID;
+ mpls_label_t *l200 = NULL;
+ vec_add1(l200, 200);
+ vec_add1(l200, 300);
+ vec_add1(l200, 400);
+ vec_add1(l200, 500);
+
+ lfe = fib_table_entry_update_one_path(fib_index,
+ &pfx_1200,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ l200,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &neos_o_10_10_10_1),
+ "1200/0 LB 1 buckets via: "
+ "adj 10.10.11.1");
+
+ /*
+ * A recursive route via the MPLS x-connect
+ */
+ fib_prefix_t pfx_2_2_2_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020203),
+ },
+ };
+ fib_route_path_t *rpaths = NULL, rpath = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_local_label = 1200,
+ .frp_eos = MPLS_NON_EOS,
+ .frp_sw_if_index = ~0, // recursive
+ .frp_fib_index = 0, // Default MPLS fib
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_label_stack = NULL,
+ };
+ vec_add1(rpaths, rpath);
+
+ fib_table_entry_path_add2(fib_index,
+ &pfx_2_2_2_3_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ rpaths);
+
+ /*
+ * A labelled recursive route via the MPLS x-connect
+ */
+ fib_prefix_t pfx_2_2_2_4_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020204),
+ },
+ };
+ mpls_label_t *l999 = NULL;
+ vec_add1(l999, 999);
+ rpaths[0].frp_label_stack = l999;
+
+ fib_table_entry_path_add2(fib_index,
+ &pfx_2_2_2_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ rpaths);
+
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index, &pfx_1200),
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ &ip_1200);
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index, &pfx_1200),
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &neos_1200);
+
+ fib_test_lb_bucket_t ip_o_1200 = {
+ .type = FT_LB_O_LB,
+ .lb = {
+ .lb = ip_1200.dpoi_index,
+ },
+ };
+ fib_test_lb_bucket_t mpls_o_1200 = {
+ .type = FT_LB_LABEL_O_LB,
+ .label_o_lb = {
+ .lb = neos_1200.dpoi_index,
+ .label = 999,
+ .eos = MPLS_EOS,
+ },
+ };
+
+ lfe = fib_table_lookup(fib_index, &pfx_2_2_2_3_s_32);
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1200),
+ "2.2.2.2.3/32 LB 1 buckets via: label 1200 EOS");
+ lfe = fib_table_lookup(fib_index, &pfx_2_2_2_4_s_32);
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &mpls_o_1200),
+ "2.2.2.2.4/32 LB 1 buckets via: label 1200 non-EOS");
+
+ fib_table_entry_delete(fib_index, &pfx_1200, FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index, &pfx_2_2_2_3_s_32, FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index, &pfx_2_2_2_4_s_32, FIB_SOURCE_API);
+
+ dpo_reset(&neos_1200);
+ dpo_reset(&ip_1200);
+
+ /*
+ * A recursive via a label that does not exist
+ */
+ fib_test_lb_bucket_t bucket_drop = {
+ .type = FT_LB_SPECIAL,
+ .special = {
+ .adj = DPO_PROTO_IP4,
+ },
+ };
+ fib_test_lb_bucket_t mpls_bucket_drop = {
+ .type = FT_LB_SPECIAL,
+ .special = {
+ .adj = DPO_PROTO_MPLS,
+ },
+ };
+
+ rpaths[0].frp_label_stack = NULL;
+ lfe = fib_table_entry_path_add2(fib_index,
+ &pfx_2_2_2_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ rpaths);
+
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index, &pfx_1200),
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ &ip_1200);
+ ip_o_1200.lb.lb = ip_1200.dpoi_index;
+
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &bucket_drop),
+ "2.2.2.2.4/32 LB 1 buckets via: drop");
+ lfe = fib_table_lookup(fib_index, &pfx_1200);
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &bucket_drop),
+ "1200/neos LB 1 buckets via: ip4-DROP");
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &mpls_bucket_drop),
+ "1200/neos LB 1 buckets via: mpls-DROP");
+
+ fib_table_entry_delete(fib_index, &pfx_2_2_2_4_s_32, FIB_SOURCE_API);
+
+ dpo_reset(&ip_1200);
+
+ /*
+ * An rx-interface route.
+ * like the tail of an mcast LSP
+ */
+ dpo_id_t idpo = DPO_INVALID;
+
+ interface_rx_dpo_add_or_lock(DPO_PROTO_IP4,
+ tm->hw[0]->sw_if_index,
+ &idpo);
+
+ fib_prefix_t pfx_2500 = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 2500,
+ .fp_eos = MPLS_EOS,
+ .fp_payload_proto = DPO_PROTO_IP4,
+ };
+ fib_test_lb_bucket_t rx_intf_0 = {
+ .type = FT_LB_INTF,
+ .adj = {
+ .adj = idpo.dpoi_index,
+ },
+ };
+
+ lfe = fib_table_entry_update_one_path(fib_index,
+ &pfx_2500,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 0,
+ NULL,
+ FIB_ROUTE_PATH_INTF_RX);
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 1,
+ &rx_intf_0),
+ "2500 rx-interface 0");
+ fib_table_entry_delete(fib_index, &pfx_2500, FIB_SOURCE_API);
+
+ /*
+ * An MPLS multicast entry
+ */
+ fib_prefix_t pfx_3500 = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 3500,
+ .fp_eos = MPLS_EOS,
+ .fp_payload_proto = DPO_PROTO_IP4,
+ };
+ fib_test_rep_bucket_t mc_0 = {
+ .type = FT_REP_LABEL_O_ADJ,
+ .label_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label = 3300,
+ .eos = MPLS_EOS,
+ },
+ };
+ fib_test_rep_bucket_t mc_intf_0 = {
+ .type = FT_REP_INTF,
+ .adj = {
+ .adj = idpo.dpoi_index,
+ },
+ };
+ mpls_label_t *l3300 = NULL;
+ vec_add1(l3300, 3300);
+
+ lfe = fib_table_entry_update_one_path(lfib_index,
+ &pfx_3500,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_MULTICAST,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ l3300,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 1,
+ &mc_0),
+ "3500 via replicate over 10.10.10.1");
+
+ /*
+ * MPLS Bud-node. Add a replication via an interface-receive path
+ */
+ lfe = fib_table_entry_path_add(lfib_index,
+ &pfx_3500,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_MULTICAST,
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 0,
+ NULL,
+ FIB_ROUTE_PATH_INTF_RX);
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &mc_0,
+ &mc_intf_0),
+ "3500 via replicate over 10.10.10.1 and interface-rx");
+
+ /*
+ * Add a replication via an interface-less for-us path (RPF-ID)
+ */
+ fib_test_rep_bucket_t mc_disp = {
+ .type = FT_REP_DISP_MFIB_LOOKUP,
+ .adj = {
+ .adj = idpo.dpoi_index,
+ },
+ };
+ lfe = fib_table_entry_path_add(lfib_index,
+ &pfx_3500,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_MULTICAST,
+ DPO_PROTO_IP4,
+ NULL,
+ 5, // rpf-id
+ 0, // default table
+ 0,
+ NULL,
+ FIB_ROUTE_PATH_RPF_ID);
+ FIB_TEST(fib_test_validate_entry(lfe,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 3,
+ &mc_0,
+ &mc_disp,
+ &mc_intf_0),
+ "3500 via replicate over 10.10.10.1 and interface-rx");
+
+ fib_table_entry_delete(fib_index, &pfx_3500, FIB_SOURCE_API);
+ dpo_reset(&idpo);
+
+ /*
+ * cleanup
+ */
+ mpls_sw_interface_enable_disable(&mpls_main,
+ tm->hw[0]->sw_if_index,
+ 0, 1);
+ mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
+
+ FIB_TEST(lb_count == pool_elts(load_balance_pool),
+ "Load-balance resources freed %d of %d",
+ lb_count, pool_elts(load_balance_pool));
+ FIB_TEST(0 == pool_elts(interface_rx_dpo_pool),
+ "interface_rx_dpo resources freed %d of %d",
+ 0, pool_elts(interface_rx_dpo_pool));
+
+ return (0);
+}
+
+static clib_error_t *
+fib_test (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg)
+{
+ int res;
+
+ res = 0;
+ fib_test_mk_intf(4);
+
+ if (unformat (input, "debug"))
+ {
+ fib_test_do_debug = 1;
+ }
+
+ if (unformat (input, "ip"))
+ {
+ res += fib_test_v4();
+ res += fib_test_v6();
+ }
+ else if (unformat (input, "label"))
+ {
+ res += fib_test_label();
+ }
+ else if (unformat (input, "ae"))
+ {
+ res += fib_test_ae();
+ }
+ else if (unformat (input, "pref"))
+ {
+ res += fib_test_pref();
+ }
+ else if (unformat (input, "lfib"))
+ {
+ res += lfib_test();
+ }
+ else if (unformat (input, "walk"))
+ {
+ res += fib_test_walk();
+ }
+ else if (unformat (input, "bfd"))
+ {
+ res += fib_test_bfd();
+ }
+ else
+ {
+ res += fib_test_v4();
+ res += fib_test_v6();
+ res += fib_test_ae();
+ res += fib_test_bfd();
+ res += fib_test_pref();
+ res += fib_test_label();
+ res += lfib_test();
+
+ /*
+ * fib-walk process must be disabled in order for the walk tests to work
+ */
+ fib_walk_process_disable();
+ res += fib_test_walk();
+ fib_walk_process_enable();
+ }
+
+ if (res)
+ {
+ return clib_error_return(0, "FIB Unit Test Failed");
+ }
+ else
+ {
+ return (NULL);
+ }
+}
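+
+/*?
+ * Run the FIB unit tests. A sub-command selects one suite, e.g.
+ * '<em>test fib bfd</em>' or '<em>test fib lfib</em>'; with no
+ * sub-command all suites are run (the suite names match the unformat
+ * branches above). As the short help warns, this is for test images
+ * only - never run it on a live system.
+?*/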
+
+VLIB_CLI_COMMAND (test_fib_command, static) = {
+ .path = "test fib",
+ .short_help = "fib unit tests - DO NOT RUN ON A LIVE SYSTEM",
+ .function = fib_test,
+};
+
+clib_error_t *
+fib_test_init (vlib_main_t *vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (fib_test_init);
diff --git a/src/vnet/fib/fib_test.h b/src/vnet/fib/fib_test.h
new file mode 100644
index 00000000..b98680bf
--- /dev/null
+++ b/src/vnet/fib/fib_test.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_TEST_H__
+#define __FIB_TEST_H__
+
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+typedef enum fib_test_lb_bucket_type_t_ {
+ FT_LB_LABEL_O_ADJ,
+ FT_LB_LABEL_STACK_O_ADJ,
+ FT_LB_LABEL_O_LB,
+ FT_LB_O_LB,
+ FT_LB_SPECIAL,
+ FT_LB_ADJ,
+ FT_LB_INTF,
+} fib_test_lb_bucket_type_t;
+
+typedef struct fib_test_lb_bucket_t_ {
+ fib_test_lb_bucket_type_t type;
+
+ union
+ {
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label;
+ u8 ttl;
+ adj_index_t adj;
+ } label_o_adj;
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label_stack[8];
+ u8 label_stack_size;
+ u8 ttl;
+ adj_index_t adj;
+ } label_stack_o_adj;
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label;
+ u8 ttl;
+ index_t lb;
+ } label_o_lb;
+ struct
+ {
+ index_t adj;
+ } adj;
+ struct
+ {
+ index_t lb;
+ } lb;
+ struct
+ {
+ index_t adj;
+ } special;
+ };
+} fib_test_lb_bucket_t;
+
+typedef enum fib_test_rep_bucket_type_t_ {
+ FT_REP_LABEL_O_ADJ,
+ FT_REP_INTF,
+} fib_test_rep_bucket_type_t;
+
+typedef struct fib_test_rep_bucket_t_ {
+ fib_test_rep_bucket_type_t type;
+
+ union
+ {
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label;
+ u8 ttl;
+ adj_index_t adj;
+ } label_o_adj;
+ struct
+ {
+ adj_index_t adj;
+ } adj;
+ };
+} fib_test_rep_bucket_t;
+
+
+extern int fib_test_validate_rep_v(const replicate_t *rep,
+ u16 n_buckets,
+ va_list ap);
+
+extern int fib_test_validate_lb_v(const load_balance_t *lb,
+ u16 n_buckets,
+ va_list ap);
+
+extern int fib_test_validate_entry(fib_node_index_t fei,
+ fib_forward_chain_type_t fct,
+ u16 n_buckets,
+ ...);
+
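+/*
+ * Example (a sketch mirroring the callers in fib_test.c): describe the
+ * expected load-balance buckets, then validate an entry against them.
+ * 'fei' and 'ai_10_10_10_1' are as used in the tests.
+ *
+ *   fib_test_lb_bucket_t adj_bucket = {
+ *       .type = FT_LB_ADJ,
+ *       .adj = { .adj = ai_10_10_10_1 },
+ *   };
+ *   FIB_TEST(fib_test_validate_entry(fei,
+ *                                    FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ *                                    1, &adj_bucket),
+ *            "entry forwards via the expected adjacency");
+ */
+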
+#endif
diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c
new file mode 100644
index 00000000..8165f3eb
--- /dev/null
+++ b/src/vnet/fib/fib_types.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+
+#include <vnet/fib/fib_types.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * arrays of protocol and link names
+ */
+static const char* fib_protocol_names[] = FIB_PROTOCOLS;
+static const char* vnet_link_names[] = VNET_LINKS;
+static const char* fib_forw_chain_names[] = FIB_FORW_CHAINS;
+
+u8 *
+format_fib_protocol (u8 * s, va_list ap)
+{
+ fib_protocol_t proto = va_arg(ap, int); // fib_protocol_t promotion
+
+ return (format (s, "%s", fib_protocol_names[proto]));
+}
+
+u8 *
+format_vnet_link (u8 * s, va_list ap)
+{
+ vnet_link_t link = va_arg(ap, int); // vnet_link_t promotion
+
+ return (format (s, "%s", vnet_link_names[link]));
+}
+
+u8 *
+format_fib_forw_chain_type (u8 * s, va_list * args)
+{
+ fib_forward_chain_type_t fct = va_arg(*args, int);
+
+ return (format (s, "%s", fib_forw_chain_names[fct]));
+}
+
+void
+fib_prefix_from_ip46_addr (const ip46_address_t *addr,
+ fib_prefix_t *pfx)
+{
+ ASSERT(!ip46_address_is_zero(addr));
+
+ pfx->fp_proto = ((ip46_address_is_ip4(addr) ?
+ FIB_PROTOCOL_IP4 :
+ FIB_PROTOCOL_IP6));
+ pfx->fp_len = ((ip46_address_is_ip4(addr) ?
+ 32 : 128));
+ pfx->fp_addr = *addr;
+}
+
+void
+fib_prefix_from_mpls_label (mpls_label_t label,
+ mpls_eos_bit_t eos,
+ fib_prefix_t *pfx)
+{
+ pfx->fp_proto = FIB_PROTOCOL_MPLS;
+ pfx->fp_len = 21;
+ pfx->fp_label = label;
+ pfx->fp_eos = eos;
+}
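+
+/*
+ * Note: the fixed mask length of 21 covers the 20-bit label plus the
+ * EOS bit, which together form the key into the MPLS FIB.
+ */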
+
+int
+fib_prefix_cmp (const fib_prefix_t *p1,
+ const fib_prefix_t *p2)
+{
+ int res;
+
+ res = (p1->fp_proto - p2->fp_proto);
+
+ if (0 == res)
+ {
+ switch (p1->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ case FIB_PROTOCOL_IP6:
+ res = (p1->fp_len - p2->fp_len);
+
+ if (0 == res)
+ {
+ res = ip46_address_cmp(&p1->fp_addr, &p2->fp_addr);
+ }
+ break;
+ case FIB_PROTOCOL_MPLS:
+ res = (p1->fp_label - p2->fp_label);
+
+ if (0 == res)
+ {
+ res = (p1->fp_eos - p2->fp_eos);
+ }
+ break;
+ }
+ }
+
+ return (res);
+}
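+
+/*
+ * Example (illustrative): a return of 0 means the prefixes are equal in
+ * protocol, length and address (or label and EOS bit for MPLS):
+ *
+ *   if (0 == fib_prefix_cmp(&pfx_a, &pfx_b))
+ *       ;  // same prefix
+ */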
+
+int
+fib_prefix_is_cover (const fib_prefix_t *p1,
+ const fib_prefix_t *p2)
+{
+ switch (p1->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_destination_matches_route(&ip4_main,
+ &p1->fp_addr.ip4,
+ &p2->fp_addr.ip4,
+ p1->fp_len));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_destination_matches_route(&ip6_main,
+ &p1->fp_addr.ip6,
+ &p2->fp_addr.ip6,
+ p1->fp_len));
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ return (0);
+}
+
+int
+fib_prefix_is_host (const fib_prefix_t *prefix)
+{
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (prefix->fp_len == 32);
+ case FIB_PROTOCOL_IP6:
+ return (prefix->fp_len == 128);
+ case FIB_PROTOCOL_MPLS:
+ return (!0);
+ }
+ return (0);
+}
+
+u8 *
+format_fib_prefix (u8 * s, va_list * args)
+{
+ fib_prefix_t *fp = va_arg (*args, fib_prefix_t *);
+
+ /*
+ * protocol specific so it prints ::/0 correctly.
+ */
+ switch (fp->fp_proto)
+ {
+ case FIB_PROTOCOL_IP6:
+ {
+ ip6_address_t p6 = fp->fp_addr.ip6;
+
+ ip6_address_mask(&p6, &(ip6_main.fib_masks[fp->fp_len]));
+ s = format (s, "%U", format_ip6_address, &p6);
+ break;
+ }
+ case FIB_PROTOCOL_IP4:
+ {
+ ip4_address_t p4 = fp->fp_addr.ip4;
+ p4.as_u32 &= ip4_main.fib_masks[fp->fp_len];
+
+ s = format (s, "%U", format_ip4_address, &p4);
+ break;
+ }
+ case FIB_PROTOCOL_MPLS:
+ s = format (s, "%U:%U",
+ format_mpls_unicast_label, fp->fp_label,
+ format_mpls_eos_bit, fp->fp_eos);
+ break;
+ }
+ s = format (s, "/%d", fp->fp_len);
+
+ return (s);
+}
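+
+/*
+ * Example (sketch): as with all format functions, invoke via the "%U"
+ * conversion; the unit tests use this pattern in their test messages:
+ *
+ *   u8 *s = format (0, "prefix: %U", format_fib_prefix, &pfx);
+ */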
+
+int
+fib_route_path_cmp (const fib_route_path_t *rpath1,
+ const fib_route_path_t *rpath2)
+{
+ int res;
+
+ res = ip46_address_cmp(&rpath1->frp_addr,
+ &rpath2->frp_addr);
+
+ if (0 != res) return (res);
+
+ res = (rpath1->frp_sw_if_index - rpath2->frp_sw_if_index);
+
+ if (0 != res) return (res);
+
+ if (ip46_address_is_zero(&rpath1->frp_addr))
+ {
+ res = rpath1->frp_fib_index - rpath2->frp_fib_index;
+ }
+
+ return (res);
+}
+
+dpo_proto_t
+fib_proto_to_dpo (fib_protocol_t fib_proto)
+{
+ switch (fib_proto)
+ {
+ case FIB_PROTOCOL_IP6:
+ return (DPO_PROTO_IP6);
+ case FIB_PROTOCOL_IP4:
+ return (DPO_PROTO_IP4);
+ case FIB_PROTOCOL_MPLS:
+ return (DPO_PROTO_MPLS);
+ }
+ ASSERT(0);
+ return (0);
+}
+
+fib_protocol_t
+dpo_proto_to_fib (dpo_proto_t dpo_proto)
+{
+ switch (dpo_proto)
+ {
+ case DPO_PROTO_IP6:
+ return (FIB_PROTOCOL_IP6);
+ case DPO_PROTO_IP4:
+ return (FIB_PROTOCOL_IP4);
+ case DPO_PROTO_MPLS:
+ return (FIB_PROTOCOL_MPLS);
+ default:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+vnet_link_t
+fib_proto_to_link (fib_protocol_t proto)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (VNET_LINK_IP4);
+ case FIB_PROTOCOL_IP6:
+ return (VNET_LINK_IP6);
+ case FIB_PROTOCOL_MPLS:
+ return (VNET_LINK_MPLS);
+ }
+ ASSERT(0);
+ return (0);
+}
+
+fib_forward_chain_type_t
+fib_forw_chain_type_from_dpo_proto (dpo_proto_t proto)
+{
+ switch (proto)
+ {
+ case DPO_PROTO_IP4:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+ case DPO_PROTO_IP6:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+ case DPO_PROTO_MPLS:
+ return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
+ case DPO_PROTO_ETHERNET:
+ return (FIB_FORW_CHAIN_TYPE_ETHERNET);
+ case DPO_PROTO_NSH:
+ return (FIB_FORW_CHAIN_TYPE_NSH);
+ }
+ ASSERT(0);
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+}
+
+vnet_link_t
+fib_forw_chain_type_to_link_type (fib_forward_chain_type_t fct)
+{
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ return (VNET_LINK_IP4);
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ return (VNET_LINK_IP6);
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ return (VNET_LINK_ETHERNET);
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ return (VNET_LINK_NSH);
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ /*
+ * insufficient information to convert
+ */
+ ASSERT(0);
+ break;
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ return (VNET_LINK_MPLS);
+ }
+ return (VNET_LINK_IP4);
+}
+
+dpo_proto_t
+fib_forw_chain_type_to_dpo_proto (fib_forward_chain_type_t fct)
+{
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ return (DPO_PROTO_IP4);
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ return (DPO_PROTO_IP6);
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ return (DPO_PROTO_ETHERNET);
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ return (DPO_PROTO_NSH);
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ return (DPO_PROTO_MPLS);
+ }
+ return (DPO_PROTO_IP4);
+}
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
new file mode 100644
index 00000000..f11a55da
--- /dev/null
+++ b/src/vnet/fib/fib_types.h
@@ -0,0 +1,426 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_TYPES_H__
+#define __FIB_TYPES_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A typedef of a node index.
+ * We make this typedef so the code is easier for a human to parse.
+ */
+typedef u32 fib_node_index_t;
+#define FIB_NODE_INDEX_INVALID ((fib_node_index_t)(~0))
+
+/**
+ * Protocol type, packed so it consumes only a u8
+ */
+typedef enum fib_protocol_t_ {
+ FIB_PROTOCOL_IP4 = DPO_PROTO_IP4,
+ FIB_PROTOCOL_IP6 = DPO_PROTO_IP6,
+ FIB_PROTOCOL_MPLS = DPO_PROTO_MPLS,
+} __attribute__ ((packed)) fib_protocol_t;
+
+#define FIB_PROTOCOLS { \
+ [FIB_PROTOCOL_IP4] = "ipv4", \
+ [FIB_PROTOCOL_IP6] = "ipv6", \
+ [FIB_PROTOCOL_MPLS] = "MPLS", \
+}
+
+/**
+ * Definition outside of enum so it does not need to be included in non-defaulted
+ * switch statements
+ */
+#define FIB_PROTOCOL_MAX (FIB_PROTOCOL_MPLS + 1)
+
+/**
+ * Not part of the enum so it does not have to be handled in switch statements
+ */
+#define FIB_PROTOCOL_NONE (FIB_PROTOCOL_MAX+1)
+
+#define FOR_EACH_FIB_PROTOCOL(_item) \
+ for (_item = FIB_PROTOCOL_IP4; \
+ _item <= FIB_PROTOCOL_MPLS; \
+ _item++)
+
+#define FOR_EACH_FIB_IP_PROTOCOL(_item) \
+ for (_item = FIB_PROTOCOL_IP4; \
+ _item <= FIB_PROTOCOL_IP6; \
+ _item++)
+
+/**
+ * @brief Convert from a protocol to a link type
+ */
+vnet_link_t fib_proto_to_link (fib_protocol_t proto);
+
+/**
+ * FIB output chain type. When a child object requests a forwarding contribution
+ * from a parent, it does so for a particular scenario. This enumerates those
+ * scenarios.
+ */
+typedef enum fib_forward_chain_type_t_ {
+ /**
+ * Contribute an object that is to be used to forward IP4 packets
+ */
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ /**
+ * Contribute an object that is to be used to forward IP6 packets
+ */
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP6,
+ /**
+ * Contribute an object that is to be used to forward non-end-of-stack
+ * MPLS packets
+ */
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ /**
+ * Contribute an object that is to be used to forward end-of-stack
+ * MPLS packets. This is a convenient ID for clients. A real EOS chain
+ * must be payload-protocol specific. This
+ * option is converted into one of the other three internally.
+ */
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ /**
+ * Contribute an object that is to be used to forward IP4 packets
+ */
+ FIB_FORW_CHAIN_TYPE_MCAST_IP4,
+ /**
+ * Contribute an object that is to be used to forward IP6 packets
+ */
+ FIB_FORW_CHAIN_TYPE_MCAST_IP6,
+ /**
+ * Contribute an object that is to be used to forward Ethernet packets.
+ */
+ FIB_FORW_CHAIN_TYPE_ETHERNET,
+ /**
+ * Contribute an object that is to be used to forward NSH packets.
+ * This is last in the list since it is not valid for many FIB objects,
+ * and thus their array of per-chain-type DPOs can be sized smaller.
+ */
+ FIB_FORW_CHAIN_TYPE_NSH,
+} __attribute__ ((packed)) fib_forward_chain_type_t;
+
+#define FIB_FORW_CHAINS { \
+ [FIB_FORW_CHAIN_TYPE_ETHERNET] = "ethernet", \
+ [FIB_FORW_CHAIN_TYPE_UNICAST_IP4] = "unicast-ip4", \
+ [FIB_FORW_CHAIN_TYPE_UNICAST_IP6] = "unicast-ip6", \
+ [FIB_FORW_CHAIN_TYPE_MCAST_IP4] = "multicast-ip4", \
+ [FIB_FORW_CHAIN_TYPE_MCAST_IP6] = "multicast-ip6", \
+ [FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS] = "mpls-neos", \
+ [FIB_FORW_CHAIN_TYPE_MPLS_EOS] = "mpls-eos", \
+ [FIB_FORW_CHAIN_TYPE_NSH] = "nsh", \
+}
+
+#define FIB_FORW_CHAIN_NUM (FIB_FORW_CHAIN_TYPE_NSH+1)
+#define FIB_FORW_CHAIN_MPLS_NUM (FIB_FORW_CHAIN_TYPE_MPLS_EOS+1)
+
+#define FOR_EACH_FIB_FORW_CHAIN(_item) \
+ for (_item = FIB_FORW_CHAIN_TYPE_UNICAST_IP4; \
+ _item <= FIB_FORW_CHAIN_TYPE_NSH; \
+ _item++)
+
+#define FOR_EACH_FIB_FORW_MPLS_CHAIN(_item) \
+ for (_item = FIB_FORW_CHAIN_TYPE_UNICAST_IP4; \
+ _item <= FIB_FORW_CHAIN_TYPE_MPLS_EOS; \
+ _item++)
+
+/**
+ * @brief Convert from a chain type to the adjacencies link type
+ */
+extern vnet_link_t fib_forw_chain_type_to_link_type(fib_forward_chain_type_t fct);
+
+/**
+ * @brief Convert from a payload-protocol to a chain type.
+ */
+extern fib_forward_chain_type_t fib_forw_chain_type_from_dpo_proto(dpo_proto_t proto);
+
+/**
+ * @brief Convert from a chain type to the DPO proto it will install
+ */
+extern dpo_proto_t fib_forw_chain_type_to_dpo_proto(fib_forward_chain_type_t fct);
+
+/**
+ * Aggregate type for a prefix
+ */
+typedef struct fib_prefix_t_ {
+ /**
+ * The mask length
+ */
+ u16 fp_len;
+
+ /**
+ * protocol type
+ */
+ fib_protocol_t fp_proto;
+
+ /**
+ * Pad to keep the address 4 byte aligned
+ */
+ u8 ___fp___pad;
+
+ union {
+ /**
+ * The address type is not derivable from the fp_addr member alone.
+ * If it's v4, then the first 3 u32s of the address will be 0.
+ * v6 addresses (even v4-mapped ones) have at least 2 u32s assigned
+ * to non-zero values - but when the address is all zeros, one cannot
+ * decide, hence the need for fp_proto.
+ */
+ ip46_address_t fp_addr;
+
+ struct {
+ mpls_label_t fp_label;
+ mpls_eos_bit_t fp_eos;
+ /**
+ * This protocol determines the payload protocol of packets
+ * that will be forwarded by this entry once the label is popped.
+ * For a non-eos entry it will be MPLS.
+ */
+ dpo_proto_t fp_payload_proto;
+ };
+ };
+} fib_prefix_t;
+
+STATIC_ASSERT(STRUCT_OFFSET_OF(fib_prefix_t, fp_addr) == 4,
+ "FIB Prefix's address is 4 byte aligned.");
+
+/**
+ * \brief Compare two prefixes for equality
+ */
+extern int fib_prefix_cmp(const fib_prefix_t *p1,
+ const fib_prefix_t *p2);
+
+/**
+ * \brief Compare two prefixes for covering relationship
+ *
+ * \return non-zero if the first prefix is a cover for the second
+ */
+extern int fib_prefix_is_cover(const fib_prefix_t *p1,
+ const fib_prefix_t *p2);
+
+/**
+ * \brief Return true if the prefix is a host prefix
+ */
+extern int fib_prefix_is_host(const fib_prefix_t *p);
+
+
+/**
+ * \brief Host prefix from ip
+ */
+extern void fib_prefix_from_ip46_addr (const ip46_address_t *addr,
+ fib_prefix_t *pfx);
+
+extern u8 * format_fib_prefix(u8 * s, va_list * args);
+extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args);
+
+extern dpo_proto_t fib_proto_to_dpo(fib_protocol_t fib_proto);
+extern fib_protocol_t dpo_proto_to_fib(dpo_proto_t dpo_proto);
+
+/**
+ * Enumeration of special path/entry types
+ */
+typedef enum fib_special_type_t_ {
+ /**
+ * Marker. Add new types after this one.
+ */
+ FIB_SPECIAL_TYPE_FIRST = 0,
+ /**
+ * Local/for-us paths
+ */
+ FIB_SPECIAL_TYPE_LOCAL = FIB_SPECIAL_TYPE_FIRST,
+ /**
+ * drop paths
+ */
+ FIB_SPECIAL_TYPE_DROP,
+ /**
+ * Marker. Add new types before this one, then update it.
+ */
+ FIB_SPECIAL_TYPE_LAST = FIB_SPECIAL_TYPE_DROP,
+} __attribute__ ((packed)) fib_special_type_t;
+
+/**
+ * The maximum number of types
+ */
+#define FIB_SPECIAL_TYPE_MAX (FIB_SPECIAL_TYPE_LAST + 1)
+
+#define FOR_EACH_FIB_SPECIAL_TYPE(_item) \
+ for (_item = FIB_SPECIAL_TYPE_FIRST; \
+ _item <= FIB_SPECIAL_TYPE_LAST; _item++)
+
+extern u8 * format_fib_protocol(u8 * s, va_list ap);
+extern u8 * format_vnet_link(u8 *s, va_list ap);
+
+/**
+ * Path flags from the control plane
+ */
+typedef enum fib_route_path_flags_t_
+{
+ FIB_ROUTE_PATH_FLAG_NONE = 0,
+ /**
+ * Recursion constraint of via a host prefix
+ */
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST = (1 << 0),
+ /**
+ * Recursion constraint of via an attached prefix
+ */
+ FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED = (1 << 1),
+ /**
+ * A for-us/local path
+ */
+ FIB_ROUTE_PATH_LOCAL = (1 << 2),
+ /**
+ * Attached path
+ */
+ FIB_ROUTE_PATH_ATTACHED = (1 << 3),
+ /**
+ * A Drop path - resolve the path on the drop DPO
+ */
+ FIB_ROUTE_PATH_DROP = (1 << 4),
+ /**
+ * Don't resolve the path, use the DPO the client provides
+ */
+ FIB_ROUTE_PATH_EXCLUSIVE = (1 << 5),
+ /**
+ * A path that results in received traffic being recirculated
+ * so that it appears to have arrived on the new interface
+ */
+ FIB_ROUTE_PATH_INTF_RX = (1 << 6),
+ /**
+ * A local path with a RPF-ID => multicast traffic
+ */
+ FIB_ROUTE_PATH_RPF_ID = (1 << 7),
+} fib_route_path_flags_t;
+
+/**
+ * An RPF-ID is a numerical value used for RPF validation. An entry
+ * has-a RPF-ID; when a packet egresses (e.g. from an LSP) it gains an
+ * RPF-ID, and these two are compared for the RPF check.
+ * This replaces the interface-based check (since the LSP has no
+ * associated interface).
+ */
+typedef u32 fib_rpf_id_t;
+
+#define MFIB_RPF_ID_NONE (0)
+
+/**
+ * @brief
+ * A representation of a path as described by a route producer.
+ * These parameters will determine the path 'type', of which there are:
+ * 1) Attached-next-hop:
+ * a single peer on a link.
+ * It is 'attached' because it is in the same sub-net as the router, on a link
+ * directly connected to the router.
+ * It is 'next-hop' since the next-hop address of the peer is known.
+ * 2) Attached:
+ * The next-hop is not known, but we can ARP for it.
+ * 3) Recursive.
+ * The next-hop is known but the interface is not. So to find the adj to use
+ * we must recursively resolve the next-hop.
+ * 4) Deaggregate (deag):
+ * A further lookup is required.
+ */
+typedef struct fib_route_path_t_ {
+ /**
+ * The protocol of the address below. We need this since the all-zeros
+ * address is ambiguous.
+ */
+ dpo_proto_t frp_proto;
+
+ union {
+ /**
+ * The next-hop address.
+ * Will be NULL for attached paths.
+ * Will be all zeros for attached-next-hop paths on a p2p interface
+ * Will be all zeros for a deag path.
+ */
+ ip46_address_t frp_addr;
+
+ struct {
+ /**
+ * The MPLS local label to recursively resolve through.
+ * This is valid when the path type is MPLS.
+ */
+ mpls_label_t frp_local_label;
+ /**
+ * EOS bit for the resolving label
+ */
+ mpls_eos_bit_t frp_eos;
+ };
+ };
+ union {
+ /**
+ * The interface.
+ * Will be invalid for recursive paths.
+ */
+ u32 frp_sw_if_index;
+ /**
+ * The RPF-ID
+ */
+ fib_rpf_id_t frp_rpf_id;
+ };
+ /**
+ * The FIB index to lookup the nexthop
+ * Only valid for recursive paths.
+ */
+ u32 frp_fib_index;
+ /**
+ * [un]equal cost path weight
+ */
+ u8 frp_weight;
+ /**
+ * A path preference. 0 is the best.
+ * Only paths of the best preference, that are 'up', are considered
+ * for forwarding.
+ */
+ u8 frp_preference;
+ /**
+ * flags on the path
+ */
+ fib_route_path_flags_t frp_flags;
+ /**
+ * The outgoing MPLS label Stack. NULL implies no label.
+ */
+ mpls_label_t *frp_label_stack;
+} fib_route_path_t;
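+
+/*
+ * Example (sketch, based on the paths built in the unit tests): a
+ * recursive path via an MPLS label versus an attached-next-hop path.
+ * 'nh' and 'sw_if_index' are placeholders; unset fields default to 0.
+ *
+ *   fib_route_path_t via_label = {
+ *       .frp_proto = DPO_PROTO_MPLS,
+ *       .frp_local_label = 1200,
+ *       .frp_eos = MPLS_NON_EOS,
+ *       .frp_sw_if_index = ~0,    // recursive: no interface
+ *       .frp_fib_index = 0,
+ *       .frp_weight = 1,
+ *   };
+ *   fib_route_path_t via_peer = {
+ *       .frp_proto = DPO_PROTO_IP4,
+ *       .frp_addr = nh,           // peer's address on the link
+ *       .frp_sw_if_index = sw_if_index,
+ *       .frp_weight = 1,
+ *   };
+ */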
+
+/**
+ * @brief
+ * A representation of a fib path for fib_path_encode to convey the information to the caller
+ */
+typedef struct fib_route_path_encode_t_ {
+ fib_route_path_t rpath;
+ dpo_id_t dpo;
+} fib_route_path_encode_t;
+
+/**
+ * Return code to control a path-list walk
+ */
+typedef enum fib_path_list_walk_rc_t_
+{
+ FIB_PATH_LIST_WALK_STOP,
+ FIB_PATH_LIST_WALK_CONTINUE,
+} fib_path_list_walk_rc_t;
+
+/**
+ * A list of path-extensions
+ */
+typedef struct fib_path_ext_list_t_
+{
+ struct fib_path_ext_t_ *fpel_exts;
+} fib_path_ext_list_t;
+
+#endif
diff --git a/src/vnet/fib/fib_urpf_list.c b/src/vnet/fib/fib_urpf_list.c
new file mode 100644
index 00000000..b4844420
--- /dev/null
+++ b/src/vnet/fib/fib_urpf_list.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_urpf_list.h>
+#include <vnet/adj/adj.h>
+
+/**
+ * @brief pool of all fib_urpf_list
+ */
+fib_urpf_list_t *fib_urpf_list_pool;
+
+u8 *
+format_fib_urpf_list (u8 *s, va_list args)
+{
+ fib_urpf_list_t *urpf;
+ index_t ui;
+ u32 *swi;
+
+ ui = va_arg(args, index_t);
+
+ if (INDEX_INVALID != ui)
+ {
+ urpf = fib_urpf_list_get(ui);
+
+ s = format(s, "uPRF-list:%d len:%d itfs:[",
+ ui, vec_len(urpf->furpf_itfs));
+
+ vec_foreach(swi, urpf->furpf_itfs)
+ {
+ s = format(s, "%d, ", *swi);
+ }
+ s = format(s, "]");
+ }
+ else
+ {
+ s = format(s, "uRPF-list: None");
+ }
+
+ return (s);
+}
+
+index_t
+fib_urpf_list_alloc_and_lock (void)
+{
+ fib_urpf_list_t *urpf;
+
+ pool_get(fib_urpf_list_pool, urpf);
+ memset(urpf, 0, sizeof(*urpf));
+
+ urpf->furpf_locks++;
+
+ return (urpf - fib_urpf_list_pool);
+}
+
+void
+fib_urpf_list_unlock (index_t ui)
+{
+ fib_urpf_list_t *urpf;
+
+ if (INDEX_INVALID == ui)
+ return;
+
+ urpf = fib_urpf_list_get(ui);
+
+ urpf->furpf_locks--;
+
+ if (0 == urpf->furpf_locks)
+ {
+ vec_free(urpf->furpf_itfs);
+ pool_put(fib_urpf_list_pool, urpf);
+ }
+}
+
+void
+fib_urpf_list_lock (index_t ui)
+{
+ fib_urpf_list_t *urpf;
+
+ urpf = fib_urpf_list_get(ui);
+
+ urpf->furpf_locks++;
+}
+
+/**
+ * @brief Append another interface to the list.
+ */
+void
+fib_urpf_list_append (index_t ui,
+ u32 sw_if_index)
+{
+ fib_urpf_list_t *urpf;
+
+ urpf = fib_urpf_list_get(ui);
+
+ vec_add1(urpf->furpf_itfs, sw_if_index);
+}
+
+/**
+ * @brief Combine two interface lists
+ */
+void
+fib_urpf_list_combine (index_t ui1,
+ index_t ui2)
+{
+ fib_urpf_list_t *urpf1, *urpf2;
+
+ urpf1 = fib_urpf_list_get(ui1);
+ urpf2 = fib_urpf_list_get(ui2);
+
+ vec_append(urpf1->furpf_itfs, urpf2->furpf_itfs);
+}
+
+/**
+ * @brief Sort the interface indices.
+ * The sort is the first step in obtaining a unique list, so the order,
+ * w.r.t. next-hop, interface, etc., is not important. So a sort based on the
+ * index is all we need.
+ */
+static int
+fib_urpf_itf_cmp_for_sort (void * v1,
+ void * v2)
+{
+ fib_node_index_t *i1 = v1, *i2 = v2;
+
+ /* a proper three-way comparison, as qsort expects */
+ return (*i1 < *i2 ? -1 : (*i1 > *i2 ? 1 : 0));
+}
+
+/**
+ * @brief Convert the uRPF list from the itf set obtained during the walk
+ * to a unique list.
+ */
+void
+fib_urpf_list_bake (index_t ui)
+{
+ fib_urpf_list_t *urpf;
+
+ urpf = fib_urpf_list_get(ui);
+
+ ASSERT(!(urpf->furpf_flags & FIB_URPF_LIST_BAKED));
+
+ if (vec_len(urpf->furpf_itfs) > 1)
+ {
+ u32 i,j;
+
+ /*
+ * cat list | sort | uniq > rpf_list
+ */
+ vec_sort_with_function(urpf->furpf_itfs, fib_urpf_itf_cmp_for_sort);
+
+ i = 0, j = 1;
+ while (j < vec_len(urpf->furpf_itfs))
+ {
+ if (urpf->furpf_itfs[i] == urpf->furpf_itfs[j])
+ {
+ /*
+ * the adjacent entries are the same.
+ * search forward for a unique one
+ */
+ while (urpf->furpf_itfs[i] == urpf->furpf_itfs[j] &&
+ j < vec_len(urpf->furpf_itfs))
+ {
+ j++;
+ }
+ if (j == vec_len(urpf->furpf_itfs))
+ {
+ /*
+ * ran off the end without finding a unique index.
+ * we are done.
+ */
+ break;
+ }
+ else
+ {
+ urpf->furpf_itfs[i+1] = urpf->furpf_itfs[j];
+ }
+ }
+ i++, j++;
+ }
+
+ /*
+ * set the length of the vector to the number of unique itfs
+ */
+ _vec_len(urpf->furpf_itfs) = i+1;
+ }
+
+ urpf->furpf_flags |= FIB_URPF_LIST_BAKED;
+}
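+
+/*
+ * Example (sketch): the life-cycle of a uRPF list as the FIB drives it -
+ * collect interface indices (duplicates allowed), bake to collapse them
+ * into a unique set, then unlock when done:
+ *
+ *   index_t ui = fib_urpf_list_alloc_and_lock();
+ *
+ *   fib_urpf_list_append(ui, 1);
+ *   fib_urpf_list_append(ui, 2);
+ *   fib_urpf_list_append(ui, 1);  // duplicate via another path
+ *
+ *   fib_urpf_list_bake(ui);       // itfs collapse to the set {1, 2}
+ *
+ *   fib_urpf_list_unlock(ui);     // freed when the lock count hits 0
+ */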
+
+void
+fib_urpf_list_show_mem (void)
+{
+ fib_show_memory_usage("uRPF-list",
+ pool_elts(fib_urpf_list_pool),
+ pool_len(fib_urpf_list_pool),
+ sizeof(fib_urpf_list_t));
+}
+
+static clib_error_t *
+show_fib_urpf_list_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ index_t ui;
+
+ if (unformat (input, "%d", &ui))
+ {
+ /*
+ * show one in detail
+ */
+ if (!pool_is_free_index(fib_urpf_list_pool, ui))
+ {
+ vlib_cli_output (vm, "%d@%U",
+ ui,
+ format_fib_urpf_list, ui);
+ }
+ else
+ {
+ vlib_cli_output (vm, "uRPF %d invalid", ui);
+ }
+ }
+ else
+ {
+ /*
+ * show all
+ */
+ vlib_cli_output (vm, "FIB uRPF Entries:");
+ pool_foreach_index(ui, fib_urpf_list_pool,
+ ({
+ vlib_cli_output (vm, "%d@%U",
+ ui,
+ format_fib_urpf_list, ui);
+ }));
+ }
+
+ return (NULL);
+}
+
+/* *INDENT-OFF* */
+/*?
+ * The '<em>show fib uRPF [index]</em>' command displays the uRPF lists
+ *
+ * @cliexpar
+ * @cliexstart{show fib uRPF}
+ * FIB uRPF Entries:
+ * 0@uRPF-list:0 len:0 itfs:[]
+ * 1@uRPF-list:1 len:2 itfs:[1, 2, ]
+ * 2@uRPF-list:2 len:1 itfs:[3, ]
+ * 3@uRPF-list:3 len:1 itfs:[9, ]
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_fib_urpf_list, static) = {
+ .path = "show fib uRPF",
+ .function = show_fib_urpf_list_command,
+ .short_help = "show fib uRPF",
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_urpf_list.h b/src/vnet/fib/fib_urpf_list.h
new file mode 100644
index 00000000..09f47574
--- /dev/null
+++ b/src/vnet/fib/fib_urpf_list.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief A unicast RPF list.
+ * The uRPF list is the set of interfaces that a prefix can be reached through.
+ * There are 3 levels of RPF check:
+ * - do we have any route to the source (i.e. it's not drop)
+ * - did the packet arrive on an interface that the source is reachable through
+ * - did the packet arrive from a peer that the source is reachable through
+ * We don't support the last. But it could be done by storing adjs in the uRPF
+ * list rather than interface indices.
+ *
+ * these conditions are checked against the list by:
+ * - the list is not empty
+ * - there is an interface in the list that is on the input interface.
+ * - there is an adj in the list whose MAC address matches the packet's
+ * source MAC and input interface.
+ *
+ * To speed the last two checks the interface list only needs to have the unique
+ * interfaces present. If the uRPF check was instead implemented by forward
+ * walking the DPO chain, then that walk would encounter a great deal of
+ * non-adjacency objects (i.e. load-balances, mpls-labels, etc) and potentially
+ * the same adjacency many times (esp. when UCMP is used).
+ * To that end the uRPF list is a collapsed, unique interface only list.
+ */
+
+#ifndef __FIB_URPF_LIST_H__
+#define __FIB_URPF_LIST_H__
+
+#include <vnet/fib/fib_types.h>
+#include <vnet/adj/adj.h>
+
+/**
+ * @brief flags
+ */
+typedef enum fib_urpf_list_flag_t_
+{
+ /**
+ * @brief Set to indicate that the uRPF list has already been baked.
+ * This is protection against it being baked more than once. These
+ * are not chunky fries - once is enough.
+ */
+ FIB_URPF_LIST_BAKED = (1 << 0),
+} fib_urpf_list_flag_t;
+
+typedef struct fib_urpf_list_t_
+{
+ /**
+ * The list of interfaces that comprise the allowed accepting interfaces
+ */
+ adj_index_t *furpf_itfs;
+
+ /**
+ * flags
+ */
+ fib_urpf_list_flag_t furpf_flags;
+
+ /**
+ * uRPF lists are shared amongst many entries so we require a locking
+ * mechanism.
+ */
+ u32 furpf_locks;
+} fib_urpf_list_t;
+
+extern index_t fib_urpf_list_alloc_and_lock(void);
+extern void fib_urpf_list_unlock(index_t urpf);
+extern void fib_urpf_list_lock(index_t urpf);
+
+extern void fib_urpf_list_append(index_t urpf, u32 sw_if_index);
+extern void fib_urpf_list_combine(index_t urpf1, index_t urpf2);
+
+extern void fib_urpf_list_bake(index_t urpf);
+
+extern u8 *format_fib_urpf_list(u8 *s, va_list ap);
+
+extern void fib_urpf_list_show_mem(void);
+
+/**
+ * @brief pool of all fib_urpf_list
+ */
+extern fib_urpf_list_t *fib_urpf_list_pool;
+
+static inline fib_urpf_list_t *
+fib_urpf_list_get (index_t index)
+{
+ return (pool_elt_at_index(fib_urpf_list_pool, index));
+}
+
+/**
+ * @brief Data-Plane function to check an input interface against an uRPF list
+ *
+ * @param ui The uRPF list index to check against. Get this from the load-balance
+ * object that is the result of the FIB lookup
+ * @param sw_if_index The SW interface index to validate
+ *
+ * @return 1 if the interface is found, 0 otherwise
+ */
+always_inline int
+fib_urpf_check (index_t ui, u32 sw_if_index)
+{
+ fib_urpf_list_t *urpf;
+ u32 *swi;
+
+ urpf = fib_urpf_list_get(ui);
+
+ vec_foreach(swi, urpf->furpf_itfs)
+ {
+ if (*swi == sw_if_index)
+ return (1);
+ }
+
+ return (0);
+}
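+
+/*
+ * Example (a sketch, not taken from this patch): an input feature would
+ * fetch the uRPF list index from the load-balance returned by the FIB
+ * lookup and check the packet's RX interface against it. The member
+ * name lb_urpf is assumed here:
+ *
+ *   const load_balance_t *lb = load_balance_get(lbi);
+ *
+ *   if (!fib_urpf_check(lb->lb_urpf,
+ *                       vnet_buffer(b)->sw_if_index[VLIB_RX]))
+ *       ;  // uRPF failure: drop the packet
+ */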
+
+/**
+ * @brief Data-Plane function to check the size of an uRPF list, (i.e. the number
+ * of interfaces in the list).
+ *
+ * @param ui The uRPF list index to check against. Get this from the load-balance
+ * object that is the result of the FIB lookup
+ *
+ * @return the number of interfaces in the list
+ */
+always_inline int
+fib_urpf_check_size (index_t ui)
+{
+ fib_urpf_list_t *urpf;
+
+ urpf = fib_urpf_list_get(ui);
+
+ return (vec_len(urpf->furpf_itfs));
+}
+
+#endif
diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c
new file mode 100644
index 00000000..94297442
--- /dev/null
+++ b/src/vnet/fib/fib_walk.c
@@ -0,0 +1,1205 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_node_list.h>
+
+/**
+ * The flags on a walk
+ */
+typedef enum fib_walk_flags_t_
+{
+ /**
+ * A synchronous walk.
+ * This walk will run to completion, i.e. visit ALL the children.
+ * It is a depth first traversal of the graph.
+ */
+ FIB_WALK_FLAG_SYNC = (1 << 0),
+ /**
+ * An asynchronous walk.
+ * This walk will be scheduled to run in the background. It will thus visit
+ * the children at a later point in time.
+ * It is a depth first traversal of the graph.
+ */
+ FIB_WALK_FLAG_ASYNC = (1 << 1),
+ /**
+ * An indication that the walk is currently executing.
+ */
+ FIB_WALK_FLAG_EXECUTING = (1 << 2),
+} fib_walk_flags_t;
+
+/**
+ * A representation of a graph walk from a parent object to its children
+ */
+typedef struct fib_walk_t_
+{
+ /**
+ * FIB node linkage. This object is not in the FIB object graph,
+ * but it is present in other nodes' dependency lists, so other nodes
+ * need to be able to point to it.
+ */
+ fib_node_t fw_node;
+
+ /**
+ * the walk's flags
+ */
+ fib_walk_flags_t fw_flags;
+
+ /**
+ * Sibling index in the dependency list
+ */
+ u32 fw_dep_sibling;
+
+ /**
+ * Sibling index in the list of all walks
+ */
+ u32 fw_prio_sibling;
+
+ /**
+ * Pointer to the node whose dependants this walk is walking
+ */
+ fib_node_ptr_t fw_parent;
+
+ /**
+ * Number of nodes visited by this walk. saved for debugging purposes.
+ */
+ u32 fw_n_visits;
+
+ /**
+ * Time the walk started
+ */
+ f64 fw_start_time;
+
+ /**
+ * The reasons this walk is occurring.
+ * This is a vector ordered in time. The reasons at the front were started
+ * first, and so should be acted on first when a node is visited.
+ */
+ fib_node_back_walk_ctx_t *fw_ctx;
+} fib_walk_t;
+
+/**
+ * @brief The pool of all walk objects
+ */
+static fib_walk_t *fib_walk_pool;
+
+/**
+ * Statistics maintained per-walk queue
+ */
+typedef enum fib_walk_queue_stats_t_
+{
+ FIB_WALK_SCHEDULED,
+ FIB_WALK_COMPLETED,
+} fib_walk_queue_stats_t;
+#define FIB_WALK_QUEUE_STATS_NUM ((fib_walk_queue_stats_t)(FIB_WALK_COMPLETED+1))
+
+#define FIB_WALK_QUEUE_STATS { \
+ [FIB_WALK_SCHEDULED] = "scheduled", \
+ [FIB_WALK_COMPLETED] = "completed", \
+}
+
+#define FOR_EACH_FIB_WALK_QUEUE_STATS(_wqs) \
+ for ((_wqs) = FIB_WALK_SCHEDULED; \
+ (_wqs) < FIB_WALK_QUEUE_STATS_NUM; \
+ (_wqs)++)
+
+/**
+ * The names of the walk stats
+ */
+static const char * const fib_walk_queue_stats_names[] = FIB_WALK_QUEUE_STATS;
+/**
+ * The names of the walk reasons
+ */
+static const char * const fib_node_bw_reason_names[] = FIB_NODE_BW_REASONS;
+
+/**
+ * A representation of one walk queue
+ */
+typedef struct fib_walk_queue_t_
+{
+ /**
+ * Queue stats
+ */
+ u64 fwq_stats[FIB_WALK_QUEUE_STATS_NUM];
+
+ /**
+ * The node list which acts as the queue
+ */
+ fib_node_list_t fwq_queue;
+} fib_walk_queue_t;
+
+/**
+ * A set of priority queues for outstanding walks
+ */
+typedef struct fib_walk_queues_t_
+{
+ fib_walk_queue_t fwqs_queues[FIB_WALK_PRIORITY_NUM];
+} fib_walk_queues_t;
+
+/**
+ * The global queues of outstanding walks
+ */
+static fib_walk_queues_t fib_walk_queues;
+
+/**
+ * The names of the walk priorities
+ */
+static const char * const fib_walk_priority_names[] = FIB_WALK_PRIORITIES;
+
+/**
+ * @brief Histogram stats on the length of each walk, in elements visited.
+ * Store up to 1<<23 elements in increments of 1<<10
+ */
+#define HISTOGRAM_VISITS_PER_WALK_MAX (1<<23)
+#define HISTOGRAM_VISITS_PER_WALK_INCR (1<<10)
+#define HISTOGRAM_VISITS_PER_WALK_N_BUCKETS \
+ (HISTOGRAM_VISITS_PER_WALK_MAX/HISTOGRAM_VISITS_PER_WALK_INCR)
+static u64 fib_walk_hist_vists_per_walk[HISTOGRAM_VISITS_PER_WALK_N_BUCKETS];
+
+/**
+ * @brief History of state for the last 128 walks
+ */
+#define HISTORY_N_WALKS 128
+#define MAX_HISTORY_REASONS 16
+static u32 history_last_walk_pos;
+typedef struct fib_walk_history_t_ {
+ u32 fwh_n_visits;
+ f64 fwh_duration;
+ f64 fwh_completed;
+ fib_node_ptr_t fwh_parent;
+ fib_walk_flags_t fwh_flags;
+ fib_node_bw_reason_flag_t fwh_reason[MAX_HISTORY_REASONS];
+} fib_walk_history_t;
+static fib_walk_history_t fib_walk_history[HISTORY_N_WALKS];
+
+u8*
+format_fib_walk_priority (u8 *s, va_list ap)
+{
+ fib_walk_priority_t prio = va_arg(ap, fib_walk_priority_t);
+
+ ASSERT(prio < FIB_WALK_PRIORITY_NUM);
+
+ return (format(s, "%s", fib_walk_priority_names[prio]));
+}
+static u8*
+format_fib_walk_queue_stats (u8 *s, va_list ap)
+{
+ fib_walk_queue_stats_t wqs = va_arg(ap, fib_walk_queue_stats_t);
+
+ ASSERT(wqs < FIB_WALK_QUEUE_STATS_NUM);
+
+ return (format(s, "%s", fib_walk_queue_stats_names[wqs]));
+}
+
+static index_t
+fib_walk_get_index (fib_walk_t *fwalk)
+{
+ return (fwalk - fib_walk_pool);
+}
+
+static fib_walk_t *
+fib_walk_get (index_t fwi)
+{
+ return (pool_elt_at_index(fib_walk_pool, fwi));
+}
+
+/*
+ * not static so it can be used in the unit tests
+ */
+u32
+fib_walk_queue_get_size (fib_walk_priority_t prio)
+{
+ return (fib_node_list_get_size(fib_walk_queues.fwqs_queues[prio].fwq_queue));
+}
+
+static fib_node_index_t
+fib_walk_queue_get_front (fib_walk_priority_t prio)
+{
+ fib_node_ptr_t wp;
+
+ fib_node_list_get_front(fib_walk_queues.fwqs_queues[prio].fwq_queue, &wp);
+
+ return (wp.fnp_index);
+}
+
+static void
+fib_walk_destroy (index_t fwi)
+{
+ fib_walk_t *fwalk;
+ u32 bucket, ii;
+
+ fwalk = fib_walk_get(fwi);
+
+ if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling)
+ {
+ fib_node_list_elt_remove(fwalk->fw_prio_sibling);
+ }
+ fib_node_child_remove(fwalk->fw_parent.fnp_type,
+ fwalk->fw_parent.fnp_index,
+ fwalk->fw_dep_sibling);
+
+ /*
+ * refetch the walk object. More walks could have been spawned as a result
+ * of releasing the lock on the parent.
+ */
+ fwalk = fib_walk_get(fwi);
+
+ /*
+ * add the stats to the continuous histogram collection.
+ */
+ bucket = (fwalk->fw_n_visits / HISTOGRAM_VISITS_PER_WALK_INCR);
+ bucket = (bucket >= HISTOGRAM_VISITS_PER_WALK_N_BUCKETS ?
+ HISTOGRAM_VISITS_PER_WALK_N_BUCKETS - 1 :
+ bucket);
+ fib_walk_hist_vists_per_walk[bucket]++;
+
+ /*
+ * save stats to the recent history
+ */
+
+ fib_walk_history[history_last_walk_pos].fwh_n_visits =
+ fwalk->fw_n_visits;
+ fib_walk_history[history_last_walk_pos].fwh_completed =
+ vlib_time_now(vlib_get_main());
+ fib_walk_history[history_last_walk_pos].fwh_duration =
+ fib_walk_history[history_last_walk_pos].fwh_completed -
+ fwalk->fw_start_time;
+ fib_walk_history[history_last_walk_pos].fwh_parent =
+ fwalk->fw_parent;
+ fib_walk_history[history_last_walk_pos].fwh_flags =
+ fwalk->fw_flags;
+
+ vec_foreach_index(ii, fwalk->fw_ctx)
+ {
+ if (ii < MAX_HISTORY_REASONS)
+ {
+ fib_walk_history[history_last_walk_pos].fwh_reason[ii] =
+ fwalk->fw_ctx[ii].fnbw_reason;
+ }
+ }
+
+ history_last_walk_pos = (history_last_walk_pos + 1) % HISTORY_N_WALKS;
+
+ fib_node_deinit(&fwalk->fw_node);
+ vec_free(fwalk->fw_ctx);
+ pool_put(fib_walk_pool, fwalk);
+}
+
+/**
+ * return code when advancing a walk
+ */
+typedef enum fib_walk_advance_rc_t_
+{
+ /**
+ * The walk is complete
+ */
+ FIB_WALK_ADVANCE_DONE,
+ /**
+ * the walk has more work
+ */
+ FIB_WALK_ADVANCE_MORE,
+ /**
+ * The walk merged with the one in front
+ */
+ FIB_WALK_ADVANCE_MERGE,
+} fib_walk_advance_rc_t;
+
+/**
+ * @brief Advance the walk one element in its work list
+ */
+static fib_walk_advance_rc_t
+fib_walk_advance (fib_node_index_t fwi)
+{
+ fib_node_back_walk_rc_t wrc;
+ fib_node_ptr_t sibling;
+ fib_walk_t *fwalk;
+ uint n_ctxs, ii;
+ int more_elts;
+
+ /*
+ * this walk function is re-entrant - walks can spawn walks.
+ * fib_walk_t objects come from a pool, so they can realloc. We need
+ * to re-fetch from said pool at the appropriate times.
+ */
+ fwalk = fib_walk_get(fwi);
+
+ more_elts = fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &sibling);
+
+ if (more_elts)
+ {
+
+ /*
+ * loop through the backwalk contexts. This can grow in length
+ * as walks on the same object meet each other. Order is preserved so the
+ * most recently started walk is at the back of the vector.
+ */
+ ii = 0;
+ n_ctxs = vec_len(fwalk->fw_ctx);
+
+ while (ii < n_ctxs)
+ {
+ wrc = fib_node_back_walk_one(&sibling, &fwalk->fw_ctx[ii]);
+
+ ii++;
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_n_visits++;
+
+ if (FIB_NODE_BACK_WALK_MERGE == wrc)
+ {
+ /*
+ * this walk has merged with the one further along the node's
+ * dependency list.
+ */
+ return (FIB_WALK_ADVANCE_MERGE);
+ }
+
+ /*
+ * re-evaluate the number of backwalk contexts we need to process.
+ */
+ n_ctxs = vec_len(fwalk->fw_ctx);
+ }
+ /*
+ * move forward to the next node to visit
+ */
+ more_elts = fib_node_list_advance(fwalk->fw_dep_sibling);
+ }
+
+ if (more_elts)
+ {
+ return (FIB_WALK_ADVANCE_MORE);
+ }
+
+ return (FIB_WALK_ADVANCE_DONE);
+}
+
+/**
+ * @brief Enumerate the types of sleep between walks
+ */
+typedef enum fib_walk_sleep_type_t_
+{
+ FIB_WALK_SHORT_SLEEP,
+ FIB_WALK_LONG_SLEEP,
+} fib_walk_sleep_type_t;
+
+#define FIB_WALK_N_SLEEP (FIB_WALK_LONG_SLEEP+1)
+
+/**
+ * @brief Durations for the sleep types
+ */
+static f64 fib_walk_sleep_duration[] = {
+ /**
+ * Long sleep when there is no more work, i.e. the queues are empty.
+ * This is a sleep (as opposed to a wait for event) just to be sure we
+ * are not missing events by sleeping forever.
+ */
+ [FIB_WALK_LONG_SLEEP] = 2,
+
+ /**
+ * Short sleep. There is work left in the queues. We are yielding the CPU
+ * momentarily.
+ */
+ [FIB_WALK_SHORT_SLEEP] = 1e-8,
+};
+
+/**
+ * @brief The time quota for a walk. When more than this amount of time is
+ * spent, the walk process will yield.
+ */
+static f64 quota = 1e-4;
+
+/**
+ * Histogram on the amount of work done (in msecs) in each walk
+ */
+#define N_TIME_BUCKETS 128
+#define TIME_INCREMENTS (N_TIME_BUCKETS/2)
+static u64 fib_walk_work_time_taken[N_TIME_BUCKETS];
+
+/**
+ * Histogram on the number of nodes visited in each quota
+ */
+#define N_ELTS_BUCKETS 128
+static u32 fib_walk_work_nodes_visisted_incr = 2;
+static u64 fib_walk_work_nodes_visited[N_ELTS_BUCKETS];
+
+/**
+ * Histogram of the sleep lengths
+ */
+static u64 fib_walk_sleep_lengths[2];
+
+/**
+ * @brief Service the queues
+ * This is not declared static so that it can be unit tested - I know, I know...
+ */
+f64
+fib_walk_process_queues (vlib_main_t * vm,
+ const f64 quota)
+{
+ f64 start_time, consumed_time;
+ fib_walk_sleep_type_t sleep;
+ fib_walk_priority_t prio;
+ fib_walk_advance_rc_t rc;
+ fib_node_index_t fwi;
+ fib_walk_t *fwalk;
+ u32 n_elts;
+ i32 bucket;
+
+ consumed_time = 0;
+ start_time = vlib_time_now(vm);
+ n_elts = 0;
+
+ FOR_EACH_FIB_WALK_PRIORITY(prio)
+ {
+ while (0 != fib_walk_queue_get_size(prio))
+ {
+ fwi = fib_walk_queue_get_front(prio);
+
+ /*
+ * set this walk as executing
+ */
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING;
+
+ do
+ {
+ rc = fib_walk_advance(fwi);
+ n_elts++;
+ consumed_time = (vlib_time_now(vm) - start_time);
+ } while ((consumed_time < quota) &&
+ (FIB_WALK_ADVANCE_MORE == rc));
+
+ /*
+ * if this walk has no more work then pop it from the queue
+ * and move on to the next.
+ */
+ if (FIB_WALK_ADVANCE_MORE != rc)
+ {
+ fib_walk_destroy(fwi);
+ fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++;
+ }
+ else
+ {
+ /*
+ * passed our work quota. sleep time.
+ */
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_flags &= ~FIB_WALK_FLAG_EXECUTING;
+ sleep = FIB_WALK_SHORT_SLEEP;
+ goto that_will_do_for_now;
+ }
+ }
+ }
+ /*
+ * got to the end of all the work
+ */
+ sleep = FIB_WALK_LONG_SLEEP;
+
+that_will_do_for_now:
+
+ /*
+ * collect the stats:
+ * - the number of nodes visited is bucketed in increments of
+ * fib_walk_work_nodes_visisted_incr
+ * - the time consumed is bucketed in increments of quota/TIME_INCREMENTS.
+ */
+ bucket = ((n_elts/fib_walk_work_nodes_visisted_incr) >= N_ELTS_BUCKETS ?
+ N_ELTS_BUCKETS-1 :
+ n_elts/fib_walk_work_nodes_visisted_incr);
+ ++fib_walk_work_nodes_visited[bucket];
+
+ bucket = (consumed_time - quota) / (quota / TIME_INCREMENTS);
+ bucket += N_TIME_BUCKETS/2;
+ bucket = (bucket < 0 ? 0 : bucket);
+ bucket = (bucket > N_TIME_BUCKETS-1 ? N_TIME_BUCKETS-1 : bucket);
+ ++fib_walk_work_time_taken[bucket];
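+
+ /*
+ * A worked example of the bucketing above, using the defaults defined
+ * in this file (quota = 1e-4s, TIME_INCREMENTS = 64): each bucket is
+ * quota/64 ~= 1.56 usec wide and the histogram is centred on the quota.
+ * A run that consumes exactly the quota lands in bucket 64; one 50%
+ * over quota gives (0.5 * quota) / (quota / 64) = 32, i.e. bucket 96;
+ * anything at or beyond twice the quota clamps to bucket 127.
+ */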
+
+ ++fib_walk_sleep_lengths[sleep];
+
+ return (fib_walk_sleep_duration[sleep]);
+}
+
+/**
+ * Events sent to the FIB walk process
+ */
+typedef enum fib_walk_process_event_t_
+{
+ FIB_WALK_PROCESS_EVENT_DATA,
+ FIB_WALK_PROCESS_EVENT_ENABLE,
+ FIB_WALK_PROCESS_EVENT_DISABLE,
+} fib_walk_process_event_t;
+
+/**
+ * @brief The 'fib-walk' process's main loop.
+ */
+static uword
+fib_walk_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ uword event_type, *event_data = 0;
+ f64 sleep_time;
+ int enabled;
+
+ enabled = 1;
+ sleep_time = fib_walk_sleep_duration[FIB_WALK_SHORT_SLEEP];
+
+ while (1)
+ {
+ /*
+ * the feature to disable/enable this walk process is only
+ * for testing purposes
+ */
+ if (enabled)
+ {
+ vlib_process_wait_for_event_or_clock(vm, sleep_time);
+ }
+ else
+ {
+ vlib_process_wait_for_event(vm);
+ }
+
+ event_type = vlib_process_get_events(vm, &event_data);
+ vec_reset_length(event_data);
+
+ switch (event_type)
+ {
+ case FIB_WALK_PROCESS_EVENT_ENABLE:
+ enabled = 1;
+ break;
+ case FIB_WALK_PROCESS_EVENT_DISABLE:
+ enabled = 0;
+ break;
+ default:
+ break;
+ }
+
+ if (enabled)
+ {
+ sleep_time = fib_walk_process_queues(vm, quota);
+ }
+ }
+
+ /*
+ * Unreached
+ */
+ ASSERT(!"WTF");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (fib_walk_process_node,static) = {
+ .function = fib_walk_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "fib-walk",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Allocate a new walk object
+ */
+static fib_walk_t *
+fib_walk_alloc (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_walk_flags_t flags,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_walk_t *fwalk;
+
+ pool_get(fib_walk_pool, fwalk);
+
+ fib_node_init(&fwalk->fw_node, FIB_NODE_TYPE_WALK);
+
+ fwalk->fw_flags = flags;
+ fwalk->fw_dep_sibling = FIB_NODE_INDEX_INVALID;
+ fwalk->fw_prio_sibling = FIB_NODE_INDEX_INVALID;
+ fwalk->fw_parent.fnp_index = parent_index;
+ fwalk->fw_parent.fnp_type = parent_type;
+ fwalk->fw_ctx = NULL;
+ fwalk->fw_start_time = vlib_time_now(vlib_get_main());
+ fwalk->fw_n_visits = 0;
+
+ /*
+ * make a copy of the backwalk context so the depth count remains
+ * the same for each sibling visited. This is important in the case
+ * where a parent has a loop via one child, but not via the others.
+ * if the looped child were visited first, the depth count would exceed
+ * the max and the walk would terminate before it reached the other siblings.
+ */
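+ /*
+ * Illustrative example: parent P has children A and B, where A's
+ * sub-tree contains a loop and B's does not. The walks spawned in A's
+ * sub-tree each take their own copy of the context, so the depth they
+ * accumulate chasing the loop dies with those copies; when this walk
+ * moves on to B it still carries P's original depth, and B's sub-tree
+ * is fully visited.
+ */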
+ vec_add1(fwalk->fw_ctx, *ctx);
+
+ return (fwalk);
+}
+
+/**
+ * @brief Enqueue a walk onto the appropriate priority queue. Then signal
+ * the background process there is work to do.
+ */
+static index_t
+fib_walk_prio_queue_enqueue (fib_walk_priority_t prio,
+ fib_walk_t *fwalk)
+{
+ index_t sibling;
+
+ sibling = fib_node_list_push_front(fib_walk_queues.fwqs_queues[prio].fwq_queue,
+ 0,
+ FIB_NODE_TYPE_WALK,
+ fib_walk_get_index(fwalk));
+ fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_SCHEDULED]++;
+
+ /*
+ * poke the fib-walk process to perform the async walk.
+ * we are not passing it specific data, hence the last two args,
+ * the process will drain the queues
+ */
+ vlib_process_signal_event(vlib_get_main(),
+ fib_walk_process_node.index,
+ FIB_WALK_PROCESS_EVENT_DATA,
+ 0);
+
+ return (sibling);
+}
+
+void
+fib_walk_async (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_walk_priority_t prio,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_walk_t *fwalk;
+
+ if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth)
+ {
+ /*
+ * The walk has reached the maximum depth. there is a loop in the graph.
+ * bail.
+ */
+ return;
+ }
+ if (0 == fib_node_get_n_children(parent_type,
+ parent_index))
+ {
+ /*
+ * no children to walk - quit now
+ */
+ return;
+ }
+ if (ctx->fnbw_flags & FIB_NODE_BW_FLAG_FORCE_SYNC)
+ {
+ /*
+ * the originator of the walk required it to be synchronous, but the
+ * parent object chose async - the originator wins: walk synchronously.
+ */
+ fib_walk_sync(parent_type, parent_index, ctx);
+ return;
+ }
+
+
+ fwalk = fib_walk_alloc(parent_type,
+ parent_index,
+ FIB_WALK_FLAG_ASYNC,
+ ctx);
+
+ fwalk->fw_dep_sibling = fib_node_child_add(parent_type,
+ parent_index,
+ FIB_NODE_TYPE_WALK,
+ fib_walk_get_index(fwalk));
+
+ fwalk->fw_prio_sibling = fib_walk_prio_queue_enqueue(prio, fwalk);
+}
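+
+/*
+ * Usage sketch (illustrative; the node type, index and reason are
+ * hypothetical stand-ins, not taken from this patch): a parent whose
+ * forwarding contribution has changed could queue a background walk of
+ * its children like so:
+ *
+ *   fib_node_back_walk_ctx_t bw_ctx = {
+ *       .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+ *   };
+ *   fib_walk_async(FIB_NODE_TYPE_PATH_LIST, path_list_index,
+ *                  FIB_WALK_PRIORITY_LOW, &bw_ctx);
+ *
+ * Setting FIB_NODE_BW_FLAG_FORCE_SYNC in bw_ctx.fnbw_flags would, per
+ * the check above, divert the request to fib_walk_sync() instead.
+ */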
+
+/**
+ * @brief Back walk all the children of a FIB node.
+ *
+ * note this is a synchronous depth-first walk. Children visited may propagate
+ * the walk to their children. Other child node types may not propagate
+ * synchronously, but instead queue the walk for later async completion.
+ */
+void
+fib_walk_sync (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_walk_advance_rc_t rc;
+ fib_node_index_t fwi;
+ fib_walk_t *fwalk;
+
+ if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth)
+ {
+ /*
+ * The walk has reached the maximum depth. there is a loop in the graph.
+ * bail.
+ */
+ return;
+ }
+ if (0 == fib_node_get_n_children(parent_type,
+ parent_index))
+ {
+ /*
+ * no children to walk - quit now
+ */
+ return;
+ }
+
+ fwalk = fib_walk_alloc(parent_type,
+ parent_index,
+ FIB_WALK_FLAG_SYNC,
+ ctx);
+
+ fwalk->fw_dep_sibling = fib_node_child_add(parent_type,
+ parent_index,
+ FIB_NODE_TYPE_WALK,
+ fib_walk_get_index(fwalk));
+ fwi = fib_walk_get_index(fwalk);
+
+ while (1)
+ {
+ /*
+ * set this walk as executing
+ */
+ fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING;
+
+ do
+ {
+ rc = fib_walk_advance(fwi);
+ } while (FIB_WALK_ADVANCE_MORE == rc);
+
+
+ /*
+ * this walk function is re-entrant - walks can spawn walks.
+ * fib_walk_t objects come from a pool, so they can realloc. we need
+ * to re-fetch from said pool at the appropriate times.
+ */
+ fwalk = fib_walk_get(fwi);
+
+ if (FIB_WALK_ADVANCE_MERGE == rc)
+ {
+ /*
+ * this sync walk merged with a walk in front.
+ * by requesting a sync walk the client wanted all children walked,
+ * so we ditch the walk object in hand and continue with the one
+ * we merged into
+ */
+ fib_node_ptr_t merged_walk;
+
+ fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &merged_walk);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index);
+ ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type);
+
+ fib_walk_destroy(fwi);
+
+ fwi = merged_walk.fnp_index;
+ fwalk = fib_walk_get(fwi);
+
+ if (FIB_WALK_FLAG_EXECUTING & fwalk->fw_flags)
+ {
+ /*
+ * we are executing a sync walk, and we have met with another
+ * walk that is also executing. since only one walk executes at once
+ * (there is no multi-threading) this implies we have met ourselves
+ * and hence there is a loop in the graph.
+ * This function is re-entrant, so the walk object we met is being
+ * acted on in a stack frame below this one. We must therefore not
+ * continue with it now, but let the stack unwind, allowing the
+ * appropriate frame to read the depth count and bail.
+ */
+ fwalk = NULL;
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * the walk reached the end of the depdency list.
+ */
+ break;
+ }
+ }
+
+ if (NULL != fwalk)
+ {
+ fib_walk_destroy(fwi);
+ }
+}
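+
+/*
+ * Usage sketch (illustrative, with a hypothetical parent): a caller
+ * that must observe all children updated before continuing walks
+ * synchronously:
+ *
+ *   fib_node_back_walk_ctx_t bw_ctx = {
+ *       .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+ *   };
+ *   fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, &bw_ctx);
+ *
+ * On return every reachable child (depth limit permitting) has been
+ * visited, including those claimed from any walks this one merged with.
+ */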
+
+static fib_node_t *
+fib_walk_get_node (fib_node_index_t index)
+{
+ fib_walk_t *fwalk;
+
+ fwalk = fib_walk_get(index);
+
+ return (&(fwalk->fw_node));
+}
+
+/**
+ * Walk objects are not parents, nor are they locked, so the lock-gone
+ * callback is a no-op.
+ */
+static void
+fib_walk_last_lock_gone (fib_node_t *node)
+{
+ ASSERT(0);
+}
+
+static fib_walk_t*
+fib_walk_get_from_node (fib_node_t *node)
+{
+ return ((fib_walk_t*)(((char*)node) -
+ STRUCT_OFFSET_OF(fib_walk_t, fw_node)));
+}
+
+/**
+ * @brief Another back walk has reached this walk.
+ * Merge them so there is only one left. It is this node being
+ * visited that will remain, so copy or merge the context onto it.
+ */
+static fib_node_back_walk_rc_t
+fib_walk_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_node_back_walk_ctx_t *last;
+ fib_walk_t *fwalk;
+
+ fwalk = fib_walk_get_from_node(node);
+
+ /*
+ * check whether the walk context can be merged with the most recent.
+ * the most recent was the one last added and is thus at the back of the vector.
+ * we can merge walks if the reason for the walk is the same.
+ */
+ last = vec_end(fwalk->fw_ctx) - 1;
+
+ if (last->fnbw_reason == ctx->fnbw_reason)
+ {
+ /*
+ * copy the largest of the depth values. in the presence of a loop,
+ * the same walk will merge with itself. if we take the smaller depth
+ * then it will never end.
+ */
+ last->fnbw_depth = ((last->fnbw_depth >= ctx->fnbw_depth) ?
+ last->fnbw_depth :
+ ctx->fnbw_depth);
+ }
+ else
+ {
+ /*
+ * the walks could not be merged; the walk in front needs to
+ * perform a different action from this one that has caught up. the one in
+ * front was scheduled first so append the new walk context to the back
+ * of the list.
+ */
+ vec_add1(fwalk->fw_ctx, *ctx);
+ }
+
+ return (FIB_NODE_BACK_WALK_MERGE);
+}
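+
+/*
+ * Worked example (illustrative figures): walk W1, already queued on a
+ * node, is caught by walk W2 carrying the same reason but depth 5 to
+ * W1's 3. The reasons match, so W1 absorbs the larger depth (5) and W2
+ * is destroyed by its caller on seeing FIB_NODE_BACK_WALK_MERGE. Had the
+ * reasons differed, W2's context would instead have been appended to
+ * W1's vector, to be actioned when W1 next advances.
+ */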
+
+/**
+ * The FIB walk's graph node virtual function table
+ */
+static const fib_node_vft_t fib_walk_vft = {
+ .fnv_get = fib_walk_get_node,
+ .fnv_last_lock = fib_walk_last_lock_gone,
+ .fnv_back_walk = fib_walk_back_walk_notify,
+};
+
+void
+fib_walk_module_init (void)
+{
+ fib_walk_priority_t prio;
+
+ FOR_EACH_FIB_WALK_PRIORITY(prio)
+ {
+ fib_walk_queues.fwqs_queues[prio].fwq_queue = fib_node_list_create();
+ }
+
+ fib_node_register_type(FIB_NODE_TYPE_WALK, &fib_walk_vft);
+}
+
+static u8*
+format_fib_walk (u8* s, va_list ap)
+{
+ fib_node_index_t fwi = va_arg(ap, fib_node_index_t);
+ fib_walk_t *fwalk;
+
+ fwalk = fib_walk_get(fwi);
+
+ return (format(s, " parent:{%s:%d} visits:%d flags:%d",
+ fib_node_type_get_name(fwalk->fw_parent.fnp_type),
+ fwalk->fw_parent.fnp_index,
+ fwalk->fw_n_visits,
+ fwalk->fw_flags));
+}
+
+static clib_error_t *
+fib_walk_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ fib_walk_queue_stats_t wqs;
+ fib_walk_priority_t prio;
+ fib_node_ptr_t sibling;
+ fib_node_index_t fwi;
+ fib_walk_t *fwalk;
+ int more_elts, ii;
+ u8 *s = NULL;
+
+#define USEC 1000000
+ vlib_cli_output(vm, "FIB Walk Quota = %.2fusec:", quota * USEC);
+ vlib_cli_output(vm, "FIB Walk queues:");
+
+ FOR_EACH_FIB_WALK_PRIORITY(prio)
+ {
+ vlib_cli_output(vm, " %U priority queue:",
+ format_fib_walk_priority, prio);
+ vlib_cli_output(vm, " Stats: ");
+
+ FOR_EACH_FIB_WALK_QUEUE_STATS(wqs)
+ {
+ vlib_cli_output(vm, " %U:%d",
+ format_fib_walk_queue_stats, wqs,
+ fib_walk_queues.fwqs_queues[prio].fwq_stats[wqs]);
+ }
+ vlib_cli_output(vm, " Occupancy:%d",
+ fib_node_list_get_size(
+ fib_walk_queues.fwqs_queues[prio].fwq_queue));
+
+ more_elts = fib_node_list_get_front(
+ fib_walk_queues.fwqs_queues[prio].fwq_queue,
+ &sibling);
+
+ while (more_elts)
+ {
+ ASSERT(FIB_NODE_INDEX_INVALID != sibling.fnp_index);
+ ASSERT(FIB_NODE_TYPE_WALK == sibling.fnp_type);
+
+ fwi = sibling.fnp_index;
+ fwalk = fib_walk_get(fwi);
+
+ vlib_cli_output(vm, " %U", format_fib_walk, fwi);
+
+ more_elts = fib_node_list_elt_get_next(fwalk->fw_prio_sibling,
+ &sibling);
+ }
+ }
+
+ vlib_cli_output(vm, "Histogram Statistics:");
+ vlib_cli_output(vm, " Number of Elements visit per-quota:");
+ for (ii = 0; ii < N_ELTS_BUCKETS; ii++)
+ {
+ if (0 != fib_walk_work_nodes_visited[ii])
+ s = format(s, "%d:%d ",
+ (ii * fib_walk_work_nodes_visisted_incr),
+ fib_walk_work_nodes_visited[ii]);
+ }
+ vlib_cli_output(vm, " %v", s);
+ vec_free(s);
+
+ vlib_cli_output(vm, " Time consumed per-quota (Quota=%f usec):", quota*USEC);
+ s = format(s, "0:%d ", fib_walk_work_time_taken[0]);
+ for (ii = 1; ii < N_TIME_BUCKETS; ii++)
+ {
+ if (0 != fib_walk_work_time_taken[ii])
+ s = format(s, "%d:%d ", (u32)((((ii - N_TIME_BUCKETS/2) *
+ (quota / TIME_INCREMENTS)) + quota) *
+ USEC),
+ fib_walk_work_time_taken[ii]);
+ }
+ vlib_cli_output(vm, " %v", s);
+ vec_free(s);
+
+ vlib_cli_output(vm, " Sleep Types:");
+ vlib_cli_output(vm, " Short Long:");
+ vlib_cli_output(vm, " %d %d:",
+ fib_walk_sleep_lengths[FIB_WALK_SHORT_SLEEP],
+ fib_walk_sleep_lengths[FIB_WALK_LONG_SLEEP]);
+
+ vlib_cli_output(vm, " Number of Elements visited per-walk:");
+ for (ii = 0; ii < HISTOGRAM_VISITS_PER_WALK_N_BUCKETS; ii++)
+ {
+ if (0 != fib_walk_hist_vists_per_walk[ii])
+ s = format(s, "%d:%d ",
+ ii*HISTOGRAM_VISITS_PER_WALK_INCR,
+ fib_walk_hist_vists_per_walk[ii]);
+ }
+ vlib_cli_output(vm, " %v", s);
+ vec_free(s);
+
+
+ vlib_cli_output(vm, "Brief History (last %d walks):", HISTORY_N_WALKS);
+ ii = history_last_walk_pos - 1;
+ if (ii < 0)
+ ii = HISTORY_N_WALKS - 1;
+
+ while (ii != history_last_walk_pos)
+ {
+ if (0 != fib_walk_history[ii].fwh_reason[0])
+ {
+ fib_node_back_walk_reason_t reason;
+ u8 *s = NULL;
+ u32 jj;
+
+ s = format(s, "[@%d]: %s:%d visits:%d duration:%.2f completed:%.2f ",
+ ii, fib_node_type_get_name(fib_walk_history[ii].fwh_parent.fnp_type),
+ fib_walk_history[ii].fwh_parent.fnp_index,
+ fib_walk_history[ii].fwh_n_visits,
+ fib_walk_history[ii].fwh_duration,
+ fib_walk_history[ii].fwh_completed);
+ if (FIB_WALK_FLAG_SYNC & fib_walk_history[ii].fwh_flags)
+ s = format(s, "sync, ");
+ if (FIB_WALK_FLAG_ASYNC & fib_walk_history[ii].fwh_flags)
+ s = format(s, "async, ");
+
+ s = format(s, "reason:");
+ jj = 0;
+ while (0 != fib_walk_history[ii].fwh_reason[jj])
+ {
+ FOR_EACH_FIB_NODE_BW_REASON(reason) {
+ if ((1<<reason) & fib_walk_history[ii].fwh_reason[jj]) {
+ s = format (s, "%s,", fib_node_bw_reason_names[reason]);
+ }
+ }
+ jj++;
+ }
+ vlib_cli_output(vm, "%v", s);
+ }
+
+ ii--;
+ if (ii < 0)
+ ii = HISTORY_N_WALKS - 1;
+ }
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (fib_walk_show_command, static) = {
+ .path = "show fib walk",
+ .short_help = "show fib walk",
+ .function = fib_walk_show,
+};
+
+static clib_error_t *
+fib_walk_set_quota (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t * error = NULL;
+ f64 new_quota;
+
+ if (unformat (input, "%f", &new_quota))
+ {
+ quota = new_quota;
+ }
+ else
+ {
+ error = clib_error_return(0, "Pass a float value");
+ }
+
+ return (error);
+}
+
+VLIB_CLI_COMMAND (fib_walk_set_quota_command, static) = {
+ .path = "set fib walk quota",
+ .short_help = "set fib walk quota",
+ .function = fib_walk_set_quota,
+};
+
+static clib_error_t *
+fib_walk_set_histogram_elements_size (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t * error = NULL;
+ u32 new;
+
+ if (unformat (input, "%d", &new))
+ {
+ fib_walk_work_nodes_visisted_incr = new;
+ }
+ else
+ {
+ error = clib_error_return(0, "Pass an int value");
+ }
+
+ return (error);
+}
+
+VLIB_CLI_COMMAND (fib_walk_set_histogram_elements_size_command, static) = {
+ .path = "set fib walk histogram elements size",
+ .short_help = "set fib walk histogram elements size",
+ .function = fib_walk_set_histogram_elements_size,
+};
+
+static clib_error_t *
+fib_walk_clear (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ memset(fib_walk_hist_vists_per_walk, 0, sizeof(fib_walk_hist_vists_per_walk));
+ memset(fib_walk_history, 0, sizeof(fib_walk_history));
+ memset(fib_walk_work_time_taken, 0, sizeof(fib_walk_work_time_taken));
+ memset(fib_walk_work_nodes_visited, 0, sizeof(fib_walk_work_nodes_visited));
+ memset(fib_walk_sleep_lengths, 0, sizeof(fib_walk_sleep_lengths));
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (fib_walk_clear_command, static) = {
+ .path = "clear fib walk",
+ .short_help = "clear fib walk",
+ .function = fib_walk_clear,
+};
+
+void
+fib_walk_process_enable (void)
+{
+ vlib_process_signal_event(vlib_get_main(),
+ fib_walk_process_node.index,
+ FIB_WALK_PROCESS_EVENT_ENABLE,
+ 0);
+}
+
+void
+fib_walk_process_disable (void)
+{
+ vlib_process_signal_event(vlib_get_main(),
+ fib_walk_process_node.index,
+ FIB_WALK_PROCESS_EVENT_DISABLE,
+ 0);
+}
+
+static clib_error_t *
+fib_walk_process_enable_disable (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ if (unformat (input, "enable"))
+ {
+ fib_walk_process_enable();
+ }
+ else if (unformat (input, "disable"))
+ {
+ fib_walk_process_disable();
+ }
+ else
+ {
+ return clib_error_return(0, "choose enable or disable");
+ }
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (fib_walk_process_command, static) = {
+ .path = "test fib-walk-process",
+ .short_help = "test fib-walk-process [enable|disable]",
+ .function = fib_walk_process_enable_disable,
+};
diff --git a/src/vnet/fib/fib_walk.h b/src/vnet/fib/fib_walk.h
new file mode 100644
index 00000000..fdf2f10c
--- /dev/null
+++ b/src/vnet/fib/fib_walk.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_WALK_H__
+#define __FIB_WALK_H__
+
+#include <vnet/fib/fib_node.h>
+
+/**
+ * @brief Walk priorities.
+ * Strict priorities. All walks at priority n are completed before any at n+1 start.
+ * Increasing numerical value implies decreasing priority.
+ */
+typedef enum fib_walk_priority_t_
+{
+ FIB_WALK_PRIORITY_HIGH = 0,
+ FIB_WALK_PRIORITY_LOW = 1,
+} fib_walk_priority_t;
+
+#define FIB_WALK_PRIORITY_NUM ((fib_walk_priority_t)(FIB_WALK_PRIORITY_LOW+1))
+
+#define FIB_WALK_PRIORITIES { \
+ [FIB_WALK_PRIORITY_HIGH] = "high", \
+ [FIB_WALK_PRIORITY_LOW] = "low", \
+}
+
+#define FOR_EACH_FIB_WALK_PRIORITY(_prio) \
+ for ((_prio) = FIB_WALK_PRIORITY_HIGH; \
+ (_prio) < FIB_WALK_PRIORITY_NUM; \
+ (_prio)++)
+
+extern void fib_walk_module_init(void);
+
+extern void fib_walk_async(fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_walk_priority_t prio,
+ fib_node_back_walk_ctx_t *ctx);
+
+extern void fib_walk_sync(fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_back_walk_ctx_t *ctx);
+
+extern u8* format_fib_walk_priority(u8 *s, va_list ap);
+
+extern void fib_walk_process_enable(void);
+extern void fib_walk_process_disable(void);
+
+#endif
+
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
new file mode 100644
index 00000000..48dc2c6c
--- /dev/null
+++ b/src/vnet/fib/ip4_fib.c
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/ip4_fib.h>
+
+/*
+ * A table of prefixes to be added to tables, and the sources for them
+ */
+typedef struct ip4_fib_table_special_prefix_t_ {
+ fib_prefix_t ift_prefix;
+ fib_source_t ift_source;
+ fib_entry_flag_t ift_flag;
+} ip4_fib_table_special_prefix_t;
+
+static const ip4_fib_table_special_prefix_t ip4_specials[] = {
+ {
+ /* 0.0.0.0/0*/
+ .ift_prefix = {
+ .fp_addr = {
+ .ip4.data_u32 = 0,
+ },
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+ .ift_source = FIB_SOURCE_DEFAULT_ROUTE,
+ .ift_flag = FIB_ENTRY_FLAG_DROP,
+ },
+ {
+ /* 0.0.0.0/32*/
+ .ift_prefix = {
+ .fp_addr = {
+ .ip4.data_u32 = 0,
+ },
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+ .ift_source = FIB_SOURCE_DEFAULT_ROUTE,
+ .ift_flag = FIB_ENTRY_FLAG_DROP,
+ },
+ {
+ /*
+ * 240.0.0.0/4
+ * drop class E
+ */
+ .ift_prefix = {
+ .fp_addr = {
+ .ip4.data_u32 = 0xf0000000,
+ },
+ .fp_len = 4,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+ .ift_source = FIB_SOURCE_SPECIAL,
+ .ift_flag = FIB_ENTRY_FLAG_DROP,
+
+ },
+ {
+ /*
+ * 224.0.0.0/4
+ * drop all mcast
+ */
+ .ift_prefix = {
+ .fp_addr = {
+ .ip4.data_u32 = 0xe0000000,
+ },
+ .fp_len = 4,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+ .ift_source = FIB_SOURCE_SPECIAL,
+ .ift_flag = FIB_ENTRY_FLAG_DROP,
+ },
+ {
+ /*
+ * 255.255.255.255/32
+ * drop, but we'll allow it to be usurped by the likes of DHCP
+ */
+ .ift_prefix = {
+ .fp_addr = {
+ .ip4.data_u32 = 0xffffffff,
+ },
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+ .ift_source = FIB_SOURCE_DEFAULT_ROUTE,
+ .ift_flag = FIB_ENTRY_FLAG_DROP,
+ }
+};
+
+
+static u32
+ip4_create_fib_with_table_id (u32 table_id,
+ fib_source_t src)
+{
+ fib_table_t *fib_table;
+ ip4_fib_t *v4_fib;
+
+ pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+ memset(fib_table, 0, sizeof(*fib_table));
+
+ pool_get_aligned(ip4_main.v4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES);
+
+ ASSERT((fib_table - ip4_main.fibs) ==
+ (v4_fib - ip4_main.v4_fibs));
+
+ fib_table->ft_proto = FIB_PROTOCOL_IP4;
+ fib_table->ft_index =
+ v4_fib->index =
+ (fib_table - ip4_main.fibs);
+
+ hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index);
+
+ fib_table->ft_table_id =
+ v4_fib->table_id =
+ table_id;
+ fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT;
+ v4_fib->fwd_classify_table_index = ~0;
+ v4_fib->rev_classify_table_index = ~0;
+
+ fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4, src);
+
+ ip4_mtrie_init(&v4_fib->mtrie);
+
+ /*
+ * add the special entries into the new FIB
+ */
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+ {
+ fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
+
+ prefix.fp_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32);
+
+ fib_table_entry_special_add(fib_table->ft_index,
+ &prefix,
+ ip4_specials[ii].ift_source,
+ ip4_specials[ii].ift_flag);
+ }
+
+ return (fib_table->ft_index);
+}
+
+void
+ip4_fib_table_destroy (u32 fib_index)
+{
+ fib_table_t *fib_table = pool_elt_at_index(ip4_main.fibs, fib_index);
+ ip4_fib_t *v4_fib = pool_elt_at_index(ip4_main.v4_fibs, fib_index);
+ int ii;
+
+ /*
+ * remove all the specials we added when the table was created.
+ * In reverse order so the default route is last.
+ */
+ for (ii = ARRAY_LEN(ip4_specials) - 1; ii >= 0; ii--)
+ {
+ fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
+
+ prefix.fp_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32);
+
+ fib_table_entry_special_remove(fib_table->ft_index,
+ &prefix,
+ ip4_specials[ii].ift_source);
+ }
+
+ /*
+ * validate no more routes.
+ */
+ ASSERT(0 == fib_table->ft_total_route_counts);
+ FOR_EACH_FIB_SOURCE(ii)
+ {
+ ASSERT(0 == fib_table->ft_src_route_counts[ii]);
+ }
+
+ if (~0 != fib_table->ft_table_id)
+ {
+ hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id);
+ }
+
+ ip4_mtrie_free(&v4_fib->mtrie);
+
+ pool_put(ip4_main.v4_fibs, v4_fib);
+ pool_put(ip4_main.fibs, fib_table);
+}
+
+
+u32
+ip4_fib_table_find_or_create_and_lock (u32 table_id,
+ fib_source_t src)
+{
+ u32 index;
+
+ index = ip4_fib_index_from_table_id(table_id);
+ if (~0 == index)
+ return ip4_create_fib_with_table_id(table_id, src);
+
+ fib_table_lock(index, FIB_PROTOCOL_IP4, src);
+
+ return (index);
+}
+
+u32
+ip4_fib_table_create_and_lock (fib_source_t src)
+{
+ return (ip4_create_fib_with_table_id(~0, src));
+}
+
+u32
+ip4_fib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+ if (sw_if_index >= vec_len(ip4_main.fib_index_by_sw_if_index))
+ {
+ /*
+ * This is the case for interfaces that are not yet mapped to
+ * an IP table
+ */
+ return (~0);
+ }
+ return (ip4_main.fib_index_by_sw_if_index[sw_if_index]);
+}
+
+/*
+ * ip4_fib_table_lookup_exact_match
+ *
+ * Exact match prefix lookup
+ */
+fib_node_index_t
+ip4_fib_table_lookup_exact_match (const ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ u32 key;
+
+ hash = fib->fib_entry_by_dst_address[len];
+ key = (addr->data_u32 & ip4_main.fib_masks[len]);
+
+ result = hash_get(hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+/*
+ * ip4_fib_table_lookup_lb
+ *
+ * Longest prefix match, returning the index of the load-balance DPO
+ */
+index_t
+ip4_fib_table_lookup_lb (ip4_fib_t *fib,
+ const ip4_address_t *addr)
+{
+ fib_node_index_t fei;
+
+ fei = ip4_fib_table_lookup(fib, addr, 32);
+
+ if (FIB_NODE_INDEX_INVALID != fei)
+ {
+ const dpo_id_t *dpo;
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ return (dpo->dpoi_index);
+ }
+ return (INDEX_INVALID);
+}
+
+/*
+ * ip4_fib_table_lookup
+ *
+ * Longest prefix match
+ */
+fib_node_index_t
+ip4_fib_table_lookup (const ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ i32 mask_len;
+ u32 key;
+
+ for (mask_len = len; mask_len >= 0; mask_len--)
+ {
+ hash = fib->fib_entry_by_dst_address[mask_len];
+ key = (addr->data_u32 & ip4_main.fib_masks[mask_len]);
+
+ result = hash_get (hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
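+
+/*
+ * Worked example (hypothetical table contents): with 10.0.0.0/8 and
+ * 10.1.1.0/24 installed, a lookup of 10.1.1.1 with len=32 probes the
+ * /32 hash (miss), then /31 down to /25 (all empty), and returns the
+ * entry for 10.1.1.0/24 without ever considering the shorter /8.
+ */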
+
+void
+ip4_fib_table_entry_insert (ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index)
+{
+ uword * hash, * result;
+ u32 key;
+
+ key = (addr->data_u32 & ip4_main.fib_masks[len]);
+ hash = fib->fib_entry_by_dst_address[len];
+ result = hash_get (hash, key);
+
+ if (NULL == result) {
+ /*
+ * adding a new entry
+ */
+ if (NULL == hash) {
+ hash = hash_create (32 /* elts */, sizeof (uword));
+ hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK);
+ }
+ hash = hash_set(hash, key, fib_entry_index);
+ fib->fib_entry_by_dst_address[len] = hash;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+}
+
+void
+ip4_fib_table_entry_remove (ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ u32 key;
+
+ key = (addr->data_u32 & ip4_main.fib_masks[len]);
+ hash = fib->fib_entry_by_dst_address[len];
+ result = hash_get (hash, key);
+
+ if (NULL == result)
+ {
+ /*
+ * removing a non-existent entry. I'll allow it.
+ */
+ }
+ else
+ {
+ hash_unset(hash, key);
+ }
+
+ fib->fib_entry_by_dst_address[len] = hash;
+}
+
+void
+ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip4_fib_mtrie_route_add(&fib->mtrie, addr, len, dpo->dpoi_index);
+}
+
+void
+ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo,
+ u32 cover_index)
+{
+ fib_prefix_t cover_prefix = {
+ .fp_len = 0,
+ };
+ const dpo_id_t *cover_dpo;
+
+ /*
+ * We need to pass the MTRIE the LB index and address length of the
+ * covering prefix, so it can fill the plys with the correct replacement
+ * for the entry being removed
+ */
+ fib_entry_get_prefix(cover_index, &cover_prefix);
+ cover_dpo = fib_entry_contribute_ip_forwarding(cover_index);
+
+ ip4_fib_mtrie_route_del(&fib->mtrie,
+ addr, len, dpo->dpoi_index,
+ cover_prefix.fp_len,
+ cover_dpo->dpoi_index);
+}
+
+void
+ip4_fib_table_walk (ip4_fib_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++)
+ {
+ uword * hash = fib->fib_entry_by_dst_address[i];
+
+ if (NULL != hash)
+ {
+ hash_pair_t * p;
+
+ hash_foreach_pair (p, hash,
+ ({
+ fn(p->value[0], ctx);
+ }));
+ }
+ }
+}
+
+/**
+ * Walk show context
+ */
+typedef struct ip4_fib_show_walk_ctx_t_
+{
+ fib_node_index_t *ifsw_indicies;
+} ip4_fib_show_walk_ctx_t;
+
+static int
+ip4_fib_show_walk_cb (fib_node_index_t fib_entry_index,
+ void *arg)
+{
+ ip4_fib_show_walk_ctx_t *ctx = arg;
+
+ vec_add1(ctx->ifsw_indicies, fib_entry_index);
+
+ return (1);
+}
+
+static void
+ip4_fib_table_show_all (ip4_fib_t *fib,
+ vlib_main_t * vm)
+{
+ ip4_fib_show_walk_ctx_t ctx = {
+ .ifsw_indicies = NULL,
+ };
+ fib_node_index_t *fib_entry_index;
+
+ ip4_fib_table_walk(fib, ip4_fib_show_walk_cb, &ctx);
+ vec_sort_with_function(ctx.ifsw_indicies,
+ fib_entry_cmp_for_sort);
+
+ vec_foreach(fib_entry_index, ctx.ifsw_indicies)
+ {
+ vlib_cli_output(vm, "%U",
+ format_fib_entry,
+ *fib_entry_index,
+ FIB_ENTRY_FORMAT_BRIEF);
+ }
+
+ vec_free(ctx.ifsw_indicies);
+}
+
+static void
+ip4_fib_table_show_one (ip4_fib_t *fib,
+ vlib_main_t * vm,
+ ip4_address_t *address,
+ u32 mask_len,
+ int detail)
+{
+ vlib_cli_output(vm, "%U",
+ format_fib_entry,
+ ip4_fib_table_lookup(fib, address, mask_len),
+ (detail ?
+ FIB_ENTRY_FORMAT_DETAIL2 :
+ FIB_ENTRY_FORMAT_DETAIL));
+}
+
+static clib_error_t *
+ip4_show_fib (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip4_main_t * im4 = &ip4_main;
+ fib_table_t * fib_table;
+ int verbose, matching, mtrie;
+ ip4_address_t matching_address;
+ u32 matching_mask = 32;
+ int i, table_id = -1, fib_index = ~0;
+ int detail = 0;
+
+ verbose = 1;
+ matching = 0;
+ mtrie = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "brief") || unformat (input, "summary")
+ || unformat (input, "sum"))
+ verbose = 0;
+
+ else if (unformat (input, "detail") || unformat (input, "det"))
+ detail = 1;
+
+ else if (unformat (input, "mtrie"))
+ mtrie = 1;
+
+ else if (unformat (input, "%U/%d",
+ unformat_ip4_address, &matching_address, &matching_mask))
+ matching = 1;
+
+ else if (unformat (input, "%U", unformat_ip4_address, &matching_address))
+ matching = 1;
+
+ else if (unformat (input, "table %d", &table_id))
+ ;
+ else if (unformat (input, "index %d", &fib_index))
+ ;
+ else
+ break;
+ }
+
+ pool_foreach (fib_table, im4->fibs,
+ ({
+ ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index);
+ fib_source_t source;
+ u8 *s = NULL;
+
+ if (table_id >= 0 && table_id != (int)fib->table_id)
+ continue;
+ if (fib_index != ~0 && fib_index != (int)fib->index)
+ continue;
+
+ s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[",
+ format_fib_table_name, fib->index,
+ FIB_PROTOCOL_IP4,
+ fib->index,
+ format_ip_flow_hash_config,
+ fib_table->ft_flow_hash_config);
+ FOR_EACH_FIB_SOURCE(source)
+ {
+ if (0 != fib_table->ft_locks[source])
+ {
+ s = format(s, "%U:%d, ",
+ format_fib_source, source,
+ fib_table->ft_locks[source]);
+ }
+ }
+ s = format (s, "]");
+ vlib_cli_output (vm, "%v", s);
+ vec_free(s);
+
+ /* Show summary? */
+ if (! verbose)
+ {
+ vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
+ for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++)
+ {
+ uword * hash = fib->fib_entry_by_dst_address[i];
+ uword n_elts = hash_elts (hash);
+ if (n_elts > 0)
+ vlib_cli_output (vm, "%20d%16d", i, n_elts);
+ }
+ continue;
+ }
+ if (mtrie)
+ {
+ vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
+ continue;
+ }
+
+ if (!matching)
+ {
+ ip4_fib_table_show_all(fib, vm);
+ }
+ else
+ {
+ ip4_fib_table_show_one(fib, vm, &matching_address,
+ matching_mask, detail);
+ }
+ }));
+
+ return 0;
+}
+
+/*?
+ * This command displays the IPv4 FIB Tables (VRF Tables) and the route
+ * entries for each table.
+ *
+ * @note This command will run for a long time when the FIB tables are
+ * comprised of millions of entries. For those scenarios, consider displaying
+ * a single table or summary mode.
+ *
+ * @cliexpar
+ * Example of how to display all the IPv4 FIB tables:
+ * @cliexstart{show ip fib}
+ * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 6.0.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
+ * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
+ * 7.0.0.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
+ * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 172.16.1.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet0
+ * 172.16.1.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
+ * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
+ * 172.16.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
+ * 172.16.2.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet1
+ * 172.16.2.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
+ * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @cliexend
+ * Example of how to display a single IPv4 FIB table:
+ * @cliexstart{show ip fib table 7}
+ * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 172.16.1.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet0
+ * 172.16.1.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
+ * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
+ * 172.16.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
+ * 172.16.2.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet1
+ * 172.16.2.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
+ * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @cliexend
+ * Example of how to display a summary of all IPv4 FIB tables:
+ * @cliexstart{show ip fib summary}
+ * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * Prefix length Count
+ * 0 1
+ * 8 2
+ * 32 4
+ * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto
+ * Prefix length Count
+ * 0 1
+ * 8 2
+ * 24 2
+ * 32 4
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
+ .path = "show ip fib",
+ .short_help = "show ip fib [summary] [table <table-id>] [index <fib-id>] [<ip4-addr>[/<mask>]] [mtrie] [detail]",
+ .function = ip4_show_fib,
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
new file mode 100644
index 00000000..495b45cc
--- /dev/null
+++ b/src/vnet/fib/ip4_fib.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv4 FIB
+ *
+ * FIBs are composed of two prefix databases (aka tables). The non-forwarding
+ * table contains all the routes that the control plane has programmed; the
+ * forwarding table contains the sub-set of those routes that can be used to
+ * forward packets.
+ * In the IPv4 FIB the non-forwarding table is an array of hash tables indexed
+ * by mask length; the forwarding table is an mtrie.
+ *
+ * This IPv4 FIB is used by the protocol-independent FIB, so directly using
+ * these APIs in client code is not encouraged. However, this IPv4 FIB can be
+ * used if all the client wants is an IPv4 prefix database.
+ */
+
+#ifndef __IP4_FIB_H__
+#define __IP4_FIB_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip4_mtrie.h>
+
+typedef struct ip4_fib_t_
+{
+ /**
+ * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes.
+ * First member so it's in the first cacheline.
+ */
+ ip4_fib_mtrie_t mtrie;
+
+ /* Hash table for each prefix length mapping. */
+ uword *fib_entry_by_dst_address[33];
+
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+
+ /* N-tuple classifier indices */
+ u32 fwd_classify_table_index;
+ u32 rev_classify_table_index;
+
+} ip4_fib_t;
+
+extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+extern fib_node_index_t ip4_fib_table_lookup_exact_match(const ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_table_entry_remove(ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_table_entry_insert(ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip4_fib_table_destroy(u32 fib_index);
+
+extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+
+extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo,
+ fib_node_index_t cover_index);
+extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib,
+ const ip4_address_t * dst);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B.: This is NOT safe with respect to deletes. If you need to delete,
+ * walk the whole table storing the entries in a vector, then delete the elements.
+ */
+extern void ip4_fib_table_walk(ip4_fib_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx);
+
+/**
+ * @brief Get the FIB at the given index
+ */
+static inline ip4_fib_t *
+ip4_fib_get (u32 index)
+{
+ return (pool_elt_at_index(ip4_main.v4_fibs, index));
+}
+
+always_inline u32
+ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst)
+{
+ return (ip4_fib_table_lookup_lb(
+ ip4_fib_get(vec_elt (im->fib_index_by_sw_if_index, sw_if_index)),
+ dst));
+}
+
+/**
+ * @brief Get or create an IPv4 fib.
+ *
+ * Get or create an IPv4 fib with the provided table ID.
+ *
+ * @param table_id
+ * When set to \c ~0, an arbitrary and unused fib ID is picked
+ * and can be retrieved with \c ret->table_id.
+ * Otherwise, the fib ID to be used to retrieve or create the desired fib.
+ * @returns The index of the retrieved or created fib.
+ *
+ */
+extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id,
+ fib_source_t src);
+extern u32 ip4_fib_table_create_and_lock(fib_source_t src);
+
+
+static inline
+u32 ip4_fib_index_from_table_id (u32 table_id)
+{
+ ip4_main_t * im = &ip4_main;
+ uword * p;
+
+ p = hash_get (im->fib_index_by_table_id, table_id);
+ if (!p)
+ return ~0;
+
+ return p[0];
+}
+
+extern u32 ip4_fib_table_get_index_for_sw_if_index(u32 sw_if_index);
+
+always_inline index_t
+ip4_fib_forwarding_lookup (u32 fib_index,
+ const ip4_address_t * addr)
+{
+ ip4_fib_mtrie_leaf_t leaf;
+ ip4_fib_mtrie_t * mtrie;
+
+ mtrie = &ip4_fib_get(fib_index)->mtrie;
+
+ leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr);
+ leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2);
+ leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3);
+
+ return (ip4_fib_mtrie_leaf_get_adj_index(leaf));
+}
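+
+/*
+ * The three lookup steps above mirror the mtrie's 16-8-8 stride:
+ * step one consumes the two most-significant address bytes, the next
+ * two steps one byte each. Illustratively, for 10.1.2.3 the plys are
+ * indexed by 10.1, then 2, then 3, and the leaf yields the index of
+ * the load-balance for the longest matching prefix.
+ */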
+
+
+#endif
+
diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c
new file mode 100644
index 00000000..f37ae0d2
--- /dev/null
+++ b/src/vnet/fib/ip6_fib.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/fib_table.h>
+
+static void
+vnet_ip6_fib_init (u32 fib_index)
+{
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 0,
+ .fp_addr = {
+ .ip6 = {
+ { 0, 0, },
+ },
+ }
+ };
+
+ /*
+ * Add the default route.
+ */
+ fib_table_entry_special_add(fib_index,
+ &pfx,
+ FIB_SOURCE_DEFAULT_ROUTE,
+ FIB_ENTRY_FLAG_DROP);
+
+ /*
+ * all link local for us
+ */
+ pfx.fp_addr.ip6.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+ pfx.fp_addr.ip6.as_u64[1] = 0;
+ pfx.fp_len = 10;
+ fib_table_entry_special_add(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_LOCAL);
+}
+
+static u32
+create_fib_with_table_id (u32 table_id,
+ fib_source_t src)
+{
+ fib_table_t *fib_table;
+ ip6_fib_t *v6_fib;
+
+ pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+ pool_get_aligned(ip6_main.v6_fibs, v6_fib, CLIB_CACHE_LINE_BYTES);
+
+ memset(fib_table, 0, sizeof(*fib_table));
+ memset(v6_fib, 0, sizeof(*v6_fib));
+
+ ASSERT((fib_table - ip6_main.fibs) ==
+ (v6_fib - ip6_main.v6_fibs));
+
+ fib_table->ft_proto = FIB_PROTOCOL_IP6;
+ fib_table->ft_index =
+ v6_fib->index =
+ (fib_table - ip6_main.fibs);
+
+ hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index);
+
+ fib_table->ft_table_id =
+ v6_fib->table_id =
+ table_id;
+ fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT;
+
+ vnet_ip6_fib_init(fib_table->ft_index);
+ fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6, src);
+
+ return (fib_table->ft_index);
+}
+
+u32
+ip6_fib_table_find_or_create_and_lock (u32 table_id,
+ fib_source_t src)
+{
+ uword * p;
+
+ p = hash_get (ip6_main.fib_index_by_table_id, table_id);
+ if (NULL == p)
+ return create_fib_with_table_id(table_id, src);
+
+ fib_table_lock(p[0], FIB_PROTOCOL_IP6, src);
+
+ return (p[0]);
+}
+
+u32
+ip6_fib_table_create_and_lock (fib_source_t src)
+{
+ return (create_fib_with_table_id(~0, src));
+}
+
+void
+ip6_fib_table_destroy (u32 fib_index)
+{
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 0,
+ .fp_addr = {
+ .ip6 = {
+ { 0, 0, },
+ },
+ }
+ };
+
+ /*
+ * the default route.
+ */
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_DEFAULT_ROUTE);
+
+
+ /*
+ * ff02::1:ff00:0/104
+ */
+ ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0);
+ pfx.fp_len = 104;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ /*
+ * all-routers multicast address
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_routers);
+ pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ /*
+ * all-nodes multicast address
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ /*
+ * all-mldv2 multicast address
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+ pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ /*
+ * all link local
+ */
+ pfx.fp_addr.ip6.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+ pfx.fp_addr.ip6.as_u64[1] = 0;
+ pfx.fp_len = 10;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ fib_table_t *fib_table = fib_table_get(fib_index, FIB_PROTOCOL_IP6);
+ fib_source_t source;
+
+ /*
+ * validate no more routes.
+ */
+ ASSERT(0 == fib_table->ft_total_route_counts);
+ FOR_EACH_FIB_SOURCE(source)
+ {
+ ASSERT(0 == fib_table->ft_src_route_counts[source]);
+ }
+
+ if (~0 != fib_table->ft_table_id)
+ {
+ hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id);
+ }
+ pool_put_index(ip6_main.v6_fibs, fib_table->ft_index);
+ pool_put(ip6_main.fibs, fib_table);
+}
+
+fib_node_index_t
+ip6_fib_table_lookup (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv, value;
+ int i, n_p, rv;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING];
+ n_p = vec_len (table->prefix_lengths_in_search_order);
+
+ kv.key[0] = addr->as_u64[0];
+ kv.key[1] = addr->as_u64[1];
+ fib = ((u64)((fib_index))<<32);
+
+ /*
+ * start search from a mask length same length or shorter.
+ * we don't want matches longer than the mask passed
+ */
+ i = 0;
+ while (i < n_p && table->prefix_lengths_in_search_order[i] > len)
+ {
+ i++;
+ }
+
+ for (; i < n_p; i++)
+ {
+ int dst_address_length = table->prefix_lengths_in_search_order[i];
+ ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length];
+
+ ASSERT(dst_address_length >= 0 && dst_address_length <= 128);
+ // As lengths decrease, each mask covers fewer bits than the last,
+ // so the key can be progressively ANDed down in place.
+ kv.key[0] &= mask->as_u64[0];
+ kv.key[1] &= mask->as_u64[1];
+ kv.key[2] = fib | dst_address_length;
+
+ rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value);
+ if (rv == 0)
+ return value.value;
+ }
+
+ return (FIB_NODE_INDEX_INVALID);
+}
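+
+/*
+ * Illustrative bihash key layout for the probes above: key[0] and key[1]
+ * hold the (masked) upper and lower 64 bits of the address, and key[2]
+ * packs the table with the prefix length, e.g. fib_index 2 with a /32
+ * route gives key[2] = (2ULL << 32) | 32. Entries from different VRFs,
+ * or of different lengths, thus never collide, which is what allows all
+ * tables to share the one hash.
+ */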
+
+fib_node_index_t
+ip6_fib_table_lookup_exact_match (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv, value;
+ ip6_address_t *mask;
+ u64 fib;
+ int rv;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+
+ rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value);
+ if (rv == 0)
+ return value.value;
+
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+static void
+compute_prefix_lengths_in_search_order (ip6_fib_table_instance_t *table)
+{
+ int i;
+ vec_reset_length (table->prefix_lengths_in_search_order);
+ /* Note: bitmap reversed so this is in fact a longest prefix match */
+ clib_bitmap_foreach (i, table->non_empty_dst_address_length_bitmap,
+ ({
+ int dst_address_length = 128 - i;
+ vec_add1(table->prefix_lengths_in_search_order, dst_address_length);
+ }));
+}
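+
+/*
+ * Worked example (hypothetical occupancy): if only /128, /64 and /0
+ * routes exist, bits 0, 64 and 128 (i.e. 128 - len) are set in the
+ * bitmap, and the foreach above produces
+ * prefix_lengths_in_search_order = {128, 64, 0} - most to least
+ * specific, exactly as the LPM loop in ip6_fib_table_lookup() expects.
+ */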
+
+void
+ip6_fib_table_entry_remove (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv;
+ ip6_address_t *mask;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+
+ BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 0);
+
+ /* refcount accounting */
+ ASSERT (table->dst_address_length_refcounts[len] > 0);
+ if (--table->dst_address_length_refcounts[len] == 0)
+ {
+ table->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
+ 128 - len, 0);
+ compute_prefix_lengths_in_search_order (table);
+ }
+}
+
+void
+ip6_fib_table_entry_insert (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv;
+ ip6_address_t *mask;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+ kv.value = fib_entry_index;
+
+ BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 1);
+
+ table->dst_address_length_refcounts[len]++;
+
+ table->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
+ 128 - len, 1);
+ compute_prefix_lengths_in_search_order (table);
+}
+
+u32 ip6_fib_table_fwding_lookup_with_if_index (ip6_main_t * im,
+ u32 sw_if_index,
+ const ip6_address_t * dst)
+{
+ u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+ return ip6_fib_table_fwding_lookup(im, fib_index, dst);
+}
+
+u32
+ip6_fib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+ if (sw_if_index >= vec_len(ip6_main.fib_index_by_sw_if_index))
+ {
+ /*
+ * This is the case for interfaces that are not yet mapped to
+ * an IP table
+ */
+ return (~0);
+ }
+ return (ip6_main.fib_index_by_sw_if_index[sw_if_index]);
+}
+
+void
+ip6_fib_table_fwding_dpo_update (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv;
+ ip6_address_t *mask;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+ kv.value = dpo->dpoi_index;
+
+ BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 1);
+
+ table->dst_address_length_refcounts[len]++;
+
+ table->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
+ 128 - len, 1);
+ compute_prefix_lengths_in_search_order (table);
+}
+
+void
+ip6_fib_table_fwding_dpo_remove (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv;
+ ip6_address_t *mask;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+ kv.value = dpo->dpoi_index;
+
+ BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 0);
+
+ /* refcount accounting */
+ ASSERT (table->dst_address_length_refcounts[len] > 0);
+ if (--table->dst_address_length_refcounts[len] == 0)
+ {
+ table->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
+ 128 - len, 0);
+ compute_prefix_lengths_in_search_order (table);
+ }
+}
+
+/**
+ * @brief Context when walking the IPv6 table. Since all VRFs share the
+ * same hash table, we must filter out all but the FIB we want as we walk
+ */
+typedef struct ip6_fib_walk_ctx_t_
+{
+ u32 i6w_fib_index;
+ fib_table_walk_fn_t i6w_fn;
+ void *i6w_ctx;
+} ip6_fib_walk_ctx_t;
+
+static int
+ip6_fib_walk_cb (clib_bihash_kv_24_8_t * kvp,
+ void *arg)
+{
+ ip6_fib_walk_ctx_t *ctx = arg;
+
+ if ((kvp->key[2] >> 32) == ctx->i6w_fib_index)
+ {
+ ctx->i6w_fn(kvp->value, ctx->i6w_ctx);
+ }
+
+ return (1);
+}
+
+void
+ip6_fib_table_walk (u32 fib_index,
+ fib_table_walk_fn_t fn,
+ void *arg)
+{
+ ip6_fib_walk_ctx_t ctx = {
+ .i6w_fib_index = fib_index,
+ .i6w_fn = fn,
+ .i6w_ctx = arg,
+ };
+ ip6_main_t *im = &ip6_main;
+
+ BV(clib_bihash_foreach_key_value_pair)(&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
+ ip6_fib_walk_cb,
+ &ctx);
+
+}
+
+typedef struct ip6_fib_show_ctx_t_ {
+ fib_node_index_t *entries;
+} ip6_fib_show_ctx_t;
+
+static int
+ip6_fib_table_show_walk (fib_node_index_t fib_entry_index,
+ void *arg)
+{
+ ip6_fib_show_ctx_t *ctx = arg;
+
+ vec_add1(ctx->entries, fib_entry_index);
+
+ return (1);
+}
+
+static void
+ip6_fib_table_show_all (ip6_fib_t *fib,
+ vlib_main_t * vm)
+{
+ fib_node_index_t *fib_entry_index;
+ ip6_fib_show_ctx_t ctx = {
+ .entries = NULL,
+ };
+
+ ip6_fib_table_walk(fib->index, ip6_fib_table_show_walk, &ctx);
+ vec_sort_with_function(ctx.entries, fib_entry_cmp_for_sort);
+
+ vec_foreach(fib_entry_index, ctx.entries)
+ {
+ vlib_cli_output(vm, "%U",
+ format_fib_entry,
+ *fib_entry_index,
+ FIB_ENTRY_FORMAT_BRIEF);
+ }
+
+ vec_free(ctx.entries);
+}
+
+static void
+ip6_fib_table_show_one (ip6_fib_t *fib,
+ vlib_main_t * vm,
+ ip6_address_t *address,
+ u32 mask_len,
+ int detail)
+{
+ vlib_cli_output(vm, "%U",
+ format_fib_entry,
+ ip6_fib_table_lookup(fib->index, address, mask_len),
+ (detail ?
+ FIB_ENTRY_FORMAT_DETAIL2:
+ FIB_ENTRY_FORMAT_DETAIL));
+}
+
+typedef struct {
+ u32 fib_index;
+ u64 count_by_prefix_length[129];
+} count_routes_in_fib_at_prefix_length_arg_t;
+
+static void
+count_routes_in_fib_at_prefix_length (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ count_routes_in_fib_at_prefix_length_arg_t * ap = arg;
+ int mask_width;
+
+ if ((kvp->key[2]>>32) != ap->fib_index)
+ return;
+
+ mask_width = kvp->key[2] & 0xFF;
+
+ ap->count_by_prefix_length[mask_width]++;
+}
+
+static clib_error_t *
+ip6_show_fib (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca;
+ ip6_main_t * im6 = &ip6_main;
+ fib_table_t *fib_table;
+ ip6_fib_t * fib;
+ int verbose, matching;
+ ip6_address_t matching_address;
+ u32 mask_len = 128;
+ int table_id = -1, fib_index = ~0;
+ int detail = 0;
+
+ verbose = 1;
+ matching = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "brief") ||
+ unformat (input, "summary") ||
+ unformat (input, "sum"))
+ verbose = 0;
+
+ else if (unformat (input, "detail") ||
+ unformat (input, "det"))
+ detail = 1;
+
+ else if (unformat (input, "%U/%d",
+ unformat_ip6_address, &matching_address, &mask_len))
+ matching = 1;
+
+ else if (unformat (input, "%U", unformat_ip6_address, &matching_address))
+ matching = 1;
+
+ else if (unformat (input, "table %d", &table_id))
+ ;
+ else if (unformat (input, "index %d", &fib_index))
+ ;
+ else
+ break;
+ }
+
+ pool_foreach (fib_table, im6->fibs,
+ ({
+ fib_source_t source;
+ u8 *s = NULL;
+
+ fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index);
+ if (table_id >= 0 && table_id != (int)fib->table_id)
+ continue;
+ if (fib_index != ~0 && fib_index != (int)fib->index)
+ continue;
+
+ s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[",
+ format_fib_table_name, fib->index,
+ FIB_PROTOCOL_IP6,
+ fib->index,
+ format_ip_flow_hash_config,
+ fib_table->ft_flow_hash_config);
+ FOR_EACH_FIB_SOURCE(source)
+ {
+ if (0 != fib_table->ft_locks[source])
+ {
+ s = format(s, "%U:%d, ",
+ format_fib_source, source,
+ fib_table->ft_locks[source]);
+ }
+ }
+ s = format (s, "]");
+ vlib_cli_output (vm, "%v", s);
+ vec_free(s);
+
+ /* Show summary? */
+ if (! verbose)
+ {
+ BVT(clib_bihash) * h = &im6->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash;
+ int len;
+
+ vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
+
+ memset (ca, 0, sizeof(*ca));
+ ca->fib_index = fib->index;
+
+ BV(clib_bihash_foreach_key_value_pair)
+ (h, count_routes_in_fib_at_prefix_length, ca);
+
+ for (len = 128; len >= 0; len--)
+ {
+ if (ca->count_by_prefix_length[len])
+ vlib_cli_output (vm, "%=20d%=16lld",
+ len, ca->count_by_prefix_length[len]);
+ }
+ continue;
+ }
+
+ if (!matching)
+ {
+ ip6_fib_table_show_all(fib, vm);
+ }
+ else
+ {
+ ip6_fib_table_show_one(fib, vm, &matching_address, mask_len, detail);
+ }
+ }));
+
+ return 0;
+}
+
+/*?
+ * This command displays the IPv6 FIB Tables (VRF Tables) and the route
+ * entries for each table.
+ *
+ * @note This command can take a long time to run when the FIB tables
+ * contain millions of entries. For those scenarios, consider displaying
+ * the tables in summary mode.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to display all the IPv6 FIB tables:
+ * @cliexstart{show ip6 fib}
+ * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * @::/0
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * fe80::/10
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::2/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::16/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1:ff00:0/104
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ipv6-VRF:8, fib_index 1, flow hash: src dst sport dport proto
+ * @::/0
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @::a:1:1:0:4/126
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
+ * [0] [@4]: ipv6-glean: af_packet0
+ * @::a:1:1:0:7/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
+ * [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
+ * fe80::/10
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * fe80::fe:3eff:fe3e:9222/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
+ * [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
+ * ff02::1/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::2/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::16/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1:ff00:0/104
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * @cliexend
+ *
+ * Example of how to display a summary of all IPv6 FIB tables:
+ * @cliexstart{show ip6 fib summary}
+ * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * Prefix length Count
+ * 128 3
+ * 104 1
+ * 10 1
+ * 0 1
+ * ipv6-VRF:8, fib_index 1, flow hash: src dst sport dport proto
+ * Prefix length Count
+ * 128 5
+ * 126 1
+ * 104 1
+ * 10 1
+ * 0 1
+ * @cliexend
+ * @endparblock
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
+ .path = "show ip6 fib",
+ .short_help = "show ip6 fib [summary] [table <table-id>] [index <fib-id>] [<ip6-addr>[/<width>]] [detail]",
+ .function = ip6_show_fib,
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h
new file mode 100644
index 00000000..9728eecc
--- /dev/null
+++ b/src/vnet/fib/ip6_fib.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IP6_FIB_H__
+#define __IP6_FIB_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/format.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/load_balance.h>
+
+extern fib_node_index_t ip6_fib_table_lookup(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len);
+extern fib_node_index_t ip6_fib_table_lookup_exact_match(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len);
+
+extern void ip6_fib_table_entry_remove(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len);
+
+extern void ip6_fib_table_entry_insert(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip6_fib_table_destroy(u32 fib_index);
+
+extern void ip6_fib_table_fwding_dpo_update(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+
+extern void ip6_fib_table_fwding_dpo_remove(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+
+u32 ip6_fib_table_fwding_lookup_with_if_index(ip6_main_t * im,
+ u32 sw_if_index,
+ const ip6_address_t * dst);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B.: This walk is NOT safe with respect to deletes. If you need to
+ * delete, walk the whole table first, store the entries in a vector,
+ * then delete the elements
+ */
+extern void ip6_fib_table_walk(u32 fib_index,
+ fib_table_walk_fn_t fn,
+ void *ctx);
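+
+/*
+ * A minimal sketch of the delete-safe pattern described above (illustrative;
+ * 'collect_cb' and the surrounding code are hypothetical caller code):
+ * gather the entry indices during the walk, then delete afterwards.
+ *
+ *  static int
+ *  collect_cb (fib_node_index_t fei, void *arg)
+ *  {
+ *      fib_node_index_t **feis = arg;
+ *      vec_add1 (*feis, fei);
+ *      return (1);
+ *  }
+ *
+ *  fib_node_index_t *feis = NULL, *fei;
+ *  ip6_fib_table_walk (fib_index, collect_cb, &feis);
+ *  vec_foreach (fei, feis)
+ *      fib_table_entry_delete_index (*fei, FIB_SOURCE_API);
+ *  vec_free (feis);
+ */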
+
+always_inline u32
+ip6_fib_table_fwding_lookup (ip6_main_t * im,
+ u32 fib_index,
+ const ip6_address_t * dst)
+{
+ ip6_fib_table_instance_t *table;
+ int i, len;
+ int rv;
+ BVT(clib_bihash_kv) kv, value;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING];
+ len = vec_len (table->prefix_lengths_in_search_order);
+
+ kv.key[0] = dst->as_u64[0];
+ kv.key[1] = dst->as_u64[1];
+ fib = ((u64)((fib_index))<<32);
+
+ for (i = 0; i < len; i++)
+ {
+ int dst_address_length = table->prefix_lengths_in_search_order[i];
+ ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length];
+
+ ASSERT(dst_address_length >= 0 && dst_address_length <= 128);
+ //As lengths decrease through the search order, the masks get shorter,
+ //so the cumulative &= below only ever clears additional bits.
+ kv.key[0] &= mask->as_u64[0];
+ kv.key[1] &= mask->as_u64[1];
+ kv.key[2] = fib | dst_address_length;
+
+ rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value);
+ if (rv == 0)
+ return value.value;
+ }
+
+ /* default route is always present */
+ ASSERT(0);
+ return 0;
+}
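+
+/*
+ * Usage sketch (illustrative; 'fib_index' and 'dst' are assumed to be in
+ * scope): resolve a destination to a load-balance index, then to its
+ * first DPO bucket:
+ *
+ *  index_t lbi = ip6_fib_table_fwding_lookup (&ip6_main, fib_index, &dst);
+ *  const dpo_id_t *dpo =
+ *      load_balance_get_bucket_i (load_balance_get (lbi), 0);
+ */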
+
+/**
+ * @brief Source-address lookup: return (and cache in the buffer) the index
+ * of the adjacency that the resulting load-balance stacks on, if any.
+ */
+always_inline u32
+ip6_src_lookup_for_packet (ip6_main_t * im,
+ vlib_buffer_t * b,
+ ip6_header_t * i)
+{
+ if (vnet_buffer (b)->ip.adj_index[VLIB_RX] == ~0)
+ {
+ const dpo_id_t *dpo;
+ index_t lbi;
+
+ lbi = ip6_fib_table_fwding_lookup_with_if_index(
+ im,
+ vnet_buffer (b)->sw_if_index[VLIB_RX],
+ &i->src_address);
+
+ dpo = load_balance_get_bucket_i(load_balance_get(lbi), 0);
+
+ if (dpo_is_adj(dpo))
+ {
+ vnet_buffer (b)->ip.adj_index[VLIB_RX] = dpo->dpoi_index;
+ }
+ }
+ return vnet_buffer (b)->ip.adj_index[VLIB_RX];
+}
+
+/**
+ * \brief Get or create an IPv6 fib.
+ *
+ * Get or create an IPv6 fib with the provided table ID.
+ *
+ * \param table_id
+ * When set to \c ~0, an arbitrary and unused fib ID is picked
+ * and can be retrieved with \c ret->table_id.
+ * Otherwise, the fib ID to be used to retrieve or create the desired fib.
+ * \param src
+ * The source (entity) requesting the fib; the fib is locked on its behalf.
+ * \returns The index of the retrieved or created fib.
+ *
+ */
+extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id,
+ fib_source_t src);
+extern u32 ip6_fib_table_create_and_lock(fib_source_t src);
+
+static inline ip6_fib_t *
+ip6_fib_get (fib_node_index_t index)
+{
+ ASSERT(!pool_is_free_index(ip6_main.fibs, index));
+ return (pool_elt_at_index (ip6_main.v6_fibs, index));
+}
+
+static inline
+u32 ip6_fib_index_from_table_id (u32 table_id)
+{
+ ip6_main_t * im = &ip6_main;
+ uword * p;
+
+ p = hash_get (im->fib_index_by_table_id, table_id);
+ if (!p)
+ return ~0;
+
+ return p[0];
+}
+
+extern u32 ip6_fib_table_get_index_for_sw_if_index(u32 sw_if_index);
+
+#endif
+
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
new file mode 100644
index 00000000..4eeef7ab
--- /dev/null
+++ b/src/vnet/fib/mpls_fib.c
@@ -0,0 +1,456 @@
+/*
+ * mpls_fib.c: The Label/MPLS FIB
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * An MPLS_FIB table;
+ *
+ * The entries in the table are programmed with one or more MOIs. These MOIs
+ * may result in different forwarding actions for end-of-stack (EOS) and non-EOS
+ * packets. Whether the two actions are the same more often than they are
+ * different, or vice versa, is a function of the deployment in which the router
+ * is used and thus not predictable.
+ * The design choice to make with an MPLS_FIB table is:
+ * 1 - 20 bit key: label only.
+ * When the EOS and non-EOS actions differ the result is an 'EOS-choice' object.
+ * 2 - 21 bit key: label and EOS-bit.
+ * The result is then the specific action based on EOS-bit.
+ *
+ * 20 bit key:
+ * Advantages:
+ * - lower memory overhead, since there are fewer DB entries.
+ * Disadvantages:
+ * - slower DP performance when the chains differ, as more objects are
+ * encountered in the switch path.
+ *
+ * 21 bit key:
+ * Advantages:
+ * - faster DP performance
+ * Disadvantages:
+ * - increased memory footprint.
+ *
+ * Switching between schemes based on observed/measured action similarity is not
+ * considered on the grounds of complexity and flip-flopping.
+ *
+ * VPP mantra - favour performance over memory. We choose a 21 bit key.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/**
+ * All lookups in an MPLS_FIB table must result in a DPO of type load-balance.
+ * This is the default result which links to drop
+ */
+static index_t mpls_fib_drop_dpo_index = INDEX_INVALID;
+
+static inline u32
+mpls_fib_entry_mk_key (mpls_label_t label,
+ mpls_eos_bit_t eos)
+{
+ ASSERT(eos <= 1);
+ return (label << 1 | eos);
+}
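+
+/* e.g. label 42 with the EOS bit set yields key (42 << 1) | 1 = 85 */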
+
+u32
+mpls_fib_index_from_table_id (u32 table_id)
+{
+ mpls_main_t *mm = &mpls_main;
+ uword * p;
+
+ p = hash_get (mm->fib_index_by_table_id, table_id);
+ if (!p)
+ return FIB_NODE_INDEX_INVALID;
+
+ return p[0];
+}
+
+static u32
+mpls_fib_create_with_table_id (u32 table_id,
+ fib_source_t src)
+{
+ dpo_id_t dpo = DPO_INVALID;
+ fib_table_t *fib_table;
+ mpls_eos_bit_t eos;
+ mpls_fib_t *mf;
+ int i;
+
+ pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+ pool_get_aligned(mpls_main.mpls_fibs, mf, CLIB_CACHE_LINE_BYTES);
+
+ ASSERT((fib_table - mpls_main.fibs) ==
+ (mf - mpls_main.mpls_fibs));
+
+ memset(fib_table, 0, sizeof(*fib_table));
+
+ fib_table->ft_proto = FIB_PROTOCOL_MPLS;
+ fib_table->ft_index = (fib_table - mpls_main.fibs);
+
+ hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index);
+
+ fib_table->ft_table_id = table_id;
+ fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT;
+
+ fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS, src);
+
+ if (INDEX_INVALID == mpls_fib_drop_dpo_index)
+ {
+ mpls_fib_drop_dpo_index = load_balance_create(1, DPO_PROTO_MPLS, 0);
+ load_balance_set_bucket(mpls_fib_drop_dpo_index,
+ 0,
+ drop_dpo_get(DPO_PROTO_MPLS));
+ }
+
+ mf->mf_entries = hash_create(0, sizeof(fib_node_index_t));
+ for (i = 0; i < MPLS_FIB_DB_SIZE; i++)
+ {
+ /*
+ * initialise each DPO in the data-path lookup table
+ * to be the special MPLS drop
+ */
+ mf->mf_lbs[i] = mpls_fib_drop_dpo_index;
+ }
+
+ /*
+ * non-default forwarding for the special labels.
+ */
+ fib_prefix_t prefix = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ /*
+ * PUNT the router alert, both EOS and non-eos
+ */
+ prefix.fp_label = MPLS_IETF_ROUTER_ALERT_LABEL;
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ prefix.fp_eos = eos;
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ punt_dpo_get(DPO_PROTO_MPLS));
+ }
+
+ /*
+ * IPv4 explicit NULL EOS lookup in the interface's IPv4 table
+ */
+ prefix.fp_label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+ prefix.fp_payload_proto = DPO_PROTO_IP4;
+ prefix.fp_eos = MPLS_EOS;
+
+ lookup_dpo_add_or_lock_w_fib_index(0, // unused
+ DPO_PROTO_IP4,
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ &dpo);
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+
+ prefix.fp_payload_proto = DPO_PROTO_MPLS;
+ prefix.fp_eos = MPLS_NON_EOS;
+
+ lookup_dpo_add_or_lock_w_fib_index(0, // unused
+ DPO_PROTO_MPLS,
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ &dpo);
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+
+ /*
+ * IPv6 explicit NULL EOS lookup in the interface's IPv6 table
+ */
+ prefix.fp_label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+ prefix.fp_payload_proto = DPO_PROTO_IP6;
+ prefix.fp_eos = MPLS_EOS;
+
+ lookup_dpo_add_or_lock_w_fib_index(0, //unused
+ DPO_PROTO_IP6,
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ &dpo);
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+
+ prefix.fp_payload_proto = DPO_PROTO_MPLS;
+ prefix.fp_eos = MPLS_NON_EOS;
+ lookup_dpo_add_or_lock_w_fib_index(0, // unused
+ DPO_PROTO_MPLS,
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ &dpo);
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+
+ return (fib_table->ft_index);
+}
+
+u32
+mpls_fib_table_find_or_create_and_lock (u32 table_id,
+ fib_source_t src)
+{
+ u32 index;
+
+ index = mpls_fib_index_from_table_id(table_id);
+ if (~0 == index)
+ return mpls_fib_create_with_table_id(table_id, src);
+
+ fib_table_lock(index, FIB_PROTOCOL_MPLS, src);
+
+ return (index);
+}
+u32
+mpls_fib_table_create_and_lock (fib_source_t src)
+{
+ return (mpls_fib_create_with_table_id(~0, src));
+}
+
+void
+mpls_fib_table_destroy (u32 fib_index)
+{
+ fib_table_t *fib_table = pool_elt_at_index(mpls_main.fibs, fib_index);
+ mpls_fib_t *mf = pool_elt_at_index(mpls_main.mpls_fibs, fib_index);
+ fib_prefix_t prefix = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ };
+ mpls_label_t special_labels[] = {
+ MPLS_IETF_ROUTER_ALERT_LABEL,
+ MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+ MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL,
+ };
+ mpls_eos_bit_t eos;
+ u32 ii;
+
+ for (ii = 0; ii < ARRAY_LEN(special_labels); ii++)
+ {
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ prefix.fp_label = special_labels[ii];
+ prefix.fp_eos = eos;
+
+ fib_table_entry_delete(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL);
+ }
+ }
+ if (~0 != fib_table->ft_table_id)
+ {
+ hash_unset(mpls_main.fib_index_by_table_id,
+ fib_table->ft_table_id);
+ }
+ hash_free(mf->mf_entries);
+
+ pool_put(mpls_main.mpls_fibs, mf);
+ pool_put(mpls_main.fibs, fib_table);
+}
+
+fib_node_index_t
+mpls_fib_table_lookup (const mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos)
+{
+ uword *p;
+
+ p = hash_get(mf->mf_entries, mpls_fib_entry_mk_key(label, eos));
+
+ if (NULL == p)
+ return FIB_NODE_INDEX_INVALID;
+
+ return p[0];
+}
+
+void
+mpls_fib_table_entry_insert (mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos,
+ fib_node_index_t lfei)
+{
+ hash_set(mf->mf_entries, mpls_fib_entry_mk_key(label, eos), lfei);
+}
+
+void
+mpls_fib_table_entry_remove (mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos)
+{
+ hash_unset(mf->mf_entries, mpls_fib_entry_mk_key(label, eos));
+}
+
+void
+mpls_fib_forwarding_table_update (mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos,
+ const dpo_id_t *dpo)
+{
+ mpls_label_t key;
+
+ ASSERT((DPO_LOAD_BALANCE == dpo->dpoi_type) ||
+ (DPO_REPLICATE == dpo->dpoi_type));
+ if (CLIB_DEBUG > 0)
+ {
+ if (DPO_REPLICATE == dpo->dpoi_type)
+ ASSERT(dpo->dpoi_index & MPLS_IS_REPLICATE);
+ if (DPO_LOAD_BALANCE == dpo->dpoi_type)
+ ASSERT(!(dpo->dpoi_index & MPLS_IS_REPLICATE));
+ }
+ key = mpls_fib_entry_mk_key(label, eos);
+
+ mf->mf_lbs[key] = dpo->dpoi_index;
+}
+
+void
+mpls_fib_forwarding_table_reset (mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos)
+{
+ mpls_label_t key;
+
+ key = mpls_fib_entry_mk_key(label, eos);
+
+ mf->mf_lbs[key] = mpls_fib_drop_dpo_index;
+}
+
+void
+mpls_fib_table_walk (mpls_fib_t *mpls_fib,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ fib_node_index_t lfei;
+ mpls_label_t key;
+
+ hash_foreach(key, lfei, mpls_fib->mf_entries,
+ ({
+ fn(lfei, ctx);
+ }));
+}
+
+static void
+mpls_fib_table_show_all (const mpls_fib_t *mpls_fib,
+ vlib_main_t * vm)
+{
+ fib_node_index_t lfei, *lfeip, *lfeis = NULL;
+ mpls_label_t key;
+
+ hash_foreach(key, lfei, mpls_fib->mf_entries,
+ ({
+ vec_add1(lfeis, lfei);
+ }));
+
+ vec_sort_with_function(lfeis, fib_entry_cmp_for_sort);
+
+ vec_foreach(lfeip, lfeis)
+ {
+ vlib_cli_output (vm, "%U",
+ format_fib_entry, *lfeip,
+ FIB_ENTRY_FORMAT_DETAIL);
+ }
+ vec_free(lfeis);
+}
+
+static void
+mpls_fib_table_show_one (const mpls_fib_t *mpls_fib,
+ mpls_label_t label,
+ vlib_main_t * vm)
+{
+ fib_node_index_t lfei;
+ mpls_eos_bit_t eos;
+
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ lfei = mpls_fib_table_lookup(mpls_fib, label, eos);
+
+ if (FIB_NODE_INDEX_INVALID != lfei)
+ {
+ vlib_cli_output (vm, "%U",
+ format_fib_entry, lfei, FIB_ENTRY_FORMAT_DETAIL);
+ }
+ }
+}
+
+static clib_error_t *
+mpls_fib_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ fib_table_t * fib_table;
+ mpls_label_t label;
+ int table_id;
+
+ table_id = -1;
+ label = MPLS_LABEL_INVALID;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ /* if (unformat (input, "brief") || unformat (input, "summary") */
+ /* || unformat (input, "sum")) */
+ /* verbose = 0; */
+
+ if (unformat (input, "%d", &label))
+ continue;
+ else if (unformat (input, "table %d", &table_id))
+ ;
+ else
+ break;
+ }
+
+ pool_foreach (fib_table, mpls_main.fibs,
+ ({
+ if (table_id >= 0 && table_id != fib_table->ft_table_id)
+ continue;
+
+ vlib_cli_output (vm, "%v, fib_index %d",
+ fib_table->ft_desc, fib_table - mpls_main.fibs);
+
+ if (MPLS_LABEL_INVALID == label)
+ {
+ mpls_fib_table_show_all(mpls_fib_get(fib_table->ft_index), vm);
+ }
+ else
+ {
+ mpls_fib_table_show_one(mpls_fib_get(fib_table->ft_index), label, vm);
+ }
+ }));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (mpls_fib_show_command, static) = {
+ .path = "show mpls fib",
+ .short_help = "show mpls fib [summary] [table <n>]",
+ .function = mpls_fib_show,
+};
diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h
new file mode 100644
index 00000000..29cd1d20
--- /dev/null
+++ b/src/vnet/fib/mpls_fib.h
@@ -0,0 +1,139 @@
+/*
+ * mpls_fib.h: The Label/MPLS FIB
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_FIB_TABLE_H__
+#define __MPLS_FIB_TABLE_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_table.h>
+
+#define MPLS_FIB_DEFAULT_TABLE_ID 0
+
+/**
+ * Types are exposed here to allow fast/inlined access from the data-plane
+ */
+#define MPLS_FIB_KEY_SIZE 21
+#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
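+/* i.e. the flat result array below holds 1 << 20 entries */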
+
+/**
+ * There are no options for controlling the MPLS flow hash
+ */
+#define MPLS_FLOW_HASH_DEFAULT 0
+
+typedef struct mpls_fib_t_
+{
+ /**
+ * A hash table of entries, keyed by the 21 bit label+EOS key.
+ * A hash table is used for its reduced memory footprint.
+ */
+ uword * mf_entries;
+
+ /**
+ * The load-balance indices keyed by 21 bit label+eos bit.
+ * A flat array for maximum lookup performance.
+ */
+ index_t mf_lbs[MPLS_FIB_DB_SIZE];
+} mpls_fib_t;
+
+static inline mpls_fib_t*
+mpls_fib_get (fib_node_index_t index)
+{
+ return (pool_elt_at_index(mpls_main.mpls_fibs, index));
+}
+
+extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id,
+ fib_source_t src);
+extern u32 mpls_fib_table_create_and_lock(fib_source_t src);
+// extern mpls_fib_t * mpls_fib_find(u32 table_id);
+extern u32 mpls_fib_index_from_table_id(u32 table_id);
+
+extern u8 *format_mpls_fib_table_name(u8 * s, va_list * args);
+
+extern fib_node_index_t mpls_fib_table_entry_add_from_ip_fib_entry (
+ u32 table_id,
+ mpls_label_t label,
+ mpls_eos_bit_t eos,
+ fib_node_index_t fib_entry_index);
+
+
+extern fib_node_index_t mpls_fib_table_lookup(const mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos);
+
+extern void mpls_fib_table_entry_remove(mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos);
+extern void mpls_fib_table_entry_insert(mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos,
+ fib_node_index_t fei);
+extern void mpls_fib_table_destroy(u32 fib_index);
+
+
+extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos,
+ const dpo_id_t *dpo);
+extern void mpls_fib_forwarding_table_reset(mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B.: This walk is NOT safe with respect to deletes. If you need to
+ * delete, walk the whole table first, store the entries in a vector,
+ * then delete the elements
+ */
+extern void mpls_fib_table_walk(mpls_fib_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx);
+
+/**
+ * @brief
+ * Lookup a label and EOS bit in the MPLS_FIB table to retrieve the
+ * load-balance index to be used for packet forwarding.
+ */
+static inline index_t
+mpls_fib_table_forwarding_lookup (u32 mpls_fib_index,
+ const mpls_unicast_header_t *hdr)
+{
+ mpls_label_t label;
+ mpls_fib_t *mf;
+ u32 key;
+
+ label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+ key = (vnet_mpls_uc_get_label(label) << 1) | vnet_mpls_uc_get_s(label);
+
+ mf = mpls_fib_get(mpls_fib_index);
+
+ return (mf->mf_lbs[key]);
+}
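+
+/*
+ * Usage sketch (illustrative; 'b0' is a hypothetical received buffer):
+ * map the label at the current buffer position to a load-balance index:
+ *
+ *  mpls_unicast_header_t *hdr = vlib_buffer_get_current (b0);
+ *  index_t lbi = mpls_fib_table_forwarding_lookup (fib_index, hdr);
+ */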
+
+static inline u32
+mpls_fib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+ mpls_main_t *mm = &mpls_main;
+
+ ASSERT(vec_len(mm->fib_index_by_sw_if_index) > sw_if_index);
+
+ return (mm->fib_index_by_sw_if_index[sw_if_index]);
+}
+
+#endif
diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api
new file mode 100644
index 00000000..1c5e8c5c
--- /dev/null
+++ b/src/vnet/flow/flow.api
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Configure IPFIX exporter process request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param collector_address - address of IPFIX collector
+ @param collector_port - port of IPFIX collector
+ @param src_address - address of IPFIX exporter
+ @param vrf_id - VRF / fib table ID
+ @param path_mtu - Path MTU between exporter and collector
+ @param template_interval - number of seconds after which to resend template
+ @param udp_checksum - UDP checksum calculation enable flag
+*/
+autoreply define set_ipfix_exporter
+{
+ u32 client_index;
+ u32 context;
+ u8 collector_address[16];
+ u16 collector_port;
+ u8 src_address[16];
+ u32 vrf_id;
+ u32 path_mtu;
+ u32 template_interval;
+ u8 udp_checksum;
+};
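+
+/* For reference, the equivalent debug CLI (see flow_report.c) is, with
+ * illustrative addresses:
+ *   set ipfix exporter collector 192.0.2.1 port 4739 src 192.0.2.2
+ *       [fib-id <n>] [path-mtu <n>] [template-interval <n>] [udp-checksum]
+ */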
+
+/** \brief IPFIX exporter dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ipfix_exporter_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply to IPFIX exporter dump request
+ @param context - sender context which was passed in the request
+ @param collector_address - address of IPFIX collector
+ @param collector_port - port of IPFIX collector
+ @param src_address - address of IPFIX exporter
+ @param vrf_id - VRF / fib table ID
+ @param path_mtu - Path MTU between exporter and collector
+ @param template_interval - number of seconds after which to resend template
+ @param udp_checksum - UDP checksum calculation enable flag
+*/
+define ipfix_exporter_details
+{
+ u32 context;
+ u8 collector_address[16];
+ u16 collector_port;
+ u8 src_address[16];
+ u32 vrf_id;
+ u32 path_mtu;
+ u32 template_interval;
+ u8 udp_checksum;
+};
+
+/** \brief IPFIX classify stream configure request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param domain_id - domain ID reported in IPFIX messages for classify stream
+ @param src_port - source port of UDP session for classify stream
+*/
+autoreply define set_ipfix_classify_stream {
+ u32 client_index;
+ u32 context;
+ u32 domain_id;
+ u16 src_port;
+};
+
+/** \brief IPFIX classify stream dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ipfix_classify_stream_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply to IPFIX classify stream dump request
+ @param context - sender context, to match reply w/ request
+ @param domain_id - domain ID reported in IPFIX messages for classify stream
+ @param src_port - source port of UDP session for classify stream
+*/
+define ipfix_classify_stream_details {
+ u32 context;
+ u32 domain_id;
+ u16 src_port;
+};
+
+/** \brief IPFIX add or delete classifier table request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_id - classifier table ID
+ @param ip_version - version of IP used in the classifier table
+ @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified
+*/
+autoreply define ipfix_classify_table_add_del {
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+ u8 ip_version;
+ u8 transport_protocol;
+ u8 is_add;
+};
+
+/** \brief IPFIX classify tables dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ipfix_classify_table_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply to IPFIX classify tables dump request
+ @param context - sender context, to match reply w/ request
+ @param table_id - classifier table ID
+ @param ip_version - version of IP used in the classifier table
+ @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified
+*/
+define ipfix_classify_table_details {
+ u32 context;
+ u32 table_id;
+ u8 ip_version;
+ u8 transport_protocol;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/flow/flow_api.c b/src/vnet/flow/flow_api.c
new file mode 100644
index 00000000..52a608ca
--- /dev/null
+++ b/src/vnet/flow/flow_api.c
@@ -0,0 +1,397 @@
+/*
+ *------------------------------------------------------------------
+ * flow_api.c - flow api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/flow/flow_report.h>
+#include <vnet/flow/flow_report_classify.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SET_IPFIX_EXPORTER, set_ipfix_exporter) \
+_(IPFIX_EXPORTER_DUMP, ipfix_exporter_dump) \
+_(SET_IPFIX_CLASSIFY_STREAM, set_ipfix_classify_stream) \
+_(IPFIX_CLASSIFY_STREAM_DUMP, ipfix_classify_stream_dump) \
+_(IPFIX_CLASSIFY_TABLE_ADD_DEL, ipfix_classify_table_add_del) \
+_(IPFIX_CLASSIFY_TABLE_DUMP, ipfix_classify_table_dump)
+
+static void
+vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ flow_report_main_t *frm = &flow_report_main;
+ vl_api_set_ipfix_exporter_reply_t *rmp;
+ ip4_address_t collector, src;
+ u16 collector_port = UDP_DST_PORT_ipfix;
+ u32 path_mtu;
+ u32 template_interval;
+ u8 udp_checksum;
+ u32 fib_id;
+ u32 fib_index = ~0;
+ int rv = 0;
+
+ memcpy (collector.data, mp->collector_address, sizeof (collector.data));
+ collector_port = ntohs (mp->collector_port);
+ if (collector_port == (u16) ~ 0)
+ collector_port = UDP_DST_PORT_ipfix;
+ memcpy (src.data, mp->src_address, sizeof (src.data));
+ fib_id = ntohl (mp->vrf_id);
+
+ ip4_main_t *im = &ip4_main;
+ if (fib_id == ~0)
+ {
+ fib_index = ~0;
+ }
+ else
+ {
+ uword *p = hash_get (im->fib_index_by_table_id, fib_id);
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+ fib_index = p[0];
+ }
+
+ path_mtu = ntohl (mp->path_mtu);
+ if (path_mtu == ~0)
+ path_mtu = 512; // RFC 7011 section 10.3.3.
+ template_interval = ntohl (mp->template_interval);
+ if (template_interval == ~0)
+ template_interval = 20;
+ udp_checksum = mp->udp_checksum;
+
+ if (collector.as_u32 == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ if (src.as_u32 == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ if (path_mtu > 1450 /* vpp does not support fragmentation */ )
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ if (path_mtu < 68)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ /* Reset report streams if we are reconfiguring IP addresses */
+ if (frm->ipfix_collector.as_u32 != collector.as_u32 ||
+ frm->src_address.as_u32 != src.as_u32 ||
+ frm->collector_port != collector_port)
+ vnet_flow_reports_reset (frm);
+
+ frm->ipfix_collector.as_u32 = collector.as_u32;
+ frm->collector_port = collector_port;
+ frm->src_address.as_u32 = src.as_u32;
+ frm->fib_index = fib_index;
+ frm->path_mtu = path_mtu;
+ frm->template_interval = template_interval;
+ frm->udp_checksum = udp_checksum;
+
+ /* Turn on the flow reporting process */
+ vlib_process_signal_event (vm, flow_report_process_node.index, 1, 0);
+
+out:
+ REPLY_MACRO (VL_API_SET_IPFIX_EXPORTER_REPLY);
+}
+
+static void
+vl_api_ipfix_exporter_dump_t_handler (vl_api_ipfix_exporter_dump_t * mp)
+{
+ flow_report_main_t *frm = &flow_report_main;
+ unix_shared_memory_queue_t *q;
+ vl_api_ipfix_exporter_details_t *rmp;
+ ip4_main_t *im = &ip4_main;
+ u32 vrf_id;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_IPFIX_EXPORTER_DETAILS);
+ rmp->context = mp->context;
+ memcpy (rmp->collector_address, frm->ipfix_collector.data,
+ sizeof (frm->ipfix_collector.data));
+ rmp->collector_port = htons (frm->collector_port);
+ memcpy (rmp->src_address, frm->src_address.data,
+ sizeof (frm->src_address.data));
+ if (frm->fib_index == ~0)
+ vrf_id = ~0;
+ else
+ vrf_id = im->fibs[frm->fib_index].ft_table_id;
+ rmp->vrf_id = htonl (vrf_id);
+ rmp->path_mtu = htonl (frm->path_mtu);
+ rmp->template_interval = htonl (frm->template_interval);
+ rmp->udp_checksum = (frm->udp_checksum != 0);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_set_ipfix_classify_stream_t_handler
+ (vl_api_set_ipfix_classify_stream_t * mp)
+{
+ vl_api_set_ipfix_classify_stream_reply_t *rmp;
+ flow_report_classify_main_t *fcm = &flow_report_classify_main;
+ flow_report_main_t *frm = &flow_report_main;
+ u32 domain_id = 0;
+ u32 src_port = UDP_DST_PORT_ipfix;
+ int rv = 0;
+
+ domain_id = ntohl (mp->domain_id);
+ src_port = ntohs (mp->src_port);
+
+ if (fcm->src_port != 0 &&
+ (fcm->domain_id != domain_id || fcm->src_port != (u16) src_port))
+ {
+ int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port,
+ domain_id, (u16) src_port);
+ ASSERT (rv == 0);
+ }
+
+ fcm->domain_id = domain_id;
+ fcm->src_port = (u16) src_port;
+
+ REPLY_MACRO (VL_API_SET_IPFIX_CLASSIFY_STREAM_REPLY);
+}
+
+static void
+ vl_api_ipfix_classify_stream_dump_t_handler
+ (vl_api_ipfix_classify_stream_dump_t * mp)
+{
+ flow_report_classify_main_t *fcm = &flow_report_classify_main;
+ unix_shared_memory_queue_t *q;
+ vl_api_ipfix_classify_stream_details_t *rmp;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_IPFIX_CLASSIFY_STREAM_DETAILS);
+ rmp->context = mp->context;
+ rmp->domain_id = htonl (fcm->domain_id);
+ rmp->src_port = htons (fcm->src_port);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_ipfix_classify_table_add_del_t_handler
+ (vl_api_ipfix_classify_table_add_del_t * mp)
+{
+ vl_api_ipfix_classify_table_add_del_reply_t *rmp;
+ flow_report_classify_main_t *fcm = &flow_report_classify_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vnet_flow_report_add_del_args_t args;
+ ipfix_classify_table_t *table;
+ int is_add;
+ u32 classify_table_index;
+ u8 ip_version;
+ u8 transport_protocol;
+ int rv = 0;
+
+ classify_table_index = ntohl (mp->table_id);
+ ip_version = mp->ip_version;
+ transport_protocol = mp->transport_protocol;
+ is_add = mp->is_add;
+
+ if (fcm->src_port == 0)
+ {
+ /* call set_ipfix_classify_stream first */
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ goto out;
+ }
+
+ memset (&args, 0, sizeof (args));
+
+ table = 0;
+ int i;
+ for (i = 0; i < vec_len (fcm->tables); i++)
+ if (ipfix_classify_table_index_valid (i))
+ if (fcm->tables[i].classify_table_index == classify_table_index)
+ {
+ table = &fcm->tables[i];
+ break;
+ }
+
+ if (is_add)
+ {
+ if (table)
+ {
+ rv = VNET_API_ERROR_VALUE_EXIST;
+ goto out;
+ }
+ table = ipfix_classify_add_table ();
+ table->classify_table_index = classify_table_index;
+ }
+ else
+ {
+ if (!table)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
+ }
+ }
+
+ table->ip_version = ip_version;
+ table->transport_protocol = transport_protocol;
+
+ args.opaque.as_uword = table - fcm->tables;
+ args.rewrite_callback = ipfix_classify_template_rewrite;
+ args.flow_data_callback = ipfix_classify_send_flows;
+ args.is_add = is_add;
+ args.domain_id = fcm->domain_id;
+ args.src_port = fcm->src_port;
+
+ rv = vnet_flow_report_add_del (frm, &args, NULL);
+
+ /* If deleting, or add failed */
+ if (is_add == 0 || (rv && is_add))
+ ipfix_classify_delete_table (table - fcm->tables);
+
+out:
+ REPLY_MACRO (VL_API_IPFIX_CLASSIFY_TABLE_ADD_DEL_REPLY);
+}
+
+static void
+send_ipfix_classify_table_details (u32 table_index,
+ unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ flow_report_classify_main_t *fcm = &flow_report_classify_main;
+ vl_api_ipfix_classify_table_details_t *mp;
+
+ ipfix_classify_table_t *table = &fcm->tables[table_index];
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IPFIX_CLASSIFY_TABLE_DETAILS);
+ mp->context = context;
+ mp->table_id = htonl (table->classify_table_index);
+ mp->ip_version = table->ip_version;
+ mp->transport_protocol = table->transport_protocol;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+ vl_api_ipfix_classify_table_dump_t_handler
+ (vl_api_ipfix_classify_table_dump_t * mp)
+{
+ flow_report_classify_main_t *fcm = &flow_report_classify_main;
+ unix_shared_memory_queue_t *q;
+ u32 i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ for (i = 0; i < vec_len (fcm->tables); i++)
+ if (ipfix_classify_table_index_valid (i))
+ send_ipfix_classify_table_details (i, q, mp->context);
+}
+
+/*
+ * flow_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_flow;
+#undef _
+}
+
+static clib_error_t *
+flow_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (flow_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/flow/flow_report.c b/src/vnet/flow/flow_report.c
new file mode 100644
index 00000000..ccc84235
--- /dev/null
+++ b/src/vnet/flow/flow_report.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * flow_report.c
+ */
+#include <vnet/flow/flow_report.h>
+#include <vnet/api_errno.h>
+
+flow_report_main_t flow_report_main;
+
+static_always_inline u8 stream_index_valid (u32 index)
+{
+ flow_report_main_t * frm = &flow_report_main;
+ return index < vec_len(frm->streams) &&
+ frm->streams[index].domain_id != ~0;
+}
+
+static_always_inline flow_report_stream_t * add_stream (void)
+{
+ flow_report_main_t * frm = &flow_report_main;
+ u32 i;
+ for (i = 0; i < vec_len(frm->streams); i++)
+ if (!stream_index_valid(i))
+ return &frm->streams[i];
+ u32 index = vec_len(frm->streams);
+ vec_validate(frm->streams, index);
+ return &frm->streams[index];
+}
+
+static_always_inline void delete_stream (u32 index)
+{
+ flow_report_main_t * frm = &flow_report_main;
+ ASSERT (index < vec_len(frm->streams));
+ ASSERT (frm->streams[index].domain_id != ~0);
+ frm->streams[index].domain_id = ~0;
+}
+
+static i32 find_stream (u32 domain_id, u16 src_port)
+{
+ flow_report_main_t * frm = &flow_report_main;
+ flow_report_stream_t * stream;
+ u32 i;
+ for (i = 0; i < vec_len(frm->streams); i++)
+ if (stream_index_valid(i)) {
+ stream = &frm->streams[i];
+ if (domain_id == stream->domain_id) {
+ if (src_port != stream->src_port)
+ return -2;
+ return i;
+ } else if (src_port == stream->src_port) {
+ return -2;
+ }
+ }
+ return -1;
+}
+
+int send_template_packet (flow_report_main_t *frm,
+ flow_report_t *fr,
+ u32 * buffer_indexp)
+{
+ u32 bi0;
+ vlib_buffer_t * b0;
+ ip4_ipfix_template_packet_t * tp;
+ ipfix_message_header_t * h;
+ ip4_header_t * ip;
+ udp_header_t * udp;
+ vlib_main_t * vm = frm->vlib_main;
+ flow_report_stream_t * stream;
+ vlib_buffer_free_list_t *fl;
+
+ ASSERT (buffer_indexp);
+
+ if (fr->update_rewrite || fr->rewrite == 0)
+ {
+ if (frm->ipfix_collector.as_u32 == 0
+ || frm->src_address.as_u32 == 0)
+ {
+ vlib_node_set_state (frm->vlib_main, flow_report_process_node.index,
+ VLIB_NODE_STATE_DISABLED);
+ return -1;
+ }
+ vec_free (fr->rewrite);
+ fr->update_rewrite = 1;
+ }
+
+ if (fr->update_rewrite)
+ {
+ fr->rewrite = fr->rewrite_callback (frm, fr,
+ &frm->ipfix_collector,
+ &frm->src_address,
+ frm->collector_port);
+ fr->update_rewrite = 0;
+ }
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ return -1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* Initialize the buffer */
+ fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (b0, fl);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+ ASSERT (vec_len (fr->rewrite) < VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES);
+
+ clib_memcpy (b0->data, fr->rewrite, vec_len (fr->rewrite));
+ b0->current_data = 0;
+ b0->current_length = vec_len (fr->rewrite);
+ b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_FLOW_REPORT);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) &tp->ip4;
+ udp = (udp_header_t *) (ip+1);
+ h = (ipfix_message_header_t *)(udp+1);
+
+ /* FIXUP: message header export_time */
+ h->export_time = (u32)
+ (((f64)frm->unix_time_0) +
+ (vlib_time_now(frm->vlib_main) - frm->vlib_time_0));
+ h->export_time = clib_host_to_net_u32(h->export_time);
+
+ stream = &frm->streams[fr->stream_index];
+
+ /* FIXUP: message header sequence_number. Templates do not increase it */
+ h->sequence_number = clib_host_to_net_u32(stream->sequence_number);
+
+ /* FIXUP: udp length */
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ if (frm->udp_checksum)
+ {
+ /* RFC 7011 section 10.3.2. */
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ *buffer_indexp = bi0;
+
+ fr->last_template_sent = vlib_time_now (vm);
+
+ return 0;
+}
+
+static uword
+flow_report_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ flow_report_main_t * frm = &flow_report_main;
+ flow_report_t * fr;
+ u32 ip4_lookup_node_index;
+ vlib_node_t * ip4_lookup_node;
+ vlib_frame_t * nf = 0;
+ u32 template_bi;
+ u32 * to_next;
+ int send_template;
+ f64 now;
+ int rv;
+ uword event_type;
+ uword *event_data = 0;
+
+ /* Wait for Godot... */
+ vlib_process_wait_for_event_or_clock (vm, 1e9);
+ event_type = vlib_process_get_events (vm, &event_data);
+ if (event_type != 1)
+ clib_warning ("bogus kickoff event received, %d", event_type);
+ vec_reset_length (event_data);
+
+ /* Enqueue pkts to ip4-lookup */
+ ip4_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup");
+ ip4_lookup_node_index = ip4_lookup_node->index;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 5.0);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ vec_foreach (fr, frm->reports)
+ {
+ now = vlib_time_now (vm);
+
+ /* Need to send a template packet? */
+ send_template =
+ now > (fr->last_template_sent + frm->template_interval);
+ send_template += fr->last_template_sent == 0;
+ template_bi = ~0;
+ rv = 0;
+
+ if (send_template)
+ rv = send_template_packet (frm, fr, &template_bi);
+
+ if (rv < 0)
+ continue;
+
+ nf = vlib_get_frame_to_node (vm, ip4_lookup_node_index);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+
+ if (template_bi != ~0)
+ {
+ to_next[0] = template_bi;
+ to_next++;
+ nf->n_vectors++;
+ }
+
+ nf = fr->flow_data_callback (frm, fr,
+ nf, to_next, ip4_lookup_node_index);
+ if (nf)
+ vlib_put_frame_to_node (vm, ip4_lookup_node_index, nf);
+ }
+ }
+
+ return 0; /* not so much */
+}
+
+VLIB_REGISTER_NODE (flow_report_process_node) = {
+ .function = flow_report_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "flow-report-process",
+};
+
+int vnet_flow_report_add_del (flow_report_main_t *frm,
+ vnet_flow_report_add_del_args_t *a,
+ u16 *template_id)
+{
+ int i;
+ int found_index = ~0;
+ flow_report_t *fr;
+ flow_report_stream_t * stream;
+ i32 si; /* find_stream() can return negative error codes */
+
+ si = find_stream(a->domain_id, a->src_port);
+ if (si == -2)
+ return VNET_API_ERROR_INVALID_VALUE;
+ if (si == -1 && a->is_add == 0)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ for (i = 0; i < vec_len(frm->reports); i++)
+ {
+ fr = vec_elt_at_index (frm->reports, i);
+ if (fr->opaque.as_uword == a->opaque.as_uword
+ && fr->rewrite_callback == a->rewrite_callback
+ && fr->flow_data_callback == a->flow_data_callback)
+ {
+ found_index = i;
+ if (template_id)
+ *template_id = fr->template_id;
+ break;
+ }
+ }
+
+ if (a->is_add == 0)
+ {
+ if (found_index != ~0)
+ {
+ vec_delete (frm->reports, 1, found_index);
+ stream = &frm->streams[si];
+ stream->n_reports--;
+ if (stream->n_reports == 0)
+ delete_stream(si);
+ return 0;
+ }
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ if (found_index != ~0)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ if (si == -1)
+ {
+ stream = add_stream();
+ stream->domain_id = a->domain_id;
+ stream->src_port = a->src_port;
+ stream->sequence_number = 0;
+ stream->n_reports = 0;
+ si = stream - frm->streams;
+ }
+ else
+ stream = &frm->streams[si];
+
+ stream->n_reports++;
+
+ vec_add2 (frm->reports, fr, 1);
+
+ fr->stream_index = si;
+ fr->template_id = 256 + stream->next_template_no;
+ stream->next_template_no = (stream->next_template_no + 1) % (65536 - 256);
+ fr->update_rewrite = 1;
+ fr->opaque = a->opaque;
+ fr->rewrite_callback = a->rewrite_callback;
+ fr->flow_data_callback = a->flow_data_callback;
+
+ if (template_id)
+ *template_id = fr->template_id;
+
+ return 0;
+}
+
+clib_error_t * flow_report_add_del_error_to_clib_error (int error)
+{
+ switch (error)
+ {
+ case 0:
+ return 0;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "Flow report not found");
+ case VNET_API_ERROR_VALUE_EXIST:
+ return clib_error_return (0, "Flow report already exists");
+ case VNET_API_ERROR_INVALID_VALUE:
+ return clib_error_return (0, "Expecting either still unused values "
+ "for both domain_id and src_port "
+ "or already used values for both fields");
+ default:
+ return clib_error_return (0, "vnet_flow_report_add_del returned %d",
+ error);
+ }
+}
+
+void vnet_flow_reports_reset (flow_report_main_t * frm)
+{
+ flow_report_t *fr;
+ u32 i;
+
+ for (i = 0; i < vec_len(frm->streams); i++)
+ if (stream_index_valid(i))
+ frm->streams[i].sequence_number = 0;
+
+ vec_foreach (fr, frm->reports)
+ {
+ fr->update_rewrite = 1;
+ fr->last_template_sent = 0;
+ }
+}
+
+void vnet_stream_reset (flow_report_main_t * frm, u32 stream_index)
+{
+ flow_report_t *fr;
+
+ frm->streams[stream_index].sequence_number = 0;
+
+ vec_foreach (fr, frm->reports)
+ if (fr->stream_index == stream_index) {
+ fr->update_rewrite = 1;
+ fr->last_template_sent = 0;
+ }
+}
+
+int vnet_stream_change (flow_report_main_t * frm,
+ u32 old_domain_id, u16 old_src_port,
+ u32 new_domain_id, u16 new_src_port)
+{
+ i32 stream_index = find_stream (old_domain_id, old_src_port);
+ if (stream_index < 0)
+ return 1;
+ flow_report_stream_t * stream = &frm->streams[stream_index];
+ stream->domain_id = new_domain_id;
+ stream->src_port = new_src_port;
+ if (old_domain_id != new_domain_id || old_src_port != new_src_port)
+ vnet_stream_reset (frm, stream_index);
+ return 0;
+}
+
+static clib_error_t *
+set_ipfix_exporter_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ flow_report_main_t * frm = &flow_report_main;
+ ip4_address_t collector, src;
+ /* u32, not u16: unformat "%u" below stores a full u32 */
+ u32 collector_port = UDP_DST_PORT_ipfix;
+ u32 fib_id;
+ u32 fib_index = ~0;
+
+ collector.as_u32 = 0;
+ src.as_u32 = 0;
+ u32 path_mtu = 512; // RFC 7011 section 10.3.3.
+ u32 template_interval = 20;
+ u8 udp_checksum = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+ ;
+ else if (unformat (input, "port %u", &collector_port))
+ ;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src))
+ ;
+ else if (unformat (input, "fib-id %u", &fib_id))
+ {
+ ip4_main_t * im = &ip4_main;
+ uword * p = hash_get (im->fib_index_by_table_id, fib_id);
+ if (! p)
+ return clib_error_return (0, "fib ID %d doesn't exist\n",
+ fib_id);
+ fib_index = p[0];
+ }
+ else if (unformat (input, "path-mtu %u", &path_mtu))
+ ;
+ else if (unformat (input, "template-interval %u", &template_interval))
+ ;
+ else if (unformat (input, "udp-checksum"))
+ udp_checksum = 1;
+ else
+ break;
+ }
+
+ if (collector.as_u32 != 0 && src.as_u32 == 0)
+ return clib_error_return (0, "src address required");
+
+ if (path_mtu > 1450 /* vpp does not support fragmentation */)
+ return clib_error_return (0, "too big path-mtu value, maximum is 1450");
+
+ if (path_mtu < 68)
+ return clib_error_return (0, "too small path-mtu value, minimum is 68");
+
+ /* Reset report streams if we are reconfiguring IP addresses */
+ if (frm->ipfix_collector.as_u32 != collector.as_u32 ||
+ frm->src_address.as_u32 != src.as_u32 ||
+ frm->collector_port != collector_port)
+ vnet_flow_reports_reset(frm);
+
+ frm->ipfix_collector.as_u32 = collector.as_u32;
+ frm->collector_port = (u16) collector_port;
+ frm->src_address.as_u32 = src.as_u32;
+ frm->fib_index = fib_index;
+ frm->path_mtu = path_mtu;
+ frm->template_interval = template_interval;
+ frm->udp_checksum = udp_checksum;
+
+ if (collector.as_u32)
+ vlib_cli_output (vm, "Collector %U, src address %U, "
+ "fib index %d, path MTU %u, "
+ "template resend interval %us, "
+ "udp checksum %s",
+ format_ip4_address, &frm->ipfix_collector,
+ format_ip4_address, &frm->src_address,
+ fib_index, path_mtu, template_interval,
+ udp_checksum ? "enabled" : "disabled");
+ else
+ vlib_cli_output (vm, "IPFIX Collector is disabled");
+
+ /* Turn on the flow reporting process */
+ vlib_process_signal_event (vm, flow_report_process_node.index,
+ 1, 0);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = {
+ .path = "set ipfix exporter",
+ .short_help = "set ipfix exporter "
+ "collector <ip4-address> [port <port>] "
+ "src <ip4-address> [fib-id <fib-id>] "
+ "[path-mtu <path-mtu>] "
+ "[template-interval <template-interval>]",
+ "[udp-checksum]",
+ .function = set_ipfix_exporter_command_fn,
+};
+
+
+static clib_error_t *
+ipfix_flush_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ /* poke the flow reporting process */
+ vlib_process_signal_event (vm, flow_report_process_node.index,
+ 1, 0);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (ipfix_flush_command, static) = {
+ .path = "ipfix flush",
+ .short_help = "flush the current ipfix data [for make test]",
+ .function = ipfix_flush_command_fn,
+};
+
+static clib_error_t *
+flow_report_init (vlib_main_t *vm)
+{
+ flow_report_main_t * frm = &flow_report_main;
+
+ frm->vlib_main = vm;
+ frm->vnet_main = vnet_get_main();
+ frm->unix_time_0 = time(0);
+ frm->vlib_time_0 = vlib_time_now(frm->vlib_main);
+ frm->fib_index = ~0;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (flow_report_init);
diff --git a/src/vnet/flow/flow_report.h b/src/vnet/flow/flow_report.h
new file mode 100644
index 00000000..01859ce5
--- /dev/null
+++ b/src/vnet/flow/flow_report.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vnet_flow_report_h__
+#define __included_vnet_flow_report_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp.h>
+#include <vlib/cli.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+#include <vnet/flow/ipfix_packet.h>
+
+/* Used to build the rewrite */
+typedef struct {
+ ip4_header_t ip4;
+ udp_header_t udp;
+ ipfix_template_packet_t ipfix;
+} ip4_ipfix_template_packet_t;
+
+struct flow_report_main;
+struct flow_report;
+
+typedef u8 * (vnet_flow_rewrite_callback_t)(struct flow_report_main *,
+ struct flow_report *,
+ ip4_address_t *,
+ ip4_address_t *,
+ u16);
+
+typedef vlib_frame_t * (vnet_flow_data_callback_t) (struct flow_report_main *,
+ struct flow_report *,
+ vlib_frame_t *, u32 *,
+ u32);
+
+typedef union {
+ void * as_ptr;
+ uword as_uword;
+} opaque_t;
+
+typedef struct {
+ u32 domain_id;
+ u32 sequence_number;
+ u16 src_port;
+ u16 n_reports;
+ u16 next_template_no;
+} flow_report_stream_t;
+
+typedef struct flow_report {
+ /* ipfix rewrite, set by callback */
+ u8 * rewrite;
+ u16 template_id;
+ u32 stream_index;
+ f64 last_template_sent;
+ int update_rewrite;
+
+ /* Bitmap of fields to send */
+ uword * fields_to_send;
+
+ /* Opaque data */
+ opaque_t opaque;
+
+ /* build-the-rewrite callback */
+ vnet_flow_rewrite_callback_t *rewrite_callback;
+
+ /* Send-flow-data callback */
+ vnet_flow_data_callback_t *flow_data_callback;
+} flow_report_t;
+
+typedef struct flow_report_main {
+ flow_report_t * reports;
+ flow_report_stream_t * streams;
+
+ /* ipfix collector ip address, port, our ip address, fib index */
+ ip4_address_t ipfix_collector;
+ u16 collector_port;
+ ip4_address_t src_address;
+ u32 fib_index;
+
+ /* Path MTU */
+ u32 path_mtu;
+
+ /* time interval in seconds after which to resend templates */
+ u32 template_interval;
+
+ /* UDP checksum calculation enable flag */
+ u8 udp_checksum;
+
+ /* time scale transform. Joy. */
+ u32 unix_time_0;
+ f64 vlib_time_0;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} flow_report_main_t;
+
+extern flow_report_main_t flow_report_main;
+
+extern vlib_node_registration_t flow_report_process_node;
+
+int vnet_flow_report_enable_disable (u32 sw_if_index, u32 table_index,
+ int enable_disable);
+typedef struct {
+ vnet_flow_data_callback_t *flow_data_callback;
+ vnet_flow_rewrite_callback_t *rewrite_callback;
+ opaque_t opaque;
+ int is_add;
+ u32 domain_id;
+ u16 src_port;
+} vnet_flow_report_add_del_args_t;
+
+int vnet_flow_report_add_del (flow_report_main_t *frm,
+ vnet_flow_report_add_del_args_t *a,
+ u16 *template_id);
+
+clib_error_t * flow_report_add_del_error_to_clib_error (int error);
+
+void vnet_flow_reports_reset (flow_report_main_t * frm);
+
+void vnet_stream_reset (flow_report_main_t * frm, u32 stream_index);
+
+int vnet_stream_change (flow_report_main_t * frm,
+ u32 old_domain_id, u16 old_src_port,
+ u32 new_domain_id, u16 new_src_port);
+
+#endif /* __included_vnet_flow_report_h__ */
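
[note: a usage sketch of the API declared above, not code from this patch;
my_rewrite and my_send_flows are hypothetical callbacks matching
vnet_flow_rewrite_callback_t / vnet_flow_data_callback_t:

    static int
    my_feature_enable_ipfix (u32 my_table_index)
    {
      flow_report_main_t *frm = &flow_report_main;
      vnet_flow_report_add_del_args_t a;
      u16 template_id;

      memset (&a, 0, sizeof (a));
      a.rewrite_callback = my_rewrite;      /* builds the template packet */
      a.flow_data_callback = my_send_flows; /* walks state, emits data sets */
      a.opaque.as_uword = my_table_index;   /* handed back to both callbacks */
      a.is_add = 1;
      a.domain_id = 1;
      a.src_port = 4739;                    /* IPFIX registered port */

      return vnet_flow_report_add_del (frm, &a, &template_id);
    }

On success the assigned template id is returned through the third argument.]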
diff --git a/src/vnet/flow/flow_report_classify.c b/src/vnet/flow/flow_report_classify.c
new file mode 100644
index 00000000..d4c30492
--- /dev/null
+++ b/src/vnet/flow/flow_report_classify.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/flow/flow_report.h>
+#include <vnet/flow/flow_report_classify.h>
+#include <vnet/api_errno.h>
+
+/* Common prefix of tcp and udp headers
+ * containing only source and destination port fields */
+typedef struct {
+ u16 src_port, dst_port;
+} tcpudp_header_t;
+
+flow_report_classify_main_t flow_report_classify_main;
+
+u8 * ipfix_classify_template_rewrite (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port)
+{
+ flow_report_classify_main_t * fcm = &flow_report_classify_main;
+ vnet_classify_table_t * tblp;
+ vnet_classify_main_t * vcm = &vnet_classify_main;
+ u32 flow_table_index = fr->opaque.as_uword;
+ u8 * ip_start;
+ ip4_header_t * ip;
+ ip6_header_t * ip6;
+ tcpudp_header_t * tcpudp;
+ udp_header_t * udp;
+ ipfix_message_header_t * h;
+ ipfix_set_header_t * s;
+ ipfix_template_header_t * t;
+ ipfix_field_specifier_t * f;
+ ipfix_field_specifier_t * first_field;
+ u8 * rewrite = 0;
+ ip4_ipfix_template_packet_t * tp;
+ u32 field_count = 0;
+ u32 field_index = 0;
+ flow_report_stream_t * stream;
+ u8 ip_version;
+ u8 transport_protocol;
+ u8 * virt_mask;
+ u8 * real_mask;
+
+ stream = &frm->streams[fr->stream_index];
+
+ ipfix_classify_table_t * table = &fcm->tables[flow_table_index];
+
+ ip_version = table->ip_version;
+ transport_protocol = table->transport_protocol;
+
+ tblp = pool_elt_at_index (vcm->tables, table->classify_table_index);
+
+ virt_mask = (u8 *)(tblp->mask - tblp->skip_n_vectors);
+ real_mask = (u8 *)(tblp->mask);
+
+ /* Determine field count */
+ ip_start = virt_mask + sizeof(ethernet_header_t);
+#define _(field,mask,item,length) \
+ if (((u8 *)&field >= real_mask) && (memcmp(&field, &mask, length) == 0)) \
+ { \
+ field_count++; \
+ \
+ fr->fields_to_send = clib_bitmap_set (fr->fields_to_send, \
+ field_index, 1); \
+ } \
+ field_index++;
+ foreach_ipfix_field;
+#undef _
+
+ /* Add packetTotalCount manually */
+ field_count += 1;
+
+ /* $$$ enterprise fields, at some later date */
+
+ /* allocate rewrite space */
+ vec_validate_aligned (rewrite,
+ sizeof (ip4_ipfix_template_packet_t)
+ + field_count * sizeof (ipfix_field_specifier_t) - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ tp = (ip4_ipfix_template_packet_t *) rewrite;
+ ip = (ip4_header_t *) &tp->ip4;
+ udp = (udp_header_t *) (ip+1);
+ h = (ipfix_message_header_t *)(udp+1);
+ s = (ipfix_set_header_t *)(h+1);
+ t = (ipfix_template_header_t *)(s+1);
+ first_field = f = (ipfix_field_specifier_t *)(t+1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->src_address.as_u32 = src_address->as_u32;
+ ip->dst_address.as_u32 = collector_address->as_u32;
+ udp->src_port = clib_host_to_net_u16 (stream->src_port);
+ udp->dst_port = clib_host_to_net_u16 (collector_port);
+ udp->length = clib_host_to_net_u16 (vec_len(rewrite) - sizeof (*ip));
+
+ /* FIXUP: message header export_time */
+ /* FIXUP: message header sequence_number */
+ h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+ /* Take another trip through the mask and build the template */
+ ip_start = virt_mask + sizeof(ethernet_header_t);
+#define _(field,mask,item,length) \
+ if (((u8 *)&field >= real_mask) && (memcmp(&field, &mask, length) == 0)) \
+ { \
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */, \
+ item, length); \
+ f++; \
+ }
+ foreach_ipfix_field;
+#undef _
+
+ /* Add packetTotalCount manually */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */, packetTotalCount, 8);
+ f++;
+
+ /* Back to the template packet... */
+ ip = (ip4_header_t *) &tp->ip4;
+ udp = (udp_header_t *) (ip+1);
+
+ ASSERT (f - first_field);
+ /* Field count in this template */
+ t->id_count = ipfix_id_count (fr->template_id, f - first_field);
+
+ /* set length in octets*/
+ s->set_id_length = ipfix_set_id_length (2 /* set_id */, (u8 *) f - (u8 *)s);
+
+ /* message length in octets */
+ h->version_length = version_length ((u8 *)f - (u8 *)h);
+
+ ip->length = clib_host_to_net_u16 ((u8 *)f - (u8 *)ip);
+ ip->checksum = ip4_header_checksum (ip);
+
+ return rewrite;
+}
+
+vlib_frame_t * ipfix_classify_send_flows (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f,
+ u32 * to_next,
+ u32 node_index)
+{
+ flow_report_classify_main_t * fcm = &flow_report_classify_main;
+ vnet_classify_main_t * vcm = &vnet_classify_main;
+ u32 flow_table_index = fr->opaque.as_uword;
+ vnet_classify_table_t * t;
+ vnet_classify_bucket_t * b;
+ vnet_classify_entry_t * v, * save_v;
+ vlib_buffer_t *b0 = 0;
+ u32 next_offset = 0;
+ u32 record_offset = 0;
+ u32 bi0 = ~0;
+ int i, j, k;
+ ip4_ipfix_template_packet_t * tp;
+ ipfix_message_header_t * h = 0;
+ ipfix_set_header_t * s = 0;
+ u8 * ip_start;
+ ip4_header_t * ip;
+ ip6_header_t * ip6;
+ tcpudp_header_t * tcpudp;
+ udp_header_t * udp;
+ int field_index;
+ u32 records_this_buffer;
+ u16 new_l0, old_l0;
+ ip_csum_t sum0;
+ vlib_main_t * vm = frm->vlib_main;
+ flow_report_stream_t * stream;
+ u8 ip_version;
+ u8 transport_protocol;
+ u8 * virt_key;
+
+ stream = &frm->streams[fr->stream_index];
+
+ ipfix_classify_table_t * table = &fcm->tables[flow_table_index];
+
+ ip_version = table->ip_version;
+ transport_protocol = table->transport_protocol;
+
+ t = pool_elt_at_index (vcm->tables, table->classify_table_index);
+
+ while (__sync_lock_test_and_set (t->writer_lock, 1))
+ ;
+
+ for (i = 0; i < t->nbuckets; i++)
+ {
+ b = &t->buckets [i];
+ if (b->offset == 0)
+ continue;
+
+ save_v = vnet_classify_get_entry (t, b->offset);
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < t->entries_per_page; k++)
+ {
+ v = vnet_classify_entry_at_index
+ (t, save_v, j*t->entries_per_page + k);
+
+ if (vnet_classify_entry_is_free (v))
+ continue;
+
+ /* OK, we have something to send... */
+ if (PREDICT_FALSE (b0 == 0))
+ {
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ goto flush;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ u32 copy_len = sizeof(ip4_header_t) +
+ sizeof(udp_header_t) +
+ sizeof(ipfix_message_header_t);
+ clib_memcpy (b0->data, fr->rewrite, copy_len);
+ b0->current_data = 0;
+ b0->current_length = copy_len;
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) &tp->ip4;
+ udp = (udp_header_t *) (ip+1);
+ h = (ipfix_message_header_t *)(udp+1);
+ s = (ipfix_set_header_t *)(h+1);
+
+ /* FIXUP: message header export_time */
+ h->export_time = (u32)
+ (((f64)frm->unix_time_0) +
+ (vlib_time_now(frm->vlib_main) - frm->vlib_time_0));
+ h->export_time = clib_host_to_net_u32(h->export_time);
+
+ /* FIXUP: message header sequence_number */
+ h->sequence_number = stream->sequence_number;
+ h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
+
+ next_offset = (u32) (((u8 *)(s+1)) - (u8 *)tp);
+ record_offset = next_offset;
+ records_this_buffer = 0;
+ }
+
+ field_index = 0;
+ virt_key = (u8 *)(v->key - t->skip_n_vectors);
+ ip_start = virt_key + sizeof(ethernet_header_t);
+#define _(field,mask,item,length) \
+ if (clib_bitmap_get (fr->fields_to_send, field_index)) \
+ { \
+ clib_memcpy (b0->data + next_offset, &field, \
+ length); \
+ next_offset += length; \
+ } \
+ field_index++;
+ foreach_ipfix_field;
+#undef _
+
+ /* Add packetTotalCount manually */
+ {
+ u64 packets = clib_host_to_net_u64 (v->hits);
+ clib_memcpy (b0->data + next_offset, &packets, sizeof (packets));
+ next_offset += sizeof (packets);
+ }
+ records_this_buffer++;
+ stream->sequence_number++;
+
+ /* Next record will have the same size as this record */
+ u32 next_record_size = next_offset - record_offset;
+ record_offset = next_offset;
+
+ if (next_offset + next_record_size > frm->path_mtu)
+ {
+ s->set_id_length = ipfix_set_id_length (fr->template_id,
+ next_offset -
+ (sizeof (*ip) + sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length = version_length (next_offset -
+ (sizeof (*ip) + sizeof (*udp)));
+ b0->current_length = next_offset;
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) &tp->ip4;
+ udp = (udp_header_t *) (ip+1);
+
+ sum0 = ip->checksum;
+ old_l0 = ip->length;
+ new_l0 =
+ clib_host_to_net_u16 ((u16)next_offset);
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+
+ ip->checksum = ip_csum_fold (sum0);
+ ip->length = new_l0;
+ udp->length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ if (frm->udp_checksum)
+ {
+ /* RFC 7011 section 10.3.2. */
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+
+ to_next[0] = bi0;
+ f->n_vectors++;
+ to_next++;
+
+ if (f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, node_index, f);
+ f = vlib_get_frame_to_node (vm, node_index);
+ f->n_vectors = 0;
+ to_next = vlib_frame_vector_args (f);
+ }
+ b0 = 0;
+ bi0 = ~0;
+ }
+ }
+ }
+ }
+
+ flush:
+ if (b0)
+ {
+ s->set_id_length = ipfix_set_id_length (fr->template_id,
+ next_offset -
+ (sizeof (*ip) + sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length = version_length (next_offset -
+ (sizeof (*ip) + sizeof (*udp)));
+ b0->current_length = next_offset;
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) &tp->ip4;
+ udp = (udp_header_t *) (ip+1);
+
+ sum0 = ip->checksum;
+ old_l0 = ip->length;
+ new_l0 = clib_host_to_net_u16 ((u16)next_offset);
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+
+ ip->checksum = ip_csum_fold (sum0);
+ ip->length = new_l0;
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ if (frm->udp_checksum)
+ {
+ /* RFC 7011 section 10.3.2. */
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+
+ to_next[0] = bi0;
+ f->n_vectors++;
+
+ b0 = 0;
+ bi0 = ~0;
+ }
+
+ *(t->writer_lock) = 0;
+ return f;
+}
+
+static clib_error_t *
+ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ flow_report_classify_main_t *fcm = &flow_report_classify_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vnet_flow_report_add_del_args_t args;
+ ipfix_classify_table_t * table;
+ int rv;
+ int is_add = -1;
+ u32 classify_table_index = ~0;
+ u8 ip_version = 0;
+ u8 transport_protocol = 255;
+ clib_error_t * error = 0;
+
+ if (fcm->src_port == 0)
+ clib_error_return (0, "call 'set ipfix classify stream' first");
+
+ memset (&args, 0, sizeof (args));
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "add"))
+ is_add = 1;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "%d", &classify_table_index))
+ ;
+ else if (unformat (input, "ip4"))
+ ip_version = 4;
+ else if (unformat (input, "ip6"))
+ ip_version = 6;
+ else if (unformat (input, "tcp"))
+ transport_protocol = 6;
+ else if (unformat (input, "udp"))
+ transport_protocol = 17;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (is_add == -1)
+ return clib_error_return (0, "expecting: add|del");
+ if (classify_table_index == ~0)
+ return clib_error_return (0, "classifier table not specified");
+ if (ip_version == 0)
+ return clib_error_return (0, "IP version not specified");
+
+ table = 0;
+ int i;
+ for (i = 0; i < vec_len(fcm->tables); i++)
+ if (ipfix_classify_table_index_valid(i))
+ if (fcm->tables[i].classify_table_index == classify_table_index) {
+ table = &fcm->tables[i];
+ break;
+ }
+
+ if (is_add) {
+ if (table)
+ return clib_error_return (0, "Specified classifier table already used");
+ table = ipfix_classify_add_table();
+ table->classify_table_index = classify_table_index;
+ } else {
+ if (!table)
+ return clib_error_return (0, "Specified classifier table not registered");
+ }
+
+ table->ip_version = ip_version;
+ table->transport_protocol = transport_protocol;
+
+ args.opaque.as_uword = table - fcm->tables;
+ args.rewrite_callback = ipfix_classify_template_rewrite;
+ args.flow_data_callback = ipfix_classify_send_flows;
+ args.is_add = is_add;
+ args.domain_id = fcm->domain_id;
+ args.src_port = fcm->src_port;
+
+ rv = vnet_flow_report_add_del (frm, &args, NULL);
+
+ error = flow_report_add_del_error_to_clib_error(rv);
+
+ /* If deleting, or add failed */
+ if (is_add == 0 || (rv && is_add))
+ ipfix_classify_delete_table (table - fcm->tables);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (ipfix_classify_table_add_del_command, static) = {
+ .path = "ipfix classify table",
+ .short_help = "ipfix classify table add|del <table-index>",
+ .function = ipfix_classify_table_add_del_command_fn,
+};
+
+static clib_error_t *
+set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ flow_report_classify_main_t *fcm = &flow_report_classify_main;
+ flow_report_main_t *frm = &flow_report_main;
+ u32 domain_id = 1;
+ u32 src_port = UDP_DST_PORT_ipfix;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "domain %d", &domain_id))
+ ;
+ else if (unformat (input, "src-port %d", &src_port))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (fcm->src_port != 0 &&
+ (fcm->domain_id != domain_id ||
+ fcm->src_port != (u16)src_port)) {
+ int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port,
+ domain_id, (u16)src_port);
+ ASSERT (rv == 0);
+ }
+
+ fcm->domain_id = domain_id;
+ fcm->src_port = (u16)src_port;
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_ipfix_classify_stream_command, static) = {
+ .path = "set ipfix classify stream",
+ .short_help = "set ipfix classify stream"
+ "[domain <domain-id>] [src-port <src-port>]",
+ .function = set_ipfix_classify_stream_command_fn,
+};
+
+static clib_error_t *
+flow_report_classify_init (vlib_main_t *vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, flow_report_init)))
+ return error;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (flow_report_classify_init);
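
[note: putting the two CLIs above together; table index 5 stands in for an
assumed, pre-existing classifier table, and the stream must be configured
before any table is added, per the src_port check above:

    vpp# set ipfix classify stream domain 1 src-port 4739
    vpp# ipfix classify table add 5 ip4 tcp
]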
diff --git a/src/vnet/flow/flow_report_classify.h b/src/vnet/flow/flow_report_classify.h
new file mode 100644
index 00000000..77d98b58
--- /dev/null
+++ b/src/vnet/flow/flow_report_classify.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_flow_report_classify_h__
+#define __included_flow_report_classify_h__
+
+#define foreach_ipfix_ip4_field \
+_(ip->src_address.as_u32, ((u32[]){0xFFFFFFFF}), sourceIPv4Address, 4) \
+_(ip->dst_address.as_u32, ((u32[]){0xFFFFFFFF}), destinationIPv4Address, 4) \
+_(ip->protocol, ((u8[]){0xFF}), protocolIdentifier, 1)
+
+#define foreach_ipfix_ip6_field \
+_(ip6->src_address.as_u8, \
+ ((u32[]){0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}), \
+ sourceIPv6Address, 16) \
+_(ip6->dst_address.as_u8, \
+ ((u32[]){0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}), \
+ destinationIPv6Address, 16) \
+_(ip6->protocol, ((u8[]){0xFF}), protocolIdentifier, 1)
+
+#define foreach_ipfix_tcpudp_field \
+_(tcpudp->src_port, ((u16[]){0xFFFF}), sourceTransportPort, 2) \
+_(tcpudp->dst_port, ((u16[]){0xFFFF}), destinationTransportPort, 2)
+
+#define foreach_ipfix_tcp_field \
+_(tcpudp->src_port, ((u16[]){0xFFFF}), tcpSourcePort, 2) \
+_(tcpudp->dst_port, ((u16[]){0xFFFF}), tcpDestinationPort, 2)
+
+#define foreach_ipfix_udp_field \
+_(tcpudp->src_port, ((u16[]){0xFFFF}), udpSourcePort, 2) \
+_(tcpudp->dst_port, ((u16[]){0xFFFF}), udpDestinationPort, 2)
+
+#define foreach_ipfix_transport_protocol_field \
+ switch (transport_protocol) { \
+ case 255: \
+ foreach_ipfix_tcpudp_field; \
+ break; \
+ case 6: \
+ foreach_ipfix_tcp_field; \
+ break; \
+ case 17: \
+ foreach_ipfix_udp_field; \
+ break; \
+ }
+
+#define foreach_ipfix_field \
+ if (ip_version == 4) { \
+ ip = (ip4_header_t *)ip_start; \
+ tcpudp = (tcpudp_header_t *)(ip+1); \
+ foreach_ipfix_ip4_field; \
+ } else { \
+ ip6 = (ip6_header_t *)ip_start; \
+ tcpudp = (tcpudp_header_t *)(ip6+1); \
+ foreach_ipfix_ip6_field; \
+ } \
+ foreach_ipfix_transport_protocol_field
+
+typedef struct {
+ u32 classify_table_index;
+ u8 ip_version;
+ u8 transport_protocol;
+} ipfix_classify_table_t;
+
+typedef struct {
+ u32 domain_id;
+ u16 src_port;
+ ipfix_classify_table_t * tables;
+} flow_report_classify_main_t;
+
+extern flow_report_classify_main_t flow_report_classify_main;
+
+static_always_inline u8 ipfix_classify_table_index_valid (u32 index)
+{
+ flow_report_classify_main_t * fcm = &flow_report_classify_main;
+ return index < vec_len(fcm->tables) &&
+ fcm->tables[index].classify_table_index != ~0;
+}
+
+static_always_inline ipfix_classify_table_t * ipfix_classify_add_table (void)
+{
+ flow_report_classify_main_t * fcm = &flow_report_classify_main;
+ u32 i;
+ for (i = 0; i < vec_len(fcm->tables); i++)
+ if (!ipfix_classify_table_index_valid(i))
+ return &fcm->tables[i];
+ u32 index = vec_len(fcm->tables);
+ vec_validate(fcm->tables, index);
+ return &fcm->tables[index];
+}
+
+static_always_inline void ipfix_classify_delete_table (u32 index)
+{
+ flow_report_classify_main_t * fcm = &flow_report_classify_main;
+ ASSERT (index < vec_len(fcm->tables));
+ ASSERT (fcm->tables[index].classify_table_index != ~0);
+ fcm->tables[index].classify_table_index = ~0;
+}
+
+u8 * ipfix_classify_template_rewrite (flow_report_main_t * frm,
+ flow_report_t * fr,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address,
+ u16 collector_port);
+
+vlib_frame_t * ipfix_classify_send_flows (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f,
+ u32 * to_next,
+ u32 node_index);
+
+#endif /* __included_flow_report_classify_h__ */
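
[note: to make the x-macros above concrete, inside the field-count pass of
ipfix_classify_template_rewrite the first ip4 entry expands to approximately
the following (hand-expanded illustration, not literal preprocessor output):

    if (ip_version == 4)
      {
        ip = (ip4_header_t *) ip_start;
        tcpudp = (tcpudp_header_t *) (ip + 1);
        if (((u8 *) &ip->src_address.as_u32 >= real_mask) &&
            (memcmp (&ip->src_address.as_u32,
                     &((u32[]){ 0xFFFFFFFF }), 4) == 0))
          {
            field_count++;
            fr->fields_to_send = clib_bitmap_set (fr->fields_to_send,
                                                  field_index, 1);
          }
        field_index++;
        /* destinationIPv4Address and protocolIdentifier follow ... */
      }

Only fields whose bits are fully present in the classifier mask make it into
the template and the fields_to_send bitmap.]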
diff --git a/src/vnet/flow/ipfix_info_elements.h b/src/vnet/flow/ipfix_info_elements.h
new file mode 100644
index 00000000..1403db43
--- /dev/null
+++ b/src/vnet/flow/ipfix_info_elements.h
@@ -0,0 +1,430 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ipfix_info_elements_h__
+#define __included_ipfix_info_elements_h__
+
+#define foreach_ipfix_info_element_t \
+_(octetDeltaCount, 1, u64) \
+_(packetDeltaCount, 2, u64) \
+_(deltaFlowCount, 3, u64) \
+_(protocolIdentifier, 4, u8) \
+_(ipClassOfService, 5, u8) \
+_(tcpControlBits, 6, u16) \
+_(sourceTransportPort, 7, u16) \
+_(sourceIPv4Address, 8, ip4_address_t) \
+_(sourceIPv4PrefixLength, 9, u8) \
+_(ingressInterface, 10, u32) \
+_(destinationTransportPort, 11, u16) \
+_(destinationIPv4Address, 12, ip4_address_t) \
+_(destinationIPv4PrefixLength, 13, u8) \
+_(egressInterface, 14, u32) \
+_(ipNextHopIPv4Address, 15, ip4_address_t) \
+_(bgpSourceAsNumber, 16, u32) \
+_(bgpDestinationAsNumber, 17, u32) \
+_(bgpNextHopIPv4Address, 18, ip4_address_t) \
+_(postMCastPacketDeltaCount, 19, u64) \
+_(postMCastOctetDeltaCount, 20, u64) \
+_(flowEndSysUpTime, 21, u32) \
+_(flowStartSysUpTime, 22, u32) \
+_(postOctetDeltaCount, 23, u64) \
+_(postPacketDeltaCount, 24, u64) \
+_(minimumIpTotalLength, 25, u64) \
+_(maximumIpTotalLength, 26, u64) \
+_(sourceIPv6Address, 27, ip6_address_t) \
+_(destinationIPv6Address, 28, ip6_address_t) \
+_(sourceIPv6PrefixLength, 29, u8) \
+_(destinationIPv6PrefixLength, 30, u8) \
+_(flowLabelIPv6, 31, u32) \
+_(icmpTypeCodeIPv4, 32, u16) \
+_(igmpType, 33, u8) \
+_(samplingInterval, 34, u32) \
+_(samplingAlgorithm, 35, u8) \
+_(flowActiveTimeout, 36, u16) \
+_(flowIdleTimeout, 37, u16) \
+_(engineType, 38, u8) \
+_(engineId, 39, u8) \
+_(exportedOctetTotalCount, 40, u64) \
+_(exportedMessageTotalCount, 41, u64) \
+_(exportedFlowRecordTotalCount, 42, u64) \
+_(ipv4RouterSc, 43, ip4_address_t) \
+_(sourceIPv4Prefix, 44, ip4_address_t) \
+_(destinationIPv4Prefix, 45, ip4_address_t) \
+_(mplsTopLabelType, 46, u8) \
+_(mplsTopLabelIPv4Address, 47, ip4_address_t) \
+_(samplerId, 48, u8) \
+_(samplerMode, 49, u8) \
+_(samplerRandomInterval, 50, u32) \
+_(classId, 51, u8) \
+_(minimumTTL, 52, u8) \
+_(maximumTTL, 53, u8) \
+_(fragmentIdentification, 54, u32) \
+_(postIpClassOfService, 55, u8) \
+_(sourceMacAddress, 56, macAddress) \
+_(postDestinationMacAddress, 57, macAddress) \
+_(vlanId, 58, u16) \
+_(postVlanId, 59, u16) \
+_(ipVersion, 60, u8) \
+_(flowDirection, 61, u8) \
+_(ipNextHopIPv6Address, 62, ip6_address_t) \
+_(bgpNextHopIPv6Address, 63, ip6_address_t) \
+_(ipv6ExtensionHeaders, 64, u32) \
+_(mplsTopLabelStackSection, 70, octetArray) \
+_(mplsLabelStackSection2, 71, octetArray) \
+_(mplsLabelStackSection3, 72, octetArray) \
+_(mplsLabelStackSection4, 73, octetArray) \
+_(mplsLabelStackSection5, 74, octetArray) \
+_(mplsLabelStackSection6, 75, octetArray) \
+_(mplsLabelStackSection7, 76, octetArray) \
+_(mplsLabelStackSection8, 77, octetArray) \
+_(mplsLabelStackSection9, 78, octetArray) \
+_(mplsLabelStackSection10, 79, octetArray) \
+_(destinationMacAddress, 80, macAddress) \
+_(postSourceMacAddress, 81, macAddress) \
+_(interfaceName, 82, string) \
+_(interfaceDescription, 83, string) \
+_(samplerName, 84, string) \
+_(octetTotalCount, 85, u64) \
+_(packetTotalCount, 86, u64) \
+_(flagsAndSamplerId, 87, u32) \
+_(fragmentOffset, 88, u16) \
+_(forwardingStatus, 89, u32) \
+_(mplsVpnRouteDistinguisher, 90, octetArray) \
+_(mplsTopLabelPrefixLength, 91, u8) \
+_(srcTrafficIndex, 92, u32) \
+_(dstTrafficIndex, 93, u32) \
+_(applicationDescription, 94, string) \
+_(applicationId, 95, octetArray) \
+_(applicationName, 96, string) \
+_(Assigned, 97, for NetFlow v9 compatibility ) \
+_(postIpDiffServCodePoint, 98, u8) \
+_(multicastReplicationFactor, 99, u32) \
+_(className, 100, string) \
+_(classificationEngineId, 101, u8) \
+_(layer2packetSectionOffset, 102, u16) \
+_(layer2packetSectionSize, 103, u16) \
+_(layer2packetSectionData, 104, octetArray) \
+_(bgpNextAdjacentAsNumber, 128, u32) \
+_(bgpPrevAdjacentAsNumber, 129, u32) \
+_(exporterIPv4Address, 130, ip4_address_t) \
+_(exporterIPv6Address, 131, ip6_address_t) \
+_(droppedOctetDeltaCount, 132, u64) \
+_(droppedPacketDeltaCount, 133, u64) \
+_(droppedOctetTotalCount, 134, u64) \
+_(droppedPacketTotalCount, 135, u64) \
+_(flowEndReason, 136, u8) \
+_(commonPropertiesId, 137, u64) \
+_(observationPointId, 138, u64) \
+_(icmpTypeCodeIPv6, 139, u16) \
+_(mplsTopLabelIPv6Address, 140, ip6_address_t) \
+_(lineCardId, 141, u32) \
+_(portId, 142, u32) \
+_(meteringProcessId, 143, u32) \
+_(exportingProcessId, 144, u32) \
+_(templateId, 145, u16) \
+_(wlanChannelId, 146, u8) \
+_(wlanSSID, 147, string) \
+_(flowId, 148, u64) \
+_(observationDomainId, 149, u32) \
+_(flowStartSeconds, 150, dateTimeSeconds) \
+_(flowEndSeconds, 151, dateTimeSeconds) \
+_(flowStartMilliseconds, 152, dateTimeMilliseconds) \
+_(flowEndMilliseconds, 153, dateTimeMilliseconds) \
+_(flowStartMicroseconds, 154, dateTimeMicroseconds) \
+_(flowEndMicroseconds, 155, dateTimeMicroseconds) \
+_(flowStartNanoseconds, 156, dateTimeNanoseconds) \
+_(flowEndNanoseconds, 157, dateTimeNanoseconds) \
+_(flowStartDeltaMicroseconds, 158, u32) \
+_(flowEndDeltaMicroseconds, 159, u32) \
+_(systemInitTimeMilliseconds, 160, dateTimeMilliseconds) \
+_(flowDurationMilliseconds, 161, u32) \
+_(flowDurationMicroseconds, 162, u32) \
+_(observedFlowTotalCount, 163, u64) \
+_(ignoredPacketTotalCount, 164, u64) \
+_(ignoredOctetTotalCount, 165, u64) \
+_(notSentFlowTotalCount, 166, u64) \
+_(notSentPacketTotalCount, 167, u64) \
+_(notSentOctetTotalCount, 168, u64) \
+_(destinationIPv6Prefix, 169, ip6_address_t) \
+_(sourceIPv6Prefix, 170, ip6_address_t) \
+_(postOctetTotalCount, 171, u64) \
+_(postPacketTotalCount, 172, u64) \
+_(flowKeyIndicator, 173, u64) \
+_(postMCastPacketTotalCount, 174, u64) \
+_(postMCastOctetTotalCount, 175, u64) \
+_(icmpTypeIPv4, 176, u8) \
+_(icmpCodeIPv4, 177, u8) \
+_(icmpTypeIPv6, 178, u8) \
+_(icmpCodeIPv6, 179, u8) \
+_(udpSourcePort, 180, u16) \
+_(udpDestinationPort, 181, u16) \
+_(tcpSourcePort, 182, u16) \
+_(tcpDestinationPort, 183, u16) \
+_(tcpSequenceNumber, 184, u32) \
+_(tcpAcknowledgementNumber, 185, u32) \
+_(tcpWindowSize, 186, u16) \
+_(tcpUrgentPointer, 187, u16) \
+_(tcpHeaderLength, 188, u8) \
+_(ipHeaderLength, 189, u8) \
+_(totalLengthIPv4, 190, u16) \
+_(payloadLengthIPv6, 191, u16) \
+_(ipTTL, 192, u8) \
+_(nextHeaderIPv6, 193, u8) \
+_(mplsPayloadLength, 194, u32) \
+_(ipDiffServCodePoint, 195, u8) \
+_(ipPrecedence, 196, u8) \
+_(fragmentFlags, 197, u8) \
+_(octetDeltaSumOfSquares, 198, u64) \
+_(octetTotalSumOfSquares, 199, u64) \
+_(mplsTopLabelTTL, 200, u8) \
+_(mplsLabelStackLength, 201, u32) \
+_(mplsLabelStackDepth, 202, u32) \
+_(mplsTopLabelExp, 203, u8) \
+_(ipPayloadLength, 204, u32) \
+_(udpMessageLength, 205, u16) \
+_(isMulticast, 206, u8) \
+_(ipv4IHL, 207, u8) \
+_(ipv4Options, 208, u32) \
+_(tcpOptions, 209, u64) \
+_(paddingOctets, 210, octetArray) \
+_(collectorIPv4Address, 211, ip4_address_t) \
+_(collectorIPv6Address, 212, ip6_address_t) \
+_(exportInterface, 213, u32) \
+_(exportProtocolVersion, 214, u8) \
+_(exportTransportProtocol, 215, u8) \
+_(collectorTransportPort, 216, u16) \
+_(exporterTransportPort, 217, u16) \
+_(tcpSynTotalCount, 218, u64) \
+_(tcpFinTotalCount, 219, u64) \
+_(tcpRstTotalCount, 220, u64) \
+_(tcpPshTotalCount, 221, u64) \
+_(tcpAckTotalCount, 222, u64) \
+_(tcpUrgTotalCount, 223, u64) \
+_(ipTotalLength, 224, u64) \
+_(postNATSourceIPv4Address, 225, ip4_address_t) \
+_(postNATDestinationIPv4Address, 226, ip4_address_t) \
+_(postNAPTSourceTransportPort, 227, u16) \
+_(postNAPTDestinationTransportPort, 228, u16) \
+_(natOriginatingAddressRealm, 229, u8) \
+_(natEvent, 230, u8) \
+_(initiatorOctets, 231, u64) \
+_(responderOctets, 232, u64) \
+_(firewallEvent, 233, u8) \
+_(ingressVRFID, 234, u32) \
+_(egressVRFID, 235, u32) \
+_(VRFname, 236, string) \
+_(postMplsTopLabelExp, 237, u8) \
+_(tcpWindowScale, 238, u16) \
+_(biflowDirection, 239, u8) \
+_(ethernetHeaderLength, 240, u8) \
+_(ethernetPayloadLength, 241, u16) \
+_(ethernetTotalLength, 242, u16) \
+_(dot1qVlanId, 243, u16) \
+_(dot1qPriority, 244, u8) \
+_(dot1qCustomerVlanId, 245, u16) \
+_(dot1qCustomerPriority, 246, u8) \
+_(metroEvcId, 247, string) \
+_(metroEvcType, 248, u8) \
+_(pseudoWireId, 249, u32) \
+_(pseudoWireType, 250, u16) \
+_(pseudoWireControlWord, 251, u32) \
+_(ingressPhysicalInterface, 252, u32) \
+_(egressPhysicalInterface, 253, u32) \
+_(postDot1qVlanId, 254, u16) \
+_(postDot1qCustomerVlanId, 255, u16) \
+_(ethernetType, 256, u16) \
+_(postIpPrecedence, 257, u8) \
+_(collectionTimeMilliseconds, 258, dateTimeMilliseconds) \
+_(exportSctpStreamId, 259, u16) \
+_(maxExportSeconds, 260, dateTimeSeconds) \
+_(maxFlowEndSeconds, 261, dateTimeSeconds) \
+_(messageMD5Checksum, 262, octetArray) \
+_(messageScope, 263, u8) \
+_(minExportSeconds, 264, dateTimeSeconds) \
+_(minFlowStartSeconds, 265, dateTimeSeconds) \
+_(opaqueOctets, 266, octetArray) \
+_(sessionScope, 267, u8) \
+_(maxFlowEndMicroseconds, 268, dateTimeMicroseconds) \
+_(maxFlowEndMilliseconds, 269, dateTimeMilliseconds) \
+_(maxFlowEndNanoseconds, 270, dateTimeNanoseconds) \
+_(minFlowStartMicroseconds, 271, dateTimeMicroseconds) \
+_(minFlowStartMilliseconds, 272, dateTimeMilliseconds) \
+_(minFlowStartNanoseconds, 273, dateTimeNanoseconds) \
+_(collectorCertificate, 274, octetArray) \
+_(exporterCertificate, 275, octetArray) \
+_(dataRecordsReliability, 276, boolean) \
+_(observationPointType, 277, u8) \
+_(newConnectionDeltaCount, 278, u32) \
+_(connectionSumDurationSeconds, 279, u64) \
+_(connectionTransactionId, 280, u64) \
+_(postNATSourceIPv6Address, 281, ip6_address_t) \
+_(postNATDestinationIPv6Address, 282, ip6_address_t) \
+_(natPoolId, 283, u32) \
+_(natPoolName, 284, string) \
+_(anonymizationFlags, 285, u16) \
+_(anonymizationTechnique, 286, u16) \
+_(informationElementIndex, 287, u16) \
+_(p2pTechnology, 288, string) \
+_(tunnelTechnology, 289, string) \
+_(encryptedTechnology, 290, string) \
+_(basicList, 291, basicList) \
+_(subTemplateList, 292, subTemplateList) \
+_(subTemplateMultiList, 293, subTemplateMultiList) \
+_(bgpValidityState, 294, u8) \
+_(IPSecSPI, 295, u32) \
+_(greKey, 296, u32) \
+_(natType, 297, u8) \
+_(initiatorPackets, 298, u64) \
+_(responderPackets, 299, u64) \
+_(observationDomainName, 300, string) \
+_(selectionSequenceId, 301, u64) \
+_(selectorId, 302, u64) \
+_(informationElementId, 303, u16) \
+_(selectorAlgorithm, 304, u16) \
+_(samplingPacketInterval, 305, u32) \
+_(samplingPacketSpace, 306, u32) \
+_(samplingTimeInterval, 307, u32) \
+_(samplingTimeSpace, 308, u32) \
+_(samplingSize, 309, u32) \
+_(samplingPopulation, 310, u32) \
+_(samplingProbability, 311, float64) \
+_(dataLinkFrameSize, 312, u16) \
+_(ipHeaderPacketSection, 313, octetArray) \
+_(ipPayloadPacketSection, 314, octetArray) \
+_(dataLinkFrameSection, 315, octetArray) \
+_(mplsLabelStackSection, 316, octetArray) \
+_(mplsPayloadPacketSection, 317, octetArray) \
+_(selectorIdTotalPktsObserved, 318, u64) \
+_(selectorIdTotalPktsSelected, 319, u64) \
+_(absoluteError, 320, float64) \
+_(relativeError, 321, float64) \
+_(observationTimeSeconds, 322, dateTimeSeconds) \
+_(observationTimeMilliseconds, 323, dateTimeMilliseconds) \
+_(observationTimeMicroseconds, 324, dateTimeMicroseconds) \
+_(observationTimeNanoseconds, 325, dateTimeNanoseconds) \
+_(digestHashValue, 326, u64) \
+_(hashIPPayloadOffset, 327, u64) \
+_(hashIPPayloadSize, 328, u64) \
+_(hashOutputRangeMin, 329, u64) \
+_(hashOutputRangeMax, 330, u64) \
+_(hashSelectedRangeMin, 331, u64) \
+_(hashSelectedRangeMax, 332, u64) \
+_(hashDigestOutput, 333, boolean) \
+_(hashInitialiserValue, 334, u64) \
+_(selectorName, 335, string) \
+_(upperCILimit, 336, float64) \
+_(lowerCILimit, 337, float64) \
+_(confidenceLevel, 338, float64) \
+_(informationElementDataType, 339, u8) \
+_(informationElementDescription, 340, string) \
+_(informationElementName, 341, string) \
+_(informationElementRangeBegin, 342, u64) \
+_(informationElementRangeEnd, 343, u64) \
+_(informationElementSemantics, 344, u8) \
+_(informationElementUnits, 345, u16) \
+_(privateEnterpriseNumber, 346, u32) \
+_(virtualStationInterfaceId, 347, octetArray) \
+_(virtualStationInterfaceName, 348, string) \
+_(virtualStationUUID, 349, octetArray) \
+_(virtualStationName, 350, string) \
+_(layer2SegmentId, 351, u64) \
+_(layer2OctetDeltaCount, 352, u64) \
+_(layer2OctetTotalCount, 353, u64) \
+_(ingressUnicastPacketTotalCount, 354, u64) \
+_(ingressMulticastPacketTotalCount, 355, u64) \
+_(ingressBroadcastPacketTotalCount, 356, u64) \
+_(egressUnicastPacketTotalCount, 357, u64) \
+_(egressBroadcastPacketTotalCount, 358, u64) \
+_(monitoringIntervalStartMilliSeconds, 359, dateTimeMilliseconds) \
+_(monitoringIntervalEndMilliSeconds, 360, dateTimeMilliseconds) \
+_(portRangeStart, 361, u16) \
+_(portRangeEnd, 362, u16) \
+_(portRangeStepSize, 363, u16) \
+_(portRangeNumPorts, 364, u16) \
+_(staMacAddress, 365, macAddress) \
+_(staIPv4Address, 366, ip4_address_t) \
+_(wtpMacAddress, 367, macAddress ) \
+_(ingressInterfaceType, 368, u32) \
+_(egressInterfaceType, 369, u32) \
+_(rtpSequenceNumber, 370, u16) \
+_(userName, 371, string) \
+_(applicationCategoryName, 372, string) \
+_(applicationSubCategoryName, 373, string) \
+_(applicationGroupName, 374, string) \
+_(originalFlowsPresent, 375, u64) \
+_(originalFlowsInitiated, 376, u64) \
+_(originalFlowsCompleted, 377, u64) \
+_(distinctCountOfSourceIPAddress, 378, u64) \
+_(distinctCountOfDestinationIPAddress, 379, u64) \
+_(distinctCountOfSourceIPv4Address, 380, u32) \
+_(distinctCountOfDestinationIPv4Address, 381, u32) \
+_(distinctCountOfSourceIPv6Address, 382, u64) \
+_(distinctCountOfDestinationIPv6Address, 383, u64) \
+_(valueDistributionMethod, 384, u8) \
+_(rfc3550JitterMilliseconds, 385, u32) \
+_(rfc3550JitterMicroseconds, 386, u32) \
+_(rfc3550JitterNanoseconds, 387, u32) \
+_(dot1qDEI, 388, boolean) \
+_(dot1qCustomerDEI, 389, boolean) \
+_(flowSelectorAlgorithm, 390, u16) \
+_(flowSelectedOctetDeltaCount, 391, u64) \
+_(flowSelectedPacketDeltaCount, 392, u64) \
+_(flowSelectedFlowDeltaCount, 393, u64) \
+_(selectorIDTotalFlowsObserved, 394, u64) \
+_(selectorIDTotalFlowsSelected, 395, u64) \
+_(samplingFlowInterval, 396, u64) \
+_(samplingFlowSpacing, 397, u64) \
+_(flowSamplingTimeInterval, 398, u64) \
+_(flowSamplingTimeSpacing, 399, u64) \
+_(hashFlowDomain, 400, u16) \
+_(transportOctetDeltaCount, 401, u64) \
+_(transportPacketDeltaCount, 402, u64) \
+_(originalExporterIPv4Address, 403, ip4_address_t) \
+_(originalExporterIPv6Address, 404, ip6_address_t) \
+_(originalObservationDomainId, 405, u32) \
+_(intermediateProcessId, 406, u32) \
+_(ignoredDataRecordTotalCount, 407, u64) \
+_(dataLinkFrameType, 408, u16) \
+_(sectionOffset, 409, u16) \
+_(sectionExportedOctets, 410, u16) \
+_(dot1qServiceInstanceTag, 411, octetArray) \
+_(dot1qServiceInstanceId, 412, u32) \
+_(dot1qServiceInstancePriority, 413, u8) \
+_(dot1qCustomerSourceMacAddress, 414, macAddress) \
+_(dot1qCustomerDestinationMacAddress, 415, macAddress) \
+_(postLayer2OctetDeltaCount, 417, u64) \
+_(postMCastLayer2OctetDeltaCount, 418, u64) \
+_(postLayer2OctetTotalCount, 420, u64) \
+_(postMCastLayer2OctetTotalCount, 421, u64) \
+_(minimumLayer2TotalLength, 422, u64) \
+_(maximumLayer2TotalLength, 423, u64) \
+_(droppedLayer2OctetDeltaCount, 424, u64) \
+_(droppedLayer2OctetTotalCount, 425, u64) \
+_(ignoredLayer2OctetTotalCount, 426, u64) \
+_(notSentLayer2OctetTotalCount, 427, u64) \
+_(layer2OctetDeltaSumOfSquares, 428, u64) \
+_(layer2OctetTotalSumOfSquares, 429, u64) \
+_(layer2FrameDeltaCount, 430, u64) \
+_(layer2FrameTotalCount, 431, u64) \
+_(pseudoWireDestinationIPv4Address, 432, ip4_address_t) \
+_(ignoredLayer2FrameTotalCount, 433, u64) \
+_(natQuotaExceededEvent, 466, u32)
+
+typedef enum {
+#define _(n,v,t) n = v,
+ foreach_ipfix_info_element_t
+#undef _
+} ipfix_info_element_id_t;
+
+#endif /* __included_ipfix_info_elements_h__ */
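
[note: these identifiers are what flow_report_classify.c plugs into the
template field specifiers, e.g.:

    f->e_id_length = ipfix_e_id_length (0 /* enterprise */,
                                        sourceIPv4Address, 4);
]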
diff --git a/src/vnet/flow/ipfix_packet.h b/src/vnet/flow/ipfix_packet.h
new file mode 100644
index 00000000..32979619
--- /dev/null
+++ b/src/vnet/flow/ipfix_packet.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ipfix_packet_h__
+#define __included_ipfix_packet_h__
+
+#include <vnet/flow/ipfix_info_elements.h>
+
+/* From RFC-7011:
+ * https://tools.ietf.org/html/rfc7011
+ */
+
+typedef struct {
+ u32 version_length;
+ u32 export_time;
+ u32 sequence_number;
+ u32 domain_id;
+} ipfix_message_header_t;
+
+static inline u32 version_length (u16 length)
+{
+ return clib_host_to_net_u32 (0x000a0000 | length);
+}
+
+
+/*
+ * The Field Specifier format is shown in Figure G.
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |E| Information Element ident. | Field Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Enterprise Number |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Figure G: Field Specifier Format
+ *
+ * Where:
+ *
+ * E
+ *
+ * Enterprise bit. This is the first bit of the Field Specifier. If
+ * this bit is zero, the Information Element identifier identifies an
+ * Information Element in [IANA-IPFIX], and the four-octet Enterprise
+ * Number field MUST NOT be present. If this bit is one, the
+ * Information Element identifier identifies an enterprise-specific
+ * Information Element, and the Enterprise Number field MUST be
+ * present.
+ */
+
+typedef struct {
+ u32 e_id_length;
+ u32 enterprise;
+} ipfix_enterprise_field_specifier_t;
+
+typedef struct {
+ u32 e_id_length;
+} ipfix_field_specifier_t;
+
+static inline u32 ipfix_e_id_length (int e, u16 id, u16 length)
+{
+ u32 value;
+ value = (e<<31) | ((id&0x7FFF) <<16) | length;
+ return clib_host_to_net_u32 (value);
+}
+
+/*
+ * Every Set contains a common header. This header is defined in
+ * Figure I.
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Set ID | Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Figure I: Set Header Format
+ *
+ * Each Set Header field is exported in network format. The fields are
+ * defined as follows:
+ *
+ * Set ID
+ *
+ * Identifies the Set. A value of 2 is reserved for Template Sets.
+ * A value of 3 is reserved for Options Template Sets. Values from 4
+ * to 255 are reserved for future use. Values 256 and above are used
+ * for Data Sets. The Set ID values of 0 and 1 are not used, for
+ * historical reasons [RFC3954].
+ *
+ * Length
+ *
+ * Total length of the Set, in octets, including the Set Header, all
+ * records, and the optional padding. Because an individual Set MAY
+ * contain multiple records, the Length value MUST be used to
+ * determine the position of the next Set.
+ */
+
+typedef struct {
+ u32 set_id_length;
+} ipfix_set_header_t;
+
+static inline u32 ipfix_set_id_length (u16 set_id, u16 length)
+{
+ return clib_host_to_net_u32 ((set_id<<16) | length);
+}
+
+/*
+ * The format of the Template Record is shown in Figure J. It consists
+ * of a Template Record Header and one or more Field Specifiers. Field
+ * Specifiers are defined in Figure G above.
+ *
+ * +--------------------------------------------------+
+ * | Template Record Header |
+ * +--------------------------------------------------+
+ * | Field Specifier |
+ * +--------------------------------------------------+
+ * | Field Specifier |
+ * +--------------------------------------------------+
+ * ...
+ * +--------------------------------------------------+
+ * | Field Specifier |
+ * +--------------------------------------------------+
+ *
+ * Figure J: Template Record Format
+ *
+ * The format of the Template Record Header is shown in Figure K.
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Template ID (> 255) | Field Count |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Figure K: Template Record Header Format
+ *
+ * The Template Record Header Field definitions are as follows:
+ *
+ * Template ID
+ *
+ * Each Template Record is given a unique Template ID in the range
+ * 256 to 65535. This uniqueness is local to the Transport Session
+ * and Observation Domain that generated the Template ID. Since
+ * Template IDs are used as Set IDs in the Sets they describe (see
+ * Section 3.4.3), values 0-255 are reserved for special Set types
+ * (e.g., Template Sets themselves), and Templates and Options
+ * Templates (see Section 3.4.2) cannot share Template IDs within a
+ * Transport Session and Observation Domain. There are no
+ * constraints regarding the order of the Template ID allocation. As
+ * Exporting Processes are free to allocate Template IDs as they see
+ * fit, Collecting Processes MUST NOT assume incremental Template
+ * IDs, or anything about the contents of a Template based on its
+ * Template ID alone.
+ *
+ * Field Count
+ *
+ * Number of fields in this Template Record.
+ */
+
+typedef struct {
+ u32 id_count;
+} ipfix_template_header_t;
+
+static inline u32 ipfix_id_count (u16 id, u16 count)
+{
+ return clib_host_to_net_u32 ((id<<16) | count);
+}
+
+/* Template packet */
+typedef struct {
+ ipfix_message_header_t h;
+ ipfix_set_header_t s;
+ ipfix_template_header_t t;
+ ipfix_field_specifier_t fields[0];
+} ipfix_template_packet_t;
+
+#endif /* __included_ipfix_packet_h__ */
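
[note: a worked example of the three encodings above, values illustrative.
For a 64-octet IPFIX message carrying one template set whose template has
id 256 and 3 fields:

    h->version_length = version_length (64);         /* 0x000a0040 on the wire */
    s->set_id_length = ipfix_set_id_length (2, 48);   /* template set, 48 octets */
    t->id_count = ipfix_id_count (256, 3);            /* 0x01000003 on the wire */

i.e. version 0x000a in the top 16 bits and the octet length in the bottom 16,
everything stored in network byte order.]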
diff --git a/src/vnet/global_funcs.h b/src/vnet/global_funcs.h
new file mode 100644
index 00000000..92a5c04d
--- /dev/null
+++ b/src/vnet/global_funcs.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * global_funcs.h: global data structure access functions
+ */
+
+#ifndef included_vnet_global_funcs_h_
+#define included_vnet_global_funcs_h_
+
+vnet_main_t *vnet_get_main (void);
+
+#endif /* included_vnet_global_funcs_h_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
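
[note: callers simply do

    vnet_main_t *vnm = vnet_get_main ();

as flow_report_init does above to populate its convenience pointer.]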
diff --git a/src/vnet/gre/error.def b/src/vnet/gre/error.def
new file mode 100644
index 00000000..161ecc1d
--- /dev/null
+++ b/src/vnet/gre/error.def
@@ -0,0 +1,23 @@
+/*
+ * gre_error.def: gre errors
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+gre_error (NONE, "no error")
+gre_error (UNKNOWN_PROTOCOL, "unknown protocol")
+gre_error (UNSUPPORTED_VERSION, "unsupported version")
+gre_error (PKTS_DECAP, "GRE input packets decapsulated")
+gre_error (PKTS_ENCAP, "GRE output packets encapsulated")
+gre_error (NO_SUCH_TUNNEL, "GRE input packets dropped due to missing tunnel")
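
[note: these entries are consumed through the usual VPP x-macro idiom; a
sketch of the expansion (the real #include site lives in gre.h, which is not
part of this hunk):

    typedef enum {
    #define gre_error(n,s) GRE_ERROR_##n,
    #include <vnet/gre/error.def>
    #undef gre_error
      GRE_N_ERROR,
    } gre_error_t;

This is how names such as GRE_ERROR_PKTS_ENCAP, used by the tx path below,
come into existence.]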
diff --git a/src/vnet/gre/gre.api b/src/vnet/gre/gre.api
new file mode 100644
index 00000000..28f6dbc9
--- /dev/null
+++ b/src/vnet/gre/gre.api
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+define gre_add_del_tunnel
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 teb;
+ u8 src_address[16];
+ u8 dst_address[16];
+ u32 outer_fib_id;
+};
+
+define gre_add_del_tunnel_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+define gre_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+define gre_tunnel_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u8 teb;
+ u8 src_address[16];
+ u8 dst_address[16];
+ u32 outer_fib_id;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
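
[note: a client-side sketch of filling the request above for an IPv4 tunnel;
mp allocation, message header setup, and send are elided, and per VPP API
convention multi-byte fields travel in network byte order:

    mp->is_add = 1;
    mp->is_ipv6 = 0;
    mp->teb = 0;
    clib_memcpy (mp->src_address, &src.as_u32, 4); /* ip4 in first 4 bytes */
    clib_memcpy (mp->dst_address, &dst.as_u32, 4);
    mp->outer_fib_id = clib_host_to_net_u32 (fib_id);
]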
diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c
new file mode 100644
index 00000000..a153c3c5
--- /dev/null
+++ b/src/vnet/gre/gre.c
@@ -0,0 +1,522 @@
+/*
+ * gre.c: gre
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/gre/gre.h>
+#include <vnet/adj/adj_midchain.h>
+
+gre_main_t gre_main;
+
+typedef struct {
+ union {
+ ip4_and_gre_header_t ip4_and_gre;
+ u64 as_u64[3];
+ };
+} ip4_and_gre_union_t;
+
+typedef struct {
+ union {
+ ip6_and_gre_header_t ip6_and_gre;
+ u64 as_u64[3];
+ };
+} ip6_and_gre_union_t;
+
+
+/* Packet trace structure */
+typedef struct {
+ /* Tunnel-id / index in tunnel vector */
+ u32 tunnel_id;
+
+ /* pkt length */
+ u32 length;
+
+ /* tunnel ip addresses */
+ ip46_address_t src;
+ ip46_address_t dst;
+} gre_tx_trace_t;
+
+u8 * format_gre_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ gre_tx_trace_t * t = va_arg (*args, gre_tx_trace_t *);
+
+ s = format (s, "GRE: tunnel %d len %d src %U dst %U",
+ t->tunnel_id, clib_net_to_host_u16 (t->length),
+ format_ip46_address, &t->src, IP46_TYPE_ANY,
+ format_ip46_address, &t->dst, IP46_TYPE_ANY);
+ return s;
+}
+
+u8 * format_gre_protocol (u8 * s, va_list * args)
+{
+ gre_protocol_t p = va_arg (*args, u32);
+ gre_main_t * gm = &gre_main;
+ gre_protocol_info_t * pi = gre_get_protocol_info (gm, p);
+
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "0x%04x", p);
+
+ return s;
+}
+
+u8 * format_gre_header_with_length (u8 * s, va_list * args)
+{
+ gre_main_t * gm = &gre_main;
+ gre_header_t * h = va_arg (*args, gre_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ gre_protocol_t p = clib_net_to_host_u16 (h->protocol);
+ uword indent, header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "gre header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "GRE %U", format_gre_protocol, p);
+
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ gre_protocol_info_t * pi = gre_get_protocol_info (gm, p);
+ vlib_node_t * node = vlib_get_node (gm->vlib_main, pi->node_index);
+ if (node->format_buffer)
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ node->format_buffer, (void *) (h + 1),
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+u8 * format_gre_header (u8 * s, va_list * args)
+{
+ gre_header_t * h = va_arg (*args, gre_header_t *);
+ return format (s, "%U", format_gre_header_with_length, h, 0);
+}
+
+/* Returns gre protocol as an int in host byte order. */
+uword
+unformat_gre_protocol_host_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 * result = va_arg (*args, u16 *);
+ gre_main_t * gm = &gre_main;
+ int i;
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ gm->protocol_info_by_name, &i))
+ {
+ gre_protocol_info_t * pi = vec_elt_at_index (gm->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword
+unformat_gre_protocol_net_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 * result = va_arg (*args, u16 *);
+ if (! unformat_user (input, unformat_gre_protocol_host_byte_order, result))
+ return 0;
+ *result = clib_host_to_net_u16 ((u16) *result);
+ return 1;
+}
+
+uword
+unformat_gre_header (unformat_input_t * input, va_list * args)
+{
+ u8 ** result = va_arg (*args, u8 **);
+ gre_header_t _h, * h = &_h;
+ u16 p;
+
+ if (! unformat (input, "%U",
+ unformat_gre_protocol_host_byte_order, &p))
+ return 0;
+
+ h->protocol = clib_host_to_net_u16 (p);
+
+ /* Add header to result. */
+ {
+ void * p;
+ u32 n_bytes = sizeof (h[0]);
+
+ vec_add2 (*result, p, n_bytes);
+ clib_memcpy (p, h, n_bytes);
+ }
+
+ return 1;
+}
+
+static int
+gre_proto_from_vnet_link (vnet_link_t link)
+{
+ switch (link)
+ {
+ case VNET_LINK_IP4:
+ return (GRE_PROTOCOL_ip4);
+ case VNET_LINK_IP6:
+ return (GRE_PROTOCOL_ip6);
+ case VNET_LINK_MPLS:
+ return (GRE_PROTOCOL_mpls_unicast);
+ case VNET_LINK_ETHERNET:
+ return (GRE_PROTOCOL_teb);
+ case VNET_LINK_ARP:
+ return (GRE_PROTOCOL_arp);
+ case VNET_LINK_NSH:
+ ASSERT(0);
+ break;
+ }
+ ASSERT(0);
+ return (GRE_PROTOCOL_ip4);
+}
+
+static u8*
+gre_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
+{
+ gre_main_t * gm = &gre_main;
+ ip4_and_gre_header_t * h4;
+ ip6_and_gre_header_t * h6;
+ u8* rewrite = NULL;
+ gre_tunnel_t *t;
+ u32 ti;
+ u8 is_ipv6;
+
+ ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
+
+ if (~0 == ti)
+ /* not one of ours */
+ return (0);
+
+ t = pool_elt_at_index(gm->tunnels, ti);
+
+ is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+
+ if (!is_ipv6)
+ {
+ vec_validate(rewrite, sizeof(*h4)-1);
+ h4 = (ip4_and_gre_header_t*)rewrite;
+ h4->gre.protocol = clib_host_to_net_u16(gre_proto_from_vnet_link(link_type));
+
+ h4->ip4.ip_version_and_header_length = 0x45;
+ h4->ip4.ttl = 254;
+ h4->ip4.protocol = IP_PROTOCOL_GRE;
+ /* fixup ip4 header length and checksum after-the-fact */
+ h4->ip4.src_address.as_u32 = t->tunnel_src.ip4.as_u32;
+ h4->ip4.dst_address.as_u32 = t->tunnel_dst.fp_addr.ip4.as_u32;
+ h4->ip4.checksum = ip4_header_checksum (&h4->ip4);
+ }
+ else
+ {
+ vec_validate(rewrite, sizeof(*h6)-1);
+ h6 = (ip6_and_gre_header_t*)rewrite;
+ h6->gre.protocol = clib_host_to_net_u16(gre_proto_from_vnet_link(link_type));
+
+ h6->ip6.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(6 << 28);
+ h6->ip6.hop_limit = 255;
+ h6->ip6.protocol = IP_PROTOCOL_GRE;
+ /* fixup ip6 header length and checksum after-the-fact */
+ h6->ip6.src_address.as_u64[0] = t->tunnel_src.ip6.as_u64[0];
+ h6->ip6.src_address.as_u64[1] = t->tunnel_src.ip6.as_u64[1];
+ h6->ip6.dst_address.as_u64[0] = t->tunnel_dst.fp_addr.ip6.as_u64[0];
+ h6->ip6.dst_address.as_u64[1] = t->tunnel_dst.fp_addr.ip6.as_u64[1];
+ }
+
+ return (rewrite);
+}
+
+#define is_v4_packet(_h) (((*(u8*) _h) & 0xF0) == 0x40)
+
+void
+gre4_fixup (vlib_main_t *vm,
+ ip_adjacency_t *adj,
+ vlib_buffer_t *b0)
+{
+ ip4_header_t * ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Fixup the checksum and len fields in the GRE tunnel encap
+ * that was applied at the midchain node */
+ ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ ip0->checksum = ip4_header_checksum (ip0);
+}
+
+void
+gre6_fixup (vlib_main_t *vm,
+ ip_adjacency_t *adj,
+ vlib_buffer_t *b0)
+{
+ ip6_header_t * ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Fixup the payload length field in the GRE tunnel encap that was applied
+ * at the midchain node */
+ ip0->payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0))
+ - sizeof(*ip0);
+}
+
+void
+gre_update_adj (vnet_main_t * vnm,
+ u32 sw_if_index,
+ adj_index_t ai)
+{
+ gre_main_t * gm = &gre_main;
+ gre_tunnel_t *t;
+ u32 ti;
+ u8 is_ipv6;
+
+ ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
+ t = pool_elt_at_index(gm->tunnels, ti);
+ is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+
+ adj_nbr_midchain_update_rewrite (ai, !is_ipv6 ? gre4_fixup : gre6_fixup,
+ (VNET_LINK_ETHERNET == adj_get_link_type (ai) ?
+ ADJ_FLAG_MIDCHAIN_NO_COUNT :
+ ADJ_FLAG_NONE),
+ gre_build_rewrite(vnm, sw_if_index,
+ adj_get_link_type(ai),
+ NULL));
+
+ gre_tunnel_stack(ai);
+}
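+
+/*
+ * Note: ADJ_FLAG_MIDCHAIN_NO_COUNT is applied only to ethernet (TEB)
+ * adjacencies above, presumably so that L2 frames, which already pass
+ * through the tunnel's own TX node, are not counted a second time at
+ * the midchain.
+ */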
+
+/**
+ * @brief TX function. Only called for L2 traffic; L3 traffic uses the adj-midchains
+ */
+static uword
+gre_interface_tx_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ gre_main_t * gm = &gre_main;
+ u32 next_index;
+ u32 * from, * to_next, n_left_from, n_left_to_next;
+ vnet_interface_output_runtime_t * rd = (void *) node->runtime_data;
+ const gre_tunnel_t *gt = pool_elt_at_index (gm->tunnels, rd->dev_instance);
+ u8 is_ipv6 = gt->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+
+ /* Vector of buffer / pkt indices we're supposed to process */
+ from = vlib_frame_vector_args (frame);
+
+ /* Number of buffers / pkts */
+ n_left_from = frame->n_vectors;
+
+ /* Speculatively send the first buffer to the last disposition we used */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ /* set up to enqueue to our disposition with index = next_index */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /*
+ * FIXME DUAL LOOP
+ */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer(vm, bi0);
+
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = gt->l2_adj_index;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gre_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = gt - gm->tunnels;
+ tr->src = gt->tunnel_src;
+ tr->dst = gt->tunnel_dst.fp_addr;
+ tr->length = vlib_buffer_length_in_chain (vm, b0);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, gt->l2_tx_arc);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, !is_ipv6 ? gre4_input_node.index :
+ gre6_input_node.index,
+ GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+static uword
+gre_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (gre_interface_tx_inline (vm, node, frame));
+}
+
+static uword
+gre_teb_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (gre_interface_tx_inline (vm, node, frame));
+}
+
+static u8 * format_gre_tunnel_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "gre%d", dev_instance);
+}
+
+static u8 * format_gre_tunnel_teb_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "teb-gre%d", dev_instance);
+}
+
+static u8 * format_gre_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ CLIB_UNUSED (int verbose) = va_arg (*args, int);
+
+ s = format (s, "GRE tunnel: id %d\n", dev_instance);
+ return s;
+}
+
+VNET_DEVICE_CLASS (gre_device_class) = {
+ .name = "GRE tunnel device",
+ .format_device_name = format_gre_tunnel_name,
+ .format_device = format_gre_device,
+ .format_tx_trace = format_gre_tx_trace,
+ .tx_function = gre_interface_tx,
+ .admin_up_down_function = gre_interface_admin_up_down,
+#ifdef SOON
+ .clear counter = 0;
+#endif
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_class,
+ gre_interface_tx)
+
+VNET_DEVICE_CLASS (gre_device_teb_class) = {
+ .name = "GRE TEB tunnel device",
+ .format_device_name = format_gre_tunnel_teb_name,
+ .format_device = format_gre_device,
+ .format_tx_trace = format_gre_tx_trace,
+ .tx_function = gre_teb_interface_tx,
+ .admin_up_down_function = gre_interface_admin_up_down,
+#ifdef SOON
+ .clear counter = 0;
+#endif
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_teb_class,
+ gre_teb_interface_tx)
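+
+/*
+ * Two device classes are registered so that L3 and TEB tunnels get
+ * distinct interface names ("gre%d" vs "teb-gre%d") and distinct TX
+ * entry points, even though both TX functions are thin wrappers
+ * around gre_interface_tx_inline().
+ */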
+
+VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
+ .name = "GRE",
+ .format_header = format_gre_header_with_length,
+ .unformat_header = unformat_gre_header,
+ .build_rewrite = gre_build_rewrite,
+ .update_adjacency = gre_update_adj,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+static void add_protocol (gre_main_t * gm,
+ gre_protocol_t protocol,
+ char * protocol_name)
+{
+ gre_protocol_info_t * pi;
+ u32 i;
+
+ vec_add2 (gm->protocol_infos, pi, 1);
+ i = pi - gm->protocol_infos;
+
+ pi->name = protocol_name;
+ pi->protocol = protocol;
+ pi->next_index = pi->node_index = ~0;
+
+ hash_set (gm->protocol_info_by_protocol, protocol, i);
+ hash_set_mem (gm->protocol_info_by_name, pi->name, i);
+}
+
+static clib_error_t * gre_init (vlib_main_t * vm)
+{
+ gre_main_t * gm = &gre_main;
+ clib_error_t * error;
+ ip_main_t * im = &ip_main;
+ ip_protocol_info_t * pi;
+
+ memset (gm, 0, sizeof (gm[0]));
+ gm->vlib_main = vm;
+ gm->vnet_main = vnet_get_main();
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ /* Set up the ip packet generator */
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_GRE);
+ pi->format_header = format_gre_header;
+ pi->unformat_pg_edit = unformat_pg_gre_header;
+
+ gm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ gm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
+ gm->tunnel_by_key4 = hash_create (0, sizeof (uword));
+ gm->tunnel_by_key6 = hash_create_mem (0, sizeof(u64[4]), sizeof (uword));
+
+#define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s);
+ foreach_gre_protocol
+#undef _
+
+ return vlib_call_init_function (vm, gre_input_init);
+}
+
+VLIB_INIT_FUNCTION (gre_init);
+
+gre_main_t * gre_get_main (vlib_main_t * vm)
+{
+ vlib_call_init_function (vm, gre_init);
+ return &gre_main;
+}
+
diff --git a/src/vnet/gre/gre.h b/src/vnet/gre/gre.h
new file mode 100644
index 00000000..ad3e025a
--- /dev/null
+++ b/src/vnet/gre/gre.h
@@ -0,0 +1,253 @@
+/*
+ * gre.h: types/functions for gre.
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_gre_h
+#define included_gre_h
+
+#include <vnet/vnet.h>
+#include <vnet/gre/packet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/format.h>
+#include <vnet/adj/adj_types.h>
+
+extern vnet_hw_interface_class_t gre_hw_interface_class;
+
+typedef enum {
+#define gre_error(n,s) GRE_ERROR_##n,
+#include <vnet/gre/error.def>
+#undef gre_error
+ GRE_N_ERROR,
+} gre_error_t;
+
+/**
+ * A GRE payload protocol registration
+ */
+typedef struct {
+ /** Name (a c string). */
+ char * name;
+
+ /** GRE protocol type in host byte order. */
+ gre_protocol_t protocol;
+
+ /** Node which handles this type. */
+ u32 node_index;
+
+ /** Next index for this type. */
+ u32 next_index;
+} gre_protocol_info_t;
+
+/**
+ * @brief The GRE tunnel type
+ */
+typedef enum gre_tunnel_type_t_
+{
+ /**
+ * L3 GRE (i.e. this tunnel is in L3 mode)
+ */
+ GRE_TUNNEL_TYPE_L3,
+ /**
+ * Transparent Ethernet Bridging - the tunnel is in L2 mode
+ */
+ GRE_TUNNEL_TYPE_TEB,
+} gre_tunnel_type_t;
+
+#define GRE_TUNNEL_TYPE_NAMES { \
+ [GRE_TUNNEL_TYPE_L3] = "L3", \
+ [GRE_TUNNEL_TYPE_TEB] = "TEB", \
+}
+
+#define GRE_TUNNEL_N_TYPES ((gre_tunnel_type_t)GRE_TUNNEL_TYPE_TEB+1)
+
+/**
+ * @brief A representation of a GRE tunnel
+ */
+typedef struct {
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /**
+ * The tunnel's source/local address
+ */
+ ip46_address_t tunnel_src;
+ /**
+ * The tunnel's destination/remote address
+ */
+ fib_prefix_t tunnel_dst;
+ /**
+ * The FIB in which the src.dst address are present
+ */
+ u32 outer_fib_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ gre_tunnel_type_t type;
+
+ /**
+ * The FIB entry sourced by the tunnel for its destination prefix
+ */
+ fib_node_index_t fib_entry_index;
+
+ /**
+ * The tunnel is a child of the FIB entry for its destination. This is
+ * so it receives updates when the forwarding information for that entry
+ * changes.
+ * The tunnel's sibling index on the FIB entry's dependency list.
+ */
+ u32 sibling_index;
+
+ /**
+ * On an L2 tunnel this is the VLIB arc from the l2-tx node to the l2-midchain
+ */
+ u32 l2_tx_arc;
+
+ /**
+ * An L2 tunnel always requires an L2 midchain; cached here for the data plane.
+ */
+ adj_index_t l2_adj_index;
+} gre_tunnel_t;
+
+/**
+ * @brief GRE related global data
+ */
+typedef struct {
+ /**
+ * pool of tunnel instances
+ */
+ gre_tunnel_t *tunnels;
+
+ /**
+ * GRE payload protocol registrations
+ */
+ gre_protocol_info_t * protocol_infos;
+
+ /**
+ * Hash tables mapping name/protocol to protocol info index.
+ */
+ uword * protocol_info_by_name, * protocol_info_by_protocol;
+
+ /**
+ * Hash mapping ipv4 src/dst addr pair to tunnel
+ */
+ uword * tunnel_by_key4;
+
+ /**
+ * Hash mapping ipv6 src/dst addr pair to tunnel
+ */
+ uword * tunnel_by_key6;
+
+ /**
+ * Free vlib hw_if_indices.
+ * A free list per tunnel type, since the interfaces created are of
+ * different types and we cannot change the type.
+ */
+ u32 * free_gre_tunnel_hw_if_indices[GRE_TUNNEL_N_TYPES];
+
+ /**
+ * Mapping from sw_if_index to tunnel index
+ */
+ u32 * tunnel_index_by_sw_if_index;
+
+ /* Sparse vector mapping gre protocol in network byte order
+ to next index. */
+ u16 * next_by_protocol;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} gre_main_t;
+
+/**
+ * @brief IPv4 and GRE header.
+ */
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ gre_header_t gre;
+}) ip4_and_gre_header_t;
+
+/**
+ * @brief IPv6 and GRE header.
+ */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6;
+ gre_header_t gre;
+}) ip6_and_gre_header_t;
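+
+/*
+ * Both structs are CLIB_PACKED, so the per-packet encap overhead added
+ * by gre_build_rewrite() is exactly 24 bytes (ip4 + gre) or 44 bytes
+ * (ip6 + gre).
+ */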
+
+always_inline gre_protocol_info_t *
+gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol)
+{
+ uword * p = hash_get (em->protocol_info_by_protocol, protocol);
+ return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0;
+}
+
+gre_main_t gre_main;
+
+/* Register given node index to take input for given gre type. */
+void
+gre_register_input_type (vlib_main_t * vm,
+ gre_protocol_t protocol,
+ u32 node_index);
+
+extern clib_error_t * gre_interface_admin_up_down (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags);
+
+extern void gre_tunnel_stack (adj_index_t ai);
+extern void gre_update_adj (vnet_main_t * vnm,
+ u32 sw_if_index,
+ adj_index_t ai);
+
+format_function_t format_gre_protocol;
+format_function_t format_gre_header;
+format_function_t format_gre_header_with_length;
+
+extern vlib_node_registration_t gre4_input_node;
+extern vlib_node_registration_t gre6_input_node;
+extern vnet_device_class_t gre_device_class;
+extern vnet_device_class_t gre_device_teb_class;
+
+/* Parse gre protocol as 0xXXXX or protocol name.
+ In either host or network byte order. */
+unformat_function_t unformat_gre_protocol_host_byte_order;
+unformat_function_t unformat_gre_protocol_net_byte_order;
+
+/* Parse gre header. */
+unformat_function_t unformat_gre_header;
+unformat_function_t unformat_pg_gre_header;
+
+void
+gre_register_input_protocol (vlib_main_t * vm,
+ gre_protocol_t protocol,
+ u32 node_index);
+
+/* manually added to the interface output node in gre.c */
+#define GRE_OUTPUT_NEXT_LOOKUP 1
+
+typedef struct {
+ u8 is_add;
+
+ ip46_address_t src, dst;
+ u8 is_ipv6;
+ u32 outer_fib_id;
+ u8 teb;
+} vnet_gre_add_del_tunnel_args_t;
+
+int vnet_gre_add_del_tunnel
+ (vnet_gre_add_del_tunnel_args_t *a, u32 * sw_if_indexp);
+
+#endif /* included_gre_h */
diff --git a/src/vnet/gre/gre_api.c b/src/vnet/gre/gre_api.c
new file mode 100644
index 00000000..ceeb1d4c
--- /dev/null
+++ b/src/vnet/gre/gre_api.c
@@ -0,0 +1,226 @@
+/*
+ *------------------------------------------------------------------
+ * gre_api.c - gre api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+
+#include <vnet/gre/gre.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(GRE_ADD_DEL_TUNNEL, gre_add_del_tunnel) \
+_(GRE_TUNNEL_DUMP, gre_tunnel_dump)
+
+static void vl_api_gre_add_del_tunnel_t_handler
+ (vl_api_gre_add_del_tunnel_t * mp)
+{
+ vl_api_gre_add_del_tunnel_reply_t *rmp;
+ int rv = 0;
+ vnet_gre_add_del_tunnel_args_t _a, *a = &_a;
+ u32 outer_fib_id;
+ u32 p;
+ u32 sw_if_index = ~0;
+
+ p = fib_table_find (!mp->is_ipv6 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6,
+ ntohl (mp->outer_fib_id));
+ if (p == ~0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+ outer_fib_id = p;
+
+ /* Check src & dst are different */
+ if ((mp->is_ipv6 && memcmp (mp->src_address, mp->dst_address, 16) == 0) ||
+ (!mp->is_ipv6 && memcmp (mp->src_address, mp->dst_address, 4) == 0))
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = mp->is_add;
+ a->teb = mp->teb;
+ a->is_ipv6 = mp->is_ipv6;
+
+ /* ip addresses sent in network byte order */
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&(a->src.ip4), mp->src_address, 4);
+ clib_memcpy (&(a->dst.ip4), mp->dst_address, 4);
+ }
+ else
+ {
+ clib_memcpy (&(a->src.ip6), mp->src_address, 16);
+ clib_memcpy (&(a->dst.ip6), mp->dst_address, 16);
+ }
+
+ a->outer_fib_id = outer_fib_id;
+ rv = vnet_gre_add_del_tunnel (a, &sw_if_index);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_GRE_ADD_DEL_TUNNEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void send_gre_tunnel_details
+ (gre_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_gre_tunnel_details_t *rmp;
+ u8 is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+ fib_table_t *ft;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_GRE_TUNNEL_DETAILS);
+ if (!is_ipv6)
+ {
+ clib_memcpy (rmp->src_address, &(t->tunnel_src.ip4.as_u8), 4);
+ clib_memcpy (rmp->dst_address, &(t->tunnel_dst.fp_addr.ip4.as_u8), 4);
+ ft = fib_table_get (t->outer_fib_index, FIB_PROTOCOL_IP4);
+ rmp->outer_fib_id = ft->ft_table_id;
+ }
+ else
+ {
+ clib_memcpy (rmp->src_address, &(t->tunnel_src.ip6.as_u8), 16);
+ clib_memcpy (rmp->dst_address, &(t->tunnel_dst.fp_addr.ip6.as_u8), 16);
+ ft = fib_table_get (t->outer_fib_index, FIB_PROTOCOL_IP6);
+ rmp->outer_fib_id = ft->ft_table_id;
+ }
+ rmp->teb = (GRE_TUNNEL_TYPE_TEB == t->type);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->context = context;
+ rmp->is_ipv6 = is_ipv6;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ gre_main_t *gm = &gre_main;
+ gre_tunnel_t *t;
+ u32 sw_if_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (t, gm->tunnels,
+ ({
+ send_gre_tunnel_details(t, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index)) ||
+ (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &gm->tunnels[gm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_gre_tunnel_details (t, q, mp->context);
+ }
+}
+
+/*
+ * gre_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_gre;
+#undef _
+}
+
+static clib_error_t *
+gre_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (gre_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c
new file mode 100644
index 00000000..d574e596
--- /dev/null
+++ b/src/vnet/gre/interface.c
@@ -0,0 +1,706 @@
+/*
+ * gre_interface.c: gre interfaces
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/gre/gre.h>
+#include <vnet/ip/format.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/mpls/mpls.h>
+
+static const char *gre_tunnel_type_names[] = GRE_TUNNEL_TYPE_NAMES;
+
+static inline u64
+gre4_mk_key (const ip4_address_t *src,
+ const ip4_address_t *dst,
+ u32 out_fib_index)
+{
+ // FIXME. the fib index should be part of the key
+ return ((u64)src->as_u32 << 32 | (u64)dst->as_u32);
+}
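+
+/*
+ * Example: the key is simply (src.as_u32 << 32) | dst.as_u32, so two
+ * tunnels with the same src/dst pair in different outer FIBs would
+ * collide; as the FIXME above notes, out_fib_index is not yet part of
+ * the key.
+ */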
+
+static u8 *
+format_gre_tunnel_type (u8 * s, va_list * args)
+{
+ gre_tunnel_type_t type = va_arg (*args, gre_tunnel_type_t);
+
+ return (format(s, "%s", gre_tunnel_type_names[type]));
+}
+
+static u8 *
+format_gre_tunnel (u8 * s, va_list * args)
+{
+ gre_tunnel_t * t = va_arg (*args, gre_tunnel_t *);
+ gre_main_t * gm = &gre_main;
+ u8 is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+
+ if (!is_ipv6)
+ s = format (s,
+ "[%d] %U (src) %U (dst) payload %U outer_fib_index %d",
+ t - gm->tunnels,
+ format_ip4_address, &t->tunnel_src.ip4,
+ format_ip4_address, &t->tunnel_dst.fp_addr.ip4,
+ format_gre_tunnel_type, t->type,
+ t->outer_fib_index);
+ else
+ s = format (s,
+ "[%d] %U (src) %U (dst) payload %U outer_fib_index %d",
+ t - gm->tunnels,
+ format_ip6_address, &t->tunnel_src.ip6,
+ format_ip6_address, &t->tunnel_dst.fp_addr.ip6,
+ format_gre_tunnel_type, t->type,
+ t->outer_fib_index);
+
+ return s;
+}
+
+static gre_tunnel_t *
+gre_tunnel_db_find (const ip46_address_t *src,
+ const ip46_address_t *dst,
+ u32 out_fib_index,
+ u8 is_ipv6)
+{
+ gre_main_t * gm = &gre_main;
+ uword * p;
+ u64 key4, key6[4];
+
+ if (!is_ipv6)
+ {
+ key4 = gre4_mk_key(&src->ip4, &dst->ip4, out_fib_index);
+ p = hash_get (gm->tunnel_by_key4, key4);
+ }
+ else
+ {
+ key6[0] = src->ip6.as_u64[0];
+ key6[1] = src->ip6.as_u64[1];
+ key6[2] = dst->ip6.as_u64[0];
+ key6[3] = dst->ip6.as_u64[1];
+ p = hash_get_mem (gm->tunnel_by_key6, key6);
+ }
+
+ if (NULL == p)
+ return (NULL);
+
+ return (pool_elt_at_index (gm->tunnels, p[0]));
+}
+
+static void
+gre_tunnel_db_add (const gre_tunnel_t *t)
+{
+ gre_main_t * gm = &gre_main;
+ u64 key4, key6[4], *key6_copy;
+ u8 is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+
+ if (!is_ipv6)
+ {
+ key4 = gre4_mk_key(&t->tunnel_src.ip4, &t->tunnel_dst.fp_addr.ip4,
+ t->outer_fib_index);
+ hash_set (gm->tunnel_by_key4, key4, t - gm->tunnels);
+ }
+ else
+ {
+ key6[0] = t->tunnel_src.ip6.as_u64[0];
+ key6[1] = t->tunnel_src.ip6.as_u64[1];
+ key6[2] = t->tunnel_dst.fp_addr.ip6.as_u64[0];
+ key6[3] = t->tunnel_dst.fp_addr.ip6.as_u64[1];
+ key6_copy = clib_mem_alloc (sizeof (key6));
+ clib_memcpy (key6_copy, key6, sizeof (key6));
+ hash_set_mem (gm->tunnel_by_key6, key6_copy, t - gm->tunnels);
+ }
+}
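+
+/*
+ * The ip6 key is heap-copied above because hash_set_mem() stores the
+ * key pointer, not the key contents, so the copy must outlive the
+ * hash entry. Note that gre_tunnel_db_remove() below does not reclaim
+ * this allocation.
+ */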
+
+static void
+gre_tunnel_db_remove (const gre_tunnel_t *t)
+{
+ gre_main_t * gm = &gre_main;
+ u64 key4, key6[4];
+ u8 is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+
+ if (!is_ipv6)
+ {
+ key4 = gre4_mk_key(&t->tunnel_src.ip4, &t->tunnel_dst.fp_addr.ip4,
+ t->outer_fib_index);
+ hash_unset (gm->tunnel_by_key4, key4);
+ }
+ else
+ {
+ key6[0] = t->tunnel_src.ip6.as_u64[0];
+ key6[1] = t->tunnel_src.ip6.as_u64[1];
+ key6[2] = t->tunnel_dst.fp_addr.ip6.as_u64[0];
+ key6[3] = t->tunnel_dst.fp_addr.ip6.as_u64[1];
+ hash_unset_mem (gm->tunnel_by_key6, key6);
+ }
+
+}
+
+static gre_tunnel_t *
+gre_tunnel_from_fib_node (fib_node_t *node)
+{
+#if (CLIB_DEBUG > 0)
+ ASSERT(FIB_NODE_TYPE_GRE_TUNNEL == node->fn_type);
+#endif
+ return ((gre_tunnel_t*) (((char*)node) -
+ STRUCT_OFFSET_OF(gre_tunnel_t, node)));
+}
+
+/**
+ * gre_tunnel_stack
+ *
+ * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
+ */
+void
+gre_tunnel_stack (adj_index_t ai)
+{
+ gre_main_t * gm = &gre_main;
+ ip_adjacency_t *adj;
+ gre_tunnel_t *gt;
+ u32 sw_if_index;
+
+ adj = adj_get(ai);
+ sw_if_index = adj->rewrite_header.sw_if_index;
+
+ if ((vec_len(gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
+ (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+ return;
+
+ gt = pool_elt_at_index(gm->tunnels,
+ gm->tunnel_index_by_sw_if_index[sw_if_index]);
+
+ /*
+ * find the adjacency that is contributed by the FIB entry
+ * that this tunnel resolves via, and use it as the next adj
+ * in the midchain
+ */
+ if (vnet_hw_interface_get_flags(vnet_get_main(),
+ gt->hw_if_index) &
+ VNET_HW_INTERFACE_FLAG_LINK_UP)
+ {
+ adj_nbr_midchain_stack(
+ ai,
+ fib_entry_contribute_ip_forwarding(gt->fib_entry_index));
+ }
+ else
+ {
+ adj_nbr_midchain_unstack(ai);
+ }
+}
+
+/**
+ * @brief Call back when restacking all adjacencies on a GRE interface
+ */
+static adj_walk_rc_t
+gre_adj_walk_cb (adj_index_t ai,
+ void *ctx)
+{
+ gre_tunnel_stack(ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static void
+gre_tunnel_restack (gre_tunnel_t *gt)
+{
+ fib_protocol_t proto;
+
+ /*
+ * walk all the adjacencies on the GRE interface and restack them
+ */
+ FOR_EACH_FIB_IP_PROTOCOL(proto)
+ {
+ adj_nbr_walk(gt->sw_if_index,
+ proto,
+ gre_adj_walk_cb,
+ NULL);
+ }
+}
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+gre_tunnel_back_walk (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ gre_tunnel_restack(gre_tunnel_from_fib_node(node));
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t*
+gre_tunnel_fib_node_get (fib_node_index_t index)
+{
+ gre_tunnel_t * gt;
+ gre_main_t * gm;
+
+ gm = &gre_main;
+ gt = pool_elt_at_index(gm->tunnels, index);
+
+ return (&gt->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+gre_tunnel_last_lock_gone (fib_node_t *node)
+{
+ /*
+ * The GRE tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT(0);
+}
+
+/*
+ * Virtual function table registered by GRE tunnels
+ * for participation in the FIB object graph.
+ */
+static const fib_node_vft_t gre_vft = {
+ .fnv_get = gre_tunnel_fib_node_get,
+ .fnv_last_lock = gre_tunnel_last_lock_gone,
+ .fnv_back_walk = gre_tunnel_back_walk,
+};
+
+static int
+vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
+ u32 * sw_if_indexp)
+{
+ gre_main_t * gm = &gre_main;
+ vnet_main_t * vnm = gm->vnet_main;
+ ip4_main_t * im4 = &ip4_main;
+ ip6_main_t * im6 = &ip6_main;
+ gre_tunnel_t * t;
+ vnet_hw_interface_t * hi;
+ u32 hw_if_index, sw_if_index;
+ u32 outer_fib_index;
+ u8 address[6];
+ clib_error_t *error;
+ u8 is_ipv6 = a->is_ipv6;
+
+ if (!is_ipv6)
+ outer_fib_index = ip4_fib_index_from_table_id(a->outer_fib_id);
+ else
+ outer_fib_index = ip6_fib_index_from_table_id(a->outer_fib_id);
+
+ if (~0 == outer_fib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id, a->is_ipv6);
+
+ if (NULL != t)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+ fib_node_init(&t->node, FIB_NODE_TYPE_GRE_TUNNEL);
+
+ if (a->teb)
+ t->type = GRE_TUNNEL_TYPE_TEB;
+ else
+ t->type = GRE_TUNNEL_TYPE_L3;
+
+ if (vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) > 0) {
+ vnet_interface_main_t * im = &vnm->interface_main;
+
+ hw_if_index = gm->free_gre_tunnel_hw_if_indices[t->type]
+ [vec_len (gm->free_gre_tunnel_hw_if_indices[t->type])-1];
+ _vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - gm->tunnels;
+ hi->hw_instance = hi->dev_instance;
+
+ /* clear old stats of freed tunnel before reuse */
+ sw_if_index = hi->sw_if_index;
+ vnet_interface_counter_lock(im);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], sw_if_index);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], sw_if_index);
+ vlib_zero_simple_counter
+ (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
+ vnet_interface_counter_unlock(im);
+ if (GRE_TUNNEL_TYPE_TEB == t->type)
+ {
+ t->l2_tx_arc = vlib_node_add_named_next(vlib_get_main(),
+ hi->tx_node_index,
+ "adj-l2-midchain");
+ }
+ } else {
+ if (GRE_TUNNEL_TYPE_TEB == t->type)
+ {
+ /* Default MAC address (d00b:eed0:0000 + tunnel index) */
+ memset (address, 0, sizeof (address));
+ address[0] = 0xd0;
+ address[1] = 0x0b;
+ address[2] = 0xee;
+ address[3] = 0xd0;
+ address[4] = t - gm->tunnels;
+
+ error = ethernet_register_interface(vnm,
+ gre_device_teb_class.index,
+ t - gm->tunnels, address,
+ &hw_if_index,
+ 0);
+
+ if (error)
+ {
+ clib_error_report (error);
+ return VNET_API_ERROR_INVALID_REGISTRATION;
+ }
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ t->l2_tx_arc = vlib_node_add_named_next(vlib_get_main(),
+ hi->tx_node_index,
+ "adj-l2-midchain");
+ } else {
+ hw_if_index = vnet_register_interface(vnm,
+ gre_device_class.index,
+ t - gm->tunnels,
+ gre_hw_interface_class.index,
+ t - gm->tunnels);
+ }
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ sw_if_index = hi->sw_if_index;
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->outer_fib_index = outer_fib_index;
+ t->sw_if_index = sw_if_index;
+ t->l2_adj_index = ADJ_INDEX_INVALID;
+
+ vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
+ gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels;
+
+ if (!is_ipv6)
+ {
+ vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
+ hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t);
+ }
+ else
+ {
+ vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
+ hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
+ }
+
+ hi->per_packet_overhead_bytes =
+ /* preamble */ 8 + /* inter frame gap */ 12;
+
+ /* Standard default gre MTU. */
+ hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+
+ /*
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+
+ clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
+ t->tunnel_dst.fp_len = !is_ipv6 ? 32 : 128;
+ t->tunnel_dst.fp_proto = !is_ipv6 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+ t->tunnel_dst.fp_addr = a->dst;
+
+ gre_tunnel_db_add(t);
+
+ t->fib_entry_index =
+ fib_table_entry_special_add(outer_fib_index,
+ &t->tunnel_dst,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+ t->sibling_index =
+ fib_entry_child_add(t->fib_entry_index,
+ FIB_NODE_TYPE_GRE_TUNNEL,
+ t - gm->tunnels);
+
+ if (GRE_TUNNEL_TYPE_TEB == t->type)
+ {
+ t->l2_adj_index = adj_nbr_add_or_lock(t->tunnel_dst.fp_proto,
+ VNET_LINK_ETHERNET,
+ &zero_addr,
+ sw_if_index);
+ gre_update_adj(vnm, t->sw_if_index, t->l2_adj_index);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
+
+static int
+vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a,
+ u32 * sw_if_indexp)
+{
+ gre_main_t * gm = &gre_main;
+ vnet_main_t * vnm = gm->vnet_main;
+ gre_tunnel_t * t;
+ u32 sw_if_index;
+
+ t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id, a->is_ipv6);
+
+ if (NULL == t)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ sw_if_index = t->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */);
+ /* make sure tunnel is removed from l2 bd or xconnect */
+ set_int_l2_mode(gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0);
+ vec_add1 (gm->free_gre_tunnel_hw_if_indices[t->type], t->hw_if_index);
+ gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
+
+ /* a TEB tunnel holds a lock on its L2 midchain adj; drop it exactly once */
+ if (t->l2_adj_index != ADJ_INDEX_INVALID)
+ adj_unlock(t->l2_adj_index);
+
+ fib_entry_child_remove(t->fib_entry_index,
+ t->sibling_index);
+ fib_table_entry_delete_index(t->fib_entry_index,
+ FIB_SOURCE_RR);
+
+ gre_tunnel_db_remove(t);
+ fib_node_deinit(&t->node);
+ pool_put (gm->tunnels, t);
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
+
+int
+vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t *a,
+ u32 * sw_if_indexp)
+{
+ if (a->is_add)
+ return (vnet_gre_tunnel_add(a, sw_if_indexp));
+ else
+ return (vnet_gre_tunnel_delete(a, sw_if_indexp));
+}
+
+clib_error_t *
+gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ gre_main_t * gm = &gre_main;
+ vnet_hw_interface_t * hi;
+ gre_tunnel_t *t;
+ u32 ti;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ if (NULL == gm->tunnel_index_by_sw_if_index ||
+ hi->sw_if_index >= vec_len(gm->tunnel_index_by_sw_if_index))
+ return (NULL);
+
+ ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
+
+ if (~0 == ti)
+ /* not one of ours */
+ return (NULL);
+
+ t = pool_elt_at_index(gm->tunnels, ti);
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP);
+ else
+ vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
+
+ gre_tunnel_restack(t);
+
+ return /* no error */ 0;
+}
+
+static clib_error_t *
+create_gre_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ vnet_gre_add_del_tunnel_args_t _a, * a = &_a;
+ ip46_address_t src, dst;
+ u32 outer_fib_id = 0;
+ u8 teb = 0;
+ int rv;
+ u32 num_m_args = 0;
+ u8 is_add = 1;
+ u32 sw_if_index;
+ clib_error_t *error = NULL;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4)) {
+ num_m_args++;
+ ipv4_set = 1;
+ } else if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4)) {
+ num_m_args++;
+ ipv4_set = 1;
+ } else if (unformat (line_input, "src %U", unformat_ip6_address, &src.ip6)) {
+ num_m_args++;
+ ipv6_set = 1;
+ } else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6)) {
+ num_m_args++;
+ ipv6_set = 1;
+ } else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id))
+ ;
+ else if (unformat (line_input, "teb"))
+ teb = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (num_m_args < 2)
+ {
+ error = clib_error_return (0, "mandatory argument(s) missing");
+ goto done;
+ }
+
+ if ((ipv4_set && memcmp (&src.ip4, &dst.ip4, sizeof(src.ip4)) == 0) ||
+ (ipv6_set && memcmp (&src.ip6, &dst.ip6, sizeof(src.ip6)) == 0))
+ {
+ error = clib_error_return (0, "src and dst are identical");
+ goto done;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ error = clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ goto done;
+ }
+
+ if ((ipv4_set && memcmp (&dst.ip4, &zero_addr.ip4, sizeof(dst.ip4)) == 0) ||
+ (ipv6_set && memcmp (&dst.ip6, &zero_addr.ip6, sizeof(dst.ip6)) == 0))
+ {
+ error = clib_error_return (0, "dst address cannot be zero");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (*a));
+ a->outer_fib_id = outer_fib_id;
+ a->teb = teb;
+ a->is_ipv6 = ipv6_set;
+ if (!ipv6_set)
+ {
+ clib_memcpy(&a->src.ip4, &src.ip4, sizeof(src.ip4));
+ clib_memcpy(&a->dst.ip4, &dst.ip4, sizeof(dst.ip4));
+ }
+ else
+ {
+ clib_memcpy(&a->src.ip6, &src.ip6, sizeof(src.ip6));
+ clib_memcpy(&a->dst.ip6, &dst.ip6, sizeof(dst.ip6));
+ }
+
+ if (is_add)
+ rv = vnet_gre_tunnel_add(a, &sw_if_index);
+ else
+ rv = vnet_gre_tunnel_delete(a, &sw_if_index);
+
+ switch(rv)
+ {
+ case 0:
+ vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index);
+ break;
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "GRE tunnel already exists...");
+ goto done;
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ error = clib_error_return (0, "outer fib ID %d doesn't exist\n",
+ outer_fib_id);
+ goto done;
+ default:
+ error = clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = {
+ .path = "create gre tunnel",
+ .short_help = "create gre tunnel src <addr> dst <addr> "
+ "[outer-fib-id <fib>] [teb] [del]",
+ .function = create_gre_tunnel_command_fn,
+};
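+
+/*
+ * Example session (addresses are illustrative):
+ *   create gre tunnel src 10.0.0.1 dst 10.0.0.2 outer-fib-id 0
+ *   create gre tunnel src 10.0.0.1 dst 10.0.0.2 teb
+ *   create gre tunnel src 10.0.0.1 dst 10.0.0.2 del
+ * On success the new interface name (e.g. gre0 or teb-gre0) is printed.
+ */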
+
+static clib_error_t *
+show_gre_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ gre_main_t * gm = &gre_main;
+ gre_tunnel_t * t;
+ u32 ti = ~0;
+
+ if (pool_elts (gm->tunnels) == 0)
+ vlib_cli_output (vm, "No GRE tunnels configured...");
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ti))
+ ;
+ else
+ break;
+ }
+
+ if (~0 == ti)
+ {
+ pool_foreach (t, gm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t);
+ }));
+ }
+ else
+ {
+ t = pool_elt_at_index(gm->tunnels, ti);
+
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
+ .path = "show gre tunnel",
+ .function = show_gre_tunnel_command_fn,
+};
+
+/* force inclusion from application's main.c */
+clib_error_t *gre_interface_init (vlib_main_t *vm)
+{
+ fib_node_register_type(FIB_NODE_TYPE_GRE_TUNNEL, &gre_vft);
+
+ return 0;
+}
+VLIB_INIT_FUNCTION(gre_interface_init);
diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c
new file mode 100644
index 00000000..acf15f24
--- /dev/null
+++ b/src/vnet/gre/node.c
@@ -0,0 +1,703 @@
+/*
+ * node.c: gre packet processing
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/gre/gre.h>
+#include <vnet/mpls/mpls.h>
+#include <vppinfra/sparse_vec.h>
+
+#define foreach_gre_input_next \
+_(PUNT, "error-punt") \
+_(DROP, "error-drop") \
+_(ETHERNET_INPUT, "ethernet-input") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input") \
+_(MPLS_INPUT, "mpls-input")
+
+typedef enum {
+#define _(s,n) GRE_INPUT_NEXT_##s,
+ foreach_gre_input_next
+#undef _
+ GRE_INPUT_N_NEXT,
+} gre_input_next_t;
+
+typedef struct {
+ u32 tunnel_id;
+ u32 length;
+ ip46_address_t src;
+ ip46_address_t dst;
+ u8 is_ipv6;
+} gre_rx_trace_t;
+
+u8 * format_gre_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ gre_rx_trace_t * t = va_arg (*args, gre_rx_trace_t *);
+
+ s = format (s, "GRE: tunnel %d len %d src %U dst %U",
+ t->tunnel_id, clib_net_to_host_u16(t->length),
+ format_ip46_address, &t->src, IP46_TYPE_ANY,
+ format_ip46_address, &t->dst, IP46_TYPE_ANY);
+ return s;
+}
+
+typedef struct {
+ /* Sparse vector mapping gre protocol in network byte order
+ to next index. */
+ u16 * next_by_protocol;
+} gre_input_runtime_t;
+
+always_inline uword
+gre_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u8 is_ipv6)
+{
+ gre_main_t * gm = &gre_main;
+ __attribute__((unused)) u32 n_left_from, next_index, * from, * to_next;
+ u64 cached_tunnel_key4;
+ u64 cached_tunnel_key6[4];
+ u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index = 0;
+
+ u32 thread_index = vlib_get_thread_index();
+ u32 len;
+ vnet_interface_main_t *im = &gm->vnet_main->interface_main;
+
+ if (!is_ipv6)
+ memset (&cached_tunnel_key4, 0xff, sizeof(cached_tunnel_key4));
+ else
+ memset (&cached_tunnel_key6, 0xff, sizeof(cached_tunnel_key6));
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ gre_header_t * h0, * h1;
+ u16 version0, version1;
+ int verr0, verr1;
+ u32 i0, i1, next0, next1, protocol0, protocol1;
+ ip4_header_t *ip4_0, *ip4_1;
+ ip6_header_t *ip6_0, *ip6_1;
+ u32 ip4_tun_src0, ip4_tun_dst0;
+ u32 ip4_tun_src1, ip4_tun_dst1;
+ u64 ip6_tun_src0[2], ip6_tun_dst0[2];
+ u64 ip6_tun_src1[2], ip6_tun_dst1[2];
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (!is_ipv6)
+ {
+ /* ip4_local hands us the ip header, not the gre header */
+ ip4_0 = vlib_buffer_get_current (b0);
+ ip4_1 = vlib_buffer_get_current (b1);
+ /* Save src + dst ip4 address, e.g. for mpls-o-gre */
+ ip4_tun_src0 = ip4_0->src_address.as_u32;
+ ip4_tun_dst0 = ip4_0->dst_address.as_u32;
+ ip4_tun_src1 = ip4_1->src_address.as_u32;
+ ip4_tun_dst1 = ip4_1->dst_address.as_u32;
+
+ vlib_buffer_advance (b0, sizeof (*ip4_0));
+ vlib_buffer_advance (b1, sizeof (*ip4_1));
+ }
+ else
+ {
+ /* ip6_local hands us the ip header, not the gre header */
+ ip6_0 = vlib_buffer_get_current (b0);
+ ip6_1 = vlib_buffer_get_current (b1);
+ /* Save src + dst ip6 address, e.g. for mpls-o-gre */
+ ip6_tun_src0[0] = ip6_0->src_address.as_u64[0];
+ ip6_tun_src0[1] = ip6_0->src_address.as_u64[1];
+ ip6_tun_dst0[0] = ip6_0->dst_address.as_u64[0];
+ ip6_tun_dst0[1] = ip6_0->dst_address.as_u64[1];
+ ip6_tun_src1[0] = ip6_1->src_address.as_u64[0];
+ ip6_tun_src1[1] = ip6_1->src_address.as_u64[1];
+ ip6_tun_dst1[0] = ip6_1->dst_address.as_u64[0];
+ ip6_tun_dst1[1] = ip6_1->dst_address.as_u64[1];
+
+ vlib_buffer_advance (b0, sizeof (*ip6_0));
+ vlib_buffer_advance (b1, sizeof (*ip6_1));
+ }
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ /* Index sparse array with network byte order. */
+ protocol0 = h0->protocol;
+ protocol1 = h1->protocol;
+ sparse_vec_index2 (gm->next_by_protocol, protocol0, protocol1,
+ &i0, &i1);
+ next0 = vec_elt(gm->next_by_protocol, i0);
+ next1 = vec_elt(gm->next_by_protocol, i1);
+
+ b0->error = node->errors[i0 == SPARSE_VEC_INVALID_INDEX ? GRE_ERROR_UNKNOWN_PROTOCOL : GRE_ERROR_NONE];
+ b1->error = node->errors[i1 == SPARSE_VEC_INVALID_INDEX ? GRE_ERROR_UNKNOWN_PROTOCOL : GRE_ERROR_NONE];
+
+ version0 = clib_net_to_host_u16 (h0->flags_and_version);
+ verr0 = version0 & GRE_VERSION_MASK;
+ version1 = clib_net_to_host_u16 (h1->flags_and_version);
+ verr1 = version1 & GRE_VERSION_MASK;
+
+ b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
+ : b0->error;
+ next0 = verr0 ? GRE_INPUT_NEXT_DROP : next0;
+ b1->error = verr1 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
+ : b1->error;
+ next1 = verr1 ? GRE_INPUT_NEXT_DROP : next1;
+
+
+ /* RPF check for ip4/ip6 input */
+ if (PREDICT_TRUE(next0 == GRE_INPUT_NEXT_IP4_INPUT
+ || next0 == GRE_INPUT_NEXT_IP6_INPUT
+ || next0 == GRE_INPUT_NEXT_ETHERNET_INPUT
+ || next0 == GRE_INPUT_NEXT_MPLS_INPUT))
+ {
+
+ u64 key4, key6[4];
+ if (!is_ipv6)
+ {
+ key4 = ((u64)(ip4_tun_dst0) << 32) | (u64)(ip4_tun_src0);
+ }
+ else
+ {
+ key6[0] = ip6_tun_dst0[0];
+ key6[1] = ip6_tun_dst0[1];
+ key6[2] = ip6_tun_src0[0];
+ key6[3] = ip6_tun_src0[1];
+ }
+
+ if ((!is_ipv6 && cached_tunnel_key4 != key4) ||
+ (is_ipv6 && (cached_tunnel_key6[0] != key6[0] ||
+ cached_tunnel_key6[1] != key6[1] ||
+ cached_tunnel_key6[2] != key6[2] ||
+ cached_tunnel_key6[3] != key6[3])))
+ {
+ vnet_hw_interface_t * hi;
+ gre_tunnel_t * t;
+ uword * p;
+
+ if (!is_ipv6)
+ p = hash_get (gm->tunnel_by_key4, key4);
+ else
+ p = hash_get_mem (gm->tunnel_by_key6, key6);
+ if (!p)
+ {
+ next0 = GRE_INPUT_NEXT_DROP;
+ b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop0;
+ }
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+ hi = vnet_get_hw_interface (gm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+
+ if (!is_ipv6)
+ cached_tunnel_key4 = key4;
+ else
+ clib_memcpy (cached_tunnel_key6, key6, sizeof (key6));
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ }
+ }
+ else
+ {
+ next0 = GRE_INPUT_NEXT_DROP;
+ goto drop0;
+ }
+ len = vlib_buffer_length_in_chain (vm, b0);
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ tunnel_sw_if_index,
+ 1 /* packets */,
+ len /* bytes */);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+
+drop0:
+ if (PREDICT_TRUE(next1 == GRE_INPUT_NEXT_IP4_INPUT
+ || next1 == GRE_INPUT_NEXT_IP6_INPUT
+ || next1 == GRE_INPUT_NEXT_ETHERNET_INPUT
+ || next1 == GRE_INPUT_NEXT_MPLS_INPUT))
+ {
+ u64 key4, key6[4];
+ if (!is_ipv6)
+ {
+ key4 = ((u64)(ip4_tun_dst1) << 32) | (u64)(ip4_tun_src1);
+ }
+ else
+ {
+ key6[0] = ip6_tun_dst1[0];
+ key6[1] = ip6_tun_dst1[1];
+ key6[2] = ip6_tun_src1[0];
+ key6[3] = ip6_tun_src1[1];
+ }
+
+ if ((!is_ipv6 && cached_tunnel_key4 != key4) ||
+ (is_ipv6 && (cached_tunnel_key6[0] != key6[0] ||
+ cached_tunnel_key6[1] != key6[1] ||
+ cached_tunnel_key6[2] != key6[2] ||
+ cached_tunnel_key6[3] != key6[3])))
+ {
+ vnet_hw_interface_t * hi;
+ gre_tunnel_t * t;
+ uword * p;
+
+ if (!is_ipv6)
+ p = hash_get (gm->tunnel_by_key4, key4);
+ else
+ p = hash_get_mem (gm->tunnel_by_key6, key6);
+
+ if (!p)
+ {
+ next1 = GRE_INPUT_NEXT_DROP;
+ b1->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop1;
+ }
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+ hi = vnet_get_hw_interface (gm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+
+ if (!is_ipv6)
+ cached_tunnel_key4 = key4;
+ else
+ clib_memcpy (cached_tunnel_key6, key6, sizeof (key6));
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ }
+ }
+ else
+ {
+ next1 = GRE_INPUT_NEXT_DROP;
+ goto drop1;
+ }
+ len = vlib_buffer_length_in_chain (vm, b1);
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ tunnel_sw_if_index,
+ 1 /* packets */,
+ len /* bytes */);
+
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+
+drop1:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = tunnel_sw_if_index;
+ if (!is_ipv6)
+ {
+ tr->length = ip4_0->length;
+ tr->src.ip4.as_u32 = ip4_0->src_address.as_u32;
+ tr->dst.ip4.as_u32 = ip4_0->dst_address.as_u32;
+ }
+ else
+ {
+ tr->length = ip6_0->payload_length;
+ tr->src.ip6.as_u64[0] = ip6_0->src_address.as_u64[0];
+ tr->src.ip6.as_u64[1] = ip6_0->src_address.as_u64[1];
+ tr->dst.ip6.as_u64[0] = ip6_0->dst_address.as_u64[0];
+ tr->dst.ip6.as_u64[1] = ip6_0->dst_address.as_u64[1];
+ }
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->tunnel_id = tunnel_sw_if_index;
+ if (!is_ipv6)
+ {
+ tr->length = ip4_1->length;
+ tr->src.ip4.as_u32 = ip4_1->src_address.as_u32;
+ tr->dst.ip4.as_u32 = ip4_1->dst_address.as_u32;
+ }
+ else
+ {
+ tr->length = ip6_1->payload_length;
+ tr->src.ip6.as_u64[0] = ip6_1->src_address.as_u64[0];
+ tr->src.ip6.as_u64[1] = ip6_1->src_address.as_u64[1];
+ tr->dst.ip6.as_u64[0] = ip6_1->dst_address.as_u64[0];
+ tr->dst.ip6.as_u64[1] = ip6_1->dst_address.as_u64[1];
+ }
+ }
+
+ vlib_buffer_advance (b0, sizeof (*h0));
+ vlib_buffer_advance (b1, sizeof (*h1));
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ gre_header_t * h0;
+ ip4_header_t * ip4_0;
+ ip6_header_t * ip6_0;
+ u16 version0;
+ int verr0;
+ u32 i0, next0;
+ u32 ip4_tun_src0, ip4_tun_dst0;
+ u64 ip6_tun_src0[2], ip6_tun_dst0[2];
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip4_0 = vlib_buffer_get_current (b0);
+ ip6_0 = (void *)ip4_0;
+
+ if (!is_ipv6)
+ {
+ ip4_tun_src0 = ip4_0->src_address.as_u32;
+ ip4_tun_dst0 = ip4_0->dst_address.as_u32;
+
+ vlib_buffer_advance (b0, sizeof (*ip4_0));
+ }
+ else
+ {
+ ip6_tun_src0[0] = ip6_0->src_address.as_u64[0];
+ ip6_tun_src0[1] = ip6_0->src_address.as_u64[1];
+ ip6_tun_dst0[0] = ip6_0->dst_address.as_u64[0];
+ ip6_tun_dst0[1] = ip6_0->dst_address.as_u64[1];
+
+ vlib_buffer_advance (b0, sizeof (*ip6_0));
+ }
+
+ h0 = vlib_buffer_get_current (b0);
+
+ i0 = sparse_vec_index (gm->next_by_protocol, h0->protocol);
+ next0 = vec_elt(gm->next_by_protocol, i0);
+
+ b0->error =
+ node->errors[i0 == SPARSE_VEC_INVALID_INDEX
+ ? GRE_ERROR_UNKNOWN_PROTOCOL : GRE_ERROR_NONE];
+
+ version0 = clib_net_to_host_u16 (h0->flags_and_version);
+ verr0 = version0 & GRE_VERSION_MASK;
+ b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
+ : b0->error;
+ next0 = verr0 ? GRE_INPUT_NEXT_DROP : next0;
+
+
+ /* For an IP payload we need to find the source interface
+ so we can increment counters and help the forwarding node
+ pick the right FIB */
+ /* RPF check for ip4/ip6 input */
+ if (PREDICT_TRUE(next0 == GRE_INPUT_NEXT_IP4_INPUT
+ || next0 == GRE_INPUT_NEXT_IP6_INPUT
+ || next0 == GRE_INPUT_NEXT_ETHERNET_INPUT
+ || next0 == GRE_INPUT_NEXT_MPLS_INPUT))
+ {
+ u64 key4, key6[4];
+ if (!is_ipv6)
+ {
+ key4 = ((u64)(ip4_tun_dst0) << 32) | (u64)(ip4_tun_src0);
+ }
+ else
+ {
+ key6[0] = ip6_tun_dst0[0];
+ key6[1] = ip6_tun_dst0[1];
+ key6[2] = ip6_tun_src0[0];
+ key6[3] = ip6_tun_src0[1];
+ }
+
+ if ((!is_ipv6 && cached_tunnel_key4 != key4) ||
+ (is_ipv6 && (cached_tunnel_key6[0] != key6[0] ||
+ cached_tunnel_key6[1] != key6[1] ||
+ cached_tunnel_key6[2] != key6[2] ||
+ cached_tunnel_key6[3] != key6[3])))
+ {
+ vnet_hw_interface_t * hi;
+ gre_tunnel_t * t;
+ uword * p;
+
+ if (!is_ipv6)
+ p = hash_get (gm->tunnel_by_key4, key4);
+ else
+ p = hash_get_mem (gm->tunnel_by_key6, key6);
+
+ if (!p)
+ {
+ next0 = GRE_INPUT_NEXT_DROP;
+ b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop;
+ }
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+ hi = vnet_get_hw_interface (gm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+
+ if (!is_ipv6)
+ cached_tunnel_key4 = key4;
+ else
+ clib_memcpy (cached_tunnel_key6, key6, sizeof (key6));
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ }
+ }
+ else
+ {
+ next0 = GRE_INPUT_NEXT_DROP;
+ goto drop;
+ }
+ len = vlib_buffer_length_in_chain (vm, b0);
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ tunnel_sw_if_index,
+ 1 /* packets */,
+ len /* bytes */);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+
+drop:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = tunnel_sw_if_index;
+ if (!is_ipv6)
+ {
+ tr->length = ip4_0->length;
+ tr->src.ip4.as_u32 = ip4_0->src_address.as_u32;
+ tr->dst.ip4.as_u32 = ip4_0->dst_address.as_u32;
+ }
+ else
+ {
+ tr->length = ip6_0->payload_length;
+ tr->src.ip6.as_u64[0] = ip6_0->src_address.as_u64[0];
+ tr->src.ip6.as_u64[1] = ip6_0->src_address.as_u64[1];
+ tr->dst.ip6.as_u64[0] = ip6_0->dst_address.as_u64[0];
+ tr->dst.ip6.as_u64[1] = ip6_0->dst_address.as_u64[1];
+ }
+ }
+
+ vlib_buffer_advance (b0, sizeof (*h0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, !is_ipv6 ? gre4_input_node.index : gre6_input_node.index,
+ GRE_ERROR_PKTS_DECAP, from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+static uword
+gre4_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return gre_input(vm, node, from_frame, /* is_ip6 */ 0);
+}
+
+static uword
+gre6_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return gre_input(vm, node, from_frame, /* is_ip6 */ 1);
+}
+
+static char * gre_error_strings[] = {
+#define gre_error(n,s) s,
+#include "error.def"
+#undef gre_error
+};
+
+VLIB_REGISTER_NODE (gre4_input_node) = {
+ .function = gre4_input,
+ .name = "gre4-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = GRE_N_ERROR,
+ .error_strings = gre_error_strings,
+
+ .n_next_nodes = GRE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [GRE_INPUT_NEXT_##s] = n,
+ foreach_gre_input_next
+#undef _
+ },
+
+ .format_buffer = format_gre_header_with_length,
+ .format_trace = format_gre_rx_trace,
+ .unformat_buffer = unformat_gre_header,
+};
+
+VLIB_REGISTER_NODE (gre6_input_node) = {
+ .function = gre6_input,
+ .name = "gre6-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof (gre_input_runtime_t),
+
+ .n_errors = GRE_N_ERROR,
+ .error_strings = gre_error_strings,
+
+ .n_next_nodes = GRE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [GRE_INPUT_NEXT_##s] = n,
+ foreach_gre_input_next
+#undef _
+ },
+
+ .format_buffer = format_gre_header_with_length,
+ .format_trace = format_gre_rx_trace,
+ .unformat_buffer = unformat_gre_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (gre4_input_node, gre4_input)
+VLIB_NODE_FUNCTION_MULTIARCH (gre6_input_node, gre6_input)
+
+void
+gre_register_input_protocol (vlib_main_t * vm,
+ gre_protocol_t protocol,
+ u32 node_index)
+{
+ gre_main_t * em = &gre_main;
+ gre_protocol_info_t * pi;
+ u16 * n;
+ u32 i;
+
+ {
+ clib_error_t * error = vlib_call_init_function (vm, gre_input_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = gre_get_protocol_info (em, protocol);
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm, gre4_input_node.index, node_index);
+ i = vlib_node_add_next (vm, gre6_input_node.index, node_index);
+ ASSERT(i == pi->next_index);
+
+ /* Setup gre protocol -> next index sparse vector mapping. */
+ n = sparse_vec_validate (em->next_by_protocol,
+ clib_host_to_net_u16 (protocol));
+ n[0] = pi->next_index;
+}
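+
+/*
+ * Example: a payload handler elsewhere would register itself as
+ * (my_nsh_node being a hypothetical node registration):
+ *
+ *   gre_register_input_protocol (vm, GRE_PROTOCOL_nsh,
+ *                                my_nsh_node.index);
+ *
+ * after which gre4-input/gre6-input dispatch GRE protocol 0x894F
+ * payloads to that node.
+ */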
+
+static void
+gre_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ pg_node_t * pn = pg_get_node (node_index);
+
+ n->format_buffer = format_gre_header_with_length;
+ n->unformat_buffer = unformat_gre_header;
+ pn->unformat_edit = unformat_pg_gre_header;
+}
+
+static clib_error_t * gre_input_init (vlib_main_t * vm)
+{
+ gre_main_t * gm = &gre_main;
+ vlib_node_t *ethernet_input, *ip4_input, *ip6_input, *mpls_unicast_input;
+
+ {
+ clib_error_t * error;
+ error = vlib_call_init_function (vm, gre_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ gre_setup_node (vm, gre4_input_node.index);
+ gre_setup_node (vm, gre6_input_node.index);
+
+ gm->next_by_protocol = sparse_vec_new
+ (/* elt bytes */ sizeof (gm->next_by_protocol[0]),
+ /* bits in index */ BITS (((gre_header_t *) 0)->protocol));
+
+ /* These could be moved to the supported protocol input node defn's */
+ ethernet_input = vlib_get_node_by_name (vm, (u8 *)"ethernet-input");
+ ASSERT(ethernet_input);
+ ip4_input = vlib_get_node_by_name (vm, (u8 *)"ip4-input");
+ ASSERT(ip4_input);
+ ip6_input = vlib_get_node_by_name (vm, (u8 *)"ip6-input");
+ ASSERT(ip6_input);
+ mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *)"mpls-input");
+ ASSERT(mpls_unicast_input);
+
+ gre_register_input_protocol (vm, GRE_PROTOCOL_teb,
+ ethernet_input->index);
+
+ gre_register_input_protocol (vm, GRE_PROTOCOL_ip4,
+ ip4_input->index);
+
+ gre_register_input_protocol (vm, GRE_PROTOCOL_ip6,
+ ip6_input->index);
+
+ gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast,
+ mpls_unicast_input->index);
+
+ ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index);
+ ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (gre_input_init);
diff --git a/src/vnet/gre/packet.h b/src/vnet/gre/packet.h
new file mode 100644
index 00000000..cc2ccda9
--- /dev/null
+++ b/src/vnet/gre/packet.h
@@ -0,0 +1,55 @@
+#ifndef included_vnet_gre_packet_h
+#define included_vnet_gre_packet_h
+
+/*
+ * GRE packet format
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define foreach_gre_protocol \
+_ (0x0800, ip4) \
+_ (0x86DD, ip6) \
+_ (0x6558, teb) \
+_ (0x0806, arp) \
+_ (0x8847, mpls_unicast) \
+_ (0x894F, nsh)
+
+typedef enum {
+#define _(n,f) GRE_PROTOCOL_##f = n,
+ foreach_gre_protocol
+#undef _
+} gre_protocol_t;
+
+typedef struct {
+ /* flags and version */
+ u16 flags_and_version;
+ /* unimplemented at the moment */
+#define GRE_FLAGS_CHECKSUM (1 << 15)
+
+ /* deprecated, according to rfc2784 */
+#define GRE_FLAGS_ROUTING (1 << 14)
+#define GRE_FLAGS_KEY (1 << 13)
+#define GRE_FLAGS_SEQUENCE (1 << 12)
+#define GRE_FLAGS_STRICT_SOURCE_ROUTE (1 << 11)
+
+ /* version 1 is PPTP which we don't support */
+#define GRE_SUPPORTED_VERSION 0
+#define GRE_VERSION_MASK 0x7
+
+ /* 0x800 for ip4, etc. */
+ u16 protocol;
+} gre_header_t;
+
+#endif /* included_vnet_gre_packet_h */
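Given these masks, a receiver can reject anything but a base RFC 2784 header in a
few lines; a sketch (not code from this patch) of such a check:

    static int
    gre_header_is_base (gre_header_t * h)
    {
      u16 fv = clib_net_to_host_u16 (h->flags_and_version);
      if ((fv & GRE_VERSION_MASK) != GRE_SUPPORTED_VERSION)
        return 0;   /* version 1 is PPTP; unsupported */
      /* Any set flag implies optional words follow the 4-byte header. */
      return (fv & (GRE_FLAGS_CHECKSUM | GRE_FLAGS_ROUTING | GRE_FLAGS_KEY
                    | GRE_FLAGS_SEQUENCE | GRE_FLAGS_STRICT_SOURCE_ROUTE)) == 0;
    }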
diff --git a/src/vnet/gre/pg.c b/src/vnet/gre/pg.c
new file mode 100644
index 00000000..cc065d3b
--- /dev/null
+++ b/src/vnet/gre/pg.c
@@ -0,0 +1,77 @@
+/*
+ * pg.c: packet generator GRE interface
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/gre/gre.h>
+
+typedef struct {
+ pg_edit_t flags_and_version;
+ pg_edit_t protocol;
+} pg_gre_header_t;
+
+static inline void
+pg_gre_header_init (pg_gre_header_t * e)
+{
+ pg_edit_init (&e->flags_and_version, gre_header_t, flags_and_version);
+ pg_edit_init (&e->protocol, gre_header_t, protocol);
+}
+
+uword
+unformat_pg_gre_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_gre_header_t * h;
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (gre_header_t),
+ &group_index);
+ pg_gre_header_init (h);
+
+ pg_edit_set_fixed (&h->flags_and_version, 0);
+
+ error = 1;
+ if (! unformat (input, "%U",
+ unformat_pg_edit,
+ unformat_gre_protocol_net_byte_order, &h->protocol))
+ goto done;
+
+ {
+ gre_main_t * pm = &gre_main;
+ gre_protocol_info_t * pi = 0;
+ pg_node_t * pg_node = 0;
+
+ if (h->protocol.type == PG_EDIT_FIXED)
+ {
+ u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO];
+ pi = gre_get_protocol_info (pm, clib_net_to_host_u16 (t));
+ if (pi && pi->node_index != ~0)
+ pg_node = pg_get_node (pi->node_index);
+ }
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+ }
+
+ error = 0;
+ done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c
new file mode 100644
index 00000000..81cb9f55
--- /dev/null
+++ b/src/vnet/handoff.c
@@ -0,0 +1,594 @@
+
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/xxhash.h>
+#include <vlib/threads.h>
+#include <vnet/handoff.h>
+#include <vnet/feature/feature.h>
+
+typedef struct
+{
+ uword *workers_bitmap;
+ u32 *workers;
+} per_interface_handoff_data_t;
+
+typedef struct
+{
+ u32 cached_next_index;
+ u32 num_workers;
+ u32 first_worker_index;
+
+ per_interface_handoff_data_t *if_data;
+
+ /* Worker handoff index */
+ u32 frame_queue_index;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ u64 (*hash_fn) (ethernet_header_t *);
+} handoff_main_t;
+
+handoff_main_t handoff_main;
+vlib_node_registration_t handoff_dispatch_node;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_worker_index;
+ u32 buffer_index;
+} worker_handoff_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_worker_handoff_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ worker_handoff_trace_t *t = va_arg (*args, worker_handoff_trace_t *);
+
+ s =
+ format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
+ t->sw_if_index, t->next_worker_index, t->buffer_index);
+ return s;
+}
+
+vlib_node_registration_t handoff_node;
+
+static uword
+worker_handoff_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ handoff_main_t *hm = &handoff_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 n_left_from, *from;
+ static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
+ static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
+ = 0;
+ vlib_frame_queue_elt_t *hf = 0;
+ int i;
+ u32 n_left_to_next_worker = 0, *to_next_worker = 0;
+ u32 next_worker_index = 0;
+ u32 current_worker_index = ~0;
+
+ if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
+ {
+ vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
+
+ vec_validate_init_empty (congested_handoff_queue_by_worker_index,
+ hm->first_worker_index + hm->num_workers - 1,
+ (vlib_frame_queue_t *) (~0));
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 sw_if_index0;
+ u32 hash;
+ u64 hash_key;
+ per_interface_handoff_data_t *ihd0;
+ u32 index0;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ ASSERT (hm->if_data);
+ ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0);
+
+ next_worker_index = hm->first_worker_index;
+
+ /*
+ * Force unknown traffic onto worker 0,
+ * and into ethernet-input. $$$$ add more hashes.
+ */
+
+ /* Compute ingress LB hash */
+ hash_key = hm->hash_fn ((ethernet_header_t *) b0->data);
+ hash = (u32) clib_xxhash (hash_key);
+
+ /* If the input node did not specify a next index, the packet
+ should go to ethernet-input. */
+ if (PREDICT_FALSE ((b0->flags & VNET_BUFFER_F_HANDOFF_NEXT_VALID) == 0))
+ vnet_buffer (b0)->handoff.next_index =
+ HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
+ else if (vnet_buffer (b0)->handoff.next_index ==
+ HANDOFF_DISPATCH_NEXT_IP4_INPUT
+ || vnet_buffer (b0)->handoff.next_index ==
+ HANDOFF_DISPATCH_NEXT_IP6_INPUT
+ || vnet_buffer (b0)->handoff.next_index ==
+ HANDOFF_DISPATCH_NEXT_MPLS_INPUT)
+ vlib_buffer_advance (b0, (sizeof (ethernet_header_t)));
+
+ if (PREDICT_TRUE (is_pow2 (vec_len (ihd0->workers))))
+ index0 = hash & (vec_len (ihd0->workers) - 1);
+ else
+ index0 = hash % vec_len (ihd0->workers);
+
+ next_worker_index += ihd0->workers[index0];
+
+ if (next_worker_index != current_worker_index)
+ {
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ hf = vlib_get_worker_handoff_queue_elt (hm->frame_queue_index,
+ next_worker_index,
+ handoff_queue_elt_by_worker_index);
+
+ n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
+ to_next_worker = &hf->buffer_index[hf->n_vectors];
+ current_worker_index = next_worker_index;
+ }
+
+ /* enqueue to correct worker thread */
+ to_next_worker[0] = bi0;
+ to_next_worker++;
+ n_left_to_next_worker--;
+
+ if (n_left_to_next_worker == 0)
+ {
+ hf->n_vectors = VLIB_FRAME_SIZE;
+ vlib_put_frame_queue_elt (hf);
+ current_worker_index = ~0;
+ handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+ hf = 0;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ worker_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_worker_index = next_worker_index - hm->first_worker_index;
+ t->buffer_index = bi0;
+ }
+
+ }
+
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ /* Ship frames to the worker nodes */
+ for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+ {
+ if (handoff_queue_elt_by_worker_index[i])
+ {
+ hf = handoff_queue_elt_by_worker_index[i];
+ /*
+ * It works better to let the handoff node
+ * rate-adapt, always ship the handoff queue element.
+ */
+ if (1 || hf->n_vectors == hf->last_n_vectors)
+ {
+ vlib_put_frame_queue_elt (hf);
+ handoff_queue_elt_by_worker_index[i] = 0;
+ }
+ else
+ hf->last_n_vectors = hf->n_vectors;
+ }
+ congested_handoff_queue_by_worker_index[i] =
+ (vlib_frame_queue_t *) (~0);
+ }
+ hf = 0;
+ current_worker_index = ~0;
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (worker_handoff_node) = {
+ .function = worker_handoff_node_fn,
+ .name = "worker-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (worker_handoff_node, worker_handoff_node_fn)
+/* *INDENT-ON* */
+
+int
+interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
+ uword * bitmap, int enable_disable)
+{
+ handoff_main_t *hm = &handoff_main;
+ vnet_sw_interface_t *sw;
+ vnet_main_t *vnm = vnet_get_main ();
+ per_interface_handoff_data_t *d;
+ int i, rv = 0;
+
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (clib_bitmap_last_set (bitmap) >= hm->num_workers)
+ return VNET_API_ERROR_INVALID_WORKER;
+
+ if (hm->frame_queue_index == ~0)
+ hm->frame_queue_index =
+ vlib_frame_queue_main_init (handoff_dispatch_node.index, 0);
+
+ vec_validate (hm->if_data, sw_if_index);
+ d = vec_elt_at_index (hm->if_data, sw_if_index);
+
+ vec_free (d->workers);
+ vec_free (d->workers_bitmap);
+
+ if (enable_disable)
+ {
+ d->workers_bitmap = bitmap;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, bitmap,
+ ({
+ vec_add1(d->workers, i);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ vnet_feature_enable_disable ("device-input", "worker-handoff",
+ sw_if_index, enable_disable, 0, 0);
+ return rv;
+}
+
+static clib_error_t *
+set_interface_handoff_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ handoff_main_t *hm = &handoff_main;
+ u32 sw_if_index = ~0;
+ int enable_disable = 1;
+ uword *bitmap = 0;
+ u32 sym = ~0;
+
+ int rv = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "disable"))
+ enable_disable = 0;
+ else if (unformat (input, "workers %U", unformat_bitmap_list, &bitmap))
+ ;
+ else if (unformat (input, "%U", unformat_vnet_sw_interface,
+ vnet_get_main (), &sw_if_index))
+ ;
+ else if (unformat (input, "symmetrical"))
+ sym = 1;
+ else if (unformat (input, "asymmetrical"))
+ sym = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Please specify an interface...");
+
+ if (bitmap == 0)
+ return clib_error_return (0, "Please specify list of workers...");
+
+ rv =
+ interface_handoff_enable_disable (vm, sw_if_index, bitmap,
+ enable_disable);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "Invalid interface");
+
+ case VNET_API_ERROR_INVALID_WORKER:
+ return clib_error_return (0, "Invalid worker(s)");
+
+ case VNET_API_ERROR_UNIMPLEMENTED:
+ return clib_error_return (0,
+ "Device driver doesn't support redirection");
+
+ default:
+ return clib_error_return (0, "unknown return value %d", rv);
+ }
+
+ if (sym == 1)
+ hm->hash_fn = eth_get_sym_key;
+ else if (sym == 0)
+ hm->hash_fn = eth_get_key;
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
+ .path = "set interface handoff",
+ .short_help =
+ "set interface handoff <interface-name> workers <workers-list> [symmetrical|asymmetrical]",
+ .function = set_interface_handoff_command_fn,
+};
+/* *INDENT-ON* */
+
+typedef struct
+{
+ u32 buffer_index;
+ u32 next_index;
+ u32 sw_if_index;
+} handoff_dispatch_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_handoff_dispatch_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ handoff_dispatch_trace_t *t = va_arg (*args, handoff_dispatch_trace_t *);
+
+ s = format (s, "handoff-dispatch: sw_if_index %d next_index %d buffer 0x%x",
+ t->sw_if_index, t->next_index, t->buffer_index);
+ return s;
+}
+
+#define foreach_handoff_dispatch_error \
+_(EXAMPLE, "example packets")
+
+typedef enum
+{
+#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym,
+ foreach_handoff_dispatch_error
+#undef _
+ HANDOFF_DISPATCH_N_ERROR,
+} handoff_dispatch_error_t;
+
+static char *handoff_dispatch_error_strings[] = {
+#define _(sym,string) string,
+ foreach_handoff_dispatch_error
+#undef _
+};
+
+static uword
+handoff_dispatch_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ handoff_dispatch_next_t next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ next0 = vnet_buffer (b0)->handoff.next_index;
+ next1 = vnet_buffer (b1)->handoff.next_index;
+
+ if (PREDICT_FALSE (vm->trace_main.trace_active_hint))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */
+ 0);
+ handoff_dispatch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->buffer_index = bi0;
+ }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */
+ 0);
+ handoff_dispatch_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ t->buffer_index = bi1;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ next0 = vnet_buffer (b0)->handoff.next_index;
+
+ if (PREDICT_FALSE (vm->trace_main.trace_active_hint))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */
+ 0);
+ handoff_dispatch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->buffer_index = bi0;
+ }
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (handoff_dispatch_node) = {
+ .function = handoff_dispatch_node_fn,
+ .name = "handoff-dispatch",
+ .vector_size = sizeof (u32),
+ .format_trace = format_handoff_dispatch_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .flags = VLIB_NODE_FLAG_IS_HANDOFF,
+
+ .n_errors = ARRAY_LEN(handoff_dispatch_error_strings),
+ .error_strings = handoff_dispatch_error_strings,
+
+ .n_next_nodes = HANDOFF_DISPATCH_N_NEXT,
+
+ .next_nodes = {
+ [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop",
+ [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input",
+ [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input",
+ [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-input",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
+/* *INDENT-ON* */
+
+clib_error_t *
+handoff_init (vlib_main_t * vm)
+{
+ handoff_main_t *hm = &handoff_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ clib_error_t *error;
+ uword *p;
+
+ if ((error = vlib_call_init_function (vm, threads_init)))
+ return error;
+
+ vlib_thread_registration_t *tr;
+ /* Only the standard vnet worker threads are supported */
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ if (p)
+ {
+ tr = (vlib_thread_registration_t *) p[0];
+ if (tr)
+ {
+ hm->num_workers = tr->count;
+ hm->first_worker_index = tr->first_index;
+ }
+ }
+
+ hm->hash_fn = eth_get_key;
+
+ hm->vlib_main = vm;
+ hm->vnet_main = &vnet_main;
+
+ hm->frame_queue_index = ~0;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (handoff_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
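Matching the short_help above, a typical configuration session looks like this
(interface name and worker list are illustrative):

    vpp# set interface handoff GigabitEthernet0/8/0 workers 1-2 symmetrical
    vpp# set interface handoff GigabitEthernet0/8/0 workers 1-2 disable

The workers list is parsed by unformat_bitmap_list, so ranges ("1-2") and
comma-separated entries are both accepted.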
diff --git a/src/vnet/handoff.h b/src/vnet/handoff.h
new file mode 100644
index 00000000..04ba8bfb
--- /dev/null
+++ b/src/vnet/handoff.h
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_handoff_h
+#define included_vnet_handoff_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/mpls/packet.h>
+
+typedef enum
+{
+ HANDOFF_DISPATCH_NEXT_IP4_INPUT,
+ HANDOFF_DISPATCH_NEXT_IP6_INPUT,
+ HANDOFF_DISPATCH_NEXT_MPLS_INPUT,
+ HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT,
+ HANDOFF_DISPATCH_NEXT_DROP,
+ HANDOFF_DISPATCH_N_NEXT,
+} handoff_dispatch_next_t;
+
+
+static inline u64
+ipv4_get_key (ip4_header_t * ip)
+{
+ u64 hash_key;
+
+ hash_key = *((u64 *) (&ip->address_pair)) ^ ip->protocol;
+
+ return hash_key;
+}
+
+static inline u64
+ipv6_get_key (ip6_header_t * ip)
+{
+ u64 hash_key;
+
+ hash_key = ip->src_address.as_u64[0] ^
+ rotate_left (ip->src_address.as_u64[1], 13) ^
+ rotate_left (ip->dst_address.as_u64[0], 26) ^
+ rotate_left (ip->dst_address.as_u64[1], 39) ^ ip->protocol;
+
+ return hash_key;
+}
+
+#define MPLS_BOTTOM_OF_STACK_BIT_MASK 0x00000100U
+#define MPLS_LABEL_MASK 0xFFFFF000U
+
+static inline u64
+mpls_get_key (mpls_unicast_header_t * m)
+{
+ u64 hash_key;
+ u8 ip_ver;
+
+
+ /* find the bottom of the MPLS label stack. */
+ if (PREDICT_TRUE (m->label_exp_s_ttl &
+ clib_net_to_host_u32 (MPLS_BOTTOM_OF_STACK_BIT_MASK)))
+ {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (PREDICT_TRUE (m->label_exp_s_ttl &
+ clib_net_to_host_u32 (MPLS_BOTTOM_OF_STACK_BIT_MASK)))
+ {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl &
+ clib_net_to_host_u32 (MPLS_BOTTOM_OF_STACK_BIT_MASK))
+ {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl &
+ clib_net_to_host_u32 (MPLS_BOTTOM_OF_STACK_BIT_MASK))
+ {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl &
+ clib_net_to_host_u32 (MPLS_BOTTOM_OF_STACK_BIT_MASK))
+ {
+ goto bottom_lbl_found;
+ }
+
+ /* the bottom label was not found - use the last label */
+ hash_key = m->label_exp_s_ttl & clib_net_to_host_u32 (MPLS_LABEL_MASK);
+
+ return hash_key;
+
+bottom_lbl_found:
+ m++;
+ ip_ver = (*((u8 *) m) >> 4);
+
+ /* find out if it is IPV4 or IPV6 header */
+ if (PREDICT_TRUE (ip_ver == 4))
+ {
+ hash_key = ipv4_get_key ((ip4_header_t *) m);
+ }
+ else if (PREDICT_TRUE (ip_ver == 6))
+ {
+ hash_key = ipv6_get_key ((ip6_header_t *) m);
+ }
+ else
+ {
+ /* use the bottom label */
+ hash_key =
+ (m - 1)->label_exp_s_ttl & clib_net_to_host_u32 (MPLS_LABEL_MASK);
+ }
+
+ return hash_key;
+
+}
+
+static inline u64
+eth_get_sym_key (ethernet_header_t * h0)
+{
+ u64 hash_key;
+
+ if (PREDICT_TRUE (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
+ {
+ ip4_header_t *ip = (ip4_header_t *) (h0 + 1);
+ hash_key =
+ (u64) (ip->src_address.as_u32 ^
+ ip->dst_address.as_u32 ^ ip->protocol);
+ }
+ else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6))
+ {
+ ip6_header_t *ip = (ip6_header_t *) (h0 + 1);
+ hash_key = (u64) (ip->src_address.as_u64[0] ^
+ ip->src_address.as_u64[1] ^
+ ip->dst_address.as_u64[0] ^
+ ip->dst_address.as_u64[1] ^ ip->protocol);
+ }
+ else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))
+ {
+ hash_key = mpls_get_key ((mpls_unicast_header_t *) (h0 + 1));
+ }
+ else
+ if (PREDICT_FALSE
+ ((h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_VLAN))
+ || (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD))))
+ {
+ ethernet_vlan_header_t *outer = (ethernet_vlan_header_t *) (h0 + 1);
+
+ outer = (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_VLAN)) ?
+ outer + 1 : outer;
+ if (PREDICT_TRUE (outer->type ==
+ clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
+ {
+ ip4_header_t *ip = (ip4_header_t *) (outer + 1);
+ hash_key =
+ (u64) (ip->src_address.as_u32 ^
+ ip->dst_address.as_u32 ^ ip->protocol);
+ }
+ else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6))
+ {
+ ip6_header_t *ip = (ip6_header_t *) (outer + 1);
+ hash_key =
+ (u64) (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1] ^
+ ip->dst_address.as_u64[0] ^
+ ip->dst_address.as_u64[1] ^ ip->protocol);
+ }
+ else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))
+ {
+ hash_key = mpls_get_key ((mpls_unicast_header_t *) (outer + 1));
+ }
+ else
+ {
+ hash_key = outer->type;
+ }
+ }
+ else
+ {
+ hash_key = 0;
+ }
+
+ return hash_key;
+}
+
+static inline u64
+eth_get_key (ethernet_header_t * h0)
+{
+ u64 hash_key;
+
+ if (PREDICT_TRUE (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
+ {
+ hash_key = ipv4_get_key ((ip4_header_t *) (h0 + 1));
+ }
+ else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6))
+ {
+ hash_key = ipv6_get_key ((ip6_header_t *) (h0 + 1));
+ }
+ else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))
+ {
+ hash_key = mpls_get_key ((mpls_unicast_header_t *) (h0 + 1));
+ }
+ else if ((h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_VLAN)) ||
+ (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD)))
+ {
+ ethernet_vlan_header_t *outer = (ethernet_vlan_header_t *) (h0 + 1);
+
+ outer = (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_VLAN)) ?
+ outer + 1 : outer;
+ if (PREDICT_TRUE (outer->type ==
+ clib_host_to_net_u16 (ETHERNET_TYPE_IP4)))
+ {
+ hash_key = ipv4_get_key ((ip4_header_t *) (outer + 1));
+ }
+ else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6))
+ {
+ hash_key = ipv6_get_key ((ip6_header_t *) (outer + 1));
+ }
+ else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))
+ {
+ hash_key = mpls_get_key ((mpls_unicast_header_t *) (outer + 1));
+ }
+ else
+ {
+ hash_key = outer->type;
+ }
+ }
+ else
+ {
+ hash_key = 0;
+ }
+
+ return hash_key;
+}
+
+#endif /* included_vnet_handoff_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
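Why the "symmetrical" variant pins both directions of a flow to the same worker:
every field is mixed in with XOR, which is commutative, so swapping source and
destination leaves the key unchanged. Trivially:

    u32 src = 0x0a000001, dst = 0x0a000002;   /* 10.0.0.1 / 10.0.0.2 */
    ASSERT ((src ^ dst) == (dst ^ src));      /* same key both directions */

eth_get_key, by contrast, feeds the addresses through ipv4_get_key /
ipv6_get_key, whose packed-u64 and rotate_left mixing is direction-sensitive.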
diff --git a/src/vnet/hdlc/error.def b/src/vnet/hdlc/error.def
new file mode 100644
index 00000000..16e001bb
--- /dev/null
+++ b/src/vnet/hdlc/error.def
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * hdlc_error.def: hdlc errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+hdlc_error (NONE, "no error")
+hdlc_error (UNKNOWN_PROTOCOL, "unknown hdlc protocol")
+hdlc_error (UNKNOWN_ADDRESS_CONTROL, "address, control != 0x0f00")
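These entries are expanded twice through the hdlc_error() x-macro: hdlc.h turns
them into the hdlc_error_t enum and node.c into the error-string table. The enum
side comes out as:

    typedef enum {
      HDLC_ERROR_NONE,
      HDLC_ERROR_UNKNOWN_PROTOCOL,
      HDLC_ERROR_UNKNOWN_ADDRESS_CONTROL,
      HDLC_N_ERROR,
    } hdlc_error_t;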
diff --git a/src/vnet/hdlc/hdlc.c b/src/vnet/hdlc/hdlc.c
new file mode 100644
index 00000000..174085ac
--- /dev/null
+++ b/src/vnet/hdlc/hdlc.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * hdlc.c: hdlc
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/hdlc/hdlc.h>
+
+/* Global main structure. */
+hdlc_main_t hdlc_main;
+
+u8 * format_hdlc_protocol (u8 * s, va_list * args)
+{
+ hdlc_protocol_t p = va_arg (*args, u32);
+ hdlc_main_t * pm = &hdlc_main;
+ hdlc_protocol_info_t * pi = hdlc_get_protocol_info (pm, p);
+
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "0x%04x", p);
+
+ return s;
+}
+
+u8 * format_hdlc_header_with_length (u8 * s, va_list * args)
+{
+ hdlc_main_t * pm = &hdlc_main;
+ hdlc_header_t * h = va_arg (*args, hdlc_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ hdlc_protocol_t p = clib_net_to_host_u16 (h->protocol);
+ uword indent, header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "hdlc header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "HDLC %U", format_hdlc_protocol, p);
+
+ if (h->address != 0xff)
+ s = format (s, ", address 0x%02x", h->address);
+ if (h->control != 0x03)
+ s = format (s, ", control 0x%02x", h->control);
+
+ /* Format the payload only if bytes were captured beyond the header. */
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ hdlc_protocol_info_t * pi = hdlc_get_protocol_info (pm, p);
+ vlib_node_t * node = vlib_get_node (pm->vlib_main, pi->node_index);
+ if (node->format_buffer)
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ node->format_buffer, (void *) (h + 1),
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+u8 * format_hdlc_header (u8 * s, va_list * args)
+{
+ hdlc_header_t * h = va_arg (*args, hdlc_header_t *);
+ return format (s, "%U", format_hdlc_header_with_length, h, 0);
+}
+
+/* Returns hdlc protocol as an int in host byte order. */
+uword
+unformat_hdlc_protocol_host_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 * result = va_arg (*args, u16 *);
+ hdlc_main_t * pm = &hdlc_main;
+ int p, i;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x", &p)
+ || unformat (input, "%d", &p))
+ {
+ if (p >= (1 << 16))
+ return 0;
+ *result = p;
+ return 1;
+ }
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ pm->protocol_info_by_name, &i))
+ {
+ hdlc_protocol_info_t * pi = vec_elt_at_index (pm->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword
+unformat_hdlc_protocol_net_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 * result = va_arg (*args, u16 *);
+ if (! unformat_user (input, unformat_hdlc_protocol_host_byte_order, result))
+ return 0;
+ *result = clib_host_to_net_u16 ((u16) *result);
+ return 1;
+}
+
+uword
+unformat_hdlc_header (unformat_input_t * input, va_list * args)
+{
+ u8 ** result = va_arg (*args, u8 **);
+ hdlc_header_t _h, * h = &_h;
+ u16 p;
+
+ if (! unformat (input, "%U",
+ unformat_hdlc_protocol_host_byte_order, &p))
+ return 0;
+
+ h->address = 0xff;
+ h->control = 0x03;
+ h->protocol = clib_host_to_net_u16 (p);
+
+ /* Add header to result. */
+ {
+ void * p;
+ u32 n_bytes = sizeof (h[0]);
+
+ vec_add2 (*result, p, n_bytes);
+ clib_memcpy (p, h, n_bytes);
+ }
+
+ return 1;
+}
+
+static u8*
+hdlc_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
+{
+ hdlc_header_t * h;
+ u8* rewrite = NULL;
+ hdlc_protocol_t protocol;
+
+ switch (link_type) {
+#define _(a,b) case VNET_LINK_##a: protocol = HDLC_PROTOCOL_##b; break
+ _ (IP4, ip4);
+ _ (IP6, ip6);
+ _ (MPLS, mpls_unicast);
+#undef _
+ default:
+ return (NULL);
+ }
+
+ vec_validate(rewrite, sizeof(*h)-1);
+ h = (hdlc_header_t *)rewrite;
+ h->address = 0x0f;
+ h->control = 0x00;
+ h->protocol = clib_host_to_net_u16 (protocol);
+
+ return (rewrite);
+}
+
+VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
+ .name = "HDLC",
+ .format_header = format_hdlc_header_with_length,
+ .unformat_header = unformat_hdlc_header,
+ .build_rewrite = hdlc_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+static void add_protocol (hdlc_main_t * pm,
+ hdlc_protocol_t protocol,
+ char * protocol_name)
+{
+ hdlc_protocol_info_t * pi;
+ u32 i;
+
+ vec_add2 (pm->protocol_infos, pi, 1);
+ i = pi - pm->protocol_infos;
+
+ pi->name = protocol_name;
+ pi->protocol = protocol;
+ pi->next_index = pi->node_index = ~0;
+
+ hash_set (pm->protocol_info_by_protocol, protocol, i);
+ hash_set_mem (pm->protocol_info_by_name, pi->name, i);
+}
+
+static clib_error_t * hdlc_init (vlib_main_t * vm)
+{
+ hdlc_main_t * pm = &hdlc_main;
+
+ memset (pm, 0, sizeof (pm[0]));
+ pm->vlib_main = vm;
+
+ pm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ pm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
+
+#define _(n,s) add_protocol (pm, HDLC_PROTOCOL_##s, #s);
+ foreach_hdlc_protocol
+#undef _
+
+ return vlib_call_init_function (vm, hdlc_input_init);
+}
+
+VLIB_INIT_FUNCTION (hdlc_init);
+
+hdlc_main_t * hdlc_get_main (vlib_main_t * vm)
+{
+ vlib_call_init_function (vm, hdlc_init);
+ return &hdlc_main;
+}
+
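For reference, the foreach_hdlc_protocol expansion at the bottom of hdlc_init
registers one entry per protocol listed in packet.h; the first two expand to:

    add_protocol (pm, HDLC_PROTOCOL_ip4, "ip4");
    add_protocol (pm, HDLC_PROTOCOL_cdp, "cdp");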
diff --git a/src/vnet/hdlc/hdlc.h b/src/vnet/hdlc/hdlc.h
new file mode 100644
index 00000000..8407d39d
--- /dev/null
+++ b/src/vnet/hdlc/hdlc.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * hdlc.h: types/functions for hdlc.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_hdlc_h
+#define included_hdlc_h
+
+#include <vnet/vnet.h>
+#include <vnet/hdlc/packet.h>
+#include <vnet/pg/pg.h>
+
+extern vnet_hw_interface_class_t hdlc_hw_interface_class;
+
+typedef enum {
+#define hdlc_error(n,s) HDLC_ERROR_##n,
+#include <vnet/hdlc/error.def>
+#undef hdlc_error
+ HDLC_N_ERROR,
+} hdlc_error_t;
+
+typedef struct {
+ /* Name (a c string). */
+ char * name;
+
+ /* HDLC protocol type in host byte order. */
+ hdlc_protocol_t protocol;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} hdlc_protocol_info_t;
+
+typedef struct {
+ vlib_main_t * vlib_main;
+
+ hdlc_protocol_info_t * protocol_infos;
+
+ /* Hash tables mapping name/protocol to protocol info index. */
+ uword * protocol_info_by_name, * protocol_info_by_protocol;
+} hdlc_main_t;
+
+always_inline hdlc_protocol_info_t *
+hdlc_get_protocol_info (hdlc_main_t * em, hdlc_protocol_t protocol)
+{
+ uword * p = hash_get (em->protocol_info_by_protocol, protocol);
+ return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0;
+}
+
+extern hdlc_main_t hdlc_main;
+
+/* Register given node index to take input for given hdlc type. */
+void
+hdlc_register_input_type (vlib_main_t * vm,
+ hdlc_protocol_t protocol,
+ u32 node_index);
+
+format_function_t format_hdlc_protocol;
+format_function_t format_hdlc_header;
+format_function_t format_hdlc_header_with_length;
+
+/* Parse hdlc protocol as 0xXXXX or protocol name.
+ In either host or network byte order. */
+unformat_function_t unformat_hdlc_protocol_host_byte_order;
+unformat_function_t unformat_hdlc_protocol_net_byte_order;
+
+/* Parse hdlc header. */
+unformat_function_t unformat_hdlc_header;
+unformat_function_t unformat_pg_hdlc_header;
+
+always_inline void
+hdlc_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ pg_node_t * pn = pg_get_node (node_index);
+
+ n->format_buffer = format_hdlc_header_with_length;
+ n->unformat_buffer = unformat_hdlc_header;
+ pn->unformat_edit = unformat_pg_hdlc_header;
+}
+
+void
+hdlc_register_input_protocol (vlib_main_t * vm,
+ hdlc_protocol_t protocol,
+ u32 node_index);
+
+#endif /* included_hdlc_h */
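A client wires an HDLC payload protocol to its handler node the same way the GRE
code earlier in this patch does; a sketch for ip4, assuming it runs inside an
init function holding a vlib_main_t *vm:

    vlib_node_t *ip4_input = vlib_get_node_by_name (vm, (u8 *) "ip4-input");
    ASSERT (ip4_input);
    hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4, ip4_input->index);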
diff --git a/src/vnet/hdlc/node.c b/src/vnet/hdlc/node.c
new file mode 100644
index 00000000..57e04c85
--- /dev/null
+++ b/src/vnet/hdlc/node.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * hdlc_node.c: hdlc packet processing
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/hdlc/hdlc.h>
+#include <vppinfra/sparse_vec.h>
+
+#define foreach_hdlc_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop")
+
+typedef enum {
+#define _(s,n) HDLC_INPUT_NEXT_##s,
+ foreach_hdlc_input_next
+#undef _
+ HDLC_INPUT_N_NEXT,
+} hdlc_input_next_t;
+
+typedef struct {
+ u8 packet_data[32];
+} hdlc_input_trace_t;
+
+static u8 * format_hdlc_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ hdlc_input_trace_t * t = va_arg (*va, hdlc_input_trace_t *);
+
+ s = format (s, "%U", format_hdlc_header, t->packet_data);
+
+ return s;
+}
+
+typedef struct {
+ /* Sparse vector mapping hdlc protocol in network byte order
+ to next index. */
+ u16 * next_by_protocol;
+
+ u32 * sparse_index_by_next_index;
+} hdlc_input_runtime_t;
+
+static uword
+hdlc_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ hdlc_input_runtime_t * rt = (void *) node->runtime_data;
+ u32 n_left_from, next_index, i_next, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (hdlc_input_trace_t));
+
+ next_index = node->cached_next_index;
+ i_next = vec_elt (rt->sparse_index_by_next_index, next_index);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ hdlc_header_t * h0, * h1;
+ u32 i0, i1, len0, len1, protocol0, protocol1, enqueue_code;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * b2, * b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, LOAD);
+ vlib_prefetch_buffer_header (b3, LOAD);
+
+ CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ h0 = (void *) (b0->data + b0->current_data);
+ h1 = (void *) (b1->data + b1->current_data);
+
+ protocol0 = h0->protocol;
+ protocol1 = h1->protocol;
+
+ /* Add padding bytes for OSI protocols. */
+ len0 = sizeof (h0[0]);
+ len1 = sizeof (h1[0]);
+
+ len0 += protocol0 == clib_host_to_net_u16 (HDLC_PROTOCOL_osi);
+ len1 += protocol1 == clib_host_to_net_u16 (HDLC_PROTOCOL_osi);
+
+ b0->current_data += len0;
+ b1->current_data += len1;
+
+ b0->current_length -= len0;
+ b1->current_length -= len1;
+
+ /* Index sparse array with network byte order. */
+ sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1, &i0, &i1);
+
+ b0->error = node->errors[i0 == SPARSE_VEC_INVALID_INDEX ? HDLC_ERROR_UNKNOWN_PROTOCOL : HDLC_ERROR_NONE];
+ b1->error = node->errors[i1 == SPARSE_VEC_INVALID_INDEX ? HDLC_ERROR_UNKNOWN_PROTOCOL : HDLC_ERROR_NONE];
+
+ enqueue_code = (i0 != i_next) + 2*(i1 != i_next);
+
+ if (PREDICT_FALSE (enqueue_code != 0))
+ {
+ switch (enqueue_code)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = bi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i0), bi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i1), bi1);
+ break;
+
+ case 3:
+ /* A B B or A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i0), bi0);
+ vlib_set_next_frame_buffer (vm, node, vec_elt (rt->next_by_protocol, i1), bi1);
+ if (i0 == i1)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ i_next = i1;
+ next_index = vec_elt (rt->next_by_protocol, i_next);
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ }
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ hdlc_header_t * h0;
+ u32 i0, len0, protocol0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ protocol0 = h0->protocol;
+
+ /* Add padding bytes for OSI protocols. */
+ len0 = sizeof (h0[0]);
+ len0 += protocol0 == clib_host_to_net_u16 (HDLC_PROTOCOL_osi);
+
+ b0->current_data += len0;
+ b0->current_length -= len0;
+
+ i0 = sparse_vec_index (rt->next_by_protocol, protocol0);
+
+ b0->error = node->errors[i0 == SPARSE_VEC_INVALID_INDEX ? HDLC_ERROR_UNKNOWN_PROTOCOL : HDLC_ERROR_NONE];
+
+ /* Sent packet to wrong next? */
+ if (PREDICT_FALSE (i0 != i_next))
+ {
+ /* Return old frame; remove incorrectly enqueued packet. */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);
+
+ /* Send to correct next. */
+ i_next = i0;
+ next_index = vec_elt (rt->next_by_protocol, i_next);
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char * hdlc_error_strings[] = {
+#define hdlc_error(n,s) s,
+#include "error.def"
+#undef hdlc_error
+};
+
+VLIB_REGISTER_NODE (hdlc_input_node) = {
+ .function = hdlc_input,
+ .name = "hdlc-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof (hdlc_input_runtime_t),
+
+ .n_errors = HDLC_N_ERROR,
+ .error_strings = hdlc_error_strings,
+
+ .n_next_nodes = HDLC_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [HDLC_INPUT_NEXT_##s] = n,
+ foreach_hdlc_input_next
+#undef _
+ },
+
+ .format_buffer = format_hdlc_header_with_length,
+ .format_trace = format_hdlc_input_trace,
+ .unformat_buffer = unformat_hdlc_header,
+};
+
+static clib_error_t * hdlc_input_runtime_init (vlib_main_t * vm)
+{
+ hdlc_input_runtime_t * rt;
+ rt = vlib_node_get_runtime_data (vm, hdlc_input_node.index);
+
+ rt->next_by_protocol = sparse_vec_new
+ (/* elt bytes */ sizeof (rt->next_by_protocol[0]),
+ /* bits in index */ BITS (((hdlc_header_t *) 0)->protocol));
+
+ vec_validate (rt->sparse_index_by_next_index, HDLC_INPUT_NEXT_DROP);
+ vec_validate (rt->sparse_index_by_next_index, HDLC_INPUT_NEXT_PUNT);
+ rt->sparse_index_by_next_index[HDLC_INPUT_NEXT_DROP]
+ = SPARSE_VEC_INVALID_INDEX;
+ rt->sparse_index_by_next_index[HDLC_INPUT_NEXT_PUNT]
+ = SPARSE_VEC_INVALID_INDEX;
+
+ return 0;
+}
+
+static clib_error_t * hdlc_input_init (vlib_main_t * vm)
+{
+
+ {
+ clib_error_t * error = vlib_call_init_function (vm, hdlc_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ hdlc_setup_node (vm, hdlc_input_node.index);
+ hdlc_input_runtime_init (vm);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (hdlc_input_init);
+VLIB_WORKER_INIT_FUNCTION (hdlc_input_runtime_init);
+
+void
+hdlc_register_input_protocol (vlib_main_t * vm,
+ hdlc_protocol_t protocol,
+ u32 node_index)
+{
+ hdlc_main_t * em = &hdlc_main;
+ hdlc_protocol_info_t * pi;
+ hdlc_input_runtime_t * rt;
+ u16 * n;
+ u32 i;
+
+ {
+ clib_error_t * error = vlib_call_init_function (vm, hdlc_input_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = hdlc_get_protocol_info (em, protocol);
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm,
+ hdlc_input_node.index,
+ node_index);
+
+ /* Setup hdlc protocol -> next index sparse vector mapping. */
+ rt = vlib_node_get_runtime_data (vm, hdlc_input_node.index);
+ n = sparse_vec_validate (rt->next_by_protocol, clib_host_to_net_u16 (protocol));
+ n[0] = pi->next_index;
+
+ /* Rebuild next index -> sparse index inverse mapping when sparse vector
+ is updated. */
+ vec_validate (rt->sparse_index_by_next_index, pi->next_index);
+ for (i = 1; i < vec_len (rt->next_by_protocol); i++)
+ rt->sparse_index_by_next_index[rt->next_by_protocol[i]] = i;
+}
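For readers of the dual-packet loop above: enqueue_code packs the two "did this
buffer match the cached next?" tests into one switch value.

    enqueue_code = (i0 != i_next) + 2*(i1 != i_next)

    0  both match the cached next; the speculative enqueue stands (fast path)
    1  "A B A"  only b0 diverges; bi0 is re-enqueued to its own next
    2  "A A B"  only b1 diverges; bi1 is re-enqueued to its own next
    3  "A B B" / "A B C"  both diverge; when i0 == i1 the cached next switches
       to the new protocol so subsequent packets take the fast path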
diff --git a/src/vnet/hdlc/packet.h b/src/vnet/hdlc/packet.h
new file mode 100644
index 00000000..45e5496f
--- /dev/null
+++ b/src/vnet/hdlc/packet.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_hdlc_packet_h
+#define included_vnet_hdlc_packet_h
+
+/*
+ * HDLC packet format
+ *
+ * Copyright (c) 2009 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define foreach_hdlc_protocol \
+ _ (0x0800, ip4) \
+ _ (0x2000, cdp) \
+ _ (0x8035, slarp) \
+ _ (0x8847, mpls_unicast) \
+ _ (0x8848, mpls_multicast) \
+ _ (0x86dd, ip6) \
+ _ (0xfefe, osi)
+
+typedef enum {
+#define _(n,f) HDLC_PROTOCOL_##f = n,
+ foreach_hdlc_protocol
+#undef _
+} hdlc_protocol_t;
+
+typedef struct {
+ /* Set to 0x0f for unicast; 0x8f for broadcast. */
+ u8 address;
+
+ /* Always zero. */
+ u8 control;
+
+ /* Layer 3 protocol for this packet. */
+ u16 protocol;
+
+ /* Layer 3 payload. */
+ u8 payload[0];
+} hdlc_header_t;
+
+#endif /* included_vnet_hdlc_packet_h */
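On the wire this is the fixed 4-byte Cisco-HDLC encapsulation; a unicast ip4
packet, for example, begins:

    0x0f 0x00 0x08 0x00    address, control, protocol 0x0800 (ip4)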
diff --git a/src/vnet/hdlc/pg.c b/src/vnet/hdlc/pg.c
new file mode 100644
index 00000000..b8e67022
--- /dev/null
+++ b/src/vnet/hdlc/pg.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * hdlc_pg.c: packet generator hdlc interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/hdlc/hdlc.h>
+
+typedef struct {
+ pg_edit_t address;
+ pg_edit_t control;
+ pg_edit_t protocol;
+} pg_hdlc_header_t;
+
+static inline void
+pg_hdlc_header_init (pg_hdlc_header_t * e)
+{
+ pg_edit_init (&e->address, hdlc_header_t, address);
+ pg_edit_init (&e->control, hdlc_header_t, control);
+ pg_edit_init (&e->protocol, hdlc_header_t, protocol);
+}
+
+uword
+unformat_pg_hdlc_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_hdlc_header_t * h;
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (hdlc_header_t),
+ &group_index);
+ pg_hdlc_header_init (h);
+
+ pg_edit_set_fixed (&h->address, 0x0f);
+ pg_edit_set_fixed (&h->control, 0x00);
+
+ error = 1;
+ if (! unformat (input, "%U",
+ unformat_pg_edit,
+ unformat_hdlc_protocol_net_byte_order, &h->protocol))
+ goto done;
+
+ {
+ hdlc_main_t * pm = &hdlc_main;
+ hdlc_protocol_info_t * pi = 0;
+ pg_node_t * pg_node = 0;
+
+ if (h->protocol.type == PG_EDIT_FIXED)
+ {
+ u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO];
+ pi = hdlc_get_protocol_info (pm, clib_net_to_host_u16 (t));
+ if (pi && pi->node_index != ~0)
+ pg_node = pg_get_node (pi->node_index);
+ }
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+
+ else if (! unformat_user (input, unformat_pg_payload, s))
+ goto done;
+ }
+
+ error = 0;
+ done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
diff --git a/src/vnet/interface.api b/src/vnet/interface.api
new file mode 100644
index 00000000..94ecdd9f
--- /dev/null
+++ b/src/vnet/interface.api
@@ -0,0 +1,396 @@
+/** \brief Set flags on the interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to set flags on
+ @param admin_up_down - set the admin state, 1 = up, 0 = down
+ @param link_up_down - link/oper state; reported in change events, not used when setting flags
+*/
+autoreply define sw_interface_set_flags
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* 1 = up, 0 = down */
+ u8 admin_up_down;
+};
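A hedged usage sketch: the vpp_api_test client exercises this message roughly as
follows (the token names belong to the test client, not to this definition):

    vat# sw_interface_set_flags sw_if_index 1 admin-up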
+
+/** \brief Set interface MTU
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to set MTU on
+ @param mtu - MTU
+*/
+autoreply define sw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u16 mtu;
+};
+
+/** \brief Interface Event generated by want_interface_events
+ @param client_index - opaque cookie to identify the sender
+ @param pid - client pid registered to receive notification
+ @param sw_if_index - index of the interface of the event
+ @param admin_up_down - The administrative state; 1 = up, 0 = down
+ @param link_up_down - The operational state; 1 = up, 0 = down
+ @param deleted - interface was deleted
+*/
+define sw_interface_event
+{
+ u32 client_index;
+ u32 pid;
+ u32 sw_if_index;
+ u8 admin_up_down;
+ u8 link_up_down;
+ u8 deleted;
+};
+
+/** \brief Register for interface events
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 => register for events, 0 => cancel registration
+ @param pid - sender's pid
+*/
+autoreply define want_interface_events
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief Interface details structure (fix this)
+ @param sw_if_index - index of the interface
+ @param sup_sw_if_index - index of parent interface if any, else same as sw_if_index
+ @param l2_address_length - length of the interface's l2 address
+ @param l2_address - the interface's l2 address
+ @param interface_name - name of the interface
+ @param link_duplex - 1 if half duplex, 2 if full duplex
+ @param link_speed - 1 = 10M, 2 = 100M, 4 = 1G, 8 = 10G, 16 = 40G, 32 = 100G
+ @param link_mtu - maximum transmission unit (MTU)
+ @param sub_id - a number 0-N uniquely identifying this subinterface on its super interface
+ @param sub_dot1ad - 0 = dot1q, 1 = dot1ad
+ @param sub_dot1ah - 1 = dot1ah, 0 = otherwise
+ @param sub_number_of_tags - Number of tags (0 - 2)
+ @param sub_outer_vlan_id
+ @param sub_inner_vlan_id
+ @param sub_exact_match
+ @param sub_default
+ @param sub_outer_vlan_id_any
+ @param sub_inner_vlan_id_any
+ @param vtr_op - vlan tag rewrite operation
+ @param vtr_push_dot1q
+ @param vtr_tag1
+ @param vtr_tag2
+ @param pbb_outer_tag - translate pbb s-tag
+ @param pbb_b_dmac[6] - B-tag remote mac address
+ @param pbb_b_smac[6] - B-tag local mac address
+ @param pbb_b_vlanid - B-tag vlanid
+ @param pbb_i_sid - I-tag service id
+*/
+define sw_interface_details
+{
+ u32 context;
+ u32 sw_if_index;
+
+ /* index of sup interface (e.g. hw interface).
+ equal to sw_if_index for super hw interface. */
+ u32 sup_sw_if_index;
+
+ /* Layer 2 address, if applicable */
+ u32 l2_address_length;
+ u8 l2_address[8];
+
+ /* Interface name */
+ u8 interface_name[64];
+
+ /* 1 = up, 0 = down */
+ u8 admin_up_down;
+ u8 link_up_down;
+
+ /* 1 = half duplex, 2 = full duplex */
+ u8 link_duplex;
+
+ /* 1 = 10M, 2 = 100M, 4 = 1G, 8 = 10G, 16 = 40G, 32 = 100G */
+ u8 link_speed;
+
+ /* MTU */
+ u16 link_mtu;
+
+ /* Subinterface ID. A number 0-N to uniquely identify this subinterface under the super interface */
+ u32 sub_id;
+
+ /* 0 = dot1q, 1 = dot1ad */
+ u8 sub_dot1ad;
+ /* 1 = dot1ah, 0 = otherwise */
+ u8 sub_dot1ah;
+
+ /* Number of tags 0-2 */
+ u8 sub_number_of_tags;
+ u16 sub_outer_vlan_id;
+ u16 sub_inner_vlan_id;
+ u8 sub_exact_match;
+ u8 sub_default;
+ u8 sub_outer_vlan_id_any;
+ u8 sub_inner_vlan_id_any;
+
+ /* vlan tag rewrite state */
+ u32 vtr_op;
+ u32 vtr_push_dot1q; // ethertype of first pushed tag is dot1q/dot1ad
+ u32 vtr_tag1; // first pushed tag
+ u32 vtr_tag2; // second pushed tag
+ u8 tag[64];
+
+ /* pbb tag rewrite info */
+ u16 outer_tag;
+ u8 b_dmac[6];
+ u8 b_smac[6];
+ u16 b_vlanid;
+ u32 i_sid;
+};
+
+/** \brief Request a dump of interface details; one sw_interface_details reply per interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param name_filter_valid - 1 if name_filter should be applied
+ @param name_filter - dump only interfaces whose name contains this string
+*/
+define sw_interface_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 name_filter_valid;
+ u8 name_filter[49];
+};
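
The dump/details pair follows the usual VPP pattern: the client sends
sw_interface_dump, VPP answers with one sw_interface_details per matching
interface, and clients conventionally follow up with a control ping whose
reply marks the end of the stream. The request side, as a sketch under the
same assumptions as the earlier example:

    vl_api_sw_interface_dump_t *mp;

    mp = vl_msg_api_alloc (sizeof (*mp));
    memset (mp, 0, sizeof (*mp));
    mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_DUMP);
    mp->client_index = am->my_client_index;
    mp->name_filter_valid = 1;
    strncpy ((char *) mp->name_filter, "GigabitEthernet",
             sizeof (mp->name_filter) - 1);
    vl_msg_api_send_shmem (am->shmem_hdr->vl_input_queue, (u8 *) & mp);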
+
+/** \brief Set or delete one or all ip addresses on a specified interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to add/del addresses
+ @param is_add - add address if non-zero, else delete
+ @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+ @param del_all - if non-zero delete all addresses on the interface
+ @param address_length - address length in bytes, 4 for ip4, 16 for ip6
+ @param address - array of address bytes
+*/
+autoreply define sw_interface_add_del_address
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 del_all;
+ u8 address_length;
+ u8 address[16];
+};
+
+/** \brief Associate the specified interface with a fib table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface
+ @param is_ipv6 - if non-zero ipv6, else ipv4
+ @param vrf_id - fib table/vrf id to associate the interface with
+*/
+autoreply define sw_interface_set_table
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u32 vrf_id;
+};
+
+/** \brief Get VRF id assigned to interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface
+ @param is_ipv6 - if non-zero query the ipv6 fib, else ipv4
+*/
+define sw_interface_get_table
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+/** \brief Reply to sw_interface_get_table
+ @param context - sender context which was passed in the request
+ @param retval - return code for the request
+ @param vrf_id - VRF id assigned to the interface
+*/
+define sw_interface_get_table_reply
+{
+ u32 context;
+ i32 retval;
+ u32 vrf_id;
+};
+
+typeonly manual_print manual_endian define vlib_counter
+{
+ u64 packets; /**< packet counter */
+ u64 bytes; /**< byte counter */
+};
+
+/** \brief Combined interface counter data type for vnet_interface_combined_counters
+ @param sw_if_index - interface indexes for counters
+ @param rx_packets - received packet count
+ @param rx_bytes - received byte count
+ @param tx_packets - transmitted packet count
+ @param tx_bytes - transmitted byte count
+
+*/
+typeonly manual_print manual_endian define vnet_combined_counter
+{
+ u32 sw_if_index;
+ u64 rx_packets; /**< packet counter */
+ u64 rx_bytes; /**< byte counter */
+ u64 tx_packets; /**< packet counter */
+ u64 tx_bytes; /**< byte counter */
+};
+
+/** \brief Simple interface counter data type for vnet_interface_simple_counters
+ @param sw_if_index - interface indexes for counters
+ @param drop - RX or TX drops due to buffer starvation
+ @param punt - used with VNET "punt" disposition
+ @param rx_ip4 - received IP4 packets
+ @param rx_ip6 - received IP6 packets
+ @param rx_no_buffer - no RX buffers available
+ @param rx_miss - receive misses
+ @param rx_error - receive errors
+ @param tx_error - transmit errors
+ @param rx_mpls - received MPLS packet
+
+*/
+typeonly manual_print manual_endian define vnet_simple_counter
+{
+ u32 sw_if_index;
+ u64 drop;
+ u64 punt;
+ u64 rx_ip4;
+ u64 rx_ip6;
+ u64 rx_no_buffer;
+ u64 rx_miss;
+ u64 rx_error;
+ u64 tx_error;
+ u64 rx_mpls;
+};
+
+/** \brief Simple stats counters structure
+ @param vnet_counter_type - such as ip4, ip6, punts, etc.
+ @param first_sw_if_index - first sw index in block of index, counts
+ @param count - number of counters, equal to the number of interfaces in
+ this stats block
+ @param data - contiguous block of u64 counters
+
+ vnet_counter_type values are defined by the (plural) counter enums in vnet/interface.h
+*/
+manual_print manual_endian define vnet_interface_simple_counters
+{
+ u8 vnet_counter_type;
+ u32 first_sw_if_index;
+ u32 count;
+ u64 data[count];
+};
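
Since the message is declared manual_endian, the variable-length data[count]
block arrives in network byte order and the receiver byte-swaps each u64
itself. A sketch of a stats-client handler under that assumption
(record_counter is a hypothetical sink, not part of this patch):

    static void
    vl_api_vnet_interface_simple_counters_t_handler
      (vl_api_vnet_interface_simple_counters_t * mp)
    {
      u32 count = ntohl (mp->count);
      u32 sw_if_index = ntohl (mp->first_sw_if_index);
      u64 *v = (u64 *) mp->data;
      u32 i;

      for (i = 0; i < count; i++, sw_if_index++)
        {
          /* mp->vnet_counter_type selects drops/punts/ip4/... */
          record_counter (mp->vnet_counter_type, sw_if_index,
                          clib_net_to_host_u64 (v[i]));
        }
    }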
+
+/** \brief Combined stats counters structure
+ @param vnet_counter_type - such as ip4, ip6, punts, etc.
+ @param first_sw_if_index - first sw index in block of index, counts
+ @param count - number of counters, equal to the number of interfaces in
+ this stats block
+ @param data - contiguous block of vlib_counter_t structures
+
+ vnet_counter_type values are defined by the (plural) counter enums in vnet/interface.h
+*/
+manual_print manual_endian define vnet_interface_combined_counters
+{
+ u8 vnet_counter_type;
+ u32 first_sw_if_index;
+ u32 count;
+ vl_api_vlib_counter_t data[count];
+};
+
+
+/** \brief Simple per interface stats counters structure
+ @param count - number of elements in message
+ @param timestamp - u32 vlib timestamp for control plane
+ @param data[count] - vl_api_vnet_simple_counter_t
+
+*/
+manual_print manual_endian define vnet_per_interface_simple_counters
+{
+ u32 count;
+ u32 timestamp;
+ vl_api_vnet_simple_counter_t data[count];
+};
+
+/** \brief Combined stats counters structure per interface
+ @param count - number of elements in message
+ @param timestamp - u32 vlib timestamp for control plane
+ @param data[count] - vl_api_vnet_combined_counter_t
+*/
+manual_print manual_endian define vnet_per_interface_combined_counters
+{
+ u32 count;
+ u32 timestamp;
+ vl_api_vnet_combined_counter_t data[count];
+};
+
+/** \brief Set unnumbered interface add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface with an IP address
+ @param unnumbered_sw_if_index - interface which will use the address
+ @param is_add - if non-zero set the association, else unset it
+*/
+autoreply define sw_interface_set_unnumbered
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index; /* use this intfc address */
+ u32 unnumbered_sw_if_index; /* on this interface */
+ u8 is_add;
+};
+
+/** \brief Clear interface statistics
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to clear statistics
+*/
+autoreply define sw_interface_clear_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Set / clear software interface tag
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface
+ @param is_add - 1 = add, 0 = delete
+ @param tag - an ascii tag
+*/
+autoreply define sw_interface_tag_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 sw_if_index;
+ u8 tag[64];
+};
+
+/** \brief Set an interface's MAC address
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface whose MAC will be set
+ @param mac_address - the new MAC address
+*/
+autoreply define sw_interface_set_mac_address
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 mac_address[6];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
new file mode 100644
index 00000000..159ce8c6
--- /dev/null
+++ b/src/vnet/interface.c
@@ -0,0 +1,1464 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * interface.c: VNET interfaces/sub-interfaces
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
+
+#define VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE (1 << 0)
+#define VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE (1 << 1)
+
+static clib_error_t *vnet_hw_interface_set_flags_helper (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags,
+ u32 helper_flags);
+
+static clib_error_t *vnet_sw_interface_set_flags_helper (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags,
+ u32 helper_flags);
+
+static clib_error_t *vnet_hw_interface_set_class_helper (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 hw_class_index,
+ u32 redistribute);
+
+typedef struct
+{
+ /* Either sw or hw interface index. */
+ u32 sw_hw_if_index;
+
+ /* Flags. */
+ u32 flags;
+} vnet_sw_hw_interface_state_t;
+
+static void
+serialize_vec_vnet_sw_hw_interface_state (serialize_main_t * m, va_list * va)
+{
+ vnet_sw_hw_interface_state_t *s =
+ va_arg (*va, vnet_sw_hw_interface_state_t *);
+ u32 n = va_arg (*va, u32);
+ u32 i;
+ for (i = 0; i < n; i++)
+ {
+ serialize_integer (m, s[i].sw_hw_if_index,
+ sizeof (s[i].sw_hw_if_index));
+ serialize_integer (m, s[i].flags, sizeof (s[i].flags));
+ }
+}
+
+static void
+unserialize_vec_vnet_sw_hw_interface_state (serialize_main_t * m,
+ va_list * va)
+{
+ vnet_sw_hw_interface_state_t *s =
+ va_arg (*va, vnet_sw_hw_interface_state_t *);
+ u32 n = va_arg (*va, u32);
+ u32 i;
+ for (i = 0; i < n; i++)
+ {
+ unserialize_integer (m, &s[i].sw_hw_if_index,
+ sizeof (s[i].sw_hw_if_index));
+ unserialize_integer (m, &s[i].flags, sizeof (s[i].flags));
+ }
+}
+
+static void
+serialize_vnet_sw_hw_interface_set_flags (serialize_main_t * m, va_list * va)
+{
+ vnet_sw_hw_interface_state_t *s =
+ va_arg (*va, vnet_sw_hw_interface_state_t *);
+ serialize (m, serialize_vec_vnet_sw_hw_interface_state, s, 1);
+}
+
+static void
+unserialize_vnet_sw_interface_set_flags (serialize_main_t * m, va_list * va)
+{
+ CLIB_UNUSED (mc_main_t * mc) = va_arg (*va, mc_main_t *);
+ vnet_sw_hw_interface_state_t s;
+
+ unserialize (m, unserialize_vec_vnet_sw_hw_interface_state, &s, 1);
+
+ vnet_sw_interface_set_flags_helper
+ (vnet_get_main (), s.sw_hw_if_index, s.flags,
+ /* helper_flags no redistribution */ 0);
+}
+
+static void
+unserialize_vnet_hw_interface_set_flags (serialize_main_t * m, va_list * va)
+{
+ CLIB_UNUSED (mc_main_t * mc) = va_arg (*va, mc_main_t *);
+ vnet_sw_hw_interface_state_t s;
+
+ unserialize (m, unserialize_vec_vnet_sw_hw_interface_state, &s, 1);
+
+ vnet_hw_interface_set_flags_helper
+ (vnet_get_main (), s.sw_hw_if_index, s.flags,
+ /* helper_flags no redistribution */ 0);
+}
+
+MC_SERIALIZE_MSG (vnet_sw_interface_set_flags_msg, static) =
+{
+.name = "vnet_sw_interface_set_flags",.serialize =
+ serialize_vnet_sw_hw_interface_set_flags,.unserialize =
+ unserialize_vnet_sw_interface_set_flags,};
+
+MC_SERIALIZE_MSG (vnet_hw_interface_set_flags_msg, static) =
+{
+.name = "vnet_hw_interface_set_flags",.serialize =
+ serialize_vnet_sw_hw_interface_set_flags,.unserialize =
+ unserialize_vnet_hw_interface_set_flags,};
+
+void
+serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
+{
+ vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
+ vnet_sw_hw_interface_state_t *sts = 0, *st;
+ vnet_sw_interface_t *sif;
+ vnet_hw_interface_t *hif;
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ /* Serialize hardware interface classes since they may have changed.
+ Must do this before sending up/down flags. */
+ /* *INDENT-OFF* */
+ pool_foreach (hif, im->hw_interfaces, ({
+ vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hif->hw_class_index);
+ serialize_cstring (m, hw_class->name);
+ }));
+ /* *INDENT-ON* */
+
+ /* Send sw/hw interface state when non-zero. */
+ /* *INDENT-OFF* */
+ pool_foreach (sif, im->sw_interfaces, ({
+ if (sif->flags != 0)
+ {
+ vec_add2 (sts, st, 1);
+ st->sw_hw_if_index = sif->sw_if_index;
+ st->flags = sif->flags;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state);
+
+ if (sts)
+ _vec_len (sts) = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (hif, im->hw_interfaces, ({
+ if (hif->flags != 0)
+ {
+ vec_add2 (sts, st, 1);
+ st->sw_hw_if_index = hif->hw_if_index;
+ st->flags = hif->flags;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state);
+
+ vec_free (sts);
+}
+
+void
+unserialize_vnet_interface_state (serialize_main_t * m, va_list * va)
+{
+ vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
+ vnet_sw_hw_interface_state_t *sts = 0, *st;
+
+ /* First set interface hardware class. */
+ {
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_hw_interface_t *hif;
+ char *class_name;
+ uword *p;
+ clib_error_t *error;
+
+ /* *INDENT-OFF* */
+ pool_foreach (hif, im->hw_interfaces, ({
+ unserialize_cstring (m, &class_name);
+ p = hash_get_mem (im->hw_interface_class_by_name, class_name);
+ ASSERT (p != 0);
+ error = vnet_hw_interface_set_class_helper (vnm, hif->hw_if_index, p[0], /* redistribute */ 0);
+ if (error)
+ clib_error_report (error);
+ vec_free (class_name);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ vec_unserialize (m, &sts, unserialize_vec_vnet_sw_hw_interface_state);
+ vec_foreach (st, sts)
+ vnet_sw_interface_set_flags_helper (vnm, st->sw_hw_if_index, st->flags,
+ /* no distribute */ 0);
+ vec_free (sts);
+
+ vec_unserialize (m, &sts, unserialize_vec_vnet_sw_hw_interface_state);
+ vec_foreach (st, sts)
+ vnet_hw_interface_set_flags_helper (vnm, st->sw_hw_if_index, st->flags,
+ /* no distribute */ 0);
+ vec_free (sts);
+}
+
+static clib_error_t *
+call_elf_section_interface_callbacks (vnet_main_t * vnm, u32 if_index,
+ u32 flags,
+ _vnet_interface_function_list_elt_t **
+ elts)
+{
+ _vnet_interface_function_list_elt_t *elt;
+ vnet_interface_function_priority_t prio;
+ clib_error_t *error = 0;
+
+ for (prio = VNET_ITF_FUNC_PRIORITY_LOW;
+ prio <= VNET_ITF_FUNC_PRIORITY_HIGH; prio++)
+ {
+ elt = elts[prio];
+
+ while (elt)
+ {
+ error = elt->fp (vnm, if_index, flags);
+ if (error)
+ return error;
+ elt = elt->next_interface_function;
+ }
+ }
+ return error;
+}
+
+static clib_error_t *
+call_hw_interface_add_del_callbacks (vnet_main_t * vnm, u32 hw_if_index,
+ u32 is_create)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_hw_interface_class_t *hw_class =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index);
+ vnet_device_class_t *dev_class =
+ vnet_get_device_class (vnm, hi->dev_class_index);
+ clib_error_t *error = 0;
+
+ if (hw_class->interface_add_del_function
+ && (error =
+ hw_class->interface_add_del_function (vnm, hw_if_index, is_create)))
+ return error;
+
+ if (dev_class->interface_add_del_function
+ && (error =
+ dev_class->interface_add_del_function (vnm, hw_if_index,
+ is_create)))
+ return error;
+
+ error = call_elf_section_interface_callbacks
+ (vnm, hw_if_index, is_create, vnm->hw_interface_add_del_functions);
+
+ return error;
+}
+
+static clib_error_t *
+call_sw_interface_add_del_callbacks (vnet_main_t * vnm, u32 sw_if_index,
+ u32 is_create)
+{
+ return call_elf_section_interface_callbacks
+ (vnm, sw_if_index, is_create, vnm->sw_interface_add_del_functions);
+}
+
+
+static clib_error_t *
+vnet_hw_interface_set_flags_helper (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags, u32 helper_flags)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_hw_interface_class_t *hw_class =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index);
+ vnet_device_class_t *dev_class =
+ vnet_get_device_class (vnm, hi->dev_class_index);
+ vlib_main_t *vm = vnm->vlib_main;
+ u32 mask;
+ clib_error_t *error = 0;
+ u32 is_create =
+ (helper_flags & VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE) != 0;
+
+ mask =
+ (VNET_HW_INTERFACE_FLAG_LINK_UP | VNET_HW_INTERFACE_FLAG_DUPLEX_MASK |
+ VNET_HW_INTERFACE_FLAG_SPEED_MASK);
+ flags &= mask;
+
+ /* Call hardware interface add/del callbacks. */
+ if (is_create)
+ call_hw_interface_add_del_callbacks (vnm, hw_if_index, is_create);
+
+ /* Already in the desired state? */
+ if (!is_create && (hi->flags & mask) == flags)
+ goto done;
+
+ /* Some interface classes do not redistribute (e.g. are local). */
+ if (!dev_class->redistribute)
+ helper_flags &= ~VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE;
+
+ if (vm->mc_main
+ && (helper_flags & VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE))
+ {
+ vnet_sw_hw_interface_state_t s;
+ s.sw_hw_if_index = hw_if_index;
+ s.flags = flags;
+ mc_serialize (vm->mc_main, &vnet_hw_interface_set_flags_msg, &s);
+ }
+
+ if ((hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) !=
+ (flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+ {
+ /* Do hardware class (e.g. ethernet). */
+ if (hw_class->link_up_down_function
+ && (error = hw_class->link_up_down_function (vnm, hw_if_index,
+ flags)))
+ goto done;
+
+ error = call_elf_section_interface_callbacks
+ (vnm, hw_if_index, flags, vnm->hw_interface_link_up_down_functions);
+
+ if (error)
+ goto done;
+ }
+
+ hi->flags &= ~mask;
+ hi->flags |= flags;
+
+done:
+ return error;
+}
+
+static clib_error_t *
+vnet_sw_interface_set_flags_helper (vnet_main_t * vnm, u32 sw_if_index,
+ u32 flags, u32 helper_flags)
+{
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ vlib_main_t *vm = vnm->vlib_main;
+ u32 mask;
+ clib_error_t *error = 0;
+ u32 is_create =
+ (helper_flags & VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE) != 0;
+ u32 old_flags;
+
+ mask = VNET_SW_INTERFACE_FLAG_ADMIN_UP | VNET_SW_INTERFACE_FLAG_PUNT;
+ flags &= mask;
+
+ if (is_create)
+ {
+ error =
+ call_sw_interface_add_del_callbacks (vnm, sw_if_index, is_create);
+ if (error)
+ goto done;
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ {
+ /* Notify everyone when the interface is created as admin up */
+ error = call_elf_section_interface_callbacks (vnm, sw_if_index,
+ flags,
+ vnm->
+ sw_interface_admin_up_down_functions);
+ if (error)
+ goto done;
+ }
+ }
+ else
+ {
+ vnet_sw_interface_t *si_sup = si;
+
+ /* Check that super interface is in correct state. */
+ if (si->type == VNET_SW_INTERFACE_TYPE_SUB)
+ {
+ si_sup = vnet_get_sw_interface (vnm, si->sup_sw_if_index);
+
+ /* A sub-interface may go admin-down while its parent is up; otherwise its state must match the parent's */
+ if ((flags != (si_sup->flags & mask)) &&
+ (!((flags == 0)
+ && ((si_sup->flags & mask) ==
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP))))
+ {
+ error = clib_error_return (0, "super-interface %U must be %U",
+ format_vnet_sw_interface_name, vnm,
+ si_sup,
+ format_vnet_sw_interface_flags,
+ flags);
+ goto done;
+ }
+ }
+
+ /* Do not change the state of a slave link of a bonded interface */
+ if (si->flags & VNET_SW_INTERFACE_FLAG_BOND_SLAVE)
+ {
+ error = clib_error_return
+ (0, "not allowed as %U belongs to a BondEthernet interface",
+ format_vnet_sw_interface_name, vnm, si);
+ goto done;
+ }
+
+ /* Already in the desired state? */
+ if ((si->flags & mask) == flags)
+ goto done;
+
+ /* Sub-interfaces of hardware interfaces that do not redistribute
+ do not redistribute themselves. */
+ if (si_sup->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ vnet_hw_interface_t *hi =
+ vnet_get_hw_interface (vnm, si_sup->hw_if_index);
+ vnet_device_class_t *dev_class =
+ vnet_get_device_class (vnm, hi->dev_class_index);
+ if (!dev_class->redistribute)
+ helper_flags &=
+ ~VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE;
+ }
+
+ if (vm->mc_main
+ && (helper_flags &
+ VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE))
+ {
+ vnet_sw_hw_interface_state_t s;
+ s.sw_hw_if_index = sw_if_index;
+ s.flags = flags;
+ mc_serialize (vm->mc_main, &vnet_sw_interface_set_flags_msg, &s);
+ }
+
+ /* set the flags now, before invoking the registered clients,
+ * so that the state they query is consistent with the state notified here */
+ old_flags = si->flags;
+ si->flags &= ~mask;
+ si->flags |= flags;
+ if ((flags | old_flags) & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ error = call_elf_section_interface_callbacks
+ (vnm, sw_if_index, flags,
+ vnm->sw_interface_admin_up_down_functions);
+ si->flags = old_flags;
+
+ if (error)
+ goto done;
+
+ if (si->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ vnet_hw_interface_t *hi =
+ vnet_get_hw_interface (vnm, si->hw_if_index);
+ vnet_hw_interface_class_t *hw_class =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index);
+ vnet_device_class_t *dev_class =
+ vnet_get_device_class (vnm, hi->dev_class_index);
+
+ if ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) &&
+ (si->flags & VNET_SW_INTERFACE_FLAG_ERROR))
+ {
+ error = clib_error_return (0, "Interface in the error state");
+ goto done;
+ }
+
+ /* save the si admin up flag */
+ old_flags = si->flags;
+
+ /* update si admin up flag in advance if we are going admin down */
+ if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+ if (dev_class->admin_up_down_function
+ && (error = dev_class->admin_up_down_function (vnm,
+ si->hw_if_index,
+ flags)))
+ {
+ /* restore si admin up flag to its original state on errors */
+ si->flags = old_flags;
+ goto done;
+ }
+
+ if (hw_class->admin_up_down_function
+ && (error = hw_class->admin_up_down_function (vnm,
+ si->hw_if_index,
+ flags)))
+ {
+ /* restore si admin up flag to its original state on errors */
+ si->flags = old_flags;
+ goto done;
+ }
+
+ /* Admin down implies link down. */
+ if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ && (hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+ vnet_hw_interface_set_flags_helper (vnm, si->hw_if_index,
+ hi->flags &
+ ~VNET_HW_INTERFACE_FLAG_LINK_UP,
+ helper_flags);
+ }
+ }
+
+ si->flags &= ~mask;
+ si->flags |= flags;
+
+done:
+ return error;
+}
+
+clib_error_t *
+vnet_hw_interface_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ return vnet_hw_interface_set_flags_helper
+ (vnm, hw_if_index, flags,
+ VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE);
+}
+
+clib_error_t *
+vnet_sw_interface_set_flags (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+ return vnet_sw_interface_set_flags_helper
+ (vnm, sw_if_index, flags,
+ VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE);
+}
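
These two exported wrappers are the intended entry points; helper_flags stays
private to this file. Bringing a software interface admin-up, for example, is
simply:

    clib_error_t *error;

    error = vnet_sw_interface_set_flags (vnm, sw_if_index,
                                         VNET_SW_INTERFACE_FLAG_ADMIN_UP);
    if (error)
      clib_error_report (error);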
+
+static u32
+vnet_create_sw_interface_no_callbacks (vnet_main_t * vnm,
+ vnet_sw_interface_t * template)
+{
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *sw;
+ u32 sw_if_index;
+
+ pool_get (im->sw_interfaces, sw);
+ sw_if_index = sw - im->sw_interfaces;
+
+ sw[0] = template[0];
+
+ sw->flags = 0;
+ sw->sw_if_index = sw_if_index;
+ if (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ sw->sup_sw_if_index = sw->sw_if_index;
+
+ /* Allocate counters for this interface. */
+ {
+ u32 i;
+
+ vnet_interface_counter_lock (im);
+
+ for (i = 0; i < vec_len (im->sw_if_counters); i++)
+ {
+ vlib_validate_simple_counter (&im->sw_if_counters[i], sw_if_index);
+ vlib_zero_simple_counter (&im->sw_if_counters[i], sw_if_index);
+ }
+
+ for (i = 0; i < vec_len (im->combined_sw_if_counters); i++)
+ {
+ vlib_validate_combined_counter (&im->combined_sw_if_counters[i],
+ sw_if_index);
+ vlib_zero_combined_counter (&im->combined_sw_if_counters[i],
+ sw_if_index);
+ }
+
+ vnet_interface_counter_unlock (im);
+ }
+
+ return sw_if_index;
+}
+
+clib_error_t *
+vnet_create_sw_interface (vnet_main_t * vnm, vnet_sw_interface_t * template,
+ u32 * sw_if_index)
+{
+ clib_error_t *error;
+ vnet_hw_interface_t *hi;
+ vnet_device_class_t *dev_class;
+
+ hi = vnet_get_sup_hw_interface (vnm, template->sup_sw_if_index);
+ dev_class = vnet_get_device_class (vnm, hi->dev_class_index);
+
+ if (template->type == VNET_SW_INTERFACE_TYPE_SUB &&
+ dev_class->subif_add_del_function)
+ {
+ error = dev_class->subif_add_del_function (vnm, hi->hw_if_index,
+ (struct vnet_sw_interface_t
+ *) template, 1);
+ if (error)
+ return error;
+ }
+
+ *sw_if_index = vnet_create_sw_interface_no_callbacks (vnm, template);
+ error = vnet_sw_interface_set_flags_helper
+ (vnm, *sw_if_index, template->flags,
+ VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
+
+ if (error)
+ {
+ /* undo the work done by vnet_create_sw_interface_no_callbacks() */
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *sw =
+ pool_elt_at_index (im->sw_interfaces, *sw_if_index);
+ pool_put (im->sw_interfaces, sw);
+ }
+
+ return error;
+}
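
This is the path by which sub-interfaces come into existence: the caller fills
in a template, the owning device class may veto via subif_add_del_function,
and a fresh sw_if_index comes back. A sketch of creating a sub-interface,
assuming the sub.id member layout of vnet_sw_interface_t (declared elsewhere,
not in this hunk):

    vnet_sw_interface_t template = {
      .type = VNET_SW_INTERFACE_TYPE_SUB,
      .flood_class = VNET_FLOOD_CLASS_NORMAL,
      .sup_sw_if_index = parent_sw_if_index,
      .sub.id = 100,		/* sub-interface id, e.g. the outer VLAN */
    };
    u32 sub_sw_if_index;
    clib_error_t *error;

    error = vnet_create_sw_interface (vnm, &template, &sub_sw_if_index);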
+
+void
+vnet_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *sw =
+ pool_elt_at_index (im->sw_interfaces, sw_if_index);
+
+ /* If the interface still has an L2 config, remove it from the L2 BD or XConnect first */
+ vlib_main_t *vm = vlib_get_main ();
+ l2_input_config_t *config;
+ if (sw_if_index < vec_len (l2input_main.configs))
+ {
+ config = vec_elt_at_index (l2input_main.configs, sw_if_index);
+ if (config->xconnect)
+ set_int_l2_mode (vm, vnm, MODE_L3, config->output_sw_if_index, 0, 0,
+ 0, 0);
+ if (config->xconnect || config->bridge)
+ set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0);
+ }
+
+ /* Bring down interface in case it is up. */
+ if (sw->flags != 0)
+ vnet_sw_interface_set_flags (vnm, sw_if_index, /* flags */ 0);
+
+ call_sw_interface_add_del_callbacks (vnm, sw_if_index, /* is_create */ 0);
+
+ pool_put (im->sw_interfaces, sw);
+}
+
+static void
+setup_tx_node (vlib_main_t * vm,
+ u32 node_index, vnet_device_class_t * dev_class)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+
+ n->function = dev_class->tx_function;
+ n->format_trace = dev_class->format_tx_trace;
+
+ vlib_register_errors (vm, node_index,
+ dev_class->tx_function_n_errors,
+ dev_class->tx_function_error_strings);
+}
+
+static void
+setup_output_node (vlib_main_t * vm,
+ u32 node_index, vnet_hw_interface_class_t * hw_class)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ n->format_buffer = hw_class->format_header;
+ n->unformat_buffer = hw_class->unformat_header;
+}
+
+/* Register an interface instance. */
+u32
+vnet_register_interface (vnet_main_t * vnm,
+ u32 dev_class_index,
+ u32 dev_instance,
+ u32 hw_class_index, u32 hw_instance)
+{
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_hw_interface_t *hw;
+ vnet_device_class_t *dev_class =
+ vnet_get_device_class (vnm, dev_class_index);
+ vnet_hw_interface_class_t *hw_class =
+ vnet_get_hw_interface_class (vnm, hw_class_index);
+ vlib_main_t *vm = vnm->vlib_main;
+ vnet_feature_config_main_t *fcm;
+ vnet_config_main_t *cm;
+ u32 hw_index, i;
+ char *tx_node_name, *output_node_name;
+
+ pool_get (im->hw_interfaces, hw);
+
+ hw_index = hw - im->hw_interfaces;
+ hw->hw_if_index = hw_index;
+ hw->default_rx_mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+
+ if (dev_class->format_device_name)
+ hw->name = format (0, "%U", dev_class->format_device_name, dev_instance);
+ else if (hw_class->format_interface_name)
+ hw->name = format (0, "%U", hw_class->format_interface_name,
+ dev_instance);
+ else
+ hw->name = format (0, "%s%x", hw_class->name, dev_instance);
+
+ if (!im->hw_interface_by_name)
+ im->hw_interface_by_name = hash_create_vec ( /* size */ 0,
+ sizeof (hw->name[0]),
+ sizeof (uword));
+
+ hash_set_mem (im->hw_interface_by_name, hw->name, hw_index);
+
+ /* Make hardware interface point to software interface. */
+ {
+ vnet_sw_interface_t sw = {
+ .type = VNET_SW_INTERFACE_TYPE_HARDWARE,
+ .flood_class = VNET_FLOOD_CLASS_NORMAL,
+ .hw_if_index = hw_index
+ };
+ hw->sw_if_index = vnet_create_sw_interface_no_callbacks (vnm, &sw);
+ }
+
+ hw->dev_class_index = dev_class_index;
+ hw->dev_instance = dev_instance;
+ hw->hw_class_index = hw_class_index;
+ hw->hw_instance = hw_instance;
+
+ hw->max_rate_bits_per_sec = 0;
+ hw->min_packet_bytes = 0;
+ hw->per_packet_overhead_bytes = 0;
+ hw->max_l3_packet_bytes[VLIB_RX] = ~0;
+ hw->max_l3_packet_bytes[VLIB_TX] = ~0;
+
+ tx_node_name = (char *) format (0, "%v-tx", hw->name);
+ output_node_name = (char *) format (0, "%v-output", hw->name);
+
+ /* If we have previously deleted interface nodes, re-use them. */
+ if (vec_len (im->deleted_hw_interface_nodes) > 0)
+ {
+ vnet_hw_interface_nodes_t *hn;
+ vlib_node_t *node;
+ vlib_node_runtime_t *nrt;
+
+ hn = vec_end (im->deleted_hw_interface_nodes) - 1;
+
+ hw->tx_node_index = hn->tx_node_index;
+ hw->output_node_index = hn->output_node_index;
+
+ vlib_node_rename (vm, hw->tx_node_index, "%v", tx_node_name);
+ vlib_node_rename (vm, hw->output_node_index, "%v", output_node_name);
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main ({
+ vnet_interface_output_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (this_vlib_main, hw->output_node_index);
+ ASSERT (rt->is_deleted == 1);
+ rt->is_deleted = 0;
+ rt->hw_if_index = hw_index;
+ rt->sw_if_index = hw->sw_if_index;
+ rt->dev_instance = hw->dev_instance;
+
+ rt = vlib_node_get_runtime_data (this_vlib_main, hw->tx_node_index);
+ rt->hw_if_index = hw_index;
+ rt->sw_if_index = hw->sw_if_index;
+ rt->dev_instance = hw->dev_instance;
+ });
+ /* *INDENT-ON* */
+
+ /* The new class may differ from the old one.
+ * Functions have to be updated. */
+ node = vlib_get_node (vm, hw->output_node_index);
+ node->function = vnet_interface_output_node_multiarch_select ();
+ node->format_trace = format_vnet_interface_output_trace;
+ /* *INDENT-OFF* */
+ foreach_vlib_main ({
+ nrt = vlib_node_get_runtime (this_vlib_main, hw->output_node_index);
+ nrt->function = node->function;
+ });
+ /* *INDENT-ON* */
+
+ node = vlib_get_node (vm, hw->tx_node_index);
+ node->function = dev_class->tx_function;
+ node->format_trace = dev_class->format_tx_trace;
+ /* *INDENT-OFF* */
+ foreach_vlib_main ({
+ nrt = vlib_node_get_runtime (this_vlib_main, hw->tx_node_index);
+ nrt->function = node->function;
+ });
+ /* *INDENT-ON* */
+
+ _vec_len (im->deleted_hw_interface_nodes) -= 1;
+ }
+ else
+ {
+ vlib_node_registration_t r;
+ vnet_interface_output_runtime_t rt = {
+ .hw_if_index = hw_index,
+ .sw_if_index = hw->sw_if_index,
+ .dev_instance = hw->dev_instance,
+ .is_deleted = 0,
+ };
+
+ memset (&r, 0, sizeof (r));
+ r.type = VLIB_NODE_TYPE_INTERNAL;
+ r.runtime_data = &rt;
+ r.runtime_data_bytes = sizeof (rt);
+ r.scalar_size = 0;
+ r.vector_size = sizeof (u32);
+
+ r.flags = VLIB_NODE_FLAG_IS_OUTPUT;
+ r.name = tx_node_name;
+ r.function = dev_class->tx_function;
+
+ hw->tx_node_index = vlib_register_node (vm, &r);
+
+ vlib_node_add_named_next_with_slot (vm, hw->tx_node_index,
+ "error-drop",
+ VNET_INTERFACE_TX_NEXT_DROP);
+
+ r.flags = 0;
+ r.name = output_node_name;
+ r.function = vnet_interface_output_node_multiarch_select ();
+ r.format_trace = format_vnet_interface_output_trace;
+
+ {
+ static char *e[] = {
+ "interface is down",
+ "interface is deleted",
+ };
+
+ r.n_errors = ARRAY_LEN (e);
+ r.error_strings = e;
+ }
+ hw->output_node_index = vlib_register_node (vm, &r);
+
+ vlib_node_add_named_next_with_slot (vm, hw->output_node_index,
+ "error-drop",
+ VNET_INTERFACE_OUTPUT_NEXT_DROP);
+ vlib_node_add_next_with_slot (vm, hw->output_node_index,
+ hw->tx_node_index,
+ VNET_INTERFACE_OUTPUT_NEXT_TX);
+
+ /* add interface to the list of "output-interface" feature arc start nodes
+ and clone nexts from 1st interface if it exists */
+ fcm = vnet_feature_get_config_main (im->output_feature_arc_index);
+ cm = &fcm->config_main;
+ i = vec_len (cm->start_node_indices);
+ vec_validate (cm->start_node_indices, i);
+ cm->start_node_indices[i] = hw->output_node_index;
+ if (hw_index)
+ {
+ /* copy nexts from 1st interface */
+ vnet_hw_interface_t *first_hw;
+ vlib_node_t *first_node;
+
+ first_hw = vnet_get_hw_interface (vnm, /* hw_if_index */ 0);
+ first_node = vlib_get_node (vm, first_hw->output_node_index);
+
+ /* 1st 2 nexts are already added above */
+ for (i = 2; i < vec_len (first_node->next_nodes); i++)
+ vlib_node_add_next_with_slot (vm, hw->output_node_index,
+ first_node->next_nodes[i], i);
+ }
+ }
+
+ setup_output_node (vm, hw->output_node_index, hw_class);
+ setup_tx_node (vm, hw->tx_node_index, dev_class);
+
+ /* Call all up/down callbacks with zero flags when interface is created. */
+ vnet_sw_interface_set_flags_helper (vnm, hw->sw_if_index, /* flags */ 0,
+ VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
+ vnet_hw_interface_set_flags_helper (vnm, hw_index, /* flags */ 0,
+ VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
+
+ return hw_index;
+}
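
A device driver calls this once per device at attach time, handing over the
indices of its registered device and hardware interface classes; the paired
software interface is created implicitly. A hedged sketch, with the mydev_*
names purely illustrative:

    vnet_hw_interface_t *hw;
    u32 hw_if_index;

    hw_if_index = vnet_register_interface (vnm,
                                           mydev_device_class.index,
                                           dev_instance,
                                           mydev_hw_interface_class.index,
                                           dev_instance);
    hw = vnet_get_hw_interface (vnm, hw_if_index);
    /* hw->sw_if_index is the automatically-created software interface */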
+
+void
+vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
+{
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vlib_main_t *vm = vnm->vlib_main;
+
+ /* If it is up, mark it down. */
+ if (hw->flags != 0)
+ vnet_hw_interface_set_flags (vnm, hw_if_index, /* flags */ 0);
+
+ /* Call delete callbacks. */
+ call_hw_interface_add_del_callbacks (vnm, hw_if_index, /* is_create */ 0);
+
+ /* Delete software interface corresponding to hardware interface. */
+ vnet_delete_sw_interface (vnm, hw->sw_if_index);
+
+ /* Delete any sub-interfaces. */
+ {
+ u32 id, sw_if_index;
+ /* *INDENT-OFF* */
+ hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id, ({
+ vnet_delete_sw_interface (vnm, sw_if_index);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ {
+ vnet_hw_interface_nodes_t *dn;
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main ({
+ vnet_interface_output_runtime_t *rt =
+ vlib_node_get_runtime_data (this_vlib_main, hw->output_node_index);
+
+ /* Mark node runtime as deleted so output node (if called)
+ * will drop packets. */
+ rt->is_deleted = 1;
+ });
+ /* *INDENT-ON* */
+
+ vlib_node_rename (vm, hw->output_node_index,
+ "interface-%d-output-deleted", hw_if_index);
+ vlib_node_rename (vm, hw->tx_node_index, "interface-%d-tx-deleted",
+ hw_if_index);
+ vec_add2 (im->deleted_hw_interface_nodes, dn, 1);
+ dn->tx_node_index = hw->tx_node_index;
+ dn->output_node_index = hw->output_node_index;
+ }
+
+ hash_unset_mem (im->hw_interface_by_name, hw->name);
+ vec_free (hw->name);
+ vec_free (hw->input_node_thread_index_by_queue);
+ vec_free (hw->dq_runtime_index_by_queue);
+
+ pool_put (im->hw_interfaces, hw);
+}
+
+void
+vnet_hw_interface_walk_sw (vnet_main_t * vnm,
+ u32 hw_if_index,
+ vnet_hw_sw_interface_walk_t fn, void *ctx)
+{
+ vnet_hw_interface_t *hi;
+ u32 id, sw_if_index;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ /* visit the super-interface first, then each sub-interface */
+ fn (vnm, hi->sw_if_index, ctx);
+
+ /* *INDENT-OFF* */
+ hash_foreach (id, sw_if_index,
+ hi->sub_interface_sw_if_index_by_id,
+ ({
+ fn (vnm, sw_if_index, ctx);
+ }));
+ /* *INDENT-ON* */
+}
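
The walk visits the hardware interface's own sw_if_index first, then every
sub-interface. A counting example, assuming the vnet_hw_sw_interface_walk_t
typedef (declared elsewhere) matches the (vnm, sw_if_index, ctx) call sites
above:

    static void
    count_sw_cb (vnet_main_t * vnm, u32 sw_if_index, void *ctx)
    {
      u32 *n = ctx;
      *n += 1;
    }

    /* ... */
    u32 n_sw = 0;
    vnet_hw_interface_walk_sw (vnm, hw_if_index, count_sw_cb, &n_sw);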
+
+static void
+serialize_vnet_hw_interface_set_class (serialize_main_t * m, va_list * va)
+{
+ u32 hw_if_index = va_arg (*va, u32);
+ char *hw_class_name = va_arg (*va, char *);
+ serialize_integer (m, hw_if_index, sizeof (hw_if_index));
+ serialize_cstring (m, hw_class_name);
+}
+
+static void
+unserialize_vnet_hw_interface_set_class (serialize_main_t * m, va_list * va)
+{
+ CLIB_UNUSED (mc_main_t * mc) = va_arg (*va, mc_main_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 hw_if_index;
+ char *hw_class_name;
+ uword *p;
+ clib_error_t *error;
+
+ unserialize_integer (m, &hw_if_index, sizeof (hw_if_index));
+ unserialize_cstring (m, &hw_class_name);
+ p =
+ hash_get (vnm->interface_main.hw_interface_class_by_name, hw_class_name);
+ ASSERT (p != 0);
+ error = vnet_hw_interface_set_class_helper (vnm, hw_if_index, p[0],
+ /* redistribute */ 0);
+ if (error)
+ clib_error_report (error);
+}
+
+MC_SERIALIZE_MSG (vnet_hw_interface_set_class_msg, static) =
+{
+.name = "vnet_hw_interface_set_class",.serialize =
+ serialize_vnet_hw_interface_set_class,.unserialize =
+ unserialize_vnet_hw_interface_set_class,};
+
+void
+vnet_hw_interface_init_for_class (vnet_main_t * vnm, u32 hw_if_index,
+ u32 hw_class_index, u32 hw_instance)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw_class_index);
+
+ hi->hw_class_index = hw_class_index;
+ hi->hw_instance = hw_instance;
+ setup_output_node (vnm->vlib_main, hi->output_node_index, hc);
+}
+
+static clib_error_t *
+vnet_hw_interface_set_class_helper (vnet_main_t * vnm, u32 hw_if_index,
+ u32 hw_class_index, u32 redistribute)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, hi->sw_if_index);
+ vnet_hw_interface_class_t *old_class =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index);
+ vnet_hw_interface_class_t *new_class =
+ vnet_get_hw_interface_class (vnm, hw_class_index);
+ vnet_device_class_t *dev_class =
+ vnet_get_device_class (vnm, hi->dev_class_index);
+ clib_error_t *error = 0;
+
+ /* New class equals old class? Nothing to do. */
+ if (hi->hw_class_index == hw_class_index)
+ return 0;
+
+ /* No need to do error checking when receiving an unserialize message
+ (and it would be incorrect, since the admin up flag may be set). */
+ if (redistribute)
+ {
+ if (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ return clib_error_return (0,
+ "%v must be admin down to change class from %s to %s",
+ hi->name, old_class->name, new_class->name);
+
+ /* Make sure interface supports given class. */
+ if ((new_class->is_valid_class_for_interface
+ && !new_class->is_valid_class_for_interface (vnm, hw_if_index,
+ hw_class_index))
+ || (dev_class->is_valid_class_for_interface
+ && !dev_class->is_valid_class_for_interface (vnm, hw_if_index,
+ hw_class_index)))
+ return clib_error_return (0,
+ "%v class cannot be changed from %s to %s",
+ hi->name, old_class->name, new_class->name);
+
+ if (vnm->vlib_main->mc_main)
+ {
+ mc_serialize (vnm->vlib_main->mc_main,
+ &vnet_hw_interface_set_class_msg, hw_if_index,
+ new_class->name);
+ return 0;
+ }
+ }
+
+ if (old_class->hw_class_change)
+ old_class->hw_class_change (vnm, hw_if_index, old_class->index,
+ new_class->index);
+
+ vnet_hw_interface_init_for_class (vnm, hw_if_index, new_class->index,
+ /* instance */ ~0);
+
+ if (new_class->hw_class_change)
+ new_class->hw_class_change (vnm, hw_if_index, old_class->index,
+ new_class->index);
+
+ if (dev_class->hw_class_change)
+ dev_class->hw_class_change (vnm, hw_if_index, new_class->index);
+
+ return error;
+}
+
+clib_error_t *
+vnet_hw_interface_set_class (vnet_main_t * vnm, u32 hw_if_index,
+ u32 hw_class_index)
+{
+ return vnet_hw_interface_set_class_helper (vnm, hw_if_index, hw_class_index,
+ /* redistribute */ 1);
+}
+
+static int
+vnet_hw_interface_rx_redirect_to_node_helper (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 node_index,
+ u32 redistribute)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_device_class_t *dev_class = vnet_get_device_class
+ (vnm, hi->dev_class_index);
+
+ if (redistribute)
+ {
+ /* $$$$ fixme someday maybe */
+ ASSERT (vnm->vlib_main->mc_main == 0);
+ }
+ if (dev_class->rx_redirect_to_node)
+ {
+ dev_class->rx_redirect_to_node (vnm, hw_if_index, node_index);
+ return 0;
+ }
+
+ return VNET_API_ERROR_UNIMPLEMENTED;
+}
+
+int
+vnet_hw_interface_rx_redirect_to_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ return vnet_hw_interface_rx_redirect_to_node_helper (vnm, hw_if_index,
+ node_index,
+ 1 /* redistribute */ );
+}
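
Redirection lets a feature steal all received packets from a device ahead of
normal input processing; device classes that do not implement the hook return
VNET_API_ERROR_UNIMPLEMENTED. Typical use (my_punt_node is illustrative, and
by common device-class convention passing ~0 later restores normal
processing):

    int rv;

    rv = vnet_hw_interface_rx_redirect_to_node (vnm, hw_if_index,
                                                my_punt_node.index);
    if (rv)
      clib_warning ("rx redirect unsupported on hw_if_index %u", hw_if_index);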
+
+word
+vnet_sw_interface_compare (vnet_main_t * vnm,
+ uword sw_if_index0, uword sw_if_index1)
+{
+ vnet_sw_interface_t *sup0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ vnet_sw_interface_t *sup1 = vnet_get_sup_sw_interface (vnm, sw_if_index1);
+ vnet_hw_interface_t *h0 = vnet_get_hw_interface (vnm, sup0->hw_if_index);
+ vnet_hw_interface_t *h1 = vnet_get_hw_interface (vnm, sup1->hw_if_index);
+
+ if (h0 != h1)
+ return vec_cmp (h0->name, h1->name);
+ return (word) h0->hw_instance - (word) h1->hw_instance;
+}
+
+word
+vnet_hw_interface_compare (vnet_main_t * vnm,
+ uword hw_if_index0, uword hw_if_index1)
+{
+ vnet_hw_interface_t *h0 = vnet_get_hw_interface (vnm, hw_if_index0);
+ vnet_hw_interface_t *h1 = vnet_get_hw_interface (vnm, hw_if_index1);
+
+ if (h0 != h1)
+ return vec_cmp (h0->name, h1->name);
+ return (word) h0->hw_instance - (word) h1->hw_instance;
+}
+
+int
+vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
+ return 1;
+
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+ return (hc->flags & VNET_HW_INTERFACE_CLASS_FLAG_P2P);
+}
+
+clib_error_t *
+vnet_interface_init (vlib_main_t * vm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_buffer_t *b = 0;
+ vnet_buffer_opaque_t *o = 0;
+
+ /*
+ * Keep people from shooting themselves in the foot.
+ */
+ if (sizeof (b->opaque) != sizeof (vnet_buffer_opaque_t))
+ {
+#define _(a) if (sizeof(o->a) > sizeof (o->unused)) \
+ clib_warning \
+ ("FATAL: size of opaque union subtype %s is %d (max %d)", \
+ #a, sizeof(o->a), sizeof (o->unused));
+ foreach_buffer_opaque_union_subtype;
+#undef _
+
+ return clib_error_return
+ (0, "FATAL: size of vlib buffer opaque %d, size of vnet opaque %d",
+ sizeof (b->opaque), sizeof (vnet_buffer_opaque_t));
+ }
+
+ im->sw_if_counter_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ im->sw_if_counter_lock[0] = 1;	/* hold the lock while initializing; should not strictly be needed */
+
+ vec_validate (im->sw_if_counters, VNET_N_SIMPLE_INTERFACE_COUNTER - 1);
+ im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP].name = "drops";
+ im->sw_if_counters[VNET_INTERFACE_COUNTER_PUNT].name = "punts";
+ im->sw_if_counters[VNET_INTERFACE_COUNTER_IP4].name = "ip4";
+ im->sw_if_counters[VNET_INTERFACE_COUNTER_IP6].name = "ip6";
+ im->sw_if_counters[VNET_INTERFACE_COUNTER_RX_NO_BUF].name = "rx-no-buf";
+ im->sw_if_counters[VNET_INTERFACE_COUNTER_RX_MISS].name = "rx-miss";
+ im->sw_if_counters[VNET_INTERFACE_COUNTER_RX_ERROR].name = "rx-error";
+ im->sw_if_counters[VNET_INTERFACE_COUNTER_TX_ERROR].name = "tx-error";
+
+ vec_validate (im->combined_sw_if_counters,
+ VNET_N_COMBINED_INTERFACE_COUNTER - 1);
+ im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX].name = "rx";
+ im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX].name = "tx";
+
+ im->sw_if_counter_lock[0] = 0;
+
+ im->device_class_by_name = hash_create_string ( /* size */ 0,
+ sizeof (uword));
+ {
+ vnet_device_class_t *c;
+
+ c = vnm->device_class_registrations;
+
+ while (c)
+ {
+ c->index = vec_len (im->device_classes);
+ hash_set_mem (im->device_class_by_name, c->name, c->index);
+ vec_add1 (im->device_classes, c[0]);
+ c = c->next_class_registration;
+ }
+ }
+
+ im->hw_interface_class_by_name = hash_create_string ( /* size */ 0,
+ sizeof (uword));
+
+ im->sw_if_index_by_sup_and_sub = hash_create_mem (0, sizeof (u64),
+ sizeof (uword));
+ {
+ vnet_hw_interface_class_t *c;
+
+ c = vnm->hw_interface_class_registrations;
+
+ while (c)
+ {
+ c->index = vec_len (im->hw_interface_classes);
+ hash_set_mem (im->hw_interface_class_by_name, c->name, c->index);
+
+ if (NULL == c->build_rewrite)
+ c->build_rewrite = default_build_rewrite;
+ if (NULL == c->update_adjacency)
+ c->update_adjacency = default_update_adjacency;
+
+ vec_add1 (im->hw_interface_classes, c[0]);
+ c = c->next_class_registration;
+ }
+ }
+
+  vnm->interface_tag_by_sw_if_index = hash_create (0, sizeof (uword));
+
+  return vlib_call_init_function (vm, vnet_interface_cli_init);
+}
+
+VLIB_INIT_FUNCTION (vnet_interface_init);
+
+/* Kludge to renumber interface names [only!] */
+int
+vnet_interface_name_renumber (u32 sw_if_index, u32 new_show_dev_instance)
+{
+ int rv;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ vnet_device_class_t *dev_class = vnet_get_device_class
+ (vnm, hi->dev_class_index);
+
+ if (dev_class->name_renumber == 0 || dev_class->format_device_name == 0)
+ return VNET_API_ERROR_UNIMPLEMENTED;
+
+ rv = dev_class->name_renumber (hi, new_show_dev_instance);
+
+ if (rv)
+ return rv;
+
+ hash_unset_mem (im->hw_interface_by_name, hi->name);
+ vec_free (hi->name);
+ /* Use the mapping we set up to call it Ishmael */
+ hi->name = format (0, "%U", dev_class->format_device_name,
+ hi->dev_instance);
+
+ hash_set_mem (im->hw_interface_by_name, hi->name, hi->hw_if_index);
+ return rv;
+}
+
+clib_error_t *
+vnet_rename_interface (vnet_main_t * vnm, u32 hw_if_index, char *new_name)
+{
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_main_t *vm = vnm->vlib_main;
+ vnet_hw_interface_t *hw;
+ u8 *old_name;
+ clib_error_t *error = 0;
+
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+ if (!hw)
+ {
+ return clib_error_return (0,
+ "unable to find hw interface for index %u",
+ hw_if_index);
+ }
+
+ old_name = hw->name;
+
+ /* set new hw->name */
+ hw->name = format (0, "%s", new_name);
+
+ /* remove the old name to hw_if_index mapping and install the new one */
+ hash_unset_mem (im->hw_interface_by_name, old_name);
+ hash_set_mem (im->hw_interface_by_name, hw->name, hw_if_index);
+
+ /* rename tx/output nodes */
+ vlib_node_rename (vm, hw->tx_node_index, "%v-tx", hw->name);
+ vlib_node_rename (vm, hw->output_node_index, "%v-output", hw->name);
+
+ /* free the old name vector */
+ vec_free (old_name);
+
+ return error;
+}
+
+static clib_error_t *
+vnet_hw_interface_change_mac_address_helper (vnet_main_t * vnm,
+ u32 hw_if_index, u64 mac_address)
+{
+ clib_error_t *error = 0;
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ if (hi->hw_address)
+ {
+ vnet_device_class_t *dev_class =
+ vnet_get_device_class (vnm, hi->dev_class_index);
+ if (dev_class->mac_addr_change_function)
+ {
+ error =
+ dev_class->mac_addr_change_function (hi, (char *) &mac_address);
+ }
+ if (!error)
+ {
+ vnet_hw_interface_class_t *hw_class;
+
+ hw_class = vnet_get_hw_interface_class (vnm, hi->hw_class_index);
+
+ if (NULL != hw_class->mac_addr_change_function)
+ hw_class->mac_addr_change_function (hi, (char *) &mac_address);
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "MAC Address Change is not supported on this interface");
+ }
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "mac address change is not supported for interface index %u",
+ hw_if_index);
+ }
+ return error;
+}
+
+clib_error_t *
+vnet_hw_interface_change_mac_address (vnet_main_t * vnm, u32 hw_if_index,
+ u64 mac_address)
+{
+ return vnet_hw_interface_change_mac_address_helper
+ (vnm, hw_if_index, mac_address);
+}
+
+vnet_l3_packet_type_t
+vnet_link_to_l3_proto (vnet_link_t link)
+{
+ switch (link)
+ {
+ case VNET_LINK_IP4:
+ return (VNET_L3_PACKET_TYPE_IP4);
+ case VNET_LINK_IP6:
+ return (VNET_L3_PACKET_TYPE_IP6);
+ case VNET_LINK_MPLS:
+ return (VNET_L3_PACKET_TYPE_MPLS);
+ case VNET_LINK_ARP:
+ return (VNET_L3_PACKET_TYPE_ARP);
+ case VNET_LINK_ETHERNET:
+ case VNET_LINK_NSH:
+ ASSERT (0);
+ break;
+ }
+ ASSERT (0);
+ return (0);
+}
+
+u8 *
+default_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ return (NULL);
+}
+
+void
+default_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_get (ai);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_GLEAN:
+ /*
+ * default rewrite in the neighbour adj
+ */
+ adj_nbr_update_rewrite
+ (ai,
+ ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ vnet_build_rewrite_for_sw_interface (vnm,
+ sw_if_index,
+ adj_get_link_type (ai), NULL));
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ /*
+ * mcast traffic also uses default rewrite string with no mcast
+ * switch time updates.
+ */
+ adj_mcast_update_rewrite
+ (ai,
+ vnet_build_rewrite_for_sw_interface (vnm,
+ sw_if_index,
+ adj_get_link_type (ai),
+ NULL), 0, 0);
+ break;
+ case IP_LOOKUP_NEXT_DROP:
+ case IP_LOOKUP_NEXT_PUNT:
+ case IP_LOOKUP_NEXT_LOCAL:
+ case IP_LOOKUP_NEXT_REWRITE:
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ case IP_LOOKUP_NEXT_ICMP_ERROR:
+ case IP_LOOKUP_N_NEXT:
+ ASSERT (0);
+ break;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
new file mode 100644
index 00000000..5ca489db
--- /dev/null
+++ b/src/vnet/interface.h
@@ -0,0 +1,711 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * interface.h: VNET interfaces/sub-interfaces
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_interface_h
+#define included_vnet_interface_h
+
+#include <vnet/unix/pcap.h>
+#include <vnet/l3_types.h>
+
+struct vnet_main_t;
+struct vnet_hw_interface_t;
+struct vnet_sw_interface_t;
+struct ip46_address_t;
+
+typedef enum
+{
+ VNET_HW_INTERFACE_RX_MODE_UNKNOWN,
+ VNET_HW_INTERFACE_RX_MODE_POLLING,
+ VNET_HW_INTERFACE_RX_MODE_INTERRUPT,
+ VNET_HW_INTERFACE_RX_MODE_ADAPTIVE,
+ VNET_HW_INTERFACE_RX_MODE_DEFAULT,
+ VNET_HW_INTERFACE_NUM_RX_MODES,
+} vnet_hw_interface_rx_mode;
+
+/* Interface up/down callback. */
+typedef clib_error_t *(vnet_interface_function_t)
+ (struct vnet_main_t * vnm, u32 if_index, u32 flags);
+
+/* Sub-interface add/del callback. */
+typedef clib_error_t *(vnet_subif_add_del_function_t)
+ (struct vnet_main_t * vnm, u32 if_index,
+ struct vnet_sw_interface_t * template, int is_add);
+
+/* Interface set mac address callback. */
+typedef clib_error_t *(vnet_interface_set_mac_address_function_t)
+ (struct vnet_hw_interface_t * hi, char *address);
+
+/* Interface set rx mode callback. */
+typedef clib_error_t *(vnet_interface_set_rx_mode_function_t)
+ (struct vnet_main_t * vnm, u32 if_index, u32 queue_id,
+ vnet_hw_interface_rx_mode mode);
+
+typedef enum vnet_interface_function_priority_t_
+{
+ VNET_ITF_FUNC_PRIORITY_LOW,
+ VNET_ITF_FUNC_PRIORITY_HIGH,
+} vnet_interface_function_priority_t;
+#define VNET_ITF_FUNC_N_PRIO ((vnet_interface_function_priority_t)VNET_ITF_FUNC_PRIORITY_HIGH+1)
+
+typedef struct _vnet_interface_function_list_elt
+{
+ struct _vnet_interface_function_list_elt *next_interface_function;
+ clib_error_t *(*fp) (struct vnet_main_t * vnm, u32 if_index, u32 flags);
+} _vnet_interface_function_list_elt_t;
+
+#define _VNET_INTERFACE_FUNCTION_DECL(f,tag) \
+ \
+static void __vnet_interface_function_init_##tag##_##f (void) \
+ __attribute__((__constructor__)) ; \
+ \
+static void __vnet_interface_function_init_##tag##_##f (void) \
+{ \
+ vnet_main_t * vnm = vnet_get_main(); \
+ static _vnet_interface_function_list_elt_t init_function; \
+ init_function.next_interface_function = \
+ vnm->tag##_functions[VNET_ITF_FUNC_PRIORITY_LOW]; \
+ vnm->tag##_functions[VNET_ITF_FUNC_PRIORITY_LOW] = &init_function; \
+ init_function.fp = (void *) &f; \
+}
+
+#define _VNET_INTERFACE_FUNCTION_DECL_PRIO(f,tag,p) \
+ \
+static void __vnet_interface_function_init_##tag##_##f (void) \
+ __attribute__((__constructor__)) ; \
+ \
+static void __vnet_interface_function_init_##tag##_##f (void) \
+{ \
+ vnet_main_t * vnm = vnet_get_main(); \
+ static _vnet_interface_function_list_elt_t init_function; \
+ init_function.next_interface_function = vnm->tag##_functions[p]; \
+ vnm->tag##_functions[p] = &init_function; \
+ init_function.fp = (void *) &f; \
+}
+
+#define VNET_HW_INTERFACE_ADD_DEL_FUNCTION(f) \
+ _VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_add_del)
+#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(f) \
+ _VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_link_up_down)
+#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(f,p) \
+ _VNET_INTERFACE_FUNCTION_DECL_PRIO(f,hw_interface_link_up_down,p)
+#define VNET_SW_INTERFACE_ADD_DEL_FUNCTION(f) \
+ _VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_add_del)
+#define VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(f) \
+ _VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_admin_up_down)
+#define VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION_PRIO(f,p) \
+ _VNET_INTERFACE_FUNCTION_DECL_PRIO(f,sw_interface_admin_up_down, p)
+
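
These constructor macros are how other subsystems subscribe to interface
lifecycle events without interface.c knowing about them;
call_elf_section_interface_callbacks in interface.c walks the resulting
per-priority lists. Registering an admin up/down hook looks like this
(my_admin_up_down is illustrative):

    static clib_error_t *
    my_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
    {
      if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
        clib_warning ("%u came admin up", sw_if_index);
      /* returning a clib_error_t aborts the state change */
      return 0;
    }

    VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (my_admin_up_down);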
+/* A class of hardware interface devices. */
+typedef struct _vnet_device_class
+{
+ /* Index into main vector. */
+ u32 index;
+
+ /* Device name (e.g. "FOOBAR 1234a"). */
+ char *name;
+
+ /* Function to call when hardware interface is added/deleted. */
+ vnet_interface_function_t *interface_add_del_function;
+
+ /* Function to bring device administratively up/down. */
+ vnet_interface_function_t *admin_up_down_function;
+
+ /* Function to call when sub-interface is added/deleted */
+ vnet_subif_add_del_function_t *subif_add_del_function;
+
+ /* Function to call interface rx mode is changed */
+ vnet_interface_set_rx_mode_function_t *rx_mode_change_function;
+
+ /* Redistribute flag changes/existence of this interface class. */
+ u32 redistribute;
+
+ /* Transmit function. */
+ vlib_node_function_t *tx_function;
+
+ /* Error strings indexed by error code for this node. */
+ char **tx_function_error_strings;
+
+ /* Number of error codes used by this node. */
+ u32 tx_function_n_errors;
+
+ /* Renumber device name [only!] support, a control-plane kludge */
+ int (*name_renumber) (struct vnet_hw_interface_t * hi,
+ u32 new_dev_instance);
+
+ /* Format device instance as name. */
+ format_function_t *format_device_name;
+
+ /* Parse function for device name. */
+ unformat_function_t *unformat_device_name;
+
+ /* Format device verbosely for this class. */
+ format_function_t *format_device;
+
+ /* Trace buffer format for TX function. */
+ format_function_t *format_tx_trace;
+
+ /* Function to clear hardware counters for device. */
+ void (*clear_counters) (u32 dev_class_instance);
+
+ uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 hw_class_index);
+
+ /* Called when hardware class of an interface changes. */
+ void (*hw_class_change) (struct vnet_main_t * vnm,
+ u32 hw_if_index, u32 new_hw_class_index);
+
+ /* Called to redirect traffic from a specific interface instance */
+ void (*rx_redirect_to_node) (struct vnet_main_t * vnm,
+ u32 hw_if_index, u32 node_index);
+
+ /* Linked list of all device classes, set up by the constructors created below */
+ struct _vnet_device_class *next_class_registration;
+
+ /* Function to set mac address. */
+ vnet_interface_set_mac_address_function_t *mac_addr_change_function;
+} vnet_device_class_t;
+
+#define VNET_DEVICE_CLASS(x,...) \
+ __VA_ARGS__ vnet_device_class_t x; \
+static void __vnet_add_device_class_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vnet_add_device_class_registration_##x (void) \
+{ \
+ vnet_main_t * vnm = vnet_get_main(); \
+ x.next_class_registration = vnm->device_class_registrations; \
+ vnm->device_class_registrations = &x; \
+} \
+__VA_ARGS__ vnet_device_class_t x
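+
+/*
+ * Illustrative usage (hypothetical driver): the macro above both declares
+ * the class and registers it on the vnet main's device class list at load
+ * time, so a driver typically writes:
+ *
+ * VNET_DEVICE_CLASS (my_device_class) = {
+ *   .name = "my-device",
+ *   .tx_function = my_interface_tx,
+ *   .format_device_name = format_my_device_name,
+ * };
+ */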
+
+#define VLIB_DEVICE_TX_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) \
+ uword \
+ __attribute__ ((flatten)) \
+ __attribute__ ((target (tgt))) \
+ CLIB_CPU_OPTIMIZED \
+ fn ## _ ## arch ( vlib_main_t * vm, \
+ vlib_node_runtime_t * node, \
+ vlib_frame_t * frame) \
+ { return fn (vm, node, frame); }
+
+#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH_CLONE(fn) \
+ foreach_march_variant(VLIB_DEVICE_TX_FUNCTION_CLONE_TEMPLATE, fn)
+
+#if CLIB_DEBUG > 0
+#define VLIB_MULTIARCH_CLONE_AND_SELECT_FN(fn,...)
+#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH(dev, fn)
+#else
+#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH(dev, fn) \
+ VLIB_DEVICE_TX_FUNCTION_MULTIARCH_CLONE(fn) \
+ CLIB_MULTIARCH_SELECT_FN(fn, static inline) \
+ static void __attribute__((__constructor__)) \
+ __vlib_device_tx_function_multiarch_select_##dev (void) \
+ { dev.tx_function = fn ## _multiarch_select(); }
+#endif
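+
+/*
+ * Illustrative usage (hypothetical names): the multiarch macro clones a
+ * device TX function per march variant and patches the device class with
+ * the selected variant at load time; in debug builds it expands to nothing:
+ *
+ * VLIB_DEVICE_TX_FUNCTION_MULTIARCH (my_device_class, my_interface_tx);
+ */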
+
+/**
+ * Link Type: A description of the protocol of packets on the link.
+ * On an ethernet link this maps directly into the ethertype. On a GRE tunnel
+ * it maps to the GRE protocol, and likewise for other link types.
+ */
+typedef enum vnet_link_t_
+{
+#if CLIB_DEBUG > 0
+ VNET_LINK_IP4 = 1,
+#else
+ VNET_LINK_IP4 = 0,
+#endif
+ VNET_LINK_IP6,
+ VNET_LINK_MPLS,
+ VNET_LINK_ETHERNET,
+ VNET_LINK_ARP,
+ VNET_LINK_NSH,
+} __attribute__ ((packed)) vnet_link_t;
+
+#define VNET_LINKS { \
+ [VNET_LINK_ETHERNET] = "ethernet", \
+ [VNET_LINK_IP4] = "ipv4", \
+ [VNET_LINK_IP6] = "ipv6", \
+ [VNET_LINK_MPLS] = "mpls", \
+ [VNET_LINK_ARP] = "arp", \
+ [VNET_LINK_NSH] = "nsh", \
+}
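+
+/*
+ * Illustrative use (sketch): VNET_LINKS expands to a designated
+ * initializer for a name table indexed by vnet_link_t:
+ *
+ * static const char *link_names[] = VNET_LINKS;
+ *
+ * link_names[VNET_LINK_MPLS] then holds "mpls".
+ */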
+
+/**
+ * @brief Number of link types. Not part of the enum so it does not have to be included in
+ * switch statements
+ */
+#define VNET_LINK_NUM (VNET_LINK_NSH+1)
+
+/**
+ * @brief Convert a link to an Ethertype
+ */
+extern vnet_l3_packet_type_t vnet_link_to_l3_proto (vnet_link_t link);
+
+/**
+ * @brief Attributes assignable to a HW interface Class.
+ */
+typedef enum vnet_hw_interface_class_flags_t_
+{
+ /**
+ * @brief a point-to-point interface
+ */
+ VNET_HW_INTERFACE_CLASS_FLAG_P2P = (1 << 0),
+} vnet_hw_interface_class_flags_t;
+
+/* Layer-2 (e.g. Ethernet) interface class. */
+typedef struct _vnet_hw_interface_class
+{
+ /* Index into main vector. */
+ u32 index;
+
+ /* Class name (e.g. "Ethernet"). */
+ char *name;
+
+ /* Flags */
+ vnet_hw_interface_class_flags_t flags;
+
+ /* Function to call when hardware interface is added/deleted. */
+ vnet_interface_function_t *interface_add_del_function;
+
+ /* Function to bring interface administratively up/down. */
+ vnet_interface_function_t *admin_up_down_function;
+
+ /* Function to call when link state changes. */
+ vnet_interface_function_t *link_up_down_function;
+
+ /* Function to call when link MAC changes. */
+ vnet_interface_set_mac_address_function_t *mac_addr_change_function;
+
+ /* Format function to display interface name. */
+ format_function_t *format_interface_name;
+
+ /* Format function to display interface address. */
+ format_function_t *format_address;
+
+ /* Format packet header for this interface class. */
+ format_function_t *format_header;
+
+ /* Format device verbosely for this class. */
+ format_function_t *format_device;
+
+ /* Parser for hardware (e.g. ethernet) address. */
+ unformat_function_t *unformat_hw_address;
+
+ /* Parser for packet header for e.g. rewrite string. */
+ unformat_function_t *unformat_header;
+
+ /* Builds a rewrite string for the interface to the destination
+ * for the payload/link type. */
+ u8 *(*build_rewrite) (struct vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_hw_address);
+
+ /* Update an adjacency added by FIB (as opposed to via the
+ * neighbour resolution protocol). */
+ void (*update_adjacency) (struct vnet_main_t * vnm,
+ u32 sw_if_index, u32 adj_index);
+
+ uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 hw_class_index);
+
+ /* Called when hw interface class is changed and old hardware instance
+ may want to be deleted. */
+ void (*hw_class_change) (struct vnet_main_t * vnm, u32 hw_if_index,
+ u32 old_class_index, u32 new_class_index);
+
+ /* List of hw interface classes, built by constructors */
+ struct _vnet_hw_interface_class *next_class_registration;
+
+} vnet_hw_interface_class_t;
+
+/**
+ * @brief Return a complete, zero-length (aka dummy) rewrite
+ */
+extern u8 *default_build_rewrite (struct vnet_main_t *vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_hw_address);
+
+/**
+ * @brief Default adjacency update function
+ */
+extern void default_update_adjacency (struct vnet_main_t *vnm,
+ u32 sw_if_index, u32 adj_index);
+
+#define VNET_HW_INTERFACE_CLASS(x,...) \
+ __VA_ARGS__ vnet_hw_interface_class_t x; \
+static void __vnet_add_hw_interface_class_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vnet_add_hw_interface_class_registration_##x (void) \
+{ \
+ vnet_main_t * vnm = vnet_get_main(); \
+ x.next_class_registration = vnm->hw_interface_class_registrations; \
+ vnm->hw_interface_class_registrations = &x; \
+} \
+__VA_ARGS__ vnet_hw_interface_class_t x
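+
+/*
+ * Illustrative usage (hypothetical class): as with device classes, the
+ * macro both declares and self-registers the class, e.g. using the
+ * default rewrite/adjacency helpers declared above:
+ *
+ * VNET_HW_INTERFACE_CLASS (my_hw_class) = {
+ *   .name = "my-hw",
+ *   .build_rewrite = default_build_rewrite,
+ *   .update_adjacency = default_update_adjacency,
+ * };
+ */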
+
+/* Hardware-interface. This corresponds to a physical wire
+ that packets flow over. */
+typedef struct vnet_hw_interface_t
+{
+ /* Interface name. */
+ u8 *name;
+
+ u32 flags;
+ /* Hardware link state is up. */
+#define VNET_HW_INTERFACE_FLAG_LINK_UP (1 << 0)
+ /* Hardware duplex state */
+#define VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT 1
+#define VNET_HW_INTERFACE_FLAG_HALF_DUPLEX (1 << 1)
+#define VNET_HW_INTERFACE_FLAG_FULL_DUPLEX (1 << 2)
+#define VNET_HW_INTERFACE_FLAG_DUPLEX_MASK \
+ (VNET_HW_INTERFACE_FLAG_HALF_DUPLEX | \
+ VNET_HW_INTERFACE_FLAG_FULL_DUPLEX)
+
+ /* Hardware link speed */
+#define VNET_HW_INTERFACE_FLAG_SPEED_SHIFT 3
+#define VNET_HW_INTERFACE_FLAG_SPEED_10M (1 << 3)
+#define VNET_HW_INTERFACE_FLAG_SPEED_100M (1 << 4)
+#define VNET_HW_INTERFACE_FLAG_SPEED_1G (1 << 5)
+#define VNET_HW_INTERFACE_FLAG_SPEED_10G (1 << 6)
+#define VNET_HW_INTERFACE_FLAG_SPEED_40G (1 << 7)
+#define VNET_HW_INTERFACE_FLAG_SPEED_100G (1 << 8)
+#define VNET_HW_INTERFACE_FLAG_SPEED_MASK \
+ (VNET_HW_INTERFACE_FLAG_SPEED_10M | \
+ VNET_HW_INTERFACE_FLAG_SPEED_100M | \
+ VNET_HW_INTERFACE_FLAG_SPEED_1G | \
+ VNET_HW_INTERFACE_FLAG_SPEED_10G | \
+ VNET_HW_INTERFACE_FLAG_SPEED_40G | \
+ VNET_HW_INTERFACE_FLAG_SPEED_100G)
+
+ /* rx mode flags */
+#define VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE (1 << 10)
+
+ /* tx checksum offload */
+#define VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD (1 << 11)
+
+ /* Hardware address as vector. Zero (e.g. zero-length vector) if no
+ address for this class (e.g. PPP). */
+ u8 *hw_address;
+
+ /* NAME.{output,tx} nodes for this interface. */
+ u32 output_node_index, tx_node_index;
+
+ /* (dev_class, dev_instance) uniquely identifies hw interface. */
+ u32 dev_class_index;
+ u32 dev_instance;
+
+ /* (hw_class, hw_instance) uniquely identifies hw interface. */
+ u32 hw_class_index;
+ u32 hw_instance;
+
+ /* Hardware index for this hardware interface. */
+ u32 hw_if_index;
+
+ /* Software index for this hardware interface. */
+ u32 sw_if_index;
+
+ /* Maximum transmit rate for this interface in bits/sec. */
+ f64 max_rate_bits_per_sec;
+
+ /* Smallest packet size supported by this interface. */
+ u32 min_supported_packet_bytes;
+
+ /* Largest packet size supported by this interface. */
+ u32 max_supported_packet_bytes;
+
+ /* Smallest packet size for this interface. */
+ u32 min_packet_bytes;
+
+ /* Largest packet size for this interface. */
+ u32 max_packet_bytes;
+
+ /* Number of extra bytes that go on the wire.
+ Packet length on wire
+ = max (length + per_packet_overhead_bytes, min_packet_bytes). */
+ u32 per_packet_overhead_bytes;
+
+ /* Receive and transmit layer 3 packet size limits (MRU/MTU). */
+ u32 max_l3_packet_bytes[VLIB_N_RX_TX];
+
+ /* Hash table mapping sub interface id to sw_if_index. */
+ uword *sub_interface_sw_if_index_by_id;
+
+ /* Count of number of L2 subinterfaces */
+ u32 l2_if_count;
+
+ /* Bonded interface info -
+ 0 - not a bonded interface nor a slave
+ ~0 - slave to a bonded interface
+ others - A bonded interface with a pointer to bitmap for all slaves */
+ uword *bond_info;
+#define VNET_HW_INTERFACE_BOND_INFO_NONE ((uword *) 0)
+#define VNET_HW_INTERFACE_BOND_INFO_SLAVE ((uword *) ~0)
+
+ /* Input node */
+ u32 input_node_index;
+
+ /* input node thread index by queue */
+ u32 *input_node_thread_index_by_queue;
+
+ /* vnet_hw_interface_rx_mode by queue */
+ u8 *rx_mode_by_queue;
+ vnet_hw_interface_rx_mode default_rx_mode;
+
+ /* device input device_and_queue runtime index */
+ uword *dq_runtime_index_by_queue;
+
+} vnet_hw_interface_t;
+
+extern vnet_device_class_t vnet_local_interface_device_class;
+
+typedef enum
+{
+ /* A hw interface. */
+ VNET_SW_INTERFACE_TYPE_HARDWARE,
+
+ /* A sub-interface. */
+ VNET_SW_INTERFACE_TYPE_SUB,
+ VNET_SW_INTERFACE_TYPE_P2P,
+} vnet_sw_interface_type_t;
+
+typedef struct
+{
+ /*
+ * Subinterface ID. A number 0-N to uniquely identify
+ * this subinterface under the main (parent?) interface
+ */
+ u32 id;
+
+ /* Classification data. Used to associate packet header with subinterface. */
+ struct
+ {
+ u16 outer_vlan_id;
+ u16 inner_vlan_id;
+ union
+ {
+ u16 raw_flags;
+ struct
+ {
+ u16 no_tags:1;
+ u16 one_tag:1;
+ u16 two_tags:1;
+ u16 dot1ad:1; /* 0 = dot1q, 1=dot1ad */
+ u16 exact_match:1;
+ u16 default_sub:1;
+ u16 outer_vlan_id_any:1;
+ u16 inner_vlan_id_any:1;
+ } flags;
+ };
+ } eth;
+} vnet_sub_interface_t;
+
+typedef struct
+{
+ /*
+ * Subinterface ID. A number 0-N to uniquely identify
+ * this subinterface under the main interface
+ */
+ u32 id;
+ u32 pool_index;
+ u8 client_mac[6];
+} vnet_p2p_sub_interface_t;
+
+typedef enum
+{
+ /* Always flood */
+ VNET_FLOOD_CLASS_NORMAL,
+ VNET_FLOOD_CLASS_TUNNEL_MASTER,
+ /* Does not flood when tunnel master is in the same L2 BD */
+ VNET_FLOOD_CLASS_TUNNEL_NORMAL
+} vnet_flood_class_t;
+
+/* Software-interface. This corresponds to an Ethernet VLAN, an ATM VC, a
+ tunnel, etc. Configuration (e.g. IP address) gets attached to
+ software interface. */
+typedef struct
+{
+ vnet_sw_interface_type_t type:16;
+
+ u16 flags;
+ /* Interface is "up" meaning adminstratively up.
+ Up in the sense of link state being up is maintained by hardware interface. */
+#define VNET_SW_INTERFACE_FLAG_ADMIN_UP (1 << 0)
+
+ /* Interface is disabled for forwarding: punt all traffic to slow-path. */
+#define VNET_SW_INTERFACE_FLAG_PUNT (1 << 1)
+
+#define VNET_SW_INTERFACE_FLAG_PROXY_ARP (1 << 2)
+
+#define VNET_SW_INTERFACE_FLAG_UNNUMBERED (1 << 3)
+
+#define VNET_SW_INTERFACE_FLAG_BOND_SLAVE (1 << 4)
+
+ /* Interface does not appear in CLI/API */
+#define VNET_SW_INTERFACE_FLAG_HIDDEN (1 << 5)
+
+ /* Interface in ERROR state */
+#define VNET_SW_INTERFACE_FLAG_ERROR (1 << 6)
+
+ /* Index for this interface. */
+ u32 sw_if_index;
+
+ /* Software interface index of super-interface;
+ equal to sw_if_index if this interface is not a
+ sub-interface. */
+ u32 sup_sw_if_index;
+
+ /* this swif is unnumbered, use addresses on unnumbered_sw_if_index... */
+ u32 unnumbered_sw_if_index;
+
+ u32 link_speed;
+
+ union
+ {
+ /* VNET_SW_INTERFACE_TYPE_HARDWARE. */
+ u32 hw_if_index;
+
+ /* VNET_SW_INTERFACE_TYPE_SUB. */
+ vnet_sub_interface_t sub;
+
+ /* VNET_SW_INTERFACE_TYPE_P2P. */
+ vnet_p2p_sub_interface_t p2p;
+ };
+
+ vnet_flood_class_t flood_class;
+} vnet_sw_interface_t;
+
+typedef enum
+{
+ /* Simple counters. */
+ VNET_INTERFACE_COUNTER_DROP = 0,
+ VNET_INTERFACE_COUNTER_PUNT = 1,
+ VNET_INTERFACE_COUNTER_IP4 = 2,
+ VNET_INTERFACE_COUNTER_IP6 = 3,
+ VNET_INTERFACE_COUNTER_RX_NO_BUF = 4,
+ VNET_INTERFACE_COUNTER_RX_MISS = 5,
+ VNET_INTERFACE_COUNTER_RX_ERROR = 6,
+ VNET_INTERFACE_COUNTER_TX_ERROR = 7,
+ VNET_INTERFACE_COUNTER_MPLS = 8,
+ VNET_N_SIMPLE_INTERFACE_COUNTER = 9,
+ /* Combined counters. */
+ VNET_INTERFACE_COUNTER_RX = 0,
+ VNET_INTERFACE_COUNTER_TX = 1,
+ VNET_N_COMBINED_INTERFACE_COUNTER = 2,
+} vnet_interface_counter_type_t;
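+
+/*
+ * Illustrative sketch (hypothetical context; assumes the standard vlib
+ * counter helpers): simple and combined values above index separate
+ * counter vectors, e.g. bumping an interface's combined RX counter:
+ *
+ * vlib_increment_combined_counter (im->combined_sw_if_counters
+ *                                  + VNET_INTERFACE_COUNTER_RX,
+ *                                  thread_index, sw_if_index,
+ *                                  n_packets, n_bytes);
+ */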
+
+typedef struct
+{
+ u32 output_node_index;
+ u32 tx_node_index;
+} vnet_hw_interface_nodes_t;
+
+typedef struct
+{
+ /* Hardware interfaces. */
+ vnet_hw_interface_t *hw_interfaces;
+
+ /* Hash table mapping HW interface name to index. */
+ uword *hw_interface_by_name;
+
+ /* Vectors of hardware interface classes and device classes. */
+ vnet_hw_interface_class_t *hw_interface_classes;
+ vnet_device_class_t *device_classes;
+
+ /* Hash table mapping name to hw interface/device class. */
+ uword *hw_interface_class_by_name;
+ uword *device_class_by_name;
+
+ /* Software interfaces. */
+ vnet_sw_interface_t *sw_interfaces;
+
+ /* Hash table mapping sub-interface sw_if_index, keyed by sup sw_if_index and sub id */
+ uword *sw_if_index_by_sup_and_sub;
+
+ /* Software interface counters both simple and combined
+ packet and byte counters. */
+ volatile u32 *sw_if_counter_lock;
+ vlib_simple_counter_main_t *sw_if_counters;
+ vlib_combined_counter_main_t *combined_sw_if_counters;
+
+ vnet_hw_interface_nodes_t *deleted_hw_interface_nodes;
+
+ /* pcap drop tracing */
+ int drop_pcap_enable;
+ pcap_main_t pcap_main;
+ u8 *pcap_filename;
+ u32 pcap_sw_if_index;
+ u32 pcap_pkts_to_capture;
+ uword *pcap_drop_filter_hash;
+
+ /* feature_arc_index */
+ u8 output_feature_arc_index;
+} vnet_interface_main_t;
+
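+/* Spin until the shared counter lock is acquired; a no-op when the lock
+   has not been allocated (e.g. single-threaded operation). */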
+static inline void
+vnet_interface_counter_lock (vnet_interface_main_t * im)
+{
+ if (im->sw_if_counter_lock)
+ while (__sync_lock_test_and_set (im->sw_if_counter_lock, 1))
+ /* zzzz */ ;
+}
+
+static inline void
+vnet_interface_counter_unlock (vnet_interface_main_t * im)
+{
+ if (im->sw_if_counter_lock)
+ *im->sw_if_counter_lock = 0;
+}
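+
+/*
+ * Illustrative pattern (sketch): counter readers/writers bracket access
+ * with the helpers above, e.g. zeroing an interface's combined RX
+ * counters:
+ *
+ * vnet_interface_counter_lock (im);
+ * vlib_zero_combined_counter (im->combined_sw_if_counters
+ *                             + VNET_INTERFACE_COUNTER_RX, sw_if_index);
+ * vnet_interface_counter_unlock (im);
+ */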
+
+void vnet_pcap_drop_trace_filter_add_del (u32 error_index, int is_add);
+
+int vnet_interface_name_renumber (u32 sw_if_index, u32 new_show_dev_instance);
+
+#endif /* included_vnet_interface_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
new file mode 100644
index 00000000..6374a880
--- /dev/null
+++ b/src/vnet/interface_api.c
@@ -0,0 +1,958 @@
+/*
+ *------------------------------------------------------------------
+ * interface_api.c - vnet interface api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/vnet_msg_enum.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/mfib/mfib_table.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+vpe_api_main_t vpe_api_main;
+
+#define foreach_vpe_api_msg \
+_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \
+_(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \
+_(WANT_INTERFACE_EVENTS, want_interface_events) \
+_(SW_INTERFACE_DUMP, sw_interface_dump) \
+_(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \
+_(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \
+_(SW_INTERFACE_GET_TABLE, sw_interface_get_table) \
+_(SW_INTERFACE_SET_UNNUMBERED, sw_interface_set_unnumbered) \
+_(SW_INTERFACE_CLEAR_STATS, sw_interface_clear_stats) \
+_(SW_INTERFACE_TAG_ADD_DEL, sw_interface_tag_add_del) \
+_(SW_INTERFACE_SET_MAC_ADDRESS, sw_interface_set_mac_address)
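+
+/*
+ * Each _(N, n) entry above is expanded by interface_api_hookup() below
+ * into a vl_msg_api_set_handlers() call wiring VL_API_<N> to
+ * vl_api_<n>_t_handler().
+ */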
+
+static void
+vl_api_sw_interface_set_flags_t_handler (vl_api_sw_interface_set_flags_t * mp)
+{
+ vl_api_sw_interface_set_flags_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv = 0;
+ clib_error_t *error;
+ u16 flags;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ flags = mp->admin_up_down ? VNET_SW_INTERFACE_FLAG_ADMIN_UP : 0;
+
+ error = vnet_sw_interface_set_flags (vnm, ntohl (mp->sw_if_index), flags);
+ if (error)
+ {
+ rv = -1;
+ clib_error_report (error);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_FLAGS_REPLY);
+}
+
+static void
+vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
+{
+ vl_api_sw_interface_set_mtu_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 flags = ETHERNET_INTERFACE_FLAG_MTU;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ u16 mtu = ntohs (mp->mtu);
+ ethernet_main_t *em = &ethernet_main;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto bad_sw_if_index;
+ }
+
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, si->hw_if_index);
+ ethernet_interface_t *eif = ethernet_get_interface (em, si->hw_if_index);
+
+ if (!eif)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto bad_sw_if_index;
+ }
+
+ if (mtu < hi->min_supported_packet_bytes)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto bad_sw_if_index;
+ }
+
+ if (mtu > hi->max_supported_packet_bytes)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto bad_sw_if_index;
+ }
+
+ if (hi->max_packet_bytes != mtu)
+ {
+ hi->max_packet_bytes = mtu;
+ ethernet_set_flags (vnm, si->hw_if_index, flags);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_MTU_REPLY);
+}
+
+static void
+send_sw_interface_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ vnet_sw_interface_t * swif,
+ u8 * interface_name, u32 context)
+{
+ vnet_hw_interface_t *hi =
+ vnet_get_sup_hw_interface (am->vnet_main, swif->sw_if_index);
+
+ vl_api_sw_interface_details_t *mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_DETAILS);
+ mp->sw_if_index = ntohl (swif->sw_if_index);
+ mp->sup_sw_if_index = ntohl (swif->sup_sw_if_index);
+ mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0;
+ mp->link_up_down = (hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? 1 : 0;
+ mp->link_duplex = ((hi->flags & VNET_HW_INTERFACE_FLAG_DUPLEX_MASK) >>
+ VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT);
+ mp->link_speed = ((hi->flags & VNET_HW_INTERFACE_FLAG_SPEED_MASK) >>
+ VNET_HW_INTERFACE_FLAG_SPEED_SHIFT);
+ mp->link_mtu = ntohs (hi->max_packet_bytes);
+ mp->context = context;
+
+ strncpy ((char *) mp->interface_name,
+ (char *) interface_name, ARRAY_LEN (mp->interface_name) - 1);
+
+ /* Send the L2 address for ethernet physical intfcs */
+ if (swif->sup_sw_if_index == swif->sw_if_index
+ && hi->hw_class_index == ethernet_hw_interface_class.index)
+ {
+ ethernet_main_t *em = ethernet_get_main (am->vlib_main);
+ ethernet_interface_t *ei;
+
+ ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
+ ASSERT (sizeof (mp->l2_address) >= sizeof (ei->address));
+ clib_memcpy (mp->l2_address, ei->address, sizeof (ei->address));
+ mp->l2_address_length = ntohl (sizeof (ei->address));
+ }
+ else if (swif->sup_sw_if_index != swif->sw_if_index)
+ {
+ vnet_sub_interface_t *sub = &swif->sub;
+ mp->sub_id = ntohl (sub->id);
+ mp->sub_dot1ad = sub->eth.flags.dot1ad;
+ mp->sub_number_of_tags =
+ sub->eth.flags.one_tag + sub->eth.flags.two_tags * 2;
+ mp->sub_outer_vlan_id = ntohs (sub->eth.outer_vlan_id);
+ mp->sub_inner_vlan_id = ntohs (sub->eth.inner_vlan_id);
+ mp->sub_exact_match = sub->eth.flags.exact_match;
+ mp->sub_default = sub->eth.flags.default_sub;
+ mp->sub_outer_vlan_id_any = sub->eth.flags.outer_vlan_id_any;
+ mp->sub_inner_vlan_id_any = sub->eth.flags.inner_vlan_id_any;
+
+ /* vlan tag rewrite data */
+ u32 vtr_op = L2_VTR_DISABLED;
+ u32 vtr_push_dot1q = 0, vtr_tag1 = 0, vtr_tag2 = 0;
+
+ if (l2vtr_get (am->vlib_main, am->vnet_main, swif->sw_if_index,
+ &vtr_op, &vtr_push_dot1q, &vtr_tag1, &vtr_tag2) != 0)
+ {
+ // error - default to disabled
+ mp->vtr_op = ntohl (L2_VTR_DISABLED);
+ clib_warning ("cannot get vlan tag rewrite for sw_if_index %d",
+ swif->sw_if_index);
+ }
+ else
+ {
+ mp->vtr_op = ntohl (vtr_op);
+ mp->vtr_push_dot1q = ntohl (vtr_push_dot1q);
+ mp->vtr_tag1 = ntohl (vtr_tag1);
+ mp->vtr_tag2 = ntohl (vtr_tag2);
+ }
+ }
+
+ /* pbb tag rewrite data */
+ ethernet_header_t eth_hdr;
+ u32 vtr_op = L2_VTR_DISABLED;
+ u16 outer_tag = 0;
+ u16 b_vlanid = 0;
+ u32 i_sid = 0;
+ memset (&eth_hdr, 0, sizeof (eth_hdr));
+
+ if (!l2pbb_get (am->vlib_main, am->vnet_main, swif->sw_if_index,
+ &vtr_op, &outer_tag, &eth_hdr, &b_vlanid, &i_sid))
+ {
+ mp->sub_dot1ah = 1;
+ clib_memcpy (mp->b_dmac, eth_hdr.dst_address,
+ sizeof (eth_hdr.dst_address));
+ clib_memcpy (mp->b_smac, eth_hdr.src_address,
+ sizeof (eth_hdr.src_address));
+ mp->b_vlanid = b_vlanid;
+ mp->i_sid = i_sid;
+ }
+
+ u8 *tag = vnet_get_sw_interface_tag (vnet_get_main (), swif->sw_if_index);
+ if (tag)
+ strncpy ((char *) mp->tag, (char *) tag, ARRAY_LEN (mp->tag) - 1);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vnet_sw_interface_t *swif;
+ vnet_interface_main_t *im = &am->vnet_main->interface_main;
+
+ unix_shared_memory_queue_t *q =
+ vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ u8 *filter = 0, *name = 0;
+ if (mp->name_filter_valid)
+ {
+ mp->name_filter[ARRAY_LEN (mp->name_filter) - 1] = 0;
+ filter = format (0, "%s%c", mp->name_filter, 0);
+ }
+
+ char *strcasestr (char *, char *); /* lnx hdr file botch */
+ /* *INDENT-OFF* */
+ pool_foreach (swif, im->sw_interfaces,
+ ({
+ if (!vnet_swif_is_api_visible (swif))
+ continue;
+ vec_reset_length(name);
+ name = format (name, "%U%c", format_vnet_sw_interface_name, am->vnet_main,
+ swif, 0);
+
+ if (filter && !strcasestr((char *) name, (char *) filter))
+ continue;
+
+ send_sw_interface_details (am, q, swif, name, mp->context);
+ }));
+ /* *INDENT-ON* */
+
+ vec_free (name);
+ vec_free (filter);
+}
+
+static void
+ vl_api_sw_interface_add_del_address_t_handler
+ (vl_api_sw_interface_add_del_address_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_sw_interface_add_del_address_reply_t *rmp;
+ int rv = 0;
+ u32 is_del;
+ clib_error_t *error = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ is_del = mp->is_add == 0;
+ vnm->api_errno = 0;
+
+ if (mp->del_all)
+ ip_del_all_interface_addresses (vm, ntohl (mp->sw_if_index));
+ else if (mp->is_ipv6)
+ error = ip6_add_del_interface_address (vm, ntohl (mp->sw_if_index),
+ (void *) mp->address,
+ mp->address_length, is_del);
+ else
+ error = ip4_add_del_interface_address (vm, ntohl (mp->sw_if_index),
+ (void *) mp->address,
+ mp->address_length, is_del);
+
+ if (error)
+ {
+ rv = vnm->api_errno;
+ clib_error_report (error);
+ goto done;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+done:
+ REPLY_MACRO (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS_REPLY);
+}
+
+void stats_dslock_with_hint (int hint, int tag) __attribute__ ((weak));
+void
+stats_dslock_with_hint (int hint, int tag)
+{
+}
+
+void stats_dsunlock (void) __attribute__ ((weak));
+void
+stats_dsunlock (void)
+{
+}
+
+static void
+vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp)
+{
+ vl_api_sw_interface_set_table_reply_t *rmp;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ u32 table_id = ntohl (mp->vrf_id);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ stats_dslock_with_hint (1 /* release hint */ , 4 /* tag */ );
+
+ if (mp->is_ipv6)
+ rv = ip_table_bind (FIB_PROTOCOL_IP6, sw_if_index, table_id, 1);
+ else
+ rv = ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, table_id, 1);
+
+ stats_dsunlock ();
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY);
+}
+
+int
+ip_table_bind (fib_protocol_t fproto,
+ u32 sw_if_index, u32 table_id, u8 is_api)
+{
+ CLIB_UNUSED (ip_interface_address_t * ia);
+ u32 fib_index, mfib_index;
+ fib_source_t src;
+ mfib_source_t msrc;
+
+ if (is_api)
+ {
+ src = FIB_SOURCE_API;
+ msrc = MFIB_SOURCE_API;
+ }
+ else
+ {
+ src = FIB_SOURCE_CLI;
+ msrc = MFIB_SOURCE_CLI;
+ }
+
+ /*
+ * This is temporary whilst I do the song and dance with the CSIT version
+ */
+ if (0 != table_id)
+ {
+ fib_index = fib_table_find_or_create_and_lock (fproto, table_id, src);
+ mfib_index =
+ mfib_table_find_or_create_and_lock (fproto, table_id, msrc);
+ }
+ else
+ {
+ fib_index = 0;
+ mfib_index = 0;
+ }
+
+ /*
+ * Eventually, a table that does not exist should be an error; the
+ * commented-out lookup below implements that behaviour.
+ */
+ /* fib_index = fib_table_find (fproto, table_id); */
+ /* mfib_index = mfib_table_find (fproto, table_id); */
+
+ /* if (~0 == fib_index || ~0 == mfib_index) */
+ /* { */
+ /* return (VNET_API_ERROR_NO_SUCH_FIB); */
+ /* } */
+
+ if (FIB_PROTOCOL_IP6 == fproto)
+ {
+ /*
+ * If the interface already has an IP address, then a change in
+ * VRF is not allowed. The IP address applied must first be removed.
+ * We do not do that automatically here, since VPP has no knowledge
+ * of whether those subnets are valid in the destination VRF.
+ */
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&ip6_main.lookup_main,
+ ia, sw_if_index,
+ 1 /* honor unnumbered */ ,
+ ({
+ return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE);
+ }));
+ /* *INDENT-ON* */
+
+ vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
+ vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
+
+ /*
+ * tell those that are interested that the binding is changing.
+ */
+ ip6_table_bind_callback_t *cb;
+ vec_foreach (cb, ip6_main.table_bind_callbacks)
+ cb->function (&ip6_main, cb->function_opaque,
+ sw_if_index,
+ fib_index,
+ ip6_main.fib_index_by_sw_if_index[sw_if_index]);
+
+ if (0 == table_id)
+ {
+ /* reset back to default */
+ if (0 != ip6_main.fib_index_by_sw_if_index[sw_if_index])
+ fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_IP6, src);
+ if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index
+ [sw_if_index], FIB_PROTOCOL_IP6, msrc);
+
+ }
+ else
+ {
+ /* we need to lock the table now that it's in use */
+ fib_table_lock (fib_index, FIB_PROTOCOL_IP6, src);
+ mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6, msrc);
+ }
+
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
+ }
+ else
+ {
+ /*
+ * If the interface already has an IP address, then a change in
+ * VRF is not allowed. The IP address applied must first be removed.
+ * We do not do that automatically here, since VPP has no knowledge
+ * of whether those subnets are valid in the destination VRF.
+ */
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&ip4_main.lookup_main,
+ ia, sw_if_index,
+ 1 /* honor unnumbered */ ,
+ ({
+ return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE);
+ }));
+ /* *INDENT-ON* */
+
+ vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
+ vec_validate (ip4_main.mfib_index_by_sw_if_index, sw_if_index);
+
+ /*
+ * tell those that are interested that the binding is changing.
+ */
+ ip4_table_bind_callback_t *cb;
+ vec_foreach (cb, ip4_main.table_bind_callbacks)
+ cb->function (&ip4_main, cb->function_opaque,
+ sw_if_index,
+ fib_index,
+ ip4_main.fib_index_by_sw_if_index[sw_if_index]);
+
+ if (0 == table_id)
+ {
+ /* reset back to default */
+ if (0 != ip4_main.fib_index_by_sw_if_index[sw_if_index])
+ fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_IP4, src);
+ if (0 != ip4_main.mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_unlock (ip4_main.mfib_index_by_sw_if_index
+ [sw_if_index], FIB_PROTOCOL_IP4, msrc);
+
+ }
+ else
+ {
+ /* we need to lock the table now that it's in use */
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+ table_id, src);
+
+ mfib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+ table_id, msrc);
+ }
+
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ ip4_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
+ }
+
+ /*
+ * Temporary: undo the locks taken by the find-or-create at the start.
+ */
+ if (0 != table_id)
+ {
+ fib_table_unlock (fib_index, fproto, src);
+ mfib_table_unlock (mfib_index, fproto, msrc);
+ }
+
+ return (0);
+}
+
+static void
+send_sw_interface_get_table_reply (unix_shared_memory_queue_t * q,
+ u32 context, int retval, u32 vrf_id)
+{
+ vl_api_sw_interface_get_table_reply_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_GET_TABLE_REPLY);
+ mp->context = context;
+ mp->retval = htonl (retval);
+ mp->vrf_id = htonl (vrf_id);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_sw_interface_get_table_t_handler (vl_api_sw_interface_get_table_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ fib_table_t *fib_table = 0;
+ u32 sw_if_index = ~0;
+ u32 fib_index = ~0;
+ u32 table_id = ~0;
+ fib_protocol_t fib_proto = FIB_PROTOCOL_IP4;
+ int rv = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (mp->is_ipv6)
+ fib_proto = FIB_PROTOCOL_IP6;
+
+ fib_index = fib_table_get_index_for_sw_if_index (fib_proto, sw_if_index);
+ if (fib_index != ~0)
+ {
+ fib_table = fib_table_get (fib_index, fib_proto);
+ table_id = fib_table->ft_table_id;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ send_sw_interface_get_table_reply (q, mp->context, rv, table_id);
+}
+
+static void vl_api_sw_interface_set_unnumbered_t_handler
+ (vl_api_sw_interface_set_unnumbered_t * mp)
+{
+ vl_api_sw_interface_set_unnumbered_reply_t *rmp;
+ int rv = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ u32 unnumbered_sw_if_index = ntohl (mp->unnumbered_sw_if_index);
+ u32 was_unnum;
+
+ /*
+ * The API message field names are backwards from
+ * the underlying data structure names.
+ * It's not worth changing them now.
+ */
+ if (!vnet_sw_interface_is_api_valid (vnm, unnumbered_sw_if_index))
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto done;
+ }
+
+ /* Only check the "use loop0" field when setting the binding */
+ if (mp->is_add && !vnet_sw_interface_is_api_valid (vnm, sw_if_index))
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
+ goto done;
+ }
+
+ vnet_sw_interface_t *si =
+ vnet_get_sw_interface (vnm, unnumbered_sw_if_index);
+ was_unnum = (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED);
+
+ if (mp->is_add)
+ {
+ si->flags |= VNET_SW_INTERFACE_FLAG_UNNUMBERED;
+ si->unnumbered_sw_if_index = sw_if_index;
+
+ ip4_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [unnumbered_sw_if_index] =
+ ip4_main.
+ lookup_main.if_address_pool_index_by_sw_if_index[sw_if_index];
+ ip6_main.
+ lookup_main.if_address_pool_index_by_sw_if_index
+ [unnumbered_sw_if_index] =
+ ip6_main.
+ lookup_main.if_address_pool_index_by_sw_if_index[sw_if_index];
+ }
+ else
+ {
+ si->flags &= ~(VNET_SW_INTERFACE_FLAG_UNNUMBERED);
+ si->unnumbered_sw_if_index = (u32) ~ 0;
+
+ ip4_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [unnumbered_sw_if_index] = ~0;
+ ip6_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [unnumbered_sw_if_index] = ~0;
+ }
+
+ if (was_unnum != (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
+ {
+ ip4_sw_interface_enable_disable (unnumbered_sw_if_index, mp->is_add);
+ ip6_sw_interface_enable_disable (unnumbered_sw_if_index, mp->is_add);
+ }
+
+done:
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_UNNUMBERED_REPLY);
+}
+
+static void
+vl_api_sw_interface_clear_stats_t_handler (vl_api_sw_interface_clear_stats_t *
+ mp)
+{
+ vl_api_sw_interface_clear_stats_reply_t *rmp;
+
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_simple_counter_main_t *sm;
+ vlib_combined_counter_main_t *cm;
+ static vnet_main_t **my_vnet_mains;
+ int i, j, n_counters;
+ int rv = 0;
+
+ if (mp->sw_if_index != ~0)
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vec_reset_length (my_vnet_mains);
+
+ for (i = 0; i < vec_len (vnet_mains); i++)
+ {
+ if (vnet_mains[i])
+ vec_add1 (my_vnet_mains, vnet_mains[i]);
+ }
+
+ if (vec_len (vnet_mains) == 0)
+ vec_add1 (my_vnet_mains, vnm);
+
+ n_counters = vec_len (im->combined_sw_if_counters);
+
+ for (j = 0; j < n_counters; j++)
+ {
+ for (i = 0; i < vec_len (my_vnet_mains); i++)
+ {
+ im = &my_vnet_mains[i]->interface_main;
+ cm = im->combined_sw_if_counters + j;
+ if (mp->sw_if_index == (u32) ~ 0)
+ vlib_clear_combined_counters (cm);
+ else
+ vlib_zero_combined_counter (cm, ntohl (mp->sw_if_index));
+ }
+ }
+
+ n_counters = vec_len (im->sw_if_counters);
+
+ for (j = 0; j < n_counters; j++)
+ {
+ for (i = 0; i < vec_len (my_vnet_mains); i++)
+ {
+ im = &my_vnet_mains[i]->interface_main;
+ sm = im->sw_if_counters + j;
+ if (mp->sw_if_index == (u32) ~ 0)
+ vlib_clear_simple_counters (sm);
+ else
+ vlib_zero_simple_counter (sm, ntohl (mp->sw_if_index));
+ }
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_CLEAR_STATS_REPLY);
+}
+
+#define API_LINK_STATE_EVENT 1
+#define API_ADMIN_UP_DOWN_EVENT 2
+
+static int
+event_data_cmp (void *a1, void *a2)
+{
+ uword *e1 = a1;
+ uword *e2 = a2;
+
+ return (word) e1[0] - (word) e2[0];
+}
+
+static void
+send_sw_interface_event (vpe_api_main_t * am,
+ vpe_client_registration_t * reg,
+ unix_shared_memory_queue_t * q,
+ vnet_sw_interface_t * swif)
+{
+ vl_api_sw_interface_event_t *mp;
+ vnet_main_t *vnm = am->vnet_main;
+
+ vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm,
+ swif->sw_if_index);
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT);
+ mp->sw_if_index = ntohl (swif->sw_if_index);
+ mp->client_index = reg->client_index;
+ mp->pid = reg->client_pid;
+
+ mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0;
+ mp->link_up_down = (hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? 1 : 0;
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static uword
+link_state_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vpe_api_main_t *vam = &vpe_api_main;
+ vnet_main_t *vnm = vam->vnet_main;
+ vnet_sw_interface_t *swif;
+ uword *event_data = 0;
+ vpe_client_registration_t *reg;
+ int i;
+ u32 prev_sw_if_index;
+ unix_shared_memory_queue_t *q;
+
+ vam->link_state_process_up = 1;
+
+ while (1)
+ {
+ vlib_process_wait_for_event (vm);
+
+ /* Unified list of changed link or admin state sw_if_indices */
+ vlib_process_get_events_with_type
+ (vm, &event_data, API_LINK_STATE_EVENT);
+ vlib_process_get_events_with_type
+ (vm, &event_data, API_ADMIN_UP_DOWN_EVENT);
+
+ /* Sort, so we can eliminate duplicates */
+ vec_sort_with_function (event_data, event_data_cmp);
+
+ prev_sw_if_index = ~0;
+
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ /* Only one message per swif */
+ if (prev_sw_if_index == event_data[i])
+ continue;
+ prev_sw_if_index = event_data[i];
+
+ /* *INDENT-OFF* */
+ pool_foreach(reg, vam->interface_events_registrations,
+ ({
+ q = vl_api_client_index_to_input_queue (reg->client_index);
+ if (q)
+ {
+ /* sw_interface may be deleted already */
+ if (!pool_is_free_index (vnm->interface_main.sw_interfaces,
+ event_data[i]))
+ {
+ swif = vnet_get_sw_interface (vnm, event_data[i]);
+ send_sw_interface_event (vam, reg, q, swif);
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ vec_reset_length (event_data);
+ }
+
+ return 0;
+}
+
+static clib_error_t *link_up_down_function (vnet_main_t * vm, u32 hw_if_index,
+ u32 flags);
+static clib_error_t *admin_up_down_function (vnet_main_t * vm,
+ u32 hw_if_index, u32 flags);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (link_state_process_node,static) = {
+ .function = link_state_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vpe-link-state-process",
+};
+/* *INDENT-ON* */
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (admin_up_down_function);
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (link_up_down_function);
+
+static clib_error_t *
+link_up_down_function (vnet_main_t * vm, u32 hw_if_index, u32 flags)
+{
+ vpe_api_main_t *vam = &vpe_api_main;
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vm, hw_if_index);
+
+ if (vam->link_state_process_up)
+ vlib_process_signal_event (vam->vlib_main,
+ link_state_process_node.index,
+ API_LINK_STATE_EVENT, hi->sw_if_index);
+ return 0;
+}
+
+static clib_error_t *
+admin_up_down_function (vnet_main_t * vm, u32 sw_if_index, u32 flags)
+{
+ vpe_api_main_t *vam = &vpe_api_main;
+
+ /*
+ * Note: it's perfectly fair to set a subif admin up / admin down.
+ * Note the subtle distinction between this routine and the previous
+ * routine.
+ */
+ if (vam->link_state_process_up)
+ vlib_process_signal_event (vam->vlib_main,
+ link_state_process_node.index,
+ API_ADMIN_UP_DOWN_EVENT, sw_if_index);
+ return 0;
+}
+
+static void vl_api_sw_interface_tag_add_del_t_handler
+ (vl_api_sw_interface_tag_add_del_t * mp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_sw_interface_tag_add_del_reply_t *rmp;
+ int rv = 0;
+ u8 *tag;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ if (mp->is_add)
+ {
+ if (mp->tag[0] == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
+ tag = format (0, "%s%c", mp->tag, 0);
+ vnet_set_sw_interface_tag (vnm, tag, sw_if_index);
+ }
+ else
+ vnet_clear_sw_interface_tag (vnm, sw_if_index);
+
+ BAD_SW_IF_INDEX_LABEL;
+out:
+ REPLY_MACRO (VL_API_SW_INTERFACE_TAG_ADD_DEL_REPLY);
+}
+
+static void vl_api_sw_interface_set_mac_address_t_handler
+ (vl_api_sw_interface_set_mac_address_t * mp)
+{
+ vl_api_sw_interface_set_mac_address_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ vnet_sw_interface_t *si;
+ u64 mac;
+ clib_error_t *error;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
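+ /* Pack the 6-byte MAC into a u64, first wire byte in the
+    least-significant octet. */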
+ mac = ((u64) mp->mac_address[0] << (8 * 0)
+ | (u64) mp->mac_address[1] << (8 * 1)
+ | (u64) mp->mac_address[2] << (8 * 2)
+ | (u64) mp->mac_address[3] << (8 * 3)
+ | (u64) mp->mac_address[4] << (8 * 4)
+ | (u64) mp->mac_address[5] << (8 * 5));
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ error = vnet_hw_interface_change_mac_address (vnm, si->hw_if_index, mac);
+ if (error)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ clib_error_report (error);
+ goto out;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+out:
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_MAC_ADDRESS_REPLY);
+}
+
+/*
+ * vpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/interface.api.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_interface;
+#undef _
+}
+
+pub_sub_handler (interface_events, INTERFACE_EVENTS);
+
+static clib_error_t *
+interface_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (interface_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c
new file mode 100644
index 00000000..15dc7f8d
--- /dev/null
+++ b/src/vnet/interface_cli.c
@@ -0,0 +1,1660 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * interface_cli.c: interface CLI
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * @brief Interface CLI.
+ *
+ * Source code for several CLI interface commands.
+ *
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/bitmap.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/l2_input.h>
+
+static int
+compare_interface_names (void *a1, void *a2)
+{
+ u32 *hi1 = a1;
+ u32 *hi2 = a2;
+
+ return vnet_hw_interface_compare (vnet_get_main (), *hi1, *hi2);
+}
+
+static clib_error_t *
+show_or_clear_hw_interfaces (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_hw_interface_t *hi;
+ u32 hw_if_index, *hw_if_indices = 0;
+ int i, verbose = -1, is_show, show_bond = 0;
+
+ is_show = strstr (cmd->path, "show") != 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ /* See if user wants to show a specific interface. */
+ if (unformat
+ (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ vec_add1 (hw_if_indices, hw_if_index);
+
+ /* See if user wants to show an interface with a specific hw_if_index. */
+ else if (unformat (input, "%u", &hw_if_index))
+ vec_add1 (hw_if_indices, hw_if_index);
+
+ else if (unformat (input, "verbose"))
+ verbose = 1; /* this is also the default */
+
+ else if (unformat (input, "detail"))
+ verbose = 2;
+
+ else if (unformat (input, "brief"))
+ verbose = 0;
+
+ else if (unformat (input, "bond"))
+ {
+ show_bond = 1;
+ if (verbose < 0)
+ verbose = 0; /* default to brief for link bonding */
+ }
+
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ /* Gather interfaces. */
+ if (vec_len (hw_if_indices) == 0)
+ pool_foreach (hi, im->hw_interfaces,
+ vec_add1 (hw_if_indices, hi - im->hw_interfaces));
+
+ if (verbose < 0)
+ verbose = 1; /* default to verbose (except bond) */
+
+ if (is_show)
+ {
+ /* Sort by name. */
+ vec_sort_with_function (hw_if_indices, compare_interface_names);
+
+ vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm, 0, verbose);
+ for (i = 0; i < vec_len (hw_if_indices); i++)
+ {
+ hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
+ if (show_bond == 0) /* show all interfaces */
+ vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm,
+ hi, verbose);
+ else if ((hi->bond_info) &&
+ (hi->bond_info != VNET_HW_INTERFACE_BOND_INFO_SLAVE))
+ { /* show only bonded interface and all its slave interfaces */
+ int hw_idx;
+ vnet_hw_interface_t *shi;
+ vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm,
+ hi, verbose);
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (hw_idx, hi->bond_info,
+ ({
+ shi = vnet_get_hw_interface(vnm, hw_idx);
+ vlib_cli_output (vm, "%U\n",
+ format_vnet_hw_interface, vnm, shi, verbose);
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ }
+ else
+ {
+ for (i = 0; i < vec_len (hw_if_indices); i++)
+ {
+ vnet_device_class_t *dc;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
+ dc = vec_elt_at_index (im->device_classes, hi->dev_class_index);
+
+ if (dc->clear_counters)
+ dc->clear_counters (hi->dev_instance);
+ }
+ }
+
+done:
+ vec_free (hw_if_indices);
+ return error;
+}
+
+/*?
+ * Display more detailed information about all or a list of given interfaces.
+ * The verbosity of the output can be controlled by the following optional
+ * parameters:
+ * - brief: Only show name, index and state (default for bonded interfaces).
+ * - verbose: Also display additional attributes (default for all other interfaces).
+ * - detail: Also display all remaining attributes and extended statistics.
+ *
+ * To limit the output of the command to bonded interfaces and their slave
+ * interfaces, use the '<em>bond</em>' optional parameter.
+ *
+ * @cliexpar
+ * Example of how to display default data for all interfaces:
+ * @cliexstart{show hardware-interfaces}
+ * Name Idx Link Hardware
+ * GigabitEthernet7/0/0 1 up GigabitEthernet7/0/0
+ * Ethernet address ec:f4:bb:c0:bc:fc
+ * Intel e1000
+ * carrier up full duplex speed 1000 mtu 9216
+ * rx queues 1, rx desc 1024, tx queues 3, tx desc 1024
+ * cpu socket 0
+ * GigabitEthernet7/0/1 2 up GigabitEthernet7/0/1
+ * Ethernet address ec:f4:bb:c0:bc:fd
+ * Intel e1000
+ * carrier up full duplex speed 1000 mtu 9216
+ * rx queues 1, rx desc 1024, tx queues 3, tx desc 1024
+ * cpu socket 0
+ * VirtualEthernet0/0/0 3 up VirtualEthernet0/0/0
+ * Ethernet address 02:fe:a5:a9:8b:8e
+ * VirtualEthernet0/0/1 4 up VirtualEthernet0/0/1
+ * Ethernet address 02:fe:c0:4e:3b:b0
+ * VirtualEthernet0/0/2 5 up VirtualEthernet0/0/2
+ * Ethernet address 02:fe:1f:73:92:81
+ * VirtualEthernet0/0/3 6 up VirtualEthernet0/0/3
+ * Ethernet address 02:fe:f2:25:c4:68
+ * local0 0 down local0
+ * local
+ * @cliexend
+ * Example of how to display '<em>verbose</em>' data for an interface by name and
+ * software index (where 2 is the software index):
+ * @cliexstart{show hardware-interfaces GigabitEthernet7/0/0 2 verbose}
+ * Name Idx Link Hardware
+ * GigabitEthernet7/0/0 1 up GigabitEthernet7/0/0
+ * Ethernet address ec:f4:bb:c0:bc:fc
+ * Intel e1000
+ * carrier up full duplex speed 1000 mtu 9216
+ * rx queues 1, rx desc 1024, tx queues 3, tx desc 1024
+ * cpu socket 0
+ * GigabitEthernet7/0/1 2 down GigabitEthernet7/0/1
+ * Ethernet address ec:f4:bb:c0:bc:fd
+ * Intel e1000
+ * carrier up full duplex speed 1000 mtu 9216
+ * rx queues 1, rx desc 1024, tx queues 3, tx desc 1024
+ * cpu socket 0
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = {
+ .path = "show hardware-interfaces",
+ .short_help = "show hardware-interfaces [brief|verbose|detail] [bond] "
+ "[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]",
+ .function = show_or_clear_hw_interfaces,
+};
+/* *INDENT-ON* */
+
+
+/*?
+ * Clear the extended statistics for all or a list of given interfaces
+ * (statistics associated with the '<em>show hardware-interfaces</em>' command).
+ *
+ * @cliexpar
+ * Example of how to clear the extended statistics for all interfaces:
+ * @cliexcmd{clear hardware-interfaces}
+ * Example of how to clear the extended statistics for an interface by
+ * name and software index (where 2 is the software index):
+ * @cliexcmd{clear hardware-interfaces GigabitEthernet7/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_hw_interface_counters_command, static) = {
+ .path = "clear hardware-interfaces",
+ .short_help = "clear hardware-interfaces "
+ "[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]",
+ .function = show_or_clear_hw_interfaces,
+};
+/* *INDENT-ON* */
+
+static int
+sw_interface_name_compare (void *a1, void *a2)
+{
+ vnet_sw_interface_t *si1 = a1;
+ vnet_sw_interface_t *si2 = a2;
+
+ return vnet_sw_interface_compare (vnet_get_main (),
+ si1->sw_if_index, si2->sw_if_index);
+}
+
+static clib_error_t *
+show_sw_interfaces (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *si, *sorted_sis = 0;
+ u32 sw_if_index = ~(u32) 0;
+ u8 show_addresses = 0;
+ u8 show_features = 0;
+ u8 show_tag = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ /* See if user wants to show specific interface */
+ if (unformat
+ (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ si = pool_elt_at_index (im->sw_interfaces, sw_if_index);
+ vec_add1 (sorted_sis, si[0]);
+ }
+ else if (unformat (input, "address") || unformat (input, "addr"))
+ show_addresses = 1;
+ else if (unformat (input, "features") || unformat (input, "feat"))
+ show_features = 1;
+ else if (unformat (input, "tag"))
+ show_tag = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (show_features || show_tag)
+ {
+ if (sw_if_index == ~(u32) 0)
+ return clib_error_return (0, "Interface not specified...");
+ }
+
+ if (show_features)
+ {
+ vnet_interface_features_show (vm, sw_if_index);
+
+ l2_input_config_t *l2_input = l2input_intf_config (sw_if_index);
+ u32 fb = l2_input->feature_bitmap;
+ /* intf input features are masked by bridge domain */
+ if (l2_input->bridge)
+ fb &= l2input_bd_config (l2_input->bd_index)->feature_bitmap;
+ vlib_cli_output (vm, "\nl2-input:\n%U", format_l2_input_features, fb);
+
+ l2_output_config_t *l2_output = l2output_intf_config (sw_if_index);
+ vlib_cli_output (vm, "\nl2-output:");
+ if (l2_output->out_vtr_flag)
+ vlib_cli_output (vm, "%10s (%s)", "VTR", "--internal--");
+ vlib_cli_output (vm, "%U", format_l2_output_features,
+ l2_output->feature_bitmap);
+ return 0;
+ }
+ if (show_tag)
+ {
+ u8 *tag;
+ tag = vnet_get_sw_interface_tag (vnm, sw_if_index);
+ vlib_cli_output (vm, "%U: %s",
+ format_vnet_sw_if_index_name, vnm, sw_if_index,
+ tag ? (char *) tag : "(none)");
+ return 0;
+ }
+
+ if (!show_addresses)
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, 0);
+
+ if (vec_len (sorted_sis) == 0) /* Get all interfaces */
+ {
+ /* Gather interfaces. */
+ sorted_sis =
+ vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
+ _vec_len (sorted_sis) = 0;
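+ /* sized for the worst case above; append only API-visible interfaces */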
+ pool_foreach (si, im->sw_interfaces, (
+ {
+ int visible =
+ vnet_swif_is_api_visible (si);
+ if (visible)
+ vec_add1 (sorted_sis, si[0]);}
+ ));
+
+ /* Sort by name. */
+ vec_sort_with_function (sorted_sis, sw_interface_name_compare);
+ }
+
+ if (show_addresses)
+ {
+ vec_foreach (si, sorted_sis)
+ {
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ ip_lookup_main_t *lm4 = &im4->lookup_main;
+ ip_lookup_main_t *lm6 = &im6->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip4_address_t *r4;
+ ip6_address_t *r6;
+ u32 fib_index4 = 0, fib_index6 = 0;
+ ip4_fib_t *fib4;
+ ip6_fib_t *fib6;
+
+ if (vec_len (im4->fib_index_by_sw_if_index) > si->sw_if_index)
+ fib_index4 = vec_elt (im4->fib_index_by_sw_if_index,
+ si->sw_if_index);
+
+ if (vec_len (im6->fib_index_by_sw_if_index) > si->sw_if_index)
+ fib_index6 = vec_elt (im6->fib_index_by_sw_if_index,
+ si->sw_if_index);
+
+ fib4 = ip4_fib_get (fib_index4);
+ fib6 = ip6_fib_get (fib_index6);
+
+ if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
+ vlib_cli_output
+ (vm, "%U (%s): \n unnumbered, use %U",
+ format_vnet_sw_if_index_name,
+ vnm, si->sw_if_index,
+ (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? "up" : "dn",
+ format_vnet_sw_if_index_name, vnm, si->unnumbered_sw_if_index);
+
+ else
+ {
+ vlib_cli_output (vm, "%U (%s):",
+ format_vnet_sw_if_index_name,
+ vnm, si->sw_if_index,
+ (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ ? "up" : "dn");
+ }
+
+ /* Display any L2 info */
+ l2_input_config_t *l2_input = l2input_intf_config (si->sw_if_index);
+ if (l2_input->bridge)
+ {
+ u32 bd_id = l2input_main.bd_configs[l2_input->bd_index].bd_id;
+ vlib_cli_output (vm, " l2 bridge bd_id %d%s%d", bd_id,
+ l2_input->bvi ? " bvi shg " : " shg ",
+ l2_input->shg);
+ }
+ else if (l2_input->xconnect)
+ {
+ vlib_cli_output (vm, " l2 xconnect %U",
+ format_vnet_sw_if_index_name,
+ vnm, l2_input->output_sw_if_index);
+ }
+
+ /* Display any IP4 addressing info */
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm4, ia, si->sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ r4 = ip_interface_address_get_address (lm4, ia);
+ if (fib4->table_id)
+ {
+ vlib_cli_output (vm, " %U/%d table %d",
+ format_ip4_address, r4,
+ ia->address_length,
+ fib4->table_id);
+ }
+ else
+ {
+ vlib_cli_output (vm, " %U/%d",
+ format_ip4_address, r4,
+ ia->address_length);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /* Display any IP6 addressing info */
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm6, ia, si->sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ r6 = ip_interface_address_get_address (lm6, ia);
+ if (fib6->table_id)
+ {
+ vlib_cli_output (vm, " %U/%d table %d",
+ format_ip6_address, r6,
+ ia->address_length,
+ fib6->table_id);
+ }
+ else
+ {
+ vlib_cli_output (vm, " %U/%d",
+ format_ip6_address, r6,
+ ia->address_length);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ else
+ {
+ vec_foreach (si, sorted_sis)
+ {
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, si);
+ }
+ }
+
+done:
+ vec_free (sorted_sis);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sw_interfaces_command, static) = {
+ .path = "show interface",
+ .short_help = "show interface [address|addr|features|feat] [<interface> [<interface> [..]]]",
+ .function = show_sw_interfaces,
+};
+/* *INDENT-ON* */
+
+/* Root of all interface commands. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vnet_cli_interface_command, static) = {
+ .path = "interface",
+ .short_help = "Interface commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vnet_cli_set_interface_command, static) = {
+ .path = "set interface",
+ .short_help = "Interface commands",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+clear_interface_counters (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_simple_counter_main_t *sm;
+ vlib_combined_counter_main_t *cm;
+ static vnet_main_t **my_vnet_mains;
+ int i, j, n_counters;
+
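+ /* Counters live per vnet_main_t; when multiple stacks are configured,
+ clear every instance below, otherwise fall back to the sole vnet_main. */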
+ vec_reset_length (my_vnet_mains);
+
+ for (i = 0; i < vec_len (vnet_mains); i++)
+ {
+ if (vnet_mains[i])
+ vec_add1 (my_vnet_mains, vnet_mains[i]);
+ }
+
+ if (vec_len (vnet_mains) == 0)
+ vec_add1 (my_vnet_mains, vnm);
+
+ n_counters = vec_len (im->combined_sw_if_counters);
+
+ for (j = 0; j < n_counters; j++)
+ {
+ for (i = 0; i < vec_len (my_vnet_mains); i++)
+ {
+ im = &my_vnet_mains[i]->interface_main;
+ cm = im->combined_sw_if_counters + j;
+ vlib_clear_combined_counters (cm);
+ }
+ }
+
+ n_counters = vec_len (im->sw_if_counters);
+
+ for (j = 0; j < n_counters; j++)
+ {
+ for (i = 0; i < vec_len (my_vnet_mains); i++)
+ {
+ im = &my_vnet_mains[i]->interface_main;
+ sm = im->sw_if_counters + j;
+ vlib_clear_simple_counters (sm);
+ }
+ }
+
+ return 0;
+}
+
+/*?
+ * Clear the statistics for all interfaces (statistics associated with the
+ * '<em>show interface</em>' command).
+ *
+ * @cliexpar
+ * Example of how to clear the statistics for all interfaces:
+ * @cliexcmd{clear interfaces}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_interface_counters_command, static) = {
+ .path = "clear interfaces",
+ .short_help = "clear interfaces",
+ .function = clear_interface_counters,
+};
+/* *INDENT-ON* */
+
+/**
+ * Parse subinterface names.
+ *
+ * The following subinterface syntax is supported. The first two are for
+ * backward compatibility:
+ *
+ * <intf-name> <id>
+ * - a subinterface with the name <intf-name>.<id>. The subinterface
+ * is a single dot1q vlan with vlan id <id> and exact-match semantics.
+ *
+ * <intf-name> <min_id>-<max_id>
+ * - a set of the above subinterfaces, repeating for each id
+ * in the range <min_id> to <max_id>
+ *
+ * In the following, exact-match semantics (i.e. the number of vlan tags on the
+ * packet must match the number of tags in the configuration) are used only if
+ * the keyword exact-match is present. Non-exact match is the default.
+ *
+ * <intf-name> <id> dot1q <outer_id> [exact-match]
+ * - a subinterface with the name <intf-name>.<id>. The subinterface
+ * is a single dot1q vlan with vlan id <outer_id>.
+ *
+ * <intf-name> <id> dot1q any [exact-match]
+ * - a subinterface with the name <intf-name>.<id>. The subinterface
+ * is a single dot1q vlan with any vlan id.
+ *
+ * <intf-name> <id> dot1q <outer_id> inner-dot1q <inner_id> [exact-match]
+ * - a subinterface with the name <intf-name>.<id>. The subinterface
+ * is a double dot1q vlan with outer vlan id <outer_id> and inner vlan id
+ * <inner_id>.
+ *
+ * <intf-name> <id> dot1q <outer_id> inner-dot1q any [exact-match]
+ * - a subinterface with the name <intf-name>.<id>. The subinterface
+ * is a double dot1q vlan with outer vlan id <outer_id> and any inner vlan id.
+ *
+ * <intf-name> <id> dot1q any inner-dot1q any [exact-match]
+ * - a subinterface with the name <intf-name>.<id>. The subinterface
+ * is a double dot1q vlan with any outer vlan id and any inner vlan id.
+ *
+ * For each of the above CLI forms, there is a duplicate that uses the keyword
+ * "dot1ad" in place of the first "dot1q". These interfaces use ethertype
+ * 0x88a8 in place of 0x8100 for the outer ethertype. Note that for double-
+ * tagged packets the inner ethertype is always 0x8100. Also note that
+ * the dot1q and dot1ad namespaces are independent, so it is legal to
+ * have both "Gig3/0/0.1 dot1q 100" and "Gig3/0/0.2 dot1ad 100". For example:
+ *
+ * <intf-name> <id> dot1ad <outer_id> inner-dot1q <inner_id> [exact-match]
+ * - a subinterface with the name <intf-name>.<id>. The subinterface
+ * is a double dot1ad vlan with outer vlan id <outer_id> and inner vlan
+ * id <inner_id>.
+ *
+ * <intf-name> <id> untagged
+ * - a subinterface with the name <intf-name>.<id>. The subinterface
+ * has no vlan tags. Only one can be specified per interface.
+ *
+ * <intf-name> <id> default
+ * - a subinterface with the name <intf-name>.<id>. This is associated
+ * with a packet that did not match any other configured subinterface
+ * on this interface. Only one can be specified per interface.
+ */
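+/*
+ * Worked example: "GigabitEthernet2/0/0 5 dot1q 100 inner-dot1q 200
+ * exact-match" creates GigabitEthernet2/0/0.5, which matches packets
+ * carrying exactly two tags: outer dot1q VLAN 100, inner dot1q VLAN 200.
+ */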
+
+static clib_error_t *
+parse_vlan_sub_interfaces (unformat_input_t * input,
+ vnet_sw_interface_t * template)
+{
+ clib_error_t *error = 0;
+ u32 inner_vlan, outer_vlan;
+
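+ /* For example, "100 inner-dot1q any" sets two_tags = 1,
+ outer_vlan_id = 100 and inner_vlan_id_any = 1 in the template. */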
+ if (unformat (input, "any inner-dot1q any"))
+ {
+ template->sub.eth.flags.two_tags = 1;
+ template->sub.eth.flags.outer_vlan_id_any = 1;
+ template->sub.eth.flags.inner_vlan_id_any = 1;
+ }
+ else if (unformat (input, "any"))
+ {
+ template->sub.eth.flags.one_tag = 1;
+ template->sub.eth.flags.outer_vlan_id_any = 1;
+ }
+ else if (unformat (input, "%d inner-dot1q any", &outer_vlan))
+ {
+ template->sub.eth.flags.two_tags = 1;
+ template->sub.eth.flags.inner_vlan_id_any = 1;
+ template->sub.eth.outer_vlan_id = outer_vlan;
+ }
+ else if (unformat (input, "%d inner-dot1q %d", &outer_vlan, &inner_vlan))
+ {
+ template->sub.eth.flags.two_tags = 1;
+ template->sub.eth.outer_vlan_id = outer_vlan;
+ template->sub.eth.inner_vlan_id = inner_vlan;
+ }
+ else if (unformat (input, "%d", &outer_vlan))
+ {
+ template->sub.eth.flags.one_tag = 1;
+ template->sub.eth.outer_vlan_id = outer_vlan;
+ }
+ else
+ {
+ error = clib_error_return (0, "expected dot1q config, got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "exact-match"))
+ {
+ template->sub.eth.flags.exact_match = 1;
+ }
+ }
+
+done:
+ return error;
+}
+
+static clib_error_t *
+create_sub_interfaces (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 hw_if_index, sw_if_index;
+ vnet_hw_interface_t *hi;
+ u32 id, id_min, id_max;
+ vnet_sw_interface_t template;
+
+ hw_if_index = ~0;
+ if (!unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ memset (&template, 0, sizeof (template));
+ template.sub.eth.raw_flags = 0;
+
+ if (unformat (input, "%d default", &id_min))
+ {
+ id_max = id_min;
+ template.sub.eth.flags.default_sub = 1;
+ }
+ else if (unformat (input, "%d untagged", &id_min))
+ {
+ id_max = id_min;
+ template.sub.eth.flags.no_tags = 1;
+ template.sub.eth.flags.exact_match = 1;
+ }
+ else if (unformat (input, "%d dot1q", &id_min))
+ {
+ /* parse dot1q config */
+ id_max = id_min;
+ error = parse_vlan_sub_interfaces (input, &template);
+ if (error)
+ goto done;
+ }
+ else if (unformat (input, "%d dot1ad", &id_min))
+ {
+ /* parse dot1ad config */
+ id_max = id_min;
+ template.sub.eth.flags.dot1ad = 1;
+ error = parse_vlan_sub_interfaces (input, &template);
+ if (error)
+ goto done;
+ }
+ else if (unformat (input, "%d-%d", &id_min, &id_max))
+ {
+ template.sub.eth.flags.one_tag = 1;
+ template.sub.eth.flags.exact_match = 1;
+ if (id_min > id_max)
+ goto id_error;
+ }
+ else if (unformat (input, "%d", &id_min))
+ {
+ id_max = id_min;
+ template.sub.eth.flags.one_tag = 1;
+ template.sub.eth.outer_vlan_id = id_min;
+ template.sub.eth.flags.exact_match = 1;
+ }
+ else
+ {
+ id_error:
+ error = clib_error_return (0, "expected ID or ID MIN-MAX, got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE)
+ {
+ error =
+ clib_error_return (0,
+ "not allowed as %v belongs to a BondEthernet interface",
+ hi->name);
+ goto done;
+ }
+
+ for (id = id_min; id <= id_max; id++)
+ {
+ uword *p;
+ vnet_interface_main_t *im = &vnm->interface_main;
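+ /* Pack the (parent, sub) pair into a single u64 hash key:
+ sup sw_if_index in the upper 32 bits, sub id in the lower 32. */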
+ u64 sup_and_sub_key = ((u64) (hi->sw_if_index) << 32) | (u64) id;
+ u64 *kp;
+
+ p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
+ if (p)
+ {
+ if (CLIB_DEBUG > 0)
+ clib_warning ("sup sw_if_index %d, sub id %d already exists\n",
+ hi->sw_if_index, id);
+ continue;
+ }
+
+ kp = clib_mem_alloc (sizeof (*kp));
+ *kp = sup_and_sub_key;
+
+ template.type = VNET_SW_INTERFACE_TYPE_SUB;
+ template.flood_class = VNET_FLOOD_CLASS_NORMAL;
+ template.sup_sw_if_index = hi->sw_if_index;
+ template.sub.id = id;
+ if (id_min < id_max)
+ template.sub.eth.outer_vlan_id = id;
+
+ error = vnet_create_sw_interface (vnm, &template, &sw_if_index);
+ if (error)
+ goto done;
+
+ hash_set (hi->sub_interface_sw_if_index_by_id, id, sw_if_index);
+ hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, sw_if_index);
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw_if_index);
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * This command is used to add VLAN IDs to interfaces, also known as subinterfaces.
+ * The primary input to this command is the '<em>interface</em>' and '<em>subId</em>'
+ * (subinterface Id) parameters. If no additional VLAN ID is provided, the VLAN ID is
+ * assumed to be the '<em>subId</em>'. The VLAN ID and '<em>subId</em>' can be different,
+ * but this is not recommended.
+ *
+ * This command has several variations:
+ * - <b>create sub-interfaces <interface> <subId></b> - Create a subinterface to
+ * process packets with a given 802.1q VLAN ID (same value as the '<em>subId</em>').
+ *
+ * - <b>create sub-interfaces <interface> <subId> default</b> - Adding the
+ * '<em>default</em>' parameter indicates that packets with VLAN IDs that do not
+ * match any other subinterfaces should be sent to this subinterface.
+ *
+ * - <b>create sub-interfaces <interface> <subId> untagged</b> - Adding the
+ * '<em>untagged</em>' parameter indicates that packets with no VLAN IDs should be sent
+ * to this subinterface.
+ *
+ * - <b>create sub-interfaces <interface> <subId>-<subId></b> - Create a range of
+ * subinterfaces to handle a range of VLAN IDs.
+ *
+ * - <b>create sub-interfaces <interface> <subId> dot1q|dot1ad <vlanId>|any [exact-match]</b> -
+ * Use this command to specify the outer VLAN ID explicitly, or to make the
+ * VLAN ID different from the '<em>subId</em>'.
+ *
+ * - <b>create sub-interfaces <interface> <subId> dot1q|dot1ad <vlanId>|any inner-dot1q
+ * <vlanId>|any [exact-match]</b> - Use this command to specify the outer VLAN ID and
+ * the inner VLAN ID.
+ *
+ * When '<em>dot1q</em>' or '<em>dot1ad</em>' is explicitly entered, subinterfaces
+ * can be configured as either exact-match or non-exact match. Non-exact match is the CLI
+ * default. If '<em>exact-match</em>' is specified, packets must have the same number of
+ * VLAN tags as the configuration. For non-exact match, packets must have at least that
+ * number of tags. L3 (routed) interfaces must be configured as exact-match. L2 interfaces are
+ * typically configured as non-exact-match. If '<em>dot1q</em>' or '<em>dot1ad</em>' is NOT
+ * entered, then the default behavior is exact-match.
+ *
+ * Use the '<em>show interface</em>' command to display all subinterfaces.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to create a VLAN subinterface 11 to process packets on 802.1q VLAN ID 11:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 11}
+ *
+ * The previous example is shorthand and is equivalent to:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 11 dot1q 11 exact-match}
+ *
+ *
+ * Example of how to create a subinterface number that is different from the VLAN ID:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 11 dot1q 100}
+ *
+ *
+ * Examples of how to create q-in-q and q-in-any subinterfaces:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 11 dot1q 100 inner-dot1q 200}
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 12 dot1q 100 inner-dot1q any}
+ *
+ * Examples of how to create dot1ad interfaces:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 11 dot1ad 11}
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 12 dot1ad 100 inner-dot1q 200}
+ *
+ *
+ * Examples of '<em>exact-match</em>' versus non-exact match. A packet with
+ * outer VLAN 100 and inner VLAN 200 would match this interface, because the default
+ * is non-exact match:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 5 dot1q 100}
+ *
+ * However, the same packet would NOT match this interface because '<em>exact-match</em>'
+ * is specified and only one VLAN is configured, but the packet contains two VLANs:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 5 dot1q 100 exact-match}
+ *
+ *
+ * Example of how to create a subinterface to process untagged packets:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 5 untagged}
+ *
+ * Example of how to create a subinterface to process any packet with a VLAN ID that
+ * does not match any other subinterface:
+ * @cliexcmd{create sub-interfaces GigabitEthernet2/0/0 7 default}
+ *
+ * When subinterfaces are created, they are in the down state. Example of how to
+ * enable a newly created subinterface:
+ * @cliexcmd{set interface GigabitEthernet2/0/0.7 up}
+ * @endparblock
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = {
+ .path = "create sub-interfaces",
+ .short_help = "create sub-interfaces <interface> "
+ "{<subId> [default|untagged]} | "
+ "{<subId>-<subId>} | "
+ "{<subId> dot1q|dot1ad <vlanId>|any [inner-dot1q <vlanId>|any] [exact-match]}",
+ .function = create_sub_interfaces,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_state (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error;
+ u32 sw_if_index, flags;
+
+ sw_if_index = ~0;
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%U", unformat_vnet_sw_interface_flags, &flags))
+ {
+ error = clib_error_return (0, "unknown flags `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ error = vnet_sw_interface_set_flags (vnm, sw_if_index, flags);
+ if (error)
+ goto done;
+
+done:
+ return error;
+}
+
+
+/*?
+ * This command is used to change the admin state (up/down) of an interface.
+ *
+ * If an interface is down, the optional '<em>punt</em>' flag can also be set.
+ * The '<em>punt</em>' flag means the interface is disabled for forwarding
+ * but punts all traffic to the slow path. Use the '<em>enable</em>' flag to
+ * clear the '<em>punt</em>' flag (the interface remains down).
+ *
+ * @cliexpar
+ * Example of how to configure the admin state of an interface to '<em>up</em>':
+ * @cliexcmd{set interface state GigabitEthernet2/0/0 up}
+ * Example of how to configure the admin state of an interface to '<em>down</em>':
+ * @cliexcmd{set interface state GigabitEthernet2/0/0 down}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_state_command, static) = {
+ .path = "set interface state",
+ .short_help = "set interface state <interface> [up|down|punt|enable]",
+ .function = set_state,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_unnumbered (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 unnumbered_sw_if_index;
+ u32 inherit_from_sw_if_index;
+ vnet_sw_interface_t *si;
+ int is_set = 0;
+ int is_del = 0;
+ u32 was_unnum;
+
+ if (unformat (input, "%U use %U",
+ unformat_vnet_sw_interface, vnm, &unnumbered_sw_if_index,
+ unformat_vnet_sw_interface, vnm, &inherit_from_sw_if_index))
+ is_set = 1;
+ else if (unformat (input, "del %U",
+ unformat_vnet_sw_interface, vnm,
+ &unnumbered_sw_if_index))
+ is_del = 1;
+ else
+ return clib_error_return (0, "parse error '%U'",
+ format_unformat_error, input);
+
+ si = vnet_get_sw_interface (vnm, unnumbered_sw_if_index);
+ was_unnum = (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED);
+
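+ /* An unnumbered interface borrows its IP state from a donor interface:
+ the per-sw_if_index address-pool indices below either alias the
+ donor's entries (set) or are reset to ~0 (del). */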
+ if (is_del)
+ {
+ si->flags &= ~(VNET_SW_INTERFACE_FLAG_UNNUMBERED);
+ si->unnumbered_sw_if_index = (u32) ~ 0;
+
+ ip4_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [unnumbered_sw_if_index] = ~0;
+ ip6_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [unnumbered_sw_if_index] = ~0;
+ }
+ else if (is_set)
+ {
+ si->flags |= VNET_SW_INTERFACE_FLAG_UNNUMBERED;
+ si->unnumbered_sw_if_index = inherit_from_sw_if_index;
+
+ ip4_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [unnumbered_sw_if_index] =
+ ip4_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [inherit_from_sw_if_index];
+ ip6_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [unnumbered_sw_if_index] =
+ ip6_main.lookup_main.if_address_pool_index_by_sw_if_index
+ [inherit_from_sw_if_index];
+ }
+
+ if (was_unnum != (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
+ {
+ ip4_sw_interface_enable_disable (unnumbered_sw_if_index, !is_del);
+ ip6_sw_interface_enable_disable (unnumbered_sw_if_index, !is_del);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_unnumbered_command, static) = {
+ .path = "set interface unnumbered",
+ .short_help = "set interface unnumbered [<interface> use <interface> | del <interface>]",
+ .function = set_unnumbered,
+};
+/* *INDENT-ON* */
+
+
+
+static clib_error_t *
+set_hw_class (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ clib_error_t *error;
+ u32 hw_if_index, hw_class_index;
+
+ hw_if_index = ~0;
+ if (!unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index))
+ {
+ error = clib_error_return (0, "unknown hardware interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat_user (input, unformat_hash_string,
+ im->hw_interface_class_by_name, &hw_class_index))
+ {
+ error = clib_error_return (0, "unknown hardware class `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ error = vnet_hw_interface_set_class (vnm, hw_if_index, hw_class_index);
+ if (error)
+ goto done;
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_hw_class_command, static) = {
+ .path = "set interface hw-class",
+ .short_help = "set interface hw-class <interface> <class>",
+ .function = set_hw_class,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vnet_interface_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_interface_cli_init);
+
+static clib_error_t *
+renumber_interface_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 hw_if_index;
+ u32 new_dev_instance;
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv;
+
+ if (!unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index))
+ return clib_error_return (0, "unknown hardware interface `%U'",
+ format_unformat_error, input);
+
+ if (!unformat (input, "%d", &new_dev_instance))
+ return clib_error_return (0, "new dev instance missing");
+
+ rv = vnet_interface_name_renumber (hw_if_index, new_dev_instance);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_interface_name_renumber returned %d",
+ rv);
+
+ }
+
+ return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (renumber_interface_command, static) = {
+ .path = "renumber interface",
+ .short_help = "renumber interface <interface> <new-dev-instance>",
+ .function = renumber_interface_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+promiscuous_cmd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 hw_if_index;
+ u32 flags = ETHERNET_INTERFACE_FLAG_ACCEPT_ALL;
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *eif;
+
+ if (unformat (input, "on %U",
+ unformat_vnet_hw_interface, vnm, &hw_if_index))
+ ;
+ else if (unformat (input, "off %U",
+ unformat_ethernet_interface, vnm, &hw_if_index))
+ flags = 0;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ eif = ethernet_get_interface (em, hw_if_index);
+ if (!eif)
+ return clib_error_return (0, "not supported");
+
+ ethernet_set_flags (vnm, hw_if_index, flags);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_promiscuous_cmd, static) = {
+ .path = "set interface promiscuous",
+ .short_help = "set interface promiscuous [on|off] <interface>",
+ .function = promiscuous_cmd,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 hw_if_index, mtu;
+ u32 flags = ETHERNET_INTERFACE_FLAG_MTU;
+ ethernet_main_t *em = &ethernet_main;
+
+ if (unformat (input, "%d %U", &mtu,
+ unformat_vnet_hw_interface, vnm, &hw_if_index))
+ {
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index);
+
+ if (!eif)
+ return clib_error_return (0, "not supported");
+
+ if (mtu < hi->min_supported_packet_bytes)
+ return clib_error_return (0, "Invalid mtu (%d): "
+ "must be >= min pkt bytes (%d)", mtu,
+ hi->min_supported_packet_bytes);
+
+ if (mtu > hi->max_supported_packet_bytes)
+ return clib_error_return (0, "Invalid mtu (%d): must be <= (%d)", mtu,
+ hi->max_supported_packet_bytes);
+
+ if (hi->max_packet_bytes != mtu)
+ {
+ hi->max_packet_bytes = mtu;
+ ethernet_set_flags (vnm, hw_if_index, flags);
+ }
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_mtu_cmd, static) = {
+ .path = "set interface mtu",
+ .short_help = "set interface mtu <value> <interface>",
+ .function = mtu_cmd,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_interface_mac_address (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *si = NULL;
+ clib_error_t *error = 0;
+ u32 sw_if_index = ~0;
+ u64 mac = 0;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ if (!unformat_user (input, unformat_ethernet_address, &mac))
+ {
+ error = clib_error_return (0, "expected mac address `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ error = vnet_hw_interface_change_mac_address (vnm, si->hw_if_index, mac);
+done:
+ return error;
+}
+
+/*?
+ * The '<em>set interface mac address</em>' command sets the MAC address of a
+ * given interface. For NIC interfaces, the hardware must support changing the
+ * MAC address. A side effect of a MAC address change is that the corresponding
+ * MAC addresses in the FIB tables (IPv4 and IPv6) are updated as well.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to change MAC Address of interface:
+ * @cliexcmd{set interface mac address GigabitEthernet0/8/0 aa:bb:cc:dd:ee:01}
+ * @cliexcmd{set interface mac address host-vpp0 aa:bb:cc:dd:ee:02}
+ * @cliexcmd{set interface mac address tap-0 aa:bb:cc:dd:ee:03}
+ * @cliexcmd{set interface mac address pg0 aa:bb:cc:dd:ee:04}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_mac_address_cmd, static) = {
+ .path = "set interface mac address",
+ .short_help = "set interface mac address <interface> <mac-address>",
+ .function = set_interface_mac_address,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ u8 *tag = 0;
+
+ if (!unformat (input, "%U %s", unformat_vnet_sw_interface,
+ vnm, &sw_if_index, &tag))
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ vnet_set_sw_interface_tag (vnm, tag, sw_if_index);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_tag_command, static) = {
+ .path = "set interface tag",
+ .short_help = "set interface tag <interface> <tag>",
+ .function = set_tag,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+clear_tag (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+
+ if (!unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ vnet_clear_sw_interface_tag (vnm, sw_if_index);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_tag_command, static) = {
+ .path = "clear interface tag",
+ .short_help = "clear interface tag <interface>",
+ .function = clear_tag,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_hw_interface_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
+ u32 queue_id, vnet_hw_interface_rx_mode mode)
+{
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_device_class_t *dev_class =
+ vnet_get_device_class (vnm, hw->dev_class_index);
+ clib_error_t *error;
+ vnet_hw_interface_rx_mode old_mode;
+ int rv;
+
+ if (mode == VNET_HW_INTERFACE_RX_MODE_DEFAULT)
+ mode = hw->default_rx_mode;
+
+ rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &old_mode);
+ switch (rv)
+ {
+ case 0:
+ if (old_mode == mode)
+ return 0; /* same rx-mode, no change */
+ break;
+ case VNET_API_ERROR_INVALID_INTERFACE:
+ return clib_error_return (0, "invalid interface");
+ case VNET_API_ERROR_INVALID_QUEUE:
+ return clib_error_return (0, "invalid queue");
+ default:
+ return clib_error_return (0, "unknown error");
+ }
+
+ if (dev_class->rx_mode_change_function)
+ {
+ error = dev_class->rx_mode_change_function (vnm, hw_if_index, queue_id,
+ mode);
+ if (error)
+ return (error);
+ }
+
+ rv = vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode);
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_UNSUPPORTED:
+ return clib_error_return (0, "unsupported");
+ case VNET_API_ERROR_INVALID_INTERFACE:
+ return clib_error_return (0, "invalid interface");
+ case VNET_API_ERROR_INVALID_QUEUE:
+ return clib_error_return (0, "invalid queue");
+ default:
+ return clib_error_return (0, "unknown error");
+ }
+
+ return 0;
+}
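+
+/*
+ * Illustrative sketch (hypothetical names, not part of this patch): a
+ * device class opts in to run-time rx-mode changes by supplying a
+ * callback, roughly:
+ *
+ *   static clib_error_t *
+ *   my_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 queue_id,
+ *                      vnet_hw_interface_rx_mode mode)
+ *   {
+ *     // reprogram the device queue for polling/interrupt/adaptive
+ *     return 0;
+ *   }
+ *
+ *   VNET_DEVICE_CLASS (my_device_class) = {
+ *     .rx_mode_change_function = my_rx_mode_change,
+ *   };
+ *
+ * Classes without the callback still have the requested mode recorded via
+ * vnet_hw_interface_set_rx_mode () above.
+ */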
+
+static clib_error_t *
+set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hw;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 queue_id = (u32) ~ 0;
+ vnet_hw_interface_rx_mode mode = VNET_HW_INTERFACE_RX_MODE_UNKNOWN;
+ int i;
+ u8 input_queue_id = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ ;
+ else if (unformat (line_input, "queue %d", &queue_id))
+ input_queue_id = 1;
+ else if (unformat (line_input, "polling"))
+ mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+ else if (unformat (line_input, "interrupt"))
+ mode = VNET_HW_INTERFACE_RX_MODE_INTERRUPT;
+ else if (unformat (line_input, "adaptive"))
+ mode = VNET_HW_INTERFACE_RX_MODE_ADAPTIVE;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return error;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ if (mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN)
+ return clib_error_return (0, "please specify valid rx-mode");
+
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+ if (input_queue_id == 0)
+ {
+ for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++)
+ {
+ error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode);
+ if (error)
+ break;
+ }
+ hw->default_rx_mode = mode;
+ }
+ else
+ error = set_hw_interface_rx_mode (vnm, hw_if_index, queue_id, mode);
+
+ return (error);
+}
+
+/*?
+ * This command is used to assign the RX packet processing mode (polling,
+ * interrupt, adaptive) of a given interface, and optionally a
+ * given queue. If the '<em>queue</em>' is not provided, the '<em>mode</em>'
+ * is applied to all queues of the interface. Not all interfaces support
+ * all modes. To display the current rx-mode use the command
+ * '<em>show interface rx-placement</em>'.
+ *
+ * @cliexpar
+ * Example of how to assign rx-mode to all queues on an interface:
+ * @cliexcmd{set interface rx-mode VirtualEthernet0/0/12 polling}
+ * Example of how to assign rx-mode to one queue of an interface:
+ * @cliexcmd{set interface rx-mode VirtualEthernet0/0/12 queue 0 interrupt}
+ * Example of how to display the rx-mode of all interfaces:
+ * @cliexstart{show interface rx-placement}
+ * Thread 1 (vpp_wk_0):
+ * node dpdk-input:
+ * GigabitEthernet7/0/0 queue 0 (polling)
+ * node vhost-user-input:
+ * VirtualEthernet0/0/12 queue 0 (interrupt)
+ * VirtualEthernet0/0/12 queue 2 (polling)
+ * VirtualEthernet0/0/13 queue 0 (polling)
+ * VirtualEthernet0/0/13 queue 2 (polling)
+ * Thread 2 (vpp_wk_1):
+ * node dpdk-input:
+ * GigabitEthernet7/0/1 queue 0 (polling)
+ * node vhost-user-input:
+ * VirtualEthernet0/0/12 queue 1 (polling)
+ * VirtualEthernet0/0/12 queue 3 (polling)
+ * VirtualEthernet0/0/13 queue 1 (polling)
+ * VirtualEthernet0/0/13 queue 3 (polling)
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_if_rx_mode,static) = {
+ .path = "set interface rx-mode",
+ .short_help = "set interface rx-mode <interface> [queue <n>] [polling | interrupt | adaptive]",
+ .function = set_interface_rx_mode,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *s = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_device_input_runtime_t *rt;
+ vnet_device_and_queue_t *dq;
+ vlib_node_t *pn = vlib_get_node_by_name (vm, (u8 *) "device-input");
+ uword si;
+ int index = 0;
+
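+ /* Every device input node is registered as a sibling of "device-input";
+ walk each thread's copy via the sibling bitmap and report the
+ device/queue pairs recorded in the per-node runtime data. */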
+ /* *INDENT-OFF* */
+ foreach_vlib_main (({
+ clib_bitmap_foreach (si, pn->sibling_bitmap,
+ ({
+ rt = vlib_node_get_runtime_data (this_vlib_main, si);
+
+ if (vec_len (rt->devices_and_queues))
+ s = format (s, " node %U:\n", format_vlib_node_name, vm, si);
+
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm,
+ dq->hw_if_index);
+ s = format (s, " %U queue %u (%U)\n",
+ format_vnet_sw_if_index_name, vnm, hi->sw_if_index,
+ dq->queue_id,
+ format_vnet_hw_interface_rx_mode, dq->mode);
+ }
+ }));
+ if (vec_len (s) > 0)
+ {
+ vlib_cli_output(vm, "Thread %u (%v):\n%v", index,
+ vlib_worker_threads[index].name, s);
+ vec_reset_length (s);
+ }
+ index++;
+ }));
+ /* *INDENT-ON* */
+
+ vec_free (s);
+ return 0;
+}
+
+/*?
+ * This command is used to display the interface and queue worker
+ * thread placement.
+ *
+ * @cliexpar
+ * Example of how to display the interface placement:
+ * @cliexstart{show interface rx-placement}
+ * Thread 1 (vpp_wk_0):
+ * node dpdk-input:
+ * GigabitEthernet7/0/0 queue 0 (polling)
+ * node vhost-user-input:
+ * VirtualEthernet0/0/12 queue 0 (polling)
+ * VirtualEthernet0/0/12 queue 2 (polling)
+ * VirtualEthernet0/0/13 queue 0 (polling)
+ * VirtualEthernet0/0/13 queue 2 (polling)
+ * Thread 2 (vpp_wk_1):
+ * node dpdk-input:
+ * GigabitEthernet7/0/1 queue 0 (polling)
+ * node vhost-user-input:
+ * VirtualEthernet0/0/12 queue 1 (polling)
+ * VirtualEthernet0/0/12 queue 3 (polling)
+ * VirtualEthernet0/0/13 queue 1 (polling)
+ * VirtualEthernet0/0/13 queue 3 (polling)
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_interface_rx_placement, static) = {
+ .path = "show interface rx-placement",
+ .short_help = "show interface rx-placement",
+ .function = show_interface_rx_placement_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_interface_rx_placement (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_device_main_t *vdm = &vnet_device_main;
+ vnet_hw_interface_rx_mode mode;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 queue_id = (u32) 0;
+ u32 thread_index = (u32) ~ 0;
+ int rv;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ ;
+ else if (unformat (line_input, "queue %d", &queue_id))
+ ;
+ else if (unformat (line_input, "main", &thread_index))
+ thread_index = 0;
+ else if (unformat (line_input, "worker %d", &thread_index))
+ thread_index += vdm->first_worker_thread_index;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return error;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ if (thread_index > vdm->last_worker_thread_index)
+ return clib_error_return (0,
+ "please specify valid worker thread or main");
+
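+ /* Moving a queue: remember its current rx-mode, detach the queue from
+ its present thread, re-attach it to the requested thread, then
+ restore the saved mode. */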
+ rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &mode);
+
+ if (rv)
+ return clib_error_return (0, "not found");
+
+ rv = vnet_hw_interface_unassign_rx_thread (vnm, hw_if_index, queue_id);
+
+ if (rv)
+ return clib_error_return (0, "not found");
+
+ vnet_hw_interface_assign_rx_thread (vnm, hw_if_index, queue_id,
+ thread_index);
+ vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode);
+
+ return 0;
+}
+
+/*?
+ * This command is used to assign a given interface, and optionally a
+ * given queue, to a different thread. If the '<em>queue</em>' is not provided,
+ * it defaults to 0. The '<em>worker</em>' parameter is zero-based and matches
+ * the index in the thread name; for example, 0 in the thread name '<em>vpp_wk_0</em>'.
+ *
+ * @cliexpar
+ * Example of how to display the interface placement:
+ * @cliexstart{show interface rx-placement}
+ * Thread 1 (vpp_wk_0):
+ * node dpdk-input:
+ * GigabitEthernet7/0/0 queue 0 (polling)
+ * node vhost-user-input:
+ * VirtualEthernet0/0/12 queue 0 (polling)
+ * VirtualEthernet0/0/12 queue 2 (polling)
+ * VirtualEthernet0/0/13 queue 0 (polling)
+ * VirtualEthernet0/0/13 queue 2 (polling)
+ * Thread 2 (vpp_wk_1):
+ * node dpdk-input:
+ * GigabitEthernet7/0/1 queue 0 (polling)
+ * node vhost-user-input:
+ * VirtualEthernet0/0/12 queue 1 (polling)
+ * VirtualEthernet0/0/12 queue 3 (polling)
+ * VirtualEthernet0/0/13 queue 1 (polling)
+ * VirtualEthernet0/0/13 queue 3 (polling)
+ * @cliexend
+ * Example of how to assign an interface and queue to a worker thread:
+ * @cliexcmd{set interface rx-placement VirtualEthernet0/0/12 queue 1 worker 0}
+ * Example of how to display the interface placement:
+ * @cliexstart{show interface rx-placement}
+ * Thread 1 (vpp_wk_0):
+ * node dpdk-input:
+ * GigabitEthernet7/0/0 queue 0 (polling)
+ * node vhost-user-input:
+ * VirtualEthernet0/0/12 queue 0 (polling)
+ * VirtualEthernet0/0/12 queue 1 (polling)
+ * VirtualEthernet0/0/12 queue 2 (polling)
+ * VirtualEthernet0/0/13 queue 0 (polling)
+ * VirtualEthernet0/0/13 queue 2 (polling)
+ * Thread 2 (vpp_wk_1):
+ * node dpdk-input:
+ * GigabitEthernet7/0/1 queue 0 (polling)
+ * node vhost-user-input:
+ * VirtualEthernet0/0/12 queue 3 (polling)
+ * VirtualEthernet0/0/13 queue 1 (polling)
+ * VirtualEthernet0/0/13 queue 3 (polling)
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = {
+ .path = "set interface rx-placement",
+ .short_help = "set interface rx-placement <interface> [queue <n>] "
+ "[worker <n> | main]",
+ .function = set_interface_rx_placement,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c
new file mode 100644
index 00000000..5694bb2f
--- /dev/null
+++ b/src/vnet/interface_format.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * interface_format.c: interface formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/bitmap.h>
+
+u8 *
+format_vnet_sw_interface_flags (u8 * s, va_list * args)
+{
+ u32 flags = va_arg (*args, u32);
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ERROR)
+ s = format (s, "error");
+ else if (flags & VNET_SW_INTERFACE_FLAG_BOND_SLAVE)
+ s = format (s, "bond-slave");
+ else
+ {
+ s = format (s, "%s",
+ (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? "up" : "down");
+ if (flags & VNET_SW_INTERFACE_FLAG_PUNT)
+ s = format (s, "/punt");
+ }
+
+ return s;
+}
+
+u8 *
+format_vnet_hw_interface_rx_mode (u8 * s, va_list * args)
+{
+ vnet_hw_interface_rx_mode mode = va_arg (*args, vnet_hw_interface_rx_mode);
+
+ if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ return format (s, "polling");
+
+ if (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT)
+ return format (s, "interrupt");
+
+ if (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)
+ return format (s, "adaptive");
+
+ return format (s, "unknown");
+}
+
+u8 *
+format_vnet_hw_interface (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ vnet_hw_interface_t *hi = va_arg (*args, vnet_hw_interface_t *);
+ vnet_hw_interface_class_t *hw_class;
+ vnet_device_class_t *dev_class;
+ int verbose = va_arg (*args, int);
+ uword indent;
+
+ if (!hi)
+ return format (s, "%=32s%=6s%=8s%s", "Name", "Idx", "Link", "Hardware");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "%-32v%=6d", hi->name, hi->hw_if_index);
+
+ if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE)
+ s = format (s, "%=8s", "slave");
+ else
+ s = format (s, "%=8s",
+ hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP ? "up" : "down");
+
+ hw_class = vnet_get_hw_interface_class (vnm, hi->hw_class_index);
+ dev_class = vnet_get_device_class (vnm, hi->dev_class_index);
+
+ if (hi->bond_info && (hi->bond_info != VNET_HW_INTERFACE_BOND_INFO_SLAVE))
+ {
+ int hw_idx;
+ s = format (s, "Slave-Idx:");
+ clib_bitmap_foreach (hw_idx, hi->bond_info, s =
+ format (s, " %d", hw_idx));
+ }
+ else if (dev_class->format_device_name)
+ s = format (s, "%U", dev_class->format_device_name, hi->dev_instance);
+ else
+ s = format (s, "%s%d", dev_class->name, hi->dev_instance);
+
+ if (verbose)
+ {
+ if (hw_class->format_device)
+ s = format (s, "\n%U%U",
+ format_white_space, indent + 2,
+ hw_class->format_device, hi->hw_if_index, verbose);
+ else
+ {
+ s = format (s, "\n%U%s",
+ format_white_space, indent + 2, hw_class->name);
+ if (hw_class->format_address && vec_len (hi->hw_address) > 0)
+ s =
+ format (s, " address %U", hw_class->format_address,
+ hi->hw_address);
+ }
+
+ if (dev_class->format_device)
+ s = format (s, "\n%U%U",
+ format_white_space, indent + 2,
+ dev_class->format_device, hi->dev_instance, verbose);
+ }
+
+ return s;
+}
+
+u8 *
+format_vnet_sw_interface_name (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ vnet_sw_interface_t *si = va_arg (*args, vnet_sw_interface_t *);
+ vnet_sw_interface_t *si_sup =
+ vnet_get_sup_sw_interface (vnm, si->sw_if_index);
+ vnet_hw_interface_t *hi_sup;
+
+ ASSERT (si_sup->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ hi_sup = vnet_get_hw_interface (vnm, si_sup->hw_if_index);
+
+ s = format (s, "%v", hi_sup->name);
+
+ if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ s = format (s, ".%d", si->sub.id);
+
+ return s;
+}
+
+u8 *
+format_vnet_sw_if_index_name (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ u32 sw_if_index = va_arg (*args, u32);
+ vnet_sw_interface_t *si;
+
+ si = vnet_get_sw_interface_safe (vnm, sw_if_index);
+
+ if (NULL == si)
+ {
+ return format (s, "DELETED");
+ }
+ return format (s, "%U", format_vnet_sw_interface_name, vnm, si);
+}
+
+u8 *
+format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im,
+ vnet_sw_interface_t * si)
+{
+ uword indent, n_printed;
+ int i, j, n_counters;
+ static vnet_main_t **my_vnet_mains;
+
+ vec_reset_length (my_vnet_mains);
+
+ indent = format_get_indent (s);
+ n_printed = 0;
+
+ {
+ vlib_combined_counter_main_t *cm;
+ vlib_counter_t v, vtotal;
+ u8 *n = 0;
+
+ for (i = 0; i < vec_len (vnet_mains); i++)
+ {
+ if (vnet_mains[i])
+ vec_add1 (my_vnet_mains, vnet_mains[i]);
+ }
+
+ if (vec_len (my_vnet_mains) == 0)
+ vec_add1 (my_vnet_mains, &vnet_main);
+
+ /* Each vnet_main_t has its own copy of the interface counters */
+ n_counters = vec_len (im->combined_sw_if_counters);
+
+ /* rx, tx counters... */
+ for (j = 0; j < n_counters; j++)
+ {
+ vtotal.packets = 0;
+ vtotal.bytes = 0;
+
+ for (i = 0; i < vec_len (my_vnet_mains); i++)
+ {
+ im = &my_vnet_mains[i]->interface_main;
+ cm = im->combined_sw_if_counters + j;
+ vlib_get_combined_counter (cm, si->sw_if_index, &v);
+ vtotal.packets += v.packets;
+ vtotal.bytes += v.bytes;
+ }
+
+ /* Only display non-zero counters. */
+ if (vtotal.packets == 0)
+ continue;
+
+ if (n_printed > 0)
+ s = format (s, "\n%U", format_white_space, indent);
+ n_printed += 2;
+
+ if (n)
+ _vec_len (n) = 0;
+ n = format (n, "%s packets", cm->name);
+ s = format (s, "%-16v%16Ld", n, vtotal.packets);
+
+ _vec_len (n) = 0;
+ n = format (n, "%s bytes", cm->name);
+ s = format (s, "\n%U%-16v%16Ld",
+ format_white_space, indent, n, vtotal.bytes);
+ }
+ vec_free (n);
+ }
+
+ {
+ vlib_simple_counter_main_t *cm;
+ u64 v, vtotal;
+
+ n_counters = vec_len (im->sw_if_counters);
+
+ for (j = 0; j < n_counters; j++)
+ {
+ vtotal = 0;
+
+ for (i = 0; i < vec_len (my_vnet_mains); i++)
+ {
+ im = &my_vnet_mains[i]->interface_main;
+ cm = im->sw_if_counters + j;
+
+ v = vlib_get_simple_counter (cm, si->sw_if_index);
+ vtotal += v;
+ }
+
+ /* Only display non-zero counters. */
+ if (vtotal == 0)
+ continue;
+
+ if (n_printed > 0)
+ s = format (s, "\n%U", format_white_space, indent);
+ n_printed += 1;
+
+ s = format (s, "%-16s%16Ld", cm->name, vtotal);
+ }
+ }
+
+ return s;
+}
+
+u8 *
+format_vnet_sw_interface (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ vnet_sw_interface_t *si = va_arg (*args, vnet_sw_interface_t *);
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ if (!si)
+ return format (s, "%=32s%=5s%=16s%=16s%=16s",
+ "Name", "Idx", "State", "Counter", "Count");
+
+ s = format (s, "%-32U%=5d%=16U",
+ format_vnet_sw_interface_name, vnm, si, si->sw_if_index,
+ format_vnet_sw_interface_flags, si->flags);
+
+ s = format_vnet_sw_interface_cntrs (s, im, si);
+
+ return s;
+}
+
+u8 *
+format_vnet_sw_interface_name_override (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ vnet_sw_interface_t *si = va_arg (*args, vnet_sw_interface_t *);
+ /* caller supplied display name for this interface */
+ u8 *name = va_arg (*args, u8 *);
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+
+ if (!si)
+ return format (s, "%=32s%=5s%=16s%=16s%=16s",
+ "Name", "Idx", "State", "Counter", "Count");
+
+ s = format (s, "%-32v%=5d%=16U",
+ name, si->sw_if_index,
+ format_vnet_sw_interface_flags, si->flags);
+
+ s = format_vnet_sw_interface_cntrs (s, im, si);
+
+ return s;
+}
+
+uword
+unformat_vnet_hw_interface (unformat_input_t * input, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ u32 *hw_if_index = va_arg (*args, u32 *);
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_device_class_t *c;
+
+ /* Try per device class functions first. */
+ vec_foreach (c, im->device_classes)
+ {
+ if (c->unformat_device_name
+ && unformat_user (input, c->unformat_device_name, hw_if_index))
+ return 1;
+ }
+
+ return unformat_user (input, unformat_hash_vec_string,
+ im->hw_interface_by_name, hw_if_index);
+}
+
+uword
+unformat_vnet_sw_interface (unformat_input_t * input, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ u32 *result = va_arg (*args, u32 *);
+ vnet_hw_interface_t *hi;
+ u32 hw_if_index, id, id_specified;
+ u32 sw_if_index;
+ u8 *if_name = 0;
+ uword *p, error = 0;
+
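+ /* Accept either "hw-name.subid" (resolved through the parent's
+ sub_interface_sw_if_index_by_id hash) or a bare hardware interface
+ name; hidden (API-invisible) interfaces are never matched. */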
+ id = ~0;
+ if (unformat (input, "%_%v.%d%_", &if_name, &id)
+ && ((p = hash_get (vnm->interface_main.hw_interface_by_name, if_name))))
+ {
+ hw_if_index = p[0];
+ id_specified = 1;
+ }
+ else
+ if (unformat (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ id_specified = 0;
+ else
+ goto done;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ if (!id_specified)
+ {
+ sw_if_index = hi->sw_if_index;
+ }
+ else
+ {
+ if (!(p = hash_get (hi->sub_interface_sw_if_index_by_id, id)))
+ goto done;
+ sw_if_index = p[0];
+ }
+ if (!vnet_sw_interface_is_api_visible (vnm, sw_if_index))
+ goto done;
+ *result = sw_if_index;
+ error = 1;
+done:
+ vec_free (if_name);
+ return error;
+}
+
+uword
+unformat_vnet_sw_interface_flags (unformat_input_t * input, va_list * args)
+{
+ u32 *result = va_arg (*args, u32 *);
+ u32 flags = 0;
+
+ if (unformat (input, "up"))
+ flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+ else if (unformat (input, "down"))
+ flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+ else if (unformat (input, "punt"))
+ flags |= VNET_SW_INTERFACE_FLAG_PUNT;
+ else if (unformat (input, "enable"))
+ flags &= ~VNET_SW_INTERFACE_FLAG_PUNT;
+ else
+ return 0;
+
+ *result = flags;
+ return 1;
+}
+
+uword
+unformat_vnet_hw_interface_flags (unformat_input_t * input, va_list * args)
+{
+ u32 *result = va_arg (*args, u32 *);
+ u32 flags = 0;
+
+ if (unformat (input, "up"))
+ flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
+ else if (unformat (input, "down"))
+ flags &= ~VNET_HW_INTERFACE_FLAG_LINK_UP;
+ else
+ return 0;
+
+ *result = flags;
+ return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
new file mode 100644
index 00000000..142bef57
--- /dev/null
+++ b/src/vnet/interface_funcs.h
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * interface_funcs.h: VNET interfaces/sub-interfaces exported functions
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_interface_funcs_h
+#define included_vnet_interface_funcs_h
+
+always_inline vnet_hw_interface_t *
+vnet_get_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
+{
+ return pool_elt_at_index (vnm->interface_main.hw_interfaces, hw_if_index);
+}
+
+always_inline vnet_sw_interface_t *
+vnet_get_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ return pool_elt_at_index (vnm->interface_main.sw_interfaces, sw_if_index);
+}
+
+always_inline vnet_sw_interface_t *
+vnet_get_sw_interface_safe (vnet_main_t * vnm, u32 sw_if_index)
+{
+ if (!pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ return pool_elt_at_index (vnm->interface_main.sw_interfaces, sw_if_index);
+ return (NULL);
+}
+
+always_inline vnet_sw_interface_t *
+vnet_get_hw_sw_interface (vnet_main_t * vnm, u32 hw_if_index)
+{
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, hw->sw_if_index);
+ ASSERT (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ return sw;
+}
+
+always_inline vnet_sw_interface_t *
+vnet_get_sup_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw->type == VNET_SW_INTERFACE_TYPE_SUB ||
+ sw->type == VNET_SW_INTERFACE_TYPE_P2P)
+ sw = vnet_get_sw_interface (vnm, sw->sup_sw_if_index);
+ return sw;
+}
+
+always_inline vnet_hw_interface_t *
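+/* Resolve any interface, including sub-interfaces, to its underlying
+ * hardware interface; e.g. a sub-interface such as "Gig3/0/0.1"
+ * resolves through sup_sw_if_index to the Gig3/0/0 hardware interface. */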
+vnet_get_sup_hw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_sw_interface_t *sw = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ ASSERT (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ return vnet_get_hw_interface (vnm, sw->hw_if_index);
+}
+
+always_inline vnet_hw_interface_class_t *
+vnet_get_hw_interface_class (vnet_main_t * vnm, u32 hw_class_index)
+{
+ return vec_elt_at_index (vnm->interface_main.hw_interface_classes,
+ hw_class_index);
+}
+
+always_inline vnet_device_class_t *
+vnet_get_device_class (vnet_main_t * vnm, u32 dev_class_index)
+{
+ return vec_elt_at_index (vnm->interface_main.device_classes,
+ dev_class_index);
+}
+
+static inline u8 *
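+/* Interface tags are free-form u8 vectors keyed by sw_if_index.
+ * vnet_set_sw_interface_tag() takes ownership of the vector (any previous
+ * tag is freed), so callers must not free it afterwards. A minimal sketch,
+ * assuming vnm from vnet_get_main () and a valid sw_if_index:
+ *
+ *   u8 *tag = format (0, "uplink%c", 0);  // NUL-terminated, printed via %s
+ *   vnet_set_sw_interface_tag (vnm, tag, sw_if_index);
+ */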
+vnet_get_sw_interface_tag (vnet_main_t * vnm, u32 sw_if_index)
+{
+ uword *p;
+ p = hash_get (vnm->interface_tag_by_sw_if_index, sw_if_index);
+ if (p)
+ return ((u8 *) p[0]);
+ return 0;
+}
+
+static inline void
+vnet_set_sw_interface_tag (vnet_main_t * vnm, u8 * tag, u32 sw_if_index)
+{
+ uword *p;
+ p = hash_get (vnm->interface_tag_by_sw_if_index, sw_if_index);
+ if (p)
+ {
+ u8 *oldtag = (u8 *) p[0];
+ hash_unset (vnm->interface_tag_by_sw_if_index, sw_if_index);
+ vec_free (oldtag);
+ }
+
+ hash_set (vnm->interface_tag_by_sw_if_index, sw_if_index, tag);
+}
+
+static inline void
+vnet_clear_sw_interface_tag (vnet_main_t * vnm, u32 sw_if_index)
+{
+ uword *p;
+ p = hash_get (vnm->interface_tag_by_sw_if_index, sw_if_index);
+ if (p)
+ {
+ u8 *oldtag = (u8 *) p[0];
+ hash_unset (vnm->interface_tag_by_sw_if_index, sw_if_index);
+ vec_free (oldtag);
+ }
+}
+
+/**
+ * Call back walk type for walking SW indices on a HW interface
+ */
+typedef void (*vnet_hw_sw_interface_walk_t) (vnet_main_t * vnm,
+ u32 sw_if_index, void *ctx);
+
+/**
+ * @brief
+ * Walk the SW interfaces on a HW interface - this is the super
+ * interface and any sub-interfaces.
+ */
+void vnet_hw_interface_walk_sw (vnet_main_t * vnm,
+ u32 hw_if_index,
+ vnet_hw_sw_interface_walk_t fn, void *ctx);
+
+/* Register a hardware interface instance. */
+u32 vnet_register_interface (vnet_main_t * vnm,
+ u32 dev_class_index,
+ u32 dev_instance,
+ u32 hw_class_index, u32 hw_instance);
+
+/* Creates a software interface given template. */
+clib_error_t *vnet_create_sw_interface (vnet_main_t * vnm,
+ vnet_sw_interface_t * template,
+ u32 * sw_if_index);
+
+void vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index);
+void vnet_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index);
+int vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index);
+
+always_inline uword
+vnet_sw_interface_get_flags (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+ return sw->flags;
+}
+
+always_inline uword
+vnet_sw_interface_is_admin_up (vnet_main_t * vnm, u32 sw_if_index)
+{
+ return (vnet_sw_interface_get_flags (vnm, sw_if_index) &
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+}
+
+always_inline uword
+vnet_swif_is_api_visible (vnet_sw_interface_t * si)
+{
+ return !(si->flags & VNET_SW_INTERFACE_FLAG_HIDDEN);
+}
+
+always_inline uword
+vnet_sw_interface_is_api_visible (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ return vnet_swif_is_api_visible (si);
+}
+
+always_inline uword
+vnet_sw_interface_is_api_valid (vnet_main_t * vnm, u32 sw_if_index)
+{
+ return !pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)
+ && vnet_sw_interface_is_api_visible (vnm, sw_if_index);
+}
+
+always_inline uword
+vnet_hw_interface_get_flags (vnet_main_t * vnm, u32 hw_if_index)
+{
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ return hw->flags;
+}
+
+always_inline uword
+vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index,
+ vlib_rx_or_tx_t dir)
+{
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ return hw->max_l3_packet_bytes[dir];
+}
+
+always_inline uword
+vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index,
+ vlib_rx_or_tx_t dir)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ return (hw->max_l3_packet_bytes[dir]);
+}
+
+always_inline uword
+vnet_hw_interface_is_link_up (vnet_main_t * vnm, u32 hw_if_index)
+{
+ return (vnet_hw_interface_get_flags (vnm, hw_if_index) &
+ VNET_HW_INTERFACE_FLAG_LINK_UP) != 0;
+}
+
+always_inline vlib_frame_t *
+vnet_get_frame_to_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ return vlib_get_frame_to_node (vnm->vlib_main, hw->output_node_index);
+}
+
+always_inline void
+vnet_put_frame_to_sw_interface (vnet_main_t * vnm, u32 sw_if_index,
+ vlib_frame_t * f)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ return vlib_put_frame_to_node (vnm->vlib_main, hw->output_node_index, f);
+}
+
+/* Change interface flags (e.g. up, down, enable, disable). */
+clib_error_t *vnet_hw_interface_set_flags (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags);
+
+/* Change interface flags (e.g. up, down, enable, disable). */
+clib_error_t *vnet_sw_interface_set_flags (vnet_main_t * vnm, u32 sw_if_index,
+ u32 flags);
+
+/* Change interface class. */
+clib_error_t *vnet_hw_interface_set_class (vnet_main_t * vnm, u32 hw_if_index,
+ u32 new_hw_class_index);
+
+/* Redirect rx pkts to node */
+int vnet_hw_interface_rx_redirect_to_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index);
+
+void vnet_hw_interface_init_for_class (vnet_main_t * vnm, u32 hw_if_index,
+ u32 hw_class_index, u32 hw_instance);
+
+/* Rename interface */
+clib_error_t *vnet_rename_interface (vnet_main_t * vnm, u32 hw_if_index,
+ char *new_name);
+
+/* Change interface MAC address. */
+clib_error_t *vnet_hw_interface_change_mac_address (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u64 mac_address);
+
+/* Formats sw/hw interface. */
+format_function_t format_vnet_hw_interface;
+format_function_t format_vnet_hw_interface_rx_mode;
+format_function_t format_vnet_sw_interface;
+format_function_t format_vnet_sw_interface_name;
+format_function_t format_vnet_sw_interface_name_override;
+format_function_t format_vnet_sw_if_index_name;
+format_function_t format_vnet_sw_interface_flags;
+
+/* Parses sw/hw interface name -> index. */
+unformat_function_t unformat_vnet_sw_interface;
+unformat_function_t unformat_vnet_hw_interface;
+
+/* Parses interface flags (up, down, enable, disable, etc.) */
+unformat_function_t unformat_vnet_hw_interface_flags;
+unformat_function_t unformat_vnet_sw_interface_flags;
+
+/* Node runtime for interface output function. */
+typedef struct
+{
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u32 dev_instance;
+ u32 is_deleted;
+} vnet_interface_output_runtime_t;
+
+/* Interface output function. */
+void *vnet_interface_output_node_multiarch_select (void);
+
+word vnet_sw_interface_compare (vnet_main_t * vnm, uword sw_if_index0,
+ uword sw_if_index1);
+word vnet_hw_interface_compare (vnet_main_t * vnm, uword hw_if_index0,
+ uword hw_if_index1);
+
+typedef enum
+{
+ VNET_INTERFACE_OUTPUT_NEXT_DROP,
+ VNET_INTERFACE_OUTPUT_NEXT_TX,
+} vnet_interface_output_next_t;
+
+typedef enum
+{
+ VNET_INTERFACE_TX_NEXT_DROP,
+ VNET_INTERFACE_TX_N_NEXT,
+} vnet_interface_tx_next_t;
+
+#define VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT VNET_INTERFACE_TX_N_NEXT
+
+typedef enum
+{
+ VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN,
+ VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED,
+} vnet_interface_output_error_t;
+
+/* Format for interface output traces. */
+u8 *format_vnet_interface_output_trace (u8 * s, va_list * va);
+
+serialize_function_t serialize_vnet_interface_state,
+ unserialize_vnet_interface_state;
+
+#endif /* included_vnet_interface_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c
new file mode 100644
index 00000000..06f1c7dd
--- /dev/null
+++ b/src/vnet/interface_output.c
@@ -0,0 +1,1260 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * interface_output.c: interface output node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/icmp46_packet.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip6.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/feature/feature.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+ u8 data[128 - sizeof (u32)];
+} interface_output_trace_t;
+
+u8 *
+format_vnet_interface_output_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ vlib_node_t *node = va_arg (*va, vlib_node_t *);
+ interface_output_trace_t *t = va_arg (*va, interface_output_trace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *si;
+ uword indent;
+
+ if (t->sw_if_index != (u32) ~ 0)
+ {
+ indent = format_get_indent (s);
+
+ if (pool_is_free_index
+ (vnm->interface_main.sw_interfaces, t->sw_if_index))
+ {
+ /* the interface may have been deleted by the time the trace is printed */
+ s = format (s, "sw_if_index: %d\n%U%U",
+ t->sw_if_index,
+ format_white_space, indent,
+ node->format_buffer ? node->
+ format_buffer : format_hex_bytes, t->data,
+ sizeof (t->data));
+ }
+ else
+ {
+ si = vnet_get_sw_interface (vnm, t->sw_if_index);
+
+ s = format (s, "%U\n%U%U",
+ format_vnet_sw_interface_name, vnm, si,
+ format_white_space, indent,
+ node->format_buffer ? node->
+ format_buffer : format_hex_bytes, t->data,
+ sizeof (t->data));
+ }
+ }
+ return s;
+}
+
+static void
+vnet_interface_output_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, uword n_buffers)
+{
+ u32 n_left, *from;
+
+ n_left = n_buffers;
+ from = vlib_frame_args (frame);
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ interface_output_trace_t *t0, *t1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ clib_memcpy (t0->data, vlib_buffer_get_current (b0),
+ sizeof (t0->data));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ clib_memcpy (t1->data, vlib_buffer_get_current (b1),
+ sizeof (t1->data));
+ }
+ from += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ interface_output_trace_t *t0;
+
+ bi0 = from[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ clib_memcpy (t0->data, vlib_buffer_get_current (b0),
+ sizeof (t0->data));
+ }
+ from += 1;
+ n_left -= 1;
+ }
+}
+
+static_always_inline void
+calc_checksums (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ tcp_header_t *th;
+ udp_header_t *uh;
+
+ int is_ip4 = (b->flags & VNET_BUFFER_F_IS_IP4) != 0;
+ int is_ip6 = (b->flags & VNET_BUFFER_F_IS_IP6) != 0;
+
+ ASSERT (!(is_ip4 && is_ip6));
+
+ ip4 = (ip4_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
+ ip6 = (ip6_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
+ th = (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+ uh = (udp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+
+ if (is_ip4)
+ {
+ if (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)
+ ip4->checksum = ip4_header_checksum (ip4);
+ if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
+ th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4);
+ if (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)
+ uh->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4);
+ }
+ if (is_ip6)
+ {
+ int bogus;
+ if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
+ th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ if (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)
+ uh->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ }
+
+ b->flags &= ~VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ b->flags &= ~VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+}
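+
+/*
+ * Sketch of the producer side: a node that wants these checksums filled
+ * in records the header offsets and sets the offload flags before the
+ * buffer reaches interface output.  The offset arithmetic is assumed
+ * from the b->data usage above; ip4/tcp are hypothetical locals.
+ *
+ *   vnet_buffer (b)->l3_hdr_offset = (u8 *) ip4 - b->data;
+ *   vnet_buffer (b)->l4_hdr_offset = (u8 *) tcp - b->data;
+ *   b->flags |= VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_OFFLOAD_IP_CKSUM
+ *     | VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ */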
+
+static_always_inline uword
+vnet_interface_output_node_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, vnet_main_t * vnm,
+ vnet_hw_interface_t * hi,
+ int do_tx_offloads)
+{
+ vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
+ vnet_sw_interface_t *si;
+ u32 n_left_to_tx, *from, *from_end, *to_tx;
+ u32 n_bytes, n_buffers, n_packets;
+ u32 n_bytes_b0, n_bytes_b1, n_bytes_b2, n_bytes_b3;
+ u32 thread_index = vm->thread_index;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX;
+ u32 current_config_index = ~0;
+ u8 arc = im->output_feature_arc_index;
+
+ n_buffers = frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vnet_interface_output_trace (vm, node, frame, n_buffers);
+
+ from = vlib_frame_args (frame);
+
+ if (rt->is_deleted)
+ return vlib_error_drop_buffers (vm, node, from,
+ /* buffer stride */ 1,
+ n_buffers,
+ VNET_INTERFACE_OUTPUT_NEXT_DROP,
+ node->node_index,
+ VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED);
+
+ si = vnet_get_sw_interface (vnm, rt->sw_if_index);
+ hi = vnet_get_sup_hw_interface (vnm, rt->sw_if_index);
+ if (!(si->flags & (VNET_SW_INTERFACE_FLAG_ADMIN_UP |
+ VNET_SW_INTERFACE_FLAG_BOND_SLAVE)) ||
+ !(hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+ {
+ vlib_simple_counter_main_t *cm;
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_TX_ERROR);
+ vlib_increment_simple_counter (cm, thread_index,
+ rt->sw_if_index, n_buffers);
+
+ return vlib_error_drop_buffers (vm, node, from,
+ /* buffer stride */ 1,
+ n_buffers,
+ VNET_INTERFACE_OUTPUT_NEXT_DROP,
+ node->node_index,
+ VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN);
+ }
+
+ from_end = from + n_buffers;
+
+ /* Total byte count of all buffers. */
+ n_bytes = 0;
+ n_packets = 0;
+
+ /* interface-output feature arc handling */
+ if (PREDICT_FALSE (vnet_have_features (arc, rt->sw_if_index)))
+ {
+ vnet_feature_config_main_t *fcm;
+ fcm = vnet_feature_get_config_main (arc);
+ current_config_index = vnet_get_feature_config_index (arc,
+ rt->sw_if_index);
+ vnet_get_config_data (&fcm->config_main, &current_config_index,
+ &next_index, 0);
+ }
+
+ while (from < from_end)
+ {
+ /* Get a new next frame, since the previous incomplete frame may
+ have fewer than VNET_FRAME_SIZE vectors in it. */
+ vlib_get_new_next_frame (vm, node, next_index, to_tx, n_left_to_tx);
+
+ while (from + 8 <= from_end && n_left_to_tx >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 tx_swif0, tx_swif1, tx_swif2, tx_swif3;
+ u32 or_flags;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[4], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[5], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[6], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[7], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+ bi2 = from[2];
+ bi3 = from[3];
+ to_tx[0] = bi0;
+ to_tx[1] = bi1;
+ to_tx[2] = bi2;
+ to_tx[3] = bi3;
+ from += 4;
+ to_tx += 4;
+ n_left_to_tx -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* Be grumpy about zero-length buffers, for the benefit of
+ the driver tx function. */
+ ASSERT (b0->current_length > 0);
+ ASSERT (b1->current_length > 0);
+ ASSERT (b2->current_length > 0);
+ ASSERT (b3->current_length > 0);
+
+ n_bytes_b0 = vlib_buffer_length_in_chain (vm, b0);
+ n_bytes_b1 = vlib_buffer_length_in_chain (vm, b1);
+ n_bytes_b2 = vlib_buffer_length_in_chain (vm, b2);
+ n_bytes_b3 = vlib_buffer_length_in_chain (vm, b3);
+ tx_swif0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ tx_swif1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ tx_swif2 = vnet_buffer (b2)->sw_if_index[VLIB_TX];
+ tx_swif3 = vnet_buffer (b3)->sw_if_index[VLIB_TX];
+
+ n_bytes += n_bytes_b0 + n_bytes_b1;
+ n_bytes += n_bytes_b2 + n_bytes_b3;
+ n_packets += 4;
+
+ if (PREDICT_FALSE (current_config_index != ~0))
+ {
+ b0->feature_arc_index = arc;
+ b1->feature_arc_index = arc;
+ b2->feature_arc_index = arc;
+ b3->feature_arc_index = arc;
+ b0->current_config_index = current_config_index;
+ b1->current_config_index = current_config_index;
+ b2->current_config_index = current_config_index;
+ b3->current_config_index = current_config_index;
+ }
+
+ /* update vlan subif tx counts, if required */
+ if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index))
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ thread_index, tx_swif0, 1,
+ n_bytes_b0);
+ }
+
+ if (PREDICT_FALSE (tx_swif1 != rt->sw_if_index))
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ thread_index, tx_swif1, 1,
+ n_bytes_b1);
+ }
+
+ if (PREDICT_FALSE (tx_swif2 != rt->sw_if_index))
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ thread_index, tx_swif2, 1,
+ n_bytes_b2);
+ }
+ if (PREDICT_FALSE (tx_swif3 != rt->sw_if_index))
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ thread_index, tx_swif3, 1,
+ n_bytes_b3);
+ }
+
+ or_flags = b0->flags | b1->flags | b2->flags | b3->flags;
+
+ if (do_tx_offloads)
+ {
+ if (or_flags &
+ (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
+ VNET_BUFFER_F_OFFLOAD_UDP_CKSUM |
+ VNET_BUFFER_F_OFFLOAD_IP_CKSUM))
+ {
+ calc_checksums (vm, b0);
+ calc_checksums (vm, b1);
+ calc_checksums (vm, b2);
+ calc_checksums (vm, b3);
+ }
+ }
+ }
+
+ while (from + 1 <= from_end && n_left_to_tx >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 tx_swif0;
+
+ bi0 = from[0];
+ to_tx[0] = bi0;
+ from += 1;
+ to_tx += 1;
+ n_left_to_tx -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* Be grumpy about zero-length buffers, for the benefit of
+ the driver tx function. */
+ ASSERT (b0->current_length > 0);
+
+ n_bytes_b0 = vlib_buffer_length_in_chain (vm, b0);
+ tx_swif0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ n_bytes += n_bytes_b0;
+ n_packets += 1;
+
+ if (PREDICT_FALSE (current_config_index != ~0))
+ {
+ b0->feature_arc_index = arc;
+ b0->current_config_index = current_config_index;
+ }
+
+ if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index))
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ thread_index, tx_swif0, 1,
+ n_bytes_b0);
+ }
+
+ if (do_tx_offloads)
+ calc_checksums (vm, b0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_tx);
+ }
+
+ /* Update main interface stats. */
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ rt->sw_if_index, n_packets, n_bytes);
+ return n_buffers;
+}
+
+static_always_inline uword
+vnet_interface_output_node (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi;
+ vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
+ hi = vnet_get_sup_hw_interface (vnm, rt->sw_if_index);
+
+ if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD)
+ return vnet_interface_output_node_inline (vm, node, frame, vnm, hi,
+ /* do_tx_offloads */ 0);
+ else
+ return vnet_interface_output_node_inline (vm, node, frame, vnm, hi,
+ /* do_tx_offloads */ 1);
+}
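+
+/*
+ * Note: do_tx_offloads is passed as a compile-time constant, so the
+ * always-inline worker above is instantiated twice and the per-packet
+ * checksum branch folds away when the hardware does L4 checksums.
+ */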
+
+VLIB_NODE_FUNCTION_MULTIARCH_CLONE (vnet_interface_output_node);
+CLIB_MULTIARCH_SELECT_FN (vnet_interface_output_node);
+
+/* Use buffer's sw_if_index[VLIB_TX] to choose output interface. */
+static uword
+vnet_per_buffer_interface_output (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 n_left_to_next, *from, *to_next;
+ u32 n_left_from, next_index;
+
+ n_left_from = frame->n_vectors;
+
+ from = vlib_frame_args (frame);
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1, next0, next1;
+ vlib_buffer_t *b0, *b1;
+ vnet_hw_interface_t *hi0, *hi1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ hi0 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer (b0)->sw_if_index[VLIB_TX]);
+ hi1 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer (b1)->sw_if_index[VLIB_TX]);
+
+ next0 = hi0->hw_if_index;
+ next1 = hi1->hw_if_index;
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0;
+ vlib_buffer_t *b0;
+ vnet_hw_interface_t *hi0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ hi0 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer (b0)->sw_if_index[VLIB_TX]);
+
+ next0 = hi0->hw_if_index;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+always_inline u32
+counter_index (vlib_main_t * vm, vlib_error_t e)
+{
+ vlib_node_t *n;
+ u32 ci, ni;
+
+ ni = vlib_error_get_node (e);
+ n = vlib_get_node (vm, ni);
+
+ ci = vlib_error_get_code (e);
+ ASSERT (ci < n->n_errors);
+
+ ci += n->error_heap_index;
+
+ return ci;
+}
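+
+/*
+ * Worked example: a vlib_error_t packs (node index, code).  For a node
+ * whose error_heap_index is 40, code 3 maps to counter index 43, i.e.
+ * em->counters[43] and the matching entry in error_strings_heap.
+ * (Numbers are illustrative.)
+ */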
+
+static u8 *
+format_vnet_error_trace (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ vlib_error_t *e = va_arg (*va, vlib_error_t *);
+ vlib_node_t *error_node;
+ vlib_error_main_t *em = &vm->error_main;
+ u32 i;
+
+ error_node = vlib_get_node (vm, vlib_error_get_node (e[0]));
+ i = counter_index (vm, e[0]);
+ s = format (s, "%v: %s", error_node->name, em->error_strings_heap[i]);
+
+ return s;
+}
+
+static void
+trace_errors_with_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left, *buffers;
+
+ buffers = vlib_frame_vector_args (frame);
+ n_left = frame->n_vectors;
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ vlib_error_t *t0, *t1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD);
+
+ bi0 = buffers[0];
+ bi1 = buffers[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0[0] = b0->error;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1[0] = b1->error;
+ }
+ buffers += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ vlib_error_t *t0;
+
+ bi0 = buffers[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0[0] = b0->error;
+ }
+ buffers += 1;
+ n_left -= 1;
+ }
+}
+
+static u8 *
+validate_error (vlib_main_t * vm, vlib_error_t * e, u32 index)
+{
+ uword node_index = vlib_error_get_node (e[0]);
+ uword code = vlib_error_get_code (e[0]);
+ vlib_node_t *n;
+
+ if (node_index >= vec_len (vm->node_main.nodes))
+ return format (0, "[%d], node index out of range 0x%x, error 0x%x",
+ index, node_index, e[0]);
+
+ n = vlib_get_node (vm, node_index);
+ if (code >= n->n_errors)
+ return format (0, "[%d], code %d out of range for node %v",
+ index, code, n->name);
+
+ return 0;
+}
+
+static u8 *
+validate_error_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ u32 *buffers = vlib_frame_args (f);
+ vlib_buffer_t *b;
+ u8 *msg = 0;
+ uword i;
+
+ for (i = 0; i < f->n_vectors; i++)
+ {
+ b = vlib_get_buffer (vm, buffers[i]);
+ msg = validate_error (vm, &b->error, i);
+ if (msg)
+ return msg;
+ }
+
+ return msg;
+}
+
+typedef enum
+{
+ VNET_ERROR_DISPOSITION_DROP,
+ VNET_ERROR_DISPOSITION_PUNT,
+ VNET_ERROR_N_DISPOSITION,
+} vnet_error_disposition_t;
+
+always_inline void
+do_packet (vlib_main_t * vm, vlib_error_t a)
+{
+ vlib_error_main_t *em = &vm->error_main;
+ u32 i = counter_index (vm, a);
+ em->counters[i] += 1;
+ vlib_error_elog_count (vm, i, 1);
+}
+
+static_always_inline uword
+process_drop_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, vnet_error_disposition_t disposition)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_error_main_t *em = &vm->error_main;
+ u32 *buffers, *first_buffer;
+ vlib_error_t current_error;
+ u32 current_counter_index, n_errors_left;
+ u32 current_sw_if_index, n_errors_current_sw_if_index;
+ u64 current_counter;
+ vlib_simple_counter_main_t *cm;
+ u32 thread_index = vm->thread_index;
+
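+ /* Per-disposition error remembered across frames; used to seed the
+ batched counter update below. */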
+ static vlib_error_t memory[VNET_ERROR_N_DISPOSITION];
+ static char memory_init[VNET_ERROR_N_DISPOSITION];
+
+ buffers = vlib_frame_args (frame);
+ first_buffer = buffers;
+
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, first_buffer[0]);
+
+ if (!memory_init[disposition])
+ {
+ memory_init[disposition] = 1;
+ memory[disposition] = b->error;
+ }
+
+ current_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ n_errors_current_sw_if_index = 0;
+ }
+
+ current_error = memory[disposition];
+ current_counter_index = counter_index (vm, memory[disposition]);
+ current_counter = em->counters[current_counter_index];
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ trace_errors_with_buffers (vm, node, frame);
+
+ n_errors_left = frame->n_vectors;
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ (disposition == VNET_ERROR_DISPOSITION_PUNT
+ ? VNET_INTERFACE_COUNTER_PUNT
+ : VNET_INTERFACE_COUNTER_DROP));
+
+ while (n_errors_left >= 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ vnet_sw_interface_t *sw_if0, *sw_if1;
+ vlib_error_t e0, e1;
+ u32 bi0, bi1;
+ u32 sw_if_index0, sw_if_index1;
+
+ bi0 = buffers[0];
+ bi1 = buffers[1];
+
+ buffers += 2;
+ n_errors_left -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ e0 = b0->error;
+ e1 = b1->error;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ /* Speculate that current_sw_if_index == sw_if_index[01]. */
+ n_errors_current_sw_if_index += 2;
+
+ /* Speculatively assume both (node, code) pairs are equal to the
+ current (node, code). */
+ current_counter += 2;
+
+ if (PREDICT_FALSE (e0 != current_error
+ || e1 != current_error
+ || sw_if_index0 != current_sw_if_index
+ || sw_if_index1 != current_sw_if_index))
+ {
+ current_counter -= 2;
+ n_errors_current_sw_if_index -= 2;
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
+
+ /* Increment super-interface drop/punt counters for
+ sub-interfaces; the boolean expression below adds 1 only when
+ the interface has a distinct super-interface. */
+ sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0);
+ vlib_increment_simple_counter
+ (cm, thread_index, sw_if0->sup_sw_if_index,
+ sw_if0->sup_sw_if_index != sw_if_index0);
+
+ sw_if1 = vnet_get_sw_interface (vnm, sw_if_index1);
+ vlib_increment_simple_counter
+ (cm, thread_index, sw_if1->sup_sw_if_index,
+ sw_if1->sup_sw_if_index != sw_if_index1);
+
+ em->counters[current_counter_index] = current_counter;
+ do_packet (vm, e0);
+ do_packet (vm, e1);
+
+ /* For 2 repeated errors, change current error. */
+ if (e0 == e1 && e1 != current_error)
+ {
+ current_error = e0;
+ current_counter_index = counter_index (vm, e0);
+ }
+ current_counter = em->counters[current_counter_index];
+ }
+ }
+
+ while (n_errors_left >= 1)
+ {
+ vlib_buffer_t *b0;
+ vnet_sw_interface_t *sw_if0;
+ vlib_error_t e0;
+ u32 bi0, sw_if_index0;
+
+ bi0 = buffers[0];
+
+ buffers += 1;
+ n_errors_left -= 1;
+ current_counter += 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ e0 = b0->error;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* Increment drop/punt counters. */
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+
+ /* Increment super-interface drop/punt counters for sub-interfaces. */
+ sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0);
+ vlib_increment_simple_counter (cm, thread_index,
+ sw_if0->sup_sw_if_index,
+ sw_if0->sup_sw_if_index != sw_if_index0);
+
+ if (PREDICT_FALSE (e0 != current_error))
+ {
+ current_counter -= 1;
+
+ vlib_error_elog_count (vm, current_counter_index,
+ (current_counter
+ - em->counters[current_counter_index]));
+
+ em->counters[current_counter_index] = current_counter;
+
+ do_packet (vm, e0);
+ current_error = e0;
+ current_counter_index = counter_index (vm, e0);
+ current_counter = em->counters[current_counter_index];
+ }
+ }
+
+ if (n_errors_current_sw_if_index > 0)
+ {
+ vnet_sw_interface_t *si;
+
+ vlib_increment_simple_counter (cm, thread_index, current_sw_if_index,
+ n_errors_current_sw_if_index);
+
+ si = vnet_get_sw_interface (vnm, current_sw_if_index);
+ if (si->sup_sw_if_index != current_sw_if_index)
+ vlib_increment_simple_counter (cm, thread_index, si->sup_sw_if_index,
+ n_errors_current_sw_if_index);
+ }
+
+ vlib_error_elog_count (vm, current_counter_index,
+ (current_counter
+ - em->counters[current_counter_index]));
+
+ /* Return cached counter. */
+ em->counters[current_counter_index] = current_counter;
+
+ /* Save memory for next iteration. */
+ memory[disposition] = current_error;
+
+ if (disposition == VNET_ERROR_DISPOSITION_DROP || !vm->os_punt_frame)
+ {
+ vlib_buffer_free (vm, first_buffer, frame->n_vectors);
+
+ /* If there is no punt function, free the frame as well. */
+ if (disposition == VNET_ERROR_DISPOSITION_PUNT && !vm->os_punt_frame)
+ vlib_frame_free (vm, node, frame);
+ }
+ else
+ vm->os_punt_frame (vm, node, frame);
+
+ return frame->n_vectors;
+}
+
+static inline void
+pcap_drop_trace (vlib_main_t * vm,
+ vnet_interface_main_t * im, vlib_frame_t * f)
+{
+ u32 *from;
+ u32 n_left = f->n_vectors;
+ vlib_buffer_t *b0, *p1;
+ u32 bi0;
+ i16 save_current_data;
+ u16 save_current_length;
+
+ from = vlib_frame_vector_args (f);
+
+ while (n_left > 0)
+ {
+ if (PREDICT_TRUE (n_left > 1))
+ {
+ p1 = vlib_get_buffer (vm, from[1]);
+ vlib_prefetch_buffer_header (p1, LOAD);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ from++;
+ n_left--;
+
+ /* See if we're pointedly ignoring this specific error */
+ if (im->pcap_drop_filter_hash
+ && hash_get (im->pcap_drop_filter_hash, b0->error))
+ continue;
+
+ /* Trace all drops, or drops received on a specific interface */
+ if (im->pcap_sw_if_index == 0 ||
+ im->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_RX])
+ {
+ save_current_data = b0->current_data;
+ save_current_length = b0->current_length;
+
+ /*
+ * Typically, we'll need to rewind the buffer
+ */
+ if (b0->current_data > 0)
+ vlib_buffer_advance (b0, (word) - b0->current_data);
+
+ pcap_add_buffer (&im->pcap_main, vm, bi0, 512);
+
+ b0->current_data = save_current_data;
+ b0->current_length = save_current_length;
+ }
+ }
+}
+
+void
+vnet_pcap_drop_trace_filter_add_del (u32 error_index, int is_add)
+{
+ vnet_interface_main_t *im = &vnet_get_main ()->interface_main;
+
+ if (im->pcap_drop_filter_hash == 0)
+ im->pcap_drop_filter_hash = hash_create (0, sizeof (uword));
+
+ if (is_add)
+ hash_set (im->pcap_drop_filter_hash, error_index, 1);
+ else
+ hash_unset (im->pcap_drop_filter_hash, error_index);
+}
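+
+/*
+ * Illustrative usage: stop capturing a noisy drop reason.  The
+ * error_index argument is the vlib_error_t value observed in b->error,
+ * i.e. the same key used by the hash lookup in pcap_drop_trace.
+ *
+ *   vnet_pcap_drop_trace_filter_add_del (error_index, 1);   (add)
+ *   vnet_pcap_drop_trace_filter_add_del (error_index, 0);   (delete)
+ */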
+
+static uword
+process_drop (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_interface_main_t *im = &vnet_get_main ()->interface_main;
+
+ if (PREDICT_FALSE (im->drop_pcap_enable))
+ pcap_drop_trace (vm, im, frame);
+
+ return process_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_DROP);
+}
+
+static uword
+process_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return process_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_PUNT);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (drop_buffers,static) = {
+ .function = process_drop,
+ .name = "error-drop",
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ .vector_size = sizeof (u32),
+ .format_trace = format_vnet_error_trace,
+ .validate_frame = validate_error_frame,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (drop_buffers, process_drop);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (punt_buffers,static) = {
+ .function = process_punt,
+ .flags = (VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH
+ | VLIB_NODE_FLAG_IS_PUNT),
+ .name = "error-punt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vnet_error_trace,
+ .validate_frame = validate_error_frame,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (punt_buffers, process_punt);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node,static) = {
+ .function = vnet_per_buffer_interface_output,
+ .name = "interface-output",
+ .vector_size = sizeof (u32),
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (vnet_per_buffer_interface_output_node,
+ vnet_per_buffer_interface_output);
+
+static uword
+interface_tx_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 last_sw_if_index = ~0;
+ vlib_frame_t *to_frame = 0;
+ vnet_hw_interface_t *hw = 0;
+ u32 *from, *to_next = 0;
+ u32 n_left_from;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 sw_if_index0;
+
+ bi0 = from[0];
+ from++;
+ n_left_from--;
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE ((last_sw_if_index != sw_if_index0) || to_frame == 0))
+ {
+ if (to_frame)
+ {
+ hw = vnet_get_sup_hw_interface (vnm, last_sw_if_index);
+ vlib_put_frame_to_node (vm, hw->tx_node_index, to_frame);
+ }
+ last_sw_if_index = sw_if_index0;
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ to_frame = vlib_get_frame_to_node (vm, hw->tx_node_index);
+ to_next = vlib_frame_vector_args (to_frame);
+ }
+
+ to_next[0] = bi0;
+ to_next++;
+ to_frame->n_vectors++;
+ }
+ if (to_frame)
+ vlib_put_frame_to_node (vm, hw->tx_node_index, to_frame);
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (interface_tx, static) = {
+ .function = interface_tx_node_fn,
+ .name = "interface-tx",
+ .vector_size = sizeof (u32),
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VNET_FEATURE_ARC_INIT (interface_output, static) =
+{
+ .arc_name = "interface-output",
+ .start_nodes = VNET_FEATURES (0),
+ .arc_index_ptr = &vnet_main.interface_main.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (span_tx, static) = {
+ .arc_name = "interface-output",
+ .node_name = "span-output",
+ .runs_before = VNET_FEATURES ("interface-tx"),
+};
+
+VNET_FEATURE_INIT (interface_tx, static) = {
+ .arc_name = "interface-output",
+ .node_name = "interface-tx",
+ .runs_before = 0,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+vnet_per_buffer_interface_output_hw_interface_add_del (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 is_create)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ u32 next_index;
+
+ next_index = vlib_node_add_next_with_slot
+ (vnm->vlib_main, vnet_per_buffer_interface_output_node.index,
+ hi->output_node_index,
+ /* next_index */ hw_if_index);
+
+ ASSERT (next_index == hw_if_index);
+
+ return 0;
+}
+
+VNET_HW_INTERFACE_ADD_DEL_FUNCTION
+ (vnet_per_buffer_interface_output_hw_interface_add_del);
+
+static clib_error_t *
+pcap_drop_trace_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u8 *filename;
+ u32 max;
+ int matched = 0;
+ clib_error_t *error = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "on"))
+ {
+ if (im->drop_pcap_enable == 0)
+ {
+ if (im->pcap_filename == 0)
+ im->pcap_filename = format (0, "/tmp/drop.pcap%c", 0);
+
+ memset (&im->pcap_main, 0, sizeof (im->pcap_main));
+ im->pcap_main.file_name = (char *) im->pcap_filename;
+ im->pcap_main.n_packets_to_capture = 100;
+ if (im->pcap_pkts_to_capture)
+ im->pcap_main.n_packets_to_capture = im->pcap_pkts_to_capture;
+
+ im->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet;
+ im->drop_pcap_enable = 1;
+ vlib_cli_output (vm, "pcap drop capture on...");
+ }
+ else
+ {
+ vlib_cli_output (vm, "pcap drop capture already on...");
+ }
+ matched = 1;
+ }
+ else if (unformat (input, "off"))
+ {
+ matched = 1;
+
+ if (im->drop_pcap_enable)
+ {
+ vlib_cli_output (vm, "captured %d pkts...",
+ im->pcap_main.n_packets_captured);
+ if (im->pcap_main.n_packets_captured)
+ {
+ im->pcap_main.n_packets_to_capture =
+ im->pcap_main.n_packets_captured;
+ error = pcap_write (&im->pcap_main);
+ if (error)
+ clib_error_report (error);
+ else
+ vlib_cli_output (vm, "saved to %s...", im->pcap_filename);
+ }
+ }
+ else
+ {
+ vlib_cli_output (vm, "pcap drop capture already off...");
+ }
+
+ im->drop_pcap_enable = 0;
+ }
+ else if (unformat (input, "max %d", &max))
+ {
+ im->pcap_pkts_to_capture = max;
+ matched = 1;
+ }
+
+ else if (unformat (input, "intfc %U",
+ unformat_vnet_sw_interface, vnm,
+ &im->pcap_sw_if_index))
+ matched = 1;
+ else if (unformat (input, "intfc any"))
+ {
+ im->pcap_sw_if_index = 0;
+ matched = 1;
+ }
+ else if (unformat (input, "file %s", &filename))
+ {
+ u8 *chroot_filename;
+ /* Brain-police user path input */
+ if (strstr ((char *) filename, "..")
+ || index ((char *) filename, '/'))
+ {
+ vlib_cli_output (vm, "illegal characters in filename '%s'",
+ filename);
+ continue;
+ }
+
+ chroot_filename = format (0, "/tmp/%s%c", filename, 0);
+ vec_free (filename);
+
+ if (im->pcap_filename)
+ vec_free (im->pcap_filename);
+ im->pcap_filename = chroot_filename;
+ im->pcap_main.file_name = (char *) im->pcap_filename;
+ matched = 1;
+ }
+ else if (unformat (input, "status"))
+ {
+ if (im->drop_pcap_enable == 0)
+ {
+ vlib_cli_output (vm, "pcap drop capture is off...");
+ continue;
+ }
+
+ vlib_cli_output (vm, "pcap drop capture: %d of %d pkts...",
+ im->pcap_main.n_packets_captured,
+ im->pcap_main.n_packets_to_capture);
+ matched = 1;
+ }
+
+ else
+ break;
+ }
+
+ if (matched == 0)
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (pcap_trace_command, static) = {
+ .path = "pcap drop trace",
+ .short_help =
+ "pcap drop trace on off max <nn> intfc <intfc> file <name> status",
+ .function = pcap_drop_trace_command_fn,
+};
+/* *INDENT-ON* */
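+
+/*
+ * Example session (interface name is illustrative):
+ *
+ *   pcap drop trace on max 10000 intfc GigabitEthernet0/8/0 file drops.pcap
+ *   pcap drop trace status
+ *   pcap drop trace off
+ *
+ * "file" arguments are chrooted to /tmp, so the capture above is written
+ * to /tmp/drops.pcap when tracing is switched off.
+ */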
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/dir.dox b/src/vnet/ip/dir.dox
new file mode 100644
index 00000000..a4eb7337
--- /dev/null
+++ b/src/vnet/ip/dir.dox
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief Layer 3 IP Code.
+
+This directory contains the source code for IP routing.
+
+*/
+/*? %%clicmd:group_label Layer 3 IP CLI %% ?*/
diff --git a/src/vnet/ip/format.c b/src/vnet/ip/format.c
new file mode 100644
index 00000000..be1c4fd3
--- /dev/null
+++ b/src/vnet/ip/format.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_format.c: ip generic (4 or 6) formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format IP protocol. */
+u8 *
+format_ip_protocol (u8 * s, va_list * args)
+{
+ ip_protocol_t protocol = va_arg (*args, ip_protocol_t);
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, protocol);
+
+ if (pi)
+ return format (s, "%s", pi->name);
+ else
+ return format (s, "unknown %d", protocol);
+}
+
+uword
+unformat_ip_protocol (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ int i;
+
+ if (!unformat_user (input, unformat_vlib_number_by_name,
+ im->protocol_info_by_name, &i))
+ return 0;
+
+ pi = vec_elt_at_index (im->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+}
+
+u8 *
+format_tcp_udp_port (u8 * s, va_list * args)
+{
+ int port = va_arg (*args, int);
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+
+ pi = ip_get_tcp_udp_port_info (im, port);
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "%d", clib_net_to_host_u16 (port));
+
+ return s;
+}
+
+uword
+unformat_tcp_udp_port (unformat_input_t * input, va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+ u32 i, port;
+
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ im->port_info_by_name, &i))
+ {
+ pi = vec_elt_at_index (im->port_infos, i);
+ port = pi->port;
+ }
+ else if (unformat_user (input, unformat_vlib_number, &port)
+ && port < (1 << 16))
+ port = clib_host_to_net_u16 (port);
+
+ else
+ return 0;
+
+ *result = port;
+ return 1;
+}
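+
+/*
+ * Illustrative usage from a CLI or test harness; note that the parsed
+ * port is stored in network byte order, matching format_tcp_udp_port
+ * above.
+ *
+ *   u16 port;
+ *   if (unformat (input, "%U", unformat_tcp_udp_port, &port))
+ *     ... use clib_net_to_host_u16 (port) for arithmetic ...
+ */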
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/format.h b/src/vnet/ip/format.h
new file mode 100644
index 00000000..c35f0f4b
--- /dev/null
+++ b/src/vnet/ip/format.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/format.h: ip 4 and/or 6 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_format_h
+#define included_ip_format_h
+
+/* IP4 or IP6. */
+
+format_function_t format_ip_protocol;
+unformat_function_t unformat_ip_protocol;
+
+format_function_t format_tcp_udp_port;
+unformat_function_t unformat_tcp_udp_port;
+
+typedef enum format_ip_adjacency_flags_t_
+{
+ FORMAT_IP_ADJACENCY_NONE,
+ FORMAT_IP_ADJACENCY_BRIEF = FORMAT_IP_ADJACENCY_NONE,
+ FORMAT_IP_ADJACENCY_DETAIL = (1 << 0),
+} format_ip_adjacency_flags_t;
+
+format_function_t format_ip_adjacency;
+format_function_t format_ip_adjacency_packet_data;
+
+format_function_t format_ip46_address;
+
+typedef enum
+{
+ IP46_TYPE_ANY,
+ IP46_TYPE_IP4,
+ IP46_TYPE_IP6
+} ip46_type_t;
+/* unformat_ip46_address expects arguments (ip46_address_t *, ip46_type_t).
+ * The type argument is used to enforce a particular IP version. */
+unformat_function_t unformat_ip46_address;
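+/*
+ * e.g. (sketch):
+ *   ip46_address_t addr;
+ *   if (unformat (input, "%U", unformat_ip46_address, &addr, IP46_TYPE_ANY))
+ *     ...
+ */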
+
+/* IP4 */
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+unformat_function_t unformat_ip4_address;
+
+/* Format an IP4 address. */
+format_function_t format_ip4_address;
+format_function_t format_ip4_address_and_length;
+
+/* Parse an IP4 header. */
+unformat_function_t unformat_ip4_header;
+
+/* Format an IP4 header. */
+format_function_t format_ip4_header;
+
+/* Parse an IP packet matching pattern. */
+unformat_function_t unformat_ip4_match;
+
+unformat_function_t unformat_pg_ip4_header;
+
+/* IP6 */
+unformat_function_t unformat_ip6_address;
+format_function_t format_ip6_address;
+format_function_t format_ip6_address_and_length;
+unformat_function_t unformat_ip6_header;
+format_function_t format_ip6_header;
+unformat_function_t unformat_pg_ip6_header;
+
+/* Format TCP/UDP headers. */
+format_function_t format_tcp_header, format_udp_header;
+
+unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header;
+
+#endif /* included_ip_format_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
new file mode 100644
index 00000000..bbeab32b
--- /dev/null
+++ b/src/vnet/ip/icmp4.c
@@ -0,0 +1,784 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/icmp4.c: ipv4 icmp
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+
+static char *icmp_error_strings[] = {
+#define _(f,s) s,
+ foreach_icmp4_error
+#undef _
+};
+
+static u8 *
+format_ip4_icmp_type_and_code (u8 * s, va_list * args)
+{
+ icmp4_type_t type = va_arg (*args, int);
+ u8 code = va_arg (*args, int);
+ char *t = 0;
+
+#define _(n,f) case n: t = #f; break;
+
+ switch (type)
+ {
+ foreach_icmp4_type;
+
+ default:
+ break;
+ }
+
+#undef _
+
+ if (!t)
+ return format (s, "unknown 0x%x", type);
+
+ s = format (s, "%s", t);
+
+ t = 0;
+ switch ((type << 8) | code)
+ {
+#define _(a,n,f) case (ICMP4_##a << 8) | (n): t = #f; break;
+
+ foreach_icmp4_code;
+
+#undef _
+ }
+
+ if (t)
+ s = format (s, " %s", t);
+
+ return s;
+}
+
+static u8 *
+format_ip4_icmp_header (u8 * s, va_list * args)
+{
+ icmp46_header_t *icmp = va_arg (*args, icmp46_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+
+ /* Not enough bytes to format a full ICMP header. */
+ if (max_header_bytes < sizeof (icmp[0]))
+ return format (s, "ICMP header truncated");
+
+ s = format (s, "ICMP %U checksum 0x%x",
+ format_ip4_icmp_type_and_code, icmp->type, icmp->code,
+ clib_net_to_host_u16 (icmp->checksum));
+
+ return s;
+}
+
+static u8 *
+format_icmp_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ icmp_input_trace_t *t = va_arg (*va, icmp_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ ICMP_INPUT_NEXT_ERROR,
+ ICMP_INPUT_N_NEXT,
+} icmp_input_next_t;
+
+typedef struct
+{
+ uword *type_and_code_by_name;
+
+ uword *type_by_name;
+
+ /* Vector dispatch table indexed by [icmp type]. */
+ u8 ip4_input_next_index_by_type[256];
+} icmp4_main_t;
+
+icmp4_main_t icmp4_main;
+
+static uword
+ip4_icmp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ icmp4_main_t *im = &icmp4_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ icmp4_type_t type0;
+ u32 bi0, next0;
+
+ if (PREDICT_TRUE (n_left_from > 2))
+ {
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ p0 = vlib_get_buffer (vm, from[1]);
+ ip0 = vlib_buffer_get_current (p0);
+ CLIB_PREFETCH (ip0, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip4_next_header (ip0);
+ type0 = icmp0->type;
+ next0 = im->ip4_input_next_index_by_type[type0];
+
+ p0->error = node->errors[ICMP4_ERROR_UNKNOWN_TYPE];
+ if (PREDICT_FALSE (next0 != next))
+ {
+ vlib_put_next_frame (vm, node, next, n_left_to_next + 1);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_input_node,static) = {
+ .function = ip4_icmp_input,
+ .name = "ip4-icmp-input",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp_input_trace,
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ICMP_INPUT_NEXT_ERROR] = "error-punt",
+ },
+};
+/* *INDENT-ON* */
+
+static uword
+ip4_icmp_echo_request (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next;
+ ip4_main_t *i4m = &ip4_main;
+ u16 *fragment_ids, *fid;
+ u8 host_config_ttl = i4m->host_config.ttl;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ /* Get random fragment IDs for replies. */
+ fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
+ n_packets *
+ sizeof (fragment_ids[0]));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 2 && n_left_to_next > 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ icmp46_header_t *icmp0, *icmp1;
+ u32 bi0, src0, dst0;
+ u32 bi1, src1, dst1;
+ ip_csum_t sum0, sum1;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip4_next_header (ip0);
+ icmp1 = ip4_next_header (ip1);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ /* Update ICMP checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP4_echo_request);
+ ASSERT (icmp1->type == ICMP4_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ icmp0->type = ICMP4_echo_reply;
+ icmp1->type = ICMP4_echo_reply;
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ src0 = ip0->src_address.data_u32;
+ src1 = ip1->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ dst1 = ip1->dst_address.data_u32;
+
+ /* Swap source and destination address.
+ Does not change checksum. */
+ ip0->src_address.data_u32 = dst0;
+ ip1->src_address.data_u32 = dst1;
+ ip0->dst_address.data_u32 = src0;
+ ip1->dst_address.data_u32 = src1;
+
+ /* Update IP checksum. */
+ sum0 = ip0->checksum;
+ sum1 = ip1->checksum;
+
+ sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ sum1 = ip_csum_update (sum1, ip1->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ ip0->ttl = host_config_ttl;
+ ip1->ttl = host_config_ttl;
+
+ /* New fragment id. */
+ sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
+ ip4_header_t, fragment_id);
+ sum1 = ip_csum_update (sum1, ip1->fragment_id, fid[1],
+ ip4_header_t, fragment_id);
+ ip0->fragment_id = fid[0];
+ ip1->fragment_id = fid[1];
+ fid += 2;
+
+ ip0->checksum = ip_csum_fold (sum0);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ p1->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 bi0, src0, dst0;
+ ip_csum_t sum0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip4_next_header (ip0);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ /* Update ICMP checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP4_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ icmp0->type = ICMP4_echo_reply;
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ src0 = ip0->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip0->dst_address.data_u32 = src0;
+
+ /* Update IP checksum. */
+ sum0 = ip0->checksum;
+
+ sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ ip0->ttl = host_config_ttl;
+
+ sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
+ ip4_header_t, fragment_id);
+ ip0->fragment_id = fid[0];
+ fid += 1;
+
+ ip0->checksum = ip_csum_fold (sum0);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ vlib_error_count (vm, ip4_icmp_input_node.index,
+ ICMP4_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
+ .function = ip4_icmp_echo_request,
+ .name = "ip4-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp_input_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip4-load-balance",
+ },
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ IP4_ICMP_ERROR_NEXT_DROP,
+ IP4_ICMP_ERROR_NEXT_LOOKUP,
+ IP4_ICMP_ERROR_N_NEXT,
+} ip4_icmp_error_next_t;
+
+void
+icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data)
+{
+ vnet_buffer (b)->ip.icmp.type = type;
+ vnet_buffer (b)->ip.icmp.code = code;
+ vnet_buffer (b)->ip.icmp.data = data;
+}
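+
+/*
+ * Illustrative caller: a forwarding node that wants an ICMP
+ * time-exceeded sent records the type/code/data on the buffer, then
+ * enqueues it to the "ip4-icmp-error" node.  The code constant is
+ * assumed from icmp46_packet.h and the next index is hypothetical.
+ *
+ *   icmp4_error_set_vnet_buffer
+ *     (b0, ICMP4_time_exceeded,
+ *      ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
+ *   next0 = my_next_index_for_ip4_icmp_error;
+ */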
+
+static u8
+icmp4_icmp_type_to_error (u8 type)
+{
+ switch (type)
+ {
+ case ICMP4_destination_unreachable:
+ return ICMP4_ERROR_DEST_UNREACH_SENT;
+ case ICMP4_time_exceeded:
+ return ICMP4_ERROR_TTL_EXPIRE_SENT;
+ case ICMP4_parameter_problem:
+ return ICMP4_ERROR_PARAM_PROBLEM_SENT;
+ default:
+ return ICMP4_ERROR_DROP;
+ }
+}
+
+static uword
+ip4_icmp_error (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from, *to_next;
+ uword n_left_from, n_left_to_next;
+ ip4_icmp_error_next_t next_index;
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0 = from[0];
+ u32 next0 = IP4_ICMP_ERROR_NEXT_LOOKUP;
+ u8 error0 = ICMP4_ERROR_NONE;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0, *out_ip0;
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0, if_add_index0;
+ ip_csum_t sum;
+
+ /* Speculatively enqueue p0 to the current next frame */
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /*
+ * RFC1812 says to keep as much of the original packet as
+ * possible within the minimum MTU (576). We cheat "a little"
+ * here by keeping whatever fits in the first buffer, to be more
+ * efficient
+ */
+ if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
+ {
+ /* clear current_length of all other buffers in chain */
+ vlib_buffer_t *b = p0;
+ p0->total_length_not_including_first_buffer = 0;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ b->current_length = 0;
+ }
+ }
+ p0->current_length =
+ p0->current_length > 576 ? 576 : p0->current_length;
+
+ /* Add IP header and ICMPv4 header including a 4 byte data field */
+ vlib_buffer_advance (p0,
+ -sizeof (ip4_header_t) -
+ sizeof (icmp46_header_t) - 4);
+ out_ip0 = vlib_buffer_get_current (p0);
+ icmp0 = (icmp46_header_t *) & out_ip0[1];
+
+ /* Fill ip header fields */
+ out_ip0->ip_version_and_header_length = 0x45;
+ out_ip0->tos = 0;
+ out_ip0->length = clib_host_to_net_u16 (p0->current_length);
+ out_ip0->fragment_id = 0;
+ out_ip0->flags_and_fragment_offset = 0;
+ out_ip0->ttl = 0xff;
+ out_ip0->protocol = IP_PROTOCOL_ICMP;
+ out_ip0->dst_address = ip0->src_address;
+ if_add_index0 = ~0;
+ if (PREDICT_TRUE (vec_len (lm->if_address_pool_index_by_sw_if_index)
+ > sw_if_index0))
+ if_add_index0 =
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+ if (PREDICT_TRUE (if_add_index0 != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index0);
+ ip4_address_t *if_ip =
+ ip_interface_address_get_address (lm, if_add);
+ out_ip0->src_address = *if_ip;
+ }
+ else
+ {
+ /* interface has no IP4 address - should not happen */
+ next0 = IP4_ICMP_ERROR_NEXT_DROP;
+ error0 = ICMP4_ERROR_DROP;
+ }
+ out_ip0->checksum = ip4_header_checksum (out_ip0);
+
+ /* Fill icmp header fields */
+ icmp0->type = vnet_buffer (p0)->ip.icmp.type;
+ icmp0->code = vnet_buffer (p0)->ip.icmp.code;
+ *((u32 *) (icmp0 + 1)) =
+ clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data);
+ icmp0->checksum = 0;
+ sum =
+ ip_incremental_checksum (0, icmp0,
+ p0->current_length -
+ sizeof (ip4_header_t));
+ icmp0->checksum = ~ip_csum_fold (sum);
+
+ /* Update error status */
+ if (error0 == ICMP4_ERROR_NONE)
+ error0 = icmp4_icmp_type_to_error (icmp0->type);
+ vlib_error_count (vm, node->node_index, error0, 1);
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
+ .function = ip4_icmp_error,
+ .name = "ip4-icmp-error",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
+ .next_nodes = {
+ [IP4_ICMP_ERROR_NEXT_DROP] = "error-drop",
+ [IP4_ICMP_ERROR_NEXT_LOOKUP] = "ip4-lookup",
+ },
+
+ .format_trace = format_icmp_input_trace,
+};
+/* *INDENT-ON* */
+
+
+static uword
+unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
+{
+ icmp46_header_t *h = va_arg (*args, icmp46_header_t *);
+ icmp4_main_t *cm = &icmp4_main;
+ u32 i;
+
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_and_code_by_name, &i))
+ {
+ h->type = (i >> 8) & 0xff;
+ h->code = (i >> 0) & 0xff;
+ }
+ else if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_by_name, &i))
+ {
+ h->type = i;
+ h->code = 0;
+ }
+ else
+ return 0;
+
+ return 1;
+}
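+
+/*
+ * The hashes consulted above pack type and code into one u32 as
+ * (type << 8) | code. For example, the token "port_unreachable"
+ * resolves via type_and_code_by_name to
+ * (ICMP4_destination_unreachable << 8) | 3, which the shifts above
+ * unpack to type 3, code 3.
+ */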
+
+static void
+icmp4_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset, icmp_offset;
+
+ icmp_offset = g->start_byte_offset;
+ ip_offset = (g - 1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 len0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+
+ ASSERT (p0->current_data == 0);
+ ip0 = (void *) (p0->data + ip_offset);
+ icmp0 = (void *) (p0->data + icmp_offset);
+ len0 = clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
+ icmp0->checksum =
+ ~ip_csum_fold (ip_incremental_checksum (0, icmp0, len0));
+ }
+}
+
+typedef struct
+{
+ pg_edit_t type, code;
+ pg_edit_t checksum;
+} pg_icmp46_header_t;
+
+always_inline void
+pg_icmp_header_init (pg_icmp46_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the ICMP header. */
+#define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
+ _(type);
+ _(code);
+ _(checksum);
+#undef _
+}
+
+static uword
+unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_icmp46_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
+ &group_index);
+ pg_icmp_header_init (p);
+
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ {
+ icmp46_header_t tmp;
+
+ if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
+ goto error;
+
+ pg_edit_set_fixed (&p->type, tmp.type);
+ pg_edit_set_fixed (&p->code, tmp.code);
+ }
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = icmp4_pg_edit_function;
+ g->edit_function_opaque = 0;
+ }
+
+ return 1;
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
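+
+/*
+ * Illustrative packet-generator stream using this unformat hook (a
+ * sketch; addresses and sizes are up to the operator):
+ *
+ * packet-generator new {
+ * name icmp-echo
+ * limit 1
+ * node ip4-input
+ * size 64-64
+ * data { ICMP: 192.168.1.1 -> 192.168.1.2
+ * ICMP echo_request
+ * incrementing 28 }
+ * }
+ *
+ * With no explicit "checksum" edit, icmp4_pg_edit_function fills in
+ * the ICMP checksum on each generated packet.
+ */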
+
+void
+ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index)
+{
+ icmp4_main_t *im = &icmp4_main;
+
+ ASSERT ((int) type < ARRAY_LEN (im->ip4_input_next_index_by_type));
+ im->ip4_input_next_index_by_type[type]
+ = vlib_node_add_next (vm, ip4_icmp_input_node.index, node_index);
+}
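+
+/*
+ * Sketch of how another graph node hooks in (the node name here is
+ * hypothetical):
+ *
+ * extern vlib_node_registration_t my_timestamp_node;
+ * ip4_icmp_register_type (vm, ICMP4_timestamp_request,
+ * my_timestamp_node.index);
+ *
+ * Thereafter ip4-icmp-input dispatches timestamp requests to that
+ * node instead of taking the default error path.
+ */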
+
+static clib_error_t *
+icmp4_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ icmp4_main_t *cm = &icmp4_main;
+ clib_error_t *error;
+
+ error = vlib_call_init_function (vm, ip_main_init);
+
+ if (error)
+ return error;
+
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP);
+ pi->format_header = format_ip4_icmp_header;
+ pi->unformat_pg_edit = unformat_pg_icmp_header;
+
+ cm->type_by_name = hash_create_string (0, sizeof (uword));
+#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
+ foreach_icmp4_type;
+#undef _
+
+ cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
+#define _(a,n,t) hash_set_mem (cm->type_and_code_by_name, #t, (n) | (ICMP4_##a << 8));
+ foreach_icmp4_code;
+#undef _
+
+ memset (cm->ip4_input_next_index_by_type,
+ ICMP_INPUT_NEXT_ERROR, sizeof (cm->ip4_input_next_index_by_type));
+
+ ip4_icmp_register_type (vm, ICMP4_echo_request,
+ ip4_icmp_echo_request_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (icmp4_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp4.h b/src/vnet/ip/icmp4.h
new file mode 100644
index 00000000..ae805148
--- /dev/null
+++ b/src/vnet/ip/icmp4.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_icmp4_h
+#define included_vnet_icmp4_h
+
+#define foreach_icmp4_error \
+ _ (NONE, "valid packets") \
+ _ (UNKNOWN_TYPE, "unknown type") \
+ _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
+ _ (INVALID_HOP_LIMIT_FOR_TYPE, "ttl != 255") \
+ _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
+ _ (OPTIONS_WITH_ODD_LENGTH, \
+ "total option length not multiple of 8 bytes") \
+ _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
+ _ (ECHO_REPLIES_SENT, "echo replies sent") \
+ _ (DST_LOOKUP_MISS, "icmp4 dst address lookup misses") \
+ _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
+ _ (TTL_EXPIRE_SENT, "time exceeded response sent") \
+ _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
+ _ (DROP, "error message dropped")
+
+typedef enum
+{
+#define _(f,s) ICMP4_ERROR_##f,
+ foreach_icmp4_error
+#undef _
+} icmp4_error_t;
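+
+/*
+ * The _() X-macro expands foreach_icmp4_error into enum members in
+ * declaration order, i.e. the first entries become
+ *
+ * ICMP4_ERROR_NONE = 0,
+ * ICMP4_ERROR_UNKNOWN_TYPE = 1,
+ * ICMP4_ERROR_INVALID_CODE_FOR_TYPE = 2,
+ *
+ * icmp4.c re-expands the same list with a different _() to build the
+ * matching error-string table passed to VLIB_REGISTER_NODE.
+ */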
+
+typedef struct
+{
+ u8 packet_data[64];
+} icmp_input_trace_t;
+
+format_function_t format_icmp4_input_trace;
+void ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type,
+ u32 node_index);
+void icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code,
+ u32 data);
+
+#endif /* included_vnet_icmp4_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h
new file mode 100644
index 00000000..a86cbd57
--- /dev/null
+++ b/src/vnet/ip/icmp46_packet.h
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * icmp46_packet.h: ip4/ip6 icmp packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_icmp46_packet_h
+#define included_vnet_icmp46_packet_h
+
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#define foreach_icmp4_type \
+ _ (0, echo_reply) \
+ _ (3, destination_unreachable) \
+ _ (4, source_quench) \
+ _ (5, redirect) \
+ _ (6, alternate_host_address) \
+ _ (8, echo_request) \
+ _ (9, router_advertisement) \
+ _ (10, router_solicitation) \
+ _ (11, time_exceeded) \
+ _ (12, parameter_problem) \
+ _ (13, timestamp_request) \
+ _ (14, timestamp_reply) \
+ _ (15, information_request) \
+ _ (16, information_reply) \
+ _ (17, address_mask_request) \
+ _ (18, address_mask_reply) \
+ _ (30, traceroute) \
+ _ (31, datagram_conversion_error) \
+ _ (32, mobile_host_redirect) \
+ _ (33, ip6_where_are_you) \
+ _ (34, ip6_i_am_here) \
+ _ (35, mobile_registration_request) \
+ _ (36, mobile_registration_reply) \
+ _ (37, domain_name_request) \
+ _ (38, domain_name_reply) \
+ _ (39, skip) \
+ _ (40, photuris)
+
+#define icmp_no_code 0
+
+#define foreach_icmp4_code \
+ _ (destination_unreachable, 0, destination_unreachable_net) \
+ _ (destination_unreachable, 1, destination_unreachable_host) \
+ _ (destination_unreachable, 2, protocol_unreachable) \
+ _ (destination_unreachable, 3, port_unreachable) \
+ _ (destination_unreachable, 4, fragmentation_needed_and_dont_fragment_set) \
+ _ (destination_unreachable, 5, source_route_failed) \
+ _ (destination_unreachable, 6, destination_network_unknown) \
+ _ (destination_unreachable, 7, destination_host_unknown) \
+ _ (destination_unreachable, 8, source_host_isolated) \
+ _ (destination_unreachable, 9, network_administratively_prohibited) \
+ _ (destination_unreachable, 10, host_administratively_prohibited) \
+ _ (destination_unreachable, 11, network_unreachable_for_type_of_service) \
+ _ (destination_unreachable, 12, host_unreachable_for_type_of_service) \
+ _ (destination_unreachable, 13, communication_administratively_prohibited) \
+ _ (destination_unreachable, 14, host_precedence_violation) \
+ _ (destination_unreachable, 15, precedence_cutoff_in_effect) \
+ _ (redirect, 0, network_redirect) \
+ _ (redirect, 1, host_redirect) \
+ _ (redirect, 2, type_of_service_and_network_redirect) \
+ _ (redirect, 3, type_of_service_and_host_redirect) \
+ _ (router_advertisement, 0, normal_router_advertisement) \
+ _ (router_advertisement, 16, does_not_route_common_traffic) \
+ _ (time_exceeded, 0, ttl_exceeded_in_transit) \
+ _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \
+ _ (parameter_problem, 0, pointer_indicates_error) \
+ _ (parameter_problem, 1, missing_required_option) \
+ _ (parameter_problem, 2, bad_length)
+
+/* ICMPv6 */
+#define foreach_icmp6_type \
+ _ (1, destination_unreachable) \
+ _ (2, packet_too_big) \
+ _ (3, time_exceeded) \
+ _ (4, parameter_problem) \
+ _ (128, echo_request) \
+ _ (129, echo_reply) \
+ _ (130, multicast_listener_request) \
+ _ (131, multicast_listener_report) \
+ _ (132, multicast_listener_done) \
+ _ (133, router_solicitation) \
+ _ (134, router_advertisement) \
+ _ (135, neighbor_solicitation) \
+ _ (136, neighbor_advertisement) \
+ _ (137, redirect) \
+ _ (138, router_renumbering) \
+ _ (139, node_information_request) \
+ _ (140, node_information_response) \
+ _ (141, inverse_neighbor_solicitation) \
+ _ (142, inverse_neighbor_advertisement) \
+ _ (143, multicast_listener_report_v2) \
+ _ (144, home_agent_address_discovery_request) \
+ _ (145, home_agent_address_discovery_reply) \
+ _ (146, mobile_prefix_solicitation) \
+ _ (147, mobile_prefix_advertisement) \
+ _ (148, certification_path_solicitation) \
+ _ (149, certification_path_advertisement) \
+ _ (151, multicast_router_advertisement) \
+ _ (152, multicast_router_solicitation) \
+ _ (153, multicast_router_termination) \
+ _ (154, fmipv6_messages)
+
+#define foreach_icmp6_code \
+ _ (destination_unreachable, 0, no_route_to_destination) \
+ _ (destination_unreachable, 1, destination_administratively_prohibited) \
+ _ (destination_unreachable, 2, beyond_scope_of_source_address) \
+ _ (destination_unreachable, 3, address_unreachable) \
+ _ (destination_unreachable, 4, port_unreachable) \
+ _ (destination_unreachable, 5, source_address_failed_policy) \
+ _ (destination_unreachable, 6, reject_route_to_destination) \
+ _ (time_exceeded, 0, ttl_exceeded_in_transit) \
+ _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \
+ _ (parameter_problem, 0, erroneous_header_field) \
+ _ (parameter_problem, 1, unrecognized_next_header) \
+ _ (parameter_problem, 2, unrecognized_option) \
+ _ (router_renumbering, 0, command) \
+ _ (router_renumbering, 1, result) \
+ _ (node_information_request, 0, data_contains_ip6_address) \
+ _ (node_information_request, 1, data_contains_name) \
+ _ (node_information_request, 2, data_contains_ip4_address) \
+ _ (node_information_response, 0, success) \
+ _ (node_information_response, 1, failed) \
+ _ (node_information_response, 2, unknown_request)
+
+typedef enum
+{
+#define _(n,f) ICMP4_##f = n,
+ foreach_icmp4_type
+#undef _
+} icmp4_type_t;
+
+typedef enum
+{
+#define _(t,n,f) ICMP4_##t##_##f = n,
+ foreach_icmp4_code
+#undef _
+} icmp4_code_t;
+
+typedef enum
+{
+#define _(n,f) ICMP6_##f = n,
+ foreach_icmp6_type
+#undef _
+} icmp6_type_t;
+
+typedef enum
+{
+#define _(t,n,f) ICMP6_##t##_##f = n,
+ foreach_icmp6_code
+#undef _
+} icmp6_code_t;
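+
+/*
+ * The code enums concatenate type and code names, so for example the
+ * entry "_ (destination_unreachable, 3, port_unreachable)" in
+ * foreach_icmp4_code expands to
+ * ICMP4_destination_unreachable_port_unreachable = 3, and likewise
+ * for the ICMP6_ variants.
+ */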
+
+typedef CLIB_PACKED (struct
+ {
+ u8 type;
+ u8 code;
+ /* IP checksum of icmp header plus data which follows. */
+ u16 checksum;
+ }) icmp46_header_t;
+
+/* ip6 neighbor discovery */
+#define foreach_icmp6_neighbor_discovery_option \
+ _ (1, source_link_layer_address) \
+ _ (2, target_link_layer_address) \
+ _ (3, prefix_information) \
+ _ (4, redirected_header) \
+ _ (5, mtu) \
+ _ (6, nbma_shortcut_limit) \
+ _ (7, advertisement_interval) \
+ _ (8, home_agent_information) \
+ _ (9, source_address_list) \
+ _ (10, target_address_list) \
+ _ (11, cryptographically_generated_address) \
+ _ (12, rsa_signature) \
+ _ (13, timestamp) \
+ _ (14, nonce) \
+ _ (15, trust_anchor) \
+ _ (16, certificate) \
+ _ (17, ip_address_and_prefix) \
+ _ (18, new_router_prefix_information) \
+ _ (19, mobile_link_layer_address) \
+ _ (20, neighbor_advertisement_acknowledgment) \
+ _ (23, map) \
+ _ (24, route_information) \
+ _ (25, recursive_dns_server) \
+ _ (26, ra_flags_extension) \
+ _ (27, handover_key_request) \
+ _ (28, handover_key_reply) \
+ _ (29, handover_assist_information) \
+ _ (30, mobile_node_identifier) \
+ _ (31, dns_search_list) \
+ _ (138, card_request) \
+ _ (139, card_reply)
+
+typedef enum icmp6_neighbor_discovery_option_type
+{
+#define _(n,f) ICMP6_NEIGHBOR_DISCOVERY_OPTION_##f = n,
+ foreach_icmp6_neighbor_discovery_option
+#undef _
+} icmp6_neighbor_discovery_option_type_t;
+
+typedef CLIB_PACKED (struct
+ {
+ /* Option type. */
+ u8 type;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ /* Option data follows. */
+ u8 data[0];
+ }) icmp6_neighbor_discovery_option_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 dst_address_length;
+ u8 flags;
+#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK (1 << 7)
+#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO (1 << 6)
+ u32 valid_time;
+ u32 preferred_time;
+ u32 unused;
+ ip6_address_t dst_address;
+ }) icmp6_neighbor_discovery_prefix_information_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ u8 type;
+ u8 aux_data_len_u32s;
+ u16 num_sources;
+ ip6_address_t mcast_addr;
+ ip6_address_t source_addr[0];
+ }) icmp6_multicast_address_record_t;
+
+typedef CLIB_PACKED (struct
+ {
+ ip6_hop_by_hop_ext_t ext_hdr;
+ ip6_router_alert_option_t alert;
+ ip6_padN_option_t pad;
+ icmp46_header_t icmp;
+ u16 rsvd;
+ u16 num_addr_records;
+ icmp6_multicast_address_record_t records[0];
+ }) icmp6_multicast_listener_report_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 reserved[6];
+ /* IP6 header plus payload follows. */
+ u8 data[0];
+ }) icmp6_neighbor_discovery_redirected_header_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u16 unused;
+ u32 mtu;
+ }) icmp6_neighbor_discovery_mtu_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 ethernet_address[6];
+ })
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 max_l2_address[6 + 8];
+ })
+ icmp6_neighbor_discovery_max_link_layer_address_option_t;
+
+/* Generic neighbor discovery header. Used for router solicitations,
+ etc. */
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ u32 reserved_must_be_zero;
+ }) icmp6_neighbor_discovery_header_t;
+
+/* Router advertisement packet formats. */
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ /* Current hop limit to use for outgoing packets. */
+ u8 current_hop_limit;
+ u8 flags;
+#define ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP (1 << 7)
+#define ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP (1 << 6)
+ /* Zero means unspecified. */
+ u16 router_lifetime_in_sec;
+ /* Zero means unspecified. */
+ u32 neighbor_reachable_time_in_msec;
+ /* Zero means unspecified. */
+ u32
+ time_in_msec_between_retransmitted_neighbor_solicitations;
+ /* Options that may follow: source_link_layer_address, mtu, prefix_information. */
+ }) icmp6_router_advertisement_header_t;
+
+/* Neighbor solicitation/advertisement header. */
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ /* Zero for solicitation; flags for advertisement. */
+ u32 advertisement_flags;
+ /* Set when sent by a router. */
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER (1 << 31)
+ /* Set when response to solicitation. */
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED (1 << 30)
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE (1 << 29)
+ ip6_address_t target_address;
+ /* Options that may follow: source_link_layer_address
+ (for solicitation) target_link_layer_address (for advertisement). */
+ }) icmp6_neighbor_solicitation_or_advertisement_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ u32 reserved_must_be_zero;
+ /* Better next hop to use for given destination. */
+ ip6_address_t better_next_hop_address;
+ ip6_address_t dst_address;
+ /* Options that may follow: target_link_layer_address,
+ redirected_header. */
+ }) icmp6_redirect_header_t;
+
+/* Solicitation/advertisement packet format for ethernet. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_neighbor_solicitation_or_advertisement_header_t
+ neighbor;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ }) icmp6_neighbor_solicitation_header_t;
+
+/* Router solicitation packet format for ethernet. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_neighbor_discovery_header_t neighbor;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ }) icmp6_router_solicitation_header_t;
+
+/* Router advertisement packet format for ethernet. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_router_advertisement_header_t router;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ icmp6_neighbor_discovery_mtu_option_t mtu_option;
+ icmp6_neighbor_discovery_prefix_information_option_t
+ prefix[0];
+ }) icmp6_router_advertisement_packet_t;
+
+/* Multicast listener report packet format for ethernet. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_multicast_listener_report_header_t report_hdr;
+ }) icmp6_multicast_listener_report_packet_t;
+
+#endif /* included_vnet_icmp46_packet_h */
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
new file mode 100644
index 00000000..70696d0c
--- /dev/null
+++ b/src/vnet/ip/icmp6.c
@@ -0,0 +1,882 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/icmp6.c: ip6 icmp
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+static u8 *
+format_ip6_icmp_type_and_code (u8 * s, va_list * args)
+{
+ icmp6_type_t type = va_arg (*args, int);
+ u8 code = va_arg (*args, int);
+ char *t = 0;
+
+#define _(n,f) case n: t = #f; break;
+
+ switch (type)
+ {
+ foreach_icmp6_type;
+
+ default:
+ break;
+ }
+
+#undef _
+
+ if (!t)
+ return format (s, "unknown 0x%x", type);
+
+ s = format (s, "%s", t);
+
+ t = 0;
+ switch ((type << 8) | code)
+ {
+#define _(a,n,f) case (ICMP6_##a << 8) | (n): t = #f; break;
+
+ foreach_icmp6_code;
+
+#undef _
+ }
+
+ if (t)
+ s = format (s, " %s", t);
+
+ return s;
+}
+
+static u8 *
+format_icmp6_header (u8 * s, va_list * args)
+{
+ icmp46_header_t *icmp = va_arg (*args, icmp46_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (icmp[0]))
+ return format (s, "ICMP header truncated");
+
+ s = format (s, "ICMP %U checksum 0x%x",
+ format_ip6_icmp_type_and_code, icmp->type, icmp->code,
+ clib_net_to_host_u16 (icmp->checksum));
+
+ if (max_header_bytes >=
+ sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t) &&
+ (icmp->type == ICMP6_neighbor_solicitation ||
+ icmp->type == ICMP6_neighbor_advertisement))
+ {
+ icmp6_neighbor_solicitation_or_advertisement_header_t *icmp6_nd =
+ (icmp6_neighbor_solicitation_or_advertisement_header_t *) icmp;
+ s = format (s, "\n target address %U",
+ format_ip6_address, &icmp6_nd->target_address);
+ }
+
+ return s;
+}
+
+u8 *
+format_icmp6_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ icmp6_input_trace_t *t = va_arg (*va, icmp6_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip6_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+static char *icmp_error_strings[] = {
+#define _(f,s) s,
+ foreach_icmp6_error
+#undef _
+};
+
+typedef enum
+{
+ ICMP_INPUT_NEXT_DROP,
+ ICMP_INPUT_N_NEXT,
+} icmp_input_next_t;
+
+typedef struct
+{
+ uword *type_and_code_by_name;
+
+ uword *type_by_name;
+
+ /* Vector dispatch table indexed by [icmp type]. */
+ u8 input_next_index_by_type[256];
+
+ /* Max valid code indexed by icmp type. */
+ u8 max_valid_code_by_type[256];
+
+ /* hop_limit must be >= this value for this icmp type. */
+ u8 min_valid_hop_limit_by_type[256];
+
+ u8 min_valid_length_by_type[256];
+} icmp6_main_t;
+
+icmp6_main_t icmp6_main;
+
+static uword
+ip6_icmp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ icmp6_main_t *im = &icmp6_main;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0;
+ icmp46_header_t *icmp0;
+ icmp6_type_t type0;
+ u32 bi0, next0, error0, len0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ icmp0 = ip6_next_header (ip0);
+ type0 = icmp0->type;
+
+ error0 = ICMP6_ERROR_NONE;
+
+ next0 = im->input_next_index_by_type[type0];
+ error0 =
+ next0 == ICMP_INPUT_NEXT_DROP ? ICMP6_ERROR_UNKNOWN_TYPE : error0;
+
+ /* Check code is valid for type. */
+ error0 =
+ icmp0->code >
+ im->max_valid_code_by_type[type0] ?
+ ICMP6_ERROR_INVALID_CODE_FOR_TYPE : error0;
+
+ /* Checksum is already validated by ip6_local node so we don't need to check that. */
+
+ /* Check that hop limit == 255 for certain types. */
+ error0 =
+ ip0->hop_limit <
+ im->min_valid_hop_limit_by_type[type0] ?
+ ICMP6_ERROR_INVALID_HOP_LIMIT_FOR_TYPE : error0;
+
+ len0 = clib_net_to_host_u16 (ip0->payload_length);
+ error0 =
+ len0 <
+ im->min_valid_length_by_type[type0] ?
+ ICMP6_ERROR_LENGTH_TOO_SMALL_FOR_TYPE : error0;
+
+ b0->error = node->errors[error0];
+
+ next0 = error0 != ICMP6_ERROR_NONE ? ICMP_INPUT_NEXT_DROP : next0;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
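+
+/*
+ * Note the branch-free validation style above: each check is a
+ * conditional re-assignment of error0, so a valid packet flows
+ * through without taken branches. When several checks fail, the
+ * last one wins, and any error at all redirects the packet to the
+ * drop next.
+ */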
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
+ .function = ip6_icmp_input,
+ .name = "ip6-icmp-input",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ICMP_INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
+ ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
+ ICMP6_ECHO_REQUEST_N_NEXT,
+} icmp6_echo_request_next_t;
+
+static uword
+ip6_icmp_echo_request (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ ip6_main_t *im = &ip6_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 2 && n_left_to_next > 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ icmp46_header_t *icmp0, *icmp1;
+ ip6_address_t tmp0, tmp1;
+ ip_csum_t sum0, sum1;
+ u32 bi0, bi1;
+ u32 fib_index0, fib_index1;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+ u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip6_next_header (ip0);
+ icmp1 = ip6_next_header (ip1);
+
+ /* Change icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ ASSERT (icmp1->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ icmp0->type = ICMP6_echo_reply;
+ icmp1->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ tmp1 = ip1->src_address;
+
+ ip0->src_address = ip0->dst_address;
+ ip1->src_address = ip1->dst_address;
+
+ ip0->dst_address = tmp0;
+ ip1->dst_address = tmp1;
+
+ /* New hop count. */
+ ip0->hop_limit = im->host_config.ttl;
+ ip1->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ ethernet_header_t *eth0;
+ u8 tmp_mac[6];
+ /* For link local, reuse the current MAC header by swapping
+ * SMAC and DMAC instead of doing an IP6 lookup, since link
+ * local addresses are not in the IP6 FIB */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (tmp_mac, eth0->dst_address, 6);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ clib_memcpy (eth0->src_address, tmp_mac, 6);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ /* Determine the correct lookup fib indices... */
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+
+ if (ip6_address_is_link_local_unicast (&ip1->dst_address))
+ {
+ ethernet_header_t *eth1;
+ u8 tmp_mac[6];
+ /* For link local, reuse the current MAC header by swapping
+ * SMAC and DMAC instead of doing an IP6 lookup, since link
+ * local addresses are not in the IP6 FIB */
+ vlib_buffer_reset (p1);
+ eth1 = vlib_buffer_get_current (p1);
+ clib_memcpy (tmp_mac, eth1->dst_address, 6);
+ clib_memcpy (eth1->dst_address, eth1->src_address, 6);
+ clib_memcpy (eth1->src_address, tmp_mac, 6);
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p1)->sw_if_index[VLIB_RX];
+ next1 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ /* Determine the correct lookup fib indices... */
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1;
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 bi0;
+ ip6_address_t tmp0;
+ ip_csum_t sum0;
+ u32 fib_index0;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip6_next_header (ip0);
+
+ /* Change icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ icmp0->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ ip0->src_address = ip0->dst_address;
+ ip0->dst_address = tmp0;
+
+ ip0->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ ethernet_header_t *eth0;
+ u8 tmp_mac[6];
+ /* For link local, reuse the current MAC header by swapping
+ * SMAC and DMAC instead of doing an IP6 lookup, since link
+ * local addresses are not in the IP6 FIB */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (tmp_mac, eth0->dst_address, 6);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ clib_memcpy (eth0->src_address, tmp_mac, 6);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_error_count (vm, ip6_icmp_input_node.index,
+ ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_echo_request_node, static) = {
+ .function = ip6_icmp_echo_request,
+ .name = "ip6-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
+ [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ IP6_ICMP_ERROR_NEXT_DROP,
+ IP6_ICMP_ERROR_NEXT_LOOKUP,
+ IP6_ICMP_ERROR_N_NEXT,
+} ip6_icmp_error_next_t;
+
+void
+icmp6_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data)
+{
+ vnet_buffer (b)->ip.icmp.type = type;
+ vnet_buffer (b)->ip.icmp.code = code;
+ vnet_buffer (b)->ip.icmp.data = data;
+}
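+
+/*
+ * Hypothetical caller sketch, mirroring the ip4 variant: a node
+ * dropping an over-sized packet records the ICMP metadata (for
+ * packet-too-big the 4-byte data word carries the link MTU) before
+ * sending the buffer to "ip6-icmp-error":
+ *
+ * icmp6_error_set_vnet_buffer (b0, ICMP6_packet_too_big, 0, mtu0);
+ */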
+
+static u8
+icmp6_icmp_type_to_error (u8 type)
+{
+ switch (type)
+ {
+ case ICMP6_destination_unreachable:
+ return ICMP6_ERROR_DEST_UNREACH_SENT;
+ case ICMP6_packet_too_big:
+ return ICMP6_ERROR_PACKET_TOO_BIG_SENT;
+ case ICMP6_time_exceeded:
+ return ICMP6_ERROR_TTL_EXPIRE_SENT;
+ case ICMP6_parameter_problem:
+ return ICMP6_ERROR_PARAM_PROBLEM_SENT;
+ default:
+ return ICMP6_ERROR_DROP;
+ }
+}
+
+static uword
+ip6_icmp_error (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from, *to_next;
+ uword n_left_from, n_left_to_next;
+ ip6_icmp_error_next_t next_index;
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0 = from[0];
+ u32 next0 = IP6_ICMP_ERROR_NEXT_LOOKUP;
+ u8 error0 = ICMP6_ERROR_NONE;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0, *out_ip0;
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0, if_add_index0;
+ int bogus_length;
+
+ /* Speculatively enqueue p0 to the current next frame */
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* RFC4443 says to keep as much of the original packet as possible
+ * within the minimum MTU. We cheat "a little" here by keeping whatever fits
+ * in the first buffer, to be more efficient */
+ if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
+ { /* clear current_length of all other buffers in chain */
+ vlib_buffer_t *b = p0;
+ p0->total_length_not_including_first_buffer = 0;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ b->current_length = 0;
+ }
+ }
+ p0->current_length =
+ p0->current_length > 1280 ? 1280 : p0->current_length;
+
+ /* Add IP header and ICMPv6 header including a 4 byte data field */
+ vlib_buffer_advance (p0,
+ -sizeof (ip6_header_t) -
+ sizeof (icmp46_header_t) - 4);
+ out_ip0 = vlib_buffer_get_current (p0);
+ icmp0 = (icmp46_header_t *) & out_ip0[1];
+
+ /* Fill ip header fields */
+ out_ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ out_ip0->payload_length =
+ clib_host_to_net_u16 (p0->current_length - sizeof (ip6_header_t));
+ out_ip0->protocol = IP_PROTOCOL_ICMP6;
+ out_ip0->hop_limit = 0xff;
+ out_ip0->dst_address = ip0->src_address;
+ if_add_index0 = ~0;
+ if (PREDICT_TRUE (vec_len (lm->if_address_pool_index_by_sw_if_index)
+ > sw_if_index0))
+ if_add_index0 =
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+ if (PREDICT_TRUE (if_add_index0 != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index0);
+ ip6_address_t *if_ip =
+ ip_interface_address_get_address (lm, if_add);
+ out_ip0->src_address = *if_ip;
+ }
+ else /* interface has no IP6 address - should not happen */
+ {
+ next0 = IP6_ICMP_ERROR_NEXT_DROP;
+ error0 = ICMP6_ERROR_DROP;
+ }
+
+ /* Fill icmp header fields */
+ icmp0->type = vnet_buffer (p0)->ip.icmp.type;
+ icmp0->code = vnet_buffer (p0)->ip.icmp.code;
+ *((u32 *) (icmp0 + 1)) =
+ clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data);
+ icmp0->checksum = 0;
+ icmp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, out_ip0,
+ &bogus_length);
+
+ /* Update error status */
+ if (error0 == ICMP6_ERROR_NONE)
+ error0 = icmp6_icmp_type_to_error (icmp0->type);
+ vlib_error_count (vm, node->node_index, error0, 1);
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
+ .function = ip6_icmp_error,
+ .name = "ip6-icmp-error",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = IP6_ICMP_ERROR_N_NEXT,
+ .next_nodes = {
+ [IP6_ICMP_ERROR_NEXT_DROP] = "error-drop",
+ [IP6_ICMP_ERROR_NEXT_LOOKUP] = "ip6-lookup",
+ },
+
+ .format_trace = format_icmp6_input_trace,
+};
+/* *INDENT-ON* */
+
+
+static uword
+unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
+{
+ icmp46_header_t *h = va_arg (*args, icmp46_header_t *);
+ icmp6_main_t *cm = &icmp6_main;
+ u32 i;
+
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_and_code_by_name, &i))
+ {
+ h->type = (i >> 8) & 0xff;
+ h->code = (i >> 0) & 0xff;
+ }
+ else if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_by_name, &i))
+ {
+ h->type = i;
+ h->code = 0;
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+static void
+icmp6_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset, icmp_offset;
+ int bogus_length;
+
+ icmp_offset = g->start_byte_offset;
+ ip_offset = (g - 1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp46_header_t *icmp0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+
+ ASSERT (p0->current_data == 0);
+ ip0 = (void *) (p0->data + ip_offset);
+ icmp0 = (void *) (p0->data + icmp_offset);
+
+ icmp0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+ }
+}
+
+typedef struct
+{
+ pg_edit_t type, code;
+ pg_edit_t checksum;
+} pg_icmp46_header_t;
+
+always_inline void
+pg_icmp_header_init (pg_icmp46_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the ICMP header. */
+#define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
+ _(type);
+ _(code);
+ _(checksum);
+#undef _
+}
+
+static uword
+unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_icmp46_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
+ &group_index);
+ pg_icmp_header_init (p);
+
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ {
+ icmp46_header_t tmp;
+
+ if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
+ goto error;
+
+ pg_edit_set_fixed (&p->type, tmp.type);
+ pg_edit_set_fixed (&p->code, tmp.code);
+ }
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = icmp6_pg_edit_function;
+ g->edit_function_opaque = 0;
+ }
+
+ return 1;
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+void
+icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index)
+{
+ icmp6_main_t *im = &icmp6_main;
+
+ ASSERT ((int) type < ARRAY_LEN (im->input_next_index_by_type));
+ im->input_next_index_by_type[type]
+ = vlib_node_add_next (vm, ip6_icmp_input_node.index, node_index);
+}
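+
+/*
+ * As with the ip4 variant, other features hook in through this call;
+ * the neighbor discovery code, for instance, is expected to register
+ * the solicitation/advertisement types so that ip6-icmp-input steers
+ * those packets to its nodes rather than dropping them as unknown.
+ */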
+
+static clib_error_t *
+icmp6_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ icmp6_main_t *cm = &icmp6_main;
+ clib_error_t *error;
+
+ error = vlib_call_init_function (vm, ip_main_init);
+
+ if (error)
+ return error;
+
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP6);
+ pi->format_header = format_icmp6_header;
+ pi->unformat_pg_edit = unformat_pg_icmp_header;
+
+ cm->type_by_name = hash_create_string (0, sizeof (uword));
+#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
+ foreach_icmp6_type;
+#undef _
+
+ cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
+#define _(a,n,t) hash_set_mem (cm->type_and_code_by_name, #t, (n) | (ICMP6_##a << 8));
+ foreach_icmp6_code;
+#undef _
+
+ memset (cm->input_next_index_by_type,
+ ICMP_INPUT_NEXT_DROP, sizeof (cm->input_next_index_by_type));
+ memset (cm->max_valid_code_by_type, 0, sizeof (cm->max_valid_code_by_type));
+
+#define _(a,n,t) cm->max_valid_code_by_type[ICMP6_##a] = clib_max (cm->max_valid_code_by_type[ICMP6_##a], n);
+ foreach_icmp6_code;
+#undef _
+
+ memset (cm->min_valid_hop_limit_by_type, 0,
+ sizeof (cm->min_valid_hop_limit_by_type));
+ cm->min_valid_hop_limit_by_type[ICMP6_router_solicitation] = 255;
+ cm->min_valid_hop_limit_by_type[ICMP6_router_advertisement] = 255;
+ cm->min_valid_hop_limit_by_type[ICMP6_neighbor_solicitation] = 255;
+ cm->min_valid_hop_limit_by_type[ICMP6_neighbor_advertisement] = 255;
+ cm->min_valid_hop_limit_by_type[ICMP6_redirect] = 255;
+
+ memset (cm->min_valid_length_by_type, sizeof (icmp46_header_t),
+ sizeof (cm->min_valid_length_by_type));
+ cm->min_valid_length_by_type[ICMP6_router_solicitation] =
+ sizeof (icmp6_neighbor_discovery_header_t);
+ cm->min_valid_length_by_type[ICMP6_router_advertisement] =
+ sizeof (icmp6_router_advertisement_header_t);
+ cm->min_valid_length_by_type[ICMP6_neighbor_solicitation] =
+ sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t);
+ cm->min_valid_length_by_type[ICMP6_neighbor_advertisement] =
+ sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t);
+ cm->min_valid_length_by_type[ICMP6_redirect] =
+ sizeof (icmp6_redirect_header_t);
+
+ icmp6_register_type (vm, ICMP6_echo_request,
+ ip6_icmp_echo_request_node.index);
+
+ return vlib_call_init_function (vm, ip6_neighbor_init);
+}
+
+VLIB_INIT_FUNCTION (icmp6_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h
new file mode 100644
index 00000000..9a3487b1
--- /dev/null
+++ b/src/vnet/ip/icmp6.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_icmp6_h
+#define included_vnet_icmp6_h
+
+#define foreach_icmp6_error \
+ _ (NONE, "valid packets") \
+ _ (UNKNOWN_TYPE, "unknown type") \
+ _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
+ _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
+ _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
+ _ (OPTIONS_WITH_ODD_LENGTH, \
+ "total option length not multiple of 8 bytes") \
+ _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
+ _ (ECHO_REPLIES_SENT, "echo replies sent") \
+ _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \
+ "neighbor solicitations from source not on link") \
+ _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \
+ "neighbor solicitations for unknown targets") \
+ _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \
+ _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \
+ _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \
+ "router solicitations from source not on link") \
+ _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \
+ "neighbor discovery unsupported interface") \
+ _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \
+ "neighbor discovery not configured") \
+ _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \
+ "router advertisement source not link local") \
+ _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \
+ _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \
+ _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
+ _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
+ _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \
+ _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
+ _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
+ _ (DROP, "error message dropped")
+
+
+typedef enum
+{
+#define _(f,s) ICMP6_ERROR_##f,
+ foreach_icmp6_error
+#undef _
+} icmp6_error_t;
+
+typedef struct
+{
+ u8 packet_data[64];
+} icmp6_input_trace_t;
+
+format_function_t format_icmp6_input_trace;
+void icmp6_register_type (vlib_main_t * vm, icmp6_type_t type,
+ u32 node_index);
+void icmp6_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code,
+ u32 data);
+
+extern vlib_node_registration_t ip6_icmp_input_node;
+
+#endif /* included_vnet_icmp6_h */
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/igmp_packet.h b/src/vnet/ip/igmp_packet.h
new file mode 100644
index 00000000..503259ec
--- /dev/null
+++ b/src/vnet/ip/igmp_packet.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * igmp_packet.h: igmp packet format
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_igmp_packet_h
+#define included_vnet_igmp_packet_h
+
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#define foreach_igmp_type \
+ _ (0x11, membership_query) \
+ _ (0x12, membership_report_v1) \
+ _ (0x13, dvmrp) \
+ _ (0x14, pim_v1) \
+ _ (0x15, cisco_trace) \
+ _ (0x16, membership_report_v2) \
+ _ (0x17, leave_group_v2) \
+ _ (0x1e, traceroute_response) \
+ _ (0x1f, traceroute_request) \
+ _ (0x22, membership_report_v3) \
+ _ (0x30, router_advertisement) \
+ _ (0x31, router_solicitation) \
+ _ (0x32, router_termination)
+
+typedef enum
+{
+#define _(n,f) IGMP_TYPE_##f = n,
+ foreach_igmp_type
+#undef _
+} igmp_type_t;
+
+typedef struct
+{
+ igmp_type_t type:8;
+
+ u8 code;
+
+ u16 checksum;
+} igmp_header_t;
+
+typedef struct
+{
+ /* membership_query, version <= 2 reports. */
+ igmp_header_t header;
+
+ /* Multicast destination address. */
+ ip4_address_t dst;
+} igmp_message_t;
+
+#define foreach_igmp_membership_group_v3_type \
+ _ (1, mode_is_filter_include) \
+ _ (2, mode_is_filter_exclude) \
+ _ (3, change_to_filter_include) \
+ _ (4, change_to_filter_exclude) \
+ _ (5, allow_new_sources) \
+ _ (6, block_old_sources)
+
+typedef enum
+{
+#define _(n,f) IGMP_MEMBERSHIP_GROUP_##f = n,
+ foreach_igmp_membership_group_v3_type
+#undef _
+} igmp_membership_group_v3_type_t;
+
+typedef struct
+{
+ igmp_membership_group_v3_type_t type:8;
+
+ /* Number of 32 bit words of aux data after source addresses. */
+ u8 n_aux_u32s;
+
+ /* Number of source addresses that follow. */
+ u16 n_src_addresses;
+
+ /* Destination multicast address. */
+ ip4_address_t dst_address;
+
+ ip4_address_t src_addresses[0];
+} igmp_membership_group_v3_t;
+
+always_inline igmp_membership_group_v3_t *
+igmp_membership_group_v3_next (igmp_membership_group_v3_t * g)
+{
+ /* Skip the fixed header plus the source list and aux data;
+ n_src_addresses is network byte order on the wire. */
+ return ((void *) g + sizeof (g[0])
+ + clib_net_to_host_u16 (g->n_src_addresses)
+ * sizeof (g->src_addresses[0])
+ + g->n_aux_u32s * sizeof (u32));
+}
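+
+/*
+ * Sketch of walking the variable-length groups in a v3 report with
+ * the helper above (bounds checking against the IP payload length is
+ * the caller's job and omitted here):
+ *
+ * igmp_membership_group_v3_t *g = report->groups;
+ * u16 i, n = clib_net_to_host_u16 (report->n_groups);
+ * for (i = 0; i < n; i++)
+ * {
+ * // use g->dst_address and g->src_addresses[] ...
+ * g = igmp_membership_group_v3_next (g);
+ * }
+ */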
+
+typedef struct
+{
+ /* Type 0x22. */
+ igmp_header_t header;
+
+ u16 unused;
+
+ /* Number of groups which follow. */
+ u16 n_groups;
+
+ igmp_membership_group_v3_t groups[0];
+} igmp_membership_report_v3_t;
+
+/* The IP6 flavor of IGMP is called MLD, which is embedded in ICMP6. */
+typedef struct
+{
+ /* Preceded by ICMPv6 header. */
+ u16 max_response_delay_in_milliseconds;
+ u16 reserved;
+ ip6_address_t dst;
+} mld_header_t;
+
+#endif /* included_vnet_igmp_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
new file mode 100644
index 00000000..f26d7943
--- /dev/null
+++ b/src/vnet/ip/ip.api
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+ This file defines vpp IP control-plane API messages which are generally
+ called through a shared memory interface.
+*/
+
+/** \brief Add / del table request
+ A table can be added multiple times, but needs to be deleted only once.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - V4 or V6 table
+ @param table_id - table ID associated with the route
+ This table ID will apply to both the unicast
+ and multicast FIBs
+ @param name - A client provided name/tag for the table. If this is
+ not set by the client, then VPP will generate something
+ meaningful.
+*/
+autoreply define ip_table_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+ u8 is_ipv6;
+ u8 is_add;
+ u8 name[64];
+};
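+
+/* Usage sketch (illustrative only): a binary-API client creating an
+   ip4 table. vl_api_ip_table_add_del_t and VL_API_IP_TABLE_ADD_DEL are
+   generated from the definition above; the send queue `q' is assumed
+   to be connected already.
+
+     vl_api_ip_table_add_del_t *mp = vl_msg_api_alloc (sizeof (*mp));
+     memset (mp, 0, sizeof (*mp));
+     mp->_vl_msg_id = ntohs (VL_API_IP_TABLE_ADD_DEL);
+     mp->table_id = ntohl (100);
+     mp->is_add = 1;
+     strncpy ((char *) mp->name, "clients-table", sizeof (mp->name) - 1);
+     vl_msg_api_send_shmem (q, (u8 *) & mp);
+*/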
+
+/** \brief Dump IP fib table
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define ip_fib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief FIB path
+ @param sw_if_index - index of the interface
+ @param weight - The weight, for UCMP
+  @param preference - The preference of the path; the lowest preference is preferred
+ @param is_local - local if non-zero, else remote
+ @param is_drop - Drop the packet
+ @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+ @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+ @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2
+ @param next_hop[16] - the next hop address
+
+ WARNING: this type is replicated, pending cleanup completion
+*/
+typeonly manual_print manual_endian define fib_path
+{
+ u32 sw_if_index;
+ u8 weight;
+ u8 preference;
+ u8 is_local;
+ u8 is_drop;
+ u8 is_unreach;
+ u8 is_prohibit;
+ u8 afi;
+ u8 next_hop[16];
+};
+
+/** \brief IP FIB table response
+    @param table_id - IP fib table id
+    @param address_length - mask length
+    @param address - ip4 prefix
+    @param count - the number of fib_path entries in path
+    @param path - array of fib_path structures
+*/
+manual_endian manual_print define ip_fib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 table_name[64];
+ u8 address_length;
+ u8 address[4];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
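+
+/* Note on the variable-size member above: in the API language a field
+   declared as `vl_api_fib_path_t path[count]' is sized at run time by
+   the `count' member, so a sender allocates the message with room for
+   the paths, e.g. (sketch, with `mp' and `n' illustrative):
+
+     mp = vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_fib_path_t));
+     mp->count = htonl (n);
+*/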
+
+/** \brief Dump IP6 fib table
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define ip6_fib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IP6 FIB table entry response
+    @param table_id - IP6 fib table id
+    @param address_length - mask length
+    @param address - ip6 prefix
+    @param count - the number of fib_path entries in path
+    @param path - array of fib_path structures
+*/
+manual_endian manual_print define ip6_fib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 table_name[64];
+ u8 address_length;
+ u8 address[16];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+/** \brief Dump IP neighbors
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - the interface to dump neighbors from
+    @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4]
+*/
+*/
+define ip_neighbor_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+/** \brief IP neighbors dump response
+    @param context - sender context which was passed in the request
+    @param is_static - [1|0] to indicate if neighbor is statically configured
+    @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4]
+    @param mac_address - l2 address of the neighbor
+    @param ip_address - ip4 or ip6 address of the neighbor
+*/
+define ip_neighbor_details {
+ u32 context;
+ u8 is_static;
+ u8 is_ipv6;
+ u8 mac_address[6];
+ u8 ip_address[16];
+};
+
+/** \brief IP neighbor add / del request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface used to reach neighbor
+    @param is_add - 1 to add neighbor, 0 to delete
+    @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4
+    @param is_static - a static neighbor entry - these are not flushed
+                       when the interface goes down.
+    @param is_no_adj_fib - Do not create a corresponding entry in the FIB
+                           table for the neighbor.
+    @param mac_address - l2 address of the neighbor
+    @param dst_address - ip4 or ip6 address of the neighbor
+*/
+autoreply define ip_neighbor_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* 1 = add, 0 = delete */
+ u8 is_add;
+ u8 is_ipv6;
+ u8 is_static;
+ u8 is_no_adj_fib;
+ u8 mac_address[6];
+ u8 dst_address[16];
+};
+
+/** \brief Set the ip flow hash config for a fib request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - vrf/fib id
+ @param is_ipv6 - if non-zero the fib is ip6, else ip4
+ @param src - if non-zero include src in flow hash
+ @param dst - if non-zero include dst in flow hash
+ @param sport - if non-zero include sport in flow hash
+ @param dport - if non-zero include dport in flow hash
+    @param proto - if non-zero include proto in flow hash
+ @param reverse - if non-zero include reverse in flow hash
+*/
+autoreply define set_ip_flow_hash
+{
+ u32 client_index;
+ u32 context;
+ u32 vrf_id;
+ u8 is_ipv6;
+ u8 src;
+ u8 dst;
+ u8 sport;
+ u8 dport;
+ u8 proto;
+ u8 reverse;
+};
+
+/** \brief IPv6 router advertisement config request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface to configure
+    @param suppress - suppress sending of router-advertisement messages
+    @param managed - set the "managed address configuration" flag (M-bit)
+    @param other - set the "other configuration" flag (O-bit)
+    @param ll_option - include the source link-layer address option
+    @param send_unicast - send router-advertisements as unicast replies
+    @param cease - cease sending router-advertisements
+    @param is_no - 1 to revert the specified settings to their defaults
+    @param default_router - advertise this router as a default router
+    @param max_interval - maximum interval between advertisements, in seconds
+    @param min_interval - minimum interval between advertisements, in seconds
+    @param lifetime - advertised router lifetime, in seconds
+    @param initial_count - number of initial advertisements to send
+    @param initial_interval - interval between initial advertisements, in seconds
+*/
+autoreply define sw_interface_ip6nd_ra_config
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 suppress;
+ u8 managed;
+ u8 other;
+ u8 ll_option;
+ u8 send_unicast;
+ u8 cease;
+ u8 is_no;
+ u8 default_router;
+ u32 max_interval;
+ u32 min_interval;
+ u32 lifetime;
+ u32 initial_count;
+ u32 initial_interval;
+};
+
+/** \brief IPv6 router advertisement prefix config request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - The interface the RA prefix information is for
+    @param address[] - The prefix to advertise
+    @param address_length - the prefix length
+    @param use_default - Revert to default settings
+    @param no_advertise - Do not advertise this prefix
+    @param off_link - The prefix is off link (it is not configured on the
+                      interface). Clears the L-flag, which when set indicates
+                      that this prefix can be used for on-link determination.
+    @param no_autoconfig - Clears the A-flag, which when set indicates that
+                           this prefix can be used for stateless address
+                           configuration.
+    @param no_onlink - The prefix is not on link. Make sure this is consistent
+                       with the off_link parameter else YMMV
+    @param is_no - add/delete the configuration
+    @param val_lifetime - The length of time in
+                          seconds (relative to the time the packet is sent)
+                          that the prefix is valid for the purpose of on-link
+                          determination. A value of all one bits
+                          (0xffffffff) represents infinity
+    @param pref_lifetime - The length of time in
+                           seconds (relative to the time the packet is sent)
+                           that addresses generated from the prefix via
+                           stateless address autoconfiguration remain
+                           preferred [ADDRCONF]. A value of all one bits
+                           (0xffffffff) represents infinity.
+*/
+autoreply define sw_interface_ip6nd_ra_prefix
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 address[16];
+ u8 address_length;
+ u8 use_default;
+ u8 no_advertise;
+ u8 off_link;
+ u8 no_autoconfig;
+ u8 no_onlink;
+ u8 is_no;
+ u32 val_lifetime;
+ u32 pref_lifetime;
+};
+
+/** \brief IPv6 ND proxy config
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - The interface the host is on
+    @param address - The address of the host to proxy for
+    @param is_del - 0 to add the proxy entry, 1 to delete it
+*/
+autoreply define ip6nd_proxy_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_del;
+ u8 address[16];
+};
+
+/** \brief IPv6 ND proxy details returned after request
+    @param context - sender context which was passed in the request
+    @param sw_if_index - The interface the host is on
+    @param address - The address of the host being proxied for
+*/
+define ip6nd_proxy_details
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 address[16];
+};
+
+/** \brief IPv6 ND proxy dump request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define ip6nd_proxy_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IPv6 interface enable / disable request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+ @param enable - if non-zero enable ip6 on interface, else disable
+*/
+autoreply define sw_interface_ip6_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 enable; /* set to true if enable */
+};
+
+/** \brief IPv6 set link local address on interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to set link local on
+ @param address[] - the new link local address
+*/
+autoreply define sw_interface_ip6_set_link_local_address
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 address[16];
+};
+
+/** \brief Add / del route request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param next_hop_sw_if_index - the interface used to reach the next hop
+    @param table_id - the fib table /vrf associated with the route
+    @param classify_table_index - classify table index, used when is_classify is set
+    @param create_vrf_if_needed - create the fib table if it does not already exist
+    @param is_add - 1 if adding the route, 0 if deleting
+    @param is_drop - Drop the packet
+    @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+    @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+    @param is_ipv6 - 0 if an ip4 route, else ip6
+    @param is_local - The route results in packets delivered to the host
+    @param is_classify - The route results in packets sent to the classifier
+    @param is_multipath - Set to 1 if this is a multipath route, else 0
+    @param not_last - 1 if this is not the last message in a group of
+                      multiple add/del messages
+    @param next_hop_weight - The weight of the path, for UCMP
+    @param dst_address_length - The prefix length
+    @param dst_address[16] - The prefix to add/delete
+    @param next_hop_address[16] - The address of the next hop
+    @param next_hop_n_out_labels - the number of labels in the label stack
+    @param next_hop_out_label_stack - the next-hop output label stack, outermost first
+    @param next_hop_via_label - The next-hop is resolved via a local label
+*/
+autoreply define ip_add_del_route
+{
+ u32 client_index;
+ u32 context;
+ u32 next_hop_sw_if_index;
+ u32 table_id;
+ u32 classify_table_index;
+ u32 next_hop_table_id;
+ u8 create_vrf_if_needed;
+ u8 is_add;
+ u8 is_drop;
+ u8 is_unreach;
+ u8 is_prohibit;
+ u8 is_ipv6;
+ u8 is_local;
+ u8 is_classify;
+ u8 is_multipath;
+ u8 is_resolve_host;
+ u8 is_resolve_attached;
+ /* Is last/not-last message in group of multiple add/del messages. */
+ u8 not_last;
+ u8 next_hop_weight;
+ u8 next_hop_preference;
+ u8 dst_address_length;
+ u8 dst_address[16];
+ u8 next_hop_address[16];
+ u8 next_hop_n_out_labels;
+ u32 next_hop_via_label;
+ u32 next_hop_out_label_stack[next_hop_n_out_labels];
+};
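+
+/* Usage sketch (illustrative only): adding 10.0.0.0/24 via a next hop
+   `nh' on interface `sw_if_index', with an empty output label stack.
+   The allocation must include room for next_hop_n_out_labels u32
+   entries (zero here); dst and nh are ip4_address_t values.
+
+     vl_api_ip_add_del_route_t *mp =
+       vl_msg_api_alloc (sizeof (*mp) + 0 * sizeof (u32));
+     memset (mp, 0, sizeof (*mp));
+     mp->_vl_msg_id = ntohs (VL_API_IP_ADD_DEL_ROUTE);
+     mp->is_add = 1;
+     mp->dst_address_length = 24;
+     clib_memcpy (mp->dst_address, &dst, sizeof (ip4_address_t));
+     clib_memcpy (mp->next_hop_address, &nh, sizeof (ip4_address_t));
+     mp->next_hop_sw_if_index = ntohl (sw_if_index);
+*/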
+
+/** \brief Add / del multicast route request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param next_hop_sw_if_index - the interface used to reach the next hop
+    @param table_id - the fib table /vrf associated with the route
+
+    FIXME
+*/
+autoreply define ip_mroute_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 next_hop_sw_if_index;
+ u32 table_id;
+ u32 entry_flags;
+ u32 itf_flags;
+ u32 rpf_id;
+ u16 grp_address_length;
+ u8 create_vrf_if_needed;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 is_local;
+ u8 grp_address[16];
+ u8 src_address[16];
+};
+
+/** \brief Dump IP multicast fib table
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define ip_mfib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IP Multicast FIB table response
+    @param table_id - IP fib table id
+    @param address_length - mask length
+    @param grp_address - Group address/prefix
+    @param src_address - Source address
+    @param count - the number of fib_path entries in path
+    @param path - array of fib_path structures
+*/
+manual_endian manual_print define ip_mfib_details
+{
+ u32 context;
+ u32 table_id;
+ u32 entry_flags;
+ u32 rpf_id;
+ u8 address_length;
+ u8 grp_address[4];
+ u8 src_address[4];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+/** \brief Dump IP6 multicast fib table
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define ip6_mfib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IP6 Multicast FIB table response
+    @param table_id - IP fib table id
+    @param address_length - mask length
+    @param grp_address - Group address/prefix
+    @param src_address - Source address
+    @param count - the number of fib_path entries in path
+    @param path - array of fib_path structures
+*/
+manual_endian manual_print define ip6_mfib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 address_length;
+ u8 grp_address[16];
+ u8 src_address[16];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+define ip_address_details
+{
+ u32 client_index;
+ u32 context;
+ u8 ip[16];
+ u8 prefix_length;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+define ip_address_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+define ip_details
+{
+ u32 sw_if_index;
+ u32 context;
+ u8 is_ipv6;
+};
+
+define ip_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+};
+
+define mfib_signal_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+define mfib_signal_details
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 table_id;
+ u16 grp_address_len;
+ u8 grp_address[16];
+ u8 src_address[16];
+ u16 ip_packet_len;
+ u8 ip_packet_data[256];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
new file mode 100644
index 00000000..7e26bc6c
--- /dev/null
+++ b/src/vnet/ip/ip.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip.h: ip generic (4 or 6) main
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_main_h
+#define included_ip_main_h
+
+#include <vppinfra/hash.h>
+#include <vppinfra/heap.h> /* adjacency heap */
+#include <vppinfra/ptclosure.h>
+
+#include <vnet/vnet.h>
+
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/lookup.h>
+
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/ip/icmp46_packet.h>
+
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_error.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/icmp4.h>
+
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_error.h>
+#include <vnet/ip/icmp6.h>
+#include <vnet/classify/vnet_classify.h>
+
+/* Per protocol info. */
+typedef struct
+{
+ /* Protocol name (also used as hash key). */
+ u8 *name;
+
+ /* Protocol number. */
+ ip_protocol_t protocol;
+
+ /* Format function for this IP protocol. */
+ format_function_t *format_header;
+
+ /* Parser for header. */
+ unformat_function_t *unformat_header;
+
+ /* Parser for per-protocol matches. */
+ unformat_function_t *unformat_match;
+
+ /* Parser for packet generator edits for this protocol. */
+ unformat_function_t *unformat_pg_edit;
+} ip_protocol_info_t;
+
+/* Per TCP/UDP port info. */
+typedef struct
+{
+ /* Port name (used as hash key). */
+ u8 *name;
+
+ /* UDP/TCP port number in network byte order. */
+ u16 port;
+
+ /* Port specific format function. */
+ format_function_t *format_header;
+
+ /* Parser for packet generator edits for this protocol. */
+ unformat_function_t *unformat_pg_edit;
+} tcp_udp_port_info_t;
+
+typedef struct
+{
+ /* Per IP protocol info. */
+ ip_protocol_info_t *protocol_infos;
+
+ /* Protocol info index hashed by 8 bit IP protocol. */
+ uword *protocol_info_by_protocol;
+
+ /* Hash table mapping IP protocol name (see protocols.def)
+ to protocol number. */
+ uword *protocol_info_by_name;
+
+ /* Per TCP/UDP port info. */
+ tcp_udp_port_info_t *port_infos;
+
+ /* Hash table from network-byte-order port to port info index. */
+ uword *port_info_by_port;
+
+ /* Hash table mapping TCP/UDP name to port info index. */
+ uword *port_info_by_name;
+} ip_main_t;
+
+extern ip_main_t ip_main;
+
+clib_error_t *ip_main_init (vlib_main_t * vm);
+
+static inline ip_protocol_info_t *
+ip_get_protocol_info (ip_main_t * im, u32 protocol)
+{
+ uword *p;
+
+ p = hash_get (im->protocol_info_by_protocol, protocol);
+ return p ? vec_elt_at_index (im->protocol_infos, p[0]) : 0;
+}
+
+static inline tcp_udp_port_info_t *
+ip_get_tcp_udp_port_info (ip_main_t * im, u32 port)
+{
+ uword *p;
+
+ p = hash_get (im->port_info_by_port, port);
+ return p ? vec_elt_at_index (im->port_infos, p[0]) : 0;
+}
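+
+/* Usage sketch: looking up the registered per-protocol format function,
+   e.g. to pretty-print a header. `s', `hdr' and `n_bytes' are
+   illustrative; pi is NULL for protocols with no registered info.
+
+     ip_protocol_info_t *pi =
+       ip_get_protocol_info (&ip_main, IP_PROTOCOL_TCP);
+     if (pi && pi->format_header)
+       s = format (s, "%U", pi->format_header, hdr, n_bytes);
+*/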
+
+always_inline ip_csum_t
+ip_incremental_checksum_buffer (vlib_main_t * vm,
+ vlib_buffer_t * first_buffer,
+ u32 first_buffer_offset,
+ u32 n_bytes_to_checksum, ip_csum_t sum)
+{
+ vlib_buffer_t *b = first_buffer;
+ u32 n_bytes_left = n_bytes_to_checksum;
+ ASSERT (b->current_length >= first_buffer_offset);
+ void *h;
+ u32 n;
+
+ n = clib_min (n_bytes_left, b->current_length);
+ h = vlib_buffer_get_current (b) + first_buffer_offset;
+ sum = ip_incremental_checksum (sum, h, n);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ while (1)
+ {
+ n_bytes_left -= n;
+ if (n_bytes_left == 0)
+ break;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ n = clib_min (n_bytes_left, b->current_length);
+ h = vlib_buffer_get_current (b);
+ sum = ip_incremental_checksum (sum, h, n);
+ }
+ }
+
+ return sum;
+}
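+
+/* Usage sketch: checksumming a payload that may span chained buffers,
+   starting l4_offset bytes into the first buffer; the final argument is
+   the initial (partial) checksum. All names here are illustrative.
+
+     ip_csum_t sum =
+       ip_incremental_checksum_buffer (vm, b0, l4_offset, payload_len, 0);
+     u16 csum = ~ip_csum_fold (sum);
+*/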
+
+void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index);
+
+extern vlib_node_registration_t ip4_inacl_node;
+extern vlib_node_registration_t ip6_inacl_node;
+
+void ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api,
+ const u8 * name);
+
+void ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api);
+
+int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index,
+ u32 table_id, u8 is_api);
+
+#endif /* included_ip_main_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
new file mode 100644
index 00000000..af0e6b9a
--- /dev/null
+++ b/src/vnet/ip/ip4.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4.h: ip4 main include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_h
+#define included_ip_ip4_h
+
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/buffer.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/icmp46_packet.h>
+
+typedef struct ip4_mfib_t
+{
+ /* Hash table for each prefix length mapping. */
+ uword *fib_entry_by_dst_address[65];
+
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+} ip4_mfib_t;
+
+struct ip4_main_t;
+
+typedef void (ip4_add_del_interface_address_function_t)
+ (struct ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 if_address_index, u32 is_del);
+
+typedef struct
+{
+ ip4_add_del_interface_address_function_t *function;
+ uword function_opaque;
+} ip4_add_del_interface_address_callback_t;
+
+typedef void (ip4_table_bind_function_t)
+ (struct ip4_main_t * im,
+ uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index);
+
+typedef struct
+{
+ ip4_table_bind_function_t *function;
+ uword function_opaque;
+} ip4_table_bind_callback_t;
+
+/**
+ * @brief IPv4 main type.
+ *
+ * State of IPv4 VPP processing including:
+ * - FIBs
+ * - Feature indices used in feature topological sort
+ * - Feature node run time references
+ */
+
+typedef struct ip4_main_t
+{
+ ip_lookup_main_t lookup_main;
+
+ /** Vector of FIBs. */
+ struct fib_table_t_ *fibs;
+
+ /** Vector of MTries. */
+ struct ip4_fib_t_ *v4_fibs;
+
+ /** Vector of MFIBs. */
+ struct mfib_table_t_ *mfibs;
+
+ u32 fib_masks[33];
+
+ /** Table index indexed by software interface. */
+ u32 *fib_index_by_sw_if_index;
+
+ /** Table index indexed by software interface. */
+ u32 *mfib_index_by_sw_if_index;
+
+ /* IP4 enabled count by software interface */
+ u8 *ip_enabled_by_sw_if_index;
+
+ /** Hash table mapping table id to fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *fib_index_by_table_id;
+
+ /** Hash table mapping table id to multicast fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *mfib_index_by_table_id;
+
+ /** Functions to call when interface address changes. */
+ ip4_add_del_interface_address_callback_t
+ * add_del_interface_address_callbacks;
+
+  /** Functions to call when the interface-to-table binding changes. */
+ ip4_table_bind_callback_t *table_bind_callbacks;
+
+ /** Template used to generate IP4 ARP packets. */
+ vlib_packet_template_t ip4_arp_request_packet_template;
+
+ /** Seed for Jenkins hash used to compute ip4 flow hash. */
+ u32 flow_hash_seed;
+
+ /** @brief Template information for VPP generated packets */
+ struct
+ {
+ /** TTL to use for host generated packets. */
+ u8 ttl;
+
+ /** TOS byte to use for host generated packets. */
+ u8 tos;
+
+ u8 pad[2];
+ } host_config;
+} ip4_main_t;
+
+/** Global ip4 main structure. */
+extern ip4_main_t ip4_main;
+
+/** Global ip4 input node. Errors get attached to ip4 input node. */
+extern vlib_node_registration_t ip4_input_node;
+extern vlib_node_registration_t ip4_lookup_node;
+extern vlib_node_registration_t ip4_local_node;
+extern vlib_node_registration_t ip4_rewrite_node;
+extern vlib_node_registration_t ip4_rewrite_mcast_node;
+extern vlib_node_registration_t ip4_rewrite_local_node;
+extern vlib_node_registration_t ip4_arp_node;
+extern vlib_node_registration_t ip4_glean_node;
+extern vlib_node_registration_t ip4_midchain_node;
+
+always_inline uword
+ip4_destination_matches_route (const ip4_main_t * im,
+ const ip4_address_t * key,
+ const ip4_address_t * dest, uword dest_length)
+{
+ return 0 == ((key->data_u32 ^ dest->data_u32) & im->fib_masks[dest_length]);
+}
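+
+/* Example: a /16 comparison masks off all but the leading 16 address
+   bits, so with dest = 10.1.0.0 and dest_length = 16 any key of the
+   form 10.1.x.x matches:
+
+     if (ip4_destination_matches_route (im, &key, &dest, 16))
+       ; // key lies within 10.1.0.0/16
+*/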
+
+always_inline uword
+ip4_destination_matches_interface (ip4_main_t * im,
+ ip4_address_t * key,
+ ip_interface_address_t * ia)
+{
+ ip4_address_t *a = ip_interface_address_get_address (&im->lookup_main, ia);
+ return ip4_destination_matches_route (im, key, a, ia->address_length);
+}
+
+/* As above, but allows unaligned destinations (e.g. works directly on the IP header of a packet). */
+always_inline uword
+ip4_unaligned_destination_matches_route (ip4_main_t * im,
+ ip4_address_t * key,
+ ip4_address_t * dest,
+ uword dest_length)
+{
+ return 0 ==
+ ((clib_mem_unaligned (&key->data_u32, u32) ^ dest->
+ data_u32) & im->fib_masks[dest_length]);
+}
+
+always_inline int
+ip4_src_address_for_packet (ip_lookup_main_t * lm,
+ u32 sw_if_index, ip4_address_t * src)
+{
+ u32 if_add_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ if (PREDICT_TRUE (if_add_index != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index);
+ ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add);
+ *src = *if_ip;
+ return 0;
+ }
+ else
+ {
+ src->as_u32 = 0;
+ }
+ return (!0);
+}
+
+/* Find interface address which matches destination. */
+always_inline ip4_address_t *
+ip4_interface_address_matching_destination (ip4_main_t * im,
+ ip4_address_t * dst,
+ u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia;
+ ip4_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip4_address_t * a = ip_interface_address_get_address (lm, ia);
+ if (ip4_destination_matches_route (im, dst, a, ia->address_length))
+ {
+ result = a;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+ip4_address_t *ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia);
+
+clib_error_t *ip4_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 is_del);
+
+void ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable);
+
+int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2);
+
+/* Send an ARP request to see if given destination is reachable on given interface. */
+clib_error_t *ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst,
+ u32 sw_if_index);
+
+clib_error_t *ip4_set_arp_limit (u32 arp_limit);
+
+uword
+ip4_udp_register_listener (vlib_main_t * vm,
+ u16 dst_port, u32 next_node_index);
+
+void
+ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index);
+
+u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip4_header_t * ip0);
+
+void ip4_register_protocol (u32 protocol, u32 node_index);
+
+serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main;
+
+int vnet_set_ip4_flow_hash (u32 table_id,
+ flow_hash_config_t flow_hash_config);
+
+int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index);
+
+/* Compute flow hash. We'll use it to select which adjacency to use for this
+ flow. And other things. */
+always_inline u32
+ip4_compute_flow_hash (const ip4_header_t * ip,
+ flow_hash_config_t flow_hash_config)
+{
+ tcp_header_t *tcp = (void *) (ip + 1);
+ u32 a, b, c, t1, t2;
+ uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP
+ || ip->protocol == IP_PROTOCOL_UDP);
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
+ ? ip->src_address.data_u32 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR)
+ ? ip->dst_address.data_u32 : 0;
+
+ a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1;
+ b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2;
+ b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
+
+ t1 = is_tcp_udp ? tcp->src : 0;
+ t2 = is_tcp_udp ? tcp->dst : 0;
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0;
+
+ c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
+ (t1 << 16) | t2 : (t2 << 16) | t1;
+
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+
+ return c;
+}
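+
+/* Usage sketch: hashing with the default 5-tuple configuration so that
+   packets of one flow always pick the same load-balance bucket (`lb'
+   and `ip0' are illustrative):
+
+     u32 h = ip4_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT);
+     dpo = load_balance_get_bucket_i (lb, h & lb->lb_n_buckets_minus_1);
+*/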
+
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+u8 *format_ip4_forward_next_trace (u8 * s, va_list * args);
+
+u32 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);
+
+#define IP_DF 0x4000 /* don't fragment */
+
+/**
+ * Push IPv4 header to buffer
+ *
+ * This does not support fragmentation.
+ *
+ * @param vm - vlib_main
+ * @param b - buffer to write the header to
+ * @param src - source IP
+ * @param dst - destination IP
+ * @param prot - payload proto
+ *
+ * @return - pointer to start of IP header
+ */
+always_inline void *
+vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b,
+ ip4_address_t * src, ip4_address_t * dst, int proto,
+ u8 csum_offload)
+{
+ ip4_header_t *ih;
+
+ /* make some room */
+ ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t));
+
+ ih->ip_version_and_header_length = 0x45;
+ ih->tos = 0;
+ ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
+
+ /* No fragments */
+ ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF);
+ ih->ttl = 255;
+ ih->protocol = proto;
+ ih->src_address.as_u32 = src->as_u32;
+ ih->dst_address.as_u32 = dst->as_u32;
+
+ /* Offload ip4 header checksum generation */
+ if (csum_offload)
+ {
+ ih->checksum = 0;
+ b->flags |= VNET_BUFFER_F_OFFLOAD_IP_CKSUM | VNET_BUFFER_F_IS_IP4;
+ vnet_buffer (b)->l3_hdr_offset = (u8 *) ih - b->data;
+ vnet_buffer (b)->l4_hdr_offset = vnet_buffer (b)->l3_hdr_offset +
+ sizeof (*ih);
+ }
+ else
+ ih->checksum = ip4_header_checksum (ih);
+
+ return ih;
+}
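+
+/* Usage sketch: prepending an IPv4 header to a payload already placed
+   in buffer `b'; src and dst are ip4_address_t values and the final 0
+   selects a software-computed checksum rather than offload.
+
+     ip4_header_t *ih =
+       vlib_buffer_push_ip4 (vm, b, &src, &dst, IP_PROTOCOL_UDP, 0);
+*/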
+#endif /* included_ip_ip4_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c
new file mode 100644
index 00000000..668c6506
--- /dev/null
+++ b/src/vnet/ip/ip46_cli.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_cli.c: ip4 commands
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/**
+ * @file
+ * @brief Set IP Address.
+ *
+ * Configure an IPv4 or IPv6 address on an interface.
+ */
+
+
+int
+ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2)
+{
+ return clib_net_to_host_u32 (a1->data_u32) -
+ clib_net_to_host_u32 (a2->data_u32);
+}
+
+int
+ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a1->as_u16); i++)
+ {
+ int cmp =
+ clib_net_to_host_u16 (a1->as_u16[i]) -
+ clib_net_to_host_u16 (a2->as_u16[i]);
+ if (cmp != 0)
+ return cmp;
+ }
+ return 0;
+}
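+
+/* Usage sketch: both compare routines return negative, zero or positive
+   in the manner of memcmp(), comparing in host byte order, e.g.
+
+     if (ip4_address_compare (&a, &b) < 0)
+       ; // a sorts before b
+*/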
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_command, static) = {
+ .path = "set interface ip",
+ .short_help = "IP4/IP6 commands",
+};
+/* *INDENT-ON* */
+
+void
+ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip4_address_t *ip4_addrs = 0;
+ u32 *ip4_masks = 0;
+ ip6_main_t *im6 = &ip6_main;
+ ip6_address_t *ip6_addrs = 0;
+ u32 *ip6_masks = 0;
+ ip_interface_address_t *ia;
+ int i;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ ip4_address_t * x = (ip4_address_t *)
+ ip_interface_address_get_address (&im4->lookup_main, ia);
+ vec_add1 (ip4_addrs, x[0]);
+ vec_add1 (ip4_masks, ia->address_length);
+ }));
+ /* *INDENT-ON* */
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ ip6_address_t * x = (ip6_address_t *)
+ ip_interface_address_get_address (&im6->lookup_main, ia);
+ vec_add1 (ip6_addrs, x[0]);
+ vec_add1 (ip6_masks, ia->address_length);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (ip4_addrs); i++)
+ ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i],
+ ip4_masks[i], 1 /* is_del */ );
+ for (i = 0; i < vec_len (ip6_addrs); i++)
+ ip6_add_del_interface_address (vm, sw_if_index, &ip6_addrs[i],
+ ip6_masks[i], 1 /* is_del */ );
+
+ vec_free (ip4_addrs);
+ vec_free (ip4_masks);
+ vec_free (ip6_addrs);
+ vec_free (ip6_masks);
+}
+
+static clib_error_t *
+ip_address_delete_cleanup (vnet_main_t * vnm, u32 hw_if_index, u32 is_create)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_hw_interface_t *hw;
+
+ if (is_create)
+ return 0;
+
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+ ip_del_all_interface_addresses (vm, hw->sw_if_index);
+ return 0;
+}
+
+VNET_HW_INTERFACE_ADD_DEL_FUNCTION (ip_address_delete_cleanup);
+
+static clib_error_t *
+add_del_ip_address (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_address_t a4;
+ ip6_address_t a6;
+ clib_error_t *error = 0;
+ u32 sw_if_index, length, is_del;
+
+ sw_if_index = ~0;
+ is_del = 0;
+
+ if (unformat (input, "del"))
+ is_del = 1;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (is_del && unformat (input, "all"))
+ ip_del_all_interface_addresses (vm, sw_if_index);
+ else if (unformat (input, "%U/%d", unformat_ip4_address, &a4, &length))
+ error = ip4_add_del_interface_address (vm, sw_if_index, &a4, length,
+ is_del);
+ else if (unformat (input, "%U/%d", unformat_ip6_address, &a6, &length))
+ error = ip6_add_del_interface_address (vm, sw_if_index, &a6, length,
+ is_del);
+ else
+ {
+ error = clib_error_return (0, "expected IP4/IP6 address/length `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+
+done:
+ return error;
+}
+
+/*?
+ * Add an IP Address to an interface or remove an IP Address from an interface.
+ * The IP Address can be an IPv4 or an IPv6 address. Interfaces may have multiple
+ * IPv4 and IPv6 addresses. There is no concept of primary vs. secondary
+ * interface addresses; they're just addresses.
+ *
+ * To display the addresses associated with a given interface, use the command
+ * '<em>show interface address <interface></em>'.
+ *
+ * Note that the debug CLI does not enforce classful mask-width / addressing
+ * constraints.
+ *
+ * @cliexpar
+ * @parblock
+ * An example of how to add an IPv4 address to an interface:
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 172.16.2.12/24}
+ *
+ * An example of how to add an IPv6 address to an interface:
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 @::a:1:1:0:7/126}
+ *
+ * To delete a specific interface ip address:
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 172.16.2.12/24 del}
+ *
+ * To delete all interfaces addresses (IPv4 and IPv6):
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 del all}
+ * @endparblock
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = {
+ .path = "set interface ip address",
+ .function = add_del_ip_address,
+ .short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]",
+};
+/* *INDENT-ON* */
+
+/* Dummy init function to get us linked in. */
+static clib_error_t *
+ip4_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h
new file mode 100644
index 00000000..95d12ec2
--- /dev/null
+++ b/src/vnet/ip/ip4_error.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_error.h: ip4 fast path errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_error_h
+#define included_ip_ip4_error_h
+
+#define foreach_ip4_error \
+ /* Must be first. */ \
+ _ (NONE, "valid ip4 packets") \
+ \
+ /* Errors signalled by ip4-input */ \
+ _ (TOO_SHORT, "ip4 length < 20 bytes") \
+ _ (BAD_LENGTH, "ip4 length > l2 length") \
+ _ (BAD_CHECKSUM, "bad ip4 checksum") \
+ _ (VERSION, "ip4 version != 4") \
+ _ (OPTIONS, "ip4 options present") \
+ _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \
+ _ (TIME_EXPIRED, "ip4 ttl <= 1") \
+ \
+ /* Errors signalled by ip4-rewrite. */ \
+ _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \
+ _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \
+ _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \
+ _ (ADJACENCY_DROP, "ip4 adjacency drop") \
+ _ (ADJACENCY_PUNT, "ip4 adjacency punt") \
+ \
+ /* Errors signalled by ip4-local. */ \
+ _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
+ _ (TCP_CHECKSUM, "bad tcp checksum") \
+ _ (UDP_CHECKSUM, "bad udp checksum") \
+ _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
+ \
+ /* Errors signalled by ip4-source-check. */ \
+ _ (UNICAST_SOURCE_CHECK_FAILS, "ip4 unicast source check fails") \
+ \
+ /* Spoofed packets in ip4-rewrite-local */ \
+  _ (SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
+  \
+  /* Errors signalled by ip4-inacl */ \
+ _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
+ _ (INACL_SESSION_DENY, "input ACL session deny drops")
+
+typedef enum
+{
+#define _(sym,str) IP4_ERROR_##sym,
+ foreach_ip4_error
+#undef _
+ IP4_N_ERROR,
+} ip4_error_t;
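+
+/* The X-macro above yields one enum constant per error (e.g.
+   IP4_ERROR_TOO_SHORT). The matching counter strings are produced by
+   re-expanding the same list, as in this sketch:
+
+     static char *ip4_error_strings[] = {
+     #define _(sym,str) str,
+       foreach_ip4_error
+     #undef _
+     };
+*/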
+
+#endif /* included_ip_ip4_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_format.c b/src/vnet/ip/ip4_format.c
new file mode 100644
index 00000000..c803e065
--- /dev/null
+++ b/src/vnet/ip/ip4_format.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_format.c: ip4 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format an IP4 address. */
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+/* Format an IP4 route destination and length. */
+u8 *
+format_ip4_address_and_length (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ u8 l = va_arg (*args, u32);
+ return format (s, "%U/%d", format_ip4_address, a, l);
+}
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+uword
+unformat_ip4_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ unsigned a[4];
+
+ if (!unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3]))
+ return 0;
+
+ if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256)
+ return 0;
+
+ result[0] = a[0];
+ result[1] = a[1];
+ result[2] = a[2];
+ result[3] = a[3];
+
+ return 1;
+}
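+
+/* Usage sketch: unformat functions plug into the %U directive, so a
+   CLI handler can parse an address straight from its input:
+
+     ip4_address_t a;
+     if (unformat (input, "%U", unformat_ip4_address, &a))
+       ; // a now holds the parsed address
+*/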
+
+/* Format an IP4 header. */
+u8 *
+format_ip4_header (u8 * s, va_list * args)
+{
+ ip4_header_t *ip = va_arg (*args, ip4_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 ip_version, header_bytes;
+ uword indent;
+
+  /* Not enough bytes for a full header: nothing to decode. */
+ if (max_header_bytes < sizeof (ip[0]))
+ return format (s, "IP header truncated");
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ ip_version = (ip->ip_version_and_header_length >> 4);
+ header_bytes = (ip->ip_version_and_header_length & 0xf) * sizeof (u32);
+
+ s = format (s, "%U: %U -> %U",
+ format_ip_protocol, ip->protocol,
+ format_ip4_address, ip->src_address.data,
+ format_ip4_address, ip->dst_address.data);
+
+ /* Show IP version and header length only with unexpected values. */
+ if (ip_version != 4 || header_bytes != sizeof (ip4_header_t))
+ s = format (s, "\n%Uversion %d, header length %d",
+ format_white_space, indent, ip_version, header_bytes);
+
+ s = format (s, "\n%Utos 0x%02x, ttl %d, length %d, checksum 0x%04x",
+ format_white_space, indent,
+ ip->tos, ip->ttl,
+ clib_net_to_host_u16 (ip->length),
+ clib_net_to_host_u16 (ip->checksum));
+
+ /* Check and report invalid checksums. */
+ {
+ u16 c = ip4_header_checksum (ip);
+ if (c != ip->checksum)
+ s = format (s, " (should be 0x%04x)", clib_net_to_host_u16 (c));
+ }
+
+ {
+ u32 f = clib_net_to_host_u16 (ip->flags_and_fragment_offset);
+ u32 o;
+
+ s = format (s, "\n%Ufragment id 0x%04x",
+ format_white_space, indent,
+ clib_net_to_host_u16 (ip->fragment_id));
+
+ /* Fragment offset. */
+ o = 8 * (f & 0x1fff);
+ f ^= o;
+ if (o != 0)
+ s = format (s, " offset %d", o);
+
+ if (f != 0)
+ {
+ s = format (s, ", flags ");
+#define _(l) if (f & IP4_HEADER_FLAG_##l) s = format (s, #l);
+ _(MORE_FRAGMENTS);
+ _(DONT_FRAGMENT);
+ _(CONGESTION);
+#undef _
+ }
+ }
+
+ /* Recurse into next protocol layer. */
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+ if (pi && pi->format_header)
+ s = format (s, "\n%U%U",
+ format_white_space, indent - 2, pi->format_header,
+ /* next protocol header */ (void *) ip + header_bytes,
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+/* Parse an IP4 header. */
+uword
+unformat_ip4_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ ip4_header_t *ip;
+ int old_length;
+
+ /* Allocate space for IP header. */
+ {
+ void *p;
+
+ old_length = vec_len (*result);
+ vec_add2 (*result, p, sizeof (ip4_header_t));
+ ip = p;
+ }
+
+ memset (ip, 0, sizeof (ip[0]));
+ ip->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_ip_protocol, &ip->protocol,
+ unformat_ip4_address, &ip->src_address,
+ unformat_ip4_address, &ip->dst_address))
+ return 0;
+
+ /* Parse options. */
+ while (1)
+ {
+ int i, j;
+
+ if (unformat (input, "tos %U", unformat_vlib_number, &i))
+ ip->tos = i;
+
+ else if (unformat (input, "ttl %U", unformat_vlib_number, &i))
+ ip->ttl = i;
+
+ else if (unformat (input, "fragment id %U offset %U",
+ unformat_vlib_number, &i, unformat_vlib_number, &j))
+ {
+ ip->fragment_id = clib_host_to_net_u16 (i);
+ ip->flags_and_fragment_offset |=
+ clib_host_to_net_u16 ((i / 8) & 0x1fff);
+ }
+
+ /* Flags. */
+ else if (unformat (input, "mf") || unformat (input, "MF"))
+ ip->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+ else if (unformat (input, "df") || unformat (input, "DF"))
+ ip->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+ else if (unformat (input, "ce") || unformat (input, "CE"))
+ ip->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_CONGESTION);
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ /* Fill in checksum. */
+ ip->checksum = ip4_header_checksum (ip);
+
+ /* Recurse into next protocol layer. */
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+ if (pi && pi->unformat_header)
+ {
+ if (!unformat_user (input, pi->unformat_header, result))
+ return 0;
+
+ /* Result may have moved. */
+ ip = (void *) *result + old_length;
+ }
+ }
+
+ /* Fill in IP length. */
+ ip->length = clib_host_to_net_u16 (vec_len (*result) - old_length);
+
+ return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
new file mode 100755
index 00000000..6b3453b5
--- /dev/null
+++ b/src/vnet/ip/ip4_forward.c
@@ -0,0 +1,3197 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_forward.c: IP v4 forwarding
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
+#include <vnet/ppp/ppp.h>
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vnet/api_errno.h> /* for API error numbers */
+#include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
+#include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
+#include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
+
+/**
+ * @file
+ * @brief IPv4 Forwarding.
+ *
+ * This file contains the source code for IPv4 forwarding.
+ */
+
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+always_inline uword
+ip4_lookup_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int lookup_for_responses_to_locally_received_packets)
+{
+ ip4_main_t *im = &ip4_main;
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ vlib_buffer_t *p0, *p1, *p2, *p3;
+ ip4_header_t *ip0, *ip1, *ip2, *ip3;
+ ip_lookup_next_t next0, next1, next2, next3;
+ const load_balance_t *lb0, *lb1, *lb2, *lb3;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
+ ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
+ u32 pi0, fib_index0, lb_index0;
+ u32 pi1, fib_index1, lb_index1;
+ u32 pi2, fib_index2, lb_index2;
+ u32 pi3, fib_index3, lb_index3;
+ flow_hash_config_t flow_hash_config0, flow_hash_config1;
+ flow_hash_config_t flow_hash_config2, flow_hash_config3;
+ u32 hash_c0, hash_c1, hash_c2, hash_c3;
+ const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ pi2 = to_next[2] = from[2];
+ pi3 = to_next[3] = from[3];
+
+ from += 4;
+ to_next += 4;
+ n_left_to_next -= 4;
+ n_left_from -= 4;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ p2 = vlib_get_buffer (vm, pi2);
+ p3 = vlib_get_buffer (vm, pi3);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ ip2 = vlib_buffer_get_current (p2);
+ ip3 = vlib_buffer_get_current (p3);
+
+ dst_addr0 = &ip0->dst_address;
+ dst_addr1 = &ip1->dst_address;
+ dst_addr2 = &ip2->dst_address;
+ dst_addr3 = &ip3->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ fib_index2 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p2)->sw_if_index[VLIB_RX]);
+ fib_index3 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p3)->sw_if_index[VLIB_RX]);
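+	  /* A non-~0 TX sw_if_index on the buffer overrides the FIB
+	     chosen from the RX interface: it is interpreted directly as
+	     a FIB index ("lookup in VRF" behaviour). */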
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+ fib_index1 =
+ (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+ fib_index2 =
+ (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
+ fib_index3 =
+ (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
+
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
+ mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
+ mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
+ leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
+ leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
+ }
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
+ leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
+ leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
+ }
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
+ leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
+ leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
+ }
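+	  /* The mtrie walk is 16-8-8: step_one consumes the top two bytes
+	     of the destination address, then steps 2 and 3 consume one byte
+	     each. Splitting the walk into three identical blocks keeps the
+	     four packets' lookups in lock-step, so the dependent loads of
+	     different packets can overlap. */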
+
+ if (lookup_for_responses_to_locally_received_packets)
+ {
+ lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
+ lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
+ lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
+ }
+ else
+ {
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
+ lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
+ }
+
+ ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
+ lb2 = load_balance_get (lb_index2);
+ lb3 = load_balance_get (lb_index3);
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+ ASSERT (lb2->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb2->lb_n_buckets));
+ ASSERT (lb3->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb3->lb_n_buckets));
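+	  /* lb_n_buckets is asserted to be a power of two above, so the
+	     bucket selection below reduces to a mask instead of a modulo:
+	     e.g. with 4 buckets, a flow hash of 0x2f selects bucket
+	     0x2f & 0x3 = 3. */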
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
+ hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
+ hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ flow_hash_config1 = lb1->lb_hash_config;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, flow_hash_config1);
+ dpo1 =
+ load_balance_get_fwd_bucket (lb1,
+ (hash_c1 &
+ (lb1->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+ if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
+ {
+ flow_hash_config2 = lb2->lb_hash_config;
+ hash_c2 = vnet_buffer (p2)->ip.flow_hash =
+ ip4_compute_flow_hash (ip2, flow_hash_config2);
+ dpo2 =
+ load_balance_get_fwd_bucket (lb2,
+ (hash_c2 &
+ (lb2->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo2 = load_balance_get_bucket_i (lb2, 0);
+ }
+ if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
+ {
+ flow_hash_config3 = lb3->lb_hash_config;
+ hash_c3 = vnet_buffer (p3)->ip.flow_hash =
+ ip4_compute_flow_hash (ip3, flow_hash_config3);
+ dpo3 =
+ load_balance_get_fwd_bucket (lb3,
+ (hash_c3 &
+ (lb3->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo3 = load_balance_get_bucket_i (lb3, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ next1 = dpo1->dpoi_next_node;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+ next2 = dpo2->dpoi_next_node;
+ vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+ next3 = dpo3->dpoi_next_node;
+ vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lb_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1));
+ vlib_increment_combined_counter
+ (cm, thread_index, lb_index2, 1,
+ vlib_buffer_length_in_chain (vm, p2));
+ vlib_increment_combined_counter
+ (cm, thread_index, lb_index3, 1,
+ vlib_buffer_length_in_chain (vm, p3));
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, pi2, pi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ ip4_address_t *dst_addr0;
+ u32 pi0, fib_index0, lbi0;
+ flow_hash_config_t flow_hash_config0;
+ const dpo_id_t *dpo0;
+ u32 hash_c0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ dst_addr0 = &ip0->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
+ }
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
+
+ if (lookup_for_responses_to_locally_received_packets)
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ else
+ {
+ /* Handle default route. */
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ }
+
+ ASSERT (lbi0);
+ lb0 = load_balance_get (lbi0);
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm,
+ p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
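+	      /* Packets are speculatively enqueued to the current next
+	         node; when the lookup result disagrees we rewind one slot,
+	         hand back the old frame and continue in a frame for the new
+	         next index. */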
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+/** @brief IPv4 lookup node.
+ @node ip4-lookup
+
+ This is the main IPv4 lookup dispatch node.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ - Indicates the @c sw_if_index value of the interface that the
+ packet was received on.
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+     - When the value is @c ~0 the node performs a longest prefix
+         match (LPM) on the packet destination address in the FIB attached
+         to the receive interface.
+     - Otherwise it performs LPM on the packet destination address in the
+         indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
+         value (0, 1, ...) and not a VRF id.
+
+ @em Sets:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - The lookup result adjacency index.
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the node index found in
+ ip_adjacency_t @c adj->lookup_next_index
+ (where @c adj is the lookup result adjacency).
+*/
+static uword
+ip4_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_lookup_inline (vm, node, frame,
+ /* lookup_for_responses_to_locally_received_packets */
+ 0);
+}
+
+static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
+
+VLIB_REGISTER_NODE (ip4_lookup_node) =
+{
+  .function = ip4_lookup,
+  .name = "ip4-lookup",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_lookup_trace,
+  .n_next_nodes = IP_LOOKUP_N_NEXT,
+  .next_nodes = IP4_LOOKUP_NEXT_NODES,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
+
+always_inline uword
+ip4_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_lookup_next_t next0, next1;
+ const load_balance_t *lb0, *lb1;
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+ const ip4_header_t *ip0, *ip1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+
+	  /*
+	   * this node is for via FIBs, so we can re-use the hash value from
+	   * the lookup node if present.
+	   * We don't want to use the same hash value at each level in the
+	   * recursion graph as that would lead to polarisation
+	   */
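+	  /* Shifting the stored flow hash right by one below gives each
+	     level of a recursive route a different bucket-selection value
+	     at essentially zero cost. */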
+ hc0 = hc1 = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
+ }
+ dpo1 = load_balance_get_fwd_bucket
+ (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ vlib_buffer_t *p0;
+ u32 pi0, lbi0, hc0;
+ const ip4_header_t *ip0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip4_load_balance_node) =
+{
+  .function = ip4_load_balance,
+  .name = "ip4-load-balance",
+  .vector_size = sizeof (u32),
+  .sibling_of = "ip4-lookup",
+  .format_trace = format_ip4_lookup_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
+
+/* get first interface address */
+ip4_address_t *
+ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
+ ip_interface_address_t ** result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip4_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address
+ (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */ ,
+ ({
+ ip4_address_t * a =
+ ip_interface_address_get_address (lm, ia);
+ result = a;
+ break;
+ }));
+  /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+static void
+ip4_add_interface_routes (u32 sw_if_index,
+ ip4_main_t * im, u32 fib_index,
+ ip_interface_address_t * a)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip4_address_t *address = ip_interface_address_get_address (lm, a);
+ fib_prefix_t pfx = {
+ .fp_len = a->address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *address,
+ };
+
+ if (pfx.fp_len <= 30)
+ {
+ /* a /30 or shorter - add a glean for the network address */
+ fib_table_entry_update_one_path (fib_index, &pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ /* No next-hop address */
+ NULL,
+ sw_if_index,
+ // invalid FIB index
+ ~0,
+ 1,
+ // no out-label stack
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+      /* Add the network (all-zeros host) and broadcast (all-ones host)
+	 addresses as drops */
+ fib_prefix_t net_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
+ };
+ if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
+ fib_table_entry_special_add(fib_index,
+ &net_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_DROP |
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
+ net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
+ if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
+ fib_table_entry_special_add(fib_index,
+ &net_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_DROP |
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
+ }
+ else if (pfx.fp_len == 31)
+ {
+ u32 mask = clib_host_to_net_u32(1);
+ fib_prefix_t net_pfx = pfx;
+
+ net_pfx.fp_len = 32;
+ net_pfx.fp_addr.ip4.as_u32 ^= mask;
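+      /* XORing the host bit (in network byte order) yields the peer
+	 address of the /31: e.g. 10.0.0.0/31 <-> 10.0.0.1, per RFC 3021. */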
+
+ /* a /31 - add the other end as an attached host */
+ fib_table_entry_update_one_path (fib_index, &net_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ &net_pfx.fp_addr,
+ sw_if_index,
+ // invalid FIB index
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ pfx.fp_len = 32;
+
+ if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
+ {
+ u32 classify_table_index =
+ lm->classify_table_index_by_sw_if_index[sw_if_index];
+ if (classify_table_index != (u32) ~ 0)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP4,
+ classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
+
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE, &dpo);
+ dpo_reset (&dpo);
+ }
+ }
+
+ fib_table_entry_update_one_path (fib_index, &pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP4,
+ &pfx.fp_addr,
+ sw_if_index,
+ // invalid FIB index
+ ~0,
+ 1, NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+static void
+ip4_del_interface_routes (ip4_main_t * im,
+ u32 fib_index,
+ ip4_address_t * address, u32 address_length)
+{
+ fib_prefix_t pfx = {
+ .fp_len = address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *address,
+ };
+
+ if (pfx.fp_len <= 30)
+ {
+ fib_prefix_t net_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
+ };
+ if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
+ fib_table_entry_special_remove(fib_index,
+ &net_pfx,
+ FIB_SOURCE_INTERFACE);
+ net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
+ if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
+ fib_table_entry_special_remove(fib_index,
+ &net_pfx,
+ FIB_SOURCE_INTERFACE);
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+ }
+ else if (pfx.fp_len == 31)
+ {
+ u32 mask = clib_host_to_net_u32(1);
+ fib_prefix_t net_pfx = pfx;
+
+ net_pfx.fp_len = 32;
+ net_pfx.fp_addr.ip4.as_u32 ^= mask;
+
+ fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
+ }
+
+ pfx.fp_len = 32;
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+}
+
+void
+ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
+{
+ ip4_main_t *im = &ip4_main;
+
+ vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
+ {
+ if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+ else
+ {
+ ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
+ if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
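+  /* Note the inverted sense below: the "ip4-drop" feature is enabled when
+     IP4 is disabled on the interface, so packets arriving on a
+     non-IP-enabled interface are dropped rather than forwarded. */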
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
+ !is_enable, 0, 0);
+
+ vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
+ sw_if_index, !is_enable, 0, 0);
+}
+
+static clib_error_t *
+ip4_add_del_interface_address_internal (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 is_del)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ clib_error_t *error = 0;
+ u32 if_address_index, elts_before;
+ ip4_address_fib_t ip4_af, *addr_fib = 0;
+
+ /* local0 interface doesn't support IP addressing */
+ if (sw_if_index == 0)
+ {
+ return
+ clib_error_create ("local0 interface doesn't support IP addressing");
+ }
+
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ ip4_addr_fib_init (&ip4_af, address,
+ vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
+ vec_add1 (addr_fib, ip4_af);
+
+ /* FIXME-LATER
+ * there is no support for adj-fib handling in the presence of overlapping
+ * subnets on interfaces. Easy fix - disallow overlapping subnets, like
+ * most routers do.
+ */
+ /* *INDENT-OFF* */
+ if (!is_del)
+ {
+ /* When adding an address check that it does not conflict
+ with an existing address. */
+ ip_interface_address_t *ia;
+ foreach_ip_interface_address
+ (&im->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */ ,
+ ({
+ ip4_address_t * x =
+ ip_interface_address_get_address
+ (&im->lookup_main, ia);
+ if (ip4_destination_matches_route
+ (im, address, x, ia->address_length) ||
+ ip4_destination_matches_route (im,
+ x,
+ address,
+ address_length))
+ return
+ clib_error_create
+ ("failed to add %U which conflicts with %U for interface %U",
+ format_ip4_address_and_length, address,
+ address_length,
+ format_ip4_address_and_length, x,
+ ia->address_length,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
+ }));
+ }
+ /* *INDENT-ON* */
+
+ elts_before = pool_elts (lm->if_address_pool);
+
+ error = ip_interface_address_add_del
+ (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
+ if (error)
+ goto done;
+
+ ip4_sw_interface_enable_disable (sw_if_index, !is_del);
+
+ if (is_del)
+ ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
+ else
+ ip4_add_interface_routes (sw_if_index,
+ im, ip4_af.fib_index,
+ pool_elt_at_index
+ (lm->if_address_pool, if_address_index));
+
+  /* The pool size changes only when an address was actually added or
+     deleted (an unchanged size means a duplicate add); only then do we
+     invoke the registered callbacks. */
+ if (elts_before != pool_elts (lm->if_address_pool))
+ {
+ ip4_add_del_interface_address_callback_t *cb;
+ vec_foreach (cb, im->add_del_interface_address_callbacks)
+ cb->function (im, cb->function_opaque, sw_if_index,
+ address, address_length, if_address_index, is_del);
+ }
+
+done:
+ vec_free (addr_fib);
+ return error;
+}
+
+clib_error_t *
+ip4_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 is_del)
+{
+ return ip4_add_del_interface_address_internal
+ (vm, sw_if_index, address, address_length, is_del);
+}
+
+/* Built-in ip4 unicast rx feature path definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
+{
+ .arc_name = "ip4-unicast",
+ .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
+ .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip4_flow_classify, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-flow-classify",
+ .runs_before = VNET_FEATURES ("ip4-inacl"),
+};
+
+VNET_FEATURE_INIT (ip4_inacl, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-inacl",
+ .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
+};
+
+VNET_FEATURE_INIT (ip4_source_check_1, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-check-via-rx",
+ .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
+};
+
+VNET_FEATURE_INIT (ip4_source_check_2, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-check-via-any",
+ .runs_before = VNET_FEATURES ("ip4-policer-classify"),
+};
+
+VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-and-port-range-check-rx",
+ .runs_before = VNET_FEATURES ("ip4-policer-classify"),
+};
+
+VNET_FEATURE_INIT (ip4_policer_classify, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-policer-classify",
+ .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_ipsec, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ipsec-input-ip4",
+ .runs_before = VNET_FEATURES ("vpath-input-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_vpath, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "vpath-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
+};
+
+VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-vxlan-bypass",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_drop, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-drop",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_lookup, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-lookup",
+ .runs_before = 0, /* not before any other features */
+};
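+
+/* Taken together, the runs_before constraints above order the ip4-unicast
+   arc roughly as: flow-classify, inacl, source checks, source-and-port-range
+   check, policer-classify, ipsec, vpath, vxlan-bypass, then ip4-lookup
+   (with ip4-drop available just ahead of the lookup). */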
+
+/* Built-in ip4 multicast rx feature path definition */
+VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
+{
+ .arc_name = "ip4-multicast",
+ .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
+ .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip4_vpath_mc, static) =
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "vpath-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_mc_drop, static) =
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "ip4-drop",
+ .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_lookup_mc, static) =
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "ip4-mfib-forward-lookup",
+ .runs_before = 0, /* last feature */
+};
+
+/* Source and port-range check ip4 tx feature path definition */
+VNET_FEATURE_ARC_INIT (ip4_output, static) =
+{
+ .arc_name = "ip4-output",
+ .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
+ .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "ip4-source-and-port-range-check-tx",
+ .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_ipsec_output, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "ipsec-output-ip4",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+/* Built-in ip4 tx feature path definition */
+VNET_FEATURE_INIT (ip4_interface_output, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "interface-output",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+ ip4_main_t *im = &ip4_main;
+
+ /* Fill in lookup tables with default table (0). */
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+
+ if (!is_add)
+ {
+ ip4_main_t *im4 = &ip4_main;
+ ip_lookup_main_t *lm4 = &im4->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip4_address_t *address;
+ vlib_main_t *vm = vlib_get_main ();
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
+ ({
+ address = ip_interface_address_get_address (lm4, ia);
+ ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
+ is_add, 0, 0);
+
+ vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
+ is_add, 0, 0);
+
+ return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
+
+/* Global IP4 main. */
+ip4_main_t ip4_main;
+
+clib_error_t *
+ip4_lookup_init (vlib_main_t * vm)
+{
+ ip4_main_t *im = &ip4_main;
+ clib_error_t *error;
+ uword i;
+
+ if ((error = vlib_call_init_function (vm, vnet_feature_init)))
+ return error;
+
+ for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
+ {
+ u32 m;
+
+ if (i < 32)
+ m = pow2_mask (i) << (32 - i);
+ else
+ m = ~0;
+ im->fib_masks[i] = clib_host_to_net_u32 (m);
+ }
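+  /* For example, fib_masks[24] is 0xffffff00 in host order (pow2_mask (24)
+     = 0x00ffffff shifted left by 8), stored in network order so it can be
+     ANDed directly against on-the-wire addresses. */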
+
+ ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
+
+ /* Create FIB with index 0 and table id of 0. */
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
+ FIB_SOURCE_DEFAULT_ROUTE);
+ mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
+ MFIB_SOURCE_DEFAULT_ROUTE);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip4_lookup_node.index);
+ pn->unformat_edit = unformat_pg_ip4_header;
+ }
+
+ {
+ ethernet_arp_header_t h;
+
+ memset (&h, 0, sizeof (h));
+
+ /* Set target ethernet address to all zeros. */
+ memset (h.ip4_over_ethernet[1].ethernet, 0,
+ sizeof (h.ip4_over_ethernet[1].ethernet));
+
+#define _16(f,v) h.f = clib_host_to_net_u16 (v);
+#define _8(f,v) h.f = v;
+ _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
+ _16 (l3_type, ETHERNET_TYPE_IP4);
+ _8 (n_l2_address_bytes, 6);
+ _8 (n_l3_address_bytes, 4);
+ _16 (opcode, ETHERNET_ARP_OPCODE_request);
+#undef _16
+#undef _8
+
+ vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
+ /* data */ &h,
+ sizeof (h),
+ /* alloc chunk size */ 8,
+ "ip4 arp");
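+
+    /* The fast path clones this template into a fresh buffer for every
+       ARP request, so only the per-request address fields need to be
+       filled in at send time. */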
+ }
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip4_lookup_init);
+
+typedef struct
+{
+ /* Adjacency taken. */
+ u32 dpo_index;
+ u32 flow_hash;
+ u32 fib_index;
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[64 - 1 * sizeof (u32)];
+}
+ip4_forward_next_trace_t;
+
+u8 *
+format_ip4_forward_next_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+ s = format (s, "%U%U",
+ format_white_space, indent,
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+static u8 *
+format_ip4_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
+ t->fib_index, t->dpo_index, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+static u8 *
+format_ip4_rewrite_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
+ t->fib_index, t->dpo_index, format_ip_adjacency,
+ t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip_adjacency_packet_data,
+ t->dpo_index, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+/* Common trace function for all ip4-forward next nodes. */
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
+{
+ u32 *from, n_left;
+ ip4_main_t *im = &ip4_main;
+
+ n_left = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ ip4_forward_next_trace_t *t0, *t1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
+ t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
+ t0->fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+
+ clib_memcpy (t0->packet_data,
+ vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
+ t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
+ t1->fib_index =
+ (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b1)->sw_if_index[VLIB_RX]);
+ clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
+ sizeof (t1->packet_data));
+ }
+ from += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip4_forward_next_trace_t *t0;
+
+ bi0 = from[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
+ t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
+ t0->fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
+ }
+ from += 1;
+ n_left -= 1;
+ }
+}
+
+static uword
+ip4_drop_or_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, ip4_error_t error_code)
+{
+ u32 *buffers = vlib_frame_vector_args (frame);
+ uword n_packets = frame->n_vectors;
+
+ vlib_error_drop_buffers (vm, node, buffers,
+ /* stride */ 1,
+ n_packets,
+ /* next */ 0,
+ ip4_input_node.index, error_code);
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return n_packets;
+}
+
+static uword
+ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
+}
+
+static uword
+ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_drop_node, static) =
+{
+ .function = ip4_drop,
+ .name = "ip4-drop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
+
+VLIB_REGISTER_NODE (ip4_punt_node, static) =
+{
+ .function = ip4_punt,
+ .name = "ip4-punt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-punt",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
+/* *INDENT-ON* */
+
+/* Compute TCP/UDP/ICMP4 checksum in software. */
+u16
+ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip4_header_t * ip0)
+{
+ ip_csum_t sum0;
+ u32 ip_header_length, payload_length_host_byte_order;
+ u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
+ u16 sum16;
+ void *data_this_buffer;
+
+ /* Initialize checksum with ip header. */
+ ip_header_length = ip4_header_bytes (ip0);
+ payload_length_host_byte_order =
+ clib_net_to_host_u16 (ip0->length) - ip_header_length;
+ sum0 =
+ clib_host_to_net_u32 (payload_length_host_byte_order +
+ (ip0->protocol << 16));
+
+ if (BITS (uword) == 32)
+ {
+ sum0 =
+ ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->src_address, u32));
+ sum0 =
+ ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->dst_address, u32));
+ }
+ else
+ sum0 =
+ ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
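+  /* On 64-bit hosts a single u64 load covers both src and dst addresses
+     (they are adjacent in the header); together with the length+protocol
+     word above this completes the RFC 768/793 pseudo-header. */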
+
+ n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+ data_this_buffer = (void *) ip0 + ip_header_length;
+ n_ip_bytes_this_buffer = p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
+ if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
+ {
+ n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
+ n_ip_bytes_this_buffer - ip_header_length : 0;
+ }
+ while (1)
+ {
+ sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+ n_bytes_left -= n_this_buffer;
+ if (n_bytes_left == 0)
+ break;
+
+ ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
+ p0 = vlib_get_buffer (vm, p0->next_buffer);
+ data_this_buffer = vlib_buffer_get_current (p0);
+ n_this_buffer = p0->current_length;
+ }
+
+ sum16 = ~ip_csum_fold (sum0);
+
+ return sum16;
+}
+
+u32
+ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
+{
+ ip4_header_t *ip0 = vlib_buffer_get_current (p0);
+ udp_header_t *udp0;
+ u16 sum16;
+
+ ASSERT (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_UDP);
+
+ udp0 = (void *) (ip0 + 1);
+ if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
+ {
+ p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
+ | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+ return p0->flags;
+ }
+
+ sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
+
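+  /* A valid TCP/UDP checksum folds to zero, so (sum16 == 0) shifted into
+     place sets the CHECKSUM_CORRECT flag without a branch. */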
+ p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
+ | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
+
+ return p0->flags;
+}
+
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip4_local) =
+{
+ .arc_name = "ip4-local",
+ .start_nodes = VNET_FEATURES ("ip4-local"),
+};
+/* *INDENT-ON* */
+
+static inline void
+ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
+ u8 is_udp, u8 * error, u8 * good_tcp_udp)
+{
+ u32 flags0;
+ flags0 = ip4_tcp_udp_validate_checksum (vm, p);
+ *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ if (is_udp)
+ {
+ udp_header_t *udp;
+ u32 ip_len, udp_len;
+ i32 len_diff;
+ udp = ip4_next_header (ip);
+ /* Verify UDP length. */
+ ip_len = clib_net_to_host_u16 (ip->length);
+ udp_len = clib_net_to_host_u16 (udp->length);
+
+ len_diff = ip_len - udp_len;
+ *good_tcp_udp &= len_diff >= 0;
+ *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
+ }
+}
+
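+/* True when the packet is TCP or UDP and nothing (rx offload or an earlier
+   node) has computed its L4 checksum yet, i.e. software validation is
+   required. */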
+#define ip4_local_do_l4_check(is_tcp_udp, flags) \
+ (is_tcp_udp && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
+
+static inline uword
+ip4_local_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int head_of_feature_arc)
+{
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_local_next_t next_index;
+ u32 *from, *to_next, n_left_from, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+ u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t *lb0, *lb1;
+ u32 pi0, next0, fib_index0, lbi0;
+ u32 pi1, next1, fib_index1, lbi1;
+ u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+ u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
+ u32 sw_if_index0, sw_if_index1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ next0 = next1 = IP_LOCAL_NEXT_DROP;
+ error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
+ vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
+ proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
+
+ if (head_of_feature_arc == 0)
+ goto skip_checks;
+
+ is_udp0 = proto0 == IP_PROTOCOL_UDP;
+ is_udp1 = proto1 == IP_PROTOCOL_UDP;
+ is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
+ is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
+
+ good_tcp_udp0 =
+ (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ good_tcp_udp1 =
+ (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
+ || ip4_local_do_l4_check (is_tcp_udp1,
+ p1->flags)))
+ {
+ if (is_tcp_udp0)
+ ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
+ &good_tcp_udp0);
+ if (is_tcp_udp1)
+ ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
+ &good_tcp_udp1);
+ }
+
+ ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
+ error0 = (is_tcp_udp0 && !good_tcp_udp0
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
+ error1 = (is_tcp_udp1 && !good_tcp_udp1
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
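+	  /* The ASSERT above guarantees that "+ is_udp" selects
+	     IP4_ERROR_UDP_CHECKSUM for UDP and IP4_ERROR_TCP_CHECKSUM for
+	     TCP without a branch. */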
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
+ fib_index1 =
+ (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
+ 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
+ 2);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
+ 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
+ 3);
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
+ ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
+
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
+ ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+
+ /*
+ * Must have a route to source otherwise we drop the packet.
+ * ip4 broadcasts are accepted, e.g. to make dhcp client work
+ *
+ * The checks are:
+	   * - the source is a receive => it's from us => bogus, do this
+ * first since it sets a different error code.
+ * - uRPF check for any route to source - accept if passes.
+ * - allow packets destined to the broadcast address from unknown sources
+ */
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo0->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb0->lb_urpf) &&
+ ip0->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
+ error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo1->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
+ error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb1->lb_urpf) &&
+ ip1->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
+
+ skip_checks:
+
+ next0 = lm->local_next_by_ip_protocol[proto0];
+ next1 = lm->local_next_by_ip_protocol[proto1];
+
+ next0 =
+ error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ next1 =
+ error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
+
+ p0->error = error0 ? error_node->errors[error0] : 0;
+ p1->error = error1 ? error_node->errors[error1] : 0;
+
+ if (head_of_feature_arc)
+ {
+ if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
+ if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1,
+ next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ u32 pi0, next0, fib_index0, lbi0;
+ u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+ load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+ u32 sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ next0 = IP_LOCAL_NEXT_DROP;
+ error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+ vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
+
+ if (head_of_feature_arc == 0)
+ goto skip_check;
+
+ is_udp0 = proto0 == IP_PROTOCOL_UDP;
+ is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
+ good_tcp_udp0 =
+ (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
+ {
+ ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
+ &good_tcp_udp0);
+ }
+
+ ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
+ error0 = (is_tcp_udp0 && !good_tcp_udp0
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
+ 2);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
+ 3);
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
+
+ lb0 = load_balance_get (lbi0);
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo0->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb0->lb_urpf) &&
+ ip0->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
+
+ skip_check:
+ next0 = lm->local_next_by_ip_protocol[proto0];
+ next0 =
+ error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+
+ p0->error = error0 ? error_node->errors[error0] : 0;
+
+ if (head_of_feature_arc)
+ {
+ if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_local_node) =
+{
+ .function = ip4_local,
+ .name = "ip4-local",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .n_next_nodes = IP_LOCAL_N_NEXT,
+ .next_nodes =
+ {
+ [IP_LOCAL_NEXT_DROP] = "error-drop",
+ [IP_LOCAL_NEXT_PUNT] = "error-punt",
+ [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
+ [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
+
+static uword
+ip4_local_end_of_arc (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
+ .function = ip4_local_end_of_arc,
+ .name = "ip4-local-end-of-arc",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+ .sibling_of = "ip4-local",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
+
+VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
+ .arc_name = "ip4-local",
+ .node_name = "ip4-local-end-of-arc",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+void
+ip4_register_protocol (u32 protocol, u32 node_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
+ lm->local_next_by_ip_protocol[protocol] =
+ vlib_node_add_next (vm, ip4_local_node.index, node_index);
+}
+
+static clib_error_t *
+show_ip_local_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ int i;
+
+ vlib_cli_output (vm, "Protocols handled by ip4_local");
+ for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
+ {
+ if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
+ {
+ u32 node_index = vlib_get_node (vm,
+ ip4_local_node.index)->
+ next_nodes[lm->local_next_by_ip_protocol[i]];
+ vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
+ node_index);
+ }
+ }
+ return 0;
+}
+
+
+/*?
+ * Display the set of protocols handled by the local IPv4 stack.
+ *
+ * @cliexpar
+ * Example of how to display local protocol table:
+ * @cliexstart{show ip local}
+ * Protocols handled by ip4_local
+ * 1
+ * 17
+ * 47
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip_local, static) =
+{
+ .path = "show ip local",
+ .function = show_ip_local_command_fn,
+ .short_help = "show ip local",
+};
+/* *INDENT-ON* */
+
+always_inline uword
+ip4_arp_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_glean)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 *from, *to_next_drop;
+ uword n_left_from, n_left_to_next_drop, next_index;
+ static f64 time_last_seed_change = -1e100;
+ static u32 hash_seeds[3];
+ static uword hash_bitmap[256 / BITS (uword)];
+ f64 time_now;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ time_now = vlib_time_now (vm);
+ if (time_now - time_last_seed_change > 1e-3)
+ {
+ uword i;
+ u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
+ sizeof (hash_seeds));
+ for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+ hash_seeds[i] = r[i];
+
+      /* Mark all hash keys as not yet seen. */
+ for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+ hash_bitmap[i] = 0;
+
+ time_last_seed_change = time_now;
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ if (next_index == IP4_ARP_NEXT_DROP)
+ next_index = IP4_ARP_N_NEXT; /* point to first interface */
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
+ to_next_drop, n_left_to_next_drop);
+
+ while (n_left_from > 0 && n_left_to_next_drop > 0)
+ {
+ u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
+ ip_adjacency_t *adj0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ uword bm0;
+
+ pi0 = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ a0 = hash_seeds[0];
+ b0 = hash_seeds[1];
+ c0 = hash_seeds[2];
+
+ sw_if_index0 = adj0->rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ if (is_glean)
+ {
+ /*
+ * this is the Glean case, so we are ARPing for the
+ * packet's destination
+ */
+ a0 ^= ip0->dst_address.data_u32;
+ }
+ else
+ {
+ a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
+ }
+ b0 ^= sw_if_index0;
+
+ hash_v3_mix32 (a0, b0, c0);
+ hash_v3_finalize32 (a0, b0, c0);
+
+ c0 &= BITS (hash_bitmap) - 1;
+ m0 = (uword) 1 << (c0 % BITS (uword));
+ c0 = c0 / BITS (uword);
+
+ bm0 = hash_bitmap[c0];
+ drop0 = (bm0 & m0) != 0;
+
+ /* Mark it as seen. */
+ hash_bitmap[c0] = bm0 | m0;
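+	  /* Together with the ~1ms seed rotation above, this 256-bit bitmap
+	     acts as a crude rate-limiter: at most one ARP request per hash
+	     bucket per interval. A hash collision may suppress an extra
+	     request, which is harmless; it will be retried later. */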
+
+ from += 1;
+ n_left_from -= 1;
+ to_next_drop[0] = pi0;
+ to_next_drop += 1;
+ n_left_to_next_drop -= 1;
+
+ p0->error =
+ node->errors[drop0 ? IP4_ARP_ERROR_DROP :
+ IP4_ARP_ERROR_REQUEST_SENT];
+
+	  /*
+	   * the adj has been updated to a rewrite, but the node/DPO that got
+	   * us here hasn't been yet. No big deal: we'll drop while we wait.
+	   */
+ if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+ continue;
+
+ if (drop0)
+ continue;
+
+ /*
+ * Can happen if the control-plane is programming tables
+ * with traffic flowing; at least that's today's lame excuse.
+ */
+ if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
+ || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
+ {
+ p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
+ }
+ else
+ /* Send ARP request. */
+ {
+ u32 bi0 = 0;
+ vlib_buffer_t *b0;
+ ethernet_arp_header_t *h0;
+ vnet_hw_interface_t *hw_if0;
+
+ h0 =
+ vlib_packet_template_get_packet (vm,
+ &im->ip4_arp_request_packet_template,
+ &bi0);
+
+ /* Seems we're out of buffers */
+ if (PREDICT_FALSE (!h0))
+ continue;
+
+ /* Add rewrite/encap string for ARP packet. */
+ vnet_rewrite_one_header (adj0[0], h0,
+ sizeof (ethernet_header_t));
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ /* Src ethernet address in ARP header. */
+ clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
+ hw_if0->hw_address,
+ sizeof (h0->ip4_over_ethernet[0].ethernet));
+
+ if (is_glean)
+ {
+ /* The interface's source address is stashed in the Glean Adj */
+ h0->ip4_over_ethernet[0].ip4 =
+ adj0->sub_type.glean.receive_addr.ip4;
+
+ /* Copy in destination address we are requesting. This is the
+ * glean case, so it's the packet's destination.*/
+ h0->ip4_over_ethernet[1].ip4.data_u32 =
+ ip0->dst_address.data_u32;
+ }
+ else
+ {
+ /* Src IP address in ARP header. */
+ if (ip4_src_address_for_packet (lm, sw_if_index0,
+ &h0->
+ ip4_over_ethernet[0].ip4))
+ {
+ /* No source address available */
+ p0->error =
+ node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+ vlib_buffer_free (vm, &bi0, 1);
+ continue;
+ }
+
+ /* Copy in destination address we are requesting from the
+ incomplete adj */
+ h0->ip4_over_ethernet[1].ip4.data_u32 =
+ adj0->sub_type.nbr.next_hop.ip4.as_u32;
+ }
+
+ vlib_buffer_copy_trace_flag (vm, p0, bi0);
+ b0 = vlib_get_buffer (vm, bi0);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+
+ vlib_set_next_frame_buffer (vm, node,
+ adj0->rewrite_header.next_index,
+ bi0);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return (ip4_arp_inline (vm, node, frame, 0));
+}
+
+static uword
+ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return (ip4_arp_inline (vm, node, frame, 1));
+}
+
+static char *ip4_arp_error_strings[] = {
+ [IP4_ARP_ERROR_DROP] = "address overflow drops",
+ [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
+ [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
+ [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
+ [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
+ [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
+};
+
+VLIB_REGISTER_NODE (ip4_arp_node) =
+{
+  .function = ip4_arp,
+  .name = "ip4-arp",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_forward_next_trace,
+  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
+  .error_strings = ip4_arp_error_strings,
+  .n_next_nodes = IP4_ARP_N_NEXT,
+  .next_nodes = {
+    [IP4_ARP_NEXT_DROP] = "error-drop",
+  },
+};
+
+VLIB_REGISTER_NODE (ip4_glean_node) =
+{
+  .function = ip4_glean,
+  .name = "ip4-glean",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_forward_next_trace,
+  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
+  .error_strings = ip4_arp_error_strings,
+  .n_next_nodes = IP4_ARP_N_NEXT,
+  .next_nodes = {
+    [IP4_ARP_NEXT_DROP] = "error-drop",
+  },
+};
+
+#define foreach_notrace_ip4_arp_error \
+_(DROP) \
+_(REQUEST_SENT) \
+_(REPLICATE_DROP) \
+_(REPLICATE_FAIL)
+
+clib_error_t *
+arp_notrace_init (vlib_main_t * vm)
+{
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
+
+ /* don't trace ARP request packets */
+#define _(a) \
+ vnet_pcap_drop_trace_filter_add_del \
+ (rt->errors[IP4_ARP_ERROR_##a], \
+ 1 /* is_add */);
+ foreach_notrace_ip4_arp_error;
+#undef _
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (arp_notrace_init);
+
+
+/* Send an ARP request to see if given destination is reachable on given interface. */
+clib_error_t *
+ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ethernet_arp_header_t *h;
+ ip4_address_t *src;
+ ip_interface_address_t *ia;
+ ip_adjacency_t *adj;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ vlib_buffer_t *b;
+ adj_index_t ai;
+ u32 bi = 0;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ return clib_error_return (0, "%U: interface %U down",
+ format_ip4_address, dst,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
+ }
+
+ src =
+ ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
+ if (!src)
+ {
+ vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+ return clib_error_return
+ (0,
+ "no matching interface address for destination %U (interface %U)",
+ format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
+ }
+
+ h = vlib_packet_template_get_packet (vm,
+ &im->ip4_arp_request_packet_template,
+ &bi);
+
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ if (PREDICT_FALSE (!hi->hw_address))
+ {
+      return clib_error_return (0, "%U: interface %U does not support ip probe",
+ format_ip4_address, dst,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
+ }
+
+ clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
+ sizeof (h->ip4_over_ethernet[0].ethernet));
+
+ h->ip4_over_ethernet[0].ip4 = src[0];
+ h->ip4_over_ethernet[1].ip4 = dst[0];
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+ ip46_address_t nh = {
+ .ip4 = *dst,
+ };
+
+ ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4, &nh, sw_if_index);
+ adj = adj_get (ai);
+
+ /* Peer has been previously resolved, retrieve glean adj instead */
+ if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
+ {
+ adj_unlock (ai);
+ ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
+ adj = adj_get (ai);
+ }
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+
+ adj_unlock (ai);
+ return /* no error */ 0;
+}
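+
+/* A minimal usage sketch for ip4_probe_neighbor. The surrounding context
+ * (a hypothetical CLI or API handler that has already parsed "addr" and
+ * "sw_if_index") is assumed, not part of this file:
+ *
+ *   ip4_address_t addr;
+ *   u32 sw_if_index;
+ *   clib_error_t *error;
+ *
+ *   ... unformat addr and sw_if_index from user input ...
+ *
+ *   error = ip4_probe_neighbor (vm, &addr, sw_if_index);
+ *   if (error)
+ *     return error;  // e.g. interface down, no usable source address
+ */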
+
+typedef enum
+{
+ IP4_REWRITE_NEXT_DROP,
+ IP4_REWRITE_NEXT_ICMP_ERROR,
+} ip4_rewrite_next_t;
+
+always_inline uword
+ip4_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int do_counters, int is_midchain, int is_mcast)
+{
+ ip_lookup_main_t *lm = &ip4_main.lookup_main;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_adjacency_t *adj0, *adj1;
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
+ u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
+ u32 tx_sw_if_index0, tx_sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ /*
+ * pre-fetch the per-adjacency counters
+ */
+ if (do_counters)
+ {
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ thread_index, adj_index0);
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ thread_index, adj_index1);
+ }
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ error0 = error1 = IP4_ERROR_NONE;
+ next0 = next1 = IP4_REWRITE_NEXT_DROP;
+
+ /* Decrement TTL & update checksum.
+ Works either endian, so no need for byte swap. */
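+	  /* Per RFC 1624 incremental update: the TTL is the high byte of the
+	   * 16-bit (ttl, protocol) word, so decrementing it lowers that word
+	   * by 0x0100 and the stored checksum must rise by 0x0100 (kept in
+	   * network byte order), with any end-around carry folded back in.
+	   * Worked example: ttl 64, protocol 6 is the word 0x4006; after the
+	   * decrement it is 0x3f06, and checksum + 0x0100 (plus carry fold)
+	   * matches a full recomputation. */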
+ if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+ {
+ i32 ttl0 = ip0->ttl;
+
+	      /* The input node should have rejected packets with ttl 0. */
+ ASSERT (ip0->ttl > 0);
+
+ checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+ checksum0 += checksum0 >= 0xffff;
+
+ ip0->checksum = checksum0;
+ ttl0 -= 1;
+ ip0->ttl = ttl0;
+
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE (ttl0 <= 0))
+ {
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ }
+
+ /* Verify checksum. */
+ ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
+ (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+ if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+ {
+ i32 ttl1 = ip1->ttl;
+
+	      /* The input node should have rejected packets with ttl 0. */
+ ASSERT (ip1->ttl > 0);
+
+ checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+ checksum1 += checksum1 >= 0xffff;
+
+ ip1->checksum = checksum1;
+ ttl1 -= 1;
+ ip1->ttl = ttl1;
+
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE (ttl1 <= 0))
+ {
+ error1 = IP4_ERROR_TIME_EXPIRED;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ }
+
+ /* Verify checksum. */
+ ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
+ (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
+ }
+ else
+ {
+ p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+
+	  /* Rewrite packet headers and update lengths. */
+ adj0 = adj_get (adj_index0);
+ adj1 = adj_get (adj_index1);
+
+ /* Worth pipelining. No guarantee that adj0,1 are hot... */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ rw_len1 = adj1[0].rewrite_header.data_bytes;
+ vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+ vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
+
+ /* Check MTU of outgoing interface. */
+	  error0 = (vlib_buffer_length_in_chain (vm, p0) >
+		    adj0[0].rewrite_header.max_l3_packet_bytes ?
+		    IP4_ERROR_MTU_EXCEEDED : error0);
+	  error1 = (vlib_buffer_length_in_chain (vm, p1) >
+		    adj1[0].rewrite_header.max_l3_packet_bytes ?
+		    IP4_ERROR_MTU_EXCEEDED : error1);
+
+	  /* Don't adjust the buffer for the ttl issue; the icmp-error node
+	   * wants to see the IP header */
+ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+ {
+ next0 = adj0[0].rewrite_header.next_index;
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ if (PREDICT_FALSE
+ (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+ }
+ if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
+ {
+ next1 = adj1[0].rewrite_header.next_index;
+ p1->current_data -= rw_len1;
+ p1->current_length += rw_len1;
+
+ tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
+
+ if (PREDICT_FALSE
+ (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index1, &next1, p1);
+ }
+
+	  /* Assume we are only writing a simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0],
+ ip0, ip1, sizeof (ethernet_header_t));
+
+ /*
+ * Bump the per-adjacency counters
+ */
+ if (do_counters)
+ {
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1) + rw_len1);
+ }
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+ adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
+ }
+ if (is_mcast)
+ {
+ /*
+ * copy bytes from the IP address into the MAC rewrite
+ */
+ vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+ vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t *adj0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get (adj_index0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ error0 = IP4_ERROR_NONE;
+ next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
+
+ /* Decrement TTL & update checksum. */
+ if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+ {
+ i32 ttl0 = ip0->ttl;
+
+ checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+
+ checksum0 += checksum0 >= 0xffff;
+
+ ip0->checksum = checksum0;
+
+ ASSERT (ip0->ttl > 0);
+
+ ttl0 -= 1;
+
+ ip0->ttl = ttl0;
+
+ ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
+ (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
+
+ if (PREDICT_FALSE (ttl0 <= 0))
+ {
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ }
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+
+ if (do_counters)
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ thread_index, adj_index0);
+
+	  /* Assume we are only writing a simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+ if (is_mcast)
+ {
+ /*
+ * copy bytes from the IP address into the MAC rewrite
+ */
+ vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+ }
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+
+ if (do_counters)
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+
+ /* Check MTU of outgoing interface. */
+ error0 = (vlib_buffer_length_in_chain (vm, p0)
+ > adj0[0].rewrite_header.max_l3_packet_bytes
+ ? IP4_ERROR_MTU_EXCEEDED : error0);
+
+ p0->error = error_node->errors[error0];
+
+	  /* Don't adjust the buffer for the ttl issue; the icmp-error node
+	   * wants to see the IP header */
+ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+ next0 = adj0[0].rewrite_header.next_index;
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+ }
+
+ if (PREDICT_FALSE
+ (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+
+ }
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Need to do trace after rewrites to pick up new packet data. */
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+
+/** @brief IPv4 rewrite node.
+ @node ip4-rewrite
+
+ This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
+ header checksum, fetch the ip adjacency, check the outbound mtu,
+ apply the adjacency rewrite, and send pkts to the adjacency
+ rewrite header's rewrite_next_index.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - the rewrite adjacency index
+ - <code>adj->lookup_next_index</code>
+ - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
+ the packet will be dropped.
+ - <code>adj->rewrite_header</code>
+ - Rewrite string length, rewrite string, next_index
+
+ @em Sets:
+ - <code>b->current_data, b->current_length</code>
+    - Updated to reflect the applied rewrite string
+
+ <em>Next Indices:</em>
+ - <code> adj->rewrite_header.next_index </code>
+ or @c error-drop
+*/
+static uword
+ip4_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (adj_are_counters_enabled ())
+ return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
+ else
+ return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
+}
+
+static uword
+ip4_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (adj_are_counters_enabled ())
+ return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
+ else
+ return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
+}
+
+static uword
+ip4_rewrite_mcast (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (adj_are_counters_enabled ())
+ return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
+ else
+ return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
+}
+
+static uword
+ip4_mcast_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (adj_are_counters_enabled ())
+ return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
+ else
+ return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
+}
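+
+/* ip4_rewrite_inline is an always_inline template: the four wrappers above
+ * pass compile-time constants for do_counters / is_midchain / is_mcast, so
+ * the compiler emits a specialized copy of the dual/single-packet loop for
+ * each node, with the unused branches removed rather than tested per
+ * packet. */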
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_rewrite_node) = {
+ .function = ip4_rewrite,
+ .name = "ip4-rewrite",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_rewrite_trace,
+
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [IP4_REWRITE_NEXT_DROP] = "error-drop",
+ [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
+
+VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
+ .function = ip4_rewrite_mcast,
+ .name = "ip4-rewrite-mcast",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_rewrite_trace,
+ .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
+
+VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
+ .function = ip4_mcast_midchain,
+ .name = "ip4-mcast-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_rewrite_trace,
+ .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
+
+VLIB_REGISTER_NODE (ip4_midchain_node) = {
+ .function = ip4_midchain,
+ .name = "ip4-midchain",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
+/* *INDENT-ON* */
+
+int
+ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
+{
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ u32 lbi0;
+
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
+
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
+}
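+
+/* The hand-rolled walk above (one 16-bit root-ply step plus two 8-bit ply
+ * steps) must yield the same load-balance index as the full FIB table
+ * lookup; the "test lookup" CLI below uses this to flag any divergence
+ * between the mtrie and the FIB table. */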
+
+static clib_error_t *
+test_lookup_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip4_fib_t *fib;
+ u32 table_id = 0;
+ f64 count = 1;
+ u32 n;
+ int i;
+ ip4_address_t ip4_base_address;
+ u64 errors = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_id))
+ {
+ /* Make sure the entry exists. */
+ fib = ip4_fib_get (table_id);
+ if ((fib) && (fib->index != table_id))
+ return clib_error_return (0, "<fib-index> %d does not exist",
+ table_id);
+ }
+ else if (unformat (input, "count %f", &count))
+ ;
+
+ else if (unformat (input, "%U",
+ unformat_ip4_address, &ip4_base_address))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ n = count;
+
+ for (i = 0; i < n; i++)
+ {
+ if (!ip4_lookup_validate (&ip4_base_address, table_id))
+ errors++;
+
+ ip4_base_address.as_u32 =
+ clib_host_to_net_u32 (1 +
+ clib_net_to_host_u32 (ip4_base_address.as_u32));
+ }
+
+ if (errors)
+ vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
+ else
+ vlib_cli_output (vm, "No errors in %d lookups\n", n);
+
+ return 0;
+}
+
+/*?
+ * Perform a lookup of an IPv4 Address (or range of addresses) in the
+ * given FIB table to determine if there is a conflict with the
+ * adjacency table. The fib-id can be determined by using the
+ * '<em>show ip fib</em>' command. If fib-id is not entered, the default
+ * value of 0 is used.
+ *
+ * @todo This command uses fib-id, other commands use table-id (not
+ * just a name, they are different indexes). Would like to change this
+ * to table-id for consistency.
+ *
+ * @cliexpar
+ * Example of how to run the test lookup command:
+ * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
+ * No errors in 2 lookups
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lookup_test_command, static) =
+{
+ .path = "test lookup",
+ .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
+ .function = test_lookup_command_fn,
+};
+/* *INDENT-ON* */
+
+int
+vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
+{
+ u32 fib_index;
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
+
+ if (~0 == fib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
+ flow_hash_config);
+
+ return 0;
+}
+
+static clib_error_t *
+set_ip_flow_hash_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int matched = 0;
+ u32 table_id = 0;
+ u32 flow_hash_config = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_id))
+ matched = 1;
+#define _(a,v) \
+ else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
+ foreach_flow_hash_bit
+#undef _
+ else
+ break;
+ }
+
+ if (matched == 0)
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "no such FIB table %d", table_id);
+
+ default:
+ clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure the set of IPv4 fields used by the flow hash.
+ *
+ * @cliexpar
+ * Example of how to set the flow hash on a given table:
+ * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
+ * Example of display the configured flow hash:
+ * @cliexstart{show ip fib}
+ * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 6.0.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
+ * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
+ * 7.0.0.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
+ * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 172.16.1.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet0
+ * 172.16.1.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
+ * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
+ * 172.16.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
+ * 172.16.2.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet1
+ * 172.16.2.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
+ * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
+{
+ .path = "set ip flow-hash",
+ .short_help =
+ "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .function = set_ip_flow_hash_command_fn,
+};
+/* *INDENT-ON* */
+
+int
+vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ ip4_main_t *ipm = &ip4_main;
+ ip_lookup_main_t *lm = &ipm->lookup_main;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ ip4_address_t *if_addr;
+
+ if (pool_is_free_index (im->sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+ if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
+ lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
+
+ if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
+
+ if (NULL != if_addr)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *if_addr,
+ };
+ u32 fib_index;
+
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ sw_if_index);
+
+
+ if (table_index != (u32) ~ 0)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP4,
+ classify_dpo_create (DPO_PROTO_IP4, table_index));
+
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE, &dpo);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index,
+ &pfx, FIB_SOURCE_CLASSIFY);
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_ip_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 table_index = ~0;
+ int table_index_set = 0;
+ u32 sw_if_index = ~0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table-index %d", &table_index))
+ table_index_set = 1;
+ else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnet_get_main (), &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (table_index_set == 0)
+ return clib_error_return (0, "classify table-index must be specified");
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface / subif must be specified");
+
+ rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+ return clib_error_return (0, "No such interface");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "No such classifier table");
+ }
+ return 0;
+}
+
+/*?
+ * Assign a classification table to an interface. The classification
+ * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
+ * commands. Once the table is created, use this command to filter packets
+ * on an interface.
+ *
+ * @cliexpar
+ * Example of how to assign a classification table to an interface:
+ * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip_classify_command, static) =
+{
+ .path = "set ip classify",
+ .short_help =
+ "set ip classify intfc <interface> table-index <classify-idx>",
+ .function = set_ip_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
new file mode 100644
index 00000000..3b08f4b0
--- /dev/null
+++ b/src/vnet/ip/ip4_input.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_input.c: IP v4 input node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+typedef struct
+{
+ u8 packet_data[64];
+} ip4_input_trace_t;
+
+static u8 *
+format_ip4_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip4_input_trace_t *t = va_arg (*va, ip4_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ IP4_INPUT_NEXT_DROP,
+ IP4_INPUT_NEXT_PUNT,
+ IP4_INPUT_NEXT_LOOKUP,
+ IP4_INPUT_NEXT_LOOKUP_MULTICAST,
+ IP4_INPUT_NEXT_ICMP_ERROR,
+ IP4_INPUT_N_NEXT,
+} ip4_input_next_t;
+
+/* Validate IPv4 packets and pass them to forwarding code, or
+   drop/punt exception packets. */
+always_inline uword
+ip4_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int verify_checksum)
+{
+ ip4_main_t *im = &ip4_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 n_left_from, *from, *to_next;
+ ip4_input_next_t next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+ vlib_simple_counter_main_t *cm;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip4_input_trace_t));
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_IP4);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ u32 sw_if_index0, pi0, ip_len0, cur_len0, next0;
+ u32 sw_if_index1, pi1, ip_len1, cur_len1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, error1, arc0, arc1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ to_next[0] = pi0 = from[0];
+ to_next[1] = pi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ error0 = error1 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip0->ttl < 1))
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip1->dst_address)))
+ {
+ arc1 = lm->mcast_feature_arc_index;
+ next1 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc1 = lm->ucast_feature_arc_index;
+ next1 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip1->ttl < 1))
+ error1 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+ vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
+
+ /* Punt packets with options or wrong version. */
+ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
+ error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ if (PREDICT_FALSE (ip1->ip_version_and_header_length != 0x45))
+ error1 = (ip1->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ /* Verify header checksum. */
+ if (verify_checksum)
+ {
+ ip_csum_t sum0, sum1;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ ip4_partial_header_checksum_x1 (ip1, sum1);
+
+ error0 = 0xffff != ip_csum_fold (sum0) ?
+ IP4_ERROR_BAD_CHECKSUM : error0;
+ error1 = 0xffff != ip_csum_fold (sum1) ?
+ IP4_ERROR_BAD_CHECKSUM : error1;
+ }
+
+ /* Drop fragmentation offset 1 packets. */
+ error0 = ip4_get_fragment_offset (ip0) == 1 ?
+ IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+ error1 = ip4_get_fragment_offset (ip1) == 1 ?
+ IP4_ERROR_FRAGMENT_OFFSET_ONE : error1;
+
+ /* Verify lengths. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+ ip_len1 = clib_net_to_host_u16 (ip1->length);
+
+	  /* IP length must be at least the size of a minimal IP header. */
+ error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0;
+ error1 = ip_len1 < sizeof (ip1[0]) ? IP4_ERROR_TOO_SHORT : error1;
+
+ cur_len0 = vlib_buffer_length_in_chain (vm, p0);
+ cur_len1 = vlib_buffer_length_in_chain (vm, p1);
+
+ len_diff0 = cur_len0 - ip_len0;
+ len_diff1 = cur_len1 - ip_len1;
+
+ error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP4_ERROR_BAD_LENGTH : error1;
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ if (PREDICT_FALSE (error0 != IP4_ERROR_NONE))
+ {
+ if (error0 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 = error0 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+ if (PREDICT_FALSE (error1 != IP4_ERROR_NONE))
+ {
+ if (error1 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next1 = error1 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ u32 sw_if_index0, pi0, ip_len0, cur_len0, next0;
+ i32 len_diff0;
+ u8 error0, arc0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ error0 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip0->ttl < 1))
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+
+ /* Punt packets with options or wrong version. */
+ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
+ error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ /* Verify header checksum. */
+ if (verify_checksum)
+ {
+ ip_csum_t sum0;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+	      error0 = 0xffff != ip_csum_fold (sum0) ?
+		IP4_ERROR_BAD_CHECKSUM : error0;
+ }
+
+ /* Drop fragmentation offset 1 packets. */
+	  error0 = ip4_get_fragment_offset (ip0) == 1 ?
+	    IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+
+ /* Verify lengths. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+
+	  /* IP length must be at least the size of a minimal IP header. */
+ error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0;
+
+ cur_len0 = vlib_buffer_length_in_chain (vm, p0);
+ len_diff0 = cur_len0 - ip_len0;
+ error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+
+ p0->error = error_node->errors[error0];
+ if (PREDICT_FALSE (error0 != IP4_ERROR_NONE))
+ {
+ if (error0 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 = error0 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/** \brief IPv4 input node.
+ @node ip4-input
+
+ This is the IPv4 input node: validates ip4 header checksums,
+ verifies ip header lengths, discards pkts with expired TTLs,
+ and sends pkts to the set of ip feature nodes configured on
+ the rx interface.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - vnet_feature_config_main_t cm corresponding to each pkt's dst address unicast /
+ multicast status.
+ - <code>b->current_config_index</code> corresponding to each pkt's
+ rx sw_if_index.
+ - This sets the per-packet graph trajectory, ensuring that
+ each packet visits the per-interface features in order.
+
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ - Indicates the @c sw_if_index value of the interface that the
+ packet was received on.
+
+ @em Sets:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - The lookup result adjacency index.
+
+ <em>Next Indices:</em>
+ - Dispatches pkts to the (first) feature node:
+ <code> vnet_get_config_data (... &next0 ...); </code>
+ or @c error-drop
+*/
+static uword
+ip4_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_input_inline (vm, node, frame, /* verify_checksum */ 1);
+}
+
+static uword
+ip4_input_no_checksum (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0);
+}
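+
+/* Both registered input nodes share ip4_input_inline; verify_checksum is a
+ * compile-time constant, so ip4-input-no-checksum is the same loop with
+ * the header-checksum validation compiled out rather than branched around
+ * per packet. */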
+
+static char *ip4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip4_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_input_node) = {
+ .function = ip4_input,
+ .name = "ip4-input",
+ .vector_size = sizeof (u32),
+
+ .n_errors = IP4_N_ERROR,
+ .error_strings = ip4_error_strings,
+
+ .n_next_nodes = IP4_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP4_INPUT_NEXT_DROP] = "error-drop",
+ [IP4_INPUT_NEXT_PUNT] = "error-punt",
+ [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
+ [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
+ [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_node, ip4_input);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = {
+ .function = ip4_input_no_checksum,
+ .name = "ip4-input-no-checksum",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP4_INPUT_NEXT_DROP] = "error-drop",
+ [IP4_INPUT_NEXT_PUNT] = "error-punt",
+ [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
+ [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
+ [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_no_checksum_node,
+ ip4_input_no_checksum);
+
+static clib_error_t *
+ip4_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_IP4, ip4_input_node.index);
+ ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4, ip4_input_node.index);
+ hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4, ip4_input_node.index);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip4_input_node.index);
+ pn->unformat_edit = unformat_pg_ip4_header;
+ pn = pg_get_node (ip4_input_no_checksum_node.index);
+ pn->unformat_edit = unformat_pg_ip4_header;
+ }
+
+ if ((error = vlib_call_init_function (vm, ip4_cli_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_source_check_init)))
+ return error;
+
+ if ((error = vlib_call_init_function
+ (vm, ip4_source_and_port_range_check_init)))
+ return error;
+
+ /* Set flow hash to something non-zero. */
+ ip4_main.flow_hash_seed = 0xdeadbeef;
+
+ /* Default TTL for packets we generate. */
+ ip4_main.host_config.ttl = 64;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip4_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
new file mode 100644
index 00000000..cc82384d
--- /dev/null
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_fib.h: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/fib/ip4_fib.h>
+
+
+/**
+ * Global pool of IPv4 8bit PLYs
+ */
+ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
+{
+  /*
+   * It's 'non-empty' if the length of the leaf stored is greater than the
+   * length of a leaf in the covering ply, i.e. the leaf is more specific
+   * than its would-be cover in the covering ply.
+   */
+ if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
+ return (1);
+ return (0);
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
+{
+ ip4_fib_mtrie_leaf_t l;
+ l = 1 + 2 * adj_index;
+ ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
+ return l;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
+{
+ return (n & 1) == 0;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
+{
+ ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
+ return n >> 1;
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
+{
+ ip4_fib_mtrie_leaf_t l;
+ l = 0 + 2 * i;
+ ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
+ return l;
+}
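+
+/* Leaf encoding: the low bit of an ip4_fib_mtrie_leaf_t tags its meaning.
+ * Odd values (1 + 2 * adj_index) are terminal leaves carrying an adjacency
+ * index; even values (2 * ply_index) point at the next 8-bit ply. For
+ * example, adj_index 7 encodes as leaf 15, while ply index 7 encodes as
+ * leaf 14, and ip4_fib_mtrie_leaf_is_next_ply () just tests (n & 1) == 0. */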
+
+#ifndef __ALTIVEC__
+#define PLY_X4_SPLAT_INIT(init_x4, init) \
+ init_x4 = u32x4_splat (init);
+#else
+#define PLY_X4_SPLAT_INIT(init_x4, init) \
+{ \
+ u32x4_union_t y; \
+ y.as_u32[0] = init; \
+ y.as_u32[1] = init; \
+ y.as_u32[2] = init; \
+ y.as_u32[3] = init; \
+ init_x4 = y.as_u32x4; \
+}
+#endif
+
+#ifdef CLIB_HAVE_VEC128
+#define PLY_INIT_LEAVES(p) \
+{ \
+ u32x4 *l, init_x4; \
+ \
+ PLY_X4_SPLAT_INIT(init_x4, init); \
+ for (l = p->leaves_as_u32x4; \
+ l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \
+ l += 4) \
+ { \
+ l[0] = init_x4; \
+ l[1] = init_x4; \
+ l[2] = init_x4; \
+ l[3] = init_x4; \
+ } \
+}
+#else
+#define PLY_INIT_LEAVES(p) \
+{ \
+ u32 *l; \
+ \
+ for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \
+ { \
+ l[0] = init; \
+ l[1] = init; \
+ l[2] = init; \
+ l[3] = init; \
+ } \
+}
+#endif
+
+#define PLY_INIT(p, init, prefix_len, ply_base_len) \
+{ \
+ /* \
+ * A leaf is 'empty' if it represents a leaf from the covering PLY \
+ * i.e. if the prefix length of the leaf is less than or equal to \
+ * the prefix length of the PLY \
+ */ \
+ p->n_non_empty_leafs = (prefix_len > ply_base_len ? \
+ ARRAY_LEN (p->leaves) : 0); \
+ memset (p->dst_address_bits_of_leaves, prefix_len, \
+ sizeof (p->dst_address_bits_of_leaves)); \
+ p->dst_address_bits_base = ply_base_len; \
+ \
+ /* Initialize leaves. */ \
+ PLY_INIT_LEAVES(p); \
+}
+
+static void
+ply_8_init (ip4_fib_mtrie_8_ply_t * p,
+ ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
+{
+ PLY_INIT (p, init, prefix_len, ply_base_len);
+}
+
+static void
+ply_16_init (ip4_fib_mtrie_16_ply_t * p,
+ ip4_fib_mtrie_leaf_t init, uword prefix_len)
+{
+ memset (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ PLY_INIT_LEAVES (p);
+}
+
+static ip4_fib_mtrie_leaf_t
+ply_create (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_leaf_t init_leaf,
+ u32 leaf_prefix_len, u32 ply_base_len)
+{
+ ip4_fib_mtrie_8_ply_t *p;
+
+ /* Get cache aligned ply. */
+ pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
+
+ ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
+ return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
+}
+
+always_inline ip4_fib_mtrie_8_ply_t *
+get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
+{
+ uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
+
+ return pool_elt_at_index (ip4_ply_pool, n);
+}
+
+void
+ip4_mtrie_free (ip4_fib_mtrie_t * m)
+{
+  /* the root ply is embedded so there is nothing to do;
+   * the assumption is that the IP4 FIB table has emptied the trie
+   * before deletion.
+   */
+#if CLIB_DEBUG > 0
+ int i;
+ for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
+ {
+ ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
+ }
+#endif
+}
+
+void
+ip4_mtrie_init (ip4_fib_mtrie_t * m)
+{
+ ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
+}
+
+typedef struct
+{
+ ip4_address_t dst_address;
+ u32 dst_address_length;
+ u32 adj_index;
+ u32 cover_address_length;
+ u32 cover_adj_index;
+} ip4_fib_mtrie_set_unset_leaf_args_t;
+
+static void
+set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_8_ply_t * ply,
+ ip4_fib_mtrie_leaf_t new_leaf,
+ uword new_leaf_dst_address_bits)
+{
+ ip4_fib_mtrie_leaf_t old_leaf;
+ uword i;
+
+ ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
+
+ for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
+ {
+ old_leaf = ply->leaves[i];
+
+ /* Recurse into sub plies. */
+ if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ ip4_fib_mtrie_8_ply_t *sub_ply =
+ get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
+ new_leaf_dst_address_bits);
+ }
+
+ /* Replace less specific terminal leaves with new leaf. */
+ else if (new_leaf_dst_address_bits >=
+ ply->dst_address_bits_of_leaves[i])
+ {
+ __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
+ ASSERT (ply->leaves[i] == new_leaf);
+ ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
+ ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
+ }
+ }
+}
+
+static void
+set_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ u32 old_ply_index, u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+ i32 n_dst_bits_next_plies;
+ u8 dst_byte;
+ ip4_fib_mtrie_8_ply_t *old_ply;
+
+ old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
+
+ ASSERT (a->dst_address_length <= 32);
+ ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+ /* how many bits of the destination address are in the next PLY */
+ n_dst_bits_next_plies =
+ a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+ dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+
+ /* Number of bits next plies <= 0 => insert leaves this ply. */
+ if (n_dst_bits_next_plies <= 0)
+ {
+ /* The mask length of the address to insert maps to this ply */
+ uword old_leaf_is_terminal;
+ u32 i, n_dst_bits_this_ply;
+
+ /* The number of bits, and hence slots/buckets, we will fill */
+ n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
+ ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
+ pow2_mask (n_dst_bits_this_ply)) == 0);
+
+ /* Starting at the value of the byte at this section of the v4 address
+ * fill the buckets/slots of the ply */
+ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+ {
+ ip4_fib_mtrie_8_ply_t *new_ply;
+
+ old_leaf = old_ply->leaves[i];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
+ {
+	      /* The new leaf is at least as specific as the one currently
+	       * occupying the slot */
+ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ if (old_leaf_is_terminal)
+ {
+ /* The current leaf is terminal, we can replace it with
+ * the new one */
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
+ old_ply->dst_address_bits_of_leaves[i] =
+ a->dst_address_length;
+ __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[i] == new_leaf);
+
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+ ASSERT (old_ply->n_non_empty_leafs <=
+ ARRAY_LEN (old_ply->leaves));
+ }
+ else
+ {
+ /* Existing leaf points to another ply. We need to place
+ * new_leaf into all more specific slots. */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ a->dst_address_length);
+ }
+ }
+ else if (!old_leaf_is_terminal)
+ {
+	      /* The current leaf is less specific and not terminal (i.e. it
+	       * is a ply), so recurse on down the trie */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_leaf (m, a, new_ply - ip4_ply_pool,
+ dst_address_byte_index + 1);
+ }
+ /*
+ * else
+ * the route we are adding is less specific than the leaf currently
+ * occupying this slot. leave it there
+ */
+ }
+ }
+ else
+ {
+      /* The address to insert requires us to move down to a lower level of
+       * the trie - recurse on down */
+ ip4_fib_mtrie_8_ply_t *new_ply;
+ u8 ply_base_len;
+
+ ply_base_len = 8 * (dst_address_byte_index + 1);
+
+ old_leaf = old_ply->leaves[dst_byte];
+
+ if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ /* There is a leaf occupying the slot. Replace it with a new ply */
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
+
+ new_leaf = ply_create (m, old_leaf,
+ clib_max (old_ply->dst_address_bits_of_leaves
+ [dst_byte], ply_base_len),
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+ /* Refetch since ply_create may move pool. */
+ old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
+
+ __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+ old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
+
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
+ }
+ else
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+ set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
+ }
+}
+
+static void
+set_root_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+ ip4_fib_mtrie_16_ply_t *old_ply;
+ i32 n_dst_bits_next_plies;
+ u16 dst_byte;
+
+ old_ply = &m->root_ply;
+
+ ASSERT (a->dst_address_length <= 32);
+
+ /* how many bits of the destination address are in the next PLY */
+ n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
+
+ dst_byte = a->dst_address.as_u16[0];
+
+ /* Number of bits next plies <= 0 => insert leaves this ply. */
+ if (n_dst_bits_next_plies <= 0)
+ {
+ /* The mask length of the address to insert maps to this ply */
+ uword old_leaf_is_terminal;
+ u32 i, n_dst_bits_this_ply;
+
+ /* The number of bits, and hence slots/buckets, we will fill */
+ n_dst_bits_this_ply = 16 - a->dst_address_length;
+ ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
+ pow2_mask (n_dst_bits_this_ply)) == 0);
+
+ /* Starting at the value of the byte at this section of the v4 address
+ * fill the buckets/slots of the ply */
+ for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
+ {
+ ip4_fib_mtrie_8_ply_t *new_ply;
+ u16 slot;
+
+ slot = clib_net_to_host_u16 (dst_byte);
+ slot += i;
+ slot = clib_host_to_net_u16 (slot);
+
+ old_leaf = old_ply->leaves[slot];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (a->dst_address_length >=
+ old_ply->dst_address_bits_of_leaves[slot])
+ {
+	      /* The new leaf is at least as specific as the one currently
+	       * occupying the slot */
+ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ if (old_leaf_is_terminal)
+ {
+ /* The current leaf is terminal, we can replace it with
+ * the new one */
+ old_ply->dst_address_bits_of_leaves[slot] =
+ a->dst_address_length;
+ __sync_val_compare_and_swap (&old_ply->leaves[slot],
+ old_leaf, new_leaf);
+ ASSERT (old_ply->leaves[slot] == new_leaf);
+ }
+ else
+ {
+ /* Existing leaf points to another ply. We need to place
+ * new_leaf into all more specific slots. */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ a->dst_address_length);
+ }
+ }
+ else if (!old_leaf_is_terminal)
+ {
+	      /* The current leaf is less specific and not terminal (i.e. it
+	       * is a ply), so recurse on down the trie */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ }
+ /*
+ * else
+ * the route we are adding is less specific than the leaf currently
+ * occupying this slot. leave it there
+ */
+ }
+ }
+ else
+ {
+      /* The address to insert requires us to move down to a lower level of
+       * the trie - recurse on down */
+ ip4_fib_mtrie_8_ply_t *new_ply;
+ u8 ply_base_len;
+
+ ply_base_len = 16;
+
+ old_leaf = old_ply->leaves[dst_byte];
+
+ if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ /* There is a leaf occupying the slot. Replace it with a new ply */
+ new_leaf = ply_create (m, old_leaf,
+ clib_max (old_ply->dst_address_bits_of_leaves
+ [dst_byte], ply_base_len),
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+ __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+ old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
+ }
+ else
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+ set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ }
+}
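+
+/* Worked example for set_root_leaf: inserting 10.0.0.0/8 with adjacency A.
+ * Here n_dst_bits_next_plies = 8 - 16 = -8, so the prefix terminates in
+ * the 16-bit root ply and n_dst_bits_this_ply = 16 - 8 = 8. The loop then
+ * writes the terminal leaf for A into the 2^8 = 256 slots whose first two
+ * address bytes are 10.0 through 10.255; the net/host swaps keep the slot
+ * index consistent with the network-byte-order u16 used at lookup time. */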
+
+static uword
+unset_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ i32 n_dst_bits_next_plies;
+ i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
+ u8 dst_byte;
+
+ ASSERT (a->dst_address_length <= 32);
+ ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+ n_dst_bits_next_plies =
+ a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+ dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+ if (n_dst_bits_next_plies < 0)
+ dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
+
+ n_dst_bits_this_ply =
+ n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
+ n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
+
+ del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+ {
+ old_leaf = old_ply->leaves[i];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (old_leaf == del_leaf
+ || (!old_leaf_is_terminal
+ && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
+ dst_address_byte_index + 1)))
+ {
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
+ old_ply->leaves[i] =
+ ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+ old_ply->dst_address_bits_of_leaves[i] =
+ clib_max (old_ply->dst_address_bits_base,
+ a->cover_address_length);
+
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
+ if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
+ {
+ pool_put (ip4_ply_pool, old_ply);
+ /* Old ply was deleted. */
+ return 1;
+ }
+#if CLIB_DEBUG > 0
+ else if (dst_address_byte_index)
+ {
+ int ii, count = 0;
+ for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
+ {
+ count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
+ }
+ ASSERT (count);
+ }
+#endif
+ }
+ }
+
+ /* Old ply was not deleted. */
+ return 0;
+}
+
+static void
+unset_root_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ i32 n_dst_bits_next_plies;
+ i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
+ u16 dst_byte;
+ ip4_fib_mtrie_16_ply_t *old_ply;
+
+ ASSERT (a->dst_address_length <= 32);
+
+ old_ply = &m->root_ply;
+ n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
+
+ dst_byte = a->dst_address.as_u16[0];
+
+ n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
+ (16 - a->dst_address_length) : 0);
+
+ del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+  /* Starting at the slot for this section of the v4 address, fill the
+   * covered buckets/slots of the ply. Slot indices are in network byte
+   * order, so convert to host order before adding the offset, then back
+   * again to index the ply. */
+ for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
+ {
+ u16 slot;
+
+ slot = clib_net_to_host_u16 (dst_byte);
+ slot += i;
+ slot = clib_host_to_net_u16 (slot);
+
+ old_leaf = old_ply->leaves[slot];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (old_leaf == del_leaf
+ || (!old_leaf_is_terminal
+ && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
+ {
+ old_ply->leaves[slot] =
+ ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+ old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
+ }
+ }
+}
+
+void
+ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length, u32 adj_index)
+{
+ ip4_fib_mtrie_set_unset_leaf_args_t a;
+ ip4_main_t *im = &ip4_main;
+
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ a.dst_address.as_u32 = (dst_address->as_u32 &
+ im->fib_masks[dst_address_length]);
+ a.dst_address_length = dst_address_length;
+ a.adj_index = adj_index;
+
+ set_root_leaf (m, &a);
+}
+
+void
+ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length,
+ u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index)
+{
+ ip4_fib_mtrie_set_unset_leaf_args_t a;
+ ip4_main_t *im = &ip4_main;
+
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ a.dst_address.as_u32 = (dst_address->as_u32 &
+ im->fib_masks[dst_address_length]);
+ a.dst_address_length = dst_address_length;
+ a.adj_index = adj_index;
+ a.cover_adj_index = cover_adj_index;
+ a.cover_address_length = cover_address_length;
+
+ /* the top level ply is never removed */
+ unset_root_leaf (m, &a);
+}
+
+/* Returns number of bytes of memory used by one mtrie ply (and its sub-plies). */
+static uword
+mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
+{
+ uword bytes, i;
+
+ bytes = sizeof (p[0]);
+ for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+ {
+ ip4_fib_mtrie_leaf_t l = p->leaves[i];
+ if (ip4_fib_mtrie_leaf_is_next_ply (l))
+ bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+ }
+
+ return bytes;
+}
+
+/* Returns number of bytes of memory used by mtrie. */
+static uword
+mtrie_memory_usage (ip4_fib_mtrie_t * m)
+{
+ uword bytes, i;
+
+ bytes = sizeof (*m);
+ for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
+ {
+ ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
+ if (ip4_fib_mtrie_leaf_is_next_ply (l))
+ bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+ }
+
+ return bytes;
+}
+
+static u8 *
+format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
+{
+ ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
+
+ if (ip4_fib_mtrie_leaf_is_terminal (l))
+ s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
+ else
+ s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
+ return s;
+}
+
+#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent) \
+({ \
+ u32 a, ia_length; \
+ ip4_address_t ia; \
+  ip4_fib_mtrie_leaf_t _l = (_p)->leaves[(_i)]; \
+ \
+ a = (_base_address) + ((_i) << (32 - (_ply_max_len))); \
+ ia.as_u32 = clib_host_to_net_u32 (a); \
+ ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \
+ s = format (s, "\n%U%20U %U", \
+ format_white_space, (_indent) + 2, \
+ format_ip4_address_and_length, &ia, ia_length, \
+ format_ip4_fib_mtrie_leaf, _l); \
+ \
+ if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \
+ s = format (s, "\n%U%U", \
+ format_white_space, (_indent) + 2, \
+ format_ip4_fib_mtrie_ply, m, a, \
+ ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \
+ s; \
+})
+
+static u8 *
+format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
+{
+ ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+ u32 base_address = va_arg (*va, u32);
+ u32 ply_index = va_arg (*va, u32);
+ ip4_fib_mtrie_8_ply_t *p;
+ uword indent;
+ int i;
+
+ p = pool_elt_at_index (ip4_ply_pool, ply_index);
+ indent = format_get_indent (s);
+ s = format (s, "ply index %d, %d non-empty leaves", ply_index,
+ p->n_non_empty_leafs);
+
+ for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+ {
+ if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
+ {
+ FORMAT_PLY (s, p, i, base_address,
+ p->dst_address_bits_base + 8, indent);
+ }
+ }
+
+ return s;
+}
+
+u8 *
+format_ip4_fib_mtrie (u8 * s, va_list * va)
+{
+ ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+ ip4_fib_mtrie_16_ply_t *p;
+ u32 base_address = 0;
+ int i;
+
+ s = format (s, "%d plies, memory usage %U\n",
+ pool_elts (ip4_ply_pool),
+ format_memory_size, mtrie_memory_usage (m));
+ s = format (s, "root-ply");
+ p = &m->root_ply;
+
+ for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+ {
+ u16 slot;
+
+ slot = clib_host_to_net_u16 (i);
+
+ if (p->dst_address_bits_of_leaves[slot] > 0)
+ {
+ FORMAT_PLY (s, p, slot, base_address, 16, 2);
+ }
+ }
+
+ return s;
+}
+
+static clib_error_t *
+ip4_mtrie_module_init (vlib_main_t * vm)
+{
+ /* Burn one ply so index 0 is taken */
+ CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
+
+ pool_get (ip4_ply_pool, p);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
new file mode 100644
index 00000000..be262c2c
--- /dev/null
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_fib.h: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_fib_h
+#define included_ip_ip4_fib_h
+
+#include <vppinfra/cache.h>
+#include <vppinfra/vector.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/ip4_packet.h> /* for ip4_address_t */
+
+/* ip4 fib leaves: a 16-8-8 stride mtrie.
+ 1 + 2*adj_index for terminal leaves.
+ 0 + 2*next_ply_index for non-terminals, i.e. PLYs
+ 1 => empty (adjacency index of zero is special miss adjacency). */
+typedef u32 ip4_fib_mtrie_leaf_t;
+
+#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
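+
+/* Worked example (illustrative, from the encoding above): a terminal leaf
+ * for adjacency index 5 is 1 + 2*5 = 11 (LSB set => terminal); a
+ * non-terminal leaf for ply index 5 is 0 + 2*5 = 10 (LSB clear => next
+ * ply). Decoding reverses this: value >> 1 recovers the index and
+ * value & 1 tests for terminal-ness. */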
+
+/**
+ * @brief the 16-bit stride that is the top PLY of the mtrie
+ * We do not maintain the count of 'real' leaves in this PLY, since
+ * it is never removed. The FIB will destroy the mtrie and the ply once
+ * the FIB is destroyed.
+ */
+#define PLY_16_SIZE (1<<16)
+typedef struct ip4_fib_mtrie_16_ply_t_
+{
+ /**
+   * The leaves/slots/buckets to be filled with leaves
+ */
+ union
+ {
+ ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE];
+
+#ifdef CLIB_HAVE_VEC128
+ u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
+#endif
+ };
+
+ /**
+ * Prefix length for terminal leaves.
+ */
+ u8 dst_address_bits_of_leaves[PLY_16_SIZE];
+} ip4_fib_mtrie_16_ply_t;
+
+/**
+ * @brief One 8-bit stride ply of the mtrie fib.
+ */
+typedef struct ip4_fib_mtrie_8_ply_t_
+{
+ /**
+   * The leaves/slots/buckets to be filled with leaves
+ */
+ union
+ {
+ ip4_fib_mtrie_leaf_t leaves[256];
+
+#ifdef CLIB_HAVE_VEC128
+ u32x4 leaves_as_u32x4[256 / 4];
+#endif
+ };
+
+ /**
+ * Prefix length for leaves/ply.
+ */
+ u8 dst_address_bits_of_leaves[256];
+
+ /**
+   * Number of non-empty leaves (whether terminal or not).
+ */
+ i32 n_non_empty_leafs;
+
+ /**
+   * The length of the ply's covering prefix. Also a measure of its depth.
+   * If a leaf in a slot has a mask length longer than this, then it is
+ * 'non-empty'. Otherwise it is the value of the cover.
+ */
+ i32 dst_address_bits_base;
+
+ /* Pad to cache line boundary. */
+ u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
+}
+ip4_fib_mtrie_8_ply_t;
+
+STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
+ "IP4 Mtrie ply cache line");
+
+/**
+ * @brief The multiway-TRIE.
+ * There is no data associated with the mtrie apart from the top PLY
+ */
+typedef struct
+{
+ /**
+   * Embed the PLY in the mtrie struct. This means that the data-plane
+   * 'get me the mtrie' operation returns the first ply directly, not an
+   * indirect pointer to it, and therefore incurs no extra cacheline
+   * misses in the data-path.
+ */
+ ip4_fib_mtrie_16_ply_t root_ply;
+} ip4_fib_mtrie_t;
+
+/**
+ * @brief Initialise an mtrie
+ */
+void ip4_mtrie_init (ip4_fib_mtrie_t * m);
+
+/**
+ * @brief Free an mtrie. It must be empty when freed.
+ */
+void ip4_mtrie_free (ip4_fib_mtrie_t * m);
+
+/**
+ * @brief Add a route/entry to the mtrie
+ */
+void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length, u32 adj_index);
+/**
+ * @brief Remove a route/entry from the mtrie
+ */
+void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length,
+ u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index);
+
+/**
+ * @brief Format/display the contents of the mtrie
+ */
+format_function_t format_ip4_fib_mtrie;
+
+/**
+ * @brief A global pool of 8bit stride plys
+ */
+extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+
+/**
+ * Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index)?
+ */
+always_inline u32
+ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
+{
+ return n & 1;
+}
+
+/**
+ * From the stored slot value extract the LB index value
+ */
+always_inline u32
+ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
+{
+ ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
+ return n >> 1;
+}
+
+/**
+ * @brief Lookup step. Processes 1 byte of 4 byte ip4 address.
+ */
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_leaf_t current_leaf,
+ const ip4_address_t * dst_address,
+ u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_8_ply_t *ply;
+
+ uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
+
+ if (!current_is_terminal)
+ {
+ ply = ip4_ply_pool + (current_leaf >> 1);
+ return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
+ }
+
+ return current_leaf;
+}
+
+/**
+ * @brief Lookup step number 1. Processes 2 bytes of 4 byte ip4 address.
+ */
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address)
+{
+ ip4_fib_mtrie_leaf_t next_leaf;
+
+ next_leaf = m->root_ply.leaves[dst_address->as_u16[0]];
+
+ return next_leaf;
+}
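+
+/* A minimal end-to-end lookup sketch (illustrative, assuming a populated
+ * mtrie 'm', a destination address 'dst', and a walk that ends on a
+ * terminal leaf): step one consumes address bytes 0-1 via the 16-bit root
+ * ply, then one step per remaining byte:
+ *
+ *   ip4_fib_mtrie_leaf_t leaf;
+ *   leaf = ip4_fib_mtrie_lookup_step_one (m, dst);
+ *   leaf = ip4_fib_mtrie_lookup_step (m, leaf, dst, 2);
+ *   leaf = ip4_fib_mtrie_lookup_step (m, leaf, dst, 3);
+ *   lb_index = ip4_fib_mtrie_leaf_get_adj_index (leaf);
+ */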
+
+#endif /* included_ip_ip4_fib_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
new file mode 100644
index 00000000..1ff9fbdb
--- /dev/null
+++ b/src/vnet/ip/ip4_packet.h
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/packet.h: ip4 packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip4_packet_h
+#define included_ip4_packet_h
+
+#include <vnet/ip/ip_packet.h> /* for ip_csum_t */
+#include <vnet/tcp/tcp_packet.h> /* for tcp_header_t */
+#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */
+
+/* IP4 address which can be accessed either as 4 bytes
+ or as a 32-bit number. */
+typedef union
+{
+ u8 data[4];
+ u32 data_u32;
+ /* Aliases. */
+ u8 as_u8[4];
+ u16 as_u16[2];
+ u32 as_u32;
+} ip4_address_t;
+
+typedef struct
+{
+ /* IP address must be first for ip_interface_address_get_address() to work */
+ ip4_address_t ip4_addr;
+ u32 fib_index;
+} ip4_address_fib_t;
+
+always_inline void
+ip4_addr_fib_init (ip4_address_fib_t * addr_fib, ip4_address_t * address,
+ u32 fib_index)
+{
+ clib_memcpy (&addr_fib->ip4_addr, address, sizeof (addr_fib->ip4_addr));
+ addr_fib->fib_index = fib_index;
+}
+
+/* (src,dst) pair of addresses as found in packet header. */
+typedef struct
+{
+ ip4_address_t src, dst;
+} ip4_address_pair_t;
+
+/* If address is a valid netmask, return length of mask. */
+always_inline uword
+ip4_address_netmask_length (ip4_address_t * a)
+{
+ uword result = 0;
+ uword i;
+ for (i = 0; i < ARRAY_LEN (a->as_u8); i++)
+ {
+ switch (a->as_u8[i])
+ {
+ case 0xff:
+ result += 8;
+ break;
+ case 0xfe:
+ result += 7;
+ goto done;
+ case 0xfc:
+ result += 6;
+ goto done;
+ case 0xf8:
+ result += 5;
+ goto done;
+ case 0xf0:
+ result += 4;
+ goto done;
+ case 0xe0:
+ result += 3;
+ goto done;
+ case 0xc0:
+ result += 2;
+ goto done;
+ case 0x80:
+ result += 1;
+ goto done;
+ case 0x00:
+ result += 0;
+ goto done;
+ default:
+	  /* Not a valid netmask. */
+ return ~0;
+ }
+ }
+done:
+ return result;
+}
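+
+/* Examples (illustrative): 255.255.255.0 yields 24 (three 0xff bytes, then
+ * the 0x00 byte ends the scan); 255.255.254.0 yields 23. A byte that is
+ * not a valid mask byte (e.g. 0xfd) makes the function return ~0. */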
+
+typedef union
+{
+ struct
+ {
+    /* 4 bit version and 4 bit header length (in 32 bit units), VVVVLLLL.
+ e.g. for packets w/ no options ip_version_and_header_length == 0x45. */
+ u8 ip_version_and_header_length;
+
+ /* Type of service. */
+ u8 tos;
+
+ /* Total layer 3 packet length including this header. */
+ u16 length;
+
+ /* Fragmentation ID. */
+ u16 fragment_id;
+
+ /* 3 bits of flags and 13 bits of fragment offset (in units
+ of 8 byte quantities). */
+ u16 flags_and_fragment_offset;
+#define IP4_HEADER_FLAG_MORE_FRAGMENTS (1 << 13)
+#define IP4_HEADER_FLAG_DONT_FRAGMENT (1 << 14)
+#define IP4_HEADER_FLAG_CONGESTION (1 << 15)
+
+ /* Time to live decremented by router at each hop. */
+ u8 ttl;
+
+ /* Next level protocol packet. */
+ u8 protocol;
+
+ /* Checksum. */
+ u16 checksum;
+
+ /* Source and destination address. */
+ union
+ {
+ struct
+ {
+ ip4_address_t src_address, dst_address;
+ };
+ ip4_address_pair_t address_pair;
+ };
+ };
+
+ /* For checksumming we'll want to access IP header in word sized chunks. */
+ /* For 64 bit machines. */
+ /* *INDENT-OFF* */
+ CLIB_PACKED (struct {
+ u64 checksum_data_64[2];
+ u32 checksum_data_64_32[1];
+ });
+ /* *INDENT-ON* */
+
+ /* For 32 bit machines. */
+ /* *INDENT-OFF* */
+ CLIB_PACKED (struct {
+ u32 checksum_data_32[5];
+ });
+ /* *INDENT-ON* */
+} ip4_header_t;
+
+/* Value of ip_version_and_header_length for packets w/o options. */
+#define IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS \
+ ((4 << 4) | (sizeof (ip4_header_t) / sizeof (u32)))
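+
+/* Sanity check (illustrative): version 4 in the high nibble and a 20 byte
+ * option-free header in the low nibble give (4 << 4) | (20 / 4) = 0x45,
+ * matching the VVVVLLLL example above. */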
+
+always_inline int
+ip4_get_fragment_offset (ip4_header_t * i)
+{
+ return clib_net_to_host_u16 (i->flags_and_fragment_offset) & 0x1fff;
+}
+
+always_inline int
+ip4_get_fragment_more (ip4_header_t * i)
+{
+ return clib_net_to_host_u16 (i->flags_and_fragment_offset) &
+ IP4_HEADER_FLAG_MORE_FRAGMENTS;
+}
+
+always_inline int
+ip4_is_fragment (ip4_header_t * i)
+{
+ return (i->flags_and_fragment_offset &
+ clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS));
+}
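+
+/* Reading of the two predicates around this point (illustrative): a packet
+ * is a fragment iff its offset is non-zero or the more-fragments (MF) flag
+ * is set; the first fragment is the one with offset zero and MF set, which
+ * is what the masked comparison in ip4_is_first_fragment() tests. */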
+
+always_inline int
+ip4_is_first_fragment (ip4_header_t * i)
+{
+ return (i->flags_and_fragment_offset &
+ clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS)) ==
+ clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+}
+
+/* Fragment offset in bytes. */
+always_inline int
+ip4_get_fragment_offset_bytes (ip4_header_t * i)
+{
+ return 8 * ip4_get_fragment_offset (i);
+}
+
+always_inline int
+ip4_header_bytes (ip4_header_t * i)
+{
+ return sizeof (u32) * (i->ip_version_and_header_length & 0xf);
+}
+
+always_inline void *
+ip4_next_header (ip4_header_t * i)
+{
+ return (void *) i + ip4_header_bytes (i);
+}
+
+always_inline u16
+ip4_header_checksum (ip4_header_t * i)
+{
+ u16 save, csum;
+ ip_csum_t sum;
+
+ save = i->checksum;
+ i->checksum = 0;
+ sum = ip_incremental_checksum (0, i, ip4_header_bytes (i));
+ csum = ~ip_csum_fold (sum);
+
+ i->checksum = save;
+
+ /* Make checksum agree for special case where either
+ 0 or 0xffff would give same 1s complement sum. */
+ if (csum == 0 && save == 0xffff)
+ csum = save;
+
+ return csum;
+}
+
+static inline uword
+ip4_header_checksum_is_valid (ip4_header_t * i)
+{
+ return i->checksum == ip4_header_checksum (i);
+}
+
+#define ip4_partial_header_checksum_x1(ip0,sum0) \
+do { \
+ if (BITS (ip_csum_t) > 32) \
+ { \
+ sum0 = ip0->checksum_data_64[0]; \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64[1]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64_32[0]); \
+ } \
+ else \
+ { \
+ sum0 = ip0->checksum_data_32[0]; \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[1]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[2]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[3]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[4]); \
+ } \
+} while (0)
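+
+/* Typical use, as a sketch (cf. compute_length_and_or_checksum() in
+ * ip4_pg.c): zero the checksum field, accumulate, then fold:
+ *
+ *   ip0->checksum = 0;
+ *   ip4_partial_header_checksum_x1 (ip0, sum0);
+ *   ip0->checksum = ~ip_csum_fold (sum0);
+ */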
+
+#define ip4_partial_header_checksum_x2(ip0,ip1,sum0,sum1) \
+do { \
+ if (BITS (ip_csum_t) > 32) \
+ { \
+ sum0 = ip0->checksum_data_64[0]; \
+ sum1 = ip1->checksum_data_64[0]; \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64[1]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_64[1]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64_32[0]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_64_32[0]); \
+ } \
+ else \
+ { \
+ sum0 = ip0->checksum_data_32[0]; \
+ sum1 = ip1->checksum_data_32[0]; \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[1]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[1]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[2]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[2]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[3]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[3]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[4]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[4]); \
+ } \
+} while (0)
+
+always_inline uword
+ip4_address_is_multicast (ip4_address_t * a)
+{
+ return (a->data[0] & 0xf0) == 0xe0;
+}
+
+always_inline void
+ip4_multicast_address_set_for_group (ip4_address_t * a,
+ ip_multicast_group_t g)
+{
+ ASSERT ((u32) g < (1 << 28));
+ a->as_u32 = clib_host_to_net_u32 ((0xe << 28) + g);
+}
+
+always_inline void
+ip4_multicast_ethernet_address (u8 * ethernet_address, ip4_address_t * a)
+{
+ u8 *d = a->as_u8;
+
+ ethernet_address[0] = 0x01;
+ ethernet_address[1] = 0x00;
+ ethernet_address[2] = 0x5e;
+ ethernet_address[3] = d[1] & 0x7f;
+ ethernet_address[4] = d[2];
+ ethernet_address[5] = d[3];
+}
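+
+/* Example (illustrative): 224.1.2.3 maps to 01:00:5e:01:02:03. The first
+ * address byte and the high bit of the second byte are not carried in the
+ * MAC, hence the d[1] & 0x7f above. */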
+
+always_inline void
+ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0)
+{
+ u32 src0, dst0;
+
+ src0 = ip0->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip0->dst_address.data_u32 = src0;
+
+ src0 = tcp0->src;
+ dst0 = tcp0->dst;
+ tcp0->src = dst0;
+ tcp0->dst = src0;
+}
+
+always_inline void
+ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1,
+ tcp_header_t * tcp0, tcp_header_t * tcp1)
+{
+ u32 src0, dst0, src1, dst1;
+
+ src0 = ip0->src_address.data_u32;
+ src1 = ip1->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ dst1 = ip1->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip1->src_address.data_u32 = dst1;
+ ip0->dst_address.data_u32 = src0;
+ ip1->dst_address.data_u32 = src1;
+
+ src0 = tcp0->src;
+ src1 = tcp1->src;
+ dst0 = tcp0->dst;
+ dst1 = tcp1->dst;
+ tcp0->src = dst0;
+ tcp1->src = dst1;
+ tcp0->dst = src0;
+ tcp1->dst = src1;
+}
+
+#endif /* included_ip4_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_pg.c b/src/vnet/ip/ip4_pg.c
new file mode 100644
index 00000000..9697a3b9
--- /dev/null
+++ b/src/vnet/ip/ip4_pg.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_pg: IP v4 packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+#define IP4_PG_EDIT_CHECKSUM (1 << 0)
+#define IP4_PG_EDIT_LENGTH (1 << 1)
+
+static_always_inline void
+compute_length_and_or_checksum (vlib_main_t * vm,
+ u32 * packets,
+ u32 n_packets,
+ u32 ip_header_offset, u32 flags)
+{
+ ASSERT (flags != 0);
+
+ while (n_packets >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ ip_csum_t sum0, sum1;
+
+ pi0 = packets[0];
+ pi1 = packets[1];
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ n_packets -= 2;
+ packets += 2;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+ ip1 = (void *) (p1->data + ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_LENGTH)
+ {
+ ip0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset);
+ ip1->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) -
+ ip_header_offset);
+ }
+
+ if (flags & IP4_PG_EDIT_CHECKSUM)
+ {
+ ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0]));
+ ASSERT (ip4_header_bytes (ip1) == sizeof (ip1[0]));
+
+ ip0->checksum = 0;
+ ip1->checksum = 0;
+
+ ip4_partial_header_checksum_x2 (ip0, ip1, sum0, sum1);
+ ip0->checksum = ~ip_csum_fold (sum0);
+ ip1->checksum = ~ip_csum_fold (sum1);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+ }
+ }
+
+ while (n_packets >= 1)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip_csum_t sum0;
+
+ pi0 = packets[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ n_packets -= 1;
+ packets += 1;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_LENGTH)
+ ip0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_CHECKSUM)
+ {
+ ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0]));
+
+ ip0->checksum = 0;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ ip0->checksum = ~ip_csum_fold (sum0);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ }
+ }
+}
+
+static void
+ip4_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset;
+
+ ip_offset = g->start_byte_offset;
+
+ switch (g->edit_function_opaque)
+ {
+ case IP4_PG_EDIT_LENGTH:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_LENGTH);
+ break;
+
+ case IP4_PG_EDIT_CHECKSUM:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_CHECKSUM);
+ break;
+
+ case IP4_PG_EDIT_LENGTH | IP4_PG_EDIT_CHECKSUM:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_LENGTH
+ | IP4_PG_EDIT_CHECKSUM);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+}
+
+typedef struct
+{
+ pg_edit_t ip_version, header_length;
+ pg_edit_t tos;
+ pg_edit_t length;
+
+ pg_edit_t fragment_id, fragment_offset;
+
+ /* Flags together with fragment offset. */
+ pg_edit_t mf_flag, df_flag, ce_flag;
+
+ pg_edit_t ttl;
+
+ pg_edit_t protocol;
+
+ pg_edit_t checksum;
+
+ pg_edit_t src_address, dst_address;
+} pg_ip4_header_t;
+
+static inline void
+pg_ip4_header_init (pg_ip4_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, ip4_header_t, f);
+ _(tos);
+ _(length);
+ _(fragment_id);
+ _(ttl);
+ _(protocol);
+ _(checksum);
+ _(src_address);
+ _(dst_address);
+#undef _
+
+ /* Initialize bit fields. */
+ pg_edit_init_bitfield (&p->header_length, ip4_header_t,
+ ip_version_and_header_length, 0, 4);
+ pg_edit_init_bitfield (&p->ip_version, ip4_header_t,
+ ip_version_and_header_length, 4, 4);
+
+ pg_edit_init_bitfield (&p->fragment_offset, ip4_header_t,
+ flags_and_fragment_offset, 0, 13);
+ pg_edit_init_bitfield (&p->mf_flag, ip4_header_t,
+ flags_and_fragment_offset, 13, 1);
+ pg_edit_init_bitfield (&p->df_flag, ip4_header_t,
+ flags_and_fragment_offset, 14, 1);
+ pg_edit_init_bitfield (&p->ce_flag, ip4_header_t,
+ flags_and_fragment_offset, 15, 1);
+}
+
+uword
+unformat_pg_ip4_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ip4_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip4_header_t),
+ &group_index);
+ pg_ip4_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->ip_version, 4);
+ pg_edit_set_fixed (&p->header_length, sizeof (ip4_header_t) / sizeof (u32));
+
+ pg_edit_set_fixed (&p->tos, 0);
+ pg_edit_set_fixed (&p->ttl, 64);
+
+ pg_edit_set_fixed (&p->fragment_id, 0);
+ pg_edit_set_fixed (&p->fragment_offset, 0);
+ pg_edit_set_fixed (&p->mf_flag, 0);
+ pg_edit_set_fixed (&p->df_flag, 0);
+ pg_edit_set_fixed (&p->ce_flag, 0);
+
+ p->length.type = PG_EDIT_UNSPECIFIED;
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ if (unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->src_address,
+ unformat_pg_edit, unformat_ip4_address, &p->dst_address))
+ goto found;
+
+ if (!unformat (input, "%U:",
+ unformat_pg_edit, unformat_ip_protocol, &p->protocol))
+ goto error;
+
+found:
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "version %U",
+ unformat_pg_edit, unformat_pg_number, &p->ip_version))
+ ;
+
+ else if (unformat (input, "header-length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->header_length))
+ ;
+
+ else if (unformat (input, "tos %U",
+ unformat_pg_edit, unformat_pg_number, &p->tos))
+ ;
+
+ else if (unformat (input, "length %U",
+ unformat_pg_edit, unformat_pg_number, &p->length))
+ ;
+
+ else if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ else if (unformat (input, "ttl %U",
+ unformat_pg_edit, unformat_pg_number, &p->ttl))
+ ;
+
+ else if (unformat (input, "fragment id %U offset %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->fragment_id,
+ unformat_pg_edit,
+ unformat_pg_number, &p->fragment_offset))
+ {
+ int i;
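+	  /* The wire field counts 8-byte units, so scale the parsed
+	   * values down; this assumes the user supplies offsets in
+	   * bytes, consistent with ip4_get_fragment_offset_bytes(). */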
+ for (i = 0; i < ARRAY_LEN (p->fragment_offset.values); i++)
+ pg_edit_set_value (&p->fragment_offset, i,
+ pg_edit_get_value (&p->fragment_offset,
+ i) / 8);
+
+ }
+
+ /* Flags. */
+ else if (unformat (input, "mf") || unformat (input, "MF"))
+ pg_edit_set_fixed (&p->mf_flag, 1);
+
+ else if (unformat (input, "df") || unformat (input, "DF"))
+ pg_edit_set_fixed (&p->df_flag, 1);
+
+ else if (unformat (input, "ce") || unformat (input, "CE"))
+ pg_edit_set_fixed (&p->ce_flag, 1);
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_t protocol;
+ ip_protocol_info_t *pi;
+
+ pi = 0;
+ if (p->protocol.type == PG_EDIT_FIXED)
+ {
+ protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO);
+ pi = ip_get_protocol_info (im, protocol);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->length.type == PG_EDIT_UNSPECIFIED
+ && s->min_packet_bytes == s->max_packet_bytes
+ && group_index + 1 < vec_len (s->edit_groups))
+ {
+ pg_edit_set_fixed (&p->length,
+ pg_edit_group_n_bytes (s, group_index));
+ }
+
+ /* Compute IP header checksum if all edits are fixed. */
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ ip4_header_t fixed_header, fixed_mask, cmp_mask;
+
+ /* See if header is all fixed and specified except for
+ checksum field. */
+ memset (&cmp_mask, ~0, sizeof (cmp_mask));
+ cmp_mask.checksum = 0;
+
+ pg_edit_group_get_fixed_packet_data (s, group_index,
+ &fixed_header, &fixed_mask);
+ if (!memcmp (&fixed_mask, &cmp_mask, sizeof (cmp_mask)))
+ pg_edit_set_fixed (&p->checksum,
+ clib_net_to_host_u16 (ip4_header_checksum
+ (&fixed_header)));
+ }
+
+ p = pg_get_edit_group (s, group_index);
+ if (p->length.type == PG_EDIT_UNSPECIFIED
+ || p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = ip4_pg_edit_function;
+ g->edit_function_opaque = 0;
+ if (p->length.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= IP4_PG_EDIT_LENGTH;
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= IP4_PG_EDIT_CHECKSUM;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c
new file mode 100644
index 00000000..4829079b
--- /dev/null
+++ b/src/vnet/ip/ip4_source_and_port_range_check.c
@@ -0,0 +1,1424 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_source_and_port_range_check.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+
+/**
+ * @file
+ * @brief IPv4 Source and Port Range Checking.
+ *
+ * This file contains the source code for IPv4 source and port range
+ * checking.
+ */
+
+
+/**
+ * @brief The pool of range check DPOs
+ */
+static protocol_port_range_dpo_t *ppr_dpo_pool;
+
+/**
+ * @brief Dynamically registered DPO type
+ */
+static dpo_type_t ppr_dpo_type;
+
+vlib_node_registration_t ip4_source_port_and_range_check_rx;
+vlib_node_registration_t ip4_source_port_and_range_check_tx;
+
+#define foreach_ip4_source_and_port_range_check_error \
+ _(CHECK_FAIL, "ip4 source and port range check bad packets") \
+ _(CHECK_OK, "ip4 source and port range check good packets")
+
+typedef enum
+{
+#define _(sym,str) IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_##sym,
+ foreach_ip4_source_and_port_range_check_error
+#undef _
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_N_ERROR,
+} ip4_source_and_port_range_check_error_t;
+
+static char *ip4_source_and_port_range_check_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip4_source_and_port_range_check_error
+#undef _
+};
+
+typedef struct
+{
+ u32 pass;
+ u32 bypass;
+ u32 is_tcp;
+ ip4_address_t src_addr;
+ u16 port;
+ u32 fib_index;
+} ip4_source_and_port_range_check_trace_t;
+
+static u8 *
+format_ip4_source_and_port_range_check_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip4_source_and_port_range_check_trace_t *t =
+ va_arg (*va, ip4_source_and_port_range_check_trace_t *);
+
+ if (t->bypass)
+ s = format (s, "PASS (bypass case)");
+ else
+ s = format (s, "fib %d src ip %U %s dst port %d: %s",
+ t->fib_index, format_ip4_address, &t->src_addr,
+ t->is_tcp ? "TCP" : "UDP", (u32) t->port,
+ (t->pass == 1) ? "PASS" : "FAIL");
+ return s;
+}
+
+typedef enum
+{
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP,
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+} ip4_source_and_port_range_check_next_t;
+
+
+static inline u32
+check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
+ u16 dst_port, u32 next)
+{
+ u16x8vec_t key;
+ u16x8vec_t diff1;
+ u16x8vec_t diff2;
+ u16x8vec_t sum, sum_equal_diff2;
+ u16 sum_nonzero, sum_equal, winner_mask;
+ int i;
+
+ if (NULL == ppr_dpo || dst_port == 0)
+ return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+
+ /* Make the obvious screw-case work. A variant also works w/ no MMX */
+ if (PREDICT_FALSE (dst_port == 65535))
+ {
+ int j;
+
+ for (i = 0;
+ i < VLIB_BUFFER_PRE_DATA_SIZE / sizeof (protocol_port_range_t);
+ i++)
+ {
+ for (j = 0; j < 8; j++)
+ if (ppr_dpo->blocks[i].low.as_u16[j] == 65535)
+ return next;
+ }
+ return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+ }
+
+ key.as_u16x8 = u16x8_splat (dst_port);
+
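+  /* How the vector range test below works (a reading of the code, not new
+   * logic): with saturating subtraction, diff1 = low -s key is zero iff
+   * key >= low, and diff2 = hi -s key is non-zero iff key < hi; so
+   * sum = diff1 + diff2 equals diff2 and is non-zero exactly when
+   * low <= key < hi. E.g. key = 80, range [80, 81): diff1 = 0, diff2 = 1,
+   * sum = 1 = diff2 => match. */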
+ for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+ {
+ diff1.as_u16x8 =
+ u16x8_sub_saturate (ppr_dpo->blocks[i].low.as_u16x8, key.as_u16x8);
+ diff2.as_u16x8 =
+ u16x8_sub_saturate (ppr_dpo->blocks[i].hi.as_u16x8, key.as_u16x8);
+ sum.as_u16x8 = u16x8_add (diff1.as_u16x8, diff2.as_u16x8);
+ sum_equal_diff2.as_u16x8 =
+ u16x8_is_equal (sum.as_u16x8, diff2.as_u16x8);
+ sum_nonzero = ~u16x8_zero_byte_mask (sum.as_u16x8);
+ sum_equal = ~u16x8_zero_byte_mask (sum_equal_diff2.as_u16x8);
+ winner_mask = sum_nonzero & sum_equal;
+ if (winner_mask)
+ return next;
+ }
+ return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+}
+
+always_inline protocol_port_range_dpo_t *
+protocol_port_range_dpo_get (index_t index)
+{
+ return (pool_elt_at_index (ppr_dpo_pool, index));
+}
+
+always_inline uword
+ip4_source_and_port_range_check_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_tx)
+{
+ ip4_main_t *im = &ip4_main;
+ u32 n_left_from, *from, *to_next;
+ u32 next_index;
+ vlib_node_runtime_t *error_node = node;
+ u32 good_packets = 0;
+ int i;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* { */
+ /* vlib_buffer_t *b0, *b1; */
+ /* ip4_header_t *ip0, *ip1; */
+ /* ip4_fib_mtrie_t *mtrie0, *mtrie1; */
+ /* ip4_fib_mtrie_leaf_t leaf0, leaf1; */
+ /* ip_source_and_port_range_check_config_t *c0, *c1; */
+ /* ip_adjacency_t *adj0 = 0, *adj1 = 0; */
+ /* u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; */
+ /* u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1; */
+ /* udp_header_t *udp0, *udp1; */
+
+ /* /\* Prefetch next iteration. *\/ */
+ /* { */
+ /* vlib_buffer_t *p2, *p3; */
+
+ /* p2 = vlib_get_buffer (vm, from[2]); */
+ /* p3 = vlib_get_buffer (vm, from[3]); */
+
+ /* vlib_prefetch_buffer_header (p2, LOAD); */
+ /* vlib_prefetch_buffer_header (p3, LOAD); */
+
+ /* CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); */
+ /* CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); */
+ /* } */
+
+ /* bi0 = to_next[0] = from[0]; */
+ /* bi1 = to_next[1] = from[1]; */
+ /* from += 2; */
+ /* to_next += 2; */
+ /* n_left_from -= 2; */
+ /* n_left_to_next -= 2; */
+
+ /* b0 = vlib_get_buffer (vm, bi0); */
+ /* b1 = vlib_get_buffer (vm, bi1); */
+
+ /* fib_index0 = */
+ /* vec_elt (im->fib_index_by_sw_if_index, */
+ /* vnet_buffer (b0)->sw_if_index[VLIB_RX]); */
+ /* fib_index1 = */
+ /* vec_elt (im->fib_index_by_sw_if_index, */
+ /* vnet_buffer (b1)->sw_if_index[VLIB_RX]); */
+
+ /* ip0 = vlib_buffer_get_current (b0); */
+ /* ip1 = vlib_buffer_get_current (b1); */
+
+ /* if (is_tx) */
+ /* { */
+ /* c0 = vnet_get_config_data (&tx_cm->config_main, */
+ /* &b0->current_config_index, */
+ /* &next0, sizeof (c0[0])); */
+ /* c1 = vnet_get_config_data (&tx_cm->config_main, */
+ /* &b1->current_config_index, */
+ /* &next1, sizeof (c1[0])); */
+ /* } */
+ /* else */
+ /* { */
+ /* c0 = vnet_get_config_data (&rx_cm->config_main, */
+ /* &b0->current_config_index, */
+ /* &next0, sizeof (c0[0])); */
+ /* c1 = vnet_get_config_data (&rx_cm->config_main, */
+ /* &b1->current_config_index, */
+ /* &next1, sizeof (c1[0])); */
+ /* } */
+
+ /* /\* we can't use the default VRF here... *\/ */
+ /* for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) */
+ /* { */
+ /* ASSERT (c0->fib_index[i] && c1->fib_index[i]); */
+ /* } */
+
+
+ /* if (is_tx) */
+ /* { */
+ /* if (ip0->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */
+ /* if (ip0->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */
+ /* } */
+ /* else */
+ /* { */
+ /* if (ip0->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */
+ /* if (ip0->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (fib_index0 != ~0)) */
+ /* { */
+
+ /* mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; */
+
+ /* leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 0); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 1); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 2); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 3); */
+
+ /* adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); */
+
+ /* ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, */
+ /* &ip0->src_address, */
+ /* 0 */
+ /* /\* use dflt rt *\/ */
+ /* )); */
+ /* adj0 = ip_get_adjacency (lm, adj_index0); */
+ /* } */
+
+ /* if (is_tx) */
+ /* { */
+ /* if (ip1->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */
+ /* if (ip1->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */
+ /* } */
+ /* else */
+ /* { */
+ /* if (ip1->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */
+ /* if (ip1->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (fib_index1 != ~0)) */
+ /* { */
+
+ /* mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; */
+
+ /* leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 0); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 1); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 2); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 3); */
+
+ /* adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); */
+
+ /* ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, */
+ /* &ip1->src_address, */
+ /* 0)); */
+ /* adj1 = ip_get_adjacency (lm, adj_index1); */
+ /* } */
+
+ /* pass0 = 0; */
+ /* pass0 |= adj0 == 0; */
+ /* pass0 |= ip4_address_is_multicast (&ip0->src_address); */
+ /* pass0 |= */
+ /* ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */
+ /* pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) */
+ /* && (ip0->protocol != IP_PROTOCOL_TCP); */
+
+ /* pass1 = 0; */
+ /* pass1 |= adj1 == 0; */
+ /* pass1 |= ip4_address_is_multicast (&ip1->src_address); */
+ /* pass1 |= */
+ /* ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */
+ /* pass1 |= (ip1->protocol != IP_PROTOCOL_UDP) */
+ /* && (ip1->protocol != IP_PROTOCOL_TCP); */
+
+ /* save_next0 = next0; */
+ /* udp0 = ip4_next_header (ip0); */
+ /* save_next1 = next1; */
+ /* udp1 = ip4_next_header (ip1); */
+
+ /* if (PREDICT_TRUE (pass0 == 0)) */
+ /* { */
+ /* good_packets++; */
+ /* next0 = check_adj_port_range_x1 */
+ /* (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); */
+ /* good_packets -= (save_next0 != next0); */
+ /* b0->error = error_node->errors */
+ /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (pass1 == 0)) */
+ /* { */
+ /* good_packets++; */
+ /* next1 = check_adj_port_range_x1 */
+ /* (adj1, clib_net_to_host_u16 (udp1->dst_port), next1); */
+ /* good_packets -= (save_next1 != next1); */
+ /* b1->error = error_node->errors */
+ /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */
+ /* } */
+
+ /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */
+ /* && (b0->flags & VLIB_BUFFER_IS_TRACED))) */
+ /* { */
+ /* ip4_source_and_port_range_check_trace_t *t = */
+ /* vlib_add_trace (vm, node, b0, sizeof (*t)); */
+ /* t->pass = next0 == save_next0; */
+ /* t->bypass = pass0; */
+ /* t->fib_index = fib_index0; */
+ /* t->src_addr.as_u32 = ip0->src_address.as_u32; */
+ /* t->port = (pass0 == 0) ? */
+ /* clib_net_to_host_u16 (udp0->dst_port) : 0; */
+ /* t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; */
+ /* } */
+
+ /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */
+ /* && (b1->flags & VLIB_BUFFER_IS_TRACED))) */
+ /* { */
+ /* ip4_source_and_port_range_check_trace_t *t = */
+ /* vlib_add_trace (vm, node, b1, sizeof (*t)); */
+ /* t->pass = next1 == save_next1; */
+ /* t->bypass = pass1; */
+ /* t->fib_index = fib_index1; */
+ /* t->src_addr.as_u32 = ip1->src_address.as_u32; */
+ /* t->port = (pass1 == 0) ? */
+ /* clib_net_to_host_u16 (udp1->dst_port) : 0; */
+ /* t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP; */
+ /* } */
+
+ /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */
+ /* to_next, n_left_to_next, */
+ /* bi0, bi1, next0, next1); */
+ /* } */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ ip4_header_t *ip0;
+ ip_source_and_port_range_check_config_t *c0;
+ u32 bi0, next0, lb_index0, pass0, save_next0, fib_index0;
+ udp_header_t *udp0;
+ const protocol_port_range_dpo_t *ppr_dpo0 = NULL;
+ const dpo_id_t *dpo;
+ u32 sw_if_index0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+
+ if (is_tx)
+ vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ c0 = vnet_feature_next_with_data (sw_if_index0, &next0,
+ b0, sizeof (c0[0]));
+
+ /* we can't use the default VRF here... */
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+ ASSERT (c0->fib_index[i]);
+ }
+
+
+ if (is_tx)
+ {
+ if (ip0->protocol == IP_PROTOCOL_UDP)
+ fib_index0 =
+ c0->fib_index
+ [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN];
+ if (ip0->protocol == IP_PROTOCOL_TCP)
+ fib_index0 =
+ c0->fib_index
+ [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN];
+ }
+ else
+ {
+ if (ip0->protocol == IP_PROTOCOL_UDP)
+ fib_index0 =
+ c0->fib_index
+ [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT];
+ if (ip0->protocol == IP_PROTOCOL_TCP)
+ fib_index0 =
+ c0->fib_index
+ [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT];
+ }
+
+ if (fib_index0 != ~0)
+ {
+ lb_index0 = ip4_fib_forwarding_lookup (fib_index0,
+ &ip0->src_address);
+
+ dpo =
+ load_balance_get_bucket_i (load_balance_get (lb_index0), 0);
+
+ if (ppr_dpo_type == dpo->dpoi_type)
+ {
+ ppr_dpo0 = protocol_port_range_dpo_get (dpo->dpoi_index);
+ }
+ /*
+	       * else the lookup hit an entry that was not inserted
+	       * by this range checker, i.e. the default route
+ */
+ }
+ /*
+ * $$$ which (src,dst) categories should we always pass?
+ */
+ pass0 = 0;
+ pass0 |= ip4_address_is_multicast (&ip0->src_address);
+ pass0 |=
+ ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+ pass0 |= (ip0->protocol != IP_PROTOCOL_UDP)
+ && (ip0->protocol != IP_PROTOCOL_TCP);
+
+ save_next0 = next0;
+ udp0 = ip4_next_header (ip0);
+
+ if (PREDICT_TRUE (pass0 == 0))
+ {
+ good_packets++;
+ next0 = check_adj_port_range_x1
+ (ppr_dpo0, clib_net_to_host_u16 (udp0->dst_port), next0);
+ good_packets -= (save_next0 != next0);
+ b0->error = error_node->errors
+ [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL];
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip4_source_and_port_range_check_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->pass = next0 == save_next0;
+ t->bypass = pass0;
+ t->fib_index = fib_index0;
+ t->src_addr.as_u32 = ip0->src_address.as_u32;
+ t->port = (pass0 == 0) ?
+ clib_net_to_host_u16 (udp0->dst_port) : 0;
+ t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP;
+ }
+
+ if (is_tx)
+ vlib_buffer_advance (b0, -sizeof (ethernet_header_t));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ if (is_tx)
+ vlib_node_increment_counter (vm, ip4_source_port_and_range_check_tx.index,
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK,
+ good_packets);
+ else
+ vlib_node_increment_counter (vm, ip4_source_port_and_range_check_rx.index,
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK,
+ good_packets);
+ return frame->n_vectors;
+}
+
+static uword
+ip4_source_and_port_range_check_rx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_source_and_port_range_check_inline (vm, node, frame,
+ 0 /* !is_tx */ );
+}
+
+static uword
+ip4_source_and_port_range_check_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_source_and_port_range_check_inline (vm, node, frame,
+ 1 /* is_tx */ );
+}
+
+/* Note: the same function is used for both the RX and TX nodes,
+   since both always check dst_port; if this changes it is easy
+   to split into separate functions.
+*/
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
+ .function = ip4_source_and_port_range_check_rx,
+ .name = "ip4-source-and-port-range-check-rx",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(ip4_source_and_port_range_check_error_strings),
+ .error_strings = ip4_source_and_port_range_check_error_strings,
+
+ .n_next_nodes = IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_and_port_range_check_trace,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
+ .function = ip4_source_and_port_range_check_tx,
+ .name = "ip4-source-and-port-range-check-tx",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(ip4_source_and_port_range_check_error_strings),
+ .error_strings = ip4_source_and_port_range_check_error_strings,
+
+ .n_next_nodes = IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_and_port_range_check_trace,
+};
+/* *INDENT-ON* */
+
+int
+set_ip_source_and_port_range_check (vlib_main_t * vm,
+ u32 * fib_index,
+ u32 sw_if_index, u32 is_add)
+{
+ ip_source_and_port_range_check_config_t config;
+ int rv = 0;
+ int i;
+
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+ config.fib_index[i] = fib_index[i];
+ }
+
+ /* For OUT we are in the RX path */
+ if ((fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT] != ~0) ||
+ (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT] != ~0))
+ {
+ vnet_feature_enable_disable ("ip4-unicast",
+ "ip4-source-and-port-range-check-rx",
+ sw_if_index, is_add, &config,
+ sizeof (config));
+ }
+
+ /* For IN we are in the TX path */
+ if ((fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN] != ~0) ||
+ (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN] != ~0))
+ {
+ vnet_feature_enable_disable ("ip4-output",
+ "ip4-source-and-port-range-check-tx",
+ sw_if_index, is_add, &config,
+ sizeof (config));
+ }
+ return rv;
+}
+
+static clib_error_t *
+set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ clib_error_t *error = 0;
+ u8 is_add = 1;
+ u32 sw_if_index = ~0;
+ u32 vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+ u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+ int vrf_set = 0;
+ uword *p;
+ int rv = 0;
+ int i;
+
+ sw_if_index = ~0;
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+ fib_index[i] = ~0;
+ vrf_id[i] = ~0;
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+ if (unformat
+ (input, "tcp-out-vrf %d",
+ &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]))
+ vrf_set = 1;
+ else
+ if (unformat
+ (input, "udp-out-vrf %d",
+ &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]))
+ vrf_set = 1;
+ else
+ if (unformat
+ (input, "tcp-in-vrf %d",
+ &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]))
+ vrf_set = 1;
+ else
+ if (unformat
+ (input, "udp-in-vrf %d",
+ &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]))
+ vrf_set = 1;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Interface required but not specified");
+
+ if (!vrf_set)
+ return clib_error_return (0,
+ "TCP or UDP VRF ID required but not specified");
+
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+
+ if (vrf_id[i] == 0)
+ return clib_error_return (0,
+				   "TCP, UDP VRF ID should not be 0 (default); use a distinct VRF for this purpose.");
+
+ if (vrf_id[i] != ~0)
+ {
+ p = hash_get (im->fib_index_by_table_id, vrf_id[i]);
+
+ if (p == 0)
+ return clib_error_return (0, "Invalid VRF ID %d", vrf_id[i]);
+
+ fib_index[i] = p[0];
+ }
+ }
+ rv =
+ set_ip_source_and_port_range_check (vm, fib_index, sw_if_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return
+ (0,
+ "set source and port-range on interface returned an unexpected value: %d",
+ rv);
+ }
+ return error;
+}
+
+/*?
+ * Add the 'ip4-source-and-port-range-check-rx' or
+ * 'ip4-source-and-port-range-check-tx' graph node for a given
+ * interface. 'tcp-out-vrf' and 'udp-out-vrf' will add to
+ * the RX path. 'tcp-in-vrf' and 'udp-in-vrf' will add to
+ * the TX path. A graph node will be inserted into the chain when
+ * the range check is added to the first interface. It will not
+ * be removed when the range check is removed from the last
+ * interface.
+ *
+ * By adding the range check graph node to the interface, incoming
+ * or outgoing TCP/UDP packets will be validated using the
+ * provided IPv4 FIB table (VRF).
+ *
+ * @note 'ip4-source-and-port-range-check-rx' and
+ * 'ip4-source-and-port-range-check-tx' strings are too long, so
+ * they are truncated on the 'show vlib graph' output.
+ *
+ * @todo This content needs to be validated and potentially more detail added.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
+ * Name Next Previous
+ * ip4-source-and-port-range- error-drop [0]
+ * @cliexend
+ *
+ * Example of how to enable range checking on TX:
+ * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0 udp-in-vrf 7}
+ *
+ * Example of graph node after range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
+ * Name Next Previous
+ * ip4-source-and-port-range- error-drop [0] ip4-rewrite
+ * interface-output [1]
+ * @cliexend
+ *
+ * Example of how to display the features enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ *
+ * ipv4 unicast:
+ * ip4-source-and-port-range-check-rx
+ * ip4-lookup
+ *
+ * ipv4 multicast:
+ * ip4-lookup-multicast
+ *
+ * ipv4 multicast:
+ * interface-output
+ *
+ * ipv6 unicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * interface-output
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = {
+ .path = "set interface ip source-and-port-range-check",
+ .function = set_ip_source_and_port_range_check_fn,
+ .short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]",
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_ppr_dpo (u8 * s, va_list * args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+
+ protocol_port_range_dpo_t *ppr_dpo;
+ int i, j;
+ int printed = 0;
+
+ ppr_dpo = protocol_port_range_dpo_get (index);
+
+ s = format (s, "allow ");
+
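+  /* the high bound is stored exclusive (last allowed port + 1),
+   * hence the hi - 1 printed for multi-port ranges below */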
+ for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ if (ppr_dpo->blocks[i].low.as_u16[j])
+ {
+ if (printed)
+ s = format (s, ", ");
+ if (ppr_dpo->blocks[i].hi.as_u16[j] >
+ (ppr_dpo->blocks[i].low.as_u16[j] + 1))
+ s =
+ format (s, "%d-%d", (u32) ppr_dpo->blocks[i].low.as_u16[j],
+ (u32) ppr_dpo->blocks[i].hi.as_u16[j] - 1);
+ else
+ s = format (s, "%d", ppr_dpo->blocks[i].low.as_u16[j]);
+ printed = 1;
+ }
+ }
+ }
+ return s;
+}
+
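+/*
+ * The lock/unlock callbacks are intentionally empty: range-check DPOs
+ * are not reference counted; once allocated, a pool entry is never
+ * returned to the pool.
+ */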
+static void
+ppr_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+static void
+ppr_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+static const dpo_vft_t ppr_vft = {
+ .dv_lock = ppr_dpo_lock,
+ .dv_unlock = ppr_dpo_unlock,
+ .dv_format = format_ppr_dpo,
+};
+
+static const char *const ppr_ip4_nodes[] = {
+ "ip4-source-and-port-range-check-rx",
+ NULL,
+};
+
+static const char *const *const ppr_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = ppr_ip4_nodes,
+};
+
+clib_error_t *
+ip4_source_and_port_range_check_init (vlib_main_t * vm)
+{
+ source_range_check_main_t *srm = &source_range_check_main;
+
+ srm->vlib_main = vm;
+ srm->vnet_main = vnet_get_main ();
+
+ ppr_dpo_type = dpo_register_new_type (&ppr_vft, ppr_nodes);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_source_and_port_range_check_init);
+
+protocol_port_range_dpo_t *
+protocol_port_range_dpo_alloc (void)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+
+ pool_get_aligned (ppr_dpo_pool, ppr_dpo, CLIB_CACHE_LINE_BYTES);
+ memset (ppr_dpo, 0, sizeof (*ppr_dpo));
+
+ ppr_dpo->n_free_ranges = N_PORT_RANGES_PER_DPO;
+
+ return (ppr_dpo);
+}
+
+
+static int
+add_port_range_adjacency (u32 fib_index,
+ ip4_address_t * address,
+ u32 length, u16 * low_ports, u16 * high_ports)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+ dpo_id_t dpop = DPO_INVALID;
+ int i, j, k;
+
+ fib_node_index_t fei;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = length,
+ .fp_addr = {
+ .ip4 = *address,
+ },
+ };
+
+ /*
+ * check to see if we have already sourced this prefix
+ */
+ fei = fib_table_lookup_exact_match (fib_index, &pfx);
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /*
+ * this is a first time add for this prefix.
+ */
+ ppr_dpo = protocol_port_range_dpo_alloc ();
+ }
+ else
+ {
+ /*
+ * the prefix is already there.
+       * check it was sourced by us, and if so get the range DPO from it.
+ */
+ dpo_id_t dpo = DPO_INVALID;
+ const dpo_id_t *bucket;
+
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo))
+ {
+ /*
+ * there is existing state. we'll want to add the new ranges to it
+ */
+ bucket =
+ load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ /*
+ * there is no PPR state associated with this prefix,
+ * so we'll need a new DPO
+ */
+ ppr_dpo = protocol_port_range_dpo_alloc ();
+ }
+ }
+
+ if (vec_len (low_ports) > ppr_dpo->n_free_ranges)
+ return VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY;
+
+ j = k = 0;
+
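+  /*
+   * Pack each new range into the first free slot: a block holds eight
+   * (low, hi) u16 pairs, and a slot whose low bound is 0 is free.
+   */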
+ for (i = 0; i < vec_len (low_ports); i++)
+ {
+ for (; j < N_BLOCKS_PER_DPO; j++)
+ {
+ for (; k < 8; k++)
+ {
+ if (ppr_dpo->blocks[j].low.as_u16[k] == 0)
+ {
+ ppr_dpo->blocks[j].low.as_u16[k] = low_ports[i];
+ ppr_dpo->blocks[j].hi.as_u16[k] = high_ports[i];
+ goto doublebreak;
+ }
+ }
+ }
+ doublebreak:;
+ }
+ ppr_dpo->n_used_blocks = j + 1;
+
+ /*
+ * add or update the entry in the FIB
+ */
+ dpo_set (&dpop, ppr_dpo_type, DPO_PROTO_IP4, (ppr_dpo - ppr_dpo_pool));
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_NONE, &dpop);
+ }
+ else
+ {
+ fib_entry_special_update (fei,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_NONE, &dpop);
+ }
+
+ return 0;
+}
+
+static int
+remove_port_range_adjacency (u32 fib_index,
+ ip4_address_t * address,
+ u32 length, u16 * low_ports, u16 * high_ports)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+ fib_node_index_t fei;
+ int i, j, k;
+
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = length,
+ .fp_addr = {
+ .ip4 = *address,
+ },
+ };
+
+ /*
+ * check to see if we have sourced this prefix
+ */
+ fei = fib_table_lookup_exact_match (fib_index, &pfx);
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /*
+ * not one of ours
+ */
+ return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+ }
+ else
+ {
+ /*
+ * the prefix is already there.
+ * check it was sourced by us
+ */
+ dpo_id_t dpo = DPO_INVALID;
+ const dpo_id_t *bucket;
+
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo))
+ {
+ /*
+	   * there is existing state; we'll remove the requested ranges from it
+ */
+ bucket =
+ load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ /*
+ * not one of ours
+ */
+ return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+ }
+ }
+
+ for (i = 0; i < vec_len (low_ports); i++)
+ {
+ for (j = 0; j < N_BLOCKS_PER_DPO; j++)
+ {
+ for (k = 0; k < 8; k++)
+ {
+ if (low_ports[i] == ppr_dpo->blocks[j].low.as_u16[k] &&
+ high_ports[i] == ppr_dpo->blocks[j].hi.as_u16[k])
+ {
+ ppr_dpo->blocks[j].low.as_u16[k] =
+ ppr_dpo->blocks[j].hi.as_u16[k] = 0;
+ goto doublebreak;
+ }
+ }
+ }
+ doublebreak:;
+ }
+
+ ppr_dpo->n_free_ranges = 0;
+
+ /* Have we deleted all ranges yet? */
+ for (i = 0; i < N_BLOCKS_PER_DPO; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+	  if (ppr_dpo->blocks[i].low.as_u16[j] == 0)
+ ppr_dpo->n_free_ranges++;
+ }
+ }
+
+ if (N_PORT_RANGES_PER_DPO == ppr_dpo->n_free_ranges)
+ {
+ /* Yes, lose the adjacency... */
+ fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_SPECIAL);
+ }
+ else
+ {
+ /*
+ * compact the ranges down to a contiguous block
+ */
+ // FIXME. TODO.
+ }
+
+ return 0;
+}
+
+// This will be moved to another file and implemented post API freeze.
+int
+ip6_source_and_port_range_check_add_del (ip6_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * high_ports, int is_add)
+{
+ u32 fib_index;
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+
+ ASSERT (~0 != fib_index);
+
+ fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY);
+
+ return 0;
+}
+
+int
+ip4_source_and_port_range_check_add_del (ip4_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * high_ports, int is_add)
+{
+ u32 fib_index;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
+ FIB_SOURCE_CLASSIFY);
+
+ if (is_add == 0)
+ {
+ remove_port_range_adjacency (fib_index, address, length,
+ low_ports, high_ports);
+ }
+ else
+ {
+ add_port_range_adjacency (fib_index, address, length,
+ low_ports, high_ports);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+ip_source_and_port_range_check_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u16 *low_ports = 0;
+ u16 *high_ports = 0;
+ u16 this_low;
+ u16 this_hi;
+ ip4_address_t ip4_addr;
+ ip6_address_t ip6_addr; //This function will be moved to generic impl when v6 done.
+ u32 length;
+ u32 tmp, tmp2;
+ u32 vrf_id = ~0;
+ int is_add = 1, ip_ver = ~0;
+ int rv;
+
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U/%d", unformat_ip4_address, &ip4_addr, &length))
+ ip_ver = 4;
+ else
+ if (unformat
+ (input, "%U/%d", unformat_ip6_address, &ip6_addr, &length))
+ ip_ver = 6;
+ else if (unformat (input, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "port %d", &tmp))
+ {
+ if (tmp == 0 || tmp > 65535)
+ return clib_error_return (0, "port %d out of range", tmp);
+ this_low = tmp;
+ this_hi = this_low + 1;
+ vec_add1 (low_ports, this_low);
+ vec_add1 (high_ports, this_hi);
+ }
+ else if (unformat (input, "range %d - %d", &tmp, &tmp2))
+ {
+ if (tmp > tmp2)
+ return clib_error_return (0, "ports %d and %d out of order",
+ tmp, tmp2);
+ if (tmp == 0 || tmp > 65535)
+ return clib_error_return (0, "low port %d out of range", tmp);
+ if (tmp2 == 0 || tmp2 > 65535)
+ return clib_error_return (0, "high port %d out of range", tmp2);
+ this_low = tmp;
+ this_hi = tmp2 + 1;
+ vec_add1 (low_ports, this_low);
+ vec_add1 (high_ports, this_hi);
+ }
+ else
+ break;
+ }
+
+ if (ip_ver == ~0)
+ return clib_error_return (0, " <address>/<mask> not specified");
+
+ if (vrf_id == ~0)
+ return clib_error_return (0, " VRF ID required, not specified");
+
+ if (vec_len (low_ports) == 0)
+ return clib_error_return (0,
+			      " At least one port or port range must be specified.");
+
+ if (vrf_id == 0)
+ return clib_error_return (0, " VRF ID can not be 0 (default).");
+
+
+ if (ip_ver == 4)
+ rv = ip4_source_and_port_range_check_add_del
+ (&ip4_addr, length, vrf_id, low_ports, high_ports, is_add);
+ else
+ return clib_error_return (0, " IPv6 in subsequent patch");
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE:
+ return clib_error_return
+ (0, " Incorrect adjacency for add/del operation");
+
+ case VNET_API_ERROR_EXCEEDED_NUMBER_OF_PORTS_CAPACITY:
+ return clib_error_return (0, " Too many ports in add/del operation");
+
+ case VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY:
+ return clib_error_return
+ (0, " Too many ranges requested for add operation");
+
+ default:
+ return clib_error_return (0, " returned an unexpected value: %d", rv);
+ }
+
+ return 0;
+}
+
+/*?
+ * This command adds an IP Subnet and range of ports to be validated
+ * by an IP FIB table (VRF).
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+ *
+ * @cliexpar
+ * Example of how to add an IPv4 subnet and single port to an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 port 23}
+ * Example of how to add an IPv4 subnet and range of ports to an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100}
+ * Example of how to delete an IPv4 subnet and single port from an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 port 23 del}
+ * Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = {
+ .path = "set ip source-and-port-range-check",
+ .function = ip_source_and_port_range_check_command_fn,
+ .short_help =
+ "set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]",
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+show_source_and_port_range_check_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+ u32 fib_index;
+ u8 addr_set = 0;
+ u32 vrf_id = ~0;
+ int rv, i, j;
+ u32 port = 0;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ };
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4))
+ addr_set = 1;
+ else if (unformat (input, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (input, "port %d", &port))
+ ;
+ else
+ break;
+ }
+
+ if (addr_set == 0)
+ return clib_error_return (0, "<address> not specified");
+
+ if (vrf_id == ~0)
+ return clib_error_return (0, "VRF ID required, not specified");
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+ if (~0 == fib_index)
+ return clib_error_return (0, "VRF %d not found", vrf_id);
+
+ /*
+ * find the longest prefix match on the address requested,
+ * check it was sourced by us
+ */
+ dpo_id_t dpo = DPO_INVALID;
+ const dpo_id_t *bucket;
+
+ if (!fib_entry_get_dpo_for_source (fib_table_lookup (fib_index, &pfx),
+ FIB_SOURCE_SPECIAL, &dpo))
+ {
+ /*
+ * not one of ours
+ */
+ vlib_cli_output (vm, "%U: src address drop", format_ip4_address,
+ &pfx.fp_addr.ip4);
+ return 0;
+ }
+
+ bucket = load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+
+ if (port)
+ {
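+      /* 1234 is an arbitrary sentinel next-index: the check hands it
+       * back unchanged when the port is within an allowed range */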
+ rv = check_adj_port_range_x1 (ppr_dpo, (u16) port, 1234);
+ if (rv == 1234)
+ vlib_cli_output (vm, "%U port %d PASS", format_ip4_address,
+ &pfx.fp_addr.ip4, port);
+ else
+ vlib_cli_output (vm, "%U port %d FAIL", format_ip4_address,
+ &pfx.fp_addr.ip4, port);
+ return 0;
+ }
+ else
+ {
+ u8 *s;
+
+ s = format (0, "%U: ", format_ip4_address, &pfx.fp_addr.ip4);
+
+ for (i = 0; i < N_BLOCKS_PER_DPO; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ if (ppr_dpo->blocks[i].low.as_u16[j])
+ s = format (s, "%d - %d ",
+ (u32) ppr_dpo->blocks[i].low.as_u16[j],
+ (u32) ppr_dpo->blocks[i].hi.as_u16[j]);
+ }
+ }
+ vlib_cli_output (vm, "%s", s);
+ vec_free (s);
+ }
+
+ return 0;
+}
+
+/*?
+ * Display the range of ports being validated by an IPv4 FIB for a given
+ * IP or subnet, or test if a given IP and port are being validated.
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+ *
+ * @cliexpar
+ * Example of how to display the set of ports being validated for a given
+ * IPv4 subnet:
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.0}
+ * 172.16.2.0: 23 - 101
+ * @cliexend
+ * Example of how to test whether a given IPv4 address and port
+ * are being validated:
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 23}
+ * 172.16.2.2 port 23 PASS
+ * @cliexend
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 250}
+ * 172.16.2.2 port 250 FAIL
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = {
+ .path = "show ip source-and-port-range-check",
+ .function = show_source_and_port_range_check_fn,
+ .short_help =
+ "show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c
new file mode 100644
index 00000000..17a1cb1b
--- /dev/null
+++ b/src/vnet/ip/ip4_source_check.c
@@ -0,0 +1,562 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_source_check.c: IP v4 check source address (unicast RPF check)
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_urpf_list.h>
+#include <vnet/dpo/load_balance.h>
+
+/**
+ * @file
+ * @brief IPv4 Unicast Source Check.
+ *
+ * This file contains the IPv4 interface unicast source check.
+ */
+
+
+typedef struct
+{
+ u8 packet_data[64];
+ index_t urpf;
+} ip4_source_check_trace_t;
+
+static u8 *
+format_ip4_source_check_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip4_source_check_trace_t *t = va_arg (*va, ip4_source_check_trace_t *);
+
+ s = format (s, "%U",
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ IP4_SOURCE_CHECK_NEXT_DROP,
+ IP4_SOURCE_CHECK_N_NEXT,
+} ip4_source_check_next_t;
+
+typedef enum
+{
+ IP4_SOURCE_CHECK_REACHABLE_VIA_RX,
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY,
+} ip4_source_check_type_t;
+
+typedef union
+{
+ u32 fib_index;
+} ip4_source_check_config_t;
+
+always_inline uword
+ip4_source_check_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ ip4_source_check_type_t source_check_type)
+{
+ u32 n_left_from, *from, *to_next;
+ u32 next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip4_source_check_trace_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_source_check_config_t *c0, *c1;
+ const load_balance_t *lb0, *lb1;
+ u32 pi0, next0, pass0, lb_index0;
+ u32 pi1, next1, pass1, lb_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ c0 =
+ vnet_feature_next_with_data (vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0,
+ sizeof (c0[0]));
+ c1 =
+ vnet_feature_next_with_data (vnet_buffer (p1)->sw_if_index
+ [VLIB_RX], &next1, p1,
+ sizeof (c1[0]));
+
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+ mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
+
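+	  /* 16-8-8 mtrie walk keyed on the packet's source address */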
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
+
+ /* Pass multicast. */
+ pass0 = ip4_address_is_multicast (&ip0->src_address)
+ || ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+ pass1 = ip4_address_is_multicast (&ip1->src_address)
+ || ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+
+ if (IP4_SOURCE_CHECK_REACHABLE_VIA_RX == source_check_type)
+ {
+ pass0 |= fib_urpf_check (lb0->lb_urpf,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX]);
+ pass1 |=
+ fib_urpf_check (lb1->lb_urpf,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ pass0 |= fib_urpf_check_size (lb0->lb_urpf);
+ pass1 |= fib_urpf_check_size (lb1->lb_urpf);
+ }
+ next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
+ next1 = (pass1 ? next1 : IP4_SOURCE_CHECK_NEXT_DROP);
+
+ p0->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+ p1->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ ip4_source_check_config_t *c0;
+ u32 pi0, next0, pass0, lb_index0;
+ const load_balance_t *lb0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ c0 =
+ vnet_feature_next_with_data (vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0,
+ sizeof (c0[0]));
+
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ lb0 = load_balance_get (lb_index0);
+
+ /* Pass multicast. */
+ pass0 = ip4_address_is_multicast (&ip0->src_address)
+ || ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+
+ if (IP4_SOURCE_CHECK_REACHABLE_VIA_RX == source_check_type)
+ {
+ pass0 |= fib_urpf_check (lb0->lb_urpf,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX]);
+ }
+ else
+ {
+ pass0 |= fib_urpf_check_size (lb0->lb_urpf);
+ }
+
+ next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
+ p0->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_source_check_reachable_via_any (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_source_check_inline (vm, node, frame,
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+}
+
+static uword
+ip4_source_check_reachable_via_rx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_source_check_inline (vm, node, frame,
+ IP4_SOURCE_CHECK_REACHABLE_VIA_RX);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_check_source_reachable_via_any) = {
+ .function = ip4_source_check_reachable_via_any,
+ .name = "ip4-source-check-via-any",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_check_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_any,
+ ip4_source_check_reachable_via_any);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_check_source_reachable_via_rx) = {
+ .function = ip4_source_check_reachable_via_rx,
+ .name = "ip4-source-check-via-rx",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_check_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_rx,
+ ip4_source_check_reachable_via_rx);
+
+static clib_error_t *
+set_ip_source_check (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ clib_error_t *error = 0;
+ u32 sw_if_index, is_del;
+ ip4_source_check_config_t config;
+ char *feature_name = "ip4-source-check-via-rx";
+
+ sw_if_index = ~0;
+ is_del = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user
+ (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+      else if (unformat (line_input, "loose"))
+	feature_name = "ip4-source-check-via-any";
+      else if (unformat (line_input, "strict"))
+	;			/* the default; accepted for symmetry with the help text */
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == sw_if_index)
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ config.fib_index = im->fib_index_by_sw_if_index[sw_if_index];
+ vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index,
+ is_del == 0, &config, sizeof (config));
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command adds the 'ip4-source-check-via-rx' graph node for
+ * a given interface. By adding the IPv4 source check graph node to
+ * an interface, the code verifies that the source address of incoming
+ * unicast packets is reachable over the incoming interface. Two flavours
+ * are supported (the default is strict):
+ * - loose: accept ingress packet if there is a route to reach the source
+ * - strict: accept ingress packet if it arrived on an interface which
+ *          the route to the source uses, i.e. an interface via which
+ *          the source is reachable.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-check-via-rx}
+ * Name Next Previous
+ * ip4-source-check-via-rx error-drop [0]
+ * @cliexend
+ *
+ * Example of how to enable unicast source checking on an interface:
+ * @cliexcmd{set interface ip source-check GigabitEthernet2/0/0 loose}
+ *
+ * Example of graph node after range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-check-via-rx}
+ * Name Next Previous
+ * ip4-source-check-via-rx error-drop [0] ip4-input-no-checksum
+ * ip4-source-and-port-range- ip4-input
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ *
+ * ipv4 unicast:
+ * ip4-source-check-via-rx
+ * ip4-lookup
+ *
+ * ipv4 multicast:
+ * ip4-lookup-multicast
+ *
+ * ipv4 multicast:
+ * interface-output
+ *
+ * ipv6 unicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * interface-output
+ * @cliexend
+ *
+ * Example of how to disable unicast source checking on an interface:
+ * @cliexcmd{set interface ip source-check GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_source_check_command, static) = {
+ .path = "set interface ip source-check",
+ .function = set_ip_source_check,
+ .short_help = "set interface ip source-check <interface> [strict|loose] [del]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip_source_check_accept (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ clib_error_t *error = NULL;
+ u32 table_id, is_add, fib_index;
+
+ is_add = 1;
+ table_id = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "table %d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip4_address, &pfx.fp_addr.ip4, &pfx.fp_len))
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 != table_id)
+ {
+ fib_index = fib_table_find (pfx.fp_proto, table_id);
+ if (~0 == fib_index)
+ {
+ error = clib_error_return (0, "Nonexistent table id %d", table_id);
+ goto done;
+ }
+ }
+ else
+ {
+ fib_index = 0;
+ }
+
+ if (is_add)
+ {
+ fib_table_entry_special_add (fib_index,
+ &pfx,
+ FIB_SOURCE_URPF_EXEMPT,
+ FIB_ENTRY_FLAG_DROP);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index,
+ &pfx, FIB_SOURCE_URPF_EXEMPT);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Add an exemption for a prefix to pass the Unicast Reverse Path
+ * Forwarding (uRPF) loose check. This is for testing purposes only.
+ * If the '<em>table</em>' is not entered, it defaults to 0. Default
+ * is to '<em>add</em>'. VPP always performs a loose uRPF check for
+ * for-us traffic.
+ *
+ * @cliexpar
+ * Example of how to add a uRPF exception to a FIB table to pass the
+ * loose RPF tests:
+ * @cliexcmd{ip urpf-accept table 7 add}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_source_check_accept_command, static) = {
+ .path = "ip urpf-accept",
+ .function = ip_source_check_accept,
+ .short_help = "ip urpf-accept [table <table-id>] [add|del]",
+};
+/* *INDENT-ON* */
+
+
+/* Dummy init function to get us linked in. */
+clib_error_t *
+ip4_source_check_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_source_check_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_test.c b/src/vnet/ip/ip4_test.c
new file mode 100644
index 00000000..73dabfdc
--- /dev/null
+++ b/src/vnet/ip/ip4_test.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+/**
+ * @file
+ * @brief IPv4 FIB Tester.
+ *
+ * Not compiled in by default. Adds, probes, and deletes a bunch of
+ * random routes / masks and makes sure that the mtrie agrees with
+ * the hash-table FIB.
+ *
+ * Manipulate the FIB by means of the debug CLI commands, to minimize
+ * the chances of doing something idiotic.
+ */
+
+/*
+ * These routines need to be redeclared non-static elsewhere.
+ *
+ * Also: rename ip_route() -> vnet_ip_route_cmd() and add the usual
+ * test_route_init() call to main.c
+ */
+clib_error_t *vnet_ip_route_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg);
+
+int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0);
+
+ip4_fib_t *find_fib_by_table_index_or_id (ip4_main_t * im,
+ u32 table_index_or_id, u32 flags);
+
+/* Routes to insert/delete/probe in FIB */
+typedef struct
+{
+ ip4_address_t address;
+ u32 mask_width;
+ u32 interface_id; /* not an xx_if_index */
+} test_route_t;
+
+typedef struct
+{
+ /* Test routes in use */
+ test_route_t *route_pool;
+
+ /* Number of fake ethernets created */
+ u32 test_interfaces_created;
+} test_main_t;
+
+test_main_t test_main;
+
+/* fake ethernet device class, distinct from "fake-ethX" */
+static u8 *
+format_test_interface_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "test-eth%d", dev_instance);
+}
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (test_interface_device_class,static) = {
+ .name = "Test interface",
+ .format_device_name = format_test_interface_name,
+ .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+thrash (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd_arg)
+{
+ u32 seed = 0xdeaddabe;
+ u32 niter = 10;
+ u32 nroutes = 10;
+ u32 ninterfaces = 4;
+ f64 min_mask_bits = 7.0;
+ f64 max_mask_bits = 32.0;
+ u32 table_id = 11; /* my amp goes to 11 (use fib 11) */
+ u32 table_index;
+ int iter, i;
+ u8 *cmd;
+ test_route_t *tr;
+ test_main_t *tm = &test_main;
+ ip4_main_t *im = &ip4_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t cmd_input;
+ f64 rf;
+ u32 *masks = 0;
+ u32 tmp;
+ u32 hw_if_index;
+ clib_error_t *error = 0;
+ uword *p;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 hw_address[6];
+ ip4_fib_t *fib;
+ int verbose = 0;
+
+ /* Precompute mask width -> mask vector */
+ tmp = (u32) ~ 0;
+ vec_validate (masks, 32);
+ for (i = 32; i > 0; i--)
+ {
+ masks[i] = tmp;
+ tmp <<= 1;
+ }
+
+ if (unformat_user (main_input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "seed %d", &seed))
+ ;
+ else if (unformat (line_input, "niter %d", &niter))
+ ;
+ else if (unformat (line_input, "nroutes %d", &nroutes))
+ ;
+ else if (unformat (line_input, "ninterfaces %d", &ninterfaces))
+ ;
+ else if (unformat (line_input, "min-mask-bits %d", &tmp))
+ min_mask_bits = (f64) tmp;
+ else if (unformat (line_input, "max-mask-bits %d", &tmp))
+ max_mask_bits = (f64) tmp;
+ else if (unformat (line_input, "verbose"))
+ verbose = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ }
+
+ /* Find or create FIB table 11 */
+ fib = ip4_fib_find_or_create_fib_by_table_id (table_id);
+
+ for (i = tm->test_interfaces_created; i < ninterfaces; i++)
+ {
+ vnet_hw_interface_t *hw;
+ memset (hw_address, 0, sizeof (hw_address));
+ hw_address[0] = 0xd0;
+ hw_address[1] = 0x0f;
+ hw_address[5] = i;
+
+ error = ethernet_register_interface
+ (vnm, test_interface_device_class.index, i /* instance */ ,
+ hw_address, &hw_if_index,
+ /* flag change */ 0);
+
+ /* Fake interfaces use FIB table 11 */
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index);
+ im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index;
+      ip4_sw_interface_enable_disable (hw->sw_if_index, 1);
+ }
+
+ tm->test_interfaces_created = ninterfaces;
+
+ /* Find fib index corresponding to FIB id 11 */
+ p = hash_get (im->fib_index_by_table_id, table_id);
+ if (p == 0)
+ {
+ vlib_cli_output (vm, "Couldn't map fib id %d to fib index\n", table_id);
+ goto done;
+ }
+ table_index = p[0];
+
+ for (iter = 0; iter < niter; iter++)
+ {
+ /* Pick random routes to install */
+ for (i = 0; i < nroutes; i++)
+ {
+ int j;
+
+ pool_get (tm->route_pool, tr);
+ memset (tr, 0, sizeof (*tr));
+
+ again:
+ rf = random_f64 (&seed);
+ tr->mask_width = (u32) (min_mask_bits
+ + rf * (max_mask_bits - min_mask_bits));
+ tmp = random_u32 (&seed);
+ tmp &= masks[tr->mask_width];
+ tr->address.as_u32 = clib_host_to_net_u32 (tmp);
+
+ /* We can't add the same address/mask twice... */
+ for (j = 0; j < i; j++)
+ {
+ test_route_t *prev;
+ prev = pool_elt_at_index (tm->route_pool, j);
+ if ((prev->address.as_u32 == tr->address.as_u32)
+ && (prev->mask_width == tr->mask_width))
+ goto again;
+ }
+
+ rf = random_f64 (&seed);
+ tr->interface_id = (u32) (rf * ninterfaces);
+ }
+
+ /* Add them */
+ for (i = 0; i < nroutes; i++)
+ {
+ tr = pool_elt_at_index (tm->route_pool, i);
+ cmd = format (0, "add table %d %U/%d via test-eth%d",
+ table_id,
+ format_ip4_address, &tr->address,
+ tr->mask_width, tr->interface_id);
+ vec_add1 (cmd, 0);
+ if (verbose)
+ fformat (stderr, "ip route %s\n", cmd);
+ unformat_init_string (&cmd_input, (char *) cmd, vec_len (cmd) - 1);
+ error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg);
+ if (error)
+ clib_error_report (error);
+ unformat_free (&cmd_input);
+ vec_free (cmd);
+ }
+ /* Probe them */
+ for (i = 0; i < nroutes; i++)
+ {
+ tr = pool_elt_at_index (tm->route_pool, i);
+ if (!ip4_lookup_validate (&tr->address, table_index))
+ {
+ if (verbose)
+ fformat (stderr, "test lookup table %d %U\n",
+ table_index, format_ip4_address, &tr->address);
+
+ fformat (stderr, "FAIL-after-insert: %U/%d\n",
+ format_ip4_address, &tr->address, tr->mask_width);
+ }
+ }
+
+ /* Delete them */
+ for (i = 0; i < nroutes; i++)
+ {
+ int j;
+ tr = pool_elt_at_index (tm->route_pool, i);
+ if (0)
+ cmd = format (0, "del table %d %U/%d via test-eth%d",
+ table_id,
+ format_ip4_address, &tr->address,
+ tr->mask_width, tr->interface_id);
+ else
+ cmd = format (0, "del table %d %U/%d",
+ table_id,
+ format_ip4_address, &tr->address, tr->mask_width);
+ vec_add1 (cmd, 0);
+ if (verbose)
+ fformat (stderr, "ip route %s\n", cmd);
+ unformat_init_string (&cmd_input, (char *) cmd, vec_len (cmd) - 1);
+ error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg);
+ if (error)
+ clib_error_report (error);
+ unformat_free (&cmd_input);
+ vec_free (cmd);
+
+ /* Make sure all undeleted routes still work */
+ for (j = i + 1; j < nroutes; j++)
+ {
+ test_route_t *rr; /* remaining route */
+ rr = pool_elt_at_index (tm->route_pool, j);
+ if (!ip4_lookup_validate (&rr->address, table_index))
+ {
+ if (verbose)
+ fformat (stderr, "test lookup table %d %U\n",
+ table_index, format_ip4_address, &rr->address);
+
+ fformat (stderr, "FAIL: %U/%d AWOL\n",
+ format_ip4_address, &rr->address, rr->mask_width);
+ fformat (stderr, " iter %d after %d of %d deletes\n",
+ iter, i, nroutes);
+ fformat (stderr, " last route deleted %U/%d\n",
+ format_ip4_address, &tr->address, tr->mask_width);
+ }
+ }
+ }
+
+ pool_free (tm->route_pool);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is not in the build by default. It is an internal
+ * command used to test the route functionality.
+ *
+ * Create test routes on IPv4 FIB table 11. Table will be created if it
+ * does not exist.
+ *
+ * There are several optional attributes:
+ * - If not provided, <seed> defaults to 0xdeaddabe.
+ * - If not provided, <num-iter> defaults to 10.
+ * - If not provided, <num-routes> defaults to 10.
+ * - If not provided, <num-iface> defaults to 4.
+ * - If not provided, <min-mask> defaults to 7.0.
+ * - If not provided, <max-mask> defaults to 32.0.
+ *
+ * @cliexpar
+ * Example of how to run:
+ * @cliexcmd{test route}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_route_command, static) = {
+ .path = "test route",
+  .short_help = "test route [seed <seed-num>] [niter <num-iter>] [nroutes <num-routes>] [ninterfaces <num-iface>] [min-mask-bits <min-mask>] [max-mask-bits <max-mask>] [verbose]",
+  .function = thrash,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+test_route_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (test_route_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h
new file mode 100644
index 00000000..6ffc562c
--- /dev/null
+++ b/src/vnet/ip/ip4_to_ip6.h
@@ -0,0 +1,659 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief IPv4 to IPv6 translation
+ */
+#ifndef __included_ip4_to_ip6_h__
+#define __included_ip4_to_ip6_h__
+
+#include <vnet/ip/ip.h>
+
+
+/**
+ * IPv4 to IPv6 set call back function type
+ */
+typedef int (*ip4_to_ip6_set_fn_t) (ip4_header_t * ip4, ip6_header_t * ip6,
+ void *ctx);
+
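+/*
+ * A minimal sketch of an ip4_to_ip6_set_fn_t callback, for illustration
+ * only; it is not part of this file's API. It embeds the IPv4 addresses
+ * in the RFC 6052 well-known prefix 64:ff9b::/96; real callers (MAP-T,
+ * NAT64, ...) supply their own mapping logic. A caller would then invoke,
+ * e.g., ip4_to_ip6 (b, example_set_ip6, 0) to rewrite the buffer in place.
+ *
+ * static int
+ * example_set_ip6 (ip4_header_t * ip4, ip6_header_t * ip6, void *ctx)
+ * {
+ *   ip6->src_address.as_u64[0] = clib_host_to_net_u64 (0x0064ff9b00000000ULL);
+ *   ip6->src_address.as_u64[1] = 0;
+ *   ip6->src_address.as_u32[3] = ip4->src_address.as_u32;
+ *   ip6->dst_address.as_u64[0] = clib_host_to_net_u64 (0x0064ff9b00000000ULL);
+ *   ip6->dst_address.as_u64[1] = 0;
+ *   ip6->dst_address.as_u32[3] = ip4->dst_address.as_u32;
+ *   return 0;
+ * }
+ */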
+/* *INDENT-OFF* */
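+/* Maps an ICMPv4 parameter-problem pointer (the index) to its ICMPv6
+ * counterpart, per the SIIT translation algorithm (RFC 6145/7915);
+ * ~0 marks IPv4 header octets with no IPv6 equivalent. */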
+static u8 icmp_to_icmp6_updater_pointer_table[] =
+ { 0, 1, 4, 4, ~0,
+ ~0, ~0, ~0, 7, 6,
+ ~0, ~0, 8, 8, 8,
+ 8, 24, 24, 24, 24
+ };
+/* *INDENT-ON* */
+
+#define frag_id_4to6(id) (id)
+
+/**
+ * @brief Get TCP/UDP port number or ICMP id from IPv4 packet.
+ *
+ * @param ip IPv4 header.
+ * @param sender 1 get sender port, 0 get receiver port.
+ *
+ * @returns Port number on success, 0 otherwise.
+ */
+always_inline u16
+ip4_get_port (ip4_header_t * ip, u8 sender)
+{
+ if (ip->ip_version_and_header_length != 0x45 ||
+ ip4_get_fragment_offset (ip))
+ return 0;
+
+ if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
+ (ip->protocol == IP_PROTOCOL_UDP)))
+ {
+ udp_header_t *udp = (void *) (ip + 1);
+ return (sender) ? udp->src_port : udp->dst_port;
+ }
+ else if (ip->protocol == IP_PROTOCOL_ICMP)
+ {
+ icmp46_header_t *icmp = (void *) (ip + 1);
+ if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
+ {
+ return *((u16 *) (icmp + 1));
+ }
+ else if (clib_net_to_host_u16 (ip->length) >= 64)
+ {
+ ip = (ip4_header_t *) (icmp + 2);
+ if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
+ (ip->protocol == IP_PROTOCOL_UDP)))
+ {
+ udp_header_t *udp = (void *) (ip + 1);
+ return (sender) ? udp->dst_port : udp->src_port;
+ }
+ else if (ip->protocol == IP_PROTOCOL_ICMP)
+ {
+ icmp46_header_t *icmp = (void *) (ip + 1);
+ if (icmp->type == ICMP4_echo_request ||
+ icmp->type == ICMP4_echo_reply)
+ {
+ return *((u16 *) (icmp + 1));
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * @brief Convert type and code value from ICMP4 to ICMP6.
+ *
+ * @param icmp ICMP header.
+ * @param inner_ip4 Inner IPv4 header if present, 0 otherwise.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+icmp_to_icmp6_header (icmp46_header_t * icmp, ip4_header_t ** inner_ip4)
+{
+ *inner_ip4 = NULL;
+ switch (icmp->type)
+ {
+ case ICMP4_echo_reply:
+ icmp->type = ICMP6_echo_reply;
+ break;
+ case ICMP4_echo_request:
+ icmp->type = ICMP6_echo_request;
+ break;
+ case ICMP4_destination_unreachable:
+ *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+
+ switch (icmp->code)
+ {
+ case ICMP4_destination_unreachable_destination_unreachable_net: //0
+ case ICMP4_destination_unreachable_destination_unreachable_host: //1
+ icmp->type = ICMP6_destination_unreachable;
+ icmp->code = ICMP6_destination_unreachable_no_route_to_destination;
+ break;
+ case ICMP4_destination_unreachable_protocol_unreachable: //2
+ icmp->type = ICMP6_parameter_problem;
+ icmp->code = ICMP6_parameter_problem_unrecognized_next_header;
+ break;
+ case ICMP4_destination_unreachable_port_unreachable: //3
+ icmp->type = ICMP6_destination_unreachable;
+ icmp->code = ICMP6_destination_unreachable_port_unreachable;
+ break;
+ case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4
+ icmp->type =
+ ICMP6_packet_too_big;
+ icmp->code = 0;
+ {
+ u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+ if (advertised_mtu)
+ advertised_mtu += 20;
+ else
+ advertised_mtu = 1000; //FIXME ! (RFC 1191 - plateau value)
+
+ //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20)
+ *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (advertised_mtu);
+ }
+ break;
+
+ case ICMP4_destination_unreachable_source_route_failed: //5
+ case ICMP4_destination_unreachable_destination_network_unknown: //6
+ case ICMP4_destination_unreachable_destination_host_unknown: //7
+ case ICMP4_destination_unreachable_source_host_isolated: //8
+ case ICMP4_destination_unreachable_network_unreachable_for_type_of_service: //11
+ case ICMP4_destination_unreachable_host_unreachable_for_type_of_service: //12
+ icmp->type =
+ ICMP6_destination_unreachable;
+ icmp->code = ICMP6_destination_unreachable_no_route_to_destination;
+ break;
+ case ICMP4_destination_unreachable_network_administratively_prohibited: //9
+ case ICMP4_destination_unreachable_host_administratively_prohibited: //10
+ case ICMP4_destination_unreachable_communication_administratively_prohibited: //13
+ case ICMP4_destination_unreachable_precedence_cutoff_in_effect: //15
+ icmp->type = ICMP6_destination_unreachable;
+ icmp->code =
+ ICMP6_destination_unreachable_destination_administratively_prohibited;
+ break;
+ case ICMP4_destination_unreachable_host_precedence_violation: //14
+ default:
+ return -1;
+ }
+ break;
+
+ case ICMP4_time_exceeded: //11
+ *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+ icmp->type = ICMP6_time_exceeded;
+ break;
+
+ case ICMP4_parameter_problem:
+ *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+
+ switch (icmp->code)
+ {
+ case ICMP4_parameter_problem_pointer_indicates_error:
+ case ICMP4_parameter_problem_bad_length:
+ icmp->type = ICMP6_parameter_problem;
+ icmp->code = ICMP6_parameter_problem_erroneous_header_field;
+ {
+ u8 ptr =
+ icmp_to_icmp6_updater_pointer_table[*((u8 *) (icmp + 1))];
+ if (ptr == 0xff)
+ return -1;
+
+ *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (ptr);
+ }
+ break;
+ default:
+ //All other codes cause error
+ return -1;
+ }
+ break;
+
+ default:
+ //All other types cause error
+ return -1;
+ break;
+ }
+ return 0;
+}
+
+/**
+ * @brief Translate ICMP4 packet to ICMP6.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate outer header.
+ * @param ctx A context passed in the outer header translate function.
+ * @param inner_fn The function to translate inner header.
+ * @param inner_ctx A context passed in the inner header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx,
+ ip4_to_ip6_set_fn_t inner_fn, void *inner_ctx)
+{
+ ip4_header_t *ip4, *inner_ip4;
+ ip6_header_t *ip6, *inner_ip6;
+ u32 ip_len;
+ icmp46_header_t *icmp;
+ ip_csum_t csum;
+ ip6_frag_hdr_t *inner_frag;
+ u32 inner_frag_id;
+ u32 inner_frag_offset;
+ u8 inner_frag_more;
+ u16 *inner_L4_checksum = 0;
+ int rv;
+
+ ip4 = vlib_buffer_get_current (p);
+ ip_len = clib_net_to_host_u16 (ip4->length);
+ ASSERT (ip_len <= p->current_length);
+
+ icmp = (icmp46_header_t *) (ip4 + 1);
+ if (icmp_to_icmp6_header (icmp, &inner_ip4))
+ return -1;
+
+ if (inner_ip4)
+ {
+ //We have 2 headers to translate.
+ //We need to make some room in the middle of the packet
+ if (PREDICT_FALSE (ip4_is_fragment (inner_ip4)))
+ {
+ //Here it starts getting really tricky
+ //We will add a fragmentation header in the inner packet
+
+ if (!ip4_is_first_fragment (inner_ip4))
+ {
+	      //For now we do not handle it unless it is the first fragment
+	      //Ideally we should handle that case too, as we are in the slow path already
+ return -1;
+ }
+
+ vlib_buffer_advance (p,
+ -2 * (sizeof (*ip6) - sizeof (*ip4)) -
+ sizeof (*inner_frag));
+ ip6 = vlib_buffer_get_current (p);
+ clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4,
+ 20 + 8);
+ ip4 =
+ (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ icmp = (icmp46_header_t *) (ip4 + 1);
+
+ inner_ip6 =
+ (ip6_header_t *) u8_ptr_add (inner_ip4,
+ sizeof (*ip4) - sizeof (*ip6) -
+ sizeof (*inner_frag));
+ inner_frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, sizeof (*inner_ip6));
+ ip6->payload_length =
+ u16_net_add (ip4->length,
+ sizeof (*ip6) - 2 * sizeof (*ip4) +
+ sizeof (*inner_frag));
+ inner_frag_id = frag_id_4to6 (inner_ip4->fragment_id);
+ inner_frag_offset = ip4_get_fragment_offset (inner_ip4);
+ inner_frag_more =
+ ! !(inner_ip4->flags_and_fragment_offset &
+ clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS));
+ }
+ else
+ {
+ vlib_buffer_advance (p, -2 * (sizeof (*ip6) - sizeof (*ip4)));
+ ip6 = vlib_buffer_get_current (p);
+ clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4,
+ 20 + 8);
+ ip4 =
+ (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4));
+ inner_ip6 =
+ (ip6_header_t *) u8_ptr_add (inner_ip4,
+ sizeof (*ip4) - sizeof (*ip6));
+ ip6->payload_length =
+ u16_net_add (ip4->length, sizeof (*ip6) - 2 * sizeof (*ip4));
+ inner_frag = NULL;
+ }
+
+ if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_TCP))
+ {
+ inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum;
+ *inner_L4_checksum =
+ ip_csum_fold (ip_csum_sub_even
+ (*inner_L4_checksum,
+ *((u64 *) (&inner_ip4->src_address))));
+ }
+ else if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_UDP))
+ {
+ inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum;
+ if (*inner_L4_checksum)
+ *inner_L4_checksum =
+ ip_csum_fold (ip_csum_sub_even
+ (*inner_L4_checksum,
+ *((u64 *) (&inner_ip4->src_address))));
+ }
+ else if (inner_ip4->protocol == IP_PROTOCOL_ICMP)
+ {
+ //We have an ICMP inside an ICMP
+ //It needs to be translated, but not for error ICMP messages
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1);
+ //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by icmp_to_icmp6_header
+ inner_icmp->type = (inner_icmp->type == ICMP4_echo_request) ?
+ ICMP6_echo_request : ICMP6_echo_reply;
+ inner_L4_checksum = &inner_icmp->checksum;
+ inner_ip4->protocol = IP_PROTOCOL_ICMP6;
+ }
+ else
+ {
+ /* To shut up Coverity */
+ os_panic ();
+ }
+
+ inner_ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (inner_ip4->tos << 20));
+ inner_ip6->payload_length =
+ u16_net_add (inner_ip4->length, -sizeof (*inner_ip4));
+ inner_ip6->hop_limit = inner_ip4->ttl;
+ inner_ip6->protocol = inner_ip4->protocol;
+
+ if ((rv = inner_fn (inner_ip4, inner_ip6, inner_ctx)) != 0)
+ return rv;
+
+ if (PREDICT_FALSE (inner_frag != NULL))
+ {
+ inner_frag->next_hdr = inner_ip6->protocol;
+ inner_frag->identification = inner_frag_id;
+ inner_frag->rsv = 0;
+ inner_frag->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (inner_frag_offset, inner_frag_more);
+ inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ inner_ip6->payload_length =
+ clib_host_to_net_u16 (clib_net_to_host_u16
+ (inner_ip6->payload_length) +
+ sizeof (*inner_frag));
+ }
+
+ csum = *inner_L4_checksum;
+ if (inner_ip6->protocol == IP_PROTOCOL_ICMP6)
+ {
+ //Recompute ICMP checksum
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1);
+
+ inner_icmp->checksum = 0;
+ csum = ip_csum_with_carry (0, inner_ip6->payload_length);
+ csum =
+ ip_csum_with_carry (csum,
+ clib_host_to_net_u16 (inner_ip6->protocol));
+ csum = ip_csum_with_carry (csum, inner_ip6->src_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, inner_ip6->src_address.as_u64[1]);
+ csum = ip_csum_with_carry (csum, inner_ip6->dst_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, inner_ip6->dst_address.as_u64[1]);
+ csum =
+ ip_incremental_checksum (csum, inner_icmp,
+ clib_net_to_host_u16
+ (inner_ip6->payload_length));
+ inner_icmp->checksum = ~ip_csum_fold (csum);
+ }
+ else
+ {
+ /* UDP checksum is optional */
+ if (csum)
+ {
+ csum =
+ ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
+ csum =
+ ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
+ csum =
+ ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
+ csum =
+ ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
+ *inner_L4_checksum = ip_csum_fold (csum);
+ }
+ }
+ }
+ else
+ {
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
+ ip6 = vlib_buffer_get_current (p);
+ ip6->payload_length =
+ clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ sizeof (*ip4));
+ }
+
+ //Translate outer IPv6
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+
+ ip6->hop_limit = ip4->ttl;
+ ip6->protocol = IP_PROTOCOL_ICMP6;
+
+ if ((rv = fn (ip4, ip6, ctx)) != 0)
+ return rv;
+
+ //Truncate when the packet exceeds the minimal IPv6 MTU
+ if (p->current_length > 1280)
+ {
+ ip6->payload_length = clib_host_to_net_u16 (1280 - sizeof (*ip6));
+ p->current_length = 1280; //Looks too simple to be correct...
+ }
+
+ //Recompute ICMP checksum
+ icmp->checksum = 0;
+ csum = ip_csum_with_carry (0, ip6->payload_length);
+ csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
+ csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
+ csum =
+ ip_incremental_checksum (csum, icmp,
+ clib_net_to_host_u16 (ip6->payload_length));
+ icmp->checksum = ~ip_csum_fold (csum);
+
+ return 0;
+}
+
+/**
+ * @brief Translate IPv4 fragmented packet to IPv6.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+ip4_to_ip6_fragmented (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
+{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ ip6_frag_hdr_t *frag;
+ int rv;
+
+ ip4 = vlib_buffer_get_current (p);
+ frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
+ ip6 =
+ (ip6_header_t *) u8_ptr_add (ip4,
+ sizeof (*ip4) - sizeof (*frag) -
+ sizeof (*ip6));
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
+
+ //We know that the protocol was one of ICMP, TCP or UDP
+ //because the first fragment was found and cached
+ frag->next_hdr =
+ (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol;
+ frag->identification = frag_id_4to6 (ip4->fragment_id);
+ frag->rsv = 0;
+ frag->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4),
+ clib_net_to_host_u16
+ (ip4->flags_and_fragment_offset) &
+ IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+ ip6->payload_length =
+ clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ sizeof (*ip4) + sizeof (*frag));
+ ip6->hop_limit = ip4->ttl;
+ ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+
+ if ((rv = fn (ip4, ip6, ctx)) != 0)
+ return rv;
+
+ return 0;
+}
+
+/**
+ * @brief Translate IPv4 UDP/TCP packet to IPv6.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
+{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ ip_csum_t csum;
+ u16 *checksum;
+ ip6_frag_hdr_t *frag;
+ u32 frag_id;
+ int rv;
+
+ ip4 = vlib_buffer_get_current (p);
+
+ if (ip4->protocol == IP_PROTOCOL_UDP)
+ {
+ udp_header_t *udp = ip4_next_header (ip4);
+ checksum = &udp->checksum;
+
+ //UDP checksum is optional over IPv4 but mandatory for IPv6
+ //We do not check udp->length sanity but use our safe computed value instead
+      if (PREDICT_FALSE (!*checksum))
+ {
+	  u16 udp_len = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4);
+ csum = ip_incremental_checksum (0, udp, udp_len);
+ csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
+ csum =
+ ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP));
+ csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address)));
+ *checksum = ~ip_csum_fold (csum);
+ }
+ }
+ else
+ {
+ tcp_header_t *tcp = ip4_next_header (ip4);
+ checksum = &tcp->checksum;
+ }
+
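+  /* Incremental L4 checksum update (RFC 1624): subtract the IPv4
+   * pseudo-header addresses now; the IPv6 addresses are folded back
+   * in after 'fn' has written them, below. */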
+ csum = ip_csum_sub_even (*checksum, ip4->src_address.as_u32);
+ csum = ip_csum_sub_even (csum, ip4->dst_address.as_u32);
+ *checksum = ip_csum_fold (csum);
+
+ // Deal with fragmented packets
+ if (PREDICT_FALSE (ip4->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
+ {
+ ip6 =
+ (ip6_header_t *) u8_ptr_add (ip4,
+ sizeof (*ip4) - sizeof (*ip6) -
+ sizeof (*frag));
+ frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
+ frag_id = frag_id_4to6 (ip4->fragment_id);
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
+ frag = NULL;
+ }
+
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+ ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
+ ip6->hop_limit = ip4->ttl;
+ ip6->protocol = ip4->protocol;
+
+ if (PREDICT_FALSE (frag != NULL))
+ {
+ frag->next_hdr = ip6->protocol;
+ frag->identification = frag_id;
+ frag->rsv = 0;
+ frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1);
+ ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
+ }
+
+ if ((rv = fn (ip4, ip6, ctx)) != 0)
+ return rv;
+
+ csum = ip_csum_add_even (*checksum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
+ *checksum = ip_csum_fold (csum);
+
+ return 0;
+}
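+
+/*
+ * Note on the incremental update above: per RFC 1624, a checksum C over a
+ * 16-bit field m that changes to m' becomes C' = ~(~C + ~m + m').
+ * ip_csum_sub_even()/ip_csum_add_even() implement this, so only the
+ * pseudo-header addresses are folded out (IPv4) and back in (IPv6); the
+ * L4 payload is never re-summed.
+ */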
+
+/**
+ * @brief Translate an IPv4 packet to IPv6 (IP header only).
+ *
+ * @param p Buffer to translate.
+ * @param fn The function that translates the IP header.
+ * @param ctx A context passed to the header translate function.
+ *
+ * @returns 0 on success, non-zero otherwise.
+ */
+always_inline int
+ip4_to_ip6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
+{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ ip6_frag_hdr_t *frag;
+ u32 frag_id;
+ int rv;
+
+ ip4 = vlib_buffer_get_current (p);
+
+ // Deal with fragmented packets
+ if (PREDICT_FALSE (ip4->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
+ {
+ ip6 =
+ (ip6_header_t *) u8_ptr_add (ip4,
+ sizeof (*ip4) - sizeof (*ip6) -
+ sizeof (*frag));
+ frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
+ frag_id = frag_id_4to6 (ip4->fragment_id);
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
+ frag = NULL;
+ }
+
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+ ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
+ ip6->hop_limit = ip4->ttl;
+ ip6->protocol = ip4->protocol;
+
+ if (PREDICT_FALSE (frag != NULL))
+ {
+ frag->next_hdr = ip6->protocol;
+ frag->identification = frag_id;
+ frag->rsv = 0;
+ frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1);
+ ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
+ }
+
+ if ((rv = fn (ip4, ip6, ctx)) != 0)
+ return rv;
+
+ return 0;
+}
+
+#endif /* __included_ip4_to_ip6_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
new file mode 100644
index 00000000..8aef53a9
--- /dev/null
+++ b/src/vnet/ip/ip6.h
@@ -0,0 +1,605 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6.h: ip6 main include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip6_h
+#define included_ip_ip6_h
+
+#include <vlib/mc.h>
+#include <vlib/buffer.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/ip/lookup.h>
+#include <stdbool.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_template.h>
+#include <vnet/util/radix.h>
+
+/*
+ * Default size of the ip6 fib hash table
+ */
+#define IP6_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define IP6_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+typedef struct
+{
+ ip6_address_t addr;
+ u32 dst_address_length;
+ u32 vrf_index;
+} ip6_fib_key_t;
+
+typedef struct
+{
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+} ip6_fib_t;
+
+typedef struct ip6_mfib_t
+{
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+
+ /*
+ * Pointer to the top of a radix tree.
+ * This cannot be realloc'd, hence it cannot be inlined with this table
+ */
+ struct radix_node_head *rhead;
+} ip6_mfib_t;
+
+struct ip6_main_t;
+
+typedef void (ip6_add_del_interface_address_function_t)
+ (struct ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length, u32 if_address_index, u32 is_del);
+
+typedef struct
+{
+ ip6_add_del_interface_address_function_t *function;
+ uword function_opaque;
+} ip6_add_del_interface_address_callback_t;
+
+typedef void (ip6_table_bind_function_t)
+ (struct ip6_main_t * im,
+ uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index);
+
+typedef struct
+{
+ ip6_table_bind_function_t *function;
+ uword function_opaque;
+} ip6_table_bind_callback_t;
+
+/**
+ * Enumeration of the FIB table instance types
+ */
+typedef enum ip6_fib_table_instance_type_t_
+{
+ /**
+ * This table stores the routes that are used to forward traffic.
+ * The key is the prefix; the result is the adjacency to forward on.
+ */
+ IP6_FIB_TABLE_FWDING,
+ /**
+ * The table that stores ALL routes learned by the DP.
+ * Some of these routes may not be ready to install in forwarding
+ * at a given time.
+ * The key in this table is the prefix, the result is the fib_entry_t
+ */
+ IP6_FIB_TABLE_NON_FWDING,
+} ip6_fib_table_instance_type_t;
+
+#define IP6_FIB_NUM_TABLES (IP6_FIB_TABLE_NON_FWDING+1)
+
+/**
+ * A representation of a single IP6 table
+ */
+typedef struct ip6_fib_table_instance_t_
+{
+ /* The hash table */
+ BVT (clib_bihash) ip6_hash;
+
+ /* bitmap / refcounts / vector of mask widths to search */
+ uword *non_empty_dst_address_length_bitmap;
+ u8 *prefix_lengths_in_search_order;
+ i32 dst_address_length_refcounts[129];
+} ip6_fib_table_instance_t;
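+
+/*
+ * These fields drive longest-prefix-match: the bihash is probed once per
+ * configured mask length, longest first. A sketch of the lookup loop
+ * (modelled on the lookup in ip6_fib.c; the exact key layout shown here
+ * is an assumption of this sketch):
+ *
+ * clib_bihash_kv_24_8_t kv, value;
+ * for (i = 0; i < vec_len (t->prefix_lengths_in_search_order); i++)
+ * {
+ * int len = t->prefix_lengths_in_search_order[i];
+ * ip6_address_t *mask = &ip6_main.fib_masks[len];
+ * kv.key[0] = dst->as_u64[0] & mask->as_u64[0];
+ * kv.key[1] = dst->as_u64[1] & mask->as_u64[1];
+ * kv.key[2] = ((u64) fib_index << 32) | len;
+ * if (!clib_bihash_search_inline_2_24_8 (&t->ip6_hash, &kv, &value))
+ * return value.value; // the load-balance index
+ * }
+ */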
+
+typedef struct ip6_main_t
+{
+ /**
+ * The two FIB tables; fwding and non-fwding
+ */
+ ip6_fib_table_instance_t ip6_table[IP6_FIB_NUM_TABLES];
+
+ ip_lookup_main_t lookup_main;
+
+ /* Pool of FIBs. */
+ struct fib_table_t_ *fibs;
+
+ /* Pool of V6 FIBs. */
+ ip6_fib_t *v6_fibs;
+
+ /** Vector of MFIBs. */
+ struct mfib_table_t_ *mfibs;
+
+ /* Network byte order subnet masks, one per prefix length */
+ ip6_address_t fib_masks[129];
+
+ /* Table index indexed by software interface. */
+ u32 *fib_index_by_sw_if_index;
+
+ /** Multicast FIB table index indexed by software interface. */
+ u32 *mfib_index_by_sw_if_index;
+
+ /* IP6 enabled count by software interface */
+ u8 *ip_enabled_by_sw_if_index;
+
+ /* Hash table mapping table id to fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *fib_index_by_table_id;
+
+ /** Hash table mapping table id to multicast fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *mfib_index_by_table_id;
+
+ /* Hash table mapping interface rewrite adjacency index by sw if index. */
+ uword *interface_route_adj_index_by_sw_if_index;
+
+ /* Functions to call when interface address changes. */
+ ip6_add_del_interface_address_callback_t
+ * add_del_interface_address_callbacks;
+
+ /** Functions to call when the interface-to-table binding changes. */
+ ip6_table_bind_callback_t *table_bind_callbacks;
+
+ /* Template used to generate IP6 neighbor solicitation packets. */
+ vlib_packet_template_t discover_neighbor_packet_template;
+
+ /* ip6 lookup table config parameters */
+ u32 lookup_table_nbuckets;
+ uword lookup_table_size;
+
+ /* Seed for Jenkins hash used to compute ip6 flow hash. */
+ u32 flow_hash_seed;
+
+ struct
+ {
+ /* TTL to use for host generated packets. */
+ u8 ttl;
+
+ u8 pad[3];
+ } host_config;
+
+ /* HBH processing enabled? */
+ u8 hbh_enabled;
+} ip6_main_t;
+
+/* Global ip6 main structure. */
+extern ip6_main_t ip6_main;
+
+/* Global ip6 input node. Errors get attached to ip6 input node. */
+extern vlib_node_registration_t ip6_input_node;
+extern vlib_node_registration_t ip6_rewrite_node;
+extern vlib_node_registration_t ip6_rewrite_mcast_node;
+extern vlib_node_registration_t ip6_rewrite_local_node;
+extern vlib_node_registration_t ip6_discover_neighbor_node;
+extern vlib_node_registration_t ip6_glean_node;
+extern vlib_node_registration_t ip6_midchain_node;
+
+always_inline uword
+ip6_destination_matches_route (const ip6_main_t * im,
+ const ip6_address_t * key,
+ const ip6_address_t * dest, uword dest_length)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (key->as_uword); i++)
+ {
+ if ((key->as_uword[i] ^ dest->as_uword[i]) & im->
+ fib_masks[dest_length].as_uword[i])
+ return 0;
+ }
+ return 1;
+}
+
+always_inline uword
+ip6_destination_matches_interface (ip6_main_t * im,
+ ip6_address_t * key,
+ ip_interface_address_t * ia)
+{
+ ip6_address_t *a = ip_interface_address_get_address (&im->lookup_main, ia);
+ return ip6_destination_matches_route (im, key, a, ia->address_length);
+}
+
+/* As above, but allows for unaligned destinations (e.g. it works directly on the IP header of a packet). */
+always_inline uword
+ip6_unaligned_destination_matches_route (ip6_main_t * im,
+ ip6_address_t * key,
+ ip6_address_t * dest,
+ uword dest_length)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (key->as_uword); i++)
+ {
+ if ((clib_mem_unaligned (&key->as_uword[i], uword) ^ dest->as_uword[i])
+ & im->fib_masks[dest_length].as_uword[i])
+ return 0;
+ }
+ return 1;
+}
+
+always_inline int
+ip6_src_address_for_packet (ip_lookup_main_t * lm,
+ u32 sw_if_index, ip6_address_t * src)
+{
+ u32 if_add_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ if (PREDICT_TRUE (if_add_index != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index);
+ ip6_address_t *if_ip = ip_interface_address_get_address (lm, if_add);
+ *src = *if_ip;
+ return (0);
+ }
+ else
+ {
+ src->as_u64[0] = 0;
+ src->as_u64[1] = 0;
+ }
+ return (!0);
+}
+
+/* Find interface address which matches destination. */
+always_inline ip6_address_t *
+ip6_interface_address_matching_destination (ip6_main_t * im,
+ ip6_address_t * dst,
+ u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia;
+ ip6_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip6_address_t * a = ip_interface_address_get_address (lm, ia);
+ if (ip6_destination_matches_route (im, dst, a, ia->address_length))
+ {
+ result = a;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+clib_error_t *ip6_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length, u32 is_del);
+void ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable);
+
+/**
+ * @brief Get the first IPv6 interface address.
+ */
+ip6_address_t *ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index);
+
+int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2);
+
+clib_error_t *ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst,
+ u32 sw_if_index);
+
+uword
+ip6_udp_register_listener (vlib_main_t * vm,
+ u16 dst_port, u32 next_node_index);
+
+u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip6_header_t * ip0,
+ int *bogus_lengthp);
+
+void ip6_register_protocol (u32 protocol, u32 node_index);
+
+serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main;
+
+void ip6_ethernet_update_adjacency (vnet_main_t * vnm,
+ u32 sw_if_index, u32 ai);
+
+
+void
+ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip,
+ u8 * mac);
+
+void
+ip6_ethernet_mac_address_from_link_local_address (u8 * mac,
+ ip6_address_t * ip);
+
+int vnet_set_ip6_flow_hash (u32 table_id,
+ flow_hash_config_t flow_hash_config);
+
+clib_error_t *enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index);
+
+clib_error_t *disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index);
+
+int ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index);
+
+clib_error_t *set_ip6_link_local_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address);
+
+int vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm,
+ void *data_callback,
+ u32 pid,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data, int is_add);
+
+int vnet_ip6_nd_term (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_buffer_t * p0,
+ ethernet_header_t * eth,
+ ip6_header_t * ip, u32 sw_if_index, u16 bd_index);
+
+void send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi);
+
+u8 *format_ip6_forward_next_trace (u8 * s, va_list * args);
+
+u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);
+
+int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index);
+extern vlib_node_registration_t ip6_lookup_node;
+
+/* Compute the flow hash; it's used to select the load-balance bucket for
+ this flow, among other things. */
+always_inline u32
+ip6_compute_flow_hash (const ip6_header_t * ip,
+ flow_hash_config_t flow_hash_config)
+{
+ tcp_header_t *tcp;
+ u64 a, b, c;
+ u64 t1, t2;
+ uword is_tcp_udp = 0;
+ u8 protocol = ip->protocol;
+
+ if (PREDICT_TRUE
+ ((ip->protocol == IP_PROTOCOL_TCP)
+ || (ip->protocol == IP_PROTOCOL_UDP)))
+ {
+ is_tcp_udp = 1;
+ tcp = (void *) (ip + 1);
+ }
+ else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1);
+ if ((hbh->protocol == IP_PROTOCOL_TCP) ||
+ (hbh->protocol == IP_PROTOCOL_UDP))
+ {
+ is_tcp_udp = 1;
+ tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+ }
+ protocol = hbh->protocol;
+ }
+
+ t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? t1 : 0;
+
+ t2 = (ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1]);
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) ? t2 : 0;
+
+ a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1;
+ b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2;
+ b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? protocol : 0;
+
+ t1 = is_tcp_udp ? tcp->src : 0;
+ t2 = is_tcp_udp ? tcp->dst : 0;
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0;
+
+ c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
+ ((t1 << 16) | t2) : ((t2 << 16) | t1);
+
+ hash_mix64 (a, b, c);
+ return (u32) c;
+}
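+
+/*
+ * Usage sketch: hash the 5-tuple with the default configuration and use
+ * the result to pick a load-balance bucket (bucket counts are powers of
+ * two, so a mask suffices):
+ *
+ * u32 hash = ip6_compute_flow_hash (ip6, IP_FLOW_HASH_DEFAULT);
+ * dpo = load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+ */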
+
+/* ip6_locate_header
+ *
+ * Searches the header chain for the header whose protocol number is
+ * find_hdr_type. This is used to locate a specific IPv6 extension header
+ * or the transport-layer header.
+ * 1. If find_hdr_type < 0, the protocol number of the transport or ESP
+ *    header found in the chain is returned, and its offset is stored in
+ *    *offset.
+ * 2. If a header with protocol number find_hdr_type is found, its offset
+ *    is stored in *offset and its protocol number is returned.
+ * 3. If find_hdr_type is not found, the packet is malformed, or it is a
+ *    non-first fragment, -1 is returned.
+ */
+always_inline int
+ip6_locate_header (vlib_buffer_t * p0,
+ ip6_header_t * ip0, int find_hdr_type, u32 * offset)
+{
+ u8 next_proto = ip0->protocol;
+ u8 *next_header;
+ u8 done = 0;
+ u32 cur_offset;
+ u8 *temp_nxthdr = 0;
+ u32 exthdr_len = 0;
+
+ next_header = ip6_next_header (ip0);
+ cur_offset = sizeof (ip6_header_t);
+ while (1)
+ {
+ done = (next_proto == find_hdr_type);
+ if (PREDICT_FALSE
+ (next_header >=
+ (u8 *) vlib_buffer_get_current (p0) + p0->current_length))
+ {
+ //A malicious packet could set an extension header with too big a size
+ return (-1);
+ }
+ if (done)
+ break;
+ if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
+ {
+ if (find_hdr_type < 0)
+ break;
+ return -1;
+ }
+ if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header;
+ u16 frag_off = ip6_frag_hdr_offset (frag_hdr);
+ /* Non first fragment return -1 */
+ if (frag_off)
+ return (-1);
+ exthdr_len = sizeof (ip6_frag_hdr_t);
+ temp_nxthdr = next_header + exthdr_len;
+ }
+ else if (next_proto == IP_PROTOCOL_IPSEC_AH)
+ {
+ exthdr_len =
+ ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header));
+ temp_nxthdr = next_header + exthdr_len;
+ }
+ else
+ {
+ exthdr_len =
+ ip6_ext_header_len (((ip6_ext_header_t *) next_header));
+ temp_nxthdr = next_header + exthdr_len;
+ }
+ next_proto = ((ip6_ext_header_t *) next_header)->next_hdr;
+ next_header = temp_nxthdr;
+ cur_offset += exthdr_len;
+ }
+
+ *offset = cur_offset;
+ return (next_proto);
+}
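+
+/*
+ * Usage sketch: find the TCP header behind any extension headers.
+ *
+ * u32 offset;
+ * if (ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, &offset) >= 0)
+ *   tcp0 = (tcp_header_t *) ((u8 *) ip0 + offset);
+ *
+ * A negative find_hdr_type returns whatever transport (or ESP) header
+ * terminates the chain, with its offset stored in *offset.
+ */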
+
+u8 *format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args);
+/*
+ * Hop-by-Hop handling
+ */
+typedef struct
+{
+ /* Array of function pointers to HBH option handling routines */
+ int (*options[256]) (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt);
+ u8 *(*trace[256]) (u8 * s, ip6_hop_by_hop_option_t * opt);
+ uword next_override;
+} ip6_hop_by_hop_main_t;
+
+extern ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
+
+int ip6_hbh_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt),
+ u8 * trace (u8 * s,
+ ip6_hop_by_hop_option_t * opt));
+int ip6_hbh_unregister_option (u8 option);
+void ip6_hbh_set_next_override (uword next);
+
+/**
+ * Push IPv6 header to buffer
+ *
+ * @param vm - vlib_main
+ * @param b - buffer to write the header to
+ * @param src - source IP
+ * @param dst - destination IP
+ * @param prot - payload proto
+ *
+ * @return - pointer to start of IP header
+ */
+always_inline void *
+vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b,
+ ip6_address_t * src, ip6_address_t * dst, int proto)
+{
+ ip6_header_t *ip6h;
+ u16 payload_length;
+
+ /* make some room */
+ ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t));
+
+ ip6h->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ /* calculate ip6 payload length */
+ payload_length = vlib_buffer_length_in_chain (vm, b);
+ payload_length -= sizeof (*ip6h);
+
+ ip6h->payload_length = clib_host_to_net_u16 (payload_length);
+
+ ip6h->hop_limit = 0xff;
+ ip6h->protocol = proto;
+ clib_memcpy (ip6h->src_address.as_u8, src->as_u8,
+ sizeof (ip6h->src_address));
+ clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8,
+ sizeof (ip6h->src_address));
+ b->flags |= VNET_BUFFER_F_IS_IP6;
+
+ return ip6h;
+}
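+
+/*
+ * Usage sketch: with the payload (e.g. a UDP datagram) already in the
+ * buffer, prepend the IPv6 header; addresses and protocol are supplied
+ * by the caller:
+ *
+ * ip6_header_t *ip6 =
+ *   vlib_buffer_push_ip6 (vm, b, &src, &dst, IP_PROTOCOL_UDP);
+ */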
+
+#endif /* included_ip_ip6_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_error.h b/src/vnet/ip/ip6_error.h
new file mode 100644
index 00000000..a2807169
--- /dev/null
+++ b/src/vnet/ip/ip6_error.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_error.h: ip6 fast path errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip6_error_h
+#define included_ip_ip6_error_h
+
+#define foreach_ip6_error \
+ /* Must be first. */ \
+ _ (NONE, "valid ip6 packets") \
+ \
+ /* Errors signalled by ip6-input */ \
+ _ (TOO_SHORT, "ip6 length < 40 bytes") \
+ _ (BAD_LENGTH, "ip6 length > l2 length") \
+ _ (VERSION, "ip6 version != 6") \
+ _ (TIME_EXPIRED, "ip6 ttl <= 1") \
+ \
+ /* Errors signalled by ip6-rewrite. */ \
+ _ (MTU_EXCEEDED, "ip6 MTU exceeded") \
+ _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \
+ _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \
+ _ (ADJACENCY_DROP, "ip6 adjacency drop") \
+ _ (ADJACENCY_PUNT, "ip6 adjacency punt") \
+ \
+ /* Errors signalled by ip6-local. */ \
+ _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
+ _ (UDP_CHECKSUM, "bad udp checksum") \
+ _ (ICMP_CHECKSUM, "bad icmp checksum") \
+ _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
+ \
+ /* Errors signalled by udp6-lookup. */ \
+ _ (UNKNOWN_UDP_PORT, "no listener for udp port") \
+ \
+ /* Spoofed packets in ip6-rewrite-local */ \
+ _(SPOOFED_LOCAL_PACKETS, "ip6 spoofed local-address packet drops") \
+ \
+ /* Errors signalled by ip6-inacl */ \
+ _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
+ _ (INACL_SESSION_DENY, "input ACL session deny drops")
+
+typedef enum
+{
+#define _(sym,str) IP6_ERROR_##sym,
+ foreach_ip6_error
+#undef _
+ IP6_N_ERROR,
+} ip6_error_t;
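+
+/*
+ * Nodes use these values as indices into their error tables, built from
+ * the strings in foreach_ip6_error; a sketch of the usual pattern:
+ *
+ *   b0->error = node->errors[IP6_ERROR_TIME_EXPIRED];
+ */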
+
+#endif /* included_ip_ip6_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_format.c b/src/vnet/ip/ip6_format.c
new file mode 100644
index 00000000..56899b73
--- /dev/null
+++ b/src/vnet/ip/ip6_format.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_format.c: ip6 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format an IP6 address. */
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 max_zero_run = 0, this_zero_run = 0;
+ int max_zero_run_index = -1, this_zero_run_index = 0;
+ int in_zero_run = 0, i;
+ int last_double_colon = 0;
+
+ /* Ugh, this is a pain. Scan forward looking for runs of 0's */
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (a->as_u16[i] == 0)
+ {
+ if (in_zero_run)
+ this_zero_run++;
+ else
+ {
+ in_zero_run = 1;
+ this_zero_run = 1;
+ this_zero_run_index = i;
+ }
+ }
+ else
+ {
+ if (in_zero_run)
+ {
+ /* offer to compress the biggest run of > 1 zero */
+ if (this_zero_run > max_zero_run && this_zero_run > 1)
+ {
+ max_zero_run_index = this_zero_run_index;
+ max_zero_run = this_zero_run;
+ }
+ }
+ in_zero_run = 0;
+ this_zero_run = 0;
+ }
+ }
+
+ if (in_zero_run)
+ {
+ if (this_zero_run > max_zero_run && this_zero_run > 1)
+ {
+ max_zero_run_index = this_zero_run_index;
+ max_zero_run = this_zero_run;
+ }
+ }
+
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == max_zero_run_index)
+ {
+ s = format (s, "::");
+ i += max_zero_run - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
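+
+/*
+ * format_ip6_address is a standard %U formatter; usage sketch:
+ *
+ *   u8 *s = format (0, "addr %U", format_ip6_address, &a);
+ *   ...
+ *   vec_free (s);
+ */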
+
+/* Format an IP6 route destination and length. */
+u8 *
+format_ip6_address_and_length (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u8 l = va_arg (*args, u32);
+ return format (s, "%U/%d", format_ip6_address, a, l);
+}
+
+/* Parse an IP6 address. */
+uword
+unformat_ip6_address (unformat_input_t * input, va_list * args)
+{
+ ip6_address_t *result = va_arg (*args, ip6_address_t *);
+ u16 hex_quads[8];
+ uword hex_quad, n_hex_quads, hex_digit, n_hex_digits;
+ uword c, n_colon, double_colon_index;
+
+ n_hex_quads = hex_quad = n_hex_digits = n_colon = 0;
+ double_colon_index = ARRAY_LEN (hex_quads);
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ hex_digit = 16;
+ if (c >= '0' && c <= '9')
+ hex_digit = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ hex_digit = c + 10 - 'a';
+ else if (c >= 'A' && c <= 'F')
+ hex_digit = c + 10 - 'A';
+ else if (c == ':' && n_colon < 2)
+ n_colon++;
+ else
+ {
+ unformat_put_input (input);
+ break;
+ }
+
+ /* Too many hex quads. */
+ if (n_hex_quads >= ARRAY_LEN (hex_quads))
+ return 0;
+
+ if (hex_digit < 16)
+ {
+ hex_quad = (hex_quad << 4) | hex_digit;
+
+ /* Hex quad must fit in 16 bits. */
+ if (n_hex_digits >= 4)
+ return 0;
+
+ n_colon = 0;
+ n_hex_digits++;
+ }
+
+ /* Save position of :: */
+ if (n_colon == 2)
+ {
+ /* More than one :: ? */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ return 0;
+ double_colon_index = n_hex_quads;
+ }
+
+ if (n_colon > 0 && n_hex_digits > 0)
+ {
+ hex_quads[n_hex_quads++] = hex_quad;
+ hex_quad = 0;
+ n_hex_digits = 0;
+ }
+ }
+
+ if (n_hex_digits > 0)
+ hex_quads[n_hex_quads++] = hex_quad;
+
+ {
+ word i;
+
+ /* Expand :: to appropriate number of zero hex quads. */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ {
+ word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads;
+
+ for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--)
+ hex_quads[n_zero + i] = hex_quads[i];
+
+ for (i = 0; i < n_zero; i++)
+ {
+ ASSERT ((double_colon_index + i) < ARRAY_LEN (hex_quads));
+ hex_quads[double_colon_index + i] = 0;
+ }
+
+ n_hex_quads = ARRAY_LEN (hex_quads);
+ }
+
+ /* Too few hex quads given. */
+ if (n_hex_quads < ARRAY_LEN (hex_quads))
+ return 0;
+
+ for (i = 0; i < ARRAY_LEN (hex_quads); i++)
+ result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]);
+
+ return 1;
+ }
+}
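+
+/*
+ * The unformat counterpart of the formatter above; usage sketch:
+ *
+ *   ip6_address_t a;
+ *   if (unformat (input, "%U", unformat_ip6_address, &a))
+ *     ...
+ */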
+
+/* Format an IP6 header. */
+u8 *
+format_ip6_header (u8 * s, va_list * args)
+{
+ ip6_header_t *ip = va_arg (*args, ip6_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 i, ip_version, traffic_class, flow_label;
+ uword indent;
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (ip[0]))
+ return format (s, "IP header truncated");
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ s = format (s, "%U: %U -> %U",
+ format_ip_protocol, ip->protocol,
+ format_ip6_address, &ip->src_address,
+ format_ip6_address, &ip->dst_address);
+
+ i = clib_net_to_host_u32 (ip->ip_version_traffic_class_and_flow_label);
+ ip_version = (i >> 28);
+ traffic_class = (i >> 20) & 0xff;
+ flow_label = i & pow2_mask (20);
+
+ if (ip_version != 6)
+ s = format (s, "\n%Uversion %d", format_white_space, indent, ip_version);
+
+ s =
+ format (s,
+ "\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d",
+ format_white_space, indent, traffic_class, flow_label,
+ ip->hop_limit, clib_net_to_host_u16 (ip->payload_length));
+
+ /* Recurse into next protocol layer. */
+ if (max_header_bytes != 0 && sizeof (ip[0]) < max_header_bytes)
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+ if (pi && pi->format_header)
+ s = format (s, "\n%U%U",
+ format_white_space, indent - 2, pi->format_header,
+ /* next protocol header */ (void *) (ip + 1),
+ max_header_bytes - sizeof (ip[0]));
+ }
+
+ return s;
+}
+
+/* Parse an IP6 header. */
+uword
+unformat_ip6_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ ip6_header_t *ip;
+ int old_length;
+
+ /* Allocate space for IP header. */
+ {
+ void *p;
+
+ old_length = vec_len (*result);
+ vec_add2 (*result, p, sizeof (ip[0]));
+ ip = p;
+ }
+
+ memset (ip, 0, sizeof (ip[0]));
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (6 << 28);
+
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_ip_protocol, &ip->protocol,
+ unformat_ip6_address, &ip->src_address,
+ unformat_ip6_address, &ip->dst_address))
+ return 0;
+
+ /* Parse options. */
+ while (1)
+ {
+ int i;
+
+ if (unformat (input, "tos %U", unformat_vlib_number, &i))
+ ip->ip_version_traffic_class_and_flow_label |=
+ clib_host_to_net_u32 ((i & 0xff) << 20);
+
+ else if (unformat (input, "hop-limit %U", unformat_vlib_number, &i))
+ ip->hop_limit = i;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ /* Recurse into next protocol layer. */
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+ if (pi && pi->unformat_header)
+ {
+ if (!unformat_user (input, pi->unformat_header, result))
+ return 0;
+
+ /* Result may have moved. */
+ ip = (void *) *result + old_length;
+ }
+ }
+
+ ip->payload_length =
+ clib_host_to_net_u16 (vec_len (*result) - (old_length + sizeof (ip[0])));
+
+ return 1;
+}
+
+/* Parse an IP46 address. */
+uword
+unformat_ip46_address (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ if ((type != IP46_TYPE_IP6) &&
+ unformat (input, "%U", unformat_ip4_address, &ip46->ip4))
+ {
+ ip46_address_mask_ip4 (ip46);
+ return 1;
+ }
+ else if ((type != IP46_TYPE_IP4) &&
+ unformat (input, "%U", unformat_ip6_address, &ip46->ip6))
+ {
+ return 1;
+ }
+ return 0;
+}
+
+/* Format an IP46 address. */
+u8 *
+format_ip46_address (u8 * s, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ int is_ip4 = 1;
+
+ switch (type)
+ {
+ case IP46_TYPE_ANY:
+ is_ip4 = ip46_address_is_ip4 (ip46);
+ break;
+ case IP46_TYPE_IP4:
+ is_ip4 = 1;
+ break;
+ case IP46_TYPE_IP6:
+ is_ip4 = 0;
+ break;
+ }
+
+ return is_ip4 ?
+ format (s, "%U", format_ip4_address, &ip46->ip4) :
+ format (s, "%U", format_ip6_address, &ip46->ip6);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
new file mode 100644
index 00000000..54582d38
--- /dev/null
+++ b/src/vnet/ip/ip6_forward.c
@@ -0,0 +1,3558 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_forward.c: IP v6 forwarding
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vppinfra/cache.h>
+#include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/classify_dpo.h>
+
+#include <vppinfra/bihash_template.c>
+
+/* Flag used by IOAM code. The classifier sets it; the pop-hop-by-hop node checks it. */
+#define OI_DECAP 0x80000000
+
+/**
+ * @file
+ * @brief IPv6 Forwarding.
+ *
+ * This file contains the source code for IPv6 forwarding.
+ */
+
+void
+ip6_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+always_inline uword
+ip6_lookup_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip6_main_t *im = &ip6_main;
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, pi1, lbi0, lbi1, wrong_next;
+ ip_lookup_next_t next0, next1;
+ ip6_header_t *ip0, *ip1;
+ ip6_address_t *dst_addr0, *dst_addr1;
+ u32 fib_index0, fib_index1;
+ u32 flow_hash_config0, flow_hash_config1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t *lb0, *lb1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ dst_addr0 = &ip0->dst_address;
+ dst_addr1 = &ip1->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+
+ fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+ fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+
+ lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
+ lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+
+ vnet_buffer (p0)->ip.flow_hash = vnet_buffer (p1)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, flow_hash_config0);
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ flow_hash_config1 = lb1->lb_hash_config;
+ vnet_buffer (p1)->ip.flow_hash =
+ ip6_compute_flow_hash (ip1, flow_hash_config1);
+ dpo1 =
+ load_balance_get_fwd_bucket (lb1,
+ (vnet_buffer (p1)->ip.flow_hash &
+ (lb1->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
+ }
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ wrong_next = (next0 != next) + 2 * (next1 != next);
+ if (PREDICT_FALSE (wrong_next != 0))
+ {
+ switch (wrong_next)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = pi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ break;
+
+ case 3:
+ /* A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ if (next0 == next1)
+ {
+ /* A B B */
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next1;
+ vlib_get_next_frame (vm, node, next, to_next,
+ n_left_to_next);
+ }
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, lbi0;
+ ip_lookup_next_t next0;
+ load_balance_t *lb0;
+ ip6_address_t *dst_addr0;
+ u32 fib_index0, flow_hash_config0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ dst_addr0 = &ip0->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
+
+ lb0 = load_balance_get (lbi0);
+
+ vnet_buffer (p0)->ip.flow_hash = 0;
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, flow_hash_config0);
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+static void
+ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
+ ip6_main_t * im, u32 fib_index,
+ ip_interface_address_t * a)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip6_address_t *address = ip_interface_address_get_address (lm, a);
+ fib_prefix_t pfx = {
+ .fp_len = a->address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = *address,
+ };
+
+ if (a->address_length < 128)
+ {
+ fib_table_entry_update_one_path (fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP6,
+ /* No next-hop address */
+ NULL, sw_if_index,
+ /* invalid FIB index */
+ ~0, 1,
+ /* no label stack */
+ NULL, FIB_ROUTE_PATH_FLAG_NONE);
+ }
+
+ pfx.fp_len = 128;
+ if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
+ {
+ u32 classify_table_index =
+ lm->classify_table_index_by_sw_if_index[sw_if_index];
+ if (classify_table_index != (u32) ~ 0)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP6,
+ classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
+
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE, &dpo);
+ dpo_reset (&dpo);
+ }
+ }
+
+ fib_table_entry_update_one_path (fib_index, &pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ DPO_PROTO_IP6,
+ &pfx.fp_addr,
+ sw_if_index, ~0,
+ 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+static void
+ip6_del_interface_routes (ip6_main_t * im,
+ u32 fib_index,
+ ip6_address_t * address, u32 address_length)
+{
+ fib_prefix_t pfx = {
+ .fp_len = address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = *address,
+ };
+
+ if (pfx.fp_len < 128)
+ {
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+
+ }
+
+ pfx.fp_len = 128;
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+}
+
+void
+ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
+{
+ ip6_main_t *im = &ip6_main;
+
+ vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
+ {
+ if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+ else
+ {
+ /* The ref count is 0 when an address is removed from an interface that has
+ * no address - this is not a critical error */
+ if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
+ 0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index,
+ !is_enable, 0, 0);
+
+ vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index,
+ !is_enable, 0, 0);
+}
+
+/* get first interface address */
+ip6_address_t *
+ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip6_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip6_address_t * a = ip_interface_address_get_address (lm, ia);
+ result = a;
+ break;
+ }));
+ /* *INDENT-ON* */
+ return result;
+}
+
+clib_error_t *
+ip6_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length, u32 is_del)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ clib_error_t *error;
+ u32 if_address_index;
+ ip6_address_fib_t ip6_af, *addr_fib = 0;
+
+ /* local0 interface doesn't support IP addressing */
+ if (sw_if_index == 0)
+ {
+ return
+ clib_error_create ("local0 interface doesn't support IP addressing");
+ }
+
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+
+ ip6_addr_fib_init (&ip6_af, address,
+ vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
+ vec_add1 (addr_fib, ip6_af);
+
+ {
+ uword elts_before = pool_elts (lm->if_address_pool);
+
+ error = ip_interface_address_add_del
+ (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
+ if (error)
+ goto done;
+
+ /* Pool did not grow: add duplicate address. */
+ if (elts_before == pool_elts (lm->if_address_pool))
+ goto done;
+ }
+
+ ip6_sw_interface_enable_disable (sw_if_index, !is_del);
+
+ if (is_del)
+ ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length);
+ else
+ ip6_add_interface_routes (vnm, sw_if_index,
+ im, ip6_af.fib_index,
+ pool_elt_at_index (lm->if_address_pool,
+ if_address_index));
+
+ {
+ ip6_add_del_interface_address_callback_t *cb;
+ vec_foreach (cb, im->add_del_interface_address_callbacks)
+ cb->function (im, cb->function_opaque, sw_if_index,
+ address, address_length, if_address_index, is_del);
+ }
+
+done:
+ vec_free (addr_fib);
+ return error;
+}
+
+clib_error_t *
+ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_interface_address_t *ia;
+ ip6_address_t *a;
+ u32 is_admin_up, fib_index;
+
+ /* Fill in lookup tables with default table (0). */
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+
+ vec_validate_init_empty (im->
+ lookup_main.if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+
+ is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ a = ip_interface_address_get_address (&im->lookup_main, ia);
+ if (is_admin_up)
+ ip6_add_interface_routes (vnm, sw_if_index,
+ im, fib_index,
+ ia);
+ else
+ ip6_del_interface_routes (im, fib_index,
+ a, ia->address_length);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
+
+/* Built-in ip6 unicast rx feature path definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
+{
+ .arc_name = "ip6-unicast",
+ .start_nodes = VNET_FEATURES ("ip6-input"),
+ .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_flow_classify, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-flow-classify",
+ .runs_before = VNET_FEATURES ("ip6-inacl"),
+};
+
+VNET_FEATURE_INIT (ip6_inacl, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-inacl",
+ .runs_before = VNET_FEATURES ("ip6-policer-classify"),
+};
+
+VNET_FEATURE_INIT (ip6_policer_classify, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-policer-classify",
+ .runs_before = VNET_FEATURES ("ipsec-input-ip6"),
+};
+
+VNET_FEATURE_INIT (ip6_ipsec, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ipsec-input-ip6",
+ .runs_before = VNET_FEATURES ("l2tp-decap"),
+};
+
+VNET_FEATURE_INIT (ip6_l2tp, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "l2tp-decap",
+ .runs_before = VNET_FEATURES ("vpath-input-ip6"),
+};
+
+VNET_FEATURE_INIT (ip6_vpath, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "vpath-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
+};
+
+VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-vxlan-bypass",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_drop, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-drop",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_lookup, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-lookup",
+ .runs_before = 0, /*last feature*/
+};
+
+/* Built-in ip6 multicast rx feature path definition (none now) */
+VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
+{
+ .arc_name = "ip6-multicast",
+ .start_nodes = VNET_FEATURES ("ip6-input"),
+ .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "vpath-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_drop_mc, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "ip6-drop",
+ .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "ip6-mfib-forward-lookup",
+ .runs_before = 0, /* last feature */
+};
+
+/* Built-in ip4 tx feature path definition */
+VNET_FEATURE_ARC_INIT (ip6_output, static) =
+{
+ .arc_name = "ip6-output",
+ .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain"),
+ .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "ipsec-output-ip6",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VNET_FEATURE_INIT (ip6_interface_output, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "interface-output",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
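+
+/*
+ * The features declared above are switched per interface at run time with
+ * the same helper this file uses for ip6-drop; a sketch (enabling the
+ * input ACL feature):
+ *
+ *   vnet_feature_enable_disable ("ip6-unicast", "ip6-inacl",
+ *                                sw_if_index, 1, 0, 0);
+ */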
+
+clib_error_t *
+ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+ ip6_main_t *im = &ip6_main;
+
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+
+ if (!is_add)
+ {
+ /* Ensure that IPv6 is disabled */
+ ip6_main_t *im6 = &ip6_main;
+ ip_lookup_main_t *lm6 = &im6->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip6_address_t *address;
+ vlib_main_t *vm = vlib_get_main ();
+
+ ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* honor unnumbered */,
+ ({
+ address = ip_interface_address_get_address (lm6, ia);
+ ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
+ }));
+ /* *INDENT-ON* */
+ ip6_mfib_interface_enable_disable (sw_if_index, 0);
+ }
+
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index,
+ is_add, 0, 0);
+
+ vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index,
+ is_add, 0, 0);
+
+ return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
+
+static uword
+ip6_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_lookup_inline (vm, node, frame);
+}
+
+static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_lookup_node) =
+{
+ .function = ip6_lookup,
+ .name = "ip6-lookup",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_lookup_trace,
+ .n_next_nodes = IP6_LOOKUP_N_NEXT,
+ .next_nodes = IP6_LOOKUP_NEXT_NODES,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup);
+
+always_inline uword
+ip6_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 thread_index = vlib_get_thread_index ();
+ ip6_main_t *im = &ip6_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_lookup_next_t next0, next1;
+ const load_balance_t *lb0, *lb1;
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+ const ip6_header_t *ip0, *ip1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+
+	  /*
+	   * This node is for via-FIBs, so we can re-use the flow hash value
+	   * from the lookup node if it is present.
+	   * We don't want to use the same hash value at each level in the
+	   * recursion graph, as that would lead to polarisation.
+	   */
+ hc0 = hc1 = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
+ }
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (hc0 &
+ lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
+ }
+ dpo1 =
+ load_balance_get_fwd_bucket (lb1,
+ (hc1 &
+ lb1->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ vlib_buffer_t *p0;
+ u32 pi0, lbi0, hc0;
+ const ip6_header_t *ip0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
+ }
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (hc0 &
+ lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_load_balance_node) =
+{
+ .function = ip6_load_balance,
+ .name = "ip6-load-balance",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip6-lookup",
+ .format_trace = format_ip6_lookup_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance);
+
+typedef struct
+{
+ /* Adjacency taken. */
+ u32 adj_index;
+ u32 flow_hash;
+ u32 fib_index;
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[128 - 1 * sizeof (u32)];
+}
+ip6_forward_next_trace_t;
+
+u8 *
+format_ip6_forward_next_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%U%U",
+ format_white_space, indent,
+ format_ip6_header, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+static u8 *
+format_ip6_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
+ t->fib_index, t->adj_index, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip6_header, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+
+static u8 *
+format_ip6_rewrite_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
+ t->fib_index, t->adj_index, format_ip_adjacency,
+ t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip_adjacency_packet_data,
+ t->adj_index, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+/* Common trace function for all ip6-forward next nodes. */
+void
+ip6_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
+{
+ u32 *from, n_left;
+ ip6_main_t *im = &ip6_main;
+
+ n_left = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ ip6_forward_next_trace_t *t0, *t1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
+ t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
+ t0->fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+
+ clib_memcpy (t0->packet_data,
+ vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
+ t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
+ t1->fib_index =
+ (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b1)->sw_if_index[VLIB_RX]);
+
+ clib_memcpy (t1->packet_data,
+ vlib_buffer_get_current (b1),
+ sizeof (t1->packet_data));
+ }
+ from += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_forward_next_trace_t *t0;
+
+ bi0 = from[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
+ t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
+ t0->fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+
+ clib_memcpy (t0->packet_data,
+ vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
+ }
+ from += 1;
+ n_left -= 1;
+ }
+}
+
+static uword
+ip6_drop_or_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, ip6_error_t error_code)
+{
+ u32 *buffers = vlib_frame_vector_args (frame);
+ uword n_packets = frame->n_vectors;
+
+ vlib_error_drop_buffers (vm, node, buffers,
+ /* stride */ 1,
+ n_packets,
+ /* next */ 0,
+ ip6_input_node.index, error_code);
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return n_packets;
+}
+
+static uword
+ip6_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP);
+}
+
+static uword
+ip6_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_drop_node, static) =
+{
+ .function = ip6_drop,
+ .name = "ip6-drop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "error-drop",},
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_punt_node, static) =
+{
+ .function = ip6_punt,
+ .name = "ip6-punt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "error-punt",},
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt);
+
+/* Compute TCP/UDP/ICMP6 checksum in software. */
+u16
+ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip6_header_t * ip0, int *bogus_lengthp)
+{
+ ip_csum_t sum0;
+ u16 sum16, payload_length_host_byte_order;
+ u32 i, n_this_buffer, n_bytes_left;
+ u32 headers_size = sizeof (ip0[0]);
+ void *data_this_buffer;
+
+ ASSERT (bogus_lengthp);
+ *bogus_lengthp = 0;
+
+ /* Initialize checksum with ip header. */
+ sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
+ payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
+ data_this_buffer = (void *) (ip0 + 1);
+
+ for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
+ {
+ sum0 = ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->
+ src_address.as_uword[i],
+ uword));
+ sum0 =
+ ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->dst_address.as_uword[i],
+ uword));
+ }
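+
+ /*
+ * The running sum now covers the RFC 2460 pseudo-header: upper-layer
+ * packet length, next-header value and both 128-bit addresses, all
+ * accumulated in network byte order. Only the L4 payload bytes remain
+ * to be folded in below.
+ */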
+
+ /* Some ICMP packets (e.g., MLDv2 reports) and UDP ping packets may
+ * arrive with a "router alert" hop-by-hop extension header */
+ if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ u32 skip_bytes;
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) data_this_buffer;
+
+ /* validate that the next header really is ICMP6 or UDP */
+ ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
+ || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
+
+ skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
+ data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
+
+ payload_length_host_byte_order -= skip_bytes;
+ headers_size += skip_bytes;
+ }
+
+ n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+ if (p0 && n_this_buffer + headers_size > p0->current_length)
+ n_this_buffer =
+ p0->current_length >
+ headers_size ? p0->current_length - headers_size : 0;
+ while (1)
+ {
+ sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+ n_bytes_left -= n_this_buffer;
+ if (n_bytes_left == 0)
+ break;
+
+ if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ *bogus_lengthp = 1;
+ return 0xfefe;
+ }
+ p0 = vlib_get_buffer (vm, p0->next_buffer);
+ data_this_buffer = vlib_buffer_get_current (p0);
+ n_this_buffer = p0->current_length;
+ }
+
+ sum16 = ~ip_csum_fold (sum0);
+
+ return sum16;
+}
+
+u32
+ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
+{
+ ip6_header_t *ip0 = vlib_buffer_get_current (p0);
+ udp_header_t *udp0;
+ u16 sum16;
+ int bogus_length;
+
+ /* Some ICMP packets (e.g., MLDv2 reports) may arrive with a "router alert" hop-by-hop extension header */
+ ASSERT (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_ICMP6
+ || ip0->protocol == IP_PROTOCOL_UDP
+ || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
+
+ udp0 = (void *) (ip0 + 1);
+ if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
+ {
+ p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
+ | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+ return p0->flags;
+ }
+
+ sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
+
+ p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
+ | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
+
+ return p0->flags;
+}
+
+/**
+ * @brief returns number of links on which src is reachable.
+ */
+always_inline int
+ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
+{
+ const load_balance_t *lb0;
+ index_t lbi;
+
+ lbi = ip6_fib_table_fwding_lookup_with_if_index (im,
+ vnet_buffer
+ (b)->sw_if_index[VLIB_RX],
+ &i->src_address);
+
+ lb0 = load_balance_get (lbi);
+
+ return (fib_urpf_check_size (lb0->lb_urpf));
+}
+
+always_inline u8
+ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
+ u32 * udp_offset0)
+{
+ u32 proto0;
+ proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
+ if (proto0 != IP_PROTOCOL_UDP)
+ {
+ proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
+ proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
+ }
+ return proto0;
+}
+
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip6_local) =
+{
+ .arc_name = "ip6-local",
+ .start_nodes = VNET_FEATURES ("ip6-local"),
+};
+/* *INDENT-ON* */
+
+static uword
+ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int head_of_feature_arc)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_local_next_t next_index;
+ u32 *from, *to_next, n_left_from, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_input_node.index);
+ u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ udp_header_t *udp0, *udp1;
+ u32 pi0, ip_len0, udp_len0, flags0, next0;
+ u32 pi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, type0, good_l4_csum0, is_tcp_udp0;
+ u8 error1, type1, good_l4_csum1, is_tcp_udp1;
+ u32 udp_offset0, udp_offset1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ if (head_of_feature_arc == 0)
+ goto skip_checks;
+
+ vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
+ vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
+
+ type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
+ type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
+
+ flags0 = p0->flags;
+ flags1 = p1->flags;
+
+ is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0);
+ is_tcp_udp1 = ip6_next_proto_is_tcp_udp (p1, ip1, &udp_offset1);
+
+ good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ good_l4_csum1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ len_diff0 = 0;
+ len_diff1 = 0;
+
+ if (PREDICT_TRUE (is_tcp_udp0))
+ {
+ udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP
+ && udp0->checksum == 0;
+ /* Verify UDP length. */
+ if (is_tcp_udp0 == IP_PROTOCOL_UDP)
+ {
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+ }
+ }
+ if (PREDICT_TRUE (is_tcp_udp1))
+ {
+ udp1 = (udp_header_t *) ((u8 *) ip1 + udp_offset1);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP
+ && udp1->checksum == 0;
+ /* Verify UDP length. */
+ if (is_tcp_udp1 == IP_PROTOCOL_UDP)
+ {
+ ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+ }
+ }
+
+ good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+ good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+
+ len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
+ len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
+
+ if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_csum0
+ && !(flags0 &
+ VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
+ {
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
+ good_l4_csum0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_csum1
+ && !(flags1 &
+ VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
+ {
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
+ good_l4_csum1 =
+ (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
+ error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
+
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
+ IP6_ERROR_UDP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
+ IP6_ERROR_ICMP_CHECKSUM);
+ error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0);
+ error1 = (!good_l4_csum1 ? IP6_ERROR_UDP_CHECKSUM + type1 : error1);
+
+ /* Drop packets from unroutable hosts. */
+ /* If this is a neighbor solicitation (ICMP), skip source RPF check */
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type0 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip0->src_address))
+ {
+ error0 = (!ip6_urpf_loose_check (im, p0, ip0)
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
+ }
+ if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type1 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip1->src_address))
+ {
+ error1 = (!ip6_urpf_loose_check (im, p1, ip1)
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error1);
+ }
+
+ skip_checks:
+
+ next0 = lm->local_next_by_ip_protocol[ip0->protocol];
+ next1 = lm->local_next_by_ip_protocol[ip1->protocol];
+
+ next0 =
+ error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ next1 =
+ error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ if (head_of_feature_arc)
+ {
+ if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0);
+ if (PREDICT_TRUE (error1 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index,
+ vnet_buffer (p1)->sw_if_index
+ [VLIB_RX], &next1, p1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ udp_header_t *udp0;
+ u32 pi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, type0, good_l4_csum0;
+ u32 udp_offset0;
+ u8 is_tcp_udp0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ if (head_of_feature_arc == 0)
+ goto skip_check;
+
+ vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
+
+ type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
+ flags0 = p0->flags;
+ is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0);
+ good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ len_diff0 = 0;
+ if (PREDICT_TRUE (is_tcp_udp0))
+ {
+ udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0);
+ /* Don't verify UDP checksum for packets with explicit zero
+ * checksum. */
+ good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP
+ && udp0->checksum == 0;
+ /* Verify UDP length. */
+ if (is_tcp_udp0 == IP_PROTOCOL_UDP)
+ {
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+ }
+ }
+
+ good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+ len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
+
+ if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_csum0
+ && !(flags0 &
+ VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
+ {
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
+ good_l4_csum0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
+ error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
+
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
+ IP6_ERROR_UDP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
+ IP6_ERROR_ICMP_CHECKSUM);
+ error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0);
+
+ /* If this is a neighbor solicitation (ICMP), skip src RPF check */
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type0 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip0->src_address))
+ {
+ error0 = (!ip6_urpf_loose_check (im, p0, ip0)
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
+ }
+
+ skip_check:
+
+ next0 = lm->local_next_by_ip_protocol[ip0->protocol];
+ next0 =
+ error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ p0->error = error_node->errors[error0];
+
+ if (head_of_feature_arc)
+ {
+ if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_local_node, static) =
+{
+ .function = ip6_local,
+ .name = "ip6-local",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = IP_LOCAL_N_NEXT,
+ .next_nodes =
+ {
+ [IP_LOCAL_NEXT_DROP] = "error-drop",
+ [IP_LOCAL_NEXT_PUNT] = "error-punt",
+ [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
+ [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local);
+
+
+static uword
+ip6_local_end_of_arc (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_local_end_of_arc_node, static) = {
+ .function = ip6_local_end_of_arc,
+ .name = "ip6-local-end-of-arc",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+ .sibling_of = "ip6-local",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_end_of_arc_node, ip6_local_end_of_arc)
+
+VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
+ .arc_name = "ip6-local",
+ .node_name = "ip6-local-end-of-arc",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
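+
+/*
+ * Arc wiring recap: "ip6-local" (head_of_feature_arc = 1) performs the
+ * checksum/length/uRPF checks above and then starts the "ip6-local"
+ * feature arc; "ip6-local-end-of-arc" is pinned as the last feature
+ * (runs_before = 0) and re-runs ip6_local_inline with
+ * head_of_feature_arc = 0, which skips the checks and goes straight to
+ * per-protocol dispatch.
+ */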
+
+void
+ip6_register_protocol (u32 protocol, u32 node_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
+ lm->local_next_by_ip_protocol[protocol] =
+ vlib_node_add_next (vm, ip6_local_node.index, node_index);
+}
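+
+/*
+ * Usage sketch (hypothetical node name, for illustration only): a
+ * protocol handler registers its input node so ip6-local dispatches
+ * matching packets directly to it:
+ *
+ * ip6_register_protocol (IP_PROTOCOL_OSPF, my_ospf_input_node.index);
+ */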
+
+typedef enum
+{
+ IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX,
+ IP6_DISCOVER_NEIGHBOR_N_NEXT,
+} ip6_discover_neighbor_next_t;
+
+typedef enum
+{
+ IP6_DISCOVER_NEIGHBOR_ERROR_DROP,
+ IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT,
+ IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS,
+} ip6_discover_neighbor_error_t;
+
+static uword
+ip6_discover_neighbor_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_glean)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 *from, *to_next_drop;
+ uword n_left_from, n_left_to_next_drop;
+ static f64 time_last_seed_change = -1e100;
+ static u32 hash_seeds[3];
+ static uword hash_bitmap[256 / BITS (uword)];
+ f64 time_now;
+ int bogus_length;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ time_now = vlib_time_now (vm);
+ if (time_now - time_last_seed_change > 1e-3)
+ {
+ uword i;
+ u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
+ sizeof (hash_seeds));
+ for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+ hash_seeds[i] = r[i];
+
+ /* Mark all hash keys as not seen before. */
+ for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+ hash_bitmap[i] = 0;
+
+ time_last_seed_change = time_now;
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ to_next_drop, n_left_to_next_drop);
+
+ while (n_left_from > 0 && n_left_to_next_drop > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
+ uword bm0;
+ ip_adjacency_t *adj0;
+ vnet_hw_interface_t *hw_if0;
+ u32 next0;
+
+ pi0 = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ adj0 = adj_get (adj_index0);
+
+ if (!is_glean)
+ {
+ ip0->dst_address.as_u64[0] =
+ adj0->sub_type.nbr.next_hop.ip6.as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ adj0->sub_type.nbr.next_hop.ip6.as_u64[1];
+ }
+
+ a0 = hash_seeds[0];
+ b0 = hash_seeds[1];
+ c0 = hash_seeds[2];
+
+ sw_if_index0 = adj0->rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ a0 ^= sw_if_index0;
+ b0 ^= ip0->dst_address.as_u32[0];
+ c0 ^= ip0->dst_address.as_u32[1];
+
+ hash_v3_mix32 (a0, b0, c0);
+
+ b0 ^= ip0->dst_address.as_u32[2];
+ c0 ^= ip0->dst_address.as_u32[3];
+
+ hash_v3_finalize32 (a0, b0, c0);
+
+ c0 &= BITS (hash_bitmap) - 1;
+ m0 = (uword) 1 << (c0 % BITS (uword));
+ c0 = c0 / BITS (uword);
+
+ bm0 = hash_bitmap[c0];
+ drop0 = (bm0 & m0) != 0;
+
+ /* Mark it as seen. */
+ hash_bitmap[c0] = bm0 | m0;
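+
+ /*
+ * Throttle, by example: the three-seed hash maps (sw_if_index,
+ * dst_address) to one of 256 bits; m0 is the bit within the word
+ * hash_bitmap[c0]. A set bit means a solicitation for this neighbour
+ * was already sent since the last seed change (at most ~1 ms ago),
+ * so this packet is dropped rather than re-soliciting.
+ */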
+
+ from += 1;
+ n_left_from -= 1;
+ to_next_drop[0] = pi0;
+ to_next_drop += 1;
+ n_left_to_next_drop -= 1;
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ /* If the interface is link-down, drop the pkt */
+ if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+ drop0 = 1;
+
+ p0->error =
+ node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP
+ : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
+ if (drop0)
+ continue;
+
+ /*
+ * The adj has been updated to a rewrite, but the node/DPO that got
+ * us here hasn't been updated yet. No big deal: we'll drop while we wait.
+ */
+ if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+ continue;
+
+ {
+ u32 bi0 = 0;
+ icmp6_neighbor_solicitation_header_t *h0;
+ vlib_buffer_t *b0;
+
+ h0 = vlib_packet_template_get_packet
+ (vm, &im->discover_neighbor_packet_template, &bi0);
+
+ /*
+ * Build ethernet header.
+ * Choose source address based on destination lookup
+ * adjacency.
+ */
+ if (ip6_src_address_for_packet (lm,
+ sw_if_index0,
+ &h0->ip.src_address))
+ {
+ /* There is no address on the interface */
+ p0->error =
+ node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
+ vlib_buffer_free (vm, &bi0, 1);
+ continue;
+ }
+
+ /*
+ * Destination address is a solicited node multicast address.
+ * We need to fill in
+ * the low 24 bits with low 24 bits of target's address.
+ */
+ h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
+ h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
+ h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
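+
+ /*
+ * For example (hypothetical target, for illustration only):
+ * target 2001:db8::1:2345:6789 yields solicited-node group
+ * ff02::1:ff45:6789 - only bytes 13..15 come from the target.
+ */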
+
+ h0->neighbor.target_address = ip0->dst_address;
+
+ clib_memcpy (h0->link_layer_option.ethernet_address,
+ hw_if0->hw_address, vec_len (hw_if0->hw_address));
+
+ /* $$$$ appears we need this; why is the checksum non-zero? */
+ h0->neighbor.icmp.checksum = 0;
+ h0->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip,
+ &bogus_length);
+
+ ASSERT (bogus_length == 0);
+
+ vlib_buffer_copy_trace_flag (vm, p0, bi0);
+ b0 = vlib_get_buffer (vm, bi0);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX]
+ = vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ /* Add rewrite/encap string. */
+ vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+
+ next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX;
+
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ n_left_to_next_drop);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip6_discover_neighbor (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return (ip6_discover_neighbor_inline (vm, node, frame, 0));
+}
+
+static uword
+ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return (ip6_discover_neighbor_inline (vm, node, frame, 1));
+}
+
+static char *ip6_discover_neighbor_error_strings[] = {
+ [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
+ [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] = "neighbor solicitations sent",
+ [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]
+ = "no source address for ND solicitation",
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_discover_neighbor_node) =
+{
+ .function = ip6_discover_neighbor,
+ .name = "ip6-discover-neighbor",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
+ .error_strings = ip6_discover_neighbor_error_strings,
+ .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+ [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_glean_node) =
+{
+ .function = ip6_glean,
+ .name = "ip6-glean",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
+ .error_strings = ip6_discover_neighbor_error_strings,
+ .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+ [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ icmp6_neighbor_solicitation_header_t *h;
+ ip6_address_t *src;
+ ip_interface_address_t *ia;
+ ip_adjacency_t *adj;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ vlib_buffer_t *b;
+ adj_index_t ai;
+ u32 bi = 0;
+ int bogus_length;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ return clib_error_return (0, "%U: interface %U down",
+ format_ip6_address, dst,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
+ }
+
+ src =
+ ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
+ if (!src)
+ {
+ vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+ return clib_error_return
+ (0, "no matching interface address for destination %U (interface %U)",
+ format_ip6_address, dst,
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ }
+
+ h =
+ vlib_packet_template_get_packet (vm,
+ &im->discover_neighbor_packet_template,
+ &bi);
+
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ /* Destination address is a solicited node multicast address. We need to fill in
+ the low 24 bits with low 24 bits of target's address. */
+ h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+ h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+ h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+
+ h->ip.src_address = src[0];
+ h->neighbor.target_address = dst[0];
+
+ if (PREDICT_FALSE (!hi->hw_address))
+ {
+ return clib_error_return (0, "%U: interface %U do not support ip probe",
+ format_ip6_address, dst,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
+ }
+
+ clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address,
+ vec_len (hi->hw_address));
+
+ h->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ ip46_address_t nh = {
+ .ip6 = *dst,
+ };
+
+ ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6,
+ VNET_LINK_IP6, &nh, sw_if_index);
+ adj = adj_get (ai);
+
+ /* Peer has been previously resolved, retrieve glean adj instead */
+ if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
+ {
+ adj_unlock (ai);
+ ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6, sw_if_index, &nh);
+ adj = adj_get (ai);
+ }
+
+ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+
+ adj_unlock (ai);
+ return /* no error */ 0;
+}
+
+typedef enum
+{
+ IP6_REWRITE_NEXT_DROP,
+ IP6_REWRITE_NEXT_ICMP_ERROR,
+} ip6_rewrite_next_t;
+
+always_inline uword
+ip6_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int do_counters, int is_midchain, int is_mcast)
+{
+ ip_lookup_main_t *lm = &ip6_main.lookup_main;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_input_node.index);
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_adjacency_t *adj0, *adj1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ u32 pi0, rw_len0, next0, error0, adj_index0;
+ u32 pi1, rw_len1, next1, error1, adj_index1;
+ u32 tx_sw_if_index0, tx_sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->pre_data, 32, STORE);
+ CLIB_PREFETCH (p3->pre_data, 32, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ error0 = error1 = IP6_ERROR_NONE;
+ next0 = next1 = IP6_REWRITE_NEXT_DROP;
+
+ if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+ {
+ i32 hop_limit0 = ip0->hop_limit;
+
+ /* Input node should have rejected packets with hop limit 0. */
+ ASSERT (ip0->hop_limit > 0);
+
+ hop_limit0 -= 1;
+
+ ip0->hop_limit = hop_limit0;
+
+ /*
+ * If the hop count drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE (hop_limit0 <= 0))
+ {
+ error0 = IP6_ERROR_TIME_EXPIRED;
+ next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ }
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+ if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+ {
+ i32 hop_limit1 = ip1->hop_limit;
+
+ /* Input node should have rejected packets with hop limit 0. */
+ ASSERT (ip1->hop_limit > 0);
+
+ hop_limit1 -= 1;
+
+ ip1->hop_limit = hop_limit1;
+
+ /*
+ * If the hop count drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE (hop_limit1 <= 0))
+ {
+ error1 = IP6_ERROR_TIME_EXPIRED;
+ next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ }
+ }
+ else
+ {
+ p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+ adj0 = adj_get (adj_index0);
+ adj1 = adj_get (adj_index1);
+
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ rw_len1 = adj1[0].rewrite_header.data_bytes;
+ vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+ vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
+
+ if (do_counters)
+ {
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index, adj_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1) + rw_len1);
+ }
+
+ /* Check MTU of outgoing interface. */
+ error0 =
+ (vlib_buffer_length_in_chain (vm, p0) >
+ adj0[0].
+ rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+ error0);
+ error1 =
+ (vlib_buffer_length_in_chain (vm, p1) >
+ adj1[0].
+ rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+ error1);
+
+ /* Don't adjust the buffer for the hop-count issue; the icmp-error node
+ * wants to see the IP header */
+ if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+ next0 = adj0[0].rewrite_header.next_index;
+
+ if (PREDICT_FALSE
+ (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+ }
+ if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
+ {
+ p1->current_data -= rw_len1;
+ p1->current_length += rw_len1;
+
+ tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
+ next1 = adj1[0].rewrite_header.next_index;
+
+ if (PREDICT_FALSE
+ (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index1, &next1, p1);
+ }
+
+ /* Assume we are only writing a simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0],
+ ip0, ip1, sizeof (ethernet_header_t));
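+
+ /*
+ * vnet_rewrite_two_headers copies each adjacency's pre-built L2
+ * rewrite string into the space reserved in front of the IP header
+ * (current_data was moved back by rw_len above), so the packets
+ * leave with their Ethernet headers already in place.
+ */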
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+ adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
+ }
+ if (is_mcast)
+ {
+ /*
+ * copy bytes from the IP address into the MAC rewrite
+ */
+ vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+ vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t *adj0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, rw_len0;
+ u32 adj_index0, next0, error0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get (adj_index0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ error0 = IP6_ERROR_NONE;
+ next0 = IP6_REWRITE_NEXT_DROP;
+
+ /* Check hop limit */
+ if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+ {
+ i32 hop_limit0 = ip0->hop_limit;
+
+ ASSERT (ip0->hop_limit > 0);
+
+ hop_limit0 -= 1;
+
+ ip0->hop_limit = hop_limit0;
+
+ if (PREDICT_FALSE (hop_limit0 <= 0))
+ {
+ /*
+ * If the hop count drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ error0 = IP6_ERROR_TIME_EXPIRED;
+ next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ }
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+
+ /* Assume we are only writing a simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+
+ if (do_counters)
+ {
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+ }
+
+ /* Check MTU of outgoing interface. */
+ error0 =
+ (vlib_buffer_length_in_chain (vm, p0) >
+ adj0[0].
+ rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+ error0);
+
+ /* Don't adjust the buffer for the hop-count issue; the icmp-error node
+ * wants to see the IP header */
+ if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+ next0 = adj0[0].rewrite_header.next_index;
+
+ if (PREDICT_FALSE
+ (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+ }
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+ }
+ if (is_mcast)
+ {
+ vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+ }
+
+ p0->error = error_node->errors[error0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Need to do trace after rewrites to pick up new packet data. */
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+static uword
+ip6_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (adj_are_counters_enabled ())
+ return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
+ else
+ return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
+}
+
+static uword
+ip6_rewrite_mcast (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (adj_are_counters_enabled ())
+ return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
+ else
+ return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
+}
+
+static uword
+ip6_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (adj_are_counters_enabled ())
+ return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
+ else
+ return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
+}
+
+static uword
+ip6_mcast_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (adj_are_counters_enabled ())
+ return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
+ else
+ return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
+}
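+
+/*
+ * The four wrappers above specialize ip6_rewrite_inline at compile
+ * time: do_counters / is_midchain / is_mcast are constants at every
+ * call site, and since the function is always_inline the compiler
+ * drops the untaken branches, yielding e.g. (sketch):
+ *
+ * ip6-rewrite: ip6_rewrite_inline (vm, node, frame, c, 0, 0)
+ * ip6-midchain: ip6_rewrite_inline (vm, node, frame, c, 1, 0)
+ *
+ * where c reflects adj_are_counters_enabled ().
+ */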
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_midchain_node) =
+{
+ .function = ip6_midchain,
+ .name = "ip6-midchain",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .sibling_of = "ip6-rewrite",
+ };
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_rewrite_node) =
+{
+ .function = ip6_rewrite,
+ .name = "ip6-rewrite",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_rewrite_trace,
+ .n_next_nodes = 2,
+ .next_nodes =
+ {
+ [IP6_REWRITE_NEXT_DROP] = "error-drop",
+ [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
+{
+ .function = ip6_rewrite_mcast,
+ .name = "ip6-rewrite-mcast",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_rewrite_trace,
+ .sibling_of = "ip6-rewrite",
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) =
+{
+ .function = ip6_mcast_midchain,
+ .name = "ip6-mcast-midchain",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_rewrite_trace,
+ .sibling_of = "ip6-rewrite",
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain);
+
+/*
+ * Hop-by-Hop handling
+ */
+ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
+
+#define foreach_ip6_hop_by_hop_error \
+_(PROCESSED, "pkts with ip6 hop-by-hop options") \
+_(FORMAT, "incorrectly formatted hop-by-hop options") \
+_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
+
+/* *INDENT-OFF* */
+typedef enum
+{
+#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_hop_by_hop_error
+#undef _
+ IP6_HOP_BY_HOP_N_ERROR,
+} ip6_hop_by_hop_error_t;
+/* *INDENT-ON* */
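+
+/*
+ * The X-macro above expands once per error here and again below for
+ * the counter strings; the enum, spelled out, is:
+ *
+ * IP6_HOP_BY_HOP_ERROR_PROCESSED,
+ * IP6_HOP_BY_HOP_ERROR_FORMAT,
+ * IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION,
+ * IP6_HOP_BY_HOP_N_ERROR,
+ */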
+
+/*
+ * Primary h-b-h handler trace support
+ * We work pretty hard on the problem for obvious reasons
+ */
+typedef struct
+{
+ u32 next_index;
+ u32 trace_len;
+ u8 option_data[256];
+} ip6_hop_by_hop_trace_t;
+
+vlib_node_registration_t ip6_hop_by_hop_node;
+
+static char *ip6_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_hop_by_hop_error
+#undef _
+};
+
+u8 *
+format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
+{
+ ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
+ int total_len = va_arg (*args, int);
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+ u8 type0;
+
+ s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
+ hbh0->protocol, (hbh0->length + 1) << 3, total_len);
+
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
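+
+ /*
+ * hbh->length counts 8-octet units beyond the first 8 octets, hence
+ * the (length + 1) << 3 byte totals here and below: length = 0 is an
+ * 8-byte header, length = 1 a 16-byte header, and so on.
+ */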
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1: skip one byte */
+ opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
+ break;
+
+ default:
+ if (hm->trace[type0])
+ {
+ s = (*hm->trace[type0]) (s, opt0);
+ }
+ else
+ {
+ s =
+ format (s, "\n unrecognized option %d length %d", type0,
+ opt0->length);
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ break;
+ }
+ }
+ return s;
+}
+
+static u8 *
+format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
+ ip6_hop_by_hop_header_t *hbh0;
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+ u8 type0;
+
+ hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
+
+ s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
+ t->next_index, (hbh0->length + 1) << 3, t->trace_len);
+
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + t->trace_len);
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1: skip one byte */
+ opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
+ break;
+
+ default:
+ if (hm->trace[type0])
+ {
+ s = (*hm->trace[type0]) (s, opt0);
+ }
+ else
+ {
+ s =
+ format (s, "\n unrecognized option %d length %d", type0,
+ opt0->length);
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ break;
+ }
+ }
+ return s;
+}
+
+always_inline u8
+ip6_scan_hbh_options (vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_hop_by_hop_header_t * hbh0,
+ ip6_hop_by_hop_option_t * opt0,
+ ip6_hop_by_hop_option_t * limit0, u32 * next0)
+{
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+ u8 type0;
+ u8 error0 = 0;
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+ opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (hm->options[type0])
+ {
+ if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
+ {
+ error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ return (error0);
+ }
+ }
+ else
+ {
+ /* Unrecognized mandatory option, check the two high order bits */
+ switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
+ {
+ case HBH_OPTION_TYPE_SKIP_UNKNOWN:
+ break;
+ case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
+ error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+ *next0 = IP_LOOKUP_NEXT_DROP;
+ break;
+ case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
+ error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+ *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
+ icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_unrecognized_option,
+ (u8 *) opt0 - (u8 *) ip0);
+ break;
+ case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
+ error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+ if (!ip6_address_is_multicast (&ip0->dst_address))
+ {
+ *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
+ icmp6_error_set_vnet_buffer (b0,
+ ICMP6_parameter_problem,
+ ICMP6_parameter_problem_unrecognized_option,
+ (u8 *) opt0 - (u8 *) ip0);
+ }
+ else
+ {
+ *next0 = IP_LOOKUP_NEXT_DROP;
+ }
+ break;
+ }
+ return (error0);
+ }
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ }
+ return (error0);
+}
+
+/*
+ * Process the Hop-by-Hop Options header
+ */
+static uword
+ip6_hop_by_hop (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ ip6_header_t *ip0, *ip1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
+ u8 error0 = 0, error1 = 0;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ /* Speculatively enqueue b0, b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* By default, use the next_index from the adjacency; an HBH option rarely redirects to a different node */
+ u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj0 = adj_get (adj_index0);
+ u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj1 = adj_get (adj_index1);
+
+ next0 = adj0->lookup_next_index;
+ next1 = adj1->lookup_next_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
+ ((hbh0->length + 1) << 3));
+ limit1 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
+ ((hbh1->length + 1) << 3));
+
+ /*
+ * Basic validity checks
+ */
+ if ((hbh0->length + 1) << 3 >
+ clib_net_to_host_u16 (ip0->payload_length))
+ {
+ error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next0 = IP_LOOKUP_NEXT_DROP;
+ goto outdual;
+ }
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
+
+ if ((hbh1->length + 1) << 3 >
+ clib_net_to_host_u16 (ip1->payload_length))
+ {
+ error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next1 = IP_LOOKUP_NEXT_DROP;
+ goto outdual;
+ }
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
+
+ outdual:
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error0 == 0)
+ && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
+ next0 = hm->next_override;
+
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error1 == 0)
+ && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
+ next1 = hm->next_override;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = (hbh0->length + 1) << 3;
+ t->next_index = next0;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh0, trace_len);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ u32 trace_len = (hbh1->length + 1) << 3;
+ t->next_index = next1;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh1, trace_len);
+ }
+
+ }
+
+ b0->error = error_node->errors[error0];
+ b1->error = error_node->errors[error1];
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ u8 error0 = 0;
+
+ /* Speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ /*
+ * By default, use the next_index from the adjacency;
+ * an HBH option rarely redirects to a different node.
+ */
+ u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj0 = adj_get (adj_index0);
+ next0 = adj0->lookup_next_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
+ ((hbh0->length + 1) << 3));
+
+ /*
+ * Basic validity checks
+ */
+ if ((hbh0->length + 1) << 3 >
+ clib_net_to_host_u16 (ip0->payload_length))
+ {
+ error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next0 = IP_LOOKUP_NEXT_DROP;
+ goto out0;
+ }
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
+
+ out0:
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error0 == 0)
+ && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
+ next0 = hm->next_override;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = (hbh0->length + 1) << 3;
+ t->next_index = next0;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh0, trace_len);
+ }
+
+ b0->error = error_node->errors[error0];
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
+{
+ .function = ip6_hop_by_hop,
+ .name = "ip6-hop-by-hop",
+ .sibling_of = "ip6-lookup",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_hop_by_hop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
+ .error_strings = ip6_hop_by_hop_error_strings,
+ .n_next_nodes = 0,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop);
+
+static clib_error_t *
+ip6_hop_by_hop_init (vlib_main_t * vm)
+{
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+ memset (hm->options, 0, sizeof (hm->options));
+ memset (hm->trace, 0, sizeof (hm->trace));
+ hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
+
+void
+ip6_hbh_set_next_override (uword next)
+{
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+ hm->next_override = next;
+}
+
+int
+ip6_hbh_register_option (u8 option,
+ int options (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt),
+ u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
+{
+ ip6_main_t *im = &ip6_main;
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+ ASSERT (option < ARRAY_LEN (hm->options));
+
+ /* Already registered */
+ if (hm->options[option])
+ return (-1);
+
+ hm->options[option] = options;
+ hm->trace[option] = trace;
+
+ /* Set global variable */
+ im->hbh_enabled = 1;
+
+ return (0);
+}
+
+int
+ip6_hbh_unregister_option (u8 option)
+{
+ ip6_main_t *im = &ip6_main;
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+ ASSERT (option < ARRAY_LEN (hm->options));
+
+ /* Not registered */
+ if (!hm->options[option])
+ return (-1);
+
+ hm->options[option] = NULL;
+ hm->trace[option] = NULL;
+
+ /* Disable global knob if this was the last option configured */
+ int i;
+ bool found = false;
+ for (i = 0; i < 256; i++)
+ {
+ if (hm->options[i])
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ im->hbh_enabled = 0;
+
+ return (0);
+}
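+
+/*
+ * Usage sketch: how a client could hook an option into the h-b-h
+ * scanner via the registration API above. The option value (0x3e),
+ * the handler names and the guard macro are hypothetical, chosen only
+ * for illustration.
+ */
+#ifdef EXAMPLE_HBH_OPTION_SKETCH
+static int
+example_hbh_handler (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt)
+{
+ /* Accept the option without acting on it; nonzero would signal an error */
+ return 0;
+}
+
+static u8 *
+example_hbh_trace (u8 * s, ip6_hop_by_hop_option_t * opt)
+{
+ return format (s, "example option, length %d", opt->length);
+}
+
+static clib_error_t *
+example_hbh_register (vlib_main_t * vm)
+{
+ /* ip6_hbh_register_option returns -1 if the option is already taken */
+ if (ip6_hbh_register_option (0x3e, example_hbh_handler,
+ example_hbh_trace) < 0)
+ return clib_error_return (0, "option 0x3e already registered");
+ return 0;
+}
+#endif /* EXAMPLE_HBH_OPTION_SKETCH */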
+
+/* Global IP6 main. */
+ip6_main_t ip6_main;
+
+static clib_error_t *
+ip6_lookup_init (vlib_main_t * vm)
+{
+ ip6_main_t *im = &ip6_main;
+ clib_error_t *error;
+ uword i;
+
+ if ((error = vlib_call_init_function (vm, vnet_feature_init)))
+ return error;
+
+ for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
+ {
+ u32 j, i0, i1;
+
+ i0 = i / 32;
+ i1 = i % 32;
+
+ for (j = 0; j < i0; j++)
+ im->fib_masks[i].as_u32[j] = ~0;
+
+ if (i1)
+ im->fib_masks[i].as_u32[i0] =
+ clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
+ }
+
+ ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
+
+ if (im->lookup_table_nbuckets == 0)
+ im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
+
+ im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
+
+ if (im->lookup_table_size == 0)
+ im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
+ "ip6 FIB fwding table",
+ im->lookup_table_nbuckets, im->lookup_table_size);
+ BV (clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
+ "ip6 FIB non-fwding table",
+ im->lookup_table_nbuckets, im->lookup_table_size);
+
+ /* Create FIB with index 0 and table id of 0. */
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
+ FIB_SOURCE_DEFAULT_ROUTE);
+ mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
+ MFIB_SOURCE_DEFAULT_ROUTE);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip6_lookup_node.index);
+ pn->unformat_edit = unformat_pg_ip6_header;
+ }
+
+ /* Unless explicitly configured, don't process HBH options */
+ im->hbh_enabled = 0;
+
+ {
+ icmp6_neighbor_solicitation_header_t p;
+
+ memset (&p, 0, sizeof (p));
+
+ p.ip.ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+ p.ip.payload_length =
+ clib_host_to_net_u16 (sizeof (p) -
+ STRUCT_OFFSET_OF
+ (icmp6_neighbor_solicitation_header_t, neighbor));
+ p.ip.protocol = IP_PROTOCOL_ICMP6;
+ p.ip.hop_limit = 255;
+ ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
+
+ p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
+
+ p.link_layer_option.header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+ p.link_layer_option.header.n_data_u64s =
+ sizeof (p.link_layer_option) / sizeof (u64);
+
+ vlib_packet_template_init (vm,
+ &im->discover_neighbor_packet_template,
+ &p, sizeof (p),
+ /* alloc chunk size */ 8,
+ "ip6 neighbor discovery");
+ }
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip6_lookup_init);
+
+void
+ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip,
+ u8 * mac)
+{
+ ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+ /* Invert the "u" bit */
+ ip->as_u8[8] = mac[0] ^ (1 << 1);
+ ip->as_u8[9] = mac[1];
+ ip->as_u8[10] = mac[2];
+ ip->as_u8[11] = 0xFF;
+ ip->as_u8[12] = 0xFE;
+ ip->as_u8[13] = mac[3];
+ ip->as_u8[14] = mac[4];
+ ip->as_u8[15] = mac[5];
+}
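+
+/*
+ * Worked example (matches the "test ip6 link" sample output below):
+ * MAC 16:d9:e0:91:79:86 maps to fe80::14d9:e0ff:fe91:7986, since
+ * 0x16 ^ 0x02 = 0x14 for the inverted "u" bit and ff:fe is inserted
+ * between the upper and lower halves of the MAC.
+ */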
+
+void
+ip6_ethernet_mac_address_from_link_local_address (u8 * mac,
+ ip6_address_t * ip)
+{
+ /* Invert the previously inverted "u" bit */
+ mac[0] = ip->as_u8[8] ^ (1 << 1);
+ mac[1] = ip->as_u8[9];
+ mac[2] = ip->as_u8[10];
+ mac[3] = ip->as_u8[13];
+ mac[4] = ip->as_u8[14];
+ mac[5] = ip->as_u8[15];
+}
+
+static clib_error_t *
+test_ip6_link_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u8 mac[6];
+ ip6_address_t _a, *a = &_a;
+
+ if (unformat (input, "%U", unformat_ethernet_address, mac))
+ {
+ ip6_link_local_address_from_ethernet_mac_address (a, mac);
+ vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a);
+ ip6_ethernet_mac_address_from_link_local_address (mac, a);
+ vlib_cli_output (vm, "Original MAC address: %U",
+ format_ethernet_address, mac);
+ }
+
+ return 0;
+}
+
+/*?
+ * This command converts the given MAC Address into an IPv6 link-local
+ * address.
+ *
+ * @cliexpar
+ * Example of how to create an IPv6 link-local address:
+ * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
+ * Link local address: fe80::14d9:e0ff:fe91:7986
+ * Original MAC address: 16:d9:e0:91:79:86
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_link_command, static) =
+{
+ .path = "test ip6 link",
+ .function = test_ip6_link_command_fn,
+ .short_help = "test ip6 link <mac-address>",
+};
+/* *INDENT-ON* */
+
+int
+vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
+{
+ u32 fib_index;
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
+
+ if (~0 == fib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6,
+ flow_hash_config);
+
+ return 0;
+}
+
+static clib_error_t *
+set_ip6_flow_hash_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int matched = 0;
+ u32 table_id = 0;
+ u32 flow_hash_config = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_id))
+ matched = 1;
+#define _(a,v) \
+ else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
+ foreach_flow_hash_bit
+#undef _
+ else
+ break;
+ }
+
+ if (matched == 0)
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "no such FIB table %d", table_id);
+
+ default:
+ clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure the set of IPv6 fields used by the flow hash.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to set the flow hash on a given table:
+ * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
+ *
+ * Example of display the configured flow hash:
+ * @cliexstart{show ip6 fib}
+ * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * @::/0
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * fe80::/10
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::2/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::16/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1:ff00:0/104
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
+ * @::/0
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @::a:1:1:0:4/126
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
+ * [0] [@4]: ipv6-glean: af_packet0
+ * @::a:1:1:0:7/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
+ * [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
+ * fe80::/10
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * fe80::fe:3eff:fe3e:9222/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
+ * [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
+ * ff02::1/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::2/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::16/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1:ff00:0/104
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
+{
+ .path = "set ip6 flow-hash",
+ .short_help =
+ "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .function = set_ip6_flow_hash_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_ip6_local_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ int i;
+
+ vlib_cli_output (vm, "Protocols handled by ip6_local");
+ for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
+ {
+ if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
+ {
+
+ u32 node_index = vlib_get_node (vm,
+ ip6_local_node.index)->
+ next_nodes[lm->local_next_by_ip_protocol[i]];
+ vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
+ node_index);
+ }
+ }
+ return 0;
+}
+
+/*?
+ * Display the set of protocols handled by the local IPv6 stack.
+ *
+ * @cliexpar
+ * Example of how to display local protocol table:
+ * @cliexstart{show ip6 local}
+ * Protocols handled by ip6_local
+ * 17
+ * 43
+ * 58
+ * 115
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip6_local, static) =
+{
+ .path = "show ip6 local",
+ .function = show_ip6_local_command_fn,
+ .short_help = "show ip6 local",
+};
+/* *INDENT-ON* */
+
+int
+vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ ip6_main_t *ipm = &ip6_main;
+ ip_lookup_main_t *lm = &ipm->lookup_main;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ ip6_address_t *if_addr;
+
+ if (pool_is_free_index (im->sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+ if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
+ lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
+
+ if_addr = ip6_interface_first_address (ipm, sw_if_index);
+
+ if (NULL != if_addr)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = *if_addr,
+ };
+ u32 fib_index;
+
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
+ sw_if_index);
+
+ if (table_index != (u32) ~ 0)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP6,
+ classify_dpo_create (DPO_PROTO_IP6, table_index));
+
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE, &dpo);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index,
+ &pfx, FIB_SOURCE_CLASSIFY);
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_ip6_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 table_index = ~0;
+ int table_index_set = 0;
+ u32 sw_if_index = ~0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table-index %d", &table_index))
+ table_index_set = 1;
+ else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnet_get_main (), &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (table_index_set == 0)
+ return clib_error_return (0, "classify table-index must be specified");
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface / subif must be specified");
+
+ rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+ return clib_error_return (0, "No such interface");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "No such classifier table");
+ }
+ return 0;
+}
+
+/*?
+ * Assign a classification table to an interface. The classification
+ * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
+ * commands. Once the table is created, use this command to filter packets
+ * on an interface.
+ *
+ * @cliexpar
+ * Example of how to assign a classification table to an interface:
+ * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
+{
+ .path = "set ip6 classify",
+ .short_help =
+ "set ip6 classify intfc <interface> table-index <classify-idx>",
+ .function = set_ip6_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip6_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ ip6_main_t *im = &ip6_main;
+ uword heapsize = 0;
+ u32 tmp;
+ u32 nbuckets = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "hash-buckets %d", &tmp))
+ nbuckets = tmp;
+ else if (unformat (input, "heap-size %dm", &tmp))
+ heapsize = ((u64) tmp) << 20;
+ else if (unformat (input, "heap-size %dM", &tmp))
+ heapsize = ((u64) tmp) << 20;
+ else if (unformat (input, "heap-size %dg", &tmp))
+ heapsize = ((u64) tmp) << 30;
+ else if (unformat (input, "heap-size %dG", &tmp))
+ heapsize = ((u64) tmp) << 30;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ im->lookup_table_nbuckets = nbuckets;
+ im->lookup_table_size = heapsize;
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c
new file mode 100644
index 00000000..14fbb392
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop.c
@@ -0,0 +1,1166 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/classify/vnet_classify.h>
+
+/**
+ * @file
+ * @brief In-band OAM (iOAM).
+ *
+ * In-band OAM (iOAM) is an implementation study to record operational
+ * information in the packet while the packet traverses a path between
+ * two points in the network.
+ *
+ * VPP can function as in-band OAM encapsulating, transit and
+ * decapsulating node. In this version of VPP in-band OAM data is
+ * transported as options in an IPv6 hop-by-hop extension header. Hence
+ * in-band OAM can be enabled for IPv6 traffic.
+ */
+
+ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
+
+#define foreach_ip6_hbyh_ioam_input_next \
+ _(IP6_REWRITE, "ip6-rewrite") \
+ _(IP6_LOOKUP, "ip6-lookup") \
+ _(DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) IP6_HBYH_IOAM_INPUT_NEXT_##s,
+ foreach_ip6_hbyh_ioam_input_next
+#undef _
+ IP6_HBYH_IOAM_INPUT_N_NEXT,
+} ip6_hbyh_ioam_input_next_t;
+
+static uword
+unformat_opaque_ioam (unformat_input_t * input, va_list * args)
+{
+ u64 *opaquep = va_arg (*args, u64 *);
+ u8 *flow_name = NULL;
+ uword ret = 0;
+
+ if (unformat (input, "ioam-encap %s", &flow_name))
+ {
+ *opaquep = ioam_flow_add (1, flow_name);
+ ret = 1;
+ }
+ else if (unformat (input, "ioam-decap %s", &flow_name))
+ {
+ *opaquep = ioam_flow_add (0, flow_name);
+ ret = 1;
+ }
+
+ vec_free (flow_name);
+ return ret;
+}
+
+u8 *
+get_flow_name_from_flow_ctx (u32 flow_ctx)
+{
+ flow_data_t *flow = NULL;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u32 index;
+
+ index = IOAM_MASK_DECAP_BIT (flow_ctx);
+
+ if (pool_is_free_index (hm->flows, index))
+ return NULL;
+
+ flow = pool_elt_at_index (hm->flows, index);
+ return (flow->flow_name);
+}
+
+/* The main h-b-h tracer will be invoked, no need to do much here */
+int
+ip6_hbh_add_register_option (u8 option,
+ u8 size,
+ int rewrite_options (u8 * rewrite_string,
+ u8 * rewrite_size))
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->add_options));
+
+ /* Already registered */
+ if (hm->add_options[option])
+ return (-1);
+
+ hm->add_options[option] = rewrite_options;
+ hm->options_size[option] = size;
+
+ return (0);
+}
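+
+/*
+ * Usage sketch: an encap-side client registering a fixed-size option
+ * body with the API above. The option value (0x3c), the sizes and the
+ * names are hypothetical, chosen only for illustration.
+ */
+#ifdef EXAMPLE_IOAM_ADD_OPTION_SKETCH
+static int
+example_rewrite_handler (u8 * rewrite_string, u8 * rewrite_size)
+{
+ ip6_hop_by_hop_option_t *opt = (ip6_hop_by_hop_option_t *) rewrite_string;
+
+ /* Fill in the option TLV inside the pre-sized rewrite area */
+ opt->type = 0x3c;
+ opt->length = *rewrite_size - sizeof (ip6_hop_by_hop_option_t);
+ return 0; /* nonzero would abort the rewrite */
+}
+
+static void
+example_register_add_option (void)
+{
+ /* 8 octets total: 2 byte TLV header plus 6 bytes of option data */
+ (void) ip6_hbh_add_register_option (0x3c, 8, example_rewrite_handler);
+}
+#endif /* EXAMPLE_IOAM_ADD_OPTION_SKETCH */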
+
+int
+ip6_hbh_add_unregister_option (u8 option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->add_options));
+
+ /* Not registered */
+ if (!hm->add_options[option])
+ return (-1);
+
+ hm->add_options[option] = NULL;
+ hm->options_size[option] = 0;
+ return (0);
+}
+
+/* Config handler registration */
+int
+ip6_hbh_config_handler_register (u8 option,
+ int config_handler (void *data, u8 disable))
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->config_handler));
+
+ /* Already registered */
+ if (hm->config_handler[option])
+ return (VNET_API_ERROR_INVALID_REGISTRATION);
+
+ hm->config_handler[option] = config_handler;
+
+ return (0);
+}
+
+int
+ip6_hbh_config_handler_unregister (u8 option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->config_handler));
+
+ /* Not registered */
+ if (!hm->config_handler[option])
+ return (VNET_API_ERROR_INVALID_REGISTRATION);
+
+ hm->config_handler[option] = NULL;
+ return (0);
+}
+
+/* Flow handler registration */
+int
+ip6_hbh_flow_handler_register (u8 option,
+ u32 ioam_flow_handler (u32 flow_ctx, u8 add))
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->flow_handler));
+
+ /* Already registered */
+ if (hm->flow_handler[option])
+ return (VNET_API_ERROR_INVALID_REGISTRATION);
+
+ hm->flow_handler[option] = ioam_flow_handler;
+
+ return (0);
+}
+
+int
+ip6_hbh_flow_handler_unregister (u8 option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->flow_handler));
+
+ /* Not registered */
+ if (!hm->flow_handler[option])
+ return (VNET_API_ERROR_INVALID_REGISTRATION);
+
+ hm->flow_handler[option] = NULL;
+ return (0);
+}
+
+typedef struct
+{
+ u32 next_index;
+} ip6_add_hop_by_hop_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_add_hop_by_hop_trace_t *t = va_arg (*args,
+ ip6_add_hop_by_hop_trace_t *);
+
+ s = format (s, "IP6_ADD_HOP_BY_HOP: next index %d", t->next_index);
+ return s;
+}
+
+vlib_node_registration_t ip6_add_hop_by_hop_node;
+
+#define foreach_ip6_add_hop_by_hop_error \
+_(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
+
+typedef enum
+{
+#define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_add_hop_by_hop_error
+#undef _
+ IP6_ADD_HOP_BY_HOP_N_ERROR,
+} ip6_add_hop_by_hop_error_t;
+
+static char *ip6_add_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_add_hop_by_hop_error
+#undef _
+};
+
+static uword
+ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ u32 processed = 0;
+ u8 *rewrite = hm->rewrite;
+ u32 rewrite_length = vec_len (rewrite);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ ip6_header_t *ip0, *ip1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ u64 *copy_src0, *copy_dst0, *copy_src1, *copy_dst1;
+ u16 new_l0, new_l1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data - rewrite_length,
+ 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data - rewrite_length,
+ 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* Copy the ip header left by the required amount */
+ copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+ copy_dst1 = (u64 *) (((u8 *) ip1) - rewrite_length);
+ copy_src0 = (u64 *) ip0;
+ copy_src1 = (u64 *) ip1;
+
+ copy_dst0[0] = copy_src0[0];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+
+ copy_dst1[0] = copy_src1[0];
+ copy_dst1[1] = copy_src1[1];
+ copy_dst1[2] = copy_src1[2];
+ copy_dst1[3] = copy_src1[3];
+ copy_dst1[4] = copy_src1[4];
+
+ vlib_buffer_advance (b0, -(word) rewrite_length);
+ vlib_buffer_advance (b1, -(word) rewrite_length);
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_length);
+ clib_memcpy (hbh1, rewrite, rewrite_length);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh0->protocol = ip0->protocol;
+ hbh1->protocol = ip1->protocol;
+ ip0->protocol = 0;
+ ip1->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+ new_l1 =
+ clib_net_to_host_u16 (ip1->payload_length) + rewrite_length;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+ /* Populate the (first) h-b-h list elt */
+ next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+ next1 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+
+
+ /* $$$$$ End of processing 2 x packets $$$$$ */
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ }
+ }
+ processed += 2;
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ u64 *copy_src0, *copy_dst0;
+ u16 new_l0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Copy the ip header left by the required amount */
+ copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+ copy_src0 = (u64 *) ip0;
+
+ copy_dst0[0] = copy_src0[0];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+ vlib_buffer_advance (b0, -(word) rewrite_length);
+ ip0 = vlib_buffer_get_current (b0);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_length);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh0->protocol = ip0->protocol;
+ ip0->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ /* Populate the (first) h-b-h list elt */
+ next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ processed++;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
+ IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) =
+{
+ .function = ip6_add_hop_by_hop_node_fn,
+ .name = "ip6-add-hop-by-hop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_add_hop_by_hop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (ip6_add_hop_by_hop_error_strings),
+ .error_strings = ip6_add_hop_by_hop_error_strings,
+ /* See ip/lookup.h */
+ .n_next_nodes = IP6_HBYH_IOAM_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [IP6_HBYH_IOAM_INPUT_NEXT_##s] = n,
+ foreach_ip6_hbyh_ioam_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_hop_by_hop_node,
+ ip6_add_hop_by_hop_node_fn);
+
+/* The main h-b-h tracer was already invoked, no need to do much here */
+typedef struct
+{
+ u32 next_index;
+} ip6_pop_hop_by_hop_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_pop_hop_by_hop_trace_t *t =
+ va_arg (*args, ip6_pop_hop_by_hop_trace_t *);
+
+ s = format (s, "IP6_POP_HOP_BY_HOP: next index %d", t->next_index);
+ return s;
+}
+
+int
+ip6_hbh_pop_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt))
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->pop_options));
+
+ /* Already registered */
+ if (hm->pop_options[option])
+ return (-1);
+
+ hm->pop_options[option] = options;
+
+ return (0);
+}
+
+int
+ip6_hbh_pop_unregister_option (u8 option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->pop_options));
+
+ /* Not registered */
+ if (!hm->pop_options[option])
+ return (-1);
+
+ hm->pop_options[option] = NULL;
+ return (0);
+}
+
+vlib_node_registration_t ip6_pop_hop_by_hop_node;
+
+#define foreach_ip6_pop_hop_by_hop_error \
+_(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \
+_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options") \
+_(OPTION_FAILED, "ip6 pop hop-by-hop failed to process")
+
+typedef enum
+{
+#define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_pop_hop_by_hop_error
+#undef _
+ IP6_POP_HOP_BY_HOP_N_ERROR,
+} ip6_pop_hop_by_hop_error_t;
+
+static char *ip6_pop_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_pop_hop_by_hop_error
+#undef _
+};
+
+static inline void
+ioam_pop_hop_by_hop_processing (vlib_main_t * vm,
+ ip6_header_t * ip0,
+ ip6_hop_by_hop_header_t * hbh0,
+ vlib_buffer_t * b)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ u8 type0;
+
+ if (!hbh0 || !ip0)
+ return;
+
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 = (ip6_hop_by_hop_option_t *)
+ ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+ opt0 = (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + 1);
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (hm->pop_options[type0])
+ {
+ if ((*hm->pop_options[type0]) (b, ip0, opt0) < 0)
+ {
+ vlib_node_increment_counter (vm,
+ ip6_pop_hop_by_hop_node.index,
+ IP6_POP_HOP_BY_HOP_ERROR_OPTION_FAILED,
+ 1);
+ }
+ }
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ }
+}
+
+static uword
+ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ u32 processed = 0;
+ u32 no_header = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 adj_index0, adj_index1;
+ ip6_header_t *ip0, *ip1;
+ ip_adjacency_t *adj0, *adj1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ u64 *copy_dst0, *copy_src0, *copy_dst1, *copy_src1;
+ u16 new_l0, new_l1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+ adj1 = adj_get (adj_index1);
+
+ next0 = adj0->lookup_next_index;
+ next1 = adj1->lookup_next_index;
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+
+ ioam_pop_hop_by_hop_processing (vm, ip0, hbh0, b0);
+ ioam_pop_hop_by_hop_processing (vm, ip1, hbh1, b1);
+
+ vlib_buffer_advance (b0, (hbh0->length + 1) << 3);
+ vlib_buffer_advance (b1, (hbh1->length + 1) << 3);
+
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+ ((hbh0->length + 1) << 3);
+ new_l1 = clib_net_to_host_u16 (ip1->payload_length) -
+ ((hbh1->length + 1) << 3);
+
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+ ip0->protocol = hbh0->protocol;
+ ip1->protocol = hbh1->protocol;
+
+ copy_src0 = (u64 *) ip0;
+ copy_src1 = (u64 *) ip1;
+ copy_dst0 = copy_src0 + (hbh0->length + 1);
+ copy_dst0[4] = copy_src0[4];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[0] = copy_src0[0];
+ copy_dst1 = copy_src1 + (hbh1->length + 1);
+ copy_dst1[4] = copy_src1[4];
+ copy_dst1[3] = copy_src1[3];
+ copy_dst1[2] = copy_src1[2];
+ copy_dst1[1] = copy_src1[1];
+ copy_dst1[0] = copy_src1[0];
+ processed += 2;
+ /* $$$$$ End of processing 2 x packets $$$$$ */
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 adj_index0;
+ ip6_header_t *ip0;
+ ip_adjacency_t *adj0;
+ ip6_hop_by_hop_header_t *hbh0;
+ u64 *copy_dst0, *copy_src0;
+ u16 new_l0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+
+ /* By default, use the next_index from the adjacency. */
+ next0 = adj0->lookup_next_index;
+
+ /* Perfectly normal to end up here w/ out h-b-h header */
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+
+ /* TODO:Temporarily doing it here.. do this validation in end_of_path_cb */
+ ioam_pop_hop_by_hop_processing (vm, ip0, hbh0, b0);
+ /* Pop the trace data */
+ vlib_buffer_advance (b0, (hbh0->length + 1) << 3);
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+ ((hbh0->length + 1) << 3);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip0->protocol = hbh0->protocol;
+ copy_src0 = (u64 *) ip0;
+ copy_dst0 = copy_src0 + (hbh0->length + 1);
+ copy_dst0[4] = copy_src0[4];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[0] = copy_src0[0];
+ processed++;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
+ IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed);
+ vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
+ IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
+{
+ .function = ip6_pop_hop_by_hop_node_fn,
+ .name = "ip6-pop-hop-by-hop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_pop_hop_by_hop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "ip6-lookup",
+ .n_errors = ARRAY_LEN (ip6_pop_hop_by_hop_error_strings),
+ .error_strings = ip6_pop_hop_by_hop_error_strings,
+ /* See ip/lookup.h */
+ .n_next_nodes = 0,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_pop_hop_by_hop_node,
+ ip6_pop_hop_by_hop_node_fn);
+
+static clib_error_t *
+ip6_hop_by_hop_ioam_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return (error);
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ hm->vlib_main = vm;
+ hm->vnet_main = vnet_get_main ();
+ hm->unix_time_0 = (u32) time (0); /* Store starting time */
+ hm->vlib_time_0 = vlib_time_now (vm);
+ hm->ioam_flag = IOAM_HBYH_MOD;
+ memset (hm->add_options, 0, sizeof (hm->add_options));
+ memset (hm->pop_options, 0, sizeof (hm->pop_options));
+ memset (hm->options_size, 0, sizeof (hm->options_size));
+
+ vnet_classify_register_unformat_opaque_index_fn (unformat_opaque_ioam);
+
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init);
+
+int
+ip6_ioam_set_rewrite (u8 ** rwp, int has_trace_option,
+ int has_pot_option, int has_seqno_option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u8 *rewrite = NULL;
+ u32 size, rnd_size;
+ ip6_hop_by_hop_header_t *hbh;
+ u8 *current;
+ u8 *trace_data_size = NULL;
+ u8 *pot_data_size = NULL;
+
+ vec_free (*rwp);
+
+ if (has_trace_option == 0 && has_pot_option == 0)
+ return -1;
+
+ /* Work out how much space we need */
+ size = sizeof (ip6_hop_by_hop_header_t);
+
+ if (has_trace_option
+ && hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0)
+ {
+ size += hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST];
+ }
+ if (has_pot_option
+ && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+ {
+ size += hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+ }
+
+ if (has_seqno_option)
+ {
+ size += hm->options_size[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE];
+ }
+
+ /* Round to a multiple of 8 octets */
+ rnd_size = (size + 7) & ~7;
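+ /* e.g. a 2 octet header plus 24 octets of options: size 26 -> rnd_size 32 */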
+
+ /* allocate it, zero-fill / pad by construction */
+ vec_validate (rewrite, rnd_size - 1);
+
+ hbh = (ip6_hop_by_hop_header_t *) rewrite;
+ /* Length of header in 8 octet units, not incl first 8 octets */
+ hbh->length = (rnd_size >> 3) - 1;
+ current = (u8 *) (hbh + 1);
+
+ if (has_trace_option
+ && hm->add_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0)
+ {
+ if (0 != (hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST]))
+ {
+ trace_data_size =
+ &hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST];
+ if (0 ==
+ hm->add_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (current,
+ trace_data_size))
+ current += *trace_data_size;
+ }
+ }
+ if (has_pot_option
+ && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+ {
+ pot_data_size =
+ &hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+ if (0 ==
+ hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (current,
+ pot_data_size))
+ current += *pot_data_size;
+ }
+
+ if (has_seqno_option &&
+ (hm->add_options[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] != 0))
+ {
+ u8 *e2e_size = &hm->options_size[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE];
+ if (0 == hm->add_options[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] (current,
+ e2e_size))
+ current += *e2e_size;
+ }
+
+ *rwp = rewrite;
+ return 0;
+}
+
+clib_error_t *
+clear_ioam_rewrite_fn (void)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ vec_free (hm->rewrite);
+ hm->rewrite = 0;
+ hm->has_trace_option = 0;
+ hm->has_pot_option = 0;
+ hm->has_seqno_option = 0;
+ hm->has_analyse_option = 0;
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST])
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (NULL, 1);
+
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT])
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (NULL, 1);
+
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE])
+ {
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] ((void *)
+ &hm->has_analyse_option,
+ 1);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+clear_ioam_rewrite_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return (clear_ioam_rewrite_fn ());
+}
+
+/*?
+ * This command clears all the In-band OAM (iOAM) features enabled by
+ * the '<em>set ioam rewrite</em>' command. Use '<em>show ioam summary</em>' to
+ * verify that the configured settings have been cleared.
+ *
+ * @cliexpar
+ * Example of how to clear iOAM features:
+ * @cliexcmd{clear ioam rewrite}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = {
+ .path = "clear ioam rewrite",
+ .short_help = "clear ioam rewrite",
+ .function = clear_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+ip6_ioam_enable (int has_trace_option, int has_pot_option,
+ int has_seqno_option, int has_analyse_option)
+{
+ int rv;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ rv = ip6_ioam_set_rewrite (&hm->rewrite, has_trace_option,
+ has_pot_option, has_seqno_option);
+
+ switch (rv)
+ {
+ case 0:
+ if (has_trace_option)
+ {
+ hm->has_trace_option = has_trace_option;
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST])
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (NULL,
+ 0);
+ }
+
+ if (has_pot_option)
+ {
+ hm->has_pot_option = has_pot_option;
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT])
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (NULL,
+ 0);
+ }
+ hm->has_analyse_option = has_analyse_option;
+ if (has_seqno_option)
+ {
+ hm->has_seqno_option = has_seqno_option;
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE])
+ {
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] ((void *)
+ &has_analyse_option,
+ 0);
+ }
+ }
+ break;
+
+ default:
+ return clib_error_return_code (0, rv, 0,
+ "ip6_ioam_set_rewrite returned %d", rv);
+ }
+
+ return 0;
+}
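+
+/*
+ * Usage sketch: enabling trace plus the e2e sequence number option
+ * programmatically, equivalent to "set ioam rewrite trace seqno".
+ * The function name and guard macro are hypothetical.
+ */
+#ifdef EXAMPLE_IOAM_ENABLE_SKETCH
+static clib_error_t *
+example_enable_ioam (void)
+{
+ return ip6_ioam_enable (1 /* trace */, 0 /* pot */,
+ 1 /* seqno */, 0 /* analyse */);
+}
+#endif /* EXAMPLE_IOAM_ENABLE_SKETCH */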
+
+
+static clib_error_t *
+ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int has_trace_option = 0;
+ int has_pot_option = 0;
+ int has_seqno_option = 0;
+ int has_analyse_option = 0;
+ clib_error_t *rv = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace"))
+ has_trace_option = 1;
+ else if (unformat (input, "pot"))
+ has_pot_option = 1;
+ else if (unformat (input, "seqno"))
+ has_seqno_option = 1;
+ else if (unformat (input, "analyse"))
+ has_analyse_option = 1;
+ else
+ break;
+ }
+
+
+ rv = ip6_ioam_enable (has_trace_option, has_pot_option,
+ has_seqno_option, has_analyse_option);
+
+ return rv;
+}
+
+/*?
+ * This command is used to enable In-band OAM (iOAM) features on IPv6.
+ * '<em>trace</em>' is used to enable the iOAM trace feature. '<em>pot</em>' is
+ * used to enable the Proof Of Transit feature. '<em>seqno</em>' is used to
+ * enable the edge-to-edge sequence number (per packet counter) option.
+ * '<em>analyse</em>' is used to enable analysis of the iOAM data on the
+ * decap node.
+ *
+ * Use '<em>clear ioam rewrite</em>' to disable all features enabled by this
+ * command. Use '<em>show ioam summary</em>' to verify the configured settings.
+ *
+ * @cliexpar
+ * Example of how to enable trace and pot:
+ * @cliexcmd{set ioam rewrite trace pot}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
+ .path = "set ioam rewrite",
+ .short_help = "set ioam [trace] [pot] [seqno] [analyse]",
+ .function = ip6_set_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u8 *s = 0;
+
+
+ if (!is_zero_ip6_address (&hm->adj))
+ {
+ s = format (s, " REWRITE FLOW CONFIGS - \n");
+ s = format (s, " Destination Address : %U\n",
+ format_ip6_address, &hm->adj, sizeof (ip6_address_t));
+ s =
+ format (s, " Flow operation : %d (%s)\n",
+ hm->ioam_flag,
+ (hm->ioam_flag ==
+ IOAM_HBYH_ADD) ? "Add" : ((hm->ioam_flag ==
+ IOAM_HBYH_MOD) ? "Mod" : "Pop"));
+ }
+ else
+ {
+ s = format (s, " REWRITE FLOW CONFIGS - Not configured\n");
+ }
+
+
+ s = format (s, " TRACE OPTION - %d (%s)\n",
+ hm->has_trace_option,
+ (hm->has_trace_option ? "Enabled" : "Disabled"));
+ if (hm->has_trace_option)
+ s =
+ format (s,
+ "Try 'show ioam trace' and 'show ioam-trace profile' for more information\n");
+
+ s = format (s, " POT OPTION - %d (%s)\n",
+ hm->has_pot_option,
+ (hm->has_pot_option ? "Enabled" : "Disabled"));
+ if (hm->has_pot_option)
+ s =
+ format (s,
+ "Try 'show ioam pot' and 'show pot profile' for more information\n");
+
+ s = format (s, " EDGE TO EDGE - SeqNo OPTION - %d (%s)\n",
+ hm->has_seqno_option,
+ hm->has_seqno_option ? "Enabled" : "Disabled");
+ if (hm->has_seqno_option)
+ s = format (s, "Try 'show ioam e2e' for more information\n");
+
+ s = format (s, " iOAM Analyse OPTION - %d (%s)\n",
+ hm->has_analyse_option,
+ hm->has_analyse_option ? "Enabled" : "Disabled");
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+/*?
+ * This command displays the current configuration data for In-band
+ * OAM (iOAM).
+ *
+ * @cliexpar
+ * Example to show the iOAM configuration:
+ * @cliexstart{show ioam summary}
+ * REWRITE FLOW CONFIGS -
+ * Destination Address : ff02::1
+ * Flow operation : 2 (Pop)
+ * TRACE OPTION - 1 (Enabled)
+ * Try 'show ioam trace and show ioam-trace profile' for more information
+ * POT OPTION - 1 (Enabled)
+ * Try 'show ioam pot and show pot profile' for more information
+ * EDGE TO EDGE - PPC OPTION - 1 (Encap)
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
+ .path = "show ioam summary",
+ .short_help = "show ioam summary",
+ .function = ip6_show_ioam_summary_cmd_fn,
+};
+/* *INDENT-ON* */
+
+void
+vnet_register_ioam_end_of_path_callback (void *cb)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ hm->ioam_end_of_path_cb = cb;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop.h b/src/vnet/ip/ip6_hop_by_hop.h
new file mode 100644
index 00000000..5f12f647
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop.h
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_hop_by_hop_ioam_h__
+#define __included_ip6_hop_by_hop_ioam_h__
+
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/ip/ip.h>
+
+
+#define MAX_IP6_HBH_OPTION 256
+
+/* The MS bit of the flow context is set when the node is a decap node */
+#define IOAM_DECAP_BIT 0x80000000
+
+#define IOAM_DEAP_ENABLED(opaque_data) (opaque_data & IOAM_DECAP_BIT)
+
+#define IOAM_SET_DECAP(opaque_data) \
+ (opaque_data |= IOAM_DECAP_BIT)
+
+#define IOAM_MASK_DECAP_BIT(x) (x & ~IOAM_DECAP_BIT)
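+
+/*
+ * e.g. flow index 5 on a decap node: IOAM_SET_DECAP yields 0x80000005,
+ * and IOAM_MASK_DECAP_BIT recovers the pool index 5.
+ */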
+
+/*
+ * Stores the run time flow data of hbh options
+ */
+typedef struct
+{
+ u32 ctx[MAX_IP6_HBH_OPTION];
+ u8 flow_name[64];
+} flow_data_t;
+
+typedef struct
+{
+ /* The current rewrite we're using */
+ u8 *rewrite;
+
+ /* Trace data processing callback */
+ void *ioam_end_of_path_cb;
+ /* Configuration data */
+ /* Adjacency */
+ ip6_address_t adj;
+#define IOAM_HBYH_ADD 0
+#define IOAM_HBYH_MOD 1
+#define IOAM_HBYH_POP 2
+ u8 ioam_flag;
+ /* time scale transform. Joy. */
+ u32 unix_time_0;
+ f64 vlib_time_0;
+
+
+ /* Trace option */
+ u8 has_trace_option;
+
+ /* Pot option */
+ u8 has_pot_option;
+
+ /* Edge-to-edge sequence number (per packet counter) option */
+ u8 has_seqno_option;
+
+ /* Enables analysis of iOAM data on the decap node */
+ u8 has_analyse_option;
+
+ /* Array of function pointers to ADD and POP HBH option handling routines */
+ u8 options_size[MAX_IP6_HBH_OPTION];
+ int (*add_options[MAX_IP6_HBH_OPTION]) (u8 * rewrite_string,
+ u8 * rewrite_size);
+ int (*pop_options[MAX_IP6_HBH_OPTION]) (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt);
+ int (*get_sizeof_options[MAX_IP6_HBH_OPTION]) (u32 * rewrite_size);
+ int (*config_handler[MAX_IP6_HBH_OPTION]) (void *data, u8 disable);
+
+ /* Array of function pointers to handle hbh options being used with classifier */
+ u32 (*flow_handler[MAX_IP6_HBH_OPTION]) (u32 flow_ctx, u8 add);
+ flow_data_t *flows;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ip6_hop_by_hop_ioam_main_t;
+
+extern ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
+
+extern clib_error_t *ip6_ioam_enable (int has_trace_option,
+ int has_pot_option,
+ int has_seqno_option,
+ int has_analyse_option);
+
+extern int ip6_ioam_set_destination (ip6_address_t * addr, u32 mask_width,
+ u32 vrf_id, int is_add, int is_pop,
+ int is_none);
+
+extern clib_error_t *clear_ioam_rewrite_fn (void);
+
+static inline u8
+is_zero_ip4_address (ip4_address_t * a)
+{
+ return (a->as_u32 == 0);
+}
+
+static inline void
+copy_ip6_address (ip6_address_t * dst, ip6_address_t * src)
+{
+ dst->as_u64[0] = src->as_u64[0];
+ dst->as_u64[1] = src->as_u64[1];
+}
+
+static inline void
+set_zero_ip6_address (ip6_address_t * a)
+{
+ a->as_u64[0] = 0;
+ a->as_u64[1] = 0;
+}
+
+static inline u8
+cmp_ip6_address (ip6_address_t * a1, ip6_address_t * a2)
+{
+ return ((a1->as_u64[0] == a2->as_u64[0])
+ && (a1->as_u64[1] == a2->as_u64[1]));
+}
+
+static inline u8
+is_zero_ip6_address (ip6_address_t * a)
+{
+ return ((a->as_u64[0] == 0) && (a->as_u64[1] == 0));
+}
+
+int ip6_hbh_add_register_option (u8 option,
+ u8 size,
+ int rewrite_options (u8 * rewrite_string,
+ u8 * size));
+int ip6_hbh_add_unregister_option (u8 option);
+
+int ip6_hbh_pop_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt));
+int ip6_hbh_pop_unregister_option (u8 option);
+
+int
+ip6_hbh_get_sizeof_register_option (u8 option,
+ int get_sizeof_hdr_options (u32 *
+ rewrite_size));
+
+int
+ip6_ioam_set_rewrite (u8 ** rwp, int has_trace_option,
+ int has_pot_option, int has_seq_no);
+
+int
+ip6_hbh_config_handler_register (u8 option,
+ int config_handler (void *data, u8 disable));
+
+int ip6_hbh_config_handler_unregister (u8 option);
+
+int ip6_hbh_flow_handler_register (u8 option,
+ u32 ioam_flow_handler (u32 flow_ctx,
+ u8 add));
+
+int ip6_hbh_flow_handler_unregister (u8 option);
+
+u8 *get_flow_name_from_flow_ctx (u32 flow_ctx);
+
+static inline flow_data_t *
+get_flow (u32 index)
+{
+ flow_data_t *flow = NULL;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ if (pool_is_free_index (hm->flows, index))
+ return NULL;
+
+ flow = pool_elt_at_index (hm->flows, index);
+ return flow;
+}
+
+static inline u32
+get_flow_data_from_flow_ctx (u32 flow_ctx, u8 option)
+{
+ flow_data_t *flow = NULL;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u32 index;
+
+ index = IOAM_MASK_DECAP_BIT (flow_ctx);
+ flow = &hm->flows[index];
+ return (flow->ctx[option]);
+}
+
+static inline u8
+is_seqno_enabled (void)
+{
+ return (ip6_hop_by_hop_ioam_main.has_seqno_option);
+}
+
+int ip6_trace_profile_setup (void);
+
+static inline u32
+ioam_flow_add (u8 encap, u8 * flow_name)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ flow_data_t *flow = 0;
+ u32 index = 0;
+ u32 i;
+
+ pool_get (hm->flows, flow);
+ memset (flow, 0, sizeof (flow_data_t));
+
+ index = flow - hm->flows;
+ strncpy ((char *) flow->flow_name, (char *) flow_name, 31);
+
+ if (!encap)
+ IOAM_SET_DECAP (index);
+
+ for (i = 0; i < MAX_IP6_HBH_OPTION; i++)
+ {
+ if (hm->flow_handler[i])
+ flow->ctx[i] = hm->flow_handler[i] (index, 1);
+ }
+ return (index);
+}
+
+always_inline ip6_hop_by_hop_option_t *
+ip6_hbh_get_option (ip6_hop_by_hop_header_t * hbh0, u8 option_to_search)
+{
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ u8 type0;
+
+ if (!hbh0)
+ return NULL;
+
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 = (ip6_hop_by_hop_option_t *)
+ ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+ opt0 = (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + 1);
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (type0 == option_to_search)
+ return opt0;
+ break;
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ }
+ return NULL;
+}
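+
+/*
+ * Usage sketch: locating the trace option in a received h-b-h header;
+ * hbh0 is assumed to point just past the ip6 header. The function name
+ * and guard macro are hypothetical.
+ */
+#ifdef EXAMPLE_HBH_GET_OPTION_SKETCH
+static ip6_hop_by_hop_option_t *
+example_find_trace_option (ip6_hop_by_hop_header_t * hbh0)
+{
+ return ip6_hbh_get_option (hbh0, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+}
+#endif /* EXAMPLE_HBH_GET_OPTION_SKETCH */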
+
+#endif /* __included_ip6_hop_by_hop_ioam_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop_packet.h b/src/vnet/ip/ip6_hop_by_hop_packet.h
new file mode 100644
index 00000000..dd8c7d5e
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop_packet.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_hop_by_hop_packet_h__
+#define __included_ip6_hop_by_hop_packet_h__
+
+typedef struct
+{
+ /* Protocol for next header */
+ u8 protocol;
+ /*
+ * Length of hop_by_hop header in 8 octet units,
+ * not including the first 8 octets
+ */
+ u8 length;
+} ip6_hop_by_hop_header_t;
+
+typedef struct
+{
+ /* Option Type */
+#define HBH_OPTION_TYPE_SKIP_UNKNOWN (0x00)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN (0x40)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP (0x80)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST (0xc0)
+#define HBH_OPTION_TYPE_HIGH_ORDER_BITS (0xc0)
+#define HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE (1<<5)
+ u8 type;
+ /* Length in octets of the option data field */
+ u8 length;
+} ip6_hop_by_hop_option_t;
+
+/* $$$$ IANA banana constants */
+#define HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST 59 /* Third highest bit set (change en-route) */
+#define HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT 60 /* Third highest bit set (change en-route) */
+#define HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE 29
+
+#endif /* __included_ip6_hop_by_hop_packet_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
new file mode 100644
index 00000000..ffdc4727
--- /dev/null
+++ b/src/vnet/ip/ip6_input.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_input.c: IP v6 input node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+typedef struct
+{
+ u8 packet_data[64];
+} ip6_input_trace_t;
+
+static u8 *
+format_ip6_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip6_input_trace_t *t = va_arg (*va, ip6_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip6_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ IP6_INPUT_NEXT_DROP,
+ IP6_INPUT_NEXT_LOOKUP,
+ IP6_INPUT_NEXT_LOOKUP_MULTICAST,
+ IP6_INPUT_NEXT_ICMP_ERROR,
+ IP6_INPUT_N_NEXT,
+} ip6_input_next_t;
+
+/* Validate IPv6 packets: pass good packets on to the forwarding
+   code and drop exception packets. */
+static uword
+ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 n_left_from, *from, *to_next;
+ ip6_input_next_t next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_input_node.index);
+ vlib_simple_counter_main_t *cm;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip6_input_trace_t));
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_IP6);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
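+      /* Dual loop: process two packets per iteration while prefetching
+       * the buffer headers and packet data of the following pair. */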
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ u32 pi0, sw_if_index0, next0 = 0;
+ u32 pi1, sw_if_index1, next1 = 0;
+ u8 error0, error1, arc0, arc1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ pi0 = from[0];
+ pi1 = from[1];
+
+ to_next[0] = pi0;
+ to_next[1] = pi1;
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP6_INPUT_NEXT_LOOKUP;
+ }
+
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip1->dst_address)))
+ {
+ arc1 = lm->mcast_feature_arc_index;
+ next1 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc1 = lm->ucast_feature_arc_index;
+ next1 = IP6_INPUT_NEXT_LOOKUP;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+ vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
+
+ error0 = error1 = IP6_ERROR_NONE;
+
+ /* Version != 6? Drop it. */
+ error0 =
+ (clib_net_to_host_u32
+ (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error0;
+ error1 =
+ (clib_net_to_host_u32
+ (ip1->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error1;
+
+	  /* Hop limit < 1? Drop it. Note that link-local packets, such as
+	   * DHCPv6 packets from a client, arrive with hop-limit 1 and must
+	   * not be dropped; hence the test is < 1, not <= 1.
+	   */
+ error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+ error1 = ip1->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error1;
+
+	  /* Packet must be at least as long as a minimal IPv6 header. */
+ error0 =
+ p0->current_length <
+ sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+ error1 =
+ p1->current_length <
+ sizeof (ip1[0]) ? IP6_ERROR_TOO_SHORT : error1;
+
+ if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
+ {
+ if (error0 == IP6_ERROR_TIME_EXPIRED)
+ {
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next0 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+ if (PREDICT_FALSE (error1 != IP6_ERROR_NONE))
+ {
+ if (error1 == IP6_ERROR_TIME_EXPIRED)
+ {
+ icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next1 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, sw_if_index0, next0 = 0;
+ u8 error0, arc0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP6_INPUT_NEXT_LOOKUP;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ error0 = IP6_ERROR_NONE;
+
+ /* Version != 6? Drop it. */
+ error0 =
+ (clib_net_to_host_u32
+ (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error0;
+
+	  /* Hop limit < 1? Drop it. Note that link-local packets, such as
+	   * DHCPv6 packets from a client, arrive with hop-limit 1 and must
+	   * not be dropped; hence the test is < 1, not <= 1.
+	   */
+ error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+
+	  /* Packet must be at least as long as a minimal IPv6 header. */
+ error0 =
+ p0->current_length <
+ sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+
+ if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
+ {
+ if (error0 == IP6_ERROR_TIME_EXPIRED)
+ {
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next0 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static char *ip6_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_input_node) = {
+ .function = ip6_input,
+ .name = "ip6-input",
+ .vector_size = sizeof (u32),
+
+ .n_errors = IP6_N_ERROR,
+ .error_strings = ip6_error_strings,
+
+ .n_next_nodes = IP6_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP6_INPUT_NEXT_DROP] = "error-drop",
+ [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup",
+ [IP6_INPUT_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_INPUT_NEXT_LOOKUP_MULTICAST] = "ip6-mfib-forward-lookup",
+ },
+
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_input_node, ip6_input)
+
+static clib_error_t *
+ip6_init (vlib_main_t * vm)
+{
+ ethernet_register_input_type (vm, ETHERNET_TYPE_IP6, ip6_input_node.index);
+ ppp_register_input_protocol (vm, PPP_PROTOCOL_ip6, ip6_input_node.index);
+ hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip6, ip6_input_node.index);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip6_input_node.index);
+ pn->unformat_edit = unformat_pg_ip6_header;
+ }
+
+ /* Set flow hash to something non-zero. */
+ ip6_main.flow_hash_seed = 0xdeadbeef;
+
+ /* Default hop limit for packets we generate. */
+ ip6_main.host_config.ttl = 64;
+
+ return /* no error */ 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
new file mode 100644
index 00000000..d549ac37
--- /dev/null
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -0,0 +1,4332 @@
+/*
+ * ip/ip6_neighbor.c: IP6 neighbor handling
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/mhash.h>
+#include <vppinfra/md5.h>
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/ip6_mfib.h>
+
+/**
+ * @file
+ * @brief IPv6 Neighbor Adjacency and Neighbor Discovery.
+ *
+ * This file contains the API and CLI code for managing IPv6 neighbor
+ * adjacency tables and neighbor discovery logic.
+ */
+
+/* can't use sizeof link_layer_address, that's 8 */
+#define ETHER_MAC_ADDR_LEN 6
+
+/* advertised prefix option */
+typedef struct
+{
+ /* basic advertised information */
+ ip6_address_t prefix;
+ u8 prefix_len;
+ int adv_on_link_flag;
+ int adv_autonomous_flag;
+ u32 adv_valid_lifetime_in_secs;
+ u32 adv_pref_lifetime_in_secs;
+
+ /* advertised values are computed from these times if decrementing */
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+
+ /* local information */
+ int enabled;
+ int deprecated_prefix_flag;
+ int decrement_lifetime_flag;
+
+#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
+#define DEF_ADV_VALID_LIFETIME 2592000
+#define DEF_ADV_PREF_LIFETIME 604800
+
+ /* extensions are added here, mobile, DNS etc.. */
+} ip6_radv_prefix_t;
+
+
+typedef struct
+{
+ /* group information */
+ u8 type;
+ ip6_address_t mcast_address;
+ u16 num_sources;
+ ip6_address_t *mcast_source_address_pool;
+} ip6_mldp_group_t;
+
+/* configured router advertisement information per ipv6 interface */
+typedef struct
+{
+
+ /* advertised config information, zero means unspecified */
+ u8 curr_hop_limit;
+ int adv_managed_flag;
+ int adv_other_flag;
+ u16 adv_router_lifetime_in_sec;
+ u32 adv_neighbor_reachable_time_in_msec;
+ u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
+
+ /* mtu option */
+ u32 adv_link_mtu;
+
+ /* source link layer option */
+ u8 link_layer_address[8];
+ u8 link_layer_addr_len;
+
+ /* prefix option */
+ ip6_radv_prefix_t *adv_prefixes_pool;
+
+ /* Hash table mapping address to index in interface advertised prefix pool. */
+ mhash_t address_to_prefix_index;
+
+ /* MLDP group information */
+ ip6_mldp_group_t *mldp_group_pool;
+
+ /* Hash table mapping address to index in mldp address pool. */
+ mhash_t address_to_mldp_index;
+
+ /* local information */
+ u32 sw_if_index;
+ int send_radv; /* radv on/off on this interface - set by config */
+  int cease_radv;		/* we are ceasing to send - set by config */
+ int send_unicast;
+ int adv_link_layer_address;
+ int prefix_option;
+ int failed_device_check;
+ int all_routers_mcast;
+ u32 seed;
+ u64 randomizer;
+ int ref_count;
+ adj_index_t mcast_adj_index;
+
+ /* timing information */
+#define DEF_MAX_RADV_INTERVAL 200
+#define DEF_MIN_RADV_INTERVAL (.75 * DEF_MAX_RADV_INTERVAL)
+#define DEF_CURR_HOP_LIMIT 64
+#define DEF_DEF_RTR_LIFETIME (3 * DEF_MAX_RADV_INTERVAL)
+#define MAX_DEF_RTR_LIFETIME 9000
+
+#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16 /* seconds */
+#define MAX_INITIAL_RTR_ADVERTISEMENTS 3	/* transmissions */
+#define MIN_DELAY_BETWEEN_RAS 3 /* seconds */
+#define MAX_DELAY_BETWEEN_RAS 1800 /* seconds */
+#define MAX_RA_DELAY_TIME .5 /* seconds */
+
+ f64 max_radv_interval;
+ f64 min_radv_interval;
+ f64 min_delay_between_radv;
+ f64 max_delay_between_radv;
+ f64 max_rtr_default_lifetime;
+
+ f64 last_radv_time;
+ f64 last_multicast_time;
+ f64 next_multicast_time;
+
+
+ u32 initial_adverts_count;
+ f64 initial_adverts_interval;
+ u32 initial_adverts_sent;
+
+ /* stats */
+ u32 n_advertisements_sent;
+ u32 n_solicitations_rcvd;
+ u32 n_solicitations_dropped;
+
+  /* Link local address to use (defaults to underlying physical for logical interfaces) */
+ ip6_address_t link_local_address;
+} ip6_radv_t;
+
+typedef struct
+{
+ u32 next_index;
+ uword node_index;
+ uword type_opaque;
+ uword data;
+ /* Used for nd event notification only */
+ void *data_callback;
+ u32 pid;
+} pending_resolution_t;
+
+
+typedef struct
+{
+ /* Hash tables mapping name to opcode. */
+ uword *opcode_by_name;
+
+ /* lite beer "glean" adjacency handling */
+ mhash_t pending_resolutions_by_address;
+ pending_resolution_t *pending_resolutions;
+
+ /* Mac address change notification */
+ mhash_t mac_changes_by_address;
+ pending_resolution_t *mac_changes;
+
+ u32 *neighbor_input_next_index_by_hw_if_index;
+
+ ip6_neighbor_t *neighbor_pool;
+
+ mhash_t neighbor_index_by_key;
+
+ u32 *if_radv_pool_index_by_sw_if_index;
+
+ ip6_radv_t *if_radv_pool;
+
+ /* Neighbor attack mitigation */
+ u32 limit_neighbor_cache_size;
+ u32 neighbor_delete_rotor;
+
+ /* Wildcard nd report publisher */
+ uword wc_ip6_nd_publisher_node;
+ uword wc_ip6_nd_publisher_et;
+} ip6_neighbor_main_t;
+
+/* ipv6 neighbor discovery - timer/event types */
+typedef enum
+{
+ ICMP6_ND_EVENT_INIT,
+} ip6_icmp_neighbor_discovery_event_type_t;
+
+typedef union
+{
+ u32 add_del_swindex;
+ struct
+ {
+ u32 up_down_swindex;
+ u32 fib_index;
+ } up_down_event;
+} ip6_icmp_neighbor_discovery_event_data_t;
+
+static ip6_neighbor_main_t ip6_neighbor_main;
+static ip6_address_t ip6a_zero; /* ip6 address 0 */
+
+static void wc_nd_signal_report (wc_nd_report_t * r);
+
+/**
+ * @brief publish a wildcard ND event
+ * @param sw_if_index The interface on which the ND entry was learned
+ * @param mac The neighbor's link-layer (MAC) address
+ * @param ip6 The neighbor's IPv6 address
+ */
+static int
+vnet_nd_wc_publish (u32 sw_if_index, u8 * mac, ip6_address_t * ip6)
+{
+ wc_nd_report_t r = {
+ .sw_if_index = sw_if_index,
+ .ip6 = *ip6,
+ };
+ memcpy (r.mac, mac, sizeof r.mac);
+
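+  /* Publication happens on the main thread: hand the report over via
+   * an RPC. */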
+ void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+ vl_api_rpc_call_main_thread (wc_nd_signal_report, (u8 *) & r, sizeof r);
+ return 0;
+}
+
+static void
+wc_nd_signal_report (wc_nd_report_t * r)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ uword ni = nm->wc_ip6_nd_publisher_node;
+ uword et = nm->wc_ip6_nd_publisher_et;
+
+ if (ni == (uword) ~ 0)
+ return;
+ wc_nd_report_t *q =
+ vlib_process_signal_event_data (vm, ni, et, 1, sizeof *q);
+
+ *q = *r;
+}
+
+void
+wc_nd_set_publisher_node (uword node_index, uword event_type)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ nm->wc_ip6_nd_publisher_node = node_index;
+ nm->wc_ip6_nd_publisher_et = event_type;
+}
+
+static u8 *
+format_ip6_neighbor_ip6_entry (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ ip6_neighbor_t *n = va_arg (*va, ip6_neighbor_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *si;
+ u8 *flags = 0;
+
+ if (!n)
+ return format (s, "%=12s%=20s%=6s%=20s%=40s", "Time", "Address", "Flags",
+ "Link layer", "Interface");
+
+ if (n->flags & IP6_NEIGHBOR_FLAG_DYNAMIC)
+ flags = format (flags, "D");
+
+ if (n->flags & IP6_NEIGHBOR_FLAG_STATIC)
+ flags = format (flags, "S");
+
+ if (n->flags & IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY)
+ flags = format (flags, "N");
+
+ si = vnet_get_sw_interface (vnm, n->key.sw_if_index);
+ s = format (s, "%=12U%=20U%=6s%=20U%=40U",
+ format_vlib_cpu_time, vm, n->cpu_time_last_updated,
+ format_ip6_address, &n->key.ip6_address,
+ flags ? (char *) flags : "",
+ format_ethernet_address, n->link_layer_address,
+ format_vnet_sw_interface_name, vnm, si);
+
+ vec_free (flags);
+ return s;
+}
+
+static void
+ip6_neighbor_adj_fib_remove (ip6_neighbor_t * n, u32 fib_index)
+{
+ if (FIB_NODE_INDEX_INVALID != n->fib_entry_index)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = n->key.ip6_address,
+ };
+ fib_table_entry_path_remove (fib_index,
+ &pfx,
+ FIB_SOURCE_ADJ,
+ DPO_PROTO_IP6,
+ &pfx.fp_addr,
+ n->key.sw_if_index, ~0,
+ 1, FIB_ROUTE_PATH_FLAG_NONE);
+ }
+}
+
+static clib_error_t *
+ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
+ u32 sw_if_index, u32 flags)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n;
+
+ if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ u32 i, *to_delete = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, nm->neighbor_pool,
+ ({
+ if (n->key.sw_if_index == sw_if_index)
+ vec_add1 (to_delete, n - nm->neighbor_pool);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (to_delete); i++)
+ {
+ n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]);
+ mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+ ip6_neighbor_adj_fib_remove (n,
+ ip6_fib_table_get_index_for_sw_if_index
+ (n->key.sw_if_index));
+ pool_put (nm->neighbor_pool, n);
+ }
+ vec_free (to_delete);
+ }
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_neighbor_sw_interface_up_down);
+
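+/* Evict one neighbor entry, advancing a rotor through the pool so that
+ * successive calls delete different entries; used to bound the cache when
+ * a size limit is configured. */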
+static void
+unset_random_neighbor_entry (void)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vnm->vlib_main;
+ ip6_neighbor_t *e;
+ u32 index;
+
+ index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor);
+ nm->neighbor_delete_rotor = index;
+
+ /* Try again from elt 0, could happen if an intfc goes down */
+ if (index == ~0)
+ {
+ index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor);
+ nm->neighbor_delete_rotor = index;
+ }
+
+ /* Nothing left in the pool */
+ if (index == ~0)
+ return;
+
+ e = pool_elt_at_index (nm->neighbor_pool, index);
+
+ vnet_unset_ip6_ethernet_neighbor (vm, e->key.sw_if_index,
+ &e->key.ip6_address,
+ e->link_layer_address,
+ ETHER_MAC_ADDR_LEN);
+}
+
+typedef struct
+{
+ u8 is_add;
+ u8 is_static;
+ u8 is_no_fib_entry;
+ u8 link_layer_address[6];
+ u32 sw_if_index;
+ ip6_address_t addr;
+} ip6_neighbor_set_unset_rpc_args_t;
+
+static void ip6_neighbor_set_unset_rpc_callback
+ (ip6_neighbor_set_unset_rpc_args_t * a);
+
+static void set_unset_ip6_neighbor_rpc
+ (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a, u8 * link_layer_address, int is_add, int is_static,
+ int is_no_fib_entry)
+{
+ ip6_neighbor_set_unset_rpc_args_t args;
+ void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
+ args.sw_if_index = sw_if_index;
+ args.is_add = is_add;
+ args.is_static = is_static;
+ args.is_no_fib_entry = is_no_fib_entry;
+ clib_memcpy (&args.addr, a, sizeof (*a));
+ if (NULL != link_layer_address)
+ clib_memcpy (args.link_layer_address, link_layer_address, 6);
+
+ vl_api_rpc_call_main_thread (ip6_neighbor_set_unset_rpc_callback,
+ (u8 *) & args, sizeof (args));
+}
+
+static void
+ip6_nbr_probe (ip_adjacency_t * adj)
+{
+ icmp6_neighbor_solicitation_header_t *h;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_interface_address_t *ia;
+ ip6_address_t *dst, *src;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ vlib_buffer_t *b;
+ int bogus_length;
+ vlib_main_t *vm;
+ u32 bi = 0;
+
+ vm = vlib_get_main ();
+
+ si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
+ dst = &adj->sub_type.nbr.next_hop.ip6;
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ return;
+ }
+ src = ip6_interface_address_matching_destination (im, dst,
+ adj->rewrite_header.
+ sw_if_index, &ia);
+ if (!src)
+ {
+ return;
+ }
+
+ h = vlib_packet_template_get_packet (vm,
+ &im->discover_neighbor_packet_template,
+ &bi);
+
+ hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+
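+  /* The packet template's destination is the solicited-node multicast
+   * prefix ff02::1:ff00:0/104; copy in the low 24 bits of the target
+   * address. */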
+ h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+ h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+ h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+ h->ip.src_address = src[0];
+ h->neighbor.target_address = dst[0];
+
+ clib_memcpy (h->link_layer_option.ethernet_address,
+ hi->hw_address, vec_len (hi->hw_address));
+
+ h->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+}
+
+static void
+ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr)
+{
+ adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ ethernet_build_rewrite (vnet_get_main (),
+ nbr->key.sw_if_index,
+ adj_get_link_type (ai),
+ nbr->link_layer_address));
+}
+
+static void
+ip6_nd_mk_incomplete (adj_index_t ai)
+{
+ ip_adjacency_t *adj = adj_get (ai);
+
+ adj_nbr_update_rewrite (ai,
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+ ethernet_build_rewrite (vnet_get_main (),
+ adj->rewrite_header.
+ sw_if_index,
+ adj_get_link_type (ai),
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+}
+
+#define IP6_NBR_MK_KEY(k, sw_if_index, addr) \
+{ \
+ k.sw_if_index = sw_if_index; \
+ k.ip6_address = *addr; \
+ k.pad = 0; \
+}
+
+static ip6_neighbor_t *
+ip6_nd_find (u32 sw_if_index, const ip6_address_t * addr)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n = NULL;
+ ip6_neighbor_key_t k;
+ uword *p;
+
+ IP6_NBR_MK_KEY (k, sw_if_index, addr);
+
+ p = mhash_get (&nm->neighbor_index_by_key, &k);
+ if (p)
+ {
+ n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+ }
+
+ return (n);
+}
+
+static adj_walk_rc_t
+ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+ ip6_neighbor_t *nbr = ctx;
+
+ ip6_nd_mk_complete (ai, nbr);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static adj_walk_rc_t
+ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+ ip6_nd_mk_incomplete (ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+void
+ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+ ip6_neighbor_t *nbr;
+ ip_adjacency_t *adj;
+
+ adj = adj_get (ai);
+
+ nbr = ip6_nd_find (sw_if_index, &adj->sub_type.nbr.next_hop.ip6);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_GLEAN:
+ if (NULL != nbr)
+ {
+ adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address,
+ ip6_nd_mk_complete_walk, nbr);
+ }
+ else
+ {
+ /*
+ * no matching ND entry.
+	   * construct the rewrite required for an ND packet, and stick
+ * that in the adj's pipe to smoke.
+ */
+ adj_nbr_update_rewrite (ai,
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+ ethernet_build_rewrite (vnm,
+ sw_if_index,
+ VNET_LINK_IP6,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+	  /*
+	   * since the FIB has added this adj for a route, it will likely
+	   * want to forward traffic soon. Send a speculative ND - just one.
+	   * Probing periodically wouldn't be bad either, but that's more
+	   * code than is justified by the relatively small reward.
+	   */
+ ip6_nbr_probe (adj);
+ }
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ {
+ /*
+ * Construct a partial rewrite from the known ethernet mcast dest MAC
+ */
+ u8 *rewrite;
+ u8 offset;
+
+ rewrite = ethernet_build_rewrite (vnm,
+ sw_if_index,
+ adj->ia_link,
+ ethernet_ip6_mcast_dst_addr ());
+
+ /*
+	 * Complete the remaining fields of the adj's rewrite to direct
+	 * completion of the rewrite at switch time by copying in the IP
+	 * dst address's bytes.
+	 * Offset is 2 bytes into the destination address, and we write
+	 * 4 bytes.
+ */
+ offset = vec_len (rewrite) - 2;
+ adj_mcast_update_rewrite (ai, rewrite, offset, 0xffffffff);
+
+ break;
+ }
+ case IP_LOOKUP_NEXT_DROP:
+ case IP_LOOKUP_NEXT_PUNT:
+ case IP_LOOKUP_NEXT_LOCAL:
+ case IP_LOOKUP_NEXT_REWRITE:
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ case IP_LOOKUP_NEXT_ICMP_ERROR:
+ case IP_LOOKUP_N_NEXT:
+ ASSERT (0);
+ break;
+ }
+}
+
+
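+/* Add the adj-fib: a /128 host route to the neighbor, sourced by the
+ * adjacency, so the peer is reachable through the FIB like any other
+ * prefix. */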
+static void
+ip6_neighbor_adj_fib_add (ip6_neighbor_t * n, u32 fib_index)
+{
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = n->key.ip6_address,
+ };
+
+ n->fib_entry_index =
+ fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ DPO_PROTO_IP6, &pfx.fp_addr,
+ n->key.sw_if_index, ~0, 1, NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+int
+vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address,
+ int is_static, int is_no_fib_entry)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_key_t k;
+ ip6_neighbor_t *n = 0;
+ int make_new_nd_cache_entry = 1;
+ uword *p;
+ u32 next_index;
+ pending_resolution_t *pr, *mc;
+
+ if (vlib_get_thread_index ())
+ {
+ set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
+ 1 /* set new neighbor */ , is_static,
+ is_no_fib_entry);
+ return 0;
+ }
+
+ k.sw_if_index = sw_if_index;
+ k.ip6_address = a[0];
+ k.pad = 0;
+
+ p = mhash_get (&nm->neighbor_index_by_key, &k);
+ if (p)
+ {
+ n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+ /* Refuse to over-write static neighbor entry. */
+ if (!is_static && (n->flags & IP6_NEIGHBOR_FLAG_STATIC))
+ return -2;
+ make_new_nd_cache_entry = 0;
+ }
+
+ if (make_new_nd_cache_entry)
+ {
+ pool_get (nm->neighbor_pool, n);
+ mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool,
+ /* old value */ 0);
+ n->key = k;
+ n->fib_entry_index = FIB_NODE_INDEX_INVALID;
+
+ clib_memcpy (n->link_layer_address,
+ link_layer_address, n_bytes_link_layer_address);
+
+ /*
+       * create the adj-fib: the entry in the FIB table for, and via, the peer.
+ */
+ if (!is_no_fib_entry)
+ {
+ ip6_neighbor_adj_fib_add (n,
+ ip6_fib_table_get_index_for_sw_if_index
+ (n->key.sw_if_index));
+ }
+ else
+ {
+ n->flags |= IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY;
+ }
+ }
+ else
+ {
+ /*
+ * prevent a DoS attack from the data-plane that
+ * spams us with no-op updates to the MAC address
+ */
+ if (0 == memcmp (n->link_layer_address,
+ link_layer_address, n_bytes_link_layer_address))
+ goto check_customers;
+
+ clib_memcpy (n->link_layer_address,
+ link_layer_address, n_bytes_link_layer_address);
+ }
+
+ /* Update time stamp and flags. */
+ n->cpu_time_last_updated = clib_cpu_time_now ();
+ if (is_static)
+ n->flags |= IP6_NEIGHBOR_FLAG_STATIC;
+ else
+ n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC;
+
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address, ip6_nd_mk_complete_walk, n);
+
+check_customers:
+ /* Customer(s) waiting for this address to be resolved? */
+ p = mhash_get (&nm->pending_resolutions_by_address, a);
+ if (p)
+ {
+ next_index = p[0];
+
+ while (next_index != (u32) ~ 0)
+ {
+ pr = pool_elt_at_index (nm->pending_resolutions, next_index);
+ vlib_process_signal_event (vm, pr->node_index,
+ pr->type_opaque, pr->data);
+ next_index = pr->next_index;
+ pool_put (nm->pending_resolutions, pr);
+ }
+
+ mhash_unset (&nm->pending_resolutions_by_address, a, 0);
+ }
+
+ /* Customer(s) requesting ND event for this address? */
+ p = mhash_get (&nm->mac_changes_by_address, a);
+ if (p)
+ {
+ next_index = p[0];
+
+ while (next_index != (u32) ~ 0)
+ {
+ int (*fp) (u32, u8 *, u32, ip6_address_t *);
+ int rv = 1;
+ mc = pool_elt_at_index (nm->mac_changes, next_index);
+ fp = mc->data_callback;
+
+ /* Call the user's data callback, return 1 to suppress dup events */
+ if (fp)
+ rv =
+ (*fp) (mc->data, link_layer_address, sw_if_index, &ip6a_zero);
+ /*
+ * Signal the resolver process, as long as the user
+ * says they want to be notified
+ */
+ if (rv == 0)
+ vlib_process_signal_event (vm, mc->node_index,
+ mc->type_opaque, mc->data);
+ next_index = mc->next_index;
+ }
+ }
+
+ return 0;
+}
+
+int
+vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_key_t k;
+ ip6_neighbor_t *n;
+ uword *p;
+ int rv = 0;
+
+ if (vlib_get_thread_index ())
+ {
+ set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
+ 0 /* unset */ , 0, 0);
+ return 0;
+ }
+
+ k.sw_if_index = sw_if_index;
+ k.ip6_address = a[0];
+ k.pad = 0;
+
+ p = mhash_get (&nm->neighbor_index_by_key, &k);
+ if (p == 0)
+ {
+ rv = -1;
+ goto out;
+ }
+
+ n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+ mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address, ip6_nd_mk_incomplete_walk, NULL);
+
+
+ if (FIB_NODE_INDEX_INVALID != n->fib_entry_index)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = n->key.ip6_address,
+ };
+ fib_table_entry_path_remove
+ (ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index),
+ &pfx,
+ FIB_SOURCE_ADJ,
+ DPO_PROTO_IP6,
+ &pfx.fp_addr, n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ pool_put (nm->neighbor_pool, n);
+
+out:
+ return rv;
+}
+
+static void ip6_neighbor_set_unset_rpc_callback
+ (ip6_neighbor_set_unset_rpc_args_t * a)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ if (a->is_add)
+ vnet_set_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr,
+ a->link_layer_address, 6, a->is_static,
+ a->is_no_fib_entry);
+ else
+ vnet_unset_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr,
+ a->link_layer_address, 6);
+}
+
+static int
+ip6_neighbor_sort (void *a1, void *a2)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_t *n1 = a1, *n2 = a2;
+ int cmp;
+ cmp = vnet_sw_interface_compare (vnm, n1->key.sw_if_index,
+ n2->key.sw_if_index);
+ if (!cmp)
+ cmp = ip6_address_compare (&n1->key.ip6_address, &n2->key.ip6_address);
+ return cmp;
+}
+
+ip6_neighbor_t *
+ip6_neighbors_entries (u32 sw_if_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n, *ns = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, nm->neighbor_pool,
+ ({
+ if (sw_if_index != ~0 && n->key.sw_if_index != sw_if_index)
+ continue;
+ vec_add1 (ns, n[0]);
+ }));
+ /* *INDENT-ON* */
+
+ if (ns)
+ vec_sort_with_function (ns, ip6_neighbor_sort);
+ return ns;
+}
+
+static clib_error_t *
+show_ip6_neighbors (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_t *n, *ns;
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ /* Filter entries by interface if given. */
+ sw_if_index = ~0;
+ (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
+
+ ns = ip6_neighbors_entries (sw_if_index);
+ if (ns)
+ {
+ vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, 0);
+ vec_foreach (n, ns)
+ {
+ vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, n);
+ }
+ vec_free (ns);
+ }
+
+ return error;
+}
+
+/*?
+ * This command is used to display the adjacent IPv6 hosts found via
+ * neighbor discovery. Optionally, limit the output to the specified
+ * interface.
+ *
+ * @cliexpar
+ * Example of how to display the IPv6 neighbor adjacency table:
+ * @cliexstart{show ip6 neighbors}
+ * Time Address Flags Link layer Interface
+ * 34.0910 ::a:1:1:0:7 02:fe:6a:07:39:6f GigabitEthernet2/0/0
+ * 173.2916 ::b:5:1:c:2 02:fe:50:62:3a:94 GigabitEthernet2/0/0
+ * 886.6654 ::1:1:c:0:9 S 02:fe:e4:45:27:5b GigabitEthernet3/0/0
+ * @cliexend
+ * Example of how to display the IPv6 neighbor adjacency table for given interface:
+ * @cliexstart{show ip6 neighbors GigabitEthernet2/0/0}
+ * Time Address Flags Link layer Interface
+ * 34.0910 ::a:1:1:0:7 02:fe:6a:07:39:6f GigabitEthernet2/0/0
+ * 173.2916 ::b:5:1:c:2 02:fe:50:62:3a:94 GigabitEthernet2/0/0
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip6_neighbors_command, static) = {
+ .path = "show ip6 neighbors",
+ .function = show_ip6_neighbors,
+ .short_help = "show ip6 neighbors [<interface>]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_ip6_neighbor (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_address_t addr;
+ u8 mac_address[6];
+ int addr_valid = 0;
+ int is_del = 0;
+ int is_static = 0;
+ int is_no_fib_entry = 0;
+ u32 sw_if_index;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ /* intfc, ip6-address, mac-address */
+ if (unformat (input, "%U %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip6_address, &addr,
+ unformat_ethernet_address, mac_address))
+ addr_valid = 1;
+
+ else if (unformat (input, "delete") || unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "static"))
+ is_static = 1;
+ else if (unformat (input, "no-fib-entry"))
+ is_no_fib_entry = 1;
+ else
+ break;
+ }
+
+ if (!addr_valid)
+ return clib_error_return (0, "Missing interface, ip6 or hw address");
+
+ if (!is_del)
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &addr,
+ mac_address, sizeof (mac_address),
+ is_static, is_no_fib_entry);
+ else
+ vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, &addr,
+ mac_address, sizeof (mac_address));
+ return 0;
+}
+
+/*?
+ * This command is used to manually add an entry to the IPv6 neighbor
+ * adjacency table. Optionally, the entry can be added as static. It is
+ * also used to remove an entry from the table. Use the '<em>show ip6
+ * neighbors</em>' command to display all learned and manually entered entries.
+ *
+ * @cliexpar
+ * Example of how to add a static entry to the IPv6 neighbor adjacency table:
+ * @cliexcmd{set ip6 neighbor GigabitEthernet2/0/0 ::1:1:c:0:9 02:fe:e4:45:27:5b static}
+ * Example of how to delete an entry from the IPv6 neighbor adjacency table:
+ * @cliexcmd{set ip6 neighbor del GigabitEthernet2/0/0 ::1:1:c:0:9 02:fe:e4:45:27:5b}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_neighbor_command, static) =
+{
+ .path = "set ip6 neighbor",
+ .function = set_ip6_neighbor,
+ .short_help = "set ip6 neighbor [del] <interface> <ip6-address> <mac-address> [static]",
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP,
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY,
+ ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+} icmp6_neighbor_solicitation_or_advertisement_next_t;
+
+static_always_inline uword
+icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword is_solicitation)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent;
+ icmp6_neighbor_discovery_option_type_t option_type;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+ int bogus_length;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ option_type =
+ (is_solicitation
+ ? ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address
+ : ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address);
+ n_advertisements_sent = 0;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp6_neighbor_solicitation_or_advertisement_header_t *h0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0;
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+ u32 ip6_sadd_link_local, ip6_sadd_unspecified;
+ int is_rewrite0;
+ u32 ni0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ ip6_sadd_link_local =
+ ip6_address_is_link_local_unicast (&ip0->src_address);
+ ip6_sadd_unspecified =
+ ip6_address_is_unspecified (&ip0->src_address);
+
+ /* Check that source address is unspecified, link-local or else on-link. */
+ if (!ip6_sadd_unspecified && !ip6_sadd_link_local)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+
+ if (ADJ_INDEX_INVALID != src_adj_index0)
+ {
+ ip_adjacency_t *adj0 = adj_get (src_adj_index0);
+
+ /* Allow all realistic-looking rewrite adjacencies to pass */
+ ni0 = adj0->lookup_next_index;
+ is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) &&
+ (ni0 < IP6_LOOKUP_N_NEXT);
+
+ error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0
+ || !is_rewrite0)
+ ?
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+ else
+ {
+ error0 =
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK;
+ }
+ }
+
+ o0 = (void *) (h0 + 1);
+ o0 = ((options_len0 == 8 && o0->header.type == option_type
+ && o0->header.n_data_u64s == 1) ? o0 : 0);
+
+	  /* If the src address is unspecified or link-local, do not learn the neighbor MAC */
+ if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
+ !ip6_sadd_unspecified))
+ {
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
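+	      /* If a cache size limit is configured, evict a random
+	       * entry before learning a new one (neighbor cache
+	       * exhaustion mitigation). */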
+ if (nm->limit_neighbor_cache_size &&
+ pool_elts (nm->neighbor_pool) >=
+ nm->limit_neighbor_cache_size)
+ unset_random_neighbor_entry ();
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0,
+ is_solicitation ?
+ &ip0->src_address :
+ &h0->target_address,
+ o0->ethernet_address,
+ sizeof (o0->ethernet_address),
+ 0, ip6_sadd_link_local);
+ }
+
+ if (is_solicitation && error0 == ICMP6_ERROR_NONE)
+ {
+ /* Check that target address is local to this router. */
+ fib_node_index_t fei;
+ u32 fib_index;
+
+ fib_index =
+ ip6_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ if (~0 == fib_index)
+ {
+ error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ else
+ {
+ fei = ip6_fib_table_lookup_exact_match (fib_index,
+ &h0->target_address,
+ 128);
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /* The target address is not in the FIB */
+ error0 =
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ else
+ {
+ if (FIB_ENTRY_FLAG_LOCAL &
+ fib_entry_get_flags_for_source (fei,
+ FIB_SOURCE_INTERFACE))
+ {
+			  /* It's an address that belongs to one of our
+			   * interfaces; that's good. */
+ }
+ else
+ if (fib_entry_is_sourced
+ (fei, FIB_SOURCE_IP6_ND_PROXY))
+ {
+ /* The address was added by IPv6 Proxy ND config.
+ * We should only respond to these if the NS arrived on
+ * the link that has a matching covering prefix */
+ }
+ else
+ {
+ error0 =
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ }
+ }
+ }
+
+ if (is_solicitation)
+ next0 = (error0 != ICMP6_ERROR_NONE
+ ? ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP
+ : ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY);
+ else
+ {
+ next0 = 0;
+ error0 = error0 == ICMP6_ERROR_NONE ?
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_RX : error0;
+ }
+
+ if (is_solicitation && error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ ethernet_header_t *eth0;
+
+ /* dst address is either source address or the all-nodes mcast addr */
+ if (!ip6_sadd_unspecified)
+ ip0->dst_address = ip0->src_address;
+ else
+ ip6_set_reserved_multicast_address (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+
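+	      /* Turn the solicitation into an advertisement in place:
+	       * source the reply from the solicited target address and
+	       * rewrite the ICMP type, reusing the received buffer. */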
+ ip0->src_address = h0->target_address;
+ ip0->hop_limit = 255;
+ h0->icmp.type = ICMP6_neighbor_advertisement;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+ if (eth_if0 && o0)
+ {
+ clib_memcpy (o0->ethernet_address, eth_if0->address, 6);
+ o0->header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+ }
+
+ h0->advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED
+ | ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+
+ h0->icmp.checksum = 0;
+ h0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /* Reuse current MAC header, copy SMAC to DMAC and
+ * interface MAC to SMAC */
+ vlib_buffer_advance (p0, -ethernet_buffer_header_size (p0));
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ if (eth_if0)
+ clib_memcpy (eth0->src_address, eth_if0->address, 6);
+
+ /* Setup input and output sw_if_index for packet */
+ ASSERT (vnet_buffer (p0)->sw_if_index[VLIB_RX] == sw_if_index0);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ n_advertisements_sent++;
+ }
+
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for advertisements sent. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX,
+ n_advertisements_sent);
+
+ return frame->n_vectors;
+}
+
+/* for "syslogging" - use elog for now */
+#define foreach_log_level \
+ _ (DEBUG, "DEBUG") \
+ _ (INFO, "INFORMATION") \
+ _ (NOTICE, "NOTICE") \
+ _ (WARNING, "WARNING") \
+ _ (ERR, "ERROR") \
+ _ (CRIT, "CRITICAL") \
+ _ (ALERT, "ALERT") \
+ _ (EMERG, "EMERGENCY")
+
+typedef enum
+{
+#define _(f,s) LOG_##f,
+ foreach_log_level
+#undef _
+} log_level_t;
+
+static char *log_level_strings[] = {
+#define _(f,s) s,
+ foreach_log_level
+#undef _
+};
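+
+/* The _() X-macro expands foreach_log_level twice - once into the LOG_*
+ * enum and once into the string table above - keeping the two in sync
+ * by construction. */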
+
+static int logmask = 1 << LOG_DEBUG;
+
+static void
+ip6_neighbor_syslog (vlib_main_t * vm, int priority, char *fmt, ...)
+{
+ /* just use elog for now */
+ u8 *what;
+ va_list va;
+
+ if ((priority > LOG_EMERG) || !(logmask & (1 << priority)))
+ return;
+
+ va_start (va, fmt);
+ if (fmt)
+ {
+ what = va_format (0, fmt, &va);
+
+      ELOG_TYPE_DECLARE (e) = {
+	.format = "ip6 nd: (%s): %s",
+	.format_args = "T4T4",
+      };
+ struct
+ {
+ u32 s[2];
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->s[0] = elog_string (&vm->elog_main, log_level_strings[priority]);
+ ed->s[1] = elog_string (&vm->elog_main, (char *) what);
+ }
+ va_end (va);
+ return;
+}
+
+/* ipv6 neighbor discovery - router advertisements */
+typedef enum
+{
+ ICMP6_ROUTER_SOLICITATION_NEXT_DROP,
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW,
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX,
+ ICMP6_ROUTER_SOLICITATION_N_NEXT,
+} icmp6_router_solicitation_or_advertisement_next_t;
+
+static_always_inline uword
+icmp6_router_solicitation (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ u32 n_advertisements_sent = 0;
+ int bogus_length;
+
+ icmp6_neighbor_discovery_option_type_t option_type;
+
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+  /* the source may append its LL address */
+ option_type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ ip6_radv_t *radv_info = 0;
+
+ icmp6_neighbor_discovery_header_t *h0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0;
+
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+ u32 is_solicitation = 1, is_dropped = 0;
+ u32 is_unspecified, is_link_local;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]);
+ is_unspecified = ip6_address_is_unspecified (&ip0->src_address);
+ is_link_local =
+ ip6_address_is_link_local_unicast (&ip0->src_address);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* check if solicitation (not from nd_timer node) */
+ if (ip6_address_is_unspecified (&ip0->dst_address))
+ is_solicitation = 0;
+
+ /* Check that source address is unspecified, link-local or else on-link. */
+ if (!is_unspecified && !is_link_local)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+
+ if (ADJ_INDEX_INVALID != src_adj_index0)
+ {
+ ip_adjacency_t *adj0 = adj_get (src_adj_index0);
+
+ error0 = (adj0->rewrite_header.sw_if_index != sw_if_index0
+ ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+ else
+ {
+ error0 = ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK;
+ }
+ }
+
+ /* check for source LL option and process */
+ o0 = (void *) (h0 + 1);
+ o0 = ((options_len0 == 8
+ && o0->header.type == option_type
+ && o0->header.n_data_u64s == 1) ? o0 : 0);
+
+ /* if src address unspecified IGNORE any options */
+ if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
+ !is_unspecified && !is_link_local))
+ {
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ if (nm->limit_neighbor_cache_size &&
+ pool_elts (nm->neighbor_pool) >=
+ nm->limit_neighbor_cache_size)
+ unset_random_neighbor_entry ();
+
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0,
+ &ip0->src_address,
+ o0->ethernet_address,
+ sizeof (o0->ethernet_address),
+ 0, 0);
+ }
+
+ /* default is to drop */
+ next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ u32 adj_index0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ /* only support ethernet interface type for now */
+ error0 =
+ (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF
+ : error0;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ u32 ri;
+
+		  /* adjust the size of the buffer to include just the ipv6 header */
+ p0->current_length -=
+ (options_len0 +
+ sizeof (icmp6_neighbor_discovery_header_t));
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty
+ (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0];
+
+ if (ri != ~0)
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ error0 =
+ ((!radv_info) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
+ error0);
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ f64 now = vlib_time_now (vm);
+
+ /* for solicited adverts - need to rate limit */
+ if (is_solicitation)
+ {
+ if (0 != radv_info->last_radv_time &&
+ (now - radv_info->last_radv_time) <
+ MIN_DELAY_BETWEEN_RAS)
+ is_dropped = 1;
+ else
+ radv_info->last_radv_time = now;
+ }
+
+ /* send now */
+ icmp6_router_advertisement_header_t rh;
+
+ rh.icmp.type = ICMP6_router_advertisement;
+ rh.icmp.code = 0;
+ rh.icmp.checksum = 0;
+
+ rh.current_hop_limit = radv_info->curr_hop_limit;
+ rh.router_lifetime_in_sec =
+ clib_host_to_net_u16
+ (radv_info->adv_router_lifetime_in_sec);
+ rh.
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ =
+ clib_host_to_net_u32 (radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+ rh.neighbor_reachable_time_in_msec =
+ clib_host_to_net_u32 (radv_info->
+ adv_neighbor_reachable_time_in_msec);
+
+ rh.flags =
+ (radv_info->adv_managed_flag) ?
+ ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP :
+ 0;
+ rh.flags |=
+ ((radv_info->adv_other_flag) ?
+ ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP :
+ 0);
+
+
+ u16 payload_length =
+ sizeof (icmp6_router_advertisement_header_t);
+
+ vlib_buffer_add_data (vm,
+ vlib_buffer_get_free_list_index
+ (p0), bi0, (void *) &rh,
+ sizeof
+ (icmp6_router_advertisement_header_t));
+
+ if (radv_info->adv_link_layer_address)
+ {
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ h;
+
+ h.header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+ h.header.n_data_u64s = 1;
+
+ /* copy ll address */
+ clib_memcpy (&h.ethernet_address[0],
+ eth_if0->address, 6);
+
+ vlib_buffer_add_data (vm,
+ vlib_buffer_get_free_list_index
+ (p0), bi0, (void *) &h,
+ sizeof
+ (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t));
+
+ payload_length +=
+ sizeof
+ (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t);
+ }
+
+ /* add MTU option */
+ if (radv_info->adv_link_mtu)
+ {
+ icmp6_neighbor_discovery_mtu_option_t h;
+
+ h.unused = 0;
+ h.mtu =
+ clib_host_to_net_u32 (radv_info->adv_link_mtu);
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu;
+ h.header.n_data_u64s = 1;
+
+ payload_length +=
+ sizeof (icmp6_neighbor_discovery_mtu_option_t);
+
+ vlib_buffer_add_data (vm,
+ vlib_buffer_get_free_list_index
+ (p0), bi0, (void *) &h,
+ sizeof
+ (icmp6_neighbor_discovery_mtu_option_t));
+ }
+
+ /* add advertised prefix options */
+ ip6_radv_prefix_t *pr_info;
+
+ /* *INDENT-OFF* */
+ pool_foreach (pr_info, radv_info->adv_prefixes_pool,
+ ({
+ if(pr_info->enabled &&
+ (!pr_info->decrement_lifetime_flag
+ || (pr_info->pref_lifetime_expires >0)))
+ {
+ /* advertise this prefix */
+ icmp6_neighbor_discovery_prefix_information_option_t h;
+
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information;
+ h.header.n_data_u64s = (sizeof(icmp6_neighbor_discovery_prefix_information_option_t) >> 3);
+
+ h.dst_address_length = pr_info->prefix_len;
+
+ h.flags = (pr_info->adv_on_link_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK : 0;
+ h.flags |= (pr_info->adv_autonomous_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO : 0;
+
+ if(radv_info->cease_radv && pr_info->deprecated_prefix_flag)
+ {
+ h.valid_time = clib_host_to_net_u32(MIN_ADV_VALID_LIFETIME);
+ h.preferred_time = 0;
+ }
+ else
+ {
+ if(pr_info->decrement_lifetime_flag)
+ {
+ pr_info->adv_valid_lifetime_in_secs = ((pr_info->valid_lifetime_expires > now)) ?
+ (pr_info->valid_lifetime_expires - now) : 0;
+
+ pr_info->adv_pref_lifetime_in_secs = ((pr_info->pref_lifetime_expires > now)) ?
+ (pr_info->pref_lifetime_expires - now) : 0;
+ }
+
+ h.valid_time = clib_host_to_net_u32(pr_info->adv_valid_lifetime_in_secs);
+ h.preferred_time = clib_host_to_net_u32(pr_info->adv_pref_lifetime_in_secs) ;
+ }
+ h.unused = 0;
+
+ clib_memcpy(&h.dst_address, &pr_info->prefix, sizeof(ip6_address_t));
+
+ payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t);
+
+ vlib_buffer_add_data (vm,
+ vlib_buffer_get_free_list_index (p0),
+ bi0,
+ (void *)&h, sizeof(icmp6_neighbor_discovery_prefix_information_option_t));
+
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /* add additional options before here */
+
+ /* finish building the router advertisement... */
+ if (!is_unspecified && radv_info->send_unicast)
+ {
+ ip0->dst_address = ip0->src_address;
+ }
+ else
+ {
+ /* target address is all-nodes mcast addr */
+ ip6_set_reserved_multicast_address
+ (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ }
+
+ /* source address MUST be the link-local address */
+ ip0->src_address = radv_info->link_local_address;
+
+ ip0->hop_limit = 255;
+ ip0->payload_length =
+ clib_host_to_net_u16 (payload_length);
+
+ icmp6_router_advertisement_header_t *rh0 =
+ (icmp6_router_advertisement_header_t *) (ip0 + 1);
+ rh0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /* setup output if and adjacency */
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ if (is_solicitation)
+ {
+ ethernet_header_t *eth0;
+ /* Reuse current MAC header, copy SMAC to DMAC and
+ * interface MAC to SMAC */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (eth0->dst_address, eth0->src_address,
+ 6);
+ clib_memcpy (eth0->src_address, eth_if0->address,
+ 6);
+ next0 =
+ is_dropped ? next0 :
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ sw_if_index0;
+ }
+ else
+ {
+ adj_index0 = radv_info->mcast_adj_index;
+ if (adj_index0 == 0)
+ error0 = ICMP6_ERROR_DST_LOOKUP_MISS;
+ else
+ {
+ next0 =
+ is_dropped ? next0 :
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+ adj_index0;
+ }
+ }
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ radv_info->n_solicitations_dropped += is_dropped;
+ radv_info->n_solicitations_rcvd += is_solicitation;
+
+ if ((error0 == ICMP6_ERROR_NONE) && !is_dropped)
+ {
+ radv_info->n_advertisements_sent++;
+ n_advertisements_sent++;
+ }
+ }
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+
+ if (error0 != ICMP6_ERROR_NONE)
+ vlib_error_count (vm, error_node->node_index, error0, 1);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for router advertisements sent. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_ROUTER_ADVERTISEMENTS_TX,
+ n_advertisements_sent);
+
+ return frame->n_vectors;
+}
+
+/* Validate the advertised info for consistency (see RFC 4861, section
+ * 6.2.7). Log any inconsistencies; the packet is always dropped. */
+static_always_inline uword
+icmp6_router_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ u32 n_advertisements_rcvd = 0;
+
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ ip6_radv_t *radv_info = 0;
+ icmp6_router_advertisement_header_t *h0;
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* Check that source address is link-local */
+ error0 = (!ip6_address_is_link_local_unicast (&ip0->src_address)) ?
+ ICMP6_ERROR_ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL : error0;
+
+ /* default is to drop */
+ next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP;
+
+ n_advertisements_rcvd++;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ /* only support ethernet interface type for now */
+ error0 =
+ (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF
+ : error0;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty
+ (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0];
+
+ if (ri != ~0)
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ error0 =
+ ((!radv_info) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
+ error0);
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ /* validate advertised information */
+ if ((h0->current_hop_limit && radv_info->curr_hop_limit)
+ && (h0->current_hop_limit !=
+ radv_info->curr_hop_limit))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvCurHopLimit on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->flags &
+ ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP)
+ != radv_info->adv_managed_flag)
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvManagedFlag on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->flags &
+ ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP)
+ != radv_info->adv_other_flag)
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvOtherConfigFlag on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ && radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations)
+ && (h0->
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ !=
+ clib_host_to_net_u32 (radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvRetransTimer on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->neighbor_reachable_time_in_msec &&
+ radv_info->adv_neighbor_reachable_time_in_msec) &&
+ (h0->neighbor_reachable_time_in_msec !=
+ clib_host_to_net_u32
+ (radv_info->adv_neighbor_reachable_time_in_msec)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvReachableTime on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ /* check for MTU or prefix options, etc. */
+ u8 *opt_hdr = (u8 *) (h0 + 1);
+ while (options_len0 > 0)
+ {
+ icmp6_neighbor_discovery_option_header_t *o0 =
+ (icmp6_neighbor_discovery_option_header_t *)
+ opt_hdr;
+ int opt_len = o0->n_data_u64s << 3;
+ icmp6_neighbor_discovery_option_type_t option_type =
+ o0->type;
+
+ if (options_len0 < 2)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ "malformed RA packet on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+
+ if (opt_len == 0)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ " zero length option in RA on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+ else if (opt_len > options_len0)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ "option length in RA packet greater than total length on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+
+ options_len0 -= opt_len;
+ opt_hdr += opt_len;
+
+ switch (option_type)
+ {
+ case ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu:
+ {
+ icmp6_neighbor_discovery_mtu_option_t *h =
+ (icmp6_neighbor_discovery_mtu_option_t
+ *) (o0);
+
+ if (opt_len < sizeof (*h))
+ break;
+
+ if ((h->mtu && radv_info->adv_link_mtu) &&
+ (h->mtu !=
+ clib_host_to_net_u32
+ (radv_info->adv_link_mtu)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvLinkMTU on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+ }
+ break;
+
+ case ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information:
+ {
+ icmp6_neighbor_discovery_prefix_information_option_t
+ * h =
+ (icmp6_neighbor_discovery_prefix_information_option_t
+ *) (o0);
+
+ /* validate advertised prefix options */
+ ip6_radv_prefix_t *pr_info;
+ u32 preferred, valid;
+
+ if (opt_len < sizeof (*h))
+ break;
+
+ preferred =
+ clib_net_to_host_u32 (h->preferred_time);
+ valid = clib_net_to_host_u32 (h->valid_time);
+
+ /* look for matching prefix - if we are advertising it, it had better be consistent */
+ /* *INDENT-OFF* */
+ pool_foreach (pr_info, radv_info->adv_prefixes_pool,
+ ({
+
+ ip6_address_t mask;
+ ip6_address_mask_from_width(&mask, pr_info->prefix_len);
+
+ if(pr_info->enabled &&
+ (pr_info->prefix_len == h->dst_address_length) &&
+ ip6_address_is_equal_masked (&pr_info->prefix, &h->dst_address, &mask))
+ {
+ /* found it */
+ if(!pr_info->decrement_lifetime_flag &&
+ valid != pr_info->adv_valid_lifetime_in_secs)
+ {
+ ip6_neighbor_syslog(vm, LOG_WARNING,
+ "our ADV validlifetime on %U for %U does not agree with %U",
+ format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix,
+ format_ip6_address, &h->dst_address);
+ }
+ if(!pr_info->decrement_lifetime_flag &&
+ preferred != pr_info->adv_pref_lifetime_in_secs)
+ {
+ ip6_neighbor_syslog(vm, LOG_WARNING,
+ "our ADV preferredlifetime on %U for %U does not agree with %U",
+ format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix,
+ format_ip6_address, &h->dst_address);
+ }
+ }
+ break;
+ }));
+ /* *INDENT-ON* */
+ break;
+ }
+ default:
+ /* skip this one */
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+
+ if (error0 != ICMP6_ERROR_NONE)
+ vlib_error_count (vm, error_node->node_index, error0, 1);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for router advertisements received. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_ROUTER_ADVERTISEMENTS_RX,
+ n_advertisements_rcvd);
+
+ return frame->n_vectors;
+}
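+
+/*
+ * Received RAs are only validated against our own advertised
+ * configuration and logged; the advertisement node registered below has
+ * "error-drop" as its single next node, so every RA is dropped after
+ * the RX counter is bumped.
+ */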
+
+/**
+ * @brief Add a multicast address to the advertised MLD set
+ */
+static void
+ip6_neighbor_add_mld_prefix (ip6_radv_t * radv_info, ip6_address_t * addr)
+{
+ ip6_mldp_group_t *mcast_group_info;
+ uword *p;
+
+ /* lookup mldp info for this interface */
+ p = mhash_get (&radv_info->address_to_mldp_index, &addr);
+ mcast_group_info =
+ p ? pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0;
+
+ /* add address */
+ if (!mcast_group_info)
+ {
+ /* add */
+ u32 mi;
+ pool_get (radv_info->mldp_group_pool, mcast_group_info);
+
+ mi = mcast_group_info - radv_info->mldp_group_pool;
+ mhash_set (&radv_info->address_to_mldp_index, &addr, mi, /* old_value */
+ 0);
+
+ mcast_group_info->type = 4;
+ mcast_group_info->mcast_source_address_pool = 0;
+ mcast_group_info->num_sources = 0;
+ clib_memcpy (&mcast_group_info->mcast_address, &addr,
+ sizeof (ip6_address_t));
+ }
+}
+
+/**
+ * @brief Delete a multicast Address from the advertised MLD set
+ */
+static void
+ip6_neighbor_del_mld_prefix (ip6_radv_t * radv_info, ip6_address_t * addr)
+{
+ ip6_mldp_group_t *mcast_group_info;
+ uword *p;
+
+ p = mhash_get (&radv_info->address_to_mldp_index, &addr);
+ mcast_group_info =
+ p ? pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0;
+
+ if (mcast_group_info)
+ {
+ mhash_unset (&radv_info->address_to_mldp_index, &addr,
+ /* old_value */ 0);
+ pool_put (radv_info->mldp_group_pool, mcast_group_info);
+ }
+}
+
+/**
+ * @brief Add a reserved multicast address to the advertised MLD set
+ */
+static void
+ip6_neighbor_add_mld_grp (ip6_radv_t * a,
+ ip6_multicast_address_scope_t scope,
+ ip6_multicast_link_local_group_id_t group)
+{
+ ip6_address_t addr;
+
+ ip6_set_reserved_multicast_address (&addr, scope, group);
+
+ ip6_neighbor_add_mld_prefix (a, &addr);
+}
+
+/**
+ * @brief create and initialize router advertisement parameters with default
+ * values for this interface
+ */
+u32
+ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index, u32 is_add)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_radv_t *a = 0;
+ u32 ri = ~0;
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0 = 0;
+
+ /* lookup radv container - ethernet interfaces only */
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ if (!eth_if0)
+ return ri;
+
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ a = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ if (!is_add)
+ {
+ ip6_radv_prefix_t *p;
+ ip6_mldp_group_t *m;
+
+ /* release the lock on the interface's mcast adj */
+ adj_unlock (a->mcast_adj_index);
+
+ /* clean up prefix and MLDP pools */
+ /* *INDENT-OFF* */
+ pool_flush(p, a->adv_prefixes_pool,
+ ({
+ mhash_unset (&a->address_to_prefix_index, &p->prefix, 0);
+ }));
+ pool_flush (m, a->mldp_group_pool,
+ ({
+ mhash_unset (&a->address_to_mldp_index, &m->mcast_address, 0);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (a->mldp_group_pool);
+ pool_free (a->adv_prefixes_pool);
+
+ mhash_free (&a->address_to_prefix_index);
+ mhash_free (&a->address_to_mldp_index);
+
+ pool_put (nm->if_radv_pool, a);
+ nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0;
+ ri = ~0;
+ }
+ }
+ else
+ {
+ if (is_add)
+ {
+ vnet_hw_interface_t *hw_if0;
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ pool_get (nm->if_radv_pool, a);
+
+ ri = a - nm->if_radv_pool;
+ nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ri;
+
+ /* initialize default values (most of which are zero) */
+ memset (a, 0, sizeof (a[0]));
+
+ a->sw_if_index = sw_if_index;
+ a->max_radv_interval = DEF_MAX_RADV_INTERVAL;
+ a->min_radv_interval = DEF_MIN_RADV_INTERVAL;
+ a->curr_hop_limit = DEF_CURR_HOP_LIMIT;
+ a->adv_router_lifetime_in_sec = DEF_DEF_RTR_LIFETIME;
+
+ /* send ll address source address option */
+ a->adv_link_layer_address = 1;
+
+ a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS;
+ a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS;
+ a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME;
+ a->seed = (u32) clib_cpu_time_now ();
+ (void) random_u32 (&a->seed);
+ a->randomizer = clib_cpu_time_now ();
+ (void) random_u64 (&a->randomizer);
+
+ a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS;
+ a->initial_adverts_sent = a->initial_adverts_count - 1;
+ a->initial_adverts_interval = MAX_INITIAL_RTR_ADVERT_INTERVAL;
+
+ /* default is to send */
+ a->send_radv = 1;
+
+ /* fill in radv_info for this interface that will be needed later */
+ a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX];
+
+ clib_memcpy (a->link_layer_address, eth_if0->address, 6);
+
+ /* fill in default link-local address (this may be overridden) */
+ ip6_link_local_address_from_ethernet_address
+ (&a->link_local_address, eth_if0->address);
+
+ mhash_init (&a->address_to_prefix_index, sizeof (uword),
+ sizeof (ip6_address_t));
+ mhash_init (&a->address_to_mldp_index, sizeof (uword),
+ sizeof (ip6_address_t));
+
+ a->mcast_adj_index = adj_mcast_add_or_lock (FIB_PROTOCOL_IP6,
+ VNET_LINK_IP6,
+ sw_if_index);
+
+ /* add multicast groups we will always be reporting */
+ ip6_neighbor_add_mld_grp (a,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ ip6_neighbor_add_mld_grp (a,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_routers);
+ ip6_neighbor_add_mld_grp (a,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+ }
+ }
+ return ri;
+}
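+
+/*
+ * Usage sketch (illustrative only; assumes a valid vnet_main_t pointer
+ * and an ethernet sw_if_index): create and later tear down the
+ * per-interface RA state from a control-plane path. A return of ~0
+ * means the interface is not ethernet, or that the state was just
+ * freed on delete:
+ *
+ *   u32 ri = ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 1);
+ *   if (ri != ~0)
+ *     {
+ *       ... RA defaults now live in ip6_neighbor_main.if_radv_pool ...
+ *     }
+ *   ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0);
+ */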
+
+/* send an mldpv2 report */
+static void
+ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vnm->vlib_main;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ u32 ri;
+ int bogus_length;
+
+ ip6_radv_t *radv_info;
+ u16 payload_length;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0;
+ u32 *to_next;
+ vlib_frame_t *f;
+ u32 bo0;
+ u32 n_to_alloc = 1;
+ u32 n_allocated;
+
+ icmp6_multicast_listener_report_header_t *rh0;
+ icmp6_multicast_listener_report_packet_t *rp0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ if (!eth_if0 || !vnet_sw_interface_is_admin_up (vnm, sw_if_index))
+ return;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri == ~0)
+ return;
+
+ /* send report now - build an MLDPv2 report packet */
+ n_allocated = vlib_buffer_alloc_from_free_list (vm,
+ &bo0,
+ n_to_alloc,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ if (PREDICT_FALSE (n_allocated == 0))
+ {
+ clib_warning ("buffer allocation failure");
+ return;
+ }
+
+ b0 = vlib_get_buffer (vm, bo0);
+
+ /* set the buffer length to cover the ip6 header plus the fixed report header */
+ b0->current_length = sizeof (icmp6_multicast_listener_report_packet_t);
+
+ payload_length = sizeof (icmp6_multicast_listener_report_header_t);
+
+ b0->error = ICMP6_ERROR_NONE;
+
+ rp0 = vlib_buffer_get_current (b0);
+ ip0 = (ip6_header_t *) & rp0->ip;
+ rh0 = (icmp6_multicast_listener_report_header_t *) & rp0->report_hdr;
+
+ memset (rp0, 0x0, sizeof (icmp6_multicast_listener_report_packet_t));
+
+ ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ ip0->protocol = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
+ /* for DEBUG - the vnet driver doesn't seem to emit router alerts */
+ /* ip0->protocol = IP_PROTOCOL_ICMP6; */
+ ip0->hop_limit = 1;
+
+ rh0->icmp.type = ICMP6_multicast_listener_report_v2;
+
+ /* source address MUST be the link-local address */
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+ ip0->src_address = radv_info->link_local_address;
+
+ /* destination is all mldpv2 routers */
+ ip6_set_reserved_multicast_address (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+
+ /* add reports here */
+ ip6_mldp_group_t *m;
+ int num_addr_records = 0;
+ icmp6_multicast_address_record_t rr;
+
+ /* fill in the hop-by-hop extension header (router alert) info */
+ rh0->ext_hdr.next_hdr = IP_PROTOCOL_ICMP6;
+ rh0->ext_hdr.n_data_u64s = 0;
+
+ rh0->alert.type = IP6_MLDP_ALERT_TYPE;
+ rh0->alert.len = 2;
+ rh0->alert.value = 0;
+
+ rh0->pad.type = 1;
+ rh0->pad.len = 0;
+
+ rh0->icmp.checksum = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (m, radv_info->mldp_group_pool,
+ ({
+ rr.type = m->type;
+ rr.aux_data_len_u32s = 0;
+ rr.num_sources = clib_host_to_net_u16 (m->num_sources);
+ clib_memcpy(&rr.mcast_addr, &m->mcast_address, sizeof(ip6_address_t));
+
+ num_addr_records++;
+
+ vlib_buffer_add_data
+ (vm, vlib_buffer_get_free_list_index (b0), bo0,
+ (void *)&rr, sizeof(icmp6_multicast_address_record_t));
+
+ payload_length += sizeof (icmp6_multicast_address_record_t);
+ }));
+ /* *INDENT-ON* */
+
+ rh0->rsvd = 0;
+ rh0->num_addr_records = clib_host_to_net_u16 (num_addr_records);
+
+ /* update lengths */
+ ip0->payload_length = clib_host_to_net_u16 (payload_length);
+
+ rh0->icmp.checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /*
+ * OK to override w/ no regard for actual FIB, because
+ * ip6-rewrite only looks at the adjacency.
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = radv_info->mcast_adj_index;
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-mcast");
+
+ f = vlib_get_frame_to_node (vm, node->index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bo0;
+ f->n_vectors = 1;
+
+ vlib_put_frame_to_node (vm, node->index, f);
+ return;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node, static) =
+{
+ .function = icmp6_router_solicitation,
+ .name = "icmp6-router-solicitation",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop",
+ [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-mcast",
+ [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* send an RA or update the timer info, etc. */
+static uword
+ip6_neighbor_process_timer_event (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_radv_t *radv_info;
+ vlib_frame_t *f = 0;
+ u32 n_this_frame = 0;
+ u32 n_left_to_next = 0;
+ u32 *to_next = 0;
+ u32 bo0;
+ icmp6_router_solicitation_header_t *h0;
+ vlib_buffer_t *b0;
+ f64 now = vlib_time_now (vm);
+
+ /* Interface ip6 radv info list */
+ /* *INDENT-OFF* */
+ pool_foreach (radv_info, nm->if_radv_pool,
+ ({
+ if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index))
+ {
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count-1;
+ radv_info->next_multicast_time = now;
+ radv_info->last_multicast_time = now;
+ radv_info->last_radv_time = 0;
+ radv_info->all_routers_mcast = 0;
+ continue;
+ }
+
+ /* Make sure that we've joined the all-routers multicast group */
+ if(!radv_info->all_routers_mcast)
+ {
+ /* send an MLDP report */
+ ip6_neighbor_send_mldpv2_report(radv_info->sw_if_index);
+ radv_info->all_routers_mcast = 1;
+ }
+
+ /* is it time to send a multicast RA on this interface? */
+ if(radv_info->send_radv && (now >= radv_info->next_multicast_time))
+ {
+ u32 n_to_alloc = 1;
+ u32 n_allocated;
+
+ f64 rfn = (radv_info->max_radv_interval - radv_info->min_radv_interval) *
+ random_f64 (&radv_info->seed) + radv_info->min_radv_interval;
+
+ /* multicast send - compute next multicast send time */
+ if( radv_info->initial_adverts_sent > 0)
+ {
+ radv_info->initial_adverts_sent--;
+ if(rfn > radv_info->initial_adverts_interval)
+ rfn = radv_info->initial_adverts_interval;
+
+ /* check to see if we are ceasing to send */
+ if( radv_info->initial_adverts_sent == 0)
+ if(radv_info->cease_radv)
+ radv_info->send_radv = 0;
+ }
+
+ radv_info->next_multicast_time = rfn + now;
+ radv_info->last_multicast_time = now;
+
+ /* send advert now - build a "solicted" router advert with unspecified source address */
+ n_allocated = vlib_buffer_alloc_from_free_list
+ (vm, &bo0, n_to_alloc, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ if (PREDICT_FALSE(n_allocated == 0))
+ {
+ clib_warning ("buffer allocation failure");
+ continue;
+ }
+ b0 = vlib_get_buffer (vm, bo0);
+ b0->current_length = sizeof (icmp6_router_solicitation_header_t);
+ b0->error = ICMP6_ERROR_NONE;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = radv_info->sw_if_index;
+
+ h0 = vlib_buffer_get_current (b0);
+
+ memset (h0, 0, sizeof (icmp6_router_solicitation_header_t));
+
+ h0->ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
+ h0->ip.payload_length = clib_host_to_net_u16 (sizeof (icmp6_router_solicitation_header_t)
+ - STRUCT_OFFSET_OF (icmp6_router_solicitation_header_t, neighbor));
+ h0->ip.protocol = IP_PROTOCOL_ICMP6;
+ h0->ip.hop_limit = 255;
+
+ /* set src/dst address as "unspecified" this marks this packet as internally generated rather than recieved */
+ h0->ip.src_address.as_u64[0] = 0;
+ h0->ip.src_address.as_u64[1] = 0;
+
+ h0->ip.dst_address.as_u64[0] = 0;
+ h0->ip.dst_address.as_u64[1] = 0;
+
+ h0->neighbor.icmp.type = ICMP6_router_solicitation;
+
+ if (PREDICT_FALSE(f == 0))
+ {
+ f = vlib_get_frame_to_node (vm, ip6_icmp_router_solicitation_node.index);
+ to_next = vlib_frame_vector_args (f);
+ n_left_to_next = VLIB_FRAME_SIZE;
+ n_this_frame = 0;
+ }
+
+ n_this_frame++;
+ n_left_to_next--;
+ to_next[0] = bo0;
+ to_next += 1;
+
+ if (PREDICT_FALSE(n_left_to_next == 0))
+ {
+ f->n_vectors = n_this_frame;
+ vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f);
+ f = 0;
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (f)
+ {
+ ASSERT (n_this_frame);
+ f->n_vectors = n_this_frame;
+ vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f);
+ }
+ return 0;
+}
+
+static uword
+ip6_icmp_neighbor_discovery_event_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ uword event_type;
+ ip6_icmp_neighbor_discovery_event_data_t *event_data;
+
+ /* init code here */
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 1. /* seconds */ );
+
+ event_data = vlib_process_get_event_data (vm, &event_type);
+
+ if (!event_data)
+ {
+ /* No events found: timer expired. */
+ /* process interface list and send RAs as appropriate, update timer info */
+ ip6_neighbor_process_timer_event (vm, node, frame);
+ }
+ else
+ {
+ switch (event_type)
+ {
+
+ case ICMP6_ND_EVENT_INIT:
+ break;
+
+ case ~0:
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ if (event_data)
+ _vec_len (event_data) = 0;
+ }
+ }
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node, static) =
+{
+ .function = icmp6_router_advertisement,
+ .name = "icmp6-router-advertisement",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node = {
+
+ .function = ip6_icmp_neighbor_discovery_event_process,
+ .name = "ip6-icmp-neighbor-discovery-event-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+};
+
+static uword
+icmp6_neighbor_solicitation (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame,
+ /* is_solicitation */
+ 1);
+}
+
+static uword
+icmp6_neighbor_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame,
+ /* is_solicitation */
+ 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node, static) =
+{
+ .function = icmp6_neighbor_solicitation,
+ .name = "icmp6-neighbor-solicitation",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+ .next_nodes = {
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP] = "error-drop",
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node, static) =
+{
+ .function = icmp6_neighbor_advertisement,
+ .name = "icmp6-neighbor-advertisement",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* API support functions */
+int
+ip6_neighbor_ra_config (vlib_main_t * vm, u32 sw_if_index,
+ u8 suppress, u8 managed, u8 other,
+ u8 ll_option, u8 send_unicast, u8 cease,
+ u8 use_lifetime, u32 lifetime,
+ u32 initial_count, u32 initial_interval,
+ u32 max_interval, u32 min_interval, u8 is_no)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ int error;
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+ error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (!error)
+ {
+
+ ip6_radv_t *radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ if ((max_interval != 0) && (min_interval == 0))
+ min_interval = .75 * max_interval;
+
+ max_interval =
+ (max_interval !=
+ 0) ? ((is_no) ? DEF_MAX_RADV_INTERVAL : max_interval) :
+ radv_info->max_radv_interval;
+ min_interval =
+ (min_interval !=
+ 0) ? ((is_no) ? DEF_MIN_RADV_INTERVAL : min_interval) :
+ radv_info->min_radv_interval;
+ lifetime =
+ (use_lifetime !=
+ 0) ? ((is_no) ? DEF_DEF_RTR_LIFETIME : lifetime) :
+ radv_info->adv_router_lifetime_in_sec;
+
+ if (lifetime)
+ {
+ if (lifetime > MAX_DEF_RTR_LIFETIME)
+ lifetime = MAX_DEF_RTR_LIFETIME;
+
+ if (lifetime <= max_interval)
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ if (min_interval != 0)
+ {
+ if ((min_interval > .75 * max_interval) || (min_interval < 3))
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ if ((initial_count > MAX_INITIAL_RTR_ADVERTISEMENTS) ||
+ (initial_interval > MAX_INITIAL_RTR_ADVERT_INTERVAL))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ /*
+ if "flag" is set and is_no is true, restore the default value;
+ otherwise set the value corresponding to "flag".
+ if "flag" is clear, don't change the corresponding value.
+ */
+ radv_info->send_radv =
+ (suppress != 0) ? ((is_no != 0) ? 1 : 0) : radv_info->send_radv;
+ radv_info->adv_managed_flag =
+ (managed != 0) ? ((is_no) ? 0 : 1) : radv_info->adv_managed_flag;
+ radv_info->adv_other_flag =
+ (other != 0) ? ((is_no) ? 0 : 1) : radv_info->adv_other_flag;
+ radv_info->adv_link_layer_address =
+ (ll_option !=
+ 0) ? ((is_no) ? 1 : 0) : radv_info->adv_link_layer_address;
+ radv_info->send_unicast =
+ (send_unicast != 0) ? ((is_no) ? 0 : 1) : radv_info->send_unicast;
+ radv_info->cease_radv =
+ (cease != 0) ? ((is_no) ? 0 : 1) : radv_info->cease_radv;
+
+ radv_info->min_radv_interval = min_interval;
+ radv_info->max_radv_interval = max_interval;
+ radv_info->adv_router_lifetime_in_sec = lifetime;
+
+ radv_info->initial_adverts_count =
+ (initial_count !=
+ 0) ? ((is_no) ? MAX_INITIAL_RTR_ADVERTISEMENTS : initial_count) :
+ radv_info->initial_adverts_count;
+ radv_info->initial_adverts_interval =
+ (initial_interval !=
+ 0) ? ((is_no) ? MAX_INITIAL_RTR_ADVERT_INTERVAL : initial_interval) :
+ radv_info->initial_adverts_interval;
+
+ /* restart */
+ if ((cease != 0) && (is_no))
+ radv_info->send_radv = 1;
+
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ }
+ return (error);
+}
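+
+/*
+ * Usage sketch (illustrative only): suppress RAs on an interface while
+ * leaving every other parameter unchanged. Arguments after sw_if_index
+ * are (suppress, managed, other, ll_option, send_unicast, cease,
+ * use_lifetime, lifetime, initial_count, initial_interval,
+ * max_interval, min_interval, is_no); a zero flag or value means
+ * "don't change":
+ *
+ *   int rv = ip6_neighbor_ra_config (vm, sw_if_index,
+ *                                    1, 0, 0, 0, 0, 0,
+ *                                    0, 0, 0, 0, 0, 0, 0);
+ *
+ * A non-zero rv means ip6 is not enabled on the interface
+ * (VNET_API_ERROR_INVALID_SW_IF_INDEX) or a value was out of range.
+ */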
+
+int
+ip6_neighbor_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t * prefix_addr, u8 prefix_len,
+ u8 use_default, u32 val_lifetime, u32 pref_lifetime,
+ u8 no_advertise, u8 off_link, u8 no_autoconfig,
+ u8 no_onlink, u8 is_no)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ int error;
+
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (!error)
+ {
+ f64 now = vlib_time_now (vm);
+ ip6_radv_t *radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* prefix info add, delete or update */
+ ip6_radv_prefix_t *prefix;
+
+ /* lookup prefix info for this address on this interface */
+ uword *p = mhash_get (&radv_info->address_to_prefix_index, prefix_addr);
+
+ prefix = p ? pool_elt_at_index (radv_info->adv_prefixes_pool, p[0]) : 0;
+
+ if (is_no)
+ {
+ /* delete */
+ if (!prefix)
+ return VNET_API_ERROR_INVALID_VALUE; /* invalid prefix */
+
+ if (prefix->prefix_len != prefix_len)
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ /* FIXME - Should the DP do this or the CP ? */
+ /* do specific delete processing here before returning */
+ /* try to remove from routing table */
+
+ mhash_unset (&radv_info->address_to_prefix_index, prefix_addr,
+ /* old_value */ 0);
+ pool_put (radv_info->adv_prefixes_pool, prefix);
+
+ radv_info->initial_adverts_sent =
+ radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ return (error);
+ }
+
+ /* adding or changing */
+ if (!prefix)
+ {
+ /* add */
+ u32 pi;
+ pool_get (radv_info->adv_prefixes_pool, prefix);
+ pi = prefix - radv_info->adv_prefixes_pool;
+ mhash_set (&radv_info->address_to_prefix_index, prefix_addr, pi,
+ /* old_value */ 0);
+
+ memset (prefix, 0x0, sizeof (ip6_radv_prefix_t));
+
+ prefix->prefix_len = prefix_len;
+ clib_memcpy (&prefix->prefix, prefix_addr, sizeof (ip6_address_t));
+
+ /* initialize default values */
+ prefix->adv_on_link_flag = 1; /* L bit set */
+ prefix->adv_autonomous_flag = 1; /* A bit set */
+ prefix->adv_valid_lifetime_in_secs = DEF_ADV_VALID_LIFETIME;
+ prefix->adv_pref_lifetime_in_secs = DEF_ADV_PREF_LIFETIME;
+ prefix->enabled = 1;
+ prefix->decrement_lifetime_flag = 1;
+ prefix->deprecated_prefix_flag = 1;
+
+ if (off_link == 0)
+ {
+ /* FIXME - Should the DP do this or the CP ? */
+ /* insert prefix into routing table as a connected prefix */
+ }
+
+ if (use_default)
+ goto restart;
+ }
+ else
+ {
+
+ if (prefix->prefix_len != prefix_len)
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ if (off_link != 0)
+ {
+ /* FIXME - Should the DP do this or the CP ? */
+ /* remove from routing table if already there */
+ }
+ }
+
+ if ((val_lifetime == ~0) || (pref_lifetime == ~0))
+ {
+ prefix->adv_valid_lifetime_in_secs = ~0;
+ prefix->adv_pref_lifetime_in_secs = ~0;
+ prefix->decrement_lifetime_flag = 0;
+ }
+ else
+ {
+ prefix->adv_valid_lifetime_in_secs = val_lifetime;
+ prefix->adv_pref_lifetime_in_secs = pref_lifetime;
+ }
+
+ /* copy remaining */
+ prefix->enabled = !(no_advertise != 0);
+ prefix->adv_on_link_flag = !((off_link != 0) || (no_onlink != 0));
+ prefix->adv_autonomous_flag = !(no_autoconfig != 0);
+
+ restart:
+ /* restart */
+ /* fill in the expiration times */
+ prefix->valid_lifetime_expires =
+ now + prefix->adv_valid_lifetime_in_secs;
+ prefix->pref_lifetime_expires = now + prefix->adv_pref_lifetime_in_secs;
+
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ }
+ return (error);
+}
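+
+/*
+ * Usage sketch (illustrative only; the prefix value is made up):
+ * advertise 2001:db8::/64 with the default lifetimes and flags.
+ * Passing use_default = 1 applies the defaults and skips the per-flag
+ * arguments (no_advertise, off_link, no_autoconfig, no_onlink):
+ *
+ *   ip6_address_t pfx;
+ *   ip6_address_set_zero (&pfx);
+ *   pfx.as_u16[0] = clib_host_to_net_u16 (0x2001);
+ *   pfx.as_u16[1] = clib_host_to_net_u16 (0x0db8);
+ *   ip6_neighbor_ra_prefix (vm, sw_if_index, &pfx, 64,
+ *                           1, 0, 0, 0, 0, 0, 0, 0);
+ */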
+
+clib_error_t *
+ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ clib_error_t *error = 0;
+ u8 is_no = 0;
+ u8 suppress = 0, managed = 0, other = 0;
+ u8 suppress_ll_option = 0, send_unicast = 0, cease = 0;
+ u8 use_lifetime = 0;
+ u32 sw_if_index, ra_lifetime = 0, ra_initial_count =
+ 0, ra_initial_interval = 0;
+ u32 ra_max_interval = 0, ra_min_interval = 0;
+
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_sw_interface_t *sw_if0;
+
+ int add_radv_info = 1;
+ __attribute__ ((unused)) ip6_radv_t *radv_info = 0;
+ ip6_address_t ip6_addr;
+ u32 addr_len;
+
+
+ /* Get a line of input. */
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ /* get basic radv info for this interface */
+ if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+
+ if (unformat_user (line_input,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ u32 ri;
+ ethernet_interface_t *eth_if0 = 0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ if (!eth_if0)
+ {
+ error =
+ clib_error_return (0, "Interface must be of ethernet type");
+ goto done;
+ }
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown interface %U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "invalid interface name %U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ /* get the rest of the command */
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "no"))
+ is_no = 1;
+ else if (unformat (line_input, "prefix %U/%d",
+ unformat_ip6_address, &ip6_addr, &addr_len))
+ {
+ add_radv_info = 0;
+ break;
+ }
+ else if (unformat (line_input, "ra-managed-config-flag"))
+ {
+ managed = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-other-config-flag"))
+ {
+ other = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-suppress") ||
+ unformat (line_input, "ra-surpress"))
+ {
+ suppress = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-suppress-link-layer") ||
+ unformat (line_input, "ra-surpress-link-layer"))
+ {
+ suppress_ll_option = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-send-unicast"))
+ {
+ send_unicast = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-lifetime"))
+ {
+ if (!unformat (line_input, "%d", &ra_lifetime))
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ use_lifetime = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-initial"))
+ {
+ if (!unformat
+ (line_input, "%d %d", &ra_initial_count, &ra_initial_interval))
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ break;
+ }
+ else if (unformat (line_input, "ra-interval"))
+ {
+ if (!unformat (line_input, "%d", &ra_max_interval))
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+
+ if (!unformat (line_input, "%d", &ra_min_interval))
+ ra_min_interval = 0;
+ break;
+ }
+ else if (unformat (line_input, "ra-cease"))
+ {
+ cease = 1;
+ break;
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (add_radv_info)
+ {
+ ip6_neighbor_ra_config (vm, sw_if_index,
+ suppress, managed, other,
+ suppress_ll_option, send_unicast, cease,
+ use_lifetime, ra_lifetime,
+ ra_initial_count, ra_initial_interval,
+ ra_max_interval, ra_min_interval, is_no);
+ }
+ else
+ {
+ u32 valid_lifetime_in_secs = 0;
+ u32 pref_lifetime_in_secs = 0;
+ u8 use_prefix_default_values = 0;
+ u8 no_advertise = 0;
+ u8 off_link = 0;
+ u8 no_autoconfig = 0;
+ u8 no_onlink = 0;
+
+ /* get the rest of the command */
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "default"))
+ {
+ use_prefix_default_values = 1;
+ break;
+ }
+ else if (unformat (line_input, "infinite"))
+ {
+ valid_lifetime_in_secs = ~0;
+ pref_lifetime_in_secs = ~0;
+ break;
+ }
+ else if (unformat (line_input, "%d %d", &valid_lifetime_in_secs,
+ &pref_lifetime_in_secs))
+ break;
+ else
+ break;
+ }
+
+
+ /* get the rest of the command */
+ while (!use_prefix_default_values &&
+ unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "no-advertise"))
+ no_advertise = 1;
+ else if (unformat (line_input, "off-link"))
+ off_link = 1;
+ else if (unformat (line_input, "no-autoconfig"))
+ no_autoconfig = 1;
+ else if (unformat (line_input, "no-onlink"))
+ no_onlink = 1;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ ip6_neighbor_ra_prefix (vm, sw_if_index,
+ &ip6_addr, addr_len,
+ use_prefix_default_values,
+ valid_lifetime_in_secs,
+ pref_lifetime_in_secs,
+ no_advertise,
+ off_link, no_autoconfig, no_onlink, is_no);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static void
+ip6_print_addrs (vlib_main_t * vm, u32 * addrs)
+{
+ ip_lookup_main_t *lm = &ip6_main.lookup_main;
+ u32 i;
+
+ for (i = 0; i < vec_len (addrs); i++)
+ {
+ ip_interface_address_t *a =
+ pool_elt_at_index (lm->if_address_pool, addrs[i]);
+ ip6_address_t *address = ip_interface_address_get_address (lm, a);
+
+ vlib_cli_output (vm, "\t\t%U/%d",
+ format_ip6_address, address, a->address_length);
+ }
+}
+
+static clib_error_t *
+show_ip6_interface_cmd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
+
+ if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ ip_lookup_main_t *lm = &ip6_main.lookup_main;
+ ip6_radv_t *radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ vlib_cli_output (vm, "%U is admin %s\n",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index),
+ (vnet_sw_interface_is_admin_up (vnm, sw_if_index) ?
+ "up" : "down"));
+
+ u32 ai;
+ u32 *link_scope = 0, *global_scope = 0;
+ u32 *local_scope = 0, *unknown_scope = 0;
+ ip_interface_address_t *a;
+
+ vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ ai = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+
+ while (ai != (u32) ~ 0)
+ {
+ a = pool_elt_at_index (lm->if_address_pool, ai);
+ ip6_address_t *address =
+ ip_interface_address_get_address (lm, a);
+
+ if (ip6_address_is_link_local_unicast (address))
+ vec_add1 (link_scope, ai);
+ else if (ip6_address_is_global_unicast (address))
+ vec_add1 (global_scope, ai);
+ else if (ip6_address_is_local_unicast (address))
+ vec_add1 (local_scope, ai);
+ else
+ vec_add1 (unknown_scope, ai);
+
+ ai = a->next_this_sw_interface;
+ }
+
+ if (vec_len (link_scope))
+ {
+ vlib_cli_output (vm, "\tLink-local address(es):\n");
+ ip6_print_addrs (vm, link_scope);
+ vec_free (link_scope);
+ }
+
+ if (vec_len (local_scope))
+ {
+ vlib_cli_output (vm, "\tLocal unicast address(es):\n");
+ ip6_print_addrs (vm, local_scope);
+ vec_free (local_scope);
+ }
+
+ if (vec_len (global_scope))
+ {
+ vlib_cli_output (vm, "\tGlobal unicast address(es):\n");
+ ip6_print_addrs (vm, global_scope);
+ vec_free (global_scope);
+ }
+
+ if (vec_len (unknown_scope))
+ {
+ vlib_cli_output (vm, "\tOther-scope address(es):\n");
+ ip6_print_addrs (vm, unknown_scope);
+ vec_free (unknown_scope);
+ }
+
+ vlib_cli_output (vm, "\tJoined group address(es):\n");
+ ip6_mldp_group_t *m;
+ /* *INDENT-OFF* */
+ pool_foreach (m, radv_info->mldp_group_pool,
+ ({
+ vlib_cli_output (vm, "\t\t%U\n", format_ip6_address,
+ &m->mcast_address);
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "\tAdvertised Prefixes:\n");
+ ip6_radv_prefix_t *p;
+ /* *INDENT-OFF* */
+ pool_foreach (p, radv_info->adv_prefixes_pool,
+ ({
+ vlib_cli_output (vm, "\t\tprefix %U, length %d\n",
+ format_ip6_address, &p->prefix, p->prefix_len);
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "\tMTU is %d\n", radv_info->adv_link_mtu);
+ vlib_cli_output (vm, "\tICMP error messages are unlimited\n");
+ vlib_cli_output (vm, "\tICMP redirects are disabled\n");
+ vlib_cli_output (vm, "\tICMP unreachables are not sent\n");
+ vlib_cli_output (vm, "\tND DAD is disabled\n");
+ //vlib_cli_output (vm, "\tND reachable time is %d milliseconds\n",);
+ vlib_cli_output (vm, "\tND advertised reachable time is %d\n",
+ radv_info->adv_neighbor_reachable_time_in_msec);
+ vlib_cli_output (vm,
+ "\tND advertised retransmit interval is %d (msec)\n",
+ radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+
+ u32 ra_interval = radv_info->max_radv_interval;
+ u32 ra_interval_min = radv_info->min_radv_interval;
+ vlib_cli_output (vm,
+ "\tND router advertisements are sent every %d seconds (min interval is %d)\n",
+ ra_interval, ra_interval_min);
+ vlib_cli_output (vm,
+ "\tND router advertisements live for %d seconds\n",
+ radv_info->adv_router_lifetime_in_sec);
+ vlib_cli_output (vm,
+ "\tHosts %s stateless autoconfig for addresses\n",
+ (radv_info->adv_managed_flag) ? "use" :
+ " don't use");
+ vlib_cli_output (vm, "\tND router advertisements sent %d\n",
+ radv_info->n_advertisements_sent);
+ vlib_cli_output (vm, "\tND router solicitations received %d\n",
+ radv_info->n_solicitations_rcvd);
+ vlib_cli_output (vm, "\tND router solicitations dropped %d\n",
+ radv_info->n_solicitations_dropped);
+ }
+ else
+ {
+ error = clib_error_return (0, "IPv6 not enabled on interface",
+ format_unformat_error, input);
+
+ }
+ }
+ return error;
+}
+
+/*?
+ * This command is used to display various IPv6 attributes on a given
+ * interface.
+ *
+ * @cliexpar
+ * Example of how to display IPv6 settings:
+ * @cliexstart{show ip6 interface GigabitEthernet2/0/0}
+ * GigabitEthernet2/0/0 is admin up
+ * Link-local address(es):
+ * fe80::ab8/64
+ * Joined group address(es):
+ * ff02::1
+ * ff02::2
+ * ff02::16
+ * ff02::1:ff00:ab8
+ * Advertised Prefixes:
+ * prefix fe80::fe:28ff:fe9c:75b3, length 64
+ * MTU is 1500
+ * ICMP error messages are unlimited
+ * ICMP redirects are disabled
+ * ICMP unreachables are not sent
+ * ND DAD is disabled
+ * ND advertised reachable time is 0
+ * ND advertised retransmit interval is 0 (msec)
+ * ND router advertisements are sent every 200 seconds (min interval is 150)
+ * ND router advertisements live for 600 seconds
+ * Hosts use stateless autoconfig for addresses
+ * ND router advertisements sent 19336
+ * ND router solicitations received 0
+ * ND router solicitations dropped 0
+ * @cliexend
+ * Example of output if IPv6 is not enabled on the interface:
+ * @cliexstart{show ip6 interface GigabitEthernet2/0/0}
+ * show ip6 interface: IPv6 not enabled on interface
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip6_interface_command, static) =
+{
+ .path = "show ip6 interface",
+ .function = show_ip6_interface_cmd,
+ .short_help = "show ip6 interface <interface>",
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index)
+{
+ clib_error_t *error = 0;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ /* if not created - do nothing */
+ if (ri != ~0)
+ {
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_radv_t *radv_info;
+
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* check radv_info ref count for other ip6 addresses on this interface */
+ /* This implicitly excludes the link local address */
+ if (radv_info->ref_count == 0)
+ {
+ /* essentially "disables" ipv6 on this interface */
+ error = ip6_add_del_interface_address (vm, sw_if_index,
+ &radv_info->
+ link_local_address, 128,
+ 1 /* is_del */ );
+
+ ip6_neighbor_sw_interface_add_del (vnm, sw_if_index,
+ 0 /* is_add */ );
+ ip6_mfib_interface_enable_disable (sw_if_index, 0);
+ }
+ }
+ return error;
+}
+
+int
+ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri = ~0;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ return ri != ~0;
+}
+
+clib_error_t *
+enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index)
+{
+ clib_error_t *error = 0;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri;
+ int is_add = 1;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ /* if not created yet */
+ if (ri == ~0)
+ {
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *sw_if0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ ethernet_interface_t *eth_if0;
+
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+ if (eth_if0)
+ {
+ /* create radv_info for this interface; it holds all the info needed for router adverts */
+ ri =
+ ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, is_add);
+
+ if (ri != ~0)
+ {
+ ip6_radv_t *radv_info;
+ ip6_address_t link_local_address;
+
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ ip6_link_local_address_from_ethernet_mac_address
+ (&link_local_address, eth_if0->address);
+
+ sw_if0 = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_SUB ||
+ sw_if0->type == VNET_SW_INTERFACE_TYPE_P2P)
+ {
+ /* make up an interface id */
+ md5_context_t m;
+ u8 digest[16];
+
+ link_local_address.as_u64[0] = radv_info->randomizer;
+
+ md5_init (&m);
+ md5_add (&m, &link_local_address, 16);
+ md5_finish (&m, digest);
+
+ clib_memcpy (&link_local_address, digest, 16);
+
+ radv_info->randomizer = link_local_address.as_u64[0];
+
+ link_local_address.as_u64[0] =
+ clib_host_to_net_u64 (0xFE80000000000000ULL);
+ /* clear u bit */
+ link_local_address.as_u8[8] &= 0xfd;
+ }
+
+ ip6_mfib_interface_enable_disable (sw_if_index, 1);
+
+ /* essentially "enables" ipv6 on this interface */
+ error = ip6_add_del_interface_address (vm, sw_if_index,
+ &link_local_address,
+ 128
+ /* address width */ ,
+ 0 /* is_del */ );
+
+ if (error)
+ ip6_neighbor_sw_interface_add_del (vnm, sw_if_index,
+ !is_add);
+ else
+ {
+ radv_info->link_local_address = link_local_address;
+ }
+ }
+ }
+ }
+ }
+ return error;
+}
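+
+/*
+ * Usage sketch (illustrative only): pair enable/disable from code.
+ * Note that disable_ip6_interface() only tears the state down once no
+ * other ip6 addresses hold a reference (ref_count == 0):
+ *
+ *   clib_error_t *e = enable_ip6_interface (vm, sw_if_index);
+ *   if (!e && ip6_interface_enabled (vm, sw_if_index))
+ *     e = disable_ip6_interface (vm, sw_if_index);
+ */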
+
+static clib_error_t *
+enable_ip6_interface_cmd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
+
+ if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ enable_ip6_interface (vm, sw_if_index);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown interface\n'",
+ format_unformat_error, input);
+
+ }
+ return error;
+}
+
+/*?
+ * This command is used to enable IPv6 on a given interface.
+ *
+ * @cliexpar
+ * Example of how to enable IPv6 on a given interface:
+ * @cliexcmd{enable ip6 interface GigabitEthernet2/0/0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (enable_ip6_interface_command, static) =
+{
+ .path = "enable ip6 interface",
+ .function = enable_ip6_interface_cmd,
+ .short_help = "enable ip6 interface <interface>",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+disable_ip6_interface_cmd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
+
+ if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = disable_ip6_interface (vm, sw_if_index);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown interface\n'",
+ format_unformat_error, input);
+
+ }
+ return error;
+}
+
+/*?
+ * This command is used to disable IPv6 on a given interface.
+ *
+ * @cliexpar
+ * Example of how to disable IPv6 on a given interface:
+ * @cliexcmd{disable ip6 interface GigabitEthernet2/0/0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (disable_ip6_interface_command, static) =
+{
+ .path = "disable ip6 interface",
+ .function = disable_ip6_interface_cmd,
+ .short_help = "disable ip6 interface <interface>",
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command is used to configure the neighbor discovery
+ * parameters on a given interface. Use the '<em>show ip6 interface</em>'
+ * command to display some of the current neighbor discovery parameters
+ * on a given interface. This command has three formats:
+ *
+ *
+ * <b>Format 1 - Router Advertisement Options:</b> (Only one can be entered in a single command)
+ *
+ * '<em><b>ip6 nd <interface> [no] [ra-managed-config-flag] | [ra-other-config-flag] | [ra-suppress] | [ra-suppress-link-layer] | [ra-send-unicast] | [ra-lifetime <lifetime>] | [ra-initial <cnt> <interval>] | [ra-interval <max-interval> [<min-interval>]] | [ra-cease]</b></em>'
+ *
+ * Where:
+ *
+ * <em>[no] ra-managed-config-flag</em> - Advertises in ICMPv6
+ * router-advertisement messages to use stateful address
+ * auto-configuration to obtain address information (sets the M-bit).
+ * Default is the M-bit is not set and the '<em>no</em>' option
+ * returns it to this default state.
+ *
+ * <em>[no] ra-other-config-flag</em> - Indicates in ICMPv6
+ * router-advertisement messages that hosts use stateful auto
+ * configuration to obtain nonaddress related information (sets
+ * the O-bit). Default is the O-bit is not set and the '<em>no</em>'
+ * option returns it to this default state.
+ *
+ * <em>[no] ra-suppress</em> - Disables sending ICMPv6 router-advertisement
+ * messages. The '<em>no</em>' option implies to enable sending ICMPv6
+ * router-advertisement messages.
+ *
+ * <em>[no] ra-suppress-link-layer</em> - Indicates not to include the
+ * optional source link-layer address in the ICMPv6 router-advertisement
+ * messages. Default is to include the optional source link-layer address
+ * and the '<em>no</em>' option returns it to this default state.
+ *
+ * <em>[no] ra-send-unicast</em> - Use the source address of the
+ * router-solicitation message if available. The default is to use
+ * multicast address of all nodes, and the '<em>no</em>' option returns
+ * it to this default state.
+ *
+ * <em>[no] ra-lifetime <lifetime></em> - Advertises the lifetime of a
+ * default router in ICMPv6 router-advertisement messages. The range is
+ * from 0 to 9000 seconds. '<em><lifetime></em>' must be greater than
+ * '<em><max-interval></em>'. The default value is 600 seconds and the
+ * '<em>no</em>' option returns it to this default value.
+ *
+ * <em>[no] ra-initial <cnt> <interval></em> - Number of initial ICMPv6
+ * router-advertisement messages sent and the interval between each
+ * message. Range for count is 1 - 3 and default is 3. Range for interval
+ * is 1 to 16 seconds, and default is 16 seconds. The '<em>no</em>' option
+ * returns both to their default value.
+ *
+ * <em>[no] ra-interval <max-interval> [<min-interval>]</em> - Configures the
+ * interval between sending ICMPv6 router-advertisement messages. The
+ * range for max-interval is from 4 to 200 seconds. min-interval can not
+ * be more than 75% of max-interval. If not set, min-interval will be
+ * set to 75% of max-interval. The range for min-interval is from 3 to
+ * 150 seconds. The '<em>no</em>' option returns both to their default
+ * value.
+ *
+ * <em>[no] ra-cease</em> - Cease sending ICMPv6 router-advertisement messages.
+ * The '<em>no</em>' options implies to start (or restart) sending
+ * ICMPv6 router-advertisement messages.
+ *
+ *
+ * <b>Format 2 - Prefix Options:</b>
+ *
+ * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> [<valid-lifetime> <pref-lifetime> | infinite] [no-advertise] [off-link] [no-autoconfig] [no-onlink]</b></em>'
+ *
+ * Where:
+ *
+ * <em>no</em> - All additional flags are ignored and the prefix is deleted.
+ *
+ * <em><valid-lifetime> <pref-lifetime></em> - '<em><valid-lifetime></em>' is the
+ * length of time in seconds during which the prefix is valid for the purpose of
+ * on-link determination. Range is 7203 to 2592000 seconds and default is 2592000
+ * seconds (30 days). '<em><pref-lifetime></em>' is the preferred-lifetime and is the
+ * length of time in seconds during which addresses generated from the prefix remain
+ * preferred. Range is 0 to 604800 seconds and default is 604800 seconds (7 days).
+ *
+ * <em>infinite</em> - Both '<em><valid-lifetime></em>' and '<em><pref-lifetime></em>'
+ * are infinite; no timeout.
+ *
+ * <em>no-advertise</em> - Do not send full router address in prefix
+ * advertisement. Default is to advertise (i.e. - This flag is off by default).
+ *
+ * <em>off-link</em> - Prefix is off-link, clear L-bit in packet. Default is on-link
+ * (i.e. - This flag is off and L-bit in packet is set by default and this prefix can
+ * be used for on-link determination). '<em>no-onlink</em>' also controls the L-bit.
+ *
+ * <em>no-autoconfig</em> - Do not use prefix for autoconfiguration, clear A-bit in packet.
+ * Default is autoconfig (i.e. - This flag is off and A-bit in packet is set by default).
+ *
+ * <em>no-onlink</em> - Do not use prefix for onlink determination, clear L-bit in packet.
+ * Default is on-link (i.e. - This flag is off and L-bit in packet is set by default and
+ * this prefix can be used for on-link determination). '<em>off-link</em>' also controls
+ * the L-bit.
+ *
+ *
+ * <b>Format 3: - Default of Prefix:</b>
+ *
+ * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> default</b></em>'
+ *
+ * When a new prefix is added (or existing one is being overwritten) <em>default</em>
+ * uses default values for the prefix. If <em>no</em> is used, the <em>default</em>
+ * is ignored and the prefix is deleted.
+ *
+ *
+ * @cliexpar
+ * Example of how to set a router advertisement option:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 ra-interval 100 20}
+ * Example of how to add a prefix:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 prefix fe80::fe:28ff:fe9c:75b3/64 infinite no-advertise}
+ * Example of how to delete a prefix:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 no prefix fe80::fe:28ff:fe9c:75b3/64}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_nd_command, static) =
+{
+ .path = "ip6 nd",
+ .short_help = "ip6 nd <interface> ...",
+ .function = ip6_neighbor_cmd,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+set_ip6_link_local_address (vlib_main_t * vm,
+ u32 sw_if_index, ip6_address_t * address)
+{
+ clib_error_t *error = 0;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri;
+ ip6_radv_t *radv_info;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ if (!ip6_address_is_link_local_unicast (address))
+ {
+ vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_LINK_LOCAL;
+ return (error = clib_error_return (0, "address not link-local",
+ format_unformat_error));
+ }
+
+ /* call enable ipv6 */
+ enable_ip6_interface (vm, sw_if_index);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* save the link-local address (overwriting the default) */
+
+ /* delete the old one */
+ error = ip6_add_del_interface_address (vm, sw_if_index,
+ &radv_info->link_local_address,
+ 128, 1 /* is_del */ );
+
+ if (!error)
+ {
+ /* add the new one */
+ error = ip6_add_del_interface_address (vm, sw_if_index,
+ address, 128,
+ 0 /* is_del */ );
+
+ if (!error)
+ {
+ radv_info->link_local_address = *address;
+ }
+ }
+ }
+ else
+ {
+ vnm->api_errno = VNET_API_ERROR_IP6_NOT_ENABLED;
+ error = clib_error_return (0, "ip6 not enabled for interface",
+ format_unformat_error);
+ }
+ return error;
+}
+
+clib_error_t *
+set_ip6_link_local_address_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+  ip6_address_t ip6_addr;
+  int addr_set = 0;
+
+  if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      /* get the rest of the command */
+      while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+	{
+	  if (unformat (input, "%U", unformat_ip6_address, &ip6_addr))
+	    {
+	      addr_set = 1;
+	      break;
+	    }
+	  else
+	    return (unformat_parse_error (input));
+	}
+    }
+
+  /* guard against an uninitialized interface index or address */
+  if (sw_if_index == ~0 || !addr_set)
+    return clib_error_return (0, "specify an interface and an ip6 address");
+
+  return set_ip6_link_local_address (vm, sw_if_index, &ip6_addr);
+}
+
+/*?
+ * This command is used to assign an IPv6 Link-local address to an
+ * interface. This command will enable IPv6 on an interface if it
+ * is not already enabled. Use the '<em>show ip6 interface</em>' command
+ * to display the assigned Link-local address.
+ *
+ * @cliexpar
+ * Example of how to assign an IPv6 Link-local address to an interface:
+ * @cliexcmd{set ip6 link-local address GigabitEthernet2/0/0 FE80::AB8}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_link_local_address_command, static) =
+{
+ .path = "set ip6 link-local address",
+ .short_help = "set ip6 link-local address <interface> <ip6-address>",
+ .function = set_ip6_link_local_address_cmd,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief callback when an interface address is added or deleted
+ */
+static void
+ip6_neighbor_add_del_interface_address (ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length,
+ u32 if_address_index, u32 is_delete)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri;
+ vlib_main_t *vm = vnm->vlib_main;
+ ip6_radv_t *radv_info;
+ ip6_address_t a;
+
+  /* create solicited node multicast address for this interface address */
+ ip6_set_solicited_node_multicast_address (&a, 0);
+
+ a.as_u8[0xd] = address->as_u8[0xd];
+ a.as_u8[0xe] = address->as_u8[0xe];
+ a.as_u8[0xf] = address->as_u8[0xf];
+
+ if (!is_delete)
+ {
+ /* try to create radv_info - does nothing if ipv6 already enabled */
+ enable_ip6_interface (vm, sw_if_index);
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+ if (ri != ~0)
+ {
+ /* get radv_info */
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* add address */
+ if (!ip6_address_is_link_local_unicast (address))
+ radv_info->ref_count++;
+
+ ip6_neighbor_add_mld_prefix (radv_info, &a);
+ }
+ }
+ else
+ {
+
+ /* delete */
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ /* get radv_info */
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ ip6_neighbor_del_mld_prefix (radv_info, &a);
+
+	  /* if interface up send MLD "report" */
+ radv_info->all_routers_mcast = 0;
+
+	  /* remove address */
+ if (!ip6_address_is_link_local_unicast (address))
+ radv_info->ref_count--;
+ }
+ /* Ensure that IPv6 is disabled, and LL removed after ref_count reaches 0 */
+ disable_ip6_interface (vm, sw_if_index);
+ }
+}
+
+clib_error_t *
+ip6_set_neighbor_limit (u32 neighbor_limit)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+
+ nm->limit_neighbor_cache_size = neighbor_limit;
+ return 0;
+}
+
+static void
+ip6_neighbor_table_bind (ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ u32 new_fib_index, u32 old_fib_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n = NULL;
+ u32 i, *to_re_add = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, nm->neighbor_pool,
+ ({
+ if (n->key.sw_if_index == sw_if_index)
+ vec_add1 (to_re_add, n - nm->neighbor_pool);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (to_re_add); i++)
+ {
+ n = pool_elt_at_index (nm->neighbor_pool, to_re_add[i]);
+ ip6_neighbor_adj_fib_remove (n, old_fib_index);
+ ip6_neighbor_adj_fib_add (n, new_fib_index);
+ }
+ vec_free (to_re_add);
+}
+
+static clib_error_t *
+ip6_neighbor_init (vlib_main_t * vm)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_main_t *im = &ip6_main;
+
+ mhash_init (&nm->neighbor_index_by_key,
+ /* value size */ sizeof (uword),
+ /* key size */ sizeof (ip6_neighbor_key_t));
+
+ icmp6_register_type (vm, ICMP6_neighbor_solicitation,
+ ip6_icmp_neighbor_solicitation_node.index);
+ icmp6_register_type (vm, ICMP6_neighbor_advertisement,
+ ip6_icmp_neighbor_advertisement_node.index);
+ icmp6_register_type (vm, ICMP6_router_solicitation,
+ ip6_icmp_router_solicitation_node.index);
+ icmp6_register_type (vm, ICMP6_router_advertisement,
+ ip6_icmp_router_advertisement_node.index);
+
+ /* handler node for ip6 neighbor discovery events and timers */
+ vlib_register_node (vm, &ip6_icmp_neighbor_discovery_event_node);
+
+ /* add call backs */
+ ip6_add_del_interface_address_callback_t cb;
+ memset (&cb, 0x0, sizeof (ip6_add_del_interface_address_callback_t));
+
+ /* when an interface address changes... */
+ cb.function = ip6_neighbor_add_del_interface_address;
+ cb.function_opaque = 0;
+ vec_add1 (im->add_del_interface_address_callbacks, cb);
+
+ ip6_table_bind_callback_t cbt;
+ cbt.function = ip6_neighbor_table_bind;
+ cbt.function_opaque = 0;
+ vec_add1 (im->table_bind_callbacks, cbt);
+
+ mhash_init (&nm->pending_resolutions_by_address,
+ /* value size */ sizeof (uword),
+ /* key size */ sizeof (ip6_address_t));
+
+ mhash_init (&nm->mac_changes_by_address,
+ /* value size */ sizeof (uword),
+ /* key size */ sizeof (ip6_address_t));
+
+ /* default, configurable */
+ nm->limit_neighbor_cache_size = 50000;
+
+ nm->wc_ip6_nd_publisher_node = (uword) ~ 0;
+
+#if 0
+ /* $$$$ Hack fix for today */
+ vec_validate_init_empty
+ (im->discover_neighbor_next_index_by_hw_if_index, 32, 0 /* drop */ );
+#endif
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_neighbor_init);
+
+
+void
+vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque, uword data)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_address_t *address = address_arg;
+ uword *p;
+ pending_resolution_t *pr;
+
+ pool_get (nm->pending_resolutions, pr);
+
+ pr->next_index = ~0;
+ pr->node_index = node_index;
+ pr->type_opaque = type_opaque;
+ pr->data = data;
+
+ p = mhash_get (&nm->pending_resolutions_by_address, address);
+ if (p)
+ {
+ /* Insert new resolution at the head of the list */
+ pr->next_index = p[0];
+ mhash_unset (&nm->pending_resolutions_by_address, address, 0);
+ }
+
+ mhash_set (&nm->pending_resolutions_by_address, address,
+ pr - nm->pending_resolutions, 0 /* old value */ );
+}
+
+int
+vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm,
+ void *data_callback,
+ u32 pid,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque, uword data, int is_add)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_address_t *address = address_arg;
+
+ /* Try to find an existing entry */
+ u32 *first = (u32 *) mhash_get (&nm->mac_changes_by_address, address);
+ u32 *p = first;
+ pending_resolution_t *mc;
+ while (p && *p != ~0)
+ {
+ mc = pool_elt_at_index (nm->mac_changes, *p);
+ if (mc->node_index == node_index && mc->type_opaque == type_opaque
+ && mc->pid == pid)
+ break;
+ p = &mc->next_index;
+ }
+
+ int found = p && *p != ~0;
+ if (is_add)
+ {
+ if (found)
+ return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+
+ pool_get (nm->mac_changes, mc);
+      *mc = (pending_resolution_t) {
+	.next_index = ~0,
+	.node_index = node_index,
+	.type_opaque = type_opaque,
+	.data = data,
+	.data_callback = data_callback,
+	.pid = pid,
+      };
+
+ /* Insert new resolution at the end of the list */
+ u32 new_idx = mc - nm->mac_changes;
+ if (p)
+ p[0] = new_idx;
+ else
+ mhash_set (&nm->mac_changes_by_address, address, new_idx, 0);
+ }
+ else
+ {
+ if (!found)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ /* Clients may need to clean up pool entries, too */
+ void (*fp) (u32, u8 *) = data_callback;
+ if (fp)
+ (*fp) (mc->data, 0 /* no new mac addrs */ );
+
+ /* Remove the entry from the list and delete the entry */
+ *p = mc->next_index;
+ pool_put (nm->mac_changes, mc);
+
+ /* Remove from hash if we deleted the last entry */
+ if (*p == ~0 && p == first)
+ mhash_unset (&nm->mac_changes_by_address, address, 0);
+ }
+ return 0;
+}
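+
+/* Usage sketch (illustrative; example_nd_mac_cb and the zero pid are
+ * assumptions, not part of the original patch): register for
+ * notifications when the MAC binding of an address changes. A NULL
+ * new_mac in the callback signals that the registration is being
+ * deleted, matching the data_callback convention above. */
+static void
+example_nd_mac_cb (u32 data, u8 * new_mac)
+{
+  /* new_mac == 0 means the registration is being torn down */
+}
+
+static int
+example_watch_nd (vnet_main_t * vnm, ip6_address_t * addr, uword node_index)
+{
+  return vnet_add_del_ip6_nd_change_event
+    (vnm, example_nd_mac_cb, 0 /* pid */ , addr,
+     node_index, 0 /* type_opaque */ , 0 /* data */ , 1 /* is_add */ );
+}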
+
+int
+vnet_ip6_nd_term (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_buffer_t * p0,
+ ethernet_header_t * eth,
+ ip6_header_t * ip, u32 sw_if_index, u16 bd_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ icmp6_neighbor_solicitation_or_advertisement_header_t *ndh;
+
+ ndh = ip6_next_header (ip);
+ if (ndh->icmp.type != ICMP6_neighbor_solicitation &&
+ ndh->icmp.type != ICMP6_neighbor_advertisement)
+ return 0;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (p0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ u8 *t0 = vlib_add_trace (vm, node, p0,
+ sizeof (icmp6_input_trace_t));
+ clib_memcpy (t0, ip, sizeof (icmp6_input_trace_t));
+ }
+
+  /* Check if anyone wants ND events for L2 BDs */
+ if (PREDICT_FALSE
+ (nm->wc_ip6_nd_publisher_node != (uword) ~ 0
+ && !ip6_address_is_link_local_unicast (&ip->src_address)))
+ {
+ vnet_nd_wc_publish (sw_if_index, eth->src_address, &ip->src_address);
+ }
+
+  /* Check if a MAC entry exists for the solicited target IP */
+ if (ndh->icmp.type == ICMP6_neighbor_solicitation)
+ {
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt;
+ l2_bridge_domain_t *bd_config;
+ u8 *macp;
+
+ opt = (void *) (ndh + 1);
+ if ((opt->header.type !=
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) ||
+ (opt->header.n_data_u64s != 1))
+ return 0; /* source link layer address option not present */
+
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ macp =
+ (u8 *) hash_get_mem (bd_config->mac_by_ip6, &ndh->target_address);
+ if (macp)
+	{ /* found ip-mac entry, generate a neighbor advertisement response */
+ int bogus_length;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+ ip->dst_address = ip->src_address;
+ ip->src_address = ndh->target_address;
+ ip->hop_limit = 255;
+ opt->header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+ clib_memcpy (opt->ethernet_address, macp, 6);
+ ndh->icmp.type = ICMP6_neighbor_advertisement;
+ ndh->advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
+ ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+ ndh->icmp.checksum = 0;
+ ndh->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip, &bogus_length);
+ clib_memcpy (eth->dst_address, eth->src_address, 6);
+ clib_memcpy (eth->src_address, macp, 6);
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, 1);
+ return 1;
+ }
+ }
+
+ return 0;
+
+}
+
+int
+ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del)
+{
+ u32 fib_index;
+
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = *addr,
+ },
+ };
+ ip46_address_t nh = {
+ .ip6 = *addr,
+ };
+
+ fib_index = ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
+
+ if (~0 == fib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ if (is_del)
+ {
+ fib_table_entry_path_remove (fib_index,
+ &pfx,
+ FIB_SOURCE_IP6_ND_PROXY,
+ DPO_PROTO_IP6,
+ &nh,
+ sw_if_index,
+ ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+ /* flush the ND cache of this address if it's there */
+ vnet_unset_ip6_ethernet_neighbor (vlib_get_main (),
+ sw_if_index, addr, NULL, 0);
+ }
+ else
+ {
+ fib_table_entry_path_add (fib_index,
+ &pfx,
+ FIB_SOURCE_IP6_ND_PROXY,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP6,
+ &nh,
+ sw_if_index,
+ ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ return (0);
+}
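+
+/* Usage sketch (illustrative; the caller and arguments are
+ * hypothetical): install an ND-proxy /128 entry programmatically,
+ * mirroring what the CLI command below does for an operator. */
+static inline int
+example_add_nd_proxy (u32 sw_if_index, ip6_address_t * host)
+{
+  return ip6_neighbor_proxy_add_del (sw_if_index, host, 0 /* is_del */ );
+}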
+
+static clib_error_t *
+set_ip6_nd_proxy_cmd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_address_t addr;
+  u32 sw_if_index = ~0;
+  u8 is_del = 0;
+  int addr_set = 0;
+  int rv;
+
+  if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      /* get the rest of the command */
+      while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+	{
+	  if (unformat (input, "%U", unformat_ip6_address, &addr))
+	    {
+	      addr_set = 1;
+	      break;
+	    }
+	  else if (unformat (input, "delete") || unformat (input, "del"))
+	    is_del = 1;
+	  else
+	    return (unformat_parse_error (input));
+	}
+    }
+
+  /* guard against an uninitialized interface index or address */
+  if (sw_if_index == ~0 || !addr_set)
+    return clib_error_return (0, "specify an interface and an ip6 address");
+
+  rv = ip6_neighbor_proxy_add_del (sw_if_index, &addr, is_del);
+  if (rv)
+    return clib_error_return (0, "ip6_neighbor_proxy_add_del returned %d",
+			      rv);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_nd_proxy_command, static) =
+{
+ .path = "set ip6 nd proxy",
+ .short_help = "set ip6 nd proxy <HOST> <INTERFACE>",
+ .function = set_ip6_nd_proxy_cmd,
+};
+/* *INDENT-ON* */
+
+void
+ethernet_ndp_change_mac (u32 sw_if_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, nm->neighbor_pool,
+ ({
+ if (n->key.sw_if_index == sw_if_index)
+ {
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address,
+ ip6_nd_mk_complete_walk, n);
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+void
+send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi)
+{
+ ip6_main_t *i6m = &ip6_main;
+ u32 sw_if_index = hi->sw_if_index;
+ ip6_address_t *ip6_addr = ip6_interface_first_address (i6m, sw_if_index);
+ if (ip6_addr)
+ {
+      clib_warning
+	("Sending unsolicited NA IP6 address %U on sw_if_index %d",
+ format_ip6_address, ip6_addr, sw_if_index);
+
+ /* Form unsolicited neighbor advertisement packet from NS pkt template */
+ int bogus_length;
+ u32 bi = 0;
+ icmp6_neighbor_solicitation_header_t *h =
+ vlib_packet_template_get_packet (vm,
+ &i6m->discover_neighbor_packet_template,
+ &bi);
+ ip6_set_reserved_multicast_address (&h->ip.dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ h->ip.src_address = ip6_addr[0];
+ h->neighbor.icmp.type = ICMP6_neighbor_advertisement;
+ h->neighbor.target_address = ip6_addr[0];
+ h->neighbor.advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+ clib_memcpy (h->link_layer_option.ethernet_address,
+ hi->hw_address, vec_len (hi->hw_address));
+ h->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /* Setup MAC header with IP6 Etype and mcast DMAC */
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+ ethernet_header_t *e = vlib_buffer_get_current (b);
+ e->type = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
+ clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
+ ip6_multicast_ethernet_address (e->dst_address,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+
+ /* Send unsolicited ND advertisement packet out the specified interface */
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_neighbor.h b/src/vnet/ip/ip6_neighbor.h
new file mode 100644
index 00000000..ed80381b
--- /dev/null
+++ b/src/vnet/ip/ip6_neighbor.h
@@ -0,0 +1,109 @@
+/*
+ *
+ * ip6_neighbor.h: ip6 neighbor structures
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ip6_neighbor_h
+#define included_ip6_neighbor_h
+
+#include <vnet/fib/fib_types.h>
+
+typedef struct
+{
+ ip6_address_t ip6_address;
+ u32 sw_if_index;
+ u32 pad;
+} ip6_neighbor_key_t;
+
+typedef enum ip6_neighbor_flags_t_
+{
+ IP6_NEIGHBOR_FLAG_STATIC = (1 << 0),
+ IP6_NEIGHBOR_FLAG_DYNAMIC = (1 << 1),
+ IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY = (1 << 2),
+} __attribute__ ((packed)) ip6_neighbor_flags_t;
+
+typedef struct
+{
+ ip6_neighbor_key_t key;
+ u8 link_layer_address[8];
+ ip6_neighbor_flags_t flags;
+ u64 cpu_time_last_updated;
+ fib_node_index_t fib_entry_index;
+} ip6_neighbor_t;
+
+extern ip6_neighbor_t *ip6_neighbors_entries (u32 sw_if_index);
+
+extern int ip6_neighbor_ra_config (vlib_main_t * vm, u32 sw_if_index,
+ u8 suppress, u8 managed, u8 other,
+ u8 ll_option, u8 send_unicast, u8 cease,
+ u8 use_lifetime, u32 lifetime,
+ u32 initial_count, u32 initial_interval,
+ u32 max_interval, u32 min_interval,
+ u8 is_no);
+
+extern int ip6_neighbor_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t * prefix_addr, u8 prefix_len,
+ u8 use_default, u32 val_lifetime,
+ u32 pref_lifetime, u8 no_advertise,
+ u8 off_link, u8 no_autoconfig,
+ u8 no_onlink, u8 is_no);
+
+extern clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit);
+
+extern void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data);
+
+extern int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address,
+ int is_static,
+ int is_no_fib_entry);
+
+extern int vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword
+ n_bytes_link_layer_address);
+
+extern int ip6_neighbor_proxy_add_del (u32 sw_if_index,
+				       ip6_address_t * addr, u8 is_del);
+
+u32 ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index,
+ u32 is_add);
+typedef struct
+{
+ u32 sw_if_index;
+ ip6_address_t ip6;
+ u8 mac[6];
+} wc_nd_report_t;
+
+void wc_nd_set_publisher_node (uword node_index, uword event_type);
+
+#endif /* included_ip6_neighbor_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h
new file mode 100644
index 00000000..c0c745e2
--- /dev/null
+++ b/src/vnet/ip/ip6_packet.h
@@ -0,0 +1,536 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip6/packet.h: ip6 packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip6_packet_h
+#define included_ip6_packet_h
+
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/ip/ip4_packet.h>
+
+typedef union
+{
+ u8 as_u8[16];
+ u16 as_u16[8];
+ u32 as_u32[4];
+ u64 as_u64[2];
+ uword as_uword[16 / sizeof (uword)];
+}
+ip6_address_t;
+
+/* Packed so that the mhash key doesn't include uninitialized pad bytes */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ /* IP address must be first for ip_interface_address_get_address() to work */
+ ip6_address_t ip6_addr;
+ u32 fib_index;
+}) ip6_address_fib_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (union {
+ struct {
+ u32 pad[3];
+ ip4_address_t ip4;
+ };
+ ip6_address_t ip6;
+ u8 as_u8[16];
+ u64 as_u64[2];
+}) ip46_address_t;
+/* *INDENT-ON* */
+#define ip46_address_is_ip4(ip46) (((ip46)->pad[0] | (ip46)->pad[1] | (ip46)->pad[2]) == 0)
+#define ip46_address_mask_ip4(ip46) ((ip46)->pad[0] = (ip46)->pad[1] = (ip46)->pad[2] = 0)
+#define ip46_address_set_ip4(ip46, ip) (ip46_address_mask_ip4(ip46), (ip46)->ip4 = (ip)[0])
+#define ip46_address_reset(ip46) ((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0)
+#define ip46_address_cmp(ip46_1, ip46_2) (memcmp(ip46_1, ip46_2, sizeof(*ip46_1)))
+#define ip46_address_is_zero(ip46) (((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0))
+#define ip46_address_is_equal(a1, a2) (((a1)->as_u64[0] == (a2)->as_u64[0]) \
+ && ((a1)->as_u64[1] == (a2)->as_u64[1]))
+
+always_inline ip46_address_t
+to_ip46 (u32 is_ipv6, u8 * buf)
+{
+ ip46_address_t ip;
+ if (is_ipv6)
+ ip.ip6 = *((ip6_address_t *) buf);
+ else
+ ip46_address_set_ip4 (&ip, (ip4_address_t *) buf);
+ return ip;
+}
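+
+/* Sketch (illustrative, not part of the original patch): an
+ * ip46_address_t stores an IPv4 address in the low 4 bytes with the
+ * 12 pad bytes zeroed, which is what lets ip46_address_is_ip4()
+ * discriminate the two families. */
+static inline int
+example_ip46_is_ip4 (void)
+{
+  ip4_address_t a4 = {.as_u32 = clib_host_to_net_u32 (0xc0000201) };	/* 192.0.2.1 */
+  ip46_address_t a = to_ip46 (0 /* is_ipv6 */ , (u8 *) & a4);
+  return ip46_address_is_ip4 (&a);	/* 1 */
+}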
+
+
+always_inline void
+ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address,
+ u32 fib_index)
+{
+ addr_fib->ip6_addr = *address;
+ addr_fib->fib_index = fib_index;
+}
+
+/* Special addresses:
+ unspecified ::/128
+ loopback ::1/128
+ global unicast 2000::/3
+ unique local unicast fc00::/7
+ link local unicast fe80::/10
+ multicast ff00::/8
+ ietf reserved everything else. */
+
+#define foreach_ip6_multicast_address_scope \
+ _ (loopback, 0x1) \
+ _ (link_local, 0x2) \
+ _ (admin_local, 0x4) \
+ _ (site_local, 0x5) \
+ _ (organization_local, 0x8) \
+ _ (global, 0xe)
+
+#define foreach_ip6_multicast_link_local_group_id \
+ _ (all_hosts, 0x1) \
+ _ (all_routers, 0x2) \
+ _ (rip_routers, 0x9) \
+ _ (eigrp_routers, 0xa) \
+ _ (pim_routers, 0xd) \
+ _ (mldv2_routers, 0x16)
+
+typedef enum
+{
+#define _(f,n) IP6_MULTICAST_SCOPE_##f = n,
+ foreach_ip6_multicast_address_scope
+#undef _
+} ip6_multicast_address_scope_t;
+
+typedef enum
+{
+#define _(f,n) IP6_MULTICAST_GROUP_ID_##f = n,
+ foreach_ip6_multicast_link_local_group_id
+#undef _
+} ip6_multicast_link_local_group_id_t;
+
+always_inline uword
+ip6_address_is_multicast (ip6_address_t * a)
+{
+ return a->as_u8[0] == 0xff;
+}
+
+always_inline uword
+ip46_address_is_multicast (ip46_address_t * a)
+{
+ return ip46_address_is_ip4 (a) ? ip4_address_is_multicast (&a->ip4) :
+ ip6_address_is_multicast (&a->ip6);
+}
+
+always_inline void
+ip6_set_reserved_multicast_address (ip6_address_t * a,
+ ip6_multicast_address_scope_t scope,
+ u16 id)
+{
+ a->as_u64[0] = a->as_u64[1] = 0;
+ a->as_u16[0] = clib_host_to_net_u16 (0xff00 | scope);
+ a->as_u16[7] = clib_host_to_net_u16 (id);
+}
+
+always_inline void
+ip6_set_solicited_node_multicast_address (ip6_address_t * a, u32 id)
+{
+ /* 0xff02::1:ffXX:XXXX. */
+ a->as_u64[0] = a->as_u64[1] = 0;
+ a->as_u16[0] = clib_host_to_net_u16 (0xff02);
+ a->as_u8[11] = 1;
+ ASSERT ((id >> 24) == 0);
+ id |= 0xff << 24;
+ a->as_u32[3] = clib_host_to_net_u32 (id);
+}
+
+always_inline void
+ip6_link_local_address_from_ethernet_address (ip6_address_t * a,
+ u8 * ethernet_address)
+{
+ a->as_u64[0] = a->as_u64[1] = 0;
+ a->as_u16[0] = clib_host_to_net_u16 (0xfe80);
+ /* Always set locally administered bit (6). */
+ a->as_u8[0x8] = ethernet_address[0] | (1 << 6);
+ a->as_u8[0x9] = ethernet_address[1];
+ a->as_u8[0xa] = ethernet_address[2];
+ a->as_u8[0xb] = 0xff;
+ a->as_u8[0xc] = 0xfe;
+ a->as_u8[0xd] = ethernet_address[3];
+ a->as_u8[0xe] = ethernet_address[4];
+ a->as_u8[0xf] = ethernet_address[5];
+}
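+
+/* Sketch (illustrative; the MAC value is hypothetical): the two
+ * helpers above build the addresses NDP needs. For MAC
+ * 00:01:02:03:04:05 the derived link-local address is
+ * fe80::4001:2ff:fe03:405 (bit 6 of the first identifier octet set,
+ * ff:fe inserted in the middle), and the solicited-node group for its
+ * low 24 bits is ff02::1:ff03:405. */
+static inline void
+example_nd_addresses (ip6_address_t * ll, ip6_address_t * snm)
+{
+  u8 mac[6] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 };
+  ip6_link_local_address_from_ethernet_address (ll, mac);
+  ip6_set_solicited_node_multicast_address (snm, 0x030405);
+}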
+
+always_inline void
+ip6_multicast_ethernet_address (u8 * ethernet_address, u32 group_id)
+{
+ ethernet_address[0] = 0x33;
+ ethernet_address[1] = 0x33;
+ ethernet_address[2] = ((group_id >> 24) & 0xff);
+ ethernet_address[3] = ((group_id >> 16) & 0xff);
+ ethernet_address[4] = ((group_id >> 8) & 0xff);
+ ethernet_address[5] = ((group_id >> 0) & 0xff);
+}
+
+always_inline uword
+ip6_address_is_equal (ip6_address_t * a, ip6_address_t * b)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ if (a->as_uword[i] != b->as_uword[i])
+ return 0;
+ return 1;
+}
+
+always_inline uword
+ip6_address_is_equal_masked (ip6_address_t * a, ip6_address_t * b,
+ ip6_address_t * mask)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ {
+ uword a_masked, b_masked;
+ a_masked = a->as_uword[i] & mask->as_uword[i];
+ b_masked = b->as_uword[i] & mask->as_uword[i];
+
+ if (a_masked != b_masked)
+ return 0;
+ }
+ return 1;
+}
+
+always_inline void
+ip6_address_mask (ip6_address_t * a, ip6_address_t * mask)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ a->as_uword[i] &= mask->as_uword[i];
+}
+
+always_inline void
+ip6_address_set_zero (ip6_address_t * a)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ a->as_uword[i] = 0;
+}
+
+always_inline void
+ip6_address_mask_from_width (ip6_address_t * a, u32 width)
+{
+ int i, byte, bit, bitnum;
+ ASSERT (width <= 128);
+ memset (a, 0, sizeof (a[0]));
+ for (i = 0; i < width; i++)
+ {
+ bitnum = (7 - (i & 7));
+ byte = i / 8;
+ bit = 1 << bitnum;
+ a->as_u8[byte] |= bit;
+ }
+}
+
+always_inline uword
+ip6_address_is_zero (ip6_address_t * a)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ if (a->as_uword[i] != 0)
+ return 0;
+ return 1;
+}
+
+/* Check for unspecified address ::0 */
+always_inline uword
+ip6_address_is_unspecified (ip6_address_t * a)
+{
+ return ip6_address_is_zero (a);
+}
+
+/* Check for loopback address ::1 */
+always_inline uword
+ip6_address_is_loopback (ip6_address_t * a)
+{
+ uword is_loopback;
+ u8 save = a->as_u8[15];
+ a->as_u8[15] = save ^ 1;
+ is_loopback = ip6_address_is_zero (a);
+ a->as_u8[15] = save;
+ return is_loopback;
+}
+
+/* Check for link local unicast fe80::/10. */
+always_inline uword
+ip6_address_is_link_local_unicast (ip6_address_t * a)
+{
+ return a->as_u8[0] == 0xfe && (a->as_u8[1] & 0xc0) == 0x80;
+}
+
+/* Check for unique local unicast fc00::/7. */
+always_inline uword
+ip6_address_is_local_unicast (ip6_address_t * a)
+{
+ return (a->as_u8[0] & 0xfe) == 0xfc;
+}
+
+/* Check for unique global unicast 2000::/3. */
+always_inline uword
+ip6_address_is_global_unicast (ip6_address_t * a)
+{
+ return (a->as_u8[0] & 0xe0) == 0x20;
+}
+
+/* Check for solicited node multicast 0xff02::1:ff00:0/104 */
+always_inline uword
+ip6_is_solicited_node_multicast_address (ip6_address_t * a)
+{
+ return (a->as_u32[0] == clib_host_to_net_u32 (0xff020000)
+ && a->as_u32[1] == 0
+ && a->as_u32[2] == clib_host_to_net_u32 (1)
+ && a->as_u8[12] == 0xff);
+}
+
+typedef struct
+{
+ /* 4 bit version, 8 bit traffic class and 20 bit flow label. */
+ u32 ip_version_traffic_class_and_flow_label;
+
+ /* Total packet length not including this header (but including
+ any extension headers if present). */
+ u16 payload_length;
+
+ /* Protocol for next header. */
+ u8 protocol;
+
+ /* Hop limit decremented by router at each hop. */
+ u8 hop_limit;
+
+ /* Source and destination address. */
+ ip6_address_t src_address, dst_address;
+} ip6_header_t;
+
+always_inline u8
+ip6_traffic_class (ip6_header_t * i)
+{
+ return (i->ip_version_traffic_class_and_flow_label & 0x0FF00000) >> 20;
+}
+
+always_inline void *
+ip6_next_header (ip6_header_t * i)
+{
+ return (void *) (i + 1);
+}
+
+always_inline void
+ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src)
+{
+ dst->ip_version_traffic_class_and_flow_label =
+ src->ip_version_traffic_class_and_flow_label;
+ dst->payload_length = src->payload_length;
+ dst->protocol = src->protocol;
+ dst->hop_limit = src->hop_limit;
+
+ dst->src_address.as_uword[0] = src->src_address.as_uword[0];
+ dst->src_address.as_uword[1] = src->src_address.as_uword[1];
+ dst->dst_address.as_uword[0] = src->dst_address.as_uword[0];
+ dst->dst_address.as_uword[1] = src->dst_address.as_uword[1];
+}
+
+always_inline void
+ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
+{
+ {
+ ip6_address_t src0, dst0;
+
+ src0 = ip0->src_address;
+ dst0 = ip0->dst_address;
+ ip0->src_address = dst0;
+ ip0->dst_address = src0;
+ }
+
+ {
+ u16 src0, dst0;
+
+ src0 = tcp0->src;
+ dst0 = tcp0->dst;
+ tcp0->src = dst0;
+ tcp0->dst = src0;
+ }
+}
+
+always_inline void
+ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1,
+ tcp_header_t * tcp0, tcp_header_t * tcp1)
+{
+ {
+ ip6_address_t src0, dst0, src1, dst1;
+
+ src0 = ip0->src_address;
+ src1 = ip1->src_address;
+ dst0 = ip0->dst_address;
+ dst1 = ip1->dst_address;
+ ip0->src_address = dst0;
+ ip1->src_address = dst1;
+ ip0->dst_address = src0;
+ ip1->dst_address = src1;
+ }
+
+ {
+ u16 src0, dst0, src1, dst1;
+
+ src0 = tcp0->src;
+ src1 = tcp1->src;
+ dst0 = tcp0->dst;
+ dst1 = tcp1->dst;
+ tcp0->src = dst0;
+ tcp1->src = dst1;
+ tcp0->dst = src0;
+ tcp1->dst = src1;
+ }
+}
+
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 data;
+}) ip6_pad1_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 type;
+ u8 len;
+ u8 data[0];
+}) ip6_padN_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+#define IP6_MLDP_ALERT_TYPE 0x5
+ u8 type;
+ u8 len;
+ u16 value;
+}) ip6_router_alert_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+}) ip6_ext_header_t;
+
+always_inline u8 ip6_ext_hdr(u8 nexthdr)
+{
+ /*
+ * find out if nexthdr is an extension header or a protocol
+ */
+ return (nexthdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ||
+ (nexthdr == IP_PROTOCOL_IPV6_FRAGMENTATION) ||
+ (nexthdr == IP_PROTOCOL_IPSEC_AH) ||
+ (nexthdr == IP_PROTOCOL_IPV6_ROUTE) ||
+ (nexthdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS);
+}
+
+#define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3)
+#define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2)
+
+always_inline void *
+ip6_ext_next_header (ip6_ext_header_t *ext_hdr )
+{ return (void *)((u8 *) ext_hdr + ip6_ext_header_len(ext_hdr)); }
+
+/*
+ * Macro to find the IPv6 ext header of type t
+ * I is the IPv6 header
+ * P is the previous IPv6 ext header (NULL if none)
+ * M is the matched IPv6 ext header of type t
+ */
+#define ip6_ext_header_find_t(i, p, m, t) \
+if ((i)->protocol == t) \
+{ \
+ (m) = (void *)((i)+1); \
+ (p) = NULL; \
+} \
+else \
+{ \
+ (m) = NULL; \
+ (p) = (void *)((i)+1); \
+ while (ip6_ext_hdr((p)->next_hdr) && \
+ ((ip6_ext_header_t *)(p))->next_hdr != (t)) \
+ { \
+ (p) = ip6_ext_next_header((p)); \
+ } \
+ if ( ((p)->next_hdr) == (t)) \
+ { \
+ (m) = (void *)(ip6_ext_next_header((p))); \
+ } \
+}
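+
+/* Usage sketch (illustrative, not part of the original patch): walk
+ * the extension-header chain for a header of the given type using the
+ * macro above; match is left NULL when none is found. */
+static inline void *
+example_find_ext_header (ip6_header_t * ip6, u8 type)
+{
+  ip6_ext_header_t *prev, *match;
+  ip6_ext_header_find_t (ip6, prev, match, type);
+  return match;
+}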
+
+
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ u8 data[0];
+}) ip6_hop_by_hop_ext_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ u8 rsv;
+ u16 fragment_offset_and_more;
+ u32 identification;
+}) ip6_frag_hdr_t;
+/* *INDENT-ON* */
+
+#define ip6_frag_hdr_offset(hdr) \
+ (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3)
+
+#define ip6_frag_hdr_more(hdr) \
+ (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1)
+
+#define ip6_frag_hdr_offset_and_more(offset, more) \
+ clib_host_to_net_u16(((offset) << 3) + !!(more))
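+
+/* Round-trip sketch (illustrative): fragment offsets are carried in
+ * 8-byte units, so ip6_frag_hdr_offset_and_more (22, 1) encodes byte
+ * offset 176 with the more-fragments bit set, and the two accessors
+ * above recover 22 and 1 respectively. */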
+
+#endif /* included_ip6_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_pg.c b/src/vnet/ip/ip6_pg.c
new file mode 100644
index 00000000..ba1e4ad9
--- /dev/null
+++ b/src/vnet/ip/ip6_pg.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_pg: IP v6 packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+static void
+ip6_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_header_offset = g->start_byte_offset;
+
+ while (n_packets >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+
+ pi0 = packets[0];
+ pi1 = packets[1];
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ n_packets -= 2;
+ packets += 2;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+ ip1 = (void *) (p1->data + ip_header_offset);
+
+ ip0->payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset - sizeof (ip0[0]));
+ ip1->payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) -
+ ip_header_offset - sizeof (ip1[0]));
+ }
+
+ while (n_packets >= 1)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+
+ pi0 = packets[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ n_packets -= 1;
+ packets += 1;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+
+ ip0->payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset - sizeof (ip0[0]));
+ }
+}
+
+typedef struct
+{
+ pg_edit_t ip_version;
+ pg_edit_t traffic_class;
+ pg_edit_t flow_label;
+ pg_edit_t payload_length;
+ pg_edit_t protocol;
+ pg_edit_t hop_limit;
+ pg_edit_t src_address, dst_address;
+} pg_ip6_header_t;
+
+static inline void
+pg_ip6_header_init (pg_ip6_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, ip6_header_t, f);
+ _(payload_length);
+ _(hop_limit);
+ _(protocol);
+ _(src_address);
+ _(dst_address);
+#undef _
+
+ /* Initialize bit fields. */
+ pg_edit_init_bitfield (&p->ip_version, ip6_header_t,
+ ip_version_traffic_class_and_flow_label, 28, 4);
+ pg_edit_init_bitfield (&p->traffic_class, ip6_header_t,
+ ip_version_traffic_class_and_flow_label, 20, 8);
+ pg_edit_init_bitfield (&p->flow_label, ip6_header_t,
+ ip_version_traffic_class_and_flow_label, 0, 20);
+}
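+
+/* Layout sketch (illustrative), matching the bitfield offsets above
+ * with bit 0 as the least significant bit of the 32-bit word:
+ * bits 31..28 version, bits 27..20 traffic class, bits 19..0 flow
+ * label. */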
+
+uword
+unformat_pg_ip6_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ip6_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip6_header_t),
+ &group_index);
+ pg_ip6_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->ip_version, 6);
+ pg_edit_set_fixed (&p->traffic_class, 0);
+ pg_edit_set_fixed (&p->flow_label, 0);
+ pg_edit_set_fixed (&p->hop_limit, 64);
+
+ p->payload_length.type = PG_EDIT_UNSPECIFIED;
+
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol,
+ unformat_pg_edit,
+ unformat_ip6_address, &p->src_address,
+ unformat_pg_edit, unformat_ip6_address, &p->dst_address))
+ goto error;
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "version %U",
+ unformat_pg_edit, unformat_pg_number, &p->ip_version))
+ ;
+
+ else if (unformat (input, "traffic-class %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->traffic_class))
+ ;
+
+ else if (unformat (input, "length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->payload_length))
+ ;
+
+ else if (unformat (input, "hop-limit %U",
+ unformat_pg_edit, unformat_pg_number, &p->hop_limit))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_t protocol;
+ ip_protocol_info_t *pi;
+
+ pi = 0;
+ if (p->protocol.type == PG_EDIT_FIXED)
+ {
+ protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO);
+ pi = ip_get_protocol_info (im, protocol);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->payload_length.type == PG_EDIT_UNSPECIFIED
+ && s->min_packet_bytes == s->max_packet_bytes
+ && group_index + 1 < vec_len (s->edit_groups))
+ {
+ pg_edit_set_fixed (&p->payload_length,
+ pg_edit_group_n_bytes (s,
+ group_index) -
+ sizeof (ip6_header_t));
+ }
+
+ p = pg_get_edit_group (s, group_index);
+ if (p->payload_length.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = ip6_pg_edit_function;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h
new file mode 100644
index 00000000..c14b46c4
--- /dev/null
+++ b/src/vnet/ip/ip6_to_ip4.h
@@ -0,0 +1,634 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief IPv6 to IPv4 translation
+ */
+#ifndef __included_ip6_to_ip4_h__
+#define __included_ip6_to_ip4_h__
+
+#include <vnet/ip/ip.h>
+
+/**
+ * IPv6 to IPv4 set call back function type
+ */
+typedef int (*ip6_to_ip4_set_fn_t) (ip6_header_t * ip6, ip4_header_t * ip4,
+ void *ctx);
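+
+/* Sketch of a set-function (illustrative; the flat one-to-one address
+ * mapping below is an assumption for demonstration, not a policy this
+ * file defines): a translator passes such a callback to the helpers
+ * below to fill in the IPv4 source/destination addresses. */
+static_always_inline int
+example_ip6_to_ip4_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *ctx)
+{
+  /* take the low 32 bits of each IPv6 address as the IPv4 address */
+  ip4->src_address.as_u32 = ip6->src_address.as_u32[3];
+  ip4->dst_address.as_u32 = ip6->dst_address.as_u32[3];
+  return 0;
+}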
+
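+/* Maps the ICMPv6 parameter-problem pointer (a byte offset into the
+ * IPv6 header) to the byte offset of the corresponding IPv4 header
+ * field; ~0 marks offsets with no IPv4 counterpart. Used by
+ * icmp6_to_icmp_header () below. */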
+/* *INDENT-OFF* */
+static u8 icmp6_to_icmp_updater_pointer_table[] =
+ { 0, 1, ~0, ~0,
+ 2, 2, 9, 8,
+ 12, 12, 12, 12,
+ 12, 12, 12, 12,
+ 12, 12, 12, 12,
+ 12, 12, 12, 12,
+ 24, 24, 24, 24,
+ 24, 24, 24, 24,
+ 24, 24, 24, 24,
+ 24, 24, 24, 24
+ };
+/* *INDENT-ON* */
+
+#define frag_id_6to4(id) ((id) ^ ((id) >> 16))
+
+/**
+ * @brief Parse some useful information from IPv6 header.
+ *
+ * @param ip6 IPv6 header.
+ * @param buff_len Buffer length.
+ * @param l4_protocol L4 protocol number.
+ * @param l4_offset L4 header offset.
+ * @param frag_hdr_offset Fragment header offset if present, 0 otherwise.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+static_always_inline int
+ip6_parse (const ip6_header_t * ip6, u32 buff_len,
+ u8 * l4_protocol, u16 * l4_offset, u16 * frag_hdr_offset)
+{
+ if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ *l4_protocol = ((ip6_frag_hdr_t *) (ip6 + 1))->next_hdr;
+ *frag_hdr_offset = sizeof (*ip6);
+ *l4_offset = sizeof (*ip6) + sizeof (ip6_frag_hdr_t);
+ }
+ else
+ {
+ *l4_protocol = ip6->protocol;
+ *frag_hdr_offset = 0;
+ *l4_offset = sizeof (*ip6);
+ }
+
+ return (buff_len < (*l4_offset + 4)) ||
+ (clib_net_to_host_u16 (ip6->payload_length) <
+ (*l4_offset + 4 - sizeof (*ip6)));
+}
+
+/**
+ * @brief Get TCP/UDP port number or ICMP id from IPv6 packet.
+ *
+ * @param ip6 IPv6 header.
+ * @param sender 1 get sender port, 0 get receiver port.
+ * @param buffer_len Buffer length.
+ *
+ * @returns Port number on success, 0 otherwise.
+ */
+always_inline u16
+ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len)
+{
+ u8 l4_protocol;
+ u16 l4_offset;
+ u16 frag_offset;
+ u8 *l4;
+
+ if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
+ return 0;
+
+ if (frag_offset &&
+ ip6_frag_hdr_offset (((ip6_frag_hdr_t *)
+ u8_ptr_add (ip6, frag_offset))))
+ return 0; //Can't deal with non-first fragment for now
+
+ l4 = u8_ptr_add (ip6, l4_offset);
+ if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP)
+ {
+      return (sender) ? ((udp_header_t *) (l4))->src_port :
+	((udp_header_t *) (l4))->dst_port;
+ }
+ else if (l4_protocol == IP_PROTOCOL_ICMP6)
+ {
+ icmp46_header_t *icmp = (icmp46_header_t *) (l4);
+ if (icmp->type == ICMP6_echo_request)
+ {
+ return (sender) ? ((u16 *) (icmp))[2] : -1;
+ }
+ else if (icmp->type == ICMP6_echo_reply)
+ {
+ return (sender) ? -1 : ((u16 *) (icmp))[2];
+ }
+ }
+ return 0;
+}
+
+/**
+ * @brief Convert type and code value from ICMP6 to ICMP4.
+ *
+ * @param icmp ICMP header.
+ * @param inner_ip6 Inner IPv6 header if present, 0 otherwise.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+static_always_inline int
+icmp6_to_icmp_header (icmp46_header_t * icmp, ip6_header_t ** inner_ip6)
+{
+ *inner_ip6 = NULL;
+ switch (icmp->type)
+ {
+ case ICMP6_echo_request:
+ icmp->type = ICMP4_echo_request;
+ break;
+ case ICMP6_echo_reply:
+ icmp->type = ICMP4_echo_reply;
+ break;
+ case ICMP6_destination_unreachable:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ switch (icmp->code)
+ {
+ case ICMP6_destination_unreachable_no_route_to_destination: //0
+ case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2
+ case ICMP6_destination_unreachable_address_unreachable: //3
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code =
+ ICMP4_destination_unreachable_destination_unreachable_host;
+ break;
+	case ICMP6_destination_unreachable_destination_administratively_prohibited:	//1
+	  icmp->type = ICMP4_destination_unreachable;
+ icmp->code =
+ ICMP4_destination_unreachable_communication_administratively_prohibited;
+ break;
+ case ICMP6_destination_unreachable_port_unreachable:
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = ICMP4_destination_unreachable_port_unreachable;
+ break;
+ default:
+ return -1;
+ }
+ break;
+ case ICMP6_packet_too_big:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = 4;
+ {
+ u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+ advertised_mtu -= 20;
+ //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20)
+ ((u16 *) (icmp))[3] = clib_host_to_net_u16 (advertised_mtu);
+ }
+ break;
+
+ case ICMP6_time_exceeded:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ icmp->type = ICMP4_time_exceeded;
+ break;
+
+ case ICMP6_parameter_problem:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ switch (icmp->code)
+ {
+ case ICMP6_parameter_problem_erroneous_header_field:
+ icmp->type = ICMP4_parameter_problem;
+ icmp->code = ICMP4_parameter_problem_pointer_indicates_error;
+ u32 pointer = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+ if (pointer >= 40)
+ return -1;
+
+ ((u8 *) (icmp + 1))[0] =
+ icmp6_to_icmp_updater_pointer_table[pointer];
+ break;
+ case ICMP6_parameter_problem_unrecognized_next_header:
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = ICMP4_destination_unreachable_port_unreachable;
+ break;
+ case ICMP6_parameter_problem_unrecognized_option:
+ default:
+ return -1;
+ }
+ break;
+ default:
+ return -1;
+ break;
+ }
+ return 0;
+}
+
+/**
+ * @brief Translate TOS value from IPv6 to IPv4.
+ *
+ * @param ip6 IPv6 header.
+ *
+ * @returns IPv4 TOS value.
+ */
+static_always_inline u8
+ip6_translate_tos (const ip6_header_t * ip6)
+{
+ return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label)
+ & 0x0ff00000) >> 20;
+}
+
+/**
+ * @brief Translate ICMP6 packet to ICMP4.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate outer header.
+ * @param ctx A context passed in the outer header translate function.
+ * @param inner_fn The function to translate inner header.
+ * @param inner_ctx A context passed in the inner header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
+ ip6_to_ip4_set_fn_t inner_fn, void *inner_ctx)
+{
+ ip6_header_t *ip6, *inner_ip6;
+ ip4_header_t *ip4, *inner_ip4;
+ u32 ip6_pay_len;
+ icmp46_header_t *icmp;
+ ip_csum_t csum;
+ int rv;
+
+ ip6 = vlib_buffer_get_current (p);
+ ip6_pay_len = clib_net_to_host_u16 (ip6->payload_length);
+ icmp = (icmp46_header_t *) (ip6 + 1);
+ ASSERT (ip6_pay_len + sizeof (*ip6) <= p->current_length);
+
+ //No extensions headers allowed here
+ if (ip6->protocol != IP_PROTOCOL_ICMP6)
+ return -1;
+
+ //There are no fragmented ICMP messages, so no extension header for now
+ if (icmp6_to_icmp_header (icmp, &inner_ip6))
+ return -1;
+
+ if (inner_ip6)
+ {
+ u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset,
+ inner_frag_id;
+ u8 *inner_l4, inner_protocol;
+
+ //We have two headers to translate
+ // FROM
+ // [ IPv6 ]<- ext ->[IC][ IPv6 ]<- ext ->[L4 header ...
+ // Handled cases:
+ // [ IPv6 ][IC][ IPv6 ][L4 header ...
+ // [ IPv6 ][IC][ IPv6 ][Fr][L4 header ...
+ // TO
+ // [ IPv4][IC][ IPv4][L4 header ...
+
+ if (ip6_parse (inner_ip6, ip6_pay_len - 8,
+ &inner_protocol, &inner_l4_offset, &inner_frag_offset))
+ return -1;
+
+ inner_l4 = u8_ptr_add (inner_ip6, inner_l4_offset);
+ inner_ip4 =
+ (ip4_header_t *) u8_ptr_add (inner_l4, -sizeof (*inner_ip4));
+ if (inner_frag_offset)
+ {
+ ip6_frag_hdr_t *inner_frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, inner_frag_offset);
+ inner_frag_id = frag_id_6to4 (inner_frag->identification);
+ }
+ else
+ {
+ inner_frag_id = 0;
+ }
+
+ //Do the translation of the inner packet
+ if (inner_protocol == IP_PROTOCOL_TCP)
+ {
+ inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 16);
+ }
+ else if (inner_protocol == IP_PROTOCOL_UDP)
+ {
+ inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 6);
+ }
+ else if (inner_protocol == IP_PROTOCOL_ICMP6)
+ {
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4;
+	  //It cannot be of a different type: icmp6_to_icmp_header () already succeeded
+ inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ?
+ ICMP4_echo_request : ICMP4_echo_reply;
+	  inner_protocol = IP_PROTOCOL_ICMP;	//Will be copied to inner ip4 later
+ inner_L4_checksum = &inner_icmp->checksum;
+ }
+ else
+ {
+ return -1;
+ }
+
+ csum = *inner_L4_checksum;
+ csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
+ csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
+ *inner_L4_checksum = ip_csum_fold (csum);
+
+ if ((rv = inner_fn (inner_ip6, inner_ip4, inner_ctx)) != 0)
+ return rv;
+
+ inner_ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ inner_ip4->tos = ip6_translate_tos (inner_ip6);
+ inner_ip4->length =
+ u16_net_add (inner_ip6->payload_length,
+ sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset);
+ inner_ip4->fragment_id = inner_frag_id;
+ inner_ip4->flags_and_fragment_offset =
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+ inner_ip4->ttl = inner_ip6->hop_limit;
+ inner_ip4->protocol = inner_protocol;
+ inner_ip4->checksum = ip4_header_checksum (inner_ip4);
+
+ if (inner_ip4->protocol == IP_PROTOCOL_ICMP)
+ {
+ //Recompute ICMP checksum
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4;
+ inner_icmp->checksum = 0;
+ csum =
+ ip_incremental_checksum (0, inner_icmp,
+ clib_net_to_host_u16 (inner_ip4->length)
+ - sizeof (*inner_ip4));
+ inner_icmp->checksum = ~ip_csum_fold (csum);
+ }
+ else
+ {
+ //Update to new pseudo-header
+ csum = *inner_L4_checksum;
+ csum = ip_csum_add_even (csum, inner_ip4->src_address.as_u32);
+ csum = ip_csum_add_even (csum, inner_ip4->dst_address.as_u32);
+ *inner_L4_checksum = ip_csum_fold (csum);
+ }
+
+ //Move up icmp header
+ ip4 = (ip4_header_t *) u8_ptr_add (inner_l4, -2 * sizeof (*ip4) - 8);
+ clib_memcpy (u8_ptr_add (inner_l4, -sizeof (*ip4) - 8), icmp, 8);
+ icmp = (icmp46_header_t *) u8_ptr_add (inner_l4, -sizeof (*ip4) - 8);
+ }
+ else
+ {
+ //Only one header to translate
+ ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ }
+
+ vlib_buffer_advance (p, (u32) (((u8 *) ip4) - ((u8 *) ip6)));
+
+ if ((rv = fn (ip6, ip4, ctx)) != 0)
+ return rv;
+
+ ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip4->tos = ip6_translate_tos (ip6);
+ ip4->fragment_id = 0;
+ ip4->flags_and_fragment_offset = 0;
+ ip4->ttl = ip6->hop_limit;
+ ip4->protocol = IP_PROTOCOL_ICMP;
+ //TODO fix the length depending on offset length
+ ip4->length = u16_net_add (ip6->payload_length,
+ (inner_ip6 ==
+ NULL) ? sizeof (*ip4) : (2 * sizeof (*ip4) -
+ sizeof (*ip6)));
+ ip4->checksum = ip4_header_checksum (ip4);
+
+ //Recompute ICMP checksum
+ icmp->checksum = 0;
+ csum =
+ ip_incremental_checksum (0, icmp,
+ clib_net_to_host_u16 (ip4->length) -
+ sizeof (*ip4));
+ icmp->checksum = ~ip_csum_fold (csum);
+
+ return 0;
+}
+
+/**
+ * @brief Translate IPv6 fragmented packet to IPv4.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+ip6_to_ip4_fragmented (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx)
+{
+ ip6_header_t *ip6;
+ ip6_frag_hdr_t *frag;
+ ip4_header_t *ip4;
+ u16 frag_id;
+ u8 frag_more;
+ u16 frag_offset;
+ u8 l4_protocol;
+ u16 l4_offset;
+ int rv;
+
+ ip6 = vlib_buffer_get_current (p);
+
+ if (ip6_parse
+ (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
+ return -1;
+
+ frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset);
+ ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
+ vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
+
+ frag_id = frag_id_6to4 (frag->identification);
+ frag_more = ip6_frag_hdr_more (frag);
+ frag_offset = ip6_frag_hdr_offset (frag);
+
+ if ((rv = fn (ip6, ip4, ctx)) != 0)
+ return rv;
+
+ ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip4->tos = ip6_translate_tos (ip6);
+ ip4->length = u16_net_add (ip6->payload_length,
+ sizeof (*ip4) - l4_offset + sizeof (*ip6));
+ ip4->fragment_id = frag_id;
+ ip4->flags_and_fragment_offset =
+ clib_host_to_net_u16 (frag_offset |
+ (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
+ ip4->ttl = ip6->hop_limit;
+ ip4->protocol =
+ (l4_protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : l4_protocol;
+ ip4->checksum = ip4_header_checksum (ip4);
+
+ return 0;
+}
+
+/**
+ * @brief Translate IPv6 UDP/TCP packet to IPv4.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+ip6_to_ip4_tcp_udp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
+ u8 udp_checksum)
+{
+ ip6_header_t *ip6;
+ u16 *checksum;
+ ip_csum_t csum = 0;
+ ip4_header_t *ip4;
+ u16 fragment_id;
+ u16 flags;
+ u16 frag_offset;
+ u8 l4_protocol;
+ u16 l4_offset;
+ int rv;
+
+ ip6 = vlib_buffer_get_current (p);
+
+ if (ip6_parse
+ (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
+ return -1;
+
+ if (l4_protocol == IP_PROTOCOL_TCP)
+ {
+ tcp_header_t *tcp = ip6_next_header (ip6);
+ checksum = &tcp->checksum;
+ }
+ else
+ {
+ udp_header_t *udp = ip6_next_header (ip6);
+ checksum = &udp->checksum;
+ //UDP checksum is optional over IPv4
+ if (!udp_checksum)
+ goto no_csum;
+ }
+
+ csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
+ *checksum = ip_csum_fold (csum);
+
+no_csum:
+ ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
+
+ vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
+
+ if (PREDICT_FALSE (frag_offset))
+ {
+ //Only the first fragment
+ ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset);
+ fragment_id = frag_id_6to4 (hdr->identification);
+ flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+ }
+ else
+ {
+ fragment_id = 0;
+ flags = 0;
+ }
+
+ if ((rv = fn (ip6, ip4, ctx)) != 0)
+ return rv;
+
+ ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip4->tos = ip6_translate_tos (ip6);
+ ip4->length = u16_net_add (ip6->payload_length,
+ sizeof (*ip4) + sizeof (*ip6) - l4_offset);
+ ip4->fragment_id = fragment_id;
+ ip4->flags_and_fragment_offset = flags;
+ ip4->ttl = ip6->hop_limit;
+ ip4->protocol = l4_protocol;
+ ip4->checksum = ip4_header_checksum (ip4);
+
+ //UDP checksum is optional over IPv4
+ if (!udp_checksum && l4_protocol == IP_PROTOCOL_UDP)
+ {
+ *checksum = 0;
+ }
+ else
+ {
+ csum = ip_csum_add_even (*checksum, ip4->dst_address.as_u32);
+ csum = ip_csum_add_even (csum, ip4->src_address.as_u32);
+ *checksum = ip_csum_fold (csum);
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Translate IPv6 packet to IPv4 (IP header only).
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+ip6_to_ip4 (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx)
+{
+ ip6_header_t *ip6;
+ ip4_header_t *ip4;
+ u16 fragment_id;
+ u16 flags;
+ u16 frag_offset;
+ u8 l4_protocol;
+ u16 l4_offset;
+ int rv;
+
+ ip6 = vlib_buffer_get_current (p);
+
+ if (ip6_parse
+ (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
+ return -1;
+
+ ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
+
+ vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
+
+ if (PREDICT_FALSE (frag_offset))
+ {
+ //Only the first fragment
+ ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset);
+ fragment_id = frag_id_6to4 (hdr->identification);
+ flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+ }
+ else
+ {
+ fragment_id = 0;
+ flags = 0;
+ }
+
+ if ((rv = fn (ip6, ip4, ctx)) != 0)
+ return rv;
+
+ ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip4->tos = ip6_translate_tos (ip6);
+ ip4->length = u16_net_add (ip6->payload_length,
+ sizeof (*ip4) + sizeof (*ip6) - l4_offset);
+ ip4->fragment_id = fragment_id;
+ ip4->flags_and_fragment_offset = flags;
+ ip4->ttl = ip6->hop_limit;
+ ip4->protocol = l4_protocol;
+ ip4->checksum = ip4_header_checksum (ip4);
+
+ return 0;
+}
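+
+/* Usage sketch (illustrative), tying the pieces together with the
+ * example callback defined near the top of this file: */
+static_always_inline int
+example_translate_buffer (vlib_buffer_t * p)
+{
+  return ip6_to_ip4 (p, example_ip6_to_ip4_set_cb, 0 /* ctx */ );
+}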
+
+#endif /* __included_ip6_to_ip4_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
new file mode 100644
index 00000000..e13e6e64
--- /dev/null
+++ b/src/vnet/ip/ip_api.c
@@ -0,0 +1,1825 @@
+/*
+ *------------------------------------------------------------------
+ * ip_api.c - vnet ip api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/mfib_signal.h>
+#include <vnet/mfib/mfib_entry.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+
+#define foreach_ip_api_msg \
+_(IP_FIB_DUMP, ip_fib_dump) \
+_(IP6_FIB_DUMP, ip6_fib_dump) \
+_(IP_MFIB_DUMP, ip_mfib_dump) \
+_(IP6_MFIB_DUMP, ip6_mfib_dump) \
+_(IP_NEIGHBOR_DUMP, ip_neighbor_dump) \
+_(IP_MROUTE_ADD_DEL, ip_mroute_add_del) \
+_(MFIB_SIGNAL_DUMP, mfib_signal_dump) \
+_(IP_ADDRESS_DUMP, ip_address_dump) \
+_(IP_DUMP, ip_dump) \
+_(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \
+_(IP_ADD_DEL_ROUTE, ip_add_del_route) \
+_(IP_TABLE_ADD_DEL, ip_table_add_del) \
+_(SET_IP_FLOW_HASH,set_ip_flow_hash) \
+_(SW_INTERFACE_IP6ND_RA_CONFIG, sw_interface_ip6nd_ra_config) \
+_(SW_INTERFACE_IP6ND_RA_PREFIX, sw_interface_ip6nd_ra_prefix) \
+_(IP6ND_PROXY_ADD_DEL, ip6nd_proxy_add_del) \
+_(IP6ND_PROXY_DUMP, ip6nd_proxy_dump) \
+_(SW_INTERFACE_IP6_ENABLE_DISABLE, sw_interface_ip6_enable_disable ) \
+_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS, \
+ sw_interface_ip6_set_link_local_address)
+
+extern void stats_dslock_with_hint (int hint, int tag);
+extern void stats_dsunlock (void);
+
+static void
+send_ip_neighbor_details (u8 is_ipv6,
+ u8 is_static,
+ u8 * mac_address,
+ u8 * ip_address,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_ip_neighbor_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_DETAILS);
+ mp->context = context;
+ mp->is_ipv6 = is_ipv6;
+ mp->is_static = is_static;
+ memcpy (mp->mac_address, mac_address, 6);
+ memcpy (mp->ip_address, ip_address, (is_ipv6) ? 16 : 4);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ if (mp->is_ipv6)
+ {
+ ip6_neighbor_t *n, *ns;
+
+ ns = ip6_neighbors_entries (sw_if_index);
+ /* *INDENT-OFF* */
+ vec_foreach (n, ns)
+ {
+ send_ip_neighbor_details
+ (mp->is_ipv6, ((n->flags & IP6_NEIGHBOR_FLAG_STATIC) ? 1 : 0),
+ (u8 *) n->link_layer_address,
+ (u8 *) & (n->key.ip6_address.as_u8),
+ q, mp->context);
+ }
+ /* *INDENT-ON* */
+ vec_free (ns);
+ }
+ else
+ {
+ ethernet_arp_ip4_entry_t *n, *ns;
+
+ ns = ip4_neighbor_entries (sw_if_index);
+ /* *INDENT-OFF* */
+ vec_foreach (n, ns)
+ {
+ send_ip_neighbor_details (mp->is_ipv6,
+ ((n->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) ? 1 : 0),
+ (u8*) n->ethernet_address,
+ (u8*) & (n->ip4_address.as_u8),
+ q, mp->context);
+ }
+ /* *INDENT-ON* */
+ vec_free (ns);
+ }
+}
+
+
+void
+copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg)
+{
+ int is_ip4;
+ vl_api_fib_path_t *fp = (vl_api_fib_path_t *) fp_arg;
+
+ if (api_rpath->rpath.frp_proto == DPO_PROTO_IP4)
+ fp->afi = IP46_TYPE_IP4;
+ else if (api_rpath->rpath.frp_proto == DPO_PROTO_IP6)
+ fp->afi = IP46_TYPE_IP6;
+ else
+ {
+ is_ip4 = ip46_address_is_ip4 (&api_rpath->rpath.frp_addr);
+ if (is_ip4)
+ fp->afi = IP46_TYPE_IP4;
+ else
+ fp->afi = IP46_TYPE_IP6;
+ }
+ if (fp->afi == IP46_TYPE_IP4)
+ memcpy (fp->next_hop, &api_rpath->rpath.frp_addr.ip4,
+ sizeof (api_rpath->rpath.frp_addr.ip4));
+ else
+ memcpy (fp->next_hop, &api_rpath->rpath.frp_addr.ip6,
+ sizeof (api_rpath->rpath.frp_addr.ip6));
+}
+
+static void
+send_ip_fib_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ const fib_table_t * table,
+ const fib_prefix_t * pfx,
+ fib_route_path_encode_t * api_rpaths, u32 context)
+{
+ vl_api_ip_fib_details_t *mp;
+ fib_route_path_encode_t *api_rpath;
+ vl_api_fib_path_t *fp;
+ int path_count;
+
+ path_count = vec_len (api_rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+ if (!mp)
+ return;
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_FIB_DETAILS);
+ mp->context = context;
+
+ mp->table_id = htonl (table->ft_table_id);
+ memcpy (mp->table_name, table->ft_desc,
+ clib_min (vec_len (table->ft_desc), sizeof (mp->table_name)));
+ mp->address_length = pfx->fp_len;
+ memcpy (mp->address, &pfx->fp_addr.ip4, sizeof (pfx->fp_addr.ip4));
+
+ mp->count = htonl (path_count);
+ fp = mp->path;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+ switch (api_rpath->dpo.dpoi_type)
+ {
+ case DPO_RECEIVE:
+ fp->is_local = true;
+ break;
+ case DPO_DROP:
+ fp->is_drop = true;
+ break;
+ case DPO_IP_NULL:
+ switch (api_rpath->dpo.dpoi_index)
+ {
+ case IP_NULL_ACTION_NONE:
+ fp->is_drop = true;
+ break;
+ case IP_NULL_ACTION_SEND_ICMP_UNREACH:
+ fp->is_unreach = true;
+ break;
+ case IP_NULL_ACTION_SEND_ICMP_PROHIBIT:
+ fp->is_prohibit = true;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ fp->weight = api_rpath->rpath.frp_weight;
+ fp->preference = api_rpath->rpath.frp_preference;
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
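+
+/*
+ * Details replies with a variable-length path array are allocated as a
+ * fixed header plus a trailing array, as above.  The pattern, sketched:
+ *
+ *   mp = vl_msg_api_alloc (sizeof (*mp) + n_paths * sizeof (mp->path[0]));
+ *   memset (mp, 0, sizeof (*mp));  // header only
+ *   mp->count = htonl (n_paths);
+ *
+ * Note that only sizeof (*mp) is zeroed; every trailing vl_api_fib_path_t
+ * is memset and filled explicitly in the loop.
+ */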
+
+typedef struct vl_api_ip_fib_dump_walk_ctx_t_
+{
+ fib_node_index_t *feis;
+} vl_api_ip_fib_dump_walk_ctx_t;
+
+static int
+vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg)
+{
+ vl_api_ip_fib_dump_walk_ctx_t *ctx = arg;
+
+ vec_add1 (ctx->feis, fei);
+
+ return (1);
+}
+
+static void
+vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ ip4_main_t *im = &ip4_main;
+ fib_table_t *fib_table;
+ fib_node_index_t *lfeip;
+ fib_prefix_t pfx;
+ u32 fib_index;
+ fib_route_path_encode_t *api_rpaths;
+ vl_api_ip_fib_dump_walk_ctx_t ctx = {
+ .feis = NULL,
+ };
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (fib_table, im->fibs,
+ ({
+ fib_table_walk(fib_table->ft_index,
+ FIB_PROTOCOL_IP4,
+ vl_api_ip_fib_dump_walk,
+ &ctx);
+ }));
+ /* *INDENT-ON* */
+
+ vec_sort_with_function (ctx.feis, fib_entry_cmp_for_sort);
+
+ vec_foreach (lfeip, ctx.feis)
+ {
+ fib_entry_get_prefix (*lfeip, &pfx);
+ fib_index = fib_entry_get_fib_index (*lfeip);
+ fib_table = fib_table_get (fib_index, pfx.fp_proto);
+ api_rpaths = NULL;
+ fib_entry_encode (*lfeip, &api_rpaths);
+ send_ip_fib_details (am, q, fib_table, &pfx, api_rpaths, mp->context);
+ vec_free (api_rpaths);
+ }
+
+ vec_free (ctx.feis);
+}
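+
+/*
+ * The dump handlers in this file share one shape: collect entry indices
+ * into a vector during the table walk, sort for stable output, then
+ * encode and send one details message per entry:
+ *
+ *   fib_table_walk (index, proto, collect_cb, &ctx);   // 1: collect
+ *   vec_sort_with_function (ctx.feis, cmp_fn);         // 2: sort
+ *   vec_foreach (feip, ctx.feis) send_details (...);   // 3: send
+ *
+ * Collecting first keeps the walk callback trivial and defers all
+ * message sending until the walk is complete.
+ */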
+
+static void
+send_ip6_fib_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 table_id, fib_prefix_t * pfx,
+ fib_route_path_encode_t * api_rpaths, u32 context)
+{
+ vl_api_ip6_fib_details_t *mp;
+ fib_route_path_encode_t *api_rpath;
+ vl_api_fib_path_t *fp;
+ int path_count;
+
+ path_count = vec_len (api_rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+ if (!mp)
+ return;
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP6_FIB_DETAILS);
+ mp->context = context;
+
+ mp->table_id = htonl (table_id);
+ mp->address_length = pfx->fp_len;
+ memcpy (mp->address, &pfx->fp_addr.ip6, sizeof (pfx->fp_addr.ip6));
+
+ mp->count = htonl (path_count);
+ fp = mp->path;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+ switch (api_rpath->dpo.dpoi_type)
+ {
+ case DPO_RECEIVE:
+ fp->is_local = true;
+ break;
+ case DPO_DROP:
+ fp->is_drop = true;
+ break;
+ case DPO_IP_NULL:
+ switch (api_rpath->dpo.dpoi_index)
+ {
+ case IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_NONE:
+ fp->is_drop = true;
+ break;
+ case IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_SEND_ICMP_UNREACH:
+ fp->is_unreach = true;
+ break;
+ case IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_SEND_ICMP_PROHIBIT:
+ fp->is_prohibit = true;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ fp->weight = api_rpath->rpath.frp_weight;
+ fp->preference = api_rpath->rpath.frp_preference;
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+typedef struct api_ip6_fib_show_ctx_t_
+{
+ u32 fib_index;
+ fib_node_index_t *entries;
+} api_ip6_fib_show_ctx_t;
+
+static void
+api_ip6_fib_table_put_entries (clib_bihash_kv_24_8_t * kvp, void *arg)
+{
+ api_ip6_fib_show_ctx_t *ctx = arg;
+
+ if ((kvp->key[2] >> 32) == ctx->fib_index)
+ {
+ vec_add1 (ctx->entries, kvp->value);
+ }
+}
+
+static void
+api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q,
+ vl_api_ip6_fib_dump_t * mp,
+ fib_table_t * fib_table)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ ip6_main_t *im6 = &ip6_main;
+ fib_node_index_t *fib_entry_index;
+ api_ip6_fib_show_ctx_t ctx = {
+ .fib_index = fib_table->ft_index,
+ .entries = NULL,
+ };
+ fib_route_path_encode_t *api_rpaths;
+ fib_prefix_t pfx;
+
+ BV (clib_bihash_foreach_key_value_pair)
+ ((BVT (clib_bihash) *) & im6->ip6_table[IP6_FIB_TABLE_NON_FWDING].
+ ip6_hash, api_ip6_fib_table_put_entries, &ctx);
+
+ vec_sort_with_function (ctx.entries, fib_entry_cmp_for_sort);
+
+ vec_foreach (fib_entry_index, ctx.entries)
+ {
+ fib_entry_get_prefix (*fib_entry_index, &pfx);
+ api_rpaths = NULL;
+ fib_entry_encode (*fib_entry_index, &api_rpaths);
+ send_ip6_fib_details (am, q,
+ fib_table->ft_table_id,
+ &pfx, api_rpaths, mp->context);
+ vec_free (api_rpaths);
+ }
+
+ vec_free (ctx.entries);
+}
+
+static void
+vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ ip6_main_t *im6 = &ip6_main;
+ fib_table_t *fib_table;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (fib_table, im6->fibs,
+ ({
+ api_ip6_fib_table_get_all(q, mp, fib_table);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+send_ip_mfib_details (unix_shared_memory_queue_t * q,
+ u32 context, u32 table_id, fib_node_index_t mfei)
+{
+ fib_route_path_encode_t *api_rpath, *api_rpaths = NULL;
+ vl_api_ip_mfib_details_t *mp;
+ mfib_entry_t *mfib_entry;
+ vl_api_fib_path_t *fp;
+ mfib_prefix_t pfx;
+ int path_count;
+
+ mfib_entry = mfib_entry_get (mfei);
+ mfib_entry_get_prefix (mfei, &pfx);
+ mfib_entry_encode (mfei, &api_rpaths);
+
+ path_count = vec_len (api_rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+ if (!mp)
+ return;
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_MFIB_DETAILS);
+ mp->context = context;
+
+ mp->rpf_id = mfib_entry->mfe_rpf_id;
+ mp->entry_flags = mfib_entry->mfe_flags;
+ mp->table_id = htonl (table_id);
+ mp->address_length = pfx.fp_len;
+ memcpy (mp->grp_address, &pfx.fp_grp_addr.ip4,
+ sizeof (pfx.fp_grp_addr.ip4));
+ memcpy (mp->src_address, &pfx.fp_src_addr.ip4,
+ sizeof (pfx.fp_src_addr.ip4));
+
+ mp->count = htonl (path_count);
+ fp = mp->path;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+
+ fp->weight = 0;
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+ vec_free (api_rpaths);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+typedef struct vl_api_ip_mfib_dump_ctc_t_
+{
+ fib_node_index_t *entries;
+} vl_api_ip_mfib_dump_ctc_t;
+
+static int
+vl_api_ip_mfib_table_dump_walk (fib_node_index_t fei, void *arg)
+{
+ vl_api_ip_mfib_dump_ctc_t *ctx = arg;
+
+ vec_add1 (ctx->entries, fei);
+
+ return (0);
+}
+
+static void
+vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ ip4_main_t *im = &ip4_main;
+ mfib_table_t *mfib_table;
+ fib_node_index_t *mfeip;
+ vl_api_ip_mfib_dump_ctc_t ctx = {
+ .entries = NULL,
+ };
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+
+ /* *INDENT-OFF* */
+ pool_foreach (mfib_table, im->mfibs,
+ ({
+ ip4_mfib_table_walk(&mfib_table->v4,
+ vl_api_ip_mfib_table_dump_walk,
+ &ctx);
+
+ vec_sort_with_function (ctx.entries, mfib_entry_cmp_for_sort);
+
+ vec_foreach (mfeip, ctx.entries)
+ {
+ send_ip_mfib_details (q, mp->context,
+ mfib_table->mft_table_id,
+ *mfeip);
+ }
+ vec_reset_length (ctx.entries);
+
+ }));
+ /* *INDENT-ON* */
+
+ vec_free (ctx.entries);
+}
+
+static void
+send_ip6_mfib_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 table_id,
+ mfib_prefix_t * pfx,
+ fib_route_path_encode_t * api_rpaths, u32 context)
+{
+ vl_api_ip6_mfib_details_t *mp;
+ fib_route_path_encode_t *api_rpath;
+ vl_api_fib_path_t *fp;
+ int path_count;
+
+ path_count = vec_len (api_rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+ if (!mp)
+ return;
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP6_MFIB_DETAILS);
+ mp->context = context;
+
+ mp->table_id = htonl (table_id);
+ mp->address_length = pfx->fp_len;
+ memcpy (mp->grp_address, &pfx->fp_grp_addr.ip6,
+ sizeof (pfx->fp_grp_addr.ip6));
+ memcpy (mp->src_address, &pfx->fp_src_addr.ip6,
+ sizeof (pfx->fp_src_addr.ip6));
+
+ mp->count = htonl (path_count);
+ fp = mp->path;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+
+ fp->weight = 0;
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+typedef struct vl_api_ip6_mfib_dump_ctc_t_
+{
+ fib_node_index_t *entries;
+} vl_api_ip6_mfib_dump_ctc_t;
+
+static int
+vl_api_ip6_mfib_table_dump_walk (fib_node_index_t fei, void *arg)
+{
+ vl_api_ip6_mfib_dump_ctc_t *ctx = arg;
+
+ vec_add1 (ctx->entries, fei);
+
+ return (0);
+}
+
+static void
+vl_api_ip6_mfib_dump_t_handler (vl_api_ip6_mfib_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ ip6_main_t *im = &ip6_main;
+ mfib_table_t *mfib_table;
+ fib_node_index_t *mfeip;
+ mfib_prefix_t pfx;
+ fib_route_path_encode_t *api_rpaths = NULL;
+ vl_api_ip6_mfib_dump_ctc_t ctx = {
+ .entries = NULL,
+ };
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+
+ /* *INDENT-OFF* */
+ pool_foreach (mfib_table, im->mfibs,
+ ({
+ ip6_mfib_table_walk(&mfib_table->v6,
+ vl_api_ip6_mfib_table_dump_walk,
+ &ctx);
+
+ vec_sort_with_function (ctx.entries, mfib_entry_cmp_for_sort);
+
+ vec_foreach(mfeip, ctx.entries)
+ {
+ mfib_entry_get_prefix (*mfeip, &pfx);
+ mfib_entry_encode (*mfeip, &api_rpaths);
+ send_ip6_mfib_details (am, q,
+ mfib_table->mft_table_id,
+ &pfx, api_rpaths,
+ mp->context);
+ }
+ vec_reset_length (api_rpaths);
+ vec_reset_length (ctx.entries);
+
+ }));
+ /* *INDENT-ON* */
+
+ vec_free (ctx.entries);
+ vec_free (api_rpaths);
+}
+
+static void
+vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp,
+ vlib_main_t * vm)
+{
+ vl_api_ip_neighbor_add_del_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ stats_dslock_with_hint (1 /* release hint */ , 7 /* tag */ );
+
+ /*
+ * there's no validation here of the ND/ARP entry being added.
+ * The expectation is that the FIB will ensure that nothing bad
+ * will come of adding bogus entries.
+ */
+ if (mp->is_ipv6)
+ {
+ if (mp->is_add)
+ rv = vnet_set_ip6_ethernet_neighbor
+ (vm, ntohl (mp->sw_if_index),
+ (ip6_address_t *) (mp->dst_address),
+ mp->mac_address, sizeof (mp->mac_address), mp->is_static,
+ mp->is_no_adj_fib);
+ else
+ rv = vnet_unset_ip6_ethernet_neighbor
+ (vm, ntohl (mp->sw_if_index),
+ (ip6_address_t *) (mp->dst_address),
+ mp->mac_address, sizeof (mp->mac_address));
+ }
+ else
+ {
+ ethernet_arp_ip4_over_ethernet_address_t a;
+
+ clib_memcpy (&a.ethernet, mp->mac_address, 6);
+ clib_memcpy (&a.ip4, mp->dst_address, 4);
+
+ if (mp->is_add)
+ rv = vnet_arp_set_ip4_over_ethernet (vnm, ntohl (mp->sw_if_index),
+ &a, mp->is_static,
+ mp->is_no_adj_fib);
+ else
+ rv =
+ vnet_arp_unset_ip4_over_ethernet (vnm, ntohl (mp->sw_if_index), &a);
+ }
+
+ stats_dsunlock ();
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY);
+}
+
+void
+ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api)
+{
+ u32 fib_index, mfib_index;
+
+ /*
+ * ignore action on the default table - this is always present
+ * and cannot be added nor deleted from the API
+ */
+ if (0 != table_id)
+ {
+ /*
+ * The API holds only one lock on the table.
+ * i.e. it can be added many times via the API but needs to be
+ * deleted only once.
+ * The FIB index for unicast and multicast is not necessarily the
+ * same, since internal VPP systems (like LISP and SR) create
+ * their own unicast tables.
+ */
+ fib_index = fib_table_find (fproto, table_id);
+ mfib_index = mfib_table_find (fproto, table_id);
+
+ if (~0 != fib_index)
+ {
+ fib_table_unlock (fib_index, fproto,
+ (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI));
+ }
+ if (~0 != mfib_index)
+ {
+ mfib_table_unlock (mfib_index, fproto,
+ (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI));
+ }
+ }
+}
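+
+/*
+ * Lock pairing, sketched with an illustrative table id: ip_table_create()
+ * (below) takes at most one API-source lock per table, so repeated adds
+ * are idempotent and a single delete releases the table:
+ *
+ *   ip_table_create (FIB_PROTOCOL_IP4, 100, 1, NULL);  // locked once
+ *   ip_table_create (FIB_PROTOCOL_IP4, 100, 1, NULL);  // no extra lock
+ *   ip_table_delete (FIB_PROTOCOL_IP4, 100, 1);        // table released
+ */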
+
+void
+vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
+{
+ vl_api_ip_table_add_del_reply_t *rmp;
+ fib_protocol_t fproto = (mp->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4);
+ u32 table_id = ntohl (mp->table_id);
+ int rv = 0;
+
+ if (mp->is_add)
+ {
+ ip_table_create (fproto, table_id, 1, mp->name);
+ }
+ else
+ {
+ ip_table_delete (fproto, table_id, 1);
+ }
+
+ REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY);
+}
+
+int
+add_del_route_t_handler (u8 is_multipath,
+ u8 is_add,
+ u8 is_drop,
+ u8 is_unreach,
+ u8 is_prohibit,
+ u8 is_local,
+ u8 is_multicast,
+ u8 is_classify,
+ u32 classify_table_index,
+ u8 is_resolve_host,
+ u8 is_resolve_attached,
+ u8 is_interface_rx,
+ u8 is_rpf_id,
+ u32 fib_index,
+ const fib_prefix_t * prefix,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t * next_hop,
+ u32 next_hop_sw_if_index,
+ u8 next_hop_fib_index,
+ u16 next_hop_weight,
+ u16 next_hop_preference,
+ mpls_label_t next_hop_via_label,
+ mpls_label_t * next_hop_out_label_stack)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ fib_route_path_flags_t path_flags = FIB_ROUTE_PATH_FLAG_NONE;
+ fib_route_path_t path = {
+ .frp_proto = next_hop_proto,
+ .frp_addr = (NULL == next_hop ? zero_addr : *next_hop),
+ .frp_sw_if_index = next_hop_sw_if_index,
+ .frp_fib_index = next_hop_fib_index,
+ .frp_weight = next_hop_weight,
+ .frp_preference = next_hop_preference,
+ .frp_label_stack = next_hop_out_label_stack,
+ };
+ fib_route_path_t *paths = NULL;
+ fib_entry_flag_t entry_flags = FIB_ENTRY_FLAG_NONE;
+
+ /*
+ * The special INVALID label means we are not recursing via a
+ * label. The explicit-null value is never a valid via-label
+ * either, so treating 0 the same way means clients that leave
+ * the field at its default of 0 get the expected behaviour.
+ */
+ if ((MPLS_LABEL_INVALID != next_hop_via_label) && (0 != next_hop_via_label))
+ {
+ path.frp_proto = DPO_PROTO_MPLS;
+ path.frp_local_label = next_hop_via_label;
+ path.frp_eos = MPLS_NON_EOS;
+ }
+ if (is_resolve_host)
+ path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
+ if (is_resolve_attached)
+ path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED;
+ if (is_interface_rx)
+ path_flags |= FIB_ROUTE_PATH_INTF_RX;
+ if (is_rpf_id)
+ path_flags |= FIB_ROUTE_PATH_RPF_ID;
+ if (is_multicast)
+ entry_flags |= FIB_ENTRY_FLAG_MULTICAST;
+
+ path.frp_flags = path_flags;
+
+ if (is_multipath)
+ {
+ stats_dslock_with_hint (1 /* release hint */ , 10 /* tag */ );
+
+
+ vec_add1 (paths, path);
+
+ if (is_add)
+ fib_table_entry_path_add2 (fib_index,
+ prefix,
+ FIB_SOURCE_API, entry_flags, paths);
+ else
+ fib_table_entry_path_remove2 (fib_index,
+ prefix, FIB_SOURCE_API, paths);
+
+ vec_free (paths);
+ stats_dsunlock ();
+ return 0;
+ }
+
+ stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ );
+
+ if (is_drop || is_local || is_classify || is_unreach || is_prohibit)
+ {
+ /*
+ * special route types that link directly to the adj
+ */
+ if (is_add)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+ dpo_proto_t dproto;
+
+ dproto = fib_proto_to_dpo (prefix->fp_proto);
+
+ if (is_drop)
+ ip_null_dpo_add_and_lock (dproto, IP_NULL_ACTION_NONE, &dpo);
+ else if (is_local)
+ receive_dpo_add_or_lock (dproto, ~0, NULL, &dpo);
+ else if (is_unreach)
+ ip_null_dpo_add_and_lock (dproto,
+ IP_NULL_ACTION_SEND_ICMP_UNREACH, &dpo);
+ else if (is_prohibit)
+ ip_null_dpo_add_and_lock (dproto,
+ IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+ &dpo);
+ else if (is_classify)
+ {
+ if (pool_is_free_index (cm->tables,
+ ntohl (classify_table_index)))
+ {
+ stats_dsunlock ();
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+ dpo_set (&dpo, DPO_CLASSIFY, dproto,
+ classify_dpo_create (dproto,
+ ntohl (classify_table_index)));
+ }
+ else
+ {
+ stats_dsunlock ();
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+ fib_table_entry_special_dpo_update (fib_index,
+ prefix,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index, prefix, FIB_SOURCE_API);
+ }
+ }
+ else
+ {
+ if (is_add)
+ {
+ vec_add1 (paths, path);
+ fib_table_entry_update (fib_index,
+ prefix, FIB_SOURCE_API, entry_flags, paths);
+ vec_free (paths);
+ }
+ else
+ {
+ fib_table_entry_delete (fib_index, prefix, FIB_SOURCE_API);
+ }
+ }
+
+ stats_dsunlock ();
+ return (0);
+}
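+
+/*
+ * Multipath vs. non-multipath semantics, in brief: with is_multipath set
+ * the single path built above is added to (or removed from) the set the
+ * API source already owns, via fib_table_entry_path_add2/remove2; without
+ * it, an add replaces the source's entry outright:
+ *
+ *   vec_add1 (paths, path);
+ *   fib_table_entry_update (fib_index, prefix, FIB_SOURCE_API,
+ *                           entry_flags, paths);  // replace, not append
+ */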
+
+int
+add_del_route_check (fib_protocol_t table_proto,
+ u32 table_id,
+ u32 next_hop_sw_if_index,
+ dpo_proto_t next_hop_table_proto,
+ u32 next_hop_table_id,
+ u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ /* Temporary whilst I do the CSIT dance */
+ u8 create_missing_tables = 1;
+
+ *fib_index = fib_table_find (table_proto, ntohl (table_id));
+ if (~0 == *fib_index)
+ {
+ if (create_missing_tables)
+ {
+ *fib_index = fib_table_find_or_create_and_lock (table_proto,
+ ntohl (table_id),
+ FIB_SOURCE_API);
+ }
+ else
+ {
+ /* No such VRF, and we weren't asked to create one */
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ }
+ }
+
+ if (!is_rpf_id && ~0 != ntohl (next_hop_sw_if_index))
+ {
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces,
+ ntohl (next_hop_sw_if_index)))
+ {
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+ }
+ }
+ else
+ {
+ fib_protocol_t fib_nh_proto;
+
+ if (next_hop_table_proto > DPO_PROTO_MPLS)
+ return (0);
+
+ fib_nh_proto = dpo_proto_to_fib (next_hop_table_proto);
+
+ if (is_rpf_id)
+ *next_hop_fib_index = mfib_table_find (fib_nh_proto,
+ ntohl (next_hop_table_id));
+ else
+ *next_hop_fib_index = fib_table_find (fib_nh_proto,
+ ntohl (next_hop_table_id));
+
+ if (~0 == *next_hop_fib_index)
+ {
+ if (create_missing_tables)
+ {
+ if (is_rpf_id)
+ *next_hop_fib_index =
+ mfib_table_find_or_create_and_lock (fib_nh_proto,
+ ntohl
+ (next_hop_table_id),
+ MFIB_SOURCE_API);
+ else
+ *next_hop_fib_index =
+ fib_table_find_or_create_and_lock (fib_nh_proto,
+ ntohl
+ (next_hop_table_id),
+ FIB_SOURCE_API);
+ }
+ else
+ {
+ /* No such VRF, and we weren't asked to create one */
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+ u32 fib_index, next_hop_fib_index;
+ mpls_label_t *label_stack = NULL;
+ int rv, ii, n_labels;
+
+ rv = add_del_route_check (FIB_PROTOCOL_IP4,
+ mp->table_id,
+ mp->next_hop_sw_if_index,
+ DPO_PROTO_IP4,
+ mp->next_hop_table_id,
+ 0, &fib_index, &next_hop_fib_index);
+
+ if (0 != rv)
+ return (rv);
+
+ fib_prefix_t pfx = {
+ .fp_len = mp->dst_address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ clib_memcpy (&pfx.fp_addr.ip4, mp->dst_address, sizeof (pfx.fp_addr.ip4));
+
+ ip46_address_t nh;
+ memset (&nh, 0, sizeof (nh));
+ memcpy (&nh.ip4, mp->next_hop_address, sizeof (nh.ip4));
+
+ n_labels = mp->next_hop_n_out_labels;
+ if (n_labels == 0)
+ ;
+ else if (1 == n_labels)
+ vec_add1 (label_stack, ntohl (mp->next_hop_out_label_stack[0]));
+ else
+ {
+ vec_validate (label_stack, n_labels - 1);
+ for (ii = 0; ii < n_labels; ii++)
+ label_stack[ii] = ntohl (mp->next_hop_out_label_stack[ii]);
+ }
+
+ return (add_del_route_t_handler (mp->is_multipath,
+ mp->is_add,
+ mp->is_drop,
+ mp->is_unreach,
+ mp->is_prohibit,
+ mp->is_local, 0,
+ mp->is_classify,
+ mp->classify_table_index,
+ mp->is_resolve_host,
+ mp->is_resolve_attached, 0, 0,
+ fib_index, &pfx, DPO_PROTO_IP4,
+ &nh,
+ ntohl (mp->next_hop_sw_if_index),
+ next_hop_fib_index,
+ mp->next_hop_weight,
+ mp->next_hop_preference,
+ ntohl (mp->next_hop_via_label),
+ label_stack));
+}
+
+static int
+ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+ u32 fib_index, next_hop_fib_index;
+ mpls_label_t *label_stack = NULL;
+ int rv, ii, n_labels;
+
+ rv = add_del_route_check (FIB_PROTOCOL_IP6,
+ mp->table_id,
+ mp->next_hop_sw_if_index,
+ DPO_PROTO_IP6,
+ mp->next_hop_table_id,
+ 0, &fib_index, &next_hop_fib_index);
+
+ if (0 != rv)
+ return (rv);
+
+ fib_prefix_t pfx = {
+ .fp_len = mp->dst_address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+ clib_memcpy (&pfx.fp_addr.ip6, mp->dst_address, sizeof (pfx.fp_addr.ip6));
+
+ ip46_address_t nh;
+ memset (&nh, 0, sizeof (nh));
+ memcpy (&nh.ip6, mp->next_hop_address, sizeof (nh.ip6));
+
+ n_labels = mp->next_hop_n_out_labels;
+ if (n_labels == 0)
+ ;
+ else if (1 == n_labels)
+ vec_add1 (label_stack, ntohl (mp->next_hop_out_label_stack[0]));
+ else
+ {
+ vec_validate (label_stack, n_labels - 1);
+ for (ii = 0; ii < n_labels; ii++)
+ label_stack[ii] = ntohl (mp->next_hop_out_label_stack[ii]);
+ }
+
+ return (add_del_route_t_handler (mp->is_multipath,
+ mp->is_add,
+ mp->is_drop,
+ mp->is_unreach,
+ mp->is_prohibit,
+ mp->is_local, 0,
+ mp->is_classify,
+ mp->classify_table_index,
+ mp->is_resolve_host,
+ mp->is_resolve_attached, 0, 0,
+ fib_index, &pfx, DPO_PROTO_IP6,
+ &nh, ntohl (mp->next_hop_sw_if_index),
+ next_hop_fib_index,
+ mp->next_hop_weight,
+ mp->next_hop_preference,
+ ntohl (mp->next_hop_via_label),
+ label_stack));
+}
+
+void
+vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+ vl_api_ip_add_del_route_reply_t *rmp;
+ int rv;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnm->api_errno = 0;
+
+ if (mp->is_ipv6)
+ rv = ip6_add_del_route_t_handler (mp);
+ else
+ rv = ip4_add_del_route_t_handler (mp);
+
+ rv = (rv == 0) ? vnm->api_errno : rv;
+
+ REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY);
+}
+
+void
+ip_table_create (fib_protocol_t fproto,
+ u32 table_id, u8 is_api, const u8 * name)
+{
+ u32 fib_index, mfib_index;
+
+ /*
+ * ignore action on the default table - this is always present
+ * and cannot be added nor deleted from the API
+ */
+ if (0 != table_id)
+ {
+ /*
+ * The API holds only one lock on the table.
+ * i.e. it can be added many times via the API but needs to be
+ * deleted only once.
+ * The FIB index for unicast and multicast is not necessarily the
+ * same, since internal VPP systems (like LISP and SR) create
+ * their own unicast tables.
+ */
+ fib_index = fib_table_find (fproto, table_id);
+ mfib_index = mfib_table_find (fproto, table_id);
+
+ if (~0 == fib_index)
+ {
+ fib_table_find_or_create_and_lock_w_name (fproto, table_id,
+ (is_api ?
+ FIB_SOURCE_API :
+ FIB_SOURCE_CLI), name);
+ }
+ if (~0 == mfib_index)
+ {
+ mfib_table_find_or_create_and_lock_w_name (fproto, table_id,
+ (is_api ?
+ MFIB_SOURCE_API :
+ MFIB_SOURCE_CLI), name);
+ }
+ }
+}
+
+static int
+add_del_mroute_check (fib_protocol_t table_proto,
+ u32 table_id,
+ u32 next_hop_sw_if_index, u8 is_local, u32 * fib_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ *fib_index = mfib_table_find (table_proto, ntohl (table_id));
+ if (~0 == *fib_index)
+ {
+ /* No such table */
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ }
+
+ if (~0 != ntohl (next_hop_sw_if_index))
+ {
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces,
+ ntohl (next_hop_sw_if_index)))
+ {
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+ }
+ }
+
+ return (0);
+}
+
+static int
+mroute_add_del_handler (u8 is_add,
+ u8 is_local,
+ u32 fib_index,
+ const mfib_prefix_t * prefix,
+ u32 entry_flags,
+ fib_rpf_id_t rpf_id,
+ u32 next_hop_sw_if_index, u32 itf_flags)
+{
+ stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ );
+
+ fib_route_path_t path = {
+ .frp_sw_if_index = next_hop_sw_if_index,
+ .frp_proto = fib_proto_to_dpo (prefix->fp_proto),
+ };
+
+ if (is_local)
+ path.frp_flags |= FIB_ROUTE_PATH_LOCAL;
+
+
+ if (!is_local && ~0 == next_hop_sw_if_index)
+ {
+ mfib_table_entry_update (fib_index, prefix,
+ MFIB_SOURCE_API, rpf_id, entry_flags);
+ }
+ else
+ {
+ if (is_add)
+ {
+ mfib_table_entry_path_update (fib_index, prefix,
+ MFIB_SOURCE_API, &path, itf_flags);
+ }
+ else
+ {
+ mfib_table_entry_path_remove (fib_index, prefix,
+ MFIB_SOURCE_API, &path);
+ }
+ }
+
+ stats_dsunlock ();
+ return (0);
+}
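+
+/*
+ * Note the branch above: with no interface (next_hop_sw_if_index == ~0)
+ * and is_local clear, the request updates only the entry itself (its
+ * flags and RPF id) rather than its path set:
+ *
+ *   mfib_table_entry_update (fib_index, prefix, MFIB_SOURCE_API,
+ *                            rpf_id, entry_flags);  // entry-only update
+ */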
+
+static int
+api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
+{
+ fib_protocol_t fproto;
+ u32 fib_index;
+ int rv;
+
+ fproto = (mp->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4);
+ rv = add_del_mroute_check (fproto,
+ mp->table_id,
+ mp->next_hop_sw_if_index,
+ mp->is_local, &fib_index);
+
+ if (0 != rv)
+ return (rv);
+
+ mfib_prefix_t pfx = {
+ .fp_len = ntohs (mp->grp_address_length),
+ .fp_proto = fproto,
+ };
+
+ if (FIB_PROTOCOL_IP4 == fproto)
+ {
+ clib_memcpy (&pfx.fp_grp_addr.ip4, mp->grp_address,
+ sizeof (pfx.fp_grp_addr.ip4));
+ clib_memcpy (&pfx.fp_src_addr.ip4, mp->src_address,
+ sizeof (pfx.fp_src_addr.ip4));
+ }
+ else
+ {
+ clib_memcpy (&pfx.fp_grp_addr.ip6, mp->grp_address,
+ sizeof (pfx.fp_grp_addr.ip6));
+ clib_memcpy (&pfx.fp_src_addr.ip6, mp->src_address,
+ sizeof (pfx.fp_src_addr.ip6));
+ }
+
+ return (mroute_add_del_handler (mp->is_add,
+ mp->is_local,
+ fib_index, &pfx,
+ ntohl (mp->entry_flags),
+ ntohl (mp->rpf_id),
+ ntohl (mp->next_hop_sw_if_index),
+ ntohl (mp->itf_flags)));
+}
+
+void
+vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
+{
+ vl_api_ip_mroute_add_del_reply_t *rmp;
+ int rv;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnm->api_errno = 0;
+
+ rv = api_mroute_add_del_t_handler (mp);
+
+ rv = (rv == 0) ? vnm->api_errno : rv;
+
+ REPLY_MACRO (VL_API_IP_MROUTE_ADD_DEL_REPLY);
+}
+
+static void
+send_ip_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q, u32 sw_if_index,
+ u8 is_ipv6, u32 context)
+{
+ vl_api_ip_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_DETAILS);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_ipv6 = is_ipv6;
+ mp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+send_ip_address_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u8 * ip, u16 prefix_length,
+ u32 sw_if_index, u8 is_ipv6, u32 context)
+{
+ vl_api_ip_address_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_ADDRESS_DETAILS);
+
+ if (is_ipv6)
+ {
+ clib_memcpy (&mp->ip, ip, sizeof (mp->ip));
+ }
+ else
+ {
+ u32 *tp = (u32 *) mp->ip;
+ *tp = *(u32 *) ip;
+ }
+ mp->prefix_length = prefix_length;
+ mp->context = context;
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->is_ipv6 = is_ipv6;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ ip6_address_t *r6;
+ ip4_address_t *r4;
+ ip6_main_t *im6 = &ip6_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip_lookup_main_t *lm6 = &im6->lookup_main;
+ ip_lookup_main_t *lm4 = &im4->lookup_main;
+ ip_interface_address_t *ia = 0;
+ u32 sw_if_index = ~0;
+ int rv __attribute__ ((unused)) = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ if (mp->is_ipv6)
+ {
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm6, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ r6 = ip_interface_address_get_address (lm6, ia);
+ u16 prefix_length = ia->address_length;
+ send_ip_address_details(am, q, (u8*)r6, prefix_length,
+ sw_if_index, 1, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm4, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ r4 = ip_interface_address_get_address (lm4, ia);
+ u16 prefix_length = ia->address_length;
+ send_ip_address_details(am, q, (u8*)r4, prefix_length,
+ sw_if_index, 0, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ BAD_SW_IF_INDEX_LABEL;
+}
+
+static void
+vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ unix_shared_memory_queue_t *q;
+ vnet_sw_interface_t *si, *sorted_sis;
+ u32 sw_if_index = ~0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* Gather interfaces. */
+ sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
+ _vec_len (sorted_sis) = 0;
+ /* *INDENT-OFF* */
+ pool_foreach (si, im->sw_interfaces,
+ ({
+ vec_add1 (sorted_sis, si[0]);
+ }));
+ /* *INDENT-ON* */
+
+ vec_foreach (si, sorted_sis)
+ {
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
+ {
+ if (mp->is_ipv6 && !ip6_interface_enabled (vm, si->sw_if_index))
+ {
+ continue;
+ }
+ sw_if_index = si->sw_if_index;
+ send_ip_details (am, q, sw_if_index, mp->is_ipv6, mp->context);
+ }
+ }
+}
+
+static void
+set_ip6_flow_hash (vl_api_set_ip_flow_hash_t * mp)
+{
+ vl_api_set_ip_flow_hash_reply_t *rmp;
+ int rv;
+ u32 table_id;
+ flow_hash_config_t flow_hash_config = 0;
+
+ table_id = ntohl (mp->vrf_id);
+
+#define _(a,b) if (mp->a) flow_hash_config |= b;
+ foreach_flow_hash_bit;
+#undef _
+
+ rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
+
+ REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY);
+}
+
+static void
+set_ip4_flow_hash (vl_api_set_ip_flow_hash_t * mp)
+{
+ vl_api_set_ip_flow_hash_reply_t *rmp;
+ int rv;
+ u32 table_id;
+ flow_hash_config_t flow_hash_config = 0;
+
+ table_id = ntohl (mp->vrf_id);
+
+#define _(a,b) if (mp->a) flow_hash_config |= b;
+ foreach_flow_hash_bit;
+#undef _
+
+ rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
+
+ REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY);
+}
+
+
+static void
+vl_api_set_ip_flow_hash_t_handler (vl_api_set_ip_flow_hash_t * mp)
+{
+ if (mp->is_ipv6 == 0)
+ set_ip4_flow_hash (mp);
+ else
+ set_ip6_flow_hash (mp);
+}
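+
+/*
+ * The _(a,b) expansion in the two handlers above turns each
+ * foreach_flow_hash_bit entry into a conditional OR; assuming the usual
+ * src/dst entries of that macro, it unrolls to the likes of:
+ *
+ *   if (mp->src) flow_hash_config |= IP_FLOW_HASH_SRC_ADDR;
+ *   if (mp->dst) flow_hash_config |= IP_FLOW_HASH_DST_ADDR;
+ */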
+
+static void
+ vl_api_sw_interface_ip6nd_ra_config_t_handler
+ (vl_api_sw_interface_ip6nd_ra_config_t * mp)
+{
+ vl_api_sw_interface_ip6nd_ra_config_reply_t *rmp;
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+ u8 is_no, suppress, managed, other, ll_option, send_unicast, cease,
+ default_router;
+
+ is_no = mp->is_no == 1;
+ suppress = mp->suppress == 1;
+ managed = mp->managed == 1;
+ other = mp->other == 1;
+ ll_option = mp->ll_option == 1;
+ send_unicast = mp->send_unicast == 1;
+ cease = mp->cease == 1;
+ default_router = mp->default_router == 1;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = ip6_neighbor_ra_config (vm, ntohl (mp->sw_if_index),
+ suppress, managed, other,
+ ll_option, send_unicast, cease,
+ default_router, ntohl (mp->lifetime),
+ ntohl (mp->initial_count),
+ ntohl (mp->initial_interval),
+ ntohl (mp->max_interval),
+ ntohl (mp->min_interval), is_no);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_IP6ND_RA_CONFIG_REPLY);
+}
+
+static void
+ vl_api_sw_interface_ip6nd_ra_prefix_t_handler
+ (vl_api_sw_interface_ip6nd_ra_prefix_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_sw_interface_ip6nd_ra_prefix_reply_t *rmp;
+ int rv = 0;
+ u8 is_no, use_default, no_advertise, off_link, no_autoconfig, no_onlink;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ is_no = mp->is_no == 1;
+ use_default = mp->use_default == 1;
+ no_advertise = mp->no_advertise == 1;
+ off_link = mp->off_link == 1;
+ no_autoconfig = mp->no_autoconfig == 1;
+ no_onlink = mp->no_onlink == 1;
+
+ rv = ip6_neighbor_ra_prefix (vm, ntohl (mp->sw_if_index),
+ (ip6_address_t *) mp->address,
+ mp->address_length, use_default,
+ ntohl (mp->val_lifetime),
+ ntohl (mp->pref_lifetime), no_advertise,
+ off_link, no_autoconfig, no_onlink, is_no);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SW_INTERFACE_IP6ND_RA_PREFIX_REPLY);
+}
+
+static void
+send_ip6nd_proxy_details (unix_shared_memory_queue_t * q,
+ u32 context,
+ const ip46_address_t * addr, u32 sw_if_index)
+{
+ vl_api_ip6nd_proxy_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP6ND_PROXY_DETAILS);
+ mp->context = context;
+ mp->sw_if_index = htonl (sw_if_index);
+ memcpy (mp->address, addr, 16);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+typedef struct api_ip6nd_proxy_fib_table_walk_ctx_t_
+{
+ u32 *indices;
+} api_ip6nd_proxy_fib_table_walk_ctx_t;
+
+static int
+api_ip6nd_proxy_fib_table_walk (fib_node_index_t fei, void *arg)
+{
+ api_ip6nd_proxy_fib_table_walk_ctx_t *ctx = arg;
+
+ if (fib_entry_is_sourced (fei, FIB_SOURCE_IP6_ND_PROXY))
+ {
+ vec_add1 (ctx->indices, fei);
+ }
+
+ return (1);
+}
+
+static void
+vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp)
+{
+ ip6_main_t *im6 = &ip6_main;
+ fib_table_t *fib_table;
+ api_ip6nd_proxy_fib_table_walk_ctx_t ctx = {
+ .indices = NULL,
+ };
+ fib_node_index_t *feip;
+ fib_prefix_t pfx;
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach (fib_table, im6->fibs,
+ ({
+ fib_table_walk(fib_table->ft_index,
+ FIB_PROTOCOL_IP6,
+ api_ip6nd_proxy_fib_table_walk,
+ &ctx);
+ }));
+ /* *INDENT-ON* */
+
+ vec_sort_with_function (ctx.indices, fib_entry_cmp_for_sort);
+
+ vec_foreach (feip, ctx.indices)
+ {
+ fib_entry_get_prefix (*feip, &pfx);
+
+ send_ip6nd_proxy_details (q,
+ mp->context,
+ &pfx.fp_addr,
+ fib_entry_get_resolving_interface (*feip));
+ }
+
+ vec_free (ctx.indices);
+}
+
+static void
+vl_api_ip6nd_proxy_add_del_t_handler (vl_api_ip6nd_proxy_add_del_t * mp)
+{
+ vl_api_ip6nd_proxy_add_del_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = ip6_neighbor_proxy_add_del (ntohl (mp->sw_if_index),
+ (ip6_address_t *) mp->address, mp->is_del);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_IP6ND_PROXY_ADD_DEL_REPLY);
+}
+
+static void
+ vl_api_sw_interface_ip6_enable_disable_t_handler
+ (vl_api_sw_interface_ip6_enable_disable_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_sw_interface_ip6_enable_disable_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv = 0;
+ clib_error_t *error;
+
+ vnm->api_errno = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ error =
+ (mp->enable == 1) ? enable_ip6_interface (vm,
+ ntohl (mp->sw_if_index)) :
+ disable_ip6_interface (vm, ntohl (mp->sw_if_index));
+
+ if (error)
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ else
+ {
+ rv = vnm->api_errno;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY);
+}
+
+static void
+ vl_api_sw_interface_ip6_set_link_local_address_t_handler
+ (vl_api_sw_interface_ip6_set_link_local_address_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_sw_interface_ip6_set_link_local_address_reply_t *rmp;
+ int rv = 0;
+ clib_error_t *error;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnm->api_errno = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ error = set_ip6_link_local_address (vm,
+ ntohl (mp->sw_if_index),
+ (ip6_address_t *) mp->address);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ else
+ {
+ rv = vnm->api_errno;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY);
+}
+
+void
+vl_mfib_signal_send_one (unix_shared_memory_queue_t * q,
+ u32 context, const mfib_signal_t * mfs)
+{
+ vl_api_mfib_signal_details_t *mp;
+ mfib_prefix_t prefix;
+ mfib_table_t *mfib;
+ mfib_itf_t *mfi;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_MFIB_SIGNAL_DETAILS);
+ mp->context = context;
+
+ mfi = mfib_itf_get (mfs->mfs_itf);
+ mfib_entry_get_prefix (mfs->mfs_entry, &prefix);
+ mfib = mfib_table_get (mfib_entry_get_fib_index (mfs->mfs_entry),
+ prefix.fp_proto);
+ mp->table_id = ntohl (mfib->mft_table_id);
+ mp->sw_if_index = ntohl (mfi->mfi_sw_if_index);
+
+ if (FIB_PROTOCOL_IP4 == prefix.fp_proto)
+ {
+ mp->grp_address_len = ntohs (prefix.fp_len);
+
+ memcpy (mp->grp_address, &prefix.fp_grp_addr.ip4, 4);
+ if (prefix.fp_len > 32)
+ {
+ memcpy (mp->src_address, &prefix.fp_src_addr.ip4, 4);
+ }
+ }
+ else
+ {
+ mp->grp_address_len = ntohs (prefix.fp_len);
+
+ ASSERT (0);
+ }
+
+ if (0 != mfs->mfs_buffer_len)
+ {
+ mp->ip_packet_len = ntohs (mfs->mfs_buffer_len);
+
+ memcpy (mp->ip_packet_data, mfs->mfs_buffer, mfs->mfs_buffer_len);
+ }
+ else
+ {
+ mp->ip_packet_len = 0;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_mfib_signal_dump_t_handler (vl_api_mfib_signal_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ while (q->cursize < q->maxsize && mfib_signal_send_one (q, mp->context))
+ ;
+}
+
+#define vl_msg_name_crc_list
+#include <vnet/ip/ip.api.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_ip;
+#undef _
+}
+
+static clib_error_t *
+ip_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_ip_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
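+
+/*
+ * For a single table entry, e.g. _(IP_FIB_DUMP, ip_fib_dump), the macro
+ * above expands (roughly) to:
+ *
+ *   vl_msg_api_set_handlers (VL_API_IP_FIB_DUMP, "ip_fib_dump",
+ *                            vl_api_ip_fib_dump_t_handler,
+ *                            vl_noop_handler,
+ *                            vl_api_ip_fib_dump_t_endian,
+ *                            vl_api_ip_fib_dump_t_print,
+ *                            sizeof (vl_api_ip_fib_dump_t), 1);
+ */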
+
+VLIB_API_INIT_FUNCTION (ip_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c
new file mode 100644
index 00000000..6a9cf657
--- /dev/null
+++ b/src/vnet/ip/ip_checksum.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/ip_checksum.c: ip/tcp/udp checksums
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+ip_csum_t
+ip_incremental_checksum (ip_csum_t sum, void *_data, uword n_bytes)
+{
+ uword data = pointer_to_uword (_data);
+ ip_csum_t sum0, sum1;
+
+ sum0 = 0;
+ sum1 = sum;
+
+ /* Align data pointer to 64 bits. */
+#define _(t) \
+do { \
+ if (n_bytes >= sizeof (t) \
+ && sizeof (t) < sizeof (ip_csum_t) \
+ && (data % (2 * sizeof (t))) != 0) \
+ { \
+ sum0 += * uword_to_pointer (data, t *); \
+ data += sizeof (t); \
+ n_bytes -= sizeof (t); \
+ } \
+} while (0)
+
+ _(u8);
+ _(u16);
+ if (BITS (ip_csum_t) > 32)
+ _(u32);
+
+#undef _
+
+ {
+ ip_csum_t *d = uword_to_pointer (data, ip_csum_t *);
+
+ while (n_bytes >= 2 * sizeof (d[0]))
+ {
+ sum0 = ip_csum_with_carry (sum0, d[0]);
+ sum1 = ip_csum_with_carry (sum1, d[1]);
+ d += 2;
+ n_bytes -= 2 * sizeof (d[0]);
+ }
+
+ data = pointer_to_uword (d);
+ }
+
+#define _(t) \
+do { \
+ if (n_bytes >= sizeof (t) && sizeof (t) <= sizeof (ip_csum_t)) \
+ { \
+ sum0 = ip_csum_with_carry (sum0, * uword_to_pointer (data, t *)); \
+ data += sizeof (t); \
+ n_bytes -= sizeof (t); \
+ } \
+} while (0)
+
+ if (BITS (ip_csum_t) > 32)
+ _(u64);
+ _(u32);
+ _(u16);
+ _(u8);
+
+#undef _
+
+ /* Combine even and odd sums. */
+ sum0 = ip_csum_with_carry (sum0, sum1);
+
+ return sum0;
+}
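+
+/*
+ * Two accumulators (sum0/sum1) let the carry chains of adjacent words
+ * proceed independently; one's-complement addition is commutative and
+ * associative, so the final ip_csum_with_carry merge is exact.  The
+ * underlying 16-bit arithmetic, worked once:
+ *
+ *   0x1234 + 0xfff0 = 0x11224  ->  fold the 17th bit back in:
+ *   0x1224 + 1 = 0x1225
+ */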
+
+ip_csum_t
+ip_csum_and_memcpy (ip_csum_t sum, void *dst, void *src, uword n_bytes)
+{
+ uword n_left;
+ ip_csum_t sum0 = sum, sum1;
+ n_left = n_bytes;
+
+ if (n_left && (pointer_to_uword (dst) & sizeof (u8)))
+ {
+ u8 *d8, val;
+
+ d8 = dst;
+ val = ((u8 *) src)[0];
+ d8[0] = val;
+ dst += 1;
+ src += 1;
+ n_left -= 1;
+ sum0 =
+ ip_csum_with_carry (sum0, val << (8 * CLIB_ARCH_IS_LITTLE_ENDIAN));
+ }
+
+ while ((n_left >= sizeof (u16))
+ && (pointer_to_uword (dst) & (sizeof (sum) - sizeof (u16))))
+ {
+ u16 *d16, *s16;
+
+ d16 = dst;
+ s16 = src;
+
+ d16[0] = clib_mem_unaligned (&s16[0], u16);
+
+ sum0 = ip_csum_with_carry (sum0, d16[0]);
+ dst += sizeof (u16);
+ src += sizeof (u16);
+ n_left -= sizeof (u16);
+ }
+
+ sum1 = 0;
+ while (n_left >= 2 * sizeof (sum))
+ {
+ ip_csum_t dst0, dst1;
+ ip_csum_t *dst_even, *src_even;
+
+ dst_even = dst;
+ src_even = src;
+ dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t);
+ dst1 = clib_mem_unaligned (&src_even[1], ip_csum_t);
+
+ dst_even[0] = dst0;
+ dst_even[1] = dst1;
+
+ dst += 2 * sizeof (dst_even[0]);
+ src += 2 * sizeof (dst_even[0]);
+ n_left -= 2 * sizeof (dst_even[0]);
+
+ sum0 = ip_csum_with_carry (sum0, dst0);
+ sum1 = ip_csum_with_carry (sum1, dst1);
+ }
+
+ sum0 = ip_csum_with_carry (sum0, sum1);
+ while (n_left >= 1 * sizeof (sum))
+ {
+ ip_csum_t dst0, *dst_even, *src_even;
+
+ dst_even = dst;
+ src_even = src;
+
+ dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t);
+
+ dst_even[0] = dst0;
+
+ dst += 1 * sizeof (sum);
+ src += 1 * sizeof (sum);
+ n_left -= 1 * sizeof (sum);
+
+ sum0 = ip_csum_with_carry (sum0, dst0);
+ }
+
+ while (n_left >= sizeof (u16))
+ {
+ u16 dst0, *dst_short, *src_short;
+
+ dst_short = dst;
+ src_short = src;
+
+ dst0 = clib_mem_unaligned (&src_short[0], u16);
+
+ dst_short[0] = dst0;
+
+ sum0 = ip_csum_with_carry (sum0, dst_short[0]);
+ dst += 1 * sizeof (dst0);
+ src += 1 * sizeof (dst0);
+ n_left -= 1 * sizeof (dst0);
+
+ }
+
+ if (n_left == 1)
+ {
+ u8 *d8, *s8, val;
+
+ d8 = dst;
+ s8 = src;
+
+ d8[0] = val = s8[0];
+ d8 += 1;
+ s8 += 1;
+ n_left -= 1;
+ sum0 = ip_csum_with_carry (sum0, val << (8 * CLIB_ARCH_IS_BIG_ENDIAN));
+ }
+
+ return sum0;
+}
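+
+/*
+ * ip_csum_and_memcpy computes the checksum of the data as it copies it,
+ * staging a byte and then halfwords until 'dst' is aligned for the wide
+ * ip_csum_t loop.  Typical use, sketched (the complement step is the
+ * caller's, once the whole payload has been copied):
+ *
+ *   sum = ip_csum_and_memcpy (sum, dst, src, n_bytes);
+ *   u16 csum = ~ip_csum_fold (sum);  // final complemented checksum
+ */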
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
new file mode 100644
index 00000000..ca062bfd
--- /dev/null
+++ b/src/vnet/ip/ip_frag.c
@@ -0,0 +1,581 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * IPv4 and IPv6 fragmentation nodes
+ *
+ */
+
+#include "ip_frag.h"
+
+#include <vnet/ip/ip.h>
+
+
+typedef struct
+{
+ u8 ipv6;
+ u16 header_offset;
+ u16 mtu;
+ u8 next;
+ u16 n_fragments;
+} ip_frag_trace_t;
+
+static u8 *
+format_ip_frag_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
+ s = format (s, "IPv%s offset: %u mtu: %u fragments: %u",
+ t->ipv6 ? "6" : "4", t->header_offset, t->mtu, t->n_fragments);
+ return s;
+}
+
+static u32 running_fragment_id;
+
+static void
+ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
+ ip_frag_error_t * error)
+{
+ vlib_buffer_t *p;
+ ip4_header_t *ip4;
+ u16 mtu, ptr, len, max, rem, offset, ip_frag_id, ip_frag_offset;
+ u8 *packet, more;
+
+ vec_add1 (*buffer, pi);
+ p = vlib_get_buffer (vm, pi);
+ offset = vnet_buffer (p)->ip_frag.header_offset;
+ mtu = vnet_buffer (p)->ip_frag.mtu;
+ packet = (u8 *) vlib_buffer_get_current (p);
+ ip4 = (ip4_header_t *) (packet + offset);
+
+ rem = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4);
+ ptr = 0;
+ max = (mtu - sizeof (*ip4) - vnet_buffer (p)->ip_frag.header_offset) & ~0x7;
+
+ if (rem < (p->current_length - offset - sizeof (*ip4)))
+ {
+ *error = IP_FRAG_ERROR_MALFORMED;
+ return;
+ }
+
+ if (mtu < sizeof (*ip4))
+ {
+ *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+ return;
+ }
+
+ if (ip4->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
+ {
+ *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
+ return;
+ }
+
+ if (ip4_is_fragment (ip4))
+ {
+ ip_frag_id = ip4->fragment_id;
+ ip_frag_offset = ip4_get_fragment_offset (ip4);
+ more =
+ ! !(ip4->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS));
+ }
+ else
+ {
+ ip_frag_id = (++running_fragment_id);
+ ip_frag_offset = 0;
+ more = 0;
+ }
+
+ //Do the actual fragmentation
+ while (rem)
+ {
+ u32 bi;
+ vlib_buffer_t *b;
+ ip4_header_t *fip4;
+
+ len =
+ (rem >
+ (mtu - sizeof (*ip4) -
+ vnet_buffer (p)->ip_frag.header_offset)) ? max : rem;
+
+ if (ptr == 0)
+ {
+ bi = pi;
+ b = p;
+ fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
+ }
+ else
+ {
+ if (!vlib_buffer_alloc (vm, &bi, 1))
+ {
+ *error = IP_FRAG_ERROR_MEMORY;
+ return;
+ }
+ vec_add1 (*buffer, bi);
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (p)->sw_if_index[VLIB_RX];
+ vnet_buffer (b)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p)->sw_if_index[VLIB_TX];
+ fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
+
+ //Copy offset and ip4 header
+ clib_memcpy (b->data, packet, offset + sizeof (*ip4));
+ //Copy data
+ clib_memcpy (((u8 *) (fip4)) + sizeof (*fip4),
+ packet + offset + sizeof (*fip4) + ptr, len);
+ }
+ b->current_length = offset + len + sizeof (*fip4);
+
+ fip4->fragment_id = ip_frag_id;
+ fip4->flags_and_fragment_offset =
+ clib_host_to_net_u16 ((ptr >> 3) + ip_frag_offset);
+ fip4->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (((len != rem) || more) << 13);
+ // ((len != rem) || more) << 13 is an optimization for
+ // ((len != rem) || more) ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0
+ fip4->length = clib_host_to_net_u16 (len + sizeof (*fip4));
+ fip4->checksum = ip4_header_checksum (fip4);
+
+ if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
+ {
+ //Encapsulating ipv4 header
+ ip4_header_t *encap_header4 =
+ (ip4_header_t *) vlib_buffer_get_current (b);
+ encap_header4->length = clib_host_to_net_u16 (b->current_length);
+ encap_header4->checksum = ip4_header_checksum (encap_header4);
+ }
+ else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
+ {
+ //Encapsulating ipv6 header
+ ip6_header_t *encap_header6 =
+ (ip6_header_t *) vlib_buffer_get_current (b);
+ encap_header6->payload_length =
+ clib_host_to_net_u16 (b->current_length -
+ sizeof (*encap_header6));
+ }
+
+ rem -= len;
+ ptr += len;
+ }
+}
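+
+/*
+ * Fragment sizing above, worked with illustrative numbers: the payload
+ * per fragment is (mtu - header bytes) rounded down to a multiple of 8,
+ * because the IPv4 fragment-offset field counts 8-byte units.  For a
+ * 4000-byte payload, mtu 1500 and a 20-byte header at offset 0:
+ *
+ *   max = (1500 - 20) & ~0x7 = 1480
+ *   fragment lengths: 1480, 1480, 1040
+ *   offset fields:    0, 1480/8 = 185, 2960/8 = 370
+ */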
+
+void
+ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
+ u8 next_index, u8 flags)
+{
+ vnet_buffer (b)->ip_frag.header_offset = offset;
+ vnet_buffer (b)->ip_frag.mtu = mtu;
+ vnet_buffer (b)->ip_frag.next_index = next_index;
+ vnet_buffer (b)->ip_frag.flags = flags;
+}
+
+static uword
+ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_frag_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 frag_sent = 0, small_packets = 0;
+ u32 *buffer = 0;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, *frag_from, frag_left;
+ vlib_buffer_t *p0;
+ ip_frag_error_t error0;
+ ip4_frag_next_t next0;
+
+ //Note: The packet is not enqueued now.
+ //It is instead put in a vector where other fragments
+ //will be put as well.
+ pi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ error0 = IP_FRAG_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip_frag_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
+ tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+ tr->ipv6 = 0;
+ tr->n_fragments = vec_len (buffer);
+ tr->next = vnet_buffer (p0)->ip_frag.next_index;
+ }
+
+ if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+ {
+ icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ vnet_buffer (p0)->ip_frag.mtu);
+ vlib_buffer_advance (p0,
+ vnet_buffer (p0)->ip_frag.header_offset);
+ next0 = IP4_FRAG_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 =
+ (error0 ==
+ IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+ ip_frag.next_index : IP4_FRAG_NEXT_DROP;
+
+ if (error0 == IP_FRAG_ERROR_NONE)
+ {
+ frag_sent += vec_len (buffer);
+ small_packets += (vec_len (buffer) == 1);
+ }
+ else
+ vlib_error_count (vm, ip4_frag_node.index, error0, 1);
+
+ //Send fragments that were added in the frame
+ frag_from = buffer;
+ frag_left = vec_len (buffer);
+
+ while (frag_left > 0)
+ {
+ while (frag_left > 0 && n_left_to_next > 0)
+ {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_get_buffer (vm, i)->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, i,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (buffer);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vec_free (buffer);
+
+ vlib_node_increment_counter (vm, ip4_frag_node.index,
+ IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
+ vlib_node_increment_counter (vm, ip4_frag_node.index,
+ IP_FRAG_ERROR_SMALL_PACKET, small_packets);
+
+ return frame->n_vectors;
+}
+
+
+static void
+ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
+ ip_frag_error_t * error)
+{
+ vlib_buffer_t *p;
+ ip6_header_t *ip6_hdr;
+ ip6_frag_hdr_t *frag_hdr;
+ u8 *payload, *next_header;
+
+ p = vlib_get_buffer (vm, pi);
+
+ //Parsing the IPv6 headers
+ ip6_hdr =
+ vlib_buffer_get_current (p) + vnet_buffer (p)->ip_frag.header_offset;
+ payload = (u8 *) (ip6_hdr + 1);
+ next_header = &ip6_hdr->protocol;
+ //The extension header length field counts 8-octet units, not
+ //including the first 8 octets (RFC 2460), so advance by len + 1 units.
+ if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ next_header = payload;
+ payload += (payload[1] + 1) * 8;
+ }
+
+ if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS)
+ {
+ next_header = payload;
+ payload += (payload[1] + 1) * 8;
+ }
+
+ if (*next_header == IP_PROTOCOL_IPV6_ROUTE)
+ {
+ next_header = payload;
+ payload += (payload[1] + 1) * 8;
+ }
+
+ if (PREDICT_FALSE
+ (payload >= (u8 *) vlib_buffer_get_current (p) + p->current_length))
+ {
+ //A malicious packet could declare an extension header size that
+ //overruns the buffer and make us modify another vlib_buffer
+ *error = IP_FRAG_ERROR_MALFORMED;
+ return;
+ }
+
+ u8 has_more;
+ u16 initial_offset;
+ if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ //The fragmentation header is already there
+ frag_hdr = (ip6_frag_hdr_t *) payload;
+ has_more = ip6_frag_hdr_more (frag_hdr);
+ initial_offset = ip6_frag_hdr_offset (frag_hdr);
+ }
+ else
+ {
+ //Insert a fragmentation header in the packet
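+ //Grow the headroom by 8 bytes, slide the headers that precede the
+ //payload back by sizeof (*frag_hdr), and write the fragment header
+ //into the gap this opens just before the payload.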
+ u8 nh = *next_header;
+ *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ vlib_buffer_advance (p, -sizeof (*frag_hdr));
+ u8 *start = vlib_buffer_get_current (p);
+ memmove (start, start + sizeof (*frag_hdr),
+ payload - (start + sizeof (*frag_hdr)));
+ frag_hdr = (ip6_frag_hdr_t *) (payload - sizeof (*frag_hdr));
+ frag_hdr->identification = ++running_fragment_id;
+ frag_hdr->next_hdr = nh;
+ frag_hdr->rsv = 0;
+ has_more = 0;
+ initial_offset = 0;
+ }
+ payload = (u8 *) (frag_hdr + 1);
+
+ u16 headers_len = payload - (u8 *) vlib_buffer_get_current (p);
+ u16 max_payload = vnet_buffer (p)->ip_frag.mtu - headers_len;
+ u16 rem = p->current_length - headers_len;
+ u16 ptr = 0;
+
+ if (max_payload < 8)
+ {
+ *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+ return;
+ }
+
+ while (rem)
+ {
+ u32 bi;
+ vlib_buffer_t *b;
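+ //All fragments except the last must carry a payload length that
+ //is a multiple of 8 octets, hence the mask below.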
+ u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem;
+ rem -= len;
+
+ if (ptr != 0)
+ {
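+ //Subsequent fragments get a freshly allocated buffer: copy the
+ //unfragmentable headers, then the next slice of payload.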
+ if (!vlib_buffer_alloc (vm, &bi, 1))
+ {
+ *error = IP_FRAG_ERROR_MEMORY;
+ return;
+ }
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (p)->sw_if_index[VLIB_RX];
+ vnet_buffer (b)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p)->sw_if_index[VLIB_TX];
+ clib_memcpy (vlib_buffer_get_current (b),
+ vlib_buffer_get_current (p), headers_len);
+ clib_memcpy (vlib_buffer_get_current (b) + headers_len,
+ payload + ptr, len);
+ frag_hdr =
+ vlib_buffer_get_current (b) + headers_len - sizeof (*frag_hdr);
+ }
+ else
+ {
+ bi = pi;
+ b = vlib_get_buffer (vm, bi);
+ //frag_hdr already set here
+ }
+
+ ip6_hdr =
+ vlib_buffer_get_current (b) + vnet_buffer (p)->ip_frag.header_offset;
+ frag_hdr->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (initial_offset + (ptr >> 3),
+ (rem || has_more));
+ b->current_length = headers_len + len;
+ ip6_hdr->payload_length =
+ clib_host_to_net_u16 (b->current_length -
+ vnet_buffer (p)->ip_frag.header_offset -
+ sizeof (*ip6_hdr));
+
+ if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
+ {
+ //Encapsulating ipv4 header
+ ip4_header_t *encap_header4 =
+ (ip4_header_t *) vlib_buffer_get_current (b);
+ encap_header4->length = clib_host_to_net_u16 (b->current_length);
+ encap_header4->checksum = ip4_header_checksum (encap_header4);
+ }
+ else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
+ {
+ //Encapsulating ipv6 header
+ ip6_header_t *encap_header6 =
+ (ip6_header_t *) vlib_buffer_get_current (b);
+ encap_header6->payload_length =
+ clib_host_to_net_u16 (b->current_length -
+ sizeof (*encap_header6));
+ }
+
+ vec_add1 (*buffer, bi);
+
+ ptr += len;
+ }
+}
+
+static uword
+ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_frag_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 frag_sent = 0, small_packets = 0;
+ u32 *buffer = 0;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, *frag_from, frag_left;
+ vlib_buffer_t *p0;
+ ip_frag_error_t error0;
+ ip6_frag_next_t next0;
+
+ pi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ error0 = IP_FRAG_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip_frag_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
+ tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+ tr->ipv6 = 1;
+ tr->n_fragments = vec_len (buffer);
+ tr->next = vnet_buffer (p0)->ip_frag.next_index;
+ }
+
+ next0 =
+ (error0 ==
+ IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+ ip_frag.next_index : IP6_FRAG_NEXT_DROP;
+ frag_sent += vec_len (buffer);
+ small_packets += (vec_len (buffer) == 1);
+
+ //Send fragments that were added in the frame
+ frag_from = buffer;
+ frag_left = vec_len (buffer);
+ while (frag_left > 0)
+ {
+ while (frag_left > 0 && n_left_to_next > 0)
+ {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_get_buffer (vm, i)->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, i,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (buffer);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vec_free (buffer);
+ vlib_node_increment_counter (vm, ip6_frag_node.index,
+ IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
+ vlib_node_increment_counter (vm, ip6_frag_node.index,
+ IP_FRAG_ERROR_SMALL_PACKET, small_packets);
+
+ return frame->n_vectors;
+}
+
+static char *ip4_frag_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip_frag_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_frag_node) = {
+ .function = ip4_frag,
+ .name = IP4_FRAG_NODE_NAME,
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = ip4_frag_error_strings,
+
+ .n_next_nodes = IP4_FRAG_N_NEXT,
+ .next_nodes = {
+ [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP4_FRAG_NEXT_DROP] = "error-drop"
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_frag_node) = {
+ .function = ip6_frag,
+ .name = IP6_FRAG_NODE_NAME,
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = ip4_frag_error_strings,
+
+ .n_next_nodes = IP6_FRAG_N_NEXT,
+ .next_nodes = {
+ [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP6_FRAG_NEXT_DROP] = "error-drop"
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
new file mode 100644
index 00000000..348f5a2f
--- /dev/null
+++ b/src/vnet/ip/ip_frag.h
@@ -0,0 +1,96 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * IPv4 and IPv6 Fragmentation Nodes
+ *
+ * A packet sent to these nodes requires the following
+ * buffer attributes to be set:
+ * ip_frag.header_offset :
+ * Where to find the IPv4 (or IPv6) header in the packet. The
+ * preceding bytes are left untouched and copied into every fragment,
+ * and the fragments are appended after them. This option is used
+ * for fragmenting packets that are encapsulated.
+ * ip_frag.mtu :
+ * Maximum size of the resulting IP packets, header included, not
+ * counting the 'ip_frag.header_offset' copied bytes.
+ * ip_frag.next_index :
+ * One of ip_frag_next_t, indicating the exit node to which the
+ * fragments should be sent.
+ * ip_frag.flags :
+ * Combination of the IP_FRAG_FLAG_* values, set when the copied
+ * bytes end with an encapsulating IPv4 or IPv6 header whose length
+ * (and checksum) fields must be fixed up in every fragment.
+ *
+ */
+
+#ifndef IP_FRAG_H
+#define IP_FRAG_H
+
+#include <vnet/vnet.h>
+
+#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
+#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
+
+#define IP4_FRAG_NODE_NAME "ip4-frag"
+#define IP6_FRAG_NODE_NAME "ip6-frag"
+
+extern vlib_node_registration_t ip4_frag_node;
+extern vlib_node_registration_t ip6_frag_node;
+
+typedef enum
+{
+ IP4_FRAG_NEXT_IP4_LOOKUP,
+ IP4_FRAG_NEXT_IP6_LOOKUP,
+ IP4_FRAG_NEXT_ICMP_ERROR,
+ IP4_FRAG_NEXT_DROP,
+ IP4_FRAG_N_NEXT
+} ip4_frag_next_t;
+
+typedef enum
+{
+ IP6_FRAG_NEXT_IP4_LOOKUP,
+ IP6_FRAG_NEXT_IP6_LOOKUP,
+ IP6_FRAG_NEXT_DROP,
+ IP6_FRAG_N_NEXT
+} ip6_frag_next_t;
+
+#define foreach_ip_frag_error \
+ /* Must be first. */ \
+ _(NONE, "packet fragmented") \
+ _(SMALL_PACKET, "packet smaller than MTU") \
+ _(FRAGMENT_SENT, "number of sent fragments") \
+ _(CANT_FRAGMENT_HEADER, "can't fragment header") \
+ _(DONT_FRAGMENT_SET, "can't fragment this packet") \
+ _(MALFORMED, "malformed packet") \
+ _(MEMORY, "could not allocate buffer") \
+ _(UNKNOWN, "unknown error")
+
+typedef enum
+{
+#define _(sym,str) IP_FRAG_ERROR_##sym,
+ foreach_ip_frag_error
+#undef _
+ IP_FRAG_N_ERROR,
+} ip_frag_error_t;
+
+void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
+ u8 next_index, u8 flags);
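+
+/*
+ * Usage sketch (illustrative): given a vlib_buffer_t *b holding a
+ * plain, non-encapsulated IPv4 packet, fragment it to a 1500-byte
+ * MTU and hand the fragments to ip4-lookup. Arguments are offset,
+ * mtu, next_index and flags:
+ *
+ * ip_frag_set_vnet_buffer (b, 0, 1500, IP4_FRAG_NEXT_IP4_LOOKUP, 0);
+ *
+ * then enqueue b to the node registered as IP4_FRAG_NODE_NAME.
+ */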
+
+#endif /* ifndef IP_FRAG_H */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
new file mode 100644
index 00000000..f7635b35
--- /dev/null
+++ b/src/vnet/ip/ip_init.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_init.c: ip generic initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+ip_main_t ip_main;
+
+clib_error_t *
+ip_main_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ clib_error_t *error = 0;
+
+ memset (im, 0, sizeof (im[0]));
+
+ {
+ ip_protocol_info_t *pi;
+ u32 i;
+
+#define ip_protocol(n,s) \
+do { \
+ vec_add2 (im->protocol_infos, pi, 1); \
+ pi->protocol = n; \
+ pi->name = (u8 *) #s; \
+} while (0);
+
+#include "protocols.def"
+
+#undef ip_protocol
+
+ im->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ for (i = 0; i < vec_len (im->protocol_infos); i++)
+ {
+ pi = im->protocol_infos + i;
+
+ hash_set_mem (im->protocol_info_by_name, pi->name, i);
+ hash_set (im->protocol_info_by_protocol, pi->protocol, i);
+ }
+ }
+
+ {
+ tcp_udp_port_info_t *pi;
+ u32 i;
+ static char *port_names[] = {
+#define ip_port(s,n) #s,
+#include "ports.def"
+#undef ip_port
+ };
+ static u16 ports[] = {
+#define ip_port(s,n) n,
+#include "ports.def"
+#undef ip_port
+ };
+
+ vec_resize (im->port_infos, ARRAY_LEN (port_names));
+ im->port_info_by_name = hash_create_string (0, sizeof (uword));
+
+ for (i = 0; i < vec_len (im->port_infos); i++)
+ {
+ pi = im->port_infos + i;
+ pi->port = clib_host_to_net_u16 (ports[i]);
+ pi->name = (u8 *) port_names[i];
+ hash_set_mem (im->port_info_by_name, pi->name, i);
+ hash_set (im->port_info_by_port, pi->port, i);
+ }
+ }
+
+ if ((error = vlib_call_init_function (vm, vnet_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, icmp4_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, icmp6_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, udp_local_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, udp_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip_classify_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, input_acl_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, policer_classify_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, flow_classify_init)))
+ return error;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip_main_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_input_acl.c b/src/vnet/ip/ip_input_acl.c
new file mode 100644
index 00000000..b0b52ab1
--- /dev/null
+++ b/src/vnet/ip/ip_input_acl.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ u32 table_index;
+ u32 offset;
+} ip_inacl_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip_inacl_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip_inacl_trace_t *t = va_arg (*args, ip_inacl_trace_t *);
+
+ s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
+ t->sw_if_index, t->next_index, t->table_index, t->offset);
+ return s;
+}
+
+vlib_node_registration_t ip4_inacl_node;
+vlib_node_registration_t ip6_inacl_node;
+
+#define foreach_ip_inacl_error \
+_(MISS, "input ACL misses") \
+_(HIT, "input ACL hits") \
+_(CHAIN_HIT, "input ACL hits after chain walk")
+
+typedef enum
+{
+#define _(sym,str) IP_INACL_ERROR_##sym,
+ foreach_ip_inacl_error
+#undef _
+ IP_INACL_N_ERROR,
+} ip_inacl_error_t;
+
+static char *ip_inacl_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip_inacl_error
+#undef _
+};
+
+static inline uword
+ip_inacl_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip4)
+{
+ u32 n_left_from, *from, *to_next;
+ acl_next_index_t next_index;
+ input_acl_main_t *am = &input_acl_main;
+ vnet_classify_main_t *vcm = am->vnet_classify_main;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ input_acl_table_id_t tid;
+ vlib_node_runtime_t *error_node;
+ u32 n_next_nodes;
+
+ n_next_nodes = node->n_next_nodes;
+
+ if (is_ip4)
+ {
+ tid = INPUT_ACL_TABLE_IP4;
+ error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+ }
+ else
+ {
+ tid = INPUT_ACL_TABLE_IP6;
+ error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ /* First pass: compute hashes */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ u8 *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t *t0, *t1;
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ if (t1->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h1 = (void *) vlib_buffer_get_current (b1) + t1->current_data_offset;
+ else
+ h1 = b1->data;
+
+ vnet_buffer (b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer (b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ u8 *h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ACL_NEXT_INDEX_DENY;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+ u64 hash0;
+ u8 *h0;
+ u8 error0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
+ vnet_classify_table_t *tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer (p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer (p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+ vnet_get_config_data (am->vnet_config_main[tid],
+ &b0->current_config_index, &next0,
+ /* # bytes of config data */ 0);
+
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+
+ hits++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_SESSION_DENY : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_SESSION_DENY : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+
+ if (e0->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX ||
+ e0->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = e0->metadata;
+ }
+ else
+ {
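+ /* Miss in this table: walk the chain of linked classify
+ * tables until an entry hits or we take the last table's
+ * miss path. */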
+ while (1)
+ {
+ if (PREDICT_TRUE (t0->next_table_index != ~0))
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+
+ misses++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_TABLE_MISS : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_TABLE_MISS : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+ break;
+ }
+
+ if (t0->current_data_flag ==
+ CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_SESSION_DENY : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_SESSION_DENY : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+
+ if (e0->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX
+ || e0->action ==
+ CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ e0->metadata;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip_inacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_CHAIN_HIT, chain_hits);
+ return frame->n_vectors;
+}
+
+static uword
+ip4_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip_inacl_inline (vm, node, frame, 1 /* is_ip4 */ );
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_inacl_node) = {
+ .function = ip4_inacl,
+ .name = "ip4-inacl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_inacl_node, ip4_inacl);
+
+static uword
+ip6_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip_inacl_inline (vm, node, frame, 0 /* is_ip4 */ );
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_inacl_node) = {
+ .function = ip6_inacl,
+ .name = "ip6-inacl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_inacl_node, ip6_inacl);
+
+static clib_error_t *
+ip_inacl_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip_inacl_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h
new file mode 100644
index 00000000..d3f3de77
--- /dev/null
+++ b/src/vnet/ip/ip_packet.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_packet.h: packet format common between ip4 & ip6
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_packet_h
+#define included_ip_packet_h
+
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+
+typedef enum ip_protocol
+{
+#define ip_protocol(n,s) IP_PROTOCOL_##s = n,
+#include "protocols.def"
+#undef ip_protocol
+} ip_protocol_t;
+
+/* TCP/UDP ports. */
+typedef enum
+{
+#define ip_port(s,n) IP_PORT_##s = n,
+#include "ports.def"
+#undef ip_port
+} ip_port_t;
+
+/* Classifies protocols into UDP, ICMP or other. */
+typedef enum
+{
+ IP_BUILTIN_PROTOCOL_UDP,
+ IP_BUILTIN_PROTOCOL_ICMP,
+ IP_BUILTIN_PROTOCOL_UNKNOWN,
+} ip_builtin_protocol_t;
+
+#define foreach_ip_builtin_multicast_group \
+ _ (1, all_hosts_on_subnet) \
+ _ (2, all_routers_on_subnet) \
+ _ (4, dvmrp) \
+ _ (5, ospf_all_routers) \
+ _ (6, ospf_designated_routers) \
+ _ (13, pim) \
+ _ (18, vrrp) \
+ _ (102, hsrp) \
+ _ (22, igmp_v3)
+
+typedef enum
+{
+#define _(n,f) IP_MULTICAST_GROUP_##f = n,
+ foreach_ip_builtin_multicast_group
+#undef _
+} ip_multicast_group_t;
+
+/* IP checksum support. */
+
+/* Incremental checksum update. */
+typedef uword ip_csum_t;
+
+always_inline ip_csum_t
+ip_csum_with_carry (ip_csum_t sum, ip_csum_t x)
+{
+ ip_csum_t t = sum + x;
+ return t + (t < x);
+}
+
+/* Update checksum changing field at even byte offset from x -> 0. */
+always_inline ip_csum_t
+ip_csum_add_even (ip_csum_t c, ip_csum_t x)
+{
+ ip_csum_t d;
+
+ d = c - x;
+
+ /* Fold in carry from high bit. */
+ d -= d > c;
+
+ ASSERT (ip_csum_with_carry (d, x) == c);
+
+ return d;
+}
+
+/* Update checksum changing field at even byte offset from 0 -> x. */
+always_inline ip_csum_t
+ip_csum_sub_even (ip_csum_t c, ip_csum_t x)
+{
+ return ip_csum_with_carry (c, x);
+}
+
+always_inline ip_csum_t
+ip_csum_update_inline (ip_csum_t sum, ip_csum_t old, ip_csum_t new,
+ u32 field_byte_offset, u32 field_n_bytes)
+{
+ /* For even 1-byte fields on big-endian, and odd 1-byte fields on
+ little-endian, we need to shift the byte into place for the checksum. */
+ if ((field_n_bytes % 2)
+ && (field_byte_offset % 2) == CLIB_ARCH_IS_LITTLE_ENDIAN)
+ {
+ old = old << 8;
+ new = new << 8;
+ }
+ sum = ip_csum_sub_even (sum, old);
+ sum = ip_csum_add_even (sum, new);
+ return sum;
+}
+
+#define ip_csum_update(sum,old,new,type,field) \
+ ip_csum_update_inline ((sum), (old), (new), \
+ STRUCT_OFFSET_OF (type, field), \
+ STRUCT_SIZE_OF (type, field))
+
+always_inline u16
+ip_csum_fold (ip_csum_t c)
+{
+ /* Reduce to 16 bits. */
+#if uword_bits == 64
+ c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32);
+ c = (c & 0xffff) + (c >> 16);
+#endif
+
+ c = (c & 0xffff) + (c >> 16);
+ c = (c & 0xffff) + (c >> 16);
+
+ return c;
+}
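+
+/*
+ * Usage sketch (illustrative): incrementally patch an IPv4 header
+ * checksum when rewriting a single field instead of recomputing the
+ * checksum over the whole header, e.g. decrementing the TTL:
+ *
+ * ip_csum_t sum = ip4->checksum;
+ * u8 ttl = ip4->ttl;
+ * sum = ip_csum_update (sum, ttl, ttl - 1, ip4_header_t, ttl);
+ * ip4->ttl = ttl - 1;
+ * ip4->checksum = ip_csum_fold (sum);
+ */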
+
+/* Copy data and checksum at the same time. */
+ip_csum_t ip_csum_and_memcpy (ip_csum_t sum, void *dst, void *src,
+ uword n_bytes);
+
+always_inline u16
+ip_csum_and_memcpy_fold (ip_csum_t sum, void *dst)
+{
+ return ip_csum_fold (sum);
+}
+
+/* Checksum routine. */
+ip_csum_t ip_incremental_checksum (ip_csum_t sum, void *data, uword n_bytes);
+
+#endif /* included_ip_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_source_and_port_range_check.h b/src/vnet/ip/ip_source_and_port_range_check.h
new file mode 100644
index 00000000..fefe5ff1
--- /dev/null
+++ b/src/vnet/ip/ip_source_and_port_range_check.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ip_ip_source_and_port_range_check_h
+#define included_ip_ip_source_and_port_range_check_h
+
+
+typedef struct
+{
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} source_range_check_main_t;
+
+source_range_check_main_t source_range_check_main;
+
+typedef enum
+{
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS,
+} ip_source_and_port_range_check_protocol_t;
+
+typedef struct
+{
+ u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+} ip_source_and_port_range_check_config_t;
+
+#define IP_SOURCE_AND_PORT_RANGE_CHECK_RANGE_LIMIT (VLIB_BUFFER_PRE_DATA_SIZE / (2 * sizeof (u16x8)))
+
+typedef struct
+{
+ union
+ {
+ u16x8 as_u16x8;
+ u16 as_u16[8];
+ };
+} u16x8vec_t;
+
+typedef struct
+{
+ u16x8vec_t low;
+ u16x8vec_t hi;
+} protocol_port_range_t;
+
+/**
+ * @brief The number of supported ranges per data-path object.
+ * If more ranges are required, bump this number.
+ */
+#define N_PORT_RANGES_PER_DPO 64
+#define N_RANGES_PER_BLOCK (sizeof(u16x8vec_t)/2)
+#define N_BLOCKS_PER_DPO (N_PORT_RANGES_PER_DPO/N_RANGES_PER_BLOCK)
+
+/**
+ * @brief
+ * The object that is in the data-path to perform the check.
+ *
+ * Some trade-offs here; memory vs performance.
+ *
+ * performance:
+ * the principal factor is d-cache line misses/hits.
+ * So we want the data layout to minimise the d-cache misses, which
+ * means not following dependent reads, i.e. not doing
+ *
+ * struct B {
+ * u16 n_ranges;
+ * range_t *ranges; // vector of ranges.
+ * }
+ *
+ * so to read ranges[0] we would first d-cache miss on the address
+ * of the object of type B, for which we would need to wait before we
+ * can get the address of B->ranges.
+ * So this layout is better:
+ *
+ * struct B {
+ * u16 n_ranges;
+ * range_t ranges[N];
+ * }
+ *
+ * memory:
+ * the latter layout above is more memory hungry. And N needs to be:
+ * 1 - sized for the maximum required
+ * 2 - fixed, so that objects of type B can be pool allocated and so
+ * 'get'-able using an index.
+ * An option over fixed might be to allocate a contiguous chunk from
+ * the pool (like we used to do for multi-path adjs).
+ */
+typedef struct protocol_port_range_dpo_t_
+{
+ /**
+ * The number of blocks from the 'blocks' array below
+ * that have ranges configured. We keep this count so that in the
+ * data-path we can limit the loop to only the blocks in use.
+ */
+ u16 n_used_blocks;
+
+ /**
+ * The total number of free ranges from all blocks.
+ * Used to prevent overrun of the ranges available.
+ */
+ u16 n_free_ranges;
+
+ /**
+ * the fixed size array of ranges
+ */
+ protocol_port_range_t blocks[N_BLOCKS_PER_DPO];
+} protocol_port_range_dpo_t;
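+
+/*
+ * Data-path sketch (illustrative): with the fixed-size layout the
+ * check is a bounded scan over cache lines that are already resident:
+ *
+ * for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+ * ; // compare the packet's port against blocks[i].low/hi lanes
+ */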
+
+int ip4_source_and_port_range_check_add_del (ip4_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * hi_ports, int is_add);
+
+// This will be moved to another file in another patch -- for API freeze
+int ip6_source_and_port_range_check_add_del (ip6_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * hi_ports, int is_add);
+
+int set_ip_source_and_port_range_check (vlib_main_t * vm,
+ u32 * fib_index,
+ u32 sw_if_index, u32 is_add);
+
+#endif /* included_ip_ip_source_and_port_range_check_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
new file mode 100644
index 00000000..856c4942
--- /dev/null
+++ b/src/vnet/ip/lookup.c
@@ -0,0 +1,1442 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_lookup.c: ip4/6 adjacency and lookup table management
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/ip/ip6_neighbor.h>
+
+/**
+ * @file
+ * @brief IPv4 and IPv6 adjacency and lookup table management.
+ *
+ */
+
+clib_error_t *
+ip_interface_address_add_del (ip_lookup_main_t * lm,
+ u32 sw_if_index,
+ void *addr_fib,
+ u32 address_length,
+ u32 is_del, u32 * result_if_address_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip_interface_address_t *a, *prev, *next;
+ uword *p = mhash_get (&lm->address_to_if_address_index, addr_fib);
+
+ vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ a = p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
+
+ /* Verify given length. */
+ if ((a && (address_length != a->address_length)) ||
+ (address_length == 0) ||
+ (lm->is_ip6 && address_length > 128) ||
+ (!lm->is_ip6 && address_length > 32))
+ {
+ vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
+ return clib_error_create
+ ("%U wrong length (expected %d) for interface %U",
+ lm->format_address_and_length, addr_fib,
+ address_length, a ? a->address_length : -1,
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ }
+
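+ /* Interface addresses are pool-allocated and threaded onto a
+ * per-interface doubly-linked list; deletion unlinks the element
+ * and fixes up the list head kept per sw_if_index. */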
+ if (is_del)
+ {
+ if (!a)
+ {
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
+ return clib_error_create ("%U not found for interface %U",
+ lm->format_address_and_length,
+ addr_fib, address_length,
+ format_vnet_sw_interface_name, vnm, si);
+ }
+
+ if (a->prev_this_sw_interface != ~0)
+ {
+ prev =
+ pool_elt_at_index (lm->if_address_pool,
+ a->prev_this_sw_interface);
+ prev->next_this_sw_interface = a->next_this_sw_interface;
+ }
+ if (a->next_this_sw_interface != ~0)
+ {
+ next =
+ pool_elt_at_index (lm->if_address_pool,
+ a->next_this_sw_interface);
+ next->prev_this_sw_interface = a->prev_this_sw_interface;
+
+ if (a->prev_this_sw_interface == ~0)
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index] =
+ a->next_this_sw_interface;
+ }
+
+ if ((a->next_this_sw_interface == ~0)
+ && (a->prev_this_sw_interface == ~0))
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index] = ~0;
+
+ mhash_unset (&lm->address_to_if_address_index, addr_fib,
+ /* old_value */ 0);
+ pool_put (lm->if_address_pool, a);
+
+ if (result_if_address_index)
+ *result_if_address_index = ~0;
+ }
+
+ else if (!a)
+ {
+ u32 pi; /* previous index */
+ u32 ai;
+ u32 hi; /* head index */
+
+ pool_get (lm->if_address_pool, a);
+ memset (a, ~0, sizeof (a[0]));
+ ai = a - lm->if_address_pool;
+
+ hi = pi = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ prev = 0;
+ while (pi != (u32) ~ 0)
+ {
+ prev = pool_elt_at_index (lm->if_address_pool, pi);
+ pi = prev->next_this_sw_interface;
+ }
+ pi = prev ? prev - lm->if_address_pool : (u32) ~ 0;
+
+ a->address_key = mhash_set (&lm->address_to_if_address_index,
+ addr_fib, ai, /* old_value */ 0);
+ a->address_length = address_length;
+ a->sw_if_index = sw_if_index;
+ a->flags = 0;
+ a->prev_this_sw_interface = pi;
+ a->next_this_sw_interface = ~0;
+ if (prev)
+ prev->next_this_sw_interface = ai;
+
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index] =
+ (hi != ~0) ? hi : ai;
+ if (result_if_address_index)
+ *result_if_address_index = ai;
+ }
+ else
+ {
+ if (sw_if_index != a->sw_if_index)
+ {
+ if (result_if_address_index)
+ *result_if_address_index = ~0;
+ vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
+ return clib_error_create
+ ("Prefix %U already found on interface %U",
+ lm->format_address_and_length, addr_fib, address_length,
+ format_vnet_sw_if_index_name, vnm, a->sw_if_index);
+ }
+
+ if (result_if_address_index)
+ *result_if_address_index = a - lm->if_address_pool;
+ }
+
+ return /* no error */ 0;
+}
+
+static clib_error_t *
+ip_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+ vec_validate_init_empty (ip4_main.
+ lookup_main.if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ vec_validate_init_empty (ip6_main.
+ lookup_main.if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_sw_interface_add_del);
+
+void
+ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6)
+{
+ if (!lm->fib_result_n_bytes)
+ lm->fib_result_n_bytes = sizeof (uword);
+
+ lm->is_ip6 = is_ip6;
+ if (is_ip6)
+ {
+ lm->format_address_and_length = format_ip6_address_and_length;
+ mhash_init (&lm->address_to_if_address_index, sizeof (uword),
+ sizeof (ip6_address_fib_t));
+ }
+ else
+ {
+ lm->format_address_and_length = format_ip4_address_and_length;
+ mhash_init (&lm->address_to_if_address_index, sizeof (uword),
+ sizeof (ip4_address_fib_t));
+ }
+
+ {
+ int i;
+
+ /* Setup all IP protocols to be punted and builtin-unknown. */
+ for (i = 0; i < 256; i++)
+ {
+ lm->local_next_by_ip_protocol[i] = IP_LOCAL_NEXT_PUNT;
+ lm->builtin_protocol_by_ip_protocol[i] = IP_BUILTIN_PROTOCOL_UNKNOWN;
+ }
+
+ lm->local_next_by_ip_protocol[IP_PROTOCOL_UDP] = IP_LOCAL_NEXT_UDP_LOOKUP;
+ lm->local_next_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 :
+ IP_PROTOCOL_ICMP] = IP_LOCAL_NEXT_ICMP;
+ lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] =
+ IP_BUILTIN_PROTOCOL_UDP;
+ lm->builtin_protocol_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 :
+ IP_PROTOCOL_ICMP] =
+ IP_BUILTIN_PROTOCOL_ICMP;
+ }
+}
+
+u8 *
+format_ip_flow_hash_config (u8 * s, va_list * args)
+{
+ flow_hash_config_t flow_hash_config = va_arg (*args, u32);
+
+#define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n);
+ foreach_flow_hash_bit;
+#undef _
+
+ return s;
+}
+
+u8 *
+format_ip_lookup_next (u8 * s, va_list * args)
+{
+ /* int promotion of ip_lookup_next_t */
+ ip_lookup_next_t n = va_arg (*args, int);
+ char *t = 0;
+
+ switch (n)
+ {
+ default:
+ s = format (s, "unknown %d", n);
+ return s;
+
+ case IP_LOOKUP_NEXT_DROP:
+ t = "drop";
+ break;
+ case IP_LOOKUP_NEXT_PUNT:
+ t = "punt";
+ break;
+ case IP_LOOKUP_NEXT_ARP:
+ t = "arp";
+ break;
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ t = "midchain";
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ t = "glean";
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ t = "mcast";
+ break;
+ case IP_LOOKUP_NEXT_REWRITE:
+ break;
+ }
+
+ if (t)
+ vec_add (s, t, strlen (t));
+
+ return s;
+}
+
+u8 *
+format_ip_adjacency_packet_data (u8 * s, va_list * args)
+{
+ u32 adj_index = va_arg (*args, u32);
+ u8 *packet_data = va_arg (*args, u8 *);
+ u32 n_packet_data_bytes = va_arg (*args, u32);
+ ip_adjacency_t *adj = adj_get (adj_index);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_REWRITE:
+ case IP_LOOKUP_NEXT_MCAST:
+ s =
+ format (s, "%U", format_hex_bytes, packet_data, n_packet_data_bytes);
+ break;
+
+ default:
+ break;
+ }
+
+ return s;
+}
+
+static uword
+unformat_dpo (unformat_input_t * input, va_list * args)
+{
+ dpo_id_t *dpo = va_arg (*args, dpo_id_t *);
+ fib_protocol_t fp = va_arg (*args, int);
+ dpo_proto_t proto;
+
+ proto = fib_proto_to_dpo (fp);
+
+ if (unformat (input, "drop"))
+ dpo_copy (dpo, drop_dpo_get (proto));
+ else if (unformat (input, "punt"))
+ dpo_copy (dpo, punt_dpo_get (proto));
+ else if (unformat (input, "local"))
+ receive_dpo_add_or_lock (proto, ~0, NULL, dpo);
+ else if (unformat (input, "null-send-unreach"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_SEND_ICMP_UNREACH, dpo);
+ else if (unformat (input, "null-send-prohibit"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_SEND_ICMP_PROHIBIT, dpo);
+ else if (unformat (input, "null"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_NONE, dpo);
+ else if (unformat (input, "classify"))
+ {
+ u32 classify_table_index;
+
+ if (!unformat (input, "%d", &classify_table_index))
+ {
+ clib_warning ("classify adj must specify table index");
+ return 0;
+ }
+
+ dpo_set (dpo, DPO_CLASSIFY, proto,
+ classify_dpo_create (proto, classify_table_index));
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+const ip46_address_t zero_addr = {
+ .as_u64 = {
+ 0, 0},
+};
+
+clib_error_t *
+vnet_ip_route_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ fib_route_path_t *rpaths = NULL, rpath;
+ dpo_id_t dpo = DPO_INVALID, *dpos = NULL;
+ fib_prefix_t *prefixs = NULL, pfx;
+ mpls_label_t out_label, via_label;
+ clib_error_t *error = NULL;
+ u32 weight, preference;
+ u32 table_id, is_del;
+ vnet_main_t *vnm;
+ u32 fib_index;
+ f64 count;
+ int i;
+
+ vnm = vnet_get_main ();
+ is_del = 0;
+ table_id = 0;
+ count = 1;
+ memset (&pfx, 0, sizeof (pfx));
+ out_label = via_label = MPLS_LABEL_INVALID;
+
+ /* Get a line of input. */
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ memset (&rpath, 0, sizeof (rpath));
+
+ if (unformat (line_input, "table %d", &table_id))
+ ;
+ else if (unformat (line_input, "resolve-via-host"))
+ {
+ if (vec_len (rpaths) == 0)
+ {
+ error = clib_error_return (0, "Paths then flags");
+ goto done;
+ }
+ rpaths[vec_len (rpaths) - 1].frp_flags |=
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
+ }
+ else if (unformat (line_input, "resolve-via-attached"))
+ {
+ if (vec_len (rpaths) == 0)
+ {
+ error = clib_error_return (0, "Paths then flags");
+ goto done;
+ }
+ rpaths[vec_len (rpaths) - 1].frp_flags |=
+ FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED;
+ }
+ else if (unformat (line_input, "out-labels"))
+ {
+ if (vec_len (rpaths) == 0)
+ {
+ error = clib_error_return (0, "Paths then labels");
+ goto done;
+ }
+ else
+ {
+ while (unformat (line_input, "%U",
+ unformat_mpls_unicast_label, &out_label))
+ {
+ vec_add1 (rpaths[vec_len (rpaths) - 1].frp_label_stack,
+ out_label);
+ }
+ }
+ }
+ else if (unformat (line_input, "via-label %U",
+ unformat_mpls_unicast_label, &rpath.frp_local_label))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_eos = MPLS_NON_EOS;
+ rpath.frp_proto = DPO_PROTO_MPLS;
+ rpath.frp_sw_if_index = ~0;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "count %f", &count))
+ ;
+
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip4_address, &pfx.fp_addr.ip4, &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ vec_add1 (prefixs, pfx);
+ }
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip6_address, &pfx.fp_addr.ip6, &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ vec_add1 (prefixs, pfx);
+ }
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1 (rpaths, rpath);
+ }
+
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "weight %u", &weight))
+ {
+ ASSERT (vec_len (rpaths));
+ rpaths[vec_len (rpaths) - 1].frp_weight = weight;
+ }
+ else if (unformat (line_input, "preference %u", &preference))
+ {
+ ASSERT (vec_len (rpaths));
+ rpaths[vec_len (rpaths) - 1].frp_preference = preference;
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4, &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6, &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip4_address, &rpath.frp_addr.ip4))
+ {
+ /*
+ * the recursive next-hops are by default in the same table
+ * as the prefix
+ */
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip6_address, &rpath.frp_addr.ip6))
+ {
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input,
+ "lookup in table %d", &rpath.frp_fib_index))
+ {
+ rpath.frp_proto = fib_proto_to_dpo (pfx.fp_proto);
+ rpath.frp_sw_if_index = ~0;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (vec_len (prefixs) > 0 &&
+ unformat (line_input, "via %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = fib_proto_to_dpo (prefixs[0].fp_proto);
+ vec_add1 (rpaths, rpath);
+ }
+ else if (vec_len (prefixs) > 0 &&
+ unformat (line_input, "via %U",
+ unformat_dpo, &dpo, prefixs[0].fp_proto))
+ {
+ vec_add1 (dpos, dpo);
+ }
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "add"))
+ is_del = 0;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (vec_len (prefixs) == 0)
+ {
+ error =
+ clib_error_return (0, "expected ip4/ip6 destination address/length.");
+ goto done;
+ }
+
+ if (!is_del && vec_len (rpaths) + vec_len (dpos) == 0)
+ {
+ error = clib_error_return (0, "expected paths.");
+ goto done;
+ }
+
+ if (~0 == table_id)
+ {
+ /*
+ * if no table_id is passed we will manipulate the default
+ */
+ fib_index = 0;
+ }
+ else
+ {
+ fib_index = fib_table_find (prefixs[0].fp_proto, table_id);
+
+ if (~0 == fib_index)
+ {
+ error = clib_error_return (0, "Nonexistent table id %d", table_id);
+ goto done;
+ }
+ }
+
+ for (i = 0; i < vec_len (prefixs); i++)
+ {
+ if (is_del && 0 == vec_len (rpaths))
+ {
+ fib_table_entry_delete (fib_index, &prefixs[i], FIB_SOURCE_CLI);
+ }
+ else if (!is_del && 1 == vec_len (dpos))
+ {
+ fib_table_entry_special_dpo_add (fib_index,
+ &prefixs[i],
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpos[0]);
+ dpo_reset (&dpos[0]);
+ }
+ else if (vec_len (dpos) > 0)
+ {
+ error =
+ clib_error_return (0,
+ "Load-balancing over multiple special adjacencies is unsupported");
+ goto done;
+ }
+ else if (0 < vec_len (rpaths))
+ {
+ u32 k, j, n, incr;
+ ip46_address_t dst = prefixs[i].fp_addr;
+ f64 t[2];
+ n = count;
+ t[0] = vlib_time_now (vm);
+ incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 32 : 128) -
+ prefixs[i].fp_len);
+
+ for (k = 0; k < n; k++)
+ {
+ for (j = 0; j < vec_len (rpaths); j++)
+ {
+ u32 fi;
+ /*
+ * the CLI parsing stored table Ids, swap to FIB indices
+ */
+ fi = fib_table_find (prefixs[i].fp_proto,
+ rpaths[j].frp_fib_index);
+
+ if (~0 == fi)
+ {
+ error =
+ clib_error_return (0, "Via table %d does not exist",
+ rpaths[j].frp_fib_index);
+ goto done;
+ }
+ rpaths[j].frp_fib_index = fi;
+
+ fib_prefix_t rpfx = {
+ .fp_len = prefixs[i].fp_len,
+ .fp_proto = prefixs[i].fp_proto,
+ .fp_addr = dst,
+ };
+
+ if (is_del)
+ fib_table_entry_path_remove2 (fib_index,
+ &rpfx,
+ FIB_SOURCE_CLI, &rpaths[j]);
+ else
+ fib_table_entry_path_add2 (fib_index,
+ &rpfx,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ &rpaths[j]);
+ }
+
+ if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto)
+ {
+ dst.ip4.as_u32 =
+ clib_host_to_net_u32 (incr +
+ clib_net_to_host_u32 (dst.
+ ip4.as_u32));
+ }
+ else
+ {
+ int bucket = (incr < 64 ? 0 : 1);
+ dst.ip6.as_u64[bucket] =
+ clib_host_to_net_u64 (incr +
+ clib_net_to_host_u64 (dst.ip6.as_u64
+ [bucket]));
+
+ }
+ }
+ t[1] = vlib_time_now (vm);
+ if (count > 1)
+ vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
+ }
+ else
+ {
+ error = clib_error_return (0, "Don't understand what you want...");
+ goto done;
+ }
+ }
+
+
+done:
+ vec_free (dpos);
+ vec_free (prefixs);
+ vec_free (rpaths);
+ unformat_free (line_input);
+ return error;
+}
+
+clib_error_t *
+vnet_ip_table_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input,
+ vlib_cli_command_t * cmd, fib_protocol_t fproto)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ u32 table_id, is_add;
+ u8 *name = NULL;
+
+ is_add = 1;
+ table_id = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "name %s", &name))
+ ;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == table_id)
+ {
+ error = clib_error_return (0, "No table id");
+ goto done;
+ }
+ else if (0 == table_id)
+ {
+ error = clib_error_return (0, "Can't change the default table");
+ goto done;
+ }
+ else
+ {
+ if (is_add)
+ {
+ ip_table_create (fproto, table_id, 0, name);
+ }
+ else
+ {
+ ip_table_delete (fproto, table_id, 0);
+ }
+ }
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+clib_error_t *
+vnet_ip4_table_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+ return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP4));
+}
+
+clib_error_t *
+vnet_ip6_table_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+ return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6));
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
+ .path = "ip",
+ .short_help = "Internet protocol (IP) commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = {
+ .path = "ip6",
+ .short_help = "Internet protocol version 6 (IPv6) commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
+ .path = "show ip",
+ .short_help = "Internet protocol (IP) show commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
+ .path = "show ip6",
+ .short_help = "Internet protocol version 6 (IPv6) show commands",
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command is used to add or delete IPv4 or IPv6 routes. All
+ * IP Addresses ('<em><dst-ip-addr>/<width></em>',
+ * '<em><next-hop-ip-addr></em>' and '<em><adj-hop-ip-addr></em>')
+ * can be IPv4 or IPv6, but all must be of the same form in a single
+ * command. To display the current set of routes, use the commands
+ * '<em>show ip fib</em>' and '<em>show ip6 fib</em>'.
+ *
+ * @cliexpar
+ * Example of how to add a straightforward static route:
+ * @cliexcmd{ip route add 6.0.1.2/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Example of how to delete a straightforward static route:
+ * @cliexcmd{ip route del 6.0.1.2/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Mainly for route add/del performance testing, one can add or delete
+ * multiple routes at once by appending 'count N' to the command:
+ * @cliexcmd{ip route add count 10 7.0.0.0/24 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Add multiple routes for the same destination to create equal-cost multipath:
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0}
+ * For unequal-cost multipath, specify the desired weights. This
+ * combination of weights results in 3/4 of the traffic following the
+ * second path, 1/4 following the first path:
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.1 GigabitEthernet2/0/0 weight 1}
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0 weight 3}
+ * To add a route to a particular FIB table (VRF), use:
+ * @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_route_command, static) = {
+ .path = "ip route",
+ .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table <table-id>] [via <next-hop-ip-addr> [<interface>] [weight <weight>]] | [via arp <interface> <adj-hop-ip-addr>] | [via drop|punt|local<id>|arp|classify <classify-idx>] [lookup in table <out-table-id>]",
+ .function = vnet_ip_route_cmd,
+ .is_mp_safe = 1,
+};
+
+/* *INDENT-ON* */
+/*?
+ * This command is used to add or delete IPv4 tables. A table must be
+ * explicitly added before it can be used. Creating a table adds both
+ * unicast and multicast FIBs.
+ *
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip4_table_command, static) = {
+ .path = "ip table",
+ .short_help = "ip table [add|del] <table-id>",
+ .function = vnet_ip4_table_cmd,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command is used to add or delete IPv6 tables. A table must be
+ * explicitly added before it can be used. Creating a table adds both
+ * unicast and multicast FIBs.
+ *
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_table_command, static) = {
+ .path = "ip6 table",
+ .short_help = "ip6 table [add|del] <table-id>",
+ .function = vnet_ip6_table_cmd,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip_table_bind_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd,
+ fib_protocol_t fproto)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index, table_id;
+ int rv;
+
+ sw_if_index = ~0;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "%d", &table_id))
+ ;
+ else
+ {
+ error = clib_error_return (0, "expected table id `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ rv = ip_table_bind (fproto, sw_if_index, table_id, 0);
+
+ if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv)
+ {
+ error = clib_error_return (0, "IP addresses are still present on %U",
+ format_vnet_sw_if_index_name,
+ vnet_get_main(),
+ sw_if_index);
+ }
+ else if (VNET_API_ERROR_NO_SUCH_FIB == rv)
+ {
+ error = clib_error_return (0, "no such table %d", table_id);
+ }
+ else if (0 != rv)
+ {
+ error = clib_error_return (0, "unknown error");
+ }
+
+ done:
+ return error;
+}
+
+static clib_error_t *
+ip4_table_bind_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return (ip_table_bind_cmd (vm, input, cmd, FIB_PROTOCOL_IP4));
+}
+
+static clib_error_t *
+ip6_table_bind_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return (ip_table_bind_cmd (vm, input, cmd, FIB_PROTOCOL_IP6));
+}
+
+/*?
+ * Place the indicated interface into the supplied IPv4 FIB table (also known
+ * as a VRF). The FIB table must already exist (see '<em>ip table</em>'). To
+ * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
+ * A FIB table is only displayed once a route has been added to it, or
+ * an IP address is assigned to an interface in the table (which adds a route
+ * automatically).
+ *
+ * @note IP addresses added after setting the interface IP table are added to
+ * the indicated FIB table. If an IP address is added prior to changing the
+ * table then this is an error. The control plane must remove these addresses
+ * first and then change the table. VPP will not automatically move the
+ * addresses from the old to the new table as it does not know the validity
+ * of such a change.
+ *
+ * @cliexpar
+ * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
+ * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
+{
+ .path = "set interface ip table",
+ .function = ip4_table_bind_cmd,
+ .short_help = "set interface ip table <interface> <table-id>",
+};
+/* *INDENT-ON* */
+
+/*?
+ * Place the indicated interface into the supplied IPv6 FIB table (also known
+ * as a VRF). The FIB table must already exist (see '<em>ip6 table</em>'). To
+ * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'.
+ * A FIB table is only displayed once a route has been added to it, or
+ * an IP address is assigned to an interface in the table (which adds a route
+ * automatically).
+ *
+ * @note IP addresses added after setting the interface IP table are added to
+ * the indicated FIB table. If an IP address is added prior to changing the
+ * table then this is an error. The control plane must remove these addresses
+ * first and then change the table. VPP will not automatically move the
+ * addresses from the old to the new table as it does not know the validity
+ * of such a change.
+ *
+ * @cliexpar
+ * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
+ * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
+{
+ .path = "set interface ip6 table",
+ .function = ip6_table_bind_cmd,
+ .short_help = "set interface ip6 table <interface> <table-id>"
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+vnet_ip_mroute_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ fib_route_path_t rpath;
+ u32 table_id, is_del;
+ vnet_main_t *vnm;
+ mfib_prefix_t pfx;
+ u32 fib_index;
+ mfib_itf_flags_t iflags = 0;
+ mfib_entry_flags_t eflags = 0;
+ u32 gcount, scount, ss, gg, incr;
+ f64 timet[2];
+
+ gcount = scount = 1;
+ vnm = vnet_get_main ();
+ is_del = 0;
+ table_id = ~0;
+ memset (&pfx, 0, sizeof (pfx));
+ memset (&rpath, 0, sizeof (rpath));
+ rpath.frp_sw_if_index = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "table %d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "add"))
+ is_del = 0;
+ else if (unformat (line_input, "scount %d", &scount))
+ ;
+ else if (unformat (line_input, "gcount %d", &gcount))
+ ;
+ else if (unformat (line_input, "%U %U",
+ unformat_ip4_address,
+ &pfx.fp_src_addr.ip4,
+ unformat_ip4_address, &pfx.fp_grp_addr.ip4))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = 64;
+ }
+ else if (unformat (line_input, "%U %U",
+ unformat_ip6_address,
+ &pfx.fp_src_addr.ip6,
+ unformat_ip6_address, &pfx.fp_grp_addr.ip6))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = 256;
+ }
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip4_address,
+ &pfx.fp_grp_addr.ip4, &pfx.fp_len))
+ {
+ memset (&pfx.fp_src_addr.ip4, 0, sizeof (pfx.fp_src_addr.ip4));
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ }
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip6_address,
+ &pfx.fp_grp_addr.ip6, &pfx.fp_len))
+ {
+ memset (&pfx.fp_src_addr.ip6, 0, sizeof (pfx.fp_src_addr.ip6));
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ }
+ else if (unformat (line_input, "%U",
+ unformat_ip4_address, &pfx.fp_grp_addr.ip4))
+ {
+ memset (&pfx.fp_src_addr.ip4, 0, sizeof (pfx.fp_src_addr.ip4));
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = 32;
+ }
+ else if (unformat (line_input, "%U",
+ unformat_ip6_address, &pfx.fp_grp_addr.ip6))
+ {
+ memset (&pfx.fp_src_addr.ip6, 0, sizeof (pfx.fp_src_addr.ip6));
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = 128;
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ }
+ else if (unformat (line_input, "via local"))
+ {
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_weight = 1;
+ rpath.frp_flags |= FIB_ROUTE_PATH_LOCAL;
+ }
+ else if (unformat (line_input, "%U", unformat_mfib_itf_flags, &iflags))
+ ;
+ else if (unformat (line_input, "%U",
+ unformat_mfib_entry_flags, &eflags))
+ ;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == table_id)
+ {
+ /*
+ * if no table_id is passed we will manipulate the default
+ */
+ fib_index = 0;
+ }
+ else
+ {
+ fib_index = mfib_table_find (pfx.fp_proto, table_id);
+
+ if (~0 == fib_index)
+ {
+ error = clib_error_return (0, "Nonexistent table id %d", table_id);
+ goto done;
+ }
+ }
+
+ timet[0] = vlib_time_now (vm);
+
+ if (FIB_PROTOCOL_IP4 == pfx.fp_proto)
+ {
+ /* step by one host for /32 and (S,G) entries, else by one prefix */
+ incr = (pfx.fp_len % 32) ? 1 << (32 - (pfx.fp_len % 32)) : 1;
+ }
+ else
+ {
+ incr = (pfx.fp_len % 128) ? 1 << (128 - (pfx.fp_len % 128)) : 1;
+ }
+
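+ /*
+ * e.g. a /24 IPv4 group prefix gives incr = 1 << 8 = 256, so each
+ * pass of the loop below steps the group address to the next /24.
+ */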
+ for (ss = 0; ss < scount; ss++)
+ {
+ for (gg = 0; gg < gcount; gg++)
+ {
+ if (is_del && 0 == rpath.frp_weight)
+ {
+ /* no path provided => route delete */
+ mfib_table_entry_delete (fib_index, &pfx, MFIB_SOURCE_CLI);
+ }
+ else if (eflags)
+ {
+ mfib_table_entry_update (fib_index, &pfx, MFIB_SOURCE_CLI,
+ MFIB_RPF_ID_NONE, eflags);
+ }
+ else
+ {
+ if (is_del)
+ mfib_table_entry_path_remove (fib_index,
+ &pfx, MFIB_SOURCE_CLI, &rpath);
+ else
+ mfib_table_entry_path_update (fib_index,
+ &pfx, MFIB_SOURCE_CLI, &rpath,
+ iflags);
+ }
+
+ if (FIB_PROTOCOL_IP4 == pfx.fp_proto)
+ {
+ pfx.fp_grp_addr.ip4.as_u32 =
+ clib_host_to_net_u32 (incr +
+ clib_net_to_host_u32 (pfx.
+ fp_grp_addr.ip4.
+ as_u32));
+ }
+ else
+ {
+ int bucket = (incr < 64 ? 0 : 1);
+ pfx.fp_grp_addr.ip6.as_u64[bucket] =
+ clib_host_to_net_u64 (incr +
+ clib_net_to_host_u64 (pfx.
+ fp_grp_addr.ip6.as_u64
+ [bucket]));
+
+ }
+ }
+ if (FIB_PROTOCOL_IP4 == pfx.fp_proto)
+ {
+ pfx.fp_src_addr.ip4.as_u32 =
+ clib_host_to_net_u32 (1 +
+ clib_net_to_host_u32 (pfx.fp_src_addr.
+ ip4.as_u32));
+ }
+ else
+ {
+ pfx.fp_src_addr.ip6.as_u64[1] =
+ clib_host_to_net_u64 (1 +
+ clib_net_to_host_u64 (pfx.fp_src_addr.
+ ip6.as_u64[1]));
+ }
+ }
+
+ timet[1] = vlib_time_now (vm);
+
+ if (scount > 1 || gcount > 1)
+ vlib_cli_output (vm, "%.6e routes/sec",
+ (scount * gcount) / (timet[1] - timet[0]));
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command is used to add or delete IPv4 or IPv6 multicast routes. All
+ * IP Addresses ('<em><dst-ip-addr>/<width></em>',
+ * '<em><next-hop-ip-addr></em>' and '<em><adj-hop-ip-addr></em>')
+ * can be IPv4 or IPv6, but all must be of the same form in a single
+ * command. To display the current set of routes, use the commands
+ * '<em>show ip mfib</em>' and '<em>show ip6 mfib</em>'.
+ * The full set of supported flags for interfaces and routes is shown via
+ * '<em>show mfib route flags</em>' and '<em>show mfib itf flags</em>'
+ * respectively.
+ * @cliexpar
+ * Example of how to add a forwarding interface to a route (and create the
+ * route if it does not exist)
+ * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/0 Forward}
+ * Example of how to add an accepting interface to a route (and create the
+ * route if it does not exist)
+ * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/1 Accept}
+ * Example of changing the route's flags to send signals via the API
+ * @cliexcmd{ip mroute add 232.1.1.1 Signal}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_mroute_command, static) =
+{
+ .path = "ip mroute",
+ .short_help = "ip mroute [add|del] <dst-ip-addr>/<width> [table <table-id>] [via <next-hop-ip-addr> [<interface>],",
+ .function = vnet_ip_mroute_cmd,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * The next two routines address a longstanding script hemorrhoid.
+ * Probing a v4 or v6 neighbor needs to appear to be synchronous,
+ * or dependent route-adds will simply fail.
+ */
+static clib_error_t *
+ip6_probe_neighbor_wait (vlib_main_t * vm, ip6_address_t * a, u32 sw_if_index,
+ int retry_count)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *e;
+ int i;
+ int resolved = 0;
+ uword event_type;
+ uword *event_data = 0;
+
+ ASSERT (vlib_in_process_context (vm));
+
+ if (retry_count > 0)
+ vnet_register_ip6_neighbor_resolution_event
+ (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
+ 1 /* event */ , 0 /* data */ );
+
+ for (i = 0; i < retry_count; i++)
+ {
+ /* The interface may be down, etc. */
+ e = ip6_probe_neighbor (vm, a, sw_if_index);
+
+ if (e)
+ return e;
+
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case 1: /* resolved... */
+ vlib_cli_output (vm, "Resolved %U", format_ip6_address, a);
+ resolved = 1;
+ goto done;
+
+ case ~0: /* timeout */
+ break;
+
+ default:
+ clib_warning ("unknown event_type %d", event_type);
+ }
+ vec_reset_length (event_data);
+ }
+
+done:
+
+ vec_free (event_data);
+
+ if (!resolved)
+ return clib_error_return (0, "Resolution failed for %U",
+ format_ip6_address, a);
+ return 0;
+}
+
+static clib_error_t *
+ip4_probe_neighbor_wait (vlib_main_t * vm, ip4_address_t * a, u32 sw_if_index,
+ int retry_count)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *e;
+ int i;
+ int resolved = 0;
+ uword event_type;
+ uword *event_data = 0;
+
+ ASSERT (vlib_in_process_context (vm));
+
+ if (retry_count > 0)
+ vnet_register_ip4_arp_resolution_event
+ (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
+ 1 /* event */ , 0 /* data */ );
+
+ for (i = 0; i < retry_count; i++)
+ {
+ /* The interface may be down, etc. */
+ e = ip4_probe_neighbor (vm, a, sw_if_index);
+
+ if (e)
+ return e;
+
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case 1: /* resolved... */
+ vlib_cli_output (vm, "Resolved %U", format_ip4_address, a);
+ resolved = 1;
+ goto done;
+
+ case ~0: /* timeout */
+ break;
+
+ default:
+ clib_warning ("unknown event_type %d", event_type);
+ }
+ vec_reset_length (event_data);
+ }
+
+done:
+
+ vec_free (event_data);
+
+ if (!resolved)
+ return clib_error_return (0, "Resolution failed for %U",
+ format_ip4_address, a);
+ return 0;
+}
+
+static clib_error_t *
+probe_neighbor_address (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t a4;
+ ip6_address_t a6;
+ clib_error_t *error = 0;
+ u32 sw_if_index = ~0;
+ int retry_count = 3;
+ int is_ip4 = 1;
+ int address_set = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "retry %d", &retry_count))
+ ;
+
+ else if (unformat (line_input, "%U", unformat_ip4_address, &a4))
+ address_set++;
+ else if (unformat (line_input, "%U", unformat_ip6_address, &a6))
+ {
+ address_set++;
+ is_ip4 = 0;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (sw_if_index == ~0)
+ {
+ error = clib_error_return (0, "Interface required, not set.");
+ goto done;
+ }
+ if (address_set == 0)
+ {
+ error = clib_error_return (0, "ip address required, not set.");
+ goto done;
+ }
+ if (address_set > 1)
+ {
+ error = clib_error_return (0, "Multiple ip addresses not supported.");
+ goto done;
+ }
+
+ if (is_ip4)
+ error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count);
+ else
+ error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * The '<em>ip probe-neighbor</em>' command ARPs for IPv4 addresses or
+ * attempts IPv6 neighbor discovery depending on the supplied IP address
+ * format.
+ *
+ * @note This command will not immediately affect the indicated FIB; it
+ * is not suitable for use in establishing a FIB entry prior to adding
+ * recursive FIB entries. As in: don't use it in a script to probe a
+ * gateway prior to adding a default route. It won't work. Instead,
+ * configure a static ARP cache entry [see '<em>set ip arp</em>'], or
+ * a static IPv6 neighbor [see '<em>set ip6 neighbor</em>'].
+ *
+ * @cliexpar
+ * Example of probe for an IPv4 address:
+ * @cliexcmd{ip probe-neighbor GigabitEthernet2/0/0 172.16.1.2}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = {
+ .path = "ip probe-neighbor",
+ .function = probe_neighbor_address,
+ .short_help = "ip probe-neighbor <interface> <ip4-addr> | <ip6-addr> [retry nn]",
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h
new file mode 100644
index 00000000..28a4bd8f
--- /dev/null
+++ b/src/vnet/ip/lookup.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ...
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * Definitions for all things IP (v4|v6) unicast and multicast lookup related.
+ *
+ * - Adjacency definitions and registration.
+ * - Callbacks on route add.
+ * - Callbacks on interface address change.
+ */
+#ifndef included_ip_lookup_h
+#define included_ip_lookup_h
+
+#include <vnet/vnet.h>
+#include <vlib/buffer.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/feature/feature.h>
+
+/** Flow hash configuration */
+#define IP_FLOW_HASH_SRC_ADDR (1<<0)
+#define IP_FLOW_HASH_DST_ADDR (1<<1)
+#define IP_FLOW_HASH_PROTO (1<<2)
+#define IP_FLOW_HASH_SRC_PORT (1<<3)
+#define IP_FLOW_HASH_DST_PORT (1<<4)
+#define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5)
+
+/** Default: 5-tuple without the "reverse" bit */
+#define IP_FLOW_HASH_DEFAULT (0x1F)
+
+#define foreach_flow_hash_bit \
+_(src, IP_FLOW_HASH_SRC_ADDR) \
+_(dst, IP_FLOW_HASH_DST_ADDR) \
+_(sport, IP_FLOW_HASH_SRC_PORT) \
+_(dport, IP_FLOW_HASH_DST_PORT) \
+_(proto, IP_FLOW_HASH_PROTO) \
+_(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
+
+/**
+ * A flow hash configuration is a mask of the flow hash options
+ */
+typedef u32 flow_hash_config_t;
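+
+/*
+ * Illustrative sketch (not used by the code here): a configuration is
+ * built by OR-ing the option bits together, e.g. a hash over the
+ * addresses only:
+ *
+ *   flow_hash_config_t fhc = IP_FLOW_HASH_SRC_ADDR | IP_FLOW_HASH_DST_ADDR;
+ *
+ * IP_FLOW_HASH_DEFAULT (0x1F) sets the first five bits, i.e. the full
+ * 5-tuple without IP_FLOW_HASH_REVERSE_SRC_DST.
+ */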
+
+/* An all zeros address */
+extern const ip46_address_t zero_addr;
+
+
+typedef struct
+{
+ /* Key for mhash; in fact, just a byte offset into mhash key vector. */
+ u32 address_key;
+
+ /* Interface which has this address. */
+ u32 sw_if_index;
+
+ /* Address (prefix) length for this interface. */
+ u16 address_length;
+
+ /* Will be used for something eventually. Primary vs. secondary? */
+ u16 flags;
+
+ /* Next and previous pointers for doubly linked list of
+ addresses per software interface. */
+ u32 next_this_sw_interface;
+ u32 prev_this_sw_interface;
+} ip_interface_address_t;
+
+typedef enum
+{
+ IP_LOCAL_NEXT_DROP,
+ IP_LOCAL_NEXT_PUNT,
+ IP_LOCAL_NEXT_UDP_LOOKUP,
+ IP_LOCAL_NEXT_ICMP,
+ IP_LOCAL_N_NEXT,
+} ip_local_next_t;
+
+struct ip_lookup_main_t;
+
+typedef struct ip_lookup_main_t
+{
+ /** Pool of addresses that are assigned to interfaces. */
+ ip_interface_address_t *if_address_pool;
+
+ /** Hash table mapping address to index in interface address pool. */
+ mhash_t address_to_if_address_index;
+
+ /** Head of doubly linked list of interface addresses for each software interface.
+ ~0 means this interface has no address. */
+ u32 *if_address_pool_index_by_sw_if_index;
+
+ /** First table index to use for this interface, ~0 => none */
+ u32 *classify_table_index_by_sw_if_index;
+
+ /** Feature arc indices */
+ u8 mcast_feature_arc_index;
+ u8 ucast_feature_arc_index;
+ u8 output_feature_arc_index;
+
+ /** Number of bytes in a fib result. Must be at least
+ sizeof (uword). First word is always adjacency index. */
+ u32 fib_result_n_bytes, fib_result_n_words;
+
+ /** 1 for ip6; 0 for ip4. */
+ u32 is_ip6;
+
+ /** Either format_ip4_address_and_length or format_ip6_address_and_length. */
+ format_function_t *format_address_and_length;
+
+ /** Table mapping ip protocol to ip[46]-local node next index. */
+ u8 local_next_by_ip_protocol[256];
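+ /* e.g. (assumed; set up by ip_lookup_init): UDP maps to
+ IP_LOCAL_NEXT_UDP_LOOKUP, ICMP to IP_LOCAL_NEXT_ICMP, and unhandled
+ protocols fall through to IP_LOCAL_NEXT_PUNT. */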
+
+ /** IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */
+ u8 builtin_protocol_by_ip_protocol[256];
+} ip_lookup_main_t;
+
+clib_error_t *ip_interface_address_add_del (ip_lookup_main_t * lm,
+ u32 sw_if_index,
+ void *address,
+ u32 address_length,
+ u32 is_del, u32 * result_index);
+
+u8 *format_ip_flow_hash_config (u8 * s, va_list * args);
+
+always_inline ip_interface_address_t *
+ip_get_interface_address (ip_lookup_main_t * lm, void *addr_fib)
+{
+ uword *p = mhash_get (&lm->address_to_if_address_index, addr_fib);
+ return p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
+}
+
+always_inline void *
+ip_interface_address_get_address (ip_lookup_main_t * lm,
+ ip_interface_address_t * a)
+{
+ return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key);
+}
+
+/* *INDENT-OFF* */
+#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
+do { \
+ vnet_main_t *_vnm = vnet_get_main(); \
+ u32 _sw_if_index = sw_if_index; \
+ vnet_sw_interface_t *_swif; \
+ _swif = vnet_get_sw_interface (_vnm, _sw_if_index); \
+ \
+ /* \
+ * Loop => honor unnumbered interface addressing. \
+ */ \
+ if (_swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) \
+ { \
+ if (loop) \
+ _sw_if_index = _swif->unnumbered_sw_if_index; \
+ else \
+ /* the interface is unnumbered, but the caller does not want \
+ * unnumbered interfaces considered/honoured */ \
+ break; \
+ } \
+ u32 _ia = ((vec_len((lm)->if_address_pool_index_by_sw_if_index) \
+ > (_sw_if_index)) ? \
+ vec_elt ((lm)->if_address_pool_index_by_sw_if_index, \
+ (_sw_if_index)) : \
+ (u32)~0); \
+ ip_interface_address_t * _a; \
+ while (_ia != ~0) \
+ { \
+ _a = pool_elt_at_index ((lm)->if_address_pool, _ia); \
+ _ia = _a->next_this_sw_interface; \
+ (a) = _a; \
+ body; \
+ } \
+} while (0)
+/* *INDENT-ON* */
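+
+/*
+ * Usage sketch for the iterator above (hypothetical caller; real call
+ * sites live elsewhere in the tree). Passing 1 for 'loop' honours
+ * unnumbered interfaces; 'body' runs once per address with 'a' set:
+ *
+ *   ip_interface_address_t *a;
+ *   foreach_ip_interface_address (lm, a, sw_if_index, 1,
+ *   ({
+ *     ip4_address_t *x = ip_interface_address_get_address (lm, a);
+ *     clib_warning ("%U/%d", format_ip4_address, x, a->address_length);
+ *   }));
+ */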
+
+void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
+
+#endif /* included_ip_lookup_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ping.c b/src/vnet/ip/ping.c
new file mode 100755
index 00000000..0fa537f6
--- /dev/null
+++ b/src/vnet/ip/ping.c
@@ -0,0 +1,928 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stddef.h>
+#include <vnet/ip/ping.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
+#include <vlib/vlib.h>
+
+/**
+ * @file
+ * @brief IPv4 and IPv6 ICMP Ping.
+ *
+ * This file contains code to support IPv4 or IPv6 ICMP ECHO_REQUEST to
+ * network hosts.
+ *
+ */
+
+ping_main_t ping_main;
+
+u8 *
+format_icmp_echo_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ icmp_echo_trace_t *t = va_arg (*va, icmp_echo_trace_t *);
+
+ s = format (s, "ICMP echo id %d seq %d%s",
+ clib_net_to_host_u16 (t->id),
+ clib_net_to_host_u16 (t->seq), t->bound ? "" : " (unknown)");
+
+ return s;
+}
+
+/*
+ * If we can find the ping run by an ICMP ID, then we send the signal
+ * to the CLI process referenced by that ping run, along with
+ * a freshly made copy of the packet.
+ * I opted for a packet copy to keep the main packet processing path
+ * the same as for all the other nodes.
+ *
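+ * The receiving side is the CLI process in run_ping_ip46_address () below,
+ * which waits with vlib_process_wait_for_event_or_clock () and frees the
+ * buffer copy once the reply has been printed.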
+ */
+
+static int
+signal_ip46_icmp_reply_event (u8 event_type, vlib_buffer_t * b0)
+{
+ ping_main_t *pm = &ping_main;
+ u16 net_icmp_id = 0;
+ u32 bi0_copy = 0;
+
+ switch (event_type)
+ {
+ case PING_RESPONSE_IP4:
+ {
+ icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ net_icmp_id = h0->icmp_echo.id;
+ }
+ break;
+ case PING_RESPONSE_IP6:
+ {
+ icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ net_icmp_id = h0->icmp_echo.id;
+ }
+ break;
+ default:
+ return 0;
+ }
+
+ uword *p = hash_get (pm->ping_run_by_icmp_id,
+ clib_net_to_host_u16 (net_icmp_id));
+ if (!p)
+ return 0;
+
+ ping_run_t *pr = vec_elt_at_index (pm->ping_runs, p[0]);
+ vlib_main_t *vm = vlib_mains[pr->cli_thread_index];
+ if (vlib_buffer_alloc (vm, &bi0_copy, 1) == 1)
+ {
+ void *dst = vlib_buffer_get_current (vlib_get_buffer (vm,
+ bi0_copy));
+ clib_memcpy (dst, vlib_buffer_get_current (b0), b0->current_length);
+ }
+ /* If buffer_alloc failed, bi0_copy == 0 - just signaling an event. */
+ f64 nowts = vlib_time_now (vm);
+ /* Pass the timestamp to the CLI process via the vnet_buffer unused metadata field */
+ clib_memcpy (vnet_buffer
+ (vlib_get_buffer
+ (vm, bi0_copy))->unused, &nowts, sizeof (nowts));
+ vlib_process_signal_event_mt (vm, pr->cli_process_id, event_type, bi0_copy);
+ return 1;
+}
+
+/*
+ * Process ICMPv6 echo replies
+ */
+static uword
+ip6_icmp_echo_reply_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from;
+
+ from = vlib_frame_vector_args (frame); /* array of buffer indices */
+ n_left_from = frame->n_vectors; /* number of buffer indices */
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ next0 = signal_ip46_icmp_reply_event (PING_RESPONSE_IP6, b0) ?
+ ICMP6_ECHO_REPLY_NEXT_DROP : ICMP6_ECHO_REPLY_NEXT_PUNT;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ icmp_echo_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->id = h0->icmp_echo.id;
+ tr->seq = h0->icmp_echo.seq;
+ tr->bound = (next0 == ICMP6_ECHO_REPLY_NEXT_DROP);
+ }
+
+ /* push this pkt to the next graph node */
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_echo_reply_node, static) =
+{
+ .function = ip6_icmp_echo_reply_node_fn,
+ .name = "ip6-icmp-echo-reply",
+ .vector_size = sizeof (u32),
+ .format_trace = format_icmp_echo_trace,
+ .n_next_nodes = ICMP6_ECHO_REPLY_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ECHO_REPLY_NEXT_DROP] = "error-drop",
+ [ICMP6_ECHO_REPLY_NEXT_PUNT] = "error-punt",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * Process ICMPv4 echo replies
+ */
+static uword
+ip4_icmp_echo_reply_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from;
+
+ from = vlib_frame_vector_args (frame); /* array of buffer indices */
+ n_left_from = frame->n_vectors; /* number of buffer indices */
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ next0 = signal_ip46_icmp_reply_event (PING_RESPONSE_IP4, b0) ?
+ ICMP4_ECHO_REPLY_NEXT_DROP : ICMP4_ECHO_REPLY_NEXT_PUNT;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ icmp_echo_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->id = h0->icmp_echo.id;
+ tr->seq = h0->icmp_echo.seq;
+ tr->bound = (next0 == ICMP4_ECHO_REPLY_NEXT_DROP);
+ }
+
+ /* push this pkt to the next graph node */
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_echo_reply_node, static) =
+{
+ .function = ip4_icmp_echo_reply_node_fn,
+ .name = "ip4-icmp-echo-reply",
+ .vector_size = sizeof (u32),
+ .format_trace = format_icmp_echo_trace,
+ .n_next_nodes = ICMP4_ECHO_REPLY_N_NEXT,
+ .next_nodes = {
+ [ICMP4_ECHO_REPLY_NEXT_DROP] = "error-drop",
+ [ICMP4_ECHO_REPLY_NEXT_PUNT] = "error-punt",
+ },
+};
+/* *INDENT-ON* */
+
+char *ip6_lookup_next_nodes[] = IP6_LOOKUP_NEXT_NODES;
+char *ip4_lookup_next_nodes[] = IP4_LOOKUP_NEXT_NODES;
+
+/* Fill in the ICMP ECHO structure, return the safety-checked and possibly shrunk data_len */
+static u16
+init_icmp46_echo_request (icmp46_echo_request_t * icmp46_echo,
+ u16 seq_host, u16 id_host, u16 data_len)
+{
+ int i;
+ icmp46_echo->seq = clib_host_to_net_u16 (seq_host);
+ icmp46_echo->id = clib_host_to_net_u16 (id_host);
+
+ if (data_len > PING_MAXIMUM_DATA_SIZE)
+ data_len = PING_MAXIMUM_DATA_SIZE;
+ for (i = 0; i < data_len; i++)
+ icmp46_echo->data[i] = i % 256;
+ return data_len;
+}
+
+static send_ip46_ping_result_t
+send_ip6_ping (vlib_main_t * vm, ip6_main_t * im,
+ u32 table_id, ip6_address_t * pa6,
+ u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
+ u32 burst, u8 verbose)
+{
+ icmp6_echo_request_header_t *h0;
+ u32 bi0 = 0;
+ int bogus_length = 0;
+ vlib_buffer_t *p0;
+ vlib_frame_t *f;
+ u32 *to_next;
+ vlib_buffer_free_list_t *fl;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ return SEND_PING_ALLOC_FAIL;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (p0, fl);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (p0);
+
+ /*
+ * if the user did not provide a source interface, use the interface
+ * that the destination resolves via.
+ */
+ if (~0 == sw_if_index)
+ {
+ fib_node_index_t fib_entry_index;
+ u32 fib_index;
+
+ fib_index = ip6_fib_index_from_table_id (table_id);
+
+ if (~0 == fib_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_TABLE;
+ }
+
+ fib_entry_index = ip6_fib_table_lookup (fib_index, pa6, 128);
+ sw_if_index = fib_entry_get_resolving_interface (fib_entry_index);
+ /*
+ * Set the TX interface to force ip-lookup to use its table ID
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+ }
+ else
+ {
+ /*
+ * force an IP lookup in the table bound to the user's chosen
+ * source interface.
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
+ }
+
+ if (~0 == sw_if_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_INTERFACE;
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
+
+ h0 = vlib_buffer_get_current (p0);
+
+ /* Fill in ip6 header fields */
+ h0->ip6.ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+ h0->ip6.payload_length = 0; /* Set below */
+ h0->ip6.protocol = IP_PROTOCOL_ICMP6;
+ h0->ip6.hop_limit = 255;
+ h0->ip6.dst_address = *pa6;
+ h0->ip6.src_address = *pa6;
+
+ /* Fill in the correct source now */
+ ip6_address_t *a = ip6_interface_first_address (im, sw_if_index);
+ if (!a)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_SRC_ADDRESS;
+ }
+ h0->ip6.src_address = a[0];
+
+ /* Fill in icmp fields */
+ h0->icmp.type = ICMP6_echo_request;
+ h0->icmp.code = 0;
+ h0->icmp.checksum = 0;
+
+ data_len =
+ init_icmp46_echo_request (&h0->icmp_echo, seq_host, id_host, data_len);
+ h0->icmp_echo.time_sent = vlib_time_now (vm);
+
+ /* Fix up the lengths */
+ h0->ip6.payload_length =
+ clib_host_to_net_u16 (data_len + sizeof (icmp46_header_t));
+
+ p0->current_length = clib_net_to_host_u16 (h0->ip6.payload_length) +
+ STRUCT_OFFSET_OF (icmp6_echo_request_header_t, icmp);
+
+ /* Calculate the ICMP checksum */
+ h0->icmp.checksum = 0;
+ h0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip6, &bogus_length);
+
+ /* Enqueue the packet right now */
+ f = vlib_get_frame_to_node (vm, ip6_lookup_node.index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+
+ ASSERT (burst <= VLIB_FRAME_SIZE);
+ f->n_vectors = burst;
+ while (--burst)
+ {
+ vlib_buffer_t *c0 = vlib_buffer_copy (vm, p0);
+ to_next++;
+ to_next[0] = vlib_get_buffer_index (vm, c0);
+ }
+ vlib_put_frame_to_node (vm, ip6_lookup_node.index, f);
+
+ return SEND_PING_OK;
+}
+
+static send_ip46_ping_result_t
+send_ip4_ping (vlib_main_t * vm,
+ ip4_main_t * im,
+ u32 table_id,
+ ip4_address_t * pa4,
+ u32 sw_if_index,
+ u16 seq_host, u16 id_host, u16 data_len, u32 burst, u8 verbose)
+{
+ icmp4_echo_request_header_t *h0;
+ u32 bi0 = 0;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ vlib_buffer_t *p0;
+ vlib_frame_t *f;
+ u32 *to_next;
+ u32 if_add_index0;
+ vlib_buffer_free_list_t *fl;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ return SEND_PING_ALLOC_FAIL;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (p0, fl);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (p0);
+
+ /*
+ * if the user did not provide a source interface, use the interface
+ * that the destination resolves via.
+ */
+ if (~0 == sw_if_index)
+ {
+ fib_node_index_t fib_entry_index;
+ u32 fib_index;
+
+ fib_index = ip4_fib_index_from_table_id (table_id);
+
+ if (~0 == fib_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_TABLE;
+ }
+
+ fib_entry_index =
+ ip4_fib_table_lookup (ip4_fib_get (fib_index), pa4, 32);
+ sw_if_index = fib_entry_get_resolving_interface (fib_entry_index);
+ /*
+ * Set the TX interface to force ip-lookup to use the user's table ID
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+ }
+ else
+ {
+ /*
+ * force an IP lookup in the table bound to the user's chosen
+ * source interface.
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ }
+
+ if (~0 == sw_if_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_INTERFACE;
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
+
+ h0 = vlib_buffer_get_current (p0);
+
+ /* Fill in ip4 header fields */
+ h0->ip4.checksum = 0;
+ h0->ip4.ip_version_and_header_length = 0x45;
+ h0->ip4.tos = 0;
+ h0->ip4.length = 0; /* Set below */
+ h0->ip4.fragment_id = 0;
+ h0->ip4.flags_and_fragment_offset = 0;
+ h0->ip4.ttl = 0xff;
+ h0->ip4.protocol = IP_PROTOCOL_ICMP;
+ h0->ip4.dst_address = *pa4;
+ h0->ip4.src_address = *pa4;
+
+ /* Fill in the correct source now */
+ if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ if (PREDICT_TRUE (if_add_index0 != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index0);
+ ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add);
+ h0->ip4.src_address = *if_ip;
+ if (verbose)
+ {
+ vlib_cli_output (vm, "Source address: %U",
+ format_ip4_address, &h0->ip4.src_address);
+ }
+ }
+
+ /* Fill in icmp fields */
+ h0->icmp.type = ICMP4_echo_request;
+ h0->icmp.code = 0;
+ h0->icmp.checksum = 0;
+
+ data_len =
+ init_icmp46_echo_request (&h0->icmp_echo, seq_host, id_host, data_len);
+ h0->icmp_echo.time_sent = vlib_time_now (vm);
+
+ /* Fix up the lengths */
+ h0->ip4.length =
+ clib_host_to_net_u16 (data_len + sizeof (icmp46_header_t) +
+ sizeof (ip4_header_t));
+
+ p0->current_length = clib_net_to_host_u16 (h0->ip4.length);
+
+ /* Calculate the IP and ICMP checksums */
+ h0->ip4.checksum = ip4_header_checksum (&(h0->ip4));
+ h0->icmp.checksum =
+ ~ip_csum_fold (ip_incremental_checksum (0, &(h0->icmp),
+ p0->current_length -
+ sizeof (ip4_header_t)));
+
+ /* Enqueue the packet right now */
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+
+ ASSERT (burst <= VLIB_FRAME_SIZE);
+ f->n_vectors = burst;
+ while (--burst)
+ {
+ vlib_buffer_t *c0 = vlib_buffer_copy (vm, p0);
+ to_next++;
+ to_next[0] = vlib_get_buffer_index (vm, c0);
+ }
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+
+ return SEND_PING_OK;
+}
+
+
+static void
+print_ip6_icmp_reply (vlib_main_t * vm, u32 bi0)
+{
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ f64 rtt = 0;
+ clib_memcpy (&rtt, vnet_buffer (b0)->unused, sizeof (rtt));
+ rtt -= h0->icmp_echo.time_sent;
+ vlib_cli_output (vm,
+ "%d bytes from %U: icmp_seq=%d ttl=%d time=%.4f ms",
+ clib_host_to_net_u16 (h0->ip6.payload_length),
+ format_ip6_address,
+ &h0->ip6.src_address,
+ clib_host_to_net_u16 (h0->icmp_echo.seq),
+ h0->ip6.hop_limit, rtt * 1000.0);
+}
+
+static void
+print_ip4_icmp_reply (vlib_main_t * vm, u32 bi0)
+{
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ f64 rtt = 0;
+ clib_memcpy (&rtt, vnet_buffer (b0)->unused, sizeof (rtt));
+ rtt -= h0->icmp_echo.time_sent;
+ u32 rcvd_icmp_len =
+ clib_host_to_net_u16 (h0->ip4.length) -
+ (4 * (0xF & h0->ip4.ip_version_and_header_length));
+
+ vlib_cli_output (vm,
+ "%d bytes from %U: icmp_seq=%d ttl=%d time=%.4f ms",
+ rcvd_icmp_len,
+ format_ip4_address,
+ &h0->ip4.src_address,
+ clib_host_to_net_u16 (h0->icmp_echo.seq),
+ h0->ip4.ttl, rtt * 1000.0);
+}
+
+
+/*
+ * Perform the ping run with the given parameters in the current CLI process.
+ * Depending on whether pa4 or pa6 is set, runs IPv4 or IPv6 ping.
+ * The amusing side effect is, of course, that if both are set, both pings
+ * are sent. This behavior can be used to ping a dual-stack host over IPv4
+ * and IPv6 at once.
+ */
+
+static void
+run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4,
+ ip6_address_t * pa6, u32 sw_if_index,
+ f64 ping_interval, u32 ping_repeat, u32 data_len,
+ u32 ping_burst, u32 verbose)
+{
+ int i;
+ ping_main_t *pm = &ping_main;
+ uword curr_proc = vlib_current_process (vm);
+ u32 n_replies = 0;
+ u32 n_requests = 0;
+ ping_run_t *pr = 0;
+ u32 ping_run_index = 0;
+ u16 icmp_id;
+
+ static u32 rand_seed = 0;
+
+ if (PREDICT_FALSE (!rand_seed))
+ rand_seed = random_default_seed ();
+
+ icmp_id = random_u32 (&rand_seed) & 0xffff;
+
+ while (hash_get (pm->ping_run_by_icmp_id, icmp_id))
+ {
+ vlib_cli_output (vm, "ICMP ID collision at %d, incrementing", icmp_id);
+ icmp_id++;
+ }
+ pool_get (pm->ping_runs, pr);
+ ping_run_index = pr - pm->ping_runs;
+ pr->cli_process_id = curr_proc;
+ pr->cli_thread_index = vlib_get_thread_index ();
+ pr->icmp_id = icmp_id;
+ hash_set (pm->ping_run_by_icmp_id, icmp_id, ping_run_index);
+ for (i = 1; i <= ping_repeat; i++)
+ {
+ f64 sleep_interval;
+ f64 time_ping_sent = vlib_time_now (vm);
+ /* Reset pr: a ping run in another process could have changed pm->ping_runs */
+ pr = vec_elt_at_index (pm->ping_runs, ping_run_index);
+ pr->curr_seq = i;
+ if (pa6 &&
+ (SEND_PING_OK ==
+ send_ip6_ping (vm, ping_main.ip6_main, table_id, pa6, sw_if_index,
+ i, icmp_id, data_len, ping_burst, verbose)))
+ {
+ n_requests += ping_burst;
+ }
+ if (pa4 &&
+ (SEND_PING_OK ==
+ send_ip4_ping (vm, ping_main.ip4_main, table_id, pa4, sw_if_index,
+ i, icmp_id, data_len, ping_burst, verbose)))
+ {
+ n_requests += ping_burst;
+ }
+ while ((i <= ping_repeat)
+ &&
+ ((sleep_interval =
+ time_ping_sent + ping_interval - vlib_time_now (vm)) > 0.0))
+ {
+ uword event_type, *event_data = 0;
+ vlib_process_wait_for_event_or_clock (vm, sleep_interval);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0: /* no events => timeout */
+ break;
+ case PING_RESPONSE_IP6:
+ {
+ int i;
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ u32 bi0 = event_data[i];
+ print_ip6_icmp_reply (vm, bi0);
+ n_replies++;
+ if (0 != bi0)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ }
+ }
+ }
+ break;
+ case PING_RESPONSE_IP4:
+ {
+ int i;
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ u32 bi0 = event_data[i];
+ print_ip4_icmp_reply (vm, bi0);
+ n_replies++;
+ if (0 != bi0)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ }
+ }
+ }
+ break;
+ default:
+ /* someone pressed a key, abort */
+ vlib_cli_output (vm, "Aborted due to a keypress.");
+ i = 1 + ping_repeat;
+ break;
+ }
+ vec_free (event_data);
+ }
+ }
+ vlib_cli_output (vm, "\n");
+ {
+ float loss =
+ (0 ==
+ n_requests) ? 0 : 100.0 * ((float) n_requests -
+ (float) n_replies) / (float) n_requests;
+ vlib_cli_output (vm,
+ "Statistics: %u sent, %u received, %f%% packet loss\n",
+ n_requests, n_replies, loss);
+ /* Reset pr: a ping run in another process could have changed pm->ping_runs */
+ pr = vec_elt_at_index (pm->ping_runs, ping_run_index);
+ hash_unset (pm->ping_run_by_icmp_id, icmp_id);
+ pool_put (pm->ping_runs, pr);
+ }
+}
+
+
+
+
+
+static clib_error_t *
+ping_ip_address (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip4_address_t a4;
+ ip6_address_t a6;
+ clib_error_t *error = 0;
+ u32 ping_repeat = 5;
+ u32 ping_burst = 1;
+ u8 ping_ip4, ping_ip6;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 data_len = PING_DEFAULT_DATA_LEN;
+ u32 verbose = 0;
+ f64 ping_interval = PING_DEFAULT_INTERVAL;
+ u32 sw_if_index, table_id;
+
+ table_id = 0;
+ ping_ip4 = ping_ip6 = 0;
+ sw_if_index = ~0;
+
+ if (unformat (input, "%U", unformat_ip4_address, &a4))
+ {
+ ping_ip4 = 1;
+ }
+ else if (unformat (input, "%U", unformat_ip6_address, &a6))
+ {
+ ping_ip6 = 1;
+ }
+ else if (unformat (input, "ipv4"))
+ {
+ if (unformat (input, "%U", unformat_ip4_address, &a4))
+ {
+ ping_ip4 = 1;
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "expecting IPv4 address but got `%U'",
+ format_unformat_error, input);
+ }
+ }
+ else if (unformat (input, "ipv6"))
+ {
+ if (unformat (input, "%U", unformat_ip6_address, &a6))
+ {
+ ping_ip6 = 1;
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "expecting IPv6 address but got `%U'",
+ format_unformat_error, input);
+ }
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "expecting IP4/IP6 address `%U'. Usage: ping <addr> [source <intf>] [size <datasz>] [repeat <count>] [verbose]",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ /* allow for the second AF in the same ping */
+ if (!ping_ip4 && (unformat (input, "ipv4")))
+ {
+ if (unformat (input, "%U", unformat_ip4_address, &a4))
+ {
+ ping_ip4 = 1;
+ }
+ }
+ else if (!ping_ip6 && (unformat (input, "ipv6")))
+ {
+ if (unformat (input, "%U", unformat_ip6_address, &a6))
+ {
+ ping_ip6 = 1;
+ }
+ }
+
+ /* parse the rest of the parameters in a cycle */
+ while (!unformat_eof (input, NULL))
+ {
+ if (unformat (input, "source"))
+ {
+ if (!unformat_user
+ (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error =
+ clib_error_return (0,
+ "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "size"))
+ {
+ if (!unformat (input, "%u", &data_len))
+ {
+ error =
+ clib_error_return (0,
+ "expecting size but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ if (data_len > PING_MAXIMUM_DATA_SIZE)
+ {
+ error =
+ clib_error_return (0,
+ "%d is bigger than maximum allowed payload size %d",
+ data_len, PING_MAXIMUM_DATA_SIZE);
+ goto done;
+ }
+ }
+ else if (unformat (input, "table-id"))
+ {
+ if (!unformat (input, "%u", &table_id))
+ {
+ error =
+ clib_error_return (0,
+ "expecting table-id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "interval"))
+ {
+ if (!unformat (input, "%f", &ping_interval))
+ {
+ error =
+ clib_error_return (0,
+ "expecting interval (floating point number) got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "repeat"))
+ {
+ if (!unformat (input, "%u", &ping_repeat))
+ {
+ error =
+ clib_error_return (0,
+ "expecting repeat count but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "burst"))
+ {
+ if (!unformat (input, "%u", &ping_burst))
+ {
+ error =
+ clib_error_return (0,
+ "expecting burst count but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "verbose"))
+ {
+ verbose = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (ping_burst < 1 || ping_burst > VLIB_FRAME_SIZE)
+ return clib_error_return (0, "burst size must be between 1 and %u",
+ VLIB_FRAME_SIZE);
+
+ run_ping_ip46_address (vm, table_id, ping_ip4 ? &a4 : NULL,
+ ping_ip6 ? &a6 : NULL, sw_if_index, ping_interval,
+ ping_repeat, data_len, ping_burst, verbose);
+done:
+ return error;
+}
+
+/*?
+ * This command sends an ICMP ECHO_REQUEST to network hosts. The address
+ * can be an IPv4 or IPv6 address (or both at the same time).
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to ping an IPv4 address:
+ * @cliexstart{ping 172.16.1.2 source GigabitEthernet2/0/0 repeat 2}
+ * 64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=.1090 ms
+ * 64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=.0914 ms
+ *
+ * Statistics: 2 sent, 2 received, 0% packet loss
+ * @cliexend
+ *
+ * Example of how to ping both an IPv4 and an IPv6 address at the same time:
+ * @cliexstart{ping 172.16.1.2 ipv6 fe80::24a5:f6ff:fe9c:3a36 source GigabitEthernet2/0/0 repeat 2 verbose}
+ * Adjacency index: 10, sw_if_index: 1
+ * Adj: ip6-discover-neighbor
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Adjacency index: 0, sw_if_index: 4294967295
+ * Adj: ip4-miss
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Source address: 172.16.1.1
+ * 64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=.1899 ms
+ * Adjacency index: 10, sw_if_index: 1
+ * Adj: ip6-discover-neighbor
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Adjacency index: 0, sw_if_index: 4294967295
+ * Adj: ip4-miss
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Source address: 172.16.1.1
+ * 64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=.0910 ms
+ *
+ * Statistics: 4 sent, 2 received, 50% packet loss
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ping_command, static) =
+{
+ .path = "ping",
+ .function = ping_ip_address,
+ .short_help = "ping {<ip-addr> | ipv4 <ip4-addr> | ipv6 <ip6-addr>}"
+ " [ipv4 <ip4-addr> | ipv6 <ip6-addr>] [source <interface>]"
+ " [size <pktsize>] [interval <sec>] [repeat <cnt>] [table-id <id>]"
+ " [verbose]",
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ping_cli_init (vlib_main_t * vm)
+{
+ ping_main_t *pm = &ping_main;
+ pm->ip6_main = &ip6_main;
+ pm->ip4_main = &ip4_main;
+ icmp6_register_type (vm, ICMP6_echo_reply, ip6_icmp_echo_reply_node.index);
+ ip4_icmp_register_type (vm, ICMP4_echo_reply,
+ ip4_icmp_echo_reply_node.index);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ping_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ping.h b/src/vnet/ip/ping.h
new file mode 100644
index 00000000..b1b71f68
--- /dev/null
+++ b/src/vnet/ip/ping.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_ping_h
+#define included_vnet_ping_h
+
+
+#include <vnet/ip/ip.h>
+
+#include <vnet/ip/lookup.h>
+
+typedef enum
+{
+ PING_RESPONSE_IP6 = 42,
+ PING_RESPONSE_IP4,
+} ping_response_type_t;
+
+typedef enum
+{
+ SEND_PING_OK = 0,
+ SEND_PING_ALLOC_FAIL,
+ SEND_PING_NO_INTERFACE,
+ SEND_PING_NO_TABLE,
+ SEND_PING_NO_SRC_ADDRESS,
+} send_ip46_ping_result_t;
+
+/*
+ * Currently running ping command.
+ */
+typedef struct ping_run_t
+{
+ u16 icmp_id;
+ u16 curr_seq;
+ uword cli_process_id;
+ uword cli_thread_index;
+} ping_run_t;
+
+typedef struct ping_main_t
+{
+ ip6_main_t *ip6_main;
+ ip4_main_t *ip4_main;
+ ping_run_t *ping_runs;
+ /* hash table mapping ICMP id to the index of the ping run, used to
+ find back the CLI process to signal on a reply */
+ uword *ping_run_by_icmp_id;
+} ping_main_t;
+
+extern ping_main_t ping_main;
+
+#define PING_DEFAULT_DATA_LEN 60
+#define PING_DEFAULT_INTERVAL 1.0
+
+#define PING_MAXIMUM_DATA_SIZE (VLIB_BUFFER_DATA_SIZE - sizeof(ip6_header_t) - sizeof(icmp46_header_t) - offsetof(icmp46_echo_request_t, data))
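+
+/*
+ * Sizing note (illustrative; assumes the stock VLIB_BUFFER_DATA_SIZE of
+ * 2048 bytes): the maximum payload is whatever remains after the
+ * worst-case (IPv6) IP header, the ICMP header and the id/seq/timestamp
+ * preamble of icmp46_echo_request_t, so one echo request always fits in
+ * a single buffer.
+ */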
+
+/* *INDENT-OFF* */
+
+typedef CLIB_PACKED (struct {
+ u16 id;
+ u16 seq;
+ f64 time_sent;
+ u8 data[0];
+}) icmp46_echo_request_t;
+
+
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6;
+ icmp46_header_t icmp;
+ icmp46_echo_request_t icmp_echo;
+}) icmp6_echo_request_header_t;
+
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ icmp46_header_t icmp;
+ icmp46_echo_request_t icmp_echo;
+}) icmp4_echo_request_header_t;
+
+/* *INDENT-ON* */
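+
+/*
+ * These packed structs lay out the IP header, ICMP header and echo
+ * payload contiguously, so send_ip{4,6}_ping () can fill an entire
+ * request with a single struct overlay on the buffer.
+ */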
+
+
+typedef struct
+{
+ u16 id;
+ u16 seq;
+ u8 bound;
+} icmp_echo_trace_t;
+
+
+
+
+typedef enum
+{
+ ICMP6_ECHO_REPLY_NEXT_DROP,
+ ICMP6_ECHO_REPLY_NEXT_PUNT,
+ ICMP6_ECHO_REPLY_N_NEXT,
+} icmp6_echo_reply_next_t;
+
+typedef enum
+{
+ ICMP4_ECHO_REPLY_NEXT_DROP,
+ ICMP4_ECHO_REPLY_NEXT_PUNT,
+ ICMP4_ECHO_REPLY_N_NEXT,
+} icmp4_echo_reply_next_t;
+
+#endif /* included_vnet_ping_h */
diff --git a/src/vnet/ip/ports.def b/src/vnet/ip/ports.def
new file mode 100644
index 00000000..cdb754f5
--- /dev/null
+++ b/src/vnet/ip/ports.def
@@ -0,0 +1,757 @@
+/*
+ * ip/ports.def: tcp/udp port definitions
+ *
+ * Eliot Dresselhaus
+ * August, 2005
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+PORT NUMBERS
+
+(last updated 18 October 2005)
+
+The port numbers are divided into three ranges: the Well Known Ports,
+the Registered Ports, and the Dynamic and/or Private Ports.
+
+The Well Known Ports are those from 0 through 1023.
+
+The Registered Ports are those from 1024 through 49151
+
+The Dynamic and/or Private Ports are those from 49152 through 65535
+
+
+************************************************************************
+* PLEASE NOTE THE FOLLOWING: *
+* *
+* 1. UNASSIGNED PORT NUMBERS SHOULD NOT BE USED. THE IANA WILL ASSIGN *
+* THE NUMBER FOR THE PORT AFTER YOUR APPLICATION HAS BEEN APPROVED. *
+* *
+* 2. ASSIGNMENT OF A PORT NUMBER DOES NOT IN ANY WAY IMPLY AN *
+* ENDORSEMENT OF AN APPLICATION OR PRODUCT, AND THE FACT THAT NETWORK *
+* TRAFFIC IS FLOWING TO OR FROM A REGISTERED PORT DOES NOT MEAN THAT *
+* IT IS "GOOD" TRAFFIC. FIREWALL AND SYSTEM ADMINISTRATORS SHOULD *
+* CHOOSE HOW TO CONFIGURE THEIR SYSTEMS BASED ON THEIR KNOWLEDGE OF *
+* THE TRAFFIC IN QUESTION, NOT WHETHER THERE IS A PORT NUMBER *
+* REGISTERED OR NOT. *
+************************************************************************
+
+
+WELL KNOWN PORT NUMBERS
+
+The Well Known Ports are assigned by the IANA and on most systems can
+only be used by system (or root) processes or by programs executed by
+privileged users.
+
+Ports are used in the TCP [RFC793] to name the ends of logical
+connections which carry long term conversations. For the purpose of
+providing services to unknown callers, a service contact port is
+defined. This list specifies the port used by the server process as
+its contact port. The contact port is sometimes called the
+"well-known port".
+
+To the extent possible, these same port assignments are used with the
+UDP [RFC768].
+
+The range for assigned ports managed by the IANA is 0-1023.
+*/
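+
+/*
+ * Consumption sketch (assumed; this follows the usual .def/X-macro
+ * pattern): the includer defines ip_port() before including this file,
+ * e.g. to build an enum of well-known ports:
+ *
+ *   #define ip_port(s,n) IP_PORT_##s = n,
+ *   typedef enum
+ *   {
+ *   #include <vnet/ip/ports.def>
+ *   } ip_port_t;
+ *   #undef ip_port
+ */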
+ip_port (TCPMUX, 1)
+ip_port (COMPRESS_NET_MANAGEMENT, 2)
+ip_port (COMPRESS_NET, 3)
+ip_port (RJE, 5)
+ip_port (ECHO, 7)
+ip_port (DISCARD, 9)
+ip_port (SYSTAT, 11)
+ip_port (DAYTIME, 13)
+ip_port (QOTD, 17)
+ip_port (MSP, 18)
+ip_port (CHARGEN, 19)
+ip_port (FTP_DATA, 20)
+ip_port (FTP, 21)
+ip_port (SSH, 22)
+ip_port (TELNET, 23)
+ip_port (SMTP, 25)
+ip_port (NSW_FE, 27)
+ip_port (MSG_ICP, 29)
+ip_port (MSG_AUTH, 31)
+ip_port (DSP, 33)
+ip_port (TIME, 37)
+ip_port (RAP, 38)
+ip_port (RLP, 39)
+ip_port (GRAPHICS, 41)
+ip_port (NAME, 42)
+ip_port (NAMESERVER, 42)
+ip_port (NICNAME, 43)
+ip_port (MPM_FLAGS, 44)
+ip_port (MPM, 45)
+ip_port (MPM_SND, 46)
+ip_port (NI_FTP, 47)
+ip_port (AUDITD, 48)
+ip_port (TACACS, 49)
+ip_port (RE_MAIL_CK, 50)
+ip_port (LA_MAINT, 51)
+ip_port (XNS_TIME, 52)
+ip_port (DNS, 53)
+ip_port (XNS_CH, 54)
+ip_port (ISI_GL, 55)
+ip_port (XNS_AUTH, 56)
+ip_port (XNS_MAIL, 58)
+ip_port (NI_MAIL, 61)
+ip_port (ACAS, 62)
+ip_port (WHOIS_PLUS_PLUS, 63)
+ip_port (COVIA, 64)
+ip_port (TACACS_DS, 65)
+ip_port (ORACLE_SQL_NET, 66)
+ip_port (BOOTPS, 67)
+ip_port (BOOTPC, 68)
+ip_port (TFTP, 69)
+ip_port (GOPHER, 70)
+ip_port (NETRJS_1, 71)
+ip_port (NETRJS_2, 72)
+ip_port (NETRJS_3, 73)
+ip_port (NETRJS_4, 74)
+ip_port (DEOS, 76)
+ip_port (VETTCP, 78)
+ip_port (FINGER, 79)
+ip_port (WWW, 80)
+ip_port (HOSTS2_NS, 81)
+ip_port (XFER, 82)
+ip_port (MIT_ML_DEV, 83)
+ip_port (CTF, 84)
+ip_port (MIT_ML_DEV1, 85)
+ip_port (MFCOBOL, 86)
+ip_port (KERBEROS, 88)
+ip_port (SU_MIT_TG, 89)
+ip_port (DNSIX, 90)
+ip_port (MIT_DOV, 91)
+ip_port (NPP, 92)
+ip_port (DCP, 93)
+ip_port (OBJCALL, 94)
+ip_port (SUPDUP, 95)
+ip_port (DIXIE, 96)
+ip_port (SWIFT_RVF, 97)
+ip_port (TACNEWS, 98)
+ip_port (METAGRAM, 99)
+ip_port (NEWACCT, 100)
+ip_port (HOSTNAME, 101)
+ip_port (ISO_TSAP, 102)
+ip_port (GPPITNP, 103)
+ip_port (ACR_NEMA, 104)
+ip_port (CSO, 105)
+ip_port (CSNET_NS, 105)
+ip_port (3COM_TSMUX, 106)
+ip_port (RTELNET, 107)
+ip_port (SNAGAS, 108)
+ip_port (POP2, 109)
+ip_port (POP3, 110)
+ip_port (SUNRPC, 111)
+ip_port (MCIDAS, 112)
+ip_port (IDENT, 113)
+ip_port (SFTP, 115)
+ip_port (ANSANOTIFY, 116)
+ip_port (UUCP_PATH, 117)
+ip_port (SQLSERV, 118)
+ip_port (NNTP, 119)
+ip_port (CFDPTKT, 120)
+ip_port (ERPC, 121)
+ip_port (SMAKYNET, 122)
+ip_port (NTP, 123)
+ip_port (ANSATRADER, 124)
+ip_port (LOCUS_MAP, 125)
+ip_port (NXEDIT, 126)
+ip_port (LOCUS_CON, 127)
+ip_port (GSS_XLICEN, 128)
+ip_port (PWDGEN, 129)
+ip_port (CISCO_FNA, 130)
+ip_port (CISCO_TNA, 131)
+ip_port (CISCO_SYS, 132)
+ip_port (STATSRV, 133)
+ip_port (INGRES_NET, 134)
+ip_port (EPMAP, 135)
+ip_port (PROFILE, 136)
+ip_port (NETBIOS_NS, 137)
+ip_port (NETBIOS_DGM, 138)
+ip_port (NETBIOS_SSN, 139)
+ip_port (EMFIS_DATA, 140)
+ip_port (EMFIS_CNTL, 141)
+ip_port (BL_IDM, 142)
+ip_port (IMAP, 143)
+ip_port (UMA, 144)
+ip_port (UAAC, 145)
+ip_port (ISO_TP0, 146)
+ip_port (ISO_IP, 147)
+ip_port (JARGON, 148)
+ip_port (AED_512, 149)
+ip_port (SQL_NET, 150)
+ip_port (HEMS, 151)
+ip_port (BFTP, 152)
+ip_port (SGMP, 153)
+ip_port (NETSC_PROD, 154)
+ip_port (NETSC_DEV, 155)
+ip_port (SQLSRV, 156)
+ip_port (KNET_CMP, 157)
+ip_port (PCMAIL_SRV, 158)
+ip_port (NSS_ROUTING, 159)
+ip_port (SGMP_TRAPS, 160)
+ip_port (SNMP, 161)
+ip_port (SNMPTRAP, 162)
+ip_port (CMIP_MAN, 163)
+ip_port (CMIP_AGENT, 164)
+ip_port (XNS_COURIER, 165)
+ip_port (S_NET, 166)
+ip_port (NAMP, 167)
+ip_port (RSVD, 168)
+ip_port (SEND, 169)
+ip_port (PRINT_SRV, 170)
+ip_port (MULTIPLEX, 171)
+ip_port (CL1, 172)
+ip_port (XYPLEX_MUX, 173)
+ip_port (MAILQ, 174)
+ip_port (VMNET, 175)
+ip_port (GENRAD_MUX, 176)
+ip_port (XDMCP, 177)
+ip_port (NEXTSTEP, 178)
+ip_port (BGP, 179)
+ip_port (RIS, 180)
+ip_port (UNIFY, 181)
+ip_port (AUDIT, 182)
+ip_port (OCBINDER, 183)
+ip_port (OCSERVER, 184)
+ip_port (REMOTE_KIS, 185)
+ip_port (KIS, 186)
+ip_port (ACI, 187)
+ip_port (MUMPS, 188)
+ip_port (QFT, 189)
+ip_port (GACP, 190)
+ip_port (PROSPERO, 191)
+ip_port (OSU_NMS, 192)
+ip_port (SRMP, 193)
+ip_port (IRC, 194)
+ip_port (DN6_NLM_AUD, 195)
+ip_port (DN6_SMM_RED, 196)
+ip_port (DLS, 197)
+ip_port (DLS_MON, 198)
+ip_port (SMUX, 199)
+ip_port (SRC, 200)
+ip_port (AT_RTMP, 201)
+ip_port (AT_NBP, 202)
+ip_port (AT_3, 203)
+ip_port (AT_ECHO, 204)
+ip_port (AT_5, 205)
+ip_port (AT_ZIS, 206)
+ip_port (AT_7, 207)
+ip_port (AT_8, 208)
+ip_port (QMTP, 209)
+ip_port (Z39_50, 210)
+ip_port (TI914CG, 211)
+ip_port (ANET, 212)
+ip_port (IPX, 213)
+ip_port (VMPWSCS, 214)
+ip_port (SOFTPC, 215)
+ip_port (CAILIC, 216)
+ip_port (DBASE, 217)
+ip_port (MPP, 218)
+ip_port (UARPS, 219)
+ip_port (IMAP3, 220)
+ip_port (FLN_SPX, 221)
+ip_port (RSH_SPX, 222)
+ip_port (CDC, 223)
+ip_port (MASQDIALER, 224)
+ip_port (DIRECT, 242)
+ip_port (SUR_MEAS, 243)
+ip_port (INBUSINESS, 244)
+ip_port (LINK, 245)
+ip_port (DSP3270, 246)
+ip_port (SUBNTBCST_TFTP, 247)
+ip_port (BHFHS, 248)
+ip_port (RAP1, 256)
+ip_port (SET, 257)
+ip_port (YAK_CHAT, 258)
+ip_port (ESRO_GEN, 259)
+ip_port (OPENPORT, 260)
+ip_port (NSIIOPS, 261)
+ip_port (ARCISDMS, 262)
+ip_port (HDAP, 263)
+ip_port (BGMP, 264)
+ip_port (X_BONE_CTL, 265)
+ip_port (SST, 266)
+ip_port (TD_SERVICE, 267)
+ip_port (TD_REPLICA, 268)
+ip_port (HTTP_MGMT, 280)
+ip_port (PERSONAL_LINK, 281)
+ip_port (CABLEPORT_AX, 282)
+ip_port (RESCAP, 283)
+ip_port (CORERJD, 284)
+ip_port (FXP, 286)
+ip_port (K_BLOCK, 287)
+ip_port (NOVASTORBAKCUP, 308)
+ip_port (ENTRUSTTIME, 309)
+ip_port (BHMDS, 310)
+ip_port (ASIP_WEBADMIN, 311)
+ip_port (VSLMP, 312)
+ip_port (MAGENTA_LOGIC, 313)
+ip_port (OPALIS_ROBOT, 314)
+ip_port (DPSI, 315)
+ip_port (DECAUTH, 316)
+ip_port (ZANNET, 317)
+ip_port (PKIX_TIMESTAMP, 318)
+ip_port (PTP_EVENT, 319)
+ip_port (PTP_GENERAL, 320)
+ip_port (PIP, 321)
+ip_port (RTSPS, 322)
+ip_port (TEXAR, 333)
+ip_port (PDAP, 344)
+ip_port (PAWSERV, 345)
+ip_port (ZSERV, 346)
+ip_port (FATSERV, 347)
+ip_port (CSI_SGWP, 348)
+ip_port (MFTP, 349)
+ip_port (MATIP_TYPE_A, 350)
+ip_port (MATIP_TYPE_B, 351)
+ip_port (BHOETTY, 351)
+ip_port (DTAG_STE_SB, 352)
+ip_port (BHOEDAP4, 352)
+ip_port (NDSAUTH, 353)
+ip_port (BH611, 354)
+ip_port (DATEX_ASN, 355)
+ip_port (CLOANTO_NET_1, 356)
+ip_port (BHEVENT, 357)
+ip_port (SHRINKWRAP, 358)
+ip_port (NSRMP, 359)
+ip_port (SCOI2ODIALOG, 360)
+ip_port (SEMANTIX, 361)
+ip_port (SRSSEND, 362)
+ip_port (RSVP_TUNNEL, 363)
+ip_port (AURORA_CMGR, 364)
+ip_port (DTK, 365)
+ip_port (ODMR, 366)
+ip_port (MORTGAGEWARE, 367)
+ip_port (QBIKGDP, 368)
+ip_port (RPC2PORTMAP, 369)
+ip_port (CODAAUTH2, 370)
+ip_port (CLEARCASE, 371)
+ip_port (ULISTPROC, 372)
+ip_port (LEGENT_1, 373)
+ip_port (LEGENT_2, 374)
+ip_port (HASSLE, 375)
+ip_port (NIP, 376)
+ip_port (TNETOS, 377)
+ip_port (DSETOS, 378)
+ip_port (IS99C, 379)
+ip_port (IS99S, 380)
+ip_port (HP_COLLECTOR, 381)
+ip_port (HP_MANAGED_NODE, 382)
+ip_port (HP_ALARM_MGR, 383)
+ip_port (ARNS, 384)
+ip_port (IBM_APP, 385)
+ip_port (ASA, 386)
+ip_port (AURP, 387)
+ip_port (UNIDATA_LDM, 388)
+ip_port (LDAP, 389)
+ip_port (UIS, 390)
+ip_port (SYNOTICS_RELAY, 391)
+ip_port (SYNOTICS_BROKER, 392)
+ip_port (META5, 393)
+ip_port (EMBL_NDT, 394)
+ip_port (NETCP, 395)
+ip_port (NETWARE_IP, 396)
+ip_port (MPTN, 397)
+ip_port (KRYPTOLAN, 398)
+ip_port (ISO_TSAP_C2, 399)
+ip_port (WORK_SOL, 400)
+ip_port (UPS, 401)
+ip_port (GENIE, 402)
+ip_port (DECAP, 403)
+ip_port (NCED, 404)
+ip_port (NCLD, 405)
+ip_port (IMSP, 406)
+ip_port (TIMBUKTU, 407)
+ip_port (PRM_SM, 408)
+ip_port (PRM_NM, 409)
+ip_port (DECLADEBUG, 410)
+ip_port (RMT, 411)
+ip_port (SYNOPTICS_TRAP, 412)
+ip_port (SMSP, 413)
+ip_port (INFOSEEK, 414)
+ip_port (BNET, 415)
+ip_port (SILVERPLATTER, 416)
+ip_port (ONMUX, 417)
+ip_port (HYPER_G, 418)
+ip_port (ARIEL1, 419)
+ip_port (SMPTE, 420)
+ip_port (ARIEL2, 421)
+ip_port (ARIEL3, 422)
+ip_port (OPC_JOB_START, 423)
+ip_port (OPC_JOB_TRACK, 424)
+ip_port (ICAD_EL, 425)
+ip_port (SMARTSDP, 426)
+ip_port (SVRLOC, 427)
+ip_port (OCS_CMU, 428)
+ip_port (OCS_AMU, 429)
+ip_port (UTMPSD, 430)
+ip_port (UTMPCD, 431)
+ip_port (IASD, 432)
+ip_port (NNSP, 433)
+ip_port (MOBILEIP_AGENT, 434)
+ip_port (MOBILIP_MN, 435)
+ip_port (DNA_CML, 436)
+ip_port (COMSCM, 437)
+ip_port (DSFGW, 438)
+ip_port (DASP, 439)
+ip_port (SGCP, 440)
+ip_port (DECVMS_SYSMGT, 441)
+ip_port (CVC_HOSTD, 442)
+ip_port (HTTPS, 443)
+ip_port (SNPP, 444)
+ip_port (MICROSOFT_DS, 445)
+ip_port (DDM_RDB, 446)
+ip_port (DDM_DFM, 447)
+ip_port (DDM_SSL, 448)
+ip_port (AS_SERVERMAP, 449)
+ip_port (TSERVER, 450)
+ip_port (SFS_SMP_NET, 451)
+ip_port (SFS_CONFIG, 452)
+ip_port (CREATIVESERVER, 453)
+ip_port (CONTENTSERVER, 454)
+ip_port (CREATIVEPARTNR, 455)
+ip_port (MACON_TCP, 456)
+ip_port (SCOHELP, 457)
+ip_port (APPLEQTC, 458)
+ip_port (AMPR_RCMD, 459)
+ip_port (SKRONK, 460)
+ip_port (DATASURFSRV, 461)
+ip_port (DATASURFSRVSEC, 462)
+ip_port (ALPES, 463)
+ip_port (KPASSWD, 464)
+ip_port (URD, 465)
+ip_port (DIGITAL_VRC, 466)
+ip_port (MYLEX_MAPD, 467)
+ip_port (PHOTURIS, 468)
+ip_port (RCP, 469)
+ip_port (SCX_PROXY, 470)
+ip_port (MONDEX, 471)
+ip_port (LJK_LOGIN, 472)
+ip_port (HYBRID_POP, 473)
+ip_port (TN_TL_W1, 474)
+ip_port (TCPNETHASPSRV, 475)
+ip_port (TN_TL_FD1, 476)
+ip_port (SS7NS, 477)
+ip_port (SPSC, 478)
+ip_port (IAFSERVER, 479)
+ip_port (IAFDBASE, 480)
+ip_port (PH, 481)
+ip_port (BGS_NSI, 482)
+ip_port (ULPNET, 483)
+ip_port (INTEGRA_SME, 484)
+ip_port (POWERBURST, 485)
+ip_port (AVIAN, 486)
+ip_port (SAFT, 487)
+ip_port (GSS_HTTP, 488)
+ip_port (NEST_PROTOCOL, 489)
+ip_port (MICOM_PFS, 490)
+ip_port (GO_LOGIN, 491)
+ip_port (TICF_1, 492)
+ip_port (TICF_2, 493)
+ip_port (POV_RAY, 494)
+ip_port (INTECOURIER, 495)
+ip_port (PIM_RP_DISC, 496)
+ip_port (DANTZ, 497)
+ip_port (SIAM, 498)
+ip_port (ISO_ILL, 499)
+ip_port (ISAKMP, 500)
+ip_port (STMF, 501)
+ip_port (ASA_APPL_PROTO, 502)
+ip_port (INTRINSA, 503)
+ip_port (CITADEL, 504)
+ip_port (MAILBOX_LM, 505)
+ip_port (OHIMSRV, 506)
+ip_port (CRS, 507)
+ip_port (XVTTP, 508)
+ip_port (SNARE, 509)
+ip_port (FCP, 510)
+ip_port (PASSGO, 511)
+ip_port (EXEC, 512)
+ip_port (LOGIN, 513)
+ip_port (SHELL, 514)
+ip_port (PRINTER, 515)
+ip_port (VIDEOTEX, 516)
+ip_port (TALK, 517)
+ip_port (NTALK, 518)
+ip_port (UTIME, 519)
+ip_port (EFS, 520)
+ip_port (RIPNG, 521)
+ip_port (ULP, 522)
+ip_port (IBM_DB2, 523)
+ip_port (NCP, 524)
+ip_port (TIMED, 525)
+ip_port (TEMPO, 526)
+ip_port (STX, 527)
+ip_port (CUSTIX, 528)
+ip_port (IRC_SERV, 529)
+ip_port (COURIER, 530)
+ip_port (CONFERENCE, 531)
+ip_port (NETNEWS, 532)
+ip_port (NETWALL, 533)
+ip_port (MM_ADMIN, 534)
+ip_port (IIOP, 535)
+ip_port (OPALIS_RDV, 536)
+ip_port (NMSP, 537)
+ip_port (GDOMAP, 538)
+ip_port (APERTUS_LDP, 539)
+ip_port (UUCP, 540)
+ip_port (UUCP_RLOGIN, 541)
+ip_port (COMMERCE, 542)
+ip_port (KLOGIN, 543)
+ip_port (KSHELL, 544)
+ip_port (APPLEQTCSRVR, 545)
+ip_port (DHCPV6_CLIENT, 546)
+ip_port (DHCPV6_SERVER, 547)
+ip_port (AFPOVERTCP, 548)
+ip_port (IDFP, 549)
+ip_port (NEW_RWHO, 550)
+ip_port (CYBERCASH, 551)
+ip_port (DEVSHR_NTS, 552)
+ip_port (PIRP, 553)
+ip_port (RTSP, 554)
+ip_port (DSF, 555)
+ip_port (REMOTEFS, 556)
+ip_port (OPENVMS_SYSIPC, 557)
+ip_port (SDNSKMP, 558)
+ip_port (TEEDTAP, 559)
+ip_port (RMONITOR, 560)
+ip_port (MONITOR, 561)
+ip_port (CHSHELL, 562)
+ip_port (NNTPS, 563)
+ip_port (9PFS, 564)
+ip_port (WHOAMI, 565)
+ip_port (STREETTALK, 566)
+ip_port (BANYAN_RPC, 567)
+ip_port (MS_SHUTTLE, 568)
+ip_port (MS_ROME, 569)
+ip_port (METER, 570)
+ip_port (METER1, 571)
+ip_port (SONAR, 572)
+ip_port (BANYAN_VIP, 573)
+ip_port (FTP_AGENT, 574)
+ip_port (VEMMI, 575)
+ip_port (IPCD, 576)
+ip_port (VNAS, 577)
+ip_port (IPDD, 578)
+ip_port (DECBSRV, 579)
+ip_port (SNTP_HEARTBEAT, 580)
+ip_port (BDP, 581)
+ip_port (SCC_SECURITY, 582)
+ip_port (PHILIPS_VC, 583)
+ip_port (KEYSERVER, 584)
+ip_port (IMAP4_SSL, 585)
+ip_port (PASSWORD_CHG, 586)
+ip_port (SUBMISSION, 587)
+ip_port (CAL, 588)
+ip_port (EYELINK, 589)
+ip_port (TNS_CML, 590)
+ip_port (HTTP_ALT, 591)
+ip_port (EUDORA_SET, 592)
+ip_port (HTTP_RPC_EPMAP, 593)
+ip_port (TPIP, 594)
+ip_port (CAB_PROTOCOL, 595)
+ip_port (SMSD, 596)
+ip_port (PTCNAMESERVICE, 597)
+ip_port (SCO_WEBSRVRMG3, 598)
+ip_port (ACP, 599)
+ip_port (IPCSERVER, 600)
+ip_port (SYSLOG_CONN, 601)
+ip_port (XMLRPC_BEEP, 602)
+ip_port (IDXP, 603)
+ip_port (TUNNEL, 604)
+ip_port (SOAP_BEEP, 605)
+ip_port (URM, 606)
+ip_port (NQS, 607)
+ip_port (SIFT_UFT, 608)
+ip_port (NPMP_TRAP, 609)
+ip_port (NPMP_LOCAL, 610)
+ip_port (NPMP_GUI, 611)
+ip_port (HMMP_IND, 612)
+ip_port (HMMP_OP, 613)
+ip_port (SSHELL, 614)
+ip_port (SCO_INETMGR, 615)
+ip_port (SCO_SYSMGR, 616)
+ip_port (SCO_DTMGR, 617)
+ip_port (DEI_ICDA, 618)
+ip_port (COMPAQ_EVM, 619)
+ip_port (SCO_WEBSRVRMGR, 620)
+ip_port (ESCP_IP, 621)
+ip_port (COLLABORATOR, 622)
+ip_port (ASF_RMCP, 623)
+ip_port (CRYPTOADMIN, 624)
+ip_port (DEC_DLM, 625)
+ip_port (ASIA, 626)
+ip_port (PASSGO_TIVOLI, 627)
+ip_port (QMQP, 628)
+ip_port (3COM_AMP3, 629)
+ip_port (RDA, 630)
+ip_port (IPP, 631)
+ip_port (BMPP, 632)
+ip_port (SERVSTAT, 633)
+ip_port (GINAD, 634)
+ip_port (RLZDBASE, 635)
+ip_port (LDAPS, 636)
+ip_port (LANSERVER, 637)
+ip_port (MCNS_SEC, 638)
+ip_port (MSDP, 639)
+ip_port (ENTRUST_SPS, 640)
+ip_port (REPCMD, 641)
+ip_port (ESRO_EMSDP, 642)
+ip_port (SANITY, 643)
+ip_port (DWR, 644)
+ip_port (PSSC, 645)
+ip_port (LDP, 646)
+ip_port (DHCP_FAILOVER, 647)
+ip_port (RRP, 648)
+ip_port (CADVIEW_3D, 649)
+ip_port (OBEX, 650)
+ip_port (IEEE_MMS, 651)
+ip_port (HELLO_PORT, 652)
+ip_port (REPSCMD, 653)
+ip_port (AODV, 654)
+ip_port (TINC, 655)
+ip_port (SPMP, 656)
+ip_port (RMC, 657)
+ip_port (TENFOLD, 658)
+ip_port (MAC_SRVR_ADMIN, 660)
+ip_port (HAP, 661)
+ip_port (PFTP, 662)
+ip_port (PURENOISE, 663)
+ip_port (ASF_SECURE_RMCP, 664)
+ip_port (SUN_DR, 665)
+ip_port (MDQS, 666)
+ip_port (DOOM, 666)
+ip_port (DISCLOSE, 667)
+ip_port (MECOMM, 668)
+ip_port (MEREGISTER, 669)
+ip_port (VACDSM_SWS, 670)
+ip_port (VACDSM_APP, 671)
+ip_port (VPPS_QUA, 672)
+ip_port (CIMPLEX, 673)
+ip_port (ACAP, 674)
+ip_port (DCTP, 675)
+ip_port (VPPS_VIA, 676)
+ip_port (VPP, 677)
+ip_port (GGF_NCP, 678)
+ip_port (MRM, 679)
+ip_port (ENTRUST_AAAS, 680)
+ip_port (ENTRUST_AAMS, 681)
+ip_port (XFR, 682)
+ip_port (CORBA_IIOP, 683)
+ip_port (CORBA_IIOP_SSL, 684)
+ip_port (MDC_PORTMAPPER, 685)
+ip_port (HCP_WISMAR, 686)
+ip_port (ASIPREGISTRY, 687)
+ip_port (REALM_RUSD, 688)
+ip_port (NMAP, 689)
+ip_port (VATP, 690)
+ip_port (MSEXCH_ROUTING, 691)
+ip_port (HYPERWAVE_ISP, 692)
+ip_port (CONNENDP, 693)
+ip_port (HA_CLUSTER, 694)
+ip_port (IEEE_MMS_SSL, 695)
+ip_port (RUSHD, 696)
+ip_port (UUIDGEN, 697)
+ip_port (OLSR, 698)
+ip_port (ACCESSNETWORK, 699)
+ip_port (EPP, 700)
+ip_port (LMP, 701)
+ip_port (IRIS_BEEP, 702)
+ip_port (ELCSD, 704)
+ip_port (AGENTX, 705)
+ip_port (SILC, 706)
+ip_port (BORLAND_DSJ, 707)
+ip_port (ENTRUST_KMSH, 709)
+ip_port (ENTRUST_ASH, 710)
+ip_port (CISCO_TDP, 711)
+ip_port (TBRPF, 712)
+ip_port (NETVIEWDM1, 729)
+ip_port (NETVIEWDM2, 730)
+ip_port (NETVIEWDM3, 731)
+ip_port (NETGW, 741)
+ip_port (NETRCS, 742)
+ip_port (FLEXLM, 744)
+ip_port (FUJITSU_DEV, 747)
+ip_port (RIS_CM, 748)
+ip_port (KERBEROS_ADM, 749)
+ip_port (RFILE, 750)
+ip_port (PUMP, 751)
+ip_port (QRH, 752)
+ip_port (RRH, 753)
+ip_port (TELL, 754)
+ip_port (NLOGIN, 758)
+ip_port (CON, 759)
+ip_port (NS, 760)
+ip_port (RXE, 761)
+ip_port (QUOTAD, 762)
+ip_port (CYCLESERV, 763)
+ip_port (OMSERV, 764)
+ip_port (WEBSTER, 765)
+ip_port (PHONEBOOK, 767)
+ip_port (VID, 769)
+ip_port (CADLOCK, 770)
+ip_port (RTIP, 771)
+ip_port (CYCLESERV2, 772)
+ip_port (SUBMIT, 773)
+ip_port (RPASSWD, 774)
+ip_port (ENTOMB, 775)
+ip_port (WPAGES, 776)
+ip_port (MULTILING_HTTP, 777)
+ip_port (WPGS, 780)
+ip_port (MDBS_DAEMON, 800)
+ip_port (DEVICE, 801)
+ip_port (FCP_UDP, 810)
+ip_port (ITM_MCELL_S, 828)
+ip_port (PKIX_3_CA_RA, 829)
+ip_port (DHCP_FAILOVER2, 847)
+ip_port (GDOI, 848)
+ip_port (ISCSI, 860)
+ip_port (RSYNC, 873)
+ip_port (ICLCNET_LOCATE, 886)
+ip_port (ICLCNET_SVINFO, 887)
+ip_port (ACCESSBUILDER, 888)
+ip_port (CDDBP, 888)
+ip_port (OMGINITIALREFS, 900)
+ip_port (SMPNAMERES, 901)
+ip_port (IDEAFARM_CHAT, 902)
+ip_port (IDEAFARM_CATCH, 903)
+ip_port (XACT_BACKUP, 911)
+ip_port (APEX_MESH, 912)
+ip_port (APEX_EDGE, 913)
+ip_port (FTPS_DATA, 989)
+ip_port (FTPS, 990)
+ip_port (NAS, 991)
+ip_port (TELNETS, 992)
+ip_port (IMAPS, 993)
+ip_port (IRCS, 994)
+ip_port (POP3S, 995)
+ip_port (VSINET, 996)
+ip_port (MAITRD, 997)
+ip_port (BUSBOY, 998)
+ip_port (GARCON, 999)
+ip_port (PUPROUTER, 999)
+ip_port (CADLOCK2, 1000)
+ip_port (SURF, 1010)
+
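These ip_port entries are X-macro fodder: a consumer defines ip_port
before including the file and gets an enum, a name table, or both for
free. A minimal sketch of one such consumer (the macro shape is fixed
by the file above; the include path and type name are assumptions for
illustration, not part of this patch):

/* Hypothetical consumer of ports.def (names are illustrative). */
typedef enum
{
#define ip_port(s, n) IP_PORT_##s = n,
#include <vnet/ip/ports.def>
#undef ip_port
} ip_port_t;

Duplicate numbers (e.g. NAME and NAMESERVER, both 42) are fine here:
C allows distinct enumerators to share a value.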
diff --git a/src/vnet/ip/protocols.def b/src/vnet/ip/protocols.def
new file mode 100644
index 00000000..77fab31d
--- /dev/null
+++ b/src/vnet/ip/protocols.def
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Emacs editing mode -*-C-*-
+
+From http://www.iana.org/assignments/protocol-numbers
+
+PROTOCOL NUMBERS
+
+(last updated 18 October 2004)
+
+In the Internet Protocol version 4 (IPv4) [RFC791] there is a field,
+called "Protocol", to identify the next level protocol. This is an 8
+bit field. In Internet Protocol version 6 (IPv6) [RFC1883] this field
+is called the "Next Header" field.
+*/
+ip_protocol (0, IP6_HOP_BY_HOP_OPTIONS)
+ip_protocol (1, ICMP)
+ip_protocol (2, IGMP)
+ip_protocol (3, GGP)
+ip_protocol (4, IP_IN_IP)
+ip_protocol (5, ST)
+ip_protocol (6, TCP)
+ip_protocol (7, CBT)
+ip_protocol (8, EGP)
+ip_protocol (9, IGP)
+ip_protocol (10, BBN_RCC_MON)
+ip_protocol (11, NVP_II)
+ip_protocol (12, PUP)
+ip_protocol (13, ARGUS)
+ip_protocol (14, EMCON)
+ip_protocol (15, XNET)
+ip_protocol (16, CHAOS)
+ip_protocol (17, UDP)
+ip_protocol (18, MUX)
+ip_protocol (19, DCN_MEAS)
+ip_protocol (20, HMP)
+ip_protocol (21, PRM)
+ip_protocol (22, XNS_IDP)
+ip_protocol (23, TRUNK_1)
+ip_protocol (24, TRUNK_2)
+ip_protocol (25, LEAF_1)
+ip_protocol (26, LEAF_2)
+ip_protocol (27, RDP)
+ip_protocol (28, IRTP)
+ip_protocol (29, ISO_TP4)
+ip_protocol (30, NETBLT)
+ip_protocol (31, MFE_NSP)
+ip_protocol (32, MERIT_INP)
+ip_protocol (33, SEP)
+ip_protocol (34, 3PC)
+ip_protocol (35, IDPR)
+ip_protocol (36, XTP)
+ip_protocol (37, DDP)
+ip_protocol (38, IDPR_CMTP)
+ip_protocol (39, TP)
+ip_protocol (40, IL)
+ip_protocol (41, IPV6)
+ip_protocol (42, SDRP)
+ip_protocol (43, IPV6_ROUTE)
+ip_protocol (44, IPV6_FRAGMENTATION)
+ip_protocol (45, IDRP)
+ip_protocol (46, RSVP)
+ip_protocol (47, GRE)
+ip_protocol (48, MHRP)
+ip_protocol (49, BNA)
+ip_protocol (50, IPSEC_ESP)
+ip_protocol (51, IPSEC_AH)
+ip_protocol (52, I_NLSP)
+ip_protocol (53, SWIPE)
+ip_protocol (54, NARP)
+ip_protocol (55, MOBILE)
+ip_protocol (56, TLSP)
+ip_protocol (57, SKIP)
+ip_protocol (58, ICMP6)
+ip_protocol (59, IP6_NONXT)
+ip_protocol (60, IP6_DESTINATION_OPTIONS)
+ip_protocol (62, CFTP)
+ip_protocol (64, SAT_EXPAK)
+ip_protocol (65, KRYPTOLAN)
+ip_protocol (66, RVD)
+ip_protocol (67, IPPC)
+ip_protocol (69, SAT_MON)
+ip_protocol (70, VISA)
+ip_protocol (71, IPCV)
+ip_protocol (72, CPNX)
+ip_protocol (73, CPHB)
+ip_protocol (74, WSN)
+ip_protocol (75, PVP)
+ip_protocol (76, BR_SAT_MON)
+ip_protocol (77, SUN_ND)
+ip_protocol (78, WB_MON)
+ip_protocol (79, WB_EXPAK)
+ip_protocol (80, ISO_IP)
+ip_protocol (81, VMTP)
+ip_protocol (82, SECURE_VMTP)
+ip_protocol (83, VINES)
+ip_protocol (84, TTP)
+ip_protocol (85, NSFNET_IGP)
+ip_protocol (86, DGP)
+ip_protocol (87, TCF)
+ip_protocol (88, EIGRP)
+ip_protocol (89, OSPF)
+ip_protocol (90, SPRITE_RPC)
+ip_protocol (91, LARP)
+ip_protocol (92, MTP)
+ip_protocol (93, AX)
+ip_protocol (94, IPIP)
+ip_protocol (95, MICP)
+ip_protocol (96, SCC_SP)
+ip_protocol (97, ETHERIP)
+ip_protocol (98, ENCAP)
+ip_protocol (100, GMTP)
+ip_protocol (101, IFMP)
+ip_protocol (102, PNNI)
+ip_protocol (103, PIM)
+ip_protocol (104, ARIS)
+ip_protocol (105, SCPS)
+ip_protocol (106, QNX)
+ip_protocol (107, A)
+ip_protocol (108, IPCOMP)
+ip_protocol (109, SNP)
+ip_protocol (110, COMPAQ_PEER)
+ip_protocol (111, IPX_IN_IP)
+ip_protocol (112, VRRP)
+ip_protocol (113, PGM)
+ip_protocol (115, L2TP)
+ip_protocol (116, DDX)
+ip_protocol (117, IATP)
+ip_protocol (118, STP)
+ip_protocol (119, SRP)
+ip_protocol (120, UTI)
+ip_protocol (121, SMP)
+ip_protocol (122, SM)
+ip_protocol (123, PTP)
+ip_protocol (124, ISIS)
+ip_protocol (125, FIRE)
+ip_protocol (126, CRTP)
+ip_protocol (127, CRUDP)
+ip_protocol (128, SSCOPMCE)
+ip_protocol (129, IPLT)
+ip_protocol (130, SPS)
+ip_protocol (131, PIPE)
+ip_protocol (132, SCTP)
+ip_protocol (133, FC)
+ip_protocol (134, RSVP_E2E_IGNORE)
+ip_protocol (135, MOBILITY)
+ip_protocol (136, UDP_LITE)
+ip_protocol (137, MPLS_IN_IP)
+ip_protocol (255, RESERVED)
+
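Expanded through the same X-macro pattern sketched above for ports.def,
these entries yield an IP_PROTOCOL_* enum. A parser then dispatches on
the IPv4 "Protocol" / IPv6 "Next Header" octet roughly as follows (a
sketch for illustration, not part of this patch):

/* Hypothetical dispatch on the next-level protocol octet. */
static void
dispatch_next_level (u8 protocol)
{
  switch (protocol)
    {
    case IP_PROTOCOL_UDP:	/* 17 */
      /* continue into the UDP input path */
      break;
    case IP_PROTOCOL_TCP:	/* 6 */
      /* continue into the TCP input path */
      break;
    case IP_PROTOCOL_IPSEC_ESP:	/* 50 */
      /* continue into the ESP decrypt path */
      break;
    default:
      /* unhandled next-level protocol */
      break;
    }
}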
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
new file mode 100644
index 00000000..0869954c
--- /dev/null
+++ b/src/vnet/ip/punt.c
@@ -0,0 +1,830 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Local TCP/IP stack punt infrastructure.
+ *
+ * Provides a set of VPP nodes together with the relevant APIs and CLI
+ * commands in order to adjust and dispatch packets from the VPP data plane
+ * to the local TCP/IP stack.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/udp/udp.h>
+#include <vnet/tcp/tcp.h>
+#include <vnet/ip/punt.h>
+#include <vppinfra/sparse_vec.h>
+#include <vlib/unix/unix.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/uio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define foreach_punt_next \
+ _ (PUNT, "error-punt")
+
+typedef enum
+{
+#define _(s,n) PUNT_NEXT_##s,
+ foreach_punt_next
+#undef _
+ PUNT_N_NEXT,
+} punt_next_t;
+
+enum punt_socket_rx_next_e
+{
+ PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT,
+ PUNT_SOCKET_RX_NEXT_IP4_LOOKUP,
+ PUNT_SOCKET_RX_NEXT_IP6_LOOKUP,
+ PUNT_SOCKET_RX_N_NEXT
+};
+
+vlib_node_registration_t udp4_punt_node;
+vlib_node_registration_t udp6_punt_node;
+vlib_node_registration_t udp4_punt_socket_node;
+vlib_node_registration_t udp6_punt_socket_node;
+static vlib_node_registration_t punt_socket_rx_node;
+
+punt_main_t punt_main;
+
+char *
+vnet_punt_get_server_pathname (void)
+{
+ punt_main_t *pm = &punt_main;
+ return pm->sun_path;
+}
+
+/** @brief IPv4/IPv6 UDP punt node main loop.
+
+    This is the main loop inline function for the IPv4/IPv6 UDP punt
+    transition node.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+    @param is_ip4 indicates whether this is the IPv4 or the IPv6 node
+*/
+always_inline uword
+udp46_punt_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ u32 n_left_from, *from, *to_next;
+ word advance;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ /* udp[46]_lookup hands us the data payload, not the IP header */
+ if (is_ip4)
+ advance = -(sizeof (ip4_header_t) + sizeof (udp_header_t));
+ else
+ advance = -(sizeof (ip6_header_t) + sizeof (udp_header_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, PUNT_NEXT_PUNT, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ vlib_buffer_advance (b0, advance);
+ b0->error = node->errors[PUNT_ERROR_UDP_PORT];
+ }
+
+ vlib_put_next_frame (vm, node, PUNT_NEXT_PUNT, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char *punt_error_strings[] = {
+#define punt_error(n,s) s,
+#include "punt_error.def"
+#undef punt_error
+};
+
+/** @brief IPv4 UDP punt node.
+ @node ip4-udp-punt
+
+    This is the IPv4 UDP punt transition node. It is registered as a next
+    node of "ip4-udp-lookup" for the UDP port(s) requested for punt.
+    The buffer's current data pointer is rewound to the original packet's
+    IPv4 header. All buffers are dispatched to "error-punt".
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: next index usage
+
+ @em Sets:
+    - <code>b->current_data</code>
+    - <code>b->current_length</code>
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the "error-punt" node
+*/
+static uword
+udp4_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+/** @brief IPv6 UDP punt node.
+ @node ip6-udp-punt
+
+    This is the IPv6 UDP punt transition node. It is registered as a next
+    node of "ip6-udp-lookup" for the UDP port(s) requested for punt.
+    The buffer's current data pointer is rewound to the original packet's
+    IPv6 header. All buffers are dispatched to "error-punt".
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: next index usage
+
+ @em Sets:
+    - <code>b->current_data</code>
+    - <code>b->current_length</code>
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the "error-punt" node
+*/
+static uword
+udp6_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_punt_node) = {
+ .function = udp4_punt,
+ .name = "ip4-udp-punt",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+
+ .n_next_nodes = PUNT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [PUNT_NEXT_##s] = n,
+ foreach_punt_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp4_punt_node, udp4_punt);
+
+VLIB_REGISTER_NODE (udp6_punt_node) = {
+ .function = udp6_punt,
+ .name = "ip6-udp-punt",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+
+ .n_next_nodes = PUNT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [PUNT_NEXT_##s] = n,
+ foreach_punt_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp6_punt_node, udp6_punt);
+
+/* *INDENT-ON* */
+
+static struct sockaddr_un *
+punt_socket_get (bool is_ip4, u16 port)
+{
+ punt_main_t *pm = &punt_main;
+ punt_client_t *v = is_ip4 ? pm->clients_by_dst_port4 :
+ pm->clients_by_dst_port6;
+
+ u16 i = sparse_vec_index (v, port);
+ if (i == SPARSE_VEC_INVALID_INDEX)
+ return 0;
+
+ return &vec_elt (v, i).caddr;
+}
+
+static void
+punt_socket_register (bool is_ip4, u8 protocol, u16 port,
+ char *client_pathname)
+{
+ punt_main_t *pm = &punt_main;
+ punt_client_t c, *n;
+ punt_client_t *v = is_ip4 ? pm->clients_by_dst_port4 :
+ pm->clients_by_dst_port6;
+
+ memset (&c, 0, sizeof (c));
+  /* strncpy instead of memcpy: avoid reading past the end of a short
+     client_pathname; c was zeroed above, so the result stays terminated. */
+  strncpy (c.caddr.sun_path, client_pathname, sizeof (c.caddr.sun_path) - 1);
+ c.caddr.sun_family = AF_UNIX;
+ c.port = port;
+ n = sparse_vec_validate (v, port);
+ n[0] = c;
+}
+
+/* $$$$ Just leaves the mapping in place for now */
+static void
+punt_socket_unregister (bool is_ip4, u8 protocol, u16 port)
+{
+ return;
+}
+
+always_inline uword
+udp46_punt_socket_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, bool is_ip4)
+{
+ u32 *buffers = vlib_frame_args (frame);
+ uword n_packets = frame->n_vectors;
+ struct iovec *iovecs = 0;
+ punt_main_t *pm = &punt_main;
+ int i;
+
+ u32 node_index = is_ip4 ? udp4_punt_socket_node.index :
+ udp6_punt_socket_node.index;
+
+ for (i = 0; i < n_packets; i++)
+ {
+ struct iovec *iov;
+ vlib_buffer_t *b;
+ uword l;
+ punt_packetdesc_t packetdesc;
+
+ b = vlib_get_buffer (vm, buffers[i]);
+
+ /* Reverse UDP Punt advance */
+ udp_header_t *udp;
+ if (is_ip4)
+ {
+ vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
+ sizeof (udp_header_t)));
+ ip4_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ else
+ {
+ vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
+ sizeof (udp_header_t)));
+ ip6_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+
+ u16 port = clib_net_to_host_u16 (udp->dst_port);
+
+ /*
+       * Find the registered client for this destination port.
+       * If no client is registered, count the error and drop the packet.
+ */
+ struct sockaddr_un *caddr;
+ caddr = punt_socket_get (is_ip4, port);
+ if (!caddr)
+ {
+ vlib_node_increment_counter (vm, node_index,
+ PUNT_ERROR_SOCKET_TX_ERROR, 1);
+ goto error;
+ }
+
+ /* Re-set iovecs if present. */
+ if (iovecs)
+ _vec_len (iovecs) = 0;
+
+ /* Add packet descriptor */
+ packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ packetdesc.action = 0;
+ vec_add2 (iovecs, iov, 1);
+ iov->iov_base = &packetdesc;
+ iov->iov_len = sizeof (packetdesc);
+
+      /* VLIB buffer chain -> Unix iovec(s). */
+ vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
+ vec_add2 (iovecs, iov, 1);
+ iov->iov_base = b->data + b->current_data;
+ iov->iov_len = l = b->current_length;
+
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ do
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+
+ vec_add2 (iovecs, iov, 1);
+
+ iov->iov_base = b->data + b->current_data;
+ iov->iov_len = b->current_length;
+ l += b->current_length;
+ }
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ }
+
+ struct msghdr msg = {
+ .msg_name = caddr,
+ .msg_namelen = sizeof (*caddr),
+ .msg_iov = iovecs,
+ .msg_iovlen = vec_len (iovecs),
+ };
+
+      /* Cast: sendmsg() returns -1 on failure, which an unsigned
+         comparison against the uword l would otherwise never catch. */
+      if (sendmsg (pm->socket_fd, &msg, 0) < (ssize_t) l)
+ vlib_node_increment_counter (vm, node_index,
+ PUNT_ERROR_SOCKET_TX_ERROR, 1);
+ }
+
+error:
+ vlib_buffer_free_no_next (vm, buffers, n_packets);
+
+ return n_packets;
+}
+
+static uword
+udp4_punt_socket (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_socket_inline (vm, node, from_frame, true /* is_ip4 */ );
+}
+
+static uword
+udp6_punt_socket (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_socket_inline (vm, node, from_frame, false /* is_ip4 */ );
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
+ .function = udp4_punt_socket,
+ .name = "ip4-udp-punt-socket",
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+};
+VLIB_REGISTER_NODE (udp6_punt_socket_node) = {
+ .function = udp6_punt_socket,
+ .name = "ip6-udp-punt-socket",
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ .vector_size = sizeof (u32),
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+};
+/* *INDENT-ON* */
+
+typedef struct
+{
+ enum punt_action_e action;
+ u32 sw_if_index;
+} punt_trace_t;
+
+static u8 *
+format_punt_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ punt_trace_t *t = va_arg (*va, punt_trace_t *);
+ s = format (s, "%U Action: %d", format_vnet_sw_if_index_name,
+ vnm, t->sw_if_index, t->action);
+ return s;
+}
+
+static uword
+punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd)
+{
+ const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ u32 next = node->cached_next_index;
+ u32 n_left_to_next, next_index;
+ u32 *to_next;
+ u32 error = PUNT_ERROR_NONE;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ /* $$$$ Only dealing with one buffer at the time for now */
+
+ u32 bi;
+ vlib_buffer_t *b;
+ punt_packetdesc_t packetdesc;
+ ssize_t size;
+ struct iovec io[2];
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ {
+ error = PUNT_ERROR_NOBUFFER;
+ goto error;
+ }
+
+ b = vlib_get_buffer (vm, bi);
+ io[0].iov_base = &packetdesc;
+ io[0].iov_len = sizeof (packetdesc);
+ io[1].iov_base = b->data;
+ io[1].iov_len = buffer_size;
+
+ size = readv (fd, io, 2);
+ /* We need at least the packet descriptor plus a header */
+ if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t)))
+ {
+ vlib_buffer_free (vm, &bi, 1);
+ error = PUNT_ERROR_READV;
+ goto error;
+ }
+
+ b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b->current_length = size - sizeof (packetdesc);
+
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+
+ switch (packetdesc.action)
+ {
+ case PUNT_L2:
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index;
+ next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT;
+ break;
+
+ case PUNT_IP4_ROUTED:
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+ next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP;
+ break;
+
+ case PUNT_IP6_ROUTED:
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+ next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP;
+ break;
+
+ default:
+ error = PUNT_ERROR_ACTION;
+ vlib_buffer_free (vm, &bi, 1);
+ goto error;
+ }
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ punt_trace_t *t;
+ vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->sw_if_index = packetdesc.sw_if_index;
+ t->action = packetdesc.action;
+ }
+
+ to_next[0] = bi;
+ to_next++;
+ n_left_to_next--;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next,
+ bi, next_index);
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ return 1;
+
+error:
+ vlib_node_increment_counter (vm, punt_socket_rx_node.index, error, 1);
+ return 0;
+}
+
+static uword
+punt_socket_rx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ punt_main_t *pm = &punt_main;
+ u32 total_count = 0;
+ int i;
+
+  for (i = 0; i < vec_len (pm->ready_fds); i++)
+    total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[i]);
+
+  /* Reset in one go: vec_del1 inside the loop would swap the tail
+     element into slot i, which the subsequent i++ would then skip. */
+  vec_reset_length (pm->ready_fds);
+
+  return total_count;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (punt_socket_rx_node, static) = {
+  .function = punt_socket_rx,
+  .name = "punt-socket-rx",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+  .vector_size = 1,
+  .n_errors = PUNT_N_ERROR,
+  .error_strings = punt_error_strings,
+  .n_next_nodes = PUNT_SOCKET_RX_N_NEXT,
+  .next_nodes = {
+    [PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output",
+    [PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup",
+    [PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup",
+  },
+  .format_trace = format_punt_trace,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+punt_socket_read_ready (clib_file_t * uf)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ punt_main_t *pm = &punt_main;
+
+  /* Schedule the rx node */
+ vlib_node_set_interrupt_pending (vm, punt_socket_rx_node.index);
+ vec_add1 (pm->ready_fds, uf->file_descriptor);
+
+ return 0;
+}
+
+clib_error_t *
+vnet_punt_socket_add (vlib_main_t * vm, u32 header_version,
+ bool is_ip4, u8 protocol, u16 port,
+ char *client_pathname)
+{
+ punt_main_t *pm = &punt_main;
+
+ if (!pm->is_configured)
+ return clib_error_return (0, "socket is not configured");
+
+ if (header_version != PUNT_PACKETDESC_VERSION)
+ return clib_error_return (0, "Invalid packet descriptor version");
+
+ /* For now we only support UDP punt */
+ if (protocol != IP_PROTOCOL_UDP)
+ return clib_error_return (0,
+ "only UDP protocol (%d) is supported, got %d",
+ IP_PROTOCOL_UDP, protocol);
+
+ if (port == (u16) ~ 0)
+ return clib_error_return (0, "UDP port number required");
+
+ /* Register client */
+ punt_socket_register (is_ip4, protocol, port, client_pathname);
+
+ u32 node_index = is_ip4 ? udp4_punt_socket_node.index :
+ udp6_punt_socket_node.index;
+
+ udp_register_dst_port (vm, port, node_index, is_ip4);
+
+ return 0;
+}
+
+clib_error_t *
+vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, u8 l4_protocol, u16 port)
+{
+ punt_main_t *pm = &punt_main;
+
+ if (!pm->is_configured)
+ return clib_error_return (0, "socket is not configured");
+
+ punt_socket_unregister (is_ip4, l4_protocol, port);
+ udp_unregister_dst_port (vm, port, is_ip4);
+
+ return 0;
+}
+
+/**
+ * @brief Request IP traffic punt to the local TCP/IP stack.
+ *
+ * @em Note
+ * - UDP and TCP are the only protocols supported in the current implementation
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param ipv IP protocol version.
+ *            4 - IPv4, 6 - IPv6, ~0 for both IPv6 and IPv4
+ * @param protocol 8-bit L4 protocol value
+ *                 UDP is 17
+ *                 TCP is 6
+ * @param port 16-bit L4 port number when applicable (UDP only)
+ *
+ * @returns 0 on success, non-zero value otherwise
+ */
+clib_error_t *
+vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port,
+ bool is_add)
+{
+
+ /* For now we only support UDP punt */
+ if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP)
+ return clib_error_return (0,
+ "only UDP (%d) and TCP (%d) protocols are supported, got %d",
+ IP_PROTOCOL_UDP, IP_PROTOCOL_TCP, protocol);
+
+ if (ipv != (u8) ~ 0 && ipv != 4 && ipv != 6)
+ return clib_error_return (0, "IP version must be 4 or 6, got %d", ipv);
+
+ if (port == (u16) ~ 0)
+ {
+ if ((ipv == 4) || (ipv == (u8) ~ 0))
+ {
+ if (protocol == IP_PROTOCOL_UDP)
+ udp_punt_unknown (vm, 1, is_add);
+ else if (protocol == IP_PROTOCOL_TCP)
+ tcp_punt_unknown (vm, 1, is_add);
+ }
+
+ if ((ipv == 6) || (ipv == (u8) ~ 0))
+ {
+ if (protocol == IP_PROTOCOL_UDP)
+ udp_punt_unknown (vm, 0, is_add);
+ else if (protocol == IP_PROTOCOL_TCP)
+ tcp_punt_unknown (vm, 0, is_add);
+ }
+
+ return 0;
+ }
+
+ else if (is_add)
+ {
+ if (protocol == IP_PROTOCOL_TCP)
+ return clib_error_return (0, "punt TCP ports is not supported yet");
+
+ if (ipv == 4 || ipv == (u8) ~ 0)
+ udp_register_dst_port (vm, port, udp4_punt_node.index, 1);
+
+ if (ipv == 6 || ipv == (u8) ~ 0)
+ udp_register_dst_port (vm, port, udp6_punt_node.index, 0);
+
+ return 0;
+ }
+ else
+ return clib_error_return (0, "punt delete is not supported yet");
+}
+
+static clib_error_t *
+punt_cli (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 port;
+ bool is_add = true;
+ u32 protocol = ~0;
+ clib_error_t *error;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = false;
+ else if (unformat (input, "all"))
+ {
+ /* punt both IPv6 and IPv4 when used in CLI */
+ error = vnet_punt_add_del (vm, ~0, protocol, ~0, is_add);
+ if (error)
+ clib_error_report (error);
+ }
+ else if (unformat (input, "%d", &port))
+ {
+ /* punt both IPv6 and IPv4 when used in CLI */
+ error = vnet_punt_add_del (vm, ~0, protocol, port, is_add);
+ if (error)
+ clib_error_report (error);
+ }
+      else if (unformat (input, "udp"))
+        protocol = IP_PROTOCOL_UDP;
+      else if (unformat (input, "tcp"))
+        protocol = IP_PROTOCOL_TCP;
+      else
+        /* Reject unconsumed input, which would otherwise spin this
+           loop forever. */
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+
+ return 0;
+}
+
+/*?
+ * The set of '<em>set punt</em>' commands allows specific IP traffic to
+ * be punted to the host TCP/IP stack.
+ *
+ * @em Note
+ * - UDP is the only protocol supported in the current implementation
+ * - All TCP traffic is currently punted to the host by default
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to request NTP traffic to be punted
+ * @cliexcmd{set punt udp 123}
+ *
+ * Example of how to request all 'unknown' UDP traffic to be punted
+ * @cliexcmd{set punt udp all}
+ *
+ * Example of how to stop punting all 'unknown' UDP traffic
+ * @cliexcmd{set punt udp del all}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (punt_command, static) = {
+ .path = "set punt",
+ .short_help = "set punt [udp|tcp] [del] <all | port-num1 [port-num2 ...]>",
+ .function = punt_cli,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+punt_init (vlib_main_t * vm)
+{
+ punt_main_t *pm = &punt_main;
+
+ pm->clients_by_dst_port6 = sparse_vec_new
+ (sizeof (pm->clients_by_dst_port6[0]),
+ BITS (((udp_header_t *) 0)->dst_port));
+ pm->clients_by_dst_port4 = sparse_vec_new
+ (sizeof (pm->clients_by_dst_port4[0]),
+ BITS (((udp_header_t *) 0)->dst_port));
+
+ pm->is_configured = false;
+ pm->interface_output_node = vlib_get_node_by_name (vm,
+ (u8 *)
+ "interface-output");
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (punt_init);
+
+static clib_error_t *
+punt_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ punt_main_t *pm = &punt_main;
+ char *socket_path = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "socket %s", &socket_path))
+ strncpy (pm->sun_path, socket_path, 108 - 1);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (socket_path == 0)
+ return 0;
+
+ /* UNIX domain socket */
+ struct sockaddr_un addr;
+ if ((pm->socket_fd = socket (AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0)) == -1)
+ {
+ return clib_error_return (0, "socket error");
+ }
+
+ memset (&addr, 0, sizeof (addr));
+ addr.sun_family = AF_UNIX;
+ if (*socket_path == '\0')
+ {
+ *addr.sun_path = '\0';
+ strncpy (addr.sun_path + 1, socket_path + 1,
+ sizeof (addr.sun_path) - 2);
+ }
+ else
+ {
+ strncpy (addr.sun_path, socket_path, sizeof (addr.sun_path) - 1);
+ unlink (socket_path);
+ }
+
+ if (bind (pm->socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1)
+ {
+ return clib_error_return (0, "bind error");
+ }
+
+ /* Register socket */
+ clib_file_main_t *fm = &file_main;
+ clib_file_t template = { 0 };
+ template.read_function = punt_socket_read_ready;
+ template.file_descriptor = pm->socket_fd;
+ pm->clib_file_index = clib_file_add (fm, &template);
+
+ pm->is_configured = true;
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (punt_config, "punt");
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
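The wire format on the punt socket is deliberately simple: each datagram
is a punt_packetdesc_t (defined in punt.h below) followed by the packet,
starting at its Ethernet header. A hypothetical external client sketch,
assuming VPP was started with a stanza like "punt { socket
/run/vpp/punt.sock }" and that the client path below was registered via
vnet_punt_socket_add() (the path, buffer sizes and the 32-bit action
field are assumptions for illustration, not part of this patch):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>

/* Mirrors punt_packetdesc_t; the enum is assumed to pack as 32 bits. */
typedef struct __attribute__ ((packed))
{
  uint32_t sw_if_index;
  uint32_t action;
} packetdesc_t;

int
main (void)
{
  struct sockaddr_un addr = { .sun_family = AF_UNIX };
  strncpy (addr.sun_path, "/tmp/punt-client.sock",
           sizeof (addr.sun_path) - 1);

  int fd = socket (AF_UNIX, SOCK_DGRAM, 0);
  if (fd < 0)
    return 1;
  unlink (addr.sun_path);
  if (bind (fd, (struct sockaddr *) &addr, sizeof (addr)) < 0)
    return 1;

  packetdesc_t desc;
  char pkt[65536];
  struct iovec io[2] = {
    { .iov_base = &desc, .iov_len = sizeof (desc) },
    { .iov_base = pkt, .iov_len = sizeof (pkt) },
  };

  for (;;)
    {
      struct msghdr msg = { .msg_iov = io, .msg_iovlen = 2 };
      ssize_t n = recvmsg (fd, &msg, 0);
      if (n <= (ssize_t) sizeof (desc))
        continue;               /* a descriptor alone is not a packet */
      printf ("punted %zd bytes, rx sw_if_index %u, action %u\n",
              n - (ssize_t) sizeof (desc), desc.sw_if_index, desc.action);
    }
}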
diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h
new file mode 100644
index 00000000..9defa881
--- /dev/null
+++ b/src/vnet/ip/punt.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Definitions for punt infrastructure.
+ */
+#ifndef included_punt_h
+#define included_punt_h
+
+#include <sys/un.h>
+typedef enum
+{
+#define punt_error(n,s) PUNT_ERROR_##n,
+#include <vnet/ip/punt_error.def>
+#undef punt_error
+ PUNT_N_ERROR,
+} punt_error_t;
+
+
+clib_error_t *vnet_punt_add_del (vlib_main_t * vm, u8 ipv,
+ u8 protocol, u16 port, bool is_add);
+clib_error_t *vnet_punt_socket_add (vlib_main_t * vm, u32 header_version,
+ bool is_ip4, u8 protocol, u16 port,
+ char *client_pathname);
+clib_error_t *vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4,
+ u8 l4_protocol, u16 port);
+char *vnet_punt_get_server_pathname (void);
+
+enum punt_action_e
+{
+ PUNT_L2 = 0,
+ PUNT_IP4_ROUTED,
+ PUNT_IP6_ROUTED,
+};
+
+/*
+ * Packet descriptor header. Version 1
+ * If this header changes, the version must also change to notify clients.
+ */
+#define PUNT_PACKETDESC_VERSION 1
+typedef struct __attribute__ ((packed))
+{
+ u32 sw_if_index; /* RX or TX interface */
+ enum punt_action_e action;
+} punt_packetdesc_t;
+
+/*
+ * Client registration
+ */
+typedef struct
+{
+ u16 port;
+ struct sockaddr_un caddr;
+} punt_client_t;
+
+typedef struct
+{
+ int socket_fd;
+ char sun_path[sizeof (struct sockaddr_un)];
+ punt_client_t *clients_by_dst_port4;
+ punt_client_t *clients_by_dst_port6;
+ u32 clib_file_index;
+ bool is_configured;
+ vlib_node_t *interface_output_node;
+ u32 *ready_fds;
+ u32 *rx_buffers;
+} punt_main_t;
+extern punt_main_t punt_main;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
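For completeness, a hypothetical in-VPP registration pairing with the
client sketch above (the port and path are illustrative; the call shape
matches the declaration in this header):

/* Punt IPv4 UDP port 5000 to a client Unix socket (illustrative). */
clib_error_t *err =
  vnet_punt_socket_add (vm, PUNT_PACKETDESC_VERSION,
                        true /* is_ip4 */, IP_PROTOCOL_UDP,
                        5000, "/tmp/punt-client.sock");
if (err)
  clib_error_report (err);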
diff --git a/src/vnet/ip/punt_error.def b/src/vnet/ip/punt_error.def
new file mode 100644
index 00000000..13afa2c7
--- /dev/null
+++ b/src/vnet/ip/punt_error.def
@@ -0,0 +1,27 @@
+/*
+ * punt_error.def: punt errors
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+punt_error (NONE, "no error")
+punt_error (UDP_PORT, "udp port punt")
+punt_error (SOCKET_RX, "Socket RX")
+punt_error (SOCKET_TX, "Socket TX")
+punt_error (SOCKET_RX_ERROR, "Socket RX error")
+punt_error (SOCKET_TX_ERROR, "Socket TX error")
+punt_error (NOBUFFER, "buffer allocation failure")
+punt_error (READV, "socket read failure")
+punt_error (ACTION, "invalid packet descriptor")
+
diff --git a/src/vnet/ipsec-gre/dir.dox b/src/vnet/ipsec-gre/dir.dox
new file mode 100644
index 00000000..e6ffd10b
--- /dev/null
+++ b/src/vnet/ipsec-gre/dir.dox
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir vnet/ipsec-gre
+ @brief L2-GRE over IPSec tunnel interface implementation
+*/
diff --git a/src/vnet/ipsec-gre/error.def b/src/vnet/ipsec-gre/error.def
new file mode 100644
index 00000000..d84e8ed1
--- /dev/null
+++ b/src/vnet/ipsec-gre/error.def
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief L2-GRE over IPSec errors.
+ */
+
+
+ipsec_gre_error (NONE, "no error")
+ipsec_gre_error (UNKNOWN_PROTOCOL, "unknown protocol")
+ipsec_gre_error (UNSUPPORTED_VERSION, "unsupported version")
+ipsec_gre_error (PKTS_DECAP, "GRE input packets decapsulated")
+ipsec_gre_error (PKTS_ENCAP, "GRE output packets encapsulated")
+ipsec_gre_error (NO_SUCH_TUNNEL, "GRE input packets dropped due to missing tunnel")
diff --git a/src/vnet/ipsec-gre/interface.c b/src/vnet/ipsec-gre/interface.c
new file mode 100644
index 00000000..0772ce73
--- /dev/null
+++ b/src/vnet/ipsec-gre/interface.c
@@ -0,0 +1,319 @@
+/*
+ * interface.c: ipsec-gre tunnel interfaces
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief L2-GRE over IPSec tunnel interface.
+ *
+ * Creates an ipsec-gre tunnel interface.
+ * Provides a command line interface so humans can interact with VPP.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ipsec-gre/ipsec_gre.h>
+#include <vnet/ip/format.h>
+#include <vnet/ipsec/ipsec.h>
+
+#include <vnet/ipsec/esp.h>
+
+u8 *
+format_ipsec_gre_tunnel (u8 * s, va_list * args)
+{
+ ipsec_gre_tunnel_t *t = va_arg (*args, ipsec_gre_tunnel_t *);
+ ipsec_gre_main_t *gm = &ipsec_gre_main;
+
+ s = format (s,
+ "[%d] %U (src) %U (dst) local-sa %d remote-sa %d",
+ t - gm->tunnels,
+ format_ip4_address, &t->tunnel_src,
+ format_ip4_address, &t->tunnel_dst,
+ t->local_sa_id, t->remote_sa_id);
+ return s;
+}
+
+static clib_error_t *
+show_ipsec_gre_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ipsec_gre_main_t *igm = &ipsec_gre_main;
+ ipsec_gre_tunnel_t *t;
+
+ if (pool_elts (igm->tunnels) == 0)
+ vlib_cli_output (vm, "No IPSec GRE tunnels configured...");
+
+ /* *INDENT-OFF* */
+ pool_foreach (t, igm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_ipsec_gre_tunnel, t);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ipsec_gre_tunnel_command, static) = {
+ .path = "show ipsec gre tunnel",
+ .function = show_ipsec_gre_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+/* force inclusion from application's main.c */
+clib_error_t *
+ipsec_gre_interface_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ipsec_gre_interface_init);
+
+/**
+ * @brief Add or delete ipsec-gre tunnel interface.
+ *
+ * @param *a vnet_ipsec_gre_add_del_tunnel_args_t - tunnel interface parameters
+ * @param *sw_if_indexp u32 - software interface index
+ * @return int - 0 if success otherwise <code>VNET_API_ERROR_</code>
+ */
+int
+vnet_ipsec_gre_add_del_tunnel (vnet_ipsec_gre_add_del_tunnel_args_t * a,
+ u32 * sw_if_indexp)
+{
+ ipsec_gre_main_t *igm = &ipsec_gre_main;
+ vnet_main_t *vnm = igm->vnet_main;
+ ip4_main_t *im = &ip4_main;
+ ipsec_gre_tunnel_t *t;
+ vnet_hw_interface_t *hi;
+ u32 hw_if_index, sw_if_index;
+ u32 slot;
+ uword *p;
+ u64 key;
+ ipsec_add_del_ipsec_gre_tunnel_args_t args;
+
+ memset (&args, 0, sizeof (args));
+ args.is_add = a->is_add;
+ args.local_sa_id = a->lsa;
+ args.remote_sa_id = a->rsa;
+ args.local_ip.as_u32 = a->src.as_u32;
+ args.remote_ip.as_u32 = a->dst.as_u32;
+
+ key = (u64) a->src.as_u32 << 32 | (u64) a->dst.as_u32;
+ p = hash_get (igm->tunnel_by_key, key);
+
+ if (a->is_add)
+ {
+ /* check if same src/dst pair exists */
+ if (p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get_aligned (igm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ if (vec_len (igm->free_ipsec_gre_tunnel_hw_if_indices) > 0)
+ {
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ hw_if_index = igm->free_ipsec_gre_tunnel_hw_if_indices
+ [vec_len (igm->free_ipsec_gre_tunnel_hw_if_indices) - 1];
+ _vec_len (igm->free_ipsec_gre_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - igm->tunnels;
+ hi->hw_instance = hi->dev_instance;
+
+ /* clear old stats of freed tunnel before reuse */
+ sw_if_index = hi->sw_if_index;
+ vnet_interface_counter_lock (im);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX],
+ sw_if_index);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
+ sw_if_index);
+ vlib_zero_simple_counter
+ (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
+ vnet_interface_counter_unlock (im);
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, ipsec_gre_device_class.index, t - igm->tunnels,
+ ipsec_gre_hw_interface_class.index, t - igm->tunnels);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ sw_if_index = hi->sw_if_index;
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->sw_if_index = sw_if_index;
+ t->local_sa_id = a->lsa;
+ t->remote_sa_id = a->rsa;
+ t->local_sa = ipsec_get_sa_index_by_sa_id (a->lsa);
+ t->remote_sa = ipsec_get_sa_index_by_sa_id (a->rsa);
+
+ ip4_sw_interface_enable_disable (sw_if_index, 1);
+
+ vec_validate_init_empty (igm->tunnel_index_by_sw_if_index,
+ sw_if_index, ~0);
+ igm->tunnel_index_by_sw_if_index[sw_if_index] = t - igm->tunnels;
+
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+
+ hi->min_packet_bytes = 64 + sizeof (gre_header_t) +
+ sizeof (ip4_header_t) + sizeof (esp_header_t) + sizeof (esp_footer_t);
+ hi->per_packet_overhead_bytes =
+ /* preamble */ 8 + /* inter frame gap */ 12;
+
+ /* Standard default gre MTU. */
+ hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
+ 9000;
+
+ clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
+ clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
+
+ hash_set (igm->tunnel_by_key, key, t - igm->tunnels);
+
+ slot = vlib_node_add_named_next_with_slot
+ (vnm->vlib_main, hi->tx_node_index, "esp-encrypt",
+ IPSEC_GRE_OUTPUT_NEXT_ESP_ENCRYPT);
+
+ ASSERT (slot == IPSEC_GRE_OUTPUT_NEXT_ESP_ENCRYPT);
+
+ }
+ else
+ { /* !is_add => delete */
+ /* tunnel needs to exist */
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ t = pool_elt_at_index (igm->tunnels, p[0]);
+
+ sw_if_index = t->sw_if_index;
+ ip4_sw_interface_enable_disable (sw_if_index, 0);
+ vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
+ /* make sure tunnel is removed from l2 bd or xconnect */
+ set_int_l2_mode (igm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0);
+ vec_add1 (igm->free_ipsec_gre_tunnel_hw_if_indices, t->hw_if_index);
+ igm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
+
+ hash_unset (igm->tunnel_by_key, key);
+ pool_put (igm->tunnels, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return ipsec_add_del_ipsec_gre_tunnel (vnm, &args);
+}
+
+static clib_error_t *
+create_ipsec_gre_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ u32 num_m_args = 0;
+ ip4_address_t src, dst;
+ u32 lsa = 0, rsa = 0;
+ vnet_ipsec_gre_add_del_tunnel_args_t _a, *a = &_a;
+ int rv;
+ u32 sw_if_index;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "src %U", unformat_ip4_address, &src))
+ num_m_args++;
+ else if (unformat (line_input, "dst %U", unformat_ip4_address, &dst))
+ num_m_args++;
+ else if (unformat (line_input, "local-sa %d", &lsa))
+ num_m_args++;
+ else if (unformat (line_input, "remote-sa %d", &rsa))
+ num_m_args++;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (num_m_args < 4)
+ {
+ error = clib_error_return (0, "mandatory argument(s) missing");
+ goto done;
+ }
+
+ if (memcmp (&src, &dst, sizeof (src)) == 0)
+ {
+ error = clib_error_return (0, "src and dst are identical");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (*a));
+ a->is_add = is_add;
+ a->lsa = lsa;
+ a->rsa = rsa;
+ clib_memcpy (&a->src, &src, sizeof (src));
+ clib_memcpy (&a->dst, &dst, sizeof (dst));
+
+ rv = vnet_ipsec_gre_add_del_tunnel (a, &sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw_if_index);
+ break;
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "GRE tunnel already exists...");
+ goto done;
+ default:
+ error = clib_error_return (0,
+ "vnet_ipsec_gre_add_del_tunnel returned %d",
+ rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_ipsec_gre_tunnel_command, static) = {
+ .path = "create ipsec gre tunnel",
+ .short_help = "create ipsec gre tunnel src <addr> dst <addr> "
+ "local-sa <id> remote-sa <id> [del]",
+ .function = create_ipsec_gre_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
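A hypothetical CLI session for the command above, assuming SAs 10 and 20
were configured beforehand (interface naming and output are illustrative):

vpp# create ipsec gre tunnel src 192.168.100.1 dst 192.168.100.2 local-sa 10 remote-sa 20
ipsec-gre0
vpp# show ipsec gre tunnel
[0] 192.168.100.1 (src) 192.168.100.2 (dst) local-sa 10 remote-sa 20
vpp# create ipsec gre tunnel src 192.168.100.1 dst 192.168.100.2 local-sa 10 remote-sa 20 del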
diff --git a/src/vnet/ipsec-gre/ipsec_gre.api b/src/vnet/ipsec-gre/ipsec_gre.api
new file mode 100644
index 00000000..793bca0a
--- /dev/null
+++ b/src/vnet/ipsec-gre/ipsec_gre.api
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Add / del ipsec gre tunnel request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param local_sa_id - local SA id
+ @param remote_sa_id - remote SA id
+ @param is_add - 1 if adding the tunnel, 0 if deleting
+ @param src_address - tunnel source address
+ @param dst_address - tunnel destination address
+*/
+define ipsec_gre_add_del_tunnel {
+ u32 client_index;
+ u32 context;
+ u32 local_sa_id;
+ u32 remote_sa_id;
+ u8 is_add;
+ u8 src_address[4];
+ u8 dst_address[4];
+};
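+
+/* Example: adding the tunnel 192.168.1.1 -> 192.168.1.2 with local SA id 10
+   and remote SA id 20 sets is_add = 1, local_sa_id = 10, remote_sa_id = 20,
+   src_address = {192,168,1,1} and dst_address = {192,168,1,2}; integer
+   fields travel in network byte order. */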
+
+/** \brief Reply for add / del ipsec gre tunnel request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index of the new ipsec gre tunnel
+*/
+define ipsec_gre_add_del_tunnel_reply {
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Dump ipsec gre tunnel table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param tunnel_index - gre tunnel identifier or -1 in case of all tunnels
+*/
+define ipsec_gre_tunnel_dump {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief ipsec gre tunnel operational state response
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - software index of the ipsec gre tunnel
+ @param local_sa_id - local SA id
+ @param remote_sa_id - remote SA id
+ @param src_address - tunnel source address
+ @param dst_address - tunnel destination address
+*/
+define ipsec_gre_tunnel_details {
+ u32 context;
+ u32 sw_if_index;
+ u32 local_sa_id;
+ u32 remote_sa_id;
+ u8 src_address[4];
+ u8 dst_address[4];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
\ No newline at end of file
diff --git a/src/vnet/ipsec-gre/ipsec_gre.c b/src/vnet/ipsec-gre/ipsec_gre.c
new file mode 100644
index 00000000..a0b065ac
--- /dev/null
+++ b/src/vnet/ipsec-gre/ipsec_gre.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief L2-GRE over IPSec packet processing.
+ *
+ * Add GRE header to the packet and send it to the esp-encrypt node.
+*/
+
+#include <vnet/vnet.h>
+#include <vnet/ipsec-gre/ipsec_gre.h>
+
+ipsec_gre_main_t ipsec_gre_main;
+
+/**
+ * @brief IPv4 and GRE header union.
+ *
+*/
+typedef struct
+{
+ union
+ {
+ ip4_and_gre_header_t ip4_and_gre;
+ u64 as_u64[3];
+ };
+} ip4_and_gre_union_t;
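+
+/*
+ * Note: sizeof (ip4_header_t) + sizeof (gre_header_t) = 20 + 4 = 24 bytes,
+ * i.e. exactly three u64s, which lets the tx path below zero the whole
+ * ip4 + GRE rewrite with three 64-bit stores (h->as_u64[0..2] = 0).
+ */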
+
+/**
+ * @brief Packet trace.
+ *
+*/
+typedef struct
+{
+ u32 tunnel_id; /**< Tunnel-id / index in tunnel vector */
+
+ u32 length; /**< pkt length */
+
+ ip4_address_t src; /**< tunnel src IPv4 address */
+ ip4_address_t dst; /**< tunnel dst IPv4 address */
+
+ u32 sa_id; /**< tunnel IPSec SA id */
+} ipsec_gre_tx_trace_t;
+
+u8 *
+format_ipsec_gre_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ipsec_gre_tx_trace_t *t = va_arg (*args, ipsec_gre_tx_trace_t *);
+
+ s = format (s, "GRE: tunnel %d len %d src %U dst %U sa-id %d",
+ t->tunnel_id, clib_net_to_host_u16 (t->length),
+ format_ip4_address, &t->src.as_u8,
+ format_ip4_address, &t->dst.as_u8, t->sa_id);
+ return s;
+}
+
+/**
+ * @brief IPSec-GRE tunnel interface tx function.
+ *
+ * Add GRE header to the packet.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer metadata, next index usage
+ *
+ * <em>Uses:</em>
+ * - <code>node->runtime_data</code>
+ * - Match tunnel by <code>rd->dev_instance</code> in IPSec-GRE tunnels
+ * pool.
+ *
+ * <em>Sets:</em>
+ * - <code>vnet_buffer(b)->ipsec.sad_index</code>
+ * - Set IPSec Security Association for packet encryption.
+ * - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+ * - Reset output sw_if_index.
+ *
+ * <em>Next Index:</em>
+ * - Dispatches the packet to the esp-encrypt node.
+*/
+static uword
+ipsec_gre_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ipsec_gre_main_t *igm = &ipsec_gre_main;
+ u32 next_index;
+ u32 *from, *to_next, n_left_from, n_left_to_next;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ ipsec_gre_tunnel_t *t = pool_elt_at_index (igm->tunnels, rd->dev_instance);
+
+ /* Vector of buffer / pkt indices we're supposed to process */
+ from = vlib_frame_vector_args (frame);
+
+ /* Number of buffers / pkts */
+ n_left_from = frame->n_vectors;
+
+ /* Speculatively send the first buffer to the last disposition we used */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ /* set up to enqueue to our disposition with index = next_index */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /*
+ * As long as we have enough pkts left to process two pkts
+ * and prefetch two pkts...
+ */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ ip4_header_t *ip0, *ip1;
+ ip4_and_gre_union_t *h0, *h1;
+ u32 bi0, next0, bi1, next1;
+ __attribute__ ((unused)) u8 error0, error1;
+ u16 gre_protocol0, gre_protocol1;
+
+ /* Prefetch the next iteration */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ /*
+ * Prefetch packet data. We expect to overwrite
+ * the inbound L2 header with an ip header and a
+ * gre header. Might want to prefetch the last line
+ * of rewrite space as well; need profile data
+ */
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* Pick up the next two buffer indices */
+ bi0 = from[0];
+ bi1 = from[1];
+
+ /* Speculatively enqueue them where we sent the last buffer */
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ ip0 = vlib_buffer_get_current (b0);
+ gre_protocol0 = clib_net_to_host_u16 (0x01);
+
+ ip1 = vlib_buffer_get_current (b1);
+ gre_protocol1 = clib_net_to_host_u16 (0x01);
+
+ vlib_buffer_advance (b0, -sizeof (*h0));
+ vlib_buffer_advance (b1, -sizeof (*h1));
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h0->as_u64[0] = 0;
+ h0->as_u64[1] = 0;
+ h0->as_u64[2] = 0;
+
+ h1->as_u64[0] = 0;
+ h1->as_u64[1] = 0;
+ h1->as_u64[2] = 0;
+
+ ip0 = &h0->ip4_and_gre.ip4;
+ h0->ip4_and_gre.gre.protocol = gre_protocol0;
+ ip0->ip_version_and_header_length = 0x45;
+ ip0->ttl = 254;
+ ip0->protocol = IP_PROTOCOL_GRE;
+
+ ip1 = &h1->ip4_and_gre.ip4;
+ h1->ip4_and_gre.gre.protocol = gre_protocol1;
+ ip1->ip_version_and_header_length = 0x45;
+ ip1->ttl = 254;
+ ip1->protocol = IP_PROTOCOL_GRE;
+
+ ip0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ ip1->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+ ip0->src_address.as_u32 = t->tunnel_src.as_u32;
+ ip1->src_address.as_u32 = t->tunnel_src.as_u32;
+ ip0->dst_address.as_u32 = t->tunnel_dst.as_u32;
+ ip1->dst_address.as_u32 = t->tunnel_dst.as_u32;
+ ip0->checksum = ip4_header_checksum (ip0);
+ ip1->checksum = ip4_header_checksum (ip1);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+ vnet_buffer (b0)->ipsec.sad_index = t->local_sa;
+ vnet_buffer (b1)->ipsec.sad_index = t->local_sa;
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ next0 = IPSEC_GRE_OUTPUT_NEXT_ESP_ENCRYPT;
+ next1 = IPSEC_GRE_OUTPUT_NEXT_ESP_ENCRYPT;
+ error0 = IPSEC_GRE_ERROR_NONE;
+ error1 = IPSEC_GRE_ERROR_NONE;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_gre_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = t - igm->tunnels;
+ tr->length = ip0->length;
+ tr->src.as_u32 = ip0->src_address.as_u32;
+ tr->dst.as_u32 = ip0->dst_address.as_u32;
+ tr->sa_id = t->local_sa_id;
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_gre_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->tunnel_id = t - igm->tunnels;
+ tr->length = ip1->length;
+ tr->src.as_u32 = ip1->src_address.as_u32;
+ tr->dst.as_u32 = ip1->dst_address.as_u32;
+ tr->sa_id = t->local_sa_id;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ ip4_header_t *ip0;
+ ip4_and_gre_union_t *h0;
+ u32 bi0, next0;
+ __attribute__ ((unused)) u8 error0;
+ u16 gre_protocol0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ gre_protocol0 = clib_net_to_host_u16 (0x01);
+
+ vlib_buffer_advance (b0, -sizeof (*h0));
+
+ h0 = vlib_buffer_get_current (b0);
+ h0->as_u64[0] = 0;
+ h0->as_u64[1] = 0;
+ h0->as_u64[2] = 0;
+
+ ip0 = &h0->ip4_and_gre.ip4;
+ h0->ip4_and_gre.gre.protocol = gre_protocol0;
+ ip0->ip_version_and_header_length = 0x45;
+ ip0->ttl = 254;
+ ip0->protocol = IP_PROTOCOL_GRE;
+ ip0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ ip0->src_address.as_u32 = t->tunnel_src.as_u32;
+ ip0->dst_address.as_u32 = t->tunnel_dst.as_u32;
+ ip0->checksum = ip4_header_checksum (ip0);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ vnet_buffer (b0)->ipsec.sad_index = t->local_sa;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ next0 = IPSEC_GRE_OUTPUT_NEXT_ESP_ENCRYPT;
+ error0 = IPSEC_GRE_ERROR_NONE;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_gre_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = t - igm->tunnels;
+ tr->length = ip0->length;
+ tr->src.as_u32 = ip0->src_address.as_u32;
+ tr->dst.as_u32 = ip0->dst_address.as_u32;
+ tr->sa_id = t->local_sa_id;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ipsec_gre_input_node.index,
+ IPSEC_GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+static clib_error_t *
+ipsec_gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags)
+{
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ vnet_hw_interface_set_flags (vnm, hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ else
+ vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ );
+
+ return /* no error */ 0;
+}
+
+static u8 *
+format_ipsec_gre_tunnel_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "ipsec-gre%d", dev_instance);
+}
+
+static u8 *
+format_ipsec_gre_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ CLIB_UNUSED (int verbose) = va_arg (*args, int);
+
+ s = format (s, "IPSEC-GRE tunnel: id %d\n", dev_instance);
+ return s;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (ipsec_gre_device_class) = {
+ .name = "IPSec GRE tunnel device",
+ .format_device_name = format_ipsec_gre_tunnel_name,
+ .format_device = format_ipsec_gre_device,
+ .format_tx_trace = format_ipsec_gre_tx_trace,
+ .tx_function = ipsec_gre_interface_tx,
+ .admin_up_down_function = ipsec_gre_interface_admin_up_down,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (ipsec_gre_device_class,
+ ipsec_gre_interface_tx)
+
+
+VNET_HW_INTERFACE_CLASS (ipsec_gre_hw_interface_class) = {
+ .name = "IPSEC-GRE",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ipsec_gre_init (vlib_main_t * vm)
+{
+ ipsec_gre_main_t *igm = &ipsec_gre_main;
+ clib_error_t *error;
+
+ memset (igm, 0, sizeof (igm[0]));
+ igm->vlib_main = vm;
+ igm->vnet_main = vnet_get_main ();
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+ return error;
+
+ igm->tunnel_by_key = hash_create (0, sizeof (uword));
+
+ return vlib_call_init_function (vm, ipsec_gre_input_init);
+}
+
+VLIB_INIT_FUNCTION (ipsec_gre_init);
+
+ipsec_gre_main_t *
+ipsec_gre_get_main (vlib_main_t * vm)
+{
+ vlib_call_init_function (vm, ipsec_gre_init);
+ return &ipsec_gre_main;
+}
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/ipsec-gre/ipsec_gre.h b/src/vnet/ipsec-gre/ipsec_gre.h
new file mode 100644
index 00000000..a2ca64b6
--- /dev/null
+++ b/src/vnet/ipsec-gre/ipsec_gre.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief L2-GRE over IPSec packet processing.
+*/
+
+#ifndef included_ipsec_gre_h
+#define included_ipsec_gre_h
+
+#include <vnet/vnet.h>
+#include <vnet/gre/packet.h>
+#include <vnet/gre/gre.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/format.h>
+
+extern vnet_hw_interface_class_t ipsec_gre_hw_interface_class;
+
+/**
+ * @brief IPSec-GRE errors.
+ *
+*/
+typedef enum
+{
+#define ipsec_gre_error(n,s) IPSEC_GRE_ERROR_##n,
+#include <vnet/ipsec-gre/error.def>
+#undef ipsec_gre_error
+ IPSEC_GRE_N_ERROR,
+} ipsec_gre_error_t;
+
+/**
+ * @brief IPSec-GRE tunnel parameters.
+ *
+*/
+typedef struct
+{
+ ip4_address_t tunnel_src; /**< tunnel IPv4 src address */
+ ip4_address_t tunnel_dst; /**< tunnel IPv4 dst address */
+ u32 local_sa; /**< local IPSec SA index */
+ u32 remote_sa; /**< remote IPSec SA index */
+ u32 local_sa_id; /**< local IPSec SA id */
+ u32 remote_sa_id; /**< remote IPSec SA id */
+  u32 hw_if_index;   /**< hardware interface index */
+  u32 sw_if_index;   /**< software interface index */
+} ipsec_gre_tunnel_t;
+
+/**
+ * @brief IPSec-GRE state.
+ *
+*/
+typedef struct
+{
+ ipsec_gre_tunnel_t *tunnels; /**< pool of tunnel instances */
+
+ uword *tunnel_by_key; /**< hash mapping src/dst addr pair to tunnel */
+
+ u32 *free_ipsec_gre_tunnel_hw_if_indices; /**< free vlib hw_if_indices */
+
+ u32 *tunnel_index_by_sw_if_index; /**< mapping from sw_if_index to tunnel
+ index */
+
+ vlib_main_t *vlib_main; /**< convenience */
+ vnet_main_t *vnet_main; /**< convenience */
+} ipsec_gre_main_t;
+
+ipsec_gre_main_t ipsec_gre_main;
+
+extern vlib_node_registration_t ipsec_gre_input_node;
+extern vnet_device_class_t ipsec_gre_device_class;
+
+/* manually added to the interface output node in ipsec_gre.c */
+#define IPSEC_GRE_OUTPUT_NEXT_ESP_ENCRYPT 1
+
+/**
+ * @brief IPSec-GRE tunnel add/del arguments.
+ *
+*/
+typedef struct
+{
+ u8 is_add; /**< 1 - add, 0 - delete */
+
+ ip4_address_t src; /**< tunnel IPv4 src address */
+ ip4_address_t dst; /**< tunnel IPv4 dst address */
+ u32 lsa; /**< local IPSec SA id */
+ u32 rsa; /**< remote IPSec SA id */
+} vnet_ipsec_gre_add_del_tunnel_args_t;
+
+int vnet_ipsec_gre_add_del_tunnel
+ (vnet_ipsec_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp);
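+
+/*
+ * A minimal usage sketch (hypothetical caller; assumes SAs with ids 10 and
+ * 20 already exist, e.g. created via "ipsec sa add ..."):
+ *
+ *   vnet_ipsec_gre_add_del_tunnel_args_t a = { 0 };
+ *   u32 sw_if_index;
+ *
+ *   a.is_add = 1;
+ *   a.lsa = 10;
+ *   a.rsa = 20;
+ *   a.src.as_u32 = clib_host_to_net_u32 (0xc0a80101); /* 192.168.1.1 */
+ *   a.dst.as_u32 = clib_host_to_net_u32 (0xc0a80102); /* 192.168.1.2 */
+ *   if (vnet_ipsec_gre_add_del_tunnel (&a, &sw_if_index) == 0)
+ *     ;  /* sw_if_index now names the new ipsec-gre interface */
+ */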
+
+#endif /* included_ipsec_gre_h */
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/ipsec-gre/ipsec_gre_api.c b/src/vnet/ipsec-gre/ipsec_gre_api.c
new file mode 100644
index 00000000..a7ea1490
--- /dev/null
+++ b/src/vnet/ipsec-gre/ipsec_gre_api.c
@@ -0,0 +1,190 @@
+/*
+ *------------------------------------------------------------------
+ * ipsec_gre_api.c - ipsec_gre api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ipsec-gre/ipsec_gre.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(IPSEC_GRE_ADD_DEL_TUNNEL, ipsec_gre_add_del_tunnel) \
+_(IPSEC_GRE_TUNNEL_DUMP, ipsec_gre_tunnel_dump)
+
+static void
+vl_api_ipsec_gre_add_del_tunnel_t_handler (vl_api_ipsec_gre_add_del_tunnel_t *
+ mp)
+{
+ vl_api_ipsec_gre_add_del_tunnel_reply_t *rmp;
+ int rv = 0;
+ vnet_ipsec_gre_add_del_tunnel_args_t _a, *a = &_a;
+ u32 sw_if_index = ~0;
+
+ /* Check src & dst are different */
+ if (memcmp (mp->src_address, mp->dst_address, 4) == 0)
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+
+ memset (a, 0, sizeof (*a));
+
+ /* ip addresses sent in network byte order */
+ clib_memcpy (&(a->src), mp->src_address, 4);
+ clib_memcpy (&(a->dst), mp->dst_address, 4);
+ a->is_add = mp->is_add;
+ a->lsa = ntohl (mp->local_sa_id);
+ a->rsa = ntohl (mp->remote_sa_id);
+
+ rv = vnet_ipsec_gre_add_del_tunnel (a, &sw_if_index);
+
+out:
+ /* *INDENT-OFF* */
+  REPLY_MACRO2(VL_API_IPSEC_GRE_ADD_DEL_TUNNEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void send_ipsec_gre_tunnel_details
+ (ipsec_gre_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_ipsec_gre_tunnel_details_t *rmp;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_IPSEC_GRE_TUNNEL_DETAILS);
+ clib_memcpy (rmp->src_address, &(t->tunnel_src), 4);
+ clib_memcpy (rmp->dst_address, &(t->tunnel_dst), 4);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->local_sa_id = htonl (t->local_sa_id);
+ rmp->remote_sa_id = htonl (t->remote_sa_id);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void vl_api_ipsec_gre_tunnel_dump_t_handler
+ (vl_api_ipsec_gre_tunnel_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ ipsec_gre_main_t *igm = &ipsec_gre_main;
+ ipsec_gre_tunnel_t *t;
+ u32 sw_if_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (t, igm->tunnels,
+ ({
+ send_ipsec_gre_tunnel_details(t, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (igm->tunnel_index_by_sw_if_index)) ||
+ (~0 == igm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &igm->tunnels[igm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_ipsec_gre_tunnel_details (t, q, mp->context);
+ }
+}
+
+/*
+ * ipsec_gre_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_ipsec_gre;
+#undef _
+}
+
+static clib_error_t *
+ipsec_gre_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (ipsec_gre_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec-gre/ipsec_gre_doc.md b/src/vnet/ipsec-gre/ipsec_gre_doc.md
new file mode 100644
index 00000000..e1bb9cda
--- /dev/null
+++ b/src/vnet/ipsec-gre/ipsec_gre_doc.md
@@ -0,0 +1,74 @@
+# VPP L2-GRE over IPsec implementation {#ipsec_gre_doc}
+
+This is a memo intended to contain documentation of the VPP L2-GRE over IPsec implementation.
+Everything that is not directly obvious should come here.
+
+
+## L2-GRE over IPsec
+GRE encapsulates layer 2 traffic and IPsec encrypts the GRE-encapsulated payload. The whole point of L2-GRE over IPsec is to tunnel layer 2 over GRE and IPsec by bridging the physical interface with the IPsec-GRE tunnel interface.
+
+There are 2 dedicated nodes for encapsulation:
+* ipsec-gre<n>-tx - add GRE header
+* esp-encrypt - encrypt GRE packet to ESP packet
+
+There are 3 dedicated nodes for decapsulation:
+* ipsec-if-input - match IPSec SA by source IP address and SPI in ESP packet
+* esp-decrypt - decrypt ESP packet
+* ipsec-gre-input - remove GRE header
+
+
+### Configuration
+
+L2-GRE over IPsec supports the following CLI configuration command:
+ create ipsec gre tunnel src <addr> dst <addr> local-sa <id> remote-sa <id> [del]
+
+src: tunnel source IPv4 address
+dst: tunnel destination IPv4 address
+local-sa: tunnel local IPSec Security Association
+remote-sa: tunnel remote IPSec Security Association
+del: delete IPSec-GRE tunnel
+
+L2-GRE over IPsec supports the following API configuration command:
+ ipsec_gre_add_del_tunnel src <addr> dst <addr> local_sa <sa-id> remote_sa <sa-id> [del]
+
+src: tunnel source IPv4 address
+dst: tunnel destination IPv4 address
+local_sa: tunnel local IPSec Security Association
+remote_sa: tunnel remote IPSec Security Association
+del: delete IPSec-GRE tunnel
+
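+A minimal sketch of how a C client could fill this request (the
+vl_api_ipsec_gre_add_del_tunnel_t type is generated from ipsec_gre.api;
+allocating and sending the message is client-specific and not shown):
+
+    mp->is_add = 1;
+    mp->local_sa_id = htonl (10);     /* SA ids travel in network byte order */
+    mp->remote_sa_id = htonl (20);
+    memcpy (mp->src_address, src, 4); /* IPv4 addresses, network byte order */
+    memcpy (mp->dst_address, dst, 4);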
+
+### Configuration example
+
+Interface GigabitEthernet0/9/0 is bridged with the ipsec-gre0 tunnel interface; interface GigabitEthernet0/8/0 sends the encapsulated and encrypted traffic.
+
+Configure IPv4 address on sending interface:
+set int ip address GigabitEthernet0/8/0 192.168.1.1/24
+
+Configure IPSec Security Associations:
+ipsec sa add 10 spi 1001 esp crypto-key 4a506a794f574265564551694d653768 crypto-alg aes-cbc-128 integ-key 4339314b55523947594d6d3547666b45764e6a58 integ-alg sha1-96
+ipsec sa add 20 spi 1000 esp crypto-key 49517065716d6235726c734a4372466c crypto-alg aes-cbc-128 integ-key 307439636a5542735133595835546f68534e4f64 integ-alg sha1-96
+
+Create IPSec-GRE tunnel:
+create ipsec gre tunnel src 192.168.1.1 dst 192.168.1.2 local-sa 10 remote-sa 20
+
+Set interfaces state:
+set int state GigabitEthernet0/8/0 up
+set int state GigabitEthernet0/9/0 up
+set int state ipsec-gre0 up
+
+Bridge physical interface with IPSec-GRE tunnel interface:
+set interface l2 bridge GigabitEthernet0/9/0 1
+set interface l2 bridge ipsec-gre0 1
+
+
+### Operational data
+
+L2-GRE over IPsec supports the following CLI show command:
+ show ipsec gre tunnel
+
+L2-GRE over IPsec supports the following API dump command:
+ ipsec_gre_tunnel_dump [sw_if_index <nn>]
+
+sw_if_index: software interface index of the IPSec-GRE tunnel interface
+
diff --git a/src/vnet/ipsec-gre/node.c b/src/vnet/ipsec-gre/node.c
new file mode 100644
index 00000000..217d323a
--- /dev/null
+++ b/src/vnet/ipsec-gre/node.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief L2-GRE over IPSec packet processing.
+ *
+ * Removes GRE header from the packet and sends it to the l2-input node.
+*/
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ipsec-gre/ipsec_gre.h>
+#include <vppinfra/sparse_vec.h>
+
+#define foreach_ipsec_gre_input_next \
+_(PUNT, "error-punt") \
+_(DROP, "error-drop") \
+_(L2_INPUT, "l2-input")
+
+typedef enum {
+#define _(s,n) IPSEC_GRE_INPUT_NEXT_##s,
+ foreach_ipsec_gre_input_next
+#undef _
+ IPSEC_GRE_INPUT_N_NEXT,
+} ipsec_gre_input_next_t;
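+
+/*
+ * The foreach macro above expands to:
+ *   IPSEC_GRE_INPUT_NEXT_PUNT     = 0  ("error-punt")
+ *   IPSEC_GRE_INPUT_NEXT_DROP     = 1  ("error-drop")
+ *   IPSEC_GRE_INPUT_NEXT_L2_INPUT = 2  ("l2-input")
+ * which matches the .next_nodes table in the node registration below.
+ */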
+
+typedef struct {
+ u32 tunnel_id;
+ u32 length;
+ ip4_address_t src;
+ ip4_address_t dst;
+} ipsec_gre_rx_trace_t;
+
+u8 * format_ipsec_gre_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ipsec_gre_rx_trace_t * t = va_arg (*args, ipsec_gre_rx_trace_t *);
+
+ s = format (s, "GRE: tunnel %d len %d src %U dst %U",
+ t->tunnel_id, clib_net_to_host_u16(t->length),
+ format_ip4_address, &t->src.as_u8,
+ format_ip4_address, &t->dst.as_u8);
+ return s;
+}
+
+/**
+ * @brief L2-GRE over IPSec input node.
+ * @node ipsec-gre-input
+ *
+ * This node removes the GRE header.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param from_frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer metadata, next index usage
+ *
+ * <em>Uses:</em>
+ * - <code>ip->src_address</code> and <code>ip->dst_address</code>
+ * - Match tunnel by source and destination addresses in GRE IP header.
+ *
+ * <em>Sets:</em>
+ * - <code>vnet_buffer(b)->gre.src</code>
+ * - Save tunnel source IPv4 address.
+ * - <code>vnet_buffer(b)->gre.dst</code>
+ * - Save tunnel destination IPv4 address.
+ * - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ * - Set input sw_if_index to IPSec-GRE tunnel for learning.
+ *
+ * <em>Next Index:</em>
+ * - Dispatches the packet to the l2-input node.
+*/
+static uword
+ipsec_gre_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ipsec_gre_main_t * igm = &ipsec_gre_main;
+ u32 n_left_from, next_index, * from, * to_next;
+ u64 cached_tunnel_key = (u64) ~0;
+ u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index;
+ u32 tun_src0, tun_dst0;
+ u32 tun_src1, tun_dst1;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ gre_header_t * h0, * h1;
+ u16 version0, version1, protocol0, protocol1;
+ int verr0, verr1;
+ u32 next0, next1;
+ ip4_header_t *ip0, *ip1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* ip4_local hands us the ip header, not the gre header */
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* Save src + dst ip4 address */
+ tun_src0 = ip0->src_address.as_u32;
+ tun_dst0 = ip0->dst_address.as_u32;
+ tun_src1 = ip1->src_address.as_u32;
+ tun_dst1 = ip1->dst_address.as_u32;
+
+ vlib_buffer_advance (b0, sizeof (*ip0));
+ vlib_buffer_advance (b1, sizeof (*ip1));
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ protocol0 = clib_net_to_host_u16 (h0->protocol);
+ protocol1 = clib_net_to_host_u16 (h1->protocol);
+ if (PREDICT_TRUE(protocol0 == 0x0001))
+ {
+ next0 = IPSEC_GRE_INPUT_NEXT_L2_INPUT;
+ b0->error = node->errors[IPSEC_GRE_ERROR_NONE];
+ }
+ else
+ {
+ clib_warning("unknown GRE protocol: %d", protocol0);
+ b0->error = node->errors[IPSEC_GRE_ERROR_UNKNOWN_PROTOCOL];
+ next0 = IPSEC_GRE_INPUT_NEXT_DROP;
+ }
+ if (PREDICT_TRUE(protocol1 == 0x0001))
+ {
+ next1 = IPSEC_GRE_INPUT_NEXT_L2_INPUT;
+ b1->error = node->errors[IPSEC_GRE_ERROR_NONE];
+ }
+ else
+ {
+ clib_warning("unknown GRE protocol: %d", protocol1);
+ b1->error = node->errors[IPSEC_GRE_ERROR_UNKNOWN_PROTOCOL];
+ next1 = IPSEC_GRE_INPUT_NEXT_DROP;
+ }
+
+ version0 = clib_net_to_host_u16 (h0->flags_and_version);
+ verr0 = version0 & GRE_VERSION_MASK;
+ version1 = clib_net_to_host_u16 (h1->flags_and_version);
+ verr1 = version1 & GRE_VERSION_MASK;
+
+ b0->error = verr0 ? node->errors[IPSEC_GRE_ERROR_UNSUPPORTED_VERSION]
+ : b0->error;
+ next0 = verr0 ? IPSEC_GRE_INPUT_NEXT_DROP : next0;
+ b1->error = verr1 ? node->errors[IPSEC_GRE_ERROR_UNSUPPORTED_VERSION]
+ : b1->error;
+ next1 = verr1 ? IPSEC_GRE_INPUT_NEXT_DROP : next1;
+
+ /* For L2 payload set input sw_if_index to GRE tunnel for learning */
+ if (PREDICT_TRUE(next0 == IPSEC_GRE_INPUT_NEXT_L2_INPUT))
+ {
+ u64 key = ((u64)(tun_dst0) << 32) | (u64)(tun_src0);
+
+ if (cached_tunnel_key != key)
+ {
+ vnet_hw_interface_t * hi;
+ ipsec_gre_tunnel_t * t;
+ uword * p;
+
+ p = hash_get (igm->tunnel_by_key, key);
+ if (!p)
+ {
+ next0 = IPSEC_GRE_INPUT_NEXT_DROP;
+ b0->error = node->errors[IPSEC_GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop0;
+ }
+ t = pool_elt_at_index (igm->tunnels, p[0]);
+ hi = vnet_get_hw_interface (igm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ }
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+ }
+
+drop0:
+ /* For L2 payload set input sw_if_index to GRE tunnel for learning */
+ if (PREDICT_TRUE(next1 == IPSEC_GRE_INPUT_NEXT_L2_INPUT))
+ {
+ u64 key = ((u64)(tun_dst1) << 32) | (u64)(tun_src1);
+
+ if (cached_tunnel_key != key)
+ {
+ vnet_hw_interface_t * hi;
+ ipsec_gre_tunnel_t * t;
+ uword * p;
+
+ p = hash_get (igm->tunnel_by_key, key);
+ if (!p)
+ {
+ next1 = IPSEC_GRE_INPUT_NEXT_DROP;
+ b1->error = node->errors[IPSEC_GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop1;
+ }
+ t = pool_elt_at_index (igm->tunnels, p[0]);
+ hi = vnet_get_hw_interface (igm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ }
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+ }
+
+drop1:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = ~0;
+ tr->length = ip0->length;
+ tr->src.as_u32 = ip0->src_address.as_u32;
+ tr->dst.as_u32 = ip0->dst_address.as_u32;
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->tunnel_id = ~0;
+ tr->length = ip1->length;
+ tr->src.as_u32 = ip1->src_address.as_u32;
+ tr->dst.as_u32 = ip1->dst_address.as_u32;
+ }
+
+ vlib_buffer_advance (b0, sizeof (*h0));
+ vlib_buffer_advance (b1, sizeof (*h1));
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ gre_header_t * h0;
+ ip4_header_t * ip0;
+ u16 version0, protocol0;
+ int verr0;
+ u32 next0;
+ u32 tun_src0, tun_dst0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ tun_src0 = ip0->src_address.as_u32;
+ tun_dst0 = ip0->dst_address.as_u32;
+
+ vlib_buffer_advance (b0, sizeof (*ip0));
+
+ h0 = vlib_buffer_get_current (b0);
+
+ protocol0 = clib_net_to_host_u16 (h0->protocol);
+ if (PREDICT_TRUE(protocol0 == 0x0001))
+ {
+ next0 = IPSEC_GRE_INPUT_NEXT_L2_INPUT;
+ b0->error = node->errors[IPSEC_GRE_ERROR_NONE];
+ }
+ else
+ {
+ clib_warning("unknown GRE protocol: %d", protocol0);
+ b0->error = node->errors[IPSEC_GRE_ERROR_UNKNOWN_PROTOCOL];
+ next0 = IPSEC_GRE_INPUT_NEXT_DROP;
+ }
+
+ version0 = clib_net_to_host_u16 (h0->flags_and_version);
+ verr0 = version0 & GRE_VERSION_MASK;
+ b0->error = verr0 ? node->errors[IPSEC_GRE_ERROR_UNSUPPORTED_VERSION]
+ : b0->error;
+ next0 = verr0 ? IPSEC_GRE_INPUT_NEXT_DROP : next0;
+
+ /* For L2 payload set input sw_if_index to GRE tunnel for learning */
+	  if (PREDICT_TRUE(next0 == IPSEC_GRE_INPUT_NEXT_L2_INPUT))
+ {
+ u64 key = ((u64)(tun_dst0) << 32) | (u64)(tun_src0);
+
+ if (cached_tunnel_key != key)
+ {
+ vnet_hw_interface_t * hi;
+ ipsec_gre_tunnel_t * t;
+ uword * p;
+
+ p = hash_get (igm->tunnel_by_key, key);
+ if (!p)
+ {
+ next0 = IPSEC_GRE_INPUT_NEXT_DROP;
+ b0->error = node->errors[IPSEC_GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop;
+ }
+ t = pool_elt_at_index (igm->tunnels, p[0]);
+ hi = vnet_get_hw_interface (igm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ }
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+ }
+
+drop:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = ~0;
+ tr->length = ip0->length;
+ tr->src.as_u32 = ip0->src_address.as_u32;
+ tr->dst.as_u32 = ip0->dst_address.as_u32;
+ }
+
+ vlib_buffer_advance (b0, sizeof (*h0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, ipsec_gre_input_node.index,
+ IPSEC_GRE_ERROR_PKTS_DECAP, from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+static char * ipsec_gre_error_strings[] = {
+#define ipsec_gre_error(n,s) s,
+#include "error.def"
+#undef ipsec_gre_error
+};
+
+VLIB_REGISTER_NODE (ipsec_gre_input_node) = {
+ .function = ipsec_gre_input,
+ .name = "ipsec-gre-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = IPSEC_GRE_N_ERROR,
+ .error_strings = ipsec_gre_error_strings,
+
+ .n_next_nodes = IPSEC_GRE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [IPSEC_GRE_INPUT_NEXT_##s] = n,
+ foreach_ipsec_gre_input_next
+#undef _
+ },
+
+ .format_trace = format_ipsec_gre_rx_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ipsec_gre_input_node, ipsec_gre_input)
+
+static clib_error_t * ipsec_gre_input_init (vlib_main_t * vm)
+{
+ {
+ clib_error_t * error;
+ error = vlib_call_init_function (vm, ipsec_gre_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ipsec_gre_input_init);
diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h
new file mode 100644
index 00000000..799003b9
--- /dev/null
+++ b/src/vnet/ipsec/esp.h
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ESP_H__
+#define __ESP_H__
+
+#include <openssl/hmac.h>
+#include <openssl/rand.h>
+#include <openssl/evp.h>
+
+typedef struct
+{
+ u32 spi;
+ u32 seq;
+ u8 data[0];
+} esp_header_t;
+
+typedef struct
+{
+ u8 pad_length;
+ u8 next_header;
+} esp_footer_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ esp_header_t esp;
+}) ip4_and_esp_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6;
+ esp_header_t esp;
+}) ip6_and_esp_header_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ const EVP_CIPHER *type;
+} esp_crypto_alg_t;
+
+typedef struct
+{
+ const EVP_MD *md;
+ u8 trunc_size;
+} esp_integ_alg_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ EVP_CIPHER_CTX encrypt_ctx;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ EVP_CIPHER_CTX decrypt_ctx;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
+ HMAC_CTX hmac_ctx;
+ ipsec_crypto_alg_t last_encrypt_alg;
+ ipsec_crypto_alg_t last_decrypt_alg;
+ ipsec_integ_alg_t last_integ_alg;
+} esp_main_per_thread_data_t;
+
+typedef struct
+{
+ esp_crypto_alg_t *esp_crypto_algs;
+ esp_integ_alg_t *esp_integ_algs;
+ esp_main_per_thread_data_t *per_thread_data;
+} esp_main_t;
+
+esp_main_t esp_main;
+
+#define ESP_WINDOW_SIZE (64)
+#define ESP_SEQ_MAX (4294967295UL)
+
+
+always_inline int
+esp_replay_check (ipsec_sa_t * sa, u32 seq)
+{
+ u32 diff;
+
+ if (PREDICT_TRUE (seq > sa->last_seq))
+ return 0;
+
+ diff = sa->last_seq - seq;
+
+ if (ESP_WINDOW_SIZE > diff)
+ return (sa->replay_window & (1ULL << diff)) ? 1 : 0;
+ else
+ return 1;
+
+ return 0;
+}
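+
+/*
+ * Worked example: with sa->last_seq = 100 and ESP_WINDOW_SIZE = 64,
+ * seq = 101 is fresh (seq > last_seq, accept); seq = 40 gives diff = 60,
+ * inside the window, and is a replay iff bit 60 of replay_window is set;
+ * seq = 30 gives diff = 70 >= 64 and is always rejected as too old.
+ */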
+
+always_inline int
+esp_replay_check_esn (ipsec_sa_t * sa, u32 seq)
+{
+ u32 tl = sa->last_seq;
+ u32 th = sa->last_seq_hi;
+ u32 diff = tl - seq;
+
+ if (PREDICT_TRUE (tl >= (ESP_WINDOW_SIZE - 1)))
+ {
+ if (seq >= (tl - ESP_WINDOW_SIZE + 1))
+ {
+ sa->seq_hi = th;
+ if (seq <= tl)
+ return (sa->replay_window & (1ULL << diff)) ? 1 : 0;
+ else
+ return 0;
+ }
+ else
+ {
+ sa->seq_hi = th + 1;
+ return 0;
+ }
+ }
+ else
+ {
+ if (seq >= (tl - ESP_WINDOW_SIZE + 1))
+ {
+ sa->seq_hi = th - 1;
+ return (sa->replay_window & (1ULL << diff)) ? 1 : 0;
+ }
+ else
+ {
+ sa->seq_hi = th;
+ if (seq <= tl)
+ return (sa->replay_window & (1ULL << diff)) ? 1 : 0;
+ else
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+/* TODO: seq increment should be atomic when the SA is shared by multiple workers */
+always_inline void
+esp_replay_advance (ipsec_sa_t * sa, u32 seq)
+{
+ u32 pos;
+
+ if (seq > sa->last_seq)
+ {
+ pos = seq - sa->last_seq;
+ if (pos < ESP_WINDOW_SIZE)
+ sa->replay_window = ((sa->replay_window) << pos) | 1;
+ else
+ sa->replay_window = 1;
+ sa->last_seq = seq;
+ }
+ else
+ {
+ pos = sa->last_seq - seq;
+ sa->replay_window |= (1ULL << pos);
+ }
+}
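+
+/*
+ * Self-contained illustration (hypothetical "toy" SA, not part of the data
+ * path) of how check and advance cooperate, assuming the same logic as
+ * esp_replay_check / esp_replay_advance above:
+ *
+ *   struct toy_sa { u32 last_seq; u64 replay_window; };
+ *
+ *   int accept (struct toy_sa * sa, u32 seq)
+ *   {
+ *     if (toy_replay_check (sa, seq))  // duplicate or too old
+ *       return 0;                      // drop
+ *     toy_replay_advance (sa, seq);    // record seq in the window
+ *     return 1;
+ *   }
+ *
+ * Receiving 1, 2, 5, 3, 3 accepts the first four packets and rejects the
+ * final 3: advance(3) set bit (last_seq - seq) = 2 in replay_window, so the
+ * repeated 3 is flagged as a replay.
+ */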
+
+always_inline void
+esp_replay_advance_esn (ipsec_sa_t * sa, u32 seq)
+{
+ int wrap = sa->seq_hi - sa->last_seq_hi;
+ u32 pos;
+
+ if (wrap == 0 && seq > sa->last_seq)
+ {
+ pos = seq - sa->last_seq;
+ if (pos < ESP_WINDOW_SIZE)
+ sa->replay_window = ((sa->replay_window) << pos) | 1;
+ else
+ sa->replay_window = 1;
+ sa->last_seq = seq;
+ }
+ else if (wrap > 0)
+ {
+ pos = ~seq + sa->last_seq + 1;
+ if (pos < ESP_WINDOW_SIZE)
+ sa->replay_window = ((sa->replay_window) << pos) | 1;
+ else
+ sa->replay_window = 1;
+ sa->last_seq = seq;
+ sa->last_seq_hi = sa->seq_hi;
+ }
+ else if (wrap < 0)
+ {
+ pos = ~seq + sa->last_seq + 1;
+ sa->replay_window |= (1ULL << pos);
+ }
+ else
+ {
+ pos = sa->last_seq - seq;
+ sa->replay_window |= (1ULL << pos);
+ }
+}
+
+always_inline int
+esp_seq_advance (ipsec_sa_t * sa)
+{
+ if (PREDICT_TRUE (sa->use_esn))
+ {
+ if (PREDICT_FALSE (sa->seq == ESP_SEQ_MAX))
+ {
+ if (PREDICT_FALSE
+ (sa->use_anti_replay && sa->seq_hi == ESP_SEQ_MAX))
+ return 1;
+ sa->seq_hi++;
+ }
+ sa->seq++;
+ }
+ else
+ {
+ if (PREDICT_FALSE (sa->use_anti_replay && sa->seq == ESP_SEQ_MAX))
+ return 1;
+ sa->seq++;
+ }
+
+ return 0;
+}
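+
+/*
+ * Example: with ESN enabled, a SA at seq = 0xFFFFFFFF rolls over to seq = 0
+ * with seq_hi incremented; with ESN disabled and anti-replay on, reaching
+ * ESP_SEQ_MAX makes this return 1 and the caller must drop (rekey needed).
+ */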
+
+always_inline void
+esp_init ()
+{
+ esp_main_t *em = &esp_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ memset (em, 0, sizeof (em[0]));
+
+ vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1);
+ em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128].type = EVP_aes_128_cbc ();
+ em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192].type = EVP_aes_192_cbc ();
+ em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256].type = EVP_aes_256_cbc ();
+
+ vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1);
+ esp_integ_alg_t *i;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96];
+ i->md = EVP_sha1 ();
+ i->trunc_size = 12;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96];
+ i->md = EVP_sha256 ();
+ i->trunc_size = 12;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128];
+ i->md = EVP_sha256 ();
+ i->trunc_size = 16;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192];
+ i->md = EVP_sha384 ();
+ i->trunc_size = 24;
+
+ i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256];
+ i->md = EVP_sha512 ();
+ i->trunc_size = 32;
+
+ vec_validate_aligned (em->per_thread_data, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ int thread_id;
+
+  /* initialize crypto contexts for every vlib main, workers included */
+  for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++)
+ {
+ EVP_CIPHER_CTX_init (&(em->per_thread_data[thread_id].encrypt_ctx));
+ EVP_CIPHER_CTX_init (&(em->per_thread_data[thread_id].decrypt_ctx));
+ HMAC_CTX_init (&(em->per_thread_data[thread_id].hmac_ctx));
+ }
+}
+
+always_inline unsigned int
+hmac_calc (ipsec_integ_alg_t alg,
+ u8 * key,
+ int key_len,
+ u8 * data, int data_len, u8 * signature, u8 use_esn, u32 seq_hi)
+{
+ esp_main_t *em = &esp_main;
+ u32 thread_index = vlib_get_thread_index ();
+ HMAC_CTX *ctx = &(em->per_thread_data[thread_index].hmac_ctx);
+ const EVP_MD *md = NULL;
+ unsigned int len;
+
+ ASSERT (alg < IPSEC_INTEG_N_ALG);
+
+ if (PREDICT_FALSE (em->esp_integ_algs[alg].md == 0))
+ return 0;
+
+ if (PREDICT_FALSE (alg != em->per_thread_data[thread_index].last_integ_alg))
+ {
+ md = em->esp_integ_algs[alg].md;
+ em->per_thread_data[thread_index].last_integ_alg = alg;
+ }
+
+ HMAC_Init (ctx, key, key_len, md);
+
+ HMAC_Update (ctx, data, data_len);
+
+ if (PREDICT_TRUE (use_esn))
+ HMAC_Update (ctx, (u8 *) & seq_hi, sizeof (seq_hi));
+ HMAC_Final (ctx, signature, &len);
+
+ return em->esp_integ_algs[alg].trunc_size;
+}
+
+#endif /* __ESP_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
new file mode 100644
index 00000000..de4cc6dd
--- /dev/null
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -0,0 +1,435 @@
+/*
+ * esp_decrypt.c : IPSec ESP decrypt node
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
+
+#define foreach_esp_decrypt_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input") \
+_(IPSEC_GRE_INPUT, "ipsec-gre-input")
+
+#define _(v, s) ESP_DECRYPT_NEXT_##v,
+typedef enum
+{
+ foreach_esp_decrypt_next
+#undef _
+ ESP_DECRYPT_N_NEXT,
+} esp_decrypt_next_t;
+
+
+#define foreach_esp_decrypt_error \
+ _(RX_PKTS, "ESP pkts received") \
+ _(NO_BUFFER, "No buffer (packet dropped)")  \
+ _(DECRYPTION_FAILED, "ESP decryption failed") \
+ _(INTEG_ERROR, "Integrity check failed") \
+ _(REPLAY, "SA replayed packet") \
+ _(NOT_IP, "Not IP packet (dropped)")
+
+
+typedef enum
+{
+#define _(sym,str) ESP_DECRYPT_ERROR_##sym,
+ foreach_esp_decrypt_error
+#undef _
+ ESP_DECRYPT_N_ERROR,
+} esp_decrypt_error_t;
+
+static char *esp_decrypt_error_strings[] = {
+#define _(sym,string) string,
+ foreach_esp_decrypt_error
+#undef _
+};
+
+typedef struct
+{
+ ipsec_crypto_alg_t crypto_alg;
+ ipsec_integ_alg_t integ_alg;
+} esp_decrypt_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_esp_decrypt_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ esp_decrypt_trace_t *t = va_arg (*args, esp_decrypt_trace_t *);
+
+ s = format (s, "esp: crypto %U integrity %U",
+ format_ipsec_crypto_alg, t->crypto_alg,
+ format_ipsec_integ_alg, t->integ_alg);
+ return s;
+}
+
+always_inline void
+esp_decrypt_aes_cbc (ipsec_crypto_alg_t alg,
+ u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv)
+{
+ esp_main_t *em = &esp_main;
+ u32 thread_index = vlib_get_thread_index ();
+ EVP_CIPHER_CTX *ctx = &(em->per_thread_data[thread_index].decrypt_ctx);
+ const EVP_CIPHER *cipher = NULL;
+ int out_len;
+
+ ASSERT (alg < IPSEC_CRYPTO_N_ALG);
+
+ if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == 0))
+ return;
+
+ if (PREDICT_FALSE
+ (alg != em->per_thread_data[thread_index].last_decrypt_alg))
+ {
+ cipher = em->esp_crypto_algs[alg].type;
+ em->per_thread_data[thread_index].last_decrypt_alg = alg;
+ }
+
+ EVP_DecryptInit_ex (ctx, cipher, NULL, key, iv);
+
+ EVP_DecryptUpdate (ctx, out, &out_len, in, in_len);
+ EVP_DecryptFinal_ex (ctx, out + out_len, &out_len);
+}
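+
+/*
+ * Note: the caller below hands this function esp0->data as the IV and
+ * esp0->data + IV_SIZE as the ciphertext, i.e. AES-CBC payloads carry a
+ * 16-byte IV immediately after the ESP header.
+ */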
+
+static uword
+esp_decrypt_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, next_index, *to_next;
+ ipsec_main_t *im = &ipsec_main;
+ esp_main_t *em = &esp_main;
+ u32 *recycle = 0;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ u32 thread_index = vlib_get_thread_index ();
+
+ ipsec_alloc_empty_buffers (vm, im);
+
+ u32 *empty_buffers = im->empty_buffers[thread_index];
+
+ if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from))
+ {
+ vlib_node_increment_counter (vm, esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_NO_BUFFER, n_left_from);
+ goto free_buffers_and_exit;
+ }
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 i_bi0, o_bi0 = (u32) ~ 0, next0;
+ vlib_buffer_t *i_b0;
+ vlib_buffer_t *o_b0 = 0;
+ esp_header_t *esp0;
+ ipsec_sa_t *sa0;
+ u32 sa_index0 = ~0;
+ u32 seq;
+ ip4_header_t *ih4 = 0, *oh4 = 0;
+ ip6_header_t *ih6 = 0, *oh6 = 0;
+ u8 tunnel_mode = 1;
+ u8 transport_ip6 = 0;
+
+
+ i_bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ next0 = ESP_DECRYPT_NEXT_DROP;
+
+ i_b0 = vlib_get_buffer (vm, i_bi0);
+ esp0 = vlib_buffer_get_current (i_b0);
+
+ sa_index0 = vnet_buffer (i_b0)->ipsec.sad_index;
+ sa0 = pool_elt_at_index (im->sad, sa_index0);
+
+ seq = clib_host_to_net_u32 (esp0->seq);
+
+ /* anti-replay check */
+ if (sa0->use_anti_replay)
+ {
+ int rv = 0;
+
+ if (PREDICT_TRUE (sa0->use_esn))
+ rv = esp_replay_check_esn (sa0, seq);
+ else
+ rv = esp_replay_check (sa0, seq);
+
+ if (PREDICT_FALSE (rv))
+ {
+ clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq);
+ vlib_node_increment_counter (vm, esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_REPLAY, 1);
+ o_bi0 = i_bi0;
+ to_next[0] = o_bi0;
+ to_next += 1;
+ goto trace;
+ }
+ }
+
+ sa0->total_data_size += i_b0->current_length;
+
+ if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE))
+ {
+ u8 sig[64];
+ int icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
+ memset (sig, 0, sizeof (sig));
+ u8 *icv =
+ vlib_buffer_get_current (i_b0) + i_b0->current_length -
+ icv_size;
+ i_b0->current_length -= icv_size;
+
+ hmac_calc (sa0->integ_alg, sa0->integ_key, sa0->integ_key_len,
+ (u8 *) esp0, i_b0->current_length, sig, sa0->use_esn,
+ sa0->seq_hi);
+
+ if (PREDICT_FALSE (memcmp (icv, sig, icv_size)))
+ {
+ vlib_node_increment_counter (vm, esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_INTEG_ERROR,
+ 1);
+ o_bi0 = i_bi0;
+ to_next[0] = o_bi0;
+ to_next += 1;
+ goto trace;
+ }
+ }
+
+ if (PREDICT_TRUE (sa0->use_anti_replay))
+ {
+ if (PREDICT_TRUE (sa0->use_esn))
+ esp_replay_advance_esn (sa0, seq);
+ else
+ esp_replay_advance (sa0, seq);
+ }
+
+ /* grab free buffer */
+ uword last_empty_buffer = vec_len (empty_buffers) - 1;
+ o_bi0 = empty_buffers[last_empty_buffer];
+ to_next[0] = o_bi0;
+ to_next += 1;
+ o_b0 = vlib_get_buffer (vm, o_bi0);
+ vlib_prefetch_buffer_with_index (vm,
+ empty_buffers[last_empty_buffer -
+ 1], STORE);
+ _vec_len (empty_buffers) = last_empty_buffer;
+
+ /* add old buffer to the recycle list */
+ vec_add1 (recycle, i_bi0);
+
+ if (sa0->crypto_alg >= IPSEC_CRYPTO_ALG_AES_CBC_128 &&
+ sa0->crypto_alg <= IPSEC_CRYPTO_ALG_AES_CBC_256)
+ {
+ const int BLOCK_SIZE = 16;
+ const int IV_SIZE = 16;
+ esp_footer_t *f0;
+ u8 ip_hdr_size = 0;
+
+ int blocks =
+ (i_b0->current_length - sizeof (esp_header_t) -
+ IV_SIZE) / BLOCK_SIZE;
+
+ o_b0->current_data = sizeof (ethernet_header_t);
+
+ /* transport mode */
+ if (PREDICT_FALSE (!sa0->is_tunnel && !sa0->is_tunnel_ip6))
+ {
+ tunnel_mode = 0;
+ ih4 =
+ (ip4_header_t *) (i_b0->data +
+ sizeof (ethernet_header_t));
+ if (PREDICT_TRUE
+ ((ih4->ip_version_and_header_length & 0xF0) != 0x40))
+ {
+ if (PREDICT_TRUE
+ ((ih4->ip_version_and_header_length & 0xF0) ==
+ 0x60))
+ {
+ transport_ip6 = 1;
+ ip_hdr_size = sizeof (ip6_header_t);
+ ih6 =
+ (ip6_header_t *) (i_b0->data +
+ sizeof (ethernet_header_t));
+ oh6 = vlib_buffer_get_current (o_b0);
+ }
+ else
+ {
+ vlib_node_increment_counter (vm,
+ esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_NOT_IP,
+ 1);
+ o_b0 = 0;
+ goto trace;
+ }
+ }
+ else
+ {
+ oh4 = vlib_buffer_get_current (o_b0);
+ ip_hdr_size = sizeof (ip4_header_t);
+ }
+ }
+
+ esp_decrypt_aes_cbc (sa0->crypto_alg,
+ esp0->data + IV_SIZE,
+ (u8 *) vlib_buffer_get_current (o_b0) +
+ ip_hdr_size, BLOCK_SIZE * blocks,
+ sa0->crypto_key, esp0->data);
+
+ o_b0->current_length = (blocks * 16) - 2 + ip_hdr_size;
+ o_b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ f0 =
+ (esp_footer_t *) ((u8 *) vlib_buffer_get_current (o_b0) +
+ o_b0->current_length);
+ o_b0->current_length -= f0->pad_length;
+
+ /* tunnel mode */
+ if (PREDICT_TRUE (tunnel_mode))
+ {
+ if (PREDICT_TRUE (f0->next_header == IP_PROTOCOL_IP_IN_IP))
+ {
+ next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
+ oh4 = vlib_buffer_get_current (o_b0);
+ }
+ else if (f0->next_header == IP_PROTOCOL_IPV6)
+ next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
+ else
+ {
+ clib_warning ("next header: 0x%x", f0->next_header);
+ vlib_node_increment_counter (vm, esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+ 1);
+ o_b0 = 0;
+ goto trace;
+ }
+ }
+ /* transport mode */
+ else
+ {
+ if (PREDICT_FALSE (transport_ip6))
+ {
+ next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
+ oh6->ip_version_traffic_class_and_flow_label =
+ ih6->ip_version_traffic_class_and_flow_label;
+ oh6->protocol = f0->next_header;
+ oh6->hop_limit = ih6->hop_limit;
+ oh6->src_address.as_u64[0] = ih6->src_address.as_u64[0];
+ oh6->src_address.as_u64[1] = ih6->src_address.as_u64[1];
+ oh6->dst_address.as_u64[0] = ih6->dst_address.as_u64[0];
+ oh6->dst_address.as_u64[1] = ih6->dst_address.as_u64[1];
+ oh6->payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain
+ (vm,
+ o_b0) - sizeof (ip6_header_t));
+ }
+ else
+ {
+ next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
+ oh4->ip_version_and_header_length = 0x45;
+ oh4->tos = ih4->tos;
+ oh4->fragment_id = 0;
+ oh4->flags_and_fragment_offset = 0;
+ oh4->ttl = ih4->ttl;
+ oh4->protocol = f0->next_header;
+ oh4->src_address.as_u32 = ih4->src_address.as_u32;
+ oh4->dst_address.as_u32 = ih4->dst_address.as_u32;
+ oh4->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain
+ (vm, o_b0));
+ oh4->checksum = ip4_header_checksum (oh4);
+ }
+ }
+
+ /* for IPSec-GRE tunnel next node is ipsec-gre-input */
+ if (PREDICT_FALSE
+ ((vnet_buffer (i_b0)->ipsec.flags) &
+ IPSEC_FLAG_IPSEC_GRE_TUNNEL))
+ next0 = ESP_DECRYPT_NEXT_IPSEC_GRE_INPUT;
+
+ vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ vnet_buffer (o_b0)->sw_if_index[VLIB_RX] =
+ vnet_buffer (i_b0)->sw_if_index[VLIB_RX];
+ }
+
+ trace:
+ if (PREDICT_FALSE (i_b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ if (o_b0)
+ {
+ o_b0->flags |= VLIB_BUFFER_IS_TRACED;
+ o_b0->trace_index = i_b0->trace_index;
+ esp_decrypt_trace_t *tr =
+ vlib_add_trace (vm, node, o_b0, sizeof (*tr));
+ tr->crypto_alg = sa0->crypto_alg;
+ tr->integ_alg = sa0->integ_alg;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, o_bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, esp_decrypt_node.index,
+ ESP_DECRYPT_ERROR_RX_PKTS,
+ from_frame->n_vectors);
+
+free_buffers_and_exit:
+ if (recycle)
+ vlib_buffer_free (vm, recycle, vec_len (recycle));
+ vec_free (recycle);
+ return from_frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (esp_decrypt_node) = {
+ .function = esp_decrypt_node_fn,
+ .name = "esp-decrypt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_esp_decrypt_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
+ .error_strings = esp_decrypt_error_strings,
+
+ .n_next_nodes = ESP_DECRYPT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
+ foreach_esp_decrypt_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (esp_decrypt_node, esp_decrypt_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
new file mode 100644
index 00000000..b2bc4e0b
--- /dev/null
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -0,0 +1,428 @@
+/*
+ * esp_encrypt.c : IPSec ESP encrypt node
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
+
+
+#define foreach_esp_encrypt_next \
+_(DROP, "error-drop") \
+_(IP4_LOOKUP, "ip4-lookup") \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(INTERFACE_OUTPUT, "interface-output")
+
+#define _(v, s) ESP_ENCRYPT_NEXT_##v,
+typedef enum
+{
+ foreach_esp_encrypt_next
+#undef _
+ ESP_ENCRYPT_N_NEXT,
+} esp_encrypt_next_t;
+
+#define foreach_esp_encrypt_error \
+ _(RX_PKTS, "ESP pkts received") \
+ _(NO_BUFFER, "No buffer (packet dropped)") \
+ _(ENCRYPTION_FAILED, "ESP encryption failed") \
+ _(SEQ_CYCLED, "sequence number cycled")
+
+
+typedef enum
+{
+#define _(sym,str) ESP_ENCRYPT_ERROR_##sym,
+ foreach_esp_encrypt_error
+#undef _
+ ESP_ENCRYPT_N_ERROR,
+} esp_encrypt_error_t;
+
+static char *esp_encrypt_error_strings[] = {
+#define _(sym,string) string,
+ foreach_esp_encrypt_error
+#undef _
+};
+
+vlib_node_registration_t esp_encrypt_node;
+
+typedef struct
+{
+ u32 spi;
+ u32 seq;
+ ipsec_crypto_alg_t crypto_alg;
+ ipsec_integ_alg_t integ_alg;
+} esp_encrypt_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_esp_encrypt_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *);
+
+ s = format (s, "esp: spi %u seq %u crypto %U integrity %U",
+ t->spi, t->seq,
+ format_ipsec_crypto_alg, t->crypto_alg,
+ format_ipsec_integ_alg, t->integ_alg);
+ return s;
+}
+
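+/*
+ * AES-CBC encryption helper. The EVP cipher context is kept per worker
+ * thread and is only re-initialised with a new cipher when the
+ * algorithm differs from the one used for the previous packet on this
+ * thread, avoiding a full OpenSSL context setup per packet.
+ */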
+always_inline void
+esp_encrypt_aes_cbc (ipsec_crypto_alg_t alg,
+ u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv)
+{
+ esp_main_t *em = &esp_main;
+ u32 thread_index = vlib_get_thread_index ();
+ EVP_CIPHER_CTX *ctx = &(em->per_thread_data[thread_index].encrypt_ctx);
+ const EVP_CIPHER *cipher = NULL;
+ int out_len;
+
+ ASSERT (alg < IPSEC_CRYPTO_N_ALG);
+
+ if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == IPSEC_CRYPTO_ALG_NONE))
+ return;
+
+ if (PREDICT_FALSE
+ (alg != em->per_thread_data[thread_index].last_encrypt_alg))
+ {
+ cipher = em->esp_crypto_algs[alg].type;
+ em->per_thread_data[thread_index].last_encrypt_alg = alg;
+ }
+
+ EVP_EncryptInit_ex (ctx, cipher, NULL, key, iv);
+
+ EVP_EncryptUpdate (ctx, out, &out_len, in, in_len);
+ EVP_EncryptFinal_ex (ctx, out + out_len, &out_len);
+}
+
+static uword
+esp_encrypt_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, *to_next = 0, next_index;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ ipsec_main_t *im = &ipsec_main;
+ u32 *recycle = 0;
+ u32 thread_index = vlib_get_thread_index ();
+
+ ipsec_alloc_empty_buffers (vm, im);
+
+ u32 *empty_buffers = im->empty_buffers[thread_index];
+
+ if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from))
+ {
+ vlib_node_increment_counter (vm, esp_encrypt_node.index,
+ ESP_ENCRYPT_ERROR_NO_BUFFER, n_left_from);
+ clib_warning ("no enough empty buffers. discarding frame");
+ goto free_buffers_and_exit;
+ }
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 i_bi0, o_bi0, next0;
+ vlib_buffer_t *i_b0, *o_b0 = 0;
+ u32 sa_index0;
+ ipsec_sa_t *sa0;
+ ip4_and_esp_header_t *ih0, *oh0 = 0;
+ ip6_and_esp_header_t *ih6_0, *oh6_0 = 0;
+ uword last_empty_buffer;
+ esp_header_t *o_esp0;
+ esp_footer_t *f0;
+ u8 is_ipv6;
+ u8 ip_hdr_size;
+ u8 next_hdr_type;
+ u32 ip_proto = 0;
+ u8 transport_mode = 0;
+
+ i_bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ next0 = ESP_ENCRYPT_NEXT_DROP;
+
+ i_b0 = vlib_get_buffer (vm, i_bi0);
+ sa_index0 = vnet_buffer (i_b0)->ipsec.sad_index;
+ sa0 = pool_elt_at_index (im->sad, sa_index0);
+
+ if (PREDICT_FALSE (esp_seq_advance (sa0)))
+ {
+ clib_warning ("sequence number counter has cycled SPI %u",
+ sa0->spi);
+ vlib_node_increment_counter (vm, esp_encrypt_node.index,
+ ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1);
+ //TODO: rekey SA
+ o_bi0 = i_bi0;
+ to_next[0] = o_bi0;
+ to_next += 1;
+ goto trace;
+ }
+
+ sa0->total_data_size += i_b0->current_length;
+
+ /* grab free buffer */
+ last_empty_buffer = vec_len (empty_buffers) - 1;
+ o_bi0 = empty_buffers[last_empty_buffer];
+ o_b0 = vlib_get_buffer (vm, o_bi0);
+ o_b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ o_b0->current_data = sizeof (ethernet_header_t);
+ ih0 = vlib_buffer_get_current (i_b0);
+ vlib_prefetch_buffer_with_index (vm,
+ empty_buffers[last_empty_buffer -
+ 1], STORE);
+ _vec_len (empty_buffers) = last_empty_buffer;
+ to_next[0] = o_bi0;
+ to_next += 1;
+
+ /* add old buffer to the recycle list */
+ vec_add1 (recycle, i_bi0);
+
+ /* is ipv6 */
+ if (PREDICT_FALSE
+ ((ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60))
+ {
+ is_ipv6 = 1;
+ ih6_0 = vlib_buffer_get_current (i_b0);
+ ip_hdr_size = sizeof (ip6_header_t);
+ next_hdr_type = IP_PROTOCOL_IPV6;
+ oh6_0 = vlib_buffer_get_current (o_b0);
+ o_esp0 = vlib_buffer_get_current (o_b0) + sizeof (ip6_header_t);
+
+ oh6_0->ip6.ip_version_traffic_class_and_flow_label =
+ ih6_0->ip6.ip_version_traffic_class_and_flow_label;
+ oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
+ oh6_0->ip6.hop_limit = 254;
+ oh6_0->ip6.src_address.as_u64[0] =
+ ih6_0->ip6.src_address.as_u64[0];
+ oh6_0->ip6.src_address.as_u64[1] =
+ ih6_0->ip6.src_address.as_u64[1];
+ oh6_0->ip6.dst_address.as_u64[0] =
+ ih6_0->ip6.dst_address.as_u64[0];
+ oh6_0->ip6.dst_address.as_u64[1] =
+ ih6_0->ip6.dst_address.as_u64[1];
+ oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi);
+ oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq);
+ ip_proto = ih6_0->ip6.protocol;
+
+ next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP;
+ }
+ else
+ {
+ is_ipv6 = 0;
+ ip_hdr_size = sizeof (ip4_header_t);
+ next_hdr_type = IP_PROTOCOL_IP_IN_IP;
+ oh0 = vlib_buffer_get_current (o_b0);
+ o_esp0 = vlib_buffer_get_current (o_b0) + sizeof (ip4_header_t);
+
+ oh0->ip4.ip_version_and_header_length = 0x45;
+ oh0->ip4.tos = ih0->ip4.tos;
+ oh0->ip4.fragment_id = 0;
+ oh0->ip4.flags_and_fragment_offset = 0;
+ oh0->ip4.ttl = 254;
+ oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
+ oh0->ip4.src_address.as_u32 = ih0->ip4.src_address.as_u32;
+ oh0->ip4.dst_address.as_u32 = ih0->ip4.dst_address.as_u32;
+ oh0->esp.spi = clib_net_to_host_u32 (sa0->spi);
+ oh0->esp.seq = clib_net_to_host_u32 (sa0->seq);
+ ip_proto = ih0->ip4.protocol;
+
+ next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP;
+ }
+
+ if (PREDICT_TRUE
+ (!is_ipv6 && sa0->is_tunnel && !sa0->is_tunnel_ip6))
+ {
+ oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32;
+ oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32;
+
+ vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ }
+ else if (is_ipv6 && sa0->is_tunnel && sa0->is_tunnel_ip6)
+ {
+ oh6_0->ip6.src_address.as_u64[0] =
+ sa0->tunnel_src_addr.ip6.as_u64[0];
+ oh6_0->ip6.src_address.as_u64[1] =
+ sa0->tunnel_src_addr.ip6.as_u64[1];
+ oh6_0->ip6.dst_address.as_u64[0] =
+ sa0->tunnel_dst_addr.ip6.as_u64[0];
+ oh6_0->ip6.dst_address.as_u64[1] =
+ sa0->tunnel_dst_addr.ip6.as_u64[1];
+
+ vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ }
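+ /* Transport mode: the original IP header is kept. If the buffer
+ * came from an interface (TX sw_if_index set), the original
+ * ethernet header is copied in front of the ESP packet, which is
+ * then sent straight to interface-output. */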
+ else
+ {
+ next_hdr_type = ip_proto;
+ if (vnet_buffer (i_b0)->sw_if_index[VLIB_TX] != ~0)
+ {
+ transport_mode = 1;
+ ethernet_header_t *ieh0, *oeh0;
+ ieh0 =
+ (ethernet_header_t *) ((u8 *)
+ vlib_buffer_get_current (i_b0) -
+ sizeof (ethernet_header_t));
+ oeh0 = (ethernet_header_t *) o_b0->data;
+ clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t));
+ next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT;
+ vnet_buffer (o_b0)->sw_if_index[VLIB_TX] =
+ vnet_buffer (i_b0)->sw_if_index[VLIB_TX];
+ }
+ vlib_buffer_advance (i_b0, ip_hdr_size);
+ }
+
+ ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG);
+
+ if (PREDICT_TRUE (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE))
+ {
+
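+ /* ESP trailer handling (RFC 4303): payload, padding and the 2-byte
+ * pad_length/next_header trailer must fill a whole number of cipher
+ * blocks; padding bytes carry the monotonic values 1, 2, 3, ... */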
+ const int BLOCK_SIZE = 16;
+ const int IV_SIZE = 16;
+ int blocks = 1 + (i_b0->current_length + 1) / BLOCK_SIZE;
+
+ /* pad packet in input buffer */
+ u8 pad_bytes = BLOCK_SIZE * blocks - 2 - i_b0->current_length;
+ u8 i;
+ u8 *padding =
+ vlib_buffer_get_current (i_b0) + i_b0->current_length;
+ i_b0->current_length = BLOCK_SIZE * blocks;
+ for (i = 0; i < pad_bytes; ++i)
+ {
+ padding[i] = i + 1;
+ }
+ f0 = vlib_buffer_get_current (i_b0) + i_b0->current_length - 2;
+ f0->pad_length = pad_bytes;
+ f0->next_header = next_hdr_type;
+
+ o_b0->current_length = ip_hdr_size + sizeof (esp_header_t) +
+ BLOCK_SIZE * blocks + IV_SIZE;
+
+ vnet_buffer (o_b0)->sw_if_index[VLIB_RX] =
+ vnet_buffer (i_b0)->sw_if_index[VLIB_RX];
+
+ u8 iv[16];
+ RAND_bytes (iv, sizeof (iv));
+
+ clib_memcpy ((u8 *) vlib_buffer_get_current (o_b0) +
+ ip_hdr_size + sizeof (esp_header_t), iv, 16);
+
+ esp_encrypt_aes_cbc (sa0->crypto_alg,
+ (u8 *) vlib_buffer_get_current (i_b0),
+ (u8 *) vlib_buffer_get_current (o_b0) +
+ ip_hdr_size + sizeof (esp_header_t) +
+ IV_SIZE, BLOCK_SIZE * blocks,
+ sa0->crypto_key, iv);
+ }
+
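+ /* Append the integrity check value over the ESP header, IV and
+ * ciphertext; an SA without an integrity algorithm adds nothing. */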
+ o_b0->current_length += hmac_calc (sa0->integ_alg, sa0->integ_key,
+ sa0->integ_key_len,
+ (u8 *) o_esp0,
+ o_b0->current_length -
+ ip_hdr_size,
+ vlib_buffer_get_current (o_b0) +
+ o_b0->current_length,
+ sa0->use_esn, sa0->seq_hi);
+
+
+ if (PREDICT_FALSE (is_ipv6))
+ {
+ oh6_0->ip6.payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, o_b0) -
+ sizeof (ip6_header_t));
+ }
+ else
+ {
+ oh0->ip4.length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, o_b0));
+ oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4);
+ }
+
+ if (transport_mode)
+ vlib_buffer_reset (o_b0);
+
+ trace:
+ if (PREDICT_FALSE (i_b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ if (o_b0)
+ {
+ o_b0->flags |= VLIB_BUFFER_IS_TRACED;
+ o_b0->trace_index = i_b0->trace_index;
+ esp_encrypt_trace_t *tr =
+ vlib_add_trace (vm, node, o_b0, sizeof (*tr));
+ tr->spi = sa0->spi;
+ tr->seq = sa0->seq - 1;
+ tr->crypto_alg = sa0->crypto_alg;
+ tr->integ_alg = sa0->integ_alg;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, o_bi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, esp_encrypt_node.index,
+ ESP_ENCRYPT_ERROR_RX_PKTS,
+ from_frame->n_vectors);
+
+free_buffers_and_exit:
+ if (recycle)
+ vlib_buffer_free (vm, recycle, vec_len (recycle));
+ vec_free (recycle);
+ return from_frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (esp_encrypt_node) = {
+ .function = esp_encrypt_node_fn,
+ .name = "esp-encrypt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_esp_encrypt_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
+ .error_strings = esp_encrypt_error_strings,
+
+ .n_next_nodes = ESP_ENCRYPT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n,
+ foreach_esp_encrypt_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (esp_encrypt_node, esp_encrypt_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c
new file mode 100644
index 00000000..a3dc7b87
--- /dev/null
+++ b/src/vnet/ipsec/ikev2.c
@@ -0,0 +1,3450 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vppinfra/random.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ikev2.h>
+#include <vnet/ipsec/ikev2_priv.h>
+#include <openssl/sha.h>
+
+static int ikev2_delete_tunnel_interface (vnet_main_t * vnm,
+ ikev2_sa_t * sa,
+ ikev2_child_sa_t * child);
+
+#define ikev2_set_state(sa, v) do { \
+ (sa)->state = v; \
+ clib_warning("sa state changed to " #v); \
+ } while(0)
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+} ikev2_trace_t;
+
+static u8 *
+format_ikev2_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ikev2_trace_t *t = va_arg (*args, ikev2_trace_t *);
+
+ s = format (s, "ikev2: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+static vlib_node_registration_t ikev2_node;
+
+#define foreach_ikev2_error \
+_(PROCESSED, "IKEv2 packets processed") \
+_(IKE_SA_INIT_RETRANSMIT, "IKE_SA_INIT retransmit") \
+_(IKE_SA_INIT_IGNORE, "IKE_SA_INIT ignore (IKE SA already auth)") \
+_(IKE_REQ_RETRANSMIT, "IKE request retransmit") \
+_(IKE_REQ_IGNORE, "IKE request ignore (old msgid)") \
+_(NOT_IKEV2, "Non-IKEv2 packets received")
+
+typedef enum
+{
+#define _(sym,str) IKEV2_ERROR_##sym,
+ foreach_ikev2_error
+#undef _
+ IKEV2_N_ERROR,
+} ikev2_error_t;
+
+static char *ikev2_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ikev2_error
+#undef _
+};
+
+typedef enum
+{
+ IKEV2_NEXT_IP4_LOOKUP,
+ IKEV2_NEXT_ERROR_DROP,
+ IKEV2_N_NEXT,
+} ikev2_next_t;
+
+static ikev2_sa_transform_t *
+ikev2_find_transform_data (ikev2_sa_transform_t * t)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_sa_transform_t *td;
+
+ vec_foreach (td, km->supported_transforms)
+ {
+ if (td->type != t->type)
+ continue;
+
+ if (td->transform_id != t->transform_id)
+ continue;
+
+ if (td->type == IKEV2_TRANSFORM_TYPE_ENCR)
+ {
+ if (vec_len (t->attrs) != 4 || t->attrs[0] != 0x80
+ || t->attrs[1] != 14)
+ continue;
+
+ if (((t->attrs[2] << 8 | t->attrs[3]) / 8) != td->key_len)
+ continue;
+ }
+ return td;
+ }
+ return 0;
+}
+
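+/*
+ * Select the first offered proposal whose transforms we support. Per
+ * RFC 7296 section 3.3.3: for IKE, ENCR, PRF, INTEG and DH are all
+ * mandatory; for ESP, ENCR and ESN are mandatory while INTEG and DH
+ * are optional; for AH, INTEG and ESN are mandatory and DH optional.
+ */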
+static ikev2_sa_proposal_t *
+ikev2_select_proposal (ikev2_sa_proposal_t * proposals,
+ ikev2_protocol_id_t prot_id)
+{
+ ikev2_sa_proposal_t *rv = 0;
+ ikev2_sa_proposal_t *proposal;
+ ikev2_sa_transform_t *transform, *new_t;
+ u8 mandatory_bitmap, optional_bitmap;
+
+ if (prot_id == IKEV2_PROTOCOL_IKE)
+ {
+ mandatory_bitmap = (1 << IKEV2_TRANSFORM_TYPE_ENCR) |
+ (1 << IKEV2_TRANSFORM_TYPE_PRF) |
+ (1 << IKEV2_TRANSFORM_TYPE_INTEG) | (1 << IKEV2_TRANSFORM_TYPE_DH);
+ optional_bitmap = mandatory_bitmap;
+ }
+ else if (prot_id == IKEV2_PROTOCOL_ESP)
+ {
+ mandatory_bitmap = (1 << IKEV2_TRANSFORM_TYPE_ENCR) |
+ (1 << IKEV2_TRANSFORM_TYPE_ESN);
+ optional_bitmap = mandatory_bitmap |
+ (1 << IKEV2_TRANSFORM_TYPE_INTEG) | (1 << IKEV2_TRANSFORM_TYPE_DH);
+ }
+ else if (prot_id == IKEV2_PROTOCOL_AH)
+ {
+ mandatory_bitmap = (1 << IKEV2_TRANSFORM_TYPE_INTEG) |
+ (1 << IKEV2_TRANSFORM_TYPE_ESN);
+ optional_bitmap = mandatory_bitmap | (1 << IKEV2_TRANSFORM_TYPE_DH);
+ }
+ else
+ return 0;
+
+ vec_add2 (rv, proposal, 1);
+
+ vec_foreach (proposal, proposals)
+ {
+ u8 bitmap = 0;
+ if (proposal->protocol_id != prot_id)
+ continue;
+
+ vec_foreach (transform, proposal->transforms)
+ {
+ if ((1 << transform->type) & bitmap)
+ continue;
+
+ if (ikev2_find_transform_data (transform))
+ {
+ bitmap |= 1 << transform->type;
+ vec_add2 (rv->transforms, new_t, 1);
+ clib_memcpy (new_t, transform, sizeof (*new_t));
+ new_t->attrs = vec_dup (transform->attrs);
+ }
+ }
+
+ clib_warning ("bitmap is %x mandatory is %x optional is %x",
+ bitmap, mandatory_bitmap, optional_bitmap);
+
+ if ((bitmap & mandatory_bitmap) == mandatory_bitmap &&
+ (bitmap & ~optional_bitmap) == 0)
+ {
+ rv->proposal_num = proposal->proposal_num;
+ rv->protocol_id = proposal->protocol_id;
+ RAND_bytes ((u8 *) & rv->spi, sizeof (rv->spi));
+ goto done;
+ }
+ else
+ {
+ vec_free (rv->transforms);
+ }
+ }
+
+ vec_free (rv);
+done:
+ return rv;
+}
+
+ikev2_sa_transform_t *
+ikev2_sa_get_td_for_type (ikev2_sa_proposal_t * p,
+ ikev2_transform_type_t type)
+{
+ ikev2_sa_transform_t *t;
+
+ if (!p)
+ return 0;
+
+ vec_foreach (t, p->transforms)
+ {
+ if (t->type == type)
+ return ikev2_find_transform_data (t);
+ }
+ return 0;
+}
+
+ikev2_child_sa_t *
+ikev2_sa_get_child (ikev2_sa_t * sa, u32 spi, ikev2_protocol_id_t prot_id,
+ int by_initiator)
+{
+ ikev2_child_sa_t *c;
+ vec_foreach (c, sa->childs)
+ {
+ ikev2_sa_proposal_t *proposal =
+ by_initiator ? &c->i_proposals[0] : &c->r_proposals[0];
+ if (proposal && proposal->spi == spi && proposal->protocol_id == prot_id)
+ return c;
+ }
+
+ return 0;
+}
+
+void
+ikev2_sa_free_proposal_vector (ikev2_sa_proposal_t ** v)
+{
+ ikev2_sa_proposal_t *p;
+ ikev2_sa_transform_t *t;
+
+ if (!*v)
+ return;
+
+ vec_foreach (p, *v)
+ {
+ vec_foreach (t, p->transforms)
+ {
+ vec_free (t->attrs);
+ }
+ vec_free (p->transforms);
+ }
+ vec_free (*v);
+}
+
+static void
+ikev2_sa_free_all_child_sa (ikev2_child_sa_t ** childs)
+{
+ ikev2_child_sa_t *c;
+ vec_foreach (c, *childs)
+ {
+ ikev2_sa_free_proposal_vector (&c->r_proposals);
+ ikev2_sa_free_proposal_vector (&c->i_proposals);
+ vec_free (c->sk_ai);
+ vec_free (c->sk_ar);
+ vec_free (c->sk_ei);
+ vec_free (c->sk_er);
+ }
+
+ vec_free (*childs);
+}
+
+static void
+ikev2_sa_del_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * child)
+{
+ ikev2_sa_free_proposal_vector (&child->r_proposals);
+ ikev2_sa_free_proposal_vector (&child->i_proposals);
+ vec_free (child->sk_ai);
+ vec_free (child->sk_ar);
+ vec_free (child->sk_ei);
+ vec_free (child->sk_er);
+
+ vec_del1 (sa->childs, child - sa->childs);
+}
+
+static void
+ikev2_sa_free_all_vec (ikev2_sa_t * sa)
+{
+ vec_free (sa->i_nonce);
+ vec_free (sa->i_dh_data);
+ vec_free (sa->dh_shared_key);
+ vec_free (sa->dh_private_key);
+
+ ikev2_sa_free_proposal_vector (&sa->r_proposals);
+ ikev2_sa_free_proposal_vector (&sa->i_proposals);
+
+ vec_free (sa->sk_d);
+ vec_free (sa->sk_ai);
+ vec_free (sa->sk_ar);
+ vec_free (sa->sk_ei);
+ vec_free (sa->sk_er);
+ vec_free (sa->sk_pi);
+ vec_free (sa->sk_pr);
+
+ vec_free (sa->i_id.data);
+ vec_free (sa->i_auth.data);
+ vec_free (sa->r_id.data);
+ vec_free (sa->r_auth.data);
+ if (sa->r_auth.key)
+ EVP_PKEY_free (sa->r_auth.key);
+
+ vec_free (sa->del);
+
+ ikev2_sa_free_all_child_sa (&sa->childs);
+}
+
+static void
+ikev2_delete_sa (ikev2_sa_t * sa)
+{
+ ikev2_main_t *km = &ikev2_main;
+ u32 thread_index = vlib_get_thread_index ();
+ uword *p;
+
+ ikev2_sa_free_all_vec (sa);
+
+ p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi);
+ if (p)
+ {
+ hash_unset (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi);
+ pool_put (km->per_thread_data[thread_index].sas, sa);
+ }
+}
+
+static void
+ikev2_generate_sa_init_data (ikev2_sa_t * sa)
+{
+ ikev2_sa_transform_t *t = 0, *t2;
+ ikev2_main_t *km = &ikev2_main;
+
+ if (sa->dh_group == IKEV2_TRANSFORM_DH_TYPE_NONE)
+ {
+ return;
+ }
+
+ /* check if received DH group is on our list of supported groups */
+ vec_foreach (t2, km->supported_transforms)
+ {
+ if (t2->type == IKEV2_TRANSFORM_TYPE_DH && sa->dh_group == t2->dh_type)
+ {
+ t = t2;
+ break;
+ }
+ }
+
+ if (!t)
+ {
+ clib_warning ("unknown dh data group %u (data len %u)", sa->dh_group,
+ vec_len (sa->i_dh_data));
+ sa->dh_group = IKEV2_TRANSFORM_DH_TYPE_NONE;
+ return;
+ }
+
+ if (sa->is_initiator)
+ {
+ /* generate ispi */
+ RAND_bytes ((u8 *) & sa->ispi, 8);
+
+ /* generate nonce */
+ sa->i_nonce = vec_new (u8, IKEV2_NONCE_SIZE);
+ RAND_bytes ((u8 *) sa->i_nonce, IKEV2_NONCE_SIZE);
+ }
+ else
+ {
+ /* generate rspi */
+ RAND_bytes ((u8 *) & sa->rspi, 8);
+
+ /* generate nonce */
+ sa->r_nonce = vec_new (u8, IKEV2_NONCE_SIZE);
+ RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE);
+ }
+
+ /* generate dh keys */
+ ikev2_generate_dh (sa, t);
+
+}
+
+static void
+ikev2_complete_sa_data (ikev2_sa_t * sa, ikev2_sa_t * sai)
+{
+ ikev2_sa_transform_t *t = 0, *t2;
+ ikev2_main_t *km = &ikev2_main;
+
+
+ /*move some data to the new SA */
+#define _(A) ({void* __tmp__ = (A); (A) = 0; __tmp__;})
+ sa->i_nonce = _(sai->i_nonce);
+ sa->i_dh_data = _(sai->i_dh_data);
+ sa->dh_private_key = _(sai->dh_private_key);
+ sa->iaddr.as_u32 = sai->iaddr.as_u32;
+ sa->raddr.as_u32 = sai->raddr.as_u32;
+ sa->is_initiator = sai->is_initiator;
+ sa->profile = sai->profile;
+ sa->i_id.type = sai->i_id.type;
+ sa->i_id.data = _(sai->i_id.data);
+ sa->i_auth.method = sai->i_auth.method;
+ sa->i_auth.hex = sai->i_auth.hex;
+ sa->i_auth.data = _(sai->i_auth.data);
+ sa->i_auth.key = _(sai->i_auth.key);
+ sa->last_sa_init_req_packet_data = _(sai->last_sa_init_req_packet_data);
+ sa->childs = _(sai->childs);
+#undef _
+
+
+ if (sa->dh_group == IKEV2_TRANSFORM_DH_TYPE_NONE)
+ {
+ return;
+ }
+
+ /* check if received DH group is on our list of supported groups */
+ vec_foreach (t2, km->supported_transforms)
+ {
+ if (t2->type == IKEV2_TRANSFORM_TYPE_DH && sa->dh_group == t2->dh_type)
+ {
+ t = t2;
+ break;
+ }
+ }
+
+ if (!t)
+ {
+ clib_warning ("unknown dh data group %u (data len %u)", sa->dh_group,
+ vec_len (sa->i_dh_data));
+ sa->dh_group = IKEV2_TRANSFORM_DH_TYPE_NONE;
+ return;
+ }
+
+
+ /* generate dh keys */
+ ikev2_complete_dh (sa, t);
+
+}
+
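+/*
+ * IKE SA key derivation (RFC 7296 section 2.14):
+ * SKEYSEED = prf(Ni | Nr, g^ir)
+ * {SK_d | SK_ai | SK_ar | SK_ei | SK_er | SK_pi | SK_pr}
+ * = prf+(SKEYSEED, Ni | Nr | SPIi | SPIr)
+ */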
+static void
+ikev2_calc_keys (ikev2_sa_t * sa)
+{
+ u8 *tmp;
+ /* calculate SKEYSEED = prf(Ni | Nr, g^ir) */
+ u8 *skeyseed = 0;
+ u8 *s = 0;
+ ikev2_sa_transform_t *tr_encr, *tr_prf, *tr_integ;
+ tr_encr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ tr_prf =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+ tr_integ =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+
+ vec_append (s, sa->i_nonce);
+ vec_append (s, sa->r_nonce);
+ skeyseed = ikev2_calc_prf (tr_prf, s, sa->dh_shared_key);
+
+ /* Calculate S = Ni | Nr | SPIi | SPIr */
+ u64 *spi;
+ vec_add2 (s, tmp, 2 * sizeof (*spi));
+ spi = (u64 *) tmp;
+ spi[0] = clib_host_to_net_u64 (sa->ispi);
+ spi[1] = clib_host_to_net_u64 (sa->rspi);
+
+ /* calculate PRFplus */
+ u8 *keymat;
+ int len = tr_prf->key_trunc + /* SK_d */
+ tr_integ->key_len * 2 + /* SK_ai, SK_ar */
+ tr_encr->key_len * 2 + /* SK_ei, SK_er */
+ tr_prf->key_len * 2; /* SK_pi, SK_pr */
+
+ keymat = ikev2_calc_prfplus (tr_prf, skeyseed, s, len);
+ vec_free (skeyseed);
+ vec_free (s);
+
+ int pos = 0;
+
+ /* SK_d */
+ sa->sk_d = vec_new (u8, tr_prf->key_trunc);
+ clib_memcpy (sa->sk_d, keymat + pos, tr_prf->key_trunc);
+ pos += tr_prf->key_trunc;
+
+ /* SK_ai */
+ sa->sk_ai = vec_new (u8, tr_integ->key_len);
+ clib_memcpy (sa->sk_ai, keymat + pos, tr_integ->key_len);
+ pos += tr_integ->key_len;
+
+ /* SK_ar */
+ sa->sk_ar = vec_new (u8, tr_integ->key_len);
+ clib_memcpy (sa->sk_ar, keymat + pos, tr_integ->key_len);
+ pos += tr_integ->key_len;
+
+ /* SK_ei */
+ sa->sk_ei = vec_new (u8, tr_encr->key_len);
+ clib_memcpy (sa->sk_ei, keymat + pos, tr_encr->key_len);
+ pos += tr_encr->key_len;
+
+ /* SK_er */
+ sa->sk_er = vec_new (u8, tr_encr->key_len);
+ clib_memcpy (sa->sk_er, keymat + pos, tr_encr->key_len);
+ pos += tr_encr->key_len;
+
+ /* SK_pi */
+ sa->sk_pi = vec_new (u8, tr_prf->key_len);
+ clib_memcpy (sa->sk_pi, keymat + pos, tr_prf->key_len);
+ pos += tr_prf->key_len;
+
+ /* SK_pr */
+ sa->sk_pr = vec_new (u8, tr_prf->key_len);
+ clib_memcpy (sa->sk_pr, keymat + pos, tr_prf->key_len);
+ pos += tr_prf->key_len;
+
+ vec_free (keymat);
+}
+
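+/*
+ * Child SA key derivation (RFC 7296 section 2.17):
+ * KEYMAT = prf+(SK_d, Ni | Nr), consumed in the order SK_ei, SK_ai,
+ * SK_er, SK_ar (initiator-to-responder keys first, encryption key
+ * before integrity key).
+ */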
+static void
+ikev2_calc_child_keys (ikev2_sa_t * sa, ikev2_child_sa_t * child)
+{
+ u8 *s = 0;
+ ikev2_sa_transform_t *tr_prf, *ctr_encr, *ctr_integ;
+ tr_prf =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+ ctr_encr =
+ ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ ctr_integ =
+ ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+
+ vec_append (s, sa->i_nonce);
+ vec_append (s, sa->r_nonce);
+ /* calculate PRFplus */
+ u8 *keymat;
+ int len = ctr_encr->key_len * 2 + ctr_integ->key_len * 2;
+
+ keymat = ikev2_calc_prfplus (tr_prf, sa->sk_d, s, len);
+
+ int pos = 0;
+
+ /* SK_ei */
+ child->sk_ei = vec_new (u8, ctr_encr->key_len);
+ clib_memcpy (child->sk_ei, keymat + pos, ctr_encr->key_len);
+ pos += ctr_encr->key_len;
+
+ /* SK_ai */
+ child->sk_ai = vec_new (u8, ctr_integ->key_len);
+ clib_memcpy (child->sk_ai, keymat + pos, ctr_integ->key_len);
+ pos += ctr_integ->key_len;
+
+ /* SK_er */
+ child->sk_er = vec_new (u8, ctr_encr->key_len);
+ clib_memcpy (child->sk_er, keymat + pos, ctr_encr->key_len);
+ pos += ctr_encr->key_len;
+
+ /* SK_ar */
+ child->sk_ar = vec_new (u8, ctr_integ->key_len);
+ clib_memcpy (child->sk_ar, keymat + pos, ctr_integ->key_len);
+ pos += ctr_integ->key_len;
+
+ ASSERT (pos == len);
+
+ vec_free (keymat);
+}
+
+static void
+ikev2_process_sa_init_req (vlib_main_t * vm, ikev2_sa_t * sa,
+ ike_header_t * ike)
+{
+ int p = 0;
+ u32 len = clib_net_to_host_u32 (ike->length);
+ u8 payload = ike->nextpayload;
+
+ clib_warning ("ispi %lx rspi %lx nextpayload %x version %x "
+ "exchange %x flags %x msgid %x length %u",
+ clib_net_to_host_u64 (ike->ispi),
+ clib_net_to_host_u64 (ike->rspi),
+ payload, ike->version,
+ ike->exchange, ike->flags,
+ clib_net_to_host_u32 (ike->msgid), len);
+
+ sa->ispi = clib_net_to_host_u64 (ike->ispi);
+
+ /* store whole IKE payload - needed for PSK auth */
+ vec_free (sa->last_sa_init_req_packet_data);
+ vec_add (sa->last_sa_init_req_packet_data, ike, len);
+
+ while (p < len && payload != IKEV2_PAYLOAD_NONE)
+ {
+ ike_payload_header_t *ikep = (ike_payload_header_t *) & ike->payload[p];
+ u32 plen = clib_net_to_host_u16 (ikep->length);
+
+ if (plen < sizeof (ike_payload_header_t))
+ return;
+
+ if (payload == IKEV2_PAYLOAD_SA)
+ {
+ ikev2_sa_free_proposal_vector (&sa->i_proposals);
+ sa->i_proposals = ikev2_parse_sa_payload (ikep);
+ }
+ else if (payload == IKEV2_PAYLOAD_KE)
+ {
+ ike_ke_payload_header_t *ke = (ike_ke_payload_header_t *) ikep;
+ sa->dh_group = clib_net_to_host_u16 (ke->dh_group);
+ vec_free (sa->i_dh_data);
+ vec_add (sa->i_dh_data, ke->payload, plen - sizeof (*ke));
+ }
+ else if (payload == IKEV2_PAYLOAD_NONCE)
+ {
+ vec_free (sa->i_nonce);
+ vec_add (sa->i_nonce, ikep->payload, plen - sizeof (*ikep));
+ }
+ else if (payload == IKEV2_PAYLOAD_NOTIFY)
+ {
+ ikev2_notify_t *n = ikev2_parse_notify_payload (ikep);
+ vec_free (n);
+ }
+ else if (payload == IKEV2_PAYLOAD_VENDOR)
+ {
+ ikev2_parse_vendor_payload (ikep);
+ }
+ else
+ {
+ clib_warning ("unknown payload %u flags %x length %u", payload,
+ ikep->flags, plen);
+ if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL)
+ {
+ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
+ sa->unsupported_cp = payload;
+ return;
+ }
+ }
+
+ payload = ikep->nextpayload;
+ p += plen;
+ }
+
+ ikev2_set_state (sa, IKEV2_STATE_SA_INIT);
+}
+
+static void
+ikev2_process_sa_init_resp (vlib_main_t * vm, ikev2_sa_t * sa,
+ ike_header_t * ike)
+{
+ int p = 0;
+ u32 len = clib_net_to_host_u32 (ike->length);
+ u8 payload = ike->nextpayload;
+
+ clib_warning ("ispi %lx rspi %lx nextpayload %x version %x "
+ "exchange %x flags %x msgid %x length %u",
+ clib_net_to_host_u64 (ike->ispi),
+ clib_net_to_host_u64 (ike->rspi),
+ payload, ike->version,
+ ike->exchange, ike->flags,
+ clib_net_to_host_u32 (ike->msgid), len);
+
+ sa->ispi = clib_net_to_host_u64 (ike->ispi);
+ sa->rspi = clib_net_to_host_u64 (ike->rspi);
+
+ /* store whole IKE payload - needed for PSK auth */
+ vec_free (sa->last_sa_init_res_packet_data);
+ vec_add (sa->last_sa_init_res_packet_data, ike, len);
+
+ while (p < len && payload != IKEV2_PAYLOAD_NONE)
+ {
+ ike_payload_header_t *ikep = (ike_payload_header_t *) & ike->payload[p];
+ u32 plen = clib_net_to_host_u16 (ikep->length);
+
+ if (plen < sizeof (ike_payload_header_t))
+ return;
+
+ if (payload == IKEV2_PAYLOAD_SA)
+ {
+ ikev2_sa_free_proposal_vector (&sa->r_proposals);
+ sa->r_proposals = ikev2_parse_sa_payload (ikep);
+ if (sa->r_proposals)
+ {
+ ikev2_set_state (sa, IKEV2_STATE_SA_INIT);
+ ike->msgid =
+ clib_host_to_net_u32 (clib_net_to_host_u32 (ike->msgid) + 1);
+ }
+ }
+ else if (payload == IKEV2_PAYLOAD_KE)
+ {
+ ike_ke_payload_header_t *ke = (ike_ke_payload_header_t *) ikep;
+ sa->dh_group = clib_net_to_host_u16 (ke->dh_group);
+ vec_free (sa->r_dh_data);
+ vec_add (sa->r_dh_data, ke->payload, plen - sizeof (*ke));
+ }
+ else if (payload == IKEV2_PAYLOAD_NONCE)
+ {
+ vec_free (sa->r_nonce);
+ vec_add (sa->r_nonce, ikep->payload, plen - sizeof (*ikep));
+ }
+ else if (payload == IKEV2_PAYLOAD_NOTIFY)
+ {
+ ikev2_notify_t *n = ikev2_parse_notify_payload (ikep);
+ vec_free (n);
+ }
+ else if (payload == IKEV2_PAYLOAD_VENDOR)
+ {
+ ikev2_parse_vendor_payload (ikep);
+ }
+ else
+ {
+ clib_warning ("unknown payload %u flags %x length %u", payload,
+ ikep->flags, plen);
+ if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL)
+ {
+ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
+ sa->unsupported_cp = payload;
+ return;
+ }
+ }
+
+ payload = ikep->nextpayload;
+ p += plen;
+ }
+}
+
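+/*
+ * Walk the payload chain up to the Encrypted (SK) payload, verify the
+ * integrity checksum over the whole message except the checksum
+ * itself, and only then decrypt. Returns 0 (drop) on any failure.
+ */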
+static u8 *
+ikev2_decrypt_sk_payload (ikev2_sa_t * sa, ike_header_t * ike, u8 * payload)
+{
+ int p = 0;
+ u8 last_payload = 0;
+ u8 *hmac = 0;
+ u32 len = clib_net_to_host_u32 (ike->length);
+ ike_payload_header_t *ikep = 0;
+ u32 plen = 0;
+ ikev2_sa_transform_t *tr_integ;
+ tr_integ =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+
+ while (p < len &&
+ *payload != IKEV2_PAYLOAD_NONE && last_payload != IKEV2_PAYLOAD_SK)
+ {
+ ikep = (ike_payload_header_t *) & ike->payload[p];
+ plen = clib_net_to_host_u16 (ikep->length);
+
+ if (plen < sizeof (*ikep))
+ return 0;
+
+ if (*payload == IKEV2_PAYLOAD_SK)
+ {
+ clib_warning ("received IKEv2 payload SK, len %u", plen - 4);
+ last_payload = *payload;
+ }
+ else
+ {
+ clib_warning ("unknown payload %u flags %x length %u", payload,
+ ikep->flags, plen);
+ if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL)
+ {
+ sa->unsupported_cp = *payload;
+ return 0;
+ }
+ }
+
+ *payload = ikep->nextpayload;
+ p += plen;
+ }
+
+ if (last_payload != IKEV2_PAYLOAD_SK)
+ {
+ clib_warning ("Last payload must be SK");
+ return 0;
+ }
+
+ hmac =
+ ikev2_calc_integr (tr_integ, sa->is_initiator ? sa->sk_ar : sa->sk_ai,
+ (u8 *) ike, len - tr_integ->key_trunc);
+
+ plen = plen - sizeof (*ikep) - tr_integ->key_trunc;
+
+ if (memcmp (hmac, &ikep->payload[plen], tr_integ->key_trunc))
+ {
+ clib_warning ("message integrity check failed");
+ vec_free (hmac);
+ return 0;
+ }
+ vec_free (hmac);
+
+ return ikev2_decrypt_data (sa, ikep->payload, plen);
+}
+
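+/*
+ * INITIAL_CONTACT handling (RFC 7296 section 2.4): when the peer
+ * asserts initial contact, delete old IKE SAs that were authenticated
+ * with the same identity, keeping only the current one.
+ */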
+static void
+ikev2_initial_contact_cleanup (ikev2_sa_t * sa)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_sa_t *tmp;
+ u32 i, *delete = 0;
+ ikev2_child_sa_t *c;
+ u32 thread_index = vlib_get_thread_index ();
+
+ if (!sa->initial_contact)
+ return;
+
+ /* find old IKE SAs with the same authenticated identity */
+ /* *INDENT-OFF* */
+ pool_foreach (tmp, km->per_thread_data[thread_index].sas, ({
+ if (tmp->i_id.type != sa->i_id.type ||
+ vec_len(tmp->i_id.data) != vec_len(sa->i_id.data) ||
+ memcmp(sa->i_id.data, tmp->i_id.data, vec_len(sa->i_id.data)))
+ continue;
+
+ if (sa->rspi != tmp->rspi)
+ vec_add1(delete, tmp - km->per_thread_data[thread_index].sas);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (delete); i++)
+ {
+ tmp =
+ pool_elt_at_index (km->per_thread_data[thread_index].sas, delete[i]);
+ vec_foreach (c,
+ tmp->childs) ikev2_delete_tunnel_interface (km->vnet_main,
+ tmp, c);
+ ikev2_delete_sa (tmp);
+ }
+
+ vec_free (delete);
+ sa->initial_contact = 0;
+}
+
+static void
+ikev2_process_auth_req (vlib_main_t * vm, ikev2_sa_t * sa, ike_header_t * ike)
+{
+ ikev2_child_sa_t *first_child_sa;
+ int p = 0;
+ u32 len = clib_net_to_host_u32 (ike->length);
+ u8 payload = ike->nextpayload;
+ u8 *plaintext = 0;
+
+ ike_payload_header_t *ikep;
+ u32 plen;
+
+ clib_warning ("ispi %lx rspi %lx nextpayload %x version %x "
+ "exchange %x flags %x msgid %x length %u",
+ clib_net_to_host_u64 (ike->ispi),
+ clib_net_to_host_u64 (ike->rspi),
+ payload, ike->version,
+ ike->exchange, ike->flags,
+ clib_net_to_host_u32 (ike->msgid), len);
+
+ ikev2_calc_keys (sa);
+
+ plaintext = ikev2_decrypt_sk_payload (sa, ike, &payload);
+
+ if (!plaintext)
+ {
+ if (sa->unsupported_cp)
+ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
+ goto cleanup_and_exit;
+ }
+
+ /* select or create 1st child SA */
+ if (sa->is_initiator)
+ {
+ first_child_sa = &sa->childs[0];
+ }
+ else
+ {
+ ikev2_sa_free_all_child_sa (&sa->childs);
+ vec_add2 (sa->childs, first_child_sa, 1);
+ }
+
+
+ /* process encrypted payload */
+ p = 0;
+ while (p < vec_len (plaintext) && payload != IKEV2_PAYLOAD_NONE)
+ {
+ ikep = (ike_payload_header_t *) & plaintext[p];
+ plen = clib_net_to_host_u16 (ikep->length);
+
+ if (plen < sizeof (ike_payload_header_t))
+ goto cleanup_and_exit;
+
+ if (payload == IKEV2_PAYLOAD_SA) /* 33 */
+ {
+ clib_warning ("received payload SA, len %u", plen - sizeof (*ikep));
+ if (sa->is_initiator)
+ {
+ ikev2_sa_free_proposal_vector (&first_child_sa->r_proposals);
+ first_child_sa->r_proposals = ikev2_parse_sa_payload (ikep);
+ }
+ else
+ {
+ ikev2_sa_free_proposal_vector (&first_child_sa->i_proposals);
+ first_child_sa->i_proposals = ikev2_parse_sa_payload (ikep);
+ }
+ }
+ else if (payload == IKEV2_PAYLOAD_IDI) /* 35 */
+ {
+ ike_id_payload_header_t *id = (ike_id_payload_header_t *) ikep;
+
+ sa->i_id.type = id->id_type;
+ vec_free (sa->i_id.data);
+ vec_add (sa->i_id.data, id->payload, plen - sizeof (*id));
+
+ clib_warning ("received payload IDi, len %u id_type %u",
+ plen - sizeof (*id), id->id_type);
+ }
+ else if (payload == IKEV2_PAYLOAD_IDR) /* 36 */
+ {
+ ike_id_payload_header_t *id = (ike_id_payload_header_t *) ikep;
+
+ sa->r_id.type = id->id_type;
+ vec_free (sa->r_id.data);
+ vec_add (sa->r_id.data, id->payload, plen - sizeof (*id));
+
+ clib_warning ("received payload IDr len %u id_type %u",
+ plen - sizeof (*id), id->id_type);
+ }
+ else if (payload == IKEV2_PAYLOAD_AUTH) /* 39 */
+ {
+ ike_auth_payload_header_t *a = (ike_auth_payload_header_t *) ikep;
+
+ if (sa->is_initiator)
+ {
+ sa->r_auth.method = a->auth_method;
+ vec_free (sa->r_auth.data);
+ vec_add (sa->r_auth.data, a->payload, plen - sizeof (*a));
+ }
+ else
+ {
+ sa->i_auth.method = a->auth_method;
+ vec_free (sa->i_auth.data);
+ vec_add (sa->i_auth.data, a->payload, plen - sizeof (*a));
+ }
+
+ clib_warning ("received payload AUTH, len %u auth_type %u",
+ plen - sizeof (*a), a->auth_method);
+ }
+ else if (payload == IKEV2_PAYLOAD_NOTIFY) /* 41 */
+ {
+ ikev2_notify_t *n = ikev2_parse_notify_payload (ikep);
+ if (n->msg_type == IKEV2_NOTIFY_MSG_INITIAL_CONTACT)
+ {
+ sa->initial_contact = 1;
+ }
+ vec_free (n);
+ }
+ else if (payload == IKEV2_PAYLOAD_VENDOR) /* 43 */
+ {
+ ikev2_parse_vendor_payload (ikep);
+ }
+ else if (payload == IKEV2_PAYLOAD_TSI) /* 44 */
+ {
+ clib_warning ("received payload TSi, len %u",
+ plen - sizeof (*ikep));
+
+ vec_free (first_child_sa->tsi);
+ first_child_sa->tsi = ikev2_parse_ts_payload (ikep);
+ }
+ else if (payload == IKEV2_PAYLOAD_TSR) /* 45 */
+ {
+ clib_warning ("received payload TSr, len %u",
+ plen - sizeof (*ikep));
+
+ vec_free (first_child_sa->tsr);
+ first_child_sa->tsr = ikev2_parse_ts_payload (ikep);
+ }
+ else
+ {
+ clib_warning ("unknown payload %u flags %x length %u data %u",
+ payload, ikep->flags, plen - 4,
+ format_hex_bytes, ikep->payload, plen - 4);
+
+ if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL)
+ {
+ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
+ sa->unsupported_cp = payload;
+ return;
+ }
+ }
+
+ payload = ikep->nextpayload;
+ p += plen;
+ }
+
+cleanup_and_exit:
+ vec_free (plaintext);
+}
+
+static void
+ikev2_process_informational_req (vlib_main_t * vm, ikev2_sa_t * sa,
+ ike_header_t * ike)
+{
+ int p = 0;
+ u32 len = clib_net_to_host_u32 (ike->length);
+ u8 payload = ike->nextpayload;
+ u8 *plaintext = 0;
+
+ ike_payload_header_t *ikep;
+ u32 plen;
+
+ clib_warning ("ispi %lx rspi %lx nextpayload %x version %x "
+ "exchange %x flags %x msgid %x length %u",
+ clib_net_to_host_u64 (ike->ispi),
+ clib_net_to_host_u64 (ike->rspi),
+ payload, ike->version,
+ ike->exchange, ike->flags,
+ clib_net_to_host_u32 (ike->msgid), len);
+
+ plaintext = ikev2_decrypt_sk_payload (sa, ike, &payload);
+
+ if (!plaintext)
+ goto cleanup_and_exit;
+
+ /* process encrypted payload */
+ p = 0;
+ while (p < vec_len (plaintext) && payload != IKEV2_PAYLOAD_NONE)
+ {
+ ikep = (ike_payload_header_t *) & plaintext[p];
+ plen = clib_net_to_host_u16 (ikep->length);
+
+ if (plen < sizeof (ike_payload_header_t))
+ goto cleanup_and_exit;
+
+ if (payload == IKEV2_PAYLOAD_NOTIFY) /* 41 */
+ {
+ ikev2_notify_t *n = ikev2_parse_notify_payload (ikep);
+ if (n->msg_type == IKEV2_NOTIFY_MSG_AUTHENTICATION_FAILED)
+ ikev2_set_state (sa, IKEV2_STATE_AUTH_FAILED);
+ vec_free (n);
+ }
+ else if (payload == IKEV2_PAYLOAD_DELETE) /* 42 */
+ {
+ sa->del = ikev2_parse_delete_payload (ikep);
+ }
+ else if (payload == IKEV2_PAYLOAD_VENDOR) /* 43 */
+ {
+ ikev2_parse_vendor_payload (ikep);
+ }
+ else
+ {
+ clib_warning ("unknown payload %u flags %x length %u data %u",
+ payload, ikep->flags, plen - 4,
+ format_hex_bytes, ikep->payload, plen - 4);
+
+ if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL)
+ {
+ sa->unsupported_cp = payload;
+ return;
+ }
+ }
+
+ payload = ikep->nextpayload;
+ p += plen;
+ }
+
+cleanup_and_exit:
+ vec_free (plaintext);
+}
+
+static void
+ikev2_process_create_child_sa_req (vlib_main_t * vm, ikev2_sa_t * sa,
+ ike_header_t * ike)
+{
+ int p = 0;
+ u32 len = clib_net_to_host_u32 (ike->length);
+ u8 payload = ike->nextpayload;
+ u8 *plaintext = 0;
+ u8 rekeying = 0;
+ u8 nonce[IKEV2_NONCE_SIZE];
+
+ ike_payload_header_t *ikep;
+ u32 plen;
+ ikev2_notify_t *n = 0;
+ ikev2_ts_t *tsi = 0;
+ ikev2_ts_t *tsr = 0;
+ ikev2_sa_proposal_t *proposal = 0;
+ ikev2_child_sa_t *child_sa;
+
+ clib_warning ("ispi %lx rspi %lx nextpayload %x version %x "
+ "exchange %x flags %x msgid %x length %u",
+ clib_net_to_host_u64 (ike->ispi),
+ clib_net_to_host_u64 (ike->rspi),
+ payload, ike->version,
+ ike->exchange, ike->flags,
+ clib_net_to_host_u32 (ike->msgid), len);
+
+ plaintext = ikev2_decrypt_sk_payload (sa, ike, &payload);
+
+ if (!plaintext)
+ goto cleanup_and_exit;
+
+ /* process encrypted payload */
+ p = 0;
+ while (p < vec_len (plaintext) && payload != IKEV2_PAYLOAD_NONE)
+ {
+ ikep = (ike_payload_header_t *) & plaintext[p];
+ plen = clib_net_to_host_u16 (ikep->length);
+
+ if (plen < sizeof (ike_payload_header_t))
+ goto cleanup_and_exit;
+
+ if (payload == IKEV2_PAYLOAD_SA)
+ {
+ proposal = ikev2_parse_sa_payload (ikep);
+ }
+ else if (payload == IKEV2_PAYLOAD_NOTIFY)
+ {
+ n = ikev2_parse_notify_payload (ikep);
+ if (n->msg_type == IKEV2_NOTIFY_MSG_REKEY_SA)
+ {
+ rekeying = 1;
+ }
+ }
+ else if (payload == IKEV2_PAYLOAD_DELETE)
+ {
+ sa->del = ikev2_parse_delete_payload (ikep);
+ }
+ else if (payload == IKEV2_PAYLOAD_VENDOR)
+ {
+ ikev2_parse_vendor_payload (ikep);
+ }
+ else if (payload == IKEV2_PAYLOAD_NONCE)
+ {
+ clib_memcpy (nonce, ikep->payload, plen - sizeof (*ikep));
+ }
+ else if (payload == IKEV2_PAYLOAD_TSI)
+ {
+ tsi = ikev2_parse_ts_payload (ikep);
+ }
+ else if (payload == IKEV2_PAYLOAD_TSR)
+ {
+ tsr = ikev2_parse_ts_payload (ikep);
+ }
+ else
+ {
+ clib_warning ("unknown payload %u flags %x length %u data %u",
+ payload, ikep->flags, plen - 4,
+ format_hex_bytes, ikep->payload, plen - 4);
+
+ if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL)
+ {
+ sa->unsupported_cp = payload;
+ return;
+ }
+ }
+
+ payload = ikep->nextpayload;
+ p += plen;
+ }
+
+ if (sa->is_initiator && proposal
+ && proposal->protocol_id == IKEV2_PROTOCOL_ESP)
+ {
+ ikev2_rekey_t *rekey = &sa->rekey[0];
+ rekey->protocol_id = proposal->protocol_id;
+ rekey->i_proposal =
+ ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
+ rekey->i_proposal->spi = rekey->spi;
+ rekey->r_proposal = proposal;
+ rekey->tsi = tsi;
+ rekey->tsr = tsr;
+ /* update Nr */
+ vec_free (sa->r_nonce);
+ vec_add (sa->r_nonce, nonce, IKEV2_NONCE_SIZE);
+ child_sa = ikev2_sa_get_child (sa, rekey->ispi, IKEV2_PROTOCOL_ESP, 1);
+ if (child_sa)
+ {
+ child_sa->rekey_retries = 0;
+ }
+ }
+ else if (rekeying)
+ {
+ ikev2_rekey_t *rekey;
+ child_sa = ikev2_sa_get_child (sa, n->spi, n->protocol_id, 1);
+ if (!child_sa)
+ {
+ clib_warning ("child SA spi %lx not found", n->spi);
+ goto cleanup_and_exit;
+ }
+ vec_add2 (sa->rekey, rekey, 1);
+ rekey->protocol_id = n->protocol_id;
+ rekey->spi = n->spi;
+ rekey->i_proposal = proposal;
+ rekey->r_proposal =
+ ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
+ rekey->tsi = tsi;
+ rekey->tsr = tsr;
+ /* update Ni */
+ vec_free (sa->i_nonce);
+ vec_add (sa->i_nonce, nonce, IKEV2_NONCE_SIZE);
+ /* generate new Nr */
+ vec_free (sa->r_nonce);
+ sa->r_nonce = vec_new (u8, IKEV2_NONCE_SIZE);
+ RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE);
+ }
+
+cleanup_and_exit:
+ vec_free (plaintext);
+ vec_free (n);
+}
+
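+/*
+ * Build the octets that are signed/MACed for the AUTH payload
+ * (RFC 7296 section 2.15): <SA_INIT message> | <peer nonce> |
+ * prf(SK_p, ID), using the initiator or responder variants as
+ * appropriate.
+ */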
+static u8 *
+ikev2_sa_generate_authmsg (ikev2_sa_t * sa, int is_responder)
+{
+ u8 *authmsg = 0;
+ u8 *data;
+ u8 *nonce;
+ ikev2_id_t *id;
+ u8 *key;
+ u8 *packet_data;
+ ikev2_sa_transform_t *tr_prf;
+
+ tr_prf =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+
+ if (is_responder)
+ {
+ id = &sa->r_id;
+ key = sa->sk_pr;
+ nonce = sa->i_nonce;
+ packet_data = sa->last_sa_init_res_packet_data;
+ }
+ else
+ {
+ id = &sa->i_id;
+ key = sa->sk_pi;
+ nonce = sa->r_nonce;
+ packet_data = sa->last_sa_init_req_packet_data;
+ }
+
+ data = vec_new (u8, 4);
+ data[0] = id->type;
+ vec_append (data, id->data);
+
+ u8 *id_hash = ikev2_calc_prf (tr_prf, key, data);
+ vec_append (authmsg, packet_data);
+ vec_append (authmsg, nonce);
+ vec_append (authmsg, id_hash);
+ vec_free (id_hash);
+ vec_free (data);
+
+ return authmsg;
+}
+
+static int
+ikev2_ts_cmp (ikev2_ts_t * ts1, ikev2_ts_t * ts2)
+{
+ if (ts1->ts_type == ts2->ts_type && ts1->protocol_id == ts2->protocol_id &&
+ ts1->start_port == ts2->start_port && ts1->end_port == ts2->end_port &&
+ ts1->start_addr.as_u32 == ts2->start_addr.as_u32 &&
+ ts1->end_addr.as_u32 == ts2->end_addr.as_u32)
+ return 1;
+
+ return 0;
+}
+
+static void
+ikev2_sa_match_ts (ikev2_sa_t * sa)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_profile_t *p;
+ ikev2_ts_t *ts, *p_tsi, *p_tsr, *tsi = 0, *tsr = 0;
+ ikev2_id_t *id;
+
+ /* *INDENT-OFF* */
+ pool_foreach (p, km->profiles, ({
+
+ if (sa->is_initiator)
+ {
+ p_tsi = &p->loc_ts;
+ p_tsr = &p->rem_ts;
+ id = &sa->r_id;
+ }
+ else
+ {
+ p_tsi = &p->rem_ts;
+ p_tsr = &p->loc_ts;
+ id = &sa->i_id;
+ }
+
+ /* check id */
+ if (p->rem_id.type != id->type ||
+ vec_len(p->rem_id.data) != vec_len(id->data) ||
+ memcmp(p->rem_id.data, id->data, vec_len(p->rem_id.data)))
+ continue;
+
+ vec_foreach(ts, sa->childs[0].tsi)
+ {
+ if (ikev2_ts_cmp(p_tsi, ts))
+ {
+ tsi = vec_dup(ts);
+ break;
+ }
+ }
+
+ vec_foreach(ts, sa->childs[0].tsr)
+ {
+ if (ikev2_ts_cmp(p_tsr, ts))
+ {
+ tsr = vec_dup(ts);
+ break;
+ }
+ }
+
+ break;
+ }));
+ /* *INDENT-ON* */
+
+ if (tsi && tsr)
+ {
+ vec_free (sa->childs[0].tsi);
+ vec_free (sa->childs[0].tsr);
+ sa->childs[0].tsi = tsi;
+ sa->childs[0].tsr = tsr;
+ }
+ else
+ {
+ vec_free (tsi);
+ vec_free (tsr);
+ ikev2_set_state (sa, IKEV2_STATE_TS_UNACCEPTABLE);
+ }
+}
+
+static void
+ikev2_sa_auth (ikev2_sa_t * sa)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_profile_t *p, *sel_p = 0;
+ u8 *authmsg, *key_pad, *psk = 0, *auth = 0;
+ ikev2_sa_transform_t *tr_prf;
+
+ tr_prf =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+
+ /* only shared key and rsa signature */
+ if (!(sa->i_auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC ||
+ sa->i_auth.method == IKEV2_AUTH_METHOD_RSA_SIG))
+ {
+ clib_warning ("unsupported authentication method %u",
+ sa->i_auth.method);
+ ikev2_set_state (sa, IKEV2_STATE_AUTH_FAILED);
+ return;
+ }
+
+ key_pad = format (0, "%s", IKEV2_KEY_PAD);
+ authmsg = ikev2_sa_generate_authmsg (sa, sa->is_initiator);
+
+ ikev2_id_t *sa_id;
+ ikev2_auth_t *sa_auth;
+
+ if (sa->is_initiator)
+ {
+ sa_id = &sa->r_id;
+ sa_auth = &sa->r_auth;
+ }
+ else
+ {
+ sa_id = &sa->i_id;
+ sa_auth = &sa->i_auth;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach (p, km->profiles, ({
+
+ /* check id */
+ if (p->rem_id.type != sa_id->type ||
+ vec_len(p->rem_id.data) != vec_len(sa_id->data) ||
+ memcmp(p->rem_id.data, sa_id->data, vec_len(p->rem_id.data)))
+ continue;
+
+ if (sa_auth->method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC)
+ {
+ if (!p->auth.data ||
+ p->auth.method != IKEV2_AUTH_METHOD_SHARED_KEY_MIC)
+ continue;
+
+ psk = ikev2_calc_prf(tr_prf, p->auth.data, key_pad);
+ auth = ikev2_calc_prf(tr_prf, psk, authmsg);
+
+ if (!memcmp(auth, sa_auth->data, vec_len(sa_auth->data)))
+ {
+ ikev2_set_state(sa, IKEV2_STATE_AUTHENTICATED);
+ vec_free(auth);
+ sel_p = p;
+ break;
+ }
+
+ }
+ else if (sa_auth->method == IKEV2_AUTH_METHOD_RSA_SIG)
+ {
+ if (p->auth.method != IKEV2_AUTH_METHOD_RSA_SIG)
+ continue;
+
+ if (ikev2_verify_sign(p->auth.key, sa_auth->data, authmsg) == 1)
+ {
+ ikev2_set_state(sa, IKEV2_STATE_AUTHENTICATED);
+ sel_p = p;
+ break;
+ }
+ }
+
+ vec_free(auth);
+ vec_free(psk);
+ }));
+ /* *INDENT-ON* */
+
+ vec_free (authmsg);
+
+ if (sa->state == IKEV2_STATE_AUTHENTICATED)
+ {
+ if (!sa->is_initiator)
+ {
+ vec_free (sa->r_id.data);
+ sa->r_id.data = vec_dup (sel_p->loc_id.data);
+ sa->r_id.type = sel_p->loc_id.type;
+
+ /* generate our auth data */
+ authmsg = ikev2_sa_generate_authmsg (sa, 1);
+ if (sel_p->auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC)
+ {
+ sa->r_auth.data = ikev2_calc_prf (tr_prf, psk, authmsg);
+ sa->r_auth.method = IKEV2_AUTH_METHOD_SHARED_KEY_MIC;
+ }
+ else if (sel_p->auth.method == IKEV2_AUTH_METHOD_RSA_SIG)
+ {
+ sa->r_auth.data = ikev2_calc_sign (km->pkey, authmsg);
+ sa->r_auth.method = IKEV2_AUTH_METHOD_RSA_SIG;
+ }
+ vec_free (authmsg);
+
+ /* select transforms for 1st child sa */
+ ikev2_sa_free_proposal_vector (&sa->childs[0].r_proposals);
+ sa->childs[0].r_proposals =
+ ikev2_select_proposal (sa->childs[0].i_proposals,
+ IKEV2_PROTOCOL_ESP);
+ }
+ }
+ else
+ {
+ ikev2_set_state (sa, IKEV2_STATE_AUTH_FAILED);
+ }
+ vec_free (psk);
+ vec_free (key_pad);
+}
+
+
+static void
+ikev2_sa_auth_init (ikev2_sa_t * sa)
+{
+ ikev2_main_t *km = &ikev2_main;
+ u8 *authmsg, *key_pad, *psk = 0, *auth = 0;
+ ikev2_sa_transform_t *tr_prf;
+
+ tr_prf =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+
+ /* only shared key and rsa signature */
+ if (!(sa->i_auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC ||
+ sa->i_auth.method == IKEV2_AUTH_METHOD_RSA_SIG))
+ {
+ clib_warning ("unsupported authentication method %u",
+ sa->i_auth.method);
+ ikev2_set_state (sa, IKEV2_STATE_AUTH_FAILED);
+ return;
+ }
+
+ key_pad = format (0, "%s", IKEV2_KEY_PAD);
+ authmsg = ikev2_sa_generate_authmsg (sa, 0);
+ psk = ikev2_calc_prf (tr_prf, sa->i_auth.data, key_pad);
+ auth = ikev2_calc_prf (tr_prf, psk, authmsg);
+
+
+ if (sa->i_auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC)
+ {
+ sa->i_auth.data = ikev2_calc_prf (tr_prf, psk, authmsg);
+ sa->i_auth.method = IKEV2_AUTH_METHOD_SHARED_KEY_MIC;
+ }
+ else if (sa->i_auth.method == IKEV2_AUTH_METHOD_RSA_SIG)
+ {
+ sa->i_auth.data = ikev2_calc_sign (km->pkey, authmsg);
+ sa->i_auth.method = IKEV2_AUTH_METHOD_RSA_SIG;
+ }
+
+ vec_free (psk);
+ vec_free (key_pad);
+ vec_free (auth);
+ vec_free (authmsg);
+}
+
+
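+/*
+ * Map a negotiated child SA onto an ipsec tunnel interface. Only
+ * AES-CBC (128/192/256 bit keys) with HMAC-SHA1-96 integrity is
+ * accepted here; any other combination yields NO_PROPOSAL_CHOSEN.
+ */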
+static int
+ikev2_create_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa,
+ ikev2_child_sa_t * child)
+{
+ ipsec_add_del_tunnel_args_t a;
+ ikev2_sa_transform_t *tr;
+ ikev2_sa_proposal_t *proposals;
+ u8 encr_type = 0;
+
+ if (!child->r_proposals)
+ {
+ ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
+ return 1;
+ }
+
+ memset (&a, 0, sizeof (a));
+ a.is_add = 1;
+ if (sa->is_initiator)
+ {
+ a.local_ip.as_u32 = sa->iaddr.as_u32;
+ a.remote_ip.as_u32 = sa->raddr.as_u32;
+ proposals = child->i_proposals;
+ a.local_spi = child->r_proposals[0].spi;
+ a.remote_spi = child->i_proposals[0].spi;
+ }
+ else
+ {
+ a.local_ip.as_u32 = sa->raddr.as_u32;
+ a.remote_ip.as_u32 = sa->iaddr.as_u32;
+ proposals = child->r_proposals;
+ a.local_spi = child->i_proposals[0].spi;
+ a.remote_spi = child->r_proposals[0].spi;
+ }
+ a.anti_replay = 1;
+
+ tr = ikev2_sa_get_td_for_type (proposals, IKEV2_TRANSFORM_TYPE_ESN);
+ if (tr)
+ a.esn = tr->esn_type;
+ else
+ a.esn = 0;
+
+ tr = ikev2_sa_get_td_for_type (proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ {
+ if (tr->encr_type == IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC && tr->key_len)
+ {
+ switch (tr->key_len)
+ {
+ case 16:
+ encr_type = IPSEC_CRYPTO_ALG_AES_CBC_128;
+ break;
+ case 24:
+ encr_type = IPSEC_CRYPTO_ALG_AES_CBC_192;
+ break;
+ case 32:
+ encr_type = IPSEC_CRYPTO_ALG_AES_CBC_256;
+ break;
+ default:
+ ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
+ return 1;
+ break;
+ }
+ }
+ else
+ {
+ ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
+ return 1;
+ }
+ }
+ else
+ {
+ ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
+ return 1;
+ }
+
+ tr = ikev2_sa_get_td_for_type (proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ {
+ if (tr->integ_type != IKEV2_TRANSFORM_INTEG_TYPE_AUTH_HMAC_SHA1_96)
+ {
+ ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
+ return 1;
+ }
+ }
+ else
+ {
+ ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
+ return 1;
+ }
+
+ ikev2_calc_child_keys (sa, child);
+
+ u8 *loc_ckey, *rem_ckey, *loc_ikey, *rem_ikey;
+ if (sa->is_initiator)
+ {
+ loc_ikey = child->sk_ai;
+ rem_ikey = child->sk_ar;
+ loc_ckey = child->sk_ei;
+ rem_ckey = child->sk_er;
+ }
+ else
+ {
+ loc_ikey = child->sk_ar;
+ rem_ikey = child->sk_ai;
+ loc_ckey = child->sk_er;
+ rem_ckey = child->sk_ei;
+ }
+
+ a.integ_alg = IPSEC_INTEG_ALG_SHA1_96;
+ a.local_integ_key_len = vec_len (loc_ikey);
+ clib_memcpy (a.local_integ_key, loc_ikey, a.local_integ_key_len);
+ a.remote_integ_key_len = vec_len (rem_ikey);
+ clib_memcpy (a.remote_integ_key, rem_ikey, a.remote_integ_key_len);
+
+ a.crypto_alg = encr_type;
+ a.local_crypto_key_len = vec_len (loc_ckey);
+ clib_memcpy (a.local_crypto_key, loc_ckey, a.local_crypto_key_len);
+ a.remote_crypto_key_len = vec_len (rem_ckey);
+ clib_memcpy (a.remote_crypto_key, rem_ckey, a.remote_crypto_key_len);
+
+ if (sa->profile && sa->profile->lifetime)
+ {
+ child->time_to_expiration = vlib_time_now (vnm->vlib_main)
+ + sa->profile->lifetime;
+ if (sa->profile->lifetime_jitter)
+ {
+ // This is not much better than rand(3), which Coverity warns is
+ // unsuitable for security applications; random_u32 is, however,
+ // fast. If this perturbation of the expiration time ever needs a
+ // better RNG, something like /dev/urandom could be used, at
+ // significant overhead.
+ u32 rnd = (u32) (vlib_time_now (vnm->vlib_main) * 1e6);
+ rnd = random_u32 (&rnd);
+
+ child->time_to_expiration +=
+ 1 + (rnd % sa->profile->lifetime_jitter);
+ }
+ }
+
+ ipsec_add_del_tunnel_if (&a);
+
+ return 0;
+}
+
+static int
+ikev2_delete_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa,
+ ikev2_child_sa_t * child)
+{
+ ipsec_add_del_tunnel_args_t a;
+
+ memset (&a, 0, sizeof (a));
+
+ if (sa->is_initiator)
+ {
+ if (!vec_len (child->i_proposals))
+ return 0;
+
+ a.is_add = 0;
+ a.local_ip.as_u32 = sa->iaddr.as_u32;
+ a.remote_ip.as_u32 = sa->raddr.as_u32;
+ a.local_spi = child->r_proposals[0].spi;
+ a.remote_spi = child->i_proposals[0].spi;
+ }
+ else
+ {
+ if (!vec_len (child->r_proposals))
+ return 0;
+
+ a.is_add = 0;
+ a.local_ip.as_u32 = sa->raddr.as_u32;
+ a.remote_ip.as_u32 = sa->iaddr.as_u32;
+ a.local_spi = child->i_proposals[0].spi;
+ a.remote_spi = child->r_proposals[0].spi;
+ }
+
+ ipsec_add_del_tunnel_if (&a);
+ return 0;
+}
+
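+/*
+ * Serialize a response into the IKE header at 'ike'. SA_INIT replies
+ * go out in the clear; all other exchanges are wrapped in an SK
+ * payload: the payload chain is padded to the cipher block size,
+ * encrypted, and the integrity checksum is computed over the whole
+ * message except the checksum field itself.
+ */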
+static u32
+ikev2_generate_message (ikev2_sa_t * sa, ike_header_t * ike, void *user)
+{
+ v8 *integ = 0;
+ ike_payload_header_t *ph;
+ u16 plen;
+ u32 tlen = 0;
+
+ ikev2_sa_transform_t *tr_encr, *tr_integ;
+ tr_encr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ tr_integ =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+
+ ikev2_payload_chain_t *chain = 0;
+ ikev2_payload_new_chain (chain);
+
+ if (ike->exchange == IKEV2_EXCHANGE_SA_INIT)
+ {
+ if (sa->r_proposals == 0)
+ {
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_NO_PROPOSAL_CHOSEN, 0);
+ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
+ }
+ else if (sa->dh_group == IKEV2_TRANSFORM_DH_TYPE_NONE)
+ {
+ u8 *data = vec_new (u8, 2);
+ ikev2_sa_transform_t *tr_dh;
+ tr_dh =
+ ikev2_sa_get_td_for_type (sa->r_proposals,
+ IKEV2_TRANSFORM_TYPE_DH);
+ ASSERT (tr_dh && tr_dh->dh_type);
+
+ data[0] = (tr_dh->dh_type >> 8) & 0xff;
+ data[1] = (tr_dh->dh_type) & 0xff;
+
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD,
+ data);
+ vec_free (data);
+ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
+ }
+ else if (sa->state == IKEV2_STATE_NOTIFY_AND_DELETE)
+ {
+ u8 *data = vec_new (u8, 1);
+
+ data[0] = sa->unsupported_cp;
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD,
+ data);
+ vec_free (data);
+ }
+ else
+ {
+ ike->rspi = clib_host_to_net_u64 (sa->rspi);
+ ikev2_payload_add_sa (chain, sa->r_proposals);
+ ikev2_payload_add_ke (chain, sa->dh_group, sa->r_dh_data);
+ ikev2_payload_add_nonce (chain, sa->r_nonce);
+ }
+ }
+ else if (ike->exchange == IKEV2_EXCHANGE_IKE_AUTH)
+ {
+ if (sa->state == IKEV2_STATE_AUTHENTICATED)
+ {
+ ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
+ ikev2_payload_add_auth (chain, &sa->r_auth);
+ ikev2_payload_add_sa (chain, sa->childs[0].r_proposals);
+ ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
+ ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
+ }
+ else if (sa->state == IKEV2_STATE_AUTH_FAILED)
+ {
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_AUTHENTICATION_FAILED,
+ 0);
+ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
+ }
+ else if (sa->state == IKEV2_STATE_TS_UNACCEPTABLE)
+ {
+ ikev2_payload_add_notify (chain, IKEV2_NOTIFY_MSG_TS_UNACCEPTABLE,
+ 0);
+ ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
+ ikev2_payload_add_auth (chain, &sa->r_auth);
+ }
+ else if (sa->state == IKEV2_STATE_NO_PROPOSAL_CHOSEN)
+ {
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_NO_PROPOSAL_CHOSEN, 0);
+ ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
+ ikev2_payload_add_auth (chain, &sa->r_auth);
+ ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
+ ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
+ }
+ else if (sa->state == IKEV2_STATE_NOTIFY_AND_DELETE)
+ {
+ u8 *data = vec_new (u8, 1);
+
+ data[0] = sa->unsupported_cp;
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD,
+ data);
+ vec_free (data);
+ }
+ else if (sa->state == IKEV2_STATE_SA_INIT)
+ {
+ ikev2_payload_add_id (chain, &sa->i_id, IKEV2_PAYLOAD_IDI);
+ ikev2_payload_add_auth (chain, &sa->i_auth);
+ ikev2_payload_add_sa (chain, sa->childs[0].i_proposals);
+ ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
+ ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
+ }
+ else
+ {
+ ikev2_set_state (sa, IKEV2_STATE_DELETED);
+ goto done;
+ }
+ }
+ else if (ike->exchange == IKEV2_EXCHANGE_INFORMATIONAL)
+ {
+ /* if pending delete */
+ if (sa->del)
+ {
+ if (sa->del[0].protocol_id == IKEV2_PROTOCOL_IKE)
+ {
+ if (sa->is_initiator)
+ ikev2_payload_add_delete (chain, sa->del);
+
+ /* The response to a request that deletes the IKE SA is an empty
+ INFORMATIONAL response. */
+ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
+ }
+ /* The response to a request that deletes ESP or AH SAs will contain
+ delete payloads for the paired SAs going in the other direction. */
+ else
+ {
+ ikev2_payload_add_delete (chain, sa->del);
+ }
+ vec_free (sa->del);
+ sa->del = 0;
+ }
+ /* received N(AUTHENTICATION_FAILED) */
+ else if (sa->state == IKEV2_STATE_AUTH_FAILED)
+ {
+ ikev2_set_state (sa, IKEV2_STATE_DELETED);
+ goto done;
+ }
+ /* received unsupported critical payload */
+ else if (sa->unsupported_cp)
+ {
+ u8 *data = vec_new (u8, 1);
+
+ data[0] = sa->unsupported_cp;
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD,
+ data);
+ vec_free (data);
+ sa->unsupported_cp = 0;
+ }
+ /* else send empty response */
+ }
+ else if (ike->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA)
+ {
+ if (sa->is_initiator)
+ {
+
+ ikev2_sa_proposal_t *proposals = (ikev2_sa_proposal_t *) user;
+ ikev2_notify_t notify;
+ u8 *data = vec_new (u8, 4);
+ memset (&notify, 0, sizeof (notify));
+ notify.protocol_id = IKEV2_PROTOCOL_ESP;
+ notify.spi = sa->childs[0].i_proposals->spi;
+ *(u32 *) data = clib_host_to_net_u32 (notify.spi);
+
+ ikev2_payload_add_sa (chain, proposals);
+ ikev2_payload_add_nonce (chain, sa->i_nonce);
+ ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
+ ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
+ ikev2_payload_add_notify_2 (chain, IKEV2_NOTIFY_MSG_REKEY_SA, data,
+ &notify);
+
+ vec_free (data);
+ }
+ else
+ {
+ if (sa->rekey)
+ {
+ ikev2_payload_add_sa (chain, sa->rekey[0].r_proposal);
+ ikev2_payload_add_nonce (chain, sa->r_nonce);
+ ikev2_payload_add_ts (chain, sa->rekey[0].tsi,
+ IKEV2_PAYLOAD_TSI);
+ ikev2_payload_add_ts (chain, sa->rekey[0].tsr,
+ IKEV2_PAYLOAD_TSR);
+ vec_del1 (sa->rekey, 0);
+ }
+ else if (sa->unsupported_cp)
+ {
+ u8 *data = vec_new (u8, 1);
+
+ data[0] = sa->unsupported_cp;
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD,
+ data);
+ vec_free (data);
+ sa->unsupported_cp = 0;
+ }
+ else
+ {
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_NO_ADDITIONAL_SAS,
+ 0);
+ }
+ }
+ }
+
+ /* IKEv2 header */
+ ike->version = IKE_VERSION_2;
+ ike->nextpayload = IKEV2_PAYLOAD_SK;
+ tlen = sizeof (*ike);
+ if (sa->is_initiator)
+ {
+ ike->flags = IKEV2_HDR_FLAG_INITIATOR;
+ sa->last_init_msg_id = clib_net_to_host_u32 (ike->msgid);
+ }
+ else
+ {
+ ike->flags = IKEV2_HDR_FLAG_RESPONSE;
+ }
+
+ if (ike->exchange == IKEV2_EXCHANGE_SA_INIT)
+ {
+ tlen += vec_len (chain->data);
+ ike->nextpayload = chain->first_payload_type;
+ ike->length = clib_host_to_net_u32 (tlen);
+ clib_memcpy (ike->payload, chain->data, vec_len (chain->data));
+
+ /* store whole IKE payload - needed for PSK auth */
+ vec_free (sa->last_sa_init_res_packet_data);
+ vec_add (sa->last_sa_init_res_packet_data, ike, tlen);
+ }
+ else
+ {
+
+ ikev2_payload_chain_add_padding (chain, tr_encr->block_size);
+
+ /* SK payload */
+ plen = sizeof (*ph);
+ ph = (ike_payload_header_t *) & ike->payload[0];
+ ph->nextpayload = chain->first_payload_type;
+ ph->flags = 0;
+ int enc_len = ikev2_encrypt_data (sa, chain->data, ph->payload);
+ plen += enc_len;
+
+ /* add space for hmac */
+ plen += tr_integ->key_trunc;
+ tlen += plen;
+
+ /* payload and total length */
+ ph->length = clib_host_to_net_u16 (plen);
+ ike->length = clib_host_to_net_u32 (tlen);
+
+ /* calc integrity data for whole packet except hash itself */
+ integ =
+ ikev2_calc_integr (tr_integ, sa->is_initiator ? sa->sk_ai : sa->sk_ar,
+ (u8 *) ike, tlen - tr_integ->key_trunc);
+
+ clib_memcpy (ike->payload + tlen - tr_integ->key_trunc - sizeof (*ike),
+ integ, tr_integ->key_trunc);
+
+ /* store whole IKE payload - needed for retransmit */
+ vec_free (sa->last_res_packet_data);
+ vec_add (sa->last_res_packet_data, ike, tlen);
+ }
+
+done:
+ ikev2_payload_destroy_chain (chain);
+ vec_free (integ);
+ return tlen;
+}
+
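+/* Detect IKE_SA_INIT retransmissions: scan this thread's SA pool for an
+ * SA matching the initiator SPI and address pair, then compare the nonce
+ * payload. Returns 1 if the cached response was replayed into the request
+ * buffer, -1 if the request must be ignored, 0 if this is a new request. */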
+static int
+ikev2_retransmit_sa_init (ike_header_t * ike,
+ ip4_address_t iaddr, ip4_address_t raddr)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_sa_t *sa;
+ u32 thread_index = vlib_get_thread_index ();
+
+ /* *INDENT-OFF* */
+ pool_foreach (sa, km->per_thread_data[thread_index].sas, ({
+ if (sa->ispi == clib_net_to_host_u64(ike->ispi) &&
+ sa->iaddr.as_u32 == iaddr.as_u32 &&
+ sa->raddr.as_u32 == raddr.as_u32)
+ {
+ int p = 0;
+ u32 len = clib_net_to_host_u32(ike->length);
+ u8 payload = ike->nextpayload;
+
+ while (p < len && payload != IKEV2_PAYLOAD_NONE) {
+ ike_payload_header_t * ikep = (ike_payload_header_t *) &ike->payload[p];
+ u32 plen = clib_net_to_host_u16(ikep->length);
+
+ if (plen < sizeof(ike_payload_header_t))
+ return -1;
+
+ if (payload == IKEV2_PAYLOAD_NONCE)
+ {
+ if (!memcmp(sa->i_nonce, ikep->payload, plen - sizeof(*ikep)))
+ {
+ /* req is retransmit */
+ if (sa->state == IKEV2_STATE_SA_INIT)
+ {
+ ike_header_t * tmp;
+ tmp = (ike_header_t*)sa->last_sa_init_res_packet_data;
+ ike->ispi = tmp->ispi;
+ ike->rspi = tmp->rspi;
+ ike->nextpayload = tmp->nextpayload;
+ ike->version = tmp->version;
+ ike->exchange = tmp->exchange;
+ ike->flags = tmp->flags;
+ ike->msgid = tmp->msgid;
+ ike->length = tmp->length;
+ clib_memcpy(ike->payload, tmp->payload,
+ clib_net_to_host_u32(tmp->length) - sizeof(*ike));
+ clib_warning("IKE_SA_INIT retransmit from %U to %U",
+ format_ip4_address, &raddr,
+ format_ip4_address, &iaddr);
+ return 1;
+ }
+ /* else ignore req */
+ else
+ {
+ clib_warning("IKE_SA_INIT ignore from %U to %U",
+ format_ip4_address, &raddr,
+ format_ip4_address, &iaddr);
+ return -1;
+ }
+ }
+ }
+ payload = ikep->nextpayload;
+ p += plen;
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /* req is not retransmit */
+ return 0;
+}
+
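+/* Window-of-one retransmission check for an established SA: a message ID
+ * above the last one seen is a new request, an equal ID replays the cached
+ * response from last_res_packet_data, and an older ID is dropped. */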
+static int
+ikev2_retransmit_resp (ikev2_sa_t * sa, ike_header_t * ike)
+{
+ u32 msg_id = clib_net_to_host_u32 (ike->msgid);
+
+ /* new req */
+ if (msg_id > sa->last_msg_id)
+ {
+ sa->last_msg_id = msg_id;
+ return 0;
+ }
+ /* retransmitted req */
+ else if (msg_id == sa->last_msg_id)
+ {
+ ike_header_t *tmp;
+ tmp = (ike_header_t *) sa->last_res_packet_data;
+ ike->ispi = tmp->ispi;
+ ike->rspi = tmp->rspi;
+ ike->nextpayload = tmp->nextpayload;
+ ike->version = tmp->version;
+ ike->exchange = tmp->exchange;
+ ike->flags = tmp->flags;
+ ike->msgid = tmp->msgid;
+ ike->length = tmp->length;
+ clib_memcpy (ike->payload, tmp->payload,
+ clib_net_to_host_u32 (tmp->length) - sizeof (*ike));
+ clib_warning ("IKE msgid %u retransmit from %U to %U",
+ msg_id,
+ format_ip4_address, &sa->raddr,
+ format_ip4_address, &sa->iaddr);
+ return 1;
+ }
+ /* old req ignore */
+ else
+ {
+ clib_warning ("IKE msgid %u req ignore from %U to %U",
+ msg_id,
+ format_ip4_address, &sa->raddr,
+ format_ip4_address, &sa->iaddr);
+ return -1;
+ }
+}
+
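+/* Main IKEv2 input node. Each packet arriving on UDP port 500 is parsed
+ * in place; the node dispatches on the exchange type, and when a response
+ * is due it rewrites the same buffer (IP and UDP headers included) and
+ * sends it back through ip4-lookup. */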
+static uword
+ikev2_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ ikev2_next_t next_index;
+ ikev2_main_t *km = &ikev2_main;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = IKEV2_NEXT_ERROR_DROP;
+ u32 sw_if_index0;
+ ip4_header_t *ip40;
+ udp_header_t *udp0;
+ ike_header_t *ike0;
+ ikev2_sa_t *sa0 = 0;
+ ikev2_sa_t sa; /* temporary store for SA */
+ int len = 0;
+ int r;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ike0 = vlib_buffer_get_current (b0);
+ vlib_buffer_advance (b0, -sizeof (*udp0));
+ udp0 = vlib_buffer_get_current (b0);
+ vlib_buffer_advance (b0, -sizeof (*ip40));
+ ip40 = vlib_buffer_get_current (b0);
+
+ if (ike0->version != IKE_VERSION_2)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_NOT_IKEV2, 1);
+ goto dispatch0;
+ }
+
+ if (ike0->exchange == IKEV2_EXCHANGE_SA_INIT)
+ {
+ sa0 = &sa;
+ memset (sa0, 0, sizeof (*sa0));
+
+ if (ike0->flags & IKEV2_HDR_FLAG_INITIATOR)
+ {
+ if (ike0->rspi == 0)
+ {
+ sa0->raddr.as_u32 = ip40->dst_address.as_u32;
+ sa0->iaddr.as_u32 = ip40->src_address.as_u32;
+
+ r = ikev2_retransmit_sa_init (ike0, sa0->iaddr,
+ sa0->raddr);
+ if (r == 1)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_IKE_SA_INIT_RETRANSMIT,
+ 1);
+ len = clib_net_to_host_u32 (ike0->length);
+ goto dispatch0;
+ }
+ else if (r == -1)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_IKE_SA_INIT_IGNORE,
+ 1);
+ goto dispatch0;
+ }
+
+ ikev2_process_sa_init_req (vm, sa0, ike0);
+
+ if (sa0->state == IKEV2_STATE_SA_INIT)
+ {
+ ikev2_sa_free_proposal_vector (&sa0->r_proposals);
+ sa0->r_proposals =
+ ikev2_select_proposal (sa0->i_proposals,
+ IKEV2_PROTOCOL_IKE);
+ ikev2_generate_sa_init_data (sa0);
+ }
+
+ if (sa0->state == IKEV2_STATE_SA_INIT
+ || sa0->state == IKEV2_STATE_NOTIFY_AND_DELETE)
+ {
+ len = ikev2_generate_message (sa0, ike0, 0);
+ }
+
+ if (sa0->state == IKEV2_STATE_SA_INIT)
+ {
+ /* add SA to the pool */
+ pool_get (km->per_thread_data[thread_index].sas,
+ sa0);
+ clib_memcpy (sa0, &sa, sizeof (*sa0));
+ hash_set (km->
+ per_thread_data[thread_index].sa_by_rspi,
+ sa0->rspi,
+ sa0 -
+ km->per_thread_data[thread_index].sas);
+ }
+ else
+ {
+ ikev2_sa_free_all_vec (sa0);
+ }
+ }
+ }
+ else
+ {
+ ikev2_process_sa_init_resp (vm, sa0, ike0);
+
+ if (sa0->state == IKEV2_STATE_SA_INIT)
+ {
+ ike0->exchange = IKEV2_EXCHANGE_IKE_AUTH;
+ uword *p = hash_get (km->sa_by_ispi, ike0->ispi);
+ if (p)
+ {
+ ikev2_sa_t *sai =
+ pool_elt_at_index (km->sais, p[0]);
+
+ ikev2_complete_sa_data (sa0, sai);
+ ikev2_calc_keys (sa0);
+ ikev2_sa_auth_init (sa0);
+ len = ikev2_generate_message (sa0, ike0, 0);
+ }
+ }
+
+ if (sa0->state == IKEV2_STATE_SA_INIT)
+ {
+ /* add SA to the pool */
+ pool_get (km->per_thread_data[thread_index].sas, sa0);
+ clib_memcpy (sa0, &sa, sizeof (*sa0));
+ hash_set (km->per_thread_data[thread_index].sa_by_rspi,
+ sa0->rspi,
+ sa0 - km->per_thread_data[thread_index].sas);
+ }
+ else
+ {
+ ikev2_sa_free_all_vec (sa0);
+ }
+ }
+ }
+ else if (ike0->exchange == IKEV2_EXCHANGE_IKE_AUTH)
+ {
+ uword *p;
+ p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
+ clib_net_to_host_u64 (ike0->rspi));
+ if (p)
+ {
+ sa0 =
+ pool_elt_at_index (km->per_thread_data[thread_index].sas,
+ p[0]);
+
+ r = ikev2_retransmit_resp (sa0, ike0);
+ if (r == 1)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_IKE_REQ_RETRANSMIT,
+ 1);
+ len = clib_net_to_host_u32 (ike0->length);
+ goto dispatch0;
+ }
+ else if (r == -1)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_IKE_REQ_IGNORE,
+ 1);
+ goto dispatch0;
+ }
+
+ ikev2_process_auth_req (vm, sa0, ike0);
+ ikev2_sa_auth (sa0);
+ if (sa0->state == IKEV2_STATE_AUTHENTICATED)
+ {
+ ikev2_initial_contact_cleanup (sa0);
+ ikev2_sa_match_ts (sa0);
+ if (sa0->state != IKEV2_STATE_TS_UNACCEPTABLE)
+ ikev2_create_tunnel_interface (km->vnet_main, sa0,
+ &sa0->childs[0]);
+ }
+
+ if (sa0->is_initiator)
+ {
+ uword *p = hash_get (km->sa_by_ispi, ike0->ispi);
+ if (p)
+ {
+ ikev2_sa_t *sai =
+ pool_elt_at_index (km->sais, p[0]);
+ hash_unset (km->sa_by_ispi, sai->ispi);
+ ikev2_sa_free_all_vec (sai);
+ pool_put (km->sais, sai);
+ }
+ }
+ else
+ {
+ len = ikev2_generate_message (sa0, ike0, 0);
+ }
+ }
+ }
+ else if (ike0->exchange == IKEV2_EXCHANGE_INFORMATIONAL)
+ {
+ uword *p;
+ p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
+ clib_net_to_host_u64 (ike0->rspi));
+ if (p)
+ {
+ sa0 =
+ pool_elt_at_index (km->per_thread_data[thread_index].sas,
+ p[0]);
+
+ r = ikev2_retransmit_resp (sa0, ike0);
+ if (r == 1)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_IKE_REQ_RETRANSMIT,
+ 1);
+ len = clib_net_to_host_u32 (ike0->length);
+ goto dispatch0;
+ }
+ else if (r == -1)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_IKE_REQ_IGNORE,
+ 1);
+ goto dispatch0;
+ }
+
+ ikev2_process_informational_req (vm, sa0, ike0);
+ if (sa0->del)
+ {
+ if (sa0->del[0].protocol_id != IKEV2_PROTOCOL_IKE)
+ {
+ ikev2_delete_t *d, *tmp, *resp = 0;
+ vec_foreach (d, sa0->del)
+ {
+ ikev2_child_sa_t *ch_sa;
+ ch_sa = ikev2_sa_get_child (sa0, d->spi,
+ d->protocol_id,
+ !sa0->is_initiator);
+ if (ch_sa)
+ {
+ ikev2_delete_tunnel_interface (km->vnet_main,
+ sa0, ch_sa);
+ if (!sa0->is_initiator)
+ {
+ vec_add2 (resp, tmp, 1);
+ tmp->protocol_id = d->protocol_id;
+ tmp->spi = ch_sa->r_proposals[0].spi;
+ }
+ ikev2_sa_del_child_sa (sa0, ch_sa);
+ }
+ }
+ if (!sa0->is_initiator)
+ {
+ vec_free (sa0->del);
+ sa0->del = resp;
+ }
+ }
+ }
+ if (!sa0->is_initiator)
+ {
+ len = ikev2_generate_message (sa0, ike0, 0);
+ }
+ }
+ }
+ else if (ike0->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA)
+ {
+ uword *p;
+ p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
+ clib_net_to_host_u64 (ike0->rspi));
+ if (p)
+ {
+ sa0 =
+ pool_elt_at_index (km->per_thread_data[thread_index].sas,
+ p[0]);
+
+ r = ikev2_retransmit_resp (sa0, ike0);
+ if (r == 1)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_IKE_REQ_RETRANSMIT,
+ 1);
+ len = clib_net_to_host_u32 (ike0->length);
+ goto dispatch0;
+ }
+ else if (r == -1)
+ {
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_IKE_REQ_IGNORE,
+ 1);
+ goto dispatch0;
+ }
+
+ ikev2_process_create_child_sa_req (vm, sa0, ike0);
+ if (sa0->rekey)
+ {
+ if (sa0->rekey[0].protocol_id != IKEV2_PROTOCOL_IKE)
+ {
+ ikev2_child_sa_t *child;
+ vec_add2 (sa0->childs, child, 1);
+ child->r_proposals = sa0->rekey[0].r_proposal;
+ child->i_proposals = sa0->rekey[0].i_proposal;
+ child->tsi = sa0->rekey[0].tsi;
+ child->tsr = sa0->rekey[0].tsr;
+ ikev2_create_tunnel_interface (km->vnet_main, sa0,
+ child);
+ }
+ if (sa0->is_initiator)
+ {
+ vec_del1 (sa0->rekey, 0);
+ }
+ else
+ {
+ len = ikev2_generate_message (sa0, ike0, 0);
+ }
+ }
+ }
+ }
+ else
+ {
+ clib_warning ("IKEv2 exchange %u packet received from %U to %U",
+ ike0->exchange,
+ format_ip4_address, ip40->src_address.as_u8,
+ format_ip4_address, ip40->dst_address.as_u8);
+ }
+
+ dispatch0:
+ /* if we are sending packet back, rewrite headers */
+ if (len)
+ {
+ next0 = IKEV2_NEXT_IP4_LOOKUP;
+ if (sa0->is_initiator)
+ {
+ ip40->dst_address.as_u32 = sa0->raddr.as_u32;
+ ip40->src_address.as_u32 = sa0->iaddr.as_u32;
+ }
+ else
+ {
+ ip40->dst_address.as_u32 = sa0->iaddr.as_u32;
+ ip40->src_address.as_u32 = sa0->raddr.as_u32;
+ }
+ udp0->length =
+ clib_host_to_net_u16 (len + sizeof (udp_header_t));
+ udp0->checksum = 0;
+ b0->current_length =
+ len + sizeof (ip4_header_t) + sizeof (udp_header_t);
+ ip40->length = clib_host_to_net_u16 (b0->current_length);
+ ip40->checksum = ip4_header_checksum (ip40);
+ }
+ /* delete sa */
+ if (sa0 && (sa0->state == IKEV2_STATE_DELETED ||
+ sa0->state == IKEV2_STATE_NOTIFY_AND_DELETE))
+ {
+ ikev2_child_sa_t *c;
+
+ vec_foreach (c, sa0->childs)
+ ikev2_delete_tunnel_interface (km->vnet_main, sa0, c);
+
+ ikev2_delete_sa (sa0);
+ }
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ikev2_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ikev2_node.index,
+ IKEV2_ERROR_PROCESSED, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ikev2_node, static) = {
+ .function = ikev2_node_fn,
+ .name = "ikev2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ikev2_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ikev2_error_strings),
+ .error_strings = ikev2_error_strings,
+
+ .n_next_nodes = IKEV2_N_NEXT,
+
+ .next_nodes = {
+ [IKEV2_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IKEV2_NEXT_ERROR_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
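+/* Build a single proposal from the profile's transform set, taking the
+ * first supported transform of each type: ENCR, INTEG, PRF (IKE only),
+ * DH, and ESN (ESP only). Fails if any requested algorithm is missing
+ * from km->supported_transforms. */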
+static clib_error_t *
+ikev2_set_initiator_proposals (vlib_main_t * vm, ikev2_sa_t * sa,
+ ikev2_transforms_set * ts,
+ ikev2_sa_proposal_t ** proposals, int is_ike)
+{
+ clib_error_t *r;
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_sa_proposal_t *proposal;
+ vec_add2 (*proposals, proposal, 1);
+ ikev2_sa_transform_t *td;
+ int error;
+
+ /* Encryption */
+ error = 1;
+ vec_foreach (td, km->supported_transforms)
+ {
+ if (td->type == IKEV2_TRANSFORM_TYPE_ENCR
+ && td->encr_type == IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC
+ && td->key_len == ts->crypto_key_size / 8)
+ {
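+ /* Key Length transform attribute (IANA attribute type 14),
+ TV format (high bit of the type set), value in bits */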
+ u16 attr[2];
+ attr[0] = clib_host_to_net_u16 (14 | (1 << 15));
+ attr[1] = clib_host_to_net_u16 (td->key_len << 3);
+ vec_add (td->attrs, (u8 *) attr, 4);
+ vec_add1 (proposal->transforms, *td);
+ td->attrs = 0;
+
+ error = 0;
+ break;
+ }
+ }
+ if (error)
+ {
+ r = clib_error_return (0, "Unsupported algorithm");
+ return r;
+ }
+
+ /* Integrity */
+ error = 1;
+ vec_foreach (td, km->supported_transforms)
+ {
+ if (td->type == IKEV2_TRANSFORM_TYPE_INTEG
+ && td->integ_type == IKEV2_TRANSFORM_INTEG_TYPE_AUTH_HMAC_SHA1_96)
+ {
+ vec_add1 (proposal->transforms, *td);
+ error = 0;
+ break;
+ }
+ }
+ if (error)
+ {
+ r = clib_error_return (0, "Unsupported algorithm");
+ return r;
+ }
+
+ /* PRF */
+ if (is_ike)
+ {
+ error = 1;
+ vec_foreach (td, km->supported_transforms)
+ {
+ if (td->type == IKEV2_TRANSFORM_TYPE_PRF
+ && td->prf_type == IKEV2_TRANSFORM_PRF_TYPE_PRF_HMAC_SHA1)
+ {
+ vec_add1 (proposal->transforms, *td);
+ error = 0;
+ break;
+ }
+ }
+ if (error)
+ {
+ r = clib_error_return (0, "Unsupported algorithm");
+ return r;
+ }
+ }
+
+ /* DH */
+ error = 1;
+ vec_foreach (td, km->supported_transforms)
+ {
+ if (td->type == IKEV2_TRANSFORM_TYPE_DH && td->dh_type == ts->dh_type)
+ {
+ vec_add1 (proposal->transforms, *td);
+ if (is_ike)
+ {
+ sa->dh_group = td->dh_type;
+ }
+ error = 0;
+ break;
+ }
+ }
+ if (error)
+ {
+ r = clib_error_return (0, "Unsupported algorithm");
+ return r;
+ }
+
+ if (!is_ike)
+ {
+ error = 1;
+ vec_foreach (td, km->supported_transforms)
+ {
+ if (td->type == IKEV2_TRANSFORM_TYPE_ESN)
+ {
+ vec_add1 (proposal->transforms, *td);
+ error = 0;
+ break;
+ }
+ }
+ if (error)
+ {
+ r = clib_error_return (0, "Unsupported algorithm");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static ikev2_profile_t *
+ikev2_profile_index_by_name (u8 * name)
+{
+ ikev2_main_t *km = &ikev2_main;
+ uword *p;
+
+ p = mhash_get (&km->profile_index_by_name, name);
+ if (!p)
+ return 0;
+
+ return pool_elt_at_index (km->profiles, p[0]);
+}
+
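+/* Wrap an IKE message in fresh IPv4 and UDP headers (both ports 500)
+ * and hand the buffer directly to the ip4-lookup node. */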
+static void
+ikev2_send_ike (vlib_main_t * vm, ip4_address_t * src, ip4_address_t * dst,
+ u32 bi0, u32 len)
+{
+ ip4_header_t *ip40;
+ udp_header_t *udp0;
+ vlib_buffer_t *b0;
+ vlib_frame_t *f;
+ u32 *to_next;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ vlib_buffer_advance (b0, -sizeof (udp_header_t));
+ udp0 = vlib_buffer_get_current (b0);
+ vlib_buffer_advance (b0, -sizeof (ip4_header_t));
+ ip40 = vlib_buffer_get_current (b0);
+
+ ip40->ip_version_and_header_length = 0x45;
+ ip40->tos = 0;
+ ip40->fragment_id = 0;
+ ip40->flags_and_fragment_offset = 0;
+ ip40->ttl = 0xff;
+ ip40->protocol = IP_PROTOCOL_UDP;
+ ip40->dst_address.as_u32 = dst->as_u32;
+ ip40->src_address.as_u32 = src->as_u32;
+ udp0->dst_port = clib_host_to_net_u16 (500);
+ udp0->src_port = clib_host_to_net_u16 (500);
+ udp0->length = clib_host_to_net_u16 (len + sizeof (udp_header_t));
+ udp0->checksum = 0;
+ b0->current_length = len + sizeof (ip4_header_t) + sizeof (udp_header_t);
+ ip40->length = clib_host_to_net_u16 (b0->current_length);
+ ip40->checksum = ip4_header_checksum (ip40);
+
+ /* send the request */
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+
+}
+
+static u32
+ikev2_get_new_ike_header_buff (vlib_main_t * vm, ike_header_t ** ike)
+{
+ u32 bi0;
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ {
+ *ike = 0;
+ return 0;
+ }
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ *ike = vlib_buffer_get_current (b0);
+ return bi0;
+}
+
+clib_error_t *
+ikev2_set_local_key (vlib_main_t * vm, u8 * file)
+{
+ ikev2_main_t *km = &ikev2_main;
+
+ km->pkey = ikev2_load_key_file (file);
+ if (km->pkey == NULL)
+ return clib_error_return (0, "load key '%s' failed", file);
+
+ return 0;
+}
+
+clib_error_t *
+ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_profile_t *p;
+
+ if (is_add)
+ {
+ if (ikev2_profile_index_by_name (name))
+ return clib_error_return (0, "policy %v already exists", name);
+
+ pool_get (km->profiles, p);
+ memset (p, 0, sizeof (*p));
+ p->name = vec_dup (name);
+ p->responder.sw_if_index = ~0;
+ uword index = p - km->profiles;
+ mhash_set_mem (&km->profile_index_by_name, name, &index, 0);
+ }
+ else
+ {
+ p = ikev2_profile_index_by_name (name);
+ if (!p)
+ return clib_error_return (0, "policy %v does not exists", name);
+
+ vec_free (p->name);
+ pool_put (km->profiles, p);
+ mhash_unset (&km->profile_index_by_name, name, 0);
+ }
+ return 0;
+}
+
+clib_error_t *
+ikev2_set_profile_auth (vlib_main_t * vm, u8 * name, u8 auth_method,
+ u8 * auth_data, u8 data_hex_format)
+{
+ ikev2_profile_t *p;
+ clib_error_t *r;
+
+ p = ikev2_profile_index_by_name (name);
+
+ if (!p)
+ {
+ r = clib_error_return (0, "unknown profile %v", name);
+ return r;
+ }
+ vec_free (p->auth.data);
+ p->auth.method = auth_method;
+ p->auth.data = vec_dup (auth_data);
+ p->auth.hex = data_hex_format;
+
+ if (auth_method == IKEV2_AUTH_METHOD_RSA_SIG)
+ {
+ vec_add1 (p->auth.data, 0);
+ if (p->auth.key)
+ EVP_PKEY_free (p->auth.key);
+ p->auth.key = ikev2_load_cert_file (auth_data);
+ if (p->auth.key == NULL)
+ return clib_error_return (0, "load cert '%s' failed", auth_data);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+ikev2_set_profile_id (vlib_main_t * vm, u8 * name, u8 id_type, u8 * data,
+ int is_local)
+{
+ ikev2_profile_t *p;
+ clib_error_t *r;
+
+ if (id_type > IKEV2_ID_TYPE_ID_RFC822_ADDR
+ && id_type < IKEV2_ID_TYPE_ID_KEY_ID)
+ {
+ r = clib_error_return (0, "unsupported identity type %U",
+ format_ikev2_id_type, id_type);
+ return r;
+ }
+
+ p = ikev2_profile_index_by_name (name);
+
+ if (!p)
+ {
+ r = clib_error_return (0, "unknown profile %v", name);
+ return r;
+ }
+
+ if (is_local)
+ {
+ vec_free (p->loc_id.data);
+ p->loc_id.type = id_type;
+ p->loc_id.data = vec_dup (data);
+ }
+ else
+ {
+ vec_free (p->rem_id.data);
+ p->rem_id.type = id_type;
+ p->rem_id.data = vec_dup (data);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+ikev2_set_profile_ts (vlib_main_t * vm, u8 * name, u8 protocol_id,
+ u16 start_port, u16 end_port, ip4_address_t start_addr,
+ ip4_address_t end_addr, int is_local)
+{
+ ikev2_profile_t *p;
+ clib_error_t *r;
+
+ p = ikev2_profile_index_by_name (name);
+
+ if (!p)
+ {
+ r = clib_error_return (0, "unknown profile %v", name);
+ return r;
+ }
+
+ if (is_local)
+ {
+ p->loc_ts.start_addr.as_u32 = start_addr.as_u32;
+ p->loc_ts.end_addr.as_u32 = end_addr.as_u32;
+ p->loc_ts.start_port = start_port;
+ p->loc_ts.end_port = end_port;
+ p->loc_ts.protocol_id = protocol_id;
+ p->loc_ts.ts_type = 7;
+ }
+ else
+ {
+ p->rem_ts.start_addr.as_u32 = start_addr.as_u32;
+ p->rem_ts.end_addr.as_u32 = end_addr.as_u32;
+ p->rem_ts.start_port = start_port;
+ p->rem_ts.end_port = end_port;
+ p->rem_ts.protocol_id = protocol_id;
+ p->rem_ts.ts_type = 7;
+ }
+
+ return 0;
+}
+
+clib_error_t *
+ikev2_set_profile_responder (vlib_main_t * vm, u8 * name,
+ u32 sw_if_index, ip4_address_t ip4)
+{
+ ikev2_profile_t *p;
+ clib_error_t *r;
+
+ p = ikev2_profile_index_by_name (name);
+
+ if (!p)
+ {
+ r = clib_error_return (0, "unknown profile %v", name);
+ return r;
+ }
+
+ p->responder.sw_if_index = sw_if_index;
+ p->responder.ip4 = ip4;
+
+ return 0;
+}
+
+clib_error_t *
+ikev2_set_profile_ike_transforms (vlib_main_t * vm, u8 * name,
+ ikev2_transform_encr_type_t crypto_alg,
+ ikev2_transform_integ_type_t integ_alg,
+ ikev2_transform_dh_type_t dh_type,
+ u32 crypto_key_size)
+{
+ ikev2_profile_t *p;
+ clib_error_t *r;
+
+ p = ikev2_profile_index_by_name (name);
+
+ if (!p)
+ {
+ r = clib_error_return (0, "unknown profile %v", name);
+ return r;
+ }
+
+ p->ike_ts.crypto_alg = crypto_alg;
+ p->ike_ts.integ_alg = integ_alg;
+ p->ike_ts.dh_type = dh_type;
+ p->ike_ts.crypto_key_size = crypto_key_size;
+ return 0;
+}
+
+clib_error_t *
+ikev2_set_profile_esp_transforms (vlib_main_t * vm, u8 * name,
+ ikev2_transform_encr_type_t crypto_alg,
+ ikev2_transform_integ_type_t integ_alg,
+ ikev2_transform_dh_type_t dh_type,
+ u32 crypto_key_size)
+{
+ ikev2_profile_t *p;
+ clib_error_t *r;
+
+ p = ikev2_profile_index_by_name (name);
+
+ if (!p)
+ {
+ r = clib_error_return (0, "unknown profile %v", name);
+ return r;
+ }
+
+ p->esp_ts.crypto_alg = crypto_alg;
+ p->esp_ts.integ_alg = integ_alg;
+ p->esp_ts.dh_type = dh_type;
+ p->esp_ts.crypto_key_size = crypto_key_size;
+ return 0;
+}
+
+clib_error_t *
+ikev2_set_profile_sa_lifetime (vlib_main_t * vm, u8 * name,
+ u64 lifetime, u32 jitter, u32 handover,
+ u64 maxdata)
+{
+ ikev2_profile_t *p;
+ clib_error_t *r;
+
+ p = ikev2_profile_index_by_name (name);
+
+ if (!p)
+ {
+ r = clib_error_return (0, "unknown profile %v", name);
+ return r;
+ }
+
+ p->lifetime = lifetime;
+ p->lifetime_jitter = jitter;
+ p->handover = handover;
+ p->lifetime_maxdata = maxdata;
+ return 0;
+}
+
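+/* Initiate an IKE_SA_INIT exchange for the named profile: build the IKE
+ * proposal, KE and nonce payloads plus the NAT detection and signature
+ * hash notifications, store the half-open SA in km->sais (indexed via
+ * sa_by_ispi), and send the request to the configured responder. */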
+clib_error_t *
+ikev2_initiate_sa_init (vlib_main_t * vm, u8 * name)
+{
+ ikev2_profile_t *p;
+ clib_error_t *r;
+ ip4_main_t *im = &ip4_main;
+ ikev2_main_t *km = &ikev2_main;
+
+ p = ikev2_profile_index_by_name (name);
+
+ if (!p)
+ {
+ r = clib_error_return (0, "unknown profile %v", name);
+ return r;
+ }
+
+ if (p->responder.sw_if_index == ~0 || p->responder.ip4.data_u32 == 0)
+ {
+ r = clib_error_return (0, "responder not set for profile %v", name);
+ return r;
+ }
+
+ /* Create the Initiator Request */
+ {
+ ike_header_t *ike0;
+ u32 bi0 = 0;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 if_add_index0;
+ int len = sizeof (ike_header_t);
+
+ /* Get own iface IP */
+ if_add_index0 =
+ lm->if_address_pool_index_by_sw_if_index[p->responder.sw_if_index];
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index0);
+ ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add);
+
+ bi0 = ikev2_get_new_ike_header_buff (vm, &ike0);
+
+ /* Prepare the SA and the IKE payload */
+ ikev2_sa_t sa;
+ memset (&sa, 0, sizeof (ikev2_sa_t));
+ ikev2_payload_chain_t *chain = 0;
+ ikev2_payload_new_chain (chain);
+
+ /* Build the IKE proposal payload */
+ ikev2_sa_proposal_t *proposals = 0;
+ ikev2_set_initiator_proposals (vm, &sa, &p->ike_ts, &proposals, 1);
+ proposals[0].proposal_num = 1;
+ proposals[0].protocol_id = IKEV2_PROTOCOL_IKE;
+
+ /* Add and then cleanup proposal data */
+ ikev2_payload_add_sa (chain, proposals);
+ ikev2_sa_free_proposal_vector (&proposals);
+
+ sa.is_initiator = 1;
+ sa.profile = p;
+ sa.state = IKEV2_STATE_SA_INIT;
+ ikev2_generate_sa_init_data (&sa);
+ ikev2_payload_add_ke (chain, sa.dh_group, sa.i_dh_data);
+ ikev2_payload_add_nonce (chain, sa.i_nonce);
+
+ /* Build the child SA proposal */
+ vec_resize (sa.childs, 1);
+ ikev2_set_initiator_proposals (vm, &sa, &p->esp_ts,
+ &sa.childs[0].i_proposals, 0);
+ sa.childs[0].i_proposals[0].proposal_num = 1;
+ sa.childs[0].i_proposals[0].protocol_id = IKEV2_PROTOCOL_ESP;
+ RAND_bytes ((u8 *) & sa.childs[0].i_proposals[0].spi,
+ sizeof (sa.childs[0].i_proposals[0].spi));
+
+ /* Add NAT detection notification messages (mandatory) */
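+ /* RFC 7296 section 2.23: the notification data is a SHA-1 hash of
+ SPIi | SPIr | IP address | port, computed once for the source and
+ once for the destination of the packet. */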
+ u8 nat_detection_source[8 + 8 + 4 + 2];
+ u8 *nat_detection_sha1 = vec_new (u8, 20);
+
+ u64 tmpspi = clib_host_to_net_u64 (sa.ispi);
+ clib_memcpy (&nat_detection_source[0], &tmpspi, sizeof (tmpspi));
+ tmpspi = clib_host_to_net_u64 (sa.rspi);
+ clib_memcpy (&nat_detection_source[8], &tmpspi, sizeof (tmpspi));
+ u16 tmpport = clib_host_to_net_u16 (500);
+ clib_memcpy (&nat_detection_source[8 + 8 + 4], &tmpport,
+ sizeof (tmpport));
+ u32 tmpip = clib_host_to_net_u32 (if_ip->as_u32);
+ clib_memcpy (&nat_detection_source[8 + 8], &tmpip, sizeof (tmpip));
+ SHA1 (nat_detection_source, sizeof (nat_detection_source),
+ nat_detection_sha1);
+ ikev2_payload_add_notify (chain, IKEV2_NOTIFY_MSG_NAT_DETECTION_SOURCE_IP,
+ nat_detection_sha1);
+ tmpip = clib_host_to_net_u32 (p->responder.ip4.as_u32);
+ clib_memcpy (&nat_detection_source[8 + 8], &tmpip, sizeof (tmpip));
+ SHA1 (nat_detection_source, sizeof (nat_detection_source),
+ nat_detection_sha1);
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_NAT_DETECTION_DESTINATION_IP,
+ nat_detection_sha1);
+ vec_free (nat_detection_sha1);
+
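+ /* SIGNATURE_HASH_ALGORITHMS (RFC 7427): advertise hash algorithm
+ IDs 1-4 (SHA-1, SHA2-256, SHA2-384, SHA2-512) as four 16-bit
+ values */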
+ u8 *sig_hash_algo = vec_new (u8, 8);
+ u64 tmpsig = clib_host_to_net_u64 (0x0001000200030004);
+ clib_memcpy (sig_hash_algo, &tmpsig, sizeof (tmpsig));
+ ikev2_payload_add_notify (chain,
+ IKEV2_NOTIFY_MSG_SIGNATURE_HASH_ALGORITHMS,
+ sig_hash_algo);
+ vec_free (sig_hash_algo);
+
+ /* Buffer update and boilerplate */
+ len += vec_len (chain->data);
+ ike0->nextpayload = chain->first_payload_type;
+ ike0->length = clib_host_to_net_u32 (len);
+ clib_memcpy (ike0->payload, chain->data, vec_len (chain->data));
+ ikev2_payload_destroy_chain (chain);
+
+ ike0->version = IKE_VERSION_2;
+ ike0->flags = IKEV2_HDR_FLAG_INITIATOR;
+ ike0->exchange = IKEV2_EXCHANGE_SA_INIT;
+ ike0->ispi = sa.ispi;
+
+ /* store whole IKE payload - needed for PSK auth */
+ vec_free (sa.last_sa_init_req_packet_data);
+ vec_add (sa.last_sa_init_req_packet_data, ike0, len);
+
+ /* add data to the SA then add it to the pool */
+ sa.iaddr.as_u32 = if_ip->as_u32;
+ sa.raddr.as_u32 = p->responder.ip4.as_u32;
+ sa.i_id.type = p->loc_id.type;
+ sa.i_id.data = vec_dup (p->loc_id.data);
+ sa.i_auth.method = p->auth.method;
+ sa.i_auth.hex = p->auth.hex;
+ sa.i_auth.data = vec_dup (p->auth.data);
+ sa.i_auth.key = vec_dup (p->auth.key);
+ vec_add (sa.childs[0].tsi, &p->loc_ts, 1);
+ vec_add (sa.childs[0].tsr, &p->rem_ts, 1);
+
+ /* add SA to the pool */
+ ikev2_sa_t *sa0 = 0;
+ pool_get (km->sais, sa0);
+ clib_memcpy (sa0, &sa, sizeof (*sa0));
+ hash_set (km->sa_by_ispi, sa0->ispi, sa0 - km->sais);
+
+ ikev2_send_ike (vm, if_ip, &p->responder.ip4, bi0, len);
+
+ }
+
+ return 0;
+}
+
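+/* Send an INFORMATIONAL request carrying an ESP delete payload for the
+ * child SA, then tear down the local tunnel interface and remove the
+ * child from the parent SA. */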
+static void
+ikev2_delete_child_sa_internal (vlib_main_t * vm, ikev2_sa_t * sa,
+ ikev2_child_sa_t * csa)
+{
+ /* Create the Initiator notification for child SA removal */
+ ikev2_main_t *km = &ikev2_main;
+ ike_header_t *ike0;
+ u32 bi0 = 0;
+ int len;
+
+ bi0 = ikev2_get_new_ike_header_buff (vm, &ike0);
+
+ ike0->exchange = IKEV2_EXCHANGE_INFORMATIONAL;
+ ike0->ispi = clib_host_to_net_u64 (sa->ispi);
+ ike0->rspi = clib_host_to_net_u64 (sa->rspi);
+ vec_resize (sa->del, 1);
+ sa->del->protocol_id = IKEV2_PROTOCOL_ESP;
+ sa->del->spi = csa->i_proposals->spi;
+ ike0->msgid = clib_host_to_net_u32 (sa->last_init_msg_id + 1);
+ sa->last_init_msg_id = clib_net_to_host_u32 (ike0->msgid);
+ len = ikev2_generate_message (sa, ike0, 0);
+
+ ikev2_send_ike (vm, &sa->iaddr, &sa->raddr, bi0, len);
+
+ /* delete local child SA */
+ ikev2_delete_tunnel_interface (km->vnet_main, sa, csa);
+ ikev2_sa_del_child_sa (sa, csa);
+}
+
+clib_error_t *
+ikev2_initiate_delete_child_sa (vlib_main_t * vm, u32 ispi)
+{
+ clib_error_t *r;
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *fsa = 0;
+ ikev2_child_sa_t *fchild = 0;
+
+ /* Search for the child SA */
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ ikev2_sa_t *sa;
+ if (fchild)
+ break;
+ /* *INDENT-OFF* */
+ pool_foreach (sa, tkm->sas, ({
+ fchild = ikev2_sa_get_child(sa, ispi, IKEV2_PROTOCOL_ESP, 1);
+ if (fchild)
+ {
+ fsa = sa;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ if (!fchild || !fsa)
+ {
+ r = clib_error_return (0, "Child SA not found");
+ return r;
+ }
+ else
+ {
+ ikev2_delete_child_sa_internal (vm, fsa, fchild);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+ikev2_initiate_delete_ike_sa (vlib_main_t * vm, u64 ispi)
+{
+ clib_error_t *r;
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *fsa = 0;
+ ikev2_main_per_thread_data_t *ftkm = 0;
+
+ /* Search for the IKE SA */
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ ikev2_sa_t *sa;
+ if (fsa)
+ break;
+ /* *INDENT-OFF* */
+ pool_foreach (sa, tkm->sas, ({
+ if (sa->ispi == ispi)
+ {
+ fsa = sa;
+ ftkm = tkm;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ if (!fsa)
+ {
+ r = clib_error_return (0, "IKE SA not found");
+ return r;
+ }
+
+ /* Create the Initiator notification for IKE SA removal */
+ {
+ ike_header_t *ike0;
+ u32 bi0 = 0;
+ int len;
+
+ bi0 = ikev2_get_new_ike_header_buff (vm, &ike0);
+
+ ike0->exchange = IKEV2_EXCHANGE_INFORMATIONAL;
+ ike0->ispi = clib_host_to_net_u64 (fsa->ispi);
+ ike0->rspi = clib_host_to_net_u64 (fsa->rspi);
+ vec_resize (fsa->del, 1);
+ fsa->del->protocol_id = IKEV2_PROTOCOL_IKE;
+ fsa->del->spi = ispi;
+ ike0->msgid = clib_host_to_net_u32 (fsa->last_init_msg_id + 1);
+ fsa->last_init_msg_id = clib_net_to_host_u32 (ike0->msgid);
+ len = ikev2_generate_message (fsa, ike0, 0);
+
+ ikev2_send_ike (vm, &fsa->iaddr, &fsa->raddr, bi0, len);
+ }
+
+ /* delete local SA */
+ ikev2_child_sa_t *c;
+ vec_foreach (c, fsa->childs)
+ {
+ ikev2_delete_tunnel_interface (km->vnet_main, fsa, c);
+ ikev2_sa_del_child_sa (fsa, c);
+ }
+ ikev2_sa_free_all_vec (fsa);
+ uword *p = hash_get (ftkm->sa_by_rspi, fsa->rspi);
+ if (p)
+ {
+ hash_unset (ftkm->sa_by_rspi, fsa->rspi);
+ pool_put (ftkm->sas, fsa);
+ }
+
+ return 0;
+}
+
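+/* Start a CREATE_CHILD_SA exchange that rekeys an existing child SA:
+ * duplicate its proposals under a freshly generated SPI, record the
+ * pending rekey on the parent SA and send the request. */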
+static void
+ikev2_rekey_child_sa_internal (vlib_main_t * vm, ikev2_sa_t * sa,
+ ikev2_child_sa_t * csa)
+{
+ /* Create the Initiator request for create child SA */
+ ike_header_t *ike0;
+ u32 bi0 = 0;
+ int len;
+
+ bi0 = ikev2_get_new_ike_header_buff (vm, &ike0);
+
+ ike0->version = IKE_VERSION_2;
+ ike0->flags = IKEV2_HDR_FLAG_INITIATOR;
+ ike0->exchange = IKEV2_EXCHANGE_CREATE_CHILD_SA;
+ ike0->ispi = clib_host_to_net_u64 (sa->ispi);
+ ike0->rspi = clib_host_to_net_u64 (sa->rspi);
+ ike0->msgid = clib_host_to_net_u32 (sa->last_init_msg_id + 1);
+ sa->last_init_msg_id = clib_net_to_host_u32 (ike0->msgid);
+
+ ikev2_rekey_t *rekey;
+ vec_add2 (sa->rekey, rekey, 1);
+ ikev2_sa_proposal_t *proposals = vec_dup (csa->i_proposals);
+
+ /* need a new SPI for the rekeyed child SA */
+ RAND_bytes ((u8 *) & proposals[0].spi, sizeof (proposals[0].spi));
+ rekey->spi = proposals[0].spi;
+ rekey->ispi = csa->i_proposals->spi;
+ len = ikev2_generate_message (sa, ike0, proposals);
+ ikev2_send_ike (vm, &sa->iaddr, &sa->raddr, bi0, len);
+ vec_free (proposals);
+}
+
+clib_error_t *
+ikev2_initiate_rekey_child_sa (vlib_main_t * vm, u32 ispi)
+{
+ clib_error_t *r;
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *fsa = 0;
+ ikev2_child_sa_t *fchild = 0;
+
+ /* Search for the child SA */
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ ikev2_sa_t *sa;
+ if (fchild)
+ break;
+ /* *INDENT-OFF* */
+ pool_foreach (sa, tkm->sas, ({
+ fchild = ikev2_sa_get_child(sa, ispi, IKEV2_PROTOCOL_ESP, 1);
+ if (fchild)
+ {
+ fsa = sa;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ if (!fchild || !fsa)
+ {
+ r = clib_error_return (0, "Child SA not found");
+ return r;
+ }
+ else
+ {
+ ikev2_rekey_child_sa_internal (vm, fsa, fchild);
+ }
+
+ return 0;
+}
+
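+/* Plugin init: set up the per-thread SA pools and lookup hashes, then
+ * claim UDP port 500 so ikev2-node receives all IKE traffic. */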
+clib_error_t *
+ikev2_init (vlib_main_t * vm)
+{
+ ikev2_main_t *km = &ikev2_main;
+ clib_error_t *error;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ int thread_id;
+
+ memset (km, 0, sizeof (ikev2_main_t));
+ km->vnet_main = vnet_get_main ();
+ km->vlib_main = vm;
+
+ ikev2_crypto_init (km);
+
+ mhash_init_vec_string (&km->profile_index_by_name, sizeof (uword));
+
+ vec_validate (km->per_thread_data, tm->n_vlib_mains - 1);
+ for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++)
+ {
+ km->per_thread_data[thread_id].sa_by_rspi =
+ hash_create (0, sizeof (uword));
+ }
+
+ km->sa_by_ispi = hash_create (0, sizeof (uword));
+
+ if ((error = vlib_call_init_function (vm, ikev2_cli_init)))
+ return error;
+
+ udp_register_dst_port (vm, 500, ikev2_node.index, 1);
+
+ return 0;
+}
+
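+/* Called periodically for every child SA of an initiator: once the SA
+ * passes its expiration time, request a rekey with a limited retry
+ * budget, and delete the child SA when the retries are exhausted.
+ * Returns nonzero when a request was sent. */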
+static u8
+ikev2_mngr_process_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * csa)
+{
+ ikev2_main_t *km = &ikev2_main;
+ vlib_main_t *vm = km->vlib_main;
+ f64 now = vlib_time_now (vm);
+ u8 res = 0;
+
+ if (sa->is_initiator && sa->profile && csa->time_to_expiration
+ && now > csa->time_to_expiration)
+ {
+ if (!csa->is_expired || csa->rekey_retries > 0)
+ {
+ ikev2_rekey_child_sa_internal (vm, sa, csa);
+ csa->time_to_expiration = now + sa->profile->handover;
+ csa->is_expired = 1;
+ if (csa->rekey_retries == 0)
+ {
+ csa->rekey_retries = 5;
+ }
+ else if (csa->rekey_retries > 0)
+ {
+ csa->rekey_retries--;
+ clib_warning ("Rekeing Child SA 0x%x, retries left %d",
+ csa->i_proposals->spi, csa->rekey_retries);
+ if (csa->rekey_retries == 0)
+ {
+ csa->rekey_retries = -1;
+ }
+ }
+ res |= 1;
+ }
+ else
+ {
+ csa->time_to_expiration = 0;
+ ikev2_delete_child_sa_internal (vm, sa, csa);
+ res |= 1;
+ }
+ }
+
+ return res;
+}
+
+static void
+ikev2_mngr_process_ipsec_sa (ipsec_sa_t * ipsec_sa)
+{
+ ikev2_main_t *km = &ikev2_main;
+ vlib_main_t *vm = km->vlib_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *fsa = 0;
+ ikev2_child_sa_t *fchild = 0;
+ f64 now = vlib_time_now (vm);
+
+ /* Search for the SA and child SA */
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ ikev2_sa_t *sa;
+ if (fchild)
+ break;
+ /* *INDENT-OFF* */
+ pool_foreach (sa, tkm->sas, ({
+ fchild = ikev2_sa_get_child(sa, ipsec_sa->spi, IKEV2_PROTOCOL_ESP, 1);
+ if (fchild)
+ {
+ fsa = sa;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ if (fchild && fsa && fsa->profile && fsa->profile->lifetime_maxdata)
+ {
+ if (!fchild->is_expired
+ && ipsec_sa->total_data_size > fsa->profile->lifetime_maxdata)
+ {
+ fchild->time_to_expiration = now;
+ }
+ }
+}
+
+static vlib_node_registration_t ikev2_mngr_process_node;
+
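+/* Background process that wakes up every second to drive child SA
+ * rekeying and expiry, and to push child SAs past their byte-count
+ * lifetime (lifetime_maxdata) toward rekeying. After sending requests
+ * it waits an extra five seconds before the next pass. */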
+static uword
+ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ipsec_main_t *im = &ipsec_main;
+
+ while (1)
+ {
+ u8 req_sent = 0;
+ vlib_process_wait_for_event_or_clock (vm, 1);
+ vlib_process_get_events (vm, NULL);
+
+ /* process ike child sas */
+ ikev2_main_per_thread_data_t *tkm;
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ ikev2_sa_t *sa;
+ /* *INDENT-OFF* */
+ pool_foreach (sa, tkm->sas, ({
+ ikev2_child_sa_t *c;
+ vec_foreach (c, sa->childs)
+ {
+ req_sent |= ikev2_mngr_process_child_sa(sa, c);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ /* process ipsec sas */
+ ipsec_sa_t *sa;
+ /* *INDENT-OFF* */
+ pool_foreach (sa, im->sad, ({
+ ikev2_mngr_process_ipsec_sa(sa);
+ }));
+ /* *INDENT-ON* */
+
+ if (req_sent)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 5);
+ vlib_process_get_events (vm, NULL);
+ req_sent = 0;
+ }
+
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ikev2_mngr_process_node, static) = {
+ .function = ikev2_mngr_process_fn,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ikev2-manager-process",
+};
+
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ikev2.h b/src/vnet/ipsec/ikev2.h
new file mode 100644
index 00000000..84a8be53
--- /dev/null
+++ b/src/vnet/ipsec/ikev2.h
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ikev2_h__
+#define __included_ikev2_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/error.h>
+
+#define IKEV2_NONCE_SIZE 32
+
+#define IKEV2_KEY_PAD "Key Pad for IKEv2"
+
+typedef u8 v8;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u64 ispi;
+ u64 rspi;
+ u8 nextpayload;
+ u8 version;
+ u8 exchange;
+ u8 flags;
+ u32 msgid;
+ u32 length;
+ u8 payload[0];
+}) ike_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 nextpayload;
+ u8 flags;
+ u16 length;
+ u16 dh_group;
+ u8 reserved[2];
+ u8 payload[0];
+}) ike_ke_payload_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 nextpayload;
+ u8 flags;
+ u16 length;
+ u8 payload[0];
+}) ike_payload_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 nextpayload;
+ u8 flags;
+ u16 length;
+ u8 auth_method;
+ u8 reserved[3];
+ u8 payload[0];
+}) ike_auth_payload_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 nextpayload;
+ u8 flags;
+ u16 length;
+ u8 id_type;
+ u8 reserved[3];
+ u8 payload[0];
+}) ike_id_payload_header_t;
+/* *INDENT-ON* */
+
+#define IKE_VERSION_2 0x20
+
+#define IKEV2_EXCHANGE_SA_INIT 34
+#define IKEV2_EXCHANGE_IKE_AUTH 35
+#define IKEV2_EXCHANGE_CREATE_CHILD_SA 36
+#define IKEV2_EXCHANGE_INFORMATIONAL 37
+
+#define IKEV2_HDR_FLAG_INITIATOR (1<<3)
+#define IKEV2_HDR_FLAG_VERSION (1<<4)
+#define IKEV2_HDR_FLAG_RESPONSE (1<<5)
+
+#define IKEV2_PAYLOAD_FLAG_CRITICAL (1<<7)
+
+#define IKEV2_PAYLOAD_NONE 0
+#define IKEV2_PAYLOAD_SA 33
+#define IKEV2_PAYLOAD_KE 34
+#define IKEV2_PAYLOAD_IDI 35
+#define IKEV2_PAYLOAD_IDR 36
+#define IKEV2_PAYLOAD_AUTH 39
+#define IKEV2_PAYLOAD_NONCE 40
+#define IKEV2_PAYLOAD_NOTIFY 41
+#define IKEV2_PAYLOAD_DELETE 42
+#define IKEV2_PAYLOAD_VENDOR 43
+#define IKEV2_PAYLOAD_TSI 44
+#define IKEV2_PAYLOAD_TSR 45
+#define IKEV2_PAYLOAD_SK 46
+
+typedef enum
+{
+ IKEV2_PROTOCOL_IKE = 1,
+ IKEV2_PROTOCOL_AH = 2,
+ IKEV2_PROTOCOL_ESP = 3,
+} ikev2_protocol_id_t;
+
+#define foreach_ikev2_notify_msg_type \
+ _( 0, NONE) \
+ _( 1, UNSUPPORTED_CRITICAL_PAYLOAD) \
+ _( 4, INVALID_IKE_SPI) \
+ _( 5, INVALID_MAJOR_VERSION) \
+ _( 7, INVALID_SYNTAX) \
+ _( 8, INVALID_MESSAGE_ID) \
+ _( 11, INVALID_SPI) \
+ _( 14, NO_PROPOSAL_CHOSEN) \
+ _( 17, INVALID_KE_PAYLOAD) \
+ _( 24, AUTHENTICATION_FAILED) \
+ _( 34, SINGLE_PAIR_REQUIRED) \
+ _( 35, NO_ADDITIONAL_SAS) \
+ _( 36, INTERNAL_ADDRESS_FAILURE) \
+ _( 37, FAILED_CP_REQUIRED) \
+ _( 38, TS_UNACCEPTABLE) \
+ _( 39, INVALID_SELECTORS) \
+ _( 40, UNACCEPTABLE_ADDRESSES) \
+ _( 41, UNEXPECTED_NAT_DETECTED) \
+ _( 42, USE_ASSIGNED_HoA) \
+ _( 43, TEMPORARY_FAILURE) \
+ _( 44, CHILD_SA_NOT_FOUND) \
+ _( 45, INVALID_GROUP_ID) \
+ _( 46, AUTHORIZATION_FAILED) \
+ _(16384, INITIAL_CONTACT) \
+ _(16385, SET_WINDOW_SIZE) \
+ _(16386, ADDITIONAL_TS_POSSIBLE) \
+ _(16387, IPCOMP_SUPPORTED) \
+ _(16388, NAT_DETECTION_SOURCE_IP) \
+ _(16389, NAT_DETECTION_DESTINATION_IP) \
+ _(16390, COOKIE) \
+ _(16391, USE_TRANSPORT_MODE) \
+ _(16392, HTTP_CERT_LOOKUP_SUPPORTED) \
+ _(16393, REKEY_SA) \
+ _(16394, ESP_TFC_PADDING_NOT_SUPPORTED) \
+ _(16395, NON_FIRST_FRAGMENTS_ALSO) \
+ _(16396, MOBIKE_SUPPORTED) \
+ _(16397, ADDITIONAL_IP4_ADDRESS) \
+ _(16398, ADDITIONAL_IP6_ADDRESS) \
+ _(16399, NO_ADDITIONAL_ADDRESSES) \
+ _(16400, UPDATE_SA_ADDRESSES) \
+ _(16401, COOKIE2) \
+ _(16402, NO_NATS_ALLOWED) \
+ _(16403, AUTH_LIFETIME) \
+ _(16404, MULTIPLE_AUTH_SUPPORTED) \
+ _(16405, ANOTHER_AUTH_FOLLOWS) \
+ _(16406, REDIRECT_SUPPORTED) \
+ _(16407, REDIRECT) \
+ _(16408, REDIRECTED_FROM) \
+ _(16409, TICKET_LT_OPAQUE) \
+ _(16410, TICKET_REQUEST) \
+ _(16411, TICKET_ACK) \
+ _(16412, TICKET_NACK) \
+ _(16413, TICKET_OPAQUE) \
+ _(16414, LINK_ID) \
+ _(16415, USE_WESP_MODE) \
+ _(16416, ROHC_SUPPORTED) \
+ _(16417, EAP_ONLY_AUTHENTICATION) \
+ _(16418, CHILDLESS_IKEV2_SUPPORTED) \
+ _(16419, QUICK_CRASH_DETECTION) \
+ _(16420, IKEV2_MESSAGE_ID_SYNC_SUPPORTED) \
+ _(16421, IPSEC_REPLAY_COUNTER_SYNC_SUPPORTED) \
+ _(16422, IKEV2_MESSAGE_ID_SYNC) \
+ _(16423, IPSEC_REPLAY_COUNTER_SYNC) \
+ _(16424, SECURE_PASSWORD_METHODS) \
+ _(16425, PSK_PERSIST) \
+ _(16426, PSK_CONFIRM) \
+ _(16427, ERX_SUPPORTED) \
+ _(16428, IFOM_CAPABILITY) \
+ _(16429, SENDER_REQUEST_ID) \
+ _(16430, IKEV2_FRAGMENTATION_SUPPORTED) \
+ _(16431, SIGNATURE_HASH_ALGORITHMS)
+
+
+typedef enum
+{
+#define _(v,f) IKEV2_NOTIFY_MSG_##f = v,
+ foreach_ikev2_notify_msg_type
+#undef _
+} ikev2_notify_msg_type_t;
+
+#define foreach_ikev2_transform_type \
+ _(0, UNDEFINED, "undefinded") \
+ _(1, ENCR, "encr") \
+ _(2, PRF, "prf") \
+ _(3, INTEG, "integ") \
+ _(4, DH, "dh-group") \
+ _(5, ESN, "esn")
+
+typedef enum
+{
+#define _(v,f,s) IKEV2_TRANSFORM_TYPE_##f = v,
+ foreach_ikev2_transform_type
+#undef _
+ IKEV2_TRANSFORM_NUM_TYPES
+} ikev2_transform_type_t;
+
+
+#define foreach_ikev2_transform_encr_type \
+ _(1 , DES_IV64, "des-iv64") \
+ _(2 , DES, "des") \
+ _(3 , 3DES, "3des") \
+ _(4 , RC5, "rc5") \
+ _(5 , IDEA, "idea") \
+ _(6 , CAST, "cast") \
+ _(7 , BLOWFISH, "blowfish") \
+ _(8 , 3IDEA, "3idea") \
+ _(9 , DES_IV32, "des-iv32") \
+ _(11, NULL, "null") \
+ _(12, AES_CBC, "aes-cbc") \
+ _(13, AES_CTR, "aes-ctr")
+
+typedef enum
+{
+#define _(v,f,str) IKEV2_TRANSFORM_ENCR_TYPE_##f = v,
+ foreach_ikev2_transform_encr_type
+#undef _
+} ikev2_transform_encr_type_t;
+
+#define foreach_ikev2_transform_prf_type \
+ _(1, PRF_HMAC_MD5, "hmac-md5") \
+ _(2, PRF_HMAC_SHA1, "hmac-sha1") \
+ _(3, PRF_MAC_TIGER, "mac-tiger") \
+ _(4, PRF_AES128_XCBC, "aes128-xcbc") \
+ _(5, PRF_HMAC_SHA2_256, "hmac-sha2-256") \
+ _(6, PRF_HMAC_SHA2_384, "hmac-sha2-384") \
+ _(7, PRF_HMAC_SHA2_512, "hmac-sha2-512") \
+ _(8, PRF_AES128_CMAC, "aes128-cmac")
+
+typedef enum
+{
+#define _(v,f,str) IKEV2_TRANSFORM_PRF_TYPE_##f = v,
+ foreach_ikev2_transform_prf_type
+#undef _
+} ikev2_transform_prf_type_t;
+
+#define foreach_ikev2_transform_integ_type \
+ _(0, NONE, "none") \
+ _(1, AUTH_HMAC_MD5_96, "md5-96") \
+ _(2, AUTH_HMAC_SHA1_96, "sha1-96") \
+ _(3, AUTH_DES_MAC, "des-mac") \
+ _(4, AUTH_KPDK_MD5, "kpdk-md5") \
+ _(5, AUTH_AES_XCBC_96, "aes-xcbc-96") \
+ _(6, AUTH_HMAC_MD5_128, "md5-128") \
+ _(7, AUTH_HMAC_SHA1_160, "sha1-160") \
+ _(8, AUTH_AES_CMAC_96, "cmac-96") \
+ _(9, AUTH_AES_128_GMAC, "aes-128-gmac") \
+ _(10, AUTH_AES_192_GMAC, "aes-192-gmac") \
+ _(11, AUTH_AES_256_GMAC, "aes-256-gmac") \
+ _(12, AUTH_HMAC_SHA2_256_128, "hmac-sha2-256-128") \
+ _(13, AUTH_HMAC_SHA2_384_192, "hmac-sha2-384-192") \
+ _(14, AUTH_HMAC_SHA2_512_256, "hmac-sha2-512-256")
+
+typedef enum
+{
+#define _(v,f, str) IKEV2_TRANSFORM_INTEG_TYPE_##f = v,
+ foreach_ikev2_transform_integ_type
+#undef _
+} ikev2_transform_integ_type_t;
+
+#if defined(OPENSSL_NO_CISCO_FECDH)
+#define foreach_ikev2_transform_dh_type \
+ _(0, NONE, "none") \
+ _(1, MODP_768, "modp-768") \
+ _(2, MODP_1024, "modp-1024") \
+ _(5, MODP_1536, "modp-1536") \
+ _(14, MODP_2048, "modp-2048") \
+ _(15, MODP_3072, "modp-3072") \
+ _(16, MODP_4096, "modp-4096") \
+ _(17, MODP_6144, "modp-6144") \
+ _(18, MODP_8192, "modp-8192") \
+ _(19, ECP_256, "ecp-256") \
+ _(20, ECP_384, "ecp-384") \
+ _(21, ECP_521, "ecp-521") \
+ _(22, MODP_1024_160, "modp-1024-160") \
+ _(23, MODP_2048_224, "modp-2048-224") \
+ _(24, MODP_2048_256, "modp-2048-256") \
+ _(25, ECP_192, "ecp-192") \
+ _(26, ECP_224, "ecp-224") \
+ _(27, BRAINPOOL_224, "brainpool-224") \
+ _(28, BRAINPOOL_256, "brainpool-256") \
+ _(29, BRAINPOOL_384, "brainpool-384") \
+ _(30, BRAINPOOL_512, "brainpool-512")
+#else
+#define foreach_ikev2_transform_dh_type \
+ _(0, NONE, "none") \
+ _(1, MODP_768, "modp-768") \
+ _(2, MODP_1024, "modp-1024") \
+ _(5, MODP_1536, "modp-1536") \
+ _(14, MODP_2048, "modp-2048") \
+ _(15, MODP_3072, "modp-3072") \
+ _(16, MODP_4096, "modp-4096") \
+ _(17, MODP_6144, "modp-6144") \
+ _(18, MODP_8192, "modp-8192") \
+ _(19, ECP_256, "ecp-256") \
+ _(20, ECP_384, "ecp-384") \
+ _(21, ECP_521, "ecp-521") \
+ _(22, MODP_1024_160, "modp-1024-160") \
+ _(23, MODP_2048_224, "modp-2048-224") \
+ _(24, MODP_2048_256, "modp-2048-256") \
+ _(25, ECP_192, "ecp-192")
+#endif
+
+typedef enum
+{
+#define _(v,f, str) IKEV2_TRANSFORM_DH_TYPE_##f = v,
+ foreach_ikev2_transform_dh_type
+#undef _
+} ikev2_transform_dh_type_t;
+
+#define foreach_ikev2_transform_esn_type \
+ _(0, NO_ESN, "no") \
+ _(1, ESN, "yes")
+
+typedef enum
+{
+#define _(v,f,str) IKEV2_TRANSFORM_ESN_TYPE_##f = v,
+ foreach_ikev2_transform_esn_type
+#undef _
+} ikev2_transform_esn_type_t;
+
+#define foreach_ikev2_auth_method \
+ _( 1, RSA_SIG, "rsa-sig") \
+ _( 2, SHARED_KEY_MIC, "shared-key-mic")
+
+typedef enum
+{
+#define _(v,f,s) IKEV2_AUTH_METHOD_##f = v,
+ foreach_ikev2_auth_method
+#undef _
+} ikev2_auth_method_t;
+
+#define foreach_ikev2_id_type \
+ _( 1, ID_IPV4_ADDR, "ip4-addr") \
+ _( 2, ID_FQDN, "fqdn") \
+ _( 3, ID_RFC822_ADDR, "rfc822") \
+ _( 5, ID_IPV6_ADDR, "ip6-addr") \
+ _( 9, ID_DER_ASN1_DN, "der-asn1-dn") \
+ _(10, ID_DER_ASN1_GN, "der-asn1-gn") \
+ _(11, ID_KEY_ID, "key-id")
+
+typedef enum
+{
+#define _(v,f,s) IKEV2_ID_TYPE_##f = v,
+ foreach_ikev2_id_type
+#undef _
+} ikev2_id_type_t;
+
+clib_error_t *ikev2_init (vlib_main_t * vm);
+clib_error_t *ikev2_set_local_key (vlib_main_t * vm, u8 * file);
+clib_error_t *ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add);
+clib_error_t *ikev2_set_profile_auth (vlib_main_t * vm, u8 * name,
+ u8 auth_method, u8 * data,
+ u8 data_hex_format);
+clib_error_t *ikev2_set_profile_id (vlib_main_t * vm, u8 * name,
+ u8 id_type, u8 * data, int is_local);
+clib_error_t *ikev2_set_profile_ts (vlib_main_t * vm, u8 * name,
+ u8 protocol_id, u16 start_port,
+ u16 end_port, ip4_address_t start_addr,
+ ip4_address_t end_addr, int is_local);
+clib_error_t *ikev2_set_profile_responder (vlib_main_t * vm, u8 * name,
+ u32 sw_if_index,
+ ip4_address_t ip4);
+clib_error_t *ikev2_set_profile_ike_transforms (vlib_main_t * vm, u8 * name,
+ ikev2_transform_encr_type_t
+ crypto_alg,
+ ikev2_transform_integ_type_t
+ integ_alg,
+ ikev2_transform_dh_type_t
+ dh_type, u32 crypto_key_size);
+clib_error_t *ikev2_set_profile_esp_transforms (vlib_main_t * vm, u8 * name,
+ ikev2_transform_encr_type_t
+ crypto_alg,
+ ikev2_transform_integ_type_t
+ integ_alg,
+ ikev2_transform_dh_type_t
+ dh_type, u32 crypto_key_size);
+clib_error_t *ikev2_set_profile_sa_lifetime (vlib_main_t * vm, u8 * name,
+ u64 lifetime, u32 jitter,
+ u32 handover, u64 maxdata);
+clib_error_t *ikev2_initiate_sa_init (vlib_main_t * vm, u8 * name);
+clib_error_t *ikev2_initiate_delete_child_sa (vlib_main_t * vm, u32 ispi);
+clib_error_t *ikev2_initiate_delete_ike_sa (vlib_main_t * vm, u64 ispi);
+clib_error_t *ikev2_initiate_rekey_child_sa (vlib_main_t * vm, u32 ispi);
+
+/* ikev2_format.c */
+u8 *format_ikev2_auth_method (u8 * s, va_list * args);
+u8 *format_ikev2_id_type (u8 * s, va_list * args);
+u8 *format_ikev2_transform_type (u8 * s, va_list * args);
+u8 *format_ikev2_notify_msg_type (u8 * s, va_list * args);
+u8 *format_ikev2_transform_encr_type (u8 * s, va_list * args);
+u8 *format_ikev2_transform_prf_type (u8 * s, va_list * args);
+u8 *format_ikev2_transform_integ_type (u8 * s, va_list * args);
+u8 *format_ikev2_transform_dh_type (u8 * s, va_list * args);
+u8 *format_ikev2_transform_esn_type (u8 * s, va_list * args);
+u8 *format_ikev2_sa_transform (u8 * s, va_list * args);
+
+uword unformat_ikev2_auth_method (unformat_input_t * input, va_list * args);
+uword unformat_ikev2_id_type (unformat_input_t * input, va_list * args);
+uword unformat_ikev2_transform_type (unformat_input_t * input,
+ va_list * args);
+uword unformat_ikev2_transform_encr_type (unformat_input_t * input,
+ va_list * args);
+uword unformat_ikev2_transform_prf_type (unformat_input_t * input,
+ va_list * args);
+uword unformat_ikev2_transform_integ_type (unformat_input_t * input,
+ va_list * args);
+uword unformat_ikev2_transform_dh_type (unformat_input_t * input,
+ va_list * args);
+uword unformat_ikev2_transform_esn_type (unformat_input_t * input,
+ va_list * args);
+
+#endif /* __included_ikev2_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ikev2_cli.c b/src/vnet/ipsec/ikev2_cli.c
new file mode 100644
index 00000000..05ed4e60
--- /dev/null
+++ b/src/vnet/ipsec/ikev2_cli.c
@@ -0,0 +1,602 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ipsec/ikev2.h>
+#include <vnet/ipsec/ikev2_priv.h>
+
+u8 *
+format_ikev2_id_type_and_data (u8 * s, va_list * args)
+{
+ ikev2_id_t *id = va_arg (*args, ikev2_id_t *);
+
+ if (id->type == 0 || vec_len (id->data) == 0)
+ return format (s, "none");
+
+ s = format (s, "%U", format_ikev2_id_type, id->type);
+
+ if (id->type == IKEV2_ID_TYPE_ID_FQDN ||
+ id->type == IKEV2_ID_TYPE_ID_RFC822_ADDR)
+ {
+ s = format (s, " %v", id->data);
+ }
+ else
+ {
+ s =
+ format (s, " %U", format_hex_bytes, id->data,
+ (uword) (vec_len (id->data)));
+ }
+
+ return s;
+}
+
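+/* CLI handler for "show ikev2 sa": dump every SA in every per-thread
+ * pool, including negotiated transforms, derived keys, identities and
+ * per-child traffic selectors. */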
+static clib_error_t *
+show_ikev2_sa_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+ ikev2_ts_t *ts;
+ ikev2_child_sa_t *child;
+ ikev2_sa_transform_t *tr;
+
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (sa, tkm->sas, ({
+ u8 * s = 0;
+ vlib_cli_output(vm, " iip %U ispi %lx rip %U rspi %lx",
+ format_ip4_address, &sa->iaddr, sa->ispi,
+ format_ip4_address, &sa->raddr, sa->rspi);
+
+ tr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ s = format(s, "%U ", format_ikev2_sa_transform, tr);
+
+ tr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+ s = format(s, "%U ", format_ikev2_sa_transform, tr);
+
+ tr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+ s = format(s, "%U ", format_ikev2_sa_transform, tr);
+
+ tr = ikev2_sa_get_td_for_type(sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ s = format(s, "%U ", format_ikev2_sa_transform, tr);
+
+ vlib_cli_output(vm, " %v", s);
+ vec_free(s);
+
+ vlib_cli_output(vm, " nonce i:%U\n r:%U",
+ format_hex_bytes, sa->i_nonce, vec_len(sa->i_nonce),
+ format_hex_bytes, sa->r_nonce, vec_len(sa->r_nonce));
+
+ vlib_cli_output(vm, " SK_d %U",
+ format_hex_bytes, sa->sk_d, vec_len(sa->sk_d));
+ vlib_cli_output(vm, " SK_a i:%U\n r:%U",
+ format_hex_bytes, sa->sk_ai, vec_len(sa->sk_ai),
+ format_hex_bytes, sa->sk_ar, vec_len(sa->sk_ar));
+ vlib_cli_output(vm, " SK_e i:%U\n r:%U",
+ format_hex_bytes, sa->sk_ei, vec_len(sa->sk_ei),
+ format_hex_bytes, sa->sk_er, vec_len(sa->sk_er));
+ vlib_cli_output(vm, " SK_p i:%U\n r:%U",
+ format_hex_bytes, sa->sk_pi, vec_len(sa->sk_pi),
+ format_hex_bytes, sa->sk_pr, vec_len(sa->sk_pr));
+
+ vlib_cli_output(vm, " identifier (i) %U",
+ format_ikev2_id_type_and_data, &sa->i_id);
+ vlib_cli_output(vm, " identifier (r) %U",
+ format_ikev2_id_type_and_data, &sa->r_id);
+
+ vec_foreach(child, sa->childs)
+ {
+ vlib_cli_output(vm, " child sa %u:", child - sa->childs);
+
+ tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ s = format(s, "%U ", format_ikev2_sa_transform, tr);
+
+ tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+ s = format(s, "%U ", format_ikev2_sa_transform, tr);
+
+ tr = ikev2_sa_get_td_for_type(child->r_proposals, IKEV2_TRANSFORM_TYPE_ESN);
+ s = format(s, "%U ", format_ikev2_sa_transform, tr);
+
+ vlib_cli_output(vm, " %v", s);
+ vec_free(s);
+
+ vlib_cli_output(vm, " spi(i) %lx spi(r) %lx",
+ child->i_proposals ? child->i_proposals[0].spi : 0,
+ child->r_proposals ? child->r_proposals[0].spi : 0);
+
+ vlib_cli_output(vm, " SK_e i:%U\n r:%U",
+ format_hex_bytes, child->sk_ei, vec_len(child->sk_ei),
+ format_hex_bytes, child->sk_er, vec_len(child->sk_er));
+ vlib_cli_output(vm, " SK_a i:%U\n r:%U",
+ format_hex_bytes, child->sk_ai, vec_len(child->sk_ai),
+ format_hex_bytes, child->sk_ar, vec_len(child->sk_ar));
+ vlib_cli_output(vm, " traffic selectors (i):");
+ vec_foreach(ts, child->tsi)
+ {
+ vlib_cli_output(vm, " %u type %u protocol_id %u addr "
+ "%U - %U port %u - %u",
+ ts - child->tsi,
+ ts->ts_type, ts->protocol_id,
+ format_ip4_address, &ts->start_addr,
+ format_ip4_address, &ts->end_addr,
+ clib_net_to_host_u16( ts->start_port),
+ clib_net_to_host_u16( ts->end_port));
+ }
+ vlib_cli_output(vm, " traffic selectors (r):");
+ vec_foreach(ts, child->tsr)
+ {
+ vlib_cli_output(vm, " %u type %u protocol_id %u addr "
+ "%U - %U port %u - %u",
+ ts - child->tsr,
+ ts->ts_type, ts->protocol_id,
+ format_ip4_address, &ts->start_addr,
+ format_ip4_address, &ts->end_addr,
+ clib_net_to_host_u16( ts->start_port),
+ clib_net_to_host_u16( ts->end_port));
+ }
+ }
+ vlib_cli_output(vm, "");
+ }));
+ /* *INDENT-ON* */
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ikev2_sa_command, static) = {
+ .path = "show ikev2 sa",
+ .short_help = "show ikev2 sa",
+ .function = show_ikev2_sa_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ikev2_profile_add_del_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *name = 0;
+ clib_error_t *r = 0;
+ u32 id_type;
+ u8 *data = 0;
+ u32 tmp1, tmp2, tmp3;
+ u64 tmp4, tmp5;
+ ip4_address_t ip4;
+ ip4_address_t end_addr;
+ u32 responder_sw_if_index = (u32) ~ 0;
+ ip4_address_t responder_ip4;
+ ikev2_transform_encr_type_t crypto_alg;
+ ikev2_transform_integ_type_t integ_alg;
+ ikev2_transform_dh_type_t dh_type;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add %U", unformat_token, valid_chars, &name))
+ {
+ r = ikev2_add_del_profile (vm, name, 1);
+ goto done;
+ }
+ else
+ if (unformat
+ (line_input, "del %U", unformat_token, valid_chars, &name))
+ {
+ r = ikev2_add_del_profile (vm, name, 0);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U auth shared-key-mic string %v",
+ unformat_token, valid_chars, &name, &data))
+ {
+ r =
+ ikev2_set_profile_auth (vm, name,
+ IKEV2_AUTH_METHOD_SHARED_KEY_MIC, data,
+ 0);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U auth shared-key-mic hex %U",
+ unformat_token, valid_chars, &name,
+ unformat_hex_string, &data))
+ {
+ r =
+ ikev2_set_profile_auth (vm, name,
+ IKEV2_AUTH_METHOD_SHARED_KEY_MIC, data,
+ 1);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U auth rsa-sig cert-file %v",
+ unformat_token, valid_chars, &name, &data))
+ {
+ r =
+ ikev2_set_profile_auth (vm, name, IKEV2_AUTH_METHOD_RSA_SIG, data,
+ 0);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U id local %U %U",
+ unformat_token, valid_chars, &name,
+ unformat_ikev2_id_type, &id_type,
+ unformat_ip4_address, &ip4))
+ {
+ data = vec_new (u8, 4);
+ clib_memcpy (data, ip4.as_u8, 4);
+ r =
+ ikev2_set_profile_id (vm, name, (u8) id_type, data, /*local */ 1);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U id local %U 0x%U",
+ unformat_token, valid_chars, &name,
+ unformat_ikev2_id_type, &id_type,
+ unformat_hex_string, &data))
+ {
+ r =
+ ikev2_set_profile_id (vm, name, (u8) id_type, data, /*local */ 1);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U id local %U %v",
+ unformat_token, valid_chars, &name,
+ unformat_ikev2_id_type, &id_type, &data))
+ {
+ r =
+ ikev2_set_profile_id (vm, name, (u8) id_type, data, /*local */ 1);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U id remote %U %U",
+ unformat_token, valid_chars, &name,
+ unformat_ikev2_id_type, &id_type,
+ unformat_ip4_address, &ip4))
+ {
+ data = vec_new (u8, 4);
+ clib_memcpy (data, ip4.as_u8, 4);
+ r = ikev2_set_profile_id (vm, name, (u8) id_type, data, /*remote */
+ 0);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U id remote %U 0x%U",
+ unformat_token, valid_chars, &name,
+ unformat_ikev2_id_type, &id_type,
+ unformat_hex_string, &data))
+ {
+ r = ikev2_set_profile_id (vm, name, (u8) id_type, data, /*remote */
+ 0);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U id remote %U %v",
+ unformat_token, valid_chars, &name,
+ unformat_ikev2_id_type, &id_type, &data))
+ {
+ r = ikev2_set_profile_id (vm, name, (u8) id_type, data, /*remote */
+ 0);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U traffic-selector local "
+ "ip-range %U - %U port-range %u - %u protocol %u",
+ unformat_token, valid_chars, &name,
+ unformat_ip4_address, &ip4,
+ unformat_ip4_address, &end_addr,
+ &tmp1, &tmp2, &tmp3))
+ {
+ r =
+ ikev2_set_profile_ts (vm, name, (u8) tmp3, (u16) tmp1, (u16) tmp2,
+ ip4, end_addr, /*local */ 1);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U traffic-selector remote "
+ "ip-range %U - %U port-range %u - %u protocol %u",
+ unformat_token, valid_chars, &name,
+ unformat_ip4_address, &ip4,
+ unformat_ip4_address, &end_addr,
+ &tmp1, &tmp2, &tmp3))
+ {
+ r =
+ ikev2_set_profile_ts (vm, name, (u8) tmp3, (u16) tmp1, (u16) tmp2,
+ ip4, end_addr, /*remote */ 0);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U responder %U %U",
+ unformat_token, valid_chars, &name,
+ unformat_vnet_sw_interface, vnm,
+ &responder_sw_if_index, unformat_ip4_address,
+ &responder_ip4))
+ {
+ r =
+ ikev2_set_profile_responder (vm, name, responder_sw_if_index,
+ responder_ip4);
+ goto done;
+ }
+ else
+ if (unformat
+ (line_input,
+ "set %U ike-crypto-alg %U %u ike-integ-alg %U ike-dh %U",
+ unformat_token, valid_chars, &name,
+ unformat_ikev2_transform_encr_type, &crypto_alg, &tmp1,
+ unformat_ikev2_transform_integ_type, &integ_alg,
+ unformat_ikev2_transform_dh_type, &dh_type))
+ {
+ r =
+ ikev2_set_profile_ike_transforms (vm, name, crypto_alg, integ_alg,
+ dh_type, tmp1);
+ goto done;
+ }
+ else
+ if (unformat
+ (line_input,
+ "set %U esp-crypto-alg %U %u esp-integ-alg %U esp-dh %U",
+ unformat_token, valid_chars, &name,
+ unformat_ikev2_transform_encr_type, &crypto_alg, &tmp1,
+ unformat_ikev2_transform_integ_type, &integ_alg,
+ unformat_ikev2_transform_dh_type, &dh_type))
+ {
+ r =
+ ikev2_set_profile_esp_transforms (vm, name, crypto_alg, integ_alg,
+ dh_type, tmp1);
+ goto done;
+ }
+ else if (unformat (line_input, "set %U sa-lifetime %lu %u %u %lu",
+ unformat_token, valid_chars, &name,
+ &tmp4, &tmp1, &tmp2, &tmp5))
+ {
+ r =
+ ikev2_set_profile_sa_lifetime (vm, name, tmp4, tmp1, tmp2, tmp5);
+ goto done;
+ }
+ else
+ break;
+ }
+
+ r = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+
+done:
+ vec_free (name);
+ vec_free (data);
+ unformat_free (line_input);
+ return r;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ikev2_profile_add_del_command, static) = {
+ .path = "ikev2 profile",
+ .short_help =
+ "ikev2 profile [add|del] <id>\n"
+ "ikev2 profile set <id> auth [rsa-sig|shared-key-mic] [cert-file|string|hex]"
+ " <data>\n"
+ "ikev2 profile set <id> id <local|remote> <type> <data>\n"
+ "ikev2 profile set <id> traffic-selector <local|remote> ip-range "
+ "<start-addr> - <end-addr> port-range <start-port> - <end-port> "
+ "protocol <protocol-number>\n"
+ "ikev2 profile set <id> responder <interface> <addr>\n"
+ "ikev2 profile set <id> ike-crypto-alg <crypto alg> <key size> ike-integ-alg <integ alg> ike-dh <dh type>\n"
+ "ikev2 profile set <id> esp-crypto-alg <crypto alg> <key size> esp-integ-alg <integ alg> esp-dh <dh type>\n"
+ "ikev2 profile set <id> sa-lifetime <seconds> <jitter> <handover> <max bytes>",
+ .function = ikev2_profile_add_del_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_ikev2_profile_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_profile_t *p;
+
+ /* *INDENT-OFF* */
+ pool_foreach (p, km->profiles, ({
+ vlib_cli_output(vm, "profile %v", p->name);
+
+ if (p->auth.data)
+ {
+ if (p->auth.hex)
+ vlib_cli_output(vm, " auth-method %U auth data 0x%U",
+ format_ikev2_auth_method, p->auth.method,
+ format_hex_bytes, p->auth.data, vec_len(p->auth.data));
+ else
+ vlib_cli_output(vm, " auth-method %U auth data %v",
+ format_ikev2_auth_method, p->auth.method, p->auth.data);
+ }
+
+ if (p->loc_id.data)
+ {
+ if (p->loc_id.type == IKEV2_ID_TYPE_ID_IPV4_ADDR)
+ vlib_cli_output(vm, " local id-type %U data %U",
+ format_ikev2_id_type, p->loc_id.type,
+ format_ip4_address, p->loc_id.data);
+ else if (p->loc_id.type == IKEV2_ID_TYPE_ID_KEY_ID)
+ vlib_cli_output(vm, " local id-type %U data 0x%U",
+ format_ikev2_id_type, p->loc_id.type,
+ format_hex_bytes, p->loc_id.data,
+ vec_len(p->loc_id.data));
+ else
+ vlib_cli_output(vm, " local id-type %U data %v",
+ format_ikev2_id_type, p->loc_id.type, p->loc_id.data);
+ }
+
+ if (p->rem_id.data)
+ {
+ if (p->rem_id.type == IKEV2_ID_TYPE_ID_IPV4_ADDR)
+ vlib_cli_output(vm, " remote id-type %U data %U",
+ format_ikev2_id_type, p->rem_id.type,
+ format_ip4_address, p->rem_id.data);
+ else if (p->rem_id.type == IKEV2_ID_TYPE_ID_KEY_ID)
+ vlib_cli_output(vm, " remote id-type %U data 0x%U",
+ format_ikev2_id_type, p->rem_id.type,
+ format_hex_bytes, p->rem_id.data,
+ vec_len(p->rem_id.data));
+ else
+ vlib_cli_output(vm, " remote id-type %U data %v",
+ format_ikev2_id_type, p->rem_id.type, p->rem_id.data);
+ }
+
+ if (p->loc_ts.end_addr.as_u32)
+ vlib_cli_output(vm, " local traffic-selector addr %U - %U port %u - %u"
+ " protocol %u",
+ format_ip4_address, &p->loc_ts.start_addr,
+ format_ip4_address, &p->loc_ts.end_addr,
+ p->loc_ts.start_port, p->loc_ts.end_port,
+ p->loc_ts.protocol_id);
+
+ if (p->rem_ts.end_addr.as_u32)
+ vlib_cli_output(vm, " remote traffic-selector addr %U - %U port %u - %u"
+ " protocol %u",
+ format_ip4_address, &p->rem_ts.start_addr,
+ format_ip4_address, &p->rem_ts.end_addr,
+ p->rem_ts.start_port, p->rem_ts.end_port,
+ p->rem_ts.protocol_id);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ikev2_profile_command, static) = {
+ .path = "show ikev2 profile",
+ .short_help = "show ikev2 profile",
+ .function = show_ikev2_profile_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_ikev2_local_key_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *r = 0;
+ u8 *data = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%s", &data))
+ {
+ r = ikev2_set_local_key (vm, data);
+ goto done;
+ }
+ else
+ break;
+ }
+
+ r = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+
+done:
+ vec_free (data);
+ unformat_free (line_input);
+ return r;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ikev2_local_key_command, static) = {
+ .path = "set ikev2 local key",
+ .short_help =
+ "set ikev2 local key <file>",
+ .function = set_ikev2_local_key_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+ikev2_initiate_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *r = 0;
+ u8 *name = 0;
+ u32 tmp1;
+ u64 tmp2;
+
+ const char *valid_chars = "a-zA-Z0-9_";
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "sa-init %U", unformat_token, valid_chars, &name))
+ {
+ r = ikev2_initiate_sa_init (vm, name);
+ goto done;
+ }
+ else if (unformat (line_input, "del-child-sa %x", &tmp1))
+ {
+ r = ikev2_initiate_delete_child_sa (vm, tmp1);
+ goto done;
+ }
+ else if (unformat (line_input, "del-sa %lx", &tmp2))
+ {
+ r = ikev2_initiate_delete_ike_sa (vm, tmp2);
+ goto done;
+ }
+ else if (unformat (line_input, "rekey-child-sa %x", &tmp1))
+ {
+ r = ikev2_initiate_rekey_child_sa (vm, tmp1);
+ goto done;
+ }
+ else
+ break;
+ }
+
+ r = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+
+done:
+ vec_free (name);
+ unformat_free (line_input);
+ return r;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ikev2_initiate_command, static) = {
+ .path = "ikev2 initiate",
+ .short_help =
+ "ikev2 initiate sa-init <profile id>\n"
+ "ikev2 initiate del-child-sa <child sa ispi>\n"
+ "ikev2 initiate del-sa <sa ispi>\n"
+ "ikev2 initiate rekey-child-sa <profile id> <child sa ispi>\n",
+ .function = ikev2_initiate_command_fn,
+};
+/* *INDENT-ON* */
+
+
+clib_error_t *
+ikev2_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ikev2_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ikev2_crypto.c b/src/vnet/ipsec/ikev2_crypto.c
new file mode 100644
index 00000000..ca56158f
--- /dev/null
+++ b/src/vnet/ipsec/ikev2_crypto.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ipsec/ikev2.h>
+#include <vnet/ipsec/ikev2_priv.h>
+#include <openssl/obj_mac.h>
+#include <openssl/ec.h>
+#include <openssl/x509.h>
+#include <openssl/pem.h>
+#include <openssl/bn.h>
+
+/* from RFC7296 */
+static const char modp_dh_768_prime[] =
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
+ "29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
+ "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
+ "E485B576625E7EC6F44C42E9A63A3620FFFFFFFFFFFFFFFF";
+static const char modp_dh_768_generator[] = "02";
+
+static const char modp_dh_1024_prime[] =
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
+ "29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
+ "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
+ "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
+ "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE65381" "FFFFFFFFFFFFFFFF";
+static const char modp_dh_1024_generator[] = "02";
+
+/* from RFC3526 */
+static const char modp_dh_1536_prime[] =
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
+ "29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
+ "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
+ "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
+ "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
+ "C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
+ "83655D23DCA3AD961C62F356208552BB9ED529077096966D"
+ "670C354E4ABC9804F1746C08CA237327FFFFFFFFFFFFFFFF";
+static const char modp_dh_1536_generator[] = "02";
+
+static const char modp_dh_2048_prime[] =
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
+ "29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
+ "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
+ "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
+ "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
+ "C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
+ "83655D23DCA3AD961C62F356208552BB9ED529077096966D"
+ "670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B"
+ "E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9"
+ "DE2BCBF6955817183995497CEA956AE515D2261898FA0510"
+ "15728E5A8AACAA68FFFFFFFFFFFFFFFF";
+static const char modp_dh_2048_generator[] = "02";
+
+static const char modp_dh_3072_prime[] =
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
+ "29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
+ "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
+ "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
+ "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
+ "C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
+ "83655D23DCA3AD961C62F356208552BB9ED529077096966D"
+ "670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B"
+ "E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9"
+ "DE2BCBF6955817183995497CEA956AE515D2261898FA0510"
+ "15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64"
+ "ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7"
+ "ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B"
+ "F12FFA06D98A0864D87602733EC86A64521F2B18177B200C"
+ "BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31"
+ "43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF";
+static const char modp_dh_3072_generator[] = "02";
+
+static const char modp_dh_4096_prime[] =
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
+ "29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
+ "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
+ "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
+ "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
+ "C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
+ "83655D23DCA3AD961C62F356208552BB9ED529077096966D"
+ "670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B"
+ "E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9"
+ "DE2BCBF6955817183995497CEA956AE515D2261898FA0510"
+ "15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64"
+ "ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7"
+ "ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B"
+ "F12FFA06D98A0864D87602733EC86A64521F2B18177B200C"
+ "BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31"
+ "43DB5BFCE0FD108E4B82D120A92108011A723C12A787E6D7"
+ "88719A10BDBA5B2699C327186AF4E23C1A946834B6150BDA"
+ "2583E9CA2AD44CE8DBBBC2DB04DE8EF92E8EFC141FBECAA6"
+ "287C59474E6BC05D99B2964FA090C3A2233BA186515BE7ED"
+ "1F612970CEE2D7AFB81BDD762170481CD0069127D5B05AA9"
+ "93B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934063199" "FFFFFFFFFFFFFFFF";
+static const char modp_dh_4096_generator[] = "02";
+
+static const char modp_dh_6144_prime[] =
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E08"
+ "8A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B"
+ "302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9"
+ "A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE6"
+ "49286651ECE45B3DC2007CB8A163BF0598DA48361C55D39A69163FA8"
+ "FD24CF5F83655D23DCA3AD961C62F356208552BB9ED529077096966D"
+ "670C354E4ABC9804F1746C08CA18217C32905E462E36CE3BE39E772C"
+ "180E86039B2783A2EC07A28FB5C55DF06F4C52C9DE2BCBF695581718"
+ "3995497CEA956AE515D2261898FA051015728E5A8AAAC42DAD33170D"
+ "04507A33A85521ABDF1CBA64ECFB850458DBEF0A8AEA71575D060C7D"
+ "B3970F85A6E1E4C7ABF5AE8CDB0933D71E8C94E04A25619DCEE3D226"
+ "1AD2EE6BF12FFA06D98A0864D87602733EC86A64521F2B18177B200C"
+ "BBE117577A615D6C770988C0BAD946E208E24FA074E5AB3143DB5BFC"
+ "E0FD108E4B82D120A92108011A723C12A787E6D788719A10BDBA5B26"
+ "99C327186AF4E23C1A946834B6150BDA2583E9CA2AD44CE8DBBBC2DB"
+ "04DE8EF92E8EFC141FBECAA6287C59474E6BC05D99B2964FA090C3A2"
+ "233BA186515BE7ED1F612970CEE2D7AFB81BDD762170481CD0069127"
+ "D5B05AA993B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934028492"
+ "36C3FAB4D27C7026C1D4DCB2602646DEC9751E763DBA37BDF8FF9406"
+ "AD9E530EE5DB382F413001AEB06A53ED9027D831179727B0865A8918"
+ "DA3EDBEBCF9B14ED44CE6CBACED4BB1BDB7F1447E6CC254B33205151"
+ "2BD7AF426FB8F401378CD2BF5983CA01C64B92ECF032EA15D1721D03"
+ "F482D7CE6E74FEF6D55E702F46980C82B5A84031900B1C9E59E7C97F"
+ "BEC7E8F323A97A7E36CC88BE0F1D45B7FF585AC54BD407B22B4154AA"
+ "CC8F6D7EBF48E1D814CC5ED20F8037E0A79715EEF29BE32806A1D58B"
+ "B7C5DA76F550AA3D8A1FBFF0EB19CCB1A313D55CDA56C9EC2EF29632"
+ "387FE8D76E3C0468043E8F663F4860EE12BF2D5B0B7474D6E694F91E"
+ "6DCC4024FFFFFFFFFFFFFFFF";
+static const char modp_dh_6144_generator[] = "02";
+
+static const char modp_dh_8192_prime[] =
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
+ "29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
+ "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
+ "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
+ "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
+ "C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
+ "83655D23DCA3AD961C62F356208552BB9ED529077096966D"
+ "670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B"
+ "E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9"
+ "DE2BCBF6955817183995497CEA956AE515D2261898FA0510"
+ "15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64"
+ "ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7"
+ "ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B"
+ "F12FFA06D98A0864D87602733EC86A64521F2B18177B200C"
+ "BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31"
+ "43DB5BFCE0FD108E4B82D120A92108011A723C12A787E6D7"
+ "88719A10BDBA5B2699C327186AF4E23C1A946834B6150BDA"
+ "2583E9CA2AD44CE8DBBBC2DB04DE8EF92E8EFC141FBECAA6"
+ "287C59474E6BC05D99B2964FA090C3A2233BA186515BE7ED"
+ "1F612970CEE2D7AFB81BDD762170481CD0069127D5B05AA9"
+ "93B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934028492"
+ "36C3FAB4D27C7026C1D4DCB2602646DEC9751E763DBA37BD"
+ "F8FF9406AD9E530EE5DB382F413001AEB06A53ED9027D831"
+ "179727B0865A8918DA3EDBEBCF9B14ED44CE6CBACED4BB1B"
+ "DB7F1447E6CC254B332051512BD7AF426FB8F401378CD2BF"
+ "5983CA01C64B92ECF032EA15D1721D03F482D7CE6E74FEF6"
+ "D55E702F46980C82B5A84031900B1C9E59E7C97FBEC7E8F3"
+ "23A97A7E36CC88BE0F1D45B7FF585AC54BD407B22B4154AA"
+ "CC8F6D7EBF48E1D814CC5ED20F8037E0A79715EEF29BE328"
+ "06A1D58BB7C5DA76F550AA3D8A1FBFF0EB19CCB1A313D55C"
+ "DA56C9EC2EF29632387FE8D76E3C0468043E8F663F4860EE"
+ "12BF2D5B0B7474D6E694F91E6DBE115974A3926F12FEE5E4"
+ "38777CB6A932DF8CD8BEC4D073B931BA3BC832B68D9DD300"
+ "741FA7BF8AFC47ED2576F6936BA424663AAB639C5AE4F568"
+ "3423B4742BF1C978238F16CBE39D652DE3FDB8BEFC848AD9"
+ "22222E04A4037C0713EB57A81A23F0C73473FC646CEA306B"
+ "4BCBC8862F8385DDFA9D4B7FA2C087E879683303ED5BDD3A"
+ "062B3CF5B3A278A66D2A13F83F44F82DDF310EE074AB6A36"
+ "4597E899A0255DC164F31CC50846851DF9AB48195DED7EA1"
+ "B1D510BD7EE74D73FAF36BC31ECFA268359046F4EB879F92"
+ "4009438B481C6CD7889A002ED5EE382BC9190DA6FC026E47"
+ "9558E4475677E9AA9E3050E2765694DFC81F56E880B96E71"
+ "60C980DD98EDD3DFFFFFFFFFFFFFFFFF";
+static const char modp_dh_8192_generator[] = "02";
+
+/* from RFC5114 */
+static const char modp_dh_1024_160_prime[] =
+ "B10B8F96A080E01DDE92DE5EAE5D54EC52C99FBCFB06A3C6"
+ "9A6A9DCA52D23B616073E28675A23D189838EF1E2EE652C0"
+ "13ECB4AEA906112324975C3CD49B83BFACCBDD7D90C4BD70"
+ "98488E9C219A73724EFFD6FAE5644738FAA31A4FF55BCCC0"
+ "A151AF5F0DC8B4BD45BF37DF365C1A65E68CFDA76D4DA708" "DF1FB2BC2E4A4371";
+static const char modp_dh_1024_160_generator[] =
+ "A4D1CBD5C3FD34126765A442EFB99905F8104DD258AC507F"
+ "D6406CFF14266D31266FEA1E5C41564B777E690F5504F213"
+ "160217B4B01B886A5E91547F9E2749F4D7FBD7D3B9A92EE1"
+ "909D0D2263F80A76A6A24C087A091F531DBF0A0169B6A28A"
+ "D662A4D18E73AFA32D779D5918D08BC8858F4DCEF97C2A24" "855E6EEB22B3B2E5";
+
+static const char modp_dh_2048_224_prime[] =
+ "AD107E1E9123A9D0D660FAA79559C51FA20D64E5683B9FD1"
+ "B54B1597B61D0A75E6FA141DF95A56DBAF9A3C407BA1DF15"
+ "EB3D688A309C180E1DE6B85A1274A0A66D3F8152AD6AC212"
+ "9037C9EDEFDA4DF8D91E8FEF55B7394B7AD5B7D0B6C12207"
+ "C9F98D11ED34DBF6C6BA0B2C8BBC27BE6A00E0A0B9C49708"
+ "B3BF8A317091883681286130BC8985DB1602E714415D9330"
+ "278273C7DE31EFDC7310F7121FD5A07415987D9ADC0A486D"
+ "CDF93ACC44328387315D75E198C641A480CD86A1B9E587E8"
+ "BE60E69CC928B2B9C52172E413042E9B23F10B0E16E79763"
+ "C9B53DCF4BA80A29E3FB73C16B8E75B97EF363E2FFA31F71"
+ "CF9DE5384E71B81C0AC4DFFE0C10E64F";
+static const char modp_dh_2048_224_generator[] =
+ "AC4032EF4F2D9AE39DF30B5C8FFDAC506CDEBE7B89998CAF"
+ "74866A08CFE4FFE3A6824A4E10B9A6F0DD921F01A70C4AFA"
+ "AB739D7700C29F52C57DB17C620A8652BE5E9001A8D66AD7"
+ "C17669101999024AF4D027275AC1348BB8A762D0521BC98A"
+ "E247150422EA1ED409939D54DA7460CDB5F6C6B250717CBE"
+ "F180EB34118E98D119529A45D6F834566E3025E316A330EF"
+ "BB77A86F0C1AB15B051AE3D428C8F8ACB70A8137150B8EEB"
+ "10E183EDD19963DDD9E263E4770589EF6AA21E7F5F2FF381"
+ "B539CCE3409D13CD566AFBB48D6C019181E1BCFE94B30269"
+ "EDFE72FE9B6AA4BD7B5A0F1C71CFFF4C19C418E1F6EC0179"
+ "81BC087F2A7065B384B890D3191F2BFA";
+
+static const char modp_dh_2048_256_prime[] =
+ "87A8E61DB4B6663CFFBBD19C651959998CEEF608660DD0F2"
+ "5D2CEED4435E3B00E00DF8F1D61957D4FAF7DF4561B2AA30"
+ "16C3D91134096FAA3BF4296D830E9A7C209E0C6497517ABD"
+ "5A8A9D306BCF67ED91F9E6725B4758C022E0B1EF4275BF7B"
+ "6C5BFC11D45F9088B941F54EB1E59BB8BC39A0BF12307F5C"
+ "4FDB70C581B23F76B63ACAE1CAA6B7902D52526735488A0E"
+ "F13C6D9A51BFA4AB3AD8347796524D8EF6A167B5A41825D9"
+ "67E144E5140564251CCACB83E6B486F6B3CA3F7971506026"
+ "C0B857F689962856DED4010ABD0BE621C3A3960A54E710C3"
+ "75F26375D7014103A4B54330C198AF126116D2276E11715F"
+ "693877FAD7EF09CADB094AE91E1A1597";
+static const char modp_dh_2048_256_generator[] =
+ "3FB32C9B73134D0B2E77506660EDBD484CA7B18F21EF2054"
+ "07F4793A1A0BA12510DBC15077BE463FFF4FED4AAC0BB555"
+ "BE3A6C1B0C6B47B1BC3773BF7E8C6F62901228F8C28CBB18"
+ "A55AE31341000A650196F931C77A57F2DDF463E5E9EC144B"
+ "777DE62AAAB8A8628AC376D282D6ED3864E67982428EBC83"
+ "1D14348F6F2F9193B5045AF2767164E1DFC967C1FB3F2E55"
+ "A4BD1BFFE83B9C80D052B985D182EA0ADB2A3B7313D3FE14"
+ "C8484B1E052588B9B7D2BBD2DF016199ECD06E1557CD0915"
+ "B3353BBB64E0EC377FD028370DF92B52C7891428CDC67EB6"
+ "184B523D1DB246C32F63078490F00EF8D647D148D4795451"
+ "5E2327CFEF98C582664B4C0F6CC41659";
+
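+/* prf(K,S): a single keyed PRF block (RFC 7296, section 2.13), computed
+   as an HMAC of `data' under `key'. The output vector is sized to the
+   transform's key_trunc, which for PRF_HMAC_SHA1 equals the full digest
+   length; the caller owns (and must vec_free) the result. */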
+v8 *
+ikev2_calc_prf (ikev2_sa_transform_t * tr, v8 * key, v8 * data)
+{
+ HMAC_CTX ctx;
+ v8 *prf;
+ unsigned int len = 0;
+
+ prf = vec_new (u8, tr->key_trunc);
+ HMAC_CTX_init (&ctx);
+ HMAC_Init_ex (&ctx, key, vec_len (key), tr->md, NULL);
+ HMAC_Update (&ctx, data, vec_len (data));
+ HMAC_Final (&ctx, prf, &len);
+ HMAC_CTX_cleanup (&ctx);
+
+ ASSERT (len == tr->key_trunc);
+
+ return prf;
+}
+
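+/* prf+(K,S): iterated key expansion (RFC 7296, section 2.13). The single
+   counter octet limits the output to 255 PRF blocks; if `len' would need
+   more, the partial result is freed and NULL is returned. */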
+u8 *
+ikev2_calc_prfplus (ikev2_sa_transform_t * tr, u8 * key, u8 * seed, int len)
+{
+ v8 *t = 0, *s = 0, *tmp = 0, *ret = 0;
+ u8 x = 0;
+
+ /* prf+ (K,S) = T1 | T2 | T3 | T4 | ...
+
+ where:
+ T1 = prf (K, S | 0x01)
+ T2 = prf (K, T1 | S | 0x02)
+ T3 = prf (K, T2 | S | 0x03)
+ T4 = prf (K, T3 | S | 0x04)
+ */
+
+ while (vec_len (ret) < len && x < 255)
+ {
+ if (t)
+ {
+ vec_append (s, t);
+ vec_free (t);
+ }
+
+ vec_append (s, seed);
+ vec_add2 (s, tmp, 1);
+ *tmp = x + 1;
+ t = ikev2_calc_prf (tr, key, s);
+ vec_append (ret, t);
+ vec_free (s);
+ x++;
+ }
+
+ vec_free (t);
+
+ if (x == 255)
+ {
+ vec_free (ret);
+ }
+
+ return ret;
+}
+
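+/* Compute the integrity checksum (HMAC) over `len' bytes of `data'. The
+   full digest (key_len bytes) is returned; only the first key_trunc
+   octets go on the wire (e.g. 12 for AUTH_HMAC_SHA1_96). */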
+v8 *
+ikev2_calc_integr (ikev2_sa_transform_t * tr, v8 * key, u8 * data, int len)
+{
+ v8 *r;
+ HMAC_CTX hctx;
+ unsigned int l;
+
+ ASSERT (tr->type == IKEV2_TRANSFORM_TYPE_INTEG);
+
+ r = vec_new (u8, tr->key_len);
+
+  /* compute the integrity checksum (HMAC) over the data */
+ HMAC_CTX_init (&hctx);
+ HMAC_Init (&hctx, key, vec_len (key), tr->md);
+ HMAC_Update (&hctx, (const u8 *) data, len);
+ HMAC_Final (&hctx, r, &l);
+ HMAC_CTX_cleanup (&hctx);
+
+ ASSERT (l == tr->key_len);
+
+ return r;
+}
+
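+/* Decrypt an Encrypted payload body laid out as IV | ciphertext: the
+   first cipher block is the IV. The trailing pad-length octet, plus the
+   padding it counts, is stripped from the returned vector. */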
+v8 *
+ikev2_decrypt_data (ikev2_sa_t * sa, u8 * data, int len)
+{
+ EVP_CIPHER_CTX ctx;
+ v8 *r;
+ int out_len = 0, block_size;
+ ikev2_sa_transform_t *tr_encr;
+ u8 *key = sa->is_initiator ? sa->sk_er : sa->sk_ei;
+
+ tr_encr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ block_size = tr_encr->block_size;
+
+  /* check that data length is a multiple of the cipher block size */
+ if (len % block_size)
+ {
+ clib_warning ("wrong data length");
+ return 0;
+ }
+
+ EVP_CIPHER_CTX_init (&ctx);
+ r = vec_new (u8, len - block_size);
+ EVP_DecryptInit_ex (&ctx, tr_encr->cipher, NULL, key, data);
+ EVP_DecryptUpdate (&ctx, r, &out_len, data + block_size, len - block_size);
+ EVP_DecryptFinal_ex (&ctx, r + out_len, &out_len);
+
+ /* remove padding */
+ _vec_len (r) -= r[vec_len (r) - 1] + 1;
+
+ EVP_CIPHER_CTX_cleanup (&ctx);
+ return r;
+}
+
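+/* Encrypt `src' into `dst' as IV | ciphertext, using a random IV. The
+   source must already be padded to the cipher block size (see
+   ikev2_payload_chain_add_padding); returns the number of bytes
+   written. */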
+int
+ikev2_encrypt_data (ikev2_sa_t * sa, v8 * src, u8 * dst)
+{
+ EVP_CIPHER_CTX ctx;
+ int out_len;
+ int bs;
+ ikev2_sa_transform_t *tr_encr;
+ u8 *key = sa->is_initiator ? sa->sk_ei : sa->sk_er;
+
+ tr_encr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ bs = tr_encr->block_size;
+
+ /* generate IV */
+ RAND_bytes (dst, bs);
+
+ EVP_CIPHER_CTX_init (&ctx);
+
+ EVP_EncryptInit_ex (&ctx, tr_encr->cipher, NULL, key, dst /* dst */ );
+ EVP_EncryptUpdate (&ctx, dst + bs, &out_len, src, vec_len (src));
+
+ EVP_CIPHER_CTX_cleanup (&ctx);
+
+ ASSERT (vec_len (src) == out_len);
+
+ return out_len + bs;
+}
+
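+/* First half of the DH exchange. MODP public values are the fixed-width
+   big-endian encoding of g^x mod p; ECP public values are the affine
+   point serialized as x || y, each coordinate left-padded with zeros to
+   key_len/2 octets. The responder already holds the peer's value and
+   derives the shared secret here; the initiator saves its private key
+   for ikev2_complete_dh(). */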
+void
+ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t)
+{
+ int r;
+
+ if (t->dh_group == IKEV2_DH_GROUP_MODP)
+ {
+ DH *dh = DH_new ();
+ BN_hex2bn (&dh->p, t->dh_p);
+ BN_hex2bn (&dh->g, t->dh_g);
+ DH_generate_key (dh);
+
+ if (sa->is_initiator)
+ {
+ sa->i_dh_data = vec_new (u8, t->key_len);
+ r = BN_bn2bin (dh->pub_key, sa->i_dh_data);
+ ASSERT (r == t->key_len);
+
+ sa->dh_private_key = vec_new (u8, t->key_len);
+ r = BN_bn2bin (dh->priv_key, sa->dh_private_key);
+ ASSERT (r == t->key_len);
+
+ }
+ else
+ {
+ sa->r_dh_data = vec_new (u8, t->key_len);
+ r = BN_bn2bin (dh->pub_key, sa->r_dh_data);
+ ASSERT (r == t->key_len);
+ BIGNUM *ex;
+ sa->dh_shared_key = vec_new (u8, t->key_len);
+ ex = BN_bin2bn (sa->i_dh_data, vec_len (sa->i_dh_data), NULL);
+ r = DH_compute_key (sa->dh_shared_key, ex, dh);
+ ASSERT (r == t->key_len);
+ BN_clear_free (ex);
+ }
+ DH_free (dh);
+ }
+ else if (t->dh_group == IKEV2_DH_GROUP_ECP)
+ {
+ EC_KEY *ec = EC_KEY_new_by_curve_name (t->nid);
+ ASSERT (ec);
+
+ EC_KEY_generate_key (ec);
+
+ const EC_POINT *r_point = EC_KEY_get0_public_key (ec);
+ const EC_GROUP *group = EC_KEY_get0_group (ec);
+ BIGNUM *x = NULL, *y = NULL;
+ BN_CTX *bn_ctx = BN_CTX_new ();
+ u16 x_off, y_off, len;
+ EC_POINT *i_point = EC_POINT_new (group);
+ EC_POINT *shared_point = EC_POINT_new (group);
+
+ x = BN_new ();
+ y = BN_new ();
+ len = t->key_len / 2;
+
+ EC_POINT_get_affine_coordinates_GFp (group, r_point, x, y, bn_ctx);
+
+ if (sa->is_initiator)
+ {
+ sa->i_dh_data = vec_new (u8, t->key_len);
+ x_off = len - BN_num_bytes (x);
+ memset (sa->i_dh_data, 0, x_off);
+ BN_bn2bin (x, sa->i_dh_data + x_off);
+ y_off = t->key_len - BN_num_bytes (y);
+ memset (sa->i_dh_data + len, 0, y_off - len);
+ BN_bn2bin (y, sa->i_dh_data + y_off);
+
+ const BIGNUM *prv = EC_KEY_get0_private_key (ec);
+ sa->dh_private_key = vec_new (u8, BN_num_bytes (prv));
+ r = BN_bn2bin (prv, sa->dh_private_key);
+ ASSERT (r == BN_num_bytes (prv));
+ }
+ else
+ {
+ sa->r_dh_data = vec_new (u8, t->key_len);
+ x_off = len - BN_num_bytes (x);
+ memset (sa->r_dh_data, 0, x_off);
+ BN_bn2bin (x, sa->r_dh_data + x_off);
+ y_off = t->key_len - BN_num_bytes (y);
+ memset (sa->r_dh_data + len, 0, y_off - len);
+ BN_bn2bin (y, sa->r_dh_data + y_off);
+
+ x = BN_bin2bn (sa->i_dh_data, len, x);
+ y = BN_bin2bn (sa->i_dh_data + len, len, y);
+ EC_POINT_set_affine_coordinates_GFp (group, i_point, x, y, bn_ctx);
+ sa->dh_shared_key = vec_new (u8, t->key_len);
+ EC_POINT_mul (group, shared_point, NULL, i_point,
+ EC_KEY_get0_private_key (ec), NULL);
+ EC_POINT_get_affine_coordinates_GFp (group, shared_point, x, y,
+ bn_ctx);
+ x_off = len - BN_num_bytes (x);
+ memset (sa->dh_shared_key, 0, x_off);
+ BN_bn2bin (x, sa->dh_shared_key + x_off);
+ y_off = t->key_len - BN_num_bytes (y);
+ memset (sa->dh_shared_key + len, 0, y_off - len);
+ BN_bn2bin (y, sa->dh_shared_key + y_off);
+ }
+
+ EC_KEY_free (ec);
+ BN_free (x);
+ BN_free (y);
+ BN_CTX_free (bn_ctx);
+ EC_POINT_free (i_point);
+ EC_POINT_free (shared_point);
+ }
+}
+
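+/* Second half of the DH exchange on the initiator: rebuild the key pair
+   from the saved private key and derive the shared secret from the
+   responder's public value, using the same fixed-width encoding as
+   ikev2_generate_dh(). */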
+void
+ikev2_complete_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t)
+{
+ int r;
+
+ if (t->dh_group == IKEV2_DH_GROUP_MODP)
+ {
+ DH *dh = DH_new ();
+ BN_hex2bn (&dh->p, t->dh_p);
+ BN_hex2bn (&dh->g, t->dh_g);
+ dh->priv_key =
+ BN_bin2bn (sa->dh_private_key, vec_len (sa->dh_private_key), NULL);
+
+ BIGNUM *ex;
+ sa->dh_shared_key = vec_new (u8, t->key_len);
+ ex = BN_bin2bn (sa->r_dh_data, vec_len (sa->r_dh_data), NULL);
+ r = DH_compute_key (sa->dh_shared_key, ex, dh);
+ ASSERT (r == t->key_len);
+ BN_clear_free (ex);
+ DH_free (dh);
+ }
+ else if (t->dh_group == IKEV2_DH_GROUP_ECP)
+ {
+ EC_KEY *ec = EC_KEY_new_by_curve_name (t->nid);
+ ASSERT (ec);
+
+ const EC_GROUP *group = EC_KEY_get0_group (ec);
+ BIGNUM *x = NULL, *y = NULL;
+ BN_CTX *bn_ctx = BN_CTX_new ();
+ u16 x_off, y_off, len;
+ BIGNUM *prv;
+
+ prv =
+ BN_bin2bn (sa->dh_private_key, vec_len (sa->dh_private_key), NULL);
+ EC_KEY_set_private_key (ec, prv);
+
+ x = BN_new ();
+ y = BN_new ();
+ len = t->key_len / 2;
+
+ x = BN_bin2bn (sa->r_dh_data, len, x);
+ y = BN_bin2bn (sa->r_dh_data + len, len, y);
+ EC_POINT *r_point = EC_POINT_new (group);
+ EC_POINT_set_affine_coordinates_GFp (group, r_point, x, y, bn_ctx);
+ EC_KEY_set_public_key (ec, r_point);
+
+ EC_POINT *i_point = EC_POINT_new (group);
+ EC_POINT *shared_point = EC_POINT_new (group);
+
+ x = BN_bin2bn (sa->i_dh_data, len, x);
+ y = BN_bin2bn (sa->i_dh_data + len, len, y);
+ EC_POINT_set_affine_coordinates_GFp (group, i_point, x, y, bn_ctx);
+ EC_POINT_mul (group, shared_point, NULL, r_point,
+ EC_KEY_get0_private_key (ec), NULL);
+ EC_POINT_get_affine_coordinates_GFp (group, shared_point, x, y, bn_ctx);
+ sa->dh_shared_key = vec_new (u8, t->key_len);
+ x_off = len - BN_num_bytes (x);
+ memset (sa->dh_shared_key, 0, x_off);
+ BN_bn2bin (x, sa->dh_shared_key + x_off);
+ y_off = t->key_len - BN_num_bytes (y);
+ memset (sa->dh_shared_key + len, 0, y_off - len);
+ BN_bn2bin (y, sa->dh_shared_key + y_off);
+
+ EC_KEY_free (ec);
+ BN_free (x);
+ BN_free (y);
+ BN_free (prv);
+ BN_CTX_free (bn_ctx);
+ EC_POINT_free (i_point);
+ EC_POINT_free (r_point);
+ EC_POINT_free (shared_point);
+ }
+}
+
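+/* Verify an RSA signature over `data' using SHA-1, i.e. the RSA Digital
+   Signature authentication method (RFC 7296, section 3.8). */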
+int
+ikev2_verify_sign (EVP_PKEY * pkey, u8 * sigbuf, u8 * data)
+{
+ EVP_MD_CTX md_ctx;
+
+ EVP_VerifyInit (&md_ctx, EVP_sha1 ());
+ EVP_VerifyUpdate (&md_ctx, data, vec_len (data));
+
+ return EVP_VerifyFinal (&md_ctx, sigbuf, vec_len (sigbuf), pkey);
+}
+
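+/* Sign `data' (SHA-1 digest) with `pkey'. EVP_SignFinal is called
+   twice: once with a NULL buffer to learn the signature length, then
+   again to produce the signature itself. */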
+u8 *
+ikev2_calc_sign (EVP_PKEY * pkey, u8 * data)
+{
+ EVP_MD_CTX md_ctx;
+ unsigned int sig_len = 0;
+ u8 *sign;
+
+ EVP_SignInit (&md_ctx, EVP_sha1 ());
+ EVP_SignUpdate (&md_ctx, data, vec_len (data));
+ /* get sign len */
+ EVP_SignFinal (&md_ctx, NULL, &sig_len, pkey);
+ sign = vec_new (u8, sig_len);
+ /* calc sign */
+ EVP_SignFinal (&md_ctx, sign, &sig_len, pkey);
+
+ return sign;
+}
+
+EVP_PKEY *
+ikev2_load_cert_file (u8 * file)
+{
+ FILE *fp;
+ X509 *x509;
+ EVP_PKEY *pkey = NULL;
+
+ fp = fopen ((char *) file, "r");
+ if (!fp)
+ {
+ clib_warning ("open %s failed", file);
+ goto end;
+ }
+
+ x509 = PEM_read_X509 (fp, NULL, NULL, NULL);
+ fclose (fp);
+ if (x509 == NULL)
+ {
+ clib_warning ("read cert %s failed", file);
+ goto end;
+ }
+
+ pkey = X509_get_pubkey (x509);
+ if (pkey == NULL)
+ clib_warning ("get pubkey %s failed", file);
+
+end:
+ return pkey;
+}
+
+EVP_PKEY *
+ikev2_load_key_file (u8 * file)
+{
+ FILE *fp;
+ EVP_PKEY *pkey = NULL;
+
+ fp = fopen ((char *) file, "r");
+ if (!fp)
+ {
+ clib_warning ("open %s failed", file);
+ goto end;
+ }
+
+ pkey = PEM_read_PrivateKey (fp, NULL, NULL, NULL);
+ fclose (fp);
+ if (pkey == NULL)
+ clib_warning ("read %s failed", file);
+
+end:
+ return pkey;
+}
+
+void
+ikev2_crypto_init (ikev2_main_t * km)
+{
+ ikev2_sa_transform_t *tr;
+
+ /* vector of supported transforms - in order of preference */
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_ENCR;
+ tr->encr_type = IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC;
+ tr->key_len = 256 / 8;
+ tr->block_size = 128 / 8;
+ tr->cipher = EVP_aes_256_cbc ();
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_ENCR;
+ tr->encr_type = IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC;
+ tr->key_len = 192 / 8;
+ tr->block_size = 128 / 8;
+ tr->cipher = EVP_aes_192_cbc ();
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_ENCR;
+ tr->encr_type = IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC;
+ tr->key_len = 128 / 8;
+ tr->block_size = 128 / 8;
+ tr->cipher = EVP_aes_128_cbc ();
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_PRF;
+ tr->prf_type = IKEV2_TRANSFORM_PRF_TYPE_PRF_HMAC_SHA1;
+ tr->key_len = 160 / 8;
+ tr->key_trunc = 160 / 8;
+ tr->md = EVP_sha1 ();
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_INTEG;
+ tr->integ_type = IKEV2_TRANSFORM_INTEG_TYPE_AUTH_HMAC_SHA1_96;
+ tr->key_len = 160 / 8;
+ tr->key_trunc = 96 / 8;
+ tr->md = EVP_sha1 ();
+
+#if defined(OPENSSL_NO_CISCO_FECDH)
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_BRAINPOOL_512;
+ tr->key_len = (512 * 2) / 8;
+ tr->nid = NID_brainpoolP512r1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_BRAINPOOL_384;
+ tr->key_len = (384 * 2) / 8;
+ tr->nid = NID_brainpoolP384r1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_BRAINPOOL_256;
+ tr->key_len = (256 * 2) / 8;
+ tr->nid = NID_brainpoolP256r1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_BRAINPOOL_224;
+ tr->key_len = (224 * 2) / 8;
+ tr->nid = NID_brainpoolP224r1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_224;
+ tr->key_len = (224 * 2) / 8;
+ tr->nid = NID_secp224r1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+#endif
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_521;
+ tr->key_len = (528 * 2) / 8;
+ tr->nid = NID_secp521r1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_384;
+ tr->key_len = (384 * 2) / 8;
+ tr->nid = NID_secp384r1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_256;
+ tr->key_len = (256 * 2) / 8;
+ tr->nid = NID_X9_62_prime256v1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_ECP_192;
+ tr->key_len = (192 * 2) / 8;
+ tr->nid = NID_X9_62_prime192v1;
+ tr->dh_group = IKEV2_DH_GROUP_ECP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_2048_256;
+ tr->key_len = 2048 / 8;
+ tr->dh_p = (const char *) &modp_dh_2048_256_prime;
+ tr->dh_g = (const char *) &modp_dh_2048_256_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_2048_224;
+ tr->key_len = 2048 / 8;
+ tr->dh_p = (const char *) &modp_dh_2048_224_prime;
+ tr->dh_g = (const char *) &modp_dh_2048_224_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_1024_160;
+ tr->key_len = 1024 / 8;
+ tr->dh_p = (const char *) &modp_dh_1024_160_prime;
+ tr->dh_g = (const char *) &modp_dh_1024_160_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_8192;
+ tr->key_len = 8192 / 8;
+ tr->dh_p = (const char *) &modp_dh_8192_prime;
+ tr->dh_g = (const char *) &modp_dh_8192_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_6144;
+ tr->key_len = 6144 / 8;
+ tr->dh_p = (const char *) &modp_dh_6144_prime;
+ tr->dh_g = (const char *) &modp_dh_6144_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_4096;
+ tr->key_len = 4096 / 8;
+ tr->dh_p = (const char *) &modp_dh_4096_prime;
+ tr->dh_g = (const char *) &modp_dh_4096_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_3072;
+ tr->key_len = 3072 / 8;
+ tr->dh_p = (const char *) &modp_dh_3072_prime;
+ tr->dh_g = (const char *) &modp_dh_3072_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_2048;
+ tr->key_len = 2048 / 8;
+ tr->dh_p = (const char *) &modp_dh_2048_prime;
+ tr->dh_g = (const char *) &modp_dh_2048_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_1536;
+ tr->key_len = 1536 / 8;
+ tr->dh_p = (const char *) &modp_dh_1536_prime;
+ tr->dh_g = (const char *) &modp_dh_1536_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_1024;
+ tr->key_len = 1024 / 8;
+ tr->dh_p = (const char *) &modp_dh_1024_prime;
+ tr->dh_g = (const char *) &modp_dh_1024_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_DH;
+ tr->dh_type = IKEV2_TRANSFORM_DH_TYPE_MODP_768;
+ tr->key_len = 768 / 8;
+ tr->dh_p = (const char *) &modp_dh_768_prime;
+ tr->dh_g = (const char *) &modp_dh_768_generator;
+ tr->dh_group = IKEV2_DH_GROUP_MODP;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_ESN;
+ tr->esn_type = IKEV2_TRANSFORM_ESN_TYPE_ESN;
+
+ vec_add2 (km->supported_transforms, tr, 1);
+ tr->type = IKEV2_TRANSFORM_TYPE_ESN;
+ tr->esn_type = IKEV2_TRANSFORM_ESN_TYPE_NO_ESN;
+}
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ikev2_format.c b/src/vnet/ipsec/ikev2_format.c
new file mode 100644
index 00000000..4d7a007f
--- /dev/null
+++ b/src/vnet/ipsec/ikev2_format.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/interface.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ikev2.h>
+#include <vnet/ipsec/ikev2_priv.h>
+
+u8 *
+format_ikev2_sa_transform (u8 * s, va_list * args)
+{
+ ikev2_sa_transform_t *tr = va_arg (*args, ikev2_sa_transform_t *);
+
+ if (!tr)
+ return s;
+
+ if (tr->type >= IKEV2_TRANSFORM_NUM_TYPES)
+ return s;
+
+ s = format (s, "%U:", format_ikev2_transform_type, tr->type);
+
+ switch (tr->type)
+ {
+ case IKEV2_TRANSFORM_TYPE_ENCR:
+ s = format (s, "%U", format_ikev2_transform_encr_type, tr->encr_type);
+ break;
+ case IKEV2_TRANSFORM_TYPE_PRF:
+ s = format (s, "%U", format_ikev2_transform_prf_type, tr->prf_type);
+ break;
+ case IKEV2_TRANSFORM_TYPE_INTEG:
+ s = format (s, "%U", format_ikev2_transform_integ_type, tr->integ_type);
+ break;
+ case IKEV2_TRANSFORM_TYPE_DH:
+ s = format (s, "%U", format_ikev2_transform_dh_type, tr->dh_type);
+ break;
+ case IKEV2_TRANSFORM_TYPE_ESN:
+ s = format (s, "%U", format_ikev2_transform_esn_type, tr->esn_type);
+ break;
+ default:
+ break;
+ }
+
+ if (tr->type == IKEV2_TRANSFORM_TYPE_ENCR &&
+ tr->encr_type == IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC && tr->key_len)
+ s = format (s, "-%u", tr->key_len * 8);
+ else if (vec_len (tr->attrs) == 4 && tr->attrs[0] == 0x80
+ && tr->attrs[1] == 0x0e)
+ s = format (s, "-%u", tr->attrs[2] * 256 + tr->attrs[3]);
+ else if (vec_len (tr->attrs))
+ s = format (s, "(unknown attr %U)", format_hex_bytes,
+ tr->attrs, vec_len (tr->attrs));
+
+ return s;
+}
+
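+/* The format/unformat functions below are generated from the
+   foreach_ikev2_* X-macro lists: each list is expanded with `_' defined
+   as a switch case for the format function, and as an else-if unformat
+   match for the unformat function. */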
+#define MACRO_FORMAT(lc) \
+u8 * format_ikev2_##lc (u8 * s, va_list * args) \
+{ \
+ u32 i = va_arg (*args, u32); \
+ char * t = 0; \
+ switch (i) { \
+ foreach_ikev2_##lc \
+ default: \
+ return format (s, "unknown (%u)", i); \
+ } \
+ s = format (s, "%s", t); \
+ return s; \
+}
+
+#define MACRO_UNFORMAT(lc) \
+uword \
+unformat_ikev2_##lc (unformat_input_t * input, \
+ va_list * args) \
+{ \
+ u32 * r = va_arg (*args, u32 *); \
+ if (0) ; \
+ foreach_ikev2_##lc \
+ else \
+ return 0; \
+ return 1; \
+}
+
+#define _(v,f,str) case IKEV2_AUTH_METHOD_##f: t = str; break;
+MACRO_FORMAT (auth_method)
+#undef _
+#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_AUTH_METHOD_##f;
+ MACRO_UNFORMAT (auth_method)
+#undef _
+#define _(v,f,str) case IKEV2_TRANSFORM_TYPE_##f: t = str; break;
+ MACRO_FORMAT (transform_type)
+#undef _
+#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_TYPE_##f;
+ MACRO_UNFORMAT (transform_type)
+#undef _
+#define _(v,f) case IKEV2_NOTIFY_MSG_##f: t = #f; break;
+ MACRO_FORMAT (notify_msg_type)
+#undef _
+#define _(v,f,str) case IKEV2_ID_TYPE_##f: t = str; break;
+ MACRO_FORMAT (id_type)
+#undef _
+#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_ID_TYPE_##f;
+ MACRO_UNFORMAT (id_type)
+#undef _
+#define _(v,f,str) case IKEV2_TRANSFORM_ENCR_TYPE_##f: t = str; break;
+ MACRO_FORMAT (transform_encr_type)
+#undef _
+#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_ENCR_TYPE_##f;
+ MACRO_UNFORMAT (transform_encr_type)
+#undef _
+#define _(v,f,str) case IKEV2_TRANSFORM_PRF_TYPE_##f: t = str; break;
+ MACRO_FORMAT (transform_prf_type)
+#undef _
+#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_PRF_TYPE_##f;
+ MACRO_UNFORMAT (transform_prf_type)
+#undef _
+#define _(v,f,str) case IKEV2_TRANSFORM_INTEG_TYPE_##f: t = str; break;
+ MACRO_FORMAT (transform_integ_type)
+#undef _
+#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_INTEG_TYPE_##f;
+ MACRO_UNFORMAT (transform_integ_type)
+#undef _
+#define _(v,f,str) case IKEV2_TRANSFORM_DH_TYPE_##f: t = str; break;
+ MACRO_FORMAT (transform_dh_type)
+#undef _
+#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_DH_TYPE_##f;
+ MACRO_UNFORMAT (transform_dh_type)
+#undef _
+#define _(v,f,str) case IKEV2_TRANSFORM_ESN_TYPE_##f: t = str; break;
+ MACRO_FORMAT (transform_esn_type)
+#undef _
+#define _(v,f,str) else if (unformat (input, str)) *r = IKEV2_TRANSFORM_ESN_TYPE_##f;
+ MACRO_UNFORMAT (transform_esn_type)
+#undef _
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ikev2_payload.c b/src/vnet/ipsec/ikev2_payload.c
new file mode 100644
index 00000000..34595380
--- /dev/null
+++ b/src/vnet/ipsec/ikev2_payload.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <ctype.h>
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/interface.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ikev2.h>
+#include <vnet/ipsec/ikev2_priv.h>
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ u8 nextpayload;
+ u8 flags;
+ u16 length;
+ u8 protocol_id;
+ u8 spi_size;
+ u16 msg_type;
+ u8 payload[0];}) ike_notify_payload_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ u8 ts_type;
+ u8 protocol_id;
+ u16 selector_len;
+ u16 start_port;
+ u16 end_port;
+ ip4_address_t start_addr;
+ ip4_address_t end_addr;}) ikev2_ts_payload_entry_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ u8 nextpayload;
+ u8 flags;
+ u16 length;
+ u8 num_ts;
+ u8 reserved[3];
+ ikev2_ts_payload_entry_t ts[0];})
+ ike_ts_payload_header_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 last_or_more;
+ u8 reserved;
+ u16 proposal_len;
+ u8 proposal_num;
+ u8 protocol_id;
+ u8 spi_size;
+ u8 num_transforms; u32 spi[0];
+}) ike_sa_proposal_data_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 last_or_more;
+ u8 reserved;
+ u16 transform_len;
+ u8 transform_type;
+ u8 reserved2;
+ u16 transform_id;
+ u8 attributes[0];
+}) ike_sa_transform_data_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 nextpayload;
+ u8 flags;
+ u16 length;
+ u8 protocol_id;
+ u8 spi_size;
+ u16 num_of_spi;
+ u32 spi[0];
+}) ike_delete_payload_header_t;
+/* *INDENT-ON* */
+
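+/* Append a generic payload header to the chain: link it from the
+   previous header's next-payload field (or record it as the first
+   payload type) and reserve `len' zeroed octets for the new payload. */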
+static ike_payload_header_t *
+ikev2_payload_add_hdr (ikev2_payload_chain_t * c, u8 payload_type, int len)
+{
+ ike_payload_header_t *hdr =
+ (ike_payload_header_t *) & c->data[c->last_hdr_off];
+ u8 *tmp;
+
+ if (c->data)
+ hdr->nextpayload = payload_type;
+ else
+ c->first_payload_type = payload_type;
+
+ c->last_hdr_off = vec_len (c->data);
+ vec_add2 (c->data, tmp, len);
+ hdr = (ike_payload_header_t *) tmp;
+ memset (hdr, 0, len);
+
+ hdr->length = clib_host_to_net_u16 (len);
+
+ return hdr;
+}
+
+static void
+ikev2_payload_add_data (ikev2_payload_chain_t * c, u8 * data)
+{
+ u16 len;
+ ike_payload_header_t *hdr;
+
+ vec_append (c->data, data);
+ hdr = (ike_payload_header_t *) & c->data[c->last_hdr_off];
+ len = clib_net_to_host_u16 (hdr->length);
+ hdr->length = clib_host_to_net_u16 (len + vec_len (data));
+}
+
+void
+ikev2_payload_add_notify (ikev2_payload_chain_t * c, u16 msg_type, u8 * data)
+{
+ ikev2_payload_add_notify_2(c, msg_type, data, 0);
+}
+
+void
+ikev2_payload_add_notify_2 (ikev2_payload_chain_t * c, u16 msg_type,
+ u8 * data, ikev2_notify_t * notify)
+{
+ ike_notify_payload_header_t *n;
+
+ n =
+ (ike_notify_payload_header_t *) ikev2_payload_add_hdr (c,
+ IKEV2_PAYLOAD_NOTIFY,
+ sizeof (*n));
+ n->msg_type = clib_host_to_net_u16 (msg_type);
+ if (notify)
+ {
+ n->protocol_id = notify->protocol_id;
+ if (notify->spi)
+ {
+ n->spi_size = 4;
+ }
+ }
+ ikev2_payload_add_data (c, data);
+}
+
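+/* Serialize a proposal vector into an SA payload: each proposal
+   substructure (with a 4-octet SPI for ESP) is followed by its
+   transform substructures, and the proposal length is fixed up once
+   the transforms are rendered. */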
+void
+ikev2_payload_add_sa (ikev2_payload_chain_t * c,
+ ikev2_sa_proposal_t * proposals)
+{
+ ike_payload_header_t *ph;
+ ike_sa_proposal_data_t *prop;
+ ike_sa_transform_data_t *tr;
+ ikev2_sa_proposal_t *p;
+ ikev2_sa_transform_t *t;
+
+ u8 *tmp;
+ u8 *pr_data = 0;
+ u8 *tr_data = 0;
+
+ ikev2_payload_add_hdr (c, IKEV2_PAYLOAD_SA, sizeof (*ph));
+
+ vec_foreach (p, proposals)
+ {
+ int spi_size = (p->protocol_id == IKEV2_PROTOCOL_ESP) ? 4 : 0;
+ pr_data = vec_new (u8, sizeof (ike_sa_proposal_data_t) + spi_size);
+ prop = (ike_sa_proposal_data_t *) pr_data;
+    prop->last_or_more = (p - proposals) + 1 < vec_len (proposals) ? 2 : 0;
+ prop->protocol_id = p->protocol_id;
+ prop->proposal_num = p->proposal_num;
+ prop->spi_size = spi_size;
+ prop->num_transforms = vec_len (p->transforms);
+
+ if (spi_size)
+ prop->spi[0] = clib_host_to_net_u32 (p->spi);
+
+ DBG_PLD ("proposal num %u protocol_id %u last_or_more %u spi_size %u%s%U",
+ prop->proposal_num, prop->protocol_id, prop->last_or_more,
+ prop->spi_size, prop->spi_size ? " spi_data " : "",
+ format_hex_bytes, prop->spi, prop->spi_size);
+
+ vec_foreach (t, p->transforms)
+ {
+ vec_add2 (tr_data, tmp, sizeof (*tr) + vec_len (t->attrs));
+ tr = (ike_sa_transform_data_t *) tmp;
+ tr->last_or_more =
+ ((t - p->transforms) + 1 < vec_len (p->transforms)) ? 3 : 0;
+ tr->transform_type = t->type;
+ tr->transform_id = clib_host_to_net_u16 (t->transform_id);
+ tr->transform_len =
+ clib_host_to_net_u16 (sizeof (*tr) + vec_len (t->attrs));
+
+ if (vec_len (t->attrs) > 0)
+ clib_memcpy (tr->attributes, t->attrs, vec_len (t->attrs));
+
+ DBG_PLD
+ ("transform type %U transform_id %u last_or_more %u attr_size %u%s%U",
+ format_ikev2_transform_type, tr->transform_type, t->transform_id,
+ tr->last_or_more, vec_len (t->attrs),
+ vec_len (t->attrs) ? " attrs " : "", format_hex_bytes,
+ tr->attributes, vec_len (t->attrs));
+ }
+
+ prop->proposal_len =
+ clib_host_to_net_u16 (vec_len (tr_data) + vec_len (pr_data));
+ ikev2_payload_add_data (c, pr_data);
+ ikev2_payload_add_data (c, tr_data);
+ vec_free (pr_data);
+ vec_free (tr_data);
+ }
+}
+
+void
+ikev2_payload_add_ke (ikev2_payload_chain_t * c, u16 dh_group, u8 * dh_data)
+{
+ ike_ke_payload_header_t *ke;
+ ke = (ike_ke_payload_header_t *) ikev2_payload_add_hdr (c, IKEV2_PAYLOAD_KE,
+ sizeof (*ke));
+
+ ke->dh_group = clib_host_to_net_u16 (dh_group);
+ ikev2_payload_add_data (c, dh_data);
+}
+
+void
+ikev2_payload_add_nonce (ikev2_payload_chain_t * c, u8 * nonce)
+{
+ ikev2_payload_add_hdr (c, IKEV2_PAYLOAD_NONCE,
+ sizeof (ike_payload_header_t));
+ ikev2_payload_add_data (c, nonce);
+}
+
+void
+ikev2_payload_add_id (ikev2_payload_chain_t * c, ikev2_id_t * id, u8 type)
+{
+ ike_id_payload_header_t *idp;
+ idp =
+ (ike_id_payload_header_t *) ikev2_payload_add_hdr (c, type,
+ sizeof (*idp));
+
+ idp->id_type = id->type;
+ ikev2_payload_add_data (c, id->data);
+}
+
+void
+ikev2_payload_add_delete (ikev2_payload_chain_t * c, ikev2_delete_t * d)
+{
+ ike_delete_payload_header_t *dp;
+ u16 num_of_spi = vec_len (d);
+ ikev2_delete_t *d2;
+ dp =
+ (ike_delete_payload_header_t *) ikev2_payload_add_hdr (c,
+ IKEV2_PAYLOAD_DELETE,
+ sizeof (*dp));
+
+ if (d[0].protocol_id == IKEV2_PROTOCOL_IKE)
+ {
+ dp->protocol_id = 1;
+ }
+ else
+ {
+ dp->protocol_id = d[0].protocol_id;
+ dp->spi_size = 4;
+ dp->num_of_spi = clib_host_to_net_u16 (num_of_spi);
+ vec_foreach (d2, d)
+ {
+ u8 *data = vec_new (u8, 4);
+ u32 spi = clib_host_to_net_u32 (d2->spi);
+ clib_memcpy (data, &spi, 4);
+ ikev2_payload_add_data (c, data);
+ vec_free (data);
+ }
+ }
+}
+
+void
+ikev2_payload_add_auth (ikev2_payload_chain_t * c, ikev2_auth_t * auth)
+{
+ ike_auth_payload_header_t *ap;
+ ap =
+ (ike_auth_payload_header_t *) ikev2_payload_add_hdr (c,
+ IKEV2_PAYLOAD_AUTH,
+ sizeof (*ap));
+
+ ap->auth_method = auth->method;
+ ikev2_payload_add_data (c, auth->data);
+}
+
+void
+ikev2_payload_add_ts (ikev2_payload_chain_t * c, ikev2_ts_t * ts, u8 type)
+{
+ ike_ts_payload_header_t *tsh;
+ ikev2_ts_t *ts2;
+ u8 *data = 0, *tmp;
+
+ tsh =
+ (ike_ts_payload_header_t *) ikev2_payload_add_hdr (c, type,
+ sizeof (*tsh));
+ tsh->num_ts = vec_len (ts);
+
+ vec_foreach (ts2, ts)
+ {
+    ASSERT (ts2->ts_type == 7);	/* TS_IPV4_ADDR_RANGE */
+ ikev2_ts_payload_entry_t *entry;
+ vec_add2 (data, tmp, sizeof (*entry));
+ entry = (ikev2_ts_payload_entry_t *) tmp;
+ entry->ts_type = ts2->ts_type;
+ entry->protocol_id = ts2->protocol_id;
+ entry->selector_len = clib_host_to_net_u16 (16);
+ entry->start_port = clib_host_to_net_u16 (ts2->start_port);
+ entry->end_port = clib_host_to_net_u16 (ts2->end_port);
+ entry->start_addr.as_u32 = ts2->start_addr.as_u32;
+ entry->end_addr.as_u32 = ts2->end_addr.as_u32;
+ }
+
+ ikev2_payload_add_data (c, data);
+ vec_free (data);
+}
+
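+/* Pad the chain to a multiple of the cipher block size `bs'. Between 1
+   and bs octets are appended; the final octet carries the pad length
+   (not counting itself), as the Encrypted payload format requires. */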
+void
+ikev2_payload_chain_add_padding (ikev2_payload_chain_t * c, int bs)
+{
+ u8 *tmp __attribute__ ((unused));
+ u8 pad_len = (vec_len (c->data) / bs + 1) * bs - vec_len (c->data);
+ vec_add2 (c->data, tmp, pad_len);
+ c->data[vec_len (c->data) - 1] = pad_len - 1;
+}
+
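+/* Walk the SA payload's proposal and transform substructures.
+   last_or_more is 2 ("more") or 0 ("last") for proposals and 3 or 0
+   for transforms; any mismatch with the advertised lengths causes the
+   payload to be treated as corrupt. */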
+ikev2_sa_proposal_t *
+ikev2_parse_sa_payload (ike_payload_header_t * ikep)
+{
+ ikev2_sa_proposal_t *v = 0;
+ ikev2_sa_proposal_t *proposal;
+ ikev2_sa_transform_t *transform;
+
+ u32 plen = clib_net_to_host_u16 (ikep->length);
+
+ ike_sa_proposal_data_t *sap;
+ int proposal_ptr = 0;
+
+ do
+ {
+ sap = (ike_sa_proposal_data_t *) & ikep->payload[proposal_ptr];
+ int i;
+ int transform_ptr;
+
+ DBG_PLD ("proposal num %u len %u last_or_more %u id %u "
+ "spi_size %u num_transforms %u",
+ sap->proposal_num, clib_net_to_host_u16 (sap->proposal_len),
+ sap->last_or_more, sap->protocol_id, sap->spi_size,
+ sap->num_transforms);
+
+ /* IKE proposal should not have SPI */
+ if (sap->protocol_id == IKEV2_PROTOCOL_IKE && sap->spi_size != 0)
+ goto data_corrupted;
+
+      /* ESP proposal must carry a 4-octet SPI */
+ if (sap->protocol_id == IKEV2_PROTOCOL_ESP && sap->spi_size != 4)
+ goto data_corrupted;
+
+ transform_ptr = proposal_ptr + sizeof (*sap) + sap->spi_size;
+
+ vec_add2 (v, proposal, 1);
+ proposal->proposal_num = sap->proposal_num;
+ proposal->protocol_id = sap->protocol_id;
+
+ if (sap->spi_size == 4)
+ {
+ proposal->spi = clib_net_to_host_u32 (sap->spi[0]);
+ }
+
+ for (i = 0; i < sap->num_transforms; i++)
+ {
+ ike_sa_transform_data_t *tr =
+ (ike_sa_transform_data_t *) & ikep->payload[transform_ptr];
+ u16 tlen = clib_net_to_host_u16 (tr->transform_len);
+
+ if (tlen < sizeof (*tr))
+ goto data_corrupted;
+
+ vec_add2 (proposal->transforms, transform, 1);
+
+ transform->type = tr->transform_type;
+ transform->transform_id = clib_net_to_host_u16 (tr->transform_id);
+ if (tlen > sizeof (*tr))
+ vec_add (transform->attrs, tr->attributes, tlen - sizeof (*tr));
+
+ DBG_PLD
+ ("transform num %u len %u last_or_more %u type %U id %u%s%U", i,
+ tlen, tr->last_or_more, format_ikev2_sa_transform, transform,
+ clib_net_to_host_u16 (tr->transform_id),
+ tlen > sizeof (*tr) ? " attrs " : "", format_hex_bytes,
+ tr->attributes, tlen - sizeof (*tr));
+
+ transform_ptr += tlen;
+ }
+
+ proposal_ptr += clib_net_to_host_u16 (sap->proposal_len);
+ }
+ while (proposal_ptr < (plen - sizeof (*ikep)) && sap->last_or_more == 2);
+
+ /* data validation */
+ if (proposal_ptr != (plen - sizeof (*ikep)) || sap->last_or_more)
+ goto data_corrupted;
+
+ return v;
+
+data_corrupted:
+ DBG_PLD ("SA payload data corrupted");
+ ikev2_sa_free_proposal_vector (&v);
+ return 0;
+}
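+
+/*
+ * For reference, the wire layout walked above (RFC 7296, section 3.3),
+ * sketched informally:
+ *
+ *   generic payload header (4 bytes)
+ *   proposal 1: proposal header | SPI (0 or 4 bytes) | transform 1..n
+ *   proposal 2: ...            (last_or_more == 2 means more follow,
+ *                               0 marks the final proposal)
+ *
+ * Each transform carries a type, an id and optional attributes; the
+ * parser bails out whenever the advertised lengths do not add up.
+ */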
+
+ikev2_ts_t *
+ikev2_parse_ts_payload (ike_payload_header_t * ikep)
+{
+ ike_ts_payload_header_t *tsp = (ike_ts_payload_header_t *) ikep;
+ ikev2_ts_t *r = 0, *ts;
+ u8 i;
+
+ for (i = 0; i < tsp->num_ts; i++)
+ {
+ if (tsp->ts[i].ts_type != 7) /* TS_IPV4_ADDR_RANGE */
+ {
+ DBG_PLD ("unsupported TS type received (%u)", tsp->ts[i].ts_type);
+ continue;
+ }
+
+ vec_add2 (r, ts, 1);
+ ts->ts_type = tsp->ts[i].ts_type;
+ ts->protocol_id = tsp->ts[i].protocol_id;
+ ts->start_port = tsp->ts[i].start_port;
+ ts->end_port = tsp->ts[i].end_port;
+ ts->start_addr.as_u32 = tsp->ts[i].start_addr.as_u32;
+ ts->end_addr.as_u32 = tsp->ts[i].end_addr.as_u32;
+ }
+ return r;
+}
+
+ikev2_notify_t *
+ikev2_parse_notify_payload (ike_payload_header_t * ikep)
+{
+ ike_notify_payload_header_t *n = (ike_notify_payload_header_t *) ikep;
+ u32 plen = clib_net_to_host_u16 (ikep->length);
+ ikev2_notify_t *r = 0;
+ u32 spi;
+
+ DBG_PLD ("msg_type %U len %u%s%U",
+ format_ikev2_notify_msg_type, clib_net_to_host_u16 (n->msg_type),
+ plen, plen > sizeof (*n) ? " data " : "",
+ format_hex_bytes, n->payload, plen - sizeof (*n));
+
+ r = vec_new (ikev2_notify_t, 1);
+ r->msg_type = clib_net_to_host_u16 (n->msg_type);
+ r->protocol_id = n->protocol_id;
+
+ if (n->spi_size == 4)
+ {
+ clib_memcpy (&spi, n->payload, n->spi_size);
+ r->spi = clib_net_to_host_u32 (spi);
+ DBG_PLD ("spi %lx", r->spi);
+ }
+ else if (n->spi_size == 0)
+ {
+ r->spi = 0;
+ }
+ else
+ {
+ clib_warning ("invalid SPI Size %d", n->spi_size);
+ }
+
+ if (plen > (sizeof (*n) + n->spi_size))
+ {
+ vec_add (r->data, n->payload + n->spi_size,
+ plen - sizeof (*n) - n->spi_size);
+ }
+
+ return r;
+}
+
+void
+ikev2_parse_vendor_payload (ike_payload_header_t * ikep)
+{
+ u32 plen = clib_net_to_host_u16 (ikep->length);
+ int i;
+ int is_string = 1;
+
+ for (i = 0; i < plen - 4; i++)
+ if (!isprint (ikep->payload[i]))
+ is_string = 0;
+
+ DBG_PLD ("len %u data %s:%U",
+ plen,
+ is_string ? "string" : "hex",
+ is_string ? format_ascii_bytes : format_hex_bytes,
+ ikep->payload, plen - sizeof (*ikep));
+}
+
+ikev2_delete_t *
+ikev2_parse_delete_payload (ike_payload_header_t * ikep)
+{
+ ike_delete_payload_header_t *d = (ike_delete_payload_header_t *) ikep;
+ u32 plen = clib_net_to_host_u16 (ikep->length);
+ ikev2_delete_t *r = 0, *del;
+ u16 num_of_spi = clib_net_to_host_u16 (d->num_of_spi);
+ u16 i = 0;
+
+ DBG_PLD ("protocol_id %u spi_size %u num_of_spi %u len %u%s%U",
+ d->protocol_id, d->spi_size, num_of_spi,
+ plen, plen > sizeof (*d) ? " data " : "",
+ format_hex_bytes, d->spi, plen - sizeof (*d));
+
+ if (d->protocol_id == IKEV2_PROTOCOL_IKE)
+ {
+ r = vec_new (ikev2_delete_t, 1);
+ r->protocol_id = IKEV2_PROTOCOL_IKE;
+ }
+ else
+ {
+ r = vec_new (ikev2_delete_t, num_of_spi);
+ vec_foreach (del, r)
+ {
+ del->protocol_id = d->protocol_id;
+ del->spi = clib_net_to_host_u32 (d->spi[i++]);
+ }
+ }
+
+ return r;
+}
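+
+/*
+ * Example (illustrative): a delete payload for ESP with num_of_spi = 2
+ * yields a two-element vector, one ikev2_delete_t per SPI; for protocol
+ * IKE a single element without an SPI is returned, since the IKE SA
+ * itself is being deleted.
+ */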
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ikev2_priv.h b/src/vnet/ipsec/ikev2_priv.h
new file mode 100644
index 00000000..5a3dc520
--- /dev/null
+++ b/src/vnet/ipsec/ikev2_priv.h
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ikev2_priv_h__
+#define __included_ikev2_priv_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/ipsec/ikev2.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/error.h>
+
+#include <openssl/rand.h>
+#include <openssl/dh.h>
+#include <openssl/hmac.h>
+#include <openssl/evp.h>
+
+#define IKEV2_DEBUG_PAYLOAD 1
+
+#if IKEV2_DEBUG_PAYLOAD == 1
+#define DBG_PLD(my_args...) clib_warning(my_args)
+#else
+#define DBG_PLD(my_args...)
+#endif
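+
+/*
+ * Usage sketch (illustrative): with IKEV2_DEBUG_PAYLOAD set to 1,
+ *   DBG_PLD ("spi %lx", spi);
+ * expands to clib_warning and is printed with function and line context;
+ * with it set to 0 the statement compiles away entirely.
+ */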
+
+typedef enum
+{
+ IKEV2_STATE_UNKNOWN,
+ IKEV2_STATE_SA_INIT,
+ IKEV2_STATE_DELETED,
+ IKEV2_STATE_AUTH_FAILED,
+ IKEV2_STATE_AUTHENTICATED,
+ IKEV2_STATE_NOTIFY_AND_DELETE,
+ IKEV2_STATE_TS_UNACCEPTABLE,
+ IKEV2_STATE_NO_PROPOSAL_CHOSEN,
+} ikev2_state_t;
+
+typedef struct
+{
+ ikev2_auth_method_t method:8;
+ u8 *data;
+ u8 hex; /* hex encoding of the shared secret */
+ EVP_PKEY *key;
+} ikev2_auth_t;
+
+typedef enum
+{
+ IKEV2_DH_GROUP_MODP = 0,
+ IKEV2_DH_GROUP_ECP = 1,
+} ikev2_dh_group_t;
+
+typedef struct
+{
+ ikev2_transform_type_t type;
+ union
+ {
+ u16 transform_id;
+ ikev2_transform_encr_type_t encr_type:16;
+ ikev2_transform_prf_type_t prf_type:16;
+ ikev2_transform_integ_type_t integ_type:16;
+ ikev2_transform_dh_type_t dh_type:16;
+ ikev2_transform_esn_type_t esn_type:16;
+ };
+ u8 *attrs;
+ u16 key_len;
+ u16 key_trunc;
+ u16 block_size;
+ u8 dh_group;
+ int nid;
+ const char *dh_p;
+ const char *dh_g;
+ const void *md;
+ const void *cipher;
+} ikev2_sa_transform_t;
+
+typedef struct
+{
+ u8 proposal_num;
+ ikev2_protocol_id_t protocol_id:8;
+ u32 spi;
+ ikev2_sa_transform_t *transforms;
+} ikev2_sa_proposal_t;
+
+typedef struct
+{
+ u8 ts_type;
+ u8 protocol_id;
+ u16 selector_len;
+ u16 start_port;
+ u16 end_port;
+ ip4_address_t start_addr;
+ ip4_address_t end_addr;
+} ikev2_ts_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ ip4_address_t ip4;
+} ikev2_responder_t;
+
+typedef struct
+{
+ ikev2_transform_encr_type_t crypto_alg;
+ ikev2_transform_integ_type_t integ_alg;
+ ikev2_transform_dh_type_t dh_type;
+ u32 crypto_key_size;
+} ikev2_transforms_set;
+
+
+typedef struct
+{
+ ikev2_id_type_t type:8;
+ u8 *data;
+} ikev2_id_t;
+
+typedef struct
+{
+ /* sa proposals vectors */
+ ikev2_sa_proposal_t *i_proposals;
+ ikev2_sa_proposal_t *r_proposals;
+
+ /* Traffic Selectors */
+ ikev2_ts_t *tsi;
+ ikev2_ts_t *tsr;
+
+ /* keys */
+ u8 *sk_ai;
+ u8 *sk_ar;
+ u8 *sk_ei;
+ u8 *sk_er;
+
+ /* lifetime data */
+ f64 time_to_expiration;
+ u8 is_expired;
+ i8 rekey_retries;
+} ikev2_child_sa_t;
+
+typedef struct
+{
+ u8 protocol_id;
+ u32 spi; /* for ESP and AH the SPI size is 4, for IKE it is 0 */
+} ikev2_delete_t;
+
+typedef struct
+{
+ u8 protocol_id;
+ u32 spi;
+ u32 ispi;
+ ikev2_sa_proposal_t *i_proposal;
+ ikev2_sa_proposal_t *r_proposal;
+ ikev2_ts_t *tsi;
+ ikev2_ts_t *tsr;
+} ikev2_rekey_t;
+
+typedef struct
+{
+ u16 msg_type;
+ u8 protocol_id;
+ u32 spi;
+ u8 *data;
+} ikev2_notify_t;
+
+typedef struct
+{
+ u8 *name;
+ u8 is_enabled;
+
+ ikev2_auth_t auth;
+ ikev2_id_t loc_id;
+ ikev2_id_t rem_id;
+ ikev2_ts_t loc_ts;
+ ikev2_ts_t rem_ts;
+ ikev2_responder_t responder;
+ ikev2_transforms_set ike_ts;
+ ikev2_transforms_set esp_ts;
+ u64 lifetime;
+ u64 lifetime_maxdata;
+ u32 lifetime_jitter;
+ u32 handover;
+} ikev2_profile_t;
+
+typedef struct
+{
+ ikev2_state_t state;
+ u8 unsupported_cp;
+ u8 initial_contact;
+ ip4_address_t iaddr;
+ ip4_address_t raddr;
+ u64 ispi;
+ u64 rspi;
+ u8 *i_nonce;
+ u8 *r_nonce;
+
+ /* DH data */
+ u16 dh_group;
+ u8 *dh_shared_key;
+ u8 *dh_private_key;
+ u8 *i_dh_data;
+ u8 *r_dh_data;
+
+ /* sa proposals vectors */
+ ikev2_sa_proposal_t *i_proposals;
+ ikev2_sa_proposal_t *r_proposals;
+
+ /* keys */
+ u8 *sk_d;
+ u8 *sk_ai;
+ u8 *sk_ar;
+ u8 *sk_ei;
+ u8 *sk_er;
+ u8 *sk_pi;
+ u8 *sk_pr;
+
+ /* auth */
+ ikev2_auth_t i_auth;
+ ikev2_auth_t r_auth;
+
+ /* ID */
+ ikev2_id_t i_id;
+ ikev2_id_t r_id;
+
+ /* pending deletes */
+ ikev2_delete_t *del;
+
+ /* pending rekeyings */
+ ikev2_rekey_t *rekey;
+
+ /* packet data */
+ u8 *last_sa_init_req_packet_data;
+ u8 *last_sa_init_res_packet_data;
+
+ /* retransmit */
+ u32 last_msg_id;
+ u8 *last_res_packet_data;
+
+ u8 is_initiator;
+ u32 last_init_msg_id;
+ ikev2_profile_t *profile;
+
+ ikev2_child_sa_t *childs;
+} ikev2_sa_t;
+
+
+typedef struct
+{
+ /* pool of IKEv2 Security Associations */
+ ikev2_sa_t *sas;
+
+ /* hash */
+ uword *sa_by_rspi;
+} ikev2_main_per_thread_data_t;
+
+typedef struct
+{
+ /* pool of IKEv2 profiles */
+ ikev2_profile_t *profiles;
+
+ /* vector of supported transform types */
+ ikev2_sa_transform_t *supported_transforms;
+
+ /* hash */
+ mhash_t profile_index_by_name;
+
+ /* local private key */
+ EVP_PKEY *pkey;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /* pool of IKEv2 Security Associations created in initiator mode */
+ ikev2_sa_t *sais;
+ /* hash */
+ uword *sa_by_ispi;
+
+ ikev2_main_per_thread_data_t *per_thread_data;
+
+} ikev2_main_t;
+
+ikev2_main_t ikev2_main;
+
+void ikev2_sa_free_proposal_vector (ikev2_sa_proposal_t ** v);
+ikev2_sa_transform_t *ikev2_sa_get_td_for_type (ikev2_sa_proposal_t * p,
+ ikev2_transform_type_t type);
+
+/* ikev2_crypto.c */
+v8 *ikev2_calc_prf (ikev2_sa_transform_t * tr, v8 * key, v8 * data);
+u8 *ikev2_calc_prfplus (ikev2_sa_transform_t * tr, u8 * key, u8 * seed,
+ int len);
+v8 *ikev2_calc_integr (ikev2_sa_transform_t * tr, v8 * key, u8 * data,
+ int len);
+v8 *ikev2_decrypt_data (ikev2_sa_t * sa, u8 * data, int len);
+int ikev2_encrypt_data (ikev2_sa_t * sa, v8 * src, u8 * dst);
+void ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t);
+void ikev2_complete_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t);
+int ikev2_verify_sign (EVP_PKEY * pkey, u8 * sigbuf, u8 * data);
+u8 *ikev2_calc_sign (EVP_PKEY * pkey, u8 * data);
+EVP_PKEY *ikev2_load_cert_file (u8 * file);
+EVP_PKEY *ikev2_load_key_file (u8 * file);
+void ikev2_crypto_init (ikev2_main_t * km);
+
+/* ikev2_payload.c */
+typedef struct
+{
+ u8 first_payload_type;
+ u16 last_hdr_off;
+ u8 *data;
+} ikev2_payload_chain_t;
+
+#define ikev2_payload_new_chain(V) vec_validate (V, 0)
+#define ikev2_payload_destroy_chain(V) do { \
+ vec_free((V)->data); \
+ vec_free(V); \
+} while (0)
+
+void ikev2_payload_add_notify (ikev2_payload_chain_t * c, u16 msg_type,
+ u8 * data);
+void ikev2_payload_add_notify_2 (ikev2_payload_chain_t * c, u16 msg_type,
+ u8 * data, ikev2_notify_t * notify);
+void ikev2_payload_add_sa (ikev2_payload_chain_t * c,
+ ikev2_sa_proposal_t * proposals);
+void ikev2_payload_add_ke (ikev2_payload_chain_t * c, u16 dh_group,
+ u8 * dh_data);
+void ikev2_payload_add_nonce (ikev2_payload_chain_t * c, u8 * nonce);
+void ikev2_payload_add_id (ikev2_payload_chain_t * c, ikev2_id_t * id,
+ u8 type);
+void ikev2_payload_add_auth (ikev2_payload_chain_t * c, ikev2_auth_t * auth);
+void ikev2_payload_add_ts (ikev2_payload_chain_t * c, ikev2_ts_t * ts,
+ u8 type);
+void ikev2_payload_add_delete (ikev2_payload_chain_t * c, ikev2_delete_t * d);
+void ikev2_payload_chain_add_padding (ikev2_payload_chain_t * c, int bs);
+void ikev2_parse_vendor_payload (ike_payload_header_t * ikep);
+ikev2_sa_proposal_t *ikev2_parse_sa_payload (ike_payload_header_t * ikep);
+ikev2_ts_t *ikev2_parse_ts_payload (ike_payload_header_t * ikep);
+ikev2_delete_t *ikev2_parse_delete_payload (ike_payload_header_t * ikep);
+ikev2_notify_t *ikev2_parse_notify_payload (ike_payload_header_t * ikep);
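+
+/*
+ * Typical usage sketch (illustrative): build a chain, append payloads in
+ * exchange order, then free it once the message has been rendered:
+ *
+ *   ikev2_payload_chain_t *c = 0;
+ *   ikev2_payload_new_chain (c);
+ *   ikev2_payload_add_sa (c, proposals);
+ *   ikev2_payload_add_nonce (c, nonce);
+ *   ikev2_payload_chain_add_padding (c, tr_encr->block_size);
+ *   ... render c->data into the outgoing packet ...
+ *   ikev2_payload_destroy_chain (c);
+ */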
+
+#endif /* __included_ikev2_priv_h__ */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api
new file mode 100644
index 00000000..011b0d4b
--- /dev/null
+++ b/src/vnet/ipsec/ipsec.api
@@ -0,0 +1,552 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief IPsec: Add/delete Security Policy Database
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add SPD if non-zero, else delete
+ @param spd_id - SPD instance id (control plane allocated)
+*/
+
+autoreply define ipsec_spd_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 spd_id;
+};
+
+/** \brief IPsec: Add/delete SPD from interface
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - bind the SPD to the interface if non-zero, else unbind
+ @param sw_if_index - index of the interface
+ @param spd_id - SPD instance id to use for lookups
+*/
+
+autoreply define ipsec_interface_add_del_spd
+{
+ u32 client_index;
+ u32 context;
+
+ u8 is_add;
+ u32 sw_if_index;
+ u32 spd_id;
+};
+
+/** \brief IPsec: Add/delete Security Policy Database entry
+
+ See RFC 4301, 4.4.1.1 on how to match packet to selectors
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add SPD entry if non-zero, else delete
+ @param spd_id - SPD instance id (control plane allocated)
+ @param priority - priority of SPD entry (non-unique value). Used to order SPD matching - higher priorities match before lower
+ @param is_outbound - entry applies to outbound traffic if non-zero, otherwise applies to inbound traffic
+ @param is_ipv6 - remote/local address are IPv6 if non-zero, else IPv4
+ @param remote_address_start - start of remote address range to match
+ @param remote_address_stop - end of remote address range to match
+ @param local_address_start - start of local address range to match
+ @param local_address_stop - end of local address range to match
+ @param protocol - protocol type to match [0 means any]
+ @param remote_port_start - start of remote port range to match ...
+ @param remote_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param local_port_start - start of local port range to match ...
+ @param local_port_stop - end of local port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param policy - 0 = bypass (no IPsec processing), 1 = discard (discard packet with ICMP processing), 2 = resolve (send request to control plane for SA resolving, and discard without ICMP processing), 3 = protect (apply IPsec policy using following parameters)
+ @param sa_id - SAD instance id (control plane allocated)
+
+*/
+
+autoreply define ipsec_spd_add_del_entry
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+
+ u32 spd_id;
+ i32 priority;
+ u8 is_outbound;
+
+ // Selector
+ u8 is_ipv6;
+ u8 is_ip_any;
+ u8 remote_address_start[16];
+ u8 remote_address_stop[16];
+ u8 local_address_start[16];
+ u8 local_address_stop[16];
+
+ u8 protocol;
+
+ u16 remote_port_start;
+ u16 remote_port_stop;
+ u16 local_port_start;
+ u16 local_port_stop;
+
+ // Policy
+ u8 policy;
+ u32 sa_id;
+};
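+
+/*
+ * Example (illustrative): an outbound IPv4 protect policy matching all
+ * TCP traffic from 10.0.0.0/24 to 10.1.0.0/24 would set is_outbound = 1,
+ * protocol = 6, local_address_start/stop = 10.0.0.0/10.0.0.255,
+ * remote_address_start/stop = 10.1.0.0/10.1.0.255, both port ranges to
+ * 0-65535 (ANY), policy = 3 (protect) and sa_id to an existing SA.
+ */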
+
+/** \brief IPsec: Add/delete Security Association Database entry
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add SAD entry if non-zero, else delete
+
+ @param sad_id - sad id
+
+ @param spi - security parameter index
+
+ @param protocol - 0 = AH, 1 = ESP
+
+ @param crypto_algorithm - 0 = Null, 1 = AES-CBC-128, 2 = AES-CBC-192, 3 = AES-CBC-256, 4 = 3DES-CBC
+ @param crypto_key_length - length of crypto_key in bytes
+ @param crypto_key - crypto keying material
+
+ @param integrity_algorithm - 0 = None, 1 = MD5-96, 2 = SHA1-96, 3 = SHA-256, 4 = SHA-384, 5 = SHA-512
+ @param integrity_key_length - length of integrity_key in bytes
+ @param integrity_key - integrity keying material
+
+ @param use_extended_sequence_number - use ESN when non-zero
+
+ @param is_tunnel - IPsec tunnel mode if non-zero, else transport mode
+ @param is_tunnel_ipv6 - IPsec tunnel endpoints are IPv6 if non-zero, else IPv4; only valid if is_tunnel is non-zero
+ @param tunnel_src_address - IPsec tunnel source address, IPv6 if is_tunnel_ipv6 is non-zero, else IPv4; only valid if is_tunnel is non-zero
+ @param tunnel_dst_address - IPsec tunnel destination address, IPv6 if is_tunnel_ipv6 is non-zero, else IPv4; only valid if is_tunnel is non-zero
+
+ To be added:
+ Anti-replay
+ IPsec tunnel address copy mode (to support GDOI)
+ */
+
+autoreply define ipsec_sad_add_del_entry
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+
+ u32 sad_id;
+
+ u32 spi;
+
+ u8 protocol;
+
+ u8 crypto_algorithm;
+ u8 crypto_key_length;
+ u8 crypto_key[128];
+
+ u8 integrity_algorithm;
+ u8 integrity_key_length;
+ u8 integrity_key[128];
+
+ u8 use_extended_sequence_number;
+
+ u8 is_tunnel;
+ u8 is_tunnel_ipv6;
+ u8 tunnel_src_address[16];
+ u8 tunnel_dst_address[16];
+};
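+
+/*
+ * Example (illustrative): a tunnel-mode ESP SA using AES-CBC-128 with
+ * SHA1-96 would set protocol = 1, crypto_algorithm = 1 with a 16-byte
+ * crypto_key, integrity_algorithm = 2 with a 20-byte integrity_key,
+ * is_tunnel = 1 and the IPv4 endpoints in the first four bytes of
+ * tunnel_src_address and tunnel_dst_address.
+ */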
+
+/** \brief IPsec: Update Security Association keys
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param sa_id - sa id
+
+ @param crypto_key_length - length of crypto_key in bytes
+ @param crypto_key - crypto keying material
+
+ @param integrity_key_length - length of integrity_key in bytes
+ @param integrity_key - integrity keying material
+*/
+
+autoreply define ipsec_sa_set_key
+{
+ u32 client_index;
+ u32 context;
+
+ u32 sa_id;
+
+ u8 crypto_key_length;
+ u8 crypto_key[128];
+
+ u8 integrity_key_length;
+ u8 integrity_key[128];
+};
+
+/** \brief IKEv2: Add/delete profile
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+ @param is_add - Add IKEv2 profile if non-zero, else delete
+*/
+autoreply define ikev2_profile_add_del
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+ u8 is_add;
+};
+
+/** \brief IKEv2: Set IKEv2 profile authentication method
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+ @param auth_method - IKEv2 authentication method (shared-key-mic/rsa-sig)
+ @param is_hex - Authentication data in hex format if non-zero, else string
+ @param data_len - Authentication data length
+ @param data - Authentication data (for rsa-sig, the certificate file path)
+*/
+autoreply define ikev2_profile_set_auth
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+ u8 auth_method;
+ u8 is_hex;
+ u32 data_len;
+ u8 data[0];
+};
+
+/** \brief IKEv2: Set IKEv2 profile local/remote identification
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+ @param is_local - Identification is local if non-zero, else remote
+ @param id_type - Identification type
+ @param data_len - Identification data length
+ @param data - Identification data
+*/
+autoreply define ikev2_profile_set_id
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+ u8 is_local;
+ u8 id_type;
+ u32 data_len;
+ u8 data[0];
+};
+
+/** \brief IKEv2: Set IKEv2 profile traffic selector parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+ @param is_local - Traffic selector is local if non-zero, else remote
+ @param proto - Traffic selector IP protocol (0 means any)
+ @param start_port - The smallest port number allowed by traffic selector
+ @param end_port - The largest port number allowed by traffic selector
+ @param start_addr - The smallest address included in traffic selector
+ @param end_addr - The largest address included in traffic selector
+*/
+autoreply define ikev2_profile_set_ts
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+ u8 is_local;
+ u8 proto;
+ u16 start_port;
+ u16 end_port;
+ u32 start_addr;
+ u32 end_addr;
+};
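+
+/*
+ * Example (illustrative): a local selector covering 192.168.1.0/24 for
+ * any protocol and port would use proto = 0, start_port = 0,
+ * end_port = 65535, start_addr = 192.168.1.0 and end_addr =
+ * 192.168.1.255, both encoded as u32 values.
+ */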
+
+/** \brief IKEv2: Set IKEv2 local RSA private key
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param key_file - Key file absolute path
+*/
+autoreply define ikev2_set_local_key
+{
+ u32 client_index;
+ u32 context;
+
+ u8 key_file[256];
+};
+
+/** \brief IKEv2: Set IKEv2 responder interface and IP address
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+ @param sw_if_index - interface index
+ @param address - interface address
+*/
+autoreply define ikev2_set_responder
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+ u32 sw_if_index;
+ u8 address[4];
+};
+
+/** \brief IKEv2: Set IKEv2 IKE transforms in SA_INIT proposal (RFC 7296)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+ @param crypto_alg - encryption algorithm
+ @param crypto_key_size - encryption key size
+ @param integ_alg - integrity algorithm
+ @param dh_group - Diffie-Hellman group
+
+*/
+autoreply define ikev2_set_ike_transforms
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+ u32 crypto_alg;
+ u32 crypto_key_size;
+ u32 integ_alg;
+ u32 dh_group;
+};
+
+/** \brief IKEv2: Set IKEv2 ESP transforms in SA_INIT proposal (RFC 7296)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+ @param crypto_alg - encryption algorithm
+ @param crypto_key_size - encryption key size
+ @param integ_alg - integrity algorithm
+ @param dh_group - Diffie-Hellman group
+
+*/
+autoreply define ikev2_set_esp_transforms
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+ u32 crypto_alg;
+ u32 crypto_key_size;
+ u32 integ_alg;
+ u32 dh_group;
+};
+
+/** \brief IKEv2: Set Child SA lifetime, limited by time and/or data
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+ @param lifetime - SA maximum life time in seconds (0 to disable)
+ @param lifetime_jitter - Jitter added to prevent simultaneous rekeying
+ @param handover - Handover time
+ @param lifetime_maxdata - SA maximum life time in bytes (0 to disable)
+
+*/
+autoreply define ikev2_set_sa_lifetime
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+ u64 lifetime;
+ u32 lifetime_jitter;
+ u32 handover;
+ u64 lifetime_maxdata;
+};
+
+/** \brief IKEv2: Initiate the SA_INIT exchange
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param name - IKEv2 profile name
+
+*/
+autoreply define ikev2_initiate_sa_init
+{
+ u32 client_index;
+ u32 context;
+
+ u8 name[64];
+};
+
+/** \brief IKEv2: Initiate the delete IKE SA exchange
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param ispi - IKE SA initiator SPI
+
+*/
+autoreply define ikev2_initiate_del_ike_sa
+{
+ u32 client_index;
+ u32 context;
+
+ u64 ispi;
+};
+
+/** \brief IKEv2: Initiate the delete Child SA exchange
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param ispi - Child SA initiator SPI
+
+*/
+autoreply define ikev2_initiate_del_child_sa
+{
+ u32 client_index;
+ u32 context;
+
+ u32 ispi;
+};
+
+/** \brief IKEv2: Initiate the rekey Child SA exchange
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+
+ @param ispi - Child SA initiator SPI
+
+*/
+autoreply define ikev2_initiate_rekey_child_sa
+{
+ u32 client_index;
+ u32 context;
+
+ u32 ispi;
+};
+
+/** \brief Dump ipsec policy database data
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param spd_id - SPD instance id
+ @param sa_id - SA id, optional, set to ~0 to see all policies in SPD
+*/
+define ipsec_spd_dump {
+ u32 client_index;
+ u32 context;
+ u32 spd_id;
+ u32 sa_id;
+};
+
+/** \brief IPsec policy database response
+ @param context - sender context which was passed in the request
+ @param spd_id - SPD instance id
+ @param priority - numeric value to control policy evaluation order
+ @param is_outbound - [1|0] to indicate if direction is [out|in]bound
+ @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4]
+ @param local_start_addr - first address in local traffic selector range
+ @param local_stop_addr - last address in local traffic selector range
+ @param local_start_port - first port in local traffic selector range
+ @param local_stop_port - last port in local traffic selector range
+ @param remote_start_addr - first address in remote traffic selector range
+ @param remote_stop_addr - last address in remote traffic selector range
+ @param remote_start_port - first port in remote traffic selector range
+ @param remote_stop_port - last port in remote traffic selector range
+ @param protocol - traffic selector protocol
+ @param policy - policy action
+ @param sa_id - SA id
+ @param bytes - byte count of packets matching this policy
+ @param packets - count of packets matching this policy
+*/
+define ipsec_spd_details {
+ u32 context;
+ u32 spd_id;
+ i32 priority;
+ u8 is_outbound;
+ u8 is_ipv6;
+ u8 local_start_addr[16];
+ u8 local_stop_addr[16];
+ u16 local_start_port;
+ u16 local_stop_port;
+ u8 remote_start_addr[16];
+ u8 remote_stop_addr[16];
+ u16 remote_start_port;
+ u16 remote_stop_port;
+ u8 protocol;
+ u8 policy;
+ u32 sa_id;
+ u64 bytes;
+ u64 packets;
+};
+
+/** \brief Add or delete IPsec tunnel interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add IPsec tunnel interface if nonzero, else delete
+ @param esn - enable extended sequence numbers if nonzero, else disable
+ @param anti_replay - enable anti replay check if nonzero, else disable
+ @param local_ip - local IP address
+ @param remote_ip - IP address of remote IPsec peer
+ @param local_spi - SPI of outbound IPsec SA
+ @param remote_spi - SPI of inbound IPsec SA
+ @param crypto_alg - encryption algorithm ID
+ @param local_crypto_key_len - length of local crypto key in bytes
+ @param local_crypto_key - crypto key for outbound IPsec SA
+ @param remote_crypto_key_len - length of remote crypto key in bytes
+ @param remote_crypto_key - crypto key for inbound IPsec SA
+ @param integ_alg - integrity algorithm ID
+ @param local_integ_key_len - length of local integrity key in bytes
+ @param local_integ_key - integrity key for outbound IPsec SA
+ @param remote_integ_key_len - length of remote integrity key in bytes
+ @param remote_integ_key - integrity key for inbound IPsec SA
+*/
+define ipsec_tunnel_if_add_del {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 esn;
+ u8 anti_replay;
+ u8 local_ip[4];
+ u8 remote_ip[4];
+ u32 local_spi;
+ u32 remote_spi;
+ u8 crypto_alg;
+ u8 local_crypto_key_len;
+ u8 local_crypto_key[128];
+ u8 remote_crypto_key_len;
+ u8 remote_crypto_key[128];
+ u8 integ_alg;
+ u8 local_integ_key_len;
+ u8 local_integ_key[128];
+ u8 remote_integ_key_len;
+ u8 remote_integ_key[128];
+};
+
+/** \brief Add/delete IPsec tunnel interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return status
+ @param sw_if_index - sw_if_index of new interface (for successful add)
+*/
+define ipsec_tunnel_if_add_del_reply {
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
new file mode 100644
index 00000000..cfe434ab
--- /dev/null
+++ b/src/vnet/ipsec/ipsec.c
@@ -0,0 +1,586 @@
+/*
+ * ipsec.c : IPsec policy and SA management
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/interface.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ikev2.h>
+#include <vnet/ipsec/esp.h>
+
+u32
+ipsec_get_sa_index_by_sa_id (u32 sa_id)
+{
+ ipsec_main_t *im = &ipsec_main;
+ uword *p = hash_get (im->sa_index_by_sa_id, sa_id);
+ if (!p)
+ return ~0;
+
+ return p[0];
+}
+
+int
+ipsec_set_interface_spd (vlib_main_t * vm, u32 sw_if_index, u32 spd_id,
+ int is_add)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ip4_ipsec_config_t config;
+
+ u32 spd_index;
+ uword *p;
+
+ p = hash_get (im->spd_index_by_spd_id, spd_id);
+ if (!p)
+ return VNET_API_ERROR_SYSCALL_ERROR_1; /* no such spd-id */
+
+ spd_index = p[0];
+
+ p = hash_get (im->spd_index_by_sw_if_index, sw_if_index);
+ if (p && is_add)
+ return VNET_API_ERROR_SYSCALL_ERROR_1; /* spd already assigned */
+
+ if (is_add)
+ {
+ hash_set (im->spd_index_by_sw_if_index, sw_if_index, spd_index);
+ }
+ else
+ {
+ hash_unset (im->spd_index_by_sw_if_index, sw_if_index);
+ }
+
+ clib_warning ("sw_if_index %u spd_id %u spd_index %u",
+ sw_if_index, spd_id, spd_index);
+
+ /* enable IPsec on TX */
+ vnet_feature_enable_disable ("ip4-output", "ipsec-output-ip4", sw_if_index,
+ is_add, 0, 0);
+ vnet_feature_enable_disable ("ip6-output", "ipsec-output-ip6", sw_if_index,
+ is_add, 0, 0);
+
+ /* enable IPsec on RX */
+ vnet_feature_enable_disable ("ip4-unicast", "ipsec-input-ip4", sw_if_index,
+ is_add, &config, sizeof (config));
+ vnet_feature_enable_disable ("ip6-unicast", "ipsec-input-ip6", sw_if_index,
+ is_add, &config, sizeof (config));
+
+ return 0;
+}
+
+int
+ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_t *spd = 0;
+ uword *p;
+ u32 spd_index, k, v;
+
+ p = hash_get (im->spd_index_by_spd_id, spd_id);
+ if (p && is_add)
+ return VNET_API_ERROR_INVALID_VALUE;
+ if (!p && !is_add)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (!is_add) /* delete */
+ {
+ spd_index = p[0];
+ spd = pool_elt_at_index (im->spds, spd_index);
+ if (!spd)
+ return VNET_API_ERROR_INVALID_VALUE;
+ /* *INDENT-OFF* */
+ hash_foreach (k, v, im->spd_index_by_sw_if_index, ({
+ if (v == spd_index)
+ ipsec_set_interface_spd(vm, k, spd_id, 0);
+ }));
+ /* *INDENT-ON* */
+ hash_unset (im->spd_index_by_spd_id, spd_id);
+ pool_free (spd->policies);
+ vec_free (spd->ipv4_outbound_policies);
+ vec_free (spd->ipv6_outbound_policies);
+ vec_free (spd->ipv4_inbound_protect_policy_indices);
+ vec_free (spd->ipv4_inbound_policy_discard_and_bypass_indices);
+ pool_put (im->spds, spd);
+ }
+ else /* create new SPD */
+ {
+ pool_get (im->spds, spd);
+ memset (spd, 0, sizeof (*spd));
+ spd_index = spd - im->spds;
+ spd->id = spd_id;
+ hash_set (im->spd_index_by_spd_id, spd_id, spd_index);
+ }
+ return 0;
+}
+
+static int
+ipsec_spd_entry_sort (void *a1, void *a2)
+{
+ ipsec_main_t *im = &ipsec_main;
+ u32 *id1 = a1;
+ u32 *id2 = a2;
+ ipsec_spd_t *spd;
+ ipsec_policy_t *p1, *p2;
+
+ /* *INDENT-OFF* */
+ pool_foreach (spd, im->spds, ({
+ p1 = pool_elt_at_index(spd->policies, *id1);
+ p2 = pool_elt_at_index(spd->policies, *id2);
+ if (p1 && p2)
+ return p2->priority - p1->priority;
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
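+
+/*
+ * Note: the comparator above sorts in descending priority order, e.g.
+ * priorities { 10, 100, 50 } come out as { 100, 50, 10 }, so
+ * higher-priority policies are matched first during SPD lookup.
+ */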
+
+int
+ipsec_add_del_policy (vlib_main_t * vm, ipsec_policy_t * policy, int is_add)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_t *spd = 0;
+ ipsec_policy_t *vp;
+ uword *p;
+ u32 spd_index;
+
+ clib_warning ("policy-id %u priority %d is_outbound %u", policy->id,
+ policy->priority, policy->is_outbound);
+
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ p = hash_get (im->sa_index_by_sa_id, policy->sa_id);
+ if (!p)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ policy->sa_index = p[0];
+ }
+
+ p = hash_get (im->spd_index_by_spd_id, policy->id);
+
+ if (!p)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ spd_index = p[0];
+ spd = pool_elt_at_index (im->spds, spd_index);
+ if (!spd)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ if (is_add)
+ {
+ u32 policy_index;
+
+ pool_get (spd->policies, vp);
+ clib_memcpy (vp, policy, sizeof (*vp));
+ policy_index = vp - spd->policies;
+
+ if (policy->is_outbound)
+ {
+ if (policy->is_ipv6)
+ {
+ vec_add1 (spd->ipv6_outbound_policies, policy_index);
+ clib_memcpy (vp, policy, sizeof (ipsec_policy_t));
+ vec_sort_with_function (spd->ipv6_outbound_policies,
+ ipsec_spd_entry_sort);
+ }
+ else
+ {
+ vec_add1 (spd->ipv4_outbound_policies, policy_index);
+ clib_memcpy (vp, policy, sizeof (ipsec_policy_t));
+ vec_sort_with_function (spd->ipv4_outbound_policies,
+ ipsec_spd_entry_sort);
+ }
+ }
+ else
+ {
+ if (policy->is_ipv6)
+ {
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ vec_add1 (spd->ipv6_inbound_protect_policy_indices,
+ policy_index);
+ clib_memcpy (vp, policy, sizeof (ipsec_policy_t));
+ vec_sort_with_function
+ (spd->ipv6_inbound_protect_policy_indices,
+ ipsec_spd_entry_sort);
+ }
+ else
+ {
+ vec_add1
+ (spd->ipv6_inbound_policy_discard_and_bypass_indices,
+ policy_index);
+ clib_memcpy (vp, policy, sizeof (ipsec_policy_t));
+ vec_sort_with_function
+ (spd->ipv6_inbound_policy_discard_and_bypass_indices,
+ ipsec_spd_entry_sort);
+ }
+ }
+ else
+ {
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ vec_add1 (spd->ipv4_inbound_protect_policy_indices,
+ policy_index);
+ clib_memcpy (vp, policy, sizeof (ipsec_policy_t));
+ vec_sort_with_function
+ (spd->ipv4_inbound_protect_policy_indices,
+ ipsec_spd_entry_sort);
+ }
+ else
+ {
+ vec_add1
+ (spd->ipv4_inbound_policy_discard_and_bypass_indices,
+ policy_index);
+ clib_memcpy (vp, policy, sizeof (ipsec_policy_t));
+ vec_sort_with_function
+ (spd->ipv4_inbound_policy_discard_and_bypass_indices,
+ ipsec_spd_entry_sort);
+ }
+ }
+ }
+
+ }
+ else
+ {
+ u32 i, j;
+ /* *INDENT-OFF* */
+ pool_foreach_index(i, spd->policies, ({
+ vp = pool_elt_at_index(spd->policies, i);
+ if (vp->priority != policy->priority)
+ continue;
+ if (vp->is_outbound != policy->is_outbound)
+ continue;
+ if (vp->policy != policy->policy)
+ continue;
+ if (vp->sa_id != policy->sa_id)
+ continue;
+ if (vp->protocol != policy->protocol)
+ continue;
+ if (vp->lport.start != policy->lport.start)
+ continue;
+ if (vp->lport.stop != policy->lport.stop)
+ continue;
+ if (vp->rport.start != policy->rport.start)
+ continue;
+ if (vp->rport.stop != policy->rport.stop)
+ continue;
+ if (vp->is_ipv6 != policy->is_ipv6)
+ continue;
+ if (policy->is_ipv6)
+ {
+ if (vp->laddr.start.ip6.as_u64[0] != policy->laddr.start.ip6.as_u64[0])
+ continue;
+ if (vp->laddr.start.ip6.as_u64[1] != policy->laddr.start.ip6.as_u64[1])
+ continue;
+ if (vp->laddr.stop.ip6.as_u64[0] != policy->laddr.stop.ip6.as_u64[0])
+ continue;
+ if (vp->laddr.stop.ip6.as_u64[1] != policy->laddr.stop.ip6.as_u64[1])
+ continue;
+ if (vp->raddr.start.ip6.as_u64[0] != policy->raddr.start.ip6.as_u64[0])
+ continue;
+ if (vp->raddr.start.ip6.as_u64[1] != policy->raddr.start.ip6.as_u64[1])
+ continue;
+ if (vp->raddr.stop.ip6.as_u64[0] != policy->raddr.stop.ip6.as_u64[0])
+ continue;
+ if (vp->raddr.stop.ip6.as_u64[1] != policy->raddr.stop.ip6.as_u64[1])
+ continue;
+ if (policy->is_outbound)
+ {
+ vec_foreach_index(j, spd->ipv6_outbound_policies) {
+ if (vec_elt(spd->ipv6_outbound_policies, j) == i) {
+ vec_del1 (spd->ipv6_outbound_policies, j);
+ break;
+ }
+ }
+ }
+ else
+ {
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ vec_foreach_index(j, spd->ipv6_inbound_protect_policy_indices) {
+ if (vec_elt(spd->ipv6_inbound_protect_policy_indices, j) == i) {
+ vec_del1 (spd->ipv6_inbound_protect_policy_indices, j);
+ break;
+ }
+ }
+ }
+ else
+ {
+ vec_foreach_index(j, spd->ipv6_inbound_policy_discard_and_bypass_indices) {
+ if (vec_elt(spd->ipv6_inbound_policy_discard_and_bypass_indices, j) == i) {
+ vec_del1 (spd->ipv6_inbound_policy_discard_and_bypass_indices, j);
+ break;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ if (vp->laddr.start.ip4.as_u32 != policy->laddr.start.ip4.as_u32)
+ continue;
+ if (vp->laddr.stop.ip4.as_u32 != policy->laddr.stop.ip4.as_u32)
+ continue;
+ if (vp->raddr.start.ip4.as_u32 != policy->raddr.start.ip4.as_u32)
+ continue;
+ if (vp->raddr.stop.ip4.as_u32 != policy->raddr.stop.ip4.as_u32)
+ continue;
+ if (policy->is_outbound)
+ {
+ vec_foreach_index(j, spd->ipv4_outbound_policies) {
+ if (vec_elt(spd->ipv4_outbound_policies, j) == i) {
+ vec_del1 (spd->ipv4_outbound_policies, j);
+ break;
+ }
+ }
+ }
+ else
+ {
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ vec_foreach_index(j, spd->ipv4_inbound_protect_policy_indices) {
+ if (vec_elt(spd->ipv4_inbound_protect_policy_indices, j) == i) {
+ vec_del1 (spd->ipv4_inbound_protect_policy_indices, j);
+ break;
+ }
+ }
+ }
+ else
+ {
+ vec_foreach_index(j, spd->ipv4_inbound_policy_discard_and_bypass_indices) {
+ if (vec_elt(spd->ipv4_inbound_policy_discard_and_bypass_indices, j) == i) {
+ vec_del1 (spd->ipv4_inbound_policy_discard_and_bypass_indices, j);
+ break;
+ }
+ }
+ }
+ }
+ pool_put (spd->policies, vp);
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+static u8
+ipsec_is_sa_used (u32 sa_index)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_t *spd;
+ ipsec_policy_t *p;
+ ipsec_tunnel_if_t *t;
+
+ /* *INDENT-OFF* */
+ pool_foreach(spd, im->spds, ({
+ pool_foreach(p, spd->policies, ({
+ if (p->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ if (p->sa_index == sa_index)
+ return 1;
+ }
+ }));
+ }));
+
+ pool_foreach(t, im->tunnel_interfaces, ({
+ if (t->input_sa_index == sa_index)
+ return 1;
+ if (t->output_sa_index == sa_index)
+ return 1;
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+int
+ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_t *sa = 0;
+ uword *p;
+ u32 sa_index;
+
+ clib_warning ("id %u spi %u", new_sa->id, new_sa->spi);
+
+ p = hash_get (im->sa_index_by_sa_id, new_sa->id);
+ if (p && is_add)
+ return VNET_API_ERROR_SYSCALL_ERROR_1; /* already exists */
+ if (!p && !is_add)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ if (!is_add) /* delete */
+ {
+ sa_index = p[0];
+ sa = pool_elt_at_index (im->sad, sa_index);
+ if (ipsec_is_sa_used (sa_index))
+ {
+ clib_warning ("sa_id %u used in policy", sa->id);
+ return VNET_API_ERROR_SYSCALL_ERROR_1; /* sa used in policy */
+ }
+ hash_unset (im->sa_index_by_sa_id, sa->id);
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (sa_index, is_add) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ pool_put (im->sad, sa);
+ }
+ else /* create new SA */
+ {
+ pool_get (im->sad, sa);
+ clib_memcpy (sa, new_sa, sizeof (*sa));
+ sa_index = sa - im->sad;
+ hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (sa_index, is_add) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ return 0;
+}
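+
+/*
+ * Note: add_del_sa_sess_cb lets a crypto backend (the DPDK IPsec plugin,
+ * for example) create or tear down its per-SA session state; a negative
+ * return value aborts the add or delete above.
+ */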
+
+int
+ipsec_set_sa_key (vlib_main_t * vm, ipsec_sa_t * sa_update)
+{
+ ipsec_main_t *im = &ipsec_main;
+ uword *p;
+ u32 sa_index;
+ ipsec_sa_t *sa = 0;
+
+ p = hash_get (im->sa_index_by_sa_id, sa_update->id);
+ if (!p)
+ return VNET_API_ERROR_SYSCALL_ERROR_1; /* no such sa-id */
+
+ sa_index = p[0];
+ sa = pool_elt_at_index (im->sad, sa_index);
+
+ /* new crypto key */
+ if (0 < sa_update->crypto_key_len)
+ {
+ clib_memcpy (sa->crypto_key, sa_update->crypto_key,
+ sa_update->crypto_key_len);
+ sa->crypto_key_len = sa_update->crypto_key_len;
+ }
+
+ /* new integ key */
+ if (0 < sa_update->integ_key_len)
+ {
+ clib_memcpy (sa->integ_key, sa_update->integ_key,
+ sa_update->integ_key_len);
+ sa->integ_key_len = sa_update->integ_key_len;
+ }
+
+ if (sa->crypto_key_len + sa->integ_key_len > 0)
+ {
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (sa_index, 0) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+
+ return 0;
+}
+
+static void
+ipsec_rand_seed (void)
+{
+ struct
+ {
+ time_t time;
+ pid_t pid;
+ void *p;
+ } seed_data;
+
+ seed_data.time = time (NULL);
+ seed_data.pid = getpid ();
+ seed_data.p = (void *) &seed_data;
+
+ RAND_seed ((const void *) &seed_data, sizeof (seed_data));
+}
+
+static clib_error_t *
+ipsec_check_support (ipsec_sa_t * sa)
+{
+ if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
+ return clib_error_return (0, "unsupported aes-gcm-128 crypto-alg");
+ if (sa->integ_alg == IPSEC_INTEG_ALG_NONE)
+ return clib_error_return (0, "unsupported none integ-alg");
+ if (sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)
+ return clib_error_return (0, "unsupported aes-gcm-128 integ-alg");
+
+ return 0;
+}
+
+static clib_error_t *
+ipsec_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+ ipsec_main_t *im = &ipsec_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_node_t *node;
+
+ ipsec_rand_seed ();
+
+ memset (im, 0, sizeof (im[0]));
+
+ im->vnet_main = vnet_get_main ();
+ im->vlib_main = vm;
+
+ im->spd_index_by_spd_id = hash_create (0, sizeof (uword));
+ im->sa_index_by_sa_id = hash_create (0, sizeof (uword));
+ im->spd_index_by_sw_if_index = hash_create (0, sizeof (uword));
+
+ vec_validate_aligned (im->empty_buffers, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
+ ASSERT (node);
+ im->error_drop_node_index = node->index;
+
+ node = vlib_get_node_by_name (vm, (u8 *) "esp-encrypt");
+ ASSERT (node);
+ im->esp_encrypt_node_index = node->index;
+
+ node = vlib_get_node_by_name (vm, (u8 *) "esp-decrypt");
+ ASSERT (node);
+ im->esp_decrypt_node_index = node->index;
+
+ im->esp_encrypt_next_index = IPSEC_OUTPUT_NEXT_ESP_ENCRYPT;
+ im->esp_decrypt_next_index = IPSEC_INPUT_NEXT_ESP_DECRYPT;
+
+ im->cb.check_support_cb = ipsec_check_support;
+
+ if ((error = vlib_call_init_function (vm, ipsec_cli_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ipsec_tunnel_if_init)))
+ return error;
+
+ esp_init ();
+
+ if ((error = ikev2_init (vm)))
+ return error;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ipsec_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
new file mode 100644
index 00000000..1eff1c3a
--- /dev/null
+++ b/src/vnet/ipsec/ipsec.h
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __IPSEC_H__
+#define __IPSEC_H__
+
+#define IPSEC_FLAG_IPSEC_GRE_TUNNEL (1 << 0)
+
+
+#define foreach_ipsec_output_next \
+_(DROP, "error-drop") \
+_(ESP_ENCRYPT, "esp-encrypt")
+
+#define _(v, s) IPSEC_OUTPUT_NEXT_##v,
+typedef enum
+{
+ foreach_ipsec_output_next
+#undef _
+ IPSEC_OUTPUT_N_NEXT,
+} ipsec_output_next_t;
+
+
+#define foreach_ipsec_input_next \
+_(DROP, "error-drop") \
+_(ESP_DECRYPT, "esp-decrypt")
+
+#define _(v, s) IPSEC_INPUT_NEXT_##v,
+typedef enum
+{
+ foreach_ipsec_input_next
+#undef _
+ IPSEC_INPUT_N_NEXT,
+} ipsec_input_next_t;
+
+
+#define foreach_ipsec_policy_action \
+ _(0, BYPASS, "bypass") \
+ _(1, DISCARD, "discard") \
+ _(2, RESOLVE, "resolve") \
+ _(3, PROTECT, "protect")
+
+typedef enum
+{
+#define _(v,f,s) IPSEC_POLICY_ACTION_##f = v,
+ foreach_ipsec_policy_action
+#undef _
+ IPSEC_POLICY_N_ACTION,
+} ipsec_policy_action_t;
+
+#define foreach_ipsec_crypto_alg \
+ _(0, NONE, "none") \
+ _(1, AES_CBC_128, "aes-cbc-128") \
+ _(2, AES_CBC_192, "aes-cbc-192") \
+ _(3, AES_CBC_256, "aes-cbc-256") \
+ _(4, AES_GCM_128, "aes-gcm-128")
+
+typedef enum
+{
+#define _(v,f,s) IPSEC_CRYPTO_ALG_##f = v,
+ foreach_ipsec_crypto_alg
+#undef _
+ IPSEC_CRYPTO_N_ALG,
+} ipsec_crypto_alg_t;
+
+#define foreach_ipsec_integ_alg \
+ _(0, NONE, "none") \
+ _(1, MD5_96, "md5-96") /* RFC2403 */ \
+ _(2, SHA1_96, "sha1-96") /* RFC2404 */ \
+ _(3, SHA_256_96, "sha-256-96") /* draft-ietf-ipsec-ciph-sha-256-00 */ \
+ _(4, SHA_256_128, "sha-256-128") /* RFC4868 */ \
+ _(5, SHA_384_192, "sha-384-192") /* RFC4868 */ \
+ _(6, SHA_512_256, "sha-512-256") /* RFC4868 */ \
+ _(7, AES_GCM_128, "aes-gcm-128") /* RFC4106 */
+
+typedef enum
+{
+#define _(v,f,s) IPSEC_INTEG_ALG_##f = v,
+ foreach_ipsec_integ_alg
+#undef _
+ IPSEC_INTEG_N_ALG,
+} ipsec_integ_alg_t;
+
+typedef enum
+{
+ IPSEC_PROTOCOL_AH = 0,
+ IPSEC_PROTOCOL_ESP = 1
+} ipsec_protocol_t;
+
+typedef struct
+{
+ u32 id;
+ u32 spi;
+ ipsec_protocol_t protocol;
+
+ ipsec_crypto_alg_t crypto_alg;
+ u8 crypto_key_len;
+ u8 crypto_key[128];
+
+ ipsec_integ_alg_t integ_alg;
+ u8 integ_key_len;
+ u8 integ_key[128];
+
+ u8 use_esn;
+ u8 use_anti_replay;
+
+ u8 is_tunnel;
+ u8 is_tunnel_ip6;
+ ip46_address_t tunnel_src_addr;
+ ip46_address_t tunnel_dst_addr;
+
+ u32 salt;
+
+ /* runtime */
+ u32 seq;
+ u32 seq_hi;
+ u32 last_seq;
+ u32 last_seq_hi;
+ u64 replay_window;
+
+ /* lifetime data */
+ u64 total_data_size;
+} ipsec_sa_t;
+
+typedef struct
+{
+ ip46_address_t start, stop;
+} ip46_address_range_t;
+
+typedef struct
+{
+ u16 start, stop;
+} port_range_t;
+
+typedef struct
+{
+ u8 is_add;
+ u8 esn;
+ u8 anti_replay;
+ ip4_address_t local_ip, remote_ip;
+ u32 local_spi;
+ u32 remote_spi;
+ ipsec_crypto_alg_t crypto_alg;
+ u8 local_crypto_key_len;
+ u8 local_crypto_key[128];
+ u8 remote_crypto_key_len;
+ u8 remote_crypto_key[128];
+ ipsec_integ_alg_t integ_alg;
+ u8 local_integ_key_len;
+ u8 local_integ_key[128];
+ u8 remote_integ_key_len;
+ u8 remote_integ_key[128];
+} ipsec_add_del_tunnel_args_t;
+
+typedef struct
+{
+ u8 is_add;
+ u32 local_sa_id;
+ u32 remote_sa_id;
+ ip4_address_t local_ip;
+ ip4_address_t remote_ip;
+} ipsec_add_del_ipsec_gre_tunnel_args_t;
+
+typedef enum
+{
+ IPSEC_IF_SET_KEY_TYPE_NONE,
+ IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO,
+ IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO,
+ IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG,
+ IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG,
+} ipsec_if_set_key_type_t;
+
+typedef struct
+{
+ u32 id;
+ i32 priority;
+ u8 is_outbound;
+
+ // Selector
+ u8 is_ipv6;
+ ip46_address_range_t laddr;
+ ip46_address_range_t raddr;
+ u8 protocol;
+ port_range_t lport;
+ port_range_t rport;
+
+ // Policy
+ u8 policy;
+ u32 sa_id;
+ u32 sa_index;
+
+ // Counter
+ vlib_counter_t counter;
+} ipsec_policy_t;
+
+typedef struct
+{
+ u32 id;
+ /* pool of policies */
+ ipsec_policy_t *policies;
+ /* vectors of policy indices */
+ u32 *ipv4_outbound_policies;
+ u32 *ipv6_outbound_policies;
+ u32 *ipv4_inbound_protect_policy_indices;
+ u32 *ipv4_inbound_policy_discard_and_bypass_indices;
+ u32 *ipv6_inbound_protect_policy_indices;
+ u32 *ipv6_inbound_policy_discard_and_bypass_indices;
+} ipsec_spd_t;
+
+typedef struct
+{
+ u32 spd_index;
+} ip4_ipsec_config_t;
+
+typedef struct
+{
+ u32 spd_index;
+} ip6_ipsec_config_t;
+
+typedef struct
+{
+ u32 input_sa_index;
+ u32 output_sa_index;
+ u32 hw_if_index;
+} ipsec_tunnel_if_t;
+
+typedef struct
+{
+ i32 (*add_del_sa_sess_cb) (u32 sa_index, u8 is_add);
+ clib_error_t *(*check_support_cb) (ipsec_sa_t * sa);
+} ipsec_main_callbacks_t;
+
+typedef struct
+{
+ /* pool of tunnel instances */
+ ipsec_spd_t *spds;
+ ipsec_sa_t *sad;
+
+ /* pool of tunnel interfaces */
+ ipsec_tunnel_if_t *tunnel_interfaces;
+ u32 *free_tunnel_if_indices;
+
+ u32 **empty_buffers;
+
+ uword *tunnel_index_by_key;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /* next node indices */
+ u32 feature_next_node_index[32];
+
+ /* hashes */
+ uword *spd_index_by_spd_id;
+ uword *spd_index_by_sw_if_index;
+ uword *sa_index_by_sa_id;
+ uword *ipsec_if_pool_index_by_key;
+
+ /* node indices */
+ u32 error_drop_node_index;
+ u32 esp_encrypt_node_index;
+ u32 esp_decrypt_node_index;
+ /* next node indices */
+ u32 esp_encrypt_next_index;
+ u32 esp_decrypt_next_index;
+
+ /* callbacks */
+ ipsec_main_callbacks_t cb;
+} ipsec_main_t;
+
+ipsec_main_t ipsec_main;
+
+extern vlib_node_registration_t esp_encrypt_node;
+extern vlib_node_registration_t esp_decrypt_node;
+extern vlib_node_registration_t ipsec_if_output_node;
+extern vlib_node_registration_t ipsec_if_input_node;
+
+
+/*
+ * functions
+ */
+int ipsec_set_interface_spd (vlib_main_t * vm, u32 sw_if_index, u32 spd_id,
+ int is_add);
+int ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add);
+int ipsec_add_del_policy (vlib_main_t * vm, ipsec_policy_t * policy,
+ int is_add);
+int ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add);
+int ipsec_set_sa_key (vlib_main_t * vm, ipsec_sa_t * sa_update);
+
+u32 ipsec_get_sa_index_by_sa_id (u32 sa_id);
+u8 *format_ipsec_if_output_trace (u8 * s, va_list * args);
+u8 *format_ipsec_policy_action (u8 * s, va_list * args);
+u8 *format_ipsec_crypto_alg (u8 * s, va_list * args);
+u8 *format_ipsec_integ_alg (u8 * s, va_list * args);
+u8 *format_ipsec_replay_window (u8 * s, va_list * args);
+uword unformat_ipsec_policy_action (unformat_input_t * input, va_list * args);
+uword unformat_ipsec_crypto_alg (unformat_input_t * input, va_list * args);
+uword unformat_ipsec_integ_alg (unformat_input_t * input, va_list * args);
+
+int ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm,
+ ipsec_add_del_tunnel_args_t * args,
+ u32 * sw_if_index);
+int ipsec_add_del_tunnel_if (ipsec_add_del_tunnel_args_t * args);
+int ipsec_add_del_ipsec_gre_tunnel (vnet_main_t * vnm,
+ ipsec_add_del_ipsec_gre_tunnel_args_t *
+ args);
+int ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index,
+ ipsec_if_set_key_type_t type, u8 alg, u8 * key);
+
+
+/*
+ * inline functions
+ */
+
+always_inline void
+ipsec_alloc_empty_buffers (vlib_main_t * vm, ipsec_main_t * im)
+{
+ u32 thread_index = vlib_get_thread_index ();
+ uword l = vec_len (im->empty_buffers[thread_index]);
+ uword n_alloc = 0;
+
+ if (PREDICT_FALSE (l < VLIB_FRAME_SIZE))
+ {
+ if (!im->empty_buffers[thread_index])
+ {
+ vec_alloc (im->empty_buffers[thread_index], 2 * VLIB_FRAME_SIZE);
+ }
+
+ n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[thread_index] + l,
+ 2 * VLIB_FRAME_SIZE - l);
+
+ _vec_len (im->empty_buffers[thread_index]) = l + n_alloc;
+ }
+}
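+
+/*
+ * Worked example (illustrative, assuming VLIB_FRAME_SIZE is 256): if the
+ * per-thread cache holds l = 100 buffers, the branch above allocates up
+ * to 412 more so a full 512-buffer reserve is available to the ESP
+ * nodes; once l >= VLIB_FRAME_SIZE the call reduces to a length check.
+ */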
+
+static_always_inline u32
+get_next_output_feature_node_index (vlib_buffer_t * b,
+ vlib_node_runtime_t * nr)
+{
+ u32 next;
+ u32 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_t *node = vlib_get_node (vm, nr->node_index);
+
+ vnet_feature_next (sw_if_index, &next, b);
+ return node->next_nodes[next];
+}
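+
+/*
+ * Note: vnet_feature_next advances the buffer to the next enabled
+ * feature on the TX arc; the helper above translates that per-arc next
+ * index into a graph-wide node index so callers can enqueue directly.
+ */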
+
+#endif /* __IPSEC_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c
new file mode 100644
index 00000000..3a5b89fe
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_api.c
@@ -0,0 +1,757 @@
+/*
+ *------------------------------------------------------------------
+ * ipsec_api.c - ipsec api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#if WITH_LIBSSL > 0
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ikev2.h>
+#endif /* WITH_LIBSSL */
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(IPSEC_SPD_ADD_DEL, ipsec_spd_add_del) \
+_(IPSEC_INTERFACE_ADD_DEL_SPD, ipsec_interface_add_del_spd) \
+_(IPSEC_SPD_ADD_DEL_ENTRY, ipsec_spd_add_del_entry) \
+_(IPSEC_SAD_ADD_DEL_ENTRY, ipsec_sad_add_del_entry) \
+_(IPSEC_SA_SET_KEY, ipsec_sa_set_key) \
+_(IPSEC_SPD_DUMP, ipsec_spd_dump) \
+_(IPSEC_TUNNEL_IF_ADD_DEL, ipsec_tunnel_if_add_del) \
+_(IKEV2_PROFILE_ADD_DEL, ikev2_profile_add_del) \
+_(IKEV2_PROFILE_SET_AUTH, ikev2_profile_set_auth) \
+_(IKEV2_PROFILE_SET_ID, ikev2_profile_set_id) \
+_(IKEV2_PROFILE_SET_TS, ikev2_profile_set_ts) \
+_(IKEV2_SET_LOCAL_KEY, ikev2_set_local_key) \
+_(IKEV2_SET_RESPONDER, ikev2_set_responder) \
+_(IKEV2_SET_IKE_TRANSFORMS, ikev2_set_ike_transforms) \
+_(IKEV2_SET_ESP_TRANSFORMS, ikev2_set_esp_transforms) \
+_(IKEV2_SET_SA_LIFETIME, ikev2_set_sa_lifetime) \
+_(IKEV2_INITIATE_SA_INIT, ikev2_initiate_sa_init) \
+_(IKEV2_INITIATE_DEL_IKE_SA, ikev2_initiate_del_ike_sa) \
+_(IKEV2_INITIATE_DEL_CHILD_SA, ikev2_initiate_del_child_sa) \
+_(IKEV2_INITIATE_REKEY_CHILD_SA, ikev2_initiate_rekey_child_sa)
+
+static void vl_api_ipsec_spd_add_del_t_handler
+ (vl_api_ipsec_spd_add_del_t * mp)
+{
+#if WITH_LIBSSL == 0
+ clib_warning ("unimplemented");
+#else
+
+ vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main ();
+ vl_api_ipsec_spd_add_del_reply_t *rmp;
+ int rv;
+
+ rv = ipsec_add_del_spd (vm, ntohl (mp->spd_id), mp->is_add);
+
+ REPLY_MACRO (VL_API_IPSEC_SPD_ADD_DEL_REPLY);
+#endif
+}
+
+static void vl_api_ipsec_interface_add_del_spd_t_handler
+ (vl_api_ipsec_interface_add_del_spd_t * mp)
+{
+ vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main ();
+ vl_api_ipsec_interface_add_del_spd_reply_t *rmp;
+ int rv;
+ u32 sw_if_index __attribute__ ((unused));
+ u32 spd_id __attribute__ ((unused));
+
+ sw_if_index = ntohl (mp->sw_if_index);
+ spd_id = ntohl (mp->spd_id);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+#if WITH_LIBSSL > 0
+ rv = ipsec_set_interface_spd (vm, sw_if_index, spd_id, mp->is_add);
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_IPSEC_INTERFACE_ADD_DEL_SPD_REPLY);
+}
+
+static void vl_api_ipsec_spd_add_del_entry_t_handler
+ (vl_api_ipsec_spd_add_del_entry_t * mp)
+{
+ vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main ();
+ vl_api_ipsec_spd_add_del_entry_reply_t *rmp;
+ int rv;
+
+#if WITH_LIBSSL > 0
+ ipsec_policy_t p;
+
+ memset (&p, 0, sizeof (p));
+
+ p.id = ntohl (mp->spd_id);
+ p.priority = ntohl (mp->priority);
+ p.is_outbound = mp->is_outbound;
+ p.is_ipv6 = mp->is_ipv6;
+
+ if (mp->is_ipv6 || mp->is_ip_any)
+ {
+ clib_memcpy (&p.raddr.start, mp->remote_address_start, 16);
+ clib_memcpy (&p.raddr.stop, mp->remote_address_stop, 16);
+ clib_memcpy (&p.laddr.start, mp->local_address_start, 16);
+ clib_memcpy (&p.laddr.stop, mp->local_address_stop, 16);
+ }
+ else
+ {
+ clib_memcpy (&p.raddr.start.ip4.data, mp->remote_address_start, 4);
+ clib_memcpy (&p.raddr.stop.ip4.data, mp->remote_address_stop, 4);
+ clib_memcpy (&p.laddr.start.ip4.data, mp->local_address_start, 4);
+ clib_memcpy (&p.laddr.stop.ip4.data, mp->local_address_stop, 4);
+ }
+ p.protocol = mp->protocol;
+ p.rport.start = ntohs (mp->remote_port_start);
+ p.rport.stop = ntohs (mp->remote_port_stop);
+ p.lport.start = ntohs (mp->local_port_start);
+ p.lport.stop = ntohs (mp->local_port_stop);
+ /* policy action resolve unsupported */
+ if (mp->policy == IPSEC_POLICY_ACTION_RESOLVE)
+ {
+ clib_warning ("unsupported action: 'resolve'");
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+ p.policy = mp->policy;
+ p.sa_id = ntohl (mp->sa_id);
+
+ rv = ipsec_add_del_policy (vm, &p, mp->is_add);
+ if (rv)
+ goto out;
+
+ if (mp->is_ip_any)
+ {
+ p.is_ipv6 = 1;
+ rv = ipsec_add_del_policy (vm, &p, mp->is_add);
+ }
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+#endif
+
+out:
+ REPLY_MACRO (VL_API_IPSEC_SPD_ADD_DEL_ENTRY_REPLY);
+}
+
+static void vl_api_ipsec_sad_add_del_entry_t_handler
+ (vl_api_ipsec_sad_add_del_entry_t * mp)
+{
+ vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main ();
+ vl_api_ipsec_sad_add_del_entry_reply_t *rmp;
+ int rv;
+#if WITH_LIBSSL > 0
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_t sa;
+
+ memset (&sa, 0, sizeof (sa));
+
+ sa.id = ntohl (mp->sad_id);
+ sa.spi = ntohl (mp->spi);
+ /* security protocol AH unsupported */
+ if (mp->protocol == IPSEC_PROTOCOL_AH)
+ {
+ clib_warning ("unsupported security protocol 'AH'");
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+ sa.protocol = mp->protocol;
+ /* check for unsupported crypto-alg */
+ if (mp->crypto_algorithm < IPSEC_CRYPTO_ALG_AES_CBC_128 ||
+ mp->crypto_algorithm >= IPSEC_CRYPTO_N_ALG)
+ {
+ clib_warning ("unsupported crypto-alg: '%U'", format_ipsec_crypto_alg,
+ mp->crypto_algorithm);
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+ sa.crypto_alg = mp->crypto_algorithm;
+ sa.crypto_key_len = mp->crypto_key_length;
+ clib_memcpy (&sa.crypto_key, mp->crypto_key, sizeof (sa.crypto_key));
+ /* check for unsupported integ-alg */
+ if (mp->integrity_algorithm >= IPSEC_INTEG_N_ALG)
+ {
+ clib_warning ("unsupported integ-alg: '%U'", format_ipsec_integ_alg,
+ mp->integrity_algorithm);
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+
+ sa.integ_alg = mp->integrity_algorithm;
+ sa.integ_key_len = mp->integrity_key_length;
+ clib_memcpy (&sa.integ_key, mp->integrity_key, sizeof (sa.integ_key));
+ sa.use_esn = mp->use_extended_sequence_number;
+ sa.is_tunnel = mp->is_tunnel;
+ sa.is_tunnel_ip6 = mp->is_tunnel_ipv6;
+ if (sa.is_tunnel_ip6)
+ {
+ clib_memcpy (&sa.tunnel_src_addr, mp->tunnel_src_address, 16);
+ clib_memcpy (&sa.tunnel_dst_addr, mp->tunnel_dst_address, 16);
+ }
+ else
+ {
+ clib_memcpy (&sa.tunnel_src_addr.ip4.data, mp->tunnel_src_address, 4);
+ clib_memcpy (&sa.tunnel_dst_addr.ip4.data, mp->tunnel_dst_address, 4);
+ }
+
+ ASSERT (im->cb.check_support_cb);
+ clib_error_t *err = im->cb.check_support_cb (&sa);
+ if (err)
+ {
+ clib_warning ("%s", err->what);
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+
+ rv = ipsec_add_del_sa (vm, &sa, mp->is_add);
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+#endif
+
+out:
+ REPLY_MACRO (VL_API_IPSEC_SAD_ADD_DEL_ENTRY_REPLY);
+}
+
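+/*
+ * Fill in and send one IPSEC_SPD_DETAILS message; invoked once per
+ * matching policy by vl_api_ipsec_spd_dump_t_handler() below.
+ */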
+static void
+send_ipsec_spd_details (ipsec_policy_t * p, unix_shared_memory_queue_t * q,
+ u32 context)
+{
+ vl_api_ipsec_spd_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IPSEC_SPD_DETAILS);
+ mp->context = context;
+
+ mp->spd_id = htonl (p->id);
+ mp->priority = htonl (p->priority);
+ mp->is_outbound = p->is_outbound;
+ mp->is_ipv6 = p->is_ipv6;
+ if (p->is_ipv6)
+ {
+ memcpy (mp->local_start_addr, &p->laddr.start.ip6, 16);
+ memcpy (mp->local_stop_addr, &p->laddr.stop.ip6, 16);
+ memcpy (mp->remote_start_addr, &p->raddr.start.ip6, 16);
+ memcpy (mp->remote_stop_addr, &p->raddr.stop.ip6, 16);
+ }
+ else
+ {
+ memcpy (mp->local_start_addr, &p->laddr.start.ip4, 4);
+ memcpy (mp->local_stop_addr, &p->laddr.stop.ip4, 4);
+ memcpy (mp->remote_start_addr, &p->raddr.start.ip4, 4);
+ memcpy (mp->remote_stop_addr, &p->raddr.stop.ip4, 4);
+ }
+ mp->local_start_port = htons (p->lport.start);
+ mp->local_stop_port = htons (p->lport.stop);
+ mp->remote_start_port = htons (p->rport.start);
+ mp->remote_stop_port = htons (p->rport.stop);
+ mp->protocol = p->protocol;
+ mp->policy = p->policy;
+ mp->sa_id = htonl (p->sa_id);
+ mp->bytes = clib_host_to_net_u64 (p->counter.bytes);
+ mp->packets = clib_host_to_net_u64 (p->counter.packets);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_ipsec_spd_dump_t_handler (vl_api_ipsec_spd_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *policy;
+ ipsec_spd_t *spd;
+ uword *p;
+ u32 spd_index;
+#if WITH_LIBSSL > 0
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ p = hash_get (im->spd_index_by_spd_id, ntohl (mp->spd_id));
+ if (!p)
+ return;
+
+ spd_index = p[0];
+ spd = pool_elt_at_index (im->spds, spd_index);
+
+ /* *INDENT-OFF* */
+  pool_foreach (policy, spd->policies,
+  ({
+    if (mp->sa_id == ~(0) || ntohl (mp->sa_id) == policy->sa_id)
+      send_ipsec_spd_details (policy, q, mp->context);
+  }));
+ /* *INDENT-ON* */
+#else
+ clib_warning ("unimplemented");
+#endif
+}
+
+static void
+vl_api_ipsec_sa_set_key_t_handler (vl_api_ipsec_sa_set_key_t * mp)
+{
+ vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main ();
+ vl_api_ipsec_sa_set_key_reply_t *rmp;
+ int rv;
+#if WITH_LIBSSL > 0
+ ipsec_sa_t sa;
+ sa.id = ntohl (mp->sa_id);
+ sa.crypto_key_len = mp->crypto_key_length;
+ clib_memcpy (&sa.crypto_key, mp->crypto_key, sizeof (sa.crypto_key));
+ sa.integ_key_len = mp->integrity_key_length;
+ clib_memcpy (&sa.integ_key, mp->integrity_key, sizeof (sa.integ_key));
+
+ rv = ipsec_set_sa_key (vm, &sa);
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IPSEC_SA_SET_KEY_REPLY);
+}
+
+static void
+vl_api_ipsec_tunnel_if_add_del_t_handler (vl_api_ipsec_tunnel_if_add_del_t *
+ mp)
+{
+ vl_api_ipsec_tunnel_if_add_del_reply_t *rmp;
+ ipsec_main_t *im = &ipsec_main;
+ vnet_main_t *vnm = im->vnet_main;
+ u32 sw_if_index = ~0;
+ int rv;
+
+#if WITH_LIBSSL > 0
+ ipsec_add_del_tunnel_args_t tun;
+
+ memset (&tun, 0, sizeof (ipsec_add_del_tunnel_args_t));
+
+ tun.is_add = mp->is_add;
+ tun.esn = mp->esn;
+ tun.anti_replay = mp->anti_replay;
+ tun.local_spi = ntohl (mp->local_spi);
+ tun.remote_spi = ntohl (mp->remote_spi);
+ tun.crypto_alg = mp->crypto_alg;
+ tun.local_crypto_key_len = mp->local_crypto_key_len;
+ tun.remote_crypto_key_len = mp->remote_crypto_key_len;
+ tun.integ_alg = mp->integ_alg;
+ tun.local_integ_key_len = mp->local_integ_key_len;
+ tun.remote_integ_key_len = mp->remote_integ_key_len;
+ memcpy (&tun.local_ip, mp->local_ip, 4);
+ memcpy (&tun.remote_ip, mp->remote_ip, 4);
+ memcpy (&tun.local_crypto_key, &mp->local_crypto_key,
+ mp->local_crypto_key_len);
+ memcpy (&tun.remote_crypto_key, &mp->remote_crypto_key,
+ mp->remote_crypto_key_len);
+ memcpy (&tun.local_integ_key, &mp->local_integ_key,
+ mp->local_integ_key_len);
+ memcpy (&tun.remote_integ_key, &mp->remote_integ_key,
+ mp->remote_integ_key_len);
+
+ rv = ipsec_add_del_tunnel_if_internal (vnm, &tun, &sw_if_index);
+
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+  REPLY_MACRO2 (VL_API_IPSEC_TUNNEL_IF_ADD_DEL_REPLY,
+  ({
+    rmp->sw_if_index = htonl (sw_if_index);
+  }));
+}
+
+
+static void
+vl_api_ikev2_profile_add_del_t_handler (vl_api_ikev2_profile_add_del_t * mp)
+{
+ vl_api_ikev2_profile_add_del_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+ u8 *tmp = format (0, "%s", mp->name);
+ error = ikev2_add_del_profile (vm, tmp, mp->is_add);
+ vec_free (tmp);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_PROFILE_ADD_DEL_REPLY);
+}
+
+static void
+ vl_api_ikev2_profile_set_auth_t_handler
+ (vl_api_ikev2_profile_set_auth_t * mp)
+{
+ vl_api_ikev2_profile_set_auth_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+ u8 *tmp = format (0, "%s", mp->name);
+ u8 *data = vec_new (u8, mp->data_len);
+ clib_memcpy (data, mp->data, mp->data_len);
+ error = ikev2_set_profile_auth (vm, tmp, mp->auth_method, data, mp->is_hex);
+ vec_free (tmp);
+ vec_free (data);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_AUTH_REPLY);
+}
+
+static void
+vl_api_ikev2_profile_set_id_t_handler (vl_api_ikev2_profile_set_id_t * mp)
+{
+  vl_api_ikev2_profile_set_id_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+ u8 *tmp = format (0, "%s", mp->name);
+ u8 *data = vec_new (u8, mp->data_len);
+ clib_memcpy (data, mp->data, mp->data_len);
+ error = ikev2_set_profile_id (vm, tmp, mp->id_type, data, mp->is_local);
+ vec_free (tmp);
+ vec_free (data);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_ID_REPLY);
+}
+
+static void
+vl_api_ikev2_profile_set_ts_t_handler (vl_api_ikev2_profile_set_ts_t * mp)
+{
+ vl_api_ikev2_profile_set_ts_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+ u8 *tmp = format (0, "%s", mp->name);
+ error = ikev2_set_profile_ts (vm, tmp, mp->proto, mp->start_port,
+ mp->end_port, (ip4_address_t) mp->start_addr,
+ (ip4_address_t) mp->end_addr, mp->is_local);
+ vec_free (tmp);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_TS_REPLY);
+}
+
+static void
+vl_api_ikev2_set_local_key_t_handler (vl_api_ikev2_set_local_key_t * mp)
+{
+  vl_api_ikev2_set_local_key_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ error = ikev2_set_local_key (vm, mp->key_file);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_SET_LOCAL_KEY_REPLY);
+}
+
+static void
+vl_api_ikev2_set_responder_t_handler (vl_api_ikev2_set_responder_t * mp)
+{
+ vl_api_ikev2_set_responder_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ u8 *tmp = format (0, "%s", mp->name);
+ ip4_address_t ip4;
+ clib_memcpy (&ip4, mp->address, sizeof (ip4));
+
+ error = ikev2_set_profile_responder (vm, tmp, mp->sw_if_index, ip4);
+ vec_free (tmp);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_SET_RESPONDER_REPLY);
+}
+
+static void
+vl_api_ikev2_set_ike_transforms_t_handler (vl_api_ikev2_set_ike_transforms_t *
+ mp)
+{
+ vl_api_ikev2_set_ike_transforms_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ u8 *tmp = format (0, "%s", mp->name);
+
+ error =
+ ikev2_set_profile_ike_transforms (vm, tmp, mp->crypto_alg, mp->integ_alg,
+ mp->dh_group, mp->crypto_key_size);
+ vec_free (tmp);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_SET_IKE_TRANSFORMS_REPLY);
+}
+
+static void
+vl_api_ikev2_set_esp_transforms_t_handler (vl_api_ikev2_set_esp_transforms_t *
+ mp)
+{
+ vl_api_ikev2_set_esp_transforms_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ u8 *tmp = format (0, "%s", mp->name);
+
+ error =
+ ikev2_set_profile_esp_transforms (vm, tmp, mp->crypto_alg, mp->integ_alg,
+ mp->dh_group, mp->crypto_key_size);
+ vec_free (tmp);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_SET_ESP_TRANSFORMS_REPLY);
+}
+
+static void
+vl_api_ikev2_set_sa_lifetime_t_handler (vl_api_ikev2_set_sa_lifetime_t * mp)
+{
+ vl_api_ikev2_set_sa_lifetime_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ u8 *tmp = format (0, "%s", mp->name);
+
+ error =
+ ikev2_set_profile_sa_lifetime (vm, tmp, mp->lifetime, mp->lifetime_jitter,
+ mp->handover, mp->lifetime_maxdata);
+ vec_free (tmp);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_SET_SA_LIFETIME_REPLY);
+}
+
+static void
+vl_api_ikev2_initiate_sa_init_t_handler (vl_api_ikev2_initiate_sa_init_t * mp)
+{
+ vl_api_ikev2_initiate_sa_init_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ u8 *tmp = format (0, "%s", mp->name);
+
+ error = ikev2_initiate_sa_init (vm, tmp);
+ vec_free (tmp);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_INITIATE_SA_INIT_REPLY);
+}
+
+static void
+vl_api_ikev2_initiate_del_ike_sa_t_handler (vl_api_ikev2_initiate_del_ike_sa_t
+ * mp)
+{
+ vl_api_ikev2_initiate_del_ike_sa_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ error = ikev2_initiate_delete_ike_sa (vm, mp->ispi);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_INITIATE_DEL_IKE_SA_REPLY);
+}
+
+static void
+ vl_api_ikev2_initiate_del_child_sa_t_handler
+ (vl_api_ikev2_initiate_del_child_sa_t * mp)
+{
+ vl_api_ikev2_initiate_del_child_sa_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ error = ikev2_initiate_delete_child_sa (vm, mp->ispi);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_INITIATE_DEL_CHILD_SA_REPLY);
+}
+
+static void
+ vl_api_ikev2_initiate_rekey_child_sa_t_handler
+ (vl_api_ikev2_initiate_rekey_child_sa_t * mp)
+{
+ vl_api_ikev2_initiate_rekey_child_sa_reply_t *rmp;
+ int rv = 0;
+
+#if WITH_LIBSSL > 0
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *error;
+
+ error = ikev2_initiate_rekey_child_sa (vm, mp->ispi);
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+#else
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+#endif
+
+ REPLY_MACRO (VL_API_IKEV2_INITIATE_REKEY_CHILD_SA_REPLY);
+}
+
+/*
+ * ipsec_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
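+/*
+ * Register the (name, crc) pair of each message in the table above so
+ * clients can resolve message ids at run time.
+ */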
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_ipsec;
+#undef _
+}
+
+static clib_error_t *
+ipsec_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (ipsec_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c
new file mode 100644
index 00000000..0e034402
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_cli.c
@@ -0,0 +1,863 @@
+/*
+ * ipsec_cli.c : IPSec CLI commands
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/interface.h>
+
+#include <vnet/ipsec/ipsec.h>
+
+static clib_error_t *
+set_interface_spd_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ipsec_main_t *im = &ipsec_main;
+ u32 sw_if_index = (u32) ~ 0;
+ u32 spd_id;
+ int is_add = 1;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (unformat
+ (line_input, "%U %u", unformat_vnet_sw_interface, im->vnet_main,
+ &sw_if_index, &spd_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ ipsec_set_interface_spd (vm, sw_if_index, spd_id, is_add);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_spd_command, static) = {
+ .path = "set interface ipsec spd",
+ .short_help =
+ "set interface ipsec spd <int> <id>",
+ .function = set_interface_spd_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ipsec_sa_add_del_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ipsec_main_t *im = &ipsec_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ipsec_sa_t sa;
+ int is_add = ~0;
+ u8 *ck = 0, *ik = 0;
+ clib_error_t *error = NULL;
+
+ memset (&sa, 0, sizeof (sa));
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add %u", &sa.id))
+ is_add = 1;
+ else if (unformat (line_input, "del %u", &sa.id))
+ is_add = 0;
+ else if (unformat (line_input, "spi %u", &sa.spi))
+ ;
+ else if (unformat (line_input, "esp"))
+ sa.protocol = IPSEC_PROTOCOL_ESP;
+ else if (unformat (line_input, "ah"))
+ {
+ //sa.protocol = IPSEC_PROTOCOL_AH;
+ error = clib_error_return (0, "unsupported security protocol 'AH'");
+ goto done;
+ }
+ else
+ if (unformat (line_input, "crypto-key %U", unformat_hex_string, &ck))
+ sa.crypto_key_len = vec_len (ck);
+ else
+ if (unformat
+ (line_input, "crypto-alg %U", unformat_ipsec_crypto_alg,
+ &sa.crypto_alg))
+ {
+ if (sa.crypto_alg < IPSEC_CRYPTO_ALG_AES_CBC_128 ||
+ sa.crypto_alg >= IPSEC_CRYPTO_N_ALG)
+ {
+ error = clib_error_return (0, "unsupported crypto-alg: '%U'",
+ format_ipsec_crypto_alg,
+ sa.crypto_alg);
+ goto done;
+ }
+ }
+ else
+ if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik))
+ sa.integ_key_len = vec_len (ik);
+ else if (unformat (line_input, "integ-alg %U", unformat_ipsec_integ_alg,
+ &sa.integ_alg))
+ {
+ if (sa.integ_alg < IPSEC_INTEG_ALG_SHA1_96 ||
+ sa.integ_alg >= IPSEC_INTEG_N_ALG)
+ {
+ error = clib_error_return (0, "unsupported integ-alg: '%U'",
+ format_ipsec_integ_alg,
+ sa.integ_alg);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "tunnel-src %U",
+ unformat_ip4_address, &sa.tunnel_src_addr.ip4))
+ sa.is_tunnel = 1;
+ else if (unformat (line_input, "tunnel-dst %U",
+ unformat_ip4_address, &sa.tunnel_dst_addr.ip4))
+ sa.is_tunnel = 1;
+ else if (unformat (line_input, "tunnel-src %U",
+ unformat_ip6_address, &sa.tunnel_src_addr.ip6))
+ {
+ sa.is_tunnel = 1;
+ sa.is_tunnel_ip6 = 1;
+ }
+ else if (unformat (line_input, "tunnel-dst %U",
+ unformat_ip6_address, &sa.tunnel_dst_addr.ip6))
+ {
+ sa.is_tunnel = 1;
+ sa.is_tunnel_ip6 = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
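+  /* Clamp key lengths to the fixed-size key buffers in ipsec_sa_t
+     before copying in the hex-decoded key vectors. */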
+ if (sa.crypto_key_len > sizeof (sa.crypto_key))
+ sa.crypto_key_len = sizeof (sa.crypto_key);
+
+ if (sa.integ_key_len > sizeof (sa.integ_key))
+ sa.integ_key_len = sizeof (sa.integ_key);
+
+  if (ck)
+    clib_memcpy (sa.crypto_key, ck, sa.crypto_key_len);
+
+  if (ik)
+    clib_memcpy (sa.integ_key, ik, sa.integ_key_len);
+
+ if (is_add)
+ {
+ ASSERT (im->cb.check_support_cb);
+ error = im->cb.check_support_cb (&sa);
+ if (error)
+ goto done;
+ }
+
+ ipsec_add_del_sa (vm, &sa, is_add);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ipsec_sa_add_del_command, static) = {
+ .path = "ipsec sa",
+ .short_help =
+ "ipsec sa [add|del]",
+ .function = ipsec_sa_add_del_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ipsec_spd_add_del_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 spd_id = ~0;
+ int is_add = ~0;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "%u", &spd_id))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (spd_id == ~0)
+ {
+ error = clib_error_return (0, "please specify SPD ID");
+ goto done;
+ }
+
+ ipsec_add_del_spd (vm, spd_id, is_add);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ipsec_spd_add_del_command, static) = {
+ .path = "ipsec spd",
+ .short_help =
+ "ipsec spd [add|del] <id>",
+ .function = ipsec_spd_add_del_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+ipsec_policy_add_del_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ipsec_policy_t p;
+ int is_add = 0;
+ int is_ip_any = 1;
+ u32 tmp, tmp2;
+ clib_error_t *error = NULL;
+
+ memset (&p, 0, sizeof (p));
+ p.lport.stop = p.rport.stop = ~0;
+ p.laddr.stop.ip4.as_u32 = p.raddr.stop.ip4.as_u32 = (u32) ~ 0;
+ p.laddr.stop.ip6.as_u64[0] = p.laddr.stop.ip6.as_u64[1] = (u64) ~ 0;
+ p.raddr.stop.ip6.as_u64[0] = p.raddr.stop.ip6.as_u64[1] = (u64) ~ 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "spd %u", &p.id))
+ ;
+ else if (unformat (line_input, "inbound"))
+ p.is_outbound = 0;
+ else if (unformat (line_input, "outbound"))
+ p.is_outbound = 1;
+ else if (unformat (line_input, "priority %d", &p.priority))
+ ;
+ else if (unformat (line_input, "protocol %u", &tmp))
+ p.protocol = (u8) tmp;
+ else
+ if (unformat
+ (line_input, "action %U", unformat_ipsec_policy_action,
+ &p.policy))
+ {
+ if (p.policy == IPSEC_POLICY_ACTION_RESOLVE)
+ {
+ error = clib_error_return (0, "unsupported action: 'resolve'");
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "sa %u", &p.sa_id))
+ ;
+ else if (unformat (line_input, "local-ip-range %U - %U",
+ unformat_ip4_address, &p.laddr.start.ip4,
+ unformat_ip4_address, &p.laddr.stop.ip4))
+ is_ip_any = 0;
+ else if (unformat (line_input, "remote-ip-range %U - %U",
+ unformat_ip4_address, &p.raddr.start.ip4,
+ unformat_ip4_address, &p.raddr.stop.ip4))
+ is_ip_any = 0;
+ else if (unformat (line_input, "local-ip-range %U - %U",
+ unformat_ip6_address, &p.laddr.start.ip6,
+ unformat_ip6_address, &p.laddr.stop.ip6))
+ {
+ p.is_ipv6 = 1;
+ is_ip_any = 0;
+ }
+ else if (unformat (line_input, "remote-ip-range %U - %U",
+ unformat_ip6_address, &p.raddr.start.ip6,
+ unformat_ip6_address, &p.raddr.stop.ip6))
+ {
+ p.is_ipv6 = 1;
+ is_ip_any = 0;
+ }
+ else if (unformat (line_input, "local-port-range %u - %u", &tmp, &tmp2))
+ {
+ p.lport.start = tmp;
+ p.lport.stop = tmp2;
+ }
+ else
+ if (unformat (line_input, "remote-port-range %u - %u", &tmp, &tmp2))
+ {
+ p.rport.start = tmp;
+ p.rport.stop = tmp2;
+ }
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ ipsec_add_del_policy (vm, &p, is_add);
+ if (is_ip_any)
+ {
+ p.is_ipv6 = 1;
+ ipsec_add_del_policy (vm, &p, is_add);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ipsec_policy_add_del_command, static) = {
+ .path = "ipsec policy",
+ .short_help =
+ "ipsec policy [add|del] spd <id> priority <n> ",
+ .function = ipsec_policy_add_del_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_ipsec_sa_key_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ipsec_sa_t sa;
+ u8 *ck = 0, *ik = 0;
+ clib_error_t *error = NULL;
+
+ memset (&sa, 0, sizeof (sa));
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%u", &sa.id))
+ ;
+ else
+ if (unformat (line_input, "crypto-key %U", unformat_hex_string, &ck))
+ sa.crypto_key_len = vec_len (ck);
+ else
+ if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik))
+ sa.integ_key_len = vec_len (ik);
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (sa.crypto_key_len > sizeof (sa.crypto_key))
+ sa.crypto_key_len = sizeof (sa.crypto_key);
+
+ if (sa.integ_key_len > sizeof (sa.integ_key))
+ sa.integ_key_len = sizeof (sa.integ_key);
+
+  if (ck)
+    clib_memcpy (sa.crypto_key, ck, sa.crypto_key_len);
+
+  if (ik)
+    clib_memcpy (sa.integ_key, ik, sa.integ_key_len);
+
+ ipsec_set_sa_key (vm, &sa);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ipsec_sa_key_command, static) = {
+ .path = "set ipsec sa",
+ .short_help =
+ "set ipsec sa <id> crypto-key <key> integ-key <key>",
+ .function = set_ipsec_sa_key_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_ipsec_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ipsec_spd_t *spd;
+ ipsec_sa_t *sa;
+ ipsec_policy_t *p;
+ ipsec_main_t *im = &ipsec_main;
+ u32 *i;
+ ipsec_tunnel_if_t *t;
+ vnet_hw_interface_t *hi;
+
+ /* *INDENT-OFF* */
+ pool_foreach (sa, im->sad, ({
+ if (sa->id) {
+ vlib_cli_output(vm, "sa %u spi %u mode %s protocol %s", sa->id, sa->spi,
+ sa->is_tunnel ? "tunnel" : "transport",
+ sa->protocol ? "esp" : "ah");
+ if (sa->protocol == IPSEC_PROTOCOL_ESP) {
+ vlib_cli_output(vm, " crypto alg %U%s%U integrity alg %U%s%U",
+ format_ipsec_crypto_alg, sa->crypto_alg,
+ sa->crypto_alg ? " key " : "",
+ format_hex_bytes, sa->crypto_key, sa->crypto_key_len,
+ format_ipsec_integ_alg, sa->integ_alg,
+ sa->integ_alg ? " key " : "",
+ format_hex_bytes, sa->integ_key, sa->integ_key_len);
+ }
+ if (sa->is_tunnel && sa->is_tunnel_ip6) {
+ vlib_cli_output(vm, " tunnel src %U dst %U",
+ format_ip6_address, &sa->tunnel_src_addr.ip6,
+ format_ip6_address, &sa->tunnel_dst_addr.ip6);
+ } else if (sa->is_tunnel) {
+ vlib_cli_output(vm, " tunnel src %U dst %U",
+ format_ip4_address, &sa->tunnel_src_addr.ip4,
+ format_ip4_address, &sa->tunnel_dst_addr.ip4);
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /* *INDENT-OFF* */
+ pool_foreach (spd, im->spds, ({
+ vlib_cli_output(vm, "spd %u", spd->id);
+
+ vlib_cli_output(vm, " outbound policies");
+ vec_foreach(i, spd->ipv4_outbound_policies)
+ {
+ p = pool_elt_at_index(spd->policies, *i);
+ vlib_cli_output(vm, " priority %d action %U protocol %s%s",
+ p->priority,
+ format_ipsec_policy_action, p->policy,
+ p->protocol ?
+ format(0, "%U", format_ip_protocol, p->protocol) :
+ (u8 *) "any",
+ p->policy == IPSEC_POLICY_ACTION_PROTECT ?
+ format(0, " sa %u", p->sa_id) :
+ (u8 *) "");
+ vlib_cli_output(vm, " local addr range %U - %U port range %u - %u",
+ format_ip4_address, &p->laddr.start.ip4,
+ format_ip4_address, &p->laddr.stop.ip4,
+ p->lport.start, p->lport.stop);
+      vlib_cli_output(vm, "  remote addr range %U - %U port range %u - %u",
+ format_ip4_address, &p->raddr.start.ip4,
+ format_ip4_address, &p->raddr.stop.ip4,
+ p->rport.start, p->rport.stop);
+ vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets,
+ p->counter.bytes);
+ };
+ vec_foreach(i, spd->ipv6_outbound_policies)
+ {
+ p = pool_elt_at_index(spd->policies, *i);
+ vlib_cli_output(vm, " priority %d action %U protocol %s%s",
+ p->priority,
+ format_ipsec_policy_action, p->policy,
+ p->protocol ?
+ format(0, "%U", format_ip_protocol, p->protocol) :
+ (u8 *) "any",
+ p->policy == IPSEC_POLICY_ACTION_PROTECT ?
+ format(0, " sa %u", p->sa_id) :
+ (u8 *) "");
+ vlib_cli_output(vm, " local addr range %U - %U port range %u - %u",
+ format_ip6_address, &p->laddr.start.ip6,
+ format_ip6_address, &p->laddr.stop.ip6,
+ p->lport.start, p->lport.stop);
+ vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u",
+ format_ip6_address, &p->raddr.start.ip6,
+ format_ip6_address, &p->raddr.stop.ip6,
+ p->rport.start, p->rport.stop);
+ vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets,
+ p->counter.bytes);
+ };
+ vlib_cli_output(vm, " inbound policies");
+ vec_foreach(i, spd->ipv4_inbound_protect_policy_indices)
+ {
+ p = pool_elt_at_index(spd->policies, *i);
+ vlib_cli_output(vm, " priority %d action %U protocol %s%s",
+ p->priority,
+ format_ipsec_policy_action, p->policy,
+ p->protocol ?
+ format(0, "%U", format_ip_protocol, p->protocol) :
+ (u8 *) "any",
+ p->policy == IPSEC_POLICY_ACTION_PROTECT ?
+ format(0, " sa %u", p->sa_id) :
+ (u8 *) "");
+ vlib_cli_output(vm, " local addr range %U - %U port range %u - %u",
+ format_ip4_address, &p->laddr.start.ip4,
+ format_ip4_address, &p->laddr.stop.ip4,
+ p->lport.start, p->lport.stop);
+      vlib_cli_output(vm, "  remote addr range %U - %U port range %u - %u",
+ format_ip4_address, &p->raddr.start.ip4,
+ format_ip4_address, &p->raddr.stop.ip4,
+ p->rport.start, p->rport.stop);
+ vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets,
+ p->counter.bytes);
+ };
+ vec_foreach(i, spd->ipv4_inbound_policy_discard_and_bypass_indices)
+ {
+ p = pool_elt_at_index(spd->policies, *i);
+ vlib_cli_output(vm, " priority %d action %U protocol %s%s",
+ p->priority,
+ format_ipsec_policy_action, p->policy,
+ p->protocol ?
+ format(0, "%U", format_ip_protocol, p->protocol) :
+ (u8 *) "any",
+ p->policy == IPSEC_POLICY_ACTION_PROTECT ?
+ format(0, " sa %u", p->sa_id) :
+ (u8 *) "");
+ vlib_cli_output(vm, " local addr range %U - %U port range %u - %u",
+ format_ip4_address, &p->laddr.start.ip4,
+ format_ip4_address, &p->laddr.stop.ip4,
+ p->lport.start, p->lport.stop);
+      vlib_cli_output(vm, "  remote addr range %U - %U port range %u - %u",
+ format_ip4_address, &p->raddr.start.ip4,
+ format_ip4_address, &p->raddr.stop.ip4,
+ p->rport.start, p->rport.stop);
+ vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets,
+ p->counter.bytes);
+ };
+ vec_foreach(i, spd->ipv6_inbound_protect_policy_indices)
+ {
+ p = pool_elt_at_index(spd->policies, *i);
+ vlib_cli_output(vm, " priority %d action %U protocol %s%s",
+ p->priority,
+ format_ipsec_policy_action, p->policy,
+ p->protocol ?
+ format(0, "%U", format_ip_protocol, p->protocol) :
+ (u8 *) "any",
+ p->policy == IPSEC_POLICY_ACTION_PROTECT ?
+ format(0, " sa %u", p->sa_id) :
+ (u8 *) "");
+ vlib_cli_output(vm, " local addr range %U - %U port range %u - %u",
+ format_ip6_address, &p->laddr.start.ip6,
+ format_ip6_address, &p->laddr.stop.ip6,
+ p->lport.start, p->lport.stop);
+ vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u",
+ format_ip6_address, &p->raddr.start.ip6,
+ format_ip6_address, &p->raddr.stop.ip6,
+ p->rport.start, p->rport.stop);
+ vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets,
+ p->counter.bytes);
+ };
+ vec_foreach(i, spd->ipv6_inbound_policy_discard_and_bypass_indices)
+ {
+ p = pool_elt_at_index(spd->policies, *i);
+ vlib_cli_output(vm, " priority %d action %U protocol %s%s",
+ p->priority,
+ format_ipsec_policy_action, p->policy,
+ p->protocol ?
+ format(0, "%U", format_ip_protocol, p->protocol) :
+ (u8 *) "any",
+ p->policy == IPSEC_POLICY_ACTION_PROTECT ?
+ format(0, " sa %u", p->sa_id) :
+ (u8 *) "");
+ vlib_cli_output(vm, " local addr range %U - %U port range %u - %u",
+ format_ip6_address, &p->laddr.start.ip6,
+ format_ip6_address, &p->laddr.stop.ip6,
+ p->lport.start, p->lport.stop);
+ vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u",
+ format_ip6_address, &p->raddr.start.ip6,
+ format_ip6_address, &p->raddr.stop.ip6,
+ p->rport.start, p->rport.stop);
+ vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets,
+ p->counter.bytes);
+ };
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "tunnel interfaces");
+ /* *INDENT-OFF* */
+ pool_foreach (t, im->tunnel_interfaces, ({
+ if (t->hw_if_index == ~0)
+ continue;
+ hi = vnet_get_hw_interface (im->vnet_main, t->hw_if_index);
+    vlib_cli_output(vm, "  %s", hi->name);
+ sa = pool_elt_at_index(im->sad, t->output_sa_index);
+ vlib_cli_output(vm, " seq %u seq-hi %u esn %u anti-replay %u",
+ sa->seq, sa->seq_hi, sa->use_esn, sa->use_anti_replay);
+ vlib_cli_output(vm, " local-spi %u local-ip %U", sa->spi,
+ format_ip4_address, &sa->tunnel_src_addr.ip4);
+ vlib_cli_output(vm, " local-crypto %U %U",
+ format_ipsec_crypto_alg, sa->crypto_alg,
+ format_hex_bytes, sa->crypto_key, sa->crypto_key_len);
+ vlib_cli_output(vm, " local-integrity %U %U",
+ format_ipsec_integ_alg, sa->integ_alg,
+ format_hex_bytes, sa->integ_key, sa->integ_key_len);
+ sa = pool_elt_at_index(im->sad, t->input_sa_index);
+ vlib_cli_output(vm, " last-seq %u last-seq-hi %u esn %u anti-replay %u window %U",
+ sa->last_seq, sa->last_seq_hi, sa->use_esn,
+ sa->use_anti_replay,
+ format_ipsec_replay_window, sa->replay_window);
+ vlib_cli_output(vm, " remote-spi %u remote-ip %U", sa->spi,
+ format_ip4_address, &sa->tunnel_src_addr.ip4);
+ vlib_cli_output(vm, " remote-crypto %U %U",
+ format_ipsec_crypto_alg, sa->crypto_alg,
+ format_hex_bytes, sa->crypto_key, sa->crypto_key_len);
+ vlib_cli_output(vm, " remote-integrity %U %U",
+ format_ipsec_integ_alg, sa->integ_alg,
+ format_hex_bytes, sa->integ_key, sa->integ_key_len);
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ipsec_command, static) = {
+ .path = "show ipsec",
+ .short_help = "show ipsec",
+ .function = show_ipsec_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+clear_ipsec_counters_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_t *spd;
+ ipsec_policy_t *p;
+
+ /* *INDENT-OFF* */
+ pool_foreach (spd, im->spds, ({
+ pool_foreach(p, spd->policies, ({
+ p->counter.packets = p->counter.bytes = 0;
+ }));
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_ipsec_counters_command, static) = {
+ .path = "clear ipsec counters",
+ .short_help = "clear ipsec counters",
+ .function = clear_ipsec_counters_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+create_ipsec_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ipsec_add_del_tunnel_args_t a;
+ int rv;
+ u32 num_m_args = 0;
+ clib_error_t *error = NULL;
+
+ memset (&a, 0, sizeof (a));
+ a.is_add = 1;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "local-ip %U", unformat_ip4_address, &a.local_ip))
+ num_m_args++;
+ else
+ if (unformat
+ (line_input, "remote-ip %U", unformat_ip4_address, &a.remote_ip))
+ num_m_args++;
+ else if (unformat (line_input, "local-spi %u", &a.local_spi))
+ num_m_args++;
+ else if (unformat (line_input, "remote-spi %u", &a.remote_spi))
+ num_m_args++;
+ else if (unformat (line_input, "del"))
+ a.is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (num_m_args < 4)
+ {
+ error = clib_error_return (0, "mandatory argument(s) missing");
+ goto done;
+ }
+
+ rv = ipsec_add_del_tunnel_if (&a);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_INVALID_VALUE:
+ if (a.is_add)
+ error = clib_error_return (0,
+ "IPSec tunnel interface already exists...");
+ else
+ error = clib_error_return (0, "IPSec tunnel interface not exists...");
+ goto done;
+ default:
+ error = clib_error_return (0, "ipsec_register_interface returned %d",
+ rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_ipsec_tunnel_command, static) = {
+ .path = "create ipsec tunnel",
+ .short_help = "create ipsec tunnel local-ip <addr> local-spi <spi> remote-ip <addr> remote-spi <spi>",
+ .function = create_ipsec_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_interface_key_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_if_set_key_type_t type = IPSEC_IF_SET_KEY_TYPE_NONE;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 alg;
+ u8 *key = 0;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U",
+ unformat_vnet_hw_interface, im->vnet_main, &hw_if_index))
+ ;
+ else
+ if (unformat
+ (line_input, "local crypto %U", unformat_ipsec_crypto_alg, &alg))
+ type = IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO;
+ else
+ if (unformat
+ (line_input, "remote crypto %U", unformat_ipsec_crypto_alg, &alg))
+ type = IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO;
+ else
+ if (unformat
+ (line_input, "local integ %U", unformat_ipsec_integ_alg, &alg))
+ type = IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG;
+ else
+ if (unformat
+ (line_input, "remote integ %U", unformat_ipsec_integ_alg, &alg))
+ type = IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG;
+ else if (unformat (line_input, "%U", unformat_hex_string, &key))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (type == IPSEC_IF_SET_KEY_TYPE_NONE)
+ {
+ error = clib_error_return (0, "unknown key type");
+ goto done;
+ }
+
+ if (alg > 0 && vec_len (key) == 0)
+ {
+ error = clib_error_return (0, "key is not specified");
+ goto done;
+ }
+
+ if (hw_if_index == (u32) ~ 0)
+ {
+ error = clib_error_return (0, "interface not specified");
+ goto done;
+ }
+
+ ipsec_set_interface_key (im->vnet_main, hw_if_index, type, alg, key);
+
+done:
+ vec_free (key);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_key_command, static) = {
+ .path = "set interface ipsec key",
+ .short_help =
+ "set interface ipsec key <int> <local|remote> <crypto|integ> <key type> <key>",
+ .function = set_interface_key_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+ipsec_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ipsec_cli_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c
new file mode 100644
index 00000000..38aed79a
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_format.c
@@ -0,0 +1,141 @@
+/*
+ * ipsec_format.c : IPSec format/unformat functions
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/interface.h>
+
+#include <vnet/ipsec/ipsec.h>
+
+u8 *
+format_ipsec_policy_action (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ char *t = 0;
+
+ switch (i)
+ {
+#define _(v,f,str) case IPSEC_POLICY_ACTION_##f: t = str; break;
+ foreach_ipsec_policy_action
+#undef _
+ default:
+ s = format (s, "unknown");
+ }
+ s = format (s, "%s", t);
+ return s;
+}
+
+uword
+unformat_ipsec_policy_action (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_POLICY_ACTION_##f;
+ foreach_ipsec_policy_action
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+u8 *
+format_ipsec_crypto_alg (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u8 *t = 0;
+
+ switch (i)
+ {
+#define _(v,f,str) case IPSEC_CRYPTO_ALG_##f: t = (u8 *) str; break;
+ foreach_ipsec_crypto_alg
+#undef _
+ default:
+ s = format (s, "unknown");
+ }
+ s = format (s, "%s", t);
+ return s;
+}
+
+uword
+unformat_ipsec_crypto_alg (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_CRYPTO_ALG_##f;
+ foreach_ipsec_crypto_alg
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+u8 *
+format_ipsec_integ_alg (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u8 *t = 0;
+
+ switch (i)
+ {
+#define _(v,f,str) case IPSEC_INTEG_ALG_##f: t = (u8 *) str; break;
+ foreach_ipsec_integ_alg
+#undef _
+ default:
+ s = format (s, "unknown");
+ }
+ s = format (s, "%s", t);
+ return s;
+}
+
+uword
+unformat_ipsec_integ_alg (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_INTEG_ALG_##f;
+ foreach_ipsec_integ_alg
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
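+/*
+ * Render the 64-bit anti-replay window as a bit string,
+ * least-significant bit first.
+ */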
+u8 *
+format_ipsec_replay_window (u8 * s, va_list * args)
+{
+ u64 w = va_arg (*args, u64);
+ u8 i;
+
+ for (i = 0; i < 64; i++)
+ {
+ s = format (s, "%u", w & (1ULL << i) ? 1 : 0);
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_if.c b/src/vnet/ipsec/ipsec_if.c
new file mode 100644
index 00000000..9359a3b7
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_if.c
@@ -0,0 +1,413 @@
+/*
+ * ipsec_if.c : IPSec interface support
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
+
+void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
+static u8 *
+format_ipsec_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "ipsec%d", dev_instance);
+}
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static clib_error_t *
+ipsec_admin_up_down_function (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ ipsec_main_t *im = &ipsec_main;
+ clib_error_t *err = 0;
+ ipsec_tunnel_if_t *t;
+ vnet_hw_interface_t *hi;
+ ipsec_sa_t *sa;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ {
+ t = pool_elt_at_index (im->tunnel_interfaces, hi->hw_instance);
+ ASSERT (im->cb.check_support_cb);
+ sa = pool_elt_at_index (im->sad, t->input_sa_index);
+ err = im->cb.check_support_cb (sa);
+ if (err)
+ return err;
+
+ sa = pool_elt_at_index (im->sad, t->output_sa_index);
+ err = im->cb.check_support_cb (sa);
+ if (err)
+ return err;
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ }
+ else
+ vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ );
+
+ return /* no error */ 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (ipsec_device_class, static) =
+{
+ .name = "IPSec",
+ .format_device_name = format_ipsec_name,
+ .format_tx_trace = format_ipsec_if_output_trace,
+ .tx_function = dummy_interface_tx,
+ .admin_up_down_function = ipsec_admin_up_down_function,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (ipsec_hw_class) =
+{
+ .name = "IPSec",
+ .build_rewrite = default_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+/* *INDENT-ON* */
+
+static int
+ipsec_add_del_tunnel_if_rpc_callback (ipsec_add_del_tunnel_args_t * a)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ASSERT (vlib_get_thread_index () == 0);
+
+ return ipsec_add_del_tunnel_if_internal (vnm, a, NULL);
+}
+
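+/*
+ * Public entry point: tunnel interface create/delete must run on the
+ * main thread, so the request is forwarded there via the RPC callback
+ * above.
+ */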
+int
+ipsec_add_del_tunnel_if (ipsec_add_del_tunnel_args_t * args)
+{
+ vl_api_rpc_call_main_thread (ipsec_add_del_tunnel_if_rpc_callback,
+ (u8 *) args, sizeof (*args));
+ return 0;
+}
+
+int
+ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm,
+ ipsec_add_del_tunnel_args_t * args,
+ u32 * sw_if_index)
+{
+ ipsec_tunnel_if_t *t;
+ ipsec_main_t *im = &ipsec_main;
+ vnet_hw_interface_t *hi = NULL;
+ u32 hw_if_index = ~0;
+ uword *p;
+ ipsec_sa_t *sa;
+
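+  /* Tunnel interfaces are keyed on (remote address, remote SPI) packed
+     into one u64: address in the upper 32 bits, SPI in the lower 32. */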
+ u64 key = (u64) args->remote_ip.as_u32 << 32 | (u64) args->remote_spi;
+ p = hash_get (im->ipsec_if_pool_index_by_key, key);
+
+ if (args->is_add)
+ {
+ /* check if same src/dst pair exists */
+ if (p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get_aligned (im->tunnel_interfaces, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ pool_get (im->sad, sa);
+ memset (sa, 0, sizeof (*sa));
+ t->input_sa_index = sa - im->sad;
+ sa->spi = args->remote_spi;
+ sa->tunnel_src_addr.ip4.as_u32 = args->remote_ip.as_u32;
+ sa->tunnel_dst_addr.ip4.as_u32 = args->local_ip.as_u32;
+ sa->is_tunnel = 1;
+ sa->use_esn = args->esn;
+ sa->use_anti_replay = args->anti_replay;
+ sa->integ_alg = args->integ_alg;
+ if (args->remote_integ_key_len <= sizeof (args->remote_integ_key))
+ {
+ sa->integ_key_len = args->remote_integ_key_len;
+ clib_memcpy (sa->integ_key, args->remote_integ_key,
+ args->remote_integ_key_len);
+ }
+ sa->crypto_alg = args->crypto_alg;
+ if (args->remote_crypto_key_len <= sizeof (args->remote_crypto_key))
+ {
+ sa->crypto_key_len = args->remote_crypto_key_len;
+ clib_memcpy (sa->crypto_key, args->remote_crypto_key,
+ args->remote_crypto_key_len);
+ }
+
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (t->input_sa_index, args->is_add) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ pool_get (im->sad, sa);
+ memset (sa, 0, sizeof (*sa));
+ t->output_sa_index = sa - im->sad;
+ sa->spi = args->local_spi;
+ sa->tunnel_src_addr.ip4.as_u32 = args->local_ip.as_u32;
+ sa->tunnel_dst_addr.ip4.as_u32 = args->remote_ip.as_u32;
+ sa->is_tunnel = 1;
+ sa->seq = 1;
+ sa->use_esn = args->esn;
+ sa->use_anti_replay = args->anti_replay;
+ sa->integ_alg = args->integ_alg;
+ if (args->local_integ_key_len <= sizeof (args->local_integ_key))
+ {
+ sa->integ_key_len = args->local_integ_key_len;
+ clib_memcpy (sa->integ_key, args->local_integ_key,
+ args->local_integ_key_len);
+ }
+ sa->crypto_alg = args->crypto_alg;
+ if (args->local_crypto_key_len <= sizeof (args->local_crypto_key))
+ {
+ sa->crypto_key_len = args->local_crypto_key_len;
+ clib_memcpy (sa->crypto_key, args->local_crypto_key,
+ args->local_crypto_key_len);
+ }
+
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (t->output_sa_index, args->is_add) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ hash_set (im->ipsec_if_pool_index_by_key, key,
+ t - im->tunnel_interfaces);
+
+ if (vec_len (im->free_tunnel_if_indices) > 0)
+ {
+ hw_if_index =
+ im->free_tunnel_if_indices[vec_len (im->free_tunnel_if_indices) -
+ 1];
+ _vec_len (im->free_tunnel_if_indices) -= 1;
+ }
+ else
+ {
+ hw_if_index =
+ vnet_register_interface (vnm, ipsec_device_class.index,
+ t - im->tunnel_interfaces,
+ ipsec_hw_class.index,
+ t - im->tunnel_interfaces);
+ }
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->output_node_index = ipsec_if_output_node.index;
+ t->hw_if_index = hw_if_index;
+
+      /* 1st interface, register protocol */
+ if (pool_elts (im->tunnel_interfaces) == 1)
+ ip4_register_protocol (IP_PROTOCOL_IPSEC_ESP,
+ ipsec_if_input_node.index);
+
+ }
+ else
+ {
+ vnet_interface_main_t *vim = &vnm->interface_main;
+
+ /* check if exists */
+ if (!p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ t = pool_elt_at_index (im->tunnel_interfaces, p[0]);
+ hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index, 0); /* admin down */
+ vec_add1 (im->free_tunnel_if_indices, t->hw_if_index);
+
+ vnet_interface_counter_lock (vim);
+ vlib_zero_combined_counter (vim->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX, hi->sw_if_index);
+ vlib_zero_combined_counter (vim->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX, hi->sw_if_index);
+ vnet_interface_counter_unlock (vim);
+
+ /* delete input and output SA */
+ sa = pool_elt_at_index (im->sad, t->input_sa_index);
+
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (t->input_sa_index, args->is_add) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ pool_put (im->sad, sa);
+
+ sa = pool_elt_at_index (im->sad, t->output_sa_index);
+
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (t->output_sa_index, args->is_add) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ pool_put (im->sad, sa);
+
+ hash_unset (im->ipsec_if_pool_index_by_key, key);
+ pool_put (im->tunnel_interfaces, t);
+ }
+
+ if (sw_if_index)
+ *sw_if_index = hi->sw_if_index;
+
+ return 0;
+}
+
+int
+ipsec_add_del_ipsec_gre_tunnel (vnet_main_t * vnm,
+ ipsec_add_del_ipsec_gre_tunnel_args_t * args)
+{
+ ipsec_tunnel_if_t *t = 0;
+ ipsec_main_t *im = &ipsec_main;
+ uword *p;
+ ipsec_sa_t *sa;
+ u64 key;
+ u32 isa, osa;
+
+ p = hash_get (im->sa_index_by_sa_id, args->local_sa_id);
+ if (!p)
+ return VNET_API_ERROR_INVALID_VALUE;
+ isa = p[0];
+
+ p = hash_get (im->sa_index_by_sa_id, args->remote_sa_id);
+ if (!p)
+ return VNET_API_ERROR_INVALID_VALUE;
+ osa = p[0];
+ sa = pool_elt_at_index (im->sad, p[0]);
+
+ if (sa->is_tunnel)
+ key = (u64) sa->tunnel_dst_addr.ip4.as_u32 << 32 | (u64) sa->spi;
+ else
+ key = (u64) args->remote_ip.as_u32 << 32 | (u64) sa->spi;
+
+ p = hash_get (im->ipsec_if_pool_index_by_key, key);
+
+ if (args->is_add)
+ {
+ /* check if same src/dst pair exists */
+ if (p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get_aligned (im->tunnel_interfaces, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ t->input_sa_index = isa;
+ t->output_sa_index = osa;
+ t->hw_if_index = ~0;
+ hash_set (im->ipsec_if_pool_index_by_key, key,
+ t - im->tunnel_interfaces);
+
+      /* 1st interface, register protocol */
+ if (pool_elts (im->tunnel_interfaces) == 1)
+ ip4_register_protocol (IP_PROTOCOL_IPSEC_ESP,
+ ipsec_if_input_node.index);
+ }
+ else
+ {
+ /* check if exists */
+ if (!p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ t = pool_elt_at_index (im->tunnel_interfaces, p[0]);
+ hash_unset (im->ipsec_if_pool_index_by_key, key);
+ pool_put (im->tunnel_interfaces, t);
+ }
+ return 0;
+}
+
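+/*
+ * Update one of the four per-tunnel keys: local keys live on the output
+ * SA, remote keys on the input SA. The crypto backend, when registered,
+ * is notified via add_del_sa_sess_cb after each change.
+ */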
+int
+ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index,
+ ipsec_if_set_key_type_t type, u8 alg, u8 * key)
+{
+ ipsec_main_t *im = &ipsec_main;
+ vnet_hw_interface_t *hi;
+ ipsec_tunnel_if_t *t;
+ ipsec_sa_t *sa;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ t = pool_elt_at_index (im->tunnel_interfaces, hi->dev_instance);
+
+ if (type == IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO)
+ {
+ sa = pool_elt_at_index (im->sad, t->output_sa_index);
+ sa->crypto_alg = alg;
+ sa->crypto_key_len = vec_len (key);
+ clib_memcpy (sa->crypto_key, key, vec_len (key));
+
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (t->output_sa_index, 0) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ else if (type == IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG)
+ {
+ sa = pool_elt_at_index (im->sad, t->output_sa_index);
+ sa->integ_alg = alg;
+ sa->integ_key_len = vec_len (key);
+ clib_memcpy (sa->integ_key, key, vec_len (key));
+
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (t->output_sa_index, 0) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO)
+ {
+ sa = pool_elt_at_index (im->sad, t->input_sa_index);
+ sa->crypto_alg = alg;
+ sa->crypto_key_len = vec_len (key);
+ clib_memcpy (sa->crypto_key, key, vec_len (key));
+
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (t->input_sa_index, 0) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG)
+ {
+ sa = pool_elt_at_index (im->sad, t->input_sa_index);
+ sa->integ_alg = alg;
+ sa->integ_key_len = vec_len (key);
+ clib_memcpy (sa->integ_key, key, vec_len (key));
+
+ if (im->cb.add_del_sa_sess_cb &&
+ im->cb.add_del_sa_sess_cb (t->input_sa_index, 0) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ else
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ return 0;
+}
+
+
+clib_error_t *
+ipsec_tunnel_if_init (vlib_main_t * vm)
+{
+ ipsec_main_t *im = &ipsec_main;
+
+ im->ipsec_if_pool_index_by_key = hash_create (0, sizeof (uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ipsec_tunnel_if_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_if_in.c b/src/vnet/ipsec/ipsec_if_in.c
new file mode 100644
index 00000000..b0761224
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_if_in.c
@@ -0,0 +1,221 @@
+/*
+ * ipsec_if_in.c : IPSec interface input node
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
+
+/* Statistics (not really errors) */
+#define foreach_ipsec_if_input_error \
+_(RX, "good packets received")
+
+static char *ipsec_if_input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ipsec_if_input_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) IPSEC_IF_INPUT_ERROR_##sym,
+ foreach_ipsec_if_input_error
+#undef _
+ IPSEC_IF_INPUT_N_ERROR,
+} ipsec_if_input_error_t;
+
+
+typedef struct
+{
+ u32 spi;
+ u32 seq;
+} ipsec_if_input_trace_t;
+
+u8 *
+format_ipsec_if_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ipsec_if_input_trace_t *t = va_arg (*args, ipsec_if_input_trace_t *);
+
+ s = format (s, "IPSec: spi %u seq %u", t->spi, t->seq);
+ return s;
+}
+
+static uword
+ipsec_if_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ipsec_main_t *im = &ipsec_main;
+ vnet_main_t *vnm = im->vnet_main;
+ vnet_interface_main_t *vim = &vnm->interface_main;
+ esp_main_t *em = &esp_main;
+ u32 *from, *to_next = 0, next_index;
+ u32 n_left_from, last_sw_if_index = ~0;
+ u32 thread_index = vlib_get_thread_index ();
+ u64 n_bytes = 0, n_packets = 0;
+ u8 icv_len;
+ ipsec_tunnel_if_t *last_t = NULL;
+ ipsec_sa_t *sa0;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0, sw_if_index0;
+ vlib_buffer_t *b0;
+ ip4_header_t *ip0;
+ esp_header_t *esp0;
+ uword *p;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+
+ next0 = IPSEC_INPUT_NEXT_DROP;
+
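+	  /* Tunnel interfaces are keyed on (outer IPv4 source, SPI):
+	   * the source address fills the upper 32 bits of the u64 key
+	   * and the host-order SPI the lower 32 bits, matching the key
+	   * construction used when the tunnel was added to
+	   * im->ipsec_if_pool_index_by_key. */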
+ u64 key = (u64) ip0->src_address.as_u32 << 32 |
+ (u64) clib_net_to_host_u32 (esp0->spi);
+
+ p = hash_get (im->ipsec_if_pool_index_by_key, key);
+
+ if (p)
+ {
+ ipsec_tunnel_if_t *t;
+ t = pool_elt_at_index (im->tunnel_interfaces, p[0]);
+ vnet_buffer (b0)->ipsec.sad_index = t->input_sa_index;
+ if (t->hw_if_index != ~0)
+ {
+ vnet_hw_interface_t *hi;
+
+ vnet_buffer (b0)->ipsec.flags = 0;
+ hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+ sw_if_index0 = hi->sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
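+	      /* Rx counters are batched: consecutive packets on the
+	       * same tunnel interface only accumulate n_packets and
+	       * n_bytes; the totals are flushed when the interface
+	       * changes and once more after the loop. */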
+ if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index))
+ {
+ n_packets++;
+ n_bytes += vlib_buffer_length_in_chain (vm, b0);
+ }
+ else
+ {
+		  if (last_t)
+		    {
+		      /* Compute the overhead with the previous tunnel's
+		       * SA and credit the flushed totals to the previous
+		       * interface, not the one just switched to. */
+		      sa0 =
+			pool_elt_at_index (im->sad, last_t->input_sa_index);
+		      icv_len =
+			em->esp_integ_algs[sa0->integ_alg].trunc_size;
+
+		      /* length = packet length - ESP/tunnel overhead */
+		      n_bytes -= n_packets * (sizeof (ip4_header_t) +
+					      sizeof (esp_header_t) +
+					      sizeof (esp_footer_t) +
+					      16 /* aes-cbc IV */ + icv_len);
+
+		      vlib_increment_combined_counter
+			(vim->combined_sw_if_counters
+			 + VNET_INTERFACE_COUNTER_RX,
+			 thread_index, last_sw_if_index, n_packets,
+			 n_bytes);
+		    }
+
+ last_sw_if_index = sw_if_index0;
+ last_t = t;
+ n_packets = 1;
+ n_bytes = vlib_buffer_length_in_chain (vm, b0);
+ }
+ }
+ else
+ {
+ vnet_buffer (b0)->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL;
+ }
+
+ vlib_buffer_advance (b0, ip4_header_bytes (ip0));
+ next0 = im->esp_decrypt_next_index;
+ }
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_if_input_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      tr->spi = clib_net_to_host_u32 (esp0->spi);
+	      tr->seq = clib_net_to_host_u32 (esp0->seq);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ if (last_t)
+ {
+ sa0 = pool_elt_at_index (im->sad, last_t->input_sa_index);
+ icv_len = em->esp_integ_algs[sa0->integ_alg].trunc_size;
+
+ n_bytes -= n_packets * (sizeof (ip4_header_t) + sizeof (esp_header_t) +
+ sizeof (esp_footer_t) + 16 /* aes-cbc IV */ +
+ icv_len);
+ vlib_increment_combined_counter (vim->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ last_sw_if_index, n_packets, n_bytes);
+ }
+
+ vlib_node_increment_counter (vm, ipsec_if_input_node.index,
+ IPSEC_IF_INPUT_ERROR_RX,
+ from_frame->n_vectors);
+
+ return from_frame->n_vectors;
+}
+
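+/* Declared as a sibling of ipsec-input-ip4 so it shares that node's
+ * next-node index space; im->esp_decrypt_next_index is therefore
+ * valid here as well. */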
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ipsec_if_input_node) = {
+ .function = ipsec_if_input_node_fn,
+ .name = "ipsec-if-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ipsec_if_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ipsec_if_input_error_strings),
+ .error_strings = ipsec_if_input_error_strings,
+
+ .sibling_of = "ipsec-input-ip4",
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ipsec_if_input_node, ipsec_if_input_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_if_out.c b/src/vnet/ipsec/ipsec_if_out.c
new file mode 100644
index 00000000..cab6ff3a
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_if_out.c
@@ -0,0 +1,172 @@
+/*
+ * ipsec_if_out.c : IPSec interface output node
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/ipsec/ipsec.h>
+
+/* Statistics (not really errors) */
+#define foreach_ipsec_if_output_error \
+_(TX, "good packets transmitted")
+
+static char *ipsec_if_output_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ipsec_if_output_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) IPSEC_IF_OUTPUT_ERROR_##sym,
+ foreach_ipsec_if_output_error
+#undef _
+ IPSEC_IF_OUTPUT_N_ERROR,
+} ipsec_if_output_error_t;
+
+
+typedef struct
+{
+ u32 spi;
+ u32 seq;
+} ipsec_if_output_trace_t;
+
+u8 *
+format_ipsec_if_output_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ipsec_if_output_trace_t *t = va_arg (*args, ipsec_if_output_trace_t *);
+
+ s = format (s, "IPSec: spi %u seq %u", t->spi, t->seq);
+ return s;
+}
+
+static uword
+ipsec_if_output_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ipsec_main_t *im = &ipsec_main;
+ vnet_main_t *vnm = im->vnet_main;
+ vnet_interface_main_t *vim = &vnm->interface_main;
+ u32 *from, *to_next = 0, next_index;
+ u32 n_left_from, sw_if_index0, last_sw_if_index = ~0;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 n_bytes = 0, n_packets = 0;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0, len0;
+ vlib_buffer_t *b0;
+ ipsec_tunnel_if_t *t0;
+ vnet_hw_interface_t *hi0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ t0 = pool_elt_at_index (im->tunnel_interfaces, hi0->dev_instance);
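+	  /* Stash the tunnel's output SA index in the buffer metadata;
+	   * the esp-encrypt node uses it to select the SA. */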
+ vnet_buffer (b0)->ipsec.sad_index = t0->output_sa_index;
+ next0 = im->esp_encrypt_next_index;
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index))
+ {
+ n_packets++;
+ n_bytes += len0;
+ }
+	  else
+	    {
+	      /* Credit the batched totals to the previous interface;
+	       * last_sw_if_index is ~0 until the first packet. */
+	      if (last_sw_if_index != ~0)
+		vlib_increment_combined_counter
+		  (vim->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+		   thread_index, last_sw_if_index, n_packets, n_bytes);
+	      last_sw_if_index = sw_if_index0;
+	      n_packets = 1;
+	      n_bytes = len0;
+	    }
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_if_output_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ ipsec_sa_t *sa0 =
+ pool_elt_at_index (im->sad, t0->output_sa_index);
+ tr->spi = sa0->spi;
+ tr->seq = sa0->seq;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ if (last_sw_if_index != ~0)
+ {
+ vlib_increment_combined_counter (vim->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ last_sw_if_index, n_packets, n_bytes);
+ }
+
+ vlib_node_increment_counter (vm, ipsec_if_output_node.index,
+ IPSEC_IF_OUTPUT_ERROR_TX,
+ from_frame->n_vectors);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ipsec_if_output_node) = {
+ .function = ipsec_if_output_node_fn,
+ .name = "ipsec-if-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ipsec_if_output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ipsec_if_output_error_strings),
+ .error_strings = ipsec_if_output_error_strings,
+
+ .sibling_of = "ipsec-output-ip4",
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ipsec_if_output_node, ipsec_if_output_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c
new file mode 100644
index 00000000..f27058bb
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_input.c
@@ -0,0 +1,430 @@
+/*
+ * ipsec_input.c : IPSec inbound policy lookup and decap dispatch
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/feature/feature.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
+
+#define foreach_ipsec_input_error \
+ _(RX_PKTS, "IPSEC pkts received") \
+ _(DECRYPTION_FAILED, "IPSEC decryption failed")
+
+typedef enum
+{
+#define _(sym,str) IPSEC_INPUT_ERROR_##sym,
+ foreach_ipsec_input_error
+#undef _
+ IPSEC_INPUT_N_ERROR,
+} ipsec_input_error_t;
+
+static char *ipsec_input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ipsec_input_error
+#undef _
+};
+
+typedef struct
+{
+ u32 sa_id;
+ u32 spi;
+ u32 seq;
+} ipsec_input_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ipsec_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ipsec_input_trace_t *t = va_arg (*args, ipsec_input_trace_t *);
+
+ if (t->spi == 0 && t->seq == 0)
+ {
+ s = format (s, "esp: no esp packet");
+ return s;
+ }
+
+ if (t->sa_id != 0)
+ {
+ s = format (s, "esp: sa_id %u spi %u seq %u", t->sa_id, t->spi, t->seq);
+ }
+ else
+ {
+ s = format (s, "esp: no sa spi %u seq %u", t->spi, t->seq);
+ }
+ return s;
+}
+
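+/*
+ * Match an inbound ESP packet against the SPD's PROTECT policies.
+ * For tunnel-mode SAs the outer source/destination addresses must
+ * match the SA endpoints exactly; for transport mode the policy's
+ * local/remote address ranges (kept in host byte order) are checked.
+ */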
+always_inline ipsec_policy_t *
+ipsec_input_protect_policy_match (ipsec_spd_t * spd, u32 sa, u32 da, u32 spi)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ ipsec_sa_t *s;
+ u32 *i;
+
+ vec_foreach (i, spd->ipv4_inbound_protect_policy_indices)
+ {
+ p = pool_elt_at_index (spd->policies, *i);
+ s = pool_elt_at_index (im->sad, p->sa_index);
+
+ if (spi != s->spi)
+ continue;
+
+ if (s->is_tunnel)
+ {
+ if (da != clib_net_to_host_u32 (s->tunnel_dst_addr.ip4.as_u32))
+ continue;
+
+ if (sa != clib_net_to_host_u32 (s->tunnel_src_addr.ip4.as_u32))
+ continue;
+
+ return p;
+ }
+
+ if (da < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
+ continue;
+
+ if (da > clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
+ continue;
+
+ if (sa < clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
+ continue;
+
+ if (sa > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
+ continue;
+
+ return p;
+ }
+ return 0;
+}
+
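+/*
+ * IPv6 addresses are stored in network byte order, so a bytewise
+ * memcmp over all 16 bytes orders them exactly like a numeric 128-bit
+ * compare, which makes this simple range check valid.
+ */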
+always_inline uword
+ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
+ ip6_address_t * ua)
+{
+ if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
+ (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
+ return 1;
+ return 0;
+}
+
+always_inline ipsec_policy_t *
+ipsec_input_ip6_protect_policy_match (ipsec_spd_t * spd,
+ ip6_address_t * sa,
+ ip6_address_t * da, u32 spi)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ ipsec_sa_t *s;
+ u32 *i;
+
+ vec_foreach (i, spd->ipv6_inbound_protect_policy_indices)
+ {
+ p = pool_elt_at_index (spd->policies, *i);
+ s = pool_elt_at_index (im->sad, p->sa_index);
+
+ if (spi != s->spi)
+ continue;
+
+ if (s->is_tunnel)
+ {
+ if (!ip6_address_is_equal (sa, &s->tunnel_src_addr.ip6))
+ continue;
+
+ if (!ip6_address_is_equal (da, &s->tunnel_dst_addr.ip6))
+ continue;
+
+ return p;
+ }
+
+ if (!ip6_addr_match_range (sa, &p->raddr.start.ip6, &p->raddr.stop.ip6))
+ continue;
+
+ if (!ip6_addr_match_range (da, &p->laddr.start.ip6, &p->laddr.stop.ip6))
+ continue;
+
+ return p;
+ }
+ return 0;
+}
+
+static vlib_node_registration_t ipsec_input_ip4_node;
+
+static uword
+ipsec_input_ip4_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, next_index, *to_next;
+ ipsec_main_t *im = &ipsec_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0;
+ vlib_buffer_t *b0;
+ ip4_header_t *ip0;
+ esp_header_t *esp0;
+ ip4_ipsec_config_t *c0;
+ ipsec_spd_t *spd0;
+ ipsec_policy_t *p0 = 0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
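+	  /* Pick up the SPD index attached as per-feature config data
+	   * when ipsec was enabled on this interface, along with the
+	   * default next node for the feature arc. */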
+ c0 =
+ vnet_feature_next_with_data (vnet_buffer (b0)->sw_if_index
+ [VLIB_RX], &next0, b0,
+ sizeof (c0[0]));
+
+ spd0 = pool_elt_at_index (im->spds, c0->spd_index);
+
+ ip0 = vlib_buffer_get_current (b0);
+ esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+
+ if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
+ {
+#if 0
+ clib_warning
+ ("packet received from %U to %U spi %u size %u spd_id %u",
+ format_ip4_address, ip0->src_address.as_u8,
+ format_ip4_address, ip0->dst_address.as_u8,
+ clib_net_to_host_u32 (esp0->spi),
+ clib_net_to_host_u16 (ip0->length), spd0->id);
+#endif
+
+ p0 = ipsec_input_protect_policy_match (spd0,
+ clib_net_to_host_u32
+ (ip0->src_address.
+ as_u32),
+ clib_net_to_host_u32
+ (ip0->dst_address.
+ as_u32),
+ clib_net_to_host_u32
+ (esp0->spi));
+
+ if (PREDICT_TRUE (p0 != 0))
+ {
+ p0->counter.packets++;
+ p0->counter.bytes += clib_net_to_host_u16 (ip0->length);
+ vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
+ vnet_buffer (b0)->ipsec.flags = 0;
+ next0 = im->esp_decrypt_next_index;
+ vlib_buffer_advance (b0, ip4_header_bytes (ip0));
+ goto trace0;
+ }
+ }
+
+ /* FIXME bypass and discard */
+
+ trace0:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)
+ {
+ if (p0)
+ tr->sa_id = p0->sa_id;
+		  tr->spi = clib_net_to_host_u32 (esp0->spi);
+		  tr->seq = clib_net_to_host_u32 (esp0->seq);
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, bi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, ipsec_input_ip4_node.index,
+ IPSEC_INPUT_ERROR_RX_PKTS,
+ from_frame->n_vectors);
+
+ return from_frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ipsec_input_ip4_node,static) = {
+ .function = ipsec_input_ip4_node_fn,
+ .name = "ipsec-input-ip4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ipsec_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ipsec_input_error_strings),
+ .error_strings = ipsec_input_error_strings,
+
+ .n_next_nodes = IPSEC_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [IPSEC_INPUT_NEXT_##s] = n,
+ foreach_ipsec_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ipsec_input_ip4_node, ipsec_input_ip4_node_fn)
+
+static vlib_node_registration_t ipsec_input_ip6_node;
+
+static uword
+ipsec_input_ip6_node_fn (vlib_main_t * vm,
+			 vlib_node_runtime_t * node,
+			 vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, next_index, *to_next;
+ ipsec_main_t *im = &ipsec_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0;
+ esp_header_t *esp0;
+ ip4_ipsec_config_t *c0;
+ ipsec_spd_t *spd0;
+ ipsec_policy_t *p0 = 0;
+ u32 header_size = sizeof (ip0[0]);
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ c0 =
+ vnet_feature_next_with_data (vnet_buffer (b0)->sw_if_index
+ [VLIB_RX], &next0, b0,
+ sizeof (c0[0]));
+
+ spd0 = pool_elt_at_index (im->spds, c0->spd_index);
+
+ ip0 = vlib_buffer_get_current (b0);
+ esp0 = (esp_header_t *) ((u8 *) ip0 + header_size);
+
+ if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
+ {
+#if 0
+ clib_warning
+ ("packet received from %U to %U spi %u size %u spd_id %u",
+ format_ip6_address, &ip0->src_address, format_ip6_address,
+ &ip0->dst_address, clib_net_to_host_u32 (esp0->spi),
+ clib_net_to_host_u16 (ip0->payload_length) + header_size,
+ spd0->id);
+#endif
+ p0 = ipsec_input_ip6_protect_policy_match (spd0,
+ &ip0->src_address,
+ &ip0->dst_address,
+ clib_net_to_host_u32
+ (esp0->spi));
+
+ if (PREDICT_TRUE (p0 != 0))
+ {
+ p0->counter.packets++;
+ p0->counter.bytes +=
+ clib_net_to_host_u16 (ip0->payload_length);
+ p0->counter.bytes += header_size;
+ vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
+ vnet_buffer (b0)->ipsec.flags = 0;
+ next0 = im->esp_decrypt_next_index;
+ vlib_buffer_advance (b0, header_size);
+ goto trace0;
+ }
+ }
+
+ trace0:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)
+ {
+ if (p0)
+ tr->sa_id = p0->sa_id;
+		  tr->spi = clib_net_to_host_u32 (esp0->spi);
+		  tr->seq = clib_net_to_host_u32 (esp0->seq);
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, ipsec_input_ip6_node.index,
+ IPSEC_INPUT_ERROR_RX_PKTS,
+ from_frame->n_vectors);
+
+ return from_frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ipsec_input_ip6_node,static) = {
+ .function = ipsec_input_ip6_node_fn,
+ .name = "ipsec-input-ip6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ipsec_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ipsec_input_error_strings),
+ .error_strings = ipsec_input_error_strings,
+
+ .sibling_of = "ipsec-input-ip4",
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ipsec_input_ip6_node, ipsec_input_ip6_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c
new file mode 100644
index 00000000..1b8070d6
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_output.c
@@ -0,0 +1,458 @@
+/*
+ * ipsec_output.c : IPSec output node
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/ipsec/ipsec.h>
+
+#if WITH_LIBSSL > 0
+
+#define foreach_ipsec_output_error \
+ _(RX_PKTS, "IPSec pkts received") \
+ _(POLICY_DISCARD, "IPSec policy discard") \
+ _(POLICY_NO_MATCH, "IPSec policy (no match)") \
+ _(POLICY_PROTECT, "IPSec policy protect") \
+ _(POLICY_BYPASS, "IPSec policy bypass") \
+ _(ENCAPS_FAILED, "IPSec encapsulation failed")
+
+typedef enum
+{
+#define _(sym,str) IPSEC_OUTPUT_ERROR_##sym,
+ foreach_ipsec_output_error
+#undef _
+  IPSEC_OUTPUT_N_ERROR,
+} ipsec_output_error_t;
+
+static char *ipsec_output_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ipsec_output_error
+#undef _
+};
+
+static vlib_node_registration_t ipsec_output_ip4_node;
+static vlib_node_registration_t ipsec_output_ip6_node;
+
+typedef struct
+{
+ u32 spd_id;
+} ipsec_output_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ipsec_output_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ipsec_output_trace_t *t = va_arg (*args, ipsec_output_trace_t *);
+
+ if (t->spd_id != ~0)
+ {
+ s = format (s, "spd %u ", t->spd_id);
+ }
+ else
+ {
+ s = format (s, "no spd");
+ }
+ return s;
+}
+
+always_inline ipsec_policy_t *
+ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp,
+ u16 rp)
+{
+ ipsec_policy_t *p;
+ u32 *i;
+
+ if (!spd)
+ return 0;
+
+ vec_foreach (i, spd->ipv4_outbound_policies)
+ {
+ p = pool_elt_at_index (spd->policies, *i);
+ if (PREDICT_FALSE (p->protocol && (p->protocol != pr)))
+ continue;
+
+ if (la < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
+ continue;
+
+ if (la > clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
+ continue;
+
+ if (ra < clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
+ continue;
+
+ if (ra > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
+ continue;
+
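+      /* Port ranges are only meaningful for TCP/UDP; for any other
+       * protocol the address/protocol match above suffices. */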
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)))
+ return p;
+
+ if (lp < p->lport.start)
+ continue;
+
+ if (lp > p->lport.stop)
+ continue;
+
+ if (rp < p->rport.start)
+ continue;
+
+ if (rp > p->rport.stop)
+ continue;
+
+ return p;
+ }
+ return 0;
+}
+
+always_inline uword
+ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
+ ip6_address_t * ua)
+{
+ if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
+ (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
+ return 1;
+ return 0;
+}
+
+always_inline ipsec_policy_t *
+ipsec_output_ip6_policy_match (ipsec_spd_t * spd,
+ ip6_address_t * la,
+ ip6_address_t * ra, u16 lp, u16 rp, u8 pr)
+{
+ ipsec_policy_t *p;
+ u32 *i;
+
+ if (!spd)
+ return 0;
+
+ vec_foreach (i, spd->ipv6_outbound_policies)
+ {
+ p = pool_elt_at_index (spd->policies, *i);
+ if (PREDICT_FALSE (p->protocol && (p->protocol != pr)))
+ continue;
+
+ if (!ip6_addr_match_range (ra, &p->raddr.start.ip6, &p->raddr.stop.ip6))
+ continue;
+
+ if (!ip6_addr_match_range (la, &p->laddr.start.ip6, &p->laddr.stop.ip6))
+ continue;
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)))
+ return p;
+
+ if (lp < p->lport.start)
+ continue;
+
+ if (lp > p->lport.stop)
+ continue;
+
+ if (rp < p->rport.start)
+ continue;
+
+ if (rp > p->rport.stop)
+ continue;
+
+ return p;
+ }
+
+ return 0;
+}
+
+static inline uword
+ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ipv6)
+{
+ ipsec_main_t *im = &ipsec_main;
+
+ u32 *from, *to_next = 0;
+ u32 n_left_from, sw_if_index0, last_sw_if_index = (u32) ~ 0;
+ u32 next_node_index = (u32) ~ 0, last_next_node_index = (u32) ~ 0;
+ vlib_frame_t *f = 0;
+ u32 spd_index0 = ~0;
+ ipsec_spd_t *spd0 = 0;
+ u64 nc_protect = 0, nc_bypass = 0, nc_discard = 0, nc_nomatch = 0;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ipsec_policy_t *p0;
+ ip4_header_t *ip0;
+ ip6_header_t *ip6_0 = 0;
+ udp_header_t *udp0;
+ u32 iph_offset = 0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ iph_offset = vnet_buffer (b0)->ip.save_rewrite_length;
+ ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0)
+ + iph_offset);
+
+ /* lookup for SPD only if sw_if_index is changed */
+ if (PREDICT_FALSE (last_sw_if_index != sw_if_index0))
+ {
+ uword *p = hash_get (im->spd_index_by_sw_if_index, sw_if_index0);
+ ASSERT (p);
+ spd_index0 = p[0];
+ spd0 = pool_elt_at_index (im->spds, spd_index0);
+ last_sw_if_index = sw_if_index0;
+ }
+
+ if (is_ipv6)
+ {
+ ip6_0 = (ip6_header_t *) ((u8 *) vlib_buffer_get_current (b0)
+ + iph_offset);
+
+ udp0 = ip6_next_header (ip6_0);
+#if 0
+ clib_warning
+ ("packet received from %U port %u to %U port %u spd_id %u",
+ format_ip6_address, &ip6_0->src_address,
+ clib_net_to_host_u16 (udp0->src_port), format_ip6_address,
+ &ip6_0->dst_address, clib_net_to_host_u16 (udp0->dst_port),
+ spd0->id);
+#endif
+
+ p0 = ipsec_output_ip6_policy_match (spd0,
+ &ip6_0->src_address,
+ &ip6_0->dst_address,
+ clib_net_to_host_u16
+ (udp0->src_port),
+ clib_net_to_host_u16
+ (udp0->dst_port),
+ ip6_0->protocol);
+ }
+ else
+ {
+ udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+
+#if 0
+ clib_warning ("packet received from %U to %U port %u",
+ format_ip4_address, ip0->src_address.as_u8,
+ format_ip4_address, ip0->dst_address.as_u8,
+ clib_net_to_host_u16 (udp0->dst_port));
+ clib_warning ("sw_if_index0 %u spd_index0 %u spd_id %u",
+ sw_if_index0, spd_index0, spd0->id);
+#endif
+
+ p0 = ipsec_output_policy_match (spd0, ip0->protocol,
+ clib_net_to_host_u32
+ (ip0->src_address.as_u32),
+ clib_net_to_host_u32
+ (ip0->dst_address.as_u32),
+ clib_net_to_host_u16
+ (udp0->src_port),
+ clib_net_to_host_u16
+ (udp0->dst_port));
+ }
+
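+      /* Three outcomes: PROTECT hands the packet to esp-encrypt,
+       * BYPASS resumes the interface-output feature arc, and DISCARD
+       * or a lookup miss sends it to error-drop. */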
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ if (p0->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ nc_protect++;
+ next_node_index = im->esp_encrypt_node_index;
+ vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
+ vlib_buffer_advance (b0, iph_offset);
+ p0->counter.packets++;
+ if (is_ipv6)
+ {
+ p0->counter.bytes +=
+ clib_net_to_host_u16 (ip6_0->payload_length);
+ p0->counter.bytes += sizeof (ip6_header_t);
+ }
+ else
+ {
+ p0->counter.bytes += clib_net_to_host_u16 (ip0->length);
+ }
+ }
+ else if (p0->policy == IPSEC_POLICY_ACTION_BYPASS)
+ {
+ nc_bypass++;
+ next_node_index = get_next_output_feature_node_index (b0, node);
+ p0->counter.packets++;
+ if (is_ipv6)
+ {
+ p0->counter.bytes +=
+ clib_net_to_host_u16 (ip6_0->payload_length);
+ p0->counter.bytes += sizeof (ip6_header_t);
+ }
+ else
+ {
+ p0->counter.bytes += clib_net_to_host_u16 (ip0->length);
+ }
+ }
+ else
+ {
+ nc_discard++;
+ p0->counter.packets++;
+ if (is_ipv6)
+ {
+ p0->counter.bytes +=
+ clib_net_to_host_u16 (ip6_0->payload_length);
+ p0->counter.bytes += sizeof (ip6_header_t);
+ }
+ else
+ {
+ p0->counter.bytes += clib_net_to_host_u16 (ip0->length);
+ }
+ next_node_index = im->error_drop_node_index;
+ }
+ }
+ else
+ {
+ nc_nomatch++;
+ next_node_index = im->error_drop_node_index;
+ }
+
+ from += 1;
+ n_left_from -= 1;
+
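+      /* Instead of vlib_validate_buffer_enqueue_x1, frames are handed
+       * directly to the chosen next node; consecutive packets bound
+       * for the same node are coalesced into one frame. */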
+ if (PREDICT_FALSE ((last_next_node_index != next_node_index) || f == 0))
+ {
+	  /* flush the frame accumulated for the previous next node */
+	  if (f)
+	    vlib_put_frame_to_node (vm, last_next_node_index, f);
+
+ last_next_node_index = next_node_index;
+
+ f = vlib_get_frame_to_node (vm, next_node_index);
+ to_next = vlib_frame_vector_args (f);
+ }
+
+ to_next[0] = bi0;
+ to_next += 1;
+ f->n_vectors++;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_output_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ if (spd0)
+ tr->spd_id = spd0->id;
+ }
+ }
+
+  /* f is NULL only if the frame carried no packets */
+  if (f)
+    vlib_put_frame_to_node (vm, next_node_index, f);
+ vlib_node_increment_counter (vm, node->node_index,
+ IPSEC_OUTPUT_ERROR_POLICY_PROTECT, nc_protect);
+ vlib_node_increment_counter (vm, node->node_index,
+ IPSEC_OUTPUT_ERROR_POLICY_BYPASS, nc_bypass);
+ vlib_node_increment_counter (vm, node->node_index,
+ IPSEC_OUTPUT_ERROR_POLICY_DISCARD, nc_discard);
+ vlib_node_increment_counter (vm, node->node_index,
+ IPSEC_OUTPUT_ERROR_POLICY_NO_MATCH,
+ nc_nomatch);
+ return from_frame->n_vectors;
+}
+
+static uword
+ipsec_output_ip4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ipsec_output_inline (vm, node, frame, 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ipsec_output_ip4_node,static) = {
+ .function = ipsec_output_ip4_node_fn,
+ .name = "ipsec-output-ip4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ipsec_output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ipsec_output_error_strings),
+ .error_strings = ipsec_output_error_strings,
+
+ .n_next_nodes = IPSEC_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [IPSEC_OUTPUT_NEXT_##s] = n,
+ foreach_ipsec_output_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ipsec_output_ip4_node, ipsec_output_ip4_node_fn)
+
+static uword
+ipsec_output_ip6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+			  vlib_frame_t * frame)
+{
+ return ipsec_output_inline (vm, node, frame, 1);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ipsec_output_ip6_node,static) = {
+ .function = ipsec_output_ip6_node_fn,
+ .name = "ipsec-output-ip6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ipsec_output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(ipsec_output_error_strings),
+ .error_strings = ipsec_output_error_strings,
+
+ .n_next_nodes = IPSEC_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [IPSEC_OUTPUT_NEXT_##s] = n,
+ foreach_ipsec_output_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ipsec_output_ip6_node, ipsec_output_ip6_node_fn)
+#else /* WITH_LIBSSL > 0 */
+
+/* Dummy ipsec output nodes, used when IPSec support is disabled */
+
+static uword
+ipsec_output_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("IPSec disabled");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+/* The two dummy registrations need distinct symbols to link */
+VLIB_REGISTER_NODE (ipsec_output_ip4_node) = {
+  .vector_size = sizeof (u32),
+  .function = ipsec_output_node_fn,
+  .name = "ipsec-output-ip4",
+};
+
+VLIB_REGISTER_NODE (ipsec_output_ip6_node) = {
+  .vector_size = sizeof (u32),
+  .function = ipsec_output_node_fn,
+  .name = "ipsec-output-ip6",
+};
+/* *INDENT-ON* */
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/dir.dox b/src/vnet/l2/dir.dox
new file mode 100644
index 00000000..8497a2f6
--- /dev/null
+++ b/src/vnet/l2/dir.dox
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+@dir
+@brief Layer 2 Forwarding Code.
+
+This directory contains the source code for basic Layer 2 forwarding.
+
+*/
+/*? %%clicmd:group_label Layer 2 CLI %% ?*/
diff --git a/src/vnet/l2/feat_bitmap.c b/src/vnet/l2/feat_bitmap.c
new file mode 100644
index 00000000..6c046467
--- /dev/null
+++ b/src/vnet/l2/feat_bitmap.c
@@ -0,0 +1,185 @@
+/*
+ * feat_bitmap.c: bitmap for managing feature invocation
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+
+/*
+ * Drop node for feature bitmaps.
+ * Used by features that simply drop, or that are not yet implemented.
+ * Initial feature dispatch nodes don't need to set b0->error for a
+ * possible drop because that is done here.
+ * The next node is always error-drop.
+ */
+
+static vlib_node_registration_t feat_bitmap_drop_node;
+
+#define foreach_feat_bitmap_drop_error \
+_(NO_FWD, "L2 feature forwarding disabled") \
+_(NYI, "L2 feature not implemented")
+
+typedef enum
+{
+#define _(sym,str) FEAT_BITMAP_DROP_ERROR_##sym,
+ foreach_feat_bitmap_drop_error
+#undef _
+ FEAT_BITMAP_DROP_N_ERROR,
+} feat_bitmap_drop_error_t;
+
+static char *feat_bitmap_drop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_feat_bitmap_drop_error
+#undef _
+};
+
+typedef enum
+{
+ FEAT_BITMAP_DROP_NEXT_DROP,
+ FEAT_BITMAP_DROP_N_NEXT,
+} feat_bitmap_drop_next_t;
+
+typedef struct
+{
+ u32 feature_bitmap;
+} feat_bitmap_drop_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_feat_bitmap_drop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ feat_bitmap_drop_trace_t *t = va_arg (*args, feat_bitmap_drop_trace_t *);
+
+ s =
+ format (s, "feat_bitmap_drop: feature bitmap 0x%08x", t->feature_bitmap);
+ return s;
+}
+
+static uword
+feat_bitmap_drop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ feat_bitmap_drop_next_t next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ feat_bitmap_drop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->feature_bitmap = vnet_buffer (b0)->l2.feature_bitmap;
+ }
+
+ if (vnet_buffer (b0)->l2.feature_bitmap == 1)
+ {
+	      /*
+	       * If we are executing the last feature, this is the
+	       * no-forwarding catch-all.
+	       */
+ b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NO_FWD];
+ }
+ else
+ {
+ b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NYI];
+ }
+ next0 = FEAT_BITMAP_DROP_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+clib_error_t *
+feat_bitmap_drop_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (feat_bitmap_drop_init);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
+ .function = feat_bitmap_drop_node_fn,
+ .name = "feature-bitmap-drop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_feat_bitmap_drop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(feat_bitmap_drop_error_strings),
+ .error_strings = feat_bitmap_drop_error_strings,
+
+ .n_next_nodes = FEAT_BITMAP_DROP_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/feat_bitmap.h b/src/vnet/l2/feat_bitmap.h
new file mode 100644
index 00000000..5940ff7e
--- /dev/null
+++ b/src/vnet/l2/feat_bitmap.h
@@ -0,0 +1,110 @@
+/*
+ * feat_bitmap.h: bitmap for managing feature invocation
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_feat_bitmap_h
+#define included_vnet_l2_feat_bitmap_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+/*
+ * The feature bitmap is a way of organizing input and output feature graph nodes.
+ * The set of features to be executed are arranged in a bitmap with one bit per
+ * feature and each bit positioned in the same order that the features should be
+ * executed. Features can be dynamically removed from the set by masking off their
+ * corresponding bits. The bitmap is stored in packet context. Each feature clears
+ * its bit and then calls feat_bitmap_get_next_node_index() to go to the next
+ * graph node.
+ */
+
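+/*
+ * Usage sketch (names hypothetical, not part of this header): at init
+ * time a node fills its next-index table once,
+ *
+ *   feat_bitmap_init_next_nodes (vm, my_node.index, N_FEATURES,
+ *                                feat_names, my_feat_next_node_index);
+ *
+ * and in its dispatch function each feature clears its own bit and
+ * branches to whatever feature remains, e.g.
+ *
+ *   next0 = vnet_l2_feature_next (b0, my_feat_next_node_index,
+ *                                 MY_FEATURE_BIT);
+ */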
+
+/* 32 features in a u32 bitmap */
+#define FEAT_MAX 32
+
+/**
+ Initialize the feature next-node indexes of a graph node.
+ Should be called by the init function of each feature graph node.
+*/
+always_inline void
+feat_bitmap_init_next_nodes (vlib_main_t * vm, u32 node_index, /* the current graph node index */
+ u32 num_features, /* number of entries in feat_names */
+ char **feat_names, /* array of feature graph node names */
+ u32 * next_nodes) /* array of 32 next indexes to init */
+{
+ u32 idx;
+
+ ASSERT (num_features <= FEAT_MAX);
+
+ for (idx = 0; idx < num_features; idx++)
+ {
+ if (vlib_get_node_by_name (vm, (u8 *) feat_names[idx]))
+ {
+ next_nodes[idx] =
+ vlib_node_add_named_next (vm, node_index, feat_names[idx]);
+ }
+ else
+	{
+	  /* Node may be in a plugin that is not installed; use the drop node */
+ next_nodes[idx] =
+ vlib_node_add_named_next (vm, node_index, "feature-bitmap-drop");
+ }
+ }
+
+ /* All unassigned bits go to the drop node */
+ for (; idx < FEAT_MAX; idx++)
+ {
+ next_nodes[idx] = vlib_node_add_named_next (vm, node_index,
+ "feature-bitmap-drop");
+ }
+}
+
+/**
+ Return the graph node index for the feature corresponding to the
+ most significant set bit in the bitmap (features dispatch from the
+ high-order bit down).
+*/
+always_inline u32
+feat_bitmap_get_next_node_index (u32 * next_nodes, u32 bitmap)
+{
+ u32 first_bit;
+
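+  /* count_leading_zeros operates on a uword, so the index of the most
+   * significant set bit is uword_bits - 1 - clz (bitmap). */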
+ count_leading_zeros (first_bit, bitmap);
+ first_bit = uword_bits - 1 - first_bit;
+ return next_nodes[first_bit];
+}
+
+/**
+ Return the graph node index for the feature corresponding to the next
+ set bit after clearing the current feature bit in the feature_bitmap
+ of the current packet.
+*/
+always_inline u32
+vnet_l2_feature_next (vlib_buffer_t * b, u32 * next_nodes, u32 feat_bit)
+{
+ vnet_buffer (b)->l2.feature_bitmap &= ~feat_bit;
+ u32 fb = vnet_buffer (b)->l2.feature_bitmap;
+ ASSERT (fb != 0);
+ return feat_bitmap_get_next_node_index (next_nodes, fb);
+}
+
+#endif /* included_vnet_l2_feat_bitmap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api
new file mode 100644
index 00000000..ac923de4
--- /dev/null
+++ b/src/vnet/l2/l2.api
@@ -0,0 +1,385 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Reply to l2_xconnect_dump
+ @param context - sender context which was passed in the request
+ @param rx_sw_if_index - Receive interface index
+ @param tx_sw_if_index - Transmit interface index
+ */
+define l2_xconnect_details
+{
+ u32 context;
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+};
+
+/** \brief Dump L2 XConnects
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define l2_xconnect_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief l2 fib table details structure
+ @param bd_id - the l2 fib / bridge domain table id
+ @param mac - the entry's mac address
+ @param sw_if_index - index of the interface
+ @param static_mac - the entry is statically configured.
+ @param filter_mac - the entry is a mac filter entry.
+ @param bvi_mac - the mac address is a bridge virtual interface
+*/
+define l2_fib_table_details
+{
+ u32 context;
+ u32 bd_id;
+ u64 mac;
+ u32 sw_if_index;
+ u8 static_mac;
+ u8 filter_mac;
+ u8 bvi_mac;
+};
+
+/** \brief Dump l2 fib (aka bridge domain) table
+ @param client_index - opaque cookie to identify the sender
+ @param bd_id - the l2 fib / bridge domain table identifier
+*/
+define l2_fib_table_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+};
+
+/** \brief L2 fib clear table request, clear all mac entries in the l2 fib
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoreply define l2_fib_clear_table
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief L2 FIB flush all entries
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoreply define l2fib_flush_all
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief L2 FIB flush bridge domain entries
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd_id - the entry's bridge domain id
+*/
+autoreply define l2fib_flush_bd
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+};
+
+/** \brief L2 FIB flush interface entries
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param sw_if_index - the interface whose MAC entries are flushed
+*/
+autoreply define l2fib_flush_int
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief L2 FIB add entry request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mac - the entry's mac address
+ @param bd_id - the entry's bridge domain id
+ @param sw_if_index - the interface
+    @param is_add - if non-zero add the entry, else delete it
+    @param static_mac - the entry is statically configured
+    @param filter_mac - the entry is a mac filter entry
+    @param bvi_mac - the mac address is a bridge virtual interface
+*/
+autoreply define l2fib_add_del
+{
+ u32 client_index;
+ u32 context;
+ u64 mac;
+ u32 bd_id;
+ u32 sw_if_index;
+ u8 is_add;
+ u8 static_mac;
+ u8 filter_mac;
+ u8 bvi_mac;
+};
+
+/** \brief Register to receive L2 MAC events for learned and aged MACs
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param learn_limit - MAC learn limit, 0 => default to 1000
+    @param scan_delay - event scan delay in 10 msec units, 0 => default to 100 msec
+ @param max_macs_in_event - in units of 10 mac entries, 0 => default to 100 entries
+ @param enable_disable - 1 => register for MAC events, 0 => cancel registration
+ @param pid - sender's pid
+*/
+autoreply define want_l2_macs_events
+{
+ u32 client_index;
+ u32 context;
+ u32 learn_limit;
+ u8 scan_delay;
+ u8 max_macs_in_event;
+ u8 enable_disable;
+ u32 pid;
+};
+
+/** \brief Entry for learned or aged MAC in L2 MAC Events
+ @param sw_if_index - sw_if_index in the domain
+    @param mac_addr - the MAC address
+    @param is_del - 0 => newly learned MAC, 1 => aged out MAC
+*/
+typeonly define mac_entry
+{
+ u32 sw_if_index;
+ u8 mac_addr[6];
+ u8 is_del;
+ u8 spare;
+};
+
+/** \brief L2 MAC event for a list of learned or aged MACs
+ @param client_index - opaque cookie to identify the sender
+ @param pid - client pid registered to receive notification
+    @param n_macs - number of learned/aged MAC entries
+ @param mac - array of learned/aged MAC entries
+*/
+define l2_macs_event
+{
+ u32 client_index;
+ u32 pid;
+ u32 n_macs;
+ vl_api_mac_entry_t mac[n_macs];
+};
+
+/** \brief Set interface L2 flags (such as L2_LEARN, L2_FWD,
+ L2_FLOOD, L2_UU_FLOOD, or L2_ARP_TERM bits). This can be used
+ to disable one or more of the features represented by the
+ flag bits on an interface to override what is set as default
+ for all interfaces in the bridge domain
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface
+ @param is_set - if non-zero, set the bits, else clear them
+ @param feature_bitmap - non-zero bits (as above) to set or clear
+*/
+define l2_flags
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_set;
+ u32 feature_bitmap;
+};
+
+/** \brief Set interface L2 flags response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the set l2 bits request
+ @param resulting_feature_bitmap - the internal l2 feature bitmap after the request is implemented
+*/
+define l2_flags_reply
+{
+ u32 context;
+ i32 retval;
+ u32 resulting_feature_bitmap;
+};
+
+/** \brief L2 bridge domain set mac age
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param bd_id - the bridge domain id
+ @param mac_age - mac aging time in min, 0 for disabled
+*/
+autoreply define bridge_domain_set_mac_age
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+ u8 mac_age;
+};
+
+/** \brief L2 bridge domain add or delete request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd_id - the bridge domain to create
+ @param flood - enable/disable bcast/mcast flooding in the bd
+    @param uu_flood - enable/disable unknown unicast flood in the bd
+ @param forward - enable/disable forwarding on all interfaces in the bd
+ @param learn - enable/disable learning on all interfaces in the bd
+ @param arp_term - enable/disable arp termination in the bd
+    @param mac_age - mac aging time in min, 0 for disabled
+    @param bd_tag - optional textual tag for the bridge domain
+    @param is_add - add or delete flag
+*/
+autoreply define bridge_domain_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+ u8 flood;
+ u8 uu_flood;
+ u8 forward;
+ u8 learn;
+ u8 arp_term;
+ u8 mac_age;
+ u8 bd_tag[64];
+ u8 is_add;
+};
+
+/** \brief L2 bridge domain request operational state details
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd_id - the bridge domain id desired or ~0 to request all bds
+*/
+define bridge_domain_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+};
+
+/** \brief L2 bridge domain sw interface operational state response
+ @param sw_if_index - sw_if_index in the domain
+ @param shg - split horizon group for the interface
+*/
+typeonly manual_print manual_endian define bridge_domain_sw_if
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 shg;
+};
+
+/** \brief L2 bridge domain operational state response
+ @param bd_id - the bridge domain id
+ @param flood - bcast/mcast flooding state on all interfaces in the bd
+    @param uu_flood - unknown unicast flooding state on all interfaces in the bd
+ @param forward - forwarding state on all interfaces in the bd
+ @param learn - learning state on all interfaces in the bd
+ @param arp_term - arp termination state on all interfaces in the bd
+ @param mac_age - mac aging time in min, 0 for disabled
+    @param bd_tag - optional textual tag for the bridge domain
+    @param bvi_sw_if_index - sw_if_index of the BVI interface, ~0 if none
+    @param n_sw_ifs - number of sw_if_index's in the domain
+*/
+manual_print manual_endian define bridge_domain_details
+{
+ u32 context;
+ u32 bd_id;
+ u8 flood;
+ u8 uu_flood;
+ u8 forward;
+ u8 learn;
+ u8 arp_term;
+ u8 mac_age;
+ u8 bd_tag[64];
+ u32 bvi_sw_if_index;
+ u32 n_sw_ifs;
+ vl_api_bridge_domain_sw_if_t sw_if_details[n_sw_ifs];
+};
+
+/** \brief Set bridge flags (such as L2_LEARN, L2_FWD, L2_FLOOD,
+ L2_UU_FLOOD, or L2_ARP_TERM bits) request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd_id - the bridge domain to set the flags for
+ @param is_set - if non-zero, set the flags, else clear them
+ @param feature_bitmap - bits (as above) that are non-zero to set or clear
+*/
+define bridge_flags
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+ u8 is_set;
+ u32 feature_bitmap;
+};
+
+/** \brief Set bridge flags response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the set bridge flags request
+ @param resulting_feature_bitmap - the internal L2 feature bitmap after the request is implemented
+*/
+define bridge_flags_reply
+{
+ u32 context;
+ i32 retval;
+ u32 resulting_feature_bitmap;
+};
+
+/** \brief L2 interface vlan tag rewrite configure request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface the operation is applied to
+ @param vtr_op - Choose from l2_vtr_op_t enum values
+    @param push_dot1q - if non-zero, the ethertype of the first pushed tag is dot1q, else dot1ad
+ @param tag1 - Needed for any push or translate vtr op
+ @param tag2 - Needed for any push 2 or translate x-2 vtr ops
+*/
+autoreply define l2_interface_vlan_tag_rewrite
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 vtr_op;
+ u32 push_dot1q; // ethertype of first pushed tag is dot1q/dot1ad
+ u32 tag1; // first pushed tag
+ u32 tag2; // second pushed tag
+};
+
+/** \brief L2 interface pbb tag rewrite configure request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface the operation is applied to
+ @param vtr_op - Choose from l2_vtr_op_t enum values
+    @param outer_tag - needed for translate_qinq vtr op only
+ @param b_dmac - B-tag remote mac address, needed for any push or translate_qinq vtr op
+ @param b_smac - B-tag local mac address, needed for any push or translate qinq vtr op
+ @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op
+ @param i_sid - I-tag service id, needed for any push or translate qinq vtr op
+*/
+autoreply define l2_interface_pbb_tag_rewrite
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 vtr_op;
+ u16 outer_tag;
+ u8 b_dmac[6];
+ u8 b_smac[6];
+ u16 b_vlanid;
+ u32 i_sid;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c
new file mode 100644
index 00000000..20d6ab32
--- /dev/null
+++ b/src/vnet/l2/l2_api.c
@@ -0,0 +1,679 @@
+/*
+ *------------------------------------------------------------------
+ * l2_api.c - layer 2 forwarding api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/l2/l2_learn.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun		/* define endian-swap functions */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+#define vl_api_bridge_domain_details_t_endian vl_noop_handler
+#define vl_api_bridge_domain_details_t_print vl_noop_handler
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(L2_XCONNECT_DUMP, l2_xconnect_dump) \
+_(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \
+_(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \
+_(L2FIB_FLUSH_ALL, l2fib_flush_all) \
+_(L2FIB_FLUSH_INT, l2fib_flush_int) \
+_(L2FIB_FLUSH_BD, l2fib_flush_bd) \
+_(L2FIB_ADD_DEL, l2fib_add_del) \
+_(WANT_L2_MACS_EVENTS, want_l2_macs_events) \
+_(L2_FLAGS, l2_flags) \
+_(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \
+_(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \
+_(BRIDGE_FLAGS, bridge_flags) \
+_(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \
+_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) \
+_(BRIDGE_DOMAIN_SET_MAC_AGE, bridge_domain_set_mac_age)
+
+static void
+send_l2_xconnect_details (unix_shared_memory_queue_t * q, u32 context,
+ u32 rx_sw_if_index, u32 tx_sw_if_index)
+{
+ vl_api_l2_xconnect_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_L2_XCONNECT_DETAILS);
+ mp->context = context;
+ mp->rx_sw_if_index = htonl (rx_sw_if_index);
+ mp->tx_sw_if_index = htonl (tx_sw_if_index);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ l2input_main_t *l2im = &l2input_main;
+ vnet_sw_interface_t *swif;
+ l2_input_config_t *config;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (swif, im->sw_interfaces,
+ ({
+ config = vec_elt_at_index (l2im->configs, swif->sw_if_index);
+ if (config->xconnect)
+ send_l2_xconnect_details (q, mp->context, swif->sw_if_index,
+ config->output_sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_l2_fib_clear_table_t_handler (vl_api_l2_fib_clear_table_t * mp)
+{
+ int rv = 0;
+ vl_api_l2_fib_clear_table_reply_t *rmp;
+
+ /* Clear all MACs including static MACs */
+ l2fib_clear_table ();
+
+ REPLY_MACRO (VL_API_L2_FIB_CLEAR_TABLE_REPLY);
+}
+
+static void
+send_l2fib_table_entry (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ l2fib_entry_key_t * l2fe_key,
+ l2fib_entry_result_t * l2fe_res, u32 context)
+{
+ vl_api_l2_fib_table_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_L2_FIB_TABLE_DETAILS);
+
+ mp->bd_id =
+ ntohl (l2input_main.bd_configs[l2fe_key->fields.bd_index].bd_id);
+
+ mp->mac = l2fib_make_key (l2fe_key->fields.mac, 0);
+ mp->sw_if_index = ntohl (l2fe_res->fields.sw_if_index);
+ mp->static_mac = l2fe_res->fields.static_mac;
+ mp->filter_mac = l2fe_res->fields.filter;
+ mp->bvi_mac = l2fe_res->fields.bvi;
+ mp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_l2_fib_table_dump_t_handler (vl_api_l2_fib_table_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ bd_main_t *bdm = &bd_main;
+ l2fib_entry_key_t *l2fe_key = NULL;
+ l2fib_entry_result_t *l2fe_res = NULL;
+ u32 ni, bd_id = ntohl (mp->bd_id);
+ u32 bd_index;
+ unix_shared_memory_queue_t *q;
+ uword *p;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* see l2fib_table_dump: ~0 means "any" */
+ if (bd_id == ~0)
+ bd_index = ~0;
+ else
+ {
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p == 0)
+ return;
+
+ bd_index = p[0];
+ }
+
+ l2fib_table_dump (bd_index, &l2fe_key, &l2fe_res);
+
+ vec_foreach_index (ni, l2fe_key)
+ {
+ send_l2fib_table_entry (am, q, vec_elt_at_index (l2fe_key, ni),
+ vec_elt_at_index (l2fe_res, ni), mp->context);
+ }
+ vec_free (l2fe_key);
+ vec_free (l2fe_res);
+}
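+
+/*
+ * Note the dump/details pattern used throughout this file: a *_dump
+ * request yields zero or more *_details messages streamed back on the
+ * client's queue, each echoing the request's context field so the client
+ * can correlate them; bd_id == ~0 acts as a "match any" wildcard.
+ */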
+
+static void
+vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp)
+{
+ bd_main_t *bdm = &bd_main;
+ l2input_main_t *l2im = &l2input_main;
+ vl_api_l2fib_add_del_reply_t *rmp;
+ int rv = 0;
+ u32 bd_id = ntohl (mp->bd_id);
+ uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto bad_sw_if_index;
+ }
+ u32 bd_index = p[0];
+
+ u64 mac = mp->mac;
+ if (mp->is_add)
+ {
+ if (mp->filter_mac)
+ l2fib_add_filter_entry (mac, bd_index);
+ else
+ {
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ VALIDATE_SW_IF_INDEX (mp);
+ if (vec_len (l2im->configs) <= sw_if_index)
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto bad_sw_if_index;
+ }
+ else
+ {
+ l2_input_config_t *config;
+ config = vec_elt_at_index (l2im->configs, sw_if_index);
+ if (config->bridge == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto bad_sw_if_index;
+ }
+ }
+ u8 static_mac = mp->static_mac ? 1 : 0;
+ u8 bvi_mac = mp->bvi_mac ? 1 : 0;
+ l2fib_add_fwd_entry (mac, bd_index, sw_if_index, static_mac,
+ bvi_mac);
+ }
+ }
+ else
+ {
+ l2fib_del_entry (mac, bd_index);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_L2FIB_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_want_l2_macs_events_t_handler (vl_api_want_l2_macs_events_t * mp)
+{
+ int rv = 0;
+ vl_api_want_l2_macs_events_reply_t *rmp;
+ l2learn_main_t *lm = &l2learn_main;
+ l2fib_main_t *fm = &l2fib_main;
+ u32 pid = ntohl (mp->pid);
+ u32 learn_limit = ntohl (mp->learn_limit);
+
+ if (mp->enable_disable)
+ {
+ if (lm->client_pid == 0)
+ {
+ lm->client_pid = pid;
+ lm->client_index = mp->client_index;
+
+ if (mp->max_macs_in_event)
+ fm->max_macs_in_event = mp->max_macs_in_event * 10;
+ else
+ fm->max_macs_in_event = L2FIB_EVENT_MAX_MACS_DEFAULT;
+
+ if (mp->scan_delay)
+ fm->event_scan_delay = (f64) (mp->scan_delay) * 10e-3;
+ else
+ fm->event_scan_delay = L2FIB_EVENT_SCAN_DELAY_DEFAULT;
+
+ /* change learn limit and flush all learned MACs */
+ if (learn_limit && (learn_limit < L2LEARN_DEFAULT_LIMIT))
+ lm->global_learn_limit = learn_limit;
+ else
+ lm->global_learn_limit = L2FIB_EVENT_LEARN_LIMIT_DEFAULT;
+
+ l2fib_flush_all_mac (vlib_get_main ());
+ }
+ else if (lm->client_pid != pid)
+ {
+ rv = VNET_API_ERROR_L2_MACS_EVENT_CLINET_PRESENT;
+ goto exit;
+ }
+ }
+ else if (lm->client_pid)
+ {
+ lm->client_pid = 0;
+ lm->client_index = 0;
+ if (learn_limit && (learn_limit < L2LEARN_DEFAULT_LIMIT))
+ lm->global_learn_limit = learn_limit;
+ else
+ lm->global_learn_limit = L2LEARN_DEFAULT_LIMIT;
+ }
+
+exit:
+ REPLY_MACRO (VL_API_WANT_L2_MACS_EVENTS_REPLY);
+}
+
+static void
+vl_api_l2fib_flush_int_t_handler (vl_api_l2fib_flush_int_t * mp)
+{
+ int rv = 0;
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_l2fib_flush_int_reply_t *rmp;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ l2fib_flush_int_mac (vm, sw_if_index);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_L2FIB_FLUSH_INT_REPLY);
+}
+
+static void
+vl_api_l2fib_flush_all_t_handler (vl_api_l2fib_flush_all_t * mp)
+{
+ int rv = 0;
+ vl_api_l2fib_flush_all_reply_t *rmp;
+
+ l2fib_flush_all_mac (vlib_get_main ());
+ REPLY_MACRO (VL_API_L2FIB_FLUSH_ALL_REPLY);
+}
+
+static void
+vl_api_l2fib_flush_bd_t_handler (vl_api_l2fib_flush_bd_t * mp)
+{
+ int rv = 0;
+ vlib_main_t *vm = vlib_get_main ();
+ bd_main_t *bdm = &bd_main;
+ vl_api_l2fib_flush_bd_reply_t *rmp;
+
+ u32 bd_id = ntohl (mp->bd_id);
+ uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
+ }
+ l2fib_flush_bd_mac (vm, *p);
+out:
+ REPLY_MACRO (VL_API_L2FIB_FLUSH_BD_REPLY);
+}
+
+static void
+vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp)
+{
+ vl_api_l2_flags_reply_t *rmp;
+ int rv = 0;
+ u32 rbm = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ u32 flags = ntohl (mp->feature_bitmap);
+ u32 bitmap = 0;
+
+ if (flags & L2_LEARN)
+ bitmap |= L2INPUT_FEAT_LEARN;
+
+ if (flags & L2_FWD)
+ bitmap |= L2INPUT_FEAT_FWD;
+
+ if (flags & L2_FLOOD)
+ bitmap |= L2INPUT_FEAT_FLOOD;
+
+ if (flags & L2_UU_FLOOD)
+ bitmap |= L2INPUT_FEAT_UU_FLOOD;
+
+ if (flags & L2_ARP_TERM)
+ bitmap |= L2INPUT_FEAT_ARP_TERM;
+
+ rbm = l2input_intf_bitmap_enable (sw_if_index, bitmap, mp->is_set);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_L2_FLAGS_REPLY,
+ ({
+ rmp->resulting_feature_bitmap = ntohl(rbm);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_bridge_domain_set_mac_age_t_handler (vl_api_bridge_domain_set_mac_age_t
+ * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ bd_main_t *bdm = &bd_main;
+ vl_api_bridge_domain_set_mac_age_reply_t *rmp;
+ int rv = 0;
+ u32 bd_id = ntohl (mp->bd_id);
+ uword *p;
+
+ if (bd_id == 0)
+ {
+ rv = VNET_API_ERROR_BD_NOT_MODIFIABLE;
+ goto out;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
+ }
+ bd_set_mac_age (vm, *p, mp->mac_age);
+out:
+ REPLY_MACRO (VL_API_BRIDGE_DOMAIN_SET_MAC_AGE_REPLY);
+}
+
+static void
+vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp)
+{
+ l2_bridge_domain_add_del_args_t a = {
+ .is_add = mp->is_add,
+ .flood = mp->flood,
+ .uu_flood = mp->uu_flood,
+ .forward = mp->forward,
+ .learn = mp->learn,
+ .arp_term = mp->arp_term,
+ .mac_age = mp->mac_age,
+ .bd_id = ntohl (mp->bd_id),
+ .bd_tag = mp->bd_tag
+ };
+
+ int rv = bd_add_del (&a);
+
+ vl_api_bridge_domain_add_del_reply_t *rmp;
+ REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY);
+}
+
+static void
+send_bridge_domain_details (l2input_main_t * l2im,
+ unix_shared_memory_queue_t * q,
+ l2_bridge_domain_t * bd_config,
+ u32 n_sw_ifs, u32 context)
+{
+ vl_api_bridge_domain_details_t *mp;
+ l2_flood_member_t *m;
+ vl_api_bridge_domain_sw_if_t *sw_ifs;
+ l2_input_config_t *input_cfg;
+
+ mp = vl_msg_api_alloc (sizeof (*mp) +
+ (n_sw_ifs * sizeof (vl_api_bridge_domain_sw_if_t)));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_DETAILS);
+ mp->bd_id = ntohl (bd_config->bd_id);
+ mp->flood = bd_feature_flood (bd_config);
+ mp->uu_flood = bd_feature_uu_flood (bd_config);
+ mp->forward = bd_feature_forward (bd_config);
+ mp->learn = bd_feature_learn (bd_config);
+ mp->arp_term = bd_feature_arp_term (bd_config);
+ mp->bvi_sw_if_index = ntohl (bd_config->bvi_sw_if_index);
+ mp->mac_age = bd_config->mac_age;
+ if (bd_config->bd_tag)
+ {
+ strncpy ((char *) mp->bd_tag, (char *) bd_config->bd_tag,
+ ARRAY_LEN (mp->bd_tag) - 1);
+ mp->bd_tag[ARRAY_LEN (mp->bd_tag) - 1] = 0;
+ }
+
+ mp->context = context;
+
+ sw_ifs = (vl_api_bridge_domain_sw_if_t *) mp->sw_if_details;
+ vec_foreach (m, bd_config->members)
+ {
+ sw_ifs->sw_if_index = ntohl (m->sw_if_index);
+ input_cfg = vec_elt_at_index (l2im->configs, m->sw_if_index);
+ sw_ifs->shg = input_cfg->shg;
+ sw_ifs++;
+ mp->n_sw_ifs++;
+ }
+ mp->n_sw_ifs = htonl (mp->n_sw_ifs);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_bridge_domain_dump_t_handler (vl_api_bridge_domain_dump_t * mp)
+{
+ bd_main_t *bdm = &bd_main;
+ l2input_main_t *l2im = &l2input_main;
+
+ unix_shared_memory_queue_t *q =
+ vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ u32 bd_id = ntohl (mp->bd_id);
+ if (bd_id == 0)
+ return;
+
+ u32 bd_index, end;
+ if (bd_id == ~0)
+ bd_index = 0, end = vec_len (l2im->bd_configs);
+ else
+ {
+ bd_index = bd_find_index (bdm, bd_id);
+ if (bd_index == ~0)
+ return;
+
+ end = bd_index + 1;
+ }
+
+ for (; bd_index < end; bd_index++)
+ {
+ l2_bridge_domain_t *bd_config =
+ l2input_bd_config_from_index (l2im, bd_index);
+ /* skip dummy bd_id 0 */
+ if (bd_config && (bd_config->bd_id > 0))
+ send_bridge_domain_details (l2im, q, bd_config,
+ vec_len (bd_config->members),
+ mp->context);
+ }
+}
+
+static void
+vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ bd_main_t *bdm = &bd_main;
+ vl_api_bridge_flags_reply_t *rmp;
+  int rv = 0;
+  u32 bitmap = 0;
+
+ u32 flags = ntohl (mp->feature_bitmap);
+ u32 bd_id = ntohl (mp->bd_id);
+ if (bd_id == 0)
+ {
+ rv = VNET_API_ERROR_BD_NOT_MODIFIABLE;
+ goto out;
+ }
+
+ u32 bd_index = bd_find_index (bdm, bd_id);
+ if (bd_index == ~0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
+ }
+
+  bitmap = bd_set_flags (vm, bd_index, flags, mp->is_set);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY,
+ ({
+ rmp->resulting_feature_bitmap = ntohl(bitmap);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_l2_interface_vlan_tag_rewrite_t_handler
+ (vl_api_l2_interface_vlan_tag_rewrite_t * mp)
+{
+ int rv = 0;
+ vl_api_l2_interface_vlan_tag_rewrite_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ u32 vtr_op;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vtr_op = ntohl (mp->vtr_op);
+
+  /* The L2 code does not validate vtr_op itself, so check it here */
+ switch (vtr_op)
+ {
+ case L2_VTR_DISABLED:
+ case L2_VTR_PUSH_1:
+ case L2_VTR_PUSH_2:
+ case L2_VTR_POP_1:
+ case L2_VTR_POP_2:
+ case L2_VTR_TRANSLATE_1_1:
+ case L2_VTR_TRANSLATE_1_2:
+ case L2_VTR_TRANSLATE_2_1:
+ case L2_VTR_TRANSLATE_2_2:
+ break;
+
+ default:
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto bad_sw_if_index;
+ }
+
+ rv = l2vtr_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op,
+ ntohl (mp->push_dot1q), ntohl (mp->tag1),
+ ntohl (mp->tag2));
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_L2_INTERFACE_VLAN_TAG_REWRITE_REPLY);
+}
+
+static void
+ vl_api_l2_interface_pbb_tag_rewrite_t_handler
+ (vl_api_l2_interface_pbb_tag_rewrite_t * mp)
+{
+ vl_api_l2_interface_pbb_tag_rewrite_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ u32 vtr_op;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vtr_op = ntohl (mp->vtr_op);
+
+ switch (vtr_op)
+ {
+ case L2_VTR_DISABLED:
+ case L2_VTR_PUSH_2:
+ case L2_VTR_POP_2:
+ case L2_VTR_TRANSLATE_2_1:
+ break;
+
+ default:
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto bad_sw_if_index;
+ }
+
+ rv = l2pbb_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op,
+ mp->b_dmac, mp->b_smac, ntohs (mp->b_vlanid),
+ ntohl (mp->i_sid), ntohs (mp->outer_tag));
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_L2_INTERFACE_PBB_TAG_REWRITE_REPLY);
+}
+
+/*
+ * l2_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_l2;
+#undef _
+}
+
+static clib_error_t *
+l2_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (l2_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c
new file mode 100644
index 00000000..b1abb4c0
--- /dev/null
+++ b/src/vnet/l2/l2_bd.c
@@ -0,0 +1,1359 @@
+/*
+ * l2_bd.c : layer 2 bridge domain
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vlib/cli.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/format.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/l2/l2_learn.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/vec.h>
+
+/**
+ * @file
+ * @brief Ethernet Bridge Domain.
+ *
+ * Code in this file manages Layer 2 bridge domains.
+ *
+ */
+
+bd_main_t bd_main;
+
+/**
+ Init bridge domain if not done already.
+ For feature bitmap, set all bits except ARP termination
+*/
+void
+bd_validate (l2_bridge_domain_t * bd_config)
+{
+ if (bd_is_valid (bd_config))
+ return;
+ bd_config->feature_bitmap = ~L2INPUT_FEAT_ARP_TERM;
+ bd_config->bvi_sw_if_index = ~0;
+ bd_config->members = 0;
+ bd_config->flood_count = 0;
+ bd_config->tun_master_count = 0;
+ bd_config->tun_normal_count = 0;
+ bd_config->mac_by_ip4 = 0;
+ bd_config->mac_by_ip6 = hash_create_mem (0, sizeof (ip6_address_t),
+ sizeof (uword));
+}
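+
+/*
+ * Note the asymmetric hash setup above: mac_by_ip4 is a word-keyed clib
+ * hash, which (like a clib vector) can start as NULL and is created on
+ * first hash_set; mac_by_ip6 keys are 16-byte ip6 addresses stored out of
+ * line, so that table must be created up front with hash_create_mem and
+ * an explicit key size.
+ */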
+
+u32
+bd_find_index (bd_main_t * bdm, u32 bd_id)
+{
+ u32 *p = (u32 *) hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (!p)
+ return ~0;
+ return p[0];
+}
+
+u32
+bd_add_bd_index (bd_main_t * bdm, u32 bd_id)
+{
+ ASSERT (!hash_get (bdm->bd_index_by_bd_id, bd_id));
+ u32 rv = clib_bitmap_first_clear (bdm->bd_index_bitmap);
+
+ /* mark this index taken */
+ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, rv, 1);
+
+ hash_set (bdm->bd_index_by_bd_id, bd_id, rv);
+
+ vec_validate (l2input_main.bd_configs, rv);
+ l2input_main.bd_configs[rv].bd_id = bd_id;
+
+ return rv;
+}
+
+static int
+bd_delete (bd_main_t * bdm, u32 bd_index)
+{
+ l2_bridge_domain_t *bd = &l2input_main.bd_configs[bd_index];
+ u32 bd_id = bd->bd_id;
+ u64 mac_addr;
+ ip6_address_t *ip6_addr_key;
+
+  /* flush non-static MACs in BD and remove bd_id from hash table */
+ l2fib_flush_bd_mac (vlib_get_main (), bd_index);
+ hash_unset (bdm->bd_index_by_bd_id, bd_id);
+
+ /* mark this index clear */
+ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, bd_index, 0);
+
+ /* clear BD config for reuse: bd_id to -1 and clear feature_bitmap */
+ bd->bd_id = ~0;
+ bd->feature_bitmap = 0;
+
+ /* free BD tag */
+ vec_free (bd->bd_tag);
+
+ /* free memory used by BD */
+ vec_free (bd->members);
+ hash_free (bd->mac_by_ip4);
+ /* *INDENT-OFF* */
+ hash_foreach_mem (ip6_addr_key, mac_addr, bd->mac_by_ip6,
+ ({
+ clib_mem_free (ip6_addr_key); /* free memory used for ip6 addr key */
+ }));
+ /* *INDENT-ON* */
+ hash_free (bd->mac_by_ip6);
+
+ return 0;
+}
+
+static void
+update_flood_count (l2_bridge_domain_t * bd_config)
+{
+ bd_config->flood_count = vec_len (bd_config->members) -
+ (bd_config->tun_master_count ? bd_config->tun_normal_count : 0);
+}
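+
+/*
+ * Worked example: members ordered [bvi, phys, tun_master, tun_a, tun_b]
+ * give vec_len == 5, tun_master_count == 1, tun_normal_count == 2, so
+ * flood_count == 3: the two unicast tunnels at the tail are skipped while
+ * a multicast master is present.  With no master, flood_count == vec_len
+ * and all members are flooded.
+ */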
+
+void
+bd_add_member (l2_bridge_domain_t * bd_config, l2_flood_member_t * member)
+{
+ u32 ix;
+ vnet_sw_interface_t *sw_if = vnet_get_sw_interface
+ (vnet_get_main (), member->sw_if_index);
+
+ /*
+ * Add one element to the vector
+ * vector is ordered [ bvi, normal/tun_masters..., tun_normals... ]
+ * When flooding, the bvi interface (if present) must be the last member
+ * processed due to how BVI processing can change the packet. To enable
+ * this order, we make the bvi interface the first in the vector and
+ * flooding walks the vector in reverse.
+ */
+ switch (sw_if->flood_class)
+ {
+ case VNET_FLOOD_CLASS_TUNNEL_MASTER:
+ bd_config->tun_master_count++;
+ /* Fall through */
+ default:
+ /* Fall through */
+ case VNET_FLOOD_CLASS_NORMAL:
+ ix = (member->flags & L2_FLOOD_MEMBER_BVI) ? 0 :
+ vec_len (bd_config->members) - bd_config->tun_normal_count;
+ break;
+ case VNET_FLOOD_CLASS_TUNNEL_NORMAL:
+ ix = vec_len (bd_config->members);
+ bd_config->tun_normal_count++;
+ break;
+ }
+
+ vec_insert_elts (bd_config->members, member, 1, ix);
+ update_flood_count (bd_config);
+}
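+
+/*
+ * Minimal usage sketch (assumes sw_if_index 5 names a valid interface
+ * already in L2 mode and bd_config points at a validated bridge domain):
+ *
+ *   l2_flood_member_t member = {
+ *     .sw_if_index = 5,
+ *     .flags = L2_FLOOD_MEMBER_NORMAL,
+ *     .shg = 0,
+ *   };
+ *   bd_add_member (bd_config, &member);
+ */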
+
+#define BD_REMOVE_ERROR_OK 0
+#define BD_REMOVE_ERROR_NOT_FOUND 1
+
+u32
+bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index)
+{
+ u32 ix;
+
+ /* Find and delete the member */
+ vec_foreach_index (ix, bd_config->members)
+ {
+ l2_flood_member_t *m = vec_elt_at_index (bd_config->members, ix);
+ if (m->sw_if_index == sw_if_index)
+ {
+ vnet_sw_interface_t *sw_if = vnet_get_sw_interface
+ (vnet_get_main (), sw_if_index);
+
+ if (sw_if->flood_class != VNET_FLOOD_CLASS_NORMAL)
+ {
+ if (sw_if->flood_class == VNET_FLOOD_CLASS_TUNNEL_MASTER)
+ bd_config->tun_master_count--;
+ else if (sw_if->flood_class == VNET_FLOOD_CLASS_TUNNEL_NORMAL)
+ bd_config->tun_normal_count--;
+ }
+ vec_delete (bd_config->members, 1, ix);
+ update_flood_count (bd_config);
+
+ return BD_REMOVE_ERROR_OK;
+ }
+ }
+
+ return BD_REMOVE_ERROR_NOT_FOUND;
+}
+
+
+clib_error_t *
+l2bd_init (vlib_main_t * vm)
+{
+ bd_main_t *bdm = &bd_main;
+ bdm->bd_index_by_bd_id = hash_create (0, sizeof (uword));
+ /*
+ * create a dummy bd with bd_id of 0 and bd_index of 0 with feature set
+ * to packet drop only. Thus, packets received from any L2 interface with
+ * uninitialized bd_index of 0 can be dropped safely.
+ */
+ u32 bd_index = bd_add_bd_index (bdm, 0);
+ ASSERT (bd_index == 0);
+ l2input_main.bd_configs[0].feature_bitmap = L2INPUT_FEAT_DROP;
+
+ bdm->vlib_main = vm;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2bd_init);
+
+
+/**
+    Set the learn/forward/flood flags for the bridge domain.
+    Return the resulting feature bitmap for the bridge domain.
+*/
+u32
+bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable)
+{
+
+ l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+ bd_validate (bd_config);
+ u32 feature_bitmap = 0;
+
+ if (flags & L2_LEARN)
+ {
+ feature_bitmap |= L2INPUT_FEAT_LEARN;
+ }
+ if (flags & L2_FWD)
+ {
+ feature_bitmap |= L2INPUT_FEAT_FWD;
+ }
+ if (flags & L2_FLOOD)
+ {
+ feature_bitmap |= L2INPUT_FEAT_FLOOD;
+ }
+ if (flags & L2_UU_FLOOD)
+ {
+ feature_bitmap |= L2INPUT_FEAT_UU_FLOOD;
+ }
+ if (flags & L2_ARP_TERM)
+ {
+ feature_bitmap |= L2INPUT_FEAT_ARP_TERM;
+ }
+
+ if (enable)
+ {
+ bd_config->feature_bitmap |= feature_bitmap;
+ }
+ else
+ {
+ bd_config->feature_bitmap &= ~feature_bitmap;
+ }
+
+ return bd_config->feature_bitmap;
+}
+
+/**
+ Set the mac age for the bridge domain.
+*/
+void
+bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age)
+{
+ l2_bridge_domain_t *bd_config;
+ int enable = 0;
+
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ bd_config->mac_age = age;
+
+ /* check if there is at least one bd with mac aging enabled */
+ vec_foreach (bd_config, l2input_main.bd_configs)
+ enable |= bd_config->bd_id != ~0 && bd_config->mac_age != 0;
+
+ vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index,
+ enable ? L2_MAC_AGE_PROCESS_EVENT_START :
+ L2_MAC_AGE_PROCESS_EVENT_STOP, 0);
+}
+
+/**
+ Set the tag for the bridge domain.
+*/
+
+static void
+bd_set_bd_tag (vlib_main_t * vm, u32 bd_index, u8 * bd_tag)
+{
+ u8 *old;
+ l2_bridge_domain_t *bd_config;
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+
+ old = bd_config->bd_tag;
+
+ if (bd_tag[0])
+ {
+ bd_config->bd_tag = format (0, "%s%c", bd_tag, 0);
+ }
+ else
+ {
+ bd_config->bd_tag = NULL;
+ }
+
+ vec_free (old);
+}
+
+/**
+ Set bridge-domain learn enable/disable.
+ The CLI format is:
+ set bridge-domain learn <bd_id> [disable]
+*/
+static clib_error_t *
+bd_learn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (bd_id == 0)
+ return clib_error_return (0,
+ "No operations on the default bridge domain are supported");
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the bridge domain flag */
+ bd_set_flags (vm, bd_index, L2_LEARN, enable);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 learning can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage bridge-domains. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable learning (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain learn 200}
+ * Example of how to disable learning (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain learn 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_learn_cli, static) = {
+ .path = "set bridge-domain learn",
+ .short_help = "set bridge-domain learn <bridge-domain-id> [disable]",
+ .function = bd_learn,
+};
+/* *INDENT-ON* */
+
+/**
+ Set bridge-domain forward enable/disable.
+ The CLI format is:
+    set bridge-domain forward <bd_id> [disable]
+*/
+static clib_error_t *
+bd_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (bd_id == 0)
+ return clib_error_return (0,
+ "No operations on the default bridge domain are supported");
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the bridge domain flag */
+ bd_set_flags (vm, bd_index, L2_FWD, enable);
+
+done:
+ return error;
+}
+
+
+/*?
+ * Layer 2 unicast forwarding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage bridge-domains. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable forwarding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain forward 200}
+ * Example of how to disable forwarding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain forward 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_fwd_cli, static) = {
+ .path = "set bridge-domain forward",
+ .short_help = "set bridge-domain forward <bridge-domain-id> [disable]",
+ .function = bd_fwd,
+};
+/* *INDENT-ON* */
+
+/**
+ Set bridge-domain flood enable/disable.
+ The CLI format is:
+    set bridge-domain flood <bd_id> [disable]
+*/
+static clib_error_t *
+bd_flood (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (bd_id == 0)
+ return clib_error_return (0,
+ "No operations on the default bridge domain are supported");
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the bridge domain flag */
+ bd_set_flags (vm, bd_index, L2_FLOOD, enable);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 flooding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage bridge-domains. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable flooding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain flood 200}
+ * Example of how to disable flooding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain flood 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_flood_cli, static) = {
+ .path = "set bridge-domain flood",
+ .short_help = "set bridge-domain flood <bridge-domain-id> [disable]",
+ .function = bd_flood,
+};
+/* *INDENT-ON* */
+
+/**
+  Set bridge-domain unknown-unicast flood enable/disable.
+  The CLI format is:
+    set bridge-domain uu-flood <bd_id> [disable]
+*/
+static clib_error_t *
+bd_uu_flood (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (bd_id == 0)
+ return clib_error_return (0,
+ "No operations on the default bridge domain are supported");
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the bridge domain flag */
+ bd_set_flags (vm, bd_index, L2_UU_FLOOD, enable);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 unknown-unicast flooding can be enabled and disabled on each
+ * bridge-domain. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable unknown-unicast flooding (where 200 is the
+ * bridge-domain-id):
+ * @cliexcmd{set bridge-domain uu-flood 200}
+ * Example of how to disable unknown-unicast flooding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain uu-flood 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = {
+ .path = "set bridge-domain uu-flood",
+ .short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]",
+ .function = bd_uu_flood,
+};
+/* *INDENT-ON* */
+
+/**
+ Set bridge-domain arp term enable/disable.
+ The CLI format is:
+ set bridge-domain arp term <bridge-domain-id> [disable]
+*/
+static clib_error_t *
+bd_arp_term (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (bd_id == 0)
+ return clib_error_return (0,
+ "No operations on the default bridge domain are supported");
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ bd_index = *p;
+ else
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ enable = 0;
+
+ /* set the bridge domain flag */
+ bd_set_flags (vm, bd_index, L2_ARP_TERM, enable);
+
+done:
+ return error;
+}
+
+static clib_error_t *
+bd_mac_age (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 age;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (bd_id == 0)
+ return clib_error_return (0,
+ "No operations on the default bridge domain are supported");
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ if (!unformat (input, "%u", &age))
+ {
+ error =
+ clib_error_return (0, "expecting ageing time in minutes but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ /* set the bridge domain flag */
+ if (age > 255)
+ {
+ error =
+ clib_error_return (0, "mac aging time cannot be bigger than 255");
+ goto done;
+ }
+ bd_set_mac_age (vm, bd_index, (u8) age);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 mac aging can be enabled and disabled on each
+ * bridge-domain. Use this command to set or disable mac aging
+ * on specific bridge-domains. It is disabled by default.
+ *
+ * @cliexpar
+ * Example of how to set mac aging (where 200 is the bridge-domain-id and
+ * 5 is aging time in minutes):
+ * @cliexcmd{set bridge-domain mac-age 200 5}
+ * Example of how to disable mac aging (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain mac-age 200 0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_mac_age_cli, static) = {
+ .path = "set bridge-domain mac-age",
+ .short_help = "set bridge-domain mac-age <bridge-domain-id> <mins>",
+ .function = bd_mac_age,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Modify whether or not an existing bridge-domain should terminate and respond
+ * to ARP Requests. ARP Termination is disabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable ARP termination (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp term 200}
+ * Example of how to disable ARP termination (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp term 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_arp_term_cli, static) = {
+ .path = "set bridge-domain arp term",
+ .short_help = "set bridge-domain arp term <bridge-domain-id> [disable]",
+ .function = bd_arp_term,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Add/delete IP address to MAC address mapping.
+ *
+ * The clib hash implementation stores uword entries in the hash table.
+ * The hash table mac_by_ip4 is keyed via IP4 address and stores the
+ * 6-byte MAC address directly in the hash table entry uword.
+ *
+ * @warning This only works for a 64-bit processor with an 8-byte uword,
+ * which means this code *WILL NOT WORK* for a 32-bit processor with a
+ * 4-byte uword.
+ */
+u32
+bd_add_del_ip_mac (u32 bd_index,
+ u8 * ip_addr, u8 * mac_addr, u8 is_ip6, u8 is_add)
+{
+ l2_bridge_domain_t *bd_cfg = l2input_bd_config (bd_index);
+ u64 new_mac = *(u64 *) mac_addr;
+ u64 *old_mac;
+ u16 *mac16 = (u16 *) & new_mac;
+
+ ASSERT (sizeof (uword) == sizeof (u64)); /* make sure uword is 8 bytes */
+ ASSERT (bd_is_valid (bd_cfg));
+
+  mac16[3] = 0; /* Clear last 2 unused bytes of the 8-byte MAC address */
+ if (is_ip6)
+ {
+ ip6_address_t *ip6_addr_key;
+ hash_pair_t *hp;
+ old_mac = (u64 *) hash_get_mem (bd_cfg->mac_by_ip6, ip_addr);
+ if (is_add)
+ {
+ if (old_mac == 0)
+	    { /* new entry - allocate and create ip6 address key */
+ ip6_addr_key = clib_mem_alloc (sizeof (ip6_address_t));
+ clib_memcpy (ip6_addr_key, ip_addr, sizeof (ip6_address_t));
+ }
+ else if (*old_mac == new_mac)
+	    { /* same mac entry already exists for ip6 address */
+ return 0;
+ }
+ else
+	    { /* update mac for ip6 address */
+ hp = hash_get_pair (bd_cfg->mac_by_ip6, ip_addr);
+ ip6_addr_key = (ip6_address_t *) hp->key;
+ }
+ hash_set_mem (bd_cfg->mac_by_ip6, ip6_addr_key, new_mac);
+ }
+ else
+ {
+ if (old_mac && (*old_mac == new_mac))
+ {
+ hp = hash_get_pair (bd_cfg->mac_by_ip6, ip_addr);
+ ip6_addr_key = (ip6_address_t *) hp->key;
+ hash_unset_mem (bd_cfg->mac_by_ip6, ip_addr);
+ clib_mem_free (ip6_addr_key);
+ }
+ else
+ return 1;
+ }
+ }
+ else
+ {
+ ip4_address_t ip4_addr = *(ip4_address_t *) ip_addr;
+ old_mac = (u64 *) hash_get (bd_cfg->mac_by_ip4, ip4_addr.as_u32);
+ if (is_add)
+ {
+ if (old_mac && (*old_mac == new_mac))
+	    return 0; /* mac entry already exists */
+ hash_set (bd_cfg->mac_by_ip4, ip4_addr.as_u32, new_mac);
+ }
+ else
+ {
+ if (old_mac && (*old_mac == new_mac))
+ hash_unset (bd_cfg->mac_by_ip4, ip4_addr.as_u32);
+ else
+ return 1;
+ }
+ }
+ return 0;
+}
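+
+/*
+ * Usage sketch: add an IP4-to-MAC mapping for ARP termination in an
+ * already-validated bridge domain (addresses are illustrative):
+ *
+ *   ip4_address_t ip4;
+ *   u8 mac[6] = { 0x52, 0x54, 0x00, 0x3b, 0x83, 0x1a };
+ *   ip4.as_u32 = clib_host_to_net_u32 (0xc0a8482d);
+ *   bd_add_del_ip_mac (bd_index, (u8 *) &ip4, mac, 0, 1);
+ *
+ * where 0xc0a8482d is 192.168.72.45 and the trailing 0 and 1 are is_ip6
+ * and is_add respectively.
+ */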
+
+/**
+ Set bridge-domain arp entry add/delete.
+ The CLI format is:
+ set bridge-domain arp entry <bridge-domain-id> <ip-addr> <mac-addr> [del]
+*/
+static clib_error_t *
+bd_arp_entry (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u8 is_add = 1;
+ u8 is_ip6 = 0;
+ u8 ip_addr[16];
+ u8 mac_addr[6];
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (bd_id == 0)
+ return clib_error_return (0,
+ "No operations on the default bridge domain are supported");
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p)
+ bd_index = *p;
+ else
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ if (unformat (input, "%U", unformat_ip4_address, ip_addr))
+ {
+ is_ip6 = 0;
+ }
+ else if (unformat (input, "%U", unformat_ip6_address, ip_addr))
+ {
+ is_ip6 = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "expecting IP address but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%U", unformat_ethernet_address, mac_addr))
+ {
+ error = clib_error_return (0, "expecting MAC address but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+
+  /* Add or delete the IP-MAC entry in the bridge domain */
+ if (bd_add_del_ip_mac (bd_index, ip_addr, mac_addr, is_ip6, is_add))
+ {
+ error = clib_error_return (0, "MAC %s for IP %U and MAC %U failed",
+ is_add ? "add" : "del",
+				     is_ip6 ?
+				     format_ip6_address : format_ip4_address,
+ ip_addr, format_ethernet_address, mac_addr);
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Add an ARP entry to an existing bridge-domain.
+ *
+ * @cliexpar
+ * Example of how to add an ARP entry (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a}
+ * Example of how to delete an ARP entry (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a del}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = {
+ .path = "set bridge-domain arp entry",
+ .short_help = "set bridge-domain arp entry <bridge-domain-id> <ip-addr> <mac-addr> [del]",
+ .function = bd_arp_entry,
+};
+/* *INDENT-ON* */
+
+u8 *
+format_vtr (u8 * s, va_list * args)
+{
+ u32 vtr_op = va_arg (*args, u32);
+ u32 dot1q = va_arg (*args, u32);
+ u32 tag1 = va_arg (*args, u32);
+ u32 tag2 = va_arg (*args, u32);
+ switch (vtr_op)
+ {
+ case L2_VTR_DISABLED:
+ return format (s, "none");
+ case L2_VTR_PUSH_1:
+ return format (s, "push-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+ case L2_VTR_PUSH_2:
+ return format (s, "push-2 %s %d %d", dot1q ? "dot1q" : "dot1ad", tag1,
+ tag2);
+ case L2_VTR_POP_1:
+ return format (s, "pop-1");
+ case L2_VTR_POP_2:
+ return format (s, "pop-2");
+ case L2_VTR_TRANSLATE_1_1:
+ return format (s, "trans-1-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+ case L2_VTR_TRANSLATE_1_2:
+ return format (s, "trans-1-2 %s %d %d", dot1q ? "dot1q" : "dot1ad",
+ tag1, tag2);
+ case L2_VTR_TRANSLATE_2_1:
+ return format (s, "trans-2-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+ case L2_VTR_TRANSLATE_2_2:
+ return format (s, "trans-2-2 %s %d %d", dot1q ? "dot1q" : "dot1ad",
+ tag1, tag2);
+ default:
+ return format (s, "none");
+ }
+}
+
+/**
+ Show bridge-domain state.
+ The CLI format is:
+    show bridge-domain [<bd_id>]
+*/
+static clib_error_t *
+bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index = ~0;
+ l2_bridge_domain_t *bd_config;
+ u32 start, end;
+ u32 detail = 0;
+ u32 intf = 0;
+ u32 arp = 0;
+ u32 bd_tag = 0;
+ u32 bd_id = ~0;
+ uword *p;
+
+ start = 1;
+ end = vec_len (l2input_main.bd_configs);
+
+ if (unformat (input, "%d", &bd_id))
+ {
+ if (unformat (input, "detail"))
+ detail = 1;
+ else if (unformat (input, "det"))
+ detail = 1;
+ if (unformat (input, "int"))
+ intf = 1;
+ if (unformat (input, "arp"))
+ arp = 1;
+ if (unformat (input, "bd-tag"))
+ bd_tag = 1;
+
+ if (bd_id == 0)
+ return clib_error_return (0,
+ "No operations on the default bridge domain are supported");
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ bd_index = *p;
+ else
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ if (bd_is_valid (bd_config))
+ {
+ start = bd_index;
+ end = start + 1;
+ }
+ else
+ {
+ vlib_cli_output (vm, "bridge-domain %d not in use", bd_id);
+ goto done;
+ }
+ }
+
+ /* Show all bridge-domains that have been initialized */
+ u32 printed = 0;
+ u8 *as = 0;
+ for (bd_index = start; bd_index < end; bd_index++)
+ {
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ if (bd_is_valid (bd_config))
+ {
+ if (!printed)
+ {
+ printed = 1;
+ vlib_cli_output (vm,
+ "%=8s %=7s %=4s %=9s %=9s %=9s %=9s %=9s %=9s %=9s",
+ "BD-ID", "Index", "BSN", "Age(min)",
+ "Learning", "U-Forwrd", "UU-Flood", "Flooding",
+ "ARP-Term", "BVI-Intf");
+ }
+
+ if (bd_config->mac_age)
+ as = format (as, "%d", bd_config->mac_age);
+ else
+ as = format (as, "off");
+ vlib_cli_output (vm,
+ "%=8d %=7d %=4d %=9v %=9s %=9s %=9s %=9s %=9s %=9U",
+ bd_config->bd_id, bd_index, bd_config->seq_num, as,
+ bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ?
+ "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_FWD ?
+ "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD ?
+ "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD ?
+ "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM ?
+ "on" : "off",
+ format_vnet_sw_if_index_name_with_NA,
+ vnm, bd_config->bvi_sw_if_index);
+ vec_reset_length (as);
+
+ if (detail || intf)
+ {
+ /* Show all member interfaces */
+ int i;
+ vec_foreach_index (i, bd_config->members)
+ {
+ l2_flood_member_t *member =
+ vec_elt_at_index (bd_config->members, i);
+ u8 swif_seq_num = *l2fib_swif_seq_num (member->sw_if_index);
+ u32 vtr_opr, dot1q, tag1, tag2;
+ if (i == 0)
+ {
+ vlib_cli_output (vm, "\n%=30s%=7s%=5s%=5s%=5s%=9s%=30s",
+ "Interface", "If-idx", "ISN", "SHG",
+ "BVI", "TxFlood", "VLAN-Tag-Rewrite");
+ }
+ l2vtr_get (vm, vnm, member->sw_if_index, &vtr_opr, &dot1q,
+ &tag1, &tag2);
+ vlib_cli_output (vm, "%=30U%=7d%=5d%=5d%=5s%=9s%=30U",
+ format_vnet_sw_if_index_name, vnm,
+ member->sw_if_index, member->sw_if_index,
+ swif_seq_num, member->shg,
+ member->flags & L2_FLOOD_MEMBER_BVI ? "*" :
+ "-", i < bd_config->flood_count ? "*" : "-",
+ format_vtr, vtr_opr, dot1q, tag1, tag2);
+ }
+ }
+
+ if ((detail || arp) &&
+ (bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM))
+ {
+ u32 ip4_addr;
+ ip6_address_t *ip6_addr;
+ u64 mac_addr;
+ vlib_cli_output (vm,
+ "\n IP4/IP6 to MAC table for ARP Termination");
+
+ /* *INDENT-OFF* */
+ hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4,
+ ({
+ vlib_cli_output (vm, "%=40U => %=20U",
+ format_ip4_address, &ip4_addr,
+ format_ethernet_address, &mac_addr);
+ }));
+
+ hash_foreach_mem (ip6_addr, mac_addr, bd_config->mac_by_ip6,
+ ({
+ vlib_cli_output (vm, "%=40U => %=20U",
+ format_ip6_address, ip6_addr,
+ format_ethernet_address, &mac_addr);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ if ((detail || bd_tag) && (bd_config->bd_tag))
+ {
+	      vlib_cli_output (vm, "\n BD-Tag: %s", bd_config->bd_tag);
+	    }
+ }
+ }
+ vec_free (as);
+
+ if (!printed)
+ {
+ vlib_cli_output (vm, "no bridge-domains in use");
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Show a summary of all the bridge-domain instances or detailed view of a
+ * single bridge-domain. Bridge-domains are created by adding an interface
+ * to a bridge using the '<em>set interface l2 bridge</em>' command.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of displaying all bridge-domains:
+ * @cliexstart{show bridge-domain}
+ * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf
+ * 0 0 off off off off off local0
+ * 200 1 on on on on off N/A
+ * @cliexend
+ *
+ * Example of displaying details of a single bridge-domain:
+ * @cliexstart{show bridge-domain 200 detail}
+ * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf
+ * 200 1 on on on on off N/A
+ *
+ * Interface Index SHG BVI VLAN-Tag-Rewrite
+ * GigabitEthernet0/8/0.200 3 0 - none
+ * GigabitEthernet0/9/0.200 4 0 - none
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_show_cli, static) = {
+ .path = "show bridge-domain",
+ .short_help = "show bridge-domain [bridge-domain-id [detail|int|arp|bd-tag]]",
+ .function = bd_show,
+};
+/* *INDENT-ON* */
+
+int
+bd_add_del (l2_bridge_domain_add_del_args_t * a)
+{
+ bd_main_t *bdm = &bd_main;
+ vlib_main_t *vm = bdm->vlib_main;
+ int rv = 0;
+
+ u32 bd_index = bd_find_index (bdm, a->bd_id);
+ if (a->is_add)
+ {
+ if (bd_index != ~0)
+ return VNET_API_ERROR_BD_ALREADY_EXISTS;
+ if (a->bd_id > L2_BD_ID_MAX)
+ return VNET_API_ERROR_BD_ID_EXCEED_MAX;
+ bd_index = bd_add_bd_index (bdm, a->bd_id);
+
+ u32 enable_flags = 0, disable_flags = 0;
+ if (a->flood)
+ enable_flags |= L2_FLOOD;
+ else
+ disable_flags |= L2_FLOOD;
+
+ if (a->uu_flood)
+ enable_flags |= L2_UU_FLOOD;
+ else
+ disable_flags |= L2_UU_FLOOD;
+
+ if (a->forward)
+ enable_flags |= L2_FWD;
+ else
+ disable_flags |= L2_FWD;
+
+ if (a->learn)
+ enable_flags |= L2_LEARN;
+ else
+ disable_flags |= L2_LEARN;
+
+ if (a->arp_term)
+ enable_flags |= L2_ARP_TERM;
+ else
+ disable_flags |= L2_ARP_TERM;
+
+ if (enable_flags)
+ bd_set_flags (vm, bd_index, enable_flags, 1 /* enable */ );
+
+ if (disable_flags)
+ bd_set_flags (vm, bd_index, disable_flags, 0 /* disable */ );
+
+ bd_set_mac_age (vm, bd_index, a->mac_age);
+
+ if (a->bd_tag)
+ bd_set_bd_tag (vm, bd_index, a->bd_tag);
+
+ }
+ else
+ {
+ if (bd_index == ~0)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (bd_index == 0)
+ return VNET_API_ERROR_BD_NOT_MODIFIABLE;
+ if (vec_len (l2input_main.bd_configs[bd_index].members))
+ return VNET_API_ERROR_BD_IN_USE;
+ rv = bd_delete (bdm, bd_index);
+ }
+
+ return rv;
+}
+
+/**
+ Create or delete bridge-domain.
+ The CLI format:
+   create bridge-domain <bd_id> [learn <0|1>] [forward <0|1>] [uu-flood <0|1>] [flood <0|1>]
+ [arp-term <0|1>] [mac-age <nn>] [bd-tag <tag>] [del]
+*/
+
+static clib_error_t *
+bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u8 is_add = 1;
+ u32 bd_id = ~0;
+ u32 flood = 1, forward = 1, learn = 1, uu_flood = 1, arp_term = 0;
+ u32 mac_age = 0;
+ u8 *bd_tag = NULL;
+ l2_bridge_domain_add_del_args_t _a, *a = &_a;
+ int rv;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &bd_id))
+ ;
+ else if (unformat (line_input, "flood %d", &flood))
+ ;
+ else if (unformat (line_input, "uu-flood %d", &uu_flood))
+ ;
+ else if (unformat (line_input, "forward %d", &forward))
+ ;
+ else if (unformat (line_input, "learn %d", &learn))
+ ;
+ else if (unformat (line_input, "arp-term %d", &arp_term))
+ ;
+ else if (unformat (line_input, "mac-age %d", &mac_age))
+ ;
+ else if (unformat (line_input, "bd-tag %s", &bd_tag))
+ ;
+ else if (unformat (line_input, "del"))
+ {
+ is_add = 0;
+ flood = uu_flood = forward = learn = 0;
+ }
+ else
+ break;
+ }
+
+ if (bd_id == ~0)
+ {
+ error = clib_error_return (0, "bridge-domain-id not specified");
+ goto done;
+ }
+
+ if (bd_id == 0)
+ {
+ error = clib_error_return (0, "bridge domain 0 can not be modified");
+ goto done;
+ }
+
+ if (mac_age > 255)
+ {
+ error = clib_error_return (0, "mac age must be less than 256");
+ goto done;
+ }
+ if ((bd_tag) && (strlen ((char *) bd_tag) > 63))
+ {
+ error = clib_error_return (0, "bd-tag cannot be longer than 63");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (*a));
+ a->is_add = is_add;
+ a->bd_id = bd_id;
+ a->flood = (u8) flood;
+ a->uu_flood = (u8) uu_flood;
+ a->forward = (u8) forward;
+ a->learn = (u8) learn;
+ a->arp_term = (u8) arp_term;
+ a->mac_age = (u8) mac_age;
+ a->bd_tag = bd_tag;
+
+ rv = bd_add_del (a);
+
+ switch (rv)
+ {
+ case 0:
+ if (is_add)
+ vlib_cli_output (vm, "bridge-domain %d", bd_id);
+ break;
+ case VNET_API_ERROR_BD_IN_USE:
+ error = clib_error_return (0, "bridge domain in use - remove members");
+ goto done;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "bridge domain ID does not exist");
+ goto done;
+ case VNET_API_ERROR_BD_NOT_MODIFIABLE:
+ error = clib_error_return (0, "bridge domain 0 can not be modified");
+ goto done;
+ case VNET_API_ERROR_BD_ID_EXCEED_MAX:
+      error = clib_error_return (0, "bridge domain ID exceeds 16M limit");
+ goto done;
+ default:
+ error = clib_error_return (0, "bd_add_del returned %d", rv);
+ goto done;
+ }
+
+done:
+ vec_free (bd_tag);
+ unformat_free (line_input);
+
+ return error;
+}
+
+
+/*?
+ * Create/Delete bridge-domain instance
+ *
+ * @cliexpar
+ * @parblock
+ * Example of creating bridge-domain 1:
+ * @cliexstart{create bridge-domain 1}
+ * bridge-domain 1
+ * @cliexend
+ *
+ * Example of creating bridge-domain 2 with arp-term enabled and mac-age 60:
+ * @cliexstart{create bridge-domain 2 arp-term 1 mac-age 60}
+ * bridge-domain 2
+ *
+ * vpp# show bridge-domain
+ * ID Index BSN Age(min) Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf
+ * 0 0 0 off off off off off off local0
+ * 1 1 0 off on on off on off N/A
+ * 2 2 0 60 on on off on on N/A
+ *
+ * @cliexend
+ *
+ * Example of deleting bridge-domain 1:
+ * @cliexstart{create bridge-domain 1 del}
+ * @cliexend
+ * @endparblock
+?*/
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_create_cli, static) = {
+ .path = "create bridge-domain",
+ .short_help = "create bridge-domain <bridge-domain-id>"
+ " [learn <0|1>] [forward <0|1>] [uu-flood <0|1>] [flood <0|1>] [arp-term <0|1>]"
+ " [mac-age <nn>] [bd-tag <tag>] [del]",
+ .function = bd_add_del_command_fn,
+};
+/* *INDENT-ON* */
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h
new file mode 100644
index 00000000..fd34ae67
--- /dev/null
+++ b/src/vnet/l2/l2_bd.h
@@ -0,0 +1,190 @@
+/*
+ * l2_bd.h : layer 2 bridge domain
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2bd_h
+#define included_l2bd_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+typedef struct
+{
+ /* hash bd_id -> bd_index */
+ uword *bd_index_by_bd_id;
+
+ /* Busy bd_index bitmap */
+ uword *bd_index_bitmap;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} bd_main_t;
+
+extern bd_main_t bd_main;
+
+/* Bridge domain member */
+
+#define L2_FLOOD_MEMBER_NORMAL 0
+#define L2_FLOOD_MEMBER_BVI 1
+
+typedef struct
+{
+ u32 sw_if_index; /* the output L2 interface */
+ u8 flags; /* 0=normal, 1=bvi */
+ u8 shg; /* split horizon group number */
+ u16 spare;
+} l2_flood_member_t;
+
+/* Per-bridge domain configuration */
+
+typedef struct
+{
+  /* Contains bit enables for flooding, learning, and forwarding.
+     All other feature bits should always be set. */
+  u32 feature_bitmap;
+
+  /* identity of the bridge-domain's BVI interface
+     set to ~0 if there is no BVI */
+  u32 bvi_sw_if_index;
+
+ /* bridge domain id, not to be confused with bd_index */
+ u32 bd_id;
+
+ /* Vector of member ports */
+ l2_flood_member_t *members;
+
+ /* First flood_count member ports are flooded */
+ u32 flood_count;
+
+  /* Tunnel masters (multicast vxlan) are always flooded */
+  u32 tun_master_count;
+
+  /* Tunnels (unicast vxlan) are flooded only if there are no masters */
+  u32 tun_normal_count;
+
+ /* hash ip4/ip6 -> mac for arp/nd termination */
+ uword *mac_by_ip4;
+ uword *mac_by_ip6;
+
+ /* mac aging */
+ u8 mac_age;
+
+ /* sequence number for bridge domain based flush of MACs */
+ u8 seq_num;
+
+ /* Bridge domain tag (C string NULL terminated) */
+ u8 *bd_tag;
+
+} l2_bridge_domain_t;
+
+/* Limit Bridge Domain ID to 24 bits to match 24-bit VNI range */
+#define L2_BD_ID_MAX ((1<<24)-1)
+
+typedef struct
+{
+ u32 bd_id;
+ u8 flood;
+ u8 uu_flood;
+ u8 forward;
+ u8 learn;
+ u8 arp_term;
+ u8 mac_age;
+ u8 *bd_tag;
+ u8 is_add;
+} l2_bridge_domain_add_del_args_t;
+
+/* Return 1 if bridge domain has been initialized */
+always_inline u32
+bd_is_valid (l2_bridge_domain_t * bd_config)
+{
+ return (bd_config->feature_bitmap != 0);
+}
+
+/* Init bridge domain if not done already */
+void bd_validate (l2_bridge_domain_t * bd_config);
+
+
+void
+bd_add_member (l2_bridge_domain_t * bd_config, l2_flood_member_t * member);
+
+u32 bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index);
+
+
+#define L2_LEARN (1<<0)
+#define L2_FWD (1<<1)
+#define L2_FLOOD (1<<2)
+#define L2_UU_FLOOD (1<<3)
+#define L2_ARP_TERM (1<<4)
+
+u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable);
+void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age);
+int bd_add_del (l2_bridge_domain_add_del_args_t * args);
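+
+/*
+ * Example: enable learning and forwarding on a bridge domain in one call,
+ * leaving the other feature bits untouched (a sketch; vm is the usual
+ * vlib_main_t pointer):
+ *
+ *   u32 bd_index = bd_find_index (&bd_main, 200);
+ *   if (bd_index != ~0)
+ *     bd_set_flags (vm, bd_index, L2_LEARN | L2_FWD, 1);
+ */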
+
+/**
+ * \brief Get a bridge domain.
+ *
+ * Get a bridge domain with the given bridge domain ID.
+ *
+ * \param bdm bd_main pointer.
+ * \param bd_id The bridge domain ID
+ * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector.
+ */
+u32 bd_find_index (bd_main_t * bdm, u32 bd_id);
+
+/**
+ * \brief Create a bridge domain.
+ *
+ * Create a bridge domain with the given bridge domain ID
+ *
+ * \param bdm bd_main pointer.
+ * \param bd_id The bridge domain ID
+ * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector.
+ */
+u32 bd_add_bd_index (bd_main_t * bdm, u32 bd_id);
+
+/**
+ * \brief Get or create a bridge domain.
+ *
+ * Get a bridge domain with the given bridge domain ID, if one exists; otherwise
+ * create one with the given ID, allocating the first unused bridge domain index.
+ *
+ * \param bdm bd_main pointer.
+ * \param bd_id The bridge domain ID
+ * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector.
+ */
+static inline u32
+bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id)
+{
+ u32 bd_index = bd_find_index (bdm, bd_id);
+ if (bd_index == ~0)
+ return bd_add_bd_index (bdm, bd_id);
+ return bd_index;
+}
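+
+/*
+ * Typical call sequence when binding an interface to a bridge domain by
+ * ID (a sketch; error handling omitted):
+ *
+ *   bd_main_t *bdm = &bd_main;
+ *   u32 bd_index = bd_find_or_add_bd_index (bdm, 200);
+ *   l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+ *   bd_validate (bd_config);
+ */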
+
+u32 bd_add_del_ip_mac (u32 bd_index,
+ u8 * ip_addr, u8 * mac_addr, u8 is_ip6, u8 is_add);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bvi.c b/src/vnet/l2/l2_bvi.c
new file mode 100644
index 00000000..f239743a
--- /dev/null
+++ b/src/vnet/l2/l2_bvi.c
@@ -0,0 +1,40 @@
+/*
+ * l2_bvi.c : layer 2 Bridged Virtual Interface
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/l2_fwd.h>
+#include <vnet/l2/l2_flood.h>
+#include <vnet/l2/l2_bvi.h>
+
+
+/* Call the L2 nodes that need the ethertype mapping */
+void
+l2bvi_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index)
+{
+ l2fwd_register_input_type (vm, type, node_index);
+ l2flood_register_input_type (vm, type, node_index);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h
new file mode 100644
index 00000000..662ec402
--- /dev/null
+++ b/src/vnet/l2/l2_bvi.h
@@ -0,0 +1,117 @@
+/*
+ * l2_bvi.h : layer 2 Bridged Virtual Interface
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2bvi_h
+#define included_l2bvi_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/sparse_vec.h>
+
+#include <vnet/l2/l2_input.h>
+
+#define TO_BVI_ERR_OK 0
+#define TO_BVI_ERR_BAD_MAC 1
+#define TO_BVI_ERR_ETHERTYPE 2
+
+/**
+ * Send a packet from L2 processing to L3 via the BVI interface.
+ * Set next0 to the proper L3 input node.
+ * Return an error if the packet isn't what we expect.
+ */
+
+static_always_inline u32
+l2_to_bvi (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ vlib_buffer_t * b0,
+ u32 bvi_sw_if_index, next_by_ethertype_t * l3_next, u32 * next0)
+{
+ u8 l2_len;
+ u16 ethertype;
+ u8 *l3h;
+ ethernet_header_t *e0;
+ vnet_hw_interface_t *hi;
+
+ e0 = vlib_buffer_get_current (b0);
+ hi = vnet_get_sup_hw_interface (vnet_main, bvi_sw_if_index);
+
+ /* Perform L3 my-mac filter */
+ if ((!ethernet_address_cast (e0->dst_address)) &&
+ (!eth_mac_equal ((u8 *) e0, hi->hw_address)))
+ {
+ return TO_BVI_ERR_BAD_MAC;
+ }
+
+ /* Save L2 header position which may be changed due to packet replication */
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
+
+ /* Strip L2 header */
+ l2_len = vnet_buffer (b0)->l2.l2_len;
+ vlib_buffer_advance (b0, l2_len);
+
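+  /* The two bytes immediately before the L3 header are the ethertype of
+     the L2 header just stripped; this holds for tagged frames too, since
+     l2.l2_len includes any VLAN tags. */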
+ l3h = vlib_buffer_get_current (b0);
+ ethertype = clib_net_to_host_u16 (*(u16 *) (l3h - 2));
+
+ /* Set the input interface to be the BVI interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+
+ /* Go to appropriate L3 input node */
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ *next0 = l3_next->input_next_ip4;
+ }
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ {
+ *next0 = l3_next->input_next_ip6;
+ }
+ else
+ {
+ /* uncommon ethertype, check table */
+ u32 i0;
+
+ i0 = sparse_vec_index (l3_next->input_next_by_type, ethertype);
+ *next0 = vec_elt (l3_next->input_next_by_type, i0);
+
+ if (i0 == SPARSE_VEC_INVALID_INDEX)
+ {
+ return TO_BVI_ERR_ETHERTYPE;
+ }
+ }
+
+ /* increment BVI RX interface stat */
+ vlib_increment_combined_counter
+ (vnet_main->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_main->thread_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX],
+ 1, vlib_buffer_length_in_chain (vlib_main, b0));
+ return TO_BVI_ERR_OK;
+}
+
+void
+l2bvi_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_classify.h b/src/vnet/l2/l2_classify.h
new file mode 100644
index 00000000..100c584a
--- /dev/null
+++ b/src/vnet/l2/l2_classify.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_l2_input_classify_h__
+#define __included_vnet_l2_input_classify_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum
+{
+ L2_INPUT_CLASSIFY_NEXT_DROP,
+ L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT,
+ L2_INPUT_CLASSIFY_NEXT_IP4_INPUT,
+ L2_INPUT_CLASSIFY_NEXT_IP6_INPUT,
+ L2_INPUT_CLASSIFY_NEXT_LI,
+ L2_INPUT_CLASSIFY_N_NEXT,
+} l2_input_classify_next_t;
+
+typedef enum
+{
+ L2_INPUT_CLASSIFY_TABLE_IP4,
+ L2_INPUT_CLASSIFY_TABLE_IP6,
+ L2_INPUT_CLASSIFY_TABLE_OTHER,
+ L2_INPUT_CLASSIFY_N_TABLES,
+} l2_input_classify_table_id_t;
+
+typedef enum
+{
+ L2_OUTPUT_CLASSIFY_NEXT_DROP,
+ L2_OUTPUT_CLASSIFY_N_NEXT,
+} l2_output_classify_next_t;
+
+typedef enum
+{
+ L2_OUTPUT_CLASSIFY_TABLE_IP4,
+ L2_OUTPUT_CLASSIFY_TABLE_IP6,
+ L2_OUTPUT_CLASSIFY_TABLE_OTHER,
+ L2_OUTPUT_CLASSIFY_N_TABLES,
+} l2_output_classify_table_id_t;
+
+typedef struct _l2_classify_main
+{
+ /* Next nodes for L2 input and output features */
+ u32 l2_inp_feat_next[32];
+ u32 l2_out_feat_next[32];
+
+ /* Per-address-family classifier table vectors */
+ u32 *classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_N_TABLES];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ vnet_classify_main_t *vnet_classify_main;
+} l2_input_classify_main_t;
+
+typedef struct _l2_classify_main l2_output_classify_main_t;
+
+extern l2_input_classify_main_t l2_input_classify_main;
+extern vlib_node_registration_t l2_input_classify_node;
+
+extern l2_output_classify_main_t l2_output_classify_main;
+extern vlib_node_registration_t l2_output_classify_node;
+
+void vnet_l2_input_classify_enable_disable (u32 sw_if_index,
+ int enable_disable);
+
+int vnet_l2_input_classify_set_tables (u32 sw_if_index, u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 other_table_index);
+
+void vnet_l2_output_classify_enable_disable (u32 sw_if_index,
+ int enable_disable);
+
+int vnet_l2_output_classify_set_tables (u32 sw_if_index, u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 other_table_index);
+
+#endif /* __included_vnet_l2_input_classify_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_efp_filter.c b/src/vnet/l2/l2_efp_filter.c
new file mode 100644
index 00000000..faf78153
--- /dev/null
+++ b/src/vnet/l2/l2_efp_filter.c
@@ -0,0 +1,575 @@
+/*
+ * l2_efp_filter.c : layer 2 egress EFP Filter processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/cache.h>
+
+/**
+ * @file
+ * @brief EFP-filter - Ethernet Flow Point Filter.
+ *
+ * It is possible to transmit a packet out of a subinterface with VLAN tags
+ * that are not compatible with that subinterface. In other words, if that
+ * packet arrived on the output port, it would not be classified as coming
+ * from the output subinterface. This can happen in various ways: through
+ * misconfiguration, by putting subinterfaces with different VLAN encaps in
+ * the same bridge-domain, etc. The EFP Filter Check detects such packets
+ * and drops them. It consists of two checks, one that verifies the packet
+ * prior to output VLAN tag rewrite and one that verifies the packet after
+ * VLAN tag rewrite.
+ *
+ */
+typedef struct
+{
+ /* Next nodes for L2 output features */
+ u32 l2_out_feat_next[32];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_efp_filter_main_t;
+
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u8 raw[12]; /* raw data (vlans) */
+ u32 sw_if_index;
+} l2_efp_filter_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_efp_filter_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_efp_filter_trace_t *t = va_arg (*args, l2_efp_filter_trace_t *);
+
+  s = format (s, "l2-efp-filter: sw_if_index %d dst %U src %U data "
+ "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4],
+ t->raw[5], t->raw[6], t->raw[7], t->raw[8], t->raw[9],
+ t->raw[10], t->raw[11]);
+ return s;
+}
+
+l2_efp_filter_main_t l2_efp_filter_main;
+
+static vlib_node_registration_t l2_efp_filter_node;
+
+#define foreach_l2_efp_filter_error \
+_(L2_EFP_FILTER, "L2 EFP filter packets") \
+_(DROP, "L2 EFP filter post-rewrite drops")
+
+typedef enum
+{
+#define _(sym,str) L2_EFP_FILTER_ERROR_##sym,
+ foreach_l2_efp_filter_error
+#undef _
+ L2_EFP_FILTER_N_ERROR,
+} l2_efp_filter_error_t;
+
+static char *l2_efp_filter_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_efp_filter_error
+#undef _
+};
+
+typedef enum
+{
+ L2_EFP_FILTER_NEXT_DROP,
+ L2_EFP_FILTER_N_NEXT,
+} l2_efp_filter_next_t;
+
+
+/**
+ * Extract fields from the packet that will be used in interface
+ * classification.
+ */
+static_always_inline void
+extract_keys (vnet_main_t * vnet_main,
+ u32 sw_if_index0,
+ vlib_buffer_t * b0,
+ u32 * port_sw_if_index0,
+ u16 * first_ethertype0,
+ u16 * outer_id0, u16 * inner_id0, u32 * match_flags0)
+{
+ ethernet_header_t *e0;
+ ethernet_vlan_header_t *h0;
+ u32 tag_len;
+ u32 tag_num;
+
+ *port_sw_if_index0 =
+ vnet_get_sup_sw_interface (vnet_main, sw_if_index0)->sw_if_index;
+
+ e0 = vlib_buffer_get_current (b0);
+ h0 = (ethernet_vlan_header_t *) (e0 + 1);
+
+ *first_ethertype0 = clib_net_to_host_u16 (e0->type);
+ *outer_id0 = clib_net_to_host_u16 (h0[0].priority_cfi_and_id);
+ *inner_id0 = clib_net_to_host_u16 (h0[1].priority_cfi_and_id);
+
+ tag_len = vnet_buffer (b0)->l2.l2_len - sizeof (ethernet_header_t);
+ tag_num = tag_len / sizeof (ethernet_vlan_header_t);
+ *match_flags0 = eth_create_valid_subint_match_flags (tag_num);
+}
+
+/*
+ * EFP filtering is a basic switch feature which prevents an interface from
+ * transmitting a packet that doesn't match the interface's ingress match
+ * criteria. The check has two parts, one performed before egress vlan tag
+ * rewrite and one after.
+ *
+ * The pre-rewrite check ensures the packet matches what an ingress packet looks
+ * like after going through the interface's ingress tag rewrite operation. Only
+ * pushed tags are compared. So:
+ * - if the ingress vlan tag rewrite pushes no tags (or is not enabled),
+ * any packet passes the filter
+ * - if the ingress vlan tag rewrite pushes one tag,
+ * the packet must have at least one tag, and the outer tag must match the pushed tag
+ * - if the ingress vlan tag rewrite pushes two tags,
+ * the packet must have at least two tags, and the outer two tags must match the pushed tags
+ *
+ * The pre-rewrite check is performed in the l2-output node.
+ *
+ * The post-rewrite check ensures the packet matches what an ingress packet looks
+ * like before going through the interface's ingress tag rewrite operation. It verifies
+ * that such a packet arriving on the wire at this port would be classified as arriving
+ * on an input interface equal to the packet's output interface. This can be done by running
+ * the output packet's vlan tags and output port through the interface classification,
+ * and checking if the resulting interface matches the output interface.
+ *
+ * The post-rewrite check is performed here.
+ */
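+
+/*
+ * Worked example (editorial addition): suppose a subinterface is
+ * configured to match single-tagged packets with outer VLAN 100. On
+ * egress, the post-rewrite check runs the outgoing packet's tags back
+ * through interface classification; a packet leaving that subinterface
+ * carrying outer VLAN 200 would classify to a different subinterface
+ * (or to none) and is therefore dropped here.
+ */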
+
+static uword
+l2_efp_filter_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_efp_filter_next_t next_index;
+ l2_efp_filter_main_t *msm = &l2_efp_filter_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_efp_filter_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ u16 first_ethertype0, first_ethertype1;
+ u16 outer_id0, inner_id0, outer_id1, inner_id1;
+ u32 match_flags0, match_flags1;
+ u32 port_sw_if_index0, subint_sw_if_index0, port_sw_if_index1,
+ subint_sw_if_index1;
+ vnet_hw_interface_t *hi0, *hi1;
+ main_intf_t *main_intf0, *main_intf1;
+ vlan_intf_t *vlan_intf0, *vlan_intf1;
+ qinq_intf_t *qinq_intf0, *qinq_intf1;
+ u32 is_l20, is_l21;
+ __attribute__ ((unused)) u32 matched0, matched1;
+ u8 error0, error1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3, *p4, *p5;
+ __attribute__ ((unused)) u32 sw_if_index2, sw_if_index3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ /*
+ * Prefetch the input config for the N+1 loop iteration
+ * This depends on the buffer header above
+ */
+ sw_if_index2 = vnet_buffer (p2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer (p3)->sw_if_index[VLIB_TX];
+ /*
+ * $$$ TODO
+ * CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD);
+ * CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD);
+ */
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+ /* process 2 packets */
+ em->counters[node_counter_base_index +
+ L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 2;
+
+ /* Determine next node */
+ next0 = vnet_l2_feature_next (b0, msm->l2_out_feat_next,
+ L2OUTPUT_FEAT_EFP_FILTER);
+ next1 = vnet_l2_feature_next (b1, msm->l2_out_feat_next,
+ L2OUTPUT_FEAT_EFP_FILTER);
+
+ /* perform the efp filter check on two packets */
+
+ extract_keys (msm->vnet_main,
+ sw_if_index0,
+ b0,
+ &port_sw_if_index0,
+ &first_ethertype0,
+ &outer_id0, &inner_id0, &match_flags0);
+
+ extract_keys (msm->vnet_main,
+ sw_if_index1,
+ b1,
+ &port_sw_if_index1,
+ &first_ethertype1,
+ &outer_id1, &inner_id1, &match_flags1);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index0,
+ first_ethertype0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0, &vlan_intf0, &qinq_intf0);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index1,
+ first_ethertype1,
+ outer_id1,
+ inner_id1,
+ &hi1,
+ &main_intf1, &vlan_intf1, &qinq_intf1);
+
+ matched0 = eth_identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0,
+ &subint_sw_if_index0,
+ &error0, &is_l20);
+
+ matched1 = eth_identify_subint (hi1,
+ b1,
+ match_flags1,
+ main_intf1,
+ vlan_intf1,
+ qinq_intf1,
+ &subint_sw_if_index1,
+ &error1, &is_l21);
+
+ if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0))
+ {
+ /* Drop packet */
+ next0 = L2_EFP_FILTER_NEXT_DROP;
+ b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE (sw_if_index1 != subint_sw_if_index1))
+ {
+ /* Drop packet */
+ next1 = L2_EFP_FILTER_NEXT_DROP;
+ b1->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ clib_memcpy (t->raw, &h1->type, sizeof (t->raw));
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ u16 first_ethertype0;
+ u16 outer_id0, inner_id0;
+ u32 match_flags0;
+ u32 port_sw_if_index0, subint_sw_if_index0;
+ vnet_hw_interface_t *hi0;
+ main_intf_t *main_intf0;
+ vlan_intf_t *vlan_intf0;
+ qinq_intf_t *qinq_intf0;
+ u32 is_l20;
+ __attribute__ ((unused)) u32 matched0;
+ u8 error0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ /* process 1 packet */
+ em->counters[node_counter_base_index +
+ L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 1;
+
+ /* Determine next node */
+ next0 = vnet_l2_feature_next (b0, msm->l2_out_feat_next,
+ L2OUTPUT_FEAT_EFP_FILTER);
+
+ /* perform the efp filter check on one packet */
+
+ extract_keys (msm->vnet_main,
+ sw_if_index0,
+ b0,
+ &port_sw_if_index0,
+ &first_ethertype0,
+ &outer_id0, &inner_id0, &match_flags0);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index0,
+ first_ethertype0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0, &vlan_intf0, &qinq_intf0);
+
+ matched0 = eth_identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0,
+ &subint_sw_if_index0,
+ &error0, &is_l20);
+
+ if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0))
+ {
+ /* Drop packet */
+ next0 = L2_EFP_FILTER_NEXT_DROP;
+ b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_efp_filter_node, static) = {
+ .function = l2_efp_filter_node_fn,
+ .name = "l2-efp-filter",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_efp_filter_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_efp_filter_error_strings),
+ .error_strings = l2_efp_filter_error_strings,
+
+ .n_next_nodes = L2_EFP_FILTER_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_EFP_FILTER_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_efp_filter_node, l2_efp_filter_node_fn)
+
+clib_error_t *
+l2_efp_filter_init (vlib_main_t * vm)
+{
+ l2_efp_filter_main_t *mp = &l2_efp_filter_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_efp_filter_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ mp->l2_out_feat_next);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_efp_filter_init);
+
+
+/** Enable/disable the EFP Filter check on the subinterface. */
+void
+l2_efp_filter_configure (vnet_main_t * vnet_main, u32 sw_if_index, u32 enable)
+{
+ /* set the interface flag */
+ l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_EFP_FILTER, enable);
+}
+
+
+/**
+ * Set subinterface egress efp filter enable/disable.
+ * The CLI format is:
+ *    set interface l2 efp-filter <interface> [disable]
+ */
+static clib_error_t *
+int_l2_efp_filter (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* enable/disable the feature */
+ l2_efp_filter_configure (vnm, sw_if_index, enable);
+
+done:
+ return error;
+}
+
+
+/*?
+ * EFP filtering is a basic switch feature which prevents an interface from
+ * transmitting a packet that doesn't match the interface's ingress match
+ * criteria. The check has two parts, one performed before egress vlan tag
+ * rewrite and one after. This command enables or disables the EFP filtering
+ * for a given sub-interface.
+ *
+ * @cliexpar
+ * Example of how to enable a Layer 2 efp-filter on a sub-interface:
+ * @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200}
+ * Example of how to disable a Layer 2 efp-filter on a sub-interface:
+ * @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = {
+ .path = "set interface l2 efp-filter",
+ .short_help = "set interface l2 efp-filter <interface> [disable]",
+ .function = int_l2_efp_filter,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_efp_filter.h b/src/vnet/l2/l2_efp_filter.h
new file mode 100644
index 00000000..f40851df
--- /dev/null
+++ b/src/vnet/l2/l2_efp_filter.h
@@ -0,0 +1,33 @@
+/*
+ * l2_efp_filter.h : layer 2 egress EFP Filter processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef included_vnet_l2_efp_filter_h
+#define included_vnet_l2_efp_filter_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+#endif /* included_vnet_l2_efp_filter_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c
new file mode 100644
index 00000000..64b3275b
--- /dev/null
+++ b/src/vnet/l2/l2_fib.c
@@ -0,0 +1,1250 @@
+/*
+ * l2_fib.c : layer 2 forwarding table (aka mac table)
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_learn.h>
+#include <vnet/l2/l2_bd.h>
+
+#include <vppinfra/bihash_template.c>
+
+#include <vlibmemory/api.h>
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define endian-swap functions */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/**
+ * @file
+ * @brief Ethernet MAC Address FIB Table Management.
+ *
+ * The MAC Address forwarding table for bridge-domains is called the l2fib.
+ * Entries are added automatically as part of mac learning, but MAC Addresses
+ * entries can also be added manually.
+ *
+ */
+
+l2fib_main_t l2fib_main;
+
+/** Format sw_if_index. If the value is ~0, use the text "N/A" */
+u8 *
+format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ u32 sw_if_index = va_arg (*args, u32);
+ if (sw_if_index == ~0)
+ return format (s, "N/A");
+
+ vnet_sw_interface_t *swif = vnet_get_sw_interface_safe (vnm, sw_if_index);
+ if (!swif)
+ return format (s, "Stale");
+
+  return format (s, "%U", format_vnet_sw_interface_name, vnm, swif);
+}
+
+void
+l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key,
+ l2fib_entry_result_t ** l2fe_res)
+{
+ l2fib_main_t *msm = &l2fib_main;
+ BVT (clib_bihash) * h = &msm->mac_table;
+ BVT (clib_bihash_bucket) * b;
+ BVT (clib_bihash_value) * v;
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ int i, j, k;
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ key.raw = v->kvp[k].key;
+ result.raw = v->kvp[k].value;
+
+ if ((bd_index == ~0) || (bd_index == key.fields.bd_index))
+ {
+ vec_add1 (*l2fe_key, key);
+ vec_add1 (*l2fe_res, result);
+ }
+ }
+ v++;
+ }
+ }
+}
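+
+/*
+ * Editorial sketch (not part of the original source): dumping every MAC
+ * entry of bridge-domain index 1. The caller owns, and must free, the
+ * two result vectors.
+ */
+#if 0
+  l2fib_entry_key_t *l2fe_key = 0;
+  l2fib_entry_result_t *l2fe_res = 0;
+  int i;
+
+  l2fib_table_dump (1 /* bd_index */, &l2fe_key, &l2fe_res);
+  for (i = 0; i < vec_len (l2fe_key); i++)
+    ;				/* inspect l2fe_key[i] / l2fe_res[i] */
+  vec_free (l2fe_key);
+  vec_free (l2fe_res);
+#endif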
+
+/** Display the contents of the l2fib. */
+static clib_error_t *
+show_l2fib (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ l2fib_main_t *msm = &l2fib_main;
+ l2_bridge_domain_t *bd_config;
+ BVT (clib_bihash) * h = &msm->mac_table;
+ BVT (clib_bihash_bucket) * b;
+ BVT (clib_bihash_value) * v;
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ u32 first_entry = 1;
+ u64 total_entries = 0;
+ int i, j, k;
+ u8 verbose = 0;
+ u8 raw = 0;
+ u8 learn = 0;
+ u32 bd_id, bd_index = ~0;
+ u8 now = (u8) (vlib_time_now (vm) / 60);
+ u8 *s = 0;
+
+ if (unformat (input, "raw"))
+ raw = 1;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "bd_index %d", &bd_index))
+ verbose = 1;
+ else if (unformat (input, "learn"))
+ {
+ learn = 1;
+ verbose = 0;
+ }
+ else if (unformat (input, "bd_id %d", &bd_id))
+ {
+ uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ {
+ if (learn == 0)
+ verbose = 1;
+ bd_index = p[0];
+ }
+ else
+ {
+ vlib_cli_output (vm, "no such bridge domain id");
+ return 0;
+ }
+ }
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ if ((verbose || learn) && first_entry)
+ {
+ first_entry = 0;
+ vlib_cli_output (vm,
+ "%=19s%=7s%=7s%=8s%=9s%=7s%=7s%=5s%=30s",
+ "Mac-Address", "BD-Idx", "If-Idx",
+ "BSN-ISN", "Age(min)", "static", "filter",
+ "bvi", "Interface-Name");
+ }
+
+ key.raw = v->kvp[k].key;
+ result.raw = v->kvp[k].value;
+
+ if ((verbose || learn)
+		    && ((bd_index >> 31) || (bd_index == key.fields.bd_index)))
+ {
+ if (learn && result.fields.age_not)
+ {
+ total_entries++;
+ continue; /* skip provisioned macs */
+ }
+
+ bd_config = vec_elt_at_index (l2input_main.bd_configs,
+ key.fields.bd_index);
+
+ if (bd_config->mac_age && !result.fields.age_not)
+ {
+ i16 delta = now - result.fields.timestamp;
+ delta += delta < 0 ? 256 : 0;
+ s = format (s, "%d", delta);
+ }
+ else
+ s = format (s, "-");
+
+ vlib_cli_output (vm,
+ "%=19U%=7d%=7d %3d/%-3d%=9v%=7s%=7s%=5s%=30U",
+ format_ethernet_address, key.fields.mac,
+ key.fields.bd_index,
+ result.fields.sw_if_index == ~0
+ ? -1 : result.fields.sw_if_index,
+ result.fields.sn.bd, result.fields.sn.swif,
+ s, result.fields.static_mac ? "*" : "-",
+ result.fields.filter ? "*" : "-",
+ result.fields.bvi ? "*" : "-",
+ format_vnet_sw_if_index_name_with_NA,
+ msm->vnet_main, result.fields.sw_if_index);
+ vec_reset_length (s);
+ }
+ total_entries++;
+ }
+ v++;
+ }
+ }
+
+ if (total_entries == 0)
+ vlib_cli_output (vm, "no l2fib entries");
+ else
+ {
+ l2learn_main_t *lm = &l2learn_main;
+ vlib_cli_output (vm, "L2FIB total/learned entries: %d/%d "
+ "Last scan time: %.4esec Learn limit: %d ",
+ total_entries, lm->global_learn_count,
+ msm->age_scan_duration, lm->global_learn_limit);
+ if (lm->client_pid)
+ vlib_cli_output (vm, "L2MAC events client PID: %d "
+ "Last e-scan time: %.4esec Delay: %.2esec "
+ "Max macs in event: %d",
+ lm->client_pid, msm->evt_scan_duration,
+ msm->event_scan_delay, msm->max_macs_in_event);
+ }
+
+ if (raw)
+ vlib_cli_output (vm, "Raw Hash Table:\n%U\n",
+ BV (format_bihash), h, 1 /* verbose */ );
+
+ vec_free (s);
+ return 0;
+}
+
+/*?
+ * This command displays the MAC Address entries of the L2 FIB table.
+ * Output can be filtered to just get the number of MAC Addresses or display
+ * each MAC Address for all bridge domains or just a single bridge domain.
+ *
+ * @cliexpar
+ * Example of how to display the number of MAC Address entries in the L2
+ * FIB table:
+ * @cliexstart{show l2fib}
+ * 3 l2fib entries
+ * @cliexend
+ * Example of how to display all the MAC Address entries in the L2
+ * FIB table:
+ * @cliexstart{show l2fib verbose}
+ * Mac Address BD Idx Interface Index static filter bvi refresh timestamp
+ * 52:54:00:53:18:33 1 GigabitEthernet0/8/0.200 3 0 0 0 0 0
+ * 52:54:00:53:18:55 1 GigabitEthernet0/8/0.200 3 1 0 0 0 0
+ * 52:54:00:53:18:77 1 N/A -1 1 1 0 0 0
+ * 3 l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
+ .path = "show l2fib",
+  .short_help = "show l2fib [verbose | learn | bd_id <nn> | bd_index <nn> | raw]",
+ .function = show_l2fib,
+};
+/* *INDENT-ON* */
+
+
+/* Remove all entries from the l2fib */
+void
+l2fib_clear_table (void)
+{
+ l2fib_main_t *mp = &l2fib_main;
+
+ /* Remove all entries */
+ BV (clib_bihash_free) (&mp->mac_table);
+ BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+ L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+ l2learn_main.global_learn_count = 0;
+}
+
+/** Clear all entries in L2FIB.
+ * @TODO: Later we may want a way to remove only the non-static entries
+ */
+static clib_error_t *
+clear_l2fib (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2fib_clear_table ();
+ return 0;
+}
+
+/*?
+ * This command clears all the MAC Address entries from the L2 FIB table.
+ *
+ * @cliexpar
+ * Example of how to clear the L2 FIB Table:
+ * @cliexcmd{clear l2fib}
+ * Example to show the L2 FIB Table has been cleared:
+ * @cliexstart{show l2fib verbose}
+ * no l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_l2fib_cli, static) = {
+ .path = "clear l2fib",
+ .short_help = "clear l2fib",
+ .function = clear_l2fib,
+};
+/* *INDENT-ON* */
+
+static inline l2fib_seq_num_t
+l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index)
+{
+ l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+ /* *INDENT-OFF* */
+ return (l2fib_seq_num_t) {
+ .swif = *l2fib_swif_seq_num (sw_if_index),
+ .bd = bd_config->seq_num,
+ };
+ /* *INDENT-ON* */
+}
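+
+/*
+ * Editorial note: the per-interface and per-bridge-domain sequence
+ * numbers implement O(1) MAC flush. A flush merely bumps the relevant
+ * seq_num; entries whose stored l2fib_seq_num_t no longer matches the
+ * current value are treated as stale and deleted on the next ager scan
+ * (see l2fib_scan below).
+ */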
+
+/**
+ * Add an entry to the l2fib.
+ * If the entry already exists then overwrite it
+ */
+void
+l2fib_add_entry (u64 mac, u32 bd_index,
+ u32 sw_if_index, u8 static_mac, u8 filter_mac, u8 bvi_mac)
+{
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ __attribute__ ((unused)) u32 bucket_contents;
+ l2fib_main_t *fm = &l2fib_main;
+ l2learn_main_t *lm = &l2learn_main;
+ BVT (clib_bihash_kv) kv;
+
+ /* set up key */
+  key.raw = l2fib_make_key ((u8 *) & mac, bd_index);
+  kv.key = key.raw;
+
+  /* check if the entry already exists (bihash search returns 0 on a hit) */
+  if (BV (clib_bihash_search) (&fm->mac_table, &kv, &kv) == 0)
+ {
+ /* decrement counter if overwriting a learned mac */
+ result.raw = kv.value;
+ if ((result.fields.age_not == 0) && (lm->global_learn_count))
+ lm->global_learn_count--;
+ }
+
+ /* set up result */
+ result.raw = 0; /* clear all fields */
+ result.fields.sw_if_index = sw_if_index;
+ result.fields.static_mac = static_mac;
+ result.fields.filter = filter_mac;
+ result.fields.bvi = bvi_mac;
+ result.fields.age_not = 1; /* no aging for provisioned entry */
+
+ kv.key = key.raw;
+ kv.value = result.raw;
+
+ BV (clib_bihash_add_del) (&fm->mac_table, &kv, 1 /* is_add */ );
+}
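+
+/*
+ * Editorial sketch (not part of the original source): provisioning a
+ * static, non-BVI MAC programmatically via the l2fib_add_fwd_entry
+ * wrapper used elsewhere in this file. The mac, bd_index and
+ * sw_if_index values are placeholders.
+ */
+#if 0
+  u64 mac = 0;			/* filled in via unformat_ethernet_address */
+  l2fib_add_fwd_entry (mac, bd_index, sw_if_index,
+		       1 /* static_mac */, 0 /* bvi_mac */);
+#endif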
+
+/**
+ * Add an entry to the L2FIB.
+ * The CLI format is:
+ * l2fib add <mac> <bd> <intf> [static] [bvi]
+ * l2fib add <mac> <bd> filter
+ * Note that filter and bvi entries are always static
+ */
+static clib_error_t *
+l2fib_add (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u64 mac;
+ u32 bd_id;
+ u32 bd_index;
+ u32 sw_if_index = ~0;
+ u32 filter_mac = 0;
+ u32 static_mac = 0;
+ u32 bvi_mac = 0;
+ uword *p;
+
+ if (!unformat_user (input, unformat_ethernet_address, &mac))
+ {
+ error = clib_error_return (0, "expected mac address `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (!p)
+ {
+ error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+ goto done;
+ }
+ bd_index = p[0];
+
+ if (unformat (input, "filter"))
+ {
+ filter_mac = 1;
+ static_mac = 1;
+
+ }
+ else
+ {
+
+ if (!unformat_user
+ (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ if (unformat (input, "static"))
+ {
+ static_mac = 1;
+ }
+ else if (unformat (input, "bvi"))
+ {
+ bvi_mac = 1;
+ static_mac = 1;
+ }
+ }
+
+  if (!filter_mac && vec_len (l2input_main.configs) <= sw_if_index)
+ {
+ error = clib_error_return (0, "Interface sw_if_index %d not in L2 mode",
+ sw_if_index);
+ goto done;
+ }
+
+ if (filter_mac)
+ l2fib_add_filter_entry (mac, bd_index);
+ else
+ l2fib_add_fwd_entry (mac, bd_index, sw_if_index, static_mac, bvi_mac);
+
+done:
+ return error;
+}
+
+/*?
+ * This command adds a MAC Address entry to the L2 FIB table
+ * of an existing bridge-domain. The MAC Address can be static
+ * or dynamic. This command also allows a filter to be added,
+ * such that packets with given MAC Addresses (source mac or
+ * destination mac match) are dropped.
+ *
+ * @cliexpar
+ * Example of how to add a dynamic MAC Address entry to the L2 FIB table
+ * of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:33 200 GigabitEthernet0/8/0.200}
+ * Example of how to add a static MAC Address entry to the L2 FIB table
+ * of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:55 200 GigabitEthernet0/8/0.200 static}
+ * Example of how to add a filter such that a packet with the given MAC
+ * Address will be dropped in a given bridge-domain (where 200 is the
+ * bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:77 200 filter}
+ * Example of show command of the provisioned MAC Addresses and filters:
+ * @cliexstart{show l2fib verbose}
+ * Mac Address BD Idx Interface Index static filter bvi refresh timestamp
+ * 52:54:00:53:18:33 1 GigabitEthernet0/8/0.200 3 0 0 0 0 0
+ * 52:54:00:53:18:55 1 GigabitEthernet0/8/0.200 3 1 0 0 0 0
+ * 52:54:00:53:18:77 1 N/A -1 1 1 0 0 0
+ * 3 l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_add_cli, static) = {
+ .path = "l2fib add",
+ .short_help = "l2fib add <mac> <bridge-domain-id> filter | <intf> [static | bvi]",
+ .function = l2fib_add,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+l2fib_test_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ u64 mac, save_mac;
+ u32 bd_index = 0;
+ u32 sw_if_index = 8;
+ u32 bvi_mac = 0;
+ u32 is_add = 0;
+ u32 is_del = 0;
+ u32 is_check = 0;
+ u32 count = 1;
+ int mac_set = 0;
+ int i;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "mac %U", unformat_ethernet_address, &mac))
+ mac_set = 1;
+ else if (unformat (input, "add"))
+ is_add = 1;
+ else if (unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "check"))
+ is_check = 1;
+ else if (unformat (input, "count %d", &count))
+ ;
+ else
+ break;
+ }
+
+ if (mac_set == 0)
+ return clib_error_return (0, "mac not set");
+
+ if (is_add == 0 && is_del == 0 && is_check == 0)
+ return clib_error_return (0,
+ "noop: pick at least one of (add,del,check)");
+
+ save_mac = mac;
+
+ if (is_add)
+ {
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+	  l2fib_add_fwd_entry (mac, bd_index, sw_if_index,
+			       0 /* static_mac */, bvi_mac);
+ tmp = clib_net_to_host_u64 (mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ if (is_check)
+ {
+ BVT (clib_bihash_kv) kv;
+ l2fib_main_t *mp = &l2fib_main;
+
+ mac = save_mac;
+
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+ kv.key = l2fib_make_key ((u8 *) & mac, bd_index);
+ if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv))
+ {
+ clib_warning ("key %U AWOL", format_ethernet_address, &mac);
+ break;
+ }
+ tmp = clib_net_to_host_u64 (mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ if (is_del)
+ {
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+
+ l2fib_del_entry (mac, bd_index);
+
+ tmp = clib_net_to_host_u64 (mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ return error;
+}
+
+/*?
+ * The set of '<em>test l2fib</em>' commands allows the L2 FIB table of the default
+ * bridge domain (bridge-domain-id of 0) to be modified.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to add a set of 4 sequential MAC Address entries to L2
+ * FIB table of the default bridge-domain:
+ * @cliexcmd{test l2fib add mac 52:54:00:53:00:00 count 4}
+ *
+ * Show the set of 4 sequential MAC Address entries that were added:
+ * @cliexstart{show l2fib verbose}
+ * Mac Address BD Idx Interface Index static filter bvi refresh timestamp
+ * 52:54:00:53:00:00 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0
+ * 52:54:00:53:00:01 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0
+ * 52:54:00:53:00:03 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0
+ * 52:54:00:53:00:02 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0
+ * 4 l2fib entries
+ * @cliexend
+ *
+ * Example of how to check that the set of 4 sequential MAC Address
+ * entries were added to the L2 FIB table of the default
+ * bridge-domain. A count of 5 is used to produce an error:
+ *
+ * @cliexcmd{test l2fib check mac 52:54:00:53:00:00 count 5}
+ * The output of the check command is in the log files. Log file
+ * location may vary based on your OS and Version:
+ *
+ * <b><em># tail -f /var/log/messages | grep l2fib_test_command_fn</em></b>
+ *
+ * Sep 7 17:15:24 localhost vnet[4952]: l2fib_test_command_fn:446: key 52:54:00:53:00:04 AWOL
+ *
+ * Example of how to delete a set of 4 sequential MAC Address entries
+ * from L2 FIB table of the default bridge-domain:
+ * @cliexcmd{test l2fib del mac 52:54:00:53:00:00 count 4}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_test_command, static) = {
+ .path = "test l2fib",
+ .short_help = "test l2fib [add|del|check] mac <base-addr> count <nn>",
+ .function = l2fib_test_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Delete an entry from the l2fib.
+ * Return 0 if the entry was deleted, or 1 if it was not found
+ */
+static u32
+l2fib_del_entry_by_key (u64 raw_key)
+{
+
+ l2fib_entry_result_t result;
+ l2fib_main_t *mp = &l2fib_main;
+ BVT (clib_bihash_kv) kv;
+
+ /* set up key */
+ kv.key = raw_key;
+
+ if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv))
+ return 1;
+
+ result.raw = kv.value;
+
+ /* decrement counter if dynamically learned mac */
+ if ((result.fields.age_not == 0) && (l2learn_main.global_learn_count))
+ l2learn_main.global_learn_count--;
+
+ /* Remove entry from hash table */
+ BV (clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */ );
+ return 0;
+}
+
+/**
+ * Delete an entry from the l2fib.
+ * Return 0 if the entry was deleted, or 1 if it was not found
+ */
+u32
+l2fib_del_entry (u64 mac, u32 bd_index)
+{
+ return l2fib_del_entry_by_key (l2fib_make_key ((u8 *) & mac, bd_index));
+}
+
+/**
+ * Delete an entry from the L2FIB.
+ * The CLI format is:
+ * l2fib del <mac> <bd-id>
+ */
+static clib_error_t *
+l2fib_del (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u64 mac;
+ u32 bd_id;
+ u32 bd_index;
+ uword *p;
+
+ if (!unformat_user (input, unformat_ethernet_address, &mac))
+ {
+ error = clib_error_return (0, "expected mac address `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (!p)
+ {
+ error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+ goto done;
+ }
+ bd_index = p[0];
+
+ /* Delete the entry */
+ if (l2fib_del_entry (mac, bd_index))
+ {
+ error = clib_error_return (0, "mac entry not found");
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * This command deletes an existing MAC Address entry from the L2 FIB
+ * table of an existing bridge-domain.
+ *
+ * @cliexpar
+ * Example of how to delete a MAC Address entry from the L2 FIB table of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib del 52:54:00:53:18:33 200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_del_cli, static) = {
+ .path = "l2fib del",
+ .short_help = "l2fib del <mac> <bridge-domain-id>",
+ .function = l2fib_del,
+};
+/* *INDENT-ON* */
+
+/**
+  Kick off the ager to scan the MAC table and age out or delete entries
+*/
+void
+l2fib_start_ager_scan (vlib_main_t * vm)
+{
+ uword evt = L2_MAC_AGE_PROCESS_EVENT_ONE_PASS;
+
+ /* check if there is at least one bd with mac aging enabled */
+ l2_bridge_domain_t *bd_config;
+ vec_foreach (bd_config, l2input_main.bd_configs)
+ {
+ if (bd_config->bd_id != ~0 && bd_config->mac_age != 0)
+ {
+ evt = L2_MAC_AGE_PROCESS_EVENT_START;
+ break;
+ }
+ }
+
+ vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index,
+ evt, 0);
+}
+
+/**
+  Flush all non-static MACs from an interface
+*/
+void
+l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index)
+{
+ *l2fib_swif_seq_num (sw_if_index) += 1;
+ l2fib_start_ager_scan (vm);
+}
+
+/**
+  Flush all non-static MACs in a bridge domain
+*/
+void
+l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index)
+{
+ l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+ bd_config->seq_num += 1;
+ l2fib_start_ager_scan (vm);
+}
+
+/**
+  Flush all non-static MACs - flushes all valid BDs
+*/
+void
+l2fib_flush_all_mac (vlib_main_t * vm)
+{
+ l2_bridge_domain_t *bd_config;
+ vec_foreach (bd_config, l2input_main.bd_configs)
+ if (bd_is_valid (bd_config))
+ bd_config->seq_num += 1;
+
+ l2fib_start_ager_scan (vm);
+}
+
+
+/**
+ Flush MACs, except static ones, associated with an interface
+ The CLI format is:
+ l2fib flush-mac interface <if-name>
+*/
+static clib_error_t *
+l2fib_flush_mac_int (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ l2fib_flush_int_mac (vm, sw_if_index);
+
+done:
+ return error;
+}
+
+/**
+ Flush all MACs, except static ones
+ The CLI format is:
+ l2fib flush-mac all
+*/
+static clib_error_t *
+l2fib_flush_mac_all (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2fib_flush_all_mac (vm);
+ return 0;
+}
+
+/*?
+ * This command kicks off the ager to delete all existing MAC Address
+ * entries, except static ones, from the L2 FIB table.
+ *
+ * @cliexpar
+ * Example of how to flush all MAC Address entries from the L2 FIB table:
+ * @cliexcmd{l2fib flush-mac all}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = {
+ .path = "l2fib flush-mac all",
+ .short_help = "l2fib flush-mac all",
+ .function = l2fib_flush_mac_all,
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command kicks off the ager to delete all existing MAC Address entries,
+ * except static ones, associated with an interface from the L2 FIB table.
+ *
+ * @cliexpar
+ * Example of how to flush MAC Address entries learned on an interface from the L2 FIB table:
+ * @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_flush_mac_int_cli, static) = {
+ .path = "l2fib flush-mac interface",
+ .short_help = "l2fib flush-mac interface <if-name>",
+ .function = l2fib_flush_mac_int,
+};
+/* *INDENT-ON* */
+
+/**
+ Flush bridge-domain MACs except static ones.
+ The CLI format is:
+ l2fib flush-mac bridge-domain <bd-id>
+*/
+static clib_error_t *
+l2fib_flush_mac_bd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ bd_index = *p;
+ else
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ l2fib_flush_bd_mac (vm, bd_index);
+
+done:
+ return error;
+}
+
+/*?
+ * This command kicks off the ager to delete all existing MAC Address entries,
+ * except static ones, in a bridge domain from the L2 FIB table.
+ *
+ * @cliexpar
+ * Example of how to flush MAC Address entries learned in a bridge domain from the L2 FIB table:
+ * @cliexcmd{l2fib flush-mac bridge-domain 1000}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_flush_mac_bd_cli, static) = {
+ .path = "l2fib flush-mac bridge-domain",
+ .short_help = "l2fib flush-mac bridge-domain <bd-id>",
+ .function = l2fib_flush_mac_bd,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+l2fib_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+ l2_input_config_t *config = l2input_intf_config (sw_if_index);
+ if ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0 && config->bridge)
+ l2fib_flush_int_mac (vnm->vlib_main, sw_if_index);
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (l2fib_sw_interface_up_down);
+
+BVT (clib_bihash) * get_mac_table (void)
+{
+ l2fib_main_t *mp = &l2fib_main;
+ return &mp->mac_table;
+}
+
+static_always_inline void *
+allocate_mac_evt_buf (u32 client, u32 client_index)
+{
+ l2fib_main_t *fm = &l2fib_main;
+ vl_api_l2_macs_event_t *mp = vl_msg_api_alloc
+ (sizeof (*mp) + (fm->max_macs_in_event * sizeof (vl_api_mac_entry_t)));
+ mp->_vl_msg_id = htons (VL_API_L2_MACS_EVENT);
+ mp->pid = htonl (client);
+ mp->client_index = client_index;
+ return mp;
+}
+
+static_always_inline f64
+l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only)
+{
+ l2fib_main_t *fm = &l2fib_main;
+ l2learn_main_t *lm = &l2learn_main;
+
+ BVT (clib_bihash) * h = &fm->mac_table;
+ int i, j, k;
+ f64 last_start = start_time;
+ f64 accum_t = 0;
+ f64 delta_t = 0;
+ u32 evt_idx = 0;
+ u32 learn_count = 0;
+ u32 client = lm->client_pid;
+ u32 cl_idx = lm->client_index;
+ vl_api_l2_macs_event_t *mp = 0;
+ unix_shared_memory_queue_t *q = 0;
+
+ if (client)
+ {
+ mp = allocate_mac_evt_buf (client, cl_idx);
+ q = vl_api_client_index_to_input_queue (lm->client_index);
+ }
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ /* allow no more than 20us without a pause */
+ delta_t = vlib_time_now (vm) - last_start;
+ if (delta_t > 20e-6)
+ {
+ vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */
+ last_start = vlib_time_now (vm);
+ accum_t += delta_t;
+ }
+
+ if (i < (h->nbuckets - 3))
+ {
+ BVT (clib_bihash_bucket) * b = &h->buckets[i + 3];
+ CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
+ b = &h->buckets[i + 1];
+ if (b->offset)
+ {
+ BVT (clib_bihash_value) * v =
+ BV (clib_bihash_get_value) (h, b->offset);
+ CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+ }
+
+ BVT (clib_bihash_bucket) * b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ BVT (clib_bihash_value) * v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ l2fib_entry_key_t key = {.raw = v->kvp[k].key };
+ l2fib_entry_result_t result = {.raw = v->kvp[k].value };
+
+ if (result.fields.age_not == 0)
+ learn_count++;
+
+ if (client)
+ {
+ if (PREDICT_FALSE (evt_idx >= fm->max_macs_in_event))
+ {
+ /* event message full, send it and start a new one */
+ if (q && (q->cursize < q->maxsize))
+ {
+ mp->n_macs = htonl (evt_idx);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ mp = allocate_mac_evt_buf (client, cl_idx);
+ }
+ else
+ {
+ clib_warning ("MAC event to pid %d queue stuffed!"
+ " %d MAC entries lost", client,
+ evt_idx);
+ }
+ evt_idx = 0;
+ }
+
+ if (result.fields.lrn_evt)
+ {
+ /* copy mac entry to event msg */
+ clib_memcpy (mp->mac[evt_idx].mac_addr, key.fields.mac,
+ 6);
+ mp->mac[evt_idx].is_del = 0;
+ mp->mac[evt_idx].sw_if_index =
+ htonl (result.fields.sw_if_index);
+ /* clear event bit and update mac entry */
+ result.fields.lrn_evt = 0;
+ BVT (clib_bihash_kv) kv;
+ kv.key = key.raw;
+ kv.value = result.raw;
+ BV (clib_bihash_add_del) (&fm->mac_table, &kv, 1);
+ evt_idx++;
+ continue; /* skip aging */
+ }
+ }
+
+ if (event_only || result.fields.age_not)
+		    continue;	/* skip aging - static MACs always have age_not set */
+
+ /* start aging processing */
+ u32 bd_index = key.fields.bd_index;
+ u32 sw_if_index = result.fields.sw_if_index;
+ u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16;
+ if (result.fields.sn.as_u16 != sn)
+ goto age_out; /* stale mac */
+
+ l2_bridge_domain_t *bd_config =
+ vec_elt_at_index (l2input_main.bd_configs, bd_index);
+
+ if (bd_config->mac_age == 0)
+ continue; /* skip aging */
+
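+	      /*
+	       * Editorial note: timestamps are minutes modulo 256, so the
+	       * subtraction below may wrap; e.g. now = 3, timestamp = 250
+	       * yields -247, corrected to 9 minutes by adding 256.
+	       */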
+ i16 delta = (u8) (start_time / 60) - result.fields.timestamp;
+ delta += delta < 0 ? 256 : 0;
+
+ if (delta < bd_config->mac_age)
+ continue; /* still valid */
+
+ age_out:
+ if (client)
+ {
+ /* copy mac entry to event msg */
+ clib_memcpy (mp->mac[evt_idx].mac_addr, key.fields.mac, 6);
+ mp->mac[evt_idx].is_del = 1;
+ mp->mac[evt_idx].sw_if_index =
+ htonl (result.fields.sw_if_index);
+ evt_idx++;
+ }
+ /* delete mac entry */
+ BVT (clib_bihash_kv) kv;
+ kv.key = key.raw;
+ BV (clib_bihash_add_del) (&fm->mac_table, &kv, 0);
+ learn_count--;
+ }
+ v++;
+ }
+ }
+
+ /* keep learn count consistent */
+ l2learn_main.global_learn_count = learn_count;
+
+ if (mp)
+ {
+ /* send any outstanding mac event message else free message buffer */
+ if (evt_idx)
+ {
+ if (q && (q->cursize < q->maxsize))
+ {
+ mp->n_macs = htonl (evt_idx);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ }
+ else
+ {
+ clib_warning ("MAC event to pid %d queue stuffed!"
+ " %d MAC entries lost", client, evt_idx);
+ vl_msg_api_free (mp);
+ }
+ }
+ else
+ vl_msg_api_free (mp);
+ }
+ return delta_t + accum_t;
+}
+
+/* Maximum f64 value */
+#define TIME_MAX (1.7976931348623157e+308)
+
+static uword
+l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ uword event_type, *event_data = 0;
+ l2fib_main_t *fm = &l2fib_main;
+ l2learn_main_t *lm = &l2learn_main;
+ bool enabled = 0;
+ f64 start_time, next_age_scan_time = TIME_MAX;
+
+ while (1)
+ {
+ if (lm->client_pid)
+ vlib_process_wait_for_event_or_clock (vm, fm->event_scan_delay);
+ else if (enabled)
+ {
+ f64 t = next_age_scan_time - vlib_time_now (vm);
+ vlib_process_wait_for_event_or_clock (vm, t);
+ }
+ else
+ vlib_process_wait_for_event (vm);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ start_time = vlib_time_now (vm);
+ enum
+ { SCAN_MAC_AGE, SCAN_MAC_EVENT, SCAN_DISABLE } scan = SCAN_MAC_AGE;
+
+ switch (event_type)
+ {
+ case ~0: /* timer expired */
+ if (lm->client_pid != 0 && start_time < next_age_scan_time)
+ scan = SCAN_MAC_EVENT;
+ break;
+
+ case L2_MAC_AGE_PROCESS_EVENT_START:
+ enabled = 1;
+ break;
+
+ case L2_MAC_AGE_PROCESS_EVENT_STOP:
+ enabled = 0;
+ scan = SCAN_DISABLE;
+ break;
+
+ case L2_MAC_AGE_PROCESS_EVENT_ONE_PASS:
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ if (scan == SCAN_MAC_EVENT)
+ l2fib_main.evt_scan_duration = l2fib_scan (vm, start_time, 1);
+ else
+ {
+ if (scan == SCAN_MAC_AGE)
+ l2fib_main.age_scan_duration = l2fib_scan (vm, start_time, 0);
+ if (scan == SCAN_DISABLE)
+ {
+ l2fib_main.age_scan_duration = 0;
+ l2fib_main.evt_scan_duration = 0;
+ }
+ /* schedule next scan */
+ if (enabled)
+ next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL;
+ else
+ next_age_scan_time = TIME_MAX;
+ }
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2fib_mac_age_scanner_process_node) = {
+ .function = l2fib_mac_age_scanner_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "l2fib-mac-age-scanner-process",
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+l2fib_init (vlib_main_t * vm)
+{
+ l2fib_main_t *mp = &l2fib_main;
+ l2fib_entry_key_t test_key;
+ u8 test_mac[6];
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Create the hash table */
+ BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+ L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+
+ /* verify the key constructor is good, since it is endian-sensitive */
+ memset (test_mac, 0, sizeof (test_mac));
+ test_mac[0] = 0x11;
+ test_key.raw = 0;
+ test_key.raw = l2fib_make_key ((u8 *) & test_mac, 0x1234);
+ ASSERT (test_key.fields.mac[0] == 0x11);
+ ASSERT (test_key.fields.bd_index == 0x1234);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fib_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h
new file mode 100644
index 00000000..7cc2dc5e
--- /dev/null
+++ b/src/vnet/l2/l2_fib.h
@@ -0,0 +1,432 @@
+/*
+ * l2_fib.h : layer 2 forwarding table (aka mac table)
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fib_h
+#define included_l2fib_h
+
+#include <vlib/vlib.h>
+#include <vppinfra/bihash_8_8.h>
+
+/*
+ * The size of the hash table
+ */
+#define L2FIB_NUM_BUCKETS (64 * 1024)
+#define L2FIB_MEMORY_SIZE (512<<20)
+
+/* Ager scan interval is 1 minute for aging */
+#define L2FIB_AGE_SCAN_INTERVAL (60.0)
+
+/* MAC event scan delay is 100 msec unless specified by MAC event client */
+#define L2FIB_EVENT_SCAN_DELAY_DEFAULT (0.1)
+
+/* Max MACs in an event message is 100 unless specified by MAC event client */
+#define L2FIB_EVENT_MAX_MACS_DEFAULT (100)
+
+/* MAC event learn limit is 1000 unless specified by MAC event client */
+#define L2FIB_EVENT_LEARN_LIMIT_DEFAULT (1000)
+
+typedef struct
+{
+
+ /* hash table */
+ BVT (clib_bihash) mac_table;
+
+ /* per swif vector of sequence number for interface based flush of MACs */
+ u8 *swif_seq_num;
+
+ /* last event or ager scan duration */
+ f64 evt_scan_duration;
+ f64 age_scan_duration;
+
+  /* delay between event scans, defaults to 100 msec */
+ f64 event_scan_delay;
+
+  /* max macs in an event message, defaults to 100 entries */
+ u32 max_macs_in_event;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2fib_main_t;
+
+extern l2fib_main_t l2fib_main;
+
+/*
+ * The L2fib key is the mac address and bridge domain ID
+ */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u16 bd_index;
+ u8 mac[6];
+ } fields;
+ struct
+ {
+ u32 w0;
+ u32 w1;
+ } words;
+ u64 raw;
+ };
+} l2fib_entry_key_t;
+
+STATIC_ASSERT_SIZEOF (l2fib_entry_key_t, 8);
+
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 swif;
+ u8 bd;
+ };
+ u16 as_u16;
+ };
+} l2fib_seq_num_t;
+
+/*
+ * The l2fib entry results
+ */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u32 sw_if_index; /* output sw_if_index (L3 intf if bvi==1) */
+
+ u8 static_mac:1; /* static mac, no MAC move */
+ u8 age_not:1; /* not subject to age */
+ u8 bvi:1; /* mac is for a bridged virtual interface */
+ u8 filter:1; /* drop packets to/from this mac */
+ u8 lrn_evt:1; /* MAC learned to be sent in L2 MAC event */
+ u8 unused:3;
+
+ u8 timestamp; /* timestamp for aging */
+ l2fib_seq_num_t sn; /* bd/int seq num */
+ } fields;
+ u64 raw;
+ };
+} l2fib_entry_result_t;
+
+STATIC_ASSERT_SIZEOF (l2fib_entry_result_t, 8);
+
+/**
+ * Compute the hash for the given key and return
+ * the corresponding bucket index
+ */
+always_inline u32
+l2fib_compute_hash_bucket (l2fib_entry_key_t * key)
+{
+ u32 result;
+ u32 temp_a;
+ u32 temp_b;
+
+ result = 0xa5a5a5a5; /* some seed */
+ temp_a = key->words.w0;
+ temp_b = key->words.w1;
+ hash_mix32 (temp_a, temp_b, result);
+
+ return result % L2FIB_NUM_BUCKETS;
+}
+
+always_inline u64
+l2fib_make_key (u8 * mac_address, u16 bd_index)
+{
+ u64 temp;
+
+ /*
+ * The mac address in memory is A:B:C:D:E:F
+ * The bd id in register is H:L
+ */
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ /*
+ * Create the in-register key as F:E:D:C:B:A:H:L
+ * In memory the key is L:H:A:B:C:D:E:F
+ */
+ temp = *((u64 *) (mac_address)) << 16;
+ temp = (temp & ~0xffff) | (u64) (bd_index);
+#else
+ /*
+ * Create the in-register key as H:L:A:B:C:D:E:F
+ * In memory the key is H:L:A:B:C:D:E:F
+ */
+ temp = *((u64 *) (mac_address)) >> 16;
+ temp = temp | (((u64) bd_index) << 48);
+#endif
+
+ return temp;
+}
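+
+/*
+ * Worked example (illustrative, assuming a little-endian host): for
+ * mac = 01:02:03:04:05:06 and bd_index = 7,
+ *
+ *   l2fib_entry_key_t key;
+ *   key.raw = l2fib_make_key (mac, 7);
+ *
+ * yields key.fields.bd_index == 7 and key.fields.mac == {1,2,3,4,5,6},
+ * matching the in-memory layout L:H:A:B:C:D:E:F described above.
+ */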
+
+
+
+/**
+ * Lookup the entry for mac and bd_index in the mac table for 1 packet.
+ * Cached_key and cached_result are used as a one-entry cache.
+ * The function reads and updates them as needed.
+ *
+ * mac0 and bd_index0 are the keys. The entry is written to result0.
+ * If the entry was not found, result0 is set to ~0.
+ *
+ * key0 returns the computed key, convenient if the entry needs to be
+ * updated afterward. bucket0 is always set to ~0 in this implementation;
+ * callers treat it as unused.
+ */
+
+static_always_inline void
+l2fib_lookup_1 (BVT (clib_bihash) * mac_table,
+ l2fib_entry_key_t * cached_key,
+ l2fib_entry_result_t * cached_result,
+ u8 * mac0,
+ u16 bd_index0,
+ l2fib_entry_key_t * key0,
+ u32 * bucket0, l2fib_entry_result_t * result0)
+{
+ /* set up key */
+ key0->raw = l2fib_make_key (mac0, bd_index0);
+ *bucket0 = ~0;
+
+ if (key0->raw == cached_key->raw)
+ {
+ /* Hit in the one-entry cache */
+ result0->raw = cached_result->raw;
+ }
+ else
+ {
+ /* Do a regular mac table lookup */
+ BVT (clib_bihash_kv) kv;
+
+ kv.key = key0->raw;
+ kv.value = ~0ULL;
+ BV (clib_bihash_search_inline) (mac_table, &kv);
+ result0->raw = kv.value;
+
+ /* Update one-entry cache */
+ cached_key->raw = key0->raw;
+ cached_result->raw = result0->raw;
+ }
+}
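+
+/*
+ * Usage sketch (illustrative; mirrors the caller in l2_fwd.c below). The
+ * one-entry cache is cleared once per frame and then shared across packets:
+ *
+ *   l2fib_entry_key_t cached_key;
+ *   l2fib_entry_result_t cached_result;
+ *   cached_key.raw = ~0;
+ *   cached_result.raw = ~0;
+ *   ...
+ *   l2fib_lookup_1 (mac_table, &cached_key, &cached_result,
+ *                   h0->dst_address, vnet_buffer (b0)->l2.bd_index,
+ *                   &key0, &bucket0, &result0);
+ *   if (result0.raw == ~0ULL)
+ *     ...                        (miss: flood or drop)
+ */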
+
+
+/**
+ * Lookup the entry for mac and bd_index in the mac table for 2 packets.
+ * The lookups for the two packets are interleaved.
+ *
+ * Cached_key and cached_result are used as a one-entry cache.
+ * The function reads and updates them as needed.
+ *
+ * mac0 and bd_index0 are the keys. The entry is written to result0.
+ * If the entry was not found, result0 is set to ~0. The same
+ * holds for mac1/bd_index1/result1.
+ */
+static_always_inline void
+l2fib_lookup_2 (BVT (clib_bihash) * mac_table,
+ l2fib_entry_key_t * cached_key,
+ l2fib_entry_result_t * cached_result,
+ u8 * mac0,
+ u8 * mac1,
+ u16 bd_index0,
+ u16 bd_index1,
+ l2fib_entry_key_t * key0,
+ l2fib_entry_key_t * key1,
+ u32 * bucket0,
+ u32 * bucket1,
+ l2fib_entry_result_t * result0,
+ l2fib_entry_result_t * result1)
+{
+ /* set up key */
+ key0->raw = l2fib_make_key (mac0, bd_index0);
+ key1->raw = l2fib_make_key (mac1, bd_index1);
+
+ if ((key0->raw == cached_key->raw) && (key1->raw == cached_key->raw))
+ {
+ /* Both hit in the one-entry cache */
+ result0->raw = cached_result->raw;
+ result1->raw = cached_result->raw;
+ *bucket0 = ~0;
+ *bucket1 = ~0;
+
+ }
+ else
+ {
+ BVT (clib_bihash_kv) kv0, kv1;
+
+ /*
+ * Do a regular mac table lookup
+ * Interleave lookups for packet 0 and packet 1
+ */
+ kv0.key = key0->raw;
+ kv1.key = key1->raw;
+ kv0.value = ~0ULL;
+ kv1.value = ~0ULL;
+
+ BV (clib_bihash_search_inline) (mac_table, &kv0);
+ BV (clib_bihash_search_inline) (mac_table, &kv1);
+
+ result0->raw = kv0.value;
+ result1->raw = kv1.value;
+
+ /* Update one-entry cache */
+ cached_key->raw = key1->raw;
+ cached_result->raw = result1->raw;
+ }
+}
+
+static_always_inline void
+l2fib_lookup_4 (BVT (clib_bihash) * mac_table,
+ l2fib_entry_key_t * cached_key,
+ l2fib_entry_result_t * cached_result,
+ u8 * mac0,
+ u8 * mac1,
+ u8 * mac2,
+ u8 * mac3,
+ u16 bd_index0,
+ u16 bd_index1,
+ u16 bd_index2,
+ u16 bd_index3,
+ l2fib_entry_key_t * key0,
+ l2fib_entry_key_t * key1,
+ l2fib_entry_key_t * key2,
+ l2fib_entry_key_t * key3,
+ u32 * bucket0,
+ u32 * bucket1,
+ u32 * bucket2,
+ u32 * bucket3,
+ l2fib_entry_result_t * result0,
+ l2fib_entry_result_t * result1,
+ l2fib_entry_result_t * result2,
+ l2fib_entry_result_t * result3)
+{
+ /* set up key */
+ key0->raw = l2fib_make_key (mac0, bd_index0);
+ key1->raw = l2fib_make_key (mac1, bd_index1);
+ key2->raw = l2fib_make_key (mac2, bd_index2);
+ key3->raw = l2fib_make_key (mac3, bd_index3);
+
+ if ((key0->raw == cached_key->raw) && (key1->raw == cached_key->raw) &&
+ (key2->raw == cached_key->raw) && (key3->raw == cached_key->raw))
+ {
+      /* All four hit in the one-entry cache */
+ result0->raw = cached_result->raw;
+ result1->raw = cached_result->raw;
+ result2->raw = cached_result->raw;
+ result3->raw = cached_result->raw;
+ *bucket0 = ~0;
+ *bucket1 = ~0;
+ *bucket2 = ~0;
+ *bucket3 = ~0;
+
+ }
+ else
+ {
+ BVT (clib_bihash_kv) kv0, kv1, kv2, kv3;
+
+ /*
+ * Do a regular mac table lookup
+       * Interleave lookups for packets 0 through 3
+ */
+ kv0.key = key0->raw;
+ kv1.key = key1->raw;
+ kv2.key = key2->raw;
+ kv3.key = key3->raw;
+ kv0.value = ~0ULL;
+ kv1.value = ~0ULL;
+ kv2.value = ~0ULL;
+ kv3.value = ~0ULL;
+
+ BV (clib_bihash_search_inline) (mac_table, &kv0);
+ BV (clib_bihash_search_inline) (mac_table, &kv1);
+ BV (clib_bihash_search_inline) (mac_table, &kv2);
+ BV (clib_bihash_search_inline) (mac_table, &kv3);
+
+ result0->raw = kv0.value;
+ result1->raw = kv1.value;
+ result2->raw = kv2.value;
+ result3->raw = kv3.value;
+
+      /* Update one-entry cache with the second packet's key and result */
+ cached_key->raw = key1->raw;
+ cached_result->raw = result1->raw;
+ }
+}
+
+void l2fib_clear_table (void);
+
+void
+l2fib_add_entry (u64 mac,
+ u32 bd_index,
+ u32 sw_if_index, u8 static_mac, u8 drop_mac, u8 bvi_mac);
+
+static inline void
+l2fib_add_fwd_entry (u64 mac, u32 bd_index, u32 sw_if_index, u8 static_mac,
+ u8 bvi_mac)
+{
+ l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, 0, bvi_mac);
+}
+
+static inline void
+l2fib_add_filter_entry (u64 mac, u32 bd_index)
+{
+ l2fib_add_entry (mac, bd_index, ~0, 1, 1, 0);
+}
+
+u32 l2fib_del_entry (u64 mac, u32 bd_index);
+
+void l2fib_start_ager_scan (vlib_main_t * vm);
+
+void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index);
+
+void l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index);
+
+void l2fib_flush_all_mac (vlib_main_t * vm);
+
+void
+l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key,
+ l2fib_entry_result_t ** l2fe_res);
+
+u8 *format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args);
+
+static_always_inline u8 *
+l2fib_swif_seq_num (u32 sw_if_index)
+{
+ l2fib_main_t *mp = &l2fib_main;
+ return vec_elt_at_index (mp->swif_seq_num, sw_if_index);
+}
+
+static_always_inline u8 *
+l2fib_valid_swif_seq_num (u32 sw_if_index)
+{
+ l2fib_main_t *mp = &l2fib_main;
+ vec_validate (mp->swif_seq_num, sw_if_index);
+ return l2fib_swif_seq_num (sw_if_index);
+}
+
+BVT (clib_bihash) * get_mac_table (void);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c
new file mode 100644
index 00000000..ed9e5ac2
--- /dev/null
+++ b/src/vnet/l2/l2_flood.c
@@ -0,0 +1,568 @@
+/*
+ * l2_flood.c : layer 2 flooding
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/replication.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+
+
+/**
+ * @file
+ * @brief Ethernet Flooding.
+ *
+ * Flooding uses the packet replication infrastructure to send a copy of the
+ * packet to each member interface. Logically the replication infrastructure
+ * expects two graph nodes: a prep node that initiates replication and sends the
+ * packet to the first destination, and a recycle node that is passed the packet
+ * after it has been transmitted.
+ *
+ * To reduce the amount of code, l2 flooding implements both functions in
+ * the same graph node. The node can tell whether it is being called as the
+ * "prep" or the "recycle" node using replication_is_recycled().
+ */
+
+
+typedef struct
+{
+
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* next node index for the L3 input node of each ethertype */
+ next_by_ethertype_t l3_next;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2flood_main_t;
+
+typedef struct
+{
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2flood_trace_t;
+
+
+/* packet trace format function */
+static u8 *
+format_l2flood_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2flood_trace_t *t = va_arg (*args, l2flood_trace_t *);
+
+ s = format (s, "l2-flood: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src, t->bd_index);
+ return s;
+}
+
+l2flood_main_t l2flood_main;
+
+static vlib_node_registration_t l2flood_node;
+
+#define foreach_l2flood_error \
+_(L2FLOOD, "L2 flood packets") \
+_(REPL_FAIL, "L2 replication failures") \
+_(NO_MEMBERS, "L2 replication complete") \
+_(BVI_BAD_MAC, "BVI L3 MAC mismatch") \
+_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype")
+
+typedef enum
+{
+#define _(sym,str) L2FLOOD_ERROR_##sym,
+ foreach_l2flood_error
+#undef _
+ L2FLOOD_N_ERROR,
+} l2flood_error_t;
+
+static char *l2flood_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2flood_error
+#undef _
+};
+
+typedef enum
+{
+ L2FLOOD_NEXT_L2_OUTPUT,
+ L2FLOOD_NEXT_DROP,
+ L2FLOOD_N_NEXT,
+} l2flood_next_t;
+
+/*
+ * Perform flooding on one packet
+ *
+ * Due to the way BVI processing can modify the packet, the BVI interface
+ * (if present) must be processed last in the replication. The member vector
+ * is arranged so that the BVI interface is always the first element.
+ * Flooding walks the vector in reverse.
+ *
+ * BVI processing causes the packet to go to L3 processing. This strips the
+ * L2 header, which is fine because the replication infrastructure restores
+ * it. However L3 processing can trigger larger changes to the packet. For
+ * example, an ARP request could be turned into an ARP reply, an ICMP request
+ * could be turned into an ICMP reply. If BVI processing is not performed
+ * last, the modified packet would be replicated to the remaining members.
+ */
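+
+/*
+ * Example (illustrative): with a member vector [BVI, A, B, C] and
+ * flood_count == 4, the reverse walk replicates to C, then B, then A,
+ * and processes the BVI last, so any L3 rewrite of the packet cannot
+ * leak into the copies already sent to A, B and C.
+ */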
+
+static_always_inline void
+l2flood_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ l2flood_main_t * msm,
+ u64 * counter_base,
+ vlib_buffer_t * b0,
+ u32 * sw_if_index0,
+ l2fib_entry_key_t * key0,
+ u32 * bucket0, l2fib_entry_result_t * result0, u32 * next0)
+{
+ u16 bd_index0;
+ l2_bridge_domain_t *bd_config;
+ l2_flood_member_t *members;
+ i32 current_member; /* signed */
+ replication_context_t *ctx;
+ u8 in_shg = vnet_buffer (b0)->l2.shg;
+
+ if (!replication_is_recycled (b0))
+ {
+
+ /* Do flood "prep node" processing */
+
+ /* Get config for the bridge domain interface */
+ bd_index0 = vnet_buffer (b0)->l2.bd_index;
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index0);
+ members = bd_config->members;
+
+ /* Find first member that passes the reflection and SHG checks */
+ current_member = bd_config->flood_count - 1;
+ while ((current_member >= 0) &&
+ ((members[current_member].sw_if_index == *sw_if_index0) ||
+ (in_shg && members[current_member].shg == in_shg)))
+ {
+ current_member--;
+ }
+
+ if (current_member < 0)
+ {
+ /* No members to flood to */
+ *next0 = L2FLOOD_NEXT_DROP;
+ b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
+ return;
+ }
+
+ if ((current_member > 0) &&
+ ((current_member > 1) ||
+ ((members[0].sw_if_index != *sw_if_index0) &&
+ (!in_shg || members[0].shg != in_shg))))
+ {
+ /* If more than one member then initiate replication */
+ ctx =
+ replication_prep (vm, b0, l2flood_node.index, 1 /* l2_packet */ );
+ ctx->feature_replicas = (uword) members;
+ ctx->feature_counter = current_member;
+ }
+
+ }
+ else
+ {
+ vnet_buffer_opaque_t *vnet_buff_op;
+
+ /* Do flood "recycle node" processing */
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL))
+ {
+ (void) replication_recycle (vm, b0, 1 /* is_last */ );
+ *next0 = L2FLOOD_NEXT_DROP;
+ b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL];
+ return;
+ }
+
+ ctx = replication_get_ctx (b0);
+ replication_clear_recycled (b0);
+
+ members = (l2_flood_member_t *) (intptr_t) ctx->feature_replicas;
+ current_member = (i32) ctx->feature_counter - 1;
+
+ /* Need to update input index from saved packet context */
+ vnet_buff_op = (vnet_buffer_opaque_t *) ctx->vnet_buffer;
+ *sw_if_index0 = vnet_buff_op->sw_if_index[VLIB_RX];
+
+ /* Find next member that passes the reflection and SHG check */
+ while ((current_member >= 0) &&
+ ((members[current_member].sw_if_index == *sw_if_index0) ||
+ (in_shg && members[current_member].shg == in_shg)))
+ {
+ current_member--;
+ }
+
+ if (current_member < 0)
+ {
+ /*
+ * No more members to flood to.
+ * Terminate replication and drop packet.
+ */
+
+ replication_recycle (vm, b0, 1 /* is_last */ );
+
+ *next0 = L2FLOOD_NEXT_DROP;
+	  /* Ideally we wouldn't bump a counter here, just silently complete */
+ b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
+ return;
+ }
+
+ /* Restore packet and context and continue replication */
+ ctx->feature_counter = current_member;
+ replication_recycle (vm, b0, ((current_member == 0) || /*is_last */
+ ((current_member == 1) &&
+ ((members[0].sw_if_index ==
+ *sw_if_index0) || (in_shg
+ && members[0].shg ==
+ in_shg)))));
+ }
+
+ /* Forward packet to the current member */
+ if (PREDICT_FALSE (members[current_member].flags & L2_FLOOD_MEMBER_BVI))
+ {
+ /* Do BVI processing */
+ u32 rc;
+ rc = l2_to_bvi (vm,
+ msm->vnet_main,
+ b0,
+ members[current_member].sw_if_index,
+ &msm->l3_next, next0);
+
+ if (PREDICT_FALSE (rc))
+ {
+ if (rc == TO_BVI_ERR_BAD_MAC)
+ {
+ b0->error = node->errors[L2FLOOD_ERROR_BVI_BAD_MAC];
+ *next0 = L2FLOOD_NEXT_DROP;
+ }
+ else if (rc == TO_BVI_ERR_ETHERTYPE)
+ {
+ b0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE];
+ *next0 = L2FLOOD_NEXT_DROP;
+ }
+ }
+ }
+ else
+ {
+ /* Do normal L2 forwarding */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ members[current_member].sw_if_index;
+ *next0 = L2FLOOD_NEXT_L2_OUTPUT;
+
+ }
+
+}
+
+
+static uword
+l2flood_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2flood_next_t next_index;
+ l2flood_main_t *msm = &l2flood_main;
+ vlib_node_t *n = vlib_get_node (vm, l2flood_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ l2fib_entry_key_t key0, key1;
+ l2fib_entry_result_t result0, result1;
+ u32 bucket0, bucket1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3, *p4, *p5;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ /* Prefetch the buffer header for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ /* Prefetch the replication context for the N+1 loop iteration */
+ /* This depends on the buffer header above */
+ replication_prefetch_ctx (p2);
+ replication_prefetch_ctx (p3);
+
+ /* Prefetch the packet for the N+1 loop iteration */
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ /* process 2 pkts */
+ em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 2;
+
+ l2flood_process (vm, node, msm,
+ &em->counters[node_counter_base_index], b0,
+ &sw_if_index0, &key0, &bucket0, &result0, &next0);
+
+ l2flood_process (vm, node, msm,
+ &em->counters[node_counter_base_index], b1,
+ &sw_if_index1, &key1, &bucket1, &result1, &next1);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2flood_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2flood_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer (b1)->l2.bd_index;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* process 1 pkt */
+ em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 1;
+
+ l2flood_process (vm, node, msm,
+ &em->counters[node_counter_base_index], b0,
+ &sw_if_index0, &key0, &bucket0, &result0, &next0);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2flood_node,static) = {
+ .function = l2flood_node_fn,
+ .name = "l2-flood",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2flood_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2flood_error_strings),
+ .error_strings = l2flood_error_strings,
+
+ .n_next_nodes = L2FLOOD_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2FLOOD_NEXT_L2_OUTPUT] = "l2-output",
+ [L2FLOOD_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2flood_node, l2flood_node_fn)
+ clib_error_t *l2flood_init (vlib_main_t * vm)
+{
+ l2flood_main_t *mp = &l2flood_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2flood_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2flood_init);
+
+
+
+/** Add the L3 input node for this ethertype to the next nodes structure. */
+void
+l2flood_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index)
+{
+ l2flood_main_t *mp = &l2flood_main;
+ u32 next_index;
+
+ next_index = vlib_node_add_next (vm, l2flood_node.index, node_index);
+
+ next_by_ethertype_register (&mp->l3_next, type, next_index);
+}
+
+
+/**
+ * Set subinterface flood enable/disable.
+ * The CLI format is:
+ * set interface l2 flood <interface> [disable]
+ */
+static clib_error_t *
+int_flood (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_FLOOD, enable);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 flooding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage interfaces. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable flooding:
+ * @cliexcmd{set interface l2 flood GigabitEthernet0/8/0}
+ * Example of how to disable flooding:
+ * @cliexcmd{set interface l2 flood GigabitEthernet0/8/0 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_flood_cli, static) = {
+ .path = "set interface l2 flood",
+ .short_help = "set interface l2 flood <interface> [disable]",
+ .function = int_flood,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_flood.h b/src/vnet/l2/l2_flood.h
new file mode 100644
index 00000000..acd7c905
--- /dev/null
+++ b/src/vnet/l2/l2_flood.h
@@ -0,0 +1,35 @@
+/*
+ * l2_flood.h : layer 2 flooding
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2flood_h
+#define included_l2flood_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+void
+l2flood_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c
new file mode 100644
index 00000000..2bb7307c
--- /dev/null
+++ b/src/vnet/l2/l2_fwd.c
@@ -0,0 +1,577 @@
+/*
+ * l2_fwd.c : layer 2 forwarding using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/l2/l2_fwd.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/sparse_vec.h>
+
+
+/**
+ * @file
+ * @brief Ethernet Forwarding.
+ *
+ * Code in this file handles forwarding Layer 2 packets. It performs the
+ * FIB lookup and invokes packet learning and flooding as necessary.
+ * The packet is then sent to the next graph node.
+ */
+
+typedef struct
+{
+
+ /* Hash table */
+ BVT (clib_bihash) * mac_table;
+
+ /* next node index for the L3 input node of each ethertype */
+ next_by_ethertype_t l3_next;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2fwd_main_t;
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2fwd_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2fwd_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2fwd_trace_t *t = va_arg (*args, l2fwd_trace_t *);
+
+ s = format (s, "l2-fwd: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src, t->bd_index);
+ return s;
+}
+
+l2fwd_main_t l2fwd_main;
+
+static vlib_node_registration_t l2fwd_node;
+
+#define foreach_l2fwd_error \
+_(L2FWD, "L2 forward packets") \
+_(FLOOD, "L2 forward misses") \
+_(HIT, "L2 forward hits") \
+_(BVI_BAD_MAC, "BVI L3 MAC mismatch") \
+_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") \
+_(FILTER_DROP, "Filter Mac Drop") \
+_(REFLECT_DROP, "Reflection Drop") \
+_(STALE_DROP, "Stale entry Drop")
+
+typedef enum
+{
+#define _(sym,str) L2FWD_ERROR_##sym,
+ foreach_l2fwd_error
+#undef _
+ L2FWD_N_ERROR,
+} l2fwd_error_t;
+
+static char *l2fwd_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2fwd_error
+#undef _
+};
+
+typedef enum
+{
+ L2FWD_NEXT_L2_OUTPUT,
+ L2FWD_NEXT_FLOOD,
+ L2FWD_NEXT_DROP,
+ L2FWD_N_NEXT,
+} l2fwd_next_t;
+
+/** Forward one packet based on the mac table lookup result. */
+
+static_always_inline void
+l2fwd_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ l2fwd_main_t * msm,
+ vlib_error_main_t * em,
+ vlib_buffer_t * b0,
+ u32 sw_if_index0, l2fib_entry_result_t * result0, u32 * next0)
+{
+ int try_flood = result0->raw == ~0;
+ int flood_error;
+
+ if (PREDICT_FALSE (try_flood))
+ {
+ flood_error = L2FWD_ERROR_FLOOD;
+ }
+ else
+ {
+ /* lookup hit, forward packet */
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_HIT] += 1;
+#endif
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index;
+ *next0 = L2FWD_NEXT_L2_OUTPUT;
+ int l2fib_seq_num_valid = 1;
+
+ /* check l2fib seq num for stale entries */
+ if (!result0->fields.age_not)
+ {
+ l2fib_seq_num_t in_sn = {.as_u16 = vnet_buffer (b0)->l2.l2fib_sn };
+ l2fib_seq_num_t expected_sn = {
+ .bd = in_sn.bd,
+ .swif = *l2fib_swif_seq_num (result0->fields.sw_if_index),
+ };
+ l2fib_seq_num_valid =
+ expected_sn.as_u16 == result0->fields.sn.as_u16;
+ }
+
+ if (PREDICT_FALSE (!l2fib_seq_num_valid))
+ {
+ flood_error = L2FWD_ERROR_STALE_DROP;
+ try_flood = 1;
+ }
+ /* perform reflection check */
+ else if (PREDICT_FALSE (sw_if_index0 == result0->fields.sw_if_index))
+ {
+ b0->error = node->errors[L2FWD_ERROR_REFLECT_DROP];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+ /* perform filter check */
+ else if (PREDICT_FALSE (result0->fields.filter))
+ {
+ b0->error = node->errors[L2FWD_ERROR_FILTER_DROP];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+ /* perform BVI check */
+ else if (PREDICT_FALSE (result0->fields.bvi))
+ {
+ u32 rc;
+ rc = l2_to_bvi (vm,
+ msm->vnet_main,
+ b0,
+ vnet_buffer (b0)->sw_if_index[VLIB_TX],
+ &msm->l3_next, next0);
+
+ if (PREDICT_FALSE (rc))
+ {
+ if (rc == TO_BVI_ERR_BAD_MAC)
+ {
+ b0->error = node->errors[L2FWD_ERROR_BVI_BAD_MAC];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+ else if (rc == TO_BVI_ERR_ETHERTYPE)
+ {
+ b0->error = node->errors[L2FWD_ERROR_BVI_ETHERTYPE];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+ }
+ }
+ }
+
+ /* flood */
+ if (PREDICT_FALSE (try_flood))
+ {
+      /*
+       * Lookup miss, so send the packet to the flood node, which
+       * replicates it to each interface in the bridge domain. If
+       * unknown-unicast flooding is disabled, drop instead.
+       */
+ if (vnet_buffer (b0)->l2.feature_bitmap & L2INPUT_FEAT_UU_FLOOD)
+ {
+ *next0 = L2FWD_NEXT_FLOOD;
+ }
+ else
+ {
+ /* Flooding is disabled */
+ b0->error = node->errors[flood_error];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+ }
+
+}
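+
+/*
+ * Illustrative note (not part of the original patch): the stale-entry check
+ * above works because flush operations bump the per-interface or per-BD
+ * sequence number; for example set_int_l2_mode() in l2_input.c below does
+ *
+ *   *l2fib_valid_swif_seq_num (sw_if_index) += 1;
+ *
+ * after which entries learned under the old generation fail the
+ * expected_sn.as_u16 comparison and the packet is flooded instead.
+ */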
+
+
+static_always_inline uword
+l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int do_trace)
+{
+ u32 n_left_from, *from, *to_next;
+ l2fwd_next_t next_index;
+ l2fwd_main_t *msm = &l2fwd_main;
+ vlib_node_t *n = vlib_get_node (vm, l2fwd_node.index);
+ CLIB_UNUSED (u32 node_counter_base_index) = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ l2fib_entry_key_t cached_key;
+ l2fib_entry_result_t cached_result;
+
+ /* Clear the one-entry cache in case mac table was updated */
+ cached_key.raw = ~0;
+ cached_result.raw = ~0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+ ethernet_header_t *h0, *h1, *h2, *h3;
+ l2fib_entry_key_t key0, key1, key2, key3;
+ l2fib_entry_result_t result0, result1, result2, result3;
+ u32 bucket0, bucket1, bucket2, bucket3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+	  /* speculatively enqueue b0 through b3 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+
+ if (do_trace)
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer (b1)->l2.bd_index;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ t->bd_index = vnet_buffer (b2)->l2.bd_index;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ t->bd_index = vnet_buffer (b3)->l2.bd_index;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ }
+ }
+
+	  /* process 4 pkts */
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 4;
+#endif
+ /* *INDENT-OFF* */
+ l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result,
+ h0->dst_address, h1->dst_address,
+ h2->dst_address, h3->dst_address,
+ vnet_buffer (b0)->l2.bd_index,
+ vnet_buffer (b1)->l2.bd_index,
+ vnet_buffer (b2)->l2.bd_index,
+ vnet_buffer (b3)->l2.bd_index,
+ &key0, /* not used */
+ &key1, /* not used */
+ &key2, /* not used */
+ &key3, /* not used */
+ &bucket0, /* not used */
+ &bucket1, /* not used */
+ &bucket2, /* not used */
+ &bucket3, /* not used */
+ &result0,
+ &result1,
+ &result2,
+ &result3);
+ /* *INDENT-ON* */
+ l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0,
+ &next0);
+ l2fwd_process (vm, node, msm, em, b1, sw_if_index1, &result1,
+ &next1);
+ l2fwd_process (vm, node, msm, em, b2, sw_if_index2, &result2,
+ &next2);
+ l2fwd_process (vm, node, msm, em, b3, sw_if_index3, &result3,
+ &next3);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+	  /* if next0..next3 == next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+
+ if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2fwd_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ /* process 1 pkt */
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 1;
+#endif
+ l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result, h0->dst_address, vnet_buffer (b0)->l2.bd_index, &key0, /* not used */
+ &bucket0, /* not used */
+ &result0);
+ l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0,
+ &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+l2fwd_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ return l2fwd_node_inline (vm, node, frame, 1 /* do_trace */ );
+ return l2fwd_node_inline (vm, node, frame, 0 /* do_trace */ );
+}
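+
+/*
+ * Design note (illustrative): compiling l2fwd_node_inline twice with a
+ * compile-time do_trace flag hoists the trace check out of the per-packet
+ * fast path; the branch above is taken once per frame instead.
+ */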
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2fwd_node,static) = {
+ .function = l2fwd_node_fn,
+ .name = "l2-fwd",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2fwd_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2fwd_error_strings),
+ .error_strings = l2fwd_error_strings,
+
+ .n_next_nodes = L2FWD_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2FWD_NEXT_L2_OUTPUT] = "l2-output",
+ [L2FWD_NEXT_FLOOD] = "l2-flood",
+ [L2FWD_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2fwd_node, l2fwd_node_fn)
+ clib_error_t *l2fwd_init (vlib_main_t * vm)
+{
+ l2fwd_main_t *mp = &l2fwd_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* init the hash table ptr */
+ mp->mac_table = get_mac_table ();
+
+ /* Initialize the next nodes for each ethertype */
+ next_by_ethertype_init (&mp->l3_next);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fwd_init);
+
+
+/** Add the L3 input node for this ethertype to the next nodes structure. */
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index)
+{
+ l2fwd_main_t *mp = &l2fwd_main;
+ u32 next_index;
+
+ next_index = vlib_node_add_next (vm, l2fwd_node.index, node_index);
+
+ next_by_ethertype_register (&mp->l3_next, type, next_index);
+}
+
+
+/**
+ * Set subinterface forward enable/disable.
+ * The CLI format is:
+ * set interface l2 forward <interface> [disable]
+ */
+static clib_error_t *
+int_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ if (l2input_intf_config (sw_if_index)->xconnect)
+ {
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_XCONNECT, enable);
+ }
+ else
+ {
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_FWD, enable);
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 unicast forwarding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage interfaces. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable forwarding:
+ * @cliexcmd{set interface l2 forward GigabitEthernet0/8/0}
+ * Example of how to disable forwarding:
+ * @cliexcmd{set interface l2 forward GigabitEthernet0/8/0 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_fwd_cli, static) = {
+ .path = "set interface l2 forward",
+ .short_help = "set interface l2 forward <interface> [disable]",
+ .function = int_fwd,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fwd.h b/src/vnet/l2/l2_fwd.h
new file mode 100644
index 00000000..3968732d
--- /dev/null
+++ b/src/vnet/l2/l2_fwd.h
@@ -0,0 +1,36 @@
+/*
+ * l2_fwd.c : layer 2 forwarding using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fwd_h
+#define included_l2fwd_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
new file mode 100644
index 00000000..3933dae5
--- /dev/null
+++ b/src/vnet/l2/l2_input.c
@@ -0,0 +1,1187 @@
+/*
+ * l2_input.c : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_bd.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+/**
+ * @file
+ * @brief Interface Input Mode (Layer 2 Cross-Connect or Bridge / Layer 3).
+ *
+ * This file contains the CLI Commands that modify the input mode of an
+ * interface. For interfaces in a Layer 2 cross-connect, all packets
+ * received on one interface will be transmitted to the other. For
+ * interfaces in a bridge-domain, packets will be forwarded to other
+ * interfaces in the same bridge-domain based on destination mac address.
+ * For interfaces in Layer 3 mode, the packets will be routed.
+ */
+
+/* Feature graph node names */
+static char *l2input_feat_names[] = {
+#define _(sym,name) name,
+ foreach_l2input_feat
+#undef _
+};
+
+char **
+l2input_get_feat_names (void)
+{
+ return l2input_feat_names;
+}
+
+u8 *
+format_l2_input_features (u8 * s, va_list * args)
+{
+ static char *display_names[] = {
+#define _(sym,name) #sym,
+ foreach_l2input_feat
+#undef _
+ };
+ u32 feature_bitmap = va_arg (*args, u32);
+
+ if (feature_bitmap == 0)
+ {
+ s = format (s, " none configured");
+ return s;
+ }
+
+ feature_bitmap &= ~L2INPUT_FEAT_DROP; /* Not a feature */
+ int i;
+ for (i = L2INPUT_N_FEAT; i >= 0; i--)
+ if (feature_bitmap & (1 << i))
+ s = format (s, "%10s (%s)\n", display_names[i], l2input_feat_names[i]);
+ return s;
+}
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 next_index;
+ u32 sw_if_index;
+} l2input_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2input_trace_t *t = va_arg (*args, l2input_trace_t *);
+
+ s = format (s, "l2-input: sw_if_index %d dst %U src %U",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src);
+ return s;
+}
+
+l2input_main_t l2input_main;
+
+#define foreach_l2input_error \
+_(L2INPUT, "L2 input packets") \
+_(DROP, "L2 input drops")
+
+typedef enum
+{
+#define _(sym,str) L2INPUT_ERROR_##sym,
+ foreach_l2input_error
+#undef _
+ L2INPUT_N_ERROR,
+} l2input_error_t;
+
+static char *l2input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2input_error
+#undef _
+};
+
+typedef enum
+{
+ L2INPUT_NEXT_LEARN,
+ L2INPUT_NEXT_FWD,
+ L2INPUT_NEXT_DROP,
+ L2INPUT_N_NEXT,
+} l2input_next_t;
+
+
+static_always_inline void
+classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0)
+{
+ /*
+ * Load L2 input feature struct
+ * Load bridge domain struct
+ * Parse ethernet header to determine unicast/mcast/broadcast
+ * take L2 input stat
+ * classify packet as IP/UDP/TCP, control, other
+ * mask feature bitmap
+ * go to first node in bitmap
+ * Later: optimize VTM
+ *
+ * For L2XC,
+ * set tx sw-if-handle
+ */
+
+ u16 ethertype;
+ u8 protocol;
+ l2_input_config_t *config;
+ l2_bridge_domain_t *bd_config;
+ u16 bd_index0;
+ u32 feature_bitmap;
+ u32 feat_mask;
+ ethernet_header_t *h0;
+ u8 *l3h0;
+ u32 sw_if_index0;
+
+#define get_u16(addr) ( *((u16 *)(addr)) )
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+ l3h0 = (u8 *) h0 + vnet_buffer (b0)->l2.l2_len;
+
+ ethertype = clib_net_to_host_u16 (get_u16 (l3h0 - 2));
+ feat_mask = ~0;
+
+ /* Get config for the input interface */
+ config = vec_elt_at_index (msm->configs, sw_if_index0);
+
+ /* Save split horizon group */
+ vnet_buffer (b0)->l2.shg = config->shg;
+
+ /* determine layer2 kind for stat and mask */
+ if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address)))
+ {
+ protocol = ((ip6_header_t *) l3h0)->protocol;
+
+ /* Disable bridge forwarding (flooding will execute instead if not xconnect) */
+ feat_mask &= ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD);
+
+ /* Disable ARP-term for non-ARP and non-ICMP6 packet */
+ if (ethertype != ETHERNET_TYPE_ARP &&
+ (ethertype != ETHERNET_TYPE_IP6 || protocol != IP_PROTOCOL_ICMP6))
+ feat_mask &= ~(L2INPUT_FEAT_ARP_TERM);
+
+ /*
+ * For packet from BVI - set SHG of ARP request or ICMPv6 neighbor
+ * solicitation packet from BVI to 0 so it can also flood to VXLAN
+ * tunnels or other ports with the same SHG as that of the BVI.
+ */
+ else if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
+ L2INPUT_BVI))
+ {
+ if (ethertype == ETHERNET_TYPE_ARP)
+ {
+ ethernet_arp_header_t *arp0 = (ethernet_arp_header_t *) l3h0;
+ if (arp0->opcode ==
+ clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request))
+ vnet_buffer (b0)->l2.shg = 0;
+ }
+ else /* must be ICMPv6 */
+ {
+ ip6_header_t *iph0 = (ip6_header_t *) l3h0;
+ icmp6_neighbor_solicitation_or_advertisement_header_t *ndh0;
+ ndh0 = ip6_next_header (iph0);
+ if (ndh0->icmp.type == ICMP6_neighbor_solicitation)
+ vnet_buffer (b0)->l2.shg = 0;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * For packet from BVI - set SHG of unicast packet from BVI to 0 so it
+ * is not dropped on output to VXLAN tunnels or other ports with the
+ * same SHG as that of the BVI.
+ */
+ if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
+ L2INPUT_BVI))
+ vnet_buffer (b0)->l2.shg = 0;
+ }
+
+
+ if (config->bridge)
+ {
+ /* Do bridge-domain processing */
+ bd_index0 = config->bd_index;
+ /* save BD ID for next feature graph nodes */
+ vnet_buffer (b0)->l2.bd_index = bd_index0;
+
+ /* Get config for the bridge domain interface */
+ bd_config = vec_elt_at_index (msm->bd_configs, bd_index0);
+
+ /* Save bridge domain and interface seq_num */
+ /* *INDENT-OFF* */
+ l2fib_seq_num_t sn = {
+ .swif = *l2fib_swif_seq_num(sw_if_index0),
+ .bd = bd_config->seq_num,
+ };
+ /* *INDENT-ON* */
+      vnet_buffer (b0)->l2.l2fib_sn = sn.as_u16;
+ vnet_buffer (b0)->l2.bd_age = bd_config->mac_age;
+
+ /*
+ * Process bridge domain feature enables.
+ * To perform learning/flooding/forwarding, the corresponding bit
+ * must be enabled in both the input interface config and in the
+ * bridge domain config. In the bd_bitmap, bits for features other
+ * than learning/flooding/forwarding should always be set.
+ */
+ feat_mask = feat_mask & bd_config->feature_bitmap;
+ }
+ else if (config->xconnect)
+ {
+ /* Set the output interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index;
+ }
+ else
+ feat_mask = L2INPUT_FEAT_DROP;
+
+ /* mask out features from bitmap using packet type and bd config */
+ feature_bitmap = config->feature_bitmap & feat_mask;
+
+ /* save for next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
+
+ /* Determine the next node */
+ *next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+ feature_bitmap);
+}
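+
+/*
+ * Illustrative sketch (an assumption about the feat_bitmap helpers, not
+ * part of the original patch): feature bits are ordered so that higher
+ * bits dispatch first and L2INPUT_FEAT_DROP (the lowest bit) is the
+ * last chance. For a bridged unicast packet with
+ *
+ *   feature_bitmap = L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD;
+ *
+ * dispatch goes to l2-learn first; l2-learn then clears its own bit in
+ * vnet_buffer (b0)->l2.feature_bitmap and continues on to l2-fwd.
+ */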
+
+static_always_inline uword
+l2input_node_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame,
+ int do_trace)
+{
+ u32 n_left_from, *from, *to_next;
+ l2input_next_t next_index;
+ l2input_main_t *msm = &l2input_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ /*
+ * Don't bother prefetching the bridge-domain config (which
+ * depends on the input config above). Only a small number of
+ * bridge domains are expected. Plus the structure is small
+ * and several fit in a cache line.
+ */
+ }
+
+	  /* speculatively enqueue b0 through b3 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ if (do_trace)
+ {
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h2 = vlib_buffer_get_current (b2);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h3 = vlib_buffer_get_current (b3);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ }
+ }
+
+ vlib_node_increment_counter (vm, l2input_node.index,
+ L2INPUT_ERROR_L2INPUT, 4);
+
+ classify_and_dispatch (msm, b0, &next0);
+ classify_and_dispatch (msm, b1, &next1);
+ classify_and_dispatch (msm, b2, &next2);
+ classify_and_dispatch (msm, b3, &next3);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+	  /* if next0..next3 == next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ l2input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ vlib_node_increment_counter (vm, l2input_node.index,
+ L2INPUT_ERROR_L2INPUT, 1);
+
+ classify_and_dispatch (msm, b0, &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+l2input_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ return l2input_node_inline (vm, node, frame, 1 /* do_trace */ );
+ return l2input_node_inline (vm, node, frame, 0 /* do_trace */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2input_node) = {
+ .function = l2input_node_fn,
+ .name = "l2-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2input_trace,
+ .format_buffer = format_ethernet_header_with_length,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2input_error_strings),
+ .error_strings = l2input_error_strings,
+
+ .n_next_nodes = L2INPUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2INPUT_NEXT_LEARN] = "l2-learn",
+ [L2INPUT_NEXT_FWD] = "l2-fwd",
+ [L2INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2input_node, l2input_node_fn)
+ clib_error_t *l2input_init (vlib_main_t * vm)
+{
+ l2input_main_t *mp = &l2input_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Get packets RX'd from L2 interfaces */
+ ethernet_register_l2_input (vm, l2input_node.index);
+
+  /* Create the config vector: 100 sw interface entries, zeroed */
+  vec_validate (mp->configs, 100);
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2input_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2input_init);
+
+
+/** Get a pointer to the config for the given interface. */
+l2_input_config_t *
+l2input_intf_config (u32 sw_if_index)
+{
+ l2input_main_t *mp = &l2input_main;
+
+ vec_validate (mp->configs, sw_if_index);
+ return vec_elt_at_index (mp->configs, sw_if_index);
+}
+
+/** Enable (or disable) the feature in the bitmap for the given interface. */
+u32
+l2input_intf_bitmap_enable (u32 sw_if_index, u32 feature_bitmap, u32 enable)
+{
+ l2_input_config_t *config = l2input_intf_config (sw_if_index);
+
+ if (enable)
+ config->feature_bitmap |= feature_bitmap;
+ else
+ config->feature_bitmap &= ~feature_bitmap;
+
+ return config->feature_bitmap;
+}
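+
+/*
+ * Usage sketch (illustrative; mirrors the CLI handlers in l2_flood.c and
+ * l2_fwd.c above):
+ *
+ *   l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_FLOOD, 1);
+ *   l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_FWD, 0);
+ */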
+
+u32
+l2input_set_bridge_features (u32 bd_index, u32 feat_mask, u32 feat_value)
+{
+  l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+ bd_validate (bd_config);
+ bd_config->feature_bitmap =
+ (bd_config->feature_bitmap & ~feat_mask) | feat_value;
+ return bd_config->feature_bitmap;
+}
+
+/**
+ * Set the subinterface to run in l2 or l3 mode.
+ * For L3 mode, just the sw_if_index is specified.
+ * For bridged mode, the bd id and bvi flag are also specified.
+ * For xconnect mode, the peer sw_if_index is also specified.
+ * Return 0 if ok, or non-0 if there was an error.
+ */
+
+u32
+set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */
+ u32 mode, /* One of L2 modes or back to L3 mode */
+ u32 sw_if_index, /* sw interface index */
+ u32 bd_index, /* for bridged interface */
+ u32 bvi, /* the bridged interface is the BVI */
+ u32 shg, /* the bridged interface split horizon group */
+ u32 xc_sw_if_index) /* peer interface for xconnect */
+{
+ l2input_main_t *mp = &l2input_main;
+ l2output_main_t *l2om = &l2output_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi;
+ l2_output_config_t *out_config;
+ l2_input_config_t *config;
+ l2_bridge_domain_t *bd_config;
+ u64 mac;
+ i32 l2_if_adjust = 0;
+ u32 slot;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+ config = l2input_intf_config (sw_if_index);
+
+ if (config->bridge)
+ {
+ /* Interface is already in bridge mode. Undo the existing config. */
+ bd_config = vec_elt_at_index (mp->bd_configs, config->bd_index);
+
+ /* remove interface from flood vector */
+ bd_remove_member (bd_config, sw_if_index);
+
+ /* undo any BVI-related config */
+ if (bd_config->bvi_sw_if_index == sw_if_index)
+ {
+ bd_config->bvi_sw_if_index = ~0;
+ config->bvi = 0;
+
+ /* delete the l2fib entry for the bvi interface */
+ mac = *((u64 *) hi->hw_address);
+ l2fib_del_entry (mac, config->bd_index);
+
+ /* Make loop output node send packet back to ethernet-input node */
+ slot =
+ vlib_node_add_named_next_with_slot (vm, hi->tx_node_index,
+ "ethernet-input",
+ VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ }
+
+ /* Clear MACs learned on the interface */
+ if ((config->feature_bitmap & L2INPUT_FEAT_LEARN) ||
+ (bd_config->feature_bitmap & L2INPUT_FEAT_LEARN))
+ l2fib_flush_int_mac (vm, sw_if_index);
+
+ l2_if_adjust--;
+ }
+ else if (config->xconnect)
+ {
+ l2_if_adjust--;
+ }
+
+ /* Make sure vector is big enough */
+ vec_validate_init_empty (l2om->output_node_index_vec, sw_if_index,
+ L2OUTPUT_NEXT_DROP);
+
+ /* Initialize the l2-input configuration for the interface */
+ if (mode == MODE_L3)
+ {
+      /* Set L2 config to BD index 0 so that if any packet accidentally
+       * comes in on the L2 path, it will be dropped in BD 0 */
+ config->xconnect = 0;
+ config->bridge = 0;
+ config->shg = 0;
+ config->bd_index = 0;
+ config->feature_bitmap = L2INPUT_FEAT_DROP;
+
+ /* Clear L2 output config */
+ out_config = l2output_intf_config (sw_if_index);
+ memset (out_config, 0, sizeof (l2_output_config_t));
+
+      /* Make sure any L2-output packet to this interface, which is now in
+       * L3 mode, is dropped. This may happen if an L2 FIB MAC entry is stale */
+ l2om->output_node_index_vec[sw_if_index] = L2OUTPUT_NEXT_BAD_INTF;
+ }
+ else
+ {
+ /* Add or update l2-output node next-arc and output_node_index_vec table
+ * for the interface */
+ l2output_create_output_node_mapping (vm, vnet_main, sw_if_index);
+
+ if (mode == MODE_L2_BRIDGE)
+ {
+	  /*
+	   * The check that the interface must be Ethernet was removed,
+	   * specifically so we can bridge to L3 tunnel interfaces.
+	   * The removed check was:
+	   *   if (hi->hw_class_index != ethernet_hw_interface_class.index)
+	   */
+ if (!hi)
+ return MODE_ERROR_ETH; /* non-ethernet */
+
+ config->xconnect = 0;
+ config->bridge = 1;
+ config->bd_index = bd_index;
+ *l2fib_valid_swif_seq_num (sw_if_index) += 1;
+
+ /*
+ * Enable forwarding, flooding, learning and ARP termination by default
+ * (note that ARP term is disabled on BD feature bitmap by default)
+ */
+ config->feature_bitmap |= L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD |
+ L2INPUT_FEAT_FLOOD | L2INPUT_FEAT_LEARN | L2INPUT_FEAT_ARP_TERM;
+
+ /* Make sure last-chance drop is configured */
+ config->feature_bitmap |= L2INPUT_FEAT_DROP;
+
+ /* Make sure xconnect is disabled */
+ config->feature_bitmap &= ~L2INPUT_FEAT_XCONNECT;
+
+ /* Set up bridge domain */
+ bd_config = l2input_bd_config (bd_index);
+ bd_validate (bd_config);
+
+ /* TODO: think: add l2fib entry even for non-bvi interface? */
+
+ /* Do BVI interface initializations */
+ if (bvi)
+ {
+ /* ensure BD has no bvi interface (or replace that one with this??) */
+ if (bd_config->bvi_sw_if_index != ~0)
+ {
+ return MODE_ERROR_BVI_DEF; /* bd already has a bvi interface */
+ }
+ bd_config->bvi_sw_if_index = sw_if_index;
+ config->bvi = 1;
+
+ /* create the l2fib entry for the bvi interface */
+ mac = *((u64 *) hi->hw_address);
+ l2fib_add_fwd_entry (mac, bd_index, sw_if_index, 1, 1); /* static + bvi */
+
+	      /* Disable learning by default; of no use since the l2fib entry is static */
+ config->feature_bitmap &= ~L2INPUT_FEAT_LEARN;
+
+ /* Make loop output node send packet to l2-input node */
+ slot =
+ vlib_node_add_named_next_with_slot (vm, hi->tx_node_index,
+ "l2-input",
+ VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ }
+
+ /* Add interface to bridge-domain flood vector */
+ l2_flood_member_t member = {
+ .sw_if_index = sw_if_index,
+ .flags = bvi ? L2_FLOOD_MEMBER_BVI : L2_FLOOD_MEMBER_NORMAL,
+ .shg = shg,
+ };
+ bd_add_member (bd_config, &member);
+
+ }
+ else if (mode == MODE_L2_XC)
+ {
+ config->xconnect = 1;
+ config->bridge = 0;
+ config->output_sw_if_index = xc_sw_if_index;
+
+ /* Make sure last-chance drop is configured */
+ config->feature_bitmap |= L2INPUT_FEAT_DROP;
+
+ /* Make sure bridging features are disabled */
+ config->feature_bitmap &=
+ ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD);
+
+ config->feature_bitmap |= L2INPUT_FEAT_XCONNECT;
+ shg = 0; /* not used in xconnect */
+ }
+ else if (mode == MODE_L2_CLASSIFY)
+ {
+ config->xconnect = 1;
+ config->bridge = 0;
+ config->output_sw_if_index = xc_sw_if_index;
+
+ /* Make sure last-chance drop is configured */
+ config->feature_bitmap |=
+ L2INPUT_FEAT_DROP | L2INPUT_FEAT_INPUT_CLASSIFY;
+
+ /* Make sure bridging features are disabled */
+ config->feature_bitmap &=
+ ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD);
+ shg = 0; /* not used in xconnect */
+
+	  /* Ensure all packets go to ethernet-input */
+ ethernet_set_rx_redirect (vnet_main, hi, 1);
+ }
+
+ /* set up split-horizon group and set output feature bit */
+ config->shg = shg;
+ out_config = l2output_intf_config (sw_if_index);
+ out_config->shg = shg;
+ out_config->feature_bitmap |= L2OUTPUT_FEAT_OUTPUT;
+
+ /*
+ * Test: remove this when non-IP features can be configured.
+ * Enable a non-IP feature to test IP feature masking
+ * config->feature_bitmap |= L2INPUT_FEAT_CTRL_PKT;
+ */
+
+ l2_if_adjust++;
+ }
+
+ /* Adjust count of L2 interfaces */
+ hi->l2_if_count += l2_if_adjust;
+
+ if (hi->hw_class_index == ethernet_hw_interface_class.index)
+ {
+ if ((hi->l2_if_count == 1) && (l2_if_adjust == 1))
+ {
+ /* Just added first L2 interface on this port */
+
+ /* Set promiscuous mode on the l2 interface */
+ ethernet_set_flags (vnet_main, hi->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+
+ /* ensure all packets go to ethernet-input */
+ ethernet_set_rx_redirect (vnet_main, hi, 1);
+
+ }
+ else if ((hi->l2_if_count == 0) && (l2_if_adjust == -1))
+ {
+ /* Just removed only L2 subinterface on this port */
+
+ /* Disable promiscuous mode on the l2 interface */
+ ethernet_set_flags (vnet_main, hi->hw_if_index, 0);
+
+ /* Allow ip packets to go directly to ip4-input etc */
+ ethernet_set_rx_redirect (vnet_main, hi, 0);
+ }
+ }
+
+ /* Set up the L2/L3 flag in the interface parsing tables */
+ ethernet_sw_interface_set_l2_mode (vnm, sw_if_index, (mode != MODE_L3));
+
+ return 0;
+}
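+
+/*
+ * Usage sketch (illustrative only), mirroring the CLI handler below:
+ * put an interface into an existing bridge domain. bd_index is
+ * assumed to come from bd_find_or_add_bd_index().
+ *
+ *   u32 rc = set_int_l2_mode (vm, vnet_get_main (), MODE_L2_BRIDGE,
+ *                             sw_if_index, bd_index, bvi, shg, 0);
+ *
+ * The last argument is the xconnect peer, unused in bridge mode. A
+ * non-zero return is one of the MODE_ERROR_* codes, e.g.
+ * MODE_ERROR_BVI_DEF when the BD already has a BVI interface.
+ */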
+
+/**
+ * Set subinterface in bridging mode with a bridge-domain ID.
+ * The CLI format is:
+ * set interface l2 bridge <interface> <bd> [bvi] [split-horizon-group]
+ */
+static clib_error_t *
+int_l2_bridge (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 sw_if_index;
+ u32 bvi;
+ u32 rc;
+ u32 shg;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (bd_id > L2_BD_ID_MAX)
+ {
+      error = clib_error_return (0, "bridge domain ID exceeds 16M limit",
+ format_unformat_error, input);
+ goto done;
+ }
+ bd_index = bd_find_or_add_bd_index (&bd_main, bd_id);
+
+ /* optional bvi */
+ bvi = unformat (input, "bvi");
+
+ /* optional split horizon group */
+ shg = 0;
+ (void) unformat (input, "%d", &shg);
+
+ /* set the interface mode */
+ if ((rc =
+ set_int_l2_mode (vm, vnm, MODE_L2_BRIDGE, sw_if_index, bd_index, bvi,
+ shg, 0)))
+ {
+ if (rc == MODE_ERROR_ETH)
+ {
+ error = clib_error_return (0, "bridged interface must be ethernet",
+ format_unformat_error, input);
+ }
+ else if (rc == MODE_ERROR_BVI_DEF)
+ {
+ error =
+ clib_error_return (0, "bridge-domain already has a bvi interface",
+ format_unformat_error, input);
+ }
+ else
+ {
+ error = clib_error_return (0, "invalid configuration for interface",
+ format_unformat_error, input);
+ }
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Use this command to put an interface into a Layer 2 bridge domain. If a
+ * bridge-domain with the provided bridge-domain-id does not exist, it
+ * will be created. Interfaces in a bridge-domain forward packets to
+ * other interfaces in the same bridge-domain based on the destination MAC
+ * address. To remove an interface from the Layer 2 bridge domain,
+ * put the interface in a different mode, for example Layer 3 mode.
+ *
+ * Optionally, an interface can be added to a Layer 2 bridge-domain as
+ * a Bridged Virtual Interface (bvi). Only one interface in a Layer 2
+ * bridge-domain can be a bvi.
+ *
+ * Optionally, a split-horizon group can also be specified. This defaults
+ * to 0 if not specified.
+ *
+ * @cliexpar
+ * Example of how to configure a Layer 2 bridge-domain with three
+ * interfaces (where 200 is the bridge-domain-id):
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/8/0.200 200}
+ * This interface is added as a BVI interface:
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/9/0.200 200 bvi}
+ * This interface also has a split-horizon group of 1 specified:
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/a/0.200 200 1}
+ * Example of how to remove an interface from a Layer 2 bridge-domain:
+ * @cliexcmd{set interface l3 GigabitEthernet0/a/0.200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = {
+ .path = "set interface l2 bridge",
+ .short_help = "set interface l2 bridge <interface> <bridge-domain-id> [bvi] [shg]",
+ .function = int_l2_bridge,
+};
+/* *INDENT-ON* */
+
+/**
+ * Set subinterface in xconnect mode with another interface.
+ * The CLI format is:
+ * set interface l2 xconnect <interface> <peer interface>
+ */
+static clib_error_t *
+int_l2_xc (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 xc_sw_if_index;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat_user
+ (input, unformat_vnet_sw_interface, vnm, &xc_sw_if_index))
+ {
+ error = clib_error_return (0, "unknown peer interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ /* set the interface mode */
+ if (set_int_l2_mode
+ (vm, vnm, MODE_L2_XC, sw_if_index, 0, 0, 0, xc_sw_if_index))
+ {
+ error = clib_error_return (0, "invalid configuration for interface",
+ format_unformat_error, input);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Use this command to put an interface into Layer 2 cross-connect mode.
+ * Both interfaces must be in this mode for bi-directional traffic. All
+ * packets received on one interface will be transmitted to the other.
+ * To remove the Layer 2 cross-connect, put the interface in a different
+ * mode, for example Layer 3 mode.
+ *
+ * @cliexpar
+ * Example of how to configure a Layer 2 cross-connect between two interfaces:
+ * @cliexcmd{set interface l2 xconnect GigabitEthernet0/8/0.300 GigabitEthernet0/9/0.300}
+ * @cliexcmd{set interface l2 xconnect GigabitEthernet0/9/0.300 GigabitEthernet0/8/0.300}
+ * Example of how to remove a Layer 2 cross-connect:
+ * @cliexcmd{set interface l3 GigabitEthernet0/8/0.300}
+ * @cliexcmd{set interface l3 GigabitEthernet0/9/0.300}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_xc_cli, static) = {
+ .path = "set interface l2 xconnect",
+ .short_help = "set interface l2 xconnect <interface> <peer interface>",
+ .function = int_l2_xc,
+};
+/* *INDENT-ON* */
+
+/**
+ * Set subinterface in L3 mode.
+ * The CLI format is:
+ * set interface l3 <interface>
+ */
+static clib_error_t *
+int_l3 (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ /* set the interface mode */
+ if (set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0))
+ {
+ error = clib_error_return (0, "invalid configuration for interface",
+ format_unformat_error, input);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Modify the packet processing mode of the interface to Layer 3, which
+ * implies packets will be routed. This is the default mode of an interface.
+ * Use this command to remove an interface from a Layer 2 cross-connect or a
+ * Layer 2 bridge.
+ *
+ * @cliexpar
+ * Example of how to set the mode of an interface to Layer 3:
+ * @cliexcmd{set interface l3 GigabitEthernet0/8/0.200}
+?*/
+/* *INDENT-OFF* */
+ VLIB_CLI_COMMAND (int_l3_cli, static) = {
+ .path = "set interface l3",
+ .short_help = "set interface l3 <interface>",
+ .function = int_l3,
+};
+/* *INDENT-ON* */
+
+/**
+ * Show interface mode.
+ * The CLI format is:
+ * show mode [<if-name1> <if-name2> ...]
+ */
+static clib_error_t *
+show_int_mode (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ char *mode;
+ u8 *args;
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ vnet_sw_interface_t *si, *sis = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u32 sw_if_index;
+
+ /* See if user wants to show specific interface */
+ if (unformat
+ (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ si = pool_elt_at_index (im->sw_interfaces, sw_if_index);
+ vec_add1 (sis, si[0]);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (vec_len (sis) == 0) /* Get all interfaces */
+ {
+ /* Gather interfaces. */
+ sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
+ _vec_len (sis) = 0;
+ /* *INDENT-OFF* */
+ pool_foreach (si, im->sw_interfaces, ({ vec_add1 (sis, si[0]); }));
+ /* *INDENT-ON* */
+ }
+
+ vec_foreach (si, sis)
+ {
+ l2_input_config_t *config = l2input_intf_config (si->sw_if_index);
+ if (config->bridge)
+ {
+ u32 bd_id;
+ mode = "l2 bridge";
+ bd_id = l2input_main.bd_configs[config->bd_index].bd_id;
+
+ args = format (0, "bd_id %d%s%d", bd_id,
+ config->bvi ? " bvi shg " : " shg ", config->shg);
+ }
+ else if (config->xconnect)
+ {
+ mode = "l2 xconnect";
+ args = format (0, "%U",
+ format_vnet_sw_if_index_name,
+ vnm, config->output_sw_if_index);
+ }
+ else
+ {
+ mode = "l3";
+ args = format (0, " ");
+ }
+ vlib_cli_output (vm, "%s %U %v\n",
+ mode,
+ format_vnet_sw_if_index_name,
+ vnm, si->sw_if_index, args);
+ vec_free (args);
+ }
+
+done:
+ vec_free (sis);
+
+ return error;
+}
+
+/*?
+ * Show the packet processing mode (Layer 2 cross-connect, Layer 2 bridge,
+ * Layer 3 routed) of all interfaces and sub-interfaces, or limit the
+ * output to just the provided list of interfaces and sub-interfaces.
+ * The output shows the mode, the interface, and if the interface is
+ * a member of a bridge, the bridge-domain-id and the split horizon group (shg).
+ *
+ * @cliexpar
+ * Example of displaying the mode of all interfaces:
+ * @cliexstart{show mode}
+ * l3 local0
+ * l3 GigabitEthernet0/8/0
+ * l3 GigabitEthernet0/9/0
+ * l3 GigabitEthernet0/a/0
+ * l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
+ * l2 bridge GigabitEthernet0/9/0.200 bd_id 200 shg 0
+ * l2 bridge GigabitEthernet0/a/0.200 bd_id 200 shg 0
+ * l2 xconnect GigabitEthernet0/8/0.300 GigabitEthernet0/9/0.300
+ * l2 xconnect GigabitEthernet0/9/0.300 GigabitEthernet0/8/0.300
+ * @cliexend
+ * Example of displaying the mode of a selected list of interfaces:
+ * @cliexstart{show mode GigabitEthernet0/8/0 GigabitEthernet0/8/0.200}
+ * l3 GigabitEthernet0/8/0
+ * l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2_mode, static) = {
+ .path = "show mode",
+ .short_help = "show mode [<if-name1> <if-name2> ...]",
+ .function = show_int_mode,
+};
+/* *INDENT-ON* */
+
+#define foreach_l2_init_function \
+_(feat_bitmap_drop_init) \
+_(l2fib_init) \
+_(l2_input_classify_init) \
+_(l2bd_init) \
+_(l2fwd_init) \
+_(l2_inacl_init) \
+_(l2input_init) \
+_(l2_vtr_init) \
+_(l2_invtr_init) \
+_(l2_efp_filter_init) \
+_(l2learn_init) \
+_(l2flood_init) \
+_(l2_outacl_init) \
+_(l2output_init) \
+_(l2_patch_init) \
+_(l2_xcrw_init)
+
+clib_error_t *
+l2_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+#define _(a) do { \
+ if ((error = vlib_call_init_function (vm, a))) return error; } \
+while (0);
+ foreach_l2_init_function;
+#undef _
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_init);
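+
+/*
+ * For reference: the _(a) macro above expands each list entry into a
+ * guarded init call; for example the l2fib_init entry becomes roughly:
+ *
+ *   do {
+ *     if ((error = vlib_call_init_function (vm, l2fib_init)))
+ *       return error;
+ *   } while (0);
+ */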
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h
new file mode 100644
index 00000000..e8a6c776
--- /dev/null
+++ b/src/vnet/l2/l2_input.h
@@ -0,0 +1,289 @@
+/*
+ * l2_input.h : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_input_h
+#define included_vnet_l2_input_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip.h>
+
+/* Per-subinterface L2 feature configuration */
+
+typedef struct
+{
+
+ union
+ {
+ u16 bd_index; /* bridge domain id */
+ u32 output_sw_if_index; /* for xconnect */
+ };
+
+ /* Interface mode. If both are 0, this interface is in L3 mode */
+ u8 xconnect;
+ u8 bridge;
+
+ /* this is the bvi interface for the bridge-domain */
+ u8 bvi;
+
+ /* config for which input features are configured on this interface */
+ u32 feature_bitmap;
+
+ /* some of these flags are also in the feature bitmap */
+ u8 learn_enable;
+ u8 fwd_enable;
+ u8 flood_enable;
+
+ /* split horizon group */
+ u8 shg;
+
+} l2_input_config_t;
+
+
+typedef struct
+{
+
+ /* Next nodes for the feature bitmap */
+ u32 feat_next_node_index[32];
+
+ /* config vector indexed by sw_if_index */
+ l2_input_config_t *configs;
+
+ /* bridge domain config vector indexed by bd_index */
+ l2_bridge_domain_t *bd_configs;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2input_main_t;
+
+extern l2input_main_t l2input_main;
+
+extern vlib_node_registration_t l2input_node;
+
+static_always_inline l2_bridge_domain_t *
+l2input_bd_config_from_index (l2input_main_t * l2im, u32 bd_index)
+{
+ l2_bridge_domain_t *bd_config;
+
+ bd_config = vec_elt_at_index (l2im->bd_configs, bd_index);
+ return bd_is_valid (bd_config) ? bd_config : NULL;
+}
+
+static_always_inline l2_bridge_domain_t *
+l2input_bd_config (u32 bd_index)
+{
+ l2input_main_t *mp = &l2input_main;
+ l2_bridge_domain_t *bd_config;
+
+ vec_validate (mp->bd_configs, bd_index);
+ bd_config = vec_elt_at_index (mp->bd_configs, bd_index);
+ return bd_config;
+}
+
+/* Special sw_if_index value indicating an L2 input packet came from the BVI (~0 - 1, i.e. -2) */
+#define L2INPUT_BVI ((u32) (~0-1))
+
+/* L2 input features */
+
+/* Mappings from feature ID to graph node name, listed in reverse
+   dispatch order: the highest-numbered feature bit is dispatched first */
+#define foreach_l2input_feat \
+ _(DROP, "feature-bitmap-drop") \
+ _(XCONNECT, "l2-output") \
+ _(FLOOD, "l2-flood") \
+ _(ARP_TERM, "arp-term-l2bd") \
+ _(UU_FLOOD, "l2-flood") \
+ _(FWD, "l2-fwd") \
+ _(RW, "l2-rw") \
+ _(LEARN, "l2-learn") \
+ _(VTR, "l2-input-vtr") \
+ _(VPATH, "vpath-input-l2") \
+ _(ACL, "l2-input-acl") \
+ _(POLICER_CLAS, "l2-policer-classify") \
+ _(INPUT_CLASSIFY, "l2-input-classify") \
+ _(SPAN, "span-l2-input")
+
+/* Feature bitmap positions */
+typedef enum
+{
+#define _(sym,str) L2INPUT_FEAT_##sym##_BIT,
+ foreach_l2input_feat
+#undef _
+ L2INPUT_N_FEAT
+} l2input_feat_t;
+
+STATIC_ASSERT (L2INPUT_N_FEAT <= 32, "too many l2 input features");
+
+/* Feature bit masks */
+typedef enum
+{
+#define _(sym,str) L2INPUT_FEAT_##sym = (1<<L2INPUT_FEAT_##sym##_BIT),
+ foreach_l2input_feat
+#undef _
+ L2INPUT_VALID_MASK =
+#define _(sym,str) L2INPUT_FEAT_##sym |
+ foreach_l2input_feat
+#undef _
+ 0
+} l2input_feat_masks_t;
+
+STATIC_ASSERT ((u64) L2INPUT_VALID_MASK == (1ull << L2INPUT_N_FEAT) - 1, "");
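+
+/*
+ * For reference: the two enums above expand the feature list into bit
+ * positions and masks; for example the first two entries become:
+ *
+ *   L2INPUT_FEAT_DROP_BIT = 0,      L2INPUT_FEAT_DROP     = 1 << 0,
+ *   L2INPUT_FEAT_XCONNECT_BIT = 1,  L2INPUT_FEAT_XCONNECT = 1 << 1,
+ *
+ * and L2INPUT_VALID_MASK is the bitwise OR of all the feature masks.
+ */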
+
+/** Return an array of strings containing graph node names of each feature */
+char **l2input_get_feat_names (void);
+
+/* arg0 - u32 feature_bitmap */
+u8 *format_l2_input_features (u8 * s, va_list * args);
+
+static_always_inline u8
+bd_feature_flood (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD) ==
+ L2INPUT_FEAT_FLOOD);
+}
+
+static_always_inline u8
+bd_feature_uu_flood (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD) ==
+ L2INPUT_FEAT_UU_FLOOD);
+}
+
+static_always_inline u8
+bd_feature_forward (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_FWD) == L2INPUT_FEAT_FWD);
+}
+
+static_always_inline u8
+bd_feature_learn (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_LEARN) ==
+ L2INPUT_FEAT_LEARN);
+}
+
+static_always_inline u8
+bd_feature_arp_term (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM) ==
+ L2INPUT_FEAT_ARP_TERM);
+}
+
+/** Masks for eliminating features that do not apply to a packet */
+
+/** Get a pointer to the config for the given interface */
+l2_input_config_t *l2input_intf_config (u32 sw_if_index);
+
+/* Enable (or disable) the feature in the bitmap for the given interface */
+u32 l2input_intf_bitmap_enable (u32 sw_if_index,
+ u32 feature_bitmap, u32 enable);
+
+/* Set or clear feature flags on a bridge domain */
+u32 l2input_set_bridge_features (u32 bd_index, u32 feat_mask, u32 feat_value);
+
+
+#define MODE_L3 0
+#define MODE_L2_BRIDGE 1
+#define MODE_L2_XC 2
+#define MODE_L2_CLASSIFY 3
+
+#define MODE_ERROR_ETH 1
+#define MODE_ERROR_BVI_DEF 2
+
+u32 set_int_l2_mode (vlib_main_t * vm,
+ vnet_main_t * vnet_main,
+ u32 mode,
+ u32 sw_if_index,
+ u32 bd_index, u32 bvi, u32 shg, u32 xc_sw_if_index);
+
+static inline void
+vnet_update_l2_len (vlib_buffer_t * b)
+{
+ ethernet_header_t *eth;
+ u16 ethertype;
+ u8 vlan_count = 0;
+
+  /* point at current l2 hdr */
+ eth = vlib_buffer_get_current (b);
+
+ /*
+ * l2-output pays no attention to this
+ * but the tag push/pop code on an l2 subif needs it.
+ *
+ * Determine l2 header len, check for up to 2 vlans
+ */
+ vnet_buffer (b)->l2.l2_len = sizeof (ethernet_header_t);
+ ethertype = clib_net_to_host_u16 (eth->type);
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan;
+ vnet_buffer (b)->l2.l2_len += sizeof (*vlan);
+ vlan_count = 1;
+ vlan = (void *) (eth + 1);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vnet_buffer (b)->l2.l2_len += sizeof (*vlan);
+ vlan_count = 2;
+ }
+ }
+ ethernet_buffer_set_vlan_count (b, vlan_count);
+}
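+
+/*
+ * Worked example: an untagged frame yields l2_len 14
+ * (sizeof (ethernet_header_t)); one VLAN tag adds 4 bytes for 18, and
+ * a second (QinQ) tag another 4 for 22, with vlan_count set to 0, 1
+ * or 2 respectively.
+ */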
+
+/*
+ * Compute the flow hash of an ethernet packet: use a 5-tuple hash if the
+ * L3 packet is ip4 or ip6, otherwise hash on smac/dmac/etype.
+ * The vlib buffer current pointer is expected to be at the ethernet header
+ * and vnet l2.l2_len is expected to be set up already.
+ */
+static inline u32
+vnet_l2_compute_flow_hash (vlib_buffer_t * b)
+{
+ ethernet_header_t *eh = vlib_buffer_get_current (b);
+ u8 *l3h = (u8 *) eh + vnet_buffer (b)->l2.l2_len;
+ u16 ethertype = clib_net_to_host_u16 (*(u16 *) (l3h - 2));
+
+ if (ethertype == ETHERNET_TYPE_IP4)
+ return ip4_compute_flow_hash ((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ return ip6_compute_flow_hash ((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else
+ {
+ u32 a, b, c;
+ u32 *ap = (u32 *) & eh->dst_address[2];
+ u32 *bp = (u32 *) & eh->src_address[2];
+ a = *ap;
+ b = *bp;
+ c = ethertype;
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+ return c;
+ }
+}
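+
+/*
+ * Usage sketch (illustrative only): hash a buffer whose current
+ * pointer sits at the ethernet header, after vnet_update_l2_len()
+ * has established l2.l2_len:
+ *
+ *   vnet_update_l2_len (b0);
+ *   u32 hash = vnet_l2_compute_flow_hash (b0);
+ */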
+
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_acl.c b/src/vnet/l2/l2_input_acl.c
new file mode 100644
index 00000000..84030888
--- /dev/null
+++ b/src/vnet/l2/l2_input_acl.c
@@ -0,0 +1,431 @@
+/*
+ * l2_input_acl.c : layer 2 input acl processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+typedef struct
+{
+
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_inacl_main_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ u32 table_index;
+ u32 offset;
+} l2_inacl_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_inacl_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_inacl_trace_t *t = va_arg (*args, l2_inacl_trace_t *);
+
+ s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
+ t->sw_if_index, t->next_index, t->table_index, t->offset);
+ return s;
+}
+
+l2_inacl_main_t l2_inacl_main;
+
+static vlib_node_registration_t l2_inacl_node;
+
+#define foreach_l2_inacl_error \
+_(NONE, "valid input ACL packets") \
+_(MISS, "input ACL misses") \
+_(HIT, "input ACL hits") \
+_(CHAIN_HIT, "input ACL hits after chain walk") \
+_(TABLE_MISS, "input ACL table-miss drops") \
+_(SESSION_DENY, "input ACL session deny drops")
+
+
+typedef enum
+{
+#define _(sym,str) L2_INACL_ERROR_##sym,
+ foreach_l2_inacl_error
+#undef _
+ L2_INACL_N_ERROR,
+} l2_inacl_error_t;
+
+static char *l2_inacl_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_inacl_error
+#undef _
+};
+
+static uword
+l2_inacl_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ acl_next_index_t next_index;
+ l2_inacl_main_t *msm = &l2_inacl_main;
+ input_acl_main_t *am = &input_acl_main;
+ vnet_classify_main_t *vcm = am->vnet_classify_main;
+ input_acl_table_id_t tid = INPUT_ACL_TABLE_L2;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ /* First pass: compute hashes */
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ u8 *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t *t0, *t1;
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ if (t1->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h1 = (void *) vlib_buffer_get_current (b1) + t1->current_data_offset;
+ else
+ h1 = b1->data;
+
+ vnet_buffer (b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer (b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ u8 *h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ACL_NEXT_INDEX_DENY;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+ u64 hash0;
+ u8 *h0;
+ u8 error0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
+ vnet_classify_table_t *tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer (p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer (p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ /* Determine the next node */
+ next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index,
+ L2INPUT_FEAT_ACL);
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+
+ next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT) ?
+ e0->next_index : next0;
+
+ hits++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ L2_INACL_ERROR_SESSION_DENY : L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ }
+ else
+ {
+ while (1)
+ {
+ if (PREDICT_TRUE (t0->next_table_index != ~0))
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 =
+ (t0->miss_next_index <
+ ACL_NEXT_INDEX_N_NEXT) ? t0->miss_next_index :
+ next0;
+
+ misses++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ L2_INACL_ERROR_TABLE_MISS : L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ break;
+ }
+
+ if (t0->current_data_flag ==
+ CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ L2_INACL_ERROR_SESSION_DENY : L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_inacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = (t0 && e0) ? vnet_classify_get_offset (t0, e0) : ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_CHAIN_HIT, chain_hits);
+ return frame->n_vectors;
+}
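+
+/*
+ * Note on the chain walk above: when the first table misses, the node
+ * follows t0->next_table_index through the linked tables, rehashing
+ * the packet against each table's mask, until a session hits or a
+ * table with no successor supplies its miss_next_index.
+ */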
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_inacl_node,static) = {
+ .function = l2_inacl_node_fn,
+ .name = "l2-input-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_inacl_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_inacl_error_strings),
+ .error_strings = l2_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_inacl_node, l2_inacl_node_fn)
+ clib_error_t *l2_inacl_init (vlib_main_t * vm)
+{
+ l2_inacl_main_t *mp = &l2_inacl_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_inacl_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_inacl_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_classify.c b/src/vnet/l2/l2_input_classify.c
new file mode 100644
index 00000000..ee8042a0
--- /dev/null
+++ b/src/vnet/l2/l2_input_classify.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * l2_classify.c
+ */
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/api_errno.h>
+
+/**
+ * @file
+ * @brief L2 input classifier.
+ *
+ * @sa @ref vnet/vnet/classify/vnet_classify.c
+ * @sa @ref vnet/vnet/classify/vnet_classify.h
+ */
+
+/**
+ * @brief l2_input_classifier packet trace record.
+ */
+typedef struct
+{
+ /** interface handle for the ith packet */
+ u32 sw_if_index;
+ /** graph arc index selected for this packet */
+ u32 next_index;
+ /** classifier table which provided the final result */
+ u32 table_index;
+ /** offset in classifier heap of the corresponding session */
+ u32 session_offset;
+} l2_input_classify_trace_t;
+
+/**
+ * @brief vlib node runtime.
+ */
+typedef struct
+{
+ /** use-case independent main object pointer */
+ vnet_classify_main_t *vcm;
+ /** l2 input classifier main object pointer */
+ l2_input_classify_main_t *l2cm;
+} l2_input_classify_runtime_t;
+
+/** Packet trace format function. */
+static u8 *
+format_l2_input_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_input_classify_trace_t *t = va_arg (*args, l2_input_classify_trace_t *);
+
+ s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d",
+ t->sw_if_index, t->table_index, t->session_offset,
+ t->next_index);
+ return s;
+}
+
+/** l2 input classifier main data structure. */
+l2_input_classify_main_t l2_input_classify_main;
+
+vlib_node_registration_t l2_input_classify_node;
+
+#define foreach_l2_input_classify_error \
+_(MISS, "Classify misses") \
+_(HIT, "Classify hits") \
+_(CHAIN_HIT, "Classify hits after chain walk") \
+_(DROP, "L2 Classify Drops")
+
+typedef enum
+{
+#define _(sym,str) L2_INPUT_CLASSIFY_ERROR_##sym,
+ foreach_l2_input_classify_error
+#undef _
+ L2_INPUT_CLASSIFY_N_ERROR,
+} l2_input_classify_error_t;
+
+static char *l2_input_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_input_classify_error
+#undef _
+};
+
+/**
+ * @brief l2 input classifier node.
+ * @node l2-input-classify
+ *
+ * This is the l2 input classifier dispatch node.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer metadata, next index usage
+ *
+ * @em Uses:
+ * - <code>(l2_input_classify_runtime_t *)
+ * rt->classify_table_index_by_sw_if_index</code>
+ * - Head of the per-interface, per-protocol classifier table chain
+ * for a specific interface.
+ * - @c ~0 => send pkts to the next feature in the L2 feature chain.
+ * - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ * - Indicates the @c sw_if_index value of the interface that the
+ * packet was received on.
+ * - <code>vnet_buffer(b0)->l2.feature_bitmap</code>
+ * - Used to steer packets across l2 features enabled on the interface
+ * - <code>(vnet_classify_entry_t) e0->next_index</code>
+ * - Used to steer traffic when the classifier hits on a session
+ * - <code>(vnet_classify_entry_t) e0->advance</code>
+ * - Signed quantity applied via <code>vlib_buffer_advance</code>
+ * when the classifier hits on a session
+ * - <code>(vnet_classify_table_t) t0->miss_next_index</code>
+ * - Used to steer traffic when the classifier misses
+ *
+ * @em Sets:
+ * - <code>vnet_buffer (b0)->l2_classify.table_index</code>
+ * - Classifier table index of the first classifier table in
+ * the classifier table chain
+ * - <code>vnet_buffer (b0)->l2_classify.hash</code>
+ * - Bounded-index extensible hash corresponding to the
+ * masked fields in the current packet
+ * - <code>vnet_buffer (b0)->l2.feature_bitmap</code>
+ * - Used to steer packets across l2 features enabled on the interface
+ * - <code>vnet_buffer (b0)->l2_classify.opaque_index</code>
+ * - Copied from the classifier session object upon classifier hit
+ *
+ * @em Counters:
+ * - <code>L2_INPUT_CLASSIFY_ERROR_MISS</code> Classifier misses
+ * - <code>L2_INPUT_CLASSIFY_ERROR_HIT</code> Classifier hits
+ * - <code>L2_INPUT_CLASSIFY_ERROR_CHAIN_HIT</code>
+ * Classifier hits in other than the first table
+ */
+
+static uword
+l2_input_classify_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_input_classify_next_t next_index;
+ l2_input_classify_main_t *cm = &l2_input_classify_main;
+ vnet_classify_main_t *vcm = cm->vnet_classify_main;
+ l2_input_classify_runtime_t *rt =
+ (l2_input_classify_runtime_t *) node->runtime_data;
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ f64 now;
+ u32 n_next_nodes;
+
+ n_next_nodes = node->n_next_nodes;
+
+ now = vlib_time_now (vm);
+
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ /* First pass: compute hash */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ ethernet_header_t *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u16 type0, type1;
+ int type_index0, type_index1;
+ vnet_classify_table_t *t0, *t1;
+ u32 table_index0, table_index1;
+ u64 hash0, hash1;
+
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ vnet_buffer (b0)->l2_classify.table_index = ~0;
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ vnet_buffer (b1)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+ type1 = clib_net_to_host_u16 (h1->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index0;
+
+ type_index1 = (type1 == ETHERNET_TYPE_IP4)
+ ? L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER;
+ type_index1 = (type1 == ETHERNET_TYPE_IP6)
+ ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index1;
+
+ vnet_buffer (b0)->l2_classify.table_index =
+ table_index0 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer (b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+
+ vnet_buffer (b1)->l2_classify.table_index =
+ table_index1 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index1][sw_if_index1];
+
+ if (table_index1 != ~0)
+ {
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer (b1)->l2_classify.hash = hash1 =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+ vnet_classify_prefetch_bucket (t1, hash1);
+ }
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ ethernet_header_t *h0;
+ u32 sw_if_index0;
+ u16 type0;
+ u32 type_index0;
+ vnet_classify_table_t *t0;
+ u32 table_index0;
+ u64 hash0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ vnet_buffer (b0)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index0;
+
+ vnet_buffer (b0)->l2_classify.table_index =
+ table_index0 = rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer (b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ~0; /* next l2 input feature, please... */
+ ethernet_header_t *h0;
+ u32 table_index0;
+ u64 hash0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+
+ if (PREDICT_TRUE (n_left_from > 2))
+ {
+ vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
+ u64 phash2;
+ u32 table_index2;
+ vnet_classify_table_t *tp2;
+
+ /*
+ * Prefetch table entry two ahead. Buffer / data
+ * were prefetched above...
+ */
+ table_index2 = vnet_buffer (p2)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index2 != ~0))
+ {
+ tp2 = pool_elt_at_index (vcm->tables, table_index2);
+ phash2 = vnet_buffer (p2)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp2, phash2);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (t0->next_table_index != ~0)
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 =
+ vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE (next0 == 0))
+ b0->error = node->errors[L2_INPUT_CLASSIFY_ERROR_DROP];
+
+ /* Determine the next node and remove ourself from bitmap */
+ if (PREDICT_TRUE (next0 == ~0))
+ next0 = vnet_l2_feature_next (b0, cm->l2_inp_feat_next,
+ L2INPUT_FEAT_INPUT_CLASSIFY);
+ else
+ vnet_buffer (b0)->l2.feature_bitmap &=
+ ~L2INPUT_FEAT_INPUT_CLASSIFY;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_input_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->table_index = table_index0;
+ t->next_index = next0;
+ t->session_offset = e0 ? vnet_classify_get_offset (t0, e0) : 0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INPUT_CLASSIFY_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INPUT_CLASSIFY_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INPUT_CLASSIFY_ERROR_CHAIN_HIT, chain_hits);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_input_classify_node) = {
+ .function = l2_input_classify_node_fn,
+ .name = "l2-input-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_input_classify_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_input_classify_error_strings),
+ .error_strings = l2_input_classify_error_strings,
+
+ .runtime_data_bytes = sizeof (l2_input_classify_runtime_t),
+
+ .n_next_nodes = L2_INPUT_CLASSIFY_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_INPUT_CLASSIFY_NEXT_DROP] = "error-drop",
+ [L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2",
+ [L2_INPUT_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input",
+ [L2_INPUT_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input",
+ [L2_INPUT_CLASSIFY_NEXT_LI] = "li-hit",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_input_classify_node,
+ l2_input_classify_node_fn);
+
+/** l2 input classifier feature initialization. */
+clib_error_t *
+l2_input_classify_init (vlib_main_t * vm)
+{
+ l2_input_classify_main_t *cm = &l2_input_classify_main;
+ l2_input_classify_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_input_classify_node.index);
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main ();
+ cm->vnet_classify_main = &vnet_classify_main;
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_input_classify_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ cm->l2_inp_feat_next);
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_input_classify_init);
+
+clib_error_t *
+l2_input_classify_worker_init (vlib_main_t * vm)
+{
+ l2_input_classify_main_t *cm = &l2_input_classify_main;
+ l2_input_classify_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_input_classify_node.index);
+
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (l2_input_classify_worker_init);
+
+/** Enable/disable l2 input classification on a specific interface. */
+void
+vnet_l2_input_classify_enable_disable (u32 sw_if_index, int enable_disable)
+{
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_INPUT_CLASSIFY,
+ (u32) enable_disable);
+}
+
+/** @brief Set l2 per-protocol, per-interface input classification tables.
+ *
+ * @param sw_if_index interface handle
+ * @param ip4_table_index ip4 classification table index, or ~0
+ * @param ip6_table_index ip6 classification table index, or ~0
+ * @param other_table_index non-ip4, non-ip6 classification table index,
+ * or ~0
+ * @returns 0 on success, VNET_API_ERROR_NO_SUCH_TABLE, TABLE2, TABLE3
+ * if the indicated (non-~0) table does not exist.
+ */
+
+int
+vnet_l2_input_classify_set_tables (u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index, u32 other_table_index)
+{
+ l2_input_classify_main_t *cm = &l2_input_classify_main;
+ vnet_classify_main_t *vcm = cm->vnet_classify_main;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ if (ip4_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip4_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ if (ip6_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip6_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE2;
+
+ if (other_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, other_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE3;
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER],
+ sw_if_index);
+
+ cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4]
+ [sw_if_index] = ip4_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6]
+ [sw_if_index] = ip6_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER]
+ [sw_if_index] = other_table_index;
+
+ return 0;
+}
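+
+/*
+ * Usage sketch (illustrative only), mirroring the CLI handler below:
+ * bind an existing ip4 classifier table to an interface and enable
+ * the feature. ip4_table_index is assumed to name a valid table.
+ *
+ *   int rv = vnet_l2_input_classify_set_tables (sw_if_index,
+ *                                               ip4_table_index, ~0, ~0);
+ *   if (rv == 0)
+ *     vnet_l2_input_classify_enable_disable (sw_if_index, 1);
+ */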
+
+static clib_error_t *
+int_l2_input_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 other_table_index = ~0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ ;
+ else if (unformat (input, "other-table %d", &other_table_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface must be specified");
+
+
+ if (ip4_table_index == ~0 && ip6_table_index == ~0
+ && other_table_index == ~0)
+ {
+ vlib_cli_output (vm, "L2 classification disabled");
+ vnet_l2_input_classify_enable_disable (sw_if_index, 0 /* enable */ );
+ return 0;
+ }
+
+ rv = vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index, other_table_index);
+ switch (rv)
+ {
+ case 0:
+ vnet_l2_input_classify_enable_disable (sw_if_index, 1 /* enable */ );
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_l2_input_classify_set_tables: %d",
+ rv);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure l2 input classification.
+ *
+ * @cliexpar
+ * @cliexstart{set interface l2 input classify intfc <interface-name> [ip4-table <index>] [ip6-table <index>] [other-table <index>]}
+ * @cliexend
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = {
+ .path = "set interface l2 input classify",
+ .short_help =
+ "set interface l2 input classify intfc <interface-name> [ip4-table <n>]\n"
+ " [ip6-table <n>] [other-table <n>]",
+ .function = int_l2_input_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_vtr.c b/src/vnet/l2/l2_input_vtr.c
new file mode 100644
index 00000000..9470752f
--- /dev/null
+++ b/src/vnet/l2/l2_input_vtr.c
@@ -0,0 +1,369 @@
+/*
+ * l2_input_vtr.c : layer 2 input vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/l2/l2_input_vtr.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/cache.h>
+
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u8 raw[12]; /* raw data (vlans) */
+ u32 sw_if_index;
+} l2_invtr_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_invtr_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_invtr_trace_t *t = va_arg (*args, l2_invtr_trace_t *);
+
+ s = format (s, "l2-input-vtr: sw_if_index %d dst %U src %U data "
+ "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4],
+ t->raw[5], t->raw[6], t->raw[7], t->raw[8], t->raw[9],
+ t->raw[10], t->raw[11]);
+ return s;
+}
+
+l2_invtr_main_t l2_invtr_main;
+
+static vlib_node_registration_t l2_invtr_node;
+
+#define foreach_l2_invtr_error \
+_(L2_INVTR, "L2 input VTR packets") \
+_(DROP, "L2 input tag rewrite drops")
+
+typedef enum
+{
+#define _(sym,str) L2_INVTR_ERROR_##sym,
+ foreach_l2_invtr_error
+#undef _
+ L2_INVTR_N_ERROR,
+} l2_invtr_error_t;
+
+static char *l2_invtr_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_invtr_error
+#undef _
+};
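+
+/*
+ * The foreach_l2_invtr_error list expands twice: once into the error enum
+ * above (L2_INVTR_ERROR_L2_INVTR, L2_INVTR_ERROR_DROP) and once into the
+ * matching counter-name strings, keeping the two in sync from one list.
+ */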
+
+typedef enum
+{
+ L2_INVTR_NEXT_DROP,
+ L2_INVTR_N_NEXT,
+} l2_invtr_next_t;
+
+
+static uword
+l2_invtr_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_invtr_next_t next_index;
+ l2_invtr_main_t *msm = &l2_invtr_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
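+ /* dual loop needs 6 buffers available: 2 processed now plus from[2..5]
+ * referenced by the prefetch block below */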
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3, *p4, *p5;
+ u32 sw_if_index2, sw_if_index3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ /*
+ * Prefetch the input config for the N+1 loop iteration
+ * This depends on the buffer header above
+ */
+ sw_if_index2 = vnet_buffer (p2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (p3)->sw_if_index[VLIB_RX];
+ CLIB_PREFETCH (vec_elt_at_index
+ (l2output_main.configs, sw_if_index2),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vec_elt_at_index
+ (l2output_main.configs, sw_if_index3),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ /* Determine the next node */
+ next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index,
+ L2INPUT_FEAT_VTR);
+ next1 = vnet_l2_feature_next (b1, msm->feat_next_node_index,
+ L2INPUT_FEAT_VTR);
+
+ l2_output_config_t *config0;
+ l2_output_config_t *config1;
+ config0 = vec_elt_at_index (l2output_main.configs, sw_if_index0);
+ config1 = vec_elt_at_index (l2output_main.configs, sw_if_index1);
+
+ if (PREDICT_FALSE (config0->out_vtr_flag))
+ {
+ if (config0->output_vtr.push_and_pop_bytes)
+ {
+ /* perform the tag rewrite on two packets */
+ if (l2_vtr_process (b0, &config0->input_vtr))
+ {
+ /* Drop packet */
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ else if (config0->output_pbb_vtr.push_and_pop_bytes)
+ {
+ if (l2_pbb_process (b0, &(config0->input_pbb_vtr)))
+ {
+ /* Drop packet */
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ }
+ if (PREDICT_FALSE (config1->out_vtr_flag))
+ {
+ if (config1->output_vtr.push_and_pop_bytes)
+ {
+ if (l2_vtr_process (b1, &config1->input_vtr))
+ {
+ /* Drop packet */
+ next1 = L2_INVTR_NEXT_DROP;
+ b1->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ else if (config1->output_pbb_vtr.push_and_pop_bytes)
+ {
+ if (l2_pbb_process (b1, &(config1->input_pbb_vtr)))
+ {
+ /* Drop packet */
+ next1 = L2_INVTR_NEXT_DROP;
+ b1->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+ t->sw_if_index = sw_if_index1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ clib_memcpy (t->raw, &h1->type, sizeof (t->raw));
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* Determine the next node */
+ next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index,
+ L2INPUT_FEAT_VTR);
+
+ l2_output_config_t *config0;
+ config0 = vec_elt_at_index (l2output_main.configs, sw_if_index0);
+
+ if (PREDICT_FALSE (config0->out_vtr_flag))
+ {
+ if (config0->output_vtr.push_and_pop_bytes)
+ {
+ /* perform the tag rewrite on one packet */
+ if (l2_vtr_process (b0, &config0->input_vtr))
+ {
+ /* Drop packet */
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ else if (config0->output_pbb_vtr.push_and_pop_bytes)
+ {
+ if (l2_pbb_process (b0, &(config0->input_pbb_vtr)))
+ {
+ /* Drop packet */
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_invtr_node,static) = {
+ .function = l2_invtr_node_fn,
+ .name = "l2-input-vtr",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_invtr_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_invtr_error_strings),
+ .error_strings = l2_invtr_error_strings,
+
+ .n_next_nodes = L2_INVTR_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_INVTR_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_invtr_node, l2_invtr_node_fn)
+
+clib_error_t *
+l2_invtr_init (vlib_main_t * vm)
+{
+ l2_invtr_main_t *mp = &l2_invtr_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_invtr_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_invtr_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_vtr.h b/src/vnet/l2/l2_input_vtr.h
new file mode 100644
index 00000000..f248669e
--- /dev/null
+++ b/src/vnet/l2/l2_input_vtr.h
@@ -0,0 +1,54 @@
+/*
+ * l2_input_vtr.h : layer 2 input vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_input_vtr_h
+#define included_vnet_l2_input_vtr_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+
+
+typedef struct
+{
+
+ /*
+ * The input vtr data is located in l2_output_config_t because
+ * the same config data is used for the egress EFP Filter check.
+ */
+
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_invtr_main_t;
+
+extern l2_invtr_main_t l2_invtr_main;
+
+#endif /* included_vnet_l2_input_vtr_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c
new file mode 100644
index 00000000..fddab824
--- /dev/null
+++ b/src/vnet/l2/l2_learn.c
@@ -0,0 +1,596 @@
+/*
+ * l2_learn.c : layer 2 learning using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_learn.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+
+/**
+ * @file
+ * @brief Ethernet Bridge Learning.
+ *
+ * Populate the mac table with entries mapping the packet's source mac + bridge
+ * domain ID to the input sw_if_index.
+ *
+ * Note that learning and forwarding are separate graph nodes. This means that
+ * for a set of packets, all learning is performed first, then all packets are
+ * forwarded. The forwarding is done based on the end-state of the mac table,
+ * instead of the state after each packet. Thus the forwarding results could
+ * differ in certain cases (mac move tests), but this is not expected to cause
+ * problems in real-world networks. It is much simpler to separate learning
+ * and forwarding into separate nodes.
+ */
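+
+/*
+ * Conceptually (a sketch; the exact field layout lives in l2_fib.h):
+ *   key    = { mac address, bd_index }                  -> l2fib_entry_key_t
+ *   result = { sw_if_index, timestamp, sn,
+ *              flags (static_mac, filter, age_not...) } -> l2fib_entry_result_t
+ * Learning adds or updates key -> result entries in the bihash mac table.
+ */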
+
+
+typedef struct
+{
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2learn_trace_t;
+
+
+/* packet trace format function */
+static u8 *
+format_l2learn_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2learn_trace_t *t = va_arg (*args, l2learn_trace_t *);
+
+ s = format (s, "l2-learn: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src, t->bd_index);
+ return s;
+}
+
+static vlib_node_registration_t l2learn_node;
+
+#define foreach_l2learn_error \
+_(L2LEARN, "L2 learn packets") \
+_(MISS, "L2 learn misses") \
+_(MAC_MOVE, "L2 mac moves") \
+_(MAC_MOVE_VIOLATE, "L2 mac move violations") \
+_(LIMIT, "L2 not learned due to limit") \
+_(HIT_UPDATE, "L2 learn hit updates") \
+_(FILTER_DROP, "L2 filter mac drops")
+
+typedef enum
+{
+#define _(sym,str) L2LEARN_ERROR_##sym,
+ foreach_l2learn_error
+#undef _
+ L2LEARN_N_ERROR,
+} l2learn_error_t;
+
+static char *l2learn_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2learn_error
+#undef _
+};
+
+typedef enum
+{
+ L2LEARN_NEXT_L2FWD,
+ L2LEARN_NEXT_DROP,
+ L2LEARN_N_NEXT,
+} l2learn_next_t;
+
+
+/** Perform learning on one packet based on the mac table lookup result. */
+
+static_always_inline void
+l2learn_process (vlib_node_runtime_t * node,
+ l2learn_main_t * msm,
+ u64 * counter_base,
+ vlib_buffer_t * b0,
+ u32 sw_if_index0,
+ l2fib_entry_key_t * key0,
+ l2fib_entry_key_t * cached_key,
+ u32 * count,
+ l2fib_entry_result_t * result0, u32 * next0, u8 timestamp)
+{
+ /* Set up the default next node (typically L2FWD) */
+ *next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index,
+ L2INPUT_FEAT_LEARN);
+
+ /* Check mac table lookup result */
+ if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0))
+ {
+ /* L2FIB entry matched with the same sw_if_index - normal fast path */
+ u32 dtime = timestamp - result0->fields.timestamp;
+ u32 dsn = result0->fields.sn.as_u16 - vnet_buffer (b0)->l2.l2fib_sn;
+ u32 check = (dtime && vnet_buffer (b0)->l2.bd_age) || dsn;
+
+ if (PREDICT_TRUE (check == 0))
+ return; /* MAC entry up to date */
+ if (result0->fields.age_not)
+ return; /* Static MAC always age_not */
+ if (msm->global_learn_count > msm->global_learn_limit)
+ return; /* Above learn limit - do not update */
+
+ /* Limit updates per l2-learn node call to avoid a prolonged update burst
+ * as dtime advances over the 1 minute mark, unless more than 1 min behind
+ * or the SN is obsolete */
+ if ((*count > 2) && (dtime == 1) && (dsn == 0))
+ return;
+
+ counter_base[L2LEARN_ERROR_HIT_UPDATE] += 1;
+ *count += 1;
+ }
+ else if (result0->raw == ~0)
+ {
+ /* Entry not in L2FIB - add it */
+ counter_base[L2LEARN_ERROR_MISS] += 1;
+
+ if (msm->global_learn_count >= msm->global_learn_limit)
+ {
+ /*
+ * Global limit reached. Do not learn the mac but forward the packet.
+ * In the future, limits could also be per-interface or bridge-domain.
+ */
+ counter_base[L2LEARN_ERROR_LIMIT] += 1;
+ return;
+ }
+
+ /* Do not learn if mac is 0 */
+ l2fib_entry_key_t key = *key0;
+ key.fields.bd_index = 0;
+ if (key.raw == 0)
+ return;
+
+ /* It is ok to learn */
+ msm->global_learn_count++;
+ result0->raw = 0; /* clear all fields */
+ result0->fields.sw_if_index = sw_if_index0;
+ result0->fields.lrn_evt = (msm->client_pid != 0);
+ }
+ else
+ {
+ /* Entry in L2FIB with different sw_if_index - mac move or filter */
+ if (result0->fields.filter)
+ {
+ ASSERT (result0->fields.sw_if_index == ~0);
+ /* drop packet because lookup matched a filter mac entry */
+ b0->error = node->errors[L2LEARN_ERROR_FILTER_DROP];
+ *next0 = L2LEARN_NEXT_DROP;
+ return;
+ }
+
+ if (result0->fields.static_mac)
+ {
+ /*
+ * Don't overwrite a static mac
+ * TODO: Check violation policy. For now drop the packet
+ */
+ b0->error = node->errors[L2LEARN_ERROR_MAC_MOVE_VIOLATE];
+ *next0 = L2LEARN_NEXT_DROP;
+ return;
+ }
+
+ /*
+ * TODO: may want to rate limit mac moves
+ * TODO: check global/bridge domain/interface learn limits
+ */
+ result0->fields.sw_if_index = sw_if_index0;
+ if (result0->fields.age_not) /* The mac was provisioned */
+ {
+ msm->global_learn_count++;
+ result0->fields.age_not = 0;
+ }
+ result0->fields.lrn_evt = (msm->client_pid != 0);
+ counter_base[L2LEARN_ERROR_MAC_MOVE] += 1;
+ }
+
+ /* Update the entry */
+ result0->fields.timestamp = timestamp;
+ result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn;
+
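+ /* BVT/BV expand to the bihash variant used for the l2fib mac table;
+ * key and result travel as raw u64s in a single kv pair */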
+ BVT (clib_bihash_kv) kv;
+ kv.key = key0->raw;
+ kv.value = result0->raw;
+ BV (clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */ );
+
+ /* Invalidate the cache */
+ cached_key->raw = ~0;
+}
+
+
+static_always_inline uword
+l2learn_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int do_trace)
+{
+ u32 n_left_from, *from, *to_next;
+ l2learn_next_t next_index;
+ l2learn_main_t *msm = &l2learn_main;
+ vlib_node_t *n = vlib_get_node (vm, l2learn_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ l2fib_entry_key_t cached_key;
+ l2fib_entry_result_t cached_result;
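+ /* one-minute-granularity timestamp used by the mac aging checks in
+ * l2learn_process() */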
+ u8 timestamp = (u8) (vlib_time_now (vm) / 60);
+ u32 count = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ /* Clear the one-entry cache in case mac table was updated */
+ cached_key.raw = ~0;
+ cached_result.raw = ~0; /* warning be gone */
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+ ethernet_header_t *h0, *h1, *h2, *h3;
+ l2fib_entry_key_t key0, key1, key2, key3;
+ l2fib_entry_result_t result0, result1, result2, result3;
+ u32 bucket0, bucket1, bucket2, bucket3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+ /* Process 4 x pkts */
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+
+ if (do_trace)
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer (b1)->l2.bd_index;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ t->bd_index = vnet_buffer (b2)->l2.bd_index;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ t->bd_index = vnet_buffer (b3)->l2.bd_index;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ }
+ }
+
+ /* process 4 pkts */
+ vlib_node_increment_counter (vm, l2learn_node.index,
+ L2LEARN_ERROR_L2LEARN, 4);
+
+ l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result,
+ h0->src_address,
+ h1->src_address,
+ h2->src_address,
+ h3->src_address,
+ vnet_buffer (b0)->l2.bd_index,
+ vnet_buffer (b1)->l2.bd_index,
+ vnet_buffer (b2)->l2.bd_index,
+ vnet_buffer (b3)->l2.bd_index,
+ &key0, &key1, &key2, &key3,
+ &bucket0, &bucket1, &bucket2, &bucket3,
+ &result0, &result1, &result2, &result3);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b0, sw_if_index0, &key0, &cached_key,
+ &count, &result0, &next0, timestamp);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b1, sw_if_index1, &key1, &cached_key,
+ &count, &result1, &next1, timestamp);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b2, sw_if_index2, &key2, &cached_key,
+ &count, &result2, &next2, timestamp);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b3, sw_if_index3, &key3, &cached_key,
+ &count, &result3, &next3, timestamp);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+
+ if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2learn_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ /* process 1 pkt */
+ vlib_node_increment_counter (vm, l2learn_node.index,
+ L2LEARN_ERROR_L2LEARN, 1);
+
+
+ l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result,
+ h0->src_address, vnet_buffer (b0)->l2.bd_index,
+ &key0, &bucket0, &result0);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b0, sw_if_index0, &key0, &cached_key,
+ &count, &result0, &next0, timestamp);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
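+/*
+ * Specializing on do_trace at compile time keeps the common no-trace
+ * path free of per-packet trace-flag checks.
+ */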
+static uword
+l2learn_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ return l2learn_node_inline (vm, node, frame, 1 /* do_trace */ );
+ return l2learn_node_inline (vm, node, frame, 0 /* do_trace */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2learn_node,static) = {
+ .function = l2learn_node_fn,
+ .name = "l2-learn",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2learn_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2learn_error_strings),
+ .error_strings = l2learn_error_strings,
+
+ .n_next_nodes = L2LEARN_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2LEARN_NEXT_DROP] = "error-drop",
+ [L2LEARN_NEXT_L2FWD] = "l2-fwd",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2learn_node, l2learn_node_fn)
+
+clib_error_t *
+l2learn_init (vlib_main_t * vm)
+{
+ l2learn_main_t *mp = &l2learn_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2learn_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ /* init the hash table ptr */
+ mp->mac_table = get_mac_table ();
+
+ /*
+ * Set the default limit on dynamically learned macs, derived from
+ * the number of mac table buckets.
+ */
+ mp->global_learn_limit = L2LEARN_DEFAULT_LIMIT;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2learn_init);
+
+
+/**
+ * Set subinterface learn enable/disable.
+ * The CLI format is:
+ * set interface l2 learn <interface> [disable]
+ */
+static clib_error_t *
+int_learn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_LEARN, enable);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 learning can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage interfaces. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable learning:
+ * @cliexcmd{set interface l2 learn GigabitEthernet0/8/0}
+ * Example of how to disable learning:
+ * @cliexcmd{set interface l2 learn GigabitEthernet0/8/0 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_learn_cli, static) = {
+ .path = "set interface l2 learn",
+ .short_help = "set interface l2 learn <interface> [disable]",
+ .function = int_learn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+l2learn_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ l2learn_main_t *mp = &l2learn_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "limit %d", &mp->global_learn_limit))
+ ;
+
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (l2learn_config, "l2learn");
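+
+/*
+ * Example startup.conf stanza (hypothetical value) consumed by the
+ * config handler above:
+ *   l2learn {
+ *     limit 4096
+ *   }
+ */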
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_learn.h b/src/vnet/l2/l2_learn.h
new file mode 100644
index 00000000..3aaf48e2
--- /dev/null
+++ b/src/vnet/l2/l2_learn.h
@@ -0,0 +1,70 @@
+/*
+ * l2_learn.c : layer 2 learning using l2fib
+ *
+ * Copyright (c) 2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2learn_h
+#define included_l2learn_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+typedef struct
+{
+
+ /* Hash table */
+ BVT (clib_bihash) * mac_table;
+
+ /* number of dynamically learned mac entries */
+ u32 global_learn_count;
+
+ /* maximum number of dynamically learned mac entries */
+ u32 global_learn_limit;
+
+ /* client waiting for L2 MAC events for learned and aged MACs */
+ u32 client_pid;
+ u32 client_index;
+
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2learn_main_t;
+
+#define L2LEARN_DEFAULT_LIMIT (L2FIB_NUM_BUCKETS * 64)
+
+l2learn_main_t l2learn_main;
+
+extern vlib_node_registration_t l2fib_mac_age_scanner_process_node;
+
+typedef enum
+{
+ L2_MAC_AGE_PROCESS_EVENT_START = 1,
+ L2_MAC_AGE_PROCESS_EVENT_STOP = 2,
+ L2_MAC_AGE_PROCESS_EVENT_ONE_PASS = 3,
+} l2_mac_age_process_event_t;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c
new file mode 100644
index 00000000..500fc5d0
--- /dev/null
+++ b/src/vnet/l2/l2_output.c
@@ -0,0 +1,710 @@
+/*
+ * l2_output.c : layer 2 output packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+
+
+/* Feature graph node names */
+static char *l2output_feat_names[] = {
+#define _(sym,name) name,
+ foreach_l2output_feat
+#undef _
+};
+
+char **
+l2output_get_feat_names (void)
+{
+ return l2output_feat_names;
+}
+
+u8 *
+format_l2_output_features (u8 * s, va_list * args)
+{
+ static char *display_names[] = {
+#define _(sym,name) #sym,
+ foreach_l2output_feat
+#undef _
+ };
+ u32 feature_bitmap = va_arg (*args, u32);
+
+ if (feature_bitmap == 0)
+ {
+ s = format (s, " none configured");
+ return s;
+ }
+
+ int i;
+ for (i = L2OUTPUT_N_FEAT - 1; i >= 0; i--)
+ if (feature_bitmap & (1 << i))
+ s = format (s, "%10s (%s)\n", display_names[i], l2output_feat_names[i]);
+ return s;
+}
+
+l2output_main_t l2output_main;
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u8 raw[12]; /* raw data */
+} l2output_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2output_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2output_trace_t *t = va_arg (*args, l2output_trace_t *);
+
+ s = format (s, "l2-output: sw_if_index %d dst %U src %U data "
+ "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4],
+ t->raw[5], t->raw[6], t->raw[7], t->raw[8], t->raw[9],
+ t->raw[10], t->raw[11]);
+
+ return s;
+}
+
+
+static char *l2output_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2output_error
+#undef _
+};
+
+/**
+ * Check for split horizon violations.
+ * Return 0 if split horizon check passes, otherwise return non-zero.
+ * Packets should not be transmitted out an interface with the same
+ * split-horizon group as the input interface, except if the @c shg is 0
+ * in which case the check always passes.
+ */
+static_always_inline u32
+split_horizon_violation (u8 shg1, u8 shg2)
+{
+ if (PREDICT_TRUE (shg1 == 0))
+ {
+ return 0;
+ }
+ else
+ {
+ return shg1 == shg2;
+ }
+}
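+
+/*
+ * In short: shg1 == 0 always passes; otherwise a violation is reported
+ * exactly when input and output interfaces share the same non-zero
+ * split-horizon group (shg1 == shg2).
+ */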
+
+/** Determine the next L2 node based on the output feature bitmap */
+static_always_inline void
+l2_output_dispatch (vlib_buffer_t * b0, vlib_node_runtime_t * node,
+ u32 * cached_sw_if_index, u32 * cached_next_index,
+ u32 sw_if_index, u32 feature_bitmap, u32 * next0)
+{
+ /*
+ * The output feature bitmap always has at least the L2 output bit set
+ * for a normal L2 interface (or 0 if the interface is changed from L2
+ * to L3 mode). So if the feature bitmap is 0 or has only the L2 output
+ * bit set, there are no more features and packets are just output on
+ * the interface. Otherwise, get the index of the next feature node.
+ */
+ if (PREDICT_FALSE ((feature_bitmap & ~L2OUTPUT_FEAT_OUTPUT) != 0))
+ {
+ /* Save bitmap for the next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
+
+ /* Determine the next node */
+ *next0 =
+ feat_bitmap_get_next_node_index (l2output_main.l2_out_feat_next,
+ feature_bitmap);
+ }
+ else
+ {
+ /*
+ * There are no more features. Send the packet to the TX node for this
+ * sw_if_index. This is a little tricky in that the output interface
+ * next node indexes are not precomputed at init time.
+ */
+
+ if (sw_if_index == *cached_sw_if_index)
+ {
+ /* We hit in the one-entry cache. Use it. */
+ *next0 = *cached_next_index;
+ }
+ else
+ {
+ /* Look up the output TX node for the sw_if_index */
+ *next0 = vec_elt (l2output_main.output_node_index_vec, sw_if_index);
+
+ if (PREDICT_FALSE (*next0 == L2OUTPUT_NEXT_DROP))
+ b0->error = node->errors[L2OUTPUT_ERROR_MAPPING_DROP];
+
+ /* Update the one-entry cache */
+ *cached_sw_if_index = sw_if_index;
+ *cached_next_index = *next0;
+ }
+ }
+}
+
+static_always_inline void
+l2output_vtr (vlib_node_runtime_t * node, l2_output_config_t * config,
+ u32 feature_bitmap, vlib_buffer_t * b, u32 * next)
+{
+ if (PREDICT_FALSE (config->out_vtr_flag))
+ {
+ /* Perform pre-vtr EFP filter check if configured */
+ if (config->output_vtr.push_and_pop_bytes)
+ {
+ /*
+ * Perform output vlan tag rewrite and the pre-vtr EFP filter check.
+ * The EFP Filter only needs to be run if there is an output VTR
+ * configured. The flag for the post-vtr EFP Filter node is used
+ * to trigger the pre-vtr check as well.
+ */
+ u32 failed1 = (feature_bitmap & L2OUTPUT_FEAT_EFP_FILTER)
+ && (l2_efp_filter_process (b, &(config->input_vtr)));
+ u32 failed2 = l2_vtr_process (b, &(config->output_vtr));
+
+ if (PREDICT_FALSE (failed1 | failed2))
+ {
+ *next = L2OUTPUT_NEXT_DROP;
+ if (failed2)
+ {
+ b->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
+ }
+ if (failed1)
+ {
+ b->error = node->errors[L2OUTPUT_ERROR_EFP_DROP];
+ }
+ }
+ }
+ // perform the PBB rewrite
+ else if (config->output_pbb_vtr.push_and_pop_bytes)
+ {
+ u32 failed = l2_pbb_process (b, &(config->output_pbb_vtr));
+ if (PREDICT_FALSE (failed))
+ {
+ *next = L2OUTPUT_NEXT_DROP;
+ b->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
+ }
+ }
+ }
+}
+
+
+static_always_inline uword
+l2output_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int do_trace)
+{
+ u32 n_left_from, *from, *to_next;
+ l2output_next_t next_index;
+ l2output_main_t *msm = &l2output_main;
+ u32 cached_sw_if_index;
+ u32 cached_next_index;
+
+ /* Invalidate cache */
+ cached_sw_if_index = ~0;
+ cached_next_index = ~0; /* warning be gone */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+ ethernet_header_t *h0, *h1, *h2, *h3;
+ l2_output_config_t *config0, *config1, *config2, *config3;
+ u32 feature_bitmap0, feature_bitmap1;
+ u32 feature_bitmap2, feature_bitmap3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX];
+
+ vlib_node_increment_counter (vm, l2output_node.index,
+ L2OUTPUT_ERROR_L2OUTPUT, 4);
+
+ /* Get config for the output interface */
+ config0 = vec_elt_at_index (msm->configs, sw_if_index0);
+ config1 = vec_elt_at_index (msm->configs, sw_if_index1);
+ config2 = vec_elt_at_index (msm->configs, sw_if_index2);
+ config3 = vec_elt_at_index (msm->configs, sw_if_index3);
+
+ /*
+ * Get features from the config
+ * TODO: mask out any non-applicable features
+ */
+ feature_bitmap0 = config0->feature_bitmap;
+ feature_bitmap1 = config1->feature_bitmap;
+ feature_bitmap2 = config2->feature_bitmap;
+ feature_bitmap3 = config3->feature_bitmap;
+
+ /* Determine next node */
+ l2_output_dispatch (b0, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index0,
+ feature_bitmap0, &next0);
+ l2_output_dispatch (b1, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index1,
+ feature_bitmap1, &next1);
+ l2_output_dispatch (b2, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index2,
+ feature_bitmap2, &next2);
+ l2_output_dispatch (b3, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index3,
+ feature_bitmap3, &next3);
+
+ l2output_vtr (node, config0, feature_bitmap0, b0, &next0);
+ l2output_vtr (node, config1, feature_bitmap1, b1, &next1);
+ l2output_vtr (node, config2, feature_bitmap2, b2, &next2);
+ l2output_vtr (node, config3, feature_bitmap3, b3, &next3);
+
+ if (do_trace)
+ {
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ clib_memcpy (t->raw, &h1->type, sizeof (t->raw));
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ clib_memcpy (t->raw, &h2->type, sizeof (t->raw));
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ clib_memcpy (t->raw, &h3->type, sizeof (t->raw));
+ }
+ }
+
+ /*
+ * Perform the split horizon check
+ * The check can only fail for non-zero shg's
+ */
+ if (PREDICT_FALSE (config0->shg + config1->shg +
+ config2->shg + config3->shg))
+ {
+ /* one of the checks might fail, check both */
+ if (split_horizon_violation
+ (config0->shg, vnet_buffer (b0)->l2.shg))
+ {
+ next0 = L2OUTPUT_NEXT_DROP;
+ b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config1->shg, vnet_buffer (b1)->l2.shg))
+ {
+ next1 = L2OUTPUT_NEXT_DROP;
+ b1->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config2->shg, vnet_buffer (b2)->l2.shg))
+ {
+ next2 = L2OUTPUT_NEXT_DROP;
+ b2->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config3->shg, vnet_buffer (b3)->l2.shg))
+ {
+ next3 = L2OUTPUT_NEXT_DROP;
+ b3->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+ l2_output_config_t *config0;
+ u32 feature_bitmap0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ vlib_node_increment_counter (vm, l2output_node.index,
+ L2OUTPUT_ERROR_L2OUTPUT, 1);
+
+ /* Get config for the output interface */
+ config0 = vec_elt_at_index (msm->configs, sw_if_index0);
+
+ /*
+ * Get features from the config
+ * TODO: mask out any non-applicable features
+ */
+ feature_bitmap0 = config0->feature_bitmap;
+
+ /* Determine next node */
+ l2_output_dispatch (b0, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index0,
+ feature_bitmap0, &next0);
+
+ l2output_vtr (node, config0, feature_bitmap0, b0, &next0);
+
+ if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ h0 = vlib_buffer_get_current (b0);
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+
+ /* Perform the split horizon check */
+ if (PREDICT_FALSE
+ (split_horizon_violation
+ (config0->shg, vnet_buffer (b0)->l2.shg)))
+ {
+ next0 = L2OUTPUT_NEXT_DROP;
+ b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+l2output_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ return l2output_node_inline (vm, node, frame, 1 /* do_trace */ );
+ return l2output_node_inline (vm, node, frame, 0 /* do_trace */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2output_node) = {
+ .function = l2output_node_fn,
+ .name = "l2-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2output_error_strings),
+ .error_strings = l2output_error_strings,
+
+ .n_next_nodes = L2OUTPUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2OUTPUT_NEXT_DROP] = "error-drop",
+ [L2OUTPUT_NEXT_BAD_INTF] = "l2-output-bad-intf",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn);
+/* *INDENT-ON* */
+
+
+#define foreach_l2output_bad_intf_error \
+_(DROP, "L2 output to interface not in L2 mode or deleted")
+
+static char *l2output_bad_intf_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2output_bad_intf_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_BAD_INTF_ERROR_##sym,
+ foreach_l2output_bad_intf_error
+#undef _
+ L2OUTPUT_BAD_INTF_N_ERROR,
+} l2output_bad_intf_error_t;
+
+
+/**
+ * Output node for interfaces/tunnels which were in L2 mode but were later
+ * changed to L3 mode or possibly deleted. On changing the forwarding mode
+ * of any tunnel/interface from L2 to L3, its entry in the l2output_main
+ * table output_node_index_vec[sw_if_index] MUST be set to the value of
+ * L2OUTPUT_NEXT_BAD_INTF. Thus, if there are stale entries in the L2FIB for
+ * this sw_if_index, l2-output will send packets for this sw_if_index to the
+ * l2-output-bad-intf node, which just sets up the proper drop reason before
+ * sending packets to the error-drop node. Stale L2FIB entries for deleted
+ * tunnels then cannot cause packet or memory corruption.
+ */
+static vlib_node_registration_t l2output_bad_intf_node;
+
+static uword
+l2output_bad_intf_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2output_next_t next_index = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b0->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP];
+ b1->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP];
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP];
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2output_bad_intf_node,static) = {
+ .function = l2output_bad_intf_node_fn,
+ .name = "l2-output-bad-intf",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2output_bad_intf_error_strings),
+ .error_strings = l2output_bad_intf_error_strings,
+
+ .n_next_nodes = 1,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2output_bad_intf_node, l2output_bad_intf_node_fn);
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2output_init (vlib_main_t * vm)
+{
+ l2output_main_t *mp = &l2output_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Create the config vector; until the CLI config is hooked up, just
+ * create 100 sw interface entries and zero them */
+ vec_validate (mp->configs, 100);
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2output_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ mp->l2_out_feat_next);
+
+ /* Initialize the output node mapping table */
+ vec_validate_init_empty (mp->output_node_index_vec, 100,
+ L2OUTPUT_NEXT_DROP);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2output_init);
+
+
+/** Create a mapping in the next node mapping table for the given sw_if_index. */
+void
+l2output_create_output_node_mapping (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main, u32 sw_if_index)
+{
+ vnet_hw_interface_t *hw0 =
+ vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+
+ /* dynamically create graph node arc */
+ u32 next = vlib_node_add_next (vlib_main, l2output_node.index,
+ hw0->output_node_index);
+ l2output_main.output_node_index_vec[sw_if_index] = next;
+}
+
+/* Get a pointer to the config for the given interface */
+l2_output_config_t *
+l2output_intf_config (u32 sw_if_index)
+{
+ l2output_main_t *mp = &l2output_main;
+
+ vec_validate (mp->configs, sw_if_index);
+ return vec_elt_at_index (mp->configs, sw_if_index);
+}
+
+/** Enable (or disable) the feature in the bitmap for the given interface. */
+void
+l2output_intf_bitmap_enable (u32 sw_if_index, u32 feature_bitmap, u32 enable)
+{
+ l2output_main_t *mp = &l2output_main;
+ l2_output_config_t *config;
+
+ vec_validate (mp->configs, sw_if_index);
+ config = vec_elt_at_index (mp->configs, sw_if_index);
+
+ if (enable)
+ {
+ config->feature_bitmap |= feature_bitmap;
+ }
+ else
+ {
+ config->feature_bitmap &= ~feature_bitmap;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h
new file mode 100644
index 00000000..1a73fdf9
--- /dev/null
+++ b/src/vnet/l2/l2_output.h
@@ -0,0 +1,174 @@
+/*
+ * l2_output.h : layer 2 output packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_output_h
+#define included_vnet_l2_output_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+
+
+/* The L2 output feature configuration, a per-interface struct */
+typedef struct
+{
+
+ u32 feature_bitmap;
+
+ /*
+ * vlan tag rewrite for ingress and egress
+ * ingress vtr is located here because the same config data is used for
+ * the egress EFP filter check
+ */
+ vtr_config_t input_vtr;
+ vtr_config_t output_vtr;
+ ptr_config_t input_pbb_vtr;
+ ptr_config_t output_pbb_vtr;
+
+ /* some of these flags may get integrated into the feature bitmap */
+ u8 fwd_enable;
+ u8 flood_enable;
+
+ /* split horizon group */
+ u8 shg;
+
+ /* flag for output vtr operation */
+ u8 out_vtr_flag;
+
+} l2_output_config_t;
+
+typedef struct
+{
+ /*
+ * vector of output next node index, indexed by sw_if_index.
+ * used when all output features have been executed and the
+ * next nodes are the interface output nodes.
+ */
+ u32 *output_node_index_vec;
+
+ /*
+ * array of next node index for each output feature, indexed
+ * by l2output_feat_t. Used to determine next feature node.
+ */
+ u32 l2_out_feat_next[32];
+
+ /* config vector indexed by sw_if_index */
+ l2_output_config_t *configs;
+
+ /* Convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2output_main_t;
+
+l2output_main_t l2output_main;
+
+extern vlib_node_registration_t l2output_node;
+
+/* L2 output features */
+
+/* Mappings from feature ID to graph node name in reverse order */
+#define foreach_l2output_feat \
+ _(OUTPUT, "interface-output") \
+ _(SPAN, "span-l2-output") \
+ _(CFM, "feature-bitmap-drop") \
+ _(QOS, "feature-bitmap-drop") \
+ _(ACL, "l2-output-acl") \
+ _(L2PT, "feature-bitmap-drop") \
+ _(EFP_FILTER, "l2-efp-filter") \
+ _(IPIW, "feature-bitmap-drop") \
+ _(STP_BLOCKED, "feature-bitmap-drop") \
+ _(LINESTATUS_DOWN, "feature-bitmap-drop") \
+ _(OUTPUT_CLASSIFY, "l2-output-classify") \
+ _(XCRW, "l2-xcrw")
+
+/* Feature bitmap positions */
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_FEAT_##sym##_BIT,
+ foreach_l2output_feat
+#undef _
+ L2OUTPUT_N_FEAT,
+} l2output_feat_t;
+
+STATIC_ASSERT (L2OUTPUT_N_FEAT <= 32, "too many l2 output features");
+
+/* Feature bit masks */
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_FEAT_##sym = (1<<L2OUTPUT_FEAT_##sym##_BIT),
+ foreach_l2output_feat
+#undef _
+} l2output_feat_masks_t;
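+
+/*
+ * For example, the first list entry _(OUTPUT, "interface-output") expands
+ * to L2OUTPUT_FEAT_OUTPUT_BIT = 0 in the position enum and
+ * L2OUTPUT_FEAT_OUTPUT = (1 << 0) in the mask enum, with "interface-output"
+ * as the associated graph node name.
+ */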
+
+#define foreach_l2output_error \
+_(L2OUTPUT, "L2 output packets") \
+_(EFP_DROP, "L2 EFP filter pre-rewrite drops") \
+_(VTR_DROP, "L2 output tag rewrite drops") \
+_(SHG_DROP, "L2 split horizon drops") \
+_(DROP, "L2 output drops") \
+_(MAPPING_DROP, "L2 output interface not valid")
+
+typedef enum
+{
+ L2OUTPUT_NEXT_DROP,
+ L2OUTPUT_NEXT_BAD_INTF,
+ L2OUTPUT_N_NEXT,
+} l2output_next_t;
+
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_ERROR_##sym,
+ foreach_l2output_error
+#undef _
+ L2OUTPUT_N_ERROR,
+} l2output_error_t;
+
+/* Return an array of strings containing graph node names of each feature */
+char **l2output_get_feat_names (void);
+
+/* arg0 - u32 feature_bitmap */
+u8 *format_l2_output_features (u8 * s, va_list * args);
+
+/**
+ * The next set of functions is for use by output feature graph nodes.
+ * When the last bit has been cleared from the output feature bitmap,
+ * the next node is the output graph node for the TX sw_if_index.
+ * These functions help the feature nodes get that node index.
+ */
+
+/* Create a mapping to the output graph node for the given sw_if_index */
+void l2output_create_output_node_mapping (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index);
+
+/** Get a pointer to the config for the given interface */
+l2_output_config_t *l2output_intf_config (u32 sw_if_index);
+
+/** Enable (or disable) the feature in the bitmap for the given interface */
+void l2output_intf_bitmap_enable (u32 sw_if_index,
+ u32 feature_bitmap, u32 enable);
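+
+/*
+ * Typical usage from a feature's enable/disable path (a sketch; the
+ * sw_if_index is whatever interface the caller manages):
+ *   l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_SPAN, 1);
+ *   l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_SPAN, 0);
+ */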
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output_acl.c b/src/vnet/l2/l2_output_acl.c
new file mode 100644
index 00000000..7d051326
--- /dev/null
+++ b/src/vnet/l2/l2_output_acl.c
@@ -0,0 +1,341 @@
+/*
+ * l2_output_acl.c : layer 2 output acl processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+
+typedef struct
+{
+ /* Next nodes for L2 output features */
+ u32 l2_out_feat_next[32];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_outacl_main_t;
+
+
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 next_index;
+ u32 sw_if_index;
+} l2_outacl_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_outacl_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_outacl_trace_t *t = va_arg (*args, l2_outacl_trace_t *);
+
+ s = format (s, "l2-output-acl: sw_if_index %d dst %U src %U",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src);
+ return s;
+}
+
+l2_outacl_main_t l2_outacl_main;
+
+static vlib_node_registration_t l2_outacl_node;
+
+#define foreach_l2_outacl_error \
+_(L2_OUTACL, "L2 output ACL packets") \
+_(DROP, "L2 output drops")
+
+typedef enum
+{
+#define _(sym,str) L2_OUTACL_ERROR_##sym,
+ foreach_l2_outacl_error
+#undef _
+ L2_OUTACL_N_ERROR,
+} l2_outacl_error_t;
+
+static char *l2_outacl_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_outacl_error
+#undef _
+};
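+
+/*
+ * For reference, the two X-macro constructs above expand to:
+ *
+ *   typedef enum {
+ *     L2_OUTACL_ERROR_L2_OUTACL,
+ *     L2_OUTACL_ERROR_DROP,
+ *     L2_OUTACL_N_ERROR,
+ *   } l2_outacl_error_t;
+ *
+ *   static char *l2_outacl_error_strings[] = {
+ *     "L2 output ACL packets",
+ *     "L2 output drops",
+ *   };
+ */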
+
+typedef enum
+{
+ L2_OUTACL_NEXT_DROP,
+ L2_OUTACL_N_NEXT,
+} l2_outacl_next_t;
+
+
+
+static uword
+l2_outacl_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_outacl_next_t next_index;
+ l2_outacl_main_t *msm = &l2_outacl_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_outacl_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual-loop disabled ("0 &&") until the ACL core code is written */
+ while (0 && n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t *h0, *h1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+ /* Determine next nodes so the trace below records valid indices */
+ next0 = vnet_l2_feature_next (b0, msm->l2_out_feat_next,
+ L2OUTPUT_FEAT_ACL);
+ next1 = vnet_l2_feature_next (b1, msm->l2_out_feat_next,
+ L2OUTPUT_FEAT_ACL);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ }
+
+ em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] +=
+ 2;
+
+ /* add core loop code here */
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] +=
+ 1;
+
+ /*
+ * L2_OUTACL code
+ * Dummy for now, just go to next feature node
+ */
+
+ /* Determine next node */
+ next0 = vnet_l2_feature_next (b0, msm->l2_out_feat_next,
+ L2OUTPUT_FEAT_ACL);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_outacl_node,static) = {
+ .function = l2_outacl_node_fn,
+ .name = "l2-output-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_outacl_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_outacl_error_strings),
+ .error_strings = l2_outacl_error_strings,
+
+ .n_next_nodes = L2_OUTACL_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_OUTACL_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_outacl_node, l2_outacl_node_fn);
+
+clib_error_t *
+l2_outacl_init (vlib_main_t * vm)
+{
+ l2_outacl_main_t *mp = &l2_outacl_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_outacl_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ mp->l2_out_feat_next);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_outacl_init);
+
+#if 0
+/** @todo maybe someone will add output ACL's in the future.
+ * Set subinterface outacl enable/disable.
+ * The CLI format is:
+ * set interface acl output <interface> [disable]
+ */
+static clib_error_t *
+int_l2_outacl (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_ACL, enable);
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_outacl_cli, static) = {
+ .path = "set interface acl output",
+ .short_help = "set interface acl output <interface> [disable]",
+ .function = int_l2_outacl,
+};
+/* *INDENT-ON* */
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c
new file mode 100644
index 00000000..a49abec2
--- /dev/null
+++ b/src/vnet/l2/l2_output_classify.c
@@ -0,0 +1,654 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/api_errno.h>
+
+/**
+ * @file
+ * @brief Layer 2 Output Classifier.
+ *
+ * @sa @ref vnet/vnet/classify/vnet_classify.c
+ * @sa @ref vnet/vnet/classify/vnet_classify.h
+ */
+
+typedef struct
+{
+ /** interface handle for the ith packet */
+ u32 sw_if_index;
+ /** graph arc index selected for this packet */
+ u32 next_index;
+ /** classifier table which provided the final result */
+ u32 table_index;
+ /** offset in classifier heap of the corresponding session */
+ u32 session_offset;
+} l2_output_classify_trace_t;
+
+typedef struct
+{
+ /** use-case independent main object pointer */
+ vnet_classify_main_t *vcm;
+ /** l2 input classifier main object pointer */
+ l2_output_classify_main_t *l2cm;
+} l2_output_classify_runtime_t;
+
+/** Packet trace format function. */
+static u8 *
+format_l2_output_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_output_classify_trace_t *t =
+ va_arg (*args, l2_output_classify_trace_t *);
+
+ s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d",
+ t->sw_if_index, t->table_index, t->session_offset,
+ t->next_index);
+ return s;
+}
+
+/** l2 output classifier main data structure. */
+l2_output_classify_main_t l2_output_classify_main;
+
+vlib_node_registration_t l2_output_classify_node;
+
+#define foreach_l2_output_classify_error \
+_(MISS, "Classify misses") \
+_(HIT, "Classify hits") \
+_(CHAIN_HIT, "Classify hits after chain walk") \
+_(DROP, "L2 Classify Drops")
+
+typedef enum
+{
+#define _(sym,str) L2_OUTPUT_CLASSIFY_ERROR_##sym,
+ foreach_l2_output_classify_error
+#undef _
+ L2_OUTPUT_CLASSIFY_N_ERROR,
+} l2_output_classify_error_t;
+
+static char *l2_output_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_output_classify_error
+#undef _
+};
+
+/**
+ * @brief l2 output classifier node.
+ * @node l2-output-classify
+ *
+ * This is the l2 output classifier dispatch node.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer metadata, next index usage
+ *
+ * @em Uses:
+ * - <code>(l2_output_classify_runtime_t *)
+ * rt->classify_table_index_by_sw_if_index</code>
+ * Head of the per-interface, per-protocol classifier table chain
+ * for a specific interface. ~0 => send pkts to the next
+ * feature in the L2 feature chain.
+ * - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+ * - Indicates the @c sw_if_index value of the interface on which
+ * the packet will be transmitted.
+ * - <code>vnet_buffer (b0)->l2.feature_bitmap</code>
+ * - Used to steer packets across l2 features enabled on the interface
+ * - <code>(vnet_classify_entry_t) e0->next_index</code>
+ * - Used to steer traffic when the classifier hits on a session
+ * - <code>(vnet_classify_entry_t) e0->advance</code>
+ * - Signed quantity applied via <code>vlib_buffer_advance</code>
+ * when the classifier hits on a session
+ * - <code>(vnet_classify_table_t) t0->miss_next_index</code>
+ * - Used to steer traffic when the classifier misses
+ *
+ * @em Sets:
+ * - <code>vnet_buffer (b0)->l2_classify.table_index</code>
+ * - Classifier table index of the first classifier table in
+ * the classifier table chain
+ * - <code>vnet_buffer (b0)->l2_classify.hash</code>
+ * - Bounded-index extensible hash corresponding to the
+ * masked fields in the current packet
+ * - <code>vnet_buffer (b0)->l2.feature_bitmap</code>
+ * - Used to steer packets across l2 features enabled on the interface
+ * - <code>vnet_buffer (b0)->l2_classify.opaque_index</code>
+ * - Copied from the classifier session object upon classifier hit
+ *
+ * @em Counters:
+ * - <code>L2_OUTPUT_CLASSIFY_ERROR_MISS</code> Classifier misses
+ * - <code>L2_OUTPUT_CLASSIFY_ERROR_HIT</code> Classifier hits
+ * - <code>L2_OUTPUT_CLASSIFY_ERROR_CHAIN_HIT</code>
+ * Classifier hits in other than the first table
+ */
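+
+/*
+ * Note: both loops below pick one of the three per-protocol tables
+ * from the ethertype without branching:
+ *
+ *   type_index = (type == ETHERNET_TYPE_IP4)
+ *     ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER;
+ *   type_index = (type == ETHERNET_TYPE_IP6)
+ *     ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index;
+ */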
+
+static uword
+l2_output_classify_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_output_classify_next_t next_index;
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ vnet_classify_main_t *vcm = cm->vnet_classify_main;
+ l2_output_classify_runtime_t *rt =
+ (l2_output_classify_runtime_t *) node->runtime_data;
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ f64 now;
+ u32 n_next_nodes;
+ u32 sw_if_index0;
+
+ n_next_nodes = node->n_next_nodes;
+
+ now = vlib_time_now (vm);
+
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ /* First pass: compute hash */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ ethernet_header_t *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u16 type0, type1;
+ int type_index0, type_index1;
+ vnet_classify_table_t *t0, *t1;
+ u32 table_index0, table_index1;
+ u64 hash0, hash1;
+
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ vnet_buffer (b0)->l2_classify.table_index = ~0;
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ vnet_buffer (b1)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+ type1 = clib_net_to_host_u16 (h1->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index0;
+
+ type_index1 = (type1 == ETHERNET_TYPE_IP4)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER;
+ type_index1 = (type1 == ETHERNET_TYPE_IP6)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index1;
+
+ vnet_buffer (b0)->l2_classify.table_index =
+ table_index0 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer (b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+
+ vnet_buffer (b1)->l2_classify.table_index =
+ table_index1 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index1][sw_if_index1];
+
+ if (table_index1 != ~0)
+ {
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer (b1)->l2_classify.hash = hash1 =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+ vnet_classify_prefetch_bucket (t1, hash1);
+ }
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ ethernet_header_t *h0;
+ u16 type0;
+ u32 type_index0;
+ vnet_classify_table_t *t0;
+ u32 table_index0;
+ u64 hash0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ vnet_buffer (b0)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index0;
+
+ vnet_buffer (b0)->l2_classify.table_index =
+ table_index0 = rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer (b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ~0;
+ ethernet_header_t *h0;
+ u32 table_index0;
+ u64 hash0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+
+ if (PREDICT_TRUE (n_left_from > 2))
+ {
+ vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
+ u64 phash2;
+ u32 table_index2;
+ vnet_classify_table_t *tp2;
+
+ /*
+ * Prefetch table entry two ahead. Buffer / data
+ * were prefetched above...
+ */
+ table_index2 = vnet_buffer (p2)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index2 != ~0))
+ {
+ tp2 = pool_elt_at_index (vcm->tables, table_index2);
+ phash2 = vnet_buffer (p2)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp2, phash2);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (t0->next_table_index != ~0)
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 =
+ vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE (next0 == 0))
+ b0->error = node->errors[L2_OUTPUT_CLASSIFY_ERROR_DROP];
+
+ /* Determine the next node and remove ourself from bitmap */
+ if (PREDICT_FALSE (next0 == ~0))
+ next0 = vnet_l2_feature_next (b0, cm->l2_out_feat_next,
+ L2OUTPUT_FEAT_OUTPUT_CLASSIFY);
+ else
+ vnet_buffer (b0)->l2.feature_bitmap &=
+ ~L2OUTPUT_FEAT_OUTPUT_CLASSIFY;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_output_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ t->table_index = table_index0;
+ t->next_index = next0;
+ t->session_offset = e0 ? vnet_classify_get_offset (t0, e0) : 0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_CHAIN_HIT,
+ chain_hits);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_output_classify_node) = {
+ .function = l2_output_classify_node_fn,
+ .name = "l2-output-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_output_classify_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_output_classify_error_strings),
+ .error_strings = l2_output_classify_error_strings,
+
+ .runtime_data_bytes = sizeof (l2_output_classify_runtime_t),
+
+ .n_next_nodes = L2_OUTPUT_CLASSIFY_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_OUTPUT_CLASSIFY_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_output_classify_node,
+ l2_output_classify_node_fn);
+
+/** l2 output classifier feature initialization. */
+clib_error_t *
+l2_output_classify_init (vlib_main_t * vm)
+{
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ l2_output_classify_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_output_classify_node.index);
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main ();
+ cm->vnet_classify_main = &vnet_classify_main;
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_output_classify_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ cm->l2_out_feat_next);
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_output_classify_init);
+
+clib_error_t *
+l2_output_classify_worker_init (vlib_main_t * vm)
+{
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ l2_output_classify_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_output_classify_node.index);
+
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (l2_output_classify_worker_init);
+
+/** Enable/disable l2 output classification on a specific interface. */
+void
+vnet_l2_output_classify_enable_disable (u32 sw_if_index, int enable_disable)
+{
+
+ l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_OUTPUT_CLASSIFY,
+ (u32) enable_disable);
+}
+
+/** @brief Set l2 per-protocol, per-interface output classification tables.
+ *
+ * @param sw_if_index interface handle
+ * @param ip4_table_index ip4 classification table index, or ~0
+ * @param ip6_table_index ip6 classification table index, or ~0
+ * @param other_table_index non-ip4, non-ip6 classification table index,
+ * or ~0
+ * @returns 0 on success; VNET_API_ERROR_NO_SUCH_TABLE, _TABLE2, or
+ * _TABLE3 if the corresponding (non-~0) table does not exist.
+ */
+
+int
+vnet_l2_output_classify_set_tables (u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 other_table_index)
+{
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ vnet_classify_main_t *vcm = cm->vnet_classify_main;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ if (ip4_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip4_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ if (ip6_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip6_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE2;
+
+ if (other_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, other_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE3;
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_OTHER],
+ sw_if_index);
+
+ cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4]
+ [sw_if_index] = ip4_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6]
+ [sw_if_index] = ip6_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_OTHER]
+ [sw_if_index] = other_table_index;
+
+ return 0;
+}
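+
+/*
+ * Usage sketch (error handling elided): bind a previously created ip4
+ * classify table to an interface and turn the feature on. The table
+ * index is assumed to come from an earlier table-create call.
+ *
+ *   if (vnet_l2_output_classify_set_tables (sw_if_index,
+ *                                           ip4_table_index, ~0, ~0) == 0)
+ *     vnet_l2_output_classify_enable_disable (sw_if_index, 1);
+ */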
+
+static clib_error_t *
+int_l2_output_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 other_table_index = ~0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ ;
+ else if (unformat (input, "other-table %d", &other_table_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface must be specified");
+
+
+ if (ip4_table_index == ~0 && ip6_table_index == ~0
+ && other_table_index == ~0)
+ {
+ vlib_cli_output (vm, "L2 classification disabled");
+ vnet_l2_output_classify_enable_disable (sw_if_index, 0 /* enable */ );
+ return 0;
+ }
+
+ rv = vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index,
+ other_table_index);
+ switch (rv)
+ {
+ case 0:
+ vnet_l2_output_classify_enable_disable (sw_if_index, 1 /* enable */ );
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_l2_output_classify_set_tables: %d",
+ rv);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure Layer 2 output classification.
+ *
+ * @cliexpar
+ * @cliexstart{set interface l2 output classify intfc <interface-name> [ip4-table <index>] [ip6-table <index>] [other-table <index>]}
+ * @cliexend
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
+ .path = "set interface l2 output classify",
+ .short_help =
+ "set interface l2 output classify intfc <<interface-name>> [ip4-table <n>]\n"
+ " [ip6-table <n>] [other-table <n>]",
+ .function = int_l2_output_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c
new file mode 100644
index 00000000..ff3d2f3a
--- /dev/null
+++ b/src/vnet/l2/l2_patch.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/feature/feature.h>
+#include <vppinfra/error.h>
+
+typedef struct
+{
+ /* vector of dispositions, indexed by rx_sw_if_index */
+ u32 *tx_next_by_rx_sw_if_index;
+ u32 *tx_sw_if_index_by_rx_sw_if_index;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_patch_main_t;
+
+typedef struct
+{
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+} l2_patch_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_patch_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_patch_trace_t *t = va_arg (*args, l2_patch_trace_t *);
+
+ s = format (s, "L2_PATCH: rx %d tx %d", t->rx_sw_if_index,
+ t->tx_sw_if_index);
+ return s;
+}
+
+l2_patch_main_t l2_patch_main;
+
+static vlib_node_registration_t l2_patch_node;
+
+#define foreach_l2_patch_error \
+_(PATCHED, "L2 patch packets") \
+_(DROPPED, "L2 patch misconfigured drops")
+
+typedef enum
+{
+#define _(sym,str) L2_PATCH_ERROR_##sym,
+ foreach_l2_patch_error
+#undef _
+ L2_PATCH_N_ERROR,
+} l2_patch_error_t;
+
+static char *l2_patch_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_patch_error
+#undef _
+};
+
+typedef enum
+{
+ L2_PATCH_NEXT_DROP,
+ L2_PATCH_N_NEXT,
+} l2_patch_next_t;
+
+static uword
+l2_patch_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_patch_next_t next_index;
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_patch_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ /* The patch never touches packet data, so no data prefetch needed */
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index1] != ~0);
+ ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1] != ~0);
+
+ next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0];
+ next1 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index1];
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index0;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index1;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1];
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0);
+
+ next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0];
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index0;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
+ }
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ em->counters[node_counter_base_index + L2_PATCH_ERROR_PATCHED] +=
+ frame->n_vectors;
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_patch_node, static) = {
+ .function = l2_patch_node_fn,
+ .name = "l2-patch",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_patch_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_patch_error_strings),
+ .error_strings = l2_patch_error_strings,
+
+ .n_next_nodes = L2_PATCH_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_PATCH_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_patch_node, l2_patch_node_fn);
+
+int
+vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
+{
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ vnet_hw_interface_t *rxhi, *txhi;
+ u32 tx_next_index;
+
+ /*
+ * We assume that the API msg handler has used 2x VALIDATE_SW_IF_INDEX
+ * macros...
+ */
+
+ rxhi = vnet_get_sup_hw_interface (l2pm->vnet_main, rx_sw_if_index);
+
+ /* Make sure caller didn't pass a vlan subif, etc. */
+ if (rxhi->sw_if_index != rx_sw_if_index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ txhi = vnet_get_sup_hw_interface (l2pm->vnet_main, tx_sw_if_index);
+ if (txhi->sw_if_index != tx_sw_if_index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
+
+ if (is_add)
+ {
+ tx_next_index = vlib_node_add_next (l2pm->vlib_main,
+ l2_patch_node.index,
+ txhi->output_node_index);
+
+ vec_validate_init_empty (l2pm->tx_next_by_rx_sw_if_index,
+ rx_sw_if_index, ~0);
+
+ l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = tx_next_index;
+ vec_validate_init_empty (l2pm->tx_sw_if_index_by_rx_sw_if_index,
+ rx_sw_if_index, ~0);
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index]
+ = txhi->sw_if_index;
+
+ ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+
+ vnet_feature_enable_disable ("device-input", "l2-patch",
+ rxhi->hw_if_index, 1, 0, 0);
+ }
+ else
+ {
+ ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index,
+ 0 /* disable promiscuous mode */ );
+
+ vnet_feature_enable_disable ("device-input", "l2-patch",
+ rxhi->hw_if_index, 0, 0, 0);
+ if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index)
+ {
+ l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0;
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] = ~0;
+ }
+ }
+
+ return 0;
+}
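+
+/*
+ * Usage sketch: a single patch forwards rx -> tx only; creating a
+ * second patch with the arguments swapped yields a bidirectional
+ * cross-connect. Both indices must be top-level hardware interfaces,
+ * as checked above.
+ *
+ *   vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, 1);
+ *   vnet_l2_patch_add_del (tx_sw_if_index, rx_sw_if_index, 1);
+ */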
+
+static clib_error_t *
+test_patch_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 rx_sw_if_index, tx_sw_if_index;
+ int rv;
+ int rx_set = 0;
+ int tx_set = 0;
+ int is_add = 1;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "rx %U", unformat_vnet_sw_interface,
+ l2pm->vnet_main, &rx_sw_if_index))
+ rx_set = 1;
+ else if (unformat (line_input, "tx %U", unformat_vnet_sw_interface,
+ l2pm->vnet_main, &tx_sw_if_index))
+ tx_set = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (rx_set == 0)
+ {
+ error = clib_error_return (0, "rx interface not set");
+ goto done;
+ }
+
+ if (tx_set == 0)
+ {
+ error = clib_error_return (0, "tx interface not set");
+ goto done;
+ }
+
+ rv = vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ error = clib_error_return (0, "rx interface not a physical port");
+ goto done;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX_2:
+ error = clib_error_return (0, "tx interface not a physical port");
+ goto done;
+
+ default:
+ error = clib_error_return
+ (0, "WARNING: vnet_l2_patch_add_del returned %d", rv);
+ goto done;
+ }
+
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Create or delete a Layer 2 patch.
+ *
+ * @cliexpar
+ * @cliexstart{test l2patch rx <intfc> tx <intfc> [del]}
+ * @cliexend
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_patch_command, static) = {
+ .path = "test l2patch",
+ .short_help = "test l2patch rx <intfc> tx <intfc> [del]",
+ .function = test_patch_command_fn,
+};
+/* *INDENT-ON* */
+
+/** Display the contents of the l2patch table. */
+static clib_error_t *
+show_l2patch (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ u32 rx_sw_if_index;
+ u32 no_entries = 1;
+
+ ASSERT (vec_len (l2pm->tx_next_by_rx_sw_if_index) ==
+ vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index));
+
+ for (rx_sw_if_index = 0;
+ rx_sw_if_index < vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index);
+ rx_sw_if_index++)
+ {
+ u32 tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index];
+ if (tx_sw_if_index != ~0)
+ {
+ no_entries = 0;
+ vlib_cli_output (vm, "%26U -> %U",
+ format_vnet_sw_if_index_name,
+ l2pm->vnet_main, rx_sw_if_index,
+ format_vnet_sw_if_index_name,
+ l2pm->vnet_main, tx_sw_if_index);
+ }
+ }
+
+ if (no_entries)
+ vlib_cli_output (vm, "no l2patch entries");
+
+ return 0;
+}
+
+/*?
+ * Show Layer 2 patch entries.
+ *
+ * @cliexpar
+ * @cliexstart{show l2patch}
+ * @cliexend
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2patch_cli, static) = {
+ .path = "show l2patch",
+ .short_help = "Show l2 interface cross-connect entries",
+ .function = show_l2patch,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+l2_patch_init (vlib_main_t * vm)
+{
+ l2_patch_main_t *mp = &l2_patch_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_patch_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c
new file mode 100644
index 00000000..fec04774
--- /dev/null
+++ b/src/vnet/l2/l2_rw.c
@@ -0,0 +1,710 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_rw.h>
+
+/**
+ * @file
+ * @brief Layer 2 Rewrite.
+ *
+ * The Layer 2 rewrite node uses classify tables to match packets, then
+ * modifies the packet header using the provisioned mask and value.
+ */
+
+
+l2_rw_main_t l2_rw_main;
+
+vlib_node_registration_t l2_rw_node;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 classify_table_index;
+ u32 rewrite_entry_index;
+} l2_rw_trace_t;
+
+static u8 *
+format_l2_rw_entry (u8 * s, va_list * args)
+{
+ l2_rw_entry_t *e = va_arg (*args, l2_rw_entry_t *);
+ l2_rw_main_t *rw = &l2_rw_main;
+ s = format (s, "%d - mask:%U value:%U\n",
+ e - rw->entries,
+ format_hex_bytes, e->mask,
+ e->rewrite_n_vectors * sizeof (u32x4), format_hex_bytes,
+ e->value, e->rewrite_n_vectors * sizeof (u32x4));
+ s =
+ format (s, " hits:%d skip_bytes:%d", e->hit_count,
+ e->skip_n_vectors * sizeof (u32x4));
+ return s;
+}
+
+static u8 *
+format_l2_rw_config (u8 * s, va_list * args)
+{
+ l2_rw_config_t *c = va_arg (*args, l2_rw_config_t *);
+ return format (s, "table-index:%d miss-index:%d",
+ c->table_index, c->miss_index);
+}
+
+/* packet trace format function */
+static u8 *
+format_l2_rw_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_rw_trace_t *t = va_arg (*args, l2_rw_trace_t *);
+ return format (s, "l2-rw: sw_if_index %d, table %d, entry %d",
+ t->sw_if_index, t->classify_table_index,
+ t->rewrite_entry_index);
+}
+
+always_inline l2_rw_config_t *
+l2_rw_get_config (u32 sw_if_index)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ if (PREDICT_FALSE (!clib_bitmap_get (rw->configs_bitmap, sw_if_index)))
+ {
+ vec_validate (rw->configs, sw_if_index);
+ rw->configs[sw_if_index].table_index = ~0;
+ rw->configs[sw_if_index].miss_index = ~0;
+ rw->configs_bitmap =
+ clib_bitmap_set (rw->configs_bitmap, sw_if_index, 1);
+ }
+ return &rw->configs[sw_if_index];
+}
+
+static_always_inline void
+l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h)
+{
+ if (U32X4_ALIGNED (h))
+ {
+ u32x4 *d = ((u32x4 *) h) + rwe->skip_n_vectors;
+ switch (rwe->rewrite_n_vectors)
+ {
+ case 5:
+ d[4] = (d[4] & ~rwe->mask[4]) | rwe->value[4];
+ /* FALLTHROUGH */
+ case 4:
+ d[3] = (d[3] & ~rwe->mask[3]) | rwe->value[3];
+ /* FALLTHROUGH */
+ case 3:
+ d[2] = (d[2] & ~rwe->mask[2]) | rwe->value[2];
+ /* FALLTHROUGH */
+ case 2:
+ d[1] = (d[1] & ~rwe->mask[1]) | rwe->value[1];
+ /* FALLTHROUGH */
+ case 1:
+ d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
+ break;
+ default:
+ abort ();
+ }
+ }
+ else
+ {
+ u64 *d = ((u64 *) h) + rwe->skip_n_vectors * 2;
+ switch (rwe->rewrite_n_vectors)
+ {
+ case 5:
+ d[8] =
+ (d[8] & ~(((u64 *) rwe->mask)[8])) | (((u64 *) rwe->value)[8]);
+ d[9] =
+ (d[9] & ~(((u64 *) rwe->mask)[9])) | (((u64 *) rwe->value)[9]);
+ /* FALLTHROUGH */
+ case 4:
+ d[6] =
+ (d[6] & ~(((u64 *) rwe->mask)[6])) | (((u64 *) rwe->value)[6]);
+ d[7] =
+ (d[7] & ~(((u64 *) rwe->mask)[7])) | (((u64 *) rwe->value)[7]);
+ /* FALLTHROUGH */
+ case 3:
+ d[4] =
+ (d[4] & ~(((u64 *) rwe->mask)[4])) | (((u64 *) rwe->value)[4]);
+ d[5] =
+ (d[5] & ~(((u64 *) rwe->mask)[5])) | (((u64 *) rwe->value)[5]);
+ /* FALLTHROUGH */
+ case 2:
+ d[2] =
+ (d[2] & ~(((u64 *) rwe->mask)[2])) | (((u64 *) rwe->value)[2]);
+ d[3] =
+ (d[3] & ~(((u64 *) rwe->mask)[3])) | (((u64 *) rwe->value)[3]);
+ /* FALLTHROUGH */
+ case 1:
+ d[0] =
+ (d[0] & ~(((u64 *) rwe->mask)[0])) | (((u64 *) rwe->value)[0]);
+ d[1] =
+ (d[1] & ~(((u64 *) rwe->mask)[1])) | (((u64 *) rwe->value)[1]);
+ break;
+ default:
+ abort ();
+ }
+ }
+}
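+
+/*
+ * Worked example: each word is updated as d = (d & ~mask) | value, so
+ * masked bits take the provisioned value and all others pass through.
+ * With d = 0xaabbccdd, mask = 0x0000ffff, value = 0x00001234:
+ *
+ *   (0xaabbccdd & 0xffff0000) | 0x00001234 == 0xaabb1234
+ */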
+
+static uword
+l2_rw_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ u32 n_left_from, *from, *to_next, next_index;
+ vnet_classify_main_t *vcm = &vnet_classify_main;
+ f64 now = vlib_time_now (vlib_get_main ());
+ u32 prefetch_size = 0; /* set from table geometry in the dual loop */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, next0, sw_if_index0, rwe_index0;
+ u32 bi1, next1, sw_if_index1, rwe_index1;
+ vlib_buffer_t *b0, *b1;
+ ethernet_header_t *h0, *h1;
+ l2_rw_config_t *config0, *config1;
+ u64 hash0, hash1;
+ vnet_classify_table_t *t0, *t1;
+ vnet_classify_entry_t *e0, *e1;
+ l2_rw_entry_t *rwe0, *rwe1;
+
+ {
+ vlib_buffer_t *p2, *p3;
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_current (p2), prefetch_size, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_current (p3), prefetch_size, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ config0 = l2_rw_get_config (sw_if_index0); /*TODO: check sw_if_index0 value */
+ config1 = l2_rw_get_config (sw_if_index1); /*TODO: check sw_if_index1 value */
+ t0 = pool_elt_at_index (vcm->tables, config0->table_index);
+ t1 = pool_elt_at_index (vcm->tables, config1->table_index);
+ prefetch_size =
+ (t1->skip_n_vectors + t1->match_n_vectors) * sizeof (u32x4);
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ e1 = vnet_classify_find_entry (t1, (u8 *) h1, hash1, now);
+
+ while (!e0 && (t0->next_table_index != ~0))
+ {
+ t0 = pool_elt_at_index (vcm->tables, t0->next_table_index);
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ }
+
+ while (!e1 && (t1->next_table_index != ~0))
+ {
+ t1 = pool_elt_at_index (vcm->tables, t1->next_table_index);
+ hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);
+ e1 = vnet_classify_find_entry (t1, (u8 *) h1, hash1, now);
+ }
+
+ rwe_index0 = e0 ? e0->opaque_index : config0->miss_index;
+ rwe_index1 = e1 ? e1->opaque_index : config1->miss_index;
+
+ if (rwe_index0 != ~0)
+ {
+ rwe0 = pool_elt_at_index (rw->entries, rwe_index0);
+ l2_rw_rewrite (rwe0, (u8 *) h0);
+ }
+ if (rwe_index1 != ~0)
+ {
+ rwe1 = pool_elt_at_index (rw->entries, rwe_index1);
+ l2_rw_rewrite (rwe1, (u8 *) h1);
+ }
+
+ if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_rw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->classify_table_index = config0->table_index;
+ t->rewrite_entry_index = rwe_index0;
+ }
+
+ if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_rw_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->classify_table_index = config1->table_index;
+ t->rewrite_entry_index = rwe_index1;
+ }
+
+ /* Update feature bitmap and get next feature index */
+ next0 = vnet_l2_feature_next (b0, rw->feat_next_node_index,
+ L2INPUT_FEAT_RW);
+ next1 = vnet_l2_feature_next (b1, rw->feat_next_node_index,
+ L2INPUT_FEAT_RW);
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0, sw_if_index0, rwe_index0;
+ vlib_buffer_t *b0;
+ ethernet_header_t *h0;
+ l2_rw_config_t *config0;
+ u64 hash0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+ l2_rw_entry_t *rwe0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ config0 = l2_rw_get_config (sw_if_index0); /*TODO: check sw_if_index0 value */
+ t0 = pool_elt_at_index (vcm->tables, config0->table_index);
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+
+ while (!e0 && (t0->next_table_index != ~0))
+ {
+ t0 = pool_elt_at_index (vcm->tables, t0->next_table_index);
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ }
+
+ rwe_index0 = e0 ? e0->opaque_index : config0->miss_index;
+
+ if (rwe_index0 != ~0)
+ {
+ rwe0 = pool_elt_at_index (rw->entries, rwe_index0);
+ l2_rw_rewrite (rwe0, (u8 *) h0);
+ }
+
+ if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_rw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->classify_table_index = config0->table_index;
+ t->rewrite_entry_index = rwe_index0;
+ }
+
+ /* Update feature bitmap and get next feature index */
+ next0 = vnet_l2_feature_next (b0, rw->feat_next_node_index,
+ L2INPUT_FEAT_RW);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+int
+l2_rw_mod_entry (u32 * index,
+ u8 * mask, u8 * value, u32 len, u32 skip, u8 is_del)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ l2_rw_entry_t *e = 0;
+ if (*index != ~0)
+ {
+ if (pool_is_free_index (rw->entries, *index))
+ {
+ return -1;
+ }
+ e = pool_elt_at_index (rw->entries, *index);
+ }
+ else
+ {
+ pool_get (rw->entries, e);
+ *index = e - rw->entries;
+ }
+
+ if (!e)
+ return -1;
+
+ if (is_del)
+ {
+ pool_put (rw->entries, e);
+ return 0;
+ }
+
+ e->skip_n_vectors = skip / sizeof (u32x4);
+ skip -= e->skip_n_vectors * sizeof (u32x4);
+ e->rewrite_n_vectors = (skip + len - 1) / sizeof (u32x4) + 1;
+ vec_alloc_aligned (e->mask, e->rewrite_n_vectors, sizeof (u32x4));
+ memset (e->mask, 0, e->rewrite_n_vectors * sizeof (u32x4));
+ vec_alloc_aligned (e->value, e->rewrite_n_vectors, sizeof (u32x4));
+ memset (e->value, 0, e->rewrite_n_vectors * sizeof (u32x4));
+
+ clib_memcpy (((u8 *) e->value) + skip, value, len);
+ clib_memcpy (((u8 *) e->mask) + skip, mask, len);
+
+ int i;
+ for (i = 0; i < e->rewrite_n_vectors; i++)
+ {
+ e->value[i] &= e->mask[i];
+ }
+
+ return 0;
+}
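+
+/*
+ * Worked example: skip = 20 bytes, len = 10 bytes, sizeof (u32x4) = 16:
+ *
+ *   skip_n_vectors    = 20 / 16               = 1  (first 16 bytes skipped)
+ *   residual skip     = 20 - 1 * 16           = 4
+ *   rewrite_n_vectors = (4 + 10 - 1) / 16 + 1 = 1
+ *
+ * so mask and value occupy one 16-byte vector, offset 4 bytes in.
+ */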
+
+static clib_error_t *
+l2_rw_entry_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 index = ~0;
+ u8 *mask = 0;
+ u8 *value = 0;
+ u32 skip = 0;
+ u8 del = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "index %d", &index))
+ ;
+ else if (unformat (input, "mask %U", unformat_hex_string, &mask))
+ ;
+ else if (unformat (input, "value %U", unformat_hex_string, &value))
+ ;
+ else if (unformat (input, "skip %d", &skip))
+ ;
+ else if (unformat (input, "del"))
+ del = 1;
+ else
+ break;
+ }
+
+ if (!mask || !value)
+ return clib_error_return (0, "Unspecified mask or value");
+
+ if (vec_len (mask) != vec_len (value))
+ return clib_error_return (0, "Mask and value lengths must be identical");
+
+ int ret;
+ if ((ret =
+ l2_rw_mod_entry (&index, mask, value, vec_len (mask), skip, del)))
+ return clib_error_return (0, "Could not add entry");
+
+ return 0;
+}
+
+/*?
+ * The Layer 2 rewrite node uses classify tables to match packets, then
+ * modifies the packet header using the provisioned mask and value.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_entry_cli, static) = {
+ .path = "l2 rewrite entry",
+ .short_help =
+ "l2 rewrite entry [index <index>] [mask <hex-mask>] [value <hex-value>] [skip <n_bytes>] [del]",
+ .function = l2_rw_entry_cli_fn,
+};
+/* *INDENT-ON* */
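+
+/*
+ * Example invocation, rewriting four bytes twelve bytes into the
+ * packet (mask and value must be hex strings of equal length):
+ *
+ *   l2 rewrite entry mask ffffffff value 0a000001 skip 12
+ */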
+
+int
+l2_rw_interface_set_table (u32 sw_if_index, u32 table_index, u32 miss_index)
+{
+ l2_rw_config_t *c = l2_rw_get_config (sw_if_index);
+ l2_rw_main_t *rw = &l2_rw_main;
+
+ c->table_index = table_index;
+ c->miss_index = miss_index;
+ u32 feature_bitmap = (table_index == ~0) ? 0 : L2INPUT_FEAT_RW;
+
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_RW, feature_bitmap);
+
+ if (c->table_index == ~0)
+ clib_bitmap_set (rw->configs_bitmap, sw_if_index, 0);
+
+ return 0;
+}
+
+static clib_error_t *
+l2_rw_interface_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 table_index = ~0;
+ u32 sw_if_index = ~0;
+ u32 miss_index = ~0;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index);
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_index))
+ ;
+ else if (unformat (input, "miss-index %d", &miss_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "You must specify an interface");
+ int ret;
+ if ((ret =
+ l2_rw_interface_set_table (sw_if_index, table_index, miss_index)))
+ return clib_error_return (0, "l2_rw_interface_set_table returned %d",
+ ret);
+
+ return 0;
+}
+
+/*?
+ * The Layer 2 rewrite node uses classify tables to match packets, then
+ * modifies the packet header using the provisioned mask and value.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_interface_cli, static) = {
+ .path = "set interface l2 rewrite",
+ .short_help =
+ "set interface l2 rewrite <interface> [table <table index>] [miss-index <entry-index>]",
+ .function = l2_rw_interface_cli_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ if (clib_bitmap_count_set_bits (rw->configs_bitmap) == 0)
+ vlib_cli_output (vm, "No interface is currently using l2 rewrite\n");
+
+ uword i;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach(i, rw->configs_bitmap, {
+ vlib_cli_output (vm, "sw_if_index:%d %U\n", i, format_l2_rw_config, &rw->configs[i]);
+ });
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/*?
+ * The Layer 2 rewrite node uses classify tables to match packets, then
+ * modifies the packet header using the provisioned mask and value.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_show_interfaces_cli, static) = {
+ .path = "show l2 rewrite interfaces",
+ .short_help =
+ "show l2 rewrite interfaces",
+ .function = l2_rw_show_interfaces_cli_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2_rw_show_entries_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ l2_rw_entry_t *e;
+ if (pool_elts (rw->entries) == 0)
+ vlib_cli_output (vm, "No entries\n");
+
+ /* *INDENT-OFF* */
+ pool_foreach(e, rw->entries, {
+ vlib_cli_output (vm, "%U\n", format_l2_rw_entry, e);
+ });
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/*?
+ * The Layer 2 rewrite node uses classify tables to match packets, then
+ * modifies the packet header using the provisioned mask and value.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_show_entries_cli, static) = {
+ .path = "show l2 rewrite entries",
+ .short_help =
+ "show l2 rewrite entries",
+ .function = l2_rw_show_entries_cli_fn,
+};
+/* *INDENT-ON* */
+
+int
+l2_rw_enable_disable (u32 bridge_domain, u8 disable)
+{
+ u32 mask = L2INPUT_FEAT_RW;
+ l2input_set_bridge_features (bridge_domain, mask, disable ? 0 : mask);
+ return 0;
+}
+
+static clib_error_t *
+l2_rw_set_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 bridge_domain;
+ u8 disable = 0;
+
+ if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT ||
+ !unformat (input, "%d", &bridge_domain))
+ {
+ return clib_error_return (0, "You must specify a bridge domain");
+ }
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT &&
+ unformat (input, "disable"))
+ {
+ disable = 1;
+ }
+
+ if (l2_rw_enable_disable (bridge_domain, disable))
+ return clib_error_return (0, "Could not enable or disable rewrite");
+
+ return 0;
+}
+
+/*?
+ * The Layer 2 rewrite node uses classify tables to match packets, then
+ * modifies the packet header using the provisioned mask and value.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_set_cli, static) = {
+ .path = "set bridge-domain rewrite",
+ .short_help =
+ "set bridge-domain rewrite <bridge-domain> [disable]",
+ .function = l2_rw_set_cli_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2_rw_init (vlib_main_t * vm)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ rw->configs = 0;
+ rw->entries = 0;
+ clib_bitmap_alloc (rw->configs_bitmap, 1);
+ feat_bitmap_init_next_nodes (vm,
+ l2_rw_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ rw->feat_next_node_index);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_rw_init);
+
+enum
+{
+ L2_RW_NEXT_DROP,
+ L2_RW_N_NEXT,
+};
+
+#define foreach_l2_rw_error \
+_(UNKNOWN, "Unknown error")
+
+typedef enum
+{
+#define _(sym,str) L2_RW_ERROR_##sym,
+ foreach_l2_rw_error
+#undef _
+ L2_RW_N_ERROR,
+} l2_rw_error_t;
+
+static char *l2_rw_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_rw_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_rw_node) = {
+ .function = l2_rw_node_fn,
+ .name = "l2-rw",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_rw_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(l2_rw_error_strings),
+ .error_strings = l2_rw_error_strings,
+ .runtime_data_bytes = 0,
+ .n_next_nodes = L2_RW_N_NEXT,
+ .next_nodes = { [L2_RW_NEXT_DROP] = "error-drop"},
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_rw_node, l2_rw_node_fn);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_rw.h b/src/vnet/l2/l2_rw.h
new file mode 100644
index 00000000..49aa25fb
--- /dev/null
+++ b/src/vnet/l2/l2_rw.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * l2_rw is based on vnet classifier and provides a way
+ * to modify packets matching a given table.
+ *
+ * Tables must be created using vnet's classify features.
+ * Entries contained within these tables must have their
+ * opaque index set to the rewrite entry created with l2_rw_mod_entry.
+ */
+
+#ifndef L2_RW_H_
+#define L2_RW_H_
+
+#include <vnet/l2/l2_input.h>
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct _l2_rw_entry {
+ u16 skip_n_vectors;
+ u16 rewrite_n_vectors;
+ u64 hit_count;
+ u32x4 *mask;
+ u32x4 *value;
+}) l2_rw_entry_t;
+/* *INDENT-ON* */
+
+/* l2_rw configuration for one interface */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct _l2_rw_config {
+ u32 table_index; /* Which classify table to use */
+ u32 miss_index; /* Rewrite entry to use if table does not match */
+}) l2_rw_config_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ /* Next feature node indexes */
+ u32 feat_next_node_index[32];
+
+ /* A pool of entries */
+ l2_rw_entry_t *entries;
+
+ /* Config vector indexed by sw_if_index */
+ l2_rw_config_t *configs;
+ uword *configs_bitmap;
+} l2_rw_main_t;
+
+extern l2_rw_main_t l2_rw_main;
+
+/*
+ * Specifies which classify table and miss_index should be used
+ * with the given interface.
+ * Use the special value ~0 to unset table_index or miss_index.
+ * The l2_rw feature is automatically enabled on the interface
+ * when table_index or miss_index is not ~0.
+ * Returns 0 on success, non-zero on error.
+ */
+int l2_rw_interface_set_table (u32 sw_if_index,
+ u32 table_index, u32 miss_index);
+
+/*
+ * Creates, modifies or deletes a rewrite entry.
+ * If *index != ~0, modifies an existing entry (or simply
+ * deletes it if is_del is set).
+ * If *index == ~0, creates a new entry and stores the new
+ * entry index in *index (does nothing if is_del is set).
+ * Returns 0 on success, non-zero on error.
+ */
+int l2_rw_mod_entry (u32 * index,
+ u8 * mask, u8 * value, u32 len, u32 skip, u8 is_del);
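+
+/*
+ * Minimal usage sketch (illustrative only: the table number, interface
+ * and byte values below are hypothetical, not part of this API):
+ *
+ *   u32 entry_index = ~0;
+ *   u8 mask[16] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ *   u8 value[16] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
+ *
+ *   Create an entry rewriting the 6-byte dmac, one 16-byte vector,
+ *   nothing skipped, is_del = 0:
+ *     rv = l2_rw_mod_entry (&entry_index, mask, value, 16, 0, 0);
+ *
+ *   Attach classify table 5 to the interface and use the new entry
+ *   when the table does not match:
+ *     rv = l2_rw_interface_set_table (sw_if_index, 5, entry_index);
+ */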
+
+#endif /* L2_RW_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_vtr.c b/src/vnet/l2/l2_vtr.c
new file mode 100644
index 00000000..02a68991
--- /dev/null
+++ b/src/vnet/l2/l2_vtr.c
@@ -0,0 +1,831 @@
+/*
+ * l2_vtr.c : layer 2 vlan tag rewrite configuration
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/l2/l2_input_vtr.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vlib/cli.h>
+
+/**
+ * @file
+ * @brief Ethernet VLAN Tag Rewrite.
+ *
+ * VLAN tag rewrite provides the ability to change the VLAN tags on a packet.
+ * Existing tags can be popped, new tags can be pushed, and existing tags can
+ * be swapped with new tags. The rewrite feature is attached to a subinterface
+ * as input and output operations. The input operation is explicitly configured.
+ * The output operation is the symmetric opposite and is automatically derived
+ * from the input operation.
+ */
+
+/** Just a placeholder; ensures file is not eliminated by linker. */
+clib_error_t *
+l2_vtr_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_vtr_init);
+
+u32
+l2pbb_configure (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op,
+ u8 * b_dmac, u8 * b_smac,
+ u16 b_vlanid, u32 i_sid, u16 vlan_outer_tag)
+{
+ u32 error = 0;
+ u32 enable = 0;
+
+ l2_output_config_t *config = 0;
+ vnet_hw_interface_t *hi;
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+
+ if (!hi)
+ {
+ error = VNET_API_ERROR_INVALID_INTERFACE;
+ goto done;
+ }
+
+  /* Config for this interface should already be initialized */
+ ptr_config_t *in_config;
+ ptr_config_t *out_config;
+ config = vec_elt_at_index (l2output_main.configs, sw_if_index);
+ in_config = &(config->input_pbb_vtr);
+ out_config = &(config->output_pbb_vtr);
+
+ in_config->pop_bytes = 0;
+ in_config->push_bytes = 0;
+ out_config->pop_bytes = 0;
+ out_config->push_bytes = 0;
+ enable = (vtr_op != L2_VTR_DISABLED);
+
+ if (!enable)
+ goto done;
+
+ if (vtr_op == L2_VTR_POP_2)
+ {
+ in_config->pop_bytes = sizeof (ethernet_pbb_header_packed_t);
+ }
+ else if (vtr_op == L2_VTR_PUSH_2)
+ {
+ clib_memcpy (in_config->macs_tags.b_dst_address, b_dmac,
+ sizeof (in_config->macs_tags.b_dst_address));
+ clib_memcpy (in_config->macs_tags.b_src_address, b_smac,
+ sizeof (in_config->macs_tags.b_src_address));
+ in_config->macs_tags.b_type =
+ clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AD);
+ in_config->macs_tags.priority_dei_id =
+ clib_net_to_host_u16 (b_vlanid & 0xFFF);
+ in_config->macs_tags.i_type =
+ clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AH);
+ in_config->macs_tags.priority_dei_uca_res_sid =
+ clib_net_to_host_u32 (i_sid & 0xFFFFF);
+ in_config->push_bytes = sizeof (ethernet_pbb_header_packed_t);
+ }
+ else if (vtr_op == L2_VTR_TRANSLATE_2_2)
+ {
+ /* TODO after PoC */
+ }
+
+ /*
+ * Construct the output tag-rewrite config
+ *
+ * The push/pop values are always reversed
+ */
+ out_config->raw_data = in_config->raw_data;
+ out_config->pop_bytes = in_config->push_bytes;
+ out_config->push_bytes = in_config->pop_bytes;
+
+done:
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable);
+ if (config)
+ config->out_vtr_flag = (u8) enable;
+
+ /* output vtr enable is checked explicitly in l2_output */
+ return error;
+}
+
+/**
+ * Configure vlan tag rewrite on the given interface.
+ * Return 0 if ok, non-zero error code otherwise.
+ */
+u32
+l2vtr_configure (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op, u32 push_dot1q, /* ethertype of first pushed tag is dot1q/dot1ad */
+ u32 vtr_tag1, /* first pushed tag */
+ u32 vtr_tag2) /* second pushed tag */
+{
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ u32 hw_no_tags;
+ u32 error = 0;
+ l2_output_config_t *config;
+ vtr_config_t *in_config;
+ vtr_config_t *out_config;
+ u32 enable;
+ u32 push_inner_et;
+ u32 push_outer_et;
+ u32 cfg_tags;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+ if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
+ {
+ error = VNET_API_ERROR_INVALID_INTERFACE; /* non-ethernet interface */
+ goto done;
+ }
+
+ /* Init the config for this interface */
+ vec_validate (l2output_main.configs, sw_if_index);
+ config = vec_elt_at_index (l2output_main.configs, sw_if_index);
+ in_config = &(config->input_vtr);
+ out_config = &(config->output_vtr);
+ in_config->raw_tags = 0;
+ out_config->raw_tags = 0;
+
+ /* Get the configured tags for the interface */
+ si = vnet_get_sw_interface (vnet_main, sw_if_index);
+ hw_no_tags = (si->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+
+ /* Construct the input tag-rewrite config */
+
+ push_outer_et =
+ clib_net_to_host_u16 (push_dot1q ? ETHERNET_TYPE_VLAN :
+ ETHERNET_TYPE_DOT1AD);
+ push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN);
+ vtr_tag1 = clib_net_to_host_u16 (vtr_tag1);
+ vtr_tag2 = clib_net_to_host_u16 (vtr_tag2);
+
+  /* Determine number of vlan tags with explicitly configured values */
+ cfg_tags = 0;
+ if (hw_no_tags || si->sub.eth.flags.no_tags)
+ {
+ cfg_tags = 0;
+ }
+ else if (si->sub.eth.flags.one_tag)
+ {
+ cfg_tags = 1;
+ if (si->sub.eth.flags.outer_vlan_id_any)
+ {
+ cfg_tags = 0;
+ }
+ }
+ else if (si->sub.eth.flags.two_tags)
+ {
+ cfg_tags = 2;
+ if (si->sub.eth.flags.inner_vlan_id_any)
+ {
+ cfg_tags = 1;
+ }
+ if (si->sub.eth.flags.outer_vlan_id_any)
+ {
+ cfg_tags = 0;
+ }
+ }
+
+ switch (vtr_op)
+ {
+ case L2_VTR_DISABLED:
+ in_config->push_and_pop_bytes = 0;
+ break;
+
+ case L2_VTR_POP_1:
+ if (cfg_tags < 1)
+ {
+ /* Need one or two tags */
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT;
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 0;
+ break;
+
+ case L2_VTR_POP_2:
+ if (cfg_tags < 2)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 0;
+ break;
+
+ case L2_VTR_PUSH_1:
+ in_config->pop_bytes = 0;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_PUSH_2:
+ in_config->pop_bytes = 0;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+ break;
+
+ case L2_VTR_TRANSLATE_1_1:
+ if (cfg_tags < 1)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need one or two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_TRANSLATE_1_2:
+ if (cfg_tags < 1)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need one or two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+ break;
+
+ case L2_VTR_TRANSLATE_2_1:
+ if (cfg_tags < 2)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_TRANSLATE_2_2:
+ if (cfg_tags < 2)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+ break;
+ }
+
+ /*
+ * Construct the output tag-rewrite config
+ *
+ * The push/pop values are always reversed
+ */
+ out_config->push_bytes = in_config->pop_bytes;
+ out_config->pop_bytes = in_config->push_bytes;
+
+ /* Any pushed tags are derived from the subinterface config */
+ push_outer_et =
+ clib_net_to_host_u16 (si->sub.eth.flags.dot1ad ? ETHERNET_TYPE_DOT1AD :
+ ETHERNET_TYPE_VLAN);
+ push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN);
+ vtr_tag1 = clib_net_to_host_u16 (si->sub.eth.outer_vlan_id);
+ vtr_tag2 = clib_net_to_host_u16 (si->sub.eth.inner_vlan_id);
+
+ if (out_config->push_bytes == 4)
+ {
+ out_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ out_config->tags[1].type = push_outer_et;
+ }
+ else if (out_config->push_bytes == 8)
+ {
+ out_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ out_config->tags[0].type = push_outer_et;
+ out_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ out_config->tags[1].type = push_inner_et;
+ }
+
+ /* set the interface enable flags */
+ enable = (vtr_op != L2_VTR_DISABLED);
+ config->out_vtr_flag = (u8) enable;
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable);
+ /* output vtr enable is checked explicitly in l2_output */
+
+done:
+ return error;
+}
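+
+/*
+ * Illustrative call (the sw_if_index value is hypothetical): configure
+ * a 1:1 translation that rewrites the outer tag to dot1q 100, i.e.
+ * push_dot1q = 1, vtr_tag1 = 100, vtr_tag2 unused:
+ *
+ *   rv = l2vtr_configure (vm, vnet_get_main (), sw_if_index,
+ *                         L2_VTR_TRANSLATE_1_1, 1, 100, 0);
+ */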
+
+/**
+ * Get vlan tag rewrite on the given interface.
+ * Return 0 if ok, non-zero error code otherwise.
+ */
+u32
+l2vtr_get (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 sw_if_index, u32 * vtr_op, u32 * push_dot1q, /* ethertype of first pushed tag is dot1q/dot1ad */
+ u32 * vtr_tag1, /* first pushed tag */
+ u32 * vtr_tag2) /* second pushed tag */
+{
+ vnet_hw_interface_t *hi;
+ u32 error = 0;
+ vtr_config_t *in_config;
+
+ if (!vtr_op || !push_dot1q || !vtr_tag1 || !vtr_tag2)
+ {
+ clib_warning ("invalid arguments");
+ error = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto done;
+ }
+
+ *vtr_op = L2_VTR_DISABLED;
+ *vtr_tag1 = 0;
+ *vtr_tag2 = 0;
+ *push_dot1q = 0;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+ if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
+ {
+ /* non-ethernet interface */
+ goto done;
+ }
+
+ if (sw_if_index >= vec_len (l2output_main.configs))
+ {
+ /* no specific config (return disabled) */
+ goto done;
+ }
+
+ /* Get the config for this interface */
+ in_config =
+ &(vec_elt_at_index (l2output_main.configs, sw_if_index)->input_vtr);
+
+ /* DISABLED */
+ if (in_config->push_and_pop_bytes == 0)
+ {
+ goto done;
+ }
+
+ /* find out vtr_op */
+ switch (in_config->pop_bytes)
+ {
+ case 0:
+ switch (in_config->push_bytes)
+ {
+ case 0:
+ /* DISABLED */
+ goto done;
+ case 4:
+ *vtr_op = L2_VTR_PUSH_1;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_PUSH_2;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d",
+ in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ case 4:
+ switch (in_config->push_bytes)
+ {
+ case 0:
+ *vtr_op = L2_VTR_POP_1;
+ break;
+ case 4:
+ *vtr_op = L2_VTR_TRANSLATE_1_1;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_TRANSLATE_1_2;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d",
+ in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ case 8:
+ switch (in_config->push_bytes)
+ {
+ case 0:
+ *vtr_op = L2_VTR_POP_2;
+ break;
+ case 4:
+ *vtr_op = L2_VTR_TRANSLATE_2_1;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_TRANSLATE_2_2;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d",
+ in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ default:
+ clib_warning ("invalid pop_bytes count: %d", in_config->pop_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/**
+ * Set subinterface vtr enable/disable.
+ * The CLI format is:
+ * set interface l2 tag-rewrite <interface> [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]]
+ *
+ * "push" can also be replaced by "translate-{1|2}-{1|2}"
+ */
+static clib_error_t *
+int_l2_vtr (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 vtr_op;
+ u32 push_dot1q = 0;
+ u32 tag1 = 0, tag2 = 0;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ vtr_op = L2_VTR_DISABLED;
+
+ if (unformat (input, "disable"))
+ {
+ vtr_op = L2_VTR_DISABLED;
+ }
+ else if (unformat (input, "pop 1"))
+ {
+ vtr_op = L2_VTR_POP_1;
+ }
+ else if (unformat (input, "pop 2"))
+ {
+ vtr_op = L2_VTR_POP_2;
+ }
+ else if (unformat (input, "push dot1q %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_PUSH_2;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "push dot1ad %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_PUSH_2;
+ }
+ else if (unformat (input, "push dot1q %d", &tag1))
+ {
+ vtr_op = L2_VTR_PUSH_1;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "push dot1ad %d", &tag1))
+ {
+ vtr_op = L2_VTR_PUSH_1;
+ }
+ else if (unformat (input, "translate 1-1 dot1q %d", &tag1))
+ {
+ vtr_op = L2_VTR_TRANSLATE_1_1;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "translate 1-1 dot1ad %d", &tag1))
+ {
+ vtr_op = L2_VTR_TRANSLATE_1_1;
+ }
+ else if (unformat (input, "translate 2-1 dot1q %d", &tag1))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "translate 2-1 dot1ad %d", &tag1))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+ }
+ else if (unformat (input, "translate 2-2 dot1q %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_2;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "translate 2-2 dot1ad %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_2;
+ }
+ else if (unformat (input, "translate 1-2 dot1q %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_TRANSLATE_1_2;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "translate 1-2 dot1ad %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_TRANSLATE_1_2;
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+			   "expecting [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]\n"
+			   " | translate {1|2}-{1|2} {dot1q|dot1ad} <tag> [<tag>]] but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (l2vtr_configure (vm, vnm, sw_if_index, vtr_op, push_dot1q, tag1, tag2))
+ {
+ error =
+ clib_error_return (0,
+ "vlan tag rewrite is not compatible with interface");
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * VLAN tag rewrite provides the ability to change the VLAN tags on a packet.
+ * Existing tags can be popped, new tags can be pushed, and existing tags can
+ * be swapped with new tags. The rewrite feature is attached to a subinterface
+ * as input and output operations. The input operation is explicitly configured.
+ * The output operation is the symmetric opposite and is automatically derived
+ * from the input operation.
+ *
+ * <b>POP:</b> For pop operations, the subinterface encapsulation (the vlan
+ * tags specified when it was created) must have at least the number of popped
+ * tags. e.g. the \"pop 2\" operation would be rejected on a single-vlan interface.
+ * The output tag-rewrite operation for pops is to push the specified number of
+ * vlan tags onto the packet. The pushed tag values are the ones in the
+ * subinterface encapsulation.
+ *
+ * <b>PUSH:</b> For push operations, the ethertype is also specified. The
+ * output tag-rewrite operation for pushes is to pop the same number of tags
+ * off the packet. If the packet doesn't have enough tags it is dropped.
+ *
+ * @cliexpar
+ * @parblock
+ * By default a subinterface has no tag-rewrite. To return a subinterface to
+ * this state use:
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 disable}
+ *
+ * To pop vlan tags off packets received from a subinterface, use:
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 pop 1}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 pop 2}
+ *
+ * To push one or two vlan tags onto packets received from an interface, use:
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 push dot1q 100}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 push dot1ad 100 150}
+ *
+ * Tags can also be translated, which is basically a combination of a pop and push.
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 1-1 dot1ad 100}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 2-2 dot1ad 100 150}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 1-2 dot1q 100}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 2-1 dot1q 100 150}
+ *
+ * To display the VLAN Tag settings, show the associated bridge-domain:
+ * @cliexstart{show bridge-domain 200 detail}
+ * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf
+ * 200 1 on on on on off N/A
+ *
+ * Interface Index SHG BVI VLAN-Tag-Rewrite
+ * GigabitEthernet0/8/0.200 5 0 - trans-1-1 dot1ad 100
+ * GigabitEthernet0/9/0.200 4 0 - none
+ * GigabitEthernet0/a/0.200 6 0 - none
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = {
+ .path = "set interface l2 tag-rewrite",
+ .short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]",
+ .function = int_l2_vtr,
+};
+/* *INDENT-ON* */
+
+/**
+ * Get pbb tag rewrite on the given interface.
+ * Return 0 if ok, non-zero error code otherwise.
+ */
+u32
+l2pbb_get (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 sw_if_index,
+ u32 * vtr_op, u16 * outer_tag, ethernet_header_t * eth_hdr,
+ u16 * b_vlanid, u32 * i_sid)
+{
+ u32 error = 1;
+ ptr_config_t *in_config;
+
+  if (!vtr_op || !outer_tag || !eth_hdr || !b_vlanid || !i_sid)
+ {
+ clib_warning ("invalid arguments");
+ error = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto done;
+ }
+
+ *vtr_op = L2_VTR_DISABLED;
+ *outer_tag = 0;
+ *b_vlanid = 0;
+ *i_sid = 0;
+
+ if (sw_if_index >= vec_len (l2output_main.configs))
+ {
+ /* no specific config (return disabled) */
+ goto done;
+ }
+
+ /* Get the config for this interface */
+ in_config =
+ &(vec_elt_at_index (l2output_main.configs, sw_if_index)->input_pbb_vtr);
+
+ if (in_config->push_and_pop_bytes == 0)
+ {
+ /* DISABLED */
+ goto done;
+ }
+ else
+ {
+ if (in_config->pop_bytes && in_config->push_bytes)
+ *vtr_op = L2_VTR_TRANSLATE_2_1;
+ else if (in_config->pop_bytes)
+ *vtr_op = L2_VTR_POP_2;
+ else if (in_config->push_bytes)
+ *vtr_op = L2_VTR_PUSH_2;
+
+ clib_memcpy (&eth_hdr->dst_address, in_config->macs_tags.b_dst_address,
+ sizeof (eth_hdr->dst_address));
+ clib_memcpy (&eth_hdr->src_address, in_config->macs_tags.b_src_address,
+ sizeof (eth_hdr->src_address));
+
+ *b_vlanid =
+ clib_host_to_net_u16 (in_config->macs_tags.priority_dei_id) & 0xFFF;
+ *i_sid =
+ clib_host_to_net_u32 (in_config->macs_tags.
+ priority_dei_uca_res_sid) & 0xFFFFF;
+ error = 0;
+ }
+done:
+ return error;
+}
+
+/**
+ * Set subinterface pbb vtr enable/disable.
+ * The CLI format is:
+ * set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]
+ */
+static clib_error_t *
+int_l2_pbb_vtr (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+  u32 sw_if_index = ~0, tmp;
+ u32 vtr_op = L2_VTR_DISABLED;
+ u32 outer_tag = 0;
+ u8 dmac[6];
+ u8 smac[6];
+ u8 dmac_set = 0, smac_set = 0;
+ u16 b_vlanid = 0;
+ u32 s_id = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user
+ (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "disable"))
+ vtr_op = L2_VTR_DISABLED;
+ else if (vtr_op == L2_VTR_DISABLED && unformat (input, "pop"))
+ vtr_op = L2_VTR_POP_2;
+ else if (vtr_op == L2_VTR_DISABLED && unformat (input, "push"))
+ vtr_op = L2_VTR_PUSH_2;
+ else if (vtr_op == L2_VTR_DISABLED
+ && unformat (input, "translate_pbb_stag %d", &outer_tag))
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+ else if (unformat (input, "dmac %U", unformat_ethernet_address, dmac))
+ dmac_set = 1;
+ else if (unformat (input, "smac %U", unformat_ethernet_address, smac))
+ smac_set = 1;
+ else if (unformat (input, "b_vlanid %d", &tmp))
+ b_vlanid = tmp;
+ else if (unformat (input, "s_id %d", &s_id))
+ ;
+ else
+ {
+ error = clib_error_return (0,
+ "expecting [disable | pop | push | translate_pbb_stag <outer_tag>\n"
+ "dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]");
+ goto done;
+ }
+ }
+
+  if (sw_if_index == ~0)
+    {
+      error = clib_error_return (0, "interface must be specified");
+      goto done;
+    }
+
+  if ((vtr_op == L2_VTR_PUSH_2 || vtr_op == L2_VTR_TRANSLATE_2_1)
+      && (!dmac_set || !smac_set || s_id == ~0))
+ {
+ error = clib_error_return (0,
+ "expecting dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]");
+ goto done;
+ }
+
+ if (l2pbb_configure
+ (vm, vnm, sw_if_index, vtr_op, dmac, smac, b_vlanid, s_id, outer_tag))
+ {
+ error =
+ clib_error_return (0,
+ "pbb tag rewrite is not compatible with interface");
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_pbb_vtr_cli, static) = {
+ .path = "set interface l2 pbb-tag-rewrite",
+ .short_help = "set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]",
+ .function = int_l2_pbb_vtr,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_vtr.h b/src/vnet/l2/l2_vtr.h
new file mode 100644
index 00000000..0aea618e
--- /dev/null
+++ b/src/vnet/l2/l2_vtr.h
@@ -0,0 +1,281 @@
+/*
+ * l2_vtr.h : layer 2 vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_vtr_h
+#define included_vnet_l2_vtr_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/packet.h>
+
+/* VTR config options for API and CLI support */
+typedef enum
+{
+ L2_VTR_DISABLED,
+ L2_VTR_PUSH_1,
+ L2_VTR_PUSH_2,
+ L2_VTR_POP_1,
+ L2_VTR_POP_2,
+ L2_VTR_TRANSLATE_1_1,
+ L2_VTR_TRANSLATE_1_2,
+ L2_VTR_TRANSLATE_2_1,
+ L2_VTR_TRANSLATE_2_2
+} l2_vtr_op_t;
+
+/**
+ * Per-interface vlan tag rewrite configuration
+ * There will be one instance of this struct for each sw_if_index
+ * for both input vtr and output vtr
+ */
+typedef struct
+{
+ union
+ {
+ /*
+ * Up to two vlan tags to push.
+     * If there is only one vlan tag to push, it is in tags[1].
+ */
+ ethernet_vlan_header_tv_t tags[2];
+ u64 raw_tags;
+ };
+
+ union
+ {
+ struct
+ {
+ u8 push_bytes; /* number of bytes to push for up to 2 vlans (0,4,8) */
+ u8 pop_bytes; /* number of bytes to pop for up to 2 vlans (0,4,8) */
+ };
+ u16 push_and_pop_bytes; /* if 0 then the feature is disabled */
+ };
+} vtr_config_t;
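+
+/*
+ * Illustrative encoding (mirrors what l2vtr_configure in l2_vtr.c
+ * writes): a PUSH_1 of dot1q tag 100 is stored as
+ *   push_bytes = 4, pop_bytes = 0,
+ *   tags[1].type = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN),
+ *   tags[1].priority_cfi_and_id = clib_net_to_host_u16 (100),
+ * with tags[0] unused; a non-zero push_and_pop_bytes is what marks
+ * the feature enabled.
+ */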
+
+
+/**
+ * Perform the configured tag rewrite on the packet.
+ * Return 0 if ok, 1 if packet should be dropped (e.g. tried to pop
+ * too many tags)
+ */
+always_inline u32
+l2_vtr_process (vlib_buffer_t * b0, vtr_config_t * config)
+{
+ u64 temp_8;
+ u32 temp_4;
+ u8 *eth;
+
+ eth = vlib_buffer_get_current (b0);
+
+ /* copy the 12B dmac and smac to a temporary location */
+ temp_8 = *((u64 *) eth);
+ temp_4 = *((u32 *) (eth + 8));
+
+ /* adjust for popped tags */
+ eth += config->pop_bytes;
+
+ /* if not enough tags to pop then drop packet */
+ if (PREDICT_FALSE ((vnet_buffer (b0)->l2.l2_len - 12) < config->pop_bytes))
+ {
+ return 1;
+ }
+
+ /* copy the 2 new tags to the start of the packet */
+ *((u64 *) (eth + 12 - 8)) = config->raw_tags;
+
+ /* TODO: set cos bits */
+
+ /* adjust for pushed tags: */
+ eth -= config->push_bytes;
+
+  /* copy the 12B dmac and smac back to the packet */
+ *((u64 *) eth) = temp_8;
+ *((u32 *) (eth + 8)) = temp_4;
+
+ /* Update l2_len */
+ vnet_buffer (b0)->l2.l2_len +=
+ (word) config->push_bytes - (word) config->pop_bytes;
+
+ /* Update vlan tag count */
+ ethernet_buffer_adjust_vlan_count_by_bytes (b0,
+ (word) config->push_bytes -
+ (word) config->pop_bytes);
+
+ /* Update packet len */
+ vlib_buffer_advance (b0,
+ (word) config->pop_bytes - (word) config->push_bytes);
+
+ return 0;
+}
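+
+/*
+ * Worked example (illustrative): with pop_bytes = 4 and push_bytes = 8
+ * (a 1:2 translation), the ethernet header lands 4 bytes earlier in
+ * the buffer, l2.l2_len grows by 4 bytes and the vlan tag count by
+ * one tag, and vlib_buffer_advance (b0, -4) extends the packet by
+ * the 4 extra tag bytes.
+ */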
+
+
+/*
+ * Perform the egress pre-vlan tag rewrite EFP Filter check.
+ * The post-vlan tag rewrite check is a separate graph node.
+ *
+ * This check ensures that a packet being output to an interface
+ * (before output vtr is performed) has vlan tags that match those
+ * on a packet received from that interface (after vtr has been performed).
+ * This means verifying that any tags pushed by input vtr are present
+ * on the packet.
+ *
+ * Return 0 if ok, 1 if packet should be dropped.
+ * This function should be passed the input vtr config for the interface.
+ */
+always_inline u8
+l2_efp_filter_process (vlib_buffer_t * b0, vtr_config_t * in_config)
+{
+ u8 *eth;
+ u64 packet_tags;
+ u64 tag_mask;
+
+ eth = vlib_buffer_get_current (b0);
+
+ /*
+ * If there are 2 tags pushed, they must match config->tags[0] and
+ * config->tags[1].
+   * If there is one tag pushed, it must match config->tags[1].
+ * If there are 0 tags pushed, the check passes.
+ */
+
+ /* mask for two vlan id and ethertypes, no cos bits */
+ tag_mask = clib_net_to_host_u64 (0xFFFF0FFFFFFF0FFF);
+ /* mask for one vlan id and ethertype, no cos bits */
+ tag_mask =
+ (in_config->push_bytes ==
+ 4) ? clib_net_to_host_u64 (0xFFFF0FFF) : tag_mask;
+ /* mask for always match */
+ tag_mask = (in_config->push_bytes == 0) ? 0 : tag_mask;
+
+ /*
+ * Read 8B from the packet, getting the proper set of vlan tags
+ * For 0 push bytes, the address doesn't matter since the mask
+ * clears the data to 0.
+ */
+ packet_tags = *((u64 *) (eth + 4 + in_config->push_bytes));
+
+ /* Check if the packet tags match the configured tags */
+ return (packet_tags & tag_mask) != in_config->raw_tags;
+}
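+
+/*
+ * Illustrative check (values hypothetical): if input vtr pushed one
+ * dot1q tag 100, raw_tags holds that single tag in tags[1] and
+ * tag_mask keeps only its vlan id and ethertype bits, so the packet
+ * passes iff it still carries ethertype 0x8100 with vlan id 100,
+ * whatever its cos bits are.
+ */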
+
+typedef struct
+{
+ union
+ {
+ ethernet_pbb_header_t macs_tags;
+ struct
+ {
+ u64 data1;
+ u64 data2;
+ u16 data3;
+ u32 data4;
+ } raw_data;
+ };
+ union
+ {
+ struct
+ {
+ u8 push_bytes; /* number of bytes to push pbb tags */
+ u8 pop_bytes; /* number of bytes to pop pbb tags */
+ };
+ u16 push_and_pop_bytes; /* if 0 then the feature is disabled */
+ };
+} ptr_config_t;
+
+always_inline u32
+l2_pbb_process (vlib_buffer_t * b0, ptr_config_t * config)
+{
+ u8 *eth = vlib_buffer_get_current (b0);
+
+ if (config->pop_bytes > 0)
+ {
+ ethernet_pbb_header_packed_t *ph = (ethernet_pbb_header_packed_t *) eth;
+
+      /* drop packet without PBB header or with wrong I-tag or B-tag */
+ if (clib_net_to_host_u16 (ph->priority_dei_id) !=
+ clib_net_to_host_u16 (config->macs_tags.priority_dei_id)
+ || clib_net_to_host_u32 (ph->priority_dei_uca_res_sid) !=
+ clib_net_to_host_u32 (config->macs_tags.priority_dei_uca_res_sid))
+ return 1;
+
+ eth += config->pop_bytes;
+ }
+
+ if (config->push_bytes > 0)
+ {
+ eth -= config->push_bytes;
+      /* copy the B-DA (6B), B-SA (6B), B-TAG (4B), I-TAG (6B) */
+ *((u64 *) eth) = config->raw_data.data1;
+ *((u64 *) (eth + 8)) = config->raw_data.data2;
+ *((u16 *) (eth + 16)) = config->raw_data.data3;
+ *((u32 *) (eth + 18)) = config->raw_data.data4;
+ }
+
+ /* Update l2_len */
+ vnet_buffer (b0)->l2.l2_len +=
+ (word) config->push_bytes - (word) config->pop_bytes;
+ /* Update packet len */
+ vlib_buffer_advance (b0,
+ (word) config->pop_bytes - (word) config->push_bytes);
+
+ return 0;
+}
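+
+/*
+ * Note: a PBB push writes the whole 22-byte backbone header staged in
+ * raw_data (B-DA 6 + B-SA 6 + B-TAG 4 + I-TAG 6 bytes, split across
+ * data1/data2/data3/data4), while a pop first validates the packet's
+ * B-TAG vlan id and I-SID against the configured values and drops the
+ * packet on mismatch.
+ */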
+
+u32 l2pbb_configure (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op,
+ u8 * b_dmac, u8 * b_smac,
+ u16 b_vlanid, u32 i_sid, u16 vlan_outer_tag);
+
+/**
+ * Configure vlan tag rewrite on the given interface.
+ * Return 0 if ok, non-zero error code otherwise.
+ */
+u32 l2vtr_configure (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 vtr_op, u32 push_dot1q, u32 vtr_tag1, u32 vtr_tag2);
+
+/**
+ * Get vlan tag rewrite on the given interface.
+ * Return 0 if ok, non-zero error code otherwise.
+ */
+u32 l2vtr_get (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 * vtr_op,
+ u32 * push_dot1q, u32 * vtr_tag1, u32 * vtr_tag2);
+
+/**
+ * Get pbb tag rewrite on the given interface.
+ * Return 0 if ok, non-zero error code otherwise.
+ */
+u32 l2pbb_get (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 * vtr_op,
+ u16 * outer_tag,
+ ethernet_header_t * eth_hdr, u16 * b_vlanid, u32 * i_sid);
+
+#endif /* included_vnet_l2_vtr_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c
new file mode 100644
index 00000000..d08a5d8f
--- /dev/null
+++ b/src/vnet/l2/l2_xcrw.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/l2/l2_xcrw.h>
+
+/**
+ * @file
+ * General L2 / L3 cross-connect, used to set up
+ * "L2 interface <--> your-favorite-tunnel-encap" tunnels.
+ *
+ * We set up a typical L2 cross-connect or (future) bridge
+ * to hook L2 interface(s) up to the L3 stack in arbitrary ways.
+ *
+ * Each l2_xcrw adjacency specifies 3 things:
+ *
+ * 1. The next graph node (presumably in the L3 stack) to
+ * process the (L2 -> L3) packet
+ *
+ * 2. A new value for vnet_buffer(b)->sw_if_index[VLIB_TX]
+ * (i.e. a lookup FIB index),
+ *
+ * 3. A rewrite string to apply.
+ *
+ * Example: to cross-connect an L2 interface or (future) bridge
+ * to an mpls-o-gre tunnel, set up the L2 rewrite string as shown in
+ * mpls_gre_rewrite, and use "mpls-post-rewrite" to fix the
+ * GRE IP header checksum and length fields.
+ */
+
+typedef struct
+{
+ u32 next_index;
+ u32 tx_fib_index;
+} l2_xcrw_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_xcrw_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_xcrw_trace_t *t = va_arg (*args, l2_xcrw_trace_t *);
+
+ s = format (s, "L2_XCRW: next index %d tx_fib_index %d",
+ t->next_index, t->tx_fib_index);
+ return s;
+}
+
+l2_xcrw_main_t l2_xcrw_main;
+
+static vlib_node_registration_t l2_xcrw_node;
+
+static char *l2_xcrw_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_xcrw_error
+#undef _
+};
+
+static uword
+l2_xcrw_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_xcrw_next_t next_index;
+ l2_xcrw_main_t *xcm = &l2_xcrw_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_xcrw_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ l2_xcrw_adjacency_t *adj0, *adj1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0);
+ adj1 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index1);
+
+ next0 = adj0->rewrite_header.next_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ adj0->rewrite_header.sw_if_index;
+
+ next1 = adj1->rewrite_header.next_index;
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] =
+ adj1->rewrite_header.sw_if_index;
+
+ if (PREDICT_TRUE (next0 > 0))
+ {
+ u8 *h0 = vlib_buffer_get_current (b0);
+ vnet_rewrite_one_header (adj0[0], h0,
+ adj0->rewrite_header.data_bytes);
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+ if (PREDICT_TRUE (next1 > 0))
+ {
+ u8 *h1 = vlib_buffer_get_current (b1);
+ vnet_rewrite_one_header (adj1[0], h1,
+ adj1->rewrite_header.data_bytes);
+ vlib_buffer_advance (b1, -adj1->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_xcrw_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->tx_fib_index = adj0->rewrite_header.sw_if_index;
+ }
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_xcrw_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ t->tx_fib_index = adj1->rewrite_header.sw_if_index;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ l2_xcrw_adjacency_t *adj0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0);
+
+ next0 = adj0->rewrite_header.next_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ adj0->rewrite_header.sw_if_index;
+
+ if (PREDICT_TRUE (next0 > 0))
+ {
+ u8 *h0 = vlib_buffer_get_current (b0);
+ vnet_rewrite_one_header (adj0[0], h0,
+ adj0->rewrite_header.data_bytes);
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_xcrw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->tx_fib_index = adj0->rewrite_header.sw_if_index;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_xcrw_node, static) = {
+ .function = l2_xcrw_node_fn,
+ .name = "l2-xcrw",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_xcrw_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_xcrw_error_strings),
+ .error_strings = l2_xcrw_error_strings,
+
+ .n_next_nodes = L2_XCRW_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_XCRW_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_xcrw_node, l2_xcrw_node_fn)
+ clib_error_t *l2_xcrw_init (vlib_main_t * vm)
+{
+ l2_xcrw_main_t *mp = &l2_xcrw_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = &vnet_main;
+ mp->tunnel_index_by_l2_sw_if_index = hash_create (0, sizeof (uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_xcrw_init);
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static u8 *
+format_xcrw_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "xcrw%d", dev_instance);
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (xcrw_device_class,static) = {
+ .name = "Xcrw",
+ .format_device_name = format_xcrw_name,
+ .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+/* Create a sham tunnel interface and return its sw_if_index */
+static u32
+create_xcrw_interface (vlib_main_t * vm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ static u32 instance;
+ u8 address[6];
+ u32 hw_if_index;
+ vnet_hw_interface_t *hi;
+ u32 sw_if_index;
+
+ /* mac address doesn't really matter */
+ memset (address, 0, sizeof (address));
+ address[2] = 0x12;
+
+  /* can return an error iff phy != 0 */
+ (void) ethernet_register_interface
+ (vnm, xcrw_device_class.index, instance++, address, &hw_if_index,
+ /* flag change */ 0);
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ sw_if_index = hi->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ /* Output to the sham tunnel invokes the encap node */
+ hi->output_node_index = l2_xcrw_node.index;
+
+ return sw_if_index;
+}
+
+int
+vnet_configure_l2_xcrw (vlib_main_t * vm, vnet_main_t * vnm,
+ u32 l2_sw_if_index, u32 tx_fib_index,
+ u8 * rewrite, u32 next_node_index, int is_add)
+{
+ l2_xcrw_main_t *xcm = &l2_xcrw_main;
+ l2_xcrw_adjacency_t *a;
+ l2_xcrw_tunnel_t *t;
+ uword *p;
+
+ if (is_add)
+ {
+
+ pool_get (xcm->tunnels, t);
+
+ /* No interface allocated? Do it. Otherwise, set admin up */
+ if (t->tunnel_sw_if_index == 0)
+ t->tunnel_sw_if_index = create_xcrw_interface (vm);
+ else
+ vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ t->l2_sw_if_index = l2_sw_if_index;
+
+ vec_validate (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+ memset (a, 0, sizeof (*a));
+
+ a->rewrite_header.sw_if_index = tx_fib_index;
+
+ /*
+ * Add or find a dynamic disposition for the successor node,
+ * e.g. so we can ship pkts to mpls_post_rewrite...
+ */
+ a->rewrite_header.next_index =
+ vlib_node_add_next (vm, l2_xcrw_node.index, next_node_index);
+
+ if (vec_len (rewrite))
+ vnet_rewrite_set_data (a[0], rewrite, vec_len (rewrite));
+
+ set_int_l2_mode (vm, vnm, MODE_L2_XC, t->l2_sw_if_index, 0, 0, 0,
+ t->tunnel_sw_if_index);
+ hash_set (xcm->tunnel_index_by_l2_sw_if_index,
+ t->l2_sw_if_index, t - xcm->tunnels);
+ return 0;
+ }
+ else
+ {
+ p = hash_get (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index);
+ if (p == 0)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ t = pool_elt_at_index (xcm->tunnels, p[0]);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+ /* Reset adj to drop traffic */
+ memset (a, 0, sizeof (*a));
+
+ set_int_l2_mode (vm, vnm, MODE_L3, t->l2_sw_if_index, 0, 0, 0, 0);
+
+ vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index, 0 /* down */ );
+
+ hash_unset (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index);
+ pool_put (xcm->tunnels, t);
+ }
+ return 0;
+}
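+
+/*
+ * Illustrative add (all indices and the rewrite bytes are
+ * hypothetical; "ip4-lookup" is just an example next node):
+ *
+ *   u8 *rw = 0;
+ *   u32 next_node_index =
+ *     vlib_get_node_by_name (vm, (u8 *) "ip4-lookup")->index;
+ *   vec_add (rw, encap_bytes, encap_len);
+ *   rv = vnet_configure_l2_xcrw (vm, vnm, l2_sw_if_index,
+ *                                tx_fib_index, rw,
+ *                                next_node_index, 1);
+ */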
+
+
+static clib_error_t *
+set_l2_xcrw_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int is_add = 1;
+ int is_ipv6 = 0; /* for fib id -> fib index mapping */
+ u32 tx_fib_id = ~0;
+ u32 tx_fib_index = ~0;
+ u32 next_node_index = ~0;
+ u32 l2_sw_if_index;
+ u8 *rw = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv;
+ clib_error_t *error = NULL;
+
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (!unformat (line_input, "%U",
+ unformat_vnet_sw_interface, vnm, &l2_sw_if_index))
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "next %U",
+ unformat_vlib_node, vm, &next_node_index))
+ ;
+ else if (unformat (line_input, "tx-fib-id %d", &tx_fib_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "ipv6"))
+ is_ipv6 = 1;
+      else if (unformat (line_input, "rw %U", unformat_hex_string, &rw))
+        ;
+ else
+ break;
+ }
+
+ if (next_node_index == ~0)
+ {
+ error = clib_error_return (0, "next node not specified");
+ goto done;
+ }
+
+ if (tx_fib_id != ~0)
+ {
+ uword *p;
+
+ if (is_ipv6)
+ p = hash_get (ip6_main.fib_index_by_table_id, tx_fib_id);
+ else
+ p = hash_get (ip4_main.fib_index_by_table_id, tx_fib_id);
+
+ if (p == 0)
+ {
+ error =
+ clib_error_return (0, "nonexistent tx_fib_id %d", tx_fib_id);
+ goto done;
+ }
+
+ tx_fib_index = p[0];
+ }
+
+ rv = vnet_configure_l2_xcrw (vm, vnm, l2_sw_if_index, tx_fib_index,
+ rw, next_node_index, is_add);
+
+ switch (rv)
+ {
+
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ error = clib_error_return (0, "%U not cross-connected",
+ format_vnet_sw_if_index_name,
+ vnm, l2_sw_if_index);
+ goto done;
+
+ default:
+ error = clib_error_return (0, "vnet_configure_l2_xcrw returned %d", rv);
+ goto done;
+ }
+
+done:
+ vec_free (rw);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Add or delete a Layer 2 to Layer 3 rewrite cross-connect. This is
+ * used to hook Layer 2 interface(s) up to the Layer 3 stack in
+ * arbitrary ways. For example, cross-connect an L2 interface or
+ * (future) bridge to an mpls-o-gre tunnel. Set up the L2 rewrite
+ * string as shown in mpls_gre_rewrite, and use \"mpls-post-rewrite\"
+ * to fix the GRE IP header checksum and length fields.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
+ .path = "set interface l2 xcrw",
+ .short_help =
+ "set interface l2 xcrw <interface> next <node-name>\n"
+ " [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>",
+ .function = set_l2_xcrw_command_fn,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_l2xcrw (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ l2_xcrw_tunnel_t *t = va_arg (*args, l2_xcrw_tunnel_t *);
+ l2_xcrw_main_t *xcm = &l2_xcrw_main;
+ vlib_main_t *vm = vlib_get_main ();
+ l2_xcrw_adjacency_t *a;
+ u8 *rewrite_string;
+
+ if (t == 0)
+ {
+ s = format (s, "%-25s%s", "L2 interface", "Tunnel Details");
+ return s;
+ }
+
+ s = format (s, "%-25U %U ",
+ format_vnet_sw_if_index_name, vnm, t->l2_sw_if_index,
+ format_vnet_sw_if_index_name, vnm, t->tunnel_sw_if_index);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+
+ s = format (s, "next %U ",
+ format_vlib_next_node_name, vm, l2_xcrw_node.index,
+ a->rewrite_header.next_index);
+
+ if (a->rewrite_header.sw_if_index != ~0)
+ s = format (s, "tx fib index %d ", a->rewrite_header.sw_if_index);
+
+ if (a->rewrite_header.data_bytes)
+ {
+ rewrite_string = (u8 *) (a + 1);
+ rewrite_string -= a->rewrite_header.data_bytes;
+ s = format (s, "rewrite data: %U ",
+ format_hex_bytes, rewrite_string,
+ a->rewrite_header.data_bytes);
+ }
+
+ s = format (s, "\n");
+
+ return s;
+}
+
+
+static clib_error_t *
+show_l2xcrw_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ l2_xcrw_main_t *xcm = &l2_xcrw_main;
+ l2_xcrw_tunnel_t *t;
+
+ if (pool_elts (xcm->tunnels) == 0)
+ {
+ vlib_cli_output (vm, "No L2 / L3 rewrite cross-connects configured");
+ return 0;
+ }
+
+ vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0);
+
+ /* *INDENT-OFF* */
+ pool_foreach (t, xcm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/*?
+ * Display a Layer 2 to Layer 3 rewrite cross-connect. This is used to
+ * hook Layer 2 interface(s) up to the Layer 3 stack in arbitrary ways.
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2xcrw_command, static) = {
+ .path = "show l2xcrw",
+ .short_help = "show l2xcrw",
+ .function = show_l2xcrw_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_xcrw.h b/src/vnet/l2/l2_xcrw.h
new file mode 100644
index 00000000..ca80aae9
--- /dev/null
+++ b/src/vnet/l2/l2_xcrw.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_l2_xcrw_h__
+#define __included_l2_xcrw_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+
+typedef struct
+{
+ /*
+ * Let: rewrite_header.sw_if_index = tx_fib_index or ~0.
+ * rewrite_header.next_index = L2_XCRW_NEXT_XXX
+ */
+ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
+} l2_xcrw_adjacency_t;
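+
+/*
+ * Note: the vnet rewrite machinery stores the rewrite bytes
+ * right-justified at the end of the declared area, which is why
+ * format_l2xcrw () in l2_xcrw.c locates them at
+ * (u8 *) (a + 1) - rewrite_header.data_bytes.
+ */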
+
+typedef struct
+{
+ /* L2 interface */
+ u32 l2_sw_if_index;
+
+ /* Tunnel interface */
+ u32 tunnel_sw_if_index; /* This field remains set in freed pool elts */
+
+} l2_xcrw_tunnel_t;
+
+typedef struct
+{
+ u32 cached_next_index;
+
+ /* Vector of cross-connect rewrites */
+ l2_xcrw_adjacency_t *adj_by_sw_if_index;
+
+ /* Pool of xcrw tunnels */
+ l2_xcrw_tunnel_t *tunnels;
+
+  /* Tunnel index by L2 sw_if_index */
+ uword *tunnel_index_by_l2_sw_if_index;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_xcrw_main_t;
+
+typedef enum
+{
+ L2_XCRW_NEXT_DROP,
+ L2_XCRW_N_NEXT,
+} l2_xcrw_next_t;
+
+#define foreach_l2_xcrw_error \
+_(DROP, "Packets dropped") \
+_(FWD, "Packets forwarded")
+
+typedef enum
+{
+#define _(sym,str) L2_XCRW_ERROR_##sym,
+ foreach_l2_xcrw_error
+#undef _
+ L2_XCRW_N_ERROR,
+} l2_xcrw_error_t;
+
+#endif /* __included_l2_xcrw_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2tp/decap.c b/src/vnet/l2tp/decap.c
new file mode 100644
index 00000000..46104129
--- /dev/null
+++ b/src/vnet/l2tp/decap.c
@@ -0,0 +1,309 @@
+/*
+ * decap.c : L2TPv3 tunnel decapsulation
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/l2tp/l2tp.h>
+
+/* Statistics (not really errors) */
+#define foreach_l2t_decap_error \
+_(USER_TO_NETWORK, "L2TP user (ip6) to L2 network pkts") \
+_(SESSION_ID_MISMATCH, "l2tpv3 local session id mismatches") \
+_(COOKIE_MISMATCH, "l2tpv3 local cookie mismatches") \
+_(NO_SESSION, "l2tpv3 session not found") \
+_(ADMIN_DOWN, "l2tpv3 tunnel is down")
+
+static char *l2t_decap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2t_decap_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) L2T_DECAP_ERROR_##sym,
+ foreach_l2t_decap_error
+#undef _
+ L2T_DECAP_N_ERROR,
+} l2t_decap_error_t;
+
+typedef enum
+{
+ L2T_DECAP_NEXT_DROP,
+ L2T_DECAP_NEXT_L2_INPUT,
+ L2T_DECAP_N_NEXT,
+ /* Pseudo next index */
+ L2T_DECAP_NEXT_NO_INTERCEPT = L2T_DECAP_N_NEXT,
+} l2t_decap_next_t;
+
+#define NSTAGES 3
+
+static inline void
+stage0 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+ vlib_prefetch_buffer_header (b, STORE);
+ /* l2tpv3 header is a long way away, need 2 cache lines */
+ CLIB_PREFETCH (b->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+static inline void
+stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ l2t_main_t *lm = &l2t_main;
+ ip6_header_t *ip6 = vlib_buffer_get_current (b);
+ u32 session_index;
+ uword *p = 0;
+ l2tpv3_header_t *l2t;
+
+  /* Not L2TPv3 (ip protocol 0x73, 115 decimal)? Use the normal path. */
+ if (PREDICT_FALSE (ip6->protocol != IP_PROTOCOL_L2TP))
+ {
+ vnet_buffer (b)->l2t.next_index = L2T_DECAP_NEXT_NO_INTERCEPT;
+ return;
+ }
+
+ /* Make up your minds, people... */
+ switch (lm->lookup_type)
+ {
+ case L2T_LOOKUP_SRC_ADDRESS:
+ p = hash_get_mem (lm->session_by_src_address, &ip6->src_address);
+ break;
+ case L2T_LOOKUP_DST_ADDRESS:
+ p = hash_get_mem (lm->session_by_dst_address, &ip6->dst_address);
+ break;
+ case L2T_LOOKUP_SESSION_ID:
+ l2t = (l2tpv3_header_t *) (ip6 + 1);
+ p = hash_get (lm->session_by_session_id, l2t->session_id);
+ break;
+ default:
+ ASSERT (0);
+ }
+
+ if (PREDICT_FALSE (p == 0))
+ {
+ vnet_buffer (b)->l2t.next_index = L2T_DECAP_NEXT_NO_INTERCEPT;
+ return;
+ }
+ else
+ {
+ session_index = p[0];
+ }
+
+ /* Remember mapping index, prefetch the mini counter */
+ vnet_buffer (b)->l2t.next_index = L2T_DECAP_NEXT_L2_INPUT;
+ vnet_buffer (b)->l2t.session_index = session_index;
+
+ /* $$$$$ prefetch counter */
+}
+
+static inline u32
+last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ l2t_main_t *lm = &l2t_main;
+ ip6_header_t *ip6 = vlib_buffer_get_current (b);
+ vlib_node_t *n = vlib_get_node (vm, node->node_index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ l2tpv3_header_t *l2tp;
+ u32 counter_index;
+ l2t_session_t *session = 0;
+ u32 session_index;
+ u32 next_index;
+ u8 l2tp_decap_local = (l2t_decap_local_node.index == n->index);
+
+ /* Other-than-output pkt? We're done... */
+ if (vnet_buffer (b)->l2t.next_index != L2T_DECAP_NEXT_L2_INPUT)
+ {
+ next_index = vnet_buffer (b)->l2t.next_index;
+ goto done;
+ }
+
+ em->counters[node_counter_base_index + L2T_DECAP_ERROR_USER_TO_NETWORK] +=
+ 1;
+
+ session_index = vnet_buffer (b)->l2t.session_index;
+
+ counter_index =
+ session_index_to_counter_index (session_index,
+ SESSION_COUNTER_USER_TO_NETWORK);
+
+ /* per-mapping byte stats include the ethernet header */
+ vlib_increment_combined_counter (&lm->counter_main,
+ vlib_get_thread_index (),
+ counter_index, 1 /* packet_increment */ ,
+ vlib_buffer_length_in_chain (vm, b) +
+ sizeof (ethernet_header_t));
+
+ session = pool_elt_at_index (lm->sessions, session_index);
+
+ l2tp = vlib_buffer_get_current (b) + sizeof (*ip6);
+
+ if (PREDICT_FALSE (l2tp->session_id != session->local_session_id))
+ {
+ /* Key matched but session id does not. Assume packet is not for us. */
+ em->counters[node_counter_base_index +
+ L2T_DECAP_ERROR_SESSION_ID_MISMATCH] += 1;
+ next_index = L2T_DECAP_NEXT_NO_INTERCEPT;
+ goto done;
+ }
+
+ if (PREDICT_FALSE (l2tp->cookie != session->local_cookie[0]))
+ {
+ if (l2tp->cookie != session->local_cookie[1])
+ {
+ /* Key and session ID matched, but cookie doesn't. Drop this packet. */
+ b->error = node->errors[L2T_DECAP_ERROR_COOKIE_MISMATCH];
+ next_index = L2T_DECAP_NEXT_DROP;
+ goto done;
+ }
+ }
+
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = session->sw_if_index;
+
+ if (PREDICT_FALSE (!(session->admin_up)))
+ {
+ b->error = node->errors[L2T_DECAP_ERROR_ADMIN_DOWN];
+ next_index = L2T_DECAP_NEXT_DROP;
+ goto done;
+ }
+
+ /* strip the ip6 and L2TP header */
+ vlib_buffer_advance (b, sizeof (*ip6) + session->l2tp_hdr_size);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b);
+
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2t_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->is_user_to_network = 1;
+ t->our_address.as_u64[0] = ip6->dst_address.as_u64[0];
+ t->our_address.as_u64[1] = ip6->dst_address.as_u64[1];
+ t->client_address.as_u64[0] = ip6->src_address.as_u64[0];
+ t->client_address.as_u64[1] = ip6->src_address.as_u64[1];
+ t->session_index = session_index;
+ }
+
+ return L2T_DECAP_NEXT_L2_INPUT;
+
+done:
+ if (next_index == L2T_DECAP_NEXT_NO_INTERCEPT)
+ {
+ /* Small behavioral change between l2tp-decap and l2tp-decap-local */
+ if (l2tp_decap_local)
+ {
+ b->error = node->errors[L2T_DECAP_ERROR_NO_SESSION];
+ next_index = L2T_DECAP_NEXT_DROP;
+ }
+ else
+ {
+ /* Go to next node on the ip6 configuration chain */
+ if (PREDICT_TRUE (session != 0))
+ vnet_feature_next (session->sw_if_index, &next_index, b);
+ }
+ }
+
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2t_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->is_user_to_network = 1;
+ t->our_address.as_u64[0] = ip6->dst_address.as_u64[0];
+ t->our_address.as_u64[1] = ip6->dst_address.as_u64[1];
+ t->client_address.as_u64[0] = ip6->src_address.as_u64[0];
+ t->client_address.as_u64[1] = ip6->src_address.as_u64[1];
+ t->session_index = ~0;
+ }
+ return next_index;
+}
+
+#include <vnet/pipeline.h>
+
+static uword
+l2t_decap_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return dispatch_pipeline (vm, node, frame);
+}
+
+/*
+ * l2tp-decap and l2tp-decap-local have very slightly different behavior.
+ * When a packet has no associated session, l2tp-decap lets it go to the
+ * ip6 forwarding path, while l2tp-decap-local drops it.
+ */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2t_decap_node) = {
+ .function = l2t_decap_node_fn,
+ .name = "l2tp-decap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2t_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2t_decap_error_strings),
+ .error_strings = l2t_decap_error_strings,
+
+ .n_next_nodes = L2T_DECAP_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2T_DECAP_NEXT_L2_INPUT] = "l2-input",
+ [L2T_DECAP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2t_decap_node, l2t_decap_node_fn);
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2t_decap_local_node) = {
+ .function = l2t_decap_node_fn,
+ .name = "l2tp-decap-local",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2t_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2t_decap_error_strings),
+ .error_strings = l2t_decap_error_strings,
+
+ .n_next_nodes = L2T_DECAP_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2T_DECAP_NEXT_L2_INPUT] = "l2-input",
+ [L2T_DECAP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+void
+l2tp_decap_init (void)
+{
+ ip6_register_protocol (IP_PROTOCOL_L2TP, l2t_decap_local_node.index);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2tp/encap.c b/src/vnet/l2tp/encap.c
new file mode 100644
index 00000000..dcdfde4b
--- /dev/null
+++ b/src/vnet/l2tp/encap.c
@@ -0,0 +1,238 @@
+/*
+ * encap.c : L2TPv3 tunnel encapsulation
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/l2tp/l2tp.h>
+
+/* Statistics (not really errors) */
+#define foreach_l2t_encap_error \
+_(NETWORK_TO_USER, "L2TP L2 network to user (ip6) pkts") \
+_(LOOKUP_FAIL_TO_L3, "L2TP L2 session lookup failed pkts") \
+_(ADMIN_DOWN, "L2TP tunnel is down")
+
+static char *l2t_encap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2t_encap_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) L2T_ENCAP_ERROR_##sym,
+ foreach_l2t_encap_error
+#undef _
+ L2T_ENCAP_N_ERROR,
+} l2t_encap_error_t;
+
+
+typedef enum
+{
+ L2T_ENCAP_NEXT_DROP,
+ L2T_ENCAP_NEXT_IP6_LOOKUP,
+ L2T_ENCAP_N_NEXT,
+} l2t_encap_next_t;
+
+typedef struct
+{
+ u32 cached_session_index;
+ u32 cached_sw_if_index;
+ vnet_main_t *vnet_main;
+} l2tp_encap_runtime_t;
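+
+/*
+ * A one-entry cache mapping the most recently seen VLIB_TX sw_if_index
+ * to its session index. Tunnel interfaces are registered (in l2tp.c)
+ * with dev_instance == session index, so a cache miss in stage1 below
+ * costs only one sup-hw-interface lookup.
+ */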
+
+vlib_node_registration_t l2t_encap_node;
+
+#define NSTAGES 3
+
+static inline void
+stage0 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+ vlib_prefetch_buffer_header (b, STORE);
+ CLIB_PREFETCH (b->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+static inline void
+stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi)
+{
+ l2tp_encap_runtime_t *rt = (void *) node->runtime_data;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vnet_hw_interface_t *hi;
+
+ u32 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+ u32 session_index = rt->cached_session_index;
+
+ if (PREDICT_FALSE (rt->cached_sw_if_index != sw_if_index))
+ {
+ hi = vnet_get_sup_hw_interface (rt->vnet_main, sw_if_index);
+ session_index = rt->cached_session_index = hi->dev_instance;
+ rt->cached_sw_if_index = sw_if_index;
+ }
+
+ /* Remember mapping index, prefetch the mini counter */
+ vnet_buffer (b)->l2t.next_index = L2T_ENCAP_NEXT_IP6_LOOKUP;
+ vnet_buffer (b)->l2t.session_index = session_index;
+
+ /* $$$$ prefetch counter... */
+}
+
+static inline u32
+last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ l2t_main_t *lm = &l2t_main;
+ vlib_node_t *n = vlib_get_node (vm, l2t_encap_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ l2tpv3_header_t *l2tp;
+ u32 session_index;
+ u32 counter_index;
+ l2t_session_t *s;
+ ip6_header_t *ip6;
+ u16 payload_length;
+ u32 next_index = L2T_ENCAP_NEXT_IP6_LOOKUP;
+
+ /* Other-than-output pkt? We're done... */
+ if (vnet_buffer (b)->l2t.next_index != L2T_ENCAP_NEXT_IP6_LOOKUP)
+ return vnet_buffer (b)->l2t.next_index;
+
+ em->counters[node_counter_base_index + L2T_ENCAP_ERROR_NETWORK_TO_USER] +=
+ 1;
+
+ session_index = vnet_buffer (b)->l2t.session_index;
+
+ counter_index =
+ session_index_to_counter_index (session_index,
+ SESSION_COUNTER_NETWORK_TO_USER);
+
+ /* per-mapping byte stats include the ethernet header */
+ vlib_increment_combined_counter (&lm->counter_main,
+ vlib_get_thread_index (),
+ counter_index, 1 /* packet_increment */ ,
+ vlib_buffer_length_in_chain (vm, b));
+
+ s = pool_elt_at_index (lm->sessions, session_index);
+
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = s->encap_fib_index;
+
+ /* Paint on an l2tpv3 hdr */
+ vlib_buffer_advance (b, -(s->l2tp_hdr_size));
+ l2tp = vlib_buffer_get_current (b);
+
+ l2tp->session_id = s->remote_session_id;
+ l2tp->cookie = s->remote_cookie;
+ if (PREDICT_FALSE (s->l2_sublayer_present))
+ {
+ l2tp->l2_specific_sublayer = 0;
+ }
+
+ /* Paint on an ip6 header */
+ vlib_buffer_advance (b, -(sizeof (*ip6)));
+ ip6 = vlib_buffer_get_current (b);
+
+ if (PREDICT_FALSE (!(s->admin_up)))
+ {
+ b->error = node->errors[L2T_ENCAP_ERROR_ADMIN_DOWN];
+ next_index = L2T_ENCAP_NEXT_DROP;
+ goto done;
+ }
+
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ /* calculate ip6 payload length */
+ payload_length = vlib_buffer_length_in_chain (vm, b);
+ payload_length -= sizeof (*ip6);
+
+ ip6->payload_length = clib_host_to_net_u16 (payload_length);
+ ip6->protocol = IP_PROTOCOL_L2TP;
+ ip6->hop_limit = 0xff;
+ ip6->src_address.as_u64[0] = s->our_address.as_u64[0];
+ ip6->src_address.as_u64[1] = s->our_address.as_u64[1];
+ ip6->dst_address.as_u64[0] = s->client_address.as_u64[0];
+ ip6->dst_address.as_u64[1] = s->client_address.as_u64[1];
+
+
+done:
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2t_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->is_user_to_network = 0;
+ t->our_address.as_u64[0] = ip6->src_address.as_u64[0];
+ t->our_address.as_u64[1] = ip6->src_address.as_u64[1];
+ t->client_address.as_u64[0] = ip6->dst_address.as_u64[0];
+ t->client_address.as_u64[1] = ip6->dst_address.as_u64[1];
+ t->session_index = session_index;
+ }
+
+ return next_index;
+}
+
+#include <vnet/pipeline.h>
+
+uword
+l2t_encap_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return dispatch_pipeline (vm, node, frame);
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2t_encap_node) = {
+ .function = l2t_encap_node_fn,
+ .name = "l2tp-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2t_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .runtime_data_bytes = sizeof (l2tp_encap_runtime_t),
+
+ .n_errors = ARRAY_LEN(l2t_encap_error_strings),
+ .error_strings = l2t_encap_error_strings,
+
+ .n_next_nodes = L2T_ENCAP_N_NEXT,
+
+ /* add dispositions here */
+ .next_nodes = {
+ [L2T_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [L2T_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2t_encap_node, l2t_encap_node_fn);
+void
+l2tp_encap_init (vlib_main_t * vm)
+{
+ l2tp_encap_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2t_encap_node.index);
+ rt->vnet_main = vnet_get_main ();
+ rt->cached_sw_if_index = (u32) ~ 0;
+ rt->cached_session_index = (u32) ~ 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2tp/l2tp.api b/src/vnet/l2tp/l2tp.api
new file mode 100644
index 00000000..4587a807
--- /dev/null
+++ b/src/vnet/l2tp/l2tp.api
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief l2tpv3 tunnel interface create request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param client_address - remote client tunnel ip address
+ @param our_address - local tunnel ip address
+ @param is_ipv6 - ipv6 if non-zero, else ipv4
+ @param local_session_id - local tunnel session id
+ @param remote_session_id - remote tunnel session id
+ @param local_cookie - local tunnel cookie
+ @param remote_cookie - remote tunnel cookie
+ @param l2_sublayer_present - l2 sublayer is present in packets if non-zero
+ @param encap_vrf_id - fib identifier used for outgoing encapsulated packets
+*/
+define l2tpv3_create_tunnel
+{
+ u32 client_index;
+ u32 context;
+ u8 client_address[16];
+ u8 our_address[16];
+ u8 is_ipv6;
+ u32 local_session_id;
+ u32 remote_session_id;
+ u64 local_cookie;
+ u64 remote_cookie;
+ u8 l2_sublayer_present;
+ u32 encap_vrf_id;
+};
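+
+/*
+ * Note: multi-byte fields are carried in network byte order; the
+ * handler in l2tp_api.c converts them with ntohl() /
+ * clib_net_to_host_u64() before calling create_l2tpv3_ipv6_tunnel().
+ */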
+
+/** \brief l2tpv3 tunnel interface create response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param sw_if_index - index of the new tunnel interface
+*/
+define l2tpv3_create_tunnel_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+autoreply define l2tpv3_set_tunnel_cookies
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u64 new_local_cookie;
+ u64 new_remote_cookie;
+};
+
+define sw_if_l2tpv3_tunnel_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 interface_name[64];
+ u8 client_address[16];
+ u8 our_address[16];
+ u32 local_session_id;
+ u32 remote_session_id;
+ u64 local_cookie[2];
+ u64 remote_cookie;
+ u8 l2_sublayer_present;
+};
+
+define sw_if_l2tpv3_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+autoreply define l2tpv3_interface_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 enable_disable;
+ u32 sw_if_index;
+};
+
+autoreply define l2tpv3_set_lookup_key
+{
+ u32 client_index;
+ u32 context;
+ /* 0 = ip6 src_address, 1 = ip6 dst_address, 2 = session_id */
+ u8 key;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2tp/l2tp.c b/src/vnet/l2tp/l2tp.c
new file mode 100644
index 00000000..3dedc447
--- /dev/null
+++ b/src/vnet/l2tp/l2tp.c
@@ -0,0 +1,766 @@
+/*
+ * l2tp.c : L2TPv3 tunnel support
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/l2tp/l2tp.h>
+
+l2t_main_t l2t_main;
+
+/* packet trace format function */
+u8 *
+format_l2t_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2t_trace_t *t = va_arg (*args, l2t_trace_t *);
+
+ if (t->is_user_to_network)
+ s = format (s, "L2T: %U (client) -> %U (our) session %d",
+ format_ip6_address, &t->client_address,
+ format_ip6_address, &t->our_address, t->session_index);
+ else
+ s = format (s, "L2T: %U (our) -> %U (client) session %d)",
+ format_ip6_address, &t->our_address,
+ format_ip6_address, &t->client_address, t->session_index);
+ return s;
+}
+
+u8 *
+format_l2t_session (u8 * s, va_list * args)
+{
+ l2t_session_t *session = va_arg (*args, l2t_session_t *);
+ l2t_main_t *lm = &l2t_main;
+ u32 counter_index;
+ vlib_counter_t v;
+
+ s = format (s, "[%d] %U (our) %U (client) %U (sw_if_index %d)\n",
+ session - lm->sessions,
+ format_ip6_address, &session->our_address,
+ format_ip6_address, &session->client_address,
+ format_vnet_sw_interface_name, lm->vnet_main,
+ vnet_get_sw_interface (lm->vnet_main, session->sw_if_index),
+ session->sw_if_index);
+
+ s = format (s, " local cookies %016llx %016llx remote cookie %016llx\n",
+ clib_net_to_host_u64 (session->local_cookie[0]),
+ clib_net_to_host_u64 (session->local_cookie[1]),
+ clib_net_to_host_u64 (session->remote_cookie));
+
+ s = format (s, " local session-id %d remote session-id %d\n",
+ clib_net_to_host_u32 (session->local_session_id),
+ clib_net_to_host_u32 (session->remote_session_id));
+
+ s = format (s, " l2 specific sublayer %s\n",
+ session->l2_sublayer_present ? "present" : "absent");
+
+ counter_index =
+ session_index_to_counter_index (session - lm->sessions,
+ SESSION_COUNTER_USER_TO_NETWORK);
+
+ vlib_get_combined_counter (&lm->counter_main, counter_index, &v);
+ if (v.packets != 0)
+ s = format (s, " user-to-net: %llu pkts %llu bytes\n",
+ v.packets, v.bytes);
+
+ vlib_get_combined_counter (&lm->counter_main, counter_index + 1, &v);
+
+ if (v.packets != 0)
+ s = format (s, " net-to-user: %llu pkts %llu bytes\n",
+ v.packets, v.bytes);
+ return s;
+}
+
+static clib_error_t *
+show_l2tp_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2t_session_t *session;
+ l2t_main_t *lm = &l2t_main;
+ char *keystr = 0;
+ int verbose = 0;
+
+ if (unformat (input, "verbose") || unformat (input, "v"))
+ verbose = 1;
+
+ if (pool_elts (lm->sessions) == 0)
+ vlib_cli_output (vm, "No l2tp sessions...");
+ else
+ vlib_cli_output (vm, "%u l2tp sessions...", pool_elts (lm->sessions));
+
+ if (verbose)
+ {
+ switch (lm->lookup_type)
+ {
+ case L2T_LOOKUP_SRC_ADDRESS:
+ keystr = "src address";
+ break;
+
+ case L2T_LOOKUP_DST_ADDRESS:
+ keystr = "dst address";
+ break;
+
+ case L2T_LOOKUP_SESSION_ID:
+ keystr = "session id";
+ break;
+
+ default:
+ keystr = "BOGUS!";
+ break;
+ }
+
+ vlib_cli_output (vm, "L2tp session lookup on %s", keystr);
+
+ /* *INDENT-OFF* */
+ pool_foreach (session, lm->sessions,
+ ({
+ vlib_cli_output (vm, "%U", format_l2t_session, session);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_session_detail_command, static) = {
+ .path = "show l2tpv3",
+ .short_help = "show l2tpv3 [verbose]",
+ .function = show_l2tp_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+test_counters_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2t_session_t *session;
+ l2t_main_t *lm = &l2t_main;
+ u32 session_index;
+ u32 counter_index;
+ u32 nincr = 0;
+ u32 thread_index = vlib_get_thread_index ();
+
+ /* *INDENT-OFF* */
+ pool_foreach (session, lm->sessions,
+ ({
+ session_index = session - lm->sessions;
+ counter_index =
+ session_index_to_counter_index (session_index,
+ SESSION_COUNTER_USER_TO_NETWORK);
+ vlib_increment_combined_counter (&lm->counter_main,
+ thread_index,
+ counter_index,
+ 1/*pkt*/, 1111 /*bytes*/);
+ vlib_increment_combined_counter (&lm->counter_main,
+ thread_index,
+ counter_index+1,
+ 1/*pkt*/, 2222 /*bytes*/);
+ nincr++;
+
+ }));
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "Incremented %d active counters\n", nincr);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_counters_command, static) = {
+ .path = "test counters",
+ .short_help = "increment all active counters",
+ .function = test_counters_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+clear_counters_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2t_session_t *session;
+ l2t_main_t *lm = &l2t_main;
+ u32 session_index;
+ u32 counter_index;
+ u32 nincr = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (session, lm->sessions,
+ ({
+ session_index = session - lm->sessions;
+ counter_index =
+ session_index_to_counter_index (session_index,
+ SESSION_COUNTER_USER_TO_NETWORK);
+ vlib_zero_combined_counter (&lm->counter_main, counter_index);
+ vlib_zero_combined_counter (&lm->counter_main, counter_index+1);
+ nincr++;
+ }));
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "Cleared %d active counters\n", nincr);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_counters_command, static) = {
+ .path = "clear counters",
+ .short_help = "clear all active counters",
+ .function = clear_counters_command_fn,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_l2tpv3_name (u8 * s, va_list * args)
+{
+ l2t_main_t *lm = &l2t_main;
+ u32 i = va_arg (*args, u32);
+ u32 show_dev_instance = ~0;
+
+ if (i < vec_len (lm->dev_inst_by_real))
+ show_dev_instance = lm->dev_inst_by_real[i];
+
+ if (show_dev_instance != ~0)
+ i = show_dev_instance;
+
+ return format (s, "l2tpv3_tunnel%d", i);
+}
+
+static int
+l2tpv3_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
+{
+ l2t_main_t *lm = &l2t_main;
+
+ vec_validate_init_empty (lm->dev_inst_by_real, hi->dev_instance, ~0);
+
+ lm->dev_inst_by_real[hi->dev_instance] = new_dev_instance;
+
+ return 0;
+}
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (l2tpv3_device_class,static) = {
+ .name = "L2TPv3",
+ .format_device_name = format_l2tpv3_name,
+ .name_renumber = l2tpv3_name_renumber,
+ .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_l2tp_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (l2tpv3_hw_class) = {
+ .name = "L2TPV3",
+ .format_header = format_l2tp_header_with_length,
+ .build_rewrite = default_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+/* *INDENT-ON* */
+
+int
+create_l2tpv3_ipv6_tunnel (l2t_main_t * lm,
+ ip6_address_t * client_address,
+ ip6_address_t * our_address,
+ u32 local_session_id,
+ u32 remote_session_id,
+ u64 local_cookie,
+ u64 remote_cookie,
+ int l2_sublayer_present,
+ u32 encap_fib_index, u32 * sw_if_index)
+{
+ l2t_session_t *s = 0;
+ vnet_main_t *vnm = lm->vnet_main;
+ vnet_hw_interface_t *hi;
+ uword *p = (uword *) ~ 0;
+ u32 hw_if_index;
+ l2tpv3_header_t l2tp_hdr;
+ ip6_address_t *dst_address_copy, *src_address_copy;
+ u32 counter_index;
+
+ remote_session_id = clib_host_to_net_u32 (remote_session_id);
+ local_session_id = clib_host_to_net_u32 (local_session_id);
+
+ switch (lm->lookup_type)
+ {
+ case L2T_LOOKUP_SRC_ADDRESS:
+ p = hash_get_mem (lm->session_by_src_address, client_address);
+ break;
+
+ case L2T_LOOKUP_DST_ADDRESS:
+ p = hash_get_mem (lm->session_by_dst_address, our_address);
+ break;
+
+ case L2T_LOOKUP_SESSION_ID:
+ p = hash_get (lm->session_by_session_id, local_session_id);
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ /* adding a session: session must not already exist */
+ if (p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get (lm->sessions, s);
+ memset (s, 0, sizeof (*s));
+ clib_memcpy (&s->our_address, our_address, sizeof (s->our_address));
+ clib_memcpy (&s->client_address, client_address,
+ sizeof (s->client_address));
+ s->local_cookie[0] = clib_host_to_net_u64 (local_cookie);
+ s->remote_cookie = clib_host_to_net_u64 (remote_cookie);
+ s->local_session_id = local_session_id;
+ s->remote_session_id = remote_session_id;
+ s->l2_sublayer_present = l2_sublayer_present;
+ /* precompute l2tp header size */
+ s->l2tp_hdr_size = l2_sublayer_present ?
+ sizeof (l2tpv3_header_t) :
+ sizeof (l2tpv3_header_t) - sizeof (l2tp_hdr.l2_specific_sublayer);
+ s->admin_up = 0;
+ s->encap_fib_index = encap_fib_index;
+
+ /* Setup hash table entries */
+ switch (lm->lookup_type)
+ {
+ case L2T_LOOKUP_SRC_ADDRESS:
+ src_address_copy = clib_mem_alloc (sizeof (*src_address_copy));
+ clib_memcpy (src_address_copy, client_address,
+ sizeof (*src_address_copy));
+ hash_set_mem (lm->session_by_src_address, src_address_copy,
+ s - lm->sessions);
+ break;
+ case L2T_LOOKUP_DST_ADDRESS:
+ dst_address_copy = clib_mem_alloc (sizeof (*dst_address_copy));
+ clib_memcpy (dst_address_copy, our_address, sizeof (*dst_address_copy));
+ hash_set_mem (lm->session_by_dst_address, dst_address_copy,
+ s - lm->sessions);
+ break;
+ case L2T_LOOKUP_SESSION_ID:
+ hash_set (lm->session_by_session_id, local_session_id,
+ s - lm->sessions);
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ /* validate counters */
+ counter_index =
+ session_index_to_counter_index (s - lm->sessions,
+ SESSION_COUNTER_USER_TO_NETWORK);
+ vlib_validate_combined_counter (&lm->counter_main, counter_index);
+ vlib_validate_combined_counter (&lm->counter_main, counter_index + 1);
+
+ if (vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) > 0)
+ {
+ hw_if_index = lm->free_l2tpv3_tunnel_hw_if_indices
+ [vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) - 1];
+ _vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = s - lm->sessions;
+ hi->hw_instance = hi->dev_instance;
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, l2tpv3_device_class.index, s - lm->sessions,
+ l2tpv3_hw_class.index, s - lm->sessions);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->output_node_index = l2t_encap_node.index;
+ /* $$$$ initialize custom dispositions, if needed */
+ }
+
+ s->hw_if_index = hw_if_index;
+ s->sw_if_index = hi->sw_if_index;
+
+ if (sw_if_index)
+ *sw_if_index = hi->sw_if_index;
+
+ return 0;
+}
+
+static clib_error_t *
+create_l2tpv3_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_address_t client_address, our_address;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ l2t_main_t *lm = &l2t_main;
+ u64 local_cookie = (u64) ~ 0, remote_cookie = (u64) ~ 0;
+ u32 local_session_id = 1, remote_session_id = 1;
+ int our_address_set = 0, client_address_set = 0;
+ int l2_sublayer_present = 0;
+ int rv;
+ u32 sw_if_index;
+ u32 encap_fib_id = ~0;
+ u32 encap_fib_index = ~0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "client %U",
+ unformat_ip6_address, &client_address))
+ client_address_set = 1;
+ else if (unformat (line_input, "our %U",
+ unformat_ip6_address, &our_address))
+ our_address_set = 1;
+ else if (unformat (line_input, "local-cookie %llx", &local_cookie))
+ ;
+ else if (unformat (line_input, "remote-cookie %llx", &remote_cookie))
+ ;
+ else if (unformat (line_input, "local-session-id %d",
+ &local_session_id))
+ ;
+ else if (unformat (line_input, "remote-session-id %d",
+ &remote_session_id))
+ ;
+ else if (unformat (line_input, "fib-id %d", &encap_fib_id))
+ ;
+ else if (unformat (line_input, "l2-sublayer-present"))
+ l2_sublayer_present = 1;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (encap_fib_id != ~0)
+ {
+ uword *p;
+ ip6_main_t *im = &ip6_main;
+ if (!(p = hash_get (im->fib_index_by_table_id, encap_fib_id)))
+ {
+ error = clib_error_return (0, "No fib with id %d", encap_fib_id);
+ goto done;
+ }
+ encap_fib_index = p[0];
+ }
+ else
+ {
+ encap_fib_index = ~0;
+ }
+
+ if (our_address_set == 0)
+ {
+ error = clib_error_return (0, "our address not specified");
+ goto done;
+ }
+ if (client_address_set == 0)
+ {
+ error = clib_error_return (0, "client address not specified");
+ goto done;
+ }
+
+ rv = create_l2tpv3_ipv6_tunnel (lm, &client_address, &our_address,
+ local_session_id, remote_session_id,
+ local_cookie, remote_cookie,
+ l2_sublayer_present,
+ encap_fib_index, &sw_if_index);
+ switch (rv)
+ {
+ case 0:
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw_if_index);
+ break;
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "session already exists...");
+ goto done;
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "session does not exist...");
+ goto done;
+
+ default:
+ error = clib_error_return (0, "l2tp_session_add_del returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_l2tpv3_tunnel_command, static) =
+{
+ .path = "create l2tpv3 tunnel",
+ .short_help =
+ "create l2tpv3 tunnel client <ip6> our <ip6> local-cookie <hex> remote-cookie <hex> local-session <dec> remote-session <dec>",
+ .function = create_l2tpv3_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+int
+l2tpv3_set_tunnel_cookies (l2t_main_t * lm,
+ u32 sw_if_index,
+ u64 new_local_cookie, u64 new_remote_cookie)
+{
+ l2t_session_t *s;
+ vnet_hw_interface_t *hi;
+ vnet_main_t *vnm = vnet_get_main ();
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ if (pool_is_free_index (lm->sessions, hi->dev_instance))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ s = pool_elt_at_index (lm->sessions, hi->dev_instance);
+
+ s->local_cookie[1] = s->local_cookie[0];
+ s->local_cookie[0] = clib_host_to_net_u64 (new_local_cookie);
+ s->remote_cookie = clib_host_to_net_u64 (new_remote_cookie);
+
+ return 0;
+}
+
+
+static clib_error_t *
+set_l2tp_tunnel_cookie_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ l2t_main_t *lm = &l2t_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ u64 local_cookie = (u64) ~ 0, remote_cookie = (u64) ~ 0;
+
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (input, "local %llx", &local_cookie))
+ ;
+ else if (unformat (input, "remote %llx", &remote_cookie))
+ ;
+ else
+ break;
+ }
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "unknown interface");
+ if (local_cookie == ~0)
+ return clib_error_return (0, "local cookie required");
+ if (remote_cookie == ~0)
+ return clib_error_return (0, "remote cookie required");
+
+ rv = l2tpv3_set_tunnel_cookies (lm, sw_if_index,
+ local_cookie, remote_cookie);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "invalid interface");
+
+ default:
+ return clib_error_return (0, "l2tp_session_set_cookies returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_l2tp_tunnel_cookie_command, static) =
+{
+ .path = "set l2tpv3 tunnel cookie",
+ .short_help =
+ "set l2tpv3 tunnel cookie <intfc> local <hex> remote <hex>",
+ .function = set_l2tp_tunnel_cookie_command_fn,
+};
+/* *INDENT-ON* */
+
+int
+l2tpv3_interface_enable_disable (vnet_main_t * vnm,
+ u32 sw_if_index, int enable_disable)
+{
+
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ vnet_feature_enable_disable ("ip6-unicast", "l2tp-decap", sw_if_index,
+ enable_disable, 0, 0);
+ return 0;
+}
+
+/* Enable/disable L2TPv3 intercept on the IP6 forwarding path */
+static clib_error_t *
+set_ip6_l2tpv3 (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 sw_if_index = ~0;
+ int is_add = 1;
+ int rv;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface required");
+
+ rv = l2tpv3_interface_enable_disable (vnm, sw_if_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "invalid interface");
+
+ default:
+ return clib_error_return (0,
+ "l2tp_interface_enable_disable returned %d",
+ rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_l2tpv3, static) =
+{
+ .path = "set interface ip6 l2tpv3",
+ .function = set_ip6_l2tpv3,
+ .short_help = "set interface ip6 l2tpv3 <intfc> [del]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2tp_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ l2t_main_t *lm = &l2t_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "lookup-v6-src"))
+ lm->lookup_type = L2T_LOOKUP_SRC_ADDRESS;
+ else if (unformat (input, "lookup-v6-dst"))
+ lm->lookup_type = L2T_LOOKUP_DST_ADDRESS;
+ else if (unformat (input, "lookup-session-id"))
+ lm->lookup_type = L2T_LOOKUP_SESSION_ID;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (l2tp_config, "l2tp");
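+
+/*
+ * Example startup.conf stanza, using the keywords parsed above
+ * (pick exactly one lookup key):
+ *
+ *   l2tp {
+ *     lookup-v6-dst
+ *   }
+ */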
+
+
+clib_error_t *
+l2tp_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+ l2t_main_t *lm = &l2t_main;
+ vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ if (hi->hw_class_index != l2tpv3_hw_class.index)
+ return 0;
+
+ u32 session_index = hi->dev_instance;
+ l2t_session_t *s = pool_elt_at_index (lm->sessions, session_index);
+ s->admin_up = ! !(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (l2tp_sw_interface_up_down);
+
+clib_error_t *
+l2tp_init (vlib_main_t * vm)
+{
+ l2t_main_t *lm = &l2t_main;
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+
+ lm->vnet_main = vnet_get_main ();
+ lm->vlib_main = vm;
+ lm->lookup_type = L2T_LOOKUP_DST_ADDRESS;
+
+ lm->session_by_src_address = hash_create_mem
+ (0, sizeof (ip6_address_t) /* key bytes */ ,
+ sizeof (u32) /* value bytes */ );
+ lm->session_by_dst_address = hash_create_mem
+ (0, sizeof (ip6_address_t) /* key bytes */ ,
+ sizeof (u32) /* value bytes */ );
+ lm->session_by_session_id = hash_create (0, sizeof (uword));
+
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_L2TP);
+ pi->unformat_pg_edit = unformat_pg_l2tp_header;
+
+ /* ensure these nodes are included in the build */
+ l2tp_encap_init (vm);
+ l2tp_decap_init ();
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2tp_init);
+
+clib_error_t *
+l2tp_worker_init (vlib_main_t * vm)
+{
+ l2tp_encap_init (vm);
+
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (l2tp_worker_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2tp/l2tp.h b/src/vnet/l2tp/l2tp.h
new file mode 100644
index 00000000..e7d2892c
--- /dev/null
+++ b/src/vnet/l2tp/l2tp.h
@@ -0,0 +1,147 @@
+/*
+ * l2tp.h : L2TPv3 tunnel support
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_l2tp_h__
+#define __included_l2tp_h__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2tp/packet.h>
+
+typedef struct
+{
+ /* ip6 addresses */
+ ip6_address_t our_address;
+ ip6_address_t client_address;
+
+ /* l2tpv3 header parameters */
+ u64 local_cookie[2];
+ u64 remote_cookie;
+ u32 local_session_id;
+ u32 remote_session_id;
+
+ /* tunnel interface */
+ u32 hw_if_index;
+ u32 sw_if_index;
+
+ /* fib index used for outgoing encapsulated packets */
+ u32 encap_fib_index;
+
+ u8 l2tp_hdr_size;
+ u8 l2_sublayer_present;
+ u8 cookie_flags; /* in host byte order */
+
+ u8 admin_up;
+} l2t_session_t;
+
+typedef enum
+{
+ L2T_LOOKUP_SRC_ADDRESS = 0,
+ L2T_LOOKUP_DST_ADDRESS,
+ L2T_LOOKUP_SESSION_ID,
+} ip6_to_l2_lookup_t;
+
+typedef struct
+{
+ /* session pool */
+ l2t_session_t *sessions;
+
+ /* ip6 -> l2 hash tables. Make up your minds, people... */
+ uword *session_by_src_address;
+ uword *session_by_dst_address;
+ uword *session_by_session_id;
+
+ ip6_to_l2_lookup_t lookup_type;
+
+ /* Counters */
+ vlib_combined_counter_main_t counter_main;
+
+ /* vector of free l2tpv3 tunnel interfaces */
+ u32 *free_l2tpv3_tunnel_hw_if_indices;
+
+ /* show device instance by real device instance */
+ u32 *dev_inst_by_real;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+} l2t_main_t;
+
+/* Packet trace structure */
+typedef struct
+{
+ int is_user_to_network;
+ u32 session_index;
+ ip6_address_t our_address;
+ ip6_address_t client_address;
+} l2t_trace_t;
+
+extern l2t_main_t l2t_main;
+extern vlib_node_registration_t l2t_encap_node;
+extern vlib_node_registration_t l2t_decap_node;
+extern vlib_node_registration_t l2t_decap_local_node;
+
+enum
+{
+ SESSION_COUNTER_USER_TO_NETWORK = 0,
+ SESSION_COUNTER_NETWORK_TO_USER,
+};
+
+static inline u32
+session_index_to_counter_index (u32 session_index, u32 counter_id)
+{
+ return ((session_index << 1) + counter_id);
+}
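+
+/*
+ * Each session owns two adjacent combined counters, e.g. session 7
+ * maps to counter 14 (user-to-network) and counter 15 (network-to-user).
+ */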
+
+u8 *format_l2t_trace (u8 * s, va_list * args);
+
+typedef struct
+{
+ /* Any per-interface config would go here */
+} ip6_l2tpv3_config_t;
+
+uword unformat_pg_l2tp_header (unformat_input_t * input, va_list * args);
+
+void l2tp_encap_init (vlib_main_t * vm);
+void l2tp_decap_init (void);
+int create_l2tpv3_ipv6_tunnel (l2t_main_t * lm,
+ ip6_address_t * client_address,
+ ip6_address_t * our_address,
+ u32 local_session_id,
+ u32 remote_session_id,
+ u64 local_cookie,
+ u64 remote_cookie,
+ int l2_sublayer_present,
+ u32 encap_fib_index, u32 * sw_if_index);
+
+int l2tpv3_set_tunnel_cookies (l2t_main_t * lm,
+ u32 sw_if_index,
+ u64 new_local_cookie, u64 new_remote_cookie);
+
+int l2tpv3_interface_enable_disable (vnet_main_t * vnm,
+ u32 sw_if_index, int enable_disable);
+
+#endif /* __included_l2tp_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2tp/l2tp_api.c b/src/vnet/l2tp/l2tp_api.c
new file mode 100644
index 00000000..88d758c9
--- /dev/null
+++ b/src/vnet/l2tp/l2tp_api.c
@@ -0,0 +1,267 @@
+/*
+ *------------------------------------------------------------------
+ * l2tp_api.c - l2tpv3 api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/l2tp/l2tp.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(L2TPV3_CREATE_TUNNEL, l2tpv3_create_tunnel) \
+_(L2TPV3_SET_TUNNEL_COOKIES, l2tpv3_set_tunnel_cookies) \
+_(L2TPV3_INTERFACE_ENABLE_DISABLE, l2tpv3_interface_enable_disable) \
+_(L2TPV3_SET_LOOKUP_KEY, l2tpv3_set_lookup_key) \
+_(SW_IF_L2TPV3_TUNNEL_DUMP, sw_if_l2tpv3_tunnel_dump)
+
+static void
+send_sw_if_l2tpv3_tunnel_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ l2t_session_t * s,
+ l2t_main_t * lm, u32 context)
+{
+ vl_api_sw_if_l2tpv3_tunnel_details_t *mp;
+ u8 *if_name = NULL;
+ vnet_sw_interface_t *si = NULL;
+
+ si = vnet_get_hw_sw_interface (lm->vnet_main, s->hw_if_index);
+
+ if_name = format (if_name, "%U",
+ format_vnet_sw_interface_name, lm->vnet_main, si);
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_IF_L2TPV3_TUNNEL_DETAILS);
+ strncpy ((char *) mp->interface_name,
+ (char *) if_name, ARRAY_LEN (mp->interface_name) - 1);
+ mp->sw_if_index = ntohl (si->sw_if_index);
+ mp->local_session_id = s->local_session_id;
+ mp->remote_session_id = s->remote_session_id;
+ mp->local_cookie[0] = s->local_cookie[0];
+ mp->local_cookie[1] = s->local_cookie[1];
+ mp->remote_cookie = s->remote_cookie;
+ clib_memcpy (mp->client_address, &s->client_address,
+ sizeof (s->client_address));
+ clib_memcpy (mp->our_address, &s->our_address, sizeof (s->our_address));
+ mp->l2_sublayer_present = s->l2_sublayer_present;
+ mp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+
+static void
+vl_api_sw_if_l2tpv3_tunnel_dump_t_handler (vl_api_sw_if_l2tpv3_tunnel_dump_t *
+ mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ l2t_main_t *lm = &l2t_main;
+ unix_shared_memory_queue_t *q;
+ l2t_session_t *session;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (session, lm->sessions,
+ ({
+ send_sw_if_l2tpv3_tunnel_details (am, q, session, lm, mp->context);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void vl_api_l2tpv3_create_tunnel_t_handler
+ (vl_api_l2tpv3_create_tunnel_t * mp)
+{
+ vl_api_l2tpv3_create_tunnel_reply_t *rmp;
+ l2t_main_t *lm = &l2t_main;
+ u32 sw_if_index = (u32) ~ 0;
+ int rv;
+
+ if (mp->is_ipv6 != 1)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+
+ u32 encap_fib_index;
+
+ if (mp->encap_vrf_id != ~0)
+ {
+ uword *p;
+ ip6_main_t *im = &ip6_main;
+ p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+ encap_fib_index = p[0];
+ }
+ else
+ {
+ encap_fib_index = ~0;
+ }
+
+ rv = create_l2tpv3_ipv6_tunnel (lm,
+ (ip6_address_t *) mp->client_address,
+ (ip6_address_t *) mp->our_address,
+ ntohl (mp->local_session_id),
+ ntohl (mp->remote_session_id),
+ clib_net_to_host_u64 (mp->local_cookie),
+ clib_net_to_host_u64 (mp->remote_cookie),
+ mp->l2_sublayer_present,
+ encap_fib_index, &sw_if_index);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_L2TPV3_CREATE_TUNNEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void vl_api_l2tpv3_set_tunnel_cookies_t_handler
+ (vl_api_l2tpv3_set_tunnel_cookies_t * mp)
+{
+ vl_api_l2tpv3_set_tunnel_cookies_reply_t *rmp;
+ l2t_main_t *lm = &l2t_main;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = l2tpv3_set_tunnel_cookies (lm, ntohl (mp->sw_if_index),
+ clib_net_to_host_u64 (mp->new_local_cookie),
+ clib_net_to_host_u64
+ (mp->new_remote_cookie));
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_L2TPV3_SET_TUNNEL_COOKIES_REPLY);
+}
+
+static void vl_api_l2tpv3_interface_enable_disable_t_handler
+ (vl_api_l2tpv3_interface_enable_disable_t * mp)
+{
+ int rv;
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_l2tpv3_interface_enable_disable_reply_t *rmp;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = l2tpv3_interface_enable_disable
+ (vnm, ntohl (mp->sw_if_index), mp->enable_disable);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_L2TPV3_INTERFACE_ENABLE_DISABLE_REPLY);
+}
+
+static void vl_api_l2tpv3_set_lookup_key_t_handler
+ (vl_api_l2tpv3_set_lookup_key_t * mp)
+{
+ int rv = 0;
+ l2t_main_t *lm = &l2t_main;
+ vl_api_l2tpv3_set_lookup_key_reply_t *rmp;
+
+ if (mp->key > L2T_LOOKUP_SESSION_ID)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ lm->lookup_type = mp->key;
+
+out:
+ REPLY_MACRO (VL_API_L2TPV3_SET_LOOKUP_KEY_REPLY);
+}
+
+/*
+ * l2tp_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_l2tp;
+#undef _
+}
+
+static clib_error_t *
+l2tp_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
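+
+ /*
+ * Each _(N,n) row above expands to one registration; e.g. the first
+ * row is equivalent to:
+ *
+ *   vl_msg_api_set_handlers (VL_API_L2TPV3_CREATE_TUNNEL,
+ *                            "l2tpv3_create_tunnel",
+ *                            vl_api_l2tpv3_create_tunnel_t_handler,
+ *                            vl_noop_handler,
+ *                            vl_api_l2tpv3_create_tunnel_t_endian,
+ *                            vl_api_l2tpv3_create_tunnel_t_print,
+ *                            sizeof (vl_api_l2tpv3_create_tunnel_t), 1);
+ */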
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (l2tp_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2tp/packet.h b/src/vnet/l2tp/packet.h
new file mode 100644
index 00000000..66dfea21
--- /dev/null
+++ b/src/vnet/l2tp/packet.h
@@ -0,0 +1,44 @@
+/*
+ * packet.h : L2TPv3 packet header format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_l2tp_packet_h__
+#define __included_l2tp_packet_h__
+
+/*
+ * See RFC4719 for packet format.
+ * Note: the l2_specific_sublayer is present in current Linux l2tpv3
+ * tunnels. It is not present in IOS XR l2tpv3 tunnels.
+ * The Linux implementation is almost certainly wrong.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+{
+ u32 session_id;
+ u64 cookie;
+ u32 l2_specific_sublayer; /* set to 0 (if present) */
+}) l2tpv3_header_t;
+/* *INDENT-ON* */
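+
+/*
+ * Size note: with the 8-byte cookie, the packed header is 16 bytes on
+ * the wire when the l2-specific sublayer is present and 12 bytes when
+ * it is omitted (see the l2tp_hdr_size computation in l2tp.c).
+ */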
+
+#endif /* __included_l2tp_packet_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2tp/pg.c b/src/vnet/l2tp/pg.c
new file mode 100644
index 00000000..1e523d3b
--- /dev/null
+++ b/src/vnet/l2tp/pg.c
@@ -0,0 +1,106 @@
+/*
+ * pg.c: packet generator for L2TPv3 header
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/l2tp/l2tp.h>
+
+typedef struct
+{
+ pg_edit_t session_id;
+ pg_edit_t cookie;
+} pg_l2tp_header_t;
+
+typedef struct
+{
+ pg_edit_t l2_sublayer;
+} pg_l2tp_header_l2_sublayer_t;
+
+static inline void
+pg_l2tp_header_init (pg_l2tp_header_t * e)
+{
+ pg_edit_init (&e->session_id, l2tpv3_header_t, session_id);
+ pg_edit_init (&e->cookie, l2tpv3_header_t, cookie);
+}
+
+uword
+unformat_pg_l2tp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_l2tp_header_t *h;
+ u32 group_index, error;
+ vlib_main_t *vm = vlib_get_main ();
+
+ h = pg_create_edit_group (s, sizeof (h[0]),
+ sizeof (l2tpv3_header_t) - sizeof (u32),
+ &group_index);
+ pg_l2tp_header_init (h);
+
+ error = 1;
+
+ /* session id and cookie are required */
+ if (!unformat (input, "L2TP: session_id %U cookie %U",
+ unformat_pg_edit, unformat_pg_number, &h->session_id,
+ unformat_pg_edit, unformat_pg_number, &h->cookie))
+ {
+ goto done;
+ }
+
+ /* "l2_sublayer <value>" is optional */
+ if (unformat (input, "l2_sublayer"))
+ {
+ pg_l2tp_header_l2_sublayer_t *h2;
+
+ h2 = pg_add_edits (s, sizeof (h2[0]), sizeof (u32), group_index);
+ pg_edit_init (&h2->l2_sublayer, l2tpv3_header_t, l2_specific_sublayer);
+ if (!unformat_user (input, unformat_pg_edit,
+ unformat_pg_number, &h2->l2_sublayer))
+ {
+ goto done;
+ }
+ }
+
+ /* Parse an ethernet header if it is present */
+ {
+ pg_node_t *pg_node = 0;
+ vlib_node_t *eth_lookup_node;
+
+ eth_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
+ ASSERT (eth_lookup_node);
+
+ pg_node = pg_get_node (eth_lookup_node->index);
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+ }
+
+ error = 0;
+
+done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
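+
+/*
+ * Example pg stream fragment accepted by the unformat function above
+ * (values are illustrative):
+ *
+ *   L2TP: session_id 17 cookie 1234 l2_sublayer 0
+ */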
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l3_types.h b/src/vnet/l3_types.h
new file mode 100644
index 00000000..28b0891e
--- /dev/null
+++ b/src/vnet/l3_types.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * l3_types.h: layer 3 packet types
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_l3_types_h
+#define included_vnet_l3_types_h
+
+/* Inherit generic L3 packet types from ethernet. */
+typedef enum
+{
+#define ethernet_type(n,f) VNET_L3_PACKET_TYPE_##f,
+#include <vnet/ethernet/types.def>
+#undef ethernet_type
+} vnet_l3_packet_type_t;
+
+#endif /* included_vnet_l3_types_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lawful-intercept/lawful_intercept.c b/src/vnet/lawful-intercept/lawful_intercept.c
new file mode 100644
index 00000000..ef07a339
--- /dev/null
+++ b/src/vnet/lawful-intercept/lawful_intercept.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lawful-intercept/lawful_intercept.h>
+
+static clib_error_t *
+set_li_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ li_main_t * lm = &li_main;
+ ip4_address_t collector;
+ u8 collector_set = 0;
+ ip4_address_t src;
+ u8 src_set = 0;
+ u32 tmp;
+ u16 udp_port = 0;
+ u8 is_add = 1;
+ int i;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+ collector_set = 1;
+ if (unformat (input, "src %U", unformat_ip4_address, &src))
+ src_set = 1;
+ else if (unformat (input, "udp-port %d", &tmp))
+ udp_port = tmp;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (collector_set == 0)
+ return clib_error_return (0, "collector must be set...");
+ if (src_set == 0)
+ return clib_error_return (0, "src must be set...");
+ if (udp_port == 0)
+ return clib_error_return (0, "udp-port must be set...");
+
+ if (is_add == 1)
+ {
+ for (i = 0; i < vec_len (lm->collectors); i++)
+ {
+ if (lm->collectors[i].as_u32 == collector.as_u32)
+ {
+ if (lm->ports[i] == udp_port)
+ return clib_error_return
+ (0, "collector %U:%d already configured",
+ &collector, udp_port);
+ else
+ return clib_error_return
+ (0, "collector %U already configured with port %d",
+ &collector, (int)(lm->ports[i]));
+ }
+ }
+ vec_add1 (lm->collectors, collector);
+ vec_add1 (lm->ports, udp_port);
+ vec_add1 (lm->src_addrs, src);
+ return 0;
+ }
+ else
+ {
+ for (i = 0; i < vec_len (lm->collectors); i++)
+ {
+ if ((lm->collectors[i].as_u32 == collector.as_u32)
+ && lm->ports[i] == udp_port)
+ {
+ vec_delete (lm->collectors, 1, i);
+ vec_delete (lm->ports, 1, i);
+ vec_delete (lm->src_addrs, 1, i);
+ return 0;
+ }
+ }
+ return clib_error_return (0, "collector %U:%d not configured",
+ format_ip4_address, &collector, udp_port);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_li_command, static) = {
+ .path = "set li",
+ .short_help =
+ "set li src <ip4-address> collector <ip4-address> udp-port <nnnn>",
+ .function = set_li_command_fn,
+};
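+
+/*
+ * Example (addresses and port are illustrative):
+ *
+ *   set li src 10.1.1.1 collector 10.2.2.2 udp-port 4000
+ */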
+
+static clib_error_t *
+li_init (vlib_main_t * vm)
+{
+ li_main_t * lm = &li_main;
+
+ lm->vlib_main = vm;
+ lm->vnet_main = vnet_get_main();
+ lm->hit_node_index = li_hit_node.index;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(li_init);
+
diff --git a/src/vnet/lawful-intercept/lawful_intercept.h b/src/vnet/lawful-intercept/lawful_intercept.h
new file mode 100644
index 00000000..89e699f5
--- /dev/null
+++ b/src/vnet/lawful-intercept/lawful_intercept.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __lawful_intercept_h__
+#define __lawful_intercept_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+
+typedef struct {
+ /* LI collector info */
+ ip4_address_t * src_addrs;
+ ip4_address_t * collectors;
+ u16 * ports;
+
+ /* Hit node index */
+ u32 hit_node_index;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} li_main_t;
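+
+/*
+ * Note: src_addrs, collectors and ports are parallel vectors; entry i
+ * of each one describes a single (src, collector, udp-port) tuple, as
+ * maintained by the "set li" command in lawful_intercept.c.
+ */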
+
+li_main_t li_main;
+
+typedef CLIB_PACKED(struct {
+ ip4_header_t ip4;
+ udp_header_t udp;
+}) ip4_udp_header_t;
+
+extern vlib_node_registration_t li_hit_node;
+
+#endif /* __lawful_intercept_h__ */
diff --git a/src/vnet/lawful-intercept/node.c b/src/vnet/lawful-intercept/node.c
new file mode 100644
index 00000000..50c76ec5
--- /dev/null
+++ b/src/vnet/lawful-intercept/node.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vppinfra/error.h>
+
+#include <vnet/lawful-intercept/lawful_intercept.h>
+
+#include <vppinfra/elog.h>
+
+vlib_node_registration_t li_hit_node;
+
+typedef struct {
+ u32 next_index;
+} li_hit_trace_t;
+
+/* packet trace format function */
+static u8 * format_li_hit_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ li_hit_trace_t * t = va_arg (*args, li_hit_trace_t *);
+
+ s = format (s, "LI_HIT: next index %d", t->next_index);
+
+ return s;
+}
+
+#define foreach_li_hit_error \
+_(HITS, "LI packets processed") \
+_(NO_COLLECTOR, "No collector configured") \
+_(BUFFER_ALLOCATION_FAILURE, "Buffer allocation failure")
+
+typedef enum {
+#define _(sym,str) LI_HIT_ERROR_##sym,
+ foreach_li_hit_error
+#undef _
+ LI_HIT_N_ERROR,
+} li_hit_error_t;
+
+static char * li_hit_error_strings[] = {
+#define _(sym,string) string,
+ foreach_li_hit_error
+#undef _
+};
+
+typedef enum {
+ LI_HIT_NEXT_ETHERNET,
+ LI_HIT_N_NEXT,
+} li_hit_next_t;
+
+static uword
+li_hit_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ li_hit_next_t next_index;
+ vlib_frame_t * int_frame = 0;
+ u32 * to_int_next = 0;
+ li_main_t * lm = &li_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (PREDICT_FALSE (vec_len (lm->collectors) == 0))
+ {
+ vlib_node_increment_counter (vm, li_hit_node.index,
+ LI_HIT_ERROR_NO_COLLECTOR,
+ n_left_from);
+ }
+ else
+ {
+ /* The intercept frame... */
+ int_frame = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ to_int_next = vlib_frame_vector_args (int_frame);
+ }
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+#if 0
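+      /*
+       * Unported dual-loop scaffolding from the node template: it still
+       * references LI_HIT_NEXT_INTERFACE_OUTPUT and a sw_if_index trace
+       * field that do not exist in this node, so it must remain disabled
+       * until it is rewritten.
+       */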
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 next0 = LI_HIT_NEXT_INTERFACE_OUTPUT;
+ u32 next1 = LI_HIT_NEXT_INTERFACE_OUTPUT;
+ u32 sw_if_index0, sw_if_index1;
+ u8 tmp0[6], tmp1[6];
+ ethernet_header_t *en0, *en1;
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
+ ASSERT (b0->current_data == 0);
+ ASSERT (b1->current_data == 0);
+
+ en0 = vlib_buffer_get_current (b0);
+ en1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ /* Send pkt back out the RX interface */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = sw_if_index1;
+
+ /* $$$$$ End of processing 2 x packets $$$$$ */
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ li_hit_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ li_hit_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+#endif /* $$$ dual-loop off */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ vlib_buffer_t * c0;
+ ip4_udp_header_t * iu0;
+ ip4_header_t * ip0;
+ udp_header_t * udp0;
+ u32 next0 = LI_HIT_NEXT_ETHERNET;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ if (PREDICT_TRUE(to_int_next != 0))
+ {
+ /* Make an intercept copy. This can fail. */
+ c0 = vlib_buffer_copy (vm, b0);
+
+ if (PREDICT_FALSE (c0 == 0))
+ {
+ vlib_node_increment_counter
+ (vm, node->node_index,
+ LI_HIT_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
+ goto skip;
+ }
+
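+		  /* rewind the copy to make room for the ip4+udp
+		   * encap header */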
+ vlib_buffer_advance(c0, -sizeof(*iu0));
+
+ iu0 = vlib_buffer_get_current(c0);
+ ip0 = &iu0->ip4;
+
+		  /* the prepended header space is uninitialized; set
+		   * every ip4 field explicitly */
+		  ip0->ip_version_and_header_length = 0x45;
+		  ip0->tos = 0;
+		  ip0->fragment_id = 0;
+		  ip0->flags_and_fragment_offset = 0;
+		  ip0->ttl = 254;
+		  ip0->protocol = IP_PROTOCOL_UDP;
+
+		  ip0->src_address.as_u32 = lm->src_addrs[0].as_u32;
+		  ip0->dst_address.as_u32 = lm->collectors[0].as_u32;
+		  /* total length of the encapsulated copy, kept in
+		   * network byte order */
+		  ip0->length =
+		    clib_host_to_net_u16 (vlib_buffer_length_in_chain
+					  (vm, c0));
+		  ip0->checksum = ip4_header_checksum (ip0);
+
+		  udp0 = &iu0->udp;
+		  udp0->src_port = udp0->dst_port =
+		    clib_host_to_net_u16 (lm->ports[0]);
+		  udp0->checksum = 0;
+		  /* udp length = udp header + original packet */
+		  udp0->length =
+		    clib_host_to_net_u16 (vlib_buffer_length_in_chain
+					  (vm, b0) + sizeof (*udp0));
+
+ to_int_next [0] = vlib_get_buffer_index (vm, c0);
+ to_int_next++;
+ }
+
+ skip:
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ li_hit_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+  if (int_frame)
+    {
+      /* account only for copies actually enqueued; buffer allocation
+       * failures above skip the enqueue */
+      int_frame->n_vectors =
+	to_int_next - (u32 *) vlib_frame_vector_args (int_frame);
+      vlib_put_frame_to_node (vm, ip4_lookup_node.index, int_frame);
+    }
+
+ vlib_node_increment_counter (vm, li_hit_node.index,
+ LI_HIT_ERROR_HITS, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (li_hit_node) = {
+ .function = li_hit_node_fn,
+ .name = "li-hit",
+ .vector_size = sizeof (u32),
+ .format_trace = format_li_hit_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(li_hit_error_strings),
+ .error_strings = li_hit_error_strings,
+
+ .n_next_nodes = LI_HIT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [LI_HIT_NEXT_ETHERNET] = "ethernet-input-not-l2",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (li_hit_node, li_hit_node_fn)
+
diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c
new file mode 100644
index 00000000..fe893606
--- /dev/null
+++ b/src/vnet/lisp-cp/control.c
@@ -0,0 +1,4756 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlibmemory/api.h>
+#include <vnet/lisp-cp/control.h>
+#include <vnet/lisp-cp/packets.h>
+#include <vnet/lisp-cp/lisp_msg_serdes.h>
+#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h>
+#include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+#include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/ethernet/packet.h>
+
+#include <openssl/evp.h>
+#include <openssl/hmac.h>
+
+#define MAX_VALUE_U24 0xffffff
+
+/* mapping timer control constants (in seconds) */
+#define TIME_UNTIL_REFETCH_OR_DELETE 20
+#define MAPPING_TIMEOUT (((m->ttl) * 60) - TIME_UNTIL_REFETCH_OR_DELETE)
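+/* note: MAPPING_TIMEOUT references a mapping_t pointer named 'm' that must
+ * be in scope at the expansion site */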
+
+lisp_cp_main_t lisp_control_main;
+
+u8 *format_lisp_cp_input_trace (u8 * s, va_list * args);
+static void *send_map_request_thread_fn (void *arg);
+
+typedef enum
+{
+ LISP_CP_INPUT_NEXT_DROP,
+ LISP_CP_INPUT_N_NEXT,
+} lisp_cp_input_next_t;
+
+typedef struct
+{
+ u8 is_resend;
+ gid_address_t seid;
+ gid_address_t deid;
+ u8 smr_invoked;
+} map_request_args_t;
+
+u8
+vnet_lisp_get_map_request_mode (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return lcm->map_request_mode;
+}
+
+static u16
+auth_data_len_by_key_id (lisp_key_type_t key_id)
+{
+ switch (key_id)
+ {
+ case HMAC_SHA_1_96:
+ return SHA1_AUTH_DATA_LEN;
+ case HMAC_SHA_256_128:
+ return SHA256_AUTH_DATA_LEN;
+ default:
+ clib_warning ("unsupported key type: %d!", key_id);
+ return (u16) ~ 0;
+ }
+ return (u16) ~ 0;
+}
+
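+/* despite its name, this returns the HMAC digest (EVP_MD) used for the
+ * authentication data, not an encryption cipher */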
+static const EVP_MD *
+get_encrypt_fcn (lisp_key_type_t key_id)
+{
+ switch (key_id)
+ {
+ case HMAC_SHA_1_96:
+ return EVP_sha1 ();
+ case HMAC_SHA_256_128:
+ return EVP_sha256 ();
+ default:
+ clib_warning ("unsupported encryption key type: %d!", key_id);
+ break;
+ }
+ return 0;
+}
+
+static int
+queue_map_request (gid_address_t * seid, gid_address_t * deid,
+ u8 smr_invoked, u8 is_resend);
+
+ip_interface_address_t *
+ip_interface_get_first_interface_address (ip_lookup_main_t * lm,
+ u32 sw_if_index, u8 loop)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *swif = vnet_get_sw_interface (vnm, sw_if_index);
+ if (loop && swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
+ sw_if_index = swif->unnumbered_sw_if_index;
+  u32 ia =
+    (vec_len ((lm)->if_address_pool_index_by_sw_if_index) > (sw_if_index)) ?
+    vec_elt ((lm)->if_address_pool_index_by_sw_if_index, (sw_if_index)) :
+    (u32) ~ 0;
+  if ((u32) ~ 0 == ia)
+    /* no address indexed for this interface */
+    return 0;
+  return pool_elt_at_index ((lm)->if_address_pool, ia);
+}
+
+void *
+ip_interface_get_first_address (ip_lookup_main_t * lm, u32 sw_if_index,
+ u8 version)
+{
+ ip_interface_address_t *ia;
+
+ ia = ip_interface_get_first_interface_address (lm, sw_if_index, 1);
+ if (!ia)
+ return 0;
+ return ip_interface_address_get_address (lm, ia);
+}
+
+int
+ip_interface_get_first_ip_address (lisp_cp_main_t * lcm, u32 sw_if_index,
+ u8 version, ip_address_t * result)
+{
+ ip_lookup_main_t *lm;
+ void *addr;
+
+ lm = (version == IP4) ? &lcm->im4->lookup_main : &lcm->im6->lookup_main;
+ addr = ip_interface_get_first_address (lm, sw_if_index, version);
+ if (!addr)
+ return 0;
+
+ ip_address_set (result, addr, version);
+ return 1;
+}
+
+/**
+ * convert from a LISP address to a FIB prefix
+ */
+void
+ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix)
+{
+ if (addr->version == IP4)
+ {
+ prefix->fp_len = 32;
+ prefix->fp_proto = FIB_PROTOCOL_IP4;
+ memset (&prefix->fp_addr.pad, 0, sizeof (prefix->fp_addr.pad));
+ memcpy (&prefix->fp_addr.ip4, &addr->ip, sizeof (prefix->fp_addr.ip4));
+ }
+ else
+ {
+ prefix->fp_len = 128;
+ prefix->fp_proto = FIB_PROTOCOL_IP6;
+ memcpy (&prefix->fp_addr.ip6, &addr->ip, sizeof (prefix->fp_addr.ip6));
+ }
+}
+
+/**
+ * convert from a LISP prefix to a FIB prefix
+ */
+void
+ip_prefix_to_fib_prefix (const ip_prefix_t * ip_prefix,
+ fib_prefix_t * fib_prefix)
+{
+ ip_address_to_fib_prefix (&ip_prefix->addr, fib_prefix);
+ fib_prefix->fp_len = ip_prefix->len;
+}
+
+/**
+ * Find the sw_if_index of the interface that would be used to egress towards
+ * dst.
+ */
+u32
+ip_fib_get_egress_iface_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst)
+{
+ fib_node_index_t fei;
+ fib_prefix_t prefix;
+
+ ip_address_to_fib_prefix (dst, &prefix);
+
+ fei = fib_table_lookup (0, &prefix);
+
+ return (fib_entry_get_resolving_interface (fei));
+}
+
+/**
+ * Find the first IP of the interface that would be used to egress towards
+ * dst. Returns 1 if an address is found, 0 otherwise.
+ */
+int
+ip_fib_get_first_egress_ip_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst,
+ ip_address_t * result)
+{
+ u32 si;
+ ip_lookup_main_t *lm;
+ void *addr = 0;
+ u8 ipver;
+
+ ASSERT (result != 0);
+
+ ipver = ip_addr_version (dst);
+
+ lm = (ipver == IP4) ? &lcm->im4->lookup_main : &lcm->im6->lookup_main;
+ si = ip_fib_get_egress_iface_for_dst (lcm, dst);
+
+ if ((u32) ~ 0 == si)
+ return 0;
+
+ /* find the first ip address */
+ addr = ip_interface_get_first_address (lm, si, ipver);
+ if (0 == addr)
+ return 0;
+
+ ip_address_set (result, addr, ipver);
+ return 1;
+}
+
+static int
+dp_add_del_iface (lisp_cp_main_t * lcm, u32 vni, u8 is_l2, u8 is_add)
+{
+ uword *dp_table;
+
+ if (!is_l2)
+ {
+ dp_table = hash_get (lcm->table_id_by_vni, vni);
+
+ if (!dp_table)
+ {
+ clib_warning ("vni %d not associated to a vrf!", vni);
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ }
+ else
+ {
+ dp_table = hash_get (lcm->bd_id_by_vni, vni);
+ if (!dp_table)
+ {
+ clib_warning ("vni %d not associated to a bridge domain!", vni);
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ }
+
+ /* enable/disable data-plane interface */
+ if (is_add)
+ {
+ if (is_l2)
+ lisp_gpe_tenant_l2_iface_add_or_lock (vni, dp_table[0]);
+ else
+ lisp_gpe_tenant_l3_iface_add_or_lock (vni, dp_table[0]);
+ }
+ else
+ {
+ if (is_l2)
+ lisp_gpe_tenant_l2_iface_unlock (vni);
+ else
+ lisp_gpe_tenant_l3_iface_unlock (vni);
+ }
+
+ return 0;
+}
+
+static void
+dp_del_fwd_entry (lisp_cp_main_t * lcm, u32 dst_map_index)
+{
+ vnet_lisp_gpe_add_del_fwd_entry_args_t _a, *a = &_a;
+ fwd_entry_t *fe = 0;
+ uword *feip = 0;
+ memset (a, 0, sizeof (*a));
+
+ feip = hash_get (lcm->fwd_entry_by_mapping_index, dst_map_index);
+ if (!feip)
+ return;
+
+ fe = pool_elt_at_index (lcm->fwd_entry_pool, feip[0]);
+
+ /* delete dp fwd entry */
+ u32 sw_if_index;
+ a->is_add = 0;
+ a->locator_pairs = fe->locator_pairs;
+ a->vni = gid_address_vni (&fe->reid);
+ gid_address_copy (&a->rmt_eid, &fe->reid);
+ if (fe->is_src_dst)
+ gid_address_copy (&a->lcl_eid, &fe->leid);
+
+ vnet_lisp_gpe_del_fwd_counters (a, feip[0]);
+ vnet_lisp_gpe_add_del_fwd_entry (a, &sw_if_index);
+
+ /* delete entry in fwd table */
+ hash_unset (lcm->fwd_entry_by_mapping_index, dst_map_index);
+ vec_free (fe->locator_pairs);
+ pool_put (lcm->fwd_entry_pool, fe);
+}
+
+/**
+ * Finds the first remote locator with the best (lowest) priority that has a
+ * local peer locator with an underlying route to it, and collects the
+ * resulting locator pairs. Returns 1 if at least one pair was found,
+ * 0 otherwise.
+ */
+static u32
+get_locator_pairs (lisp_cp_main_t * lcm, mapping_t * lcl_map,
+ mapping_t * rmt_map, locator_pair_t ** locator_pairs)
+{
+ u32 i, limitp = 0, li, found = 0, esi;
+ locator_set_t *rmt_ls, *lcl_ls;
+ ip_address_t _lcl_addr, *lcl_addr = &_lcl_addr;
+ locator_t *lp, *rmt = 0;
+ uword *checked = 0;
+ locator_pair_t pair;
+
+ rmt_ls =
+ pool_elt_at_index (lcm->locator_set_pool, rmt_map->locator_set_index);
+ lcl_ls =
+ pool_elt_at_index (lcm->locator_set_pool, lcl_map->locator_set_index);
+
+ if (!rmt_ls || vec_len (rmt_ls->locator_indices) == 0)
+ return 0;
+
+ while (1)
+ {
+ rmt = 0;
+
+ /* find unvisited remote locator with best priority */
+ for (i = 0; i < vec_len (rmt_ls->locator_indices); i++)
+ {
+ if (0 != hash_get (checked, i))
+ continue;
+
+ li = vec_elt (rmt_ls->locator_indices, i);
+ lp = pool_elt_at_index (lcm->locator_pool, li);
+
+ /* we don't support non-IP locators for now */
+ if (gid_address_type (&lp->address) != GID_ADDR_IP_PREFIX)
+ continue;
+
+ if ((found && lp->priority == limitp)
+ || (!found && lp->priority >= limitp))
+ {
+ rmt = lp;
+
+ /* don't search for locators with lower priority and don't
+ * check this locator again*/
+ limitp = lp->priority;
+ hash_set (checked, i, 1);
+ break;
+ }
+ }
+ /* check if a local locator with a route to remote locator exists */
+ if (rmt != 0)
+ {
+ /* find egress sw_if_index for rmt locator */
+ esi =
+ ip_fib_get_egress_iface_for_dst (lcm,
+ &gid_address_ip (&rmt->address));
+ if ((u32) ~ 0 == esi)
+ continue;
+
+ for (i = 0; i < vec_len (lcl_ls->locator_indices); i++)
+ {
+ li = vec_elt (lcl_ls->locator_indices, i);
+ locator_t *sl = pool_elt_at_index (lcm->locator_pool, li);
+
+ /* found local locator with the needed sw_if_index */
+ if (sl->sw_if_index == esi)
+ {
+ /* and it has an address */
+ if (0 == ip_interface_get_first_ip_address (lcm,
+ sl->sw_if_index,
+ gid_address_ip_version
+ (&rmt->address),
+ lcl_addr))
+ continue;
+
+ memset (&pair, 0, sizeof (pair));
+ ip_address_copy (&pair.rmt_loc,
+ &gid_address_ip (&rmt->address));
+ ip_address_copy (&pair.lcl_loc, lcl_addr);
+ pair.weight = rmt->weight;
+ pair.priority = rmt->priority;
+ vec_add1 (locator_pairs[0], pair);
+ found = 1;
+ }
+ }
+ }
+ else
+ break;
+ }
+
+ hash_free (checked);
+ return found;
+}
+
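+/* flatten a src/dst gid into a plain gid, taking the given fid
+ * (IP prefix or MAC) as the address */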
+static void
+gid_address_sd_to_flat (gid_address_t * dst, gid_address_t * src,
+ fid_address_t * fid)
+{
+ ASSERT (GID_ADDR_SRC_DST == gid_address_type (src));
+
+ dst[0] = src[0];
+
+ switch (fid_addr_type (fid))
+ {
+ case FID_ADDR_IP_PREF:
+ gid_address_type (dst) = GID_ADDR_IP_PREFIX;
+ gid_address_ippref (dst) = fid_addr_ippref (fid);
+ break;
+ case FID_ADDR_MAC:
+ gid_address_type (dst) = GID_ADDR_MAC;
+ mac_copy (gid_address_mac (dst), fid_addr_mac (fid));
+ break;
+ default:
+ clib_warning ("Unsupported fid type %d!", fid_addr_type (fid));
+ break;
+ }
+}
+
+u8
+vnet_lisp_map_register_state_get (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return lcm->map_registering;
+}
+
+u8
+vnet_lisp_rloc_probe_state_get (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return lcm->rloc_probing;
+}
+
+static void
+dp_add_fwd_entry (lisp_cp_main_t * lcm, u32 src_map_index, u32 dst_map_index)
+{
+ vnet_lisp_gpe_add_del_fwd_entry_args_t _a, *a = &_a;
+ gid_address_t *rmt_eid, *lcl_eid;
+ mapping_t *lcl_map, *rmt_map;
+ u32 sw_if_index, **rmts, rmts_idx;
+ uword *feip = 0, *dpid, *rmts_stored_idxp = 0;
+ fwd_entry_t *fe;
+ u8 type, is_src_dst = 0;
+ int rv;
+
+ memset (a, 0, sizeof (*a));
+
+ /* remove entry if it already exists */
+ feip = hash_get (lcm->fwd_entry_by_mapping_index, dst_map_index);
+ if (feip)
+ dp_del_fwd_entry (lcm, dst_map_index);
+
+ /*
+ * Determine local mapping and eid
+ */
+ if (lcm->lisp_pitr)
+ lcl_map = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index);
+ else
+ lcl_map = pool_elt_at_index (lcm->mapping_pool, src_map_index);
+ lcl_eid = &lcl_map->eid;
+
+ /*
+ * Determine remote mapping and eid
+ */
+ rmt_map = pool_elt_at_index (lcm->mapping_pool, dst_map_index);
+ rmt_eid = &rmt_map->eid;
+
+ /*
+ * Build and insert data plane forwarding entry
+ */
+ a->is_add = 1;
+
+ if (MR_MODE_SRC_DST == lcm->map_request_mode)
+ {
+ if (GID_ADDR_SRC_DST == gid_address_type (rmt_eid))
+ {
+ gid_address_sd_to_flat (&a->rmt_eid, rmt_eid,
+ &gid_address_sd_dst (rmt_eid));
+ gid_address_sd_to_flat (&a->lcl_eid, rmt_eid,
+ &gid_address_sd_src (rmt_eid));
+ }
+ else
+ {
+ gid_address_copy (&a->rmt_eid, rmt_eid);
+ gid_address_copy (&a->lcl_eid, lcl_eid);
+ }
+ is_src_dst = 1;
+ }
+ else
+ gid_address_copy (&a->rmt_eid, rmt_eid);
+
+ a->vni = gid_address_vni (&a->rmt_eid);
+ a->is_src_dst = is_src_dst;
+
+ /* get vrf or bd_index associated to vni */
+ type = gid_address_type (&a->rmt_eid);
+ if (GID_ADDR_IP_PREFIX == type)
+ {
+ dpid = hash_get (lcm->table_id_by_vni, a->vni);
+ if (!dpid)
+ {
+ clib_warning ("vni %d not associated to a vrf!", a->vni);
+ return;
+ }
+ a->table_id = dpid[0];
+ }
+ else if (GID_ADDR_MAC == type)
+ {
+ dpid = hash_get (lcm->bd_id_by_vni, a->vni);
+ if (!dpid)
+ {
+	  clib_warning ("vni %d not associated to a bridge domain!", a->vni);
+ return;
+ }
+ a->bd_id = dpid[0];
+ }
+
+  /* find the best locator pairs that 1) satisfy the LISP policy and
+   * 2) have underlay connectivity */
+ rv = get_locator_pairs (lcm, lcl_map, rmt_map, &a->locator_pairs);
+
+  /* Either the rmt mapping is negative or we can't find an underlay path.
+   * Try again with the PETR if one is configured */
+ if (rv == 0 && (lcm->flags & LISP_FLAG_USE_PETR))
+ {
+ rmt_map = lisp_get_petr_mapping (lcm);
+ rv = get_locator_pairs (lcm, lcl_map, rmt_map, &a->locator_pairs);
+ }
+
+ /* negative entry */
+ if (rv == 0)
+ {
+ a->is_negative = 1;
+ a->action = rmt_map->action;
+ }
+
+ rv = vnet_lisp_gpe_add_del_fwd_entry (a, &sw_if_index);
+ if (rv)
+ {
+ if (a->locator_pairs)
+ vec_free (a->locator_pairs);
+ return;
+ }
+
+ /* add tunnel to fwd entry table */
+ pool_get (lcm->fwd_entry_pool, fe);
+ vnet_lisp_gpe_add_fwd_counters (a, fe - lcm->fwd_entry_pool);
+
+ fe->locator_pairs = a->locator_pairs;
+ gid_address_copy (&fe->reid, &a->rmt_eid);
+
+ if (is_src_dst)
+ gid_address_copy (&fe->leid, &a->lcl_eid);
+ else
+ gid_address_copy (&fe->leid, lcl_eid);
+
+ fe->is_src_dst = is_src_dst;
+ hash_set (lcm->fwd_entry_by_mapping_index, dst_map_index,
+ fe - lcm->fwd_entry_pool);
+
+ /* Add rmt mapping to the vector of adjacent mappings to lcl mapping */
+ rmts_stored_idxp =
+ hash_get (lcm->lcl_to_rmt_adjs_by_lcl_idx, src_map_index);
+ if (!rmts_stored_idxp)
+ {
+ pool_get (lcm->lcl_to_rmt_adjacencies, rmts);
+ memset (rmts, 0, sizeof (*rmts));
+ rmts_idx = rmts - lcm->lcl_to_rmt_adjacencies;
+ hash_set (lcm->lcl_to_rmt_adjs_by_lcl_idx, src_map_index, rmts_idx);
+ }
+ else
+ {
+ rmts_idx = (u32) (*rmts_stored_idxp);
+ rmts = pool_elt_at_index (lcm->lcl_to_rmt_adjacencies, rmts_idx);
+ }
+ vec_add1 (rmts[0], dst_map_index);
+}
+
+typedef struct
+{
+ u32 si;
+ u32 di;
+} fwd_entry_mt_arg_t;
+
+static void *
+dp_add_fwd_entry_thread_fn (void *arg)
+{
+ fwd_entry_mt_arg_t *a = arg;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ dp_add_fwd_entry (lcm, a->si, a->di);
+ return 0;
+}
+
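+/* hand the fwd entry add over to the main thread as an RPC, so the
+ * control-plane state is only modified from the main thread */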
+static int
+dp_add_fwd_entry_from_mt (u32 si, u32 di)
+{
+ fwd_entry_mt_arg_t a;
+
+ memset (&a, 0, sizeof (a));
+ a.si = si;
+ a.di = di;
+
+ vl_api_rpc_call_main_thread (dp_add_fwd_entry_thread_fn,
+ (u8 *) & a, sizeof (a));
+ return 0;
+}
+
+/**
+ * Returns vector of adjacencies.
+ *
+ * The caller must free the vector returned by this function.
+ *
+ * @param vni virtual network identifier
+ * @return vector of adjacencies
+ */
+lisp_adjacency_t *
+vnet_lisp_adjacencies_get_by_vni (u32 vni)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ fwd_entry_t *fwd;
+ lisp_adjacency_t *adjs = 0, adj;
+
+ /* *INDENT-OFF* */
+ pool_foreach(fwd, lcm->fwd_entry_pool,
+ ({
+ if (gid_address_vni (&fwd->reid) != vni)
+ continue;
+
+ gid_address_copy (&adj.reid, &fwd->reid);
+ gid_address_copy (&adj.leid, &fwd->leid);
+ vec_add1 (adjs, adj);
+ }));
+ /* *INDENT-ON* */
+
+ return adjs;
+}
+
+static lisp_msmr_t *
+get_map_server (ip_address_t * a)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t *m;
+
+ vec_foreach (m, lcm->map_servers)
+ {
+ if (!ip_address_cmp (&m->address, a))
+ {
+ return m;
+ }
+ }
+ return 0;
+}
+
+static lisp_msmr_t *
+get_map_resolver (ip_address_t * a)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t *m;
+
+ vec_foreach (m, lcm->map_resolvers)
+ {
+ if (!ip_address_cmp (&m->address, a))
+ {
+ return m;
+ }
+ }
+ return 0;
+}
+
+int
+vnet_lisp_add_del_map_server (ip_address_t * addr, u8 is_add)
+{
+ u32 i;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t _ms, *ms = &_ms;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ if (is_add)
+ {
+ if (get_map_server (addr))
+ {
+ clib_warning ("map-server %U already exists!", format_ip_address,
+ addr);
+ return -1;
+ }
+
+ memset (ms, 0, sizeof (*ms));
+ ip_address_copy (&ms->address, addr);
+ vec_add1 (lcm->map_servers, ms[0]);
+
+ if (vec_len (lcm->map_servers) == 1)
+ lcm->do_map_server_election = 1;
+ }
+ else
+ {
+ for (i = 0; i < vec_len (lcm->map_servers); i++)
+ {
+ ms = vec_elt_at_index (lcm->map_servers, i);
+ if (!ip_address_cmp (&ms->address, addr))
+ {
+ if (!ip_address_cmp (&ms->address, &lcm->active_map_server))
+ lcm->do_map_server_election = 1;
+
+ vec_del1 (lcm->map_servers, i);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Add/remove mapping to/from map-cache. Overwriting not allowed.
+ */
+int
+vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a,
+ u32 * map_index_result)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 mi, *map_indexp, map_index, i;
+ u32 **rmts = 0, *remote_idxp, rmts_itr, remote_idx;
+ uword *rmts_idxp;
+ mapping_t *m, *old_map;
+ u32 **eid_indexes;
+
+ if (gid_address_type (&a->eid) == GID_ADDR_NSH)
+ {
+ if (gid_address_vni (&a->eid) != 0)
+ {
+	  clib_warning ("Only the default VNI is supported for NSH!");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ if (gid_address_nsh_spi (&a->eid) > MAX_VALUE_U24)
+ {
+	  clib_warning ("SPI is greater than 24 bits!");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ }
+
+ mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &a->eid);
+ old_map = mi != ~0 ? pool_elt_at_index (lcm->mapping_pool, mi) : 0;
+ if (a->is_add)
+ {
+ /* TODO check if overwriting and take appropriate actions */
+ if (mi != GID_LOOKUP_MISS && !gid_address_cmp (&old_map->eid, &a->eid))
+ {
+ clib_warning ("eid %U found in the eid-table", format_gid_address,
+ &a->eid);
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ pool_get (lcm->mapping_pool, m);
+ gid_address_copy (&m->eid, &a->eid);
+ m->locator_set_index = a->locator_set_index;
+ m->ttl = a->ttl;
+ m->action = a->action;
+ m->local = a->local;
+ m->is_static = a->is_static;
+ m->key = vec_dup (a->key);
+ m->key_id = a->key_id;
+
+ map_index = m - lcm->mapping_pool;
+ gid_dictionary_add_del (&lcm->mapping_index_by_gid, &a->eid, map_index,
+ 1);
+
+ if (pool_is_free_index (lcm->locator_set_pool, a->locator_set_index))
+ {
+ clib_warning ("Locator set with index %d doesn't exist",
+ a->locator_set_index);
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ /* add eid to list of eids supported by locator-set */
+ vec_validate (lcm->locator_set_to_eids, a->locator_set_index);
+ eid_indexes = vec_elt_at_index (lcm->locator_set_to_eids,
+ a->locator_set_index);
+ vec_add1 (eid_indexes[0], map_index);
+
+ if (a->local)
+ {
+ /* mark as local */
+ vec_add1 (lcm->local_mappings_indexes, map_index);
+ }
+ map_index_result[0] = map_index;
+ }
+ else
+ {
+ if (mi == GID_LOOKUP_MISS)
+ {
+ clib_warning ("eid %U not found in the eid-table",
+ format_gid_address, &a->eid);
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ /* clear locator-set to eids binding */
+ eid_indexes = vec_elt_at_index (lcm->locator_set_to_eids,
+ a->locator_set_index);
+ for (i = 0; i < vec_len (eid_indexes[0]); i++)
+ {
+ map_indexp = vec_elt_at_index (eid_indexes[0], i);
+ if (map_indexp[0] == mi)
+ break;
+ }
+ vec_del1 (eid_indexes[0], i);
+
+ /* remove local mark if needed */
+ m = pool_elt_at_index (lcm->mapping_pool, mi);
+ if (m->local)
+ {
+ /* Remove adjacencies associated with the local mapping */
+ rmts_idxp = hash_get (lcm->lcl_to_rmt_adjs_by_lcl_idx, mi);
+ if (rmts_idxp)
+ {
+ rmts =
+ pool_elt_at_index (lcm->lcl_to_rmt_adjacencies, rmts_idxp[0]);
+ vec_foreach (remote_idxp, rmts[0])
+ {
+ dp_del_fwd_entry (lcm, remote_idxp[0]);
+ }
+ vec_free (rmts[0]);
+ pool_put (lcm->lcl_to_rmt_adjacencies, rmts);
+ hash_unset (lcm->lcl_to_rmt_adjs_by_lcl_idx, mi);
+ }
+
+ u32 k, *lm_indexp;
+ for (k = 0; k < vec_len (lcm->local_mappings_indexes); k++)
+ {
+ lm_indexp = vec_elt_at_index (lcm->local_mappings_indexes, k);
+ if (lm_indexp[0] == mi)
+ break;
+ }
+ vec_del1 (lcm->local_mappings_indexes, k);
+ }
+ else
+ {
+ /* Remove remote (if present) from the vectors of lcl-to-rmts
+ * TODO: Address this in a more efficient way.
+ */
+ /* *INDENT-OFF* */
+ pool_foreach (rmts, lcm->lcl_to_rmt_adjacencies,
+ ({
+ vec_foreach_index (rmts_itr, rmts[0])
+ {
+ remote_idx = vec_elt (rmts[0], rmts_itr);
+ if (mi == remote_idx)
+ {
+ vec_del1 (rmts[0], rmts_itr);
+ break;
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ /* remove mapping from dictionary */
+ gid_dictionary_add_del (&lcm->mapping_index_by_gid, &a->eid, 0, 0);
+ gid_address_free (&m->eid);
+ pool_put_index (lcm->mapping_pool, mi);
+ }
+
+ return 0;
+}
+
+/**
+ * Add/update/delete mapping to/in/from map-cache.
+ */
+int
+vnet_lisp_add_del_local_mapping (vnet_lisp_add_del_mapping_args_t * a,
+ u32 * map_index_result)
+{
+ uword *dp_table = 0;
+ u32 vni;
+ u8 type;
+
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ vni = gid_address_vni (&a->eid);
+ type = gid_address_type (&a->eid);
+ if (GID_ADDR_IP_PREFIX == type)
+ dp_table = hash_get (lcm->table_id_by_vni, vni);
+ else if (GID_ADDR_MAC == type)
+ dp_table = hash_get (lcm->bd_id_by_vni, vni);
+
+ if (!dp_table && GID_ADDR_NSH != type)
+ {
+ clib_warning ("vni %d not associated to a %s!", vni,
+ GID_ADDR_IP_PREFIX == type ? "vrf" : "bd");
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ /* store/remove mapping from map-cache */
+ return vnet_lisp_map_cache_add_del (a, map_index_result);
+}
+
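+/* bihash key layout for L2 ARP/NDP entries: key[0] packs the IP version in
+ * its low 32 bits and the bridge domain in its high 32 bits; the IP address
+ * itself starts at key[1] */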
+static void
+add_l2_arp_bd (BVT (clib_bihash_kv) * kvp, void *arg)
+{
+ u32 **ht = arg;
+ u32 version = (u32) kvp->key[0];
+ if (IP6 == version)
+ return;
+
+ u32 bd = (u32) (kvp->key[0] >> 32);
+ hash_set (ht[0], bd, 0);
+}
+
+u32 *
+vnet_lisp_l2_arp_bds_get (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 *bds = 0;
+
+ gid_dict_foreach_l2_arp_ndp_entry (&lcm->mapping_index_by_gid,
+ add_l2_arp_bd, &bds);
+ return bds;
+}
+
+static void
+add_ndp_bd (BVT (clib_bihash_kv) * kvp, void *arg)
+{
+ u32 **ht = arg;
+ u32 version = (u32) kvp->key[0];
+ if (IP4 == version)
+ return;
+
+ u32 bd = (u32) (kvp->key[0] >> 32);
+ hash_set (ht[0], bd, 0);
+}
+
+u32 *
+vnet_lisp_ndp_bds_get (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 *bds = 0;
+
+ gid_dict_foreach_l2_arp_ndp_entry (&lcm->mapping_index_by_gid,
+ add_ndp_bd, &bds);
+ return bds;
+}
+
+typedef struct
+{
+ void *vector;
+ u32 bd;
+} lisp_add_l2_arp_ndp_args_t;
+
+static void
+add_l2_arp_entry (BVT (clib_bihash_kv) * kvp, void *arg)
+{
+ lisp_add_l2_arp_ndp_args_t *a = arg;
+ lisp_api_l2_arp_entry_t **vector = a->vector, e;
+
+ u32 version = (u32) kvp->key[0];
+ if (IP6 == version)
+ return;
+
+ u32 bd = (u32) (kvp->key[0] >> 32);
+
+ if (bd == a->bd)
+ {
+ mac_copy (e.mac, (void *) &kvp->value);
+ e.ip4 = (u32) kvp->key[1];
+ vec_add1 (vector[0], e);
+ }
+}
+
+lisp_api_l2_arp_entry_t *
+vnet_lisp_l2_arp_entries_get_by_bd (u32 bd)
+{
+ lisp_api_l2_arp_entry_t *entries = 0;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_add_l2_arp_ndp_args_t a;
+
+ a.vector = &entries;
+ a.bd = bd;
+
+ gid_dict_foreach_l2_arp_ndp_entry (&lcm->mapping_index_by_gid,
+ add_l2_arp_entry, &a);
+ return entries;
+}
+
+static void
+add_ndp_entry (BVT (clib_bihash_kv) * kvp, void *arg)
+{
+ lisp_add_l2_arp_ndp_args_t *a = arg;
+ lisp_api_ndp_entry_t **vector = a->vector, e;
+
+ u32 version = (u32) kvp->key[0];
+ if (IP4 == version)
+ return;
+
+ u32 bd = (u32) (kvp->key[0] >> 32);
+
+ if (bd == a->bd)
+ {
+ mac_copy (e.mac, (void *) &kvp->value);
+ clib_memcpy (e.ip6, &kvp->key[1], 16);
+ vec_add1 (vector[0], e);
+ }
+}
+
+lisp_api_ndp_entry_t *
+vnet_lisp_ndp_entries_get_by_bd (u32 bd)
+{
+ lisp_api_ndp_entry_t *entries = 0;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_add_l2_arp_ndp_args_t a;
+
+ a.vector = &entries;
+ a.bd = bd;
+
+ gid_dict_foreach_l2_arp_ndp_entry (&lcm->mapping_index_by_gid,
+ add_ndp_entry, &a);
+ return entries;
+}
+
+int
+vnet_lisp_add_del_l2_arp_ndp_entry (gid_address_t * key, u8 * mac, u8 is_add)
+{
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ int rc = 0;
+
+ u64 res = gid_dictionary_lookup (&lcm->mapping_index_by_gid, key);
+ if (is_add)
+ {
+ if (res != GID_LOOKUP_MISS_L2)
+ {
+ clib_warning ("Entry %U exists in DB!", format_gid_address, key);
+ return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+ }
+ u64 val = mac_to_u64 (mac);
+ gid_dictionary_add_del (&lcm->mapping_index_by_gid, key, val,
+ 1 /* is_add */ );
+ }
+ else
+ {
+ if (res == GID_LOOKUP_MISS_L2)
+ {
+ clib_warning ("ONE entry %U not found - cannot delete!",
+ format_gid_address, key);
+ return -1;
+ }
+ gid_dictionary_add_del (&lcm->mapping_index_by_gid, key, 0,
+ 0 /* is_add */ );
+ }
+
+ return rc;
+}
+
+int
+vnet_lisp_eid_table_map (u32 vni, u32 dp_id, u8 is_l2, u8 is_add)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ uword *dp_idp, *vnip, **dp_table_by_vni, **vni_by_dp_table;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ dp_table_by_vni = is_l2 ? &lcm->bd_id_by_vni : &lcm->table_id_by_vni;
+ vni_by_dp_table = is_l2 ? &lcm->vni_by_bd_id : &lcm->vni_by_table_id;
+
+ if (!is_l2 && (vni == 0 || dp_id == 0))
+ {
+ clib_warning ("can't add/del default vni-vrf mapping!");
+ return -1;
+ }
+
+ dp_idp = hash_get (dp_table_by_vni[0], vni);
+ vnip = hash_get (vni_by_dp_table[0], dp_id);
+
+ if (is_add)
+ {
+ if (dp_idp || vnip)
+ {
+ clib_warning ("vni %d or vrf %d already used in vrf/vni "
+ "mapping!", vni, dp_id);
+ return -1;
+ }
+ hash_set (dp_table_by_vni[0], vni, dp_id);
+ hash_set (vni_by_dp_table[0], dp_id, vni);
+
+ /* create dp iface */
+ dp_add_del_iface (lcm, vni, is_l2, 1);
+ }
+ else
+ {
+ if (!dp_idp || !vnip)
+ {
+	  clib_warning ("vni %d or vrf %d not used in any vrf/vni "
+			"mapping!", vni, dp_id);
+ return -1;
+ }
+ /* remove dp iface */
+ dp_add_del_iface (lcm, vni, is_l2, 0);
+
+ hash_unset (dp_table_by_vni[0], vni);
+ hash_unset (vni_by_dp_table[0], dp_id);
+ }
+ return 0;
+  return 0;
+}
+/* return 0 if the two locator sets are identical, 1 otherwise */
+static u8
+compare_locators (lisp_cp_main_t * lcm, u32 * old_ls_indexes,
+ locator_t * new_locators)
+{
+ u32 i, old_li;
+ locator_t *old_loc, *new_loc;
+
+ if (vec_len (old_ls_indexes) != vec_len (new_locators))
+ return 1;
+
+ for (i = 0; i < vec_len (new_locators); i++)
+ {
+ old_li = vec_elt (old_ls_indexes, i);
+ old_loc = pool_elt_at_index (lcm->locator_pool, old_li);
+
+ new_loc = vec_elt_at_index (new_locators, i);
+
+ if (locator_cmp (old_loc, new_loc))
+ return 1;
+ }
+ return 0;
+}
+
+typedef struct
+{
+ u8 is_negative;
+ void *lcm;
+ gid_address_t *eids_to_be_deleted;
+} remove_mapping_args_t;
+
+/**
+ * Callback invoked when a sub-prefix is found
+ */
+static void
+remove_mapping_if_needed (u32 mi, void *arg)
+{
+ u8 delete = 0;
+ remove_mapping_args_t *a = arg;
+ lisp_cp_main_t *lcm = a->lcm;
+ mapping_t *m;
+ locator_set_t *ls;
+
+ m = pool_elt_at_index (lcm->mapping_pool, mi);
+ if (!m)
+ return;
+
+ ls = pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index);
+
+ if (a->is_negative)
+ {
+ if (0 != vec_len (ls->locator_indices))
+ delete = 1;
+ }
+ else
+ {
+ if (0 == vec_len (ls->locator_indices))
+ delete = 1;
+ }
+
+ if (delete)
+ vec_add1 (a->eids_to_be_deleted, m->eid);
+}
+
+/**
+ * This function searches the map cache for IP prefixes that are a subset
+ * of the provided one. If such a prefix is found then, depending on
+ * 'is_negative', it does the following:
+ *
+ * 1) if is_negative is true and the found prefix points to a positive
+ *    mapping, the mapping is removed
+ * 2) if is_negative is false and the found prefix points to a negative
+ *    mapping, the mapping is removed
+ */
+static void
+remove_overlapping_sub_prefixes (lisp_cp_main_t * lcm, gid_address_t * eid,
+ u8 is_negative)
+{
+ gid_address_t *e;
+ remove_mapping_args_t a;
+
+ memset (&a, 0, sizeof (a));
+
+ /* do this only in src/dst mode ... */
+ if (MR_MODE_SRC_DST != lcm->map_request_mode)
+ return;
+
+ /* ... and only for IP prefix */
+ if (GID_ADDR_SRC_DST != gid_address_type (eid)
+ || (FID_ADDR_IP_PREF != gid_address_sd_dst_type (eid)))
+ return;
+
+ a.is_negative = is_negative;
+ a.lcm = lcm;
+
+ gid_dict_foreach_subprefix (&lcm->mapping_index_by_gid, eid,
+ remove_mapping_if_needed, &a);
+
+ vec_foreach (e, a.eids_to_be_deleted)
+ {
+ vnet_lisp_add_del_adjacency_args_t _adj_args, *adj_args = &_adj_args;
+
+ memset (adj_args, 0, sizeof (adj_args[0]));
+ gid_address_copy (&adj_args->reid, e);
+ adj_args->is_add = 0;
+ if (vnet_lisp_add_del_adjacency (adj_args))
+ clib_warning ("failed to del adjacency!");
+
+ vnet_lisp_del_mapping (e, NULL);
+ }
+
+ vec_free (a.eids_to_be_deleted);
+}
+
+static void
+mapping_delete_timer (lisp_cp_main_t * lcm, u32 mi)
+{
+ timing_wheel_delete (&lcm->wheel, mi);
+}
+
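+/* returns non-zero if addr resolves to a FIB entry flagged LOCAL, i.e.
+ * one of our own addresses */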
+static int
+is_local_ip (lisp_cp_main_t * lcm, ip_address_t * addr)
+{
+ fib_node_index_t fei;
+ fib_prefix_t prefix;
+ fib_entry_flag_t flags;
+
+ ip_address_to_fib_prefix (addr, &prefix);
+
+ fei = fib_table_lookup (0, &prefix);
+ flags = fib_entry_get_flags (fei);
+ return (FIB_ENTRY_FLAG_LOCAL & flags);
+}
+
+/**
+ * Adds/updates mapping. Does not program forwarding.
+ *
+ * @param a parameters of the new mapping
+ * @param rlocs vector of remote locators
+ * @param res_map_index index of the newly created mapping
+ * @param is_updated set to 1 if the mapping was created or its attributes
+ *                   or locators changed
+ * @return return code
+ */
+int
+vnet_lisp_add_mapping (vnet_lisp_add_del_mapping_args_t * a,
+ locator_t * rlocs,
+ u32 * res_map_index, u8 * is_updated)
+{
+ vnet_lisp_add_del_locator_set_args_t _ls_args, *ls_args = &_ls_args;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 mi, ls_index = 0, dst_map_index;
+ mapping_t *old_map;
+ locator_t *loc;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ if (res_map_index)
+ res_map_index[0] = ~0;
+ if (is_updated)
+ is_updated[0] = 0;
+
+ memset (ls_args, 0, sizeof (ls_args[0]));
+
+ ls_args->locators = rlocs;
+ mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &a->eid);
+ old_map = ((u32) ~ 0 != mi) ? pool_elt_at_index (lcm->mapping_pool, mi) : 0;
+
+  /* check that none of the locators match a locally configured address */
+ vec_foreach (loc, rlocs)
+ {
+ ip_prefix_t *p = &gid_address_ippref (&loc->address);
+ if (is_local_ip (lcm, &ip_prefix_addr (p)))
+ {
+ clib_warning ("RLOC %U matches a local address!",
+ format_gid_address, &loc->address);
+ return VNET_API_ERROR_LISP_RLOC_LOCAL;
+ }
+ }
+
+ /* overwrite: if mapping already exists, decide if locators should be
+ * updated and be done */
+ if (old_map && gid_address_cmp (&old_map->eid, &a->eid) == 0)
+ {
+ if (!a->is_static && (old_map->is_static || old_map->local))
+ {
+ /* do not overwrite local or static remote mappings */
+ clib_warning ("mapping %U rejected due to collision with local "
+ "or static remote mapping!", format_gid_address,
+ &a->eid);
+ return 0;
+ }
+
+ locator_set_t *old_ls;
+
+ /* update mapping attributes */
+      if (old_map->action != a->action && NULL != is_updated)
+	is_updated[0] = 1;
+      old_map->action = a->action;
+
+ old_map->authoritative = a->authoritative;
+ old_map->ttl = a->ttl;
+
+ old_ls = pool_elt_at_index (lcm->locator_set_pool,
+ old_map->locator_set_index);
+ if (compare_locators (lcm, old_ls->locator_indices, ls_args->locators))
+ {
+ /* set locator-set index to overwrite */
+ ls_args->is_add = 1;
+ ls_args->index = old_map->locator_set_index;
+ vnet_lisp_add_del_locator_set (ls_args, 0);
+ if (is_updated)
+ is_updated[0] = 1;
+ }
+ if (res_map_index)
+ res_map_index[0] = mi;
+ }
+ /* new mapping */
+ else
+ {
+ if (is_updated)
+ is_updated[0] = 1;
+ remove_overlapping_sub_prefixes (lcm, &a->eid, 0 == ls_args->locators);
+
+ ls_args->is_add = 1;
+ ls_args->index = ~0;
+
+ vnet_lisp_add_del_locator_set (ls_args, &ls_index);
+
+ /* add mapping */
+ a->is_add = 1;
+ a->locator_set_index = ls_index;
+ vnet_lisp_map_cache_add_del (a, &dst_map_index);
+
+ if (res_map_index)
+ res_map_index[0] = dst_map_index;
+ }
+
+ /* success */
+ return 0;
+}
+
+/**
+ * Removes a mapping. Does not program forwarding.
+ *
+ * @param eid end-host identifier
+ * @param res_map_index index of the removed mapping
+ * @return return code
+ */
+int
+vnet_lisp_del_mapping (gid_address_t * eid, u32 * res_map_index)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ vnet_lisp_add_del_mapping_args_t _m_args, *m_args = &_m_args;
+ vnet_lisp_add_del_locator_set_args_t _ls_args, *ls_args = &_ls_args;
+ mapping_t *old_map;
+ u32 mi;
+
+ memset (ls_args, 0, sizeof (ls_args[0]));
+ memset (m_args, 0, sizeof (m_args[0]));
+ if (res_map_index)
+ res_map_index[0] = ~0;
+
+ mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, eid);
+ old_map = ((u32) ~ 0 != mi) ? pool_elt_at_index (lcm->mapping_pool, mi) : 0;
+
+ if (old_map == 0 || gid_address_cmp (&old_map->eid, eid) != 0)
+ {
+ clib_warning ("cannot delete mapping for eid %U",
+ format_gid_address, eid);
+ return -1;
+ }
+
+ m_args->is_add = 0;
+ gid_address_copy (&m_args->eid, eid);
+ m_args->locator_set_index = old_map->locator_set_index;
+
+ /* delete mapping associated from map-cache */
+ vnet_lisp_map_cache_add_del (m_args, 0);
+
+ ls_args->is_add = 0;
+ ls_args->index = old_map->locator_set_index;
+
+ /* delete locator set */
+ vnet_lisp_add_del_locator_set (ls_args, 0);
+
+ /* delete timer associated to the mapping if any */
+ if (old_map->timer_set)
+ mapping_delete_timer (lcm, mi);
+
+ /* return old mapping index */
+ if (res_map_index)
+ res_map_index[0] = mi;
+
+ /* success */
+ return 0;
+}
+
+int
+vnet_lisp_clear_all_remote_adjacencies (void)
+{
+ int rv = 0;
+ u32 mi, *map_indices = 0, *map_indexp;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ vnet_lisp_add_del_mapping_args_t _dm_args, *dm_args = &_dm_args;
+ vnet_lisp_add_del_locator_set_args_t _ls, *ls = &_ls;
+
+ /* *INDENT-OFF* */
+ pool_foreach_index (mi, lcm->mapping_pool,
+ ({
+ vec_add1 (map_indices, mi);
+ }));
+ /* *INDENT-ON* */
+
+ vec_foreach (map_indexp, map_indices)
+ {
+ mapping_t *map = pool_elt_at_index (lcm->mapping_pool, map_indexp[0]);
+ if (!map->local)
+ {
+ dp_del_fwd_entry (lcm, map_indexp[0]);
+
+ dm_args->is_add = 0;
+ gid_address_copy (&dm_args->eid, &map->eid);
+ dm_args->locator_set_index = map->locator_set_index;
+
+ /* delete mapping associated to fwd entry */
+ vnet_lisp_map_cache_add_del (dm_args, 0);
+
+ ls->is_add = 0;
+ ls->local = 0;
+ ls->index = map->locator_set_index;
+ /* delete locator set */
+ rv = vnet_lisp_add_del_locator_set (ls, 0);
+ if (rv != 0)
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ if (map_indices)
+ vec_free (map_indices);
+ return rv;
+}
+
+/**
+ * Adds adjacency or removes forwarding entry associated to remote mapping.
+ * Note that adjacencies are not stored, they only result in forwarding entries
+ * being created.
+ */
+int
+vnet_lisp_add_del_adjacency (vnet_lisp_add_del_adjacency_args_t * a)
+{
+  lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 local_mi, remote_mi = ~0;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ remote_mi = gid_dictionary_sd_lookup (&lcm->mapping_index_by_gid,
+ &a->reid, &a->leid);
+ if (GID_LOOKUP_MISS == remote_mi)
+ {
+ clib_warning ("Remote eid %U not found. Cannot add adjacency!",
+ format_gid_address, &a->reid);
+
+ return -1;
+ }
+
+ if (a->is_add)
+ {
+ /* check if source eid has an associated mapping. If pitr mode is on,
+ * just use the pitr's mapping */
+ if (lcm->lisp_pitr)
+ local_mi = lcm->pitr_map_index;
+ else
+ {
+ if (gid_address_type (&a->reid) == GID_ADDR_NSH)
+ {
+ if (lcm->nsh_map_index == ~0)
+ local_mi = GID_LOOKUP_MISS;
+ else
+ local_mi = lcm->nsh_map_index;
+ }
+ else
+ {
+ local_mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid,
+ &a->leid);
+ }
+ }
+
+ if (GID_LOOKUP_MISS == local_mi)
+ {
+ clib_warning ("Local eid %U not found. Cannot add adjacency!",
+ format_gid_address, &a->leid);
+
+ return -1;
+ }
+
+ /* update forwarding */
+ dp_add_fwd_entry (lcm, local_mi, remote_mi);
+ }
+ else
+ dp_del_fwd_entry (lcm, remote_mi);
+
+ return 0;
+}
+
+int
+vnet_lisp_set_map_request_mode (u8 mode)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ if (mode >= _MR_MODE_MAX)
+ {
+ clib_warning ("Invalid LISP map request mode %d!", mode);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ lcm->map_request_mode = mode;
+ return 0;
+}
+
+int
+vnet_lisp_nsh_set_locator_set (u8 * locator_set_name, u8 is_add)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ u32 locator_set_index = ~0;
+ mapping_t *m;
+ uword *p;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ if (is_add)
+ {
+ if (lcm->nsh_map_index == (u32) ~ 0)
+ {
+ p = hash_get_mem (lcm->locator_set_index_by_name, locator_set_name);
+ if (!p)
+ {
+ clib_warning ("locator-set %v doesn't exist", locator_set_name);
+ return -1;
+ }
+ locator_set_index = p[0];
+
+ pool_get (lcm->mapping_pool, m);
+ memset (m, 0, sizeof *m);
+ m->locator_set_index = locator_set_index;
+ m->local = 1;
+ m->nsh_set = 1;
+ lcm->nsh_map_index = m - lcm->mapping_pool;
+
+ if (~0 == vnet_lisp_gpe_add_nsh_iface (lgm))
+ return -1;
+ }
+ }
+ else
+ {
+ if (lcm->nsh_map_index != (u32) ~ 0)
+ {
+ /* remove NSH mapping */
+ pool_put_index (lcm->mapping_pool, lcm->nsh_map_index);
+ lcm->nsh_map_index = ~0;
+ vnet_lisp_gpe_del_nsh_iface (lgm);
+ }
+ }
+ return 0;
+}
+
+int
+vnet_lisp_pitr_set_locator_set (u8 * locator_set_name, u8 is_add)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 locator_set_index = ~0;
+ mapping_t *m;
+ uword *p;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ p = hash_get_mem (lcm->locator_set_index_by_name, locator_set_name);
+ if (!p)
+ {
+ clib_warning ("locator-set %v doesn't exist", locator_set_name);
+ return -1;
+ }
+ locator_set_index = p[0];
+
+ if (is_add)
+ {
+      pool_get (lcm->mapping_pool, m);
+      memset (m, 0, sizeof (m[0]));
+      m->locator_set_index = locator_set_index;
+ m->local = 1;
+ m->pitr_set = 1;
+ lcm->pitr_map_index = m - lcm->mapping_pool;
+
+ /* enable pitr mode */
+ lcm->lisp_pitr = 1;
+ }
+ else
+ {
+ /* remove pitr mapping */
+ pool_put_index (lcm->mapping_pool, lcm->pitr_map_index);
+
+ /* disable pitr mode */
+ lcm->lisp_pitr = 0;
+ }
+ return 0;
+}
+
+int
+vnet_lisp_map_register_fallback_threshold_set (u32 value)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ if (0 == value)
+ {
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ lcm->max_expired_map_registers = value;
+ return 0;
+}
+
+u32
+vnet_lisp_map_register_fallback_threshold_get (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return lcm->max_expired_map_registers;
+}
+
+/**
+ * Configure Proxy-ETR
+ *
+ * @param ip PETR's IP address
+ * @param is_add Flag that indicates if this is an addition or removal
+ *
+ * @return 0 on success
+ */
+int
+vnet_lisp_use_petr (ip_address_t * ip, u8 is_add)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 ls_index = ~0;
+ mapping_t *m;
+ vnet_lisp_add_del_locator_set_args_t _ls_args, *ls_args = &_ls_args;
+ locator_t loc;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ memset (ls_args, 0, sizeof (*ls_args));
+
+ if (is_add)
+ {
+ /* Create dummy petr locator-set */
+ memset (&loc, 0, sizeof (loc));
+ gid_address_from_ip (&loc.address, ip);
+ loc.priority = 1;
+ loc.state = loc.weight = 1;
+ loc.local = 0;
+
+ ls_args->is_add = 1;
+ ls_args->index = ~0;
+ vec_add1 (ls_args->locators, loc);
+ vnet_lisp_add_del_locator_set (ls_args, &ls_index);
+
+ /* Add petr mapping */
+      pool_get (lcm->mapping_pool, m);
+      memset (m, 0, sizeof (m[0]));
+      m->locator_set_index = ls_index;
+ lcm->petr_map_index = m - lcm->mapping_pool;
+
+ /* Enable use-petr */
+ lcm->flags |= LISP_FLAG_USE_PETR;
+ }
+ else
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->petr_map_index);
+
+ /* Remove petr locator */
+ ls_args->is_add = 0;
+ ls_args->index = m->locator_set_index;
+ vnet_lisp_add_del_locator_set (ls_args, 0);
+
+ /* Remove petr mapping */
+ pool_put_index (lcm->mapping_pool, lcm->petr_map_index);
+
+ /* Disable use-petr */
+ lcm->flags &= ~LISP_FLAG_USE_PETR;
+ }
+ return 0;
+}
+
+/* cleans locator to locator-set data and removes locators not part of
+ * any locator-set */
+static void
+clean_locator_to_locator_set (lisp_cp_main_t * lcm, u32 lsi)
+{
+ u32 i, j, *loc_indexp, *ls_indexp, **ls_indexes, *to_be_deleted = 0;
+ locator_set_t *ls = pool_elt_at_index (lcm->locator_set_pool, lsi);
+ for (i = 0; i < vec_len (ls->locator_indices); i++)
+ {
+ loc_indexp = vec_elt_at_index (ls->locator_indices, i);
+ ls_indexes = vec_elt_at_index (lcm->locator_to_locator_sets,
+ loc_indexp[0]);
+ for (j = 0; j < vec_len (ls_indexes[0]); j++)
+ {
+ ls_indexp = vec_elt_at_index (ls_indexes[0], j);
+ if (ls_indexp[0] == lsi)
+ break;
+ }
+
+ /* delete index for removed locator-set */
+ vec_del1 (ls_indexes[0], j);
+
+ /* delete locator if it's part of no locator-set */
+ if (vec_len (ls_indexes[0]) == 0)
+ {
+ pool_put_index (lcm->locator_pool, loc_indexp[0]);
+ vec_add1 (to_be_deleted, i);
+ }
+ }
+
+ if (to_be_deleted)
+ {
+ for (i = 0; i < vec_len (to_be_deleted); i++)
+ {
+ loc_indexp = vec_elt_at_index (to_be_deleted, i);
+ vec_del1 (ls->locator_indices, loc_indexp[0]);
+ }
+ vec_free (to_be_deleted);
+ }
+}
+
+static inline uword *
+get_locator_set_index (vnet_lisp_add_del_locator_set_args_t * a, uword * p)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ ASSERT (a != NULL);
+ ASSERT (p != NULL);
+
+ /* find locator-set */
+ if (a->local)
+ {
+ p = hash_get_mem (lcm->locator_set_index_by_name, a->name);
+ }
+ else
+ {
+ *p = a->index;
+ }
+
+ return p;
+}
+
+static inline int
+is_locator_in_locator_set (lisp_cp_main_t * lcm, locator_set_t * ls,
+ locator_t * loc)
+{
+ locator_t *itloc;
+ u32 *locit;
+
+ ASSERT (ls != NULL);
+ ASSERT (loc != NULL);
+
+ vec_foreach (locit, ls->locator_indices)
+ {
+ itloc = pool_elt_at_index (lcm->locator_pool, locit[0]);
+ if ((ls->local && itloc->sw_if_index == loc->sw_if_index) ||
+ (!ls->local && !gid_address_cmp (&itloc->address, &loc->address)))
+ {
+ clib_warning ("Duplicate locator");
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ }
+
+ return 0;
+}
+
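+/* delete and re-add (or just delete, if remove_only) the adjacencies whose
+ * local or remote eid matches the given mapping, so forwarding picks up the
+ * locator change */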
+static void
+update_adjacencies_by_map_index (lisp_cp_main_t * lcm, u8 is_local,
+ u32 mapping_index, u8 remove_only)
+{
+ fwd_entry_t *fwd;
+ mapping_t *map;
+ vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+
+ map = pool_elt_at_index (lcm->mapping_pool, mapping_index);
+
+ /* *INDENT-OFF* */
+ pool_foreach(fwd, lcm->fwd_entry_pool,
+ ({
+ if ((is_local && 0 == gid_address_cmp (&map->eid, &fwd->leid)) ||
+ (!is_local && 0 == gid_address_cmp (&map->eid, &fwd->reid)))
+ {
+ a->is_add = 0;
+ gid_address_copy (&a->leid, &fwd->leid);
+ gid_address_copy (&a->reid, &fwd->reid);
+
+ vnet_lisp_add_del_adjacency (a);
+
+ if (!remove_only)
+ {
+ a->is_add = 1;
+ vnet_lisp_add_del_adjacency (a);
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+update_fwd_entries_by_locator_set (lisp_cp_main_t * lcm, u8 is_local,
+ u32 ls_index, u8 remove_only)
+{
+ u32 i, *map_indexp;
+ u32 **eid_indexes;
+
+ if (vec_len (lcm->locator_set_to_eids) <= ls_index)
+ return;
+
+ eid_indexes = vec_elt_at_index (lcm->locator_set_to_eids, ls_index);
+
+ for (i = 0; i < vec_len (eid_indexes[0]); i++)
+ {
+ map_indexp = vec_elt_at_index (eid_indexes[0], i);
+ update_adjacencies_by_map_index (lcm, is_local, map_indexp[0],
+ remove_only);
+ }
+}
+
+static inline void
+remove_locator_from_locator_set (locator_set_t * ls, u32 * locit,
+ u32 ls_index, u32 loc_id)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 **ls_indexes = NULL;
+
+ ASSERT (ls != NULL);
+ ASSERT (locit != NULL);
+
+ ls_indexes = vec_elt_at_index (lcm->locator_to_locator_sets, locit[0]);
+ pool_put_index (lcm->locator_pool, locit[0]);
+ vec_del1 (ls->locator_indices, loc_id);
+ vec_del1 (ls_indexes[0], ls_index);
+}
+
+int
+vnet_lisp_add_del_locator (vnet_lisp_add_del_locator_set_args_t * a,
+ locator_set_t * ls, u32 * ls_result)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_t *loc = NULL, *itloc = NULL;
+ uword _p = (u32) ~ 0, *p = &_p;
+ u32 loc_index = ~0, ls_index = ~0, *locit = NULL, **ls_indexes = NULL;
+ u32 loc_id = ~0;
+ int ret = 0;
+
+ ASSERT (a != NULL);
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ p = get_locator_set_index (a, p);
+ if (!p)
+ {
+ clib_warning ("locator-set %v doesn't exist", a->name);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ if (ls == 0)
+ {
+ ls = pool_elt_at_index (lcm->locator_set_pool, p[0]);
+ if (!ls)
+ {
+ clib_warning ("locator-set %d to be overwritten doesn't exist!",
+ p[0]);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ }
+
+ if (a->is_add)
+ {
+ if (ls_result)
+ ls_result[0] = p[0];
+
+ /* allocate locators */
+ vec_foreach (itloc, a->locators)
+ {
+ ret = is_locator_in_locator_set (lcm, ls, itloc);
+ if (0 != ret)
+ {
+ return ret;
+ }
+
+ pool_get (lcm->locator_pool, loc);
+ loc[0] = itloc[0];
+ loc_index = loc - lcm->locator_pool;
+
+ vec_add1 (ls->locator_indices, loc_index);
+
+ vec_validate (lcm->locator_to_locator_sets, loc_index);
+ ls_indexes = vec_elt_at_index (lcm->locator_to_locator_sets,
+ loc_index);
+ vec_add1 (ls_indexes[0], p[0]);
+ }
+ }
+ else
+ {
+ ls_index = p[0];
+ u8 removed;
+
+ vec_foreach (itloc, a->locators)
+ {
+ removed = 0;
+ loc_id = 0;
+ vec_foreach (locit, ls->locator_indices)
+ {
+ loc = pool_elt_at_index (lcm->locator_pool, locit[0]);
+
+ if (loc->local && loc->sw_if_index == itloc->sw_if_index)
+ {
+ removed = 1;
+ remove_locator_from_locator_set (ls, locit, ls_index, loc_id);
+ }
+ if (0 == loc->local &&
+ !gid_address_cmp (&loc->address, &itloc->address))
+ {
+ removed = 1;
+ remove_locator_from_locator_set (ls, locit, ls_index, loc_id);
+ }
+
+ if (removed)
+ {
+ /* update fwd entries using this locator in DP */
+ update_fwd_entries_by_locator_set (lcm, loc->local, ls_index,
+ vec_len (ls->locator_indices)
+ == 0);
+ }
+
+ loc_id++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int
+vnet_lisp_add_del_locator_set (vnet_lisp_add_del_locator_set_args_t * a,
+ u32 * ls_result)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *ls;
+ uword _p = (u32) ~ 0, *p = &_p;
+ u32 ls_index;
+ u32 **eid_indexes;
+ int ret = 0;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ if (a->is_add)
+ {
+ p = get_locator_set_index (a, p);
+
+ /* overwrite */
+ if (p && p[0] != (u32) ~ 0)
+ {
+ ls = pool_elt_at_index (lcm->locator_set_pool, p[0]);
+ if (!ls)
+ {
+ clib_warning ("locator-set %d to be overwritten doesn't exist!",
+ p[0]);
+ return -1;
+ }
+
+ /* clean locator to locator-set vectors and remove locators if
+ * they're not part of another locator-set */
+ clean_locator_to_locator_set (lcm, p[0]);
+
+ /* remove locator indices from locator set */
+ vec_free (ls->locator_indices);
+
+ ls_index = p[0];
+
+ if (ls_result)
+ ls_result[0] = p[0];
+ }
+ /* new locator-set */
+ else
+ {
+ pool_get (lcm->locator_set_pool, ls);
+ memset (ls, 0, sizeof (*ls));
+ ls_index = ls - lcm->locator_set_pool;
+
+ if (a->local)
+ {
+ ls->name = vec_dup (a->name);
+
+ if (!lcm->locator_set_index_by_name)
+		lcm->locator_set_index_by_name =
+		  hash_create_vec (/* size */ 0, sizeof (ls->name[0]),
+				   sizeof (uword));
+ hash_set_mem (lcm->locator_set_index_by_name, ls->name,
+ ls_index);
+
+ /* mark as local locator-set */
+ vec_add1 (lcm->local_locator_set_indexes, ls_index);
+ }
+ ls->local = a->local;
+ if (ls_result)
+ ls_result[0] = ls_index;
+ }
+
+ ret = vnet_lisp_add_del_locator (a, ls, NULL);
+ if (0 != ret)
+ {
+ return ret;
+ }
+ }
+ else
+ {
+ p = get_locator_set_index (a, p);
+ if (!p)
+ {
+	  clib_warning ("locator-set %v doesn't exist", a->name);
+ return -1;
+ }
+
+ ls = pool_elt_at_index (lcm->locator_set_pool, p[0]);
+ if (!ls)
+ {
+	  clib_warning ("locator-set with index %d doesn't exist", p[0]);
+ return -1;
+ }
+
+ if (lcm->mreq_itr_rlocs == p[0])
+ {
+ clib_warning ("Can't delete the locator-set used to constrain "
+ "the itr-rlocs in map-requests!");
+ return -1;
+ }
+
+ if (vec_len (lcm->locator_set_to_eids) != 0)
+ {
+ eid_indexes = vec_elt_at_index (lcm->locator_set_to_eids, p[0]);
+ if (vec_len (eid_indexes[0]) != 0)
+ {
+ clib_warning
+ ("Can't delete a locator-set that supports a mapping!");
+ return -1;
+ }
+ }
+
+ /* clean locator to locator-sets data */
+ clean_locator_to_locator_set (lcm, p[0]);
+
+ if (ls->local)
+ {
+ u32 it, lsi;
+
+ vec_foreach_index (it, lcm->local_locator_set_indexes)
+ {
+ lsi = vec_elt (lcm->local_locator_set_indexes, it);
+ if (lsi == p[0])
+ {
+ vec_del1 (lcm->local_locator_set_indexes, it);
+ break;
+ }
+ }
+ hash_unset_mem (lcm->locator_set_index_by_name, ls->name);
+ }
+ vec_free (ls->name);
+ vec_free (ls->locator_indices);
+ pool_put (lcm->locator_set_pool, ls);
+ }
+ return 0;
+}
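+/*
+ * Illustrative sketch (not part of this patch): creating a local
+ * locator-set with one interface locator through the API above. The
+ * name, sw_if_index, priority and weight values are hypothetical.
+ *
+ *   vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ *   locator_t loc;
+ *   u32 ls_index = ~0;
+ *
+ *   memset (a, 0, sizeof (a[0]));
+ *   memset (&loc, 0, sizeof (loc));
+ *   loc.local = 1;
+ *   loc.sw_if_index = 1;
+ *   loc.priority = 1;
+ *   loc.weight = 100;
+ *
+ *   a->is_add = 1;
+ *   a->local = 1;
+ *   a->name = format (0, "ls1");
+ *   vec_add1 (a->locators, loc);
+ *
+ *   if (0 == vnet_lisp_add_del_locator_set (a, &ls_index))
+ *     ; (ls_index now identifies the new set)
+ *
+ *   vec_free (a->name);
+ *   vec_free (a->locators);
+ */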
+
+int
+vnet_lisp_rloc_probe_enable_disable (u8 is_enable)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ lcm->rloc_probing = is_enable;
+ return 0;
+}
+
+int
+vnet_lisp_map_register_enable_disable (u8 is_enable)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ lcm->map_registering = is_enable;
+ return 0;
+}
+
+clib_error_t *
+vnet_lisp_enable_disable (u8 is_enable)
+{
+ u32 vni, dp_table, **rmts;
+ clib_error_t *error = 0;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ vnet_lisp_gpe_enable_disable_args_t _a, *a = &_a;
+
+ a->is_en = is_enable;
+ error = vnet_lisp_gpe_enable_disable (a);
+ if (error)
+ {
+ return clib_error_return (0, "failed to %s data-plane!",
+ a->is_en ? "enable" : "disable");
+ }
+
+ if (is_enable)
+ {
+ /* enable all l2 and l3 ifaces */
+
+ /* *INDENT-OFF* */
+ hash_foreach(vni, dp_table, lcm->table_id_by_vni, ({
+ dp_add_del_iface(lcm, vni, 0, 1);
+ }));
+ hash_foreach(vni, dp_table, lcm->bd_id_by_vni, ({
+ dp_add_del_iface(lcm, vni, /* is_l2 */ 1, 1);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ /* clear interface table */
+ hash_free (lcm->fwd_entry_by_mapping_index);
+ pool_free (lcm->fwd_entry_pool);
+ /* Clear state tracking rmt-lcl fwd entries */
+ /* *INDENT-OFF* */
+ pool_foreach(rmts, lcm->lcl_to_rmt_adjacencies,
+ {
+ vec_free(rmts[0]);
+ });
+ /* *INDENT-ON* */
+ hash_free (lcm->lcl_to_rmt_adjs_by_lcl_idx);
+ pool_free (lcm->lcl_to_rmt_adjacencies);
+ }
+
+ /* update global flag */
+ lcm->is_enabled = is_enable;
+
+ return 0;
+}
+
+u8
+vnet_lisp_enable_disable_status (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return lcm->is_enabled;
+}
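+/*
+ * Note: nearly every configuration entry point above bails out with
+ * VNET_API_ERROR_LISP_DISABLED unless the control plane is enabled
+ * first, so a minimal bring-up sequence looks like:
+ *
+ *   clib_error_t *err = vnet_lisp_enable_disable (1);
+ *   if (err)
+ *     clib_error_report (err);
+ *   ASSERT (vnet_lisp_enable_disable_status ());
+ */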
+
+int
+vnet_lisp_add_del_map_resolver (vnet_lisp_add_del_map_resolver_args_t * a)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 i;
+ lisp_msmr_t _mr, *mr = &_mr;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ if (a->is_add)
+ {
+
+ if (get_map_resolver (&a->address))
+ {
+ clib_warning ("map-resolver %U already exists!", format_ip_address,
+ &a->address);
+ return -1;
+ }
+
+ memset (mr, 0, sizeof (*mr));
+ ip_address_copy (&mr->address, &a->address);
+ vec_add1 (lcm->map_resolvers, *mr);
+
+ if (vec_len (lcm->map_resolvers) == 1)
+ lcm->do_map_resolver_election = 1;
+ }
+ else
+ {
+ for (i = 0; i < vec_len (lcm->map_resolvers); i++)
+ {
+ mr = vec_elt_at_index (lcm->map_resolvers, i);
+ if (!ip_address_cmp (&mr->address, &a->address))
+ {
+ if (!ip_address_cmp (&mr->address, &lcm->active_map_resolver))
+ lcm->do_map_resolver_election = 1;
+
+ vec_del1 (lcm->map_resolvers, i);
+ break;
+ }
+ }
+ }
+ return 0;
+}
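+/*
+ * Illustrative sketch: adding an IPv4 map-resolver. 'mr_ip4' is a
+ * hypothetical ip4_address_t; adding the first resolver also arms the
+ * initial election.
+ *
+ *   vnet_lisp_add_del_map_resolver_args_t _a, *a = &_a;
+ *   memset (a, 0, sizeof (a[0]));
+ *   a->is_add = 1;
+ *   ip_address_set (&a->address, &mr_ip4, IP4);
+ *   vnet_lisp_add_del_map_resolver (a);
+ */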
+
+int
+vnet_lisp_map_register_set_ttl (u32 ttl)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lcm->map_register_ttl = ttl;
+ return 0;
+}
+
+u32
+vnet_lisp_map_register_get_ttl (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return lcm->map_register_ttl;
+}
+
+int
+vnet_lisp_add_del_mreq_itr_rlocs (vnet_lisp_add_del_mreq_itr_rloc_args_t * a)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ uword *p = 0;
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ if (a->is_add)
+ {
+ p = hash_get_mem (lcm->locator_set_index_by_name, a->locator_set_name);
+ if (!p)
+ {
+ clib_warning ("locator-set %v doesn't exist", a->locator_set_name);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ lcm->mreq_itr_rlocs = p[0];
+ }
+ else
+ {
+ lcm->mreq_itr_rlocs = ~0;
+ }
+
+ return 0;
+}
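+/*
+ * Illustrative sketch: constraining the ITR-RLOCs advertised in
+ * map-requests to a previously configured locator-set (name is a
+ * hypothetical example):
+ *
+ *   vnet_lisp_add_del_mreq_itr_rloc_args_t _a, *a = &_a;
+ *   memset (a, 0, sizeof (a[0]));
+ *   a->is_add = 1;
+ *   a->locator_set_name = format (0, "ls1");
+ *   vnet_lisp_add_del_mreq_itr_rlocs (a);
+ *   vec_free (a->locator_set_name);
+ */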
+
+/* Statistics (not really errors) */
+#define foreach_lisp_cp_lookup_error \
+_(DROP, "drop") \
+_(MAP_REQUESTS_SENT, "map-request sent") \
+_(ARP_REPLY_TX, "ARP replies sent") \
+_(NDP_NEIGHBOR_ADVERTISEMENT_TX, \
+ "neighbor advertisement sent")
+
+static char *lisp_cp_lookup_error_strings[] = {
+#define _(sym,string) string,
+ foreach_lisp_cp_lookup_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) LISP_CP_LOOKUP_ERROR_##sym,
+ foreach_lisp_cp_lookup_error
+#undef _
+ LISP_CP_LOOKUP_N_ERROR,
+} lisp_cp_lookup_error_t;
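+/*
+ * The foreach_lisp_cp_lookup_error X-macro keeps the counter strings and
+ * the error enum in sync from a single list; its first entry expands
+ * roughly to:
+ *
+ *   static char *lisp_cp_lookup_error_strings[] = { "drop", ... };
+ *   typedef enum { LISP_CP_LOOKUP_ERROR_DROP, ...,
+ *                  LISP_CP_LOOKUP_N_ERROR } lisp_cp_lookup_error_t;
+ */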
+
+typedef enum
+{
+ LISP_CP_LOOKUP_NEXT_DROP,
+ LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX,
+ LISP_CP_LOOKUP_N_NEXT,
+} lisp_cp_lookup_next_t;
+
+typedef struct
+{
+ gid_address_t dst_eid;
+ ip_address_t map_resolver_ip;
+} lisp_cp_lookup_trace_t;
+
+u8 *
+format_lisp_cp_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lisp_cp_lookup_trace_t *t = va_arg (*args, lisp_cp_lookup_trace_t *);
+
+ s = format (s, "LISP-CP-LOOKUP: map-resolver: %U destination eid %U",
+ format_ip_address, &t->map_resolver_ip, format_gid_address,
+ &t->dst_eid);
+ return s;
+}
+
+int
+get_mr_and_local_iface_ip (lisp_cp_main_t * lcm, ip_address_t * mr_ip,
+ ip_address_t * sloc)
+{
+ lisp_msmr_t *mrit;
+ ip_address_t *a;
+
+ if (vec_len (lcm->map_resolvers) == 0)
+ {
+ clib_warning ("No map-resolver configured");
+ return 0;
+ }
+
+ /* find the first mr ip we have a route to and the ip of the
+ * iface that has a route to it */
+ vec_foreach (mrit, lcm->map_resolvers)
+ {
+ a = &mrit->address;
+ if (0 != ip_fib_get_first_egress_ip_for_dst (lcm, a, sloc))
+ {
+ ip_address_copy (mr_ip, a);
+
+ /* found a map-resolver with a usable egress route */
+ return 1;
+ }
+ }
+
+ clib_warning ("Can't find map-resolver and local interface ip!");
+ return 0;
+}
+
+static gid_address_t *
+build_itr_rloc_list (lisp_cp_main_t * lcm, locator_set_t * loc_set)
+{
+ void *addr;
+ u32 i;
+ locator_t *loc;
+ u32 *loc_indexp;
+ ip_interface_address_t *ia = 0;
+ gid_address_t gid_data, *gid = &gid_data;
+ gid_address_t *rlocs = 0;
+ ip_prefix_t *ippref = &gid_address_ippref (gid);
+ ip_address_t *rloc = &ip_prefix_addr (ippref);
+
+ memset (gid, 0, sizeof (gid[0]));
+ gid_address_type (gid) = GID_ADDR_IP_PREFIX;
+ for (i = 0; i < vec_len (loc_set->locator_indices); i++)
+ {
+ loc_indexp = vec_elt_at_index (loc_set->locator_indices, i);
+ loc = pool_elt_at_index (lcm->locator_pool, loc_indexp[0]);
+
+ /* Add ipv4 locators first TODO sort them */
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&lcm->im4->lookup_main, ia,
+ loc->sw_if_index, 1 /* unnumbered */,
+ ({
+ addr = ip_interface_address_get_address (&lcm->im4->lookup_main, ia);
+ ip_address_set (rloc, addr, IP4);
+ ip_prefix_len (ippref) = 32;
+ ip_prefix_normalize (ippref);
+ vec_add1 (rlocs, gid[0]);
+ }));
+
+ /* Add ipv6 locators */
+ foreach_ip_interface_address (&lcm->im6->lookup_main, ia,
+ loc->sw_if_index, 1 /* unnumbered */,
+ ({
+ addr = ip_interface_address_get_address (&lcm->im6->lookup_main, ia);
+ ip_address_set (rloc, addr, IP6);
+ ip_prefix_len (ippref) = 128;
+ ip_prefix_normalize (ippref);
+ vec_add1 (rlocs, gid[0]);
+ }));
+ /* *INDENT-ON* */
+
+ }
+ return rlocs;
+}
+
+static vlib_buffer_t *
+build_map_request (lisp_cp_main_t * lcm, gid_address_t * deid,
+ ip_address_t * sloc, ip_address_t * rloc,
+ gid_address_t * itr_rlocs, u64 * nonce_res, u32 * bi_res)
+{
+ vlib_buffer_t *b;
+ u32 bi;
+ vlib_main_t *vm = lcm->vlib_main;
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ {
+ clib_warning ("Can't allocate buffer for Map-Request!");
+ return 0;
+ }
+
+ b = vlib_get_buffer (vm, bi);
+
+ /* leave some space for the encap headers */
+ vlib_buffer_make_headroom (b, MAX_LISP_MSG_ENCAP_LEN);
+
+ /* put lisp msg */
+ lisp_msg_put_mreq (lcm, b, NULL, deid, itr_rlocs, 0 /* smr invoked */ ,
+ 1 /* rloc probe */ , nonce_res);
+
+ /* push outer ip header */
+ pkt_push_udp_and_ip (vm, b, LISP_CONTROL_PORT, LISP_CONTROL_PORT, sloc,
+ rloc, 1);
+
+ bi_res[0] = bi;
+
+ return b;
+}
+
+static vlib_buffer_t *
+build_encapsulated_map_request (lisp_cp_main_t * lcm,
+ gid_address_t * seid, gid_address_t * deid,
+ locator_set_t * loc_set, ip_address_t * mr_ip,
+ ip_address_t * sloc, u8 is_smr_invoked,
+ u64 * nonce_res, u32 * bi_res)
+{
+ vlib_buffer_t *b;
+ u32 bi;
+ gid_address_t *rlocs = 0;
+ vlib_main_t *vm = lcm->vlib_main;
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ {
+ clib_warning ("Can't allocate buffer for Map-Request!");
+ return 0;
+ }
+
+ b = vlib_get_buffer (vm, bi);
+ b->flags = 0;
+
+ /* leave some space for the encap headers */
+ vlib_buffer_make_headroom (b, MAX_LISP_MSG_ENCAP_LEN);
+
+ /* get rlocs */
+ rlocs = build_itr_rloc_list (lcm, loc_set);
+
+ if (MR_MODE_SRC_DST == lcm->map_request_mode
+ && GID_ADDR_SRC_DST != gid_address_type (deid))
+ {
+ gid_address_t sd;
+ memset (&sd, 0, sizeof (sd));
+ build_src_dst (&sd, seid, deid);
+ lisp_msg_put_mreq (lcm, b, seid, &sd, rlocs, is_smr_invoked,
+ 0 /* rloc probe */ , nonce_res);
+ }
+ else
+ {
+ /* put lisp msg */
+ lisp_msg_put_mreq (lcm, b, seid, deid, rlocs, is_smr_invoked,
+ 0 /* rloc probe */ , nonce_res);
+ }
+
+ /* push ecm: udp-ip-lisp */
+ lisp_msg_push_ecm (vm, b, LISP_CONTROL_PORT, LISP_CONTROL_PORT, seid, deid);
+
+ /* push outer ip header */
+ pkt_push_udp_and_ip (vm, b, LISP_CONTROL_PORT, LISP_CONTROL_PORT, sloc,
+ mr_ip, 1);
+
+ bi_res[0] = bi;
+
+ vec_free (rlocs);
+ return b;
+}
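+/*
+ * For reference, the encapsulated map-request assembled above is layered,
+ * outermost first (4342 is the LISP control port):
+ *
+ *   [ outer IP | outer UDP 4342 | ECM hdr | inner IP | inner UDP 4342 |
+ *     Map-Request ]
+ *
+ * lisp_msg_put_mreq() writes the innermost message, lisp_msg_push_ecm()
+ * prepends the ECM and inner headers, and pkt_push_udp_and_ip() finishes
+ * with the outer headers addressed to the map-resolver.
+ */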
+
+static void
+reset_pending_mr_counters (pending_map_request_t * r)
+{
+ r->time_to_expire = PENDING_MREQ_EXPIRATION_TIME;
+ r->retries_num = 0;
+}
+
+#define foreach_msmr \
+ _(server) \
+ _(resolver)
+
+#define _(name) \
+static int \
+elect_map_ ## name (lisp_cp_main_t * lcm) \
+{ \
+ lisp_msmr_t *mr; \
+ vec_foreach (mr, lcm->map_ ## name ## s) \
+ { \
+ if (!mr->is_down) \
+ { \
+ ip_address_copy (&lcm->active_map_ ##name, &mr->address); \
+ lcm->do_map_ ## name ## _election = 0; \
+ return 1; \
+ } \
+ } \
+ return 0; \
+}
+foreach_msmr
+#undef _
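+/*
+ * foreach_msmr instantiates the template above once per peer type,
+ * generating elect_map_server() and elect_map_resolver(). The "resolver"
+ * expansion reads, roughly:
+ *
+ *   static int
+ *   elect_map_resolver (lisp_cp_main_t * lcm)
+ *   {
+ *     lisp_msmr_t *mr;
+ *     vec_foreach (mr, lcm->map_resolvers)
+ *       if (!mr->is_down)
+ *         {
+ *           ip_address_copy (&lcm->active_map_resolver, &mr->address);
+ *           lcm->do_map_resolver_election = 0;
+ *           return 1;
+ *         }
+ *     return 0;
+ *   }
+ */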
+
+static void
+free_map_register_records (mapping_t * maps)
+{
+ mapping_t *map;
+ vec_foreach (map, maps) vec_free (map->locators);
+
+ vec_free (maps);
+}
+
+static void
+add_locators (lisp_cp_main_t * lcm, mapping_t * m, u32 locator_set_index,
+ ip_address_t * probed_loc)
+{
+ u32 *li;
+ locator_t *loc, new;
+ ip_interface_address_t *ia = 0;
+ void *addr;
+ ip_address_t *new_ip = &gid_address_ip (&new.address);
+
+ m->locators = 0;
+ locator_set_t *ls = pool_elt_at_index (lcm->locator_set_pool,
+ locator_set_index);
+ vec_foreach (li, ls->locator_indices)
+ {
+ loc = pool_elt_at_index (lcm->locator_pool, li[0]);
+ new = loc[0];
+ if (loc->local)
+ {
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&lcm->im4->lookup_main, ia,
+ loc->sw_if_index, 1 /* unnumbered */,
+ ({
+ addr = ip_interface_address_get_address (&lcm->im4->lookup_main,
+ ia);
+ ip_address_set (new_ip, addr, IP4);
+ }));
+
+ /* Add ipv6 locators */
+ foreach_ip_interface_address (&lcm->im6->lookup_main, ia,
+ loc->sw_if_index, 1 /* unnumbered */,
+ ({
+ addr = ip_interface_address_get_address (&lcm->im6->lookup_main,
+ ia);
+ ip_address_set (new_ip, addr, IP6);
+ }));
+ /* *INDENT-ON* */
+
+ if (probed_loc && ip_address_cmp (probed_loc, new_ip) == 0)
+ new.probed = 1;
+ }
+ vec_add1 (m->locators, new);
+ }
+}
+
+static mapping_t *
+build_map_register_record_list (lisp_cp_main_t * lcm)
+{
+ mapping_t *recs = 0, rec, *m;
+
+ /* *INDENT-OFF* */
+ pool_foreach(m, lcm->mapping_pool,
+ {
+ /* for now build only local mappings */
+ if (!m->local)
+ continue;
+
+ rec = m[0];
+ add_locators (lcm, &rec, m->locator_set_index, NULL);
+ vec_add1 (recs, rec);
+ });
+ /* *INDENT-ON* */
+
+ return recs;
+}
+
+static int
+update_map_register_auth_data (map_register_hdr_t * map_reg_hdr,
+ lisp_key_type_t key_id, u8 * key,
+ u16 auth_data_len, u32 msg_len)
+{
+ MREG_KEY_ID (map_reg_hdr) = clib_host_to_net_u16 (key_id);
+ MREG_AUTH_DATA_LEN (map_reg_hdr) = clib_host_to_net_u16 (auth_data_len);
+
+ unsigned char *result = HMAC (get_encrypt_fcn (key_id), key, vec_len (key),
+ (unsigned char *) map_reg_hdr, msg_len, NULL,
+ NULL);
+ clib_memcpy (MREG_DATA (map_reg_hdr), result, auth_data_len);
+
+ return 0;
+}
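+/*
+ * Sketch of the authentication scheme (assuming the auth-data slot is
+ * zero-filled when lisp_msg_put_map_register() reserves it): the HMAC of
+ * the whole Map-Register, using the digest selected by get_encrypt_fcn()
+ * for the key type, is computed while the slot is zero and then copied
+ * into place, i.e.
+ *
+ *   auth_data = HMAC_key (message with auth-data field == 0)
+ *
+ * so a receiver can verify by zeroing the slot and recomputing.
+ */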
+
+static vlib_buffer_t *
+build_map_register (lisp_cp_main_t * lcm, ip_address_t * sloc,
+ ip_address_t * ms_ip, u64 * nonce_res, u8 want_map_notif,
+ mapping_t * records, lisp_key_type_t key_id, u8 * key,
+ u32 * bi_res)
+{
+ void *map_reg_hdr;
+ vlib_buffer_t *b;
+ u32 bi, auth_data_len = 0, msg_len = 0;
+ vlib_main_t *vm = lcm->vlib_main;
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ {
+ clib_warning ("Can't allocate buffer for Map-Register!");
+ return 0;
+ }
+
+ b = vlib_get_buffer (vm, bi);
+
+ /* leave some space for the encap headers */
+ vlib_buffer_make_headroom (b, MAX_LISP_MSG_ENCAP_LEN);
+
+ auth_data_len = auth_data_len_by_key_id (key_id);
+ map_reg_hdr = lisp_msg_put_map_register (b, records, want_map_notif,
+ auth_data_len, nonce_res,
+ &msg_len);
+
+ update_map_register_auth_data (map_reg_hdr, key_id, key, auth_data_len,
+ msg_len);
+
+ /* push outer ip header */
+ pkt_push_udp_and_ip (vm, b, LISP_CONTROL_PORT, LISP_CONTROL_PORT, sloc,
+ ms_ip, 1);
+
+ bi_res[0] = bi;
+ return b;
+}
+
+#define _(name) \
+static int \
+get_egress_map_ ##name## _ip (lisp_cp_main_t * lcm, ip_address_t * ip) \
+{ \
+ lisp_msmr_t *mr; \
+ while (lcm->do_map_ ## name ## _election \
+ | (0 == ip_fib_get_first_egress_ip_for_dst \
+ (lcm, &lcm->active_map_ ##name, ip))) \
+ { \
+ if (0 == elect_map_ ## name (lcm)) \
+ /* all map resolvers/servers are down */ \
+ { \
+ /* restart MR/MS checking by marking all of them up */ \
+ vec_foreach (mr, lcm->map_ ## name ## s) mr->is_down = 0; \
+ return -1; \
+ } \
+ } \
+ return 0; \
+}
+
+foreach_msmr
+#undef _
+/* CP output statistics */
+#define foreach_lisp_cp_output_error \
+_(MAP_REGISTERS_SENT, "map-registers sent") \
+_(MAP_REQUESTS_SENT, "map-requests sent") \
+_(RLOC_PROBES_SENT, "rloc-probes sent")
+static char *lisp_cp_output_error_strings[] = {
+#define _(sym,string) string,
+ foreach_lisp_cp_output_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) LISP_CP_OUTPUT_ERROR_##sym,
+ foreach_lisp_cp_output_error
+#undef _
+ LISP_CP_OUTPUT_N_ERROR,
+} lisp_cp_output_error_t;
+
+static uword
+lisp_cp_output (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return 0;
+}
+
+/* dummy node used only for statistics */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_cp_output_node) = {
+ .function = lisp_cp_output,
+ .name = "lisp-cp-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_cp_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = LISP_CP_OUTPUT_N_ERROR,
+ .error_strings = lisp_cp_output_error_strings,
+
+ .n_next_nodes = LISP_CP_INPUT_N_NEXT,
+
+ .next_nodes = {
+ [LISP_CP_INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+static int
+send_rloc_probe (lisp_cp_main_t * lcm, gid_address_t * deid,
+ u32 local_locator_set_index, ip_address_t * sloc,
+ ip_address_t * rloc)
+{
+ locator_set_t *ls;
+ u32 bi;
+ vlib_buffer_t *b;
+ vlib_frame_t *f;
+ u64 nonce = 0;
+ u32 next_index, *to_next;
+ gid_address_t *itr_rlocs;
+
+ ls = pool_elt_at_index (lcm->locator_set_pool, local_locator_set_index);
+ itr_rlocs = build_itr_rloc_list (lcm, ls);
+
+ b = build_map_request (lcm, deid, sloc, rloc, itr_rlocs, &nonce, &bi);
+ vec_free (itr_rlocs);
+ if (!b)
+ return -1;
+
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+
+ next_index = (ip_addr_version (rloc) == IP4) ?
+ ip4_lookup_node.index : ip6_lookup_node.index;
+
+ f = vlib_get_frame_to_node (lcm->vlib_main, next_index);
+
+ /* Enqueue the packet */
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (lcm->vlib_main, next_index, f);
+
+ return 0;
+}
+
+static int
+send_rloc_probes (lisp_cp_main_t * lcm)
+{
+ u8 lprio = 0;
+ mapping_t *lm;
+ fwd_entry_t *e;
+ locator_pair_t *lp;
+ u32 si, rloc_probes_sent = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, lcm->fwd_entry_pool,
+ {
+ if (vec_len (e->locator_pairs) == 0)
+ continue;
+
+ si = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &e->leid);
+ if (~0 == si)
+ {
+ clib_warning ("internal error: cannot find local eid %U in "
+ "map-cache!", format_gid_address, &e->leid);
+ continue;
+ }
+ lm = pool_elt_at_index (lcm->mapping_pool, si);
+
+ /* get the best (lowest) priority */
+ lprio = e->locator_pairs[0].priority;
+
+ /* send rloc-probe for pair(s) with the best remote locator priority */
+ vec_foreach (lp, e->locator_pairs)
+ {
+ if (lp->priority != lprio)
+ break;
+
+ /* get first remote locator */
+ send_rloc_probe (lcm, &e->reid, lm->locator_set_index, &lp->lcl_loc,
+ &lp->rmt_loc);
+ rloc_probes_sent++;
+ }
+ });
+ /* *INDENT-ON* */
+
+ vlib_node_increment_counter (vlib_get_main (), lisp_cp_output_node.index,
+ LISP_CP_OUTPUT_ERROR_RLOC_PROBES_SENT,
+ rloc_probes_sent);
+ return 0;
+}
+
+static int
+send_map_register (lisp_cp_main_t * lcm, u8 want_map_notif)
+{
+ pending_map_register_t *pmr;
+ u32 bi, map_registers_sent = 0;
+ vlib_buffer_t *b;
+ ip_address_t sloc;
+ vlib_frame_t *f;
+ u64 nonce = 0;
+ u32 next_index, *to_next;
+ mapping_t *records, *r, *group, *k;
+
+ if (get_egress_map_server_ip (lcm, &sloc) < 0)
+ return -1;
+
+ records = build_map_register_record_list (lcm);
+ if (!records)
+ return -1;
+
+ vec_foreach (r, records)
+ {
+ u8 *key = r->key;
+ u8 key_id = r->key_id;
+
+ if (!key)
+ continue; /* no secret key -> map-register cannot be sent */
+
+ group = 0;
+ vec_add1 (group, r[0]);
+
+ /* group mappings that share common key */
+ for (k = r + 1; k < vec_end (records); k++)
+ {
+ if (k->key_id != r->key_id)
+ continue;
+
+ if (vec_is_equal (k->key, r->key))
+ {
+ vec_add1 (group, k[0]);
+ k->key = 0; /* don't process this mapping again */
+ }
+ }
+
+ b = build_map_register (lcm, &sloc, &lcm->active_map_server, &nonce,
+ want_map_notif, group, key_id, key, &bi);
+ vec_free (group);
+ if (!b)
+ continue;
+
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+
+ next_index = (ip_addr_version (&lcm->active_map_server) == IP4) ?
+ ip4_lookup_node.index : ip6_lookup_node.index;
+
+ f = vlib_get_frame_to_node (lcm->vlib_main, next_index);
+
+ /* Enqueue the packet */
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (lcm->vlib_main, next_index, f);
+ map_registers_sent++;
+
+ pool_get (lcm->pending_map_registers_pool, pmr);
+ memset (pmr, 0, sizeof (*pmr));
+ pmr->time_to_expire = PENDING_MREG_EXPIRATION_TIME;
+ hash_set (lcm->map_register_messages_by_nonce, nonce,
+ pmr - lcm->pending_map_registers_pool);
+ }
+ free_map_register_records (records);
+
+ vlib_node_increment_counter (vlib_get_main (), lisp_cp_output_node.index,
+ LISP_CP_OUTPUT_ERROR_MAP_REGISTERS_SENT,
+ map_registers_sent);
+
+ return 0;
+}
+
+#define send_encapsulated_map_request(lcm, seid, deid, smr) \
+ _send_encapsulated_map_request(lcm, seid, deid, smr, 0)
+
+#define resend_encapsulated_map_request(lcm, seid, deid, smr) \
+ _send_encapsulated_map_request(lcm, seid, deid, smr, 1)
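+/*
+ * The two wrappers above differ only in the trailing is_resend flag:
+ *
+ *   send_encapsulated_map_request (lcm, s, d, smr)
+ *     ==> _send_encapsulated_map_request (lcm, s, d, smr, 0)
+ *   resend_encapsulated_map_request (lcm, s, d, smr)
+ *     ==> _send_encapsulated_map_request (lcm, s, d, smr, 1)
+ *
+ * A fresh request is suppressed when a duplicate is already pending; a
+ * resend reuses the pending entry and just queues a new nonce.
+ */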
+
+static int
+_send_encapsulated_map_request (lisp_cp_main_t * lcm,
+ gid_address_t * seid, gid_address_t * deid,
+ u8 is_smr_invoked, u8 is_resend)
+{
+ u32 next_index, bi = 0, *to_next, map_index;
+ vlib_buffer_t *b;
+ vlib_frame_t *f;
+ u64 nonce = 0;
+ locator_set_t *loc_set;
+ mapping_t *map;
+ pending_map_request_t *pmr, *duplicate_pmr = 0;
+ ip_address_t sloc;
+ u32 ls_index;
+
+ /* if there is already a pending request remember it */
+
+ /* *INDENT-OFF* */
+ pool_foreach(pmr, lcm->pending_map_requests_pool,
+ ({
+ if (!gid_address_cmp (&pmr->src, seid)
+ && !gid_address_cmp (&pmr->dst, deid))
+ {
+ duplicate_pmr = pmr;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (!is_resend && duplicate_pmr)
+ {
+ /* don't send the request if there is a pending map request already */
+ return 0;
+ }
+
+ /* get locator-set for seid */
+ if (!lcm->lisp_pitr && gid_address_type (deid) != GID_ADDR_NSH)
+ {
+ map_index = gid_dictionary_lookup (&lcm->mapping_index_by_gid, seid);
+ if (map_index == ~0)
+ {
+ clib_warning ("No local mapping found in eid-table for %U!",
+ format_gid_address, seid);
+ return -1;
+ }
+
+ map = pool_elt_at_index (lcm->mapping_pool, map_index);
+
+ if (!map->local)
+ {
+ clib_warning
+ ("Mapping found for src eid %U is not marked as local!",
+ format_gid_address, seid);
+ return -1;
+ }
+ ls_index = map->locator_set_index;
+ }
+ else
+ {
+ if (lcm->lisp_pitr)
+ {
+ map = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index);
+ ls_index = map->locator_set_index;
+ }
+ else
+ {
+ if (lcm->nsh_map_index == (u32) ~ 0)
+ {
+ clib_warning ("No locator-set defined for NSH!");
+ return -1;
+ }
+ else
+ {
+ map = pool_elt_at_index (lcm->mapping_pool, lcm->nsh_map_index);
+ ls_index = map->locator_set_index;
+ }
+ }
+ }
+
+ /* overwrite locator set if map-request itr-rlocs configured */
+ if (~0 != lcm->mreq_itr_rlocs)
+ {
+ ls_index = lcm->mreq_itr_rlocs;
+ }
+
+ loc_set = pool_elt_at_index (lcm->locator_set_pool, ls_index);
+
+ if (get_egress_map_resolver_ip (lcm, &sloc) < 0)
+ {
+ if (duplicate_pmr)
+ duplicate_pmr->to_be_removed = 1;
+ return -1;
+ }
+
+ /* build the encapsulated map request */
+ b = build_encapsulated_map_request (lcm, seid, deid, loc_set,
+ &lcm->active_map_resolver,
+ &sloc, is_smr_invoked, &nonce, &bi);
+
+ if (!b)
+ return -1;
+
+ /* set fib index to default and lookup node */
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+ next_index = (ip_addr_version (&lcm->active_map_resolver) == IP4) ?
+ ip4_lookup_node.index : ip6_lookup_node.index;
+
+ f = vlib_get_frame_to_node (lcm->vlib_main, next_index);
+
+ /* Enqueue the packet */
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (lcm->vlib_main, next_index, f);
+
+ vlib_node_increment_counter (vlib_get_main (), lisp_cp_output_node.index,
+ LISP_CP_OUTPUT_ERROR_MAP_REQUESTS_SENT, 1);
+
+ if (duplicate_pmr)
+ /* if there is a pending request already update it */
+ {
+ if (clib_fifo_elts (duplicate_pmr->nonces) >= PENDING_MREQ_QUEUE_LEN)
+ {
+ /* remove the oldest nonce */
+ u64 CLIB_UNUSED (tmp), *nonce_del;
+ nonce_del = clib_fifo_head (duplicate_pmr->nonces);
+ hash_unset (lcm->pending_map_requests_by_nonce, nonce_del[0]);
+ clib_fifo_sub1 (duplicate_pmr->nonces, tmp);
+ }
+
+ clib_fifo_add1 (duplicate_pmr->nonces, nonce);
+ hash_set (lcm->pending_map_requests_by_nonce, nonce,
+ duplicate_pmr - lcm->pending_map_requests_pool);
+ }
+ else
+ {
+ /* add map-request to pending requests table */
+ pool_get (lcm->pending_map_requests_pool, pmr);
+ memset (pmr, 0, sizeof (*pmr));
+ gid_address_copy (&pmr->src, seid);
+ gid_address_copy (&pmr->dst, deid);
+ clib_fifo_add1 (pmr->nonces, nonce);
+ pmr->is_smr_invoked = is_smr_invoked;
+ reset_pending_mr_counters (pmr);
+ hash_set (lcm->pending_map_requests_by_nonce, nonce,
+ pmr - lcm->pending_map_requests_pool);
+ }
+
+ return 0;
+}
+
+static void
+get_src_and_dst_ip (void *hdr, ip_address_t * src, ip_address_t * dst)
+{
+ ip4_header_t *ip4 = hdr;
+ ip6_header_t *ip6;
+
+ if ((ip4->ip_version_and_header_length & 0xF0) == 0x40)
+ {
+ ip_address_set (src, &ip4->src_address, IP4);
+ ip_address_set (dst, &ip4->dst_address, IP4);
+ }
+ else
+ {
+ ip6 = hdr;
+ ip_address_set (src, &ip6->src_address, IP6);
+ ip_address_set (dst, &ip6->dst_address, IP6);
+ }
+}
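+/*
+ * Worked example of the version test above: the first octet of a typical
+ * IPv4 header is 0x45 (version 4, IHL 5), and (0x45 & 0xF0) == 0x40, so
+ * the packet is parsed as IPv4; any other high nibble (0x6x for IPv6)
+ * falls through to the IPv6 branch.
+ */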
+
+static u32
+lisp_get_vni_from_buffer_ip (lisp_cp_main_t * lcm, vlib_buffer_t * b,
+ u8 version)
+{
+ uword *vnip;
+ u32 vni = ~0, table_id = ~0;
+
+ table_id = fib_table_get_table_id_for_sw_if_index
+ ((version == IP4) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6,
+ vnet_buffer (b)->sw_if_index[VLIB_RX]);
+
+ vnip = hash_get (lcm->vni_by_table_id, table_id);
+ if (vnip)
+ vni = vnip[0];
+ else
+ clib_warning ("vrf %d is not mapped to any vni!", table_id);
+
+ return vni;
+}
+
+always_inline u32
+lisp_get_bd_from_buffer_eth (vlib_buffer_t * b)
+{
+ u32 sw_if_index0;
+
+ l2input_main_t *l2im = &l2input_main;
+ l2_input_config_t *config;
+ l2_bridge_domain_t *bd_config;
+
+ sw_if_index0 = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ config = vec_elt_at_index (l2im->configs, sw_if_index0);
+ bd_config = vec_elt_at_index (l2im->bd_configs, config->bd_index);
+
+ return bd_config->bd_id;
+}
+
+always_inline u32
+lisp_get_vni_from_buffer_eth (lisp_cp_main_t * lcm, vlib_buffer_t * b)
+{
+ uword *vnip;
+ u32 vni = ~0;
+ u32 bd = lisp_get_bd_from_buffer_eth (b);
+
+ vnip = hash_get (lcm->vni_by_bd_id, bd);
+ if (vnip)
+ vni = vnip[0];
+ else
+ clib_warning ("bridge domain %d is not mapped to any vni!", bd);
+
+ return vni;
+}
+
+void
+get_src_and_dst_eids_from_buffer (lisp_cp_main_t * lcm, vlib_buffer_t * b,
+ gid_address_t * src, gid_address_t * dst,
+ u16 type)
+{
+ ethernet_header_t *eh;
+ u32 vni = 0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt;
+
+ memset (src, 0, sizeof (*src));
+ memset (dst, 0, sizeof (*dst));
+
+ gid_address_type (dst) = GID_ADDR_NO_ADDRESS;
+ gid_address_type (src) = GID_ADDR_NO_ADDRESS;
+
+ if (LISP_AFI_IP == type || LISP_AFI_IP6 == type)
+ {
+ ip4_header_t *ip;
+ u8 version, preflen;
+
+ gid_address_type (src) = GID_ADDR_IP_PREFIX;
+ gid_address_type (dst) = GID_ADDR_IP_PREFIX;
+
+ ip = vlib_buffer_get_current (b);
+ get_src_and_dst_ip (ip, &gid_address_ip (src), &gid_address_ip (dst));
+
+ version = gid_address_ip_version (src);
+ preflen = ip_address_max_len (version);
+ gid_address_ippref_len (src) = preflen;
+ gid_address_ippref_len (dst) = preflen;
+
+ vni = lisp_get_vni_from_buffer_ip (lcm, b, version);
+ gid_address_vni (dst) = vni;
+ gid_address_vni (src) = vni;
+ }
+ else if (LISP_AFI_MAC == type)
+ {
+ ethernet_arp_header_t *ah;
+
+ eh = vlib_buffer_get_current (b);
+
+ if (clib_net_to_host_u16 (eh->type) == ETHERNET_TYPE_ARP)
+ {
+ ah = (ethernet_arp_header_t *) (((u8 *) eh) + sizeof (*eh));
+ if (clib_net_to_host_u16 (ah->opcode)
+ != ETHERNET_ARP_OPCODE_request)
+ return;
+
+ gid_address_type (dst) = GID_ADDR_ARP;
+ gid_address_arp_bd (dst) = lisp_get_bd_from_buffer_eth (b);
+ clib_memcpy (&gid_address_arp_ip4 (dst),
+ &ah->ip4_over_ethernet[1].ip4, 4);
+ }
+ else
+ {
+ if (clib_net_to_host_u16 (eh->type) == ETHERNET_TYPE_IP6)
+ {
+ ip6_header_t *ip;
+ ip = (ip6_header_t *) (eh + 1);
+
+ if (IP_PROTOCOL_ICMP6 == ip->protocol)
+ {
+ icmp6_neighbor_solicitation_or_advertisement_header_t *ndh;
+ ndh = ip6_next_header (ip);
+ if (ndh->icmp.type == ICMP6_neighbor_solicitation)
+ {
+ opt = (void *) (ndh + 1);
+ if ((opt->header.type !=
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address)
+ || (opt->header.n_data_u64s != 1))
+ return; /* source link layer address option not present */
+
+ gid_address_type (dst) = GID_ADDR_NDP;
+ gid_address_ndp_bd (dst) =
+ lisp_get_bd_from_buffer_eth (b);
+ ip_address_set (&gid_address_arp_ndp_ip (dst),
+ &ndh->target_address, IP6);
+ return;
+ }
+ }
+ }
+
+ gid_address_type (src) = GID_ADDR_MAC;
+ gid_address_type (dst) = GID_ADDR_MAC;
+ mac_copy (&gid_address_mac (src), eh->src_address);
+ mac_copy (&gid_address_mac (dst), eh->dst_address);
+
+ /* get vni */
+ vni = lisp_get_vni_from_buffer_eth (lcm, b);
+
+ gid_address_vni (dst) = vni;
+ gid_address_vni (src) = vni;
+ }
+ }
+ else if (LISP_AFI_LCAF == type)
+ {
+ lisp_nsh_hdr_t *nh;
+ eh = vlib_buffer_get_current (b);
+
+ if (clib_net_to_host_u16 (eh->type) == ETHERNET_TYPE_NSH)
+ {
+ nh = (lisp_nsh_hdr_t *) (((u8 *) eh) + sizeof (*eh));
+ u32 spi = clib_net_to_host_u32 (nh->spi_si << 8);
+ u8 si = (u8) clib_net_to_host_u32 (nh->spi_si);
+ gid_address_nsh_spi (dst) = spi;
+ gid_address_nsh_si (dst) = si;
+
+ gid_address_type (dst) = GID_ADDR_NSH;
+ gid_address_type (src) = GID_ADDR_NSH;
+ }
+ }
+}
+
+static uword
+lisp_cp_lookup_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int overlay)
+{
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt;
+ u32 *from, *to_next, di, si;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 next_index;
+ uword n_left_from, n_left_to_next;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, sw_if_index0, next0;
+ u64 mac0;
+ vlib_buffer_t *b0;
+ gid_address_t src, dst;
+ ethernet_arp_header_t *arp0;
+ ethernet_header_t *eth0;
+ vnet_hw_interface_t *hw_if0;
+ ethernet_header_t *eh0;
+ icmp6_neighbor_solicitation_or_advertisement_header_t *ndh;
+ ip6_header_t *ip0;
+
+ pi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, pi0);
+
+ /* src/dst eid pair */
+ get_src_and_dst_eids_from_buffer (lcm, b0, &src, &dst, overlay);
+
+ if (gid_address_type (&dst) == GID_ADDR_ARP)
+ {
+ mac0 = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &dst);
+ if (GID_LOOKUP_MISS_L2 == mac0)
+ goto drop;
+
+ /* send ARP reply */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ eth0 = vlib_buffer_get_current (b0);
+ arp0 = (ethernet_arp_header_t *) (((u8 *) eth0)
+ + sizeof (*eth0));
+ arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
+ arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
+ clib_memcpy (arp0->ip4_over_ethernet[0].ethernet,
+ (u8 *) & mac0, 6);
+ clib_memcpy (&arp0->ip4_over_ethernet[0].ip4,
+ &gid_address_arp_ip4 (&dst), 4);
+
+ /* Hardware must be ethernet-like. */
+ ASSERT (vec_len (hw_if0->hw_address) == 6);
+
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);
+
+ b0->error = node->errors[LISP_CP_LOOKUP_ERROR_ARP_REPLY_TX];
+ next0 = LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX;
+ goto enqueue;
+ }
+ else if (gid_address_type (&dst) == GID_ADDR_NDP)
+ {
+ mac0 = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &dst);
+ if (GID_LOOKUP_MISS_L2 == mac0)
+ goto drop;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ eh0 = vlib_buffer_get_current (b0);
+ ip0 = (ip6_header_t *) (eh0 + 1);
+ ndh = ip6_next_header (ip0);
+ int bogus_length;
+ ip0->dst_address = ip0->src_address;
+ ip0->src_address = ndh->target_address;
+ ip0->hop_limit = 255;
+ opt = (void *) (ndh + 1);
+ opt->header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+ clib_memcpy (opt->ethernet_address, (u8 *) & mac0, 6);
+ ndh->icmp.type = ICMP6_neighbor_advertisement;
+ ndh->advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
+ ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+ ndh->icmp.checksum = 0;
+ ndh->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0,
+ &bogus_length);
+ clib_memcpy (eh0->dst_address, eh0->src_address, 6);
+ clib_memcpy (eh0->src_address, (u8 *) & mac0, 6);
+ b0->error =
+ node->errors
+ [LISP_CP_LOOKUP_ERROR_NDP_NEIGHBOR_ADVERTISEMENT_TX];
+ next0 = LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX;
+ goto enqueue;
+ }
+
+ /* If we already have a remote mapping for the destination in the
+ map-cache, add a forwarding tunnel directly. If not, send a
+ map-request. */
+ di = gid_dictionary_sd_lookup (&lcm->mapping_index_by_gid, &dst,
+ &src);
+ if (~0 != di)
+ {
+ mapping_t *m = vec_elt_at_index (lcm->mapping_pool, di);
+ /* send a map-request also in case of negative mapping entry
+ with corresponding action */
+ if (m->action == LISP_SEND_MAP_REQUEST)
+ {
+ /* send map-request */
+ queue_map_request (&src, &dst, 0 /* smr_invoked */ ,
+ 0 /* is_resend */ );
+ }
+ else
+ {
+ if (GID_ADDR_NSH != gid_address_type (&dst))
+ {
+ si = gid_dictionary_lookup (&lcm->mapping_index_by_gid,
+ &src);
+ }
+ else
+ si = lcm->nsh_map_index;
+
+ if (~0 != si)
+ {
+ dp_add_fwd_entry_from_mt (si, di);
+ }
+ }
+ }
+ else
+ {
+ /* send map-request */
+ queue_map_request (&src, &dst, 0 /* smr_invoked */ ,
+ 0 /* is_resend */ );
+ }
+
+ drop:
+ b0->error = node->errors[LISP_CP_LOOKUP_ERROR_DROP];
+ next0 = LISP_CP_LOOKUP_NEXT_DROP;
+ enqueue:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lisp_cp_lookup_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+
+ memset (tr, 0, sizeof (*tr));
+ gid_address_copy (&tr->dst_eid, &dst);
+ ip_address_copy (&tr->map_resolver_ip,
+ &lcm->active_map_resolver);
+ }
+ gid_address_free (&dst);
+ gid_address_free (&src);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next,
+ n_left_to_next, pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static uword
+lisp_cp_lookup_ip4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_IP));
+}
+
+static uword
+lisp_cp_lookup_ip6 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_IP6));
+}
+
+static uword
+lisp_cp_lookup_l2 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_MAC));
+}
+
+static uword
+lisp_cp_lookup_nsh (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ /* TODO decide if NSH should be propagated as LCAF or not */
+ return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_LCAF));
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = {
+ .function = lisp_cp_lookup_ip4,
+ .name = "lisp-cp-lookup-ip4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_cp_lookup_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = LISP_CP_LOOKUP_N_ERROR,
+ .error_strings = lisp_cp_lookup_error_strings,
+
+ .n_next_nodes = LISP_CP_LOOKUP_N_NEXT,
+
+ .next_nodes = {
+ [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop",
+ [LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_cp_lookup_ip6_node) = {
+ .function = lisp_cp_lookup_ip6,
+ .name = "lisp-cp-lookup-ip6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_cp_lookup_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = LISP_CP_LOOKUP_N_ERROR,
+ .error_strings = lisp_cp_lookup_error_strings,
+
+ .n_next_nodes = LISP_CP_LOOKUP_N_NEXT,
+
+ .next_nodes = {
+ [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop",
+ [LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_cp_lookup_l2_node) = {
+ .function = lisp_cp_lookup_l2,
+ .name = "lisp-cp-lookup-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_cp_lookup_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = LISP_CP_LOOKUP_N_ERROR,
+ .error_strings = lisp_cp_lookup_error_strings,
+
+ .n_next_nodes = LISP_CP_LOOKUP_N_NEXT,
+
+ .next_nodes = {
+ [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop",
+ [LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_cp_lookup_nsh_node) = {
+ .function = lisp_cp_lookup_nsh,
+ .name = "lisp-cp-lookup-nsh",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_cp_lookup_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = LISP_CP_LOOKUP_N_ERROR,
+ .error_strings = lisp_cp_lookup_error_strings,
+
+ .n_next_nodes = LISP_CP_LOOKUP_N_NEXT,
+
+ .next_nodes = {
+ [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop",
+ [LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* lisp_cp_input statistics */
+#define foreach_lisp_cp_input_error \
+_(DROP, "drop") \
+_(RLOC_PROBE_REQ_RECEIVED, "rloc-probe requests received") \
+_(RLOC_PROBE_REP_RECEIVED, "rloc-probe replies received") \
+_(MAP_NOTIFIES_RECEIVED, "map-notifies received") \
+_(MAP_REPLIES_RECEIVED, "map-replies received")
+
+static char *lisp_cp_input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_lisp_cp_input_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) LISP_CP_INPUT_ERROR_##sym,
+ foreach_lisp_cp_input_error
+#undef _
+ LISP_CP_INPUT_N_ERROR,
+} lisp_cp_input_error_t;
+
+typedef struct
+{
+ gid_address_t dst_eid;
+ ip4_address_t map_resolver_ip;
+} lisp_cp_input_trace_t;
+
+u8 *
+format_lisp_cp_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ CLIB_UNUSED (lisp_cp_input_trace_t * t) =
+ va_arg (*args, lisp_cp_input_trace_t *);
+
+ s = format (s, "LISP-CP-INPUT: TODO");
+ return s;
+}
+
+static void
+remove_expired_mapping (lisp_cp_main_t * lcm, u32 mi)
+{
+ mapping_t *m;
+ vnet_lisp_add_del_adjacency_args_t _adj_args, *adj_args = &_adj_args;
+ memset (adj_args, 0, sizeof (adj_args[0]));
+
+ m = pool_elt_at_index (lcm->mapping_pool, mi);
+
+ gid_address_copy (&adj_args->reid, &m->eid);
+ adj_args->is_add = 0;
+ if (vnet_lisp_add_del_adjacency (adj_args))
+ clib_warning ("failed to del adjacency!");
+
+ vnet_lisp_del_mapping (&m->eid, NULL);
+ mapping_delete_timer (lcm, mi);
+}
+
+static void
+mapping_start_expiration_timer (lisp_cp_main_t * lcm, u32 mi,
+ f64 expiration_time)
+{
+ mapping_t *m;
+ u64 now = clib_cpu_time_now ();
+ u64 cpu_cps = lcm->vlib_main->clib_time.clocks_per_second;
+ u64 exp_clock_time = now + expiration_time * cpu_cps;
+
+ m = pool_elt_at_index (lcm->mapping_pool, mi);
+
+ m->timer_set = 1;
+ timing_wheel_insert (&lcm->wheel, exp_clock_time, mi);
+}
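+/*
+ * Example of the clock arithmetic above: with a hypothetical 2.5 GHz TSC
+ * (clocks_per_second == 2.5e9), a 60 second expiration_time yields
+ *
+ *   exp_clock_time = now + 60 * 2.5e9 = now + 1.5e11
+ *
+ * ticks, which is when the timing wheel hands the mapping index back.
+ */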
+
+static void
+process_expired_mapping (lisp_cp_main_t * lcm, u32 mi)
+{
+ int rv;
+ vnet_lisp_gpe_add_del_fwd_entry_args_t _a, *a = &_a;
+ mapping_t *m = pool_elt_at_index (lcm->mapping_pool, mi);
+ uword *fei;
+ fwd_entry_t *fe;
+ vlib_counter_t c;
+ u8 have_stats = 0;
+
+ if (m->delete_after_expiration)
+ {
+ remove_expired_mapping (lcm, mi);
+ return;
+ }
+
+ fei = hash_get (lcm->fwd_entry_by_mapping_index, mi);
+ if (!fei)
+ return;
+
+ fe = pool_elt_at_index (lcm->fwd_entry_pool, fei[0]);
+
+ memset (a, 0, sizeof (*a));
+ a->rmt_eid = fe->reid;
+ if (fe->is_src_dst)
+ a->lcl_eid = fe->leid;
+ a->vni = gid_address_vni (&fe->reid);
+
+ rv = vnet_lisp_gpe_get_fwd_stats (a, &c);
+ if (0 == rv)
+ have_stats = 1;
+
+ if (m->almost_expired)
+ {
+ m->almost_expired = 0; /* reset flag */
+ if (have_stats)
+ {
+ if (m->packets != c.packets)
+ {
+ /* mapping is in use, re-fetch */
+ map_request_args_t mr_args;
+ memset (&mr_args, 0, sizeof (mr_args));
+ mr_args.seid = fe->leid;
+ mr_args.deid = fe->reid;
+
+ send_map_request_thread_fn (&mr_args);
+ }
+ else
+ remove_expired_mapping (lcm, mi);
+ }
+ else
+ remove_expired_mapping (lcm, mi);
+ }
+ else
+ {
+ m->almost_expired = 1;
+ mapping_start_expiration_timer (lcm, mi, TIME_UNTIL_REFETCH_OR_DELETE);
+
+ if (have_stats)
+ /* save counter */
+ m->packets = c.packets;
+ else
+ m->delete_after_expiration = 1;
+ }
+}
+
+static void
+map_records_arg_free (map_records_arg_t * a)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ vec_foreach (m, a->mappings)
+ {
+ vec_free (m->locators);
+ gid_address_free (&m->eid);
+ }
+ pool_put (lcm->map_records_args_pool[vlib_get_thread_index ()], a);
+}
+
+void *
+process_map_reply (map_records_arg_t * a)
+{
+ mapping_t *m;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u32 dst_map_index = 0;
+ pending_map_request_t *pmr;
+ u64 *noncep;
+ uword *pmr_index;
+ u8 is_changed = 0;
+
+ if (a->is_rloc_probe)
+ goto done;
+
+ /* Check pending requests table and nonce */
+ pmr_index = hash_get (lcm->pending_map_requests_by_nonce, a->nonce);
+ if (!pmr_index)
+ {
+ clib_warning ("No pending map-request entry with nonce %lu!", a->nonce);
+ goto done;
+ }
+ pmr = pool_elt_at_index (lcm->pending_map_requests_pool, pmr_index[0]);
+
+ vec_foreach (m, a->mappings)
+ {
+ vnet_lisp_add_del_mapping_args_t _m_args, *m_args = &_m_args;
+ memset (m_args, 0, sizeof (m_args[0]));
+ gid_address_copy (&m_args->eid, &m->eid);
+ m_args->action = m->action;
+ m_args->authoritative = m->authoritative;
+ m_args->ttl = m->ttl;
+ m_args->is_static = 0;
+
+ /* insert/update mappings cache */
+ vnet_lisp_add_mapping (m_args, m->locators, &dst_map_index, &is_changed);
+
+ if (dst_map_index == (u32) ~ 0)
+ continue;
+
+ if (is_changed)
+ {
+ /* try to program forwarding only if mapping saved or updated */
+ vnet_lisp_add_del_adjacency_args_t _adj_args, *adj_args = &_adj_args;
+ memset (adj_args, 0, sizeof (adj_args[0]));
+
+ gid_address_copy (&adj_args->leid, &pmr->src);
+ gid_address_copy (&adj_args->reid, &m->eid);
+ adj_args->is_add = 1;
+
+ if (vnet_lisp_add_del_adjacency (adj_args))
+ clib_warning ("failed to add adjacency!");
+ }
+
+ if ((u32) ~ 0 != m->ttl)
+ mapping_start_expiration_timer (lcm, dst_map_index,
+ (m->ttl == 0) ? 0 : MAPPING_TIMEOUT);
+ }
+
+ /* remove pending map request entry */
+
+ /* *INDENT-OFF* */
+ clib_fifo_foreach (noncep, pmr->nonces, ({
+ hash_unset(lcm->pending_map_requests_by_nonce, noncep[0]);
+ }));
+ /* *INDENT-ON* */
+
+ clib_fifo_free (pmr->nonces);
+ pool_put (lcm->pending_map_requests_pool, pmr);
+
+done:
+ a->is_free = 1;
+ return 0;
+}
+
+static int
+is_auth_data_valid (map_notify_hdr_t * h, u32 msg_len,
+ lisp_key_type_t key_id, u8 * key)
+{
+ u8 *auth_data = 0;
+ u16 auth_data_len;
+ int result;
+
+ auth_data_len = auth_data_len_by_key_id (key_id);
+ if ((u16) ~ 0 == auth_data_len)
+ {
+ clib_warning ("invalid length for key_id %d!", key_id);
+ return 0;
+ }
+
+ /* save auth data */
+ vec_validate (auth_data, auth_data_len - 1);
+ clib_memcpy (auth_data, MNOTIFY_DATA (h), auth_data_len);
+
+ /* clear auth data */
+ memset (MNOTIFY_DATA (h), 0, auth_data_len);
+
+ /* get hash of the message */
+ unsigned char *code = HMAC (get_encrypt_fcn (key_id), key, vec_len (key),
+ (unsigned char *) h, msg_len, NULL, NULL);
+
+ result = memcmp (code, auth_data, auth_data_len);
+
+ vec_free (auth_data);
+
+ return !result;
+}
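+/*
+ * Verification mirrors update_map_register_auth_data(): save the received
+ * digest, zero the auth-data slot, recompute the HMAC over the whole
+ * message and compare. For HMAC-SHA-1 the digest (and thus auth_data_len)
+ * is 20 bytes; for HMAC-SHA-256 it is 32. A constant-time comparison
+ * would arguably be more robust here than memcmp().
+ */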
+
+static void
+process_map_notify (map_records_arg_t * a)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ uword *pmr_index;
+
+ pmr_index = hash_get (lcm->map_register_messages_by_nonce, a->nonce);
+ if (!pmr_index)
+ {
+ clib_warning ("No pending map-register entry with nonce %lu!",
+ a->nonce);
+ return;
+ }
+
+ a->is_free = 1;
+ pool_put_index (lcm->pending_map_registers_pool, pmr_index[0]);
+ hash_unset (lcm->map_register_messages_by_nonce, a->nonce);
+
+ /* reset map-notify counter */
+ lcm->expired_map_registers = 0;
+}
+
+static mapping_t *
+get_mapping (lisp_cp_main_t * lcm, gid_address_t * e)
+{
+ u32 mi;
+
+ mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, e);
+ if (~0 == mi)
+ {
+ clib_warning ("eid %U not found in map-cache!", unformat_gid_address,
+ e);
+ return 0;
+ }
+ return pool_elt_at_index (lcm->mapping_pool, mi);
+}
+
+/**
+ * When a map-notify is received, all EIDs in the record list must share a
+ * common key. That key is then used to verify the authentication data in
+ * the map-notify message.
+ */
+static int
+map_record_integrity_check (lisp_cp_main_t * lcm, mapping_t * maps,
+ u32 key_id, u8 ** key_out)
+{
+ u32 i, len = vec_len (maps);
+ mapping_t *m;
+
+ /* get key of the first mapping */
+ m = get_mapping (lcm, &maps[0].eid);
+ if (!m || !m->key)
+ return -1;
+
+ key_out[0] = m->key;
+
+ for (i = 1; i < len; i++)
+ {
+ m = get_mapping (lcm, &maps[i].eid);
+ if (!m || !m->key)
+ return -1;
+
+ if (key_id != m->key_id || vec_cmp (m->key, key_out[0]))
+ {
+ clib_warning ("keys does not match! %v, %v", key_out[0], m->key);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int
+parse_map_records (vlib_buffer_t * b, map_records_arg_t * a, u8 count)
+{
+ locator_t *locators = 0;
+ u32 i, len;
+ gid_address_t deid;
+ mapping_t m;
+ locator_t *loc;
+
+ memset (&m, 0, sizeof (m));
+
+ /* parse record eid */
+ for (i = 0; i < count; i++)
+ {
+ locators = 0;
+ len = lisp_msg_parse_mapping_record (b, &deid, &locators, NULL);
+ if (len == ~0)
+ {
+ clib_warning ("Failed to parse mapping record!");
+ vec_foreach (loc, locators) locator_free (loc);
+ vec_free (locators);
+ return -1;
+ }
+
+ m.locators = locators;
+ gid_address_copy (&m.eid, &deid);
+ vec_add1 (a->mappings, m);
+ }
+
+ return 0;
+}
+
+static map_records_arg_t *
+map_record_args_get ()
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ map_records_arg_t *rec;
+
+ /* Cleanup first */
+ /* *INDENT-OFF* */
+ pool_foreach (rec, lcm->map_records_args_pool[vlib_get_thread_index()], ({
+ if (rec->is_free)
+ map_records_arg_free (rec);
+ }));
+ /* *INDENT-ON* */
+
+ pool_get (lcm->map_records_args_pool[vlib_get_thread_index ()], rec);
+ return rec;
+}
+
+static map_records_arg_t *
+parse_map_notify (vlib_buffer_t * b)
+{
+ int rc = 0;
+ map_notify_hdr_t *mnotif_hdr;
+ lisp_key_type_t key_id;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ u8 *key = 0;
+ gid_address_t deid;
+ u16 auth_data_len = 0;
+ u8 record_count;
+ map_records_arg_t *a;
+
+ a = map_record_args_get ();
+ memset (a, 0, sizeof (*a));
+ mnotif_hdr = vlib_buffer_get_current (b);
+ vlib_buffer_pull (b, sizeof (*mnotif_hdr));
+ memset (&deid, 0, sizeof (deid));
+
+ a->nonce = MNOTIFY_NONCE (mnotif_hdr);
+ key_id = clib_net_to_host_u16 (MNOTIFY_KEY_ID (mnotif_hdr));
+ auth_data_len = auth_data_len_by_key_id (key_id);
+
+ /* advance buffer by authentication data */
+ vlib_buffer_pull (b, auth_data_len);
+
+ record_count = MNOTIFY_REC_COUNT (mnotif_hdr);
+ rc = parse_map_records (b, a, record_count);
+ if (rc != 0)
+ {
+ map_records_arg_free (a);
+ return 0;
+ }
+
+ rc = map_record_integrity_check (lcm, a->mappings, key_id, &key);
+ if (rc != 0)
+ {
+ map_records_arg_free (a);
+ return 0;
+ }
+
+ /* verify authentication data */
+ if (!is_auth_data_valid (mnotif_hdr, vlib_buffer_get_tail (b)
+ - (u8 *) mnotif_hdr, key_id, key))
+ {
+ clib_warning ("Map-notify auth data verification failed for nonce "
+ "0x%lx!", a->nonce);
+ map_records_arg_free (a);
+ return 0;
+ }
+ return a;
+}
+
+static vlib_buffer_t *
+build_map_reply (lisp_cp_main_t * lcm, ip_address_t * sloc,
+ ip_address_t * dst, u64 nonce, u8 probe_bit,
+ mapping_t * records, u16 dst_port, u32 * bi_res)
+{
+ vlib_buffer_t *b;
+ u32 bi;
+ vlib_main_t *vm = lcm->vlib_main;
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ {
+ clib_warning ("Can't allocate buffer for Map-Register!");
+ return 0;
+ }
+
+ b = vlib_get_buffer (vm, bi);
+
+ /* leave some space for the encap headers */
+ vlib_buffer_make_headroom (b, MAX_LISP_MSG_ENCAP_LEN);
+
+ lisp_msg_put_map_reply (b, records, nonce, probe_bit);
+
+ /* push outer ip header */
+ pkt_push_udp_and_ip (vm, b, LISP_CONTROL_PORT, dst_port, sloc, dst, 1);
+
+ bi_res[0] = bi;
+ return b;
+}
+
+static int
+send_map_reply (lisp_cp_main_t * lcm, u32 mi, ip_address_t * dst,
+ u8 probe_bit, u64 nonce, u16 dst_port,
+ ip_address_t * probed_loc)
+{
+ ip_address_t src;
+ u32 bi;
+ vlib_buffer_t *b;
+ vlib_frame_t *f;
+ u32 next_index, *to_next;
+ mapping_t *records = 0, *m;
+
+ m = pool_elt_at_index (lcm->mapping_pool, mi);
+ if (!m)
+ return -1;
+
+ vec_add1 (records, m[0]);
+ add_locators (lcm, &records[0], m->locator_set_index, probed_loc);
+ memset (&src, 0, sizeof (src));
+
+ if (!ip_fib_get_first_egress_ip_for_dst (lcm, dst, &src))
+ {
+ clib_warning ("can't find inteface address for %U", format_ip_address,
+ dst);
+ return -1;
+ }
+
+ b = build_map_reply (lcm, &src, dst, nonce, probe_bit, records, dst_port,
+ &bi);
+ if (!b)
+ return -1;
+ free_map_register_records (records);
+
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+ next_index = (ip_addr_version (dst) == IP4) ?
+ ip4_lookup_node.index : ip6_lookup_node.index;
+
+ f = vlib_get_frame_to_node (lcm->vlib_main, next_index);
+
+ /* Enqueue the packet */
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (lcm->vlib_main, next_index, f);
+ return 0;
+}
+
+static void
+find_ip_header (vlib_buffer_t * b, u8 ** ip_hdr)
+{
+ const i32 start = vnet_buffer (b)->l3_hdr_offset;
+ /* compare as signed: sizeof is unsigned and would promote 'start' */
+ if (start < 0 && start < -(i32) sizeof (b->pre_data))
+ {
+ *ip_hdr = 0;
+ return;
+ }
+
+ *ip_hdr = b->data + start;
+ if ((u8 *) * ip_hdr > (u8 *) vlib_buffer_get_current (b))
+ *ip_hdr = 0;
+}
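+/*
+ * Illustration of the offset math above: l3_hdr_offset is relative to
+ * b->data, so a negative value points into the pre_data headroom. With a
+ * hypothetical l3_hdr_offset of -40 and current_data of 8, the IP header
+ * starts 48 bytes before the current parse position; offsets further back
+ * than sizeof (b->pre_data) are rejected as invalid.
+ */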
+
+void
+process_map_request (vlib_main_t * vm, vlib_node_runtime_t * node,
+ lisp_cp_main_t * lcm, vlib_buffer_t * b)
+{
+ u8 *ip_hdr = 0;
+ ip_address_t *dst_loc = 0, probed_loc, src_loc;
+ mapping_t m;
+ map_request_hdr_t *mreq_hdr;
+ gid_address_t src, dst;
+ u64 nonce;
+ u32 i, len = 0, rloc_probe_recv = 0;
+ gid_address_t *itr_rlocs = 0;
+
+ mreq_hdr = vlib_buffer_get_current (b);
+ if (!MREQ_SMR (mreq_hdr) && !MREQ_RLOC_PROBE (mreq_hdr))
+ {
+ clib_warning
+ ("Only SMR Map-Requests and RLOC probes are supported for now!");
+ return;
+ }
+
+ vlib_buffer_pull (b, sizeof (*mreq_hdr));
+ nonce = MREQ_NONCE (mreq_hdr);
+
+ /* parse src eid */
+ len = lisp_msg_parse_addr (b, &src);
+ if (len == ~0)
+ return;
+
+ len = lisp_msg_parse_itr_rlocs (b, &itr_rlocs,
+ MREQ_ITR_RLOC_COUNT (mreq_hdr) + 1);
+ if (len == ~0)
+ goto done;
+
+ /* parse eid records and send SMR-invoked map-requests */
+ for (i = 0; i < MREQ_REC_COUNT (mreq_hdr); i++)
+ {
+ memset (&dst, 0, sizeof (dst));
+ len = lisp_msg_parse_eid_rec (b, &dst);
+ if (len == ~0)
+ {
+ clib_warning ("Can't parse map-request EID-record");
+ goto done;
+ }
+
+ if (MREQ_SMR (mreq_hdr))
+ {
+ /* send SMR-invoked map-requests */
+ queue_map_request (&dst, &src, 1 /* invoked */ , 0 /* resend */ );
+ }
+ else if (MREQ_RLOC_PROBE (mreq_hdr))
+ {
+ find_ip_header (b, &ip_hdr);
+ if (!ip_hdr)
+ {
+ clib_warning ("Cannot find the IP header!");
+ goto done;
+ }
+ rloc_probe_recv++;
+ memset (&m, 0, sizeof (m));
+ u32 mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &dst);
+
+ /* TODO: select best locator; for now use the first one */
+ dst_loc = &gid_address_ip (&itr_rlocs[0]);
+
+ /* get src/dst IP addresses */
+ get_src_and_dst_ip (ip_hdr, &src_loc, &probed_loc);
+
+ /* TODO: get source port from buffer */
+ u16 src_port = LISP_CONTROL_PORT;
+
+ send_map_reply (lcm, mi, dst_loc, 1 /* probe-bit */ , nonce,
+ src_port, &probed_loc);
+ }
+ }
+
+done:
+ vlib_node_increment_counter (vm, node->node_index,
+ LISP_CP_INPUT_ERROR_RLOC_PROBE_REQ_RECEIVED,
+ rloc_probe_recv);
+ vec_free (itr_rlocs);
+}
+
+map_records_arg_t *
+parse_map_reply (vlib_buffer_t * b)
+{
+ locator_t probed;
+ gid_address_t deid;
+ void *h;
+ u32 i, len = 0;
+ mapping_t m;
+ map_reply_hdr_t *mrep_hdr;
+ map_records_arg_t *a;
+
+ a = map_record_args_get ();
+ memset (a, 0, sizeof (*a));
+
+ locator_t *locators;
+
+ mrep_hdr = vlib_buffer_get_current (b);
+ if (!vlib_buffer_has_space (b, sizeof (*mrep_hdr)))
+ {
+ /* 'a' comes from the per-thread pool; return it there instead of
+ * freeing it on the heap */
+ map_records_arg_free (a);
+ return 0;
+ }
+ a->nonce = MREP_NONCE (mrep_hdr);
+ a->is_rloc_probe = MREP_RLOC_PROBE (mrep_hdr);
+ vlib_buffer_pull (b, sizeof (*mrep_hdr));
+
+ for (i = 0; i < MREP_REC_COUNT (mrep_hdr); i++)
+ {
+ memset (&m, 0, sizeof (m));
+ locators = 0;
+ h = vlib_buffer_get_current (b);
+
+ m.ttl = clib_net_to_host_u32 (MAP_REC_TTL (h));
+ m.action = MAP_REC_ACTION (h);
+ m.authoritative = MAP_REC_AUTH (h);
+
+ len = lisp_msg_parse_mapping_record (b, &deid, &locators, &probed);
+ if (len == ~0)
+ {
+ clib_warning ("Failed to parse mapping record!");
+ map_records_arg_free (a);
+ return 0;
+ }
+
+ m.locators = locators;
+ gid_address_copy (&m.eid, &deid);
+ vec_add1 (a->mappings, m);
+ }
+ return a;
+}
+
+static void
+queue_map_reply_for_processing (map_records_arg_t * a)
+{
+ vl_api_rpc_call_main_thread (process_map_reply, (u8 *) a, sizeof (*a));
+}
+
+static void
+queue_map_notify_for_processing (map_records_arg_t * a)
+{
+ vl_api_rpc_call_main_thread (process_map_notify, (u8 *) a, sizeof (a[0]));
+}
+
+static uword
+lisp_cp_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, *from, *to_next_drop, rloc_probe_rep_recv = 0,
+ map_notifies_recv = 0;
+ lisp_msg_type_e type;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ map_records_arg_t *a;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next_drop;
+
+ vlib_get_next_frame (vm, node, LISP_CP_INPUT_NEXT_DROP,
+ to_next_drop, n_left_to_next_drop);
+ while (n_left_from > 0 && n_left_to_next_drop > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next_drop[0] = bi0;
+ to_next_drop += 1;
+ n_left_to_next_drop -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ type = lisp_msg_type (vlib_buffer_get_current (b0));
+ switch (type)
+ {
+ case LISP_MAP_REPLY:
+ a = parse_map_reply (b0);
+ if (a)
+ {
+ if (a->is_rloc_probe)
+ rloc_probe_rep_recv++;
+ queue_map_reply_for_processing (a);
+ }
+ break;
+ case LISP_MAP_REQUEST:
+ process_map_request (vm, node, lcm, b0);
+ break;
+ case LISP_MAP_NOTIFY:
+ a = parse_map_notify (b0);
+ if (a)
+ {
+ map_notifies_recv++;
+ queue_map_notify_for_processing (a);
+ }
+ break;
+ default:
+ clib_warning ("Unsupported LISP message type %d", type);
+ break;
+ }
+
+ b0->error = node->errors[LISP_CP_INPUT_ERROR_DROP];
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ /* TODO: add input packet tracing */
+ }
+ }
+
+ vlib_put_next_frame (vm, node, LISP_CP_INPUT_NEXT_DROP,
+ n_left_to_next_drop);
+ }
+ vlib_node_increment_counter (vm, node->node_index,
+ LISP_CP_INPUT_ERROR_RLOC_PROBE_REP_RECEIVED,
+ rloc_probe_rep_recv);
+ vlib_node_increment_counter (vm, node->node_index,
+ LISP_CP_INPUT_ERROR_MAP_NOTIFIES_RECEIVED,
+ map_notifies_recv);
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_cp_input_node) = {
+ .function = lisp_cp_input,
+ .name = "lisp-cp-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_cp_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = LISP_CP_INPUT_N_ERROR,
+ .error_strings = lisp_cp_input_error_strings,
+
+ .n_next_nodes = LISP_CP_INPUT_N_NEXT,
+
+ .next_nodes = {
+ [LISP_CP_INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+lisp_cp_init (vlib_main_t * vm)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ clib_error_t *error = 0;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
+
+ if ((error = vlib_call_init_function (vm, lisp_gpe_init)))
+ return error;
+
+ lcm->im4 = &ip4_main;
+ lcm->im6 = &ip6_main;
+ lcm->vlib_main = vm;
+ lcm->vnet_main = vnet_get_main ();
+ lcm->mreq_itr_rlocs = ~0;
+ lcm->lisp_pitr = 0;
+ lcm->flags = 0;
+ memset (&lcm->active_map_resolver, 0, sizeof (lcm->active_map_resolver));
+ memset (&lcm->active_map_server, 0, sizeof (lcm->active_map_server));
+
+ gid_dictionary_init (&lcm->mapping_index_by_gid);
+ lcm->do_map_resolver_election = 1;
+ lcm->do_map_server_election = 1;
+ lcm->map_request_mode = MR_MODE_DST_ONLY;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (lcm->map_records_args_pool, num_threads - 1);
+
+ /* default vrf mapped to vni 0 */
+ hash_set (lcm->table_id_by_vni, 0, 0);
+ hash_set (lcm->vni_by_table_id, 0, 0);
+
+ udp_register_dst_port (vm, UDP_DST_PORT_lisp_cp,
+ lisp_cp_input_node.index, 1 /* is_ip4 */ );
+ udp_register_dst_port (vm, UDP_DST_PORT_lisp_cp6,
+ lisp_cp_input_node.index, 0 /* is_ip4 */ );
+
+ u64 now = clib_cpu_time_now ();
+ timing_wheel_init (&lcm->wheel, now, vm->clib_time.clocks_per_second);
+ lcm->nsh_map_index = ~0;
+ lcm->map_register_ttl = MAP_REGISTER_DEFAULT_TTL;
+ lcm->max_expired_map_registers = MAX_EXPIRED_MAP_REGISTERS_DEFAULT;
+ lcm->expired_map_registers = 0;
+ lcm->transport_protocol = LISP_TRANSPORT_PROTOCOL_UDP;
+ return 0;
+}
+
+static int
+lisp_stats_api_fill (lisp_cp_main_t * lcm, lisp_gpe_main_t * lgm,
+ lisp_api_stats_t * stat, lisp_stats_key_t * key,
+ u32 stats_index)
+{
+ vlib_counter_t v;
+ vlib_combined_counter_main_t *cm = &lgm->counters;
+ lisp_gpe_fwd_entry_key_t fwd_key;
+ const lisp_gpe_tunnel_t *lgt;
+ fwd_entry_t *fe;
+
+ memset (stat, 0, sizeof (*stat));
+ memset (&fwd_key, 0, sizeof (fwd_key));
+
+ fe = pool_elt_at_index (lcm->fwd_entry_pool, key->fwd_entry_index);
+ ASSERT (fe != 0);
+
+ gid_to_dp_address (&fe->reid, &stat->deid);
+ gid_to_dp_address (&fe->leid, &stat->seid);
+ stat->vni = gid_address_vni (&fe->reid);
+
+ lgt = lisp_gpe_tunnel_get (key->tunnel_index);
+ stat->loc_rloc = lgt->key->lcl;
+ stat->rmt_rloc = lgt->key->rmt;
+
+ vlib_get_combined_counter (cm, stats_index, &v);
+ stat->counters = v;
+ return 1;
+}
+
+lisp_api_stats_t *
+vnet_lisp_get_stats (void)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_api_stats_t *stats = 0, stat;
+ lisp_stats_key_t *key;
+ u32 index;
+
+ /* *INDENT-OFF* */
+ hash_foreach_mem (key, index, lgm->lisp_stats_index_by_key,
+ {
+ if (lisp_stats_api_fill (lcm, lgm, &stat, key, index))
+ vec_add1 (stats, stat);
+ });
+ /* *INDENT-ON* */
+
+ return stats;
+}
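+
+/*
+ * Illustrative caller sketch, not part of the original code; it only
+ * assumes the vppinfra vector conventions used above (the caller owns
+ * and frees the returned vector):
+ *
+ *   lisp_api_stats_t *s, *stats = vnet_lisp_get_stats ();
+ *   vec_foreach (s, stats)
+ *     ;   (consume s->deid, s->seid, s->counters ...)
+ *   vec_free (stats);
+ */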
+
+static void *
+send_map_request_thread_fn (void *arg)
+{
+ map_request_args_t *a = arg;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ if (a->is_resend)
+ resend_encapsulated_map_request (lcm, &a->seid, &a->deid, a->smr_invoked);
+ else
+ send_encapsulated_map_request (lcm, &a->seid, &a->deid, a->smr_invoked);
+
+ return 0;
+}
+
+static int
+queue_map_request (gid_address_t * seid, gid_address_t * deid,
+ u8 smr_invoked, u8 is_resend)
+{
+ map_request_args_t a;
+
+ a.is_resend = is_resend;
+ gid_address_copy (&a.seid, seid);
+ gid_address_copy (&a.deid, deid);
+ a.smr_invoked = smr_invoked;
+
+ vl_api_rpc_call_main_thread (send_map_request_thread_fn,
+ (u8 *) & a, sizeof (a));
+ return 0;
+}
+
+/**
+ * Take an action on a pending map request depending on its expiration time
+ * and retry counter.
+ */
+static void
+update_pending_request (pending_map_request_t * r, f64 dt)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t *mr;
+
+ if (r->time_to_expire - dt < 0)
+ /* it's time to decide what to do with this pending request */
+ {
+ if (r->retries_num >= NUMBER_OF_RETRIES)
+ /* too many retries -> assume current map resolver is not available */
+ {
+ mr = get_map_resolver (&lcm->active_map_resolver);
+ if (!mr)
+ {
+ clib_warning ("Map resolver %U not found - probably deleted "
+ "by the user recently.", format_ip_address,
+ &lcm->active_map_resolver);
+ }
+ else
+ {
+ clib_warning ("map resolver %U is unreachable, ignoring",
+ format_ip_address, &lcm->active_map_resolver);
+
+ /* mark current map resolver unavailable so it won't be
+ * selected next time */
+ mr->is_down = 1;
+ mr->last_update = vlib_time_now (lcm->vlib_main);
+ }
+
+ reset_pending_mr_counters (r);
+ elect_map_resolver (lcm);
+
+ /* try to find a next eligible map resolver and re-send */
+ queue_map_request (&r->src, &r->dst, r->is_smr_invoked,
+ 1 /* resend */ );
+ }
+ else
+ {
+ /* try again */
+ queue_map_request (&r->src, &r->dst, r->is_smr_invoked,
+ 1 /* resend */ );
+ r->retries_num++;
+ r->time_to_expire = PENDING_MREQ_EXPIRATION_TIME;
+ }
+ }
+ else
+ r->time_to_expire -= dt;
+}
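+
+/*
+ * Worked timeline (a sketch derived from the constants in control.h): with
+ * NUMBER_OF_RETRIES == 1 and PENDING_MREQ_EXPIRATION_TIME == 3.0, a request
+ * queued at t=0 expires at ~t=3s and is resent once; if it is still
+ * unanswered at ~t=6s the active map resolver is marked down, a new
+ * resolver is elected and the request is re-queued against it with reset
+ * counters.
+ */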
+
+static void
+remove_dead_pending_map_requests (lisp_cp_main_t * lcm)
+{
+ u64 *nonce;
+ pending_map_request_t *pmr;
+ u32 *to_be_removed = 0, *pmr_index;
+
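+  /* collect indexes first and delete afterwards: the pool must not be
+   * mutated while pool_foreach is walking it */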
+ /* *INDENT-OFF* */
+ pool_foreach (pmr, lcm->pending_map_requests_pool,
+ ({
+ if (pmr->to_be_removed)
+ {
+ clib_fifo_foreach (nonce, pmr->nonces, ({
+ hash_unset (lcm->pending_map_requests_by_nonce, nonce[0]);
+ }));
+
+ vec_add1 (to_be_removed, pmr - lcm->pending_map_requests_pool);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ vec_foreach (pmr_index, to_be_removed)
+ pool_put_index (lcm->pending_map_requests_pool, pmr_index[0]);
+
+ vec_free (to_be_removed);
+}
+
+static void
+update_rloc_probing (lisp_cp_main_t * lcm, f64 dt)
+{
+ static f64 time_left = RLOC_PROBING_INTERVAL;
+
+ if (!lcm->is_enabled || !lcm->rloc_probing)
+ return;
+
+ time_left -= dt;
+ if (time_left <= 0)
+ {
+ time_left = RLOC_PROBING_INTERVAL;
+ send_rloc_probes (lcm);
+ }
+}
+
+static int
+update_pending_map_register (pending_map_register_t * r, f64 dt, u8 * del_all)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t *ms;
+ del_all[0] = 0;
+
+ r->time_to_expire -= dt;
+
+ if (r->time_to_expire < 0)
+ {
+ lcm->expired_map_registers++;
+
+ if (lcm->expired_map_registers >= lcm->max_expired_map_registers)
+ {
+ ms = get_map_server (&lcm->active_map_server);
+ if (!ms)
+ {
+ clib_warning ("Map server %U not found - probably deleted "
+ "by the user recently.", format_ip_address,
+ &lcm->active_map_server);
+ }
+ else
+ {
+ clib_warning ("map server %U is unreachable, ignoring",
+ format_ip_address, &lcm->active_map_server);
+
+ /* mark current map server unavailable so it won't be
+ * elected next time */
+ ms->is_down = 1;
+ ms->last_update = vlib_time_now (lcm->vlib_main);
+ }
+
+ elect_map_server (lcm);
+
+ /* indication for deleting all pending map registers */
+ del_all[0] = 1;
+ lcm->expired_map_registers = 0;
+ return 0;
+ }
+ else
+ {
+ /* delete pending map register */
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static void
+update_map_register (lisp_cp_main_t * lcm, f64 dt)
+{
+ u32 *to_be_removed = 0, *pmr_index;
+ static f64 time_left = QUICK_MAP_REGISTER_INTERVAL;
+ static u64 mreg_sent_counter = 0;
+
+ pending_map_register_t *pmr;
+ u8 del_all = 0;
+
+ if (!lcm->is_enabled || !lcm->map_registering)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (pmr, lcm->pending_map_registers_pool,
+ ({
+ if (!update_pending_map_register (pmr, dt, &del_all))
+ {
+ if (del_all)
+ break;
+ vec_add1 (to_be_removed, pmr - lcm->pending_map_registers_pool);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (del_all)
+ {
+ /* delete all pending map register messages so they won't
+ * trigger another map server election.. */
+ pool_free (lcm->pending_map_registers_pool);
+ hash_free (lcm->map_register_messages_by_nonce);
+
+ /* ..and trigger registration against next map server (if any) */
+ time_left = 0;
+ }
+ else
+ {
+ vec_foreach (pmr_index, to_be_removed)
+ pool_put_index (lcm->pending_map_registers_pool, pmr_index[0]);
+ }
+
+ vec_free (to_be_removed);
+
+ time_left -= dt;
+ if (time_left <= 0)
+ {
+ if (mreg_sent_counter >= QUICK_MAP_REGISTER_MSG_COUNT)
+ time_left = MAP_REGISTER_INTERVAL;
+ else
+ {
+ mreg_sent_counter++;
+ time_left = QUICK_MAP_REGISTER_INTERVAL;
+ }
+ send_map_register (lcm, 1 /* want map notify */ );
+ }
+}
+
+static uword
+send_map_resolver_service (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ u32 *expired = 0;
+ f64 period = 2.0;
+ pending_map_request_t *pmr;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, period);
+
+ /* currently no signals are expected - just wait for clock */
+ (void) vlib_process_get_events (vm, 0);
+
+ /* *INDENT-OFF* */
+ pool_foreach (pmr, lcm->pending_map_requests_pool,
+ ({
+ if (!pmr->to_be_removed)
+ update_pending_request (pmr, period);
+ }));
+ /* *INDENT-ON* */
+
+ remove_dead_pending_map_requests (lcm);
+
+ update_map_register (lcm, period);
+ update_rloc_probing (lcm, period);
+
+ u64 now = clib_cpu_time_now ();
+
+ expired = timing_wheel_advance (&lcm->wheel, now, expired, 0);
+ if (vec_len (expired) > 0)
+ {
+ u32 *mi = 0;
+ vec_foreach (mi, expired)
+ {
+ process_expired_mapping (lcm, mi[0]);
+ }
+ _vec_len (expired) = 0;
+ }
+ }
+
+ /* unreachable */
+ return 0;
+}
+
+vnet_api_error_t
+vnet_lisp_stats_enable_disable (u8 enable)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ return VNET_API_ERROR_LISP_DISABLED;
+
+ if (enable)
+ lcm->flags |= LISP_FLAG_STATS_ENABLED;
+ else
+ lcm->flags &= ~LISP_FLAG_STATS_ENABLED;
+
+ return 0;
+}
+
+u8
+vnet_lisp_stats_enable_disable_state (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ if (vnet_lisp_enable_disable_status () == 0)
+ return VNET_API_ERROR_LISP_DISABLED;
+
+ return lcm->flags & LISP_FLAG_STATS_ENABLED;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_retry_service_node,static) = {
+ .function = send_map_resolver_service,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "lisp-retry-service",
+ .process_log2_n_stack_bytes = 16,
+};
+/* *INDENT-ON* */
+
+u32
+vnet_lisp_set_transport_protocol (u8 protocol)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ if (protocol < LISP_TRANSPORT_PROTOCOL_UDP ||
+ protocol > LISP_TRANSPORT_PROTOCOL_API)
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+
+ lcm->transport_protocol = protocol;
+ return 0;
+}
+
+lisp_transport_protocol_t
+vnet_lisp_get_transport_protocol (void)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return lcm->transport_protocol;
+}
+
+VLIB_INIT_FUNCTION (lisp_cp_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/control.h b/src/vnet/lisp-cp/control.h
new file mode 100644
index 00000000..a6da8188
--- /dev/null
+++ b/src/vnet/lisp-cp/control.h
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VNET_CONTROL_H_
+#define VNET_CONTROL_H_
+
+#include <vnet/vnet.h>
+#include <vnet/lisp-cp/gid_dictionary.h>
+#include <vnet/lisp-cp/lisp_types.h>
+#include <vppinfra/timing_wheel.h>
+
+#define NUMBER_OF_RETRIES 1
+#define PENDING_MREQ_EXPIRATION_TIME 3.0 /* seconds */
+#define PENDING_MREQ_QUEUE_LEN 5
+
+#define RLOC_PROBING_INTERVAL 60.0
+
+/* when map-registration is enabled, "quick registration" takes place first.
+   In this mode the ETR sends map-register messages at an increased frequency
+   until the specified message count is reached */
+#define QUICK_MAP_REGISTER_MSG_COUNT 5
+#define QUICK_MAP_REGISTER_INTERVAL 3.0
+
+/* normal map-register period */
+#define MAP_REGISTER_INTERVAL 60.0
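+
+/* Worked example (a sketch based on the defaults above): map-registers go
+ * out every QUICK_MAP_REGISTER_INTERVAL (3s) roughly until
+ * QUICK_MAP_REGISTER_MSG_COUNT of them have been sent, after which the
+ * period relaxes to MAP_REGISTER_INTERVAL (60s). */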
+
+/* how many tries until next map-server election */
+#define MAX_EXPIRED_MAP_REGISTERS_DEFAULT 3
+
+#define PENDING_MREG_EXPIRATION_TIME 3.0 /* seconds */
+
+/* 24 hours */
+#define MAP_REGISTER_DEFAULT_TTL 86400
+
+typedef struct
+{
+ gid_address_t src;
+ gid_address_t dst;
+ u32 retries_num;
+ f64 time_to_expire;
+ u8 is_smr_invoked;
+ u64 *nonces;
+ u8 to_be_removed;
+} pending_map_request_t;
+
+typedef struct
+{
+ f64 time_to_expire;
+} pending_map_register_t;
+
+typedef struct
+{
+ gid_address_t leid;
+ gid_address_t reid;
+ u8 is_src_dst;
+ locator_pair_t *locator_pairs;
+} fwd_entry_t;
+
+typedef struct
+{
+ gid_address_t leid;
+ gid_address_t reid;
+} lisp_adjacency_t;
+
+typedef enum
+{
+ IP4_MISS_PACKET,
+ IP6_MISS_PACKET
+} miss_packet_type_t;
+
+/* map-server/map-resolver structure */
+typedef struct
+{
+ u8 is_down;
+ f64 last_update;
+ ip_address_t address;
+ char *key;
+} lisp_msmr_t;
+
+typedef struct
+{
+ /* headers */
+ u8 data[100];
+ u32 length;
+ miss_packet_type_t type;
+} miss_packet_t;
+
+typedef struct
+{
+ u8 mac[6];
+ u32 ip4;
+} lisp_api_l2_arp_entry_t;
+
+typedef struct
+{
+ u8 mac[6];
+ u8 ip6[16];
+} lisp_api_ndp_entry_t;
+
+typedef enum
+{
+ MR_MODE_DST_ONLY = 0,
+ MR_MODE_SRC_DST,
+ _MR_MODE_MAX
+} map_request_mode_t;
+
+#define foreach_lisp_flag_bit \
+ _(USE_PETR, "Use Proxy-ETR") \
+ _(STATS_ENABLED, "Statistics enabled")
+
+typedef enum lisp_flag_bits
+{
+#define _(sym, str) LISP_FLAG_BIT_##sym,
+ foreach_lisp_flag_bit
+#undef _
+} lisp_flag_bits_e;
+
+typedef enum lisp_flags
+{
+#define _(sym, str) LISP_FLAG_##sym = 1 << LISP_FLAG_BIT_##sym,
+ foreach_lisp_flag_bit
+#undef _
+} lisp_flags_e;
+
+typedef struct
+{
+ ip_address_t addr;
+ u32 bd;
+} lisp_l2_arp_key_t;
+
+typedef enum
+{
+ LISP_TRANSPORT_PROTOCOL_UDP = 1,
+ LISP_TRANSPORT_PROTOCOL_API
+} lisp_transport_protocol_t;
+
+typedef struct
+{
+ u64 nonce;
+ u8 is_rloc_probe;
+ mapping_t *mappings;
+ volatile u8 is_free;
+} map_records_arg_t;
+
+typedef struct
+{
+ u32 flags;
+
+ /* LISP feature status */
+ u8 is_enabled;
+
+ /* eid table */
+ gid_dictionary_t mapping_index_by_gid;
+
+ /* pool of mappings */
+ mapping_t *mapping_pool;
+
+ /* hash map of secret keys by mapping index */
+ u8 *key_by_mapping_index;
+
+ /* pool of locators */
+ locator_t *locator_pool;
+
+ /* pool of locator-sets */
+ locator_set_t *locator_set_pool;
+
+  /* per-locator vectors of locator-set indexes, indexed by locator index */
+ u32 **locator_to_locator_sets;
+
+  /* hash map of locator-set indexes by name */
+ uword *locator_set_index_by_name;
+
+  /* vector of EID index vectors, indexed by locator-set index */
+ u32 **locator_set_to_eids;
+
+ /* vectors of indexes for local locator-sets and mappings */
+ u32 *local_mappings_indexes;
+ u32 *local_locator_set_indexes;
+
+ /* hash map of forwarding entries by mapping index */
+ u32 *fwd_entry_by_mapping_index;
+
+ /* pool of vectors of rmts per lcl mapping in adjacencies */
+ u32 **lcl_to_rmt_adjacencies;
+
+ /* hash of pool positions of vectors of rmts by lcl mapping index */
+ u32 *lcl_to_rmt_adjs_by_lcl_idx;
+
+ /* forwarding entries pool */
+ fwd_entry_t *fwd_entry_pool;
+
+ /* hash map keyed by nonce of pending map-requests */
+ uword *pending_map_requests_by_nonce;
+
+ /* pool of pending map requests */
+ pending_map_request_t *pending_map_requests_pool;
+
+ /* pool of pending map registers */
+ pending_map_register_t *pending_map_registers_pool;
+
+ /* hash map of sent map register messages */
+ uword *map_register_messages_by_nonce;
+
+ /* vector of map-resolvers */
+ lisp_msmr_t *map_resolvers;
+
+ /* vector of map-servers */
+ lisp_msmr_t *map_servers;
+
+ /* map resolver address currently being used for sending requests.
+ * This has to be an actual address and not an index to map_resolvers vector
+ * since the vector may be modified during request resend/retry procedure
+ * and break things :-) */
+ ip_address_t active_map_resolver;
+ ip_address_t active_map_server;
+
+ u8 do_map_resolver_election;
+ u8 do_map_server_election;
+
+ /* map-request locator set index */
+ u32 mreq_itr_rlocs;
+
+ /* vni to vrf hash tables */
+ uword *table_id_by_vni;
+ uword *vni_by_table_id;
+
+ /* vni to bd-index hash tables */
+ uword *bd_id_by_vni;
+ uword *vni_by_bd_id;
+
+ /* track l2 and l3 interfaces that have been created for vni */
+ uword *l2_dp_intf_by_vni;
+
+ /* Proxy ITR map index */
+ u32 pitr_map_index;
+
+ /** Proxy ETR map index */
+ u32 petr_map_index;
+
+ /* LISP PITR mode */
+ u8 lisp_pitr;
+
+ /* mapping index for NSH */
+ u32 nsh_map_index;
+
+ /* map request mode */
+ u8 map_request_mode;
+
+ /* enable/disable map registering */
+ u8 map_registering;
+
+ /* enable/disable rloc-probing */
+ u8 rloc_probing;
+
+  /* timing wheel for mapping timeouts */
+ timing_wheel_t wheel;
+
+ /** Per thread pool of records shared with thread0 */
+ map_records_arg_t **map_records_args_pool;
+
+ /* TTL used for all mappings when registering */
+ u32 map_register_ttl;
+
+ /* control variables for map server election */
+ u32 max_expired_map_registers;
+ u32 expired_map_registers;
+
+ /** either UDP based or binary API. Default is UDP */
+ lisp_transport_protocol_t transport_protocol;
+
+ /* commodity */
+ ip4_main_t *im4;
+ ip6_main_t *im6;
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} lisp_cp_main_t;
+
+/* lisp-gpe control plane */
+extern lisp_cp_main_t lisp_control_main;
+
+extern vlib_node_registration_t lisp_cp_input_node;
+extern vlib_node_registration_t lisp_cp_lookup_ip4_node;
+extern vlib_node_registration_t lisp_cp_lookup_ip6_node;
+
+clib_error_t *lisp_cp_init (vlib_main_t * vm);
+
+always_inline lisp_cp_main_t *
+vnet_lisp_cp_get_main (void)
+{
+ return &lisp_control_main;
+}
+
+void
+get_src_and_dst_eids_from_buffer (lisp_cp_main_t * lcm, vlib_buffer_t * b,
+ gid_address_t * src, gid_address_t * dst,
+ u16 type);
+
+typedef struct
+{
+ u8 is_add;
+ union
+ {
+ u8 *name;
+ u32 index;
+ };
+ locator_t *locators;
+ u8 local;
+} vnet_lisp_add_del_locator_set_args_t;
+
+int
+vnet_lisp_add_del_locator_set (vnet_lisp_add_del_locator_set_args_t * a,
+ u32 * ls_index);
+int
+vnet_lisp_add_del_locator (vnet_lisp_add_del_locator_set_args_t * a,
+ locator_set_t * ls, u32 * ls_index);
+
+typedef struct
+{
+ u8 is_add;
+ gid_address_t eid;
+ u32 locator_set_index;
+
+ u32 ttl;
+ u8 action;
+ u8 authoritative;
+
+ u8 local;
+ u8 is_static;
+ u8 *key;
+ u8 key_id;
+} vnet_lisp_add_del_mapping_args_t;
+
+int
+vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a,
+ u32 * map_index);
+int
+vnet_lisp_add_del_local_mapping (vnet_lisp_add_del_mapping_args_t * a,
+ u32 * map_index_result);
+
+int
+vnet_lisp_add_mapping (vnet_lisp_add_del_mapping_args_t * a,
+ locator_t * rlocs, u32 * res_map_index,
+ u8 * is_changed);
+
+int vnet_lisp_del_mapping (gid_address_t * eid, u32 * res_map_index);
+
+typedef struct
+{
+ gid_address_t reid;
+ gid_address_t leid;
+ u8 is_add;
+} vnet_lisp_add_del_adjacency_args_t;
+
+int vnet_lisp_add_del_adjacency (vnet_lisp_add_del_adjacency_args_t * a);
+
+typedef struct
+{
+ u8 is_add;
+ ip_address_t address;
+} vnet_lisp_add_del_map_resolver_args_t;
+
+int
+vnet_lisp_add_del_map_resolver (vnet_lisp_add_del_map_resolver_args_t * a);
+int vnet_lisp_add_del_map_server (ip_address_t * addr, u8 is_add);
+
+clib_error_t *vnet_lisp_enable_disable (u8 is_enabled);
+u8 vnet_lisp_enable_disable_status (void);
+
+int vnet_lisp_pitr_set_locator_set (u8 * locator_set_name, u8 is_add);
+int vnet_lisp_use_petr (ip_address_t * ip, u8 is_add);
+
+typedef struct
+{
+ u8 is_add;
+ u8 *locator_set_name;
+} vnet_lisp_add_del_mreq_itr_rloc_args_t;
+
+int
+vnet_lisp_add_del_mreq_itr_rlocs (vnet_lisp_add_del_mreq_itr_rloc_args_t * a);
+
+int vnet_lisp_clear_all_remote_adjacencies (void);
+
+int vnet_lisp_eid_table_map (u32 vni, u32 vrf, u8 is_l2, u8 is_add);
+int vnet_lisp_add_del_map_table_key (gid_address_t * eid, char *key,
+ u8 is_add);
+int vnet_lisp_set_map_request_mode (u8 mode);
+u8 vnet_lisp_get_map_request_mode (void);
+lisp_adjacency_t *vnet_lisp_adjacencies_get_by_vni (u32 vni);
+int vnet_lisp_rloc_probe_enable_disable (u8 is_enable);
+int vnet_lisp_map_register_enable_disable (u8 is_enable);
+u8 vnet_lisp_map_register_state_get (void);
+u8 vnet_lisp_rloc_probe_state_get (void);
+int vnet_lisp_add_del_l2_arp_ndp_entry (gid_address_t * key, u8 * mac,
+ u8 is_add);
+u32 *vnet_lisp_l2_arp_bds_get (void);
+lisp_api_l2_arp_entry_t *vnet_lisp_l2_arp_entries_get_by_bd (u32 bd);
+int vnet_lisp_nsh_set_locator_set (u8 * locator_set_name, u8 is_add);
+int vnet_lisp_map_register_set_ttl (u32 ttl);
+u32 vnet_lisp_map_register_get_ttl (void);
+int vnet_lisp_map_register_fallback_threshold_set (u32 value);
+u32 vnet_lisp_map_register_fallback_threshold_get (void);
+u32 *vnet_lisp_ndp_bds_get (void);
+lisp_api_ndp_entry_t *vnet_lisp_ndp_entries_get_by_bd (u32 bd);
+u32 vnet_lisp_set_transport_protocol (u8 protocol);
+lisp_transport_protocol_t vnet_lisp_get_transport_protocol (void);
+
+map_records_arg_t *parse_map_reply (vlib_buffer_t * b);
+
+always_inline mapping_t *
+lisp_get_petr_mapping (lisp_cp_main_t * lcm)
+{
+ return pool_elt_at_index (lcm->mapping_pool, lcm->petr_map_index);
+}
+
+#endif /* VNET_CONTROL_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/gid_dictionary.c b/src/vnet/lisp-cp/gid_dictionary.c
new file mode 100644
index 00000000..c3b93301
--- /dev/null
+++ b/src/vnet/lisp-cp/gid_dictionary.c
@@ -0,0 +1,1055 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lisp-cp/gid_dictionary.h>
+
+typedef struct
+{
+ void *arg;
+ ip_prefix_t src;
+ foreach_subprefix_match_cb_t cb;
+ union
+ {
+ gid_ip4_table_t *ip4_table;
+ gid_ip6_table_t *ip6_table;
+ };
+} sfib_entry_arg_t;
+
+static u32 ip4_lookup (gid_ip4_table_t * db, u32 vni, ip_prefix_t * key);
+
+static u32 ip6_lookup (gid_ip6_table_t * db, u32 vni, ip_prefix_t * key);
+
+static void
+foreach_sfib4_subprefix (BVT (clib_bihash_kv) * kvp, void *arg)
+{
+ sfib_entry_arg_t *a = arg;
+ u32 ip = (u32) kvp->key[0];
+ ip4_address_t *mask;
+ u8 plen = ip_prefix_len (&a->src);
+
+ ASSERT (plen <= 32);
+ mask = &a->ip4_table->ip4_fib_masks[plen];
+
+ u32 src_ip = ip_prefix_v4 (&a->src).as_u32;
+ src_ip &= mask->as_u32;
+ ip &= mask->as_u32;
+
+ if (src_ip == ip)
+ {
+ /* found sub-prefix of src prefix */
+ (a->cb) (kvp->value, a->arg);
+ }
+}
+
+static void
+gid_dict_foreach_ip4_subprefix (gid_dictionary_t * db, u32 vni,
+ ip_prefix_t * src, ip_prefix_t * dst,
+ foreach_subprefix_match_cb_t cb, void *arg)
+{
+ u32 sfi;
+ gid_ip4_table_t *sfib4;
+ sfib_entry_arg_t a;
+
+ sfi = ip4_lookup (&db->dst_ip4_table, vni, dst);
+ if (GID_LOOKUP_MISS == sfi)
+ return;
+
+ sfib4 = pool_elt_at_index (db->src_ip4_table_pool, sfi);
+
+ a.arg = arg;
+ a.cb = cb;
+ a.src = src[0];
+ a.ip4_table = sfib4;
+
+ BV (clib_bihash_foreach_key_value_pair) (&sfib4->ip4_lookup_table,
+ foreach_sfib4_subprefix, &a);
+}
+
+static void
+foreach_sfib6_subprefix (BVT (clib_bihash_kv) * kvp, void *arg)
+{
+ sfib_entry_arg_t *a = arg;
+ ip6_address_t ip;
+ ip6_address_t *mask;
+  u8 plen = ip_prefix_len (&a->src);
+
+  ASSERT (plen <= 128);
+  mask = &a->ip6_table->ip6_fib_masks[plen];
+ ip.as_u64[0] = kvp->key[0];
+ ip.as_u64[1] = kvp->key[1];
+
+ if (ip6_address_is_equal_masked (&ip_prefix_v6 (&a->src), &ip, mask))
+ {
+ /* found sub-prefix of src prefix */
+ (a->cb) (kvp->value, a->arg);
+ }
+}
+
+static void
+gid_dict_foreach_ip6_subprefix (gid_dictionary_t * db, u32 vni,
+ ip_prefix_t * src, ip_prefix_t * dst,
+ foreach_subprefix_match_cb_t cb, void *arg)
+{
+ u32 sfi;
+ gid_ip6_table_t *sfib6;
+ sfib_entry_arg_t a;
+
+ sfi = ip6_lookup (&db->dst_ip6_table, vni, dst);
+ if (GID_LOOKUP_MISS == sfi)
+ return;
+
+ sfib6 = pool_elt_at_index (db->src_ip6_table_pool, sfi);
+
+ a.arg = arg;
+ a.cb = cb;
+ a.src = src[0];
+ a.ip6_table = sfib6;
+
+ BV (clib_bihash_foreach_key_value_pair) (&sfib6->ip6_lookup_table,
+ foreach_sfib6_subprefix, &a);
+}
+
+void
+gid_dict_foreach_subprefix (gid_dictionary_t * db, gid_address_t * eid,
+ foreach_subprefix_match_cb_t cb, void *arg)
+{
+ ip_prefix_t *ippref = &gid_address_sd_dst_ippref (eid);
+
+ if (IP4 == ip_prefix_version (ippref))
+ gid_dict_foreach_ip4_subprefix (db, gid_address_vni (eid),
+ &gid_address_sd_src_ippref (eid),
+ &gid_address_sd_dst_ippref (eid), cb,
+ arg);
+ else
+ gid_dict_foreach_ip6_subprefix (db, gid_address_vni (eid),
+ &gid_address_sd_src_ippref (eid),
+ &gid_address_sd_dst_ippref (eid), cb,
+ arg);
+}
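+
+/* Illustrative only (the name collect_cb and the u32-vector argument are
+ * assumptions made for the example): a minimal foreach_subprefix_match_cb_t
+ * that collects the mapping index of every matching sub-prefix. */
+#if 0
+static void
+collect_cb (u32 mapping_index, void *arg)
+{
+  u32 **indexes = arg;
+  vec_add1 (*indexes, mapping_index);
+}
+#endif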
+
+void
+gid_dict_foreach_l2_arp_ndp_entry (gid_dictionary_t * db, void (*cb)
+ (BVT (clib_bihash_kv) * kvp, void *arg),
+ void *ht)
+{
+ gid_l2_arp_ndp_table_t *tab = &db->arp_ndp_table;
+ BV (clib_bihash_foreach_key_value_pair) (&tab->arp_ndp_lookup_table, cb,
+ ht);
+}
+
+static void
+make_mac_sd_key (BVT (clib_bihash_kv) * kv, u32 vni, u8 src_mac[6],
+ u8 dst_mac[6])
+{
+ kv->key[0] = (u64) vni;
+ kv->key[1] = mac_to_u64 (dst_mac);
+ kv->key[2] = src_mac ? mac_to_u64 (src_mac) : (u64) 0;
+}
+
+static u32
+mac_sd_lookup (gid_mac_table_t * db, u32 vni, u8 * dst, u8 * src)
+{
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ make_mac_sd_key (&kv, vni, src, dst);
+ rv = BV (clib_bihash_search_inline_2) (&db->mac_lookup_table, &kv, &value);
+
+ /* no match, try with src 0, catch all for dst */
+ if (rv != 0)
+ {
+ kv.key[2] = 0;
+ rv = BV (clib_bihash_search_inline_2) (&db->mac_lookup_table, &kv,
+ &value);
+ if (rv == 0)
+ return value.value;
+ }
+ else
+ return value.value;
+
+ return GID_LOOKUP_MISS;
+}
+
+static u32
+ip4_lookup_exact_match (gid_ip4_table_t * db, u32 vni, ip_prefix_t * key)
+{
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ ip4_address_t *mask;
+
+ mask = &db->ip4_fib_masks[ip_prefix_len (key)];
+
+ kv.key[0] = ((u64) vni << 32) | (ip_prefix_v4 (key).as_u32 & mask->as_u32);
+ kv.key[1] = 0;
+ kv.key[2] = 0;
+
+ rv = BV (clib_bihash_search_inline_2) (&db->ip4_lookup_table, &kv, &value);
+ if (rv == 0)
+ return value.value;
+
+ return GID_LOOKUP_MISS;
+}
+
+static u32
+ip4_lookup (gid_ip4_table_t * db, u32 vni, ip_prefix_t * key)
+{
+ int i, len;
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ len = vec_len (db->ip4_prefix_lengths_in_search_order);
+
+ for (i = 0; i < len; i++)
+ {
+ int dst_address_length = db->ip4_prefix_lengths_in_search_order[i];
+ ip4_address_t *mask;
+
+ ASSERT (dst_address_length >= 0 && dst_address_length <= 32);
+
+ mask = &db->ip4_fib_masks[dst_address_length];
+
+ kv.key[0] =
+ ((u64) vni << 32) | (ip_prefix_v4 (key).as_u32 & mask->as_u32);
+ kv.key[1] = 0;
+ kv.key[2] = 0;
+
+ rv =
+ BV (clib_bihash_search_inline_2) (&db->ip4_lookup_table, &kv, &value);
+ if (rv == 0)
+ return value.value;
+ }
+
+ return GID_LOOKUP_MISS;
+}
+
+static u32
+ip6_lookup_exact_match (gid_ip6_table_t * db, u32 vni, ip_prefix_t * key)
+{
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ ip6_address_t *mask;
+ mask = &db->ip6_fib_masks[ip_prefix_len (key)];
+
+ kv.key[0] = ip_prefix_v6 (key).as_u64[0] & mask->as_u64[0];
+ kv.key[1] = ip_prefix_v6 (key).as_u64[1] & mask->as_u64[1];
+ kv.key[2] = (u64) vni;
+
+ rv = BV (clib_bihash_search_inline_2) (&db->ip6_lookup_table, &kv, &value);
+ if (rv == 0)
+ return value.value;
+
+ return GID_LOOKUP_MISS;
+}
+
+static u32
+ip6_lookup (gid_ip6_table_t * db, u32 vni, ip_prefix_t * key)
+{
+ int i, len;
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ len = vec_len (db->ip6_prefix_lengths_in_search_order);
+
+ for (i = 0; i < len; i++)
+ {
+ int dst_address_length = db->ip6_prefix_lengths_in_search_order[i];
+ ip6_address_t *mask;
+
+ ASSERT (dst_address_length >= 0 && dst_address_length <= 128);
+
+ mask = &db->ip6_fib_masks[dst_address_length];
+
+ kv.key[0] = ip_prefix_v6 (key).as_u64[0] & mask->as_u64[0];
+ kv.key[1] = ip_prefix_v6 (key).as_u64[1] & mask->as_u64[1];
+ kv.key[2] = (u64) vni;
+
+ rv =
+ BV (clib_bihash_search_inline_2) (&db->ip6_lookup_table, &kv, &value);
+ if (rv == 0)
+ return value.value;
+ }
+
+ return GID_LOOKUP_MISS;
+}
+
+static u32
+ip_sd_lookup (gid_dictionary_t * db, u32 vni, ip_prefix_t * dst,
+ ip_prefix_t * src)
+{
+ u32 sfi;
+ gid_ip4_table_t *sfib4;
+ gid_ip6_table_t *sfib6;
+
+ switch (ip_prefix_version (dst))
+ {
+ case IP4:
+ sfi = ip4_lookup (&db->dst_ip4_table, vni, dst);
+ if (GID_LOOKUP_MISS != sfi)
+ sfib4 = pool_elt_at_index (db->src_ip4_table_pool, sfi);
+ else
+ return GID_LOOKUP_MISS;
+
+ if (!src)
+ {
+ ip_prefix_t sp;
+ memset (&sp, 0, sizeof (sp));
+ return ip4_lookup_exact_match (sfib4, 0, &sp);
+ }
+ else
+ return ip4_lookup (sfib4, 0, src);
+
+ break;
+ case IP6:
+ sfi = ip6_lookup (&db->dst_ip6_table, vni, dst);
+ if (GID_LOOKUP_MISS != sfi)
+ sfib6 = pool_elt_at_index (db->src_ip6_table_pool, sfi);
+ else
+ return GID_LOOKUP_MISS;
+
+ if (!src)
+ {
+ ip_prefix_t sp;
+ memset (&sp, 0, sizeof (sp));
+ ip_prefix_version (&sp) = IP6;
+ return ip6_lookup_exact_match (sfib6, 0, &sp);
+ }
+ else
+ return ip6_lookup (sfib6, 0, src);
+
+ break;
+ default:
+ clib_warning ("address type %d not supported!",
+ ip_prefix_version (dst));
+ break;
+ }
+ return GID_LOOKUP_MISS;
+}
+
+static void
+make_arp_ndp_key (BVT (clib_bihash_kv) * kv, u32 bd, ip_address_t * addr)
+{
+ kv->key[0] = ((u64) bd << 32) | (u32) ip_addr_version (addr);
+ if (ip_addr_version (addr) == IP4)
+ {
+ kv->key[1] = (u64) addr->ip.v4.as_u32;
+ kv->key[2] = (u64) 0;
+ }
+ else
+ {
+ kv->key[1] = (u64) addr->ip.v6.as_u64[0];
+ kv->key[2] = (u64) addr->ip.v6.as_u64[1];
+ }
+}
+
+static void
+make_nsh_key (BVT (clib_bihash_kv) * kv, u32 vni, u32 spi, u8 si)
+{
+ kv->key[0] = (u64) vni;
+ kv->key[1] = (u64) spi;
+ kv->key[2] = (u64) si;
+}
+
+static u64
+arp_ndp_lookup (gid_l2_arp_ndp_table_t * db, u32 bd, ip_address_t * key)
+{
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ make_arp_ndp_key (&kv, bd, key);
+ rv = BV (clib_bihash_search_inline_2) (&db->arp_ndp_lookup_table, &kv,
+ &value);
+
+ if (rv == 0)
+ return value.value;
+
+ return GID_LOOKUP_MISS_L2;
+}
+
+static u32
+nsh_lookup (gid_nsh_table_t * db, u32 vni, u32 spi, u8 si)
+{
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ make_nsh_key (&kv, vni, spi, si);
+ rv = BV (clib_bihash_search_inline_2) (&db->nsh_lookup_table, &kv, &value);
+
+ if (rv == 0)
+ return value.value;
+
+ return GID_LOOKUP_MISS;
+}
+
+u64
+gid_dictionary_lookup (gid_dictionary_t * db, gid_address_t * key)
+{
+ switch (gid_address_type (key))
+ {
+ case GID_ADDR_IP_PREFIX:
+ return ip_sd_lookup (db, gid_address_vni (key),
+ &gid_address_ippref (key), 0);
+ case GID_ADDR_MAC:
+ return mac_sd_lookup (&db->sd_mac_table, gid_address_vni (key),
+ gid_address_mac (key), 0);
+ case GID_ADDR_SRC_DST:
+ switch (gid_address_sd_dst_type (key))
+ {
+ case FID_ADDR_IP_PREF:
+ return ip_sd_lookup (db, gid_address_vni (key),
+ &gid_address_sd_dst_ippref (key),
+ &gid_address_sd_src_ippref (key));
+ break;
+ case FID_ADDR_MAC:
+ return mac_sd_lookup (&db->sd_mac_table, gid_address_vni (key),
+ gid_address_sd_dst_mac (key),
+ gid_address_sd_src_mac (key));
+ break;
+ default:
+ clib_warning ("Source/Dest address type %d not supported!",
+ gid_address_sd_dst_type (key));
+ break;
+ }
+ break;
+ case GID_ADDR_ARP:
+ case GID_ADDR_NDP:
+ return arp_ndp_lookup (&db->arp_ndp_table, gid_address_arp_ndp_bd (key),
+ &gid_address_arp_ndp_ip (key));
+ case GID_ADDR_NSH:
+ return nsh_lookup (&db->nsh_table, gid_address_vni (key),
+ gid_address_nsh_spi (key), gid_address_nsh_si (key));
+ default:
+ clib_warning ("address type %d not supported!", gid_address_type (key));
+ break;
+ }
+ return GID_LOOKUP_MISS;
+}
+
+u32
+gid_dictionary_sd_lookup (gid_dictionary_t * db, gid_address_t * dst,
+ gid_address_t * src)
+{
+ switch (gid_address_type (dst))
+ {
+ case GID_ADDR_IP_PREFIX:
+ return ip_sd_lookup (db, gid_address_vni (dst),
+ &gid_address_ippref (dst),
+ &gid_address_ippref (src));
+ case GID_ADDR_MAC:
+ return mac_sd_lookup (&db->sd_mac_table, gid_address_vni (dst),
+ gid_address_mac (dst), gid_address_mac (src));
+ case GID_ADDR_SRC_DST:
+ switch (gid_address_sd_dst_type (dst))
+ {
+ case FID_ADDR_IP_PREF:
+ return ip_sd_lookup (db, gid_address_vni (dst),
+ &gid_address_sd_dst_ippref (dst),
+ &gid_address_sd_src_ippref (dst));
+ break;
+ case FID_ADDR_MAC:
+ return mac_sd_lookup (&db->sd_mac_table, gid_address_vni (dst),
+ gid_address_sd_dst_mac (dst),
+ gid_address_sd_src_mac (dst));
+ break;
+ default:
+ clib_warning ("Source/Dest address type %d not supported!",
+ gid_address_sd_dst_type (dst));
+ break;
+ }
+ break;
+ case GID_ADDR_NSH:
+ return gid_dictionary_lookup (db, dst);
+ break;
+ default:
+ clib_warning ("address type %d not supported!", gid_address_type (dst));
+ break;
+ }
+ return GID_LOOKUP_MISS;
+}
+
+static void
+ip4_compute_prefix_lengths_in_search_order (gid_ip4_table_t * db)
+{
+ int i;
+ vec_reset_length (db->ip4_prefix_lengths_in_search_order);
+ /* Note: bitmap reversed so this is in fact a longest prefix match */
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, db->ip4_non_empty_dst_address_length_bitmap,
+ ({
+ int dst_address_length = 32 - i;
+ vec_add1 (db->ip4_prefix_lengths_in_search_order, dst_address_length);
+ }));
+ /* *INDENT-ON* */
+}
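+
+/*
+ * Worked example (a sketch): a /25 entry sets bit 32 - 25 = 7 and a /24
+ * entry sets bit 32 - 24 = 8 in the bitmap above. clib_bitmap_foreach
+ * visits bits in ascending order, so the search-order vector becomes
+ * [25, 24] and ip4_lookup() probes the more specific prefix first,
+ * yielding longest-prefix-match semantics.
+ */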
+
+static u32
+add_del_ip4_key (gid_ip4_table_t * db, u32 vni, ip_prefix_t * pref, u32 val,
+ u8 is_add)
+{
+ BVT (clib_bihash_kv) kv, value;
+ u32 old_val = ~0;
+ ip4_address_t key;
+ u8 plen = ip_prefix_len (pref);
+
+ clib_memcpy (&key, &ip_prefix_v4 (pref), sizeof (key));
+ key.as_u32 &= db->ip4_fib_masks[plen].as_u32;
+ if (is_add)
+ {
+ db->ip4_non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (db->ip4_non_empty_dst_address_length_bitmap,
+ 32 - plen, 1);
+ ip4_compute_prefix_lengths_in_search_order (db);
+
+ db->ip4_prefix_len_refcount[plen]++;
+ }
+ else
+ {
+ ASSERT (db->ip4_prefix_len_refcount[plen] != 0);
+
+ db->ip4_prefix_len_refcount[plen]--;
+
+ if (db->ip4_prefix_len_refcount[plen] == 0)
+ {
+ db->ip4_non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (db->ip4_non_empty_dst_address_length_bitmap,
+ 32 - plen, 0);
+ ip4_compute_prefix_lengths_in_search_order (db);
+ }
+ }
+
+ kv.key[0] = ((u64) vni << 32) | key.as_u32;
+ kv.key[1] = 0;
+ kv.key[2] = 0;
+
+ if (BV (clib_bihash_search) (&db->ip4_lookup_table, &kv, &value) == 0)
+ old_val = value.value;
+
+ if (!is_add)
+ {
+ BV (clib_bihash_add_del) (&db->ip4_lookup_table, &kv, 0 /* is_add */ );
+ db->count--;
+ }
+ else
+ {
+ kv.value = val;
+ BV (clib_bihash_add_del) (&db->ip4_lookup_table, &kv, 1 /* is_add */ );
+ db->count++;
+ }
+ return old_val;
+}
+
+static void
+ip4_lookup_init (gid_ip4_table_t * db)
+{
+ uword i;
+
+ memset (db->ip4_prefix_len_refcount, 0,
+ sizeof (db->ip4_prefix_len_refcount));
+
+ for (i = 0; i < ARRAY_LEN (db->ip4_fib_masks); i++)
+ {
+ u32 m;
+
+ if (i < 32)
+ m = pow2_mask (i) << (32 - i);
+ else
+ m = ~0;
+ db->ip4_fib_masks[i].as_u32 = clib_host_to_net_u32 (m);
+ }
+ if (db->ip4_lookup_table_nbuckets == 0)
+ db->ip4_lookup_table_nbuckets = IP4_LOOKUP_DEFAULT_HASH_NUM_BUCKETS;
+
+ db->ip4_lookup_table_nbuckets =
+ 1 << max_log2 (db->ip4_lookup_table_nbuckets);
+
+ if (db->ip4_lookup_table_size == 0)
+ db->ip4_lookup_table_size = IP4_LOOKUP_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&db->ip4_lookup_table, "ip4 lookup table",
+ db->ip4_lookup_table_nbuckets,
+ db->ip4_lookup_table_size);
+}
+
+static u32
+add_del_sd_ip4_key (gid_dictionary_t * db, u32 vni, ip_prefix_t * dst_pref,
+ ip_prefix_t * src_pref, u32 val, u8 is_add)
+{
+ u32 sfi, old_val = ~0;
+ gid_ip4_table_t *sfib;
+
+ sfi = ip4_lookup_exact_match (&db->dst_ip4_table, vni, dst_pref);
+
+ if (is_add)
+ {
+ if (GID_LOOKUP_MISS == sfi)
+ {
+ pool_get (db->src_ip4_table_pool, sfib);
+ ip4_lookup_init (sfib);
+ add_del_ip4_key (&db->dst_ip4_table, vni, dst_pref,
+ sfib - db->src_ip4_table_pool, is_add);
+ if (src_pref)
+ add_del_ip4_key (sfib, 0 /* vni */ , src_pref, val, is_add);
+ else
+ {
+ ip_prefix_t sp;
+ memset (&sp, 0, sizeof (sp));
+ add_del_ip4_key (sfib, 0 /* vni */ , &sp, val, is_add);
+ }
+ }
+ else
+ {
+ ASSERT (!pool_is_free_index (db->src_ip4_table_pool, sfi));
+ sfib = pool_elt_at_index (db->src_ip4_table_pool, sfi);
+ if (src_pref)
+ {
+ old_val = ip4_lookup_exact_match (sfib, 0, src_pref);
+ add_del_ip4_key (sfib, 0 /* vni */ , src_pref, val, is_add);
+ }
+ else
+ {
+ ip_prefix_t sp;
+ memset (&sp, 0, sizeof (sp));
+ old_val =
+ add_del_ip4_key (sfib, 0 /* vni */ , &sp, val, is_add);
+ }
+ }
+ }
+ else
+ {
+ if (GID_LOOKUP_MISS != sfi)
+ {
+ sfib = pool_elt_at_index (db->src_ip4_table_pool, sfi);
+ if (src_pref)
+ old_val = add_del_ip4_key (sfib, 0, src_pref, 0, is_add);
+ else
+ {
+ ip_prefix_t sp;
+ memset (&sp, 0, sizeof (sp));
+ old_val = add_del_ip4_key (sfib, 0, &sp, 0, is_add);
+ }
+
+ if (sfib->count == 0)
+ add_del_ip4_key (&db->dst_ip4_table, vni, dst_pref, 0, is_add);
+ }
+ else
+ clib_warning ("cannot delete dst mapping %U!", format_ip_prefix,
+ dst_pref);
+ }
+ return old_val;
+}
+
+static void
+ip6_compute_prefix_lengths_in_search_order (gid_ip6_table_t * db)
+{
+ int i;
+ vec_reset_length (db->ip6_prefix_lengths_in_search_order);
+ /* Note: bitmap reversed so this is in fact a longest prefix match */
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, db->ip6_non_empty_dst_address_length_bitmap,
+ ({
+ int dst_address_length = 128 - i;
+ vec_add1 (db->ip6_prefix_lengths_in_search_order, dst_address_length);
+ }));
+ /* *INDENT-ON* */
+}
+
+static u32
+add_del_ip6_key (gid_ip6_table_t * db, u32 vni, ip_prefix_t * pref, u32 val,
+ u8 is_add)
+{
+ BVT (clib_bihash_kv) kv, value;
+ u32 old_val = ~0;
+ ip6_address_t key;
+ u8 plen = ip_prefix_len (pref);
+
+ clib_memcpy (&key, &ip_prefix_v6 (pref), sizeof (key));
+ ip6_address_mask (&key, &db->ip6_fib_masks[plen]);
+ if (is_add)
+ {
+ db->ip6_non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (db->ip6_non_empty_dst_address_length_bitmap,
+ 128 - plen, 1);
+ ip6_compute_prefix_lengths_in_search_order (db);
+ db->ip6_prefix_len_refcount[plen]++;
+ }
+ else
+ {
+ ASSERT (db->ip6_prefix_len_refcount[plen] != 0);
+
+ db->ip6_prefix_len_refcount[plen]--;
+
+ if (db->ip6_prefix_len_refcount[plen] == 0)
+ {
+ db->ip6_non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (db->ip6_non_empty_dst_address_length_bitmap,
+ 128 - plen, 0);
+ ip6_compute_prefix_lengths_in_search_order (db);
+ }
+ }
+
+ kv.key[0] = key.as_u64[0];
+ kv.key[1] = key.as_u64[1];
+ kv.key[2] = (u64) vni;
+
+ if (BV (clib_bihash_search) (&db->ip6_lookup_table, &kv, &value) == 0)
+ old_val = value.value;
+
+ if (!is_add)
+ {
+ BV (clib_bihash_add_del) (&db->ip6_lookup_table, &kv, 0 /* is_add */ );
+ db->count--;
+ }
+ else
+ {
+ kv.value = val;
+ BV (clib_bihash_add_del) (&db->ip6_lookup_table, &kv, 1 /* is_add */ );
+ db->count++;
+ }
+ return old_val;
+}
+
+static u32
+add_del_mac (gid_mac_table_t * db, u32 vni, u8 * dst_mac, u8 * src_mac,
+ u32 val, u8 is_add)
+{
+ BVT (clib_bihash_kv) kv, value;
+ u32 old_val = ~0;
+
+ make_mac_sd_key (&kv, vni, src_mac, dst_mac);
+
+ if (BV (clib_bihash_search) (&db->mac_lookup_table, &kv, &value) == 0)
+ old_val = value.value;
+
+ if (!is_add)
+ {
+ BV (clib_bihash_add_del) (&db->mac_lookup_table, &kv, 0 /* is_add */ );
+ db->count--;
+ }
+ else
+ {
+ kv.value = val;
+ BV (clib_bihash_add_del) (&db->mac_lookup_table, &kv, 1 /* is_add */ );
+ db->count++;
+ }
+ return old_val;
+}
+
+static void
+ip6_lookup_init (gid_ip6_table_t * db)
+{
+ uword i;
+
+ memset (db->ip6_prefix_len_refcount, 0,
+ sizeof (db->ip6_prefix_len_refcount));
+
+ for (i = 0; i < ARRAY_LEN (db->ip6_fib_masks); i++)
+ {
+ u32 j, i0, i1;
+
+ i0 = i / 32;
+ i1 = i % 32;
+
+ for (j = 0; j < i0; j++)
+ db->ip6_fib_masks[i].as_u32[j] = ~0;
+
+ if (i1)
+ db->ip6_fib_masks[i].as_u32[i0] =
+ clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
+ }
+
+ if (db->ip6_lookup_table_nbuckets == 0)
+ db->ip6_lookup_table_nbuckets = IP6_LOOKUP_DEFAULT_HASH_NUM_BUCKETS;
+
+ db->ip6_lookup_table_nbuckets =
+ 1 << max_log2 (db->ip6_lookup_table_nbuckets);
+
+ if (db->ip6_lookup_table_size == 0)
+ db->ip6_lookup_table_size = IP6_LOOKUP_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&db->ip6_lookup_table, "ip6 lookup table",
+ db->ip6_lookup_table_nbuckets,
+ db->ip6_lookup_table_size);
+}
+
+static u32
+add_del_sd_ip6_key (gid_dictionary_t * db, u32 vni, ip_prefix_t * dst_pref,
+ ip_prefix_t * src_pref, u32 val, u8 is_add)
+{
+ u32 sfi, old_val = ~0;
+ gid_ip6_table_t *sfib;
+
+ sfi = ip6_lookup_exact_match (&db->dst_ip6_table, vni, dst_pref);
+
+ if (is_add)
+ {
+ if (GID_LOOKUP_MISS == sfi)
+ {
+ pool_get (db->src_ip6_table_pool, sfib);
+ ip6_lookup_init (sfib);
+ add_del_ip6_key (&db->dst_ip6_table, vni, dst_pref,
+ sfib - db->src_ip6_table_pool, is_add);
+ if (src_pref)
+ add_del_ip6_key (sfib, 0 /* vni */ , src_pref, val, is_add);
+ else
+ {
+ ip_prefix_t sp;
+ memset (&sp, 0, sizeof (sp));
+ ip_prefix_version (&sp) = IP6;
+ add_del_ip6_key (sfib, 0 /* vni */ , &sp, val, is_add);
+ }
+ }
+ else
+ {
+ ASSERT (!pool_is_free_index (db->src_ip6_table_pool, sfi));
+ sfib = pool_elt_at_index (db->src_ip6_table_pool, sfi);
+ if (src_pref)
+ {
+ old_val = ip6_lookup_exact_match (sfib, 0, src_pref);
+ add_del_ip6_key (sfib, 0 /* vni */ , src_pref, val, is_add);
+ }
+ else
+ {
+ ip_prefix_t sp;
+ memset (&sp, 0, sizeof (sp));
+ ip_prefix_version (&sp) = IP6;
+ old_val =
+ add_del_ip6_key (sfib, 0 /* vni */ , &sp, val, is_add);
+ }
+ }
+ }
+ else
+ {
+ if (GID_LOOKUP_MISS != sfi)
+ {
+ sfib = pool_elt_at_index (db->src_ip6_table_pool, sfi);
+ if (src_pref)
+ old_val = add_del_ip6_key (sfib, 0, src_pref, 0, is_add);
+ else
+ {
+ ip_prefix_t sp;
+ memset (&sp, 0, sizeof (sp));
+ ip_prefix_version (&sp) = IP6;
+ old_val = add_del_ip6_key (sfib, 0, &sp, 0, is_add);
+ }
+
+ if (sfib->count == 0)
+ add_del_ip6_key (&db->dst_ip6_table, vni, dst_pref, 0, is_add);
+ }
+ else
+ clib_warning ("cannot delete dst mapping %U!", format_ip_prefix,
+ dst_pref);
+ }
+ return old_val;
+}
+
+static u32
+add_del_ip (gid_dictionary_t * db, u32 vni, ip_prefix_t * dst_key,
+ ip_prefix_t * src_key, u32 value, u8 is_add)
+{
+ switch (ip_prefix_version (dst_key))
+ {
+ case IP4:
+ return add_del_sd_ip4_key (db, vni, dst_key, src_key, value, is_add);
+ break;
+ case IP6:
+ return add_del_sd_ip6_key (db, vni, dst_key, src_key, value, is_add);
+ break;
+ default:
+ clib_warning ("address type %d not supported!",
+ ip_prefix_version (dst_key));
+ break;
+ }
+ return ~0;
+}
+
+static u32
+add_del_sd (gid_dictionary_t * db, u32 vni, source_dest_t * key, u32 value,
+ u8 is_add)
+{
+ switch (sd_dst_type (key))
+ {
+ case FID_ADDR_IP_PREF:
+      return add_del_ip (db, vni, &sd_dst_ippref (key), &sd_src_ippref (key),
+			 value, is_add);
+
+ case FID_ADDR_MAC:
+ return add_del_mac (&db->sd_mac_table, vni, sd_dst_mac (key),
+ sd_src_mac (key), value, is_add);
+
+ default:
+      clib_warning ("SD address type %d not supported!", sd_dst_type (key));
+ break;
+ }
+
+ return ~0;
+}
+
+static u64
+add_del_arp_ndp (gid_l2_arp_ndp_table_t * db, u32 bd, ip_address_t * key,
+ u64 value, u8 is_add)
+{
+ BVT (clib_bihash_kv) kv, result;
+ u32 old_val = ~0;
+
+ make_arp_ndp_key (&kv, bd, key);
+ if (BV (clib_bihash_search) (&db->arp_ndp_lookup_table, &kv, &result) == 0)
+ old_val = result.value;
+
+ if (is_add)
+ {
+ kv.value = value;
+ BV (clib_bihash_add_del) (&db->arp_ndp_lookup_table, &kv,
+ 1 /* is_add */ );
+ db->count++;
+ }
+ else
+ {
+ BV (clib_bihash_add_del) (&db->arp_ndp_lookup_table, &kv,
+ 0 /* is_add */ );
+ db->count--;
+ }
+ return old_val;
+}
+
+static u32
+add_del_nsh (gid_nsh_table_t * db, u32 vni, u32 spi, u8 si, u32 value,
+ u8 is_add)
+{
+ BVT (clib_bihash_kv) kv, result;
+ u32 old_val = ~0;
+
+ make_nsh_key (&kv, vni, spi, si);
+ if (BV (clib_bihash_search) (&db->nsh_lookup_table, &kv, &result) == 0)
+ old_val = result.value;
+
+ if (is_add)
+ {
+ kv.value = value;
+ BV (clib_bihash_add_del) (&db->nsh_lookup_table, &kv, 1 /* is_add */ );
+ db->count++;
+ }
+ else
+ {
+ BV (clib_bihash_add_del) (&db->nsh_lookup_table, &kv, 0 /* is_add */ );
+ db->count--;
+ }
+ return old_val;
+}
+
+u32
+gid_dictionary_add_del (gid_dictionary_t * db, gid_address_t * key, u64 value,
+ u8 is_add)
+{
+ switch (gid_address_type (key))
+ {
+ case GID_ADDR_IP_PREFIX:
+ return add_del_ip (db, gid_address_vni (key), &gid_address_ippref (key),
+ 0, (u32) value, is_add);
+ case GID_ADDR_MAC:
+ return add_del_mac (&db->sd_mac_table, gid_address_vni (key),
+ gid_address_mac (key), 0, (u32) value, is_add);
+ case GID_ADDR_SRC_DST:
+ return add_del_sd (db, gid_address_vni (key), &gid_address_sd (key),
+ (u32) value, is_add);
+ case GID_ADDR_ARP:
+ case GID_ADDR_NDP:
+ return add_del_arp_ndp (&db->arp_ndp_table,
+ gid_address_arp_ndp_bd (key),
+ &gid_address_arp_ndp_ip (key), value, is_add);
+ case GID_ADDR_NSH:
+ return add_del_nsh (&db->nsh_table, gid_address_vni (key),
+ gid_address_nsh_spi (key), gid_address_nsh_si (key),
+ value, is_add);
+
+ default:
+ clib_warning ("address type %d not supported!", gid_address_type (key));
+ break;
+ }
+ return ~0;
+}
+
+static void
+mac_lookup_init (gid_mac_table_t * db)
+{
+ if (db->mac_lookup_table_nbuckets == 0)
+ db->mac_lookup_table_nbuckets = MAC_LOOKUP_DEFAULT_HASH_NUM_BUCKETS;
+
+ db->mac_lookup_table_nbuckets =
+ 1 << max_log2 (db->mac_lookup_table_nbuckets);
+
+ if (db->mac_lookup_table_size == 0)
+ db->mac_lookup_table_size = MAC_LOOKUP_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&db->mac_lookup_table, "mac lookup table",
+ db->mac_lookup_table_nbuckets,
+ db->mac_lookup_table_size);
+}
+
+static void
+arp_ndp_lookup_init (gid_l2_arp_ndp_table_t * db)
+{
+ if (db->arp_ndp_lookup_table_nbuckets == 0)
+ db->arp_ndp_lookup_table_nbuckets =
+ ARP_NDP_LOOKUP_DEFAULT_HASH_NUM_BUCKETS;
+
+ db->arp_ndp_lookup_table_nbuckets =
+ 1 << max_log2 (db->arp_ndp_lookup_table_nbuckets);
+
+ if (db->arp_ndp_lookup_table_size == 0)
+ db->arp_ndp_lookup_table_size = ARP_NDP_LOOKUP_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&db->arp_ndp_lookup_table, "arp ndp lookup table",
+ db->arp_ndp_lookup_table_nbuckets,
+ db->arp_ndp_lookup_table_size);
+}
+
+static void
+nsh_lookup_init (gid_nsh_table_t * db)
+{
+ if (db->nsh_lookup_table_nbuckets == 0)
+    db->nsh_lookup_table_nbuckets = NSH_LOOKUP_DEFAULT_HASH_NUM_BUCKETS;
+
+ db->nsh_lookup_table_nbuckets =
+ 1 << max_log2 (db->nsh_lookup_table_nbuckets);
+
+ if (db->nsh_lookup_table_size == 0)
+    db->nsh_lookup_table_size = NSH_LOOKUP_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&db->nsh_lookup_table, "nsh lookup table",
+ db->nsh_lookup_table_nbuckets,
+ db->nsh_lookup_table_size);
+}
+
+void
+gid_dictionary_init (gid_dictionary_t * db)
+{
+ ip4_lookup_init (&db->dst_ip4_table);
+ ip6_lookup_init (&db->dst_ip6_table);
+ mac_lookup_init (&db->sd_mac_table);
+ arp_ndp_lookup_init (&db->arp_ndp_table);
+ nsh_lookup_init (&db->nsh_table);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/gid_dictionary.h b/src/vnet/lisp-cp/gid_dictionary.h
new file mode 100644
index 00000000..3f8500e5
--- /dev/null
+++ b/src/vnet/lisp-cp/gid_dictionary.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VNET_LISP_GPE_GID_DICTIONARY_H_
+#define VNET_LISP_GPE_GID_DICTIONARY_H_
+
+#include <vnet/vnet.h>
+#include <vnet/lisp-cp/lisp_types.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_template.h>
+
+#define GID_LOOKUP_MISS ((u32)~0)
+#define GID_LOOKUP_MISS_L2 ((u64)~0)
+
+/* Default size of the ip4 hash table */
+#define IP4_LOOKUP_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define IP4_LOOKUP_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+/* Default size of the ip6 hash table */
+#define IP6_LOOKUP_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define IP6_LOOKUP_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+/* Default size of the MAC hash table */
+#define MAC_LOOKUP_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define MAC_LOOKUP_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+/* Default size of the ARP/NDP hash table */
+#define ARP_NDP_LOOKUP_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define ARP_NDP_LOOKUP_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+/* Default size of the NSH hash table */
+#define NSH_LOOKUP_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define NSH_LOOKUP_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+typedef void (*foreach_subprefix_match_cb_t) (u32, void *);
+
+typedef struct
+{
+ BVT (clib_bihash) ip4_lookup_table;
+
+ /* bitmap/vector of mask widths to search */
+ uword *ip4_non_empty_dst_address_length_bitmap;
+ u8 *ip4_prefix_lengths_in_search_order;
+ ip4_address_t ip4_fib_masks[33];
+ u32 ip4_prefix_len_refcount[33];
+
+ /* ip4 lookup table config parameters */
+ u32 ip4_lookup_table_nbuckets;
+ uword ip4_lookup_table_size;
+ u32 count;
+} gid_ip4_table_t;
+
+typedef struct
+{
+ BVT (clib_bihash) ip6_lookup_table;
+
+ /* bitmap/vector of mask widths to search */
+ uword *ip6_non_empty_dst_address_length_bitmap;
+ u8 *ip6_prefix_lengths_in_search_order;
+ ip6_address_t ip6_fib_masks[129];
+ u64 ip6_prefix_len_refcount[129];
+
+ /* ip6 lookup table config parameters */
+ u32 ip6_lookup_table_nbuckets;
+ uword ip6_lookup_table_size;
+ u64 count;
+} gid_ip6_table_t;
+
+typedef struct gid_mac_table
+{
+ BVT (clib_bihash) mac_lookup_table;
+
+ /* mac lookup table config parameters */
+ u32 mac_lookup_table_nbuckets;
+ uword mac_lookup_table_size;
+ u64 count;
+} gid_mac_table_t;
+
+typedef struct gid_nsh_table
+{
+ BVT (clib_bihash) nsh_lookup_table;
+
+ /* nsh lookup table config parameters */
+ u32 nsh_lookup_table_nbuckets;
+ uword nsh_lookup_table_size;
+ u64 count;
+} gid_nsh_table_t;
+
+typedef struct
+{
+ BVT (clib_bihash) arp_ndp_lookup_table;
+ u32 arp_ndp_lookup_table_nbuckets;
+ uword arp_ndp_lookup_table_size;
+ u64 count;
+} gid_l2_arp_ndp_table_t;
+
+typedef struct
+{
+ /** L2 ARP/NDP table */
+ gid_l2_arp_ndp_table_t arp_ndp_table;
+
+ /** NSH lookup table */
+ gid_nsh_table_t nsh_table;
+
+ /** destination IP LPM ip4 lookup table */
+ gid_ip4_table_t dst_ip4_table;
+
+ /** pool of source IP LPM ip4 lookup tables */
+ gid_ip4_table_t *src_ip4_table_pool;
+
+ /** destination IP LPM ip6 lookup table */
+ gid_ip6_table_t dst_ip6_table;
+
+ /** pool of source IP LPM ip6 lookup tables */
+ gid_ip6_table_t *src_ip6_table_pool;
+
+ /** flat source/dest mac lookup table */
+ gid_mac_table_t sd_mac_table;
+
+} gid_dictionary_t;
+
+u32
+gid_dictionary_add_del (gid_dictionary_t * db, gid_address_t * key, u64 value,
+ u8 is_add);
+
+u64 gid_dictionary_lookup (gid_dictionary_t * db, gid_address_t * key);
+u32 gid_dictionary_sd_lookup (gid_dictionary_t * db, gid_address_t * dst,
+ gid_address_t * src);
+
+void gid_dictionary_init (gid_dictionary_t * db);
+
+void
+gid_dict_foreach_subprefix (gid_dictionary_t * db, gid_address_t * eid,
+ foreach_subprefix_match_cb_t cb, void *arg);
+
+void
+gid_dict_foreach_l2_arp_ndp_entry (gid_dictionary_t * db, void (*cb)
+ (BVT (clib_bihash_kv) * kvp, void *arg),
+ void *ht);
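+
+/*
+ * Minimal usage sketch (illustrative; the variable names are assumptions
+ * made for the example):
+ *
+ *   gid_dictionary_t dict;
+ *   gid_address_t eid;
+ *   gid_dictionary_init (&dict);
+ *   (fill 'eid' with a vni + ip-prefix, then:)
+ *   gid_dictionary_add_del (&dict, &eid, 42, 1);    (value, is_add)
+ *   u64 v = gid_dictionary_lookup (&dict, &eid);    (42 on hit,
+ *                                                    GID_LOOKUP_MISS else)
+ */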
+
+#endif /* VNET_LISP_GPE_GID_DICTIONARY_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp.api b/src/vnet/lisp-cp/lisp.api
new file mode 100644
index 00000000..8bed71b3
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp.api
@@ -0,0 +1,750 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+typeonly manual_print manual_endian define local_locator
+{
+ u32 sw_if_index;
+ u8 priority;
+ u8 weight;
+};
+
+/** \brief add or delete locator_set
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param locator_set_name - locator name
+ @param locator_num - number of locators
+ @param locators - LISP locator records
+*/
+manual_endian manual_print define lisp_add_del_locator_set
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 locator_set_name[64];
+ u32 locator_num;
+ vl_api_local_locator_t locators[locator_num];
+};
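+
+/* Client-side sketch (illustrative; it assumes the message type name and
+ * helpers that the VPP API generator and vppinfra conventionally provide,
+ * none of which are defined in this file):
+ *
+ *   u32 n = 2;
+ *   vl_api_lisp_add_del_locator_set_t *mp =
+ *     vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_local_locator_t));
+ *   memset (mp, 0, sizeof (*mp) + n * sizeof (vl_api_local_locator_t));
+ *   mp->is_add = 1;
+ *   strncpy ((char *) mp->locator_set_name, "ls1", 63);
+ *   mp->locator_num = clib_host_to_net_u32 (n);   (manual_endian message)
+ */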
+
+/** \brief Reply for locator_set add/del
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param ls_index - locator set index
+*/
+define lisp_add_del_locator_set_reply
+{
+ u32 context;
+ i32 retval;
+ u32 ls_index;
+};
+
+/** \brief add or delete locator for locator_set
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param locator_set_name - name of locator_set to add/del locator
+ @param sw_if_index - index of the interface
+ @param priority - priority of the lisp locator
+ @param weight - weight of the lisp locator
+*/
+autoreply define lisp_add_del_locator
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 locator_set_name[64];
+ u32 sw_if_index;
+ u8 priority;
+ u8 weight;
+};
+
+/** \brief add or delete lisp eid-table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param eid_type:
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param eid - EID can be ip4, ip6 or mac
+ @param prefix_len - prefix len
+ @param locator_set_name - name of locator_set to add/del eid-table
+ @param vni - virtual network instance
+ @param key_id
+ HMAC_NO_KEY 0
+ HMAC_SHA_1_96 1
+ HMAC_SHA_256_128 2
+ @param key - secret key
+*/
+autoreply define lisp_add_del_local_eid
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 eid_type;
+ u8 eid[16];
+ u8 prefix_len;
+ u8 locator_set_name[64];
+ u32 vni;
+ u16 key_id;
+ u8 key[64];
+};
+
+/** \brief Add/delete map server
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero; delete otherwise
+ @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+ @param ip_address - map server IP address
+*/
+autoreply define lisp_add_del_map_server
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 ip_address[16];
+};
+
+/** \brief add or delete map-resolver
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+ @param ip_address - array of address bytes
+*/
+autoreply define lisp_add_del_map_resolver
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 ip_address[16];
+};
+
+/** \brief enable or disable LISP feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_en - enable protocol if non-zero, else disable
+*/
+autoreply define lisp_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_en;
+};
+
+/** \brief configure or disable LISP PITR node
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ls_name - locator set name
+    @param is_add - add locator set if non-zero, else disable PITR
+*/
+autoreply define lisp_pitr_set_locator_set
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 ls_name[64];
+};
+
+/** \brief configure or disable use of PETR
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - Address is IPv4 if set and IPv6 otherwise
+ @param address - PETR IP address
+    @param is_add - use PETR if non-zero, else disable
+*/
+autoreply define lisp_use_petr
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 address[16];
+ u8 is_add;
+};
+
+/** \brief Request for LISP PETR status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_lisp_use_petr
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief LISP PETR status, enable or disable
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+ @param status - LISP PETR enable if non-zero, else disable
+ @param is_ip4 - Address is IPv4 if non-zero, else IPv6
+ @param address - PETR IP address
+*/
+define show_lisp_use_petr_reply
+{
+ u32 context;
+ i32 retval;
+ u8 status;
+ u8 is_ip4;
+ u8 address[16];
+};
+
+/** \brief Get state of LISP RLOC probing
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_lisp_rloc_probe_state
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for show_lisp_rloc_probe_state
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param is_enabled - state of RLOC probing
+*/
+define show_lisp_rloc_probe_state_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_enabled;
+};
+
+/** \brief enable/disable LISP RLOC probing
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param is_enabled - enable if non-zero; disable otherwise
+*/
+autoreply define lisp_rloc_probe_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_enabled;
+};
+
+/** \brief enable/disable LISP map-register
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param is_enabled - enable if non-zero; disable otherwise
+*/
+autoreply define lisp_map_register_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_enabled;
+};
+
+/** \brief Get state of LISP map-register
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_lisp_map_register_state
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for show_lisp_map_register_state
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+    @param is_enabled - state of map-register
+*/
+define show_lisp_map_register_state_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_enabled;
+};
+
+/** \brief set LISP map-request mode. Based on configuration, VPP sends
+    either source/destination or regular destination-only map-requests.
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param mode - new map-request mode. Supported values are:
+      0 - destination only
+      1 - source/destination
+*/
+autoreply define lisp_map_request_mode
+{
+ u32 client_index;
+ u32 context;
+ u8 mode;
+};
+
+/** \brief Request for LISP map-request mode
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_lisp_map_request_mode
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for show_lisp_map_request_mode
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param mode - map-request mode
+*/
+define show_lisp_map_request_mode_reply
+{
+ u32 context;
+ i32 retval;
+ u8 mode;
+};
+
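+/** \brief Remote locator record
+    @param is_ip4 - if non-zero the address is ipv4, else ipv6
+    @param priority - locator priority
+    @param weight - locator weight
+    @param addr - locator address
+*/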
+typeonly manual_endian manual_print define remote_locator
+{
+ u8 is_ip4;
+ u8 priority;
+ u8 weight;
+ u8 addr[16];
+};
+
+/** \brief add or delete remote static mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param is_src_dst - flag indicating src/dst based routing policy
+ @param del_all - if set, delete all remote mappings
+ @param vni - virtual network instance
+ @param action - negative map-reply action
+ @param eid_type -
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param deid - dst EID
+ @param seid - src EID, valid only if is_src_dst is enabled
+ @param rloc_num - number of remote locators
+ @param rlocs - remote locator records
+*/
+autoreply manual_print manual_endian define lisp_add_del_remote_mapping
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_src_dst;
+ u8 del_all;
+ u32 vni;
+ u8 action;
+ u8 eid_type;
+ u8 eid[16];
+ u8 eid_len;
+ u8 seid[16];
+ u8 seid_len;
+ u32 rloc_num;
+ vl_api_remote_locator_t rlocs[rloc_num];
+};
+
+/** \brief add or delete LISP adjacency
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param vni - virtual network instance
+ @param eid_type -
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param reid - remote EID
+ @param leid - local EID
+*/
+autoreply define lisp_add_del_adjacency
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 vni;
+ u8 eid_type;
+ u8 reid[16];
+ u8 leid[16];
+ u8 reid_len;
+ u8 leid_len;
+};
+
+/** \brief add or delete map request itr rlocs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param locator_set_name - locator set name
+*/
+autoreply define lisp_add_del_map_request_itr_rlocs
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 locator_set_name[64];
+};
+
+/** \brief map/unmap vni/bd_index to vrf
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - add mapping if non-zero, else delete
+    @param vni - virtual network id
+    @param dp_table - vrf id or bridge domain index
+    @param is_l2 - if non-zero, vni is mapped to a bridge domain,
+      else to a vrf
+*/
+autoreply define lisp_eid_table_add_del_map
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 vni;
+ u32 dp_table;
+ u8 is_l2;
+};
+
+/** \brief Request for LISP locator status
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param ls_index - index of locator_set
+    @param ls_name - locator set name
+    @param is_index_set - flag indicating whether ls_name or ls_index is set
+ */
+define lisp_locator_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 ls_index;
+ u8 ls_name[64];
+ u8 is_index_set;
+};
+
+/** \brief LISP locator status
+    @param context - sender context, to match reply w/ request
+    @param local - if non-zero, locator is local
+    @param sw_if_index - sw_if_index of the locator; valid only if local
+    @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+    @param ip_address - locator IP address; valid only if not local
+    @param priority - locator priority
+    @param weight - locator weight
+ */
+define lisp_locator_details
+{
+ u32 context;
+ u8 local;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u8 ip_address[16];
+ u8 priority;
+ u8 weight;
+};
+
+/** \brief LISP locator_set status
+ @param context - sender context, to match reply w/ request
+ @param ls_index - locator set index
+ @param ls_name - name of the locator set
+ */
+define lisp_locator_set_details
+{
+ u32 context;
+ u32 ls_index;
+ u8 ls_name[64];
+};
+
+/** \brief Request for locator_set summary status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param filter - filter type
+ Supported values:
+ 0: all locator sets
+ 1: local locator sets
+ 2: remote locator sets
+ */
+define lisp_locator_set_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 filter;
+};
+
+/** \brief LISP EID table entry details
+    @param context - sender context, to match reply w/ request
+    @param locator_set_index - index of locator_set; if ~0 the mapping
+      is negative
+    @param action - negative map request action
+    @param is_local - local if non-zero, else remote
+    @param eid_type:
+      0 : ipv4
+      1 : ipv6
+      2 : mac
+    @param is_src_dst - if non-zero, the EID is a source/destination pair
+    @param eid - EID; can be ip4, ip6 or mac
+    @param eid_prefix_len - prefix length
+    @param seid - source EID; can be ip4, ip6 or mac
+    @param seid_prefix_len - source prefix length
+    @param vni - virtual network instance
+    @param ttl - time to live
+    @param authoritative - authoritative
+    @param key_id
+      HMAC_NO_KEY           0
+      HMAC_SHA_1_96         1
+      HMAC_SHA_256_128      2
+    @param key - secret key
+*/
+define lisp_eid_table_details
+{
+ u32 context;
+ u32 locator_set_index;
+ u8 action;
+ u8 is_local;
+ u8 eid_type;
+ u8 is_src_dst;
+ u32 vni;
+ u8 eid[16];
+ u8 eid_prefix_len;
+ u8 seid[16];
+ u8 seid_prefix_len;
+ u32 ttl;
+ u8 authoritative;
+ u16 key_id;
+ u8 key[64];
+};
+
+/** \brief Request for eid table summary status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param eid_set - if non-zero request info about specific mapping
+ @param vni - virtual network instance; valid only if eid_set != 0
+ @param prefix_length - prefix length if EID is IP address;
+ valid only if eid_set != 0
+ @param eid_type - EID type; valid only if eid_set != 0
+ Supported values:
+ 0: EID is IPv4
+ 1: EID is IPv6
+ 2: EID is ethernet address
+ @param eid - endpoint identifier
+ @param filter - filter type;
+      Supported values:
+ 0: all eid
+ 1: local eid
+ 2: remote eid
+ */
+define lisp_eid_table_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 eid_set;
+ u8 prefix_length;
+ u32 vni;
+ u8 eid_type;
+ u8 eid[16];
+ u8 filter;
+};
+
+/** \brief LISP adjacency
+ @param eid_type -
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param reid - remote EID
+ @param leid - local EID
+ @param reid_prefix_len - remote EID IP prefix length
+ @param leid_prefix_len - local EID IP prefix length
+ */
+typeonly manual_print manual_endian define lisp_adjacency
+{
+ u8 eid_type;
+ u8 reid[16];
+ u8 leid[16];
+ u8 reid_prefix_len;
+ u8 leid_prefix_len;
+};
+
+/** \brief LISP adjacency reply
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+    @param count - number of adjacencies
+    @param adjacencies - array of adjacencies
+ */
+manual_endian manual_print define lisp_adjacencies_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_lisp_adjacency_t adjacencies[count];
+};
+
+/** \brief Request for LISP adjacencies
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - filter adjacencies by VNI
+ */
+define lisp_adjacencies_get
+{
+ u32 client_index;
+ u32 context;
+ u32 vni;
+};
+
+/** \brief Shows relationship between vni and vrf/bd
+    @param context - sender context, to match reply w/ request
+    @param dp_table - VRF index or bridge domain index
+    @param vni - virtual network instance
+ */
+define lisp_eid_table_map_details
+{
+ u32 context;
+ u32 vni;
+ u32 dp_table;
+};
+
+/** \brief Request for lisp_eid_table_map_details
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_l2 - if set dump vni/bd mappings else vni/vrf
+ */
+define lisp_eid_table_map_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 is_l2;
+};
+
+/** \brief Dumps all VNIs used in mappings
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ */
+define lisp_eid_table_vni_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief reply to lisp_eid_table_vni_dump
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - virtual network instance
+ */
+define lisp_eid_table_vni_details
+{
+ u32 client_index;
+ u32 context;
+ u32 vni;
+};
+
+/** \brief LISP map resolver status
+    @param context - sender context, to match reply w/ request
+    @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+    @param ip_address - array of address bytes
+ */
+define lisp_map_resolver_details
+{
+ u32 context;
+ u8 is_ipv6;
+ u8 ip_address[16];
+};
+
+/** \brief Request for map resolver summary status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ */
+define lisp_map_resolver_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief LISP map server details
+    @param context - sender context, to match reply w/ request
+    @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+    @param ip_address - array of address bytes
+ */
+define lisp_map_server_details
+{
+ u32 context;
+ u8 is_ipv6;
+ u8 ip_address[16];
+};
+
+/** \brief Request for map server summary status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ */
+define lisp_map_server_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Request for LISP and LISP GPE status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_lisp_status
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Status of LISP and LISP GPE, enabled or disabled
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param feature_status - lisp enabled if non-zero, else disabled
+    @param gpe_status - lisp gpe enabled if non-zero, else disabled
+*/
+define show_lisp_status_reply
+{
+ u32 context;
+ i32 retval;
+ u8 feature_status;
+ u8 gpe_status;
+};
+
+/** \brief Get LISP map request itr rlocs status
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+ */
+define lisp_get_map_request_itr_rlocs
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for lisp_get_map_request_itr_rlocs
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+    @param locator_set_name - name of the locator_set
+ */
+define lisp_get_map_request_itr_rlocs_reply
+{
+ u32 context;
+ i32 retval;
+ u8 locator_set_name[64];
+};
+
+/** \brief Request for lisp pitr status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_lisp_pitr
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Status of lisp pitr, enable or disable
+ @param context - sender context, to match reply w/ request
+ @param status - lisp pitr enable if non-zero, else disable
+ @param locator_set_name - name of the locator_set
+*/
+define show_lisp_pitr_reply
+{
+ u32 context;
+ i32 retval;
+ u8 status;
+ u8 locator_set_name[64];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
diff --git a/src/vnet/lisp-cp/lisp_api.c b/src/vnet/lisp-cp/lisp_api.c
new file mode 100644
index 00000000..f7c41971
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_api.c
@@ -0,0 +1,1342 @@
+/*
+ *------------------------------------------------------------------
+ * lisp_api.c - lisp api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/lisp-cp/control.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_api_remote_locator_t_endian vl_noop_handler
+#define vl_api_remote_locator_t_print vl_noop_handler
+#define vl_api_local_locator_t_endian vl_noop_handler
+#define vl_api_local_locator_t_print vl_noop_handler
+
+#define vl_api_lisp_add_del_locator_set_t_endian vl_noop_handler
+#define vl_api_lisp_add_del_locator_set_t_print vl_noop_handler
+#define vl_api_lisp_add_del_remote_mapping_t_endian vl_noop_handler
+#define vl_api_lisp_add_del_remote_mapping_t_print vl_noop_handler
+
+#define vl_api_one_add_del_locator_set_t_endian vl_noop_handler
+#define vl_api_one_add_del_locator_set_t_print vl_noop_handler
+#define vl_api_one_add_del_remote_mapping_t_endian vl_noop_handler
+#define vl_api_one_add_del_remote_mapping_t_print vl_noop_handler
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(LISP_ADD_DEL_LOCATOR_SET, lisp_add_del_locator_set) \
+_(LISP_ADD_DEL_LOCATOR, lisp_add_del_locator) \
+_(LISP_ADD_DEL_LOCAL_EID, lisp_add_del_local_eid) \
+_(LISP_ADD_DEL_MAP_RESOLVER, lisp_add_del_map_resolver) \
+_(LISP_ADD_DEL_MAP_SERVER, lisp_add_del_map_server) \
+_(LISP_ENABLE_DISABLE, lisp_enable_disable) \
+_(LISP_RLOC_PROBE_ENABLE_DISABLE, lisp_rloc_probe_enable_disable) \
+_(LISP_MAP_REGISTER_ENABLE_DISABLE, lisp_map_register_enable_disable) \
+_(LISP_ADD_DEL_REMOTE_MAPPING, lisp_add_del_remote_mapping) \
+_(LISP_ADD_DEL_ADJACENCY, lisp_add_del_adjacency) \
+_(LISP_PITR_SET_LOCATOR_SET, lisp_pitr_set_locator_set) \
+_(LISP_MAP_REQUEST_MODE, lisp_map_request_mode) \
+_(LISP_EID_TABLE_ADD_DEL_MAP, lisp_eid_table_add_del_map) \
+_(LISP_LOCATOR_SET_DUMP, lisp_locator_set_dump) \
+_(LISP_LOCATOR_DUMP, lisp_locator_dump) \
+_(LISP_EID_TABLE_DUMP, lisp_eid_table_dump) \
+_(LISP_MAP_RESOLVER_DUMP, lisp_map_resolver_dump) \
+_(LISP_MAP_SERVER_DUMP, lisp_map_server_dump) \
+_(LISP_EID_TABLE_MAP_DUMP, lisp_eid_table_map_dump) \
+_(LISP_EID_TABLE_VNI_DUMP, lisp_eid_table_vni_dump) \
+_(LISP_ADJACENCIES_GET, lisp_adjacencies_get) \
+_(SHOW_LISP_RLOC_PROBE_STATE, show_lisp_rloc_probe_state) \
+_(SHOW_LISP_MAP_REGISTER_STATE, show_lisp_map_register_state) \
+_(SHOW_LISP_STATUS, show_lisp_status) \
+_(LISP_ADD_DEL_MAP_REQUEST_ITR_RLOCS, \
+ lisp_add_del_map_request_itr_rlocs) \
+_(LISP_GET_MAP_REQUEST_ITR_RLOCS, lisp_get_map_request_itr_rlocs) \
+_(SHOW_LISP_PITR, show_lisp_pitr) \
+_(SHOW_LISP_MAP_REQUEST_MODE, show_lisp_map_request_mode) \
+_(LISP_USE_PETR, lisp_use_petr) \
+_(SHOW_LISP_USE_PETR, show_lisp_use_petr) \
+
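+/*
+ * Convert an array of API remote locator records into a vector of
+ * internal locator_t elements; the caller must vec_free the result.
+ */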
+static locator_t *
+unformat_lisp_locs (vl_api_remote_locator_t * rmt_locs, u32 rloc_num)
+{
+ u32 i;
+ locator_t *locs = 0, loc;
+ vl_api_remote_locator_t *r;
+
+ for (i = 0; i < rloc_num; i++)
+ {
+ /* remote locators */
+ r = &rmt_locs[i];
+ memset (&loc, 0, sizeof (loc));
+ gid_address_ip_set (&loc.address, &r->addr, r->is_ip4 ? IP4 : IP6);
+
+ loc.priority = r->priority;
+ loc.weight = r->weight;
+
+ vec_add1 (locs, loc);
+ }
+ return locs;
+}
+
+static void
+vl_api_lisp_add_del_locator_set_t_handler (vl_api_lisp_add_del_locator_set_t *
+ mp)
+{
+ vl_api_lisp_add_del_locator_set_reply_t *rmp;
+ int rv = 0;
+ vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ locator_t locator;
+ vl_api_local_locator_t *ls_loc;
+ u32 ls_index = ~0, locator_num;
+ u8 *locator_name = NULL;
+ int i;
+
+ memset (a, 0, sizeof (a[0]));
+
+ locator_name = format (0, "%s", mp->locator_set_name);
+
+ a->name = locator_name;
+ a->is_add = mp->is_add;
+ a->local = 1;
+ locator_num = clib_net_to_host_u32 (mp->locator_num);
+
+ memset (&locator, 0, sizeof (locator));
+ for (i = 0; i < locator_num; i++)
+ {
+ ls_loc = &mp->locators[i];
+ VALIDATE_SW_IF_INDEX (ls_loc);
+
+ locator.sw_if_index = htonl (ls_loc->sw_if_index);
+ locator.priority = ls_loc->priority;
+ locator.weight = ls_loc->weight;
+ locator.local = 1;
+ vec_add1 (a->locators, locator);
+ }
+
+ rv = vnet_lisp_add_del_locator_set (a, &ls_index);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ vec_free (locator_name);
+ vec_free (a->locators);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_LISP_ADD_DEL_LOCATOR_SET_REPLY,
+ ({
+ rmp->ls_index = clib_host_to_net_u32 (ls_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_lisp_add_del_locator_t_handler (vl_api_lisp_add_del_locator_t * mp)
+{
+ vl_api_lisp_add_del_locator_reply_t *rmp;
+ int rv = 0;
+ locator_t locator, *locators = NULL;
+ vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ u32 ls_index = ~0;
+ u8 *locator_name = NULL;
+
+ memset (&locator, 0, sizeof (locator));
+ memset (a, 0, sizeof (a[0]));
+
+ locator.sw_if_index = ntohl (mp->sw_if_index);
+ locator.priority = mp->priority;
+ locator.weight = mp->weight;
+ locator.local = 1;
+ vec_add1 (locators, locator);
+
+ locator_name = format (0, "%s", mp->locator_set_name);
+
+ a->name = locator_name;
+ a->locators = locators;
+ a->is_add = mp->is_add;
+ a->local = 1;
+
+ rv = vnet_lisp_add_del_locator (a, NULL, &ls_index);
+
+ vec_free (locators);
+ vec_free (locator_name);
+
+ REPLY_MACRO (VL_API_LISP_ADD_DEL_LOCATOR_REPLY);
+}
+
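+/*
+ * Translate an API EID (vni, eid_type, raw bytes, prefix length) into an
+ * internal gid_address_t. Returns 0 on success and
+ * VNET_API_ERROR_INVALID_VALUE on an unknown eid_type.
+ */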
+static int
+unformat_lisp_eid_api (gid_address_t * dst, u32 vni, u8 type, void *src,
+ u8 len)
+{
+ switch (type)
+ {
+ case 0: /* ipv4 */
+ gid_address_type (dst) = GID_ADDR_IP_PREFIX;
+ gid_address_ip_set (dst, src, IP4);
+ gid_address_ippref_len (dst) = len;
+ ip_prefix_normalize (&gid_address_ippref (dst));
+ break;
+ case 1: /* ipv6 */
+ gid_address_type (dst) = GID_ADDR_IP_PREFIX;
+ gid_address_ip_set (dst, src, IP6);
+ gid_address_ippref_len (dst) = len;
+ ip_prefix_normalize (&gid_address_ippref (dst));
+ break;
+ case 2: /* l2 mac */
+ gid_address_type (dst) = GID_ADDR_MAC;
+ clib_memcpy (&gid_address_mac (dst), src, 6);
+ break;
+ default:
+ /* unknown type */
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ gid_address_vni (dst) = vni;
+
+ return 0;
+}
+
+static void
+vl_api_lisp_add_del_local_eid_t_handler (vl_api_lisp_add_del_local_eid_t * mp)
+{
+ vl_api_lisp_add_del_local_eid_reply_t *rmp;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ int rv = 0;
+ gid_address_t _eid, *eid = &_eid;
+ uword *p = NULL;
+ u32 locator_set_index = ~0, map_index = ~0;
+ vnet_lisp_add_del_mapping_args_t _a, *a = &_a;
+ u8 *name = NULL, *key = NULL;
+ memset (a, 0, sizeof (a[0]));
+ memset (eid, 0, sizeof (eid[0]));
+
+ rv = unformat_lisp_eid_api (eid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->eid, mp->prefix_len);
+ if (rv)
+ goto out;
+
+ name = format (0, "%s", mp->locator_set_name);
+ p = hash_get_mem (lcm->locator_set_index_by_name, name);
+ if (!p)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+ locator_set_index = p[0];
+
+ if (*mp->key)
+ key = format (0, "%s", mp->key);
+
+ /* XXX treat batch configuration */
+ a->is_add = mp->is_add;
+ gid_address_copy (&a->eid, eid);
+ a->locator_set_index = locator_set_index;
+ a->local = 1;
+ a->key = key;
+ a->key_id = clib_net_to_host_u16 (mp->key_id);
+
+ rv = vnet_lisp_add_del_local_mapping (a, &map_index);
+
+out:
+ vec_free (name);
+ vec_free (key);
+ gid_address_free (&a->eid);
+
+ REPLY_MACRO (VL_API_LISP_ADD_DEL_LOCAL_EID_REPLY);
+}
+
+static void
+ vl_api_lisp_eid_table_add_del_map_t_handler
+ (vl_api_lisp_eid_table_add_del_map_t * mp)
+{
+ vl_api_lisp_eid_table_add_del_map_reply_t *rmp;
+ int rv = 0;
+ rv = vnet_lisp_eid_table_map (clib_net_to_host_u32 (mp->vni),
+ clib_net_to_host_u32 (mp->dp_table),
+ mp->is_l2, mp->is_add);
+  REPLY_MACRO (VL_API_LISP_EID_TABLE_ADD_DEL_MAP_REPLY);
+}
+
+static void
+vl_api_lisp_add_del_map_server_t_handler (vl_api_lisp_add_del_map_server_t
+ * mp)
+{
+ vl_api_lisp_add_del_map_server_reply_t *rmp;
+ int rv = 0;
+ ip_address_t addr;
+
+ memset (&addr, 0, sizeof (addr));
+
+ ip_address_set (&addr, mp->ip_address, mp->is_ipv6 ? IP6 : IP4);
+ rv = vnet_lisp_add_del_map_server (&addr, mp->is_add);
+
+ REPLY_MACRO (VL_API_LISP_ADD_DEL_MAP_SERVER_REPLY);
+}
+
+static void
+vl_api_lisp_add_del_map_resolver_t_handler (vl_api_lisp_add_del_map_resolver_t
+ * mp)
+{
+ vl_api_lisp_add_del_map_resolver_reply_t *rmp;
+ int rv = 0;
+ vnet_lisp_add_del_map_resolver_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (a[0]));
+
+ a->is_add = mp->is_add;
+ ip_address_set (&a->address, mp->ip_address, mp->is_ipv6 ? IP6 : IP4);
+
+ rv = vnet_lisp_add_del_map_resolver (a);
+
+ REPLY_MACRO (VL_API_LISP_ADD_DEL_MAP_RESOLVER_REPLY);
+}
+
+static void
+ vl_api_lisp_map_register_enable_disable_t_handler
+ (vl_api_lisp_map_register_enable_disable_t * mp)
+{
+ vl_api_lisp_map_register_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ vnet_lisp_map_register_enable_disable (mp->is_enabled);
+  REPLY_MACRO (VL_API_LISP_MAP_REGISTER_ENABLE_DISABLE_REPLY);
+}
+
+static void
+ vl_api_lisp_rloc_probe_enable_disable_t_handler
+ (vl_api_lisp_rloc_probe_enable_disable_t * mp)
+{
+ vl_api_lisp_rloc_probe_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ vnet_lisp_rloc_probe_enable_disable (mp->is_enabled);
+  REPLY_MACRO (VL_API_LISP_RLOC_PROBE_ENABLE_DISABLE_REPLY);
+}
+
+static void
+vl_api_lisp_enable_disable_t_handler (vl_api_lisp_enable_disable_t * mp)
+{
+ vl_api_lisp_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ vnet_lisp_enable_disable (mp->is_en);
+ REPLY_MACRO (VL_API_LISP_ENABLE_DISABLE_REPLY);
+}
+
+static void
+ vl_api_show_lisp_map_request_mode_t_handler
+ (vl_api_show_lisp_map_request_mode_t * mp)
+{
+ int rv = 0;
+ vl_api_show_lisp_map_request_mode_reply_t *rmp;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_SHOW_LISP_MAP_REQUEST_MODE_REPLY,
+ ({
+ rmp->mode = vnet_lisp_get_map_request_mode ();
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_lisp_map_request_mode_t_handler (vl_api_lisp_map_request_mode_t * mp)
+{
+ vl_api_lisp_map_request_mode_reply_t *rmp;
+ int rv = 0;
+
+ rv = vnet_lisp_set_map_request_mode (mp->mode);
+
+ REPLY_MACRO (VL_API_LISP_MAP_REQUEST_MODE_REPLY);
+}
+
+static void
+vl_api_lisp_pitr_set_locator_set_t_handler (vl_api_lisp_pitr_set_locator_set_t
+ * mp)
+{
+ vl_api_lisp_pitr_set_locator_set_reply_t *rmp;
+ int rv = 0;
+ u8 *ls_name = 0;
+
+ ls_name = format (0, "%s", mp->ls_name);
+ rv = vnet_lisp_pitr_set_locator_set (ls_name, mp->is_add);
+ vec_free (ls_name);
+
+ REPLY_MACRO (VL_API_LISP_PITR_SET_LOCATOR_SET_REPLY);
+}
+
+static void
+vl_api_lisp_use_petr_t_handler (vl_api_lisp_use_petr_t * mp)
+{
+ vl_api_lisp_use_petr_reply_t *rmp;
+ int rv = 0;
+ ip_address_t addr;
+
+ ip_address_set (&addr, &mp->address, mp->is_ip4 ? IP4 : IP6);
+ rv = vnet_lisp_use_petr (&addr, mp->is_add);
+
+ REPLY_MACRO (VL_API_LISP_USE_PETR_REPLY);
+}
+
+static void
+vl_api_show_lisp_use_petr_t_handler (vl_api_show_lisp_use_petr_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_show_lisp_use_petr_reply_t *rmp = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls = 0;
+ int rv = 0;
+ locator_t *loc = 0;
+ u8 status = 0;
+ gid_address_t addr;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ memset (&addr, 0, sizeof (addr));
+ status = lcm->flags & LISP_FLAG_USE_PETR;
+ if (status)
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->petr_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls =
+ pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index);
+ loc = pool_elt_at_index (lcm->locator_pool, ls->locator_indices[0]);
+ gid_address_copy (&addr, &loc->address);
+ }
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_LISP_USE_PETR_REPLY,
+ {
+ rmp->status = status;
+ ip_address_t *ip = &gid_address_ip (&addr);
+ switch (ip_addr_version (ip))
+ {
+ case IP4:
+ clib_memcpy (rmp->address, &ip_addr_v4 (ip),
+ sizeof (ip_addr_v4 (ip)));
+ break;
+
+ case IP6:
+ clib_memcpy (rmp->address, &ip_addr_v6 (ip),
+ sizeof (ip_addr_v6 (ip)));
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ rmp->is_ip4 = (gid_address_ip_version (&addr) == IP4);
+ });
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_lisp_add_del_map_request_itr_rlocs_t_handler
+ (vl_api_lisp_add_del_map_request_itr_rlocs_t * mp)
+{
+ vl_api_lisp_add_del_map_request_itr_rlocs_reply_t *rmp;
+ int rv = 0;
+ u8 *locator_set_name = NULL;
+  vnet_lisp_add_del_mreq_itr_rloc_args_t _a, *a = &_a;
+
+  memset (a, 0, sizeof (a[0]));
+ locator_set_name = format (0, "%s", mp->locator_set_name);
+
+ a->is_add = mp->is_add;
+ a->locator_set_name = locator_set_name;
+
+ rv = vnet_lisp_add_del_mreq_itr_rlocs (a);
+
+ vec_free (locator_set_name);
+
+ REPLY_MACRO (VL_API_LISP_ADD_DEL_MAP_REQUEST_ITR_RLOCS_REPLY);
+}
+
+static void
+ vl_api_lisp_add_del_remote_mapping_t_handler
+ (vl_api_lisp_add_del_remote_mapping_t * mp)
+{
+ locator_t *rlocs = 0;
+ vl_api_lisp_add_del_remote_mapping_reply_t *rmp;
+ int rv = 0;
+ gid_address_t _eid, *eid = &_eid;
+ u32 rloc_num = clib_net_to_host_u32 (mp->rloc_num);
+
+ memset (eid, 0, sizeof (eid[0]));
+
+ rv = unformat_lisp_eid_api (eid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->eid, mp->eid_len);
+ if (rv)
+ goto send_reply;
+
+ rlocs = unformat_lisp_locs (mp->rlocs, rloc_num);
+
+ if (!mp->is_add)
+ {
+      vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+      memset (a, 0, sizeof (a[0]));
+      gid_address_copy (&a->reid, eid);
+      a->is_add = 0;
+ rv = vnet_lisp_add_del_adjacency (a);
+ if (rv)
+ {
+ goto out;
+ }
+ }
+
+ /* NOTE: for now this works as a static remote mapping, i.e.,
+ * not authoritative and ttl infinite. */
+ if (mp->is_add)
+ {
+ vnet_lisp_add_del_mapping_args_t _m_args, *m_args = &_m_args;
+ memset (m_args, 0, sizeof (m_args[0]));
+ gid_address_copy (&m_args->eid, eid);
+ m_args->action = mp->action;
+ m_args->is_static = 1;
+ m_args->ttl = ~0;
+ m_args->authoritative = 0;
+ rv = vnet_lisp_add_mapping (m_args, rlocs, NULL, NULL);
+ }
+ else
+ rv = vnet_lisp_del_mapping (eid, NULL);
+
+ if (mp->del_all)
+ vnet_lisp_clear_all_remote_adjacencies ();
+
+out:
+ vec_free (rlocs);
+send_reply:
+ REPLY_MACRO (VL_API_LISP_ADD_DEL_REMOTE_MAPPING_REPLY);
+}
+
+static void
+vl_api_lisp_add_del_adjacency_t_handler (vl_api_lisp_add_del_adjacency_t * mp)
+{
+ vl_api_lisp_add_del_adjacency_reply_t *rmp;
+ vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+
+ int rv = 0;
+ memset (a, 0, sizeof (a[0]));
+
+ rv = unformat_lisp_eid_api (&a->leid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->leid, mp->leid_len);
+ rv |= unformat_lisp_eid_api (&a->reid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->reid, mp->reid_len);
+
+ if (rv)
+ goto send_reply;
+
+ a->is_add = mp->is_add;
+ rv = vnet_lisp_add_del_adjacency (a);
+
+send_reply:
+ REPLY_MACRO (VL_API_LISP_ADD_DEL_ADJACENCY_REPLY);
+}
+
+static void
+send_lisp_locator_details (lisp_cp_main_t * lcm,
+ locator_t * loc,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_lisp_locator_details_t *rmp;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_LISP_LOCATOR_DETAILS);
+ rmp->context = context;
+
+ rmp->local = loc->local;
+ if (loc->local)
+ {
+ rmp->sw_if_index = ntohl (loc->sw_if_index);
+ }
+ else
+ {
+ rmp->is_ipv6 = gid_address_ip_version (&loc->address);
+ ip_address_copy_addr (rmp->ip_address, &gid_address_ip (&loc->address));
+ }
+ rmp->priority = loc->priority;
+ rmp->weight = loc->weight;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_lisp_locator_dump_t_handler (vl_api_lisp_locator_dump_t * mp)
+{
+ u8 *ls_name = 0;
+ unix_shared_memory_queue_t *q = 0;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *lsit = 0;
+ locator_t *loc = 0;
+ u32 ls_index = ~0, *locit = 0;
+ uword *p = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->is_index_set)
+ ls_index = htonl (mp->ls_index);
+ else
+ {
+ /* make sure we get a proper C-string */
+ mp->ls_name[sizeof (mp->ls_name) - 1] = 0;
+ ls_name = format (0, "%s", mp->ls_name);
+ p = hash_get_mem (lcm->locator_set_index_by_name, ls_name);
+ if (!p)
+ goto out;
+ ls_index = p[0];
+ }
+
+ if (pool_is_free_index (lcm->locator_set_pool, ls_index))
+ return;
+
+ lsit = pool_elt_at_index (lcm->locator_set_pool, ls_index);
+
+ vec_foreach (locit, lsit->locator_indices)
+ {
+ loc = pool_elt_at_index (lcm->locator_pool, locit[0]);
+ send_lisp_locator_details (lcm, loc, q, mp->context);
+ };
+out:
+ vec_free (ls_name);
+}
+
+static void
+send_lisp_locator_set_details (lisp_cp_main_t * lcm,
+ locator_set_t * lsit,
+ unix_shared_memory_queue_t * q,
+ u32 context, u32 ls_index)
+{
+ vl_api_lisp_locator_set_details_t *rmp;
+ u8 *str = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_LISP_LOCATOR_SET_DETAILS);
+ rmp->context = context;
+
+ rmp->ls_index = htonl (ls_index);
+ if (lsit->local)
+ {
+ ASSERT (lsit->name != NULL);
+ strncpy ((char *) rmp->ls_name, (char *) lsit->name,
+ vec_len (lsit->name));
+ }
+ else
+ {
+ str = format (0, "<remote-%d>", ls_index);
+ strncpy ((char *) rmp->ls_name, (char *) str, vec_len (str));
+ vec_free (str);
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_lisp_locator_set_dump_t_handler (vl_api_lisp_locator_set_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *lsit = NULL;
+ u8 filter;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ filter = mp->filter;
+ /* *INDENT-OFF* */
+ pool_foreach (lsit, lcm->locator_set_pool,
+ ({
+ if (filter && !((1 == filter && lsit->local) ||
+ (2 == filter && !lsit->local)))
+ {
+ continue;
+ }
+ send_lisp_locator_set_details (lcm, lsit, q, mp->context,
+ lsit - lcm->locator_set_pool);
+ }));
+ /* *INDENT-ON* */
+}
+
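+/*
+ * Serialize a fid_address_t into the flat API byte array dst and store
+ * its prefix length (0 for MAC addresses) in prefix_length.
+ */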
+static void
+lisp_fid_put_api (u8 * dst, fid_address_t * src, u8 * prefix_length)
+{
+ ASSERT (prefix_length);
+ ip_prefix_t *ippref = &fid_addr_ippref (src);
+
+ switch (fid_addr_type (src))
+ {
+ case FID_ADDR_IP_PREF:
+ if (ip_prefix_version (ippref) == IP4)
+ clib_memcpy (dst, &ip_prefix_v4 (ippref), 4);
+ else
+ clib_memcpy (dst, &ip_prefix_v6 (ippref), 16);
+ prefix_length[0] = ip_prefix_len (ippref);
+ break;
+
+ case FID_ADDR_MAC:
+ prefix_length[0] = 0;
+ clib_memcpy (dst, fid_addr_mac (src), 6);
+ break;
+
+ default:
+ clib_warning ("Unknown FID type %d!", fid_addr_type (src));
+ break;
+ }
+}
+
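+/*
+ * Map an internal FID address type to the API eid_type encoding:
+ * 0 = ipv4, 1 = ipv6, 2 = mac, 3 = nsh; returns ~0 for unknown types.
+ */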
+static u8
+fid_type_to_api_type (fid_address_t * fid)
+{
+ ip_prefix_t *ippref;
+
+ switch (fid_addr_type (fid))
+ {
+ case FID_ADDR_IP_PREF:
+ ippref = &fid_addr_ippref (fid);
+ if (ip_prefix_version (ippref) == IP4)
+ return 0;
+ else if (ip_prefix_version (ippref) == IP6)
+ return 1;
+ else
+ return ~0;
+
+ case FID_ADDR_MAC:
+ return 2;
+ case FID_ADDR_NSH:
+ return 3;
+ }
+
+ return ~0;
+}
+
+static void
+send_lisp_eid_table_details (mapping_t * mapit,
+ unix_shared_memory_queue_t * q,
+ u32 context, u8 filter)
+{
+ fid_address_t *fid;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *ls = 0;
+ vl_api_lisp_eid_table_details_t *rmp = NULL;
+ gid_address_t *gid = NULL;
+ u8 *mac = 0;
+ ip_prefix_t *ip_prefix = NULL;
+
+ switch (filter)
+ {
+ case 0: /* all mappings */
+ break;
+
+ case 1: /* local only */
+ if (!mapit->local)
+ return;
+ break;
+ case 2: /* remote only */
+ if (mapit->local)
+ return;
+ break;
+ default:
+ clib_warning ("Filter error, unknown filter: %d", filter);
+ return;
+ }
+
+ /* don't send PITR generated mapping */
+ if (mapit->pitr_set)
+ return;
+
+ gid = &mapit->eid;
+ ip_prefix = &gid_address_ippref (gid);
+ mac = gid_address_mac (gid);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_LISP_EID_TABLE_DETAILS);
+
+ ls = pool_elt_at_index (lcm->locator_set_pool, mapit->locator_set_index);
+ if (vec_len (ls->locator_indices) == 0)
+ rmp->locator_set_index = ~0;
+ else
+ rmp->locator_set_index = clib_host_to_net_u32 (mapit->locator_set_index);
+
+ rmp->is_local = mapit->local;
+ rmp->ttl = clib_host_to_net_u32 (mapit->ttl);
+ rmp->action = mapit->action;
+ rmp->authoritative = mapit->authoritative;
+
+ switch (gid_address_type (gid))
+ {
+ case GID_ADDR_SRC_DST:
+ rmp->is_src_dst = 1;
+ fid = &gid_address_sd_src (gid);
+ rmp->eid_type = fid_type_to_api_type (fid);
+ lisp_fid_put_api (rmp->seid, &gid_address_sd_src (gid),
+ &rmp->seid_prefix_len);
+ lisp_fid_put_api (rmp->eid, &gid_address_sd_dst (gid),
+ &rmp->eid_prefix_len);
+ break;
+ case GID_ADDR_IP_PREFIX:
+ rmp->eid_prefix_len = ip_prefix_len (ip_prefix);
+ if (ip_prefix_version (ip_prefix) == IP4)
+ {
+ rmp->eid_type = 0; /* ipv4 type */
+ clib_memcpy (rmp->eid, &ip_prefix_v4 (ip_prefix),
+ sizeof (ip_prefix_v4 (ip_prefix)));
+ }
+ else
+ {
+ rmp->eid_type = 1; /* ipv6 type */
+ clib_memcpy (rmp->eid, &ip_prefix_v6 (ip_prefix),
+ sizeof (ip_prefix_v6 (ip_prefix)));
+ }
+ break;
+ case GID_ADDR_MAC:
+ rmp->eid_type = 2; /* l2 mac type */
+ clib_memcpy (rmp->eid, mac, 6);
+ break;
+ default:
+ ASSERT (0);
+ }
+ rmp->context = context;
+ rmp->vni = clib_host_to_net_u32 (gid_address_vni (gid));
+ rmp->key_id = clib_host_to_net_u16 (mapit->key_id);
+ memcpy (rmp->key, mapit->key, vec_len (mapit->key));
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_lisp_eid_table_dump_t_handler (vl_api_lisp_eid_table_dump_t * mp)
+{
+ u32 mi;
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *mapit = NULL;
+ gid_address_t _eid, *eid = &_eid;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->eid_set)
+ {
+ memset (eid, 0, sizeof (*eid));
+
+ unformat_lisp_eid_api (eid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->eid, mp->prefix_length);
+
+ mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, eid);
+ if ((u32) ~ 0 == mi)
+ return;
+
+ mapit = pool_elt_at_index (lcm->mapping_pool, mi);
+ send_lisp_eid_table_details (mapit, q, mp->context,
+ 0 /* ignore filter */ );
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (mapit, lcm->mapping_pool,
+ ({
+ send_lisp_eid_table_details(mapit, q, mp->context,
+ mp->filter);
+ }));
+ /* *INDENT-ON* */
+ }
+}
+
+static void
+send_lisp_map_server_details (ip_address_t * ip,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_lisp_map_server_details_t *rmp = NULL;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_LISP_MAP_SERVER_DETAILS);
+
+ switch (ip_addr_version (ip))
+ {
+ case IP4:
+ rmp->is_ipv6 = 0;
+ clib_memcpy (rmp->ip_address, &ip_addr_v4 (ip),
+ sizeof (ip_addr_v4 (ip)));
+ break;
+
+ case IP6:
+ rmp->is_ipv6 = 1;
+ clib_memcpy (rmp->ip_address, &ip_addr_v6 (ip),
+ sizeof (ip_addr_v6 (ip)));
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_lisp_map_server_dump_t_handler (vl_api_lisp_map_server_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t *mr;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ vec_foreach (mr, lcm->map_servers)
+ {
+ send_lisp_map_server_details (&mr->address, q, mp->context);
+ }
+}
+
+static void
+send_lisp_map_resolver_details (ip_address_t * ip,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_lisp_map_resolver_details_t *rmp = NULL;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_LISP_MAP_RESOLVER_DETAILS);
+
+ switch (ip_addr_version (ip))
+ {
+ case IP4:
+ rmp->is_ipv6 = 0;
+ clib_memcpy (rmp->ip_address, &ip_addr_v4 (ip),
+ sizeof (ip_addr_v4 (ip)));
+ break;
+
+ case IP6:
+ rmp->is_ipv6 = 1;
+ clib_memcpy (rmp->ip_address, &ip_addr_v6 (ip),
+ sizeof (ip_addr_v6 (ip)));
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_lisp_map_resolver_dump_t_handler (vl_api_lisp_map_resolver_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t *mr;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ vec_foreach (mr, lcm->map_resolvers)
+ {
+ send_lisp_map_resolver_details (&mr->address, q, mp->context);
+ }
+}
+
+static void
+send_eid_table_map_pair (hash_pair_t * p,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_lisp_eid_table_map_details_t *rmp = NULL;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_LISP_EID_TABLE_MAP_DETAILS);
+
+ rmp->vni = clib_host_to_net_u32 (p->key);
+ rmp->dp_table = clib_host_to_net_u32 (p->value[0]);
+ rmp->context = context;
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_lisp_eid_table_map_dump_t_handler (vl_api_lisp_eid_table_map_dump_t *
+ mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ hash_pair_t *p;
+ uword *vni_table = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->is_l2)
+ {
+ vni_table = lcm->bd_id_by_vni;
+ }
+ else
+ {
+ vni_table = lcm->table_id_by_vni;
+ }
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vni_table,
+ ({
+ send_eid_table_map_pair (p, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+send_eid_table_vni (u32 vni, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_lisp_eid_table_vni_details_t *rmp = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_LISP_EID_TABLE_VNI_DETAILS);
+ rmp->context = context;
+ rmp->vni = clib_host_to_net_u32 (vni);
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
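+/*
+ * Flatten a vector of internal adjacencies into the wire-format array
+ * dst, which must have room for vec_len (adjs) elements.
+ */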
+static void
+lisp_adjacency_copy (vl_api_lisp_adjacency_t * dst, lisp_adjacency_t * adjs)
+{
+ lisp_adjacency_t *adj;
+ vl_api_lisp_adjacency_t a;
+ u32 i, n = vec_len (adjs);
+
+ for (i = 0; i < n; i++)
+ {
+ adj = vec_elt_at_index (adjs, i);
+ memset (&a, 0, sizeof (a));
+
+ switch (gid_address_type (&adj->reid))
+ {
+ case GID_ADDR_IP_PREFIX:
+ a.reid_prefix_len = gid_address_ippref_len (&adj->reid);
+ a.leid_prefix_len = gid_address_ippref_len (&adj->leid);
+ if (gid_address_ip_version (&adj->reid) == IP4)
+ {
+ a.eid_type = 0; /* ipv4 type */
+ clib_memcpy (a.reid, &gid_address_ip (&adj->reid), 4);
+ clib_memcpy (a.leid, &gid_address_ip (&adj->leid), 4);
+ }
+ else
+ {
+ a.eid_type = 1; /* ipv6 type */
+ clib_memcpy (a.reid, &gid_address_ip (&adj->reid), 16);
+ clib_memcpy (a.leid, &gid_address_ip (&adj->leid), 16);
+ }
+ break;
+ case GID_ADDR_MAC:
+ a.eid_type = 2; /* l2 mac type */
+ mac_copy (a.reid, gid_address_mac (&adj->reid));
+ mac_copy (a.leid, gid_address_mac (&adj->leid));
+ break;
+ default:
+ ASSERT (0);
+ }
+ dst[i] = a;
+ }
+}
+
+static void
+ vl_api_show_lisp_rloc_probe_state_t_handler
+ (vl_api_show_lisp_rloc_probe_state_t * mp)
+{
+ vl_api_show_lisp_rloc_probe_state_reply_t *rmp = 0;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_LISP_RLOC_PROBE_STATE_REPLY,
+ {
+ rmp->is_enabled = vnet_lisp_rloc_probe_state_get ();
+ });
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_show_lisp_map_register_state_t_handler
+ (vl_api_show_lisp_map_register_state_t * mp)
+{
+ vl_api_show_lisp_map_register_state_reply_t *rmp = 0;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_LISP_MAP_REGISTER_STATE_REPLY,
+ {
+ rmp->is_enabled = vnet_lisp_map_register_state_get ();
+ });
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_lisp_adjacencies_get_t_handler (vl_api_lisp_adjacencies_get_t * mp)
+{
+ vl_api_lisp_adjacencies_get_reply_t *rmp = 0;
+ lisp_adjacency_t *adjs = 0;
+ int rv = 0;
+ u32 size = ~0;
+ u32 vni = clib_net_to_host_u32 (mp->vni);
+
+ adjs = vnet_lisp_adjacencies_get_by_vni (vni);
+ size = vec_len (adjs) * sizeof (vl_api_lisp_adjacency_t);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_LISP_ADJACENCIES_GET_REPLY, size,
+ {
+ rmp->count = clib_host_to_net_u32 (vec_len (adjs));
+ lisp_adjacency_copy (rmp->adjacencies, adjs);
+ });
+ /* *INDENT-ON* */
+
+ vec_free (adjs);
+}
+
+static void
+vl_api_lisp_eid_table_vni_dump_t_handler (vl_api_lisp_eid_table_vni_dump_t *
+ mp)
+{
+ hash_pair_t *p;
+ u32 *vnis = 0;
+ unix_shared_memory_queue_t *q = 0;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, lcm->table_id_by_vni,
+ ({
+ hash_set (vnis, p->key, 0);
+ }));
+
+ hash_foreach_pair (p, lcm->bd_id_by_vni,
+ ({
+ hash_set (vnis, p->key, 0);
+ }));
+
+ hash_foreach_pair (p, vnis,
+ ({
+ send_eid_table_vni (p->key, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+
+ hash_free (vnis);
+}
+
+static void
+vl_api_show_lisp_status_t_handler (vl_api_show_lisp_status_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_show_lisp_status_reply_t *rmp = NULL;
+ int rv = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_SHOW_LISP_STATUS_REPLY,
+ ({
+ rmp->gpe_status = vnet_lisp_gpe_enable_disable_status ();
+ rmp->feature_status = vnet_lisp_enable_disable_status ();
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_lisp_get_map_request_itr_rlocs_t_handler
+ (vl_api_lisp_get_map_request_itr_rlocs_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_lisp_get_map_request_itr_rlocs_reply_t *rmp = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *loc_set = 0;
+ u8 *tmp_str = 0;
+ int rv = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (~0 == lcm->mreq_itr_rlocs)
+ {
+ tmp_str = format (0, " ");
+ }
+ else
+ {
+ loc_set =
+ pool_elt_at_index (lcm->locator_set_pool, lcm->mreq_itr_rlocs);
+ tmp_str = format (0, "%s", loc_set->name);
+    }
+  /* NUL terminate the vector so the strncpy () below stops correctly */
+  vec_add1 (tmp_str, 0);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_LISP_GET_MAP_REQUEST_ITR_RLOCS_REPLY,
+ ({
+ strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
+ ARRAY_LEN(rmp->locator_set_name) - 1);
+ }));
+ /* *INDENT-ON* */
+
+ vec_free (tmp_str);
+}
+
+static void
+vl_api_show_lisp_pitr_t_handler (vl_api_show_lisp_pitr_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_show_lisp_pitr_reply_t *rmp = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls = 0;
+ u8 *tmp_str = 0;
+ int rv = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (!lcm->lisp_pitr)
+ {
+ tmp_str = format (0, "N/A");
+ }
+ else
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls =
+ pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index);
+ tmp_str = format (0, "%s", ls->name);
+ }
+ else
+ {
+ tmp_str = format (0, "N/A");
+ }
+ }
+ vec_add1 (tmp_str, 0);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_SHOW_LISP_PITR_REPLY,
+ ({
+ rmp->status = lcm->lisp_pitr;
+ strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
+ ARRAY_LEN(rmp->locator_set_name) - 1);
+ }));
+ /* *INDENT-ON* */
+}
+
+/*
+ * lisp_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_lisp;
+#undef _
+}
+
+static clib_error_t *
+lisp_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (lisp_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp_cli.c b/src/vnet/lisp-cp/lisp_cli.c
new file mode 100644
index 00000000..50904601
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_cli.c
@@ -0,0 +1,1606 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lisp-cp/control.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+static clib_error_t *
+lisp_show_adjacencies_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_adjacency_t *adjs, *adj;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 vni = ~0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "vni %d", &vni))
+ ;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == vni)
+ {
+ vlib_cli_output (vm, "error: no vni specified!");
+ goto done;
+ }
+
+  vlib_cli_output (vm, "%s %40s\n", "leid", "reid");
+  adjs = vnet_lisp_adjacencies_get_by_vni (vni);
+
+ vec_foreach (adj, adjs)
+ {
+ vlib_cli_output (vm, "%U %40U\n", format_gid_address, &adj->leid,
+ format_gid_address, &adj->reid);
+ }
+ vec_free (adjs);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_adjacencies_command) = {
+ .path = "show lisp adjacencies",
+ .short_help = "show lisp adjacencies",
+ .function = lisp_show_adjacencies_command_fn,
+};
+/* *INDENT-ON* */
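+/*
+ * Illustrative invocation (the vni value is an example):
+ *   vpp# show lisp adjacencies vni 100
+ */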
+
+static clib_error_t *
+lisp_add_del_map_server_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv = 0;
+ u8 is_add = 1, ip_set = 0;
+ ip_address_t ip;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "%U", unformat_ip_address, &ip))
+ ip_set = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!ip_set)
+ {
+ vlib_cli_output (vm, "map-server ip address not set!");
+ goto done;
+ }
+
+ rv = vnet_lisp_add_del_map_server (&ip, is_add);
+  if (rv)
+ vlib_cli_output (vm, "failed to %s map-server!",
+ is_add ? "add" : "delete");
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_add_del_map_server_command) = {
+ .path = "lisp map-server",
+ .short_help = "lisp map-server add|del <ip>",
+ .function = lisp_add_del_map_server_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_add_del_local_eid_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ gid_address_t eid;
+ gid_address_t *eids = 0;
+ clib_error_t *error = 0;
+ u8 *locator_set_name = 0;
+ u32 locator_set_index = 0, map_index = 0;
+ uword *p;
+ vnet_lisp_add_del_mapping_args_t _a, *a = &_a;
+ int rv = 0;
+ u32 vni = 0;
+ u8 *key = 0;
+ u32 key_id = 0;
+
+ memset (&eid, 0, sizeof (eid));
+ memset (a, 0, sizeof (*a));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "eid %U", unformat_gid_address, &eid))
+ ;
+ else if (unformat (line_input, "vni %d", &vni))
+ gid_address_vni (&eid) = vni;
+ else if (unformat (line_input, "secret-key %_%v%_", &key))
+ ;
+ else if (unformat (line_input, "key-id %U", unformat_hmac_key_id,
+ &key_id))
+ ;
+ else if (unformat (line_input, "locator-set %_%v%_", &locator_set_name))
+ {
+ p = hash_get_mem (lcm->locator_set_index_by_name, locator_set_name);
+ if (!p)
+ {
+ error = clib_error_return (0, "locator-set %s doesn't exist",
+ locator_set_name);
+ goto done;
+ }
+ locator_set_index = p[0];
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+ /* XXX treat batch configuration */
+
+ if (GID_ADDR_SRC_DST == gid_address_type (&eid))
+ {
+ error =
+ clib_error_return (0, "src/dst is not supported for local EIDs!");
+ goto done;
+ }
+
+ if (key && (0 == key_id))
+ {
+ vlib_cli_output (vm, "invalid key_id!");
+      goto done;
+ }
+
+ gid_address_copy (&a->eid, &eid);
+ a->is_add = is_add;
+ a->locator_set_index = locator_set_index;
+ a->local = 1;
+ a->key = key;
+ a->key_id = key_id;
+
+ rv = vnet_lisp_add_del_local_mapping (a, &map_index);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s local mapping!",
+ is_add ? "add" : "delete");
+ }
+done:
+ vec_free (eids);
+ if (locator_set_name)
+ vec_free (locator_set_name);
+ gid_address_free (&a->eid);
+ vec_free (a->key);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_add_del_local_eid_command) = {
+ .path = "lisp eid-table",
+ .short_help = "lisp eid-table add/del [vni <vni>] eid <eid> "
+ "locator-set <locator-set> [key <secret-key> key-id sha1|sha256 ]",
+ .function = lisp_add_del_local_eid_command_fn,
+};
+/* *INDENT-ON* */
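+/*
+ * Illustrative invocations (names and values are examples):
+ *   vpp# lisp eid-table add vni 100 eid 10.0.0.0/24 locator-set ls1
+ *   vpp# lisp eid-table del vni 100 eid 10.0.0.0/24 locator-set ls1
+ */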
+
+static clib_error_t *
+lisp_eid_table_map_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 is_add = 1, is_l2 = 0;
+ u32 vni = 0, dp_id = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "vni %d", &vni))
+ ;
+ else if (unformat (line_input, "vrf %d", &dp_id))
+ ;
+ else if (unformat (line_input, "bd %d", &dp_id))
+ is_l2 = 1;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+ vnet_lisp_eid_table_map (vni, dp_id, is_l2, is_add);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_eid_table_map_command) = {
+ .path = "lisp eid-table map",
+ .short_help = "lisp eid-table map [del] vni <vni> vrf <vrf> | bd <bdi>",
+ .function = lisp_eid_table_map_command_fn,
+};
+/* *INDENT-ON* */
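+/*
+ * Illustrative invocations (values are examples):
+ *   vpp# lisp eid-table map vni 100 vrf 1
+ *   vpp# lisp eid-table map vni 200 bd 2
+ */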
+
+/**
+ * Handler for add/del remote mapping CLI.
+ *
+ * @param vm vlib context
+ * @param input input from user
+ * @param cmd cmd
+ * @return pointer to clib error structure
+ */
+static clib_error_t *
+lisp_add_del_remote_mapping_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1, del_all = 0;
+ locator_t rloc, *rlocs = 0, *curr_rloc = 0;
+ gid_address_t eid;
+ u8 eid_set = 0;
+ u32 vni, action = ~0, p, w;
+ int rv;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ memset (&eid, 0, sizeof (eid));
+ memset (&rloc, 0, sizeof (rloc));
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del-all"))
+ del_all = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ ;
+ else if (unformat (line_input, "eid %U", unformat_gid_address, &eid))
+ eid_set = 1;
+ else if (unformat (line_input, "vni %u", &vni))
+ {
+ gid_address_vni (&eid) = vni;
+ }
+ else if (unformat (line_input, "p %d w %d", &p, &w))
+ {
+ if (!curr_rloc)
+ {
+ clib_warning
+ ("No RLOC configured for setting priority/weight!");
+ goto done;
+ }
+ curr_rloc->priority = p;
+ curr_rloc->weight = w;
+ }
+ else if (unformat (line_input, "rloc %U", unformat_ip_address,
+ &gid_address_ip (&rloc.address)))
+ {
+ /* since rloc is stored in ip prefix we need to set prefix length */
+ ip_prefix_t *pref = &gid_address_ippref (&rloc.address);
+
+ u8 version = gid_address_ip_version (&rloc.address);
+ ip_prefix_len (pref) = ip_address_max_len (version);
+
+ vec_add1 (rlocs, rloc);
+ curr_rloc = &rlocs[vec_len (rlocs) - 1];
+ }
+ else if (unformat (line_input, "action %U",
+ unformat_negative_mapping_action, &action))
+ ;
+ else
+ {
+ clib_warning ("parse error");
+ goto done;
+ }
+ }
+
+ if (!eid_set)
+ {
+ clib_warning ("missing eid!");
+ goto done;
+ }
+
+ if (!del_all)
+ {
+ if (is_add && (~0 == action) && 0 == vec_len (rlocs))
+ {
+ clib_warning ("no action set for negative map-reply!");
+ goto done;
+ }
+ }
+ else
+ {
+ vnet_lisp_clear_all_remote_adjacencies ();
+ goto done;
+ }
+
+ /* TODO build src/dst with seid */
+
+ /* if it's a delete, clean forwarding */
+ if (!is_add)
+ {
+ vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+ memset (a, 0, sizeof (a[0]));
+ gid_address_copy (&a->reid, &eid);
+ if (vnet_lisp_add_del_adjacency (a))
+ {
+ clib_warning ("failed to delete adjacency!");
+ goto done;
+ }
+ }
+
+ /* add as static remote mapping, i.e., not authoritative and infinite
+ * ttl */
+ if (is_add)
+ {
+ vnet_lisp_add_del_mapping_args_t _map_args, *map_args = &_map_args;
+ memset (map_args, 0, sizeof (map_args[0]));
+ gid_address_copy (&map_args->eid, &eid);
+ map_args->action = action;
+ map_args->is_static = 1;
+ map_args->authoritative = 0;
+ map_args->ttl = ~0;
+ rv = vnet_lisp_add_mapping (map_args, rlocs, NULL, NULL);
+ }
+ else
+ rv = vnet_lisp_del_mapping (&eid, NULL);
+
+ if (rv)
+ clib_warning ("failed to %s remote mapping!", is_add ? "add" : "delete");
+
+done:
+ vec_free (rlocs);
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_add_del_remote_mapping_command) = {
+ .path = "lisp remote-mapping",
+ .short_help = "lisp remote-mapping add|del [del-all] vni <vni> "
+ "eid <dst-eid> [action <no-action|natively-forward|"
+ "send-map-request|drop>] rloc <dst-locator> p <prio> w <weight> "
+ "[rloc <dst-locator> ... ]",
+ .function = lisp_add_del_remote_mapping_command_fn,
+};
+/* *INDENT-ON* */
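+
+/*
+ * Example use from the CLI (values are illustrative only):
+ * lisp remote-mapping add vni 10 eid 6.0.1.0/24 rloc 10.0.0.2 p 1 w 1
+ */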
+
+/**
+ * Handler for add/del adjacency CLI.
+ */
+static clib_error_t *
+lisp_add_del_adjacency_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+ u8 is_add = 1;
+ ip_prefix_t *reid_ippref, *leid_ippref;
+ gid_address_t leid, reid;
+ u8 *dmac = gid_address_mac (&reid);
+ u8 *smac = gid_address_mac (&leid);
+ u8 reid_set = 0, leid_set = 0;
+ u32 vni;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ memset (&reid, 0, sizeof (reid));
+ memset (&leid, 0, sizeof (leid));
+
+ leid_ippref = &gid_address_ippref (&leid);
+ reid_ippref = &gid_address_ippref (&reid);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ ;
+ else if (unformat (line_input, "reid %U",
+ unformat_ip_prefix, reid_ippref))
+ {
+ gid_address_type (&reid) = GID_ADDR_IP_PREFIX;
+ reid_set = 1;
+ }
+ else if (unformat (line_input, "reid %U", unformat_mac_address, dmac))
+ {
+ gid_address_type (&reid) = GID_ADDR_MAC;
+ reid_set = 1;
+ }
+ else if (unformat (line_input, "vni %u", &vni))
+ {
+ gid_address_vni (&leid) = vni;
+ gid_address_vni (&reid) = vni;
+ }
+ else if (unformat (line_input, "leid %U",
+ unformat_ip_prefix, leid_ippref))
+ {
+ gid_address_type (&leid) = GID_ADDR_IP_PREFIX;
+ leid_set = 1;
+ }
+ else if (unformat (line_input, "leid %U", unformat_mac_address, smac))
+ {
+ gid_address_type (&leid) = GID_ADDR_MAC;
+ leid_set = 1;
+ }
+ else
+ {
+ clib_warning ("parse error");
+ goto done;
+ }
+ }
+
+ if (!reid_set || !leid_set)
+ {
+ clib_warning ("missing remote or local eid!");
+ goto done;
+ }
+
+ if ((gid_address_type (&leid) != gid_address_type (&reid))
+ || (gid_address_type (&reid) == GID_ADDR_IP_PREFIX
+ && ip_prefix_version (reid_ippref)
+ != ip_prefix_version (leid_ippref)))
+ {
+ clib_warning ("remote and local EIDs are of different types!");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (a[0]));
+ gid_address_copy (&a->leid, &leid);
+ gid_address_copy (&a->reid, &reid);
+ a->is_add = is_add;
+
+ if (vnet_lisp_add_del_adjacency (a))
+ clib_warning ("failed to %s adjacency!", is_add ? "add" : "delete");
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_add_del_adjacency_command) = {
+ .path = "lisp adjacency",
+ .short_help = "lisp adjacency add|del vni <vni> reid <remote-eid> "
+ "leid <local-eid>",
+ .function = lisp_add_del_adjacency_command_fn,
+};
+/* *INDENT-ON* */
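+
+/*
+ * Example use from the CLI (values are illustrative only):
+ * lisp adjacency add vni 10 reid 6.0.2.0/24 leid 6.0.1.0/24
+ */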
+
+
+static clib_error_t *
+lisp_map_request_mode_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _i, *i = &_i;
+ map_request_mode_t mr_mode = _MR_MODE_MAX;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, i))
+ return 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "dst-only"))
+ mr_mode = MR_MODE_DST_ONLY;
+ else if (unformat (i, "src-dst"))
+ mr_mode = MR_MODE_SRC_DST;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ goto done;
+ }
+ }
+
+ if (_MR_MODE_MAX == mr_mode)
+ {
+ clib_warning ("No LISP map request mode entered!");
+ goto done;
+ }
+
+ vnet_lisp_set_map_request_mode (mr_mode);
+
+done:
+ unformat_free (i);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_map_request_mode_command) = {
+ .path = "lisp map-request mode",
+ .short_help = "lisp map-request mode dst-only|src-dst",
+ .function = lisp_map_request_mode_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static u8 *
+format_lisp_map_request_mode (u8 * s, va_list * args)
+{
+ u32 mode = va_arg (*args, u32);
+
+ switch (mode)
+ {
+ case MR_MODE_DST_ONLY:
+ return format (0, "dst-only");
+ case MR_MODE_SRC_DST:
+ return format (0, "src-dst");
+ }
+ return 0;
+}
+
+static clib_error_t *
+lisp_show_map_request_mode_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_cli_output (vm, "map-request mode: %U", format_lisp_map_request_mode,
+ vnet_lisp_get_map_request_mode ());
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_map_request_mode_command) = {
+ .path = "show lisp map-request mode",
+ .short_help = "show lisp map-request mode",
+ .function = lisp_show_map_request_mode_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_msmr_t *mr;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ vec_foreach (mr, lcm->map_resolvers)
+ {
+ vlib_cli_output (vm, "%U", format_ip_address, &mr->address);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_map_resolvers_command) = {
+ .path = "show lisp map-resolvers",
+ .short_help = "show lisp map-resolvers",
+ .function = lisp_show_map_resolvers_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 locator_name_set = 0;
+ u8 *locator_set_name = 0;
+ u8 is_add = 1;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ int rv = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "ls %_%v%_", &locator_set_name))
+ locator_name_set = 1;
+ else if (unformat (line_input, "disable"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "parse error");
+ goto done;
+ }
+ }
+
+ if (!locator_name_set)
+ {
+ clib_warning ("No locator set specified!");
+ goto done;
+ }
+ rv = vnet_lisp_pitr_set_locator_set (locator_set_name, is_add);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s pitr!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ if (locator_set_name)
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_pitr_set_locator_set_command) = {
+ .path = "lisp pitr",
+ .short_help = "lisp pitr [disable] ls <locator-set-name>",
+ .function = lisp_pitr_set_locator_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_pitr_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls;
+ u8 *tmp_str = 0;
+
+ vlib_cli_output (vm, "%=20s%=16s",
+ "pitr", lcm->lisp_pitr ? "locator-set" : "");
+
+ if (!lcm->lisp_pitr)
+ {
+ vlib_cli_output (vm, "%=20s", "disable");
+ return 0;
+ }
+
+ if (~0 == lcm->pitr_map_index)
+ {
+ tmp_str = format (0, "N/A");
+ }
+ else
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls =
+ pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index);
+ tmp_str = format (0, "%s", ls->name);
+ }
+ else
+ {
+ tmp_str = format (0, "N/A");
+ }
+ }
+ vec_add1 (tmp_str, 0);
+
+ vlib_cli_output (vm, "%=20s%=16s", "enable", tmp_str);
+
+ vec_free (tmp_str);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_pitr_command) = {
+ .path = "show lisp pitr",
+ .short_help = "Show pitr",
+ .function = lisp_show_pitr_command_fn,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_eid_entry (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ lisp_cp_main_t *lcm = va_arg (*args, lisp_cp_main_t *);
+ mapping_t *mapit = va_arg (*args, mapping_t *);
+ locator_set_t *ls = va_arg (*args, locator_set_t *);
+ gid_address_t *gid = &mapit->eid;
+ u32 ttl = mapit->ttl;
+ u8 aut = mapit->authoritative;
+ u32 *loc_index;
+ u8 first_line = 1;
+ u8 *loc;
+
+ u8 *type = ls->local ? format (0, "local(%s)", ls->name)
+ : format (0, "remote");
+
+ if (vec_len (ls->locator_indices) == 0)
+ {
+ s = format (s, "%-35U%-30s%-20u%-u", format_gid_address, gid,
+ type, ttl, aut);
+ }
+ else
+ {
+ vec_foreach (loc_index, ls->locator_indices)
+ {
+ locator_t *l = pool_elt_at_index (lcm->locator_pool, loc_index[0]);
+ if (l->local)
+ loc = format (0, "%U", format_vnet_sw_if_index_name, vnm,
+ l->sw_if_index);
+ else
+ loc = format (0, "%U", format_ip_address,
+ &gid_address_ip (&l->address));
+
+ if (first_line)
+ {
+ s = format (s, "%-35U%-20s%-30v%-20u%-u\n", format_gid_address,
+ gid, type, loc, ttl, aut);
+ first_line = 0;
+ }
+ else
+ s = format (s, "%55s%v\n", "", loc);
+ }
+ }
+ return s;
+}
+
+static clib_error_t *
+lisp_show_eid_table_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *mapit;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 mi;
+ gid_address_t eid;
+ u8 print_all = 1;
+ u8 filter = 0;
+ clib_error_t *error = NULL;
+
+ memset (&eid, 0, sizeof (eid));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "eid %U", unformat_gid_address, &eid))
+ print_all = 0;
+ else if (unformat (line_input, "local"))
+ filter = 1;
+ else if (unformat (line_input, "remote"))
+ filter = 2;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vlib_cli_output (vm, "%-35s%-20s%-30s%-20s%-s",
+ "EID", "type", "locators", "ttl", "autoritative");
+
+ if (print_all)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (mapit, lcm->mapping_pool,
+ ({
+ if (mapit->pitr_set)
+ continue;
+
+ locator_set_t * ls = pool_elt_at_index (lcm->locator_set_pool,
+ mapit->locator_set_index);
+ if (filter && !((1 == filter && ls->local) ||
+ (2 == filter && !ls->local)))
+ {
+ continue;
+ }
+ vlib_cli_output (vm, "%U", format_eid_entry, lcm->vnet_main,
+ lcm, mapit, ls);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &eid);
+ if ((u32) ~ 0 == mi)
+ goto done;
+
+ mapit = pool_elt_at_index (lcm->mapping_pool, mi);
+ locator_set_t *ls = pool_elt_at_index (lcm->locator_set_pool,
+ mapit->locator_set_index);
+
+ if (filter && !((1 == filter && ls->local) ||
+ (2 == filter && !ls->local)))
+ {
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U,", format_eid_entry, lcm->vnet_main,
+ lcm, mapit, ls);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_cp_show_eid_table_command) = {
+ .path = "show lisp eid-table",
+ .short_help = "Shows EID table",
+ .function = lisp_show_eid_table_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_enabled = 0;
+ u8 is_set = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ {
+ is_set = 1;
+ is_enabled = 1;
+ }
+ else if (unformat (line_input, "disable"))
+ is_set = 1;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!is_set)
+ {
+ error = clib_error_return (0, "state not set");
+ goto done;
+ }
+
+ vnet_lisp_enable_disable (is_enabled);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_cp_enable_disable_command) = {
+ .path = "lisp",
+ .short_help = "lisp [enable|disable]",
+ .function = lisp_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_map_register_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_enabled = 0;
+ u8 is_set = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ {
+ is_set = 1;
+ is_enabled = 1;
+ }
+ else if (unformat (line_input, "disable"))
+ is_set = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'", format_unformat_error,
+ line_input);
+ goto done;
+ }
+ }
+
+ if (!is_set)
+ {
+ vlib_cli_output (vm, "state not set!");
+ goto done;
+ }
+
+ vnet_lisp_map_register_enable_disable (is_enabled);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_map_register_enable_disable_command) = {
+ .path = "lisp map-register",
+ .short_help = "lisp map-register [enable|disable]",
+ .function = lisp_map_register_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_enabled = 0;
+ u8 is_set = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ {
+ is_set = 1;
+ is_enabled = 1;
+ }
+ else if (unformat (line_input, "disable"))
+ is_set = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'", format_unformat_error,
+ line_input);
+ goto done;
+ }
+ }
+
+ if (!is_set)
+ {
+ vlib_cli_output (vm, "state not set!");
+ goto done;
+ }
+
+ vnet_lisp_rloc_probe_enable_disable (is_enabled);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_rloc_probe_enable_disable_command) = {
+ .path = "lisp rloc-probe",
+ .short_help = "lisp rloc-probe [enable|disable]",
+ .function = lisp_rloc_probe_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_lisp_status (u8 * s, va_list * args)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return format (s, "%s", lcm->is_enabled ? "enabled" : "disabled");
+}
+
+static clib_error_t *
+lisp_show_status_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *msg = 0;
+ msg = format (msg, "feature: %U\ngpe: %U\n",
+ format_lisp_status, format_vnet_lisp_gpe_status);
+ vlib_cli_output (vm, "%v", msg);
+ vec_free (msg);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_status_command) = {
+ .path = "show lisp status",
+ .short_help = "show lisp status",
+ .function = lisp_show_status_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ hash_pair_t *p;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ uword *vni_table = 0;
+ u8 is_l2 = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "l2"))
+ {
+ vni_table = lcm->bd_id_by_vni;
+ is_l2 = 1;
+ }
+ else if (unformat (line_input, "l3"))
+ {
+ vni_table = lcm->table_id_by_vni;
+ is_l2 = 0;
+ }
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!vni_table)
+ {
+ vlib_cli_output (vm, "Error: expected l2|l3 param!\n");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF");
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vni_table,
+ ({
+ vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
+ }));
+ /* *INDENT-ON* */
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_eid_table_map_command) = {
+ .path = "show lisp eid-table map",
+ .short_help = "show lisp eid-table l2|l3",
+ .function = lisp_show_eid_table_map_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_add_del_locator_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ vnet_main_t *vnm = lgm->vnet_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ clib_error_t *error = 0;
+ u8 *locator_set_name = 0;
+ locator_t locator, *locators = 0;
+ vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ u32 ls_index = 0;
+ int rv = 0;
+
+ memset (&locator, 0, sizeof (locator));
+ memset (a, 0, sizeof (a[0]));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add %_%v%_", &locator_set_name))
+ is_add = 1;
+ else if (unformat (line_input, "del %_%v%_", &locator_set_name))
+ is_add = 0;
+ else if (unformat (line_input, "iface %U p %d w %d",
+ unformat_vnet_sw_interface, vnm,
+ &locator.sw_if_index, &locator.priority,
+ &locator.weight))
+ {
+ locator.local = 1;
+ vec_add1 (locators, locator);
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ a->name = locator_set_name;
+ a->locators = locators;
+ a->is_add = is_add;
+ a->local = 1;
+
+ rv = vnet_lisp_add_del_locator_set (a, &ls_index);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s locator-set!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ vec_free (locators);
+ if (locator_set_name)
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_cp_add_del_locator_set_command) = {
+ .path = "lisp locator-set",
+ .short_help = "lisp locator-set add/del <name> [iface <iface-name> "
+ "p <priority> w <weight>]",
+ .function = lisp_add_del_locator_set_command_fn,
+};
+/* *INDENT-ON* */
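+
+/*
+ * Example use from the CLI (interface name and values are illustrative):
+ * lisp locator-set add ls1 iface GigabitEthernet0/8/0 p 1 w 1
+ */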
+
+static clib_error_t *
+lisp_add_del_locator_in_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ vnet_main_t *vnm = lgm->vnet_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ clib_error_t *error = 0;
+ u8 *locator_set_name = 0;
+ u8 locator_set_name_set = 0;
+ locator_t locator, *locators = 0;
+ vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ u32 ls_index = 0;
+
+ memset (&locator, 0, sizeof (locator));
+ memset (a, 0, sizeof (a[0]));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "locator-set %_%v%_", &locator_set_name))
+ locator_set_name_set = 1;
+ else if (unformat (line_input, "iface %U p %d w %d",
+ unformat_vnet_sw_interface, vnm,
+ &locator.sw_if_index, &locator.priority,
+ &locator.weight))
+ {
+ locator.local = 1;
+ vec_add1 (locators, locator);
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (!locator_set_name_set)
+ {
+ error = clib_error_return (0, "locator_set name not set!");
+ goto done;
+ }
+
+ a->name = locator_set_name;
+ a->locators = locators;
+ a->is_add = is_add;
+ a->local = 1;
+
+ vnet_lisp_add_del_locator (a, 0, &ls_index);
+
+done:
+ vec_free (locators);
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_cp_add_del_locator_in_set_command) = {
+ .path = "lisp locator",
+ .short_help = "lisp locator add/del locator-set <name> iface <iface-name> "
+ "p <priority> w <weight>",
+ .function = lisp_add_del_locator_in_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ locator_set_t *lsit;
+ locator_t *loc;
+ u32 *locit;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ vlib_cli_output (vm, "%s%=16s%=16s%=16s", "Locator-set", "Locator",
+ "Priority", "Weight");
+
+ /* *INDENT-OFF* */
+ pool_foreach (lsit, lcm->locator_set_pool,
+ ({
+ u8 * msg = 0;
+ int next_line = 0;
+ if (lsit->local)
+ {
+ msg = format (msg, "%v", lsit->name);
+ }
+ else
+ {
+ msg = format (msg, "<%s-%d>", "remote", lsit - lcm->locator_set_pool);
+ }
+ vec_foreach (locit, lsit->locator_indices)
+ {
+ if (next_line)
+ {
+ msg = format (msg, "%16s", " ");
+ }
+ loc = pool_elt_at_index (lcm->locator_pool, locit[0]);
+ if (loc->local)
+ msg = format (msg, "%16d%16d%16d\n", loc->sw_if_index, loc->priority,
+ loc->weight);
+ else
+ msg = format (msg, "%16U%16d%16d\n", format_ip_address,
+ &gid_address_ip(&loc->address), loc->priority,
+ loc->weight);
+ next_line = 1;
+ }
+ vlib_cli_output (vm, "%v", msg);
+ vec_free (msg);
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_cp_show_locator_sets_command) = {
+ .path = "show lisp locator-set",
+ .short_help = "Shows locator-sets",
+ .function = lisp_cp_show_locator_sets_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_add_del_map_resolver_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1, addr_set = 0;
+ ip_address_t ip_addr;
+ clib_error_t *error = 0;
+ int rv = 0;
+ vnet_lisp_add_del_map_resolver_args_t _a, *a = &_a;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "%U", unformat_ip_address, &ip_addr))
+ addr_set = 1;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (!addr_set)
+ {
+ error = clib_error_return (0, "Map-resolver address must be set!");
+ goto done;
+ }
+
+ a->is_add = is_add;
+ a->address = ip_addr;
+ rv = vnet_lisp_add_del_map_resolver (a);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s map-resolver!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_add_del_map_resolver_command) = {
+ .path = "lisp map-resolver",
+ .short_help = "lisp map-resolver add/del <ip_address>",
+ .function = lisp_add_del_map_resolver_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_add_del_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ u8 *locator_set_name = 0;
+ clib_error_t *error = 0;
+ int rv = 0;
+ vnet_lisp_add_del_mreq_itr_rloc_args_t _a, *a = &_a;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add %_%v%_", &locator_set_name))
+ is_add = 1;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ a->is_add = is_add;
+ a->locator_set_name = locator_set_name;
+ rv = vnet_lisp_add_del_mreq_itr_rlocs (a);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s map-request itr-rlocs!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_add_del_map_request_command) = {
+ .path = "lisp map-request itr-rlocs",
+ .short_help = "lisp map-request itr-rlocs add/del <locator_set_name>",
+ .function = lisp_add_del_mreq_itr_rlocs_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *loc_set;
+
+ vlib_cli_output (vm, "%=20s", "itr-rlocs");
+
+ if (~0 == lcm->mreq_itr_rlocs)
+ {
+ return 0;
+ }
+
+ loc_set = pool_elt_at_index (lcm->locator_set_pool, lcm->mreq_itr_rlocs);
+
+ vlib_cli_output (vm, "%=20s", loc_set->name);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_map_request_command) = {
+ .path = "show lisp map-request itr-rlocs",
+ .short_help = "Shows map-request itr-rlocs",
+ .function = lisp_show_mreq_itr_rlocs_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 is_add = 1, ip_set = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ ip_address_t ip;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_ip_address, &ip))
+ ip_set = 1;
+ else if (unformat (line_input, "disable"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "parse error");
+ goto done;
+ }
+ }
+
+ if (!ip_set)
+ {
+ clib_warning ("No petr IP specified!");
+ goto done;
+ }
+
+ if (vnet_lisp_use_petr (&ip, is_add))
+ {
+ error = clib_error_return (0, "failed to %s petr!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_use_petr_set_locator_set_command) = {
+ .path = "lisp use-petr",
+ .short_help = "lisp use-petr [disable] <petr-ip>",
+ .function = lisp_use_petr_set_locator_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_petr_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls;
+ locator_t *loc;
+ u8 *tmp_str = 0;
+ u8 use_petr = lcm->flags & LISP_FLAG_USE_PETR;
+ vlib_cli_output (vm, "%=20s%=16s", "petr", use_petr ? "ip" : "");
+
+ if (!use_petr)
+ {
+ vlib_cli_output (vm, "%=20s", "disable");
+ return 0;
+ }
+
+ if (~0 == lcm->petr_map_index)
+ {
+ tmp_str = format (0, "N/A");
+ }
+ else
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->petr_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls = pool_elt_at_index(lcm->locator_set_pool, m->locator_set_index);
+ loc = pool_elt_at_index (lcm->locator_pool, ls->locator_indices[0]);
+ tmp_str = format (0, "%U", format_ip_address, &loc->address);
+ }
+ else
+ {
+ tmp_str = format (0, "N/A");
+ }
+ }
+ vec_add1 (tmp_str, 0);
+
+ vlib_cli_output (vm, "%=20s%=16s", "enable", tmp_str);
+
+ vec_free (tmp_str);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_petr_command) = {
+ .path = "show lisp petr",
+ .short_help = "Show petr",
+ .function = lisp_show_petr_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp_cp_dpo.c b/src/vnet/lisp-cp/lisp_cp_dpo.c
new file mode 100644
index 00000000..848f621e
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_cp_dpo.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-cp/control.h>
+
+/**
+ * The static array of LISP punt DPOs
+ */
+static dpo_id_t lisp_cp_dpos[DPO_PROTO_NUM];
+
+const dpo_id_t *
+lisp_cp_dpo_get (dpo_proto_t proto)
+{
+ /*
+ * there is one instance of this DPO type per DPO protocol, so
+ * we can use the protocol as the index
+ */
+ return (&lisp_cp_dpos[proto]);
+}
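+
+/*
+ * Usage sketch (illustrative): fetch the punt DPO for a payload protocol,
+ * e.g. when stacking a LISP-CP punt path for IPv4:
+ *
+ * const dpo_id_t *dpo = lisp_cp_dpo_get (DPO_PROTO_IP4);
+ */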
+
+static u8 *
+format_lisp_cp_dpo (u8 * s, va_list * args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+
+ return (format (s, "lisp-cp-punt-%U", format_dpo_proto, index));
+}
+
+static void
+lisp_cp_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+static void
+lisp_cp_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+const static dpo_vft_t lisp_cp_vft = {
+ .dv_lock = lisp_cp_dpo_lock,
+ .dv_unlock = lisp_cp_dpo_unlock,
+ .dv_format = format_lisp_cp_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a LISP-CP
+ * object.
+ *
+ * this means that these graph nodes are ones from which a LISP-CP is the
+ * parent object in the DPO-graph.
+ */
+const static char *const lisp_cp_ip4_nodes[] = {
+ "lisp-cp-lookup-ip4",
+ NULL,
+};
+
+const static char *const lisp_cp_ip6_nodes[] = {
+ "lisp-cp-lookup-ip6",
+ NULL,
+};
+
+const static char *const lisp_cp_ethernet_nodes[] = {
+ "lisp-cp-lookup-l2",
+ NULL,
+};
+
+const static char *const lisp_cp_nsh_nodes[] = {
+ "lisp-cp-lookup-nsh",
+ NULL,
+};
+
+const static char *const *const lisp_cp_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lisp_cp_ip4_nodes,
+ [DPO_PROTO_IP6] = lisp_cp_ip6_nodes,
+ [DPO_PROTO_ETHERNET] = lisp_cp_ethernet_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+ [DPO_PROTO_NSH] = lisp_cp_nsh_nodes,
+};
+
+clib_error_t *
+lisp_cp_dpo_module_init (vlib_main_t * vm)
+{
+ dpo_proto_t dproto;
+
+ /*
+ * register the LISP-CP DPO type along with the per-protocol VLIB
+ * graph nodes (lisp_cp_nodes above) from which it is reached
+ */
+ dpo_register (DPO_LISP_CP, &lisp_cp_vft, lisp_cp_nodes);
+
+ FOR_EACH_DPO_PROTO (dproto)
+ {
+ dpo_set (&lisp_cp_dpos[dproto], DPO_LISP_CP, dproto, dproto);
+ }
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lisp_cp_dpo_module_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp_cp_dpo.h b/src/vnet/lisp-cp/lisp_cp_dpo.h
new file mode 100644
index 00000000..f0f3fae8
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_cp_dpo.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LISP_CP_DPO_H__
+#define __LISP_CP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of punt to the LISP control plane.
+ */
+typedef struct lisp_cp_dpo_t
+{
+ /**
+ * The transport payload type.
+ */
+ dpo_proto_t lcd_proto;
+} lisp_cp_dpo_t;
+
+extern const dpo_id_t *lisp_cp_dpo_get (dpo_proto_t proto);
+
+extern clib_error_t *lisp_cp_dpo_module_init (vlib_main_t * vm);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp_cp_messages.h b/src/vnet/lisp-cp/lisp_cp_messages.h
new file mode 100644
index 00000000..69510a0e
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_cp_messages.h
@@ -0,0 +1,644 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VNET_LISP_GPE_LISP_CP_MESSAGES_H_
+#define VNET_LISP_GPE_LISP_CP_MESSAGES_H_
+
+#include <vnet/vnet.h>
+
+#define MAX_IP_PKT_LEN 4096
+#define MAX_IP_HDR_LEN 40 /* without options or IPv6 hdr extensions */
+#define UDP_HDR_LEN 8
+#define LISP_DATA_HDR_LEN 8
+#define LISP_ECM_HDR_LEN 4
+#define MAX_LISP_MSG_ENCAP_LEN (2 * (MAX_IP_HDR_LEN + UDP_HDR_LEN) + LISP_ECM_HDR_LEN)
+#define MAX_LISP_PKT_ENCAP_LEN (MAX_IP_HDR_LEN + UDP_HDR_LEN + LISP_DATA_HDR_LEN)
+
+#define LISP_CONTROL_PORT 4342
+
+/*
+ * EID RECORD FIELD
+ */
+
+/*
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * / | Reserved | EID mask-len | EID-prefix-AFI |
+ * Rec +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \ | EID-prefix ... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+
+typedef struct _eid_prefix_record_hdr
+{
+ u8 reserved;
+ u8 eid_prefix_length;
+} __attribute__ ((__packed__)) eid_record_hdr_t;
+
+void eid_rec_hdr_init (eid_record_hdr_t * ptr);
+
+#define EID_REC_CAST(h_) ((eid_record_hdr_t *)(h_))
+#define EID_REC_MLEN(h_) EID_REC_CAST((h_))->eid_prefix_length
+#define EID_REC_ADDR(h) ((u8 *)(h) + sizeof(eid_record_hdr_t))
+
+/* LISP Types */
+typedef enum
+{
+ NOT_LISP_MSG,
+ LISP_MAP_REQUEST = 1,
+ LISP_MAP_REPLY,
+ LISP_MAP_REGISTER,
+ LISP_MAP_NOTIFY,
+ LISP_INFO_NAT = 7,
+ LISP_ENCAP_CONTROL_TYPE = 8,
+ LISP_MSG_TYPES
+} lisp_msg_type_e;
+
+/*
+ * ENCAPSULATED CONTROL MESSAGE
+ */
+
+/*
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * / | IPv4 or IPv6 Header |
+ * OH | (uses RLOC addresses) |
+ * \ | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * / | Source Port = xxxx | Dest Port = 4342 |
+ * UDP +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \ | UDP Length | UDP Checksum |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * LH |Type=8 |S| Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * / | IPv4 or IPv6 Header |
+ * IH | (uses RLOC or EID addresses) |
+ * \ | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * / | Source Port = xxxx | Dest Port = yyyy |
+ * UDP +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \ | UDP Length | UDP Checksum |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * LCM | LISP Control Message |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+
+/*
+ * Encapsulated control message header. This is followed by the IP
+ * header of the encapsulated LISP control message.
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Type=8 |S| Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+typedef struct
+{
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 reserved:3;
+ u8 s_bit:1;
+ u8 type:4;
+#else
+ u8 type:4;
+ u8 s_bit:1;
+ u8 reserved:3;
+#endif
+ u8 reserved2[3];
+} ecm_hdr_t;
+
+char *ecm_hdr_to_char (ecm_hdr_t * h);
+
+#define ECM_TYPE(h_) ((ecm_hdr_t *)(h_))->type
+
+/*
+ * MAP-REQUEST MESSAGE
+ */
+
+/*
+ * Map-Request Message Format
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Type=1 |A|M|P|S|p|s| Reserved | IRC | Record Count |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Nonce . . . |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | . . . Nonce |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Source-EID-AFI | Source EID Address ... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ITR-RLOC-AFI 1 | ITR-RLOC Address 1 ... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ITR-RLOC-AFI n | ITR-RLOC Address n ... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * / | Reserved | EID mask-len | EID-prefix-AFI |
+ * Rec +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \ | EID-prefix ... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Map-Reply Record ... |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Mapping Protocol Data |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+
+/*
+ * Fixed size portion of the map request. Variable size source EID
+ * address, originating ITR RLOC AFIs and addresses and then map
+ * request records follow.
+ */
+typedef struct
+{
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 solicit_map_request:1;
+ u8 rloc_probe:1;
+ u8 map_data_present:1;
+ u8 authoritative:1;
+ u8 type:4;
+#else
+ u8 type:4;
+ u8 authoritative:1;
+ u8 map_data_present:1;
+ u8 rloc_probe:1;
+ u8 solicit_map_request:1;
+#endif
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 reserved1:6;
+ u8 smr_invoked:1;
+ u8 pitr:1;
+#else
+ u8 pitr:1;
+ u8 smr_invoked:1;
+ u8 reserved1:6;
+#endif
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 additional_itr_rloc_count:5;
+ u8 reserved2:3;
+#else
+ u8 reserved2:3;
+ u8 additional_itr_rloc_count:5;
+#endif
+ u8 record_count;
+ u64 nonce;
+} __attribute__ ((__packed__)) map_request_hdr_t;
+
+void map_request_hdr_init (void *ptr);
+char *map_request_hdr_to_char (map_request_hdr_t * h);
+
+#define MREQ_TYPE(h_) (h_)->type
+#define MREQ_HDR_CAST(h_) ((map_request_hdr_t *)(h_))
+#define MREQ_REC_COUNT(h_) (MREQ_HDR_CAST(h_))->record_count
+#define MREQ_RLOC_PROBE(h_) (MREQ_HDR_CAST(h_))->rloc_probe
+#define MREQ_ITR_RLOC_COUNT(h_) (MREQ_HDR_CAST(h_))->additional_itr_rloc_count
+#define MREQ_NONCE(h_) (MREQ_HDR_CAST(h_))->nonce
+#define MREQ_SMR(h_) (MREQ_HDR_CAST(h_))->solicit_map_request
+#define MREQ_SMR_INVOKED(h_) (MREQ_HDR_CAST(h_))->smr_invoked
+
+/*
+ * MAP-REPLY MESSAGE
+ */
+
+ /*
+ * Map-Reply Message Format
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Type=2 |P|E|S| Reserved | Record Count |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Nonce . . . |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | . . . Nonce |
+ * +-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | | Record TTL |
+ * | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * R | Locator Count | EID mask-len | ACT |A| Reserved |
+ * e +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * c | Rsvd | Map-Version Number | EID-AFI |
+ * o +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * r | EID-prefix |
+ * d +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | /| Priority | Weight | M Priority | M Weight |
+ * | L +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | o | Unused Flags |L|p|R| Loc-AFI |
+ * | c +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | \| Locator |
+ * +-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Mapping Protocol Data |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+ /*
+ * Fixed size portion of the map reply.
+ */
+typedef struct
+{
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 reserved1:1;
+ u8 security:1;
+ u8 echo_nonce:1;
+ u8 rloc_probe:1;
+ u8 type:4;
+#else
+ u8 type:4;
+ u8 rloc_probe:1;
+ u8 echo_nonce:1;
+ u8 security:1;
+ u8 reserved1:1;
+#endif
+ u8 reserved2;
+ u8 reserved3;
+ u8 record_count;
+ u64 nonce;
+} __attribute__ ((__packed__)) map_reply_hdr_t;
+
+void map_reply_hdr_init (void *ptr);
+char *map_reply_hdr_to_char (map_reply_hdr_t * h);
+
+#define MREP_TYPE(h_) MREP_HDR_CAST(h_)->type
+#define MREP_HDR_CAST(h_) ((map_reply_hdr_t *)(h_))
+#define MREP_REC_COUNT(h_) MREP_HDR_CAST(h_)->record_count
+#define MREP_RLOC_PROBE(h_) MREP_HDR_CAST(h_)->rloc_probe
+#define MREP_NONCE(h_) MREP_HDR_CAST(h_)->nonce
+
+
+always_inline lisp_msg_type_e
+lisp_msg_type (void *b)
+{
+ ecm_hdr_t *hdr = b;
+ if (!hdr)
+ {
+ return (NOT_LISP_MSG);
+ }
+ return (hdr->type);
+}
+
+always_inline void
+increment_record_count (void *b)
+{
+ switch (lisp_msg_type (b))
+ {
+ case LISP_MAP_REQUEST:
+ MREQ_REC_COUNT (b) += 1;
+ break;
+ case LISP_MAP_REPLY:
+ MREP_REC_COUNT (b) += 1;
+ break;
+ default:
+ return;
+ }
+}
+
+
+/*
+ * LOCATOR FIELD
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * /| Priority | Weight | M Priority | M Weight |
+ * L +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * o | Unused Flags |L|p|R| Loc-AFI |
+ * c +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \| Locator |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Fixed portion of the mapping record locator. Variable length
+ * locator address follows.
+ */
+typedef struct _locator_hdr
+{
+ u8 priority;
+ u8 weight;
+ u8 mpriority;
+ u8 mweight;
+ u8 unused1;
+#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 reachable:1;
+ u8 probed:1;
+ u8 local:1;
+ u8 unused2:5;
+#else
+ u8 unused2:5;
+ u8 local:1;
+ u8 probed:1;
+ u8 reachable:1;
+#endif
+} __attribute__ ((__packed__)) locator_hdr_t;
+
+#define LOC_CAST(h_) ((locator_hdr_t *)(h_))
+#define LOC_PROBED(h_) LOC_CAST(h_)->probed
+#define LOC_PRIORITY(h_) LOC_CAST(h_)->priority
+#define LOC_WEIGHT(h_) LOC_CAST(h_)->weight
+#define LOC_MPRIORITY(h_) LOC_CAST(h_)->mpriority
+#define LOC_MWEIGHT(h_) LOC_CAST(h_)->mweight
+#define LOC_REACHABLE(h_) LOC_CAST(h_)->reachable
+#define LOC_LOCAL(h_) LOC_CAST(h_)->local
+#define LOC_ADDR(h_) ((u8 *)(h_) + sizeof(locator_hdr_t))
+
+/*
+ * MAPPING RECORD
+ *
+ * Mapping record used in all LISP control messages.
+ *
+ * +---> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | | Record TTL |
+ * | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * R | Locator Count | EID mask-len | ACT |A| Reserved |
+ * e +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * c | Rsvd | Map-Version Number | EID-AFI |
+ * o +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * r | EID-prefix |
+ * d +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | /| Priority | Weight | M Priority | M Weight |
+ * | / +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Loc | Unused Flags |L|p|R| Loc-AFI |
+ * | \ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | \| Locator |
+ * +---> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+/*
+ * Fixed portion of the mapping record. EID prefix address and
+ * locators follow.
+ */
+
+typedef struct _mapping_record_hdr_t
+{
+ u32 ttl;
+ u8 locator_count;
+ u8 eid_prefix_length;
+#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 reserved1:4;
+ u8 authoritative:1;
+ u8 action:3;
+#else
+ u8 action:3;
+ u8 authoritative:1;
+ u8 reserved1:4;
+#endif
+ u8 reserved2;
+#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 version_hi:4;
+ u8 reserved3:4;
+#else
+ u8 reserved3:4;
+ u8 version_hi:4;
+#endif
+ u8 version_low;
+} __attribute__ ((__packed__)) mapping_record_hdr_t;
+
+void mapping_record_init_hdr (mapping_record_hdr_t * h);
+
+#define MAP_REC_EID_PLEN(h) ((mapping_record_hdr_t *)(h))->eid_prefix_length
+#define MAP_REC_LOC_COUNT(h) ((mapping_record_hdr_t *)(h))->locator_count
+#define MAP_REC_ACTION(h) ((mapping_record_hdr_t *)(h))->action
+#define MAP_REC_AUTH(h) ((mapping_record_hdr_t *)(h))->authoritative
+#define MAP_REC_TTL(h) ((mapping_record_hdr_t *)(h))->ttl
+#define MAP_REC_EID(h) ((u8 *)(h) + sizeof(mapping_record_hdr_t))
+#define MAP_REC_VERSION(h) (((h)->version_hi << 8) | (h)->version_low)
+
+typedef enum
+{
+ LISP_NO_ACTION,
+ LISP_FORWARD_NATIVE,
+ LISP_SEND_MAP_REQUEST,
+ LISP_DROP
+} lisp_action_e;
+
+typedef enum lisp_authoritative
+{
+ A_NO_AUTHORITATIVE = 0,
+ A_AUTHORITATIVE
+} lisp_authoritative_e;
+
+/*
+ * LISP Canonical Address Format Encodings
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | AFI = 16387 | Rsvd1 | Flags |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Type | Rsvd2 | Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+typedef struct _lcaf_hdr_t
+{
+ u8 reserved1;
+ u8 flags;
+ u8 type;
+ u8 reserved2;
+ u16 len;
+} __attribute__ ((__packed__)) lcaf_hdr_t;
+
+#define LCAF_TYPE(h) ((lcaf_hdr_t *)(h))->type
+#define LCAF_LENGTH(h) ((lcaf_hdr_t *)(h))->len
+#define LCAF_RES2(h) ((lcaf_hdr_t *)(h))->reserved2
+#define LCAF_FLAGS(h) ((lcaf_hdr_t *)(h))->flags
+#define LCAF_PAYLOAD(h) ((u8 *)(h) + sizeof(lcaf_hdr_t))
+
+/*
+ * Source/Dest Key Canonical Address Format:
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | Source-ML | Dest-ML |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct _lcaf_src_dst_hdr_t
+{
+ u16 reserved;
+ u8 src_mask_len;
+ u8 dst_mask_len;
+} __attribute__ ((__packed__)) lcaf_src_dst_hdr_t;
+
+#define LCAF_SD_SRC_ML(_h) (_h)->src_mask_len
+#define LCAF_SD_DST_ML(_h) (_h)->dst_mask_len
+
+/*
+ * SPI LCAF
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Service Path ID | Service index |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct _lcaf_spi_hdr_t
+{
+ u32 spi_si;
+} __attribute__ ((__packed__)) lcaf_spi_hdr_t;
+
+#define LCAF_SPI_SI(_h) (_h)->spi_si
+
+/*
+ * The Map-Register message format is:
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Type=3 |P| Reserved |M| Record Count |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Nonce . . . |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | . . . Nonce |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Key ID | Authentication Data Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * ~ Authentication Data ~
+ * +-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | | Record TTL |
+ * | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * R | Locator Count | EID mask-len | ACT |A| Reserved |
+ * e +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * c | Rsvd | Map-Version Number | EID-Prefix-AFI |
+ * o +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * r | EID-Prefix |
+ * d +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | /| Priority | Weight | M Priority | M Weight |
+ * | L +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | o | Unused Flags |L|p|R| Loc-AFI |
+ * | c +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | \| Locator |
+ * +-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 res1:3;
+ u8 proxy_map_reply:1;
+ u8 type:4;
+#else
+ u8 type:4;
+ u8 proxy_map_reply:1;
+ u8 res1:3;
+#endif
+
+ u8 res2;
+
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 want_map_notify:1;
+ u8 res3:7;
+#else
+ u8 res3:7;
+ u8 want_map_notify:1;
+#endif
+
+ u8 record_count;
+ u64 nonce;
+ u16 key_id;
+ u16 auth_data_len;
+ u8 data[0];
+} __attribute__ ((__packed__)) map_register_hdr_t;
+
+#define MREG_TYPE(h_) (h_)->type
+#define MREG_HDR_CAST(h_) ((map_register_hdr_t *)(h_))
+#define MREG_PROXY_MR(h_) (MREG_HDR_CAST(h_))->proxy_map_reply
+#define MREG_WANT_MAP_NOTIFY(h_) (MREG_HDR_CAST(h_))->want_map_notify
+#define MREG_REC_COUNT(h_) (MREG_HDR_CAST(h_))->record_count
+#define MREG_NONCE(h_) (MREG_HDR_CAST(h_))->nonce
+#define MREG_KEY_ID(h_) (MREG_HDR_CAST(h_))->key_id
+#define MREG_AUTH_DATA_LEN(h_) (MREG_HDR_CAST(h_))->auth_data_len
+#define MREG_DATA(h_) (MREG_HDR_CAST(h_))->data
+
+/*
+ * The Map-Notify message format is:
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Type=4 | Reserved | Record Count |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Nonce . . . |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | . . . Nonce |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Key ID | Authentication Data Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * ~ Authentication Data ~
+ * +-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | | Record TTL |
+ * | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * R | Locator Count | EID mask-len | ACT |A| Reserved |
+ * e +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * c | Rsvd | Map-Version Number | EID-Prefix-AFI |
+ * o +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * r | EID-Prefix |
+ * d +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | /| Priority | Weight | M Priority | M Weight |
+ * | L +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | o | Unused Flags |L|p|R| Loc-AFI |
+ * | c +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | \| Locator |
+ * +-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+typedef struct
+{
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 res1:4;
+ u8 type:4;
+#else
+ u8 type:4;
+ u8 res1:4;
+#endif
+
+ u16 res2;
+
+ u8 record_count;
+ u64 nonce;
+ u16 key_id;
+ u16 auth_data_len;
+ u8 data[0];
+} __attribute__ ((__packed__)) map_notify_hdr_t;
+
+#define MNOTIFY_TYPE(h_) (h_)->type
+#define MNOTIFY_HDR_CAST(h_) ((map_register_hdr_t *)(h_))
+#define MNOTIFY_REC_COUNT(h_) (MREG_HDR_CAST(h_))->record_count
+#define MNOTIFY_NONCE(h_) (MREG_HDR_CAST(h_))->nonce
+#define MNOTIFY_KEY_ID(h_) (MREG_HDR_CAST(h_))->key_id
+#define MNOTIFY_AUTH_DATA_LEN(h_) (MREG_HDR_CAST(h_))->auth_data_len
+#define MNOTIFY_DATA(h_) (MREG_HDR_CAST(h_))->data
+
+/*
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Ver|O|C|R|R|R|R|R|R| Length | MD type=0x1 | Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Service Path Identifier | Service Index |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+typedef struct
+{
+ u32 header;
+ u32 spi_si;
+} __attribute__ ((__packed__)) lisp_nsh_hdr_t;
+
+#endif /* VNET_LISP_GPE_LISP_CP_MESSAGES_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp_msg_serdes.c b/src/vnet/lisp-cp/lisp_msg_serdes.c
new file mode 100644
index 00000000..aee4f25e
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_msg_serdes.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lisp-cp/lisp_msg_serdes.h>
+#include <vnet/lisp-cp/packets.h>
+#include <vppinfra/time.h>
+
+void *lisp_msg_put_gid (vlib_buffer_t * b, gid_address_t * gid);
+
+static void
+lisp_msg_put_locators (vlib_buffer_t * b, locator_t * locators)
+{
+ locator_t *loc;
+
+ vec_foreach (loc, locators)
+ {
+ u8 *p = vlib_buffer_put_uninit (b, sizeof (locator_hdr_t));
+ memset (p, 0, sizeof (locator_hdr_t));
+ LOC_PRIORITY (p) = loc->priority;
+ LOC_MPRIORITY (p) = loc->mpriority;
+ LOC_WEIGHT (p) = loc->weight;
+ LOC_MWEIGHT (p) = loc->mweight;
+ LOC_LOCAL (p) = loc->local;
+ LOC_PROBED (p) = loc->probed ? 1 : 0;
+ lisp_msg_put_gid (b, &loc->address);
+ }
+}
+
+static void
+lisp_msg_put_mapping_record (vlib_buffer_t * b, mapping_t * record)
+{
+ mapping_record_hdr_t *p =
+ vlib_buffer_put_uninit (b, sizeof (mapping_record_hdr_t));
+ gid_address_t *eid = &record->eid;
+
+ memset (p, 0, sizeof (*p));
+ MAP_REC_EID_PLEN (p) = gid_address_len (eid);
+ MAP_REC_TTL (p) = clib_host_to_net_u32 (MAP_REGISTER_DEFAULT_TTL);
+ MAP_REC_AUTH (p) = record->authoritative ? 1 : 0;
+ MAP_REC_LOC_COUNT (p) = vec_len (record->locators);
+
+ lisp_msg_put_gid (b, eid);
+ lisp_msg_put_locators (b, record->locators);
+}
+
+static void
+lisp_msg_put_mreg_records (vlib_buffer_t * b, mapping_t * records)
+{
+ u32 i;
+ for (i = 0; i < vec_len (records); i++)
+ lisp_msg_put_mapping_record (b, &records[i]);
+}
+
+void *
+lisp_msg_put_gid (vlib_buffer_t * b, gid_address_t * gid)
+{
+ u8 *p = 0;
+ if (!gid)
+ {
+ /* insert only src-eid-afi field set to 0 */
+ p = vlib_buffer_put_uninit (b, sizeof (u16));
+ *(u16 *) p = 0;
+ }
+ else
+ {
+ p = vlib_buffer_put_uninit (b, gid_address_size_to_put (gid));
+ gid_address_put (p, gid);
+ }
+ return p;
+}
+
+static void *
+lisp_msg_put_itr_rlocs (lisp_cp_main_t * lcm, vlib_buffer_t * b,
+ gid_address_t * rlocs, u8 * locs_put)
+{
+ u8 *bp, count = 0;
+ u32 i;
+
+ bp = vlib_buffer_get_current (b);
+ for (i = 0; i < vec_len (rlocs); i++)
+ {
+ lisp_msg_put_gid (b, &rlocs[i]);
+ count++;
+ }
+
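+ /* the map-request header stores the number of *additional* ITR-RLOCs,
+ * i.e., one less than the total written here, hence count - 1 */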
+ *locs_put = count - 1;
+ return bp;
+}
+
+void *
+lisp_msg_put_eid_rec (vlib_buffer_t * b, gid_address_t * eid)
+{
+ eid_record_hdr_t *h = vlib_buffer_put_uninit (b, sizeof (*h));
+
+ memset (h, 0, sizeof (*h));
+ EID_REC_MLEN (h) = gid_address_len (eid);
+ lisp_msg_put_gid (b, eid);
+ return h;
+}
+
+u64
+nonce_build (u32 seed)
+{
+ u64 nonce;
+ u32 nonce_lower;
+ u32 nonce_upper;
+ struct timespec ts;
+
+ /* Put the nanosecond clock in the lower 32 bits and an XOR of the
+ * nanosecond clock with the second clock in the upper 32 bits. */
+ syscall (SYS_clock_gettime, CLOCK_REALTIME, &ts);
+ nonce_lower = ts.tv_nsec;
+ nonce_upper = ts.tv_sec ^ clib_host_to_net_u32 (nonce_lower);
+
+ /* OR a caller-provided seed into the low-order 32 bits. */
+ nonce_lower |= seed;
+
+ /* Return 64-bit nonce. */
+ nonce = nonce_upper;
+ nonce = (nonce << 32) | nonce_lower;
+ return nonce;
+}
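+
+/*
+ * Usage sketch (illustrative): a zero seed leaves the low-order bits purely
+ * clock-driven, as in lisp_msg_put_mreq () below:
+ *
+ * u64 nonce = nonce_build (0);
+ */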
+
+void *
+lisp_msg_put_map_reply (vlib_buffer_t * b, mapping_t * records, u64 nonce,
+ u8 probe_bit)
+{
+ map_reply_hdr_t *h = vlib_buffer_put_uninit (b, sizeof (h[0]));
+
+ memset (h, 0, sizeof (h[0]));
+ MREP_TYPE (h) = LISP_MAP_REPLY;
+ MREP_NONCE (h) = nonce;
+ MREP_REC_COUNT (h) = 1;
+ MREP_RLOC_PROBE (h) = probe_bit;
+
+ lisp_msg_put_mreg_records (b, records);
+ return h;
+}
+
+void *
+lisp_msg_put_map_register (vlib_buffer_t * b, mapping_t * records,
+ u8 want_map_notify, u16 auth_data_len, u64 * nonce,
+ u32 * msg_len)
+{
+ u8 *auth_data = 0;
+
+ /* Basic header init */
+ map_register_hdr_t *h = vlib_buffer_put_uninit (b, sizeof (h[0]));
+
+ memset (h, 0, sizeof (h[0]));
+ MREG_TYPE (h) = LISP_MAP_REGISTER;
+ MREG_NONCE (h) = nonce_build (0);
+ MREG_WANT_MAP_NOTIFY (h) = want_map_notify ? 1 : 0;
+ MREG_REC_COUNT (h) = vec_len (records);
+
+ auth_data = vlib_buffer_put_uninit (b, auth_data_len);
+ memset (auth_data, 0, auth_data_len);
+
+ /* Put map register records */
+ lisp_msg_put_mreg_records (b, records);
+
+ nonce[0] = MREG_NONCE (h);
+ msg_len[0] = vlib_buffer_get_tail (b) - (u8 *) h;
+ return h;
+}
+
+void *
+lisp_msg_put_mreq (lisp_cp_main_t * lcm, vlib_buffer_t * b,
+ gid_address_t * seid, gid_address_t * deid,
+ gid_address_t * rlocs, u8 is_smr_invoked,
+ u8 rloc_probe_set, u64 * nonce)
+{
+ u8 loc_count = 0;
+
+ /* Basic header init */
+ map_request_hdr_t *h = vlib_buffer_put_uninit (b, sizeof (h[0]));
+
+ memset (h, 0, sizeof (h[0]));
+ MREQ_TYPE (h) = LISP_MAP_REQUEST;
+ MREQ_NONCE (h) = nonce_build (0);
+ MREQ_SMR_INVOKED (h) = is_smr_invoked ? 1 : 0;
+ MREQ_RLOC_PROBE (h) = rloc_probe_set ? 1 : 0;
+
+ /* We're adding one eid record */
+ increment_record_count (h);
+
+ /* Fill source eid */
+ lisp_msg_put_gid (b, seid);
+
+ /* Put itr rlocs */
+ lisp_msg_put_itr_rlocs (lcm, b, rlocs, &loc_count);
+ MREQ_ITR_RLOC_COUNT (h) = loc_count;
+
+ /* Put eid record */
+ lisp_msg_put_eid_rec (b, deid);
+
+ nonce[0] = MREQ_NONCE (h);
+ return h;
+}
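+
+/*
+ * Resulting map-request layout (sketch): a map_request_hdr_t, the
+ * source EID (AFI + address), the ITR-RLOC entries, then one EID
+ * record (eid_record_hdr_t followed by the destination EID).
+ */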
+
+void *
+lisp_msg_push_ecm (vlib_main_t * vm, vlib_buffer_t * b, int lp, int rp,
+ gid_address_t * la, gid_address_t * ra)
+{
+ ecm_hdr_t *h;
+ ip_address_t _src_ip, *src_ip = &_src_ip, _dst_ip, *dst_ip = &_dst_ip;
+ if (gid_address_type (la) != GID_ADDR_IP_PREFIX)
+ {
+ /* empty ip4 */
+ memset (src_ip, 0, sizeof (src_ip[0]));
+ memset (dst_ip, 0, sizeof (dst_ip[0]));
+ }
+ else
+ {
+ src_ip = &gid_address_ip (la);
+ dst_ip = &gid_address_ip (ra);
+ }
+
+ /* Push inner ip and udp */
+ pkt_push_udp_and_ip (vm, b, lp, rp, src_ip, dst_ip, 0);
+
+ /* Push lisp ecm hdr */
+ h = pkt_push_ecm_hdr (b);
+
+ return h;
+}
+
+static u32
+msg_type_to_hdr_len (lisp_msg_type_e type)
+{
+ switch (type)
+ {
+ case LISP_MAP_REQUEST:
+ return (sizeof (map_request_hdr_t));
+ case LISP_MAP_REPLY:
+ return (sizeof (map_reply_hdr_t));
+ default:
+ return (0);
+ }
+}
+
+void *
+lisp_msg_pull_hdr (vlib_buffer_t * b, lisp_msg_type_e type)
+{
+ return vlib_buffer_pull (b, msg_type_to_hdr_len (type));
+}
+
+u32
+lisp_msg_parse_addr (vlib_buffer_t * b, gid_address_t * eid)
+{
+ u32 len;
+ memset (eid, 0, sizeof (*eid));
+ len = gid_address_parse (vlib_buffer_get_current (b), eid);
+ if (len != ~0)
+ vlib_buffer_pull (b, len);
+ return len;
+}
+
+u32
+lisp_msg_parse_eid_rec (vlib_buffer_t * b, gid_address_t * eid)
+{
+ eid_record_hdr_t *h = vlib_buffer_get_current (b);
+ u32 len;
+ memset (eid, 0, sizeof (*eid));
+ len = gid_address_parse (EID_REC_ADDR (h), eid);
+ if (len == ~0)
+ return len;
+
+ gid_address_ippref_len (eid) = EID_REC_MLEN (h);
+ vlib_buffer_pull (b, len + sizeof (eid_record_hdr_t));
+
+ return len + sizeof (eid_record_hdr_t);
+}
+
+u32
+lisp_msg_parse_itr_rlocs (vlib_buffer_t * b, gid_address_t ** rlocs,
+ u8 rloc_count)
+{
+ gid_address_t tloc;
+ u32 i, len = 0, tlen = 0;
+
+ /* rloc_count is MREQ_ITR_RLOC_COUNT (mreq_hdr) + 1 */
+ for (i = 0; i < rloc_count; i++)
+ {
+ len = lisp_msg_parse_addr (b, &tloc);
+ if (len == ~0)
+ return len;
+ vec_add1 (*rlocs, tloc);
+ tlen += len;
+ }
+ return tlen;
+}
+
+u32
+lisp_msg_parse_loc (vlib_buffer_t * b, locator_t * loc)
+{
+ int len;
+
+ len = locator_parse (vlib_buffer_get_current (b), loc);
+ if (len == ~0)
+ return ~0;
+
+ if (!vlib_buffer_has_space (b, len))
+ return ~0;
+ vlib_buffer_pull (b, len);
+
+ return len;
+}
+
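+/*
+ * A mapping record on the wire (sketch): a mapping_record_hdr_t, the
+ * EID (AFI + address), then MAP_REC_LOC_COUNT locator records, each a
+ * locator_hdr_t followed by its locator address.
+ */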
+u32
+lisp_msg_parse_mapping_record (vlib_buffer_t * b, gid_address_t * eid,
+ locator_t ** locs, locator_t * probed_)
+{
+ void *h = 0, *loc_hdr = 0;
+ locator_t loc, *probed = 0;
+ int i = 0, len = 0, llen = 0;
+
+ h = vlib_buffer_get_current (b);
+ if (!vlib_buffer_has_space (b, sizeof (mapping_record_hdr_t)))
+ return ~0;
+
+ vlib_buffer_pull (b, sizeof (mapping_record_hdr_t));
+
+ memset (eid, 0, sizeof (*eid));
+ len = gid_address_parse (vlib_buffer_get_current (b), eid);
+ if (len == ~0)
+ return len;
+
+ if (!vlib_buffer_has_space (b, len))
+ return ~0;
+
+ vlib_buffer_pull (b, len);
+ if (GID_ADDR_IP_PREFIX == gid_address_type (eid))
+ gid_address_ippref_len (eid) = MAP_REC_EID_PLEN (h);
+
+ for (i = 0; i < MAP_REC_LOC_COUNT (h); i++)
+ {
+ loc_hdr = vlib_buffer_get_current (b);
+
+ llen = lisp_msg_parse_loc (b, &loc);
+ if (llen == ~0)
+ return llen;
+ vec_add1 (*locs, loc);
+ len += llen;
+
+ if (LOC_PROBED (loc_hdr))
+ {
+ if (probed != 0)
+ clib_warning
+ ("Multiple locators probed! Probing only the first!");
+ else
+ probed = &loc;
+ }
+ }
+ /* XXX */
+ if (probed_ != 0 && probed)
+ *probed_ = *probed;
+
+ return len + sizeof (mapping_record_hdr_t);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp_msg_serdes.h b/src/vnet/lisp-cp/lisp_msg_serdes.h
new file mode 100644
index 00000000..d794eff6
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_msg_serdes.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VNET_LISP_GPE_LISP_MSG_BUILDER_H_
+#define VNET_LISP_GPE_LISP_MSG_BUILDER_H_
+
+#include <vnet/vnet.h>
+#include <vnet/lisp-cp/lisp_cp_messages.h>
+#include <vnet/lisp-cp/control.h>
+
+void *lisp_msg_put_mreq (lisp_cp_main_t * lcm, vlib_buffer_t * b,
+ gid_address_t * seid, gid_address_t * deid,
+ gid_address_t * rlocs, u8 is_smr_invoked,
+ u8 rloc_probe_set, u64 * nonce);
+
+void *lisp_msg_put_map_register (vlib_buffer_t * b, mapping_t * records,
+ u8 want_map_notify, u16 auth_data_len,
+ u64 * nonce, u32 * msg_len);
+
+void *lisp_msg_push_ecm (vlib_main_t * vm, vlib_buffer_t * b, int lp, int rp,
+ gid_address_t * la, gid_address_t * ra);
+
+void *lisp_msg_put_map_reply (vlib_buffer_t * b, mapping_t * records,
+ u64 nonce, u8 probe_bit);
+
+u32
+lisp_msg_parse_mapping_record (vlib_buffer_t * b, gid_address_t * eid,
+ locator_t ** locs, locator_t * probed_);
+
+u32 lisp_msg_parse_addr (vlib_buffer_t * b, gid_address_t * eid);
+
+u32 lisp_msg_parse_eid_rec (vlib_buffer_t * b, gid_address_t * eid);
+
+u32
+lisp_msg_parse_itr_rlocs (vlib_buffer_t * b, gid_address_t ** rlocs,
+ u8 rloc_count);
+
+#endif /* VNET_LISP_GPE_LISP_MSG_BUILDER_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp_types.c b/src/vnet/lisp-cp/lisp_types.c
new file mode 100644
index 00000000..05f046fa
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_types.c
@@ -0,0 +1,1779 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lisp-cp/lisp_types.h>
+
+static u16 gid_address_put_no_vni (u8 * b, gid_address_t * gid);
+static u16 gid_address_size_to_put_no_vni (gid_address_t * gid);
+static u16 fid_addr_size_to_write (fid_address_t * a);
+
+u32 mac_parse (u8 * offset, u8 * a);
+
+typedef u16 (*size_to_write_fct) (void *);
+typedef void *(*cast_fct) (gid_address_t *);
+typedef u16 (*serdes_fct) (u8 *, void *);
+typedef u8 (*addr_len_fct) (void *);
+typedef void (*copy_fct) (void *, void *);
+typedef void (*free_fct) (void *);
+typedef int (*cmp_fct) (void *, void *);
+
+size_to_write_fct size_to_write_fcts[GID_ADDR_TYPES] =
+ { ip_prefix_size_to_write, lcaf_size_to_write, mac_size_to_write,
+ sd_size_to_write, nsh_size_to_write, 0 /* arp */ , 0 /* ndp */ ,
+ no_addr_size_to_write
+};
+
+serdes_fct write_fcts[GID_ADDR_TYPES] =
+ { ip_prefix_write, lcaf_write, mac_write, sd_write, nsh_write, 0 /* arp */ ,
+ 0 /* ndp */ , no_addr_write
+};
+
+cast_fct cast_fcts[GID_ADDR_TYPES] =
+ { ip_prefix_cast, lcaf_cast, mac_cast, sd_cast, nsh_cast, 0 /* arp */ ,
+ 0 /* ndp */ , no_addr_cast
+};
+
+addr_len_fct addr_len_fcts[GID_ADDR_TYPES] =
+ { ip_prefix_length, lcaf_length, mac_length, sd_length, nsh_length,
+ 0 /* arp */ , 0 /* ndp */ , no_addr_length
+};
+
+copy_fct copy_fcts[GID_ADDR_TYPES] =
+ { ip_prefix_copy, lcaf_copy, mac_copy, sd_copy, nsh_copy, 0 /* arp */ ,
+ 0 /* ndp */ , no_addr_copy
+};
+
+#define foreach_lcaf_type \
+ _(1, no_addr) \
+ _(0, NULL) \
+ _(1, vni) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(1, sd) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(0, NULL) \
+ _(1, nsh)
+
+#define _(cond, name) \
+ u16 name ## _write (u8 * p, void * a); \
+ u16 name ## _parse (u8 * p, void * a); \
+ u16 name ## _size_to_write (void * a); \
+ void name ## _free (void * a); \
+ void name ## _copy (void * dst, void * src); \
+ u8 name ## _length (void * a); \
+ int name ## _cmp (void *, void *);
+foreach_lcaf_type
+#undef _
+#define CONCAT(a,b) a##_##b
+#define IF(c, t, e) CONCAT(IF, c)(t, e)
+#define IF_0(t, e) e
+#define IF_1(t, e) t
+#define EXPAND_FCN(cond, fcn) \
+ IF(cond, fcn, NULL)
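+/* e.g. for the _(1, vni) entry, EXPAND_FCN (1, vni_cmp) expands via
+ * IF_1 to vni_cmp, while _(0, NULL) entries expand via IF_0 to NULL,
+ * leaving NULL slots in the tables for unsupported LCAF types */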
+ cmp_fct lcaf_cmp_fcts[LCAF_TYPES] =
+{
+#define _(cond, name) \
+ EXPAND_FCN(cond, name##_cmp),
+ foreach_lcaf_type
+#undef _
+};
+
+addr_len_fct lcaf_body_length_fcts[LCAF_TYPES] = {
+#define _(cond, name) \
+ EXPAND_FCN(cond, name##_length),
+ foreach_lcaf_type
+#undef _
+};
+
+copy_fct lcaf_copy_fcts[LCAF_TYPES] = {
+#define _(cond, name) \
+ EXPAND_FCN(cond, name##_copy),
+ foreach_lcaf_type
+#undef _
+};
+
+free_fct lcaf_free_fcts[LCAF_TYPES] = {
+#define _(cond, name) \
+ EXPAND_FCN(cond, name##_free),
+ foreach_lcaf_type
+#undef _
+};
+
+size_to_write_fct lcaf_size_to_write_fcts[LCAF_TYPES] = {
+#define _(cond, name) \
+ EXPAND_FCN(cond, name##_size_to_write),
+ foreach_lcaf_type
+#undef _
+};
+
+serdes_fct lcaf_write_fcts[LCAF_TYPES] = {
+#define _(cond, name) \
+ EXPAND_FCN(cond, name##_write),
+ foreach_lcaf_type
+#undef _
+};
+
+serdes_fct lcaf_parse_fcts[LCAF_TYPES] = {
+#define _(cond, name) \
+ EXPAND_FCN(cond, name##_parse),
+ foreach_lcaf_type
+#undef _
+};
+
+u8 *
+format_ip_address (u8 * s, va_list * args)
+{
+ ip_address_t *a = va_arg (*args, ip_address_t *);
+ u8 ver = ip_addr_version (a);
+ if (ver == IP4)
+ {
+ return format (s, "%U", format_ip4_address, &ip_addr_v4 (a));
+ }
+ else if (ver == IP6)
+ {
+ return format (s, "%U", format_ip6_address, &ip_addr_v6 (a));
+ }
+ else
+ {
+ clib_warning ("Can't format IP version %d!", ver);
+ return 0;
+ }
+}
+
+uword
+unformat_ip_address (unformat_input_t * input, va_list * args)
+{
+ ip_address_t *a = va_arg (*args, ip_address_t *);
+
+ memset (a, 0, sizeof (*a));
+ if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (a)))
+ ip_addr_version (a) = IP4;
+ else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (a)))
+ ip_addr_version (a) = IP6;
+ else
+ return 0;
+ return 1;
+}
+
+u8 *
+format_ip_prefix (u8 * s, va_list * args)
+{
+ ip_prefix_t *a = va_arg (*args, ip_prefix_t *);
+ return format (s, "%U/%d", format_ip_address, &ip_prefix_addr (a),
+ ip_prefix_len (a));
+}
+
+uword
+unformat_ip_prefix (unformat_input_t * input, va_list * args)
+{
+ ip_prefix_t *a = va_arg (*args, ip_prefix_t *);
+ if (unformat (input, "%U/%d", unformat_ip_address, &ip_prefix_addr (a),
+ &ip_prefix_len (a)))
+ {
+ if ((ip_prefix_version (a) == IP4 && 32 < ip_prefix_len (a)) ||
+ (ip_prefix_version (a) == IP6 && 128 < ip_prefix_len (a)))
+ {
+ clib_warning ("Prefix length too big: %d!", ip_prefix_len (a));
+ return 0;
+ }
+ ip_prefix_normalize (a);
+ }
+ else
+ return 0;
+ return 1;
+}
+
+uword
+unformat_mac_address (unformat_input_t * input, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return unformat (input, "%x:%x:%x:%x:%x:%x", &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5]);
+}
+
+u8 *
+format_mac_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
+
+uword
+unformat_nsh_address (unformat_input_t * input, va_list * args)
+{
+ nsh_t *a = va_arg (*args, nsh_t *);
+ return unformat (input, "SPI:%d SI:%d", &a->spi, &a->si);
+}
+
+u8 *
+format_nsh_address (u8 * s, va_list * args)
+{
+ nsh_t *a = va_arg (*args, nsh_t *);
+ return format (s, "SPI:%d SI:%d", a->spi, a->si);
+}
+
+u8 *
+format_fid_nsh_address (u8 * s, va_list * args)
+{
+ u32 *a = va_arg (*args, u32 *);
+ return format (s, "SPI:%d SI:%d", *a >> 8, *a & 0xff);
+}
+
+u8 *
+format_fid_address (u8 * s, va_list * args)
+{
+ fid_address_t *a = va_arg (*args, fid_address_t *);
+
+ switch (fid_addr_type (a))
+ {
+ case FID_ADDR_IP_PREF:
+ return format (s, "%U", format_ip_prefix, &fid_addr_ippref (a));
+ case FID_ADDR_MAC:
+ return format (s, "%U", format_mac_address, &fid_addr_mac (a));
+ case FID_ADDR_NSH:
+ return format (s, "%U", format_fid_nsh_address, &fid_addr_nsh (a));
+
+ default:
+ clib_warning ("Can't format fid address type %d!", fid_addr_type (a));
+ return 0;
+ }
+ return 0;
+}
+
+u8 *
+format_gid_address (u8 * s, va_list * args)
+{
+ gid_address_t *a = va_arg (*args, gid_address_t *);
+ u8 type = gid_address_type (a);
+ switch (type)
+ {
+ case GID_ADDR_IP_PREFIX:
+ return format (s, "[%d] %U", gid_address_vni (a), format_ip_prefix,
+ &gid_address_ippref (a));
+ case GID_ADDR_SRC_DST:
+ return format (s, "[%d] %U|%U", gid_address_vni (a),
+ format_fid_address, &gid_address_sd_src (a),
+ format_fid_address, &gid_address_sd_dst (a));
+ case GID_ADDR_MAC:
+ return format (s, "[%d] %U", gid_address_vni (a), format_mac_address,
+ &gid_address_mac (a));
+ case GID_ADDR_ARP:
+ case GID_ADDR_NDP:
+ return format (s, "[%d, %U]", gid_address_arp_ndp_bd (a),
+ format_ip_address, &gid_address_arp_ndp_ip (a));
+ case GID_ADDR_NSH:
+ return format (s, "%U", format_nsh_address, &gid_address_nsh (a));
+
+ default:
+ clib_warning ("Can't format gid type %d", type);
+ return 0;
+ }
+ return 0;
+}
+
+uword
+unformat_fid_address (unformat_input_t * i, va_list * args)
+{
+ fid_address_t *a = va_arg (*args, fid_address_t *);
+ ip_prefix_t ippref;
+ u8 mac[6] = { 0 };
+ nsh_t nsh;
+
+ if (unformat (i, "%U", unformat_ip_prefix, &ippref))
+ {
+ fid_addr_type (a) = FID_ADDR_IP_PREF;
+ ip_prefix_copy (&fid_addr_ippref (a), &ippref);
+ }
+ else if (unformat (i, "%U", unformat_mac_address, mac))
+ {
+ fid_addr_type (a) = FID_ADDR_MAC;
+ mac_copy (fid_addr_mac (a), mac);
+ }
+ else if (unformat (i, "%U", unformat_nsh_address, &nsh))
+ {
+ fid_addr_type (a) = FID_ADDR_NSH;
+ nsh_copy (&fid_addr_nsh (a), &nsh);
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+uword
+unformat_hmac_key_id (unformat_input_t * input, va_list * args)
+{
+ u32 *key_id = va_arg (*args, u32 *);
+ u8 *s = 0;
+
+ if (unformat (input, "%s", &s))
+ {
+ if (!strcmp ((char *) s, "sha1"))
+ key_id[0] = HMAC_SHA_1_96;
+ else if (!strcmp ((char *) s, "sha256"))
+ key_id[0] = HMAC_SHA_256_128;
+ else
+ {
+ clib_warning ("invalid key_id: '%s'", s);
+ key_id[0] = HMAC_NO_KEY;
+ }
+ }
+ else
+ return 0;
+
+ vec_free (s);
+ return 1;
+}
+
+uword
+unformat_gid_address (unformat_input_t * input, va_list * args)
+{
+ gid_address_t *a = va_arg (*args, gid_address_t *);
+ u8 mac[6] = { 0 };
+ ip_prefix_t ippref;
+ fid_address_t sim1, sim2;
+ nsh_t nsh;
+
+ memset (&ippref, 0, sizeof (ippref));
+ memset (&sim1, 0, sizeof (sim1));
+ memset (&sim2, 0, sizeof (sim2));
+
+ if (unformat (input, "%U|%U", unformat_fid_address, &sim1,
+ unformat_fid_address, &sim2))
+ {
+ gid_address_sd_src (a) = sim1;
+ gid_address_sd_dst (a) = sim2;
+ gid_address_type (a) = GID_ADDR_SRC_DST;
+ }
+ else if (unformat (input, "%U", unformat_ip_prefix, &ippref))
+ {
+ ip_prefix_copy (&gid_address_ippref (a), &ippref);
+ gid_address_type (a) = GID_ADDR_IP_PREFIX;
+ }
+ else if (unformat (input, "%U", unformat_mac_address, mac))
+ {
+ mac_copy (gid_address_mac (a), mac);
+ gid_address_type (a) = GID_ADDR_MAC;
+ }
+ else if (unformat (input, "%U", unformat_nsh_address, &nsh))
+ {
+ nsh_copy (&gid_address_nsh (a), &nsh);
+ gid_address_type (a) = GID_ADDR_NSH;
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+uword
+unformat_negative_mapping_action (unformat_input_t * input, va_list * args)
+{
+ u32 *action = va_arg (*args, u32 *);
+ u8 *s = 0;
+
+ if (unformat (input, "%s", &s))
+ {
+ if (!strcmp ((char *) s, "no-action"))
+ action[0] = LISP_NO_ACTION;
+ else if (!strcmp ((char *) s, "natively-forward"))
+ action[0] = LISP_FORWARD_NATIVE;
+ else if (!strcmp ((char *) s, "send-map-request"))
+ action[0] = LISP_SEND_MAP_REQUEST;
+ else if (!strcmp ((char *) s, "drop"))
+ action[0] = LISP_DROP;
+ else
+ {
+ clib_warning ("invalid action: '%s'", s);
+ action[0] = LISP_DROP;
+ }
+ }
+ else
+ return 0;
+
+ vec_free (s);
+ return 1;
+}
+
+u8 *
+format_hmac_key_id (u8 * s, va_list * args)
+{
+ lisp_key_type_t key_id = va_arg (*args, lisp_key_type_t);
+
+ switch (key_id)
+ {
+ case HMAC_SHA_1_96:
+ return format (s, "sha1");
+ case HMAC_SHA_256_128:
+ return format (s, "sha256");
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+u8 *
+format_negative_mapping_action (u8 * s, va_list * args)
+{
+ lisp_action_e action = va_arg (*args, lisp_action_e);
+
+ switch (action)
+ {
+ case LISP_NO_ACTION:
+ s = format (s, "no-action");
+ break;
+ case LISP_FORWARD_NATIVE:
+ s = format (s, "natively-forward");
+ break;
+ case LISP_SEND_MAP_REQUEST:
+ s = format (s, "send-map-request");
+ break;
+ case LISP_DROP:
+ default:
+ s = format (s, "drop");
+ break;
+ }
+ return (s);
+}
+
+u16
+ip_address_size (const ip_address_t * a)
+{
+ switch (ip_addr_version (a))
+ {
+ case IP4:
+ return sizeof (ip4_address_t);
+ case IP6:
+ return sizeof (ip6_address_t);
+ }
+ return 0;
+}
+
+u16
+ip_version_to_size (u8 ver)
+{
+ switch (ver)
+ {
+ case IP4:
+ return sizeof (ip4_address_t);
+ case IP6:
+ return sizeof (ip6_address_t);
+ }
+ return 0;
+}
+
+u8
+ip_version_to_max_plen (u8 ver)
+{
+ switch (ver)
+ {
+ case IP4:
+ return 32;
+ case IP6:
+ return 128;
+ }
+ return 0;
+}
+
+always_inline lisp_afi_e
+ip_version_to_iana_afi (u16 version)
+{
+ switch (version)
+ {
+ case IP4:
+ return LISP_AFI_IP;
+ case IP6:
+ return LISP_AFI_IP6;
+ default:
+ return 0;
+ }
+ return 0;
+}
+
+always_inline u8
+ip_iana_afi_to_version (lisp_afi_e afi)
+{
+ switch (afi)
+ {
+ case LISP_AFI_IP:
+ return IP4;
+ case LISP_AFI_IP6:
+ return IP6;
+ default:
+ return 0;
+ }
+ return 0;
+}
+
+u16
+ip_address_size_to_write (ip_address_t * a)
+{
+ return ip_address_size (a) + sizeof (u16);
+}
+
+u16
+ip_address_iana_afi (ip_address_t * a)
+{
+ return ip_version_to_iana_afi (ip_addr_version (a));
+}
+
+u8
+ip_address_max_len (u8 version)
+{
+ return version == IP4 ? 32 : 128;
+}
+
+u16
+ip4_address_size_to_put ()
+{
+ /* sizeof (u16) AFI field + sizeof (ip4_address_t) */
+ return 6;
+}
+
+u16
+ip6_address_size_to_put ()
+{
+ /* sizeof (u16) AFI field + sizeof (ip6_address_t) */
+ return 18;
+}
+
+u32
+ip4_address_put (u8 * b, ip4_address_t * a)
+{
+ *(u16 *) b = clib_host_to_net_u16 (ip_version_to_iana_afi (IP4));
+ u8 *p = b + sizeof (u16);
+ clib_memcpy (p, a, sizeof (*a));
+ return ip4_address_size_to_put ();
+}
+
+u32
+ip6_address_put (u8 * b, ip6_address_t * a)
+{
+ *(u16 *) b = clib_host_to_net_u16 (ip_version_to_iana_afi (IP6));
+ u8 *p = b + sizeof (u16);
+ clib_memcpy (p, a, sizeof (*a));
+ return ip6_address_size_to_put ();
+}
+
+u32
+ip_address_put (u8 * b, ip_address_t * a)
+{
+ u32 len = ip_address_size (a);
+ *(u16 *) b = clib_host_to_net_u16 (ip_address_iana_afi (a));
+ u8 *p = b + sizeof (u16);
+ clib_memcpy (p, &ip_addr_addr (a), len);
+ return (len + sizeof (u16));
+}
+
+u32
+ip_address_parse (void *offset, u16 iana_afi, ip_address_t * dst)
+{
+ ip_addr_version (dst) = ip_iana_afi_to_version (iana_afi);
+ u8 size = ip_version_to_size (ip_addr_version (dst));
+ clib_memcpy (&ip_addr_addr (dst), offset + sizeof (u16), size);
+ return (sizeof (u16) + size);
+}
+
+void
+gid_to_dp_address (gid_address_t * g, dp_address_t * d)
+{
+ switch (gid_address_type (g))
+ {
+ case GID_ADDR_SRC_DST:
+ switch (gid_address_sd_dst_type (g))
+ {
+ case FID_ADDR_IP_PREF:
+ ip_prefix_copy (&d->ippref, &gid_address_sd_dst_ippref (g));
+ d->type = FID_ADDR_IP_PREF;
+ break;
+ case FID_ADDR_MAC:
+ mac_copy (&d->mac, &gid_address_sd_dst_mac (g));
+ d->type = FID_ADDR_MAC;
+ break;
+ default:
+ clib_warning ("Source/Dest address type %d not supported!",
+ gid_address_sd_dst_type (g));
+ break;
+ }
+ break;
+ case GID_ADDR_IP_PREFIX:
+ ip_prefix_copy (&d->ippref, &gid_address_ippref (g));
+ d->type = FID_ADDR_IP_PREF;
+ break;
+ case GID_ADDR_MAC:
+ mac_copy (&d->mac, &gid_address_mac (g));
+ d->type = FID_ADDR_MAC;
+ break;
+ case GID_ADDR_NSH:
+ default:
+ d->nsh = gid_address_nsh (g).spi << 8 | gid_address_nsh (g).si;
+ d->type = FID_ADDR_NSH;
+ break;
+ }
+}
+
+u32
+lcaf_hdr_parse (void *offset, lcaf_t * lcaf)
+{
+ lcaf_hdr_t *lh = offset;
+ lcaf->type = lh->type;
+
+ /* this is a bit of a hack: since the LCAF Instance ID is the
+ only LCAF type that uses the reserved2 field, we can set it here.
+ If any other LCAF format starts using reserved2 as well, this needs
+ to be moved elsewhere */
+ lcaf_vni_len (lcaf) = lh->reserved2;
+
+ return sizeof (lh[0]);
+}
+
+static u8
+iana_afi_to_fid_addr_type (u16 type)
+{
+ switch (type)
+ {
+ case LISP_AFI_IP:
+ case LISP_AFI_IP6:
+ return FID_ADDR_IP_PREF;
+
+ case LISP_AFI_MAC:
+ return FID_ADDR_MAC;
+ }
+ return ~0;
+}
+
+static u16
+fid_addr_parse (u8 * p, fid_address_t * a)
+{
+ u16 afi = clib_net_to_host_u16 (*(u16 *) p);
+ fid_addr_type (a) = iana_afi_to_fid_addr_type (afi);
+ ip_address_t *ip_addr = &ip_prefix_addr (&fid_addr_ippref (a));
+
+ switch (fid_addr_type (a))
+ {
+ case FID_ADDR_MAC:
+ return mac_parse (p, fid_addr_mac (a));
+
+ case FID_ADDR_IP_PREF:
+ return ip_address_parse (p, afi, ip_addr);
+
+ case FID_ADDR_NSH:
+ break;
+ }
+ return ~0;
+}
+
+/* evaluate a sub-parse: on failure return ~0 from the enclosing
+ * function, otherwise add the consumed length to dst */
+#define INC(dst, exp) \
+do { \
+ u16 _sum = (exp); \
+ if ((u16)~0 == _sum) \
+ return ~0; \
+ dst += _sum; \
+} while (0)
+
+void
+nsh_free (void *a)
+{
+ /* nothing to do */
+}
+
+u16
+nsh_parse (u8 * p, void *a)
+{
+ lcaf_spi_hdr_t *h = (lcaf_spi_hdr_t *) p;
+ gid_address_t *g = a;
+
+ gid_address_type (g) = GID_ADDR_NSH;
+ gid_address_nsh_spi (g) = clib_net_to_host_u32 (LCAF_SPI_SI (h)) >> 8;
+ gid_address_nsh_si (g) = (u8) clib_net_to_host_u32 (LCAF_SPI_SI (h));
+
+ return sizeof (lcaf_spi_hdr_t);
+}
+
+int
+nsh_cmp (void *a1, void *a2)
+{
+ nsh_t *n1 = a1;
+ nsh_t *n2 = a2;
+
+ if (n1->spi != n2->spi)
+ return 1;
+ if (n1->si != n2->si)
+ return 1;
+ return 0;
+}
+
+u16
+sd_parse (u8 * p, void *a)
+{
+ lcaf_src_dst_hdr_t *sd_hdr;
+ gid_address_t *g = a;
+ u16 size = 0;
+ fid_address_t *src = &gid_address_sd_src (g);
+ fid_address_t *dst = &gid_address_sd_dst (g);
+
+ gid_address_type (g) = GID_ADDR_SRC_DST;
+
+ sd_hdr = (lcaf_src_dst_hdr_t *) (p + size);
+ size += sizeof (sd_hdr[0]);
+
+ INC (size, fid_addr_parse (p + size, src));
+ INC (size, fid_addr_parse (p + size, dst));
+
+ if (fid_addr_type (src) == FID_ADDR_IP_PREF)
+ {
+ ip_prefix_t *ippref = &fid_addr_ippref (src);
+ ip_prefix_len (ippref) = LCAF_SD_SRC_ML (sd_hdr);
+ }
+ if (fid_addr_type (dst) == FID_ADDR_IP_PREF)
+ {
+ ip_prefix_t *ippref = &fid_addr_ippref (dst);
+ ip_prefix_len (ippref) = LCAF_SD_DST_ML (sd_hdr);
+ }
+ return size;
+}
+
+u16
+try_parse_src_dst_lcaf (u8 * p, gid_address_t * a)
+{
+ lcaf_t lcaf;
+ u16 size = sizeof (u16); /* skip AFI */
+
+ size += lcaf_hdr_parse (p + size, &lcaf);
+
+ if (LCAF_SOURCE_DEST != lcaf_type (&lcaf))
+ return ~0;
+
+ INC (size, sd_parse (p + size, a));
+ return size;
+}
+
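+/* note: lcaf_t sits at offset 0 of gid_address_t's union (see
+ * lisp_types.h), so the pointer handed to the LCAF parsers below can
+ * be viewed as either type */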
+u16
+vni_parse (u8 * p, void *a)
+{
+ lcaf_t *lcaf = a;
+ gid_address_t *g = a;
+ u16 size = 0;
+
+ gid_address_vni (g) = clib_net_to_host_u32 (*(u32 *) p);
+ size += sizeof (u32);
+ gid_address_vni_mask (g) = lcaf_vni_len (lcaf);
+
+ /* nested LCAFs are not supported except for src/dst with vni - to handle
+ * that case, look at the next AFI and process the src/dst LCAF separately */
+ u16 afi = clib_net_to_host_u16 (*((u16 *) (p + size)));
+ if (LISP_AFI_LCAF == afi)
+ {
+ INC (size, try_parse_src_dst_lcaf (p + size, g));
+ }
+ else
+ INC (size, gid_address_parse (p + size, g));
+
+ return size;
+}
+
+u16
+no_addr_parse (u8 * p, void *a)
+{
+ /* do nothing */
+ return 0;
+}
+
+u32
+lcaf_parse (void *offset, gid_address_t * addr)
+{
+ /* skip AFI type */
+ offset += sizeof (u16);
+ lcaf_t *lcaf = &gid_address_lcaf (addr);
+
+ u32 size = lcaf_hdr_parse (offset, lcaf);
+ u8 type = lcaf_type (lcaf);
+
+ if (!lcaf_parse_fcts[type])
+ {
+ clib_warning ("Unsupported LCAF type: %u", type);
+ return ~0;
+ }
+ INC (size, (*lcaf_parse_fcts[type]) (offset + size, lcaf));
+ return sizeof (u16) + size;
+}
+
+void
+vni_free (void *a)
+{
+ vni_t *v = a;
+ gid_address_free (vni_gid (v));
+ clib_mem_free (vni_gid (v));
+}
+
+void
+no_addr_free (void *a)
+{
+ /* nothing to do */
+}
+
+void
+sd_free (void *a)
+{
+ /* nothing */
+}
+
+void
+gid_address_free (gid_address_t * a)
+{
+ if (gid_address_type (a) != GID_ADDR_LCAF)
+ return;
+
+ lcaf_t *lcaf = &gid_address_lcaf (a);
+ u8 type = lcaf_type (lcaf);
+ (*lcaf_free_fcts[type]) (lcaf);
+}
+
+void
+gid_address_from_ip (gid_address_t * g, ip_address_t * ip)
+{
+ memset (g, 0, sizeof (g[0]));
+ ip_address_set (&gid_address_ip (g), ip, ip_addr_version (ip));
+ gid_address_ippref_len (g) = ip_address_max_len (ip_addr_version (ip));
+}
+
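+/* returns 0 when equal, -1 on version mismatch, 1 when ip1 > ip2 and
+ * 2 when ip2 > ip1, mirroring the gid_address_cmp convention below */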
+int
+ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2)
+{
+ int res = 0;
+ if (ip_addr_version (ip1) != ip_addr_version (ip2))
+ return -1;
+ res =
+ memcmp (&ip_addr_addr (ip1), &ip_addr_addr (ip2), ip_address_size (ip1));
+
+ if (res < 0)
+ res = 2;
+ else if (res > 0)
+ res = 1;
+
+ return res;
+}
+
+void
+ip_address_copy (ip_address_t * dst, const ip_address_t * src)
+{
+ if (IP4 == ip_addr_version (src))
+ {
+ /* don't copy any garbage from the union */
+ memset (dst, 0, sizeof (*dst));
+ dst->ip.v4 = src->ip.v4;
+ dst->version = IP4;
+ }
+ else
+ {
+ clib_memcpy (dst, src, sizeof (ip_address_t));
+ }
+}
+
+void
+ip_address_copy_addr (void *dst, const ip_address_t * src)
+{
+ clib_memcpy (dst, src, ip_address_size (src));
+}
+
+void
+ip_address_set (ip_address_t * dst, const void *src, u8 version)
+{
+ clib_memcpy (dst, src, ip_version_to_size (version));
+ ip_addr_version (dst) = version;
+}
+
+void
+ip_address_to_46 (const ip_address_t * addr,
+ ip46_address_t * a, fib_protocol_t * proto)
+{
+ *proto = (IP4 == ip_addr_version (addr) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+ switch (*proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip46_address_set_ip4 (a, &addr->ip.v4);
+ break;
+ case FIB_PROTOCOL_IP6:
+ a->ip6 = addr->ip.v6;
+ break;
+ default:
+ ASSERT (0);
+ break;
+ }
+}
+
+static void
+ip_prefix_normalize_ip4 (ip4_address_t * ip4, u8 preflen)
+{
+ u32 mask = ~0;
+
+ ASSERT (ip4);
+
+ if (32 <= preflen)
+ {
+ return;
+ }
+
+ mask = pow2_mask (preflen) << (32 - preflen);
+ mask = clib_host_to_net_u32 (mask);
+ ip4->data_u32 &= mask;
+}
+
+static void
+ip_prefix_normalize_ip6 (ip6_address_t * ip6, u8 preflen)
+{
+ u8 mask_6[16];
+ u32 *m;
+ u8 j, i0, i1;
+
+ ASSERT (ip6);
+
+ memset (mask_6, 0, sizeof (mask_6));
+
+ if (128 <= preflen)
+ {
+ return;
+ }
+
+ i1 = preflen % 32;
+ i0 = preflen / 32;
+ m = (u32 *) & mask_6[0];
+
+ for (j = 0; j < i0; j++)
+ {
+ m[j] = ~0;
+ }
+
+ if (i1)
+ {
+ m[i0] = clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
+ }
+
+ for (j = 0; j < sizeof (mask_6); j++)
+ {
+ ip6->as_u8[j] &= mask_6[j];
+ }
+}
+
+void
+ip_prefix_normalize (ip_prefix_t * a)
+{
+ u8 preflen = ip_prefix_len (a);
+
+ switch (ip_prefix_version (a))
+ {
+ case IP4:
+ ip_prefix_normalize_ip4 (&ip_prefix_v4 (a), preflen);
+ break;
+
+ case IP6:
+ ip_prefix_normalize_ip6 (&ip_prefix_v6 (a), preflen);
+ break;
+
+ default:
+ ASSERT (0);
+ }
+}
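+
+/*
+ * Example: normalizing 10.1.2.3/8 clears the host bits and yields
+ * 10.0.0.0/8, so prefixes that cover the same range compare equal in
+ * ip_prefix_cmp regardless of how they were entered.
+ */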
+
+void *
+ip_prefix_cast (gid_address_t * a)
+{
+ return &gid_address_ippref (a);
+}
+
+u16
+ip_prefix_size_to_write (void *pref)
+{
+ ip_prefix_t *a = (ip_prefix_t *) pref;
+ return ip_address_size_to_write (&ip_prefix_addr (a));
+}
+
+u16
+ip_prefix_write (u8 * p, void *gid)
+{
+ gid_address_t *g = gid;
+ ip_prefix_t *a = &gid_address_ippref (g);
+
+ switch (ip_prefix_version (a))
+ {
+ case IP4:
+ return ip4_address_put (p, &ip_prefix_v4 (a));
+ case IP6:
+ return ip6_address_put (p, &ip_prefix_v6 (a));
+ }
+ return 0;
+}
+
+u8
+ip_prefix_length (void *a)
+{
+ return ip_prefix_len ((ip_prefix_t *) a);
+}
+
+void
+ip_prefix_copy (void *dst, void *src)
+{
+ clib_memcpy (dst, src, sizeof (ip_prefix_t));
+}
+
+void
+mac_copy (void *dst, void *src)
+{
+ clib_memcpy (dst, src, 6);
+}
+
+void
+sd_copy (void *dst, void *src)
+{
+ clib_memcpy (dst, src, sizeof (source_dest_t));
+}
+
+void
+nsh_copy (void *dst, void *src)
+{
+ clib_memcpy (dst, src, sizeof (nsh_t));
+}
+
+int
+ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2)
+{
+ int cmp = 0;
+
+ ip_prefix_normalize (p1);
+ ip_prefix_normalize (p2);
+
+ cmp = ip_address_cmp (&ip_prefix_addr (p1), &ip_prefix_addr (p2));
+ if (cmp == 0)
+ {
+ if (ip_prefix_len (p1) < ip_prefix_len (p2))
+ {
+ cmp = 1;
+ }
+ else
+ {
+ if (ip_prefix_len (p1) > ip_prefix_len (p2))
+ cmp = 2;
+ }
+ }
+ return cmp;
+}
+
+void
+no_addr_copy (void *dst, void *src)
+{
+ /* nothing to do */
+}
+
+void
+vni_copy (void *dst, void *src)
+{
+ vni_t *vd = dst;
+ vni_t *vs = src;
+
+ clib_memcpy (vd, vs, sizeof (vd[0]));
+ vni_gid (vd) = clib_mem_alloc (sizeof (gid_address_t));
+ gid_address_copy (vni_gid (vd), vni_gid (vs));
+}
+
+void
+lcaf_copy (void *dst, void *src)
+{
+ lcaf_t *lcaf_dst = dst;
+ lcaf_t *lcaf_src = src;
+
+ lcaf_type (lcaf_dst) = lcaf_type (lcaf_src);
+ (*lcaf_copy_fcts[lcaf_type (lcaf_src)]) (dst, src);
+}
+
+u8
+lcaf_length (void *a)
+{
+ return 0;
+}
+
+u8
+mac_length (void *a)
+{
+ return 0;
+}
+
+u8
+sd_length (void *a)
+{
+ return 0;
+}
+
+u8
+nsh_length (void *a)
+{
+ return 0;
+}
+
+void *
+lcaf_cast (gid_address_t * a)
+{
+ return &gid_address_lcaf (a);
+}
+
+void *
+mac_cast (gid_address_t * a)
+{
+ return &gid_address_mac (a);
+}
+
+void *
+no_addr_cast (gid_address_t * a)
+{
+ return (void *) a;
+}
+
+void *
+sd_cast (gid_address_t * a)
+{
+ return &gid_address_sd (a);
+}
+
+void *
+nsh_cast (gid_address_t * a)
+{
+ return &gid_address_nsh (a);
+}
+
+u8
+no_addr_length (void *a)
+{
+ return 0;
+}
+
+u8
+vni_length (void *a)
+{
+ vni_t *v = a;
+ return (sizeof (u32) /* VNI size */
+ + gid_address_size_to_put (vni_gid (v)) /* vni body size */ );
+}
+
+u16
+lcaf_write (u8 * p, void *a)
+{
+ u16 size = 0, len;
+ lcaf_t *lcaf = a;
+ u8 type = lcaf_type (lcaf);
+ lcaf_hdr_t _h, *h = &_h;
+
+ *(u16 *) p = clib_host_to_net_u16 (LISP_AFI_LCAF);
+ size += sizeof (u16);
+ memset (h, 0, sizeof (h[0]));
+ LCAF_TYPE (h) = type;
+ u16 lcaf_len = (*lcaf_body_length_fcts[type]) (lcaf);
+ LCAF_LENGTH (h) = clib_host_to_net_u16 (lcaf_len);
+
+ clib_memcpy (p + size, h, sizeof (h[0]));
+ size += sizeof (h[0]);
+ len = (*lcaf_write_fcts[type]) (p + size, lcaf);
+
+ if ((u16) ~ 0 == len)
+ return ~0;
+
+ return size + len;
+}
+
+u16
+mac_write (u8 * p, void *a)
+{
+ *(u16 *) p = clib_host_to_net_u16 (LISP_AFI_MAC);
+ clib_memcpy (p + sizeof (u16), a, 6);
+ return mac_size_to_write (a);
+}
+
+static u16
+fid_addr_write (u8 * p, fid_address_t * a)
+{
+ switch (fid_addr_type (a))
+ {
+ case FID_ADDR_IP_PREF:
+ return ip_prefix_write (p, &fid_addr_ippref (a));
+
+ case FID_ADDR_MAC:
+ return mac_write (p, &fid_addr_mac (a));
+
+ default:
+ return ~0;
+ }
+ return ~0;
+}
+
+static u8
+fid_address_length (fid_address_t * a)
+{
+ switch (fid_addr_type (a))
+ {
+ case FID_ADDR_IP_PREF:
+ return ip_prefix_length (&fid_addr_ippref (a));
+ case FID_ADDR_MAC:
+ return 0;
+ case FID_ADDR_NSH:
+ return 0;
+ }
+ return 0;
+}
+
+u16
+sd_write (u8 * p, void *a)
+{
+ source_dest_t *sd = a;
+ u16 size = 0;
+ lcaf_hdr_t _h, *h = &_h;
+ lcaf_src_dst_hdr_t sd_hdr;
+
+ *(u16 *) p = clib_host_to_net_u16 (LISP_AFI_LCAF);
+ size += sizeof (u16);
+ memset (h, 0, sizeof (h[0]));
+ LCAF_TYPE (h) = LCAF_SOURCE_DEST;
+ u16 lcaf_len = sizeof (lcaf_src_dst_hdr_t)
+ + fid_addr_size_to_write (&sd_src (sd))
+ + fid_addr_size_to_write (&sd_dst (sd));
+ LCAF_LENGTH (h) = clib_host_to_net_u16 (lcaf_len);
+
+ clib_memcpy (p + size, h, sizeof (h[0]));
+ size += sizeof (h[0]);
+
+ memset (&sd_hdr, 0, sizeof (sd_hdr));
+ LCAF_SD_SRC_ML (&sd_hdr) = fid_address_length (&sd_src (sd));
+ LCAF_SD_DST_ML (&sd_hdr) = fid_address_length (&sd_dst (sd));
+ clib_memcpy (p + size, &sd_hdr, sizeof (sd_hdr));
+ size += sizeof (sd_hdr);
+
+ u16 len = fid_addr_write (p + size, &sd_src (sd));
+ if ((u16) ~ 0 == len)
+ return ~0;
+ size += len;
+
+ len = fid_addr_write (p + size, &sd_dst (sd));
+ if ((u16) ~ 0 == len)
+ return ~0;
+ size += len;
+
+ return size;
+}
+
+u16
+nsh_write (u8 * p, void *a)
+{
+ lcaf_spi_hdr_t spi;
+ lcaf_hdr_t lcaf;
+ gid_address_t *g = a;
+ u16 size = 0;
+
+ ASSERT (gid_address_type (g) == GID_ADDR_NSH);
+
+ memset (&lcaf, 0, sizeof (lcaf));
+ memset (&spi, 0, sizeof (spi));
+
+ LCAF_TYPE (&lcaf) = LCAF_NSH;
+ LCAF_LENGTH (&lcaf) = clib_host_to_net_u16 (sizeof (lcaf_spi_hdr_t));
+
+ u32 s = clib_host_to_net_u32 (gid_address_nsh_spi (g) << 8 |
+ gid_address_nsh_si (g));
+ LCAF_SPI_SI (&spi) = s;
+
+ *(u16 *) p = clib_host_to_net_u16 (LISP_AFI_LCAF);
+ size += sizeof (u16);
+
+ clib_memcpy (p + size, &lcaf, sizeof (lcaf));
+ size += sizeof (lcaf);
+
+ clib_memcpy (p + size, &spi, sizeof (spi));
+ size += sizeof (spi);
+
+ return size;
+}
+
+u16
+vni_write (u8 * p, void *a)
+{
+ lcaf_hdr_t _h, *h = &_h;
+ gid_address_t *g = a;
+ u16 size = 0, len;
+
+ /* put lcaf header */
+ *(u16 *) p = clib_host_to_net_u16 (LISP_AFI_LCAF);
+ size += sizeof (u16);
+ memset (h, 0, sizeof (h[0]));
+ LCAF_TYPE (h) = LCAF_INSTANCE_ID;
+ u16 lcaf_len = sizeof (u32) /* Instance ID size */
+ + gid_address_size_to_put_no_vni (g);
+ LCAF_LENGTH (h) = clib_host_to_net_u16 (lcaf_len);
+ LCAF_RES2 (h) = gid_address_vni_mask (g);
+
+ /* put vni header */
+ clib_memcpy (p + size, h, sizeof (h[0]));
+ size += sizeof (h[0]);
+
+ u32 *afip = (u32 *) (p + size);
+ afip[0] = clib_host_to_net_u32 (gid_address_vni (g));
+ size += sizeof (u32);
+
+ if (GID_ADDR_SRC_DST == gid_address_type (g))
+ /* write src/dst LCAF */
+ {
+ len = sd_write (p + size, g);
+ if ((u16) ~ 0 == len)
+ return ~0;
+ }
+ else
+ /* write the actual address */
+ len = gid_address_put_no_vni (p + size, g);
+
+ if ((u16) ~ 0 == len)
+ return ~0;
+
+ return size + len;
+}
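+
+/*
+ * Wire layout produced by vni_write (Instance ID LCAF): the LCAF AFI
+ * (16387), an lcaf_hdr_t with type LCAF_INSTANCE_ID and the vni mask
+ * length in reserved2, a 32-bit instance id, then the nested address
+ * (or a src/dst LCAF).
+ */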
+
+u16
+no_addr_write (u8 * p, void *a)
+{
+ /* do nothing; return AFI field size */
+ return sizeof (u16);
+}
+
+u16
+no_addr_size_to_write (void *a)
+{
+ return sizeof (u16); /* AFI field length */
+}
+
+static u16
+fid_addr_size_to_write (fid_address_t * a)
+{
+ switch (fid_addr_type (a))
+ {
+ case FID_ADDR_IP_PREF:
+ return ip_prefix_size_to_write (a);
+
+ case FID_ADDR_MAC:
+ return mac_size_to_write (a);
+
+ default:
+ break;
+ }
+ return 0;
+}
+
+u16
+vni_size_to_write (void *a)
+{
+ gid_address_t *g = a;
+
+ u16 lcaf_size = sizeof (u32) + sizeof (u16) /* LCAF AFI field size */
+ + sizeof (lcaf_hdr_t);
+
+ if (gid_address_type (g) == GID_ADDR_SRC_DST)
+ /* special case where nested LCAF is supported */
+ return lcaf_size + sd_size_to_write (g);
+ else
+ return lcaf_size + gid_address_size_to_put_no_vni (g);
+}
+
+u16
+lcaf_size_to_write (void *a)
+{
+ lcaf_t *lcaf = (lcaf_t *) a;
+ u32 size = 0, len;
+ u8 type = lcaf_type (lcaf);
+
+ size += sizeof (u16); /* AFI size */
+
+ len = (*lcaf_size_to_write_fcts[type]) (lcaf);
+ if (~0 == len)
+ return ~0;
+
+ return size + len;
+}
+
+u16
+sd_size_to_write (void *a)
+{
+ source_dest_t *sd = a;
+ return sizeof (u16)
+ + sizeof (lcaf_hdr_t)
+ + sizeof (lcaf_src_dst_hdr_t)
+ + fid_addr_size_to_write (&sd_src (sd))
+ + fid_addr_size_to_write (&sd_dst (sd));
+}
+
+u16
+mac_size_to_write (void *a)
+{
+ return sizeof (u16) + 6;
+}
+
+u16
+nsh_size_to_write (void *a)
+{
+ return sizeof (u16) + sizeof (lcaf_hdr_t) + sizeof (lcaf_spi_hdr_t);
+}
+
+u8
+gid_address_len (gid_address_t * a)
+{
+ gid_address_type_t type = gid_address_type (a);
+ return (*addr_len_fcts[type]) ((*cast_fcts[type]) (a));
+}
+
+static u16
+gid_address_put_no_vni (u8 * b, gid_address_t * gid)
+{
+ gid_address_type_t type = gid_address_type (gid);
+ return (*write_fcts[type]) (b, (*cast_fcts[type]) (gid));
+}
+
+u16
+gid_address_put (u8 * b, gid_address_t * gid)
+{
+ if (0 != gid_address_vni (gid))
+ return vni_write (b, gid);
+
+ return gid_address_put_no_vni (b, gid);
+}
+
+static u16
+gid_address_size_to_put_no_vni (gid_address_t * gid)
+{
+ gid_address_type_t type = gid_address_type (gid);
+ return (*size_to_write_fcts[type]) ((*cast_fcts[type]) (gid));
+}
+
+u16
+gid_address_size_to_put (gid_address_t * gid)
+{
+ if (0 != gid_address_vni (gid))
+ return vni_size_to_write (gid);
+
+ return gid_address_size_to_put_no_vni (gid);
+}
+
+void *
+gid_address_cast (gid_address_t * gid, gid_address_type_t type)
+{
+ return (*cast_fcts[type]) (gid);
+}
+
+void
+gid_address_copy (gid_address_t * dst, gid_address_t * src)
+{
+ gid_address_type_t type = gid_address_type (src);
+ (*copy_fcts[type]) ((*cast_fcts[type]) (dst), (*cast_fcts[type]) (src));
+ gid_address_type (dst) = type;
+ gid_address_vni (dst) = gid_address_vni (src);
+ gid_address_vni_mask (dst) = gid_address_vni_mask (src);
+}
+
+u32
+mac_parse (u8 * offset, u8 * a)
+{
+ /* skip AFI field */
+ offset += sizeof (u16);
+
+ clib_memcpy (a, offset, 6);
+ return sizeof (u16) + 6;
+}
+
+u32
+gid_address_parse (u8 * offset, gid_address_t * a)
+{
+ lisp_afi_e afi;
+ u16 len = 0;
+
+ ASSERT (a);
+
+ /* NOTE: since gid_address_parse may be called by vni_parse, we can't
+ * zero the gid address here */
+ afi = clib_net_to_host_u16 (*((u16 *) offset));
+
+ switch (afi)
+ {
+ case LISP_AFI_NO_ADDR:
+ len = sizeof (u16);
+ gid_address_type (a) = GID_ADDR_NO_ADDRESS;
+ break;
+ case LISP_AFI_IP:
+ len = ip_address_parse (offset, afi, &gid_address_ip (a));
+ gid_address_type (a) = GID_ADDR_IP_PREFIX;
+ /* this should be modified outside if needed */
+ gid_address_ippref_len (a) = 32;
+ break;
+ case LISP_AFI_IP6:
+ len = ip_address_parse (offset, afi, &gid_address_ip (a));
+ gid_address_type (a) = GID_ADDR_IP_PREFIX;
+ /* this should be modified outside if needed */
+ gid_address_ippref_len (a) = 128;
+ break;
+ case LISP_AFI_LCAF:
+ gid_address_type (a) = GID_ADDR_LCAF;
+ len = lcaf_parse (offset, a);
+ break;
+ case LISP_AFI_MAC:
+ len = mac_parse (offset, gid_address_mac (a));
+ gid_address_type (a) = GID_ADDR_MAC;
+ break;
+ default:
+ clib_warning ("LISP AFI %d not supported!", afi);
+ return ~0;
+ }
+ return (len == (u16) ~ 0) ? ~0 : len;
+}
+
+void
+gid_address_ip_set (gid_address_t * dst, void *src, u8 version)
+{
+ gid_address_ippref_len (dst) = ip_address_max_len (version);
+ ip_address_set (&gid_address_ip (dst), src, version);
+}
+
+int
+no_addr_cmp (void *a1, void *a2)
+{
+ return 0;
+}
+
+int
+vni_cmp (void *a1, void *a2)
+{
+ vni_t *v1 = a1;
+ vni_t *v2 = a2;
+
+ if (vni_mask_len (v1) != vni_mask_len (v2))
+ return -1;
+ if (vni_vni (v1) != vni_vni (v2))
+ return -1;
+ return gid_address_cmp (vni_gid (v1), vni_gid (v2));
+}
+
+static int
+mac_cmp (void *a1, void *a2)
+{
+ return memcmp (a1, a2, 6);
+}
+
+static int
+fid_addr_cmp (fid_address_t * a1, fid_address_t * a2)
+{
+ if (fid_addr_type (a1) != fid_addr_type (a2))
+ return -1;
+
+ switch (fid_addr_type (a1))
+ {
+ case FID_ADDR_IP_PREF:
+ return ip_prefix_cmp (&fid_addr_ippref (a1), &fid_addr_ippref (a2));
+
+ case FID_ADDR_MAC:
+ return mac_cmp (fid_addr_mac (a1), fid_addr_mac (a2));
+
+ default:
+ return -1;
+ }
+ return -1;
+}
+
+int
+sd_cmp (void *a1, void *a2)
+{
+ source_dest_t *sd1 = a1;
+ source_dest_t *sd2 = a2;
+
+ if (fid_addr_cmp (&sd_dst (sd1), &sd_dst (sd2)))
+ return -1;
+ if (fid_addr_cmp (&sd_src (sd1), &sd_src (sd2)))
+ return -1;
+ return 0;
+}
+
+/* Compare two gid_address_t.
+ * Returns:
+ * -1: they are of different types or from different AFIs
+ * 0: both addresses are the same
+ * 1: addr1 is bigger than addr2
+ * 2: addr2 is bigger than addr1
+ */
+int
+gid_address_cmp (gid_address_t * a1, gid_address_t * a2)
+{
+ lcaf_t *lcaf1, *lcaf2;
+ int cmp = -1;
+ if (!a1 || !a2)
+ return -1;
+ if (gid_address_type (a1) != gid_address_type (a2))
+ return -1;
+ if (gid_address_vni (a1) != gid_address_vni (a2))
+ return -1;
+
+ /* TODO vni mask is not supported, disable comparing for now
+ if (gid_address_vni_mask (a1) != gid_address_vni_mask (a2))
+ return -1;
+ */
+
+ switch (gid_address_type (a1))
+ {
+ case GID_ADDR_NO_ADDRESS:
+ if (a1 == a2)
+ cmp = 0;
+ else
+ cmp = 2;
+ break;
+ case GID_ADDR_IP_PREFIX:
+ cmp =
+ ip_prefix_cmp (&gid_address_ippref (a1), &gid_address_ippref (a2));
+ break;
+ case GID_ADDR_LCAF:
+ lcaf1 = &gid_address_lcaf (a1);
+ lcaf2 = &gid_address_lcaf (a2);
+ if (lcaf_type (lcaf1) == lcaf_type (lcaf2))
+ cmp = (*lcaf_cmp_fcts[lcaf_type (lcaf1)]) (lcaf1, lcaf2);
+ break;
+ case GID_ADDR_MAC:
+ cmp = mac_cmp (gid_address_mac (a1), gid_address_mac (a2));
+ break;
+
+ case GID_ADDR_SRC_DST:
+ cmp = sd_cmp (&gid_address_sd (a1), &gid_address_sd (a2));
+ break;
+ case GID_ADDR_NSH:
+ cmp = nsh_cmp (&gid_address_nsh (a1), &gid_address_nsh (a2));
+ break;
+ default:
+ break;
+ }
+
+ return cmp;
+}
+
+u32
+locator_parse (void *b, locator_t * loc)
+{
+ locator_hdr_t *h;
+ u8 status = 1; /* locator up */
+ int len;
+
+ h = b;
+ if (!LOC_REACHABLE (h) && LOC_LOCAL (h))
+ status = 0;
+
+ len = gid_address_parse (LOC_ADDR (h), &loc->address);
+ if (len == ~0)
+ return len;
+
+ loc->state = status;
+ loc->local = 0;
+ loc->priority = LOC_PRIORITY (h);
+ loc->weight = LOC_WEIGHT (h);
+ loc->mpriority = LOC_MPRIORITY (h);
+ loc->mweight = LOC_MWEIGHT (h);
+
+ return sizeof (locator_hdr_t) + len;
+}
+
+void
+locator_copy (locator_t * dst, locator_t * src)
+{
+ /* TODO if gid becomes more complex, this will need to be changed! */
+ clib_memcpy (dst, src, sizeof (*dst));
+ if (!src->local)
+ gid_address_copy (&dst->address, &src->address);
+}
+
+u32
+locator_cmp (locator_t * l1, locator_t * l2)
+{
+ u32 ret = 0;
+ if ((ret = gid_address_cmp (&l1->address, &l2->address)) != 0)
+ return 1;
+
+ if (l1->priority != l2->priority)
+ return 1;
+ if (l1->weight != l2->weight)
+ return 1;
+ if (l1->mpriority != l2->mpriority)
+ return 1;
+ if (l1->mweight != l2->mweight)
+ return 1;
+ return 0;
+}
+
+void
+locator_free (locator_t * l)
+{
+ if (!l->local)
+ gid_address_free (&l->address);
+}
+
+void
+build_src_dst (gid_address_t * sd, gid_address_t * src, gid_address_t * dst)
+{
+ memset (sd, 0, sizeof (*sd));
+ gid_address_type (sd) = GID_ADDR_SRC_DST;
+ gid_address_vni (sd) = gid_address_vni (dst);
+ gid_address_vni_mask (sd) = gid_address_vni_mask (dst);
+
+ switch (gid_address_type (dst))
+ {
+ case GID_ADDR_IP_PREFIX:
+ gid_address_sd_src_type (sd) = FID_ADDR_IP_PREF;
+ gid_address_sd_dst_type (sd) = FID_ADDR_IP_PREF;
+ ip_prefix_copy (&gid_address_sd_src_ippref (sd),
+ &gid_address_ippref (src));
+ ip_prefix_copy (&gid_address_sd_dst_ippref (sd),
+ &gid_address_ippref (dst));
+ break;
+ case GID_ADDR_MAC:
+ gid_address_sd_src_type (sd) = FID_ADDR_MAC;
+ gid_address_sd_dst_type (sd) = FID_ADDR_MAC;
+ mac_copy (gid_address_sd_src_mac (sd), gid_address_mac (src));
+ mac_copy (gid_address_sd_dst_mac (sd), gid_address_mac (dst));
+ break;
+ default:
+ clib_warning ("Unsupported gid type %d while conversion!",
+ gid_address_type (dst));
+ break;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/lisp_types.h b/src/vnet/lisp-cp/lisp_types.h
new file mode 100644
index 00000000..4a919e79
--- /dev/null
+++ b/src/vnet/lisp-cp/lisp_types.h
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VNET_LISP_GPE_LISP_TYPES_H_
+#define VNET_LISP_GPE_LISP_TYPES_H_
+
+#include <vnet/ip/ip.h>
+#include <vnet/lisp-cp/lisp_cp_messages.h>
+
+#define SHA1_AUTH_DATA_LEN 20
+#define SHA256_AUTH_DATA_LEN 32
+
+typedef enum
+{
+ HMAC_NO_KEY = 0,
+ HMAC_SHA_1_96,
+ HMAC_SHA_256_128
+} lisp_key_type_t;
+
+uword unformat_hmac_key_id (unformat_input_t * input, va_list * args);
+u8 *format_hmac_key_id (u8 * s, va_list * args);
+
+typedef enum
+{
+ IP4,
+ IP6
+} ip_address_type_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct ip_address
+{
+ union
+ {
+ ip4_address_t v4;
+ ip6_address_t v6;
+ } ip;
+ u8 version;
+}) ip_address_t;
+/* *INDENT-ON* */
+
+#define ip_addr_addr(_a) (_a)->ip
+#define ip_addr_v4(_a) (_a)->ip.v4
+#define ip_addr_v6(_a) (_a)->ip.v6
+#define ip_addr_version(_a) (_a)->version
+
+int ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2);
+void ip_address_copy (ip_address_t * dst, const ip_address_t * src);
+void ip_address_copy_addr (void *dst, const ip_address_t * src);
+void ip_address_set (ip_address_t * dst, const void *src, u8 version);
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct ip_prefix
+{
+ ip_address_t addr;
+ u8 len;
+}) ip_prefix_t;
+/* *INDENT-ON* */
+
+#define ip_prefix_addr(_a) (_a)->addr
+#define ip_prefix_version(_a) ip_addr_version(&ip_prefix_addr(_a))
+#define ip_prefix_len(_a) (_a)->len
+#define ip_prefix_v4(_a) ip_addr_v4(&ip_prefix_addr(_a))
+#define ip_prefix_v6(_a) ip_addr_v6(&ip_prefix_addr(_a))
+
+void ip_prefix_normalize (ip_prefix_t * a);
+
+extern void ip_address_to_fib_prefix (const ip_address_t * addr,
+ fib_prefix_t * prefix);
+extern void ip_prefix_to_fib_prefix (const ip_prefix_t * ipp,
+ fib_prefix_t * fibp);
+
+typedef enum
+{
+ /* NOTE: ip addresses are left out on purpose. Use max masked ip-prefixes
+ * instead */
+ GID_ADDR_IP_PREFIX,
+ GID_ADDR_LCAF,
+ GID_ADDR_MAC,
+ GID_ADDR_SRC_DST,
+ GID_ADDR_NSH,
+ GID_ADDR_ARP,
+ GID_ADDR_NDP,
+ GID_ADDR_NO_ADDRESS,
+ GID_ADDR_TYPES
+} gid_address_type_t;
+
+typedef enum
+{
+ /* make sure the values correspond with the RFC */
+ LCAF_NULL_BODY = 0,
+ LCAF_AFI_LIST_TYPE,
+ LCAF_INSTANCE_ID,
+ LCAF_SOURCE_DEST = 12,
+ LCAF_NSH = 17,
+ LCAF_TYPES
+} lcaf_type_t;
+
+typedef enum fid_addr_type_t_
+{
+ FID_ADDR_IP_PREF,
+ FID_ADDR_MAC,
+ FID_ADDR_NSH
+} __attribute__ ((packed)) fid_addr_type_t;
+
+/* flat address type */
+typedef struct
+{
+ union
+ {
+ ip_prefix_t ippref;
+ u8 mac[6];
+ u32 nsh;
+ };
+ fid_addr_type_t type;
+} fid_address_t;
+
+typedef fid_address_t dp_address_t;
+
+#define fid_addr_ippref(_a) (_a)->ippref
+#define fid_addr_prefix_length(_a) ip_prefix_len(&fid_addr_ippref(_a))
+#define fid_addr_ip_version(_a) ip_prefix_version(&fid_addr_ippref(_a))
+#define fid_addr_mac(_a) (_a)->mac
+#define fid_addr_nsh(_a) (_a)->nsh
+#define fid_addr_type(_a) (_a)->type
+u8 *format_fid_address (u8 * s, va_list * args);
+
+typedef struct
+{
+ fid_address_t src;
+ fid_address_t dst;
+} source_dest_t;
+
+#define sd_dst(_a) (_a)->dst
+#define sd_src(_a) (_a)->src
+#define sd_src_ippref(_a) fid_addr_ippref(&sd_src(_a))
+#define sd_dst_ippref(_a) fid_addr_ippref(&sd_dst(_a))
+#define sd_src_mac(_a) fid_addr_mac(&sd_src(_a))
+#define sd_dst_mac(_a) fid_addr_mac(&sd_dst(_a))
+#define sd_src_type(_a) fid_addr_type(&sd_src(_a))
+#define sd_dst_type(_a) fid_addr_type(&sd_dst(_a))
+
+typedef struct
+{
+ u8 vni_mask_len;
+ u32 vni;
+ struct _gid_address_t *gid_addr;
+} vni_t;
+
+#define vni_vni(_a) (_a)->vni
+#define vni_mask_len(_a) (_a)->vni_mask_len
+#define vni_gid(_a) (_a)->gid_addr
+
+typedef struct
+{
+ u32 spi;
+ u8 si;
+} nsh_t;
+
+#define nsh_spi(_a) (_a)->spi
+#define nsh_si(_a) (_a)->si
+
+typedef struct
+{
+ ip_address_t addr;
+ u32 bd;
+} lcaf_arp_ndp_t;
+
+#define lcaf_arp_ndp_ip(_a) (_a)->addr
+#define lcaf_arp_ndp_ip_ver(_a) ip_addr_version(&lcaf_arp_ndp_ip(_a))
+#define lcaf_arp_ndp_ip4(_a) ip_addr_v4(&lcaf_arp_ndp_ip(_a))
+#define lcaf_arp_ndp_ip6(_a) ip_addr_v6(&lcaf_arp_ndp_ip(_a))
+#define lcaf_arp_ndp_bd(_a) (_a)->bd
+
+typedef struct
+{
+ /* the union needs to be at the beginning! */
+ union
+ {
+ source_dest_t sd;
+ lcaf_arp_ndp_t arp_ndp;
+ vni_t uni;
+ };
+ u8 type;
+} lcaf_t;
+
+#define lcaf_type(_a) (_a)->type
+#define lcaf_vni(_a) vni_vni(& (_a)->uni)
+#define lcaf_vni_len(_a) vni_mask_len(& (_a)->uni)
+
+/* might want to expand this in the future :) */
+typedef struct _gid_address_t
+{
+ union
+ {
+ ip_prefix_t ippref;
+ lcaf_t lcaf;
+ u8 mac[6];
+ source_dest_t sd;
+ lcaf_arp_ndp_t arp_ndp;
+ nsh_t nsh;
+ };
+ u8 type;
+ u32 vni;
+ u8 vni_mask;
+} gid_address_t;
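+
+/*
+ * Usage sketch: building an IPv4 EID in the default vni with the
+ * accessor macros below (ip4 is assumed to be an ip4_address_t):
+ *
+ * gid_address_t g;
+ * memset (&g, 0, sizeof (g));
+ * gid_address_type (&g) = GID_ADDR_IP_PREFIX;
+ * gid_address_ip_set (&g, &ip4, IP4);
+ */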
+
+u8 *format_ip_address (u8 * s, va_list * args);
+uword unformat_ip_address (unformat_input_t * input, va_list * args);
+u8 *format_ip_prefix (u8 * s, va_list * args);
+uword unformat_ip_prefix (unformat_input_t * input, va_list * args);
+u8 *format_mac_address (u8 * s, va_list * args);
+uword unformat_mac_address (unformat_input_t * input, va_list * args);
+
+u16 ip4_address_size_to_put ();
+u16 ip6_address_size_to_put ();
+u32 ip4_address_put (u8 * b, ip4_address_t * a);
+u32 ip6_address_put (u8 * b, ip6_address_t * a);
+
+u16 ip_address_size_to_write (ip_address_t * a);
+u16 ip_address_iana_afi (ip_address_t * a);
+u8 ip_address_max_len (u8 ver);
+u32 ip_address_put (u8 * b, ip_address_t * a);
+void ip_address_to_46 (const ip_address_t * addr,
+ ip46_address_t * a, fib_protocol_t * proto);
+
+/* LISP AFI codes */
+typedef enum
+{
+ LISP_AFI_NO_ADDR,
+ LISP_AFI_IP,
+ LISP_AFI_IP6,
+ LISP_AFI_LCAF = 16387,
+ LISP_AFI_MAC = 16389
+} lisp_afi_e;
+
+u8 *format_gid_address (u8 * s, va_list * args);
+uword unformat_gid_address (unformat_input_t * input, va_list * args);
+int gid_address_cmp (gid_address_t * a1, gid_address_t * a2);
+void gid_address_free (gid_address_t * a);
+
+u16 gid_address_size_to_put (gid_address_t * a);
+u16 gid_address_put (u8 * b, gid_address_t * gid);
+u8 gid_address_len (gid_address_t * a);
+void *gid_address_cast (gid_address_t * gid, gid_address_type_t type);
+void gid_address_copy (gid_address_t * dst, gid_address_t * src);
+u32 gid_address_parse (u8 * offset, gid_address_t * a);
+void gid_address_ip_set (gid_address_t * dst, void *src, u8 version);
+
+#define gid_address_type(_a) (_a)->type
+#define gid_address_ippref(_a) (_a)->ippref
+#define gid_address_ippref_len(_a) (_a)->ippref.len
+#define gid_address_ip(_a) ip_prefix_addr(&gid_address_ippref(_a))
+#define gid_address_ip_version(_a) ip_addr_version(&gid_address_ip(_a))
+#define gid_address_lcaf(_a) (_a)->lcaf
+#define gid_address_mac(_a) (_a)->mac
+#define gid_address_nsh(_a) (_a)->nsh
+#define gid_address_nsh_spi(_a) nsh_spi(&gid_address_nsh(_a))
+#define gid_address_nsh_si(_a) nsh_si(&gid_address_nsh(_a))
+#define gid_address_vni(_a) (_a)->vni
+#define gid_address_vni_mask(_a) (_a)->vni_mask
+#define gid_address_sd_dst_ippref(_a) sd_dst_ippref(&(_a)->sd)
+#define gid_address_sd_src_ippref(_a) sd_src_ippref(&(_a)->sd)
+#define gid_address_sd_dst_mac(_a) sd_dst_mac(&(_a)->sd)
+#define gid_address_sd_src_mac(_a) sd_src_mac(&(_a)->sd)
+#define gid_address_sd(_a) (_a)->sd
+#define gid_address_sd_src(_a) sd_src(&gid_address_sd(_a))
+#define gid_address_sd_dst(_a) sd_dst(&gid_address_sd(_a))
+#define gid_address_sd_src_type(_a) sd_src_type(&gid_address_sd(_a))
+#define gid_address_sd_dst_type(_a) sd_dst_type(&gid_address_sd(_a))
+#define gid_address_arp_ndp(_a) (_a)->arp_ndp
+#define gid_address_arp_ndp_bd(_a) lcaf_arp_ndp_bd(&gid_address_arp_ndp(_a))
+#define gid_address_arp_ndp_ip(_a) lcaf_arp_ndp_ip(&gid_address_arp_ndp(_a))
+#define gid_address_arp_ip4(_a) lcaf_arp_ndp_ip4(&gid_address_arp_ndp(_a))
+#define gid_address_ndp_ip6(_a) lcaf_arp_ndp_ip6(&gid_address_arp_ndp(_a))
+#define gid_address_ndp_bd gid_address_arp_ndp_bd
+#define gid_address_arp_bd gid_address_arp_ndp_bd
+
+/* 'sub'address functions */
+#define foreach_gid_address_type_fcns \
+ _(no_addr) \
+ _(ip_prefix) \
+ _(lcaf) \
+ _(mac) \
+ _(nsh) \
+ _(sd)
+
+/* *INDENT-OFF* */
+#define _(_n) \
+u16 _n ## _size_to_write (void * pref); \
+u16 _n ## _write (u8 * p, void * pref); \
+u8 _n ## _length (void *a); \
+void * _n ## _cast (gid_address_t * a); \
+void _n ## _copy (void * dst , void * src);
+
+foreach_gid_address_type_fcns
+#undef _
+/* *INDENT-ON* */
+
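+/* note: this reads a full u64 starting at the 6-byte MAC and masks to
+ * the low 48 bits; it assumes the two bytes past the MAC are safely
+ * readable, and the result is endian-dependent, which is acceptable as
+ * long as the value is only used locally (e.g. as a hash key) */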
+always_inline u64
+mac_to_u64 (u8 * m)
+{
+ return (*((u64 *) m) & 0xffffffffffff);
+}
+
+typedef struct
+{
+ /* mark locator as local as opposed to remote */
+ u8 local;
+ u8 state;
+ union
+ {
+ u32 sw_if_index;
+ gid_address_t address;
+ };
+ u8 priority;
+ u8 weight;
+ u8 mpriority;
+ u8 mweight;
+ u8 probed;
+} locator_t;
+
+u32 locator_parse (void *ptr, locator_t * loc);
+void locator_copy (locator_t * dst, locator_t * src);
+u32 locator_cmp (locator_t * l1, locator_t * l2);
+void locator_free (locator_t * l);
+
+typedef struct
+{
+ /* locator-set name */
+ u8 *name;
+
+ /* vector of locator indices */
+ u32 *locator_indices;
+ u8 local;
+} locator_set_t;
+
+typedef struct
+{
+ gid_address_t eid;
+
+ /* index of local locator set */
+ union
+ {
+ u32 locator_set_index;
+ locator_t *locators; /* used for map register message */
+ };
+
+ u32 ttl;
+ u8 action;
+
+ u8 authoritative:1;
+ u8 local:1;
+ /* valid only for remote mappings */
+ u8 is_static:1;
+ u8 pitr_set:1;
+ u8 nsh_set:1;
+ u8 almost_expired:1;
+ u8 delete_after_expiration:1;
+ u8 rsvd:1;
+
+ u8 *key;
+ lisp_key_type_t key_id;
+ u8 timer_set;
+ counter_t packets;
+} mapping_t;
+
+uword
+unformat_negative_mapping_action (unformat_input_t * input, va_list * args);
+u8 *format_negative_mapping_action (u8 *, va_list * args);
+
+typedef struct locator_pair
+{
+ /* local and remote locators (underlay attachment points) */
+ ip_address_t lcl_loc;
+ ip_address_t rmt_loc;
+
+ u8 priority;
+ u8 weight;
+} locator_pair_t;
+
+void
+build_src_dst (gid_address_t * sd, gid_address_t * src, gid_address_t * dst);
+
+void gid_address_from_ip (gid_address_t * g, ip_address_t * ip);
+void gid_to_dp_address (gid_address_t * g, dp_address_t * d);
+
+#endif /* VNET_LISP_GPE_LISP_TYPES_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/one.api b/src/vnet/lisp-cp/one.api
new file mode 100644
index 00000000..39f2802d
--- /dev/null
+++ b/src/vnet/lisp-cp/one.api
@@ -0,0 +1,1110 @@
+/*
+ * Copyright (c) 2015-2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
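+/** \brief Locator structure used by locator-set messages
+ @param sw_if_index - index of the locator interface
+ @param priority - locator priority
+ @param weight - locator weight
+*/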
+typeonly manual_print manual_endian define one_local_locator
+{
+ u32 sw_if_index;
+ u8 priority;
+ u8 weight;
+};
+
+/** \brief add or delete locator_set
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param locator_set_name - locator set name
+ @param locator_num - number of locators
+ @param locators - locator records
+*/
+manual_endian manual_print define one_add_del_locator_set
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 locator_set_name[64];
+ u32 locator_num;
+ vl_api_one_local_locator_t locators[locator_num];
+};
+
+/** \brief Reply for locator_set add/del
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param ls_index - locator set index
+*/
+define one_add_del_locator_set_reply
+{
+ u32 context;
+ i32 retval;
+ u32 ls_index;
+};
+
+/** \brief add or delete locator for locator set
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param locator_set_name - name of locator_set to add/del locator
+ @param sw_if_index - index of the interface
+ @param priority - priority of the locator
+ @param weight - weight of the locator
+*/
+autoreply define one_add_del_locator
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 locator_set_name[64];
+ u32 sw_if_index;
+ u8 priority;
+ u8 weight;
+};
+
+/** \brief add or delete ONE eid-table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param eid_type:
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param eid - EID can be ip4, ip6 or mac
+ @param prefix_len - prefix len
+ @param locator_set_name - name of locator_set to add/del eid-table
+ @param vni - virtual network instance
+ @param key_id
+ HMAC_NO_KEY 0
+ HMAC_SHA_1_96 1
+ HMAC_SHA_256_128 2
+ @param key - secret key
+*/
+autoreply define one_add_del_local_eid
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 eid_type;
+ u8 eid[16];
+ u8 prefix_len;
+ u8 locator_set_name[64];
+ u32 vni;
+ u16 key_id;
+ u8 key[64];
+};
+
+/** \brief Set TTL for map register messages
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ttl - time to live
+*/
+autoreply define one_map_register_set_ttl
+{
+ u32 client_index;
+ u32 context;
+ u32 ttl;
+};
+
+/** \brief Get TTL for map register messages
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_one_map_register_ttl
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Contains current TTL for map register messages
+    @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param ttl - time to live
+*/
+define show_one_map_register_ttl_reply
+{
+ u32 context;
+ i32 retval;
+ u32 ttl;
+};
+
+/** \brief Add/delete map server
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero; delete otherwise
+ @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+ @param ip_address - map server IP address
+*/
+autoreply define one_add_del_map_server
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 ip_address[16];
+};
+
+/** \brief add or delete map-resolver
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+ @param ip_address - array of address bytes
+*/
+autoreply define one_add_del_map_resolver
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 ip_address[16];
+};
+
+/** \brief enable or disable ONE feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_en - enable protocol if non-zero, else disable
+*/
+autoreply define one_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_en;
+};
+
+/** \brief configure or delete ONE NSH mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ls_name - locator set name
+ @param is_add - add locator set if non-zero; delete otherwise
+*/
+autoreply define one_nsh_set_locator_set
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 ls_name[64];
+};
+
+/** \brief configure or disable ONE PITR node
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ls_name - locator set name
+ @param is_add - add locator set if non-zero, else disable pitr
+*/
+autoreply define one_pitr_set_locator_set
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 ls_name[64];
+};
+
+/** \brief configure or disable use of PETR
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - Address is IPv4 if set and IPv6 otherwise
+ @param address - PETR IP address
+ @param is_add - add locator set if non-zero, else disable PETR
+*/
+autoreply define one_use_petr
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 address[16];
+ u8 is_add;
+};
+
+/** \brief Request for ONE PETR status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_one_use_petr
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief ONE PETR status, enable or disable
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+ @param status - ONE PETR enable if non-zero, else disable
+ @param is_ip4 - Address is IPv4 if non-zero, else IPv6
+ @param address - PETR IP address
+*/
+define show_one_use_petr_reply
+{
+ u32 context;
+ i32 retval;
+ u8 status;
+ u8 is_ip4;
+ u8 address[16];
+};
+
+/** \brief Get state of ONE RLOC probing
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_one_rloc_probe_state
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for show_one_rloc_probe_state
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param is_enabled - state of RLOC probing
+*/
+define show_one_rloc_probe_state_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_enabled;
+};
+
+/** \brief enable/disable ONE RLOC probing
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param is_enabled - enable if non-zero; disable otherwise
+*/
+autoreply define one_rloc_probe_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_enabled;
+};
+
+/** \brief enable/disable ONE map-register
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param is_enabled - enable if non-zero; disable otherwise
+*/
+autoreply define one_map_register_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_enabled;
+};
+
+/** \brief Get state of ONE map-register
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_one_map_register_state
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for show_one_map_register_state
+ @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+    @param is_enabled - state of map-register
+*/
+define show_one_map_register_state_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_enabled;
+};
+
+/** \brief set ONE map-request mode. Depending on the configured mode,
+      VPP sends either source/destination or destination-only map-requests.
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param mode - new map-request mode. Supported values are:
+      0 - destination only
+      1 - source/destination
+*/
+autoreply define one_map_request_mode
+{
+ u32 client_index;
+ u32 context;
+ u8 mode;
+};
+
+/** \brief Request for ONE map-request mode
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_one_map_request_mode
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for show_one_map_request_mode
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param mode - map-request mode
+*/
+define show_one_map_request_mode_reply
+{
+ u32 context;
+ i32 retval;
+ u8 mode;
+};
+
+typeonly manual_endian manual_print define one_remote_locator
+{
+ u8 is_ip4;
+ u8 priority;
+ u8 weight;
+ u8 addr[16];
+};
+
+/** \brief add or delete remote static mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param is_src_dst - flag indicating src/dst based routing policy
+ @param del_all - if set, delete all remote mappings
+ @param vni - virtual network instance
+ @param action - negative map-reply action
+ @param eid_type -
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+      3 : NSH : both values (service path ID and service index) are
+          encoded in the 'eid' field as follows:
+
+ |4 B |1 B |
+ -----------
+ |SPI | SI |
+ @param deid - dst EID
+ @param seid - src EID, valid only if is_src_dst is enabled
+ @param rloc_num - number of remote locators
+ @param rlocs - remote locator records
+*/
+autoreply manual_print manual_endian define one_add_del_remote_mapping
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_src_dst;
+ u8 del_all;
+ u32 vni;
+ u8 action;
+ u8 eid_type;
+ u8 eid[16];
+ u8 eid_len;
+ u8 seid[16];
+ u8 seid_len;
+ u32 rloc_num;
+ vl_api_one_remote_locator_t rlocs[rloc_num];
+};
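+
+/*
+ * Illustrative sketch of the NSH 'eid' packing described above; it
+ * mirrors the packed lisp_nsh_api_t layout used by the handlers in
+ * one_api.c (SPI in the first 4 bytes, network order; SI in byte 5):
+ *
+ *   u32 spi_net = clib_host_to_net_u32 (spi);
+ *   clib_memcpy (mp->eid, &spi_net, 4);
+ *   mp->eid[4] = si;
+ */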
+
+/** \brief Add/delete L2 ARP entries
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add if non-zero; delete otherwise
+    @param mac - MAC address
+    @param bd - bridge domain
+ @param ip4 - IPv4 address
+*/
+autoreply define one_add_del_l2_arp_entry
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 mac[6];
+ u32 bd;
+ u32 ip4;
+};
+
+/** \brief Request for L2 ARP entries from specified bridge domain
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd - bridge domain
+*/
+define one_l2_arp_entries_get
+{
+ u32 client_index;
+ u32 context;
+ u32 bd;
+};
+
+typeonly manual_print manual_endian define one_l2_arp_entry
+{
+ u8 mac[6];
+ u32 ip4;
+};
+
+/** \brief Reply with L2 ARP entries from specified bridge domain
+ @param context - sender context, to match reply w/ request
+ @param retval - error code
+ @param count - number of elements in the list
+    @param entries - list of L2 ARP entries
+*/
+manual_print manual_endian define one_l2_arp_entries_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_one_l2_arp_entry_t entries[count];
+};
+
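+/** \brief Add/delete NDP entry
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - add if non-zero; delete otherwise
+    @param mac - MAC address
+    @param bd - bridge domain
+    @param ip6 - IPv6 address
+*/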
+autoreply define one_add_del_ndp_entry
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 mac[6];
+ u32 bd;
+ u8 ip6[16];
+};
+
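+/** \brief Request for NDP entries from specified bridge domain
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param bd - bridge domain
+*/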
+define one_ndp_entries_get
+{
+ u32 client_index;
+ u32 context;
+ u32 bd;
+};
+
+typeonly manual_print manual_endian define one_ndp_entry
+{
+ u8 mac[6];
+ u8 ip6[16];
+};
+
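+/** \brief Reply with NDP entries from specified bridge domain
+    @param context - sender context, to match reply w/ request
+    @param retval - error code
+    @param count - number of elements in the list
+    @param entries - list of NDP entries
+*/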
+manual_print manual_endian define one_ndp_entries_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_one_ndp_entry_t entries[count];
+};
+
+/** \brief Set ONE transport protocol
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param protocol - supported values:
+ 1: UDP based LISP (default)
+ 2: binary API
+*/
+autoreply define one_set_transport_protocol
+{
+ u32 client_index;
+ u32 context;
+ u8 protocol;
+};
+
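+/** \brief Request for ONE transport protocol
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/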
+define one_get_transport_protocol
+{
+ u32 client_index;
+ u32 context;
+};
+
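+/** \brief Reply for one_get_transport_protocol
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param protocol - transport protocol
+*/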
+define one_get_transport_protocol_reply
+{
+ u32 context;
+ i32 retval;
+ u8 protocol;
+};
+
+/** \brief Request for list of bridge domains used by neighbor discovery
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define one_ndp_bd_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply with list of bridge domains used by neighbor discovery
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+ @param count - number of elements in the list
+ @param bridge_domains - list of BDs
+*/
+manual_print manual_endian define one_ndp_bd_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ u32 bridge_domains[count];
+};
+
+/** \brief Request for list of bridge domains used by L2 ARP table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define one_l2_arp_bd_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply with list of bridge domains used by L2 ARP table
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+ @param count - number of elements in the list
+ @param bridge_domains - list of BDs
+*/
+manual_print manual_endian define one_l2_arp_bd_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ u32 bridge_domains[count];
+};
+
+/** \brief add or delete ONE adjacency
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param vni - virtual network instance
+ @param eid_type -
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param reid - remote EID
+ @param leid - local EID
+*/
+autoreply define one_add_del_adjacency
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 vni;
+ u8 eid_type;
+ u8 reid[16];
+ u8 leid[16];
+ u8 reid_len;
+ u8 leid_len;
+};
+
+/** \brief add or delete map request itr rlocs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param locator_set_name - locator set name
+*/
+autoreply define one_add_del_map_request_itr_rlocs
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 locator_set_name[64];
+};
+
+/** \brief map/unmap vni/bd_index to vrf
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - add or delete mapping
+    @param vni - virtual network instance
+    @param dp_table - vrf index or bridge domain index
+    @param is_l2 - if non-zero dp_table is a bridge domain index, else a vrf
+*/
+autoreply define one_eid_table_add_del_map
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 vni;
+ u32 dp_table;
+ u8 is_l2;
+};
+
+/** \brief Request for ONE locator status
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param ls_index - locator set index
+    @param ls_name - locator set name
+    @param is_index_set - flag indicating whether ls_name or ls_index is set
+ */
+define one_locator_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 ls_index;
+ u8 ls_name[64];
+ u8 is_index_set;
+};
+
+/** \brief ONE locator status
+    @param context - sender context, to match reply w/ request
+    @param local - if non-zero, locator is local
+    @param sw_if_index - sw_if_index of the locator
+    @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+    @param ip_address - locator address
+    @param priority - locator priority
+    @param weight - locator weight
+ */
+define one_locator_details
+{
+ u32 context;
+ u8 local;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u8 ip_address[16];
+ u8 priority;
+ u8 weight;
+};
+
+/** \brief ONE locator_set status
+ @param context - sender context, to match reply w/ request
+ @param ls_index - locator set index
+ @param ls_name - name of the locator set
+ */
+define one_locator_set_details
+{
+ u32 context;
+ u32 ls_index;
+ u8 ls_name[64];
+};
+
+/** \brief Request for locator_set summary status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param filter - filter type
+ Supported values:
+ 0: all locator sets
+ 1: local locator sets
+ 2: remote locator sets
+ */
+define one_locator_set_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 filter;
+};
+
+/** \brief ONE eid-table details
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param locator_set_index - index of locator_set, if ~0 then the mapping
+ is negative
+ @param action - negative map request action
+ @param is_local - local if non-zero, else remote
+ @param eid_type:
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+    @param is_src_dst - EID is of source/destination type
+ @param eid - EID can be ip4, ip6 or mac
+ @param eid_prefix_len - prefix length
+ @param seid - source EID can be ip4, ip6 or mac
+ @param seid_prefix_len - source prefix length
+ @param vni - virtual network instance
+ @param ttl - time to live
+ @param authoritative - authoritative
+ @param key_id
+ HMAC_NO_KEY 0
+ HMAC_SHA_1_96 1
+ HMAC_SHA_256_128 2
+ @param key - secret key
+*/
+define one_eid_table_details
+{
+ u32 context;
+ u32 locator_set_index;
+ u8 action;
+ u8 is_local;
+ u8 eid_type;
+ u8 is_src_dst;
+ u32 vni;
+ u8 eid[16];
+ u8 eid_prefix_len;
+ u8 seid[16];
+ u8 seid_prefix_len;
+ u32 ttl;
+ u8 authoritative;
+ u16 key_id;
+ u8 key[64];
+};
+
+/** \brief Request for eid table summary status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param eid_set - if non-zero request info about specific mapping
+ @param vni - virtual network instance; valid only if eid_set != 0
+ @param prefix_length - prefix length if EID is IP address;
+ valid only if eid_set != 0
+ @param eid_type - EID type; valid only if eid_set != 0
+ Supported values:
+ 0: EID is IPv4
+ 1: EID is IPv6
+ 2: EID is ethernet address
+      3 : NSH : both values (service path ID and service index) are
+          encoded in the 'eid' field as follows:
+
+ |4 B |1 B |
+ -----------
+ |SPI | SI |
+ @param eid - endpoint identifier
+ @param filter - filter type;
+      Supported values:
+ 0: all eid
+ 1: local eid
+ 2: remote eid
+ */
+define one_eid_table_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 eid_set;
+ u8 prefix_length;
+ u32 vni;
+ u8 eid_type;
+ u8 eid[16];
+ u8 filter;
+};
+
+/** \brief ONE adjacency
+ @param eid_type -
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param reid - remote EID
+ @param leid - local EID
+ @param reid_prefix_len - remote EID IP prefix length
+ @param leid_prefix_len - local EID IP prefix length
+ */
+typeonly manual_print manual_endian define one_adjacency
+{
+ u8 eid_type;
+ u8 reid[16];
+ u8 leid[16];
+ u8 reid_prefix_len;
+ u8 leid_prefix_len;
+};
+
+/** \brief ONE adjacency reply
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param count - number of adjacencies
+    @param adjacencies - array of adjacencies
+ */
+manual_endian manual_print define one_adjacencies_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_one_adjacency_t adjacencies[count];
+};
+
+/** \brief Request for ONE adjacencies
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - filter adjacencies by VNI
+ */
+define one_adjacencies_get
+{
+ u32 client_index;
+ u32 context;
+ u32 vni;
+};
+
+/** \brief Shows relationship between vni and vrf/bd
+    @param context - sender context, to match reply w/ request
+    @param vni - virtual network instance
+    @param dp_table - VRF index or bridge domain index
+ */
+define one_eid_table_map_details
+{
+ u32 context;
+ u32 vni;
+ u32 dp_table;
+};
+
+/** \brief Request for one_eid_table_map_details
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_l2 - if set dump vni/bd mappings else vni/vrf
+ */
+define one_eid_table_map_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 is_l2;
+};
+
+/** \brief Dumps all VNIs used in mappings
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ */
+define one_eid_table_vni_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief reply to one_eid_table_vni_dump
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - virtual network instance
+ */
+define one_eid_table_vni_details
+{
+ u32 client_index;
+ u32 context;
+ u32 vni;
+};
+
+/** \brief ONE map resolver status
+ @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+ @param ip_address - array of address bytes
+ */
+define one_map_resolver_details
+{
+ u32 context;
+ u8 is_ipv6;
+ u8 ip_address[16];
+};
+
+/** \brief Request for map resolver summary status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ */
+define one_map_resolver_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief ONE map server details
+ @param is_ipv6 - if non-zero the address is ipv6, else ipv4
+ @param ip_address - array of address bytes
+ */
+define one_map_server_details
+{
+ u32 context;
+ u8 is_ipv6;
+ u8 ip_address[16];
+};
+
+/** \brief Request for map server summary status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ */
+define one_map_server_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Request for ONE status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_one_status
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief ONE status
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+ @param feature_status - enabled if non-zero, else disabled
+ @param gpe_status - enabled if non-zero, else disabled
+*/
+define show_one_status_reply
+{
+ u32 context;
+ i32 retval;
+ u8 feature_status;
+ u8 gpe_status;
+};
+
+/** \brief Request for map request itr rlocs summary status
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+ */
+define one_get_map_request_itr_rlocs
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for one_get_map_request_itr_rlocs
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+    @param locator_set_name - name of the locator_set
+ */
+define one_get_map_request_itr_rlocs_reply
+{
+ u32 context;
+ i32 retval;
+ u8 locator_set_name[64];
+};
+
+/** \brief Request for ONE NSH mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_one_nsh_mapping
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for ONE NSH mapping
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+ @param is_set - is ONE NSH mapping set
+ @param locator_set_name - name of the locator_set if NSH mapping is set
+*/
+define show_one_nsh_mapping_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_set;
+ u8 locator_set_name[64];
+};
+
+/** \brief Request for ONE PITR status
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_one_pitr
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Status of ONE PITR, enable or disable
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+ @param status - ONE PITR enable if non-zero, else disable
+ @param locator_set_name - name of the locator_set
+*/
+define show_one_pitr_reply
+{
+ u32 context;
+ i32 retval;
+ u8 status;
+ u8 locator_set_name[64];
+};
+
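+/** \brief Request for ONE statistics
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/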
+define one_stats_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
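+/** \brief ONE per-flow statistics
+    @param context - sender context, to match reply w/ request
+    @param vni - virtual network instance
+    @param eid_type - EID type (0: ipv4, 1: ipv6, 2: mac)
+    @param deid - dst EID
+    @param seid - src EID
+    @param deid_pref_len - dst EID prefix length
+    @param seid_pref_len - src EID prefix length
+    @param is_ip4 - if non-zero the RLOCs are ipv4, else ipv6
+    @param rloc - remote RLOC
+    @param lloc - local RLOC
+    @param pkt_count - number of packets
+    @param bytes - number of bytes
+*/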
+define one_stats_details
+{
+ u32 context;
+ u32 vni;
+ u8 eid_type;
+ u8 deid[16];
+ u8 seid[16];
+ u8 deid_pref_len;
+ u8 seid_pref_len;
+ u8 is_ip4;
+ u8 rloc[16];
+ u8 lloc[16];
+
+ u32 pkt_count;
+ u32 bytes;
+};
+
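+/** \brief Flush ONE statistics
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/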
+autoreply define one_stats_flush
+{
+ u32 client_index;
+ u32 context;
+};
+
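+/** \brief Enable/disable ONE statistics collection
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_en - enable if non-zero, else disable
+*/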
+autoreply define one_stats_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_en;
+};
+
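+/** \brief Get state of ONE statistics collection
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/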
+define show_one_stats_enable_disable
+{
+ u32 client_index;
+ u32 context;
+};
+
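+/** \brief Reply for show_one_stats_enable_disable
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param is_en - enabled if non-zero, else disabled
+*/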
+define show_one_stats_enable_disable_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_en;
+};
+
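+/** \brief Set ONE map-register fallback threshold
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param value - fallback threshold
+*/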
+autoreply define one_map_register_fallback_threshold
+{
+ u32 client_index;
+ u32 context;
+ u32 value;
+};
+
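+/** \brief Get ONE map-register fallback threshold
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/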
+define show_one_map_register_fallback_threshold
+{
+ u32 client_index;
+ u32 context;
+};
+
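+/** \brief Reply for show_one_map_register_fallback_threshold
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param value - fallback threshold
+*/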
+define show_one_map_register_fallback_threshold_reply
+{
+ u32 context;
+ i32 retval;
+ u32 value;
+};
+
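+/** \brief Enable/disable ONE xTR mode
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_en - enable if non-zero, else disable
+*/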
+autoreply define one_enable_disable_xtr_mode
+{
+ u32 client_index;
+ u32 context;
+ u8 is_en;
+};
+
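+/** \brief Get state of ONE xTR mode
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/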
+define one_show_xtr_mode
+{
+ u32 client_index;
+ u32 context;
+};
+
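+/** \brief Reply for one_show_xtr_mode
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param is_en - enabled if non-zero, else disabled
+*/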
+define one_show_xtr_mode_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_en;
+};
+
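+/** \brief Enable/disable ONE PETR mode
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_en - enable if non-zero, else disable
+*/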
+autoreply define one_enable_disable_petr_mode
+{
+ u32 client_index;
+ u32 context;
+ u8 is_en;
+};
+
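+/** \brief Get state of ONE PETR mode
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/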
+define one_show_petr_mode
+{
+ u32 client_index;
+ u32 context;
+};
+
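+/** \brief Reply for one_show_petr_mode
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param is_en - enabled if non-zero, else disabled
+*/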
+define one_show_petr_mode_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_en;
+};
+
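+/** \brief Enable/disable ONE PITR mode
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_en - enable if non-zero, else disable
+*/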
+autoreply define one_enable_disable_pitr_mode
+{
+ u32 client_index;
+ u32 context;
+ u8 is_en;
+};
+
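+/** \brief Get state of ONE PITR mode
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/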
+define one_show_pitr_mode
+{
+ u32 client_index;
+ u32 context;
+};
+
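+/** \brief Reply for one_show_pitr_mode
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param is_en - enabled if non-zero, else disabled
+*/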
+define one_show_pitr_mode_reply
+{
+ u32 context;
+ i32 retval;
+ u8 is_en;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/one_api.c b/src/vnet/lisp-cp/one_api.c
new file mode 100644
index 00000000..e3a2afe7
--- /dev/null
+++ b/src/vnet/lisp-cp/one_api.c
@@ -0,0 +1,1809 @@
+/*
+ *------------------------------------------------------------------
+ * one_api.c - Overlay Network Engine API
+ *
+ * Copyright (c) 2016-2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/lisp-cp/control.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_api_one_remote_locator_t_endian vl_noop_handler
+#define vl_api_one_remote_locator_t_print vl_noop_handler
+#define vl_api_one_local_locator_t_endian vl_noop_handler
+#define vl_api_one_local_locator_t_print vl_noop_handler
+
+#define vl_api_one_add_del_locator_set_t_endian vl_noop_handler
+#define vl_api_one_add_del_locator_set_t_print vl_noop_handler
+#define vl_api_one_add_del_remote_mapping_t_endian vl_noop_handler
+#define vl_api_one_add_del_remote_mapping_t_print vl_noop_handler
+
+#define vl_api_one_l2_arp_entry_t_endian vl_noop_handler
+#define vl_api_one_l2_arp_entry_t_print vl_noop_handler
+#define vl_api_one_add_del_l2_arp_entry vl_noop_handler
+#define vl_api_one_l2_arp_bd_get vl_noop_handler
+
+#define vl_api_one_ndp_entry_t_endian vl_noop_handler
+#define vl_api_one_ndp_entry_t_print vl_noop_handler
+#define vl_api_one_ndp_entries_get_reply_t_endian vl_noop_handler
+#define vl_api_one_ndp_entries_get_reply_t_print vl_noop_handler
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define REPLY_DETAILS(t, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)); \
+ rmp->context = mp->context; \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
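+
+/*
+ * Usage sketch for REPLY_DETAILS (illustrative; VL_API_FOO_DETAILS and
+ * the body are placeholders):
+ *
+ *   REPLY_DETAILS (VL_API_FOO_DETAILS, ({ rmp->bar = ...; }));
+ *
+ * The body runs after _vl_msg_id and context are filled in and before
+ * the message is queued to the client's input queue.
+ */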
+
+#define foreach_vpe_api_msg \
+_(ONE_ADD_DEL_LOCATOR_SET, one_add_del_locator_set) \
+_(ONE_ADD_DEL_LOCATOR, one_add_del_locator) \
+_(ONE_ADD_DEL_LOCAL_EID, one_add_del_local_eid) \
+_(ONE_ADD_DEL_MAP_RESOLVER, one_add_del_map_resolver) \
+_(ONE_ADD_DEL_MAP_SERVER, one_add_del_map_server) \
+_(ONE_ENABLE_DISABLE, one_enable_disable) \
+_(ONE_RLOC_PROBE_ENABLE_DISABLE, one_rloc_probe_enable_disable) \
+_(ONE_MAP_REGISTER_ENABLE_DISABLE, one_map_register_enable_disable) \
+_(ONE_MAP_REGISTER_FALLBACK_THRESHOLD, \
+ one_map_register_fallback_threshold) \
+_(ONE_ADD_DEL_REMOTE_MAPPING, one_add_del_remote_mapping) \
+_(ONE_ADD_DEL_ADJACENCY, one_add_del_adjacency) \
+_(ONE_PITR_SET_LOCATOR_SET, one_pitr_set_locator_set) \
+_(ONE_NSH_SET_LOCATOR_SET, one_nsh_set_locator_set) \
+_(ONE_MAP_REQUEST_MODE, one_map_request_mode) \
+_(ONE_EID_TABLE_ADD_DEL_MAP, one_eid_table_add_del_map) \
+_(ONE_LOCATOR_SET_DUMP, one_locator_set_dump) \
+_(ONE_LOCATOR_DUMP, one_locator_dump) \
+_(ONE_EID_TABLE_DUMP, one_eid_table_dump) \
+_(ONE_MAP_RESOLVER_DUMP, one_map_resolver_dump) \
+_(ONE_MAP_SERVER_DUMP, one_map_server_dump) \
+_(ONE_EID_TABLE_MAP_DUMP, one_eid_table_map_dump) \
+_(ONE_EID_TABLE_VNI_DUMP, one_eid_table_vni_dump) \
+_(ONE_ADJACENCIES_GET, one_adjacencies_get) \
+_(ONE_MAP_REGISTER_SET_TTL, one_map_register_set_ttl) \
+_(SHOW_ONE_NSH_MAPPING, show_one_nsh_mapping) \
+_(SHOW_ONE_RLOC_PROBE_STATE, show_one_rloc_probe_state) \
+_(SHOW_ONE_MAP_REGISTER_STATE, show_one_map_register_state) \
+_(SHOW_ONE_MAP_REGISTER_TTL, show_one_map_register_ttl) \
+_(SHOW_ONE_MAP_REGISTER_FALLBACK_THRESHOLD, \
+ show_one_map_register_fallback_threshold) \
+_(SHOW_ONE_STATUS, show_one_status) \
+_(ONE_ADD_DEL_MAP_REQUEST_ITR_RLOCS, \
+ one_add_del_map_request_itr_rlocs) \
+_(ONE_GET_MAP_REQUEST_ITR_RLOCS, one_get_map_request_itr_rlocs) \
+_(SHOW_ONE_PITR, show_one_pitr) \
+_(SHOW_ONE_MAP_REQUEST_MODE, show_one_map_request_mode) \
+_(ONE_USE_PETR, one_use_petr) \
+_(SHOW_ONE_USE_PETR, show_one_use_petr) \
+_(SHOW_ONE_STATS_ENABLE_DISABLE, show_one_stats_enable_disable) \
+_(ONE_STATS_ENABLE_DISABLE, one_stats_enable_disable) \
+_(ONE_STATS_DUMP, one_stats_dump) \
+_(ONE_STATS_FLUSH, one_stats_flush) \
+_(ONE_L2_ARP_BD_GET, one_l2_arp_bd_get) \
+_(ONE_L2_ARP_ENTRIES_GET, one_l2_arp_entries_get) \
+_(ONE_ADD_DEL_L2_ARP_ENTRY, one_add_del_l2_arp_entry) \
+_(ONE_ADD_DEL_NDP_ENTRY, one_add_del_ndp_entry) \
+_(ONE_NDP_BD_GET, one_ndp_bd_get) \
+_(ONE_NDP_ENTRIES_GET, one_ndp_entries_get) \
+_(ONE_SET_TRANSPORT_PROTOCOL, one_set_transport_protocol) \
+_(ONE_GET_TRANSPORT_PROTOCOL, one_get_transport_protocol)
+
+static locator_t *
+unformat_one_locs (vl_api_one_remote_locator_t * rmt_locs, u32 rloc_num)
+{
+ u32 i;
+ locator_t *locs = 0, loc;
+ vl_api_one_remote_locator_t *r;
+
+ for (i = 0; i < rloc_num; i++)
+ {
+ /* remote locators */
+ r = &rmt_locs[i];
+ memset (&loc, 0, sizeof (loc));
+ gid_address_ip_set (&loc.address, &r->addr, r->is_ip4 ? IP4 : IP6);
+
+ loc.priority = r->priority;
+ loc.weight = r->weight;
+
+ vec_add1 (locs, loc);
+ }
+ return locs;
+}
+
+static void
+vl_api_one_map_register_set_ttl_t_handler (vl_api_one_map_register_set_ttl_t *
+ mp)
+{
+ vl_api_one_map_register_set_ttl_reply_t *rmp;
+ int rv = 0;
+
+ mp->ttl = clib_net_to_host_u32 (mp->ttl);
+ rv = vnet_lisp_map_register_set_ttl (mp->ttl);
+
+ REPLY_MACRO (VL_API_ONE_MAP_REGISTER_SET_TTL_REPLY);
+}
+
+static void
+ vl_api_show_one_map_register_ttl_t_handler
+ (vl_api_show_one_map_register_ttl_t * mp)
+{
+ vl_api_show_one_map_register_ttl_reply_t *rmp;
+ int rv = 0;
+
+ u32 ttl = vnet_lisp_map_register_get_ttl ();
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_TTL_REPLY,
+ ({
+ rmp->ttl = clib_host_to_net_u32 (ttl);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_one_add_del_locator_set_t_handler (vl_api_one_add_del_locator_set_t *
+ mp)
+{
+ vl_api_one_add_del_locator_set_reply_t *rmp;
+ int rv = 0;
+ vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ locator_t locator;
+ vl_api_one_local_locator_t *ls_loc;
+ u32 ls_index = ~0, locator_num;
+ u8 *locator_name = NULL;
+ int i;
+
+ memset (a, 0, sizeof (a[0]));
+
+ locator_name = format (0, "%s", mp->locator_set_name);
+
+ a->name = locator_name;
+ a->is_add = mp->is_add;
+ a->local = 1;
+ locator_num = clib_net_to_host_u32 (mp->locator_num);
+
+ memset (&locator, 0, sizeof (locator));
+ for (i = 0; i < locator_num; i++)
+ {
+ ls_loc = &mp->locators[i];
+ VALIDATE_SW_IF_INDEX (ls_loc);
+
+      locator.sw_if_index = ntohl (ls_loc->sw_if_index);
+ locator.priority = ls_loc->priority;
+ locator.weight = ls_loc->weight;
+ locator.local = 1;
+ vec_add1 (a->locators, locator);
+ }
+
+ rv = vnet_lisp_add_del_locator_set (a, &ls_index);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ vec_free (locator_name);
+ vec_free (a->locators);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_ONE_ADD_DEL_LOCATOR_SET_REPLY,
+ ({
+ rmp->ls_index = clib_host_to_net_u32 (ls_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_one_add_del_locator_t_handler (vl_api_one_add_del_locator_t * mp)
+{
+ vl_api_one_add_del_locator_reply_t *rmp;
+ int rv = 0;
+ locator_t locator, *locators = NULL;
+ vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ u32 ls_index = ~0;
+ u8 *locator_name = NULL;
+
+ memset (&locator, 0, sizeof (locator));
+ memset (a, 0, sizeof (a[0]));
+
+ locator.sw_if_index = ntohl (mp->sw_if_index);
+ locator.priority = mp->priority;
+ locator.weight = mp->weight;
+ locator.local = 1;
+ vec_add1 (locators, locator);
+
+ locator_name = format (0, "%s", mp->locator_set_name);
+
+ a->name = locator_name;
+ a->locators = locators;
+ a->is_add = mp->is_add;
+ a->local = 1;
+
+ rv = vnet_lisp_add_del_locator (a, NULL, &ls_index);
+
+ vec_free (locators);
+ vec_free (locator_name);
+
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_LOCATOR_REPLY);
+}
+
+typedef struct
+{
+ u32 spi;
+ u8 si;
+} __attribute__ ((__packed__)) lisp_nsh_api_t;
+
+static int
+unformat_one_eid_api (gid_address_t * dst, u32 vni, u8 type, void *src,
+ u8 len)
+{
+ lisp_nsh_api_t *nsh;
+
+ switch (type)
+ {
+ case 0: /* ipv4 */
+ gid_address_type (dst) = GID_ADDR_IP_PREFIX;
+ gid_address_ip_set (dst, src, IP4);
+ gid_address_ippref_len (dst) = len;
+ ip_prefix_normalize (&gid_address_ippref (dst));
+ break;
+ case 1: /* ipv6 */
+ gid_address_type (dst) = GID_ADDR_IP_PREFIX;
+ gid_address_ip_set (dst, src, IP6);
+ gid_address_ippref_len (dst) = len;
+ ip_prefix_normalize (&gid_address_ippref (dst));
+ break;
+ case 2: /* l2 mac */
+ gid_address_type (dst) = GID_ADDR_MAC;
+ clib_memcpy (&gid_address_mac (dst), src, 6);
+ break;
+ case 3: /* NSH */
+ gid_address_type (dst) = GID_ADDR_NSH;
+ nsh = src;
+ gid_address_nsh_spi (dst) = clib_net_to_host_u32 (nsh->spi);
+ gid_address_nsh_si (dst) = nsh->si;
+ break;
+ default:
+ /* unknown type */
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ gid_address_vni (dst) = vni;
+
+ return 0;
+}
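+
+/*
+ * Example (illustrative): decoding an IPv4 /24 EID in VNI 7 from a
+ * message body, where type 0 selects ipv4:
+ *
+ *   rv = unformat_one_eid_api (eid, 7, 0, mp->eid, 24);
+ */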
+
+static void
+vl_api_one_add_del_local_eid_t_handler (vl_api_one_add_del_local_eid_t * mp)
+{
+ vl_api_one_add_del_local_eid_reply_t *rmp;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ int rv = 0;
+ gid_address_t _eid, *eid = &_eid;
+ uword *p = NULL;
+ u32 locator_set_index = ~0, map_index = ~0;
+ vnet_lisp_add_del_mapping_args_t _a, *a = &_a;
+ u8 *name = NULL, *key = NULL;
+ memset (a, 0, sizeof (a[0]));
+ memset (eid, 0, sizeof (eid[0]));
+
+ rv = unformat_one_eid_api (eid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->eid, mp->prefix_len);
+ if (rv)
+ goto out;
+
+ if (gid_address_type (eid) == GID_ADDR_NSH)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ name = format (0, "%s", mp->locator_set_name);
+ p = hash_get_mem (lcm->locator_set_index_by_name, name);
+ if (!p)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+ locator_set_index = p[0];
+
+ if (*mp->key)
+ key = format (0, "%s", mp->key);
+
+ /* XXX treat batch configuration */
+ a->is_add = mp->is_add;
+ gid_address_copy (&a->eid, eid);
+ a->locator_set_index = locator_set_index;
+ a->local = 1;
+ a->key = key;
+ a->key_id = clib_net_to_host_u16 (mp->key_id);
+
+ rv = vnet_lisp_add_del_local_mapping (a, &map_index);
+
+out:
+ vec_free (name);
+ vec_free (key);
+ gid_address_free (&a->eid);
+
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_LOCAL_EID_REPLY);
+}
+
+static void
+ vl_api_one_eid_table_add_del_map_t_handler
+ (vl_api_one_eid_table_add_del_map_t * mp)
+{
+ vl_api_one_eid_table_add_del_map_reply_t *rmp;
+ int rv = 0;
+ rv = vnet_lisp_eid_table_map (clib_net_to_host_u32 (mp->vni),
+ clib_net_to_host_u32 (mp->dp_table),
+ mp->is_l2, mp->is_add);
+  REPLY_MACRO (VL_API_ONE_EID_TABLE_ADD_DEL_MAP_REPLY);
+}
+
+static void
+vl_api_one_add_del_map_server_t_handler (vl_api_one_add_del_map_server_t * mp)
+{
+ vl_api_one_add_del_map_server_reply_t *rmp;
+ int rv = 0;
+ ip_address_t addr;
+
+ memset (&addr, 0, sizeof (addr));
+
+ ip_address_set (&addr, mp->ip_address, mp->is_ipv6 ? IP6 : IP4);
+ rv = vnet_lisp_add_del_map_server (&addr, mp->is_add);
+
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_MAP_SERVER_REPLY);
+}
+
+static void
+vl_api_one_add_del_map_resolver_t_handler (vl_api_one_add_del_map_resolver_t
+ * mp)
+{
+ vl_api_one_add_del_map_resolver_reply_t *rmp;
+ int rv = 0;
+ vnet_lisp_add_del_map_resolver_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (a[0]));
+
+ a->is_add = mp->is_add;
+ ip_address_set (&a->address, mp->ip_address, mp->is_ipv6 ? IP6 : IP4);
+
+ rv = vnet_lisp_add_del_map_resolver (a);
+
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_MAP_RESOLVER_REPLY);
+}
+
+static void
+ vl_api_one_map_register_enable_disable_t_handler
+ (vl_api_one_map_register_enable_disable_t * mp)
+{
+ vl_api_one_map_register_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ vnet_lisp_map_register_enable_disable (mp->is_enabled);
+  REPLY_MACRO (VL_API_ONE_MAP_REGISTER_ENABLE_DISABLE_REPLY);
+}
+
+static void
+ vl_api_one_rloc_probe_enable_disable_t_handler
+ (vl_api_one_rloc_probe_enable_disable_t * mp)
+{
+ vl_api_one_rloc_probe_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ vnet_lisp_rloc_probe_enable_disable (mp->is_enabled);
+  REPLY_MACRO (VL_API_ONE_RLOC_PROBE_ENABLE_DISABLE_REPLY);
+}
+
+static void
+vl_api_one_enable_disable_t_handler (vl_api_one_enable_disable_t * mp)
+{
+ vl_api_one_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ vnet_lisp_enable_disable (mp->is_en);
+ REPLY_MACRO (VL_API_ONE_ENABLE_DISABLE_REPLY);
+}
+
+static void
+ vl_api_show_one_map_request_mode_t_handler
+ (vl_api_show_one_map_request_mode_t * mp)
+{
+ int rv = 0;
+ vl_api_show_one_map_request_mode_reply_t *rmp;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_SHOW_ONE_MAP_REQUEST_MODE_REPLY,
+ ({
+ rmp->mode = vnet_lisp_get_map_request_mode ();
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_one_map_request_mode_t_handler (vl_api_one_map_request_mode_t * mp)
+{
+ vl_api_one_map_request_mode_reply_t *rmp;
+ int rv = 0;
+
+ rv = vnet_lisp_set_map_request_mode (mp->mode);
+
+ REPLY_MACRO (VL_API_ONE_MAP_REQUEST_MODE_REPLY);
+}
+
+static void
+vl_api_one_nsh_set_locator_set_t_handler (vl_api_one_nsh_set_locator_set_t
+ * mp)
+{
+ vl_api_one_nsh_set_locator_set_reply_t *rmp;
+ int rv = 0;
+ u8 *ls_name = 0;
+
+ ls_name = format (0, "%s", mp->ls_name);
+ rv = vnet_lisp_nsh_set_locator_set (ls_name, mp->is_add);
+ vec_free (ls_name);
+
+  REPLY_MACRO (VL_API_ONE_NSH_SET_LOCATOR_SET_REPLY);
+}
+
+static void
+vl_api_one_pitr_set_locator_set_t_handler (vl_api_one_pitr_set_locator_set_t
+ * mp)
+{
+ vl_api_one_pitr_set_locator_set_reply_t *rmp;
+ int rv = 0;
+ u8 *ls_name = 0;
+
+ ls_name = format (0, "%s", mp->ls_name);
+ rv = vnet_lisp_pitr_set_locator_set (ls_name, mp->is_add);
+ vec_free (ls_name);
+
+ REPLY_MACRO (VL_API_ONE_PITR_SET_LOCATOR_SET_REPLY);
+}
+
+static void
+vl_api_one_use_petr_t_handler (vl_api_one_use_petr_t * mp)
+{
+ vl_api_one_use_petr_reply_t *rmp;
+ int rv = 0;
+ ip_address_t addr;
+
+ ip_address_set (&addr, &mp->address, mp->is_ip4 ? IP4 : IP6);
+ rv = vnet_lisp_use_petr (&addr, mp->is_add);
+
+ REPLY_MACRO (VL_API_ONE_USE_PETR_REPLY);
+}
+
+static void
+vl_api_show_one_use_petr_t_handler (vl_api_show_one_use_petr_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_show_one_use_petr_reply_t *rmp = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls = 0;
+ int rv = 0;
+ locator_t *loc = 0;
+ u8 status = 0;
+ gid_address_t addr;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ memset (&addr, 0, sizeof (addr));
+ status = lcm->flags & LISP_FLAG_USE_PETR;
+ if (status)
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->petr_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls =
+ pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index);
+ loc = pool_elt_at_index (lcm->locator_pool, ls->locator_indices[0]);
+ gid_address_copy (&addr, &loc->address);
+ }
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_ONE_USE_PETR_REPLY,
+ {
+ rmp->status = status;
+ ip_address_t *ip = &gid_address_ip (&addr);
+ switch (ip_addr_version (ip))
+ {
+ case IP4:
+ clib_memcpy (rmp->address, &ip_addr_v4 (ip),
+ sizeof (ip_addr_v4 (ip)));
+ break;
+
+ case IP6:
+ clib_memcpy (rmp->address, &ip_addr_v6 (ip),
+ sizeof (ip_addr_v6 (ip)));
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ rmp->is_ip4 = (gid_address_ip_version (&addr) == IP4);
+ });
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_one_add_del_map_request_itr_rlocs_t_handler
+ (vl_api_one_add_del_map_request_itr_rlocs_t * mp)
+{
+ vl_api_one_add_del_map_request_itr_rlocs_reply_t *rmp;
+ int rv = 0;
+ u8 *locator_set_name = NULL;
+ vnet_lisp_add_del_mreq_itr_rloc_args_t _a, *a = &_a;
+
+ locator_set_name = format (0, "%s", mp->locator_set_name);
+
+ a->is_add = mp->is_add;
+ a->locator_set_name = locator_set_name;
+
+ rv = vnet_lisp_add_del_mreq_itr_rlocs (a);
+
+ vec_free (locator_set_name);
+
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_MAP_REQUEST_ITR_RLOCS_REPLY);
+}
+
+static void
+ vl_api_one_add_del_remote_mapping_t_handler
+ (vl_api_one_add_del_remote_mapping_t * mp)
+{
+ locator_t *rlocs = 0;
+ vl_api_one_add_del_remote_mapping_reply_t *rmp;
+ int rv = 0;
+ gid_address_t _eid, *eid = &_eid;
+ u32 rloc_num = clib_net_to_host_u32 (mp->rloc_num);
+
+ memset (eid, 0, sizeof (eid[0]));
+
+ rv = unformat_one_eid_api (eid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->eid, mp->eid_len);
+ if (rv)
+ goto send_reply;
+
+ rlocs = unformat_one_locs (mp->rlocs, rloc_num);
+
+ if (!mp->is_add)
+ {
+      vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+      memset (a, 0, sizeof (a[0]));	/* leave leid unset for delete */
+      gid_address_copy (&a->reid, eid);
+ a->is_add = 0;
+ rv = vnet_lisp_add_del_adjacency (a);
+ if (rv)
+ {
+ goto out;
+ }
+ }
+
+ /* NOTE: for now this works as a static remote mapping, i.e.,
+ * not authoritative and ttl infinite. */
+ if (mp->is_add)
+ {
+ vnet_lisp_add_del_mapping_args_t _m_args, *m_args = &_m_args;
+ memset (m_args, 0, sizeof (m_args[0]));
+ gid_address_copy (&m_args->eid, eid);
+ m_args->action = mp->action;
+ m_args->is_static = 1;
+ m_args->ttl = ~0;
+ m_args->authoritative = 0;
+ rv = vnet_lisp_add_mapping (m_args, rlocs, NULL, NULL);
+ }
+ else
+ rv = vnet_lisp_del_mapping (eid, NULL);
+
+ if (mp->del_all)
+ vnet_lisp_clear_all_remote_adjacencies ();
+
+out:
+ vec_free (rlocs);
+send_reply:
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_REMOTE_MAPPING_REPLY);
+}
+
+static void
+vl_api_one_add_del_adjacency_t_handler (vl_api_one_add_del_adjacency_t * mp)
+{
+ vl_api_one_add_del_adjacency_reply_t *rmp;
+ vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+
+ int rv = 0;
+ memset (a, 0, sizeof (a[0]));
+
+ rv = unformat_one_eid_api (&a->leid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->leid, mp->leid_len);
+ rv |= unformat_one_eid_api (&a->reid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->reid, mp->reid_len);
+
+ if (rv)
+ goto send_reply;
+
+ a->is_add = mp->is_add;
+ rv = vnet_lisp_add_del_adjacency (a);
+
+send_reply:
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_ADJACENCY_REPLY);
+}
+
+static void
+send_one_locator_details (lisp_cp_main_t * lcm,
+ locator_t * loc,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_one_locator_details_t *rmp;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ONE_LOCATOR_DETAILS);
+ rmp->context = context;
+
+ rmp->local = loc->local;
+ if (loc->local)
+ {
+ rmp->sw_if_index = ntohl (loc->sw_if_index);
+ }
+ else
+ {
+ rmp->is_ipv6 = gid_address_ip_version (&loc->address);
+ ip_address_copy_addr (rmp->ip_address, &gid_address_ip (&loc->address));
+ }
+ rmp->priority = loc->priority;
+ rmp->weight = loc->weight;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_one_locator_dump_t_handler (vl_api_one_locator_dump_t * mp)
+{
+ u8 *ls_name = 0;
+ unix_shared_memory_queue_t *q = 0;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *lsit = 0;
+ locator_t *loc = 0;
+ u32 ls_index = ~0, *locit = 0;
+ uword *p = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->is_index_set)
+    ls_index = ntohl (mp->ls_index);
+ else
+ {
+ /* make sure we get a proper C-string */
+ mp->ls_name[sizeof (mp->ls_name) - 1] = 0;
+ ls_name = format (0, "%s", mp->ls_name);
+ p = hash_get_mem (lcm->locator_set_index_by_name, ls_name);
+ if (!p)
+ goto out;
+ ls_index = p[0];
+ }
+
+  if (pool_is_free_index (lcm->locator_set_pool, ls_index))
+    goto out;			/* don't leak ls_name */
+
+ lsit = pool_elt_at_index (lcm->locator_set_pool, ls_index);
+
+ vec_foreach (locit, lsit->locator_indices)
+ {
+ loc = pool_elt_at_index (lcm->locator_pool, locit[0]);
+ send_one_locator_details (lcm, loc, q, mp->context);
+ };
+out:
+ vec_free (ls_name);
+}
+
+static void
+send_one_locator_set_details (lisp_cp_main_t * lcm,
+ locator_set_t * lsit,
+ unix_shared_memory_queue_t * q,
+ u32 context, u32 ls_index)
+{
+ vl_api_one_locator_set_details_t *rmp;
+ u8 *str = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ONE_LOCATOR_SET_DETAILS);
+ rmp->context = context;
+
+ rmp->ls_index = htonl (ls_index);
+ if (lsit->local)
+ {
+ ASSERT (lsit->name != NULL);
+ strncpy ((char *) rmp->ls_name, (char *) lsit->name,
+ vec_len (lsit->name));
+ }
+ else
+ {
+ str = format (0, "<remote-%d>", ls_index);
+ strncpy ((char *) rmp->ls_name, (char *) str, vec_len (str));
+ vec_free (str);
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_one_locator_set_dump_t_handler (vl_api_one_locator_set_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *lsit = NULL;
+ u8 filter;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ filter = mp->filter;
+ /* *INDENT-OFF* */
+ pool_foreach (lsit, lcm->locator_set_pool,
+ ({
+ if (filter && !((1 == filter && lsit->local) ||
+ (2 == filter && !lsit->local)))
+ {
+ continue;
+ }
+ send_one_locator_set_details (lcm, lsit, q, mp->context,
+ lsit - lcm->locator_set_pool);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+one_fid_put_api (u8 * dst, fid_address_t * src, u8 * prefix_length)
+{
+ ASSERT (prefix_length);
+ ip_prefix_t *ippref = &fid_addr_ippref (src);
+
+ switch (fid_addr_type (src))
+ {
+ case FID_ADDR_IP_PREF:
+ if (ip_prefix_version (ippref) == IP4)
+ clib_memcpy (dst, &ip_prefix_v4 (ippref), 4);
+ else
+ clib_memcpy (dst, &ip_prefix_v6 (ippref), 16);
+ prefix_length[0] = ip_prefix_len (ippref);
+ break;
+
+ case FID_ADDR_MAC:
+ prefix_length[0] = 0;
+ clib_memcpy (dst, fid_addr_mac (src), 6);
+ break;
+
+ default:
+ clib_warning ("Unknown FID type %d!", fid_addr_type (src));
+ break;
+ }
+}
+
+static u8
+fid_type_to_api_type (fid_address_t * fid)
+{
+ ip_prefix_t *ippref;
+
+ switch (fid_addr_type (fid))
+ {
+ case FID_ADDR_IP_PREF:
+ ippref = &fid_addr_ippref (fid);
+ if (ip_prefix_version (ippref) == IP4)
+ return 0;
+ else if (ip_prefix_version (ippref) == IP6)
+ return 1;
+ else
+ return ~0;
+
+ case FID_ADDR_MAC:
+ return 2;
+ case FID_ADDR_NSH:
+ return 3;
+ }
+
+ return ~0;
+}
+
+static void
+send_one_eid_table_details (mapping_t * mapit,
+ unix_shared_memory_queue_t * q,
+ u32 context, u8 filter)
+{
+ fid_address_t *fid;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *ls = 0;
+ vl_api_one_eid_table_details_t *rmp = NULL;
+ gid_address_t *gid = NULL;
+ u8 *mac = 0;
+ ip_prefix_t *ip_prefix = NULL;
+
+ if (mapit->pitr_set || mapit->nsh_set)
+ return;
+
+ switch (filter)
+ {
+ case 0: /* all mappings */
+ break;
+
+ case 1: /* local only */
+ if (!mapit->local)
+ return;
+ break;
+ case 2: /* remote only */
+ if (mapit->local)
+ return;
+ break;
+ default:
+ clib_warning ("Filter error, unknown filter: %d", filter);
+ return;
+ }
+
+ gid = &mapit->eid;
+ ip_prefix = &gid_address_ippref (gid);
+ mac = gid_address_mac (gid);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ONE_EID_TABLE_DETAILS);
+
+ ls = pool_elt_at_index (lcm->locator_set_pool, mapit->locator_set_index);
+ if (vec_len (ls->locator_indices) == 0)
+ rmp->locator_set_index = ~0;
+ else
+ rmp->locator_set_index = clib_host_to_net_u32 (mapit->locator_set_index);
+
+ rmp->is_local = mapit->local;
+ rmp->ttl = clib_host_to_net_u32 (mapit->ttl);
+ rmp->action = mapit->action;
+ rmp->authoritative = mapit->authoritative;
+
+ switch (gid_address_type (gid))
+ {
+ case GID_ADDR_SRC_DST:
+ rmp->is_src_dst = 1;
+ fid = &gid_address_sd_src (gid);
+ rmp->eid_type = fid_type_to_api_type (fid);
+ one_fid_put_api (rmp->seid, &gid_address_sd_src (gid),
+ &rmp->seid_prefix_len);
+ one_fid_put_api (rmp->eid, &gid_address_sd_dst (gid),
+ &rmp->eid_prefix_len);
+ break;
+ case GID_ADDR_IP_PREFIX:
+ rmp->eid_prefix_len = ip_prefix_len (ip_prefix);
+ if (ip_prefix_version (ip_prefix) == IP4)
+ {
+ rmp->eid_type = 0; /* ipv4 type */
+ clib_memcpy (rmp->eid, &ip_prefix_v4 (ip_prefix),
+ sizeof (ip_prefix_v4 (ip_prefix)));
+ }
+ else
+ {
+ rmp->eid_type = 1; /* ipv6 type */
+ clib_memcpy (rmp->eid, &ip_prefix_v6 (ip_prefix),
+ sizeof (ip_prefix_v6 (ip_prefix)));
+ }
+ break;
+ case GID_ADDR_MAC:
+ rmp->eid_type = 2; /* l2 mac type */
+ clib_memcpy (rmp->eid, mac, 6);
+ break;
+ case GID_ADDR_NSH:
+ rmp->eid_type = 3; /* NSH type */
+ lisp_nsh_api_t nsh;
+ nsh.spi = clib_host_to_net_u32 (gid_address_nsh_spi (gid));
+ nsh.si = gid_address_nsh_si (gid);
+ clib_memcpy (rmp->eid, &nsh, sizeof (nsh));
+ break;
+ default:
+ ASSERT (0);
+ }
+ rmp->context = context;
+ rmp->vni = clib_host_to_net_u32 (gid_address_vni (gid));
+ rmp->key_id = clib_host_to_net_u16 (mapit->key_id);
+ memcpy (rmp->key, mapit->key, vec_len (mapit->key));
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_one_eid_table_dump_t_handler (vl_api_one_eid_table_dump_t * mp)
+{
+ u32 mi;
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *mapit = NULL;
+ gid_address_t _eid, *eid = &_eid;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->eid_set)
+ {
+ memset (eid, 0, sizeof (*eid));
+
+ unformat_one_eid_api (eid, clib_net_to_host_u32 (mp->vni),
+ mp->eid_type, mp->eid, mp->prefix_length);
+
+ mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, eid);
+ if ((u32) ~ 0 == mi)
+ return;
+
+ mapit = pool_elt_at_index (lcm->mapping_pool, mi);
+ send_one_eid_table_details (mapit, q, mp->context,
+ 0 /* ignore filter */ );
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (mapit, lcm->mapping_pool,
+ ({
+ send_one_eid_table_details(mapit, q, mp->context,
+ mp->filter);
+ }));
+ /* *INDENT-ON* */
+ }
+}
+
+static void
+send_one_map_server_details (ip_address_t * ip,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_one_map_server_details_t *rmp = NULL;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ONE_MAP_SERVER_DETAILS);
+
+ switch (ip_addr_version (ip))
+ {
+ case IP4:
+ rmp->is_ipv6 = 0;
+ clib_memcpy (rmp->ip_address, &ip_addr_v4 (ip),
+ sizeof (ip_addr_v4 (ip)));
+ break;
+
+ case IP6:
+ rmp->is_ipv6 = 1;
+ clib_memcpy (rmp->ip_address, &ip_addr_v6 (ip),
+ sizeof (ip_addr_v6 (ip)));
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_one_map_server_dump_t_handler (vl_api_one_map_server_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t *mr;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ vec_foreach (mr, lcm->map_servers)
+ {
+ send_one_map_server_details (&mr->address, q, mp->context);
+ }
+}
+
+static void
+send_one_map_resolver_details (ip_address_t * ip,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_one_map_resolver_details_t *rmp = NULL;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ONE_MAP_RESOLVER_DETAILS);
+
+ switch (ip_addr_version (ip))
+ {
+ case IP4:
+ rmp->is_ipv6 = 0;
+ clib_memcpy (rmp->ip_address, &ip_addr_v4 (ip),
+ sizeof (ip_addr_v4 (ip)));
+ break;
+
+ case IP6:
+ rmp->is_ipv6 = 1;
+ clib_memcpy (rmp->ip_address, &ip_addr_v6 (ip),
+ sizeof (ip_addr_v6 (ip)));
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_one_map_resolver_dump_t_handler (vl_api_one_map_resolver_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ lisp_msmr_t *mr;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ vec_foreach (mr, lcm->map_resolvers)
+ {
+ send_one_map_resolver_details (&mr->address, q, mp->context);
+ }
+}
+
+static void
+send_eid_table_map_pair (hash_pair_t * p,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_one_eid_table_map_details_t *rmp = NULL;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ONE_EID_TABLE_MAP_DETAILS);
+
+ rmp->vni = clib_host_to_net_u32 (p->key);
+ rmp->dp_table = clib_host_to_net_u32 (p->value[0]);
+ rmp->context = context;
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_one_eid_table_map_dump_t_handler (vl_api_one_eid_table_map_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ hash_pair_t *p;
+ uword *vni_table = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->is_l2)
+ {
+ vni_table = lcm->bd_id_by_vni;
+ }
+ else
+ {
+ vni_table = lcm->table_id_by_vni;
+ }
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vni_table,
+ ({
+ send_eid_table_map_pair (p, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+send_eid_table_vni (u32 vni, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_one_eid_table_vni_details_t *rmp = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_ONE_EID_TABLE_VNI_DETAILS);
+ rmp->context = context;
+ rmp->vni = clib_host_to_net_u32 (vni);
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+one_adjacency_copy (vl_api_one_adjacency_t * dst, lisp_adjacency_t * adjs)
+{
+ lisp_adjacency_t *adj;
+ vl_api_one_adjacency_t a;
+ u32 i, n = vec_len (adjs);
+ lisp_nsh_api_t nsh;
+
+ for (i = 0; i < n; i++)
+ {
+ adj = vec_elt_at_index (adjs, i);
+ memset (&a, 0, sizeof (a));
+
+ switch (gid_address_type (&adj->reid))
+ {
+ case GID_ADDR_IP_PREFIX:
+ a.reid_prefix_len = gid_address_ippref_len (&adj->reid);
+ a.leid_prefix_len = gid_address_ippref_len (&adj->leid);
+ if (gid_address_ip_version (&adj->reid) == IP4)
+ {
+ a.eid_type = 0; /* ipv4 type */
+ clib_memcpy (a.reid, &gid_address_ip (&adj->reid), 4);
+ clib_memcpy (a.leid, &gid_address_ip (&adj->leid), 4);
+ }
+ else
+ {
+ a.eid_type = 1; /* ipv6 type */
+ clib_memcpy (a.reid, &gid_address_ip (&adj->reid), 16);
+ clib_memcpy (a.leid, &gid_address_ip (&adj->leid), 16);
+ }
+ break;
+ case GID_ADDR_MAC:
+ a.eid_type = 2; /* l2 mac type */
+ mac_copy (a.reid, gid_address_mac (&adj->reid));
+ mac_copy (a.leid, gid_address_mac (&adj->leid));
+ break;
+ case GID_ADDR_NSH:
+ a.eid_type = 3; /* NSH type */
+ nsh.spi = clib_host_to_net_u32 (gid_address_nsh_spi (&adj->reid));
+ nsh.si = gid_address_nsh_si (&adj->reid);
+ clib_memcpy (a.reid, &nsh, sizeof (nsh));
+
+ nsh.spi = clib_host_to_net_u32 (gid_address_nsh_spi (&adj->leid));
+ nsh.si = gid_address_nsh_si (&adj->leid);
+ clib_memcpy (a.leid, &nsh, sizeof (nsh));
+ break;
+ default:
+ ASSERT (0);
+ }
+ dst[i] = a;
+ }
+}
+
+static void
+ vl_api_show_one_rloc_probe_state_t_handler
+ (vl_api_show_one_rloc_probe_state_t * mp)
+{
+ vl_api_show_one_rloc_probe_state_reply_t *rmp = 0;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_ONE_RLOC_PROBE_STATE_REPLY,
+ {
+ rmp->is_enabled = vnet_lisp_rloc_probe_state_get ();
+ });
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_show_one_map_register_state_t_handler
+ (vl_api_show_one_map_register_state_t * mp)
+{
+ vl_api_show_one_map_register_state_reply_t *rmp = 0;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_STATE_REPLY,
+ {
+ rmp->is_enabled = vnet_lisp_map_register_state_get ();
+ });
+ /* *INDENT-ON* */
+}
+
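+/*
+ * REPLY_MACRO4 is the variable-length variant: 'size' extra bytes
+ * are allocated past the fixed reply header to hold the adjacency
+ * array filled in by the block below.
+ */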
+static void
+vl_api_one_adjacencies_get_t_handler (vl_api_one_adjacencies_get_t * mp)
+{
+ vl_api_one_adjacencies_get_reply_t *rmp = 0;
+ lisp_adjacency_t *adjs = 0;
+ int rv = 0;
+ u32 size = ~0;
+ u32 vni = clib_net_to_host_u32 (mp->vni);
+
+ adjs = vnet_lisp_adjacencies_get_by_vni (vni);
+ size = vec_len (adjs) * sizeof (vl_api_one_adjacency_t);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_ONE_ADJACENCIES_GET_REPLY, size,
+ {
+ rmp->count = clib_host_to_net_u32 (vec_len (adjs));
+ one_adjacency_copy (rmp->adjacencies, adjs);
+ });
+ /* *INDENT-ON* */
+
+ vec_free (adjs);
+}
+
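+/*
+ * A VNI may be mapped in both the L3 (vrf) and L2 (bd) tables, so
+ * the keys are first collected into a temporary hash used as a set,
+ * deduplicating them before the per-VNI details messages are sent.
+ */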
+static void
+vl_api_one_eid_table_vni_dump_t_handler (vl_api_one_eid_table_vni_dump_t * mp)
+{
+ hash_pair_t *p;
+ u32 *vnis = 0;
+ unix_shared_memory_queue_t *q = 0;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, lcm->table_id_by_vni,
+ ({
+ hash_set (vnis, p->key, 0);
+ }));
+
+ hash_foreach_pair (p, lcm->bd_id_by_vni,
+ ({
+ hash_set (vnis, p->key, 0);
+ }));
+
+ hash_foreach_pair (p, vnis,
+ ({
+ send_eid_table_vni (p->key, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+
+ hash_free (vnis);
+}
+
+static void
+vl_api_show_one_status_t_handler (vl_api_show_one_status_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_show_one_status_reply_t *rmp = NULL;
+ int rv = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_SHOW_ONE_STATUS_REPLY,
+ ({
+ rmp->gpe_status = vnet_lisp_gpe_enable_disable_status ();
+ rmp->feature_status = vnet_lisp_enable_disable_status ();
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_one_get_map_request_itr_rlocs_t_handler
+ (vl_api_one_get_map_request_itr_rlocs_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_one_get_map_request_itr_rlocs_reply_t *rmp = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *loc_set = 0;
+ u8 *tmp_str = 0;
+ int rv = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (~0 == lcm->mreq_itr_rlocs)
+ {
+ tmp_str = format (0, " ");
+ }
+ else
+ {
+ loc_set =
+ pool_elt_at_index (lcm->locator_set_pool, lcm->mreq_itr_rlocs);
+ tmp_str = format (0, "%s", loc_set->name);
+ }
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_ONE_GET_MAP_REQUEST_ITR_RLOCS_REPLY,
+ ({
+ strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
+ ARRAY_LEN(rmp->locator_set_name) - 1);
+ }));
+ /* *INDENT-ON* */
+
+ vec_free (tmp_str);
+}
+
+static void
+vl_api_show_one_nsh_mapping_t_handler (vl_api_show_one_nsh_mapping_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_show_one_nsh_mapping_reply_t *rmp = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls = 0;
+ u8 *tmp_str = 0;
+ u8 is_set = 0;
+ int rv = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (lcm->nsh_map_index == (u32) ~ 0)
+ {
+ tmp_str = format (0, "N/A");
+ }
+ else
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->nsh_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls =
+ pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index);
+ tmp_str = format (0, "%s", ls->name);
+ is_set = 1;
+ }
+ else
+ {
+ tmp_str = format (0, "N/A");
+ }
+ }
+ vec_add1 (tmp_str, 0);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_SHOW_ONE_NSH_MAPPING_REPLY,
+ ({
+ rmp->is_set = is_set;
+ strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
+ ARRAY_LEN(rmp->locator_set_name) - 1);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_show_one_pitr_t_handler (vl_api_show_one_pitr_t * mp)
+{
+ unix_shared_memory_queue_t *q = NULL;
+ vl_api_show_one_pitr_reply_t *rmp = NULL;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls = 0;
+ u8 *tmp_str = 0;
+ int rv = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (!lcm->lisp_pitr)
+ {
+ tmp_str = format (0, "N/A");
+ }
+ else
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls =
+ pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index);
+ tmp_str = format (0, "%s", ls->name);
+ }
+ else
+ {
+ tmp_str = format (0, "N/A");
+ }
+ }
+ vec_add1 (tmp_str, 0);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_SHOW_ONE_PITR_REPLY,
+ ({
+ rmp->status = lcm->lisp_pitr;
+ strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
+ ARRAY_LEN(rmp->locator_set_name) - 1);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_show_one_stats_enable_disable_t_handler
+ (vl_api_show_one_stats_enable_disable_t * mp)
+{
+ vl_api_show_one_stats_enable_disable_reply_t *rmp = NULL;
+ vnet_api_error_t rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_ONE_STATS_ENABLE_DISABLE_REPLY,
+ ({
+ rmp->is_en = vnet_lisp_stats_enable_disable_state ();
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_one_stats_enable_disable_t_handler
+ (vl_api_one_stats_enable_disable_t * mp)
+{
+ vl_api_one_stats_enable_disable_reply_t *rmp = NULL;
+
+ vnet_api_error_t rv = vnet_lisp_stats_enable_disable (mp->is_en);
+ REPLY_MACRO (VL_API_ONE_STATS_ENABLE_DISABLE_REPLY);
+}
+
+static void
+lisp_fid_addr_to_api (fid_address_t * fid, u8 * dst, u8 * api_eid_type,
+ u8 * prefix_length)
+{
+ switch (fid_addr_type (fid))
+ {
+ case FID_ADDR_IP_PREF:
+ *prefix_length = fid_addr_prefix_length (fid);
+ if (fid_addr_ip_version (fid) == IP4)
+ {
+ *api_eid_type = 0; /* ipv4 type */
+ clib_memcpy (dst, &fid_addr_ippref (fid), 4);
+ }
+ else
+ {
+ *api_eid_type = 1; /* ipv6 type */
+ clib_memcpy (dst, &fid_addr_ippref (fid), 16);
+ }
+ break;
+ case FID_ADDR_MAC:
+ *api_eid_type = 2; /* l2 mac type */
+ mac_copy (dst, fid_addr_mac (fid));
+ break;
+ default:
+ ASSERT (0);
+ }
+}
+
+static void
+vl_api_one_stats_flush_t_handler (vl_api_one_stats_flush_t * mp)
+{
+ vl_api_one_stats_flush_reply_t *rmp;
+ int rv;
+
+ rv = vnet_lisp_flush_stats ();
+ REPLY_MACRO (VL_API_ONE_STATS_FLUSH_REPLY);
+}
+
+static void
+vl_api_one_stats_dump_t_handler (vl_api_one_stats_dump_t * mp)
+{
+ vl_api_one_stats_details_t *rmp;
+ lisp_api_stats_t *stats, *stat;
+ u8 rv = 0;
+
+ stats = vnet_lisp_get_stats ();
+ vec_foreach (stat, stats)
+ {
+ /* *INDENT-OFF* */
+ REPLY_DETAILS (VL_API_ONE_STATS_DETAILS,
+ ({
+ lisp_fid_addr_to_api (&stat->deid, rmp->deid, &rmp->eid_type,
+ &rmp->deid_pref_len);
+ lisp_fid_addr_to_api (&stat->seid, rmp->seid, &rmp->eid_type,
+ &rmp->seid_pref_len);
+ rmp->vni = clib_host_to_net_u32 (stat->vni);
+
+ rmp->is_ip4 = ip_addr_version (&stat->rmt_rloc) == IP4 ? 1 : 0;
+ ip_address_copy_addr (rmp->rloc, &stat->rmt_rloc);
+ ip_address_copy_addr (rmp->lloc, &stat->loc_rloc);
+
+ rmp->pkt_count = clib_host_to_net_u32 (stat->counters.packets);
+ rmp->bytes = clib_host_to_net_u32 (stat->counters.bytes);
+ }));
+ /* *INDENT-ON* */
+ }
+}
+
+static void
+ vl_api_one_add_del_l2_arp_entry_t_handler
+ (vl_api_one_add_del_l2_arp_entry_t * mp)
+{
+ vl_api_one_add_del_l2_arp_entry_reply_t *rmp;
+ int rv = 0;
+ gid_address_t _arp, *arp = &_arp;
+ memset (arp, 0, sizeof (*arp));
+
+ gid_address_type (arp) = GID_ADDR_ARP;
+ gid_address_arp_bd (arp) = clib_net_to_host_u32 (mp->bd);
+
+ /* vpp keeps ip4 addresses in network byte order */
+ ip_address_set (&gid_address_arp_ndp_ip (arp), &mp->ip4, IP4);
+
+ rv = vnet_lisp_add_del_l2_arp_ndp_entry (arp, mp->mac, mp->is_add);
+
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_L2_ARP_ENTRY_REPLY);
+}
+
+static void
+vl_api_one_add_del_ndp_entry_t_handler (vl_api_one_add_del_ndp_entry_t * mp)
+{
+ vl_api_one_add_del_ndp_entry_reply_t *rmp;
+ int rv = 0;
+ gid_address_t _g, *g = &_g;
+ memset (g, 0, sizeof (*g));
+
+ gid_address_type (g) = GID_ADDR_NDP;
+ gid_address_ndp_bd (g) = clib_net_to_host_u32 (mp->bd);
+ ip_address_set (&gid_address_arp_ndp_ip (g), mp->ip6, IP6);
+
+ rv = vnet_lisp_add_del_l2_arp_ndp_entry (g, mp->mac, mp->is_add);
+
+ REPLY_MACRO (VL_API_ONE_ADD_DEL_NDP_ENTRY_REPLY);
+}
+
+static void
+vl_api_one_ndp_bd_get_t_handler (vl_api_one_ndp_bd_get_t * mp)
+{
+ vl_api_one_ndp_bd_get_reply_t *rmp;
+ int rv = 0;
+ u32 i = 0;
+ hash_pair_t *p;
+
+ u32 *bds = vnet_lisp_ndp_bds_get ();
+ u32 size = hash_elts (bds) * sizeof (u32);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_ONE_NDP_BD_GET_REPLY, size,
+ {
+ rmp->count = clib_host_to_net_u32 (hash_elts (bds));
+ hash_foreach_pair (p, bds,
+ ({
+ rmp->bridge_domains[i++] = clib_host_to_net_u32 (p->key);
+ }));
+ });
+ /* *INDENT-ON* */
+
+ hash_free (bds);
+}
+
+static void
+vl_api_one_l2_arp_bd_get_t_handler (vl_api_one_l2_arp_bd_get_t * mp)
+{
+ vl_api_one_l2_arp_bd_get_reply_t *rmp;
+ int rv = 0;
+ u32 i = 0;
+ hash_pair_t *p;
+
+ u32 *bds = vnet_lisp_l2_arp_bds_get ();
+ u32 size = hash_elts (bds) * sizeof (u32);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_ONE_L2_ARP_BD_GET_REPLY, size,
+ {
+ rmp->count = clib_host_to_net_u32 (hash_elts (bds));
+ hash_foreach_pair (p, bds,
+ ({
+ rmp->bridge_domains[i++] = clib_host_to_net_u32 (p->key);
+ }));
+ });
+ /* *INDENT-ON* */
+
+ hash_free (bds);
+}
+
+static void
+vl_api_one_l2_arp_entries_get_t_handler (vl_api_one_l2_arp_entries_get_t * mp)
+{
+ vl_api_one_l2_arp_entries_get_reply_t *rmp;
+ lisp_api_l2_arp_entry_t *entries = 0, *e;
+ u32 i = 0;
+ int rv = 0;
+
+ u32 bd = clib_net_to_host_u32 (mp->bd);
+
+ entries = vnet_lisp_l2_arp_entries_get_by_bd (bd);
+ u32 size = vec_len (entries) * sizeof (vl_api_one_l2_arp_entry_t);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_ONE_L2_ARP_ENTRIES_GET_REPLY, size,
+ {
+ rmp->count = clib_host_to_net_u32 (vec_len (entries));
+ vec_foreach (e, entries)
+ {
+ mac_copy (rmp->entries[i].mac, e->mac);
+ rmp->entries[i].ip4 = e->ip4;
+ i++;
+ }
+ });
+ /* *INDENT-ON* */
+
+ vec_free (entries);
+}
+
+static void
+ vl_api_one_map_register_fallback_threshold_t_handler
+ (vl_api_one_map_register_fallback_threshold_t * mp)
+{
+ vl_api_one_map_register_fallback_threshold_reply_t *rmp;
+ int rv = 0;
+
+ mp->value = clib_net_to_host_u32 (mp->value);
+ rv = vnet_lisp_map_register_fallback_threshold_set (mp->value);
+ REPLY_MACRO (VL_API_ONE_MAP_REGISTER_FALLBACK_THRESHOLD_REPLY);
+}
+
+static void
+ vl_api_show_one_map_register_fallback_threshold_t_handler
+ (vl_api_show_one_map_register_fallback_threshold_t * mp)
+{
+ vl_api_show_one_map_register_fallback_threshold_reply_t *rmp;
+ int rv = 0;
+
+ u32 value = vnet_lisp_map_register_fallback_threshold_get ();
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_FALLBACK_THRESHOLD_REPLY,
+ ({
+ rmp->value = clib_host_to_net_u32 (value);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_one_set_transport_protocol_t_handler
+ (vl_api_one_set_transport_protocol_t * mp)
+{
+ vl_api_one_set_transport_protocol_reply_t *rmp;
+ int rv = 0;
+
+ rv = vnet_lisp_set_transport_protocol (mp->protocol);
+
+ REPLY_MACRO (VL_API_ONE_SET_TRANSPORT_PROTOCOL_REPLY);
+}
+
+static void
+ vl_api_one_get_transport_protocol_t_handler
+ (vl_api_one_get_transport_protocol_t * mp)
+{
+ vl_api_one_get_transport_protocol_reply_t *rmp;
+ int rv = 0;
+ u8 proto = (u8) vnet_lisp_get_transport_protocol ();
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_ONE_GET_TRANSPORT_PROTOCOL_REPLY,
+ ({
+ rmp->protocol = proto;
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_one_ndp_entries_get_t_handler (vl_api_one_ndp_entries_get_t * mp)
+{
+ vl_api_one_ndp_entries_get_reply_t *rmp = 0;
+ lisp_api_ndp_entry_t *entries = 0, *e;
+ u32 i = 0;
+ int rv = 0;
+
+ u32 bd = clib_net_to_host_u32 (mp->bd);
+
+ entries = vnet_lisp_ndp_entries_get_by_bd (bd);
+ u32 size = vec_len (entries) * sizeof (vl_api_one_ndp_entry_t);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_ONE_NDP_ENTRIES_GET_REPLY, size,
+ {
+ rmp->count = clib_host_to_net_u32 (vec_len (entries));
+ vec_foreach (e, entries)
+ {
+ mac_copy (rmp->entries[i].mac, e->mac);
+ clib_memcpy (rmp->entries[i].ip6, e->ip6, 16);
+ i++;
+ }
+ });
+ /* *INDENT-ON* */
+
+ vec_free (entries);
+}
+
+/*
+ * one_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
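+/*
+ * The list included above expands to (message-name, CRC) pairs;
+ * registering them in api_main_t lets clients resolve message ids by
+ * name+CRC at connect time and detect API definition mismatches.
+ */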
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_one;
+#undef _
+}
+
+static clib_error_t *
+one_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (one_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/one_cli.c b/src/vnet/lisp-cp/one_cli.c
new file mode 100644
index 00000000..700bfd66
--- /dev/null
+++ b/src/vnet/lisp-cp/one_cli.c
@@ -0,0 +1,2158 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lisp-cp/control.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+static clib_error_t *
+lisp_show_adjacencies_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_adjacency_t *adjs, *adj;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 vni = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "vni %d", &vni))
+ ;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return 0;
+ }
+ }
+ unformat_free (line_input);
+
+ if (~0 == vni)
+ {
+ vlib_cli_output (vm, "error: no vni specified!");
+ return 0;
+ }
+
+ vlib_cli_output (vm, "%s %40s\n", "leid", "reid");
+
+ adjs = vnet_lisp_adjacencies_get_by_vni (vni);
+
+ vec_foreach (adj, adjs)
+ {
+ vlib_cli_output (vm, "%U %40U\n", format_gid_address, &adj->leid,
+ format_gid_address, &adj->reid);
+ }
+ vec_free (adjs);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_adjacencies_command) = {
+ .path = "show one adjacencies",
+ .short_help = "show one adjacencies",
+ .function = lisp_show_adjacencies_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_add_del_map_server_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv = 0;
+ u8 is_add = 1, ip_set = 0;
+ ip_address_t ip;
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "%U", unformat_ip_address, &ip))
+ ip_set = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return 0;
+ }
+ }
+ unformat_free (line_input);
+
+ if (!ip_set)
+ {
+ vlib_cli_output (vm, "map-server ip address not set!");
+ return 0;
+ }
+
+ rv = vnet_lisp_add_del_map_server (&ip, is_add);
+ if (rv)
+ vlib_cli_output (vm, "failed to %s map-server!",
+ is_add ? "add" : "delete");
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_add_del_map_server_command) = {
+ .path = "one map-server",
+ .short_help = "one map-server add|del <ip>",
+ .function = lisp_add_del_map_server_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_add_del_local_eid_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ gid_address_t eid;
+ gid_address_t *eids = 0;
+ clib_error_t *error = 0;
+ u8 *locator_set_name = 0;
+ u32 locator_set_index = 0, map_index = 0;
+ uword *p;
+ vnet_lisp_add_del_mapping_args_t _a, *a = &_a;
+ int rv = 0;
+ u32 vni = 0;
+ u8 *key = 0;
+ u32 key_id = 0;
+
+ memset (&eid, 0, sizeof (eid));
+ memset (a, 0, sizeof (*a));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "eid %U", unformat_gid_address, &eid))
+ ;
+ else if (unformat (line_input, "vni %d", &vni))
+ gid_address_vni (&eid) = vni;
+ else if (unformat (line_input, "secret-key %_%v%_", &key))
+ ;
+ else if (unformat (line_input, "key-id %U", unformat_hmac_key_id,
+ &key_id))
+ ;
+ else if (unformat (line_input, "locator-set %_%v%_", &locator_set_name))
+ {
+ p = hash_get_mem (lcm->locator_set_index_by_name, locator_set_name);
+ if (!p)
+ {
+ error = clib_error_return (0, "locator-set %s doesn't exist",
+ locator_set_name);
+ goto done;
+ }
+ locator_set_index = p[0];
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+ /* XXX treat batch configuration */
+
+ if (GID_ADDR_SRC_DST == gid_address_type (&eid))
+ {
+ error =
+ clib_error_return (0, "src/dst is not supported for local EIDs!");
+ goto done;
+ }
+
+ if (key && (0 == key_id))
+ {
+ vlib_cli_output (vm, "invalid key_id!");
+ goto done;
+ }
+
+ gid_address_copy (&a->eid, &eid);
+ a->is_add = is_add;
+ a->locator_set_index = locator_set_index;
+ a->local = 1;
+ a->key = key;
+ a->key_id = key_id;
+
+ rv = vnet_lisp_add_del_local_mapping (a, &map_index);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s local mapping!",
+ is_add ? "add" : "delete");
+ }
+done:
+ vec_free (eids);
+ if (locator_set_name)
+ vec_free (locator_set_name);
+ gid_address_free (&a->eid);
+ vec_free (a->key);
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_add_del_local_eid_command) = {
+ .path = "one eid-table",
+ .short_help = "one eid-table add/del [vni <vni>] eid <eid> "
+ "locator-set <locator-set> [key <secret-key> key-id sha1|sha256 ]",
+ .function = lisp_add_del_local_eid_command_fn,
+};
+/* *INDENT-ON* */
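+/*
+ * Example invocation (illustrative values), adding a local EID backed
+ * by a previously configured locator-set:
+ *   one eid-table add vni 0 eid 6.0.1.0/24 locator-set ls1
+ */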
+
+static clib_error_t *
+lisp_eid_table_map_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 is_add = 1, is_l2 = 0;
+ u32 vni = 0, dp_id = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "vni %d", &vni))
+ ;
+ else if (unformat (line_input, "vrf %d", &dp_id))
+ ;
+ else if (unformat (line_input, "bd %d", &dp_id))
+ is_l2 = 1;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+ vnet_lisp_eid_table_map (vni, dp_id, is_l2, is_add);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_eid_table_map_command) = {
+ .path = "one eid-table map",
+ .short_help = "one eid-table map [del] vni <vni> vrf <vrf> | bd <bdi>",
+ .function = lisp_eid_table_map_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_add_del_ndp_entry_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ int rc = 0;
+ u8 hw_addr[6];
+ u32 bd = 0;
+ ip6_address_t ip6;
+ u32 hw_addr_set = 0, ip_set = 0, is_add = 1;
+ gid_address_t _g, *g = &_g;
+
+ memset (&ip6, 0, sizeof (ip6));
+ memset (hw_addr, 0, sizeof (hw_addr));
+ memset (g, 0, sizeof (*g));
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "mac %U", unformat_mac_address, hw_addr))
+ hw_addr_set = 1;
+ else if (unformat (line_input, "ip %U", unformat_ip6_address, &ip6))
+ ip_set = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "bd %d", &bd))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error");
+ goto done;
+ }
+ }
+
+ if (!ip_set || (!hw_addr_set && is_add))
+ {
+ vlib_cli_output (vm, "expected IP and MAC addresses!");
+ goto done;
+ }
+
+ /* build GID address */
+ ip_address_set (&gid_address_arp_ndp_ip (g), &ip6, IP6);
+ gid_address_ndp_bd (g) = bd;
+ gid_address_type (g) = GID_ADDR_NDP;
+ rc = vnet_lisp_add_del_l2_arp_ndp_entry (g, hw_addr, is_add);
+ if (rc)
+ clib_warning ("Failed to %s ndp entry!", is_add ? "add" : "delete");
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_add_del_ndp_entry_command) = {
+ .path = "one ndp",
+ .short_help = "one ndp [del] bd <bd> mac <mac> ip <ipv6>",
+ .function = lisp_add_del_ndp_entry_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_add_del_l2_arp_entry_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ int rc = 0;
+ u8 hw_addr[6];
+ u32 bd = 0;
+ ip4_address_t ip4;
+ u32 hw_addr_set = 0, ip_set = 0, is_add = 1;
+ gid_address_t _arp, *arp = &_arp;
+
+ memset (&ip4, 0, sizeof (ip4));
+ memset (hw_addr, 0, sizeof (hw_addr));
+ memset (arp, 0, sizeof (*arp));
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "mac %U", unformat_mac_address, hw_addr))
+ hw_addr_set = 1;
+ else if (unformat (line_input, "ip %U", unformat_ip4_address, &ip4))
+ ip_set = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "bd %d", &bd))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error");
+ goto done;
+ }
+ }
+
+ if (!ip_set || (!hw_addr_set && is_add))
+ {
+ vlib_cli_output (vm, "expected IP and MAC addresses!");
+ goto done;
+ }
+
+ /* build GID address */
+ gid_address_arp_ip4 (arp) = ip4;
+ gid_address_arp_bd (arp) = bd;
+ gid_address_type (arp) = GID_ADDR_ARP;
+ rc = vnet_lisp_add_del_l2_arp_ndp_entry (arp, hw_addr, is_add);
+ if (rc)
+ clib_warning ("Failed to %s l2 arp entry!", is_add ? "add" : "delete");
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_add_del_l2_arp_entry_command) = {
+ .path = "one l2 arp",
+ .short_help = "one l2 arp [del] bd <bd> mac <mac> ip <ipv4>",
+ .function = lisp_add_del_l2_arp_entry_command_fn,
+};
+/* *INDENT-ON* */
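+/*
+ * Example invocation (illustrative values):
+ *   one l2 arp bd 1 mac aa:bb:cc:dd:ee:ff ip 10.0.0.1
+ */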
+
+static clib_error_t *
+lisp_show_l2_arp_entries_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 *ht = vnet_lisp_l2_arp_bds_get ();
+ lisp_api_l2_arp_entry_t *entries, *e;
+ hash_pair_t *p;
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, ht,
+ ({
+ entries = vnet_lisp_l2_arp_entries_get_by_bd (p->key);
+ vlib_cli_output (vm, "Table: %d", p->key);
+
+ vec_foreach (e, entries)
+ {
+ vlib_cli_output (vm, "\t%U -> %U", format_ip4_address, &e->ip4,
+ format_mac_address, e->mac);
+ }
+ vec_free (entries);
+ }));
+ /* *INDENT-ON* */
+
+ hash_free (ht);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_l2_arp_entries_command) = {
+ .path = "show one l2 arp entries",
+ .short_help = "Show ONE L2 ARP entries",
+ .function = lisp_show_l2_arp_entries_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_ndp_entries_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 *ht = vnet_lisp_ndp_bds_get ();
+ lisp_api_ndp_entry_t *entries, *e;
+ hash_pair_t *p;
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, ht,
+ ({
+ entries = vnet_lisp_ndp_entries_get_by_bd (p->key);
+ vlib_cli_output (vm, "Table: %d", p->key);
+
+ vec_foreach (e, entries)
+ {
+ vlib_cli_output (vm, "\t%U -> %U", format_ip6_address, &e->ip6,
+ format_mac_address, e->mac);
+ }
+ vec_free (entries);
+ }));
+ /* *INDENT-ON* */
+
+ hash_free (ht);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_ndp_entries_command) = {
+ .path = "show one ndp entries",
+ .short_help = "Show ONE NDP entries",
+ .function = lisp_show_ndp_entries_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * Handler for add/del remote mapping CLI.
+ *
+ * @param vm vlib context
+ * @param input input from user
+ * @param cmd cmd
+ * @return pointer to clib error structure
+ */
+static clib_error_t *
+lisp_add_del_remote_mapping_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1, del_all = 0;
+ locator_t rloc, *rlocs = 0, *curr_rloc = 0;
+ gid_address_t eid;
+ u8 eid_set = 0;
+ u32 vni, action = ~0, p, w;
+ int rv;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ memset (&eid, 0, sizeof (eid));
+ memset (&rloc, 0, sizeof (rloc));
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del-all"))
+ del_all = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ ;
+ else if (unformat (line_input, "eid %U", unformat_gid_address, &eid))
+ eid_set = 1;
+ else if (unformat (line_input, "vni %u", &vni))
+ {
+ gid_address_vni (&eid) = vni;
+ }
+ else if (unformat (line_input, "p %d w %d", &p, &w))
+ {
+ if (!curr_rloc)
+ {
+ clib_warning
+ ("No RLOC configured for setting priority/weight!");
+ goto done;
+ }
+ curr_rloc->priority = p;
+ curr_rloc->weight = w;
+ }
+ else if (unformat (line_input, "rloc %U", unformat_ip_address,
+ &gid_address_ip (&rloc.address)))
+ {
+ /* since the rloc is stored as an ip prefix, we need to set its length */
+ ip_prefix_t *pref = &gid_address_ippref (&rloc.address);
+
+ u8 version = gid_address_ip_version (&rloc.address);
+ ip_prefix_len (pref) = ip_address_max_len (version);
+
+ vec_add1 (rlocs, rloc);
+ curr_rloc = &rlocs[vec_len (rlocs) - 1];
+ }
+ else if (unformat (line_input, "action %U",
+ unformat_negative_mapping_action, &action))
+ ;
+ else
+ {
+ clib_warning ("parse error");
+ goto done;
+ }
+ }
+
+ if (!del_all && !eid_set)
+ {
+ clib_warning ("missing eid!");
+ goto done;
+ }
+
+ if (!del_all)
+ {
+ if (is_add && (~0 == action) && 0 == vec_len (rlocs))
+ {
+ clib_warning ("no action set for negative map-reply!");
+ goto done;
+ }
+ }
+ else
+ {
+ vnet_lisp_clear_all_remote_adjacencies ();
+ goto done;
+ }
+
+ /* if it's a delete, clean forwarding */
+ if (!is_add)
+ {
+ vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+ memset (a, 0, sizeof (a[0]));
+ gid_address_copy (&a->reid, &eid);
+ if (vnet_lisp_add_del_adjacency (a))
+ {
+ clib_warning ("failed to delete adjacency!");
+ goto done;
+ }
+ }
+
+ /* add as static remote mapping, i.e., not authoritative and infinite
+ * ttl */
+ if (is_add)
+ {
+ vnet_lisp_add_del_mapping_args_t _map_args, *map_args = &_map_args;
+ memset (map_args, 0, sizeof (map_args[0]));
+ gid_address_copy (&map_args->eid, &eid);
+ map_args->action = action;
+ map_args->is_static = 1;
+ map_args->authoritative = 0;
+ map_args->ttl = ~0;
+ rv = vnet_lisp_add_mapping (map_args, rlocs, NULL, NULL);
+ }
+ else
+ rv = vnet_lisp_del_mapping (&eid, NULL);
+
+ if (rv)
+ clib_warning ("failed to %s remote mapping!", is_add ? "add" : "delete");
+
+done:
+ vec_free (rlocs);
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_add_del_remote_mapping_command) = {
+ .path = "one remote-mapping",
+ .short_help =
+ "one remote-mapping add|del [del-all] vni <vni> "
+ "eid <est-eid> [action <no-action|natively-forward|"
+ "send-map-request|drop>] rloc <dst-locator> p <prio> w <weight> "
+ "[rloc <dst-locator> ... ]",
+ .function = lisp_add_del_remote_mapping_command_fn,
+};
+/* *INDENT-ON* */
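+/*
+ * Example invocation (illustrative values), installing a static
+ * remote mapping with a single weighted locator:
+ *   one remote-mapping add vni 0 eid 10.0.0.0/24 rloc 6.0.2.2 p 1 w 1
+ */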
+
+/**
+ * Handler for add/del adjacency CLI.
+ */
+static clib_error_t *
+lisp_add_del_adjacency_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_lisp_add_del_adjacency_args_t _a, *a = &_a;
+ u8 is_add = 1;
+ ip_prefix_t *reid_ippref, *leid_ippref;
+ gid_address_t leid, reid;
+ u8 *dmac = gid_address_mac (&reid);
+ u8 *smac = gid_address_mac (&leid);
+ u8 reid_set = 0, leid_set = 0;
+ u32 vni;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ memset (&reid, 0, sizeof (reid));
+ memset (&leid, 0, sizeof (leid));
+
+ leid_ippref = &gid_address_ippref (&leid);
+ reid_ippref = &gid_address_ippref (&reid);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ ;
+ else if (unformat (line_input, "reid %U",
+ unformat_ip_prefix, reid_ippref))
+ {
+ gid_address_type (&reid) = GID_ADDR_IP_PREFIX;
+ reid_set = 1;
+ }
+ else if (unformat (line_input, "reid %U", unformat_mac_address, dmac))
+ {
+ gid_address_type (&reid) = GID_ADDR_MAC;
+ reid_set = 1;
+ }
+ else if (unformat (line_input, "vni %u", &vni))
+ {
+ gid_address_vni (&leid) = vni;
+ gid_address_vni (&reid) = vni;
+ }
+ else if (unformat (line_input, "leid %U",
+ unformat_ip_prefix, leid_ippref))
+ {
+ gid_address_type (&leid) = GID_ADDR_IP_PREFIX;
+ leid_set = 1;
+ }
+ else if (unformat (line_input, "leid %U", unformat_mac_address, smac))
+ {
+ gid_address_type (&leid) = GID_ADDR_MAC;
+ leid_set = 1;
+ }
+ else
+ {
+ clib_warning ("parse error");
+ goto done;
+ }
+ }
+
+ if (!reid_set || !leid_set)
+ {
+ clib_warning ("missing remote or local eid!");
+ goto done;
+ }
+
+ if ((gid_address_type (&leid) != gid_address_type (&reid))
+ || (gid_address_type (&reid) == GID_ADDR_IP_PREFIX
+ && ip_prefix_version (reid_ippref)
+ != ip_prefix_version (leid_ippref)))
+ {
+ clib_warning ("remote and local EIDs are of different types!");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (a[0]));
+ gid_address_copy (&a->leid, &leid);
+ gid_address_copy (&a->reid, &reid);
+ a->is_add = is_add;
+
+ if (vnet_lisp_add_del_adjacency (a))
+ clib_warning ("failed to %s adjacency!", is_add ? "add" : "delete");
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_add_del_adjacency_command) = {
+ .path = "one adjacency",
+ .short_help = "one adjacency add|del vni <vni> reid <remote-eid> "
+ "leid <local-eid>",
+ .function = lisp_add_del_adjacency_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_map_request_mode_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _i, *i = &_i;
+ map_request_mode_t mr_mode = _MR_MODE_MAX;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, i))
+ return 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "dst-only"))
+ mr_mode = MR_MODE_DST_ONLY;
+ else if (unformat (i, "src-dst"))
+ mr_mode = MR_MODE_SRC_DST;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ goto done;
+ }
+ }
+
+ if (_MR_MODE_MAX == mr_mode)
+ {
+ clib_warning ("No map request mode entered!");
+ goto done;
+ }
+
+ vnet_lisp_set_map_request_mode (mr_mode);
+
+done:
+ unformat_free (i);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_map_request_mode_command) = {
+ .path = "one map-request mode",
+ .short_help = "one map-request mode dst-only|src-dst",
+ .function = lisp_map_request_mode_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static u8 *
+format_lisp_map_request_mode (u8 * s, va_list * args)
+{
+ u32 mode = va_arg (*args, u32);
+
+ switch (mode)
+ {
+ case 0:
+ return format (s, "dst-only");
+ case 1:
+ return format (s, "src-dst");
+ }
+ return s;
+}
+
+static clib_error_t *
+lisp_show_map_request_mode_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_cli_output (vm, "map-request mode: %U", format_lisp_map_request_mode,
+ vnet_lisp_get_map_request_mode ());
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_map_request_mode_command) = {
+ .path = "show one map-request mode",
+ .short_help = "show one map-request mode",
+ .function = lisp_show_map_request_mode_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_msmr_t *mr;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ vec_foreach (mr, lcm->map_resolvers)
+ {
+ vlib_cli_output (vm, "%U", format_ip_address, &mr->address);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_map_resolvers_command) = {
+ .path = "show one map-resolvers",
+ .short_help = "show one map-resolvers",
+ .function = lisp_show_map_resolvers_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_nsh_set_locator_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 locator_name_set = 0;
+ u8 *locator_set_name = 0;
+ u8 is_add = 1;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ int rv = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "ls %_%v%_", &locator_set_name))
+ locator_name_set = 1;
+ else if (unformat (line_input, "disable"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "parse error");
+ goto done;
+ }
+ }
+
+ if (!locator_name_set)
+ {
+ clib_warning ("No locator set specified!");
+ goto done;
+ }
+
+ rv = vnet_lisp_nsh_set_locator_set (locator_set_name, is_add);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s NSH mapping!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_nsh_set_locator_set_command) = {
+ .path = "one nsh-mapping",
+ .short_help = "one nsh-mapping [del] ls <locator-set-name>",
+ .function = lisp_nsh_set_locator_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_map_register_fallback_threshold_show_command_fn (vlib_main_t * vm,
+ unformat_input_t *
+ input,
+ vlib_cli_command_t *
+ cmd)
+{
+ u32 val = vnet_lisp_map_register_fallback_threshold_get ();
+ vlib_cli_output (vm, "map register fallback treshold value: %d", val);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_map_register_fallback_threshold_show_command) = {
+ .path = "show one map-register fallback-threshold",
+ .short_help = "show one map-register fallback-threshold",
+ .function = lisp_map_register_fallback_threshold_show_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_map_register_fallback_threshold_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u32 val = 0;
+ int rv = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &val))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error");
+ goto done;
+ }
+ }
+
+ rv = vnet_lisp_map_register_fallback_threshold_set (val);
+ if (rv)
+ {
+ error = clib_error_return (0, "setting fallback threshold failed!");
+ }
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_map_register_fallback_threshold_command) = {
+ .path = "one map-register fallback-threshold",
+ .short_help = "one map-register fallback-threshold <count>",
+ .function = lisp_map_register_fallback_threshold_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 locator_name_set = 0;
+ u8 *locator_set_name = 0;
+ u8 is_add = 1;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ int rv = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "ls %_%v%_", &locator_set_name))
+ locator_name_set = 1;
+ else if (unformat (line_input, "disable"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "parse error");
+ goto done;
+ }
+ }
+
+ if (!locator_name_set)
+ {
+ clib_warning ("No locator set specified!");
+ goto done;
+ }
+ rv = vnet_lisp_pitr_set_locator_set (locator_set_name, is_add);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s pitr!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ if (locator_set_name)
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_pitr_set_locator_set_command) = {
+ .path = "one pitr",
+ .short_help = "one pitr [disable] ls <locator-set-name>",
+ .function = lisp_pitr_set_locator_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_pitr_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls;
+ u8 *tmp_str = 0;
+
+ vlib_cli_output (vm, "%=20s%=16s",
+ "pitr", lcm->lisp_pitr ? "locator-set" : "");
+
+ if (!lcm->lisp_pitr)
+ {
+ vlib_cli_output (vm, "%=20s", "disable");
+ return 0;
+ }
+
+ if (~0 == lcm->pitr_map_index)
+ {
+ tmp_str = format (0, "N/A");
+ }
+ else
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls =
+ pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index);
+ tmp_str = format (0, "%s", ls->name);
+ }
+ else
+ {
+ tmp_str = format (0, "N/A");
+ }
+ }
+ vec_add1 (tmp_str, 0);
+
+ vlib_cli_output (vm, "%=20s%=16s", "enable", tmp_str);
+
+ vec_free (tmp_str);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_pitr_command) = {
+ .path = "show one pitr",
+ .short_help = "Show pitr",
+ .function = lisp_show_pitr_command_fn,
+};
+/* *INDENT-ON* */
+
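+/*
+ * Formats one mapping as a row of the EID table: EID, local/remote
+ * type, one locator per line, TTL and the authoritative flag;
+ * continuation lines carry only the locator column.
+ */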
+static u8 *
+format_eid_entry (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ lisp_cp_main_t *lcm = va_arg (*args, lisp_cp_main_t *);
+ mapping_t *mapit = va_arg (*args, mapping_t *);
+ locator_set_t *ls = va_arg (*args, locator_set_t *);
+ gid_address_t *gid = &mapit->eid;
+ u32 ttl = mapit->ttl;
+ u8 aut = mapit->authoritative;
+ u32 *loc_index;
+ u8 first_line = 1;
+ u8 *loc;
+
+ u8 *type = ls->local ? format (0, "local(%s)", ls->name)
+ : format (0, "remote");
+
+ if (vec_len (ls->locator_indices) == 0)
+ {
+ s = format (s, "%-35U%-20saction:%-30U%-20u%-u", format_gid_address,
+ gid, type, format_negative_mapping_action, mapit->action,
+ ttl, aut);
+ }
+ else
+ {
+ vec_foreach (loc_index, ls->locator_indices)
+ {
+ locator_t *l = pool_elt_at_index (lcm->locator_pool, loc_index[0]);
+ if (l->local)
+ loc = format (0, "%U", format_vnet_sw_if_index_name, vnm,
+ l->sw_if_index);
+ else
+ loc = format (0, "%U", format_ip_address,
+ &gid_address_ip (&l->address));
+
+ if (first_line)
+ {
+ s = format (s, "%-35U%-20s%-30v%-20u%-u\n", format_gid_address,
+ gid, type, loc, ttl, aut);
+ first_line = 0;
+ }
+ else
+ s = format (s, "%55s%v\n", "", loc);
+ }
+ }
+ return s;
+}
+
+static clib_error_t *
+lisp_show_eid_table_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *mapit;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 mi;
+ gid_address_t eid;
+ u8 print_all = 1;
+ u8 filter = 0;
+ clib_error_t *error = NULL;
+
+ memset (&eid, 0, sizeof (eid));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "eid %U", unformat_gid_address, &eid))
+ print_all = 0;
+ else if (unformat (line_input, "local"))
+ filter = 1;
+ else if (unformat (line_input, "remote"))
+ filter = 2;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vlib_cli_output (vm, "%-35s%-20s%-30s%-20s%-s",
+ "EID", "type", "locators", "ttl", "autoritative");
+
+ if (print_all)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (mapit, lcm->mapping_pool,
+ ({
+ if (mapit->pitr_set || mapit->nsh_set)
+ continue;
+
+ locator_set_t * ls = pool_elt_at_index (lcm->locator_set_pool,
+ mapit->locator_set_index);
+ if (filter && !((1 == filter && ls->local) ||
+ (2 == filter && !ls->local)))
+ {
+ continue;
+ }
+ vlib_cli_output (vm, "%U", format_eid_entry, lcm->vnet_main,
+ lcm, mapit, ls);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &eid);
+ if ((u32) ~ 0 == mi)
+ goto done;
+
+ mapit = pool_elt_at_index (lcm->mapping_pool, mi);
+ locator_set_t *ls = pool_elt_at_index (lcm->locator_set_pool,
+ mapit->locator_set_index);
+
+ if (filter && !((1 == filter && ls->local) ||
+ (2 == filter && !ls->local)))
+ {
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U,", format_eid_entry, lcm->vnet_main,
+ lcm, mapit, ls);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_cp_show_eid_table_command) = {
+ .path = "show one eid-table",
+ .short_help = "Shows EID table",
+ .function = lisp_show_eid_table_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_enabled = 0;
+ u8 is_set = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ {
+ is_set = 1;
+ is_enabled = 1;
+ }
+ else if (unformat (line_input, "disable"))
+ is_set = 1;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!is_set)
+ {
+ error = clib_error_return (0, "state not set");
+ goto done;
+ }
+
+ vnet_lisp_enable_disable (is_enabled);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_cp_enable_disable_command) = {
+ .path = "one",
+ .short_help = "one [enable|disable]",
+ .function = lisp_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_map_register_set_ttl_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 ttl = 0;
+ u8 is_set = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%u", &ttl))
+ is_set = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'", format_unformat_error,
+ line_input);
+ goto done;
+ }
+ }
+
+ if (!is_set)
+ {
+ vlib_cli_output (vm, "expected integer value for TTL!");
+ goto done;
+ }
+
+ vnet_lisp_map_register_set_ttl (ttl);
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_map_register_set_ttl_command) = {
+ .path = "one map-register ttl",
+ .short_help = "one map-register ttl",
+ .function = lisp_map_register_set_ttl_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_map_register_show_ttl_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 ttl = vnet_lisp_map_register_get_ttl ();
+
+ vlib_cli_output (vm, "map-register TTL: %u", ttl);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_map_register_show_ttl_command) = {
+ .path = "show one map-register ttl",
+ .short_help = "show one map-register ttl",
+ .function = lisp_map_register_show_ttl_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_map_register_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_enabled = 0;
+ u8 is_set = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ {
+ is_set = 1;
+ is_enabled = 1;
+ }
+ else if (unformat (line_input, "disable"))
+ is_set = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'", format_unformat_error,
+ line_input);
+ goto done;
+ }
+ }
+
+ if (!is_set)
+ {
+ vlib_cli_output (vm, "state not set!");
+ goto done;
+ }
+
+ vnet_lisp_map_register_enable_disable (is_enabled);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_map_register_enable_disable_command) = {
+ .path = "one map-register",
+ .short_help = "one map-register [enable|disable]",
+ .function = lisp_map_register_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_enabled = 0;
+ u8 is_set = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ {
+ is_set = 1;
+ is_enabled = 1;
+ }
+ else if (unformat (line_input, "disable"))
+ is_set = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'", format_unformat_error,
+ line_input);
+ goto done;
+ }
+ }
+
+ if (!is_set)
+ {
+ vlib_cli_output (vm, "state not set!");
+ goto done;
+ }
+
+ vnet_lisp_rloc_probe_enable_disable (is_enabled);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_rloc_probe_enable_disable_command) = {
+ .path = "one rloc-probe",
+ .short_help = "one rloc-probe [enable|disable]",
+ .function = lisp_rloc_probe_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
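+/*
+ * RLOC probing (RFC 6830) periodically checks reachability of remote
+ * locators; this command only toggles the control-plane feature on
+ * or off.
+ */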
+
+static u8 *
+format_lisp_status (u8 * s, va_list * args)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ return format (s, "%s", lcm->is_enabled ? "enabled" : "disabled");
+}
+
+static clib_error_t *
+lisp_show_status_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *msg = 0;
+ msg = format (msg, "feature: %U\ngpe: %U\n",
+ format_lisp_status, format_vnet_lisp_gpe_status);
+ vlib_cli_output (vm, "%v", msg);
+ vec_free (msg);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_status_command) = {
+ .path = "show one status",
+ .short_help = "show one status",
+ .function = lisp_show_status_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ hash_pair_t *p;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ uword *vni_table = 0;
+ u8 is_l2 = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "l2"))
+ {
+ vni_table = lcm->bd_id_by_vni;
+ is_l2 = 1;
+ }
+ else if (unformat (line_input, "l3"))
+ {
+ vni_table = lcm->table_id_by_vni;
+ is_l2 = 0;
+ }
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!vni_table)
+ {
+ vlib_cli_output (vm, "Error: expected l2|l3 param!\n");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF");
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, vni_table,
+ ({
+ vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
+ }));
+ /* *INDENT-ON* */
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_eid_table_map_command) = {
+ .path = "show one eid-table map",
+ .short_help = "show one eid-table l2|l3",
+ .function = lisp_show_eid_table_map_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_add_del_locator_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ vnet_main_t *vnm = lgm->vnet_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ clib_error_t *error = 0;
+ u8 *locator_set_name = 0;
+ locator_t locator, *locators = 0;
+ vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ u32 ls_index = 0;
+ int rv = 0;
+
+ memset (&locator, 0, sizeof (locator));
+ memset (a, 0, sizeof (a[0]));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add %_%v%_", &locator_set_name))
+ is_add = 1;
+ else if (unformat (line_input, "del %_%v%_", &locator_set_name))
+ is_add = 0;
+ else if (unformat (line_input, "iface %U p %d w %d",
+ unformat_vnet_sw_interface, vnm,
+ &locator.sw_if_index, &locator.priority,
+ &locator.weight))
+ {
+ locator.local = 1;
+ vec_add1 (locators, locator);
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ a->name = locator_set_name;
+ a->locators = locators;
+ a->is_add = is_add;
+ a->local = 1;
+
+ rv = vnet_lisp_add_del_locator_set (a, &ls_index);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s locator-set!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ vec_free (locators);
+ if (locator_set_name)
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_cp_add_del_locator_set_command) = {
+ .path = "one locator-set",
+ .short_help = "one locator-set add/del <name> [iface <iface-name> "
+ "p <priority> w <weight>]",
+ .function = lisp_add_del_locator_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_add_del_locator_in_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ vnet_main_t *vnm = lgm->vnet_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ clib_error_t *error = 0;
+ u8 *locator_set_name = 0;
+ u8 locator_set_name_set = 0;
+ locator_t locator, *locators = 0;
+ vnet_lisp_add_del_locator_set_args_t _a, *a = &_a;
+ u32 ls_index = 0;
+
+ memset (&locator, 0, sizeof (locator));
+ memset (a, 0, sizeof (a[0]));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "locator-set %_%v%_", &locator_set_name))
+ locator_set_name_set = 1;
+ else if (unformat (line_input, "iface %U p %d w %d",
+ unformat_vnet_sw_interface, vnm,
+ &locator.sw_if_index, &locator.priority,
+ &locator.weight))
+ {
+ locator.local = 1;
+ vec_add1 (locators, locator);
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (!locator_set_name_set)
+ {
+ error = clib_error_return (0, "locator_set name not set!");
+ goto done;
+ }
+
+ a->name = locator_set_name;
+ a->locators = locators;
+ a->is_add = is_add;
+ a->local = 1;
+
+ vnet_lisp_add_del_locator (a, 0, &ls_index);
+
+done:
+ vec_free (locators);
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_cp_add_del_locator_in_set_command) = {
+ .path = "one locator",
+ .short_help = "one locator add/del locator-set <name> iface <iface-name> "
+ "p <priority> w <weight>",
+ .function = lisp_add_del_locator_in_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ locator_set_t *lsit;
+ locator_t *loc;
+ u32 *locit;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ vlib_cli_output (vm, "%s%=16s%=16s%=16s", "Locator-set", "Locator",
+ "Priority", "Weight");
+
+ /* *INDENT-OFF* */
+ pool_foreach (lsit, lcm->locator_set_pool,
+ ({
+ u8 * msg = 0;
+ int next_line = 0;
+ if (lsit->local)
+ {
+ msg = format (msg, "%v", lsit->name);
+ }
+ else
+ {
+ msg = format (msg, "<%s-%d>", "remote", lsit - lcm->locator_set_pool);
+ }
+ vec_foreach (locit, lsit->locator_indices)
+ {
+ if (next_line)
+ {
+ msg = format (msg, "%16s", " ");
+ }
+ loc = pool_elt_at_index (lcm->locator_pool, locit[0]);
+ if (loc->local)
+ msg = format (msg, "%16d%16d%16d\n", loc->sw_if_index, loc->priority,
+ loc->weight);
+ else
+ msg = format (msg, "%16U%16d%16d\n", format_ip_address,
+ &gid_address_ip(&loc->address), loc->priority,
+ loc->weight);
+ next_line = 1;
+ }
+ vlib_cli_output (vm, "%v", msg);
+ vec_free (msg);
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_cp_show_locator_sets_command) = {
+ .path = "show one locator-set",
+ .short_help = "Shows locator-sets",
+ .function = lisp_cp_show_locator_sets_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_add_del_map_resolver_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1, addr_set = 0;
+ ip_address_t ip_addr;
+ clib_error_t *error = 0;
+ int rv = 0;
+ vnet_lisp_add_del_map_resolver_args_t _a, *a = &_a;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "%U", unformat_ip_address, &ip_addr))
+ addr_set = 1;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (!addr_set)
+ {
+ error = clib_error_return (0, "Map-resolver address must be set!");
+ goto done;
+ }
+
+ a->is_add = is_add;
+ a->address = ip_addr;
+ rv = vnet_lisp_add_del_map_resolver (a);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s map-resolver!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_add_del_map_resolver_command) = {
+ .path = "one map-resolver",
+ .short_help = "one map-resolver add/del <ip_address>",
+ .function = lisp_add_del_map_resolver_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+lisp_add_del_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ u8 *locator_set_name = 0;
+ clib_error_t *error = 0;
+ int rv = 0;
+ vnet_lisp_add_del_mreq_itr_rloc_args_t _a, *a = &_a;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add %_%v%_", &locator_set_name))
+ is_add = 1;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ a->is_add = is_add;
+ a->locator_set_name = locator_set_name;
+ rv = vnet_lisp_add_del_mreq_itr_rlocs (a);
+ if (0 != rv)
+ {
+ error = clib_error_return (0, "failed to %s map-request itr-rlocs!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ vec_free (locator_set_name);
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_add_del_map_request_command) = {
+ .path = "one map-request itr-rlocs",
+ .short_help = "one map-request itr-rlocs add/del <locator_set_name>",
+ .function = lisp_add_del_mreq_itr_rlocs_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ locator_set_t *loc_set;
+
+ vlib_cli_output (vm, "%=20s", "itr-rlocs");
+
+ if (~0 == lcm->mreq_itr_rlocs)
+ {
+ return 0;
+ }
+
+ loc_set = pool_elt_at_index (lcm->locator_set_pool, lcm->mreq_itr_rlocs);
+
+ vlib_cli_output (vm, "%=20s", loc_set->name);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_map_request_command) = {
+ .path = "show one map-request itr-rlocs",
+ .short_help = "Shows map-request itr-rlocs",
+ .function = lisp_show_mreq_itr_rlocs_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 is_add = 1, ip_set = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ ip_address_t ip;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_ip_address, &ip))
+ ip_set = 1;
+ else if (unformat (line_input, "disable"))
+ is_add = 0;
+ else
+ {
+ error = clib_error_return (0, "parse error");
+ goto done;
+ }
+ }
+
+ if (!ip_set)
+ {
+ clib_warning ("No petr IP specified!");
+ goto done;
+ }
+
+ if (vnet_lisp_use_petr (&ip, is_add))
+ {
+ error = clib_error_return (0, "failed to %s petr!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_use_petr_set_locator_set_command) = {
+ .path = "one use-petr",
+ .short_help = "one use-petr [disable] <petr-ip>",
+ .function = lisp_use_petr_set_locator_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_petr_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+ mapping_t *m;
+ locator_set_t *ls;
+ locator_t *loc;
+ u8 *tmp_str = 0;
+ u8 use_petr = lcm->flags & LISP_FLAG_USE_PETR;
+ vlib_cli_output (vm, "%=20s%=16s", "petr", use_petr ? "ip" : "");
+
+ if (!use_petr)
+ {
+ vlib_cli_output (vm, "%=20s", "disable");
+ return 0;
+ }
+
+ if (~0 == lcm->petr_map_index)
+ {
+ tmp_str = format (0, "N/A");
+ }
+ else
+ {
+ m = pool_elt_at_index (lcm->mapping_pool, lcm->petr_map_index);
+ if (~0 != m->locator_set_index)
+ {
+ ls = pool_elt_at_index(lcm->locator_set_pool, m->locator_set_index);
+ loc = pool_elt_at_index (lcm->locator_pool, ls->locator_indices[0]);
+ tmp_str = format (0, "%U", format_ip_address, &loc->address);
+ }
+ else
+ {
+ tmp_str = format (0, "N/A");
+ }
+ }
+ vec_add1 (tmp_str, 0);
+
+ vlib_cli_output (vm, "%=20s%=16s", "enable", tmp_str);
+
+ vec_free (tmp_str);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_petr_command) = {
+ .path = "show one petr",
+ .short_help = "Show petr",
+ .function = lisp_show_petr_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_map_servers_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_msmr_t *ms;
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ vec_foreach (ms, lcm->map_servers)
+ {
+ vlib_cli_output (vm, "%U", format_ip_address, &ms->address);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_map_servers_command) = {
+ .path = "show one map-servers",
+ .short_help = "show one map servers",
+ .function = lisp_show_map_servers_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_map_register_state_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *msg = 0;
+ u8 is_enabled = vnet_lisp_map_register_state_get ();
+
+ msg = format (msg, "%s\n", is_enabled ? "enabled" : "disabled");
+ vlib_cli_output (vm, "%v", msg);
+ vec_free (msg);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_map_register_state_command) = {
+ .path = "show one map-register state",
+ .short_help = "show one map-register state",
+ .function = lisp_show_map_register_state_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_rloc_probe_state_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *msg = 0;
+ u8 is_enabled = vnet_lisp_rloc_probe_state_get ();
+
+ msg = format (msg, "%s\n", is_enabled ? "enabled" : "disabled");
+ vlib_cli_output (vm, "%v", msg);
+ vec_free (msg);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_rloc_probe_state_command) = {
+ .path = "show one rloc state",
+ .short_help = "show one RLOC state",
+ .function = lisp_show_rloc_probe_state_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_stats_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 is_enabled = vnet_lisp_stats_enable_disable_state ();
+ vlib_cli_output (vm, "%s\n", is_enabled ? "enabled" : "disabled");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_stats_command) = {
+ .path = "show one statistics status",
+ .short_help = "show ONE statistics enable/disable status",
+ .function = lisp_show_stats_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_show_stats_details_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_api_stats_t *stat, *stats = vnet_lisp_get_stats ();
+
+ if (vec_len (stats) > 0)
+ vlib_cli_output (vm,
+ "[src-EID, dst-EID] [loc-rloc, rmt-rloc] count bytes\n");
+ else
+ vlib_cli_output (vm, "No statistics found.\n");
+
+ vec_foreach (stat, stats)
+ {
+ vlib_cli_output (vm, "[%U, %U] [%U, %U] %7u %7u\n",
+ format_fid_address, &stat->seid,
+ format_fid_address, &stat->deid,
+ format_ip_address, &stat->loc_rloc,
+ format_ip_address, &stat->rmt_rloc,
+ stat->counters.packets, stat->counters.bytes);
+ }
+ vec_free (stats);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_show_stats_details_command) = {
+ .path = "show one statistics details",
+ .short_help = "show ONE statistics",
+ .function = lisp_show_stats_details_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_stats_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 enable = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ enable = 1;
+ else if (unformat (line_input, "disable"))
+ enable = 0;
+ else
+ {
+ clib_warning ("Error: expected enable/disable!");
+ goto done;
+ }
+ }
+ vnet_lisp_stats_enable_disable (enable);
+done:
+ unformat_free (line_input);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_stats_enable_disable_command) = {
+ .path = "one statistics",
+ .short_help = "enable/disable ONE statistics collecting",
+ .function = lisp_stats_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_stats_flush_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_lisp_flush_stats ();
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_stats_flush_command) = {
+ .path = "one statistics flush",
+ .short_help = "Flush ONE statistics",
+ .function = lisp_stats_flush_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/packets.c b/src/vnet/lisp-cp/packets.c
new file mode 100644
index 00000000..25086b8e
--- /dev/null
+++ b/src/vnet/lisp-cp/packets.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lisp-cp/packets.h>
+#include <vnet/lisp-cp/lisp_cp_messages.h>
+#include <vnet/udp/udp_packet.h>
+
+/* Returns IP ID for the packet */
+/* static u16 ip_id = 0;
+static inline u16
+get_IP_ID()
+{
+ ip_id++;
+ return (ip_id);
+} */
+
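+/*
+ * Compute the UDP checksum over the payload plus the IPv4 pseudo-header
+ * (source/destination addresses, protocol and UDP length) per RFC 768:
+ * a 16-bit one's complement sum with carry folding, returned complemented.
+ */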
+u16
+udp_ip4_checksum (const void *b, u32 len, u8 * src, u8 * dst)
+{
+ const u16 *buf = b;
+ u16 *ip_src = (u16 *) src;
+ u16 *ip_dst = (u16 *) dst;
+ u32 length = len;
+ u32 sum = 0;
+
+ while (len > 1)
+ {
+ sum += *buf++;
+ if (sum & 0x80000000)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ len -= 2;
+ }
+
+ /* Add the padding if the packet length is odd */
+ if (len & 1)
+ sum += *((u8 *) buf);
+
+ /* Add the pseudo-header */
+ sum += *(ip_src++);
+ sum += *ip_src;
+
+ sum += *(ip_dst++);
+ sum += *ip_dst;
+
+ sum += clib_host_to_net_u16 (IP_PROTOCOL_UDP);
+ sum += clib_host_to_net_u16 (length);
+
+ /* Add the carries */
+ while (sum >> 16)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+
+ /* Return the one's complement of sum */
+ return ((u16) (~sum));
+}
+
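+/*
+ * Compute the UDP checksum over the payload plus the IPv6 pseudo-header
+ * (source/destination addresses, upper-layer length, next header) as
+ * required by RFC 2460, since IPv6 itself carries no header checksum.
+ */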
+u16
+udp_ip6_checksum (ip6_header_t * ip6, udp_header_t * up, u32 len)
+{
+ size_t i;
+ register const u16 *sp;
+ u32 sum;
+ union
+ {
+ struct
+ {
+ ip6_address_t ph_src;
+ ip6_address_t ph_dst;
+ u32 ph_len;
+ u8 ph_zero[3];
+ u8 ph_nxt;
+ } ph;
+ u16 pa[20];
+ } phu;
+
+ /* pseudo-header */
+ memset (&phu, 0, sizeof (phu));
+ phu.ph.ph_src = ip6->src_address;
+ phu.ph.ph_dst = ip6->dst_address;
+ phu.ph.ph_len = clib_host_to_net_u32 (len);
+ phu.ph.ph_nxt = IP_PROTOCOL_UDP;
+
+ sum = 0;
+ for (i = 0; i < sizeof (phu.pa) / sizeof (phu.pa[0]); i++)
+ sum += phu.pa[i];
+
+ sp = (const u16 *) up;
+
+ for (i = 0; i < (len & ~1); i += 2)
+ sum += *sp++;
+
+ if (len & 1)
+ sum += clib_host_to_net_u16 ((*(const u8 *) sp) << 8);
+
+ while (sum > 0xffff)
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = ~sum & 0xffff;
+
+ return (sum);
+}
+
+u16
+udp_checksum (udp_header_t * uh, u32 udp_len, void *ih, u8 version)
+{
+ switch (version)
+ {
+ case IP4:
+ return (udp_ip4_checksum (uh, udp_len,
+ ((ip4_header_t *) ih)->src_address.as_u8,
+ ((ip4_header_t *) ih)->dst_address.as_u8));
+ case IP6:
+ return (udp_ip6_checksum (ih, uh, udp_len));
+ default:
+ return ~0;
+ }
+}
+
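+/*
+ * Prepend a UDP header; the length field covers the whole buffer chain
+ * and the checksum is left zero for the caller to fill in or offload.
+ */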
+void *
+pkt_push_udp (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, u16 dp)
+{
+ udp_header_t *uh;
+ u16 udp_len = sizeof (udp_header_t) + vlib_buffer_length_in_chain (vm, b);
+
+ uh = vlib_buffer_push_uninit (b, sizeof (*uh));
+
+ uh->src_port = clib_host_to_net_u16 (sp);
+ uh->dst_port = clib_host_to_net_u16 (dp);
+ uh->length = clib_host_to_net_u16 (udp_len);
+ uh->checksum = 0;
+ return uh;
+}
+
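+/*
+ * Prepend an IPv4 or IPv6 header; src and dst must share the same address
+ * family, otherwise the packet is discarded.
+ */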
+void *
+pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * src,
+ ip_address_t * dst, u32 proto, u8 csum_offload)
+{
+ if (ip_addr_version (src) != ip_addr_version (dst))
+ {
+ clib_warning ("src %U and dst %U IP have different AFI! Discarding!",
+ format_ip_address, src, format_ip_address, dst);
+ return 0;
+ }
+
+ switch (ip_addr_version (src))
+ {
+ case IP4:
+ return vlib_buffer_push_ip4 (vm, b, &ip_addr_v4 (src),
+ &ip_addr_v4 (dst), proto, csum_offload);
+ break;
+ case IP6:
+ return vlib_buffer_push_ip6 (vm, b, &ip_addr_v6 (src),
+ &ip_addr_v6 (dst), proto);
+ break;
+ }
+
+ return 0;
+}
+
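+/*
+ * Prepend UDP and IP headers. With csum_offload set, the UDP checksum is
+ * left to hardware and the l3/l4 header offsets are recorded in the buffer
+ * metadata; otherwise the checksum is computed in software and a result of
+ * ~0 (unsupported IP version) causes the packet to be discarded.
+ */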
+void *
+pkt_push_udp_and_ip (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, u16 dp,
+ ip_address_t * sip, ip_address_t * dip, u8 csum_offload)
+{
+ u16 udpsum;
+ udp_header_t *uh;
+ void *ih;
+
+ uh = pkt_push_udp (vm, b, sp, dp);
+
+ if (csum_offload)
+ {
+ ih = pkt_push_ip (vm, b, sip, dip, IP_PROTOCOL_UDP, 1);
+ b->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ vnet_buffer (b)->l3_hdr_offset = (u8 *) ih - b->data;
+ vnet_buffer (b)->l4_hdr_offset = (u8 *) uh - b->data;
+ uh->checksum = 0;
+ }
+ else
+ {
+ ih = pkt_push_ip (vm, b, sip, dip, IP_PROTOCOL_UDP, 0);
+ udpsum = udp_checksum (uh, clib_net_to_host_u16 (uh->length), ih,
+ ip_addr_version (sip));
+ if (udpsum == (u16) ~ 0)
+ {
+ clib_warning ("Failed UDP checksum! Discarding");
+ return 0;
+ }
+ /* clear flags used for csum since we're not offloading */
+ b->flags &= ~(VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_IS_IP6);
+ uh->checksum = udpsum;
+ }
+ return ih;
+}
+
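+/*
+ * Prepend a zeroed LISP Encapsulated Control Message (ECM) header and set
+ * its type.
+ */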
+void *
+pkt_push_ecm_hdr (vlib_buffer_t * b)
+{
+ ecm_hdr_t *h;
+ h = vlib_buffer_push_uninit (b, sizeof (h[0]));
+
+ memset (h, 0, sizeof (h[0]));
+ h->type = LISP_ENCAP_CONTROL_TYPE;
+ memset (h->reserved2, 0, sizeof (h->reserved2));
+
+ return h;
+}
+
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-cp/packets.h b/src/vnet/lisp-cp/packets.h
new file mode 100644
index 00000000..eb9871bf
--- /dev/null
+++ b/src/vnet/lisp-cp/packets.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/lisp-cp/lisp_types.h>
+
+#define IP_DF 0x4000 /* don't fragment */
+
+void *pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * src,
+ ip_address_t * dst, u32 proto, u8 csum_offload);
+
+void *pkt_push_udp_and_ip (vlib_main_t * vm, vlib_buffer_t * b, u16 sp,
+ u16 dp, ip_address_t * sip, ip_address_t * dip,
+ u8 cksum_offload);
+
+void *pkt_push_ecm_hdr (vlib_buffer_t * b);
+
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/decap.c b/src/vnet/lisp-gpe/decap.c
new file mode 100644
index 00000000..e59a353d
--- /dev/null
+++ b/src/vnet/lisp-gpe/decap.c
@@ -0,0 +1,605 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief L2 LISP-GPE decap code.
+ *
+ */
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 tunnel_index;
+ u32 error;
+ lisp_gpe_header_t h;
+} lisp_gpe_rx_trace_t;
+
+static u8 *
+format_lisp_gpe_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lisp_gpe_rx_trace_t *t = va_arg (*args, lisp_gpe_rx_trace_t *);
+
+ if (t->tunnel_index != ~0)
+ {
+ s = format (s, "LISP-GPE: tunnel %d next %d error %d", t->tunnel_index,
+ t->next_index, t->error);
+ }
+ else
+ {
+ s = format (s, "LISP-GPE: no tunnel next %d error %d\n", t->next_index,
+ t->error);
+ }
+ s = format (s, "\n %U", format_lisp_gpe_header_with_length, &t->h,
+ (u32) sizeof (t->h) /* max size */ );
+ return s;
+}
+
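+/*
+ * Maps the GPE header next-protocol field to a node next index. The last
+ * (NSH) slot starts out as drop and is patched at runtime once an
+ * nsh-input node is available (see gpe_add_arc_from_input_to_nsh below).
+ */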
+static u32 next_proto_to_next_index[LISP_GPE_NEXT_PROTOS] = {
+ LISP_GPE_INPUT_NEXT_DROP,
+ LISP_GPE_INPUT_NEXT_IP4_INPUT,
+ LISP_GPE_INPUT_NEXT_IP6_INPUT,
+ LISP_GPE_INPUT_NEXT_L2_INPUT,
+ LISP_GPE_INPUT_NEXT_DROP
+};
+
+always_inline u32
+next_protocol_to_next_index (lisp_gpe_header_t * lgh, u8 * next_header)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+
+ /* lisp-gpe router */
+ if (PREDICT_TRUE ((lgh->flags & LISP_GPE_FLAGS_P)
+ || GPE_ENCAP_VXLAN == lgm->encap_mode))
+ {
+ if (PREDICT_FALSE (lgh->next_protocol >= LISP_GPE_NEXT_PROTOS))
+ return LISP_GPE_INPUT_NEXT_DROP;
+
+ return next_proto_to_next_index[lgh->next_protocol];
+ }
+ /* legacy lisp router */
+ else if ((lgh->flags & LISP_GPE_FLAGS_P) == 0)
+ {
+ ip4_header_t *iph = (ip4_header_t *) next_header;
+ if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
+ return LISP_GPE_INPUT_NEXT_IP4_INPUT;
+ else if ((iph->ip_version_and_header_length & 0xF0) == 0x60)
+ return LISP_GPE_INPUT_NEXT_IP6_INPUT;
+ else
+ return LISP_GPE_INPUT_NEXT_DROP;
+ }
+ else
+ return LISP_GPE_INPUT_NEXT_DROP;
+}
+
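+/*
+ * Map a next index to the tunnel lookup tables (l3, l2 or nsh interfaces)
+ * used to resolve the packet's vni to a lisp-gpe sw_if_index.
+ */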
+always_inline tunnel_lookup_t *
+next_index_to_iface (lisp_gpe_main_t * lgm, u32 next_index)
+{
+ if (LISP_GPE_INPUT_NEXT_IP4_INPUT == next_index
+ || LISP_GPE_INPUT_NEXT_IP6_INPUT == next_index)
+ return &lgm->l3_ifaces;
+ else if (LISP_GPE_INPUT_NEXT_L2_INPUT == next_index)
+ return &lgm->l2_ifaces;
+ else if (LISP_GPE_INPUT_NEXT_NSH_INPUT == next_index)
+ return &lgm->nsh_ifaces;
+ clib_warning ("next_index not associated to an interface!");
+ return 0;
+}
+
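+/*
+ * Batch rx counter updates: increments are accumulated while consecutive
+ * packets hit the same sw_if_index and flushed to the interface counters
+ * only when the interface changes (or at the end of the frame).
+ */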
+static_always_inline void
+incr_decap_stats (vnet_main_t * vnm, u32 thread_index, u32 length,
+ u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets,
+ u32 * n_bytes)
+{
+ vnet_interface_main_t *im;
+
+ if (PREDICT_TRUE (sw_if_index == *last_sw_if_index))
+ {
+ *n_packets += 1;
+ *n_bytes += length;
+ }
+ else
+ {
+ if (PREDICT_TRUE (*last_sw_if_index != ~0))
+ {
+ im = &vnm->interface_main;
+
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ thread_index, *last_sw_if_index,
+ *n_packets, *n_bytes);
+ }
+ *last_sw_if_index = sw_if_index;
+ *n_packets = 1;
+ *n_bytes = length;
+ }
+}
+
+/**
+ * @brief LISP-GPE decap dispatcher.
+ * @node lisp_gpe_input_inline
+ *
+ * LISP-GPE decap dispatcher.
+ *
+ * Decaps the IP-UDP-LISP-GPE header and, based on the next protocol in the
+ * GPE header and the vni, decides the next node to forward the packet to.
+ *
+ * @param[in] vm vlib_main_t corresponding to current thread.
+ * @param[in] node vlib_node_runtime_t data for this node.
+ * @param[in] frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @return number of vectors in frame.
+ */
+static uword
+lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, u8 is_v4)
+{
+ u32 n_left_from, next_index, *from, *to_next, thread_index;
+ u32 n_bytes = 0, n_packets = 0, last_sw_if_index = ~0, drops = 0;
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+
+ thread_index = vlib_get_thread_index ();
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ ip4_udp_lisp_gpe_header_t *iul4_0, *iul4_1;
+ ip6_udp_lisp_gpe_header_t *iul6_0, *iul6_1;
+ lisp_gpe_header_t *lh0, *lh1;
+ u32 next0, next1, error0, error1;
+ uword *si0, *si1;
+ tunnel_lookup_t *tl0, *tl1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* udp leaves current_data pointing at the lisp header */
+ if (is_v4)
+ {
+ vlib_buffer_advance (b0,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t)));
+ vlib_buffer_advance (b1,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t)));
+
+ iul4_0 = vlib_buffer_get_current (b0);
+ iul4_1 = vlib_buffer_get_current (b1);
+
+ /* pop (ip, udp, lisp-gpe) */
+ vlib_buffer_advance (b0, sizeof (*iul4_0));
+ vlib_buffer_advance (b1, sizeof (*iul4_1));
+
+ lh0 = &iul4_0->lisp;
+ lh1 = &iul4_1->lisp;
+ }
+ else
+ {
+ vlib_buffer_advance (b0,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip6_header_t)));
+ vlib_buffer_advance (b1,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip6_header_t)));
+
+ iul6_0 = vlib_buffer_get_current (b0);
+ iul6_1 = vlib_buffer_get_current (b1);
+
+ /* pop (ip, udp, lisp-gpe) */
+ vlib_buffer_advance (b0, sizeof (*iul6_0));
+ vlib_buffer_advance (b1, sizeof (*iul6_1));
+
+ lh0 = &iul6_0->lisp;
+ lh1 = &iul6_1->lisp;
+ }
+
+ /* determine next_index from lisp-gpe header */
+ next0 = next_protocol_to_next_index (lh0,
+ vlib_buffer_get_current (b0));
+ next1 = next_protocol_to_next_index (lh1,
+ vlib_buffer_get_current (b1));
+
+ /* determine if tunnel is l2 or l3 */
+ tl0 = next_index_to_iface (lgm, next0);
+ tl1 = next_index_to_iface (lgm, next1);
+
+ /* map iid/vni to lisp-gpe sw_if_index which is used by ipx_input to
+ * decide the rx vrf and the input features to be applied */
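+ /* NOTE: as in the single-buffer loop below, the vni uses only the
+ * first 24 bits of the iid field */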
+ si0 = hash_get (tl0->sw_if_index_by_vni,
+ clib_net_to_host_u32 (lh0->iid << 8));
+ si1 = hash_get (tl1->sw_if_index_by_vni,
+ clib_net_to_host_u32 (lh1->iid << 8));
+
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+ vnet_update_l2_len (b1);
+
+ if (si0)
+ {
+ incr_decap_stats (lgm->vnet_main, thread_index,
+ vlib_buffer_length_in_chain (vm, b0), si0[0],
+ &last_sw_if_index, &n_packets, &n_bytes);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0];
+ error0 = 0;
+ }
+ else
+ {
+ next0 = LISP_GPE_INPUT_NEXT_DROP;
+ error0 = LISP_GPE_ERROR_NO_TUNNEL;
+ drops++;
+ }
+
+ if (si1)
+ {
+ incr_decap_stats (lgm->vnet_main, thread_index,
+ vlib_buffer_length_in_chain (vm, b1), si1[0],
+ &last_sw_if_index, &n_packets, &n_bytes);
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] = si1[0];
+ error1 = 0;
+ }
+ else
+ {
+ next1 = LISP_GPE_INPUT_NEXT_DROP;
+ error1 = LISP_GPE_ERROR_NO_TUNNEL;
+ drops++;
+ }
+
+ b0->error = error0 ? node->errors[error0] : 0;
+ b1->error = error1 ? node->errors[error1] : 0;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lisp_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->h = lh0[0];
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lisp_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b1,
+ sizeof (*tr));
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->h = lh1[0];
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip4_udp_lisp_gpe_header_t *iul4_0;
+ ip6_udp_lisp_gpe_header_t *iul6_0;
+ lisp_gpe_header_t *lh0;
+ u32 error0;
+ uword *si0;
+ tunnel_lookup_t *tl0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* udp leaves current_data pointing at the lisp header
+ * TODO: there's no difference in processing between v4 and v6
+ * encapsulated packets so the code should be simplified if ip header
+ * info is not going to be used for dp smrs/dpsec */
+ if (is_v4)
+ {
+ vlib_buffer_advance (b0,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t)));
+
+ iul4_0 = vlib_buffer_get_current (b0);
+
+ /* pop (ip, udp, lisp-gpe) */
+ vlib_buffer_advance (b0, sizeof (*iul4_0));
+
+ lh0 = &iul4_0->lisp;
+ }
+ else
+ {
+ vlib_buffer_advance (b0,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip6_header_t)));
+
+ iul6_0 = vlib_buffer_get_current (b0);
+
+ /* pop (ip, udp, lisp-gpe) */
+ vlib_buffer_advance (b0, sizeof (*iul6_0));
+
+ lh0 = &iul6_0->lisp;
+ }
+
+ /* TODO if security is to be implemented, something similar to RPF,
+ * probably we'd like to check that the peer is allowed to send us
+ * packets. For this, we should use the tunnel table OR check that
+ * we have a mapping for the source eid and that the outer source of
+ * the packet is one of its locators */
+
+ /* determine next_index from lisp-gpe header */
+ next0 = next_protocol_to_next_index (lh0,
+ vlib_buffer_get_current (b0));
+
+ /* determine if tunnel is l2 or l3 */
+ tl0 = next_index_to_iface (lgm, next0);
+
+ /* map iid/vni to lisp-gpe sw_if_index which is used by ipx_input to
+ * decide the rx vrf and the input features to be applied.
+ * NOTE: vni uses only the first 24 bits */
+ si0 = hash_get (tl0->sw_if_index_by_vni,
+ clib_net_to_host_u32 (lh0->iid << 8));
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+ if (si0)
+ {
+ incr_decap_stats (lgm->vnet_main, thread_index,
+ vlib_buffer_length_in_chain (vm, b0), si0[0],
+ &last_sw_if_index, &n_packets, &n_bytes);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0];
+ error0 = 0;
+ }
+ else
+ {
+ next0 = LISP_GPE_INPUT_NEXT_DROP;
+ error0 = LISP_GPE_ERROR_NO_TUNNEL;
+ drops++;
+ }
+
+ /* TODO error handling if security is implemented */
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lisp_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->h = lh0[0];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* flush iface stats */
+ incr_decap_stats (lgm->vnet_main, thread_index, 0, ~0, &last_sw_if_index,
+ &n_packets, &n_bytes);
+ vlib_node_increment_counter (vm, lisp_gpe_ip4_input_node.index,
+ LISP_GPE_ERROR_NO_TUNNEL, drops);
+ return from_frame->n_vectors;
+}
+
+static uword
+lisp_gpe_ip4_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return lisp_gpe_input_inline (vm, node, from_frame, 1);
+}
+
+static uword
+lisp_gpe_ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return lisp_gpe_input_inline (vm, node, from_frame, 0);
+}
+
+static char *lisp_gpe_ip4_input_error_strings[] = {
+#define lisp_gpe_error(n,s) s,
+#include <vnet/lisp-gpe/lisp_gpe_error.def>
+#undef lisp_gpe_error
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_gpe_ip4_input_node) = {
+ .function = lisp_gpe_ip4_input,
+ .name = "lisp-gpe-ip4-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_next_nodes = LISP_GPE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [LISP_GPE_INPUT_NEXT_##s] = n,
+ foreach_lisp_gpe_ip_input_next
+#undef _
+ },
+
+ .n_errors = ARRAY_LEN (lisp_gpe_ip4_input_error_strings),
+ .error_strings = lisp_gpe_ip4_input_error_strings,
+
+ .format_buffer = format_lisp_gpe_header_with_length,
+ .format_trace = format_lisp_gpe_rx_trace,
+ // $$$$ .unformat_buffer = unformat_lisp_gpe_header,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_gpe_ip6_input_node) = {
+ .function = lisp_gpe_ip6_input,
+ .name = "lisp-gpe-ip6-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_next_nodes = LISP_GPE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [LISP_GPE_INPUT_NEXT_##s] = n,
+ foreach_lisp_gpe_ip_input_next
+#undef _
+ },
+
+ .n_errors = ARRAY_LEN (lisp_gpe_ip4_input_error_strings),
+ .error_strings = lisp_gpe_ip4_input_error_strings,
+
+ .format_buffer = format_lisp_gpe_header_with_length,
+ .format_trace = format_lisp_gpe_rx_trace,
+ // $$$$ .unformat_buffer = unformat_lisp_gpe_header,
+};
+/* *INDENT-ON* */
+
+/**
+ * Adds arc from lisp-gpe-input to nsh-input if nsh-input is available
+ */
+static void
+gpe_add_arc_from_input_to_nsh ()
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ vlib_main_t *vm = lgm->vlib_main;
+ vlib_node_t *nsh_input;
+
+ /* Arc already exists */
+ if (next_proto_to_next_index[LISP_GPE_NEXT_PROTO_NSH]
+ != LISP_GPE_INPUT_NEXT_DROP)
+ return;
+
+ /* Check if nsh-input is available */
+ if ((nsh_input = vlib_get_node_by_name (vm, (u8 *) "nsh-input")))
+ {
+ u32 slot4, slot6;
+ slot4 = vlib_node_add_next_with_slot (vm, lisp_gpe_ip4_input_node.index,
+ nsh_input->index,
+ LISP_GPE_NEXT_PROTO_NSH);
+ slot6 = vlib_node_add_next_with_slot (vm, lisp_gpe_ip6_input_node.index,
+ nsh_input->index,
+ LISP_GPE_NEXT_PROTO_NSH);
+ ASSERT (slot4 == slot6 && slot4 == LISP_GPE_INPUT_NEXT_NSH_INPUT);
+
+ next_proto_to_next_index[LISP_GPE_NEXT_PROTO_NSH] = slot4;
+ }
+}
+
+/** GPE decap init function. */
+clib_error_t *
+gpe_decap_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+
+ if ((error = vlib_call_init_function (vm, lisp_gpe_init)))
+ return error;
+
+ gpe_add_arc_from_input_to_nsh ();
+ return 0;
+}
+
+static uword
+lisp_gpe_nsh_dummy_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ vlib_node_increment_counter (vm, node->node_index, 0, 1);
+ return from_frame->n_vectors;
+}
+
+static char *lisp_gpe_nsh_dummy_error_strings[] = {
+ "lisp gpe dummy nsh decap",
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_gpe_nsh_dummy_input_node) = {
+ .function = lisp_gpe_nsh_dummy_input,
+ .name = "lisp-gpe-nsh-dummy-input",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = 1,
+
+ .n_errors = 1,
+ .error_strings = lisp_gpe_nsh_dummy_error_strings,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_add_dummy_nsh_node_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ vlib_node_add_next (lgm->vlib_main, lisp_gpe_ip4_input_node.index,
+ lisp_gpe_nsh_dummy_input_node.index);
+ next_proto_to_next_index[LISP_GPE_NEXT_PROTO_NSH] =
+ LISP_GPE_INPUT_NEXT_NSH_INPUT;
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_add_dummy_nsh_node_command, static) = {
+ .path = "test one nsh add-dummy-decap-node",
+ .function = lisp_add_dummy_nsh_node_command_fn,
+};
+/* *INDENT-ON* */
+
+VLIB_INIT_FUNCTION (gpe_decap_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/dir.dox b/src/vnet/lisp-gpe/dir.dox
new file mode 100644
index 00000000..afa6da9a
--- /dev/null
+++ b/src/vnet/lisp-gpe/dir.dox
@@ -0,0 +1,26 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief LISP-GPE code.
+
+ An implementation of LISP-GPE as per:
+ RFC 6830
+ draft-lewis-lisp-gpe-02
+
+ See file: rfc.txt
+
+*/ \ No newline at end of file
diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c
new file mode 100644
index 00000000..a0c05e85
--- /dev/null
+++ b/src/vnet/lisp-gpe/interface.c
@@ -0,0 +1,941 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Common utility functions for LISP-GPE interfaces.
+ *
+ */
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h>
+#include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/lisp-cp/lisp_cp_dpo.h>
+
+/**
+ * @brief The VLIB node arc/edge from the interface's TX node to the L2
+ * load-balancing node, which is where all packets go.
+ */
+static uword l2_arc_to_lb;
+
+#define foreach_lisp_gpe_tx_next \
+ _(DROP, "error-drop") \
+ _(IP4_LOOKUP, "ip4-lookup") \
+ _(IP6_LOOKUP, "ip6-lookup")
+
+typedef enum
+{
+#define _(sym,str) LISP_GPE_TX_NEXT_##sym,
+ foreach_lisp_gpe_tx_next
+#undef _
+ LISP_GPE_TX_N_NEXT,
+} lisp_gpe_tx_next_t;
+
+typedef struct
+{
+ u32 tunnel_index;
+} lisp_gpe_tx_trace_t;
+
+u8 *
+format_lisp_gpe_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lisp_gpe_tx_trace_t *t = va_arg (*args, lisp_gpe_tx_trace_t *);
+
+ s = format (s, "LISP-GPE-TX: tunnel %d", t->tunnel_index);
+ return s;
+}
+
+#define is_v4_packet(_h) (((*(u8*) _h) & 0xF0) == 0x40)
+
+/**
+ * @brief LISP-GPE interface TX (encap) function.
+ * @node lisp_gpe_interface_tx
+ *
+ * The LISP-GPE interface TX (encap) function.
+ *
+ * Looks up the associated tunnel based on the adjacency hit in the SD FIB
+ * and, if the tunnel is multihomed, uses the flow hash to determine the
+ * sub-tunnel and rewrite string used to encapsulate the packet.
+ *
+ * @param[in] vm vlib_main_t corresponding to the current thread.
+ * @param[in] node vlib_node_runtime_t data for this node.
+ * @param[in] frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @return number of vectors in frame.
+ */
+static uword
+lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t *adj0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t *b0;
+ u8 is_v4_0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* Fixup the checksum and len fields in the LISP tunnel encap
+ * that was applied at the midchain node */
+ is_v4_0 = is_v4_packet (vlib_buffer_get_current (b0));
+ ip_udp_fixup_one (lgm->vlib_main, b0, is_v4_0);
+
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ tr->tunnel_index = adj_index0;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_lisp_gpe_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "lisp_gpe%d", dev_instance);
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (lisp_gpe_device_class) = {
+ .name = "LISP_GPE",
+ .format_device_name = format_lisp_gpe_name,
+ .format_tx_trace = format_lisp_gpe_tx_trace,
+ .tx_function = lisp_gpe_interface_tx,
+};
+/* *INDENT-ON* */
+
+u8 *
+format_lisp_gpe_header_with_length (u8 * s, va_list * args)
+{
+ lisp_gpe_header_t *h = va_arg (*args, lisp_gpe_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "lisp-gpe header truncated");
+
+ s = format (s, "flags: ");
+#define _(n,v) if (h->flags & v) s = format (s, "%s ", #n);
+ foreach_lisp_gpe_flag_bit;
+#undef _
+
+ s = format (s, "\n ver_res %d res %d next_protocol %d iid %d(%x)",
+ h->ver_res, h->res, h->next_protocol,
+ clib_net_to_host_u32 (h->iid << 8),
+ clib_net_to_host_u32 (h->iid << 8));
+ return s;
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = {
+ .name = "LISP_GPE",
+ .format_header = format_lisp_gpe_header_with_length,
+ .build_rewrite = lisp_gpe_build_rewrite,
+ .update_adjacency = lisp_gpe_update_adjacency,
+};
+/* *INDENT-ON* */
+
+
+typedef struct
+{
+ u32 dpo_index;
+} l2_lisp_gpe_tx_trace_t;
+
+static u8 *
+format_l2_lisp_gpe_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_lisp_gpe_tx_trace_t *t = va_arg (*args, l2_lisp_gpe_tx_trace_t *);
+
+ s = format (s, "L2-LISP-GPE-TX: load-balance %d", t->dpo_index);
+ return s;
+}
+
+/**
+ * @brief LISP-GPE interface TX (encap) function for L2 overlays.
+ * @node l2_lisp_gpe_interface_tx
+ *
+ * The L2 LISP-GPE interface TX (encap) function.
+ *
+ * Uses the bridge domain index and the source and destination ethernet
+ * addresses to look up the tunnel. If the tunnel is multihomed, a flow hash
+ * is used to determine the sub-tunnel and therefore the rewrite string to
+ * be used to encapsulate the packets.
+ *
+ * @param[in] vm vlib_main_t corresponding to the current thread.
+ * @param[in] node vlib_node_runtime_t data for this node.
+ * @param[in] frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @return number of vectors in frame.
+ */
+static uword
+l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ u32 thread_index = vlib_get_thread_index ();
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0, lbi0;
+ ethernet_header_t *e0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ e0 = vlib_buffer_get_current (b0);
+
+ vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC;
+
+ /* lookup dst + src mac */
+ lbi0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index,
+ e0->src_address, e0->dst_address);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = lbi0;
+
+ vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm,
+ b0));
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ tr->dpo_index = lbi0;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, l2_arc_to_lb);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_l2_lisp_gpe_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "l2_lisp_gpe%d", dev_instance);
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (l2_lisp_gpe_device_class,static) = {
+ .name = "L2_LISP_GPE",
+ .format_device_name = format_l2_lisp_gpe_name,
+ .format_tx_trace = format_l2_lisp_gpe_tx_trace,
+ .tx_function = l2_lisp_gpe_interface_tx,
+};
+/* *INDENT-ON* */
+
+typedef struct
+{
+ u32 dpo_index;
+} nsh_lisp_gpe_tx_trace_t;
+
+u8 *
+format_nsh_lisp_gpe_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nsh_lisp_gpe_tx_trace_t *t = va_arg (*args, nsh_lisp_gpe_tx_trace_t *);
+
+ s = format (s, "NSH-GPE-TX: tunnel %d", t->dpo_index);
+ return s;
+}
+
+/**
+ * @brief LISP-GPE interface TX for NSH overlays.
+ * @node nsh_lisp_gpe_interface_tx
+ *
+ * The NSH LISP-GPE interface TX function.
+ *
+ * @param[in] vm vlib_main_t corresponding to the current thread.
+ * @param[in] node vlib_node_runtime_t data for this node.
+ * @param[in] frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @return number of vectors in frame.
+ */
+static uword
+nsh_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ u32 *nsh0, next0;
+ const dpo_id_t *dpo0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ nsh0 = vlib_buffer_get_current (b0);
+
+ vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_LCAF;
+
+ /* lookup SPI + SI (second word of the NSH header).
+ * NB: Load balancing was done by the control plane */
+ dpo0 = lisp_nsh_fib_lookup (lgm, nsh0[1]);
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ nsh_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ tr->dpo_index = dpo0->dpoi_index;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_nsh_lisp_gpe_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "nsh_lisp_gpe%d", dev_instance);
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (nsh_lisp_gpe_device_class,static) = {
+ .name = "NSH_LISP_GPE",
+ .format_device_name = format_nsh_lisp_gpe_name,
+ .format_tx_trace = format_nsh_lisp_gpe_tx_trace,
+ .tx_function = nsh_lisp_gpe_interface_tx,
+};
+/* *INDENT-ON* */
+
+static vnet_hw_interface_t *
+lisp_gpe_create_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table,
+ vnet_device_class_t * dev_class,
+ tunnel_lookup_t * tuns)
+{
+ u32 flen;
+ u32 hw_if_index = ~0;
+ u8 *new_name;
+ vnet_hw_interface_t *hi;
+ vnet_main_t *vnm = lgm->vnet_main;
+
+ /* create hw lisp_gpeX iface if needed, otherwise reuse existing */
+ flen = vec_len (lgm->free_tunnel_hw_if_indices);
+ if (flen > 0)
+ {
+ hw_if_index = lgm->free_tunnel_hw_if_indices[flen - 1];
+ _vec_len (lgm->free_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ /* rename interface */
+ new_name = format (0, "%U", dev_class->format_device_name, vni);
+
+ vec_add1 (new_name, 0);
+ vnet_rename_interface (vnm, hw_if_index, (char *) new_name);
+ vec_free (new_name);
+
+ /* clear old stats of freed interface before reuse */
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_interface_counter_lock (im);
+ vlib_zero_combined_counter (&im->combined_sw_if_counters
+ [VNET_INTERFACE_COUNTER_TX],
+ hi->sw_if_index);
+ vlib_zero_combined_counter (&im->combined_sw_if_counters
+ [VNET_INTERFACE_COUNTER_RX],
+ hi->sw_if_index);
+ vlib_zero_simple_counter (&im->sw_if_counters
+ [VNET_INTERFACE_COUNTER_DROP],
+ hi->sw_if_index);
+ vnet_interface_counter_unlock (im);
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface (vnm, dev_class->index, vni,
+ lisp_gpe_hw_class.index, 0);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ }
+
+ hash_set (tuns->hw_if_index_by_dp_table, dp_table, hw_if_index);
+
+ /* set tunnel termination: post decap, packets are tagged as having been
+ * originated by lisp-gpe interface */
+ hash_set (tuns->sw_if_index_by_vni, vni, hi->sw_if_index);
+ hash_set (tuns->vni_by_sw_if_index, hi->sw_if_index, vni);
+
+ return hi;
+}
+
+static void
+lisp_gpe_remove_iface (lisp_gpe_main_t * lgm, u32 hi_index, u32 dp_table,
+ tunnel_lookup_t * tuns)
+{
+ vnet_main_t *vnm = lgm->vnet_main;
+ vnet_hw_interface_t *hi;
+ uword *vnip;
+
+ hi = vnet_get_hw_interface (vnm, hi_index);
+
+ /* disable interface */
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index, 0 /* down */ );
+ vnet_hw_interface_set_flags (vnm, hi->hw_if_index, 0 /* down */ );
+ hash_unset (tuns->hw_if_index_by_dp_table, dp_table);
+ vec_add1 (lgm->free_tunnel_hw_if_indices, hi->hw_if_index);
+
+ /* clean tunnel termination and vni to sw_if_index binding */
+ vnip = hash_get (tuns->vni_by_sw_if_index, hi->sw_if_index);
+ if (0 == vnip)
+ {
+ clib_warning ("No vni associated to interface %d", hi->sw_if_index);
+ return;
+ }
+ hash_unset (tuns->sw_if_index_by_vni, vnip[0]);
+ hash_unset (tuns->vni_by_sw_if_index, hi->sw_if_index);
+}
+
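+/*
+ * Bind the interface to the IPv4 and IPv6 FIBs for the given table id,
+ * creating and locking the FIB tables if needed, and enable IP4/IP6
+ * processing on the interface.
+ */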
+static void
+lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id)
+{
+ fib_node_index_t fib_index;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id,
+ FIB_SOURCE_LISP);
+ vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ ip4_sw_interface_enable_disable (sw_if_index, 1);
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id,
+ FIB_SOURCE_LISP);
+ vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ ip6_sw_interface_enable_disable (sw_if_index, 1);
+}
+
+static void
+lisp_gpe_tenant_del_default_routes (u32 table_id)
+{
+ fib_protocol_t proto;
+
+ FOR_EACH_FIB_IP_PROTOCOL (proto)
+ {
+ fib_prefix_t prefix = {
+ .fp_proto = proto,
+ };
+ u32 fib_index;
+
+ fib_index = fib_table_find (prefix.fp_proto, table_id);
+ fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP);
+ fib_table_unlock (fib_index, prefix.fp_proto, FIB_SOURCE_LISP);
+ }
+}
+
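+/*
+ * Install the v4 and v6 default routes in the tenant's table as exclusive
+ * entries backed by the lisp-cp DPO, so traffic with no more specific
+ * route is punted to the LISP control plane.
+ */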
+static void
+lisp_gpe_tenant_add_default_routes (u32 table_id)
+{
+ fib_protocol_t proto;
+
+ FOR_EACH_FIB_IP_PROTOCOL (proto)
+ {
+ fib_prefix_t prefix = {
+ .fp_proto = proto,
+ };
+ u32 fib_index;
+
+ /*
+ * Add a default route that results in a control plane punt DPO
+ */
+ fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id,
+ FIB_SOURCE_LISP);
+ fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ lisp_cp_dpo_get (fib_proto_to_dpo
+ (proto)));
+ }
+}
+
+
+/**
+ * @brief Add LISP-GPE L3 interface.
+ *
+ * Creates a LISP-GPE interface, sets ingress arcs from lisp_gpeX_lookup,
+ * installs default routes that attract all traffic with no more specific
+ * routes to lgpe-ipx-lookup, sets egress arcs to ipx-lookup, puts
+ * the interface in the right vrf and enables it.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] vni Virtual network identifier.
+ * @param[in] table_id VRF table id to bind the interface to.
+ *
+ * @return sw_if_index of the created interface, or ~0 on error.
+ */
+u32
+lisp_gpe_add_l3_iface (lisp_gpe_main_t * lgm, u32 vni, u32 table_id)
+{
+ vnet_main_t *vnm = lgm->vnet_main;
+ tunnel_lookup_t *l3_ifaces = &lgm->l3_ifaces;
+ vnet_hw_interface_t *hi;
+ uword *hip, *si;
+
+ hip = hash_get (l3_ifaces->hw_if_index_by_dp_table, table_id);
+
+ if (hip)
+ {
+ clib_warning ("vrf %d already mapped to a vni", table_id);
+ return ~0;
+ }
+
+ si = hash_get (l3_ifaces->sw_if_index_by_vni, vni);
+
+ if (si)
+ {
+ clib_warning ("Interface for vni %d already exists", vni);
+ }
+
+ /* create lisp iface and populate tunnel tables */
+ hi = lisp_gpe_create_iface (lgm, vni, table_id,
+ &lisp_gpe_device_class, l3_ifaces);
+
+ /* insert default routes that point to lisp-cp lookup */
+ lisp_gpe_iface_set_table (hi->sw_if_index, table_id);
+ lisp_gpe_tenant_add_default_routes (table_id);
+
+ /* enable interface */
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_hw_interface_set_flags (vnm, hi->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ return (hi->sw_if_index);
+}
+
+void
+lisp_gpe_del_l3_iface (lisp_gpe_main_t * lgm, u32 vni, u32 table_id)
+{
+ vnet_main_t *vnm = lgm->vnet_main;
+ tunnel_lookup_t *l3_ifaces = &lgm->l3_ifaces;
+ vnet_hw_interface_t *hi;
+ uword *hip;
+
+ hip = hash_get (l3_ifaces->hw_if_index_by_dp_table, table_id);
+
+ if (hip == 0)
+ {
+ clib_warning ("The interface for vrf %d doesn't exist", table_id);
+ return;
+ }
+
+ hi = vnet_get_hw_interface (vnm, hip[0]);
+
+ lisp_gpe_remove_iface (lgm, hip[0], table_id, &lgm->l3_ifaces);
+
+ /* unset default routes */
+ ip4_sw_interface_enable_disable (hi->sw_if_index, 0);
+ ip6_sw_interface_enable_disable (hi->sw_if_index, 0);
+ lisp_gpe_tenant_del_default_routes (table_id);
+}
+
+/**
+ * @brief Add LISP-GPE L2 interface.
+ *
+ * Creates a LISP-GPE interface, sets it in L2 mode in the appropriate
+ * bridge domain, sets egress arcs and enables it.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] vni Virtual network identifier.
+ * @param[in] bd_id Bridge domain id.
+ *
+ * @return sw_if_index of the created interface, or ~0 on error.
+ */
+u32
+lisp_gpe_add_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id)
+{
+ vnet_main_t *vnm = lgm->vnet_main;
+ tunnel_lookup_t *l2_ifaces = &lgm->l2_ifaces;
+ vnet_hw_interface_t *hi;
+ uword *hip, *si;
+ u16 bd_index;
+
+ if (bd_id > L2_BD_ID_MAX)
+ {
+ clib_warning ("bridge domain ID %d exceed 16M limit", bd_id);
+ return ~0;
+ }
+
+ bd_index = bd_find_or_add_bd_index (&bd_main, bd_id);
+ hip = hash_get (l2_ifaces->hw_if_index_by_dp_table, bd_index);
+
+ if (hip)
+ {
+ clib_warning ("bridge domain %d already mapped to a vni", bd_id);
+ return ~0;
+ }
+
+ si = hash_get (l2_ifaces->sw_if_index_by_vni, vni);
+ if (si)
+ {
+ clib_warning ("Interface for vni %d already exists", vni);
+ return ~0;
+ }
+
+ /* create lisp iface and populate tunnel tables */
+ hi = lisp_gpe_create_iface (lgm, vni, bd_index,
+ &l2_lisp_gpe_device_class, &lgm->l2_ifaces);
+
+ /* enable interface */
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_hw_interface_set_flags (vnm, hi->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ l2_arc_to_lb = vlib_node_add_named_next (vlib_get_main (),
+ hi->tx_node_index,
+ "l2-load-balance");
+
+ /* we're ready. add iface to l2 bridge domain */
+ set_int_l2_mode (lgm->vlib_main, vnm, MODE_L2_BRIDGE, hi->sw_if_index,
+ bd_index, 0, 0, 0);
+
+ return (hi->sw_if_index);
+}
+
+/**
+ * @brief Del LISP-GPE L2 interface.
+ *
+ * Removes the LISP-GPE interface from its bridge domain, disables it and
+ * returns it to the pool of free interfaces.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] vni Virtual network identifier.
+ * @param[in] bd_id Bridge domain id.
+ */
+void
+lisp_gpe_del_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id)
+{
+ tunnel_lookup_t *l2_ifaces = &lgm->l2_ifaces;
+ vnet_hw_interface_t *hi;
+
+ u32 bd_index = bd_find_index (&bd_main, bd_id);
+ ASSERT (bd_index != ~0);
+ uword *hip = hash_get (l2_ifaces->hw_if_index_by_dp_table, bd_index);
+
+ if (hip == 0)
+ {
+ clib_warning ("The interface for bridge domain %d doesn't exist",
+ bd_id);
+ return;
+ }
+
+ /* Remove interface from bridge by enabling L3 mode */
+ hi = vnet_get_hw_interface (lgm->vnet_main, hip[0]);
+ set_int_l2_mode (lgm->vlib_main, lgm->vnet_main, MODE_L3, hi->sw_if_index,
+ 0, 0, 0, 0);
+ lisp_gpe_remove_iface (lgm, hip[0], bd_index, &lgm->l2_ifaces);
+}
+
+/**
+ * @brief Add LISP-GPE NSH interface.
+ *
+ * Creates LISP-GPE interface, sets it in L3 mode.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ *
+ * @return sw_if_index.
+ */
+u32
+vnet_lisp_gpe_add_nsh_iface (lisp_gpe_main_t * lgm)
+{
+ vnet_main_t *vnm = lgm->vnet_main;
+ tunnel_lookup_t *nsh_ifaces = &lgm->nsh_ifaces;
+ vnet_hw_interface_t *hi;
+ uword *hip, *si;
+
+ hip = hash_get (nsh_ifaces->hw_if_index_by_dp_table, 0);
+
+ if (hip)
+ {
+ clib_warning ("NSH interface 0 already exists");
+ return ~0;
+ }
+
+ si = hash_get (nsh_ifaces->sw_if_index_by_vni, 0);
+ if (si)
+ {
+ clib_warning ("NSH interface already exists");
+ return ~0;
+ }
+
+ /* create lisp iface and populate tunnel tables */
+ hi = lisp_gpe_create_iface (lgm, 0, 0,
+ &nsh_lisp_gpe_device_class, &lgm->nsh_ifaces);
+
+ /* enable interface */
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_hw_interface_set_flags (vnm, hi->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ return (hi->sw_if_index);
+}
+
+/**
+ * @brief Del LISP-GPE NSH interface.
+ *
+ */
+void
+vnet_lisp_gpe_del_nsh_iface (lisp_gpe_main_t * lgm)
+{
+ tunnel_lookup_t *nsh_ifaces = &lgm->nsh_ifaces;
+ uword *hip;
+
+ hip = hash_get (nsh_ifaces->hw_if_index_by_dp_table, 0);
+
+ if (hip == 0)
+ {
+ clib_warning ("The NSH 0 interface doesn't exist");
+ return;
+ }
+ lisp_gpe_remove_iface (lgm, hip[0], 0, &lgm->nsh_ifaces);
+}
+
+static clib_error_t *
+lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ u32 table_id, vni, bd_id;
+ u8 vni_is_set = 0, vrf_is_set = 0, bd_index_is_set = 0;
+ u8 nsh_iface = 0;
+ clib_error_t *error = NULL;
+
+ if (vnet_lisp_gpe_enable_disable_status () == 0)
+ {
+ return clib_error_return (0, "LISP is disabled");
+ }
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "vrf %d", &table_id))
+ {
+ vrf_is_set = 1;
+ }
+ else if (unformat (line_input, "vni %d", &vni))
+ {
+ vni_is_set = 1;
+ }
+ else if (unformat (line_input, "bd %d", &bd_id))
+ {
+ bd_index_is_set = 1;
+ }
+ else if (unformat (line_input, "nsh"))
+ {
+ nsh_iface = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (nsh_iface)
+ {
+ if (is_add)
+ {
+ if (~0 == vnet_lisp_gpe_add_nsh_iface (&lisp_gpe_main))
+ {
+ error = clib_error_return (0, "NSH interface not created");
+ goto done;
+ }
+ }
+ else
+ {
+ vnet_lisp_gpe_del_nsh_iface (&lisp_gpe_main);
+ }
+ goto done;
+ }
+
+ if (vrf_is_set && bd_index_is_set)
+ {
+ error = clib_error_return
+ (0, "Cannot set both vrf and brdige domain index!");
+ goto done;
+ }
+
+ if (!vni_is_set)
+ {
+ error = clib_error_return (0, "vni must be set!");
+ goto done;
+ }
+
+ if (!vrf_is_set && !bd_index_is_set)
+ {
+ error =
+ clib_error_return (0, "vrf or bridge domain index must be set!");
+ goto done;
+ }
+
+ if (bd_index_is_set)
+ {
+ if (is_add)
+ {
+ if (~0 == lisp_gpe_tenant_l2_iface_add_or_lock (vni, bd_id))
+ {
+ error = clib_error_return (0, "L2 interface not created");
+ goto done;
+ }
+ }
+ else
+ lisp_gpe_tenant_l2_iface_unlock (vni);
+ }
+ else
+ {
+ if (is_add)
+ {
+ if (~0 == lisp_gpe_tenant_l3_iface_add_or_lock (vni, table_id))
+ {
+ error = clib_error_return (0, "L3 interface not created");
+ goto done;
+ }
+ }
+ else
+ lisp_gpe_tenant_l3_iface_unlock (vni);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (add_del_lisp_gpe_iface_command, static) = {
+ .path = "gpe iface",
+ .short_help = "gpe iface add/del vni <vni> vrf <vrf>",
+ .function = lisp_gpe_add_del_iface_command_fn,
+};
+/* *INDENT-ON* */
+
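+/*
+ * Example usage (all values illustrative):
+ *   gpe iface add vni 100 vrf 7    - L3 interface for VNI 100 in VRF 7
+ *   gpe iface add vni 200 bd 10    - L2 interface for VNI 200 in bridge-domain 10
+ *   gpe iface add nsh              - NSH interface
+ */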
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe.api b/src/vnet/lisp-gpe/lisp_gpe.api
new file mode 100644
index 00000000..07ee3d7b
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe.api
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief GPE locator structure
+ @param is_ip4 - whether addr is IPv4 or v6
+ @param weight - locator weight
+ @param addr - IPv4/6 address
+*/
+typeonly manual_print manual_endian define gpe_locator
+{
+ u8 is_ip4;
+ u8 weight;
+ u8 addr[16];
+};
+
+/** \brief add or delete GPE tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param eid_type -
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param rmt_eid - remote eid
+ @param lcl_eid - local eid
+ @param rmt_len - remote prefix len
+ @param lcl_len - local prefix len
+ @param vni - virtual network identifier
+ @param dp_table - vrf/bridge domain id
+ @param action - negative action when 0 locators configured
+ @param loc_num - number of locators
+ @param locs - array of locators: local locators followed by the
+ matching remote locators
+*/
+manual_print manual_endian define gpe_add_del_fwd_entry
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 eid_type;
+ u8 rmt_eid[16];
+ u8 lcl_eid[16];
+ u8 rmt_len;
+ u8 lcl_len;
+ u32 vni;
+ u32 dp_table;
+ u8 action;
+ u32 loc_num;
+ vl_api_gpe_locator_t locs[loc_num];
+};
+
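+/** \brief Reply for gpe_add_del_fwd_entry
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param fwd_entry_index - index of the forwarding entry created on add
+*/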
+define gpe_add_del_fwd_entry_reply
+{
+ i32 retval;
+ u32 context;
+ u32 fwd_entry_index;
+};
+
+/** \brief enable or disable gpe protocol
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_en - enable protocol if non-zero, else disable
+*/
+autoreply define gpe_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_en;
+};
+
+/** \brief add or delete gpe_iface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param is_l2 - if non-zero interface is L2, else L3
+ @param dp_table - data-plane table id: VRF id for L3, bridge domain id for L2
+ @param vni - virtual network identifier
+*/
+autoreply define gpe_add_del_iface
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_l2;
+ u32 dp_table;
+ u32 vni;
+};
+
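+/** \brief Get vnis used by GPE forwarding entries
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/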
+define gpe_fwd_entry_vnis_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+manual_print manual_endian define gpe_fwd_entry_vnis_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ u32 vnis[count];
+};
+
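+/** \brief Get GPE forwarding entries for a given vni
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - virtual network identifier
+*/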
+define gpe_fwd_entries_get
+{
+ u32 client_index;
+ u32 context;
+ u32 vni;
+};
+
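+/** \brief GPE forwarding entry
+ @param fwd_entry_index - index of the forwarding entry
+ @param dp_table - vrf/bridge domain id
+ @param eid_type -
+ 0 : ipv4
+ 1 : ipv6
+ 2 : mac
+ @param leid_prefix_len - local EID prefix len
+ @param reid_prefix_len - remote EID prefix len
+ @param leid - local EID
+ @param reid - remote EID
+ @param vni - virtual network identifier
+ @param action - negative action when 0 locators configured
+*/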
+typeonly manual_print manual_endian define gpe_fwd_entry
+{
+ u32 fwd_entry_index;
+ u32 dp_table;
+ u8 eid_type;
+ u8 leid_prefix_len;
+ u8 reid_prefix_len;
+ u8 leid[16];
+ u8 reid[16];
+ u32 vni;
+ u8 action;
+};
+
+manual_print manual_endian define gpe_fwd_entries_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_gpe_fwd_entry_t entries[count];
+};
+
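+/** \brief Dump GPE forwarding entry paths
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param fwd_entry_index - index of the forwarding entry
+*/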
+define gpe_fwd_entry_path_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 fwd_entry_index;
+};
+
+manual_endian manual_print define gpe_fwd_entry_path_details
+{
+ u32 client_index;
+ u32 context;
+ vl_api_gpe_locator_t lcl_loc;
+ vl_api_gpe_locator_t rmt_loc;
+};
+
+/** \brief Set GPE encapsulation mode
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mode - LISP (value 0) or VXLAN (value 1)
+*/
+autoreply define gpe_set_encap_mode
+{
+ u32 client_index;
+ u32 context;
+ u8 mode;
+};
+
+/** \brief get GPE encapsulation mode
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define gpe_get_encap_mode
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for get_encap_mode
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param encap_mode - GPE encapsulation mode
+*/
+define gpe_get_encap_mode_reply
+{
+ u32 context;
+ i32 retval;
+ u8 encap_mode;
+};
+
+/** \brief Add or delete GPE native fwd rpath
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - flag to indicate add or del
+ @param table_id - table id for route path
+ @param nh_sw_if_index - next-hop sw_if_index (~0 if not set)
+ @param is_ip4 - flag to indicate if nh is ip4
+ @param nh_addr - next hop ip address
+*/
+autoreply define gpe_add_del_native_fwd_rpath
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 table_id;
+ u32 nh_sw_if_index;
+ u8 is_ip4;
+ u8 nh_addr[16];
+};
+
+/** \brief get GPE native fwd rpath
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define gpe_native_fwd_rpaths_get
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+};
+
+/** \brief GPE native fwd rpath
+ @param fib_index - fib index of the route path
+ @param nh_sw_if_index - next-hop sw_if_index (~0 if not set)
+ @param is_ip4 - flag to indicate if nh is ip4
+ @param nh_addr - next hop address
+*/
+typeonly manual_print manual_endian define gpe_native_fwd_rpath
+{
+ u32 fib_index;
+ u32 nh_sw_if_index;
+ u8 is_ip4;
+ u8 nh_addr[16];
+};
+
+manual_print manual_endian define gpe_native_fwd_rpaths_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_gpe_native_fwd_rpath_t entries[count];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c
new file mode 100644
index 00000000..8ee2a72d
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe.c
@@ -0,0 +1,726 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE tunnels.
+ *
+ */
+
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+#include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_internal.h>
+
+/** LISP-GPE global state */
+lisp_gpe_main_t lisp_gpe_main;
+
+
+/** CLI command to add/del forwarding entry. */
+static clib_error_t *
+lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ ip_address_t lloc, rloc;
+ clib_error_t *error = 0;
+ gid_address_t _reid, *reid = &_reid, _leid, *leid = &_leid;
+ u8 reid_set = 0, leid_set = 0, is_negative = 0, dp_table_set = 0,
+ vni_set = 0;
+ u32 vni = 0, dp_table = 0, action = ~0, w;
+ locator_pair_t pair, *pairs = 0;
+ int rv;
+
+ memset (leid, 0, sizeof (*leid));
+ memset (reid, 0, sizeof (*reid));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "leid %U", unformat_gid_address, leid))
+ {
+ leid_set = 1;
+ }
+ else if (unformat (line_input, "reid %U", unformat_gid_address, reid))
+ {
+ reid_set = 1;
+ }
+ else if (unformat (line_input, "vni %u", &vni))
+ {
+ gid_address_vni (leid) = vni;
+ gid_address_vni (reid) = vni;
+ vni_set = 1;
+ }
+ else if (unformat (line_input, "vrf %u", &dp_table))
+ {
+ dp_table_set = 1;
+ }
+ else if (unformat (line_input, "bd %u", &dp_table))
+ {
+ dp_table_set = 1;
+ }
+ else if (unformat (line_input, "negative action %U",
+ unformat_negative_mapping_action, &action))
+ {
+ is_negative = 1;
+ }
+ else if (unformat (line_input, "loc-pair %U %U w %d",
+ unformat_ip_address, &lloc,
+ unformat_ip_address, &rloc, &w))
+ {
+ ip_address_copy (&pair.lcl_loc, &lloc);
+ ip_address_copy (&pair.rmt_loc, &rloc);
+ pair.weight = w;
+ pair.priority = 0;
+ vec_add1 (pairs, pair);
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ vlib_cli_output (vm, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!reid_set)
+ {
+ vlib_cli_output (vm, "remote eid must be set!");
+ goto done;
+ }
+
+ if (gid_address_type (reid) != GID_ADDR_NSH && (!vni_set || !dp_table_set))
+ {
+ vlib_cli_output (vm, "vni and vrf/bd must be set!");
+ goto done;
+ }
+
+ if (is_negative)
+ {
+ if (~0 == action)
+ {
+ vlib_cli_output (vm, "no action set for negative tunnel!");
+ goto done;
+ }
+ }
+ else
+ {
+ if (vec_len (pairs) == 0)
+ {
+ vlib_cli_output (vm, "expected ip4/ip6 locators");
+ goto done;
+ }
+ }
+
+ if (!leid_set)
+ {
+ /* if leid not set, make sure it's the same AFI as reid */
+ gid_address_type (leid) = gid_address_type (reid);
+ if (GID_ADDR_IP_PREFIX == gid_address_type (reid))
+ gid_address_ip_version (leid) = gid_address_ip_version (reid);
+ }
+
+ /* add fwd entry */
+ vnet_lisp_gpe_add_del_fwd_entry_args_t _a, *a = &_a;
+ memset (a, 0, sizeof (a[0]));
+
+ a->is_add = is_add;
+ a->is_negative = is_negative;
+ a->vni = vni;
+ a->table_id = dp_table;
+ gid_address_copy (&a->lcl_eid, leid);
+ gid_address_copy (&a->rmt_eid, reid);
+ a->locator_pairs = pairs;
+ a->action = action;
+
+ rv = vnet_lisp_gpe_add_del_fwd_entry (a, 0);
+ if (0 != rv)
+ {
+ vlib_cli_output (vm, "failed to %s gpe tunnel!",
+ is_add ? "add" : "delete");
+ }
+
+done:
+ unformat_free (line_input);
+ vec_free (pairs);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = {
+ .path = "gpe entry",
+ .short_help = "gpe entry add/del vni <vni> vrf/bd <id> [leid <leid>]"
+ "reid <reid> [loc-pair <lloc> <rloc> w <weight>] "
+ "[negative action <action>]",
+ .function = lisp_gpe_add_del_fwd_entry_command_fn,
+};
+/* *INDENT-ON* */
+
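+/*
+ * Example usage (addresses and ids illustrative; the negative action name
+ * must be one accepted by unformat_negative_mapping_action):
+ *   gpe entry add vni 100 vrf 7 reid 10.0.0.0/24 loc-pair 192.168.1.1 192.168.2.1 w 1
+ *   gpe entry add vni 100 vrf 7 reid 10.0.1.0/24 negative action drop
+ */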
+/** Check if LISP-GPE is enabled. */
+u8
+vnet_lisp_gpe_enable_disable_status (void)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+
+ return lgm->is_en;
+}
+
+/** Enable/disable LISP-GPE. */
+clib_error_t *
+vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+
+ if (a->is_en)
+ {
+ lgm->is_en = 1;
+ }
+ else
+ {
+ /* remove all entries */
+ vnet_lisp_gpe_fwd_entry_flush ();
+
+ /* disable all l3 ifaces */
+ lisp_gpe_tenant_flush ();
+
+ lgm->is_en = 0;
+ }
+
+ return 0;
+}
+
+/** Set GPE encapsulation mode. */
+int
+vnet_gpe_set_encap_mode (gpe_encap_mode_t mode)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+
+ if (mode >= GPE_ENCAP_COUNT)
+ return VNET_API_ERROR_INVALID_GPE_MODE;
+
+ if (pool_elts (lgm->lisp_fwd_entry_pool) != 0)
+ return VNET_API_ERROR_LISP_GPE_ENTRIES_PRESENT;
+
+ lgm->encap_mode = mode;
+ return 0;
+}
+
+/** CLI command to set GPE encap */
+static clib_error_t *
+gpe_set_encap_mode_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ gpe_encap_mode_t mode = GPE_ENCAP_COUNT;
+ vnet_api_error_t rv;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "lisp"))
+ mode = GPE_ENCAP_LISP;
+ else if (unformat (line_input, "vxlan"))
+ mode = GPE_ENCAP_VXLAN;
+ else
+ {
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+ }
+ rv = vnet_gpe_set_encap_mode (mode);
+ if (rv)
+ {
+ return clib_error_return (0,
+ "Error: invalid mode or GPE entries are present!");
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (gpe_set_encap_mode_command, static) = {
+ .path = "gpe encap",
+ .short_help = "gpe encap [lisp|vxlan]",
+ .function = gpe_set_encap_mode_command_fn,
+};
+/* *INDENT-ON* */
+
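+/*
+ * Example usage (fails if GPE forwarding entries are present):
+ *   gpe encap vxlan
+ *   gpe encap lisp
+ */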
+/** Format GPE encap mode. */
+u8 *
+format_vnet_gpe_encap_mode (u8 * s, va_list * args)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+
+ switch (lgm->encap_mode)
+ {
+ case GPE_ENCAP_LISP:
+ return format (s, "lisp");
+ case GPE_ENCAP_VXLAN:
+ return format (s, "vxlan");
+ default:
+ return 0;
+ }
+ return 0;
+}
+
+/** CLI command to show GPE encap */
+static clib_error_t *
+gpe_show_encap_mode_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_cli_output (vm, "encap mode: %U", format_vnet_gpe_encap_mode);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (gpe_show_encap_mode_command, static) = {
+ .path = "show gpe encap",
+ .short_help = "show GPE encapulation mode",
+ .function = gpe_show_encap_mode_command_fn,
+};
+/* *INDENT-ON* */
+
+/** CLI command to enable/disable LISP-GPE. */
+static clib_error_t *
+lisp_gpe_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_en = 1;
+ vnet_lisp_gpe_enable_disable_args_t _a, *a = &_a;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ is_en = 1;
+ else if (unformat (line_input, "disable"))
+ is_en = 0;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ a->is_en = is_en;
+ error = vnet_lisp_gpe_enable_disable (a);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (enable_disable_lisp_gpe_command, static) = {
+ .path = "gpe",
+ .short_help = "gpe [enable|disable]",
+ .function = lisp_gpe_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
+
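+/*
+ * Example usage; note that "gpe disable" also flushes all GPE forwarding
+ * entries and tenant interfaces:
+ *   gpe enable
+ *   gpe disable
+ */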
+/** CLI command to show LISP-GPE interfaces. */
+static clib_error_t *
+lisp_show_iface_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ hash_pair_t *p;
+
+ vlib_cli_output (vm, "%=10s%=12s", "vrf", "hw_if_index");
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, lgm->l3_ifaces.hw_if_index_by_dp_table, ({
+ vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
+ }));
+ /* *INDENT-ON* */
+
+ if (0 != lgm->l2_ifaces.hw_if_index_by_dp_table)
+ {
+ vlib_cli_output (vm, "%=10s%=12s", "bd_id", "hw_if_index");
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, lgm->l2_ifaces.hw_if_index_by_dp_table, ({
+ vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
+ }));
+ /* *INDENT-ON* */
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_show_iface_command) = {
+ .path = "show gpe interface",
+ .short_help = "show gpe interface",
+ .function = lisp_show_iface_command_fn,
+};
+/* *INDENT-ON* */
+
+/** CLI command to show GPE fwd native route path. */
+static clib_error_t *
+gpe_show_native_fwd_rpath_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ fib_route_path_t *rpath;
+
+ if (vec_len (lgm->native_fwd_rpath[IP4]))
+ {
+ vec_foreach (rpath, lgm->native_fwd_rpath[IP4])
+ {
+ vlib_cli_output (vm, "nh: %U fib_index %u sw_if_index %u",
+ format_ip46_address, &rpath->frp_addr,
+ IP46_TYPE_IP4, rpath->frp_fib_index,
+ rpath->frp_sw_if_index);
+ }
+ }
+ if (vec_len (lgm->native_fwd_rpath[IP6]))
+ {
+ vec_foreach (rpath, lgm->native_fwd_rpath[IP6])
+ {
+ vlib_cli_output (vm, "nh: %U fib_index %u sw_if_index %u",
+ format_ip46_address, &rpath->frp_addr, IP46_TYPE_IP6,
+ rpath->frp_fib_index, rpath->frp_sw_if_index);
+ }
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (gpe_show_native_fwd_rpath_command) = {
+ .path = "show gpe native-forward",
+ .short_help = "show gpe native-forward",
+ .function = gpe_show_native_fwd_rpath_command_fn,
+};
+/* *INDENT-ON* */
+
+void
+gpe_update_native_fwd_path (u8 ip_version)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ lisp_gpe_fwd_entry_t *lfe;
+ fib_prefix_t fib_prefix;
+ u32 *lfei;
+
+ vec_foreach (lfei, lgm->native_fwd_lfes[ip_version])
+ {
+ lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, lfei[0]);
+ ip_prefix_to_fib_prefix (&lfe->key->rmt.ippref, &fib_prefix);
+ fib_table_entry_update (lfe->eid_fib_index, &fib_prefix, FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_NONE,
+ lgm->native_fwd_rpath[ip_version]);
+ }
+}
+
+int
+vnet_gpe_add_del_native_fwd_rpath (vnet_gpe_native_fwd_rpath_args_t * a)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ fib_route_path_t *rpath;
+ u8 ip_version;
+
+ ip_version = a->rpath.frp_proto == DPO_PROTO_IP4 ? IP4 : IP6;
+
+ if (a->is_add)
+ {
+ vec_add1 (lgm->native_fwd_rpath[ip_version], a->rpath);
+ }
+ else
+ {
+ vec_foreach (rpath, lgm->native_fwd_rpath[ip_version])
+ {
+ if (!fib_route_path_cmp (rpath, &a->rpath))
+ {
+ vec_del1 (lgm->native_fwd_rpath[ip_version],
+ rpath - lgm->native_fwd_rpath[ip_version]);
+ break;
+ }
+ }
+ }
+ gpe_update_native_fwd_path (ip_version);
+ return 0;
+}
+
+/**
+ * CLI command to add action for native forward.
+ */
+static clib_error_t *
+gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_api_error_t rv;
+ fib_route_path_t rpath;
+ u32 table_id = ~0;
+ vnet_gpe_native_fwd_rpath_args_t _a, *a = &_a;
+ u8 is_add = 1;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ memset (&rpath, 0, sizeof (rpath));
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "table %d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ }
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip4_address, &rpath.frp_addr.ip4))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip6_address, &rpath.frp_addr.ip6))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ }
+ else
+ {
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+ }
+
+ if ((u32) ~ 0 == table_id)
+ {
+ rpath.frp_fib_index = 0;
+ }
+ else
+ {
+ rpath.frp_fib_index =
+ fib_table_find (dpo_proto_to_fib (rpath.frp_proto), table_id);
+ if ((u32) ~ 0 == rpath.frp_fib_index)
+ {
+ error = clib_error_return (0, "Nonexistent table id %d", table_id);
+ goto done;
+ }
+ }
+
+ a->rpath = rpath;
+ a->is_add = is_add;
+
+ rv = vnet_gpe_add_del_native_fwd_rpath (a);
+ if (rv)
+ {
+ return clib_error_return (0, "Error: couldn't add path!");
+ }
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (gpe_native_forward_command) = {
+ .path = "gpe native-forward",
+ .short_help = "gpe native-forward [del] via <nh-ip-addr> [iface] "
+ "[table <table>]",
+ .function = gpe_native_forward_command_fn,
+};
+/* *INDENT-ON* */
+
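+/*
+ * Example usage (next-hops, interface name and table id illustrative):
+ *   gpe native-forward via 10.1.1.1
+ *   gpe native-forward table 5 via 2001:db8::1 GigabitEthernet0/8/0
+ *   gpe native-forward del via 10.1.1.1
+ */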
+/** Format LISP-GPE status. */
+u8 *
+format_vnet_lisp_gpe_status (u8 * s, va_list * args)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ return format (s, "%s", lgm->is_en ? "enabled" : "disabled");
+}
+
+/** LISP-GPE init function. */
+clib_error_t *
+lisp_gpe_init (vlib_main_t * vm)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ clib_error_t *error = 0;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+ return error;
+
+ lgm->vnet_main = vnet_get_main ();
+ lgm->vlib_main = vm;
+ lgm->im4 = &ip4_main;
+ lgm->im6 = &ip6_main;
+ lgm->lm4 = &ip4_main.lookup_main;
+ lgm->lm6 = &ip6_main.lookup_main;
+ lgm->encap_mode = GPE_ENCAP_LISP;
+
+ lgm->lisp_gpe_fwd_entries =
+ hash_create_mem (0, sizeof (lisp_gpe_fwd_entry_key_t), sizeof (uword));
+
+ udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe,
+ lisp_gpe_ip4_input_node.index, 1 /* is_ip4 */ );
+ udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe6,
+ lisp_gpe_ip6_input_node.index, 0 /* is_ip4 */ );
+
+ lgm->lisp_stats_index_by_key =
+ hash_create_mem (0, sizeof (lisp_stats_key_t), sizeof (uword));
+ memset (&lgm->counters, 0, sizeof (lgm->counters));
+ lgm->counters.name = "LISP counters";
+
+ return 0;
+}
+
+gpe_encap_mode_t
+vnet_gpe_get_encap_mode (void)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ return lgm->encap_mode;
+}
+
+static clib_error_t *
+lisp_gpe_test_send_nsh_packet (u8 * file_name)
+{
+ vlib_frame_t *f;
+ vlib_buffer_t *b;
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ pcap_main_t pm;
+ clib_error_t *error = 0;
+
+ if (!file_name)
+ return clib_error_create ("no pcap file specified!");
+
+ memset (&pm, 0, sizeof (pm));
+ pm.file_name = (char *) file_name;
+ error = pcap_read (&pm);
+ if (error)
+ return error;
+
+ u32 bi;
+ if (vlib_buffer_alloc (lgm->vlib_main, &bi, 1) != 1)
+ return clib_error_create ("cannot allocate memory!");
+
+ b = vlib_get_buffer (lgm->vlib_main, bi);
+ tunnel_lookup_t *nsh_ifaces = &lgm->nsh_ifaces;
+ uword *hip;
+ vnet_hw_interface_t *hi;
+
+ hip = hash_get (nsh_ifaces->hw_if_index_by_dp_table, 0);
+ if (hip == 0)
+ return clib_error_create ("The NSH 0 interface doesn't exist");
+
+ hi = vnet_get_hw_interface (lgm->vnet_main, hip[0]);
+
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = hi->sw_if_index;
+ u8 *p = vlib_buffer_put_uninit (b, vec_len (pm.packets_read[0]));
+ clib_memcpy (p, pm.packets_read[0], vec_len (pm.packets_read[0]));
+ vlib_buffer_pull (b, sizeof (ethernet_header_t));
+
+ vlib_node_t *n = vlib_get_node_by_name (lgm->vlib_main,
+ (u8 *) "interface-tx");
+ f = vlib_get_frame_to_node (lgm->vlib_main, n->index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (lgm->vlib_main, n->index, f);
+
+ return error;
+}
+
+static clib_error_t *
+lisp_test_nsh_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ u8 *file_name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "pcap %v", &file_name))
+ {
+ error = lisp_gpe_test_send_nsh_packet (file_name);
+ goto done;
+ }
+ else
+ {
+ error = clib_error_create ("unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_test_nsh_command, static) = {
+ .path = "test one nsh",
+ .short_help = "test gpe nsh pcap <path-to-pcap-file>",
+ .function = lisp_test_nsh_command_fn,
+};
+/* *INDENT-ON* */
+
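+/*
+ * Example usage (path illustrative):
+ *   test one nsh pcap /tmp/nsh-packet.pcap
+ */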
+VLIB_INIT_FUNCTION (lisp_gpe_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe.h b/src/vnet/lisp-gpe/lisp_gpe.h
new file mode 100644
index 00000000..5eafdd55
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LISP-GPE definitions.
+ */
+
+#ifndef included_vnet_lisp_gpe_h
+#define included_vnet_lisp_gpe_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/mhash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/udp/udp.h>
+#include <vnet/lisp-cp/lisp_types.h>
+#include <vnet/lisp-gpe/lisp_gpe_packet.h>
+#include <vnet/adj/adj_types.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_template.h>
+
+/** IP4-UDP-LISP encap header */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4; /* 20 bytes */
+ udp_header_t udp; /* 8 bytes */
+ lisp_gpe_header_t lisp; /* 8 bytes */
+}) ip4_udp_lisp_gpe_header_t;
+/* *INDENT-ON* */
+
+/** IP6-UDP-LISP encap header */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6; /* 40 bytes */
+ udp_header_t udp; /* 8 bytes */
+ lisp_gpe_header_t lisp; /* 8 bytes */
+}) ip6_udp_lisp_gpe_header_t;
+/* *INDENT-ON* */
+
+#define foreach_lisp_gpe_ip_input_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input") \
+_(L2_INPUT, "l2-input")
+
+/** Enum of possible next nodes post LISP-GPE decap */
+typedef enum
+{
+#define _(s,n) LISP_GPE_INPUT_NEXT_##s,
+ foreach_lisp_gpe_ip_input_next
+#undef _
+ LISP_GPE_INPUT_N_NEXT,
+} lisp_gpe_input_next_t;
+
+/* Arc to nsh-input added only if nsh-input exists */
+#define LISP_GPE_INPUT_NEXT_NSH_INPUT 4
+
+typedef enum
+{
+#define lisp_gpe_error(n,s) LISP_GPE_ERROR_##n,
+#include <vnet/lisp-gpe/lisp_gpe_error.def>
+#undef lisp_gpe_error
+ LISP_GPE_N_ERROR,
+} lisp_gpe_error_t;
+
+typedef struct tunnel_lookup
+{
+ /** Lookup lisp-gpe interfaces by dp table (e.g. vrf/bridge index) */
+ uword *hw_if_index_by_dp_table;
+
+ /** lookup decap tunnel termination sw_if_index by vni and vice versa */
+ uword *sw_if_index_by_vni;
+
+ // FIXME - Need this?
+ uword *vni_by_sw_if_index;
+} tunnel_lookup_t;
+
+typedef struct
+{
+ u32 fwd_entry_index;
+ u32 tunnel_index;
+} lisp_stats_key_t;
+
+typedef struct
+{
+ u32 vni;
+ dp_address_t deid;
+ dp_address_t seid;
+ ip_address_t loc_rloc;
+ ip_address_t rmt_rloc;
+
+ vlib_counter_t counters;
+} lisp_api_stats_t;
+
+typedef enum gpe_encap_mode_e
+{
+ GPE_ENCAP_LISP,
+ GPE_ENCAP_VXLAN,
+ GPE_ENCAP_COUNT
+} gpe_encap_mode_t;
+
+/** LISP-GPE global state*/
+typedef struct lisp_gpe_main
+{
+ /**
+ * @brief DB of all forwarding entries. The Key is:{l-EID,r-EID,vni}
+ * where the EID encodes L2 or L3
+ */
+ uword *lisp_gpe_fwd_entries;
+
+ /**
+ * @brief A Pool of all LISP forwarding entries
+ */
+ struct lisp_gpe_fwd_entry_t_ *lisp_fwd_entry_pool;
+
+ /** Free vlib hw_if_indices */
+ u32 *free_tunnel_hw_if_indices;
+
+ u8 is_en;
+
+ /* L3 data structures
+ * ================== */
+ tunnel_lookup_t l3_ifaces;
+
+ /* L2 data structures
+ * ================== */
+
+ /** L2 LISP FIB */
+ BVT (clib_bihash) l2_fib;
+
+ tunnel_lookup_t l2_ifaces;
+
+ /** Load-balance for a miss in the table */
+ dpo_id_t l2_lb_cp_lkup;
+
+ /* NSH data structures
+ * ================== */
+
+ BVT (clib_bihash) nsh_fib;
+
+ tunnel_lookup_t nsh_ifaces;
+
+ const dpo_id_t *nsh_cp_lkup;
+
+ gpe_encap_mode_t encap_mode;
+
+ u8 *dummy_stats_pool;
+ uword *lisp_stats_index_by_key;
+ vlib_combined_counter_main_t counters;
+
+ /** Native fwd data structures */
+ fib_route_path_t *native_fwd_rpath[2];
+ u32 *native_fwd_lfes[2];
+
+ /** convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ ip4_main_t *im4;
+ ip6_main_t *im6;
+ ip_lookup_main_t *lm4;
+ ip_lookup_main_t *lm6;
+} lisp_gpe_main_t;
+
+/** LISP-GPE global state*/
+extern lisp_gpe_main_t lisp_gpe_main;
+
+always_inline lisp_gpe_main_t *
+vnet_lisp_gpe_get_main ()
+{
+ return &lisp_gpe_main;
+}
+
+
+extern vlib_node_registration_t lisp_gpe_ip4_input_node;
+extern vlib_node_registration_t lisp_gpe_ip6_input_node;
+extern vnet_hw_interface_class_t lisp_gpe_hw_class;
+
+u8 *format_lisp_gpe_header_with_length (u8 * s, va_list * args);
+
+/** Read LISP-GPE status */
+u8 vnet_lisp_gpe_enable_disable_status (void);
+
+u32
+lisp_gpe_l3_iface_find_or_create (lisp_gpe_main_t * lgm,
+ u32 overlay_table_id, u32 vni);
+
+/** Add/del LISP-GPE interface. */
+extern void lisp_gpe_del_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id);
+extern u32 lisp_gpe_add_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id);
+extern void lisp_gpe_del_l3_iface (lisp_gpe_main_t * lgm, u32 vni, u32 table_id);
+extern u32 lisp_gpe_add_l3_iface (lisp_gpe_main_t * lgm, u32 vni, u32 table_id);
+
+
+typedef struct
+{
+ u8 is_en;
+} vnet_lisp_gpe_enable_disable_args_t;
+
+clib_error_t
+ * vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a);
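+
+/*
+ * Minimal usage sketch (error handling elided):
+ *
+ *   vnet_lisp_gpe_enable_disable_args_t _a = { .is_en = 1 }, *a = &_a;
+ *   clib_error_t *err = vnet_lisp_gpe_enable_disable (a);
+ */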
+
+typedef enum
+{
+ NO_ACTION,
+ FORWARD_NATIVE,
+ SEND_MAP_REQUEST,
+ DROP
+} negative_fwd_actions_e;
+
+/** */
+typedef struct
+{
+ /** forwarding entry index */
+ u32 fwd_entry_index;
+
+ u8 is_src_dst;
+
+ u8 is_add;
+
+ /** type of mapping */
+ u8 is_negative;
+
+ /** action for negative mappings */
+ negative_fwd_actions_e action;
+
+ /** local eid */
+ gid_address_t lcl_eid;
+
+ /** remote eid */
+ gid_address_t rmt_eid;
+
+ /** vector of locator pairs */
+ locator_pair_t *locator_pairs;
+
+ /** FIB index to lookup remote locator at encap */
+ u32 encap_fib_index;
+
+ /** FIB index to lookup inner IP at decap */
+ u32 decap_fib_index;
+
+ /* TODO remove */
+ u32 decap_next_index;
+
+ /** VNI/tenant id in HOST byte order */
+ u32 vni;
+
+ /** vrf or bd where fwd entry should be inserted */
+ union
+ {
+ /** table (vrf) id */
+ u32 table_id;
+
+ /** bridge domain id */
+ u32 bd_id;
+
+ /** generic access */
+ u32 dp_table;
+ };
+} vnet_lisp_gpe_add_del_fwd_entry_args_t;
+
+typedef struct
+{
+ fib_route_path_t rpath;
+ u8 is_add;
+} vnet_gpe_native_fwd_rpath_args_t;
+
+typedef struct
+{
+ u32 fwd_entry_index;
+ u32 dp_table;
+ u32 vni;
+ u8 action;
+ dp_address_t leid;
+ dp_address_t reid;
+} lisp_api_gpe_fwd_entry_t;
+
+#define foreach_lgpe_ip4_lookup_next \
+ _(DROP, "error-drop") \
+ _(LISP_CP_LOOKUP, "lisp-cp-lookup")
+
+typedef enum lgpe_ip4_lookup_next
+{
+#define _(sym,str) LGPE_IP4_LOOKUP_NEXT_##sym,
+ foreach_lgpe_ip4_lookup_next
+#undef _
+ LGPE_IP4_LOOKUP_N_NEXT,
+} lgpe_ip4_lookup_next_t;
+
+#define foreach_lgpe_ip6_lookup_next \
+ _(DROP, "error-drop") \
+ _(LISP_CP_LOOKUP, "lisp-cp-lookup")
+
+typedef enum lgpe_ip6_lookup_next
+{
+#define _(sym,str) LGPE_IP6_LOOKUP_NEXT_##sym,
+ foreach_lgpe_ip6_lookup_next
+#undef _
+ LGPE_IP6_LOOKUP_N_NEXT,
+} lgpe_ip6_lookup_next_t;
+
+u8 *format_vnet_lisp_gpe_status (u8 * s, va_list * args);
+
+lisp_api_gpe_fwd_entry_t *vnet_lisp_gpe_fwd_entries_get_by_vni (u32 vni);
+gpe_encap_mode_t vnet_gpe_get_encap_mode (void);
+int vnet_gpe_set_encap_mode (gpe_encap_mode_t mode);
+
+u8 vnet_lisp_stats_enable_disable_state (void);
+vnet_api_error_t vnet_lisp_stats_enable_disable (u8 enable);
+lisp_api_stats_t *vnet_lisp_get_stats (void);
+int vnet_lisp_flush_stats (void);
+int vnet_gpe_add_del_native_fwd_rpath (vnet_gpe_native_fwd_rpath_args_t * a);
+u32 vnet_lisp_gpe_add_nsh_iface (lisp_gpe_main_t * lgm);
+void vnet_lisp_gpe_del_nsh_iface (lisp_gpe_main_t * lgm);
+
+#endif /* included_vnet_lisp_gpe_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
new file mode 100644
index 00000000..7db1c9bb
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
@@ -0,0 +1,634 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE adjacencies.
+ *
+ */
+
+#include <vnet/dpo/load_balance.h>
+#include <vnet/lisp-cp/control.h>
+#include <vnet/lisp-cp/lisp_types.h>
+#include <vnet/lisp-gpe/lisp_gpe_sub_interface.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+#include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_template.h>
+
+/**
+ * Memory pool of all adjacencies
+ */
+static lisp_gpe_adjacency_t *lisp_adj_pool;
+
+/**
+ * Hash table of all adjacencies. key:{nh, itf}
+ * We never have an all zeros address since the interfaces are multi-access,
+ * therefore there is no ambiguity between a v4 and v6 next-hop, so we don't
+ * need to add the protocol to the key.
+ */
+static
+BVT (clib_bihash)
+ lisp_adj_db;
+
+#define LISP_ADJ_SET_KEY(_key, _itf, _nh) \
+{ \
+ _key.key[0] = (_nh)->ip.v6.as_u64[0]; \
+ _key.key[1] = (_nh)->ip.v6.as_u64[1]; \
+ _key.key[2] = (_itf); \
+}
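+/*
+ * Note: the key always reads the full 128-bit v6 union, so for IPv4
+ * next-hops this relies on the unused address bytes being zero (see e.g.
+ * ip46_address_to_ip_address(), which clears the address before setting
+ * a v4 value).
+ */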
+
+ static index_t lisp_adj_find (const ip_address_t * addr, u32 sw_if_index)
+{
+ BVT (clib_bihash_kv) kv;
+
+ LISP_ADJ_SET_KEY (kv, sw_if_index, addr);
+
+ if (BV (clib_bihash_search) (&lisp_adj_db, &kv, &kv) < 0)
+ {
+ return (INDEX_INVALID);
+ }
+ else
+ {
+ return (kv.value);
+ }
+}
+
+static void
+lisp_adj_insert (const ip_address_t * addr, u32 sw_if_index, index_t ai)
+{
+ BVT (clib_bihash_kv) kv;
+
+ LISP_ADJ_SET_KEY (kv, sw_if_index, addr);
+ kv.value = ai;
+
+ BV (clib_bihash_add_del) (&lisp_adj_db, &kv, 1);
+}
+
+static void
+lisp_adj_remove (const ip_address_t * addr, u32 sw_if_index)
+{
+ BVT (clib_bihash_kv) kv;
+
+ LISP_ADJ_SET_KEY (kv, sw_if_index, addr);
+
+ BV (clib_bihash_add_del) (&lisp_adj_db, &kv, 0);
+}
+
+static lisp_gpe_adjacency_t *
+lisp_gpe_adjacency_get_i (index_t lai)
+{
+ return (pool_elt_at_index (lisp_adj_pool, lai));
+}
+
+fib_forward_chain_type_t
+lisp_gpe_adj_get_fib_chain_type (const lisp_gpe_adjacency_t * ladj)
+{
+ switch (ip_addr_version (&ladj->remote_rloc))
+ {
+ case IP4:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+ case IP6:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+ default:
+ ASSERT (0);
+ break;
+ }
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+}
+
+static void
+ip46_address_to_ip_address (const ip46_address_t * a, ip_address_t * b)
+{
+ if (ip46_address_is_ip4 (a))
+ {
+ memset (b, 0, sizeof (*b));
+ ip_address_set (b, &a->ip4, IP4);
+ }
+ else
+ {
+ ip_address_set (b, &a->ip6, IP6);
+ }
+}
+
+/**
+ * @brief Stack the tunnel's midchain on the IP forwarding chain of the via
+ */
+static void
+lisp_gpe_adj_stack_one (lisp_gpe_adjacency_t * ladj, adj_index_t ai)
+{
+ const lisp_gpe_tunnel_t *lgt;
+ dpo_id_t tmp = DPO_INVALID;
+
+ lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+ fib_entry_contribute_forwarding (lgt->fib_entry_index,
+ lisp_gpe_adj_get_fib_chain_type (ladj),
+ &tmp);
+
+ if (DPO_LOAD_BALANCE == tmp.dpoi_type)
+ {
+ /*
+ * post LISP rewrite we will load-balance. However, the LISP encap
+ * is always the same for this adjacency/tunnel and hence the IP/UDP src,dst
+ * hash is always the same result too. So we do that hash now and
+ * stack on the choice.
+ * If the choice is an incomplete adj then we will need a poke when
+ * it becomes complete. This happens since the adj update walk propagates
+ * as far a recursive paths.
+ */
+ const dpo_id_t *choice;
+ load_balance_t *lb;
+ int hash;
+
+ lb = load_balance_get (tmp.dpoi_index);
+
+ if (IP4 == ip_addr_version (&ladj->remote_rloc))
+ {
+ hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
+ lb->lb_hash_config);
+ }
+ else
+ {
+ hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
+ lb->lb_hash_config);
+ }
+
+ choice =
+ load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+ dpo_copy (&tmp, choice);
+ }
+
+ adj_nbr_midchain_stack (ai, &tmp);
+ dpo_reset (&tmp);
+}
+
+/**
+ * @brief Call back when restacking all adjacencies on a LISP-GPE interface
+ */
+static adj_walk_rc_t
+lisp_gpe_adj_walk_cb (adj_index_t ai, void *ctx)
+{
+ lisp_gpe_adjacency_t *ladj = ctx;
+
+ lisp_gpe_adj_stack_one (ladj, ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static void
+lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj)
+{
+ fib_protocol_t nh_proto;
+ ip46_address_t nh;
+
+ ip_address_to_46 (&ladj->remote_rloc, &nh, &nh_proto);
+
+ /*
+ * walk all the adjacencies on the lisp interface and restack them
+ */
+ adj_nbr_walk_nh (ladj->sw_if_index,
+ nh_proto, &nh, lisp_gpe_adj_walk_cb, ladj);
+}
+
+static lisp_gpe_next_protocol_e
+lisp_gpe_adj_proto_from_vnet_link_type (vnet_link_t linkt)
+{
+ switch (linkt)
+ {
+ case VNET_LINK_IP4:
+ return (LISP_GPE_NEXT_PROTO_IP4);
+ case VNET_LINK_IP6:
+ return (LISP_GPE_NEXT_PROTO_IP6);
+ case VNET_LINK_ETHERNET:
+ return (LISP_GPE_NEXT_PROTO_ETHERNET);
+ case VNET_LINK_NSH:
+ return (LISP_GPE_NEXT_PROTO_NSH);
+ default:
+ ASSERT (0);
+ }
+ return (LISP_GPE_NEXT_PROTO_IP4);
+}
+
+#define is_v4_packet(_h) (((*(u8 *) _h) & 0xF0) == 0x40)
+
+static lisp_afi_e
+lisp_afi_from_vnet_link_type (vnet_link_t link)
+{
+ switch (link)
+ {
+ case VNET_LINK_IP4:
+ return LISP_AFI_IP;
+ case VNET_LINK_IP6:
+ return LISP_AFI_IP6;
+ case VNET_LINK_ETHERNET:
+ return LISP_AFI_MAC;
+ default:
+ return LISP_AFI_NO_ADDR;
+ }
+}
+
+static void
+lisp_gpe_increment_stats_counters (lisp_cp_main_t * lcm, ip_adjacency_t * adj,
+ vlib_buffer_t * b)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ lisp_gpe_adjacency_t *ladj;
+ ip_address_t rloc;
+ index_t lai;
+ u32 si, di;
+ gid_address_t src, dst;
+ uword *feip;
+
+ ip46_address_to_ip_address (&adj->sub_type.nbr.next_hop, &rloc);
+ si = vnet_buffer (b)->sw_if_index[VLIB_TX];
+ lai = lisp_adj_find (&rloc, si);
+ ASSERT (INDEX_INVALID != lai);
+
+ ladj = pool_elt_at_index (lisp_adj_pool, lai);
+
+ u8 *lisp_data = (u8 *) vlib_buffer_get_current (b);
+
+ /* skip IP header */
+ if (is_v4_packet (lisp_data))
+ lisp_data += sizeof (ip4_header_t);
+ else
+ lisp_data += sizeof (ip6_header_t);
+
+ /* skip UDP header */
+ lisp_data += sizeof (udp_header_t);
+ // TODO: skip TCP?
+
+ /* skip LISP GPE header */
+ lisp_data += sizeof (lisp_gpe_header_t);
+
+ i16 saved_current_data = b->current_data;
+ b->current_data = lisp_data - b->data;
+
+ lisp_afi_e afi = lisp_afi_from_vnet_link_type (adj->ia_link);
+ get_src_and_dst_eids_from_buffer (lcm, b, &src, &dst, afi);
+ b->current_data = saved_current_data;
+ di = gid_dictionary_sd_lookup (&lcm->mapping_index_by_gid, &dst, &src);
+ if (PREDICT_FALSE (~0 == di))
+ {
+ clib_warning ("dst mapping not found (%U, %U)", format_gid_address,
+ &src, format_gid_address, &dst);
+ return;
+ }
+
+ feip = hash_get (lcm->fwd_entry_by_mapping_index, di);
+ if (PREDICT_FALSE (!feip))
+ return;
+
+ lisp_stats_key_t key;
+ memset (&key, 0, sizeof (key));
+ key.fwd_entry_index = feip[0];
+ key.tunnel_index = ladj->tunnel_index;
+
+ uword *p = hash_get_mem (lgm->lisp_stats_index_by_key, &key);
+ ASSERT (p);
+
+ /* compute payload length starting after GPE */
+ u32 bytes = b->current_length - (lisp_data - b->data - b->current_data);
+ vlib_increment_combined_counter (&lgm->counters, vlib_get_thread_index (),
+ p[0], 1, bytes);
+}
+
+static void
+lisp_gpe_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b)
+{
+ lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+ if (lcm->flags & LISP_FLAG_STATS_ENABLED)
+ lisp_gpe_increment_stats_counters (lcm, adj, b);
+
+ /* Fixup the checksum and len fields in the LISP tunnel encap
+ * that was applied at the midchain node */
+ ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b)));
+}
+
+/**
+ * @brief The LISP-GPE interface registered function to update, i.e.
+ * provide a rewrite string for, an adjacency.
+ */
+void
+lisp_gpe_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
+{
+ const lisp_gpe_tunnel_t *lgt;
+ lisp_gpe_adjacency_t *ladj;
+ ip_adjacency_t *adj;
+ ip_address_t rloc;
+ vnet_link_t linkt;
+ index_t lai;
+
+ adj = adj_get (ai);
+ ip46_address_to_ip_address (&adj->sub_type.nbr.next_hop, &rloc);
+
+ /*
+ * find an existing or create a new adj
+ */
+ lai = lisp_adj_find (&rloc, sw_if_index);
+
+ ASSERT (INDEX_INVALID != lai);
+
+ ladj = pool_elt_at_index (lisp_adj_pool, lai);
+ lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+ linkt = adj_get_link_type (ai);
+ adj_nbr_midchain_update_rewrite
+ (ai, lisp_gpe_fixup,
+ (VNET_LINK_ETHERNET == linkt ?
+ ADJ_FLAG_MIDCHAIN_NO_COUNT :
+ ADJ_FLAG_NONE),
+ lisp_gpe_tunnel_build_rewrite (lgt, ladj,
+ lisp_gpe_adj_proto_from_vnet_link_type
+ (linkt)));
+
+ lisp_gpe_adj_stack_one (ladj, ai);
+}
+
+u8 *
+lisp_gpe_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ ASSERT (0);
+ return (NULL);
+}
+
+index_t
+lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
+ u32 overlay_table_id, u32 vni)
+{
+ const lisp_gpe_sub_interface_t *l3s;
+ const lisp_gpe_tunnel_t *lgt;
+ lisp_gpe_adjacency_t *ladj;
+ index_t lai, l3si;
+
+ /*
+ * first find the L3 sub-interface that corresponds to the local-rloc and vni
+ */
+ l3si = lisp_gpe_sub_interface_find_or_create_and_lock (&pair->lcl_loc,
+ overlay_table_id,
+ vni);
+ l3s = lisp_gpe_sub_interface_get (l3si);
+
+ /*
+ * find an existing or create a new adj
+ */
+ lai = lisp_adj_find (&pair->rmt_loc, l3s->sw_if_index);
+
+ if (INDEX_INVALID == lai)
+ {
+
+ pool_get (lisp_adj_pool, ladj);
+ memset (ladj, 0, sizeof (*ladj));
+ lai = (ladj - lisp_adj_pool);
+
+ ip_address_copy (&ladj->remote_rloc, &pair->rmt_loc);
+ ladj->vni = vni;
+ /* transfer the lock to the adj */
+ ladj->lisp_l3_sub_index = l3si;
+ ladj->sw_if_index = l3s->sw_if_index;
+
+ /* if vni is non-default */
+ if (ladj->vni)
+ ladj->flags = LISP_GPE_FLAGS_I;
+
+ /* work in lisp-gpe not legacy mode */
+ ladj->flags |= LISP_GPE_FLAGS_P;
+
+ /*
+ * find the tunnel that will provide the underlying transport
+ * and hence the rewrite.
+ * The RLOC FIB index is default table - always.
+ */
+ ladj->tunnel_index = lisp_gpe_tunnel_find_or_create_and_lock (pair, 0);
+
+ lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+
+ /*
+ * become a child of the RLOC FIB entry so we are updated when
+ * its reachability changes, allowing us to re-stack the midchains
+ */
+ ladj->fib_entry_child_index = fib_entry_child_add (lgt->fib_entry_index,
+ FIB_NODE_TYPE_LISP_ADJ,
+ lai);
+
+ lisp_adj_insert (&ladj->remote_rloc, ladj->sw_if_index, lai);
+ }
+ else
+ {
+ /* unlock the interface from the find. */
+ lisp_gpe_sub_interface_unlock (l3si);
+ ladj = lisp_gpe_adjacency_get_i (lai);
+ }
+
+ ladj->locks++;
+
+ return (lai);
+}
+
+/**
+ * @brief Get a pointer to an adjacency from a pointer to a FIB node
+ */
+static lisp_gpe_adjacency_t *
+lisp_gpe_adjacency_from_fib_node (const fib_node_t * node)
+{
+ return ((lisp_gpe_adjacency_t *)
+ ((char *) node -
+ STRUCT_OFFSET_OF (lisp_gpe_adjacency_t, fib_node)));
+}
+
+static void
+lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_t * ladj)
+{
+ const lisp_gpe_tunnel_t *lgt;
+
+ /*
+ * This was the last lock, so remove the adjacency from the DB
+ */
+ lisp_adj_remove (&ladj->remote_rloc, ladj->sw_if_index);
+
+ /*
+ * unlock the resources this adj holds
+ */
+ lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+
+ fib_entry_child_remove (lgt->fib_entry_index, ladj->fib_entry_child_index);
+
+ lisp_gpe_tunnel_unlock (ladj->tunnel_index);
+ lisp_gpe_sub_interface_unlock (ladj->lisp_l3_sub_index);
+
+ pool_put (lisp_adj_pool, ladj);
+}
+
+void
+lisp_gpe_adjacency_unlock (index_t lai)
+{
+ lisp_gpe_adjacency_t *ladj;
+
+ ladj = lisp_gpe_adjacency_get_i (lai);
+
+ ladj->locks--;
+
+ if (0 == ladj->locks)
+ {
+ lisp_gpe_adjacency_last_lock_gone (ladj);
+ }
+}
+
+const lisp_gpe_adjacency_t *
+lisp_gpe_adjacency_get (index_t lai)
+{
+ return (lisp_gpe_adjacency_get_i (lai));
+}
+
+
+/**
+ * @brief LISP GPE tunnel back walk
+ *
+ * The FIB entry through which this tunnel resolves has been updated.
+ * re-stack the midchain on the new forwarding.
+ */
+static fib_node_back_walk_rc_t
+lisp_gpe_adjacency_back_walk (fib_node_t * node,
+ fib_node_back_walk_ctx_t * ctx)
+{
+ lisp_gpe_adj_stack (lisp_gpe_adjacency_from_fib_node (node));
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+static fib_node_t *
+lisp_gpe_adjacency_get_fib_node (fib_node_index_t index)
+{
+ lisp_gpe_adjacency_t *ladj;
+
+ ladj = pool_elt_at_index (lisp_adj_pool, index);
+ return (&ladj->fib_node);
+}
+
+static void
+lisp_gpe_adjacency_last_fib_lock_gone (fib_node_t * node)
+{
+ lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_from_fib_node (node));
+}
+
+const static fib_node_vft_t lisp_gpe_tuennel_vft = {
+ .fnv_get = lisp_gpe_adjacency_get_fib_node,
+ .fnv_back_walk = lisp_gpe_adjacency_back_walk,
+ .fnv_last_lock = lisp_gpe_adjacency_last_fib_lock_gone,
+};
+
+u8 *
+format_lisp_gpe_adjacency (u8 * s, va_list * args)
+{
+ lisp_gpe_adjacency_t *ladj = va_arg (*args, lisp_gpe_adjacency_t *);
+ lisp_gpe_adjacency_format_flags_t flags =
+ va_arg (*args, lisp_gpe_adjacency_format_flags_t);
+
+ if (flags & LISP_GPE_ADJ_FORMAT_FLAG_DETAIL)
+ {
+ s =
+ format (s, "index %d locks:%d\n", ladj - lisp_adj_pool, ladj->locks);
+ }
+
+ s = format (s, " vni: %d,", ladj->vni);
+ s = format (s, " remote-RLOC: %U,", format_ip_address, &ladj->remote_rloc);
+
+ if (flags & LISP_GPE_ADJ_FORMAT_FLAG_DETAIL)
+ {
+ s = format (s, " %U\n",
+ format_lisp_gpe_sub_interface,
+ lisp_gpe_sub_interface_get (ladj->lisp_l3_sub_index));
+ s = format (s, " %U\n",
+ format_lisp_gpe_tunnel,
+ lisp_gpe_tunnel_get (ladj->tunnel_index));
+ }
+ else
+ {
+ s = format (s, " LISP L3 sub-interface index: %d,",
+ ladj->lisp_l3_sub_index);
+ s = format (s, " LISP tunnel index: %d", ladj->tunnel_index);
+ }
+
+
+ return (s);
+}
+
+static clib_error_t *
+lisp_gpe_adjacency_show (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lisp_gpe_adjacency_t *ladj;
+ index_t index;
+
+ if (pool_elts (lisp_adj_pool) == 0)
+ vlib_cli_output (vm, "No lisp-gpe Adjacencies");
+
+ if (unformat (input, "%d", &index))
+ {
+ ladj = lisp_gpe_adjacency_get_i (index);
+ vlib_cli_output (vm, "%U", format_lisp_gpe_adjacency, ladj,
+ LISP_GPE_ADJ_FORMAT_FLAG_DETAIL);
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (ladj, lisp_adj_pool,
+ ({
+ vlib_cli_output (vm, "[%d] %U\n",
+ ladj - lisp_adj_pool,
+ format_lisp_gpe_adjacency, ladj,
+ LISP_GPE_ADJ_FORMAT_FLAG_NONE);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
+{
+ .path = "show gpe adjacency",
+ .function = lisp_gpe_adjacency_show,
+};
+/* *INDENT-ON* */
+
+#define LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (256)
+#define LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (1<<20)
+
+static clib_error_t *
+lisp_gpe_adj_module_init (vlib_main_t * vm)
+{
+ BV (clib_bihash_init) (&lisp_adj_db,
+ "Adjacency Neighbour table",
+ LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
+ LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
+
+ fib_node_register_type (FIB_NODE_TYPE_LISP_ADJ, &lisp_gpe_tuennel_vft);
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lisp_gpe_adj_module_init)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.h b/src/vnet/lisp-gpe/lisp_gpe_adjacency.h
new file mode 100644
index 00000000..adc3acae
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE adjacencies.
+ *
+ */
+
+#ifndef LISP_GPE_ADJACENCY_H__
+#define LISP_GPE_ADJACENCY_H__
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+/**
+ * @brief A LISP GPE Adjacency.
+ *
+ * An adjacency represents a peer on an L3 sub-interface to which traffic
+ * is sent; adjacencies are thus present in the EID space.
+ * The peer is identified by the key {remote-rloc, sub-interface}, which is
+ * equivalent to the usual adjacency key {next-hop, interface}. So, curiously,
+ * the RLOC address from the underlay is used as a next-hop address in the
+ * overlay. This is OK because:
+ * 1 - the RLOC is unique in the underlay AND there is only one underlay VRF per
+ * overlay
+ * 2 - the RLOC may overlap with an address in the overlay, but we do not create
+ * an adj-fib (i.e. a route in the overlay FIB for the rloc)
+ *
+ *
+ */
+typedef struct lisp_gpe_adjacency_t_
+{
+ /**
+ * The LISP adj is a part of the FIB control plane graph.
+ */
+ fib_node_t fib_node;
+
+ /**
+ * remote RLOC. The adjacency's next-hop
+ */
+ ip_address_t remote_rloc;
+
+ /**
+ * The VNI. Used in combination with the local-rloc to get the sub-interface
+ */
+ u32 vni;
+
+ /**
+ * The number of locks/reference counts on the adjacency.
+ */
+ u32 locks;
+
+ /**
+ * The index of the LISP L3 subinterface
+ */
+ u32 lisp_l3_sub_index;
+
+ /**
+ * The SW IF index of the sub-interface this adjacency uses.
+ * Cached for convenience from the LISP L3 sub-interface
+ */
+ u32 sw_if_index;
+
+ /**
+ * The index of the LISP GPE tunnel that provides the transport
+ * in the underlay.
+ */
+ u32 tunnel_index;
+
+ /**
+ * This adjacency is a child of the FIB entry to reach the RLOC.
+ * This is so when the reachability of that RLOC changes, we can restack
+ * the FIB adjacencies.
+ */
+ u32 fib_entry_child_index;
+
+ /**
+ * LISP header fields in HOST byte order
+ */
+ u8 flags;
+ u8 ver_res;
+ u8 res;
+ u8 next_protocol;
+
+} lisp_gpe_adjacency_t;
+
+extern index_t lisp_gpe_adjacency_find_or_create_and_lock (const
+ locator_pair_t *
+ pair,
+ u32 overlay_table_id,
+ u32 vni);
+
+extern void lisp_gpe_adjacency_unlock (index_t lai);
+
+extern const lisp_gpe_adjacency_t *lisp_gpe_adjacency_get (index_t lai);
+
+extern void lisp_gpe_update_adjacency (vnet_main_t * vnm,
+ u32 sw_if_index, adj_index_t ai);
+extern u8 *lisp_gpe_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address);
+
+
+/**
+ * @brief Flags for displaying the adjacency
+ */
+typedef enum lisp_gpe_adjacency_format_flags_t_
+{
+ LISP_GPE_ADJ_FORMAT_FLAG_NONE,
+ LISP_GPE_ADJ_FORMAT_FLAG_DETAIL,
+} lisp_gpe_adjacency_format_flags_t;
+
+extern u8 *format_lisp_gpe_adjacency (u8 * s, va_list * args);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_api.c b/src/vnet/lisp-gpe/lisp_gpe_api.c
new file mode 100644
index 00000000..4367a719
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_api.c
@@ -0,0 +1,597 @@
+/*
+ *------------------------------------------------------------------
+ * lisp_gpe_api.c - lisp_gpe api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+#include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
+#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h>
+#include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_api_gpe_locator_pair_t_endian vl_noop_handler
+#define vl_api_gpe_locator_pair_t_print vl_noop_handler
+#define vl_api_gpe_add_del_fwd_entry_t_endian vl_noop_handler
+#define vl_api_gpe_add_del_fwd_entry_t_print vl_noop_handler
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(GPE_ADD_DEL_FWD_ENTRY, gpe_add_del_fwd_entry) \
+_(GPE_FWD_ENTRIES_GET, gpe_fwd_entries_get) \
+_(GPE_FWD_ENTRY_PATH_DUMP, gpe_fwd_entry_path_dump) \
+_(GPE_ENABLE_DISABLE, gpe_enable_disable) \
+_(GPE_ADD_DEL_IFACE, gpe_add_del_iface) \
+_(GPE_FWD_ENTRY_VNIS_GET, gpe_fwd_entry_vnis_get) \
+_(GPE_SET_ENCAP_MODE, gpe_set_encap_mode) \
+_(GPE_GET_ENCAP_MODE, gpe_get_encap_mode) \
+_(GPE_ADD_DEL_NATIVE_FWD_RPATH, gpe_add_del_native_fwd_rpath) \
+_(GPE_NATIVE_FWD_RPATHS_GET, gpe_native_fwd_rpaths_get)
+
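+/**
+ * Convert an API locator array into locator pairs. The array holds
+ * rloc_num local locators followed by rloc_num matching remote locators.
+ */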
+static locator_pair_t *
+unformat_gpe_loc_pairs (void *locs, u32 rloc_num)
+{
+ u32 i;
+ locator_pair_t *pairs = 0, pair, *p;
+ vl_api_gpe_locator_t *r;
+
+ for (i = 0; i < rloc_num; i++)
+ {
+ /* local locator */
+ r = &((vl_api_gpe_locator_t *) locs)[i];
+ memset (&pair, 0, sizeof (pair));
+ ip_address_set (&pair.lcl_loc, &r->addr, r->is_ip4 ? IP4 : IP6);
+
+ pair.weight = r->weight;
+ vec_add1 (pairs, pair);
+ }
+
+ for (i = rloc_num; i < rloc_num * 2; i++)
+ {
+ /* remote locators */
+ r = &((vl_api_gpe_locator_t *) locs)[i];
+ p = &pairs[i - rloc_num];
+ ip_address_set (&p->rmt_loc, &r->addr, r->is_ip4 ? IP4 : IP6);
+ }
+ return pairs;
+}
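+
+/*
+ * Worked example of the locator layout consumed above (values
+ * hypothetical): for rloc_num = 2 the message carries four locators,
+ *   locs[0], locs[1] - local locators
+ *   locs[2], locs[3] - remote locators
+ * yielding the pairs (locs[0], locs[2]) and (locs[1], locs[3]).
+ * Note that only the local locator's weight is used for a pair.
+ */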
+
+static int
+unformat_lisp_eid_api (gid_address_t * dst, u32 vni, u8 type, void *src,
+ u8 len)
+{
+ switch (type)
+ {
+ case 0: /* ipv4 */
+ gid_address_type (dst) = GID_ADDR_IP_PREFIX;
+ gid_address_ip_set (dst, src, IP4);
+ gid_address_ippref_len (dst) = len;
+ ip_prefix_normalize (&gid_address_ippref (dst));
+ break;
+ case 1: /* ipv6 */
+ gid_address_type (dst) = GID_ADDR_IP_PREFIX;
+ gid_address_ip_set (dst, src, IP6);
+ gid_address_ippref_len (dst) = len;
+ ip_prefix_normalize (&gid_address_ippref (dst));
+ break;
+ case 2: /* l2 mac */
+ gid_address_type (dst) = GID_ADDR_MAC;
+ clib_memcpy (&gid_address_mac (dst), src, 6);
+ break;
+ default:
+ /* unknown type */
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ gid_address_vni (dst) = vni;
+
+ return 0;
+}
+
+static void
+ gpe_fwd_entry_path_dump_t_net_to_host
+ (vl_api_gpe_fwd_entry_path_dump_t * mp)
+{
+ mp->fwd_entry_index = clib_net_to_host_u32 (mp->fwd_entry_index);
+}
+
+static void
+lisp_api_set_locator (vl_api_gpe_locator_t * loc,
+ const ip_address_t * addr, u8 weight)
+{
+ loc->weight = weight;
+ if (IP4 == ip_addr_version (addr))
+ {
+ loc->is_ip4 = 1;
+ memcpy (loc->addr, addr, 4);
+ }
+ else
+ {
+ loc->is_ip4 = 0;
+ memcpy (loc->addr, addr, 16);
+ }
+}
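+
+/*
+ * Note: the memcpys above copy straight from the ip_address_t, which
+ * assumes the raw v4/v6 bytes are the first member of that structure.
+ */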
+
+static void
+ vl_api_gpe_fwd_entry_path_dump_t_handler
+ (vl_api_gpe_fwd_entry_path_dump_t * mp)
+{
+ lisp_fwd_path_t *path;
+ vl_api_gpe_fwd_entry_path_details_t *rmp = NULL;
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ unix_shared_memory_queue_t *q = NULL;
+ lisp_gpe_fwd_entry_t *lfe;
+
+ gpe_fwd_entry_path_dump_t_net_to_host (mp);
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ if (pool_is_free_index (lgm->lisp_fwd_entry_pool, mp->fwd_entry_index))
+ return;
+
+ lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, mp->fwd_entry_index);
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE == lfe->type)
+ return;
+
+ vec_foreach (path, lfe->paths)
+ {
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ const lisp_gpe_tunnel_t *lgt;
+
+ rmp->_vl_msg_id =
+ clib_host_to_net_u16 (VL_API_GPE_FWD_ENTRY_PATH_DETAILS);
+
+ const lisp_gpe_adjacency_t *ladj =
+ lisp_gpe_adjacency_get (path->lisp_adj);
+ lisp_api_set_locator (&rmp->rmt_loc, &ladj->remote_rloc, path->weight);
+ lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+ lisp_api_set_locator (&rmp->lcl_loc, &lgt->key->lcl, path->weight);
+
+ rmp->context = mp->context;
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+ }
+}
+
+static void
+gpe_fwd_entries_copy (vl_api_gpe_fwd_entry_t * dst,
+ lisp_api_gpe_fwd_entry_t * src)
+{
+ lisp_api_gpe_fwd_entry_t *e;
+ u32 i = 0;
+
+ vec_foreach (e, src)
+ {
+ memset (&dst[i], 0, sizeof (*dst));
+ dst[i].dp_table = e->dp_table;
+ dst[i].fwd_entry_index = e->fwd_entry_index;
+ dst[i].vni = e->vni;
+ dst[i].action = e->action;
+ switch (fid_addr_type (&e->leid))
+ {
+ case FID_ADDR_IP_PREF:
+ if (IP4 == ip_prefix_version (&fid_addr_ippref (&e->leid)))
+ {
+ memcpy (&dst[i].leid, &fid_addr_ippref (&e->leid), 4);
+ memcpy (&dst[i].reid, &fid_addr_ippref (&e->reid), 4);
+ dst[i].eid_type = 0;
+ }
+ else
+ {
+ memcpy (&dst[i].leid, &fid_addr_ippref (&e->leid), 16);
+ memcpy (&dst[i].reid, &fid_addr_ippref (&e->reid), 16);
+ dst[i].eid_type = 1;
+ }
+ dst[i].leid_prefix_len = ip_prefix_len (&fid_addr_ippref (&e->leid));
+ dst[i].reid_prefix_len = ip_prefix_len (&fid_addr_ippref (&e->reid));
+ break;
+ case FID_ADDR_MAC:
+ memcpy (&dst[i].leid, fid_addr_mac (&e->leid), 6);
+ memcpy (&dst[i].reid, fid_addr_mac (&e->reid), 6);
+ dst[i].eid_type = 2;
+ break;
+ default:
+ clib_warning ("unknown fid type %d!", fid_addr_type (&e->leid));
+ break;
+ }
+ i++;
+ }
+}
+
+static void
+gpe_fwd_entries_get_t_net_to_host (vl_api_gpe_fwd_entries_get_t * mp)
+{
+ mp->vni = clib_net_to_host_u32 (mp->vni);
+}
+
+static void
+gpe_entry_t_host_to_net (vl_api_gpe_fwd_entry_t * e)
+{
+ e->fwd_entry_index = clib_host_to_net_u32 (e->fwd_entry_index);
+ e->dp_table = clib_host_to_net_u32 (e->dp_table);
+ e->vni = clib_host_to_net_u32 (e->vni);
+}
+
+static void
+ gpe_fwd_entries_get_reply_t_host_to_net
+ (vl_api_gpe_fwd_entries_get_reply_t * mp)
+{
+ u32 i;
+ vl_api_gpe_fwd_entry_t *e;
+
+ for (i = 0; i < mp->count; i++)
+ {
+ e = &mp->entries[i];
+ gpe_entry_t_host_to_net (e);
+ }
+ mp->count = clib_host_to_net_u32 (mp->count);
+}
+
+static void
+vl_api_gpe_fwd_entry_vnis_get_t_handler (vl_api_gpe_fwd_entry_vnis_get_t * mp)
+{
+ vl_api_gpe_fwd_entry_vnis_get_reply_t *rmp = 0;
+ hash_pair_t *p;
+ u32 i = 0;
+ int rv = 0;
+
+ u32 *vnis = vnet_lisp_gpe_get_fwd_entry_vnis ();
+ u32 size = hash_elts (vnis) * sizeof (u32);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_GPE_FWD_ENTRY_VNIS_GET_REPLY, size,
+ {
+ rmp->count = clib_host_to_net_u32 (hash_elts (vnis));
+ hash_foreach_pair (p, vnis,
+ ({
+ rmp->vnis[i++] = clib_host_to_net_u32 (p->key);
+ }));
+ });
+ /* *INDENT-ON* */
+
+ hash_free (vnis);
+}
+
+static void
+vl_api_gpe_fwd_entries_get_t_handler (vl_api_gpe_fwd_entries_get_t * mp)
+{
+ lisp_api_gpe_fwd_entry_t *e;
+ vl_api_gpe_fwd_entries_get_reply_t *rmp = 0;
+ u32 size = 0;
+ int rv = 0;
+
+ gpe_fwd_entries_get_t_net_to_host (mp);
+
+ e = vnet_lisp_gpe_fwd_entries_get_by_vni (mp->vni);
+ size = vec_len (e) * sizeof (vl_api_gpe_fwd_entry_t);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_GPE_FWD_ENTRIES_GET_REPLY, size,
+ {
+ rmp->count = vec_len (e);
+ gpe_fwd_entries_copy (rmp->entries, e);
+ gpe_fwd_entries_get_reply_t_host_to_net (rmp);
+ });
+ /* *INDENT-ON* */
+
+ vec_free (e);
+}
+
+static void
+gpe_add_del_fwd_entry_t_net_to_host (vl_api_gpe_add_del_fwd_entry_t * mp)
+{
+ mp->vni = clib_net_to_host_u32 (mp->vni);
+ mp->dp_table = clib_net_to_host_u32 (mp->dp_table);
+ mp->loc_num = clib_net_to_host_u32 (mp->loc_num);
+}
+
+static void
+vl_api_gpe_add_del_fwd_entry_t_handler (vl_api_gpe_add_del_fwd_entry_t * mp)
+{
+ vl_api_gpe_add_del_fwd_entry_reply_t *rmp;
+ vnet_lisp_gpe_add_del_fwd_entry_args_t _a, *a = &_a;
+ locator_pair_t *pairs = 0;
+ int rv = 0;
+
+ gpe_add_del_fwd_entry_t_net_to_host (mp);
+ memset (a, 0, sizeof (a[0]));
+
+ rv = unformat_lisp_eid_api (&a->rmt_eid, mp->vni, mp->eid_type,
+ mp->rmt_eid, mp->rmt_len);
+ rv |= unformat_lisp_eid_api (&a->lcl_eid, mp->vni, mp->eid_type,
+ mp->lcl_eid, mp->lcl_len);
+
+  if (rv)
+    goto send_reply;
+
+  if (mp->loc_num % 2 != 0)
+    {
+      rv = -1;
+      goto send_reply;
+    }
+
+  /* validate the EIDs and the locator count before allocating the
+   * pairs vector so nothing leaks on the error paths above */
+  pairs = unformat_gpe_loc_pairs (mp->locs, mp->loc_num / 2);
+
+ a->is_add = mp->is_add;
+ a->locator_pairs = pairs;
+ a->dp_table = mp->dp_table;
+ a->vni = mp->vni;
+ a->action = mp->action;
+ if (mp->loc_num == 0)
+ a->is_negative = 1;
+
+ rv = vnet_lisp_gpe_add_del_fwd_entry (a, 0);
+ vec_free (pairs);
+send_reply:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_GPE_ADD_DEL_FWD_ENTRY_REPLY,
+ {
+ rmp->fwd_entry_index = clib_host_to_net_u32 (a->fwd_entry_index);
+ });
+ /* *INDENT-ON* */
+}
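+
+/*
+ * Note on loc_num semantics: it counts all locators carried in the
+ * message, local plus remote, so it must be even; loc_num == 0 denotes
+ * a negative mapping.
+ */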
+
+static void
+vl_api_gpe_enable_disable_t_handler (vl_api_gpe_enable_disable_t * mp)
+{
+ vl_api_gpe_enable_disable_reply_t *rmp;
+ int rv = 0;
+ vnet_lisp_gpe_enable_disable_args_t _a, *a = &_a;
+
+ a->is_en = mp->is_en;
+ vnet_lisp_gpe_enable_disable (a);
+
+ REPLY_MACRO (VL_API_GPE_ENABLE_DISABLE_REPLY);
+}
+
+static void
+vl_api_gpe_add_del_iface_t_handler (vl_api_gpe_add_del_iface_t * mp)
+{
+ vl_api_gpe_add_del_iface_reply_t *rmp;
+ int rv = 0;
+ u32 vni, dp_table;
+
+ vni = clib_net_to_host_u32 (mp->vni);
+ dp_table = clib_net_to_host_u32 (mp->dp_table);
+
+ if (mp->is_l2)
+ {
+ if (mp->is_add)
+ {
+ if (~0 == lisp_gpe_tenant_l2_iface_add_or_lock (vni, dp_table))
+ rv = 1;
+ }
+ else
+ lisp_gpe_tenant_l2_iface_unlock (vni);
+ }
+ else
+ {
+ if (mp->is_add)
+ {
+ if (~0 == lisp_gpe_tenant_l3_iface_add_or_lock (vni, dp_table))
+ rv = 1;
+ }
+ else
+ lisp_gpe_tenant_l3_iface_unlock (vni);
+ }
+
+ REPLY_MACRO (VL_API_GPE_ADD_DEL_IFACE_REPLY);
+}
+
+static void
+vl_api_gpe_set_encap_mode_t_handler (vl_api_gpe_set_encap_mode_t * mp)
+{
+ vl_api_gpe_set_encap_mode_reply_t *rmp;
+ int rv = 0;
+
+ rv = vnet_gpe_set_encap_mode (mp->mode);
+ REPLY_MACRO (VL_API_GPE_SET_ENCAP_MODE_REPLY);
+}
+
+static void
+vl_api_gpe_get_encap_mode_t_handler (vl_api_gpe_get_encap_mode_t * mp)
+{
+ vl_api_gpe_get_encap_mode_reply_t *rmp;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_GPE_GET_ENCAP_MODE_REPLY,
+ ({
+ rmp->encap_mode = vnet_gpe_get_encap_mode ();
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+ vl_api_gpe_add_del_native_fwd_rpath_t_handler
+ (vl_api_gpe_add_del_native_fwd_rpath_t * mp)
+{
+ vl_api_gpe_add_del_native_fwd_rpath_reply_t *rmp;
+ vnet_gpe_native_fwd_rpath_args_t _a, *a = &_a;
+ int rv = 0;
+
+ memset (a, 0, sizeof (a[0]));
+
+ if (mp->is_ip4)
+ clib_memcpy (&a->rpath.frp_addr.ip4, mp->nh_addr, sizeof (ip4_address_t));
+ else
+ clib_memcpy (&a->rpath.frp_addr.ip6, mp->nh_addr, sizeof (ip6_address_t));
+
+ a->is_add = mp->is_add;
+ a->rpath.frp_proto = mp->is_ip4 ? DPO_PROTO_IP4 : DPO_PROTO_IP6;
+ a->rpath.frp_fib_index =
+ fib_table_find (dpo_proto_to_fib (a->rpath.frp_proto),
+ clib_net_to_host_u32 (mp->table_id));
+ if (~0 == a->rpath.frp_fib_index)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto done;
+ }
+
+ a->rpath.frp_sw_if_index = clib_net_to_host_u32 (mp->nh_sw_if_index);
+ a->rpath.frp_weight = 1;
+
+ rv = vnet_gpe_add_del_native_fwd_rpath (a);
+done:
+ REPLY_MACRO (VL_API_GPE_ADD_DEL_NATIVE_FWD_RPATH_REPLY);
+}
+
+static void
+gpe_native_fwd_rpaths_copy (vl_api_gpe_native_fwd_rpath_t * dst,
+ fib_route_path_t * src, u8 is_ip4)
+{
+ fib_route_path_t *e;
+ fib_table_t *table;
+ u32 i = 0;
+
+ vec_foreach (e, src)
+ {
+ memset (&dst[i], 0, sizeof (*dst));
+ table = fib_table_get (e->frp_fib_index, dpo_proto_to_fib (e->frp_proto));
+ dst[i].fib_index = table->ft_table_id;
+ dst[i].nh_sw_if_index = e->frp_sw_if_index;
+ dst[i].is_ip4 = is_ip4;
+ if (is_ip4)
+ clib_memcpy (&dst[i].nh_addr, &e->frp_addr.ip4, sizeof (ip4_address_t));
+ else
+ clib_memcpy (&dst[i].nh_addr, &e->frp_addr.ip6, sizeof (ip6_address_t));
+ i++;
+ }
+}
+
+static void
+gpe_native_fwd_rpath_t_host_to_net (vl_api_gpe_native_fwd_rpath_t * e)
+{
+ e->fib_index = clib_host_to_net_u32 (e->fib_index);
+ e->nh_sw_if_index = clib_host_to_net_u32 (e->nh_sw_if_index);
+}
+
+static void
+ gpe_native_fwd_rpaths_get_reply_t_host_to_net
+ (vl_api_gpe_native_fwd_rpaths_get_reply_t * mp)
+{
+ u32 i;
+ vl_api_gpe_native_fwd_rpath_t *e;
+
+ for (i = 0; i < mp->count; i++)
+ {
+ e = &mp->entries[i];
+ gpe_native_fwd_rpath_t_host_to_net (e);
+ }
+ mp->count = clib_host_to_net_u32 (mp->count);
+}
+
+static void
+vl_api_gpe_native_fwd_rpaths_get_t_handler (vl_api_gpe_native_fwd_rpaths_get_t
+ * mp)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ vl_api_gpe_native_fwd_rpaths_get_reply_t *rmp;
+ u32 size = 0;
+ int rv = 0;
+
+ u8 rpath_index = mp->is_ip4 ? 0 : 1;
+
+ size = vec_len (lgm->native_fwd_rpath[rpath_index])
+ * sizeof (vl_api_gpe_native_fwd_rpath_t);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO4 (VL_API_GPE_NATIVE_FWD_RPATHS_GET_REPLY, size,
+ {
+ rmp->count = vec_len (lgm->native_fwd_rpath[rpath_index]);
+ gpe_native_fwd_rpaths_copy (rmp->entries,
+ lgm->native_fwd_rpath[rpath_index],
+ mp->is_ip4);
+ gpe_native_fwd_rpaths_get_reply_t_host_to_net (rmp);
+ });
+ /* *INDENT-ON* */
+}
+
+/*
+ * gpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_lisp_gpe;
+#undef _
+}
+
+static clib_error_t *
+gpe_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (gpe_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_error.def b/src/vnet/lisp-gpe/lisp_gpe_error.def
new file mode 100644
index 00000000..415fada7
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_error.def
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+lisp_gpe_error (ENCAPSULATED, "good packets encapsulated")
+lisp_gpe_error (DECAPSULATED, "good packets decapsulated")
+lisp_gpe_error (NO_TUNNEL, "tunnel does not exist")
diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
new file mode 100644
index 00000000..dbbea418
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
@@ -0,0 +1,1588 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+#include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+#include <vnet/lisp-cp/lisp_cp_dpo.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/adj/adj_midchain.h>
+
+/**
+ * @brief Add route to IP4 or IP6 Destination FIB.
+ *
+ * Add a route to the destination FIB whose result is a lookup
+ * in the SRC FIB. The SRC FIB is created if it does not yet exist.
+ *
+ * @param[in] dst_fib_index Destination FIB index.
+ * @param[in] dst_prefix Destination IP prefix.
+ *
+ * @return src_fib_index The index/ID of the SRC FIB created.
+ */
+static u32
+ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix)
+{
+ fib_node_index_t src_fib_index;
+ fib_prefix_t dst_fib_prefix;
+ fib_node_index_t dst_fei;
+
+ ASSERT (NULL != dst_prefix);
+
+ ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix);
+
+ /*
+ * lookup the destination prefix in the VRF table and retrieve the
+ * LISP associated data
+ */
+ dst_fei = fib_table_lookup_exact_match (dst_fib_index, &dst_fib_prefix);
+
+ /*
+ * If the FIB entry is not present, or not LISP sourced, add it
+ */
+ if (dst_fei == FIB_NODE_INDEX_INVALID ||
+ NULL == fib_entry_get_source_data (dst_fei, FIB_SOURCE_LISP))
+ {
+ dpo_id_t src_lkup_dpo = DPO_INVALID;
+
+ /* create a new src FIB. */
+ src_fib_index =
+ fib_table_create_and_lock (dst_fib_prefix.fp_proto,
+ FIB_SOURCE_LISP,
+ "LISP-src for [%d,%U]",
+ dst_fib_index,
+ format_fib_prefix, &dst_fib_prefix);
+ /*
+ * add src fib default route
+ */
+ fib_prefix_t prefix = {
+ .fp_proto = dst_fib_prefix.fp_proto,
+ };
+ fib_table_entry_special_dpo_add (src_fib_index, &prefix,
+ FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ lisp_cp_dpo_get (fib_proto_to_dpo
+ (dst_fib_prefix.fp_proto)));
+ /*
+ * create a data-path object to perform the source address lookup
+ * in the SRC FIB
+ */
+ lookup_dpo_add_or_lock_w_fib_index (src_fib_index,
+ (ip_prefix_version (dst_prefix) ==
+ IP6 ? DPO_PROTO_IP6 :
+ DPO_PROTO_IP4),
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_SRC_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ &src_lkup_dpo);
+
+ /*
+ * add the entry to the destination FIB that uses the lookup DPO
+ */
+ dst_fei = fib_table_entry_special_dpo_add (dst_fib_index,
+ &dst_fib_prefix,
+ FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &src_lkup_dpo);
+
+ /*
+ * the DPO is locked by the FIB entry, and we have no further
+ * need for it.
+ */
+ dpo_unlock (&src_lkup_dpo);
+
+ /*
+ * save the SRC FIB index on the entry so we can retrieve it for
+ * subsequent routes.
+ */
+ fib_entry_set_source_data (dst_fei, FIB_SOURCE_LISP, &src_fib_index);
+ }
+ else
+ {
+ /*
+ * destination FIB entry already present
+ */
+ src_fib_index = *(u32 *) fib_entry_get_source_data (dst_fei,
+ FIB_SOURCE_LISP);
+ }
+
+ return (src_fib_index);
+}
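+
+/*
+ * A minimal sketch of the two-stage lookup built above, with
+ * hypothetical prefixes, assuming a src/dst forwarding policy:
+ *
+ *   DST FIB:    10.0.0.0/24 -> lookup-DPO (src addr, SRC FIB N)
+ *   SRC FIB N:  0.0.0.0/0   -> lisp-cp DPO (punt to the control plane)
+ *               20.0.0.0/24 -> load-balance over the RLOC adjacencies
+ */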
+
+/**
+ * @brief Del route to IP4 or IP6 SD FIB.
+ *
+ * Remove routes from both destination and source FIBs.
+ *
+ * @param[in] src_fib_index The index/ID of the SRC FIB
+ * @param[in] src_prefix Source IP prefix.
+ * @param[in] dst_fib_index The index/ID of the DST FIB
+ * @param[in] dst_prefix Destination IP prefix.
+ */
+static void
+ip_src_dst_fib_del_route (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ u32 dst_fib_index, const ip_prefix_t * dst_prefix)
+{
+ fib_prefix_t dst_fib_prefix, src_fib_prefix;
+ u8 have_default = 0;
+ u32 n_entries;
+
+ ASSERT (NULL != dst_prefix);
+ ASSERT (NULL != src_prefix);
+
+ ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix);
+ ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix);
+
+ fib_table_entry_delete (src_fib_index, &src_fib_prefix, FIB_SOURCE_LISP);
+
+  /* check whether only the default route is left, or the src FIB is empty */
+ fib_prefix_t default_pref = {
+ .fp_proto = dst_fib_prefix.fp_proto
+ };
+
+ if (fib_table_lookup_exact_match (src_fib_index,
+ &default_pref) != FIB_NODE_INDEX_INVALID)
+ have_default = 1;
+
+ n_entries = fib_table_get_num_entries (src_fib_index,
+ src_fib_prefix.fp_proto,
+ FIB_SOURCE_LISP);
+ if (n_entries == 0 || (have_default && n_entries == 1))
+ {
+ /*
+ * remove src FIB default route
+ */
+ if (have_default)
+ fib_table_entry_special_remove (src_fib_index, &default_pref,
+ FIB_SOURCE_LISP);
+
+ /*
+ * there's nothing left now, unlock the source FIB and the
+ * destination route
+ */
+ fib_table_entry_special_remove (dst_fib_index,
+ &dst_fib_prefix, FIB_SOURCE_LISP);
+ fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto,
+ FIB_SOURCE_LISP);
+ }
+}
+
+/**
+ * @brief Add route to IP4 or IP6 SRC FIB.
+ *
+ * Adds a route to the LISP SRC FIB with the result of the route
+ * being the DPO passed.
+ *
+ * @param[in] src_fib_index The index/ID of the SRC FIB
+ * @param[in] src_prefix Source IP prefix.
+ * @param[in] src_dpo The DPO the route will link to.
+ *
+ * @return fib entry index of the inserted prefix
+ */
+static fib_node_index_t
+ip_src_fib_add_route_w_dpo (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ const dpo_id_t * src_dpo)
+{
+ fib_node_index_t fei = ~0;
+ fib_prefix_t src_fib_prefix;
+
+ ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix);
+
+ /*
+ * add the entry into the source fib.
+ */
+ fib_node_index_t src_fei;
+
+ src_fei = fib_table_lookup_exact_match (src_fib_index, &src_fib_prefix);
+
+ if (FIB_NODE_INDEX_INVALID == src_fei ||
+ !fib_entry_is_sourced (src_fei, FIB_SOURCE_LISP))
+ {
+ fei = fib_table_entry_special_dpo_add (src_fib_index,
+ &src_fib_prefix,
+ FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ src_dpo);
+ }
+ return fei;
+}
+
+static fib_route_path_t *
+lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths)
+{
+ const lisp_gpe_adjacency_t *ladj;
+ fib_route_path_t *rpaths = NULL;
+ fib_protocol_t fp;
+ u8 best_priority;
+ u32 ii;
+
+ vec_validate (rpaths, vec_len (paths) - 1);
+
+ best_priority = paths[0].priority;
+
+ vec_foreach_index (ii, paths)
+ {
+    if (paths[ii].priority != best_priority)
+      break;
+
+    ladj = lisp_gpe_adjacency_get (paths[ii].lisp_adj);
+
+    ip_address_to_46 (&ladj->remote_rloc, &rpaths[ii].frp_addr, &fp);
+
+    rpaths[ii].frp_proto = fib_proto_to_dpo (fp);
+    rpaths[ii].frp_sw_if_index = ladj->sw_if_index;
+    rpaths[ii].frp_weight = (paths[ii].weight ? paths[ii].weight : 1);
+  }
+
+  /* paths are sorted by ascending priority, so trim the slots that were
+   * pre-allocated for the lower-priority paths skipped above */
+  _vec_len (rpaths) = ii;
+
+  ASSERT (0 != vec_len (rpaths));
+
+ return (rpaths);
+}
+
+/**
+ * @brief Add route to IP4 or IP6 SRC FIB.
+ *
+ * Adds a route to the LISP SRC FIB for the tunnel.
+ *
+ * @param[in] src_fib_index The index/ID of the SRC FIB
+ * @param[in] src_prefix Source IP prefix.
+ * @param[in] paths The paths from which to construct the
+ *                  load balance.
+ *
+ * @return fib entry index of the inserted prefix.
+ */
+static fib_node_index_t
+ip_src_fib_add_route (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ const lisp_fwd_path_t * paths)
+{
+ fib_prefix_t src_fib_prefix;
+ fib_route_path_t *rpaths;
+
+ ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix);
+
+ rpaths = lisp_gpe_mk_fib_paths (paths);
+
+ fib_node_index_t fib_entry_index =
+ fib_table_entry_update (src_fib_index, &src_fib_prefix, FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_NONE, rpaths);
+ vec_free (rpaths);
+ return fib_entry_index;
+}
+
+static void
+gpe_native_fwd_add_del_lfe (lisp_gpe_fwd_entry_t * lfe, u8 is_add)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ u8 found = 0, ip_version;
+ u32 *lfei, new_lfei;
+ ip_version = ip_prefix_version (&lfe->key->rmt.ippref);
+
+ new_lfei = lfe - lgm->lisp_fwd_entry_pool;
+ vec_foreach (lfei, lgm->native_fwd_lfes[ip_version])
+ {
+ lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, lfei[0]);
+ if (lfei[0] == new_lfei)
+ {
+ found = 1;
+ break;
+ }
+ }
+
+ if (is_add)
+ {
+ if (!found)
+ vec_add1 (lgm->native_fwd_lfes[ip_version], new_lfei);
+ }
+ else
+ {
+ if (found)
+	vec_del1 (lgm->native_fwd_lfes[ip_version],
+		  lfei - lgm->native_fwd_lfes[ip_version]);
+ }
+}
+
+static index_t
+create_fib_entries (lisp_gpe_fwd_entry_t * lfe)
+{
+ fib_node_index_t fi;
+ fib_entry_t *fe;
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ dpo_proto_t dproto;
+ ip_prefix_t ippref;
+ fib_prefix_t fib_prefix;
+ u8 ip_version = ip_prefix_version (&lfe->key->rmt.ippref);
+ dproto = (ip_version == IP4 ? DPO_PROTO_IP4 : DPO_PROTO_IP6);
+
+ if (lfe->is_src_dst)
+ {
+ lfe->src_fib_index = ip_dst_fib_add_route (lfe->eid_fib_index,
+ &lfe->key->rmt.ippref);
+ memcpy (&ippref, &lfe->key->lcl.ippref, sizeof (ippref));
+ }
+ else
+ {
+ lfe->src_fib_index = lfe->eid_fib_index;
+ memcpy (&ippref, &lfe->key->rmt.ippref, sizeof (ippref));
+ }
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE == lfe->type)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ switch (lfe->action)
+ {
+ case LISP_FORWARD_NATIVE:
+ /* TODO handle route overlaps with fib and default route */
+ if (vec_len (lgm->native_fwd_rpath[ip_version]))
+ {
+ ip_prefix_to_fib_prefix (&lfe->key->rmt.ippref, &fib_prefix);
+ fi = fib_table_entry_update (lfe->eid_fib_index, &fib_prefix,
+ FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_NONE,
+ lgm->native_fwd_rpath[ip_version]);
+ gpe_native_fwd_add_del_lfe (lfe, 1);
+ goto done;
+ }
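+	  /* fall through: with no native rpaths configured this
+	   * degenerates to the map-request behaviour below */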
+ case LISP_NO_ACTION:
+ /* TODO update timers? */
+ case LISP_SEND_MAP_REQUEST:
+ /* insert tunnel that always sends map-request */
+ dpo_copy (&dpo, lisp_cp_dpo_get (dproto));
+ break;
+ case LISP_DROP:
+ /* for drop fwd entries, just add route, no need to add encap tunnel */
+ dpo_copy (&dpo, drop_dpo_get (dproto));
+ break;
+ }
+ fi = ip_src_fib_add_route_w_dpo (lfe->src_fib_index, &ippref, &dpo);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ fi = ip_src_fib_add_route (lfe->src_fib_index, &ippref, lfe->paths);
+ }
+done:
+ fe = fib_entry_get (fi);
+ return fe->fe_lb.dpoi_index;
+}
+
+static void
+delete_fib_entries (lisp_gpe_fwd_entry_t * lfe)
+{
+ fib_prefix_t dst_fib_prefix;
+
+ if (lfe->is_src_dst)
+ ip_src_dst_fib_del_route (lfe->src_fib_index,
+ &lfe->key->lcl.ippref,
+ lfe->eid_fib_index, &lfe->key->rmt.ippref);
+ else
+ {
+ ip_prefix_to_fib_prefix (&lfe->key->rmt.ippref, &dst_fib_prefix);
+ fib_table_entry_delete (lfe->src_fib_index, &dst_fib_prefix,
+ FIB_SOURCE_LISP);
+ gpe_native_fwd_add_del_lfe (lfe, 0);
+ }
+}
+
+static lisp_gpe_fwd_entry_t *
+find_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ lisp_gpe_fwd_entry_key_t * key)
+{
+ uword *p;
+
+ memset (key, 0, sizeof (*key));
+
+ if (GID_ADDR_IP_PREFIX == gid_address_type (&a->rmt_eid))
+ {
+ /*
+       * the ip version of the source is not set when the source is
+       * all zeros, so force it to match the remote EID's version.
+ */
+ ip_prefix_version (&gid_address_ippref (&a->lcl_eid)) =
+ ip_prefix_version (&gid_address_ippref (&a->rmt_eid));
+ }
+
+ gid_to_dp_address (&a->rmt_eid, &key->rmt);
+ gid_to_dp_address (&a->lcl_eid, &key->lcl);
+ key->vni = a->vni;
+
+ p = hash_get_mem (lgm->lisp_gpe_fwd_entries, key);
+
+ if (NULL != p)
+ {
+ return (pool_elt_at_index (lgm->lisp_fwd_entry_pool, p[0]));
+ }
+ return (NULL);
+}
+
+static int
+lisp_gpe_fwd_entry_path_sort (void *a1, void *a2)
+{
+ lisp_fwd_path_t *p1 = a1, *p2 = a2;
+
+ return (p1->priority - p2->priority);
+}
+
+static void
+lisp_gpe_fwd_entry_mk_paths (lisp_gpe_fwd_entry_t * lfe,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+ lisp_fwd_path_t *path;
+ u32 index;
+
+ vec_validate (lfe->paths, vec_len (a->locator_pairs) - 1);
+
+ vec_foreach_index (index, a->locator_pairs)
+ {
+ path = &lfe->paths[index];
+
+ path->priority = a->locator_pairs[index].priority;
+ path->weight = a->locator_pairs[index].weight;
+
+ path->lisp_adj =
+ lisp_gpe_adjacency_find_or_create_and_lock (&a->locator_pairs
+ [index],
+ a->dp_table, lfe->key->vni);
+ }
+ vec_sort_with_function (lfe->paths, lisp_gpe_fwd_entry_path_sort);
+}
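+
+/*
+ * Note: paths are kept sorted by ascending priority so that
+ * lisp_gpe_mk_fib_paths() can stop at the first non-best-priority path.
+ */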
+
+void
+vnet_lisp_gpe_add_fwd_counters (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ u32 fwd_entry_index)
+{
+ const lisp_gpe_adjacency_t *ladj;
+ lisp_fwd_path_t *path;
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ u8 *dummy_elt;
+ lisp_gpe_fwd_entry_t *lfe;
+ lisp_gpe_fwd_entry_key_t fe_key;
+ lisp_stats_key_t key;
+
+ lfe = find_fwd_entry (lgm, a, &fe_key);
+
+ if (!lfe)
+ return;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NORMAL != lfe->type)
+ return;
+
+ memset (&key, 0, sizeof (key));
+ key.fwd_entry_index = fwd_entry_index;
+
+ vec_foreach (path, lfe->paths)
+ {
+ ladj = lisp_gpe_adjacency_get (path->lisp_adj);
+ key.tunnel_index = ladj->tunnel_index;
+ lisp_stats_key_t *key_copy = clib_mem_alloc (sizeof (*key_copy));
+ memcpy (key_copy, &key, sizeof (*key_copy));
+ pool_get (lgm->dummy_stats_pool, dummy_elt);
+ hash_set_mem (lgm->lisp_stats_index_by_key, key_copy,
+ dummy_elt - lgm->dummy_stats_pool);
+
+ vlib_validate_combined_counter (&lgm->counters,
+ dummy_elt - lgm->dummy_stats_pool);
+ vlib_zero_combined_counter (&lgm->counters,
+ dummy_elt - lgm->dummy_stats_pool);
+ }
+}
+
+/**
+ * @brief Add LISP IP forwarding entry.
+ *
+ * Coordinates the creation of forwarding entries for the IP LISP overlay.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] a Parameters for building the forwarding entry.
+ *
+ * @return 0 on success.
+ */
+static int
+add_ip_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+ lisp_gpe_fwd_entry_key_t key;
+ lisp_gpe_fwd_entry_t *lfe;
+ fib_protocol_t fproto;
+
+ lfe = find_fwd_entry (lgm, a, &key);
+
+ if (NULL != lfe)
+ /* don't support updates */
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get (lgm->lisp_fwd_entry_pool, lfe);
+ memset (lfe, 0, sizeof (*lfe));
+ lfe->key = clib_mem_alloc (sizeof (key));
+ memcpy (lfe->key, &key, sizeof (key));
+
+ hash_set_mem (lgm->lisp_gpe_fwd_entries, lfe->key,
+ lfe - lgm->lisp_fwd_entry_pool);
+ a->fwd_entry_index = lfe - lgm->lisp_fwd_entry_pool;
+
+ fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+
+ lfe->type = (a->is_negative ?
+ LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE :
+ LISP_GPE_FWD_ENTRY_TYPE_NORMAL);
+ lfe->tenant = lisp_gpe_tenant_find_or_create (lfe->key->vni);
+ lfe->eid_table_id = a->table_id;
+ lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto,
+ lfe->eid_table_id,
+ FIB_SOURCE_LISP);
+ lfe->is_src_dst = a->is_src_dst;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+ {
+ lisp_gpe_fwd_entry_mk_paths (lfe, a);
+ }
+ else
+ {
+ lfe->action = a->action;
+ }
+
+ lfe->dpoi_index = create_fib_entries (lfe);
+ return (0);
+}
+
+static void
+del_ip_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe)
+{
+ lisp_fwd_path_t *path;
+ fib_protocol_t fproto;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+ {
+ vec_foreach (path, lfe->paths)
+ {
+ lisp_gpe_adjacency_unlock (path->lisp_adj);
+ }
+ }
+
+ delete_fib_entries (lfe);
+
+ fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+ fib_table_unlock (lfe->eid_fib_index, fproto, FIB_SOURCE_LISP);
+
+ hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key);
+ clib_mem_free (lfe->key);
+ pool_put (lgm->lisp_fwd_entry_pool, lfe);
+}
+
+/**
+ * @brief Delete LISP IP forwarding entry.
+ *
+ * Coordinates the removal of forwarding entries for the IP LISP overlay.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] a Parameters for building the forwarding entry.
+ *
+ * @return 0 on success.
+ */
+static int
+del_ip_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+ lisp_gpe_fwd_entry_key_t key;
+ lisp_gpe_fwd_entry_t *lfe;
+
+ lfe = find_fwd_entry (lgm, a, &key);
+
+ if (NULL == lfe)
+ /* no such entry */
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ del_ip_fwd_entry_i (lgm, lfe);
+
+ return (0);
+}
+
+static void
+make_mac_fib_key (BVT (clib_bihash_kv) * kv, u16 bd_index, u8 src_mac[6],
+ u8 dst_mac[6])
+{
+ kv->key[0] = (((u64) bd_index) << 48) | mac_to_u64 (dst_mac);
+ kv->key[1] = mac_to_u64 (src_mac);
+ kv->key[2] = 0;
+}
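+
+/*
+ * Key layout sketch (hypothetical values, assuming mac_to_u64() packs the
+ * six bytes into the low 48 bits): for bd_index 7,
+ *   key[0] = (7ULL << 48) | mac_to_u64 (dst_mac)
+ *   key[1] = mac_to_u64 (src_mac), or 0 for a dst-only catch-all entry
+ */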
+
+/**
+ * @brief Lookup L2 SD FIB entry
+ *
+ * Does a vni + dest + source lookup in the L2 LISP FIB. If the lookup fails
+ * it tries a second time with source set to 0 (i.e., a simple dest lookup).
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] bd_index Bridge domain index.
+ * @param[in] src_mac Source mac address.
+ * @param[in] dst_mac Destination mac address.
+ *
+ * @return index of mapping matching the lookup key.
+ */
+index_t
+lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6],
+ u8 dst_mac[6])
+{
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ make_mac_fib_key (&kv, bd_index, src_mac, dst_mac);
+ rv = BV (clib_bihash_search_inline_2) (&lgm->l2_fib, &kv, &value);
+
+ /* no match, try with src 0, catch all for dst */
+ if (rv != 0)
+ {
+ kv.key[1] = 0;
+ rv = BV (clib_bihash_search_inline_2) (&lgm->l2_fib, &kv, &value);
+ if (rv == 0)
+ return value.value;
+ }
+ else
+ return value.value;
+
+ return lisp_gpe_main.l2_lb_cp_lkup.dpoi_index;
+}
+
+/**
+ * @brief Add/del L2 SD FIB entry
+ *
+ * Inserts value in L2 FIB keyed by vni + dest + source. If entry is
+ * overwritten the associated value is returned.
+ *
+ * @param[in] bd_index Bridge domain index.
+ * @param[in] src_mac Source mac address.
+ * @param[in] dst_mac Destination mac address.
+ * @param[in] dpo DPO whose index is stored as the entry's value.
+ * @param[in] is_add Add/del flag.
+ *
+ * @return ~0 or value of overwritten entry.
+ */
+static u32
+lisp_l2_fib_add_del_entry (u16 bd_index, u8 src_mac[6],
+ u8 dst_mac[6], const dpo_id_t * dpo, u8 is_add)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ BVT (clib_bihash_kv) kv, value;
+ u32 old_val = ~0;
+
+ make_mac_fib_key (&kv, bd_index, src_mac, dst_mac);
+
+ if (BV (clib_bihash_search) (&lgm->l2_fib, &kv, &value) == 0)
+ old_val = value.value;
+
+ if (!is_add)
+ BV (clib_bihash_add_del) (&lgm->l2_fib, &kv, 0 /* is_add */ );
+ else
+ {
+ kv.value = dpo->dpoi_index;
+ BV (clib_bihash_add_del) (&lgm->l2_fib, &kv, 1 /* is_add */ );
+ }
+ return old_val;
+}
+
+#define L2_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define L2_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+static void
+l2_fib_init (lisp_gpe_main_t * lgm)
+{
+ index_t lbi;
+
+ BV (clib_bihash_init) (&lgm->l2_fib, "l2 fib",
+ 1 << max_log2 (L2_FIB_DEFAULT_HASH_NUM_BUCKETS),
+ L2_FIB_DEFAULT_HASH_MEMORY_SIZE);
+
+ /*
+   * the result from a 'miss' in an L2 table
+ */
+ lbi = load_balance_create (1, DPO_PROTO_ETHERNET, 0);
+ load_balance_set_bucket (lbi, 0, lisp_cp_dpo_get (DPO_PROTO_ETHERNET));
+
+ dpo_set (&lgm->l2_lb_cp_lkup, DPO_LOAD_BALANCE, DPO_PROTO_ETHERNET, lbi);
+}
+
+static void
+del_l2_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe)
+{
+ lisp_fwd_path_t *path;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+ {
+ vec_foreach (path, lfe->paths)
+ {
+ lisp_gpe_adjacency_unlock (path->lisp_adj);
+ }
+ fib_path_list_child_remove (lfe->l2.path_list_index,
+ lfe->l2.child_index);
+ }
+
+ lisp_l2_fib_add_del_entry (lfe->l2.eid_bd_index,
+ fid_addr_mac (&lfe->key->lcl),
+ fid_addr_mac (&lfe->key->rmt), NULL, 0);
+
+ hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key);
+ clib_mem_free (lfe->key);
+ pool_put (lgm->lisp_fwd_entry_pool, lfe);
+}
+
+/**
+ * @brief Delete LISP L2 forwarding entry.
+ *
+ * Coordinates the removal of forwarding entries for the L2 LISP overlay.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] a Parameters for building the forwarding entry.
+ *
+ * @return 0 on success.
+ */
+static int
+del_l2_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+ lisp_gpe_fwd_entry_key_t key;
+ lisp_gpe_fwd_entry_t *lfe;
+
+ lfe = find_fwd_entry (lgm, a, &key);
+
+ if (NULL == lfe)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ del_l2_fwd_entry_i (lgm, lfe);
+
+ return (0);
+}
+
+/**
+ * @brief Construct and insert the forwarding information used by an L2 entry
+ */
+static void
+lisp_gpe_l2_update_fwding (lisp_gpe_fwd_entry_t * lfe)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ dpo_id_t dpo = DPO_INVALID;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+ {
+ fib_path_list_contribute_forwarding (lfe->l2.path_list_index,
+ FIB_FORW_CHAIN_TYPE_ETHERNET,
+ &lfe->l2.dpo);
+ dpo_copy (&dpo, &lfe->l2.dpo);
+ }
+ else
+ {
+ switch (lfe->action)
+ {
+ case SEND_MAP_REQUEST:
+ dpo_copy (&dpo, &lgm->l2_lb_cp_lkup);
+ break;
+ case NO_ACTION:
+ case FORWARD_NATIVE:
+ case DROP:
+ dpo_copy (&dpo, drop_dpo_get (DPO_PROTO_ETHERNET));
+ }
+ }
+
+ /* add entry to l2 lisp fib */
+ lisp_l2_fib_add_del_entry (lfe->l2.eid_bd_index,
+ fid_addr_mac (&lfe->key->lcl),
+ fid_addr_mac (&lfe->key->rmt), &dpo, 1);
+ lfe->dpoi_index = dpo.dpoi_index;
+
+ dpo_reset (&dpo);
+}
+
+/**
+ * @brief Add LISP L2 forwarding entry.
+ *
+ * Coordinates the creation of forwarding entries for L2 LISP overlay:
+ * creates lisp-gpe tunnel and injects new entry in Source/Dest L2 FIB.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] a Parameters for building the forwarding entry.
+ *
+ * @return 0 on success.
+ */
+static int
+add_l2_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+ lisp_gpe_fwd_entry_key_t key;
+ bd_main_t *bdm = &bd_main;
+ lisp_gpe_fwd_entry_t *lfe;
+ uword *bd_indexp;
+
+ bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id);
+ if (!bd_indexp)
+ {
+ clib_warning ("bridge domain %d doesn't exist", a->bd_id);
+ return -1;
+ }
+
+ lfe = find_fwd_entry (lgm, a, &key);
+
+ if (NULL != lfe)
+ /* don't support updates */
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get (lgm->lisp_fwd_entry_pool, lfe);
+ memset (lfe, 0, sizeof (*lfe));
+ lfe->key = clib_mem_alloc (sizeof (key));
+ memcpy (lfe->key, &key, sizeof (key));
+
+ hash_set_mem (lgm->lisp_gpe_fwd_entries, lfe->key,
+ lfe - lgm->lisp_fwd_entry_pool);
+ a->fwd_entry_index = lfe - lgm->lisp_fwd_entry_pool;
+
+ lfe->type = (a->is_negative ?
+ LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE :
+ LISP_GPE_FWD_ENTRY_TYPE_NORMAL);
+ lfe->l2.eid_bd_id = a->bd_id;
+ lfe->l2.eid_bd_index = bd_indexp[0];
+ lfe->tenant = lisp_gpe_tenant_find_or_create (lfe->key->vni);
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+ {
+ fib_route_path_t *rpaths;
+
+ /*
+ * Make the sorted array of LISP paths with their resp. adjacency
+ */
+ lisp_gpe_fwd_entry_mk_paths (lfe, a);
+
+ /*
+ * From the LISP paths, construct a FIB path list that will
+ * contribute a load-balance.
+ */
+ rpaths = lisp_gpe_mk_fib_paths (lfe->paths);
+
+ lfe->l2.path_list_index =
+ fib_path_list_create (FIB_PATH_LIST_FLAG_NONE, rpaths);
+
+ /*
+ * become a child of the path-list so we receive updates when
+ * its forwarding state changes. this includes an implicit lock.
+ */
+ lfe->l2.child_index =
+ fib_path_list_child_add (lfe->l2.path_list_index,
+ FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY,
+ lfe - lgm->lisp_fwd_entry_pool);
+ }
+ else
+ {
+ lfe->action = a->action;
+ }
+
+ lisp_gpe_l2_update_fwding (lfe);
+
+ return 0;
+}
+
+/**
+ * @brief Lookup NSH SD FIB entry
+ *
+ * Does an SPI+SI lookup in the NSH LISP FIB.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] spi_si_net_order SPI + SI in network byte order.
+ *
+ * @return DPO of the matching entry, or the control-plane punt DPO on miss.
+ */
+const dpo_id_t *
+lisp_nsh_fib_lookup (lisp_gpe_main_t * lgm, u32 spi_si_net_order)
+{
+ int rv;
+ BVT (clib_bihash_kv) kv, value;
+
+ memset (&kv, 0, sizeof (kv));
+ kv.key[0] = spi_si_net_order;
+ rv = BV (clib_bihash_search_inline_2) (&lgm->nsh_fib, &kv, &value);
+
+ if (rv != 0)
+ {
+ return lgm->nsh_cp_lkup;
+ }
+ else
+ {
+ lisp_gpe_fwd_entry_t *lfe;
+ lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, value.value);
+ return &lfe->nsh.choice;
+ }
+}
+
+/**
+ * @brief Add/del NSH FIB entry
+ *
+ * Inserts value in NSH FIB keyed by SPI+SI. If entry is
+ * overwritten the associated value is returned.
+ *
+ * @param[in] spi_si_host_order SPI + SI in host byte order.
+ * @param[in] lfei Forwarding entry index stored as the entry's value.
+ * @param[in] is_add Add/del flag.
+ *
+ * @return ~0 or value of overwritten entry.
+ */
+static u32
+lisp_nsh_fib_add_del_entry (u32 spi_si_host_order, u32 lfei, u8 is_add)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ BVT (clib_bihash_kv) kv, value;
+ u32 old_val = ~0;
+
+ memset (&kv, 0, sizeof (kv));
+ kv.key[0] = clib_host_to_net_u32 (spi_si_host_order);
+ kv.value = 0ULL;
+
+ if (BV (clib_bihash_search) (&lgm->nsh_fib, &kv, &value) == 0)
+ old_val = value.value;
+
+ if (!is_add)
+ BV (clib_bihash_add_del) (&lgm->nsh_fib, &kv, 0 /* is_add */ );
+ else
+ {
+ kv.value = lfei;
+ BV (clib_bihash_add_del) (&lgm->nsh_fib, &kv, 1 /* is_add */ );
+ }
+ return old_val;
+}
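+
+/*
+ * The SPI+SI key is stored in network byte order, presumably so the
+ * data-path can look up the packet header field without byte-swapping;
+ * compare lisp_nsh_fib_lookup() above, which takes spi_si_net_order.
+ */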
+
+#define NSH_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define NSH_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+static void
+nsh_fib_init (lisp_gpe_main_t * lgm)
+{
+ BV (clib_bihash_init) (&lgm->nsh_fib, "nsh fib",
+ 1 << max_log2 (NSH_FIB_DEFAULT_HASH_NUM_BUCKETS),
+ NSH_FIB_DEFAULT_HASH_MEMORY_SIZE);
+
+ /*
+   * the result from a 'miss' in an NSH table
+ */
+ lgm->nsh_cp_lkup = lisp_cp_dpo_get (DPO_PROTO_NSH);
+}
+
+static void
+del_nsh_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe)
+{
+ lisp_fwd_path_t *path;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+ {
+ vec_foreach (path, lfe->paths)
+ {
+ lisp_gpe_adjacency_unlock (path->lisp_adj);
+ }
+ fib_path_list_child_remove (lfe->nsh.path_list_index,
+ lfe->nsh.child_index);
+ dpo_reset (&lfe->nsh.choice);
+ }
+
+ lisp_nsh_fib_add_del_entry (fid_addr_nsh (&lfe->key->rmt), (u32) ~ 0, 0);
+
+ hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key);
+ clib_mem_free (lfe->key);
+ pool_put (lgm->lisp_fwd_entry_pool, lfe);
+}
+
+/**
+ * @brief Delete LISP NSH forwarding entry.
+ *
+ * Coordinates the removal of forwarding entries for the NSH LISP overlay.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] a Parameters for building the forwarding entry.
+ *
+ * @return 0 on success.
+ */
+static int
+del_nsh_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+ lisp_gpe_fwd_entry_key_t key;
+ lisp_gpe_fwd_entry_t *lfe;
+
+ lfe = find_fwd_entry (lgm, a, &key);
+
+ if (NULL == lfe)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ del_nsh_fwd_entry_i (lgm, lfe);
+
+ return (0);
+}
+
+/**
+ * @brief Construct and insert the forwarding information used by an NSH entry
+ */
+static void
+lisp_gpe_nsh_update_fwding (lisp_gpe_fwd_entry_t * lfe)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ dpo_id_t dpo = DPO_INVALID;
+ vnet_hw_interface_t *hi;
+ uword *hip;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+ {
+ fib_path_list_contribute_forwarding (lfe->nsh.path_list_index,
+ FIB_FORW_CHAIN_TYPE_NSH,
+ &lfe->nsh.dpo);
+
+ /*
+ * LISP encap is always the same for this SPI+SI so we do that hash now
+ * and stack on the choice.
+ */
+ if (DPO_LOAD_BALANCE == lfe->nsh.dpo.dpoi_type)
+ {
+ const dpo_id_t *tmp;
+ const load_balance_t *lb;
+ int hash;
+
+ lb = load_balance_get (lfe->nsh.dpo.dpoi_index);
+ hash = fid_addr_nsh (&lfe->key->rmt) % lb->lb_n_buckets;
+ tmp =
+ load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+
+ dpo_copy (&dpo, tmp);
+ }
+ }
+ else
+ {
+ switch (lfe->action)
+ {
+ case SEND_MAP_REQUEST:
+ dpo_copy (&dpo, lgm->nsh_cp_lkup);
+ break;
+ case NO_ACTION:
+ case FORWARD_NATIVE:
+ case DROP:
+ dpo_copy (&dpo, drop_dpo_get (DPO_PROTO_NSH));
+ }
+ }
+
+ /* We have only one nsh-lisp interface (no NSH virtualization) */
+ hip = hash_get (lgm->nsh_ifaces.hw_if_index_by_dp_table, 0);
+ if (hip)
+ {
+ hi = vnet_get_hw_interface (lgm->vnet_main, hip[0]);
+ dpo_stack_from_node (hi->tx_node_index, &lfe->nsh.choice, &dpo);
+ }
+ /* add entry to nsh lisp fib */
+ lisp_nsh_fib_add_del_entry (fid_addr_nsh (&lfe->key->rmt),
+ lfe - lgm->lisp_fwd_entry_pool, 1);
+ dpo_reset (&dpo);
+
+}
+
+/**
+ * @brief Add LISP NSH forwarding entry.
+ *
+ * Coordinates the creation of forwarding entries for L2 LISP overlay:
+ * creates lisp-gpe tunnel and injects new entry in Source/Dest L2 FIB.
+ *
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] a Parameters for building the forwarding entry.
+ *
+ * @return 0 on success.
+ */
+static int
+add_nsh_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+ lisp_gpe_fwd_entry_key_t key;
+ lisp_gpe_fwd_entry_t *lfe;
+
+ lfe = find_fwd_entry (lgm, a, &key);
+
+ if (NULL != lfe)
+ /* don't support updates */
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get (lgm->lisp_fwd_entry_pool, lfe);
+ memset (lfe, 0, sizeof (*lfe));
+ lfe->key = clib_mem_alloc (sizeof (key));
+ memcpy (lfe->key, &key, sizeof (key));
+
+ hash_set_mem (lgm->lisp_gpe_fwd_entries, lfe->key,
+ lfe - lgm->lisp_fwd_entry_pool);
+ a->fwd_entry_index = lfe - lgm->lisp_fwd_entry_pool;
+
+ lfe->type = (a->is_negative ?
+ LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE :
+ LISP_GPE_FWD_ENTRY_TYPE_NORMAL);
+ lfe->tenant = 0;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+ {
+ fib_route_path_t *rpaths;
+
+ /*
+ * Make the sorted array of LISP paths with their resp. adjacency
+ */
+ lisp_gpe_fwd_entry_mk_paths (lfe, a);
+
+ /*
+ * From the LISP paths, construct a FIB path list that will
+ * contribute a load-balance.
+ */
+ rpaths = lisp_gpe_mk_fib_paths (lfe->paths);
+
+ lfe->nsh.path_list_index =
+ fib_path_list_create (FIB_PATH_LIST_FLAG_NONE, rpaths);
+
+ /*
+ * become a child of the path-list so we receive updates when
+ * its forwarding state changes. this includes an implicit lock.
+ */
+ lfe->nsh.child_index =
+ fib_path_list_child_add (lfe->nsh.path_list_index,
+ FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY,
+ lfe - lgm->lisp_fwd_entry_pool);
+ }
+ else
+ {
+ lfe->action = a->action;
+ }
+
+ lisp_gpe_nsh_update_fwding (lfe);
+
+ return 0;
+}
+
+/**
+ * @brief Convert from the embedded fib_node_t struct to the LISP entry
+ */
+static lisp_gpe_fwd_entry_t *
+lisp_gpe_fwd_entry_from_fib_node (fib_node_t * node)
+{
+ return ((lisp_gpe_fwd_entry_t *) (((char *) node) -
+ STRUCT_OFFSET_OF (lisp_gpe_fwd_entry_t,
+ node)));
+}
+
+/**
+ * @brief Function invoked during a backwalk of the FIB graph
+ */
+static fib_node_back_walk_rc_t
+lisp_gpe_fib_node_back_walk (fib_node_t * node,
+ fib_node_back_walk_ctx_t * ctx)
+{
+ lisp_gpe_fwd_entry_t *lfe = lisp_gpe_fwd_entry_from_fib_node (node);
+
+ if (fid_addr_type (&lfe->key->rmt) == FID_ADDR_MAC)
+ lisp_gpe_l2_update_fwding (lfe);
+ else if (fid_addr_type (&lfe->key->rmt) == FID_ADDR_NSH)
+ lisp_gpe_nsh_update_fwding (lfe);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * @brief Get a fib_node_t struct from the index of a LISP fwd entry
+ */
+static fib_node_t *
+lisp_gpe_fwd_entry_get_fib_node (fib_node_index_t index)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ lisp_gpe_fwd_entry_t *lfe;
+
+ lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, index);
+
+ return (&(lfe->node));
+}
+
+/**
+ * @brief An indication from the graph that the last lock has gone
+ */
+static void
+lisp_gpe_fwd_entry_fib_node_last_lock_gone (fib_node_t * node)
+{
+ /* We don't manage the locks of the LISP objects via the graph, since
+   * this object has no children. So this is a no-op. */
+}
+
+/**
+ * @brief Virtual function table to register with FIB for the LISP type
+ */
+const static fib_node_vft_t lisp_fwd_vft = {
+ .fnv_get = lisp_gpe_fwd_entry_get_fib_node,
+ .fnv_last_lock = lisp_gpe_fwd_entry_fib_node_last_lock_gone,
+ .fnv_back_walk = lisp_gpe_fib_node_back_walk,
+};
+
+/**
+ * @brief Forwarding entry create/remove dispatcher.
+ *
+ * Calls the L2, L3 or NSH forwarding entry add/del function based on the
+ * type of the remote EID.
+ *
+ * @param[in] a Forwarding entry parameters.
+ * @param[out] hw_if_indexp NOT USED
+ *
+ * @return 0 on success.
+ */
+int
+vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ u32 * hw_if_indexp)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ u8 type;
+
+ if (vnet_lisp_gpe_enable_disable_status () == 0)
+ {
+ clib_warning ("LISP is disabled!");
+ return VNET_API_ERROR_LISP_DISABLED;
+ }
+
+ type = gid_address_type (&a->rmt_eid);
+ switch (type)
+ {
+ case GID_ADDR_IP_PREFIX:
+ if (a->is_add)
+ return add_ip_fwd_entry (lgm, a);
+ else
+ return del_ip_fwd_entry (lgm, a);
+ break;
+ case GID_ADDR_MAC:
+ if (a->is_add)
+ return add_l2_fwd_entry (lgm, a);
+ else
+ return del_l2_fwd_entry (lgm, a);
+ case GID_ADDR_NSH:
+ if (a->is_add)
+ return add_nsh_fwd_entry (lgm, a);
+ else
+ return del_nsh_fwd_entry (lgm, a);
+ default:
+ clib_warning ("Forwarding entries for type %d not supported!", type);
+ return -1;
+ }
+}
+
+int
+vnet_lisp_flush_stats (void)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ vlib_combined_counter_main_t *cm = &lgm->counters;
+ u32 i;
+
+ for (i = 0; i < vlib_combined_counter_n_counters (cm); i++)
+ vlib_zero_combined_counter (cm, i);
+
+ return 0;
+}
+
+static void
+lisp_del_adj_stats (lisp_gpe_main_t * lgm, u32 fwd_entry_index, u32 ti)
+{
+ hash_pair_t *hp;
+ lisp_stats_key_t key;
+ void *key_copy;
+ uword *p;
+ u8 *s;
+
+ memset (&key, 0, sizeof (key));
+ key.fwd_entry_index = fwd_entry_index;
+ key.tunnel_index = ti;
+
+ p = hash_get_mem (lgm->lisp_stats_index_by_key, &key);
+ if (p)
+ {
+ s = pool_elt_at_index (lgm->dummy_stats_pool, p[0]);
+ hp = hash_get_pair (lgm->lisp_stats_index_by_key, &key);
+ key_copy = (void *) (hp->key);
+ hash_unset_mem (lgm->lisp_stats_index_by_key, &key);
+ clib_mem_free (key_copy);
+ pool_put (lgm->dummy_stats_pool, s);
+ }
+}
+
+void
+vnet_lisp_gpe_del_fwd_counters (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ u32 fwd_entry_index)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ lisp_gpe_fwd_entry_key_t fe_key;
+ lisp_gpe_fwd_entry_t *lfe;
+ lisp_fwd_path_t *path;
+ const lisp_gpe_adjacency_t *ladj;
+
+ lfe = find_fwd_entry (lgm, a, &fe_key);
+ if (!lfe)
+ return;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NORMAL != lfe->type)
+ return;
+
+ vec_foreach (path, lfe->paths)
+ {
+ ladj = lisp_gpe_adjacency_get (path->lisp_adj);
+ lisp_del_adj_stats (lgm, fwd_entry_index, ladj->tunnel_index);
+ }
+}
+
+/**
+ * @brief Flush all the forwarding entries
+ */
+void
+vnet_lisp_gpe_fwd_entry_flush (void)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ lisp_gpe_fwd_entry_t *lfe;
+
+ /* *INDENT-OFF* */
+ pool_foreach (lfe, lgm->lisp_fwd_entry_pool,
+ ({
+ switch (fid_addr_type(&lfe->key->rmt))
+ {
+ case FID_ADDR_MAC:
+ del_l2_fwd_entry_i (lgm, lfe);
+ break;
+ case FID_ADDR_IP_PREF:
+ del_ip_fwd_entry_i (lgm, lfe);
+ break;
+ case FID_ADDR_NSH:
+ del_nsh_fwd_entry_i (lgm, lfe);
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+static u8 *
+format_lisp_fwd_path (u8 * s, va_list ap)
+{
+ lisp_fwd_path_t *lfp = va_arg (ap, lisp_fwd_path_t *);
+
+ s = format (s, "weight:%d ", lfp->weight);
+ s = format (s, "adj:[%U]\n",
+ format_lisp_gpe_adjacency,
+ lisp_gpe_adjacency_get (lfp->lisp_adj),
+ LISP_GPE_ADJ_FORMAT_FLAG_NONE);
+
+ return (s);
+}
+
+typedef enum lisp_gpe_fwd_entry_format_flag_t_
+{
+ LISP_GPE_FWD_ENTRY_FORMAT_NONE = (0 << 0),
+ LISP_GPE_FWD_ENTRY_FORMAT_DETAIL = (1 << 1),
+} lisp_gpe_fwd_entry_format_flag_t;
+
+
+static u8 *
+format_lisp_gpe_fwd_entry (u8 * s, va_list ap)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ lisp_gpe_fwd_entry_t *lfe = va_arg (ap, lisp_gpe_fwd_entry_t *);
+ lisp_gpe_fwd_entry_format_flag_t flags =
+ va_arg (ap, lisp_gpe_fwd_entry_format_flag_t);
+
+ s = format (s, "VNI:%d VRF:%d EID: %U -> %U [index:%d]",
+ lfe->key->vni, lfe->eid_table_id,
+ format_fid_address, &lfe->key->lcl,
+ format_fid_address, &lfe->key->rmt,
+ lfe - lgm->lisp_fwd_entry_pool);
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE == lfe->type)
+ {
+ s = format (s, "\n Negative - action:%U",
+ format_negative_mapping_action, lfe->action);
+ }
+ else
+ {
+ lisp_fwd_path_t *path;
+
+ s = format (s, "\n via:");
+ vec_foreach (path, lfe->paths)
+ {
+ s = format (s, "\n %U", format_lisp_fwd_path, path);
+ }
+ }
+
+ if (flags & LISP_GPE_FWD_ENTRY_FORMAT_DETAIL)
+ {
+ switch (fid_addr_type (&lfe->key->rmt))
+ {
+ case FID_ADDR_MAC:
+ s = format (s, " fib-path-list:%d\n", lfe->l2.path_list_index);
+ s = format (s, " dpo:%U\n", format_dpo_id, &lfe->l2.dpo, 0);
+ break;
+ case FID_ADDR_NSH:
+ s = format (s, " fib-path-list:%d\n", lfe->nsh.path_list_index);
+ s = format (s, " dpo:%U\n", format_dpo_id, &lfe->nsh.dpo, 0);
+ break;
+ case FID_ADDR_IP_PREF:
+ break;
+ }
+ }
+
+ return (s);
+}
+
+static clib_error_t *
+lisp_gpe_fwd_entry_show (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ lisp_gpe_fwd_entry_t *lfe;
+ index_t index;
+ u32 vni = ~0;
+
+ if (unformat (input, "vni %d", &vni))
+ ;
+ else if (unformat (input, "%d", &index))
+ {
+ if (!pool_is_free_index (lgm->lisp_fwd_entry_pool, index))
+ {
+ lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, index);
+
+ vlib_cli_output (vm, "[%d@] %U",
+ index,
+ format_lisp_gpe_fwd_entry, lfe,
+ LISP_GPE_FWD_ENTRY_FORMAT_DETAIL);
+ }
+ else
+ {
+ vlib_cli_output (vm, "entry %d invalid", index);
+ }
+
+ return (NULL);
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach (lfe, lgm->lisp_fwd_entry_pool,
+ ({
+ if ((vni == ~0) ||
+ (lfe->key->vni == vni))
+ vlib_cli_output (vm, "%U", format_lisp_gpe_fwd_entry, lfe,
+ LISP_GPE_FWD_ENTRY_FORMAT_NONE);
+ }));
+ /* *INDENT-ON* */
+
+ return (NULL);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_gpe_fwd_entry_show_command, static) = {
+ .path = "show gpe entry",
+ .short_help = "show gpe entry vni <vni> vrf <vrf> [leid <leid>] reid <reid>",
+ .function = lisp_gpe_fwd_entry_show,
+};
+/* *INDENT-ON* */
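+
+/*
+ * Example usage (hypothetical values); note that the handler above
+ * currently parses only "vni <nn>" or a bare entry index:
+ *   vpp# show gpe entry vni 100
+ *   vpp# show gpe entry 3
+ */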
+
+clib_error_t *
+lisp_gpe_fwd_entry_init (vlib_main_t * vm)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ clib_error_t *error = NULL;
+
+ if ((error = vlib_call_init_function (vm, lisp_cp_dpo_module_init)))
+ return (error);
+
+ l2_fib_init (lgm);
+ nsh_fib_init (lgm);
+
+ fib_node_register_type (FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY, &lisp_fwd_vft);
+
+ return (error);
+}
+
+u32 *
+vnet_lisp_gpe_get_fwd_entry_vnis (void)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ lisp_gpe_fwd_entry_t *lfe;
+ u32 *vnis = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (lfe, lgm->lisp_fwd_entry_pool,
+ ({
+ hash_set (vnis, lfe->key->vni, 0);
+ }));
+ /* *INDENT-ON* */
+
+ return vnis;
+}
+
+lisp_api_gpe_fwd_entry_t *
+vnet_lisp_gpe_fwd_entries_get_by_vni (u32 vni)
+{
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ lisp_gpe_fwd_entry_t *lfe;
+ lisp_api_gpe_fwd_entry_t *entries = 0, e;
+
+ /* *INDENT-OFF* */
+ pool_foreach (lfe, lgm->lisp_fwd_entry_pool,
+ ({
+ if (lfe->key->vni == vni)
+ {
+ memset (&e, 0, sizeof (e));
+ e.dp_table = lfe->eid_table_id;
+ e.vni = lfe->key->vni;
+ if (lfe->type == LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE)
+ e.action = lfe->action;
+ e.fwd_entry_index = lfe - lgm->lisp_fwd_entry_pool;
+ memcpy (&e.reid, &lfe->key->rmt, sizeof (e.reid));
+ memcpy (&e.leid, &lfe->key->lcl, sizeof (e.leid));
+ vec_add1 (entries, e);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ return entries;
+}
+
+int
+vnet_lisp_gpe_get_fwd_stats (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ vlib_counter_t * c)
+{
+ lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+ lisp_gpe_fwd_entry_t *lfe;
+ lisp_gpe_fwd_entry_key_t unused;
+
+ lfe = find_fwd_entry (lgm, a, &unused);
+ if (NULL == lfe)
+ return -1;
+
+ if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE == lfe->type)
+ return -1;
+
+ if (~0 == lfe->dpoi_index)
+ return -1;
+
+ vlib_get_combined_counter (&load_balance_main.lbm_to_counters,
+ lfe->dpoi_index, c);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (lisp_gpe_fwd_entry_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h
new file mode 100644
index 00000000..dfdb8b91
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LISP-GPE forwarding entry definitions.
+ */
+
+#ifndef __LISP_GPE_FWD_ENTRY_H__
+#define __LISP_GPE_FWD_ENTRY_H__
+
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+/**
+ * @brief A path on which to forward lisp traffic
+ */
+typedef struct lisp_fwd_path_t_
+{
+ /**
+ * The adjacency constructed for the locator pair
+ */
+ index_t lisp_adj;
+
+ /**
+ * Priority. Only the paths with the best priority will be installed in FIB
+ */
+ u8 priority;
+
+ /**
+   * [UE]CMP weight for the path
+ */
+ u8 weight;
+
+} lisp_fwd_path_t;
+
+/**
+ * @brief A Forwarding entry can be 'normal' or 'negative'
+ * Negative implies we deliberately want to add a FIB entry for an EID
+ * that results in 'special' behaviour determined by an 'action'.
+ * 'Normal' means send it down some tunnels.
+ */
+typedef enum lisp_gpe_fwd_entry_type_t_
+{
+ LISP_GPE_FWD_ENTRY_TYPE_NORMAL,
+ LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE,
+} lisp_gpe_fwd_entry_type_t;
+
+
+/**
+ * LISP-GPE fwd entry key
+ */
+typedef struct lisp_gpe_fwd_entry_key_t_
+{
+ dp_address_t rmt;
+ dp_address_t lcl;
+ u32 vni;
+} lisp_gpe_fwd_entry_key_t;
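+
+/*
+ * Note: keys are hashed by value (hash_set_mem/hash_get_mem), so callers
+ * memset the whole struct before filling it to keep padding bytes zero.
+ */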
+
+/**
+ * @brief A LISP Forwarding Entry
+ *
+ * A forwarding entry is from a local EID to a remote EID over a set of RLOC pairs
+ */
+typedef struct lisp_gpe_fwd_entry_t_
+{
+ /**
+ * Follows src/dst or dst only forwarding policy
+ */
+ u8 is_src_dst;
+
+ /**
+   * This object joins the FIB control plane graph to receive updates
+   * when the graph changes.
+ */
+ fib_node_t node;
+
+ /**
+ * The Entry's key: {lEID,rEID,vni}
+ */
+ lisp_gpe_fwd_entry_key_t *key;
+
+ /**
+ * The forwarding entry type
+ */
+ lisp_gpe_fwd_entry_type_t type;
+
+ /**
+ * The tenant the entry belongs to
+ */
+ u32 tenant;
+
+ /**
+ * The VRF (in the case of L3) or Bridge-Domain (for L2) index
+ */
+ union
+ {
+ /**
+     * Fields relevant to an L3 entry
+ */
+ struct
+ {
+ /**
+ * The VRF ID
+ */
+ u32 eid_table_id;
+
+ /**
+ * The FIB index for the overlay, i.e. the FIB in which the EIDs
+ * are present
+ */
+ u32 eid_fib_index;
+ /**
+       * The SRC-FIB index created for adding source-route entries
+ */
+ u32 src_fib_index;
+ };
+ /**
+ * Fields relevant to an L2 entry
+ */
+ struct
+ {
+ /**
+ * The Bridge-Domain (for L2) index
+ */
+ u32 eid_bd_id;
+
+ /**
+ * The Bridge-domain index for the overlay EIDs
+ */
+ u32 eid_bd_index;
+
+ /**
+ * The path-list created for the forwarding
+ */
+ fib_node_index_t path_list_index;
+
+ /**
+ * Child index of this entry on the path-list
+ */
+ u32 child_index;
+
+ /**
+ * The DPO used to forward
+ */
+ dpo_id_t dpo;
+ } l2;
+
+ /**
+ * Fields relevant to an NSH entry
+ */
+ struct
+ {
+ /**
+ * The path-list created for the forwarding
+ */
+ fib_node_index_t path_list_index;
+
+ /**
+ * Child index of this entry on the path-list
+ */
+ u32 child_index;
+
+ /**
+ * The DPO contributed by NSH
+ */
+ dpo_id_t dpo;
+
+ /**
+ * The DPO used for forwarding. Obtained after stacking tx node
+ * onto lb choice
+ */
+ dpo_id_t choice;
+ } nsh;
+ };
+
+ union
+ {
+ /**
+ * @brief When the type is 'normal'
+ * The RLOC pair that form the route's paths. i.e. where to send
+ * packets for this route.
+ */
+ lisp_fwd_path_t *paths;
+
+ /**
+ * @brief When the type is negative. The action to take.
+ */
+ negative_fwd_actions_e action;
+ };
+
+ /**
+   * Used for getting load-balance statistics.
+ */
+ index_t dpoi_index;
+
+} lisp_gpe_fwd_entry_t;
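+
+/*
+ * Editorial sketch, not part of this patch: the 'type' field
+ * discriminates the union above, so readers switch on it before
+ * touching 'paths' or 'action'. 'lgfe' and 'path' are illustrative.
+ *
+ *   switch (lgfe->type)
+ *     {
+ *     case LISP_GPE_FWD_ENTRY_TYPE_NORMAL:
+ *       vec_foreach (path, lgfe->paths)
+ *         ... forward via path->lisp_adj ...
+ *       break;
+ *     case LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE:
+ *       ... apply lgfe->action ...
+ *       break;
+ *     }
+ */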
+
+extern int
+vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ u32 * hw_if_indexp);
+
+extern void vnet_lisp_gpe_fwd_entry_flush (void);
+
+extern u32 lisp_l2_fib_lookup (lisp_gpe_main_t * lgm,
+ u16 bd_index, u8 src_mac[8], u8 dst_mac[8]);
+
+extern const dpo_id_t *lisp_nsh_fib_lookup (lisp_gpe_main_t * lgm,
+ u32 spi_si);
+extern void
+vnet_lisp_gpe_del_fwd_counters (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ u32 fwd_entry_index);
+extern void
+vnet_lisp_gpe_add_fwd_counters (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ u32 fwd_entry_index);
+extern u32 *vnet_lisp_gpe_get_fwd_entry_vnis (void);
+
+int
+vnet_lisp_gpe_get_fwd_stats (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ vlib_counter_t * c);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_packet.h b/src/vnet/lisp-gpe/lisp_gpe_packet.h
new file mode 100644
index 00000000..62ac9bd7
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_packet.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LISP-GPE packet header structure
+ *
+ */
+
+#ifndef included_lisp_gpe_packet_h
+#define included_lisp_gpe_packet_h
+
+/*
+ * From draft-lewis-lisp-gpe-02.txt
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |N|L|E|V|I|P|R|O|Ver| Reserved | Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Instance ID/Locator-Status-Bits |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * N: The N-bit is the nonce-present bit. When this bit is set to 1,
+ * the low-order 24 bits of the first 32 bits of the LISP header
+ * contain a Nonce. See Section 6.3.1 for details. Both N- and
+ * V-bits MUST NOT be set in the same packet. If they are, a
+ * decapsulating ETR MUST treat the 'Nonce/Map-Version' field as
+ * having a Nonce value present.
+ *
+ * L: The L-bit is the 'Locator-Status-Bits' field enabled bit. When
+ * this bit is set to 1, the Locator-Status-Bits in the second
+ * 32 bits of the LISP header are in use.
+ *
+ * E: The E-bit is the echo-nonce-request bit. This bit MUST be ignored
+ * and has no meaning when the N-bit is set to 0. When the N-bit is
+ * set to 1 and this bit is set to 1, an ITR is requesting that the
+ * nonce value in the 'Nonce' field be echoed back in LISP-
+ * encapsulated packets when the ITR is also an ETR. See
+ * Section 6.3.1 for details.
+ *
+ * V: The V-bit is the Map-Version present bit. When this bit is set to
+ * 1, the N-bit MUST be 0. Refer to Section 6.6.3 for more details.
+ *
+ * I: The I-bit is the Instance ID bit. See Section 5.5 for more
+ * details. When this bit is set to 1, the 'Locator-Status-Bits'
+ * field is reduced to 8 bits and the high-order 24 bits are used as
+ * an Instance ID. If the L-bit is set to 0, then the low-order
+ * 8 bits are transmitted as zero and ignored on receipt.
+ *
+ * P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ * MUST be set to 1 to indicate the presence of the 8 bit next
+ * protocol field.
+ *
+ * P = 0 indicates that the payload MUST conform to LISP as defined
+ * in [RFC6830].
+ *
+ * Flag bit 5 was chosen as the P bit because this flag bit is
+ * currently unallocated in LISP [RFC6830].
+ *
+ * O: Flag bit 7 is defined as the O bit. When the O bit is set to 1, the
+ * packet is an OAM packet and OAM processing MUST occur. The OAM
+ * protocol details are out of scope for this document. As with the
+ * P-bit, bit 7 is currently a reserved flag in [RFC6830].
+ *
+ * Next Protocol Field: The lower 8 bits of the first word are used to
+ * carry a next protocol. This next protocol field contains the
+ * protocol of the encapsulated payload packet.
+ *
+ * LISP [RFC6830] uses the lower 16 bits of the first word for either
+ * a nonce, an echo-nonce ([RFC6830]) or to support map-versioning
+ * ([RFC6834]). These are all optional capabilities that are
+ * indicated by setting the N, E, and the V bit respectively.
+ *
+ * To maintain the desired data plane compatibility, when the P bit
+ * is set, the N, E, and V bits MUST be set to zero.
+ *
+ * A new protocol registry will be requested from IANA for the Next
+ * Protocol field. This draft defines the following Next Protocol
+ * values:
+ *
+ * 0x1 : IPv4
+ * 0x2 : IPv6
+ * 0x3 : Ethernet
+ * 0x4: Network Service Header
+ */
+
+/** LISP-GPE header */
+typedef struct
+{
+ u8 flags;
+ u8 ver_res;
+ u8 res;
+ u8 next_protocol;
+ u32 iid;
+} lisp_gpe_header_t;
+
+#define foreach_lisp_gpe_flag_bit \
+_(N, 0x80) \
+_(L, 0x40) \
+_(E, 0x20) \
+_(V, 0x10) \
+_(I, 0x08) \
+_(P, 0x04) \
+_(O, 0x01)
+
+typedef enum
+{
+#define _(n,v) LISP_GPE_FLAGS_##n = v,
+ foreach_lisp_gpe_flag_bit
+#undef _
+} vnet_lisp_gpe_flag_bit_t;
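+
+/*
+ * Editorial sketch, not part of this patch: the foreach_lisp_gpe_flag_bit
+ * X-macro above can be re-expanded wherever the flag list is needed, e.g.
+ * a vppinfra-style formatter (format_lisp_gpe_flags is an assumed name):
+ *
+ *   static u8 *
+ *   format_lisp_gpe_flags (u8 * s, va_list * args)
+ *   {
+ *     u8 flags = va_arg (*args, u32);
+ *
+ *   #define _(n, v) if (flags & LISP_GPE_FLAGS_##n) s = format (s, #n " ");
+ *     foreach_lisp_gpe_flag_bit
+ *   #undef _
+ *     return (s);
+ *   }
+ */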
+
+#define LISP_GPE_VERSION 0x0
+
+#define LISP_GPE_NEXT_PROTOCOL_IP4 0x1
+#define LISP_GPE_NEXT_PROTOCOL_IP6 0x2
+#define LISP_GPE_NEXT_PROTOCOL_ETHERNET 0x3
+#define LISP_GPE_NEXT_PROTOCOL_NSH 0x4
+
+typedef enum
+{
+ LISP_GPE_NEXT_PROTO_IP4 = 1,
+ LISP_GPE_NEXT_PROTO_IP6,
+ LISP_GPE_NEXT_PROTO_ETHERNET,
+ LISP_GPE_NEXT_PROTO_NSH,
+ LISP_GPE_NEXT_PROTOS
+} lisp_gpe_next_protocol_e;
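+
+/*
+ * Editorial sketch, not part of this patch: a decap path should trust
+ * 'next_protocol' only when the P bit is set; with P clear the payload
+ * conforms to RFC 6830 and the first nibble of the payload gives the IP
+ * version. 'h' is an illustrative pointer to a received header.
+ *
+ *   if (h->flags & LISP_GPE_FLAGS_P)
+ *     next_proto = h->next_protocol;
+ *   else
+ *     next_proto = ((*(u8 *) (h + 1)) >> 4) == 4 ?
+ *       LISP_GPE_NEXT_PROTO_IP4 : LISP_GPE_NEXT_PROTO_IP6;
+ */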
+
+#endif /* included_lisp_gpe_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c
new file mode 100644
index 00000000..26664f53
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LISP sub-interfaces.
+ *
+ */
+#include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+#include <vnet/lisp-gpe/lisp_gpe_sub_interface.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/interface.h>
+
+/**
+ * @brief Pool of all l3-sub-interfaces
+ */
+static lisp_gpe_sub_interface_t *lisp_gpe_sub_interface_pool;
+
+/**
+ * A DB of all LISP L3 sub-interfaces. The key is {VNI, l-RLOC}.
+ */
+static uword *lisp_gpe_sub_interfaces;
+
+/**
+ * A DB of all VNET L3 sub-interfaces. The key is {VNI, l-RLOC}.
+ * Used in the data-plane for interface lookup on decap.
+ */
+uword *lisp_gpe_sub_interfaces_sw_if_index;
+
+/**
+ * The next available sub-interface ID. FIXME
+ */
+static u32 lisp_gpe_sub_interface_id;
+
+
+static index_t
+lisp_gpe_sub_interface_db_find (const ip_address_t * lrloc, u32 vni)
+{
+ uword *p;
+
+ lisp_gpe_sub_interface_key_t key;
+
+ memset (&key, 0, sizeof (key));
+ ip_address_copy (&key.local_rloc, lrloc);
+ key.vni = vni;
+ p = hash_get_mem (lisp_gpe_sub_interfaces, &key);
+
+ if (NULL == p)
+ return (INDEX_INVALID);
+ else
+ return (p[0]);
+}
+
+static void
+lisp_gpe_sub_interface_db_insert (const lisp_gpe_sub_interface_t * l3s)
+{
+ hash_set_mem (lisp_gpe_sub_interfaces,
+ l3s->key, l3s - lisp_gpe_sub_interface_pool);
+ hash_set_mem (lisp_gpe_sub_interfaces_sw_if_index,
+ l3s->key, l3s->sw_if_index);
+}
+
+static void
+lisp_gpe_sub_interface_db_remove (const lisp_gpe_sub_interface_t * l3s)
+{
+ hash_unset_mem (lisp_gpe_sub_interfaces, l3s->key);
+ hash_unset_mem (lisp_gpe_sub_interfaces_sw_if_index, l3s->key);
+}
+
+lisp_gpe_sub_interface_t *
+lisp_gpe_sub_interface_get_i (index_t l3si)
+{
+ return (pool_elt_at_index (lisp_gpe_sub_interface_pool, l3si));
+}
+
+static void
+lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id)
+{
+ fib_node_index_t fib_index;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id,
+ FIB_SOURCE_LISP);
+ ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
+
+ vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id,
+ FIB_SOURCE_LISP);
+ ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
+
+ vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+}
+
+static void
+lisp_gpe_sub_interface_unset_table (u32 sw_if_index, u32 table_id)
+{
+ fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_IP4, FIB_SOURCE_LISP);
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = 0;
+ ip4_sw_interface_enable_disable (sw_if_index, 0);
+
+ fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_IP6, FIB_SOURCE_LISP);
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = 0;
+ ip6_sw_interface_enable_disable (sw_if_index, 0);
+}
+
+index_t
+lisp_gpe_sub_interface_find_or_create_and_lock (const ip_address_t * lrloc,
+ u32 overlay_table_id, u32 vni)
+{
+ lisp_gpe_sub_interface_t *l3s;
+ index_t l3si;
+
+ l3si = lisp_gpe_sub_interface_db_find (lrloc, vni);
+
+ if (INDEX_INVALID == l3si)
+ {
+ u32 main_sw_if_index, sub_sw_if_index;
+
+ /*
+ * find the main interface from the VNI
+ */
+ main_sw_if_index =
+ lisp_gpe_tenant_l3_iface_add_or_lock (vni, overlay_table_id);
+
+ vnet_sw_interface_t sub_itf_template = {
+ .type = VNET_SW_INTERFACE_TYPE_SUB,
+ .flood_class = VNET_FLOOD_CLASS_NORMAL,
+ .sup_sw_if_index = main_sw_if_index,
+ .sub.id = lisp_gpe_sub_interface_id++,
+ };
+
+ if (NULL != vnet_create_sw_interface (vnet_get_main (),
+ &sub_itf_template,
+ &sub_sw_if_index))
+ return (INDEX_INVALID);
+
+ pool_get (lisp_gpe_sub_interface_pool, l3s);
+ memset (l3s, 0, sizeof (*l3s));
+ l3s->key = clib_mem_alloc (sizeof (*l3s->key));
+ memset (l3s->key, 0, sizeof (*l3s->key));
+
+ ip_address_copy (&l3s->key->local_rloc, lrloc);
+ l3s->key->vni = vni;
+ l3s->main_sw_if_index = main_sw_if_index;
+ l3s->sw_if_index = sub_sw_if_index;
+ l3s->eid_table_id = overlay_table_id;
+
+ l3si = (l3s - lisp_gpe_sub_interface_pool);
+
+      /* FIXME: enable these when we get an adjacency */
+ ip6_sw_interface_enable_disable (l3s->sw_if_index, 1);
+ ip4_sw_interface_enable_disable (l3s->sw_if_index, 1);
+
+ vnet_sw_interface_set_flags (vnet_get_main (),
+ l3s->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ lisp_gpe_sub_interface_db_insert (l3s);
+ }
+ else
+ {
+ l3s = lisp_gpe_sub_interface_get_i (l3si);
+ l3s->eid_table_id = overlay_table_id;
+ }
+
+ lisp_gpe_sub_interface_set_table (l3s->sw_if_index, l3s->eid_table_id);
+ l3s->locks++;
+
+ return (l3si);
+}
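+
+/*
+ * Editorial sketch, not part of this patch: callers pair each
+ * find_or_create_and_lock with an unlock; the sub-interface and its FIB
+ * bindings are torn down when the last lock is released. 'lrloc',
+ * 'table_id' and 'vni' are illustrative.
+ *
+ *   index_t l3si;
+ *
+ *   l3si = lisp_gpe_sub_interface_find_or_create_and_lock (&lrloc,
+ *                                                          table_id, vni);
+ *   if (INDEX_INVALID != l3si)
+ *     {
+ *       ... use lisp_gpe_sub_interface_get (l3si)->sw_if_index ...
+ *       lisp_gpe_sub_interface_unlock (l3si);
+ *     }
+ */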
+
+void
+lisp_gpe_sub_interface_unlock (index_t l3si)
+{
+ lisp_gpe_sub_interface_t *l3s;
+
+ l3s = lisp_gpe_sub_interface_get_i (l3si);
+
+ ASSERT (0 != l3s->locks);
+ l3s->locks--;
+
+ if (0 == l3s->locks)
+ {
+ lisp_gpe_sub_interface_unset_table (l3s->sw_if_index,
+ l3s->eid_table_id);
+
+ lisp_gpe_tenant_l3_iface_unlock (l3s->key->vni);
+ vnet_sw_interface_set_flags (vnet_get_main (), l3s->sw_if_index, 0);
+ vnet_delete_sub_interface (l3s->sw_if_index);
+
+ lisp_gpe_sub_interface_db_remove (l3s);
+
+ clib_mem_free (l3s->key);
+ pool_put (lisp_gpe_sub_interface_pool, l3s);
+ }
+}
+
+const lisp_gpe_sub_interface_t *
+lisp_gpe_sub_interface_get (index_t l3si)
+{
+ return (lisp_gpe_sub_interface_get_i (l3si));
+}
+
+u8 *
+format_lisp_gpe_sub_interface (u8 * s, va_list ap)
+{
+ lisp_gpe_sub_interface_t *l3s = va_arg (ap, lisp_gpe_sub_interface_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+
+ s = format (s, "%-16U",
+ format_vnet_sw_interface_name,
+ vnm, vnet_get_sw_interface (vnm, l3s->sw_if_index));
+ s = format (s, "%=8d", l3s->key->vni);
+ s = format (s, "%=15d", l3s->sw_if_index);
+ s = format (s, "%U", format_ip_address, &l3s->key->local_rloc);
+
+ return (s);
+}
+
+/** CLI command to show LISP-GPE interfaces. */
+static clib_error_t *
+lisp_gpe_sub_interface_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_sub_interface_t *l3s;
+
+ vlib_cli_output (vm, "%-16s%=8s%=15s%s", "Name", "VNI", "sw_if_index",
+ "local RLOC");
+
+ /* *INDENT-OFF* */
+ pool_foreach (l3s, lisp_gpe_sub_interface_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_lisp_gpe_sub_interface, l3s);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_gpe_sub_interface_command) = {
+ .path = "show gpe sub-interface",
+ .short_help = "show gpe sub-interface",
+ .function = lisp_gpe_sub_interface_show,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_gpe_sub_interface_module_init (vlib_main_t * vm)
+{
+ lisp_gpe_sub_interfaces =
+ hash_create_mem (0,
+ sizeof (lisp_gpe_sub_interface_key_t), sizeof (uword));
+ lisp_gpe_sub_interfaces_sw_if_index =
+ hash_create_mem (0,
+ sizeof (lisp_gpe_sub_interface_key_t), sizeof (uword));
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lisp_gpe_sub_interface_module_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.h b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.h
new file mode 100644
index 00000000..ad942f41
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LISP sub-interfaces.
+ *
+ */
+
+#ifndef __LISP_GPE_SUB_INTERFACE_H__
+#define __LISP_GPE_SUB_INTERFACE_H__
+
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+/**
+ * A key for lookup in the L3 sub-interface DB
+ */
+typedef struct lisp_gpe_sub_interface_key_t_
+{
+ /**
+ * The local-RLOC. This is the interface's 'source' address.
+ */
+ ip_address_t local_rloc;
+
+ /**
+ * The VNI. In network byte order!
+ */
+ u32 vni;
+} lisp_gpe_sub_interface_key_t;
+
+/**
+ * @brief A LISP L3 sub-interface
+ *
+ * A LISP sub-interface is a multi-access interface, whose local address is a
+ * single local-RLOC. Adjacencies that form on this sub-interface, represent
+ * remote RLOCs.
+ * This is analogous to an Ethernet interface.
+ * As with all interface types it can only be present in one VRF, hence a
+ * LISP sub-interface is per-local-rloc and per-VNI.
+ */
+typedef struct lisp_gpe_sub_interface_t_
+{
+ /**
+   * The interface's key in the DB: {rloc, vni}.
+   * The key is allocated from the heap so it can be used in the hash-table;
+   * if it were part of the object it would be subject to pool realloc,
+   * which would invalidate the hash-table's reference.
+ */
+ lisp_gpe_sub_interface_key_t *key;
+
+ /**
+ * The Table-ID in the overlay that this interface is bound to.
+ */
+ u32 eid_table_id;
+
+ /**
+ * A reference counting lock on the number of users of this interface.
+ * When this count drops to 0 the interface is deleted.
+ */
+ u32 locks;
+
+ /**
+ * The SW if index assigned to this sub-interface
+ */
+ u32 sw_if_index;
+
+ /**
+ * The SW IF index assigned to the main interface of which this is a sub.
+ */
+ u32 main_sw_if_index;
+} lisp_gpe_sub_interface_t;
+
+extern index_t lisp_gpe_sub_interface_find_or_create_and_lock (const
+ ip_address_t *
+ lrloc,
+ u32
+ eid_table_id,
+ u32 vni);
+
+extern u8 *format_lisp_gpe_sub_interface (u8 * s, va_list ap);
+
+extern void lisp_gpe_sub_interface_unlock (index_t itf);
+
+extern const lisp_gpe_sub_interface_t *lisp_gpe_sub_interface_get (index_t
+ itf);
+
+/**
+ * A DB of all L3 sub-interfaces. The key is:{VNI,l-RLOC}
+ */
+extern uword *lisp_gpe_sub_interfaces_sw_if_index;
+
+/**
+ * @brief
+ * Get a VNET L3 interface matching the local-RLOC and VNI
+ * Called from the data-plane
+ */
+always_inline u32
+lisp_gpe_sub_interface_find_ip6 (const ip6_address_t * addr, u32 vni)
+{
+ lisp_gpe_sub_interface_key_t key;
+ const uword *p;
+
+  /* zero the key first: the mem-hash compares all sizeof (key) bytes,
+   * including any padding in the address union */
+  memset (&key, 0, sizeof (key));
+  key.local_rloc.ip.v6.as_u64[0] = addr->as_u64[0];
+  key.local_rloc.ip.v6.as_u64[1] = addr->as_u64[1];
+  key.local_rloc.version = IP6;
+  key.vni = vni;
+
+  p = hash_get_mem (lisp_gpe_sub_interfaces_sw_if_index, &key);
+
+ if (NULL != p)
+ return p[0];
+
+ return (INDEX_INVALID);
+}
+
+/**
+ * @brief
+ * Get a VNET L3 interface matching the local-RLOC and VNI
+ * Called from the data-plane
+ */
+always_inline index_t
+lisp_gpe_sub_interface_find_ip4 (const ip4_address_t * addr, u32 vni)
+{
+ lisp_gpe_sub_interface_key_t key;
+ const uword *p;
+
+  /* zero the key first: only 4 of the 16 address bytes are set below */
+  memset (&key, 0, sizeof (key));
+  key.local_rloc.ip.v4.as_u32 = addr->as_u32;
+  key.local_rloc.version = IP4;
+  key.vni = vni;
+
+  p = hash_get_mem (lisp_gpe_sub_interfaces_sw_if_index, &key);
+
+ if (NULL != p)
+ return p[0];
+
+ return (INDEX_INVALID);
+}
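+
+/*
+ * Editorial sketch, not part of this patch: on decap the outer
+ * destination address is a local RLOC, so the tenant's RX sub-interface
+ * is recovered from it plus the VNI carried in the LISP-GPE header.
+ * 'ip4' and 'vni' are illustrative.
+ *
+ *   u32 sw_if_index;
+ *
+ *   sw_if_index = lisp_gpe_sub_interface_find_ip4 (&ip4->dst_address, vni);
+ *   if (INDEX_INVALID == sw_if_index)
+ *     ... drop: no sub-interface for this {RLOC, VNI} ...
+ */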
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/vnet/lisp-gpe/lisp_gpe_tenant.c b/src/vnet/lisp-gpe/lisp_gpe_tenant.c
new file mode 100644
index 00000000..40cf7edb
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_tenant.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+
+/**
+ * The pool of all tenants
+ */
+static lisp_gpe_tenant_t *lisp_gpe_tenant_pool;
+
+/**
+ * The hash table of all tenants: key:{VNI}.
+ */
+uword *lisp_gpe_tenant_db;
+
+static lisp_gpe_tenant_t *
+lisp_gpe_tenant_find (u32 vni)
+{
+ uword *p;
+
+ p = hash_get (lisp_gpe_tenant_db, vni);
+
+ if (NULL == p)
+ return (NULL);
+
+ return (pool_elt_at_index (lisp_gpe_tenant_pool, p[0]));
+}
+
+static lisp_gpe_tenant_t *
+lisp_gpe_tenant_find_or_create_i (u32 vni)
+{
+ lisp_gpe_tenant_t *lt;
+
+ lt = lisp_gpe_tenant_find (vni);
+
+ if (NULL == lt)
+ {
+ pool_get (lisp_gpe_tenant_pool, lt);
+ memset (lt, 0, sizeof (*lt));
+
+ lt->lt_vni = vni;
+ lt->lt_table_id = ~0;
+ lt->lt_bd_id = ~0;
+
+ hash_set (lisp_gpe_tenant_db, vni, lt - lisp_gpe_tenant_pool);
+ }
+
+ return (lt);
+}
+
+/**
+ * @brief Find or create a tenant for the given VNI
+ */
+u32
+lisp_gpe_tenant_find_or_create (u32 vni)
+{
+ lisp_gpe_tenant_t *lt;
+
+ lt = lisp_gpe_tenant_find (vni);
+
+ if (NULL == lt)
+ {
+ lt = lisp_gpe_tenant_find_or_create_i (vni);
+ }
+
+ return (lt - lisp_gpe_tenant_pool);
+}
+
+/**
+ * @brief If there are no more locks/users of the tenant, then delete it
+ */
+static void
+lisp_gpe_tenant_delete_if_empty (lisp_gpe_tenant_t * lt)
+{
+ int i;
+
+ for (i = 0; i < LISP_GPE_TENANT_LOCK_NUM; i++)
+ {
+ if (lt->lt_locks[i])
+ return;
+ }
+
+ hash_unset (lisp_gpe_tenant_db, lt->lt_vni);
+ pool_put (lisp_gpe_tenant_pool, lt);
+}
+
+/**
+ * @brief Add/create and lock a new or find and lock the existing L3
+ * interface for the tenant
+ *
+ * @param vni The tenant's VNI
+ * @param table_id The tenant's L3 table ID.
+ *
+ * @return the SW IF index of the L3 interface
+ */
+u32
+lisp_gpe_tenant_l3_iface_add_or_lock (u32 vni, u32 table_id)
+{
+ lisp_gpe_tenant_t *lt;
+
+ lt = lisp_gpe_tenant_find_or_create_i (vni);
+
+ if (~0 == lt->lt_table_id)
+ lt->lt_table_id = table_id;
+
+ ASSERT (lt->lt_table_id == table_id);
+
+ if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE])
+ {
+ /* create the l3 interface since there are currently no users of it */
+ lt->lt_l3_sw_if_index =
+ lisp_gpe_add_l3_iface (&lisp_gpe_main, vni, table_id);
+ }
+
+ lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE]++;
+
+ return (lt->lt_l3_sw_if_index);
+}
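+
+/*
+ * Editorial sketch, not part of this patch: the L3 interface is a shared
+ * resource; each user takes a lock and releases it when done, and the
+ * interface (and eventually the tenant) is deleted with the last lock.
+ * 'vni' and 'table_id' are illustrative.
+ *
+ *   u32 sw_if_index = lisp_gpe_tenant_l3_iface_add_or_lock (vni, table_id);
+ *   ... install routes out of sw_if_index ...
+ *   lisp_gpe_tenant_l3_iface_unlock (vni);
+ */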
+
+/**
+ * @brief Release the lock held on the tenant's L3 interface
+ */
+void
+lisp_gpe_tenant_l3_iface_unlock (u32 vni)
+{
+ lisp_gpe_tenant_t *lt;
+
+ lt = lisp_gpe_tenant_find (vni);
+
+ if (NULL == lt)
+ {
+ clib_warning ("No tenant for VNI %d", vni);
+ return;
+ }
+
+ if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE])
+ {
+ clib_warning ("No L3 interface for tenant VNI %d", vni);
+ return;
+ }
+
+ lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE]--;
+
+ if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE])
+ {
+ /* the last user has gone, so delete the l3 interface */
+ lisp_gpe_del_l3_iface (&lisp_gpe_main, vni, lt->lt_table_id);
+ }
+
+ /*
+ * If there are no more locks on any tenant managed resource, then
+ * this tenant is toast.
+ */
+ lisp_gpe_tenant_delete_if_empty (lt);
+}
+
+/**
+ * @brief Add/create and lock a new or find and lock the existing L2
+ * interface for the tenant
+ *
+ * @param vni The tenant's VNI
+ * @param bd_id The tenant's L2 bridge-domain ID.
+ *
+ * @return the SW IF index of the L2 interface
+ */
+u32
+lisp_gpe_tenant_l2_iface_add_or_lock (u32 vni, u32 bd_id)
+{
+ lisp_gpe_tenant_t *lt;
+
+ lt = lisp_gpe_tenant_find_or_create_i (vni);
+
+ if (NULL == lt)
+ {
+ clib_warning ("No tenant for VNI %d", vni);
+ return ~0;
+ }
+
+ if (~0 == lt->lt_bd_id)
+ lt->lt_bd_id = bd_id;
+
+ ASSERT (lt->lt_bd_id == bd_id);
+
+ if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE])
+ {
+ /* create the l2 interface since there are currently no users of it */
+ lt->lt_l2_sw_if_index =
+ lisp_gpe_add_l2_iface (&lisp_gpe_main, vni, bd_id);
+ }
+
+ lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE]++;
+
+ return (lt->lt_l2_sw_if_index);
+}
+
+/**
+ * @brief Release the lock held on the tenant's L2 interface
+ */
+void
+lisp_gpe_tenant_l2_iface_unlock (u32 vni)
+{
+ lisp_gpe_tenant_t *lt;
+
+ lt = lisp_gpe_tenant_find (vni);
+
+ if (NULL == lt)
+ {
+ clib_warning ("No tenant for VNI %d", vni);
+ return;
+ }
+
+ if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE])
+ {
+ clib_warning ("No L2 interface for tenant VNI %d", vni);
+ return;
+ }
+
+ lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE]--;
+
+ if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE])
+ {
+ /* the last user has gone, so delete the l2 interface */
+ lisp_gpe_del_l2_iface (&lisp_gpe_main, vni, lt->lt_bd_id);
+ }
+
+ /*
+ * If there are no more locks on any tenant managed resource, then
+ * this tenant is toast.
+ */
+ lisp_gpe_tenant_delete_if_empty (lt);
+}
+
+/**
+ * @brief get a const pointer to the tenant object
+ */
+const lisp_gpe_tenant_t *
+lisp_gpe_tenant_get (u32 index)
+{
+ return (pool_elt_at_index (lisp_gpe_tenant_pool, index));
+}
+
+/**
+ * @brief Flush/delete ALL the tenants
+ */
+void
+lisp_gpe_tenant_flush (void)
+{
+ lisp_gpe_tenant_t *lt;
+
+ /* *INDENT-OFF* */
+ pool_foreach(lt, lisp_gpe_tenant_pool,
+ ({
+ lisp_gpe_tenant_l2_iface_unlock(lt->lt_vni);
+ lisp_gpe_tenant_l3_iface_unlock(lt->lt_vni);
+ }));
+ /* *INDENT-ON* */
+}
+
+/**
+ * @brief Show/display one tenant
+ */
+static u8 *
+format_lisp_gpe_tenant (u8 * s, va_list ap)
+{
+ const lisp_gpe_tenant_t *lt = va_arg (ap, lisp_gpe_tenant_t *);
+
+ s = format (s, "VNI:%d ", lt->lt_vni);
+
+ if (lt->lt_table_id != ~0)
+ {
+ s = format (s, "VRF:%d ", lt->lt_table_id);
+ s = format (s, "L3-SW-IF:%d ", lt->lt_l3_sw_if_index);
+ }
+
+ if (lt->lt_bd_id != ~0)
+ {
+ s = format (s, "BD-ID:%d ", lt->lt_bd_id);
+ s = format (s, "L2-SW-IF:%d ", lt->lt_l2_sw_if_index);
+ }
+
+ return (s);
+}
+
+/**
+ * @brief CLI command to show LISP-GPE tenant.
+ */
+static clib_error_t *
+lisp_gpe_tenant_show (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lisp_gpe_tenant_t *lt;
+
+ /* *INDENT-OFF* */
+ pool_foreach (lt, lisp_gpe_tenant_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_lisp_gpe_tenant, lt);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_gpe_tenant_command) = {
+ .path = "show gpe tenant",
+ .short_help = "show gpe tenant",
+ .function = lisp_gpe_tenant_show,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_tenant.h b/src/vnet/lisp-gpe/lisp_gpe_tenant.h
new file mode 100644
index 00000000..5db7dde8
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_tenant.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LISP_GPE_TENANT_H__
+#define __LISP_GPE_TENANT_H__
+
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+/**
+ * Reference-counting lock types on the tenant.
+ * When all of these counters drop to zero, we no longer need the tenant.
+ */
+typedef enum lisp_gpe_tenant_lock_t_
+{
+ LISP_GPE_TENANT_LOCK_L2_IFACE,
+ LISP_GPE_TENANT_LOCK_L3_IFACE,
+ LISP_GPE_TENANT_LOCK_NUM,
+} lisp_gpe_tenant_lock_t;
+
+/**
+ * @brief Representation of the data associated with a LISP overlay tenant
+ *
+ * This object exists to manage the shared resources of the L2 and L3 interface
+ * of a given tenant.
+ */
+typedef struct lisp_gpe_tenant_t_
+{
+ /**
+ * The VNI is the identifier of the tenant
+ */
+ u32 lt_vni;
+
+ /**
+ * The tenant can have both L2 and L3 services enabled.
+ */
+ u32 lt_table_id;
+ u32 lt_bd_id;
+
+ /**
+   * The number of locks held per lock type (L2 and L3 interfaces).
+ */
+ u32 lt_locks[LISP_GPE_TENANT_LOCK_NUM];
+
+ /**
+ * The L3 SW interface index
+ */
+ u32 lt_l3_sw_if_index;
+
+ /**
+ * The L2 SW interface index
+ */
+ u32 lt_l2_sw_if_index;
+
+} lisp_gpe_tenant_t;
+
+extern u32 lisp_gpe_tenant_find_or_create (u32 vni);
+
+extern u32 lisp_gpe_tenant_l3_iface_add_or_lock (u32 vni, u32 table_id);
+extern void lisp_gpe_tenant_l3_iface_unlock (u32 vni);
+
+extern u32 lisp_gpe_tenant_l2_iface_add_or_lock (u32 vni, u32 bd_id);
+extern void lisp_gpe_tenant_l2_iface_unlock (u32 vni);
+
+extern const lisp_gpe_tenant_t *lisp_gpe_tenant_get (u32 index);
+
+extern void lisp_gpe_tenant_flush (void);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/vnet/lisp-gpe/lisp_gpe_tunnel.c b/src/vnet/lisp-gpe/lisp_gpe_tunnel.c
new file mode 100644
index 00000000..dd6c6fdd
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_tunnel.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE tunnels.
+ *
+ */
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+
+#include <vnet/fib/fib_table.h>
+
+/**
+ * @brief Pool of all LISP tunnels
+ */
+static lisp_gpe_tunnel_t *lisp_gpe_tunnel_pool;
+
+/**
+ * @brief a DB of all tunnels
+ */
+static uword *lisp_gpe_tunnel_db;
+
+/**
+ * @brief Compute the IP-UDP-GPE encap/rewrite header for a tunnel.
+ *
+ * @param[in] lgt The tunnel whose locator pair supplies the outer addresses.
+ * @param[in] ladj The LISP adjacency supplying the flags and VNI.
+ * @param[in] payload_proto The protocol of the encapsulated payload.
+ *
+ * @return The rewrite, as a vector of bytes.
+ */
+u8 *
+lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_t * lgt,
+ const lisp_gpe_adjacency_t * ladj,
+ lisp_gpe_next_protocol_e payload_proto)
+{
+ lisp_gpe_header_t *lisp0;
+ u8 *rw = 0;
+ int len;
+ gpe_encap_mode_t encap_mode = vnet_gpe_get_encap_mode ();
+
+ if (IP4 == ip_addr_version (&lgt->key->lcl))
+ {
+ ip4_udp_lisp_gpe_header_t *h0;
+ ip4_header_t *ip0;
+
+ len = sizeof (*h0);
+
+ vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip4_udp_lisp_gpe_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip4 header */
+ ip0 = &h0->ip4;
+ ip0->ip_version_and_header_length = 0x45;
+ ip0->ttl = 254;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip_address_copy_addr (&ip0->src_address, &lgt->key->lcl);
+ ip_address_copy_addr (&ip0->dst_address, &lgt->key->rmt);
+ ip0->checksum = ip4_header_checksum (ip0);
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4341);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe);
+
+ /* LISP-gpe header */
+ lisp0 = &h0->lisp;
+ }
+ else
+ {
+ ip6_udp_lisp_gpe_header_t *h0;
+ ip6_header_t *ip0;
+
+ len = sizeof (*h0);
+
+ vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip6_udp_lisp_gpe_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip6 header */
+ ip0 = &h0->ip6;
+ ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+ ip0->hop_limit = 254;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ /* we fix up the ip6 header length after-the-fact */
+ ip_address_copy_addr (&ip0->src_address, &lgt->key->lcl);
+ ip_address_copy_addr (&ip0->dst_address, &lgt->key->rmt);
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4341);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe);
+
+ /* LISP-gpe header */
+ lisp0 = &h0->lisp;
+ }
+
+ lisp0->flags = ladj->flags;
+ if (GPE_ENCAP_VXLAN == encap_mode)
+ /* unset P flag */
+ lisp0->flags &= ~LISP_GPE_FLAGS_P;
+
+ lisp0->ver_res = 0;
+ lisp0->res = 0;
+ lisp0->next_protocol = payload_proto;
+ lisp0->iid = clib_host_to_net_u32 (ladj->vni) >> 8; /* first 24 bits only */
+
+ return (rw);
+}
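+
+/*
+ * Editorial note, not part of this patch, on the IID encoding above: the
+ * VNI must occupy the high-order 24 bits of the second header word on
+ * the wire. For vni = 0x123456 on a little-endian host,
+ * clib_host_to_net_u32 (vni) is 0x56341200; shifting right by 8 gives
+ * 0x00563412, stored in memory as the bytes 0x12 0x34 0x56 0x00 -- the
+ * IID in the first three octets, the low-order octet zero.
+ */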
+
+static lisp_gpe_tunnel_t *
+lisp_gpe_tunnel_db_find (const lisp_gpe_tunnel_key_t * key)
+{
+ uword *p;
+
+ p = hash_get_mem (lisp_gpe_tunnel_db, (void *) key);
+
+ if (NULL != p)
+ {
+ return (pool_elt_at_index (lisp_gpe_tunnel_pool, p[0]));
+ }
+ return (NULL);
+}
+
+lisp_gpe_tunnel_t *
+lisp_gpe_tunnel_get_i (index_t lgti)
+{
+ return (pool_elt_at_index (lisp_gpe_tunnel_pool, lgti));
+}
+
+index_t
+lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * pair,
+ u32 rloc_fib_index)
+{
+ lisp_gpe_tunnel_key_t key = {
+ .lcl = pair->lcl_loc,
+ .rmt = pair->rmt_loc,
+ .fib_index = rloc_fib_index,
+ };
+ lisp_gpe_tunnel_t *lgt;
+ fib_prefix_t pfx;
+
+ lgt = lisp_gpe_tunnel_db_find (&key);
+
+ if (NULL == lgt)
+ {
+ pool_get (lisp_gpe_tunnel_pool, lgt);
+ memset (lgt, 0, sizeof (*lgt));
+
+ lgt->key = clib_mem_alloc (sizeof (*lgt->key));
+ memset (lgt->key, 0, sizeof (*lgt->key));
+
+ lgt->key->rmt = pair->rmt_loc;
+ lgt->key->lcl = pair->lcl_loc;
+ lgt->key->fib_index = rloc_fib_index;
+
+ /*
+ * source the FIB entry for the RLOC so we can track its forwarding
+ * chain
+ */
+ ip_address_to_fib_prefix (&lgt->key->rmt, &pfx);
+
+ lgt->fib_entry_index = fib_table_entry_special_add (rloc_fib_index,
+ &pfx,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+
+ hash_set_mem (lisp_gpe_tunnel_db, &lgt->key,
+ (lgt - lisp_gpe_tunnel_pool));
+ }
+
+ lgt->locks++;
+
+ return (lgt - lisp_gpe_tunnel_pool);
+}
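+
+/*
+ * Editorial sketch, not part of this patch: typical pairing with the
+ * rewrite builder and unlock. 'pair', 'rloc_fib_index' and 'ladj' are
+ * illustrative.
+ *
+ *   index_t lgti;
+ *   u8 *rw;
+ *
+ *   lgti = lisp_gpe_tunnel_find_or_create_and_lock (&pair, rloc_fib_index);
+ *   rw = lisp_gpe_tunnel_build_rewrite (lisp_gpe_tunnel_get (lgti),
+ *                                       ladj, LISP_GPE_NEXT_PROTO_IP4);
+ *   ... stack the rewrite onto an adjacency ...
+ *   lisp_gpe_tunnel_unlock (lgti);
+ */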
+
+void
+lisp_gpe_tunnel_unlock (index_t lgti)
+{
+ lisp_gpe_tunnel_t *lgt;
+
+ lgt = lisp_gpe_tunnel_get_i (lgti);
+ lgt->locks--;
+
+ if (0 == lgt->locks)
+ {
+ hash_unset_mem (lisp_gpe_tunnel_db, &lgt->key);
+ clib_mem_free (lgt->key);
+ pool_put (lisp_gpe_tunnel_pool, lgt);
+ }
+}
+
+const lisp_gpe_tunnel_t *
+lisp_gpe_tunnel_get (index_t lgti)
+{
+ return (lisp_gpe_tunnel_get_i (lgti));
+}
+
+/** Format LISP-GPE tunnel. */
+u8 *
+format_lisp_gpe_tunnel (u8 * s, va_list * args)
+{
+ lisp_gpe_tunnel_t *lgt = va_arg (*args, lisp_gpe_tunnel_t *);
+
+ s = format (s, "tunnel %d\n", lgt - lisp_gpe_tunnel_pool);
+ s = format (s, " fib-index: %d, locks:%d \n",
+ lgt->key->fib_index, lgt->locks);
+ s = format (s, " lisp ver 0\n");
+
+ s = format (s, " locator-pair:\n");
+ s = format (s, " local: %U remote: %U\n",
+ format_ip_address, &lgt->key->lcl,
+ format_ip_address, &lgt->key->rmt);
+ s = format (s, " RLOC FIB entry: %d\n", lgt->fib_entry_index);
+
+ return s;
+}
+
+/**
+ * CLI command to show LISP-GPE tunnels.
+ */
+static clib_error_t *
+show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_tunnel_t *lgt;
+ index_t index;
+
+ if (pool_elts (lisp_gpe_tunnel_pool) == 0)
+ vlib_cli_output (vm, "No lisp-gpe tunnels configured...");
+
+ if (unformat (input, "%d", &index))
+ {
+ lgt = lisp_gpe_tunnel_get_i (index);
+ vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, lgt);
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (lgt, lisp_gpe_tunnel_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, lgt);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
+{
+ .path = "show gpe tunnel",
+ .function = show_lisp_gpe_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_gpe_tunnel_module_init (vlib_main_t * vm)
+{
+ lisp_gpe_tunnel_db = hash_create_mem (0,
+ sizeof (lisp_gpe_tunnel_key_t),
+ sizeof (uword));
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lisp_gpe_tunnel_module_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/lisp_gpe_tunnel.h b/src/vnet/lisp-gpe/lisp_gpe_tunnel.h
new file mode 100644
index 00000000..333d2882
--- /dev/null
+++ b/src/vnet/lisp-gpe/lisp_gpe_tunnel.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE tunnels.
+ *
+ */
+
+#ifndef LISP_GPE_TUNNEL_H__
+#define LISP_GPE_TUNNEL_H__
+
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-gpe/lisp_gpe_packet.h>
+
+/**
+ * Forward declaration
+ */
+struct lisp_gpe_adjacency_t_;
+
+/**
+ * A Key for a tunnel
+ */
+typedef struct lisp_gpe_tunnel_key_t_
+{
+ ip_address_t rmt;
+ ip_address_t lcl;
+ u32 fib_index;
+} lisp_gpe_tunnel_key_t;
+
+/**
+ * @brief A LISP GPE Tunnel.
+ *
+ * A tunnel represents an association between a local and remote RLOC.
+ * As such it represents a unique LISP rewrite.
+ */
+typedef struct lisp_gpe_tunnel_t_
+{
+ /**
+ * RLOC pair and rloc fib_index. This is the tunnel's key.
+ */
+ lisp_gpe_tunnel_key_t *key;
+
+ /**
+ * number of reference counting locks
+ */
+ u32 locks;
+
+ /**
+   * The FIB entry through which the remote RLOC is reachable
+   */
+ fib_node_index_t fib_entry_index;
+} lisp_gpe_tunnel_t;
+
+extern index_t lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t *
+ pair,
+ u32 rloc_fib_index);
+
+extern void lisp_gpe_tunnel_unlock (index_t lgti);
+
+extern const lisp_gpe_tunnel_t *lisp_gpe_tunnel_get (index_t lgti);
+
+extern u8 *lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_t * lgt,
+ const struct lisp_gpe_adjacency_t_
+ *ladj,
+ lisp_gpe_next_protocol_e
+ payload_proto);
+extern u8 *format_lisp_gpe_tunnel (u8 * s, va_list * args);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lisp-gpe/rfc.txt b/src/vnet/lisp-gpe/rfc.txt
new file mode 100644
index 00000000..5e3da150
--- /dev/null
+++ b/src/vnet/lisp-gpe/rfc.txt
@@ -0,0 +1,826 @@
+Network Working Group D. Lewis
+Internet-Draft Cisco Systems, Inc.
+Intended status: Informational P. Agarwal
+Expires: January 5, 2015 Broadcom
+ L. Kreeger
+ F. Maino
+ P. Quinn
+ M. Smith
+ N. Yadav
+ Cisco Systems, Inc.
+ July 4, 2014
+
+
+ LISP Generic Protocol Extension
+ draft-lewis-lisp-gpe-02.txt
+
+Abstract
+
+ This draft describes extending the Locator/ID Separation Protocol
+ (LISP) [RFC6830], via changes to the LISP header, with three new
+ capabilities: support for multi-protocol encapsulation, operations,
+ administration and management (OAM) signaling, and explicit
+ versioning.
+
+Status of this Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+ This Internet-Draft will expire on January 5, 2015.
+
+Copyright Notice
+
+ Copyright (c) 2014 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 1]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+
+Table of Contents
+
+ 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 3
+ 2. LISP Header Without Protocol Extensions . . . . . . . . . . . 4
+ 3. Generic Protocol Extension for LISP (LISP-gpe) . . . . . . . . 5
+ 3.1. Multi Protocol Support . . . . . . . . . . . . . . . . . . 5
+ 3.2. OAM Support . . . . . . . . . . . . . . . . . . . . . . . 6
+ 3.3. Version Bits . . . . . . . . . . . . . . . . . . . . . . . 6
+ 4. Backward Compatibility . . . . . . . . . . . . . . . . . . . . 8
+ 4.1. LISP-gpe Routers to (legacy) LISP Routers . . . . . . . . 8
+ 4.2. (legacy) LISP Routers to LISP-gpe Routers . . . . . . . . 8
+ 4.3. Type of Service . . . . . . . . . . . . . . . . . . . . . 8
+ 4.4. VLAN Identifier (VID) . . . . . . . . . . . . . . . . . . 8
+ 5. LISP-gpe Examples . . . . . . . . . . . . . . . . . . . . . . 9
+ 6. Security Considerations . . . . . . . . . . . . . . . . . . . 11
+ 7. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12
+ 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 13
+ 9. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+ 9.1. Normative References . . . . . . . . . . . . . . . . . . . 14
+ 9.2. Informative References . . . . . . . . . . . . . . . . . . 14
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 2]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+1. Introduction
+
+ LISP [RFC6830] defines an encapsulation format that carries IPv4 or
+ IPv6 (henceforth referred to as IP) packets in a LISP header and
+ outer UDP/IP transport.
+
+ The LISP header does not specify the protocol being encapsulated and
+ therefore is currently limited to encapsulating only IP packet
+ payloads. Other protocols, most notably VXLAN [VXLAN] (which defines
+ a similar header format to LISP), are used to encapsulate L2
+ protocols such as Ethernet. LISP [RFC6830] can be extended to
+ indicate the inner protocol, enabling the encapsulation of Ethernet,
+ IP or any other desired protocol all the while ensuring compatibility
+ with existing LISP [RFC6830] deployments.
+
+ As LISP is deployed, there's also the need to provide increased
+ visibility and diagnostic capabilities within the overlay.
+
+ This document describes extending LISP ([RFC6830]) via the following
+ changes:
+
+ Next Protocol Bit (P bit): A reserved flag bit is allocated, and set
+ in the LISP-gpe header to indicate that a next protocol field is
+ present.
+
+ OAM Flag Bit (O bit): A reserved flag bit is allocated, and set in
+ the LISP-gpe header, to indicate that the packet is an OAM packet.
+
+ Version: Two reserved bits are allocated, and set in the LISP-gpe
+ header, to indicate LISP-gpe protocol version.
+
+ Next protocol: An 8 bit next protocol field is present in the LISP-
+ gpe header.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 3]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+2. LISP Header Without Protocol Extensions
+
+ As described in the introduction, the LISP header has no protocol
+ identifier that indicates the type of payload being carried by LISP.
+ Because of this, LISP is limited to an IP payload. Furthermore, the
+ LISP header has no mechanism to signal OAM packets.
+
+ The LISP header contains flags (some defined, some reserved), a
+ Nonce/Map-version field and an instance ID/Locator-status-bit field.
+ The flags provide flexibility to define how the reserved bits can be
+ used to change the definition of the LISP header.
+
+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |N|L|E|V|I|flags| Nonce/Map-Version |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Instance ID/Locator-Status-Bits |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+ Figure 1: LISP Header
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 4]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+3. Generic Protocol Extension for LISP (LISP-gpe)
+
+3.1. Multi Protocol Support
+
+ This draft defines the following changes to the LISP header in order
+ to support multi-protocol encapsulation.
+
+ P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ MUST be set to 1 to indicate the presence of the 8 bit next
+ protocol field.
+
+ P = 0 indicates that the payload MUST conform to LISP as defined
+ in [RFC6830].
+
+ Flag bit 5 was chosen as the P bit because this flag bit is
+ currently unallocated in LISP [RFC6830].
+
+ Next Protocol Field: The lower 8 bits of the first word are used to
+ carry a next protocol. This next protocol field contains the
+ protocol of the encapsulated payload packet.
+
+ LISP [RFC6830] uses the lower 16 bits of the first word for either
+ a nonce, an echo-nonce ([RFC6830]) or to support map-versioning
+ ([RFC6834]). These are all optional capabilities that are
+ indicated by setting the N, E, and the V bit respectively.
+
+ To maintain the desired data plane compatibility, when the P bit
+ is set, the N, E, and V bits MUST be set to zero.
+
+ A new protocol registry will be requested from IANA for the Next
+ Protocol field. This draft defines the following Next Protocol
+ values:
+
+ 0x1 : IPv4
+
+ 0x2 : IPv6
+
+ 0x3 : Ethernet
+
+ 0x4: Network Service Header
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 5]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |N|L|E|V|I|P|R|R| Reserved | Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Instance ID/Locator-Status-Bits |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 2: LISP-gpe Next Protocol (P=1)
+
+3.2. OAM Support
+
+ Flag bit 7 is defined as the O bit. When the O bit is set to 1, the
+ packet is an OAM packet and OAM processing MUST occur. The OAM
+ protocol details are out of scope for this document. As with the
+ P-bit, bit 7 is currently a reserved flag in [RFC6830].
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |N|L|E|V|I|P|R|O| Reserved | Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Instance ID/Locator-Status-Bits |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 3: LISP-gpe OAM bit (P=1)
+
+3.3. Version Bits
+
+   LISP-gpe bits 8 and 9 are defined as version bits.  The version field
+ is used to ensure backward compatibility going forward with future
+ LISP-gpe updates.
+
+ The initial version for LISP-gpe is 0.
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 6]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |N|L|E|V|I|P|R|O|Ver| Reserved | Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Instance ID/Locator-Status-Bits |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 4: LISP-gpe Version bits (P=1)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 7]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+4. Backward Compatibility
+
+ Undefined (in RFC6830) flag bits 5 and 7, LISP-gpe P and O bits, were
+ selected to ensure compatibility with existing LISP [RFC6830]
+ deployments.
+
+ Similarly, using P = 0 to indicate that the format of the header and
+ payload conforms to [RFC6830] ensures compatibility with existing
+ LISP hardware forwarding platforms.
+
+4.1. LISP-gpe Routers to (legacy) LISP Routers
+
+ A LISP-gpe router MUST not encapsulate non-IP packet nor OAM packets
+ to a LISP router. A method for determining the capabilities of a
+ LISP router (gpe or "legacy") is out of the scope of this draft.
+
+ When encapsulating IP packets to a LISP router the P bit SHOULD be
+ set to 1 and the UDP port MUST be set to 4341. OAM bit MUST be set
+ to 0. The Next Protocol field SHOULD be 0x1 (IPv4) or 0x2 (IPv6).
+ The (legacy) LISP router will ignore the P bit and the protocol type
+ field. The (legacy) LISP router will treat the packet as a LISP
+ packet and inspect the first nibble of the payload to determine the
+ IP version.
+
+ When the P bit is set, the N, E, and V bits MUST be set to zero. The
+ receiving (legacy) LISP router will ignore N, E and V bits, when the
+ P bit is set.
+
+4.2. (legacy) LISP Routers to LISP-gpe Routers
+
+ When a LISP-gpe router receives a packet from a (legacy) LISP router,
+ the P bit MUST not be set and the UDP port MUST be 4341. The payload
+ MUST be IP, and the LISP-gpe router will inspect the first nibble of
+ the payload to determine IP version.
+
+4.3. Type of Service
+
+ When a LISP-gpe router performs Ethernet encapsulation, the inner
+ 802.1Q [IEEE8021Q] priority code point (PCP) field MAY be mapped from
+ the encapsulated frame to the Type of Service field in the outer IPv4
+ header, or in the case of IPv6 the 'Traffic Class' field.
+
+4.4. VLAN Identifier (VID)
+
+ When a LISP-gpe router performs Ethernet encapsulation, the inner
+ header 802.1Q [IEEE8021Q] VLAN Identifier (VID) MAY be mapped to, or
+ used to determine the LISP Instance ID field.
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 8]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+5. LISP-gpe Examples
+
+ This section provides two examples of IP protocols, and one example
+ of Ethernet encapsulated LISP-gpe using the generic extension
+ described in this document.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |N|L|E|V|I|1|0|0|0| Reserved | NP = IPv4 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Instance ID/Locator-Status-Bits |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv4 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 5: IPv4 and LISP-gpe
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |N|L|E|V|I|1|0|0|0| Reserved | NP = IPv6 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Instance ID/Locator-Status-Bits |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv6 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 6: IPv6 and LISP-gpe
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 9]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |N|L|E|V|I|1|0|0|0| Reserved | NP = Ethernet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Instance ID/Locator-Status-Bits |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original Ethernet Frame |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 7: Ethernet and LISP-gpe
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 10]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+6. Security Considerations
+
+ LISP-gpe security considerations are similar to the LISP security
+ considerations documented at length in LISP [RFC6830]. With LISP-
+ gpe, issues such as dataplane spoofing, flooding, and traffic
+ redirection are dependent on the particular protocol payload
+ encapsulated.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 11]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+7. Acknowledgments
+
+ A special thank you goes to Dino Farinacci for his guidance and
+ detailed review.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 12]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+8. IANA Considerations
+
+ IANA is requested to set up a registry of "Next Protocol". These are
+ 8-bit values. Next Protocol values 0, 1, 2, 3 and 4 are defined in
+ this draft. New values are assigned via Standards Action [RFC5226].
+
+ +---------------+-------------+---------------+
+ | Next Protocol | Description | Reference |
+ +---------------+-------------+---------------+
+ | 0 | Reserved | This document |
+ | | | |
+ | 1 | IPv4 | This document |
+ | | | |
+ | 2 | IPv6 | This document |
+ | | | |
+ | 3 | Ethernet | This document |
+ | | | |
+ | 4 | NSH | This document |
+ | | | |
+ | 5..253 | Unassigned | |
+ +---------------+-------------+---------------+
+
+ Table 1
+
+ There are ten bits at the beginning of the LISP-gpe header. New
+ bits are assigned via Standards Action [RFC5226].
+
+ Bits 0-3 - Assigned by LISP [RFC6830]
+ Bit 4 - Instance ID (I bit)
+ Bit 5 - Next Protocol (P bit)
+ Bit 6 - Reserved
+ Bit 7 - OAM (O bit)
+ Bits 8-9 - Version
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 13]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+9. References
+
+9.1. Normative References
+
+ [RFC0768] Postel, J., "User Datagram Protocol", STD 6, RFC 768,
+ August 1980.
+
+ [RFC0791] Postel, J., "Internet Protocol", STD 5, RFC 791,
+ September 1981.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an
+ IANA Considerations Section in RFCs", BCP 26, RFC 5226,
+ May 2008.
+
+9.2. Informative References
+
+ [ETYPES] The IEEE Registration Authority, "IEEE 802 Numbers", 2012,
+ <http://www.iana.org/assignments/ieee-802-numbers/
+ ieee-802-numbers.xml>.
+
+ [IEEE8021Q]
+ The IEEE Computer Society, "Media Access Control (MAC)
+ Bridges and Virtual Bridge Local Area Networks", August
+ 2012, <http://standards.ieee.org/getieee802/download/
+ 802.1Q-2011.pdf>.
+
+ [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700,
+ October 1994.
+
+ [RFC6830] Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The
+ Locator/ID Separation Protocol (LISP)", RFC 6830,
+ January 2013.
+
+ [RFC6834] Iannone, L., Saucez, D., and O. Bonaventure, "Locator/ID
+ Separation Protocol (LISP) Map-Versioning", RFC 6834,
+ January 2013.
+
+ [VXLAN] Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger,
+ L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A
+ Framework for Overlaying Virtualized Layer 2 Networks over
+ Layer 3 Networks", 2013.
+
+
+
+
+
+
+
+Lewis, et al. Expires January 5, 2015 [Page 14]
+
+Internet-Draft LISP Generic Protocol Extension July 2014
+
+
+Authors' Addresses
+
+ Darrel Lewis
+ Cisco Systems, Inc.
+
+ Email: darlewis@cisco.com
+
+
+ Puneet Agarwal
+ Broadcom
+
+ Email: pagarwal@broadcom.com
+
+
+ Larry Kreeger
+ Cisco Systems, Inc.
+
+ Email: kreeger@cisco.com
+
+
+ Fabio Maino
+ Cisco Systems, Inc.
+
+ Email: fmaino@cisco.com
+
+
+ Paul Quinn
+ Cisco Systems, Inc.
+
+ Email: paulq@cisco.com
+
+
+ Michael Smith
+ Cisco Systems, Inc.
+
+ Email: michsmit@cisco.com
+
+
+ Navindra Yadav
+ Cisco Systems, Inc.
+
+ Email: nyadav@cisco.com
diff --git a/src/vnet/llc/llc.c b/src/vnet/llc/llc.c
new file mode 100644
index 00000000..975207b6
--- /dev/null
+++ b/src/vnet/llc/llc.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * llc.c: llc support
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/llc/llc.h>
+
+/* Global main structure. */
+llc_main_t llc_main;
+
+u8 *
+format_llc_protocol (u8 * s, va_list * args)
+{
+ llc_protocol_t p = va_arg (*args, u32);
+ llc_main_t *pm = &llc_main;
+ llc_protocol_info_t *pi = llc_get_protocol_info (pm, p);
+
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "0x%02x", p);
+
+ return s;
+}
+
+u8 *
+format_llc_header_with_length (u8 * s, va_list * args)
+{
+ llc_main_t *pm = &llc_main;
+ llc_header_t *h = va_arg (*args, llc_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ llc_protocol_t p = h->dst_sap;
+ uword indent, header_bytes;
+
+ header_bytes = llc_header_length (h);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "llc header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "LLC %U -> %U",
+ format_llc_protocol, h->src_sap,
+ format_llc_protocol, h->dst_sap);
+
+ if (h->control != 0x03)
+ s = format (s, ", control 0x%x", llc_header_get_control (h));
+
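+  /* If the buffer holds more than just the LLC header, let the next
+     node's formatter render the payload. */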
+  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+    {
+      llc_protocol_info_t *pi = llc_get_protocol_info (pm, p);
+      vlib_node_t *node = (pi && pi->node_index != ~0)
+	? vlib_get_node (pm->vlib_main, pi->node_index) : 0;
+      if (node && node->format_buffer)
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ node->format_buffer, (void *) (h + 1),
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+u8 *
+format_llc_header (u8 * s, va_list * args)
+{
+ llc_header_t *h = va_arg (*args, llc_header_t *);
+ return format (s, "%U", format_llc_header_with_length, h, 0);
+}
+
+/* Parse an LLC protocol (SAP), given numerically or by name; the result
+   is an 8-bit value in host byte order. */
+uword
+unformat_llc_protocol (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ llc_main_t *pm = &llc_main;
+ int p, i;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x", &p) || unformat (input, "%d", &p))
+ {
+ if (p >= (1 << 8))
+ return 0;
+ *result = p;
+ return 1;
+ }
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ pm->protocol_info_by_name, &i))
+ {
+ llc_protocol_info_t *pi = vec_elt_at_index (pm->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword
+unformat_llc_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ llc_header_t _h, *h = &_h;
+ u8 p;
+
+ if (!unformat (input, "%U", unformat_llc_protocol, &p))
+ return 0;
+
+ h->src_sap = h->dst_sap = p;
+ h->control = 0x3;
+
+ /* Add header to result. */
+ {
+ void *p;
+ u32 n_bytes = sizeof (h[0]);
+
+ vec_add2 (*result, p, n_bytes);
+ clib_memcpy (p, h, n_bytes);
+ }
+
+ return 1;
+}
+
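+/* Build the layer-2 rewrite string that vnet prepends to packets sent on
+   an LLC interface: a 3-byte header with SSAP == DSAP == the SAP for the
+   requested link type (only IPv4 is mapped here). */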
+static u8 *
+llc_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ llc_header_t *h;
+ u8 *rewrite = NULL;
+ llc_protocol_t protocol;
+
+ switch (link_type)
+ {
+#define _(a,b) case VNET_LINK_##a: protocol = LLC_PROTOCOL_##b; break
+ _(IP4, ip4);
+#undef _
+ default:
+ return (NULL);
+ }
+
+ vec_validate (rewrite, sizeof (*h) - 1);
+ h = (llc_header_t *) rewrite;
+ h->src_sap = h->dst_sap = protocol;
+ h->control = 0x3;
+
+ return (rewrite);
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = {
+ .name = "LLC",
+ .format_header = format_llc_header_with_length,
+ .unformat_header = unformat_llc_header,
+ .build_rewrite = llc_build_rewrite,
+};
+/* *INDENT-ON* */
+
+static void
+add_protocol (llc_main_t * pm, llc_protocol_t protocol, char *protocol_name)
+{
+ llc_protocol_info_t *pi;
+ u32 i;
+
+ vec_add2 (pm->protocol_infos, pi, 1);
+ i = pi - pm->protocol_infos;
+
+ pi->name = protocol_name;
+ pi->protocol = protocol;
+ pi->next_index = pi->node_index = ~0;
+
+ hash_set (pm->protocol_info_by_protocol, protocol, i);
+ hash_set_mem (pm->protocol_info_by_name, pi->name, i);
+}
+
+static clib_error_t *
+llc_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+ llc_main_t *pm = &llc_main;
+
+ memset (pm, 0, sizeof (pm[0]));
+ pm->vlib_main = vm;
+
+ pm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ pm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
+
+#define _(f,n) add_protocol (pm, LLC_PROTOCOL_##f, #f);
+ foreach_llc_protocol;
+#undef _
+
+ if ((error = vlib_call_init_function (vm, snap_init)))
+ return error;
+
+ return vlib_call_init_function (vm, llc_input_init);
+}
+
+VLIB_INIT_FUNCTION (llc_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/llc/llc.h b/src/vnet/llc/llc.h
new file mode 100644
index 00000000..990a581d
--- /dev/null
+++ b/src/vnet/llc/llc.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * llc.h: LLC definitions
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_llc_h
+#define included_llc_h
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+
+/* Protocol (SSAP/DSAP) types. */
+#define foreach_llc_protocol \
+ _ (null, 0x0) \
+ _ (sublayer, 0x2) \
+ _ (sna_path_control, 0x4) \
+ _ (ip4, 0x6) \
+ _ (sna1, 0x8) \
+ _ (sna2, 0xc) \
+ _ (sna3, 0x40) \
+ _ (proway_lan, 0x0e) \
+ _ (netware1, 0x10) \
+ _ (netware2, 0xe0) \
+ _ (osi_layer1, 0x14) \
+ _ (osi_layer2, 0x20) \
+ _ (osi_layer3, 0x34) \
+ _ (osi_layer4, 0x54) \
+ _ (osi_layer5, 0xfe) \
+ _ (bpdu, 0x42) \
+ _ (arp, 0x98) \
+ _ (snap, 0xaa) \
+ _ (vines1, 0xba) \
+ _ (vines2, 0xbc) \
+ _ (netbios, 0xf0) \
+ _ (global_dsap, 0xff)
+
+typedef enum
+{
+#define _(f,n) LLC_PROTOCOL_##f = n,
+ foreach_llc_protocol
+#undef _
+} llc_protocol_t;
+
+typedef struct
+{
+#define LLC_DST_SAP_IS_GROUP (1 << 0)
+#define LLC_SRC_SAP_IS_RESPONSE (1 << 0)
+ u8 dst_sap, src_sap;
+
+  /* Control byte: the low 2 bits select the frame format.
+     bit 0 == 0       => information (I) frame, 16-bit control
+     bits [1:0] == 01 => supervisory (S) frame, 16-bit control
+     bits [1:0] == 11 => unnumbered (U) frame, 8-bit control */
+ u8 control;
+
+ /* Only present if (control & 3) != 3. */
+ u8 extended_control[0];
+} llc_header_t;
+
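+/* Return the full control field: just the control byte for U-frames,
+   or control plus the extended control byte for I/S frames. */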
+always_inline u16
+llc_header_get_control (llc_header_t * h)
+{
+ u16 r = h->control;
+ return r | ((((r & 3) != 3) ? h->extended_control[0] : 0) << 8);
+}
+
+always_inline u8
+llc_header_length (llc_header_t * h)
+{
+ return ((h->control & 3) != 3 ? 4 : 3);
+}
+
+typedef struct
+{
+ /* Name (a c string). */
+ char *name;
+
+ /* LLC protocol (SAP type). */
+ llc_protocol_t protocol;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} llc_protocol_info_t;
+
+#define foreach_llc_error \
+ _ (NONE, "no error") \
+ _ (UNKNOWN_PROTOCOL, "unknown llc ssap/dsap") \
+ _ (UNKNOWN_CONTROL, "control != 0x3")
+
+typedef enum
+{
+#define _(f,s) LLC_ERROR_##f,
+ foreach_llc_error
+#undef _
+ LLC_N_ERROR,
+} llc_error_t;
+
+typedef struct
+{
+ vlib_main_t *vlib_main;
+
+ llc_protocol_info_t *protocol_infos;
+
+ /* Hash tables mapping name/protocol to protocol info index. */
+ uword *protocol_info_by_name, *protocol_info_by_protocol;
+
+ /* llc-input next index indexed by protocol. */
+ u8 input_next_by_protocol[256];
+} llc_main_t;
+
+always_inline llc_protocol_info_t *
+llc_get_protocol_info (llc_main_t * m, llc_protocol_t protocol)
+{
+ uword *p = hash_get (m->protocol_info_by_protocol, protocol);
+ return p ? vec_elt_at_index (m->protocol_infos, p[0]) : 0;
+}
+
+extern llc_main_t llc_main;
+
+/* Register given node index to take input for given llc type. */
+void
+llc_register_input_protocol (vlib_main_t * vm,
+ llc_protocol_t protocol, u32 node_index);
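+/* For example, the snap node registers itself at init time along the
+ * lines of (a sketch; the node name belongs to the client):
+ *   llc_register_input_protocol (vm, LLC_PROTOCOL_snap, snap_input_node.index);
+ */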
+
+format_function_t format_llc_protocol;
+format_function_t format_llc_header;
+format_function_t format_llc_header_with_length;
+
+/* Parse llc protocol as 0xXXXX or protocol name. */
+unformat_function_t unformat_llc_protocol;
+
+/* Parse llc header. */
+unformat_function_t unformat_llc_header;
+unformat_function_t unformat_pg_llc_header;
+
+always_inline void
+llc_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_llc_header_with_length;
+ n->unformat_buffer = unformat_llc_header;
+ pn->unformat_edit = unformat_pg_llc_header;
+}
+
+#endif /* included_llc_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/llc/node.c b/src/vnet/llc/node.c
new file mode 100644
index 00000000..60b5c134
--- /dev/null
+++ b/src/vnet/llc/node.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * llc_node.c: llc packet processing
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/llc/llc.h>
+
+#define foreach_llc_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) LLC_INPUT_NEXT_##s,
+ foreach_llc_input_next
+#undef _
+ LLC_INPUT_N_NEXT,
+} llc_input_next_t;
+
+typedef struct
+{
+ u8 packet_data[32];
+} llc_input_trace_t;
+
+static u8 *
+format_llc_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ llc_input_trace_t *t = va_arg (*va, llc_input_trace_t *);
+
+ s = format (s, "%U", format_llc_header, t->packet_data);
+
+ return s;
+}
+
+static uword
+llc_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ llc_main_t *lm = &llc_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (llc_input_trace_t));
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ llc_header_t *h0, *h1;
+ u8 next0, next1, len0, len1, enqueue_code;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *b2, *b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, LOAD);
+ vlib_prefetch_buffer_header (b3, LOAD);
+
+ CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ h0 = (void *) (b0->data + b0->current_data);
+ h1 = (void *) (b1->data + b1->current_data);
+
+ len0 = llc_header_length (h0);
+ len1 = llc_header_length (h1);
+
+ b0->current_data += len0;
+ b1->current_data += len1;
+
+ b0->current_length -= len0;
+ b1->current_length -= len1;
+
+ next0 = lm->input_next_by_protocol[h0->dst_sap];
+ next1 = lm->input_next_by_protocol[h1->dst_sap];
+
+ b0->error =
+ node->errors[next0 ==
+ LLC_INPUT_NEXT_DROP ? LLC_ERROR_UNKNOWN_PROTOCOL :
+ LLC_ERROR_NONE];
+ b1->error =
+ node->errors[next1 ==
+ LLC_INPUT_NEXT_DROP ? LLC_ERROR_UNKNOWN_PROTOCOL :
+ LLC_ERROR_NONE];
+
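+	  /* Bit 0 set => b0 disagrees with the speculated next index,
+	     bit 1 set => b1 disagrees; 0 means both packets were already
+	     enqueued to the right frame. */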
+ enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
+
+ if (PREDICT_FALSE (enqueue_code != 0))
+ {
+ switch (enqueue_code)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = bi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, bi1);
+ break;
+
+ case 3:
+ /* A B B or A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ vlib_set_next_frame_buffer (vm, node, next1, bi1);
+ if (next0 == next1)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ next_index = next1;
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ llc_header_t *h0;
+ u8 next0, len0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ len0 = llc_header_length (h0);
+
+ b0->current_data += len0;
+
+ b0->current_length -= len0;
+
+ next0 = lm->input_next_by_protocol[h0->dst_sap];
+
+ b0->error =
+ node->errors[next0 ==
+ LLC_INPUT_NEXT_DROP ? LLC_ERROR_UNKNOWN_PROTOCOL :
+ LLC_ERROR_NONE];
+
+ /* Sent packet to wrong next? */
+ if (PREDICT_FALSE (next0 != next_index))
+ {
+ /* Return old frame; remove incorrectly enqueued packet. */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);
+
+ /* Send to correct next. */
+ next_index = next0;
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char *llc_error_strings[] = {
+#define _(f,s) s,
+ foreach_llc_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (llc_input_node) = {
+ .function = llc_input,
+ .name = "llc-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = LLC_N_ERROR,
+ .error_strings = llc_error_strings,
+
+ .n_next_nodes = LLC_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [LLC_INPUT_NEXT_##s] = n,
+ foreach_llc_input_next
+#undef _
+ },
+
+ .format_buffer = format_llc_header_with_length,
+ .format_trace = format_llc_input_trace,
+ .unformat_buffer = unformat_llc_header,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+llc_input_init (vlib_main_t * vm)
+{
+ llc_main_t *lm = &llc_main;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, llc_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ llc_setup_node (vm, llc_input_node.index);
+
+ {
+ int i;
+ for (i = 0; i < ARRAY_LEN (lm->input_next_by_protocol); i++)
+ lm->input_next_by_protocol[i] = LLC_INPUT_NEXT_DROP;
+ }
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (llc_input_init);
+
+void
+llc_register_input_protocol (vlib_main_t * vm,
+ llc_protocol_t protocol, u32 node_index)
+{
+ llc_main_t *lm = &llc_main;
+ llc_protocol_info_t *pi;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, llc_input_init);
+ if (error)
+ clib_error_report (error);
+ /* Otherwise, osi_input_init will wipe out e.g. the snap init */
+ error = vlib_call_init_function (vm, osi_input_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = llc_get_protocol_info (lm, protocol);
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm, llc_input_node.index, node_index);
+
+ lm->input_next_by_protocol[protocol] = pi->next_index;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/llc/pg.c b/src/vnet/llc/pg.c
new file mode 100644
index 00000000..ad18a4b0
--- /dev/null
+++ b/src/vnet/llc/pg.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * llc_pg.c: packet generator llc interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/llc/llc.h>
+
+typedef struct
+{
+ pg_edit_t dst_sap;
+ pg_edit_t src_sap;
+ pg_edit_t control;
+} pg_llc_header_t;
+
+static inline void
+pg_llc_header_init (pg_llc_header_t * e)
+{
+ pg_edit_init (&e->dst_sap, llc_header_t, dst_sap);
+ pg_edit_init (&e->src_sap, llc_header_t, src_sap);
+ pg_edit_init (&e->control, llc_header_t, control);
+}
+
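+/* Parse an LLC edit spec of the form "<src-sap> -> <dst-sap>", e.g.
+   "snap -> snap", optionally followed by an edit spec for the
+   next-layer node or a plain payload spec. */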
+uword
+unformat_pg_llc_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_llc_header_t *h;
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (llc_header_t),
+ &group_index);
+ pg_llc_header_init (h);
+
+ pg_edit_set_fixed (&h->control, 0x03);
+
+ error = 1;
+  if (!unformat (input, "%U -> %U",
+		 unformat_pg_edit, unformat_llc_protocol, &h->src_sap,
+		 unformat_pg_edit, unformat_llc_protocol, &h->dst_sap))
+ goto done;
+
+ {
+ llc_main_t *pm = &llc_main;
+ llc_protocol_info_t *pi = 0;
+ pg_node_t *pg_node = 0;
+
+ if (h->dst_sap.type == PG_EDIT_FIXED)
+ {
+ u8 t = *h->dst_sap.values[PG_EDIT_LO];
+ pi = llc_get_protocol_info (pm, t);
+ if (pi && pi->node_index != ~0)
+ pg_node = pg_get_node (pi->node_index);
+ }
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto done;
+ }
+
+ error = 0;
+done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lldp/dir.dox b/src/vnet/lldp/dir.dox
new file mode 100644
index 00000000..6aa45f70
--- /dev/null
+++ b/src/vnet/lldp/dir.dox
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir vnet/lldp
+ @brief Link Layer Discovery Protocol (LLDP) implementation
+*/
diff --git a/src/vnet/lldp/lldp.api b/src/vnet/lldp/lldp.api
new file mode 100644
index 00000000..02fe32ca
--- /dev/null
+++ b/src/vnet/lldp/lldp.api
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief configure global parameter for LLDP
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param system_name - VPP system name
+ @param tx_hold - multiplier for tx_interval when setting time-to-live (TTL)
+ value in the LLDP packets
+ @param tx_interval - time interval, in seconds, between successive LLDP frames
+*/
+autoreply define lldp_config
+{
+ u32 client_index;
+ u32 context;
+ u8 system_name[256];
+ u32 tx_hold;
+ u32 tx_interval;
+};
+
+/** \brief Interface set LLDP request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface for which to enable/disable LLDP
+ @param port_desc - local port description
+ @param enable - if non-zero enable, else disable
+*/
+autoreply define sw_interface_set_lldp
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 port_desc[256];
+ u8 enable;
+};
diff --git a/src/vnet/lldp/lldp.h b/src/vnet/lldp/lldp.h
new file mode 100644
index 00000000..473c2021
--- /dev/null
+++ b/src/vnet/lldp/lldp.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LLDP external definition
+ */
+#ifndef __included_lldp_h__
+#define __included_lldp_h__
+
+typedef enum lldp_cfg_err
+{
+ lldp_ok,
+ lldp_not_supported,
+ lldp_invalid_arg,
+} lldp_cfg_err_t;
+
+lldp_cfg_err_t lldp_cfg_intf_set (u32 hw_if_index, u8 ** port_desc, int enable);
+lldp_cfg_err_t lldp_cfg_set (u8 ** host, int hold_time, int tx_interval);
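+
+/* Usage sketch (hypothetical caller): enable LLDP on hw_if_index 1 with
+ * a port description. If lldp takes ownership of the description vector
+ * it NULLs the caller's pointer, so the vec_free below is safe either way:
+ *
+ *   u8 *desc = format (0, "uplink0");
+ *   lldp_cfg_err_t rv = lldp_cfg_intf_set (1, &desc, 1);
+ *   vec_free (desc);
+ */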
+
+#endif /* __included_lldp_h__ */
diff --git a/src/vnet/lldp/lldp_api.c b/src/vnet/lldp/lldp_api.c
new file mode 100644
index 00000000..bdada897
--- /dev/null
+++ b/src/vnet/lldp/lldp_api.c
@@ -0,0 +1,144 @@
+/*
+ *------------------------------------------------------------------
+ * lldp_api.c - lldp api
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/lldp/lldp.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(LLDP_CONFIG, lldp_config) \
+_(SW_INTERFACE_SET_LLDP, sw_interface_set_lldp)
+
+static void
+vl_api_lldp_config_t_handler (vl_api_lldp_config_t * mp)
+{
+ vl_api_lldp_config_reply_t *rmp;
+ int rv = 0;
+ u8 *sys_name = 0;
+
+  /* an empty name would make strlen () - 1 underflow */
+  if (mp->system_name[0])
+    {
+      vec_validate (sys_name, strlen ((char *) mp->system_name) - 1);
+      strncpy ((char *) sys_name, (char *) mp->system_name,
+	       vec_len (sys_name));
+    }
+
+ if (lldp_cfg_set (&sys_name, ntohl (mp->tx_hold),
+ ntohl (mp->tx_interval)) != lldp_ok)
+ {
+ vec_free (sys_name);
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ REPLY_MACRO (VL_API_LLDP_CONFIG_REPLY);
+}
+
+static void
+vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
+{
+ vl_api_sw_interface_set_lldp_reply_t *rmp;
+ int rv = 0;
+ u8 *port_desc = 0;
+
+  /* an empty description would make strlen () - 1 underflow */
+  if (mp->port_desc[0])
+    {
+      vec_validate (port_desc, strlen ((char *) mp->port_desc) - 1);
+      strncpy ((char *) port_desc, (char *) mp->port_desc,
+	       vec_len (port_desc));
+    }
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ if (lldp_cfg_intf_set (ntohl (mp->sw_if_index), &port_desc,
+ mp->enable) != lldp_ok)
+ {
+ vec_free (port_desc);
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_LLDP_REPLY);
+}
+
+
+/*
+ * lldp_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_lldp;
+#undef _
+}
+
+static clib_error_t *
+lldp_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+  /*
+   * Set up the (msg_name, crc, message-id) table
+   */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (lldp_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lldp/lldp_cli.c b/src/vnet/lldp/lldp_cli.c
new file mode 100644
index 00000000..1933ca8c
--- /dev/null
+++ b/src/vnet/lldp/lldp_cli.c
@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief LLDP CLI handling
+ *
+ */
+#include <vnet/lisp-cp/lisp_types.h>
+#include <vnet/lldp/lldp.h>
+#include <vnet/lldp/lldp_node.h>
+
+#ifndef ETHER_ADDR_LEN
+#include <net/ethernet.h>
+#endif
+
+static clib_error_t *
+lldp_cfg_err_to_clib_err (lldp_cfg_err_t e)
+{
+
+ switch (e)
+ {
+ case lldp_ok:
+ return 0;
+ case lldp_not_supported:
+ return clib_error_return (0, "not supported");
+ case lldp_invalid_arg:
+ return clib_error_return (0, "invalid argument");
+ }
+ return 0;
+}
+
+lldp_cfg_err_t
+lldp_cfg_intf_set (u32 hw_if_index, u8 ** port_desc, int enable)
+{
+ lldp_main_t *lm = &lldp_main;
+ vnet_main_t *vnm = lm->vnet_main;
+ ethernet_main_t *em = &ethernet_main;
+ const vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ const ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index);
+
+ if (!eif)
+ {
+ return lldp_not_supported;
+ }
+
+ if (enable)
+ {
+ lldp_intf_t *n = lldp_get_intf (lm, hw_if_index);
+ if (n)
+ {
+ /* already enabled */
+ return lldp_ok;
+ }
+ n = lldp_create_intf (lm, hw_if_index);
+
+ if (port_desc && *port_desc)
+ {
+ n->port_desc = *port_desc;
+ *port_desc = NULL;
+ }
+
+ const vnet_sw_interface_t *sw =
+ vnet_get_sw_interface (lm->vnet_main, hi->sw_if_index);
+ if (sw->flags & (VNET_SW_INTERFACE_FLAG_ADMIN_UP |
+ VNET_SW_INTERFACE_FLAG_BOND_SLAVE))
+ {
+ lldp_schedule_intf (lm, n);
+ }
+ }
+ else
+ {
+      lldp_intf_t *n = lldp_get_intf (lm, hw_if_index);
+ lldp_delete_intf (lm, n);
+ }
+
+ return lldp_ok;
+}
+
+static clib_error_t *
+lldp_intf_cmd (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lldp_main_t *lm = &lldp_main;
+ vnet_main_t *vnm = lm->vnet_main;
+ u32 sw_if_index = (u32) ~ 0;
+ int enable = 1;
+ u8 *port_desc = NULL;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "sw_if_index %d", &sw_if_index))
+ ;
+      else if (unformat
+	       (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else if (unformat (input, "port-desc %s", &port_desc))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "Interface name is invalid!");
+
+ return lldp_cfg_err_to_clib_err (lldp_cfg_intf_set (sw_if_index,
+ &port_desc, enable));
+}
+
+lldp_cfg_err_t
+lldp_cfg_set (u8 ** host, int hold_time, int tx_interval)
+{
+ lldp_main_t *lm = &lldp_main;
+ int reschedule = 0;
+ if (host && *host)
+ {
+ vec_free (lm->sys_name);
+ lm->sys_name = *host;
+ *host = NULL;
+ }
+ if (hold_time)
+ {
+ if (hold_time < LLDP_MIN_TX_HOLD || hold_time > LLDP_MAX_TX_HOLD)
+ {
+ return lldp_invalid_arg;
+ }
+ if (lm->msg_tx_hold != hold_time)
+ {
+ lm->msg_tx_hold = hold_time;
+ reschedule = 1;
+ }
+ }
+ if (tx_interval)
+ {
+ if (tx_interval < LLDP_MIN_TX_INTERVAL ||
+ tx_interval > LLDP_MAX_TX_INTERVAL)
+ {
+ return lldp_invalid_arg;
+ }
+ if (lm->msg_tx_interval != tx_interval)
+ {
+ reschedule = 1;
+ lm->msg_tx_interval = tx_interval;
+ }
+ }
+ if (reschedule)
+ {
+ vlib_process_signal_event (lm->vlib_main, lm->lldp_process_node_index,
+ LLDP_EVENT_RESCHEDULE, 0);
+ }
+ return lldp_ok;
+}
+
+static clib_error_t *
+lldp_cfg_cmd (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int hold_time = 0;
+ int tx_interval = 0;
+ u8 *host = NULL;
+ clib_error_t *ret = NULL;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "system-name %s", &host))
+ {
+ }
+ else if (unformat (input, "tx-hold %d", &hold_time))
+ {
+ if (hold_time < LLDP_MIN_TX_HOLD || hold_time > LLDP_MAX_TX_HOLD)
+ {
+ ret =
+ clib_error_return (0,
+ "invalid tx-hold `%d' (out of range <%d,%d>)",
+ hold_time, LLDP_MIN_TX_HOLD,
+ LLDP_MAX_TX_HOLD);
+ goto out;
+ }
+ }
+ else if (unformat (input, "tx-interval %d", &tx_interval))
+ {
+ if (tx_interval < LLDP_MIN_TX_INTERVAL ||
+ tx_interval > LLDP_MAX_TX_INTERVAL)
+ {
+ ret =
+ clib_error_return (0,
+ "invalid tx-interval `%d' (out of range <%d,%d>)",
+ tx_interval, LLDP_MIN_TX_INTERVAL,
+ LLDP_MAX_TX_INTERVAL);
+ goto out;
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+ ret =
+ lldp_cfg_err_to_clib_err (lldp_cfg_set (&host, hold_time, tx_interval));
+out:
+ vec_free (host);
+ return ret;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND(set_interface_lldp_cmd, static) = {
+ .path = "set interface lldp",
+ .short_help = "set interface lldp <interface> | sw_if_index <idx>"
+ " [port-desc <string>] [disable]",
+ .function = lldp_intf_cmd,
+};
+
+VLIB_CLI_COMMAND(set_lldp_cmd, static) = {
+ .path = "set lldp",
+ .short_help = "set lldp [system-name <string>] [tx-hold <value>] "
+ "[tx-interval <value>]",
+ .function = lldp_cfg_cmd,
+};
+/* *INDENT-ON* */
+
+static const char *
+lldp_chassis_id_subtype_str (lldp_chassis_id_subtype_t t)
+{
+ switch (t)
+ {
+#define F(num, val, str) \
+ case num: \
+ return str;
+ foreach_chassis_id_subtype (F)
+#undef F
+ }
+ return "unknown chassis subtype";
+}
+
+static const char *
+lldp_port_id_subtype_str (lldp_port_id_subtype_t t)
+{
+ switch (t)
+ {
+#define F(num, val, str) \
+ case num: \
+ return str;
+ foreach_port_id_subtype (F)
+#undef F
+ }
+ return "unknown port subtype";
+}
+
+/*
+ * format port id subtype&value
+ *
+ * @param s format string
+ * @param va - 1st argument - unsigned - port id subtype
+ * @param va - 2nd argument - u8* - port id
+ * @param va - 3rd argument - unsigned - port id length
+ * @param va - 4th argument - int - 1 for detailed output, 0 for simple
+ */
+u8 *
+format_lldp_port_id (u8 * s, va_list * va)
+{
+ const lldp_port_id_subtype_t subtype = va_arg (*va, unsigned);
+ const u8 *id = va_arg (*va, u8 *);
+ const unsigned len = va_arg (*va, unsigned);
+ const int detail = va_arg (*va, int);
+ if (!id)
+ {
+ return s;
+ }
+ switch (subtype)
+ {
+ case LLDP_PORT_ID_SUBTYPE_NAME (intf_alias):
+ /* fallthrough */
+ case LLDP_PORT_ID_SUBTYPE_NAME (port_comp):
+ /* fallthrough */
+ case LLDP_PORT_ID_SUBTYPE_NAME (local):
+ /* fallthrough */
+ case LLDP_PORT_ID_SUBTYPE_NAME (intf_name):
+ if (detail)
+ {
+ s = format (s, "%U(%s)", format_ascii_bytes, id, len,
+ lldp_port_id_subtype_str (subtype));
+ }
+ else
+ {
+ s = format (s, "%U", format_ascii_bytes, id, len);
+ }
+ break;
+ case LLDP_PORT_ID_SUBTYPE_NAME (mac_addr):
+ if (ETHER_ADDR_LEN == len)
+ {
+ if (detail)
+ {
+ s = format (s, "%U(%s)", format_mac_address, id,
+ lldp_port_id_subtype_str (subtype));
+ }
+ else
+ {
+ s = format (s, "%U", format_mac_address, id);
+ }
+ break;
+ }
+ /* fallthrough */
+ case LLDP_PORT_ID_SUBTYPE_NAME (net_addr):
+ /* TODO */
+ /* fallthrough */
+ default:
+ if (detail)
+ {
+ s = format (s, "%U(%s)", format_hex_bytes, id, len,
+ lldp_port_id_subtype_str (subtype));
+ }
+ else
+ {
+ s = format (s, "%U", format_hex_bytes, id, len);
+ }
+ break;
+ }
+ return s;
+}
+
+/*
+ * format chassis id subtype&value
+ *
+ * @param s format string
+ * @param va - 1st argument - unsigned - chassis id subtype
+ * @param va - 2nd argument - u8* - chassis id
+ * @param va - 3rd argument - unsigned - chassis id length
+ * @param va - 4th argument - int - 1 for detailed output, 0 for simple
+ */
+u8 *
+format_lldp_chassis_id (u8 * s, va_list * va)
+{
+ const lldp_chassis_id_subtype_t subtype =
+ va_arg (*va, lldp_chassis_id_subtype_t);
+ const u8 *id = va_arg (*va, u8 *);
+ const unsigned len = va_arg (*va, unsigned);
+ const int detail = va_arg (*va, int);
+ if (!id)
+ {
+ return s;
+ }
+ switch (subtype)
+ {
+ case LLDP_CHASS_ID_SUBTYPE_NAME (chassis_comp):
+ /* fallthrough */
+ case LLDP_CHASS_ID_SUBTYPE_NAME (intf_alias):
+ /* fallthrough */
+ case LLDP_CHASS_ID_SUBTYPE_NAME (port_comp):
+ /* fallthrough */
+ case LLDP_PORT_ID_SUBTYPE_NAME (local):
+ /* fallthrough */
+ case LLDP_CHASS_ID_SUBTYPE_NAME (intf_name):
+ if (detail)
+ {
+ s = format (s, "%U(%s)", format_ascii_bytes, id, len,
+ lldp_chassis_id_subtype_str (subtype));
+ }
+ else
+ {
+ s = format (s, "%U", format_ascii_bytes, id, len);
+ }
+ break;
+ case LLDP_CHASS_ID_SUBTYPE_NAME (mac_addr):
+ if (ETHER_ADDR_LEN == len)
+ {
+ if (detail)
+ {
+ s = format (s, "%U(%s)", format_mac_address, id,
+ lldp_chassis_id_subtype_str (subtype));
+ }
+ else
+ {
+ s = format (s, "%U", format_mac_address, id);
+ }
+ break;
+ }
+ /* fallthrough */
+ case LLDP_CHASS_ID_SUBTYPE_NAME (net_addr):
+ /* TODO */
+ default:
+ if (detail)
+ {
+ s = format (s, "%U(%s)", format_hex_bytes, id, len,
+ lldp_chassis_id_subtype_str (subtype));
+ }
+ else
+ {
+ s = format (s, "%U", format_hex_bytes, id, len);
+ }
+ break;
+ }
+ return s;
+}
+
+/*
+ * convert a tlv code to human-readable string
+ */
+static const char *
+lldp_tlv_code_str (lldp_tlv_code_t t)
+{
+ switch (t)
+ {
+#define F(n, t, s) \
+ case n: \
+ return s;
+ foreach_lldp_tlv_type (F)
+#undef F
+ }
+ return "unknown lldp tlv";
+}
+
+/*
+ * format a single LLDP TLV
+ *
+ * @param s format string
+ * @param va variable list - pointer to lldp_tlv_t is expected
+ */
+u8 *
+format_lldp_tlv (u8 * s, va_list * va)
+{
+ const lldp_tlv_t *tlv = va_arg (*va, lldp_tlv_t *);
+ if (!tlv)
+ {
+ return s;
+ }
+ u16 l = lldp_tlv_get_length (tlv);
+ switch (lldp_tlv_get_code (tlv))
+ {
+ case LLDP_TLV_NAME (chassis_id):
+ s = format (s, "%U", format_lldp_chassis_id,
+ ((lldp_chassis_id_tlv_t *) tlv)->subtype,
+ ((lldp_chassis_id_tlv_t *) tlv)->id,
+ l - STRUCT_SIZE_OF (lldp_chassis_id_tlv_t, subtype), 1);
+ break;
+ case LLDP_TLV_NAME (port_id):
+ s = format (s, "%U", format_lldp_port_id,
+ ((lldp_port_id_tlv_t *) tlv)->subtype,
+ ((lldp_port_id_tlv_t *) tlv)->id,
+ l - STRUCT_SIZE_OF (lldp_port_id_tlv_t, subtype), 1);
+ break;
+ case LLDP_TLV_NAME (ttl):
+ s = format (s, "%d", ntohs (((lldp_ttl_tlv_t *) tlv)->ttl));
+ break;
+ case LLDP_TLV_NAME (sys_name):
+ /* fallthrough */
+ case LLDP_TLV_NAME (sys_desc):
+ s = format (s, "%U", format_ascii_bytes, tlv->v, l);
+ break;
+ default:
+ s = format (s, "%U", format_hex_bytes, tlv->v, l);
+ }
+
+ return s;
+}
+
+static u8 *
+format_time_ago (u8 * s, va_list * va)
+{
+ f64 ago = va_arg (*va, double);
+ f64 now = va_arg (*va, double);
+ if (ago < 0.01)
+ {
+ return format (s, "never");
+ }
+ return format (s, "%.1fs ago", now - ago);
+}
+
+static u8 *
+format_lldp_intfs_detail (u8 * s, vlib_main_t * vm, const lldp_main_t * lm)
+{
+ vnet_main_t *vnm = &vnet_main;
+ const lldp_intf_t *n;
+ const vnet_hw_interface_t *hw;
+ const vnet_sw_interface_t *sw;
+ s = format (s, "LLDP configuration:\n");
+ if (lm->sys_name)
+ {
+ s = format (s, "Configured system name: %U\n", format_ascii_bytes,
+ lm->sys_name, vec_len (lm->sys_name));
+ }
+ s = format (s, "Configured tx-hold: %d\n", (int) lm->msg_tx_hold);
+ s = format (s, "Configured tx-interval: %d\n", (int) lm->msg_tx_interval);
+ s = format (s, "\nLLDP-enabled interface table:\n");
+ f64 now = vlib_time_now (vm);
+
+ /* *INDENT-OFF* */
+ pool_foreach(
+ n, lm->intfs, ({
+ hw = vnet_get_hw_interface(vnm, n->hw_if_index);
+ sw = vnet_get_sw_interface(lm->vnet_main, hw->sw_if_index);
+ /* Interface shutdown */
+ if (!(sw->flags & (VNET_SW_INTERFACE_FLAG_ADMIN_UP |
+ VNET_SW_INTERFACE_FLAG_BOND_SLAVE)))
+ {
+ s = format(s, "\nInterface name: %s\nInterface/peer state: "
+ "interface down\nLast packet sent: %U\n",
+ hw->name, format_time_ago, n->last_sent, now);
+ }
+ else if (now < n->last_heard + n->ttl)
+ {
+ s = format(s,
+ "\nInterface name: %s\nPort Desc: %s\nInterface/peer "
+ "state: active\nPeer chassis ID: %U\nRemote port ID:"
+ " %U\nLast packet sent: %U\nLast packet received: %U\n",
+ hw->name, n->port_desc, format_lldp_chassis_id,
+ n->chassis_id_subtype, n->chassis_id,
+ vec_len(n->chassis_id), 1,
+ format_lldp_port_id, n->port_id_subtype, n->port_id,
+ vec_len(n->port_id), 1, format_time_ago, n->last_sent,
+ now, format_time_ago, n->last_heard, now);
+ }
+ else
+ {
+ s = format(s,
+ "\nInterface name: %s\nPort Desc: %s\nInterface/peer "
+ "state: inactive(timeout)\nLast known peer chassis ID:"
+ "%U\nLast known peer port ID: %U\nLast packet sent: "
+ "%U\nLast packet received: %U\n",
+ hw->name, n->port_desc, format_lldp_chassis_id,
+ n->chassis_id_subtype, n->chassis_id,
+ vec_len(n->chassis_id), 1,
+ format_lldp_port_id, n->port_id_subtype, n->port_id,
+ vec_len(n->port_id), 1, format_time_ago, n->last_sent,
+ now, format_time_ago, n->last_heard, now);
+ }
+ }));
+ /* *INDENT-ON* */
+ return s;
+}
+
+static u8 *
+format_lldp_intfs (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ const lldp_main_t *lm = va_arg (*va, lldp_main_t *);
+ const int detail = va_arg (*va, int);
+ vnet_main_t *vnm = &vnet_main;
+ const lldp_intf_t *n;
+
+ if (detail)
+ {
+ return format_lldp_intfs_detail (s, vm, lm);
+ }
+
+ f64 now = vlib_time_now (vm);
+ s = format (s, "%-25s %-25s %-25s %=15s %=15s %=10s\n", "Local interface",
+ "Peer chassis ID", "Remote port ID", "Last heard", "Last sent",
+ "Status");
+
+ /* *INDENT-OFF* */
+ pool_foreach(
+ n, lm->intfs, ({
+ const vnet_hw_interface_t *hw =
+ vnet_get_hw_interface(vnm, n->hw_if_index);
+ const vnet_sw_interface_t *sw =
+ vnet_get_sw_interface(lm->vnet_main, hw->sw_if_index);
+ /* Interface shutdown */
+ if (!(sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ continue;
+ if (now < n->last_heard + n->ttl)
+ {
+ s = format(s, "%-25s %-25U %-25U %=15U %=15U %=10s\n", hw->name,
+ format_lldp_chassis_id, n->chassis_id_subtype,
+ n->chassis_id, vec_len(n->chassis_id), 0,
+ format_lldp_port_id, n->port_id_subtype, n->port_id,
+ vec_len(n->port_id), 0, format_time_ago, n->last_heard,
+ now, format_time_ago, n->last_sent, now, "active");
+ }
+ else
+ {
+ s = format(s, "%-25s %-25s %-25s %=15U %=15U %=10s\n", hw->name,
+ "", "", format_time_ago, n->last_heard, now,
+ format_time_ago, n->last_sent, now, "inactive");
+ }
+ }));
+ /* *INDENT-ON* */
+ return s;
+}
+
+static clib_error_t *
+show_lldp (vlib_main_t * vm, unformat_input_t * input,
+ CLIB_UNUSED (vlib_cli_command_t * lmd))
+{
+ lldp_main_t *lm = &lldp_main;
+
+ if (unformat (input, "detail"))
+ {
+ vlib_cli_output (vm, "%U\n", format_lldp_intfs, vm, lm, 1);
+ }
+ else
+ {
+ vlib_cli_output (vm, "%U\n", format_lldp_intfs, vm, lm, 0);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND(show_lldp_command, static) = {
+ .path = "show lldp",
+ .short_help = "show lldp [detail]",
+ .function = show_lldp,
+};
+/* *INDENT-ON* */
+
+/*
+ * packet trace format function, very similar to
+ * lldp_packet_scan except that we call the per TLV format
+ * functions instead of the per TLV processing functions
+ */
+u8 *
+lldp_input_format_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ const lldp_input_trace_t *t = va_arg (*args, lldp_input_trace_t *);
+ const u8 *cur;
+ const lldp_tlv_t *tlv;
+ cur = t->data;
+ while (((cur + lldp_tlv_get_length ((lldp_tlv_t *) cur)) <
+ t->data + t->len))
+ {
+ tlv = (lldp_tlv_t *) cur;
+ if (cur == t->data)
+ {
+ s = format (s, "TLV #%d(%s): %U\n", lldp_tlv_get_code (tlv),
+ lldp_tlv_code_str (lldp_tlv_get_code (tlv)),
+ format_lldp_tlv, tlv);
+ }
+ else
+ {
+ s = format (s, " TLV #%d(%s): %U\n", lldp_tlv_get_code (tlv),
+ lldp_tlv_code_str (lldp_tlv_get_code (tlv)),
+ format_lldp_tlv, tlv);
+ }
+ cur += STRUCT_SIZE_OF (lldp_tlv_t, head) + lldp_tlv_get_length (tlv);
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lldp/lldp_doc.md b/src/vnet/lldp/lldp_doc.md
new file mode 100644
index 00000000..717de898
--- /dev/null
+++ b/src/vnet/lldp/lldp_doc.md
@@ -0,0 +1,86 @@
+# VPP Link Layer Discovery Protocol (LLDP) implementation {#lldp_doc}
+
+This is a memo intended to contain documentation of the VPP LLDP implementation.
+Everything that is not directly obvious should be documented here.
+
+
+## LLDP
+LLDP is a link layer protocol to advertise the capabilities and current status of the system.
+
+There are two nodes handling LLDP:
+
+1. input-node, which processes incoming packets and updates the local database
+2. process-node, which is responsible for sending out LLDP packets from the VPP side
+
+
+### Configuration
+
+LLDP has a global configuration and a per-interface enable setting.
+
+Global configuration is modified using the "set lldp" command
+
+set lldp [system-name <string>] [tx-hold <value>] [tx-interval <value>]
+
+system-name: the name of the VPP system sent to peers in the system-name TLV
+tx-hold: multiplier for tx-interval when setting the time-to-live (TTL) value in the LLDP packets (TTL = tx-hold * tx-interval + 1; if TTL > 65535, then TTL = 65535)
+tx-interval: time interval between sending out LLDP packets
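+
+For example, with the default tx-hold of 4 and tx-interval of 30 seconds, the
+advertised TTL is 4 * 30 + 1 = 121 seconds.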
+
+Per-interface settings are configured using the "set interface lldp" command
+
+set interface lldp <interface> | sw_if_index <idx> [port-desc <string>] [disable]
+
+interface: the name of the interface for which to enable/disable LLDP
+sw_if_index: software interface index, usable when the interface name is not known
+port-desc: port description
+disable: if present, LLDP is disabled on the interface; otherwise it is enabled
+
+### Configuration example
+
+Configure system-name as "VPP" and transmit interval to 10 seconds:
+
+set lldp system-name VPP tx-interval 10
+
+Enable LLDP on interface TenGigabitEthernet5/0/1 with port description
+
+set interface lldp TenGigabitEthernet5/0/1 port-desc vtf:eth0
+
+
+### Operational data
+
+The list of LLDP-enabled interfaces which are up can be shown using the "show lldp" command
+
+Example:
+DBGvpp# show lldp
+Local interface Peer chassis ID Remote port ID Last heard Last sent Status
+GigabitEthernet2/0/1 never 27.0s ago inactive
+TenGigabitEthernet5/0/1 8c:60:4f:dd:ca:52 Eth1/3/3 20.1s ago 18.3s ago active
+
+All LLDP configuration data and all LLDP-enabled interfaces can be shown using the "show lldp detail" command
+
+Example:
+DBGvpp# show lldp detail
+LLDP configuration:
+Configured system name: vpp
+Configured tx-hold: 4
+Configured tx-interval: 30
+
+LLDP-enabled interface table:
+
+Interface name: GigabitEthernet2/0/1
+Interface/peer state: inactive(timeout)
+Last known peer chassis ID:
+Last known peer port ID:
+Last packet sent: 12.4s ago
+Last packet received: never
+
+Interface name: GigabitEthernet2/0/2
+Interface/peer state: interface down
+Last packet sent: never
+
+Interface name: TenGigabitEthernet5/0/1
+Interface/peer state: active
+Peer chassis ID: 8c:60:4f:dd:ca:52(MAC address)
+Remote port ID: Eth1/3/3(Locally assigned)
+Last packet sent: 3.6s ago
+Last packet received: 5.5s ago
+
diff --git a/src/vnet/lldp/lldp_input.c b/src/vnet/lldp/lldp_input.c
new file mode 100644
index 00000000..e88f6fdb
--- /dev/null
+++ b/src/vnet/lldp/lldp_input.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LLDP packet parsing implementation
+ */
+#include <vnet/lldp/lldp_node.h>
+#include <vnet/lldp/lldp_protocol.h>
+#include <vlibmemory/api.h>
+
+typedef struct
+{
+ u32 hw_if_index;
+ u8 chassis_id_len;
+ u8 chassis_id_subtype;
+ u8 portid_len;
+ u8 portid_subtype;
+ u16 ttl;
+ u8 data[0]; /* this contains both chassis id (chassis_id_len bytes) and port
+ id (portid_len bytes) */
+} lldp_intf_update_t;
+
+static void
+lldp_rpc_update_peer_cb (const lldp_intf_update_t * a)
+{
+ ASSERT (vlib_get_thread_index () == 0);
+
+ lldp_intf_t *n = lldp_get_intf (&lldp_main, a->hw_if_index);
+ if (!n)
+ {
+ /* LLDP turned off for this interface, ignore the update */
+ return;
+ }
+ const u8 *chassis_id = a->data;
+ const u8 *portid = a->data + a->chassis_id_len;
+
+ if (n->chassis_id)
+ {
+ _vec_len (n->chassis_id) = 0;
+ }
+ vec_add (n->chassis_id, chassis_id, a->chassis_id_len);
+ n->chassis_id_subtype = a->chassis_id_subtype;
+ if (n->port_id)
+ {
+ _vec_len (n->port_id) = 0;
+ }
+ vec_add (n->port_id, portid, a->portid_len);
+ n->port_id_subtype = a->portid_subtype;
+ n->ttl = a->ttl;
+ n->last_heard = vlib_time_now (lldp_main.vlib_main);
+}
+
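+/*
+ * Marshal a peer update into a single flat buffer (fixed-size header
+ * followed by the chassis id and port id bytes) and hand it to the main
+ * thread via RPC, so that lldp_intf_t state is only ever written from
+ * thread 0 (see the ASSERT in lldp_rpc_update_peer_cb above).
+ */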
+static void
+lldp_rpc_update_peer (u32 hw_if_index, const u8 * chid, u8 chid_len,
+ u8 chid_subtype, const u8 * portid,
+ u8 portid_len, u8 portid_subtype, u16 ttl)
+{
+ const size_t data_size =
+ sizeof (lldp_intf_update_t) + chid_len + portid_len;
+ u8 data[data_size];
+ lldp_intf_update_t *u = (lldp_intf_update_t *) data;
+ u->hw_if_index = hw_if_index;
+ u->chassis_id_len = chid_len;
+ u->chassis_id_subtype = chid_subtype;
+ u->ttl = ttl;
+ u->portid_len = portid_len;
+ u->portid_subtype = portid_subtype;
+ clib_memcpy (u->data, chid, chid_len);
+ clib_memcpy (u->data + chid_len, portid, portid_len);
+ vl_api_rpc_call_main_thread (lldp_rpc_update_peer_cb, data, data_size);
+}
+
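+/* The 16-bit TLV header packs a 7-bit type and a 9-bit length:
+   byte1 carries the type in its upper 7 bits and length bit 8 in its
+   lowest bit; byte2 carries length bits 7..0. */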
+lldp_tlv_code_t
+lldp_tlv_get_code (const lldp_tlv_t * tlv)
+{
+ return tlv->head.byte1 >> 1;
+}
+
+void
+lldp_tlv_set_code (lldp_tlv_t * tlv, lldp_tlv_code_t code)
+{
+ tlv->head.byte1 = (tlv->head.byte1 & 1) + (code << 1);
+}
+
+u16
+lldp_tlv_get_length (const lldp_tlv_t * tlv)
+{
+ return (((u16) (tlv->head.byte1 & 1)) << 8) + tlv->head.byte2;
+}
+
+void
+lldp_tlv_set_length (lldp_tlv_t * tlv, u16 length)
+{
+ tlv->head.byte2 = length & ((1 << 8) - 1);
+ if (length > (1 << 8) - 1)
+ {
+ tlv->head.byte1 |= 1;
+ }
+ else
+ {
+ tlv->head.byte1 &= (1 << 8) - 2;
+ }
+}
+
+lldp_main_t lldp_main;
+
+static int
+lldp_packet_scan (u32 hw_if_index, const lldp_tlv_t * pkt)
+{
+ const lldp_tlv_t *tlv = pkt;
+
+#define TLV_VIOLATES_PKT_BOUNDARY(pkt, tlv) \
+ (((((u8 *)tlv) + sizeof (lldp_tlv_t)) > ((u8 *)pkt + vec_len (pkt))) || \
+ ((((u8 *)tlv) + lldp_tlv_get_length (tlv)) > ((u8 *)pkt + vec_len (pkt))))
+
+ /* first tlv is always chassis id, followed by port id and ttl tlvs */
+ if (TLV_VIOLATES_PKT_BOUNDARY (pkt, tlv) ||
+ LLDP_TLV_NAME (chassis_id) != lldp_tlv_get_code (tlv))
+ {
+ return LLDP_ERROR_BAD_TLV;
+ }
+
+ u16 l = lldp_tlv_get_length (tlv);
+ if (l < STRUCT_SIZE_OF (lldp_chassis_id_tlv_t, subtype) +
+ LLDP_MIN_CHASS_ID_LEN ||
+ l > STRUCT_SIZE_OF (lldp_chassis_id_tlv_t, subtype) +
+ LLDP_MAX_CHASS_ID_LEN)
+ {
+ return LLDP_ERROR_BAD_TLV;
+ }
+
+ u8 chid_subtype = ((lldp_chassis_id_tlv_t *) tlv)->subtype;
+ u8 *chid = ((lldp_chassis_id_tlv_t *) tlv)->id;
+ u8 chid_len = l - STRUCT_SIZE_OF (lldp_chassis_id_tlv_t, subtype);
+
+ tlv = (lldp_tlv_t *) ((u8 *) tlv + STRUCT_SIZE_OF (lldp_tlv_t, head) + l);
+
+ if (TLV_VIOLATES_PKT_BOUNDARY (pkt, tlv) ||
+ LLDP_TLV_NAME (port_id) != lldp_tlv_get_code (tlv))
+ {
+ return LLDP_ERROR_BAD_TLV;
+ }
+ l = lldp_tlv_get_length (tlv);
+ if (l < STRUCT_SIZE_OF (lldp_port_id_tlv_t, subtype) +
+ LLDP_MIN_PORT_ID_LEN ||
+      l > STRUCT_SIZE_OF (lldp_port_id_tlv_t, subtype) +
+ LLDP_MAX_PORT_ID_LEN)
+ {
+ return LLDP_ERROR_BAD_TLV;
+ }
+
+ u8 portid_subtype = ((lldp_port_id_tlv_t *) tlv)->subtype;
+ u8 *portid = ((lldp_port_id_tlv_t *) tlv)->id;
+ u8 portid_len = l - STRUCT_SIZE_OF (lldp_port_id_tlv_t, subtype);
+
+ tlv = (lldp_tlv_t *) ((u8 *) tlv + STRUCT_SIZE_OF (lldp_tlv_t, head) + l);
+
+ if (TLV_VIOLATES_PKT_BOUNDARY (pkt, tlv) ||
+ LLDP_TLV_NAME (ttl) != lldp_tlv_get_code (tlv))
+ {
+ return LLDP_ERROR_BAD_TLV;
+ }
+ l = lldp_tlv_get_length (tlv);
+ if (l != STRUCT_SIZE_OF (lldp_ttl_tlv_t, ttl))
+ {
+ return LLDP_ERROR_BAD_TLV;
+ }
+ u16 ttl = ntohs (((lldp_ttl_tlv_t *) tlv)->ttl);
+ tlv = (lldp_tlv_t *) ((u8 *) tlv + STRUCT_SIZE_OF (lldp_tlv_t, head) + l);
+ while (!TLV_VIOLATES_PKT_BOUNDARY (pkt, tlv) &&
+ LLDP_TLV_NAME (pdu_end) != lldp_tlv_get_code (tlv))
+ {
+ switch (lldp_tlv_get_code (tlv))
+ {
+#define F(num, type, str) \
+ case LLDP_TLV_NAME (type): \
+ /* ignore optional TLV */ \
+ break;
+ foreach_lldp_optional_tlv_type (F);
+#undef F
+ default:
+ return LLDP_ERROR_BAD_TLV;
+ }
+ tlv = (lldp_tlv_t *) ((u8 *) tlv + STRUCT_SIZE_OF (lldp_tlv_t, head) +
+ lldp_tlv_get_length (tlv));
+ }
+ /* last tlv is pdu_end */
+ if (TLV_VIOLATES_PKT_BOUNDARY (pkt, tlv) ||
+ LLDP_TLV_NAME (pdu_end) != lldp_tlv_get_code (tlv) ||
+ 0 != lldp_tlv_get_length (tlv))
+ {
+ return LLDP_ERROR_BAD_TLV;
+ }
+ lldp_rpc_update_peer (hw_if_index, chid, chid_len, chid_subtype, portid,
+ portid_len, portid_subtype, ttl);
+ return LLDP_ERROR_NONE;
+}
+
+lldp_intf_t *
+lldp_get_intf (lldp_main_t * lm, u32 hw_if_index)
+{
+ uword *p = hash_get (lm->intf_by_hw_if_index, hw_if_index);
+
+ if (p)
+ {
+ return pool_elt_at_index (lm->intfs, p[0]);
+ }
+ return NULL;
+}
+
+lldp_intf_t *
+lldp_create_intf (lldp_main_t * lm, u32 hw_if_index)
+{
+ uword *p;
+ lldp_intf_t *n;
+ p = hash_get (lm->intf_by_hw_if_index, hw_if_index);
+
+ if (p == 0)
+ {
+ pool_get (lm->intfs, n);
+ memset (n, 0, sizeof (*n));
+ n->hw_if_index = hw_if_index;
+ hash_set (lm->intf_by_hw_if_index, n->hw_if_index, n - lm->intfs);
+ }
+ else
+ {
+ n = pool_elt_at_index (lm->intfs, p[0]);
+ }
+ return n;
+}
+
+/*
+ * lldp input routine
+ */
+lldp_error_t
+lldp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
+{
+ lldp_main_t *lm = &lldp_main;
+ lldp_error_t e;
+
+ /* find our interface */
+ vnet_sw_interface_t *sw_interface = vnet_get_sw_interface (lm->vnet_main,
+ vnet_buffer
+ (b0)->sw_if_index
+ [VLIB_RX]);
+ lldp_intf_t *n = lldp_get_intf (lm, sw_interface->hw_if_index);
+
+ if (!n)
+ {
+ /* lldp disabled on this interface, we're done */
+ return LLDP_ERROR_DISABLED;
+ }
+
+ /* Actually scan the packet */
+ e = lldp_packet_scan (sw_interface->hw_if_index,
+ vlib_buffer_get_current (b0));
+
+ return e;
+}
+
+/*
+ * setup function
+ */
+static clib_error_t *
+lldp_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+ lldp_main_t *lm = &lldp_main;
+
+ if ((error = vlib_call_init_function (vm, lldp_template_init)))
+ return error;
+
+ lm->vlib_main = vm;
+ lm->vnet_main = vnet_get_main ();
+ lm->msg_tx_hold = 4; /* default value per IEEE 802.1AB-2009 */
+ lm->msg_tx_interval = 30; /* default value per IEEE 802.1AB-2009 */
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (lldp_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lldp/lldp_node.c b/src/vnet/lldp/lldp_node.c
new file mode 100644
index 00000000..acaa5e10
--- /dev/null
+++ b/src/vnet/lldp/lldp_node.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LLDP nodes implementation
+ */
+#include <vnet/lldp/lldp_node.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+
+/* set this to 1 to turn on debug prints via clib_warning() */
+#define LLDP_DEBUG (0)
+
+static vlib_node_registration_t lldp_process_node;
+
+#define F(sym, string) static char LLDP_ERR_##sym##_STR[] = string;
+foreach_lldp_error (F);
+#undef F
+
+/*
+ * packet counter strings
+ * Dump these counters via the "show error" CLI command
+ */
+static char *lldp_error_strings[] = {
+#define F(sym, string) LLDP_ERR_##sym##_STR,
+ foreach_lldp_error (F)
+#undef F
+};
+
+/*
+ * We actually send all lldp pkts to the "error" node after scanning
+ * them, so the graph node has only one next-index. The "error-drop"
+ * node automatically bumps our per-node packet counters for us.
+ */
+typedef enum
+{
+ LLDP_INPUT_NEXT_NORMAL,
+ LLDP_INPUT_N_NEXT,
+} lldp_next_t;
+
+/*
+ * Process a frame of lldp packets
+ * Expect 1 packet / frame
+ */
+static uword
+lldp_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, *from;
+ lldp_input_trace_t *t0;
+
+ from = vlib_frame_vector_args (frame); /* array of buffer indices */
+ n_left_from = frame->n_vectors; /* number of buffer indices */
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0, error0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ next0 = LLDP_INPUT_NEXT_NORMAL;
+
+ /* scan this lldp pkt. error0 is the counter index to bump */
+ error0 = lldp_input (vm, b0, bi0);
+ b0->error = node->errors[error0];
+
+ /* If this pkt is traced, snapshot the data */
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ int len;
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ len = (b0->current_length < sizeof (t0->data)) ? b0->current_length
+ : sizeof (t0->data);
+ t0->len = len;
+ clib_memcpy (t0->data, vlib_buffer_get_current (b0), len);
+ }
+ /* push this pkt to the next graph node, always error-drop */
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ return frame->n_vectors;
+}
+
+/*
+ * lldp input graph node declaration
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(lldp_input_node, static) = {
+ .function = lldp_node_fn,
+ .name = "lldp-input",
+ .vector_size = sizeof(u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = LLDP_N_ERROR,
+ .error_strings = lldp_error_strings,
+
+ .format_trace = lldp_input_format_trace,
+
+ .n_next_nodes = LLDP_INPUT_N_NEXT,
+ .next_nodes =
+ {
+ [LLDP_INPUT_NEXT_NORMAL] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * lldp process node function
+ */
+static uword
+lldp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ lldp_main_t *lm = &lldp_main;
+ f64 timeout = 0;
+ uword event_type, *event_data = 0;
+
+ /* So we can send events to the lldp process */
+ lm->lldp_process_node_index = lldp_process_node.index;
+
+ /* with ethernet input */
+ ethernet_register_input_type (vm, ETHERNET_TYPE_802_1_LLDP /* LLDP */ ,
+ lldp_input_node.index);
+
+ while (1)
+ {
+ if (vec_len (lm->intfs_timeouts))
+ {
+#if LLDP_DEBUG
+ clib_warning ("DEBUG: wait for event with timeout %f", timeout);
+#endif
+ (void) vlib_process_wait_for_event_or_clock (vm, timeout);
+ }
+ else
+ {
+#if LLDP_DEBUG
+ clib_warning ("DEBUG: wait for event without timeout");
+#endif
+ (void) vlib_process_wait_for_event (vm);
+ }
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0: /* no events => timeout */
+ /* nothing to do here */
+ break;
+ case LLDP_EVENT_RESCHEDULE:
+ /* nothing to do here - reschedule is done automatically after
+ * each event or timeout */
+ break;
+ default:
+ clib_warning ("BUG: event type 0x%wx", event_type);
+ break;
+ }
+ if (!vec_len (lm->intfs_timeouts))
+ {
+ continue;
+ }
+ /* send packet(s) and schedule another timeout */
+ const f64 now = vlib_time_now (lm->vlib_main);
+ while (1)
+ {
+ lldp_intf_t *n = pool_elt_at_index (lm->intfs,
+ lm->intfs_timeouts
+ [lm->intfs_timeouts_idx]);
+ if (n->last_sent < 0.01 || now > n->last_sent + lm->msg_tx_interval)
+ {
+#if LLDP_DEBUG
+ clib_warning ("send packet to lldp %p, if idx %d", n,
+ n->hw_if_index);
+#endif
+ lldp_send_ethernet (lm, n, 0);
+ ++lm->intfs_timeouts_idx;
+ if (lm->intfs_timeouts_idx >= vec_len (lm->intfs_timeouts))
+ {
+ lm->intfs_timeouts_idx = 0;
+ }
+ continue;
+ }
+ else
+ {
+ timeout = n->last_sent + lm->msg_tx_interval - now;
+ break;
+ }
+ }
+#if LLDP_DEBUG
+ clib_warning ("DEBUG: timeout set to %f", timeout);
+ u8 *s = NULL;
+ u32 i;
+ vec_foreach_index (i, lm->intfs_timeouts)
+ {
+ if (i == lm->intfs_timeouts_idx)
+ {
+ s = format (s, " [%d]", lm->intfs_timeouts[i]);
+ }
+ else
+ {
+ s = format (s, " %d", lm->intfs_timeouts[i]);
+ }
+ }
+ clib_warning ("DEBUG: timeout schedule: %s", s);
+ vec_free (s);
+#endif
+ if (event_data)
+ {
+ _vec_len (event_data) = 0;
+ }
+ }
+
+ return 0;
+}
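+
+/*
+ * Scheduling note: intfs_timeouts is used as a circular list and
+ * intfs_timeouts_idx always points at the interface due to transmit
+ * next. Each pass of the loop above either sends for that interface and
+ * advances the index (with wrap-around), or computes the remaining wait
+ * time and sleeps on it.
+ */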
+
+/*
+ * lldp process node declaration
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(lldp_process_node, static) = {
+ .function = lldp_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "lldp-process",
+};
+/* *INDENT-ON* */
+
+void
+lldp_schedule_intf (lldp_main_t * lm, lldp_intf_t * n)
+{
+ const int idx = n - lm->intfs;
+ u32 v;
+ vec_foreach_index (v, lm->intfs_timeouts)
+ {
+ if (lm->intfs_timeouts[v] == idx)
+ {
+ /* already scheduled */
+ return;
+ }
+ }
+ n->last_sent = 0; /* ensure that a packet is sent out immediately */
+ /* put the interface at the current position in the timeouts - it
+ * will timeout immediately */
+ vec_insert (lm->intfs_timeouts, 1, lm->intfs_timeouts_idx);
+ lm->intfs_timeouts[lm->intfs_timeouts_idx] = n - lm->intfs;
+ vlib_process_signal_event (lm->vlib_main, lm->lldp_process_node_index,
+ LLDP_EVENT_RESCHEDULE, 0);
+#if LLDP_DEBUG
+ clib_warning ("DEBUG: schedule interface %p, if idx %d", n, n->hw_if_index);
+#endif
+}
+
+void
+lldp_unschedule_intf (lldp_main_t * lm, lldp_intf_t * n)
+{
+ if (!n)
+ {
+ return;
+ }
+#if LLDP_DEBUG
+ clib_warning ("DEBUG: unschedule interface %p, if idx %d", n,
+ n->hw_if_index);
+#endif
+ const int idx = n - lm->intfs;
+ u32 v;
+ /* remove intf index from timeouts vector */
+ vec_foreach_index (v, lm->intfs_timeouts)
+ {
+ if (lm->intfs_timeouts[v] == idx)
+ {
+ vec_delete (lm->intfs_timeouts, 1, v);
+ break;
+ }
+ }
+ /* wrap current timeout index to first element if needed */
+ if (lm->intfs_timeouts_idx >= vec_len (lm->intfs_timeouts))
+ {
+ lm->intfs_timeouts_idx = 0;
+ }
+ vlib_process_signal_event (lm->vlib_main, lm->lldp_process_node_index,
+ LLDP_EVENT_RESCHEDULE, 0);
+}
+
+static clib_error_t *
+lldp_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+ lldp_main_t *lm = &lldp_main;
+ vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ lldp_intf_t *n = lldp_get_intf (lm, hi->hw_if_index);
+ if (n)
+ {
+ if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ /* FIXME - the packet sent here isn't sent properly - need to find a
+ * way to send the packet before the interface goes down */
+ lldp_send_ethernet (lm, n, 1);
+ lldp_unschedule_intf (lm, n);
+ }
+ }
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lldp_sw_interface_up_down);
+
+static clib_error_t *
+lldp_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ lldp_main_t *lm = &lldp_main;
+ lldp_intf_t *n = lldp_get_intf (lm, hw_if_index);
+ if (n)
+ {
+ if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+ {
+ lldp_schedule_intf (lm, n);
+ }
+ }
+ return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lldp_hw_interface_up_down);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lldp/lldp_node.h b/src/vnet/lldp/lldp_node.h
new file mode 100644
index 00000000..14a10e33
--- /dev/null
+++ b/src/vnet/lldp/lldp_node.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LLDP global declarations
+ */
+#ifndef __included_lldp_node_h__
+#define __included_lldp_node_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/snap/snap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+
+#include <vnet/lldp/lldp_protocol.h>
+
+typedef struct lldp_intf
+{
+ /* hw interface index */
+ u32 hw_if_index;
+
+ /* Timers */
+ f64 last_heard;
+ f64 last_sent;
+
+ /* Info received from peer */
+ u8 *chassis_id;
+ u8 *port_id;
+ u16 ttl;
+ lldp_port_id_subtype_t port_id_subtype;
+ lldp_chassis_id_subtype_t chassis_id_subtype;
+
+ /* Local info */
+ u8 *port_desc;
+
+} lldp_intf_t;
+
+typedef struct
+{
+ /* pool of lldp-enabled interface context data */
+ lldp_intf_t *intfs;
+
+ /* rapidly find an interface by vlib hw interface index */
+ uword *intf_by_hw_if_index;
+
+ /* Background process node index */
+ u32 lldp_process_node_index;
+
+ /* interface idxs (into intfs pool) in the order of timing out */
+ u32 *intfs_timeouts;
+
+ /* index of the interface which will time out next */
+ u32 intfs_timeouts_idx;
+
+ /* packet template for sending out packets */
+ vlib_packet_template_t packet_template;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /* system name advertised over LLDP (default is none) */
+ u8 *sys_name;
+
+ /* IEEE Std 802.1AB-2009:
+ * 9.2.5.6 msgTxHold
+ * This variable is used, as a multiplier of msgTxInterval, to determine the
+ * value of txTTL that is carried in LLDP frames transmitted by the LLDP
+ * agent. The recommended default value of msgTxHold is 4; this value can
+ * be changed by management to any value in the range 1 through 100.
+ */
+ u8 msg_tx_hold;
+
+ /* IEEE Std 802.1AB-2009:
+ * 9.2.5.7 msgTxInterval
+ * This variable defines the time interval in timer ticks between
+ * transmissions during normal transmission periods (i.e., txFast is zero).
+ * The recommended default value for msgTxInterval is 30 s; this value can
+ * be changed by management to any value in the range 1 through 3600.
+ */
+ u16 msg_tx_interval;
+} lldp_main_t;
+
+#define LLDP_MIN_TX_HOLD (1)
+#define LLDP_MAX_TX_HOLD (100)
+#define LLDP_MIN_TX_INTERVAL (1)
+#define LLDP_MAX_TX_INTERVAL (3600)
+
+extern lldp_main_t lldp_main;
+
+/* Packet counters */
+#define foreach_lldp_error(F) \
+ F(NONE, "good lldp packets (processed)") \
+ F(CACHE_HIT, "good lldp packets (cache hit)") \
+ F(BAD_TLV, "lldp packets with bad TLVs") \
+ F(DISABLED, "lldp packets received on disabled interfaces")
+
+typedef enum
+{
+#define F(sym, str) LLDP_ERROR_##sym,
+ foreach_lldp_error (F)
+#undef F
+ LLDP_N_ERROR,
+} lldp_error_t;
+
+/* lldp packet trace capture */
+typedef struct
+{
+ u32 len;
+ u8 data[400];
+} lldp_input_trace_t;
+
+typedef enum
+{
+ LLDP_EVENT_RESCHEDULE = 1,
+} lldp_process_event_t;
+
+lldp_intf_t *lldp_get_intf (lldp_main_t * lm, u32 hw_if_index);
+lldp_intf_t *lldp_create_intf (lldp_main_t * lm, u32 hw_if_index);
+void lldp_delete_intf (lldp_main_t * lm, lldp_intf_t * n);
+lldp_error_t lldp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0);
+u8 *lldp_input_format_trace (u8 * s, va_list * args);
+void lldp_send_ethernet (lldp_main_t * lm, lldp_intf_t * n, int shutdown);
+void lldp_schedule_intf (lldp_main_t * lm, lldp_intf_t * n);
+void lldp_unschedule_intf (lldp_main_t * lm, lldp_intf_t * n);
+
+#endif /* __included_lldp_node_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lldp/lldp_output.c b/src/vnet/lldp/lldp_output.c
new file mode 100644
index 00000000..8698ec97
--- /dev/null
+++ b/src/vnet/lldp/lldp_output.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LLDP packet generation implementation
+ */
+#include <vnet/lldp/lldp_node.h>
+
+static void
+lldp_add_chassis_id (const vnet_hw_interface_t * hw, u8 ** t0p)
+{
+ lldp_chassis_id_tlv_t *t = (lldp_chassis_id_tlv_t *) * t0p;
+
+ lldp_tlv_set_code ((lldp_tlv_t *) t, LLDP_TLV_NAME (chassis_id));
+ t->subtype = LLDP_CHASS_ID_SUBTYPE_NAME (mac_addr);
+
+ const size_t addr_len = 6;
+ clib_memcpy (&t->id, hw->hw_address, addr_len);
+ const size_t len =
+ STRUCT_SIZE_OF (lldp_chassis_id_tlv_t, subtype) + addr_len;
+ lldp_tlv_set_length ((lldp_tlv_t *) t, len);
+ *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
+}
+
+static void
+lldp_add_port_id (const vnet_hw_interface_t * hw, u8 ** t0p)
+{
+ lldp_port_id_tlv_t *t = (lldp_port_id_tlv_t *) * t0p;
+
+ lldp_tlv_set_code ((lldp_tlv_t *) t, LLDP_TLV_NAME (port_id));
+ t->subtype = LLDP_PORT_ID_SUBTYPE_NAME (intf_name);
+
+ const size_t name_len = vec_len (hw->name);
+ clib_memcpy (&t->id, hw->name, name_len);
+ const size_t len = STRUCT_SIZE_OF (lldp_port_id_tlv_t, subtype) + name_len;
+ lldp_tlv_set_length ((lldp_tlv_t *) t, len);
+ *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
+}
+
+static void
+lldp_add_ttl (const lldp_main_t * lm, u8 ** t0p, int shutdown)
+{
+ lldp_ttl_tlv_t *t = (lldp_ttl_tlv_t *) * t0p;
+ lldp_tlv_set_code ((lldp_tlv_t *) t, LLDP_TLV_NAME (ttl));
+ if (shutdown)
+ {
+ t->ttl = 0;
+ }
+ else
+ {
+ if ((size_t) lm->msg_tx_interval * lm->msg_tx_hold + 1 > (1 << 16) - 1)
+ {
+ t->ttl = htons ((1 << 16) - 1);
+ }
+ else
+ {
+ t->ttl = htons (lm->msg_tx_hold * lm->msg_tx_interval + 1);
+ }
+ }
+ const size_t len = STRUCT_SIZE_OF (lldp_ttl_tlv_t, ttl);
+ lldp_tlv_set_length ((lldp_tlv_t *) t, len);
+ *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
+}
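+
+/*
+ * With the defaults set in lldp_init() (msg_tx_hold = 4,
+ * msg_tx_interval = 30 s), the TTL advertised above is
+ * 4 * 30 + 1 = 121 seconds, clamped to 65535.
+ */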
+
+static void
+lldp_add_port_desc (const lldp_main_t * lm, lldp_intf_t * n, u8 ** t0p)
+{
+ const size_t len = vec_len (n->port_desc);
+ if (len)
+ {
+ lldp_tlv_t *t = (lldp_tlv_t *) * t0p;
+ lldp_tlv_set_code (t, LLDP_TLV_NAME (port_desc));
+ lldp_tlv_set_length (t, len);
+ clib_memcpy (t->v, n->port_desc, len);
+ *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
+ }
+}
+
+static void
+lldp_add_sys_name (const lldp_main_t * lm, u8 ** t0p)
+{
+ const size_t len = vec_len (lm->sys_name);
+ if (len)
+ {
+ lldp_tlv_t *t = (lldp_tlv_t *) * t0p;
+ lldp_tlv_set_code (t, LLDP_TLV_NAME (sys_name));
+ lldp_tlv_set_length (t, len);
+ clib_memcpy (t->v, lm->sys_name, len);
+ *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
+ }
+}
+
+static void
+lldp_add_pdu_end (u8 ** t0p)
+{
+ lldp_tlv_t *t = (lldp_tlv_t *) * t0p;
+ lldp_tlv_set_code (t, LLDP_TLV_NAME (pdu_end));
+ lldp_tlv_set_length (t, 0);
+ *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head);
+}
+
+static void
+lldp_add_tlvs (lldp_main_t * lm, vnet_hw_interface_t * hw, u8 ** t0p,
+ int shutdown, lldp_intf_t * n)
+{
+ lldp_add_chassis_id (hw, t0p);
+ lldp_add_port_id (hw, t0p);
+ lldp_add_ttl (lm, t0p, shutdown);
+ lldp_add_port_desc (lm, n, t0p);
+ lldp_add_sys_name (lm, t0p);
+ lldp_add_pdu_end (t0p);
+}
+
+/*
+ * send a lldp pkt on an ethernet interface
+ */
+void
+lldp_send_ethernet (lldp_main_t * lm, lldp_intf_t * n, int shutdown)
+{
+ u32 *to_next;
+ ethernet_header_t *h0;
+ vnet_hw_interface_t *hw;
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u8 *t0;
+ vlib_frame_t *f;
+ vlib_main_t *vm = lm->vlib_main;
+ vnet_main_t *vnm = lm->vnet_main;
+
+ /*
+ * see lldp_template_init() to understand what's already painted
+ * into the buffer by the packet template mechanism
+ */
+ h0 = vlib_packet_template_get_packet (vm, &lm->packet_template, &bi0);
+
+ if (!h0)
+ return;
+
+ /* Add the interface's ethernet source address */
+ hw = vnet_get_hw_interface (vnm, n->hw_if_index);
+
+ clib_memcpy (h0->src_address, hw->hw_address, vec_len (hw->hw_address));
+
+ u8 *data = ((u8 *) h0) + sizeof (*h0);
+ t0 = data;
+
+ /* add TLVs */
+ lldp_add_tlvs (lm, hw, &t0, shutdown, n);
+
+ /* Set the outbound packet length */
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->current_length = sizeof (*h0) + t0 - data;
+
+ /* And the outbound interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = hw->sw_if_index;
+
+ /* And output the packet on the correct interface */
+ f = vlib_get_frame_to_node (vm, hw->output_node_index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+
+ vlib_put_frame_to_node (vm, hw->output_node_index, f);
+ n->last_sent = vlib_time_now (vm);
+}
+
+void
+lldp_delete_intf (lldp_main_t * lm, lldp_intf_t * n)
+{
+ if (n)
+ {
+ lldp_unschedule_intf (lm, n);
+ hash_unset (lm->intf_by_hw_if_index, n->hw_if_index);
+ vec_free (n->chassis_id);
+ vec_free (n->port_id);
+ vec_free (n->port_desc);
+ pool_put (lm->intfs, n);
+ }
+}
+
+static clib_error_t *
+lldp_template_init (vlib_main_t * vm)
+{
+ lldp_main_t *lm = &lldp_main;
+
+ /* Create the ethernet lldp packet template */
+ {
+ ethernet_header_t h;
+
+ memset (&h, 0, sizeof (h));
+
+ /*
+ * Send to 01:80:C2:00:00:0E - propagation constrained to a single
+ * physical link - stopped by all types of bridges
+ */
+ h.dst_address[0] = 0x01;
+ h.dst_address[1] = 0x80;
+ h.dst_address[2] = 0xC2;
+ /* h.dst_address[3] = 0x00; (memset) */
+ /* h.dst_address[4] = 0x00; (memset) */
+ h.dst_address[5] = 0x0E;
+
+ /* leave src address blank (fill in at send time) */
+
+ h.type = htons (ETHERNET_TYPE_802_1_LLDP);
+
+ vlib_packet_template_init (vm, &lm->packet_template,
+ /* data */ &h, sizeof (h),
+ /* alloc chunk size */ 8, "lldp-ethernet");
+ }
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (lldp_template_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lldp/lldp_protocol.h b/src/vnet/lldp/lldp_protocol.h
new file mode 100644
index 00000000..e641b26e
--- /dev/null
+++ b/src/vnet/lldp/lldp_protocol.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_lldp_protocol_h__
+#define __included_lldp_protocol_h__
+/**
+ * @file
+ * @brief LLDP protocol declarations
+ */
+#include <vnet/srp/packet.h>
+
+/*
+ * optional TLV codes.
+ */
+#define foreach_lldp_optional_tlv_type(F) \
+ F (4, port_desc, "Port Description") \
+ F (5, sys_name, "System name") \
+ F (6, sys_desc, "System Description") \
+ F (7, sys_caps, "System Capabilities") \
+ F (8, mgmt_addr, "Management Address") \
+ F (127, org_spec, "Organizationally Specific TLV")
+
+/*
+ * all TLV codes.
+ */
+#define foreach_lldp_tlv_type(F) \
+ F (0, pdu_end, "End of LLDPDU") \
+ F (1, chassis_id, "Chassis ID") \
+ F (2, port_id, "Port ID") \
+ F (3, ttl, "Time To Live") \
+ foreach_lldp_optional_tlv_type (F)
+
+#define LLDP_TLV_NAME(t) LLDP_TLV_##t
+
+typedef enum
+{
+#define F(n, t, s) LLDP_TLV_NAME (t) = n,
+ foreach_lldp_tlv_type (F)
+#undef F
+} lldp_tlv_code_t;
+
+struct lldp_tlv_head
+{
+ u8 byte1; /* contains TLV code in the upper 7 bits + MSB of length */
+ u8 byte2; /* contains the lower bits of length */
+};
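+
+/*
+ * Header bit layout (illustration): the upper 7 bits of byte1 carry the
+ * TLV code, bit 0 of byte1 is bit 8 of the length, and byte2 carries
+ * length bits 7..0, giving a 9-bit length (0..511).
+ */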
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ struct lldp_tlv_head head;
+ u8 v[0];
+}) lldp_tlv_t;
+/* *INDENT-ON* */
+
+lldp_tlv_code_t lldp_tlv_get_code (const lldp_tlv_t * tlv);
+void lldp_tlv_set_code (lldp_tlv_t * tlv, lldp_tlv_code_t code);
+u16 lldp_tlv_get_length (const lldp_tlv_t * tlv);
+void lldp_tlv_set_length (lldp_tlv_t * tlv, u16 length);
+
+#define foreach_chassis_id_subtype(F) \
+ F (0, reserved, "Reserved") \
+ F (1, chassis_comp, "Chassis component") \
+ F (2, intf_alias, "Interface alias") \
+ F (3, port_comp, "Port component") \
+ F (4, mac_addr, "MAC address") \
+ F (5, net_addr, "Network address") \
+ F (6, intf_name, "Interface name") \
+ F (7, local, "Locally assigned")
+
+#define LLDP_CHASS_ID_SUBTYPE_NAME(t) LLDP_CHASS_ID_SUBTYPE_##t
+#define LLDP_MIN_CHASS_ID_LEN (1)
+#define LLDP_MAX_CHASS_ID_LEN (255)
+
+typedef enum
+{
+#define F(n, t, s) LLDP_CHASS_ID_SUBTYPE_NAME (t) = n,
+ foreach_chassis_id_subtype (F)
+#undef F
+} lldp_chassis_id_subtype_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ struct lldp_tlv_head head;
+ u8 subtype;
+ u8 id[0];
+}) lldp_chassis_id_tlv_t;
+/* *INDENT-ON* */
+
+#define foreach_port_id_subtype(F) \
+ F (0, reserved, "Reserved") \
+ F (1, intf_alias, "Interface alias") \
+ F (2, port_comp, "Port component") \
+ F (3, mac_addr, "MAC address") \
+ F (4, net_addr, "Network address") \
+ F (5, intf_name, "Interface name") \
+ F (6, agent_circuit_id, "Agent circuit ID") \
+ F (7, local, "Locally assigned")
+
+#define LLDP_PORT_ID_SUBTYPE_NAME(t) LLDP_PORT_ID_SUBTYPE_##t
+#define LLDP_MIN_PORT_ID_LEN (1)
+#define LLDP_MAX_PORT_ID_LEN (255)
+
+typedef enum
+{
+#define F(n, t, s) LLDP_PORT_ID_SUBTYPE_NAME (t) = n,
+ foreach_port_id_subtype (F)
+#undef F
+} lldp_port_id_subtype_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ struct lldp_tlv_head head;
+ u8 subtype;
+ u8 id[0];
+}) lldp_port_id_tlv_t;
+
+typedef CLIB_PACKED (struct {
+ struct lldp_tlv_head head;
+ u16 ttl;
+}) lldp_ttl_tlv_t;
+/* *INDENT-ON* */
+
+#endif /* __included_lldp_protocol_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/examples/gen-rules.py b/src/vnet/map/examples/gen-rules.py
new file mode 100755
index 00000000..7964aa9a
--- /dev/null
+++ b/src/vnet/map/examples/gen-rules.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import ipaddress
+import argparse
+import sys
+
+# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
+# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
+
+def_ip4_pfx = '192.0.2.0/24'
+def_ip6_pfx = '2001:db8::/32'
+def_ip6_src = '2001:db8::1'
+def_psid_offset = 6
+def_psid_len = 6
+def_ea_bits_len = 0
+
+parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
+parser.add_argument('-t', action="store", dest="mapmode")
+parser.add_argument('-f', action="store", dest="format", default="vpp")
+parser.add_argument('--ip4-prefix', action="store", dest="ip4_pfx", default=def_ip4_pfx)
+parser.add_argument('--ip6-prefix', action="store", dest="ip6_pfx", default=def_ip6_pfx)
+parser.add_argument('--ip6-src', action="store", dest="ip6_src", default=def_ip6_src)
+parser.add_argument('--psid-len', action="store", dest="psid_len", default=def_psid_len)
+parser.add_argument('--psid-offset', action="store", dest="psid_offset", default=def_psid_offset)
+parser.add_argument('--ea-bits-len', action="store", dest="ea_bits_len", default=def_ea_bits_len)
+args = parser.parse_args()
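+
+# Example invocation (hypothetical values; -t selects one of the
+# generator functions below by name, -f the output format):
+# ./gen-rules.py -t lw46_shared -f vpp --ip4-prefix 192.0.2.0/24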
+
+#
+# Print domain
+#
+def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len):
+ if format == 'vpp':
+ print("map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", ip6_pfx, "ip6-src " + ip6_src +
+ " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len)
+ if format == 'confd':
+ print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + ip6_src +
+ " ipv6-prefix " + ip6_pfx + " ipv4-prefix " + ip4_pfx +
+ " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len)
+ if format == 'xml':
+ print("<softwire-instance>")
+ print("<id>", i, "</id>");
+ print(" <br-ipv6>" + ip6_src + "</br-ipv6>")
+ print(" <ipv6-prefix>" + ip6_pfx + "</ipv6-prefix>")
+ print(" <ipv4-prefix>" + ip4_pfx + "</ipv4-prefix>")
+ print(" <ea-len>", eabits_len, "</ea-len>")
+ print(" <psid-len>", psid_len, "</psid-len>")
+ print(" <psid-offset>", psid_offset, "</psid-offset>")
+
+def domain_print_end():
+ if format == 'xml':
+ print("</softwire-instance>")
+
+def rule_print(i, psid, dst):
+ if format == 'vpp':
+ print("map add rule index", i, "psid", psid, "ip6-dst", dst)
+ if format == 'confd':
+ print("binding", psid, "ipv6-addr", dst)
+ if format == 'xml':
+ print(" <binding>")
+ print(" <psid>", psid, "</psid>")
+ print(" <ipv6-addr>", dst, "</ipv6-addr>")
+ print(" </binding>")
+
+#
+# Algorithmic mapping Shared IPv4 address
+#
+def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ domain_print(0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len)
+ domain_print_end()
+
+#
+# 1:1 Full IPv4 address
+#
+def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
+ ip6_src = ipaddress.ip_address(ip6_src_str)
+ ip6_dst = ipaddress.ip_network(ip6_pfx_str)
+ psid_len = 0
+ mod = ip4_pfx.num_addresses // 1024
+
+ for i in range(ip4_pfx.num_addresses):
+ domain_print(i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0)
+ domain_print_end()
+ if ip6_src_ecmp and not i % mod:
+ ip6_src = ip6_src + 1
+
+#
+# 1:1 Shared IPv4 address, shared BR (16) VPP CLI
+#
+def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
+ ip6_src = ipaddress.ip_address(ip6_src_str)
+ ip6_dst = ipaddress.ip_network(ip6_pfx_str)
+ mod = ip4_pfx.num_addresses // 1024
+
+ for i in range(ip4_pfx.num_addresses):
+ domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len)
+ for psid in range(0x1 << int(psid_len)):
+ rule_print(i, psid, str(ip6_dst[(i * (0x1<<int(psid_len))) + psid]))
+ domain_print_end()
+ if ip6_src_ecmp and not i % mod:
+ ip6_src = ip6_src + 1
+
+
+#
+# 1:1 Shared IPv4 address, shared BR
+#
+def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
+ ip6_src = ipaddress.ip_address(ip6_src_str)
+ ip6_dst = list(ipaddress.ip_network(ip6_pfx_str).subnets(new_prefix=56))
+ mod = ip4_pfx.num_addresses // 1024
+
+ for i in range(ip4_pfx.num_addresses):
+ domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len)
+ for psid in range(0x1 << int(psid_len)):
+ enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255-1]
+ rule_print(i, psid, enduserprefix[(i * (0x1<<int(psid_len))) + psid])
+ domain_print_end()
+ if ip6_src_ecmp and not i % mod:
+ ip6_src = ip6_src + 1
+
+
+def xml_header_print():
+ print('''
+<?xml version="1.0" encoding="UTF-8"?>
+ <hello xmlns="urn:ietf:params:xml:ns:netconf:base:1.0">
+ <capabilities>
+ <capability>urn:ietf:params:netconf:base:1.0</capability>
+ </capabilities>
+ </hello>
+]]>]]>
+
+<?xml version="1.0" encoding="UTF-8"?>
+ <rpc xmlns="urn:ietf:params:xml:ns:netconf:base:1.0" message-id="1">
+ <edit-config>
+ <target>
+ <candidate/>
+ </target>
+ <config>
+
+ <vpp xmlns="http://www.cisco.com/yang/cisco-vpp">
+ <softwire>
+ <softwire-instances>
+
+ ''')
+
+def xml_footer_print():
+ print('''
+</softwire-instances>
+</softwire>
+</vpp>
+ </config>
+ </edit-config>
+ </rpc>
+
+]]>]]>
+
+<?xml version="1.0" encoding="UTF-8"?>
+ <rpc xmlns="urn:ietf:params:xml:ns:netconf:base:1.0" message-id="2">
+ <close-session/>
+ </rpc>
+
+]]>]]>
+ ''')
+
+
+format = args.format
+if format == 'xml':
+ xml_header_print()
+globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.ea_bits_len, args.psid_offset, args.psid_len)
+if format == 'xml':
+ xml_footer_print()
diff --git a/src/vnet/map/examples/health_check.c b/src/vnet/map/examples/health_check.c
new file mode 100644
index 00000000..5f0d85fe
--- /dev/null
+++ b/src/vnet/map/examples/health_check.c
@@ -0,0 +1,109 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <errno.h>
+
+static void
+usage (void) {
+ fprintf(stderr,
+ "Usage: health_check"
+ " -d debug"
+ " -I interface"
+ "\n");
+ exit(2);
+}
+
+int
+main (int argc, char **argv)
+{
+ int sd, ch;
+ uint8_t *opt, *pkt;
+ struct ifreq ifr;
+ char *interface = NULL;
+ bool debug = false;
+
+ while ((ch = getopt(argc, argv, "h?" "I:" "d")) != EOF) {
+ switch(ch) {
+ case 'I':
+ interface = optarg;
+ break;
+ case 'd':
+ debug = true;
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (!interface)
+ usage();
+
+ /* Request a socket descriptor sd. */
+ if ((sd = socket (AF_INET6, SOCK_RAW, IPPROTO_IPIP)) < 0) {
+ perror ("Failed to get socket descriptor ");
+ exit (EXIT_FAILURE);
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", interface);
+
+ /* Bind socket to interface of this node. */
+ if (setsockopt (sd, SOL_SOCKET, SO_BINDTODEVICE, (void *) &ifr, sizeof (ifr)) < 0) {
+ perror ("SO_BINDTODEVICE failed");
+ exit (EXIT_FAILURE);
+ }
+ if (debug) printf("Binding to interface %s\n", interface);
+
+ while (1) {
+ struct sockaddr_in6 src_addr;
+ socklen_t addrlen = sizeof(src_addr);
+ char source[INET6_ADDRSTRLEN+1];
+ int len;
+ uint8_t inpack[IP_MAXPACKET];
+
+ if ((len = recvfrom(sd, inpack, sizeof(inpack), 0, (struct sockaddr *)&src_addr, &addrlen)) < 0) {
+ perror("recvfrom failed ");
+ }
+ if (inet_ntop(AF_INET6, &src_addr.sin6_addr, source, INET6_ADDRSTRLEN) == NULL) {
+ perror("inet_ntop() failed.");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Reply */
+ struct iphdr *ip = (struct iphdr *)inpack;
+ uint32_t saddr;
+ struct icmphdr *icmp;
+
+ saddr = ip->saddr;
+ ip->saddr = ip->daddr;
+ ip->daddr = saddr;
+
+ switch (ip->protocol) {
+ case 1:
+ if (debug) printf ("ICMP Echo request from %s\n", source);
+ icmp = (struct icmphdr *)&ip[1];
+ icmp->type = ICMP_ECHOREPLY;
+ break;
+ default:
+ fprintf(stderr, "Unsupported protocol %d\n", ip->protocol);
+ }
+ if ((len = sendto(sd, inpack, len, 0, (struct sockaddr *)&src_addr, addrlen)) < 0) {
+ perror("sendto failed ");
+ }
+ }
+
+ close (sd);
+
+ return (EXIT_SUCCESS);
+}
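+
+/*
+ * Build/run sketch (assumed - this example is not wired into any
+ * makefile): cc -o health_check health_check.c
+ * Raw sockets need root or CAP_NET_RAW: sudo ./health_check -I eth0 -d
+ */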
diff --git a/src/vnet/map/examples/test_map.py b/src/vnet/map/examples/test_map.py
new file mode 100755
index 00000000..21388d49
--- /dev/null
+++ b/src/vnet/map/examples/test_map.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+import time, argparse, sys, cmd, unittest
+from ipaddress import *
+
+parser = argparse.ArgumentParser(description='VPP MAP test')
+parser.add_argument('-i', nargs='*', action="store", dest="inputdir")
+args = parser.parse_args()
+
+for dir in args.inputdir:
+ sys.path.append(dir)
+from vpp_papi import *
+
+#
+# 1:1 Shared IPv4 address, shared BR (16) VPP CLI
+#
+def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ ip4_pfx = ip_network(ip4_pfx_str)
+ ip6_src = ip_address(ip6_src_str)
+ ip6_dst = ip_network(ip6_pfx_str)
+ ip6_nul = IPv6Address(u'0::0')
+ mod = ip4_pfx.num_addresses / 1024
+
+ for i in range(ip4_pfx.num_addresses):
+ a = time.clock()
+ t = map_add_domain(0, ip6_nul.packed, ip4_pfx[i].packed, ip6_src.packed, 0, 32, 128, ea_bits_len, psid_offset, psid_len, 0, 0)
+ #print "Return from map_add_domain", t
+ if t == None:
+ print "map_add_domain failed"
+ continue
+ if t.retval != 0:
+ print "map_add_domain failed", t
+ continue
+ for psid in range(0x1 << int(psid_len)):
+ r = map_add_del_rule(0, t.index, 1, (ip6_dst[(i * (0x1<<int(psid_len))) + psid]).packed, psid)
+ #print "Return from map_add_del_rule", r
+
+ if ip6_src_ecmp and not i % mod:
+ ip6_src = ip6_src + 1
+
+ print "Running time:", time.clock() - a
+
+class TestMAP(unittest.TestCase):
+ '''
+ def test_delete_all(self):
+ t = map_domain_dump(0)
+ self.assertNotEqual(t, None)
+ print "Number of domains configured: ", len(t)
+ for d in t:
+ ts = map_del_domain(0, d.domainindex)
+ self.assertNotEqual(ts, None)
+ t = map_domain_dump(0)
+ self.assertNotEqual(t, None)
+ print "Number of domains configured: ", len(t)
+ self.assertEqual(len(t), 0)
+
+ '''
+
+ def test_a_million_rules(self):
+ ip4_pfx = u'192.0.2.0/24'
+ ip6_pfx = u'2001:db8::/32'
+ ip6_src = u'2001:db8::1'
+ psid_offset = 6
+ psid_len = 6
+ ea_bits_len = 0
+ lw46_shared(ip4_pfx, ip6_pfx, ip6_src, ea_bits_len, psid_offset, psid_len)
+
+#
+# RX thread that should sit on blocking vpe_api_read()
+#
+import threading
+class RXThread (threading.Thread):
+ def __init__(self):
+ threading.Thread.__init__(self)
+
+ def run(self):
+ print "Starting "
+ i = 0
+ while True:
+ msg = vpe_api_read()
+ if msg:
+ #print msg
+ id = unpack('>H', msg[0:2])
+ size = unpack('>H', msg[2:4])
+ print "Received", id, "of size", size
+ i += 1
+ #del msg
+ continue
+
+ #time.sleep(0.001)
+ return
+
+# Create RX thread
+rxthread = RXThread()
+rxthread.setDaemon(True)
+
+print "Connect", connect_to_vpe("client124")
+import timeit
+rxthread.start()
+print "After thread started"
+
+#pneum_kill_thread()
+print "After thread killed"
+
+#t = show_version(0)
+#print "Result from show version", t
+
+print timeit.timeit('t = show_version(0)', number=1000, setup="from __main__ import show_version")
+time.sleep(10)
+#print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping")
+
+
+disconnect_from_vpe()
+sys.exit()
+
+
+print t.program, t.version,t.builddate,t.builddirectory
+
+'''
+
+t = map_domain_dump(0)
+if not t:
+ print('show map domain failed')
+
+for d in t:
+ print("IP6 prefix:",str(IPv6Address(d.ip6prefix)))
+ print( "IP4 prefix:",str(IPv4Address(d.ip4prefix)))
+'''
+
+suite = unittest.TestLoader().loadTestsFromTestCase(TestMAP)
+unittest.TextTestRunner(verbosity=2).run(suite)
+
+disconnect_from_vpe()
+
+
diff --git a/src/vnet/map/gen-rules.py b/src/vnet/map/gen-rules.py
new file mode 100755
index 00000000..533a8e23
--- /dev/null
+++ b/src/vnet/map/gen-rules.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import ipaddress
+import argparse
+import sys
+
+# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
+# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
+
+parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
+parser.add_argument('-t', action="store", dest="mapmode")
+args = parser.parse_args()
+
+#
+# 1:1 Shared IPv4 address, shared BR
+#
+def shared11br():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 6
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
+ "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ for psid in range(0x1 << psid_len):
+ print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+
+
+#
+# 1:1 Shared IPv4 address
+#
+def shared11():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 6
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i],
+ "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ for psid in range(0x1 << psid_len):
+ print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+
+#
+# 1:1 Shared IPv4 address small
+#
+def smallshared11():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/24')
+ ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 6
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i],
+ "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ for psid in range(0x1 << psid_len):
+ print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+
+#
+# 1:1 Full IPv4 address
+#
+def full11():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 0
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i],
+ "ea-bits-len 0 psid-offset 0 psid-len 0")
+def full11br():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 0
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1",
+ "ea-bits-len 0 psid-offset 0 psid-len 0")
+
+#
+# Algorithmic mapping Shared IPv4 address
+#
+def algo():
+ print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8")
+ print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0")
+
+#
+# IP4 forwarding
+#
+def ip4():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ for i in range(ip4_pfx.num_addresses):
+ print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2")
+
+
+globals()[args.mapmode]()
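+
+# Example (hypothetical): ./gen-rules.py -t shared11br > rules.cli
+# where -t names one of the generator functions defined above.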
+
+
diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c
new file mode 100644
index 00000000..6a3bdd51
--- /dev/null
+++ b/src/vnet/map/ip4_map.c
@@ -0,0 +1,754 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Defines used for testing various optimisation schemes
+ */
+#define MAP_ENCAP_DUAL 0
+
+#include "map.h"
+#include "../ip/ip_frag.h"
+#include <vnet/ip/ip4_to_ip6.h>
+
+vlib_node_registration_t ip4_map_reass_node;
+
+enum ip4_map_next_e
+{
+ IP4_MAP_NEXT_IP6_LOOKUP,
+#ifdef MAP_SKIP_IP6_LOOKUP
+ IP4_MAP_NEXT_IP6_REWRITE,
+#endif
+ IP4_MAP_NEXT_IP4_FRAGMENT,
+ IP4_MAP_NEXT_IP6_FRAGMENT,
+ IP4_MAP_NEXT_REASS,
+ IP4_MAP_NEXT_ICMP_ERROR,
+ IP4_MAP_NEXT_DROP,
+ IP4_MAP_N_NEXT,
+};
+
+enum ip4_map_reass_next_t
+{
+ IP4_MAP_REASS_NEXT_IP6_LOOKUP,
+ IP4_MAP_REASS_NEXT_IP4_FRAGMENT,
+ IP4_MAP_REASS_NEXT_DROP,
+ IP4_MAP_REASS_N_NEXT,
+};
+
+typedef struct
+{
+ u32 map_domain_index;
+ u16 port;
+ u8 cached;
+} map_ip4_map_reass_trace_t;
+
+u8 *
+format_ip4_map_reass_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *);
+ return format (s, "MAP domain index: %d L4 port: %u Status: %s",
+ t->map_domain_index, t->port,
+ t->cached ? "cached" : "forwarded");
+}
+
+static_always_inline u16
+ip4_map_port_and_security_check (map_domain_t * d, ip4_header_t * ip,
+ u32 * next, u8 * error)
+{
+ u16 port = 0;
+
+ if (d->psid_length > 0)
+ {
+ if (ip4_get_fragment_offset (ip) == 0)
+ {
+ if (PREDICT_FALSE
+ ((ip->ip_version_and_header_length != 0x45)
+ || clib_host_to_net_u16 (ip->length) < 28))
+ {
+ return 0;
+ }
+ port = ip4_get_port (ip, 0);
+ if (port)
+ {
+ /* Verify that port is not among the well-known ports */
+ if ((d->psid_offset > 0)
+ && (clib_net_to_host_u16 (port) <
+ (0x1 << (16 - d->psid_offset))))
+ {
+ *error = MAP_ERROR_ENCAP_SEC_CHECK;
+ }
+ else
+ {
+ if (ip4_get_fragment_more (ip))
+ *next = IP4_MAP_NEXT_REASS;
+ return (port);
+ }
+ }
+ else
+ {
+ *error = MAP_ERROR_BAD_PROTOCOL;
+ }
+ }
+ else
+ {
+ *next = IP4_MAP_NEXT_REASS;
+ }
+ }
+ return (0);
+}
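+
+/*
+ * Example (illustrative): with psid_offset = 6, ports below
+ * 1 << (16 - 6) = 1024 are treated as reserved well-known ports, so a
+ * fragment-0 packet to port 80 in a shared-address domain sets
+ * MAP_ERROR_ENCAP_SEC_CHECK above.
+ */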
+
+/*
+ * ip4_map_vtcfl
+ */
+static_always_inline u32
+ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p)
+{
+ map_main_t *mm = &map_main;
+ u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
+ u32 vtcfl = 0x6 << 28;
+ vtcfl |= tc << 20;
+ vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff;
+
+ return (clib_host_to_net_u32 (vtcfl));
+}
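+
+/*
+ * The word built above is, by bit position: version 6 in bits 31..28,
+ * the traffic class (copied from the IPv4 TOS when tc_copy is set) in
+ * bits 27..20, and the low 20 bits of the flow hash as the flow label.
+ */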
+
+static_always_inline bool
+ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
+{
+#ifdef MAP_SKIP_IP6_LOOKUP
+ if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei)
+ {
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+ pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index;
+ return (true);
+ }
+#endif
+ return (false);
+}
+
+/*
+ * ip4_map_ttl
+ */
+static inline void
+ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
+{
+ i32 ttl = ip->ttl;
+
+ /* Input node should have rejected packets with ttl 0. */
+ ASSERT (ip->ttl > 0);
+
+ u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
+ checksum += checksum >= 0xffff;
+ ip->checksum = checksum;
+ ttl -= 1;
+ ip->ttl = ttl;
+ *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
+
+ /* Verify checksum. */
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+}
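+
+/*
+ * The checksum arithmetic above is the standard incremental update
+ * (cf. RFC 1624): decrementing the TTL lowers the TTL/protocol word by
+ * 0x0100, so adding 0x0100 back with an end-around carry keeps the
+ * header checksum valid without a full recompute.
+ */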
+
+static u32
+ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
+{
+ map_main_t *mm = &map_main;
+
+ if (mm->frag_inner)
+ {
+ ip_frag_set_vnet_buffer (b, sizeof (ip6_header_t), mtu,
+ IP4_FRAG_NEXT_IP6_LOOKUP,
+ IP_FRAG_FLAG_IP6_HEADER);
+ return (IP4_MAP_NEXT_IP4_FRAGMENT);
+ }
+ else
+ {
+ if (df && !mm->frag_ignore_df)
+ {
+ icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ mtu);
+ vlib_buffer_advance (b, sizeof (ip6_header_t));
+ *error = MAP_ERROR_DF_SET;
+ return (IP4_MAP_NEXT_ICMP_ERROR);
+ }
+ ip_frag_set_vnet_buffer (b, 0, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
+ IP_FRAG_FLAG_IP6_HEADER);
+ return (IP4_MAP_NEXT_IP6_FRAGMENT);
+ }
+}
+
+/*
+ * ip4_map
+ */
+static uword
+ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ map_main_t *mm = &map_main;
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual loop */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ map_domain_t *d0, *d1;
+ u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
+ ip4_header_t *ip40, *ip41;
+ u16 port0 = 0, port1 = 0;
+ ip6_header_t *ip6h0, *ip6h1;
+ u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
+ u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 =
+ IP4_MAP_NEXT_IP6_LOOKUP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+ /* prefetch the IPv4 header plus 8 bytes of payload (28 bytes) and
+ * the 40 bytes before current data where the IPv6 header is prepended */
+ CLIB_PREFETCH (p2->data - 40, 68, STORE);
+ CLIB_PREFETCH (p3->data - 40, 68, STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip40 = vlib_buffer_get_current (p0);
+ ip41 = vlib_buffer_get_current (p1);
+ map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ d0 = ip4_map_get_domain (map_domain_index0);
+ map_domain_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+ d1 = ip4_map_get_domain (map_domain_index1);
+ ASSERT (d0);
+ ASSERT (d1);
+
+ /*
+ * Shared IPv4 address
+ */
+ port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0);
+ port1 = ip4_map_port_and_security_check (d1, ip41, &next1, &error1);
+
+ /* Decrement IPv4 TTL */
+ ip4_map_decrement_ttl (ip40, &error0);
+ ip4_map_decrement_ttl (ip41, &error1);
+ bool df0 =
+ ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+ bool df1 =
+ ip41->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+ /* MAP calc */
+ u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
+ u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32);
+ u16 dp40 = clib_net_to_host_u16 (port0);
+ u16 dp41 = clib_net_to_host_u16 (port1);
+ u64 dal60 = map_get_pfx (d0, da40, dp40);
+ u64 dal61 = map_get_pfx (d1, da41, dp41);
+ u64 dar60 = map_get_sfx (d0, da40, dp40);
+ u64 dar61 = map_get_sfx (d1, da41, dp41);
+ if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE
+ && next0 != IP4_MAP_NEXT_REASS)
+ error0 = MAP_ERROR_NO_BINDING;
+ if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE
+ && next1 != IP4_MAP_NEXT_REASS)
+ error1 = MAP_ERROR_NO_BINDING;
+
+ /* construct ipv6 header */
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+ vlib_buffer_advance (p1, -sizeof (ip6_header_t));
+ ip6h0 = vlib_buffer_get_current (p0);
+ ip6h1 = vlib_buffer_get_current (p1);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ ip6h0->ip_version_traffic_class_and_flow_label =
+ ip4_map_vtcfl (ip40, p0);
+ ip6h1->ip_version_traffic_class_and_flow_label =
+ ip4_map_vtcfl (ip41, p1);
+ ip6h0->payload_length = ip40->length;
+ ip6h1->payload_length = ip41->length;
+ ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
+ ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
+ ip6h0->hop_limit = 0x40;
+ ip6h1->hop_limit = 0x40;
+ ip6h0->src_address = d0->ip6_src;
+ ip6h1->src_address = d1->ip6_src;
+ ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
+ ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
+ ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61);
+ ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61);
+
+ /*
+ * Determine next node. Can be one of:
+ * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+ */
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ {
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_net_to_host_u16 (ip6h0->payload_length) +
+ sizeof (*ip6h0) > d0->mtu)))
+ {
+ next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
+ }
+ else
+ {
+ next0 =
+ ip4_map_ip6_lookup_bypass (p0,
+ ip40) ?
+ IP4_MAP_NEXT_IP6_REWRITE : next0;
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ thread_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip6h0->payload_length) +
+ 40);
+ }
+ }
+ else
+ {
+ next0 = IP4_MAP_NEXT_DROP;
+ }
+
+ /*
+ * Determine next node. Can be one of:
+ * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+ */
+ if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
+ {
+ if (PREDICT_FALSE
+ (d1->mtu
+ && (clib_net_to_host_u16 (ip6h1->payload_length) +
+ sizeof (*ip6h1) > d1->mtu)))
+ {
+ next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1);
+ }
+ else
+ {
+ next1 =
+ ip4_map_ip6_lookup_bypass (p1,
+ ip41) ?
+ IP4_MAP_NEXT_IP6_REWRITE : next1;
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ thread_index,
+ map_domain_index1, 1,
+ clib_net_to_host_u16
+ (ip6h1->payload_length) +
+ 40);
+ }
+ }
+ else
+ {
+ next1 = IP4_MAP_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ }
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->map_domain_index = map_domain_index1;
+ tr->port = port1;
+ }
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ map_domain_t *d0;
+ u8 error0 = MAP_ERROR_NONE;
+ ip4_header_t *ip40;
+ u16 port0 = 0;
+ ip6_header_t *ip6h0;
+ u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
+ u32 map_domain_index0 = ~0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip40 = vlib_buffer_get_current (p0);
+ map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ d0 = ip4_map_get_domain (map_domain_index0);
+ ASSERT (d0);
+
+ /*
+ * Shared IPv4 address
+ */
+ port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0);
+
+ /* Decrement IPv4 TTL */
+ ip4_map_decrement_ttl (ip40, &error0);
+ bool df0 =
+ ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+ /* MAP calc */
+ u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
+ u16 dp40 = clib_net_to_host_u16 (port0);
+ u64 dal60 = map_get_pfx (d0, da40, dp40);
+ u64 dar60 = map_get_sfx (d0, da40, dp40);
+ if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE
+ && next0 != IP4_MAP_NEXT_REASS)
+ error0 = MAP_ERROR_NO_BINDING;
+
+ /* construct ipv6 header */
+ vlib_buffer_advance (p0, -(sizeof (ip6_header_t)));
+ ip6h0 = vlib_buffer_get_current (p0);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ ip6h0->ip_version_traffic_class_and_flow_label =
+ ip4_map_vtcfl (ip40, p0);
+ ip6h0->payload_length = ip40->length;
+ ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
+ ip6h0->hop_limit = 0x40;
+ ip6h0->src_address = d0->ip6_src;
+ ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
+ ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
+
+ /*
+ * Determine next node. Can be one of:
+ * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+ */
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ {
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_net_to_host_u16 (ip6h0->payload_length) +
+ sizeof (*ip6h0) > d0->mtu)))
+ {
+ next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
+ }
+ else
+ {
+ next0 =
+ ip4_map_ip6_lookup_bypass (p0,
+ ip40) ?
+ IP4_MAP_NEXT_IP6_REWRITE : next0;
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ thread_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip6h0->payload_length) +
+ 40);
+ }
+ }
+ else
+ {
+ next0 = IP4_MAP_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ }
+
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/*
+ * ip4_map_reass
+ */
+static uword
+ip4_map_reass (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_reass_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ map_main_t *mm = &map_main;
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 *fragments_to_drop = NULL;
+ u32 *fragments_to_loopback = NULL;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ map_domain_t *d0;
+ u8 error0 = MAP_ERROR_NONE;
+ ip4_header_t *ip40;
+ i32 port0 = 0;
+ ip6_header_t *ip60;
+ u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP;
+ u32 map_domain_index0;
+ u8 cached = 0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ ip40 = (ip4_header_t *) (ip60 + 1);
+ map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ d0 = ip4_map_get_domain (map_domain_index0);
+
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32,
+ ip40->dst_address.as_u32,
+ ip40->fragment_id,
+ ip40->protocol,
+ &fragments_to_drop);
+ if (PREDICT_FALSE (!r))
+ {
+ // Could not create a caching entry
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+ else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40)))
+ {
+ if (r->port >= 0)
+ {
+ // We know the port already
+ port0 = r->port;
+ }
+ else if (map_ip4_reass_add_fragment (r, pi0))
+ {
+ // Not enough space for caching
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+ else
+ {
+ cached = 1;
+ }
+ }
+ else if ((port0 = ip4_get_port (ip40, 0)) == 0)
+ {
+ // Could not find port. We'll free the reassembly.
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ port0 = 0;
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+ else
+ {
+ r->port = port0;
+ map_ip4_reass_get_fragments (r, &fragments_to_loopback);
+ }
+
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ if (!cached && r)
+ {
+ r->forwarded += clib_host_to_net_u16 (ip40->length) - 20;
+ if (!ip4_get_fragment_more (ip40))
+ r->expected_total =
+ ip4_get_fragment_offset (ip40) * 8 +
+ clib_host_to_net_u16 (ip40->length) - 20;
+ if (r->forwarded >= r->expected_total)
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+#endif
+
+ map_ip4_reass_unlock ();
+
+ // NOTE: Most operations have already been performed by ip4_map
+ // All we need is the right destination address
+ ip60->dst_address.as_u64[0] =
+ map_get_pfx_net (d0, ip40->dst_address.as_u32, port0);
+ ip60->dst_address.as_u64[1] =
+ map_get_sfx_net (d0, ip40->dst_address.as_u32, port0);
+
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_net_to_host_u16 (ip60->payload_length) +
+ sizeof (*ip60) > d0->mtu)))
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60);
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
+ vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
+ vnet_buffer (p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
+ next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_ip4_map_reass_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ tr->cached = cached;
+ }
+
+ if (cached)
+ {
+ //Dequeue the packet
+ n_left_to_next++;
+ to_next--;
+ }
+ else
+ {
+ if (error0 == MAP_ERROR_NONE)
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ thread_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip60->payload_length) + 40);
+ next0 =
+ (error0 == MAP_ERROR_NONE) ? next0 : IP4_MAP_REASS_NEXT_DROP;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+
+	  //Loopback when we reach the end of the input vector
+ if (n_left_from == 0 && vec_len (fragments_to_loopback))
+ {
+ from = vlib_frame_vector_args (frame);
+ u32 len = vec_len (fragments_to_loopback);
+ if (len <= VLIB_FRAME_SIZE)
+ {
+ clib_memcpy (from, fragments_to_loopback,
+ sizeof (u32) * len);
+ n_left_from = len;
+ vec_reset_length (fragments_to_loopback);
+ }
+ else
+ {
+ clib_memcpy (from,
+ fragments_to_loopback + (len -
+ VLIB_FRAME_SIZE),
+ sizeof (u32) * VLIB_FRAME_SIZE);
+ n_left_from = VLIB_FRAME_SIZE;
+ _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+ }
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ map_send_all_to_node (vm, fragments_to_drop, node,
+ &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+ IP4_MAP_REASS_NEXT_DROP);
+
+ vec_free (fragments_to_drop);
+ vec_free (fragments_to_loopback);
+ return frame->n_vectors;
+}
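+
+/*
+ * The loopback block above re-injects fragments that were cached while
+ * the first fragment (and thus the port) was still unknown: once the
+ * input vector is exhausted, the frame's buffer-index array is reused
+ * as a fresh input vector filled from fragments_to_loopback, at most
+ * VLIB_FRAME_SIZE entries at a time, and the packet loop runs again
+ * over the re-injected buffers.
+ */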
+
+static char *map_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
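+
+/*
+ * map_error_strings is built with the X-macro pattern: assuming
+ * foreach_map_error is defined in map.h roughly as
+ *   #define foreach_map_error \
+ *     _(NONE, "valid MAP packets") \
+ *     _(BAD_PROTOCOL, "bad protocol") \
+ *     ...
+ * the temporary _(sym,string) definition expands each entry to its
+ * string literal, yielding one string per MAP_ERROR_* enum value in
+ * declaration order, matching the error counters registered below.
+ */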
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_node) = {
+ .function = ip4_map,
+ .name = "ip4-map",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+
+ .n_next_nodes = IP4_MAP_N_NEXT,
+ .next_nodes = {
+ [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+#ifdef MAP_SKIP_IP6_LOOKUP
+ [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
+#endif
+ [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
+ [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
+ [IP4_MAP_NEXT_REASS] = "ip4-map-reass",
+ [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP4_MAP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_reass_node) = {
+ .function = ip4_map_reass,
+ .name = "ip4-map-reass",
+ .vector_size = sizeof(u32),
+ .format_trace = format_ip4_map_reass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+
+ .n_next_nodes = IP4_MAP_REASS_N_NEXT,
+ .next_nodes = {
+ [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
+ [IP4_MAP_REASS_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c
new file mode 100644
index 00000000..b89840cc
--- /dev/null
+++ b/src/vnet/map/ip4_map_t.c
@@ -0,0 +1,812 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+#include <vnet/ip/ip4_to_ip6.h>
+
+#define IP4_MAP_T_DUAL_LOOP 1
+
+typedef enum
+{
+ IP4_MAPT_NEXT_MAPT_TCP_UDP,
+ IP4_MAPT_NEXT_MAPT_ICMP,
+ IP4_MAPT_NEXT_MAPT_FRAGMENTED,
+ IP4_MAPT_NEXT_DROP,
+ IP4_MAPT_N_NEXT
+} ip4_mapt_next_t;
+
+typedef enum
+{
+ IP4_MAPT_ICMP_NEXT_IP6_LOOKUP,
+ IP4_MAPT_ICMP_NEXT_IP6_FRAG,
+ IP4_MAPT_ICMP_NEXT_DROP,
+ IP4_MAPT_ICMP_N_NEXT
+} ip4_mapt_icmp_next_t;
+
+typedef enum
+{
+ IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP,
+ IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG,
+ IP4_MAPT_TCP_UDP_NEXT_DROP,
+ IP4_MAPT_TCP_UDP_N_NEXT
+} ip4_mapt_tcp_udp_next_t;
+
+typedef enum
+{
+ IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP,
+ IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG,
+ IP4_MAPT_FRAGMENTED_NEXT_DROP,
+ IP4_MAPT_FRAGMENTED_N_NEXT
+} ip4_mapt_fragmented_next_t;
+
+//This is used to pass information within the buffer data.
+//The buffer metadata area is too small to carry a structure this big,
+//so it is prepended to the packet data instead.
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ ip6_address_t daddr;
+ ip6_address_t saddr;
+ //IPv6 header + Fragmentation header will be here
+ //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4)
+ u8 unused[28];
+}) ip4_mapt_pseudo_header_t;
+/* *INDENT-ON* */
+
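+/*
+ * Padding arithmetic for the pseudo-header above (standard header
+ * sizes): sizeof(ip6_header_t) 40 + sizeof(ip6_frag_hdr_t) 8
+ * - sizeof(ip4_header_t) 20 = 28 unused bytes, i.e. exactly enough
+ * headroom for the translated IPv6 and fragmentation headers to be
+ * written in place of the IPv4 header later on.
+ */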
+
+static_always_inline int
+ip4_map_fragment_cache (ip4_header_t * ip4, u16 port)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r =
+ map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32,
+ ip4->fragment_id,
+ (ip4->protocol ==
+ IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol,
+ &ignore);
+ if (r)
+ r->port = port;
+
+ map_ip4_reass_unlock ();
+ return !r;
+}
+
+static_always_inline i32
+ip4_map_fragment_get_port (ip4_header_t * ip4)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r =
+ map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32,
+ ip4->fragment_id,
+ (ip4->protocol ==
+ IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol,
+ &ignore);
+ i32 ret = r ? r->port : -1;
+ map_ip4_reass_unlock ();
+ return ret;
+}
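+
+/*
+ * Together, the two helpers above form a small port cache keyed by
+ * (src, dst, fragment_id, protocol), with ICMP mapped to ICMP6 so that
+ * lookups match the translated flow: when the first fragment of a flow
+ * is seen, ip4_map_fragment_cache() records its destination port in
+ * the reassembly entry; non-first fragments, which carry no L4 header,
+ * call ip4_map_fragment_get_port() to recover it (-1 when the first
+ * fragment has not been seen yet or the entry was evicted).
+ */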
+
+typedef struct
+{
+ map_domain_t *d;
+ u16 recv_port;
+} icmp_to_icmp6_ctx_t;
+
+static int
+ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
+{
+ icmp_to_icmp6_ctx_t *ctx = arg;
+
+ ip4_map_t_embedded_address (ctx->d, &ip6->src_address, &ip4->src_address);
+ ip6->dst_address.as_u64[0] =
+ map_get_pfx_net (ctx->d, ip4->dst_address.as_u32, ctx->recv_port);
+ ip6->dst_address.as_u64[1] =
+ map_get_sfx_net (ctx->d, ip4->dst_address.as_u32, ctx->recv_port);
+
+ return 0;
+}
+
+static int
+ip4_to_ip6_set_inner_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6,
+ void *arg)
+{
+ icmp_to_icmp6_ctx_t *ctx = arg;
+
+ //Note that the source address is within the domain
+ //while the destination address is the one outside the domain
+ ip4_map_t_embedded_address (ctx->d, &ip6->dst_address, &ip4->dst_address);
+ ip6->src_address.as_u64[0] =
+ map_get_pfx_net (ctx->d, ip4->src_address.as_u32, ctx->recv_port);
+ ip6->src_address.as_u64[1] =
+ map_get_sfx_net (ctx->d, ip4->src_address.as_u32, ctx->recv_port);
+
+ return 0;
+}
+
+static uword
+ip4_map_t_icmp (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_t_icmp_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_combined_counter_main_t *cm = map_main.domain_counters;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_mapt_icmp_next_t next0;
+ u8 error0;
+ map_domain_t *d0;
+ u16 len0;
+ icmp_to_icmp6_ctx_t ctx0;
+ ip4_header_t *ip40;
+
+ next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP;
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ error0 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ vlib_buffer_advance (p0, sizeof (ip4_mapt_pseudo_header_t)); //The pseudo-header is not used
+ len0 =
+ clib_net_to_host_u16 (((ip4_header_t *)
+ vlib_buffer_get_current (p0))->length);
+ d0 =
+ pool_elt_at_index (map_main.domains,
+ vnet_buffer (p0)->map_t.map_domain_index);
+
+ ip40 = vlib_buffer_get_current (p0);
+ ctx0.recv_port = ip4_get_port (ip40, 1);
+ ctx0.d = d0;
+ if (ctx0.recv_port == 0)
+ {
+ // In case of 1:1 mapping, we don't care about the port
+ if (!(d0->ea_bits_len == 0 && d0->rules))
+ {
+ error0 = MAP_ERROR_ICMP;
+ goto err0;
+ }
+ }
+
+ if (icmp_to_icmp6
+ (p0, ip4_to_ip6_set_icmp_cb, &ctx0,
+ ip4_to_ip6_set_inner_icmp_cb, &ctx0))
+ {
+ error0 = MAP_ERROR_ICMP;
+ goto err0;
+ }
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+ next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
+ }
+ err0:
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ thread_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ len0);
+ }
+ else
+ {
+ next0 = IP4_MAPT_ICMP_NEXT_DROP;
+ }
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static int
+ip4_to_ip6_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *ctx)
+{
+ ip4_mapt_pseudo_header_t *pheader = ctx;
+
+ ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0];
+ ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1];
+ ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0];
+ ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1];
+
+ return 0;
+}
+
+static uword
+ip4_map_t_fragmented (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_t_fragmented_node.index);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_mapt_pseudo_header_t *pheader0;
+ ip4_mapt_fragmented_next_t next0;
+
+ next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP;
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ //Accessing pseudo header
+ pheader0 = vlib_buffer_get_current (p0);
+ vlib_buffer_advance (p0, sizeof (*pheader0));
+
+ if (ip4_to_ip6_fragmented (p0, ip4_to_ip6_set_cb, pheader0))
+ {
+ p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED];
+ next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP6_FRAG_NEXT_IP6_LOOKUP;
+ next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static uword
+ip4_map_t_tcp_udp (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_t_tcp_udp_node.index);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP4_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip4_mapt_pseudo_header_t *pheader0, *pheader1;
+ ip4_mapt_tcp_udp_next_t next0, next1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+ next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ //Accessing pseudo header
+ pheader0 = vlib_buffer_get_current (p0);
+ pheader1 = vlib_buffer_get_current (p1);
+ vlib_buffer_advance (p0, sizeof (*pheader0));
+ vlib_buffer_advance (p1, sizeof (*pheader1));
+
+ if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0))
+ {
+ p0->error = error_node->errors[MAP_ERROR_UNKNOWN];
+ next0 = IP4_MAPT_TCP_UDP_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP6_FRAG_NEXT_IP6_LOOKUP;
+ next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+ }
+ }
+
+ if (ip4_to_ip6_tcp_udp (p1, ip4_to_ip6_set_cb, pheader1))
+ {
+ p1->error = error_node->errors[MAP_ERROR_UNKNOWN];
+ next1 = IP4_MAPT_TCP_UDP_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p1)->map_t.mtu < p1->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p1)->ip_frag.header_offset = 0;
+ vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
+ vnet_buffer (p1)->ip_frag.next_index =
+ IP6_FRAG_NEXT_IP6_LOOKUP;
+ next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next, pi0, pi1,
+ next0, next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_mapt_pseudo_header_t *pheader0;
+ ip4_mapt_tcp_udp_next_t next0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+ p0 = vlib_get_buffer (vm, pi0);
+
+ //Accessing pseudo header
+ pheader0 = vlib_buffer_get_current (p0);
+ vlib_buffer_advance (p0, sizeof (*pheader0));
+
+ if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0))
+ {
+ p0->error = error_node->errors[MAP_ERROR_UNKNOWN];
+ next0 = IP4_MAPT_TCP_UDP_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP6_FRAG_NEXT_IP6_LOOKUP;
+ next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+ }
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
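+
+/*
+ * The three translation workers above (tcp-udp here, icmp and
+ * fragmented earlier) all follow the same pattern: consume the
+ * pseudo-header that ip4-map-t prepends, use the pre-computed IPv6
+ * source/destination addresses from it, run the actual IPv4-to-IPv6
+ * translation, and divert to ip6-frag when the result exceeds the
+ * domain MTU.
+ */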
+
+static_always_inline void
+ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0,
+ ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0,
+ u8 * error0, ip4_mapt_next_t * next0)
+{
+ if (PREDICT_FALSE (ip4_get_fragment_offset (ip40)))
+ {
+ *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED;
+ if (d0->ea_bits_len == 0 && d0->rules)
+ {
+ *dst_port0 = 0;
+ }
+ else
+ {
+ *dst_port0 = ip4_map_fragment_get_port (ip40);
+ *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0;
+ }
+ }
+ else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP))
+ {
+ vnet_buffer (p0)->map_t.checksum_offset = 36;
+ *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
+ *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0;
+ *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2));
+ }
+ else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP))
+ {
+ vnet_buffer (p0)->map_t.checksum_offset = 26;
+ *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
+ *error0 = ip4_len0 < 28 ? MAP_ERROR_MALFORMED : *error0;
+ *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2));
+ }
+ else if (ip40->protocol == IP_PROTOCOL_ICMP)
+ {
+ *next0 = IP4_MAPT_NEXT_MAPT_ICMP;
+ if (d0->ea_bits_len == 0 && d0->rules)
+ *dst_port0 = 0;
+      else if (((icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40)))->type
+	       == ICMP4_echo_reply
+	       || ((icmp46_header_t *)
+		   u8_ptr_add (ip40,
+			       sizeof (*ip40)))->type == ICMP4_echo_request)
+ *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 6));
+ }
+ else
+ {
+ *error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+}
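+
+/*
+ * Offsets used in the classifier above, counted from the start of the
+ * IPv4 header (assumed to be the option-less 20-byte form, which the
+ * callers enforce via ip_version_and_header_length == 0x45):
+ *   TCP checksum: 20 + 16 = 36      UDP checksum: 20 + 6 = 26
+ *   L4 destination port: 20 + 2
+ * The minimum-length checks 40 (20 IPv4 + 20 TCP) and 28 (20 IPv4 +
+ * 8 UDP) guard against truncated transport headers.
+ */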
+
+static uword
+ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_t_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_combined_counter_main_t *cm = map_main.domain_counters;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP4_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip40, *ip41;
+ map_domain_t *d0, *d1;
+ ip4_mapt_next_t next0 = 0, next1 = 0;
+ u16 ip4_len0, ip4_len1;
+ u8 error0, error1;
+ i32 dst_port0, dst_port1;
+ ip4_mapt_pseudo_header_t *pheader0, *pheader1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ error0 = MAP_ERROR_NONE;
+ error1 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip40 = vlib_buffer_get_current (p0);
+ ip41 = vlib_buffer_get_current (p1);
+ ip4_len0 = clib_host_to_net_u16 (ip40->length);
+ ip4_len1 = clib_host_to_net_u16 (ip41->length);
+
+ if (PREDICT_FALSE (p0->current_length < ip4_len0 ||
+ ip40->ip_version_and_header_length != 0x45))
+ {
+ error0 = MAP_ERROR_UNKNOWN;
+ next0 = IP4_MAPT_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE (p1->current_length < ip4_len1 ||
+ ip41->ip_version_and_header_length != 0x45))
+ {
+ error1 = MAP_ERROR_UNKNOWN;
+ next1 = IP4_MAPT_NEXT_DROP;
+ }
+
+ vnet_buffer (p0)->map_t.map_domain_index =
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index);
+ vnet_buffer (p1)->map_t.map_domain_index =
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+ d1 = ip4_map_get_domain (vnet_buffer (p1)->map_t.map_domain_index);
+
+ vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+ vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0;
+
+ dst_port0 = -1;
+ dst_port1 = -1;
+
+ ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0,
+ &next0);
+ ip4_map_t_classify (p1, d1, ip41, ip4_len1, &dst_port1, &error1,
+ &next1);
+
+ //Add MAP-T pseudo header in front of the packet
+ vlib_buffer_advance (p0, -sizeof (*pheader0));
+ vlib_buffer_advance (p1, -sizeof (*pheader1));
+ pheader0 = vlib_buffer_get_current (p0);
+ pheader1 = vlib_buffer_get_current (p1);
+
+ //Save addresses within the packet
+ ip4_map_t_embedded_address (d0, &pheader0->saddr,
+ &ip40->src_address);
+ ip4_map_t_embedded_address (d1, &pheader1->saddr,
+ &ip41->src_address);
+ pheader0->daddr.as_u64[0] =
+ map_get_pfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
+ pheader0->daddr.as_u64[1] =
+ map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
+ pheader1->daddr.as_u64[0] =
+ map_get_pfx_net (d1, ip41->dst_address.as_u32, (u16) dst_port1);
+ pheader1->daddr.as_u64[1] =
+ map_get_sfx_net (d1, ip41->dst_address.as_u32, (u16) dst_port1);
+
+ if (PREDICT_FALSE
+ (ip4_is_first_fragment (ip40) && (dst_port0 != -1)
+ && (d0->ea_bits_len != 0 || !d0->rules)
+ && ip4_map_fragment_cache (ip40, dst_port0)))
+ {
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+
+ if (PREDICT_FALSE
+ (ip4_is_first_fragment (ip41) && (dst_port1 != -1)
+ && (d1->ea_bits_len != 0 || !d1->rules)
+ && ip4_map_fragment_cache (ip41, dst_port1)))
+ {
+ error1 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+
+ if (PREDICT_TRUE
+ (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ thread_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip40->length));
+ }
+
+ if (PREDICT_TRUE
+ (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ thread_index,
+ vnet_buffer (p1)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip41->length));
+ }
+
+ next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
+ next1 = (error1 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next1;
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip40;
+ map_domain_t *d0;
+ ip4_mapt_next_t next0;
+ u16 ip4_len0;
+ u8 error0;
+ i32 dst_port0;
+ ip4_mapt_pseudo_header_t *pheader0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ error0 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip40 = vlib_buffer_get_current (p0);
+ ip4_len0 = clib_host_to_net_u16 (ip40->length);
+ if (PREDICT_FALSE (p0->current_length < ip4_len0 ||
+ ip40->ip_version_and_header_length != 0x45))
+ {
+ error0 = MAP_ERROR_UNKNOWN;
+ next0 = IP4_MAPT_NEXT_DROP;
+ }
+
+ vnet_buffer (p0)->map_t.map_domain_index =
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index);
+
+ vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+
+ dst_port0 = -1;
+ ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0,
+ &next0);
+
+ //Add MAP-T pseudo header in front of the packet
+ vlib_buffer_advance (p0, -sizeof (*pheader0));
+ pheader0 = vlib_buffer_get_current (p0);
+
+ //Save addresses within the packet
+ ip4_map_t_embedded_address (d0, &pheader0->saddr,
+ &ip40->src_address);
+ pheader0->daddr.as_u64[0] =
+ map_get_pfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
+ pheader0->daddr.as_u64[1] =
+ map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
+
+	  //It is important to cache at this stage because the result might be
+	  //needed for other packets within the same vector. This approach even
+	  //provides some limited out-of-order fragment support.
+ if (PREDICT_FALSE
+ (ip4_is_first_fragment (ip40) && (dst_port0 != -1)
+ && (d0->ea_bits_len != 0 || !d0->rules)
+ && ip4_map_fragment_cache (ip40, dst_port0)))
+ {
+	      error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+
+ if (PREDICT_TRUE
+ (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ thread_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip40->length));
+ }
+
+ next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static char *map_t_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = {
+ .function = ip4_map_t_fragmented,
+ .name = "ip4-map-t-fragmented",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT,
+ .next_nodes = {
+ [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+ [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
+ .function = ip4_map_t_icmp,
+ .name = "ip4-map-t-icmp",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP4_MAPT_ICMP_N_NEXT,
+ .next_nodes = {
+ [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+ [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
+ .function = ip4_map_t_tcp_udp,
+ .name = "ip4-map-t-tcp-udp",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT,
+ .next_nodes = {
+ [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+ [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_t_node) = {
+ .function = ip4_map_t,
+ .name = "ip4-map-t",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP4_MAPT_N_NEXT,
+ .next_nodes = {
+ [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp",
+ [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp",
+ [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented",
+ [IP4_MAPT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c
new file mode 100644
index 00000000..720d13c2
--- /dev/null
+++ b/src/vnet/map/ip6_map.c
@@ -0,0 +1,1265 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+#include <vnet/ip/ip4_to_ip6.h>
+#include <vnet/ip/ip6_to_ip4.h>
+
+enum ip6_map_next_e
+{
+ IP6_MAP_NEXT_IP4_LOOKUP,
+#ifdef MAP_SKIP_IP6_LOOKUP
+ IP6_MAP_NEXT_IP4_REWRITE,
+#endif
+ IP6_MAP_NEXT_IP6_REASS,
+ IP6_MAP_NEXT_IP4_REASS,
+ IP6_MAP_NEXT_IP4_FRAGMENT,
+ IP6_MAP_NEXT_IP6_ICMP_RELAY,
+ IP6_MAP_NEXT_IP6_LOCAL,
+ IP6_MAP_NEXT_DROP,
+ IP6_MAP_NEXT_ICMP,
+ IP6_MAP_N_NEXT,
+};
+
+enum ip6_map_ip6_reass_next_e
+{
+ IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
+ IP6_MAP_IP6_REASS_NEXT_DROP,
+ IP6_MAP_IP6_REASS_N_NEXT,
+};
+
+enum ip6_map_ip4_reass_next_e
+{
+ IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP,
+ IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT,
+ IP6_MAP_IP4_REASS_NEXT_DROP,
+ IP6_MAP_IP4_REASS_N_NEXT,
+};
+
+enum ip6_icmp_relay_next_e
+{
+ IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,
+ IP6_ICMP_RELAY_NEXT_DROP,
+ IP6_ICMP_RELAY_N_NEXT,
+};
+
+vlib_node_registration_t ip6_map_ip4_reass_node;
+vlib_node_registration_t ip6_map_ip6_reass_node;
+static vlib_node_registration_t ip6_map_icmp_relay_node;
+
+typedef struct
+{
+ u32 map_domain_index;
+ u16 port;
+ u8 cached;
+} map_ip6_map_ip4_reass_trace_t;
+
+u8 *
+format_ip6_map_ip4_reass_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ map_ip6_map_ip4_reass_trace_t *t =
+ va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
+ return format (s, "MAP domain index: %d L4 port: %u Status: %s",
+ t->map_domain_index, t->port,
+ t->cached ? "cached" : "forwarded");
+}
+
+typedef struct
+{
+ u16 offset;
+ u16 frag_len;
+ u8 out;
+} map_ip6_map_ip6_reass_trace_t;
+
+u8 *
+format_ip6_map_ip6_reass_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ map_ip6_map_ip6_reass_trace_t *t =
+ va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
+ return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset,
+ t->frag_len, t->out ? "out" : "in");
+}
+
+/*
+ * ip6_map_sec_check
+ */
+static_always_inline bool
+ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4,
+ ip6_header_t * ip6)
+{
+ u16 sp4 = clib_net_to_host_u16 (port);
+ u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32);
+ u64 sal6 = map_get_pfx (d, sa4, sp4);
+ u64 sar6 = map_get_sfx (d, sa4, sp4);
+
+ if (PREDICT_FALSE
+ (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0])
+ || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1])))
+ return (false);
+ return (true);
+}
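+
+/*
+ * A worked example with hypothetical values: if the domain's MAP rule
+ * maps inner source 192.0.2.1 port 1024 to a given IPv6 prefix/suffix
+ * pair, the check recomputes that pair with map_get_pfx()/map_get_sfx()
+ * from the inner IPv4 source address and port and compares it against
+ * the outer IPv6 source address; an encapsulated packet whose outer
+ * source was not derived from its inner source fails the check and is
+ * dropped by the caller with MAP_ERROR_DECAP_SEC_CHECK.
+ */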
+
+static_always_inline void
+ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4,
+ ip6_header_t * ip6, u32 * next, u8 * error)
+{
+ map_main_t *mm = &map_main;
+ if (d->ea_bits_len || d->rules)
+ {
+ if (d->psid_length > 0)
+ {
+ if (!ip4_is_fragment (ip4))
+ {
+ u16 port = ip4_get_port (ip4, 1);
+ if (port)
+ {
+ if (mm->sec_check)
+ *error =
+ ip6_map_sec_check (d, port, ip4,
+ ip6) ? MAP_ERROR_NONE :
+ MAP_ERROR_DECAP_SEC_CHECK;
+ }
+ else
+ {
+ *error = MAP_ERROR_BAD_PROTOCOL;
+ }
+ }
+ else
+ {
+ *next = mm->sec_check_frag ? IP6_MAP_NEXT_IP4_REASS : *next;
+ }
+ }
+ }
+}
+
+static_always_inline bool
+ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
+{
+#ifdef MAP_SKIP_IP6_LOOKUP
+ if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei)
+ {
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+ pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index;
+ return (true);
+ }
+#endif
+ return (false);
+}
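+
+/*
+ * With MAP_SKIP_IP6_LOOKUP compiled in and a pre-resolved IPv4 next
+ * hop configured, the bypass above stores the resolved DPO index in
+ * the buffer and the caller steers the packet straight to
+ * "ip4-load-balance" (IP6_MAP_NEXT_IP4_REWRITE), avoiding a full FIB
+ * lookup for every decapsulated packet.
+ */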
+
+/*
+ * ip6_map
+ */
+static uword
+ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_node.index);
+ map_main_t *mm = &map_main;
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual loop */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ u8 error0 = MAP_ERROR_NONE;
+ u8 error1 = MAP_ERROR_NONE;
+ map_domain_t *d0 = 0, *d1 = 0;
+ ip4_header_t *ip40, *ip41;
+ ip6_header_t *ip60, *ip61;
+ u16 port0 = 0, port1 = 0;
+ u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
+ u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
+ u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ /* IPv6 + IPv4 header + 8 bytes of ULP */
+ CLIB_PREFETCH (p2->data, 68, LOAD);
+ CLIB_PREFETCH (p3->data, 68, LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip60 = vlib_buffer_get_current (p0);
+ ip61 = vlib_buffer_get_current (p1);
+ vlib_buffer_advance (p0, sizeof (ip6_header_t));
+ vlib_buffer_advance (p1, sizeof (ip6_header_t));
+ ip40 = vlib_buffer_get_current (p0);
+ ip41 = vlib_buffer_get_current (p1);
+
+ /*
+ * Encapsulated IPv4 packet
+ * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
+ * - Lookup/Rewrite or Fragment node in case of packet > MTU
+ * Fragmented IPv6 packet
+ * ICMP IPv6 packet
+ * - Error -> Pass to ICMPv6/ICMPv4 relay
+ * - Info -> Pass to IPv6 local
+ * Anything else -> drop
+ */
+ if (PREDICT_TRUE
+ (ip60->protocol == IP_PROTOCOL_IP_IN_IP
+ && clib_net_to_host_u16 (ip60->payload_length) > 20))
+ {
+ d0 =
+ ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & ip40->
+ src_address.as_u32, &map_domain_index0,
+ &error0);
+ }
+ else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
+ clib_net_to_host_u16 (ip60->payload_length) >
+ sizeof (icmp46_header_t))
+ {
+ icmp46_header_t *icmp = (void *) (ip60 + 1);
+ next0 = (icmp->type == ICMP6_echo_request
+ || icmp->type ==
+ ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
+ IP6_MAP_NEXT_IP6_ICMP_RELAY;
+ }
+ else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ next0 = IP6_MAP_NEXT_IP6_REASS;
+ }
+ else
+ {
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+ if (PREDICT_TRUE
+ (ip61->protocol == IP_PROTOCOL_IP_IN_IP
+ && clib_net_to_host_u16 (ip61->payload_length) > 20))
+ {
+ d1 =
+ ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & ip41->
+ src_address.as_u32, &map_domain_index1,
+ &error1);
+ }
+ else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
+ clib_net_to_host_u16 (ip61->payload_length) >
+ sizeof (icmp46_header_t))
+ {
+ icmp46_header_t *icmp = (void *) (ip61 + 1);
+ next1 = (icmp->type == ICMP6_echo_request
+ || icmp->type ==
+ ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
+ IP6_MAP_NEXT_IP6_ICMP_RELAY;
+ }
+ else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ next1 = IP6_MAP_NEXT_IP6_REASS;
+ }
+ else
+ {
+ error1 = MAP_ERROR_BAD_PROTOCOL;
+ }
+
+ if (d0)
+ {
+ /* MAP inbound security check */
+ ip6_map_security_check (d0, ip40, ip60, &next0, &error0);
+
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
+ next0 == IP6_MAP_NEXT_IP4_LOOKUP))
+ {
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.flags = 0;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
+ next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
+ }
+ else
+ {
+ next0 =
+ ip6_map_ip4_lookup_bypass (p0,
+ ip40) ?
+ IP6_MAP_NEXT_IP4_REWRITE : next0;
+ }
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ thread_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip40->length));
+ }
+ }
+ if (d1)
+ {
+ /* MAP inbound security check */
+ ip6_map_security_check (d1, ip41, ip61, &next1, &error1);
+
+ if (PREDICT_TRUE (error1 == MAP_ERROR_NONE &&
+ next1 == IP6_MAP_NEXT_IP4_LOOKUP))
+ {
+ if (PREDICT_FALSE
+ (d1->mtu
+ && (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
+ {
+ vnet_buffer (p1)->ip_frag.header_offset = 0;
+ vnet_buffer (p1)->ip_frag.flags = 0;
+ vnet_buffer (p1)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
+ next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
+ }
+ else
+ {
+ next1 =
+ ip6_map_ip4_lookup_bypass (p1,
+ ip41) ?
+ IP6_MAP_NEXT_IP4_REWRITE : next1;
+ }
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ thread_index,
+ map_domain_index1, 1,
+ clib_net_to_host_u16
+ (ip41->length));
+ }
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ }
+
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->map_domain_index = map_domain_index1;
+ tr->port = port1;
+ }
+
+ if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
+ {
+ /* Set ICMP parameters */
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+ icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_source_address_failed_policy,
+ 0);
+ next0 = IP6_MAP_NEXT_ICMP;
+ }
+ else
+ {
+ next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
+ }
+
+ if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
+ {
+ /* Set ICMP parameters */
+ vlib_buffer_advance (p1, -sizeof (ip6_header_t));
+ icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_source_address_failed_policy,
+ 0);
+ next1 = IP6_MAP_NEXT_ICMP;
+ }
+ else
+ {
+ next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP;
+ }
+
+ /* Reset packet */
+ if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+ if (next1 == IP6_MAP_NEXT_IP6_LOCAL)
+ vlib_buffer_advance (p1, -sizeof (ip6_header_t));
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = MAP_ERROR_NONE;
+ map_domain_t *d0 = 0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ i32 port0 = 0;
+ u32 map_domain_index0 = ~0;
+ u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ vlib_buffer_advance (p0, sizeof (ip6_header_t));
+ ip40 = vlib_buffer_get_current (p0);
+
+ /*
+ * Encapsulated IPv4 packet
+ * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
+ * - Lookup/Rewrite or Fragment node in case of packet > MTU
+ * Fragmented IPv6 packet
+ * ICMP IPv6 packet
+ * - Error -> Pass to ICMPv6/ICMPv4 relay
+ * - Info -> Pass to IPv6 local
+ * Anything else -> drop
+ */
+ if (PREDICT_TRUE
+ (ip60->protocol == IP_PROTOCOL_IP_IN_IP
+ && clib_net_to_host_u16 (ip60->payload_length) > 20))
+ {
+ d0 =
+ ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & ip40->
+ src_address.as_u32, &map_domain_index0,
+ &error0);
+ }
+ else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
+ clib_net_to_host_u16 (ip60->payload_length) >
+ sizeof (icmp46_header_t))
+ {
+ icmp46_header_t *icmp = (void *) (ip60 + 1);
+ next0 = (icmp->type == ICMP6_echo_request
+ || icmp->type ==
+ ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
+ IP6_MAP_NEXT_IP6_ICMP_RELAY;
+ }
+ else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
+ (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr ==
+ IP_PROTOCOL_IP_IN_IP))
+ {
+ next0 = IP6_MAP_NEXT_IP6_REASS;
+ }
+ else
+ {
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+
+ if (d0)
+ {
+ /* MAP inbound security check */
+ ip6_map_security_check (d0, ip40, ip60, &next0, &error0);
+
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
+ next0 == IP6_MAP_NEXT_IP4_LOOKUP))
+ {
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.flags = 0;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
+ next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
+ }
+ else
+ {
+ next0 =
+ ip6_map_ip4_lookup_bypass (p0,
+ ip40) ?
+ IP6_MAP_NEXT_IP4_REWRITE : next0;
+ }
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ thread_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip40->length));
+ }
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = (u16) port0;
+ }
+
+ if (mm->icmp6_enabled &&
+ (error0 == MAP_ERROR_DECAP_SEC_CHECK
+ || error0 == MAP_ERROR_NO_DOMAIN))
+ {
+ /* Set ICMP parameters */
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+ icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_source_address_failed_policy,
+ 0);
+ next0 = IP6_MAP_NEXT_ICMP;
+ }
+ else
+ {
+ next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
+ }
+
+ /* Reset packet */
+ if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+static_always_inline void
+ip6_map_ip6_reass_prepare (vlib_main_t * vm, vlib_node_runtime_t * node,
+ map_ip6_reass_t * r, u32 ** fragments_ready,
+ u32 ** fragments_to_drop)
+{
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ ip6_frag_hdr_t *frag0;
+ vlib_buffer_t *p0;
+
+ if (!r->ip4_header.ip_version_and_header_length)
+ return;
+
+  //The IPv4 header is now known; check whether any cached fragments
+  //can be forwarded
+ int i;
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ {
+ if (r->fragments[i].pi == ~0 ||
+ ((!r->fragments[i].next_data_len)
+ && (r->fragments[i].next_data_offset != (0xffff))))
+ continue;
+
+ p0 = vlib_get_buffer (vm, r->fragments[i].pi);
+ ip60 = vlib_buffer_get_current (p0);
+ frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
+ ip40 = (ip4_header_t *) (frag0 + 1);
+
+ if (ip6_frag_hdr_offset (frag0))
+ {
+ //Not first fragment, add the IPv4 header
+ clib_memcpy (ip40, &r->ip4_header, 20);
+ }
+
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ r->forwarded +=
+ clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
+#endif
+
+ if (ip6_frag_hdr_more (frag0))
+ {
+	  //Not the last fragment: append the 20 bytes saved from the start of the next one
+ clib_memcpy (u8_ptr_add (ip60, p0->current_length),
+ r->fragments[i].next_data, 20);
+ p0->current_length += 20;
+ ip60->payload_length = u16_net_add (ip60->payload_length, 20);
+ }
+
+ if (!ip4_is_fragment (ip40))
+ {
+ ip40->fragment_id = frag_id_6to4 (frag0->identification);
+ ip40->flags_and_fragment_offset =
+ clib_host_to_net_u16 (ip6_frag_hdr_offset (frag0));
+ }
+ else
+ {
+ ip40->flags_and_fragment_offset =
+ clib_host_to_net_u16 (ip4_get_fragment_offset (ip40) +
+ ip6_frag_hdr_offset (frag0));
+ }
+
+ if (ip6_frag_hdr_more (frag0))
+ ip40->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+ ip40->length =
+ clib_host_to_net_u16 (p0->current_length - sizeof (*ip60) -
+ sizeof (*frag0));
+ ip40->checksum = ip4_header_checksum (ip40);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_ip6_map_ip6_reass_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->offset = ip4_get_fragment_offset (ip40);
+ tr->frag_len = clib_net_to_host_u16 (ip40->length) - sizeof (*ip40);
+ tr->out = 1;
+ }
+
+ vec_add1 (*fragments_ready, r->fragments[i].pi);
+ r->fragments[i].pi = ~0;
+ r->fragments[i].next_data_len = 0;
+ r->fragments[i].next_data_offset = 0;
+ map_main.ip6_reass_buffered_counter--;
+
+      //TODO: The best solution would be for ip6_map to handle extension
+      // headers and ignore atomic fragments. In the meantime, just copy
+      // the IPv6 header in front and fix up its protocol field.
+
+ u8 protocol = frag0->next_hdr;
+ memmove (u8_ptr_add (ip40, -sizeof (*ip60)), ip60, sizeof (*ip60));
+ ((ip6_header_t *) u8_ptr_add (ip40, -sizeof (*ip60)))->protocol =
+ protocol;
+ vlib_buffer_advance (p0, sizeof (*frag0));
+ }
+}
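+
+/*
+ * Fragment field translation above: ip6_frag_hdr_offset() and the
+ * IPv4 fragment-offset field both count 8-octet units, so the offsets
+ * add directly; frag_id_6to4() folds the 32-bit IPv6 identification
+ * into the 16-bit IPv4 fragment_id; and the IPv6 "more fragments" bit
+ * maps to IP4_HEADER_FLAG_MORE_FRAGMENTS.
+ */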
+
+void
+map_ip6_drop_pi (u32 pi)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_runtime_t *n =
+ vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
+ vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
+}
+
+void
+map_ip4_drop_pi (u32 pi)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_runtime_t *n =
+ vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
+ vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi);
+}
+
+/*
+ * ip6_map_ip6_reass
+ * TODO: We should count the number of successfully
+ * transmitted fragment bytes and compare that to the last fragment
+ * offset such that we can free the reassembly structure when all fragments
+ * have been forwarded.
+ */
+static uword
+ip6_map_ip6_reass (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
+ u32 *fragments_to_drop = NULL;
+ u32 *fragments_ready = NULL;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = MAP_ERROR_NONE;
+ ip6_header_t *ip60;
+ ip6_frag_hdr_t *frag0;
+ u16 offset;
+ u16 next_offset;
+ u16 frag_len;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
+ offset =
+ clib_host_to_net_u16 (frag0->fragment_offset_and_more) & (~7);
+ frag_len =
+ clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
+ next_offset =
+ ip6_frag_hdr_more (frag0) ? (offset + frag_len) : (0xffff);
+
+ //FIXME: Support other extension headers, maybe
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_ip6_map_ip6_reass_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->offset = offset;
+ tr->frag_len = frag_len;
+ tr->out = 0;
+ }
+
+ map_ip6_reass_lock ();
+ map_ip6_reass_t *r =
+ map_ip6_reass_get (&ip60->src_address, &ip60->dst_address,
+ frag0->identification, frag0->next_hdr,
+ &fragments_to_drop);
+ //FIXME: Use better error codes
+ if (PREDICT_FALSE (!r))
+ {
+ // Could not create a caching entry
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+ else if (PREDICT_FALSE ((frag_len <= 20 &&
+ (ip6_frag_hdr_more (frag0) || (!offset)))))
+ {
+	      //Very small fragments are only allowed as the last one and
+	      //cannot be the first one
+ error0 = MAP_ERROR_FRAGMENT_MALFORMED;
+ }
+ else
+ if (map_ip6_reass_add_fragment
+ (r, pi0, offset, next_offset, (u8 *) (frag0 + 1), frag_len))
+ {
+ map_ip6_reass_free (r, &fragments_to_drop);
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+ else
+ {
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ if (!ip6_frag_hdr_more (frag0))
+ r->expected_total = offset + frag_len;
+#endif
+ ip6_map_ip6_reass_prepare (vm, node, r, &fragments_ready,
+ &fragments_to_drop);
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ if (r->forwarded >= r->expected_total)
+ map_ip6_reass_free (r, &fragments_to_drop);
+#endif
+ }
+ map_ip6_reass_unlock ();
+
+ if (error0 == MAP_ERROR_NONE)
+ {
+ if (frag_len > 20)
+ {
+ //Dequeue the packet
+ n_left_to_next++;
+ to_next--;
+ }
+ else
+ {
+		  //All data from this packet was copied; no need to keep it, but this is not an error
+ p0->error = error_node->errors[MAP_ERROR_NONE];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0,
+ IP6_MAP_IP6_REASS_NEXT_DROP);
+ }
+ }
+ else
+ {
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0,
+ IP6_MAP_IP6_REASS_NEXT_DROP);
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ map_send_all_to_node (vm, fragments_ready, node,
+ &error_node->errors[MAP_ERROR_NONE],
+ IP6_MAP_IP6_REASS_NEXT_IP6_MAP);
+ map_send_all_to_node (vm, fragments_to_drop, node,
+ &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+ IP6_MAP_IP6_REASS_NEXT_DROP);
+
+ vec_free (fragments_to_drop);
+ vec_free (fragments_ready);
+ return frame->n_vectors;
+}
+
+/*
+ * ip6_map_ip4_reass (IPv4 virtual reassembly)
+ */
+static uword
+ip6_map_ip4_reass (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
+ map_main_t *mm = &map_main;
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 *fragments_to_drop = NULL;
+ u32 *fragments_to_loopback = NULL;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = MAP_ERROR_NONE;
+ map_domain_t *d0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ i32 port0 = 0;
+ u32 map_domain_index0 = ~0;
+ u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP;
+ u8 cached = 0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip40 = vlib_buffer_get_current (p0);
+ ip60 = ((ip6_header_t *) ip40) - 1;
+
+ d0 =
+ ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & ip40->src_address.as_u32,
+ &map_domain_index0, &error0);
+
+ map_ip4_reass_lock ();
+ //This node only deals with fragmented ip4
+ map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32,
+ ip40->dst_address.as_u32,
+ ip40->fragment_id,
+ ip40->protocol,
+ &fragments_to_drop);
+ if (PREDICT_FALSE (!r))
+ {
+ // Could not create a caching entry
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+ else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40)))
+ {
+ // This is a fragment
+ if (r->port >= 0)
+ {
+ // We know the port already
+ port0 = r->port;
+ }
+ else if (map_ip4_reass_add_fragment (r, pi0))
+ {
+ // Not enough space for caching
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+ else
+ {
+ cached = 1;
+ }
+ }
+ else if ((port0 = ip4_get_port (ip40, 1)) == 0)
+ {
+ // Could not find port from first fragment. Stop reassembling.
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ port0 = 0;
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+ else
+ {
+ // Found port. Remember it and loopback saved fragments
+ r->port = port0;
+ map_ip4_reass_get_fragments (r, &fragments_to_loopback);
+ }
+
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ if (!cached && r)
+ {
+ r->forwarded += clib_host_to_net_u16 (ip40->length) - 20;
+ if (!ip4_get_fragment_more (ip40))
+ r->expected_total =
+ ip4_get_fragment_offset (ip40) * 8 +
+ clib_host_to_net_u16 (ip40->length) - 20;
+ if (r->forwarded >= r->expected_total)
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+#endif
+
+ map_ip4_reass_unlock ();
+
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ error0 =
+ ip6_map_sec_check (d0, port0, ip40,
+ ip60) ? MAP_ERROR_NONE :
+ MAP_ERROR_DECAP_SEC_CHECK;
+
+ if (PREDICT_FALSE
+ (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
+ && error0 == MAP_ERROR_NONE && !cached))
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.flags = 0;
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
+ next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_ip6_map_ip4_reass_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ tr->cached = cached;
+ }
+
+ if (cached)
+ {
+ //Dequeue the packet
+ n_left_to_next++;
+ to_next--;
+ }
+ else
+ {
+ if (error0 == MAP_ERROR_NONE)
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ thread_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip40->length));
+ next0 =
+ (error0 ==
+ MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+
+	  //Loopback when we reach the end of the input vector
+ if (n_left_from == 0 && vec_len (fragments_to_loopback))
+ {
+ from = vlib_frame_vector_args (frame);
+ u32 len = vec_len (fragments_to_loopback);
+ if (len <= VLIB_FRAME_SIZE)
+ {
+ clib_memcpy (from, fragments_to_loopback,
+ sizeof (u32) * len);
+ n_left_from = len;
+ vec_reset_length (fragments_to_loopback);
+ }
+ else
+ {
+ clib_memcpy (from,
+ fragments_to_loopback + (len -
+ VLIB_FRAME_SIZE),
+ sizeof (u32) * VLIB_FRAME_SIZE);
+ n_left_from = VLIB_FRAME_SIZE;
+ _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+ }
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ map_send_all_to_node (vm, fragments_to_drop, node,
+ &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+ IP6_MAP_IP4_REASS_NEXT_DROP);
+
+ vec_free (fragments_to_drop);
+ vec_free (fragments_to_loopback);
+ return frame->n_vectors;
+}
+
+/*
+ * ip6_map_icmp_relay
+ */
+static uword
+ip6_map_icmp_relay (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
+ map_main_t *mm = &map_main;
+ u32 thread_index = vlib_get_thread_index ();
+ u16 *fragment_ids, *fid;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ /* Get random fragment IDs for replies. */
+ fid = fragment_ids =
+ clib_random_buffer_get_data (&vm->random_buffer,
+ n_left_from * sizeof (fragment_ids[0]));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = MAP_ERROR_NONE;
+ ip6_header_t *ip60;
+ u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
+ u32 mtu;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ u16 tlen = clib_net_to_host_u16 (ip60->payload_length);
+
+ /*
+ * In:
+ * IPv6 header (40)
+ * ICMPv6 header (8)
+ * IPv6 header (40)
+ * Original IPv4 header / packet
+ * Out:
+ * New IPv4 header
+ * New ICMP header
+ * Original IPv4 header / packet
+ */
+
+ /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
+ if (tlen < 76)
+ {
+ error0 = MAP_ERROR_ICMP_RELAY;
+ goto error;
+ }
+
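+	  /* icmp60 + 2 advances 2 * sizeof (icmp46_header_t) = 8 bytes,
+	   * i.e. past the full ICMPv6 header including its 4-byte
+	   * type-specific field, to the embedded inner IPv6 header. */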
+ icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1);
+ ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2);
+
+ if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP)
+ {
+ error0 = MAP_ERROR_ICMP_RELAY;
+ goto error;
+ }
+
+ ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1);
+	  vlib_buffer_advance (p0, 60);	/* sizeof (IPv6 + ICMP + IPv6 - IPv4 - ICMP) = 40 + 8 + 40 - 20 - 8 = 60 */
+ ip4_header_t *new_ip40 = vlib_buffer_get_current (p0);
+ icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1);
+
+ /*
+ * Relay according to RFC2473, section 8.3
+ */
+ switch (icmp60->type)
+ {
+ case ICMP6_destination_unreachable:
+ case ICMP6_time_exceeded:
+ case ICMP6_parameter_problem:
+ /* Type 3 - destination unreachable, Code 1 - host unreachable */
+ new_icmp40->type = ICMP4_destination_unreachable;
+ new_icmp40->code =
+ ICMP4_destination_unreachable_destination_unreachable_host;
+ break;
+
+ case ICMP6_packet_too_big:
+ /* Type 3 - destination unreachable, Code 4 - packet too big */
+ /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
+ mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1)));
+
+	      /* Check DF flag: relay only if the inner IPv4 packet had DF set */
+ if (!
+ (inner_ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)))
+ {
+ error0 = MAP_ERROR_ICMP_RELAY;
+ goto error;
+ }
+
+ new_icmp40->type = ICMP4_destination_unreachable;
+ new_icmp40->code =
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
+ *((u32 *) (new_icmp40 + 1)) =
+ clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu);
+ break;
+
+ default:
+ error0 = MAP_ERROR_ICMP_RELAY;
+ break;
+ }
+
+ /*
+ * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
+ */
+ new_ip40->ip_version_and_header_length = 0x45;
+ new_ip40->tos = 0;
+ u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
+ new_ip40->length = clib_host_to_net_u16 (nlen);
+ new_ip40->fragment_id = fid[0];
+ fid++;
+ new_ip40->ttl = 64;
+ new_ip40->protocol = IP_PROTOCOL_ICMP;
+ new_ip40->src_address = mm->icmp4_src_address;
+ new_ip40->dst_address = inner_ip40->src_address;
+ new_ip40->checksum = ip4_header_checksum (new_ip40);
+
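+	  /* The ICMP checksum covers everything after the IPv4 header:
+	   * nlen (the new IPv4 total length) minus 20 bytes. */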
+ new_icmp40->checksum = 0;
+ ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
+ new_icmp40->checksum = ~ip_csum_fold (sum);
+
+ vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0,
+ 1);
+
+ error:
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = 0;
+ tr->port = 0;
+ }
+
+ next0 =
+ (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+
+}
+
+static char *map_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_node) = {
+ .function = ip6_map,
+ .name = "ip6-map",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+
+ .n_next_nodes = IP6_MAP_N_NEXT,
+ .next_nodes = {
+ [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+#ifdef MAP_SKIP_IP6_LOOKUP
+ [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance",
+#endif
+ [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
+ [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",
+ [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
+ [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
+ [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
+ [IP6_MAP_NEXT_DROP] = "error-drop",
+ [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
+ .function = ip6_map_ip6_reass,
+ .name = "ip6-map-ip6-reass",
+ .vector_size = sizeof(u32),
+ .format_trace = format_ip6_map_ip6_reass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+ .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT,
+ .next_nodes = {
+ [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map",
+ [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = {
+ .function = ip6_map_ip4_reass,
+ .name = "ip6-map-ip4-reass",
+ .vector_size = sizeof(u32),
+ .format_trace = format_ip6_map_ip4_reass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+ .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT,
+ .next_nodes = {
+ [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
+ [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
+ .function = ip6_map_icmp_relay,
+ .name = "ip6-map-icmp-relay",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace, //FIXME
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+ .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
+ .next_nodes = {
+ [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/ip6_map_t.c b/src/vnet/map/ip6_map_t.c
new file mode 100644
index 00000000..b173bb2a
--- /dev/null
+++ b/src/vnet/map/ip6_map_t.c
@@ -0,0 +1,1039 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+#include <vnet/ip/ip6_to_ip4.h>
+#include <vnet/ip/ip4_to_ip6.h>
+
+#define IP6_MAP_T_DUAL_LOOP
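+
+/*
+ * With IP6_MAP_T_DUAL_LOOP defined, the nodes below run a two-buffers-
+ * per-iteration loop (vlib_validate_buffer_enqueue_x2) before falling
+ * back to the single-buffer loop, amortizing per-packet dispatch
+ * overhead at the cost of code size.
+ */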
+
+typedef enum
+{
+ IP6_MAPT_NEXT_MAPT_TCP_UDP,
+ IP6_MAPT_NEXT_MAPT_ICMP,
+ IP6_MAPT_NEXT_MAPT_FRAGMENTED,
+ IP6_MAPT_NEXT_DROP,
+ IP6_MAPT_N_NEXT
+} ip6_mapt_next_t;
+
+typedef enum
+{
+ IP6_MAPT_ICMP_NEXT_IP4_LOOKUP,
+ IP6_MAPT_ICMP_NEXT_IP4_FRAG,
+ IP6_MAPT_ICMP_NEXT_DROP,
+ IP6_MAPT_ICMP_N_NEXT
+} ip6_mapt_icmp_next_t;
+
+typedef enum
+{
+ IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP,
+ IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG,
+ IP6_MAPT_TCP_UDP_NEXT_DROP,
+ IP6_MAPT_TCP_UDP_N_NEXT
+} ip6_mapt_tcp_udp_next_t;
+
+typedef enum
+{
+ IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP,
+ IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG,
+ IP6_MAPT_FRAGMENTED_NEXT_DROP,
+ IP6_MAPT_FRAGMENTED_N_NEXT
+} ip6_mapt_fragmented_next_t;
+
+static_always_inline int
+ip6_map_fragment_cache (ip6_header_t * ip6, ip6_frag_hdr_t * frag,
+ map_domain_t * d, u16 port)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address),
+ ip6_map_t_embedded_address (d,
+ &ip6->
+ dst_address),
+ frag_id_6to4 (frag->identification),
+ (ip6->protocol ==
+ IP_PROTOCOL_ICMP6) ?
+ IP_PROTOCOL_ICMP : ip6->protocol,
+ &ignore);
+ if (r)
+ r->port = port;
+
+ map_ip4_reass_unlock ();
+ return !r;
+}
+
+/* Returns the associated port or -1 */
+static_always_inline i32
+ip6_map_fragment_get (ip6_header_t * ip6, ip6_frag_hdr_t * frag,
+ map_domain_t * d)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address),
+ ip6_map_t_embedded_address (d,
+ &ip6->
+ dst_address),
+ frag_id_6to4 (frag->identification),
+ (ip6->protocol ==
+ IP_PROTOCOL_ICMP6) ?
+ IP_PROTOCOL_ICMP : ip6->protocol,
+ &ignore);
+ i32 ret = r ? r->port : -1;
+ map_ip4_reass_unlock ();
+ return ret;
+}
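+
+/*
+ * Usage sketch for the two helpers above: only the first fragment
+ * carries the L4 header, so its source port is cached keyed by
+ * (src, dst, fragment id, protocol); subsequent fragments recover it
+ * for the MAP security check.
+ *
+ *   if (!ip6_frag_hdr_offset (frag))               // first fragment
+ *     ip6_map_fragment_cache (ip6, frag, d, port);
+ *   else
+ *     port = ip6_map_fragment_get (ip6, frag, d);  // -1 when unknown
+ */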
+
+typedef struct
+{
+ map_domain_t *d;
+ u16 sender_port;
+} icmp6_to_icmp_ctx_t;
+
+static int
+ip6_to_ip4_set_icmp_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
+{
+ icmp6_to_icmp_ctx_t *ctx = arg;
+ u32 ip4_sadr;
+
+ //Security check
+ //Note that this prevents an intermediate IPv6 router from answering the request
+ ip4_sadr = map_get_ip4 (&ip6->src_address);
+ if (ip6->src_address.as_u64[0] !=
+ map_get_pfx_net (ctx->d, ip4_sadr, ctx->sender_port)
+ || ip6->src_address.as_u64[1] != map_get_sfx_net (ctx->d, ip4_sadr,
+ ctx->sender_port))
+ return -1;
+
+ ip4->dst_address.as_u32 =
+ ip6_map_t_embedded_address (ctx->d, &ip6->dst_address);
+ ip4->src_address.as_u32 = ip4_sadr;
+
+ return 0;
+}
+
+static int
+ip6_to_ip4_set_inner_icmp_cb (ip6_header_t * ip6, ip4_header_t * ip4,
+ void *arg)
+{
+ icmp6_to_icmp_ctx_t *ctx = arg;
+ u32 inner_ip4_dadr;
+
+ //Security check of inner packet
+ inner_ip4_dadr = map_get_ip4 (&ip6->dst_address);
+ if (ip6->dst_address.as_u64[0] !=
+ map_get_pfx_net (ctx->d, inner_ip4_dadr, ctx->sender_port)
+ || ip6->dst_address.as_u64[1] != map_get_sfx_net (ctx->d,
+ inner_ip4_dadr,
+ ctx->sender_port))
+ return -1;
+
+ ip4->dst_address.as_u32 = inner_ip4_dadr;
+ ip4->src_address.as_u32 =
+ ip6_map_t_embedded_address (ctx->d, &ip6->src_address);
+
+ return 0;
+}
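+
+/*
+ * icmp6_to_icmp() takes two callbacks because the two IPv6 headers in
+ * an ICMPv6 error are translated with opposite roles: the outer header
+ * (ip6_to_ip4_set_icmp_cb) has the MAP CE as source, so the security
+ * check runs on src_address, while the embedded offending packet
+ * (ip6_to_ip4_set_inner_icmp_cb) has the CE as destination, so the
+ * check runs on dst_address.
+ */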
+
+static uword
+ip6_map_t_icmp (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_t_icmp_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_combined_counter_main_t *cm = map_main.domain_counters;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0;
+ ip6_mapt_icmp_next_t next0;
+ map_domain_t *d0;
+ u16 len0;
+ icmp6_to_icmp_ctx_t ctx0;
+ ip6_header_t *ip60;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ error0 = MAP_ERROR_NONE;
+ next0 = IP6_MAPT_ICMP_NEXT_IP4_LOOKUP;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16 (ip60->payload_length);
+ d0 =
+ pool_elt_at_index (map_main.domains,
+ vnet_buffer (p0)->map_t.map_domain_index);
+ ctx0.sender_port = ip6_get_port (ip60, 0, len0);
+ ctx0.d = d0;
+ if (ctx0.sender_port == 0)
+ {
+ // In case of 1:1 mapping, we don't care about the port
+ if (!(d0->ea_bits_len == 0 && d0->rules))
+ {
+ error0 = MAP_ERROR_ICMP;
+ goto err0;
+ }
+ }
+
+ if (icmp6_to_icmp
+ (p0, ip6_to_ip4_set_icmp_cb, d0, ip6_to_ip4_set_inner_icmp_cb,
+ d0))
+ {
+ error0 = MAP_ERROR_ICMP;
+ goto err0;
+ }
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG;
+ }
+ err0:
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ thread_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ len0);
+ }
+ else
+ {
+ next0 = IP6_MAPT_ICMP_NEXT_DROP;
+ }
+
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static int
+ip6_to_ip4_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *ctx)
+{
+ vlib_buffer_t *p = ctx;
+
+ ip4->dst_address.as_u32 = vnet_buffer (p)->map_t.v6.daddr;
+ ip4->src_address.as_u32 = vnet_buffer (p)->map_t.v6.saddr;
+
+ return 0;
+}
+
+static uword
+ip6_map_t_fragmented (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_t_fragmented_node.index);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ u32 next0, next1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+	  next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP;
+	  next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP;
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0))
+ {
+ p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED];
+ next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+ }
+ }
+
+ if (ip6_to_ip4_fragmented (p1, ip6_to_ip4_set_cb, p1))
+ {
+ p1->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED];
+ next1 = IP6_MAPT_FRAGMENTED_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p1)->map_t.mtu < p1->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
+ vnet_buffer (p1)->ip_frag.header_offset = 0;
+ vnet_buffer (p1)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next, pi0, pi1,
+ next0, next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u32 next0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+	  next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP;
+ p0 = vlib_get_buffer (vm, pi0);
+
+ if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0))
+ {
+ p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED];
+ next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static uword
+ip6_map_t_tcp_udp (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_t_tcp_udp_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip6_mapt_tcp_udp_next_t next0, next1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+ next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1))
+ {
+ p0->error = error_node->errors[MAP_ERROR_UNKNOWN];
+ next0 = IP6_MAPT_TCP_UDP_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+ }
+ }
+
+ if (ip6_to_ip4_tcp_udp (p1, ip6_to_ip4_set_cb, p1, 1))
+ {
+ p1->error = error_node->errors[MAP_ERROR_UNKNOWN];
+ next1 = IP6_MAPT_TCP_UDP_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p1)->map_t.mtu < p1->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
+ vnet_buffer (p1)->ip_frag.header_offset = 0;
+ vnet_buffer (p1)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip6_mapt_tcp_udp_next_t next0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1))
+ {
+ p0->error = error_node->errors[MAP_ERROR_UNKNOWN];
+ next0 = IP6_MAPT_TCP_UDP_NEXT_DROP;
+ }
+ else
+ {
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static_always_inline void
+ip6_map_t_classify (vlib_buffer_t * p0, ip6_header_t * ip60,
+ map_domain_t * d0, i32 * src_port0,
+ u8 * error0, ip6_mapt_next_t * next0,
+ u32 l4_len0, ip6_frag_hdr_t * frag0)
+{
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset &&
+ ip6_frag_hdr_offset (frag0)))
+ {
+ *next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED;
+ if (d0->ea_bits_len == 0 && d0->rules)
+ {
+ *src_port0 = 0;
+ }
+ else
+ {
+ *src_port0 = ip6_map_fragment_get (ip60, frag0, d0);
+ *error0 = (*src_port0 != -1) ? *error0 : MAP_ERROR_FRAGMENT_DROPPED;
+ }
+ }
+ else
+ if (PREDICT_TRUE
+ (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP))
+ {
+ *error0 =
+ l4_len0 < sizeof (tcp_header_t) ? MAP_ERROR_MALFORMED : *error0;
+ vnet_buffer (p0)->map_t.checksum_offset =
+ vnet_buffer (p0)->map_t.v6.l4_offset + 16;
+ *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+ *src_port0 =
+ (i32) *
+ ((u16 *) u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset));
+ }
+ else
+ if (PREDICT_TRUE
+ (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP))
+ {
+ *error0 =
+ l4_len0 < sizeof (udp_header_t) ? MAP_ERROR_MALFORMED : *error0;
+ vnet_buffer (p0)->map_t.checksum_offset =
+ vnet_buffer (p0)->map_t.v6.l4_offset + 6;
+ *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+ *src_port0 =
+ (i32) *
+ ((u16 *) u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset));
+ }
+ else if (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)
+ {
+ *error0 =
+ l4_len0 < sizeof (icmp46_header_t) ? MAP_ERROR_MALFORMED : *error0;
+ *next0 = IP6_MAPT_NEXT_MAPT_ICMP;
+ if (d0->ea_bits_len == 0 && d0->rules)
+ {
+ *src_port0 = 0;
+ }
+ else
+ if (((icmp46_header_t *)
+ u8_ptr_add (ip60,
+		     vnet_buffer (p0)->map_t.v6.l4_offset))->type ==
+ ICMP6_echo_reply
+ || ((icmp46_header_t *)
+ u8_ptr_add (ip60,
+				vnet_buffer (p0)->map_t.v6.l4_offset))->type ==
+ ICMP6_echo_request)
+ {
+ *src_port0 =
+ (i32) *
+ ((u16 *)
+ u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset + 6));
+ }
+ }
+ else
+ {
+ //TODO: In case of 1:1 mapping, it might be possible to do something with those packets.
+ *error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+}
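+
+/*
+ * Note on checksum_offset above, assuming the standard vnet TCP/UDP
+ * header layouts:
+ *   l4_offset + 16 == l4_offset + STRUCT_OFFSET_OF (tcp_header_t, checksum)
+ *   l4_offset + 6  == l4_offset + STRUCT_OFFSET_OF (udp_header_t, checksum)
+ * i.e. the classifier records where the L4 checksum lives so the
+ * translator can fix it up after rewriting the addresses.
+ */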
+
+static uword
+ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_t_node.index);
+ vlib_combined_counter_main_t *cm = map_main.domain_counters;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip60, *ip61;
+ u8 error0, error1;
+ ip6_mapt_next_t next0, next1;
+ u32 l4_len0, l4_len1;
+ i32 src_port0, src_port1;
+ map_domain_t *d0, *d1;
+ ip6_frag_hdr_t *frag0, *frag1;
+ u32 saddr0, saddr1;
+ next0 = next1 = 0; //Because compiler whines
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ error0 = MAP_ERROR_NONE;
+ error1 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip60 = vlib_buffer_get_current (p0);
+ ip61 = vlib_buffer_get_current (p1);
+
+ saddr0 = map_get_ip4 (&ip60->src_address);
+ saddr1 = map_get_ip4 (&ip61->src_address);
+ d0 = ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & saddr0,
+ &vnet_buffer (p0)->map_t.map_domain_index,
+ &error0);
+ d1 =
+ ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & saddr1,
+ &vnet_buffer (p1)->map_t.map_domain_index,
+ &error1);
+
+ vnet_buffer (p0)->map_t.v6.saddr = saddr0;
+ vnet_buffer (p1)->map_t.v6.saddr = saddr1;
+ vnet_buffer (p0)->map_t.v6.daddr =
+ ip6_map_t_embedded_address (d0, &ip60->dst_address);
+ vnet_buffer (p1)->map_t.v6.daddr =
+ ip6_map_t_embedded_address (d1, &ip61->dst_address);
+ vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+ vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0;
+
+ if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length,
+ &(vnet_buffer (p0)->map_t.
+ v6.l4_protocol),
+ &(vnet_buffer (p0)->map_t.
+ v6.l4_offset),
+ &(vnet_buffer (p0)->map_t.
+ v6.frag_offset))))
+ {
+ error0 = MAP_ERROR_MALFORMED;
+ next0 = IP6_MAPT_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE (ip6_parse (ip61, p1->current_length,
+ &(vnet_buffer (p1)->map_t.
+ v6.l4_protocol),
+ &(vnet_buffer (p1)->map_t.
+ v6.l4_offset),
+ &(vnet_buffer (p1)->map_t.
+ v6.frag_offset))))
+ {
+ error1 = MAP_ERROR_MALFORMED;
+ next1 = IP6_MAPT_NEXT_DROP;
+ }
+
+ src_port0 = src_port1 = -1;
+ l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) +
+ sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset;
+ l4_len1 = (u32) clib_net_to_host_u16 (ip61->payload_length) +
+ sizeof (*ip60) - vnet_buffer (p1)->map_t.v6.l4_offset;
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.frag_offset);
+ frag1 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip61,
+ vnet_buffer (p1)->map_t.
+ v6.frag_offset);
+
+ ip6_map_t_classify (p0, ip60, d0, &src_port0, &error0, &next0,
+ l4_len0, frag0);
+ ip6_map_t_classify (p1, ip61, d1, &src_port1, &error1, &next1,
+ l4_len1, frag1);
+
+ if (PREDICT_FALSE
+ ((src_port0 != -1)
+ && (ip60->src_address.as_u64[0] !=
+ map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr,
+ src_port0)
+ || ip60->src_address.as_u64[1] != map_get_sfx_net (d0,
+ vnet_buffer
+ (p0)->map_t.v6.saddr,
+ src_port0))))
+ {
+ error0 = MAP_ERROR_SEC_CHECK;
+ }
+
+ if (PREDICT_FALSE
+ ((src_port1 != -1)
+ && (ip61->src_address.as_u64[0] !=
+ map_get_pfx_net (d1, vnet_buffer (p1)->map_t.v6.saddr,
+ src_port1)
+ || ip61->src_address.as_u64[1] != map_get_sfx_net (d1,
+ vnet_buffer
+ (p1)->map_t.v6.saddr,
+ src_port1))))
+ {
+ error1 = MAP_ERROR_SEC_CHECK;
+ }
+
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset &&
+ !ip6_frag_hdr_offset ((ip6_frag_hdr_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->map_t.
+ v6.frag_offset)))
+ && (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules)
+ && (error0 == MAP_ERROR_NONE))
+ {
+ ip6_map_fragment_cache (ip60,
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->map_t.
+ v6.frag_offset),
+ d0, src_port0);
+ }
+
+ if (PREDICT_FALSE (vnet_buffer (p1)->map_t.v6.frag_offset &&
+ !ip6_frag_hdr_offset ((ip6_frag_hdr_t *)
+ u8_ptr_add (ip61,
+ vnet_buffer
+ (p1)->map_t.
+ v6.frag_offset)))
+ && (src_port1 != -1) && (d1->ea_bits_len != 0 || !d1->rules)
+ && (error1 == MAP_ERROR_NONE))
+ {
+ ip6_map_fragment_cache (ip61,
+ (ip6_frag_hdr_t *) u8_ptr_add (ip61,
+ vnet_buffer
+ (p1)->map_t.
+ v6.frag_offset),
+ d1, src_port1);
+ }
+
+ if (PREDICT_TRUE
+ (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ thread_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip60->payload_length));
+ }
+
+ if (PREDICT_TRUE
+ (error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ thread_index,
+ vnet_buffer (p1)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip61->payload_length));
+ }
+
+ next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0;
+ next1 = (error1 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next1;
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip60;
+ u8 error0;
+ u32 l4_len0;
+ i32 src_port0;
+ map_domain_t *d0;
+ ip6_frag_hdr_t *frag0;
+ ip6_mapt_next_t next0 = 0;
+ u32 saddr;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ error0 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+	  //Save saddr in a separate variable so we don't overwrite ip.adj_index
+ saddr = map_get_ip4 (&ip60->src_address);
+ d0 = ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & saddr,
+ &vnet_buffer (p0)->map_t.map_domain_index,
+ &error0);
+
+ //FIXME: What if d0 is null
+ vnet_buffer (p0)->map_t.v6.saddr = saddr;
+ vnet_buffer (p0)->map_t.v6.daddr =
+ ip6_map_t_embedded_address (d0, &ip60->dst_address);
+ vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+
+ if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length,
+ &(vnet_buffer (p0)->map_t.
+ v6.l4_protocol),
+ &(vnet_buffer (p0)->map_t.
+ v6.l4_offset),
+ &(vnet_buffer (p0)->map_t.
+ v6.frag_offset))))
+ {
+ error0 = MAP_ERROR_MALFORMED;
+ next0 = IP6_MAPT_NEXT_DROP;
+ }
+
+ src_port0 = -1;
+ l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) +
+ sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset;
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.frag_offset);
+
+
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset &&
+ ip6_frag_hdr_offset (frag0)))
+ {
+ src_port0 = ip6_map_fragment_get (ip60, frag0, d0);
+ error0 = (src_port0 != -1) ? error0 : MAP_ERROR_FRAGMENT_MEMORY;
+ next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED;
+ }
+ else
+ if (PREDICT_TRUE
+ (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP))
+ {
+ error0 =
+ l4_len0 <
+ sizeof (tcp_header_t) ? MAP_ERROR_MALFORMED : error0;
+ vnet_buffer (p0)->map_t.checksum_offset =
+ vnet_buffer (p0)->map_t.v6.l4_offset + 16;
+ next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+ src_port0 =
+ (i32) *
+ ((u16 *)
+ u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset));
+ }
+ else
+ if (PREDICT_TRUE
+ (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP))
+ {
+ error0 =
+ l4_len0 <
+ sizeof (udp_header_t) ? MAP_ERROR_MALFORMED : error0;
+ vnet_buffer (p0)->map_t.checksum_offset =
+ vnet_buffer (p0)->map_t.v6.l4_offset + 6;
+ next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+ src_port0 =
+ (i32) *
+ ((u16 *)
+ u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset));
+ }
+ else if (vnet_buffer (p0)->map_t.v6.l4_protocol ==
+ IP_PROTOCOL_ICMP6)
+ {
+ error0 =
+ l4_len0 <
+ sizeof (icmp46_header_t) ? MAP_ERROR_MALFORMED : error0;
+ next0 = IP6_MAPT_NEXT_MAPT_ICMP;
+ if (((icmp46_header_t *)
+ u8_ptr_add (ip60,
+		     vnet_buffer (p0)->map_t.v6.l4_offset))->type ==
+ ICMP6_echo_reply
+ || ((icmp46_header_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.v6.
+			     l4_offset))->type == ICMP6_echo_request)
+ src_port0 =
+ (i32) *
+ ((u16 *)
+ u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.v6.l4_offset + 6));
+ }
+ else
+ {
+ //TODO: In case of 1:1 mapping, it might be possible to do something with those packets.
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+
+ //Security check
+ if (PREDICT_FALSE
+ ((src_port0 != -1)
+ && (ip60->src_address.as_u64[0] !=
+ map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr,
+ src_port0)
+ || ip60->src_address.as_u64[1] != map_get_sfx_net (d0,
+ vnet_buffer
+ (p0)->map_t.v6.saddr,
+ src_port0))))
+ {
+	      //Security check whenever src_port0 is known (not -1): TCP, UDP, ICMP echo, or a cached fragment port
+ error0 = MAP_ERROR_SEC_CHECK;
+ }
+
+ //Fragmented first packet needs to be cached for following packets
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset &&
+ !ip6_frag_hdr_offset ((ip6_frag_hdr_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->map_t.
+ v6.frag_offset)))
+ && (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules)
+ && (error0 == MAP_ERROR_NONE))
+ {
+ ip6_map_fragment_cache (ip60,
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->map_t.
+ v6.frag_offset),
+ d0, src_port0);
+ }
+
+ if (PREDICT_TRUE
+ (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ thread_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip60->payload_length));
+ }
+
+ next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static char *map_t_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = {
+ .function = ip6_map_t_fragmented,
+ .name = "ip6-map-t-fragmented",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_FRAGMENTED_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = {
+ .function = ip6_map_t_icmp,
+ .name = "ip6-map-t-icmp",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_ICMP_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = {
+ .function = ip6_map_t_tcp_udp,
+ .name = "ip6-map-t-tcp-udp",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_TCP_UDP_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_t_node) = {
+ .function = ip6_map_t,
+ .name = "ip6-map-t",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp",
+ [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp",
+ [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = "ip6-map-t-fragmented",
+ [IP6_MAPT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/map.api b/src/vnet/map/map.api
new file mode 100644
index 00000000..4b142c8f
--- /dev/null
+++ b/src/vnet/map/map.api
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief Add MAP domains
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip6_prefix - Rule IPv6 prefix
+ @param ip4_prefix - Rule IPv4 prefix
+ @param ip6_src - MAP domain IPv6 BR address / Tunnel source
+ @param ip6_prefix_len - Rule IPv6 prefix length
+    @param ip4_prefix_len - Rule IPv4 prefix length
+    @param ip6_src_prefix_len - MAP domain IPv6 BR address / Tunnel source prefix length
+    @param ea_bits_len - Embedded Address bits length
+    @param psid_offset - Port Set Identifier (PSID) offset
+ @param psid_length - PSID length
+ @param is_translation - MAP-E / MAP-T
+ @param mtu - MTU
+*/
+define map_add_domain
+{
+ u32 client_index;
+ u32 context;
+ u8 ip6_prefix[16];
+ u8 ip4_prefix[4];
+ u8 ip6_src[16];
+ u8 ip6_prefix_len;
+ u8 ip4_prefix_len;
+ u8 ip6_src_prefix_len;
+ u8 ea_bits_len;
+ u8 psid_offset;
+ u8 psid_length;
+ u8 is_translation;
+ u16 mtu;
+};
+
+/** \brief Reply for MAP domain add
+ @param context - returned sender context, to match reply w/ request
+ @param index - MAP domain index
+ @param retval - return code
+*/
+define map_add_domain_reply
+{
+ u32 context;
+ u32 index;
+ i32 retval;
+};
+
+/** \brief Delete MAP domain
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param index - MAP Domain index
+*/
+autoreply define map_del_domain
+{
+ u32 client_index;
+ u32 context;
+ u32 index;
+};
+
+
+/** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param index - MAP Domain index
+ @param is_add - If 1 add rule, if 0 delete rule
+ @param ip6_dst - MAP CE IPv6 address
+ @param psid - Rule PSID
+*/
+autoreply define map_add_del_rule
+{
+ u32 client_index;
+ u32 context;
+ u32 index;
+ u8 is_add;
+ u8 ip6_dst[16];
+ u16 psid;
+};
+
+
+/** \brief Get list of map domains
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define map_domain_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+define map_domain_details
+{
+ u32 context;
+ u32 domain_index;
+ u8 ip6_prefix[16];
+ u8 ip4_prefix[4];
+ u8 ip6_src[16];
+ u8 ip6_prefix_len;
+ u8 ip4_prefix_len;
+ u8 ip6_src_len;
+ u8 ea_bits_len;
+ u8 psid_offset;
+ u8 psid_length;
+ u8 flags;
+ u16 mtu;
+ u8 is_translation;
+};
+
+define map_rule_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 domain_index;
+};
+
+define map_rule_details
+{
+ u32 context;
+ u8 ip6_dst[16];
+ u16 psid;
+};
+
+/** \brief Request for a single block of summary stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define map_summary_stats
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for map_summary_stats request
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for request
+    @param total_bindings - total number of MAP domains
+    @param total_pkts - total packet counters [RX, TX]
+    @param total_bytes - total byte counters [RX, TX]
+    @param total_ip4_fragments - total IPv4 fragments handled
+    @param total_security_check - security check counters [RX, TX]
+define map_summary_stats_reply
+{
+ u32 context;
+ i32 retval;
+ u64 total_bindings;
+ u64 total_pkts[2];
+ u64 total_bytes[2];
+ u64 total_ip4_fragments;
+ u64 total_security_check[2];
+};
diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c
new file mode 100644
index 00000000..0c8cd6cd
--- /dev/null
+++ b/src/vnet/map/map.c
@@ -0,0 +1,2287 @@
+/*
+ * map.c : MAP support
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/map/map_dpo.h>
+#include <vppinfra/crc32.h>
+
+#include "map.h"
+
+/*
+ * This code supports the following MAP modes:
+ *
+ * Algorithmic Shared IPv4 address (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix > 32
+ * psid_length > 0, ip6_prefix < 64, ip4_prefix <= 32
+ * Algorithmic Full IPv4 address (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix = 32
+ * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32
+ * Algorithmic IPv4 prefix (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix < 32
+ * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32
+ *
+ * Independent Shared IPv4 address (ea_bits_len = 0):
+ * ip4_prefix = 32
+ * psid_length > 0
+ * Rule IPv6 address = 128, Rule PSID Set
+ * Independent Full IPv4 address (ea_bits_len = 0):
+ * ip4_prefix = 32
+ * psid_length = 0, ip6_prefix = 128
+ * Independent IPv4 prefix (ea_bits_len = 0):
+ * ip4_prefix < 32
+ * psid_length = 0, ip6_prefix = 128
+ *
+ */
+
+/*
+ * This code supports MAP-T:
+ *
+ * With DMR prefix length equal to 96.
+ *
+ */
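+
+/*
+ * Illustrative sketch (not used by the data plane): how a transport
+ * port maps to a PSID under the modes above.  The shift and mask
+ * mirror the values map_create_domain() stores in map_domain_t
+ * (d->psid_shift, d->psid_mask); the function name is for exposition
+ * only.
+ */
+static_always_inline u16
+example_port_to_psid (u16 port, u8 psid_offset, u8 psid_length)
+{
+  u8 psid_shift = 16 - psid_length - psid_offset;
+  u16 psid_mask = (1 << psid_length) - 1;
+  /* e.g. psid_offset 6, psid_length 8, port 0x1234:
+   * psid_shift = 2, psid = (0x1234 >> 2) & 0xff = 0x8d */
+  return (port >> psid_shift) & psid_mask;
+}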
+
+
+
+int
+map_create_domain (ip4_address_t * ip4_prefix,
+ u8 ip4_prefix_len,
+ ip6_address_t * ip6_prefix,
+ u8 ip6_prefix_len,
+ ip6_address_t * ip6_src,
+ u8 ip6_src_len,
+ u8 ea_bits_len,
+ u8 psid_offset,
+ u8 psid_length, u32 * map_domain_index, u16 mtu, u8 flags)
+{
+ u8 suffix_len, suffix_shift;
+ map_main_t *mm = &map_main;
+ dpo_id_t dpo_v4 = DPO_INVALID;
+ dpo_id_t dpo_v6 = DPO_INVALID;
+ map_domain_t *d;
+
+ /* Sanity check on the src prefix length */
+ if (flags & MAP_DOMAIN_TRANSLATION)
+ {
+ if (ip6_src_len != 96)
+ {
+ clib_warning ("MAP-T only supports ip6_src_len = 96 for now.");
+ return -1;
+ }
+ }
+ else
+ {
+ if (ip6_src_len != 128)
+ {
+ clib_warning
+ ("MAP-E requires a BR address, not a prefix (ip6_src_len should "
+ "be 128).");
+ return -1;
+ }
+ }
+
+ /* How many, and which bits to grab from the IPv4 DA */
+ if (ip4_prefix_len + ea_bits_len < 32)
+ {
+ flags |= MAP_DOMAIN_PREFIX;
+ suffix_shift = 32 - ip4_prefix_len - ea_bits_len;
+ suffix_len = ea_bits_len;
+ }
+ else
+ {
+ suffix_shift = 0;
+ suffix_len = 32 - ip4_prefix_len;
+ }
+
+ /* EA bits must be within the first 64 bits */
+ if (ea_bits_len > 0 && ((ip6_prefix_len + ea_bits_len) > 64 ||
+ ip6_prefix_len + suffix_len + psid_length > 64))
+ {
+ clib_warning
+ ("Embedded Address bits must be within the first 64 bits of "
+ "the IPv6 prefix");
+ return -1;
+ }
+
+ /* Get domain index */
+ pool_get_aligned (mm->domains, d, CLIB_CACHE_LINE_BYTES);
+ memset (d, 0, sizeof (*d));
+ *map_domain_index = d - mm->domains;
+
+ /* Init domain struct */
+ d->ip4_prefix.as_u32 = ip4_prefix->as_u32;
+ d->ip4_prefix_len = ip4_prefix_len;
+ d->ip6_prefix = *ip6_prefix;
+ d->ip6_prefix_len = ip6_prefix_len;
+ d->ip6_src = *ip6_src;
+ d->ip6_src_len = ip6_src_len;
+ d->ea_bits_len = ea_bits_len;
+ d->psid_offset = psid_offset;
+ d->psid_length = psid_length;
+ d->mtu = mtu;
+ d->flags = flags;
+ d->suffix_shift = suffix_shift;
+ d->suffix_mask = (1 << suffix_len) - 1;
+
+ d->psid_shift = 16 - psid_length - psid_offset;
+ d->psid_mask = (1 << d->psid_length) - 1;
+ d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length;
+
+ /* MAP data-plane object */
+ if (d->flags & MAP_DOMAIN_TRANSLATION)
+ map_t_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4);
+ else
+ map_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4);
+
+ /* Create ip4 route */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = d->ip4_prefix_len,
+ .fp_addr = {
+ .ip4 = d->ip4_prefix,
+ }
+ ,
+ };
+ fib_table_entry_special_dpo_add (0, &pfx,
+ FIB_SOURCE_MAP,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
+ dpo_reset (&dpo_v4);
+
+ /*
+ * construct a DPO to use the v6 domain
+ */
+ if (d->flags & MAP_DOMAIN_TRANSLATION)
+ map_t_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6);
+ else
+ map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6);
+
+ /*
+   * Multiple MAP domains may share the same source IPv6 TEP, which is
+   * fine. We are not tracking the sharing, so a v4 lookup to find the
+   * correct domain post decap/translate is always done.
+ *
+ * Create ip6 route. This is a reference counted add. If the prefix
+ * already exists and is MAP sourced, it is now MAP source n+1 times
+ * and will need to be removed n+1 times.
+ */
+ fib_prefix_t pfx6 = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = d->ip6_src_len,
+ .fp_addr.ip6 = d->ip6_src,
+ };
+
+ fib_table_entry_special_dpo_add (0, &pfx6,
+ FIB_SOURCE_MAP,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6);
+ dpo_reset (&dpo_v6);
+
+ /* Validate packet/byte counters */
+ map_domain_counter_lock (mm);
+ int i;
+ for (i = 0; i < vec_len (mm->simple_domain_counters); i++)
+ {
+ vlib_validate_simple_counter (&mm->simple_domain_counters[i],
+ *map_domain_index);
+ vlib_zero_simple_counter (&mm->simple_domain_counters[i],
+ *map_domain_index);
+ }
+ for (i = 0; i < vec_len (mm->domain_counters); i++)
+ {
+ vlib_validate_combined_counter (&mm->domain_counters[i],
+ *map_domain_index);
+ vlib_zero_combined_counter (&mm->domain_counters[i], *map_domain_index);
+ }
+ map_domain_counter_unlock (mm);
+
+ return 0;
+}
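+
+/*
+ * Worked example for the suffix logic above: with ip4_prefix_len 24
+ * and ea_bits_len 16, ip4_prefix_len + ea_bits_len >= 32, so
+ * suffix_shift = 0 and suffix_len = 32 - 24 = 8: the low 8 bits of the
+ * IPv4 DA are embedded.  With ip4_prefix_len 16 and ea_bits_len 8 (a
+ * prefix domain), suffix_shift = 32 - 16 - 8 = 8 and suffix_len = 8:
+ * bits 8..15 of the IPv4 DA are embedded instead.
+ */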
+
+/*
+ * map_delete_domain
+ */
+int
+map_delete_domain (u32 map_domain_index)
+{
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+
+ if (pool_is_free_index (mm->domains, map_domain_index))
+ {
+ clib_warning ("MAP domain delete: domain does not exist: %d",
+ map_domain_index);
+ return -1;
+ }
+
+ d = pool_elt_at_index (mm->domains, map_domain_index);
+
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = d->ip4_prefix_len,
+ .fp_addr = {
+ .ip4 = d->ip4_prefix,
+ }
+ ,
+ };
+ fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_MAP);
+
+ fib_prefix_t pfx6 = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = d->ip6_src_len,
+ .fp_addr = {
+ .ip6 = d->ip6_src,
+ }
+ ,
+ };
+ fib_table_entry_special_remove (0, &pfx6, FIB_SOURCE_MAP);
+
+ /* Deleting rules */
+ if (d->rules)
+ clib_mem_free (d->rules);
+
+ pool_put (mm->domains, d);
+
+ return 0;
+}
+
+int
+map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep,
+ u8 is_add)
+{
+ map_domain_t *d;
+ map_main_t *mm = &map_main;
+
+ if (pool_is_free_index (mm->domains, map_domain_index))
+ {
+ clib_warning ("MAP rule: domain does not exist: %d", map_domain_index);
+ return -1;
+ }
+ d = pool_elt_at_index (mm->domains, map_domain_index);
+
+ /* Rules are only used in 1:1 independent case */
+ if (d->ea_bits_len > 0)
+ return (-1);
+
+ if (!d->rules)
+ {
+ u32 l = (0x1 << d->psid_length) * sizeof (ip6_address_t);
+ d->rules = clib_mem_alloc_aligned (l, CLIB_CACHE_LINE_BYTES);
+ if (!d->rules)
+ return -1;
+ memset (d->rules, 0, l);
+ }
+
+ if (psid >= (0x1 << d->psid_length))
+ {
+ clib_warning ("MAP rule: PSID outside bounds: %d [%d]", psid,
+ 0x1 << d->psid_length);
+ return -1;
+ }
+
+ if (is_add)
+ {
+ d->rules[psid] = *tep;
+ }
+ else
+ {
+ memset (&d->rules[psid], 0, sizeof (ip6_address_t));
+ }
+ return 0;
+}
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+/**
+ * Pre-resolved per-protocol global next-hops
+ */
+map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX];
+
+static void
+map_pre_resolve_init (map_main_pre_resolved_t * pr)
+{
+ pr->fei = FIB_NODE_INDEX_INVALID;
+ fib_node_init (&pr->node, FIB_NODE_TYPE_MAP_E);
+}
+
+static u8 *
+format_map_pre_resolve (u8 * s, va_list ap)
+{
+ map_main_pre_resolved_t *pr = va_arg (ap, map_main_pre_resolved_t *);
+
+ if (FIB_NODE_INDEX_INVALID != pr->fei)
+ {
+ fib_prefix_t pfx;
+
+ fib_entry_get_prefix (pr->fei, &pfx);
+
+ return (format (s, "%U (%u)",
+ format_ip46_address, &pfx.fp_addr, IP46_TYPE_ANY,
+ pr->dpo.dpoi_index));
+ }
+ else
+ {
+ return (format (s, "un-set"));
+ }
+}
+
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+map_last_lock_gone (fib_node_t * node)
+{
+ /*
+ * The MAP is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT (0);
+}
+
+static map_main_pre_resolved_t *
+map_from_fib_node (fib_node_t * node)
+{
+#if (CLIB_DEBUG > 0)
+ ASSERT (FIB_NODE_TYPE_MAP_E == node->fn_type);
+#endif
+ return ((map_main_pre_resolved_t *)
+ (((char *) node) -
+ STRUCT_OFFSET_OF (map_main_pre_resolved_t, node)));
+}
+
+static void
+map_stack (map_main_pre_resolved_t * pr)
+{
+ const dpo_id_t *dpo;
+
+ dpo = fib_entry_contribute_ip_forwarding (pr->fei);
+
+ dpo_copy (&pr->dpo, dpo);
+}
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+map_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+{
+ map_stack (map_from_fib_node (node));
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t *
+map_fib_node_get (fib_node_index_t index)
+{
+ return (&pre_resolved[index].node);
+}
+
+/*
+ * Virtual function table registered by MAP
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t map_vft = {
+ .fnv_get = map_fib_node_get,
+ .fnv_last_lock = map_last_lock_gone,
+ .fnv_back_walk = map_back_walk,
+};
+
+static void
+map_fib_resolve (map_main_pre_resolved_t * pr,
+ fib_protocol_t proto, u8 len, const ip46_address_t * addr)
+{
+ fib_prefix_t pfx = {
+ .fp_proto = proto,
+ .fp_len = len,
+ .fp_addr = *addr,
+ };
+
+ pr->fei = fib_table_entry_special_add (0, // default fib
+ &pfx,
+ FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE);
+ pr->sibling = fib_entry_child_add (pr->fei, FIB_NODE_TYPE_MAP_E, proto);
+ map_stack (pr);
+}
+
+static void
+map_fib_unresolve (map_main_pre_resolved_t * pr,
+ fib_protocol_t proto, u8 len, const ip46_address_t * addr)
+{
+ fib_prefix_t pfx = {
+ .fp_proto = proto,
+ .fp_len = len,
+ .fp_addr = *addr,
+ };
+
+ fib_entry_child_remove (pr->fei, pr->sibling);
+
+ fib_table_entry_special_remove (0, // default fib
+ &pfx, FIB_SOURCE_RR);
+ dpo_reset (&pr->dpo);
+
+ pr->fei = FIB_NODE_INDEX_INVALID;
+ pr->sibling = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, int is_del)
+{
+ if (ip6 && (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0))
+ {
+ ip46_address_t addr = {
+ .ip6 = *ip6,
+ };
+ if (is_del)
+ map_fib_unresolve (&pre_resolved[FIB_PROTOCOL_IP6],
+ FIB_PROTOCOL_IP6, 128, &addr);
+ else
+ map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP6],
+ FIB_PROTOCOL_IP6, 128, &addr);
+ }
+ if (ip4 && (ip4->as_u32 != 0))
+ {
+ ip46_address_t addr = {
+ .ip4 = *ip4,
+ };
+ if (is_del)
+ map_fib_unresolve (&pre_resolved[FIB_PROTOCOL_IP4],
+ FIB_PROTOCOL_IP4, 32, &addr);
+ else
+ map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP4],
+ FIB_PROTOCOL_IP4, 32, &addr);
+ }
+}
+#endif
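+
+/*
+ * Sketch of the pre-resolve fast path: with MAP_SKIP_IP6_LOOKUP the
+ * encap/decap nodes can forward via the cached adjacency in
+ * pre_resolved[proto].dpo instead of a full FIB lookup; the back-walk
+ * above (map_back_walk -> map_stack) refreshes that cached dpo when
+ * the next-hop's forwarding changes.
+ */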
+
+static clib_error_t *
+map_security_check_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "off"))
+ mm->sec_check = false;
+ else if (unformat (line_input, "on"))
+ mm->sec_check = true;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+map_security_check_frag_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "off"))
+ mm->sec_check_frag = false;
+ else if (unformat (line_input, "on"))
+ mm->sec_check_frag = true;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+map_add_domain_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip6_address_t ip6_src;
+ u32 ip6_prefix_len = 0, ip4_prefix_len = 0, map_domain_index, ip6_src_len;
+ u32 num_m_args = 0;
+ /* Optional arguments */
+ u32 ea_bits_len = 0, psid_offset = 0, psid_length = 0;
+ u32 mtu = 0;
+ u8 flags = 0;
+ ip6_src_len = 128;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix,
+ &ip4_prefix_len))
+ num_m_args++;
+ else
+ if (unformat
+ (line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix,
+ &ip6_prefix_len))
+ num_m_args++;
+ else
+ if (unformat
+ (line_input, "ip6-src %U/%d", unformat_ip6_address, &ip6_src,
+ &ip6_src_len))
+ num_m_args++;
+ else
+ if (unformat
+ (line_input, "ip6-src %U", unformat_ip6_address, &ip6_src))
+ num_m_args++;
+ else if (unformat (line_input, "ea-bits-len %d", &ea_bits_len))
+ num_m_args++;
+ else if (unformat (line_input, "psid-offset %d", &psid_offset))
+ num_m_args++;
+ else if (unformat (line_input, "psid-len %d", &psid_length))
+ num_m_args++;
+ else if (unformat (line_input, "mtu %d", &mtu))
+ num_m_args++;
+ else if (unformat (line_input, "map-t"))
+ flags |= MAP_DOMAIN_TRANSLATION;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (num_m_args < 3)
+ {
+ error = clib_error_return (0, "mandatory argument(s) missing");
+ goto done;
+ }
+
+ map_create_domain (&ip4_prefix, ip4_prefix_len,
+ &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len,
+ ea_bits_len, psid_offset, psid_length, &map_domain_index,
+ mtu, flags);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
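+
+/*
+ * Example invocation (the "map add domain" CLI path is assumed; the
+ * keywords match the unformat strings above):
+ *
+ *   map add domain ip4-pfx 192.0.2.0/24 ip6-pfx 2001:db8::/40 \
+ *     ip6-src 2001:db8:ffff::1 ea-bits-len 16 psid-offset 6 psid-len 8
+ */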
+
+static clib_error_t *
+map_del_domain_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 num_m_args = 0;
+ u32 map_domain_index;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "index %d", &map_domain_index))
+ num_m_args++;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (num_m_args != 1)
+ {
+ error = clib_error_return (0, "mandatory argument(s) missing");
+ goto done;
+ }
+
+ map_delete_domain (map_domain_index);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+map_add_rule_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip6_address_t tep;
+ u32 num_m_args = 0;
+ u32 psid = 0, map_domain_index;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "index %d", &map_domain_index))
+ num_m_args++;
+ else if (unformat (line_input, "psid %d", &psid))
+ num_m_args++;
+ else
+ if (unformat (line_input, "ip6-dst %U", unformat_ip6_address, &tep))
+ num_m_args++;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (num_m_args != 3)
+ {
+ error = clib_error_return (0, "mandatory argument(s) missing");
+ goto done;
+ }
+
+ if (map_add_del_psid (map_domain_index, psid, &tep, 1) != 0)
+ {
+      error = clib_error_return (0, "Failed to add mapping rule");
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+#if MAP_SKIP_IP6_LOOKUP
+static clib_error_t *
+map_pre_resolve_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4nh, *p_v4 = NULL;
+ ip6_address_t ip6nh, *p_v6 = NULL;
+ clib_error_t *error = NULL;
+ int is_del = 0;
+
+ memset (&ip4nh, 0, sizeof (ip4nh));
+ memset (&ip6nh, 0, sizeof (ip6nh));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh))
+ p_v4 = &ip4nh;
+ else
+ if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh))
+ p_v6 = &ip6nh;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ map_pre_resolve (p_v4, p_v6, is_del);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+#endif
+
+static clib_error_t *
+map_icmp_relay_source_address_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t icmp_src_address;
+ map_main_t *mm = &map_main;
+ clib_error_t *error = NULL;
+
+ mm->icmp4_src_address.as_u32 = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_ip4_address, &icmp_src_address))
+ mm->icmp4_src_address = icmp_src_address;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+map_icmp_unreachables_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ int num_m_args = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ num_m_args++;
+ if (unformat (line_input, "on"))
+ mm->icmp6_enabled = true;
+ else if (unformat (line_input, "off"))
+ mm->icmp6_enabled = false;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+
+ if (num_m_args != 1)
+ error = clib_error_return (0, "mandatory argument(s) missing");
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+map_fragment_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "inner"))
+ mm->frag_inner = true;
+ else if (unformat (line_input, "outer"))
+ mm->frag_inner = false;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+map_fragment_df_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ mm->frag_ignore_df = true;
+ else if (unformat (line_input, "off"))
+ mm->frag_ignore_df = false;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+map_traffic_class_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ u32 tc = 0;
+ clib_error_t *error = NULL;
+
+ mm->tc_copy = false;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "copy"))
+ mm->tc_copy = true;
+ else if (unformat (line_input, "%x", &tc))
+ mm->tc = tc & 0xff;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static u8 *
+format_map_domain (u8 * s, va_list * args)
+{
+ map_domain_t *d = va_arg (*args, map_domain_t *);
+ bool counters = va_arg (*args, int);
+ map_main_t *mm = &map_main;
+ ip6_address_t ip6_prefix;
+
+ if (d->rules)
+ memset (&ip6_prefix, 0, sizeof (ip6_prefix));
+ else
+ ip6_prefix = d->ip6_prefix;
+
+ s = format (s,
+ "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d psid-offset %d psid-len %d mtu %d %s",
+ d - mm->domains,
+ format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len,
+ format_ip6_address, &ip6_prefix, d->ip6_prefix_len,
+ format_ip6_address, &d->ip6_src, d->ip6_src_len,
+ d->ea_bits_len, d->psid_offset, d->psid_length, d->mtu,
+ (d->flags & MAP_DOMAIN_TRANSLATION) ? "map-t" : "");
+
+ if (counters)
+ {
+ map_domain_counter_lock (mm);
+ vlib_counter_t v;
+ vlib_get_combined_counter (&mm->domain_counters[MAP_DOMAIN_COUNTER_TX],
+ d - mm->domains, &v);
+ s = format (s, " TX: %lld/%lld", v.packets, v.bytes);
+ vlib_get_combined_counter (&mm->domain_counters[MAP_DOMAIN_COUNTER_RX],
+ d - mm->domains, &v);
+ s = format (s, " RX: %lld/%lld", v.packets, v.bytes);
+ map_domain_counter_unlock (mm);
+ }
+ s = format (s, "\n");
+
+ if (d->rules)
+ {
+ int i;
+ ip6_address_t dst;
+ for (i = 0; i < (0x1 << d->psid_length); i++)
+ {
+ dst = d->rules[i];
+ if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0)
+ continue;
+ s = format (s,
+ " rule psid: %d ip6-dst %U\n", i, format_ip6_address,
+ &dst);
+ }
+ }
+ return s;
+}
+
+static u8 *
+format_map_ip4_reass (u8 * s, va_list * args)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_t *r = va_arg (*args, map_ip4_reass_t *);
+ map_ip4_reass_key_t *k = &r->key;
+ f64 now = vlib_time_now (mm->vlib_main);
+ f64 lifetime = (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000);
+ f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1;
+ s = format (s,
+ "ip4-reass src=%U dst=%U protocol=%d identifier=%d port=%d lifetime=%.3lf\n",
+ format_ip4_address, &k->src.as_u8, format_ip4_address,
+ &k->dst.as_u8, k->protocol,
+ clib_net_to_host_u16 (k->fragment_id),
+ (r->port >= 0) ? clib_net_to_host_u16 (r->port) : -1, dt);
+ return s;
+}
+
+static u8 *
+format_map_ip6_reass (u8 * s, va_list * args)
+{
+ map_main_t *mm = &map_main;
+ map_ip6_reass_t *r = va_arg (*args, map_ip6_reass_t *);
+ map_ip6_reass_key_t *k = &r->key;
+ f64 now = vlib_time_now (mm->vlib_main);
+ f64 lifetime = (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000);
+ f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1;
+ s = format (s,
+ "ip6-reass src=%U dst=%U protocol=%d identifier=%d lifetime=%.3lf\n",
+ format_ip6_address, &k->src.as_u8, format_ip6_address,
+ &k->dst.as_u8, k->protocol,
+ clib_net_to_host_u32 (k->fragment_id), dt);
+ return s;
+}
+
+static clib_error_t *
+show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+ bool counters = false;
+ u32 map_domain_index = ~0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "counters"))
+ counters = true;
+ else if (unformat (line_input, "index %d", &map_domain_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (pool_elts (mm->domains) == 0)
+ vlib_cli_output (vm, "No MAP domains are configured...");
+
+ if (map_domain_index == ~0)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);}));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if (pool_is_free_index (mm->domains, map_domain_index))
+ {
+ error = clib_error_return (0, "MAP domain does not exists %d",
+ map_domain_index);
+ goto done;
+ }
+
+ d = pool_elt_at_index (mm->domains, map_domain_index);
+ vlib_cli_output (vm, "%U", format_map_domain, d, counters);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+show_map_fragments_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_t *f4;
+ map_ip6_reass_t *f6;
+
+ /* *INDENT-OFF* */
+ pool_foreach(f4, mm->ip4_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip4_reass, f4);}));
+ /* *INDENT-ON* */
+ /* *INDENT-OFF* */
+ pool_foreach(f6, mm->ip6_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip6_reass, f6);}));
+ /* *INDENT-ON* */
+ return (0);
+}
+
+u64
+map_error_counter_get (u32 node_index, map_error_t map_error)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, node_index);
+ vlib_error_main_t *em = &vm->error_main;
+ vlib_error_t e = error_node->errors[map_error];
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ u32 ci;
+
+ ci = vlib_error_get_code (e);
+ ASSERT (ci < n->n_errors);
+ ci += n->error_heap_index;
+
+ return (em->counters[ci]);
+}
+
+static clib_error_t *
+show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+ int domains = 0, rules = 0, domaincount = 0, rulecount = 0;
+ if (pool_elts (mm->domains) == 0)
+ {
+ vlib_cli_output (vm, "No MAP domains are configured...");
+ return 0;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach(d, mm->domains, ({
+ if (d->rules) {
+ rulecount+= 0x1 << d->psid_length;
+ rules += sizeof(ip6_address_t) * (0x1 << d->psid_length);
+ }
+ domains += sizeof(*d);
+ domaincount++;
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "MAP domains structure: %d\n", sizeof (map_domain_t));
+ vlib_cli_output (vm, "MAP domains: %d (%d bytes)\n", domaincount, domains);
+ vlib_cli_output (vm, "MAP rules: %d (%d bytes)\n", rulecount, rules);
+ vlib_cli_output (vm, "Total: %d bytes)\n", rules + domains);
+
+#if MAP_SKIP_IP6_LOOKUP
+ vlib_cli_output (vm,
+ "MAP pre-resolve: IP6 next-hop: %U, IP4 next-hop: %U\n",
+ format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP6],
+ format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP4]);
+
+#endif
+
+ if (mm->tc_copy)
+ vlib_cli_output (vm, "MAP traffic-class: copy");
+ else
+ vlib_cli_output (vm, "MAP traffic-class: %x", mm->tc);
+
+ vlib_cli_output (vm,
+ "MAP IPv6 inbound security check: %s, fragmented packet security check: %s",
+ mm->sec_check ? "enabled" : "disabled",
+ mm->sec_check_frag ? "enabled" : "disabled");
+
+ vlib_cli_output (vm, "ICMP-relay IPv4 source address: %U\n",
+ format_ip4_address, &mm->icmp4_src_address);
+ vlib_cli_output (vm, "ICMP6 unreachables sent for unmatched packets: %s\n",
+ mm->icmp6_enabled ? "enabled" : "disabled");
+ vlib_cli_output (vm, "Inner fragmentation: %s\n",
+ mm->frag_inner ? "enabled" : "disabled");
+ vlib_cli_output (vm, "Fragment packets regardless of DF flag: %s\n",
+ mm->frag_ignore_df ? "enabled" : "disabled");
+
+ /*
+ * Counters
+ */
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u64 total_pkts[MAP_N_DOMAIN_COUNTER];
+ u64 total_bytes[MAP_N_DOMAIN_COUNTER];
+ int which, i;
+ vlib_counter_t v;
+
+ memset (total_pkts, 0, sizeof (total_pkts));
+ memset (total_bytes, 0, sizeof (total_bytes));
+
+ map_domain_counter_lock (mm);
+ vec_foreach (cm, mm->domain_counters)
+ {
+ which = cm - mm->domain_counters;
+
+ for (i = 0; i < vlib_combined_counter_n_counters (cm); i++)
+ {
+ vlib_get_combined_counter (cm, i, &v);
+ total_pkts[which] += v.packets;
+ total_bytes[which] += v.bytes;
+ }
+ }
+ map_domain_counter_unlock (mm);
+
+ vlib_cli_output (vm, "Encapsulated packets: %lld bytes: %lld\n",
+ total_pkts[MAP_DOMAIN_COUNTER_TX],
+ total_bytes[MAP_DOMAIN_COUNTER_TX]);
+ vlib_cli_output (vm, "Decapsulated packets: %lld bytes: %lld\n",
+ total_pkts[MAP_DOMAIN_COUNTER_RX],
+ total_bytes[MAP_DOMAIN_COUNTER_RX]);
+
+ vlib_cli_output (vm, "ICMP relayed packets: %d\n",
+ vlib_get_simple_counter (&mm->icmp_relayed, 0));
+
+ return 0;
+}
+
+static clib_error_t *
+map_params_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 lifetime = ~0;
+ f64 ht_ratio = (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1);
+ u32 pool_size = ~0;
+ u64 buffers = ~(0ull);
+ u8 ip4 = 0, ip6 = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "lifetime %u", &lifetime))
+ ;
+ else if (unformat (line_input, "ht-ratio %lf", &ht_ratio))
+ ;
+ else if (unformat (line_input, "pool-size %u", &pool_size))
+ ;
+ else if (unformat (line_input, "buffers %llu", &buffers))
+ ;
+ else if (unformat (line_input, "ip4"))
+ ip4 = 1;
+ else if (unformat (line_input, "ip6"))
+ ip6 = 1;
+ else
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "invalid input");
+ }
+ }
+ unformat_free (line_input);
+
+ if (!ip4 && !ip6)
+ return clib_error_return (0, "must specify ip4 and/or ip6");
+
+ if (ip4)
+ {
+ if (pool_size != ~0 && pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX)
+ return clib_error_return (0, "invalid ip4-reass pool-size ( > %d)",
+ MAP_IP4_REASS_CONF_POOL_SIZE_MAX);
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)
+ && ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX)
+ return clib_error_return (0, "invalid ip4-reass ht-ratio ( > %d)",
+ MAP_IP4_REASS_CONF_HT_RATIO_MAX);
+ if (lifetime != ~0 && lifetime > MAP_IP4_REASS_CONF_LIFETIME_MAX)
+ return clib_error_return (0, "invalid ip4-reass lifetime ( > %d)",
+ MAP_IP4_REASS_CONF_LIFETIME_MAX);
+ if (buffers != ~(0ull) && buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX)
+ return clib_error_return (0, "invalid ip4-reass buffers ( > %ld)",
+ MAP_IP4_REASS_CONF_BUFFERS_MAX);
+ }
+
+ if (ip6)
+ {
+ if (pool_size != ~0 && pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX)
+ return clib_error_return (0, "invalid ip6-reass pool-size ( > %d)",
+ MAP_IP6_REASS_CONF_POOL_SIZE_MAX);
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)
+ && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX)
+ return clib_error_return (0, "invalid ip6-reass ht-log2len ( > %d)",
+ MAP_IP6_REASS_CONF_HT_RATIO_MAX);
+ if (lifetime != ~0 && lifetime > MAP_IP6_REASS_CONF_LIFETIME_MAX)
+ return clib_error_return (0, "invalid ip6-reass lifetime ( > %d)",
+ MAP_IP6_REASS_CONF_LIFETIME_MAX);
+ if (buffers != ~(0ull) && buffers > MAP_IP6_REASS_CONF_BUFFERS_MAX)
+ return clib_error_return (0, "invalid ip6-reass buffers ( > %ld)",
+ MAP_IP6_REASS_CONF_BUFFERS_MAX);
+ }
+
+ if (ip4)
+ {
+ u32 reass = 0, packets = 0;
+ if (pool_size != ~0)
+ {
+ if (map_ip4_reass_conf_pool_size (pool_size, &reass, &packets))
+ {
+ vlib_cli_output (vm, "Could not set ip4-reass pool-size");
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ "Setting ip4-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)",
+ reass, packets);
+ }
+ }
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1))
+ {
+ if (map_ip4_reass_conf_ht_ratio (ht_ratio, &reass, &packets))
+ {
+ vlib_cli_output (vm, "Could not set ip4-reass ht-log2len");
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ "Setting ip4-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)",
+ reass, packets);
+ }
+ }
+ if (lifetime != ~0)
+ {
+ if (map_ip4_reass_conf_lifetime (lifetime))
+ vlib_cli_output (vm, "Could not set ip4-reass lifetime");
+ else
+ vlib_cli_output (vm, "Setting ip4-reass lifetime");
+ }
+ if (buffers != ~(0ull))
+ {
+ if (map_ip4_reass_conf_buffers (buffers))
+ vlib_cli_output (vm, "Could not set ip4-reass buffers");
+ else
+ vlib_cli_output (vm, "Setting ip4-reass buffers");
+ }
+
+ if (map_main.ip4_reass_conf_buffers >
+ map_main.ip4_reass_conf_pool_size *
+ MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY)
+ {
+ vlib_cli_output (vm,
+ "Note: 'ip4-reass buffers' > pool-size * max-fragments-per-reassembly.");
+ }
+ }
+
+ if (ip6)
+ {
+ u32 reass = 0, packets = 0;
+ if (pool_size != ~0)
+ {
+ if (map_ip6_reass_conf_pool_size (pool_size, &reass, &packets))
+ {
+ vlib_cli_output (vm, "Could not set ip6-reass pool-size");
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ "Setting ip6-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)",
+ reass, packets);
+ }
+ }
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1))
+ {
+ if (map_ip6_reass_conf_ht_ratio (ht_ratio, &reass, &packets))
+ {
+ vlib_cli_output (vm, "Could not set ip6-reass ht-log2len");
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ "Setting ip6-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)",
+ reass, packets);
+ }
+ }
+ if (lifetime != ~0)
+ {
+ if (map_ip6_reass_conf_lifetime (lifetime))
+ vlib_cli_output (vm, "Could not set ip6-reass lifetime");
+ else
+ vlib_cli_output (vm, "Setting ip6-reass lifetime");
+ }
+ if (buffers != ~(0ull))
+ {
+ if (map_ip6_reass_conf_buffers (buffers))
+ vlib_cli_output (vm, "Could not set ip6-reass buffers");
+ else
+ vlib_cli_output (vm, "Setting ip6-reass buffers");
+ }
+
+ if (map_main.ip6_reass_conf_buffers >
+ map_main.ip6_reass_conf_pool_size *
+ MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY)
+ {
+ vlib_cli_output (vm,
+ "Note: 'ip6-reass buffers' > pool-size * max-fragments-per-reassembly.");
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * packet trace format function
+ */
+u8 *
+format_map_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ map_trace_t *t = va_arg (*args, map_trace_t *);
+ u32 map_domain_index = t->map_domain_index;
+ u16 port = t->port;
+
+ s =
+ format (s, "MAP domain index: %d L4 port: %u", map_domain_index,
+ clib_net_to_host_u16 (port));
+
+ return s;
+}
+
+static_always_inline map_ip4_reass_t *
+map_ip4_reass_lookup (map_ip4_reass_key_t * k, u32 bucket, f64 now)
+{
+ map_main_t *mm = &map_main;
+ u32 ri = mm->ip4_reass_hash_table[bucket];
+ while (ri != MAP_REASS_INDEX_NONE)
+ {
+ map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri);
+ if (r->key.as_u64[0] == k->as_u64[0] &&
+ r->key.as_u64[1] == k->as_u64[1] &&
+ now < r->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000))
+ {
+ return r;
+ }
+ ri = r->bucket_next;
+ }
+ return NULL;
+}
+
+#define map_ip4_reass_pool_index(r) (r - map_main.ip4_reass_pool)
+
+void
+map_ip4_reass_free (map_ip4_reass_t * r, u32 ** pi_to_drop)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_get_fragments (r, pi_to_drop);
+
+ // Unlink in hash bucket
+ map_ip4_reass_t *r2 = NULL;
+ u32 r2i = mm->ip4_reass_hash_table[r->bucket];
+ while (r2i != map_ip4_reass_pool_index (r))
+ {
+ ASSERT (r2i != MAP_REASS_INDEX_NONE);
+ r2 = pool_elt_at_index (mm->ip4_reass_pool, r2i);
+ r2i = r2->bucket_next;
+ }
+ if (r2)
+ {
+ r2->bucket_next = r->bucket_next;
+ }
+ else
+ {
+ mm->ip4_reass_hash_table[r->bucket] = r->bucket_next;
+ }
+
+ // Unlink in list
+ if (r->fifo_next == map_ip4_reass_pool_index (r))
+ {
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ }
+ else
+ {
+ if (mm->ip4_reass_fifo_last == map_ip4_reass_pool_index (r))
+ mm->ip4_reass_fifo_last = r->fifo_prev;
+ pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next =
+ r->fifo_next;
+ pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev =
+ r->fifo_prev;
+ }
+
+ pool_put (mm->ip4_reass_pool, r);
+ mm->ip4_reass_allocated--;
+}
+
+map_ip4_reass_t *
+map_ip4_reass_get (u32 src, u32 dst, u16 fragment_id,
+ u8 protocol, u32 ** pi_to_drop)
+{
+ map_ip4_reass_t *r;
+ map_main_t *mm = &map_main;
+ map_ip4_reass_key_t k = {.src.data_u32 = src,
+ .dst.data_u32 = dst,
+ .fragment_id = fragment_id,
+ .protocol = protocol
+ };
+
+ u32 h = 0;
+#ifdef clib_crc32c_uses_intrinsics
+ h = clib_crc32c ((u8 *) k.as_u32, 16);
+#else
+ u64 tmp = k.as_u32[0] ^ k.as_u32[1] ^ k.as_u32[2] ^ k.as_u32[3];
+ h = clib_xxhash (tmp);
+#endif
+ h = h >> (32 - mm->ip4_reass_ht_log2len);
+
+ f64 now = vlib_time_now (mm->vlib_main);
+
+ //Cache garbage collection
+ while (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ map_ip4_reass_t *last =
+ pool_elt_at_index (mm->ip4_reass_pool, mm->ip4_reass_fifo_last);
+ if (last->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000) < now)
+ map_ip4_reass_free (last, pi_to_drop);
+ else
+ break;
+ }
+
+ if ((r = map_ip4_reass_lookup (&k, h, now)))
+ return r;
+
+ if (mm->ip4_reass_allocated >= mm->ip4_reass_conf_pool_size)
+ return NULL;
+
+ pool_get (mm->ip4_reass_pool, r);
+ mm->ip4_reass_allocated++;
+ int i;
+ for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ r->fragments[i] = ~0;
+
+ u32 ri = map_ip4_reass_pool_index (r);
+
+ //Link in new bucket
+ r->bucket = h;
+ r->bucket_next = mm->ip4_reass_hash_table[h];
+ mm->ip4_reass_hash_table[h] = ri;
+
+ //Link in fifo
+ if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ r->fifo_next =
+ pool_elt_at_index (mm->ip4_reass_pool,
+ mm->ip4_reass_fifo_last)->fifo_next;
+ r->fifo_prev = mm->ip4_reass_fifo_last;
+ pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next = ri;
+ pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev = ri;
+ }
+ else
+ {
+ r->fifo_next = r->fifo_prev = ri;
+ mm->ip4_reass_fifo_last = ri;
+ }
+
+ //Set other fields
+ r->ts = now;
+ r->key = k;
+ r->port = -1;
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ r->expected_total = 0xffff;
+ r->forwarded = 0;
+#endif
+
+ return r;
+}
+
+int
+map_ip4_reass_add_fragment (map_ip4_reass_t * r, u32 pi)
+{
+ if (map_main.ip4_reass_buffered_counter >= map_main.ip4_reass_conf_buffers)
+ return -1;
+
+ int i;
+ for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i] == ~0)
+ {
+ r->fragments[i] = pi;
+ map_main.ip4_reass_buffered_counter++;
+ return 0;
+ }
+ return -1;
+}
+
+static_always_inline map_ip6_reass_t *
+map_ip6_reass_lookup (map_ip6_reass_key_t * k, u32 bucket, f64 now)
+{
+ map_main_t *mm = &map_main;
+ u32 ri = mm->ip6_reass_hash_table[bucket];
+ while (ri != MAP_REASS_INDEX_NONE)
+ {
+ map_ip6_reass_t *r = pool_elt_at_index (mm->ip6_reass_pool, ri);
+ if (now < r->ts + (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000) &&
+ r->key.as_u64[0] == k->as_u64[0] &&
+ r->key.as_u64[1] == k->as_u64[1] &&
+ r->key.as_u64[2] == k->as_u64[2] &&
+ r->key.as_u64[3] == k->as_u64[3] &&
+ r->key.as_u64[4] == k->as_u64[4])
+ return r;
+ ri = r->bucket_next;
+ }
+ return NULL;
+}
+
+#define map_ip6_reass_pool_index(r) (r - map_main.ip6_reass_pool)
+
+void
+map_ip6_reass_free (map_ip6_reass_t * r, u32 ** pi_to_drop)
+{
+ map_main_t *mm = &map_main;
+ int i;
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i].pi != ~0)
+ {
+ vec_add1 (*pi_to_drop, r->fragments[i].pi);
+ r->fragments[i].pi = ~0;
+ map_main.ip6_reass_buffered_counter--;
+ }
+
+ // Unlink in hash bucket
+ map_ip6_reass_t *r2 = NULL;
+ u32 r2i = mm->ip6_reass_hash_table[r->bucket];
+ while (r2i != map_ip6_reass_pool_index (r))
+ {
+ ASSERT (r2i != MAP_REASS_INDEX_NONE);
+ r2 = pool_elt_at_index (mm->ip6_reass_pool, r2i);
+ r2i = r2->bucket_next;
+ }
+ if (r2)
+ {
+ r2->bucket_next = r->bucket_next;
+ }
+ else
+ {
+ mm->ip6_reass_hash_table[r->bucket] = r->bucket_next;
+ }
+
+ // Unlink in list
+ if (r->fifo_next == map_ip6_reass_pool_index (r))
+ {
+ //Single element in the list, list is now empty
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ }
+ else
+ {
+ if (mm->ip6_reass_fifo_last == map_ip6_reass_pool_index (r)) //First element
+ mm->ip6_reass_fifo_last = r->fifo_prev;
+ pool_elt_at_index (mm->ip6_reass_pool, r->fifo_prev)->fifo_next =
+ r->fifo_next;
+ pool_elt_at_index (mm->ip6_reass_pool, r->fifo_next)->fifo_prev =
+ r->fifo_prev;
+ }
+
+ // Return the structure to the pool
+ pool_put (mm->ip6_reass_pool, r);
+ mm->ip6_reass_allocated--;
+}
+
+map_ip6_reass_t *
+map_ip6_reass_get (ip6_address_t * src, ip6_address_t * dst, u32 fragment_id,
+ u8 protocol, u32 ** pi_to_drop)
+{
+ map_ip6_reass_t *r;
+ map_main_t *mm = &map_main;
+ map_ip6_reass_key_t k = {
+ .src = *src,
+ .dst = *dst,
+ .fragment_id = fragment_id,
+ .protocol = protocol
+ };
+
+ u32 h = 0;
+ int i;
+
+#ifdef clib_crc32c_uses_intrinsics
+ h = clib_crc32c ((u8 *) k.as_u32, 40);
+#else
+ u64 tmp =
+ k.as_u64[0] ^ k.as_u64[1] ^ k.as_u64[2] ^ k.as_u64[3] ^ k.as_u64[4];
+ h = clib_xxhash (tmp);
+#endif
+
+ h = h >> (32 - mm->ip6_reass_ht_log2len);
+
+ f64 now = vlib_time_now (mm->vlib_main);
+
+ //Cache garbage collection
+ while (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ map_ip6_reass_t *last =
+ pool_elt_at_index (mm->ip6_reass_pool, mm->ip6_reass_fifo_last);
+ if (last->ts + (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000) < now)
+ map_ip6_reass_free (last, pi_to_drop);
+ else
+ break;
+ }
+
+ if ((r = map_ip6_reass_lookup (&k, h, now)))
+ return r;
+
+ if (mm->ip6_reass_allocated >= mm->ip6_reass_conf_pool_size)
+ return NULL;
+
+ pool_get (mm->ip6_reass_pool, r);
+ mm->ip6_reass_allocated++;
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ {
+ r->fragments[i].pi = ~0;
+ r->fragments[i].next_data_len = 0;
+ r->fragments[i].next_data_offset = 0;
+ }
+
+ u32 ri = map_ip6_reass_pool_index (r);
+
+ //Link in new bucket
+ r->bucket = h;
+ r->bucket_next = mm->ip6_reass_hash_table[h];
+ mm->ip6_reass_hash_table[h] = ri;
+
+ //Link in fifo
+ if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ r->fifo_next =
+ pool_elt_at_index (mm->ip6_reass_pool,
+ mm->ip6_reass_fifo_last)->fifo_next;
+ r->fifo_prev = mm->ip6_reass_fifo_last;
+ pool_elt_at_index (mm->ip6_reass_pool, r->fifo_prev)->fifo_next = ri;
+ pool_elt_at_index (mm->ip6_reass_pool, r->fifo_next)->fifo_prev = ri;
+ }
+ else
+ {
+ r->fifo_next = r->fifo_prev = ri;
+ mm->ip6_reass_fifo_last = ri;
+ }
+
+ //Set other fields
+ r->ts = now;
+ r->key = k;
+ r->ip4_header.ip_version_and_header_length = 0;
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ r->expected_total = 0xffff;
+ r->forwarded = 0;
+#endif
+ return r;
+}
+
+int
+map_ip6_reass_add_fragment (map_ip6_reass_t * r, u32 pi,
+ u16 data_offset, u16 next_data_offset,
+ u8 * data_start, u16 data_len)
+{
+ map_ip6_fragment_t *f = NULL, *prev_f = NULL;
+ u16 copied_len = (data_len > 20) ? 20 : data_len;
+
+ if (map_main.ip6_reass_buffered_counter >= map_main.ip6_reass_conf_buffers)
+ return -1;
+
+ //Look up the fragment slots for the current buffer
+ //and for the one before it
+ int i;
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ {
+ if (data_offset && r->fragments[i].next_data_offset == data_offset)
+ {
+ prev_f = &r->fragments[i]; // This is buffer for previous packet
+ }
+ else if (r->fragments[i].next_data_offset == next_data_offset)
+ {
+ f = &r->fragments[i]; // This is a buffer for the current packet
+ }
+ else if (r->fragments[i].next_data_offset == 0)
+ { //Available
+ if (f == NULL)
+ f = &r->fragments[i];
+ else if (prev_f == NULL)
+ prev_f = &r->fragments[i];
+ }
+ }
+
+ if (!f || f->pi != ~0)
+ return -1;
+
+ if (data_offset)
+ {
+ if (!prev_f)
+ return -1;
+
+ clib_memcpy (prev_f->next_data, data_start, copied_len);
+ prev_f->next_data_len = copied_len;
+ prev_f->next_data_offset = data_offset;
+ }
+ else
+ {
+ if (((ip4_header_t *) data_start)->ip_version_and_header_length != 0x45)
+ return -1;
+
+ if (r->ip4_header.ip_version_and_header_length == 0)
+ clib_memcpy (&r->ip4_header, data_start, sizeof (ip4_header_t));
+ }
+
+ if (data_len > 20)
+ {
+ f->next_data_offset = next_data_offset;
+ f->pi = pi;
+ map_main.ip6_reass_buffered_counter++;
+ }
+ return 0;
+}
+
+void
+map_ip4_reass_reinit (u32 * trashed_reass, u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ int i;
+
+ if (dropped_packets)
+ *dropped_packets = mm->ip4_reass_buffered_counter;
+ if (trashed_reass)
+ *trashed_reass = mm->ip4_reass_allocated;
+ if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ u16 ri = mm->ip4_reass_fifo_last;
+ do
+ {
+ map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri);
+ for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i] != ~0)
+ map_ip4_drop_pi (r->fragments[i]);
+
+ ri = r->fifo_next;
+ pool_put (mm->ip4_reass_pool, r);
+ }
+ while (ri != mm->ip4_reass_fifo_last);
+ }
+
+ vec_free (mm->ip4_reass_hash_table);
+ vec_resize (mm->ip4_reass_hash_table, 1 << mm->ip4_reass_ht_log2len);
+ for (i = 0; i < (1 << mm->ip4_reass_ht_log2len); i++)
+ mm->ip4_reass_hash_table[i] = MAP_REASS_INDEX_NONE;
+ pool_free (mm->ip4_reass_pool);
+ pool_alloc (mm->ip4_reass_pool, mm->ip4_reass_conf_pool_size);
+
+ mm->ip4_reass_allocated = 0;
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ mm->ip4_reass_buffered_counter = 0;
+}
+
+u8
+map_get_ht_log2len (f32 ht_ratio, u16 pool_size)
+{
+ u32 desired_size = (u32) (pool_size * ht_ratio);
+ u8 i;
+ for (i = 1; i < 31; i++)
+ if ((1 << i) >= desired_size)
+ return i;
+ return 4;
+}
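+
+/*
+ * Worked example (illustrative values): with the default ht_ratio of 1.0
+ * and pool_size of 1024, desired_size is 1024, and the smallest i with
+ * (1 << i) >= 1024 is 10, giving a 2^10 = 1024 bucket hash table. With
+ * ht_ratio 2.0 the result would be 11 (2048 buckets).
+ */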
+
+int
+map_ip4_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass,
+ u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX)
+ return -1;
+
+ map_ip4_reass_lock ();
+ mm->ip4_reass_conf_ht_ratio = ht_ratio;
+ mm->ip4_reass_ht_log2len =
+ map_get_ht_log2len (ht_ratio, mm->ip4_reass_conf_pool_size);
+ map_ip4_reass_reinit (trashed_reass, dropped_packets);
+ map_ip4_reass_unlock ();
+ return 0;
+}
+
+int
+map_ip4_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass,
+ u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX)
+ return -1;
+
+ map_ip4_reass_lock ();
+ mm->ip4_reass_conf_pool_size = pool_size;
+ map_ip4_reass_reinit (trashed_reass, dropped_packets);
+ map_ip4_reass_unlock ();
+ return 0;
+}
+
+int
+map_ip4_reass_conf_lifetime (u16 lifetime_ms)
+{
+ map_main.ip4_reass_conf_lifetime_ms = lifetime_ms;
+ return 0;
+}
+
+int
+map_ip4_reass_conf_buffers (u32 buffers)
+{
+ map_main.ip4_reass_conf_buffers = buffers;
+ return 0;
+}
+
+void
+map_ip6_reass_reinit (u32 * trashed_reass, u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (dropped_packets)
+ *dropped_packets = mm->ip6_reass_buffered_counter;
+ if (trashed_reass)
+ *trashed_reass = mm->ip6_reass_allocated;
+ int i;
+ if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ u16 ri = mm->ip6_reass_fifo_last;
+ do
+ {
+ map_ip6_reass_t *r = pool_elt_at_index (mm->ip6_reass_pool, ri);
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i].pi != ~0)
+ map_ip6_drop_pi (r->fragments[i].pi);
+
+ ri = r->fifo_next;
+ pool_put (mm->ip6_reass_pool, r);
+ }
+ while (ri != mm->ip6_reass_fifo_last);
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ }
+
+ vec_free (mm->ip6_reass_hash_table);
+ vec_resize (mm->ip6_reass_hash_table, 1 << mm->ip6_reass_ht_log2len);
+ for (i = 0; i < (1 << mm->ip6_reass_ht_log2len); i++)
+ mm->ip6_reass_hash_table[i] = MAP_REASS_INDEX_NONE;
+ pool_free (mm->ip6_reass_pool);
+ pool_alloc (mm->ip6_reass_pool, mm->ip4_reass_conf_pool_size);
+
+ mm->ip6_reass_allocated = 0;
+ mm->ip6_reass_buffered_counter = 0;
+}
+
+int
+map_ip6_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass,
+ u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX)
+ return -1;
+
+ map_ip6_reass_lock ();
+ mm->ip6_reass_conf_ht_ratio = ht_ratio;
+ mm->ip6_reass_ht_log2len =
+ map_get_ht_log2len (ht_ratio, mm->ip6_reass_conf_pool_size);
+ map_ip6_reass_reinit (trashed_reass, dropped_packets);
+ map_ip6_reass_unlock ();
+ return 0;
+}
+
+int
+map_ip6_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass,
+ u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX)
+ return -1;
+
+ map_ip6_reass_lock ();
+ mm->ip6_reass_conf_pool_size = pool_size;
+ map_ip6_reass_reinit (trashed_reass, dropped_packets);
+ map_ip6_reass_unlock ();
+ return 0;
+}
+
+int
+map_ip6_reass_conf_lifetime (u16 lifetime_ms)
+{
+ map_main.ip6_reass_conf_lifetime_ms = lifetime_ms;
+ return 0;
+}
+
+int
+map_ip6_reass_conf_buffers (u32 buffers)
+{
+ map_main.ip6_reass_conf_buffers = buffers;
+ return 0;
+}
+
+/* *INDENT-OFF* */
+
+/*?
+ * Configure MAP reassembly behaviour
+ *
+ * @cliexpar
+ * @cliexstart{map params reassembly}
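+ * Example (illustrative values):
+ * vpp# map params reassembly ip4 lifetime 1000 pool-size 2048 buffers 4096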
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_ip4_reass_lifetime_command, static) = {
+ .path = "map params reassembly",
+ .short_help = "map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] "
+ "[pool-size <pool-size>] [buffers <buffers>] "
+ "[ht-ratio <ht-ratio>]",
+ .function = map_params_reass_command_fn,
+};
+
+/*?
+ * Set or copy the IP TOS/Traffic Class field
+ *
+ * @cliexpar
+ * @cliexstart{map params traffic-class}
+ *
+ * This command is used to set the traffic-class field in translated
+ * or encapsulated packets. If copy is specified (the default), the
+ * traffic-class/TOS field is copied from the original packet to the
+ * translated / encapsulating header.
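+ *
+ * Example (illustrative):
+ * vpp# map params traffic-class copy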
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_traffic_class_command, static) = {
+ .path = "map params traffic-class",
+ .short_help = "map params traffic-class {0x0-0xff | copy}",
+ .function = map_traffic_class_command_fn,
+};
+
+/*?
+ * Bypass IP4/IP6 lookup
+ *
+ * @cliexpar
+ * @cliexstart{map params pre-resolve}
+ *
+ * Bypass a second FIB lookup of the translated or encapsulated
+ * packet, and forward the packet directly to the specified
+ * next-hop. This optimization trades forwarding flexibility for
+ * performance.
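+ *
+ * Example (illustrative next-hop):
+ * vpp# map params pre-resolve ip6-nh 2001:db8::1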
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_pre_resolve_command, static) = {
+ .path = "map params pre-resolve",
+ .short_help = " map params pre-resolve {ip4-nh <address>} "
+ "| {ip6-nh <address>}",
+ .function = map_pre_resolve_command_fn,
+};
+
+/*?
+ * Enable or disable the MAP-E inbound security check
+ *
+ * @cliexpar
+ * @cliexstart{map params security-check}
+ *
+ * By default, a decapsulated packet's IPv4 source address will be
+ * verified against the outer header's IPv6 source address. Disabling
+ * this feature will allow IPv4 source address spoofing.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_security_check_command, static) = {
+ .path = "map params security-check",
+ .short_help = "map params security-check on|off",
+ .function = map_security_check_command_fn,
+};
+
+/*?
+ * Specify the IPv4 source address used for relayed ICMP error messages
+ *
+ * @cliexpar
+ * @cliexstart{map params icmp source-address}
+ *
+ * This command specifies the IPv4 source address (which must be local
+ * to the system) used for relaying received IPv6 ICMP error
+ * messages.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_icmp_relay_source_address_command, static) = {
+ .path = "map params icmp source-address",
+ .short_help = "map params icmp source-address <ip4-address>",
+ .function = map_icmp_relay_source_address_command_fn,
+};
+
+/*?
+ * Send IPv6 ICMP unreachables
+ *
+ * @cliexpar
+ * @cliexstart{map params icmp6 unreachables}
+ *
+ * Send IPv6 ICMP unreachable messages back if the security check fails
+ * or no matching MAP domain exists.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_icmp_unreachables_command, static) = {
+ .path = "map params icmp6 unreachables",
+ .short_help = "map params icmp6 unreachables {on|off}",
+ .function = map_icmp_unreachables_command_fn,
+};
+
+/*?
+ * Configure MAP fragmentation behaviour
+ *
+ * @cliexpar
+ * @cliexstart{map params fragment}
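+ * Example (illustrative):
+ * vpp# map params fragment inner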
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_fragment_command, static) = {
+ .path = "map params fragment",
+ .short_help = "map params fragment inner|outer",
+ .function = map_fragment_command_fn,
+};
+
+/*?
+ * Ignore the IPv4 Don't fragment bit
+ *
+ * @cliexpar
+ * @cliexstart{map params fragment ignore-df}
+ *
+ * Allows fragmentation of the IPv4 packet even if the DF bit is
+ * set. The choice between inner and outer fragmentation of tunnel
+ * packets is a trade-off. The benefit of inner fragmentation is that
+ * the ultimate endpoint must reassemble, rather than the tunnel
+ * endpoint.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_fragment_df_command, static) = {
+ .path = "map params fragment ignore-df",
+ .short_help = "map params fragment ignore-df on|off",
+ .function = map_fragment_df_command_fn,
+};
+
+/*?
+ * Specify whether the inbound security check should be done on fragments
+ *
+ * @cliexpar
+ * @cliexstart{map params security-check fragments}
+ *
+ * Typically the inbound on-decapsulation security check is done only
+ * on the first fragment, the one that contains the L4
+ * information. A security check on every fragment is possible, but
+ * it has a cost: state must be created on the first fragment.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_security_check_frag_command, static) = {
+ .path = "map params security-check fragments",
+ .short_help = "map params security-check fragments on|off",
+ .function = map_security_check_frag_command_fn,
+};
+
+/*?
+ * Add MAP domain
+ *
+ * @cliexpar
+ * @cliexstart{map add domain}
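+ * Example (illustrative prefixes; here ea-bits-len 16 = 8 IPv4 suffix bits plus an 8 bit PSID):
+ * vpp# map add domain ip4-pfx 192.0.2.0/24 ip6-pfx 2001:db8::/32 ip6-src 2001:db8:ffff::/96 ea-bits-len 16 psid-offset 6 psid-len 8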
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_add_domain_command, static) = {
+ .path = "map add domain",
+ .short_help = "map add domain ip4-pfx <ip4-pfx> ip6-pfx <ip6-pfx> "
+ "ip6-src <ip6-pfx> ea-bits-len <n> psid-offset <n> psid-len <n> "
+ "[map-t] [mtu <mtu>]",
+ .function = map_add_domain_command_fn,
+};
+
+/*?
+ * Add MAP rule to a domain
+ *
+ * @cliexpar
+ * @cliexstart{map add rule}
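+ * Example (illustrative values):
+ * vpp# map add rule index 0 psid 52 ip6-dst 2001:db8:cafe::1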
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_add_rule_command, static) = {
+ .path = "map add rule",
+ .short_help = "map add rule index <domain> psid <psid> ip6-dst <ip6-addr>",
+ .function = map_add_rule_command_fn,
+};
+
+/*?
+ * Delete MAP domain
+ *
+ * @cliexpar
+ * @cliexstart{map del domain}
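+ * Example (illustrative index):
+ * vpp# map del domain index 0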
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_del_command, static) = {
+ .path = "map del domain",
+ .short_help = "map del domain index <domain>",
+ .function = map_del_domain_command_fn,
+};
+
+/*?
+ * Show MAP domains
+ *
+ * @cliexpar
+ * @cliexstart{show map domain}
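+ * Example (illustrative):
+ * vpp# show map domain index 0 counters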
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(show_map_domain_command, static) = {
+ .path = "show map domain",
+ .short_help = "show map domain index <n> [counters]",
+ .function = show_map_domain_command_fn,
+};
+
+/*?
+ * Show MAP statistics
+ *
+ * @cliexpar
+ * @cliexstart{show map stats}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(show_map_stats_command, static) = {
+ .path = "show map stats",
+ .short_help = "show map stats",
+ .function = show_map_stats_command_fn,
+};
+
+/*?
+ * Show MAP fragmentation information
+ *
+ * @cliexpar
+ * @cliexstart{show map fragments}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(show_map_fragments_command, static) = {
+ .path = "show map fragments",
+ .short_help = "show map fragments",
+ .function = show_map_fragments_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * map_init
+ */
+clib_error_t *
+map_init (vlib_main_t * vm)
+{
+ map_main_t *mm = &map_main;
+ mm->vnet_main = vnet_get_main ();
+ mm->vlib_main = vm;
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+ fib_protocol_t proto;
+
+ FOR_EACH_FIB_PROTOCOL (proto)
+ {
+ map_pre_resolve_init (&pre_resolved[proto]);
+ }
+#endif
+
+ /* traffic class */
+ mm->tc = 0;
+ mm->tc_copy = true;
+
+ /* Inbound security check */
+ mm->sec_check = true;
+ mm->sec_check_frag = false;
+
+ /* ICMP6 Type 1, Code 5 for security check failure */
+ mm->icmp6_enabled = false;
+
+ /* Inner or outer fragmentation */
+ mm->frag_inner = false;
+ mm->frag_ignore_df = false;
+
+ vec_validate (mm->domain_counters, MAP_N_DOMAIN_COUNTER - 1);
+ mm->domain_counters[MAP_DOMAIN_COUNTER_RX].name = "rx";
+ mm->domain_counters[MAP_DOMAIN_COUNTER_TX].name = "tx";
+
+ vlib_validate_simple_counter (&mm->icmp_relayed, 0);
+ vlib_zero_simple_counter (&mm->icmp_relayed, 0);
+
+ /* IP4 virtual reassembly */
+ mm->ip4_reass_hash_table = 0;
+ mm->ip4_reass_pool = 0;
+ mm->ip4_reass_lock =
+ clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ *mm->ip4_reass_lock = 0;
+ mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT;
+ mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT;
+ mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT;
+ mm->ip4_reass_conf_buffers = MAP_IP4_REASS_BUFFERS_DEFAULT;
+ mm->ip4_reass_ht_log2len =
+ map_get_ht_log2len (mm->ip4_reass_conf_ht_ratio,
+ mm->ip4_reass_conf_pool_size);
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ map_ip4_reass_reinit (NULL, NULL);
+
+ /* IP6 virtual reassembly */
+ mm->ip6_reass_hash_table = 0;
+ mm->ip6_reass_pool = 0;
+ mm->ip6_reass_lock =
+ clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ *mm->ip6_reass_lock = 0;
+ mm->ip6_reass_conf_ht_ratio = MAP_IP6_REASS_HT_RATIO_DEFAULT;
+ mm->ip6_reass_conf_lifetime_ms = MAP_IP6_REASS_LIFETIME_DEFAULT;
+ mm->ip6_reass_conf_pool_size = MAP_IP6_REASS_POOL_SIZE_DEFAULT;
+ mm->ip6_reass_conf_buffers = MAP_IP6_REASS_BUFFERS_DEFAULT;
+ mm->ip6_reass_ht_log2len =
+ map_get_ht_log2len (mm->ip6_reass_conf_ht_ratio,
+ mm->ip6_reass_conf_pool_size);
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ map_ip6_reass_reinit (NULL, NULL);
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+ fib_node_register_type (FIB_NODE_TYPE_MAP_E, &map_vft);
+#endif
+ map_dpo_module_init ();
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (map_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/map.h b/src/vnet/map/map.h
new file mode 100644
index 00000000..208a58ef
--- /dev/null
+++ b/src/vnet/map/map.h
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdbool.h>
+#include <vppinfra/error.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/map/map_dpo.h>
+#include <vnet/dpo/load_balance.h>
+
+#define MAP_SKIP_IP6_LOOKUP 1
+
+int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len,
+ ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
+ ip6_address_t * ip6_src, u8 ip6_src_len,
+ u8 ea_bits_len, u8 psid_offset, u8 psid_length,
+ u32 * map_domain_index, u16 mtu, u8 flags);
+int map_delete_domain (u32 map_domain_index);
+int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep,
+ u8 is_add);
+u8 *format_map_trace (u8 * s, va_list * args);
+
+typedef enum __attribute__ ((__packed__))
+{
+ MAP_DOMAIN_PREFIX = 1 << 0, MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T
+} map_domain_flags_e;
+
+/**
+ * IP4 reassembly logic:
+ * One virtually reassembled flow requires a map_ip4_reass_t structure in order
+ * to keep the first-fragment port number and, optionally, cache out of sequence
+ * packets.
+ * There are up to ip4_reass_conf_pool_size such structures.
+ * When in use, those structures are stored in a hash table of
+ * 2^ip4_reass_ht_log2len buckets.
+ * When a new structure needs to be used, it is allocated from available ones.
+ * If there is no structure available, the oldest in use is selected and reused
+ * if and only if it was first allocated more than ip4_reass_conf_lifetime_ms
+ * milliseconds ago.
+ * In case no structure can be allocated, the fragment is dropped.
+ */
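+
+/*
+ * Typical call flow (illustrative sketch; error handling and buffer
+ * bookkeeping elided, see the declarations further below):
+ *
+ *   u32 *pi_to_drop = NULL;
+ *   map_ip4_reass_lock ();
+ *   map_ip4_reass_t *r = map_ip4_reass_get (src, dst, frag_id, protocol,
+ *                                           &pi_to_drop);
+ *   if (r && !map_ip4_reass_add_fragment (r, pi))
+ *     ;   // fragment cached until the first fragment reveals the port
+ *   map_ip4_reass_unlock ();
+ *   // any buffer indices collected in pi_to_drop must be dropped by the
+ *   // caller, e.g. with map_ip4_drop_pi()
+ */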
+
+#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */
+#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0)
+#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures
+#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048
+
+#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragments per reassembly
+
+#define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */
+#define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0)
+#define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures
+#define MAP_IP6_REASS_BUFFERS_DEFAULT 2048
+
+#define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5
+
+#define MAP_IP6_REASS_COUNT_BYTES
+#define MAP_IP4_REASS_COUNT_BYTES
+
+//#define IP6_MAP_T_OVERRIDE_TOS 0
+
+/*
+ * This structure _MUST_ be no larger than a single cache line (64 bytes).
+ * If more space is needed, make a union of ip6_prefix and *rules; those are mutually exclusive.
+ */
+typedef struct
+{
+ ip6_address_t ip6_src;
+ ip6_address_t ip6_prefix;
+ ip6_address_t *rules;
+ u32 suffix_mask;
+ ip4_address_t ip4_prefix;
+ u16 psid_mask;
+ u16 mtu;
+ map_domain_flags_e flags;
+ u8 ip6_prefix_len;
+ u8 ip6_src_len;
+ u8 ea_bits_len;
+ u8 psid_offset;
+ u8 psid_length;
+
+ /* helpers */
+ u8 psid_shift;
+ u8 suffix_shift;
+ u8 ea_shift;
+
+ /* not used by forwarding */
+ u8 ip4_prefix_len;
+} map_domain_t;
+
+STATIC_ASSERT ((sizeof (map_domain_t) <= CLIB_CACHE_LINE_BYTES),
+ "MAP domain fits in one cacheline");
+
+#define MAP_REASS_INDEX_NONE ((u16)0xffff)
+
+/*
+ * Hash key, padded out to 16 bytes for fast compare
+ */
+/* *INDENT-OFF* */
+typedef union {
+ CLIB_PACKED (struct {
+ ip4_address_t src;
+ ip4_address_t dst;
+ u16 fragment_id;
+ u8 protocol;
+ });
+ u64 as_u64[2];
+ u32 as_u32[4];
+} map_ip4_reass_key_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ map_ip4_reass_key_t key;
+ f64 ts;
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ u16 expected_total;
+ u16 forwarded;
+#endif
+ i32 port;
+ u16 bucket;
+ u16 bucket_next;
+ u16 fifo_prev;
+ u16 fifo_next;
+ u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY];
+} map_ip4_reass_t;
+
+/*
+ * MAP domain counters
+ */
+typedef enum
+{
+ /* Simple counters */
+ MAP_DOMAIN_IPV4_FRAGMENT = 0,
+ /* Combined counters */
+ MAP_DOMAIN_COUNTER_RX = 0,
+ MAP_DOMAIN_COUNTER_TX,
+ MAP_N_DOMAIN_COUNTER
+} map_domain_counter_t;
+
+/*
+ * IPv6 reassembly hash key, padded out to 40 bytes for fast compare
+ */
+/* *INDENT-OFF* */
+typedef union {
+ CLIB_PACKED (struct {
+ ip6_address_t src;
+ ip6_address_t dst;
+ u32 fragment_id;
+ u8 protocol;
+ });
+ u64 as_u64[5];
+ u32 as_u32[10];
+} map_ip6_reass_key_t;
+/* *INDENT-ON* */
+
+typedef struct {
+ u32 pi; //Cached packet or ~0
+ u16 next_data_offset; //The data offset of the additional 20 bytes, or 0 if the slot is unused
+ u8 next_data_len; //Number of bytes ready to be copied (20 if not last fragment)
+ u8 next_data[20]; //The 20 additional bytes
+} map_ip6_fragment_t;
+
+typedef struct {
+ map_ip6_reass_key_t key;
+ f64 ts;
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ u16 expected_total;
+ u16 forwarded;
+#endif
+ u16 bucket; //What hash bucket this element is linked in
+ u16 bucket_next;
+ u16 fifo_prev;
+ u16 fifo_next;
+ ip4_header_t ip4_header;
+ map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY];
+} map_ip6_reass_t;
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+/**
+ * A pre-resolved next-hop
+ */
+typedef struct map_main_pre_resolved_t_
+{
+ /**
+ * Linkage into the FIB graph
+ */
+ fib_node_t node;
+
+ /**
+ * The FIB entry index of the next-hop
+ */
+ fib_node_index_t fei;
+
+ /**
+ * This object sibling index on the FIB entry's child dependency list
+ */
+ u32 sibling;
+
+ /**
+ * The Load-balance object index to use to forward
+ */
+ dpo_id_t dpo;
+} map_main_pre_resolved_t;
+
+/**
+ * Pre-resolved next hops for v4 and v6. Why these are global and not
+ * per-domain is beyond me.
+ */
+extern map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX];
+#endif
+
+typedef struct {
+ /* pool of MAP domains */
+ map_domain_t *domains;
+
+ /* MAP Domain packet/byte counters indexed by map domain index */
+ vlib_simple_counter_main_t *simple_domain_counters;
+ vlib_combined_counter_main_t *domain_counters;
+ volatile u32 *counter_lock;
+
+ /* Traffic class: zero, copy (~0) or fixed value */
+ u8 tc;
+ bool tc_copy;
+
+ bool sec_check; /* Inbound security check */
+ bool sec_check_frag; /* Inbound security check for (subsequent) fragments */
+ bool icmp6_enabled; /* Send destination unreachable for security check failure */
+
+ /* ICMPv6 -> ICMPv4 relay parameters */
+ ip4_address_t icmp4_src_address;
+ vlib_simple_counter_main_t icmp_relayed;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /*
+ * IPv4 encap and decap reassembly
+ */
+ /* Configuration */
+ f32 ip4_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size))
+ u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures
+ u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms
+ u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly
+
+ /* Runtime */
+ map_ip4_reass_t *ip4_reass_pool;
+ u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len
+ u16 ip4_reass_allocated;
+ u16 *ip4_reass_hash_table;
+ u16 ip4_reass_fifo_last;
+ volatile u32 *ip4_reass_lock;
+
+ /* Counters */
+ u32 ip4_reass_buffered_counter;
+
+ bool frag_inner; /* Inner or outer fragmentation */
+ bool frag_ignore_df; /* Fragment (outer) packet even if DF is set */
+
+ /*
+ * IPv6 decap reassembly
+ */
+ /* Configuration */
+ f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size))
+ u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures
+ u16 ip6_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms
+ u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly
+
+ /* Runtime */
+ map_ip6_reass_t *ip6_reass_pool;
+ u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len
+ u16 ip6_reass_allocated;
+ u16 *ip6_reass_hash_table;
+ u16 ip6_reass_fifo_last;
+ volatile u32 *ip6_reass_lock;
+
+ /* Counters */
+ u32 ip6_reass_buffered_counter;
+
+} map_main_t;
+
+/*
+ * MAP Error counters/messages
+ */
+#define foreach_map_error \
+ /* Must be first. */ \
+ _(NONE, "valid MAP packets") \
+ _(BAD_PROTOCOL, "bad protocol") \
+ _(SEC_CHECK, "security check failed") \
+ _(ENCAP_SEC_CHECK, "encap security check failed") \
+ _(DECAP_SEC_CHECK, "decap security check failed") \
+ _(ICMP, "unable to translate ICMP") \
+ _(ICMP_RELAY, "unable to relay ICMP") \
+ _(UNKNOWN, "unknown") \
+ _(NO_BINDING, "no binding") \
+ _(NO_DOMAIN, "no domain") \
+ _(FRAGMENTED, "packet is a fragment") \
+ _(FRAGMENT_MEMORY, "could not cache fragment") \
+ _(FRAGMENT_MALFORMED, "fragment has unexpected format")\
+ _(FRAGMENT_DROPPED, "dropped cached fragment") \
+ _(MALFORMED, "malformed packet") \
+ _(DF_SET, "can't fragment, DF set")
+
+typedef enum {
+#define _(sym,str) MAP_ERROR_##sym,
+ foreach_map_error
+#undef _
+ MAP_N_ERROR,
+ } map_error_t;
+
+u64 map_error_counter_get(u32 node_index, map_error_t map_error);
+
+typedef struct {
+ u32 map_domain_index;
+ u16 port;
+} map_trace_t;
+
+map_main_t map_main;
+
+extern vlib_node_registration_t ip4_map_node;
+extern vlib_node_registration_t ip6_map_node;
+
+extern vlib_node_registration_t ip4_map_t_node;
+extern vlib_node_registration_t ip4_map_t_fragmented_node;
+extern vlib_node_registration_t ip4_map_t_tcp_udp_node;
+extern vlib_node_registration_t ip4_map_t_icmp_node;
+
+extern vlib_node_registration_t ip6_map_t_node;
+extern vlib_node_registration_t ip6_map_t_fragmented_node;
+extern vlib_node_registration_t ip6_map_t_tcp_udp_node;
+extern vlib_node_registration_t ip6_map_t_icmp_node;
+
+/*
+ * map_get_pfx
+ */
+static_always_inline u64
+map_get_pfx (map_domain_t *d, u32 addr, u16 port)
+{
+ u16 psid = (port >> d->psid_shift) & d->psid_mask;
+
+ if (d->ea_bits_len == 0 && d->rules)
+ return clib_net_to_host_u64(d->rules[psid].as_u64[0]);
+
+ u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask;
+ u64 ea = d->ea_bits_len == 0 ? 0 : (((u64) suffix << d->psid_length)) | psid;
+
+ return clib_net_to_host_u64(d->ip6_prefix.as_u64[0]) | ea << d->ea_shift;
+}
+
+static_always_inline u64
+map_get_pfx_net (map_domain_t *d, u32 addr, u16 port)
+{
+ return clib_host_to_net_u64(map_get_pfx(d, clib_net_to_host_u32(addr),
+ clib_net_to_host_u16(port)));
+}
+
+/*
+ * map_get_sfx
+ */
+static_always_inline u64
+map_get_sfx (map_domain_t *d, u32 addr, u16 port)
+{
+ u16 psid = (port >> d->psid_shift) & d->psid_mask;
+
+ /* Shared 1:1 mode. */
+ if (d->ea_bits_len == 0 && d->rules)
+ return clib_net_to_host_u64(d->rules[psid].as_u64[1]);
+ if (d->ip6_prefix_len == 128)
+ return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]);
+
+ /* IPv4 prefix */
+ if (d->flags & MAP_DOMAIN_PREFIX)
+ return (u64) (addr & (0xFFFFFFFF << d->suffix_shift)) << 16;
+
+ /* Shared or full IPv4 address */
+ return ((u64) addr << 16) | psid;
+}
+
+static_always_inline u64
+map_get_sfx_net (map_domain_t *d, u32 addr, u16 port)
+{
+ return clib_host_to_net_u64(map_get_sfx(d, clib_net_to_host_u32(addr),
+ clib_net_to_host_u16(port)));
+}
+
+static_always_inline u32
+map_get_ip4 (ip6_address_t *addr)
+{
+ return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16);
+}
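+
+/*
+ * Worked example for map_get_pfx()/map_get_sfx() (illustrative, assumed
+ * helper values: ip6_prefix 2001:db8::/32, ea_shift 16, ea_bits_len 16,
+ * suffix_shift 0, suffix_mask 0xff, psid_shift 2, psid_mask 0xff,
+ * psid_length 8; no rules, MAP_DOMAIN_PREFIX not set):
+ *
+ *   addr = 192.0.2.18 (0xc0000212), port = 0x1234
+ *   psid   = (0x1234 >> 2) & 0xff       = 0x8d
+ *   suffix = (0xc0000212 >> 0) & 0xff   = 0x12
+ *   ea     = (0x12 << 8) | 0x8d         = 0x128d
+ *   pfx    = 2001:db8:128d::   (upper 64 bits)
+ *   sfx    = ::c000:212:8d     (lower 64 bits: zero pad, IPv4, PSID)
+ *   => 2001:db8:128d:0:0:c000:212:8d
+ */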
+
+/*
+ * Get the MAP domain from an IPv4 lookup adjacency.
+ */
+static_always_inline map_domain_t *
+ip4_map_get_domain (u32 mdi)
+{
+ map_main_t *mm = &map_main;
+
+ return pool_elt_at_index(mm->domains, mdi);
+}
+
+/*
+ * Get the MAP domain from an IPv6 lookup adjacency.
+ * If the IPv6 address or prefix is not shared, no lookup is required.
+ * The IPv4 address is used otherwise.
+ */
+static_always_inline map_domain_t *
+ip6_map_get_domain (u32 mdi,
+ ip4_address_t *addr,
+ u32 *map_domain_index,
+ u8 *error)
+{
+ map_main_t *mm = &map_main;
+
+ /*
+ * Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA.
+ * (That's done implicitly when MAP domain is looked up in the IPv4 FIB)
+ */
+#ifdef MAP_NONSHARED_DOMAIN_ENABLED
+#error "How can you be sure this domain is not shared?"
+ *map_domain_index = mdi;
+ return pool_elt_at_index(mm->domains, mdi);
+#endif
+
+ u32 lbi = ip4_fib_forwarding_lookup(0, addr);
+ const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0);
+ if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type ||
+ dpo->dpoi_type == map_t_dpo_type))
+ {
+ *map_domain_index = dpo->dpoi_index;
+ return pool_elt_at_index(mm->domains, *map_domain_index);
+ }
+ *error = MAP_ERROR_NO_DOMAIN;
+ return NULL;
+}
+
+map_ip4_reass_t *
+map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id,
+ u8 protocol, u32 **pi_to_drop);
+void
+map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop);
+
+#define map_ip4_reass_lock() while (__sync_lock_test_and_set(map_main.ip4_reass_lock, 1)) {}
+#define map_ip4_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip4_reass_lock = 0;} while(0)
+
+static_always_inline void
+map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi)
+{
+ int i;
+ for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if(r->fragments[i] != ~0) {
+ vec_add1(*pi, r->fragments[i]);
+ r->fragments[i] = ~0;
+ map_main.ip4_reass_buffered_counter--;
+ }
+}
+
+int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi);
+
+map_ip6_reass_t *
+map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id,
+ u8 protocol, u32 **pi_to_drop);
+void
+map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop);
+
+#define map_ip6_reass_lock() while (__sync_lock_test_and_set(map_main.ip6_reass_lock, 1)) {}
+#define map_ip6_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip6_reass_lock = 0;} while(0)
+
+int
+map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi,
+ u16 data_offset, u16 next_data_offset,
+ u8 *data_start, u16 data_len);
+
+void map_ip4_drop_pi(u32 pi);
+
+int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100
+int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff)
+int map_ip4_reass_conf_lifetime(u16 lifetime_ms);
+#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff
+int map_ip4_reass_conf_buffers(u32 buffers);
+#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff)
+
+void map_ip6_drop_pi(u32 pi);
+
+int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP6_REASS_CONF_HT_RATIO_MAX 100
+int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP6_REASS_CONF_POOL_SIZE_MAX (0xfeff)
+int map_ip6_reass_conf_lifetime(u16 lifetime_ms);
+#define MAP_IP6_REASS_CONF_LIFETIME_MAX 0xffff
+int map_ip6_reass_conf_buffers(u32 buffers);
+#define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff)
+
+#define u8_ptr_add(ptr, index) (((u8 *)ptr) + index)
+#define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val))
+
+static_always_inline void
+ip4_map_t_embedded_address (map_domain_t *d,
+ ip6_address_t *ip6, const ip4_address_t *ip4)
+{
+ ASSERT(d->ip6_src_len == 96); //No support for other lengths for now
+ ip6->as_u64[0] = d->ip6_src.as_u64[0];
+ ip6->as_u32[2] = d->ip6_src.as_u32[2];
+ ip6->as_u32[3] = ip4->as_u32;
+}
+
+static_always_inline u32
+ip6_map_t_embedded_address (map_domain_t *d, ip6_address_t *addr)
+{
+ ASSERT(d->ip6_src_len == 96); //No support for other lengths for now
+ return addr->as_u32[3];
+}
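+
+/*
+ * Illustrative example (assumed /96 source prefix): with
+ * d->ip6_src = 2001:db8:ffff::/96 and ip4 = 192.0.2.1,
+ * ip4_map_t_embedded_address() yields 2001:db8:ffff::c000:201, and
+ * ip6_map_t_embedded_address() recovers 192.0.2.1 from the last 32 bits.
+ */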
+
+static inline void
+map_domain_counter_lock (map_main_t *mm)
+{
+ if (mm->counter_lock)
+ while (__sync_lock_test_and_set(mm->counter_lock, 1))
+ /* zzzz */ ;
+}
+static inline void
+map_domain_counter_unlock (map_main_t *mm)
+{
+ if (mm->counter_lock)
+ *mm->counter_lock = 0;
+}
+
+static_always_inline void
+map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector,
+ vlib_node_runtime_t *node, vlib_error_t *error,
+ u32 next)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ //Deal with fragments that are ready
+ from = pi_vector;
+ n_left_from = vec_len(pi_vector);
+ next_index = node->cached_next_index;
+ while (n_left_from > 0) {
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from > 0 && n_left_to_next > 0) {
+ u32 pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ vlib_buffer_t *p0 = vlib_get_buffer(vm, pi0);
+ p0->error = *error;
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next);
+ }
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/map_api.c b/src/vnet/map/map_api.c
new file mode 100644
index 00000000..994a64de
--- /dev/null
+++ b/src/vnet/map/map_api.c
@@ -0,0 +1,302 @@
+/*
+ *------------------------------------------------------------------
+ * map_api.c - vnet map api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include "map.h"
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(MAP_ADD_DOMAIN, map_add_domain) \
+_(MAP_DEL_DOMAIN, map_del_domain) \
+_(MAP_ADD_DEL_RULE, map_add_del_rule) \
+_(MAP_DOMAIN_DUMP, map_domain_dump) \
+_(MAP_RULE_DUMP, map_rule_dump) \
+_(MAP_SUMMARY_STATS, map_summary_stats)
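+
+/*
+ * Each (ID, name) pair above is expanded in map_api_hookup() below into a
+ * vl_msg_api_set_handlers() registration.
+ */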
+
+static void
+vl_api_map_add_domain_t_handler (vl_api_map_add_domain_t * mp)
+{
+ vl_api_map_add_domain_reply_t *rmp;
+ int rv = 0;
+ u32 index;
+ u8 flags = mp->is_translation ? MAP_DOMAIN_TRANSLATION : 0;
+ rv =
+ map_create_domain ((ip4_address_t *) & mp->ip4_prefix, mp->ip4_prefix_len,
+ (ip6_address_t *) & mp->ip6_prefix, mp->ip6_prefix_len,
+ (ip6_address_t *) & mp->ip6_src,
+ mp->ip6_src_prefix_len, mp->ea_bits_len,
+ mp->psid_offset, mp->psid_length, &index,
+ ntohs (mp->mtu), flags);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_MAP_ADD_DOMAIN_REPLY,
+ ({
+ rmp->index = ntohl(index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_map_del_domain_t_handler (vl_api_map_del_domain_t * mp)
+{
+ vl_api_map_del_domain_reply_t *rmp;
+ int rv = 0;
+
+ rv = map_delete_domain (ntohl (mp->index));
+
+ REPLY_MACRO (VL_API_MAP_DEL_DOMAIN_REPLY);
+}
+
+static void
+vl_api_map_add_del_rule_t_handler (vl_api_map_add_del_rule_t * mp)
+{
+  vl_api_map_add_del_rule_reply_t *rmp;
+ int rv = 0;
+
+ rv =
+ map_add_del_psid (ntohl (mp->index), ntohs (mp->psid),
+ (ip6_address_t *) mp->ip6_dst, mp->is_add);
+
+ REPLY_MACRO (VL_API_MAP_ADD_DEL_RULE_REPLY);
+}
+
+static void
+vl_api_map_domain_dump_t_handler (vl_api_map_domain_dump_t * mp)
+{
+ vl_api_map_domain_details_t *rmp;
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+ unix_shared_memory_queue_t *q;
+
+ if (pool_elts (mm->domains) == 0)
+ return;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach(d, mm->domains,
+ ({
+    /* Make sure every field is initialized (or don't skip the memset()) */
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs(VL_API_MAP_DOMAIN_DETAILS);
+ rmp->domain_index = htonl(d - mm->domains);
+ rmp->ea_bits_len = d->ea_bits_len;
+ rmp->psid_offset = d->psid_offset;
+ rmp->psid_length = d->psid_length;
+ clib_memcpy(rmp->ip4_prefix, &d->ip4_prefix, sizeof(rmp->ip4_prefix));
+ rmp->ip4_prefix_len = d->ip4_prefix_len;
+ clib_memcpy(rmp->ip6_prefix, &d->ip6_prefix, sizeof(rmp->ip6_prefix));
+ rmp->ip6_prefix_len = d->ip6_prefix_len;
+ clib_memcpy(rmp->ip6_src, &d->ip6_src, sizeof(rmp->ip6_src));
+ rmp->ip6_src_len = d->ip6_src_len;
+ rmp->mtu = htons(d->mtu);
+ rmp->is_translation = (d->flags & MAP_DOMAIN_TRANSLATION);
+ rmp->context = mp->context;
+
+ vl_msg_api_send_shmem (q, (u8 *)&rmp);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_map_rule_dump_t_handler (vl_api_map_rule_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ u16 i;
+ ip6_address_t dst;
+ vl_api_map_rule_details_t *rmp;
+ map_main_t *mm = &map_main;
+ u32 domain_index = ntohl (mp->domain_index);
+ map_domain_t *d;
+
+ if (pool_elts (mm->domains) == 0)
+ return;
+
+ d = pool_elt_at_index (mm->domains, domain_index);
+ if (!d || !d->rules)
+ {
+ return;
+ }
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ for (i = 0; i < (0x1 << d->psid_length); i++)
+ {
+ dst = d->rules[i];
+ if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0)
+ {
+ continue;
+ }
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_MAP_RULE_DETAILS);
+ rmp->psid = htons (i);
+ clib_memcpy (rmp->ip6_dst, &dst, sizeof (rmp->ip6_dst));
+ rmp->context = mp->context;
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+ }
+}
+
+static void
+vl_api_map_summary_stats_t_handler (vl_api_map_summary_stats_t * mp)
+{
+ vl_api_map_summary_stats_reply_t *rmp;
+ vlib_combined_counter_main_t *cm;
+ vlib_counter_t v;
+ int i, which;
+ u64 total_pkts[VLIB_N_RX_TX];
+ u64 total_bytes[VLIB_N_RX_TX];
+ map_main_t *mm = &map_main;
+ unix_shared_memory_queue_t *q =
+ vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_MAP_SUMMARY_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = 0;
+
+ if (pool_elts (mm->domains) == 0)
+ {
+ rmp->retval = -1;
+ goto out;
+ }
+
+ memset (total_pkts, 0, sizeof (total_pkts));
+ memset (total_bytes, 0, sizeof (total_bytes));
+
+ map_domain_counter_lock (mm);
+ vec_foreach (cm, mm->domain_counters)
+ {
+ which = cm - mm->domain_counters;
+
+ for (i = 0; i < vlib_combined_counter_n_counters (cm); i++)
+ {
+ vlib_get_combined_counter (cm, i, &v);
+ total_pkts[which] += v.packets;
+ total_bytes[which] += v.bytes;
+ }
+ }
+
+ map_domain_counter_unlock (mm);
+
+ /* Note: in network byte order! */
+ rmp->total_pkts[MAP_DOMAIN_COUNTER_RX] =
+ clib_host_to_net_u64 (total_pkts[MAP_DOMAIN_COUNTER_RX]);
+ rmp->total_bytes[MAP_DOMAIN_COUNTER_RX] =
+ clib_host_to_net_u64 (total_bytes[MAP_DOMAIN_COUNTER_RX]);
+ rmp->total_pkts[MAP_DOMAIN_COUNTER_TX] =
+ clib_host_to_net_u64 (total_pkts[MAP_DOMAIN_COUNTER_TX]);
+ rmp->total_bytes[MAP_DOMAIN_COUNTER_TX] =
+ clib_host_to_net_u64 (total_bytes[MAP_DOMAIN_COUNTER_TX]);
+ rmp->total_bindings = clib_host_to_net_u64 (pool_elts (mm->domains));
+ rmp->total_ip4_fragments = 0; // Not yet implemented. Should be a simple counter.
+ rmp->total_security_check[MAP_DOMAIN_COUNTER_TX] =
+ clib_host_to_net_u64 (map_error_counter_get
+ (ip4_map_node.index, MAP_ERROR_ENCAP_SEC_CHECK));
+ rmp->total_security_check[MAP_DOMAIN_COUNTER_RX] =
+ clib_host_to_net_u64 (map_error_counter_get
+ (ip4_map_node.index, MAP_ERROR_DECAP_SEC_CHECK));
+
+out:
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/*
+ * map_api_hookup
+ * Add the MAP API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_map;
+#undef _
+}
+
+static clib_error_t *
+map_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (map_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/map_doc.md b/src/vnet/map/map_doc.md
new file mode 100644
index 00000000..17f3c511
--- /dev/null
+++ b/src/vnet/map/map_doc.md
@@ -0,0 +1,69 @@
+# VPP MAP and Lw4o6 implementation {#map_doc}
+
+This is a memo intended to hold documentation of the VPP MAP and Lw4o6 implementations.
+Anything that is not directly obvious from the code should be documented here.
+
+
+
+## MAP-E Virtual Reassembly
+
+The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some metadata about the first fragment is kept and reused for subsequent fragments.
+
+Fragment caching and handling is not always necessary. It is performed when:
+* An IPv4 fragment is received and the destination IPv4 address is shared.
+* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on.
+* An IPv6 fragment is received.
+
+There are 3 dedicated nodes:
+* ip4-map-reass
+* ip6-map-ip4-reass
+* ip6-map-ip6-reass
+
+ip4-map sends all fragments to ip4-map-reass.
+ip6-map sends all inner-fragments to ip6-map-ip4-reass.
+ip6-map sends all outer-fragments to ip6-map-ip6-reass.
+
+IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes.
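+
+As an illustration, the IPv4 hash-key can be pictured as the following C structure (a sketch with hypothetical field names; the actual definitions live in map.h):
+
+    typedef struct {
+      ip4_address_t src;   /* outer IPv4 source address */
+      ip4_address_t dst;   /* outer IPv4 destination address */
+      u16 frag_id;         /* IPv4 Fragment-ID */
+      u8 protocol;         /* IPv4 protocol field */
+    } map_ip4_reass_key_sketch_t;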
+
+An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received.
+
+#### Virtual Reassembly configuration
+
+IPv4 and IPv6 virtual reassembly support the following configuration:
+ map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]
+
+lifetime:
+  The time in milliseconds a reassembly structure is considered valid. The longer the lifetime, the more reliable the reassembly, but the more likely the pool of reassembly structures is to be exhausted. The IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases.
+
+buffers:
+ The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool.
+
+pool-size:
+ The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total.
+
+ht-ratio:
+  The number of buckets in the hash-table is pool-size * ht-ratio.
+
+
+Any time pool-size or ht-ratio is modified, the hash-table is destroyed and re-created, which means all current state is lost.
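+
+For example, the following (an illustrative tuning, not a recommended default) allows up to 1024 concurrent IPv4 reassemblies with a 100 ms lifetime and a cap of 2048 cached buffers:
+
+    map params reassembly ip4 lifetime 100 pool-size 1024 buffers 2048 ht-ratio 8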
+
+
+##### Additional considerations
+
+Reassembly at high rates is expensive in terms of buffers. There is a trade-off between the lifetime and the number of allocated buffers. Reducing the lifetime helps, but at the cost of losing state for fragments that arrive far apart in time.
+
+Let:
+R be the packet rate at which fragments are received.
+F be the number of fragments per packet.
+
+Assuming the first fragment is always received last, we should have:
+buffers > lifetime * R / F * (F - 1)
+pool-size > lifetime * R/F
+
+This is a worst case. Receiving the first fragment earlier helps reduce the number of required buffers. Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers, based on the last fragment, the total number of bytes that should be transmitted; it therefore helps reduce 'pool-size'.
+
+But the formula shows that it is challenging to forward a significant amount of fragmented traffic at high rates. For instance, with a lifetime of 1 second and two fragments per packet, a 5Mpps fragment rate would require buffering up to 2.5 million fragments.
+
+If you want to do that, be prepared to configure a correspondingly large number of buffers and reassembly structures.
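+
+As a quick worked instance of the two formulas above (a standalone sketch, not VPP code):
+
+    #include <stdio.h>
+
+    int main (void)
+    {
+      double lifetime = 1.0; /* seconds */
+      double R = 5e6;        /* fragments received per second */
+      double F = 2.0;        /* fragments per packet */
+
+      /* worst case: the first fragment is received last */
+      double pool_size = lifetime * R / F;           /* 2.5e6 */
+      double buffers = lifetime * R / F * (F - 1.0); /* 2.5e6 */
+
+      printf ("pool-size > %.0f, buffers > %.0f\n", pool_size, buffers);
+      return 0;
+    }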
+
+
diff --git a/src/vnet/map/map_dpo.c b/src/vnet/map/map_dpo.c
new file mode 100644
index 00000000..430c1fbf
--- /dev/null
+++ b/src/vnet/map/map_dpo.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/map/map_dpo.h>
+
+/**
+ * The registered MAP DPO types
+ */
+dpo_type_t map_dpo_type;
+dpo_type_t map_t_dpo_type;
+
+void
+map_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo)
+{
+ dpo_set(dpo,
+ map_dpo_type,
+ dproto,
+ domain_index);
+}
+
+void
+map_t_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo)
+{
+ dpo_set(dpo,
+ map_t_dpo_type,
+ dproto,
+ domain_index);
+}
+
+
+u8*
+format_map_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+
+ return (format(s, "map: domain:%d", index));
+}
+
+u8*
+format_map_t_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+
+ return (format(s, "map-t: domain:%d", index));
+}
+
+
+static void
+map_dpo_lock (dpo_id_t *dpo)
+{
+}
+
+static void
+map_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+const static dpo_vft_t md_vft = {
+ .dv_lock = map_dpo_lock,
+ .dv_unlock = map_dpo_unlock,
+ .dv_format = format_map_dpo,
+};
+
+const static char* const map_ip4_nodes[] =
+{
+ "ip4-map",
+ NULL,
+};
+const static char* const map_ip6_nodes[] =
+{
+ "ip6-map",
+ NULL,
+};
+
+const static char* const * const map_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = map_ip4_nodes,
+ [DPO_PROTO_IP6] = map_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+const static dpo_vft_t md_t_vft = {
+ .dv_lock = map_dpo_lock,
+ .dv_unlock = map_dpo_unlock,
+ .dv_format = format_map_t_dpo,
+};
+
+const static char* const map_t_ip4_nodes[] =
+{
+ "ip4-map-t",
+ NULL,
+};
+const static char* const map_t_ip6_nodes[] =
+{
+ "ip6-map-t",
+ NULL,
+};
+
+const static char* const * const map_t_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = map_t_ip4_nodes,
+ [DPO_PROTO_IP6] = map_t_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+map_dpo_module_init (void)
+{
+ map_dpo_type = dpo_register_new_type(&md_vft, map_nodes);
+ map_t_dpo_type = dpo_register_new_type(&md_t_vft, map_t_nodes);
+}
diff --git a/src/vnet/map/map_dpo.h b/src/vnet/map/map_dpo.h
new file mode 100644
index 00000000..63bf4787
--- /dev/null
+++ b/src/vnet/map/map_dpo.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MAP_DPO_H__
+#define __MAP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of a MAP DPO
+ */
+
+extern void map_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo);
+extern void map_t_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo);
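+
+/*
+ * Illustrative use (a sketch, not part of this file): resolve a MAP domain
+ * index into a DPO that can then be stacked into the FIB:
+ *
+ *    dpo_id_t dpo = DPO_INVALID;
+ *    map_dpo_create (DPO_PROTO_IP4, domain_index, &dpo);
+ */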
+
+extern u8* format_map_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern dpo_type_t map_dpo_type;
+extern dpo_type_t map_t_dpo_type;
+
+extern void map_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/map/test.c b/src/vnet/map/test.c
new file mode 100644
index 00000000..f3c893a7
--- /dev/null
+++ b/src/vnet/map/test.c
@@ -0,0 +1,205 @@
+/*
+ * test.c : MAP unit tests
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include "map.h"
+
+static map_domain_t *
+get_domain(ip4_address_t * ip4_prefix, u8 ip4_prefix_len,
+ ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
+ ip6_address_t * ip6_src, u8 ip6_src_len,
+ u8 ea_bits_len, u8 psid_offset,
+ u8 psid_length, u16 mtu, u8 flags)
+{
+ map_domain_t * d = malloc(sizeof(*d));
+ u8 suffix_len;
+
+ /* EA bits must be within the first 64 bits */
+ if (ea_bits_len > 0 && (ip6_prefix_len + ea_bits_len) > 64)
+ return NULL;
+
+ /* Init domain struct */
+ d->ip4_prefix.as_u32 = ip4_prefix->as_u32;
+ d->ip4_prefix_len = ip4_prefix_len;
+ d->ip6_prefix = *ip6_prefix;
+ d->ip6_prefix_len = ip6_prefix_len;
+ d->ip6_src = *ip6_src;
+ d->ip6_src_len = ip6_src_len;
+ d->ea_bits_len = ea_bits_len;
+ d->psid_offset = psid_offset;
+ d->psid_length = psid_length;
+ d->mtu = mtu;
+ d->flags = flags;
+
+ /* How many, and which bits to grab from the IPv4 DA */
+ if (ip4_prefix_len + ea_bits_len < 32)
+ {
+ d->flags |= MAP_DOMAIN_PREFIX;
+ d->suffix_shift = 32 - ip4_prefix_len - ea_bits_len;
+ suffix_len = ea_bits_len;
+ }
+ else
+ {
+ d->suffix_shift = 0;
+ suffix_len = 32 - ip4_prefix_len;
+ }
+ d->suffix_mask = (1 << suffix_len) - 1;
+
+ d->psid_shift = 16 - psid_length - psid_offset;
+ d->psid_mask = (1 << d->psid_length) - 1;
+
+ if (ip6_prefix_len + suffix_len + d->psid_length > 64)
+ return NULL;
+
+ d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length;
+
+ return d;
+}
+
+
+/*
+ * VPP-340:
+ * map_add_domain ip4-pfx 20.0.0.0/8 ip6-pfx 2001:db8::/40 ip6-src 2001:db8:ffff::/96 ea-bits-len 24 psid-offset 0 psid-len 0 map-t
+ * IPv4 src = 100.0.0.1
+ * IPv4 dst = 20.169.201.219
+ * UDP dest port = 1232
+ * IPv6 src = 2001:db8:ffff::6400:1
+ * IPv6 dst = a9c9:dfb8::14a9:c9db:0
+ * a9c9:dfb8::14a9:c9db:0 != 2001:db8:a9:c9db:0:14a9:c9db:0
+ */
+static void
+test_map_t_destaddr (void)
+{
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip6_address_t ip6_src;
+
+ ip4_prefix.as_u32 = clib_host_to_net_u32(0x14000000);
+ ip6_prefix.as_u64[0] = clib_host_to_net_u64(0x20010db800000000);
+ ip6_prefix.as_u64[1] = 0;
+ ip6_src.as_u64[0] = clib_host_to_net_u64(0x20010db8ffff0000);
+ map_domain_t * d = get_domain (&ip4_prefix, 8, &ip6_prefix, 40, &ip6_src, 96, 24, 0, 0, 0, MAP_DOMAIN_TRANSLATION);
+
+ ip6_address_t dst6;
+
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db800a9c9db);
+ assert(dst6.as_u64[1] == 0x000014a9c9db0000);
+}
+
+/*
+ * VPP-228
+ * ip4-pfx 20.0.0.0/8
+ * ip6-pfx 2001:db8::/<n>
+ * ip6-src 2001:db8:ffff::1
+ * ea-bits-len 16 psid-offset 6 psid-len 8
+ * 20.169.201.219 port 1232
+ */
+static void
+test_map_eabits (void)
+{
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip6_address_t ip6_src;
+ ip6_address_t dst6;
+
+ ip4_prefix.as_u32 = clib_host_to_net_u32(0x14000000);
+ ip6_prefix.as_u64[0] = clib_host_to_net_u64(0x20010db800000000);
+ ip6_prefix.as_u64[1] = 0;
+ ip6_src.as_u64[0] = clib_host_to_net_u64(0x20010db8ffff0000);
+ ip6_src.as_u64[1] = clib_host_to_net_u64(0x0000000000000001);
+ map_domain_t * d = get_domain (&ip4_prefix, 16, &ip6_prefix, 48, &ip6_src,
+ 128, 16, 6, 8, 0, 0);
+ assert(!d);
+
+ //20.0.0.0/8 2001:db8::/32 4 2001:db8:a000::14a0:0:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 4, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a0000000);
+ assert(dst6.as_u64[1] == 0x000014a000000000);
+
+ //20.0.0.0/8 2001:db8::/32 8 2001:db8:a900::14a9:0:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 8, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9000000);
+ assert(dst6.as_u64[1] == 0x000014a900000000);
+
+ //20.0.0.0/8 2001:db8::/32 10 2001:db8:a9c0::14a9:c000:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 10, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9c00000);
+ assert(dst6.as_u64[1] == 0x000014a9c0000000);
+
+ //20.0.0.0/8 2001:db8::/32 16 2001:db8:a9c9::14a9:c900:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 16, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9c90000);
+ assert(dst6.as_u64[1] == 0x000014a9c9000000);
+
+ //20.0.0.0/8 2001:db8::/32 20 2001:db8:a9c9:d000:0:14a9:c9d0:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 20, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9c9d000);
+ assert(dst6.as_u64[1] == 0x000014a9c9d00000);
+
+ //20.0.0.0/8 2001:db8::/32 23 2001:db8:a9c9:da00:0:14a9:c9da:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 23, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9c9da00);
+ assert(dst6.as_u64[1] == 0x000014a9c9da0000);
+
+ //20.169.201.0/24 2001:db8::/32 7 2001:db8:da00::14a9:c9da:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 7, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a8000000);
+ assert(dst6.as_u64[1] == 0x000014a800000000);
+}
+
+#define foreach_test_case \
+ _(map_t_destaddr) \
+ _(map_eabits)
+
+static void
+run_tests (void)
+{
+#define _(_test_name) \
+ test_ ## _test_name ();
+
+ foreach_test_case
+#undef _
+}
+
+int main (void)
+{
+ run_tests ();
+ return 0;
+}
diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c
new file mode 100644
index 00000000..b2482580
--- /dev/null
+++ b/src/vnet/mfib/ip4_mfib.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mfib/ip4_mfib.h>
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/mfib_entry.h>
+
+static const mfib_prefix_t ip4_specials[] = {
+ {
+ /* (*,*)/0 */
+ .fp_src_addr = {
+ .ip4.data_u32 = 0,
+ },
+ .fp_grp_addr = {
+ .ip4.data_u32 = 0,
+ },
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+};
+
+static u32
+ip4_create_mfib_with_table_id (u32 table_id,
+ mfib_source_t src)
+{
+ mfib_table_t *mfib_table;
+
+ pool_get_aligned(ip4_main.mfibs, mfib_table, CLIB_CACHE_LINE_BYTES);
+ memset(mfib_table, 0, sizeof(*mfib_table));
+
+ mfib_table->mft_proto = FIB_PROTOCOL_IP4;
+ mfib_table->mft_index =
+ mfib_table->v4.index =
+ (mfib_table - ip4_main.mfibs);
+
+ hash_set (ip4_main.mfib_index_by_table_id,
+ table_id,
+ mfib_table->mft_index);
+
+ mfib_table->mft_table_id =
+ mfib_table->v4.table_id =
+ table_id;
+
+ mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4, src);
+
+ /*
+ * add the special entries into the new FIB
+ */
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+ {
+ mfib_prefix_t prefix = ip4_specials[ii];
+
+ prefix.fp_src_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_src_addr.ip4.data_u32);
+ prefix.fp_grp_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_grp_addr.ip4.data_u32);
+
+ mfib_table_entry_update(mfib_table->mft_index,
+ &prefix,
+ MFIB_SOURCE_DEFAULT_ROUTE,
+ MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_DROP);
+ }
+
+ return (mfib_table->mft_index);
+}
+
+void
+ip4_mfib_table_destroy (ip4_mfib_t *mfib)
+{
+ mfib_table_t *mfib_table = (mfib_table_t*)mfib;
+ int ii;
+
+ /*
+ * remove all the specials we added when the table was created.
+ */
+ for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+ {
+ fib_node_index_t mfei;
+ mfib_prefix_t prefix = ip4_specials[ii];
+
+ prefix.fp_src_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_src_addr.ip4.data_u32);
+ prefix.fp_grp_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_grp_addr.ip4.data_u32);
+
+ mfei = mfib_table_lookup(mfib_table->mft_index, &prefix);
+ mfib_table_entry_delete_index(mfei, MFIB_SOURCE_DEFAULT_ROUTE);
+ }
+
+ /*
+ * validate no more routes.
+ */
+ ASSERT(0 == mfib_table->mft_total_route_counts);
+ ASSERT(~0 != mfib_table->mft_table_id);
+
+ hash_unset (ip4_main.mfib_index_by_table_id, mfib_table->mft_table_id);
+ pool_put(ip4_main.mfibs, mfib_table);
+}
+
+u32
+ip4_mfib_table_find_or_create_and_lock (u32 table_id,
+ mfib_source_t src)
+{
+ u32 index;
+
+ index = ip4_mfib_index_from_table_id(table_id);
+ if (~0 == index)
+ return ip4_create_mfib_with_table_id(table_id, src);
+ mfib_table_lock(index, FIB_PROTOCOL_IP4, src);
+
+ return (index);
+}
+
+u32
+ip4_mfib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+ if (sw_if_index >= vec_len(ip4_main.mfib_index_by_sw_if_index))
+ {
+ /*
+ * This is the case for interfaces that are not yet mapped to
+       * an IP table
+ */
+ return (~0);
+ }
+ return (ip4_main.mfib_index_by_sw_if_index[sw_if_index]);
+}
+
+#define IPV4_MFIB_GRP_LEN(_len)\
+  ((_len) > 32 ? 32 : (_len))
+
+#define IP4_MFIB_MK_KEY(_grp, _src, _len, _key) \
+{ \
+ _key = ((u64)(_grp->data_u32 & \
+ ip4_main.fib_masks[IPV4_MFIB_GRP_LEN(_len)])) << 32; \
+ _key |= _src->data_u32; \
+}
+#define IP4_MFIB_MK_GRP_KEY(_grp, _len, _key) \
+{ \
+ _key = ((u64)(_grp->data_u32 & \
+ ip4_main.fib_masks[IPV4_MFIB_GRP_LEN(_len)])) << 32; \
+}
+
+/*
+ * ip4_mfib_table_lookup_exact_match
+ *
+ * Exact match prefix lookup
+ */
+fib_node_index_t
+ip4_mfib_table_lookup_exact_match (const ip4_mfib_t *mfib,
+ const ip4_address_t *grp,
+ const ip4_address_t *src,
+ u32 len)
+{
+ uword * hash, * result;
+ u64 key;
+
+ hash = mfib->fib_entry_by_dst_address[len];
+ IP4_MFIB_MK_KEY(grp, src, len, key);
+
+ result = hash_get(hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+/*
+ * ip4_mfib_table_lookup
+ *
+ * Longest prefix match
+ */
+fib_node_index_t
+ip4_mfib_table_lookup (const ip4_mfib_t *mfib,
+ const ip4_address_t *src,
+ const ip4_address_t *grp,
+ u32 len)
+{
+ uword * hash, * result;
+ i32 mask_len;
+ u64 key;
+
+ mask_len = len;
+
+ if (PREDICT_TRUE(64 == mask_len))
+ {
+ hash = mfib->fib_entry_by_dst_address[mask_len];
+ IP4_MFIB_MK_KEY(grp, src, mask_len, key);
+
+ result = hash_get (hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ }
+
+ for (mask_len = 32; mask_len >= 0; mask_len--)
+ {
+ hash = mfib->fib_entry_by_dst_address[mask_len];
+ IP4_MFIB_MK_GRP_KEY(grp, mask_len, key);
+
+ result = hash_get (hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+void
+ip4_mfib_table_entry_insert (ip4_mfib_t *mfib,
+ const ip4_address_t *grp,
+ const ip4_address_t *src,
+ u32 len,
+ fib_node_index_t fib_entry_index)
+{
+ uword * hash, * result;
+ u64 key;
+
+ IP4_MFIB_MK_KEY(grp, src, len, key);
+ hash = mfib->fib_entry_by_dst_address[len];
+ result = hash_get (hash, key);
+
+ if (NULL == result) {
+ /*
+ * adding a new entry
+ */
+ if (NULL == hash) {
+ hash = hash_create (32 /* elts */, sizeof (uword));
+ hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK);
+ }
+ hash = hash_set(hash, key, fib_entry_index);
+ mfib->fib_entry_by_dst_address[len] = hash;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+}
+
+void
+ip4_mfib_table_entry_remove (ip4_mfib_t *mfib,
+ const ip4_address_t *grp,
+ const ip4_address_t *src,
+ u32 len)
+{
+ uword * hash, * result;
+ u64 key;
+
+ IP4_MFIB_MK_KEY(grp, src, len, key);
+ hash = mfib->fib_entry_by_dst_address[len];
+ result = hash_get (hash, key);
+
+ if (NULL == result)
+ {
+ /*
+       * removing a non-existent entry. i'll allow it.
+ */
+ }
+ else
+ {
+ hash_unset(hash, key);
+ }
+
+ mfib->fib_entry_by_dst_address[len] = hash;
+}
+
+void
+ip4_mfib_table_walk (ip4_mfib_t *mfib,
+ mfib_table_walk_fn_t fn,
+ void *ctx)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++)
+ {
+ uword * hash = mfib->fib_entry_by_dst_address[i];
+
+ if (NULL != hash)
+ {
+ hash_pair_t * p;
+
+ hash_foreach_pair (p, hash,
+ ({
+ fn(p->value[0], ctx);
+ }));
+ }
+ }
+}
+
+static void
+ip4_mfib_table_show_all (ip4_mfib_t *mfib,
+ vlib_main_t * vm)
+{
+ fib_node_index_t *mfib_entry_indicies;
+ fib_node_index_t *mfib_entry_index;
+ int i;
+
+ mfib_entry_indicies = NULL;
+
+ for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++)
+ {
+ uword * hash = mfib->fib_entry_by_dst_address[i];
+
+ if (NULL != hash)
+ {
+ hash_pair_t * p;
+
+ hash_foreach_pair (p, hash,
+ ({
+ vec_add1(mfib_entry_indicies, p->value[0]);
+ }));
+ }
+ }
+
+ vec_sort_with_function(mfib_entry_indicies, mfib_entry_cmp_for_sort);
+
+ vec_foreach(mfib_entry_index, mfib_entry_indicies)
+ {
+ vlib_cli_output(vm, "%U",
+ format_mfib_entry,
+ *mfib_entry_index,
+ MFIB_ENTRY_FORMAT_BRIEF);
+ }
+
+ vec_free(mfib_entry_indicies);
+}
+
+static void
+ip4_mfib_table_show_one (ip4_mfib_t *mfib,
+ vlib_main_t * vm,
+ ip4_address_t *src,
+ ip4_address_t *grp,
+ u32 mask_len)
+{
+ vlib_cli_output(vm, "%U",
+ format_mfib_entry,
+ ip4_mfib_table_lookup(mfib, src, grp, mask_len),
+ MFIB_ENTRY_FORMAT_DETAIL);
+}
+
+static clib_error_t *
+ip4_show_mfib (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip4_main_t * im4 = &ip4_main;
+ mfib_table_t *mfib_table;
+ int verbose, matching;
+ ip4_address_t grp, src = {{0}};
+ u32 mask = 32;
+ int i, table_id = -1, fib_index = ~0;
+
+ verbose = 1;
+ matching = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "brief") || unformat (input, "summary")
+ || unformat (input, "sum"))
+ verbose = 0;
+
+ else if (unformat (input, "%U %U",
+ unformat_ip4_address, &src,
+ unformat_ip4_address, &grp))
+ {
+ matching = 1;
+ mask = 64;
+ }
+ else if (unformat (input, "%U/%d", unformat_ip4_address, &grp, &mask))
+ {
+ memset(&src, 0, sizeof(src));
+ matching = 1;
+ }
+ else if (unformat (input, "%U", unformat_ip4_address, &grp))
+ {
+ memset(&src, 0, sizeof(src));
+ matching = 1;
+ mask = 32;
+ }
+ else if (unformat (input, "table %d", &table_id))
+ ;
+ else if (unformat (input, "index %d", &fib_index))
+ ;
+ else
+ break;
+ }
+
+ pool_foreach (mfib_table, im4->mfibs,
+ ({
+ ip4_mfib_t *mfib = &mfib_table->v4;
+
+ if (table_id >= 0 && table_id != (int)mfib->table_id)
+ continue;
+ if (fib_index != ~0 && fib_index != (int)mfib->index)
+ continue;
+
+ vlib_cli_output (vm, "%U, fib_index %d",
+ format_mfib_table_name, mfib->index, FIB_PROTOCOL_IP4,
+ mfib->index);
+
+ /* Show summary? */
+ if (! verbose)
+ {
+ vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
+ for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++)
+ {
+ uword * hash = mfib->fib_entry_by_dst_address[i];
+ uword n_elts = hash_elts (hash);
+ if (n_elts > 0)
+ vlib_cli_output (vm, "%20d%16d", i, n_elts);
+ }
+ continue;
+ }
+
+ if (!matching)
+ {
+ ip4_mfib_table_show_all(mfib, vm);
+ }
+ else
+ {
+ ip4_mfib_table_show_one(mfib, vm, &src, &grp, mask);
+ }
+ }));
+
+ return 0;
+}
+
+/*?
+ * This command displays the IPv4 Multicast FIB Tables (VRF Tables) and
+ * the route entries for each table.
+ *
+ * @note This command will run for a long time when the FIB tables are
+ * comprised of millions of entries. For those scenarios, consider displaying
+ * a single table or summary mode.
+ *
+ * @cliexpar
+ * Example of how to display all the IPv4 Multicast FIB tables:
+ * @cliexstart{show ip mfib}
+ * ipv4-VRF:0, fib_index 0
+ * (*, 0.0.0.0/0): flags:D,
+ * Interfaces:
+ * multicast-ip4-chain
+ * [@1]: dpo-drop ip4
+ * (*, 232.1.1.1/32):
+ * Interfaces:
+ * test-eth1: Forward,
+ * test-eth2: Forward,
+ * test-eth0: Accept,
+ * multicast-ip4-chain
+ * [@2]: dpo-replicate: [index:1 buckets:2 to:[0:0]]
+ * [0] [@1]: ipv4-mcast: test-eth1: IP4: d0:d1:d2:d3:d4:01 -> 01:00:05:00:00:00
+ * [1] [@1]: ipv4-mcast: test-eth2: IP4: d0:d1:d2:d3:d4:02 -> 01:00:05:00:00:00
+ *
+ * @cliexend
+ * Example of how to display a summary of all IPv4 Multicast FIB tables:
+ * @cliexstart{show ip mfib summary}
+ * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * Prefix length Count
+ * 0 1
+ * 8 2
+ * 32 4
+ * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto
+ * Prefix length Count
+ * 0 1
+ * 8 2
+ * 24 2
+ * 32 4
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip4_show_mfib_command, static) = {
+ .path = "show ip mfib",
+ .short_help = "show ip mfib [summary] [table <table-id>] [index <fib-id>] [<grp-addr>[/<mask>]] [<grp-addr>] [<src-addr> <grp-addr>]",
+ .function = ip4_show_mfib,
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/mfib/ip4_mfib.h b/src/vnet/mfib/ip4_mfib.h
new file mode 100644
index 00000000..e31fb744
--- /dev/null
+++ b/src/vnet/mfib/ip4_mfib.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv4 Multicast-FIB
+ *
+ * FIXME
+ *
+ * This IPv4 mFIB is used by the protocol independent mFIB. So directly using
+ * these APIs in client code is not encouraged. However, this IPv4 mFIB can be
+ * used if all the client wants is an IPv4 multicast prefix data-base.
+ */
+
+#ifndef __IP4_MFIB_H__
+#define __IP4_MFIB_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/mfib/mfib_table.h>
+
+extern fib_node_index_t ip4_mfib_table_lookup(const ip4_mfib_t *fib,
+ const ip4_address_t *src,
+ const ip4_address_t *grp,
+ u32 len);
+extern fib_node_index_t ip4_mfib_table_lookup_exact_match(const ip4_mfib_t *fib,
+ const ip4_address_t *grp,
+ const ip4_address_t *src,
+ u32 len);
+
+extern void ip4_mfib_table_entry_remove(ip4_mfib_t *fib,
+ const ip4_address_t *grp,
+ const ip4_address_t *src,
+ u32 len);
+
+extern void ip4_mfib_table_entry_insert(ip4_mfib_t *fib,
+ const ip4_address_t *grp,
+ const ip4_address_t *src,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip4_mfib_table_destroy(ip4_mfib_t *fib);
+
+/**
+ * @brief Get the FIB at the given index
+ */
+static inline ip4_mfib_t *
+ip4_mfib_get (u32 index)
+{
+ return (&(pool_elt_at_index(ip4_main.mfibs, index)->v4));
+}
+
+/**
+ * @brief Get or create an IPv4 multicast fib.
+ *
+ * Get or create an IPv4 multicast fib with the provided table ID.
+ *
+ * @param table_id
+ * When set to \c ~0, an arbitrary and unused fib ID is picked
+ * and can be retrieved from the resulting table.
+ * Otherwise, the fib ID to be used to retrieve or create the desired fib.
+ * @returns The index of the retrieved or created fib.
+ *
+ */
+extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id,
+ mfib_source_t src);
+extern u32 ip4_mfib_table_create_and_lock(mfib_source_t src);
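+
+/*
+ * Example use (a sketch; MFIB_SOURCE_API is assumed to be a valid
+ * mfib_source_t value):
+ *
+ *    u32 mfib_index;
+ *
+ *    mfib_index = ip4_mfib_table_find_or_create_and_lock (0, MFIB_SOURCE_API);
+ *    ip4_mfib_t *mfib = ip4_mfib_get (mfib_index);
+ */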
+
+static inline
+u32 ip4_mfib_index_from_table_id (u32 table_id)
+{
+ ip4_main_t * im = &ip4_main;
+ uword * p;
+
+ p = hash_get (im->mfib_index_by_table_id, table_id);
+ if (!p)
+ return ~0;
+
+ return p[0];
+}
+
+extern u32 ip4_mfib_table_get_index_for_sw_if_index(u32 sw_if_index);
+
+/**
+ * @brief Walk the IP4 mfib table.
+ *
+ * @param mfib the table to walk
+ * @param fn The function to invoke on each entry visited
+ * @param ctx A context passed in the visit function
+ */
+extern void ip4_mfib_table_walk (ip4_mfib_t *mfib,
+ mfib_table_walk_fn_t fn,
+ void *ctx);
+
+#endif
diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c
new file mode 100644
index 00000000..e4861330
--- /dev/null
+++ b/src/vnet/mfib/ip6_mfib.c
@@ -0,0 +1,699 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mfib/ip6_mfib.h>
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/fib/ip6_fib.h>
+
+/**
+ * The number of bytes in an address/mask key in the radix tree.
+ * First byte is the length in bytes.
+ */
+#define IP6_MFIB_KEY_LEN 33
+
+/**
+ * Key and mask for radix
+ */
+typedef struct ip6_mfib_key_t_
+{
+ u8 key[IP6_MFIB_KEY_LEN];
+ u8 mask[IP6_MFIB_KEY_LEN];
+} ip6_mfib_key_t;
+
+/**
+ * An object that is inserted into the radix tree.
+ * Since it's in the tree and has pointers, it cannot realloc and so cannot
+ * come from a vlib pool.
+ */
+typedef struct ip6_mfib_node_t_
+{
+ struct radix_node i6mn_nodes[2];
+ ip6_mfib_key_t i6mn_key;
+ index_t i6mn_entry;
+} ip6_mfib_node_t;
+
+static const mfib_prefix_t all_zeros = {
+ /* (*,*) */
+ .fp_src_addr = {
+ .ip6.as_u64 = {0, 0},
+ },
+ .fp_grp_addr = {
+ .ip6.as_u64 = {0, 0},
+ },
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP6,
+};
+
+typedef enum ip6_mfib_special_type_t_ {
+ IP6_MFIB_SPECIAL_TYPE_NONE,
+ IP6_MFIB_SPECIAL_TYPE_SOLICITED,
+} ip6_mfib_special_type_t;
+
+typedef struct ip6_mfib_special_t_ {
+ /**
+ * @brief solicited or not
+ */
+ ip6_mfib_special_type_t ims_type;
+
+ /**
+ * @brief the Prefix length
+ */
+ u8 ims_len;
+
+ /**
+ * @brief The last byte of the mcast address
+ */
+ u8 ims_byte;
+ /**
+ * @brief The scope of the address
+ */
+ u8 ims_scope;
+} ip6_mfib_special_t;
+
+static const ip6_mfib_special_t ip6_mfib_specials[] =
+{
+ {
+ /*
+ * Add ff02::1:ff00:0/104 via local route for all tables.
+ * This is required for neighbor discovery to work.
+ */
+ .ims_type = IP6_MFIB_SPECIAL_TYPE_SOLICITED,
+ .ims_len = 104,
+ },
+ {
+ /*
+ * all-routers multicast address
+ */
+ .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE,
+ .ims_scope = IP6_MULTICAST_SCOPE_link_local,
+ .ims_byte = IP6_MULTICAST_GROUP_ID_all_routers,
+ .ims_len = 128,
+ },
+ {
+ /*
+ * all-nodes multicast address
+ */
+ .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE,
+ .ims_scope = IP6_MULTICAST_SCOPE_link_local,
+ .ims_byte = IP6_MULTICAST_GROUP_ID_all_hosts,
+ .ims_len = 128,
+ },
+ {
+ /*
+ * Add all-mldv2 multicast address via local route for all tables
+ */
+ .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE,
+ .ims_len = 128,
+ .ims_scope = IP6_MULTICAST_SCOPE_link_local,
+ .ims_byte = IP6_MULTICAST_GROUP_ID_mldv2_routers,
+ }
+};
+
+#define FOR_EACH_IP6_SPECIAL(_pfx, _body) \
+{ \
+ const ip6_mfib_special_t *_spec; \
+ u8 _ii; \
+ for (_ii = 0; \
+ _ii < ARRAY_LEN(ip6_mfib_specials); \
+ _ii++) \
+ { \
+ _spec = &ip6_mfib_specials[_ii]; \
+ if (IP6_MFIB_SPECIAL_TYPE_SOLICITED == _spec->ims_type) \
+ { \
+ ip6_set_solicited_node_multicast_address( \
+ &(_pfx)->fp_grp_addr.ip6, 0); \
+ } \
+ else \
+ { \
+ ip6_set_reserved_multicast_address ( \
+ &(_pfx)->fp_grp_addr.ip6, \
+ _spec->ims_scope, \
+ _spec->ims_byte); \
+ } \
+ (_pfx)->fp_len = _spec->ims_len; \
+ do { _body; } while (0); \
+ } \
+}
+
+
+static u32
+ip6_create_mfib_with_table_id (u32 table_id,
+ mfib_source_t src)
+{
+ mfib_table_t *mfib_table;
+ mfib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+ const fib_route_path_t path_for_us = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+
+ pool_get_aligned(ip6_main.mfibs, mfib_table, CLIB_CACHE_LINE_BYTES);
+ memset(mfib_table, 0, sizeof(*mfib_table));
+
+ mfib_table->mft_proto = FIB_PROTOCOL_IP6;
+ mfib_table->mft_index =
+ mfib_table->v6.index =
+ (mfib_table - ip6_main.mfibs);
+
+ hash_set (ip6_main.mfib_index_by_table_id,
+ table_id,
+ mfib_table->mft_index);
+
+ mfib_table->mft_table_id =
+ mfib_table->v6.table_id =
+ table_id;
+
+ mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6, src);
+
+ mfib_table->v6.rhead =
+ clib_mem_alloc_aligned (sizeof(*mfib_table->v6.rhead),
+ CLIB_CACHE_LINE_BYTES);
+ rn_inithead0(mfib_table->v6.rhead, 8);
+
+ /*
+ * add the special entries into the new FIB
+ */
+ mfib_table_entry_update(mfib_table->mft_index,
+ &all_zeros,
+ MFIB_SOURCE_DEFAULT_ROUTE,
+ MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_DROP);
+
+ /*
+ * Add each of the specials
+ */
+ FOR_EACH_IP6_SPECIAL(&pfx,
+ ({
+ mfib_table_entry_path_update(mfib_table->mft_index,
+ &pfx,
+ MFIB_SOURCE_SPECIAL,
+ &path_for_us,
+ MFIB_ITF_FLAG_FORWARD);
+ }));
+
+ return (mfib_table->mft_index);
+}
+
+void
+ip6_mfib_table_destroy (ip6_mfib_t *mfib)
+{
+ mfib_table_t *mfib_table = (mfib_table_t*)mfib;
+ fib_node_index_t mfei;
+ mfib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+ const fib_route_path_t path_for_us = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+
+ /*
+ * remove all the specials we added when the table was created.
+ */
+ FOR_EACH_IP6_SPECIAL(&pfx,
+ {
+ mfib_table_entry_path_remove(mfib_table->mft_index,
+ &pfx,
+ MFIB_SOURCE_SPECIAL,
+ &path_for_us);
+ });
+
+ mfei = mfib_table_lookup_exact_match(mfib_table->mft_index, &all_zeros);
+ mfib_table_entry_delete_index(mfei, MFIB_SOURCE_DEFAULT_ROUTE);
+
+ /*
+ * validate no more routes.
+ */
+ ASSERT(0 == mfib_table->mft_total_route_counts);
+ ASSERT(~0 != mfib_table->mft_table_id);
+
+ hash_unset (ip6_main.mfib_index_by_table_id, mfib_table->mft_table_id);
+ clib_mem_free(mfib_table->v6.rhead);
+ pool_put(ip6_main.mfibs, mfib_table);
+}
+
+void
+ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable)
+{
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ };
+ mfib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+ u32 mfib_index;
+
+ vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
+ mfib_index = ip6_mfib_table_get_index_for_sw_if_index(sw_if_index);
+
+ if (is_enable)
+ {
+ FOR_EACH_IP6_SPECIAL(&pfx,
+ {
+ mfib_table_entry_path_update(mfib_index,
+ &pfx,
+ MFIB_SOURCE_SPECIAL,
+ &path,
+ MFIB_ITF_FLAG_ACCEPT);
+ });
+ }
+ else
+ {
+ FOR_EACH_IP6_SPECIAL(&pfx,
+ {
+ mfib_table_entry_path_remove(mfib_index,
+ &pfx,
+ MFIB_SOURCE_SPECIAL,
+ &path);
+ });
+ }
+}
+
+u32
+ip6_mfib_table_find_or_create_and_lock (u32 table_id,
+ mfib_source_t src)
+{
+ u32 index;
+
+ index = ip6_mfib_index_from_table_id(table_id);
+ if (~0 == index)
+ return ip6_create_mfib_with_table_id(table_id, src);
+ mfib_table_lock(index, FIB_PROTOCOL_IP6, src);
+
+ return (index);
+}
+
+u32
+ip6_mfib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+ if (sw_if_index >= vec_len(ip6_main.mfib_index_by_sw_if_index))
+ {
+ /*
+ * This is the case for interfaces that are not yet mapped to
+       * an IP table
+ */
+ return (~0);
+ }
+ return (ip6_main.mfib_index_by_sw_if_index[sw_if_index]);
+}
+
+#define IP6_MFIB_MK_KEY(_grp, _src, _key) \
+{ \
+    (_key)->key[0] = IP6_MFIB_KEY_LEN;                              \
+ memcpy((_key)->key+1, _grp, 16); \
+ memcpy((_key)->key+17, _src, 16); \
+}
+
+#define IP6_MFIB_MK_KEY_MASK(_grp, _src, _len, _key) \
+{ \
+ IP6_MFIB_MK_KEY(_grp, _src, _key); \
+ \
+    (_key)->mask[0] = IP6_MFIB_KEY_LEN;                             \
+ if (_len <= 128) \
+ { \
+ memcpy((_key)->mask+1, &ip6_main.fib_masks[_len], 16); \
+ memset((_key)->mask+17, 0, 16); \
+ } \
+ else \
+ { \
+ ASSERT(_len == 256); \
+ memcpy((_key)->mask+1, &ip6_main.fib_masks[128], 16); \
+ memcpy((_key)->mask+17, &ip6_main.fib_masks[128], 16); \
+ } \
+}
+
+/*
+ * ip6_fib_table_lookup_exact_match
+ *
+ * Exact match prefix lookup
+ */
+fib_node_index_t
+ip6_mfib_table_lookup_exact_match (const ip6_mfib_t *mfib,
+ const ip6_address_t *grp,
+ const ip6_address_t *src,
+ u32 len)
+{
+ ip6_mfib_node_t *i6mn;
+ ip6_mfib_key_t key;
+
+ IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+
+ i6mn = (ip6_mfib_node_t*) rn_lookup(key.key, key.mask,
+ (struct radix_node_head *)mfib->rhead);
+
+ if (NULL == i6mn)
+ {
+ return (INDEX_INVALID);
+ }
+
+ return (i6mn->i6mn_entry);
+}
+
+/*
+ * ip6_mfib_table_lookup
+ *
+ * Longest prefix match
+ */
+fib_node_index_t
+ip6_mfib_table_lookup (const ip6_mfib_t *mfib,
+ const ip6_address_t *src,
+ const ip6_address_t *grp,
+ u32 len)
+{
+ ip6_mfib_node_t *i6mn;
+ ip6_mfib_key_t key;
+
+ IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+
+ i6mn = (ip6_mfib_node_t*) rn_search_m(key.key,
+ mfib->rhead->rnh_treetop,
+ key.mask);
+
+ ASSERT(NULL != i6mn);
+
+ return (i6mn->i6mn_entry);
+}
+
+/*
+ * ip6_mfib_table_lookup2
+ *
+ * Longest prefix match without a mask (data-plane lookup)
+ */
+fib_node_index_t
+ip6_mfib_table_lookup2 (const ip6_mfib_t *mfib,
+ const ip6_address_t *src,
+ const ip6_address_t *grp)
+{
+ ip6_mfib_node_t *i6mn;
+ ip6_mfib_key_t key;
+
+ IP6_MFIB_MK_KEY(grp, src, &key);
+
+ i6mn = (ip6_mfib_node_t*) rn_match(key.key,
+ (struct radix_node_head *)mfib->rhead); // const cast
+
+ ASSERT(NULL != i6mn);
+
+ return (i6mn->i6mn_entry);
+}
+
+void
+ip6_mfib_table_entry_insert (ip6_mfib_t *mfib,
+ const ip6_address_t *grp,
+ const ip6_address_t *src,
+ u32 len,
+ fib_node_index_t mfib_entry_index)
+{
+ ip6_mfib_node_t *i6mn = clib_mem_alloc(sizeof(*i6mn));
+
+ memset(i6mn->i6mn_nodes, 0, sizeof(i6mn->i6mn_nodes));
+
+ IP6_MFIB_MK_KEY_MASK(grp, src, len, &i6mn->i6mn_key);
+ i6mn->i6mn_entry = mfib_entry_index;
+
+ if (NULL == rn_addroute(i6mn->i6mn_key.key,
+ i6mn->i6mn_key.mask,
+ mfib->rhead,
+ i6mn->i6mn_nodes))
+ {
+ ASSERT(0);
+ }
+}
+
+void
+ip6_mfib_table_entry_remove (ip6_mfib_t *mfib,
+ const ip6_address_t *grp,
+ const ip6_address_t *src,
+ u32 len)
+{
+ ip6_mfib_node_t *i6mn;
+ ip6_mfib_key_t key;
+
+ IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+
+ i6mn = (ip6_mfib_node_t*) rn_delete(key.key, key.mask, mfib->rhead);
+
+ clib_mem_free(i6mn);
+}
+
+static clib_error_t *
+ip6_mfib_module_init (vlib_main_t * vm)
+{
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION(ip6_mfib_module_init);
+
+static void
+ip6_mfib_table_show_one (ip6_mfib_t *mfib,
+ vlib_main_t * vm,
+ ip6_address_t *src,
+ ip6_address_t *grp,
+ u32 mask_len)
+{
+ vlib_cli_output(vm, "%U",
+ format_mfib_entry,
+ ip6_mfib_table_lookup(mfib, src, grp, mask_len),
+ MFIB_ENTRY_FORMAT_DETAIL);
+}
+
+typedef struct ip6_mfib_show_ctx_t_ {
+ fib_node_index_t *entries;
+} ip6_mfib_show_ctx_t;
+
+
+static int
+ip6_mfib_table_collect_entries (fib_node_index_t mfei, void *arg)
+{
+ ip6_mfib_show_ctx_t *ctx = arg;
+
+ vec_add1(ctx->entries, mfei);
+
+ return (0);
+}
+
+static void
+ip6_mfib_table_show_all (ip6_mfib_t *mfib,
+ vlib_main_t * vm)
+{
+ fib_node_index_t *mfib_entry_index;
+ ip6_mfib_show_ctx_t ctx = {
+ .entries = NULL,
+ };
+
+ ip6_mfib_table_walk(mfib,
+ ip6_mfib_table_collect_entries,
+ &ctx);
+
+ vec_sort_with_function(ctx.entries, mfib_entry_cmp_for_sort);
+
+ vec_foreach(mfib_entry_index, ctx.entries)
+ {
+ vlib_cli_output(vm, "%U",
+ format_mfib_entry,
+ *mfib_entry_index,
+ MFIB_ENTRY_FORMAT_BRIEF);
+ }
+
+ vec_free(ctx.entries);
+}
+
+typedef struct ip6_mfib_radix_walk_ctx_t_
+{
+ mfib_table_walk_fn_t user_fn;
+ void *user_ctx;
+} ip6_mfib_radix_walk_ctx_t;
+
+static int
+ip6_mfib_table_radix_walk (struct radix_node *rn,
+ void *arg)
+{
+ ip6_mfib_radix_walk_ctx_t *ctx = arg;
+ ip6_mfib_node_t *i6mn;
+
+ i6mn = (ip6_mfib_node_t*) rn;
+
+ ctx->user_fn(i6mn->i6mn_entry, ctx->user_ctx);
+
+ return (0);
+}
+
+void
+ip6_mfib_table_walk (ip6_mfib_t *mfib,
+ mfib_table_walk_fn_t fn,
+ void *ctx)
+{
+ ip6_mfib_radix_walk_ctx_t rn_ctx = {
+ .user_fn = fn,
+ .user_ctx = ctx,
+ };
+
+ rn_walktree(mfib->rhead,
+ ip6_mfib_table_radix_walk,
+ &rn_ctx);
+}
+
+static clib_error_t *
+ip6_show_mfib (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+    ip6_main_t * im6 = &ip6_main;
+ mfib_table_t *mfib_table;
+ int verbose, matching;
+ ip6_address_t grp, src = {{0}};
+    u32 mask = 128;
+ int table_id = -1, fib_index = ~0;
+
+ verbose = 1;
+ matching = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "brief") || unformat (input, "summary")
+ || unformat (input, "sum"))
+ verbose = 0;
+
+ else if (unformat (input, "%U %U",
+ unformat_ip6_address, &src,
+ unformat_ip6_address, &grp))
+ {
+ matching = 1;
+ mask = 256;
+ }
+ else if (unformat (input, "%U/%d", unformat_ip6_address, &grp, &mask))
+ {
+ memset(&src, 0, sizeof(src));
+ matching = 1;
+ }
+ else if (unformat (input, "%U", unformat_ip6_address, &grp))
+ {
+ memset(&src, 0, sizeof(src));
+ matching = 1;
+ mask = 128;
+ }
+ else if (unformat (input, "table %d", &table_id))
+ ;
+ else if (unformat (input, "index %d", &fib_index))
+ ;
+ else
+ break;
+ }
+
+    pool_foreach (mfib_table, im6->mfibs,
+ ({
+ ip6_mfib_t *mfib = &mfib_table->v6;
+
+ if (table_id >= 0 && table_id != (int)mfib->table_id)
+ continue;
+ if (fib_index != ~0 && fib_index != (int)mfib->index)
+ continue;
+
+ vlib_cli_output (vm, "%U, fib_index %d",
+ format_mfib_table_name, mfib->index, FIB_PROTOCOL_IP6,
+ mfib->index);
+
+ /* Show summary? */
+ if (! verbose)
+ {
+ /* vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); */
+ /* for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++) */
+ /* { */
+ /* uword * hash = mfib->fib_entry_by_dst_address[i]; */
+ /* uword n_elts = hash_elts (hash); */
+ /* if (n_elts > 0) */
+ /* vlib_cli_output (vm, "%20d%16d", i, n_elts); */
+ /* } */
+ continue;
+ }
+
+ if (!matching)
+ {
+ ip6_mfib_table_show_all(mfib, vm);
+ }
+ else
+ {
+ ip6_mfib_table_show_one(mfib, vm, &src, &grp, mask);
+ }
+ }));
+
+ return 0;
+}
+
+/*?
+ * This command displays the IPv6 Multicast FIB Tables (VRF Tables) and
+ * the route entries for each table.
+ *
+ * @note This command will run for a long time when the FIB tables are
+ * comprised of millions of entries. For those scenarios, consider displaying
+ * a single table or summary mode.
+ *
+ * @cliexpar
+ * Example of how to display all the IPv6 Multicast FIB tables:
+ * @cliexstart{show ip6 mfib}
+ * ipv4-VRF:0, fib_index 0
+ * (*, 0.0.0.0/0): flags:D,
+ * Interfaces:
+ * multicast-ip6-chain
+ * [@1]: dpo-drop ip6
+ * (*, 232.1.1.1/32):
+ * Interfaces:
+ * test-eth1: Forward,
+ * test-eth2: Forward,
+ * test-eth0: Accept,
+ * multicast-ip6-chain
+ * [@2]: dpo-replicate: [index:1 buckets:2 to:[0:0]]
+ * [0] [@1]: ipv4-mcast: test-eth1: IP6: d0:d1:d2:d3:d4:01 -> 01:00:05:00:00:00
+ * [1] [@1]: ipv4-mcast: test-eth2: IP6: d0:d1:d2:d3:d4:02 -> 01:00:05:00:00:00
+ *
+ * @cliexend
+ * Example of how to display a summary of all IPv6 Multicast FIB tables:
+ * @cliexstart{show ip6 mfib summary}
+ * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * Prefix length Count
+ * 0 1
+ * 8 2
+ * 32 4
+ * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto
+ * Prefix length Count
+ * 0 1
+ * 8 2
+ * 24 2
+ * 32 4
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_mfib_command, static) = {
+ .path = "show ip6 mfib",
+    .short_help = "show ip6 mfib [summary] [table <table-id>] [index <fib-id>] [<grp-addr>[/<mask>]] [<grp-addr>] [<src-addr> <grp-addr>]",
+ .function = ip6_show_mfib,
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/mfib/ip6_mfib.h b/src/vnet/mfib/ip6_mfib.h
new file mode 100644
index 00000000..ea81b553
--- /dev/null
+++ b/src/vnet/mfib/ip6_mfib.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv6 Multicast-FIB
+ *
+ * FIXME
+ *
+ * This IPv6 mFIB is used by the protocol independent mFIB. So directly using
+ * these APIs in client code is not encouraged. However, this IPv6 mFIB can be
+ * used if all the client wants is an IPv6 multicast prefix data-base.
+ */
+
+#ifndef __IP6_MFIB_H__
+#define __IP6_MFIB_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/mfib/mfib_table.h>
+
+extern fib_node_index_t ip6_mfib_table_lookup(const ip6_mfib_t *fib,
+ const ip6_address_t *src,
+ const ip6_address_t *grp,
+ u32 len);
+extern fib_node_index_t ip6_mfib_table_lookup_exact_match(const ip6_mfib_t *fib,
+ const ip6_address_t *grp,
+ const ip6_address_t *src,
+ u32 len);
+
+extern void ip6_mfib_table_entry_remove(ip6_mfib_t *fib,
+ const ip6_address_t *grp,
+ const ip6_address_t *src,
+ u32 len);
+
+extern void ip6_mfib_table_entry_insert(ip6_mfib_t *fib,
+ const ip6_address_t *grp,
+ const ip6_address_t *src,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip6_mfib_table_destroy(ip6_mfib_t *fib);
+
+/**
+ * @brief
+ * Add/remove the interface from the accepting list of the special MFIB entries
+ */
+extern void ip6_mfib_interface_enable_disable(u32 sw_if_index,
+ int is_enable);
+
+/**
+ * @brief Get the FIB at the given index
+ */
+static inline ip6_mfib_t *
+ip6_mfib_get (u32 index)
+{
+ return (&(pool_elt_at_index(ip6_main.mfibs, index)->v6));
+}
+
+/**
+ * @brief Get or create an IPv6 multicast fib.
+ *
+ * Get or create an IPv6 multicast fib with the provided table ID.
+ *
+ * @param table_id
+ * When set to \c ~0, an arbitrary and unused fib ID is picked
+ * and can be retrieved from the resulting table.
+ * Otherwise, the fib ID to be used to retrieve or create the desired fib.
+ * @returns The index of the retrieved or created fib.
+ *
+ */
+extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id,
+ mfib_source_t src);
+extern u32 ip6_mfib_table_create_and_lock(mfib_source_t src);
+
+
+static inline
+u32 ip6_mfib_index_from_table_id (u32 table_id)
+{
+ ip6_main_t * im = &ip6_main;
+ uword * p;
+
+ p = hash_get (im->mfib_index_by_table_id, table_id);
+ if (!p)
+ return ~0;
+
+ return p[0];
+}
+
+extern u32 ip6_mfib_table_get_index_for_sw_if_index(u32 sw_if_index);
+
+/**
+ * @brief Data-plane lookup function
+ */
+extern fib_node_index_t ip6_mfib_table_lookup2(const ip6_mfib_t *mfib,
+ const ip6_address_t *src,
+ const ip6_address_t *grp);
+
+/**
+ * @brief Walk the IP6 mfib table.
+ *
+ * @param mfib the table to walk
+ * @param fn The function to invoke on each entry visited
+ * @param ctx A context passed in the visit function
+ */
+extern void ip6_mfib_table_walk (ip6_mfib_t *mfib,
+ mfib_table_walk_fn_t fn,
+ void *ctx);
+
+#endif
+
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
new file mode 100644
index 00000000..2302b9a1
--- /dev/null
+++ b/src/vnet/mfib/mfib_entry.c
@@ -0,0 +1,1313 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/fib/fib_path_list.h>
+
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+/**
+ * Debug macro
+ */
+#ifdef MFIB_DEBUG
+#define MFIB_ENTRY_DBG(_e, _fmt, _args...) \
+{ \
+ u8*__tmp = NULL; \
+ __tmp = format(__tmp, "e:[%d:%U", \
+ mfib_entry_get_index(_e), \
+ format_ip46_address, \
+ &_e->mfe_prefix.fp_grp_addr, \
+ IP46_TYPE_ANY); \
+ __tmp = format(__tmp, "/%d,", \
+ _e->mfe_prefix.fp_len); \
+    __tmp = format(__tmp, "%U]",                        \
+                   format_ip46_address,                 \
+                   &_e->mfe_prefix.fp_src_addr,         \
+                   IP46_TYPE_ANY);                      \
+ __tmp = format(__tmp, _fmt, ##_args); \
+ clib_warning("%s", __tmp); \
+ vec_free(__tmp); \
+}
+#else
+#define MFIB_ENTRY_DBG(_e, _fmt, _args...)
+#endif
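+
+/*
+ * An illustrative use of the macro above: with MFIB_DEBUG defined,
+ *
+ *   MFIB_ENTRY_DBG(mfib_entry, " path-add");
+ *
+ * logs "e:[<index>:<grp>/<len>,<src>] path-add" via clib_warning.
+ */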
+
+/**
+ * MFIB extensions to each path
+ */
+typedef struct mfib_path_ext_t_
+{
+ mfib_itf_flags_t mfpe_flags;
+ fib_node_index_t mfpe_path;
+} mfib_path_ext_t;
+
+/**
+ * The source of an MFIB entry
+ */
+typedef struct mfib_entry_src_t_
+{
+ /**
+ * Which source this is
+ */
+ mfib_source_t mfes_src;
+
+ /**
+ * Route flags
+ */
+ mfib_entry_flags_t mfes_flags;
+
+ /**
+ * The path-list of forwarding interfaces
+ */
+ fib_node_index_t mfes_pl;
+
+ /**
+ * RPF-ID
+ */
+ fib_rpf_id_t mfes_rpf_id;
+
+ /**
+ * Hash table of path extensions
+ */
+ mfib_path_ext_t *mfes_exts;
+
+ /**
+ * The hash table of all interfaces.
+ * This is forwarding time information derived from the paths
+ * and their extensions.
+ */
+ mfib_itf_t *mfes_itfs;
+} mfib_entry_src_t;
+
+/**
+ * Pool of path extensions
+ */
+static mfib_path_ext_t *mfib_path_ext_pool;
+
+/**
+ * String names for each source
+ */
+static const char *mfib_source_names[] = MFIB_SOURCE_NAMES;
+
+/*
+ * Pool for all fib_entries
+ */
+mfib_entry_t *mfib_entry_pool;
+
+static fib_node_t *
+mfib_entry_get_node (fib_node_index_t index)
+{
+ return ((fib_node_t*)mfib_entry_get(index));
+}
+
+static fib_protocol_t
+mfib_entry_get_proto (const mfib_entry_t * mfib_entry)
+{
+ return (mfib_entry->mfe_prefix.fp_proto);
+}
+
+fib_forward_chain_type_t
+mfib_entry_get_default_chain_type (const mfib_entry_t *mfib_entry)
+{
+ switch (mfib_entry->mfe_prefix.fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (FIB_FORW_CHAIN_TYPE_MCAST_IP4);
+ case FIB_PROTOCOL_IP6:
+ return (FIB_FORW_CHAIN_TYPE_MCAST_IP6);
+ case FIB_PROTOCOL_MPLS:
+ ASSERT(0);
+ break;
+ }
+ return (FIB_FORW_CHAIN_TYPE_MCAST_IP4);
+}
+
+static u8 *
+format_mfib_entry_dpo (u8 * s, va_list * args)
+{
+ index_t fei = va_arg(*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*args, u32);
+
+ return (format(s, "%U",
+ format_mfib_entry, fei,
+ MFIB_ENTRY_FORMAT_BRIEF));
+}
+
+static inline mfib_path_ext_t *
+mfib_entry_path_ext_get (index_t mi)
+{
+ return (pool_elt_at_index(mfib_path_ext_pool, mi));
+}
+
+static u8 *
+format_mfib_entry_path_ext (u8 * s, va_list * args)
+{
+ mfib_path_ext_t *path_ext;
+ index_t mpi = va_arg(*args, index_t);
+
+ path_ext = mfib_entry_path_ext_get(mpi);
+ return (format(s, "path:%d flags:%U",
+ path_ext->mfpe_path,
+ format_mfib_itf_flags, path_ext->mfpe_flags));
+}
+
+u8 *
+format_mfib_entry (u8 * s, va_list * args)
+{
+ fib_node_index_t fei, mfi;
+ mfib_entry_t *mfib_entry;
+ mfib_entry_src_t *msrc;
+ u32 sw_if_index;
+ int level;
+
+ fei = va_arg (*args, fib_node_index_t);
+ level = va_arg (*args, int);
+ mfib_entry = mfib_entry_get(fei);
+
+ s = format (s, "%U", format_mfib_prefix, &mfib_entry->mfe_prefix);
+ s = format (s, ": %U", format_mfib_entry_flags, mfib_entry->mfe_flags);
+
+ if (level >= MFIB_ENTRY_FORMAT_DETAIL)
+ {
+ fib_node_index_t path_index, mpi;
+
+ s = format (s, "\n");
+ s = format (s, " fib:%d", mfib_entry->mfe_fib_index);
+ s = format (s, " index:%d", mfib_entry_get_index(mfib_entry));
+ s = format (s, " locks:%d\n", mfib_entry->mfe_node.fn_locks);
+ vec_foreach(msrc, mfib_entry->mfe_srcs)
+ {
+ s = format (s, " src:%s", mfib_source_names[msrc->mfes_src]);
+ s = format (s, ": %U\n", format_mfib_entry_flags, msrc->mfes_flags);
+ if (FIB_NODE_INDEX_INVALID != msrc->mfes_pl)
+ {
+ s = fib_path_list_format(msrc->mfes_pl, s);
+ }
+ s = format (s, " Extensions:\n");
+ hash_foreach(path_index, mpi, msrc->mfes_exts,
+ ({
+ s = format(s, " %U\n", format_mfib_entry_path_ext, mpi);
+ }));
+ s = format (s, " Interface-Forwarding:\n");
+ hash_foreach(sw_if_index, mfi, msrc->mfes_itfs,
+ ({
+ s = format(s, " %U\n", format_mfib_itf, mfi);
+ }));
+ }
+ }
+
+ s = format(s, "\n Interfaces:");
+ hash_foreach(sw_if_index, mfi, mfib_entry->mfe_itfs,
+ ({
+ s = format(s, "\n %U", format_mfib_itf, mfi);
+ }));
+ s = format(s, "\n RPF-ID:%d", mfib_entry->mfe_rpf_id);
+ s = format(s, "\n %U-chain\n %U",
+ format_fib_forw_chain_type,
+ mfib_entry_get_default_chain_type(mfib_entry),
+ format_dpo_id,
+ &mfib_entry->mfe_rep,
+ 2);
+ s = format(s, "\n");
+
+ if (level >= MFIB_ENTRY_FORMAT_DETAIL2)
+ {
+ s = format(s, "\nchildren:");
+ s = fib_node_children_format(mfib_entry->mfe_node.fn_children, s);
+ }
+
+ return (s);
+}
+
+static mfib_entry_t*
+mfib_entry_from_fib_node (fib_node_t *node)
+{
+#if CLIB_DEBUG > 0
+ ASSERT(FIB_NODE_TYPE_MFIB_ENTRY == node->fn_type);
+#endif
+ return ((mfib_entry_t*)node);
+}
+
+static int
+mfib_entry_src_cmp_for_sort (void * v1,
+ void * v2)
+{
+ mfib_entry_src_t *esrc1 = v1, *esrc2 = v2;
+
+ return (esrc1->mfes_src - esrc2->mfes_src);
+}
+
+static void
+mfib_entry_src_init (mfib_entry_t *mfib_entry,
+ mfib_source_t source)
+
+{
+ mfib_entry_src_t esrc = {
+ .mfes_pl = FIB_NODE_INDEX_INVALID,
+ .mfes_flags = MFIB_ENTRY_FLAG_NONE,
+ .mfes_src = source,
+ };
+
+ vec_add1(mfib_entry->mfe_srcs, esrc);
+ vec_sort_with_function(mfib_entry->mfe_srcs,
+ mfib_entry_src_cmp_for_sort);
+}
+
+static mfib_entry_src_t *
+mfib_entry_src_find (const mfib_entry_t *mfib_entry,
+ mfib_source_t source,
+ u32 *index)
+
+{
+ mfib_entry_src_t *esrc;
+ int ii;
+
+ ii = 0;
+ vec_foreach(esrc, mfib_entry->mfe_srcs)
+ {
+ if (esrc->mfes_src == source)
+ {
+ if (NULL != index)
+ {
+ *index = ii;
+ }
+ return (esrc);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+
+ return (NULL);
+}
+
+static mfib_entry_src_t *
+mfib_entry_src_find_or_create (mfib_entry_t *mfib_entry,
+ mfib_source_t source)
+{
+ mfib_entry_src_t *esrc;
+
+ esrc = mfib_entry_src_find(mfib_entry, source, NULL);
+
+ if (NULL == esrc)
+ {
+ mfib_entry_src_init(mfib_entry, source);
+ }
+
+ return (mfib_entry_src_find(mfib_entry, source, NULL));
+}
+
+static mfib_entry_src_t*
+mfib_entry_get_best_src (const mfib_entry_t *mfib_entry)
+{
+ mfib_entry_src_t *bsrc;
+
+ /*
+ * the enum of sources is deliberately arranged in priority order
+ */
+ if (0 == vec_len(mfib_entry->mfe_srcs))
+ {
+ bsrc = NULL;
+ }
+ else
+ {
+ bsrc = vec_elt_at_index(mfib_entry->mfe_srcs, 0);
+ }
+
+ return (bsrc);
+}
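+
+/*
+ * e.g. (illustrative; source names assumed from mfib_types.h): if an
+ * entry is sourced by both MFIB_SOURCE_API and MFIB_SOURCE_DEFAULT_ROUTE,
+ * the sorted vector places the numerically lower, higher-priority API
+ * source at element 0, so it provides the forwarding data.
+ */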
+
+int
+mfib_entry_is_sourced (fib_node_index_t mfib_entry_index,
+ mfib_source_t source)
+{
+ mfib_entry_t *mfib_entry;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+
+ return (NULL != mfib_entry_src_find(mfib_entry, source, NULL));
+}
+
+static void
+mfib_entry_src_flush (mfib_entry_src_t *msrc)
+{
+ u32 sw_if_index;
+ index_t mfii;
+
+ hash_foreach(sw_if_index, mfii, msrc->mfes_itfs,
+ ({
+ mfib_itf_delete(mfib_itf_get(mfii));
+ }));
+ hash_free(msrc->mfes_itfs);
+ msrc->mfes_itfs = NULL;
+ fib_path_list_unlock(msrc->mfes_pl);
+}
+
+static void
+mfib_entry_src_remove (mfib_entry_t *mfib_entry,
+ mfib_source_t source)
+
+{
+ mfib_entry_src_t *msrc;
+ u32 index = ~0;
+
+ msrc = mfib_entry_src_find(mfib_entry, source, &index);
+
+ if (NULL != msrc)
+ {
+ mfib_entry_src_flush(msrc);
+ vec_del1(mfib_entry->mfe_srcs, index);
+ }
+}
+
+u32
+mfib_entry_child_add (fib_node_index_t mfib_entry_index,
+ fib_node_type_t child_type,
+ fib_node_index_t child_index)
+{
+ return (fib_node_child_add(FIB_NODE_TYPE_MFIB_ENTRY,
+ mfib_entry_index,
+ child_type,
+ child_index));
+};
+
+void
+mfib_entry_child_remove (fib_node_index_t mfib_entry_index,
+ u32 sibling_index)
+{
+ fib_node_child_remove(FIB_NODE_TYPE_MFIB_ENTRY,
+ mfib_entry_index,
+ sibling_index);
+}
+
+static mfib_entry_t *
+mfib_entry_alloc (u32 fib_index,
+ const mfib_prefix_t *prefix,
+ fib_node_index_t *mfib_entry_index)
+{
+ mfib_entry_t *mfib_entry;
+
+ pool_get_aligned(mfib_entry_pool, mfib_entry, CLIB_CACHE_LINE_BYTES);
+
+ fib_node_init(&mfib_entry->mfe_node,
+ FIB_NODE_TYPE_MFIB_ENTRY);
+
+ /*
+ * Some of the members require non-default initialisation
+ * so we also init those that don't and thus save on the call to memset.
+ */
+ mfib_entry->mfe_flags = 0;
+ mfib_entry->mfe_fib_index = fib_index;
+ mfib_entry->mfe_prefix = *prefix;
+ mfib_entry->mfe_srcs = NULL;
+ mfib_entry->mfe_itfs = NULL;
+ mfib_entry->mfe_rpf_id = MFIB_RPF_ID_NONE;
+ mfib_entry->mfe_pl = FIB_NODE_INDEX_INVALID;
+
+ dpo_reset(&mfib_entry->mfe_rep);
+
+ *mfib_entry_index = mfib_entry_get_index(mfib_entry);
+
+ MFIB_ENTRY_DBG(mfib_entry, "alloc");
+
+ return (mfib_entry);
+}
+
+static inline mfib_path_ext_t *
+mfib_entry_path_ext_find (mfib_path_ext_t *exts,
+ fib_node_index_t path_index)
+{
+ uword *p;
+
+ p = hash_get(exts, path_index);
+
+ if (NULL != p)
+ {
+ return (mfib_entry_path_ext_get(p[0]));
+ }
+
+ return (NULL);
+}
+
+static mfib_path_ext_t*
+mfib_path_ext_add (mfib_entry_src_t *msrc,
+ fib_node_index_t path_index,
+ mfib_itf_flags_t mfi_flags)
+{
+ mfib_path_ext_t *path_ext;
+
+ pool_get(mfib_path_ext_pool, path_ext);
+
+ path_ext->mfpe_flags = mfi_flags;
+ path_ext->mfpe_path = path_index;
+
+ hash_set(msrc->mfes_exts, path_index,
+ path_ext - mfib_path_ext_pool);
+
+ return (path_ext);
+}
+
+static void
+mfib_path_ext_remove (mfib_entry_src_t *msrc,
+ fib_node_index_t path_index)
+{
+ mfib_path_ext_t *path_ext;
+
+ path_ext = mfib_entry_path_ext_find(msrc->mfes_exts, path_index);
+
+ hash_unset(msrc->mfes_exts, path_index);
+ pool_put(mfib_path_ext_pool, path_ext);
+}
+
+typedef struct mfib_entry_collect_forwarding_ctx_t_
+{
+ load_balance_path_t * next_hops;
+ fib_forward_chain_type_t fct;
+ mfib_entry_src_t *msrc;
+} mfib_entry_collect_forwarding_ctx_t;
+
+static fib_path_list_walk_rc_t
+mfib_entry_src_collect_forwarding (fib_node_index_t pl_index,
+ fib_node_index_t path_index,
+ void *arg)
+{
+ mfib_entry_collect_forwarding_ctx_t *ctx;
+ load_balance_path_t *nh;
+
+ ctx = arg;
+
+ /*
+ * if the path is not resolved, don't include it.
+ */
+ if (!fib_path_is_resolved(path_index))
+ {
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+ }
+
+ /*
+     * If the path is not forwarding, don't use it
+ */
+ mfib_path_ext_t *path_ext;
+
+ path_ext = mfib_entry_path_ext_find(ctx->msrc->mfes_exts,
+ path_index);
+
+ if (NULL != path_ext &&
+ !(path_ext->mfpe_flags & MFIB_ITF_FLAG_FORWARD))
+ {
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+ }
+
+ switch (ctx->fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+ /*
+ * EOS traffic with no label to stack, we need the IP Adj
+ */
+ vec_add2(ctx->next_hops, nh, 1);
+
+ nh->path_index = path_index;
+ nh->path_weight = fib_path_get_weight(path_index);
+ fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+ break;
+
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_ETHERNET:
+ case FIB_FORW_CHAIN_TYPE_NSH:
+ ASSERT(0);
+ break;
+ }
+
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+}
+
+static void
+mfib_entry_stack (mfib_entry_t *mfib_entry,
+ mfib_entry_src_t *msrc)
+{
+ dpo_proto_t dp;
+
+ dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry));
+
+ /*
+     * unlink the entry from the previous path-list.
+ */
+ if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_pl)
+ {
+ fib_path_list_child_remove(mfib_entry->mfe_pl,
+ mfib_entry->mfe_sibling);
+ }
+
+ if (NULL != msrc &&
+ FIB_NODE_INDEX_INVALID != msrc->mfes_pl)
+ {
+ mfib_entry_collect_forwarding_ctx_t ctx = {
+ .next_hops = NULL,
+ .fct = mfib_entry_get_default_chain_type(mfib_entry),
+ .msrc = msrc,
+ };
+
+ fib_path_list_walk(msrc->mfes_pl,
+ mfib_entry_src_collect_forwarding,
+ &ctx);
+
+ if (!(MFIB_ENTRY_FLAG_EXCLUSIVE & mfib_entry->mfe_flags))
+ {
+ if (NULL == ctx.next_hops)
+ {
+ /*
+ * no next-hops, stack directly on the drop
+ */
+ dpo_stack(DPO_MFIB_ENTRY, dp,
+ &mfib_entry->mfe_rep,
+ drop_dpo_get(dp));
+ }
+ else
+ {
+ /*
+                 * each path contributes a next-hop. form a replicate
+ * from those choices.
+ */
+ if (!dpo_id_is_valid(&mfib_entry->mfe_rep) ||
+ dpo_is_drop(&mfib_entry->mfe_rep))
+ {
+ dpo_id_t tmp_dpo = DPO_INVALID;
+
+ dpo_set(&tmp_dpo,
+ DPO_REPLICATE, dp,
+ replicate_create(0, dp));
+
+ dpo_stack(DPO_MFIB_ENTRY, dp,
+ &mfib_entry->mfe_rep,
+ &tmp_dpo);
+
+ dpo_reset(&tmp_dpo);
+ }
+ replicate_multipath_update(&mfib_entry->mfe_rep,
+ ctx.next_hops);
+ }
+ }
+ else
+ {
+ /*
+ * for exclusive routes the source provided a replicate DPO
+         * that we stashed in the special path-list with one path,
+ * so we can stack directly on that.
+ */
+ ASSERT(1 == vec_len(ctx.next_hops));
+
+ dpo_stack(DPO_MFIB_ENTRY, dp,
+ &mfib_entry->mfe_rep,
+ &ctx.next_hops[0].path_dpo);
+ dpo_reset(&ctx.next_hops[0].path_dpo);
+ vec_free(ctx.next_hops);
+ }
+
+ /*
+ * link the entry to the path-list.
+ * The entry needs to be a child so that we receive the back-walk
+ * updates to recalculate forwarding.
+ */
+ mfib_entry->mfe_pl = msrc->mfes_pl;
+ mfib_entry->mfe_sibling =
+ fib_path_list_child_add(mfib_entry->mfe_pl,
+ FIB_NODE_TYPE_MFIB_ENTRY,
+ mfib_entry_get_index(mfib_entry));
+ }
+ else
+ {
+ dpo_stack(DPO_MFIB_ENTRY, dp,
+ &mfib_entry->mfe_rep,
+ drop_dpo_get(dp));
+ }
+}
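+
+/*
+ * The object stack built above for a non-exclusive entry with resolved
+ * forwarding paths looks like this (illustrative):
+ *
+ *   mfib_entry (mfe_rep)
+ *        |
+ *   replicate DPO
+ *    /    |    \
+ *  per-path DPOs contributed by fib_path_contribute_forwarding()
+ *
+ * With no usable paths the entry stacks directly on the drop DPO.
+ */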
+
+static fib_node_index_t
+mfib_entry_src_path_add (mfib_entry_src_t *msrc,
+ const fib_route_path_t *rpath)
+{
+ fib_node_index_t path_index;
+ fib_route_path_t *rpaths;
+
+ ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags));
+
+ /*
+ * path-lists require a vector of paths
+ */
+ rpaths = NULL;
+ vec_add1(rpaths, rpath[0]);
+
+ if (FIB_NODE_INDEX_INVALID == msrc->mfes_pl)
+ {
+ /* A non-shared path-list */
+ msrc->mfes_pl = fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF,
+ NULL);
+ fib_path_list_lock(msrc->mfes_pl);
+ }
+
+ path_index = fib_path_list_path_add(msrc->mfes_pl, rpaths);
+
+ vec_free(rpaths);
+
+ return (path_index);
+}
+
+static fib_node_index_t
+mfib_entry_src_path_remove (mfib_entry_src_t *msrc,
+ const fib_route_path_t *rpath)
+{
+ fib_node_index_t path_index;
+ fib_route_path_t *rpaths;
+
+ ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags));
+
+ /*
+ * path-lists require a vector of paths
+ */
+ rpaths = NULL;
+ vec_add1(rpaths, rpath[0]);
+
+ path_index = fib_path_list_path_remove(msrc->mfes_pl, rpaths);
+
+ vec_free(rpaths);
+
+ return (path_index);
+}
+
+static void
+mfib_entry_recalculate_forwarding (mfib_entry_t *mfib_entry)
+{
+ mfib_entry_src_t *bsrc;
+
+ /*
+     * copy the forwarding data from the best source
+ */
+ bsrc = mfib_entry_get_best_src(mfib_entry);
+
+ if (NULL != bsrc)
+ {
+ mfib_entry->mfe_flags = bsrc->mfes_flags;
+ mfib_entry->mfe_itfs = bsrc->mfes_itfs;
+ mfib_entry->mfe_rpf_id = bsrc->mfes_rpf_id;
+ }
+
+ mfib_entry_stack(mfib_entry, bsrc);
+}
+
+
+fib_node_index_t
+mfib_entry_create (u32 fib_index,
+ mfib_source_t source,
+ const mfib_prefix_t *prefix,
+ fib_rpf_id_t rpf_id,
+ mfib_entry_flags_t entry_flags)
+{
+ fib_node_index_t mfib_entry_index;
+ mfib_entry_t *mfib_entry;
+ mfib_entry_src_t *msrc;
+
+ mfib_entry = mfib_entry_alloc(fib_index, prefix,
+ &mfib_entry_index);
+ msrc = mfib_entry_src_find_or_create(mfib_entry, source);
+ msrc->mfes_flags = entry_flags;
+ msrc->mfes_rpf_id = rpf_id;
+
+ mfib_entry_recalculate_forwarding(mfib_entry);
+
+ return (mfib_entry_index);
+}
+
+static int
+mfib_entry_ok_for_delete (mfib_entry_t *mfib_entry)
+{
+ return (0 == vec_len(mfib_entry->mfe_srcs));
+}
+
+static int
+mfib_entry_src_ok_for_delete (const mfib_entry_src_t *msrc)
+{
+ return ((MFIB_ENTRY_FLAG_NONE == msrc->mfes_flags &&
+ 0 == fib_path_list_get_n_paths(msrc->mfes_pl)));
+}
+
+int
+mfib_entry_update (fib_node_index_t mfib_entry_index,
+ mfib_source_t source,
+ mfib_entry_flags_t entry_flags,
+ fib_rpf_id_t rpf_id,
+ index_t repi)
+{
+ mfib_entry_t *mfib_entry;
+ mfib_entry_src_t *msrc;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+ msrc = mfib_entry_src_find_or_create(mfib_entry, source);
+ msrc->mfes_flags = entry_flags;
+ msrc->mfes_rpf_id = rpf_id;
+
+ if (INDEX_INVALID != repi)
+ {
+ /*
+ * The source is providing its own replicate DPO.
+         * Create a special path-list to manage it; that way
+         * this entry and the source are equivalent to a normal
+         * entry.
+ */
+ fib_node_index_t old_pl_index;
+ dpo_proto_t dp;
+ dpo_id_t dpo = DPO_INVALID;
+
+ dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry));
+ old_pl_index = msrc->mfes_pl;
+
+ dpo_set(&dpo, DPO_REPLICATE, dp, repi);
+
+ msrc->mfes_pl =
+ fib_path_list_create_special(dp,
+ FIB_PATH_LIST_FLAG_EXCLUSIVE,
+ &dpo);
+
+ dpo_reset(&dpo);
+ fib_path_list_lock(msrc->mfes_pl);
+ fib_path_list_unlock(old_pl_index);
+ }
+
+ if (mfib_entry_src_ok_for_delete(msrc))
+ {
+ /*
+ * this source has no interfaces and no flags.
+ * it has nothing left to give - remove it
+ */
+ mfib_entry_src_remove(mfib_entry, source);
+ }
+
+ mfib_entry_recalculate_forwarding(mfib_entry);
+
+ return (mfib_entry_ok_for_delete(mfib_entry));
+}
+
+static void
+mfib_entry_itf_add (mfib_entry_src_t *msrc,
+ u32 sw_if_index,
+ index_t mi)
+{
+ hash_set(msrc->mfes_itfs, sw_if_index, mi);
+}
+
+static void
+mfib_entry_itf_remove (mfib_entry_src_t *msrc,
+ u32 sw_if_index)
+{
+ mfib_itf_t *mfi;
+
+ mfi = mfib_entry_itf_find(msrc->mfes_itfs, sw_if_index);
+
+ mfib_itf_delete(mfi);
+
+ hash_unset(msrc->mfes_itfs, sw_if_index);
+}
+
+void
+mfib_entry_path_update (fib_node_index_t mfib_entry_index,
+ mfib_source_t source,
+ const fib_route_path_t *rpath,
+ mfib_itf_flags_t itf_flags)
+{
+ fib_node_index_t path_index;
+ mfib_path_ext_t *path_ext;
+ mfib_itf_flags_t old, new;
+ mfib_entry_t *mfib_entry;
+ mfib_entry_src_t *msrc;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+ ASSERT(NULL != mfib_entry);
+ msrc = mfib_entry_src_find_or_create(mfib_entry, source);
+
+ /*
+ * add the path to the path-list. If it's a duplicate we'll get
+ * back the original path.
+ */
+ path_index = mfib_entry_src_path_add(msrc, rpath);
+
+ /*
+ * find the path extension for that path
+ */
+ path_ext = mfib_entry_path_ext_find(msrc->mfes_exts, path_index);
+
+ if (NULL == path_ext)
+ {
+ old = MFIB_ITF_FLAG_NONE;
+ path_ext = mfib_path_ext_add(msrc, path_index, itf_flags);
+ }
+ else
+ {
+ old = path_ext->mfpe_flags;
+ path_ext->mfpe_flags = itf_flags;
+ }
+
+ /*
+     * Has the path changed its contribution to the input interface set?
+     * Only paths with interfaces can do that...
+ */
+ if (~0 != rpath[0].frp_sw_if_index)
+ {
+ mfib_itf_t *mfib_itf;
+
+ new = itf_flags;
+
+ if (old != new)
+ {
+ if (MFIB_ITF_FLAG_NONE == new)
+ {
+ /*
+ * no more interface flags on this path, remove
+ * from the data-plane set
+ */
+ mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index);
+ }
+ else if (MFIB_ITF_FLAG_NONE == old)
+ {
+ /*
+ * This interface is now contributing
+ */
+ mfib_entry_itf_add(msrc,
+ rpath[0].frp_sw_if_index,
+ mfib_itf_create(rpath[0].frp_sw_if_index,
+ itf_flags));
+ }
+ else
+ {
+ /*
+ * change of flag contributions
+ */
+ mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs,
+ rpath[0].frp_sw_if_index);
+ /* Seen by packets inflight */
+ mfib_itf->mfi_flags = new;
+ }
+ }
+ }
+
+ mfib_entry_recalculate_forwarding(mfib_entry);
+}
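+
+/*
+ * A minimal caller sketch (illustrative; MFIB_SOURCE_API assumed, only
+ * frp_sw_if_index shown, other fib_route_path_t fields zeroed): accept
+ * on one interface and forward out another:
+ *
+ *   fib_route_path_t rpath = { .frp_sw_if_index = rx_sw_if_index };
+ *
+ *   mfib_entry_path_update(mfei, MFIB_SOURCE_API, &rpath,
+ *                          MFIB_ITF_FLAG_ACCEPT);
+ *   rpath.frp_sw_if_index = tx_sw_if_index;
+ *   mfib_entry_path_update(mfei, MFIB_SOURCE_API, &rpath,
+ *                          MFIB_ITF_FLAG_FORWARD);
+ */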
+
+/*
+ * mfib_entry_path_remove
+ *
+ * remove a path from the entry.
+ * return 1 if the entry is no longer sourced and can be deleted, 0 otherwise.
+ */
+int
+mfib_entry_path_remove (fib_node_index_t mfib_entry_index,
+ mfib_source_t source,
+ const fib_route_path_t *rpath)
+{
+ fib_node_index_t path_index;
+ mfib_entry_t *mfib_entry;
+ mfib_entry_src_t *msrc;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+ ASSERT(NULL != mfib_entry);
+ msrc = mfib_entry_src_find(mfib_entry, source, NULL);
+
+ if (NULL == msrc)
+ {
+ /*
+ * there are no paths left for this source
+ */
+ return (mfib_entry_ok_for_delete(mfib_entry));
+ }
+
+ /*
+ * remove the path from the path-list. If it's not there we'll get
+ * back invalid
+ */
+ path_index = mfib_entry_src_path_remove(msrc, rpath);
+
+ if (FIB_NODE_INDEX_INVALID != path_index)
+ {
+ /*
+ * don't need the extension, nor the interface anymore
+ */
+ mfib_path_ext_remove(msrc, path_index);
+ if (~0 != rpath[0].frp_sw_if_index)
+ {
+ mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index);
+ }
+ }
+
+ if (mfib_entry_src_ok_for_delete(msrc))
+ {
+ /*
+ * this source has no interfaces and no flags.
+ * it has nothing left to give - remove it
+ */
+ mfib_entry_src_remove(mfib_entry, source);
+ }
+
+ mfib_entry_recalculate_forwarding(mfib_entry);
+
+ return (mfib_entry_ok_for_delete(mfib_entry));
+}
+
+/**
+ * mfib_entry_delete
+ *
+ * The source is withdrawing all the paths it provided
+ */
+int
+mfib_entry_delete (fib_node_index_t mfib_entry_index,
+ mfib_source_t source)
+{
+ mfib_entry_t *mfib_entry;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+ mfib_entry_src_remove(mfib_entry, source);
+
+ mfib_entry_recalculate_forwarding(mfib_entry);
+
+ return (mfib_entry_ok_for_delete(mfib_entry));
+}
+
+static int
+fib_ip4_address_compare (ip4_address_t * a1,
+ ip4_address_t * a2)
+{
+ /*
+     * IP addresses are unsigned ints, but the return value here needs to be
+     * signed, so a simple subtraction won't cut it.
+     * If the addresses are the same, the sort order is undefined, so be it.
+ */
+ return ((clib_net_to_host_u32(a1->data_u32) >
+ clib_net_to_host_u32(a2->data_u32) ) ?
+ 1 : -1);
+}
+
+static int
+fib_ip6_address_compare (ip6_address_t * a1,
+ ip6_address_t * a2)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a1->as_u16); i++)
+ {
+ int cmp = (clib_net_to_host_u16 (a1->as_u16[i]) -
+ clib_net_to_host_u16 (a2->as_u16[i]));
+ if (cmp != 0)
+ return cmp;
+ }
+ return 0;
+}
+
+static int
+mfib_entry_cmp (fib_node_index_t mfib_entry_index1,
+ fib_node_index_t mfib_entry_index2)
+{
+ mfib_entry_t *mfib_entry1, *mfib_entry2;
+ int cmp = 0;
+
+ mfib_entry1 = mfib_entry_get(mfib_entry_index1);
+ mfib_entry2 = mfib_entry_get(mfib_entry_index2);
+
+ switch (mfib_entry1->mfe_prefix.fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ cmp = fib_ip4_address_compare(&mfib_entry1->mfe_prefix.fp_grp_addr.ip4,
+ &mfib_entry2->mfe_prefix.fp_grp_addr.ip4);
+
+ if (0 == cmp)
+ {
+ cmp = fib_ip4_address_compare(&mfib_entry1->mfe_prefix.fp_src_addr.ip4,
+ &mfib_entry2->mfe_prefix.fp_src_addr.ip4);
+ }
+ break;
+ case FIB_PROTOCOL_IP6:
+ cmp = fib_ip6_address_compare(&mfib_entry1->mfe_prefix.fp_grp_addr.ip6,
+ &mfib_entry2->mfe_prefix.fp_grp_addr.ip6);
+
+ if (0 == cmp)
+ {
+ cmp = fib_ip6_address_compare(&mfib_entry1->mfe_prefix.fp_src_addr.ip6,
+ &mfib_entry2->mfe_prefix.fp_src_addr.ip6);
+ }
+ break;
+ case FIB_PROTOCOL_MPLS:
+ ASSERT(0);
+ cmp = 0;
+ break;
+ }
+
+    if (0 == cmp)
+    {
+        cmp = (mfib_entry1->mfe_prefix.fp_len - mfib_entry2->mfe_prefix.fp_len);
+    }
+ return (cmp);
+}
+
+int
+mfib_entry_cmp_for_sort (void *i1, void *i2)
+{
+ fib_node_index_t *mfib_entry_index1 = i1, *mfib_entry_index2 = i2;
+
+ return (mfib_entry_cmp(*mfib_entry_index1,
+ *mfib_entry_index2));
+}
+
+static void
+mfib_entry_last_lock_gone (fib_node_t *node)
+{
+ mfib_entry_t *mfib_entry;
+ mfib_entry_src_t *msrc;
+
+ mfib_entry = mfib_entry_from_fib_node(node);
+
+ dpo_reset(&mfib_entry->mfe_rep);
+
+ MFIB_ENTRY_DBG(mfib_entry, "last-lock");
+
+ vec_foreach(msrc, mfib_entry->mfe_srcs)
+ {
+ mfib_entry_src_flush(msrc);
+ }
+
+ vec_free(mfib_entry->mfe_srcs);
+
+ fib_node_deinit(&mfib_entry->mfe_node);
+ pool_put(mfib_entry_pool, mfib_entry);
+}
+
+/*
+ * mfib_entry_back_walk_notify
+ *
+ * A back walk has reached this entry.
+ */
+static fib_node_back_walk_rc_t
+mfib_entry_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ mfib_entry_recalculate_forwarding(mfib_entry_from_fib_node(node));
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+static void
+mfib_entry_show_memory (void)
+{
+ fib_show_memory_usage("multicast-Entry",
+ pool_elts(mfib_entry_pool),
+ pool_len(mfib_entry_pool),
+ sizeof(mfib_entry_t));
+}
+
+/*
+ * The MFIB entry's graph node virtual function table
+ */
+static const fib_node_vft_t mfib_entry_vft = {
+ .fnv_get = mfib_entry_get_node,
+ .fnv_last_lock = mfib_entry_last_lock_gone,
+ .fnv_back_walk = mfib_entry_back_walk_notify,
+ .fnv_mem_show = mfib_entry_show_memory,
+};
+
+void
+mfib_entry_lock (fib_node_index_t mfib_entry_index)
+{
+ mfib_entry_t *mfib_entry;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+
+ fib_node_lock(&mfib_entry->mfe_node);
+}
+
+void
+mfib_entry_unlock (fib_node_index_t mfib_entry_index)
+{
+ mfib_entry_t *mfib_entry;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+
+ fib_node_unlock(&mfib_entry->mfe_node);
+}
+
+static void
+mfib_entry_dpo_lock (dpo_id_t *dpo)
+{
+}
+static void
+mfib_entry_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+const static dpo_vft_t mfib_entry_dpo_vft = {
+ .dv_lock = mfib_entry_dpo_lock,
+ .dv_unlock = mfib_entry_dpo_unlock,
+ .dv_format = format_mfib_entry_dpo,
+ .dv_mem_show = mfib_entry_show_memory,
+};
+
+const static char* const mfib_entry_ip4_nodes[] =
+{
+ "ip4-mfib-forward-rpf",
+ NULL,
+};
+const static char* const mfib_entry_ip6_nodes[] =
+{
+ "ip6-mfib-forward-rpf",
+ NULL,
+};
+
+const static char* const * const mfib_entry_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = mfib_entry_ip4_nodes,
+ [DPO_PROTO_IP6] = mfib_entry_ip6_nodes,
+};
+
+void
+mfib_entry_module_init (void)
+{
+ fib_node_register_type (FIB_NODE_TYPE_MFIB_ENTRY, &mfib_entry_vft);
+ dpo_register(DPO_MFIB_ENTRY, &mfib_entry_dpo_vft, mfib_entry_nodes);
+}
+
+void
+mfib_entry_encode (fib_node_index_t mfib_entry_index,
+ fib_route_path_encode_t **api_rpaths)
+{
+ mfib_entry_t *mfib_entry;
+ mfib_entry_src_t *bsrc;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+ bsrc = mfib_entry_get_best_src(mfib_entry);
+
+    /* the entry may have no sources left, in which case there is nothing to encode */
+    if (NULL != bsrc && FIB_NODE_INDEX_INVALID != bsrc->mfes_pl)
+ {
+ fib_path_list_walk(bsrc->mfes_pl,
+ fib_path_encode,
+ api_rpaths);
+ }
+}
+
+
+void
+mfib_entry_get_prefix (fib_node_index_t mfib_entry_index,
+ mfib_prefix_t *pfx)
+{
+ mfib_entry_t *mfib_entry;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+ *pfx = mfib_entry->mfe_prefix;
+}
+
+u32
+mfib_entry_get_fib_index (fib_node_index_t mfib_entry_index)
+{
+ mfib_entry_t *mfib_entry;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+
+ return (mfib_entry->mfe_fib_index);
+}
+
+void
+mfib_entry_contribute_forwarding (fib_node_index_t mfib_entry_index,
+ fib_forward_chain_type_t type,
+ dpo_id_t *dpo)
+{
+ /*
+ * An IP mFIB entry can only provide a forwarding chain that
+ * is the same IP proto as the prefix.
+     * No use-cases (that I know of) for other combinations.
+ */
+ mfib_entry_t *mfib_entry;
+ dpo_proto_t dp;
+
+ mfib_entry = mfib_entry_get(mfib_entry_index);
+
+ dp = fib_proto_to_dpo(mfib_entry->mfe_prefix.fp_proto);
+
+ if (type == fib_forw_chain_type_from_dpo_proto(dp))
+ {
+ dpo_copy(dpo, &mfib_entry->mfe_rep);
+ }
+ else
+ {
+ dpo_copy(dpo, drop_dpo_get(dp));
+ }
+}
+
+u32
+mfib_entry_pool_size (void)
+{
+ return (pool_elts(mfib_entry_pool));
+}
+
+static clib_error_t *
+show_mfib_entry_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ fib_node_index_t fei;
+
+ if (unformat (input, "%d", &fei))
+ {
+ /*
+ * show one in detail
+ */
+ if (!pool_is_free_index(mfib_entry_pool, fei))
+ {
+ vlib_cli_output (vm, "%d@%U",
+ fei,
+ format_mfib_entry, fei,
+ MFIB_ENTRY_FORMAT_DETAIL2);
+ }
+ else
+ {
+ vlib_cli_output (vm, "entry %d invalid", fei);
+ }
+ }
+ else
+ {
+ /*
+ * show all
+ */
+ vlib_cli_output (vm, "FIB Entries:");
+ pool_foreach_index(fei, mfib_entry_pool,
+ ({
+ vlib_cli_output (vm, "%d@%U",
+ fei,
+ format_mfib_entry, fei,
+ MFIB_ENTRY_FORMAT_BRIEF);
+ }));
+ }
+
+ return (NULL);
+}
+
+/*?
+ * This command displays an entry, or all entries, in the mfib tables,
+ * indexed by their unique numerical identifier.
+ ?*/
+VLIB_CLI_COMMAND (show_mfib_entry, static) = {
+ .path = "show mfib entry",
+ .function = show_mfib_entry_command,
+ .short_help = "show mfib entry",
+};
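+
+/*
+ * Supported invocations (from the unformat logic above):
+ *   show mfib entry            - brief listing of every entry in the pool
+ *   show mfib entry <index>    - detailed output for one entry
+ */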
diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h
new file mode 100644
index 00000000..96ee49f7
--- /dev/null
+++ b/src/vnet/mfib/mfib_entry.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_ENTRY_H__
+#define __MFIB_ENTRY_H__
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/mfib/mfib_types.h>
+#include <vnet/mfib/mfib_itf.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * An entry in a multicast FIB table.
+ *
+ * This entry represents a route added to the mFIB that is stored
+ * in one of the mFIB tables.
+ */
+typedef struct mfib_entry_t_ {
+ CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
+ /**
+ * Base class. The entry's node representation in the graph.
+ */
+ fib_node_t mfe_node;
+ /**
+ * The prefix of the route
+ */
+ mfib_prefix_t mfe_prefix;
+ /**
+ * The index of the FIB table this entry is in
+ */
+ u32 mfe_fib_index;
+
+ /**
+ * A vector of sources contributing forwarding
+ */
+ struct mfib_entry_src_t_ *mfe_srcs;
+
+ /**
+ * The path-list of which this entry is a child
+ */
+ fib_node_index_t mfe_pl;
+
+ /**
+ * The sibling index on the path-list
+ */
+ u32 mfe_sibling;
+
+ /**
+ * 2nd cache line has the members used in the data plane
+ */
+ CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
+
+ /**
+ * The DPO used for forwarding; replicate, drop, etc..
+ */
+ dpo_id_t mfe_rep;
+
+ /**
+ * Route flags
+ */
+ mfib_entry_flags_t mfe_flags;
+
+ /**
+ * RPF-ID used when the packets ingress not from an interface
+ */
+ fib_rpf_id_t mfe_rpf_id;
+
+ /**
+ * A hash table of interfaces
+ */
+ mfib_itf_t *mfe_itfs;
+} mfib_entry_t;
+
+#define MFIB_ENTRY_FORMAT_BRIEF (0x0)
+#define MFIB_ENTRY_FORMAT_DETAIL (0x1)
+#define MFIB_ENTRY_FORMAT_DETAIL2 (0x2)
+
+extern u8 *format_mfib_entry(u8 * s, va_list * args);
+
+
+extern fib_node_index_t mfib_entry_create(u32 fib_index,
+ mfib_source_t source,
+ const mfib_prefix_t *prefix,
+ fib_rpf_id_t rpf_id,
+ mfib_entry_flags_t entry_flags);
+
+extern int mfib_entry_update(fib_node_index_t fib_entry_index,
+ mfib_source_t source,
+ mfib_entry_flags_t entry_flags,
+ fib_rpf_id_t rpf_id,
+ index_t rep_dpo);
+
+extern void mfib_entry_path_update(fib_node_index_t fib_entry_index,
+ mfib_source_t source,
+ const fib_route_path_t *rpath,
+ mfib_itf_flags_t itf_flags);
+
+
+extern int mfib_entry_path_remove(fib_node_index_t fib_entry_index,
+ mfib_source_t source,
+ const fib_route_path_t *rpath);
+
+extern int mfib_entry_delete(fib_node_index_t mfib_entry_index,
+ mfib_source_t source);
+
+extern int mfib_entry_cmp_for_sort(void *i1, void *i2);
+
+extern u32 mfib_entry_child_add(fib_node_index_t mfib_entry_index,
+ fib_node_type_t type,
+ fib_node_index_t child_index);
+extern void mfib_entry_child_remove(fib_node_index_t mfib_entry_index,
+ u32 sibling_index);
+
+extern void mfib_entry_lock(fib_node_index_t fib_entry_index);
+extern void mfib_entry_unlock(fib_node_index_t fib_entry_index);
+
+extern void mfib_entry_get_prefix(fib_node_index_t fib_entry_index,
+ mfib_prefix_t *pfx);
+extern u32 mfib_entry_get_fib_index(fib_node_index_t fib_entry_index);
+extern int mfib_entry_is_sourced(fib_node_index_t fib_entry_index,
+ mfib_source_t source);
+
+extern void mfib_entry_contribute_forwarding(
+ fib_node_index_t mfib_entry_index,
+ fib_forward_chain_type_t type,
+ dpo_id_t *dpo);
+
+extern void mfib_entry_encode(fib_node_index_t fib_entry_index,
+ fib_route_path_encode_t **api_rpaths);
+
+extern void mfib_entry_module_init(void);
+
+
+extern mfib_entry_t *mfib_entry_pool;
+
+static inline mfib_entry_t *
+mfib_entry_get (fib_node_index_t index)
+{
+ return (pool_elt_at_index(mfib_entry_pool, index));
+}
+static inline fib_node_index_t
+mfib_entry_get_index (const mfib_entry_t *mfe)
+{
+ return (mfe - mfib_entry_pool);
+}
+
+
+static inline mfib_itf_t *
+mfib_entry_itf_find (mfib_itf_t *itfs,
+ u32 sw_if_index)
+{
+ uword *p;
+
+ p = hash_get(itfs, sw_if_index);
+
+ if (NULL != p)
+ {
+ return (mfib_itf_get(p[0]));
+ }
+
+ return (NULL);
+}
+
+static inline mfib_itf_t *
+mfib_entry_get_itf (const mfib_entry_t *mfe,
+ u32 sw_if_index)
+{
+ return (mfib_entry_itf_find(mfe->mfe_itfs, sw_if_index));
+}
+
+#endif
diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c
new file mode 100644
index 00000000..3d8f4f98
--- /dev/null
+++ b/src/vnet/mfib/mfib_forward.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mfib/mfib_itf.h>
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/mfib/mfib_signal.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+
+#include <vnet/ip/ip4.h>
+#include <vnet/vnet.h>
+
+typedef struct mfib_forward_lookup_trace_t_ {
+ u32 entry_index;
+ u32 fib_index;
+} mfib_forward_lookup_trace_t;
+
+static u8 *
+format_mfib_forward_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mfib_forward_lookup_trace_t * t = va_arg (*args, mfib_forward_lookup_trace_t *);
+
+ s = format (s, "fib %d entry %d", t->fib_index, t->entry_index);
+ return s;
+}
+
+/* Common trace function for the mfib forward lookup nodes. */
+void
+mfib_forward_lookup_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 * from, n_left;
+ ip4_main_t * im = &ip4_main;
+
+ n_left = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left >= 4)
+ {
+ mfib_forward_lookup_trace_t * t0, * t1;
+ vlib_buffer_t * b0, * b1;
+ u32 bi0, bi1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
+                                 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1->entry_index = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ t1->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
+ vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ }
+ from += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ mfib_forward_lookup_trace_t * t0;
+ vlib_buffer_t * b0;
+ u32 bi0;
+
+ bi0 = from[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ from += 1;
+ n_left -= 1;
+ }
+}
+
+typedef enum mfib_forward_lookup_next_t_ {
+ MFIB_FORWARD_LOOKUP_NEXT_RPF,
+ MFIB_FORWARD_LOOKUP_N_NEXT,
+} mfib_forward_lookup_next_t;
+
+static uword
+mfib_forward_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_v4)
+{
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, MFIB_FORWARD_LOOKUP_NEXT_RPF,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ fib_node_index_t mfei0;
+ vlib_buffer_t * p0;
+ u32 fib_index0;
+ u32 pi0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ if (is_v4)
+ {
+ ip4_header_t * ip0;
+
+ fib_index0 = vec_elt (ip4_main.mfib_index_by_sw_if_index,
+ vnet_buffer(p0)->sw_if_index[VLIB_RX]);
+ ip0 = vlib_buffer_get_current (p0);
+ mfei0 = ip4_mfib_table_lookup(ip4_mfib_get(fib_index0),
+ &ip0->src_address,
+ &ip0->dst_address,
+ 64);
+ }
+ else
+ {
+ ip6_header_t * ip0;
+
+ fib_index0 = vec_elt (ip6_main.mfib_index_by_sw_if_index,
+ vnet_buffer(p0)->sw_if_index[VLIB_RX]);
+ ip0 = vlib_buffer_get_current (p0);
+ mfei0 = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index0),
+ &ip0->src_address,
+ &ip0->dst_address);
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = mfei0;
+ }
+
+ vlib_put_next_frame(vm, node,
+ MFIB_FORWARD_LOOKUP_NEXT_RPF,
+ n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ mfib_forward_lookup_trace(vm, node, frame);
+
+ return frame->n_vectors;
+}
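+
+/*
+ * The lookup above stores the matched entry index in the buffer's
+ * ip.adj_index[VLIB_TX] metadata; the -rpf node that follows reads it
+ * back with mfib_entry_get() to make the accept/replicate decision.
+ */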
+
+static uword
+ip4_mfib_forward_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mfib_forward_lookup (vm, node, frame, 1));
+}
+
+VLIB_REGISTER_NODE (ip4_mfib_forward_lookup_node, static) = {
+ .function = ip4_mfib_forward_lookup,
+ .name = "ip4-mfib-forward-lookup",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mfib_forward_lookup_trace,
+
+ .n_next_nodes = MFIB_FORWARD_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [MFIB_FORWARD_LOOKUP_NEXT_RPF] = "ip4-mfib-forward-rpf",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mfib_forward_lookup_node,
+ ip4_mfib_forward_lookup)
+
+static uword
+ip6_mfib_forward_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mfib_forward_lookup (vm, node, frame, 0));
+}
+
+VLIB_REGISTER_NODE (ip6_mfib_forward_lookup_node, static) = {
+ .function = ip6_mfib_forward_lookup,
+ .name = "ip6-mfib-forward-lookup",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mfib_forward_lookup_trace,
+
+ .n_next_nodes = MFIB_FORWARD_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [MFIB_FORWARD_LOOKUP_NEXT_RPF] = "ip6-mfib-forward-rpf",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mfib_forward_lookup_node,
+ ip6_mfib_forward_lookup)
+
+
+typedef struct mfib_forward_rpf_trace_t_ {
+ u32 entry_index;
+ u32 sw_if_index;
+ mfib_itf_flags_t itf_flags;
+} mfib_forward_rpf_trace_t;
+
+typedef enum mfib_forward_rpf_next_t_ {
+ MFIB_FORWARD_RPF_NEXT_DROP,
+ MFIB_FORWARD_RPF_N_NEXT,
+} mfib_forward_rpf_next_t;
+
+static u8 *
+format_mfib_forward_rpf_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mfib_forward_rpf_trace_t * t = va_arg (*args, mfib_forward_rpf_trace_t *);
+
+ s = format (s, "entry %d", t->entry_index);
+ s = format (s, " %d", t->sw_if_index);
+ s = format (s, " %U", format_mfib_itf_flags, t->itf_flags);
+
+ return s;
+}
+
+static int
+mfib_forward_connected_check (vlib_buffer_t * b0,
+ u32 sw_if_index,
+ int is_v4)
+{
+ /*
+ * Lookup the source of the IP packet in the
+     * FIB. Return true if the entry is attached.
+ */
+ index_t lbi0;
+
+ if (is_v4)
+ {
+ load_balance_t *lb0;
+ ip4_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current(b0);
+
+ lbi0 = ip4_fib_forwarding_lookup(
+ ip4_fib_table_get_index_for_sw_if_index(
+ sw_if_index),
+ &ip0->src_address);
+ lb0 = load_balance_get(lbi0);
+
+ return (FIB_ENTRY_FLAG_ATTACHED &
+ lb0->lb_fib_entry_flags);
+ }
+ else
+ {
+ ASSERT(0);
+ }
+ return (0);
+}
+
+static void
+mfib_forward_itf_signal (vlib_main_t *vm,
+ const mfib_entry_t *mfe,
+ mfib_itf_t *mfi,
+ vlib_buffer_t *b0)
+{
+ mfib_itf_flags_t old_flags;
+
+ old_flags = __sync_fetch_and_or(&mfi->mfi_flags,
+ MFIB_ITF_FLAG_SIGNAL_PRESENT);
+
+ if (!(old_flags & MFIB_ITF_FLAG_SIGNAL_PRESENT))
+ {
+ /*
+ * we were the lucky ones to set the signal present flag
+ */
+ if (!(old_flags & MFIB_ITF_FLAG_DONT_PRESERVE))
+ {
+ /*
+ * preserve a copy of the packet for the control
+ * plane to examine.
+             * Only allow one preserved packet at a time, since
+ * when the signal present flag is cleared so is the
+ * preserved packet.
+ */
+ mfib_signal_push(mfe, mfi, b0);
+ }
+ else
+ {
+ /*
+ * The control plane just wants the signal, not the packet as well
+ */
+ mfib_signal_push(mfe, mfi, NULL);
+ }
+ }
+ /*
+ * else
+ * there is already a signal present on this interface that the
+ * control plane has not yet acknowledged
+ */
+}
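+
+/*
+ * Signal life-cycle sketch: the data plane atomically sets
+ * MFIB_ITF_FLAG_SIGNAL_PRESENT and queues at most one signal (with or
+ * without a preserved packet) per interface; the control plane later
+ * drains the queue via mfib_signal_send_one(), which clears the flag
+ * and so re-arms signalling on that interface.
+ */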
+
+always_inline uword
+mfib_forward_rpf (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_v4)
+{
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+ mfib_forward_rpf_next_t next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = MFIB_FORWARD_RPF_NEXT_DROP;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ fib_node_index_t mfei0;
+ const mfib_entry_t *mfe0;
+ mfib_itf_t *mfi0;
+ vlib_buffer_t * b0;
+ u32 pi0, next0;
+ mfib_itf_flags_t iflags0;
+ mfib_entry_flags_t eflags0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, pi0);
+ mfei0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ mfe0 = mfib_entry_get(mfei0);
+ mfi0 = mfib_entry_get_itf(mfe0,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+
+ /*
+ * throughout this function we are 'PREDICT' optimising
+ * for the case of throughput traffic that is not replicated
+ * to the host stack nor sets local flags
+ */
+
+ /*
+ * If the mfib entry has a configured RPF-ID check that
+ * in preference to an interface based RPF
+ */
+ if (MFIB_RPF_ID_NONE != mfe0->mfe_rpf_id)
+ {
+ iflags0 = (mfe0->mfe_rpf_id == vnet_buffer(b0)->ip.rpf_id ?
+ MFIB_ITF_FLAG_ACCEPT :
+ MFIB_ITF_FLAG_NONE);
+ }
+ else
+ {
+ if (PREDICT_TRUE(NULL != mfi0))
+ {
+ iflags0 = mfi0->mfi_flags;
+ }
+ else
+ {
+ iflags0 = MFIB_ITF_FLAG_NONE;
+ }
+ }
+ eflags0 = mfe0->mfe_flags;
+
+ if (PREDICT_FALSE(eflags0 & MFIB_ENTRY_FLAG_CONNECTED))
+ {
+ /*
+ * lookup the source in the unicast FIB - check it
+ * matches a connected.
+ */
+ if (mfib_forward_connected_check(
+ b0,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX],
+ is_v4))
+ {
+ mfib_forward_itf_signal(vm, mfe0, mfi0, b0);
+ }
+ }
+ if (PREDICT_FALSE((eflags0 & MFIB_ENTRY_FLAG_SIGNAL) ^
+ (iflags0 & MFIB_ITF_FLAG_NEGATE_SIGNAL)))
+ {
+ /*
+ * Entry signal XOR interface negate-signal
+ */
+ if (NULL != mfi0)
+ {
+ mfib_forward_itf_signal(vm, mfe0, mfi0, b0);
+ }
+ }
+
+ if (PREDICT_TRUE((iflags0 & MFIB_ITF_FLAG_ACCEPT) ||
+ (eflags0 & MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF)))
+ {
+ /*
+ * This interface is accepting packets for the matching entry
+ */
+ next0 = mfe0->mfe_rep.dpoi_next_node;
+
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] =
+ mfe0->mfe_rep.dpoi_index;
+ }
+ else
+ {
+ next0 = MFIB_FORWARD_RPF_NEXT_DROP;
+ }
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ mfib_forward_rpf_trace_t *t0;
+
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ t0->entry_index = mfei0;
+ t0->itf_flags = iflags0;
+ if (NULL == mfi0)
+ {
+ t0->sw_if_index = ~0;
+ }
+ else
+ {
+ t0->sw_if_index = mfi0->mfi_sw_if_index;
+ }
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame(vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_mfib_forward_rpf (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mfib_forward_rpf(vm, node, frame, 1));
+}
+
+
+VLIB_REGISTER_NODE (ip4_mfib_forward_rpf_node, static) = {
+ .function = ip4_mfib_forward_rpf,
+ .name = "ip4-mfib-forward-rpf",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mfib_forward_rpf_trace,
+
+ .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT,
+ .next_nodes = {
+ [MFIB_FORWARD_RPF_NEXT_DROP] = "ip4-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mfib_forward_rpf_node,
+ ip4_mfib_forward_rpf)
+
+static uword
+ip6_mfib_forward_rpf (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+    return (mfib_forward_rpf(vm, node, frame, 0));
+}
+
+
+VLIB_REGISTER_NODE (ip6_mfib_forward_rpf_node, static) = {
+ .function = ip6_mfib_forward_rpf,
+ .name = "ip6-mfib-forward-rpf",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mfib_forward_rpf_trace,
+
+ .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT,
+ .next_nodes = {
+ [MFIB_FORWARD_RPF_NEXT_DROP] = "ip6-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mfib_forward_rpf_node,
+ ip6_mfib_forward_rpf)
+
diff --git a/src/vnet/mfib/mfib_itf.c b/src/vnet/mfib/mfib_itf.c
new file mode 100644
index 00000000..f77b40e7
--- /dev/null
+++ b/src/vnet/mfib/mfib_itf.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vnet/mfib/mfib_itf.h>
+#include <vnet/mfib/mfib_signal.h>
+
+mfib_itf_t *mfib_itf_pool;
+
+index_t
+mfib_itf_create (u32 sw_if_index,
+ mfib_itf_flags_t mfi_flags)
+{
+ mfib_itf_t *mfib_itf;
+
+ pool_get_aligned(mfib_itf_pool, mfib_itf,
+ CLIB_CACHE_LINE_BYTES);
+
+ mfib_itf->mfi_sw_if_index = sw_if_index;
+ mfib_itf->mfi_flags = mfi_flags;
+ mfib_itf->mfi_si = INDEX_INVALID;
+
+ return (mfib_itf - mfib_itf_pool);
+}
+
+void
+mfib_itf_delete (mfib_itf_t *mfi)
+{
+ mfib_signal_remove_itf(mfi);
+ pool_put(mfib_itf_pool, mfi);
+}
+
+u8 *
+format_mfib_itf (u8 * s, va_list * args)
+{
+ mfib_itf_t *mfib_itf;
+ vnet_main_t *vnm;
+ index_t mfi;
+
+ mfi = va_arg (*args, index_t);
+
+ vnm = vnet_get_main();
+ mfib_itf = mfib_itf_get(mfi);
+
+ if (~0 != mfib_itf->mfi_sw_if_index)
+ {
+ return (format(s, " %U: %U",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm,
+ mfib_itf->mfi_sw_if_index),
+ format_mfib_itf_flags, mfib_itf->mfi_flags));
+ }
+ else
+ {
+ return (format(s, " local: %U",
+ format_mfib_itf_flags, mfib_itf->mfi_flags));
+ }
+ return (s);
+}
+
+static clib_error_t *
+show_mfib_itf_command (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ index_t mfii;
+
+ if (unformat (input, "%d", &mfii))
+ {
+ /*
+ * show one in detail
+ */
+ if (!pool_is_free_index(mfib_itf_pool, mfii))
+ {
+ vlib_cli_output (vm, "%d@%U",
+ mfii,
+ format_mfib_itf, mfii);
+ }
+ else
+ {
+ vlib_cli_output (vm, "itf %d invalid", mfii);
+ }
+ }
+ else
+ {
+ /*
+ * show all
+ */
+ vlib_cli_output (vm, "mFIB interfaces::");
+ pool_foreach_index(mfii, mfib_itf_pool,
+ ({
+ vlib_cli_output (vm, "%d@%U",
+ mfii,
+ format_mfib_itf, mfii);
+ }));
+ }
+
+ return (NULL);
+}
+
+/*?
+ * This command displays an MFIB interface, or all interfaces,
+ * indexed by their unique numerical identifier.
+ ?*/
+VLIB_CLI_COMMAND (show_mfib_itf, static) = {
+ .path = "show mfib interface",
+ .function = show_mfib_itf_command,
+ .short_help = "show mfib interface",
+};
diff --git a/src/vnet/mfib/mfib_itf.h b/src/vnet/mfib/mfib_itf.h
new file mode 100644
index 00000000..5f26a476
--- /dev/null
+++ b/src/vnet/mfib/mfib_itf.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_ITF_H__
+#define __MFIB_ITF_H__
+
+#include <vlib/vlib.h>
+#include <vnet/mfib/mfib_types.h>
+
+/**
+ * @brief An interface associated with a particular MFIB entry
+ */
+typedef struct mfib_itf_t_
+{
+ /**
+     * @brief Flags on the entry
+ */
+ mfib_itf_flags_t mfi_flags;
+
+ /**
+ * The SW IF index that this MFIB interface represents
+ */
+ u32 mfi_sw_if_index;
+
+ /**
+ * The index of the signal in the pending list
+ */
+ u32 mfi_si;
+} mfib_itf_t;
+
+
+extern index_t mfib_itf_create(u32 sw_if_index,
+ mfib_itf_flags_t mfi_flags);
+extern void mfib_itf_delete(mfib_itf_t *mfi);
+
+extern u8 *format_mfib_itf(u8 * s, va_list * args);
+
+extern mfib_itf_t *mfib_itf_pool;
+
+static inline mfib_itf_t *
+mfib_itf_get (index_t mi)
+{
+ return (pool_elt_at_index(mfib_itf_pool, mi));
+}
+static inline index_t
+mfib_itf_get_index (const mfib_itf_t *mfi)
+{
+ return (mfi - mfib_itf_pool);
+}
+
+#endif
diff --git a/src/vnet/mfib/mfib_signal.c b/src/vnet/mfib/mfib_signal.c
new file mode 100644
index 00000000..cd486da3
--- /dev/null
+++ b/src/vnet/mfib/mfib_signal.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <vnet/vnet.h>
+#include <vnet/mfib/mfib_signal.h>
+#include <vppinfra/dlist.h>
+
+/**
+ * @brief Pool of signals
+ */
+static mfib_signal_t *mfib_signal_pool;
+
+/**
+ * @brief pool of dlist elements
+ */
+static dlist_elt_t *mfib_signal_dlist_pool;
+
+/**
+ * the list/set of interfaces with signals pending
+ */
+typedef struct mfib_signal_q_t_
+{
+ /**
+     * the dlist index that is the head of the list
+ */
+ u32 mip_head;
+
+ /**
+ * Spin lock to protect the list
+ */
+ int mip_lock;
+} mfib_signal_q_t;
+
+/**
+ * @brief The pending queue of signals to deliver to the control plane
+ */
+static mfib_signal_q_t mfib_signal_pending;
+
+static void
+mfib_signal_list_init (void)
+{
+ dlist_elt_t *head;
+ u32 hi;
+
+ pool_get(mfib_signal_dlist_pool, head);
+ hi = head - mfib_signal_dlist_pool;
+
+ mfib_signal_pending.mip_head = hi;
+ clib_dlist_init(mfib_signal_dlist_pool, hi);
+}
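+
+/*
+ * The element allocated above is a sentinel: it carries no signal
+ * itself, it merely anchors the circular dlist that mip_head indexes.
+ */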
+
+void
+mfib_signal_module_init (void)
+{
+ mfib_signal_list_init();
+}
+
+static inline void
+mfib_signal_lock_acquire (void)
+{
+ while (__sync_lock_test_and_set (&mfib_signal_pending.mip_lock, 1))
+ ;
+}
+
+static inline void
+mfib_signal_lock_release (void)
+{
+ mfib_signal_pending.mip_lock = 0;
+}
+
+#define MFIB_SIGNAL_CRITICAL_SECTION(_body) \
+{ \
+    mfib_signal_lock_acquire(); \
+ do { \
+ _body; \
+ } while (0); \
+ mfib_signal_lock_release(); \
+}
+
+int
+mfib_signal_send_one (struct _unix_shared_memory_queue *q,
+ u32 context)
+{
+ u32 li, si;
+
+ /*
+ * with the lock held, pop a signal from the q.
+ */
+ MFIB_SIGNAL_CRITICAL_SECTION(
+ ({
+ li = clib_dlist_remove_head(mfib_signal_dlist_pool,
+ mfib_signal_pending.mip_head);
+ }));
+
+ if (~0 != li)
+ {
+ mfib_signal_t *mfs;
+ mfib_itf_t *mfi;
+ dlist_elt_t *elt;
+
+ elt = pool_elt_at_index(mfib_signal_dlist_pool, li);
+ si = elt->value;
+
+ mfs = pool_elt_at_index(mfib_signal_pool, si);
+ mfi = mfib_itf_get(mfs->mfs_itf);
+ mfi->mfi_si = INDEX_INVALID;
+ __sync_fetch_and_and(&mfi->mfi_flags,
+ ~MFIB_ITF_FLAG_SIGNAL_PRESENT);
+
+
+ vl_mfib_signal_send_one(q, context, mfs);
+
+ /*
+         * with the lock held, return the resources of the signals posted
+ */
+ MFIB_SIGNAL_CRITICAL_SECTION(
+ ({
+ pool_put_index(mfib_signal_pool, si);
+ pool_put_index(mfib_signal_dlist_pool, li);
+ }));
+
+ return (1);
+ }
+ return (0);
+}
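+
+/*
+ * An illustrative drain loop for a control-plane handler (a real
+ * caller would also bound this by the space available on the queue):
+ *
+ *   while (mfib_signal_send_one(q, context))
+ *       ;
+ */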
+
+void
+mfib_signal_push (const mfib_entry_t *mfe,
+ mfib_itf_t *mfi,
+ vlib_buffer_t *b0)
+{
+ mfib_signal_t *mfs;
+ dlist_elt_t *elt;
+ u32 si, li;
+
+ MFIB_SIGNAL_CRITICAL_SECTION(
+ ({
+ pool_get(mfib_signal_pool, mfs);
+ pool_get(mfib_signal_dlist_pool, elt);
+
+ si = mfs - mfib_signal_pool;
+ li = elt - mfib_signal_dlist_pool;
+
+ elt->value = si;
+ mfi->mfi_si = li;
+
+ clib_dlist_addhead(mfib_signal_dlist_pool,
+ mfib_signal_pending.mip_head,
+ li);
+ }));
+
+ mfs->mfs_entry = mfib_entry_get_index(mfe);
+ mfs->mfs_itf = mfib_itf_get_index(mfi);
+
+ if (NULL != b0)
+ {
+ mfs->mfs_buffer_len = b0->current_length;
+ memcpy(mfs->mfs_buffer,
+ vlib_buffer_get_current(b0),
+ (mfs->mfs_buffer_len > MFIB_SIGNAL_BUFFER_SIZE ?
+ MFIB_SIGNAL_BUFFER_SIZE :
+ mfs->mfs_buffer_len));
+ }
+ else
+ {
+ mfs->mfs_buffer_len = 0;
+ }
+}
+
+void
+mfib_signal_remove_itf (const mfib_itf_t *mfi)
+{
+ u32 li;
+
+ /*
+ * lock the queue to prevent further additions while we fiddle.
+ */
+ li = mfi->mfi_si;
+
+ if (INDEX_INVALID != li)
+ {
+ /*
+ * it's in the pending q
+ */
+ MFIB_SIGNAL_CRITICAL_SECTION(
+ ({
+ dlist_elt_t *elt;
+
+ /*
+ * with the lock held;
+ * - remove the signal from the pending list
+             *  - free up the signal and list entry objects
+ */
+ clib_dlist_remove(mfib_signal_dlist_pool, li);
+
+ elt = pool_elt_at_index(mfib_signal_dlist_pool, li);
+ pool_put_index(mfib_signal_pool, elt->value);
+ pool_put(mfib_signal_dlist_pool, elt);
+ }));
+ }
+}
diff --git a/src/vnet/mfib/mfib_signal.h b/src/vnet/mfib/mfib_signal.h
new file mode 100644
index 00000000..732d8aff
--- /dev/null
+++ b/src/vnet/mfib/mfib_signal.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_SIGNAL_H__
+#define __MFIB_SIGNAL_H__
+
+#include <vlib/vlib.h>
+#include <vnet/mfib/mfib_types.h>
+#include <vnet/mfib/mfib_itf.h>
+#include <vnet/mfib/mfib_entry.h>
+
+#define MFIB_SIGNAL_BUFFER_SIZE 255
+
+/**
+ * A pair of indices, for the entry and interface respectively.
+ */
+typedef struct mfib_signal_t_
+{
+ fib_node_index_t mfs_entry;
+ index_t mfs_itf;
+
+ /**
+     * @brief A buffer copied from the data plane that triggered the signal
+ */
+ u8 mfs_buffer[MFIB_SIGNAL_BUFFER_SIZE];
+
+ u8 mfs_buffer_len;
+} mfib_signal_t;
+
+
+extern void mfib_signal_push(const mfib_entry_t *mfe,
+ mfib_itf_t *mfi,
+ vlib_buffer_t *b0);
+extern void mfib_signal_remove_itf(const mfib_itf_t *mfi);
+
+extern void mfib_signal_module_init(void);
+
+struct _unix_shared_memory_queue;
+
+extern void vl_mfib_signal_send_one(struct _unix_shared_memory_queue *q,
+ u32 context,
+ const mfib_signal_t *mfs);
+extern int mfib_signal_send_one(struct _unix_shared_memory_queue *q,
+ u32 context);
+
+#endif
+
diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c
new file mode 100644
index 00000000..838864ff
--- /dev/null
+++ b/src/vnet/mfib/mfib_table.c
@@ -0,0 +1,647 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/mfib/mfib_signal.h>
+
+mfib_table_t *
+mfib_table_get (fib_node_index_t index,
+ fib_protocol_t proto)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (pool_elt_at_index(ip4_main.mfibs, index));
+ case FIB_PROTOCOL_IP6:
+ return (pool_elt_at_index(ip6_main.mfibs, index));
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (NULL);
+}
+
+static inline fib_node_index_t
+mfib_table_lookup_i (const mfib_table_t *mfib_table,
+ const mfib_prefix_t *prefix)
+{
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_mfib_table_lookup(&mfib_table->v4,
+ &prefix->fp_src_addr.ip4,
+ &prefix->fp_grp_addr.ip4,
+ prefix->fp_len));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_mfib_table_lookup(&mfib_table->v6,
+ &prefix->fp_src_addr.ip6,
+ &prefix->fp_grp_addr.ip6,
+ prefix->fp_len));
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+fib_node_index_t
+mfib_table_lookup (u32 fib_index,
+ const mfib_prefix_t *prefix)
+{
+ return (mfib_table_lookup_i(mfib_table_get(fib_index,
+ prefix->fp_proto),
+ prefix));
+}
+
+static inline fib_node_index_t
+mfib_table_lookup_exact_match_i (const mfib_table_t *mfib_table,
+ const mfib_prefix_t *prefix)
+{
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_mfib_table_lookup_exact_match(&mfib_table->v4,
+ &prefix->fp_grp_addr.ip4,
+ &prefix->fp_src_addr.ip4,
+ prefix->fp_len));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_mfib_table_lookup_exact_match(&mfib_table->v6,
+ &prefix->fp_grp_addr.ip6,
+ &prefix->fp_src_addr.ip6,
+ prefix->fp_len));
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+fib_node_index_t
+mfib_table_lookup_exact_match (u32 fib_index,
+ const mfib_prefix_t *prefix)
+{
+ return (mfib_table_lookup_exact_match_i(mfib_table_get(fib_index,
+ prefix->fp_proto),
+ prefix));
+}
+
+static void
+mfib_table_entry_remove (mfib_table_t *mfib_table,
+ const mfib_prefix_t *prefix,
+ fib_node_index_t fib_entry_index)
+{
+ vlib_smp_unsafe_warning();
+
+ mfib_table->mft_total_route_counts--;
+
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip4_mfib_table_entry_remove(&mfib_table->v4,
+ &prefix->fp_grp_addr.ip4,
+ &prefix->fp_src_addr.ip4,
+ prefix->fp_len);
+ break;
+ case FIB_PROTOCOL_IP6:
+ ip6_mfib_table_entry_remove(&mfib_table->v6,
+ &prefix->fp_grp_addr.ip6,
+ &prefix->fp_src_addr.ip6,
+ prefix->fp_len);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ ASSERT(0);
+ break;
+ }
+
+ mfib_entry_unlock(fib_entry_index);
+}
+
+static void
+mfib_table_entry_insert (mfib_table_t *mfib_table,
+ const mfib_prefix_t *prefix,
+ fib_node_index_t mfib_entry_index)
+{
+ vlib_smp_unsafe_warning();
+
+ mfib_entry_lock(mfib_entry_index);
+ mfib_table->mft_total_route_counts++;
+
+ switch (prefix->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip4_mfib_table_entry_insert(&mfib_table->v4,
+ &prefix->fp_grp_addr.ip4,
+ &prefix->fp_src_addr.ip4,
+ prefix->fp_len,
+ mfib_entry_index);
+ break;
+ case FIB_PROTOCOL_IP6:
+ ip6_mfib_table_entry_insert(&mfib_table->v6,
+ &prefix->fp_grp_addr.ip6,
+ &prefix->fp_src_addr.ip6,
+ prefix->fp_len,
+ mfib_entry_index);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+}
+
+fib_node_index_t
+mfib_table_entry_update (u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source,
+ fib_rpf_id_t rpf_id,
+ mfib_entry_flags_t entry_flags)
+{
+ fib_node_index_t mfib_entry_index;
+ mfib_table_t *mfib_table;
+
+ mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+ mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+ {
+ if (MFIB_ENTRY_FLAG_NONE != entry_flags)
+ {
+ /*
+ * update to a non-existing entry with non-zero flags
+ */
+ mfib_entry_index = mfib_entry_create(fib_index, source,
+ prefix, rpf_id,
+ entry_flags);
+
+ mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
+ }
+ /*
+ * else
+ * the entry doesn't exist and the request is to set no flags
+ * the result would be an entry that doesn't exist - so do nothing
+ */
+ }
+ else
+ {
+ mfib_entry_lock(mfib_entry_index);
+
+ if (mfib_entry_update(mfib_entry_index,
+ source,
+ entry_flags,
+ rpf_id,
+ INDEX_INVALID))
+ {
+ /*
+ * this update means we can now remove the entry.
+ */
+ mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index);
+ }
+
+ mfib_entry_unlock(mfib_entry_index);
+ }
+
+ return (mfib_entry_index);
+}
+
+fib_node_index_t
+mfib_table_entry_path_update (u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source,
+ const fib_route_path_t *rpath,
+ mfib_itf_flags_t itf_flags)
+{
+ fib_node_index_t mfib_entry_index;
+ mfib_table_t *mfib_table;
+
+ mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+ mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+ {
+ mfib_entry_index = mfib_entry_create(fib_index,
+ source,
+ prefix,
+ MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_NONE);
+
+ mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
+ }
+
+ mfib_entry_path_update(mfib_entry_index,
+ source,
+ rpath,
+ itf_flags);
+
+ return (mfib_entry_index);
+}
+
+void
+mfib_table_entry_path_remove (u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source,
+ const fib_route_path_t *rpath)
+{
+ fib_node_index_t mfib_entry_index;
+ mfib_table_t *mfib_table;
+
+ mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+ mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+ {
+ /*
+ * removing an entry that does not exist. I'll allow it.
+ */
+ }
+ else
+ {
+ int no_more_sources;
+
+ /*
+ * don't nobody go nowhere
+ */
+ mfib_entry_lock(mfib_entry_index);
+
+ no_more_sources = mfib_entry_path_remove(mfib_entry_index,
+ source,
+ rpath);
+
+ if (no_more_sources)
+ {
+ /*
+ * last source gone. remove from the table
+ */
+ mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index);
+ }
+
+ mfib_entry_unlock(mfib_entry_index);
+ }
+}
+
+fib_node_index_t
+mfib_table_entry_special_add (u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source,
+ mfib_entry_flags_t entry_flags,
+ index_t rep_dpo)
+{
+ fib_node_index_t mfib_entry_index;
+ mfib_table_t *mfib_table;
+
+ mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+ mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+ {
+ mfib_entry_index = mfib_entry_create(fib_index,
+ source,
+ prefix,
+ MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_NONE);
+
+ mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
+ }
+
+ mfib_entry_update(mfib_entry_index, source,
+ (MFIB_ENTRY_FLAG_EXCLUSIVE | entry_flags),
+ MFIB_RPF_ID_NONE,
+ rep_dpo);
+
+ return (mfib_entry_index);
+}
+
+static void
+mfib_table_entry_delete_i (u32 fib_index,
+ fib_node_index_t mfib_entry_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source)
+{
+ mfib_table_t *mfib_table;
+
+ mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+
+ /*
+ * don't nobody go nowhere
+ */
+ mfib_entry_lock(mfib_entry_index);
+
+ if (mfib_entry_delete(mfib_entry_index, source))
+ {
+ /*
+ * last source gone. remove from the table
+ */
+ mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index);
+ }
+ /*
+ * else
+ * still has sources, leave it be.
+ */
+
+ mfib_entry_unlock(mfib_entry_index);
+}
+
+void
+mfib_table_entry_delete (u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source)
+{
+ fib_node_index_t mfib_entry_index;
+
+ mfib_entry_index = mfib_table_lookup_exact_match(fib_index, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+ {
+ /*
+ * removing an entry that does not exist.
+ * I'll allow it, but I won't like it.
+ */
+ clib_warning("%U not in FIB", format_mfib_prefix, prefix);
+ }
+ else
+ {
+ mfib_table_entry_delete_i(fib_index, mfib_entry_index,
+ prefix, source);
+ }
+}
+
+void
+mfib_table_entry_delete_index (fib_node_index_t mfib_entry_index,
+ mfib_source_t source)
+{
+ mfib_prefix_t prefix;
+
+ mfib_entry_get_prefix(mfib_entry_index, &prefix);
+
+ mfib_table_entry_delete_i(mfib_entry_get_fib_index(mfib_entry_index),
+ mfib_entry_index, &prefix, source);
+}
+
+u32
+mfib_table_get_index_for_sw_if_index (fib_protocol_t proto,
+ u32 sw_if_index)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_mfib_table_get_index_for_sw_if_index(sw_if_index));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_mfib_table_get_index_for_sw_if_index(sw_if_index));
+ case FIB_PROTOCOL_MPLS:
+ ASSERT(0);
+ break;
+ }
+ return (~0);
+}
+
+u32
+mfib_table_find (fib_protocol_t proto,
+ u32 table_id)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_mfib_index_from_table_id(table_id));
+ case FIB_PROTOCOL_IP6:
+ return (ip6_mfib_index_from_table_id(table_id));
+ case FIB_PROTOCOL_MPLS:
+ ASSERT(0);
+ break;
+ }
+ return (~0);
+}
+
+static u32
+mfib_table_find_or_create_and_lock_i (fib_protocol_t proto,
+ u32 table_id,
+ mfib_source_t src,
+ const u8 *name)
+{
+ mfib_table_t *mfib_table;
+ fib_node_index_t fi;
+
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ fi = ip4_mfib_table_find_or_create_and_lock(table_id, src);
+ break;
+ case FIB_PROTOCOL_IP6:
+ fi = ip6_mfib_table_find_or_create_and_lock(table_id, src);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ default:
+ return (~0);
+ }
+
+ mfib_table = mfib_table_get(fi, proto);
+
+ if (NULL == mfib_table->mft_desc)
+ {
+ if (name && name[0])
+ {
+ mfib_table->mft_desc = format(NULL, "%s", name);
+ }
+ else
+ {
+ mfib_table->mft_desc = format(NULL, "%U-VRF:%d",
+ format_fib_protocol, proto,
+ table_id);
+ }
+ }
+
+ return (fi);
+}
+
+u32
+mfib_table_find_or_create_and_lock (fib_protocol_t proto,
+ u32 table_id,
+ mfib_source_t src)
+{
+ return (mfib_table_find_or_create_and_lock_i(proto, table_id,
+ src, NULL));
+}
+
+u32
+mfib_table_find_or_create_and_lock_w_name (fib_protocol_t proto,
+ u32 table_id,
+ mfib_source_t src,
+ const u8 *name)
+{
+ return (mfib_table_find_or_create_and_lock_i(proto, table_id,
+ src, name));
+}
+
+/**
+ * @brief Table flush context. Store the indices of matching FIB entries
+ * that need to be removed.
+ */
+typedef struct mfib_table_flush_ctx_t_
+{
+ /**
+ * The list of entries to flush
+ */
+ fib_node_index_t *mftf_entries;
+
+ /**
+ * The source we are flushing
+ */
+ mfib_source_t mftf_source;
+} mfib_table_flush_ctx_t;
+
+static int
+mfib_table_flush_cb (fib_node_index_t mfib_entry_index,
+ void *arg)
+{
+ mfib_table_flush_ctx_t *ctx = arg;
+
+ if (mfib_entry_is_sourced(mfib_entry_index, ctx->mftf_source))
+ {
+ vec_add1(ctx->mftf_entries, mfib_entry_index);
+ }
+ return (1);
+}
+
+void
+mfib_table_flush (u32 mfib_index,
+ fib_protocol_t proto,
+ mfib_source_t source)
+{
+ fib_node_index_t *mfib_entry_index;
+ mfib_table_flush_ctx_t ctx = {
+ .mftf_entries = NULL,
+ .mftf_source = source,
+ };
+
+ mfib_table_walk(mfib_index, proto,
+ mfib_table_flush_cb,
+ &ctx);
+
+ vec_foreach(mfib_entry_index, ctx.mftf_entries)
+ {
+ mfib_table_entry_delete_index(*mfib_entry_index, source);
+ }
+
+ vec_free(ctx.mftf_entries);
+}
+
+static void
+mfib_table_destroy (mfib_table_t *mfib_table)
+{
+ vec_free(mfib_table->mft_desc);
+
+ switch (mfib_table->mft_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip4_mfib_table_destroy(&mfib_table->v4);
+ break;
+ case FIB_PROTOCOL_IP6:
+ ip6_mfib_table_destroy(&mfib_table->v6);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ ASSERT(0);
+ break;
+ }
+}
+
+void
+mfib_table_unlock (u32 fib_index,
+ fib_protocol_t proto,
+ mfib_source_t source)
+{
+ mfib_table_t *mfib_table;
+
+ mfib_table = mfib_table_get(fib_index, proto);
+ mfib_table->mft_locks[source]--;
+ mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]--;
+
+ if (0 == mfib_table->mft_locks[source])
+ {
+ /*
+ * The source no longer needs the table. Flush any routes
+ * from it just in case.
+ */
+ mfib_table_flush(fib_index, proto, source);
+ }
+
+ if (0 == mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS])
+ {
+ /*
+ * no more locks from any source - kill it
+ */
+ mfib_table_destroy(mfib_table);
+ }
+}
+
+void
+mfib_table_lock (u32 fib_index,
+ fib_protocol_t proto,
+ mfib_source_t source)
+{
+ mfib_table_t *mfib_table;
+
+ mfib_table = mfib_table_get(fib_index, proto);
+ mfib_table->mft_locks[source]++;
+ mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]++;
+}
+
+void
+mfib_table_walk (u32 fib_index,
+ fib_protocol_t proto,
+ mfib_table_walk_fn_t fn,
+ void *ctx)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip4_mfib_table_walk(ip4_mfib_get(fib_index), fn, ctx);
+ break;
+ case FIB_PROTOCOL_IP6:
+ ip6_mfib_table_walk(ip6_mfib_get(fib_index), fn, ctx);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+}
+
+u8*
+format_mfib_table_name (u8* s, va_list ap)
+{
+ fib_node_index_t fib_index = va_arg(ap, fib_node_index_t);
+ fib_protocol_t proto = va_arg(ap, int); // int promotion
+ mfib_table_t *mfib_table;
+
+ mfib_table = mfib_table_get(fib_index, proto);
+
+ s = format(s, "%v", mfib_table->mft_desc);
+
+ return (s);
+}
+
+static clib_error_t *
+mfib_module_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, fib_module_init)))
+ return (error);
+ if ((error = vlib_call_init_function (vm, rn_module_init)))
+ return (error);
+
+ mfib_entry_module_init();
+ mfib_signal_module_init();
+
+ return (error);
+}
+
+VLIB_INIT_FUNCTION(mfib_module_init);
diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h
new file mode 100644
index 00000000..93f90dd5
--- /dev/null
+++ b/src/vnet/mfib/mfib_table.h
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_TABLE_H__
+#define __MFIB_TABLE_H__
+
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <vnet/mfib/mfib_types.h>
+
+/**
+ * Keep a lock per-source and a total
+ */
+#define MFIB_TABLE_N_LOCKS (MFIB_N_SOURCES+1)
+#define MFIB_TABLE_TOTAL_LOCKS MFIB_N_SOURCES
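+
+/*
+ * A sketch of the accounting these indices give (this is what
+ * mfib_table_lock()/mfib_table_unlock() do): each lock bumps both the
+ * per-source count and the running total.
+ *
+ *   mfib_table->mft_locks[source]++;
+ *   mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]++;
+ */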
+
+/**
+ * @brief
+ * A protocol Independent IP multicast FIB table
+ */
+typedef struct mfib_table_t_
+{
+ /**
+ * A union of the protocol specific FIBs that provide the
+ * underlying LPM mechanism.
+ * This element is first in the struct so that it is in the
+ * first cache line.
+ */
+ union {
+ ip4_mfib_t v4;
+ ip6_mfib_t v6;
+ };
+
+ /**
+ * Which protocol this table serves. Used to switch on the union above.
+ */
+ fib_protocol_t mft_proto;
+
+ /**
+ * number of locks on the table
+ */
+ u16 mft_locks[MFIB_TABLE_N_LOCKS];
+
+ /**
+ * Table ID (hash key) for this FIB.
+ */
+ u32 mft_table_id;
+
+ /**
+ * Index into FIB vector.
+ */
+ fib_node_index_t mft_index;
+
+ /**
+ * Total route counters
+ */
+ u32 mft_total_route_counts;
+
+ /**
+ * Table description
+ */
+ u8* mft_desc;
+} mfib_table_t;
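+
+/*
+ * The union is keyed by mft_proto; a sketch of the access pattern used
+ * throughout mfib_table.c:
+ *
+ *   switch (mfib_table->mft_proto)
+ *   {
+ *   case FIB_PROTOCOL_IP4:
+ *       ... use &mfib_table->v4 ...
+ *       break;
+ *   case FIB_PROTOCOL_IP6:
+ *       ... use &mfib_table->v6 ...
+ *       break;
+ *   case FIB_PROTOCOL_MPLS:
+ *       ASSERT(0); // no MPLS mfib
+ *       break;
+ *   }
+ */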
+
+/**
+ * @brief
+ * Format the description/name of the table
+ */
+extern u8* format_mfib_table_name(u8* s, va_list ap);
+
+/**
+ * @brief
+ * Perform a longest prefix match in the non-forwarding table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to lookup
+ *
+ * @return
+ * The index of the fib_entry_t for the best match, which may be the default route
+ */
+extern fib_node_index_t mfib_table_lookup(u32 fib_index,
+ const mfib_prefix_t *prefix);
+
+/**
+ * @brief
+ * Perform an exact match in the non-forwarding table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to lookup
+ *
+ * @return
+ * The index of the fib_entry_t for the exact match, or INVALID
+ * if there is no match.
+ */
+extern fib_node_index_t mfib_table_lookup_exact_match(u32 fib_index,
+ const mfib_prefix_t *prefix);
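+
+/*
+ * Example, drawn from the unit tests: an exact-match lookup of the
+ * (*,*) entry via a zero-length prefix; `fib_index' is assumed to be
+ * in scope.
+ *
+ *   mfib_prefix_t pfx_dft = {
+ *       .fp_len = 0,
+ *       .fp_proto = FIB_PROTOCOL_IP4,
+ *   };
+ *   fib_node_index_t mfei;
+ *
+ *   mfei = mfib_table_lookup_exact_match(fib_index, &pfx_dft);
+ *   if (FIB_NODE_INDEX_INVALID == mfei)
+ *       ... no such entry ...
+ */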
+
+/**
+ * @brief
+ * Add a new (with no replication) or lock an existing entry
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t mfib_table_entry_update(u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source,
+ fib_rpf_id_t rpf_id,
+ mfib_entry_flags_t flags);
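+
+/*
+ * A sketch of the create/remove semantics (see the unit tests): setting
+ * flags on a non-existent entry creates it; resetting to
+ * MFIB_ENTRY_FLAG_NONE when the flags are the entry's only remaining
+ * state removes it.
+ *
+ *   mfib_table_entry_update(fib_index, &pfx, MFIB_SOURCE_API,
+ *                           MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_SIGNAL);
+ *   ...
+ *   mfib_table_entry_update(fib_index, &pfx, MFIB_SOURCE_API,
+ *                           MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_NONE);
+ */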
+
+/**
+ * @brief
+ * Add n paths to an entry (aka route) in the FIB. If the entry does not
+ * exist, it will be created.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param rpaths
+ * A vector of paths.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t mfib_table_entry_path_update(u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source,
+ const fib_route_path_t *rpath,
+ mfib_itf_flags_t flags);
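+
+/*
+ * Example, drawn from the unit tests: add an accepting path via an
+ * interface; `sw_if_index' is an assumed, valid interface index.
+ *
+ *   fib_route_path_t path_via_if0 = {
+ *       .frp_proto = fib_proto_to_dpo(FIB_PROTOCOL_IP4),
+ *       .frp_addr = zero_addr,
+ *       .frp_sw_if_index = sw_if_index,
+ *       .frp_fib_index = ~0,
+ *       .frp_weight = 0,
+ *       .frp_flags = 0,
+ *   };
+ *
+ *   mfib_table_entry_path_update(fib_index, &pfx, MFIB_SOURCE_API,
+ *                                &path_via_if0, MFIB_ITF_FLAG_ACCEPT);
+ */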
+
+/**
+ * @brief
+ * Remove n paths to an entry (aka route) in the FIB. If this is the entry's
+ * last path, then the entry will be removed, unless it has other sources.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param rpaths
+ * A vector of paths.
+ */
+extern void mfib_table_entry_path_remove(u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source,
+ const fib_route_path_t *paths);
+
+
+
+/**
+ * @brief
+ * Delete a FIB entry. If the entry has no more sources, then it is
+ * removed from the table.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to remove
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ */
+extern void mfib_table_entry_delete(u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source);
+
+/**
+ * @brief
+ * Delete a FIB entry. If the entry has no more sources, then it is
+ * removed from the table.
+ *
+ * @param entry_index
+ * The index of the FIB entry
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ */
+extern void mfib_table_entry_delete_index(fib_node_index_t entry_index,
+ mfib_source_t source);
+
+/**
+ * @brief
+ * Add a 'special' entry to the mFIB that links to the DPO passed
+ * A special entry is an entry that the FIB is not expected to resolve
+ * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup).
+ * Instead the client/source provides the index of a replicate DPO to link to.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param rep_dpo
+ * The replicate DPO index to link to.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t mfib_table_entry_special_add(u32 fib_index,
+ const mfib_prefix_t *prefix,
+ mfib_source_t source,
+ mfib_entry_flags_t flags,
+ index_t rep_dpo);
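+
+/*
+ * Example, drawn from the unit tests: build a 1-bucket replicate DPO
+ * over an mcast adjacency `ai' (assumed) and link the entry to it.
+ *
+ *   dpo_id_t td = DPO_INVALID;
+ *   index_t repi = replicate_create(1, DPO_PROTO_IP4);
+ *
+ *   dpo_set(&td, DPO_ADJACENCY_MCAST, DPO_PROTO_IP4, ai);
+ *   replicate_set_bucket(repi, 0, &td);
+ *
+ *   mfib_table_entry_special_add(fib_index, &pfx, MFIB_SOURCE_SRv6,
+ *                                MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF, repi);
+ *   dpo_reset(&td);
+ */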
+
+/**
+ * @brief
+ * Flush all entries from a table for the source
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the entries in the table
+ *
+ * @param source
+ * the source to flush
+ */
+extern void mfib_table_flush(u32 fib_index,
+ fib_protocol_t proto,
+ mfib_source_t source);
+
+/**
+ * @brief
+ * Get the index of the FIB bound to the interface
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param sw_if_index
+ * The interface index
+ *
+ * @return fib_index
+ * The index of the FIB
+ */
+extern u32 mfib_table_get_index_for_sw_if_index(fib_protocol_t proto,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Get the index of the FIB for a Table-ID. This DOES NOT create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table_id
+ * The Table-ID
+ *
+ * @return fib_index
+ * The index of the FIB, which may be INVALID.
+ */
+extern u32 mfib_table_find(fib_protocol_t proto, u32 table_id);
+
+
+/**
+ * @brief
+ * Get the index of the FIB for a Table-ID. This DOES create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table-id
+ * The Table-ID
+ *
+ * @return fib_index
+ * The index of the FIB
+ *
+ * @param source
+ * The ID of the client/source.
+ */
+extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto,
+ u32 table_id,
+ mfib_source_t source);
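+
+/*
+ * Example, drawn from the unit tests: find or create table 11 for the
+ * API source, use the returned index for entry operations, and balance
+ * with an unlock when done.
+ *
+ *   u32 fib_index;
+ *
+ *   fib_index = mfib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+ *                                                  11,
+ *                                                  MFIB_SOURCE_API);
+ *   ...
+ *   mfib_table_unlock(fib_index, FIB_PROTOCOL_IP4, MFIB_SOURCE_API);
+ */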
+
+/**
+ * @brief
+ * Get the index of the FIB for a Table-ID. This DOES create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table_id
+ * The Table-ID
+ *
+ * @return fib_index
+ * The index of the FIB
+ *
+ * @param source
+ * The ID of the client/source.
+ *
+ * @param name
+ * The client is choosing the name they want the table to have
+ */
+extern u32 mfib_table_find_or_create_and_lock_w_name(fib_protocol_t proto,
+ u32 table_id,
+ mfib_source_t source,
+ const u8 *name);
+
+
+/**
+ * @brief
+ * Release a reference counting lock on the table. When the last lock
+ * has gone, the FIB is deleted.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ * The ID of the client/source.
+ */
+extern void mfib_table_unlock(u32 fib_index,
+ fib_protocol_t proto,
+ mfib_source_t source);
+
+/**
+ * @brief
+ * Take a reference counting lock on the table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ * The ID of the client/source.
+ */
+extern void mfib_table_lock(u32 fib_index,
+ fib_protocol_t proto,
+ mfib_source_t source);
+
+/**
+ * @brief
+ * Return the number of entries in the FIB.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @return number of entries.
+ */
+extern u32 mfib_table_get_num_entries(u32 fib_index,
+ fib_protocol_t proto);
+
+/**
+ * @brief
+ * Get a pointer to a FIB table
+ */
+extern mfib_table_t *mfib_table_get(fib_node_index_t index,
+ fib_protocol_t proto);
+
+/**
+ * @brief Call back function when walking entries in a FIB table
+ */
+typedef int (*mfib_table_walk_fn_t)(fib_node_index_t fei,
+ void *ctx);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B.: This is NOT safe with respect to deletes. If you need to delete,
+ * walk the whole table storing the entries in a vector, then delete the
+ * elements (a sketch of this pattern follows the declaration below).
+ */
+extern void mfib_table_walk(u32 fib_index,
+ fib_protocol_t proto,
+ mfib_table_walk_fn_t fn,
+ void *ctx);
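+
+/*
+ * A delete-safe walk, sketched from mfib_table_flush(): collect the
+ * entry indices in a vector during the walk, then delete afterwards.
+ * `fib_index', `proto' and `source' are assumed to be in scope.
+ *
+ *   static int
+ *   collect_cb (fib_node_index_t mfei, void *arg)
+ *   {
+ *       fib_node_index_t **entries = arg;
+ *
+ *       vec_add1(*entries, mfei);
+ *       return (1); // keep walking
+ *   }
+ *
+ *   fib_node_index_t *entries = NULL, *mfeip;
+ *
+ *   mfib_table_walk(fib_index, proto, collect_cb, &entries);
+ *   vec_foreach(mfeip, entries)
+ *   {
+ *       mfib_table_entry_delete_index(*mfeip, source);
+ *   }
+ *   vec_free(entries);
+ */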
+
+#endif
diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c
new file mode 100644
index 00000000..2562bc14
--- /dev/null
+++ b/src/vnet/mfib/mfib_test.c
@@ -0,0 +1,1405 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls_types.h>
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/mfib/mfib_signal.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_test.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/mpls_fib.h>
+
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/adj/adj_mcast.h>
+
+#define MFIB_TEST_I(_cond, _comment, _args...) \
+({ \
+ int _evald = (_cond); \
+ if (!(_evald)) { \
+ fformat(stderr, "FAIL:%d: " _comment "\n", \
+ __LINE__, ##_args); \
+ } else { \
+ fformat(stderr, "PASS:%d: " _comment "\n", \
+ __LINE__, ##_args); \
+ } \
+ _evald; \
+})
+#define MFIB_TEST(_cond, _comment, _args...) \
+{ \
+ if (!MFIB_TEST_I(_cond, _comment, ##_args)) { \
+ return 1;\
+ ASSERT(!("FAIL: " _comment)); \
+ } \
+}
+#define MFIB_TEST_NS(_cond) \
+{ \
+ if (!MFIB_TEST_I(_cond, "")) { \
+ return 1;\
+ ASSERT(!("FAIL: ")); \
+ } \
+}
+
+/**
+ * An 'I'm not fussed if this is not efficient' store of test data
+ */
+typedef struct test_main_t_ {
+ /**
+ * HW interface indices
+ */
+ u32 hw_if_indicies[4];
+ /**
+ * HW interfaces
+ */
+ vnet_hw_interface_t * hw[4];
+
+} test_main_t;
+static test_main_t test_main;
+
+/* fake ethernet device class, distinct from "fake-ethX" */
+static u8 * format_test_interface_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "test-eth%d", dev_instance);
+}
+
+static uword dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static clib_error_t *
+test_interface_admin_up_down (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+ return 0;
+}
+
+VNET_DEVICE_CLASS (test_interface_device_class,static) = {
+ .name = "Test interface",
+ .format_device_name = format_test_interface_name,
+ .tx_function = dummy_interface_tx,
+ .admin_up_down_function = test_interface_admin_up_down,
+};
+
+static u8 *hw_address;
+
+static int
+mfib_test_mk_intf (u32 ninterfaces)
+{
+ clib_error_t * error = NULL;
+ test_main_t *tm = &test_main;
+ u8 byte;
+ u32 i;
+
+ ASSERT(ninterfaces <= ARRAY_LEN(tm->hw_if_indicies));
+
+ for (i=0; i<6; i++)
+ {
+ byte = 0xd0+i;
+ vec_add1(hw_address, byte);
+ }
+
+ for (i = 0; i < ninterfaces; i++)
+ {
+ hw_address[5] = i;
+
+ error = ethernet_register_interface(vnet_get_main(),
+ test_interface_device_class.index,
+ i /* instance */,
+ hw_address,
+ &tm->hw_if_indicies[i],
+ /* flag change */ 0);
+
+ MFIB_TEST((NULL == error), "ADD interface %d", i);
+
+ error = vnet_hw_interface_set_flags(vnet_get_main(),
+ tm->hw_if_indicies[i],
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
+ tm->hw_if_indicies[i]);
+ vec_validate (ip4_main.fib_index_by_sw_if_index,
+ tm->hw[i]->sw_if_index);
+ vec_validate (ip6_main.fib_index_by_sw_if_index,
+ tm->hw[i]->sw_if_index);
+ ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+
+ vec_validate (ip4_main.mfib_index_by_sw_if_index,
+ tm->hw[i]->sw_if_index);
+ vec_validate (ip6_main.mfib_index_by_sw_if_index,
+ tm->hw[i]->sw_if_index);
+ ip4_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[i]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ MFIB_TEST((NULL == error), "UP interface %d", i);
+ }
+ /*
+ * re-eval after the inevitable realloc
+ */
+ for (i = 0; i < ninterfaces; i++)
+ {
+ tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
+ tm->hw_if_indicies[i]);
+ }
+
+ return (0);
+}
+
+#define MFIB_TEST_REP(_cond, _comment, _args...) \
+{ \
+ if (!MFIB_TEST_I(_cond, _comment, ##_args)) { \
+ return (0); \
+ } \
+}
+
+static int
+mfib_test_validate_rep_v (const replicate_t *rep,
+ u16 n_buckets,
+ va_list ap)
+{
+ const dpo_id_t *dpo;
+ adj_index_t ai;
+ dpo_type_t dt;
+ int bucket;
+
+ MFIB_TEST_REP((n_buckets == rep->rep_n_buckets),
+ "n_buckets = %d", rep->rep_n_buckets);
+
+ for (bucket = 0; bucket < n_buckets; bucket++)
+ {
+ dt = va_arg(ap, int); // type promotion
+ ai = va_arg(ap, adj_index_t);
+ dpo = replicate_get_bucket_i(rep, bucket);
+
+ MFIB_TEST_REP((dt == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+
+ if (DPO_RECEIVE != dt)
+ {
+ MFIB_TEST_REP((ai == dpo->dpoi_index),
+ "bucket %d [exp:%d] stacks on %U",
+ bucket, ai,
+ format_dpo_id, dpo, 0);
+ }
+ }
+ return (!0);
+}
+
+static fib_forward_chain_type_t
+fib_forw_chain_type_from_fib_proto (fib_protocol_t proto)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+ case FIB_PROTOCOL_IP6:
+ return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+ default:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+
+static int
+mfib_test_entry (fib_node_index_t fei,
+ mfib_entry_flags_t eflags,
+ u16 n_buckets,
+ ...)
+{
+ const mfib_entry_t *mfe;
+ const replicate_t *rep;
+ mfib_prefix_t pfx;
+ va_list ap;
+ int res;
+
+ va_start(ap, n_buckets);
+
+ mfe = mfib_entry_get(fei);
+ mfib_entry_get_prefix(fei, &pfx);
+
+ MFIB_TEST_REP((eflags == mfe->mfe_flags),
+ "%U has %U expect %U",
+ format_mfib_prefix, &pfx,
+ format_mfib_entry_flags, mfe->mfe_flags,
+ format_mfib_entry_flags, eflags);
+
+ if (0 == n_buckets)
+ {
+ MFIB_TEST_REP((DPO_DROP == mfe->mfe_rep.dpoi_type),
+ "%U links to %U",
+ format_mfib_prefix, &pfx,
+ format_dpo_id, &mfe->mfe_rep, 0);
+ res = !0;
+ }
+ else
+ {
+ dpo_id_t tmp = DPO_INVALID;
+
+ mfib_entry_contribute_forwarding(
+ fei,
+ fib_forw_chain_type_from_fib_proto(pfx.fp_proto),
+ &tmp);
+ rep = replicate_get(tmp.dpoi_index);
+
+ MFIB_TEST_REP((DPO_REPLICATE == tmp.dpoi_type),
+ "%U links to %U",
+ format_mfib_prefix, &pfx,
+ format_dpo_type, tmp.dpoi_type);
+
+ res = mfib_test_validate_rep_v(rep, n_buckets, ap);
+
+ dpo_reset(&tmp);
+ }
+
+ va_end(ap);
+
+ return (res);
+}
+
+static int
+mfib_test_entry_itf (fib_node_index_t fei,
+ u32 sw_if_index,
+ mfib_itf_flags_t flags)
+{
+ const mfib_entry_t *mfe;
+ const mfib_itf_t *mfi;
+ mfib_prefix_t pfx;
+
+ mfe = mfib_entry_get(fei);
+ mfi = mfib_entry_get_itf(mfe, sw_if_index);
+ mfib_entry_get_prefix(fei, &pfx);
+
+ MFIB_TEST_REP((NULL != mfi),
+ "%U has interface %d",
+ format_mfib_prefix, &pfx, sw_if_index);
+
+ MFIB_TEST_REP((flags == mfi->mfi_flags),
+ "%U interface %d has flags %U expect %U",
+ format_mfib_prefix, &pfx, sw_if_index,
+ format_mfib_itf_flags, flags,
+ format_mfib_itf_flags, mfi->mfi_flags);
+
+ return (!0);
+}
+
+static int
+mfib_test_entry_no_itf (fib_node_index_t fei,
+ u32 sw_if_index)
+{
+ const mfib_entry_t *mfe;
+ const mfib_itf_t *mfi;
+ mfib_prefix_t pfx;
+
+ mfe = mfib_entry_get(fei);
+ mfi = mfib_entry_get_itf(mfe, sw_if_index);
+ mfib_entry_get_prefix(fei, &pfx);
+
+ MFIB_TEST_REP((NULL == mfi),
+ "%U has no interface %d",
+ format_mfib_prefix, &pfx, sw_if_index);
+
+ return (!0);
+}
+
+static int
+mfib_test_i (fib_protocol_t PROTO,
+ vnet_link_t LINKT,
+ const mfib_prefix_t *pfx_no_forward,
+ const mfib_prefix_t *pfx_s_g,
+ const mfib_prefix_t *pfx_star_g_1,
+ const mfib_prefix_t *pfx_star_g_2,
+ const mfib_prefix_t *pfx_star_g_3,
+ const mfib_prefix_t *pfx_star_g_slash_m)
+{
+ fib_node_index_t mfei, mfei_dflt, mfei_no_f, mfei_s_g, mfei_g_1, mfei_g_2, mfei_g_3, mfei_g_m;
+ u32 fib_index, n_entries, n_itfs, n_reps, n_pls;
+ fib_node_index_t ai_1, ai_2, ai_3;
+ test_main_t *tm;
+
+ mfib_prefix_t all_1s;
+ memset(&all_1s, 0xfd, sizeof(all_1s));
+
+ n_entries = pool_elts(mfib_entry_pool);
+ n_itfs = pool_elts(mfib_itf_pool);
+ n_reps = pool_elts(replicate_pool);
+ n_pls = fib_path_list_pool_size();
+ tm = &test_main;
+
+ ai_1 = adj_mcast_add_or_lock(PROTO,
+ LINKT,
+ tm->hw[1]->sw_if_index);
+ ai_2 = adj_mcast_add_or_lock(PROTO,
+ LINKT,
+ tm->hw[2]->sw_if_index);
+ ai_3 = adj_mcast_add_or_lock(PROTO,
+ LINKT,
+ tm->hw[3]->sw_if_index);
+
+ MFIB_TEST(3 == adj_mcast_db_size(), "3 MCAST adjs");
+
+ /* Find or create FIB table 11 */
+ fib_index = mfib_table_find_or_create_and_lock(PROTO, 11, MFIB_SOURCE_API);
+
+ mfib_prefix_t pfx_dft = {
+ .fp_len = 0,
+ .fp_proto = PROTO,
+ };
+ mfei_dflt = mfib_table_lookup_exact_match(fib_index, &pfx_dft);
+ MFIB_TEST(FIB_NODE_INDEX_INVALID != mfei_dflt, "(*,*) present");
+ MFIB_TEST(mfib_test_entry(mfei_dflt,
+ MFIB_ENTRY_FLAG_DROP,
+ 0),
+ "(*,*) no replications");
+
+
+ fib_route_path_t path_via_if0 = {
+ .frp_proto = fib_proto_to_dpo(PROTO),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = tm->hw[0]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = 0,
+ };
+
+ mfib_table_entry_path_update(fib_index,
+ pfx_no_forward,
+ MFIB_SOURCE_API,
+ &path_via_if0,
+ MFIB_ITF_FLAG_ACCEPT);
+
+ mfei_no_f = mfib_table_lookup_exact_match(fib_index, pfx_no_forward);
+ MFIB_TEST(mfib_test_entry(mfei_no_f,
+ MFIB_ENTRY_FLAG_NONE,
+ 0),
+ "%U no replcaitions",
+ format_mfib_prefix, pfx_no_forward);
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei_no_f, tm->hw[0]->sw_if_index,
+ MFIB_ITF_FLAG_ACCEPT));
+
+ fib_route_path_t path_via_if1 = {
+ .frp_proto = fib_proto_to_dpo(PROTO),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = tm->hw[1]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = 0,
+ };
+ fib_route_path_t path_via_if2 = {
+ .frp_proto = fib_proto_to_dpo(PROTO),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = tm->hw[2]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = 0,
+ };
+ fib_route_path_t path_via_if3 = {
+ .frp_proto = fib_proto_to_dpo(PROTO),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = tm->hw[3]->sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = 0,
+ };
+ fib_route_path_t path_for_us = {
+ .frp_proto = fib_proto_to_dpo(PROTO),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+
+ /*
+ * An (S,G) with 1 accepting and 3 forwarding paths
+ */
+ mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if0,
+ MFIB_ITF_FLAG_ACCEPT);
+ mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if1,
+ MFIB_ITF_FLAG_FORWARD);
+ mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if2,
+ MFIB_ITF_FLAG_FORWARD);
+ mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if3,
+ (MFIB_ITF_FLAG_FORWARD |
+ MFIB_ITF_FLAG_NEGATE_SIGNAL));
+
+ mfei_s_g = mfib_table_lookup_exact_match(fib_index, pfx_s_g);
+
+ MFIB_TEST(FIB_NODE_INDEX_INVALID != mfei_s_g,
+ "%U present",
+ format_mfib_prefix, pfx_s_g);
+ MFIB_TEST(mfib_test_entry(mfei_s_g,
+ MFIB_ENTRY_FLAG_NONE,
+ 3,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate ok",
+ format_mfib_prefix, pfx_s_g);
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[0]->sw_if_index,
+ MFIB_ITF_FLAG_ACCEPT));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[1]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[2]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[3]->sw_if_index,
+ (MFIB_ITF_FLAG_FORWARD |
+ MFIB_ITF_FLAG_NEGATE_SIGNAL)));
+
+ /*
+ * A (*,G) with the same G as the (S,G), but
+ * different paths. Test our LPM.
+ */
+ mfei_g_1 = mfib_table_entry_path_update(fib_index,
+ pfx_star_g_1,
+ MFIB_SOURCE_API,
+ &path_via_if0,
+ MFIB_ITF_FLAG_ACCEPT);
+ mfib_table_entry_path_update(fib_index,
+ pfx_star_g_1,
+ MFIB_SOURCE_API,
+ &path_via_if1,
+ MFIB_ITF_FLAG_FORWARD);
+
+ /*
+ * test we find the *,G and S,G via LPM and exact matches
+ */
+ mfei = mfib_table_lookup_exact_match(fib_index,
+ pfx_star_g_1);
+ MFIB_TEST(mfei == mfei_g_1,
+ "%U found via exact match",
+ format_mfib_prefix, pfx_star_g_1);
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 1,
+ DPO_ADJACENCY_MCAST, ai_1),
+ "%U replicate ok",
+ format_mfib_prefix, pfx_star_g_1);
+
+ mfei = mfib_table_lookup(fib_index,
+ pfx_star_g_1);
+ MFIB_TEST(mfei == mfei_g_1,
+ "%U found via LP match",
+ format_mfib_prefix, pfx_star_g_1);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 1,
+ DPO_ADJACENCY_MCAST, ai_1),
+ "%U replicate ok",
+ format_mfib_prefix, pfx_star_g_1);
+
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_s_g);
+ MFIB_TEST(mfei == mfei_s_g,
+ "%U found via exact match",
+ format_mfib_prefix, pfx_s_g);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 3,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+ mfei = mfib_table_lookup(fib_index, pfx_s_g);
+ MFIB_TEST(mfei == mfei_s_g,
+ "%U found via LP match",
+ format_mfib_prefix, pfx_s_g);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 3,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+
+ /*
+ * A (*,G/m) with the same root G as the (*,G), but
+ * different paths. Test our LPM.
+ */
+ mfei_g_m = mfib_table_entry_path_update(fib_index,
+ pfx_star_g_slash_m,
+ MFIB_SOURCE_API,
+ &path_via_if2,
+ MFIB_ITF_FLAG_ACCEPT);
+ mfib_table_entry_path_update(fib_index,
+ pfx_star_g_slash_m,
+ MFIB_SOURCE_API,
+ &path_via_if3,
+ MFIB_ITF_FLAG_FORWARD);
+
+ /*
+ * test we find the (*,G/m), (*,G) and (S,G) via LPM and exact matches
+ */
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_1);
+ MFIB_TEST((mfei_g_1 == mfei),
+ "%U found via DP LPM: %d",
+ format_mfib_prefix, pfx_star_g_1, mfei);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 1,
+ DPO_ADJACENCY_MCAST, ai_1),
+ "%U replicate ok",
+ format_mfib_prefix, pfx_star_g_1);
+
+ mfei = mfib_table_lookup(fib_index, pfx_star_g_1);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 1,
+ DPO_ADJACENCY_MCAST, ai_1),
+ "%U replicate ok",
+ format_mfib_prefix, pfx_star_g_1);
+
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_s_g);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 3,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+ mfei = mfib_table_lookup(fib_index, pfx_s_g);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 3,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_slash_m);
+ MFIB_TEST(mfei == mfei_g_m,
+ "%U found via exact match",
+ format_mfib_prefix, pfx_star_g_slash_m);
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 1,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_star_g_slash_m);
+ MFIB_TEST(mfei_g_m == mfib_table_lookup(fib_index, pfx_star_g_slash_m),
+ "%U found via LPM",
+ format_mfib_prefix, pfx_star_g_slash_m);
+
+ /*
+ * Add a for-us path
+ */
+ mfei = mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_for_us,
+ MFIB_ITF_FLAG_FORWARD);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 4,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3,
+ DPO_RECEIVE, 0),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+
+ /*
+ * remove a for-us path
+ */
+ mfib_table_entry_path_remove(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_for_us);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 3,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+
+ /*
+ * update an existing forwarding path to be only accepting
+ * - expect it to be removed from the replication set.
+ */
+ mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if3,
+ MFIB_ITF_FLAG_ACCEPT);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 2,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index,
+ MFIB_ITF_FLAG_ACCEPT));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[3]->sw_if_index,
+ MFIB_ITF_FLAG_ACCEPT));
+ /*
+ * Make the path forwarding again
+ * - expect it to be added back to the replication set
+ */
+ mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if3,
+ (MFIB_ITF_FLAG_FORWARD |
+ MFIB_ITF_FLAG_ACCEPT |
+ MFIB_ITF_FLAG_NEGATE_SIGNAL));
+
+ mfei = mfib_table_lookup_exact_match(fib_index,
+ pfx_s_g);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 3,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index,
+ MFIB_ITF_FLAG_ACCEPT));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[3]->sw_if_index,
+ (MFIB_ITF_FLAG_FORWARD |
+ MFIB_ITF_FLAG_ACCEPT |
+ MFIB_ITF_FLAG_NEGATE_SIGNAL)));
+
+ /*
+ * update flags on the entry
+ */
+ mfib_table_entry_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_SIGNAL);
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_SIGNAL,
+ 3,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2,
+ DPO_ADJACENCY_MCAST, ai_3),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+
+ /*
+ * remove paths
+ */
+ mfib_table_entry_path_remove(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if3);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_SIGNAL,
+ 2,
+ DPO_ADJACENCY_MCAST, ai_1,
+ DPO_ADJACENCY_MCAST, ai_2),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index,
+ MFIB_ITF_FLAG_ACCEPT));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index));
+
+ mfib_table_entry_path_remove(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if1);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_SIGNAL,
+ 1,
+ DPO_ADJACENCY_MCAST, ai_2),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index,
+ MFIB_ITF_FLAG_ACCEPT));
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index));
+
+ /*
+ * remove the accepting-only interface
+ */
+ mfib_table_entry_path_remove(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if0);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_SIGNAL,
+ 1,
+ DPO_ADJACENCY_MCAST, ai_2),
+ "%U replicate OK",
+ format_mfib_prefix, pfx_s_g);
+ MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+ MFIB_ITF_FLAG_FORWARD));
+ MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[0]->sw_if_index));
+ MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[1]->sw_if_index));
+ MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index));
+
+ /*
+ * remove the last path, the entry still has flags so it remains
+ */
+ mfib_table_entry_path_remove(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_if2);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_SIGNAL,
+ 0),
+ "%U no replications",
+ format_mfib_prefix, pfx_s_g);
+
+ /*
+ * update flags on the entry
+ */
+ mfib_table_entry_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ MFIB_RPF_ID_NONE,
+ (MFIB_ENTRY_FLAG_SIGNAL |
+ MFIB_ENTRY_FLAG_CONNECTED));
+ MFIB_TEST(mfib_test_entry(mfei,
+ (MFIB_ENTRY_FLAG_SIGNAL |
+ MFIB_ENTRY_FLAG_CONNECTED),
+ 0),
+ "%U no replications",
+ format_mfib_prefix, pfx_s_g);
+
+ /*
+ * An entry with a NS interface
+ */
+ mfei_g_2 = mfib_table_entry_path_update(fib_index,
+ pfx_star_g_2,
+ MFIB_SOURCE_API,
+ &path_via_if0,
+ (MFIB_ITF_FLAG_ACCEPT |
+ MFIB_ITF_FLAG_NEGATE_SIGNAL));
+ MFIB_TEST(mfib_test_entry(mfei_g_2,
+ MFIB_ENTRY_FLAG_NONE,
+ 0),
+ "%U No replications",
+ format_mfib_prefix, pfx_star_g_2);
+
+ /*
+ * Simulate a signal from the data-plane
+ */
+ {
+ mfib_entry_t *mfe;
+ mfib_itf_t *mfi;
+
+ mfe = mfib_entry_get(mfei_g_2);
+ mfi = mfib_entry_get_itf(mfe, path_via_if0.frp_sw_if_index);
+
+ mfib_signal_push(mfe, mfi, NULL);
+ }
+
+ /*
+ * An entry with a NS interface
+ */
+ mfei_g_3 = mfib_table_entry_path_update(fib_index,
+ pfx_star_g_3,
+ MFIB_SOURCE_API,
+ &path_via_if0,
+ (MFIB_ITF_FLAG_ACCEPT |
+ MFIB_ITF_FLAG_NEGATE_SIGNAL));
+ MFIB_TEST(mfib_test_entry(mfei_g_3,
+ MFIB_ENTRY_FLAG_NONE,
+ 0),
+ "%U No replications",
+ format_mfib_prefix, pfx_star_g_3);
+
+ /*
+ * Simulate a signal from the data-plane
+ */
+ {
+ mfib_entry_t *mfe;
+ mfib_itf_t *mfi;
+
+ mfe = mfib_entry_get(mfei_g_3);
+ mfi = mfib_entry_get_itf(mfe, path_via_if0.frp_sw_if_index);
+
+ mfib_signal_push(mfe, mfi, NULL);
+ }
+
+ if (FIB_PROTOCOL_IP6 == PROTO)
+ {
+ /*
+ * All the entries are present. Let's ensure we can find them all
+ * via exact and longest prefix matches.
+ */
+ /*
+ * A source address we will never match
+ */
+ ip6_address_t src = {
+ .as_u64[0] = clib_host_to_net_u64(0x3001000000000000),
+ .as_u64[1] = clib_host_to_net_u64(0xffffffffffffffff),
+ };
+
+ /*
+ * Find the (*,G/m)
+ */
+ MFIB_TEST((mfei_g_m == ip6_mfib_table_lookup2(
+ ip6_mfib_get(fib_index),
+ &src,
+ &pfx_star_g_slash_m->fp_grp_addr.ip6)),
+ "%U found via DP LPM grp=%U",
+ format_mfib_prefix, pfx_star_g_slash_m,
+ format_ip6_address, &pfx_star_g_slash_m->fp_grp_addr.ip6);
+
+ ip6_address_t tmp = pfx_star_g_slash_m->fp_grp_addr.ip6;
+ tmp.as_u8[15] = 0xff;
+
+ MFIB_TEST((mfei_g_m == ip6_mfib_table_lookup2(
+ ip6_mfib_get(fib_index),
+ &pfx_s_g->fp_src_addr.ip6,
+ &tmp)),
+ "%U found via DP LPM grp=%U",
+ format_mfib_prefix, pfx_star_g_slash_m,
+ format_ip6_address, &tmp);
+
+ /*
+ * Find the (S,G).
+ */
+ mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index),
+ &pfx_s_g->fp_src_addr.ip6,
+ &pfx_s_g->fp_grp_addr.ip6);
+ MFIB_TEST((mfei_s_g == mfei),
+ "%U found via DP LPM: %d",
+ format_mfib_prefix, pfx_s_g, mfei);
+
+ /*
+ * Find the 3 (*,G) s
+ */
+ mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index),
+ &src,
+ &pfx_star_g_1->fp_grp_addr.ip6);
+ MFIB_TEST((mfei_g_1 == mfei),
+ "%U found via DP LPM: %d",
+ format_mfib_prefix, pfx_star_g_1, mfei);
+ mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index),
+ &src,
+ &pfx_star_g_2->fp_grp_addr.ip6);
+ MFIB_TEST((mfei_g_2 == mfei),
+ "%U found via DP LPM: %d",
+ format_mfib_prefix, pfx_star_g_2, mfei);
+ mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index),
+ &src,
+ &pfx_star_g_3->fp_grp_addr.ip6);
+ MFIB_TEST((mfei_g_3 == mfei),
+ "%U found via DP LPM: %d",
+ format_mfib_prefix, pfx_star_g_3, mfei);
+ }
+
+ /*
+ * remove flags on the entry. This is the last of the
+ * state associated with the entry, so now it goes.
+ */
+ mfib_table_entry_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_NONE);
+ mfei = mfib_table_lookup_exact_match(fib_index,
+ pfx_s_g);
+ MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+ "%U gone",
+ format_mfib_prefix, pfx_s_g);
+
+ /*
+ * remove the last path on the no forward entry - the last entry
+ */
+ mfib_table_entry_path_remove(fib_index,
+ pfx_no_forward,
+ MFIB_SOURCE_API,
+ &path_via_if0);
+
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_no_forward);
+ MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+ "%U gone",
+ format_mfib_prefix, pfx_no_forward);
+
+ /*
+ * hard delete the (*,232.1.1.1)
+ */
+ mfib_table_entry_delete(fib_index,
+ pfx_star_g_1,
+ MFIB_SOURCE_API);
+
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_1);
+ MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+ "%U gone",
+ format_mfib_prefix, pfx_star_g_1);
+ /*
+ * remove the entry whilst the signal is pending
+ */
+ mfib_table_entry_delete(fib_index,
+ pfx_star_g_2,
+ MFIB_SOURCE_API);
+
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_2);
+ MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+ "%U Gone",
+ format_mfib_prefix, pfx_star_g_2);
+ mfib_table_entry_delete(fib_index,
+ pfx_star_g_3,
+ MFIB_SOURCE_API);
+
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_3);
+ MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+ "%U Gone",
+ format_mfib_prefix, pfx_star_g_3);
+
+ mfib_table_entry_delete(fib_index,
+ pfx_star_g_slash_m,
+ MFIB_SOURCE_API);
+
+ mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_slash_m);
+ MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+ "%U Gone",
+ format_mfib_prefix, pfx_star_g_slash_m);
+
+ /*
+ * Add a prefix as a special/exclusive route
+ */
+ dpo_id_t td = DPO_INVALID;
+ index_t repi = replicate_create(1, fib_proto_to_dpo(PROTO));
+
+ dpo_set(&td, DPO_ADJACENCY_MCAST, fib_proto_to_dpo(PROTO), ai_2);
+ replicate_set_bucket(repi, 0, &td);
+
+ mfei = mfib_table_entry_special_add(fib_index,
+ pfx_star_g_3,
+ MFIB_SOURCE_SRv6,
+ MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF,
+ repi);
+ MFIB_TEST(mfib_test_entry(mfei,
+ (MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF |
+ MFIB_ENTRY_FLAG_EXCLUSIVE),
+ 1,
+ DPO_ADJACENCY_MCAST, ai_2),
+ "%U exclusive replicate OK",
+ format_mfib_prefix, pfx_star_g_3);
+
+ /*
+ * update a special/exclusive route
+ */
+ index_t repi2 = replicate_create(1, fib_proto_to_dpo(PROTO));
+
+ dpo_set(&td, DPO_ADJACENCY_MCAST, fib_proto_to_dpo(PROTO), ai_1);
+ replicate_set_bucket(repi2, 0, &td);
+
+ mfei = mfib_table_entry_special_add(fib_index,
+ pfx_star_g_3,
+ MFIB_SOURCE_SRv6,
+ MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF,
+ repi2);
+ MFIB_TEST(mfib_test_entry(mfei,
+ (MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF |
+ MFIB_ENTRY_FLAG_EXCLUSIVE),
+ 1,
+ DPO_ADJACENCY_MCAST, ai_1),
+ "%U exclusive update replicate OK",
+ format_mfib_prefix, pfx_star_g_3);
+
+ mfib_table_entry_delete(fib_index,
+ pfx_star_g_3,
+ MFIB_SOURCE_SRv6);
+ dpo_reset(&td);
+
+ /*
+ * A Multicast LSP. This is an mLDP head-end.
+ */
+ fib_node_index_t ai_mpls_10_10_10_1, lfei;
+ ip46_address_t nh_10_10_10_1 = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ },
+ };
+ ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ VNET_LINK_MPLS,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index);
+
+ fib_prefix_t pfx_3500 = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 3500,
+ .fp_eos = MPLS_EOS,
+ .fp_payload_proto = DPO_PROTO_IP4,
+ };
+ fib_test_rep_bucket_t mc_0 = {
+ .type = FT_REP_LABEL_O_ADJ,
+ .label_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label = 3300,
+ .eos = MPLS_EOS,
+ },
+ };
+ mpls_label_t *l3300 = NULL;
+ vec_add1(l3300, 3300);
+
+ /*
+ * MPLS enable an interface so we get the MPLS table created
+ */
+ mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API, NULL);
+ mpls_sw_interface_enable_disable(&mpls_main,
+ tm->hw[0]->sw_if_index,
+ 1, 0);
+
+ lfei = fib_table_entry_update_one_path(0, // default MPLS Table
+ &pfx_3500,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_MULTICAST,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ l3300,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ MFIB_TEST(fib_test_validate_entry(lfei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 1,
+ &mc_0),
+ "3500 via replicate over 10.10.10.1");
+
+ /*
+ * An (S,G) that resolves via the mLDP head-end
+ */
+ fib_route_path_t path_via_mldp = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_local_label = pfx_3500.fp_label,
+ .frp_eos = MPLS_EOS,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = 0,
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ };
+ dpo_id_t mldp_dpo = DPO_INVALID;
+
+ fib_entry_contribute_forwarding(lfei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ &mldp_dpo);
+
+ mfei = mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_via_mldp,
+ MFIB_ITF_FLAG_FORWARD);
+
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 1,
+ DPO_REPLICATE, mldp_dpo.dpoi_index),
+ "%U over-mLDP replicate OK",
+ format_mfib_prefix, pfx_s_g);
+
+ /*
+ * Add a for-us path. This tests two types of non-attached paths on one entry
+ */
+ mfei = mfib_table_entry_path_update(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API,
+ &path_for_us,
+ MFIB_ITF_FLAG_FORWARD);
+ MFIB_TEST(mfib_test_entry(mfei,
+ MFIB_ENTRY_FLAG_NONE,
+ 2,
+ DPO_REPLICATE, mldp_dpo.dpoi_index,
+ DPO_RECEIVE, 0),
+ "%U mLDP+for-us replicate OK",
+ format_mfib_prefix, pfx_s_g);
+
+ mfib_table_entry_delete(fib_index,
+ pfx_s_g,
+ MFIB_SOURCE_API);
+ fib_table_entry_delete(0,
+ &pfx_3500,
+ FIB_SOURCE_API);
+ dpo_reset(&mldp_dpo);
+
+ /*
+ * Unlock the table - it's the last lock so should be gone thereafter
+ */
+ mfib_table_unlock(fib_index, PROTO, MFIB_SOURCE_API);
+
+ MFIB_TEST((FIB_NODE_INDEX_INVALID ==
+ mfib_table_find(PROTO, fib_index)),
+ "MFIB table %d gone", fib_index);
+
+ adj_unlock(ai_1);
+ adj_unlock(ai_2);
+ adj_unlock(ai_3);
+
+ /*
+ * MPLS disable the interface
+ */
+ mpls_sw_interface_enable_disable(&mpls_main,
+ tm->hw[0]->sw_if_index,
+ 0, 0);
+ mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
+
+ /*
+ * test we've leaked no resources
+ */
+ MFIB_TEST(0 == adj_mcast_db_size(), "%d MCAST adjs", adj_mcast_db_size());
+ MFIB_TEST(n_pls == fib_path_list_pool_size(), "%d=%d path-lists",
+ n_pls, fib_path_list_pool_size());
+ MFIB_TEST(n_reps == pool_elts(replicate_pool), "%d=%d replicates",
+ n_reps, pool_elts(replicate_pool));
+ MFIB_TEST(n_entries == pool_elts(mfib_entry_pool),
+ " No more entries %d!=%d",
+ n_entries, pool_elts(mfib_entry_pool));
+ MFIB_TEST(n_itfs == pool_elts(mfib_itf_pool),
+ " No more Interfaces %d!=%d",
+ n_itfs, pool_elts(mfib_itf_pool));
+
+ return (0);
+}
+
+static int
+mfib_test_v4 (void)
+{
+ const mfib_prefix_t pfx_224_s_8 = {
+ .fp_len = 8,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_grp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0xe0000000),
+ }
+ };
+ const mfib_prefix_t pfx_1_1_1_1_c_239_1_1_1 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_grp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0xef010101),
+ },
+ .fp_src_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010101),
+ },
+ };
+ const mfib_prefix_t pfx_239_1_1_1 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_grp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0xef010101),
+ },
+ .fp_src_addr = {
+ .ip4.as_u32 = 0,
+ },
+ };
+ const mfib_prefix_t pfx_239_1_1_2 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_grp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0xef010102),
+ },
+ .fp_src_addr = {
+ .ip4.as_u32 = 0,
+ },
+ };
+ const mfib_prefix_t pfx_239_1_1_3 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_grp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0xef010103),
+ },
+ .fp_src_addr = {
+ .ip4.as_u32 = 0,
+ },
+ };
+ const mfib_prefix_t pfx_239 = {
+ .fp_len = 8,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_grp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0xef000000),
+ },
+ .fp_src_addr = {
+ .ip4.as_u32 = 0,
+ },
+ };
+
+ return (mfib_test_i(FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &pfx_224_s_8,
+ &pfx_1_1_1_1_c_239_1_1_1,
+ &pfx_239_1_1_1,
+ &pfx_239_1_1_2,
+ &pfx_239_1_1_3,
+ &pfx_239));
+}
+
+static int
+mfib_test_v6 (void)
+{
+ const mfib_prefix_t pfx_ffd_s_12 = {
+ .fp_len = 12,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0xffd0000000000000),
+ }
+ };
+ const mfib_prefix_t pfx_2001_1_c_ff_1 = {
+ .fp_len = 256,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000),
+ .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001),
+ },
+ .fp_src_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0x2001000000000000),
+ .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001),
+ },
+ };
+ const mfib_prefix_t pfx_ff_1 = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000),
+ .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001),
+ },
+ };
+ const mfib_prefix_t pfx_ff_2 = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000),
+ .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000002),
+ },
+ };
+ const mfib_prefix_t pfx_ff_3 = {
+ /*
+ * this is the ALL DHCP routers address
+ */
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0xff02000100000000),
+ .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000002),
+ },
+ };
+ const mfib_prefix_t pfx_ff = {
+ .fp_len = 16,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000),
+ .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000000),
+ },
+ };
+
+ return (mfib_test_i(FIB_PROTOCOL_IP6,
+ VNET_LINK_IP6,
+ &pfx_ffd_s_12,
+ &pfx_2001_1_c_ff_1,
+ &pfx_ff_1,
+ &pfx_ff_2,
+ &pfx_ff_3,
+ &pfx_ff));
+}
+
+static clib_error_t *
+mfib_test (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg)
+{
+ int res = 0;
+
+ res += mfib_test_mk_intf(4);
+ res += mfib_test_v4();
+ res += mfib_test_v6();
+
+ if (res)
+ {
+ return clib_error_return(0, "MFIB Unit Test Failed");
+ }
+ else
+ {
+ return (NULL);
+ }
+}
+
+VLIB_CLI_COMMAND (test_fib_command, static) = {
+ .path = "test mfib",
+ .short_help = "mfib unit tests - DO NOT RUN ON A LIVE SYSTEM",
+ .function = mfib_test,
+};
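+
+/*
+ * Example invocation from the debug CLI (illustrative only; as the
+ * short help warns, never run this on a live system):
+ *
+ *   vpp# test mfib
+ */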
+
+clib_error_t *
+mfib_test_init (vlib_main_t *vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (mfib_test_init);
diff --git a/src/vnet/mfib/mfib_types.c b/src/vnet/mfib/mfib_types.c
new file mode 100644
index 00000000..8452d86f
--- /dev/null
+++ b/src/vnet/mfib/mfib_types.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mfib/mfib_types.h>
+
+#include <vnet/ip/ip.h>
+
+/**
+ * String names for each flag
+ */
+static const char *mfib_flag_names[] = MFIB_ENTRY_NAMES_SHORT;
+static const char *mfib_flag_names_long[] = MFIB_ENTRY_NAMES_LONG;
+
+static const char *mfib_itf_flag_long_names[] = MFIB_ITF_NAMES_LONG;
+static const char *mfib_itf_flag_names[] = MFIB_ITF_NAMES_SHORT;
+
+u8 *
+format_mfib_prefix (u8 * s, va_list * args)
+{
+ mfib_prefix_t *fp = va_arg (*args, mfib_prefix_t *);
+
+ /*
+ * protocol specific so it prints ::/0 correctly.
+ */
+ switch (fp->fp_proto)
+ {
+ case FIB_PROTOCOL_IP6:
+ {
+ ip6_address_t p6 = fp->fp_grp_addr.ip6;
+ u32 len = (fp->fp_len > 128 ? 128 : fp->fp_len);
+
+ ip6_address_mask(&p6, &(ip6_main.fib_masks[len]));
+
+ if (ip6_address_is_zero(&fp->fp_src_addr.ip6))
+ {
+ s = format(s, "(*, ");
+ }
+ else
+ {
+ s = format (s, "(%U, ", format_ip6_address, &fp->fp_src_addr.ip6);
+ }
+ s = format (s, "%U", format_ip6_address, &p6);
+ s = format (s, "/%d)", len);
+ break;
+ }
+ case FIB_PROTOCOL_IP4:
+ {
+ ip4_address_t p4 = fp->fp_grp_addr.ip4;
+ u32 len = (fp->fp_len > 32 ? 32 : fp->fp_len);
+
+ p4.as_u32 &= ip4_main.fib_masks[len];
+
+ if (0 == fp->fp_src_addr.ip4.as_u32)
+ {
+ s = format(s, "(*, ");
+ }
+ else
+ {
+ s = format (s, "(%U, ", format_ip4_address, &fp->fp_src_addr.ip4);
+ }
+ s = format (s, "%U", format_ip4_address, &p4);
+ s = format (s, "/%d)", len);
+ break;
+ }
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+
+ return (s);
+}
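+
+/*
+ * Illustrative renderings, based on the format strings above. Note the
+ * printed mask length is clamped to the address length, so an IPv4
+ * (S,G) stored with fp_len 64 prints as /32:
+ *
+ *   (*, 224.0.0.0/8)        - a (*,G/m) prefix with no source
+ *   (1.1.1.1, 239.1.1.1/32) - an (S,G) prefix
+ */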
+
+u8 *
+format_mfib_entry_flags (u8 * s, va_list * args)
+{
+ mfib_entry_attribute_t attr;
+ mfib_entry_flags_t flags;
+
+ flags = va_arg (*args, mfib_entry_flags_t);
+
+ if (MFIB_ENTRY_FLAG_NONE != flags) {
+ s = format(s, " flags:");
+ FOR_EACH_MFIB_ATTRIBUTE(attr) {
+ if ((1<<attr) & flags) {
+ s = format (s, "%s,", mfib_flag_names[attr]);
+ }
+ }
+ }
+
+ return (s);
+}
+
+u8 *
+format_mfib_itf_flags (u8 * s, va_list * args)
+{
+ mfib_itf_attribute_t attr;
+ mfib_itf_flags_t flags;
+
+ flags = va_arg (*args, mfib_itf_flags_t);
+
+ FOR_EACH_MFIB_ITF_ATTRIBUTE(attr) {
+ if ((1<<attr) & flags) {
+ s = format (s, "%s,", mfib_itf_flag_long_names[attr]);
+ }
+ }
+
+ return (s);
+}
+
+uword
+unformat_mfib_itf_flags (unformat_input_t * input,
+ va_list * args)
+{
+ mfib_itf_flags_t old, *iflags = va_arg (*args, mfib_itf_flags_t*);
+ mfib_itf_attribute_t attr;
+
+ old = *iflags;
+ FOR_EACH_MFIB_ITF_ATTRIBUTE(attr) {
+ if (unformat (input, mfib_itf_flag_long_names[attr]))
+ *iflags |= (1 << attr);
+ }
+ FOR_EACH_MFIB_ITF_ATTRIBUTE(attr) {
+ if (unformat (input, mfib_itf_flag_names[attr]))
+ *iflags |= (1 << attr);
+ }
+
+ return (old == *iflags ? 0 : 1);
+}
+
+uword
+unformat_mfib_entry_flags (unformat_input_t * input,
+ va_list * args)
+{
+ mfib_entry_flags_t old, *eflags = va_arg (*args, mfib_entry_flags_t*);
+ mfib_entry_attribute_t attr;
+
+ old = *eflags;
+ FOR_EACH_MFIB_ATTRIBUTE(attr) {
+ if (unformat (input, mfib_flag_names[attr]))
+ *eflags |= (1 << attr);
+ }
+
+ return (old == *eflags ? 0 : 1);
+}
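+
+/*
+ * Usage sketch for the unformat functions above. Interface flags match
+ * the long names first, then the short names, so both "Forward" and "F"
+ * are accepted; entry flags match the short names only:
+ *
+ *   mfib_itf_flags_t iflags = MFIB_ITF_FLAG_NONE;
+ *   unformat (input, "%U", unformat_mfib_itf_flags, &iflags);
+ */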
+
+clib_error_t *
+mfib_show_route_flags (vlib_main_t * vm,
+ unformat_input_t * main_input,
+ vlib_cli_command_t * cmd)
+{
+ mfib_entry_attribute_t attr;
+
+ FOR_EACH_MFIB_ATTRIBUTE(attr) {
+ vlib_cli_output(vm, "%s = %s",
+ mfib_flag_names[attr],
+ mfib_flag_names_long[attr]);
+ }
+
+ return (NULL);
+}
+
+/*?
+ * This command displays the set of supported flags applicable to an MFIB route
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (mfib_route_flags_command, static) =
+{
+ .path = "show mfib route flags",
+ .short_help = "Flags applicable to an MFIB route",
+ .function = mfib_show_route_flags,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+mfib_show_itf_flags (vlib_main_t * vm,
+ unformat_input_t * main_input,
+ vlib_cli_command_t * cmd)
+{
+ mfib_itf_attribute_t attr;
+
+ FOR_EACH_MFIB_ITF_ATTRIBUTE(attr) {
+ vlib_cli_output(vm, "%s = %s",
+ mfib_itf_flag_names[attr],
+ mfib_itf_flag_long_names[attr]);
+ }
+
+ return (NULL);
+}
+
+/*?
+ * This command displays the set of supported flags applicable to an MFIB interface
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (mfib_itf_flags_command, static) =
+{
+ .path = "show mfib itf flags",
+  .short_help = "Flags applicable to an MFIB interface",
+ .function = mfib_show_itf_flags,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h
new file mode 100644
index 00000000..50aede04
--- /dev/null
+++ b/src/vnet/mfib/mfib_types.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_TYPES_H__
+#define __MFIB_TYPES_H__
+
+#include <vnet/fib/fib_types.h>
+
+/**
+ * Aggregate type for a prefix
+ */
+typedef struct mfib_prefix_t_ {
+ /**
+ * The mask length
+ */
+ u16 fp_len;
+
+ /**
+ * protocol type
+ */
+ fib_protocol_t fp_proto;
+
+ /**
+ * Pad to keep the address 4 byte aligned
+ */
+ u8 ___fp___pad;
+
+ /**
+   * The address type is not derivable from the fp_addr member alone.
+   * If it's v4, the first 3 u32s of the address will be 0; v6 addresses
+   * (even v4-mapped ones) have at least 2 u32s assigned to non-zero
+   * values. But when the address is all zeros, one cannot decide, hence
+   * the explicit fp_proto member.
+ */
+ ip46_address_t fp_grp_addr;
+ ip46_address_t fp_src_addr;
+} mfib_prefix_t;
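+
+/*
+ * Example: an IPv4 (S,G) prefix for (1.1.1.1, 239.1.1.1). The mask
+ * length covers source plus group, hence 64 for a full IPv4 (S,G):
+ *
+ *   const mfib_prefix_t pfx = {
+ *       .fp_len = 64,
+ *       .fp_proto = FIB_PROTOCOL_IP4,
+ *       .fp_grp_addr.ip4.as_u32 = clib_host_to_net_u32(0xef010101),
+ *       .fp_src_addr.ip4.as_u32 = clib_host_to_net_u32(0x01010101),
+ *   };
+ */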
+
+typedef enum mfib_entry_attribute_t_
+{
+ MFIB_ENTRY_ATTRIBUTE_FIRST = 0,
+ /**
+   * The control plane needs packets matching this entry to generate
+ * a signal.
+ */
+ MFIB_ENTRY_SIGNAL = MFIB_ENTRY_ATTRIBUTE_FIRST,
+ /**
+ * Drop all traffic to this route
+ */
+ MFIB_ENTRY_DROP,
+ /**
+   * The control plane needs to be informed of connected sources.
+ */
+ MFIB_ENTRY_CONNECTED,
+ /**
+   * Accept packets from any incoming interface.
+ * Use with extreme caution
+ */
+ MFIB_ENTRY_ACCEPT_ALL_ITF,
+ /**
+   * Exclusive - like its unicast counterpart, the source has provided
+   * the forwarding DPO directly. The entry therefore does not resolve
+   * paths via a path-list.
+ */
+ MFIB_ENTRY_EXCLUSIVE,
+
+ MFIB_ENTRY_INHERIT_ACCEPT,
+ MFIB_ENTRY_ATTRIBUTE_LAST = MFIB_ENTRY_INHERIT_ACCEPT,
+} mfib_entry_attribute_t;
+
+#define FOR_EACH_MFIB_ATTRIBUTE(_item) \
+ for (_item = MFIB_ENTRY_ATTRIBUTE_FIRST; \
+ _item <= MFIB_ENTRY_ATTRIBUTE_LAST; \
+ _item++)
+
+#define MFIB_ENTRY_NAMES_SHORT { \
+ [MFIB_ENTRY_SIGNAL] = "S", \
+ [MFIB_ENTRY_CONNECTED] = "C", \
+ [MFIB_ENTRY_DROP] = "D", \
+ [MFIB_ENTRY_ACCEPT_ALL_ITF] = "AA", \
+ [MFIB_ENTRY_INHERIT_ACCEPT] = "IA", \
+ [MFIB_ENTRY_EXCLUSIVE] = "E", \
+}
+
+#define MFIB_ENTRY_NAMES_LONG { \
+ [MFIB_ENTRY_SIGNAL] = "Signal", \
+ [MFIB_ENTRY_CONNECTED] = "Connected", \
+ [MFIB_ENTRY_DROP] = "Drop", \
+ [MFIB_ENTRY_ACCEPT_ALL_ITF] = "Accept-all-itf", \
+ [MFIB_ENTRY_INHERIT_ACCEPT] = "Inherit-Accept", \
+ [MFIB_ENTRY_EXCLUSIVE] = "Exclusive", \
+}
+
+typedef enum mfib_entry_flags_t_
+{
+ MFIB_ENTRY_FLAG_NONE,
+ MFIB_ENTRY_FLAG_SIGNAL = (1 << MFIB_ENTRY_SIGNAL),
+ MFIB_ENTRY_FLAG_DROP = (1 << MFIB_ENTRY_DROP),
+ MFIB_ENTRY_FLAG_CONNECTED = (1 << MFIB_ENTRY_CONNECTED),
+ MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF = (1 << MFIB_ENTRY_ACCEPT_ALL_ITF),
+ MFIB_ENTRY_FLAG_EXCLUSIVE = (1 << MFIB_ENTRY_EXCLUSIVE),
+ MFIB_ENTRY_FLAG_INHERIT_ACCEPT = (1 << MFIB_ENTRY_INHERIT_ACCEPT),
+} mfib_entry_flags_t;
+
+typedef enum mfib_itf_attribute_t_
+{
+ MFIB_ITF_ATTRIBUTE_FIRST,
+ MFIB_ITF_NEGATE_SIGNAL = MFIB_ITF_ATTRIBUTE_FIRST,
+ MFIB_ITF_ACCEPT,
+ MFIB_ITF_FORWARD,
+ MFIB_ITF_SIGNAL_PRESENT,
+ MFIB_ITF_DONT_PRESERVE,
+ MFIB_ITF_ATTRIBUTE_LAST = MFIB_ITF_DONT_PRESERVE,
+} mfib_itf_attribute_t;
+
+#define FOR_EACH_MFIB_ITF_ATTRIBUTE(_item) \
+ for (_item = MFIB_ITF_ATTRIBUTE_FIRST; \
+ _item <= MFIB_ITF_ATTRIBUTE_LAST; \
+ _item++)
+
+#define MFIB_ITF_NAMES_SHORT { \
+ [MFIB_ITF_NEGATE_SIGNAL] = "NS", \
+ [MFIB_ITF_ACCEPT] = "A", \
+ [MFIB_ITF_FORWARD] = "F", \
+ [MFIB_ITF_SIGNAL_PRESENT] = "SP", \
+ [MFIB_ITF_DONT_PRESERVE] = "DP", \
+}
+
+#define MFIB_ITF_NAMES_LONG { \
+ [MFIB_ITF_NEGATE_SIGNAL] = "Negate-Signal", \
+ [MFIB_ITF_ACCEPT] = "Accept", \
+ [MFIB_ITF_FORWARD] = "Forward", \
+ [MFIB_ITF_SIGNAL_PRESENT] = "Signal-Present", \
+ [MFIB_ITF_DONT_PRESERVE] = "Don't-Preserve", \
+}
+
+typedef enum mfib_itf_flags_t_
+{
+ MFIB_ITF_FLAG_NONE,
+ MFIB_ITF_FLAG_NEGATE_SIGNAL = (1 << MFIB_ITF_NEGATE_SIGNAL),
+ MFIB_ITF_FLAG_ACCEPT = (1 << MFIB_ITF_ACCEPT),
+ MFIB_ITF_FLAG_FORWARD = (1 << MFIB_ITF_FORWARD),
+ MFIB_ITF_FLAG_SIGNAL_PRESENT = (1 << MFIB_ITF_SIGNAL_PRESENT),
+ MFIB_ITF_FLAG_DONT_PRESERVE = (1 << MFIB_ITF_DONT_PRESERVE),
+} mfib_itf_flags_t;
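+
+/*
+ * The flag types are one-bit-per-attribute bitmaps, so values combine
+ * with bitwise-or; e.g. an interface in both the accepting and
+ * forwarding sets:
+ *
+ *   mfib_itf_flags_t flags = MFIB_ITF_FLAG_ACCEPT | MFIB_ITF_FLAG_FORWARD;
+ */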
+
+/**
+ * Possible [control plane] sources of MFIB entries
+ */
+typedef enum mfib_source_t_
+{
+ MFIB_SOURCE_SPECIAL,
+ MFIB_SOURCE_API,
+ MFIB_SOURCE_CLI,
+ MFIB_SOURCE_VXLAN,
+ MFIB_SOURCE_DHCP,
+ MFIB_SOURCE_SRv6,
+ MFIB_SOURCE_GTPU,
+ MFIB_SOURCE_VXLAN_GPE,
+ MFIB_SOURCE_RR,
+ MFIB_SOURCE_DEFAULT_ROUTE,
+} mfib_source_t;
+
+#define MFIB_SOURCE_NAMES { \
+ [MFIB_SOURCE_SPECIAL] = "Special", \
+ [MFIB_SOURCE_API] = "API", \
+ [MFIB_SOURCE_CLI] = "CLI", \
+ [MFIB_SOURCE_DHCP] = "DHCP", \
+ [MFIB_SOURCE_VXLAN] = "VXLAN", \
+ [MFIB_SOURCE_SRv6] = "SRv6", \
+ [MFIB_SOURCE_GTPU] = "GTPU", \
+ [MFIB_SOURCE_VXLAN_GPE] = "VXLAN-GPE", \
+ [MFIB_SOURCE_RR] = "Recursive-resolution", \
+ [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \
+}
+
+#define MFIB_N_SOURCES (MFIB_SOURCE_DEFAULT_ROUTE)
+
+/**
+ * \brief Compare two prefixes for equality
+ */
+extern int mfib_prefix_cmp(const mfib_prefix_t *p1,
+ const mfib_prefix_t *p2);
+
+extern u8 * format_mfib_prefix(u8 * s, va_list * args);
+
+extern u8 *format_mfib_entry_flags(u8 * s, va_list * args);
+extern u8 *format_mfib_itf_flags(u8 * s, va_list * args);
+extern uword unformat_mfib_itf_flags(unformat_input_t * input,
+ va_list * args);
+extern uword unformat_mfib_entry_flags(unformat_input_t * input,
+ va_list * args);
+
+#endif
diff --git a/src/vnet/misc.c b/src/vnet/misc.c
new file mode 100644
index 00000000..9cfe8394
--- /dev/null
+++ b/src/vnet/misc.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * misc.c: vnet misc
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+
+vnet_main_t vnet_main;
+
+vnet_main_t *
+vnet_get_main (void)
+{
+ return &vnet_main;
+}
+
+static uword
+vnet_local_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ ASSERT (0);
+ return f->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (vnet_local_interface_device_class) = {
+ .name = "local",
+ .tx_function = vnet_local_interface_tx,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (vnet_local_interface_hw_class,static) = {
+ .name = "local",
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+vnet_main_init (vlib_main_t * vm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error;
+ u32 hw_if_index;
+ vnet_hw_interface_t *hw;
+
+ if ((error = vlib_call_init_function (vm, vnet_interface_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, fib_module_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, mfib_module_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, mpls_init)))
+ return error;
+
+ vnm->vlib_main = vm;
+
+ hw_if_index = vnet_register_interface
+ (vnm, vnet_local_interface_device_class.index, /* instance */ 0,
+ vnet_local_interface_hw_class.index, /* instance */ 0);
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+ vnm->local_interface_hw_if_index = hw_if_index;
+ vnm->local_interface_sw_if_index = hw->sw_if_index;
+
+  /* The local interface is used as an input interface when decapsulating
+   * from an IPsec tunnel, so it needs to be IP enabled */
+ ip4_sw_interface_enable_disable (hw->sw_if_index, 1);
+ ip6_sw_interface_enable_disable (hw->sw_if_index, 1);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_main_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def
new file mode 100644
index 00000000..34a46522
--- /dev/null
+++ b/src/vnet/mpls/error.def
@@ -0,0 +1,31 @@
+/*
+ * mpls_error.def: mpls errors
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
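+/*
+ * This file is an X-macro list: each consumer defines mpls_error(n,s)
+ * before including it. mpls.h, for example, expands it into the
+ * mpls_error_t enum:
+ *
+ *   #define mpls_error(n,s) MPLS_ERROR_##n,
+ *   #include <vnet/mpls/error.def>
+ *   #undef mpls_error
+ */
+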
+mpls_error (NONE, "no error")
+mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
+mpls_error (UNSUPPORTED_VERSION, "unsupported version")
+mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
+mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
+mpls_error (NO_LABEL, "MPLS no label for fib/dst")
+mpls_error (TTL_EXPIRED, "MPLS ttl expired")
+mpls_error (S_NOT_SET, "MPLS s-bit not set")
+mpls_error (BAD_LABEL, "invalid FIB id in label")
+mpls_error (NOT_IP4, "non-ip4 packets dropped")
+mpls_error (DISALLOWED_FIB, "disallowed FIB id")
+mpls_error (NOT_ENABLED, "MPLS not enabled")
+mpls_error (DROP, "MPLS DROP DPO")
+mpls_error (PUNT, "MPLS PUNT DPO")
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
new file mode 100644
index 00000000..d7c8e7d3
--- /dev/null
+++ b/src/vnet/mpls/interface.c
@@ -0,0 +1,132 @@
+/*
+ * interface.c: mpls interfaces
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/dpo/classify_dpo.h>
+
+
+u8
+mpls_sw_interface_is_enabled (u32 sw_if_index)
+{
+ mpls_main_t * mm = &mpls_main;
+
+  if (vec_len(mm->mpls_enabled_by_sw_if_index) <= sw_if_index)
+ return (0);
+
+ return (mm->mpls_enabled_by_sw_if_index[sw_if_index]);
+}
+
+int
+mpls_sw_interface_enable_disable (mpls_main_t * mm,
+ u32 sw_if_index,
+ u8 is_enable,
+ u8 is_api)
+{
+ fib_node_index_t lfib_index;
+
+ vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
+
+ lfib_index = fib_table_find(FIB_PROTOCOL_MPLS,
+ MPLS_FIB_DEFAULT_TABLE_ID);
+
+ if (~0 == lfib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
+ {
+ if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index])
+ return (0);
+
+ fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS,
+ (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
+
+      vec_validate(mm->fib_index_by_sw_if_index, sw_if_index);
+ mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index;
+ }
+ else
+ {
+ ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0);
+ if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index])
+ return (0);
+
+ fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_MPLS,
+ (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
+ }
+
+ vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled",
+ sw_if_index, !is_enable, 0, 0);
+
+ return (0);
+}
+
+static clib_error_t *
+mpls_interface_enable_disable (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index, enable;
+
+ sw_if_index = ~0;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "enable"))
+ enable = 1;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ {
+ error = clib_error_return (0, "expected 'enable' or 'disable'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable, 0);
+
+ done:
+ return error;
+}
+
+/*?
+ * This command enables an interface to accept MPLS packets.
+ *
+ * @cliexpar
+ * @cliexstart{set interface mpls}
+ * set interface mpls GigabitEthernet0/8/0 enable
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (set_interface_mpls_command, static) = {
+ .path = "set interface mpls",
+ .function = mpls_interface_enable_disable,
+ .short_help = "Enable/Disable an interface for MPLS forwarding",
+};
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
new file mode 100644
index 00000000..36488d0c
--- /dev/null
+++ b/src/vnet/mpls/mpls.api
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Bind/Unbind an MPLS local label to an IP prefix. i.e. create
+ a per-prefix label entry.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mb_mpls_table_id - The MPLS table-id the MPLS entry will be added in
+ @param mb_label - The MPLS label value to bind
+ @param mb_ip_table_id - The IP table-id of the IP prefix to bind to.
+ @param mb_create_table_if_needed - Create either/both tables if required.
+ @param mb_is_bind - Bind or unbind
+ @param mb_is_ip4 - The prefix to bind to is IPv4
+ @param mb_address_length - Length of IP prefix
+    @param mb_address[16] - The IP prefix to bind to
+*/
+autoreply define mpls_ip_bind_unbind
+{
+ u32 client_index;
+ u32 context;
+ u32 mb_mpls_table_id;
+ u32 mb_label;
+ u32 mb_ip_table_id;
+ u8 mb_create_table_if_needed;
+ u8 mb_is_bind;
+ u8 mb_is_ip4;
+ u8 mb_address_length;
+ u8 mb_address[16];
+};
+
+/** \brief MPLS tunnel add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mt_is_add - Is this a route add or delete
+ @param mt_sw_if_index - The SW interface index of the tunnel to delete
+ @param mt_is_multicast - Is the tunnel's underlying LSP multicast
+ @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4
+ @param mt_next_hop_weight - The weight, for UCMP
+ @param mt_next_hop_preference - The preference
+    @param mt_next_hop[16] - the next-hop address
+ @param mt_next_hop_sw_if_index - the next-hop SW interface
+ @param mt_next_hop_table_id - the next-hop table-id (if appropriate)
+ @param mt_next_hop_n_out_labels - the number of next-hop output labels
+ @param mt_next_hop_out_label_stack - the next-hop output label stack, outer most first
+*/
+define mpls_tunnel_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 mt_sw_if_index;
+ u8 mt_is_add;
+ u8 mt_l2_only;
+ u8 mt_is_multicast;
+ u8 mt_next_hop_proto_is_ip4;
+ u8 mt_next_hop_weight;
+ u8 mt_next_hop_preference;
+ u8 mt_next_hop[16];
+ u8 mt_next_hop_n_out_labels;
+ u32 mt_next_hop_sw_if_index;
+ u32 mt_next_hop_table_id;
+ u32 mt_next_hop_out_label_stack[mt_next_hop_n_out_labels];
+};
+
+/** \brief Reply for MPLS tunnel add / del request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - SW interface index of the tunnel created
+*/
+define mpls_tunnel_add_del_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Dump MPLS tunnel table
+    @param client_index - opaque cookie to identify the sender
+    @param tunnel_index - tunnel identifier or -1 in case of all tunnels
+*/
+define mpls_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+ i32 tunnel_index;
+};
+
+/** \brief FIB path
+ @param sw_if_index - index of the interface
+ @param weight - The weight, for UCMP
+ @param is_local - local if non-zero, else remote
+ @param is_drop - Drop the packet
+ @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+ @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+ @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2
+ @param next_hop[16] - the next hop address
+
+ WARNING: this type is replicated, pending cleanup completion
+
+*/
+typeonly manual_print manual_endian define fib_path2
+{
+ u32 sw_if_index;
+ u8 weight;
+ u8 preference;
+ u8 is_local;
+ u8 is_drop;
+ u8 is_unreach;
+ u8 is_prohibit;
+ u8 afi;
+ u8 next_hop[16];
+ u32 labels[16];
+};
+
+/** \brief mpls tunnel details
+*/
+manual_endian manual_print define mpls_tunnel_details
+{
+ u32 context;
+  u32 mt_sw_if_index;
+  u32 mt_tunnel_index;
+ u8 mt_l2_only;
+ u8 mt_is_multicast;
+ u32 mt_count;
+ vl_api_fib_path2_t mt_paths[mt_count];
+};
+
+/** \brief MPLS table add / del request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param mt_table_id - The MPLS table-id
+    @param mt_is_add - Is this a table add or delete
+    @param mt_name - A client provided name/tag for the table. If this
+                     is not set by the client, then VPP will generate
+                     something meaningful.
+*/
+autoreply define mpls_table_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 mt_table_id;
+ u8 mt_is_add;
+ u8 mt_name[64];
+};
+
+/** \brief MPLS Route Add / del route
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mr_label - The MPLS label value
+ @param mr_eos - The End of stack bit
+ @param mr_table_id - The MPLS table-id the route is added in
+ @param mr_classify_table_index - If this is a classify route,
+ this is the classify table index
+ @param mr_create_table_if_needed - If the MPLS or IP tables do not exist,
+ create them
+ @param mr_is_add - Is this a route add or delete
+ @param mr_is_classify - Is this route result a classify
+ @param mr_is_multicast - Is this a multicast route
+ @param mr_is_multipath - Is this route update a multipath - i.e. is this
+ a path addition to an existing route
+ @param mr_is_resolve_host - Recurse resolution constraint via a host prefix
+ @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix
+ @param mr_is_interface_rx - Interface Receive path
+    @param mr_is_rpf_id - RPF-ID Receive path. The next-hop interface
+ is used as the RPF-ID
+ @param mr_next_hop_proto - The next-hop protocol, of type dpo_proto_t
+ @param mr_next_hop_weight - The weight, for UCMP
+    @param mr_next_hop[16] - the next-hop address
+ @param mr_next_hop_sw_if_index - the next-hop SW interface
+ @param mr_next_hop_table_id - the next-hop table-id (if appropriate)
+ @param mr_next_hop_n_out_labels - the number of labels in the label stack
+ @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first
+ @param next_hop_via_label - The next-hop is a resolved via a local label
+*/
+autoreply define mpls_route_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 mr_label;
+ u8 mr_eos;
+ u32 mr_table_id;
+ u32 mr_classify_table_index;
+ u8 mr_create_table_if_needed;
+ u8 mr_is_add;
+ u8 mr_is_classify;
+ u8 mr_is_multicast;
+ u8 mr_is_multipath;
+ u8 mr_is_resolve_host;
+ u8 mr_is_resolve_attached;
+ u8 mr_is_interface_rx;
+ u8 mr_is_rpf_id;
+ u8 mr_next_hop_proto;
+ u8 mr_next_hop_weight;
+ u8 mr_next_hop_preference;
+ u8 mr_next_hop[16];
+ u8 mr_next_hop_n_out_labels;
+ u32 mr_next_hop_sw_if_index;
+ u32 mr_next_hop_table_id;
+ u32 mr_next_hop_via_label;
+ u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels];
+};
+
+/** \brief Dump MPLS fib table
+ @param client_index - opaque cookie to identify the sender
+*/
+define mpls_fib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief mpls FIB table response
+ @param table_id - MPLS fib table id
+    @param eos_bit - End-of-stack bit
+ @param label - MPLS label value
+ @param count - the number of fib_path in path
+    @param path - array of fib_path structures
+*/
+manual_endian manual_print define mpls_fib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 table_name[64];
+ u8 eos_bit;
+ u32 label;
+ u32 count;
+ vl_api_fib_path2_t path[count];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
new file mode 100644
index 00000000..ed24f75f
--- /dev/null
+++ b/src/vnet/mpls/mpls.c
@@ -0,0 +1,627 @@
+/*
+ * mpls.c: mpls
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+static const char *mpls_eos_bit_names[] = MPLS_EOS_BITS;
+
+mpls_main_t mpls_main;
+
+u8 * format_mpls_unicast_label (u8 * s, va_list * args)
+{
+ mpls_label_t label = va_arg (*args, mpls_label_t);
+
+ switch (label) {
+ case MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL:
+ s = format (s, "%s", MPLS_IETF_IPV4_EXPLICIT_NULL_STRING);
+ break;
+ case MPLS_IETF_ROUTER_ALERT_LABEL:
+ s = format (s, "%s", MPLS_IETF_ROUTER_ALERT_STRING);
+ break;
+ case MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL:
+ s = format (s, "%s", MPLS_IETF_IPV6_EXPLICIT_NULL_STRING);
+ break;
+ case MPLS_IETF_IMPLICIT_NULL_LABEL:
+ s = format (s, "%s", MPLS_IETF_IMPLICIT_NULL_STRING);
+ break;
+ case MPLS_IETF_ELI_LABEL:
+ s = format (s, "%s", MPLS_IETF_ELI_STRING);
+ break;
+ case MPLS_IETF_GAL_LABEL:
+ s = format (s, "%s", MPLS_IETF_GAL_STRING);
+ break;
+ default:
+ s = format (s, "%d", label);
+ break;
+ }
+ return s;
+}
+
+uword unformat_mpls_unicast_label (unformat_input_t * input, va_list * args)
+{
+ mpls_label_t *label = va_arg (*args, mpls_label_t*);
+
+ if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_ROUTER_ALERT_STRING))
+ *label = MPLS_IETF_ROUTER_ALERT_LABEL;
+ else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IMPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING))
+ *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING))
+ *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_ROUTER_ALERT_BRIEF_STRING))
+ *label = MPLS_IETF_ROUTER_ALERT_LABEL;
+ else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING))
+ *label = MPLS_IETF_IMPLICIT_NULL_LABEL;
+ else if (unformat (input, "%d", label))
+ ;
+ else
+ return (0);
+
+ return (1);
+}
+
+u8 * format_mpls_eos_bit (u8 * s, va_list * args)
+{
+ mpls_eos_bit_t eb = va_arg (*args, mpls_eos_bit_t);
+
+ ASSERT(eb <= MPLS_EOS);
+
+ s = format(s, "%s", mpls_eos_bit_names[eb]);
+
+ return (s);
+}
+
+u8 * format_mpls_header (u8 * s, va_list * args)
+{
+ mpls_unicast_header_t hdr = va_arg (*args, mpls_unicast_header_t);
+
+ return (format(s, "[%U:%d:%d:%U]",
+ format_mpls_unicast_label,
+ vnet_mpls_uc_get_label(hdr.label_exp_s_ttl),
+ vnet_mpls_uc_get_ttl(hdr.label_exp_s_ttl),
+ vnet_mpls_uc_get_exp(hdr.label_exp_s_ttl),
+ format_mpls_eos_bit,
+ vnet_mpls_uc_get_s(hdr.label_exp_s_ttl)));
+}
+
+uword
+unformat_mpls_header (unformat_input_t * input, va_list * args)
+{
+ u8 ** result = va_arg (*args, u8 **);
+ mpls_unicast_header_t _h, * h = &_h;
+ u32 label, label_exp_s_ttl;
+
+ if (! unformat (input, "MPLS %d", &label))
+ return 0;
+
+ label_exp_s_ttl = (label<<12) | (1<<8) /* s-bit */ | 0xFF;
+ h->label_exp_s_ttl = clib_host_to_net_u32 (label_exp_s_ttl);
+
+ /* Add gre, mpls headers to result. */
+ {
+ void * p;
+ u32 h_n_bytes = sizeof (h[0]);
+
+ vec_add2 (*result, p, h_n_bytes);
+ clib_memcpy (p, h, h_n_bytes);
+ }
+
+ return 1;
+}
+
+uword
+unformat_mpls_label_net_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u32 * result = va_arg (*args, u32 *);
+ u32 label;
+
+ if (!unformat (input, "MPLS: label %d", &label))
+ return 0;
+
+ label = (label<<12) | (1<<8) /* s-bit set */ | 0xFF /* ttl */;
+
+ *result = clib_host_to_net_u32 (label);
+ return 1;
+}
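+
+/*
+ * Both parsers above assemble a label stack entry in the RFC 3032
+ * layout: bits 12-31 label, bits 9-11 EXP (zero here), bit 8 S
+ * (end-of-stack), bits 0-7 TTL. Hence (label << 12) | (1 << 8) | 0xFF
+ * is a single, end-of-stack entry with TTL 255.
+ */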
+
+u8 * format_mpls_unicast_header_host_byte_order (u8 * s, va_list * args)
+{
+ mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *);
+ u32 label = h->label_exp_s_ttl;
+
+ s = format (s, "label %d exp %d, s %d, ttl %d",
+ vnet_mpls_uc_get_label (label),
+ vnet_mpls_uc_get_exp (label),
+ vnet_mpls_uc_get_s (label),
+ vnet_mpls_uc_get_ttl (label));
+ return s;
+}
+
+u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args)
+{
+ mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *);
+ mpls_unicast_header_t h_host;
+
+ h_host.label_exp_s_ttl = clib_net_to_host_u32 (h->label_exp_s_ttl);
+
+ return format (s, "%U", format_mpls_unicast_header_host_byte_order,
+ &h_host);
+}
+
+typedef struct {
+ u32 fib_index;
+ u32 entry_index;
+ u32 dest;
+ u32 s_bit;
+ u32 label;
+} show_mpls_fib_t;
+
+int
+mpls_dest_cmp(void * a1, void * a2)
+{
+ show_mpls_fib_t * r1 = a1;
+ show_mpls_fib_t * r2 = a2;
+
+ return clib_net_to_host_u32(r1->dest) - clib_net_to_host_u32(r2->dest);
+}
+
+int
+mpls_fib_index_cmp(void * a1, void * a2)
+{
+ show_mpls_fib_t * r1 = a1;
+ show_mpls_fib_t * r2 = a2;
+
+ return r1->fib_index - r2->fib_index;
+}
+
+int
+mpls_label_cmp(void * a1, void * a2)
+{
+ show_mpls_fib_t * r1 = a1;
+ show_mpls_fib_t * r2 = a2;
+
+ return r1->label - r2->label;
+}
+
+static clib_error_t *
+vnet_mpls_local_label (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ fib_route_path_t *rpaths = NULL, rpath;
+ u32 table_id, is_del, is_ip;
+ mpls_label_t local_label;
+ mpls_label_t out_label;
+ clib_error_t * error;
+ vnet_main_t * vnm;
+ fib_prefix_t pfx;
+
+ vnm = vnet_get_main();
+ error = NULL;
+ is_ip = 0;
+ table_id = 0;
+ is_del = 0;
+ local_label = MPLS_LABEL_INVALID;
+ memset(&pfx, 0, sizeof(pfx));
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ memset(&rpath, 0, sizeof(rpath));
+
+ if (unformat (line_input, "table %d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "add"))
+ is_del = 0;
+ else if (unformat (line_input, "eos"))
+ pfx.fp_eos = MPLS_EOS;
+ else if (unformat (line_input, "non-eos"))
+ pfx.fp_eos = MPLS_NON_EOS;
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip4_address,
+ &pfx.fp_addr.ip4,
+ &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ is_ip = 1;
+ }
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip6_address,
+ &pfx.fp_addr.ip6,
+ &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ is_ip = 1;
+ }
+ else if (unformat (line_input, "via %U %U weight %u",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index,
+ &rpath.frp_weight))
+ {
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+
+ else if (unformat (line_input, "via %U %U weight %u",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index,
+ &rpath.frp_weight))
+ {
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1(rpaths, rpath);
+ }
+
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "rx-ip4 %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4))
+ {
+ /*
+ * the recursive next-hops are by default in the same table
+ * as the prefix
+ */
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6))
+ {
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "%d", &local_label))
+ ;
+ else if (unformat (line_input,
+ "ip4-lookup-in-table %d",
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_proto = DPO_PROTO_IP4;
+ rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+ pfx.fp_payload_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input,
+ "ip6-lookup-in-table %d",
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_proto = DPO_PROTO_IP6;
+ rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+ vec_add1(rpaths, rpath);
+ pfx.fp_payload_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (line_input,
+ "mpls-lookup-in-table %d",
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_proto = DPO_PROTO_MPLS;
+ rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+ pfx.fp_payload_proto = DPO_PROTO_MPLS;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input,
+ "l2-input-on %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_proto = DPO_PROTO_ETHERNET;
+ pfx.fp_payload_proto = DPO_PROTO_ETHERNET;
+ rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "out-labels"))
+ {
+ if (vec_len (rpaths) == 0)
+ {
+ error = clib_error_return (0, "Paths then labels");
+ goto done;
+ }
+ else
+ {
+ while (unformat (line_input, "%U",
+ unformat_mpls_unicast_label,
+ &out_label))
+ {
+ vec_add1 (rpaths[vec_len (rpaths) - 1].frp_label_stack,
+ out_label);
+ }
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "unkown input: %U",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ }
+
+ if (MPLS_LABEL_INVALID == local_label)
+ {
+ error = clib_error_return (0, "local-label required: %U",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (is_ip)
+ {
+ u32 fib_index = fib_table_find(pfx.fp_proto, table_id);
+
+ if (FIB_NODE_INDEX_INVALID == fib_index)
+ {
+ error = clib_error_return (0, "%U table-id %d does not exist",
+ format_fib_protocol, pfx.fp_proto, table_id);
+ goto done;
+ }
+
+ if (is_del)
+ {
+ fib_table_entry_local_label_remove(fib_index, &pfx, local_label);
+ }
+ else
+ {
+ fib_table_entry_local_label_add(fib_index, &pfx, local_label);
+ }
+ }
+ else
+ {
+ fib_node_index_t fib_index;
+ u32 fi;
+
+ if (NULL == rpaths)
+ {
+ error = clib_error_return(0 , "no paths");
+ goto done;
+ }
+
+ pfx.fp_proto = FIB_PROTOCOL_MPLS;
+ pfx.fp_len = 21;
+ pfx.fp_label = local_label;
+ pfx.fp_payload_proto = rpaths[0].frp_proto;
+
+ /*
+       * The CLI parsing stored table IDs; swap them for FIB indices.
+ */
+      if (FIB_NODE_INDEX_INVALID == rpaths[0].frp_sw_if_index)
+ {
+ fi = fib_table_find(dpo_proto_to_fib(pfx.fp_payload_proto),
+ rpaths[0].frp_fib_index);
+
+ if (~0 == fi)
+ {
+ error = clib_error_return(0 , "%U Via table %d does not exist",
+ format_dpo_proto, pfx.fp_payload_proto,
+ rpaths[0].frp_fib_index);
+ goto done;
+ }
+ rpaths[0].frp_fib_index = fi;
+ }
+
+ fib_index = mpls_fib_index_from_table_id(table_id);
+
+ if (FIB_NODE_INDEX_INVALID == fib_index)
+ {
+ error = clib_error_return (0, "MPLS table-id %d does not exist",
+ table_id);
+ goto done;
+ }
+
+ if (is_del)
+ {
+ fib_table_entry_path_remove2(fib_index,
+ &pfx,
+ FIB_SOURCE_CLI,
+ rpaths);
+ }
+ else
+ {
+ fib_node_index_t lfe;
+
+ lfe = fib_table_entry_path_add2(fib_index,
+ &pfx,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ rpaths);
+
+ if (FIB_NODE_INDEX_INVALID == lfe)
+ {
+ error = clib_error_return (0, "Failed to create %U-%U in MPLS table-id %d",
+ format_mpls_unicast_label, local_label,
+                                         format_mpls_eos_bit, pfx.fp_eos,
+ table_id);
+ goto done;
+ }
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (mpls_local_label_command, static) = {
+ .path = "mpls local-label",
+ .function = vnet_mpls_local_label,
+ .short_help = "Create/Delete MPL local labels",
+};
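+
+/*
+ * Example invocations (illustrative; the interface name is
+ * hypothetical):
+ *
+ *   mpls local-label add 100 10.0.0.0/24
+ *   mpls local-label add 101 via 10.0.0.1 GigabitEthernet0/8/0
+ *   mpls local-label add non-eos 102 mpls-lookup-in-table 0
+ */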
+
+clib_error_t *
+vnet_mpls_table_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input,
+ vlib_cli_command_t * cmdo)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ u32 table_id, is_add;
+ u8 *name = NULL;
+
+ is_add = 1;
+ table_id = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "name %s", &name))
+ ;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == table_id)
+ {
+ error = clib_error_return (0, "No table id");
+ goto done;
+ }
+ else
+ {
+ if (is_add)
+ {
+ mpls_table_create (table_id, 0, name);
+ }
+ else
+ {
+ mpls_table_delete (table_id, 0);
+ }
+ }
+
+ done:
+ unformat_free (line_input);
+ return error;
+}
+
+/*?
+ * This command is used to add or delete MPLS tables. All
+ * tables must be explicitly added before they can be used,
+ * including the default table.
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (mpls_table_command, static) = {
+ .path = "mpls table",
+ .short_help = "mpls table [add|del] <table-id>",
+ .function = vnet_mpls_table_cmd,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
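+
+/*
+ * Example: create the default MPLS table, which is not created
+ * automatically:
+ *
+ *   mpls table add 0
+ */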
+
+int
+mpls_fib_reset_labels (u32 fib_id)
+{
+ // FIXME
+ return 0;
+}
+
+static clib_error_t *
+mpls_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ return vlib_call_init_function (vm, mpls_input_init);
+}
+
+VLIB_INIT_FUNCTION (mpls_init);
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
new file mode 100644
index 00000000..cc3eeed0
--- /dev/null
+++ b/src/vnet/mpls/mpls.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_mpls_h
+#define included_vnet_mpls_h
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/mpls/mpls_types.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/adj/adj.h>
+
+typedef enum
+{
+#define mpls_error(n,s) MPLS_ERROR_##n,
+#include <vnet/mpls/error.def>
+#undef mpls_error
+ MPLS_N_ERROR,
+} mpls_error_t;
+
+/**
+ * @brief Definition of a callback for receiving MPLS interface state change
+ * notifications
+ */
+typedef void (*mpls_interface_state_change_callback_t) (u32 sw_if_index,
+ u32 is_enable);
+
+typedef struct
+{
+ /* MPLS FIB index for each software interface */
+ u32 *fib_index_by_sw_if_index;
+
+ /** A pool of all the MPLS FIBs */
+ struct fib_table_t_ *fibs;
+
+  /** A pool of the MPLS-specific parts of the FIBs */
+ struct mpls_fib_t_ *mpls_fibs;
+
+ /** A hash table to lookup the mpls_fib by table ID */
+ uword *fib_index_by_table_id;
+
+ /* Feature arc indices */
+ u8 input_feature_arc_index;
+ u8 output_feature_arc_index;
+
+  /* MPLS enabled count by software interface */
+ u8 *mpls_enabled_by_sw_if_index;
+} mpls_main_t;
+
+extern mpls_main_t mpls_main;
+
+extern clib_error_t *mpls_feature_init (vlib_main_t * vm);
+
+format_function_t format_mpls_eos_bit;
+format_function_t format_mpls_unicast_header_net_byte_order;
+format_function_t format_mpls_unicast_label;
+format_function_t format_mpls_header;
+
+extern vlib_node_registration_t mpls_input_node;
+extern vlib_node_registration_t mpls_output_node;
+extern vlib_node_registration_t mpls_midchain_node;
+
+/* Parse an MPLS label, either by well-known IETF name or numeric value;
+   the net-byte-order variant builds a complete label stack entry. */
+unformat_function_t unformat_mpls_label_net_byte_order;
+unformat_function_t unformat_mpls_unicast_label;
+
+/* Parse mpls header. */
+unformat_function_t unformat_mpls_header;
+unformat_function_t unformat_pg_mpls_header;
+
+int mpls_sw_interface_enable_disable (mpls_main_t * mm,
+ u32 sw_if_index,
+ u8 is_enable, u8 is_api);
+
+u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
+
+int mpls_fib_reset_labels (u32 fib_id);
+
+int mpls_dest_cmp (void *a1, void *a2);
+
+int mpls_fib_index_cmp (void *a1, void *a2);
+
+int mpls_label_cmp (void *a1, void *a2);
+
+void mpls_table_create (u32 table_id, u8 is_api, const u8 * name);
+void mpls_table_delete (u32 table_id, u8 is_api);
+
+#endif /* included_vnet_mpls_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
new file mode 100644
index 00000000..762c40ff
--- /dev/null
+++ b/src/vnet/mpls/mpls_api.c
@@ -0,0 +1,582 @@
+/*
+ *------------------------------------------------------------------
+ * mpls_api.c - mpls api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/mpls/mpls_tunnel.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/fib/fib_path_list.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \
+_(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \
+_(MPLS_TABLE_ADD_DEL, mpls_table_add_del) \
+_(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \
+_(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \
+_(MPLS_FIB_DUMP, mpls_fib_dump)
+
+extern void stats_dslock_with_hint (int hint, int tag);
+extern void stats_dsunlock (void);
+
+void
+mpls_table_delete (u32 table_id, u8 is_api)
+{
+ u32 fib_index;
+
+ /*
+   * The MPLS default table must also be explicitly created via the API.
+ * So in contrast to IP, it gets no special treatment here.
+ *
+ * The API holds only one lock on the table.
+ * i.e. it can be added many times via the API but needs to be
+ * deleted only once.
+ */
+ fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id);
+
+ if (~0 != fib_index)
+ {
+ fib_table_unlock (fib_index,
+ FIB_PROTOCOL_MPLS,
+ (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI));
+ }
+}
+
+void
+vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp)
+{
+ vl_api_mpls_table_add_del_reply_t *rmp;
+ vnet_main_t *vnm;
+ int rv = 0;
+
+ vnm = vnet_get_main ();
+ vnm->api_errno = 0;
+
+ if (mp->mt_is_add)
+ mpls_table_create (ntohl (mp->mt_table_id), 1, mp->mt_name);
+ else
+ mpls_table_delete (ntohl (mp->mt_table_id), 1);
+
+ // NB: Nothing sets rv; none of the above returns an error
+
+ REPLY_MACRO (VL_API_MPLS_TABLE_ADD_DEL_REPLY);
+}
+
+static int
+mpls_ip_bind_unbind_handler (vnet_main_t * vnm,
+ vl_api_mpls_ip_bind_unbind_t * mp)
+{
+ u32 mpls_fib_index, ip_fib_index;
+
+ mpls_fib_index =
+ fib_table_find (FIB_PROTOCOL_MPLS, ntohl (mp->mb_mpls_table_id));
+
+ if (~0 == mpls_fib_index)
+ {
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ }
+
+ ip_fib_index = fib_table_find ((mp->mb_is_ip4 ?
+ FIB_PROTOCOL_IP4 :
+ FIB_PROTOCOL_IP6),
+ ntohl (mp->mb_ip_table_id));
+ if (~0 == ip_fib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ fib_prefix_t pfx = {
+ .fp_len = mp->mb_address_length,
+ };
+
+ if (mp->mb_is_ip4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ clib_memcpy (&pfx.fp_addr.ip4, mp->mb_address,
+ sizeof (pfx.fp_addr.ip4));
+ }
+ else
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ clib_memcpy (&pfx.fp_addr.ip6, mp->mb_address,
+ sizeof (pfx.fp_addr.ip6));
+ }
+
+ if (mp->mb_is_bind)
+ fib_table_entry_local_label_add (ip_fib_index, &pfx,
+ ntohl (mp->mb_label));
+ else
+ fib_table_entry_local_label_remove (ip_fib_index, &pfx,
+ ntohl (mp->mb_label));
+
+ return (0);
+}
+
+void
+vl_api_mpls_ip_bind_unbind_t_handler (vl_api_mpls_ip_bind_unbind_t * mp)
+{
+ vl_api_mpls_ip_bind_unbind_reply_t *rmp;
+ vnet_main_t *vnm;
+ int rv;
+
+ vnm = vnet_get_main ();
+ vnm->api_errno = 0;
+
+ rv = mpls_ip_bind_unbind_handler (vnm, mp);
+ rv = (rv == 0) ? vnm->api_errno : rv;
+
+ REPLY_MACRO (VL_API_MPLS_IP_BIND_UNBIND_REPLY);
+}
+
+static int
+mpls_route_add_del_t_handler (vnet_main_t * vnm,
+ vl_api_mpls_route_add_del_t * mp)
+{
+ u32 fib_index, next_hop_fib_index;
+ mpls_label_t *label_stack = NULL;
+  int rv, ii, n_labels;
+
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_eos = mp->mr_eos,
+ .fp_label = ntohl (mp->mr_label),
+ };
+ if (pfx.fp_eos)
+ {
+ pfx.fp_payload_proto = mp->mr_next_hop_proto;
+ }
+ else
+ {
+ pfx.fp_payload_proto = DPO_PROTO_MPLS;
+ }
+
+ rv = add_del_route_check (FIB_PROTOCOL_MPLS,
+ mp->mr_table_id,
+ mp->mr_next_hop_sw_if_index,
+ pfx.fp_payload_proto,
+ mp->mr_next_hop_table_id,
+ mp->mr_is_rpf_id,
+ &fib_index, &next_hop_fib_index);
+
+ if (0 != rv)
+ return (rv);
+
+ ip46_address_t nh;
+ memset (&nh, 0, sizeof (nh));
+
+ if (DPO_PROTO_IP4 == mp->mr_next_hop_proto)
+ memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4));
+ else if (DPO_PROTO_IP6 == mp->mr_next_hop_proto)
+ memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6));
+
+ n_labels = mp->mr_next_hop_n_out_labels;
+ if (n_labels == 0)
+ ;
+ else if (1 == n_labels)
+ vec_add1 (label_stack, ntohl (mp->mr_next_hop_out_label_stack[0]));
+ else
+ {
+ vec_validate (label_stack, n_labels - 1);
+ for (ii = 0; ii < n_labels; ii++)
+ label_stack[ii] = ntohl (mp->mr_next_hop_out_label_stack[ii]);
+ }
+
+ return (add_del_route_t_handler (mp->mr_is_multipath, mp->mr_is_add, 0, // mp->is_drop,
+ 0, // mp->is_unreach,
+ 0, // mp->is_prohibit,
+ 0, // mp->is_local,
+ mp->mr_is_multicast,
+ mp->mr_is_classify,
+ mp->mr_classify_table_index,
+ mp->mr_is_resolve_host,
+ mp->mr_is_resolve_attached,
+ mp->mr_is_interface_rx,
+ mp->mr_is_rpf_id,
+ fib_index, &pfx,
+ mp->mr_next_hop_proto,
+ &nh, ntohl (mp->mr_next_hop_sw_if_index),
+ next_hop_fib_index,
+ mp->mr_next_hop_weight,
+ mp->mr_next_hop_preference,
+ ntohl (mp->mr_next_hop_via_label),
+ label_stack));
+}
+
+void
+vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp)
+{
+ vl_api_mpls_route_add_del_reply_t *rmp;
+ vnet_main_t *vnm;
+ int rv;
+
+ vnm = vnet_get_main ();
+ vnm->api_errno = 0;
+
+ rv = mpls_route_add_del_t_handler (vnm, mp);
+
+ rv = (rv == 0) ? vnm->api_errno : rv;
+
+ REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY);
+}
+
+void
+mpls_table_create (u32 table_id, u8 is_api, const u8 * name)
+{
+ u32 fib_index;
+
+ /*
+   * The MPLS default table must also be explicitly created via the API.
+ * So in contrast to IP, it gets no special treatment here.
+ */
+
+ /*
+ * The API holds only one lock on the table.
+ * i.e. it can be added many times via the API but needs to be
+ * deleted only once.
+ */
+ fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id);
+
+ if (~0 == fib_index)
+ {
+ fib_table_find_or_create_and_lock_w_name (FIB_PROTOCOL_MPLS,
+ table_id,
+ (is_api ?
+ FIB_SOURCE_API :
+ FIB_SOURCE_CLI), name);
+ }
+}
+
+static void
+vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp)
+{
+ vl_api_mpls_tunnel_add_del_reply_t *rmp;
+ int rv = 0;
+ u32 tunnel_sw_if_index;
+ int ii;
+ fib_route_path_t rpath, *rpaths = NULL;
+
+ memset (&rpath, 0, sizeof (rpath));
+
+ stats_dslock_with_hint (1 /* release hint */ , 5 /* tag */ );
+
+ if (mp->mt_next_hop_proto_is_ip4)
+ {
+ rpath.frp_proto = DPO_PROTO_IP4;
+ clib_memcpy (&rpath.frp_addr.ip4,
+ mp->mt_next_hop, sizeof (rpath.frp_addr.ip4));
+ }
+ else
+ {
+ rpath.frp_proto = DPO_PROTO_IP6;
+ clib_memcpy (&rpath.frp_addr.ip6,
+ mp->mt_next_hop, sizeof (rpath.frp_addr.ip6));
+ }
+ rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index);
+ rpath.frp_weight = 1;
+
+ if (mp->mt_is_add)
+ {
+ for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++)
+ vec_add1 (rpath.frp_label_stack,
+ ntohl (mp->mt_next_hop_out_label_stack[ii]));
+ }
+
+ vec_add1 (rpaths, rpath);
+
+ tunnel_sw_if_index = ntohl (mp->mt_sw_if_index);
+
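+ /*
+ * An add with mt_sw_if_index of ~0 creates the tunnel interface first;
+ * a delete removes the tunnel itself once its last path is gone.
+ */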
+ if (mp->mt_is_add)
+ {
+ if (~0 == tunnel_sw_if_index)
+ tunnel_sw_if_index = vnet_mpls_tunnel_create (mp->mt_l2_only,
+ mp->mt_is_multicast);
+ vnet_mpls_tunnel_path_add (tunnel_sw_if_index, rpaths);
+ }
+ else
+ {
+ tunnel_sw_if_index = ntohl (mp->mt_sw_if_index);
+ if (!vnet_mpls_tunnel_path_remove (tunnel_sw_if_index, rpaths))
+ vnet_mpls_tunnel_del (tunnel_sw_if_index);
+ }
+
+ vec_free (rpaths);
+
+ stats_dsunlock ();
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl(tunnel_sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
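+/**
+ * @brief Context for the tunnel walk used by the dump handler: index is
+ * ~0 to dump all tunnels, or the index of the single tunnel to report.
+ */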
+typedef struct mpls_tunnel_send_walk_ctx_t_
+{
+ unix_shared_memory_queue_t *q;
+ u32 index;
+ u32 context;
+} mpls_tunnel_send_walk_ctx_t;
+
+static void
+send_mpls_tunnel_entry (u32 mti, void *arg)
+{
+ fib_route_path_encode_t *api_rpaths, *api_rpath;
+ mpls_tunnel_send_walk_ctx_t *ctx;
+ vl_api_mpls_tunnel_details_t *mp;
+ const mpls_tunnel_t *mt;
+ vl_api_fib_path2_t *fp;
+ u32 n;
+
+ ctx = arg;
+
+ if (~0 != ctx->index && mti != ctx->index)
+ return;
+
+ mt = mpls_tunnel_get (mti);
+ n = fib_path_list_get_n_paths (mt->mt_path_list);
+
+ mp = vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_fib_path2_t));
+ memset (mp, 0, sizeof (*mp) + n * sizeof (vl_api_fib_path2_t));
+
+ mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS);
+ mp->context = ctx->context;
+
+ mp->mt_tunnel_index = ntohl (mti);
+ mp->mt_count = ntohl (n);
+
+ fib_path_list_walk (mt->mt_path_list, fib_path_encode, &api_rpaths);
+
+ fp = mp->mt_paths;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+
+ fp->weight = api_rpath->rpath.frp_weight;
+ fp->preference = api_rpath->rpath.frp_preference;
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ // FIXME
+ // memcpy (mp->mt_next_hop_out_labels,
+ // mt->mt_label_stack, nlabels * sizeof (u32));
+
+ vl_msg_api_send_shmem (ctx->q, (u8 *) & mp);
+}
+
+static void
+vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ mpls_tunnel_send_walk_ctx_t ctx = {
+ .q = q,
+ .index = ntohl (mp->tunnel_index),
+ .context = mp->context,
+ };
+ mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx);
+}
+
+static void
+send_mpls_fib_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ const fib_table_t * table,
+ u32 label, u32 eos,
+ fib_route_path_encode_t * api_rpaths, u32 context)
+{
+ vl_api_mpls_fib_details_t *mp;
+ fib_route_path_encode_t *api_rpath;
+ vl_api_fib_path2_t *fp;
+ int path_count;
+
+ path_count = vec_len (api_rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+ if (!mp)
+ return;
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_MPLS_FIB_DETAILS);
+ mp->context = context;
+
+ mp->table_id = htonl (table->ft_table_id);
+ memcpy (mp->table_name, table->ft_desc,
+ clib_min (vec_len (table->ft_desc), sizeof (mp->table_name)));
+ mp->eos_bit = eos;
+ mp->label = htonl (label);
+
+ mp->count = htonl (path_count);
+ fp = mp->path;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+ fp->weight = api_rpath->rpath.frp_weight;
+ fp->preference = api_rpath->rpath.frp_preference;
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+typedef struct vl_api_mpls_fib_dump_table_walk_ctx_t_
+{
+ fib_node_index_t *lfeis;
+} vl_api_mpls_fib_dump_table_walk_ctx_t;
+
+static int
+vl_api_mpls_fib_dump_table_walk (fib_node_index_t fei, void *arg)
+{
+ vl_api_mpls_fib_dump_table_walk_ctx_t *ctx = arg;
+
+ vec_add1 (ctx->lfeis, fei);
+
+ return (1);
+}
+
+static void
+vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ mpls_main_t *mm = &mpls_main;
+ fib_table_t *fib_table;
+ mpls_fib_t *mpls_fib;
+ fib_node_index_t *lfeip = NULL;
+ fib_prefix_t pfx;
+ u32 fib_index;
+ fib_route_path_encode_t *api_rpaths;
+ vl_api_mpls_fib_dump_table_walk_ctx_t ctx = {
+ .lfeis = NULL,
+ };
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (mpls_fib, mm->mpls_fibs,
+ ({
+ mpls_fib_table_walk (mpls_fib,
+ vl_api_mpls_fib_dump_table_walk,
+ &ctx);
+ }));
+ /* *INDENT-ON* */
+ vec_sort_with_function (ctx.lfeis, fib_entry_cmp_for_sort);
+
+ vec_foreach (lfeip, ctx.lfeis)
+ {
+ fib_entry_get_prefix (*lfeip, &pfx);
+ fib_index = fib_entry_get_fib_index (*lfeip);
+ fib_table = fib_table_get (fib_index, pfx.fp_proto);
+ api_rpaths = NULL;
+ fib_entry_encode (*lfeip, &api_rpaths);
+ send_mpls_fib_details (am, q,
+ fib_table, pfx.fp_label,
+ pfx.fp_eos, api_rpaths, mp->context);
+ vec_free (api_rpaths);
+ }
+
+ vec_free (ctx.lfeis);
+}
+
+/*
+ * mpls_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_mpls;
+#undef _
+}
+
+static clib_error_t *
+mpls_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Trace space for 8 MPLS encap labels
+ */
+ am->api_trace_cfg[VL_API_MPLS_TUNNEL_ADD_DEL].size += 8 * sizeof (u32);
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (mpls_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/mpls/mpls_features.c b/src/vnet/mpls/mpls_features.c
new file mode 100644
index 00000000..0281d0c2
--- /dev/null
+++ b/src/vnet/mpls/mpls_features.c
@@ -0,0 +1,154 @@
+/*
+ * mpls_features.c: MPLS input and output features
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls.h>
+
+always_inline uword
+mpls_terminate (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int error_code)
+{
+ u32 * buffers = vlib_frame_vector_args (frame);
+ uword n_packets = frame->n_vectors;
+
+ vlib_error_drop_buffers (vm, node,
+ buffers,
+ /* stride */ 1,
+ n_packets,
+ /* next */ 0,
+ mpls_input_node.index,
+ error_code);
+
+ return n_packets;
+}
+
+static uword
+mpls_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_terminate(vm, node, frame, MPLS_ERROR_PUNT));
+}
+
+VLIB_REGISTER_NODE (mpls_punt_node) = {
+ .function = mpls_punt,
+ .name = "mpls-punt",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-punt",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_punt_node, mpls_punt)
+
+static uword
+mpls_drop (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_terminate(vm, node, frame, MPLS_ERROR_DROP));
+}
+
+VLIB_REGISTER_NODE (mpls_drop_node) = {
+ .function = mpls_drop,
+ .name = "mpls-drop",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_drop_node, mpls_drop)
+
+static uword
+mpls_not_enabled (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_terminate(vm, node, frame, MPLS_ERROR_NOT_ENABLED));
+}
+
+VLIB_REGISTER_NODE (mpls_not_enabled_node) = {
+ .function = mpls_not_enabled,
+ .name = "mpls-not-enabled",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_not_enabled_node, mpls_not_enabled)
+
+VNET_FEATURE_ARC_INIT (mpls_input, static) =
+{
+ .arc_name = "mpls-input",
+ .start_nodes = VNET_FEATURES ("mpls-input"),
+ .arc_index_ptr = &mpls_main.input_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (mpls_not_enabled, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "mpls-not-enabled",
+ .runs_before = VNET_FEATURES ("mpls-lookup"),
+};
+
+VNET_FEATURE_INIT (mpls_lookup, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "mpls-lookup",
+ .runs_before = VNET_FEATURES (0), /* not before any other features */
+};
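+
+/*
+ * With "mpls-not-enabled" constrained to run before "mpls-lookup",
+ * interfaces on which MPLS is not enabled drop MPLS packets on the
+ * "mpls-input" arc before any lookup is performed.
+ */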
+
+VNET_FEATURE_ARC_INIT (mpls_output, static) =
+{
+ .arc_name = "mpls-output",
+ .start_nodes = VNET_FEATURES ("mpls-output", "mpls-midchain"),
+ .arc_index_ptr = &mpls_main.output_feature_arc_index,
+};
+
+/* Built-in MPLS tx feature path definition */
+VNET_FEATURE_INIT (mpls_interface_output, static) = {
+ .arc_name = "mpls-output",
+ .node_name = "interface-output",
+ .runs_before = 0, /* not before any other features */
+};
+
+static clib_error_t *
+mpls_sw_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ mpls_main_t * mm = &mpls_main;
+
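+ /* A new interface starts MPLS-disabled and bound to MPLS table 0 */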
+ vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
+ vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0);
+
+ vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled", sw_if_index,
+ is_add, 0, 0);
+
+ return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (mpls_sw_interface_add_del);
+
+
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
new file mode 100644
index 00000000..86ad8bba
--- /dev/null
+++ b/src/vnet/mpls/mpls_input.c
@@ -0,0 +1,324 @@
+/*
+ * mpls_input.c: MPLS input
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/feature/feature.h>
+
+typedef struct {
+ u32 next_index;
+ u32 label_net_byte_order;
+} mpls_input_trace_t;
+
+#define foreach_mpls_input_next \
+_(DROP, "error-drop") \
+_(LOOKUP, "mpls-lookup")
+
+typedef enum {
+#define _(s,n) MPLS_INPUT_NEXT_##s,
+ foreach_mpls_input_next
+#undef _
+ MPLS_INPUT_N_NEXT,
+} mpls_input_next_t;
+
+static u8 *
+format_mpls_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *);
+ char * next_name;
+ u32 label;
+ next_name = "BUG!";
+ label = clib_net_to_host_u32(t->label_net_byte_order);
+
+#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b;
+ foreach_mpls_input_next;
+#undef _
+
+ s = format (s, "MPLS: next %s[%d] label %d ttl %d",
+ next_name, t->next_index,
+ vnet_mpls_uc_get_label(label),
+ vnet_mpls_uc_get_ttl(label));
+
+ return s;
+}
+
+vlib_node_registration_t mpls_input_node;
+
+typedef struct {
+ u32 last_label;
+ u32 last_inner_fib_index;
+ u32 last_outer_fib_index;
+ mpls_main_t * mpls_main;
+} mpls_input_runtime_t;
+
+static inline uword
+mpls_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ mpls_input_runtime_t * rt;
+ mpls_main_t * mm;
+ u32 thread_index = vlib_get_thread_index();
+ vlib_simple_counter_main_t * cm;
+ vnet_main_t * vnm = vnet_get_main();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+ mm = rt->mpls_main;
+ /*
+ * Force an initial lookup every time, in case the control-plane
+ * changed the label->FIB mapping.
+ */
+ rt->last_label = ~0;
+
+ next_index = node->cached_next_index;
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_MPLS);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, next0, sw_if_index0;
+ u32 bi1, next1, sw_if_index1;
+ vlib_buffer_t *b0, *b1;
+ char *h0, *h1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ /* TTL expired? */
+ if (PREDICT_FALSE(h0[3] == 0))
+ {
+ next0 = MPLS_INPUT_NEXT_DROP;
+ b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+ }
+ else
+ {
+ next0 = MPLS_INPUT_NEXT_LOOKUP;
+ vnet_feature_arc_start(mm->input_feature_arc_index,
+ sw_if_index0, &next0, b0);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ }
+
+ if (PREDICT_FALSE(h1[3] == 0))
+ {
+ next1 = MPLS_INPUT_NEXT_DROP;
+ b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+ }
+ else
+ {
+ next1 = MPLS_INPUT_NEXT_LOOKUP;
+ vnet_feature_arc_start(mm->input_feature_arc_index,
+ sw_if_index1, &next1, b1);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->label_net_byte_order = *((u32*)h0);
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->label_net_byte_order = *((u32*)h1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1,
+ next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 sw_if_index0, next0, bi0;
+ vlib_buffer_t * b0;
+ char * h0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* TTL expired? */
+ if (PREDICT_FALSE(h0[3] == 0))
+ {
+ next0 = MPLS_INPUT_NEXT_DROP;
+ b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+ }
+ else
+ {
+ next0 = MPLS_INPUT_NEXT_LOOKUP;
+ vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->label_net_byte_order = *(u32*)h0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, mpls_input_node.index,
+ MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+static uword
+mpls_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return mpls_input_inline (vm, node, from_frame);
+}
+
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+VLIB_REGISTER_NODE (mpls_input_node) = {
+ .function = mpls_input,
+ .name = "mpls-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof(mpls_input_runtime_t),
+
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .n_next_nodes = MPLS_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [MPLS_INPUT_NEXT_##s] = n,
+ foreach_mpls_input_next
+#undef _
+ },
+
+ .format_buffer = format_mpls_unicast_header_net_byte_order,
+ .format_trace = format_mpls_input_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input)
+
+static void
+mpls_setup_nodes (vlib_main_t * vm)
+{
+ mpls_input_runtime_t * rt;
+ pg_node_t * pn;
+
+ pn = pg_get_node (mpls_input_node.index);
+ pn->unformat_edit = unformat_pg_mpls_header;
+
+ rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+ rt->last_label = (u32) ~0;
+ rt->last_inner_fib_index = 0;
+ rt->last_outer_fib_index = 0;
+ rt->mpls_main = &mpls_main;
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS,
+ mpls_input_node.index);
+}
+
+static clib_error_t * mpls_input_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ error = vlib_call_init_function (vm, mpls_init);
+ if (error)
+ clib_error_report (error);
+
+ mpls_setup_nodes (vm);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (mpls_input_init);
+
+static clib_error_t * mpls_input_worker_init (vlib_main_t * vm)
+{
+ mpls_input_runtime_t * rt;
+ rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+ rt->last_label = (u32) ~0;
+ rt->last_inner_fib_index = 0;
+ rt->last_outer_fib_index = 0;
+ rt->mpls_main = &mpls_main;
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (mpls_input_worker_init);
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
new file mode 100644
index 00000000..30031e51
--- /dev/null
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -0,0 +1,723 @@
+/*
+ * mpls_lookup.c: MPLS lookup
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls_lookup.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+/**
+ * Static MPLS VLIB forwarding node
+ */
+static vlib_node_registration_t mpls_lookup_node;
+
+/**
+ * The arc/edge from the MPLS lookup node to the MPLS replicate node
+ */
+u32 mpls_lookup_to_replicate_edge;
+
+typedef struct {
+ u32 next_index;
+ u32 lb_index;
+ u32 lfib_index;
+ u32 label_net_byte_order;
+ u32 hash;
+} mpls_lookup_trace_t;
+
+static u8 *
+format_mpls_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
+
+ s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %x "
+ "label %d eos %d",
+ t->next_index, t->lfib_index, t->lb_index, t->hash,
+ vnet_mpls_uc_get_label(
+ clib_net_to_host_u32(t->label_net_byte_order)),
+ vnet_mpls_uc_get_s(
+ clib_net_to_host_u32(t->label_net_byte_order)));
+ return s;
+}
+
+static inline uword
+mpls_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, next_index, * from, * to_next;
+ mpls_main_t * mm = &mpls_main;
+ u32 thread_index = vlib_get_thread_index();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 lbi0, next0, lfib_index0, bi0, hash_c0;
+ const mpls_unicast_header_t * h0;
+ const load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+ u32 lbi1, next1, lfib_index1, bi1, hash_c1;
+ const mpls_unicast_header_t * h1;
+ const load_balance_t *lb1;
+ const dpo_id_t *dpo1;
+ vlib_buffer_t * b1;
+ u32 lbi2, next2, lfib_index2, bi2, hash_c2;
+ const mpls_unicast_header_t * h2;
+ const load_balance_t *lb2;
+ const dpo_id_t *dpo2;
+ vlib_buffer_t * b2;
+ u32 lbi3, next3, lfib_index3, bi3, hash_c3;
+ const mpls_unicast_header_t * h3;
+ const load_balance_t *lb3;
+ const dpo_id_t *dpo3;
+ vlib_buffer_t * b3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3, *p4, *p5;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p4->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (h0[0]), STORE);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+ bi2 = to_next[2] = from[2];
+ bi3 = to_next[3] = from[3];
+
+ from += 4;
+ n_left_from -= 4;
+ to_next += 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+
+ lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ lfib_index2 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b2)->sw_if_index[VLIB_RX]);
+ lfib_index3 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b3)->sw_if_index[VLIB_RX]);
+
+ lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
+ lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1);
+ lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2);
+ lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3);
+
+ hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
+ hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0;
+ hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0;
+ hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0;
+
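+ /*
+ * The MPLS_IS_REPLICATE bit in the lookup result flags a replicate
+ * DPO (multicast LSP); otherwise the result indexes a load-balance.
+ */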
+ if (MPLS_IS_REPLICATE & lbi0)
+ {
+ next0 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ (lbi0 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb0 = load_balance_get(lbi0);
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+ {
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0,
+ (hash_c0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ next0 = dpo0->dpoi_next_node;
+
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+ if (MPLS_IS_REPLICATE & lbi1)
+ {
+ next1 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] =
+ (lbi1 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb1 = load_balance_get(lbi1);
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
+ {
+ hash_c1 = vnet_buffer (b1)->ip.flow_hash =
+ mpls_compute_flow_hash(h1, lb1->lb_hash_config);
+ dpo1 = load_balance_get_fwd_bucket
+ (lb1,
+ (hash_c1 & (lb1->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1,
+ vlib_buffer_length_in_chain (vm, b1));
+ }
+ if (MPLS_IS_REPLICATE & lbi2)
+ {
+ next2 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] =
+ (lbi2 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb2 = load_balance_get(lbi2);
+ ASSERT (lb2->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb2->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
+ {
+ hash_c2 = vnet_buffer (b2)->ip.flow_hash =
+ mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+ dpo2 = load_balance_get_fwd_bucket
+ (lb2,
+ (hash_c2 & (lb2->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo2 = load_balance_get_bucket_i (lb2, 0);
+ }
+ next2 = dpo2->dpoi_next_node;
+
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi2, 1,
+ vlib_buffer_length_in_chain (vm, b2));
+ }
+ if (MPLS_IS_REPLICATE & lbi3)
+ {
+ next3 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] =
+ (lbi3 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb3 = load_balance_get(lbi3);
+ ASSERT (lb3->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb3->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
+ {
+ hash_c3 = vnet_buffer (b3)->ip.flow_hash =
+ mpls_compute_flow_hash(h3, lb3->lb_hash_config);
+ dpo3 = load_balance_get_fwd_bucket
+ (lb3,
+ (hash_c3 & (lb3->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo3 = load_balance_get_bucket_i (lb3, 0);
+ }
+ next3 = dpo3->dpoi_next_node;
+
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi3, 1,
+ vlib_buffer_length_in_chain (vm, b3));
+ }
+
+ /*
+ * Before we pop the label, copy the values we need to maintain.
+ * The label header is in network byte order: the last byte is
+ * the TTL and bits 2 to 4 inclusive are the EXP bits.
+ */
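+ /*
+ * For reference, the 32-bit MPLS shim (RFC 3032):
+ * | label (20 bits) | EXP (3 bits) | S (1 bit) | TTL (8 bits) |
+ */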
+ vnet_buffer (b0)->mpls.ttl = ((char*)h0)[3];
+ vnet_buffer (b0)->mpls.exp = (((char*)h0)[2] & 0xe) >> 1;
+ vnet_buffer (b0)->mpls.first = 1;
+ vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3];
+ vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1;
+ vnet_buffer (b1)->mpls.first = 1;
+ vnet_buffer (b2)->mpls.ttl = ((char*)h2)[3];
+ vnet_buffer (b2)->mpls.exp = (((char*)h2)[2] & 0xe) >> 1;
+ vnet_buffer (b2)->mpls.first = 1;
+ vnet_buffer (b3)->mpls.ttl = ((char*)h3)[3];
+ vnet_buffer (b3)->mpls.exp = (((char*)h3)[2] & 0xe) >> 1;
+ vnet_buffer (b3)->mpls.first = 1;
+
+ /*
+ * pop the label that was just used in the lookup
+ */
+ vlib_buffer_advance(b0, sizeof(*h0));
+ vlib_buffer_advance(b1, sizeof(*h1));
+ vlib_buffer_advance(b2, sizeof(*h2));
+ vlib_buffer_advance(b3, sizeof(*h3));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->lfib_index = lfib_index0;
+ tr->hash = hash_c0;
+ tr->label_net_byte_order = h0->label_exp_s_ttl;
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->lb_index = lbi1;
+ tr->lfib_index = lfib_index1;
+ tr->hash = hash_c1;
+ tr->label_net_byte_order = h1->label_exp_s_ttl;
+ }
+
+ if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b2, sizeof (*tr));
+ tr->next_index = next2;
+ tr->lb_index = lbi2;
+ tr->lfib_index = lfib_index2;
+ tr->hash = hash_c2;
+ tr->label_net_byte_order = h2->label_exp_s_ttl;
+ }
+
+ if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b3, sizeof (*tr));
+ tr->next_index = next3;
+ tr->lb_index = lbi3;
+ tr->lfib_index = lfib_index3;
+ tr->hash = hash_c3;
+ tr->label_net_byte_order = h3->label_exp_s_ttl;
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 lbi0, next0, lfib_index0, bi0, hash_c0;
+ const mpls_unicast_header_t * h0;
+ const load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+
+ lbi0 = mpls_fib_table_forwarding_lookup(lfib_index0, h0);
+ hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
+
+ if (MPLS_IS_REPLICATE & lbi0)
+ {
+ next0 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ (lbi0 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb0 = load_balance_get(lbi0);
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+ {
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0,
+ (hash_c0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+
+ /*
+ * Before we pop the label, copy the values we need to maintain.
+ * The label header is in network byte order: the last byte is
+ * the TTL and bits 2 to 4 inclusive are the EXP bits.
+ */
+ vnet_buffer (b0)->mpls.ttl = ((char*)h0)[3];
+ vnet_buffer (b0)->mpls.exp = (((char*)h0)[2] & 0xe) >> 1;
+ vnet_buffer (b0)->mpls.first = 1;
+
+ /*
+ * pop the label that was just used in the lookup
+ */
+ vlib_buffer_advance(b0, sizeof(*h0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->lfib_index = lfib_index0;
+ tr->hash = hash_c0;
+ tr->label_net_byte_order = h0->label_exp_s_ttl;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, mpls_lookup_node.index,
+ MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+VLIB_REGISTER_NODE (mpls_lookup_node, static) = {
+ .function = mpls_lookup,
+ .name = "mpls-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .sibling_of = "mpls-load-balance",
+
+ .format_buffer = format_mpls_header,
+ .format_trace = format_mpls_lookup_trace,
+ .unformat_buffer = unformat_mpls_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup)
+
+typedef struct {
+ u32 next_index;
+ u32 lb_index;
+ u32 hash;
+} mpls_load_balance_trace_t;
+
+static u8 *
+format_mpls_load_balance_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *);
+
+ s = format (s, "MPLS: next [%d], LB index %d hash %d",
+ t->next_index, t->lb_index, t->hash);
+ return s;
+}
+
+always_inline uword
+mpls_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+ u32 thread_index = vlib_get_thread_index();
+ u32 next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ const load_balance_t *lb0, *lb1;
+ vlib_buffer_t * p0, *p1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1, next0, next1;
+ const mpls_unicast_header_t *mpls0, *mpls1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (mpls0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (mpls0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ mpls0 = vlib_buffer_get_current (p0);
+ mpls1 = vlib_buffer_get_current (p1);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get(lbi0);
+ lb1 = load_balance_get(lbi1);
+
+ /*
+ * This node sits on via-FIBs, so we can re-use the flow hash
+ * computed by a previous node if one is present. We don't want
+ * to use the same hash value at each level in the recursion
+ * graph, as that would lead to polarisation; hence the hash is
+ * shifted right by one at each level.
+ */
+ hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
+ }
+ dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
+ }
+ dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1,
+ vlib_buffer_length_in_chain (vm, p1));
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ const load_balance_t *lb0;
+ vlib_buffer_t * p0;
+ u32 pi0, lbi0, hc0, next0;
+ const mpls_unicast_header_t *mpls0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ mpls0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get(lbi0);
+
+ hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
+ }
+ dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (mpls_load_balance_node) = {
+ .function = mpls_load_balance,
+ .name = "mpls-load-balance",
+ .vector_size = sizeof (u32),
+ .format_trace = format_mpls_load_balance_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "mpls-drop",
+ },
+
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance)
+
+
+static clib_error_t *
+mpls_lookup_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, mpls_init)))
+ return error;
+
+ mpls_lookup_to_replicate_edge =
+ vlib_node_add_named_next(vm,
+ mpls_lookup_node.index,
+ "mpls-replicate");
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (mpls_lookup_init);
diff --git a/src/vnet/mpls/mpls_lookup.h b/src/vnet/mpls/mpls_lookup.h
new file mode 100644
index 00000000..28c9124f
--- /dev/null
+++ b/src/vnet/mpls/mpls_lookup.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_LOOKUP_H__
+#define __MPLS_LOOKUP_H__
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/ip/ip.h>
+
+/**
+ * The arc/edge from the MPLS lookup node to the MPLS replicate node
+ */
+extern u32 mpls_lookup_to_replicate_edge;
+
+/*
+ * Compute the flow hash.
+ * We'll use it to select which adjacency to use for this flow,
+ * among other things.
+ */
+always_inline u32
+mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
+ flow_hash_config_t flow_hash_config)
+{
+ /*
+ * We need to byte swap so we hash on the label's numerical value, i.e.
+ * an odd label leads to an odd bucket, as opposed to bucketing labels
+ * simply as above or below some value X.
+ */
+ u8 next_label_is_entropy;
+ mpls_label_t ho_label;
+ u32 hash, value;
+
+ ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+ hash = vnet_mpls_uc_get_label(ho_label);
+ next_label_is_entropy = 0;
+
+ while (MPLS_EOS != vnet_mpls_uc_get_s(ho_label))
+ {
+ hdr++;
+ ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+ value = vnet_mpls_uc_get_label(ho_label);
+
+ if (1 == next_label_is_entropy)
+ {
+ /*
+ * The label is an entropy value, use it alone as the hash
+ */
+ return (ho_label);
+ }
+ if (MPLS_IETF_ENTROPY_LABEL == value)
+ {
+ /*
+ * we've met a label in the stack indicating that the next
+ * label is an entropy value
+ */
+ next_label_is_entropy = 1;
+ }
+ else
+ {
+ /*
+ * XOR the label values in the stack together to
+ * build up the hash value
+ */
+ hash ^= value;
+ }
+ }
+
+ /*
+ * check the top nibble for v4 and v6
+ */
+ hdr++;
+
+ switch (((u8*)hdr)[0] >> 4)
+ {
+ case 4:
+ /* incorporate the v4 flow-hash */
+ hash ^= ip4_compute_flow_hash ((const ip4_header_t *)hdr,
+ IP_FLOW_HASH_DEFAULT);
+ break;
+ case 6:
+ /* incorporate the v6 flow-hash */
+ hash ^= ip6_compute_flow_hash ((const ip6_header_t *)hdr,
+ IP_FLOW_HASH_DEFAULT);
+ break;
+ default:
+ break;
+ }
+
+ return (hash);
+}
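+
+/*
+ * Illustrative example (not part of the original source): for the label
+ * stack {16, 17(EOS)} over an IPv4 payload the result is
+ * 16 ^ 17 ^ ip4_compute_flow_hash (payload, IP_FLOW_HASH_DEFAULT);
+ * had the stack carried MPLS_IETF_ENTROPY_LABEL, the following entropy
+ * label's host-order header word alone would have been returned.
+ */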
+
+#endif /* __MPLS_LOOKUP_H__ */
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
new file mode 100644
index 00000000..241a7835
--- /dev/null
+++ b/src/vnet/mpls/mpls_output.c
@@ -0,0 +1,498 @@
+/*
+ * mpls_output.c: MPLS Adj rewrite
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
+
+typedef struct {
+ /* Adjacency taken. */
+ u32 adj_index;
+ u32 flow_hash;
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[64 - 1*sizeof(u32)];
+} mpls_output_trace_t;
+
+#define foreach_mpls_output_next \
+_(DROP, "error-drop")
+
+typedef enum {
+#define _(s,n) MPLS_OUTPUT_NEXT_##s,
+ foreach_mpls_output_next
+#undef _
+ MPLS_OUTPUT_N_NEXT,
+} mpls_output_next_t;
+
+static u8 *
+format_mpls_output_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "adj-idx %d : %U flow hash: 0x%08x",
+ t->adj_index,
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+ t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip_adjacency_packet_data,
+ t->adj_index, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+static inline uword
+mpls_output_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int is_midchain)
+{
+ u32 n_left_from, next_index, * from, * to_next, thread_index;
+ vlib_node_runtime_t * error_node;
+ u32 n_left_to_next;
+ mpls_main_t *mm;
+
+ thread_index = vlib_get_thread_index();
+ error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ mm = &mpls_main;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_adjacency_t * adj0;
+ mpls_unicast_header_t *hdr0;
+ vlib_buffer_t * p0;
+ u32 pi0, rw_len0, adj_index0, next0, error0;
+
+ ip_adjacency_t * adj1;
+ mpls_unicast_header_t *hdr1;
+ vlib_buffer_t * p1;
+ u32 pi1, rw_len1, adj_index1, next1, error1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+ adj1 = adj_get(adj_index1);
+ hdr0 = vlib_buffer_get_current (p0);
+ hdr1 = vlib_buffer_get_current (p1);
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1,
+ sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ rw_len1 = adj1[0].rewrite_header.data_bytes;
+
+ /* Bump the adj counters for packet and bytes */
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0,
+ 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index1,
+ 1,
+ vlib_buffer_length_in_chain (vm, p1) + rw_len1);
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ adj0[0].rewrite_header.sw_if_index;
+ next0 = adj0[0].rewrite_header.next_index;
+ error0 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (mm->output_feature_arc_index,
+ adj0[0].rewrite_header.sw_if_index,
+ &next0, p0);
+ }
+ else
+ {
+ error0 = IP4_ERROR_MTU_EXCEEDED;
+ next0 = MPLS_OUTPUT_NEXT_DROP;
+ }
+ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
+ adj1[0].rewrite_header.max_l3_packet_bytes))
+ {
+ p1->current_data -= rw_len1;
+ p1->current_length += rw_len1;
+
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+ adj1[0].rewrite_header.sw_if_index;
+ next1 = adj1[0].rewrite_header.next_index;
+ error1 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (mm->output_feature_arc_index,
+ adj1[0].rewrite_header.sw_if_index,
+ &next1, p1);
+ }
+ else
+ {
+ error1 = IP4_ERROR_MTU_EXCEEDED;
+ next1 = MPLS_OUTPUT_NEXT_DROP;
+ }
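+ /*
+ * A midchain adjacency (e.g. an MPLS tunnel) carries a fixup
+ * callback to run after the rewrite has been applied.
+ */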
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
+ }
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+ clib_memcpy (tr->packet_data,
+ vlib_buffer_get_current (p0),
+ sizeof (tr->packet_data));
+ }
+ if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+ p1, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX];
+ tr->flow_hash = vnet_buffer(p1)->ip.flow_hash;
+ clib_memcpy (tr->packet_data,
+ vlib_buffer_get_current (p1),
+ sizeof (tr->packet_data));
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t * adj0;
+ mpls_unicast_header_t *hdr0;
+ vlib_buffer_t * p0;
+ u32 pi0, rw_len0, adj_index0, next0, error0;
+
+ pi0 = to_next[0] = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+ hdr0 = vlib_buffer_get_current (p0);
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], hdr0,
+ sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0,
+ 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ adj0[0].rewrite_header.sw_if_index;
+ next0 = adj0[0].rewrite_header.next_index;
+ error0 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (mm->output_feature_arc_index,
+ adj0[0].rewrite_header.sw_if_index,
+ &next0, p0);
+ }
+ else
+ {
+ error0 = IP4_ERROR_MTU_EXCEEDED;
+ next0 = MPLS_OUTPUT_NEXT_DROP;
+ }
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ }
+
+ p0->error = error_node->errors[error0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, mpls_output_node.index,
+ MPLS_ERROR_PKTS_ENCAP,
+ from_frame->n_vectors);
+
+ return from_frame->n_vectors;
+}
+
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+static inline uword
+mpls_output (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0));
+}
+
+VLIB_REGISTER_NODE (mpls_output_node) = {
+ .function = mpls_output,
+ .name = "mpls-output",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .n_next_nodes = MPLS_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n,
+ foreach_mpls_output_next
+#undef _
+ },
+
+ .format_trace = format_mpls_output_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_output_node, mpls_output)
+
+static inline uword
+mpls_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1));
+}
+
+VLIB_REGISTER_NODE (mpls_midchain_node) = {
+ .function = mpls_midchain,
+ .name = "mpls-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_output_trace,
+
+ .sibling_of = "mpls-output",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_midchain_node, mpls_midchain)
+
+/**
+ * @brief Next index values from the MPLS incomplete adj node
+ */
+#define foreach_mpls_adj_incomplete_next \
+_(DROP, "error-drop") \
+_(IP4, "ip4-arp") \
+_(IP6, "ip6-discover-neighbor")
+
+typedef enum {
+#define _(s,n) MPLS_ADJ_INCOMPLETE_NEXT_##s,
+ foreach_mpls_adj_incomplete_next
+#undef _
+ MPLS_ADJ_INCOMPLETE_N_NEXT,
+} mpls_adj_incomplete_next_t;
+
+/**
+ * @brief A struct to hold tracing information for the MPLS label imposition
+ * node.
+ */
+typedef struct mpls_adj_incomplete_trace_t_
+{
+ u32 next;
+} mpls_adj_incomplete_trace_t;
+
+
+/**
+ * @brief Graph node for incomplete MPLS adjacency.
+ * This node will push traffic to either the v4-arp or v6-nd node
+ * based on the next-hop proto of the adj.
+ * We pay a cost for this 'routing' node, but an incomplete adj is the
+ * exception case.
+ */
+static inline uword
+mpls_adj_incomplete (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, next0, adj_index0;
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+
+ pi0 = to_next[0] = from[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+
+ if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto))
+ {
+ next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP4;
+ }
+ else
+ {
+ next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6;
+ }
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_adj_incomplete_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next = next0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_mpls_adj_incomplete_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_adj_incomplete_trace_t * t;
+ uword indent;
+
+ t = va_arg (*args, mpls_adj_incomplete_trace_t *);
+ indent = format_get_indent (s);
+
+ s = format (s, "%Unext:%d",
+ format_white_space, indent,
+ t->next);
+ return (s);
+}
+
+VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = {
+ .function = mpls_adj_incomplete,
+ .name = "mpls-adj-incomplete",
+ .format_trace = format_mpls_adj_incomplete_trace,
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [MPLS_ADJ_INCOMPLETE_NEXT_##s] = n,
+ foreach_mpls_adj_incomplete_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_adj_incomplete_node,
+ mpls_adj_incomplete)
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
new file mode 100644
index 00000000..2d5521f4
--- /dev/null
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -0,0 +1,1070 @@
+/*
+ * mpls_tunnel.c: MPLS tunnel interfaces (i.e. for RSVP-TE)
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls_tunnel.h>
+#include <vnet/mpls/mpls_types.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/fib/mpls_fib.h>
+
+/**
+ * @brief pool of tunnel instances
+ */
+static mpls_tunnel_t *mpls_tunnel_pool;
+
+/**
+ * @brief Vector of free tunnel HW interface indices - i.e. recycled indices
+ */
+static u32 * mpls_tunnel_free_hw_if_indices;
+
+/**
+ * @brief DB of SW index to tunnel index
+ */
+static u32 *mpls_tunnel_db;
+
+/**
+ * @brief MPLS tunnel flags strings
+ */
+static const char *mpls_tunnel_attribute_names[] = MPLS_TUNNEL_ATTRIBUTES;
+
+/**
+ * @brief Get a tunnel object from a SW interface index
+ */
+static mpls_tunnel_t*
+mpls_tunnel_get_from_sw_if_index (u32 sw_if_index)
+{
+ if ((vec_len(mpls_tunnel_db) <= sw_if_index) ||
+ (~0 == mpls_tunnel_db[sw_if_index]))
+ return (NULL);
+
+ return (pool_elt_at_index(mpls_tunnel_pool,
+ mpls_tunnel_db[sw_if_index]));
+}
+
+/**
+ * @brief Build a rewrite string for the MPLS tunnel.
+ */
+static u8*
+mpls_tunnel_build_rewrite_i (void)
+{
+ /*
+ * passing the adj code a NULL rewrite means 'I don't have one because
+ * the other end is unresolved'. That's not the case here. For the MPLS
+ * tunnel there are just no bytes of encap to apply in the adj. We'll
+ * impose the label stack once we choose a path. So return a zero length
+ * rewrite.
+ */
+ u8 *rewrite = NULL;
+
+ vec_validate(rewrite, 0);
+ vec_reset_length(rewrite);
+
+ return (rewrite);
+}
+
+/**
+ * @brief Build a rewrite string for the MPLS tunnel.
+ */
+static u8*
+mpls_tunnel_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
+{
+ return (mpls_tunnel_build_rewrite_i());
+}
+
+typedef struct mpls_tunnel_collect_forwarding_ctx_t_
+{
+ load_balance_path_t * next_hops;
+ const mpls_tunnel_t *mt;
+ fib_forward_chain_type_t fct;
+} mpls_tunnel_collect_forwarding_ctx_t;
+
+static fib_path_list_walk_rc_t
+mpls_tunnel_collect_forwarding (fib_node_index_t pl_index,
+ fib_node_index_t path_index,
+ void *arg)
+{
+ mpls_tunnel_collect_forwarding_ctx_t *ctx;
+ fib_path_ext_t *path_ext;
+
+ ctx = arg;
+
+ /*
+ * if the path is not resolved, don't include it.
+ */
+ if (!fib_path_is_resolved(path_index))
+ {
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+ }
+
+ /*
+ * get the matching path-extension for the path being visited.
+ */
+ path_ext = fib_path_ext_list_find_by_path_index(&ctx->mt->mt_path_exts,
+ path_index);
+
+ if (NULL != path_ext)
+ {
+ /*
+ * found a matching extension. stack it to obtain the forwarding
+ * info for this path.
+ */
+ ctx->next_hops = fib_path_ext_stack(path_ext,
+ ctx->fct,
+ ctx->fct,
+ ctx->next_hops);
+ }
+    else
+    {
+        /*
+         * There should be a path-extension associated with each path.
+         */
+        ASSERT(0);
+    }
+
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+}
+
+static void
+mpls_tunnel_mk_lb (mpls_tunnel_t *mt,
+ vnet_link_t linkt,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo_lb)
+{
+ dpo_proto_t lb_proto;
+
+ /*
+ * If the entry has path extensions then we construct a load-balance
+ * by stacking the extensions on the forwarding chains of the paths.
+ * Otherwise we use the load-balance of the path-list
+ */
+ mpls_tunnel_collect_forwarding_ctx_t ctx = {
+ .mt = mt,
+ .next_hops = NULL,
+ .fct = fct,
+ };
+
+ /*
+ * As an optimisation we allocate the vector of next-hops to be sized
+     * equal to the maximum number of paths we will need, which is also the
+ * most likely number we will need, since in most cases the paths are 'up'.
+ */
+ vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list));
+ vec_reset_length(ctx.next_hops);
+
+ lb_proto = fib_forw_chain_type_to_dpo_proto(fct);
+
+ fib_path_list_walk(mt->mt_path_list,
+ mpls_tunnel_collect_forwarding,
+ &ctx);
+
+ if (!dpo_id_is_valid(dpo_lb))
+ {
+ /*
+ * first time create
+ */
+ if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST)
+ {
+ dpo_set(dpo_lb,
+ DPO_REPLICATE,
+ lb_proto,
+ replicate_create(0, lb_proto));
+ }
+ else
+ {
+ flow_hash_config_t fhc;
+
+ switch (linkt)
+ {
+ case VNET_LINK_MPLS:
+ fhc = MPLS_FLOW_HASH_DEFAULT;
+ break;
+ case VNET_LINK_IP4:
+ case VNET_LINK_IP6:
+ fhc = IP_FLOW_HASH_DEFAULT;
+ break;
+ default:
+ fhc = 0;
+ break;
+ }
+
+ dpo_set(dpo_lb,
+ DPO_LOAD_BALANCE,
+ lb_proto,
+ load_balance_create(0, lb_proto, fhc));
+ }
+ }
+
+ if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST)
+ {
+ /*
+ * MPLS multicast
+ */
+ replicate_multipath_update(dpo_lb, ctx.next_hops);
+ }
+ else
+ {
+ load_balance_multipath_update(dpo_lb,
+ ctx.next_hops,
+ LOAD_BALANCE_FLAG_NONE);
+ vec_free(ctx.next_hops);
+ }
+}
+
+/**
+ * mpls_tunnel_stack
+ *
+ * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
+ */
+static void
+mpls_tunnel_stack (adj_index_t ai)
+{
+ ip_adjacency_t *adj;
+ mpls_tunnel_t *mt;
+ u32 sw_if_index;
+
+ adj = adj_get(ai);
+ sw_if_index = adj->rewrite_header.sw_if_index;
+
+ mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+ if (NULL == mt)
+ return;
+
+ /*
+ * while we're stacking the adj, remove the tunnel from the child list
+ * of the path list. this breaks a circular dependency of walk updates
+ * where the create of adjacencies in the children can lead to walks
+ * that get back here.
+ */
+ fib_path_list_lock(mt->mt_path_list);
+
+ fib_path_list_child_remove(mt->mt_path_list,
+ mt->mt_sibling_index);
+
+ /*
+ * Construct the DPO (load-balance or replicate) that we can stack
+ * the tunnel's midchain on
+ */
+ if (vnet_hw_interface_get_flags(vnet_get_main(),
+ mt->mt_hw_if_index) &
+ VNET_HW_INTERFACE_FLAG_LINK_UP)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ mpls_tunnel_mk_lb(mt,
+ adj->ia_link,
+ (VNET_LINK_MPLS == adj_get_link_type(ai) ?
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS),
+ &dpo);
+
+ adj_nbr_midchain_stack(ai, &dpo);
+ dpo_reset(&dpo);
+ }
+ else
+ {
+ adj_nbr_midchain_unstack(ai);
+ }
+
+ mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ mt - mpls_tunnel_pool);
+
+ fib_path_list_unlock(mt->mt_path_list);
+}
+
+/**
+ * @brief Call back when restacking all adjacencies on a MPLS interface
+ */
+static adj_walk_rc_t
+mpls_adj_walk_cb (adj_index_t ai,
+ void *ctx)
+{
+ mpls_tunnel_stack(ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static void
+mpls_tunnel_restack (mpls_tunnel_t *mt)
+{
+ fib_protocol_t proto;
+
+ /*
+ * walk all the adjacencies on the MPLS interface and restack them
+ */
+ if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2)
+ {
+ /*
+ * Stack a load-balance that drops, whilst we have no paths
+ */
+ vnet_hw_interface_t * hi;
+ dpo_id_t dpo = DPO_INVALID;
+
+ mpls_tunnel_mk_lb(mt,
+ VNET_LINK_MPLS,
+ FIB_FORW_CHAIN_TYPE_ETHERNET,
+ &dpo);
+
+ hi = vnet_get_hw_interface(vnet_get_main(), mt->mt_hw_if_index);
+ dpo_stack_from_node(hi->tx_node_index,
+ &mt->mt_l2_lb,
+ &dpo);
+ dpo_reset(&dpo);
+ }
+ else
+ {
+ FOR_EACH_FIB_PROTOCOL(proto)
+ {
+ adj_nbr_walk(mt->mt_sw_if_index,
+ proto,
+ mpls_adj_walk_cb,
+ NULL);
+ }
+ }
+}
+
+static clib_error_t *
+mpls_tunnel_admin_up_down (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ vnet_hw_interface_t * hi;
+ mpls_tunnel_t *mt;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ mt = mpls_tunnel_get_from_sw_if_index(hi->sw_if_index);
+
+ if (NULL == mt)
+ return (NULL);
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ vnet_hw_interface_set_flags (vnm, hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ else
+ vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
+
+ mpls_tunnel_restack(mt);
+
+ return (NULL);
+}
+
+/**
+ * @brief Fixup the adj rewrite post encap. This is a no-op since the
+ * rewrite is a stack of labels.
+ */
+static void
+mpls_tunnel_fixup (vlib_main_t *vm,
+ ip_adjacency_t *adj,
+ vlib_buffer_t *b0)
+{
+ /*
+     * A no-op w.r.t. the header, but reset the 'have we pushed any
+     * MPLS labels onto the packet' flag. That way, when we enter the
+     * tunnel, the TTL will be set to 255.
+ */
+ vnet_buffer(b0)->mpls.first = 0;
+}
+
+static void
+mpls_tunnel_update_adj (vnet_main_t * vnm,
+ u32 sw_if_index,
+ adj_index_t ai)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != ai);
+
+ adj = adj_get(ai);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_GLEAN:
+ adj_nbr_midchain_update_rewrite(ai, mpls_tunnel_fixup,
+ ADJ_FLAG_NONE,
+ mpls_tunnel_build_rewrite_i());
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ /*
+ * Construct a partial rewrite from the known ethernet mcast dest MAC
+ * There's no MAC fixup, so the last 2 parameters are 0
+ */
+ adj_mcast_midchain_update_rewrite(ai, mpls_tunnel_fixup,
+ ADJ_FLAG_NONE,
+ mpls_tunnel_build_rewrite_i(),
+ 0, 0);
+ break;
+
+ case IP_LOOKUP_NEXT_DROP:
+ case IP_LOOKUP_NEXT_PUNT:
+ case IP_LOOKUP_NEXT_LOCAL:
+ case IP_LOOKUP_NEXT_REWRITE:
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ case IP_LOOKUP_NEXT_ICMP_ERROR:
+ case IP_LOOKUP_N_NEXT:
+ ASSERT (0);
+ break;
+ }
+
+ mpls_tunnel_stack(ai);
+}
+
+static u8 *
+format_mpls_tunnel_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "mpls-tunnel%d", dev_instance);
+}
+
+static u8 *
+format_mpls_tunnel_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ CLIB_UNUSED (int verbose) = va_arg (*args, int);
+
+ return (format (s, "MPLS-tunnel: id %d\n", dev_instance));
+}
+
+/**
+ * @brief Packet trace structure
+ */
+typedef struct mpls_tunnel_trace_t_
+{
+ /**
+ * Tunnel-id / index in tunnel vector
+ */
+ u32 tunnel_id;
+} mpls_tunnel_trace_t;
+
+static u8 *
+format_mpls_tunnel_tx_trace (u8 * s,
+ va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_tunnel_trace_t * t = va_arg (*args, mpls_tunnel_trace_t *);
+
+ s = format (s, "MPLS: tunnel %d", t->tunnel_id);
+ return s;
+}
+
+/**
+ * @brief TX function. Only called for L2 traffic; L3 traffic uses the adj-midchains.
+ */
+static uword
+mpls_tunnel_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 next_index;
+ u32 * from, * to_next, n_left_from, n_left_to_next;
+ vnet_interface_output_runtime_t * rd = (void *) node->runtime_data;
+ const mpls_tunnel_t *mt;
+
+ mt = pool_elt_at_index(mpls_tunnel_pool, rd->dev_instance);
+
+ /* Vector of buffer / pkt indices we're supposed to process */
+ from = vlib_frame_vector_args (frame);
+
+ /* Number of buffers / pkts */
+ n_left_from = frame->n_vectors;
+
+ /* Speculatively send the first buffer to the last disposition we used */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ /* set up to enqueue to our disposition with index = next_index */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /*
+ * FIXME DUAL LOOP
+ */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer(vm, bi0);
+
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_lb.dpoi_index;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = rd->dev_instance;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, mt->mt_l2_lb.dpoi_next_node);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VNET_DEVICE_CLASS (mpls_tunnel_class) = {
+ .name = "MPLS tunnel device",
+ .format_device_name = format_mpls_tunnel_name,
+ .format_device = format_mpls_tunnel_device,
+ .format_tx_trace = format_mpls_tunnel_tx_trace,
+ .tx_function = mpls_tunnel_tx,
+ .admin_up_down_function = mpls_tunnel_admin_up_down,
+};
+
+VNET_HW_INTERFACE_CLASS (mpls_tunnel_hw_interface_class) = {
+ .name = "MPLS-Tunnel",
+ .update_adjacency = mpls_tunnel_update_adj,
+ .build_rewrite = mpls_tunnel_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+const mpls_tunnel_t *
+mpls_tunnel_get (u32 mti)
+{
+ return (pool_elt_at_index(mpls_tunnel_pool, mti));
+}
+
+/**
+ * @brief Walk all the MPLS tunnels
+ */
+void
+mpls_tunnel_walk (mpls_tunnel_walk_cb_t cb,
+ void *ctx)
+{
+ u32 mti;
+
+ pool_foreach_index(mti, mpls_tunnel_pool,
+ ({
+ cb(mti, ctx);
+ }));
+}
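+
+/**
+ * @brief Example walk callback (illustrative only, not part of the
+ * original source): count the configured tunnels via mpls_tunnel_walk().
+ */
+static void
+mpls_tunnel_count_cb (u32 mti, void *ctx)
+{
+    u32 *count = ctx;
+
+    (*count)++;
+}
+
+static u32 __attribute__ ((unused))
+mpls_tunnel_count (void)
+{
+    u32 count = 0;
+
+    mpls_tunnel_walk (mpls_tunnel_count_cb, &count);
+
+    return (count);
+}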
+
+void
+vnet_mpls_tunnel_del (u32 sw_if_index)
+{
+ mpls_tunnel_t *mt;
+
+ mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+ if (NULL == mt)
+ return;
+
+ if (FIB_NODE_INDEX_INVALID != mt->mt_path_list)
+ fib_path_list_child_remove(mt->mt_path_list,
+ mt->mt_sibling_index);
+ dpo_reset(&mt->mt_l2_lb);
+
+ vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index);
+ pool_put(mpls_tunnel_pool, mt);
+ mpls_tunnel_db[sw_if_index] = ~0;
+}
+
+u32
+vnet_mpls_tunnel_create (u8 l2_only,
+ u8 is_multicast)
+{
+ vnet_hw_interface_t * hi;
+ mpls_tunnel_t *mt;
+ vnet_main_t * vnm;
+ u32 mti;
+
+ vnm = vnet_get_main();
+ pool_get(mpls_tunnel_pool, mt);
+ memset (mt, 0, sizeof (*mt));
+ mti = mt - mpls_tunnel_pool;
+ fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL);
+ mt->mt_path_list = FIB_NODE_INDEX_INVALID;
+ mt->mt_sibling_index = FIB_NODE_INDEX_INVALID;
+
+ if (is_multicast)
+ mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST;
+ if (l2_only)
+ mt->mt_flags |= MPLS_TUNNEL_FLAG_L2;
+
+ /*
+     * Create a new, or re-use an old, tunnel HW interface
+ */
+ if (vec_len (mpls_tunnel_free_hw_if_indices) > 0)
+ {
+ mt->mt_hw_if_index =
+ mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1];
+ _vec_len (mpls_tunnel_free_hw_if_indices) -= 1;
+ hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
+ hi->hw_instance = mti;
+ hi->dev_instance = mti;
+ }
+ else
+ {
+ mt->mt_hw_if_index = vnet_register_interface(
+ vnm,
+ mpls_tunnel_class.index,
+ mti,
+ mpls_tunnel_hw_interface_class.index,
+ mti);
+ hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
+ }
+
+ /*
+ * Add the new tunnel to the tunnel DB - key:SW if index
+ */
+ mt->mt_sw_if_index = hi->sw_if_index;
+ vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0);
+ mpls_tunnel_db[mt->mt_sw_if_index] = mti;
+
+ return (mt->mt_sw_if_index);
+}
+
+void
+vnet_mpls_tunnel_path_add (u32 sw_if_index,
+ fib_route_path_t *rpaths)
+{
+ mpls_tunnel_t *mt;
+ u32 mti;
+
+ mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+ if (NULL == mt)
+ return;
+
+ mti = mt - mpls_tunnel_pool;
+
+ /*
+ * construct a path-list from the path provided
+ */
+ if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
+ {
+ mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths);
+ mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ mti);
+ }
+ else
+ {
+ fib_node_index_t old_pl_index;
+
+ old_pl_index = mt->mt_path_list;
+
+ mt->mt_path_list =
+ fib_path_list_copy_and_path_add(old_pl_index,
+ FIB_PATH_LIST_FLAG_SHARED,
+ rpaths);
+
+ fib_path_list_child_remove(old_pl_index,
+ mt->mt_sibling_index);
+ mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ mti);
+ /*
+ * re-resolve all the path-extensions with the new path-list
+ */
+ fib_path_ext_list_resolve(&mt->mt_path_exts, mt->mt_path_list);
+ }
+ fib_path_ext_list_insert(&mt->mt_path_exts,
+ mt->mt_path_list,
+ FIB_PATH_EXT_MPLS,
+ rpaths);
+ mpls_tunnel_restack(mt);
+}
+
+int
+vnet_mpls_tunnel_path_remove (u32 sw_if_index,
+ fib_route_path_t *rpaths)
+{
+ mpls_tunnel_t *mt;
+ u32 mti;
+
+ mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+ if (NULL == mt)
+ return (0);
+
+ mti = mt - mpls_tunnel_pool;
+
+ /*
+ * construct a path-list from the path provided
+ */
+ if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
+ {
+        /* can't remove a path if we don't have any */
+ return (0);
+ }
+ else
+ {
+ fib_node_index_t old_pl_index;
+
+ old_pl_index = mt->mt_path_list;
+
+ mt->mt_path_list =
+ fib_path_list_copy_and_path_remove(old_pl_index,
+ FIB_PATH_LIST_FLAG_SHARED,
+ rpaths);
+
+ fib_path_list_child_remove(old_pl_index,
+ mt->mt_sibling_index);
+
+ if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
+ {
+ /* no paths left */
+ return (0);
+ }
+ else
+ {
+ mt->mt_sibling_index =
+ fib_path_list_child_add(mt->mt_path_list,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ mti);
+ }
+ /*
+ * find the matching path extension and remove it
+ */
+ fib_path_ext_list_remove(&mt->mt_path_exts,
+ FIB_PATH_EXT_MPLS,
+ rpaths);
+
+ /*
+ * re-resolve all the path-extensions with the new path-list
+ */
+ fib_path_ext_list_resolve(&mt->mt_path_exts,
+ mt->mt_path_list);
+
+ mpls_tunnel_restack(mt);
+ }
+
+ return (fib_path_list_get_n_paths(mt->mt_path_list));
+}
+
+
+static clib_error_t *
+vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ vnet_main_t * vnm = vnet_get_main();
+    u8 is_del = 0, l2_only = 0, is_multicast = 0;
+ fib_route_path_t rpath, *rpaths = NULL;
+ mpls_label_t out_label = MPLS_LABEL_INVALID;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
+
+ memset(&rpath, 0, sizeof(rpath));
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del %U",
+ unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ is_del = 1;
+ else if (unformat (line_input, "add %U",
+ unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ is_del = 0;
+ else if (unformat (line_input, "add"))
+ is_del = 0;
+ else if (unformat (line_input, "out-labels"))
+ {
+ while (unformat (line_input, "%U",
+ unformat_mpls_unicast_label,
+ &out_label))
+ {
+ vec_add1 (rpath.frp_label_stack, out_label);
+ }
+ }
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ }
+
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6))
+ {
+ rpath.frp_fib_index = 0;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4))
+ {
+ rpath.frp_fib_index = 0;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ }
+ else if (unformat (line_input, "l2-only"))
+ l2_only = 1;
+ else if (unformat (line_input, "multicast"))
+ is_multicast = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vec_add1(rpaths, rpath);
+
+ if (is_del)
+ {
+ if (!vnet_mpls_tunnel_path_remove(sw_if_index, rpaths))
+ {
+ vnet_mpls_tunnel_del(sw_if_index);
+ }
+ }
+ else
+ {
+ if (0 == vec_len(rpath.frp_label_stack))
+ {
+ error = clib_error_return (0, "No Output Labels '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ if (~0 == sw_if_index)
+ {
+ sw_if_index = vnet_mpls_tunnel_create(l2_only, is_multicast);
+ }
+ vnet_mpls_tunnel_path_add(sw_if_index, rpaths);
+ }
+
+done:
+ vec_free(rpaths);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command creates a uni-directional MPLS tunnel.
+ *
+ * @cliexpar
+ * @cliexstart{mpls tunnel}
+ * mpls tunnel via 10.0.0.1 GigEthernet0/8/0 out-labels 33 34
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = {
+ .path = "mpls tunnel",
+ .short_help =
+    "mpls tunnel [add|del <interface>] via <addr> [<interface>] out-labels <label> [<label> ...] [l2-only] [multicast]",
+ .function = vnet_create_mpls_tunnel_command_fn,
+};
+
+static u8 *
+format_mpls_tunnel (u8 * s, va_list * args)
+{
+ mpls_tunnel_t *mt = va_arg (*args, mpls_tunnel_t *);
+ mpls_tunnel_attribute_t attr;
+
+ s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d",
+ mt - mpls_tunnel_pool,
+ mt->mt_sw_if_index,
+ mt->mt_hw_if_index);
+ if (MPLS_TUNNEL_FLAG_NONE != mt->mt_flags) {
+ s = format(s, " \n flags:");
+ FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(attr) {
+ if ((1<<attr) & mt->mt_flags) {
+ s = format (s, "%s,", mpls_tunnel_attribute_names[attr]);
+ }
+ }
+ }
+ s = format(s, "\n via:\n");
+ s = fib_path_list_format(mt->mt_path_list, s);
+ s = format(s, "%U", format_fib_path_ext_list, &mt->mt_path_exts);
+ s = format(s, "\n");
+
+ if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2)
+ {
+ s = format(s, " forwarding: %U\n",
+ format_fib_forw_chain_type,
+ FIB_FORW_CHAIN_TYPE_ETHERNET);
+ s = format(s, " %U\n", format_dpo_id, &mt->mt_l2_lb, 2);
+ }
+
+ return (s);
+}
+
+static clib_error_t *
+show_mpls_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ mpls_tunnel_t * mt;
+ u32 mti = ~0;
+
+ if (pool_elts (mpls_tunnel_pool) == 0)
+ vlib_cli_output (vm, "No MPLS tunnels configured...");
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &mti))
+ ;
+ else
+ break;
+ }
+
+ if (~0 == mti)
+ {
+ pool_foreach (mt, mpls_tunnel_pool,
+ ({
+ vlib_cli_output (vm, "[@%d] %U",
+ mt - mpls_tunnel_pool,
+ format_mpls_tunnel, mt);
+ }));
+ }
+ else
+ {
+ if (pool_is_free_index(mpls_tunnel_pool, mti))
+	    return clib_error_return (0, "Not a tunnel index %d", mti);
+
+ mt = pool_elt_at_index(mpls_tunnel_pool, mti);
+
+ vlib_cli_output (vm, "[@%d] %U",
+ mt - mpls_tunnel_pool,
+ format_mpls_tunnel, mt);
+ }
+
+ return 0;
+}
+
+/*?
+ * This command shows MPLS tunnels.
+ *
+ * @cliexpar
+ * @cliexstart{sh mpls tunnel 2}
+ * [@2] mpls_tunnel2: sw_if_index:5 hw_if_index:5
+ * label-stack:
+ * 3,
+ * via:
+ * index:26 locks:1 proto:ipv4 uPRF-list:26 len:1 itfs:[2, ]
+ * index:26 pl-index:26 ipv4 weight=1 attached-nexthop: oper-flags:resolved,
+ * 10.0.0.2 loop0
+ * [@0]: ipv4 via 10.0.0.2 loop0: IP4: de:ad:00:00:00:00 -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = {
+ .path = "show mpls tunnel",
+ .function = show_mpls_tunnel_command_fn,
+};
+
+static mpls_tunnel_t *
+mpls_tunnel_from_fib_node (fib_node_t *node)
+{
+#if (CLIB_DEBUG > 0)
+ ASSERT(FIB_NODE_TYPE_MPLS_TUNNEL == node->fn_type);
+#endif
+ return ((mpls_tunnel_t*) (((char*)node) -
+ STRUCT_OFFSET_OF(mpls_tunnel_t, mt_node)));
+}
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+mpls_tunnel_back_walk (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ mpls_tunnel_restack(mpls_tunnel_from_fib_node(node));
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t*
+mpls_tunnel_fib_node_get (fib_node_index_t index)
+{
+ mpls_tunnel_t * mt;
+
+ mt = pool_elt_at_index(mpls_tunnel_pool, index);
+
+ return (&mt->mt_node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+mpls_tunnel_last_lock_gone (fib_node_t *node)
+{
+ /*
+     * The MPLS tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT(0);
+}
+
+/*
+ * Virtual function table registered by MPLS tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t mpls_vft = {
+ .fnv_get = mpls_tunnel_fib_node_get,
+ .fnv_last_lock = mpls_tunnel_last_lock_gone,
+ .fnv_back_walk = mpls_tunnel_back_walk,
+};
+
+static clib_error_t *
+mpls_tunnel_init (vlib_main_t *vm)
+{
+ fib_node_register_type(FIB_NODE_TYPE_MPLS_TUNNEL, &mpls_vft);
+
+ return 0;
+}
+VLIB_INIT_FUNCTION(mpls_tunnel_init);
diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h
new file mode 100644
index 00000000..285817c3
--- /dev/null
+++ b/src/vnet/mpls/mpls_tunnel.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_TUNNEL_H__
+#define __MPLS_TUNNEL_H__
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_path_ext.h>
+
+typedef enum mpls_tunnel_attribute_t_
+{
+ MPLS_TUNNEL_ATTRIBUTE_FIRST = 0,
+ /**
+ * @brief The tunnel is L2 only
+ */
+ MPLS_TUNNEL_ATTRIBUTE_L2 = MPLS_TUNNEL_ATTRIBUTE_FIRST,
+ /**
+ * @brief The tunnel has an underlying multicast LSP
+ */
+ MPLS_TUNNEL_ATTRIBUTE_MCAST,
+ MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST,
+} mpls_tunnel_attribute_t;
+
+#define MPLS_TUNNEL_ATTRIBUTES { \
+ [MPLS_TUNNEL_ATTRIBUTE_MCAST] = "multicast", \
+ [MPLS_TUNNEL_ATTRIBUTE_L2] = "L2", \
+}
+#define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item) \
+ for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST; \
+ _item <= MPLS_TUNNEL_ATTRIBUTE_LAST; \
+ _item++)
+
+typedef enum mpls_tunnel_flag_t_ {
+ MPLS_TUNNEL_FLAG_NONE = 0,
+ MPLS_TUNNEL_FLAG_L2 = (1 << MPLS_TUNNEL_ATTRIBUTE_L2),
+ MPLS_TUNNEL_FLAG_MCAST = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST),
+} __attribute__ ((packed)) mpls_tunnel_flags_t;
+
+
+/**
+ * @brief A uni-directional MPLS tunnel
+ */
+typedef struct mpls_tunnel_t_
+{
+ /**
+ * @brief The tunnel hooks into the FIB control plane graph.
+ */
+ fib_node_t mt_node;
+
+ /**
+ * @brief Tunnel flags
+ */
+ mpls_tunnel_flags_t mt_flags;
+
+ /**
+   * @brief If the tunnel is an L2 tunnel, this is the load-balance object
+   * for the ETHERNET link type
+ */
+ dpo_id_t mt_l2_lb;
+
+ /**
+   * @brief The HW interface index of the tunnel interface
+ */
+ u32 mt_hw_if_index;
+
+ /**
+   * @brief The SW interface index of the tunnel interface
+ */
+ u32 mt_sw_if_index;
+
+ /**
+ * @brief The path-list over which the tunnel's destination is reachable
+ */
+ fib_node_index_t mt_path_list;
+
+ /**
+ * @brief sibling index on the path-list so notifications are received.
+ */
+ u32 mt_sibling_index;
+
+ /**
+   * A vector of path extensions to hold the label stack for each path
+ */
+ fib_path_ext_list_t mt_path_exts;
+} mpls_tunnel_t;
+
+/**
+ * @brief Create a new MPLS tunnel
+ * @return the SW interface index of the newly created tunnel
+ */
+extern u32 vnet_mpls_tunnel_create (u8 l2_only,
+ u8 is_multicast);
+
+/**
+ * @brief Add a path to an MPLS tunnel
+ */
+extern void vnet_mpls_tunnel_path_add (u32 sw_if_index,
+ fib_route_path_t *rpath);
+
+/**
+ * @brief remove a path from a tunnel.
+ * @return the number of remaining paths. 0 implies the tunnel can be deleted
+ */
+extern int vnet_mpls_tunnel_path_remove (u32 sw_if_index,
+ fib_route_path_t *rpath);
+
+/**
+ * @brief Delete an MPLS tunnel
+ */
+extern void vnet_mpls_tunnel_del (u32 sw_if_index);
+
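+/**
+ * @brief Usage sketch (illustrative, not part of the original API): the
+ * typical create-then-add sequence, mirroring the CLI handler in
+ * mpls_tunnel.c. The caller owns the vector of fib_route_path_t.
+ */
+static inline u32
+mpls_tunnel_create_and_add_example (fib_route_path_t *rpaths)
+{
+    u32 sw_if_index;
+
+    sw_if_index = vnet_mpls_tunnel_create (0 /* l2_only */,
+                                           0 /* is_multicast */);
+    vnet_mpls_tunnel_path_add (sw_if_index, rpaths);
+
+    return (sw_if_index);
+}
+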
+extern const mpls_tunnel_t *mpls_tunnel_get(u32 index);
+
+/**
+ * @brief Callback function invoked while walking MPLS tunnels
+ */
+typedef void (*mpls_tunnel_walk_cb_t)(u32 index, void *ctx);
+
+/**
+ * @brief Walk all the MPLS tunnels
+ */
+extern void mpls_tunnel_walk(mpls_tunnel_walk_cb_t cb,
+ void *ctx);
+
+#endif
diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h
new file mode 100644
index 00000000..f1c3191e
--- /dev/null
+++ b/src/vnet/mpls/mpls_types.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __MPLS_TYPES_H__
+#define __MPLS_TYPES_H__
+
+#define MPLS_IETF_MIN_LABEL 0x00000
+#define MPLS_IETF_MAX_LABEL 0xfffff
+
+#define MPLS_IETF_MIN_RESERVED_LABEL 0x00000
+#define MPLS_IETF_MAX_RESERVED_LABEL 0x0000f
+
+#define MPLS_IETF_MIN_UNRES_LABEL 0x00010
+#define MPLS_IETF_MAX_UNRES_LABEL 0xfffff
+
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL 0x00000
+#define MPLS_IETF_ROUTER_ALERT_LABEL 0x00001
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL 0x00002
+#define MPLS_IETF_IMPLICIT_NULL_LABEL 0x00003
+#define MPLS_IETF_ELI_LABEL 0x00007
+#define MPLS_IETF_GAL_LABEL 0x0000D
+#define MPLS_IETF_ENTROPY_LABEL 0x0000E
+
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_STRING "ip4-explicit-null"
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING "e-nul"
+#define MPLS_IETF_IMPLICIT_NULL_STRING "implicit-null"
+#define MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING "i-nul"
+#define MPLS_IETF_ROUTER_ALERT_STRING "router-alert"
+#define MPLS_IETF_ROUTER_ALERT_BRIEF_STRING "r-alt"
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_STRING "ipv6-explicit-null"
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING "v6enl"
+#define MPLS_IETF_ELI_STRING "entropy-label-indicator"
+#define MPLS_IETF_ELI_BRIEF_STRING "eli"
+#define MPLS_IETF_GAL_STRING "gal"
+#define MPLS_IETF_GAL_BRIEF_STRING "gal"
+
+#define MPLS_LABEL_INVALID (MPLS_IETF_MAX_LABEL+1)
+
+#define MPLS_LABEL_IS_REAL(_lbl) \
+ (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) && \
+ ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL))
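+
+/*
+ * For example (illustrative): with the definitions above,
+ * MPLS_LABEL_IS_REAL (MPLS_IETF_IMPLICIT_NULL_LABEL) is 0 (a reserved
+ * label) and MPLS_LABEL_IS_REAL (100) is 1.
+ */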
+
+/**
+ * The top bit of the index, which is the result of the MPLS lookup
+ * is used to determine if the DPO is a load-balance or a replicate
+ */
+#define MPLS_IS_REPLICATE 0x80000000
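+
+/*
+ * Illustrative helpers (an assumption, not part of the original header):
+ * test the flag and recover the plain pool index from a lookup result.
+ */
+#define MPLS_DPO_IS_REPLICATE(_i) (((_i) & MPLS_IS_REPLICATE) != 0)
+#define MPLS_DPO_INDEX(_i)        ((_i) & ~MPLS_IS_REPLICATE)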
+
+#endif
diff --git a/src/vnet/mpls/packet.h b/src/vnet/mpls/packet.h
new file mode 100644
index 00000000..bc67445b
--- /dev/null
+++ b/src/vnet/mpls/packet.h
@@ -0,0 +1,125 @@
+#ifndef included_vnet_mpls_packet_h
+#define included_vnet_mpls_packet_h
+
+/*
+ * MPLS packet format
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A label value only, i.e. 20bits.
+ */
+typedef u32 mpls_label_t;
+
+typedef struct {
+ /* Label: top 20 bits [in network byte order] */
+ /* Experimental: 3 bits ... */
+ /* S (bottom of label stack): 1 bit */
+ /* TTL: 8 bits */
+ mpls_label_t label_exp_s_ttl;
+} mpls_unicast_header_t;
+
+typedef enum mpls_eos_bit_t_
+{
+ MPLS_NON_EOS = 0,
+ MPLS_EOS = 1,
+} mpls_eos_bit_t;
+
+#define MPLS_EOS_BITS { \
+ [MPLS_NON_EOS] = "neos", \
+ [MPLS_EOS] = "eos", \
+}
+
+#define FOR_EACH_MPLS_EOS_BIT(_eos) \
+ for (_eos = MPLS_NON_EOS; _eos <= MPLS_EOS; _eos++)
+
+#define MPLS_ENTRY_LABEL_OFFSET 0
+#define MPLS_ENTRY_LABEL_SHIFT 12
+#define MPLS_ENTRY_LABEL_MASK 0x000fffff
+#define MPLS_ENTRY_LABEL_BITS \
+ (MPLS_ENTRY_LABEL_MASK << MPLS_ENTRY_LABEL_SHIFT)
+
+#define MPLS_ENTRY_EXP_OFFSET 2 /* byte offset to EXP bits */
+#define MPLS_ENTRY_EXP_SHIFT 9
+#define MPLS_ENTRY_EXP_MASK 0x07
+#define MPLS_ENTRY_EXP(mpls) \
+ (((mpls)>>MPLS_ENTRY_EXP_SHIFT) & MPLS_ENTRY_EXP_MASK)
+#define MPLS_ENTRY_EXP_BITS \
+ (MPLS_ENTRY_EXP_MASK << MPLS_ENTRY_EXP_SHIFT)
+
+#define MPLS_ENTRY_EOS_OFFSET 2 /* byte offset to EOS bit */
+#define MPLS_ENTRY_EOS_SHIFT 8
+#define MPLS_ENTRY_EOS_MASK 0x01 /* EOS bit in its byte */
+#define MPLS_ENTRY_EOS(mpls) \
+ (((mpls) >> MPLS_ENTRY_EOS_SHIFT) & MPLS_ENTRY_EOS_MASK)
+#define MPLS_ENTRY_EOS_BIT (MPLS_ENTRY_EOS_MASK << MPLS_ENTRY_EOS_SHIFT)
+
+#define MPLS_ENTRY_TTL_OFFSET 3 /* byte offset to ttl field */
+#define MPLS_ENTRY_TTL_SHIFT 0
+#define MPLS_ENTRY_TTL_MASK 0xff
+#define MPLS_ENTRY_TTL(mpls) \
+ (((mpls) >> MPLS_ENTRY_TTL_SHIFT) & MPLS_ENTRY_TTL_MASK)
+#define MPLS_ENTRY_TTL_BITS \
+ (MPLS_ENTRY_TTL_MASK << MPLS_ENTRY_TTL_SHIFT)
+
+static inline u32 vnet_mpls_uc_get_label (mpls_label_t label_exp_s_ttl)
+{
+ return (label_exp_s_ttl>>MPLS_ENTRY_LABEL_SHIFT);
+}
+
+static inline u32 vnet_mpls_uc_get_exp (mpls_label_t label_exp_s_ttl)
+{
+ return (MPLS_ENTRY_EXP(label_exp_s_ttl));
+}
+
+static inline u32 vnet_mpls_uc_get_s (mpls_label_t label_exp_s_ttl)
+{
+ return (MPLS_ENTRY_EOS(label_exp_s_ttl));
+}
+
+static inline u32 vnet_mpls_uc_get_ttl (mpls_label_t label_exp_s_ttl)
+{
+ return (MPLS_ENTRY_TTL(label_exp_s_ttl));
+}
+
+static inline void vnet_mpls_uc_set_label (mpls_label_t *label_exp_s_ttl,
+ u32 value)
+{
+ *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_LABEL_BITS)) |
+ ((value & MPLS_ENTRY_LABEL_MASK) << MPLS_ENTRY_LABEL_SHIFT));
+}
+
+static inline void vnet_mpls_uc_set_exp (mpls_label_t *label_exp_s_ttl,
+ u32 exp)
+{
+ *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EXP_BITS)) |
+ ((exp & MPLS_ENTRY_EXP_MASK) << MPLS_ENTRY_EXP_SHIFT));
+}
+
+static inline void vnet_mpls_uc_set_s (mpls_label_t *label_exp_s_ttl,
+ u32 eos)
+{
+ *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EOS_BIT)) |
+ ((eos & MPLS_ENTRY_EOS_MASK) << MPLS_ENTRY_EOS_SHIFT));
+}
+
+static inline void vnet_mpls_uc_set_ttl (mpls_label_t *label_exp_s_ttl,
+ u32 ttl)
+{
+ *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_TTL_BITS)) |
+ ((ttl & MPLS_ENTRY_TTL_MASK)));
+}
+
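+/*
+ * Usage sketch (illustrative, not part of the original header): compose a
+ * label entry in host byte order with the setters above, then convert with
+ * clib_host_to_net_u32() when writing it into a packet. The label value
+ * 100 is arbitrary.
+ */
+static inline mpls_label_t
+vnet_mpls_uc_example (void)
+{
+    mpls_label_t hdr = 0;
+
+    vnet_mpls_uc_set_label (&hdr, 100);    /* 20 bit label value */
+    vnet_mpls_uc_set_exp (&hdr, 0);        /* 3 EXP/TC bits */
+    vnet_mpls_uc_set_s (&hdr, MPLS_EOS);   /* bottom-of-stack bit */
+    vnet_mpls_uc_set_ttl (&hdr, 255);      /* 8 bit TTL */
+
+    /* on the wire: clib_host_to_net_u32 (hdr) */
+    return (hdr);
+}
+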
+#endif /* included_vnet_mpls_packet_h */
diff --git a/src/vnet/mpls/pg.c b/src/vnet/mpls/pg.c
new file mode 100644
index 00000000..6ff86e32
--- /dev/null
+++ b/src/vnet/mpls/pg.c
@@ -0,0 +1,71 @@
+/*
+ * pg.c: packet generator mpls interface
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/gre/gre.h>
+#include <vnet/mpls/mpls.h>
+
+typedef struct {
+ pg_edit_t label;
+} pg_mpls_header_t;
+
+static inline void
+pg_mpls_header_init (pg_mpls_header_t * e)
+{
+ pg_edit_init (&e->label, mpls_unicast_header_t, label_exp_s_ttl);
+}
+
+uword
+unformat_pg_mpls_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_mpls_header_t * h;
+ vlib_main_t * vm = vlib_get_main();
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (mpls_unicast_header_t),
+ &group_index);
+ pg_mpls_header_init (h);
+
+ error = 1;
+ if (! unformat (input, "%U",
+ unformat_pg_edit,
+ unformat_mpls_label_net_byte_order, &h->label))
+ goto done;
+
+ {
+ pg_node_t * pg_node = 0;
+ vlib_node_t * ip_lookup_node;
+
+ ip_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ip4-input");
+ ASSERT (ip_lookup_node);
+
+ pg_node = pg_get_node (ip_lookup_node->index);
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+ }
+
+ error = 0;
+ done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
diff --git a/src/vnet/osi/node.c b/src/vnet/osi/node.c
new file mode 100644
index 00000000..12075aa8
--- /dev/null
+++ b/src/vnet/osi/node.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * osi_node.c: osi packet processing
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/osi/osi.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+#include <vnet/llc/llc.h>
+
+#define foreach_osi_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) OSI_INPUT_NEXT_##s,
+ foreach_osi_input_next
+#undef _
+ OSI_INPUT_N_NEXT,
+} osi_input_next_t;
+
+typedef struct
+{
+ u8 packet_data[32];
+} osi_input_trace_t;
+
+static u8 *
+format_osi_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ osi_input_trace_t *t = va_arg (*va, osi_input_trace_t *);
+
+ s = format (s, "%U", format_osi_header, t->packet_data);
+
+ return s;
+}
+
+static uword
+osi_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ osi_main_t *lm = &osi_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (osi_input_trace_t));
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ osi_header_t *h0, *h1;
+ u8 next0, next1, enqueue_code;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *b2, *b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, LOAD);
+ vlib_prefetch_buffer_header (b3, LOAD);
+
+ CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ h0 = (void *) (b0->data + b0->current_data);
+ h1 = (void *) (b1->data + b1->current_data);
+
+ next0 = lm->input_next_by_protocol[h0->protocol];
+ next1 = lm->input_next_by_protocol[h1->protocol];
+
+ b0->error =
+ node->errors[next0 ==
+ OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL :
+ OSI_ERROR_NONE];
+ b1->error =
+ node->errors[next1 ==
+ OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL :
+ OSI_ERROR_NONE];
+
+ enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
+
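+	  /*
+	   * enqueue_code encodes which buffers disagree with the speculated
+	   * next_index: bit 0 is set if b0 needs a different next node,
+	   * bit 1 if b1 does; zero means both stay on the fast path.
+	   */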
+ if (PREDICT_FALSE (enqueue_code != 0))
+ {
+ switch (enqueue_code)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = bi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, bi1);
+ break;
+
+ case 3:
+ /* A B B or A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ vlib_set_next_frame_buffer (vm, node, next1, bi1);
+ if (next0 == next1)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ next_index = next1;
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ osi_header_t *h0;
+ u8 next0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ next0 = lm->input_next_by_protocol[h0->protocol];
+
+ b0->error =
+ node->errors[next0 ==
+ OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL :
+ OSI_ERROR_NONE];
+
+ /* Sent packet to wrong next? */
+ if (PREDICT_FALSE (next0 != next_index))
+ {
+ /* Return old frame; remove incorrectly enqueued packet. */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);
+
+ /* Send to correct next. */
+ next_index = next0;
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char *osi_error_strings[] = {
+#define _(f,s) s,
+ foreach_osi_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (osi_input_node) = {
+ .function = osi_input,
+ .name = "osi-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = OSI_N_ERROR,
+ .error_strings = osi_error_strings,
+
+ .n_next_nodes = OSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [OSI_INPUT_NEXT_##s] = n,
+ foreach_osi_input_next
+#undef _
+ },
+
+ .format_buffer = format_osi_header_with_length,
+ .format_trace = format_osi_input_trace,
+ .unformat_buffer = unformat_osi_header,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+osi_input_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+ osi_main_t *lm = &osi_main;
+
+ if ((error = vlib_call_init_function (vm, osi_init)))
+ return error;
+
+ osi_setup_node (vm, osi_input_node.index);
+
+ {
+ int i;
+ for (i = 0; i < ARRAY_LEN (lm->input_next_by_protocol); i++)
+ lm->input_next_by_protocol[i] = OSI_INPUT_NEXT_DROP;
+ }
+
+ ppp_register_input_protocol (vm, PPP_PROTOCOL_osi, osi_input_node.index);
+ hdlc_register_input_protocol (vm, HDLC_PROTOCOL_osi, osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer1,
+ osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer2,
+ osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer3,
+ osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer4,
+ osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer5,
+ osi_input_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (osi_input_init);
+
+void
+osi_register_input_protocol (osi_protocol_t protocol, u32 node_index)
+{
+ osi_main_t *lm = &osi_main;
+ vlib_main_t *vm = lm->vlib_main;
+ osi_protocol_info_t *pi;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, osi_input_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = osi_get_protocol_info (lm, protocol);
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm, osi_input_node.index, node_index);
+
+ lm->input_next_by_protocol[protocol] = pi->next_index;
+}
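+
+/*
+ * Hypothetical usage (illustrative only): a graph node named
+ * "my-clnp-input" could claim CLNP packets arriving at osi-input with:
+ *
+ *    osi_register_input_protocol (OSI_PROTOCOL_clnp,
+ *                                 my_clnp_input_node.index);
+ */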
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/osi/osi.c b/src/vnet/osi/osi.c
new file mode 100644
index 00000000..34c867f1
--- /dev/null
+++ b/src/vnet/osi/osi.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * osi.c: osi support
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/osi/osi.h>
+
+/* Global main structure. */
+osi_main_t osi_main;
+
+u8 *
+format_osi_protocol (u8 * s, va_list * args)
+{
+ osi_protocol_t p = va_arg (*args, u32);
+ osi_main_t *pm = &osi_main;
+ osi_protocol_info_t *pi = osi_get_protocol_info (pm, p);
+
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "0x%02x", p);
+
+ return s;
+}
+
+u8 *
+format_osi_header_with_length (u8 * s, va_list * args)
+{
+ osi_main_t *pm = &osi_main;
+ osi_header_t *h = va_arg (*args, osi_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ osi_protocol_t p = h->protocol;
+ uword indent, header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "osi header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "OSI %U", format_osi_protocol, p);
+
+  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ osi_protocol_info_t *pi = osi_get_protocol_info (pm, p);
+ vlib_node_t *node = vlib_get_node (pm->vlib_main, pi->node_index);
+ if (node->format_buffer)
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ node->format_buffer, (void *) (h + 1),
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+u8 *
+format_osi_header (u8 * s, va_list * args)
+{
+ osi_header_t *h = va_arg (*args, osi_header_t *);
+ return format (s, "%U", format_osi_header_with_length, h, 0);
+}
+
+/* Returns osi protocol as an int in host byte order. */
+uword
+unformat_osi_protocol (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ osi_main_t *pm = &osi_main;
+ int p, i;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x", &p) || unformat (input, "%d", &p))
+ {
+ if (p >= (1 << 8))
+ return 0;
+ *result = p;
+ return 1;
+ }
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ pm->protocol_info_by_name, &i))
+ {
+ osi_protocol_info_t *pi = vec_elt_at_index (pm->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+ }
+
+ return 0;
+}
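+
+/*
+ * Example (illustrative): parse a protocol given by name or number,
+ * e.g. "isis" or "0x83":
+ *
+ *    u8 proto;
+ *    if (unformat (input, "%U", unformat_osi_protocol, &proto))
+ *      ...
+ */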
+
+uword
+unformat_osi_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ osi_header_t _h, *h = &_h;
+ u8 p;
+
+ if (!unformat (input, "%U", unformat_osi_protocol, &p))
+ return 0;
+
+ h->protocol = p;
+
+ /* Add header to result. */
+ {
+ void *p;
+ u32 n_bytes = sizeof (h[0]);
+
+ vec_add2 (*result, p, n_bytes);
+ clib_memcpy (p, h, n_bytes);
+ }
+
+ return 1;
+}
+
+static void
+add_protocol (osi_main_t * pm, osi_protocol_t protocol, char *protocol_name)
+{
+ osi_protocol_info_t *pi;
+ u32 i;
+
+ vec_add2 (pm->protocol_infos, pi, 1);
+ i = pi - pm->protocol_infos;
+
+ pi->name = protocol_name;
+ pi->protocol = protocol;
+ pi->next_index = pi->node_index = ~0;
+
+ hash_set (pm->protocol_info_by_protocol, protocol, i);
+ hash_set_mem (pm->protocol_info_by_name, pi->name, i);
+}
+
+static clib_error_t *
+osi_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+ osi_main_t *pm = &osi_main;
+
+ /* init order dependency: llc_init -> osi_init */
+ if ((error = vlib_call_init_function (vm, llc_init)))
+ return error;
+
+ memset (pm, 0, sizeof (pm[0]));
+ pm->vlib_main = vm;
+
+ pm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ pm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
+
+#define _(f,n) add_protocol (pm, OSI_PROTOCOL_##f, #f);
+ foreach_osi_protocol;
+#undef _
+
+ return vlib_call_init_function (vm, osi_input_init);
+}
+
+VLIB_INIT_FUNCTION (osi_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/osi/osi.h b/src/vnet/osi/osi.h
new file mode 100644
index 00000000..0ff267a3
--- /dev/null
+++ b/src/vnet/osi/osi.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * osi.h: OSI definitions
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_osi_h
+#define included_osi_h
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+
+#define foreach_osi_protocol \
+ _ (null, 0x0) \
+ _ (x_29, 0x01) \
+ _ (x_633, 0x03) \
+ _ (q_931, 0x08) \
+ _ (q_933, 0x08) \
+ _ (q_2931, 0x09) \
+ _ (q_2119, 0x0c) \
+ _ (snap, 0x80) \
+ _ (clnp, 0x81) \
+ _ (esis, 0x82) \
+ _ (isis, 0x83) \
+ _ (idrp, 0x85) \
+ _ (x25_esis, 0x8a) \
+ _ (iso10030, 0x8c) \
+ _ (iso11577, 0x8d) \
+ _ (ip6, 0x8e) \
+ _ (compressed, 0xb0) \
+ _ (sndcf, 0xc1) \
+ _ (ip4, 0xcc) \
+ _ (ppp, 0xcf)
+
+typedef enum
+{
+#define _(f,n) OSI_PROTOCOL_##f = n,
+ foreach_osi_protocol
+#undef _
+} osi_protocol_t;
+
+typedef struct
+{
+ u8 protocol;
+
+ u8 payload[0];
+} osi_header_t;
+
+typedef struct
+{
+ /* Name (a c string). */
+ char *name;
+
+ /* OSI protocol (SAP type). */
+ osi_protocol_t protocol;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} osi_protocol_info_t;
+
+#define foreach_osi_error \
+ _ (NONE, "no error") \
+ _ (UNKNOWN_PROTOCOL, "unknown osi protocol")
+
+typedef enum
+{
+#define _(f,s) OSI_ERROR_##f,
+ foreach_osi_error
+#undef _
+ OSI_N_ERROR,
+} osi_error_t;
+
+typedef struct
+{
+ vlib_main_t *vlib_main;
+
+ osi_protocol_info_t *protocol_infos;
+
+ /* Hash tables mapping name/protocol to protocol info index. */
+ uword *protocol_info_by_name, *protocol_info_by_protocol;
+
+ /* osi-input next index indexed by protocol. */
+ u8 input_next_by_protocol[256];
+} osi_main_t;
+
+always_inline osi_protocol_info_t *
+osi_get_protocol_info (osi_main_t * m, osi_protocol_t protocol)
+{
+ uword *p = hash_get (m->protocol_info_by_protocol, protocol);
+ return p ? vec_elt_at_index (m->protocol_infos, p[0]) : 0;
+}
+
+extern osi_main_t osi_main;
+
+/* Register given node index to take input for given osi type. */
+void osi_register_input_protocol (osi_protocol_t protocol, u32 node_index);
+
+format_function_t format_osi_protocol;
+format_function_t format_osi_header;
+format_function_t format_osi_header_with_length;
+
+/* Parse osi protocol as 0xXXXX or protocol name. */
+unformat_function_t unformat_osi_protocol;
+
+/* Parse osi header. */
+unformat_function_t unformat_osi_header;
+unformat_function_t unformat_pg_osi_header;
+
+always_inline void
+osi_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_osi_header_with_length;
+ n->unformat_buffer = unformat_osi_header;
+ pn->unformat_edit = unformat_pg_osi_header;
+}
+
+#endif /* included_osi_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/osi/pg.c b/src/vnet/osi/pg.c
new file mode 100644
index 00000000..c87a869b
--- /dev/null
+++ b/src/vnet/osi/pg.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * osi_pg.c: packet generator osi interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/osi/osi.h>
+
+typedef struct
+{
+ pg_edit_t protocol;
+} pg_osi_header_t;
+
+static inline void
+pg_osi_header_init (pg_osi_header_t * e)
+{
+ pg_edit_init (&e->protocol, osi_header_t, protocol);
+}
+
+uword
+unformat_pg_osi_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_osi_header_t *h;
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (osi_header_t),
+ &group_index);
+ pg_osi_header_init (h);
+
+ error = 1;
+ if (!unformat (input, "%U",
+ unformat_pg_edit, unformat_osi_protocol, &h->protocol))
+ goto done;
+
+ {
+ osi_main_t *pm = &osi_main;
+ osi_protocol_info_t *pi = 0;
+ pg_node_t *pg_node = 0;
+
+ if (h->protocol.type == PG_EDIT_FIXED)
+ {
+ u8 t = *h->protocol.values[PG_EDIT_LO];
+ pi = osi_get_protocol_info (pm, t);
+ if (pi && pi->node_index != ~0)
+ pg_node = pg_get_node (pi->node_index);
+ }
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto done;
+ }
+
+ error = 0;
+done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c
new file mode 100644
index 00000000..3c249a7b
--- /dev/null
+++ b/src/vnet/pg/cli.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pg_cli.c: packet generator cli
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/stat.h>
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+
+#ifdef CLIB_UNIX
+#include <vnet/unix/pcap.h>
+#endif
+
+/* Root of all packet generator cli commands. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_pg_command, static) = {
+ .path = "packet-generator",
+ .short_help = "Packet generator commands",
+};
+/* *INDENT-ON* */
+
+void
+pg_enable_disable (u32 stream_index, int is_enable)
+{
+ pg_main_t *pg = &pg_main;
+ pg_stream_t *s;
+
+ if (stream_index == ~0)
+ {
+ /* No stream specified: enable/disable all streams. */
+ /* *INDENT-OFF* */
+ pool_foreach (s, pg->streams, ({
+ pg_stream_enable_disable (pg, s, is_enable);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ /* enable/disable specified stream. */
+ s = pool_elt_at_index (pg->streams, stream_index);
+ pg_stream_enable_disable (pg, s, is_enable);
+ }
+}
+
+clib_error_t *
+pg_capture (pg_capture_args_t * a)
+{
+ pg_main_t *pg = &pg_main;
+ pg_interface_t *pi;
+
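+  /* When enabling capture, refuse to overwrite an existing pcap file. */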
+ if (a->is_enabled == 1)
+ {
+ struct stat sb;
+ if (stat ((char *) a->pcap_file_name, &sb) != -1)
+ return clib_error_return (0, "Cannot create pcap file");
+ }
+
+ pi = pool_elt_at_index (pg->interfaces, a->dev_instance);
+ vec_free (pi->pcap_file_name);
+ memset (&pi->pcap_main, 0, sizeof (pi->pcap_main));
+
+ if (a->is_enabled == 0)
+ return 0;
+
+ pi->pcap_file_name = a->pcap_file_name;
+ pi->pcap_main.file_name = (char *) pi->pcap_file_name;
+ pi->pcap_main.n_packets_to_capture = a->count;
+ pi->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet;
+
+ return 0;
+}
+
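+/* CLI handler for `packet-generator {enable,disable}-stream [NAME]'.
+   With no stream name given, all streams are affected. */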
+static clib_error_t *
+enable_disable_stream (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pg_main_t *pg = &pg_main;
+ int is_enable = cmd->function_arg != 0;
+ u32 stream_index = ~0;
+
+ if (unformat (input, "%U", unformat_eof))
+ ;
+ else if (unformat (input, "%U", unformat_hash_vec_string,
+ pg->stream_index_by_name, &stream_index))
+ ;
+ else
+ return clib_error_create ("unknown input `%U'",
+ format_unformat_error, input);
+
+ pg_enable_disable (stream_index, is_enable);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (enable_streams_cli, static) = {
+ .path = "packet-generator enable-stream",
+ .short_help = "Enable packet generator streams",
+ .function = enable_disable_stream,
+ .function_arg = 1, /* is_enable */
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (disable_streams_cli, static) = {
+ .path = "packet-generator disable-stream",
+ .short_help = "Disable packet generator streams",
+ .function = enable_disable_stream,
+ .function_arg = 0, /* is_enable */
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_pg_stream (u8 * s, va_list * va)
+{
+ pg_stream_t *t = va_arg (*va, pg_stream_t *);
+ u8 *v;
+
+ if (!t)
+ return format (s, "%=16s%=12s%=16s%s",
+ "Name", "Enabled", "Count", "Parameters");
+
+ s = format (s, "%-16v%=12s%16Ld",
+ t->name,
+ pg_stream_is_enabled (t) ? "Yes" : "No",
+ t->n_packets_generated);
+
+ v = 0;
+
+ v = format (v, "limit %Ld, ", t->n_packets_limit);
+
+ v = format (v, "rate %.2e pps, ", t->rate_packets_per_second);
+
+ v = format (v, "size %d%c%d, ",
+ t->min_packet_bytes,
+ t->packet_size_edit_type == PG_EDIT_RANDOM ? '+' : '-',
+ t->max_packet_bytes);
+
+ v = format (v, "buffer-size %d, ", t->buffer_bytes);
+
+ v = format (v, "worker %d, ", t->worker_index);
+
+ if (v)
+ {
+ s = format (s, " %v", v);
+ vec_free (v);
+ }
+
+ return s;
+}
+
+static clib_error_t *
+show_streams (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pg_main_t *pg = &pg_main;
+ pg_stream_t *s;
+
+ if (pool_elts (pg->streams) == 0)
+ {
+ vlib_cli_output (vm, "no streams currently defined");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U", format_pg_stream, 0);
+ /* *INDENT-OFF* */
+ pool_foreach (s, pg->streams, ({
+ vlib_cli_output (vm, "%U", format_pg_stream, s);
+ }));
+ /* *INDENT-ON* */
+
+done:
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_streams_cli, static) = {
+ .path = "show packet-generator",
+ .short_help = "Show packet generator streams",
+ .function = show_streams,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+pg_pcap_read (pg_stream_t * s, char *file_name)
+{
+#ifndef CLIB_UNIX
+ return clib_error_return (0, "no pcap support");
+#else
+ pcap_main_t pm;
+ clib_error_t *error;
+ memset (&pm, 0, sizeof (pm));
+ pm.file_name = file_name;
+ error = pcap_read (&pm);
+ s->replay_packet_templates = pm.packets_read;
+ s->min_packet_bytes = pm.min_packet_bytes;
+ s->max_packet_bytes = pm.max_packet_bytes;
+ s->buffer_bytes = pm.max_packet_bytes;
+  /* For pcap replay we never recycle buffers. */
+ s->flags |= PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE;
+
+ if (s->n_packets_limit == 0)
+ s->n_packets_limit = vec_len (pm.packets_read);
+
+ return error;
+#endif /* CLIB_UNIX */
+}
+
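+/* Parse a single stream parameter: `limit N', `rate PPS',
+   `size MIN-MAX' (increment), `size MIN+MAX' (random), or
+   `buffer-size N'. */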
+static uword
+unformat_pg_stream_parameter (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ f64 x;
+
+ if (unformat (input, "limit %f", &x))
+ s->n_packets_limit = x;
+
+ else if (unformat (input, "rate %f", &x))
+ s->rate_packets_per_second = x;
+
+ else if (unformat (input, "size %d-%d", &s->min_packet_bytes,
+ &s->max_packet_bytes))
+ s->packet_size_edit_type = PG_EDIT_INCREMENT;
+
+ else if (unformat (input, "size %d+%d", &s->min_packet_bytes,
+ &s->max_packet_bytes))
+ s->packet_size_edit_type = PG_EDIT_RANDOM;
+
+ else if (unformat (input, "buffer-size %d", &s->buffer_bytes))
+ ;
+
+ else
+ return 0;
+
+ return 1;
+}
+
+static clib_error_t *
+validate_stream (pg_stream_t * s)
+{
+ if (s->max_packet_bytes < s->min_packet_bytes)
+ return clib_error_create ("max-size < min-size");
+
+ if (s->buffer_bytes >= 4096 || s->buffer_bytes == 0)
+ return
+ clib_error_create ("buffer-size must be positive and < 4096, given %d",
+ s->buffer_bytes);
+
+ if (s->rate_packets_per_second < 0)
+ return clib_error_create ("negative rate");
+
+ return 0;
+}
+
+static clib_error_t *
+new_stream (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ u8 *tmp = 0;
+ u32 hw_if_index;
+ unformat_input_t sub_input = { 0 };
+ int sub_input_given = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ pg_main_t *pg = &pg_main;
+ pg_stream_t s = { 0 };
+ char *pcap_file_name;
+
+ s.sw_if_index[VLIB_RX] = s.sw_if_index[VLIB_TX] = ~0;
+ s.node_index = ~0;
+ s.max_packet_bytes = s.min_packet_bytes = 64;
+ s.buffer_bytes = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+ s.if_id = 0;
+ pcap_file_name = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "name %v", &tmp))
+ {
+ if (s.name)
+ vec_free (s.name);
+ s.name = tmp;
+ }
+
+ else if (unformat (input, "node %U",
+ unformat_vnet_hw_interface, vnm, &hw_if_index))
+ {
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ s.node_index = hi->output_node_index;
+ s.sw_if_index[VLIB_TX] = hi->sw_if_index;
+ }
+
+ else if (unformat (input, "source pg%u", &s.if_id))
+ ;
+
+ else if (unformat (input, "node %U",
+ unformat_vlib_node, vm, &s.node_index))
+ ;
+
+ else if (unformat (input, "worker %u", &s.worker_index))
+ ;
+
+ else if (unformat (input, "interface %U",
+ unformat_vnet_sw_interface, vnm,
+ &s.sw_if_index[VLIB_RX]))
+ ;
+
+ else if (unformat (input, "pcap %s", &pcap_file_name))
+ ;
+
+ else if (!sub_input_given
+ && unformat (input, "data %U", unformat_input, &sub_input))
+ sub_input_given++;
+
+ else if (unformat_user (input, unformat_pg_stream_parameter, &s))
+ ;
+
+ else if (unformat (input, "no-recycle"))
+ s.flags |= PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE;
+
+ else
+ {
+ error = clib_error_create ("unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ error = validate_stream (&s);
+ if (error)
+    goto done;
+
+ if (!sub_input_given && !pcap_file_name)
+ {
+ error = clib_error_create ("no packet data given");
+ goto done;
+ }
+
+ if (s.node_index == ~0)
+ {
+ if (pcap_file_name != 0)
+ {
+ vlib_node_t *n =
+ vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
+ s.node_index = n->index;
+ }
+ else
+ {
+ error = clib_error_create ("output interface or node not given");
+ goto done;
+ }
+ }
+
+ {
+ pg_node_t *n;
+
+ if (s.node_index < vec_len (pg->nodes))
+ n = pg->nodes + s.node_index;
+ else
+ n = 0;
+
+ if (s.worker_index >= vlib_num_workers ())
+ s.worker_index = 0;
+
+ if (pcap_file_name != 0)
+ {
+ error = pg_pcap_read (&s, pcap_file_name);
+ if (error)
+ goto done;
+ vec_free (pcap_file_name);
+ }
+
+ else if (n && n->unformat_edit
+ && unformat_user (&sub_input, n->unformat_edit, &s))
+ ;
+
+ else if (!unformat_user (&sub_input, unformat_pg_payload, &s))
+ {
+ error = clib_error_create
+ ("failed to parse packet data from `%U'",
+ format_unformat_error, &sub_input);
+ goto done;
+ }
+ }
+
+ pg_stream_add (pg, &s);
+ return 0;
+
+done:
+ pg_stream_free (&s);
+ unformat_free (&sub_input);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (new_stream_cli, static) = {
+ .path = "packet-generator new",
+ .function = new_stream,
+ .short_help = "Create packet generator stream",
+ .long_help =
+ "Create packet generator stream\n"
+ "\n"
+ "Arguments:\n"
+ "\n"
+ "name STRING sets stream name\n"
+ "interface STRING interface for stream output \n"
+ "node NODE-NAME node for stream output\n"
+ "data STRING specifies packet data\n"
+ "pcap FILENAME read packet data from pcap file\n",
+};
+/* *INDENT-ON* */
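+
+/* A minimal invocation (see example.script in this directory):
+ *
+ *   packet-generator new {
+ *     name x
+ *     limit 1
+ *     node ethernet-input
+ *     data { IP: 1.2.3 -> 4.5.6 incrementing 100 }
+ *   }
+ */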
+
+static clib_error_t *
+del_stream (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pg_main_t *pg = &pg_main;
+ u32 i;
+
+ if (!unformat (input, "%U",
+ &unformat_hash_vec_string, pg->stream_index_by_name, &i))
+ return clib_error_create ("expected stream name `%U'",
+ format_unformat_error, input);
+
+ pg_stream_del (pg, i);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (del_stream_cli, static) = {
+ .path = "packet-generator delete",
+ .function = del_stream,
+ .short_help = "Delete stream with given name",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+change_stream_parameters (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pg_main_t *pg = &pg_main;
+ pg_stream_t *s, s_new;
+ u32 stream_index = ~0;
+ clib_error_t *error;
+
+ if (unformat (input, "%U", unformat_hash_vec_string,
+ pg->stream_index_by_name, &stream_index))
+ ;
+ else
+ return clib_error_create ("expecting stream name; got `%U'",
+ format_unformat_error, input);
+
+ s = pool_elt_at_index (pg->streams, stream_index);
+ s_new = s[0];
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user (input, unformat_pg_stream_parameter, &s_new))
+ ;
+
+ else
+ return clib_error_create ("unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ error = validate_stream (&s_new);
+ if (!error)
+ s[0] = s_new;
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (change_stream_parameters_cli, static) = {
+ .path = "packet-generator configure",
+ .short_help = "Change packet generator stream parameters",
+ .function = change_stream_parameters,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+pg_capture_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_hw_interface_t *hi = 0;
+ u8 *pcap_file_name = 0;
+ u32 hw_if_index;
+ u32 is_disable = 0;
+ u32 count = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U",
+ unformat_vnet_hw_interface, vnm, &hw_if_index))
+ {
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ }
+
+ else if (unformat (line_input, "pcap %s", &pcap_file_name))
+ ;
+ else if (unformat (line_input, "count %u", &count))
+ ;
+ else if (unformat (line_input, "disable"))
+ is_disable = 1;
+
+ else
+ {
+ error = clib_error_create ("unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!hi)
+ {
+ error = clib_error_return (0, "Please specify interface name");
+ goto done;
+ }
+
+ if (hi->dev_class_index != pg_dev_class.index)
+ {
+ error =
+ clib_error_return (0, "Please specify packet-generator interface");
+ goto done;
+ }
+
+ if (!pcap_file_name && is_disable == 0)
+ {
+ error = clib_error_return (0, "Please specify pcap file name");
+ goto done;
+ }
+
+ pg_capture_args_t _a, *a = &_a;
+
+ a->hw_if_index = hw_if_index;
+ a->dev_instance = hi->dev_instance;
+ a->is_enabled = !is_disable;
+ a->pcap_file_name = pcap_file_name;
+ a->count = count;
+
+ error = pg_capture (a);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (pg_capture_cmd, static) = {
+ .path = "packet-generator capture",
+ .short_help = "packet-generator capture <interface name> pcap <filename> [count <n>]",
+ .function = pg_capture_cmd_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+create_pg_if_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pg_main_t *pg = &pg_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+  u32 if_id = 0;		/* default to pg0 if no id given */
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "interface pg%u", &if_id))
+ ;
+
+ else
+ {
+ error = clib_error_create ("unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ pg_interface_add_or_get (pg, if_id);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_pg_if_cmd, static) = {
+ .path = "create packet-generator",
+ .short_help = "create packet-generator interface <interface name>",
+ .function = create_pg_if_cmd_fn,
+};
+/* *INDENT-ON* */
+
+/* Dummy init function so that we can be linked in. */
+static clib_error_t *
+pg_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (pg_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pg/edit.c b/src/vnet/pg/edit.c
new file mode 100644
index 00000000..cb4d070f
--- /dev/null
+++ b/src/vnet/pg/edit.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pg_edit.c: packet generator edits
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+
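+/* Serialize VALUE into RESULT as the network byte order bit field
+   described by edit E. */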
+static void
+pg_edit_set_value_helper (pg_edit_t * e, u64 value, u8 * result)
+{
+ int i, j, n_bits_left;
+ u8 *v, tmp[8];
+
+ v = tmp;
+
+ n_bits_left = e->n_bits;
+ i = 0;
+ j = e->lsb_bit_offset % BITS (v[0]);
+
+ if (n_bits_left > 0 && j != 0)
+ {
+ v[i] = (value & 0xff) << j;
+ value >>= BITS (v[0]) - j;
+ n_bits_left -= BITS (v[0]) - j;
+ i += 1;
+ }
+
+ while (n_bits_left > 0)
+ {
+ v[i] = value & 0xff;
+ value >>= 8;
+ n_bits_left -= 8;
+ i += 1;
+ }
+
+ /* Convert to network byte order. */
+ for (j = 0; j < i; j++)
+ result[j] = v[i - 1 - j];
+}
+
+void
+pg_edit_set_value (pg_edit_t * e, int hi_or_lo, u64 value)
+{
+ pg_edit_alloc_value (e, hi_or_lo);
+ pg_edit_set_value_helper (e, value, e->values[hi_or_lo]);
+}
+
+/* Parse an int either %d or 0x%x into network byte order. */
+uword
+unformat_pg_number (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ pg_edit_t *e = va_arg (*args, pg_edit_t *);
+ u64 value;
+
+ ASSERT (BITS (value) >= e->n_bits);
+
+ if (!unformat (input, "0x%X", sizeof (value), &value)
+ && !unformat (input, "%D", sizeof (value), &value))
+ return 0;
+
+ /* Number given does not fit into bit field. */
+ if (e->n_bits < 64 && value >= (u64) 1 << (u64) e->n_bits)
+ return 0;
+
+ pg_edit_set_value_helper (e, value, result);
+ return 1;
+}
+
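+/* Parse an edit value spec: a fixed value, `LO-HI' for an
+   incrementing range, or `LO+HI' for a random range. */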
+uword
+unformat_pg_edit (unformat_input_t * input, va_list * args)
+{
+ unformat_function_t *f = va_arg (*args, unformat_function_t *);
+ pg_edit_t *e = va_arg (*args, pg_edit_t *);
+
+ pg_edit_alloc_value (e, PG_EDIT_LO);
+ if (!unformat_user (input, f, e->values[PG_EDIT_LO], e))
+ return 0;
+
+ pg_edit_alloc_value (e, PG_EDIT_HI);
+ if (unformat (input, "-%U", f, e->values[PG_EDIT_HI], e))
+ e->type = PG_EDIT_INCREMENT;
+ else if (unformat (input, "+%U", f, e->values[PG_EDIT_HI], e))
+ e->type = PG_EDIT_RANDOM;
+ else
+ e->type = PG_EDIT_FIXED;
+
+ return 1;
+}
+
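+/* Parse a payload spec: `incrementing N' (bytes 0, 1, 2, ...),
+   `hex 0x...', or a node name whose unformat_edit function
+   consumes the rest of the input. */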
+uword
+unformat_pg_payload (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ vlib_main_t *vm = vlib_get_main ();
+ pg_edit_t *e;
+ u32 i, node_index, len, max_len;
+ u8 *v;
+
+ v = 0;
+
+ if (unformat (input, "incrementing %d", &len))
+ {
+ vec_resize (v, len);
+ for (i = 0; i < len; i++)
+ v[i] = i;
+ }
+ else if (unformat (input, "hex 0x%U", unformat_hex_string, &v))
+ ;
+
+ else if (unformat (input, "%U", unformat_vlib_node, vm, &node_index))
+ {
+ pg_node_t *pn = pg_get_node (node_index);
+ if (!pn->unformat_edit)
+ return 0;
+ return unformat (input, "%U", pn->unformat_edit, s);
+ }
+
+ else
+ return 0;
+
+ /* Length not including this payload. */
+ max_len = pg_edit_group_n_bytes (s, 0);
+ if (max_len + vec_len (v) >= s->max_packet_bytes)
+ {
+ if (s->max_packet_bytes >= max_len)
+ _vec_len (v) = s->max_packet_bytes - max_len;
+ else
+ _vec_len (v) = 0;
+ }
+
+ e = pg_create_edit_group (s, sizeof (e[0]), vec_len (v), 0);
+
+ e->type = PG_EDIT_FIXED;
+ e->n_bits = vec_len (v) * BITS (v[0]);
+
+  /* Least significant bit is at end of bitstream, since everything is always big endian. */
+ e->lsb_bit_offset = e->n_bits - BITS (v[0]);
+
+ e->values[PG_EDIT_LO] = v;
+
+ return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pg/edit.h b/src/vnet/pg/edit.h
new file mode 100644
index 00000000..3bfdad57
--- /dev/null
+++ b/src/vnet/pg/edit.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pg_edit.h: packet generator edits
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_packet_generator_pg_edit_h
+#define included_packet_generator_pg_edit_h
+
+#include <vppinfra/format.h>
+#include <vppinfra/vec.h>
+
+typedef enum
+{
+ /* Invalid type used to poison edits. */
+ PG_EDIT_INVALID_TYPE,
+
+ /* Value is fixed: does not change for all packets in sequence. */
+ PG_EDIT_FIXED,
+
+ /* Value v increments between low and high values v_low <= v <= v_high. */
+ PG_EDIT_INCREMENT,
+
+ /* Random value between low and high values v_low <= v <= v_high. */
+ PG_EDIT_RANDOM,
+
+ /* Unspecified value; will be specified by some edit function. */
+ PG_EDIT_UNSPECIFIED,
+} pg_edit_type_t;
+
+typedef struct
+{
+ pg_edit_type_t type;
+
+ /* Bit offset within packet where value is to be written.
+ Bits are written in network byte order: high bits first.
+ This is the bit offset of the least significant bit: i.e. the
+ highest numbered byte * 8 plus bit offset within that byte.
+ Negative offsets encode special edits. */
+ i32 lsb_bit_offset;
+
+ /* Special offset indicating this edit is for packet length. */
+#define PG_EDIT_PACKET_LENGTH (-1)
+
+ /* Number of bits in edit. */
+ u32 n_bits;
+
+ /* Low and high values for this edit. Network byte order. */
+ u8 *values[2];
+#define PG_EDIT_LO 0
+#define PG_EDIT_HI 1
+
+ /* Last value used for increment edit type. */
+ u64 last_increment_value;
+} pg_edit_t;
+
+always_inline void
+pg_edit_free (pg_edit_t * e)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (e->values); i++)
+ vec_free (e->values[i]);
+}
+
+#define pg_edit_init_bitfield(e,type,field,field_offset,field_n_bits) \
+do { \
+ u32 _bo; \
+ \
+ ASSERT ((field_offset) < STRUCT_BITS_OF (type, field)); \
+ \
+ /* Start byte offset. */ \
+ _bo = STRUCT_OFFSET_OF (type, field); \
+ \
+ /* Adjust for big endian byte order. */ \
+ _bo += ((STRUCT_BITS_OF (type, field) \
+ - (field_offset) - 1) / BITS (u8)); \
+ \
+ (e)->lsb_bit_offset = _bo * BITS (u8) + ((field_offset) % BITS (u8)); \
+ (e)->n_bits = (field_n_bits); \
+} while (0)
+
+/* Initialize edit for byte aligned fields. */
+#define pg_edit_init(e,type,field) \
+ pg_edit_init_bitfield(e,type,field,0,STRUCT_BITS_OF(type,field))
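+
+/* Example: pg_edit_init (&h->protocol, osi_header_t, protocol),
+   as used by the OSI pg interface in this patch. */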
+
+static inline uword
+pg_edit_n_alloc_bytes (pg_edit_t * e)
+{
+ int i0, i1, n_bytes, n_bits_left;
+
+ i0 = e->lsb_bit_offset;
+ i1 = i0 % BITS (u8);
+
+ n_bytes = 0;
+ n_bits_left = e->n_bits;
+
+ if (n_bits_left > 0 && i1 != 0)
+ {
+ n_bytes++;
+ n_bits_left -= i1;
+ if (n_bits_left < 0)
+ n_bits_left = 0;
+ }
+
+ n_bytes += (n_bits_left / BITS (u8));
+ n_bytes += (n_bits_left % BITS (u8)) != 0;
+
+ return n_bytes;
+}
+
+static inline void
+pg_edit_alloc_value (pg_edit_t * e, int i)
+{
+ vec_validate (e->values[i], e->lsb_bit_offset / BITS (u8));
+}
+
+extern void pg_edit_set_value (pg_edit_t * e, int hi_or_lo, u64 value);
+
+static inline void
+pg_edit_set_fixed (pg_edit_t * e, u64 value)
+{
+ e->type = PG_EDIT_FIXED;
+ pg_edit_set_value (e, PG_EDIT_LO, value);
+}
+
+static inline void
+pg_edit_copy_type_and_values (pg_edit_t * dst, pg_edit_t * src)
+{
+ int i;
+ dst->type = src->type;
+ src->type = PG_EDIT_INVALID_TYPE;
+ for (i = 0; i < ARRAY_LEN (dst->values); i++)
+ {
+ dst->values[i] = src->values[i];
+ src->values[i] = 0;
+ }
+}
+
+static inline u64
+pg_edit_get_value (pg_edit_t * e, int hi_or_lo)
+{
+ u64 r = 0;
+ int i, n;
+ u8 *v = e->values[hi_or_lo];
+
+ n = round_pow2 (e->n_bits, BITS (u8)) / BITS (u8);
+
+ ASSERT (n <= vec_len (v));
+ ASSERT (n <= sizeof (r));
+
+ for (i = 0; i < n; i++)
+ r = (r << BITS (v[i])) + v[i];
+
+ return r;
+}
+
+static inline uword
+pg_edit_is_fixed_with_value (pg_edit_t * e, u64 value)
+{
+ return (e->type == PG_EDIT_FIXED
+ && value == pg_edit_get_value (e, PG_EDIT_LO));
+}
+
+uword unformat_pg_edit (unformat_input_t * input, va_list * args);
+uword unformat_pg_payload (unformat_input_t * input, va_list * args);
+uword unformat_pg_number (unformat_input_t * input, va_list * args);
+uword unformat_pg_interface (unformat_input_t * input, va_list * args);
+
+#endif /* included_packet_generator_pg_edit_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pg/example.script b/src/vnet/pg/example.script
new file mode 100644
index 00000000..0e29b9ec
--- /dev/null
+++ b/src/vnet/pg/example.script
@@ -0,0 +1,6 @@
+packet-generator new {
+ name x
+ limit 1
+ node ethernet-input
+ data { IP: 1.2.3 -> 4.5.6 incrementing 100 }
+}
diff --git a/src/vnet/pg/init.c b/src/vnet/pg/init.c
new file mode 100644
index 00000000..631be25e
--- /dev/null
+++ b/src/vnet/pg/init.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pg_init.c: VLIB packet generator
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+
+/* Global main structure. */
+pg_main_t pg_main;
+
+static clib_error_t *
+pg_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+ pg_main_t *pg = &pg_main;
+
+ pg->if_index_by_if_id = hash_create (0, sizeof (uword));
+
+ if ((error = vlib_call_init_function (vm, vnet_main_init)))
+ goto done;
+
+ if ((error = vlib_call_init_function (vm, pg_cli_init)))
+ goto done;
+
+done:
+ return error;
+}
+
+VLIB_INIT_FUNCTION (pg_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c
new file mode 100644
index 00000000..c3738a6a
--- /dev/null
+++ b/src/vnet/pg/input.c
@@ -0,0 +1,1674 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pg_input.c: packet generator input
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/vnet.h>
+#include <vnet/feature/feature.h>
+#include <vnet/devices/devices.h>
+
+static int
+validate_buffer_data2 (vlib_buffer_t * b, pg_stream_t * s,
+ u32 data_offset, u32 n_bytes)
+{
+ u8 *bd, *pd, *pm;
+ u32 i;
+
+ bd = b->data;
+ pd = s->fixed_packet_data + data_offset;
+ pm = s->fixed_packet_data_mask + data_offset;
+
+ if (pd + n_bytes >= vec_end (s->fixed_packet_data))
+ n_bytes = (pd < vec_end (s->fixed_packet_data)
+ ? vec_end (s->fixed_packet_data) - pd : 0);
+
+ for (i = 0; i < n_bytes; i++)
+ if ((bd[i] & pm[i]) != pd[i])
+ break;
+
+ if (i >= n_bytes)
+ return 1;
+
+ clib_warning ("buffer %U", format_vlib_buffer, b);
+ clib_warning ("differ at index %d", i);
+ clib_warning ("is %U", format_hex_bytes, bd, n_bytes);
+ clib_warning ("mask %U", format_hex_bytes, pm, n_bytes);
+ clib_warning ("expect %U", format_hex_bytes, pd, n_bytes);
+ return 0;
+}
+
+static int
+validate_buffer_data (vlib_buffer_t * b, pg_stream_t * s)
+{
+ return validate_buffer_data2 (b, s, 0, s->buffer_bytes);
+}
+
+always_inline void
+set_1 (void *a0,
+ u64 v0, u64 v_min, u64 v_max, u32 n_bits, u32 is_net_byte_order)
+{
+ ASSERT (v0 >= v_min && v0 <= v_max);
+ if (n_bits == BITS (u8))
+ {
+ ((u8 *) a0)[0] = v0;
+ }
+ else if (n_bits == BITS (u16))
+ {
+ if (is_net_byte_order)
+ v0 = clib_host_to_net_u16 (v0);
+ clib_mem_unaligned (a0, u16) = v0;
+ }
+ else if (n_bits == BITS (u32))
+ {
+ if (is_net_byte_order)
+ v0 = clib_host_to_net_u32 (v0);
+ clib_mem_unaligned (a0, u32) = v0;
+ }
+ else if (n_bits == BITS (u64))
+ {
+ if (is_net_byte_order)
+ v0 = clib_host_to_net_u64 (v0);
+ clib_mem_unaligned (a0, u64) = v0;
+ }
+}
+
+always_inline void
+set_2 (void *a0, void *a1,
+ u64 v0, u64 v1,
+ u64 v_min, u64 v_max,
+ u32 n_bits, u32 is_net_byte_order, u32 is_increment)
+{
+ ASSERT (v0 >= v_min && v0 <= v_max);
+ ASSERT (v1 >= v_min && v1 <= (v_max + is_increment));
+ if (n_bits == BITS (u8))
+ {
+ ((u8 *) a0)[0] = v0;
+ ((u8 *) a1)[0] = v1;
+ }
+ else if (n_bits == BITS (u16))
+ {
+ if (is_net_byte_order)
+ {
+ v0 = clib_host_to_net_u16 (v0);
+ v1 = clib_host_to_net_u16 (v1);
+ }
+ clib_mem_unaligned (a0, u16) = v0;
+ clib_mem_unaligned (a1, u16) = v1;
+ }
+ else if (n_bits == BITS (u32))
+ {
+ if (is_net_byte_order)
+ {
+ v0 = clib_host_to_net_u32 (v0);
+ v1 = clib_host_to_net_u32 (v1);
+ }
+ clib_mem_unaligned (a0, u32) = v0;
+ clib_mem_unaligned (a1, u32) = v1;
+ }
+ else if (n_bits == BITS (u64))
+ {
+ if (is_net_byte_order)
+ {
+ v0 = clib_host_to_net_u64 (v0);
+ v1 = clib_host_to_net_u64 (v1);
+ }
+ clib_mem_unaligned (a0, u64) = v0;
+ clib_mem_unaligned (a1, u64) = v1;
+ }
+}
+
+static_always_inline void
+do_set_fixed (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 n_bits,
+ u32 byte_offset, u32 is_net_byte_order, u64 v_min, u64 v_max)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
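+  /* Handle two buffers per iteration; b2/b3 are fetched only so the
+     next pair's target bytes can be prefetched. */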
+ while (n_buffers >= 4)
+ {
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ void *a0, *a1;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ b1 = vlib_get_buffer (vm, buffers[1]);
+ b2 = vlib_get_buffer (vm, buffers[2]);
+ b3 = vlib_get_buffer (vm, buffers[3]);
+ buffers += 2;
+ n_buffers -= 2;
+
+ a0 = (void *) b0 + byte_offset;
+ a1 = (void *) b1 + byte_offset;
+ CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE);
+ CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE);
+
+ set_2 (a0, a1, v_min, v_min, v_min, v_max, n_bits, is_net_byte_order,
+ /* is_increment */ 0);
+
+ ASSERT (validate_buffer_data (b0, s));
+ ASSERT (validate_buffer_data (b1, s));
+ }
+
+ while (n_buffers > 0)
+ {
+ vlib_buffer_t *b0;
+ void *a0;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ buffers += 1;
+ n_buffers -= 1;
+
+ a0 = (void *) b0 + byte_offset;
+
+ set_1 (a0, v_min, v_min, v_max, n_bits, is_net_byte_order);
+
+ ASSERT (validate_buffer_data (b0, s));
+ }
+}
+
+static_always_inline u64
+do_set_increment (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 n_bits,
+ u32 byte_offset,
+ u32 is_net_byte_order,
+ u32 want_sum, u64 * sum_result, u64 v_min, u64 v_max, u64 v)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u64 sum = 0;
+
+ ASSERT (v >= v_min && v <= v_max);
+
+ while (n_buffers >= 4)
+ {
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ void *a0, *a1;
+ u64 v_old;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ b1 = vlib_get_buffer (vm, buffers[1]);
+ b2 = vlib_get_buffer (vm, buffers[2]);
+ b3 = vlib_get_buffer (vm, buffers[3]);
+ buffers += 2;
+ n_buffers -= 2;
+
+ a0 = (void *) b0 + byte_offset;
+ a1 = (void *) b1 + byte_offset;
+ CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE);
+ CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE);
+
+ v_old = v;
+ v = v_old + 2;
+ v = v > v_max ? v_min : v;
+ set_2 (a0, a1,
+ v_old + 0, v_old + 1, v_min, v_max, n_bits, is_net_byte_order,
+ /* is_increment */ 1);
+
+ if (want_sum)
+ sum += 2 * v_old + 1;
+
+ if (PREDICT_FALSE (v_old + 1 > v_max))
+ {
+ if (want_sum)
+ sum -= 2 * v_old + 1;
+
+ v = v_old;
+ set_1 (a0, v + 0, v_min, v_max, n_bits, is_net_byte_order);
+ if (want_sum)
+ sum += v;
+ v += 1;
+
+ v = v > v_max ? v_min : v;
+ set_1 (a1, v + 0, v_min, v_max, n_bits, is_net_byte_order);
+ if (want_sum)
+ sum += v;
+ v += 1;
+ }
+
+ ASSERT (validate_buffer_data (b0, s));
+ ASSERT (validate_buffer_data (b1, s));
+ }
+
+ while (n_buffers > 0)
+ {
+ vlib_buffer_t *b0;
+ void *a0;
+ u64 v_old;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ buffers += 1;
+ n_buffers -= 1;
+
+ a0 = (void *) b0 + byte_offset;
+
+ v_old = v;
+ if (want_sum)
+ sum += v_old;
+ v += 1;
+ v = v > v_max ? v_min : v;
+
+ ASSERT (v_old >= v_min && v_old <= v_max);
+ set_1 (a0, v_old, v_min, v_max, n_bits, is_net_byte_order);
+
+ ASSERT (validate_buffer_data (b0, s));
+ }
+
+ if (want_sum)
+ *sum_result = sum;
+
+ return v;
+}
+
+static_always_inline void
+do_set_random (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 n_bits,
+ u32 byte_offset,
+ u32 is_net_byte_order,
+ u32 want_sum, u64 * sum_result, u64 v_min, u64 v_max)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u64 v_diff = v_max - v_min + 1;
+ u64 r_mask = max_pow2 (v_diff) - 1;
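+
+  /* max_pow2 (v_diff) is the smallest power of 2 >= v_diff, so
+     r_mask < 2 * v_diff and the two conditional subtractions below
+     always bring a value back into [v_min, v_max]. */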
+ u64 v0, v1;
+ u64 sum = 0;
+ void *random_data;
+
+ random_data = clib_random_buffer_get_data
+ (&vm->random_buffer, n_buffers * n_bits / BITS (u8));
+
+ v0 = v1 = v_min;
+
+ while (n_buffers >= 4)
+ {
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ void *a0, *a1;
+ u64 r0 = 0, r1 = 0; /* warnings be gone */
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ b1 = vlib_get_buffer (vm, buffers[1]);
+ b2 = vlib_get_buffer (vm, buffers[2]);
+ b3 = vlib_get_buffer (vm, buffers[3]);
+ buffers += 2;
+ n_buffers -= 2;
+
+ a0 = (void *) b0 + byte_offset;
+ a1 = (void *) b1 + byte_offset;
+ CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE);
+ CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE);
+
+ switch (n_bits)
+ {
+#define _(n) \
+ case BITS (u##n): \
+ { \
+ u##n * r = random_data; \
+ r0 = r[0]; \
+ r1 = r[1]; \
+ random_data = r + 2; \
+ } \
+ break;
+
+ _(8);
+ _(16);
+ _(32);
+ _(64);
+
+#undef _
+ }
+
+ /* Add power of 2 sized random number which may be out of range. */
+ v0 += r0 & r_mask;
+ v1 += r1 & r_mask;
+
+ /* Twice should be enough to reduce to v_min .. v_max range. */
+ v0 = v0 > v_max ? v0 - v_diff : v0;
+ v1 = v1 > v_max ? v1 - v_diff : v1;
+ v0 = v0 > v_max ? v0 - v_diff : v0;
+ v1 = v1 > v_max ? v1 - v_diff : v1;
+
+ if (want_sum)
+ sum += v0 + v1;
+
+ set_2 (a0, a1, v0, v1, v_min, v_max, n_bits, is_net_byte_order,
+ /* is_increment */ 0);
+
+ ASSERT (validate_buffer_data (b0, s));
+ ASSERT (validate_buffer_data (b1, s));
+ }
+
+ while (n_buffers > 0)
+ {
+ vlib_buffer_t *b0;
+ void *a0;
+ u64 r0 = 0; /* warnings be gone */
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ buffers += 1;
+ n_buffers -= 1;
+
+ a0 = (void *) b0 + byte_offset;
+
+ switch (n_bits)
+ {
+#define _(n) \
+ case BITS (u##n): \
+ { \
+ u##n * r = random_data; \
+ r0 = r[0]; \
+ random_data = r + 1; \
+ } \
+ break;
+
+ _(8);
+ _(16);
+ _(32);
+ _(64);
+
+#undef _
+ }
+
+ /* Add power of 2 sized random number which may be out of range. */
+ v0 += r0 & r_mask;
+
+ /* Twice should be enough to reduce to v_min .. v_max range. */
+ v0 = v0 > v_max ? v0 - v_diff : v0;
+ v0 = v0 > v_max ? v0 - v_diff : v0;
+
+ if (want_sum)
+ sum += v0;
+
+ set_1 (a0, v0, v_min, v_max, n_bits, is_net_byte_order);
+
+ ASSERT (validate_buffer_data (b0, s));
+ }
+
+ if (want_sum)
+ *sum_result = sum;
+}
+
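+/* Read-modify-write helper: merge v##i into the masked bit field at
+   a##i while preserving network byte order. */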
+#define _(i,t) \
+ clib_mem_unaligned (a##i, t) = \
+ clib_host_to_net_##t ((clib_net_to_host_mem_##t (a##i) &~ mask) \
+ | (v##i << shift))
+
+always_inline void
+setbits_1 (void *a0,
+ u64 v0,
+ u64 v_min, u64 v_max,
+ u32 max_bits, u32 n_bits, u64 mask, u32 shift)
+{
+ ASSERT (v0 >= v_min && v0 <= v_max);
+ if (max_bits == BITS (u8))
+ ((u8 *) a0)[0] = (((u8 *) a0)[0] & ~mask) | (v0 << shift);
+
+ else if (max_bits == BITS (u16))
+ {
+ _(0, u16);
+ }
+ else if (max_bits == BITS (u32))
+ {
+ _(0, u32);
+ }
+ else if (max_bits == BITS (u64))
+ {
+ _(0, u64);
+ }
+}
+
+always_inline void
+setbits_2 (void *a0, void *a1,
+ u64 v0, u64 v1,
+ u64 v_min, u64 v_max,
+ u32 max_bits, u32 n_bits, u64 mask, u32 shift, u32 is_increment)
+{
+ ASSERT (v0 >= v_min && v0 <= v_max);
+ ASSERT (v1 >= v_min && v1 <= v_max + is_increment);
+ if (max_bits == BITS (u8))
+ {
+ ((u8 *) a0)[0] = (((u8 *) a0)[0] & ~mask) | (v0 << shift);
+ ((u8 *) a1)[0] = (((u8 *) a1)[0] & ~mask) | (v1 << shift);
+ }
+
+ else if (max_bits == BITS (u16))
+ {
+ _(0, u16);
+ _(1, u16);
+ }
+ else if (max_bits == BITS (u32))
+ {
+ _(0, u32);
+ _(1, u32);
+ }
+ else if (max_bits == BITS (u64))
+ {
+ _(0, u64);
+ _(1, u64);
+ }
+}
+
+#undef _
+
+static_always_inline void
+do_setbits_fixed (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 max_bits,
+ u32 n_bits,
+ u32 byte_offset, u64 v_min, u64 v_max, u64 mask, u32 shift)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ while (n_buffers >= 4)
+ {
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ void *a0, *a1;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ b1 = vlib_get_buffer (vm, buffers[1]);
+ b2 = vlib_get_buffer (vm, buffers[2]);
+ b3 = vlib_get_buffer (vm, buffers[3]);
+ buffers += 2;
+ n_buffers -= 2;
+
+ a0 = (void *) b0 + byte_offset;
+ a1 = (void *) b1 + byte_offset;
+ CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE);
+ CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE);
+
+ setbits_2 (a0, a1,
+ v_min, v_min, v_min, v_max, max_bits, n_bits, mask, shift,
+ /* is_increment */ 0);
+
+ ASSERT (validate_buffer_data (b0, s));
+ ASSERT (validate_buffer_data (b1, s));
+ }
+
+ while (n_buffers > 0)
+ {
+ vlib_buffer_t *b0;
+ void *a0;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ buffers += 1;
+ n_buffers -= 1;
+
+ a0 = (void *) b0 + byte_offset;
+
+ setbits_1 (a0, v_min, v_min, v_max, max_bits, n_bits, mask, shift);
+ ASSERT (validate_buffer_data (b0, s));
+ }
+}
+
+static_always_inline u64
+do_setbits_increment (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 max_bits,
+ u32 n_bits,
+ u32 byte_offset,
+ u64 v_min, u64 v_max, u64 v, u64 mask, u32 shift)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ ASSERT (v >= v_min && v <= v_max);
+
+ while (n_buffers >= 4)
+ {
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ void *a0, *a1;
+ u64 v_old;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ b1 = vlib_get_buffer (vm, buffers[1]);
+ b2 = vlib_get_buffer (vm, buffers[2]);
+ b3 = vlib_get_buffer (vm, buffers[3]);
+ buffers += 2;
+ n_buffers -= 2;
+
+ a0 = (void *) b0 + byte_offset;
+ a1 = (void *) b1 + byte_offset;
+ CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE);
+ CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE);
+
+ v_old = v;
+ v = v_old + 2;
+ v = v > v_max ? v_min : v;
+ setbits_2 (a0, a1,
+ v_old + 0, v_old + 1,
+ v_min, v_max, max_bits, n_bits, mask, shift,
+ /* is_increment */ 1);
+
+ if (PREDICT_FALSE (v_old + 1 > v_max))
+ {
+ v = v_old;
+ setbits_1 (a0, v + 0, v_min, v_max, max_bits, n_bits, mask, shift);
+ v += 1;
+
+ v = v > v_max ? v_min : v;
+ setbits_1 (a1, v + 0, v_min, v_max, max_bits, n_bits, mask, shift);
+ v += 1;
+ }
+ ASSERT (validate_buffer_data (b0, s));
+ ASSERT (validate_buffer_data (b1, s));
+ }
+
+ while (n_buffers > 0)
+ {
+ vlib_buffer_t *b0;
+ void *a0;
+ u64 v_old;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ buffers += 1;
+ n_buffers -= 1;
+
+ a0 = (void *) b0 + byte_offset;
+
+ v_old = v;
+ v = v_old + 1;
+ v = v > v_max ? v_min : v;
+
+ ASSERT (v_old >= v_min && v_old <= v_max);
+ setbits_1 (a0, v_old, v_min, v_max, max_bits, n_bits, mask, shift);
+
+ ASSERT (validate_buffer_data (b0, s));
+ }
+
+ return v;
+}
+
+static_always_inline void
+do_setbits_random (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 max_bits,
+ u32 n_bits,
+ u32 byte_offset, u64 v_min, u64 v_max, u64 mask, u32 shift)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u64 v_diff = v_max - v_min + 1;
+ u64 r_mask = max_pow2 (v_diff) - 1;
+ u64 v0, v1;
+ void *random_data;
+
+ random_data = clib_random_buffer_get_data
+ (&vm->random_buffer, n_buffers * max_bits / BITS (u8));
+ v0 = v1 = v_min;
+
+ while (n_buffers >= 4)
+ {
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ void *a0, *a1;
+ u64 r0 = 0, r1 = 0; /* warnings be gone */
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ b1 = vlib_get_buffer (vm, buffers[1]);
+ b2 = vlib_get_buffer (vm, buffers[2]);
+ b3 = vlib_get_buffer (vm, buffers[3]);
+ buffers += 2;
+ n_buffers -= 2;
+
+ a0 = (void *) b0 + byte_offset;
+ a1 = (void *) b1 + byte_offset;
+ CLIB_PREFETCH ((void *) b2 + byte_offset, sizeof (v_min), WRITE);
+ CLIB_PREFETCH ((void *) b3 + byte_offset, sizeof (v_min), WRITE);
+
+ switch (max_bits)
+ {
+#define _(n) \
+ case BITS (u##n): \
+ { \
+ u##n * r = random_data; \
+ r0 = r[0]; \
+ r1 = r[1]; \
+ random_data = r + 2; \
+ } \
+ break;
+
+ _(8);
+ _(16);
+ _(32);
+ _(64);
+
+#undef _
+ }
+
+ /* Add power of 2 sized random number which may be out of range. */
+ v0 += r0 & r_mask;
+ v1 += r1 & r_mask;
+
+ /* Twice should be enough to reduce to v_min .. v_max range. */
+ v0 = v0 > v_max ? v0 - v_diff : v0;
+ v1 = v1 > v_max ? v1 - v_diff : v1;
+ v0 = v0 > v_max ? v0 - v_diff : v0;
+ v1 = v1 > v_max ? v1 - v_diff : v1;
+
+ setbits_2 (a0, a1, v0, v1, v_min, v_max, max_bits, n_bits, mask, shift,
+ /* is_increment */ 0);
+
+ ASSERT (validate_buffer_data (b0, s));
+ ASSERT (validate_buffer_data (b1, s));
+ }
+
+ while (n_buffers > 0)
+ {
+ vlib_buffer_t *b0;
+ void *a0;
+ u64 r0 = 0; /* warnings be gone */
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ buffers += 1;
+ n_buffers -= 1;
+
+ a0 = (void *) b0 + byte_offset;
+
+ switch (max_bits)
+ {
+#define _(n) \
+ case BITS (u##n): \
+ { \
+ u##n * r = random_data; \
+ r0 = r[0]; \
+ random_data = r + 1; \
+ } \
+ break;
+
+ _(8);
+ _(16);
+ _(32);
+ _(64);
+
+#undef _
+ }
+
+ /* Add power of 2 sized random number which may be out of range. */
+ v0 += r0 & r_mask;
+
+ /* Twice should be enough to reduce to v_min .. v_max range. */
+ v0 = v0 > v_max ? v0 - v_diff : v0;
+ v0 = v0 > v_max ? v0 - v_diff : v0;
+
+ setbits_1 (a0, v0, v_min, v_max, max_bits, n_bits, mask, shift);
+
+ ASSERT (validate_buffer_data (b0, s));
+ }
+}
+
+static u64
+do_it (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 lo_bit, u32 hi_bit,
+ u64 v_min, u64 v_max, u64 v, pg_edit_type_t edit_type)
+{
+ u32 max_bits, l0, l1, h1, start_bit;
+
+ if (v_min == v_max)
+ edit_type = PG_EDIT_FIXED;
+
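+  /* Split the bit range into a whole-byte offset (l0) and bit
+     remainders (l1, h1).  Byte-aligned edits of 8/16/32/64 bits take
+     the fast path below; everything else uses the masked setbits
+     variants. */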
+ l0 = lo_bit / BITS (u8);
+ l1 = lo_bit % BITS (u8);
+ h1 = hi_bit % BITS (u8);
+
+ start_bit = l0 * BITS (u8);
+
+ max_bits = hi_bit - start_bit;
+ ASSERT (max_bits <= 64);
+
+#define _(n) \
+ case (n): \
+ if (edit_type == PG_EDIT_INCREMENT) \
+ v = do_set_increment (pg, s, buffers, n_buffers, \
+ BITS (u##n), \
+ l0, \
+ /* is_net_byte_order */ 1, \
+ /* want sum */ 0, 0, \
+ v_min, v_max, \
+ v); \
+ else if (edit_type == PG_EDIT_RANDOM) \
+ do_set_random (pg, s, buffers, n_buffers, \
+ BITS (u##n), \
+ l0, \
+ /* is_net_byte_order */ 1, \
+ /* want sum */ 0, 0, \
+ v_min, v_max); \
+ else /* edit_type == PG_EDIT_FIXED */ \
+ do_set_fixed (pg, s, buffers, n_buffers, \
+ BITS (u##n), \
+ l0, \
+ /* is_net_byte_order */ 1, \
+ v_min, v_max); \
+ goto done;
+
+ if (l1 == 0 && h1 == 0)
+ {
+ switch (max_bits)
+ {
+ _(8);
+ _(16);
+ _(32);
+ _(64);
+ }
+ }
+
+#undef _
+
+ {
+ u64 mask;
+ u32 shift = l1;
+ u32 n_bits = max_bits;
+
+ max_bits = clib_max (max_pow2 (n_bits), 8);
+
+ mask = ((u64) 1 << (u64) n_bits) - 1;
+ mask &= ~(((u64) 1 << (u64) shift) - 1);
+
+ mask <<= max_bits - n_bits;
+ shift += max_bits - n_bits;
+
+ switch (max_bits)
+ {
+#define _(n) \
+ case (n): \
+ if (edit_type == PG_EDIT_INCREMENT) \
+ v = do_setbits_increment (pg, s, buffers, n_buffers, \
+ BITS (u##n), n_bits, \
+ l0, v_min, v_max, v, \
+ mask, shift); \
+ else if (edit_type == PG_EDIT_RANDOM) \
+ do_setbits_random (pg, s, buffers, n_buffers, \
+ BITS (u##n), n_bits, \
+ l0, v_min, v_max, \
+ mask, shift); \
+ else /* edit_type == PG_EDIT_FIXED */ \
+ do_setbits_fixed (pg, s, buffers, n_buffers, \
+ BITS (u##n), n_bits, \
+ l0, v_min, v_max, \
+ mask, shift); \
+ goto done;
+
+ _(8);
+ _(16);
+ _(32);
+ _(64);
+
+#undef _
+ }
+ }
+
+done:
+ return v;
+}
+
+static void
+pg_generate_set_lengths (pg_main_t * pg,
+ pg_stream_t * s, u32 * buffers, u32 n_buffers)
+{
+ u64 v_min, v_max, length_sum;
+ pg_edit_type_t edit_type;
+
+ v_min = s->min_packet_bytes;
+ v_max = s->max_packet_bytes;
+ edit_type = s->packet_size_edit_type;
+
+ if (edit_type == PG_EDIT_INCREMENT)
+ s->last_increment_packet_size
+ = do_set_increment (pg, s, buffers, n_buffers,
+ 8 * STRUCT_SIZE_OF (vlib_buffer_t, current_length),
+ STRUCT_OFFSET_OF (vlib_buffer_t, current_length),
+ /* is_net_byte_order */ 0,
+ /* want sum */ 1, &length_sum,
+ v_min, v_max, s->last_increment_packet_size);
+
+ else if (edit_type == PG_EDIT_RANDOM)
+ do_set_random (pg, s, buffers, n_buffers,
+ 8 * STRUCT_SIZE_OF (vlib_buffer_t, current_length),
+ STRUCT_OFFSET_OF (vlib_buffer_t, current_length),
+ /* is_net_byte_order */ 0,
+ /* want sum */ 1, &length_sum,
+ v_min, v_max);
+
+ else /* edit_type == PG_EDIT_FIXED */
+ {
+ do_set_fixed (pg, s, buffers, n_buffers,
+ 8 * STRUCT_SIZE_OF (vlib_buffer_t, current_length),
+ STRUCT_OFFSET_OF (vlib_buffer_t, current_length),
+ /* is_net_byte_order */ 0,
+ v_min, v_max);
+ length_sum = v_min * n_buffers;
+ }
+
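+  /* Credit the generated packets and bytes as RX traffic on the
+     stream's input interface. */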
+ {
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *si =
+ vnet_get_sw_interface (vnm, s->sw_if_index[VLIB_RX]);
+
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (),
+ si->sw_if_index, n_buffers, length_sum);
+ }
+
+}
+
+static void
+pg_generate_fix_multi_buffer_lengths (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers, u32 n_buffers)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ pg_buffer_index_t *pbi;
+ uword n_bytes_left;
+ static u32 *unused_buffers = 0;
+
+ while (n_buffers > 0)
+ {
+ vlib_buffer_t *b;
+ u32 bi;
+
+ bi = buffers[0];
+ b = vlib_get_buffer (vm, bi);
+
+ /* Current length here is length of whole packet. */
+ n_bytes_left = b->current_length;
+
+ pbi = s->buffer_indices;
+ while (1)
+ {
+ uword n = clib_min (n_bytes_left, s->buffer_bytes);
+
+ b->current_length = n;
+ n_bytes_left -= n;
+ if (n_bytes_left > 0)
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ else
+ b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+
+ /* Return unused buffers to fifos. */
+ if (n == 0)
+ vec_add1 (unused_buffers, bi);
+
+ pbi++;
+ if (pbi >= vec_end (s->buffer_indices))
+ break;
+
+ bi = b->next_buffer;
+ b = vlib_get_buffer (vm, bi);
+ }
+ ASSERT (n_bytes_left == 0);
+
+ buffers += 1;
+ n_buffers -= 1;
+ }
+
+ if (vec_len (unused_buffers) > 0)
+ {
+ vlib_buffer_free_no_next (vm, unused_buffers, vec_len (unused_buffers));
+ _vec_len (unused_buffers) = 0;
+ }
+}
+
+static void
+pg_generate_edit (pg_main_t * pg,
+ pg_stream_t * s, u32 * buffers, u32 n_buffers)
+{
+ pg_edit_t *e;
+
+ vec_foreach (e, s->non_fixed_edits)
+ {
+ switch (e->type)
+ {
+ case PG_EDIT_RANDOM:
+ case PG_EDIT_INCREMENT:
+ {
+ u32 lo_bit, hi_bit;
+ u64 v_min, v_max;
+
+ v_min = pg_edit_get_value (e, PG_EDIT_LO);
+ v_max = pg_edit_get_value (e, PG_EDIT_HI);
+
+ hi_bit = (BITS (u8) * STRUCT_OFFSET_OF (vlib_buffer_t, data)
+ + BITS (u8) + e->lsb_bit_offset);
+ lo_bit = hi_bit - e->n_bits;
+
+ e->last_increment_value
+ = do_it (pg, s, buffers, n_buffers, lo_bit, hi_bit, v_min, v_max,
+ e->last_increment_value, e->type);
+ }
+ break;
+
+ case PG_EDIT_UNSPECIFIED:
+ break;
+
+ default:
+ /* Should not be any fixed edits left. */
+ ASSERT (0);
+ break;
+ }
+ }
+
+  /* Call any edit functions, e.g. to complete IP lengths, checksums, ... */
+ {
+ int i;
+ for (i = vec_len (s->edit_groups) - 1; i >= 0; i--)
+ {
+ pg_edit_group_t *g = s->edit_groups + i;
+ if (g->edit_function)
+ g->edit_function (pg, s, g, buffers, n_buffers);
+ }
+ }
+}
+
+static void
+pg_set_next_buffer_pointers (pg_main_t * pg,
+ pg_stream_t * s,
+ u32 * buffers, u32 * next_buffers, u32 n_buffers)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ while (n_buffers >= 4)
+ {
+ u32 ni0, ni1;
+ vlib_buffer_t *b0, *b1;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ b1 = vlib_get_buffer (vm, buffers[1]);
+ ni0 = next_buffers[0];
+ ni1 = next_buffers[1];
+
+ vlib_prefetch_buffer_with_index (vm, buffers[2], WRITE);
+ vlib_prefetch_buffer_with_index (vm, buffers[3], WRITE);
+
+ b0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b1->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b0->next_buffer = ni0;
+ b1->next_buffer = ni1;
+
+ buffers += 2;
+ next_buffers += 2;
+ n_buffers -= 2;
+ }
+
+ while (n_buffers > 0)
+ {
+ u32 ni0;
+ vlib_buffer_t *b0;
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ ni0 = next_buffers[0];
+ buffers += 1;
+ next_buffers += 1;
+ n_buffers -= 1;
+
+ b0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b0->next_buffer = ni0;
+ }
+}
+
+static_always_inline void
+init_replay_buffers_inline (vlib_main_t * vm,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers, u32 data_offset, u32 n_data)
+{
+ u32 n_left, *b, i, l;
+
+ n_left = n_buffers;
+ b = buffers;
+ i = s->current_replay_packet_index;
+ l = vec_len (s->replay_packet_templates);
+
+ while (n_left >= 1)
+ {
+ u32 bi0, n0;
+ vlib_buffer_t *b0;
+ u8 *d0;
+
+ bi0 = b[0];
+ b += 1;
+ n_left -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX];
+ /* was s->sw_if_index[VLIB_TX]; */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ d0 = vec_elt (s->replay_packet_templates, i);
+
+ n0 = n_data;
+ if (data_offset + n_data >= vec_len (d0))
+ n0 = vec_len (d0) > data_offset ? vec_len (d0) - data_offset : 0;
+
+ b0->current_length = n0;
+
+ clib_memcpy (b0->data, d0 + data_offset, n0);
+ i = i + 1 == l ? 0 : i + 1;
+ }
+}
+
+static_always_inline void
+init_buffers_inline (vlib_main_t * vm,
+ pg_stream_t * s,
+ u32 * buffers,
+ u32 n_buffers, u32 data_offset, u32 n_data, u32 set_data)
+{
+ u32 n_left, *b;
+ u8 *data, *mask;
+
+ if (vec_len (s->replay_packet_templates) > 0)
+ return init_replay_buffers_inline (vm, s, buffers, n_buffers, data_offset,
+ n_data);
+
+ data = s->fixed_packet_data + data_offset;
+ mask = s->fixed_packet_data_mask + data_offset;
+ if (data + n_data >= vec_end (s->fixed_packet_data))
+ n_data = (data < vec_end (s->fixed_packet_data)
+ ? vec_end (s->fixed_packet_data) - data : 0);
+ if (n_data > 0)
+ {
+ ASSERT (data + n_data <= vec_end (s->fixed_packet_data));
+ ASSERT (mask + n_data <= vec_end (s->fixed_packet_data_mask));
+ }
+
+ n_left = n_buffers;
+ b = buffers;
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, b[2], STORE);
+ vlib_prefetch_buffer_with_index (vm, b[3], STORE);
+
+ bi0 = b[0];
+ bi1 = b[1];
+ b += 2;
+ n_left -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX];
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ if (set_data)
+ {
+ clib_memcpy (b0->data, data, n_data);
+ clib_memcpy (b1->data, data, n_data);
+ }
+ else
+ {
+ ASSERT (validate_buffer_data2 (b0, s, data_offset, n_data));
+ ASSERT (validate_buffer_data2 (b1, s, data_offset, n_data));
+ }
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ bi0 = b[0];
+ b += 1;
+ n_left -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = s->sw_if_index[VLIB_RX];
+ /* s->sw_if_index[VLIB_TX]; */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ if (set_data)
+ clib_memcpy (b0->data, data, n_data);
+ else
+ ASSERT (validate_buffer_data2 (b0, s, data_offset, n_data));
+ }
+}
+
+static void
+pg_buffer_init (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl, u32 * buffers, u32 n_buffers)
+{
+ pg_main_t *pg = &pg_main;
+ pg_stream_t *s;
+ uword bi, si;
+
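+  /* Decode the free-list init opaque: the low 24 bits hold the
+     stream pool index, the high bits the buffer-chain index (packed
+     in pg_stream_fill_helper as stream_index | (chain_index << 24)). */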
+ si = fl->buffer_init_function_opaque & pow2_mask (24);
+ bi = fl->buffer_init_function_opaque >> 24;
+
+ s = pool_elt_at_index (pg->streams, si);
+
+ init_buffers_inline (vm, s, buffers, n_buffers,
+ /* data_offset */ bi * s->buffer_bytes,
+ /* n_data */ s->buffer_bytes,
+ /* set_data */ 1);
+}
+
+static u32
+pg_stream_fill_helper (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_buffer_index_t * bi,
+ u32 * buffers, u32 * next_buffers, u32 n_alloc)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_buffer_free_list_t *f;
+ uword is_start_of_packet = bi == s->buffer_indices;
+ u32 n_allocated;
+
+ f = vlib_buffer_get_free_list (vm, bi->free_list_index);
+
+ /*
+   * Historically, the pg maintained its own free lists and
+   * device drivers' TX paths would return packets to them.
+ */
+ if (vm->buffer_main->callbacks_registered == 0 &&
+ !(s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE))
+ f->buffer_init_function = pg_buffer_init;
+ f->buffer_init_function_opaque =
+ (s - pg->streams) | ((bi - s->buffer_indices) << 24);
+
+ if (is_start_of_packet)
+ vnet_buffer (&f->buffer_init_template)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+
+ n_allocated = vlib_buffer_alloc_from_free_list (vm,
+ buffers,
+ n_alloc,
+ bi->free_list_index);
+ if (n_allocated == 0)
+ return 0;
+
+ /*
+   * We can't assume we got all of the buffers we asked for;
+   * handling a partial allocation here never worked until recently.
+ */
+ n_alloc = n_allocated;
+
+ /* Reinitialize buffers */
+ if (vm->buffer_main->callbacks_registered == 0 || CLIB_DEBUG > 0
+ || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE))
+ init_buffers_inline
+ (vm, s,
+ buffers,
+ n_alloc, (bi - s->buffer_indices) * s->buffer_bytes /* data offset */ ,
+ s->buffer_bytes,
+ /* set_data */
+ vm->buffer_main->callbacks_registered != 0
+ || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE) != 0);
+
+ if (next_buffers)
+ pg_set_next_buffer_pointers (pg, s, buffers, next_buffers, n_alloc);
+
+ if (is_start_of_packet)
+ {
+ if (vec_len (s->replay_packet_templates) > 0)
+ {
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *si =
+ vnet_get_sw_interface (vnm, s->sw_if_index[VLIB_RX]);
+ u32 l = 0;
+ u32 i;
+ for (i = 0; i < n_alloc; i++)
+ l += vlib_buffer_index_length_in_chain (vm, buffers[i]);
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (),
+ si->sw_if_index, n_alloc, l);
+ s->current_replay_packet_index += n_alloc;
+ s->current_replay_packet_index %=
+ vec_len (s->replay_packet_templates);
+ }
+ else
+ {
+ pg_generate_set_lengths (pg, s, buffers, n_alloc);
+ if (vec_len (s->buffer_indices) > 1)
+ pg_generate_fix_multi_buffer_lengths (pg, s, buffers, n_alloc);
+
+ pg_generate_edit (pg, s, buffers, n_alloc);
+ }
+ }
+
+ return n_alloc;
+}
+
+static u32
+pg_stream_fill (pg_main_t * pg, pg_stream_t * s, u32 n_buffers)
+{
+ pg_buffer_index_t *bi;
+ word i, n_in_fifo, n_alloc, n_free, n_added;
+ u32 *tail, *start, *end, *last_tail, *last_start;
+
+ bi = s->buffer_indices;
+
+ n_in_fifo = clib_fifo_elts (bi->buffer_fifo);
+ if (n_in_fifo >= n_buffers)
+ return n_in_fifo;
+
+ n_alloc = n_buffers - n_in_fifo;
+
+ /* Round up, but never generate more than limit. */
+ n_alloc = clib_max (VLIB_FRAME_SIZE, n_alloc);
+
+ if (s->n_packets_limit > 0
+ && s->n_packets_generated + n_in_fifo + n_alloc >= s->n_packets_limit)
+ {
+ n_alloc = s->n_packets_limit - s->n_packets_generated - n_in_fifo;
+ if (n_alloc < 0)
+ n_alloc = 0;
+ }
+
+ /* All buffer fifos should have the same size. */
+ if (CLIB_DEBUG > 0)
+ {
+ uword l = ~0, e;
+ vec_foreach (bi, s->buffer_indices)
+ {
+ e = clib_fifo_elts (bi->buffer_fifo);
+ if (bi == s->buffer_indices)
+ l = e;
+ ASSERT (l == e);
+ }
+ }
+
+ last_tail = last_start = 0;
+ n_added = n_alloc;
+
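+  /* Fill from the last buffer-chain index down to the first so
+     that, at each step, the buffers just allocated for chain index
+     i + 1 (last_tail/last_start) can be chained onto the index-i
+     buffers. */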
+ for (i = vec_len (s->buffer_indices) - 1; i >= 0; i--)
+ {
+ bi = vec_elt_at_index (s->buffer_indices, i);
+
+ n_free = clib_fifo_free_elts (bi->buffer_fifo);
+ if (n_free < n_alloc)
+ clib_fifo_resize (bi->buffer_fifo, n_alloc - n_free);
+
+ tail = clib_fifo_advance_tail (bi->buffer_fifo, n_alloc);
+ start = bi->buffer_fifo;
+ end = clib_fifo_end (bi->buffer_fifo);
+
+ if (tail + n_alloc <= end)
+ {
+ n_added =
+ pg_stream_fill_helper (pg, s, bi, tail, last_tail, n_alloc);
+ }
+ else
+ {
+ u32 n = clib_min (end - tail, n_alloc);
+ n_added = pg_stream_fill_helper (pg, s, bi, tail, last_tail, n);
+
+ if (n_added == n && n_alloc > n_added)
+ {
+ n_added += pg_stream_fill_helper
+ (pg, s, bi, start, last_start, n_alloc - n_added);
+ }
+ }
+
+ if (PREDICT_FALSE (n_added < n_alloc))
+ tail = clib_fifo_advance_tail (bi->buffer_fifo, n_added - n_alloc);
+
+ last_tail = tail;
+ last_start = start;
+
+ /* Verify that pkts in the fifo are properly allocated */
+ }
+
+ return n_in_fifo + n_added;
+}
+
+typedef struct
+{
+ u32 stream_index;
+
+ u32 packet_length;
+ u32 sw_if_index;
+
+ /* Use pre data for packet data. */
+ vlib_buffer_t buffer;
+} pg_input_trace_t;
+
+static u8 *
+format_pg_input_trace (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ pg_input_trace_t *t = va_arg (*va, pg_input_trace_t *);
+ pg_main_t *pg = &pg_main;
+ pg_stream_t *stream;
+ vlib_node_t *n;
+ uword indent = format_get_indent (s);
+
+ stream = 0;
+ if (!pool_is_free_index (pg->streams, t->stream_index))
+ stream = pool_elt_at_index (pg->streams, t->stream_index);
+
+ if (stream)
+ s = format (s, "stream %v", pg->streams[t->stream_index].name);
+ else
+ s = format (s, "stream %d", t->stream_index);
+
+ s = format (s, ", %d bytes", t->packet_length);
+ s = format (s, ", %d sw_if_index", t->sw_if_index);
+
+ s = format (s, "\n%U%U",
+ format_white_space, indent, format_vlib_buffer, &t->buffer);
+
+ s = format (s, "\n%U", format_white_space, indent);
+
+ n = 0;
+ if (stream)
+ n = vlib_get_node (vm, stream->node_index);
+
+ if (n && n->format_buffer)
+ s = format (s, "%U", n->format_buffer,
+ t->buffer.pre_data, sizeof (t->buffer.pre_data));
+ else
+ s = format (s, "%U",
+ format_hex_bytes, t->buffer.pre_data,
+ ARRAY_LEN (t->buffer.pre_data));
+ return s;
+}
+
+static void
+pg_input_trace (pg_main_t * pg,
+ vlib_node_runtime_t * node,
+ pg_stream_t * s, u32 * buffers, u32 n_buffers)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 *b, n_left, stream_index, next_index;
+
+ n_left = n_buffers;
+ b = buffers;
+ stream_index = s - pg->streams;
+ next_index = s->next_index;
+
+ while (n_left >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ pg_input_trace_t *t0, *t1;
+
+ bi0 = b[0];
+ bi1 = b[1];
+ b += 2;
+ n_left -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 1);
+ vlib_trace_buffer (vm, node, next_index, b1, /* follow_chain */ 1);
+
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+
+ t0->stream_index = stream_index;
+ t1->stream_index = stream_index;
+
+ t0->packet_length = vlib_buffer_length_in_chain (vm, b0);
+ t1->packet_length = vlib_buffer_length_in_chain (vm, b1);
+
+ t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
+ clib_memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b1->pre_data));
+
+ clib_memcpy (t0->buffer.pre_data, b0->data,
+ sizeof (t0->buffer.pre_data));
+ clib_memcpy (t1->buffer.pre_data, b1->data,
+ sizeof (t1->buffer.pre_data));
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ pg_input_trace_t *t0;
+
+ bi0 = b[0];
+ b += 1;
+ n_left -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 1);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+
+ t0->stream_index = stream_index;
+ t0->packet_length = vlib_buffer_length_in_chain (vm, b0);
+ t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
+ clib_memcpy (t0->buffer.pre_data, b0->data,
+ sizeof (t0->buffer.pre_data));
+ }
+}
+
+static uword
+pg_generate_packets (vlib_node_runtime_t * node,
+ pg_main_t * pg,
+ pg_stream_t * s, uword n_packets_to_generate)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 *to_next, n_this_frame, n_left, n_trace, n_packets_in_fifo;
+ uword n_packets_generated;
+ pg_buffer_index_t *bi, *bi0;
+ u32 next_index = s->next_index;
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ u8 feature_arc_index = fm->device_input_feature_arc_index;
+ cm = &fm->feature_config_mains[feature_arc_index];
+ u32 current_config_index = ~(u32) 0;
+ int i;
+
+ bi0 = s->buffer_indices;
+
+ n_packets_in_fifo = pg_stream_fill (pg, s, n_packets_to_generate);
+ n_packets_to_generate = clib_min (n_packets_in_fifo, n_packets_to_generate);
+ n_packets_generated = 0;
+
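+  /* If any device-input feature is enabled on this interface, divert
+     generated packets into the feature arc; the stream's configured
+     next node is saved per buffer so the arc can restore it. */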
+ if (PREDICT_FALSE
+ (vnet_have_features (feature_arc_index, s->sw_if_index[VLIB_RX])))
+ {
+ current_config_index =
+ vec_elt (cm->config_index_by_sw_if_index, s->sw_if_index[VLIB_RX]);
+ vnet_get_config_data (&cm->config_main, &current_config_index,
+ &next_index, 0);
+ }
+
+ while (n_packets_to_generate > 0)
+ {
+ u32 *head, *start, *end;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left);
+
+ n_this_frame = n_packets_to_generate;
+ if (n_this_frame > n_left)
+ n_this_frame = n_left;
+
+ start = bi0->buffer_fifo;
+ end = clib_fifo_end (bi0->buffer_fifo);
+ head = clib_fifo_head (bi0->buffer_fifo);
+
+ if (head + n_this_frame <= end)
+ vlib_copy_buffers (to_next, head, n_this_frame);
+ else
+ {
+ u32 n = end - head;
+ vlib_copy_buffers (to_next + 0, head, n);
+ vlib_copy_buffers (to_next + n, start, n_this_frame - n);
+ }
+
+ vec_foreach (bi, s->buffer_indices)
+ clib_fifo_advance_head (bi->buffer_fifo, n_this_frame);
+
+ if (current_config_index != ~(u32) 0)
+ for (i = 0; i < n_this_frame; i++)
+ {
+ vlib_buffer_t *b;
+ b = vlib_get_buffer (vm, to_next[i]);
+ vnet_buffer (b)->device_input_feat.saved_next_index =
+ s->next_index;
+ vnet_buffer (b)->device_input_feat.buffer_advance = 0;
+ b->current_config_index = current_config_index;
+ b->feature_arc_index = feature_arc_index;
+ }
+
+ n_trace = vlib_get_trace_count (vm, node);
+ if (n_trace > 0)
+ {
+ u32 n = clib_min (n_trace, n_this_frame);
+ pg_input_trace (pg, node, s, to_next, n);
+ vlib_set_trace_count (vm, node, n_trace - n);
+ }
+ n_packets_to_generate -= n_this_frame;
+ n_packets_generated += n_this_frame;
+ n_left -= n_this_frame;
+ vlib_put_next_frame (vm, node, next_index, n_left);
+ }
+
+ return n_packets_generated;
+}
+
+static uword
+pg_input_stream (vlib_node_runtime_t * node, pg_main_t * pg, pg_stream_t * s)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ uword n_packets;
+ f64 time_now, dt;
+
+ if (s->n_packets_limit > 0 && s->n_packets_generated >= s->n_packets_limit)
+ {
+ pg_stream_enable_disable (pg, s, /* want_enabled */ 0);
+ return 0;
+ }
+
+ /* Apply rate limit. */
+ time_now = vlib_time_now (vm);
+ if (s->time_last_generate == 0)
+ s->time_last_generate = time_now;
+
+ dt = time_now - s->time_last_generate;
+ s->time_last_generate = time_now;
+
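+  /* Token-bucket style accumulator: e.g. at 1e6 pps and dt = 50e-6 s
+     the accumulator grows by 50, so at most 50 packets are generated
+     this call and the fractional remainder carries over. */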
+ n_packets = VLIB_FRAME_SIZE;
+ if (s->rate_packets_per_second > 0)
+ {
+ s->packet_accumulator += dt * s->rate_packets_per_second;
+ n_packets = s->packet_accumulator;
+
+ /* Never allow accumulator to grow if we get behind. */
+ s->packet_accumulator -= n_packets;
+ }
+
+ /* Apply fixed limit. */
+ if (s->n_packets_limit > 0
+ && s->n_packets_generated + n_packets > s->n_packets_limit)
+ n_packets = s->n_packets_limit - s->n_packets_generated;
+
+ /* Generate up to one frame's worth of packets. */
+ if (n_packets > VLIB_FRAME_SIZE)
+ n_packets = VLIB_FRAME_SIZE;
+
+ if (n_packets > 0)
+ n_packets = pg_generate_packets (node, pg, s, n_packets);
+
+ s->n_packets_generated += n_packets;
+
+ return n_packets;
+}
+
+uword
+pg_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ uword i;
+ pg_main_t *pg = &pg_main;
+ uword n_packets = 0;
+ u32 worker_index = 0;
+
+ if (vlib_num_workers ())
+ worker_index = vlib_get_current_worker_index ();
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, pg->enabled_streams[worker_index], ({
+ pg_stream_t *s = vec_elt_at_index (pg->streams, i);
+ n_packets += pg_input_stream (node, pg, s);
+ }));
+ /* *INDENT-ON* */
+
+ return n_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (pg_input_node) = {
+ .function = pg_input,
+ .name = "pg-input",
+ .sibling_of = "device-input",
+ .type = VLIB_NODE_TYPE_INPUT,
+
+ .format_trace = format_pg_input_trace,
+
+ /* Input node will be left disabled until a stream is active. */
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pg/output.c b/src/vnet/pg/output.c
new file mode 100644
index 00000000..ab57deef
--- /dev/null
+++ b/src/vnet/pg/output.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pg_output.c: packet generator output
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vppinfra/string.h>
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+
+uword
+pg_output (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ pg_main_t *pg = &pg_main;
+ u32 *buffers = vlib_frame_args (frame);
+ uword n_buffers = frame->n_vectors;
+ uword n_left = n_buffers;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ pg_interface_t *pif = pool_elt_at_index (pg->interfaces, rd->dev_instance);
+
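+  /* With worker threads, TX on a pg interface is serialized with a
+     simple test-and-set spin lock; lockp is only allocated when
+     workers exist (see pg_interface_add_or_get). */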
+ if (PREDICT_FALSE (pif->lockp != 0))
+ while (__sync_lock_test_and_set (pif->lockp, 1))
+ ;
+
+ while (n_left > 0)
+ {
+ n_left--;
+ u32 bi0 = buffers[0];
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi0);
+ buffers++;
+
+ if (b->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ pg_output_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->buffer_index = bi0;
+ clib_memcpy (&t->buffer, b, sizeof (b[0]) - sizeof (b->pre_data));
+ clib_memcpy (t->buffer.pre_data, b->data + b->current_data,
+ sizeof (t->buffer.pre_data));
+ }
+
+ if (pif->pcap_file_name != 0)
+ pcap_add_buffer (&pif->pcap_main, vm, bi0, ETHERNET_MAX_PACKET_BYTES);
+ }
+ if (pif->pcap_file_name != 0)
+ pcap_write (&pif->pcap_main);
+
+ vlib_buffer_free (vm, vlib_frame_args (frame), n_buffers);
+ if (PREDICT_FALSE (pif->lockp != 0))
+ *pif->lockp = 0;
+ return n_buffers;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pg/pg.h b/src/vnet/pg/pg.h
new file mode 100644
index 00000000..111df91a
--- /dev/null
+++ b/src/vnet/pg/pg.h
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pg.h: VLIB packet generator
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_pg_h
+#define included_vlib_pg_h
+
+#include <vlib/vlib.h> /* for VLIB_N_RX_TX */
+#include <vnet/pg/edit.h>
+#include <vppinfra/fifo.h> /* for buffer_fifo */
+#include <vnet/unix/pcap.h>
+#include <vnet/interface.h>
+
+extern vnet_device_class_t pg_dev_class;
+
+struct pg_main_t;
+struct pg_stream_t;
+
+typedef struct pg_edit_group_t
+{
+ /* Edits in this group. */
+ pg_edit_t *edits;
+
+ /* Vector of non-fixed edits for this group. */
+ pg_edit_t *non_fixed_edits;
+
+ /* Fixed edits for this group. */
+ u8 *fixed_packet_data;
+ u8 *fixed_packet_data_mask;
+
+ /* Byte offset where packet data begins. */
+ u32 start_byte_offset;
+
+ /* Number of packet bytes for this edit group. */
+ u32 n_packet_bytes;
+
+ /* Function to perform miscellaneous edits (e.g. set IP checksum, ...). */
+ void (*edit_function) (struct pg_main_t * pg,
+ struct pg_stream_t * s,
+ struct pg_edit_group_t * g,
+ u32 * buffers, u32 n_buffers);
+
+ /* Opaque data for edit function's use. */
+ uword edit_function_opaque;
+} pg_edit_group_t;
+
+/* Packets are made of multiple buffers chained together.
+ This struct keeps track of data per-chain index. */
+typedef struct
+{
+ /* Vector of buffer edits for this stream and buffer index. */
+ pg_edit_t *edits;
+
+ /* Buffers pre-initialized with fixed buffer data for this stream. */
+ u32 *buffer_fifo;
+
+ /* Buffer free list for this buffer index in stream. */
+ u32 free_list_index;
+} pg_buffer_index_t;
+
+typedef struct pg_stream_t
+{
+ /* Stream name. */
+ u8 *name;
+
+ u32 flags;
+
+ /* Stream is currently enabled. */
+#define PG_STREAM_FLAGS_IS_ENABLED (1 << 0)
+#define PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE (1 << 1)
+
+ /* Edit groups are created by each protocol level (e.g. ethernet,
+ ip4, tcp, ...). */
+ pg_edit_group_t *edit_groups;
+
+ pg_edit_type_t packet_size_edit_type;
+
+ /* Min/max packet size. */
+ u32 min_packet_bytes, max_packet_bytes;
+
+ /* Vector of non-fixed edits for this stream.
+ All fixed edits are performed and placed into fixed_packet_data. */
+ pg_edit_t *non_fixed_edits;
+
+ /* Packet data with all fixed edits performed.
+     All packets in the stream are initialized according to this data.
+ Mask specifies which bits of packet data are covered by fixed edits. */
+ u8 *fixed_packet_data, *fixed_packet_data_mask;
+
+ /* Size to use for buffers. 0 means use buffers big enough
+ for max_packet_bytes. */
+ u32 buffer_bytes;
+
+ /* Last packet length if packet size edit type is increment. */
+ u32 last_increment_packet_size;
+
+ /* Index into main interface pool for this stream. */
+ u32 pg_if_index;
+
+  /* Interface used to mark packets for this stream. May differ from
+     the hw/sw index in the pg main interface pool; they will differ
+     when this stream is being used to generate buffers as if they
+     were received on a non-pg interface. For example, suppose you
+     are testing vlan code and want to generate buffers that appear
+     to come from an ethernet interface. */
+ u32 sw_if_index[VLIB_N_RX_TX];
+
+ /* Node where stream's buffers get put. */
+ u32 node_index;
+
+ /* Worker thread index */
+ u32 worker_index;
+
+ /* Output next index to reach output node from stream input node. */
+ u32 next_index;
+
+ u32 if_id;
+
+ /* Number of packets currently generated. */
+ u64 n_packets_generated;
+
+ /* Stream is disabled when packet limit is reached.
+ Zero means no packet limit. */
+ u64 n_packets_limit;
+
+ /* Rate for this stream in packets/second.
+ Zero means unlimited rate. */
+ f64 rate_packets_per_second;
+
+ f64 time_last_generate;
+
+ f64 packet_accumulator;
+
+ pg_buffer_index_t *buffer_indices;
+
+ u8 **replay_packet_templates;
+ u32 current_replay_packet_index;
+} pg_stream_t;
+
+always_inline void
+pg_buffer_index_free (pg_buffer_index_t * bi)
+{
+ vec_free (bi->edits);
+ clib_fifo_free (bi->buffer_fifo);
+}
+
+always_inline void
+pg_edit_group_free (pg_edit_group_t * g)
+{
+ pg_edit_t *e;
+ vec_foreach (e, g->edits) pg_edit_free (e);
+ vec_free (g->edits);
+ vec_free (g->fixed_packet_data);
+ vec_free (g->fixed_packet_data_mask);
+}
+
+always_inline void
+pg_stream_free (pg_stream_t * s)
+{
+ pg_edit_group_t *g;
+ pg_edit_t *e;
+ vec_foreach (e, s->non_fixed_edits) pg_edit_free (e);
+ vec_free (s->non_fixed_edits);
+ vec_foreach (g, s->edit_groups) pg_edit_group_free (g);
+ vec_free (s->edit_groups);
+ vec_free (s->fixed_packet_data);
+ vec_free (s->fixed_packet_data_mask);
+ vec_free (s->name);
+
+ {
+ pg_buffer_index_t *bi;
+ vec_foreach (bi, s->buffer_indices) pg_buffer_index_free (bi);
+ vec_free (s->buffer_indices);
+ }
+}
+
+always_inline int
+pg_stream_is_enabled (pg_stream_t * s)
+{
+ return (s->flags & PG_STREAM_FLAGS_IS_ENABLED) != 0;
+}
+
+always_inline pg_edit_group_t *
+pg_stream_get_group (pg_stream_t * s, u32 group_index)
+{
+ return vec_elt_at_index (s->edit_groups, group_index);
+}
+
+always_inline void *
+pg_create_edit_group (pg_stream_t * s,
+ int n_edit_bytes, int n_packet_bytes, u32 * group_index)
+{
+ pg_edit_group_t *g;
+ int n_edits;
+
+ vec_add2 (s->edit_groups, g, 1);
+ if (group_index)
+ *group_index = g - s->edit_groups;
+
+ ASSERT (n_edit_bytes % sizeof (pg_edit_t) == 0);
+ n_edits = n_edit_bytes / sizeof (pg_edit_t);
+ vec_resize (g->edits, n_edits);
+
+ g->n_packet_bytes = n_packet_bytes;
+
+ return g->edits;
+}
+
+always_inline void *
+pg_add_edits (pg_stream_t * s, int n_edit_bytes, int n_packet_bytes,
+ u32 group_index)
+{
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ pg_edit_t *e;
+ int n_edits;
+ ASSERT (n_edit_bytes % sizeof (pg_edit_t) == 0);
+ n_edits = n_edit_bytes / sizeof (pg_edit_t);
+ vec_add2 (g->edits, e, n_edits);
+ g->n_packet_bytes += n_packet_bytes;
+ return e;
+}
+
+always_inline void *
+pg_get_edit_group (pg_stream_t * s, u32 group_index)
+{
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ return g->edits;
+}
+
+/* Number of bytes for all groups >= given group. */
+always_inline uword
+pg_edit_group_n_bytes (pg_stream_t * s, u32 group_index)
+{
+ pg_edit_group_t *g;
+ uword n_bytes = 0;
+
+ for (g = s->edit_groups + group_index; g < vec_end (s->edit_groups); g++)
+ n_bytes += g->n_packet_bytes;
+ return n_bytes;
+}
+
+always_inline void
+pg_free_edit_group (pg_stream_t * s)
+{
+ uword i = vec_len (s->edit_groups) - 1;
+ pg_edit_group_t *g = pg_stream_get_group (s, i);
+
+ pg_edit_group_free (g);
+ memset (g, 0, sizeof (g[0]));
+ _vec_len (s->edit_groups) = i;
+}
+
+typedef struct
+{
+ /* TX lock */
+ volatile u32 *lockp;
+
+ /* VLIB interface indices. */
+ u32 hw_if_index, sw_if_index;
+
+ /* Identifies stream for this interface. */
+ u32 id;
+
+ pcap_main_t pcap_main;
+ u8 *pcap_file_name;
+} pg_interface_t;
+
+/* Per VLIB node data. */
+typedef struct
+{
+ /* Parser function indexed by node index. */
+ unformat_function_t *unformat_edit;
+} pg_node_t;
+
+typedef struct pg_main_t
+{
+ /* Pool of streams. */
+ pg_stream_t *streams;
+
+ /* Bitmap indicating which streams are currently enabled. */
+ uword **enabled_streams;
+
+ /* Hash mapping name -> stream index. */
+ uword *stream_index_by_name;
+
+ /* Pool of interfaces. */
+ pg_interface_t *interfaces;
+ uword *if_index_by_if_id;
+
+ /* Per VLIB node information. */
+ pg_node_t *nodes;
+} pg_main_t;
+
+/* Global main structure. */
+extern pg_main_t pg_main;
+
+/* Global node. */
+extern vlib_node_registration_t pg_input_node;
+
+/* Buffer generator input, output node functions. */
+vlib_node_function_t pg_input, pg_output;
+
+/* Stream add/delete. */
+void pg_stream_del (pg_main_t * pg, uword index);
+void pg_stream_add (pg_main_t * pg, pg_stream_t * s_init);
+
+/* Enable/disable stream. */
+void pg_stream_enable_disable (pg_main_t * pg, pg_stream_t * s,
+ int is_enable);
+
+/* Find/create free packet-generator interface index. */
+u32 pg_interface_add_or_get (pg_main_t * pg, uword stream_index);
+
+always_inline pg_node_t *
+pg_get_node (uword node_index)
+{
+ pg_main_t *pg = &pg_main;
+ vec_validate (pg->nodes, node_index);
+ return pg->nodes + node_index;
+}
+
+void pg_edit_group_get_fixed_packet_data (pg_stream_t * s,
+ u32 group_index,
+ void *fixed_packet_data,
+ void *fixed_packet_data_mask);
+
+void pg_enable_disable (u32 stream_index, int is_enable);
+
+typedef struct
+{
+ u32 hw_if_index;
+ u32 dev_instance;
+ u8 is_enabled;
+ u8 *pcap_file_name;
+ u32 count;
+} pg_capture_args_t;
+
+clib_error_t *pg_capture (pg_capture_args_t * a);
+
+typedef struct
+{
+ vlib_buffer_t buffer;
+ u32 buffer_index;
+}
+pg_output_trace_t;
+
+#endif /* included_vlib_pg_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c
new file mode 100644
index 00000000..a540b32b
--- /dev/null
+++ b/src/vnet/pg/stream.c
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pg_stream.c: packet generator streams
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/devices/devices.h>
+
+/* Mark stream active or inactive. */
+void
+pg_stream_enable_disable (pg_main_t * pg, pg_stream_t * s, int want_enabled)
+{
+ vlib_main_t *vm;
+ vnet_main_t *vnm = vnet_get_main ();
+ pg_interface_t *pi = pool_elt_at_index (pg->interfaces, s->pg_if_index);
+
+ want_enabled = want_enabled != 0;
+
+ if (pg_stream_is_enabled (s) == want_enabled)
+ /* No change necessary. */
+ return;
+
+ if (want_enabled)
+ s->n_packets_generated = 0;
+
+ /* Toggle enabled flag. */
+ s->flags ^= PG_STREAM_FLAGS_IS_ENABLED;
+
+ ASSERT (!pool_is_free (pg->streams, s));
+
+ vec_validate (pg->enabled_streams, s->worker_index);
+ pg->enabled_streams[s->worker_index] =
+ clib_bitmap_set (pg->enabled_streams[s->worker_index], s - pg->streams,
+ want_enabled);
+
+ if (want_enabled)
+ {
+ vnet_hw_interface_set_flags (vnm, pi->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ vnet_sw_interface_set_flags (vnm, pi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ }
+
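+  /* Keep pg-input polling on this stream's thread only while at
+     least one of that thread's streams remains enabled. */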
+ if (vlib_num_workers ())
+ vm = vlib_get_worker_vlib_main (s->worker_index);
+ else
+ vm = vlib_get_main ();
+
+ vlib_node_set_state (vm, pg_input_node.index,
+ (clib_bitmap_is_zero
+ (pg->enabled_streams[s->worker_index]) ?
+ VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_POLLING));
+
+ s->packet_accumulator = 0;
+ s->time_last_generate = 0;
+}
+
+static u8 *
+format_pg_output_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ pg_output_trace_t *t = va_arg (*va, pg_output_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%Ubuffer 0x%x: %U",
+ format_white_space, indent,
+ t->buffer_index, format_vlib_buffer, &t->buffer);
+
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_ethernet_header_with_length, t->buffer.pre_data,
+ sizeof (t->buffer.pre_data));
+
+ return s;
+}
+
+static u8 *
+format_pg_interface_name (u8 * s, va_list * args)
+{
+ pg_main_t *pg = &pg_main;
+ u32 if_index = va_arg (*args, u32);
+ pg_interface_t *pi;
+
+ pi = pool_elt_at_index (pg->interfaces, if_index);
+ s = format (s, "pg%d", pi->id);
+
+ return s;
+}
+
+static clib_error_t *
+pg_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ u32 hw_flags = 0;
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (pg_dev_class) = {
+ .name = "pg",
+ .tx_function = pg_output,
+ .format_device_name = format_pg_interface_name,
+ .format_tx_trace = format_pg_output_trace,
+ .admin_up_down_function = pg_interface_admin_up_down,
+};
+/* *INDENT-ON* */
+
+static u8 *
+pg_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ u8 *rewrite = NULL;
+ u16 *h;
+
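+  /* The pg rewrite is just a 2-byte ethertype matching the requested
+     link type; there is no real L2 header to build. */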
+ vec_validate (rewrite, sizeof (*h) - 1);
+ h = (u16 *) rewrite;
+ h[0] = clib_host_to_net_u16 (vnet_link_to_l3_proto (link_type));
+
+ return (rewrite);
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = {
+ .name = "Packet generator",
+ .build_rewrite = pg_build_rewrite,
+};
+/* *INDENT-ON* */
+
+static u32
+pg_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
+{
+ /* nothing for now */
+ return 0;
+}
+
+u32
+pg_interface_add_or_get (pg_main_t * pg, uword if_id)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ pg_interface_t *pi;
+ vnet_hw_interface_t *hi;
+ uword *p;
+ u32 i;
+
+ p = hash_get (pg->if_index_by_if_id, if_id);
+
+ if (p)
+ {
+ return p[0];
+ }
+ else
+ {
+ u8 hw_addr[6];
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+
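+      /* Synthesize a locally administered unicast MAC address,
+         02:fe:xx:xx:xx:xx, with four time-seeded random low bytes. */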
+ pool_get (pg->interfaces, pi);
+ i = pi - pg->interfaces;
+
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+ clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+ hw_addr[0] = 2;
+ hw_addr[1] = 0xfe;
+
+ pi->id = if_id;
+ ethernet_register_interface (vnm, pg_dev_class.index, i, hw_addr,
+ &pi->hw_if_index, pg_eth_flag_change);
+ hi = vnet_get_hw_interface (vnm, pi->hw_if_index);
+ pi->sw_if_index = hi->sw_if_index;
+
+ hash_set (pg->if_index_by_if_id, if_id, i);
+
+ if (vlib_num_workers ())
+ {
+ pi->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ *pi->lockp = 0;
+ }
+ }
+
+ return i;
+}
+
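+/* Commit one edit into its group. Fixed edits are merged bit-by-bit
+   into fixed_packet_data/_mask; other edit types are queued on
+   non_fixed_edits when committed. For example, a 16-bit fixed edit
+   with lsb_bit_offset = 96 fills bytes 11 and 12 of
+   fixed_packet_data (byte 12 taking the least significant value
+   byte) and sets the same two mask bytes to 0xff. */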
+static void
+do_edit (pg_stream_t * stream,
+ pg_edit_group_t * g, pg_edit_t * e, uword want_commit)
+{
+ u32 i, i0, i1, mask, n_bits_left;
+ u8 *v, *s, *m;
+
+ i0 = e->lsb_bit_offset / BITS (u8);
+
+ /* Make space for edit in value and mask. */
+ vec_validate (g->fixed_packet_data, i0);
+ vec_validate (g->fixed_packet_data_mask, i0);
+
+ if (e->type != PG_EDIT_FIXED)
+ {
+ switch (e->type)
+ {
+ case PG_EDIT_RANDOM:
+ case PG_EDIT_INCREMENT:
+ e->last_increment_value = pg_edit_get_value (e, PG_EDIT_LO);
+ break;
+
+ default:
+ break;
+ }
+
+ if (want_commit)
+ {
+ ASSERT (e->type != PG_EDIT_INVALID_TYPE);
+ vec_add1 (g->non_fixed_edits, e[0]);
+ }
+ return;
+ }
+
+ s = g->fixed_packet_data;
+ m = g->fixed_packet_data_mask;
+
+ n_bits_left = e->n_bits;
+ i0 = e->lsb_bit_offset / BITS (u8);
+ i1 = e->lsb_bit_offset % BITS (u8);
+
+ v = e->values[PG_EDIT_LO];
+ i = pg_edit_n_alloc_bytes (e) - 1;
+
+  /* Odd low order bits? */
+ if (i1 != 0 && n_bits_left > 0)
+ {
+ u32 n = clib_min (n_bits_left, BITS (u8) - i1);
+
+ mask = pow2_mask (n) << i1;
+
+ ASSERT (i0 < vec_len (s));
+ ASSERT (i < vec_len (v));
+ ASSERT ((v[i] & ~mask) == 0);
+
+ s[i0] |= v[i] & mask;
+ m[i0] |= mask;
+
+ i0--;
+ i--;
+ n_bits_left -= n;
+ }
+
+ /* Even bytes. */
+ while (n_bits_left >= 8)
+ {
+ ASSERT (i0 < vec_len (s));
+ ASSERT (i < vec_len (v));
+
+ s[i0] = v[i];
+ m[i0] = ~0;
+
+ i0--;
+ i--;
+ n_bits_left -= 8;
+ }
+
+ /* Odd high order bits. */
+ if (n_bits_left > 0)
+ {
+ mask = pow2_mask (n_bits_left);
+
+ ASSERT (i0 < vec_len (s));
+ ASSERT (i < vec_len (v));
+ ASSERT ((v[i] & ~mask) == 0);
+
+ s[i0] |= v[i] & mask;
+ m[i0] |= mask;
+ }
+
+ if (want_commit)
+ pg_edit_free (e);
+}
+
+void
+pg_edit_group_get_fixed_packet_data (pg_stream_t * s,
+ u32 group_index,
+ void *packet_data,
+ void *packet_data_mask)
+{
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ pg_edit_t *e;
+
+ vec_foreach (e, g->edits) do_edit (s, g, e, /* want_commit */ 0);
+
+ clib_memcpy (packet_data, g->fixed_packet_data,
+ vec_len (g->fixed_packet_data));
+ clib_memcpy (packet_data_mask, g->fixed_packet_data_mask,
+ vec_len (g->fixed_packet_data_mask));
+}
+
+static void
+perform_fixed_edits (pg_stream_t * s)
+{
+ pg_edit_group_t *g;
+ pg_edit_t *e;
+ word i;
+
+ for (i = vec_len (s->edit_groups) - 1; i >= 0; i--)
+ {
+ g = vec_elt_at_index (s->edit_groups, i);
+ vec_foreach (e, g->edits) do_edit (s, g, e, /* want_commit */ 1);
+
+ /* All edits have either been performed or added to
+ g->non_fixed_edits. So, we can delete the vector. */
+ vec_free (g->edits);
+ }
+
+ vec_free (s->fixed_packet_data_mask);
+ vec_free (s->fixed_packet_data);
+ vec_foreach (g, s->edit_groups)
+ {
+ int i;
+ g->start_byte_offset = vec_len (s->fixed_packet_data);
+
+ /* Relocate and copy non-fixed edits from group to stream. */
+ vec_foreach (e, g->non_fixed_edits)
+ e->lsb_bit_offset += g->start_byte_offset * BITS (u8);
+
+ for (i = 0; i < vec_len (g->non_fixed_edits); i++)
+ ASSERT (g->non_fixed_edits[i].type != PG_EDIT_INVALID_TYPE);
+
+ vec_add (s->non_fixed_edits,
+ g->non_fixed_edits, vec_len (g->non_fixed_edits));
+ vec_free (g->non_fixed_edits);
+
+ vec_add (s->fixed_packet_data,
+ g->fixed_packet_data, vec_len (g->fixed_packet_data));
+ vec_add (s->fixed_packet_data_mask,
+ g->fixed_packet_data_mask, vec_len (g->fixed_packet_data_mask));
+ }
+}
+
+void
+pg_stream_add (pg_main_t * pg, pg_stream_t * s_init)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ pg_stream_t *s;
+ uword *p;
+
+ if (!pg->stream_index_by_name)
+ pg->stream_index_by_name
+ = hash_create_vec (0, sizeof (s->name[0]), sizeof (uword));
+
+ /* Delete any old stream with the same name. */
+ if (s_init->name
+ && (p = hash_get_mem (pg->stream_index_by_name, s_init->name)))
+ {
+ pg_stream_del (pg, p[0]);
+ }
+
+ pool_get (pg->streams, s);
+ s[0] = s_init[0];
+
+ /* Give it a name. */
+ if (!s->name)
+ s->name = format (0, "stream%d", s - pg->streams);
+ else
+ s->name = vec_dup (s->name);
+
+ hash_set_mem (pg->stream_index_by_name, s->name, s - pg->streams);
+
+ /* Get fixed part of buffer data. */
+ if (s->edit_groups)
+ perform_fixed_edits (s);
+
+ /* Determine packet size. */
+ switch (s->packet_size_edit_type)
+ {
+ case PG_EDIT_INCREMENT:
+ case PG_EDIT_RANDOM:
+ if (s->min_packet_bytes == s->max_packet_bytes)
+ s->packet_size_edit_type = PG_EDIT_FIXED;
+ break;
+
+ default:
+ /* Get packet size from fixed edits. */
+ s->packet_size_edit_type = PG_EDIT_FIXED;
+ if (!s->replay_packet_templates)
+ s->min_packet_bytes = s->max_packet_bytes =
+ vec_len (s->fixed_packet_data);
+ break;
+ }
+
+ s->last_increment_packet_size = s->min_packet_bytes;
+
+ {
+ pg_buffer_index_t *bi;
+ int n;
+
+ if (vm->buffer_main->callbacks_registered)
+ s->buffer_bytes = VLIB_BUFFER_DATA_SIZE;
+
+ if (!s->buffer_bytes)
+ s->buffer_bytes = s->max_packet_bytes;
+
+ s->buffer_bytes = vlib_buffer_round_size (s->buffer_bytes);
+
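+    /* One buffer-chain index (and free list) per s->buffer_bytes
+       chunk, rounding up so max_packet_bytes always fits in the
+       chain. */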
+ n = s->max_packet_bytes / s->buffer_bytes;
+ n += (s->max_packet_bytes % s->buffer_bytes) != 0;
+
+ vec_resize (s->buffer_indices, n);
+
+ vec_foreach (bi, s->buffer_indices)
+ {
+ bi->free_list_index =
+ vlib_buffer_create_free_list (vm, s->buffer_bytes,
+ "pg stream %d buffer #%d",
+ s - pg->streams,
+ 1 + (bi - s->buffer_indices));
+ }
+ }
+
+ /* Find an interface to use. */
+ s->pg_if_index = pg_interface_add_or_get (pg, s->if_id);
+
+ {
+ pg_interface_t *pi = pool_elt_at_index (pg->interfaces, s->pg_if_index);
+ vlib_rx_or_tx_t rx_or_tx;
+
+ vlib_foreach_rx_tx (rx_or_tx)
+ {
+ if (s->sw_if_index[rx_or_tx] == ~0)
+ s->sw_if_index[rx_or_tx] = pi->sw_if_index;
+ }
+ }
+
+ /* Connect the graph. */
+ s->next_index = vlib_node_add_next (vm, device_input_node.index,
+ s->node_index);
+}
+
+void
+pg_stream_del (pg_main_t * pg, uword index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ pg_stream_t *s;
+ pg_buffer_index_t *bi;
+
+ s = pool_elt_at_index (pg->streams, index);
+
+ pg_stream_enable_disable (pg, s, /* want_enabled */ 0);
+ hash_unset_mem (pg->stream_index_by_name, s->name);
+
+ vec_foreach (bi, s->buffer_indices)
+ {
+ vlib_buffer_delete_free_list (vm, bi->free_list_index);
+ clib_fifo_free (bi->buffer_fifo);
+ }
+
+ pg_stream_free (s);
+ pool_put (pg->streams, s);
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/pipeline.h b/src/vnet/pipeline.h
new file mode 100644
index 00000000..a4aa5cf5
--- /dev/null
+++ b/src/vnet/pipeline.h
@@ -0,0 +1,456 @@
+/*
+ * vnet/pipeline.h: software pipeline
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Usage example.
+ *
+ * #define NSTAGES 3 or whatever
+ *
+ * <Define pipeline stages>
+ *
+ * #include <vnet/pipeline.h>
+ *
+ * static uword my_node_fn (vlib_main_t * vm,
+ * vlib_node_runtime_t * node,
+ * vlib_frame_t * frame)
+ * {
+ * return dispatch_pipeline (vm, node, frame);
+ * }
+ *
+ */
+
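+/*
+ * For NSTAGES == 3 the required stages are stage0, stage1 and
+ * last_stage.  A minimal sketch (my_lookup_prefetch () and
+ * my_lookup () are hypothetical helpers):
+ *
+ * #define stage0 generic_stage0
+ *
+ * static STAGE_INLINE void
+ * stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi)
+ * {
+ *   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ *   my_lookup_prefetch (b);   // warm the data last_stage will read
+ * }
+ *
+ * static STAGE_INLINE u32
+ * last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi)
+ * {
+ *   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ *   return my_lookup (b);     // next-node index for this packet
+ * }
+ */
+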
+#ifndef NSTAGES
+#error files which #include <vnet/pipeline.h> must define NSTAGES
+#endif
+
+#ifndef STAGE_INLINE
+#define STAGE_INLINE inline
+#endif
+
+/*
+ * A prefetch stride of 2 is quasi-equivalent to doubling the number
+ * of stages with every other pipeline stage empty.
+ */
+
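+/*
+ * Each dispatch_pipeline variant below has the same shape: a prolog
+ * that primes the first NSTAGES - 1 packets through the early
+ * stages, a steady-state loop that runs every stage once per packet
+ * and enqueues one result, and an epilog that drains the NSTAGES - 1
+ * packets still in flight when input runs out.
+ */
+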
+/*
+ * This is a typical first pipeline stage, which prefetches
+ * buffer metadata and the first line of pkt data.
+ * To use it:
+ * #define stage0 generic_stage0
+ */
+static STAGE_INLINE void
+generic_stage0 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, u32 buffer_index)
+{
+ /* generic default stage 0 here */
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+ vlib_prefetch_buffer_header (b, STORE);
+ CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+#if NSTAGES == 2
+
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ to_next[0] = from[pi - 1];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 1]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 1], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ {
+ to_next[0] = from[pi - 1];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 1]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 1], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 3
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ if (pi - 1 >= 0)
+ stage1 (vm, node, from[pi - 1]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ stage1 (vm, node, from[pi - 1]);
+ to_next[0] = from[pi - 2];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 2]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 2], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ stage1 (vm, node, from[pi - 1]);
+ if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+ {
+ to_next[0] = from[pi - 2];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 2]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 2], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 4
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ if (pi - 1 >= 0)
+ stage1 (vm, node, from[pi - 1]);
+ if (pi - 2 >= 0)
+ stage2 (vm, node, from[pi - 2]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ stage1 (vm, node, from[pi - 1]);
+ stage2 (vm, node, from[pi - 2]);
+ to_next[0] = from[pi - 3];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 3]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 3], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ stage1 (vm, node, from[pi - 1]);
+ if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+ stage2 (vm, node, from[pi - 2]);
+ if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+ {
+ to_next[0] = from[pi - 3];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 3]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 3], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+
+#if NSTAGES == 5
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ if (pi - 1 >= 0)
+ stage1 (vm, node, from[pi - 1]);
+ if (pi - 2 >= 0)
+ stage2 (vm, node, from[pi - 2]);
+ if (pi - 3 >= 0)
+ stage3 (vm, node, from[pi - 3]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ stage1 (vm, node, from[pi - 1]);
+ stage2 (vm, node, from[pi - 2]);
+ stage3 (vm, node, from[pi - 3]);
+ to_next[0] = from[pi - 4];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 4]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 4], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ stage1 (vm, node, from[pi - 1]);
+ if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+ stage2 (vm, node, from[pi - 2]);
+ if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+ stage3 (vm, node, from[pi - 3]);
+ if (((pi - 4) >= 0) && ((pi - 4) < pi_limit))
+ {
+ to_next[0] = from[pi - 4];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 4]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 4], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 6
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ if (pi - 1 >= 0)
+ stage1 (vm, node, from[pi - 1]);
+ if (pi - 2 >= 0)
+ stage2 (vm, node, from[pi - 2]);
+ if (pi - 3 >= 0)
+ stage3 (vm, node, from[pi - 3]);
+ if (pi - 4 >= 0)
+ stage4 (vm, node, from[pi - 4]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ stage1 (vm, node, from[pi - 1]);
+ stage2 (vm, node, from[pi - 2]);
+ stage3 (vm, node, from[pi - 3]);
+ stage4 (vm, node, from[pi - 4]);
+ to_next[0] = from[pi - 5];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 5]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 5], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ stage1 (vm, node, from[pi - 1]);
+ if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+ stage2 (vm, node, from[pi - 2]);
+ if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+ stage3 (vm, node, from[pi - 3]);
+ if (((pi - 4) >= 0) && ((pi - 4) < pi_limit))
+ stage4 (vm, node, from[pi - 4]);
+ if (((pi - 5) >= 0) && ((pi - 5) < pi_limit))
+ {
+ to_next[0] = from[pi - 5];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 5]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 5], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/plugin/plugin.h b/src/vnet/plugin/plugin.h
new file mode 100644
index 00000000..6e1a3264
--- /dev/null
+++ b/src/vnet/plugin/plugin.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_plugin_h
+#define included_vnet_plugin_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/error.h>
+#include <vlib/unix/plugin.h>
+
+#endif /* included_vnet_plugin_h */
diff --git a/src/vnet/policer/node_funcs.c b/src/vnet/policer/node_funcs.c
new file mode 100644
index 00000000..fd031d02
--- /dev/null
+++ b/src/vnet/policer/node_funcs.c
@@ -0,0 +1,942 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/policer/policer.h>
+#include <vnet/ip/ip.h>
+#include <vnet/classify/policer_classify.h>
+#include <vnet/classify/vnet_classify.h>
+
+#define IP4_NON_DSCP_BITS 0x03
+#define IP4_DSCP_SHIFT 2
+#define IP6_NON_DSCP_BITS 0xf03fffff
+#define IP6_DSCP_SHIFT 22
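+
+/* DSCP occupies the upper 6 bits of the IPv4 TOS byte and bits
+   27..22 of the IPv6 version/traffic-class/flow-label word; the
+   masks above preserve everything else while the new value is
+   shifted in. */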
+
+/* Dispatch functions meant to be instantiated elsewhere */
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ u32 policer_index;
+} vnet_policer_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_policer_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vnet_policer_trace_t *t = va_arg (*args, vnet_policer_trace_t *);
+
+ s = format (s, "VNET_POLICER: sw_if_index %d policer_index %d next %d",
+ t->sw_if_index, t->policer_index, t->next_index);
+ return s;
+}
+
+#define foreach_vnet_policer_error \
+_(TRANSMIT, "Packets Transmitted") \
+_(DROP, "Packets Dropped")
+
+typedef enum
+{
+#define _(sym,str) VNET_POLICER_ERROR_##sym,
+ foreach_vnet_policer_error
+#undef _
+ VNET_POLICER_N_ERROR,
+} vnet_policer_error_t;
+
+static char *vnet_policer_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vnet_policer_error
+#undef _
+};
+
+static_always_inline void
+vnet_policer_mark (vlib_buffer_t * b, u8 dscp)
+{
+ ethernet_header_t *eh;
+ ip4_header_t *ip4h;
+ ip6_header_t *ip6h;
+ u16 type;
+
+ eh = (ethernet_header_t *) b->data;
+ type = clib_net_to_host_u16 (eh->type);
+
+ if (PREDICT_TRUE (type == ETHERNET_TYPE_IP4))
+ {
+      ip4h = (ip4_header_t *) & (b->data[sizeof (ethernet_header_t)]);
+ ip4h->tos &= IP4_NON_DSCP_BITS;
+ ip4h->tos |= dscp << IP4_DSCP_SHIFT;
+ ip4h->checksum = ip4_header_checksum (ip4h);
+ }
+ else
+ {
+ if (PREDICT_TRUE (type == ETHERNET_TYPE_IP6))
+ {
+ ip6h = (ip6_header_t *) & (b->data[sizeof (ethernet_header_t)]);
+ ip6h->ip_version_traffic_class_and_flow_label &=
+ clib_host_to_net_u32 (IP6_NON_DSCP_BITS);
+ ip6h->ip_version_traffic_class_and_flow_label |=
+ clib_host_to_net_u32 (dscp << IP6_DSCP_SHIFT);
+ }
+ }
+}
+
+static_always_inline
+ u8 vnet_policer_police (vlib_main_t * vm,
+ vlib_buffer_t * b,
+ u32 policer_index,
+ u64 time_in_policer_periods,
+ policer_result_e packet_color)
+{
+ u8 act;
+ u32 len;
+ u32 col;
+ policer_read_response_type_st *pol;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+
+ len = vlib_buffer_length_in_chain (vm, b);
+ pol = &pm->policers[policer_index];
+ col = vnet_police_packet (pol, len, packet_color, time_in_policer_periods);
+ act = pol->action[col];
+ if (PREDICT_TRUE (act == SSE2_QOS_ACTION_MARK_AND_TRANSMIT))
+ vnet_policer_mark (b, pol->mark_dscp[col]);
+
+ return act;
+}
+
+static inline uword
+vnet_policer_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, vnet_policer_index_t which)
+{
+ u32 n_left_from, *from, *to_next;
+ vnet_policer_next_t next_index;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ u64 time_in_policer_periods;
+ u32 transmitted = 0;
+
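+  /* Policer time advances in periods of
+     2^POLICER_TICKS_PER_PERIOD_SHIFT CPU ticks; one timestamp is
+     taken per frame and shared by every packet in it. */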
+ time_in_policer_periods =
+ clib_cpu_time_now () >> POLICER_TICKS_PER_PERIOD_SHIFT;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 pi0 = 0, pi1 = 0;
+ u8 act0, act1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *b2, *b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, LOAD);
+ vlib_prefetch_buffer_header (b3, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ next0 = VNET_POLICER_NEXT_TRANSMIT;
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ next1 = VNET_POLICER_NEXT_TRANSMIT;
+
+
+ if (which == VNET_POLICER_INDEX_BY_SW_IF_INDEX)
+ {
+ pi0 = pm->policer_index_by_sw_if_index[sw_if_index0];
+ pi1 = pm->policer_index_by_sw_if_index[sw_if_index1];
+ }
+
+ if (which == VNET_POLICER_INDEX_BY_OPAQUE)
+ {
+ pi0 = vnet_buffer (b0)->policer.index;
+ pi1 = vnet_buffer (b1)->policer.index;
+ }
+
+ if (which == VNET_POLICER_INDEX_BY_EITHER)
+ {
+ pi0 = vnet_buffer (b0)->policer.index;
+ pi0 = (pi0 != ~0) ? pi0 :
+ pm->policer_index_by_sw_if_index[sw_if_index0];
+ pi1 = vnet_buffer (b1)->policer.index;
+ pi1 = (pi1 != ~0) ? pi1 :
+ pm->policer_index_by_sw_if_index[sw_if_index1];
+ }
+
+ act0 = vnet_policer_police (vm, b0, pi0, time_in_policer_periods,
+ POLICE_CONFORM /* no chaining */ );
+
+ act1 = vnet_policer_police (vm, b1, pi1, time_in_policer_periods,
+ POLICE_CONFORM /* no chaining */ );
+
+ if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP)) /* drop action */
+ {
+ next0 = VNET_POLICER_NEXT_DROP;
+ b0->error = node->errors[VNET_POLICER_ERROR_DROP];
+ }
+ else /* transmit or mark-and-transmit action */
+ {
+ transmitted++;
+ }
+
+ if (PREDICT_FALSE (act1 == SSE2_QOS_ACTION_DROP)) /* drop action */
+ {
+ next1 = VNET_POLICER_NEXT_DROP;
+ b1->error = node->errors[VNET_POLICER_ERROR_DROP];
+ }
+ else /* transmit or mark-and-transmit action */
+ {
+ transmitted++;
+ }
+
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ vnet_policer_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+		  t->sw_if_index = sw_if_index0;
+		  t->next_index = next0;
+		  t->policer_index = pi0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ vnet_policer_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+		  t->sw_if_index = sw_if_index1;
+		  t->next_index = next1;
+		  t->policer_index = pi1;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ u32 pi0 = 0;
+ u8 act0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ next0 = VNET_POLICER_NEXT_TRANSMIT;
+
+ if (which == VNET_POLICER_INDEX_BY_SW_IF_INDEX)
+ pi0 = pm->policer_index_by_sw_if_index[sw_if_index0];
+
+ if (which == VNET_POLICER_INDEX_BY_OPAQUE)
+ pi0 = vnet_buffer (b0)->policer.index;
+
+ if (which == VNET_POLICER_INDEX_BY_EITHER)
+ {
+ pi0 = vnet_buffer (b0)->policer.index;
+ pi0 = (pi0 != ~0) ? pi0 :
+ pm->policer_index_by_sw_if_index[sw_if_index0];
+ }
+
+ act0 = vnet_policer_police (vm, b0, pi0, time_in_policer_periods,
+ POLICE_CONFORM /* no chaining */ );
+
+ if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP)) /* drop action */
+ {
+ next0 = VNET_POLICER_NEXT_DROP;
+ b0->error = node->errors[VNET_POLICER_ERROR_DROP];
+ }
+ else /* transmit or mark-and-transmit action */
+ {
+ transmitted++;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ vnet_policer_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->policer_index = pi0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ VNET_POLICER_ERROR_TRANSMIT, transmitted);
+ return frame->n_vectors;
+}
+
+uword
+vnet_policer_by_sw_if_index (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return vnet_policer_inline (vm, node, frame,
+ VNET_POLICER_INDEX_BY_SW_IF_INDEX);
+}
+
+uword
+vnet_policer_by_opaque (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return vnet_policer_inline (vm, node, frame, VNET_POLICER_INDEX_BY_OPAQUE);
+}
+
+uword
+vnet_policer_by_either (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return vnet_policer_inline (vm, node, frame, VNET_POLICER_INDEX_BY_EITHER);
+}
+
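+/*
+ * Note (added): this empty function is called from policer_init() purely
+ * so the linker keeps this compilation unit, and the graph nodes it
+ * registers, in the image.
+ */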
+void
+vnet_policer_node_funcs_reference (void)
+{
+}
+
+
+#define TEST_CODE 1
+
+#ifdef TEST_CODE
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (policer_by_sw_if_index_node, static) = {
+ .function = vnet_policer_by_sw_if_index,
+ .name = "policer-by-sw-if-index",
+ .vector_size = sizeof (u32),
+ .format_trace = format_policer_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vnet_policer_error_strings),
+ .error_strings = vnet_policer_error_strings,
+
+ .n_next_nodes = VNET_POLICER_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [VNET_POLICER_NEXT_TRANSMIT] = "ethernet-input",
+ [VNET_POLICER_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (policer_by_sw_if_index_node,
+ vnet_policer_by_sw_if_index);
+/* *INDENT-ON* */
+
+
+int
+test_policer_add_del (u32 rx_sw_if_index, u8 * config_name, int is_add)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_read_response_type_st *template;
+ policer_read_response_type_st *policer;
+ vnet_hw_interface_t *rxhi;
+ uword *p;
+
+ rxhi = vnet_get_sup_hw_interface (pm->vnet_main, rx_sw_if_index);
+
+ /* Make sure caller didn't pass a vlan subif, etc. */
+ if (rxhi->sw_if_index != rx_sw_if_index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (is_add)
+ {
+
+ p = hash_get_mem (pm->policer_config_by_name, config_name);
+
+ if (p == 0)
+ return -2;
+
+ template = pool_elt_at_index (pm->policer_templates, p[0]);
+
+ vnet_hw_interface_rx_redirect_to_node
+ (pm->vnet_main, rxhi->hw_if_index, policer_by_sw_if_index_node.index);
+
+ pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES);
+
+ policer[0] = template[0];
+
+ vec_validate (pm->policer_index_by_sw_if_index, rx_sw_if_index);
+ pm->policer_index_by_sw_if_index[rx_sw_if_index]
+ = policer - pm->policers;
+ }
+ else
+ {
+ u32 pi;
+ vnet_hw_interface_rx_redirect_to_node (pm->vnet_main,
+ rxhi->hw_if_index,
+ ~0 /* disable */ );
+
+ pi = pm->policer_index_by_sw_if_index[rx_sw_if_index];
+ pm->policer_index_by_sw_if_index[rx_sw_if_index] = ~0;
+ pool_put_index (pm->policers, pi);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+test_policer_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 rx_sw_if_index;
+ int rv;
+ u8 *config_name = 0;
+ int rx_set = 0;
+ int is_add = 1;
+ int is_show = 0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "intfc %U", unformat_vnet_sw_interface,
+ pm->vnet_main, &rx_sw_if_index))
+ rx_set = 1;
+ else if (unformat (line_input, "show"))
+ is_show = 1;
+ else if (unformat (line_input, "policer %s", &config_name))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (rx_set == 0)
+ {
+ error = clib_error_return (0, "interface not set");
+ goto done;
+ }
+
+ if (is_show)
+ {
+ u32 pi = pm->policer_index_by_sw_if_index[rx_sw_if_index];
+ policer_read_response_type_st *policer;
+ policer = pool_elt_at_index (pm->policers, pi);
+
+ vlib_cli_output (vm, "%U", format_policer_instance, policer);
+ goto done;
+ }
+
+ if (is_add && config_name == 0)
+ {
+ error = clib_error_return (0, "policer config name required");
+ goto done;
+ }
+
+ rv = test_policer_add_del (rx_sw_if_index, config_name, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+      error = clib_error_return
+	(0, "WARNING: test_policer_add_del returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_policer_command, static) = {
+  .path = "test policer",
+  .short_help =
+  "test policer intfc <intfc> policer <policer-config-name> [del]",
+  .function = test_policer_command_fn,
+};
+/* *INDENT-ON* */
+
+#endif /* TEST_CODE */
+
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ u32 table_index;
+ u32 offset;
+ u32 policer_index;
+} policer_classify_trace_t;
+
+static u8 *
+format_policer_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ policer_classify_trace_t *t = va_arg (*args, policer_classify_trace_t *);
+
+ s = format (s, "POLICER_CLASSIFY: sw_if_index %d next %d table %d offset %d"
+ " policer_index %d",
+ t->sw_if_index, t->next_index, t->table_index, t->offset,
+ t->policer_index);
+ return s;
+}
+
+#define foreach_policer_classify_error \
+_(MISS, "Policer classify misses") \
+_(HIT, "Policer classify hits") \
+_(CHAIN_HIT, "Polcier classify hits after chain walk") \
+_(DROP, "Policer classify action drop")
+
+typedef enum
+{
+#define _(sym,str) POLICER_CLASSIFY_ERROR_##sym,
+ foreach_policer_classify_error
+#undef _
+ POLICER_CLASSIFY_N_ERROR,
+} policer_classify_error_t;
+
+static char *policer_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_policer_classify_error
+#undef _
+};
+
+static inline uword
+policer_classify_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ policer_classify_table_id_t tid)
+{
+ u32 n_left_from, *from, *to_next;
+ policer_classify_next_index_t next_index;
+ policer_classify_main_t *pcm = &policer_classify_main;
+ vnet_classify_main_t *vcm = pcm->vnet_classify_main;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ u32 drop = 0;
+ u32 n_next_nodes;
+ u64 time_in_policer_periods;
+
+ time_in_policer_periods =
+ clib_cpu_time_now () >> POLICER_TICKS_PER_PERIOD_SHIFT;
+
+ n_next_nodes = node->n_next_nodes;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ /* First pass: compute hashes */
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ u8 *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t *t0, *t1;
+
+ /* Prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = b1->data;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ pcm->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 =
+ pcm->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ vnet_buffer (b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer (b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ u8 *h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ pcm->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = POLICER_CLASSIFY_NEXT_INDEX_DROP;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+ u64 hash0;
+ u8 *h0;
+ u8 act0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
+ vnet_classify_table_t *tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer (p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer (p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* Speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+
+ if (tid == POLICER_CLASSIFY_TABLE_L2)
+ {
+ /* Feature bitmap update and determine the next node */
+ next0 = vnet_l2_feature_next (b0, pcm->feat_next_node_index,
+ L2INPUT_FEAT_POLICER_CLAS);
+ }
+ else
+ vnet_get_config_data (pcm->vnet_config_main[tid],
+ &b0->current_config_index, &next0,
+ /* # bytes of config data */ 0);
+
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+
+ if (e0)
+ {
+ act0 = vnet_policer_police (vm,
+ b0,
+ e0->next_index,
+ time_in_policer_periods,
+ e0->opaque_index);
+ if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP))
+ {
+ next0 = POLICER_CLASSIFY_NEXT_INDEX_DROP;
+ b0->error = node->errors[POLICER_CLASSIFY_ERROR_DROP];
+ drop++;
+ }
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (PREDICT_TRUE (t0->next_table_index != ~0))
+ {
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ }
+ else
+ {
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 =
+ vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ act0 = vnet_policer_police (vm,
+ b0,
+ e0->next_index,
+ time_in_policer_periods,
+ e0->opaque_index);
+ if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP))
+ {
+ next0 = POLICER_CLASSIFY_NEXT_INDEX_DROP;
+ b0->error =
+ node->errors[POLICER_CLASSIFY_ERROR_DROP];
+ drop++;
+ }
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ policer_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0;
+ t->policer_index = e0 ? e0->next_index : ~0;
+ }
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ POLICER_CLASSIFY_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ POLICER_CLASSIFY_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ POLICER_CLASSIFY_ERROR_CHAIN_HIT, chain_hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ POLICER_CLASSIFY_ERROR_DROP, drop);
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_policer_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return policer_classify_inline (vm, node, frame,
+ POLICER_CLASSIFY_TABLE_IP4);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_policer_classify_node) = {
+ .function = ip4_policer_classify,
+ .name = "ip4-policer-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_policer_classify_trace,
+ .n_errors = ARRAY_LEN(policer_classify_error_strings),
+ .error_strings = policer_classify_error_strings,
+ .n_next_nodes = POLICER_CLASSIFY_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_policer_classify_node, ip4_policer_classify);
+/* *INDENT-ON* */
+
+static uword
+ip6_policer_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return policer_classify_inline (vm, node, frame,
+ POLICER_CLASSIFY_TABLE_IP6);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_policer_classify_node) = {
+ .function = ip6_policer_classify,
+ .name = "ip6-policer-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_policer_classify_trace,
+ .n_errors = ARRAY_LEN(policer_classify_error_strings),
+ .error_strings = policer_classify_error_strings,
+ .n_next_nodes = POLICER_CLASSIFY_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_policer_classify_node, ip6_policer_classify);
+/* *INDENT-ON* */
+
+static uword
+l2_policer_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return policer_classify_inline (vm, node, frame, POLICER_CLASSIFY_TABLE_L2);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_policer_classify_node) = {
+  .function = l2_policer_classify,
+  .name = "l2-policer-classify",
+  .vector_size = sizeof (u32),
+  .format_trace = format_policer_classify_trace,
+  .n_errors = ARRAY_LEN(policer_classify_error_strings),
+  .error_strings = policer_classify_error_strings,
+  .n_next_nodes = POLICER_CLASSIFY_NEXT_INDEX_N_NEXT,
+  .next_nodes = {
+    [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_policer_classify_node, l2_policer_classify);
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+policer_classify_init (vlib_main_t * vm)
+{
+ policer_classify_main_t *pcm = &policer_classify_main;
+
+ pcm->vlib_main = vm;
+ pcm->vnet_main = vnet_get_main ();
+ pcm->vnet_classify_main = &vnet_classify_main;
+
+ /* Initialize L2 feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_policer_classify_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ pcm->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (policer_classify_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/policer/police.h b/src/vnet/policer/police.h
new file mode 100644
index 00000000..34bcf9ca
--- /dev/null
+++ b/src/vnet/policer/police.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __POLICE_H__
+#define __POLICE_H__
+
+typedef enum
+{
+ POLICE_CONFORM = 0,
+ POLICE_EXCEED = 1,
+ POLICE_VIOLATE = 2,
+} policer_result_e;
+
+// This is the hardware representation of the policer.
+// To be multithread-safe, the policer is accessed through a spin-lock
+// on the lock field. (For a policer update operation, 24B needs to be
+// modified and this would be a challenge to do with atomic instructions.)
+// The structure is padded so that no other data is put into the same
+// 64B cache-line. This reduces cache-thrashing between threads.
+//
+// A note on scale:
+// The HW TSC tick is roughly one CPU clock cycle.
+// This is shifted to create a larger period, with a goal of roughly 50usec.
+// The period time will vary based on CPU clock speed.
+// CPU speeds of 1 GHz to 8 GHz are targeted.
+// The shift amount is a constant 17 bits, resulting in a period between
+// 16usec (8 GHz CPU) and 131usec (1 GHz CPU).
+// The token_per_period computation takes into account the clock speed.
+//
+// The 32-bit bucket/limit supports about 850ms of burst on a 40GE port,
+// or 340ms on a 100GE port. If a larger burst is configured, then the
+// programmed value is simply capped at 2^32-1. If we needed to support
+// more than that, the bucket and limit fields could be expanded.
+//
+// tokens_per_period should be > 1000 to support 0.1% granularity.
+// To support lower rates (which would not meet this requirement), the packet
+// length, bucket, and limit values can be scaled. The scale is a power of 2
+// so the multiplication can be implemented as a shift. The control plane
+// computes the shift amount to be the largest possible that still supports the
+// burst size. This makes the rate accuracy as high as possible.
+//
+// The 64-bit last_update_time supports a 4 GHz CPU without rollover for 100 years.
+//
+// The lock field should be used for a spin-lock on the struct.
+
+#define POLICER_TICKS_PER_PERIOD_SHIFT 17
+#define POLICER_TICKS_PER_PERIOD (1 << POLICER_TICKS_PER_PERIOD_SHIFT)
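+
+/*
+ * Worked example (added, assuming a 2 GHz TSC): one policer period is
+ * 2^17 ticks = 131072 / 2e9 s, i.e. about 65.5 usec. For a 1 Gbps CIR
+ * (125,000,000 bytes/sec), cir_tokens_per_period is then roughly
+ * 125e6 * 65.5e-6 ~= 8192 tokens (bytes) per period, comfortably above
+ * the ~1000-token floor noted above for 0.1% rate granularity.
+ */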
+
+typedef struct
+{
+
+ u32 lock; // for exclusive access to the struct
+
+ u32 single_rate; // 1 = single rate policer, 0 = two rate policer
+ u32 color_aware; // for hierarchical policing
+ u32 scale; // power-of-2 shift amount for lower rates
+ u8 action[3];
+ u8 mark_dscp[3];
+ u8 pad[2];
+
+ // Fields are marked as 2R if they are only used for a 2-rate policer,
+ // and MOD if they are modified as part of the update operation.
+ // 1 token = 1 byte.
+
+ u32 cir_tokens_per_period; // # of tokens for each period
+ u32 pir_tokens_per_period; // 2R
+
+ u32 current_limit;
+ u32 current_bucket; // MOD
+ u32 extended_limit;
+ u32 extended_bucket; // MOD
+
+ u64 last_update_time; // MOD
+ u64 pad64;
+
+} policer_read_response_type_st;
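+
+/*
+ * Note (added): the fields above total exactly 64 bytes
+ * (16 + 8 + 24 + 16), so a cache-line-aligned instance occupies a single
+ * 64B line, as the comment block above intends.
+ */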
+
+static inline policer_result_e
+vnet_police_packet (policer_read_response_type_st * policer,
+ u32 packet_length,
+ policer_result_e packet_color, u64 time)
+{
+ u64 n_periods;
+ u64 current_tokens, extended_tokens;
+ policer_result_e result;
+
+ // Scale packet length to support a wide range of speeds
+ packet_length = packet_length << policer->scale;
+
+ // Compute the number of policer periods that have passed since the last
+ // operation.
+ n_periods = time - policer->last_update_time;
+ policer->last_update_time = time;
+
+ // Since there is no background last-update-time adjustment, n_periods
+ // could grow large if the policer is idle for a long time. This could
+ // cause a 64-bit overflow when computing tokens_per_period * num_periods.
+ // It will overflow if log2(n_periods) + log2(tokens_per_period) > 64.
+ //
+  // To mitigate this, the policer configuration algorithm ensures that
+  // tokens_per_period is less than 2^22, i.e. a 22-bit value rather than a
+  // 32-bit one. Thus overflow can only occur if log2(n_periods) > 64 - 22 =
+  // 42. 2^42 minimum-sized (16us) periods is roughly 2 years, so this can
+  // rarely occur. If overflow does happen, the only effect will be that
+ // fewer tokens than the max burst will be added to the bucket for this
+ // packet. This constraint on tokens_per_period lets the ucode omit
+ // code to dynamically check for or prevent the overflow.
+
+ if (policer->single_rate)
+ {
+
+ // Compute number of tokens for this time period
+ current_tokens =
+ policer->current_bucket + n_periods * policer->cir_tokens_per_period;
+ if (current_tokens > policer->current_limit)
+ {
+ current_tokens = policer->current_limit;
+ }
+
+ extended_tokens =
+ policer->extended_bucket + n_periods * policer->cir_tokens_per_period;
+ if (extended_tokens > policer->extended_limit)
+ {
+ extended_tokens = policer->extended_limit;
+ }
+
+ // Determine color
+
+ if ((!policer->color_aware || (packet_color == POLICE_CONFORM))
+ && (current_tokens >= packet_length))
+ {
+ policer->current_bucket = current_tokens - packet_length;
+ policer->extended_bucket = extended_tokens - packet_length;
+ result = POLICE_CONFORM;
+ }
+ else if ((!policer->color_aware || (packet_color != POLICE_VIOLATE))
+ && (extended_tokens >= packet_length))
+ {
+ policer->current_bucket = current_tokens;
+ policer->extended_bucket = extended_tokens - packet_length;
+ result = POLICE_EXCEED;
+ }
+ else
+ {
+ policer->current_bucket = current_tokens;
+ policer->extended_bucket = extended_tokens;
+ result = POLICE_VIOLATE;
+ }
+
+ }
+ else
+ {
+ // Two-rate policer
+
+ // Compute number of tokens for this time period
+ current_tokens =
+ policer->current_bucket + n_periods * policer->cir_tokens_per_period;
+ extended_tokens =
+ policer->extended_bucket + n_periods * policer->pir_tokens_per_period;
+ if (current_tokens > policer->current_limit)
+ {
+ current_tokens = policer->current_limit;
+ }
+ if (extended_tokens > policer->extended_limit)
+ {
+ extended_tokens = policer->extended_limit;
+ }
+
+ // Determine color
+
+ if ((policer->color_aware && (packet_color == POLICE_VIOLATE))
+ || (extended_tokens < packet_length))
+ {
+ policer->current_bucket = current_tokens;
+ policer->extended_bucket = extended_tokens;
+ result = POLICE_VIOLATE;
+ }
+ else if ((policer->color_aware && (packet_color == POLICE_EXCEED))
+ || (current_tokens < packet_length))
+ {
+ policer->current_bucket = current_tokens;
+ policer->extended_bucket = extended_tokens - packet_length;
+ result = POLICE_EXCEED;
+ }
+ else
+ {
+ policer->current_bucket = current_tokens - packet_length;
+ policer->extended_bucket = extended_tokens - packet_length;
+ result = POLICE_CONFORM;
+ }
+ }
+ return result;
+}
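+
+/*
+ * Usage sketch (added, illustrative): policing one packet against an
+ * already-configured policer, where "pol" and "len" are assumed to come
+ * from the caller.
+ *
+ *   u64 now = clib_cpu_time_now () >> POLICER_TICKS_PER_PERIOD_SHIFT;
+ *   policer_result_e col =
+ *     vnet_police_packet (pol, len, POLICE_CONFORM, now);
+ *   // col is POLICE_CONFORM, POLICE_EXCEED, or POLICE_VIOLATE; the
+ *   // caller then applies the action configured for that color.
+ */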
+
+#endif // __POLICE_H__
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/policer/policer.api b/src/vnet/policer/policer.api
new file mode 100644
index 00000000..26c69032
--- /dev/null
+++ b/src/vnet/policer/policer.api
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Add/del policer
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add policer if non-zero, else delete
+ @param name - policer name
+ @param cir - CIR
+ @param eir - EIR
+ @param cb - Committed Burst
+ @param eb - Excess or Peak Burst
+ @param rate_type - rate type
+ @param round_type - rounding type
+ @param type - policer algorithm
+ @param color_aware - 0=color-blind, 1=color-aware
+ @param conform_action_type - conform action type
+    @param conform_dscp - DSCP for conform mark-and-transmit action
+    @param exceed_action_type - exceed action type
+    @param exceed_dscp - DSCP for exceed mark-and-transmit action
+    @param violate_action_type - violate action type
+    @param violate_dscp - DSCP for violate mark-and-transmit action
+*/
+define policer_add_del
+{
+ u32 client_index;
+ u32 context;
+
+ u8 is_add;
+ u8 name[64];
+ u32 cir;
+ u32 eir;
+ u64 cb;
+ u64 eb;
+ u8 rate_type;
+ u8 round_type;
+ u8 type;
+ u8 color_aware;
+ u8 conform_action_type;
+ u8 conform_dscp;
+ u8 exceed_action_type;
+ u8 exceed_dscp;
+ u8 violate_action_type;
+ u8 violate_dscp;
+};
+
+/** \brief Add/del policer response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param policer_index - for add, returned index of the new policer
+*/
+define policer_add_del_reply
+{
+ u32 context;
+ i32 retval;
+ u32 policer_index;
+};
+
+/** \brief Get list of policers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param match_name_valid - if 0, request all policers; otherwise use match_name
+ @param match_name - policer name
+*/
+define policer_dump
+{
+ u32 client_index;
+ u32 context;
+
+ u8 match_name_valid;
+ u8 match_name[64];
+};
+
+/** \brief Policer operational state response.
+ @param context - sender context, to match reply w/ request
+ @param name - policer name
+ @param cir - CIR
+ @param eir - EIR
+ @param cb - Committed Burst
+ @param eb - Excess or Peak Burst
+ @param rate_type - rate type
+ @param round_type - rounding type
+ @param type - policer algorithm
+ @param conform_action_type - conform action type
+    @param conform_dscp - DSCP for conform mark-and-transmit action
+    @param exceed_action_type - exceed action type
+    @param exceed_dscp - DSCP for exceed mark-and-transmit action
+    @param violate_action_type - violate action type
+    @param violate_dscp - DSCP for violate mark-and-transmit action
+ @param single_rate - 1 = single rate policer, 0 = two rate policer
+ @param color_aware - for hierarchical policing
+ @param scale - power-of-2 shift amount for lower rates
+ @param cir_tokens_per_period - number of tokens for each period
+ @param pir_tokens_per_period - number of tokens for each period for 2-rate policer
+ @param current_limit - current limit
+ @param current_bucket - current bucket
+ @param extended_limit - extended limit
+ @param extended_bucket - extended bucket
+ @param last_update_time - last update time
+*/
+define policer_details
+{
+ u32 context;
+
+ u8 name[64];
+ u32 cir;
+ u32 eir;
+ u64 cb;
+ u64 eb;
+ u8 rate_type;
+ u8 round_type;
+ u8 type;
+ u8 conform_action_type;
+ u8 conform_dscp;
+ u8 exceed_action_type;
+ u8 exceed_dscp;
+ u8 violate_action_type;
+ u8 violate_dscp;
+ u8 single_rate;
+ u8 color_aware;
+ u32 scale;
+ u32 cir_tokens_per_period;
+ u32 pir_tokens_per_period;
+ u32 current_limit;
+ u32 current_bucket;
+ u32 extended_limit;
+ u32 extended_bucket;
+ u64 last_update_time;
+};
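+
+/*
+ * Note (added): the multi-byte fields in this reply are sent in network
+ * byte order; see the htonl()/clib_host_to_net_u64() calls in
+ * send_policer_details() in policer_api.c.
+ */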
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/policer/policer.c b/src/vnet/policer/policer.c
new file mode 100644
index 00000000..5a7b7711
--- /dev/null
+++ b/src/vnet/policer/policer.c
@@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdint.h>
+#include <vnet/policer/policer.h>
+#include <vnet/classify/vnet_classify.h>
+
+clib_error_t *
+policer_add_del (vlib_main_t * vm,
+ u8 * name,
+ sse2_qos_pol_cfg_params_st * cfg,
+ u32 * policer_index, u8 is_add)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_read_response_type_st test_policer;
+ policer_read_response_type_st *policer;
+ uword *p;
+ u32 pi;
+ int rv;
+
+ p = hash_get_mem (pm->policer_config_by_name, name);
+
+ if (is_add == 0)
+ {
+ /* free policer config and template */
+ if (p == 0)
+ {
+ vec_free (name);
+ return clib_error_return (0, "No such policer configuration");
+ }
+ pool_put_index (pm->configs, p[0]);
+ pool_put_index (pm->policer_templates, p[0]);
+ hash_unset_mem (pm->policer_config_by_name, name);
+
+ /* free policer */
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p == 0)
+ {
+ vec_free (name);
+ return clib_error_return (0, "No such policer");
+ }
+ pool_put_index (pm->policers, p[0]);
+ hash_unset_mem (pm->policer_index_by_name, name);
+
+ vec_free (name);
+ return 0;
+ }
+
+ if (p != 0)
+ {
+ vec_free (name);
+ return clib_error_return (0, "Policer already exists");
+ }
+
+ /* Vet the configuration before adding it to the table */
+ rv = sse2_pol_logical_2_physical (cfg, &test_policer);
+
+ if (rv == 0)
+ {
+ policer_read_response_type_st *pp;
+ sse2_qos_pol_cfg_params_st *cp;
+
+ pool_get (pm->configs, cp);
+ pool_get (pm->policer_templates, pp);
+
+ ASSERT (cp - pm->configs == pp - pm->policer_templates);
+
+ clib_memcpy (cp, cfg, sizeof (*cp));
+ clib_memcpy (pp, &test_policer, sizeof (*pp));
+
+ hash_set_mem (pm->policer_config_by_name, name, cp - pm->configs);
+ pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES);
+ policer[0] = pp[0];
+ pi = policer - pm->policers;
+ hash_set_mem (pm->policer_index_by_name, name, pi);
+ *policer_index = pi;
+ }
+ else
+ {
+ vec_free (name);
+ return clib_error_return (0, "Config failed sanity check");
+ }
+
+ return 0;
+}
+
+u8 *
+format_policer_instance (u8 * s, va_list * va)
+{
+ policer_read_response_type_st *i
+ = va_arg (*va, policer_read_response_type_st *);
+
+ s = format (s, "policer at %llx: %s rate, %s color-aware\n",
+ i, i->single_rate ? "single" : "dual",
+ i->color_aware ? "is" : "not");
+ s = format (s, "cir %u tok/period, pir %u tok/period, scale %u\n",
+ i->cir_tokens_per_period, i->pir_tokens_per_period, i->scale);
+ s = format (s, "cur lim %u, cur bkt %u, ext lim %u, ext bkt %u\n",
+ i->current_limit,
+ i->current_bucket, i->extended_limit, i->extended_bucket);
+ s = format (s, "last update %llu\n", i->last_update_time);
+ return s;
+}
+
+static u8 *
+format_policer_round_type (u8 * s, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (c->rnd_type == SSE2_QOS_ROUND_TO_CLOSEST)
+ s = format (s, "closest");
+ else if (c->rnd_type == SSE2_QOS_ROUND_TO_UP)
+ s = format (s, "up");
+ else if (c->rnd_type == SSE2_QOS_ROUND_TO_DOWN)
+ s = format (s, "down");
+ else
+ s = format (s, "ILLEGAL");
+ return s;
+}
+
+
+static u8 *
+format_policer_rate_type (u8 * s, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (c->rate_type == SSE2_QOS_RATE_KBPS)
+ s = format (s, "kbps");
+ else if (c->rate_type == SSE2_QOS_RATE_PPS)
+ s = format (s, "pps");
+ else
+ s = format (s, "ILLEGAL");
+ return s;
+}
+
+static u8 *
+format_policer_type (u8 * s, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (c->rfc == SSE2_QOS_POLICER_TYPE_1R2C)
+ s = format (s, "1r2c");
+
+ else if (c->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)
+ s = format (s, "1r3c");
+
+ else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698)
+ s = format (s, "2r3c-2698");
+
+ else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)
+ s = format (s, "2r3c-4115");
+
+ else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1)
+ s = format (s, "2r3c-mef5cf1");
+ else
+ s = format (s, "ILLEGAL");
+ return s;
+}
+
+static u8 *
+format_dscp (u8 * s, va_list * va)
+{
+ u32 i = va_arg (*va, u32);
+ char *t = 0;
+
+ switch (i)
+ {
+#define _(v,f,str) case VNET_DSCP_##f: t = str; break;
+ foreach_vnet_dscp
+#undef _
+ default:
+ return format (s, "ILLEGAL");
+ }
+ s = format (s, "%s", t);
+ return s;
+}
+
+static u8 *
+format_policer_action_type (u8 * s, va_list * va)
+{
+ sse2_qos_pol_action_params_st *a
+ = va_arg (*va, sse2_qos_pol_action_params_st *);
+
+ if (a->action_type == SSE2_QOS_ACTION_DROP)
+ s = format (s, "drop");
+ else if (a->action_type == SSE2_QOS_ACTION_TRANSMIT)
+ s = format (s, "transmit");
+ else if (a->action_type == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ s = format (s, "mark-and-transmit %U", format_dscp, a->dscp);
+ else
+ s = format (s, "ILLEGAL");
+ return s;
+}
+
+u8 *
+format_policer_config (u8 * s, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ s = format (s, "type %U cir %u eir %u cb %u eb %u\n",
+ format_policer_type, c,
+ c->rb.kbps.cir_kbps,
+ c->rb.kbps.eir_kbps, c->rb.kbps.cb_bytes, c->rb.kbps.eb_bytes);
+ s = format (s, "rate type %U, round type %U\n",
+ format_policer_rate_type, c, format_policer_round_type, c);
+ s = format (s, "conform action %U, exceed action %U, violate action %U\n",
+ format_policer_action_type, &c->conform_action,
+ format_policer_action_type, &c->exceed_action,
+ format_policer_action_type, &c->violate_action);
+ return s;
+}
+
+static uword
+unformat_policer_type (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (!unformat (input, "type"))
+ return 0;
+
+ if (unformat (input, "1r2c"))
+ c->rfc = SSE2_QOS_POLICER_TYPE_1R2C;
+ else if (unformat (input, "1r3c"))
+ c->rfc = SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697;
+ else if (unformat (input, "2r3c-2698"))
+ c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698;
+ else if (unformat (input, "2r3c-4115"))
+ c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115;
+ else if (unformat (input, "2r3c-mef5cf1"))
+ c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1;
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_round_type (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (!unformat (input, "round"))
+ return 0;
+
+ if (unformat (input, "closest"))
+ c->rnd_type = SSE2_QOS_ROUND_TO_CLOSEST;
+ else if (unformat (input, "up"))
+ c->rnd_type = SSE2_QOS_ROUND_TO_UP;
+ else if (unformat (input, "down"))
+ c->rnd_type = SSE2_QOS_ROUND_TO_DOWN;
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_rate_type (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (!unformat (input, "rate"))
+ return 0;
+
+ if (unformat (input, "kbps"))
+ c->rate_type = SSE2_QOS_RATE_KBPS;
+ else if (unformat (input, "pps"))
+ c->rate_type = SSE2_QOS_RATE_PPS;
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_cir (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (unformat (input, "cir %u", &c->rb.kbps.cir_kbps))
+ return 1;
+ return 0;
+}
+
+static uword
+unformat_policer_eir (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (unformat (input, "eir %u", &c->rb.kbps.eir_kbps))
+ return 1;
+ return 0;
+}
+
+static uword
+unformat_policer_cb (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (unformat (input, "cb %u", &c->rb.kbps.cb_bytes))
+ return 1;
+ return 0;
+}
+
+static uword
+unformat_policer_eb (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (unformat (input, "eb %u", &c->rb.kbps.eb_bytes))
+ return 1;
+ return 0;
+}
+
+static uword
+unformat_dscp (unformat_input_t * input, va_list * va)
+{
+ u8 *r = va_arg (*va, u8 *);
+
+ if (0);
+#define _(v,f,str) else if (unformat (input, str)) *r = VNET_DSCP_##f;
+ foreach_vnet_dscp
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_action_type (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_action_params_st *a
+ = va_arg (*va, sse2_qos_pol_action_params_st *);
+
+ if (unformat (input, "drop"))
+ a->action_type = SSE2_QOS_ACTION_DROP;
+ else if (unformat (input, "transmit"))
+ a->action_type = SSE2_QOS_ACTION_TRANSMIT;
+ else if (unformat (input, "mark-and-transmit %U", unformat_dscp, &a->dscp))
+ a->action_type = SSE2_QOS_ACTION_MARK_AND_TRANSMIT;
+ else
+ return 0;
+ return 1;
+}
+
+static uword
+unformat_policer_action (unformat_input_t * input, va_list * va)
+{
+ sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *);
+
+ if (unformat (input, "conform-action %U", unformat_policer_action_type,
+ &c->conform_action))
+ return 1;
+ else if (unformat (input, "exceed-action %U", unformat_policer_action_type,
+ &c->exceed_action))
+ return 1;
+ else if (unformat (input, "violate-action %U", unformat_policer_action_type,
+ &c->violate_action))
+ return 1;
+ return 0;
+}
+
+static uword
+unformat_policer_classify_next_index (unformat_input_t * input, va_list * va)
+{
+ u32 *r = va_arg (*va, u32 *);
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ uword *p;
+ u8 *match_name = 0;
+
+ if (unformat (input, "%s", &match_name))
+ ;
+ else
+ return 0;
+
+ p = hash_get_mem (pm->policer_index_by_name, match_name);
+
+ if (p == 0)
+ return 0;
+
+ *r = p[0];
+
+ return 1;
+}
+
+static uword
+unformat_policer_classify_precolor (unformat_input_t * input, va_list * va)
+{
+ u32 *r = va_arg (*va, u32 *);
+
+ if (unformat (input, "conform-color"))
+ *r = POLICE_CONFORM;
+ else if (unformat (input, "exceed-color"))
+ *r = POLICE_EXCEED;
+ else
+ return 0;
+
+ return 1;
+}
+
+#define foreach_config_param \
+_(eb) \
+_(cb) \
+_(eir) \
+_(cir) \
+_(rate_type) \
+_(round_type) \
+_(type) \
+_(action)
+
+static clib_error_t *
+configure_policer_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ sse2_qos_pol_cfg_params_st c;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_add = 1;
+ u8 *name = 0;
+ u32 pi;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ memset (&c, 0, sizeof (c));
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "name %s", &name))
+ ;
+ else if (unformat (line_input, "color-aware"))
+ c.color_aware = 1;
+
+#define _(a) else if (unformat (line_input, "%U", unformat_policer_##a, &c)) ;
+ foreach_config_param
+#undef _
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ error = policer_add_del (vm, name, &c, &pi, is_add);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (configure_policer_command, static) = {
+ .path = "configure policer",
+ .short_help = "configure policer name <name> <params> ",
+ .function = configure_policer_command_fn,
+};
+/* *INDENT-ON* */
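+
+/*
+ * Example (added, illustrative): create a single-rate two-color policer
+ * named p1 with a 10 Mbps CIR and a 64 KB committed burst:
+ *
+ *   configure policer name p1 type 1r2c rate kbps round closest
+ *     cir 10000 cb 65536 conform-action transmit exceed-action drop
+ */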
+
+static clib_error_t *
+show_policer_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ hash_pair_t *p;
+ u32 pool_index;
+ u8 *match_name = 0;
+ u8 *name;
+ sse2_qos_pol_cfg_params_st *config;
+ policer_read_response_type_st *templ;
+
+ (void) unformat (input, "name %s", &match_name);
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, pm->policer_config_by_name,
+ ({
+ name = (u8 *) p->key;
+ if (match_name == 0 || !strcmp((char *) name, (char *) match_name))
+ {
+ pool_index = p->value[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ templ = pool_elt_at_index (pm->policer_templates, pool_index);
+ vlib_cli_output (vm, "Name \"%s\" %U ",
+ name, format_policer_config, config);
+ vlib_cli_output (vm, "Template %U",
+ format_policer_instance, templ);
+ vlib_cli_output (vm, "-----------");
+ }
+ }));
+ /* *INDENT-ON* */
+ return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_policer_command, static) = {
+ .path = "show policer",
+ .short_help = "show policer [name]",
+ .function = show_policer_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_policer_pools_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+
+ vlib_cli_output (vm, "pool sizes: configs=%d templates=%d policers=%d",
+ pool_elts (pm->configs),
+ pool_elts (pm->policer_templates),
+ pool_elts (pm->policers));
+ return 0;
+}
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_policer_pools_command, static) = {
+ .path = "show policer pools",
+ .short_help = "show policer pools",
+ .function = show_policer_pools_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+policer_init (vlib_main_t * vm)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ void vnet_policer_node_funcs_reference (void);
+
+ vnet_policer_node_funcs_reference ();
+
+ pm->vlib_main = vm;
+ pm->vnet_main = vnet_get_main ();
+
+ pm->policer_config_by_name = hash_create_string (0, sizeof (uword));
+ pm->policer_index_by_name = hash_create_string (0, sizeof (uword));
+
+ vnet_classify_register_unformat_policer_next_index_fn
+ (unformat_policer_classify_next_index);
+ vnet_classify_register_unformat_opaque_index_fn
+ (unformat_policer_classify_precolor);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (policer_init);
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/policer/policer.h b/src/vnet/policer/policer.h
new file mode 100644
index 00000000..8e2d7c79
--- /dev/null
+++ b/src/vnet/policer/policer.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_policer_h__
+#define __included_policer_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+#include <vnet/policer/xlate.h>
+#include <vnet/policer/police.h>
+
+typedef struct
+{
+ /* policer pool, aligned */
+ policer_read_response_type_st *policers;
+
+ /* config + template h/w policer instance parallel pools */
+ sse2_qos_pol_cfg_params_st *configs;
+ policer_read_response_type_st *policer_templates;
+
+ /* Config by name hash */
+ uword *policer_config_by_name;
+
+ /* Policer by name hash */
+ uword *policer_index_by_name;
+
+ /* Policer by sw_if_index vector */
+ u32 *policer_index_by_sw_if_index;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} vnet_policer_main_t;
+
+vnet_policer_main_t vnet_policer_main;
+
+typedef enum
+{
+ VNET_POLICER_INDEX_BY_SW_IF_INDEX,
+ VNET_POLICER_INDEX_BY_OPAQUE,
+ VNET_POLICER_INDEX_BY_EITHER,
+} vnet_policer_index_t;
+
+typedef enum
+{
+ VNET_POLICER_NEXT_TRANSMIT,
+ VNET_POLICER_NEXT_DROP,
+ VNET_POLICER_N_NEXT,
+} vnet_policer_next_t;
+
+#define foreach_vnet_dscp \
+ _(0 , CS0, "CS0") \
+ _(8 , CS1, "CS1") \
+ _(10, AF11, "AF11") \
+ _(12, AF12, "AF12") \
+ _(14, AF13, "AF13") \
+ _(16, CS2, "CS2") \
+ _(18, AF21, "AF21") \
+ _(20, AF22, "AF22") \
+ _(22, AF23, "AF23") \
+ _(24, CS3, "CS3") \
+ _(26, AF31, "AF31") \
+ _(28, AF32, "AF32") \
+ _(30, AF33, "AF33") \
+ _(32, CS4, "CS4") \
+ _(34, AF41, "AF41") \
+ _(36, AF42, "AF42") \
+ _(38, AF43, "AF43") \
+ _(40, CS5, "CS5") \
+ _(46, EF, "EF") \
+ _(48, CS6, "CS6") \
+ _(50, CS7, "CS7")
+
+typedef enum
+{
+#define _(v,f,str) VNET_DSCP_##f = v,
+ foreach_vnet_dscp
+#undef _
+} vnet_dscp_t;
+
+u8 *format_policer_instance (u8 * s, va_list * va);
+clib_error_t *policer_add_del (vlib_main_t * vm,
+ u8 * name,
+ sse2_qos_pol_cfg_params_st * cfg,
+ u32 * policer_index, u8 is_add);
+
+#endif /* __included_policer_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/policer/policer_api.c b/src/vnet/policer/policer_api.c
new file mode 100644
index 00000000..67fb9a4e
--- /dev/null
+++ b/src/vnet/policer/policer_api.c
@@ -0,0 +1,232 @@
+/*
+ *------------------------------------------------------------------
+ * policer_api.c - policer api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/policer/policer.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun		/* define endian-swap functions */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(POLICER_ADD_DEL, policer_add_del) \
+_(POLICER_DUMP, policer_dump)
+
+static void
+vl_api_policer_add_del_t_handler (vl_api_policer_add_del_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_add_del_reply_t *rmp;
+ int rv = 0;
+ u8 *name = NULL;
+ sse2_qos_pol_cfg_params_st cfg;
+ clib_error_t *error;
+ u32 policer_index;
+
+ name = format (0, "%s", mp->name);
+
+ memset (&cfg, 0, sizeof (cfg));
+ cfg.rfc = mp->type;
+ cfg.rnd_type = mp->round_type;
+ cfg.rate_type = mp->rate_type;
+ cfg.rb.kbps.cir_kbps = mp->cir;
+ cfg.rb.kbps.eir_kbps = mp->eir;
+ cfg.rb.kbps.cb_bytes = mp->cb;
+ cfg.rb.kbps.eb_bytes = mp->eb;
+ cfg.conform_action.action_type = mp->conform_action_type;
+ cfg.conform_action.dscp = mp->conform_dscp;
+ cfg.exceed_action.action_type = mp->exceed_action_type;
+ cfg.exceed_action.dscp = mp->exceed_dscp;
+ cfg.violate_action.action_type = mp->violate_action_type;
+ cfg.violate_action.dscp = mp->violate_dscp;
+ cfg.color_aware = mp->color_aware;
+
+ error = policer_add_del (vm, name, &cfg, &policer_index, mp->is_add);
+
+ if (error)
+ rv = VNET_API_ERROR_UNSPECIFIED;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_POLICER_ADD_DEL_REPLY,
+ ({
+ if (rv == 0 && mp->is_add)
+ rmp->policer_index = ntohl(policer_index);
+ else
+ rmp->policer_index = ~0;
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+send_policer_details (u8 * name,
+ sse2_qos_pol_cfg_params_st * config,
+ policer_read_response_type_st * templ,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_policer_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_POLICER_DETAILS);
+ mp->context = context;
+ mp->cir = htonl (config->rb.kbps.cir_kbps);
+ mp->eir = htonl (config->rb.kbps.eir_kbps);
+ mp->cb = clib_host_to_net_u64 (config->rb.kbps.cb_bytes);
+ mp->eb = clib_host_to_net_u64 (config->rb.kbps.eb_bytes);
+ mp->rate_type = config->rate_type;
+ mp->round_type = config->rnd_type;
+ mp->type = config->rfc;
+ mp->conform_action_type = config->conform_action.action_type;
+ mp->conform_dscp = config->conform_action.dscp;
+ mp->exceed_action_type = config->exceed_action.action_type;
+ mp->exceed_dscp = config->exceed_action.dscp;
+ mp->violate_action_type = config->violate_action.action_type;
+ mp->violate_dscp = config->violate_action.dscp;
+ mp->single_rate = templ->single_rate ? 1 : 0;
+ mp->color_aware = templ->color_aware ? 1 : 0;
+ mp->scale = htonl (templ->scale);
+ mp->cir_tokens_per_period = htonl (templ->cir_tokens_per_period);
+ mp->pir_tokens_per_period = htonl (templ->pir_tokens_per_period);
+ mp->current_limit = htonl (templ->current_limit);
+ mp->current_bucket = htonl (templ->current_bucket);
+ mp->extended_limit = htonl (templ->extended_limit);
+ mp->extended_bucket = htonl (templ->extended_bucket);
+ mp->last_update_time = clib_host_to_net_u64 (templ->last_update_time);
+
+ strncpy ((char *) mp->name, (char *) name, ARRAY_LEN (mp->name) - 1);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ hash_pair_t *hp;
+ uword *p;
+ u32 pool_index;
+ u8 *match_name = 0;
+ u8 *name;
+ sse2_qos_pol_cfg_params_st *config;
+ policer_read_response_type_st *templ;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+  if (mp->match_name_valid)
+    {
+      match_name = format (0, "%s%c", mp->match_name, 0);
+      p = hash_get_mem (pm->policer_config_by_name, match_name);
+ if (p)
+ {
+ pool_index = p[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ templ = pool_elt_at_index (pm->policer_templates, pool_index);
+ send_policer_details (match_name, config, templ, q, mp->context);
+ }
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ hash_foreach_pair (hp, pm->policer_config_by_name,
+ ({
+ name = (u8 *) hp->key;
+ pool_index = hp->value[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ templ = pool_elt_at_index (pm->policer_templates, pool_index);
+ send_policer_details(name, config, templ, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+}
+
+/*
+ * policer_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_policer;
+#undef _
+}
+
+static clib_error_t *
+policer_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (policer_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/policer/xlate.c b/src/vnet/policer/xlate.c
new file mode 100644
index 00000000..af8bc5a9
--- /dev/null
+++ b/src/vnet/policer/xlate.c
@@ -0,0 +1,1501 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <string.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+#include <vnet/policer/xlate.h>
+#include <vnet/policer/police.h>
+
+#define INTERNAL_SS 1
+
+/* debugs */
+#define SSE2_QOS_DEBUG_ERROR(msg, args...) \
+ fformat(stderr, msg "\n", ##args);
+
+#define SSE2_QOS_DEBUG_INFO(msg, args...) \
+ fformat(stderr, msg "\n", ##args);
+
+
+#define SSE2_QOS_TR_ERR(TpParms...)
+// {
+// }
+
+#define SSE2_QOS_TR_INFO(TpParms...)
+
+#ifndef MIN
+#define MIN(x,y) (((x)<(y))?(x):(y))
+#endif
+
+#ifndef MAX
+#define MAX(x,y) (((x)>(y))?(x):(y))
+#endif
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_OFFSET 0
+#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_MASK 8
+#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_SHIFT 24
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_OFFSET 2
+#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_MASK 2
+#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_SHIFT 10
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_OFFSET 3
+#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_MASK 2
+#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_SHIFT 0
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_OFFSET 4
+#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_MASK 32
+#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_SHIFT 0
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_OFFSET 8
+#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_MASK 2
+#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_SHIFT 30
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_AN_OFFSET 8
+#define IPE_POLICER_FULL_WRITE_REQUEST_AN_MASK 1
+#define IPE_POLICER_FULL_WRITE_REQUEST_AN_SHIFT 29
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_OFFSET 8
+#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_MASK 4
+#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_SHIFT 22
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_OFFSET 9
+#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_MASK 11
+#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_SHIFT 11
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_OFFSET 10
+#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_MASK 11
+#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_SHIFT 0
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_OFFSET 12
+#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_MASK 5
+#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_SHIFT 27
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_OFFSET 12
+#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_MASK 7
+#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_SHIFT 20
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_OFFSET 13
+#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_MASK 5
+#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_SHIFT 15
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_OFFSET 14
+#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_MASK 7
+#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_SHIFT 8
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_CB_OFFSET 16
+#define IPE_POLICER_FULL_WRITE_REQUEST_CB_MASK 31
+#define IPE_POLICER_FULL_WRITE_REQUEST_CB_SHIFT 0
+
+#define IPE_POLICER_FULL_WRITE_REQUEST_EB_OFFSET 20
+#define IPE_POLICER_FULL_WRITE_REQUEST_EB_MASK 31
+#define IPE_POLICER_FULL_WRITE_REQUEST_EB_SHIFT 0
+
+#define IPE_RFC_RFC2697 0x00000000
+#define IPE_RFC_RFC2698 0x00000001
+#define IPE_RFC_RFC4115 0x00000002
+#define IPE_RFC_MEF5CF1 0x00000003
+
+/* End of constants copied from sse_ipe_desc_fmt.h */
+
+/* Misc Policer specific definitions */
+#define SSE2_QOS_POLICER_FIXED_PKT_SIZE 256
+
+// TODO check what can be provided by hw macro based on ASIC
+#define SSE2_QOS_POL_TICKS_PER_SEC 1000LL /* 1 tick = 1 ms */
+
+/*
+ * Default burst, in ms; at 1 ms per tick, this is multiplied by the
+ * rate in bytes/tick to yield a default burst in bytes
+ */
+#define SSE2_QOS_POL_DEF_BURST_BYTE 100
+
+/*
+ * Minimum burst needs to be such that the largest packet size is accommodated
+ */
+// Do we need to get it from some lib?
+#define SSE2_QOS_POL_MIN_BURST_BYTE (9*1024)
+
+
+/*
+ * Flag to indicate if AN is employed or not
+ * 1 - TRUE, 0 - FALSE
+ */
+#define SSE2_QOS_POL_ALLOW_NEGATIVE 1
+
+// Various Macros to take care of policer calculations
+
+#define SSE2_QOS_POL_COMM_BKT_MAX \
+ (1<<IPE_POLICER_FULL_WRITE_REQUEST_CB_MASK)
+#define SSE2_QOS_POL_EXTD_BKT_MAX \
+ (1<<IPE_POLICER_FULL_WRITE_REQUEST_EB_MASK)
+
+#define SSE2_QOS_POL_RATE_EXP_SIZE \
+ (IPE_POLICER_FULL_WRITE_REQUEST_REXP_MASK)
+#define SSE2_QOS_POL_RATE_EXP_MAX ((1<<SSE2_QOS_POL_RATE_EXP_SIZE) - 1)
+#define SSE2_QOS_POL_AVG_RATE_MANT_SIZE \
+ (IPE_POLICER_FULL_WRITE_REQUEST_ARM_MASK)
+#define SSE2_QOS_POL_AVG_RATE_MANT_MAX \
+ ((1<< SSE2_QOS_POL_AVG_RATE_MANT_SIZE) - 1)
+#define SSE2_QOS_POL_AVG_RATE_MAX \
+ (SSE2_QOS_POL_AVG_RATE_MANT_MAX << \
+ SSE2_QOS_POL_RATE_EXP_MAX)
+
+#define SSE2_QOS_POL_PEAK_RATE_MANT_SIZE \
+ (IPE_POLICER_FULL_WRITE_REQUEST_PRM_MASK)
+#define SSE2_QOS_POL_PEAK_RATE_MANT_MAX \
+ ((1<<SSE2_QOS_POL_PEAK_RATE_MANT_SIZE) - 1)
+#define SSE2_QOS_POL_PEAK_RATE_MAX \
+ (SSE2_QOS_POL_PEAK_RATE_MANT_MAX << \
+ SSE2_QOS_POL_RATE_EXP_MAX)
+
+#define SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_SIZE \
+ (IPE_POLICER_FULL_WRITE_REQUEST_CBLM_MASK)
+#define SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_MAX \
+ ((1<<SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_SIZE) - 1)
+#define SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_SIZE \
+ (IPE_POLICER_FULL_WRITE_REQUEST_CBLE_MASK)
+#define SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_MAX \
+ ((1<<SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_SIZE) - 1)
+#define SSE2_QOS_POL_COMM_BKT_LIMIT_MAX \
+ ((u64)SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_MAX << \
+ (u64)SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_MAX)
+
+#define SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_SIZE \
+ (IPE_POLICER_FULL_WRITE_REQUEST_EBLM_MASK)
+#define SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_MAX \
+ ((1<<SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_SIZE) - 1)
+#define SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_SIZE \
+ (IPE_POLICER_FULL_WRITE_REQUEST_EBLE_MASK)
+#define SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX \
+ ((1<<SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_SIZE) - 1)
+#define SSE2_QOS_POL_EXT_BKT_LIMIT_MAX \
+ ((u64)SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_MAX << \
+ (u64)SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX)
+
+/*
+ * Rates determine the units of the bucket
+ * 256.114688 Gbps < Rate 8 byte units
+ * 128.057344 Gbps < Rate <= 256.114688 Gbps 4 byte units
+ * 64.028672 Gbps < Rate <= 128.057344 Gbps 2 byte units
+ * Rate <= 64.028672 Gbps 1 byte units
+ *
+ * The code uses bytes per tick as opposed to Gigabits per second.
+ */
+#define RATE256 (256114688000LL / 8LL / SSE2_QOS_POL_TICKS_PER_SEC)
+#define RATE128 (128057344000LL / 8LL / SSE2_QOS_POL_TICKS_PER_SEC)
+#define RATE64 ( 64028672000LL / 8LL / SSE2_QOS_POL_TICKS_PER_SEC)
+
+#define RATE_OVER256_UNIT 8LL
+#define RATE_128TO256_UNIT 4LL
+#define RATE_64TO128_UNIT 2LL
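+
+/*
+ * For reference, the thresholds above evaluate to (in bytes/tick):
+ *   RATE64  =  64028672000 / 8 / 1000 =  8003584
+ *   RATE128 = 128057344000 / 8 / 1000 = 16007168
+ *   RATE256 = 256114688000 / 8 / 1000 = 32014336
+ */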
+
+static int
+sse2_qos_pol_round (u64 numerator,
+ u64 denominator,
+ u64 * rounded_value, sse2_qos_round_type_en round_type)
+{
+ int rc = 0;
+
+ if (denominator == 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Illegal denominator");
+ SSE2_QOS_TR_ERR (SSE2_QOSRM_TP_ERR_59);
+ return (EINVAL);
+ }
+
+ switch (round_type)
+ {
+ case SSE2_QOS_ROUND_TO_CLOSEST:
+ *rounded_value = ((numerator + (denominator >> 1)) / denominator);
+ break;
+
+ case SSE2_QOS_ROUND_TO_UP:
+ *rounded_value = (numerator / denominator);
+ if ((*rounded_value * denominator) < numerator)
+ {
+ *rounded_value += 1;
+ }
+ break;
+
+ case SSE2_QOS_ROUND_TO_DOWN:
+ *rounded_value = (numerator / denominator);
+ break;
+
+ case SSE2_QOS_ROUND_INVALID:
+ default:
+ SSE2_QOS_DEBUG_ERROR ("Illegal round type");
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_60, round_type);
+ rc = EINVAL;
+ break;
+ }
+ return (rc);
+}
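+
+/*
+ * Worked example (illustrative only): converting a 512000 kbps CIR to
+ * bytes/tick, as the callers below do:
+ *   numerator = 512000, denominator = (8 * 1000) / 1000 = 8
+ *   SSE2_QOS_ROUND_TO_CLOSEST: (512000 + 4) / 8 = 64000 bytes/tick,
+ *   i.e. 64 MB/s at 1 ms per tick = 512 Mbps.
+ */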
+
+
+static int
+sse2_pol_validate_cfg_params (sse2_qos_pol_cfg_params_st * cfg)
+{
+ u64 numer, denom, rnd_value;
+ u32 cir_hw, eir_hw;
+ int rc = 0;
+
+ if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) &&
+ (cfg->rb.kbps.eir_kbps < cfg->rb.kbps.cir_kbps))
+ {
+ SSE2_QOS_DEBUG_ERROR ("CIR (%u kbps) is greater than PIR (%u kbps)",
+ cfg->rb.kbps.cir_kbps, cfg->rb.kbps.eir_kbps);
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_39, cfg->rb.kbps.cir_kbps,
+ cfg->rb.kbps.eir_kbps);
+ return (EINVAL);
+ }
+
+ /*
+ * convert rates to bytes-per-tick
+ */
+ numer = (u64) (cfg->rb.kbps.cir_kbps);
+ denom = (u64) (8 * SSE2_QOS_POL_TICKS_PER_SEC) / 1000;
+ rc = sse2_qos_pol_round (numer, denom, &rnd_value,
+ (sse2_qos_round_type_en) cfg->rnd_type);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Unable to convert CIR to bytes/tick format");
+ // Error traced
+ return (rc);
+ }
+ cir_hw = (u32) rnd_value;
+
+ numer = (u64) (cfg->rb.kbps.eir_kbps);
+ rc = sse2_qos_pol_round (numer, denom, &rnd_value,
+ (sse2_qos_round_type_en) cfg->rnd_type);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Unable to convert EIR to bytes/tick format");
+ // Error traced
+ return (rc);
+ }
+ eir_hw = (u32) rnd_value;
+
+ if (cir_hw > SSE2_QOS_POL_AVG_RATE_MAX)
+ {
+ SSE2_QOS_DEBUG_ERROR ("hw cir (%u bytes/tick) is greater than the "
+ "max supported value (%u)", cir_hw,
+ SSE2_QOS_POL_AVG_RATE_MAX);
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_84, cir_hw, SSE2_QOS_POL_AVG_RATE_MAX);
+ return (EINVAL);
+ }
+
+ if (eir_hw > SSE2_QOS_POL_PEAK_RATE_MAX)
+ {
+ SSE2_QOS_DEBUG_ERROR ("hw eir (%u bytes/tick) is greater than the "
+ "max supported value (%u). Capping it to the max. "
+ "supported value", eir_hw,
+ SSE2_QOS_POL_PEAK_RATE_MAX);
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_85, eir_hw,
+ SSE2_QOS_POL_PEAK_RATE_MAX);
+ return (EINVAL);
+ }
+ /*
+ * CIR = 0, with bc != 0 is not allowed
+ */
+ if ((cfg->rb.kbps.cir_kbps == 0) && cfg->rb.kbps.cb_bytes)
+ {
+ SSE2_QOS_DEBUG_ERROR ("CIR = 0 with bc != 0");
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_55);
+ return (EINVAL);
+ }
+
+ if ((cfg->rb.kbps.eir_kbps == 0) &&
+ (cfg->rfc > SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697))
+ {
+ SSE2_QOS_DEBUG_ERROR ("EIR = 0 for a 2R3C policer (rfc: %u)", cfg->rfc);
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_23, cfg->rb.kbps.eir_kbps, cfg->rfc);
+ return (EINVAL);
+ }
+
+ if (cfg->rb.kbps.eir_kbps &&
+ (cfg->rfc < SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698))
+ {
+ SSE2_QOS_DEBUG_ERROR ("EIR: %u kbps for a 1-rate policer (rfc: %u)",
+ cfg->rb.kbps.eir_kbps, cfg->rfc);
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_23, cfg->rb.kbps.eir_kbps, cfg->rfc);
+ return (EINVAL);
+ }
+
+ if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) && cfg->rb.kbps.eb_bytes)
+ {
+      SSE2_QOS_DEBUG_ERROR ("For a 1R2C policer, EB burst cannot be > 0");
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_56);
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static void
+sse2_qos_convert_value_to_exp_mant_fmt (u64 value,
+ u16 max_exp_value,
+ u16 max_mant_value,
+ sse2_qos_round_type_en type,
+ u8 * exp, u32 * mant)
+{
+ u64 rnd_value;
+ u64 temp_mant;
+ u8 temp_exp;
+
+ /*
+ * Select the lowest possible exp, and the largest possible mant
+ */
+ temp_exp = 0;
+ temp_mant = value;
+ while (temp_exp <= max_exp_value)
+ {
+ if (temp_mant <= max_mant_value)
+ {
+ break;
+ }
+
+ temp_exp++;
+ rnd_value = 0;
+ (void) sse2_qos_pol_round ((u64) value, (u64) (1 << temp_exp),
+ &rnd_value, type);
+ temp_mant = rnd_value;
+ }
+
+ if (temp_exp > max_exp_value)
+ {
+ /*
+ * CAP mant to its max value, and decrement exp
+ */
+ temp_exp--;
+ temp_mant = max_mant_value;
+ }
+
+ *exp = temp_exp;
+ *mant = (u32) temp_mant;
+
+ SSE2_QOS_DEBUG_INFO ("value: 0x%llx, mant: %u, exp: %u", value, *mant,
+ *exp);
+ return;
+}
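+
+/*
+ * Worked example (illustrative only): value = 300, max_exp_value = 15,
+ * max_mant_value = 255, SSE2_QOS_ROUND_TO_CLOSEST:
+ *   exp = 0: mant = 300 > 255, keep going
+ *   exp = 1: mant = round(300 / 2) = 150 <= 255, done
+ * Result: exp = 1, mant = 150, which encodes 150 << 1 = 300 exactly.
+ */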
+
+static int
+sse2_pol_convert_cfg_rates_to_hw (sse2_qos_pol_cfg_params_st * cfg,
+ sse2_qos_pol_hw_params_st * hw)
+{
+ int rc = 0;
+ u32 cir_hw, eir_hw, hi_mant, hi_rate, cir_rnded, eir_rnded, eir_kbps;
+ u64 numer, denom, rnd_value;
+ u8 exp;
+
+ /*
+ * convert rates to bytes-per-tick (tick is 1ms)
+   * For rate conversion, the denominator is the same for CIR and EIR
+ */
+ denom = (u64) ((SSE2_QOS_POL_TICKS_PER_SEC * 8) / 1000);
+ numer = (u64) (cfg->rb.kbps.cir_kbps);
+ rc = sse2_qos_pol_round (numer, denom, &rnd_value,
+ (sse2_qos_round_type_en) cfg->rnd_type);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR
+ ("Rounding error, rate: %d kbps, rounding_type: %d",
+ cfg->rb.kbps.cir_kbps, cfg->rnd_type);
+ // Error is traced
+ return (rc);
+ }
+ cir_hw = (u32) rnd_value;
+
+ if (cfg->rb.kbps.cir_kbps && (cir_hw == 0))
+ {
+ /*
+ * After rounding, cir_hw = 0. Bump it up
+ */
+ cir_hw = 1;
+ }
+
+ if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C)
+ {
+ eir_kbps = 0;
+ }
+ else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)
+ {
+ eir_kbps = cfg->rb.kbps.cir_kbps;
+ }
+ else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)
+ {
+ eir_kbps = cfg->rb.kbps.eir_kbps - cfg->rb.kbps.cir_kbps;
+ }
+ else
+ {
+ eir_kbps = cfg->rb.kbps.eir_kbps;
+ }
+
+ numer = (u64) eir_kbps;
+ rc = sse2_qos_pol_round (numer, denom, &rnd_value,
+ (sse2_qos_round_type_en) cfg->rnd_type);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR
+ ("Rounding error, rate: %d kbps, rounding_type: %d", eir_kbps,
+ cfg->rnd_type);
+ // Error is traced
+ return (rc);
+ }
+ eir_hw = (u32) rnd_value;
+
+ if (eir_kbps && (eir_hw == 0))
+ {
+ /*
+ * After rounding, eir_hw = 0. Bump it up
+ */
+ eir_hw = 1;
+ }
+
+ SSE2_QOS_DEBUG_INFO ("cir_hw: %u bytes/tick, eir_hw: %u bytes/tick", cir_hw,
+ eir_hw);
+
+ if (cir_hw > eir_hw)
+ {
+ hi_rate = cir_hw;
+ }
+ else
+ {
+ hi_rate = eir_hw;
+ }
+
+ if ((cir_hw == 0) && (eir_hw == 0))
+ {
+ /*
+ * Both the rates are 0. Use exp = 15, and set the RFC to 4115. Also
+ * set AN = 0
+ */
+ exp = (u8) SSE2_QOS_POL_RATE_EXP_MAX;
+ hi_mant = 0;
+ hw->rfc = IPE_RFC_RFC4115;
+ hw->allow_negative = 0;
+ }
+ else
+ {
+ sse2_qos_convert_value_to_exp_mant_fmt (hi_rate,
+ (u16) SSE2_QOS_POL_RATE_EXP_MAX,
+ (u16)
+ SSE2_QOS_POL_AVG_RATE_MANT_MAX,
+ (sse2_qos_round_type_en)
+ cfg->rnd_type, &exp, &hi_mant);
+ }
+
+ denom = (1ULL << exp);
+ if (hi_rate == eir_hw)
+ {
+ hw->peak_rate_man = (u16) hi_mant;
+ rc = sse2_qos_pol_round ((u64) cir_hw, denom, &rnd_value,
+ (sse2_qos_round_type_en) cfg->rnd_type);
+ hw->avg_rate_man = (u16) rnd_value;
+ }
+ else
+ {
+ hw->avg_rate_man = (u16) hi_mant;
+ rc = sse2_qos_pol_round ((u64) eir_hw, denom, &rnd_value,
+ (sse2_qos_round_type_en) cfg->rnd_type);
+ hw->peak_rate_man = (u16) rnd_value;
+ }
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Rounding error");
+ // Error is traced
+ return (rc);
+ }
+ hw->rate_exp = exp;
+
+ if ((hw->avg_rate_man == 0) && (cfg->rb.kbps.cir_kbps))
+ {
+ /*
+ * cir was reduced to 0 during rounding. Bump it up
+ */
+ hw->avg_rate_man = 1;
+ SSE2_QOS_DEBUG_INFO ("CIR = 0 during rounding. Bump it up to %u "
+ "bytes/tick", (hw->avg_rate_man << hw->rate_exp));
+ }
+
+ if ((hw->peak_rate_man == 0) && eir_kbps)
+ {
+ /*
+ * eir was reduced to 0 during rounding. Bump it up
+ */
+ hw->peak_rate_man = 1;
+ SSE2_QOS_DEBUG_INFO ("EIR = 0 during rounding. Bump it up to %u "
+ "bytes/tick", (hw->peak_rate_man << hw->rate_exp));
+ }
+
+ cir_rnded = (hw->avg_rate_man << hw->rate_exp);
+ eir_rnded = (hw->peak_rate_man << hw->rate_exp);
+
+ SSE2_QOS_DEBUG_INFO ("Configured(rounded) values, cir: %u "
+ "kbps (mant: %u, exp: %u, rate: %u bytes/tick)",
+ cfg->rb.kbps.cir_kbps, hw->avg_rate_man,
+ hw->rate_exp, cir_rnded);
+
+ SSE2_QOS_DEBUG_INFO ("Configured(rounded) values, eir: %u "
+ "kbps (mant: %u, exp: %u, rate: %u bytes/tick)",
+ cfg->rb.kbps.eir_kbps, hw->peak_rate_man,
+ hw->rate_exp, eir_rnded);
+
+ return (rc);
+}
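+
+/*
+ * Worked example (illustrative only): cir_hw = 64000, eir_hw = 128000
+ * bytes/tick. The higher rate 128000 converts to mant = 2000, exp = 6
+ * (2000 << 6 = 128000 exactly); the lower rate then shares the
+ * exponent: avg_rate_man = round(64000 / 2^6) = 1000, also exact.
+ */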
+
+/*****
+ * NAME
+ * sse2_pol_get_bkt_max
+ *
+ * PARAMETERS
+ * rate_hw - either the average rate or peak rate
+ * bkt_max - maximum raw value of the current or extended bucket field
+ *
+ * RETURNS
+ * u64 - maximum token bytes for the current or extended bucket
+ *
+ * DESCRIPTION
+ * The current bucket and extended bucket fields are in units of
+ * 1, 2, 4 or 8 bytes, based on the average or peak rate for the
+ * current or extended bucket respectively.
+ *
+ * To get the actual maximum number of bytes that can be stored in the
+ * field, the value must be multiplied by the units of either 1,2,4,8
+ * bytes based on the rate.
+ *****/
+u64
+sse2_pol_get_bkt_max (u64 rate_hw, u64 bkt_max)
+{
+ if (rate_hw <= RATE64)
+ {
+ return (bkt_max - 1);
+ }
+ else if (rate_hw <= RATE128)
+ {
+ return ((bkt_max * RATE_64TO128_UNIT) - RATE_64TO128_UNIT);
+ }
+ else if (rate_hw <= RATE256)
+ {
+ return ((bkt_max * RATE_128TO256_UNIT) - RATE_128TO256_UNIT);
+ }
+ /* rate must be over 256 */
+ return ((bkt_max * RATE_OVER256_UNIT) - RATE_OVER256_UNIT);
+}
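+
+/*
+ * Example (illustrative only): with bkt_max = 1 << 31 and a rate in the
+ * 128-256 Gbps band (4-byte units), the maximum token capacity is
+ * (2^31 * 4) - 4 bytes, about 8 GiB.
+ */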
+
+/*****
+ * NAME
+ * sse2_pol_get_bkt_value
+ *
+ * PARAMETERS
+ * rate_hw - either the average rate or peak rate
+ * byte_value - bytes for this token bucket
+ *
+ * RETURNS
+ * u64 - unit value for the current or extended bucket field
+ *
+ * DESCRIPTION
+ * The current bucket and extended bucket fields are in units of
+ * 1, 2, 4 or 8 bytes, based on the average or peak rate for the
+ * current or extended bucket respectively.
+ *
+ * To get the units that can be stored in the field, the byte value must
+ * be divided by the units of either 1,2,4,8 bytes based on the rate.
+ *****/
+u64
+sse2_pol_get_bkt_value (u64 rate_hw, u64 byte_value)
+{
+ if (rate_hw <= RATE64)
+ {
+ return (byte_value);
+ }
+ else if (rate_hw <= RATE128)
+ {
+ return (byte_value / RATE_64TO128_UNIT);
+ }
+ else if (rate_hw <= RATE256)
+ {
+ return (byte_value / RATE_128TO256_UNIT);
+ }
+ /* rate must be over 256 */
+ return (byte_value / RATE_OVER256_UNIT);
+}
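+
+/*
+ * Example (illustrative only): byte_value = 1000000 with a rate in the
+ * 64-128 Gbps band (2-byte units) is stored as 1000000 / 2 = 500000
+ * field units.
+ */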
+
+static void
+sse2_pol_rnd_burst_byte_fmt (u64 cfg_burst,
+ u16 max_exp_value,
+ u16 max_mant_value,
+ u32 max_bkt_value,
+ u32 rate_hw,
+ u8 * exp, u32 * mant, u32 * bkt_value)
+{
+ u64 bkt_max = max_bkt_value;
+ u64 bkt_limit_max;
+ u64 rnd_burst;
+ u64 temp_bkt_value;
+
+ bkt_limit_max = ((u64) max_mant_value << (u64) max_exp_value);
+ bkt_max = sse2_pol_get_bkt_max (rate_hw, bkt_max);
+ bkt_max = MIN (bkt_max, bkt_limit_max);
+ if (!cfg_burst)
+ {
+ /*
+ * If configured burst = 0, compute the burst to be 100ms at a given
+ * rate. Note that for rate_hw = 0, exp = mant = 0.
+ */
+ cfg_burst = (u64) rate_hw *(u64) SSE2_QOS_POL_DEF_BURST_BYTE;
+ }
+
+ if (cfg_burst > bkt_max)
+ {
+ SSE2_QOS_DEBUG_ERROR ("burst 0x%llx bytes is greater than the max. "
+ "supported value 0x%llx bytes. Capping it to the "
+ "max", cfg_burst, bkt_max);
+ SSE2_QOS_TR_INFO (SSE2_QOS_TP_INFO_38,
+ (uint) cfg_burst, (uint) bkt_max);
+ cfg_burst = bkt_max;
+ }
+
+ if (cfg_burst < SSE2_QOS_POL_MIN_BURST_BYTE)
+ {
+ /*
+ * Bump up the burst value ONLY if the cfg_burst is non-zero AND
+ * less than the min. supported value
+ */
+ SSE2_QOS_DEBUG_INFO ("burst 0x%llx bytes is less than the min "
+ "supported value %u bytes. Rounding it up to "
+ "the min", cfg_burst, SSE2_QOS_POL_MIN_BURST_BYTE);
+ SSE2_QOS_TR_INFO (SSE2_QOS_TP_INFO_39, (uint) cfg_burst,
+ SSE2_QOS_POL_MIN_BURST_BYTE);
+ cfg_burst = SSE2_QOS_POL_MIN_BURST_BYTE;
+ }
+
+ sse2_qos_convert_value_to_exp_mant_fmt (cfg_burst,
+ max_exp_value,
+ max_mant_value,
+ SSE2_QOS_ROUND_TO_DOWN, exp, mant);
+
+ /* Bucket value is based on rate. */
+ rnd_burst = ((u64) (*mant) << (u64) (*exp));
+ temp_bkt_value = sse2_pol_get_bkt_value (rate_hw, rnd_burst);
+ *bkt_value = (u32) temp_bkt_value;
+}
+
+static int
+sse2_pol_convert_cfg_burst_to_hw (sse2_qos_pol_cfg_params_st * cfg,
+ sse2_qos_pol_hw_params_st * hw)
+{
+ u8 temp_exp;
+ u32 temp_mant, rate_hw;
+ u64 eb_bytes;
+ u32 bkt_value;
+
+ /*
+ * compute Committed Burst
+ */
+ SSE2_QOS_DEBUG_INFO ("Compute commit burst ...");
+ rate_hw = (hw->avg_rate_man) << (hw->rate_exp);
+ sse2_pol_rnd_burst_byte_fmt (cfg->rb.kbps.cb_bytes,
+ (u16) SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_MAX,
+ (u16) SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_MAX,
+ (u32) SSE2_QOS_POL_COMM_BKT_MAX,
+ rate_hw, &temp_exp, &temp_mant, &bkt_value);
+ SSE2_QOS_DEBUG_INFO ("Committed burst, burst_limit: 0x%llx mant : %u, "
+ "exp: %u, rnded: 0x%llx cb:%u bytes",
+ cfg->rb.kbps.cb_bytes, temp_mant, temp_exp,
+ ((u64) temp_mant << (u64) temp_exp), bkt_value);
+
+ hw->comm_bkt_limit_exp = temp_exp;
+ hw->comm_bkt_limit_man = (u8) temp_mant;
+ hw->comm_bkt = bkt_value;
+
+ /*
+ * compute Exceed Burst
+ */
+ SSE2_QOS_DEBUG_INFO ("Compute exceed burst ...");
+
+ if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C)
+ {
+ /*
+ * For 1R2C, hw uses 2R3C (RFC-4115). As such, the Exceed Bucket
+ * params are set to 0. Recommendation is to use EB_exp = max_exp (=15)
+ * and EB_mant = 0
+ */
+ hw->extd_bkt_limit_exp = (u8) SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX;
+ hw->extd_bkt_limit_man = 0;
+ SSE2_QOS_DEBUG_INFO ("Excess burst, burst: 0x%llx mant: %u, "
+ "exp: %u, rnded: 0x%llx bytes",
+ cfg->rb.kbps.eb_bytes, hw->extd_bkt_limit_man,
+ hw->extd_bkt_limit_exp,
+ ((u64) hw->extd_bkt_limit_man <<
+ (u64) hw->extd_bkt_limit_exp));
+ SSE2_QOS_TR_INFO (SSE2_QOS_TP_INFO_20, (uint) cfg->rb.kbps.eb_bytes,
+ hw->extd_bkt_limit_man, hw->extd_bkt_limit_exp);
+ return (0);
+ }
+
+ if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)
+ {
+ eb_bytes = cfg->rb.kbps.cb_bytes + cfg->rb.kbps.eb_bytes;
+ }
+ else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)
+ {
+ eb_bytes = cfg->rb.kbps.eb_bytes - cfg->rb.kbps.cb_bytes;
+ }
+ else
+ {
+ eb_bytes = cfg->rb.kbps.eb_bytes;
+ }
+
+ rate_hw = (hw->peak_rate_man) << (hw->rate_exp);
+ sse2_pol_rnd_burst_byte_fmt (eb_bytes,
+ (u16) SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX,
+ (u16) SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_MAX,
+ (u32) SSE2_QOS_POL_EXTD_BKT_MAX,
+ rate_hw, &temp_exp, &temp_mant, &bkt_value);
+
+ SSE2_QOS_DEBUG_INFO ("Excess burst, burst_limit: 0x%llx mant: %u, "
+ "exp: %u, rnded: 0x%llx eb:%u bytes",
+ cfg->rb.kbps.eb_bytes, temp_mant, temp_exp,
+ ((u64) temp_mant << (u64) temp_exp), bkt_value);
+
+ hw->extd_bkt_limit_exp = (u8) temp_exp;
+ hw->extd_bkt_limit_man = (u8) temp_mant;
+ hw->extd_bkt = bkt_value;
+
+ return (0);
+}
+
+
+/*
+ * Input: configured parameter values in 'cfg'.
+ * Output: h/w programmable parameter values in 'hw'.
+ * Return: success or failure code.
+ */
+static int
+sse2_pol_convert_cfg_to_hw_params (sse2_qos_pol_cfg_params_st * cfg,
+ sse2_qos_pol_hw_params_st * hw)
+{
+ int rc = 0;
+
+ /*
+ * clear the hw_params
+ */
+ memset (hw, 0, sizeof (sse2_qos_pol_hw_params_st));
+
+ hw->allow_negative = SSE2_QOS_POL_ALLOW_NEGATIVE;
+
+ if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) ||
+ (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115))
+ {
+ hw->rfc = IPE_RFC_RFC4115;
+ }
+ else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)
+ {
+ hw->rfc = IPE_RFC_RFC2697;
+ }
+ else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698)
+ {
+ hw->rfc = IPE_RFC_RFC2698;
+ }
+ else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1)
+ {
+ hw->rfc = IPE_RFC_MEF5CF1;
+ }
+ else
+ {
+ SSE2_QOS_DEBUG_ERROR ("Invalid RFC type %d\n", cfg->rfc);
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_61, cfg->rfc);
+ return (EINVAL);
+ }
+
+ rc = sse2_pol_convert_cfg_rates_to_hw (cfg, hw);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Unable to convert config rates to hw. Error: %d",
+ rc);
+ // Error is traced
+ return (rc);
+ }
+
+ rc = sse2_pol_convert_cfg_burst_to_hw (cfg, hw);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Unable to convert config burst to hw. Error: %d",
+ rc);
+ // Error is traced
+ return (rc);
+ }
+
+ return 0;
+}
+
+
+u32
+sse2_qos_convert_pps_to_kbps (u32 rate_pps)
+{
+ // sse2_qos_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT,
+ // SSE2_QOS_SHIP_CNT_POL_CONV_PPS_TO_KBPS);
+
+ u64 numer, rnd_value = 0;
+
+ numer = (u64) ((u64) rate_pps *
+ (u64) SSE2_QOS_POLICER_FIXED_PKT_SIZE * 8LL);
+ (void) sse2_qos_pol_round (numer, 1000LL, &rnd_value,
+ SSE2_QOS_ROUND_TO_CLOSEST);
+
+ return ((u32) rnd_value);
+}
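+
+/*
+ * Example (illustrative only): rate_pps = 1000 with the fixed 256-byte
+ * packet size gives (1000 * 256 * 8) / 1000 = 2048 kbps.
+ */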
+
+u32
+sse2_qos_convert_burst_ms_to_bytes (u32 burst_ms, u32 rate_kbps)
+{
+ u64 numer, rnd_value = 0;
+
+ //sse2_qos_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT,
+ // SSE2_QOS_SHIP_CNT_POL_CONV_BURST_MS_TO_BYTES);
+
+ numer = (u64) ((u64) burst_ms * (u64) rate_kbps);
+
+ (void) sse2_qos_pol_round (numer, 8LL, &rnd_value,
+ SSE2_QOS_ROUND_TO_CLOSEST);
+
+ return ((u32) rnd_value);
+}
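+
+/*
+ * Example (illustrative only): burst_ms = 100 at rate_kbps = 2048 gives
+ * (100 * 2048) / 8 = 25600 bytes.
+ */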
+
+
+/*
+ * Input: configured parameters in 'cfg'.
+ * Output: h/w parameters are returned in 'hw',
+ * Return: Status, success or failure code.
+ */
+int
+sse2_pol_compute_hw_params (sse2_qos_pol_cfg_params_st * cfg,
+ sse2_qos_pol_hw_params_st * hw)
+{
+ int rc = 0;
+
+ if (!cfg || !hw)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Illegal parameters");
+ return (-1);
+ }
+
+ /*
+ * Validate the police config params being presented to RM
+ */
+ rc = sse2_pol_validate_cfg_params (cfg);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Config parameter validation failed. Error: %d",
+ rc);
+ // Error is traced
+ return (-1);
+ }
+
+ /*
+ * first round configured values to h/w supported values. This func
+ * also determines whether 'tick' or 'byte' format
+ */
+ rc = sse2_pol_convert_cfg_to_hw_params (cfg, hw);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Unable to convert config params to hw params. "
+ "Error: %d", rc);
+ SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_53, rc);
+ return (-1);
+ }
+
+ return 0;
+}
+
+
+#if defined (INTERNAL_SS) || defined (X86)
+
+// For initializing the x86 policer format
+
+/*
+ * Return the number of hardware TSC timer ticks per second for the dataplane.
+ * This is approximately, but not exactly, the clock speed.
+ */
+static u64
+get_tsc_hz (void)
+{
+ f64 cpu_freq;
+
+ cpu_freq = os_cpu_clock_frequency ();
+ return (u64) cpu_freq;
+}
+
+/*
+ * Convert rates into bytes_per_period and scale.
+ * Return 0 if ok or 1 if error.
+ */
+static int
+compute_policer_params (u64 hz, // CPU speed in clocks per second
+ u64 cir_rate, // in bytes per second
+ u64 pir_rate, // in bytes per second
+ u32 * current_limit, // in bytes, output may scale the input
+ u32 * extended_limit, // in bytes, output may scale the input
+ u32 * cir_bytes_per_period,
+ u32 * pir_bytes_per_period, u32 * scale)
+{
+ double period;
+ double internal_cir_bytes_per_period;
+ double internal_pir_bytes_per_period;
+ u32 max;
+ u32 scale_shift;
+ u32 scale_amount;
+ u32 __attribute__ ((unused)) orig_current_limit = *current_limit;
+
+  // Compute the number of periods per second. For 1 GHz to 8 GHz CPUs,
+  // each period lasts on the order of 16 to 116 usec.
+ period = ((double) hz) / ((double) POLICER_TICKS_PER_PERIOD);
+
+ // Determine bytes per period for each rate
+ internal_cir_bytes_per_period = (double) cir_rate / period;
+ internal_pir_bytes_per_period = (double) pir_rate / period;
+
+  // Scale if possible. Scaling helps rate accuracy, but is constrained
+  // by the scaled rates and limits fitting in 32-bits.
+  // In addition, we need to ensure the scaled rate is no larger than
+ // 2^22 tokens per period. This allows the dataplane to ignore overflow
+ // in the tokens-per-period multiplication since it could only
+ // happen if the policer were idle for more than a year.
+ // This is not really a constraint because 100Gbps at 1Ghz is only
+ // 1.6M tokens per period.
+#define MAX_RATE_SHIFT 10
+ max = MAX (*current_limit, *extended_limit);
+ max = MAX (max, (u32) internal_cir_bytes_per_period << MAX_RATE_SHIFT);
+ max = MAX (max, (u32) internal_pir_bytes_per_period << MAX_RATE_SHIFT);
+ scale_shift = __builtin_clz (max);
+
+ scale_amount = 1 << scale_shift;
+ *scale = scale_shift;
+
+ // Scale the limits
+ *current_limit = *current_limit << scale_shift;
+ *extended_limit = *extended_limit << scale_shift;
+
+ // Scale the rates
+ internal_cir_bytes_per_period =
+ internal_cir_bytes_per_period * ((double) scale_amount);
+ internal_pir_bytes_per_period =
+ internal_pir_bytes_per_period * ((double) scale_amount);
+
+ // Make sure the new rates are reasonable
+ // Only needed for very low rates with large bursts
+ if (internal_cir_bytes_per_period < 1.0)
+ {
+ internal_cir_bytes_per_period = 1.0;
+ }
+ if (internal_pir_bytes_per_period < 1.0)
+ {
+ internal_pir_bytes_per_period = 1.0;
+ }
+
+ *cir_bytes_per_period = (u32) internal_cir_bytes_per_period;
+ *pir_bytes_per_period = (u32) internal_pir_bytes_per_period;
+
+// #define PRINT_X86_POLICE_PARAMS
+#ifdef PRINT_X86_POLICE_PARAMS
+ {
+ u64 effective_BPS;
+
+    // This value is actually slightly conservative because it doesn't
+    // take into account the partial period at the end of a second. This
+    // really matters only for very low rates.
+ effective_BPS =
+ (((u64) (*cir_bytes_per_period * (u64) period)) >> *scale);
+
+ printf ("hz=%llu, cir_rate=%llu, limit=%u => "
+ "periods-per-sec=%d usec-per-period=%d => "
+ "scale=%d cir_BPP=%u, scaled_limit=%u => "
+ "effective BPS=%llu, accuracy=%f\n",
+ // input values
+ (unsigned long long) hz,
+ (unsigned long long) cir_rate, orig_current_limit,
+ // computed values
+ (u32) (period), // periods per second
+ (u32) (1000.0 * 1000.0 / period), // in usec
+ *scale, *cir_bytes_per_period, *current_limit,
+ // accuracy
+ (unsigned long long) effective_BPS,
+ (double) cir_rate / (double) effective_BPS);
+ }
+#endif
+
+ return 0; // ok
+}
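+
+/*
+ * Worked example (illustrative; assumes a 2 GHz TSC and
+ * POLICER_TICKS_PER_PERIOD = 1 << 17): period = 2e9 / 131072, roughly
+ * 15259 periods/sec (~65 usec each). A 1 Gbps CIR (125000000 bytes/sec)
+ * then yields ~8192 bytes per period before scaling; the common scale
+ * shift moves both the limits and the per-period tokens up so the
+ * 32-bit fields retain maximum precision.
+ */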
+
+
+/*
+ * Input: configured parameters in 'cfg'.
+ * Output: h/w parameters are returned in 'hw',
+ * Return: Status, success or failure code.
+ */
+int
+x86_pol_compute_hw_params (sse2_qos_pol_cfg_params_st * cfg,
+ policer_read_response_type_st * hw)
+{
+ const int BYTES_PER_KBIT = (1000 / 8);
+ u64 hz;
+ u32 cap;
+
+ if (!cfg || !hw)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Illegal parameters");
+ return (-1);
+ }
+
+ hz = get_tsc_hz ();
+ hw->last_update_time = 0;
+
+ // Cap the bursts to 32-bits. This allows up to almost one second of
+ // burst on a 40GE interface, which should be fine for x86.
+ cap =
+ (cfg->rb.kbps.cb_bytes > 0xFFFFFFFF) ? 0xFFFFFFFF : cfg->rb.kbps.cb_bytes;
+ hw->current_limit = cap;
+ cap =
+ (cfg->rb.kbps.eb_bytes > 0xFFFFFFFF) ? 0xFFFFFFFF : cfg->rb.kbps.eb_bytes;
+ hw->extended_limit = cap;
+
+ if ((cfg->rb.kbps.cir_kbps == 0) && (cfg->rb.kbps.cb_bytes == 0)
+ && (cfg->rb.kbps.eb_bytes == 0))
+ {
+      // This is an uninitialized, always-violate policer
+ hw->single_rate = 1;
+ hw->cir_tokens_per_period = 0;
+ return 0;
+ }
+
+ if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) ||
+ (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697))
+ {
+ // Single-rate policer
+
+ hw->single_rate = 1;
+
+ if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) && cfg->rb.kbps.eb_bytes)
+ {
+ SSE2_QOS_DEBUG_ERROR
+ ("Policer parameter validation failed -- 1R2C.");
+ return (-1);
+ }
+
+ if ((cfg->rb.kbps.cir_kbps == 0) ||
+ (cfg->rb.kbps.eir_kbps != 0) ||
+ ((cfg->rb.kbps.cb_bytes == 0) && (cfg->rb.kbps.eb_bytes == 0)))
+ {
+ SSE2_QOS_DEBUG_ERROR ("Policer parameter validation failed -- 1R.");
+ return (-1);
+ }
+
+ if (compute_policer_params (hz,
+ (u64) cfg->rb.kbps.cir_kbps *
+ BYTES_PER_KBIT, 0, &hw->current_limit,
+ &hw->extended_limit,
+ &hw->cir_tokens_per_period,
+ &hw->pir_tokens_per_period, &hw->scale))
+ {
+ SSE2_QOS_DEBUG_ERROR ("Policer parameter computation failed.");
+ return (-1);
+ }
+
+ }
+ else if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) ||
+ (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115))
+ {
+ // Two-rate policer
+
+ if ((cfg->rb.kbps.cir_kbps == 0) || (cfg->rb.kbps.eir_kbps == 0)
+ || (cfg->rb.kbps.eir_kbps < cfg->rb.kbps.cir_kbps)
+ || (cfg->rb.kbps.cb_bytes == 0) || (cfg->rb.kbps.eb_bytes == 0))
+ {
+ SSE2_QOS_DEBUG_ERROR ("Config parameter validation failed.");
+ return (-1);
+ }
+
+ if (compute_policer_params (hz,
+ (u64) cfg->rb.kbps.cir_kbps *
+ BYTES_PER_KBIT,
+ (u64) cfg->rb.kbps.eir_kbps *
+ BYTES_PER_KBIT, &hw->current_limit,
+ &hw->extended_limit,
+ &hw->cir_tokens_per_period,
+ &hw->pir_tokens_per_period, &hw->scale))
+ {
+ SSE2_QOS_DEBUG_ERROR ("Policer parameter computation failed.");
+ return (-1);
+ }
+
+ }
+ else
+ {
+ SSE2_QOS_DEBUG_ERROR
+ ("Config parameter validation failed. RFC not supported");
+ return (-1);
+ }
+
+ hw->current_bucket = hw->current_limit;
+ hw->extended_bucket = hw->extended_limit;
+
+ return 0;
+}
+#endif
+
+
+/*
+ * Input: configured parameters in 'cfg'.
+ * Output: physical structure is returned in 'phys',
+ * Return: Status, success or failure code.
+ */
+int
+sse2_pol_logical_2_physical (sse2_qos_pol_cfg_params_st * cfg,
+ policer_read_response_type_st * phys)
+{
+ int rc;
+ sse2_qos_pol_cfg_params_st kbps_cfg;
+
+ memset (phys, 0, sizeof (policer_read_response_type_st));
+ memset (&kbps_cfg, 0, sizeof (sse2_qos_pol_cfg_params_st));
+
+ if (!cfg)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Illegal parameters");
+ return (-1);
+ }
+
+ switch (cfg->rate_type)
+ {
+ case SSE2_QOS_RATE_KBPS:
+ /* copy all the data into kbps_cfg */
+ kbps_cfg.rb.kbps.cir_kbps = cfg->rb.kbps.cir_kbps;
+ kbps_cfg.rb.kbps.eir_kbps = cfg->rb.kbps.eir_kbps;
+ kbps_cfg.rb.kbps.cb_bytes = cfg->rb.kbps.cb_bytes;
+ kbps_cfg.rb.kbps.eb_bytes = cfg->rb.kbps.eb_bytes;
+ break;
+ case SSE2_QOS_RATE_PPS:
+ kbps_cfg.rb.kbps.cir_kbps =
+ sse2_qos_convert_pps_to_kbps (cfg->rb.pps.cir_pps);
+ kbps_cfg.rb.kbps.eir_kbps =
+ sse2_qos_convert_pps_to_kbps (cfg->rb.pps.eir_pps);
+ kbps_cfg.rb.kbps.cb_bytes = sse2_qos_convert_burst_ms_to_bytes ((u32)
+ cfg->
+ rb.pps.cb_ms,
+ kbps_cfg.rb.
+ kbps.cir_kbps);
+ kbps_cfg.rb.kbps.eb_bytes =
+ sse2_qos_convert_burst_ms_to_bytes ((u32) cfg->rb.pps.eb_ms,
+ kbps_cfg.rb.kbps.eir_kbps);
+ break;
+ default:
+ SSE2_QOS_DEBUG_ERROR ("Illegal rate type");
+ return (-1);
+ }
+
+ /* rate type is now converted to kbps */
+ kbps_cfg.rate_type = SSE2_QOS_RATE_KBPS;
+ kbps_cfg.rnd_type = cfg->rnd_type;
+ kbps_cfg.rfc = cfg->rfc;
+
+ phys->action[POLICE_CONFORM] = cfg->conform_action.action_type;
+ phys->mark_dscp[POLICE_CONFORM] = cfg->conform_action.dscp;
+ phys->action[POLICE_EXCEED] = cfg->exceed_action.action_type;
+ phys->mark_dscp[POLICE_EXCEED] = cfg->exceed_action.dscp;
+ phys->action[POLICE_VIOLATE] = cfg->violate_action.action_type;
+ phys->mark_dscp[POLICE_VIOLATE] = cfg->violate_action.dscp;
+
+ phys->color_aware = cfg->color_aware;
+
+#if !defined (INTERNAL_SS) && !defined (X86)
+  sse2_qos_pol_hw_params_st pol_hw;
+
+  // convert logical into hw params which involves qos calculations
+  rc = sse2_pol_compute_hw_params (&kbps_cfg, &pol_hw);
+ if (rc == -1)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Unable to compute hw param. Error: %d", rc);
+ return (rc);
+ }
+
+ // convert hw params into the physical
+ phys->rfc = pol_hw.rfc;
+ phys->an = pol_hw.allow_negative;
+ phys->rexp = pol_hw.rate_exp;
+ phys->arm = pol_hw.avg_rate_man;
+ phys->prm = pol_hw.peak_rate_man;
+ phys->cble = pol_hw.comm_bkt_limit_exp;
+ phys->cblm = pol_hw.comm_bkt_limit_man;
+ phys->eble = pol_hw.extd_bkt_limit_exp;
+ phys->eblm = pol_hw.extd_bkt_limit_man;
+ phys->cb = pol_hw.comm_bkt;
+ phys->eb = pol_hw.extd_bkt;
+
+ /* for debugging purposes, the bucket token values can be overwritten */
+ if (cfg->overwrite_bucket)
+ {
+ phys->cb = cfg->current_bucket;
+ phys->eb = cfg->extended_bucket;
+ }
+#else
+ // convert logical into hw params which involves qos calculations
+ rc = x86_pol_compute_hw_params (&kbps_cfg, phys);
+ if (rc == -1)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Unable to compute hw param. Error: %d", rc);
+ return (rc);
+ }
+
+ /* for debugging purposes, the bucket token values can be overwritten */
+ if (cfg->overwrite_bucket)
+ {
+ phys->current_bucket = cfg->current_bucket;
+ phys->extended_bucket = cfg->extended_bucket;
+ }
+
+#endif // if !defined (INTERNAL_SS) && !defined (X86)
+
+ return 0;
+}
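+
+/*
+ * Usage sketch (illustrative only; the parameter values are examples,
+ * not recommendations): build a 2R3C RFC 2698 policer at CIR 1 Mbps /
+ * PIR 2 Mbps and translate it to the physical format:
+ *
+ *   sse2_qos_pol_cfg_params_st cfg;
+ *   policer_read_response_type_st phys;
+ *
+ *   memset (&cfg, 0, sizeof (cfg));
+ *   cfg.rate_type = SSE2_QOS_RATE_KBPS;
+ *   cfg.rnd_type = SSE2_QOS_ROUND_TO_CLOSEST;
+ *   cfg.rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698;
+ *   cfg.rb.kbps.cir_kbps = 1000;
+ *   cfg.rb.kbps.eir_kbps = 2000;
+ *   cfg.rb.kbps.cb_bytes = 16384;
+ *   cfg.rb.kbps.eb_bytes = 32768;
+ *   cfg.conform_action.action_type = SSE2_QOS_ACTION_TRANSMIT;
+ *   cfg.exceed_action.action_type = SSE2_QOS_ACTION_MARK_AND_TRANSMIT;
+ *   cfg.exceed_action.dscp = 10;
+ *   cfg.violate_action.action_type = SSE2_QOS_ACTION_DROP;
+ *
+ *   if (sse2_pol_logical_2_physical (&cfg, &phys) != 0)
+ *     clib_warning ("policer translation failed");
+ */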
+
+
+static void
+sse2_qos_convert_pol_bucket_to_hw_fmt (policer_read_response_type_st * bkt,
+ sse2_qos_pol_hw_params_st * hw_fmt)
+{
+ memset (hw_fmt, 0, sizeof (sse2_qos_pol_hw_params_st));
+#if !defined (INTERNAL_SS) && !defined (X86)
+ hw_fmt->rfc = (u8) bkt->rfc;
+ hw_fmt->allow_negative = (u8) bkt->an;
+ hw_fmt->rate_exp = (u8) bkt->rexp;
+ hw_fmt->avg_rate_man = (u16) bkt->arm;
+ hw_fmt->peak_rate_man = (u16) bkt->prm;
+ hw_fmt->comm_bkt_limit_man = (u8) bkt->cblm;
+ hw_fmt->comm_bkt_limit_exp = (u8) bkt->cble;
+ hw_fmt->extd_bkt_limit_man = (u8) bkt->eblm;
+ hw_fmt->extd_bkt_limit_exp = (u8) bkt->eble;
+ hw_fmt->extd_bkt = bkt->eb;
+ hw_fmt->comm_bkt = bkt->cb;
+#endif // if !defined (INTERNAL_SS) && !defined (X86)
+}
+
+/*
+ * Input: h/w programmable parameter values in 'hw'
+ * Output: configured parameter values in 'cfg'
+ * Return: Status, success or failure code.
+ */
+static int
+sse2_pol_convert_hw_to_cfg_params (sse2_qos_pol_hw_params_st * hw,
+ sse2_qos_pol_cfg_params_st * cfg)
+{
+ u64 temp_rate;
+
+ if ((hw == NULL) || (cfg == NULL))
+ {
+ return EINVAL;
+ }
+
+ if ((hw->rfc == IPE_RFC_RFC4115) &&
+ (hw->peak_rate_man << hw->rate_exp) == 0 && !(hw->extd_bkt_limit_man))
+ {
+ /*
+ * For a 1R2C, we set EIR = 0, EB = 0
+ */
+ cfg->rfc = SSE2_QOS_POLICER_TYPE_1R2C;
+ }
+ else if (hw->rfc == IPE_RFC_RFC2697)
+ {
+ cfg->rfc = SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697;
+ }
+ else if (hw->rfc == IPE_RFC_RFC2698)
+ {
+ cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698;
+ }
+ else if (hw->rfc == IPE_RFC_RFC4115)
+ {
+ cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115;
+ }
+ else if (hw->rfc == IPE_RFC_MEF5CF1)
+ {
+ cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1;
+ }
+ else
+ {
+ return EINVAL;
+ }
+
+ temp_rate = (((u64) hw->avg_rate_man << hw->rate_exp) * 8LL *
+ SSE2_QOS_POL_TICKS_PER_SEC) / 1000;
+ cfg->rb.kbps.cir_kbps = (u32) temp_rate;
+
+ temp_rate = (((u64) hw->peak_rate_man << hw->rate_exp) * 8LL *
+ SSE2_QOS_POL_TICKS_PER_SEC) / 1000;
+ cfg->rb.kbps.eir_kbps = (u32) temp_rate;
+
+ cfg->rb.kbps.cb_bytes = ((u64) hw->comm_bkt_limit_man <<
+ (u64) hw->comm_bkt_limit_exp);
+ cfg->rb.kbps.eb_bytes = ((u64) hw->extd_bkt_limit_man <<
+ (u64) hw->extd_bkt_limit_exp);
+
+ if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)
+ {
+ /*
+ * For 1R3C in the hardware, EB = sum(CB, EB). Also, EIR = CIR. Restore
+ * values such that the configured params don't reflect this adjustment
+ */
+ cfg->rb.kbps.eb_bytes = (cfg->rb.kbps.eb_bytes - cfg->rb.kbps.cb_bytes);
+ cfg->rb.kbps.eir_kbps = 0;
+ }
+ else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)
+ {
+ /*
+       * For 4115, the hardware holds the excess rate and burst, but the
+       * configured params use peak rate and burst, so add back the
+       * committed values to recover EIR and EB
+ */
+ cfg->rb.kbps.eir_kbps += cfg->rb.kbps.cir_kbps;
+ cfg->rb.kbps.eb_bytes += cfg->rb.kbps.cb_bytes;
+ }
+ /* h/w conversion to cfg is in kbps */
+ cfg->rate_type = SSE2_QOS_RATE_KBPS;
+ cfg->overwrite_bucket = 0;
+ cfg->current_bucket = hw->comm_bkt;
+ cfg->extended_bucket = hw->extd_bkt;
+
+ SSE2_QOS_DEBUG_INFO ("configured params, cir: %u kbps, eir: %u kbps, cb "
+ "burst: 0x%llx bytes, eb burst: 0x%llx bytes",
+ cfg->rb.kbps.cir_kbps, cfg->rb.kbps.eir_kbps,
+ cfg->rb.kbps.cb_bytes, cfg->rb.kbps.eb_bytes);
+ SSE2_QOS_TR_INFO (SSE2_QOS_TP_INFO_22, cfg->rb.kbps.cir_kbps,
+ cfg->rb.kbps.eir_kbps,
+ (uint) cfg->rb.kbps.cb_bytes,
+ (uint) cfg->rb.kbps.eb_bytes);
+
+ return 0;
+}
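+
+/*
+ * Worked example (illustrative only): avg_rate_man = 100, rate_exp = 3
+ * encodes (100 << 3) = 800 bytes/tick, which converts back to
+ * (800 * 8 * 1000) / 1000 = 6400 kbps.
+ */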
+
+u32
+sse2_qos_convert_kbps_to_pps (u32 rate_kbps)
+{
+ u64 numer, denom, rnd_value = 0;
+
+ // sse_qosrm_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT,
+ // SSE2_QOS_SHIP_CNT_POL_CONV_KBPS_TO_PPS);
+
+ numer = (u64) ((u64) rate_kbps * 1000LL);
+ denom = (u64) ((u64) SSE2_QOS_POLICER_FIXED_PKT_SIZE * 8LL);
+
+ (void) sse2_qos_pol_round (numer, denom, &rnd_value,
+ SSE2_QOS_ROUND_TO_CLOSEST);
+
+ return ((u32) rnd_value);
+}
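+
+/*
+ * Example (illustrative only): rate_kbps = 2048 gives
+ * (2048 * 1000) / (256 * 8) = 1000 pps, the inverse of
+ * sse2_qos_convert_pps_to_kbps() above.
+ */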
+
+u32
+sse2_qos_convert_burst_bytes_to_ms (u64 burst_bytes, u32 rate_kbps)
+{
+ u64 numer, denom, rnd_value = 0;
+
+ //sse_qosrm_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT,
+ // SSE2_QOS_SHIP_CNT_POL_CONV_BYTES_TO_BURST_MS);
+
+ numer = burst_bytes * 8LL;
+ denom = (u64) rate_kbps;
+
+ (void) sse2_qos_pol_round (numer, denom, &rnd_value,
+ SSE2_QOS_ROUND_TO_CLOSEST);
+
+ return ((u32) rnd_value);
+}
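+
+/*
+ * Example (illustrative only): burst_bytes = 25600 at rate_kbps = 2048
+ * gives (25600 * 8) / 2048 = 100 ms.
+ */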
+
+/*
+ * Input: physical structure in 'phys', rate_type in cfg
+ * Output: configured parameters in 'cfg'.
+ * Return: Status, success or failure code.
+ */
+int
+sse2_pol_physical_2_logical (policer_read_response_type_st * phys,
+ sse2_qos_pol_cfg_params_st * cfg)
+{
+ int rc;
+ sse2_qos_pol_hw_params_st pol_hw;
+ sse2_qos_pol_cfg_params_st kbps_cfg;
+
+ memset (&pol_hw, 0, sizeof (sse2_qos_pol_hw_params_st));
+ memset (&kbps_cfg, 0, sizeof (sse2_qos_pol_cfg_params_st));
+
+ if (!phys)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Illegal parameters");
+ return (-1);
+ }
+
+ sse2_qos_convert_pol_bucket_to_hw_fmt (phys, &pol_hw);
+
+ rc = sse2_pol_convert_hw_to_cfg_params (&pol_hw, &kbps_cfg);
+ if (rc != 0)
+ {
+ SSE2_QOS_DEBUG_ERROR ("Unable to convert hw params to config params. "
+ "Error: %d", rc);
+ return (-1);
+ }
+
+ /* check what rate type is required */
+ switch (cfg->rate_type)
+ {
+ case SSE2_QOS_RATE_KBPS:
+ /* copy all the data into kbps_cfg */
+ cfg->rb.kbps.cir_kbps = kbps_cfg.rb.kbps.cir_kbps;
+ cfg->rb.kbps.eir_kbps = kbps_cfg.rb.kbps.eir_kbps;
+ cfg->rb.kbps.cb_bytes = kbps_cfg.rb.kbps.cb_bytes;
+ cfg->rb.kbps.eb_bytes = kbps_cfg.rb.kbps.eb_bytes;
+ break;
+ case SSE2_QOS_RATE_PPS:
+ cfg->rb.pps.cir_pps =
+ sse2_qos_convert_kbps_to_pps (kbps_cfg.rb.kbps.cir_kbps);
+ cfg->rb.pps.eir_pps =
+ sse2_qos_convert_kbps_to_pps (kbps_cfg.rb.kbps.eir_kbps);
+ cfg->rb.pps.cb_ms =
+ sse2_qos_convert_burst_bytes_to_ms (kbps_cfg.rb.kbps.cb_bytes,
+ kbps_cfg.rb.kbps.cir_kbps);
+ cfg->rb.pps.eb_ms =
+ sse2_qos_convert_burst_bytes_to_ms (kbps_cfg.rb.kbps.eb_bytes,
+ kbps_cfg.rb.kbps.eir_kbps);
+ break;
+ default:
+ SSE2_QOS_DEBUG_ERROR ("Illegal rate type");
+ return (-1);
+ }
+
+ /* cfg->rate_type remains what it was */
+ cfg->rnd_type = kbps_cfg.rnd_type;
+ cfg->rfc = kbps_cfg.rfc;
+ cfg->overwrite_bucket = kbps_cfg.overwrite_bucket;
+ cfg->current_bucket = kbps_cfg.current_bucket;
+ cfg->extended_bucket = kbps_cfg.extended_bucket;
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/policer/xlate.h b/src/vnet/policer/xlate.h
new file mode 100644
index 00000000..16742f80
--- /dev/null
+++ b/src/vnet/policer/xlate.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*---------------------------------------------------------------------------
+ * from gdp_logical_qos.h
+ *---------------------------------------------------------------------------
+ */
+
+#ifndef __included_xlate_h__
+#define __included_xlate_h__
+
+#include <vnet/policer/police.h>
+
+/*
+ * edt: * enum sse2_qos_policer_type_en
+ * Defines type of policer to be allocated
+ */
+typedef enum sse2_qos_policer_type_en_
+{
+ SSE2_QOS_POLICER_TYPE_1R2C = 0,
+ SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697 = 1,
+ SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698 = 2,
+ SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115 = 3,
+ SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1 = 4,
+ SSE2_QOS_POLICER_TYPE_MAX
+} sse2_qos_policer_type_en;
+
+/*
+ * edt: * enum
+ * Enum used to define type of rounding used when calculating policer values
+ */
+typedef enum
+{
+ SSE2_QOS_ROUND_TO_CLOSEST = 0,
+ SSE2_QOS_ROUND_TO_UP,
+ SSE2_QOS_ROUND_TO_DOWN,
+ SSE2_QOS_ROUND_INVALID
+} sse2_qos_round_type_en;
+
+/*
+ * edt: * enum
+ * Enum used to define type of rate for configuration, either pps or kbps.
+ * If kbps, then burst is in bytes, if pps, then burst is in ms.
+ *
+ * Default of zero is kbps, which is in line with how it is programmed
+ * in actual hardware. Note, however, that this is the reverse of the
+ * units_in_bits field in sse2_static_policer_parameters_st, which is
+ * in line with sse_punt_drop.h.
+ */
+typedef enum
+{
+ SSE2_QOS_RATE_KBPS = 0,
+ SSE2_QOS_RATE_PPS,
+ SSE2_QOS_RATE_INVALID
+} sse2_qos_rate_type_en;
+
+/*
+ * edt: * enum
+ * Defines type of policer actions.
+ */
+typedef enum
+{
+ SSE2_QOS_ACTION_DROP = 0,
+ SSE2_QOS_ACTION_TRANSMIT,
+ SSE2_QOS_ACTION_MARK_AND_TRANSMIT
+} sse2_qos_action_type_en;
+
+/*
+ * edt * struct sse2_qos_pol_action_params_st
+ * This structure is used to hold user configured police action parameters.
+ *
+ * element: action_type
+ * Action type (see sse2_qos_action_type_en).
+ * element: dscp
+ * DSCP value to set when action is SSE2_QOS_ACTION_MARK_AND_TRANSMIT.
+ */
+typedef struct sse2_qos_pol_action_params_st_
+{
+ u8 action_type;
+ u8 dscp;
+} sse2_qos_pol_action_params_st;
+
+/*
+ * edt: * struct sse2_qos_pol_cfg_params_st
+ *
+ * Description:
+ * This structure is used to hold user configured policing parameters.
+ *
+ * element: cir_kbps
+ * CIR in kbps.
+ * element: eir_kbps
+ * EIR or PIR in kbps.
+ * element: cb_bytes
+ * Committed Burst in bytes.
+ * element: eb_bytes
+ * Excess or Peak Burst in bytes.
+ * element: cir_pps
+ * CIR in pps.
+ * element: eir_pps
+ * EIR or PIR in pps.
+ * element: cb_ms
+ * Committed Burst in milliseconds.
+ * element: eb_ms
+ * Excess or Peak Burst in milliseconds.
+ * element: rate_type
+ * Indicates the union if in kbps/bytes or pps/ms.
+ * element: rfc
+ * Policer algorithm - 1R2C, 1R3C (2697), 2R3C (2698) or 2R3C (4115). See
+ *   sse2_qos_policer_type_en
+ * element: rnd_type
+ *   Rounding type (see sse2_qos_round_type_en). Needed when policer values
+ * need to be rounded. Caller can decide on type of rounding used
+ */
+typedef struct sse2_qos_pol_cfg_params_st_
+{
+ union
+ {
+ struct
+ {
+ u32 cir_kbps;
+ u32 eir_kbps;
+ u64 cb_bytes;
+ u64 eb_bytes;
+ } kbps;
+ struct
+ {
+ u32 cir_pps;
+ u32 eir_pps;
+ u64 cb_ms;
+ u64 eb_ms;
+ } pps;
+ } rb; /* rate burst config */
+ u8 rate_type; /* sse2_qos_rate_type_en */
+ u8 rnd_type; /* sse2_qos_round_type_en */
+ u8 rfc; /* sse2_qos_policer_type_en */
+ u8 color_aware;
+ u8 overwrite_bucket; /* for debugging purposes */
+ u32 current_bucket; /* for debugging purposes */
+ u32 extended_bucket; /* for debugging purposes */
+ sse2_qos_pol_action_params_st conform_action;
+ sse2_qos_pol_action_params_st exceed_action;
+ sse2_qos_pol_action_params_st violate_action;
+} sse2_qos_pol_cfg_params_st;
+
+
+typedef struct sse2_qos_pol_hw_params_st_
+{
+ u8 rfc;
+ u8 allow_negative;
+ u8 rate_exp;
+ u16 avg_rate_man;
+ u16 peak_rate_man;
+ u8 comm_bkt_limit_exp;
+ u8 comm_bkt_limit_man;
+ u8 extd_bkt_limit_exp;
+ u8 extd_bkt_limit_man;
+ u32 comm_bkt;
+ u32 extd_bkt;
+} sse2_qos_pol_hw_params_st;
+
+
+int
+sse2_pol_logical_2_physical (sse2_qos_pol_cfg_params_st * cfg,
+ policer_read_response_type_st * phys);
+
+
+#endif /* __included_xlate_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ppp/error.def b/src/vnet/ppp/error.def
new file mode 100644
index 00000000..ba645408
--- /dev/null
+++ b/src/vnet/ppp/error.def
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ppp_error.def: ppp errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ppp_error (NONE, "no error")
+ppp_error (UNKNOWN_PROTOCOL, "unknown ppp protocol")
+ppp_error (UNKNOWN_ADDRESS_CONTROL, "address, control != 0xff03")
diff --git a/src/vnet/ppp/node.c b/src/vnet/ppp/node.c
new file mode 100644
index 00000000..2f6e0c33
--- /dev/null
+++ b/src/vnet/ppp/node.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ppp_node.c: ppp packet processing
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ppp/ppp.h>
+#include <vppinfra/sparse_vec.h>
+
+#define foreach_ppp_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) PPP_INPUT_NEXT_##s,
+ foreach_ppp_input_next
+#undef _
+ PPP_INPUT_N_NEXT,
+} ppp_input_next_t;
+
+typedef struct
+{
+ u8 packet_data[32];
+} ppp_input_trace_t;
+
+static u8 *
+format_ppp_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ppp_input_trace_t *t = va_arg (*va, ppp_input_trace_t *);
+
+ s = format (s, "%U", format_ppp_header, t->packet_data);
+
+ return s;
+}
+
+typedef struct
+{
+ /* Sparse vector mapping ppp protocol in network byte order
+ to next index. */
+ u16 *next_by_protocol;
+
+ u32 *sparse_index_by_next_index;
+} ppp_input_runtime_t;
+
+static uword
+ppp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ ppp_input_runtime_t *rt = (void *) node->runtime_data;
+ u32 n_left_from, next_index, i_next, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (ppp_input_trace_t));
+
+ next_index = node->cached_next_index;
+ i_next = vec_elt (rt->sparse_index_by_next_index, next_index);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ ppp_header_t *h0, *h1;
+ u32 i0, i1, protocol0, protocol1, enqueue_code;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ h0 = (void *) (b0->data + b0->current_data);
+ h1 = (void *) (b1->data + b1->current_data);
+
+ b0->current_data += sizeof (h0[0]);
+ b1->current_data += sizeof (h1[0]);
+
+ b0->current_length -= sizeof (h0[0]);
+ b1->current_length -= sizeof (h1[0]);
+
+ /* Index sparse array with network byte order. */
+ protocol0 = h0->protocol;
+ protocol1 = h1->protocol;
+ sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1, &i0,
+ &i1);
+
+ b0->error =
+ node->errors[i0 ==
+ SPARSE_VEC_INVALID_INDEX ? PPP_ERROR_UNKNOWN_PROTOCOL
+ : PPP_ERROR_NONE];
+ b1->error =
+ node->errors[i1 ==
+ SPARSE_VEC_INVALID_INDEX ? PPP_ERROR_UNKNOWN_PROTOCOL
+ : PPP_ERROR_NONE];
+
+ enqueue_code = (i0 != i_next) + 2 * (i1 != i_next);
+
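+	  /*
+	   * enqueue_code: 0 => both buffers follow the speculated next,
+	   * 1 => only b0 diverged, 2 => only b1 diverged,
+	   * 3 => both diverged (to the same or to different nexts).
+	   */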
+ if (PREDICT_FALSE (enqueue_code != 0))
+ {
+ switch (enqueue_code)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = bi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node,
+ vec_elt (rt->next_by_protocol,
+ i0), bi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node,
+ vec_elt (rt->next_by_protocol,
+ i1), bi1);
+ break;
+
+ case 3:
+ /* A B B or A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node,
+ vec_elt (rt->next_by_protocol,
+ i0), bi0);
+ vlib_set_next_frame_buffer (vm, node,
+ vec_elt (rt->next_by_protocol,
+ i1), bi1);
+ if (i0 == i1)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ i_next = i1;
+ next_index = vec_elt (rt->next_by_protocol, i_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ppp_header_t *h0;
+ u32 i0, protocol0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ b0->current_data += sizeof (h0[0]);
+ b0->current_length -= sizeof (h0[0]);
+
+ protocol0 = h0->protocol;
+ i0 = sparse_vec_index (rt->next_by_protocol, protocol0);
+
+ b0->error =
+ node->errors[i0 ==
+ SPARSE_VEC_INVALID_INDEX ? PPP_ERROR_UNKNOWN_PROTOCOL
+ : PPP_ERROR_NONE];
+
+ /* Sent packet to wrong next? */
+ if (PREDICT_FALSE (i0 != i_next))
+ {
+ /* Return old frame; remove incorrectly enqueued packet. */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);
+
+ /* Send to correct next. */
+ i_next = i0;
+ next_index = vec_elt (rt->next_by_protocol, i_next);
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char *ppp_error_strings[] = {
+#define ppp_error(n,s) s,
+#include "error.def"
+#undef ppp_error
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ppp_input_node) = {
+ .function = ppp_input,
+ .name = "ppp-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof (ppp_input_runtime_t),
+
+ .n_errors = PPP_N_ERROR,
+ .error_strings = ppp_error_strings,
+
+ .n_next_nodes = PPP_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [PPP_INPUT_NEXT_##s] = n,
+ foreach_ppp_input_next
+#undef _
+ },
+
+ .format_buffer = format_ppp_header_with_length,
+ .format_trace = format_ppp_input_trace,
+ .unformat_buffer = unformat_ppp_header,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ppp_input_runtime_init (vlib_main_t * vm)
+{
+ ppp_input_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, ppp_input_node.index);
+
+ rt->next_by_protocol = sparse_vec_new
+ ( /* elt bytes */ sizeof (rt->next_by_protocol[0]),
+ /* bits in index */ BITS (((ppp_header_t *) 0)->protocol));
+
+ vec_validate (rt->sparse_index_by_next_index, PPP_INPUT_NEXT_DROP);
+ vec_validate (rt->sparse_index_by_next_index, PPP_INPUT_NEXT_PUNT);
+ rt->sparse_index_by_next_index[PPP_INPUT_NEXT_DROP]
+ = SPARSE_VEC_INVALID_INDEX;
+ rt->sparse_index_by_next_index[PPP_INPUT_NEXT_PUNT]
+ = SPARSE_VEC_INVALID_INDEX;
+
+ return 0;
+}
+
+static clib_error_t *
+ppp_input_init (vlib_main_t * vm)
+{
+ {
+ clib_error_t *error = vlib_call_init_function (vm, ppp_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ ppp_setup_node (vm, ppp_input_node.index);
+ ppp_input_runtime_init (vm);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ppp_input_init);
+VLIB_WORKER_INIT_FUNCTION (ppp_input_runtime_init);
+
+void
+ppp_register_input_protocol (vlib_main_t * vm,
+ ppp_protocol_t protocol, u32 node_index)
+{
+ ppp_main_t *em = &ppp_main;
+ ppp_protocol_info_t *pi;
+ ppp_input_runtime_t *rt;
+ u16 *n;
+ u32 i;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, ppp_input_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = ppp_get_protocol_info (em, protocol);
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm, ppp_input_node.index, node_index);
+
+ /* Setup ppp protocol -> next index sparse vector mapping. */
+ rt = vlib_node_get_runtime_data (vm, ppp_input_node.index);
+ n =
+ sparse_vec_validate (rt->next_by_protocol,
+ clib_host_to_net_u16 (protocol));
+ n[0] = pi->next_index;
+
+ /* Rebuild next index -> sparse index inverse mapping when sparse vector
+ is updated. */
+ vec_validate (rt->sparse_index_by_next_index, pi->next_index);
+ for (i = 1; i < vec_len (rt->next_by_protocol); i++)
+ rt->sparse_index_by_next_index[rt->next_by_protocol[i]] = i;
+}
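+
+/*
+ * Usage sketch (the registering node below is hypothetical): a protocol
+ * handler typically calls this from its init function, e.g.
+ *
+ *   ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4,
+ *                                my_ip4_over_ppp_node.index);
+ *
+ * after which ppp-input dispatches protocol 0x0021 frames directly to
+ * that node via the sparse vector built above.
+ */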
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ppp/packet.h b/src/vnet/ppp/packet.h
new file mode 100644
index 00000000..cab9743d
--- /dev/null
+++ b/src/vnet/ppp/packet.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_ppp_packet_h
+#define included_vnet_ppp_packet_h
+
+/*
+ * PPP packet format
+ *
+ * Copyright (c) 2009 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+See http://www.iana.org/assignments/ppp-numbers.
+
+The Point-to-Point Protocol (PPP) Data Link Layer [146,147,175]
+contains a 16 bit Protocol field to identify the encapsulated
+protocol. The Protocol field is consistent with the ISO 3309 (HDLC)
+extension mechanism for Address fields. All Protocols MUST be
+assigned such that the least significant bit of the most significant
+octet equals "0", and the least significant bit of the least
+significant octet equals "1".
+*/
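+
+/*
+Example: 0x0021 (ip4) obeys this rule; the most significant octet 0x00
+has least significant bit 0 and the least significant octet 0x21 has
+least significant bit 1. A sketch of the corresponding check (macro
+name is ours, not part of the IANA assignments):
+
+  #define ppp_protocol_is_well_formed(p) \
+    (((((p) >> 8) & 1) == 0) && (((p) & 1) == 1))
+*/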
+
+#define foreach_ppp_protocol \
+_ (0x0001, padding) \
+_ (0x0003, rohc_small_cid) \
+_ (0x0005, rohc_large_cid) \
+_ (0x0021, ip4) \
+_ (0x0023, osi) \
+_ (0x0025, xerox_ns_idp) \
+_ (0x0027, decnet) \
+_ (0x0029, appletalk) \
+_ (0x002b, ipx) \
+_ (0x002d, vj_compressed_tcp) \
+_ (0x002f, vj_uncompressed_tcp) \
+_ (0x0031, bpdu) \
+_ (0x0033, streams) \
+_ (0x0035, vines) \
+_ (0x0039, appletalk_eddp) \
+_ (0x003b, appletalk_smart_buffered) \
+_ (0x003d, multilink) \
+_ (0x003f, netbios_framing) \
+_ (0x0041, cisco) \
+_ (0x0043, timeplex) \
+_ (0x0045, fujitsu_lblb) \
+_ (0x0047, dca_remote_lan) \
+_ (0x0049, sdtp) \
+_ (0x004b, sna_over_802_2) \
+_ (0x004d, sna) \
+_ (0x004f, ip6_header_compression) \
+_ (0x0051, knx) \
+_ (0x0053, encryption) \
+_ (0x0055, link_encryption) \
+_ (0x0057, ip6) \
+_ (0x0059, ppp_mux) \
+_ (0x005b, vendor_specific_a) \
+_ (0x0061, rtp_iphc_full_header) \
+_ (0x0063, rtp_iphc_compressed_tcp) \
+_ (0x0065, rtp_iphc_compressed_non_tcp) \
+_ (0x0067, rtp_iphc_compressed_udp_8) \
+_ (0x0069, rtp_iphc_compressed_rtp_8) \
+_ (0x006f, stampede) \
+_ (0x0073, mp_plus) \
+_ (0x007d, control) \
+_ (0x00c1, ntcits_ipi) \
+_ (0x00cf, ppp_nlpid) \
+_ (0x00fb, multilink_compression) \
+_ (0x00fd, compressed_datagram) \
+_ (0x0201, 802_1d_hello) \
+_ (0x0203, ibm_source_routing) \
+_ (0x0205, dec_lanbridge) \
+_ (0x0207, cdp) \
+_ (0x0209, netcs) \
+_ (0x020b, stp) \
+_ (0x020d, edp) \
+_ (0x0211, oscp_a) \
+_ (0x0213, oscp_b) \
+_ (0x0231, luxcom) \
+_ (0x0233, sigma) \
+_ (0x0235, apple_client_server) \
+_ (0x0281, mpls_unicast) \
+_ (0x0283, mpls_multicast) \
+_ (0x0285, ieee_p1284_4) \
+_ (0x0287, tetra) \
+_ (0x0289, multichannel_flow_treatment) \
+_ (0x2063, rtp_iphc_compressed_tcp_no_delta) \
+_ (0x2065, rtp_iphc_context_state) \
+_ (0x2067, rtp_iphc_compressed_udp_16) \
+_ (0x2069, rtp_iphc_compressed_rtp_16) \
+_ (0x4001, cray) \
+_ (0x4003, cdpd) \
+_ (0x4005, expand) \
+_ (0x4007, odsicp) \
+_ (0x4009, docsis_dll) \
+_ (0x400B, cetacean) \
+_ (0x4021, lzs) \
+_ (0x4023, reftek) \
+_ (0x4025, fibre_channel) \
+_ (0x4027, emit) \
+_ (0x405b, vendor_specific_b) \
+_ (0xc021, lcp) \
+_ (0xc023, pap) \
+_ (0xc025, link_quality_report) \
+_ (0xc027, shiva_password) \
+_ (0xc029, cbcp) \
+_ (0xc02b, bacp) \
+_ (0xc02d, bap) \
+_ (0xc05b, vendor_specific_password) \
+_ (0xc081, container_control) \
+_ (0xc223, chap) \
+_ (0xc225, rsa) \
+_ (0xc227, extensible_authentication) \
+_ (0xc229, mitsubishi_security_info) \
+_ (0xc26f, stampede_authorization) \
+_ (0xc281, proprietary_authentication_a) \
+_ (0xc283, proprietary_authentication_b) \
+_ (0xc481, proprietary_node_id_authentication)
+
+typedef enum
+{
+#define _(n,f) PPP_PROTOCOL_##f = n,
+ foreach_ppp_protocol
+#undef _
+} ppp_protocol_t;
+
+/* PPP Link Control Protocol (LCP) and Internet Protocol Control Protocol (IPCP) Codes
+
+The Point-to-Point Protocol (PPP) Link Control Protocol (LCP),
+the Compression Control Protocol (CCP), Internet Protocol Control
+Protocol (IPCP), and other control protocols, contain an 8 bit
+Code field which identifies the type of packet. */
+
+#define foreach_ppp_lcp_code \
+_ (0, vendor_specific) \
+_ (1, configure_request) \
+_ (2, configure_ack) \
+_ (3, configure_nak) \
+_ (4, configure_reject) \
+_ (5, terminate_request) \
+_ (6, terminate_ack) \
+_ (7, code_reject) \
+_ (8, protocol_reject) \
+_ (9, echo_request) \
+_ (10, echo_reply) \
+_ (11, discard_request) \
+_ (12, identification) \
+_ (13, time_remaining) \
+_ (14, reset_request) \
+_ (15, reset_reply)
+
+typedef struct
+{
+ /* Set to 0xff 0x03 */
+ u8 address, control;
+
+ /* Layer 3 protocol for this packet. */
+ u16 protocol;
+} ppp_header_t;
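+
+/*
+ * On the wire, an IPv4 packet over PPP therefore starts with the bytes
+ * ff 03 00 21: the fixed address and control octets followed by
+ * protocol 0x0021 in network byte order.
+ */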
+
+#endif /* included_vnet_ppp_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ppp/pg.c b/src/vnet/ppp/pg.c
new file mode 100644
index 00000000..0b46ccb9
--- /dev/null
+++ b/src/vnet/ppp/pg.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ppp_pg.c: packet generator ppp interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ppp/ppp.h>
+
+typedef struct
+{
+ pg_edit_t address;
+ pg_edit_t control;
+ pg_edit_t protocol;
+} pg_ppp_header_t;
+
+static inline void
+pg_ppp_header_init (pg_ppp_header_t * e)
+{
+ pg_edit_init (&e->address, ppp_header_t, address);
+ pg_edit_init (&e->control, ppp_header_t, control);
+ pg_edit_init (&e->protocol, ppp_header_t, protocol);
+}
+
+uword
+unformat_pg_ppp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ppp_header_t *h;
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (ppp_header_t),
+ &group_index);
+ pg_ppp_header_init (h);
+
+ pg_edit_set_fixed (&h->address, 0xff);
+ pg_edit_set_fixed (&h->control, 0x03);
+
+ error = 1;
+ if (!unformat (input, "%U",
+ unformat_pg_edit,
+ unformat_ppp_protocol_net_byte_order, &h->protocol))
+ goto done;
+
+ {
+ ppp_main_t *pm = &ppp_main;
+ ppp_protocol_info_t *pi = 0;
+ pg_node_t *pg_node = 0;
+
+ if (h->protocol.type == PG_EDIT_FIXED)
+ {
+ u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO];
+ pi = ppp_get_protocol_info (pm, clib_net_to_host_u16 (t));
+ if (pi && pi->node_index != ~0)
+ pg_node = pg_get_node (pi->node_index);
+ }
+
+    if (pg_node && pg_node->unformat_edit
+	&& unformat_user (input, pg_node->unformat_edit, s))
+      ;
+    else if (!unformat_user (input, unformat_pg_payload, s))
+      goto done;
+ }
+
+ error = 0;
+done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
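+
+/*
+ * Usage sketch (CLI syntax assumed, not part of this patch): a
+ * packet-generator stream feeding ppp-input might be defined as
+ *
+ *   packet-generator new {
+ *     name ppp-ip4
+ *     node ppp-input
+ *     size 64-64
+ *     data { ip4 }
+ *   }
+ *
+ * where the protocol token is parsed by unformat_pg_ppp_header above
+ * and the fixed address/control bytes are filled in automatically.
+ */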
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ppp/ppp.c b/src/vnet/ppp/ppp.c
new file mode 100644
index 00000000..a0eefbad
--- /dev/null
+++ b/src/vnet/ppp/ppp.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ppp.c: ppp support
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ppp/ppp.h>
+
+/* Global main structure. */
+ppp_main_t ppp_main;
+
+u8 *
+format_ppp_protocol (u8 * s, va_list * args)
+{
+ ppp_protocol_t p = va_arg (*args, u32);
+ ppp_main_t *pm = &ppp_main;
+ ppp_protocol_info_t *pi = ppp_get_protocol_info (pm, p);
+
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "0x%04x", p);
+
+ return s;
+}
+
+u8 *
+format_ppp_header_with_length (u8 * s, va_list * args)
+{
+ ppp_main_t *pm = &ppp_main;
+ ppp_header_t *h = va_arg (*args, ppp_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ ppp_protocol_t p = clib_net_to_host_u16 (h->protocol);
+ uword indent, header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "ppp header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "PPP %U", format_ppp_protocol, p);
+
+ if (h->address != 0xff)
+ s = format (s, ", address 0x%02x", h->address);
+ if (h->control != 0x03)
+ s = format (s, ", control 0x%02x", h->control);
+
+  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+    {
+      ppp_protocol_info_t *pi = ppp_get_protocol_info (pm, p);
+      vlib_node_t *node;
+
+      if (pi && pi->node_index != ~0)
+	{
+	  node = vlib_get_node (pm->vlib_main, pi->node_index);
+	  if (node->format_buffer)
+	    s = format (s, "\n%U%U",
+			format_white_space, indent,
+			node->format_buffer, (void *) (h + 1),
+			max_header_bytes - header_bytes);
+	}
+    }
+
+ return s;
+}
+
+u8 *
+format_ppp_header (u8 * s, va_list * args)
+{
+ ppp_header_t *h = va_arg (*args, ppp_header_t *);
+ return format (s, "%U", format_ppp_header_with_length, h, 0);
+}
+
+/* Returns ppp protocol as an int in host byte order. */
+uword
+unformat_ppp_protocol_host_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ ppp_main_t *pm = &ppp_main;
+ int p, i;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x", &p) || unformat (input, "%d", &p))
+ {
+ if (p >= (1 << 16))
+ return 0;
+ *result = p;
+ return 1;
+ }
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ pm->protocol_info_by_name, &i))
+ {
+ ppp_protocol_info_t *pi = vec_elt_at_index (pm->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword
+unformat_ppp_protocol_net_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ if (!unformat_user (input, unformat_ppp_protocol_host_byte_order, result))
+ return 0;
+ *result = clib_host_to_net_u16 ((u16) * result);
+ return 1;
+}
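+
+/*
+ * Example: parsing "ip4" yields 0x0021 in host byte order; the net
+ * byte order variant stores the same field so that the first byte in
+ * memory is 0x00 and the second 0x21 (read back as 0x2100 on a
+ * little-endian host).
+ */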
+
+uword
+unformat_ppp_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ ppp_header_t _h, *h = &_h;
+ u16 p;
+
+ if (!unformat (input, "%U", unformat_ppp_protocol_host_byte_order, &p))
+ return 0;
+
+ h->address = 0xff;
+ h->control = 0x03;
+ h->protocol = clib_host_to_net_u16 (p);
+
+  /* Add header to result; hp avoids shadowing the protocol variable p. */
+  {
+    void *hp;
+    u32 n_bytes = sizeof (h[0]);
+
+    vec_add2 (*result, hp, n_bytes);
+    clib_memcpy (hp, h, n_bytes);
+  }
+
+ return 1;
+}
+
+static u8 *
+ppp_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_hw_address)
+{
+ ppp_header_t *h;
+ u8 *rewrite = NULL;
+ ppp_protocol_t protocol;
+
+ switch (link_type)
+ {
+#define _(a,b) case VNET_LINK_##a: protocol = PPP_PROTOCOL_##b; break
+ _(IP4, ip4);
+ _(IP6, ip6);
+ _(MPLS, mpls_unicast);
+#undef _
+ default:
+ return (NULL);
+ }
+
+ vec_validate (rewrite, sizeof (*h) - 1);
+ h = (ppp_header_t *) rewrite;
+ h->address = 0xff;
+ h->control = 0x03;
+ h->protocol = clib_host_to_net_u16 (protocol);
+
+ return (rewrite);
+}
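+
+/*
+ * Example: for VNET_LINK_IP4 the rewrite built above is the 4-byte
+ * string ff 03 00 21, i.e. a complete PPP header that the adjacency
+ * code prepends to each outgoing IPv4 packet.
+ */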
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
+ .name = "PPP",
+ .format_header = format_ppp_header_with_length,
+ .unformat_header = unformat_ppp_header,
+ .build_rewrite = ppp_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+/* *INDENT-ON* */
+
+static void
+add_protocol (ppp_main_t * pm, ppp_protocol_t protocol, char *protocol_name)
+{
+ ppp_protocol_info_t *pi;
+ u32 i;
+
+ vec_add2 (pm->protocol_infos, pi, 1);
+ i = pi - pm->protocol_infos;
+
+ pi->name = protocol_name;
+ pi->protocol = protocol;
+ pi->next_index = pi->node_index = ~0;
+
+ hash_set (pm->protocol_info_by_protocol, protocol, i);
+ hash_set_mem (pm->protocol_info_by_name, pi->name, i);
+}
+
+static clib_error_t *
+ppp_init (vlib_main_t * vm)
+{
+ ppp_main_t *pm = &ppp_main;
+
+ memset (pm, 0, sizeof (pm[0]));
+ pm->vlib_main = vm;
+
+ pm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ pm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
+
+#define _(n,s) add_protocol (pm, PPP_PROTOCOL_##s, #s);
+ foreach_ppp_protocol;
+#undef _
+
+ return vlib_call_init_function (vm, ppp_input_init);
+}
+
+VLIB_INIT_FUNCTION (ppp_init);
+
+ppp_main_t *
+ppp_get_main (vlib_main_t * vm)
+{
+ vlib_call_init_function (vm, ppp_init);
+ return &ppp_main;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ppp/ppp.h b/src/vnet/ppp/ppp.h
new file mode 100644
index 00000000..726eca66
--- /dev/null
+++ b/src/vnet/ppp/ppp.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ppp.h: types/functions for ppp.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ppp_h
+#define included_ppp_h
+
+#include <vnet/vnet.h>
+#include <vnet/ppp/packet.h>
+#include <vnet/pg/pg.h>
+
+extern vnet_hw_interface_class_t ppp_hw_interface_class;
+
+typedef enum
+{
+#define ppp_error(n,s) PPP_ERROR_##n,
+#include <vnet/ppp/error.def>
+#undef ppp_error
+ PPP_N_ERROR,
+} ppp_error_t;
+
+typedef struct
+{
+ /* Name (a c string). */
+ char *name;
+
+ /* PPP protocol type in host byte order. */
+ ppp_protocol_t protocol;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} ppp_protocol_info_t;
+
+typedef struct
+{
+ vlib_main_t *vlib_main;
+
+ ppp_protocol_info_t *protocol_infos;
+
+ /* Hash tables mapping name/protocol to protocol info index. */
+ uword *protocol_info_by_name, *protocol_info_by_protocol;
+} ppp_main_t;
+
+always_inline ppp_protocol_info_t *
+ppp_get_protocol_info (ppp_main_t * em, ppp_protocol_t protocol)
+{
+ uword *p = hash_get (em->protocol_info_by_protocol, protocol);
+ return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0;
+}
+
+extern ppp_main_t ppp_main;
+
+/* Register given node index to take input for given ppp protocol. */
+void
+ppp_register_input_protocol (vlib_main_t * vm,
+			     ppp_protocol_t protocol, u32 node_index);
+
+format_function_t format_ppp_protocol;
+format_function_t format_ppp_header;
+format_function_t format_ppp_header_with_length;
+
+/* Parse ppp protocol as 0xXXXX or protocol name, in either host or
+   network byte order. */
+unformat_function_t unformat_ppp_protocol_host_byte_order;
+unformat_function_t unformat_ppp_protocol_net_byte_order;
+
+/* Parse ppp header. */
+unformat_function_t unformat_ppp_header;
+unformat_function_t unformat_pg_ppp_header;
+
+always_inline void
+ppp_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_ppp_header_with_length;
+ n->unformat_buffer = unformat_ppp_header;
+ pn->unformat_edit = unformat_pg_ppp_header;
+}
+
+#endif /* included_ppp_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/replication.c b/src/vnet/replication.c
new file mode 100644
index 00000000..5a8a0fe6
--- /dev/null
+++ b/src/vnet/replication.c
@@ -0,0 +1,285 @@
+/*
+ * replication.c : packet replication
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/replication.h>
+
+
+replication_main_t replication_main;
+
+
+replication_context_t *
+replication_prep (vlib_main_t * vm,
+ vlib_buffer_t * b0, u32 recycle_node_index, u32 l2_packet)
+{
+ replication_main_t *rm = &replication_main;
+ replication_context_t *ctx;
+ uword thread_index = vm->thread_index;
+ ip4_header_t *ip;
+ u32 ctx_id;
+
+  /* On first use, reserve context 0 so valid contexts are nonzero;
+   * then allocate this packet's context */
+ if (PREDICT_FALSE (rm->contexts[thread_index] == 0))
+ pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES);
+
+ pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES);
+ ctx_id = ctx - rm->contexts[thread_index];
+
+ /* Save state from vlib buffer */
+ ctx->saved_free_list_index = vlib_buffer_get_free_list_index (b0);
+ ctx->current_data = b0->current_data;
+ ctx->flags = b0->flags & VNET_BUFFER_FLAGS_VLAN_BITS;
+
+ /* Set up vlib buffer hooks */
+ b0->recycle_count = ctx_id;
+ vlib_buffer_set_free_list_index (b0, rm->recycle_list_index);
+ b0->flags |= VLIB_BUFFER_RECYCLE;
+
+ /* Save feature state */
+ ctx->recycle_node_index = recycle_node_index;
+
+ /* Save vnet state */
+ clib_memcpy (ctx->vnet_buffer, vnet_buffer (b0),
+ sizeof (vnet_buffer_opaque_t));
+
+ /* Save packet contents */
+ ctx->l2_packet = l2_packet;
+ ip = (ip4_header_t *) vlib_buffer_get_current (b0);
+ if (l2_packet)
+ {
+ /* Save ethernet header */
+ ctx->l2_header[0] = ((u64 *) ip)[0];
+ ctx->l2_header[1] = ((u64 *) ip)[1];
+ ctx->l2_header[2] = ((u64 *) ip)[2];
+ /* set ip to the true ip header */
+ ip = (ip4_header_t *) (((u8 *) ip) + vnet_buffer (b0)->l2.l2_len);
+ }
+
+  /*
+   * Copy L3 fields.
+   * We need to save the TOS (traffic class for ip6) byte. Fortunately
+   * it sits within the first two bytes of both the ip4 and ip6 headers.
+   */
+ ctx->ip_tos = *((u16 *) (ip));
+
+ /*
+ * Save the ip4 checksum as well. We just blindly save the corresponding two
+ * bytes even for ip6 packets.
+ */
+ ctx->ip4_checksum = ip->checksum;
+
+ return ctx;
+}
+
+
+replication_context_t *
+replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last)
+{
+ replication_main_t *rm = &replication_main;
+ replication_context_t *ctx;
+ uword thread_index = vm->thread_index;
+ ip4_header_t *ip;
+
+ /* Get access to the replication context */
+ ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count);
+
+ /* Restore vnet buffer state */
+ clib_memcpy (vnet_buffer (b0), ctx->vnet_buffer,
+ sizeof (vnet_buffer_opaque_t));
+
+ /* Restore the vlan flags */
+ b0->flags &= ~VNET_BUFFER_FLAGS_VLAN_BITS;
+ b0->flags |= ctx->flags;
+
+ /* Restore the packet start (current_data) and length */
+ vlib_buffer_advance (b0, ctx->current_data - b0->current_data);
+
+ /* Restore packet contents */
+ ip = (ip4_header_t *) vlib_buffer_get_current (b0);
+ if (ctx->l2_packet)
+ {
+ /* Restore ethernet header */
+ ((u64 *) ip)[0] = ctx->l2_header[0];
+ ((u64 *) ip)[1] = ctx->l2_header[1];
+ ((u64 *) ip)[2] = ctx->l2_header[2];
+ /* set ip to the true ip header */
+ ip = (ip4_header_t *) (((u8 *) ip) + vnet_buffer (b0)->l2.l2_len);
+ }
+
+ // Restore L3 fields
+ *((u16 *) (ip)) = ctx->ip_tos;
+ ip->checksum = ctx->ip4_checksum;
+
+ if (is_last)
+ {
+ /*
+ * This is the last replication in the list.
+ * Restore original buffer free functionality.
+ */
+ vlib_buffer_set_free_list_index (b0, ctx->saved_free_list_index);
+ b0->flags &= ~VLIB_BUFFER_RECYCLE;
+
+ /* Free context back to its pool */
+ pool_put (rm->contexts[thread_index], ctx);
+ }
+
+ return ctx;
+}
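+
+/*
+ * Usage sketch (the calling feature node is hypothetical; the helpers
+ * are the ones declared in replication.h): a replicating feature calls
+ * replication_prep() once on the original buffer and
+ * replication_recycle() each time the buffer comes back from the
+ * recycle freelist:
+ *
+ *   if (!replication_is_recycled (b0))
+ *     ctx = replication_prep (vm, b0, my_feature_node.index, 0);
+ *   else
+ *     ctx = replication_recycle (vm, b0, is_last_copy);
+ */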
+
+/*
+ * fish pkts back from the recycle queue/freelist
+ * un-flatten the context chains
+ */
+static void
+replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl)
+{
+ vlib_frame_t *f = 0;
+ u32 n_left_from;
+ u32 n_left_to_next = 0;
+ u32 n_this_frame = 0;
+ u32 *from;
+ u32 *to_next = 0;
+ u32 bi0, pi0;
+ vlib_buffer_t *b0;
+ int i;
+ replication_main_t *rm = &replication_main;
+ replication_context_t *ctx;
+ u32 feature_node_index = 0;
+ uword thread_index = vm->thread_index;
+
+ /*
+ * All buffers in the list are destined to the same recycle node.
+ * Pull the recycle node index from the first buffer.
+ * Note: this could be sped up if the node index were stuffed into
+ * the freelist itself.
+ */
+ if (vec_len (fl->buffers) > 0)
+ {
+ bi0 = fl->buffers[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count);
+ feature_node_index = ctx->recycle_node_index;
+ }
+
+  /* Walk the recycled buffers (only the i == 0 iteration loads a source) */
+ for (i = 0; i < 2; i++)
+ {
+ if (i == 0)
+ {
+ from = fl->buffers;
+ n_left_from = vec_len (from);
+ }
+
+ while (n_left_from > 0)
+ {
+ if (PREDICT_FALSE (n_left_to_next == 0))
+ {
+ if (f)
+ {
+ f->n_vectors = n_this_frame;
+ vlib_put_frame_to_node (vm, feature_node_index, f);
+ }
+
+ f = vlib_get_frame_to_node (vm, feature_node_index);
+ to_next = vlib_frame_vector_args (f);
+ n_left_to_next = VLIB_FRAME_SIZE;
+ n_this_frame = 0;
+ }
+
+ bi0 = from[0];
+ if (PREDICT_TRUE (n_left_from > 1))
+ {
+ pi0 = from[1];
+ vlib_prefetch_buffer_with_index (vm, pi0, LOAD);
+ }
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* Mark that this buffer was just recycled */
+ b0->flags |= VLIB_BUFFER_IS_RECYCLED;
+
+#if (CLIB_DEBUG > 0)
+ if (vm->buffer_main->callbacks_registered == 0)
+ vlib_buffer_set_known_state (vm, bi0,
+ VLIB_BUFFER_KNOWN_ALLOCATED);
+#endif
+
+ /* If buffer is traced, mark frame as traced */
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ f->flags |= VLIB_FRAME_TRACE;
+
+ to_next[0] = bi0;
+
+ from++;
+ to_next++;
+ n_this_frame++;
+ n_left_to_next--;
+ n_left_from--;
+ }
+ }
+
+ vec_reset_length (fl->buffers);
+
+ if (f)
+ {
+ ASSERT (n_this_frame);
+ f->n_vectors = n_this_frame;
+ vlib_put_frame_to_node (vm, feature_node_index, f);
+ }
+}
+
+clib_error_t *
+replication_init (vlib_main_t * vm)
+{
+ replication_main_t *rm = &replication_main;
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *fl;
+ __attribute__ ((unused)) replication_context_t *ctx;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ rm->vlib_main = vm;
+ rm->vnet_main = vnet_get_main ();
+ rm->recycle_list_index =
+ vlib_buffer_create_free_list (vm, 1024 /* fictional */ ,
+ "replication-recycle");
+
+ fl = pool_elt_at_index (bm->buffer_free_list_pool, rm->recycle_list_index);
+
+ fl->buffers_added_to_freelist_function = replication_recycle_callback;
+
+ /* Verify the replication context is the expected size */
+ ASSERT (sizeof (replication_context_t) == 128); /* 2 cache lines */
+
+ vec_validate (rm->contexts, tm->n_vlib_mains - 1);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (replication_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/replication.h b/src/vnet/replication.h
new file mode 100644
index 00000000..531a61c2
--- /dev/null
+++ b/src/vnet/replication.h
@@ -0,0 +1,137 @@
+/*
+ * replication.h : packet replication
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_replication_h
+#define included_replication_h
+
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+
+typedef struct
+{
+ /* The entire vnet buffer header restored for each replica */
+ u8 vnet_buffer[40]; /* 16B aligned to allow vector unit copy */
+ u8 reserved[24]; /* space for future expansion of vnet buffer header */
+
+ /* feature state used during this replication */
+ u64 feature_replicas; /* feature's id for its set of replicas */
+ u32 feature_counter; /* feature's current index into set of replicas */
+ u32 recycle_node_index; /* feature's recycle node index */
+
+ /*
+ * data saved from the start of replication and restored
+ * at the end of replication
+ */
+ u32 saved_free_list_index; /* from vlib buffer */
+
+ /* data saved from the original packet and restored for each replica */
+ u64 l2_header[3]; /* 24B (must be at least 22B for l2 packets) */
+ u32 flags; /* vnet buffer flags */
+ u16 ip_tos; /* v4 and v6 */
+ u16 ip4_checksum; /* needed for v4 only */
+
+ /* data saved from the vlib buffer header and restored for each replica */
+ i16 current_data; /* offset of first byte of packet in packet data */
+ u8 pad[2]; /* to 64B */
+ u8 l2_packet; /* flag for l2 vs l3 packet data */
+
+} replication_context_t; /* 128B */
+
+
+typedef struct
+{
+
+ u32 recycle_list_index;
+
+ /* per-thread pools of replication contexts */
+ replication_context_t **contexts;
+
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+} replication_main_t;
+
+
+extern replication_main_t replication_main;
+
+
+/* Return 1 if this buffer just came from the replication recycle handler. */
+always_inline u32
+replication_is_recycled (vlib_buffer_t * b0)
+{
+ return b0->flags & VLIB_BUFFER_IS_RECYCLED;
+}
+
+/*
+ * Clear the recycle flag. If buffer came from the replication recycle
+ * handler, this flag must be cleared before the packet is transmitted again.
+ */
+always_inline void
+replication_clear_recycled (vlib_buffer_t * b0)
+{
+ b0->flags &= ~VLIB_BUFFER_IS_RECYCLED;
+}
+
+/*
+ * Return the active replication context if this buffer has
+ * been recycled, otherwise return 0. (Note that this essentially
+ * restricts access to the replication context to the replication
+ * feature's prep and recycle nodes.)
+ */
+always_inline replication_context_t *
+replication_get_ctx (vlib_buffer_t * b0)
+{
+ replication_main_t *rm = &replication_main;
+
+ return replication_is_recycled (b0) ?
+ pool_elt_at_index (rm->contexts[vlib_get_thread_index ()],
+ b0->recycle_count) : 0;
+}
+
+/* Prefetch the replication context for this buffer, if it exists */
+always_inline void
+replication_prefetch_ctx (vlib_buffer_t * b0)
+{
+ replication_context_t *ctx = replication_get_ctx (b0);
+
+ if (ctx)
+ {
+ CLIB_PREFETCH (ctx, (2 * CLIB_CACHE_LINE_BYTES), STORE);
+ }
+}
+
+replication_context_t *replication_prep (vlib_main_t * vm,
+ vlib_buffer_t * b0,
+ u32 recycle_node_index,
+ u32 l2_packet);
+
+replication_context_t *replication_recycle (vlib_main_t * vm,
+ vlib_buffer_t * b0, u32 is_last);
+
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
new file mode 100644
index 00000000..2b789c5f
--- /dev/null
+++ b/src/vnet/session/application.c
@@ -0,0 +1,657 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+
+/**
+ * Pool from which we allocate all applications
+ */
+static application_t *app_pool;
+
+/**
+ * Hash table of apps by api client index
+ */
+static uword *app_by_api_client_index;
+
+/**
+ * Default application event queue size
+ */
+static u32 default_app_evt_queue_size = 128;
+
+int
+application_api_queue_is_full (application_t * app)
+{
+ unix_shared_memory_queue_t *q;
+
+ /* builtin servers are always OK */
+ if (app->api_client_index == ~0)
+ return 0;
+
+ q = vl_api_client_index_to_input_queue (app->api_client_index);
+ if (!q)
+ return 1;
+
+ if (q->cursize == q->maxsize)
+ return 1;
+ return 0;
+}
+
+static void
+application_table_add (application_t * app)
+{
+ hash_set (app_by_api_client_index, app->api_client_index, app->index);
+}
+
+static void
+application_table_del (application_t * app)
+{
+ hash_unset (app_by_api_client_index, app->api_client_index);
+}
+
+application_t *
+application_lookup (u32 api_client_index)
+{
+ uword *p;
+ p = hash_get (app_by_api_client_index, api_client_index);
+ if (p)
+ return application_get (p[0]);
+
+ return 0;
+}
+
+application_t *
+application_new ()
+{
+ application_t *app;
+ pool_get (app_pool, app);
+ memset (app, 0, sizeof (*app));
+ app->index = application_get_index (app);
+ app->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX;
+ app->first_segment_manager = APP_INVALID_SEGMENT_MANAGER_INDEX;
+ if (CLIB_DEBUG > 1)
+ clib_warning ("[%d] New app (%d)", getpid (), app->index);
+ return app;
+}
+
+void
+application_del (application_t * app)
+{
+ segment_manager_t *sm;
+ u64 handle;
+ u32 index, *handles = 0;
+ int i;
+ vnet_unbind_args_t _a, *a = &_a;
+
+ /*
+ * The app event queue allocated in first segment is cleared with
+ * the segment manager. No need to explicitly free it.
+ */
+ if (CLIB_DEBUG > 1)
+ clib_warning ("[%d] Delete app (%d)", getpid (), app->index);
+
+ /*
+ * Listener cleanup
+ */
+
+ /* *INDENT-OFF* */
+ hash_foreach (handle, index, app->listeners_table,
+ ({
+ vec_add1 (handles, handle);
+ sm = segment_manager_get (index);
+ sm->app_index = SEGMENT_MANAGER_INVALID_APP_INDEX;
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (handles); i++)
+ {
+ a->app_index = app->index;
+ a->handle = handles[i];
+ /* seg manager is removed when unbind completes */
+ vnet_unbind (a);
+ }
+
+ /*
+ * Connects segment manager cleanup
+ */
+
+ if (app->connects_seg_manager != APP_INVALID_SEGMENT_MANAGER_INDEX)
+ {
+ sm = segment_manager_get (app->connects_seg_manager);
+ sm->app_index = SEGMENT_MANAGER_INVALID_APP_INDEX;
+ segment_manager_init_del (sm);
+ }
+
+
+ /* If first segment manager is used by a listener */
+ if (app->first_segment_manager != APP_INVALID_SEGMENT_MANAGER_INDEX
+ && app->first_segment_manager != app->connects_seg_manager)
+ {
+ sm = segment_manager_get (app->first_segment_manager);
+      /* ... and it has no fifos (e.g. it might be used for redirected
+       * sessions), remove it */
+ if (!segment_manager_has_fifos (sm))
+ {
+ sm->app_index = SEGMENT_MANAGER_INVALID_APP_INDEX;
+ segment_manager_del (sm);
+ }
+ }
+
+ application_table_del (app);
+ pool_put (app_pool, app);
+}
+
+static void
+application_verify_cb_fns (session_cb_vft_t * cb_fns)
+{
+ if (cb_fns->session_accept_callback == 0)
+ clib_warning ("No accept callback function provided");
+ if (cb_fns->session_connected_callback == 0)
+ clib_warning ("No session connected callback function provided");
+ if (cb_fns->session_disconnect_callback == 0)
+ clib_warning ("No session disconnect callback function provided");
+ if (cb_fns->session_reset_callback == 0)
+ clib_warning ("No session reset callback function provided");
+}
+
+int
+application_init (application_t * app, u32 api_client_index, u64 * options,
+ session_cb_vft_t * cb_fns)
+{
+ segment_manager_t *sm;
+ segment_manager_properties_t *props;
+ u32 app_evt_queue_size, first_seg_size;
+ u32 default_rx_fifo_size = 16 << 10, default_tx_fifo_size = 16 << 10;
+ int rv;
+
+ app_evt_queue_size = options[APP_EVT_QUEUE_SIZE] > 0 ?
+ options[APP_EVT_QUEUE_SIZE] : default_app_evt_queue_size;
+
+ /* Setup segment manager */
+ sm = segment_manager_new ();
+ sm->app_index = app->index;
+ props = &app->sm_properties;
+ props->add_segment_size = options[SESSION_OPTIONS_ADD_SEGMENT_SIZE];
+ props->rx_fifo_size = options[SESSION_OPTIONS_RX_FIFO_SIZE];
+ props->rx_fifo_size =
+ props->rx_fifo_size ? props->rx_fifo_size : default_rx_fifo_size;
+ props->tx_fifo_size = options[SESSION_OPTIONS_TX_FIFO_SIZE];
+ props->tx_fifo_size =
+ props->tx_fifo_size ? props->tx_fifo_size : default_tx_fifo_size;
+ props->add_segment = props->add_segment_size != 0;
+ props->preallocated_fifo_pairs = options[APP_OPTIONS_PREALLOC_FIFO_PAIRS];
+ props->use_private_segment = options[APP_OPTIONS_FLAGS]
+ & APP_OPTIONS_FLAGS_BUILTIN_APP;
+ props->private_segment_count = options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT];
+ props->private_segment_size = options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE];
+
+ first_seg_size = options[SESSION_OPTIONS_SEGMENT_SIZE];
+ if ((rv = segment_manager_init (sm, props, first_seg_size)))
+ return rv;
+ sm->first_is_protected = 1;
+
+ app->first_segment_manager = segment_manager_index (sm);
+ app->api_client_index = api_client_index;
+ app->flags = options[APP_OPTIONS_FLAGS];
+ app->cb_fns = *cb_fns;
+
+ /* Allocate app event queue in the first shared-memory segment */
+ app->event_queue = segment_manager_alloc_queue (sm, app_evt_queue_size);
+
+ /* Check that the obvious things are properly set up */
+ application_verify_cb_fns (cb_fns);
+
+ /* Add app to lookup by api_client_index table */
+ application_table_add (app);
+
+ return 0;
+}
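+
+/*
+ * Caller sketch (option indices as referenced above; the array-length
+ * constant and the callback struct name are assumptions): a built-in
+ * app would fill the options vector before initializing:
+ *
+ *   u64 options[APP_OPTIONS_N_OPTIONS] = { 0 };
+ *   options[SESSION_OPTIONS_SEGMENT_SIZE] = 256 << 20;
+ *   options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+ *   app = application_new ();
+ *   application_init (app, ~0, options, &my_cb_vft);
+ *
+ * where api_client_index ~0 marks the app as built-in, matching the
+ * check in application_api_queue_is_full() above.
+ */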
+
+application_t *
+application_get (u32 index)
+{
+ return pool_elt_at_index (app_pool, index);
+}
+
+application_t *
+application_get_if_valid (u32 index)
+{
+ if (pool_is_free_index (app_pool, index))
+ return 0;
+
+ return pool_elt_at_index (app_pool, index);
+}
+
+u32
+application_get_index (application_t * app)
+{
+ return app - app_pool;
+}
+
+static segment_manager_t *
+application_alloc_segment_manager (application_t * app)
+{
+ segment_manager_t *sm = 0;
+
+ /* If the first segment manager is not in use, don't allocate a new one */
+ if (app->first_segment_manager != APP_INVALID_SEGMENT_MANAGER_INDEX
+ && app->first_segment_manager_in_use == 0)
+ {
+ sm = segment_manager_get (app->first_segment_manager);
+ app->first_segment_manager_in_use = 1;
+ return sm;
+ }
+
+ sm = segment_manager_new ();
+ sm->properties = &app->sm_properties;
+
+ return sm;
+}
+
+/**
+ * Start listening on a local transport endpoint for the requested transport.
+ *
+ * Creates a 'dummy' stream session with state LISTENING to be used in session
+ * lookups prior to establishing a connection. Requests that the transport
+ * build its own specific listening connection.
+ */
+int
+application_start_listen (application_t * srv, session_type_t session_type,
+ transport_endpoint_t * tep, u64 * res)
+{
+ segment_manager_t *sm;
+ stream_session_t *s;
+ u64 handle;
+
+ s = listen_session_new (session_type);
+ s->app_index = srv->index;
+
+ if (stream_session_listen (s, tep))
+ goto err;
+
+ /* Allocate segment manager. All sessions derived out of a listen session
+ * have fifos allocated by the same segment manager. */
+ sm = application_alloc_segment_manager (srv);
+ if (sm == 0)
+ goto err;
+
+  /* Add to app's listener table. Useful for finding all child listeners
+   * when the app goes down, though it is not needed just for unbinding */
+ handle = listen_session_get_handle (s);
+ hash_set (srv->listeners_table, handle, segment_manager_index (sm));
+
+ *res = handle;
+ return 0;
+
+err:
+ listen_session_del (s);
+ return -1;
+}
+
+/**
+ * Stop listening on session associated to handle
+ */
+int
+application_stop_listen (application_t * srv, u64 handle)
+{
+ stream_session_t *listener;
+ uword *indexp;
+ segment_manager_t *sm;
+
+  /* Reject a null app here since srv is dereferenced below */
+  if (!srv || hash_get (srv->listeners_table, handle) == 0)
+    {
+      clib_warning ("app doesn't own handle %llu!", handle);
+      return -1;
+    }
+
+ listener = listen_session_get_from_handle (handle);
+ stream_session_stop_listen (listener);
+
+ indexp = hash_get (srv->listeners_table, handle);
+ ASSERT (indexp);
+
+ sm = segment_manager_get (*indexp);
+ if (srv->first_segment_manager == *indexp)
+ {
+ /* Delete sessions but don't remove segment manager */
+ srv->first_segment_manager_in_use = 0;
+ segment_manager_del_sessions (sm);
+ }
+ else
+ {
+ segment_manager_init_del (sm);
+ }
+ hash_unset (srv->listeners_table, handle);
+ listen_session_del (listener);
+
+ return 0;
+}
+
+int
+application_open_session (application_t * app, session_type_t sst,
+ transport_endpoint_t * tep, u32 api_context)
+{
+ segment_manager_t *sm;
+ transport_connection_t *tc = 0;
+ int rv;
+
+ /* Make sure we have a segment manager for connects */
+ if (app->connects_seg_manager == (u32) ~ 0)
+ {
+ sm = application_alloc_segment_manager (app);
+ if (sm == 0)
+ return -1;
+ app->connects_seg_manager = segment_manager_index (sm);
+ }
+
+ if ((rv = stream_session_open (app->index, sst, tep, &tc)))
+ return rv;
+
+ /* Store api_context for when the reply comes. Not the nicest thing
+ * but better than allocating a separate half-open pool. */
+ tc->s_index = api_context;
+
+ return 0;
+}
+
+segment_manager_t *
+application_get_connect_segment_manager (application_t * app)
+{
+ ASSERT (app->connects_seg_manager != (u32) ~ 0);
+ return segment_manager_get (app->connects_seg_manager);
+}
+
+segment_manager_t *
+application_get_listen_segment_manager (application_t * app,
+ stream_session_t * s)
+{
+ uword *smp;
+ smp = hash_get (app->listeners_table, listen_session_get_handle (s));
+ ASSERT (smp != 0);
+ return segment_manager_get (*smp);
+}
+
+static u8 *
+app_get_name_from_reg_index (application_t * app)
+{
+ u8 *app_name;
+
+ vl_api_registration_t *regp;
+ regp = vl_api_client_index_to_registration (app->api_client_index);
+ if (!regp)
+ app_name = format (0, "builtin-%d%c", app->index, 0);
+ else
+ app_name = format (0, "%s%c", regp->name, 0);
+
+ return app_name;
+}
+
+int
+application_is_proxy (application_t * app)
+{
+  return (app->flags & APP_OPTIONS_FLAGS_IS_PROXY) != 0;
+}
+
+int
+application_add_segment_notify (u32 app_index, u32 fifo_segment_index)
+{
+ application_t *app = application_get (app_index);
+ u32 seg_size = 0;
+ u8 *seg_name;
+
+ /* Send an API message to the external app, to map new segment */
+ ASSERT (app->cb_fns.add_segment_callback);
+
+ segment_manager_get_segment_info (fifo_segment_index, &seg_name, &seg_size);
+ return app->cb_fns.add_segment_callback (app->api_client_index, seg_name,
+ seg_size);
+}
+
+u8 *
+format_application_listener (u8 * s, va_list * args)
+{
+ application_t *app = va_arg (*args, application_t *);
+ u64 handle = va_arg (*args, u64);
+ u32 index = va_arg (*args, u32);
+ int verbose = va_arg (*args, int);
+ stream_session_t *listener;
+ u8 *app_name, *str;
+
+ if (app == 0)
+ {
+ if (verbose)
+ s = format (s, "%-40s%-20s%-15s%-15s%-10s", "Connection", "App",
+ "API Client", "ListenerID", "SegManager");
+ else
+ s = format (s, "%-40s%-20s", "Connection", "App");
+
+ return s;
+ }
+
+ app_name = app_get_name_from_reg_index (app);
+ listener = listen_session_get_from_handle (handle);
+ str = format (0, "%U", format_stream_session, listener, verbose);
+
+ if (verbose)
+ {
+ s = format (s, "%-40s%-20s%-15u%-15u%-10u", str, app_name,
+ app->api_client_index, handle, index);
+ }
+ else
+ s = format (s, "%-40s%-20s", str, app_name);
+
+ vec_free (app_name);
+ return s;
+}
+
+void
+application_format_connects (application_t * app, int verbose)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ segment_manager_t *sm;
+ u8 *app_name, *s = 0;
+ int j;
+
+ /* Header */
+ if (app == 0)
+ {
+ if (verbose)
+ vlib_cli_output (vm, "%-40s%-20s%-15s%-10s", "Connection", "App",
+ "API Client", "SegManager");
+ else
+ vlib_cli_output (vm, "%-40s%-20s", "Connection", "App");
+ return;
+ }
+
+ /* make sure */
+ if (app->connects_seg_manager == (u32) ~ 0)
+ return;
+
+ app_name = app_get_name_from_reg_index (app);
+
+ /* Across all fifo segments */
+ sm = segment_manager_get (app->connects_seg_manager);
+ for (j = 0; j < vec_len (sm->segment_indices); j++)
+ {
+ svm_fifo_segment_private_t *fifo_segment;
+ svm_fifo_t *fifo;
+ u8 *str;
+
+ fifo_segment = svm_fifo_segment_get_segment (sm->segment_indices[j]);
+ fifo = svm_fifo_segment_get_fifo_list (fifo_segment);
+ while (fifo)
+ {
+ u32 session_index, thread_index;
+ stream_session_t *session;
+
+ session_index = fifo->master_session_index;
+ thread_index = fifo->master_thread_index;
+
+ session = stream_session_get (session_index, thread_index);
+ str = format (0, "%U", format_stream_session, session, verbose);
+
+ if (verbose)
+ s = format (s, "%-40s%-20s%-15u%-10u", str, app_name,
+ app->api_client_index, app->connects_seg_manager);
+ else
+ s = format (s, "%-40s%-20s", str, app_name);
+
+ vlib_cli_output (vm, "%v", s);
+ vec_reset_length (s);
+ vec_free (str);
+
+ fifo = fifo->next;
+ }
+ vec_free (s);
+ }
+
+ vec_free (app_name);
+}
+
+u8 *
+format_application (u8 * s, va_list * args)
+{
+ application_t *app = va_arg (*args, application_t *);
+  int verbose = va_arg (*args, int);
+ u8 *app_name;
+
+ if (app == 0)
+ {
+ if (verbose)
+ s = format (s, "%-10s%-20s%-15s%-15s%-15s%-15s", "Index", "Name",
+ "API Client", "Add seg size", "Rx fifo size",
+ "Tx fifo size");
+ else
+ s = format (s, "%-10s%-20s%-20s", "Index", "Name", "API Client");
+ return s;
+ }
+
+ app_name = app_get_name_from_reg_index (app);
+ if (verbose)
+ s = format (s, "%-10d%-20s%-15d%-15d%-15d%-15d", app->index, app_name,
+ app->api_client_index, app->sm_properties.add_segment_size,
+ app->sm_properties.rx_fifo_size,
+ app->sm_properties.tx_fifo_size);
+ else
+ s = format (s, "%-10d%-20s%-20d", app->index, app_name,
+ app->api_client_index);
+ return s;
+}
+
+static clib_error_t *
+show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ application_t *app;
+ int do_server = 0;
+ int do_client = 0;
+ int verbose = 0;
+
+  if (!session_manager_is_enabled ())
+    {
+      return clib_error_return (0, "session layer is not enabled");
+    }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "server"))
+ do_server = 1;
+ else if (unformat (input, "client"))
+ do_client = 1;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ break;
+ }
+
+ if (do_server)
+ {
+ u64 handle;
+ u32 index;
+ if (pool_elts (app_pool))
+ {
+ vlib_cli_output (vm, "%U", format_application_listener,
+ 0 /* header */ , 0, 0,
+ verbose);
+ /* *INDENT-OFF* */
+ pool_foreach (app, app_pool,
+ ({
+ /* App's listener sessions */
+ if (hash_elts (app->listeners_table) == 0)
+ continue;
+ hash_foreach (handle, index, app->listeners_table,
+ ({
+ vlib_cli_output (vm, "%U", format_application_listener, app,
+ handle, index, verbose);
+ }));
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ vlib_cli_output (vm, "No active server bindings");
+ }
+
+ if (do_client)
+ {
+ if (pool_elts (app_pool))
+ {
+ application_format_connects (0, verbose);
+
+ /* *INDENT-OFF* */
+ pool_foreach (app, app_pool,
+ ({
+ if (app->connects_seg_manager == (u32)~0)
+ continue;
+ application_format_connects (app, verbose);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ vlib_cli_output (vm, "No active client bindings");
+ }
+
+ /* Print app related info */
+ if (!do_server && !do_client)
+ {
+ vlib_cli_output (vm, "%U", format_application, 0, verbose);
+      /* *INDENT-OFF* */
+      pool_foreach (app, app_pool,
+      ({
+        vlib_cli_output (vm, "%U", format_application, app, verbose);
+      }));
+      /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_app_command, static) =
+{
+ .path = "show app",
+ .short_help = "show app [server|client] [verbose]",
+ .function = show_app_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
new file mode 100644
index 00000000..e030c376
--- /dev/null
+++ b/src/vnet/session/application.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_VNET_SESSION_APPLICATION_H_
+#define SRC_VNET_SESSION_APPLICATION_H_
+
+#include <vnet/vnet.h>
+#include <vnet/session/session.h>
+#include <vnet/session/segment_manager.h>
+
+typedef enum
+{
+ APP_SERVER,
+ APP_CLIENT,
+ APP_N_TYPES
+} application_type_t;
+
+typedef struct _stream_session_cb_vft
+{
+ /** Notify server of new segment */
+ int (*add_segment_callback) (u32 api_client_index, const u8 * seg_name,
+ u32 seg_size);
+
+ /** Notify server of newly accepted session */
+ int (*session_accept_callback) (stream_session_t * new_session);
+
+ /* Connection request callback */
+ int (*session_connected_callback) (u32 app_index, u32 opaque,
+ stream_session_t * s, u8 code);
+
+ /** Notify app that session is closing */
+ void (*session_disconnect_callback) (stream_session_t * s);
+
+ /** Notify app that session was reset */
+ void (*session_reset_callback) (stream_session_t * s);
+
+ /* Direct RX callback, for built-in servers */
+ int (*builtin_server_rx_callback) (stream_session_t * session);
+
+ /* Redirect connection to local server */
+ int (*redirect_connect_callback) (u32 api_client_index, void *mp);
+} session_cb_vft_t;
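+
+/*
+ * Sketch of a minimal built-in server vft (handler names are
+ * hypothetical): populating at least the four callbacks checked by
+ * application_verify_cb_fns() avoids startup warnings.
+ *
+ *   static session_cb_vft_t my_server_cb_vft = {
+ *     .session_accept_callback = my_accept,
+ *     .session_connected_callback = my_connected,
+ *     .session_disconnect_callback = my_disconnect,
+ *     .session_reset_callback = my_reset,
+ *     .builtin_server_rx_callback = my_rx,
+ *   };
+ */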
+
+typedef struct _application
+{
+ /** Index in server pool */
+ u32 index;
+
+ /** Flags */
+ u32 flags;
+
+ /*
+ * Binary API interface to external app
+ */
+
+ /** Binary API connection index, ~0 if internal */
+ u32 api_client_index;
+
+ /** Application listens for events on this svm queue */
+ unix_shared_memory_queue_t *event_queue;
+
+ /*
+ * Callbacks: shoulder-taps for the server/client
+ */
+
+ session_cb_vft_t cb_fns;
+
+ /*
+ * svm segment management
+ */
+ u32 connects_seg_manager;
+
+ /** Lookup tables for listeners. Value is segment manager index */
+ uword *listeners_table;
+
+  /** The first segment of the first segment manager holds the application's
+   * event fifo. Depending on what the app does, it may be used either for
+   * a listener or for connects. */
+ u32 first_segment_manager;
+ u8 first_segment_manager_in_use;
+
+ /** Segment manager properties. Shared by all segment managers */
+ segment_manager_properties_t sm_properties;
+} application_t;
+
+#define APP_INVALID_SEGMENT_MANAGER_INDEX ((u32) ~0)
+
+application_t *application_new ();
+int
+application_init (application_t * app, u32 api_client_index, u64 * options,
+ session_cb_vft_t * cb_fns);
+void application_del (application_t * app);
+application_t *application_get (u32 index);
+application_t *application_get_if_valid (u32 index);
+application_t *application_lookup (u32 api_client_index);
+u32 application_get_index (application_t * app);
+
+int
+application_start_listen (application_t * app, session_type_t session_type,
+ transport_endpoint_t * tep, u64 * handle);
+int application_stop_listen (application_t * srv, u64 handle);
+int
+application_open_session (application_t * app, session_type_t sst,
+ transport_endpoint_t * tep, u32 api_context);
+int application_api_queue_is_full (application_t * app);
+
+segment_manager_t *application_get_listen_segment_manager (application_t *
+ app,
+ stream_session_t *
+ s);
+segment_manager_t *application_get_connect_segment_manager (application_t *
+ app);
+int application_is_proxy (application_t * app);
+int application_add_segment_notify (u32 app_index, u32 fifo_segment_index);
+
+#endif /* SRC_VNET_SESSION_APPLICATION_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
new file mode 100644
index 00000000..7e7449aa
--- /dev/null
+++ b/src/vnet/session/application_interface.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/session/application_interface.h>
+
+#include <vnet/session/session.h>
+#include <vlibmemory/api.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/ip4_fib.h>
+
+/** @file
+ VPP's application/session API bind/unbind/connect/disconnect calls
+*/
+
+static u8
+ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4)
+{
+ if (is_ip4)
+ return (ip46_address->ip4.as_u32 == 0);
+ else
+ return (ip46_address->as_u64[0] == 0 && ip46_address->as_u64[1] == 0);
+}
+
+static u8
+ip_is_local (ip46_address_t * ip46_address, u8 is_ip4)
+{
+ fib_node_index_t fei;
+ fib_entry_flag_t flags;
+ fib_prefix_t prefix;
+
+ /* Check if requester is local */
+ if (is_ip4)
+ {
+ prefix.fp_len = 32;
+ prefix.fp_proto = FIB_PROTOCOL_IP4;
+ }
+ else
+ {
+ prefix.fp_len = 128;
+ prefix.fp_proto = FIB_PROTOCOL_IP6;
+ }
+
+ clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (ip46_address_t));
+ fei = fib_table_lookup (0, &prefix);
+ flags = fib_entry_get_flags (fei);
+
+ return (flags & FIB_ENTRY_FLAG_LOCAL);
+}
+
+int
+api_parse_session_handle (u64 handle, u32 * session_index, u32 * thread_index)
+{
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ stream_session_t *pool;
+
+ *thread_index = handle & 0xFFFFFFFF;
+ *session_index = handle >> 32;
+
+ if (*thread_index >= vec_len (smm->sessions))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool = smm->sessions[*thread_index];
+
+ if (pool_is_free_index (pool, *session_index))
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ return 0;
+}
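+
+/*
+ * For reference, the parse above implies the inverse packing (an
+ * illustrative sketch, not an exported helper):
+ *
+ *   u64 handle = ((u64) session_index << 32) | (u64) thread_index;
+ */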
+
+int
+vnet_bind_i (u32 app_index, session_type_t sst,
+ transport_endpoint_t * tep, u64 * handle)
+{
+ application_t *app;
+ stream_session_t *listener;
+
+ app = application_get_if_valid (app_index);
+ if (!app)
+ {
+ clib_warning ("app not attached");
+ return VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ }
+
+ listener = stream_session_lookup_listener (&tep->ip, tep->port, sst);
+ if (listener)
+ return VNET_API_ERROR_ADDRESS_IN_USE;
+
+ if (!ip_is_zero (&tep->ip, tep->is_ip4)
+ && !ip_is_local (&tep->ip, tep->is_ip4))
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ /* Setup listen path down to transport */
+ return application_start_listen (app, sst, tep, handle);
+}
+
+int
+vnet_unbind_i (u32 app_index, u64 handle)
+{
+ application_t *app = application_get_if_valid (app_index);
+
+ if (!app)
+ {
+ clib_warning ("app (%d) not attached", app_index);
+ return VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ }
+
+ /* Clear the listener */
+ return application_stop_listen (app, handle);
+}
+
+int
+vnet_connect_i (u32 app_index, u32 api_context, session_type_t sst,
+ transport_endpoint_t * tep, void *mp)
+{
+ stream_session_t *listener;
+ application_t *server, *app;
+
+ /*
+ * Figure out if connecting to a local server
+ */
+ listener = stream_session_lookup_listener (&tep->ip, tep->port, sst);
+ if (listener)
+ {
+ server = application_get (listener->app_index);
+
+ /*
+ * Server is willing to have a direct fifo connection created
+ * instead of going through the state machine, etc.
+ */
+ if (server->flags & APP_OPTIONS_FLAGS_USE_FIFO)
+ return server->cb_fns.
+ redirect_connect_callback (server->api_client_index, mp);
+ }
+
+ /*
+ * Not connecting to a local server. Create regular session
+ */
+ app = application_get (app_index);
+ return application_open_session (app, sst, tep, api_context);
+}
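+
+/*
+ * In short, the connect above resolves to one of two paths:
+ *
+ *   if (local listener && server set APP_OPTIONS_FLAGS_USE_FIFO)
+ *     -> redirect_connect_callback: cut-through fifos, no transport
+ *   else
+ *     -> application_open_session: regular transport connect
+ */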
+
+/**
+ * unformat a vnet URI
+ *
+ * fifo://name
+ * tcp://ip46-addr:port
+ * udp://ip46-addr:port
+ *
+ * u8 ip46_address[16];
+ * u16 port_in_host_byte_order;
+ * stream_session_type_t sst;
+ * u8 *fifo_name;
+ *
+ * if (unformat (input, "%U", unformat_vnet_uri, &ip46_address,
+ * &sst, &port, &fifo_name))
+ * etc...
+ *
+ */
+uword
+unformat_vnet_uri (unformat_input_t * input, va_list * args)
+{
+ session_type_t *sst = va_arg (*args, session_type_t *);
+ transport_endpoint_t *tep = va_arg (*args, transport_endpoint_t *);
+
+ if (unformat (input, "tcp://%U/%d", unformat_ip4_address, &tep->ip.ip4,
+ &tep->port))
+ {
+ *sst = SESSION_TYPE_IP4_TCP;
+ tep->port = clib_host_to_net_u16 (tep->port);
+ tep->is_ip4 = 1;
+ return 1;
+ }
+ if (unformat (input, "udp://%U/%d", unformat_ip4_address, &tep->ip.ip4,
+ &tep->port))
+ {
+ *sst = SESSION_TYPE_IP4_UDP;
+ tep->port = clib_host_to_net_u16 (tep->port);
+ tep->is_ip4 = 1;
+ return 1;
+ }
+ if (unformat (input, "udp://%U/%d", unformat_ip6_address, &tep->ip.ip6,
+ &tep->port))
+ {
+ *sst = SESSION_TYPE_IP6_UDP;
+ tep->port = clib_host_to_net_u16 (tep->port);
+ return 1;
+ }
+ if (unformat (input, "tcp://%U/%d", unformat_ip6_address, &tep->ip.ip6,
+ &tep->port))
+ {
+ *sst = SESSION_TYPE_IP6_TCP;
+ tep->port = clib_host_to_net_u16 (tep->port);
+ return 1;
+ }
+
+ return 0;
+}
+
+static u8 *cache_uri;
+static session_type_t cache_sst;
+static transport_endpoint_t *cache_tep;
+
+int
+parse_uri (char *uri, session_type_t * sst, transport_endpoint_t * tep)
+{
+ unformat_input_t _input, *input = &_input;
+
+ if (cache_uri && !strncmp (uri, (char *) cache_uri, vec_len (cache_uri)))
+ {
+ *sst = cache_sst;
+ *tep = *cache_tep;
+ return 0;
+ }
+
+  /* Make sure the uri is null terminated */
+ uri = (char *) format (0, "%s%c", uri, 0);
+
+ /* Parse uri */
+ unformat_init_string (input, uri, strlen (uri));
+ if (!unformat (input, "%U", unformat_vnet_uri, sst, tep))
+ {
+ unformat_free (input);
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ unformat_free (input);
+
+ vec_free (cache_uri);
+ cache_uri = (u8 *) uri;
+ cache_sst = *sst;
+ if (cache_tep)
+ clib_mem_free (cache_tep);
+ cache_tep = clib_mem_alloc (sizeof (*tep));
+ *cache_tep = *tep;
+
+ return 0;
+}
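+
+/*
+ * Illustrative use of the parser above (address and port hypothetical):
+ *
+ *   session_type_t sst;
+ *   transport_endpoint_t tep;
+ *   memset (&tep, 0, sizeof (tep));
+ *   if (parse_uri ("tcp://6.0.1.1/1234", &sst, &tep) == 0)
+ *     ASSERT (sst == SESSION_TYPE_IP4_TCP && tep.is_ip4);
+ *
+ * Note that tep.port is returned in network byte order.
+ */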
+
+/**
+ * Attaches application.
+ *
+ * Allocates a vpp app, i.e., a structure that keeps back pointers
+ * to the external app and a segment manager used for shared memory
+ * fifo based communication with the external app.
+ */
+int
+vnet_application_attach (vnet_app_attach_args_t * a)
+{
+ application_t *app = 0;
+ segment_manager_t *sm;
+ u8 *seg_name;
+ int rv;
+
+ app = application_new ();
+ if ((rv = application_init (app, a->api_client_index, a->options,
+ a->session_cb_vft)))
+ return rv;
+
+ a->app_event_queue_address = pointer_to_uword (app->event_queue);
+ sm = segment_manager_get (app->first_segment_manager);
+ segment_manager_get_segment_info (sm->segment_indices[0],
+ &seg_name, &a->segment_size);
+
+ a->segment_name_length = vec_len (seg_name);
+ a->segment_name = seg_name;
+ ASSERT (vec_len (a->segment_name) <= 128);
+ a->app_index = app->index;
+ return 0;
+}
+
+int
+vnet_application_detach (vnet_app_detach_args_t * a)
+{
+ application_t *app;
+ app = application_get_if_valid (a->app_index);
+
+ if (!app)
+ {
+ clib_warning ("app not attached");
+ return VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ }
+
+ application_del (app);
+ return 0;
+}
+
+int
+vnet_bind_uri (vnet_bind_args_t * a)
+{
+ session_type_t sst = SESSION_N_TYPES;
+ transport_endpoint_t tep;
+ int rv;
+
+ memset (&tep, 0, sizeof (tep));
+ rv = parse_uri (a->uri, &sst, &tep);
+ if (rv)
+ return rv;
+
+ if ((rv = vnet_bind_i (a->app_index, sst, &tep, &a->handle)))
+ return rv;
+
+ return 0;
+}
+
+int
+vnet_unbind_uri (vnet_unbind_args_t * a)
+{
+ session_type_t sst = SESSION_N_TYPES;
+ stream_session_t *listener;
+ transport_endpoint_t tep;
+ int rv;
+
+  memset (&tep, 0, sizeof (tep));
+  rv = parse_uri (a->uri, &sst, &tep);
+  if (rv)
+    return rv;
+
+  /* parse_uri already returns the port in network byte order */
+  listener = stream_session_lookup_listener (&tep.ip, tep.port, sst);
+ if (!listener)
+ return VNET_API_ERROR_ADDRESS_NOT_IN_USE;
+
+ return vnet_unbind_i (a->app_index, listen_session_get_handle (listener));
+}
+
+int
+vnet_connect_uri (vnet_connect_args_t * a)
+{
+ transport_endpoint_t tep;
+ session_type_t sst;
+ int rv;
+
+ /* Parse uri */
+ memset (&tep, 0, sizeof (tep));
+ rv = parse_uri (a->uri, &sst, &tep);
+ if (rv)
+ return rv;
+
+ return vnet_connect_i (a->app_index, a->api_context, sst, &tep, a->mp);
+}
+
+int
+vnet_disconnect_session (vnet_disconnect_args_t * a)
+{
+ u32 index, thread_index;
+ stream_session_t *s;
+
+ stream_session_parse_handle (a->handle, &index, &thread_index);
+ s = stream_session_get_if_valid (index, thread_index);
+
+ if (!s || s->app_index != a->app_index)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+  /* We're peeking into another thread's pool. Make sure the indices match */
+ ASSERT (s->session_index == index);
+
+ session_send_session_evt_to_thread (a->handle, FIFO_EVENT_DISCONNECT,
+ thread_index);
+ return 0;
+}
+
+int
+vnet_bind (vnet_bind_args_t * a)
+{
+ session_type_t sst = SESSION_N_TYPES;
+ int rv;
+
+ sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4);
+ if ((rv = vnet_bind_i (a->app_index, sst, &a->tep, &a->handle)))
+ return rv;
+
+ return 0;
+}
+
+int
+vnet_unbind (vnet_unbind_args_t * a)
+{
+ return vnet_unbind_i (a->app_index, a->handle);
+}
+
+int
+vnet_connect (vnet_connect_args_t * a)
+{
+ session_type_t sst;
+
+ sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4);
+ return vnet_connect_i (a->app_index, a->api_context, sst, &a->tep, a->mp);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
new file mode 100644
index 00000000..1d63f6cc
--- /dev/null
+++ b/src/vnet/session/application_interface.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_uri_h__
+#define __included_uri_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <svm/svm_fifo_segment.h>
+#include <vnet/session/session.h>
+#include <vnet/session/application.h>
+#include <vnet/session/transport.h>
+
+typedef struct _vnet_app_attach_args_t
+{
+ /** Binary API client index */
+ u32 api_client_index;
+
+ /** Application and segment manager options */
+ u64 *options;
+
+ /** Session to application callback functions */
+ session_cb_vft_t *session_cb_vft;
+
+ /** Flag that indicates if app is builtin */
+ u8 builtin;
+
+ /*
+ * Results
+ */
+ u8 *segment_name;
+ u32 segment_name_length;
+ u32 segment_size;
+ u64 app_event_queue_address;
+ u32 app_index;
+} vnet_app_attach_args_t;
+
+typedef struct _vnet_app_detach_args_t
+{
+ u32 app_index;
+} vnet_app_detach_args_t;
+
+typedef struct _vnet_bind_args_t
+{
+ union
+ {
+ char *uri;
+ struct
+ {
+ transport_endpoint_t tep;
+ transport_proto_t proto;
+ };
+ };
+
+ u32 app_index;
+
+ /*
+ * Results
+ */
+ char *segment_name;
+ u32 segment_name_length;
+ u64 server_event_queue_address;
+ u64 handle;
+} vnet_bind_args_t;
+
+typedef struct _vnet_unbind_args_t
+{
+ union
+ {
+ char *uri;
+ u64 handle;
+ };
+ u32 app_index;
+} vnet_unbind_args_t;
+
+typedef struct _vnet_connect_args
+{
+ union
+ {
+ char *uri;
+ struct
+ {
+ transport_endpoint_t tep;
+ transport_proto_t proto;
+ };
+ };
+ u32 app_index;
+ u32 api_context;
+
+ /* Used for redirects */
+ void *mp;
+
+ /* used for proxy connections */
+ u64 server_handle;
+} vnet_connect_args_t;
+
+typedef struct _vnet_disconnect_args_t
+{
+ u64 handle;
+ u32 app_index;
+} vnet_disconnect_args_t;
+
+/* Application attach options */
+typedef enum
+{
+ APP_EVT_QUEUE_SIZE,
+ APP_OPTIONS_FLAGS,
+ APP_OPTIONS_PREALLOC_FIFO_PAIRS,
+ APP_OPTIONS_PRIVATE_SEGMENT_COUNT,
+ APP_OPTIONS_PRIVATE_SEGMENT_SIZE,
+ SESSION_OPTIONS_SEGMENT_SIZE,
+ SESSION_OPTIONS_ADD_SEGMENT_SIZE,
+ SESSION_OPTIONS_RX_FIFO_SIZE,
+ SESSION_OPTIONS_TX_FIFO_SIZE,
+ SESSION_OPTIONS_PREALLOCATED_FIFO_PAIRS,
+ SESSION_OPTIONS_ACCEPT_COOKIE,
+ SESSION_OPTIONS_N_OPTIONS
+} app_attach_options_index_t;
+
+#define foreach_app_options_flags \
+ _(USE_FIFO, "Use FIFO with redirects") \
+ _(ADD_SEGMENT, "Add segment and signal app if needed") \
+ _(BUILTIN_APP, "Application is builtin") \
+ _(IS_PROXY, "Application is proxying")
+
+typedef enum _app_options
+{
+#define _(sym, str) APP_OPTIONS_##sym,
+ foreach_app_options_flags
+#undef _
+} app_options_t;
+
+typedef enum _app_options_flags
+{
+#define _(sym, str) APP_OPTIONS_FLAGS_##sym = 1 << APP_OPTIONS_##sym,
+ foreach_app_options_flags
+#undef _
+} app_options_flags_t;
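+
+/*
+ * For reference, the two enums above expand to (sketch):
+ *
+ *   APP_OPTIONS_USE_FIFO = 0, APP_OPTIONS_ADD_SEGMENT = 1, ... and
+ *   APP_OPTIONS_FLAGS_USE_FIFO = 1 << 0,
+ *   APP_OPTIONS_FLAGS_ADD_SEGMENT = 1 << 1,
+ *   APP_OPTIONS_FLAGS_BUILTIN_APP = 1 << 2,
+ *   APP_OPTIONS_FLAGS_IS_PROXY = 1 << 3,
+ *
+ * i.e., each flag's bit position is its index in foreach_app_options_flags.
+ */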
+
+#define VNET_CONNECT_REDIRECTED 123
+
+int vnet_application_attach (vnet_app_attach_args_t * a);
+int vnet_application_detach (vnet_app_detach_args_t * a);
+
+int vnet_bind_uri (vnet_bind_args_t *);
+int vnet_unbind_uri (vnet_unbind_args_t * a);
+int vnet_connect_uri (vnet_connect_args_t * a);
+int vnet_disconnect_session (vnet_disconnect_args_t * a);
+
+int vnet_bind (vnet_bind_args_t * a);
+int vnet_connect (vnet_connect_args_t * a);
+int vnet_unbind (vnet_unbind_args_t * a);
+
+int
+api_parse_session_handle (u64 handle, u32 * session_index,
+ u32 * thread_index);
+
+#endif /* __included_uri_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
new file mode 100644
index 00000000..48d02755
--- /dev/null
+++ b/src/vnet/session/segment_manager.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/session/segment_manager.h>
+#include <vnet/session/session.h>
+#include <vnet/session/application.h>
+
+/**
+ * Counter used to build segment names
+ */
+u32 segment_name_counter = 0;
+
+/**
+ * Pool of segment managers
+ */
+segment_manager_t *segment_managers = 0;
+
+/**
+ * Process private segment index
+ */
+u32 *private_segment_indices;
+
+/**
+ * Default fifo and segment size. TODO config.
+ */
+u32 default_fifo_size = 1 << 16;
+u32 default_segment_size = 1 << 20;
+
+void
+segment_manager_get_segment_info (u32 index, u8 ** name, u32 * size)
+{
+ svm_fifo_segment_private_t *s;
+ s = svm_fifo_segment_get_segment (index);
+ *name = s->h->segment_name;
+ *size = s->ssvm.ssvm_size;
+}
+
+always_inline int
+session_manager_add_segment_i (segment_manager_t * sm, u32 segment_size,
+ u8 * segment_name)
+{
+ svm_fifo_segment_create_args_t _ca, *ca = &_ca;
+ int rv;
+
+ memset (ca, 0, sizeof (*ca));
+
+ if (!sm->properties->use_private_segment)
+ {
+ ca->segment_name = (char *) segment_name;
+ ca->segment_size = segment_size;
+ ca->rx_fifo_size = sm->properties->rx_fifo_size;
+ ca->tx_fifo_size = sm->properties->tx_fifo_size;
+ ca->preallocated_fifo_pairs = sm->properties->preallocated_fifo_pairs;
+
+ rv = svm_fifo_segment_create (ca);
+ if (rv)
+ {
+ clib_warning ("svm_fifo_segment_create ('%s', %d) failed",
+ ca->segment_name, ca->segment_size);
+ return VNET_API_ERROR_SVM_SEGMENT_CREATE_FAIL;
+ }
+ }
+ else
+ {
+ u32 rx_fifo_size, tx_fifo_size, rx_rounded_data_size,
+ tx_rounded_data_size;
+ u32 approx_segment_count;
+ u64 approx_total_size;
+
+ ca->segment_name = "process-private-segment";
+ ca->segment_size = ~0;
+ ca->rx_fifo_size = sm->properties->rx_fifo_size;
+ ca->tx_fifo_size = sm->properties->tx_fifo_size;
+ ca->preallocated_fifo_pairs = sm->properties->preallocated_fifo_pairs;
+ ca->private_segment_count = sm->properties->private_segment_count;
+ ca->private_segment_size = sm->properties->private_segment_size;
+
+ /* Default to a small private segment */
+ if (ca->private_segment_size == 0)
+ ca->private_segment_size = 128 << 20;
+
+ /* Calculate space requirements */
+ rx_rounded_data_size = (1 << (max_log2 (ca->rx_fifo_size)));
+ tx_rounded_data_size = (1 << (max_log2 (ca->tx_fifo_size)));
+
+ rx_fifo_size = sizeof (svm_fifo_t) + rx_rounded_data_size;
+ tx_fifo_size = sizeof (svm_fifo_t) + tx_rounded_data_size;
+
+ approx_total_size = (u64) ca->preallocated_fifo_pairs
+ * (rx_fifo_size + tx_fifo_size);
+ approx_segment_count =
+ (approx_total_size +
+ (ca->private_segment_size - 1)) / (u64) ca->private_segment_size;
+
+ /* The user asked us to figure it out... */
+ if (ca->private_segment_count == 0)
+ {
+ ca->private_segment_count = approx_segment_count;
+ }
+ /* Follow directions, but issue a warning */
+ else if (approx_segment_count != ca->private_segment_count)
+ {
+ clib_warning
+ ("Honoring segment count %u, but calculated count was %u",
+ ca->private_segment_count, approx_segment_count);
+ }
+
+ if (svm_fifo_segment_create_process_private (ca))
+ clib_warning ("Failed to create process private segment");
+
+ ASSERT (vec_len (ca->new_segment_indices));
+ }
+ vec_append (sm->segment_indices, ca->new_segment_indices);
+ vec_free (ca->new_segment_indices);
+ return 0;
+}
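+
+/*
+ * Worked example for the private-segment sizing above (numbers are
+ * illustrative): with 64kB rx/tx fifos, 1000 preallocated pairs and the
+ * 128MB default private segment size, and ignoring the svm_fifo_t
+ * headers, approx_total_size is roughly 1000 * (64kB + 64kB) = ~125MB,
+ * so approx_segment_count rounds up to a single segment.
+ */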
+
+int
+session_manager_add_segment (segment_manager_t * sm)
+{
+ u8 *segment_name;
+ svm_fifo_segment_create_args_t _ca, *ca = &_ca;
+ u32 add_segment_size;
+ int rv;
+
+ memset (ca, 0, sizeof (*ca));
+ segment_name = format (0, "%d-%d%c", getpid (), segment_name_counter++, 0);
+ add_segment_size = sm->properties->add_segment_size ?
+ sm->properties->add_segment_size : default_segment_size;
+
+ rv = session_manager_add_segment_i (sm, add_segment_size, segment_name);
+ vec_free (segment_name);
+ return rv;
+}
+
+int
+session_manager_add_first_segment (segment_manager_t * sm, u32 segment_size)
+{
+ u8 *segment_name;
+ int rv;
+
+ segment_name = format (0, "%d-%d%c", getpid (), segment_name_counter++, 0);
+ rv = session_manager_add_segment_i (sm, segment_size, segment_name);
+ vec_free (segment_name);
+ return rv;
+}
+
+segment_manager_t *
+segment_manager_new ()
+{
+ segment_manager_t *sm;
+ pool_get (segment_managers, sm);
+ memset (sm, 0, sizeof (*sm));
+ return sm;
+}
+
+/**
+ * Initializes segment manager based on options provided.
+ * Returns error if svm segment allocation fails.
+ */
+int
+segment_manager_init (segment_manager_t * sm,
+ segment_manager_properties_t * properties,
+ u32 first_seg_size)
+{
+ int rv;
+
+ /* app allocates these */
+ sm->properties = properties;
+
+ first_seg_size = first_seg_size > 0 ? first_seg_size : default_segment_size;
+
+ rv = session_manager_add_first_segment (sm, first_seg_size);
+ if (rv)
+ {
+ clib_warning ("Failed to allocate segment");
+ return rv;
+ }
+
+ clib_spinlock_init (&sm->lockp);
+ return 0;
+}
+
+u8
+segment_manager_has_fifos (segment_manager_t * sm)
+{
+ svm_fifo_segment_private_t *segment;
+ int i;
+
+ for (i = 0; i < vec_len (sm->segment_indices); i++)
+ {
+ segment = svm_fifo_segment_get_segment (sm->segment_indices[i]);
+ if (CLIB_DEBUG && i && !svm_fifo_segment_has_fifos (segment)
+ && !(segment->h->flags & FIFO_SEGMENT_F_IS_PREALLOCATED))
+ clib_warning ("segment %d has no fifos!", sm->segment_indices[i]);
+ if (svm_fifo_segment_has_fifos (segment))
+ return 1;
+ }
+ return 0;
+}
+
+static u8
+segment_manager_app_detached (segment_manager_t * sm)
+{
+ return (sm->app_index == SEGMENT_MANAGER_INVALID_APP_INDEX);
+}
+
+static void
+segment_manager_del_segment (segment_manager_t * sm, u32 segment_index)
+{
+ svm_fifo_segment_private_t *fifo_segment;
+ u32 svm_segment_index;
+ clib_spinlock_lock (&sm->lockp);
+ svm_segment_index = sm->segment_indices[segment_index];
+ fifo_segment = svm_fifo_segment_get_segment (svm_segment_index);
+ if (!fifo_segment
+ || ((fifo_segment->h->flags & FIFO_SEGMENT_F_IS_PREALLOCATED)
+ && !segment_manager_app_detached (sm)))
+ {
+ clib_spinlock_unlock (&sm->lockp);
+ return;
+ }
+ svm_fifo_segment_delete (fifo_segment);
+ vec_del1 (sm->segment_indices, segment_index);
+ clib_spinlock_unlock (&sm->lockp);
+}
+
+/**
+ * Initiate disconnects for all sessions 'owned' by a segment manager
+ */
+void
+segment_manager_del_sessions (segment_manager_t * sm)
+{
+ int j;
+ svm_fifo_segment_private_t *fifo_segment;
+ svm_fifo_t *fifo;
+
+ ASSERT (vec_len (sm->segment_indices));
+
+ /* Across all fifo segments used by the server */
+ for (j = 0; j < vec_len (sm->segment_indices); j++)
+ {
+ fifo_segment = svm_fifo_segment_get_segment (sm->segment_indices[j]);
+ fifo = svm_fifo_segment_get_fifo_list (fifo_segment);
+
+ /*
+	   * Remove any residual sessions from the session lookup table.
+ * Don't bother deleting the individual fifos, we're going to
+ * throw away the fifo segment in a minute.
+ */
+ while (fifo)
+ {
+ u32 session_index, thread_index;
+ stream_session_t *session;
+
+ session_index = fifo->master_session_index;
+ thread_index = fifo->master_thread_index;
+ session = stream_session_get (session_index, thread_index);
+
+ /* Instead of directly removing the session call disconnect */
+ if (session->session_state != SESSION_STATE_CLOSED)
+ {
+ session->session_state = SESSION_STATE_CLOSED;
+ session_send_session_evt_to_thread (stream_session_handle
+ (session),
+ FIFO_EVENT_DISCONNECT,
+ thread_index);
+ }
+ fifo = fifo->next;
+ }
+
+ /* Instead of removing the segment, test when cleaning up disconnected
+ * sessions if the segment can be removed.
+ */
+ }
+}
+
+/**
+ * Removes segment manager.
+ *
+ * Since the fifos allocated in the segment keep backpointers to the
+ * sessions, prior to removing the segment we call session disconnect.
+ * This subsequently propagates into the transport.
+ */
+void
+segment_manager_del (segment_manager_t * sm)
+{
+ int i;
+
+ ASSERT (!segment_manager_has_fifos (sm)
+ && segment_manager_app_detached (sm));
+
+ /* If we have empty preallocated segments that haven't been removed, remove
+ * them now. Apart from that, the first segment in the first segment manager
+ * is not removed when all fifos are removed. It can only be removed when
+ * the manager is explicitly deleted/detached by the app. */
+ for (i = vec_len (sm->segment_indices) - 1; i >= 0; i--)
+ {
+ if (CLIB_DEBUG)
+ {
+ svm_fifo_segment_private_t *segment;
+ segment = svm_fifo_segment_get_segment (sm->segment_indices[i]);
+ ASSERT (!svm_fifo_segment_has_fifos (segment));
+ }
+ segment_manager_del_segment (sm, i);
+ }
+ clib_spinlock_free (&sm->lockp);
+ if (CLIB_DEBUG)
+ memset (sm, 0xfe, sizeof (*sm));
+ pool_put (segment_managers, sm);
+}
+
+void
+segment_manager_init_del (segment_manager_t * sm)
+{
+ if (segment_manager_has_fifos (sm))
+ segment_manager_del_sessions (sm);
+ else
+ {
+ ASSERT (!sm->first_is_protected || segment_manager_app_detached (sm));
+ segment_manager_del (sm);
+ }
+}
+
+int
+segment_manager_alloc_session_fifos (segment_manager_t * sm,
+ svm_fifo_t ** server_rx_fifo,
+ svm_fifo_t ** server_tx_fifo,
+ u32 * fifo_segment_index)
+{
+ svm_fifo_segment_private_t *fifo_segment;
+ u32 fifo_size, sm_index;
+ u8 added_a_segment = 0;
+ int i;
+
+ ASSERT (vec_len (sm->segment_indices));
+
+ /* Make sure we don't have multiple threads trying to allocate segments
+ * at the same time. */
+ clib_spinlock_lock (&sm->lockp);
+
+ /* Allocate svm fifos */
+again:
+ for (i = 0; i < vec_len (sm->segment_indices); i++)
+ {
+ *fifo_segment_index = sm->segment_indices[i];
+ fifo_segment = svm_fifo_segment_get_segment (*fifo_segment_index);
+
+ fifo_size = sm->properties->rx_fifo_size;
+ fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size;
+ *server_rx_fifo =
+ svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size,
+ FIFO_SEGMENT_RX_FREELIST);
+
+ fifo_size = sm->properties->tx_fifo_size;
+ fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size;
+ *server_tx_fifo =
+ svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size,
+ FIFO_SEGMENT_TX_FREELIST);
+
+ if (*server_rx_fifo == 0)
+ {
+ /* This would be very odd, but handle it... */
+ if (*server_tx_fifo != 0)
+ {
+ svm_fifo_segment_free_fifo (fifo_segment, *server_tx_fifo,
+ FIFO_SEGMENT_TX_FREELIST);
+ *server_tx_fifo = 0;
+ }
+ continue;
+ }
+ if (*server_tx_fifo == 0)
+ {
+ if (*server_rx_fifo != 0)
+ {
+ svm_fifo_segment_free_fifo (fifo_segment, *server_rx_fifo,
+ FIFO_SEGMENT_RX_FREELIST);
+ *server_rx_fifo = 0;
+ }
+ continue;
+ }
+ break;
+ }
+
+ /* See if we're supposed to create another segment */
+ if (*server_rx_fifo == 0)
+ {
+ if (sm->properties->add_segment && !sm->properties->use_private_segment)
+ {
+ if (added_a_segment)
+ {
+ clib_warning ("added a segment, still can't allocate a fifo");
+ clib_spinlock_unlock (&sm->lockp);
+ return SESSION_ERROR_NEW_SEG_NO_SPACE;
+ }
+
+ if (session_manager_add_segment (sm))
+ {
+ clib_spinlock_unlock (&sm->lockp);
+ return VNET_API_ERROR_URI_FIFO_CREATE_FAILED;
+ }
+
+ added_a_segment = 1;
+ goto again;
+ }
+ else
+ {
+ clib_warning ("No space to allocate fifos!");
+ clib_spinlock_unlock (&sm->lockp);
+ return SESSION_ERROR_NO_SPACE;
+ }
+ }
+
+ /* Backpointers to segment manager */
+ sm_index = segment_manager_index (sm);
+ (*server_tx_fifo)->segment_manager = sm_index;
+ (*server_rx_fifo)->segment_manager = sm_index;
+
+ clib_spinlock_unlock (&sm->lockp);
+
+ if (added_a_segment)
+ return application_add_segment_notify (sm->app_index,
+ *fifo_segment_index);
+
+ return 0;
+}
+
+void
+segment_manager_dealloc_fifos (u32 svm_segment_index, svm_fifo_t * rx_fifo,
+ svm_fifo_t * tx_fifo)
+{
+ segment_manager_t *sm;
+ svm_fifo_segment_private_t *fifo_segment;
+ u32 i, segment_index = ~0;
+ u8 is_first;
+
+ sm = segment_manager_get_if_valid (rx_fifo->segment_manager);
+
+ /* It's possible to have no segment manager if the session was removed
+   * as a result of a detach. */
+ if (!sm)
+ return;
+
+ fifo_segment = svm_fifo_segment_get_segment (svm_segment_index);
+ svm_fifo_segment_free_fifo (fifo_segment, rx_fifo,
+ FIFO_SEGMENT_RX_FREELIST);
+ svm_fifo_segment_free_fifo (fifo_segment, tx_fifo,
+ FIFO_SEGMENT_TX_FREELIST);
+
+ /*
+ * Try to remove svm segment if it has no fifos. This can be done only if
+ * the segment is not the first in the segment manager or if it is first
+ * and it is not protected. Moreover, if the segment is first and the app
+ * has detached from the segment manager, remove the segment manager.
+ */
+ if (!svm_fifo_segment_has_fifos (fifo_segment))
+ {
+ is_first = sm->segment_indices[0] == svm_segment_index;
+
+      /* Remove segment if it is not the first, or if first but not protected */
+ if (!is_first || !sm->first_is_protected)
+ {
+ /* Find the segment manager segment index */
+ for (i = 0; i < vec_len (sm->segment_indices); i++)
+ if (sm->segment_indices[i] == svm_segment_index)
+ {
+ segment_index = i;
+ break;
+ }
+ ASSERT (segment_index != (u32) ~ 0);
+ segment_manager_del_segment (sm, segment_index);
+ }
+
+ /* Remove segment manager if no sessions and detached from app */
+ if (segment_manager_app_detached (sm)
+ && !segment_manager_has_fifos (sm))
+ segment_manager_del (sm);
+ }
+}
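+
+/*
+ * Decision table for the cleanup above, derived from the code:
+ *
+ *   segment empty, not first             -> delete segment
+ *   segment empty, first, not protected  -> delete segment
+ *   segment empty, first, protected      -> keep (app still attached)
+ *   app detached and no fifos left       -> delete the segment manager too
+ */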
+
+/**
+ * Allocates shm queue in the first segment
+ */
+unix_shared_memory_queue_t *
+segment_manager_alloc_queue (segment_manager_t * sm, u32 queue_size)
+{
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_private_t *segment;
+ unix_shared_memory_queue_t *q;
+ void *oldheap;
+
+ ASSERT (sm->segment_indices != 0);
+
+ segment = svm_fifo_segment_get_segment (sm->segment_indices[0]);
+ sh = segment->ssvm.sh;
+
+ oldheap = ssvm_push_heap (sh);
+ q = unix_shared_memory_queue_init (queue_size,
+ sizeof (session_fifo_event_t),
+ 0 /* consumer pid */ ,
+ 0 /* signal when queue non-empty */ );
+ ssvm_pop_heap (oldheap);
+ return q;
+}
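+
+/*
+ * Illustrative use (the 128-entry queue size is hypothetical):
+ *
+ *   unix_shared_memory_queue_t *q;
+ *   q = segment_manager_alloc_queue (sm, 128);
+ */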
+
+/**
+ * Frees shm queue allocated in the first segment
+ */
+void
+segment_manager_dealloc_queue (segment_manager_t * sm,
+ unix_shared_memory_queue_t * q)
+{
+ ssvm_shared_header_t *sh;
+ svm_fifo_segment_private_t *segment;
+ void *oldheap;
+
+ ASSERT (sm->segment_indices != 0);
+
+ segment = svm_fifo_segment_get_segment (sm->segment_indices[0]);
+ sh = segment->ssvm.sh;
+
+ oldheap = ssvm_push_heap (sh);
+ unix_shared_memory_queue_free (q);
+ ssvm_pop_heap (oldheap);
+}
+
+static clib_error_t *
+segment_manager_show_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ svm_fifo_segment_private_t *segments, *seg;
+ segment_manager_t *sm;
+ u8 show_segments = 0, verbose = 0, *name;
+ uword address;
+ u64 size;
+ u32 active_fifos;
+ u32 free_fifos;
+
+ mheap_t *heap_header;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "segments"))
+ show_segments = 1;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ vlib_cli_output (vm, "%d segment managers allocated",
+ pool_elts (segment_managers));
+ if (verbose && pool_elts (segment_managers))
+ {
+ vlib_cli_output (vm, "%-10s%=15s%=12s", "Index", "App Index",
+ "Segments");
+
+ /* *INDENT-OFF* */
+ pool_foreach (sm, segment_managers, ({
+ vlib_cli_output (vm, "%-10d%=15d%=12d", segment_manager_index(sm),
+ sm->app_index, vec_len (sm->segment_indices));
+ }));
+ /* *INDENT-ON* */
+
+ }
+ if (show_segments)
+ {
+ segments = svm_fifo_segment_segments_pool ();
+ vlib_cli_output (vm, "%d svm fifo segments allocated",
+ pool_elts (segments));
+ vlib_cli_output (vm, "%-20s%=12s%=16s%=16s%=16s", "Name",
+ "HeapSize (M)", "ActiveFifos", "FreeFifos", "Address");
+
+ /* *INDENT-OFF* */
+ pool_foreach (seg, segments, ({
+ if (seg->h->flags & FIFO_SEGMENT_F_IS_PRIVATE)
+ {
+ address = pointer_to_uword (seg->ssvm.sh->heap);
+ if (seg->h->flags & FIFO_SEGMENT_F_IS_MAIN_HEAP)
+ name = format (0, "main heap");
+ else
+ name = format (0, "private heap");
+ heap_header = mheap_header (seg->ssvm.sh->heap);
+ size = heap_header->max_size;
+ }
+ else
+ {
+ address = seg->ssvm.sh->ssvm_va;
+ size = seg->ssvm.ssvm_size;
+ name = seg->ssvm.sh->name;
+ }
+ active_fifos = svm_fifo_segment_num_fifos (seg);
+ free_fifos = svm_fifo_segment_num_free_fifos (seg, ~0 /* size */);
+ vlib_cli_output (vm, "%-20v%=16llu%=16u%=16u%16llx",
+ name, size >> 20ULL, active_fifos, free_fifos,
+ address);
+ if (verbose)
+ vlib_cli_output (vm, "%U",
+ format_svm_fifo_segment, seg, verbose);
+ if (seg->h->flags & FIFO_SEGMENT_F_IS_PRIVATE)
+ vec_free (name);
+ }));
+ /* *INDENT-ON* */
+
+ }
+ return 0;
+}
+
+ /* *INDENT-OFF* */
+VLIB_CLI_COMMAND (segment_manager_show_command, static) =
+{
+ .path = "show segment-manager",
+ .short_help = "show segment-manager [segments][verbose]",
+ .function = segment_manager_show_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h
new file mode 100644
index 00000000..6e5b8989
--- /dev/null
+++ b/src/vnet/session/segment_manager.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_VNET_SESSION_SEGMENT_MANAGER_H_
+#define SRC_VNET_SESSION_SEGMENT_MANAGER_H_
+
+#include <vnet/vnet.h>
+#include <svm/svm_fifo_segment.h>
+
+#include <vlibmemory/unix_shared_memory_queue.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/lock.h>
+
+typedef struct _segment_manager_properties
+{
+ /** Session fifo sizes. */
+ u32 rx_fifo_size;
+ u32 tx_fifo_size;
+
+ /** Preallocated pool sizes */
+ u32 preallocated_fifo_pairs;
+
+ /** Configured additional segment size */
+ u32 add_segment_size;
+
+ /** Flag that indicates if additional segments should be created */
+ u8 add_segment;
+
+ /** Use private memory segment instead of shared memory */
+ u8 use_private_segment;
+
+ /** Use one or more private mheaps, instead of the global heap */
+ u32 private_segment_count;
+ u32 private_segment_size;
+} segment_manager_properties_t;
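+
+/*
+ * Illustrative initialization (values are examples only; they mirror the
+ * defaults in segment_manager.c):
+ *
+ *   segment_manager_properties_t props = {
+ *     .rx_fifo_size = 1 << 16,
+ *     .tx_fifo_size = 1 << 16,
+ *     .add_segment_size = 1 << 20,
+ *     .add_segment = 1,
+ *   };
+ */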
+
+typedef struct _segment_manager
+{
+ clib_spinlock_t lockp;
+
+ /** segments mapped by this manager */
+ u32 *segment_indices;
+
+ /** Owner app index */
+ u32 app_index;
+
+ /**
+ * Pointer to manager properties. Could be shared among all of
+ * an app's segment managers s
+ */
+ segment_manager_properties_t *properties;
+
+ /**
+   * First segment should not be deleted unless segment manager is deleted.
+ * This also indicates that the segment manager is the first to have been
+ * allocated for the app.
+ */
+ u8 first_is_protected;
+} segment_manager_t;
+
+#define SEGMENT_MANAGER_INVALID_APP_INDEX ((u32) ~0)
+
+/** Pool of segment managers */
+extern segment_manager_t *segment_managers;
+
+always_inline segment_manager_t *
+segment_manager_get (u32 index)
+{
+ return pool_elt_at_index (segment_managers, index);
+}
+
+always_inline segment_manager_t *
+segment_manager_get_if_valid (u32 index)
+{
+ if (pool_is_free_index (segment_managers, index))
+ return 0;
+ return pool_elt_at_index (segment_managers, index);
+}
+
+always_inline u32
+segment_manager_index (segment_manager_t * sm)
+{
+ return sm - segment_managers;
+}
+
+segment_manager_t *segment_manager_new ();
+int
+segment_manager_init (segment_manager_t * sm,
+ segment_manager_properties_t * properties,
+ u32 seg_size);
+
+void segment_manager_get_segment_info (u32 index, u8 ** name, u32 * size);
+int
+session_manager_add_first_segment (segment_manager_t * sm, u32 segment_size);
+int session_manager_add_segment (segment_manager_t * sm);
+void segment_manager_del_sessions (segment_manager_t * sm);
+void segment_manager_del (segment_manager_t * sm);
+void segment_manager_init_del (segment_manager_t * sm);
+u8 segment_manager_has_fifos (segment_manager_t * sm);
+int
+segment_manager_alloc_session_fifos (segment_manager_t * sm,
+ svm_fifo_t ** server_rx_fifo,
+ svm_fifo_t ** server_tx_fifo,
+ u32 * fifo_segment_index);
+void
+segment_manager_dealloc_fifos (u32 svm_segment_index, svm_fifo_t * rx_fifo,
+ svm_fifo_t * tx_fifo);
+unix_shared_memory_queue_t *segment_manager_alloc_queue (segment_manager_t *
+ sm, u32 queue_size);
+void segment_manager_dealloc_queue (segment_manager_t * sm,
+ unix_shared_memory_queue_t * q);
+
+#endif /* SRC_VNET_SESSION_SEGMENT_MANAGER_H_ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api
new file mode 100644
index 00000000..30d2ae96
--- /dev/null
+++ b/src/vnet/session/session.api
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief client->vpp, attach application to session layer
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param initial_segment_size - size of the initial shm segment to be
+ allocated
+ @param options - segment size, fifo sizes, etc.
+*/
+define application_attach {
+  u32 client_index;
+  u32 context;
+  u32 initial_segment_size;
+  u64 options[16];
+};
+
+/** \brief Application attach reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param app_event_queue_address - vpp event queue address or 0 if this
+ connection shouldn't send events
+ @param segment_size - size of first shm segment
+ @param segment_name_length - length of segment name
+ @param segment_name - name of segment client needs to attach to
+*/
+define application_attach_reply {
+ u32 context;
+ i32 retval;
+ u64 app_event_queue_address;
+ u32 segment_size;
+ u8 segment_name_length;
+ u8 segment_name[128];
+};
+
+/** \brief client->vpp, detach application from session layer
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+autoreply define application_detach {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief vpp->client, please map an additional shared memory segment
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param segment_size - size of the segment to be mapped
+    @param segment_name - name of the segment to be mapped
+*/
+autoreply define map_another_segment {
+ u32 client_index;
+ u32 context;
+ u32 segment_size;
+ u8 segment_name[128];
+};
+
+/** \brief Bind to a given URI
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param accept_cookie - sender accept cookie, to identify this bind flavor
+ @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4]
+ "tcp://::/0/80" [ipv6] etc.
+ @param options - socket options, fifo sizes, etc.
+*/
+autoreply define bind_uri {
+ u32 client_index;
+ u32 context;
+ u32 accept_cookie;
+ u8 uri[128];
+};
+
+/** \brief Unbind a given URI
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4]
+ "tcp://::/0/80" [ipv6], etc.
+ @param options - socket options, fifo sizes, etc.
+*/
+autoreply define unbind_uri {
+ u32 client_index;
+ u32 context;
+ u8 uri[128];
+};
+
+/** \brief Connect to a given URI
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param accept_cookie - sender accept cookie, to identify this bind flavor
+ @param uri - a URI, e.g. "tcp4://0.0.0.0/0/80"
+ "tcp6://::/0/80" [ipv6], etc.
+ @param options - socket options, fifo sizes, etc. passed by vpp to the
+ server when redirecting connects
+ @param client_queue_address - binary API client queue address. Used by
+ local server when connect was redirected.
+*/
+autoreply define connect_uri {
+ u32 client_index;
+ u32 context;
+ u8 uri[128];
+ u64 client_queue_address;
+ u64 options[16];
+};
+
+/** \brief vpp->client, accept this session
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param listener_handle - tells client which listener this pertains to
+    @param handle - unique session identifier
+    @param server_rx_fifo - rx (vpp -> vpp-client) fifo address
+    @param server_tx_fifo - tx (vpp-client -> vpp) fifo address
+    @param vpp_event_queue_address - vpp's event queue address
+    @param port - remote port
+    @param is_ip4 - 1 if the ip is ip4
+    @param ip - remote ip
+*/
+define accept_session {
+ u32 client_index;
+ u32 context;
+ u64 listener_handle;
+ u64 handle;
+ u64 server_rx_fifo;
+ u64 server_tx_fifo;
+ u64 vpp_event_queue_address;
+ u16 port;
+ u8 is_ip4;
+ u8 ip[16];
+};
+
+/** \brief client->vpp, reply to an accept message
+    @param context - sender context, to match reply w/ request
+    @param retval - return code for the request
+    @param handle - session handle from the accept_session message
+*/
+define accept_session_reply {
+ u32 context;
+ i32 retval;
+ u64 handle;
+};
+
+/** \brief bidirectional disconnect API
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param handle - session handle obtained from accept/connect
+*/
+define disconnect_session {
+ u32 client_index;
+ u32 context;
+ u64 handle;
+};
+
+/** \brief bidirectional disconnect reply API
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param handle - session handle
+*/
+define disconnect_session_reply {
+ u32 client_index;
+ u32 context;
+ i32 retval;
+ u64 handle;
+};
+
+/** \brief vpp->client reset session API
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param handle - session handle obtained via accept/connects
+*/
+define reset_session {
+ u32 client_index;
+ u32 context;
+ u64 handle;
+};
+
+/** \brief client->vpp reset session reply
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param handle - session handle obtained via accept/connect
+*/
+define reset_session_reply {
+ u32 client_index;
+ u32 context;
+ i32 retval;
+ u64 handle;
+};
+
+/** \brief Bind to an ip:port pair for a given transport protocol
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrf - bind namespace
+ @param is_ip4 - flag that is 1 if ip address family is IPv4
+ @param ip - ip address
+ @param port - port
+    @param proto - transport protocol: 0 = TCP, 1 = UDP
+ @param options - socket options, fifo sizes, etc.
+*/
+define bind_sock {
+ u32 client_index;
+ u32 context;
+ u32 vrf;
+ u8 is_ip4;
+ u8 ip[16];
+ u16 port;
+ u8 proto;
+ u64 options[16];
+};
+
+/** \brief Unbind
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param handle - bind handle obtained from bind reply
+*/
+autoreply define unbind_sock {
+ u32 client_index;
+ u32 context;
+ u64 handle;
+};
+
+/** \brief Connect to a remote peer
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param vrf - connection namespace
+    @param is_ip4 - flag that is 1 if ip address family is IPv4
+    @param ip - ip address
+    @param port - port
+    @param proto - transport protocol: 0 = TCP, 1 = UDP
+    @param client_queue_address - client's API queue address. Non-zero when
+                                  used to perform redirects
+    @param options - socket options, fifo sizes, etc. when doing redirects
+*/
+autoreply define connect_sock {
+ u32 client_index;
+ u32 context;
+ u32 vrf;
+ u8 is_ip4;
+ u8 ip[16];
+ u16 port;
+ u8 proto;
+ u64 client_queue_address;
+ u64 options[16];
+};
+
+/** \brief Bind reply
+    @param context - sender context, to match reply w/ request
+    @param handle - bind handle
+    @param retval - return code for the request
+    @param server_event_queue_address - vpp event queue address or 0 if this
+                                        connection shouldn't send events
+    @param segment_size - size of first shm segment
+    @param segment_name_length - length of segment name
+    @param segment_name - name of segment client needs to attach to
+*/
+define bind_sock_reply {
+ u32 context;
+ u64 handle;
+ i32 retval;
+ u64 server_event_queue_address;
+ u32 segment_size;
+ u8 segment_name_length;
+ u8 segment_name[128];
+};
+
+/* Dummy connect message -- needed to satisfy api generators
+*
+* NEVER USED, doxygen tags elided on purpose.
+*/
+define connect_session {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief vpp/server->client, connect reply -- used for all connect_* messages
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param handle - connection handle
+ @param server_rx_fifo - rx (vpp -> vpp-client) fifo address
+ @param server_tx_fifo - tx (vpp-client -> vpp) fifo address
+ @param vpp_event_queue_address - vpp's event queue address
+ @param segment_size - size of segment to be attached. Only for redirects.
+ @param segment_name_length - non-zero if the client needs to attach to
+ the fifo segment
+ @param segment_name - set if the client needs to attach to the segment
+*/
+define connect_session_reply {
+ u32 context;
+ i32 retval;
+ u64 handle;
+ u64 server_rx_fifo;
+ u64 server_tx_fifo;
+ u64 vpp_event_queue_address;
+ u32 segment_size;
+ u8 segment_name_length;
+ u8 segment_name[128];
+};
+
+/** \brief enable/disable session layer
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param is_enable - disable session layer if 0, enable otherwise
+*/
+autoreply define session_enable_disable {
+ u32 client_index;
+ u32 context;
+ u8 is_enable;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
new file mode 100644
index 00000000..dc930ce8
--- /dev/null
+++ b/src/vnet/session/session.c
@@ -0,0 +1,1036 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Session and session manager
+ */
+
+#include <vnet/session/session.h>
+#include <vnet/session/session_debug.h>
+#include <vnet/session/application.h>
+#include <vlibmemory/api.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/tcp/tcp.h>
+
+session_manager_main_t session_manager_main;
+extern transport_proto_vft_t *tp_vfts;
+
+int
+stream_session_create_i (segment_manager_t * sm, transport_connection_t * tc,
+ u8 alloc_fifos, stream_session_t ** ret_s)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0;
+ u32 fifo_segment_index;
+ u32 pool_index;
+ stream_session_t *s;
+ u64 value;
+ u32 thread_index = tc->thread_index;
+ int rv;
+
+ ASSERT (thread_index == vlib_get_thread_index ());
+
+ /* Create the session */
+ pool_get_aligned (smm->sessions[thread_index], s, CLIB_CACHE_LINE_BYTES);
+ memset (s, 0, sizeof (*s));
+ pool_index = s - smm->sessions[thread_index];
+
+ /* Allocate fifos */
+ if (alloc_fifos)
+ {
+ if ((rv = segment_manager_alloc_session_fifos (sm, &server_rx_fifo,
+ &server_tx_fifo,
+ &fifo_segment_index)))
+ {
+ pool_put (smm->sessions[thread_index], s);
+ return rv;
+ }
+ /* Initialize backpointers */
+ server_rx_fifo->master_session_index = pool_index;
+ server_rx_fifo->master_thread_index = thread_index;
+
+ server_tx_fifo->master_session_index = pool_index;
+ server_tx_fifo->master_thread_index = thread_index;
+
+ s->server_rx_fifo = server_rx_fifo;
+ s->server_tx_fifo = server_tx_fifo;
+ s->svm_segment_index = fifo_segment_index;
+ }
+
+ /* Initialize state machine, such as it is... */
+ s->session_type = session_type_from_proto_and_ip (tc->transport_proto,
+ tc->is_ip4);
+ s->session_state = SESSION_STATE_CONNECTING;
+ s->thread_index = thread_index;
+ s->session_index = pool_index;
+
+ /* Attach transport to session */
+ s->connection_index = tc->c_index;
+
+ /* Attach session to transport */
+ tc->s_index = s->session_index;
+
+ /* Add to the main lookup table */
+ value = stream_session_handle (s);
+ stream_session_table_add_for_tc (tc, value);
+
+ *ret_s = s;
+
+ return 0;
+}
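+
+/*
+ * Net effect of the create above (sketch): session and transport end up
+ * cross-linked and the session is findable both by index and by 5-tuple:
+ *
+ *   s->connection_index == tc->c_index
+ *   tc->s_index == s->session_index
+ *   lookup (tc 5-tuple) -> stream_session_handle (s)
+ */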
+
+/**
+ * Discards bytes from buffer chain
+ *
+ * It discards n_bytes_to_drop starting at first buffer after chain_b
+ */
+always_inline void
+session_enqueue_discard_chain_bytes (vlib_main_t * vm, vlib_buffer_t * b,
+ vlib_buffer_t ** chain_b,
+ u32 n_bytes_to_drop)
+{
+ vlib_buffer_t *next = *chain_b;
+ u32 to_drop = n_bytes_to_drop;
+ ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ while (to_drop && (next->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ next = vlib_get_buffer (vm, next->next_buffer);
+ if (next->current_length > to_drop)
+ {
+ vlib_buffer_advance (next, to_drop);
+ to_drop = 0;
+ }
+ else
+ {
+ to_drop -= next->current_length;
+ next->current_length = 0;
+ }
+ }
+ *chain_b = next;
+
+ if (to_drop == 0)
+ b->total_length_not_including_first_buffer -= n_bytes_to_drop;
+}
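+
+/*
+ * Worked example for the discard above (hypothetical chain): with two
+ * buffers of 100 bytes each chained after b and n_bytes_to_drop = 150,
+ * the first chained buffer is emptied (150 - 100 = 50 left to drop) and
+ * the second is advanced by 50 bytes, leaving 50 bytes of usable data.
+ */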
+
+/**
+ * Enqueue buffer chain tail
+ */
+always_inline int
+session_enqueue_chain_tail (stream_session_t * s, vlib_buffer_t * b,
+ u32 offset, u8 is_in_order)
+{
+ vlib_buffer_t *chain_b;
+ u32 chain_bi, len, diff;
+ vlib_main_t *vm = vlib_get_main ();
+ u8 *data;
+ u32 written = 0;
+ int rv = 0;
+
+ if (is_in_order && offset)
+ {
+ diff = offset - b->current_length;
+ if (diff > b->total_length_not_including_first_buffer)
+ return 0;
+ chain_b = b;
+ session_enqueue_discard_chain_bytes (vm, b, &chain_b, diff);
+ chain_bi = vlib_get_buffer_index (vm, chain_b);
+ }
+ else
+ chain_bi = b->next_buffer;
+
+ do
+ {
+ chain_b = vlib_get_buffer (vm, chain_bi);
+ data = vlib_buffer_get_current (chain_b);
+ len = chain_b->current_length;
+ if (!len)
+ continue;
+ if (is_in_order)
+ {
+ rv = svm_fifo_enqueue_nowait (s->server_rx_fifo, len, data);
+ if (rv == len)
+ {
+ written += rv;
+ }
+ else if (rv < len)
+ {
+ return (rv > 0) ? (written + rv) : written;
+ }
+ else if (rv > len)
+ {
+ written += rv;
+
+ /* written more than what was left in chain */
+ if (written > b->total_length_not_including_first_buffer)
+ return written;
+
+ /* drop the bytes that have already been delivered */
+ session_enqueue_discard_chain_bytes (vm, b, &chain_b, rv - len);
+ }
+ }
+ else
+ {
+ rv = svm_fifo_enqueue_with_offset (s->server_rx_fifo, offset, len,
+ data);
+ if (rv)
+ {
+ clib_warning ("failed to enqueue multi-buffer seg");
+ return -1;
+ }
+ offset += len;
+ }
+ }
+ while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ ? chain_b->next_buffer : 0));
+
+ if (is_in_order)
+ return written;
+
+ return 0;
+}
+
+/*
+ * Enqueue data for delivery to the session peer. Does not notify the peer
+ * of the enqueue event but, on request, can queue notification events for
+ * later delivery by calling stream_server_flush_enqueue_events().
+ *
+ * @param tc Transport connection to which the data is to be enqueued
+ * @param b Buffer to be enqueued
+ * @param offset Offset at which to start enqueueing if out-of-order
+ * @param queue_event Flag to indicate if peer is to be notified or if event
+ * is to be queued. The former is useful when more data is
+ * enqueued and only one event is to be generated.
+ * @param is_in_order Flag to indicate if data is in order
+ * @return Number of bytes enqueued or a negative value if enqueueing failed.
+ */
+int
+stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b,
+ u32 offset, u8 queue_event, u8 is_in_order)
+{
+ stream_session_t *s;
+ int enqueued = 0, rv, in_order_off;
+
+ s = stream_session_get (tc->s_index, tc->thread_index);
+
+ if (is_in_order)
+ {
+ enqueued = svm_fifo_enqueue_nowait (s->server_rx_fifo,
+ b->current_length,
+ vlib_buffer_get_current (b));
+ if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ && enqueued >= 0))
+ {
+ in_order_off = enqueued > b->current_length ? enqueued : 0;
+ rv = session_enqueue_chain_tail (s, b, in_order_off, 1);
+ if (rv > 0)
+ enqueued += rv;
+ }
+ }
+ else
+ {
+ rv = svm_fifo_enqueue_with_offset (s->server_rx_fifo, offset,
+ b->current_length,
+ vlib_buffer_get_current (b));
+ if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv))
+ session_enqueue_chain_tail (s, b, offset + b->current_length, 0);
+ /* if something was enqueued, report even this as success for ooo
+ * segment handling */
+ return rv;
+ }
+
+ if (queue_event)
+ {
+ /* Queue RX event on this fifo. Eventually these will need to be flushed
+ * by calling stream_server_flush_enqueue_events () */
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ u32 thread_index = s->thread_index;
+ u32 my_enqueue_epoch = smm->current_enqueue_epoch[thread_index];
+
+ if (s->enqueue_epoch != my_enqueue_epoch)
+ {
+ s->enqueue_epoch = my_enqueue_epoch;
+ vec_add1 (smm->session_indices_to_enqueue_by_thread[thread_index],
+ s - smm->sessions[thread_index]);
+ }
+ }
+
+ return enqueued;
+}
+
+/** Check if we have space in rx fifo to push more bytes */
+u8
+stream_session_no_space (transport_connection_t * tc, u32 thread_index,
+ u16 data_len)
+{
+ stream_session_t *s = stream_session_get (tc->s_index, thread_index);
+
+ if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY))
+ return 1;
+
+ if (data_len > svm_fifo_max_enqueue (s->server_rx_fifo))
+ return 1;
+
+ return 0;
+}
+
+u32
+stream_session_tx_fifo_max_dequeue (transport_connection_t * tc)
+{
+ stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+ if (!s->server_tx_fifo)
+ return 0;
+ return svm_fifo_max_dequeue (s->server_tx_fifo);
+}
+
+int
+stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer,
+ u32 offset, u32 max_bytes)
+{
+ stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+ return svm_fifo_peek (s->server_tx_fifo, offset, max_bytes, buffer);
+}
+
+u32
+stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes)
+{
+ stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+ return svm_fifo_dequeue_drop (s->server_tx_fifo, max_bytes);
+}
+
+/**
+ * Notify session peer that new data has been enqueued.
+ *
+ * @param s Stream session for which the event is to be generated.
+ * @param block Flag to indicate if call should block if event queue is full.
+ *
+ * @return 0 on success or a negative number if sending the notification failed.
+ */
+static int
+stream_session_enqueue_notify (stream_session_t * s, u8 block)
+{
+ application_t *app;
+ session_fifo_event_t evt;
+ unix_shared_memory_queue_t *q;
+ static u32 serial_number;
+
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED))
+ {
+ /* Session is closed so app will never clean up. Flush rx fifo */
+ u32 to_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo);
+ if (to_dequeue)
+ svm_fifo_dequeue_drop (s->server_rx_fifo, to_dequeue);
+ return 0;
+ }
+
+ /* Get session's server */
+ app = application_get_if_valid (s->app_index);
+
+ if (PREDICT_FALSE (app == 0))
+ {
+ clib_warning ("invalid s->app_index = %d", s->app_index);
+ return 0;
+ }
+
+ /* Built-in server? Hand event to the callback... */
+ if (app->cb_fns.builtin_server_rx_callback)
+ return app->cb_fns.builtin_server_rx_callback (s);
+
+ /* If no event, send one */
+ if (svm_fifo_set_event (s->server_rx_fifo))
+ {
+ /* Fabricate event */
+ evt.fifo = s->server_rx_fifo;
+ evt.event_type = FIFO_EVENT_APP_RX;
+ evt.event_id = serial_number++;
+
+ /* Add event to server's event queue */
+ q = app->event_queue;
+
+ /* Based on request block (or not) for lack of space */
+ if (block || PREDICT_TRUE (q->cursize < q->maxsize))
+ unix_shared_memory_queue_add (app->event_queue, (u8 *) & evt,
+ 0 /* do wait for mutex */ );
+ else
+ {
+ clib_warning ("fifo full");
+ return -1;
+ }
+ }
+
+ /* *INDENT-OFF* */
+ SESSION_EVT_DBG(SESSION_EVT_ENQ, s, ({
+ ed->data[0] = evt.event_id;
+ ed->data[1] = svm_fifo_max_dequeue (s->server_rx_fifo);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/**
+ * Flushes queue of sessions that are to be notified of new data
+ * enqueued events.
+ *
+ * @param thread_index Thread index for which the flush is to be performed.
+ * @return 0 on success or a positive number indicating the number of
+ * failures due to API queue being full.
+ */
+int
+session_manager_flush_enqueue_events (u32 thread_index)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ u32 *session_indices_to_enqueue;
+ int i, errors = 0;
+
+ session_indices_to_enqueue =
+ smm->session_indices_to_enqueue_by_thread[thread_index];
+
+ for (i = 0; i < vec_len (session_indices_to_enqueue); i++)
+ {
+ stream_session_t *s0;
+
+ /* Get session */
+ s0 = stream_session_get_if_valid (session_indices_to_enqueue[i],
+ thread_index);
+ if (s0 == 0 || stream_session_enqueue_notify (s0, 0 /* don't block */ ))
+ {
+ errors++;
+ }
+ }
+
+ vec_reset_length (session_indices_to_enqueue);
+
+ smm->session_indices_to_enqueue_by_thread[thread_index] =
+ session_indices_to_enqueue;
+
+ /* Increment enqueue epoch for next round */
+ smm->current_enqueue_epoch[thread_index]++;
+
+ return errors;
+}
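+
+/*
+ * Usage note, hedged: transports batch rx notifications by queuing session
+ * indices per thread and calling this once per dispatched frame, e.g. at
+ * the end of an input node:
+ *
+ *   errors = session_manager_flush_enqueue_events (thread_index);
+ *
+ * so each session posts at most one event per frame instead of one per
+ * packet.
+ */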
+
+/**
+ * Init fifo tail and head pointers
+ *
+ * Useful if transport uses absolute offsets for tracking ooo segments.
+ */
+void
+stream_session_init_fifos_pointers (transport_connection_t * tc,
+ u32 rx_pointer, u32 tx_pointer)
+{
+ stream_session_t *s;
+ s = stream_session_get (tc->s_index, tc->thread_index);
+ svm_fifo_init_pointers (s->server_rx_fifo, rx_pointer);
+ svm_fifo_init_pointers (s->server_tx_fifo, tx_pointer);
+}
+
+int
+stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
+{
+ application_t *app;
+ stream_session_t *new_s = 0;
+ u64 handle;
+ u32 opaque = 0;
+ int error = 0;
+ u8 st;
+
+ st = session_type_from_proto_and_ip (tc->transport_proto, tc->is_ip4);
+ handle = stream_session_half_open_lookup_handle (&tc->lcl_ip, &tc->rmt_ip,
+ tc->lcl_port, tc->rmt_port,
+ st);
+ if (handle == HALF_OPEN_LOOKUP_INVALID_VALUE)
+ {
+ TCP_DBG ("half-open was removed!");
+ return -1;
+ }
+
+ /* Cleanup half-open table */
+ stream_session_half_open_table_del (tc);
+
+ /* Get the app's index from the handle we stored when opening the
+ * connection; the opaque (api_context for external apps) comes from the
+ * transport session index */
+ app = application_get_if_valid (handle >> 32);
+ if (!app)
+ return -1;
+
+ opaque = tc->s_index;
+
+ if (!is_fail)
+ {
+ segment_manager_t *sm;
+ u8 alloc_fifos;
+ sm = application_get_connect_segment_manager (app);
+ alloc_fifos = application_is_proxy (app);
+ /* Create new session (svm segments are allocated if needed) */
+ if (stream_session_create_i (sm, tc, alloc_fifos, &new_s))
+ {
+ is_fail = 1;
+ error = -1;
+ }
+ else
+ new_s->app_index = app->index;
+ }
+
+ /* Notify client application */
+ if (app->cb_fns.session_connected_callback (app->index, opaque, new_s,
+ is_fail))
+ {
+ clib_warning ("failed to notify app");
+ if (!is_fail)
+ stream_session_disconnect (new_s);
+ }
+ else
+ {
+ if (!is_fail)
+ new_s->session_state = SESSION_STATE_READY;
+ }
+
+ return error;
+}
+
+void
+stream_session_accept_notify (transport_connection_t * tc)
+{
+ application_t *server;
+ stream_session_t *s;
+
+ s = stream_session_get (tc->s_index, tc->thread_index);
+ server = application_get (s->app_index);
+ server->cb_fns.session_accept_callback (s);
+}
+
+/**
+ * Notification from transport that connection is being closed.
+ *
+ * A disconnect is sent to application but state is not removed. Once
+ * disconnect is acknowledged by application, session disconnect is called.
+ * Ultimately this leads to close being called on transport (passive close).
+ */
+void
+stream_session_disconnect_notify (transport_connection_t * tc)
+{
+ application_t *server;
+ stream_session_t *s;
+
+ s = stream_session_get (tc->s_index, tc->thread_index);
+ server = application_get (s->app_index);
+ server->cb_fns.session_disconnect_callback (s);
+}
+
+/**
+ * Cleans up session and lookup table.
+ */
+void
+stream_session_delete (stream_session_t * s)
+{
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ int rv;
+
+ /* Delete from the main lookup table. */
+ if ((rv = stream_session_table_del (s)))
+ clib_warning ("hash delete error, rv %d", rv);
+
+ /* Cleanup fifo segments */
+ segment_manager_dealloc_fifos (s->svm_segment_index, s->server_rx_fifo,
+ s->server_tx_fifo);
+
+ pool_put (smm->sessions[s->thread_index], s);
+ if (CLIB_DEBUG)
+ memset (s, 0xFA, sizeof (*s));
+}
+
+/**
+ * Notification from transport that connection is being deleted
+ *
+ * This removes the session if it is still valid. It should be called only on
+ * previously fully established sessions. For instance failed connects should
+ * call stream_session_connect_notify and indicate that the connect has
+ * failed.
+ */
+void
+stream_session_delete_notify (transport_connection_t * tc)
+{
+ stream_session_t *s;
+
+ /* App might've been removed already */
+ s = stream_session_get_if_valid (tc->s_index, tc->thread_index);
+ if (!s)
+ return;
+ stream_session_delete (s);
+}
+
+/**
+ * Notify application that connection has been reset.
+ */
+void
+stream_session_reset_notify (transport_connection_t * tc)
+{
+ stream_session_t *s;
+ application_t *app;
+ s = stream_session_get (tc->s_index, tc->thread_index);
+
+ app = application_get (s->app_index);
+ app->cb_fns.session_reset_callback (s);
+}
+
+/**
+ * Accept a stream session. Optionally ping the server by callback.
+ */
+int
+stream_session_accept (transport_connection_t * tc, u32 listener_index,
+ u8 sst, u8 notify)
+{
+ application_t *server;
+ stream_session_t *s, *listener;
+ segment_manager_t *sm;
+
+ int rv;
+
+ /* Find the server */
+ listener = listen_session_get (sst, listener_index);
+ server = application_get (listener->app_index);
+
+ sm = application_get_listen_segment_manager (server, listener);
+ if ((rv = stream_session_create_i (sm, tc, 1, &s)))
+ return rv;
+
+ s->app_index = server->index;
+ s->listener_index = listener_index;
+ s->session_state = SESSION_STATE_ACCEPTING;
+
+ /* Shoulder-tap the server */
+ if (notify)
+ {
+ server->cb_fns.session_accept_callback (s);
+ }
+
+ return 0;
+}
+
+/**
+ * Ask transport to open connection to remote transport endpoint.
+ *
+ * Stores handle for matching request with reply since the call can be
+ * asynchronous. For instance, for TCP the 3-way handshake must complete
+ * before reply comes. Session is only created once connection is established.
+ *
+ * @param app_index Index of the application requesting the connect
+ * @param st Session type requested.
+ * @param rmt Remote transport endpoint
+ * @param res Resulting transport connection.
+ */
+int
+stream_session_open (u32 app_index, session_type_t st,
+ transport_endpoint_t * rmt,
+ transport_connection_t ** res)
+{
+ transport_connection_t *tc;
+ int rv;
+ u64 handle;
+
+ rv = tp_vfts[st].open (rmt);
+ if (rv < 0)
+ {
+ clib_warning ("Transport failed to open connection.");
+ return VNET_API_ERROR_SESSION_CONNECT_FAIL;
+ }
+
+ tc = tp_vfts[st].get_half_open ((u32) rv);
+
+ /* Save app and tc index. The latter is needed to help establish the
+ * connection while the former is needed when the connect notify comes
+ * and we have to notify the external app */
+ handle = (((u64) app_index) << 32) | (u64) tc->c_index;
+
+ /* Add to the half-open lookup table */
+ stream_session_half_open_table_add (tc, handle);
+
+ *res = tc;
+
+ return 0;
+}
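+
+/*
+ * Worked example, illustrative values: with app_index 7 and tc->c_index 42
+ * the handle built above is (7ULL << 32) | 42, i.e. 0x70000002a. On the
+ * connect notify the two halves are recovered as:
+ *
+ *   u32 app_index = handle >> 32;         // 7
+ *   u32 tc_index = handle & 0xffffffff;   // 42
+ *
+ * which is what stream_session_connect_notify () does with
+ * application_get_if_valid (handle >> 32).
+ */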
+
+/**
+ * Ask transport to listen on local transport endpoint.
+ *
+ * @param s Session for which listen will be called. Note that unlike
+ * established sessions, listen sessions are not associated to a
+ * thread.
+ * @param tep Local endpoint to be listened on.
+ */
+int
+stream_session_listen (stream_session_t * s, transport_endpoint_t * tep)
+{
+ transport_connection_t *tc;
+ u32 tci;
+
+ /* Transport bind/listen */
+ tci = tp_vfts[s->session_type].bind (s->session_index, tep);
+
+ if (tci == (u32) ~ 0)
+ return -1;
+
+ /* Attach transport to session */
+ s->connection_index = tci;
+ tc = tp_vfts[s->session_type].get_listener (tci);
+
+ /* Weird but handle it ... */
+ if (tc == 0)
+ return -1;
+
+ /* Add to the main lookup table */
+ stream_session_table_add_for_tc (tc, s->session_index);
+
+ return 0;
+}
+
+/**
+ * Ask transport to stop listening on local transport endpoint.
+ *
+ * @param s Session to stop listening on. It must be in state LISTENING.
+ */
+int
+stream_session_stop_listen (stream_session_t * s)
+{
+ transport_connection_t *tc;
+
+ if (s->session_state != SESSION_STATE_LISTENING)
+ {
+ clib_warning ("not a listening session");
+ return -1;
+ }
+
+ tc = tp_vfts[s->session_type].get_listener (s->connection_index);
+ if (!tc)
+ {
+ clib_warning ("no transport");
+ return VNET_API_ERROR_ADDRESS_NOT_IN_USE;
+ }
+
+ stream_session_table_del_for_tc (tc);
+ tp_vfts[s->session_type].unbind (s->connection_index);
+ return 0;
+}
+
+void
+session_send_session_evt_to_thread (u64 session_handle,
+ fifo_event_type_t evt_type,
+ u32 thread_index)
+{
+ static u16 serial_number = 0;
+ u32 tries = 0;
+ session_fifo_event_t evt;
+ unix_shared_memory_queue_t *q;
+
+ /* Fabricate event */
+ evt.session_handle = session_handle;
+ evt.event_type = evt_type;
+ evt.event_id = serial_number++;
+
+ q = session_manager_get_vpp_event_queue (thread_index);
+ while (unix_shared_memory_queue_add (q, (u8 *) & evt, 1))
+ {
+ if (tries++ == 3)
+ {
+ TCP_DBG ("failed to enqueue evt");
+ break;
+ }
+ }
+}
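+
+/*
+ * Note: the queue add above is non-blocking (nowait set), so the loop
+ * makes up to four attempts and then drops the event rather than stalling
+ * a worker on a full vpp event queue.
+ */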
+
+/**
+ * Disconnect session and propagate to transport. This should eventually
+ * result in a delete notification that allows us to cleanup session state.
+ * Called for both active/passive disconnects.
+ *
+ * Should be called from the session's thread.
+ */
+void
+stream_session_disconnect (stream_session_t * s)
+{
+ s->session_state = SESSION_STATE_CLOSED;
+ tp_vfts[s->session_type].close (s->connection_index, s->thread_index);
+}
+
+/**
+ * Cleanup transport and session state.
+ *
+ * Notify transport of the cleanup, wait for a delete notify to actually
+ * remove the session state.
+ */
+void
+stream_session_cleanup (stream_session_t * s)
+{
+ int rv;
+
+ s->session_state = SESSION_STATE_CLOSED;
+
+ /* Delete from the main lookup table to avoid more enqueues */
+ rv = stream_session_table_del (s);
+ if (rv)
+ clib_warning ("hash delete error, rv %d", rv);
+
+ tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index);
+}
+
+/**
+ * Allocate vpp event queue (once) per worker thread
+ */
+void
+session_vpp_event_queue_allocate (session_manager_main_t * smm,
+ u32 thread_index)
+{
+ api_main_t *am = &api_main;
+ void *oldheap;
+ u32 event_queue_length = 2048;
+
+ if (smm->vpp_event_queues[thread_index] == 0)
+ {
+ /* Allocate event fifo in the /vpe-api shared-memory segment */
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ if (smm->configured_event_queue_length)
+ event_queue_length = smm->configured_event_queue_length;
+
+ smm->vpp_event_queues[thread_index] =
+ unix_shared_memory_queue_init
+ (event_queue_length,
+ sizeof (session_fifo_event_t), 0 /* consumer pid */ ,
+ 0 /* (do not) send signal when queue non-empty */ );
+
+ svm_pop_heap (oldheap);
+ }
+}
+
+session_type_t
+session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4)
+{
+ if (proto == TRANSPORT_PROTO_TCP)
+ {
+ if (is_ip4)
+ return SESSION_TYPE_IP4_TCP;
+ else
+ return SESSION_TYPE_IP6_TCP;
+ }
+ else
+ {
+ if (is_ip4)
+ return SESSION_TYPE_IP4_UDP;
+ else
+ return SESSION_TYPE_IP6_UDP;
+ }
+
+ return SESSION_N_TYPES;
+}
+
+static clib_error_t *
+session_manager_main_enable (vlib_main_t * vm)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
+ u32 preallocated_sessions_per_worker;
+ int i;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+
+ if (num_threads < 1)
+ return clib_error_return (0, "n_thread_stacks not set");
+
+ /* $$$ config parameters */
+ svm_fifo_segment_init (0x200000000ULL /* first segment base VA */ ,
+ 20 /* timeout in seconds */ );
+
+ /* configure per-thread ** vectors */
+ vec_validate (smm->sessions, num_threads - 1);
+ vec_validate (smm->session_indices_to_enqueue_by_thread, num_threads - 1);
+ vec_validate (smm->tx_buffers, num_threads - 1);
+ vec_validate (smm->pending_event_vector, num_threads - 1);
+ vec_validate (smm->free_event_vector, num_threads - 1);
+ vec_validate (smm->current_enqueue_epoch, num_threads - 1);
+ vec_validate (smm->vpp_event_queues, num_threads - 1);
+
+ for (i = 0; i < num_threads; i++)
+ {
+ vec_validate (smm->free_event_vector[i], 0);
+ _vec_len (smm->free_event_vector[i]) = 0;
+ vec_validate (smm->pending_event_vector[i], 0);
+ _vec_len (smm->pending_event_vector[i]) = 0;
+ }
+
+#if SESSION_DBG
+ vec_validate (smm->last_event_poll_by_thread, num_threads - 1);
+#endif
+
+ /* Allocate vpp event queues */
+ for (i = 0; i < vec_len (smm->vpp_event_queues); i++)
+ session_vpp_event_queue_allocate (smm, i);
+
+ /* Preallocate sessions */
+ if (smm->preallocated_sessions)
+ {
+ if (num_threads == 1)
+ {
+ pool_init_fixed (smm->sessions[0], smm->preallocated_sessions);
+ }
+ else
+ {
+ int j;
+ preallocated_sessions_per_worker =
+ (1.1 * (f64) smm->preallocated_sessions /
+ (f64) (num_threads - 1));
+
+ for (j = 1; j < num_threads; j++)
+ {
+ pool_init_fixed (smm->sessions[j],
+ preallocated_sessions_per_worker);
+ }
+ }
+ }
+
+ session_lookup_init ();
+
+ smm->is_enabled = 1;
+
+ /* Enable TCP transport */
+ vnet_tcp_enable_disable (vm, 1);
+
+ return 0;
+}
+
+void
+session_node_enable_disable (u8 is_en)
+{
+ u8 state = is_en ? VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_DISABLED;
+ /* *INDENT-OFF* */
+ foreach_vlib_main (({
+ vlib_node_set_state (this_vlib_main, session_queue_node.index,
+ state);
+ }));
+ /* *INDENT-ON* */
+}
+
+clib_error_t *
+vnet_session_enable_disable (vlib_main_t * vm, u8 is_en)
+{
+ if (is_en)
+ {
+ if (session_manager_main.is_enabled)
+ return 0;
+
+ session_node_enable_disable (is_en);
+
+ return session_manager_main_enable (vm);
+ }
+ else
+ {
+ session_manager_main.is_enabled = 0;
+ session_node_enable_disable (is_en);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+session_manager_main_init (vlib_main_t * vm)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ smm->is_enabled = 0;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (session_manager_main_init);
+
+static clib_error_t *
+session_config_fn (vlib_main_t * vm, unformat_input_t * input)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ u32 nitems;
+ uword tmp;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "event-queue-length %d", &nitems))
+ {
+ if (nitems >= 2048)
+ smm->configured_event_queue_length = nitems;
+ else
+ clib_warning ("event queue length %d too small, ignored", nitems);
+ }
+ else if (unformat (input, "preallocated-sessions %d",
+ &smm->preallocated_sessions))
+ ;
+ else if (unformat (input, "v4-session-table-buckets %d",
+ &smm->configured_v4_session_table_buckets))
+ ;
+ else if (unformat (input, "v4-halfopen-table-buckets %d",
+ &smm->configured_v4_halfopen_table_buckets))
+ ;
+ else if (unformat (input, "v6-session-table-buckets %d",
+ &smm->configured_v6_session_table_buckets))
+ ;
+ else if (unformat (input, "v6-halfopen-table-buckets %d",
+ &smm->configured_v6_halfopen_table_buckets))
+ ;
+ else if (unformat (input, "v4-session-table-memory %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000)
+ return clib_error_return (0, "memory size %llx (%lld) too large",
+ tmp, tmp);
+ smm->configured_v4_session_table_memory = tmp;
+ }
+ else if (unformat (input, "v4-halfopen-table-memory %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000)
+ return clib_error_return (0, "memory size %llx (%lld) too large",
+ tmp, tmp);
+ smm->configured_v4_halfopen_table_memory = tmp;
+ }
+ else if (unformat (input, "v6-session-table-memory %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000)
+ return clib_error_return (0, "memory size %llx (%lld) too large",
+ tmp, tmp);
+ smm->configured_v6_session_table_memory = tmp;
+ }
+ else if (unformat (input, "v6-halfopen-table-memory %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000)
+ return clib_error_return (0, "memory size %llx (%lld) too large",
+ tmp, tmp);
+ smm->configured_v6_halfopen_table_memory = tmp;
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (session_config_fn, "session");
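+
+/*
+ * Example startup.conf stanza consumed by session_config_fn above (values
+ * purely illustrative):
+ *
+ *   session {
+ *     event-queue-length 4096
+ *     preallocated-sessions 100000
+ *     v4-session-table-buckets 20000
+ *     v4-session-table-memory 64M
+ *   }
+ *
+ * Memory sizes take the usual unformat_memory_size suffixes and must stay
+ * below 4G per the range checks above; queue lengths under 2048 are
+ * ignored with a warning.
+ */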
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
new file mode 100644
index 00000000..83addec2
--- /dev/null
+++ b/src/vnet/session/session.h
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_session_h__
+#define __included_session_h__
+
+#include <vnet/session/stream_session.h>
+#include <vnet/session/session_lookup.h>
+#include <vnet/session/transport_interface.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+#include <vnet/session/session_debug.h>
+#include <vnet/session/segment_manager.h>
+
+#define HALF_OPEN_LOOKUP_INVALID_VALUE ((u64)~0)
+#define INVALID_INDEX ((u32)~0)
+
+/* TODO decide how much since we have pre-data as well */
+#define MAX_HDRS_LEN 100 /* Max number of bytes for headers */
+
+typedef enum
+{
+ FIFO_EVENT_APP_RX,
+ FIFO_EVENT_APP_TX,
+ FIFO_EVENT_TIMEOUT,
+ FIFO_EVENT_DISCONNECT,
+ FIFO_EVENT_BUILTIN_RX,
+ FIFO_EVENT_RPC,
+} fifo_event_type_t;
+
+static inline const char *
+fifo_event_type_str (fifo_event_type_t et)
+{
+ switch (et)
+ {
+ case FIFO_EVENT_APP_RX:
+ return "FIFO_EVENT_APP_RX";
+ case FIFO_EVENT_APP_TX:
+ return "FIFO_EVENT_APP_TX";
+ case FIFO_EVENT_TIMEOUT:
+ return "FIFO_EVENT_TIMEOUT";
+ case FIFO_EVENT_DISCONNECT:
+ return "FIFO_EVENT_DISCONNECT";
+ case FIFO_EVENT_BUILTIN_RX:
+ return "FIFO_EVENT_BUILTIN_RX";
+ case FIFO_EVENT_RPC:
+ return "FIFO_EVENT_RPC";
+ default:
+ return "UNKNOWN FIFO EVENT";
+ }
+}
+
+#define foreach_session_input_error \
+_(NO_SESSION, "No session drops") \
+_(NO_LISTENER, "No listener for dst port drops") \
+_(ENQUEUED, "Packets pushed into rx fifo") \
+_(NOT_READY, "Session not ready packets") \
+_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \
+_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \
+_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \
+_(NEW_SEG_NO_SPACE, "Created segment, couldn't allocate a fifo pair") \
+_(NO_SPACE, "Couldn't allocate a fifo pair")
+
+typedef enum
+{
+#define _(sym,str) SESSION_ERROR_##sym,
+ foreach_session_input_error
+#undef _
+ SESSION_N_ERROR,
+} session_error_t;
+
+/* Event queue input node static next indices */
+typedef enum
+{
+ SESSION_QUEUE_NEXT_DROP,
+ SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT,
+ SESSION_QUEUE_NEXT_IP4_LOOKUP,
+ SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT,
+ SESSION_QUEUE_NEXT_IP6_LOOKUP,
+ SESSION_QUEUE_N_NEXT,
+} session_queue_next_t;
+
+typedef struct
+{
+ void *fp;
+ void *arg;
+} rpc_args_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ union
+ {
+ svm_fifo_t * fifo;
+ u64 session_handle;
+ rpc_args_t rpc_args;
+ };
+ u8 event_type;
+ u16 event_id;
+}) session_fifo_event_t;
+/* *INDENT-ON* */
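+
+/*
+ * Illustrative use of the union, not part of the patch: exactly one member
+ * is meaningful per event, selected by event_type. For an RPC request
+ * (my_fn and my_arg are hypothetical):
+ *
+ *   session_fifo_event_t evt = { 0 };
+ *   evt.rpc_args.fp = my_fn;
+ *   evt.rpc_args.arg = my_arg;
+ *   evt.event_type = FIFO_EVENT_RPC;
+ */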
+
+/* Forward definition */
+typedef struct _session_manager_main session_manager_main_t;
+
+typedef int
+ (session_fifo_rx_fn) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ session_manager_main_t * smm,
+ session_fifo_event_t * e0, stream_session_t * s0,
+ u32 thread_index, int *n_tx_pkts);
+
+extern session_fifo_rx_fn session_tx_fifo_peek_and_snd;
+extern session_fifo_rx_fn session_tx_fifo_dequeue_and_snd;
+
+u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e);
+
+struct _session_manager_main
+{
+ /** Per worker thread session pools */
+ stream_session_t **sessions;
+
+ /** Pool of listen sessions. Same type as stream sessions to ease lookups */
+ stream_session_t *listen_sessions[SESSION_N_TYPES];
+
+ /** Sparse vector to map dst port to stream server */
+ u16 *stream_server_by_dst_port[SESSION_N_TYPES];
+
+ /** per-worker enqueue epoch counters */
+ u8 *current_enqueue_epoch;
+
+ /** Per-worker thread vector of sessions to enqueue */
+ u32 **session_indices_to_enqueue_by_thread;
+
+ /** per-worker tx buffer free lists */
+ u32 **tx_buffers;
+
+ /** Per worker-thread vector of partially read events */
+ session_fifo_event_t **free_event_vector;
+
+ /** per-worker active event vectors */
+ session_fifo_event_t **pending_event_vector;
+
+ /** vpp fifo event queue */
+ unix_shared_memory_queue_t **vpp_event_queues;
+
+ /** vpp fifo event queue configured length */
+ u32 configured_event_queue_length;
+
+ /** session table size parameters */
+ u32 configured_v4_session_table_buckets;
+ u32 configured_v4_session_table_memory;
+ u32 configured_v4_halfopen_table_buckets;
+ u32 configured_v4_halfopen_table_memory;
+ u32 configured_v6_session_table_buckets;
+ u32 configured_v6_session_table_memory;
+ u32 configured_v6_halfopen_table_buckets;
+ u32 configured_v6_halfopen_table_memory;
+
+ /** Unique segment name counter */
+ u32 unique_segment_name_counter;
+
+ /** Per transport rx function that can either dequeue or peek */
+ session_fifo_rx_fn *session_tx_fns[SESSION_N_TYPES];
+
+ /** Session manager is enabled */
+ u8 is_enabled;
+
+ /** Preallocate session config parameter */
+ u32 preallocated_sessions;
+
+#if SESSION_DBG
+ /**
+ * last event poll time by thread
+ * Debug only. Will cause false cache-line sharing as-is
+ */
+ f64 *last_event_poll_by_thread;
+#endif
+
+};
+
+extern session_manager_main_t session_manager_main;
+extern vlib_node_registration_t session_queue_node;
+
+/*
+ * Session manager function
+ */
+always_inline session_manager_main_t *
+vnet_get_session_manager_main ()
+{
+ return &session_manager_main;
+}
+
+always_inline u8
+stream_session_is_valid (u32 si, u8 thread_index)
+{
+ stream_session_t *s;
+ s = pool_elt_at_index (session_manager_main.sessions[thread_index], si);
+ if (s->thread_index != thread_index || s->session_index != si
+ /* || s->server_rx_fifo->master_session_index != si
+ || s->server_tx_fifo->master_session_index != si
+ || s->server_rx_fifo->master_thread_index != thread_index
+ || s->server_tx_fifo->master_thread_index != thread_index */ )
+ return 0;
+ return 1;
+}
+
+always_inline stream_session_t *
+stream_session_get (u32 si, u32 thread_index)
+{
+ ASSERT (stream_session_is_valid (si, thread_index));
+ return pool_elt_at_index (session_manager_main.sessions[thread_index], si);
+}
+
+always_inline stream_session_t *
+stream_session_get_if_valid (u64 si, u32 thread_index)
+{
+ if (thread_index >= vec_len (session_manager_main.sessions))
+ return 0;
+
+ if (pool_is_free_index (session_manager_main.sessions[thread_index], si))
+ return 0;
+
+ ASSERT (stream_session_is_valid (si, thread_index));
+ return pool_elt_at_index (session_manager_main.sessions[thread_index], si);
+}
+
+always_inline u64
+stream_session_handle (stream_session_t * s)
+{
+ return ((u64) s->thread_index << 32) | (u64) s->session_index;
+}
+
+always_inline u32
+stream_session_index_from_handle (u64 handle)
+{
+ return handle & 0xFFFFFFFF;
+}
+
+always_inline u32
+stream_session_thread_from_handle (u64 handle)
+{
+ return handle >> 32;
+}
+
+always_inline void
+stream_session_parse_handle (u64 handle, u32 * index, u32 * thread_index)
+{
+ *index = stream_session_index_from_handle (handle);
+ *thread_index = stream_session_thread_from_handle (handle);
+}
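+
+/*
+ * Worked example: a session at index 5 on thread 2 has handle
+ * 0x0000000200000005; parsing recovers thread_index 2 and index 5. Note
+ * that listen sessions encode session_type, not thread_index, in the
+ * upper word (see listen_session_get_handle below), so the two handle
+ * spaces are not interchangeable.
+ */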
+
+always_inline stream_session_t *
+stream_session_get_from_handle (u64 handle)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ return pool_elt_at_index (smm->sessions[stream_session_thread_from_handle
+ (handle)],
+ stream_session_index_from_handle (handle));
+}
+
+always_inline stream_session_t *
+stream_session_listener_get (u8 sst, u64 si)
+{
+ return pool_elt_at_index (session_manager_main.listen_sessions[sst], si);
+}
+
+always_inline u32
+stream_session_get_index (stream_session_t * s)
+{
+ if (s->session_state == SESSION_STATE_LISTENING)
+ return s - session_manager_main.listen_sessions[s->session_type];
+
+ return s - session_manager_main.sessions[s->thread_index];
+}
+
+always_inline u32
+stream_session_max_rx_enqueue (transport_connection_t * tc)
+{
+ stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+ return svm_fifo_max_enqueue (s->server_rx_fifo);
+}
+
+always_inline u32
+stream_session_rx_fifo_size (transport_connection_t * tc)
+{
+ stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+ return s->server_rx_fifo->nitems;
+}
+
+u32 stream_session_tx_fifo_max_dequeue (transport_connection_t * tc);
+
+int
+stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b,
+ u32 offset, u8 queue_event, u8 is_in_order);
+int
+stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer,
+ u32 offset, u32 max_bytes);
+u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
+
+int stream_session_connect_notify (transport_connection_t * tc, u8 is_fail);
+void stream_session_init_fifos_pointers (transport_connection_t * tc,
+ u32 rx_pointer, u32 tx_pointer);
+
+void stream_session_accept_notify (transport_connection_t * tc);
+void stream_session_disconnect_notify (transport_connection_t * tc);
+void stream_session_delete_notify (transport_connection_t * tc);
+void stream_session_reset_notify (transport_connection_t * tc);
+int
+stream_session_accept (transport_connection_t * tc, u32 listener_index,
+ u8 sst, u8 notify);
+int
+stream_session_open (u32 app_index, session_type_t st,
+ transport_endpoint_t * tep,
+ transport_connection_t ** tc);
+int stream_session_listen (stream_session_t * s, transport_endpoint_t * tep);
+int stream_session_stop_listen (stream_session_t * s);
+void stream_session_disconnect (stream_session_t * s);
+void stream_session_cleanup (stream_session_t * s);
+void session_send_session_evt_to_thread (u64 session_handle,
+ fifo_event_type_t evt_type,
+ u32 thread_index);
+
+u8 *format_stream_session (u8 * s, va_list * args);
+uword unformat_stream_session (unformat_input_t * input, va_list * args);
+uword unformat_transport_connection (unformat_input_t * input,
+ va_list * args);
+
+int
+send_session_connected_callback (u32 app_index, u32 api_context,
+ stream_session_t * s, u8 is_fail);
+
+
+clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en);
+
+always_inline unix_shared_memory_queue_t *
+session_manager_get_vpp_event_queue (u32 thread_index)
+{
+ return session_manager_main.vpp_event_queues[thread_index];
+}
+
+int session_manager_flush_enqueue_events (u32 thread_index);
+
+always_inline u64
+listen_session_get_handle (stream_session_t * s)
+{
+ ASSERT (s->session_state == SESSION_STATE_LISTENING);
+ return ((u64) s->session_type << 32) | s->session_index;
+}
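+
+/*
+ * Note: because the upper word is the session type, the matching lookup
+ * below indexes listen_sessions[type] rather than a per-thread pool; e.g.
+ * a SESSION_TYPE_IP4_TCP listener at index 3 has handle
+ * ((u64) SESSION_TYPE_IP4_TCP << 32) | 3.
+ */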
+
+always_inline stream_session_t *
+listen_session_get_from_handle (u64 handle)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ stream_session_t *s;
+ u32 type, index;
+ type = handle >> 32;
+ index = handle & 0xFFFFFFFF;
+
+ if (pool_is_free_index (smm->listen_sessions[type], index))
+ return 0;
+
+ s = pool_elt_at_index (smm->listen_sessions[type], index);
+ ASSERT (s->session_state == SESSION_STATE_LISTENING);
+ return s;
+}
+
+always_inline stream_session_t *
+listen_session_new (session_type_t type)
+{
+ stream_session_t *s;
+ pool_get_aligned (session_manager_main.listen_sessions[type], s,
+ CLIB_CACHE_LINE_BYTES);
+ memset (s, 0, sizeof (*s));
+
+ s->session_type = type;
+ s->session_state = SESSION_STATE_LISTENING;
+ s->session_index = s - session_manager_main.listen_sessions[type];
+
+ return s;
+}
+
+always_inline stream_session_t *
+listen_session_get (session_type_t type, u32 index)
+{
+ return pool_elt_at_index (session_manager_main.listen_sessions[type],
+ index);
+}
+
+always_inline void
+listen_session_del (stream_session_t * s)
+{
+ pool_put (session_manager_main.listen_sessions[s->session_type], s);
+}
+
+always_inline stream_session_t *
+session_manager_get_listener (u8 type, u32 index)
+{
+ return pool_elt_at_index (session_manager_main.listen_sessions[type],
+ index);
+}
+
+always_inline void
+session_manager_set_transport_rx_fn (u8 type, u8 is_peek)
+{
+ /* If an offset function is provided, then peek instead of dequeue */
+ session_manager_main.session_tx_fns[type] = (is_peek) ?
+ session_tx_fifo_peek_and_snd : session_tx_fifo_dequeue_and_snd;
+}
+
+session_type_t
+session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4);
+
+always_inline u8
+session_manager_is_enabled ()
+{
+ return session_manager_main.is_enabled == 1;
+}
+
+#endif /* __included_session_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
new file mode 100755
index 00000000..250f9906
--- /dev/null
+++ b/src/vnet/session/session_api.c
@@ -0,0 +1,763 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/session/application.h>
+
+#include <vnet/vnet_msg_enum.h>
+#include "application_interface.h"
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_session_api_msg \
+_(MAP_ANOTHER_SEGMENT_REPLY, map_another_segment_reply) \
+_(APPLICATION_ATTACH, application_attach) \
+_(APPLICATION_DETACH, application_detach) \
+_(BIND_URI, bind_uri) \
+_(UNBIND_URI, unbind_uri) \
+_(CONNECT_URI, connect_uri) \
+_(DISCONNECT_SESSION, disconnect_session) \
+_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \
+_(ACCEPT_SESSION_REPLY, accept_session_reply) \
+_(RESET_SESSION_REPLY, reset_session_reply) \
+_(BIND_SOCK, bind_sock) \
+_(UNBIND_SOCK, unbind_sock) \
+_(CONNECT_SOCK, connect_sock) \
+_(SESSION_ENABLE_DISABLE, session_enable_disable) \
+
+static int
+send_add_segment_callback (u32 api_client_index, const u8 * segment_name,
+ u32 segment_size)
+{
+ vl_api_map_another_segment_t *mp;
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (api_client_index);
+
+ if (!q)
+ return -1;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MAP_ANOTHER_SEGMENT);
+ mp->segment_size = segment_size;
+ strncpy ((char *) mp->segment_name, (char *) segment_name,
+ sizeof (mp->segment_name) - 1);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+
+ return 0;
+}
+
+static int
+send_session_accept_callback (stream_session_t * s)
+{
+ vl_api_accept_session_t *mp;
+ unix_shared_memory_queue_t *q, *vpp_queue;
+ application_t *server = application_get (s->app_index);
+ transport_connection_t *tc;
+ transport_proto_vft_t *tp_vft;
+ stream_session_t *listener;
+
+ q = vl_api_client_index_to_input_queue (server->api_client_index);
+ vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
+
+ if (!q)
+ return -1;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SESSION);
+ mp->context = server->index;
+ listener = listen_session_get (s->session_type, s->listener_index);
+ tp_vft = session_get_transport_vft (s->session_type);
+ tc = tp_vft->get_connection (s->connection_index, s->thread_index);
+ mp->listener_handle = listen_session_get_handle (listener);
+ mp->handle = stream_session_handle (s);
+ mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo);
+ mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo);
+ mp->vpp_event_queue_address = pointer_to_uword (vpp_queue);
+ mp->port = tc->rmt_port;
+ mp->is_ip4 = tc->is_ip4;
+ clib_memcpy (&mp->ip, &tc->rmt_ip, sizeof (tc->rmt_ip));
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+
+ return 0;
+}
+
+static void
+send_session_disconnect_callback (stream_session_t * s)
+{
+ vl_api_disconnect_session_t *mp;
+ unix_shared_memory_queue_t *q;
+ application_t *app = application_get (s->app_index);
+
+ q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+ if (!q)
+ return;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SESSION);
+ mp->handle = stream_session_handle (s);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+send_session_reset_callback (stream_session_t * s)
+{
+ vl_api_reset_session_t *mp;
+ unix_shared_memory_queue_t *q;
+ application_t *app = application_get (s->app_index);
+
+ q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+ if (!q)
+ return;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_RESET_SESSION);
+ mp->handle = stream_session_handle (s);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+int
+send_session_connected_callback (u32 app_index, u32 api_context,
+ stream_session_t * s, u8 is_fail)
+{
+ vl_api_connect_session_reply_t *mp;
+ unix_shared_memory_queue_t *q;
+ application_t *app;
+ unix_shared_memory_queue_t *vpp_queue;
+
+ app = application_get (app_index);
+ q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+ if (!q)
+ return -1;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_SESSION_REPLY);
+ mp->context = api_context;
+ if (!is_fail)
+ {
+ vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
+ mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo);
+ mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo);
+ mp->handle = stream_session_handle (s);
+ mp->vpp_event_queue_address = pointer_to_uword (vpp_queue);
+ mp->retval = 0;
+ }
+ else
+ {
+ mp->retval = clib_host_to_net_u32 (VNET_API_ERROR_SESSION_CONNECT_FAIL);
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ return 0;
+}
+
+/**
+ * Redirect a connect_uri message to the indicated server.
+ * Only sent if the server has bound the related port with
+ * URI_OPTIONS_FLAGS_USE_FIFO
+ */
+static int
+redirect_connect_callback (u32 server_api_client_index, void *mp_arg)
+{
+ vl_api_connect_uri_t *mp = mp_arg;
+ unix_shared_memory_queue_t *server_q, *client_q;
+ vlib_main_t *vm = vlib_get_main ();
+ f64 timeout = vlib_time_now (vm) + 0.5;
+ application_t *app;
+ int rv = 0;
+
+ server_q = vl_api_client_index_to_input_queue (server_api_client_index);
+
+ if (!server_q)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ client_q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!client_q)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE_2;
+ goto out;
+ }
+
+ /* Tell the server the client's API queue address, so it can reply */
+ mp->client_queue_address = pointer_to_uword (client_q);
+ app = application_lookup (mp->client_index);
+ if (!app)
+ {
+ clib_warning ("no client application");
+ return -1;
+ }
+
+ mp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = app->sm_properties.rx_fifo_size;
+ mp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = app->sm_properties.tx_fifo_size;
+
+ /*
+ * Bounce message handlers MUST NOT block the data-plane.
+ * Spin waiting for the queue lock, but
+ */
+
+ while (vlib_time_now (vm) < timeout)
+ {
+ rv =
+ unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ );
+ switch (rv)
+ {
+ /* correctly enqueued */
+ case 0:
+ return VNET_CONNECT_REDIRECTED;
+
+ /* continue spinning, wait for pthread_mutex_trylock to work */
+ case -1:
+ continue;
+
+ /* queue stuffed, drop the msg */
+ case -2:
+ rv = VNET_API_ERROR_QUEUE_FULL;
+ goto out;
+ }
+ }
+out:
+ /* Dispose of the message */
+ vl_msg_api_free (mp);
+ return rv;
+}
+
+static session_cb_vft_t uri_session_cb_vft = {
+ .session_accept_callback = send_session_accept_callback,
+ .session_disconnect_callback = send_session_disconnect_callback,
+ .session_connected_callback = send_session_connected_callback,
+ .session_reset_callback = send_session_reset_callback,
+ .add_segment_callback = send_add_segment_callback,
+ .redirect_connect_callback = redirect_connect_callback
+};
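+
+/*
+ * This vft bridges session-layer callbacks to binary API messages for
+ * external shared-memory API clients; it is wired in by
+ * vl_api_application_attach_t_handler below. Built-in applications attach
+ * with their own vft and receive in-process callbacks instead (see the
+ * builtin_server_rx_callback path in session.c).
+ */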
+
+static void
+vl_api_session_enable_disable_t_handler (vl_api_session_enable_disable_t * mp)
+{
+ vl_api_session_enable_disable_reply_t *rmp;
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+
+ vnet_session_enable_disable (vm, mp->is_enable);
+ REPLY_MACRO (VL_API_SESSION_ENABLE_DISABLE_REPLY);
+}
+
+static void
+vl_api_application_attach_t_handler (vl_api_application_attach_t * mp)
+{
+ vl_api_application_attach_reply_t *rmp;
+ vnet_app_attach_args_t _a, *a = &_a;
+ int rv;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ STATIC_ASSERT (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <=
+ sizeof (mp->options),
+ "Out of options, fix api message definition");
+
+ memset (a, 0, sizeof (*a));
+
+ a->api_client_index = mp->client_index;
+ a->options = mp->options;
+ a->session_cb_vft = &uri_session_cb_vft;
+
+ rv = vnet_application_attach (a);
+
+done:
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_APPLICATION_ATTACH_REPLY, ({
+ if (!rv)
+ {
+ rmp->segment_name_length = 0;
+ /* $$$$ policy? */
+ rmp->segment_size = a->segment_size;
+ if (a->segment_name_length)
+ {
+ memcpy (rmp->segment_name, a->segment_name,
+ a->segment_name_length);
+ rmp->segment_name_length = a->segment_name_length;
+ }
+ rmp->app_event_queue_address = a->app_event_queue_address;
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_application_detach_t_handler (vl_api_application_detach_t * mp)
+{
+ vl_api_application_detach_reply_t *rmp;
+ int rv = VNET_API_ERROR_INVALID_VALUE_2;
+ vnet_app_detach_args_t _a, *a = &_a;
+ application_t *app;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ a->app_index = app->index;
+ rv = vnet_application_detach (a);
+ }
+
+done:
+ REPLY_MACRO (VL_API_APPLICATION_DETACH_REPLY);
+}
+
+static void
+vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp)
+{
+ vl_api_bind_uri_reply_t *rmp;
+ vnet_bind_args_t _a, *a = &_a;
+ application_t *app;
+ int rv;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ memset (a, 0, sizeof (*a));
+ a->uri = (char *) mp->uri;
+ a->app_index = app->index;
+ rv = vnet_bind_uri (a);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ }
+
+done:
+ REPLY_MACRO (VL_API_BIND_URI_REPLY);
+}
+
+static void
+vl_api_unbind_uri_t_handler (vl_api_unbind_uri_t * mp)
+{
+ vl_api_unbind_uri_reply_t *rmp;
+ application_t *app;
+ vnet_unbind_args_t _a, *a = &_a;
+ int rv;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ a->uri = (char *) mp->uri;
+ a->app_index = app->index;
+ rv = vnet_unbind_uri (a);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ }
+
+done:
+ REPLY_MACRO (VL_API_UNBIND_URI_REPLY);
+}
+
+static void
+vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp)
+{
+ vl_api_connect_session_reply_t *rmp;
+ vnet_connect_args_t _a, *a = &_a;
+ application_t *app;
+ int rv;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ a->uri = (char *) mp->uri;
+ a->api_context = mp->context;
+ a->app_index = app->index;
+ a->mp = mp;
+ rv = vnet_connect_uri (a);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ }
+
+ if (rv == 0 || rv == VNET_CONNECT_REDIRECTED)
+ return;
+
+ /* Got some error, relay it */
+
+done:
+ /* *INDENT-OFF* */
+ REPLY_MACRO (VL_API_CONNECT_SESSION_REPLY);
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
+{
+ vl_api_disconnect_session_reply_t *rmp;
+ vnet_disconnect_args_t _a, *a = &_a;
+ application_t *app;
+ int rv = 0;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ a->handle = mp->handle;
+ a->app_index = app->index;
+ rv = vnet_disconnect_session (a);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ }
+
+done:
+ REPLY_MACRO (VL_API_DISCONNECT_SESSION_REPLY);
+}
+
+static void
+vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t *
+ mp)
+{
+ vnet_disconnect_args_t _a, *a = &_a;
+ application_t *app;
+
+ /* Client objected to disconnecting the session, log and continue */
+ if (mp->retval)
+ {
+ clib_warning ("client retval %d", mp->retval);
+ return;
+ }
+
+ /* Disconnect has been confirmed. Confirm close to transport */
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ a->handle = mp->handle;
+ a->app_index = app->index;
+ vnet_disconnect_session (a);
+ }
+}
+
+static void
+vl_api_reset_session_reply_t_handler (vl_api_reset_session_reply_t * mp)
+{
+ application_t *app;
+ stream_session_t *s;
+ u32 index, thread_index;
+
+ app = application_lookup (mp->client_index);
+ if (!app)
+ return;
+
+ stream_session_parse_handle (mp->handle, &index, &thread_index);
+ s = stream_session_get_if_valid (index, thread_index);
+ if (s == 0 || app->index != s->app_index)
+ {
+ clib_warning ("Invalid session!");
+ return;
+ }
+
+ /* Client objected to resetting the session, log and continue */
+ if (mp->retval)
+ {
+ clib_warning ("client retval %d", mp->retval);
+ return;
+ }
+
+ /* This comes as a response to a reset; the transport is only waiting
+ * for confirmation to remove connection state, so no disconnect is
+ * needed */
+ stream_session_cleanup (s);
+}
+
+static void
+vl_api_accept_session_reply_t_handler (vl_api_accept_session_reply_t * mp)
+{
+ stream_session_t *s;
+ u32 session_index, thread_index;
+ vnet_disconnect_args_t _a, *a = &_a;
+
+ /* Server isn't interested, kill the session */
+ if (mp->retval)
+ {
+ a->app_index = mp->context;
+ a->handle = mp->handle;
+ vnet_disconnect_session (a);
+ }
+ else
+ {
+ stream_session_parse_handle (mp->handle, &session_index, &thread_index);
+ s = stream_session_get_if_valid (session_index, thread_index);
+ if (!s)
+ {
+ clib_warning ("session doesn't exist");
+ return;
+ }
+ if (s->app_index != mp->context)
+ {
+ clib_warning ("app doesn't own session");
+ return;
+ }
+ /* XXX volatile? */
+ s->session_state = SESSION_STATE_READY;
+ }
+}
+
+static void
+vl_api_map_another_segment_reply_t_handler (vl_api_map_another_segment_reply_t
+ * mp)
+{
+ clib_warning ("not implemented");
+}
+
+static void
+vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp)
+{
+ vl_api_bind_sock_reply_t *rmp;
+ vnet_bind_args_t _a, *a = &_a;
+ int rv = VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ application_t *app;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ ip46_address_t *ip46 = (ip46_address_t *) mp->ip;
+
+ memset (a, 0, sizeof (*a));
+ a->tep.is_ip4 = mp->is_ip4;
+ a->tep.ip = *ip46;
+ a->tep.port = mp->port;
+ a->tep.vrf = mp->vrf;
+ a->app_index = app->index;
+
+ rv = vnet_bind (a);
+ }
+done:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2 (VL_API_BIND_SOCK_REPLY,({
+ if (!rv)
+ rmp->handle = a->handle;
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_unbind_sock_t_handler (vl_api_unbind_sock_t * mp)
+{
+ vl_api_unbind_sock_reply_t *rmp;
+ vnet_unbind_args_t _a, *a = &_a;
+ application_t *app;
+ int rv = VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ a->app_index = app->index;
+ a->handle = mp->handle;
+ rv = vnet_unbind (a);
+ }
+
+done:
+ REPLY_MACRO (VL_API_UNBIND_SOCK_REPLY);
+}
+
+static void
+vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp)
+{
+ vl_api_connect_session_reply_t *rmp;
+ vnet_connect_args_t _a, *a = &_a;
+ application_t *app;
+ int rv;
+
+ if (session_manager_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+
+ app = application_lookup (mp->client_index);
+ if (app)
+ {
+ unix_shared_memory_queue_t *client_q;
+ ip46_address_t *ip46 = (ip46_address_t *) mp->ip;
+
+ client_q = vl_api_client_index_to_input_queue (mp->client_index);
+ mp->client_queue_address = pointer_to_uword (client_q);
+ a->tep.is_ip4 = mp->is_ip4;
+ a->tep.ip = *ip46;
+ a->tep.port = mp->port;
+ a->tep.vrf = mp->vrf;
+ a->api_context = mp->context;
+ a->app_index = app->index;
+ a->proto = mp->proto;
+ a->mp = mp;
+ rv = vnet_connect (a);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ }
+
+ if (rv == 0 || rv == VNET_CONNECT_REDIRECTED)
+ return;
+
+ /* Got some error, relay it */
+
+done:
+ REPLY_MACRO (VL_API_CONNECT_SESSION_REPLY);
+}
+
+static clib_error_t *
+application_reaper_cb (u32 client_index)
+{
+ application_t *app = application_lookup (client_index);
+ vnet_app_detach_args_t _a, *a = &_a;
+ if (app)
+ {
+ a->app_index = app->index;
+ vnet_application_detach (a);
+ }
+ return 0;
+}
+
+VL_MSG_API_REAPER_FUNCTION (application_reaper_cb);
+
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_session;
+#undef _
+}
+
+/*
+ * session_api_hookup
+ * Add the session layer's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../open-repo/vlib/memclnt_vlib.c:memclnt_process()
+ */
+static clib_error_t *
+session_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_session_api_msg;
+#undef _
+
+ /*
+ * Messages which bounce off the data-plane to
+ * an API client. Simply tells the message handling infra not
+ * to free the message.
+ *
+ * Bounced message handlers MUST NOT block the data plane
+ */
+ am->message_bounce[VL_API_CONNECT_URI] = 1;
+ am->message_bounce[VL_API_CONNECT_SOCK] = 1;
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (session_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
new file mode 100755
index 00000000..8c30a1df
--- /dev/null
+++ b/src/vnet/session/session_cli.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/session/application.h>
+#include <vnet/session/session.h>
+
+u8 *
+format_stream_session_fifos (u8 * s, va_list * args)
+{
+ stream_session_t *ss = va_arg (*args, stream_session_t *);
+ int verbose = va_arg (*args, int);
+ session_fifo_event_t _e, *e = &_e;
+ u8 found;
+
+ s = format (s, " Rx fifo: %U", format_svm_fifo, ss->server_rx_fifo, 1);
+ if (verbose > 2 && ss->server_rx_fifo->has_event)
+ {
+ found = session_node_lookup_fifo_event (ss->server_rx_fifo, e);
+ s = format (s, " session node event: %s\n",
+ found ? "found" : "not found");
+ }
+ s = format (s, " Tx fifo: %U", format_svm_fifo, ss->server_tx_fifo, 1);
+ if (verbose > 2 && ss->server_tx_fifo->has_event)
+ {
+ found = session_node_lookup_fifo_event (ss->server_tx_fifo, e);
+ s = format (s, " session node event: %s\n",
+ found ? "found" : "not found");
+ }
+ return s;
+}
+
+/**
+ * Format stream session as per the following format
+ *
+ * verbose:
+ * "Connection", "Rx fifo", "Tx fifo", "Session Index"
+ * non-verbose:
+ * "Connection"
+ */
+u8 *
+format_stream_session (u8 * s, va_list * args)
+{
+ stream_session_t *ss = va_arg (*args, stream_session_t *);
+ int verbose = va_arg (*args, int);
+ transport_proto_vft_t *tp_vft;
+ u8 *str = 0;
+ tp_vft = session_get_transport_vft (ss->session_type);
+
+ if (verbose == 1 && ss->session_state >= SESSION_STATE_ACCEPTING)
+ str = format (0, "%-10u%-10u%-10lld",
+ svm_fifo_max_dequeue (ss->server_rx_fifo),
+ svm_fifo_max_enqueue (ss->server_tx_fifo),
+ stream_session_get_index (ss));
+
+ if (ss->session_state == SESSION_STATE_READY
+ || ss->session_state == SESSION_STATE_ACCEPTING
+ || ss->session_state == SESSION_STATE_CLOSED)
+ {
+ s = format (s, "%U", tp_vft->format_connection, ss->connection_index,
+ ss->thread_index, verbose);
+ if (verbose == 1)
+ s = format (s, "%v", str);
+ if (verbose > 1)
+ s = format (s, "%U", format_stream_session_fifos, ss, verbose);
+ }
+ else if (ss->session_state == SESSION_STATE_LISTENING)
+ {
+ s = format (s, "%-40U%v", tp_vft->format_listener, ss->connection_index,
+ str);
+ }
+ else if (ss->session_state == SESSION_STATE_CONNECTING)
+ {
+ s = format (s, "%-40U%v", tp_vft->format_half_open,
+ ss->connection_index, str);
+ }
+ else
+ {
+ clib_warning ("Session in state: %d!", ss->session_state);
+ }
+ vec_free (str);
+
+ return s;
+}
+
+uword
+unformat_stream_session_id (unformat_input_t * input, va_list * args)
+{
+ u8 *proto = va_arg (*args, u8 *);
+ ip46_address_t *lcl = va_arg (*args, ip46_address_t *);
+ ip46_address_t *rmt = va_arg (*args, ip46_address_t *);
+ u16 *lcl_port = va_arg (*args, u16 *);
+ u16 *rmt_port = va_arg (*args, u16 *);
+ u8 *is_ip4 = va_arg (*args, u8 *);
+ u8 tuple_is_set = 0;
+
+ memset (lcl, 0, sizeof (*lcl));
+ memset (rmt, 0, sizeof (*rmt));
+
+ if (unformat (input, "tcp"))
+ {
+ *proto = TRANSPORT_PROTO_TCP;
+ }
+ if (unformat (input, "udp"))
+ {
+ *proto = TRANSPORT_PROTO_UDP;
+ }
+ if (unformat (input, "%U:%d->%U:%d", unformat_ip4_address, &lcl->ip4,
+ lcl_port, unformat_ip4_address, &rmt->ip4, rmt_port))
+ {
+ *is_ip4 = 1;
+ tuple_is_set = 1;
+ }
+ else if (unformat (input, "%U:%d->%U:%d", unformat_ip6_address, &lcl->ip6,
+ lcl_port, unformat_ip6_address, &rmt->ip6, rmt_port))
+ {
+ *is_ip4 = 0;
+ tuple_is_set = 1;
+ }
+
+ return tuple_is_set;
+}
+
+uword
+unformat_stream_session (unformat_input_t * input, va_list * args)
+{
+ stream_session_t **result = va_arg (*args, stream_session_t **);
+ stream_session_t *s;
+ u8 proto = ~0;
+ ip46_address_t lcl, rmt;
+ u32 lcl_port = 0, rmt_port = 0;
+ u8 is_ip4 = 0, s_type = ~0;
+
+ if (!unformat (input, "%U", unformat_stream_session_id, &proto, &lcl, &rmt,
+ &lcl_port, &rmt_port, &is_ip4))
+ return 0;
+
+ s_type = session_type_from_proto_and_ip (proto, is_ip4);
+ if (is_ip4)
+ s = stream_session_lookup4 (&lcl.ip4, &rmt.ip4,
+ clib_host_to_net_u16 (lcl_port),
+ clib_host_to_net_u16 (rmt_port), s_type);
+ else
+ s = stream_session_lookup6 (&lcl.ip6, &rmt.ip6,
+ clib_host_to_net_u16 (lcl_port),
+ clib_host_to_net_u16 (rmt_port), s_type);
+ if (s)
+ {
+ *result = s;
+ return 1;
+ }
+ return 0;
+}
+
+uword
+unformat_transport_connection (unformat_input_t * input, va_list * args)
+{
+ transport_connection_t **result = va_arg (*args, transport_connection_t **);
+ u32 suggested_proto = va_arg (*args, u32);
+ transport_connection_t *tc;
+ u8 proto = ~0;
+ ip46_address_t lcl, rmt;
+ u32 lcl_port = 0, rmt_port = 0;
+ u8 is_ip4 = 0, s_type = ~0;
+
+ if (!unformat (input, "%U", unformat_stream_session_id, &proto, &lcl, &rmt,
+ &lcl_port, &rmt_port, &is_ip4))
+ return 0;
+
+ proto = (proto == (u8) ~ 0) ? suggested_proto : proto;
+ if (proto == (u8) ~ 0)
+ return 0;
+ s_type = session_type_from_proto_and_ip (proto, is_ip4);
+ if (is_ip4)
+ tc = stream_session_lookup_transport4 (&lcl.ip4, &rmt.ip4,
+ clib_host_to_net_u16 (lcl_port),
+ clib_host_to_net_u16 (rmt_port),
+ s_type);
+ else
+ tc = stream_session_lookup_transport6 (&lcl.ip6, &rmt.ip6,
+ clib_host_to_net_u16 (lcl_port),
+ clib_host_to_net_u16 (rmt_port),
+ s_type);
+
+ if (tc)
+ {
+ *result = tc;
+ return 1;
+ }
+ return 0;
+}
+
+static clib_error_t *
+show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ int verbose = 0, i;
+ stream_session_t *pool;
+ stream_session_t *s;
+ u8 *str = 0, one_session = 0;
+
+ if (!smm->is_enabled)
+ {
+ return clib_error_return (0, "session layer is not enabled");
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose %d", &verbose))
+ ;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "%U", unformat_stream_session, &s))
+ {
+ one_session = 1;
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (one_session)
+ {
+ vlib_cli_output (vm, "%U", format_stream_session, s, 3);
+ return 0;
+ }
+
+ for (i = 0; i < vec_len (smm->sessions); i++)
+ {
+ u32 once_per_pool;
+ pool = smm->sessions[i];
+
+ once_per_pool = 1;
+
+ if (pool_elts (pool))
+ {
+ vlib_cli_output (vm, "Thread %d: %d active sessions",
+ i, pool_elts (pool));
+ if (verbose)
+ {
+ if (once_per_pool && verbose == 1)
+ {
+ str = format (str, "%-50s%-15s%-10s%-10s%-10s",
+ "Connection", "State", "Rx-f", "Tx-f",
+ "S-idx");
+ vlib_cli_output (vm, "%v", str);
+ vec_reset_length (str);
+ once_per_pool = 0;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach (s, pool,
+ ({
+ vec_reset_length (str);
+ str = format (str, "%U", format_stream_session, s, verbose);
+ vlib_cli_output (vm, "%v", str);
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ else
+ vlib_cli_output (vm, "Thread %d: no active sessions", i);
+ vec_reset_length (str);
+ }
+ vec_free (str);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
+{
+ .path = "show session",
+ .short_help = "show session [verbose [nnn]]",
+ .function = show_session_command_fn,
+};
+/* *INDENT-ON* */
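+
+/*
+ * Example usage, illustrative:
+ *
+ *   vpp# show session              - one summary line per thread
+ *   vpp# show session verbose      - adds the Connection/State/Rx-f/Tx-f table
+ *   vpp# show session verbose 2    - also formats each session's fifos
+ *
+ * A single session can be selected with the 5-tuple accepted by
+ * unformat_stream_session, e.g.
+ * "show session tcp 6.0.1.1:80->6.0.0.1:1234".
+ */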
+
+static int
+clear_session (stream_session_t * s)
+{
+ application_t *server = application_get (s->app_index);
+ server->cb_fns.session_disconnect_callback (s);
+ return 0;
+}
+
+static clib_error_t *
+clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ u32 thread_index = 0, clear_all = 0;
+ u32 session_index = ~0;
+ stream_session_t **pool, *session;
+
+ if (!smm->is_enabled)
+ {
+ return clib_error_return (0, "session layer is not enabled");
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "thread %d", &thread_index))
+ ;
+ else if (unformat (input, "session %d", &session_index))
+ ;
+ else if (unformat (input, "all"))
+ clear_all = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (!clear_all && session_index == ~0)
+ return clib_error_return (0, "session <nn> required, but not set.");
+
+ if (session_index != ~0)
+ {
+ session = stream_session_get_if_valid (session_index, thread_index);
+ if (!session)
+ return clib_error_return (0, "no session %d on thread %d",
+ session_index, thread_index);
+ clear_session (session);
+ }
+
+ if (clear_all)
+ {
+ /* *INDENT-OFF* */
+ vec_foreach (pool, smm->sessions)
+ {
+ pool_foreach(session, *pool, ({
+ clear_session (session);
+ }));
+ };
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_session_command, static) =
+{
+ .path = "clear session",
+ .short_help = "clear session thread <thread> session <index>",
+ .function = clear_session_command_fn,
+};
+/* *INDENT-ON* */
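+
+/* Example invocation of the command above (illustrative):
+ *   vpp# clear session thread 0 session 4
+ */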
+
+static clib_error_t *
+show_session_fifo_trace_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ stream_session_t *s = 0;
+ u8 is_rx = 0, *str = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_stream_session, &s))
+ ;
+ else if (unformat (input, "rx"))
+ is_rx = 1;
+ else if (unformat (input, "tx"))
+ is_rx = 0;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (!SVM_FIFO_TRACE)
+ {
+ vlib_cli_output (vm, "fifo tracing not enabled");
+ return 0;
+ }
+
+ if (!s)
+ {
+ vlib_cli_output (vm, "could not find session");
+ return 0;
+ }
+
+ str = is_rx ?
+ svm_fifo_dump_trace (str, s->server_rx_fifo) :
+ svm_fifo_dump_trace (str, s->server_tx_fifo);
+
+ vlib_cli_output (vm, "%v", str);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_session_fifo_trace_command, static) =
+{
+ .path = "show session fifo trace",
+ .short_help = "show session fifo trace <session>",
+ .function = show_session_fifo_trace_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ stream_session_t *s = 0;
+ u8 is_rx = 0, *str = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_stream_session, &s))
+ ;
+ else if (unformat (input, "rx"))
+ is_rx = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (!SVM_FIFO_TRACE)
+ {
+ vlib_cli_output (vm, "fifo tracing not enabled");
+ return 0;
+ }
+
+ if (!s)
+ {
+ vlib_cli_output (vm, "could not find session");
+ return 0;
+ }
+
+ str = is_rx ?
+ svm_fifo_replay (str, s->server_rx_fifo, 0, 1) :
+ svm_fifo_replay (str, s->server_tx_fifo, 0, 1);
+
+ vlib_cli_output (vm, "%v", str);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (session_replay_fifo_trace_command, static) =
+{
+ .path = "session replay fifo",
+ .short_help = "session replay fifo <session>",
+ .function = session_replay_fifo_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 is_en = 1;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ is_en = 1;
+ else if (unformat (input, "disable"))
+ is_en = 0;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return vnet_session_enable_disable (vm, is_en);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (session_enable_disable_command, static) =
+{
+ .path = "session",
+ .short_help = "session [enable|disable]",
+ .function = session_enable_disable_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h
new file mode 100644
index 00000000..eb11f1a0
--- /dev/null
+++ b/src/vnet/session/session_debug.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_VNET_SESSION_SESSION_DEBUG_H_
+#define SRC_VNET_SESSION_SESSION_DEBUG_H_
+
+#include <vnet/session/transport.h>
+#include <vlib/vlib.h>
+
+#define foreach_session_dbg_evt \
+ _(ENQ, "enqueue") \
+ _(DEQ, "dequeue") \
+ _(DEQ_NODE, "dequeue") \
+ _(POLL_GAP_TRACK, "poll gap track") \
+
+typedef enum _session_evt_dbg
+{
+#define _(sym, str) SESSION_EVT_##sym,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_dbg_e;
+
+#define SESSION_DBG (0)
+#define SESSION_DEQ_NODE_EVTS (0)
+#define SESSION_EVT_POLL_DBG (1)
+
+#if TRANSPORT_DEBUG && SESSION_DBG
+
+#define DEC_SESSION_ETD(_s, _e, _size) \
+ struct \
+ { \
+ u32 data[_size]; \
+ } * ed; \
+ transport_proto_vft_t *vft = \
+ session_get_transport_vft (_s->session_type); \
+ transport_connection_t *_tc = \
+ vft->get_connection (_s->connection_index, _s->thread_index); \
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, \
+ _e, _tc->elog_track)
+
+#define DEC_SESSION_ED(_e, _size) \
+ struct \
+ { \
+ u32 data[_size]; \
+ } * ed; \
+ ed = ELOG_DATA (&vlib_global_main.elog_main, _e)
+
+#define SESSION_EVT_DEQ_HANDLER(_s, _body) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "deq: id %d len %d rd %d wnd %d", \
+ .format_args = "i4i4i4i4", \
+ }; \
+ DEC_SESSION_ETD(_s, _e, 4); \
+ do { _body; } while (0); \
+}
+
+#define SESSION_EVT_ENQ_HANDLER(_s, _body) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "enq: id %d length %d", \
+ .format_args = "i4i4", \
+ }; \
+ DEC_SESSION_ETD(_s, _e, 2); \
+ do { _body; } while (0); \
+}
+
+#if SESSION_DEQ_NODE_EVTS
+#define SESSION_EVT_DEQ_NODE_HANDLER(_node_evt) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "deq-node: %s", \
+ .format_args = "t4", \
+ .n_enum_strings = 2, \
+ .enum_strings = { \
+ "start", \
+ "end", \
+ }, \
+ }; \
+ DEC_SESSION_ED(_e, 1); \
+ ed->data[0] = _node_evt; \
+}
+#else
+#define SESSION_EVT_DEQ_NODE_HANDLER(_node_evt)
+#endif
+
+#if SESSION_DBG && SESSION_EVT_POLL_DBG
+#define SESSION_EVT_POLL_GAP(_smm, _my_thread_index) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "nixon-gap: %d MS", \
+ .format_args = "i4", \
+ }; \
+ DEC_SESSION_ED(_e, 1); \
+ ed->data[0] = (u32) ((now - \
+ _smm->last_event_poll_by_thread[_my_thread_index])*1000.0); \
+}
+#define SESSION_EVT_POLL_GAP_TRACK_HANDLER(_smm, _my_thread_index) \
+{ \
+ if (PREDICT_TRUE( \
+ _smm->last_event_poll_by_thread[_my_thread_index] != 0.0)) \
+ if (now > _smm->last_event_poll_by_thread[_my_thread_index] + 500e-6)\
+ SESSION_EVT_POLL_GAP(_smm, _my_thread_index); \
+ _smm->last_event_poll_by_thread[_my_thread_index] = now; \
+}
+
+#else
+#define SESSION_EVT_POLL_GAP(_smm, _my_thread_index)
+#define SESSION_EVT_POLL_GAP_TRACK_HANDLER(_smm, _my_thread_index)
+#endif
+
+#define CONCAT_HELPER(_a, _b) _a##_b
+#define CC(_a, _b) CONCAT_HELPER(_a, _b)
+#define SESSION_EVT_DBG(_evt, _args...) CC(_evt, _HANDLER)(_args)
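+
+/* Dispatch example: SESSION_EVT_DBG (SESSION_EVT_DEQ, s0, body) pastes the
+ * event name and _HANDLER together, i.e. it expands to
+ * SESSION_EVT_DEQ_HANDLER (s0, body) when the debug switches above are
+ * enabled, and to nothing otherwise. */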
+
+#else
+#define SESSION_EVT_DBG(_evt, _args...)
+#endif
+
+#endif /* SRC_VNET_SESSION_SESSION_DEBUG_H_ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c
new file mode 100644
index 00000000..4487b1c3
--- /dev/null
+++ b/src/vnet/session/session_lookup.c
@@ -0,0 +1,619 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Generate typed init functions for multiple hash table styles... */
+#include <vppinfra/bihash_16_8.h>
+#include <vppinfra/bihash_template.h>
+
+#include <vppinfra/bihash_template.c>
+
+#undef __included_bihash_template_h__
+
+#include <vppinfra/bihash_48_8.h>
+#include <vppinfra/bihash_template.h>
+
+#include <vppinfra/bihash_template.c>
+#include <vnet/session/session_lookup.h>
+#include <vnet/session/session.h>
+
+static session_lookup_t session_lookup;
+extern transport_proto_vft_t *tp_vfts;
+
+/* *INDENT-OFF* */
+/* 16 octets */
+typedef CLIB_PACKED (struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t src;
+ ip4_address_t dst;
+ u16 src_port;
+ u16 dst_port;
+ /* align by making this 4 octets even though it's a 1-bit field
+ * NOTE: avoid key overlap with other transports that use 5 tuples for
+ * session identification.
+ */
+ u32 proto;
+ };
+ u64 as_u64[2];
+ };
+}) v4_connection_key_t;
+
+typedef CLIB_PACKED (struct {
+ union
+ {
+ struct
+ {
+ /* 48 octets */
+ ip6_address_t src;
+ ip6_address_t dst;
+ u16 src_port;
+ u16 dst_port;
+ u32 proto;
+ u64 unused;
+ };
+ u64 as_u64[6];
+ };
+}) v6_connection_key_t;
+/* *INDENT-ON* */
+
+typedef clib_bihash_kv_16_8_t session_kv4_t;
+typedef clib_bihash_kv_48_8_t session_kv6_t;
+
+always_inline void
+make_v4_ss_kv (session_kv4_t * kv, ip4_address_t * lcl, ip4_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
+{
+ v4_connection_key_t *key = (v4_connection_key_t *) kv->key;
+
+ key->src.as_u32 = lcl->as_u32;
+ key->dst.as_u32 = rmt->as_u32;
+ key->src_port = lcl_port;
+ key->dst_port = rmt_port;
+ key->proto = proto;
+
+ kv->value = ~0ULL;
+}
+
+always_inline void
+make_v4_listener_kv (session_kv4_t * kv, ip4_address_t * lcl, u16 lcl_port,
+ u8 proto)
+{
+ v4_connection_key_t *key = (v4_connection_key_t *) kv->key;
+
+ key->src.as_u32 = lcl->as_u32;
+ key->dst.as_u32 = 0;
+ key->src_port = lcl_port;
+ key->dst_port = 0;
+ key->proto = proto;
+
+ kv->value = ~0ULL;
+}
+
+always_inline void
+make_v4_ss_kv_from_tc (session_kv4_t * kv, transport_connection_t * t)
+{
+ make_v4_ss_kv (kv, &t->lcl_ip.ip4, &t->rmt_ip.ip4, t->lcl_port, t->rmt_port,
+ session_type_from_proto_and_ip (t->transport_proto, 1));
+}
+
+always_inline void
+make_v6_ss_kv (session_kv6_t * kv, ip6_address_t * lcl, ip6_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
+{
+ v6_connection_key_t *key = (v6_connection_key_t *) kv->key;
+
+ key->src.as_u64[0] = lcl->as_u64[0];
+ key->src.as_u64[1] = lcl->as_u64[1];
+ key->dst.as_u64[0] = rmt->as_u64[0];
+ key->dst.as_u64[1] = rmt->as_u64[1];
+ key->src_port = lcl_port;
+ key->dst_port = rmt_port;
+ key->proto = proto;
+ key->unused = 0;
+
+ kv->value = ~0ULL;
+}
+
+always_inline void
+make_v6_listener_kv (session_kv6_t * kv, ip6_address_t * lcl, u16 lcl_port,
+ u8 proto)
+{
+ v6_connection_key_t *key = (v6_connection_key_t *) kv->key;
+
+ key->src.as_u64[0] = lcl->as_u64[0];
+ key->src.as_u64[1] = lcl->as_u64[1];
+ key->dst.as_u64[0] = 0;
+ key->dst.as_u64[1] = 0;
+ key->src_port = lcl_port;
+ key->dst_port = 0;
+ key->proto = proto;
+ key->unused = 0;
+
+ kv->value = ~0ULL;
+}
+
+always_inline void
+make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * t)
+{
+ make_v6_ss_kv (kv, &t->lcl_ip.ip6, &t->rmt_ip.ip6, t->lcl_port, t->rmt_port,
+ session_type_from_proto_and_ip (t->transport_proto, 0));
+}
+
+/*
+ * Session lookup key: (src-ip, dst-ip, src-port, dst-port, session-type)
+ * Value: (owner thread index << 32 | session_index)
+ */
+void
+stream_session_table_add_for_tc (transport_connection_t * tc, u64 value)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ session_kv6_t kv6;
+
+ if (tc->is_ip4)
+ {
+ make_v4_ss_kv_from_tc (&kv4, tc);
+ kv4.value = value;
+ clib_bihash_add_del_16_8 (&sl->v4_session_hash, &kv4, 1 /* is_add */ );
+ }
+ else
+ {
+ make_v6_ss_kv_from_tc (&kv6, tc);
+ kv6.value = value;
+ clib_bihash_add_del_48_8 (&sl->v6_session_hash, &kv6, 1 /* is_add */ );
+ }
+}
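+
+/* Illustrative sketch of the value encoding used above (documentation only,
+ * not called anywhere):
+ *
+ *   u64 value = ((u64) thread_index << 32) | session_index;
+ *   u32 thread_index = value >> 32;
+ *   u32 session_index = value & 0xFFFFFFFFULL;
+ *
+ * stream_session_get_tsi below relies on exactly this layout. */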
+
+void
+stream_session_table_add (session_manager_main_t * smm, stream_session_t * s,
+ u64 value)
+{
+ transport_connection_t *tc;
+
+ tc = tp_vfts[s->session_type].get_connection (s->connection_index,
+ s->thread_index);
+ stream_session_table_add_for_tc (tc, value);
+}
+
+int
+stream_session_table_del_for_tc (transport_connection_t * tc)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ session_kv6_t kv6;
+
+ if (tc->is_ip4)
+ {
+ make_v4_ss_kv_from_tc (&kv4, tc);
+ return clib_bihash_add_del_16_8 (&sl->v4_session_hash, &kv4,
+ 0 /* is_add */ );
+ }
+ else
+ {
+ make_v6_ss_kv_from_tc (&kv6, tc);
+ return clib_bihash_add_del_48_8 (&sl->v6_session_hash, &kv6,
+ 0 /* is_add */ );
+ }
+}
+
+int
+stream_session_table_del (stream_session_t * s)
+{
+ transport_connection_t *ts;
+ ts = tp_vfts[s->session_type].get_connection (s->connection_index,
+ s->thread_index);
+ return stream_session_table_del_for_tc (ts);
+}
+
+
+void
+stream_session_half_open_table_add (transport_connection_t * tc, u64 value)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ session_kv6_t kv6;
+
+ if (tc->is_ip4)
+ {
+ make_v4_ss_kv_from_tc (&kv4, tc);
+ kv4.value = value;
+ (void) clib_bihash_add_del_16_8 (&sl->v4_half_open_hash, &kv4,
+ 1 /* is_add */ );
+ }
+ else
+ {
+ make_v6_ss_kv_from_tc (&kv6, tc);
+ kv6.value = value;
+ (void) clib_bihash_add_del_48_8 (&sl->v6_half_open_hash, &kv6,
+ 1 /* is_add */ );
+ }
+}
+
+void
+stream_session_half_open_table_del (transport_connection_t * tc)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ session_kv6_t kv6;
+
+ if (tc->is_ip4)
+ {
+ make_v4_ss_kv_from_tc (&kv4, tc);
+ clib_bihash_add_del_16_8 (&sl->v4_half_open_hash, &kv4,
+ 0 /* is_add */ );
+ }
+ else
+ {
+ make_v6_ss_kv_from_tc (&kv6, tc);
+ clib_bihash_add_del_48_8 (&sl->v6_half_open_hash, &kv6,
+ 0 /* is_add */ );
+ }
+}
+
+stream_session_t *
+stream_session_lookup_listener4 (ip4_address_t * lcl, u16 lcl_port, u8 proto)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ int rv;
+
+ make_v4_listener_kv (&kv4, lcl, lcl_port, proto);
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4);
+ if (rv == 0)
+ return session_manager_get_listener (proto, (u32) kv4.value);
+
+ /* Zero out the lcl ip */
+ kv4.key[0] = 0;
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4);
+ if (rv == 0)
+ return session_manager_get_listener (proto, (u32) kv4.value);
+
+ return 0;
+}
+
+/** Looks up a session based on the 5-tuple passed as argument.
+ *
+ * First it tries to find an established session; if this fails, it tries
+ * finding a listener session (including one bound to all interfaces, via a
+ * wildcarded local source); and if this also fails, it tries half-open
+ * connections.
+ */
+stream_session_t *
+stream_session_lookup4 (ip4_address_t * lcl, ip4_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ stream_session_t *s;
+ int rv;
+
+ /* Lookup session amongst established ones */
+ make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4);
+ if (rv == 0)
+ return stream_session_get_from_handle (kv4.value);
+
+ /* If nothing is found, check if any listener is available */
+ if ((s = stream_session_lookup_listener4 (lcl, lcl_port, proto)))
+ return s;
+
+ /* Finally, try half-open connections */
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_half_open_hash, &kv4);
+ if (rv == 0)
+ return stream_session_get_from_handle (kv4.value);
+ return 0;
+}
+
+stream_session_t *
+stream_session_lookup_listener6 (ip6_address_t * lcl, u16 lcl_port, u8 proto)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv6_t kv6;
+ int rv;
+
+ make_v6_listener_kv (&kv6, lcl, lcl_port, proto);
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6);
+ if (rv == 0)
+ return session_manager_get_listener (proto, (u32) kv6.value);
+
+ /* Zero out the lcl ip */
+ kv6.key[0] = kv6.key[1] = 0;
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6);
+ if (rv == 0)
+ return session_manager_get_listener (proto, (u32) kv6.value);
+
+ return 0;
+}
+
+/* Looks up a session based on the 5-tuple passed as argument.
+ * First it tries to find an established session; if this fails, it tries
+ * finding a listener session (including one bound to all interfaces, via a
+ * wildcarded local source); and if this also fails, it tries half-open
+ * connections. */
+stream_session_t *
+stream_session_lookup6 (ip6_address_t * lcl, ip6_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv6_t kv6;
+ stream_session_t *s;
+ int rv;
+
+ make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6);
+ if (rv == 0)
+ return stream_session_get_from_handle (kv6.value);
+
+ /* If nothing is found, check if any listener is available */
+ if ((s = stream_session_lookup_listener6 (lcl, lcl_port, proto)))
+ return s;
+
+ /* Finally, try half-open connections */
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_half_open_hash, &kv6);
+ if (rv == 0)
+ return stream_session_get_from_handle (kv6.value);
+ return 0;
+}
+
+stream_session_t *
+stream_session_lookup_listener (ip46_address_t * lcl, u16 lcl_port, u8 proto)
+{
+ switch (proto)
+ {
+ case SESSION_TYPE_IP4_UDP:
+ case SESSION_TYPE_IP4_TCP:
+ return stream_session_lookup_listener4 (&lcl->ip4, lcl_port, proto);
+ break;
+ case SESSION_TYPE_IP6_UDP:
+ case SESSION_TYPE_IP6_TCP:
+ return stream_session_lookup_listener6 (&lcl->ip6, lcl_port, proto);
+ break;
+ }
+ return 0;
+}
+
+u64
+stream_session_half_open_lookup_handle (ip46_address_t * lcl,
+ ip46_address_t * rmt, u16 lcl_port,
+ u16 rmt_port, u8 proto)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ session_kv6_t kv6;
+ int rv;
+
+ switch (proto)
+ {
+ case SESSION_TYPE_IP4_UDP:
+ case SESSION_TYPE_IP4_TCP:
+ make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_half_open_hash, &kv4);
+
+ if (rv == 0)
+ return kv4.value;
+
+ return HALF_OPEN_LOOKUP_INVALID_VALUE;
+ break;
+ case SESSION_TYPE_IP6_UDP:
+ case SESSION_TYPE_IP6_TCP:
+ make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_half_open_hash, &kv6);
+
+ if (rv == 0)
+ return kv6.value;
+
+ return HALF_OPEN_LOOKUP_INVALID_VALUE;
+ break;
+ }
+ return HALF_OPEN_LOOKUP_INVALID_VALUE;
+}
+
+transport_connection_t *
+stream_session_half_open_lookup (ip46_address_t * lcl, ip46_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
+{
+ u64 handle;
+ handle =
+ stream_session_half_open_lookup_handle (lcl, rmt, lcl_port, rmt_port,
+ proto);
+ if (handle != HALF_OPEN_LOOKUP_INVALID_VALUE)
+ return tp_vfts[proto].get_half_open (handle & 0xFFFFFFFF);
+ return 0;
+}
+
+always_inline stream_session_t *
+stream_session_get_tsi (u64 ti_and_si, u32 thread_index)
+{
+ ASSERT ((u32) (ti_and_si >> 32) == thread_index);
+ return pool_elt_at_index (session_manager_main.sessions[thread_index],
+ ti_and_si & 0xFFFFFFFFULL);
+}
+
+transport_connection_t *
+stream_session_lookup_transport_wt4 (ip4_address_t * lcl, ip4_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto,
+ u32 my_thread_index)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ stream_session_t *s;
+ int rv;
+
+ /* Lookup session amongst established ones */
+ make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4);
+ if (rv == 0)
+ {
+ s = stream_session_get_tsi (kv4.value, my_thread_index);
+ return tp_vfts[s->session_type].get_connection (s->connection_index,
+ my_thread_index);
+ }
+
+ /* If nothing is found, check if any listener is available */
+ s = stream_session_lookup_listener4 (lcl, lcl_port, proto);
+ if (s)
+ return tp_vfts[s->session_type].get_listener (s->connection_index);
+
+ /* Finally, try half-open connections */
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_half_open_hash, &kv4);
+ if (rv == 0)
+ return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF);
+ return 0;
+}
+
+transport_connection_t *
+stream_session_lookup_transport4 (ip4_address_t * lcl, ip4_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
+{
+ session_lookup_t *sl = &session_lookup;
+ session_kv4_t kv4;
+ stream_session_t *s;
+ int rv;
+
+ /* Lookup session amongst established ones */
+ make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4);
+ if (rv == 0)
+ {
+ s = stream_session_get_from_handle (kv4.value);
+ return tp_vfts[s->session_type].get_connection (s->connection_index,
+ s->thread_index);
+ }
+
+ /* If nothing is found, check if any listener is available */
+ s = stream_session_lookup_listener4 (lcl, lcl_port, proto);
+ if (s)
+ return tp_vfts[s->session_type].get_listener (s->connection_index);
+
+ /* Finally, try half-open connections */
+ rv = clib_bihash_search_inline_16_8 (&sl->v4_half_open_hash, &kv4);
+ if (rv == 0)
+ return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF);
+ return 0;
+}
+
+transport_connection_t *
+stream_session_lookup_transport_wt6 (ip6_address_t * lcl, ip6_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto,
+ u32 my_thread_index)
+{
+ session_lookup_t *sl = &session_lookup;
+ stream_session_t *s;
+ session_kv6_t kv6;
+ int rv;
+
+ make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6);
+ if (rv == 0)
+ {
+ s = stream_session_get_tsi (kv6.value, my_thread_index);
+ return tp_vfts[s->session_type].get_connection (s->connection_index,
+ my_thread_index);
+ }
+
+ /* If nothing is found, check if any listener is available */
+ s = stream_session_lookup_listener6 (lcl, lcl_port, proto);
+ if (s)
+ return tp_vfts[s->session_type].get_listener (s->connection_index);
+
+ /* Finally, try half-open connections */
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_half_open_hash, &kv6);
+ if (rv == 0)
+ return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF);
+
+ return 0;
+}
+
+transport_connection_t *
+stream_session_lookup_transport6 (ip6_address_t * lcl, ip6_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
+{
+ session_lookup_t *sl = &session_lookup;
+ stream_session_t *s;
+ session_kv6_t kv6;
+ int rv;
+
+ make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6);
+ if (rv == 0)
+ {
+ s = stream_session_get_from_handle (kv6.value);
+ return tp_vfts[s->session_type].get_connection (s->connection_index,
+ s->thread_index);
+ }
+
+ /* If nothing is found, check if any listener is available */
+ s = stream_session_lookup_listener6 (lcl, lcl_port, proto);
+ if (s)
+ return tp_vfts[s->session_type].get_listener (s->connection_index);
+
+ /* Finally, try half-open connections */
+ rv = clib_bihash_search_inline_48_8 (&sl->v6_half_open_hash, &kv6);
+ if (rv == 0)
+ return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF);
+
+ return 0;
+}
+
+#define foreach_hash_table_parameter \
+ _(v4,session,buckets,20000) \
+ _(v4,session,memory,(64<<20)) \
+ _(v6,session,buckets,20000) \
+ _(v6,session,memory,(64<<20)) \
+ _(v4,halfopen,buckets,20000) \
+ _(v4,halfopen,memory,(64<<20)) \
+ _(v6,halfopen,buckets,20000) \
+ _(v6,halfopen,memory,(64<<20))
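+
+/* Each _(af,table,parm,value) entry above expands, in session_lookup_init
+ * below, first to a local default, e.g.
+ *
+ *   u32 configured_v4_session_table_buckets = 20000;
+ *
+ * which is then overridden if the session manager carries a configured
+ * value for that table parameter. */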
+
+void
+session_lookup_init (void)
+{
+ session_lookup_t *sl = &session_lookup;
+
+#define _(af,table,parm,value) \
+ u32 configured_##af##_##table##_table_##parm = value;
+ foreach_hash_table_parameter;
+#undef _
+
+#define _(af,table,parm,value) \
+ if (session_manager_main.configured_##af##_##table##_table_##parm) \
+ configured_##af##_##table##_table_##parm = \
+ session_manager_main.configured_##af##_##table##_table_##parm;
+ foreach_hash_table_parameter;
+#undef _
+
+ clib_bihash_init_16_8 (&sl->v4_session_hash, "v4 session table",
+ configured_v4_session_table_buckets,
+ configured_v4_session_table_memory);
+ clib_bihash_init_48_8 (&sl->v6_session_hash, "v6 session table",
+ configured_v6_session_table_buckets,
+ configured_v6_session_table_memory);
+ clib_bihash_init_16_8 (&sl->v4_half_open_hash, "v4 half-open table",
+ configured_v4_halfopen_table_buckets,
+ configured_v4_halfopen_table_memory);
+ clib_bihash_init_48_8 (&sl->v6_half_open_hash, "v6 half-open table",
+ configured_v6_halfopen_table_buckets,
+ configured_v6_halfopen_table_memory);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h
new file mode 100644
index 00000000..cf1dc013
--- /dev/null
+++ b/src/vnet/session/session_lookup.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_VNET_SESSION_SESSION_LOOKUP_H_
+#define SRC_VNET_SESSION_SESSION_LOOKUP_H_
+
+#include <vnet/session/stream_session.h>
+#include <vnet/session/transport.h>
+
+typedef struct _session_lookup
+{
+ /** Lookup tables for established sessions and listeners */
+ clib_bihash_16_8_t v4_session_hash;
+ clib_bihash_48_8_t v6_session_hash;
+
+ /** Lookup tables for half-open sessions */
+ clib_bihash_16_8_t v4_half_open_hash;
+ clib_bihash_48_8_t v6_half_open_hash;
+} session_lookup_t;
+
+stream_session_t *stream_session_lookup_listener4 (ip4_address_t * lcl,
+ u16 lcl_port, u8 proto);
+stream_session_t *stream_session_lookup4 (ip4_address_t * lcl,
+ ip4_address_t * rmt, u16 lcl_port,
+ u16 rmt_port, u8 proto);
+stream_session_t *stream_session_lookup_listener6 (ip6_address_t * lcl,
+ u16 lcl_port, u8 proto);
+stream_session_t *stream_session_lookup6 (ip6_address_t * lcl,
+ ip6_address_t * rmt, u16 lcl_port,
+ u16 rmt_port, u8 proto);
+transport_connection_t *stream_session_lookup_transport_wt4 (ip4_address_t *
+ lcl,
+ ip4_address_t *
+ rmt,
+ u16 lcl_port,
+ u16 rmt_port,
+ u8 proto,
+ u32
+ thread_index);
+transport_connection_t *stream_session_lookup_transport4 (ip4_address_t * lcl,
+ ip4_address_t * rmt,
+ u16 lcl_port,
+ u16 rmt_port,
+ u8 proto);
+transport_connection_t *stream_session_lookup_transport_wt6 (ip6_address_t *
+ lcl,
+ ip6_address_t *
+ rmt,
+ u16 lcl_port,
+ u16 rmt_port,
+ u8 proto,
+ u32
+ thread_index);
+transport_connection_t *stream_session_lookup_transport6 (ip6_address_t * lcl,
+ ip6_address_t * rmt,
+ u16 lcl_port,
+ u16 rmt_port,
+ u8 proto);
+
+stream_session_t *stream_session_lookup_listener (ip46_address_t * lcl,
+ u16 lcl_port, u8 proto);
+u64 stream_session_half_open_lookup_handle (ip46_address_t * lcl,
+ ip46_address_t * rmt,
+ u16 lcl_port,
+ u16 rmt_port, u8 proto);
+transport_connection_t *stream_session_half_open_lookup (ip46_address_t * lcl,
+ ip46_address_t * rmt,
+ u16 lcl_port,
+ u16 rmt_port,
+ u8 proto);
+void stream_session_table_add_for_tc (transport_connection_t * tc, u64 value);
+int stream_session_table_del_for_tc (transport_connection_t * tc);
+int stream_session_table_del (stream_session_t * s);
+void stream_session_half_open_table_del (transport_connection_t * tc);
+void stream_session_half_open_table_add (transport_connection_t * tc,
+ u64 value);
+
+void session_lookup_init (void);
+
+#endif /* SRC_VNET_SESSION_SESSION_LOOKUP_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
new file mode 100644
index 00000000..d0155849
--- /dev/null
+++ b/src/vnet/session/session_node.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/tcp/tcp.h>
+#include <vppinfra/elog.h>
+#include <vnet/session/application.h>
+#include <vnet/session/session_debug.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+
+vlib_node_registration_t session_queue_node;
+
+typedef struct
+{
+ u32 session_index;
+ u32 server_thread_index;
+} session_queue_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_session_queue_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *);
+
+ s = format (s, "SESSION_QUEUE: session index %d, server thread index %d",
+ t->session_index, t->server_thread_index);
+ return s;
+}
+
+#define foreach_session_queue_error \
+_(TX, "Packets transmitted") \
+_(TIMER, "Timer events") \
+_(NO_BUFFER, "Out of buffers")
+
+typedef enum
+{
+#define _(sym,str) SESSION_QUEUE_ERROR_##sym,
+ foreach_session_queue_error
+#undef _
+ SESSION_QUEUE_N_ERROR,
+} session_queue_error_t;
+
+static char *session_queue_error_strings[] = {
+#define _(sym,string) string,
+ foreach_session_queue_error
+#undef _
+};
+
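+/* Indexed by session_type_t: IP4_TCP -> tcp4-output, IP4_UDP -> ip4-lookup,
+ * IP6_TCP -> tcp6-output, IP6_UDP -> ip6-lookup */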
+static u32 session_type_to_next[] = {
+ SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT,
+ SESSION_QUEUE_NEXT_IP4_LOOKUP,
+ SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT,
+ SESSION_QUEUE_NEXT_IP6_LOOKUP,
+};
+
+always_inline void
+session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm,
+ u8 thread_index, svm_fifo_t * fifo,
+ vlib_buffer_t * b0, u32 bi0, u8 n_bufs_per_seg,
+ u32 left_from_seg, u32 * left_to_snd0,
+ u16 * n_bufs, u32 * tx_offset, u16 deq_per_buf,
+ u8 peek_data)
+{
+ vlib_buffer_t *chain_b0, *prev_b0;
+ u32 chain_bi0, to_deq;
+ u16 len_to_deq0, n_bytes_read;
+ u8 *data0, j;
+
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b0->total_length_not_including_first_buffer = 0;
+
+ chain_bi0 = bi0;
+ chain_b0 = b0;
+ to_deq = left_from_seg;
+ for (j = 1; j < n_bufs_per_seg; j++)
+ {
+ prev_b0 = chain_b0;
+ len_to_deq0 = clib_min (to_deq, deq_per_buf);
+
+ *n_bufs -= 1;
+ chain_bi0 = smm->tx_buffers[thread_index][*n_bufs];
+ _vec_len (smm->tx_buffers[thread_index]) = *n_bufs;
+
+ chain_b0 = vlib_get_buffer (vm, chain_bi0);
+ chain_b0->current_data = 0;
+ data0 = vlib_buffer_get_current (chain_b0);
+ if (peek_data)
+ {
+ n_bytes_read = svm_fifo_peek (fifo, *tx_offset, len_to_deq0, data0);
+ *tx_offset += n_bytes_read;
+ }
+ else
+ {
+ n_bytes_read = svm_fifo_dequeue_nowait (fifo, len_to_deq0, data0);
+ }
+ ASSERT (n_bytes_read == len_to_deq0);
+ chain_b0->current_length = n_bytes_read;
+ b0->total_length_not_including_first_buffer += chain_b0->current_length;
+
+ /* update previous buffer */
+ prev_b0->next_buffer = chain_bi0;
+ prev_b0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ /* update current buffer */
+ chain_b0->next_buffer = 0;
+
+ to_deq -= n_bytes_read;
+ if (to_deq == 0)
+ break;
+ }
+ ASSERT (to_deq == 0
+ && b0->total_length_not_including_first_buffer == left_from_seg);
+ *left_to_snd0 -= left_from_seg;
+}
+
+always_inline int
+session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
+ session_manager_main_t * smm,
+ session_fifo_event_t * e0,
+ stream_session_t * s0, u32 thread_index,
+ int *n_tx_packets, u8 peek_data)
+{
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ u32 left_to_snd0, max_len_to_snd0, len_to_deq0, snd_space0;
+ u32 n_bufs_per_evt, n_frames_per_evt, n_bufs_per_frame;
+ transport_connection_t *tc0;
+ transport_proto_vft_t *transport_vft;
+ u32 next_index, next0, *to_next, n_left_to_next, bi0;
+ vlib_buffer_t *b0;
+ u32 tx_offset = 0, max_dequeue0, n_bytes_per_seg, left_for_seg;
+ u16 snd_mss0, n_bufs_per_seg, n_bufs;
+ u8 *data0;
+ int i, n_bytes_read;
+ u32 n_bytes_per_buf, deq_per_buf, deq_per_first_buf;
+ u32 buffers_allocated, buffers_allocated_this_call;
+
+ next_index = next0 = session_type_to_next[s0->session_type];
+
+ transport_vft = session_get_transport_vft (s0->session_type);
+ tc0 = transport_vft->get_connection (s0->connection_index, thread_index);
+
+ /* Make sure we have space to send and there's something to dequeue */
+ snd_mss0 = transport_vft->send_mss (tc0);
+ snd_space0 = transport_vft->send_space (tc0);
+
+ /* Can't make any progress */
+ if (snd_space0 == 0 || snd_mss0 == 0)
+ {
+ vec_add1 (smm->pending_event_vector[thread_index], *e0);
+ return 0;
+ }
+
+ /* Check how much we can pull. */
+ max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo);
+
+ if (peek_data)
+ {
+ /* Offset in tx fifo from where to peek data */
+ tx_offset = transport_vft->tx_fifo_offset (tc0);
+ if (PREDICT_FALSE (tx_offset >= max_dequeue0))
+ max_dequeue0 = 0;
+ else
+ max_dequeue0 -= tx_offset;
+ }
+
+ /* Nothing to read, return */
+ if (max_dequeue0 == 0)
+ {
+ svm_fifo_unset_event (s0->server_tx_fifo);
+ return 0;
+ }
+
+ /* Ensure we're not writing more than transport window allows */
+ if (max_dequeue0 < snd_space0)
+ {
+ /* Constrained by tx queue. Try to send only fully formed segments */
+ max_len_to_snd0 = (max_dequeue0 > snd_mss0) ?
+ max_dequeue0 - max_dequeue0 % snd_mss0 : max_dequeue0;
+ /* TODO Nagle ? */
+ }
+ else
+ {
+ /* Expectation is that snd_space0 is already a multiple of snd_mss */
+ max_len_to_snd0 = snd_space0;
+ }
+
+ n_bytes_per_buf = vlib_buffer_free_list_buffer_size
+ (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ ASSERT (n_bytes_per_buf > MAX_HDRS_LEN);
+ n_bytes_per_seg = MAX_HDRS_LEN + snd_mss0;
+ n_bufs_per_seg = ceil ((double) n_bytes_per_seg / n_bytes_per_buf);
+ n_bufs_per_evt = ceil ((double) max_len_to_snd0 / n_bytes_per_seg);
+ n_frames_per_evt = ceil ((double) n_bufs_per_evt / VLIB_FRAME_SIZE);
+ n_bufs_per_frame = n_bufs_per_seg * VLIB_FRAME_SIZE;
+
+ deq_per_buf = clib_min (snd_mss0, n_bytes_per_buf);
+ deq_per_first_buf = clib_min (snd_mss0, n_bytes_per_buf - MAX_HDRS_LEN);
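+
+ /* Worked example (illustrative numbers): with 2048-byte buffers, a
+ * 1460-byte MSS and ~100 bytes of header room, n_bytes_per_seg is ~1560,
+ * so n_bufs_per_seg = 1 and each segment fits in one buffer; only jumbo
+ * segments take the chaining path in session_tx_fifo_chain_tail. */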
+
+ n_bufs = vec_len (smm->tx_buffers[thread_index]);
+ left_to_snd0 = max_len_to_snd0;
+ for (i = 0; i < n_frames_per_evt; i++)
+ {
+ /* Make sure we have at least one full frame of buffers ready */
+ if (PREDICT_FALSE (n_bufs < n_bufs_per_frame))
+ {
+ vec_validate (smm->tx_buffers[thread_index],
+ n_bufs + n_bufs_per_frame - 1);
+ buffers_allocated = 0;
+ do
+ {
+ buffers_allocated_this_call = vlib_buffer_alloc (vm,
+ &smm->tx_buffers
+ [thread_index]
+ [n_bufs +
+ buffers_allocated],
+ n_bufs_per_frame
+ -
+ buffers_allocated);
+ buffers_allocated += buffers_allocated_this_call;
+ }
+ while (buffers_allocated_this_call > 0
+ && ((buffers_allocated + n_bufs < n_bufs_per_frame)));
+
+ n_bufs += buffers_allocated;
+ _vec_len (smm->tx_buffers[thread_index]) = n_bufs;
+
+ if (PREDICT_FALSE (n_bufs < n_bufs_per_frame))
+ {
+ vec_add1 (smm->pending_event_vector[thread_index], *e0);
+ return -1;
+ }
+ ASSERT (n_bufs >= n_bufs_per_frame);
+ }
+ /* Allow enqueuing of a new event */
+ svm_fifo_unset_event (s0->server_tx_fifo);
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (left_to_snd0 && n_left_to_next)
+ {
+ /*
+ * Handle first buffer in chain separately
+ */
+
+ /* Get free buffer */
+ ASSERT (n_bufs >= 1);
+ bi0 = smm->tx_buffers[thread_index][--n_bufs];
+ _vec_len (smm->tx_buffers[thread_index]) = n_bufs;
+
+ /* usual speculation, or the enqueue_x1 macro will barf */
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->error = 0;
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b0->current_data = 0;
+ b0->total_length_not_including_first_buffer = 0;
+
+ len_to_deq0 = clib_min (left_to_snd0, deq_per_first_buf);
+ data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN);
+ if (peek_data)
+ {
+ n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, tx_offset,
+ len_to_deq0, data0);
+ if (n_bytes_read <= 0)
+ goto dequeue_fail;
+ /* Keep track of progress locally, transport is also supposed to
+ * increment it independently when pushing the header */
+ tx_offset += n_bytes_read;
+ }
+ else
+ {
+ n_bytes_read = svm_fifo_dequeue_nowait (s0->server_tx_fifo,
+ len_to_deq0, data0);
+ if (n_bytes_read <= 0)
+ goto dequeue_fail;
+ }
+
+ b0->current_length = n_bytes_read;
+
+ left_to_snd0 -= n_bytes_read;
+ *n_tx_packets = *n_tx_packets + 1;
+
+ /*
+ * Fill in the remaining buffers in the chain, if any
+ */
+ if (PREDICT_FALSE (n_bufs_per_seg > 1 && left_to_snd0))
+ {
+ left_for_seg = clib_min (snd_mss0 - n_bytes_read, left_to_snd0);
+ session_tx_fifo_chain_tail (smm, vm, thread_index,
+ s0->server_tx_fifo, b0, bi0,
+ n_bufs_per_seg, left_for_seg,
+ &left_to_snd0, &n_bufs, &tx_offset,
+ deq_per_buf, peek_data);
+ }
+
+ /* Ask transport to push header after current_length and
+ * total_length_not_including_first_buffer are updated */
+ transport_vft->push_header (tc0, b0);
+
+ /* *INDENT-OFF* */
+ SESSION_EVT_DBG(SESSION_EVT_DEQ, s0, ({
+ ed->data[0] = e0->event_id;
+ ed->data[1] = max_dequeue0;
+ ed->data[2] = len_to_deq0;
+ ed->data[3] = left_to_snd0;
+ }));
+ /* *INDENT-ON* */
+
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ if (VLIB_BUFFER_TRACE_TRAJECTORY)
+ b0->pre_data[1] = 3;
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ session_queue_trace_t *t0;
+ vlib_trace_buffer (vm, node, next_index, b0,
+ 1 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ t0->session_index = s0->session_index;
+ t0->server_thread_index = s0->thread_index;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* If we couldn't dequeue all bytes, mark as partially read */
+ if (max_len_to_snd0 < max_dequeue0)
+ {
+ /* If we don't already have a new event */
+ if (svm_fifo_set_event (s0->server_tx_fifo))
+ {
+ vec_add1 (smm->pending_event_vector[thread_index], *e0);
+ }
+ }
+ return 0;
+
+dequeue_fail:
+ /*
+ * Can't read from the fifo. If we don't already have an event, save as
+ * partially read, return the buffer to the free list and return.
+ */
+ clib_warning ("dequeue fail");
+
+ if (svm_fifo_set_event (s0->server_tx_fifo))
+ {
+ vec_add1 (smm->pending_event_vector[thread_index], *e0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);
+ _vec_len (smm->tx_buffers[thread_index]) += 1;
+
+ return 0;
+}
+
+int
+session_tx_fifo_peek_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
+ session_manager_main_t * smm,
+ session_fifo_event_t * e0,
+ stream_session_t * s0, u32 thread_index,
+ int *n_tx_pkts)
+{
+ return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index,
+ n_tx_pkts, 1);
+}
+
+int
+session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
+ session_manager_main_t * smm,
+ session_fifo_event_t * e0,
+ stream_session_t * s0, u32 thread_index,
+ int *n_tx_pkts)
+{
+ return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index,
+ n_tx_pkts, 0);
+}
+
+always_inline stream_session_t *
+session_event_get_session (session_fifo_event_t * e, u8 thread_index)
+{
+ return stream_session_get_if_valid (e->fifo->master_session_index,
+ thread_index);
+}
+
+void
+dump_thread_0_event_queue (void)
+{
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ vlib_main_t *vm = &vlib_global_main;
+ u32 my_thread_index = vm->thread_index;
+ session_fifo_event_t _e, *e = &_e;
+ stream_session_t *s0;
+ int i, index;
+ i8 *headp;
+
+ unix_shared_memory_queue_t *q;
+ q = smm->vpp_event_queues[my_thread_index];
+
+ index = q->head;
+
+ for (i = 0; i < q->cursize; i++)
+ {
+ headp = (i8 *) (&q->data[0] + q->elsize * index);
+ clib_memcpy (e, headp, q->elsize);
+
+ switch (e->event_type)
+ {
+ case FIFO_EVENT_APP_TX:
+ s0 = session_event_get_session (e, my_thread_index);
+ fformat (stdout, "[%04d] TX session %d\n", i, s0->session_index);
+ break;
+
+ case FIFO_EVENT_DISCONNECT:
+ s0 = stream_session_get_from_handle (e->session_handle);
+ fformat (stdout, "[%04d] disconnect session %d\n", i,
+ s0->session_index);
+ break;
+
+ case FIFO_EVENT_BUILTIN_RX:
+ s0 = session_event_get_session (e, my_thread_index);
+ fformat (stdout, "[%04d] builtin_rx %d\n", i, s0->session_index);
+ break;
+
+ case FIFO_EVENT_RPC:
+ fformat (stdout, "[%04d] RPC call %llx with %llx\n",
+ i, (u64) (e->rpc_args.fp), (u64) (e->rpc_args.arg));
+ break;
+
+ default:
+ fformat (stdout, "[%04d] unhandled event type %d\n",
+ i, e->event_type);
+ break;
+ }
+
+ index++;
+
+ if (index == q->maxsize)
+ index = 0;
+ }
+}
+
+static u8
+session_node_cmp_event (session_fifo_event_t * e, svm_fifo_t * f)
+{
+ stream_session_t *s;
+ switch (e->event_type)
+ {
+ case FIFO_EVENT_APP_RX:
+ case FIFO_EVENT_APP_TX:
+ case FIFO_EVENT_BUILTIN_RX:
+ if (e->fifo == f)
+ return 1;
+ break;
+ case FIFO_EVENT_DISCONNECT:
+ break;
+ case FIFO_EVENT_RPC:
+ s = stream_session_get_from_handle (e->session_handle);
+ if (!s)
+ {
+ clib_warning ("session has event but doesn't exist!");
+ break;
+ }
+ if (s->server_rx_fifo == f || s->server_tx_fifo == f)
+ return 1;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+u8
+session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e)
+{
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ unix_shared_memory_queue_t *q;
+ session_fifo_event_t *pending_event_vector, *evt;
+ int i, index, found = 0;
+ i8 *headp;
+ u8 thread_index;
+
+ ASSERT (e);
+ thread_index = f->master_thread_index;
+ /*
+ * Search evt queue
+ */
+ q = smm->vpp_event_queues[thread_index];
+ index = q->head;
+ for (i = 0; i < q->cursize; i++)
+ {
+ headp = (i8 *) (&q->data[0] + q->elsize * index);
+ clib_memcpy (e, headp, q->elsize);
+ found = session_node_cmp_event (e, f);
+ if (found)
+ break;
+ if (++index == q->maxsize)
+ index = 0;
+ }
+ /*
+ * Search pending events vector
+ */
+ pending_event_vector = smm->pending_event_vector[thread_index];
+ vec_foreach (evt, pending_event_vector)
+ {
+ found = session_node_cmp_event (evt, f);
+ if (found)
+ {
+ clib_memcpy (e, evt, sizeof (*evt));
+ break;
+ }
+ }
+ return found;
+}
+
+static uword
+session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ session_fifo_event_t *my_pending_event_vector, *e;
+ session_fifo_event_t *my_fifo_events;
+ u32 n_to_dequeue, n_events;
+ unix_shared_memory_queue_t *q;
+ application_t *app;
+ int n_tx_packets = 0;
+ u32 my_thread_index = vm->thread_index;
+ int i, rv;
+ f64 now = vlib_time_now (vm);
+ void (*fp) (void *);
+
+ SESSION_EVT_DBG (SESSION_EVT_POLL_GAP_TRACK, smm, my_thread_index);
+
+ /*
+ * Update TCP time
+ */
+ tcp_update_time (now, my_thread_index);
+
+ /*
+ * Get vpp queue events
+ */
+ q = smm->vpp_event_queues[my_thread_index];
+ if (PREDICT_FALSE (q == 0))
+ return 0;
+
+ my_fifo_events = smm->free_event_vector[my_thread_index];
+
+ /* min number of events we can dequeue without blocking */
+ n_to_dequeue = q->cursize;
+ my_pending_event_vector = smm->pending_event_vector[my_thread_index];
+
+ if (n_to_dequeue == 0 && vec_len (my_pending_event_vector) == 0)
+ return 0;
+
+ SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 0);
+
+ /*
+ * If we didn't manage to process previous events try going
+ * over them again without dequeuing new ones.
+ */
+ /* XXX: Block senders to sessions that can't keep up */
+ if (0 && vec_len (my_pending_event_vector) >= 100)
+ {
+ clib_warning ("too many fifo events unsolved");
+ goto skip_dequeue;
+ }
+
+ /* See you in the next life, don't be late */
+ if (pthread_mutex_trylock (&q->mutex))
+ return 0;
+
+ for (i = 0; i < n_to_dequeue; i++)
+ {
+ vec_add2 (my_fifo_events, e, 1);
+ unix_shared_memory_queue_sub_raw (q, (u8 *) e);
+ }
+
+ /* The other side of the connection is not polling */
+ if (q->cursize < (q->maxsize / 8))
+ (void) pthread_cond_broadcast (&q->condvar);
+ pthread_mutex_unlock (&q->mutex);
+
+ vec_append (my_fifo_events, my_pending_event_vector);
+
+ _vec_len (my_pending_event_vector) = 0;
+ smm->pending_event_vector[my_thread_index] = my_pending_event_vector;
+
+skip_dequeue:
+ n_events = vec_len (my_fifo_events);
+ for (i = 0; i < n_events; i++)
+ {
+ stream_session_t *s0; /* $$$ prefetch 1 ahead maybe */
+ session_fifo_event_t *e0;
+
+ e0 = &my_fifo_events[i];
+
+ switch (e0->event_type)
+ {
+ case FIFO_EVENT_APP_TX:
+ s0 = session_event_get_session (e0, my_thread_index);
+
+ if (PREDICT_FALSE (!s0))
+ {
+ clib_warning ("It's dead, Jim!");
+ continue;
+ }
+ /* Can retransmit for closed sessions but can't do anything if
+ * session is not ready or closed */
+ if (PREDICT_FALSE (s0->session_state < SESSION_STATE_READY))
+ continue;
+ /* Spray packets in per session type frames, since they go to
+ * different nodes */
+ rv = (smm->session_tx_fns[s0->session_type]) (vm, node, smm, e0, s0,
+ my_thread_index,
+ &n_tx_packets);
+ /* Out of buffers */
+ if (PREDICT_FALSE (rv < 0))
+ {
+ vlib_node_increment_counter (vm, node->node_index,
+ SESSION_QUEUE_ERROR_NO_BUFFER, 1);
+ continue;
+ }
+ break;
+ case FIFO_EVENT_DISCONNECT:
+ s0 = stream_session_get_from_handle (e0->session_handle);
+ stream_session_disconnect (s0);
+ break;
+ case FIFO_EVENT_BUILTIN_RX:
+ s0 = session_event_get_session (e0, my_thread_index);
+ if (PREDICT_FALSE (!s0))
+ continue;
+ svm_fifo_unset_event (s0->server_rx_fifo);
+ app = application_get (s0->app_index);
+ app->cb_fns.builtin_server_rx_callback (s0);
+ break;
+ case FIFO_EVENT_RPC:
+ fp = e0->rpc_args.fp;
+ (*fp) (e0->rpc_args.arg);
+ break;
+
+ default:
+ clib_warning ("unhandled event type %d", e0->event_type);
+ }
+ }
+
+ _vec_len (my_fifo_events) = 0;
+ smm->free_event_vector[my_thread_index] = my_fifo_events;
+
+ vlib_node_increment_counter (vm, session_queue_node.index,
+ SESSION_QUEUE_ERROR_TX, n_tx_packets);
+
+ SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 1);
+
+ return n_tx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (session_queue_node) =
+{
+ .function = session_queue_node_fn,
+ .name = "session-queue",
+ .format_trace = format_session_queue_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .n_errors = ARRAY_LEN (session_queue_error_strings),
+ .error_strings = session_queue_error_strings,
+ .n_next_nodes = SESSION_QUEUE_N_NEXT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .next_nodes =
+ {
+ [SESSION_QUEUE_NEXT_DROP] = "error-drop",
+ [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output",
+ [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/stream_session.h b/src/vnet/session/stream_session.h
new file mode 100644
index 00000000..275052d3
--- /dev/null
+++ b/src/vnet/session/stream_session.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_VNET_SESSION_STREAM_SESSION_H_
+#define SRC_VNET_SESSION_STREAM_SESSION_H_
+
+#include <vnet/vnet.h>
+#include <svm/svm_fifo.h>
+
+#define foreach_session_type \
+ _(IP4_TCP, ip4_tcp) \
+ _(IP4_UDP, ip4_udp) \
+ _(IP6_TCP, ip6_tcp) \
+ _(IP6_UDP, ip6_udp)
+
+typedef enum
+{
+#define _(A, a) SESSION_TYPE_##A,
+ foreach_session_type
+#undef _
+ SESSION_N_TYPES,
+} session_type_t;
+
+/*
+ * Application session state
+ */
+typedef enum
+{
+ SESSION_STATE_LISTENING,
+ SESSION_STATE_CONNECTING,
+ SESSION_STATE_ACCEPTING,
+ SESSION_STATE_READY,
+ SESSION_STATE_CLOSED,
+ SESSION_STATE_N_STATES,
+} stream_session_state_t;
+
+typedef struct _stream_session_t
+{
+ /** fifo pointers. Once allocated, these do not move */
+ svm_fifo_t *server_rx_fifo;
+ svm_fifo_t *server_tx_fifo;
+
+ /** Type */
+ u8 session_type;
+
+ /** State */
+ volatile u8 session_state;
+
+ u8 thread_index;
+
+ /** To avoid n**2 "one event per frame" check */
+ u8 enqueue_epoch;
+
+ /** svm segment index where fifos were allocated */
+ u32 svm_segment_index;
+
+ /** Session index in per_thread pool */
+ u32 session_index;
+
+ /** Transport specific */
+ u32 connection_index;
+
+ /** stream server pool index */
+ u32 app_index;
+
+ /** Parent listener session if the result of an accept */
+ u32 listener_index;
+
+ CLIB_CACHE_LINE_ALIGN_MARK (pad);
+} stream_session_t;
+
+#endif /* SRC_VNET_SESSION_STREAM_SESSION_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
new file mode 100644
index 00000000..e2c47949
--- /dev/null
+++ b/src/vnet/session/transport.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VNET_VNET_URI_TRANSPORT_H_
+#define VNET_VNET_URI_TRANSPORT_H_
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/bihash_16_8.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vnet/tcp/tcp_debug.h>
+
+/*
+ * Protocol-independent transport properties associated with a session
+ */
+typedef struct _transport_connection
+{
+ ip46_address_t rmt_ip; /**< Remote IP */
+ ip46_address_t lcl_ip; /**< Local IP */
+ u16 lcl_port; /**< Local port */
+ u16 rmt_port; /**< Remote port */
+ u8 transport_proto; /**< Protocol id */
+ u8 is_ip4; /**< Flag if IP4 connection */
+ u32 vrf; /**< FIB table id */
+
+ u32 s_index; /**< Parent session index */
+ u32 c_index; /**< Connection index in transport pool */
+ u32 thread_index; /**< Worker-thread index */
+
+ fib_node_index_t rmt_fei; /**< FIB entry index for rmt */
+ dpo_id_t rmt_dpo; /**< Forwarding DPO for rmt */
+
+#if TRANSPORT_DEBUG
+ elog_track_t elog_track; /**< Event logging */
+ u32 cc_stat_tstamp; /**< CC stats timestamp */
+#endif
+
+ /** Macros for 'derived classes' where base is named "connection" */
+#define c_lcl_ip connection.lcl_ip
+#define c_rmt_ip connection.rmt_ip
+#define c_lcl_ip4 connection.lcl_ip.ip4
+#define c_rmt_ip4 connection.rmt_ip.ip4
+#define c_lcl_ip6 connection.lcl_ip.ip6
+#define c_rmt_ip6 connection.rmt_ip.ip6
+#define c_lcl_port connection.lcl_port
+#define c_rmt_port connection.rmt_port
+#define c_transport_proto connection.transport_proto
+#define c_vrf connection.vrf
+#define c_state connection.state
+#define c_s_index connection.s_index
+#define c_c_index connection.c_index
+#define c_is_ip4 connection.is_ip4
+#define c_thread_index connection.thread_index
+#define c_elog_track connection.elog_track
+#define c_cc_stat_tstamp connection.cc_stat_tstamp
+#define c_rmt_fei connection.rmt_fei
+#define c_rmt_dpo connection.rmt_dpo
+} transport_connection_t;
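+
+/* Usage sketch: a transport embeds the base struct under the name
+ * "connection" so the c_* accessors above resolve, e.g.
+ *
+ *   typedef struct _tcp_connection
+ *   {
+ *     transport_connection_t connection;
+ *     ...
+ *   } tcp_connection_t;
+ *
+ * after which tc->c_lcl_port is shorthand for tc->connection.lcl_port. */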
+
+typedef enum _transport_proto
+{
+ TRANSPORT_PROTO_TCP,
+ TRANSPORT_PROTO_UDP
+} transport_proto_t;
+
+typedef struct _transport_endpoint
+{
+ ip46_address_t ip; /**< ip address */
+ u16 port; /**< port in net order */
+ u8 is_ip4; /**< 1 if ip4 */
+ u32 vrf; /**< fib table the endpoint is associated with */
+} transport_endpoint_t;
+
+#endif /* VNET_VNET_URI_TRANSPORT_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/transport_interface.c b/src/vnet/session/transport_interface.c
new file mode 100644
index 00000000..ef8d1e49
--- /dev/null
+++ b/src/vnet/session/transport_interface.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/session/transport_interface.h>
+#include <vnet/session/session.h>
+
+/**
+ * Per-type vector of transport protocol virtual function tables
+ */
+transport_proto_vft_t *tp_vfts;
+
+u32
+transport_endpoint_lookup (transport_endpoint_table_t * ht,
+ ip46_address_t * ip, u16 port)
+{
+ clib_bihash_kv_24_8_t kv;
+ int rv;
+
+ kv.key[0] = ip->as_u64[0];
+ kv.key[1] = ip->as_u64[1];
+ kv.key[2] = port;
+
+ rv = clib_bihash_search_inline_24_8 (ht, &kv);
+ if (rv == 0)
+ return kv.value;
+
+ return TRANSPORT_ENDPOINT_INVALID_INDEX;
+}
+
+void
+transport_endpoint_table_add (transport_endpoint_table_t * ht,
+ transport_endpoint_t * te, u32 value)
+{
+ clib_bihash_kv_24_8_t kv;
+
+ kv.key[0] = te->ip.as_u64[0];
+ kv.key[1] = te->ip.as_u64[1];
+ kv.key[2] = te->port;
+ kv.value = value;
+
+ clib_bihash_add_del_24_8 (ht, &kv, 1);
+}
+
+void
+transport_endpoint_table_del (transport_endpoint_table_t * ht,
+ transport_endpoint_t * te)
+{
+ clib_bihash_kv_24_8_t kv;
+
+ kv.key[0] = te->ip.as_u64[0];
+ kv.key[1] = te->ip.as_u64[1];
+ kv.key[2] = te->port;
+
+ clib_bihash_add_del_24_8 (ht, &kv, 0);
+}
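+
+/* Illustrative round-trip over the helpers above, assuming an initialized
+ * table ht and a populated endpoint te:
+ *
+ *   transport_endpoint_table_add (&ht, &te, 5);
+ *   transport_endpoint_lookup (&ht, &te.ip, te.port);   // returns 5
+ *   transport_endpoint_table_del (&ht, &te);
+ */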
+
+/**
+ * Register transport virtual function table.
+ *
+ * @param transport_proto - transport protocol
+ * @param is_ip4 - flag set for ip4 session types
+ * @param vft - virtual function table
+ */
+void
+session_register_transport (transport_proto_t transport_proto, u8 is_ip4,
+ const transport_proto_vft_t * vft)
+{
+ u8 session_type;
+ session_type = session_type_from_proto_and_ip (transport_proto, is_ip4);
+
+ vec_validate (tp_vfts, session_type);
+ tp_vfts[session_type] = *vft;
+
+ /* If an offset function is provided, then peek instead of dequeue */
+ session_manager_set_transport_rx_fn (session_type,
+ vft->tx_fifo_offset != 0);
+}
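+
+/* Typical use (sketch, vft name hypothetical): a transport registers its
+ * function table once at init time, e.g.
+ *
+ *   session_register_transport (TRANSPORT_PROTO_TCP, 1, &tcp4_proto_vft);
+ *
+ * which makes it reachable for the TCP/IPv4 session type. */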
+
+/**
+ * Get transport virtual function table
+ *
+ * @param session_type - session type (not protocol type)
+ */
+transport_proto_vft_t *
+session_get_transport_vft (u8 session_type)
+{
+ if (session_type >= vec_len (tp_vfts))
+ return 0;
+ return &tp_vfts[session_type];
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/transport_interface.h b/src/vnet/session/transport_interface.h
new file mode 100644
index 00000000..661221c4
--- /dev/null
+++ b/src/vnet/session/transport_interface.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_VNET_SESSION_TRANSPORT_INTERFACE_H_
+#define SRC_VNET_SESSION_TRANSPORT_INTERFACE_H_
+
+#include <vnet/vnet.h>
+#include <vnet/session/transport.h>
+
+/*
+ * Transport protocol virtual function table
+ */
+typedef struct _transport_proto_vft
+{
+ /*
+ * Setup
+ */
+ u32 (*bind) (u32 session_index, transport_endpoint_t * lcl);
+ u32 (*unbind) (u32);
+ int (*open) (transport_endpoint_t * rmt);
+ void (*close) (u32 conn_index, u32 thread_index);
+ void (*cleanup) (u32 conn_index, u32 thread_index);
+
+ /*
+ * Transmission
+ */
+ u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b);
+ u16 (*send_mss) (transport_connection_t * tc);
+ u32 (*send_space) (transport_connection_t * tc);
+ u32 (*tx_fifo_offset) (transport_connection_t * tc);
+
+ /*
+ * Connection retrieval
+ */
+ transport_connection_t *(*get_connection) (u32 conn_idx, u32 thread_idx);
+ transport_connection_t *(*get_listener) (u32 conn_index);
+ transport_connection_t *(*get_half_open) (u32 conn_index);
+
+ /*
+ * Format
+ */
+ u8 *(*format_connection) (u8 * s, va_list * args);
+ u8 *(*format_listener) (u8 * s, va_list * args);
+ u8 *(*format_half_open) (u8 * s, va_list * args);
+} transport_proto_vft_t;
+
+typedef clib_bihash_24_8_t transport_endpoint_table_t;
+
+#define TRANSPORT_ENDPOINT_INVALID_INDEX ((u32)~0)
+
+u32 transport_endpoint_lookup (transport_endpoint_table_t * ht,
+ ip46_address_t * ip, u16 port);
+void transport_endpoint_table_add (transport_endpoint_table_t * ht,
+ transport_endpoint_t * te, u32 value);
+void transport_endpoint_table_del (transport_endpoint_table_t * ht,
+ transport_endpoint_t * te);
+
+void session_register_transport (transport_proto_t transport_proto, u8 is_ip4,
+ const transport_proto_vft_t * vft);
+transport_proto_vft_t *session_get_transport_vft (u8 session_type);
+
+#endif /* SRC_VNET_SESSION_TRANSPORT_INTERFACE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
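
The endpoint table declared above is a plain 24_8 bihash keyed on (ip, port). A minimal sketch, assuming the standard clib_bihash_init_24_8 initializer and the transport_endpoint_t layout from transport.h (ip46_address_t ip; u16 port):

    transport_endpoint_table_t local_endpoints;
    clib_bihash_init_24_8 (&local_endpoints, "local endpoints",
                           1024 /* buckets */, 20 << 20 /* heap bytes */);

    transport_endpoint_t te = { .port = 80 };
    te.ip.ip4.as_u32 = clib_host_to_net_u32 (0x0a000001);  /* 10.0.0.1 */

    transport_endpoint_table_add (&local_endpoints, &te, 42 /* value */);
    u32 v = transport_endpoint_lookup (&local_endpoints, &te.ip, te.port);
    /* v is 42 on a hit, TRANSPORT_ENDPOINT_INVALID_INDEX otherwise. */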
diff --git a/src/vnet/snap/node.c b/src/vnet/snap/node.c
new file mode 100644
index 00000000..884ff324
--- /dev/null
+++ b/src/vnet/snap/node.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * snap_node.c: snap packet processing
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/llc/llc.h>
+#include <vnet/snap/snap.h>
+
+typedef enum
+{
+ SNAP_INPUT_NEXT_DROP,
+ SNAP_INPUT_NEXT_PUNT,
+ SNAP_INPUT_NEXT_ETHERNET_TYPE,
+ SNAP_INPUT_N_NEXT,
+} snap_input_next_t;
+
+typedef struct
+{
+ u8 packet_data[32];
+} snap_input_trace_t;
+
+static u8 *
+format_snap_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ snap_input_trace_t *t = va_arg (*va, snap_input_trace_t *);
+
+ s = format (s, "%U", format_snap_header, t->packet_data);
+
+ return s;
+}
+
+static uword
+snap_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ snap_main_t *sm = &snap_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (snap_input_trace_t));
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ snap_header_t *h0, *h1;
+ snap_protocol_info_t *pi0, *pi1;
+ u8 next0, next1, is_ethernet0, is_ethernet1, len0, len1,
+ enqueue_code;
+ u32 oui0, oui1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *b2, *b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, LOAD);
+ vlib_prefetch_buffer_header (b3, LOAD);
+
+ CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ h0 = (void *) (b0->data + b0->current_data);
+ h1 = (void *) (b1->data + b1->current_data);
+
+ oui0 = snap_header_get_oui (h0);
+ oui1 = snap_header_get_oui (h1);
+
+ is_ethernet0 = oui0 == IEEE_OUI_ethernet;
+ is_ethernet1 = oui1 == IEEE_OUI_ethernet;
+
+ len0 = sizeof (h0[0]) - (is_ethernet0 ? sizeof (h0->protocol) : 0);
+ len1 = sizeof (h1[0]) - (is_ethernet1 ? sizeof (h1->protocol) : 0);
+
+ b0->current_data += len0;
+ b1->current_data += len1;
+
+ b0->current_length -= len0;
+ b1->current_length -= len1;
+
+ pi0 = snap_get_protocol_info (sm, h0);
+ pi1 = snap_get_protocol_info (sm, h1);
+
+ next0 = pi0 ? pi0->next_index : SNAP_INPUT_NEXT_DROP;
+ next1 = pi1 ? pi1->next_index : SNAP_INPUT_NEXT_DROP;
+
+ next0 = is_ethernet0 ? SNAP_INPUT_NEXT_ETHERNET_TYPE : next0;
+ next1 = is_ethernet1 ? SNAP_INPUT_NEXT_ETHERNET_TYPE : next1;
+
+ /* In case of error. */
+ b0->error = node->errors[SNAP_ERROR_UNKNOWN_PROTOCOL];
+ b1->error = node->errors[SNAP_ERROR_UNKNOWN_PROTOCOL];
+
+ enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
+
+ if (PREDICT_FALSE (enqueue_code != 0))
+ {
+ switch (enqueue_code)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = bi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, bi1);
+ break;
+
+ case 3:
+ /* A B B or A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ vlib_set_next_frame_buffer (vm, node, next1, bi1);
+ if (next0 == next1)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ next_index = next1;
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ snap_header_t *h0;
+ snap_protocol_info_t *pi0;
+ u8 next0, is_ethernet0, len0;
+ u32 oui0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ oui0 = snap_header_get_oui (h0);
+
+ is_ethernet0 = oui0 == IEEE_OUI_ethernet;
+
+ len0 = sizeof (h0[0]) - (is_ethernet0 ? sizeof (h0->protocol) : 0);
+
+ b0->current_data += len0;
+
+ b0->current_length -= len0;
+
+ pi0 = snap_get_protocol_info (sm, h0);
+
+ next0 = pi0 ? pi0->next_index : SNAP_INPUT_NEXT_DROP;
+
+ next0 = is_ethernet0 ? SNAP_INPUT_NEXT_ETHERNET_TYPE : next0;
+
+ /* In case of error. */
+ b0->error = node->errors[SNAP_ERROR_UNKNOWN_PROTOCOL];
+
+ /* Sent packet to wrong next? */
+ if (PREDICT_FALSE (next0 != next_index))
+ {
+ /* Return old frame; remove incorrectly enqueued packet. */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);
+
+ /* Send to correct next. */
+ next_index = next0;
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char *snap_error_strings[] = {
+#define _(f,s) s,
+ foreach_snap_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (snap_input_node) = {
+ .function = snap_input,
+ .name = "snap-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = SNAP_N_ERROR,
+ .error_strings = snap_error_strings,
+
+ .n_next_nodes = SNAP_INPUT_N_NEXT,
+ .next_nodes = {
+ [SNAP_INPUT_NEXT_DROP] = "error-drop",
+ [SNAP_INPUT_NEXT_PUNT] = "error-punt",
+ [SNAP_INPUT_NEXT_ETHERNET_TYPE] = "ethernet-input-type",
+ },
+
+ .format_buffer = format_snap_header_with_length,
+ .format_trace = format_snap_input_trace,
+ .unformat_buffer = unformat_snap_header,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+snap_input_init (vlib_main_t * vm)
+{
+ {
+ clib_error_t *error = vlib_call_init_function (vm, snap_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ snap_setup_node (vm, snap_input_node.index);
+
+ llc_register_input_protocol (vm, LLC_PROTOCOL_snap, snap_input_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (snap_input_init);
+
+void
+snap_register_input_protocol (vlib_main_t * vm,
+ char *name,
+ u32 ieee_oui, u16 protocol, u32 node_index)
+{
+ snap_main_t *sm = &snap_main;
+ snap_protocol_info_t *pi;
+ snap_header_t h;
+ snap_oui_and_protocol_t key;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, snap_input_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ h.protocol = clib_host_to_net_u16 (protocol);
+ h.oui[0] = (ieee_oui >> 16) & 0xff;
+ h.oui[1] = (ieee_oui >> 8) & 0xff;
+ h.oui[2] = (ieee_oui >> 0) & 0xff;
+ pi = snap_get_protocol_info (sm, &h);
+ if (pi)
+ return;
+
+ vec_add2 (sm->protocols, pi, 1);
+
+ pi->name = format (0, "%s", name);
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm, snap_input_node.index, node_index);
+
+ key.oui = ieee_oui;
+ key.protocol = clib_host_to_net_u16 (protocol);
+
+ mhash_set (&sm->protocol_hash, &key, pi - sm->protocols, /* old_value */ 0);
+ hash_set_mem (sm->protocol_info_by_name, name, pi - sm->protocols);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
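
Upper-layer dispatch is driven by snap_register_input_protocol. A sketch using the Cisco CDP constants from snap.h further below; the receiving node is hypothetical:

    /* Steer SNAP frames with OUI 0x00000c / protocol 0x2000 (CDP)
       to a hypothetical cdp-input node. */
    snap_register_input_protocol (vm, "cdp-input",
                                  IEEE_OUI_cisco, 0x2000 /* SNAP_cisco_cdp */,
                                  cdp_input_node.index);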
diff --git a/src/vnet/snap/pg.c b/src/vnet/snap/pg.c
new file mode 100644
index 00000000..aad125f3
--- /dev/null
+++ b/src/vnet/snap/pg.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * snap_pg.c: packet generator snap interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/snap/snap.h>
+
+typedef struct
+{
+ pg_edit_t oui;
+ pg_edit_t protocol;
+} pg_snap_header_t;
+
+static inline void
+pg_snap_header_init (pg_snap_header_t * e)
+{
+ pg_edit_init (&e->oui, snap_header_t, oui);
+ pg_edit_init (&e->protocol, snap_header_t, protocol);
+}
+
+uword
+unformat_pg_snap_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_snap_header_t *h;
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (snap_header_t),
+ &group_index);
+ pg_snap_header_init (h);
+
+ error = 1;
+ if (!unformat (input, "%U -> %U",
+ unformat_pg_edit,
+ unformat_snap_protocol, &h->oui, &h->protocol))
+ goto done;
+
+ {
+ snap_main_t *pm = &snap_main;
+ snap_protocol_info_t *pi = 0;
+ pg_node_t *pg_node = 0;
+
+ if (h->oui.type == PG_EDIT_FIXED && h->protocol.type == PG_EDIT_FIXED)
+ {
+      u8 *o = h->oui.values[PG_EDIT_LO];
+      u8 *p = h->protocol.values[PG_EDIT_LO];
+      snap_header_t sh;
+
+      sh.oui[0] = o[0];
+      sh.oui[1] = o[1];
+      sh.oui[2] = o[2];
+      sh.protocol = *(u16 *) p;
+      pi = snap_get_protocol_info (pm, &sh);
+ if (pi && pi->node_index != ~0)
+ pg_node = pg_get_node (pi->node_index);
+ }
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto done;
+ }
+
+ error = 0;
+done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/snap/snap.c b/src/vnet/snap/snap.c
new file mode 100644
index 00000000..64482bfc
--- /dev/null
+++ b/src/vnet/snap/snap.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * snap.c: snap support
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/snap/snap.h>
+#include <vnet/ethernet/ethernet.h>
+
+/* Global main structure. */
+snap_main_t snap_main;
+
+static u8 *
+format_cisco_snap_protocol (u8 * s, va_list * args)
+{
+ snap_header_t *h = va_arg (*args, snap_header_t *);
+ u16 protocol = clib_net_to_host_u16 (h->protocol);
+ char *t = 0;
+ switch (protocol)
+ {
+#define _(n,f) case n: t = #f; break;
+ foreach_snap_cisco_protocol;
+#undef _
+ default:
+ break;
+ }
+ if (t)
+ return format (s, "%s", t);
+ else
+ return format (s, "unknown 0x%x", protocol);
+}
+
+u8 *
+format_snap_protocol (u8 * s, va_list * args)
+{
+ snap_header_t *h = va_arg (*args, snap_header_t *);
+ u32 oui = snap_header_get_oui (h);
+ u16 protocol = clib_net_to_host_u16 (h->protocol);
+
+ switch (oui)
+ {
+ case IEEE_OUI_ethernet:
+ return format (s, "ethernet %U", format_ethernet_type, h->protocol);
+
+ case IEEE_OUI_cisco:
+ return format (s, "cisco %U", format_cisco_snap_protocol, h);
+
+ default:
+ return format (s, "oui 0x%06x 0x%04x", oui, protocol);
+ }
+}
+
+u8 *
+format_snap_header_with_length (u8 * s, va_list * args)
+{
+ snap_main_t *sm = &snap_main;
+ snap_header_t *h = va_arg (*args, snap_header_t *);
+ snap_protocol_info_t *pi = snap_get_protocol_info (sm, h);
+ u32 max_header_bytes = va_arg (*args, u32);
+ uword indent, header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "snap header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "SNAP %U", format_snap_protocol, h);
+
+  if (max_header_bytes != 0 && header_bytes < max_header_bytes && pi != 0)
+ {
+ vlib_node_t *node = vlib_get_node (sm->vlib_main, pi->node_index);
+ if (node->format_buffer)
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ node->format_buffer, (void *) (h + 1),
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+u8 *
+format_snap_header (u8 * s, va_list * args)
+{
+ snap_header_t *h = va_arg (*args, snap_header_t *);
+ return format (s, "%U", format_snap_header_with_length, h, 0);
+}
+
+/* Parse a SNAP protocol, numerically or by name, into the given header. */
+uword
+unformat_snap_protocol (unformat_input_t * input, va_list * args)
+{
+ snap_header_t *result = va_arg (*args, snap_header_t *);
+ snap_main_t *sm = &snap_main;
+ snap_oui_and_protocol_t p;
+ u32 i;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x 0x%x", &p.oui, &p.protocol))
+ {
+ if (p.oui >= (1 << 24))
+ return 0;
+ if (p.protocol >= (1 << 16))
+ return 0;
+ }
+
+ /* Named type. */
+ else if (unformat_user (input, unformat_vlib_number_by_name,
+ sm->protocol_info_by_name, &i))
+ {
+ snap_protocol_info_t *pi = vec_elt_at_index (sm->protocols, i);
+ p = pi->oui_and_protocol;
+ }
+
+ else
+ return 0;
+
+ snap_header_set_protocol (result, &p);
+ return 1;
+}
+
+uword
+unformat_snap_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ snap_header_t _h, *h = &_h;
+
+ if (!unformat (input, "%U", unformat_snap_protocol, h))
+ return 0;
+
+ /* Add header to result. */
+ {
+ void *p;
+ u32 n_bytes = sizeof (h[0]);
+
+ vec_add2 (*result, p, n_bytes);
+ clib_memcpy (p, h, n_bytes);
+ }
+
+ return 1;
+}
+
+static clib_error_t *
+snap_init (vlib_main_t * vm)
+{
+ snap_main_t *sm = &snap_main;
+
+ memset (sm, 0, sizeof (sm[0]));
+ sm->vlib_main = vm;
+
+ mhash_init (&sm->protocol_hash, sizeof (uword),
+ sizeof (snap_oui_and_protocol_t));
+
+ sm->protocol_info_by_name
+ = hash_create_string ( /* elts */ 0, sizeof (uword));
+
+ return vlib_call_init_function (vm, snap_input_init);
+}
+
+VLIB_INIT_FUNCTION (snap_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/snap/snap.h b/src/vnet/snap/snap.h
new file mode 100644
index 00000000..7cd453e7
--- /dev/null
+++ b/src/vnet/snap/snap.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * snap.h: SNAP definitions
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_snap_h
+#define included_snap_h
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+
+#define foreach_ieee_oui \
+ _ (0x000000, ethernet) \
+ _ (0x00000c, cisco)
+
+typedef enum
+{
+#define _(n,f) IEEE_OUI_##f = n,
+ foreach_ieee_oui
+#undef _
+} ieee_oui_t;
+
+#define foreach_snap_cisco_protocol \
+ _ (0x0102, drip) \
+ _ (0x0104, port_aggregation_protocol) \
+ _ (0x0105, mls_hello) \
+ _ (0x010b, per_vlan_spanning_tree) \
+ _ (0x010c, vlan_bridge) \
+ _ (0x0111, unidirectional_link_detection) \
+ _ (0x2000, cdp) \
+ _ (0x2001, cgmp) \
+ _ (0x2003, vtp) \
+ _ (0x2004, dtp) \
+ _ (0x200a, stp_uplink_fast)
+
+typedef enum
+{
+#define _(n,f) SNAP_cisco_##f = n,
+ foreach_snap_cisco_protocol
+#undef _
+} snap_cisco_protocol_t;
+
+typedef union
+{
+ /* *INDENT-OFF* */
+ CLIB_PACKED (struct {
+    /* OUI: organizationally unique identifier. */
+ u8 oui[3];
+
+ /* Per-OUI protocol. */
+ u16 protocol;
+ });
+ /* *INDENT-ON* */
+
+ u8 as_u8[5];
+} snap_header_t;
+
+typedef struct
+{
+ u32 oui;
+ u32 protocol;
+} snap_oui_and_protocol_t;
+
+typedef struct
+{
+ /* Name vector string. */
+ u8 *name;
+
+ snap_oui_and_protocol_t oui_and_protocol;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* snap-input next index for this type. */
+ u32 next_index;
+} snap_protocol_info_t;
+
+always_inline void
+snap_header_set_protocol (snap_header_t * h, snap_oui_and_protocol_t * p)
+{
+ u16 protocol = p->protocol;
+ u32 oui = p->oui;
+ h->protocol = clib_host_to_net_u16 (protocol);
+ h->oui[0] = (oui >> 16) & 0xff;
+ h->oui[1] = (oui >> 8) & 0xff;
+ h->oui[2] = (oui >> 0) & 0xff;
+}
+
+#define foreach_snap_error \
+ _ (NONE, "no error") \
+ _ (UNKNOWN_PROTOCOL, "unknown oui/snap protocol")
+
+typedef enum
+{
+#define _(f,s) SNAP_ERROR_##f,
+ foreach_snap_error
+#undef _
+ SNAP_N_ERROR,
+} snap_error_t;
+
+typedef struct
+{
+ vlib_main_t *vlib_main;
+
+ /* Vector of known SNAP oui/protocol pairs. */
+ snap_protocol_info_t *protocols;
+
+ /* Hash table mapping oui/protocol to protocol index. */
+ mhash_t protocol_hash;
+
+ /* Hash table mapping protocol by name. */
+ uword *protocol_info_by_name;
+} snap_main_t;
+
+always_inline u32
+snap_header_get_oui (snap_header_t * h)
+{
+ return (h->oui[0] << 16) | (h->oui[1] << 8) | h->oui[2];
+}
+
+always_inline snap_protocol_info_t *
+snap_get_protocol_info (snap_main_t * sm, snap_header_t * h)
+{
+ snap_oui_and_protocol_t key;
+ uword *p;
+
+ key.oui = snap_header_get_oui (h);
+ key.protocol = h->protocol;
+
+ p = mhash_get (&sm->protocol_hash, &key);
+ return p ? vec_elt_at_index (sm->protocols, p[0]) : 0;
+}
+
+extern snap_main_t snap_main;
+
+/* Register given node index to take input for given snap type. */
+void
+snap_register_input_protocol (vlib_main_t * vm,
+ char *name,
+ u32 ieee_oui, u16 protocol, u32 node_index);
+
+format_function_t format_snap_protocol;
+format_function_t format_snap_header;
+format_function_t format_snap_header_with_length;
+
+/* Parse snap protocol as 0xXXXX or protocol name. */
+unformat_function_t unformat_snap_protocol;
+
+/* Parse snap header. */
+unformat_function_t unformat_snap_header;
+unformat_function_t unformat_pg_snap_header;
+
+always_inline void
+snap_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_snap_header_with_length;
+ n->unformat_buffer = unformat_snap_header;
+ pn->unformat_edit = unformat_pg_snap_header;
+}
+
+#endif /* included_snap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
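
The 3-byte OUI is stored big-endian, so the set and get inlines above round-trip. A small sketch:

    snap_header_t h;
    snap_oui_and_protocol_t p = {
      .oui = IEEE_OUI_cisco,    /* 0x00000c */
      .protocol = 0x2000,       /* cdp, host order; set converts to net order */
    };
    snap_header_set_protocol (&h, &p);   /* h.oui = { 0x00, 0x00, 0x0c } */
    ASSERT (snap_header_get_oui (&h) == IEEE_OUI_cisco);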
diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c
new file mode 100644
index 00000000..9d83d4ef
--- /dev/null
+++ b/src/vnet/span/node.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vppinfra/error.h>
+
+#include <vnet/span/span.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+vlib_node_registration_t span_input_node;
+
+/* packet trace format function */
+u8 *
+format_span_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ span_trace_t *t = va_arg (*args, span_trace_t *);
+
+ vnet_main_t *vnm = &vnet_main;
+ s = format (s, "SPAN: mirrored %U -> %U",
+ format_vnet_sw_if_index_name, vnm, t->src_sw_if_index,
+ format_vnet_sw_if_index_name, vnm, t->mirror_sw_if_index);
+
+ return s;
+}
+
+#define foreach_span_error \
+_(HITS, "SPAN incoming packets processed")
+
+typedef enum
+{
+#define _(sym,str) SPAN_ERROR_##sym,
+ foreach_span_error
+#undef _
+ SPAN_N_ERROR,
+} span_error_t;
+
+static char *span_error_strings[] = {
+#define _(sym,string) string,
+ foreach_span_error
+#undef _
+};
+
+static_always_inline void
+span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0,
+ vlib_buffer_t * b0, vlib_frame_t ** mirror_frames,
+ vlib_rx_or_tx_t rxtx, span_feat_t sf)
+{
+ vlib_buffer_t *c0;
+ span_main_t *sm = &span_main;
+ vnet_main_t *vnm = &vnet_main;
+ u32 *to_mirror_next = 0;
+ u32 i;
+
+ span_interface_t *si0 = vec_elt_at_index (sm->interfaces, sw_if_index0);
+ span_mirror_t *sm0 = &si0->mirror_rxtx[sf][rxtx];
+
+ if (sm0->num_mirror_ports == 0)
+ return;
+
+ /* Don't do it again */
+ if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_SPAN_CLONE))
+ return;
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, sm0->mirror_ports, (
+ {
+ if (mirror_frames[i] == 0)
+ {
+ if (sf == SPAN_FEAT_L2)
+ mirror_frames[i] = vlib_get_frame_to_node (vnm->vlib_main, l2output_node.index);
+ else
+ mirror_frames[i] = vnet_get_frame_to_sw_interface (vnm, i);
+ }
+ to_mirror_next = vlib_frame_vector_args (mirror_frames[i]);
+ to_mirror_next += mirror_frames[i]->n_vectors;
+ /* This can fail */
+ c0 = vlib_buffer_copy (vm, b0);
+ if (PREDICT_TRUE(c0 != 0))
+ {
+ vnet_buffer (c0)->sw_if_index[VLIB_TX] = i;
+ c0->flags |= VNET_BUFFER_F_SPAN_CLONE;
+ if (sf == SPAN_FEAT_L2)
+ vnet_buffer (c0)->l2.feature_bitmap = L2OUTPUT_FEAT_OUTPUT;
+ to_mirror_next[0] = vlib_get_buffer_index (vm, c0);
+ mirror_frames[i]->n_vectors++;
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ span_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->src_sw_if_index = sw_if_index0;
+ t->mirror_sw_if_index = i;
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+static_always_inline uword
+span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, vlib_rx_or_tx_t rxtx,
+ span_feat_t sf)
+{
+ span_main_t *sm = &span_main;
+ vnet_main_t *vnm = &vnet_main;
+ u32 n_left_from, *from, *to_next;
+ u32 n_span_packets = 0;
+ u32 next_index;
+ u32 sw_if_index;
+ static __thread vlib_frame_t **mirror_frames = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ vec_validate_aligned (mirror_frames, sm->max_sw_if_index,
+ CLIB_CACHE_LINE_BYTES);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0;
+ u32 bi1;
+ vlib_buffer_t *b0;
+ vlib_buffer_t *b1;
+ u32 sw_if_index0;
+ u32 next0 = 0;
+ u32 sw_if_index1;
+ u32 next1 = 0;
+
+ /* speculatively enqueue b0, b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next += 2;
+ n_left_to_next -= 2;
+ from += 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[rxtx];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[rxtx];
+
+ span_mirror (vm, node, sw_if_index0, b0, mirror_frames, rxtx, sf);
+ span_mirror (vm, node, sw_if_index1, b1, mirror_frames, rxtx, sf);
+
+ switch (sf)
+ {
+ case SPAN_FEAT_L2:
+ if (rxtx == VLIB_RX)
+ {
+ next0 = vnet_l2_feature_next (b0, sm->l2_input_next,
+ L2INPUT_FEAT_SPAN);
+ next1 = vnet_l2_feature_next (b1, sm->l2_input_next,
+ L2INPUT_FEAT_SPAN);
+ }
+ else
+ {
+ next0 = vnet_l2_feature_next (b0, sm->l2_output_next,
+ L2OUTPUT_FEAT_SPAN);
+ next1 = vnet_l2_feature_next (b1, sm->l2_output_next,
+ L2OUTPUT_FEAT_SPAN);
+ }
+ break;
+ case SPAN_FEAT_DEVICE:
+ default:
+ vnet_feature_next (sw_if_index0, &next0, b0);
+ vnet_feature_next (sw_if_index1, &next1, b1);
+ break;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 sw_if_index0;
+ u32 next0 = 0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next += 1;
+ n_left_to_next -= 1;
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[rxtx];
+
+ span_mirror (vm, node, sw_if_index0, b0, mirror_frames, rxtx, sf);
+
+ switch (sf)
+ {
+ case SPAN_FEAT_L2:
+ if (rxtx == VLIB_RX)
+ next0 = vnet_l2_feature_next (b0, sm->l2_input_next,
+ L2INPUT_FEAT_SPAN);
+ else
+ next0 = vnet_l2_feature_next (b0, sm->l2_output_next,
+ L2OUTPUT_FEAT_SPAN);
+ break;
+ case SPAN_FEAT_DEVICE:
+ default:
+ vnet_feature_next (sw_if_index0, &next0, b0);
+ break;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+
+ for (sw_if_index = 0; sw_if_index < vec_len (mirror_frames); sw_if_index++)
+ {
+ vlib_frame_t *f = mirror_frames[sw_if_index];
+ if (f == 0)
+ continue;
+
+ if (sf == SPAN_FEAT_L2)
+ vlib_put_frame_to_node (vnm->vlib_main, l2output_node.index, f);
+ else
+ vnet_put_frame_to_sw_interface (vnm, sw_if_index, f);
+ mirror_frames[sw_if_index] = 0;
+ }
+  vlib_node_increment_counter (vm, span_input_node.index, SPAN_ERROR_HITS,
+			       n_span_packets);
+
+ return frame->n_vectors;
+}
+
+static uword
+span_device_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return span_node_inline_fn (vm, node, frame, VLIB_RX, SPAN_FEAT_DEVICE);
+}
+
+static uword
+span_device_output_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return span_node_inline_fn (vm, node, frame, VLIB_TX, SPAN_FEAT_DEVICE);
+}
+
+static uword
+span_l2_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return span_node_inline_fn (vm, node, frame, VLIB_RX, SPAN_FEAT_L2);
+}
+
+static uword
+span_l2_output_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return span_node_inline_fn (vm, node, frame, VLIB_TX, SPAN_FEAT_L2);
+}
+
+#define span_node_defs \
+ .vector_size = sizeof (u32), \
+ .format_trace = format_span_trace, \
+ .type = VLIB_NODE_TYPE_INTERNAL, \
+ .n_errors = ARRAY_LEN(span_error_strings), \
+ .error_strings = span_error_strings, \
+ .n_next_nodes = 0, \
+ .next_nodes = { \
+ [0] = "error-drop" \
+ }
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (span_input_node) = {
+ span_node_defs,
+ .function = span_device_input_node_fn,
+ .name = "span-input",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (span_input_node, span_device_input_node_fn)
+
+VLIB_REGISTER_NODE (span_output_node) = {
+ span_node_defs,
+ .function = span_device_output_node_fn,
+ .name = "span-output",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (span_output_node, span_device_output_node_fn)
+
+VLIB_REGISTER_NODE (span_l2_input_node) = {
+ span_node_defs,
+ .function = span_l2_input_node_fn,
+ .name = "span-l2-input",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (span_l2_input_node, span_l2_input_node_fn)
+
+VLIB_REGISTER_NODE (span_l2_output_node) = {
+ span_node_defs,
+ .function = span_l2_output_node_fn,
+ .name = "span-l2-output",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (span_l2_output_node, span_l2_output_node_fn)
+
+clib_error_t *span_init (vlib_main_t * vm)
+{
+ span_main_t *sm = &span_main;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ span_l2_input_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ sm->l2_input_next);
+
+ feat_bitmap_init_next_nodes (vm,
+ span_l2_output_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ sm->l2_output_next);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (span_init);
+/* *INDENT-ON* */
+
+#undef span_node_defs
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
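
The four nodes above are attached per interface rather than statically chained. A sketch of the hookup calls used in span.c below (the interface index is illustrative):

    /* Device-level: insert span-input/span-output into the feature arcs. */
    vnet_feature_enable_disable ("device-input", "span-input",
                                 sw_if_index, 1 /* enable */, 0, 0);
    vnet_feature_enable_disable ("interface-output", "span-output",
                                 sw_if_index, 1 /* enable */, 0, 0);

    /* L2 path: flip the per-interface feature bitmaps instead. */
    l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_SPAN, 1);
    l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_SPAN, 1);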
diff --git a/src/vnet/span/span.api b/src/vnet/span/span.api
new file mode 100644
index 00000000..03cd60ec
--- /dev/null
+++ b/src/vnet/span/span.api
@@ -0,0 +1,56 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /** \brief Enable/Disable span to mirror traffic from one interface to another
+ @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index_from - interface to be mirrored
+    @param sw_if_index_to - interface where the traffic is mirrored
+    @param state - 0 = disabled, 1 = rx enabled, 2 = tx enabled, 3 = tx & rx enabled
+ @param is_l2 - 0 = mirror at hw device level, 1 = mirror at L2
+*/
+autoreply define sw_interface_span_enable_disable {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index_from;
+ u32 sw_if_index_to;
+ u8 state;
+ u8 is_l2;
+};
+
+/** \brief SPAN dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_l2 - 0 = hw device level, 1 = L2
+*/
+define sw_interface_span_dump {
+ u32 client_index;
+ u32 context;
+ u8 is_l2;
+};
+
+/** \brief Reply to SPAN dump request
+ @param context - sender context which was passed in the request
+    @param sw_if_index_from - mirrored interface
+    @param sw_if_index_to - interface where the traffic is mirrored
+    @param state - 0 = disabled, 1 = rx enabled, 2 = tx enabled, 3 = tx & rx enabled
+*/
+define sw_interface_span_details {
+ u32 context;
+ u32 sw_if_index_from;
+ u32 sw_if_index_to;
+ u8 state;
+};
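
A sketch of filling the enable/disable request from C; the message id macro follows the usual generated VL_API_ naming, and the interface indices are illustrative:

    vl_api_sw_interface_span_enable_disable_t *mp;

    mp = vl_msg_api_alloc (sizeof (*mp));
    memset (mp, 0, sizeof (*mp));
    mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SPAN_ENABLE_DISABLE);
    mp->sw_if_index_from = ntohl (5);   /* mirrored interface */
    mp->sw_if_index_to = ntohl (6);     /* monitoring interface */
    mp->state = 3;                      /* rx + tx */
    mp->is_l2 = 0;                      /* device-level mirroring */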
diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c
new file mode 100644
index 00000000..2fd9cd87
--- /dev/null
+++ b/src/vnet/span/span.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/error.h>
+#include <vnet/feature/feature.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vnet/span/span.h>
+
+typedef enum
+{
+ SPAN_DISABLE = 0,
+ SPAN_RX = 1,
+ SPAN_TX = 2,
+ SPAN_BOTH = SPAN_RX | SPAN_TX
+} span_state_t;
+
+static_always_inline u32
+span_dst_set (span_mirror_t * sm, u32 dst_sw_if_index, int enable)
+{
+ if (dst_sw_if_index == ~0)
+ {
+ ASSERT (enable == 0);
+ clib_bitmap_zero (sm->mirror_ports);
+ }
+ else
+ sm->mirror_ports =
+ clib_bitmap_set (sm->mirror_ports, dst_sw_if_index, enable);
+
+ u32 last = sm->num_mirror_ports;
+ sm->num_mirror_ports = clib_bitmap_count_set_bits (sm->mirror_ports);
+ return last;
+}
+
+int
+span_add_delete_entry (vlib_main_t * vm,
+ u32 src_sw_if_index, u32 dst_sw_if_index, u8 state,
+ span_feat_t sf)
+{
+ span_main_t *sm = &span_main;
+
+ if (state > SPAN_BOTH)
+ return VNET_API_ERROR_UNIMPLEMENTED;
+
+ if ((src_sw_if_index == ~0) || (dst_sw_if_index == ~0 && state > 0)
+ || (src_sw_if_index == dst_sw_if_index))
+ return VNET_API_ERROR_INVALID_INTERFACE;
+
+ vec_validate_aligned (sm->interfaces, src_sw_if_index,
+ CLIB_CACHE_LINE_BYTES);
+
+ span_interface_t *si = vec_elt_at_index (sm->interfaces, src_sw_if_index);
+
+ int rx = ! !(state & SPAN_RX);
+ int tx = ! !(state & SPAN_TX);
+
+ span_mirror_t *rxm = &si->mirror_rxtx[sf][VLIB_RX];
+ span_mirror_t *txm = &si->mirror_rxtx[sf][VLIB_TX];
+
+ u32 last_rx_ports_count = span_dst_set (rxm, dst_sw_if_index, rx);
+ u32 last_tx_ports_count = span_dst_set (txm, dst_sw_if_index, tx);
+
+ int enable_rx = last_rx_ports_count == 0 && rxm->num_mirror_ports == 1;
+ int disable_rx = last_rx_ports_count > 0 && rxm->num_mirror_ports == 0;
+ int enable_tx = last_tx_ports_count == 0 && txm->num_mirror_ports == 1;
+ int disable_tx = last_tx_ports_count > 0 && txm->num_mirror_ports == 0;
+
+ switch (sf)
+ {
+ case SPAN_FEAT_DEVICE:
+ if (enable_rx || disable_rx)
+ vnet_feature_enable_disable ("device-input", "span-input",
+ src_sw_if_index, rx, 0, 0);
+ if (enable_tx || disable_tx)
+ vnet_feature_enable_disable ("interface-output", "span-output",
+ src_sw_if_index, tx, 0, 0);
+ break;
+ case SPAN_FEAT_L2:
+ if (enable_rx || disable_rx)
+ l2input_intf_bitmap_enable (src_sw_if_index, L2INPUT_FEAT_SPAN, rx);
+ if (enable_tx || disable_tx)
+ l2output_intf_bitmap_enable (src_sw_if_index, L2OUTPUT_FEAT_SPAN, tx);
+ break;
+ default:
+ return VNET_API_ERROR_UNIMPLEMENTED;
+ }
+
+ if (dst_sw_if_index != ~0 && dst_sw_if_index > sm->max_sw_if_index)
+ sm->max_sw_if_index = dst_sw_if_index;
+
+ return 0;
+}
+
+static uword
+unformat_span_state (unformat_input_t * input, va_list * args)
+{
+ span_state_t *state = va_arg (*args, span_state_t *);
+ if (unformat (input, "disable"))
+ *state = SPAN_DISABLE;
+ else if (unformat (input, "rx"))
+ *state = SPAN_RX;
+ else if (unformat (input, "tx"))
+ *state = SPAN_TX;
+ else if (unformat (input, "both"))
+ *state = SPAN_BOTH;
+ else
+ return 0;
+ return 1;
+}
+
+static clib_error_t *
+set_interface_span_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ span_main_t *sm = &span_main;
+ u32 src_sw_if_index = ~0;
+ u32 dst_sw_if_index = ~0;
+ span_feat_t sf = SPAN_FEAT_DEVICE;
+ span_state_t state = SPAN_BOTH;
+ int state_set = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface,
+ sm->vnet_main, &src_sw_if_index))
+ ;
+ else if (unformat (input, "destination %U", unformat_vnet_sw_interface,
+ sm->vnet_main, &dst_sw_if_index))
+ ;
+ else if (unformat (input, "%U", unformat_span_state, &state))
+ {
+ if (state_set)
+ return clib_error_return (0, "Multiple mirror states in input");
+ state_set = 1;
+ }
+ else if (unformat (input, "l2"))
+ sf = SPAN_FEAT_L2;
+ else
+ return clib_error_return (0, "Invalid input");
+ }
+
+ int rv =
+ span_add_delete_entry (vm, src_sw_if_index, dst_sw_if_index, state, sf);
+ if (rv == VNET_API_ERROR_INVALID_INTERFACE)
+ return clib_error_return (0, "Invalid interface");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_span_command, static) = {
+ .path = "set interface span",
+ .short_help = "set interface span <if-name> [l2] {disable | destination <if-name> [both|rx|tx]}",
+ .function = set_interface_span_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_interfaces_span_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ span_main_t *sm = &span_main;
+ span_interface_t *si;
+ vnet_main_t *vnm = &vnet_main;
+ u8 header = 1;
+ static const char *states[] = {
+ [SPAN_DISABLE] = "none",
+ [SPAN_RX] = "rx",
+ [SPAN_TX] = "tx",
+ [SPAN_BOTH] = "both"
+ };
+ u8 *s = 0;
+
+ /* *INDENT-OFF* */
+ vec_foreach (si, sm->interfaces)
+ {
+ span_mirror_t * drxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_RX];
+ span_mirror_t * dtxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_TX];
+
+ span_mirror_t * lrxm = &si->mirror_rxtx[SPAN_FEAT_L2][VLIB_RX];
+ span_mirror_t * ltxm = &si->mirror_rxtx[SPAN_FEAT_L2][VLIB_TX];
+
+ if (drxm->num_mirror_ports || dtxm->num_mirror_ports ||
+ lrxm->num_mirror_ports || ltxm->num_mirror_ports)
+ {
+ u32 i;
+ clib_bitmap_t *d = clib_bitmap_dup_or (drxm->mirror_ports, dtxm->mirror_ports);
+ clib_bitmap_t *l = clib_bitmap_dup_or (lrxm->mirror_ports, ltxm->mirror_ports);
+ clib_bitmap_t *b = clib_bitmap_dup_or (d, l);
+ if (header)
+ {
+ vlib_cli_output (vm, "%-20s %-20s %6s %6s", "Source", "Destination",
+ "Device", "L2");
+ header = 0;
+ }
+ s = format (s, "%U", format_vnet_sw_if_index_name, vnm,
+ si - sm->interfaces);
+ clib_bitmap_foreach (i, b, (
+ {
+ int device = (clib_bitmap_get (drxm->mirror_ports, i) +
+ clib_bitmap_get (dtxm->mirror_ports, i) * 2);
+ int l2 = (clib_bitmap_get (lrxm->mirror_ports, i) +
+ clib_bitmap_get (ltxm->mirror_ports, i) * 2);
+
+ vlib_cli_output (vm, "%-20v %-20U (%6s) (%6s)", s,
+ format_vnet_sw_if_index_name, vnm, i,
+ states[device], states[l2]);
+ vec_reset_length (s);
+ }));
+ clib_bitmap_free (b);
+ clib_bitmap_free (l);
+ clib_bitmap_free (d);
+ }
+ }
+ /* *INDENT-ON* */
+ vec_free (s);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_interfaces_span_command, static) = {
+ .path = "show interface span",
+ .short_help = "Shows SPAN mirror table",
+ .function = show_interfaces_span_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
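
A sketch of driving the same entry point programmatically, using the state values defined at the top of this file (interface indices are illustrative):

    /* Mirror rx and tx of sw_if_index 1 to sw_if_index 2 at device level. */
    int rv = span_add_delete_entry (vm, 1 /* src */, 2 /* dst */,
                                    SPAN_RX | SPAN_TX, SPAN_FEAT_DEVICE);

    /* Tear the mirror down again. */
    if (rv == 0)
      rv = span_add_delete_entry (vm, 1, 2, SPAN_DISABLE, SPAN_FEAT_DEVICE);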
diff --git a/src/vnet/span/span.h b/src/vnet/span/span.h
new file mode 100644
index 00000000..10de8272
--- /dev/null
+++ b/src/vnet/span/span.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __span_h__
+#define __span_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_output.h>
+
+typedef enum
+{
+ SPAN_FEAT_DEVICE,
+ SPAN_FEAT_L2,
+ SPAN_FEAT_N
+} span_feat_t;
+
+typedef struct
+{
+ clib_bitmap_t *mirror_ports;
+ u32 num_mirror_ports;
+} span_mirror_t;
+
+typedef struct
+{
+ span_mirror_t mirror_rxtx[SPAN_FEAT_N][VLIB_N_RX_TX];
+} span_interface_t;
+
+typedef struct
+{
+ /* l2 feature Next nodes */
+ u32 l2_input_next[32];
+ u32 l2_output_next[32];
+
+ /* per-interface vector of span instances */
+ span_interface_t *interfaces;
+
+ /* biggest sw_if_index used so far */
+ u32 max_sw_if_index;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} span_main_t;
+
+span_main_t span_main;
+
+typedef struct
+{
+ u32 src_sw_if_index; /* mirrored interface index */
+ u32 mirror_sw_if_index; /* output interface index */
+} span_trace_t;
+
+int
+span_add_delete_entry (vlib_main_t * vm, u32 src_sw_if_index,
+		       u32 dst_sw_if_index, u8 state, span_feat_t sf);
+
+#endif /* __span_h__ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/span/span_api.c b/src/vnet/span/span_api.c
new file mode 100644
index 00000000..64a71a2e
--- /dev/null
+++ b/src/vnet/span/span_api.c
@@ -0,0 +1,159 @@
+/*
+ *------------------------------------------------------------------
+ * span_api.c - span mirroring api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/span/span.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SW_INTERFACE_SPAN_ENABLE_DISABLE, sw_interface_span_enable_disable) \
+_(SW_INTERFACE_SPAN_DUMP, sw_interface_span_dump) \
+
+static void
+ vl_api_sw_interface_span_enable_disable_t_handler
+ (vl_api_sw_interface_span_enable_disable_t * mp)
+{
+ vl_api_sw_interface_span_enable_disable_reply_t *rmp;
+ int rv;
+
+ vlib_main_t *vm = vlib_get_main ();
+
+ rv = span_add_delete_entry (vm, ntohl (mp->sw_if_index_from),
+ ntohl (mp->sw_if_index_to), mp->state,
+ mp->is_l2 ? SPAN_FEAT_L2 : SPAN_FEAT_DEVICE);
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SPAN_ENABLE_DISABLE_REPLY);
+}
+
+static void
+vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp)
+{
+
+ unix_shared_memory_queue_t *q;
+ span_interface_t *si;
+ vl_api_sw_interface_span_details_t *rmp;
+ span_main_t *sm = &span_main;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ span_feat_t sf = mp->is_l2 ? SPAN_FEAT_L2 : SPAN_FEAT_DEVICE;
+ /* *INDENT-OFF* */
+ vec_foreach (si, sm->interfaces)
+ {
+ span_mirror_t * rxm = &si->mirror_rxtx[sf][VLIB_RX];
+ span_mirror_t * txm = &si->mirror_rxtx[sf][VLIB_TX];
+ if (rxm->num_mirror_ports || txm->num_mirror_ports)
+ {
+ clib_bitmap_t *b;
+ u32 i;
+ b = clib_bitmap_dup_or (rxm->mirror_ports, txm->mirror_ports);
+ clib_bitmap_foreach (i, b, (
+ {
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SPAN_DETAILS);
+ rmp->context = mp->context;
+
+ rmp->sw_if_index_from = htonl (si - sm->interfaces);
+ rmp->sw_if_index_to = htonl (i);
+ rmp->state = (u8) (clib_bitmap_get (rxm->mirror_ports, i) +
+ clib_bitmap_get (txm->mirror_ports, i) * 2);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+ }));
+ clib_bitmap_free (b);
+ }
+ }
+ /* *INDENT-ON* */
+}
+
+/*
+ * vpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_span;
+#undef _
+}
+
+static clib_error_t *
+span_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (span_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/span/span_doc.md b/src/vnet/span/span_doc.md
new file mode 100644
index 00000000..46480b28
--- /dev/null
+++ b/src/vnet/span/span_doc.md
@@ -0,0 +1,65 @@
+# VPP SPAN implementation {#span_doc}
+
+This is a memo intended to contain documentation of the VPP SPAN implementation.
+Everything that is not directly obvious should come here.
+
+
+## Switched Port Analyzer (SPAN)
+Port mirroring is used on a network switch to send a copy of network packets seen on one switch port to a network monitoring connection on another switch port.
+It can be used by network engineers or administrators to measure performance, analyze and debug data, or diagnose errors on a network.
+
+### RX traffic node
+There is one static node to mirror incoming packets.
+* span-input: Creates a copy of the incoming buffer, since incoming buffers can be reused internally.
+
+Chaining: dpdk-input -> span-input ->
+* original buffer is sent to ethernet-input for processing
+* buffer copy is sent to interface-output
+
+### Configuration
+SPAN supports the following CLI configuration commands:
+
+#### Enable/Disable SPAN (CLI)
+    set interface span <if-name> [l2] {disable | destination <if-name> [both|rx|tx]}
+
+<if-name>: mirrored interface name
+destination <if-name>: monitoring interface name
+disable: delete mirroring
+
+#### Enable/Disable SPAN (API)
+SPAN supports the following API configuration commands:
+ sw_interface_span_enable_disable src GigabitEthernet0/8/0 dst GigabitEthernet0/9/0
+ sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2
+
+src/src_sw_if_index: mirrored interface (name or index)
+dst/dst_sw_if_index: monitoring interface (name or index)
+
+#### Remove SPAN entry (API)
+SPAN supports the following API configuration command:
+ sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2 disable
+
+src_sw_if_index: mirrored interface index
+dst_sw_if_index: monitoring interface index
+
+### Configuration example
+
+Mirror all packets on interface GigabitEthernet0/10/0 to interface GigabitEthernet0/11/0.
+
+Configure IPv4 addresses on mirrored interface:
+set interface ip address GigabitEthernet0/10/0 192.168.1.13/24
+set interface state GigabitEthernet0/10/0 up
+
+Configure IPv4 addresses on monitoring interface:
+set interface ip address GigabitEthernet0/11/0 192.168.2.13/24
+set interface state GigabitEthernet0/11/0 up
+
+Configure SPAN:
+set interface span GigabitEthernet0/10/0 destination GigabitEthernet0/11/0
+
+### Operational data
+
+Active SPAN mirroring CLI show command:
+    show interface span
+
+Active SPAN mirroring API dump command:
+ sw_interface_span_dump
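+
+Sample output (column layout follows the format strings in span.c; interface names and states are illustrative):
+
+    Source               Destination          Device     L2
+    GigabitEthernet0/10/0 GigabitEthernet0/11/0 (  both) (  none)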
diff --git a/src/vnet/srmpls/dir.dox b/src/vnet/srmpls/dir.dox
new file mode 100755
index 00000000..76ec1d6a
--- /dev/null
+++ b/src/vnet/srmpls/dir.dox
@@ -0,0 +1,22 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Segment Routing MPLS code
+
+ An implementation of Segment Routing for the MPLS dataplane.
+
+*/ \ No newline at end of file
diff --git a/src/vnet/srmpls/sr.h b/src/vnet/srmpls/sr.h
new file mode 100755
index 00000000..0e106697
--- /dev/null
+++ b/src/vnet/srmpls/sr.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing MPLS data structures definitions
+ *
+ */
+
+#ifndef included_vnet_srmpls_h
+#define included_vnet_srmpls_h
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+/* SR policy types */
+#define SR_POLICY_TYPE_DEFAULT 0
+#define SR_POLICY_TYPE_SPRAY 1
+
+#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
+
+#define SR_STEER_IPV4 4
+#define SR_STEER_IPV6 6
+
+/**
+ * @brief SR Segment List (SID list)
+ */
+typedef struct
+{
+ /**
+ * SIDs (key)
+ */
+ mpls_label_t *segments;
+
+ /**
+ * SID list weight (wECMP / UCMP)
+ */
+ u32 weight;
+
+} mpls_sr_sl_t;
+
+typedef struct
+{
+  u32 *segments_lists;		/**< Vector of SID list indices */
+
+ mpls_label_t bsid; /**< BindingSID (key) */
+
+ u8 type; /**< Type (default is 0) */
+  /* SR Policy specific DPO:
+     if type == DEFAULT, a load-balance DPO among the SID lists;
+     if type == SPRAY, a replicate DPO over all the SID lists */
+
+} mpls_sr_policy_t;
+
+/**
+ * @brief Steering db key
+ *
+ * L3 is IPv4/IPv6 + mask
+ */
+typedef struct
+{
+ ip46_address_t prefix; /**< IP address of the prefix */
+ u32 mask_width; /**< Mask width of the prefix */
+ u32 fib_table; /**< VRF of the prefix */
+ u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */
+ u8 padding[3];
+} sr_mpls_steering_key_t;
+
+typedef struct
+{
+ sr_mpls_steering_key_t classify; /**< Traffic classification */
+ u32 sr_policy; /**< SR Policy index */
+} mpls_sr_steering_policy_t;
+
+/**
+ * @brief Segment Routing main datastructure
+ */
+typedef struct
+{
+ /**
+ * SR SID lists
+ */
+ mpls_sr_sl_t *sid_lists;
+
+ /**
+ * SR MPLS policies
+ */
+ mpls_sr_policy_t *sr_policies;
+
+ /**
+ * Hash table mapping BindingSID to SR MPLS policy
+ */
+ uword *sr_policies_index_hash;
+
+ /**
+ * Pool of SR steer policies instances
+ */
+ mpls_sr_steering_policy_t *steer_policies;
+
+ /**
+ * MHash table mapping steering rules to SR steer instance
+ */
+ mhash_t sr_steer_policies_hash;
+
+ /**
+ * convenience
+ */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} mpls_sr_main_t;
+
+extern mpls_sr_main_t sr_mpls_main;
+
+extern int
+sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
+ u8 behavior, u32 weight);
+
+extern int
+sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation,
+ mpls_label_t * segments, u32 sl_index, u32 weight);
+
+extern int sr_mpls_policy_del (mpls_label_t bsid, u32 index);
+
+#endif /* included_vnet_srmpls_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srmpls/sr_doc.md b/src/vnet/srmpls/sr_doc.md
new file mode 100644
index 00000000..d60592bb
--- /dev/null
+++ b/src/vnet/srmpls/sr_doc.md
@@ -0,0 +1,87 @@
+# SR-MPLS: Segment Routing for MPLS {#srmpls_doc}
+
+This is a memo intended to contain documentation of the VPP SR-MPLS implementation.
+Everything that is not directly obvious should be documented here.
+For any feedback on content that should be explained please mailto:pcamaril@cisco.com
+
+## Segment Routing
+
+Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application-engineered routing, preparing networks for models in which applications can control network behavior.
+
+Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page contains the SR-MPLS documentation.**
+
+## Segment Routing terminology
+
+* SegmentID (SID): an MPLS label.
+* Segment List (SL) (SID List): the sequence of SIDs that the packet traverses.
+* SR Policy: a set of candidate paths (SID list + weight). An SR policy is uniquely identified by its BindingSID and is associated with a weighted set of Segment Lists. When several SID lists are defined, traffic steered into the policy is load-balanced among them in proportion to their respective weights.
+* BindingSID: a single SID associated one-to-one with an SR Policy. When a packet arrives with an MPLS label corresponding to a BindingSID, the label is popped and the corresponding SR policy is applied to the packet.
+
+## SR-MPLS features in VPP
+
+The SR-MPLS implementation focuses on SR policies and their steering. Other SR-MPLS features, such as AdjSIDs, can be achieved using the regular VPP MPLS implementation.
+
+The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-segment-routing-policy/">Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*)</a> defines SR Policies.
+
+## Creating an SR Policy
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
+
+A new SR policy is created with a first SID list using:
+
+ sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5)
+
+* The weight parameter is only used if more than one SID list is associated with the policy.
+
+An SR policy is deleted with:
+
+ sr mpls policy del bsid 40001
+
+The existing SR policies are listed with:
+
+ show sr mpls policies
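+
+For in-process use the same operations are exposed as C functions declared in
+sr.h. The following is a minimal sketch of the creation example above (label
+values are illustrative and error handling is elided):
+
+    #include <vnet/srmpls/sr.h>
+
+    mpls_label_t *segments = NULL;   /* VPP vector of labels, top label first */
+    vec_add1 (segments, 16001);
+    vec_add1 (segments, 16002);
+    vec_add1 (segments, 16003);
+
+    /* BSID 40001, default (load-balanced) behavior, SID list weight 5 */
+    int rv = sr_mpls_policy_add (40001, segments, SR_POLICY_TYPE_DEFAULT, 5);
+    vec_free (segments);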
+
+### Adding/Removing SID Lists from an SR policy
+
+An additional SID list is associated with an existing SR policy with:
+
+ sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+    sr mpls policy mod bsid 40001 del sl index 1
+
+Note that this CLI cannot be used to remove the last SID list of a policy. Instead, the SR policy delete CLI must be used.
+
+The weight of a SID list can also be modified with:
+
+ sr mpls policy mod bsid 40001 mod sl index 1 weight 4
+ sr mpls policy mod index 1 mod sl index 1 weight 4
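+
+These CLI variants map onto the C function sr_mpls_policy_mod(), with
+operation 1 adding a SID list, operation 2 deleting one, and operation 3
+modifying a weight (see sr_mpls_policy.c). A sketch of the weight change
+above, selecting the policy by BSID (values are illustrative):
+
+    /* Set the weight of SID list index 1 of policy BSID 40001 to 4;
+       the segments argument is unused for a weight change. */
+    int rv = sr_mpls_policy_mod (40001, (u32) ~0, 3, NULL, 1, 4);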
+
+### SR Policies: Spray policies
+
+Spray policies are a specific type of SR policy in which the packet is replicated onto all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored with this type of policy.
+
+A Spray policy is instantiated by appending the keyword **spray** to a regular SR-MPLS policy command, as in:
+
+ sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray
+
+Spray policies are used to remove multicast state from a network core domain by instead sending a unicast copy to every access node; the last SID in each list steers the packet into the multicast tree within the access node.
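+
+In terms of the C API sketched above, a spray policy is created by passing
+SR_POLICY_TYPE_SPRAY as the behavior argument of sr_mpls_policy_add()
+(reusing the illustrative segments vector from the earlier sketch):
+
+    int rv = sr_mpls_policy_add (40002, segments, SR_POLICY_TYPE_SPRAY, 1);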
+
+## Steering packets into an SR Policy
+
+To steer packets in transit into an SR policy, the user needs to create an 'sr steering policy':
+
+ sr mpls steer l3 2001::/64 via sr policy bsid 40001
+ sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3
+ sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001
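+
+Internally these commands call sr_mpls_steering_policy() in
+sr_mpls_steering.c (note that this patch does not export the function through
+sr.h). A sketch for the IPv4 case, with illustrative values:
+
+    ip46_address_t prefix;
+    memset (&prefix, 0, sizeof (prefix));
+    prefix.ip4.as_u32 = clib_host_to_net_u32 (0x0a000000);  /* 10.0.0.0 */
+
+    /* Steer 10.0.0.0/16 in the default FIB table into the policy BSID 40001 */
+    int rv = sr_mpls_steering_policy (0 /* is_del */, 40001,
+                                      (u32) ~0 /* index: unused */,
+                                      (u32) ~0 /* default FIB table */,
+                                      &prefix, 16, SR_STEER_IPV4);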
diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c
new file mode 100755
index 00000000..db4ad2a7
--- /dev/null
+++ b/src/vnet/srmpls/sr_mpls_policy.c
@@ -0,0 +1,569 @@
+/*
+ * sr_mpls_policy.c: SR-MPLS policies
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief SR MPLS policy creation and application
+ *
+ * Create an SR policy.
+ * An SR policy can be either of 'default' type or 'spray' type.
+ * An SR policy has a list of SID lists attached.
+ * If the SR policy is of default type, it load-balances among them.
+ * An SR policy has an associated BindingSID.
+ * When a packet arrives with MPLS label == BindingSID, the SR policy
+ * associated with that BindingSID is applied to the packet.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srmpls/sr.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+mpls_sr_main_t sr_mpls_main;
+
+/*************************** SR LB helper functions **************************/
+/**
+ * @brief Creates a Segment List and adds it to an SR policy
+ *
+ * Creates a Segment List and adds it to the SR policy. Note that Segment
+ * Lists are not necessarily unique; two Segment Lists within the same
+ * SR Policy may have exactly the same segments and the same weight.
+ *
+ * @param sr_policy is the SR policy where the SL will be added
+ * @param sl is a vector of MPLS labels composing the Segment List
+ * @param weight is the weight of the Segment List (for load-balancing purposes)
+ *
+ * @return pointer to the just created segment list
+ */
+static inline mpls_sr_sl_t *
+create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_sl_t *segment_list;
+
+ pool_get (sm->sid_lists, segment_list);
+ memset (segment_list, 0, sizeof (*segment_list));
+
+ vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);
+
+ /* Fill in segment list */
+ segment_list->weight =
+ (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
+ segment_list->segments = vec_dup (sl);
+
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_label_stack = NULL,
+ .frp_local_label = sl[0],
+ };
+
+ vec_add (path.frp_label_stack, sl + 1, vec_len (sl) - 1);
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ mpls_eos_bit_t eos;
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ /* *INDENT-OFF* */
+ fib_prefix_t pfx = {
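+      /* MPLS FIB prefix length: 20-bit label + 1 EOS bit */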
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+ /* *INDENT-ON* */
+
+ fib_table_entry_path_add2 (0,
+ &pfx,
+ FIB_SOURCE_SR,
+ (sr_policy->type == SR_POLICY_TYPE_DEFAULT ?
+ FIB_ENTRY_FLAG_NONE :
+ FIB_ENTRY_FLAG_MULTICAST), paths);
+ }
+
+ vec_free (paths);
+
+ return segment_list;
+}
+
+/******************************* SR rewrite API *******************************/
+/* Three functions for handling sr policies:
+ * -> sr_mpls_policy_add
+ * -> sr_mpls_policy_del
+ * -> sr_mpls_policy_mod
+ * All of them are API. CLI function on sr_policy_command_fn */
+
+/**
+ * @brief Create a new SR policy
+ *
+ * @param bsid is the BindingSID of the SR Policy
+ * @param segments is a vector of MPLS labels composing the segment list
+ * @param behavior is the behavior of the SR policy (default/spray)
+ * @param weight is the weight of this specific SID list
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
+ u8 behavior, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ uword *p;
+
+ /* Search for existing keys (BSID) */
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ {
+ /* Add SR policy that already exists; complain */
+ return -12;
+ }
+
+ /* Add an SR policy object */
+ pool_get (sm->sr_policies, sr_policy);
+ memset (sr_policy, 0, sizeof (*sr_policy));
+ sr_policy->bsid = bsid;
+ sr_policy->type = behavior;
+
+ /* Copy the key */
+ hash_set (sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies);
+
+ /* Create a segment list and add the index to the SR policy */
+ create_sl (sr_policy, segments, weight);
+
+ return 0;
+}
+
+/**
+ * @brief Delete an SR policy
+ *
+ * @param bsid is the BindingSID of the SR Policy (alt to index)
+ * @param index is the index of the SR policy (alt to bsid)
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_del (mpls_label_t bsid, u32 index)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_sl_t *segment_list;
+ mpls_eos_bit_t eos;
+ u32 *sl_index;
+ uword *p;
+
+ if (bsid)
+ {
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+ }
+ else
+ {
+ sr_policy = pool_elt_at_index (sm->sr_policies, index);
+ if (!sr_policy)
+ return -1;
+ }
+
+ /* Clean SID Lists */
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_local_label = segment_list->segments[0],
+ };
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ /* remove each of the MPLS routes */
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ /* *INDENT-OFF* */
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+ /* *INDENT-ON* */
+
+ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+ }
+ vec_free (paths);
+ vec_free (segment_list->segments);
+ pool_put_index (sm->sid_lists, *sl_index);
+ }
+
+ /* Remove SR policy entry */
+ hash_unset (sm->sr_policies_index_hash, sr_policy->bsid);
+ pool_put (sm->sr_policies, sr_policy);
+
+ return 0;
+}
+
+/**
+ * @brief Modify an existing SR policy
+ *
+ * The possible modifications are adding a new Segment List, modifying an
+ * existing Segment List (modify the weight only) and delete a given
+ * Segment List from the SR Policy.
+ *
+ * @param bsid is the BindingSID of the SR Policy (alt to index)
+ * @param index is the index of the SR policy (alt to bsid)
+ * @param operation is the operation to perform: 1 add a Segment List,
+ *        2 delete a Segment List, 3 modify the weight of a Segment List
+ * @param segments is a vector of MPLS labels composing the Segment List
+ * @param sl_index is the index of the Segment List to modify/delete
+ * @param weight is the new weight of the SID list (optional)
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation,
+ mpls_label_t * segments, u32 sl_index, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_sl_t *segment_list;
+ u32 *sl_index_iterate;
+ uword *p;
+
+ if (bsid)
+ {
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+ }
+ else
+ {
+ sr_policy = pool_elt_at_index (sm->sr_policies, index);
+ if (!sr_policy)
+ return -1;
+ }
+
+ if (operation == 1) /* Add SR List to an existing SR policy */
+ {
+ /* Create the new SL */
+ segment_list = create_sl (sr_policy, segments, weight);
+
+ }
+ else if (operation == 2) /* Delete SR List from an existing SR policy */
+ {
+      /* Check that there is currently more than one SID list */
+ if (vec_len (sr_policy->segments_lists) == 1)
+ return -21;
+
+ /* Check that the SR list does exist and is assigned to the sr policy */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -22;
+
+ /* Remove the lucky SR list that is being kicked out */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+
+ mpls_eos_bit_t eos;
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_local_label = segment_list->segments[0],
+ };
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ /* *INDENT-OFF* */
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+ /* *INDENT-ON* */
+
+ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+ }
+
+ vec_free (paths);
+ vec_free (segment_list->segments);
+ pool_put_index (sm->sid_lists, sl_index);
+ vec_del1 (sr_policy->segments_lists,
+ sl_index_iterate - sr_policy->segments_lists);
+ }
+ else if (operation == 3) /* Modify the weight of an existing SR List */
+ {
+ /* Find the corresponding SL */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -32;
+
+ /* Change the weight */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+ segment_list->weight = weight;
+
+ /* Update LB */
+ //FIXME
+ }
+ return 0;
+}
+
+/**
+ * @brief CLI for 'sr mpls policies' command family
+ */
+static clib_error_t *
+sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv = -1;
+ char is_del = 0, is_add = 0, is_mod = 0;
+ char policy_set = 0;
+ mpls_label_t bsid, next_label;
+ u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
+ u32 weight = (u32) ~ 0;
+ mpls_label_t *segments = 0;
+ u8 operation = 0;
+ u8 is_spray = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!is_add && !is_mod && !is_del && unformat (input, "add"))
+ is_add = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
+ is_del = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
+ is_mod = 1;
+ else if (!policy_set
+ && unformat (input, "bsid %U", unformat_mpls_unicast_label,
+ &bsid))
+ policy_set = 1;
+ else if (!is_add && !policy_set
+ && unformat (input, "index %d", &sr_policy_index))
+ policy_set = 1;
+ else if (unformat (input, "weight %d", &weight));
+ else
+ if (unformat
+ (input, "next %U", unformat_mpls_unicast_label, &next_label))
+ {
+ vec_add (segments, &next_label, 1);
+ }
+ else if (unformat (input, "add sl"))
+ operation = 1;
+ else if (unformat (input, "del sl index %d", &sl_index))
+ operation = 2;
+ else if (unformat (input, "mod sl index %d", &sl_index))
+ operation = 3;
+ else if (unformat (input, "spray"))
+ is_spray = 1;
+ else
+ break;
+ }
+
+ if (!is_add && !is_mod && !is_del)
+ return clib_error_return (0, "Incorrect CLI");
+
+ if (!policy_set)
+ return clib_error_return (0, "No SR policy BSID or index specified");
+
+ if (is_add)
+ {
+ if (vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+
+ rv = sr_mpls_policy_add (bsid, segments,
+ (is_spray ? SR_POLICY_TYPE_SPRAY :
+ SR_POLICY_TYPE_DEFAULT), weight);
+ }
+ else if (is_del)
+ rv =
+      sr_mpls_policy_del ((sr_policy_index != (u32) ~ 0 ?
+			   (mpls_label_t) 0 /* select by index */ : bsid),
+			  sr_policy_index);
+ else if (is_mod)
+ {
+ if (!operation)
+ return clib_error_return (0, "No SL modification specified");
+ if (operation != 1 && sl_index == (u32) ~ 0)
+ return clib_error_return (0, "No Segment List index specified");
+ if (operation == 1 && vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+ if (operation == 3 && weight == (u32) ~ 0)
+ return clib_error_return (0, "No new weight for the SL specified");
+ rv =
+	sr_mpls_policy_mod ((sr_policy_index != (u32) ~ 0 ?
+			     (mpls_label_t) 0 /* select by index */ : bsid),
+			    sr_policy_index, operation, segments,
+			    sl_index, weight);
+ }
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -12:
+ return clib_error_return (0,
+ "There is already a FIB entry for the BindingSID address.\n"
+ "The SR policy could not be created.");
+ case -21:
+ return clib_error_return (0,
+ "The selected SR policy only contains ONE segment list. "
+ "Please remove the SR policy instead");
+ case -22:
+ return clib_error_return (0,
+ "Could not delete the segment list. "
+ "It is not associated with that SR policy.");
+ case -32:
+ return clib_error_return (0,
+ "Could not modify the segment list. "
+ "The given SL is not associated with such SR policy.");
+ default:
+ return clib_error_return (0, "BUG: sr policy returns %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_mpls_policy_command, static) = {
+ .path = "sr mpls policy",
+  .short_help = "sr mpls policy [add|del|mod] bsid 2999 "
+ "next 10 next 20 next 30 (weight 1) (spray)",
+ .long_help = "TBD.\n",
+ .function = sr_mpls_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI to display onscreen all the SR MPLS policies
+ */
+static clib_error_t *
+show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_sl_t *segment_list = 0;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_policy_t **vec_policies = 0;
+ mpls_label_t *label;
+ u32 *sl_index;
+ u8 *s;
+ int i = 0;
+
+ vlib_cli_output (vm, "SR MPLS policies:");
+
+ /* *INDENT-OFF* */
+ pool_foreach (sr_policy, sm->sr_policies, {vec_add1 (vec_policies, sr_policy); } );
+ /* *INDENT-ON* */
+
+ vec_foreach_index (i, vec_policies)
+ {
+ sr_policy = vec_policies[i];
+ vlib_cli_output (vm, "[%u].-\tBSID: %U",
+ (u32) (sr_policy - sm->sr_policies),
+ format_mpls_unicast_label, sr_policy->bsid);
+ vlib_cli_output (vm, "\tType: %s",
+ (sr_policy->type ==
+ SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
+ vlib_cli_output (vm, "\tSegment Lists:");
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ s = NULL;
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ s = format (s, "\t[%u].- ", *sl_index);
+ s = format (s, "< ");
+ vec_foreach (label, segment_list->segments)
+ {
+ s = format (s, "%U, ", format_mpls_unicast_label, *label);
+ }
+ s = format (s, "\b\b > ");
+ vlib_cli_output (vm, " %s", s);
+ }
+ vlib_cli_output (vm, "-----------");
+ }
+ vec_free (vec_policies);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_mpls_policies_command, static) = {
+ .path = "show sr mpls policies",
+ .short_help = "show sr mpls policies",
+ .function = show_sr_mpls_policies_command_fn,
+};
+/* *INDENT-ON* */
+
+/********************* SR MPLS Policy initialization ***********************/
+/**
+ * @brief SR MPLS Policy initialization
+ */
+clib_error_t *
+sr_mpls_policy_rewrite_init (vlib_main_t * vm)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+
+  /* Init memory for sr policy keys (bsid <-> mpls_label_t) */
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_mpls_policy_rewrite_init);
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c
new file mode 100755
index 00000000..3a9aea2d
--- /dev/null
+++ b/src/vnet/srmpls/sr_mpls_steering.c
@@ -0,0 +1,453 @@
+/*
+ * sr_mpls_steering.c: steering of IP traffic into SR-MPLS policies
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Packet steering into SR-MPLS Policies
+ *
+ * This file is in charge of handling the FIB appropriately to steer packets
+ * through SR Policies as defined in 'sr_mpls_policy.c'. Notice that here
+ * we are only doing steering; SR policy application is done in
+ * sr_mpls_policy.c.
+ *
+ * Supports:
+ * - Steering of IPv6 traffic based on destination address
+ * - Steering of IPv4 traffic based on destination address
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srmpls/sr.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/mpls_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief Steer L3 traffic through a given SR-MPLS policy
+ *
+ * @param is_del
+ * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ * @param sr_policy is the index of the SR Policy (alt to bsid)
+ * @param table_id is the VRF where to install the FIB entry for the BSID
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param traffic_type describes the type of traffic
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_steering_policy (int is_del, mpls_label_t bsid, u32 sr_policy_index,
+ u32 table_id, ip46_address_t * prefix,
+ u32 mask_width, u8 traffic_type)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ sr_mpls_steering_key_t key;
+ mpls_sr_steering_policy_t *steer_pl;
+ fib_prefix_t pfx = { 0 };
+
+ mpls_sr_policy_t *sr_policy = 0;
+ uword *p = 0;
+
+ memset (&key, 0, sizeof (sr_mpls_steering_key_t));
+
+ /* Compute the steer policy key */
+ if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+ {
+ key.prefix.as_u64[0] = prefix->as_u64[0];
+ key.prefix.as_u64[1] = prefix->as_u64[1];
+ key.mask_width = mask_width;
+ key.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+ }
+ else
+ return -1;
+
+ key.traffic_type = traffic_type;
+
+ /* Search for the item */
+ p = mhash_get (&sm->sr_steer_policies_hash, &key);
+
+ if (p)
+ {
+ /* Retrieve Steer Policy function */
+ steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+
+ if (is_del)
+ {
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP6,
+				       steer_pl->classify.fib_table), &pfx,
+				      FIB_SOURCE_SR);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP4,
+				       steer_pl->classify.fib_table), &pfx,
+				      FIB_SOURCE_SR);
+ }
+
+ /* Delete SR steering policy entry */
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+
+ return 1;
+ }
+ else /* It means user requested to update an existing SR steering policy */
+ {
+ /* Retrieve SR steering policy */
+ if (bsid) //TODO FIXME
+ {
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -2;
+ }
+ else
+ sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+ if (!sr_policy)
+ return -2;
+
+ steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+ /* Remove old FIB/hw redirection and create a new one */
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ steer_pl->classify.fib_table), &pfx,
+ FIB_SOURCE_SR);
+
+ /* Create a new one */
+ goto update_fib;
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ steer_pl->classify.fib_table), &pfx,
+ FIB_SOURCE_SR);
+
+ /* Create a new one */
+ goto update_fib;
+ }
+ }
+ }
+ else
+ /* delete; steering policy does not exist; complain */
+ if (is_del)
+ return -4;
+
+ /* Retrieve SR policy */
+ if (bsid) //FIX
+ {
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -2;
+ }
+ else
+ sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+ /* Create a new steering policy */
+ pool_get (sm->steer_policies, steer_pl);
+ memset (steer_pl, 0, sizeof (*steer_pl));
+
+ if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+ {
+ clib_memcpy (&steer_pl->classify.prefix, prefix,
+ sizeof (ip46_address_t));
+ steer_pl->classify.mask_width = mask_width;
+ steer_pl->classify.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+ steer_pl->classify.traffic_type = traffic_type;
+ }
+ else
+ {
+ /* Incorrect API usage. Should never get here */
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+ return -1;
+ }
+ steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+ /* Create and store key */
+ mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
+ NULL);
+
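+  /* Add (or re-add) the IP route that recursively resolves through the
+     MPLS FIB entry of the policy's BSID */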
+update_fib:;
+
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_local_label = sr_policy->bsid,
+ .frp_eos = MPLS_EOS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_label_stack = NULL
+ };
+
+ fib_route_path_t *paths = NULL;
+
+ /* FIB API calls - Recursive route through the BindingSID */
+ if (traffic_type == SR_STEER_IPV6)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+ path.frp_fib_index = 0;
+
+ vec_add1 (paths, path);
+
+ fib_table_entry_path_add2 (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ (table_id != (u32) ~ 0 ? table_id : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+ vec_free (paths);
+ }
+ else if (traffic_type == SR_STEER_IPV4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+ path.frp_fib_index = 0;
+
+ vec_add1 (paths, path);
+
+ fib_table_entry_path_add2 (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ (table_id != (u32) ~ 0 ? table_id : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+ vec_free (paths);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0;
+
+ ip46_address_t prefix;
+ u32 dst_mask_width = 0;
+ u8 traffic_type = 0;
+ u32 fib_table = (u32) ~ 0;
+
+  mpls_label_t bsid = 0;	/* 0: no BSID given; select the SR policy by index */
+ u32 sr_policy_index = (u32) ~ 0;
+
+ u8 sr_policy_set = 0;
+
+ memset (&prefix, 0, sizeof (ip46_address_t));
+
+ int rv;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip6_address,
+ &prefix.ip6, &dst_mask_width))
+ traffic_type = SR_STEER_IPV6;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip4_address,
+ &prefix.ip4, &dst_mask_width))
+ traffic_type = SR_STEER_IPV4;
+ else if (!sr_policy_set
+ && unformat (input, "via sr policy index %d",
+ &sr_policy_index))
+ sr_policy_set = 1;
+ else if (!sr_policy_set
+ && unformat (input, "via sr policy bsid %U",
+ unformat_mpls_unicast_label, &bsid))
+ sr_policy_set = 1;
+ else if (fib_table == (u32) ~ 0
+ && unformat (input, "fib-table %d", &fib_table));
+ else
+ break;
+ }
+
+ if (!traffic_type)
+ return clib_error_return (0, "No L3 traffic specified");
+ if (!sr_policy_set)
+ return clib_error_return (0, "No SR policy specified");
+
+ /* Make sure that the prefixes are clean */
+ if (traffic_type == SR_STEER_IPV4)
+ {
+ u32 mask =
+ (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0);
+ prefix.ip4.as_u32 &= mask;
+ }
+ else if (traffic_type == SR_STEER_IPV6)
+ {
+ ip6_address_t mask;
+ ip6_address_mask_from_width (&mask, dst_mask_width);
+ ip6_address_mask (&prefix.ip6, &mask);
+ }
+
+ rv =
+ sr_mpls_steering_policy (is_del, bsid,
+ sr_policy_index, fib_table, &prefix,
+ dst_mask_width, traffic_type);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -1:
+ return clib_error_return (0, "Incorrect API usage.");
+ case -2:
+ return clib_error_return (0,
+ "The requested SR policy could not be located. Review the BSID/index.");
+ case -3:
+ return clib_error_return (0,
+ "Unable to do SW redirect. Incorrect interface.");
+ case -4:
+ return clib_error_return (0,
+ "The requested SR steering policy could not be deleted.");
+ case -5:
+ return clib_error_return (0,
+ "The SR policy is not an encapsulation one.");
+ default:
+ return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_mpls_steer_policy_command, static) = {
+ .path = "sr mpls steer",
+  .short_help = "sr mpls steer (del) l3 <ip_addr/mask> "
+    "via sr policy bsid <mpls_label> (fib-table <fib_table_index>)",
+ .long_help =
+ "\tSteer L3 traffic through an existing SR policy.\n"
+ "\tExamples:\n"
+    "\t\tsr mpls steer l3 2001::/64 via sr policy index 5\n"
+    "\t\tsr mpls steer l3 2001::/64 via sr policy bsid 29999\n"
+    "\t\tsr mpls steer del l3 2001::/64 via sr policy index 5\n",
+ .function = sr_mpls_steer_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_steering_policy_t **steer_policies = 0;
+ mpls_sr_steering_policy_t *steer_pl;
+
+ mpls_sr_policy_t *pl = 0;
+ int i;
+
+ vlib_cli_output (vm, "SR MPLS steering policies:");
+ /* *INDENT-OFF* */
+ pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);}));
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
+ for (i = 0; i < vec_len (steer_policies); i++)
+ {
+ steer_pl = steer_policies[i];
+ pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy);
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ vlib_cli_output (vm, "L3 %U/%d\t%U",
+ format_ip4_address,
+ &steer_pl->classify.prefix.ip4,
+ steer_pl->classify.mask_width,
+ format_mpls_unicast_label, pl->bsid);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ vlib_cli_output (vm, "L3 %U/%d\t%U",
+ format_ip6_address,
+ &steer_pl->classify.prefix.ip6,
+ steer_pl->classify.mask_width,
+ format_mpls_unicast_label, pl->bsid);
+ }
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_mpls_steering_policies_command, static) = {
+ .path = "show sr mpls steering policies",
+ .short_help = "show sr mpls steering policies",
+ .function = show_sr_mpls_steering_policies_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+sr_mpls_steering_init (vlib_main_t * vm)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+
+ /* Init memory for function keys */
+  /* Init memory for the SR steering-policy keys */
+ sizeof (sr_mpls_steering_key_t));
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_INIT_FUNCTION (sr_mpls_steering_init);
+/* *INDENT-ON* */
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srp/format.c b/src/vnet/srp/format.c
new file mode 100644
index 00000000..a0250cc9
--- /dev/null
+++ b/src/vnet/srp/format.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * srp_format.c: srp formatting/parsing.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/srp/srp.h>
+#include <vnet/ethernet/ethernet.h>
+
+static u8 * format_srp_mode (u8 * s, va_list * args)
+{
+ u32 mode = va_arg (*args, u32);
+ char * t = 0;
+ switch (mode)
+ {
+#define _(f) case SRP_MODE_##f: t = #f; break;
+ foreach_srp_mode
+#undef _
+ default: t = 0; break;
+ }
+ if (t)
+ s = format (s, "%s", t);
+ else
+ s = format (s, "unknown 0x%x", mode);
+
+ return s;
+}
+
+u8 * format_srp_header_with_length (u8 * s, va_list * args)
+{
+ srp_and_ethernet_header_t * h = va_arg (*args, srp_and_ethernet_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ ethernet_main_t * em = &ethernet_main;
+ uword indent, header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "srp header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "mode %U, ring %s, priority %d, ttl %d",
+ format_srp_mode, h->srp.mode,
+ h->srp.is_inner_ring ? "inner" : "outer",
+ h->srp.priority, h->srp.ttl);
+
+ s = format (s, "\n%U%U: %U -> %U",
+ format_white_space, indent,
+ format_ethernet_type, clib_net_to_host_u16 (h->ethernet.type),
+ format_ethernet_address, h->ethernet.src_address,
+ format_ethernet_address, h->ethernet.dst_address);
+
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ ethernet_type_info_t * ti;
+ vlib_node_t * node;
+
+ ti = ethernet_get_type_info (em, h->ethernet.type);
+ node = ti ? vlib_get_node (em->vlib_main, ti->node_index) : 0;
+ if (node && node->format_buffer)
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ node->format_buffer, (void *) h + header_bytes,
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+u8 * format_srp_header (u8 * s, va_list * args)
+{
+ srp_header_t * m = va_arg (*args, srp_header_t *);
+ return format (s, "%U", format_srp_header_with_length, m, 0);
+}
+
+uword
+unformat_srp_header (unformat_input_t * input, va_list * args)
+{
+ u8 ** result = va_arg (*args, u8 **);
+ srp_and_ethernet_header_t * h;
+
+ {
+ void * p;
+ vec_add2 (*result, p, sizeof (h[0]));
+ h = p;
+ }
+
+ if (! unformat (input, "%U: %U -> %U",
+ unformat_ethernet_type_net_byte_order, &h->ethernet.type,
+ unformat_ethernet_address, &h->ethernet.src_address,
+ unformat_ethernet_address, &h->ethernet.dst_address))
+ return 0;
+
+ h->srp.mode = SRP_MODE_data;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u32 x;
+
+ if (unformat (input, "control"))
+ h->srp.mode = SRP_MODE_control_pass_to_host;
+
+ else if (unformat (input, "pri %d", &x))
+ h->srp.priority = x;
+
+ else if (unformat (input, "ttl %d", &x))
+ h->srp.ttl = x;
+
+ else
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/src/vnet/srp/interface.c b/src/vnet/srp/interface.c
new file mode 100644
index 00000000..44e2b0d6
--- /dev/null
+++ b/src/vnet/srp/interface.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * srp_interface.c: srp interfaces
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/srp/srp.h>
+
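+/* Build the (SRP + Ethernet) header rewrite used by adjacencies on an SRP
+   interface; a NULL dst_address yields the broadcast destination. */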
+static u8*
+srp_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void * dst_address)
+{
+ vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ srp_main_t * sm = &srp_main;
+ srp_and_ethernet_header_t * h;
+ u8* rewrite = NULL;
+ u16 type;
+ uword n_bytes = sizeof (h[0]);
+
+ switch (link_type) {
+#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
+ _ (IP4, IP4);
+ _ (IP6, IP6);
+ _ (MPLS, MPLS);
+ _ (ARP, ARP);
+#undef _
+ default:
+ return (NULL);
+ }
+
+ vec_validate(rewrite, n_bytes-1);
+ h = (srp_and_ethernet_header_t *)rewrite;
+
+ clib_memcpy (h->ethernet.src_address, hw->hw_address, sizeof (h->ethernet.src_address));
+ if (dst_address)
+ clib_memcpy (h->ethernet.dst_address, dst_address, sizeof (h->ethernet.dst_address));
+ else
+ memset (h->ethernet.dst_address, ~0, sizeof (h->ethernet.dst_address)); /* broadcast */
+
+ h->ethernet.type = clib_host_to_net_u16 (type);
+
+ h->srp.as_u16 = 0;
+ h->srp.mode = SRP_MODE_data;
+ h->srp.ttl = sm->default_data_ttl;
+ srp_header_compute_parity (&h->srp);
+
+ return (rewrite);
+}
+
+static void srp_register_interface_helper (u32 * hw_if_indices_by_side, u32 redistribute);
+
+void serialize_srp_main (serialize_main_t * m, va_list * va)
+{
+ srp_main_t * sm = &srp_main;
+ srp_interface_t * si;
+
+ serialize_integer (m, pool_elts (sm->interface_pool), sizeof (u32));
+ pool_foreach (si, sm->interface_pool, ({
+ serialize_integer (m, si->rings[SRP_RING_OUTER].hw_if_index, sizeof (u32));
+ serialize_integer (m, si->rings[SRP_RING_INNER].hw_if_index, sizeof (u32));
+ }));
+}
+
+void unserialize_srp_main (serialize_main_t * m, va_list * va)
+{
+ u32 i, n_ifs, hw_if_indices[SRP_N_RING];
+
+ unserialize_integer (m, &n_ifs, sizeof (u32));
+ for (i = 0; i < n_ifs; i++)
+ {
+ unserialize_integer (m, &hw_if_indices[SRP_RING_OUTER], sizeof (u32));
+ unserialize_integer (m, &hw_if_indices[SRP_RING_INNER], sizeof (u32));
+ srp_register_interface_helper (hw_if_indices, /* redistribute */ 0);
+ }
+}
+
+static void serialize_srp_register_interface_msg (serialize_main_t * m, va_list * va)
+{
+ u32 * hw_if_indices = va_arg (*va, u32 *);
+ serialize_integer (m, hw_if_indices[SRP_SIDE_A], sizeof (hw_if_indices[SRP_SIDE_A]));
+ serialize_integer (m, hw_if_indices[SRP_SIDE_B], sizeof (hw_if_indices[SRP_SIDE_B]));
+}
+
+static void unserialize_srp_register_interface_msg (serialize_main_t * m, va_list * va)
+{
+ CLIB_UNUSED (mc_main_t * mcm) = va_arg (*va, mc_main_t *);
+ u32 hw_if_indices[SRP_N_SIDE];
+ srp_main_t * sm = &srp_main;
+ uword * p;
+
+ unserialize_integer (m, &hw_if_indices[SRP_SIDE_A], sizeof (hw_if_indices[SRP_SIDE_A]));
+ unserialize_integer (m, &hw_if_indices[SRP_SIDE_B], sizeof (hw_if_indices[SRP_SIDE_B]));
+
+ p = hash_get (sm->srp_register_interface_waiting_process_pool_index_by_hw_if_index,
+ hw_if_indices[0]);
+ if (p)
+ {
+ vlib_one_time_waiting_process_t * wp = pool_elt_at_index (sm->srp_register_interface_waiting_process_pool, p[0]);
+ vlib_signal_one_time_waiting_process (mcm->vlib_main, wp);
+ pool_put (sm->srp_register_interface_waiting_process_pool, wp);
+ hash_unset (sm->srp_register_interface_waiting_process_pool_index_by_hw_if_index,
+ hw_if_indices[0]);
+ }
+ else
+ srp_register_interface_helper (hw_if_indices, /* redistribute */ 0);
+}
+
+MC_SERIALIZE_MSG (srp_register_interface_msg, static) = {
+ .name = "vnet_srp_register_interface",
+ .serialize = serialize_srp_register_interface_msg,
+ .unserialize = unserialize_srp_register_interface_msg,
+};
+
+static void srp_register_interface_helper (u32 * hw_if_indices_by_side, u32 redistribute)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ srp_main_t * sm = &srp_main;
+ vlib_main_t * vm = sm->vlib_main;
+ srp_interface_t * si;
+ vnet_hw_interface_t * hws[SRP_N_RING];
+ uword s, * p;
+
+ if (vm->mc_main && redistribute)
+ {
+ vlib_one_time_waiting_process_t * wp;
+ mc_serialize (vm->mc_main, &srp_register_interface_msg, hw_if_indices_by_side);
+ pool_get (sm->srp_register_interface_waiting_process_pool, wp);
+ hash_set (sm->srp_register_interface_waiting_process_pool_index_by_hw_if_index,
+ hw_if_indices_by_side[0],
+ wp - sm->srp_register_interface_waiting_process_pool);
+ vlib_current_process_wait_for_one_time_event (vm, wp);
+ }
+
+ /* Check if interface has already been registered. */
+ p = hash_get (sm->interface_index_by_hw_if_index, hw_if_indices_by_side[0]);
+ if (p)
+ {
+ si = pool_elt_at_index (sm->interface_pool, p[0]);
+ }
+ else
+ {
+ pool_get (sm->interface_pool, si);
+ memset (si, 0, sizeof (si[0]));
+ }
+ for (s = 0; s < SRP_N_SIDE; s++)
+ {
+ hws[s] = vnet_get_hw_interface (vnm, hw_if_indices_by_side[s]);
+ si->rings[s].ring = s;
+ si->rings[s].hw_if_index = hw_if_indices_by_side[s];
+ si->rings[s].sw_if_index = hws[s]->sw_if_index;
+ hash_set (sm->interface_index_by_hw_if_index, hw_if_indices_by_side[s], si - sm->interface_pool);
+ }
+
+ /* Inherit MAC address from outer ring. */
+ clib_memcpy (si->my_address, hws[SRP_RING_OUTER]->hw_address,
+ vec_len (hws[SRP_RING_OUTER]->hw_address));
+
+ /* Default time to wait to restore signal. */
+ si->config.wait_to_restore_idle_delay = 60;
+ si->config.ips_tx_interval = 1;
+}
+
+void srp_register_interface (u32 * hw_if_indices_by_side)
+{
+ srp_register_interface_helper (hw_if_indices_by_side, /* redistribute */ 1);
+}
+
+void srp_interface_set_hw_wrap_function (u32 hw_if_index, srp_hw_wrap_function_t * f)
+{
+ srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index);
+ si->hw_wrap_function = f;
+}
+
+void srp_interface_set_hw_enable_function (u32 hw_if_index, srp_hw_enable_function_t * f)
+{
+ srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index);
+ si->hw_enable_function = f;
+}
+
+void srp_interface_enable_ips (u32 hw_if_index)
+{
+ srp_main_t * sm = &srp_main;
+ srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index);
+
+ si->ips_process_enable = 1;
+
+ vlib_node_set_state (sm->vlib_main, srp_ips_process_node.index, VLIB_NODE_STATE_POLLING);
+}
+
+static uword
+srp_is_valid_class_for_interface (vnet_main_t * vnm, u32 hw_if_index, u32 hw_class_index)
+{
+ srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index);
+
+ if (! si)
+ return 0;
+
+ /* Both sides must be admin down. */
+ if (vnet_sw_interface_is_admin_up (vnm, si->rings[SRP_RING_OUTER].sw_if_index))
+ return 0;
+ if (vnet_sw_interface_is_admin_up (vnm, si->rings[SRP_RING_INNER].sw_if_index))
+ return 0;
+
+ return 1;
+}
+
+static void
+srp_interface_hw_class_change (vnet_main_t * vnm, u32 hw_if_index,
+ u32 old_hw_class_index, u32 new_hw_class_index)
+{
+ srp_main_t * sm = &srp_main;
+ srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index);
+ vnet_hw_interface_t * hi;
+ vnet_device_class_t * dc;
+ u32 r, to_srp;
+
+  if (!si)
+    {
+      clib_warning ("hw_if_index %d is not an SRP interface", hw_if_index);
+      return;
+    }
+
+ to_srp = new_hw_class_index == srp_hw_interface_class.index;
+
+ /* Changing class on either outer or inner rings implies changing the class
+ of the other. */
+ for (r = 0; r < SRP_N_RING; r++)
+ {
+ srp_interface_ring_t * ir = &si->rings[r];
+
+ hi = vnet_get_hw_interface (vnm, ir->hw_if_index);
+ dc = vnet_get_device_class (vnm, hi->dev_class_index);
+
+ /* hw_if_index itself will be handled by caller. */
+ if (ir->hw_if_index != hw_if_index)
+ {
+ vnet_hw_interface_init_for_class (vnm, ir->hw_if_index,
+ new_hw_class_index,
+ to_srp ? si - sm->interface_pool : ~0);
+
+ if (dc->hw_class_change)
+ dc->hw_class_change (vnm, ir->hw_if_index, new_hw_class_index);
+ }
+ else
+ hi->hw_instance = to_srp ? si - sm->interface_pool : ~0;
+ }
+
+ if (si->hw_enable_function)
+ si->hw_enable_function (si, /* enable */ to_srp);
+}
+
+VNET_HW_INTERFACE_CLASS (srp_hw_interface_class) = {
+ .name = "SRP",
+ .format_address = format_ethernet_address,
+ .format_header = format_srp_header_with_length,
+ .format_device = format_srp_device,
+ .unformat_hw_address = unformat_ethernet_address,
+ .unformat_header = unformat_srp_header,
+ .build_rewrite = srp_build_rewrite,
+ .update_adjacency = ethernet_update_adjacency,
+ .is_valid_class_for_interface = srp_is_valid_class_for_interface,
+ .hw_class_change = srp_interface_hw_class_change,
+};
+
+static void serialize_srp_interface_config_msg (serialize_main_t * m, va_list * va)
+{
+ srp_interface_t * si = va_arg (*va, srp_interface_t *);
+ srp_main_t * sm = &srp_main;
+
+ ASSERT (! pool_is_free (sm->interface_pool, si));
+ serialize_integer (m, si - sm->interface_pool, sizeof (u32));
+ serialize (m, serialize_f64, si->config.wait_to_restore_idle_delay);
+ serialize (m, serialize_f64, si->config.ips_tx_interval);
+}
+
+static void unserialize_srp_interface_config_msg (serialize_main_t * m, va_list * va)
+{
+ CLIB_UNUSED (mc_main_t * mcm) = va_arg (*va, mc_main_t *);
+ srp_main_t * sm = &srp_main;
+ srp_interface_t * si;
+ u32 si_index;
+
+ unserialize_integer (m, &si_index, sizeof (u32));
+ si = pool_elt_at_index (sm->interface_pool, si_index);
+ unserialize (m, unserialize_f64, &si->config.wait_to_restore_idle_delay);
+ unserialize (m, unserialize_f64, &si->config.ips_tx_interval);
+}
+
+MC_SERIALIZE_MSG (srp_interface_config_msg, static) = {
+ .name = "vnet_srp_interface_config",
+ .serialize = serialize_srp_interface_config_msg,
+ .unserialize = unserialize_srp_interface_config_msg,
+};
+
+void srp_interface_get_interface_config (u32 hw_if_index, srp_interface_config_t * c)
+{
+ srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index);
+ ASSERT (si != 0);
+ c[0] = si->config;
+}
+
+void srp_interface_set_interface_config (u32 hw_if_index, srp_interface_config_t * c)
+{
+ srp_main_t * sm = &srp_main;
+ vlib_main_t * vm = sm->vlib_main;
+ srp_interface_t * si = srp_get_interface_from_vnet_hw_interface (hw_if_index);
+ ASSERT (si != 0);
+ if (memcmp (&si->config, &c[0], sizeof (c[0])))
+ {
+ si->config = c[0];
+ if (vm->mc_main)
+ mc_serialize (vm->mc_main, &srp_interface_config_msg, si);
+ }
+}
+
+#if DEBUG > 0
+
+#define VNET_SIMULATED_SRP_TX_NEXT_SRP_INPUT VNET_INTERFACE_TX_N_NEXT
+
+/* Echo packets back to srp input. */
+static uword
+simulated_srp_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, n_left_to_next, n_copy, * from, * to_next;
+ u32 next_index = VNET_SIMULATED_SRP_TX_NEXT_SRP_INPUT;
+ u32 i;
+ vlib_buffer_t * b;
+
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_args (frame);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ n_copy = clib_min (n_left_from, n_left_to_next);
+
+ clib_memcpy (to_next, from, n_copy * sizeof (from[0]));
+ n_left_to_next -= n_copy;
+ n_left_from -= n_copy;
+ for (i = 0; i < n_copy; i++)
+ {
+ b = vlib_get_buffer (vm, from[i]);
+ /* TX interface will be fake eth; copy to RX for benefit of srp-input. */
+ b->sw_if_index[VLIB_RX] = b->sw_if_index[VLIB_TX];
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return n_left_from;
+}
+
+static u8 * format_simulated_srp_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "fake-srp%d", dev_instance);
+}
+
+VNET_DEVICE_CLASS (srp_simulated_device_class,static) = {
+ .name = "Simulated srp",
+ .format_device_name = format_simulated_srp_name,
+ .tx_function = simulated_srp_interface_tx,
+};
+
+static clib_error_t *
+create_simulated_srp_interfaces (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ u8 address[6];
+ u32 hw_if_index;
+ vnet_hw_interface_t * hi;
+ static u32 instance;
+
+ if (! unformat_user (input, unformat_ethernet_address, &address))
+ {
+ memset (address, 0, sizeof (address));
+ address[0] = 0xde;
+ address[1] = 0xad;
+ address[5] = instance;
+ }
+
+ hw_if_index = vnet_register_interface (vnm,
+ srp_simulated_device_class.index,
+ instance++,
+ srp_hw_interface_class.index, 0);
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ srp_setup_node (vm, hi->output_node_index);
+
+ hi->min_packet_bytes = 40 + 16;
+
+ /* Standard default ethernet MTU. */
+ hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1500;
+
+ vec_free (hi->hw_address);
+ vec_add (hi->hw_address, address, sizeof (address));
+
+ {
+ uword slot;
+
+ slot = vlib_node_add_named_next_with_slot
+ (vm, hi->tx_node_index,
+ "srp-input",
+ VNET_SIMULATED_SRP_TX_NEXT_SRP_INPUT);
+ ASSERT (slot == VNET_SIMULATED_SRP_TX_NEXT_SRP_INPUT);
+ }
+
+ return /* no error */ 0;
+}
+
+static VLIB_CLI_COMMAND (create_simulated_srp_interface_command) = {
+ .path = "srp create-interfaces",
+ .short_help = "Create simulated srp interface",
+ .function = create_simulated_srp_interfaces,
+};
+#endif
diff --git a/src/vnet/srp/node.c b/src/vnet/srp/node.c
new file mode 100644
index 00000000..897be254
--- /dev/null
+++ b/src/vnet/srp/node.c
@@ -0,0 +1,932 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node.c: srp packet processing
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip_packet.h> /* for ip_csum_fold */
+#include <vnet/srp/srp.h>
+
+typedef struct {
+ u8 packet_data[32];
+} srp_input_trace_t;
+
+static u8 * format_srp_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ srp_input_trace_t * t = va_arg (*va, srp_input_trace_t *);
+
+ s = format (s, "%U", format_srp_header, t->packet_data);
+
+ return s;
+}
+
+typedef enum {
+ SRP_INPUT_NEXT_ERROR,
+ SRP_INPUT_NEXT_ETHERNET_INPUT,
+ SRP_INPUT_NEXT_CONTROL,
+ SRP_INPUT_N_NEXT,
+} srp_input_next_t;
+
+typedef struct {
+ u8 next_index;
+ u8 buffer_advance;
+ u16 error;
+} srp_input_disposition_t;
+
+static srp_input_disposition_t srp_input_disposition_by_mode[8] = {
+ [SRP_MODE_reserved0] = {
+ .next_index = SRP_INPUT_NEXT_ERROR,
+ .error = SRP_ERROR_UNKNOWN_MODE,
+ },
+ [SRP_MODE_reserved1] = {
+ .next_index = SRP_INPUT_NEXT_ERROR,
+ .error = SRP_ERROR_UNKNOWN_MODE,
+ },
+ [SRP_MODE_reserved2] = {
+ .next_index = SRP_INPUT_NEXT_ERROR,
+ .error = SRP_ERROR_UNKNOWN_MODE,
+ },
+ [SRP_MODE_reserved3] = {
+ .next_index = SRP_INPUT_NEXT_ERROR,
+ .error = SRP_ERROR_UNKNOWN_MODE,
+ },
+ [SRP_MODE_keep_alive] = {
+ .next_index = SRP_INPUT_NEXT_ERROR,
+ .error = SRP_ERROR_KEEP_ALIVE_DROPPED,
+ },
+ [SRP_MODE_data] = {
+ .next_index = SRP_INPUT_NEXT_ETHERNET_INPUT,
+ .buffer_advance = sizeof (srp_header_t),
+ },
+ [SRP_MODE_control_pass_to_host] = {
+ .next_index = SRP_INPUT_NEXT_CONTROL,
+ },
+ [SRP_MODE_control_locally_buffered_for_host] = {
+ .next_index = SRP_INPUT_NEXT_CONTROL,
+ },
+};
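+
+/* The dispatch table above maps the 3-bit SRP mode field to a next
+   node, an optional buffer advance and an error code: data packets
+   advance past the SRP header into ethernet-input, keep-alives are
+   counted and dropped, and control packets go to srp-control. */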
+
+static uword
+srp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ srp_main_t * sm = &srp_main;
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (srp_input_trace_t));
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1, sw_if_index0, sw_if_index1;
+ vlib_buffer_t * b0, * b1;
+ u8 next0, next1, error0, error1;
+ srp_header_t * s0, * s1;
+ srp_input_disposition_t * d0, * d1;
+ vnet_hw_interface_t * hi0, * hi1;
+ srp_interface_t * si0, * si1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * b2, * b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, LOAD);
+ vlib_prefetch_buffer_header (b3, LOAD);
+
+ CLIB_PREFETCH (b2->data, sizeof (srp_header_t), LOAD);
+ CLIB_PREFETCH (b3->data, sizeof (srp_header_t), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ s0 = (void *) (b0->data + b0->current_data);
+ s1 = (void *) (b1->data + b1->current_data);
+
+ /* Data packets are always assigned to side A (outer ring) interface. */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1);
+
+ si0 = pool_elt_at_index (sm->interface_pool, hi0->hw_instance);
+ si1 = pool_elt_at_index (sm->interface_pool, hi1->hw_instance);
+
+ sw_if_index0 = (s0->mode == SRP_MODE_data
+ ? si0->rings[SRP_RING_OUTER].sw_if_index
+ : sw_if_index0);
+ sw_if_index1 = (s1->mode == SRP_MODE_data
+ ? si1->rings[SRP_RING_OUTER].sw_if_index
+ : sw_if_index1);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+
+ d0 = srp_input_disposition_by_mode + s0->mode;
+ d1 = srp_input_disposition_by_mode + s1->mode;
+
+ next0 = d0->next_index;
+ next1 = d1->next_index;
+
+ error0 = d0->error;
+ error1 = d1->error;
+
+ vlib_buffer_advance (b0, d0->buffer_advance);
+ vlib_buffer_advance (b1, d1->buffer_advance);
+
+ b0->error = node->errors[error0];
+ b1->error = node->errors[error1];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, sw_if_index0;
+ vlib_buffer_t * b0;
+ u8 next0, error0;
+ srp_header_t * s0;
+ srp_input_disposition_t * d0;
+ srp_interface_t * si0;
+ vnet_hw_interface_t * hi0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ s0 = (void *) (b0->data + b0->current_data);
+
+ /* Data packets are always assigned to side A (outer ring) interface. */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ si0 = pool_elt_at_index (sm->interface_pool, hi0->hw_instance);
+
+ sw_if_index0 = (s0->mode == SRP_MODE_data
+ ? si0->rings[SRP_RING_OUTER].sw_if_index
+ : sw_if_index0);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+ d0 = srp_input_disposition_by_mode + s0->mode;
+
+ next0 = d0->next_index;
+
+ error0 = d0->error;
+
+ vlib_buffer_advance (b0, d0->buffer_advance);
+
+ b0->error = node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char * srp_error_strings[] = {
+#define _(f,s) s,
+ foreach_srp_error
+#undef _
+};
+
+static vlib_node_registration_t srp_input_node = {
+ .function = srp_input,
+ .name = "srp-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = SRP_N_ERROR,
+ .error_strings = srp_error_strings,
+
+ .n_next_nodes = SRP_INPUT_N_NEXT,
+ .next_nodes = {
+ [SRP_INPUT_NEXT_ERROR] = "error-drop",
+ [SRP_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input",
+ [SRP_INPUT_NEXT_CONTROL] = "srp-control",
+ },
+
+ .format_buffer = format_srp_header_with_length,
+ .format_trace = format_srp_input_trace,
+ .unformat_buffer = unformat_srp_header,
+};
+
+static uword
+srp_topology_packet (vlib_main_t * vm, u32 sw_if_index, u8 ** contents)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ srp_topology_header_t * t;
+ srp_topology_mac_binding_t * mb;
+ u32 nb, nmb;
+
+ t = (void *) *contents;
+
+ nb = clib_net_to_host_u16 (t->n_bytes_of_data_that_follows);
+ nmb = (nb - sizeof (t->originator_address)) / sizeof (mb[0]);
+ if (vec_len (*contents) < sizeof (t[0]) + nmb * sizeof (mb[0]))
+ return SRP_ERROR_TOPOLOGY_BAD_LENGTH;
+
+ /* Fill in our source MAC address. */
+ clib_memcpy (t->ethernet.src_address, hi->hw_address, vec_len (hi->hw_address));
+
+ /* Make space for our MAC binding. */
+ vec_resize (*contents, sizeof (srp_topology_mac_binding_t));
+ t = (void *) *contents;
+ t->n_bytes_of_data_that_follows = clib_host_to_net_u16 (nb + sizeof (mb[0]));
+
+ mb = t->bindings + nmb;
+
+ mb->flags =
+ ((t->srp.is_inner_ring ? SRP_TOPOLOGY_MAC_BINDING_FLAG_IS_INNER_RING : 0)
+ | (/* is wrapped FIXME */ 0));
+ clib_memcpy (mb->address, hi->hw_address, vec_len (hi->hw_address));
+
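+  /* IP4-style one's complement checksum over the control header
+     through the end of the updated contents vector. */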
+ t->control.checksum
+ = ~ip_csum_fold (ip_incremental_checksum (0, &t->control,
+ vec_len (*contents) - STRUCT_OFFSET_OF (srp_generic_control_header_t, control)));
+
+ {
+ vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ vlib_buffer_t * b;
+ u32 * to_next = vlib_frame_vector_args (f);
+ u32 bi;
+
+ bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
+ /* buffer to append to */ 0,
+ *contents, vec_len (*contents));
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+
+ return SRP_ERROR_CONTROL_PACKETS_PROCESSED;
+}
+
+typedef uword (srp_control_handler_function_t) (vlib_main_t * vm,
+ u32 sw_if_index,
+ u8 ** contents);
+
+static uword
+srp_control_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ vlib_node_runtime_t * error_node;
+  static u8 * contents;		/* reusable scratch vector for copied packet contents */
+
+ error_node = vlib_node_get_runtime (vm, srp_input_node.index);
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (srp_input_trace_t));
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, l2_len0, l3_len0;
+ vlib_buffer_t * b0;
+ u8 next0, error0;
+ srp_generic_control_header_t * s0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ s0 = (void *) (b0->data + b0->current_data);
+ l2_len0 = vlib_buffer_length_in_chain (vm, b0);
+ l3_len0 = l2_len0 - STRUCT_OFFSET_OF (srp_generic_control_header_t, control);
+
+ error0 = SRP_ERROR_CONTROL_PACKETS_PROCESSED;
+
+ error0 = s0->control.version != 0 ? SRP_ERROR_CONTROL_VERSION_NON_ZERO : error0;
+
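+	  /* Verify the IP4-style control checksum: zero the field,
+	     recompute over the control header onward, and compare
+	     with the saved value. */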
+ {
+ u16 save0 = s0->control.checksum;
+ u16 computed0;
+ s0->control.checksum = 0;
+ computed0 = ~ip_csum_fold (ip_incremental_checksum (0, &s0->control, l3_len0));
+ error0 = save0 != computed0 ? SRP_ERROR_CONTROL_BAD_CHECKSUM : error0;
+ }
+
+ if (error0 == SRP_ERROR_CONTROL_PACKETS_PROCESSED)
+ {
+ static srp_control_handler_function_t * t[SRP_N_CONTROL_PACKET_TYPE] = {
+ [SRP_CONTROL_PACKET_TYPE_topology] = srp_topology_packet,
+ };
+ srp_control_handler_function_t * f;
+
+ f = 0;
+ if (s0->control.type < ARRAY_LEN (t))
+ f = t[s0->control.type];
+
+ if (f)
+ {
+ vec_validate (contents, l2_len0 - 1);
+ vlib_buffer_contents (vm, bi0, contents);
+ error0 = f (vm, vnet_buffer (b0)->sw_if_index[VLIB_RX], &contents);
+ }
+ else
+ error0 = SRP_ERROR_UNKNOWN_CONTROL;
+ }
+
+ b0->error = error_node->errors[error0];
+ next0 = 0;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static vlib_node_registration_t srp_control_input_node = {
+ .function = srp_control_input,
+ .name = "srp-control",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+
+ .format_buffer = format_srp_header_with_length,
+ .format_trace = format_srp_input_trace,
+ .unformat_buffer = unformat_srp_header,
+};
+
+static u8 * format_srp_ips_request_type (u8 * s, va_list * args)
+{
+ u32 x = va_arg (*args, u32);
+ char * t = 0;
+ switch (x)
+ {
+#define _(f,n) case SRP_IPS_REQUEST_##f: t = #f; break;
+ foreach_srp_ips_request_type
+#undef _
+ default:
+ return format (s, "unknown 0x%x", x);
+ }
+ return format (s, "%U", format_c_identifier, t);
+}
+
+static u8 * format_srp_ips_status (u8 * s, va_list * args)
+{
+ u32 x = va_arg (*args, u32);
+ char * t = 0;
+ switch (x)
+ {
+#define _(f,n) case SRP_IPS_STATUS_##f: t = #f; break;
+ foreach_srp_ips_status
+#undef _
+ default:
+ return format (s, "unknown 0x%x", x);
+ }
+ return format (s, "%U", format_c_identifier, t);
+}
+
+static u8 * format_srp_ips_state (u8 * s, va_list * args)
+{
+ u32 x = va_arg (*args, u32);
+ char * t = 0;
+ switch (x)
+ {
+#define _(f) case SRP_IPS_STATE_##f: t = #f; break;
+ foreach_srp_ips_state
+#undef _
+ default:
+ return format (s, "unknown 0x%x", x);
+ }
+ return format (s, "%U", format_c_identifier, t);
+}
+
+static u8 * format_srp_ring (u8 * s, va_list * args)
+{
+ u32 ring = va_arg (*args, u32);
+ return format (s, "%s", ring == SRP_RING_INNER ? "inner" : "outer");
+}
+
+static u8 * format_srp_ips_header (u8 * s, va_list * args)
+{
+ srp_ips_header_t * h = va_arg (*args, srp_ips_header_t *);
+
+ s = format (s, "%U, %U, %U, %s-path",
+ format_srp_ips_request_type, h->request_type,
+ format_ethernet_address, h->originator_address,
+ format_srp_ips_status, h->status,
+ h->is_long_path ? "long" : "short");
+
+ return s;
+}
+
+static u8 * format_srp_interface (u8 * s, va_list * args)
+{
+ srp_interface_t * si = va_arg (*args, srp_interface_t *);
+ srp_interface_ring_t * ir;
+
+ s = format (s, "address %U, IPS state %U",
+ format_ethernet_address, si->my_address,
+ format_srp_ips_state, si->current_ips_state);
+ for (ir = si->rings; ir < si->rings + SRP_N_RING; ir++)
+ if (ir->rx_neighbor_address_valid)
+ s = format (s, ", %U neighbor %U",
+ format_srp_ring, ir->ring,
+ format_ethernet_address, ir->rx_neighbor_address);
+
+ return s;
+}
+
+u8 * format_srp_device (u8 * s, va_list * args)
+{
+ u32 hw_if_index = va_arg (*args, u32);
+ CLIB_UNUSED (int verbose) = va_arg (*args, int);
+ vnet_main_t * vnm = vnet_get_main();
+ srp_main_t * sm = &srp_main;
+ vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index);
+ srp_interface_t * si = pool_elt_at_index (sm->interface_pool, hi->hw_instance);
+ return format (s, "%U", format_srp_interface, si);
+}
+
+always_inline srp_interface_t *
+srp_get_interface (u32 sw_if_index, srp_ring_type_t * ring)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ srp_main_t * sm = &srp_main;
+ vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ srp_interface_t * si;
+
+ ASSERT (hi->hw_class_index == srp_hw_interface_class.index);
+ si = pool_elt_at_index (sm->interface_pool, hi->hw_instance);
+
+ ASSERT (si->rings[SRP_RING_INNER].hw_if_index == hi->hw_if_index
+ || si->rings[SRP_RING_OUTER].hw_if_index == hi->hw_if_index);
+ if (ring)
+ *ring =
+ (hi->hw_if_index == si->rings[SRP_RING_INNER].hw_if_index
+ ? SRP_RING_INNER
+ : SRP_RING_OUTER);
+
+ return si;
+}
+
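+/* Build the common part of an IPS control packet: TTL 1, highest
+   priority, locally-buffered control mode, sourced from our MAC
+   address; the checksum is filled in by tx_ips_packet. */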
+static void init_ips_packet (srp_interface_t * si,
+ srp_ring_type_t tx_ring,
+ srp_ips_header_t * i)
+{
+ memset (i, 0, sizeof (i[0]));
+
+ i->srp.ttl = 1;
+ i->srp.is_inner_ring = tx_ring;
+ i->srp.priority = 7;
+ i->srp.mode = SRP_MODE_control_locally_buffered_for_host;
+ srp_header_compute_parity (&i->srp);
+
+ clib_memcpy (&i->ethernet.src_address, &si->my_address, sizeof (si->my_address));
+ i->ethernet.type = clib_host_to_net_u16 (ETHERNET_TYPE_SRP_CONTROL);
+
+ /* Checksum will be filled in later. */
+ i->control.version = 0;
+ i->control.type = SRP_CONTROL_PACKET_TYPE_ips;
+ i->control.ttl = 255;
+
+ clib_memcpy (&i->originator_address, &si->my_address, sizeof (si->my_address));
+}
+
+static void tx_ips_packet (srp_interface_t * si,
+ srp_ring_type_t tx_ring,
+ srp_ips_header_t * i)
+{
+ srp_main_t * sm = &srp_main;
+ vnet_main_t * vnm = vnet_get_main();
+ vlib_main_t * vm = sm->vlib_main;
+ vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, si->rings[tx_ring].hw_if_index);
+ vlib_frame_t * f;
+ vlib_buffer_t * b;
+ u32 * to_next, bi;
+
+ if (! vnet_sw_interface_is_admin_up (vnm, hi->sw_if_index))
+ return;
+ if (hi->hw_class_index != srp_hw_interface_class.index)
+ return;
+
+ i->control.checksum
+ = ~ip_csum_fold (ip_incremental_checksum (0, &i->control,
+ sizeof (i[0]) - STRUCT_OFFSET_OF (srp_ips_header_t, control)));
+
+ bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
+ /* buffer to append to */ 0,
+ i, sizeof (i[0]));
+
+ /* FIXME trace. */
+ if (0)
+ clib_warning ("%U %U",
+ format_vnet_sw_if_index_name, vnm, hi->sw_if_index,
+ format_srp_ips_header, i);
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = hi->sw_if_index;
+
+ f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+}
+
+static void serialize_srp_interface_state_msg (serialize_main_t * m, va_list * va)
+{
+ srp_interface_t * si = va_arg (*va, srp_interface_t *);
+ srp_main_t * sm = &srp_main;
+ int r;
+
+ ASSERT (! pool_is_free (sm->interface_pool, si));
+ serialize_integer (m, si - sm->interface_pool, sizeof (u32));
+ serialize_likely_small_unsigned_integer (m, si->current_ips_state);
+ for (r = 0; r < SRP_N_RING; r++)
+ {
+ srp_interface_ring_t * ir = &si->rings[r];
+ void * p;
+ serialize_likely_small_unsigned_integer (m, ir->rx_neighbor_address_valid);
+ if (ir->rx_neighbor_address_valid)
+ {
+ p = serialize_get (m, sizeof (ir->rx_neighbor_address));
+ clib_memcpy (p, ir->rx_neighbor_address, sizeof (ir->rx_neighbor_address));
+ }
+ serialize_likely_small_unsigned_integer (m, ir->waiting_to_restore);
+ if (ir->waiting_to_restore)
+ serialize (m, serialize_f64, ir->wait_to_restore_start_time);
+ }
+}
+
+static void unserialize_srp_interface_state_msg (serialize_main_t * m, va_list * va)
+{
+ CLIB_UNUSED (mc_main_t * mcm) = va_arg (*va, mc_main_t *);
+ srp_main_t * sm = &srp_main;
+ srp_interface_t * si;
+ u32 si_index, r;
+
+ unserialize_integer (m, &si_index, sizeof (u32));
+ si = pool_elt_at_index (sm->interface_pool, si_index);
+ si->current_ips_state = unserialize_likely_small_unsigned_integer (m);
+ for (r = 0; r < SRP_N_RING; r++)
+ {
+ srp_interface_ring_t * ir = &si->rings[r];
+ void * p;
+ ir->rx_neighbor_address_valid = unserialize_likely_small_unsigned_integer (m);
+ if (ir->rx_neighbor_address_valid)
+ {
+ p = unserialize_get (m, sizeof (ir->rx_neighbor_address));
+ clib_memcpy (ir->rx_neighbor_address, p, sizeof (ir->rx_neighbor_address));
+ }
+ ir->waiting_to_restore = unserialize_likely_small_unsigned_integer (m);
+ if (ir->waiting_to_restore)
+ unserialize (m, unserialize_f64, &ir->wait_to_restore_start_time);
+ }
+}
+
+MC_SERIALIZE_MSG (srp_interface_state_msg, static) = {
+ .name = "vnet_srp_interface_state",
+ .serialize = serialize_srp_interface_state_msg,
+ .unserialize = unserialize_srp_interface_state_msg,
+};
+
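+/* Non-zero for IPS request types that should trigger a protection
+   switch (wrap); idle and wait-to-restore requests do not. */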
+static int requests_switch (srp_ips_request_type_t r)
+{
+ static u8 t[16] = {
+ [SRP_IPS_REQUEST_forced_switch] = 1,
+ [SRP_IPS_REQUEST_manual_switch] = 1,
+ [SRP_IPS_REQUEST_signal_fail] = 1,
+ [SRP_IPS_REQUEST_signal_degrade] = 1,
+ };
+ return (int) r < ARRAY_LEN (t) ? t[r] : 0;
+}
+
+/* Called when an IPS control packet is received on given interface. */
+void srp_ips_rx_packet (u32 sw_if_index, srp_ips_header_t * h)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ vlib_main_t * vm = srp_main.vlib_main;
+ srp_ring_type_t rx_ring;
+ srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring);
+ srp_interface_ring_t * ir = &si->rings[rx_ring];
+ int si_needs_broadcast = 0;
+
+ /* FIXME trace. */
+ if (0)
+ clib_warning ("%U %U %U",
+ format_time_interval, "h:m:s:u", vlib_time_now (vm),
+ format_vnet_sw_if_index_name, vnm, sw_if_index,
+ format_srp_ips_header, h);
+
+ /* Ignore self-generated IPS packets. */
+ if (! memcmp (h->originator_address, si->my_address, sizeof (h->originator_address)))
+ goto done;
+
+ /* Learn neighbor address from short path messages. */
+ if (! h->is_long_path)
+ {
+ if (ir->rx_neighbor_address_valid
+ && memcmp (ir->rx_neighbor_address, h->originator_address, sizeof (ir->rx_neighbor_address)))
+ {
+ ASSERT (0);
+ }
+ ir->rx_neighbor_address_valid = 1;
+ clib_memcpy (ir->rx_neighbor_address, h->originator_address, sizeof (ir->rx_neighbor_address));
+ }
+
+ switch (si->current_ips_state)
+ {
+ case SRP_IPS_STATE_idle:
+ /* Received {REQ,NEIGHBOR,W,S} in idle state: wrap. */
+ if (requests_switch (h->request_type)
+ && ! h->is_long_path
+ && h->status == SRP_IPS_STATUS_wrapped)
+ {
+ srp_ips_header_t to_tx[2];
+
+ si_needs_broadcast = 1;
+ si->current_ips_state = SRP_IPS_STATE_wrapped;
+ si->hw_wrap_function (si->rings[SRP_SIDE_A].hw_if_index, /* enable_wrap */ 1);
+ si->hw_wrap_function (si->rings[SRP_SIDE_B].hw_if_index, /* enable_wrap */ 1);
+
+ init_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
+ to_tx[0].request_type = SRP_IPS_REQUEST_idle;
+ to_tx[0].status = SRP_IPS_STATUS_wrapped;
+ to_tx[0].is_long_path = 0;
+ tx_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
+
+ init_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
+ to_tx[1].request_type = h->request_type;
+ to_tx[1].status = SRP_IPS_STATUS_wrapped;
+ to_tx[1].is_long_path = 1;
+ tx_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
+ }
+ break;
+
+ case SRP_IPS_STATE_wrapped:
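+      /* Neighbor reports idle on the short path: the failure has
+	 cleared, so unwrap both sides. */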
+ if (! h->is_long_path
+ && h->request_type == SRP_IPS_REQUEST_idle
+ && h->status == SRP_IPS_STATUS_idle)
+ {
+ si_needs_broadcast = 1;
+ si->current_ips_state = SRP_IPS_STATE_idle;
+ si->hw_wrap_function (si->rings[SRP_SIDE_A].hw_if_index, /* enable_wrap */ 0);
+ si->hw_wrap_function (si->rings[SRP_SIDE_B].hw_if_index, /* enable_wrap */ 0);
+ }
+ break;
+
+ case SRP_IPS_STATE_pass_thru:
+ /* FIXME */
+ break;
+
+ default:
+ abort ();
+ break;
+ }
+
+ done:
+ if (vm->mc_main && si_needs_broadcast)
+ mc_serialize (vm->mc_main, &srp_interface_state_msg, si);
+}
+
+/* Perform a local IPS request on the given interface. */
+void srp_ips_local_request (u32 sw_if_index, srp_ips_request_type_t request)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ srp_main_t * sm = &srp_main;
+ vlib_main_t * vm = sm->vlib_main;
+ srp_ring_type_t rx_ring;
+ srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring);
+ srp_interface_ring_t * ir = &si->rings[rx_ring];
+ int si_needs_broadcast = 0;
+
+ if (request == SRP_IPS_REQUEST_wait_to_restore)
+ {
+ if (si->current_ips_state != SRP_IPS_STATE_wrapped)
+ return;
+ if (! ir->waiting_to_restore)
+ {
+ ir->wait_to_restore_start_time = vlib_time_now (sm->vlib_main);
+ ir->waiting_to_restore = 1;
+ si_needs_broadcast = 1;
+ }
+ }
+ else
+ {
+ /* FIXME handle local signal fail. */
+ si_needs_broadcast = ir->waiting_to_restore;
+ ir->wait_to_restore_start_time = 0;
+ ir->waiting_to_restore = 0;
+ }
+
+ /* FIXME trace. */
+ if (0)
+ clib_warning ("%U %U",
+ format_vnet_sw_if_index_name, vnm, sw_if_index,
+ format_srp_ips_request_type, request);
+
+ if (vm->mc_main && si_needs_broadcast)
+ mc_serialize (vm->mc_main, &srp_interface_state_msg, si);
+}
+
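+/* Periodic IPS transmit, driven by srp-ips-process: completes wait
+   to restore once both rings have been stable for the configured
+   delay, then re-announces idle on both rings. Note that the early
+   idle-state return below makes the wrapped branch unreachable as
+   written. */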
+static void maybe_send_ips_message (srp_interface_t * si)
+{
+ srp_main_t * sm = &srp_main;
+ srp_ips_header_t to_tx[2];
+ srp_ring_type_t rx_ring = SRP_RING_OUTER;
+ srp_interface_ring_t * r0 = &si->rings[rx_ring ^ 0];
+ srp_interface_ring_t * r1 = &si->rings[rx_ring ^ 1];
+ f64 now = vlib_time_now (sm->vlib_main);
+
+ if (! si->ips_process_enable)
+ return;
+
+ if (si->current_ips_state == SRP_IPS_STATE_wrapped
+ && r0->waiting_to_restore
+ && r1->waiting_to_restore
+ && now >= r0->wait_to_restore_start_time + si->config.wait_to_restore_idle_delay
+ && now >= r1->wait_to_restore_start_time + si->config.wait_to_restore_idle_delay)
+ {
+ si->current_ips_state = SRP_IPS_STATE_idle;
+ r0->waiting_to_restore = r1->waiting_to_restore = 0;
+ r0->wait_to_restore_start_time = r1->wait_to_restore_start_time = 0;
+ }
+
+ if (si->current_ips_state != SRP_IPS_STATE_idle)
+ return;
+
+ init_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
+ init_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
+
+ if (si->current_ips_state == SRP_IPS_STATE_idle)
+ {
+ to_tx[0].request_type = to_tx[1].request_type = SRP_IPS_REQUEST_idle;
+ to_tx[0].status = to_tx[1].status = SRP_IPS_STATUS_idle;
+ to_tx[0].is_long_path = to_tx[1].is_long_path = 0;
+ }
+
+ else if (si->current_ips_state == SRP_IPS_STATE_wrapped)
+ {
+ to_tx[0].request_type =
+ (si->rings[rx_ring ^ 0].waiting_to_restore
+ ? SRP_IPS_REQUEST_wait_to_restore
+ : SRP_IPS_REQUEST_signal_fail);
+ to_tx[1].request_type =
+ (si->rings[rx_ring ^ 1].waiting_to_restore
+ ? SRP_IPS_REQUEST_wait_to_restore
+ : SRP_IPS_REQUEST_signal_fail);
+ to_tx[0].status = to_tx[1].status = SRP_IPS_STATUS_wrapped;
+ to_tx[0].is_long_path = 0;
+ to_tx[1].is_long_path = 1;
+ }
+
+ tx_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
+ tx_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
+}
+
+static uword
+srp_ips_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ srp_main_t * sm = &srp_main;
+ srp_interface_t * si;
+
+ while (1)
+ {
+ pool_foreach (si, sm->interface_pool, ({
+ maybe_send_ips_message (si);
+ }));
+ vlib_process_suspend (vm, 1.0);
+ }
+
+ return 0;
+}
+
+vlib_node_registration_t srp_ips_process_node = {
+ .function = srp_ips_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "srp-ips-process",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+static clib_error_t * srp_init (vlib_main_t * vm)
+{
+ srp_main_t * sm = &srp_main;
+
+ sm->default_data_ttl = 255;
+ sm->vlib_main = vm;
+ vlib_register_node (vm, &srp_ips_process_node);
+ vlib_register_node (vm, &srp_input_node);
+ vlib_register_node (vm, &srp_control_input_node);
+ srp_setup_node (vm, srp_input_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (srp_init);
diff --git a/src/vnet/srp/packet.h b/src/vnet/srp/packet.h
new file mode 100644
index 00000000..96dab648
--- /dev/null
+++ b/src/vnet/srp/packet.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * srp/packet.h: srp packet format.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_srp_packet_h
+#define included_srp_packet_h
+
+#include <vppinfra/byte_order.h>
+#include <vppinfra/bitops.h>
+#include <vnet/ethernet/packet.h>
+
+/* SRP version 2. */
+
+#define foreach_srp_mode \
+ _ (reserved0) \
+ _ (reserved1) \
+ _ (reserved2) \
+ _ (reserved3) \
+ _ (control_pass_to_host) \
+ _ (control_locally_buffered_for_host) \
+ _ (keep_alive) \
+ _ (data)
+
+typedef enum {
+#define _(f) SRP_MODE_##f,
+ foreach_srp_mode
+#undef _
+ SRP_N_MODE,
+} srp_mode_t;
+
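+/* SRP v2 header: a TTL byte followed by a byte packing is_inner_ring
+   (1 bit), mode (3 bits), priority (3 bits) and parity (1 bit); the
+   bitfield order below depends on host byte order. */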
+typedef union {
+ /* For computing parity bit. */
+ u16 as_u16;
+
+ struct {
+ u8 ttl;
+
+#if CLIB_ARCH_IS_BIG_ENDIAN
+ u8 is_inner_ring : 1;
+ u8 mode : 3;
+ u8 priority : 3;
+ u8 parity : 1;
+#endif
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 parity : 1;
+ u8 priority : 3;
+ u8 mode : 3;
+ u8 is_inner_ring : 1;
+#endif
+ };
+} srp_header_t;
+
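+/* The parity bit covers the whole 16-bit header: clear it, then set
+   it so that the total number of 1 bits is odd. */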
+always_inline void
+srp_header_compute_parity (srp_header_t * h)
+{
+ h->parity = 0;
+ h->parity = count_set_bits (h->as_u16) ^ 1; /* odd parity */
+}
+
+typedef struct {
+ srp_header_t srp;
+ ethernet_header_t ethernet;
+} srp_and_ethernet_header_t;
+
+#define foreach_srp_control_packet_type \
+ _ (reserved) \
+ _ (topology) \
+ _ (ips)
+
+typedef enum {
+#define _(f) SRP_CONTROL_PACKET_TYPE_##f,
+ foreach_srp_control_packet_type
+#undef _
+ SRP_N_CONTROL_PACKET_TYPE,
+} srp_control_packet_type_t;
+
+typedef CLIB_PACKED (struct {
+ /* Set to 0. */
+ u8 version;
+
+ srp_control_packet_type_t type : 8;
+
+ /* IP4-like checksum of packet starting with start of control header. */
+ u16 checksum;
+
+ u16 ttl;
+}) srp_control_header_t;
+
+typedef struct {
+ srp_header_t srp;
+ ethernet_header_t ethernet;
+ srp_control_header_t control;
+} srp_generic_control_header_t;
+
+typedef struct {
+ u8 flags;
+#define SRP_TOPOLOGY_MAC_BINDING_FLAG_IS_INNER_RING (1 << 6)
+#define SRP_TOPOLOGY_MAC_BINDING_FLAG_IS_WRAPPED (1 << 5)
+
+ /* MAC address. */
+ u8 address[6];
+} srp_topology_mac_binding_t;
+
+typedef CLIB_PACKED (struct {
+ srp_header_t srp;
+ ethernet_header_t ethernet;
+ srp_control_header_t control;
+
+ /* Length in bytes of data that follows. */
+ u16 n_bytes_of_data_that_follows;
+
+ /* MAC address of originator of this topology request. */
+ u8 originator_address[6];
+
+ /* Bindings follow. */
+ srp_topology_mac_binding_t bindings[0];
+}) srp_topology_header_t;
+
+#define foreach_srp_ips_request_type \
+ _ (idle, 0x0) \
+ _ (wait_to_restore, 0x5) \
+ _ (manual_switch, 0x6) \
+ _ (signal_degrade, 0x8) \
+ _ (signal_fail, 0xb) \
+ _ (forced_switch, 0xd)
+
+typedef enum {
+#define _(f,n) SRP_IPS_REQUEST_##f = n,
+ foreach_srp_ips_request_type
+#undef _
+} srp_ips_request_type_t;
+
+#define foreach_srp_ips_status \
+ _ (idle, 0x0) \
+ _ (wrapped, 0x2)
+
+typedef enum {
+#define _(f,n) SRP_IPS_STATUS_##f = n,
+ foreach_srp_ips_status
+#undef _
+} srp_ips_status_t;
+
+typedef struct {
+ srp_header_t srp;
+ ethernet_header_t ethernet;
+ srp_control_header_t control;
+ u8 originator_address[6];
+
+ union {
+ u8 ips_octet;
+
+ struct {
+#if CLIB_ARCH_IS_BIG_ENDIAN
+ u8 request_type : 4;
+ u8 is_long_path : 1;
+ u8 status : 3;
+#endif
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ u8 status : 3;
+ u8 is_long_path : 1;
+ u8 request_type : 4;
+#endif
+ };
+ };
+
+ u8 reserved;
+} srp_ips_header_t;
+
+#endif /* included_srp_packet_h */
diff --git a/src/vnet/srp/pg.c b/src/vnet/srp/pg.c
new file mode 100644
index 00000000..54f1a3bb
--- /dev/null
+++ b/src/vnet/srp/pg.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * srp/pg.c: packet generator srp interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/srp/srp.h>
+#include <vnet/ethernet/ethernet.h>
+
+typedef struct {
+ pg_edit_t ttl;
+ pg_edit_t is_inner_ring;
+ pg_edit_t mode;
+ pg_edit_t priority;
+ pg_edit_t parity;
+ pg_edit_t type;
+ pg_edit_t src_address;
+ pg_edit_t dst_address;
+} pg_srp_header_t;
+
+static inline void
+pg_srp_header_init (pg_srp_header_t * e)
+{
+ pg_edit_init (&e->ttl, srp_and_ethernet_header_t, srp.ttl);
+ pg_edit_init_bitfield (&e->is_inner_ring, srp_and_ethernet_header_t,
+ srp.as_u16,
+ 7, 1);
+ pg_edit_init_bitfield (&e->mode, srp_and_ethernet_header_t,
+ srp.as_u16,
+ 4, 3);
+ pg_edit_init_bitfield (&e->priority, srp_and_ethernet_header_t,
+ srp.as_u16,
+ 1, 3);
+ pg_edit_init_bitfield (&e->parity, srp_and_ethernet_header_t,
+ srp.as_u16,
+ 0, 1);
+ pg_edit_init (&e->type, srp_and_ethernet_header_t, ethernet.type);
+ pg_edit_init (&e->src_address, srp_and_ethernet_header_t, ethernet.src_address);
+ pg_edit_init (&e->dst_address, srp_and_ethernet_header_t, ethernet.dst_address);
+}
+
+uword
+unformat_pg_srp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_srp_header_t * e;
+ u32 error, group_index;
+
+ e = pg_create_edit_group (s, sizeof (e[0]), sizeof (srp_header_t),
+ &group_index);
+ pg_srp_header_init (e);
+
+ error = 1;
+ if (! unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ethernet_type_net_byte_order, &e->type,
+ unformat_pg_edit,
+ unformat_ethernet_address, &e->src_address,
+ unformat_pg_edit,
+ unformat_ethernet_address, &e->dst_address))
+ goto done;
+
+ {
+ srp_header_t h;
+
+ h.as_u16 = 0;
+ h.mode = SRP_MODE_data;
+ h.ttl = 255;
+ h.parity = count_set_bits (h.as_u16) ^ 1;
+
+ pg_edit_set_fixed (&e->mode, h.mode);
+ pg_edit_set_fixed (&e->ttl, h.ttl);
+ pg_edit_set_fixed (&e->is_inner_ring, h.is_inner_ring);
+ pg_edit_set_fixed (&e->priority, h.priority);
+ pg_edit_set_fixed (&e->parity, h.parity);
+ }
+
+ error = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "mode %U",
+ unformat_pg_edit,
+ unformat_pg_number, &e->mode))
+ ;
+ else if (unformat (input, "ttl %U",
+ unformat_pg_edit,
+ unformat_pg_number, &e->ttl))
+ ;
+ else if (unformat (input, "priority %U",
+ unformat_pg_edit,
+ unformat_pg_number, &e->priority))
+ ;
+ else
+ break;
+ }
+
+ {
+ ethernet_main_t * em = &ethernet_main;
+ ethernet_type_info_t * ti = 0;
+ pg_node_t * pg_node = 0;
+
+ if (e->type.type == PG_EDIT_FIXED)
+ {
+ u16 t = *(u16 *) e->type.values[PG_EDIT_LO];
+ ti = ethernet_get_type_info (em, clib_net_to_host_u16 (t));
+ if (ti && ti->node_index != ~0)
+ pg_node = pg_get_node (ti->node_index);
+ }
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+ else if (! unformat_user (input, unformat_pg_payload, s))
+ goto done;
+ }
+
+ done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
diff --git a/src/vnet/srp/srp.h b/src/vnet/srp/srp.h
new file mode 100644
index 00000000..5288ebe4
--- /dev/null
+++ b/src/vnet/srp/srp.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * srp.h: types/functions for srp.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_srp_h
+#define included_srp_h
+
+#include <vnet/vnet.h>
+#include <vnet/srp/packet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/pg/pg.h>
+
+extern vnet_hw_interface_class_t srp_hw_interface_class;
+
+/* See RFC 2892. */
+#define foreach_srp_ips_state \
+ _ (idle) \
+ _ (pass_thru) \
+ _ (wrapped)
+
+typedef enum {
+#define _(f) SRP_IPS_STATE_##f,
+ foreach_srp_ips_state
+#undef _
+ SRP_N_IPS_STATE,
+} srp_ips_state_t;
+
+typedef enum {
+ SRP_RING_OUTER,
+ SRP_RING_INNER,
+ SRP_N_RING = 2,
+ SRP_SIDE_A = SRP_RING_OUTER, /* outer rx, inner tx */
+ SRP_SIDE_B = SRP_RING_INNER, /* inner rx, outer tx */
+ SRP_N_SIDE = 2,
+} srp_ring_type_t;
+
+typedef struct {
+ srp_ring_type_t ring;
+
+ /* Hardware interface for this ring/side. */
+ u32 hw_if_index;
+
+ /* Software interface corresponding to hardware interface. */
+ u32 sw_if_index;
+
+ /* Mac address of neighbor on RX fiber. */
+ u8 rx_neighbor_address[6];
+
+ u8 rx_neighbor_address_valid;
+
+ /* True if we are waiting to restore signal. */
+ u8 waiting_to_restore;
+
+  /* Time stamp when the wait-to-restore interval started, i.e. when the signal became valid again. */
+ f64 wait_to_restore_start_time;
+} srp_interface_ring_t;
+
+struct srp_interface_t;
+typedef void (srp_hw_wrap_function_t) (u32 hw_if_index, u32 wrap_enable);
+typedef void (srp_hw_enable_function_t) (struct srp_interface_t * si, u32 wrap_enable);
+
+typedef struct {
+  /* Delay, in seconds, between a wait-to-restore event and entering the idle state. */
+ f64 wait_to_restore_idle_delay;
+
+  /* Interval, in seconds, between sending IPS messages to neighbors. */
+ f64 ips_tx_interval;
+} srp_interface_config_t;
+
+typedef struct srp_interface_t {
+ /* Current IPS state. */
+ srp_ips_state_t current_ips_state;
+
+ /* Address for this interface. */
+ u8 my_address[6];
+
+ /* Enable IPS process handling for this interface. */
+ u8 ips_process_enable;
+
+ srp_interface_ring_t rings[SRP_N_RING];
+
+ /* Configurable parameters. */
+ srp_interface_config_t config;
+
+ srp_hw_wrap_function_t * hw_wrap_function;
+
+ srp_hw_enable_function_t * hw_enable_function;
+} srp_interface_t;
+
+typedef struct {
+ vlib_main_t * vlib_main;
+
+ /* Pool of SRP interfaces. */
+ srp_interface_t * interface_pool;
+
+ uword * interface_index_by_hw_if_index;
+
+ /* TTL to use for outgoing data packets. */
+ u32 default_data_ttl;
+
+ vlib_one_time_waiting_process_t * srp_register_interface_waiting_process_pool;
+
+ uword * srp_register_interface_waiting_process_pool_index_by_hw_if_index;
+} srp_main_t;
+
+/* Registers the side A/B hardware interfaces as being SRP capable. */
+void srp_register_interface (u32 * hw_if_indices);
+
+/* Enable sending IPS messages for interface implied by given vlib hardware interface. */
+void srp_interface_enable_ips (u32 hw_if_index);
+
+/* Set function to wrap hardware side of SRP interface. */
+void srp_interface_set_hw_wrap_function (u32 hw_if_index, srp_hw_wrap_function_t * f);
+
+void srp_interface_set_hw_enable_function (u32 hw_if_index, srp_hw_enable_function_t * f);
+
+extern vlib_node_registration_t srp_ips_process_node;
+
+/* Called when an IPS control packet is received on given interface. */
+void srp_ips_rx_packet (u32 sw_if_index, srp_ips_header_t * ips_packet);
+
+/* Perform a local IPS request on the given interface. */
+void srp_ips_local_request (u32 sw_if_index, srp_ips_request_type_t request);
+
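+/* Map a link state change to the corresponding local IPS request:
+   link up starts wait to restore, link down signals failure. */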
+always_inline void
+srp_ips_link_change (u32 sw_if_index, u32 link_is_up)
+{
+ srp_ips_local_request (sw_if_index,
+ link_is_up
+ ? SRP_IPS_REQUEST_wait_to_restore
+ : SRP_IPS_REQUEST_signal_fail);
+}
+
+void srp_interface_get_interface_config (u32 hw_if_index, srp_interface_config_t * c);
+void srp_interface_set_interface_config (u32 hw_if_index, srp_interface_config_t * c);
+
+srp_main_t srp_main;
+
+always_inline srp_interface_t *
+srp_get_interface_from_vnet_hw_interface (u32 hw_if_index)
+{
+ srp_main_t * sm = &srp_main;
+ uword * p = hash_get (sm->interface_index_by_hw_if_index, hw_if_index);
+ return p ? pool_elt_at_index (sm->interface_pool, p[0]) : 0;
+}
+
+u8 * format_srp_header (u8 * s, va_list * args);
+u8 * format_srp_header_with_length (u8 * s, va_list * args);
+u8 * format_srp_device (u8 * s, va_list * args);
+
+/* Parse srp header. */
+uword
+unformat_srp_header (unformat_input_t * input, va_list * args);
+
+uword unformat_pg_srp_header (unformat_input_t * input, va_list * args);
+
+always_inline void
+srp_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ pg_node_t * pn = pg_get_node (node_index);
+ n->format_buffer = format_srp_header_with_length;
+ n->unformat_buffer = unformat_srp_header;
+ pn->unformat_edit = unformat_pg_srp_header;
+}
+
+#define foreach_srp_error \
+ _ (NONE, "no error") \
+ _ (UNKNOWN_MODE, "unknown mode in SRP header") \
+ _ (KEEP_ALIVE_DROPPED, "v1 keep alive mode in SRP header") \
+ _ (CONTROL_PACKETS_PROCESSED, "control packets processed") \
+ _ (IPS_PACKETS_PROCESSED, "IPS packets processed") \
+ _ (UNKNOWN_CONTROL, "unknown control packet") \
+ _ (CONTROL_VERSION_NON_ZERO, "control packet with non-zero version") \
+ _ (CONTROL_BAD_CHECKSUM, "control packet with bad checksum") \
+ _ (TOPOLOGY_BAD_LENGTH, "topology packet with bad length")
+
+typedef enum {
+#define _(n,s) SRP_ERROR_##n,
+ foreach_srp_error
+#undef _
+ SRP_N_ERROR,
+} srp_error_t;
+
+serialize_function_t serialize_srp_main, unserialize_srp_main;
+
+#endif /* included_srp_h */
diff --git a/src/vnet/srv6/dir.dox b/src/vnet/srv6/dir.dox
new file mode 100755
index 00000000..3f539a58
--- /dev/null
+++ b/src/vnet/srv6/dir.dox
@@ -0,0 +1,25 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Segment Routing code
+
+ An implementation of Segment Routing as per:
+ draft-ietf-6man-segment-routing-header-05
+
+ @see ietf_draft_05.txt
+
+*/
\ No newline at end of file
diff --git a/src/vnet/srv6/ietf_draft_05.txt b/src/vnet/srv6/ietf_draft_05.txt
new file mode 100755
index 00000000..e9bff04f
--- /dev/null
+++ b/src/vnet/srv6/ietf_draft_05.txt
@@ -0,0 +1,1564 @@
+Network Working Group S. Previdi, Ed.
+Internet-Draft C. Filsfils
+Intended status: Standards Track Cisco Systems, Inc.
+Expires: August 5, 2017 B. Field
+ Comcast
+ I. Leung
+ Rogers Communications
+ J. Linkova
+ Google
+ E. Aries
+ Facebook
+ T. Kosugi
+ NTT
+ E. Vyncke
+ Cisco Systems, Inc.
+ D. Lebrun
+ Universite Catholique de Louvain
+ February 1, 2017
+
+
+ IPv6 Segment Routing Header (SRH)
+ draft-ietf-6man-segment-routing-header-05
+
+Abstract
+
+ Segment Routing (SR) allows a node to steer a packet through a
+ controlled set of instructions, called segments, by prepending an SR
+ header to the packet. A segment can represent any instruction,
+   topological or service-based.  SR allows a flow to be enforced
+   through any path (topological, or application/service based) while
+   maintaining per-flow state only at the ingress node of the SR domain.
+
+ Segment Routing can be applied to the IPv6 data plane with the
+ addition of a new type of Routing Extension Header. This draft
+ describes the Segment Routing Extension Header Type and how it is
+ used by SR capable nodes.
+
+Requirements Language
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in RFC 2119 [RFC2119].
+
+Status of This Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 1]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+ This Internet-Draft will expire on August 5, 2017.
+
+Copyright Notice
+
+ Copyright (c) 2017 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+Table of Contents
+
+ 1. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3
+ 2. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3
+ 2.1. Data Planes supporting Segment Routing . . . . . . . . . 4
+ 2.2. Segment Routing (SR) Domain . . . . . . . . . . . . . . . 4
+ 2.2.1. SR Domain in a Service Provider Network . . . . . . . 5
+       2.2.2.  SR Domain in an Overlay Network . . . . . . . . . .   6
+ 3. Segment Routing Extension Header (SRH) . . . . . . . . . . . 7
+ 3.1. SRH TLVs . . . . . . . . . . . . . . . . . . . . . . . . 9
+ 3.1.1. Ingress Node TLV . . . . . . . . . . . . . . . . . . 10
+ 3.1.2. Egress Node TLV . . . . . . . . . . . . . . . . . . . 11
+ 3.1.3. Opaque Container TLV . . . . . . . . . . . . . . . . 11
+ 3.1.4. Padding TLV . . . . . . . . . . . . . . . . . . . . . 12
+ 3.1.5. HMAC TLV . . . . . . . . . . . . . . . . . . . . . . 13
+ 3.2. SRH and RFC2460 behavior . . . . . . . . . . . . . . . . 14
+ 4. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 14
+ 4.1. Source SR Node . . . . . . . . . . . . . . . . . . . . . 14
+ 4.2. Transit Node . . . . . . . . . . . . . . . . . . . . . . 15
+ 4.3. SR Segment Endpoint Node . . . . . . . . . . . . . . . . 16
+ 5. Security Considerations . . . . . . . . . . . . . . . . . . . 16
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 2]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ 5.1. Threat model . . . . . . . . . . . . . . . . . . . . . . 17
+ 5.1.1. Source routing threats . . . . . . . . . . . . . . . 17
+ 5.1.2. Applicability of RFC 5095 to SRH . . . . . . . . . . 17
+ 5.1.3. Service stealing threat . . . . . . . . . . . . . . . 18
+ 5.1.4. Topology disclosure . . . . . . . . . . . . . . . . . 18
+ 5.1.5. ICMP Generation . . . . . . . . . . . . . . . . . . . 18
+ 5.2. Security fields in SRH . . . . . . . . . . . . . . . . . 19
+ 5.2.1. Selecting a hash algorithm . . . . . . . . . . . . . 20
+ 5.2.2. Performance impact of HMAC . . . . . . . . . . . . . 21
+ 5.2.3. Pre-shared key management . . . . . . . . . . . . . . 21
+ 5.3. Deployment Models . . . . . . . . . . . . . . . . . . . . 22
+ 5.3.1. Nodes within the SR domain . . . . . . . . . . . . . 22
+ 5.3.2. Nodes outside of the SR domain . . . . . . . . . . . 22
+ 5.3.3. SR path exposure . . . . . . . . . . . . . . . . . . 23
+ 5.3.4. Impact of BCP-38 . . . . . . . . . . . . . . . . . . 23
+ 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 24
+ 7. Manageability Considerations . . . . . . . . . . . . . . . . 24
+ 8. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 24
+ 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 24
+ 10. References . . . . . . . . . . . . . . . . . . . . . . . . . 25
+ 10.1. Normative References . . . . . . . . . . . . . . . . . . 25
+ 10.2. Informative References . . . . . . . . . . . . . . . . . 25
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 27
+
+1. Segment Routing Documents
+
+ Segment Routing terminology is defined in
+ [I-D.ietf-spring-segment-routing].
+
+ Segment Routing use cases are described in [RFC7855] and
+ [I-D.ietf-spring-ipv6-use-cases].
+
+ Segment Routing protocol extensions are defined in
+ [I-D.ietf-isis-segment-routing-extensions], and
+ [I-D.ietf-ospf-ospfv3-segment-routing-extensions].
+
+2. Introduction
+
+ Segment Routing (SR), defined in [I-D.ietf-spring-segment-routing],
+ allows a node to steer a packet through a controlled set of
+ instructions, called segments, by prepending an SR header to the
+ packet. A segment can represent any instruction, topological or
+   service-based.  SR allows a flow to be enforced through any path
+   (topological or service/application based) while maintaining per-flow
+   state only at the ingress node of the SR domain.  Segments can be
+   derived from different components: IGP, BGP, Services, Contexts,
+   Locators, etc.  The list of segments forming the path is called the
+   Segment List and is encoded in the packet header.
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 3]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ SR allows the use of strict and loose source based routing paradigms
+ without requiring any additional signaling protocols in the
+   infrastructure, hence delivering an excellent scalability property.
+
+ The source based routing model described in
+ [I-D.ietf-spring-segment-routing] is inherited from the ones proposed
+ by [RFC1940] and [RFC2460]. The source based routing model offers
+ the support for explicit routing capability.
+
+2.1. Data Planes supporting Segment Routing
+
+   Segment Routing (SR) can be instantiated over MPLS
+ ([I-D.ietf-spring-segment-routing-mpls]) and IPv6. This document
+ defines its instantiation over the IPv6 data-plane based on the use-
+ cases defined in [I-D.ietf-spring-ipv6-use-cases].
+
+ This document defines a new type of Routing Header (originally
+ defined in [RFC2460]) called the Segment Routing Header (SRH) in
+ order to convey the Segment List in the packet header as defined in
+   [I-D.ietf-spring-segment-routing].  Mechanisms through which segments
+ are known and advertised are outside the scope of this document.
+
+ A segment is materialized by an IPv6 address. A segment identifies a
+ topological instruction or a service instruction. A segment can be
+ either:
+
+ o global: a global segment represents an instruction supported by
+ all nodes in the SR domain and it is instantiated through an IPv6
+ address globally known in the SR domain.
+
+ o local: a local segment represents an instruction supported only by
+      the node that originates it, and it is instantiated through an IPv6
+ address that is known only by the local node.
+
+2.2. Segment Routing (SR) Domain
+
+ We define the concept of the Segment Routing Domain (SR Domain) as
+   the set of nodes participating in the source based routing model.
+ These nodes may be connected to the same physical infrastructure
+ (e.g.: a Service Provider's network) as well as nodes remotely
+ connected to each other (e.g.: an enterprise VPN or an overlay).
+
+ A non-exhaustive list of examples of SR Domains is:
+
+ o The network of an operator, service provider, content provider,
+ enterprise including nodes, links and Autonomous Systems.
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 4]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ o A set of nodes connected as an overlay over one or more transit
+ providers. The overlay nodes exchange SR-enabled traffic with
+ segments belonging solely to the overlay routers (the SR domain).
+ None of the segments in the SR-enabled packets exchanged by the
+      overlay belong to the transit networks.
+
+   The source based routing model, through its instantiation of the
+   Segment Routing Header (SRH) defined in this document, equally
+   applies to all the above examples.
+
+ It is assumed in this document that the SRH is added to the packet by
+   its source, consistent with the source routing model defined in
+ [RFC2460]. For example:
+
+ o At the node originating the packet (host, server).
+
+ o At the ingress node of an SR domain where the ingress node
+ receives an IPv6 packet and encapsulates it into an outer IPv6
+ header followed by a Segment Routing header.
+
+2.2.1. SR Domain in a Service Provider Network
+
+ The following figure illustrates an SR domain consisting of an
+ operator's network infrastructure.
+
+ (-------------------------- Operator 1 -----------------------)
+ ( )
+ ( (-----AS 1-----) (-------AS 2-------) (----AS 3-------) )
+ ( ( ) ( ) ( ) )
+ A1--(--(--11---13--14-)--(-21---22---23--24-)--(-31---32---34--)--)--Z1
+ ( ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) )
+ A2--(--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--)--Z2
+ ( ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) )
+ ( ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) )
+ A3--(--(--15---17--18-)--(-25---26---27--28-)--(-35---36---38--)--)--Z3
+ ( ( ) ( ) ( ) )
+ ( (--------------) (------------------) (---------------) )
+ ( )
+ (-------------------------------------------------------------)
+
+ Figure 1: Service Provider SR Domain
+
+ Figure 1 describes an operator network including several ASes and
+ delivering connectivity between endpoints. In this scenario, Segment
+ Routing is used within the operator networks and across the ASes
+ boundaries (all being under the control of the same operator). In
+ this case segment routing can be used in order to address use cases
+ such as end-to-end traffic engineering, fast re-route, egress peer
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 5]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ engineering, data-center traffic engineering as described in
+ [RFC7855], [I-D.ietf-spring-ipv6-use-cases] and
+ [I-D.ietf-spring-resiliency-use-cases].
+
+   Typically, an IPv6 packet received at ingress (i.e.: from outside the
+   SR domain) is classified according to network operator policies, and
+   such classification results in an outer header with an SRH applied
+   to the incoming packet.  The SRH contains the list of segments
+   representing the path the packet must take inside the SR domain.
+ Thus, the SA of the packet is the ingress node, the DA (due to SRH
+ procedures described in Section 4) is set as the first segment of the
+ path and the last segment of the path is the egress node of the SR
+ domain.
+
+ The path may include intra-AS as well as inter-AS segments. It has
+ to be noted that all nodes within the SR domain are under the control
+ of the same administration. When the packet reaches the egress point
+ of
+ the SR domain, the outer header and its SRH are removed so that the
+ destination of the packet is unaware of the SR domain the packet has
+ traversed.
+
+ The outer header with the SRH is no different from any other
+ tunneling encapsulation mechanism and allows a network operator to
+ implement traffic engineering mechanisms to efficiently steer
+ traffic across its infrastructure.
+
+2.2.2. SR Domain in an Overlay Network
+
+ The following figure illustrates an SR domain consisting of an
+ overlay network over multiple operators' networks.
+
+ (--Operator 1---) (-----Operator 2-----) (--Operator 3---)
+ ( ) ( ) ( )
+ A1--(--11---13--14--)--(--21---22---23--24--)--(-31---32---34--)--C1
+ ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ )
+ A2--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--C2
+ ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | )
+ ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| )
+ A3--(--15---17--18--)--(--25---26---27--28--)--(-35---36---38--)--C3
+ ( ) ( | | | ) ( )
+ (---------------) (--|----|---------|--) (---------------)
+ | | |
+ B1 B2 B3
+
+ Figure 2: Overlay SR Domain
+
+ Figure 2 describes an overlay consisting of nodes connected to three
+ different network operators and forming a single overlay network
+ where Segment Routing packets are exchanged.
+
+ The overlay consists of nodes A1, A2, A3, B1, B2, B3, C1, C2 and C3.
+ These nodes are connected to their respective network operator and
+ form an overlay network.
+
+ Each node may originate packets with an SRH which contains, in the
+ segment list of the SRH or in the DA, segments identifying other
+ overlay nodes. This implies that packets with an SRH may traverse the
+ operators' networks but, obviously, these SRHs cannot contain an
+ address/segment of the transit operators 1, 2 and 3. The SRH
+ originated by the overlay can only contain addresses/segments under
+ the administration of the overlay (e.g. addresses/segments supported
+ by A1, A2, A3, B1, B2, B3, C1, C2 or C3).
+
+ In this model, the operator network nodes are transit nodes and,
+ according to [RFC2460], MUST NOT inspect the routing extension header
+ since they are not the DA of the packet.
+
+ It is a common practice in operators' networks to filter out, at
+ ingress, any packet whose DA is the address of an internal node, and
+ it is also possible that an operator would filter out any packet
+ destined to an internal address and having an extension header in it.
+
+ This common practice does not impact the SR-enabled traffic between
+ the overlay nodes as the intermediate transit networks never see a
+ destination address belonging to their infrastructure. These SR-
+ enabled overlay packets will thus never be filtered by the transit
+ operators.
+
+ In all cases, transit packets (i.e.: packets whose DA is outside the
+ domain of the operator's network) will be forwarded as usual, without
+ introducing any security concern in the operator's network.
+ This is similar to tunneled packets.
+
+3. Segment Routing Extension Header (SRH)
+
+ A new type of Routing Header (originally defined in [RFC2460]) is
+ defined: the Segment Routing Header (SRH), which has a new Routing
+ Type (suggested value 4) to be assigned by IANA.
+
+ The Segment Routing Header (SRH) is defined as follows:
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Next Header | Hdr Ext Len | Routing Type | Segments Left |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | First Segment | Flags | RESERVED |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Segment List[0] (128 bits IPv6 address) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | |
+ ...
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Segment List[n] (128 bits IPv6 address) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ // //
+ // Optional Type Length Value objects (variable) //
+ // //
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Next Header: 8-bit selector. Identifies the type of header
+ immediately following the SRH.
+
+ o Hdr Ext Len: 8-bit unsigned integer; it is the length of the SRH
+ in 8-octet units, not including the first 8 octets.
+
+ o Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+
+ o Segments Left. Defined in [RFC2460], it contains the index, in
+ the Segment List, of the next segment to inspect. Segments Left
+ is decremented at each segment.
+
+ o First Segment: contains the index, in the Segment List, of the
+ first segment of the path, which is in fact the last element of the
+ Segment List.
+
+ o Flags: 8 bits of flags. The following flags are defined:
+
+ 0 1 2 3 4 5 6 7
+ +-+-+-+-+-+-+-+-+
+ |U|P|O|A|H| U |
+ +-+-+-+-+-+-+-+-+
+
+ U: Unused and for future use. SHOULD be unset on transmission
+ and MUST be ignored on receipt.
+
+ P-flag: Protected flag. Set when the packet has been rerouted
+ through FRR mechanism by an SR endpoint node.
+
+ O-flag: OAM flag. When set, it indicates that this packet is
+ an operations and management (OAM) packet.
+
+ A-flag: Alert flag. If set, it means important Type Length
+ Value (TLV) objects are present. See Section 3.1 for details
+ on TLV objects.
+
+ H-flag: HMAC flag. If set, the HMAC TLV is present and is
+ encoded as the last TLV of the SRH. In other words, the last
+ 36 octets of the SRH represent the HMAC information. See
+ Section 3.1.5 for details on the HMAC TLV.
+
+ o RESERVED: SHOULD be unset on transmission and MUST be ignored on
+ receipt.
+
+ o Segment List[n]: 128-bit IPv6 address representing the nth
+ segment in the Segment List. The Segment List is encoded starting
+ from the last segment of the path. I.e., the first element of the
+ segment list (Segment List[0]) contains the last segment of the
+ path, while the last element of the Segment List (Segment List[n])
+ contains the first segment of the path. The index contained in
+ "Segments Left" identifies the currently active segment.
+
+ o Type Length Value (TLV) objects are described in Section 3.1.
+
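+ As an illustration only, the fixed part of the SRH maps onto a
+ packed C structure. The sketch below uses names of this document's
+ own choosing (the VPP sr.h later in this patch uses an equivalent
+ ip6_sr_header_t), and the flag masks assume the flags diagram is
+ read MSB-first:
+
+    #include <stdint.h>
+
+    /* Fixed part of the SRH; Segment List entries follow in memory. */
+    typedef struct
+    {
+      uint8_t next_header;     /* type of the header following the SRH */
+      uint8_t length;          /* Hdr Ext Len: 8-octet units, first 8 excluded */
+      uint8_t type;            /* Routing Type (suggested value 4) */
+      uint8_t segments_left;   /* index of the next segment to inspect */
+      uint8_t first_segment;   /* index of the first segment of the path */
+      uint8_t flags;           /* |U|P|O|A|H| U | */
+      uint16_t reserved;
+      uint8_t segments[0][16]; /* Segment List[0..n], 16 octets each */
+    } __attribute__ ((packed)) srh_t;
+
+    #define SRH_FLAG_PROTECTED 0x40 /* P-flag */
+    #define SRH_FLAG_OAM       0x20 /* O-flag */
+    #define SRH_FLAG_ALERT     0x10 /* A-flag */
+    #define SRH_FLAG_HMAC      0x08 /* H-flag */
+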
+3.1. SRH TLVs
+
+ This section defines TLVs of the Segment Routing Header.
+
+ Type Length Value (TLV) objects contain optional information that may
+ be used by the node identified in the DA of the packet. It has to be
+ noted that the information carried in the TLVs is not intended to be
+ used by the routing layer. Typically, TLVs carry information that is
+ consumed by components other than the routing function (e.g.: OAM).
+
+ Each TLV has its own length, format and semantic. The code-point
+ allocated (by IANA) to each TLV defines both the format and the
+ semantic of the information carried in the TLV. Multiple TLVs may be
+ encoded in the same SRH.
+
+ The "Length" field of the TLV is primarily used to skip the TLV while
+ inspecting the SRH in case the node doesn't support or recognize the
+ TLV codepoint. The "Length" defines the TLV length in octets, not
+ including the "Type" and "Length" fields.
+
+ The primary scope of TLVs is to give the receiver of the packet
+ information related to the source routed path (e.g.: where the packet
+ entered in the SR domain and where it is expected to exit).
+
+ Additional TLVs may be defined in the future.
+
+3.1.1. Ingress Node TLV
+
+ The Ingress Node TLV is optional and identifies the node this packet
+ traversed when entering the SR domain. The Ingress Node TLV has the
+ following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | RESERVED | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Ingress Node (16 octets) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 1).
+
+ o Length: 18.
+
+ o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be
+ ignored on receipt.
+
+ o Flags: 8 bits. No flags are defined in this document.
+
+ o Ingress Node: 128 bits. Defines the node where the packet is
+ expected to enter the SR domain. In the encapsulation case
+ described in Section 2.2.1, this information corresponds to the SA
+ of the encapsulating header.
+
+3.1.2. Egress Node TLV
+
+ The Egress Node TLV is optional and identifies the node this packet
+ is expected to traverse when exiting the SR domain. The Egress Node
+ TLV has the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | RESERVED | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Egress Node (16 octets) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 2).
+
+ o Length: 18.
+
+ o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be
+ ignored on receipt.
+
+ o Flags: 8 bits. No flags are defined in this document.
+
+ o Egress Node: 128 bits. Defines the node where the packet is
+ expected to exit the SR domain. In the encapsulation case
+ described in Section 2.2.1, this information corresponds to the
+ last segment of the SRH in the encapsulating header.
+
+3.1.3. Opaque Container TLV
+
+ The Opaque Container TLV is optional and has the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | RESERVED | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Opaque Container (16 octets) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 3).
+
+ o Length: 18.
+
+ o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be
+ ignored on receipt.
+
+ o Flags: 8 bits. No flags are defined in this document.
+
+ o Opaque Container: 128 bits of opaque data not relevant for the
+ routing layer. Typically, this information is consumed by a non-
+ routing component of the node receiving the packet (i.e.: the node
+ in the DA).
+
+3.1.4. Padding TLV
+
+ The Padding TLV is optional; its purpose is to align the SRH on an
+ 8-octet boundary. The Padding TLV has the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | Padding (variable) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ // Padding (variable) //
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 4).
+
+ o Length: 1 to 7
+
+ o Padding: from 1 to 7 octets of padding. Padding bits have no
+ semantic. They SHOULD be set to 0 on transmission and MUST be
+ ignored on receipt.
+
+ The following applies to the Padding TLV:
+
+ o Padding TLV is optional and MAY only appear once in the SRH. If
+ present, it MUST have a length between 1 and 7 octets.
+
+ o The Padding TLV is used in order to align the SRH total length on
+ an 8-octet boundary.
+
+ o When present, the Padding TLV MUST appear as the last TLV before
+ the HMAC TLV (if HMAC TLV is present).
+
+ o When present, the Padding TLV MUST have a length from 1 to 7 in
+ order to align the SRH total length on an 8-octet boundary.
+
+ o When a router inspecting the SRH encounters the Padding TLV, it
+ MUST assume that no other TLV (other than the HMAC) follows the
+ Padding TLV.
+
+3.1.5. HMAC TLV
+
+ The HMAC TLV is optional and contains the HMAC information. It
+ has the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | RESERVED |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | HMAC Key ID (4 octets) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | //
+ | HMAC (32 octets) //
+ | //
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 5).
+
+ o Length: 38.
+
+ o RESERVED: 2 octets. SHOULD be unset on transmission and MUST be
+ ignored on receipt.
+
+ o HMAC Key ID: 4 octets.
+
+ o HMAC: 32 octets.
+
+ o HMAC and HMAC Key ID usage is described in Section 5.
+
+ The following applies to the HMAC TLV:
+
+ o When present, the HMAC TLV MUST be encoded as the last TLV of the
+ SRH.
+
+ o If the HMAC TLV is present, the SRH H-Flag (Figure 4) MUST be set.
+
+ o When the H-flag is set in the SRH, the router inspecting the SRH
+ MUST find the HMAC TLV in the last 38 octets of the SRH.
+
+3.2. SRH and RFC2460 behavior
+
+ The SRH, being a new type of Routing Header, shares the generic
+ Routing Header properties:
+
+ SHOULD only appear once in the packet.
+
+ Only the router whose address is in the DA field of the packet
+ header MUST inspect the SRH.
+
+ Therefore, Segment Routing in IPv6 networks implies that the segment
+ identifier (i.e.: the IPv6 address of the segment) is moved into the
+ DA of the packet.
+
+ The DA of the packet changes at each segment termination/completion
+ and therefore the final DA of the packet MUST be encoded as the last
+ segment of the path.
+
+4. SRH Procedures
+
+ In this section we describe the different procedures on the SRH.
+
+4.1. Source SR Node
+
+ A Source SR Node can be any node originating an IPv6 packet with its
+ IPv6 and Segment Routing Headers. This includes either:
+
+ A host originating an IPv6 packet.
+
+ An SR domain ingress router encapsulating a received IPv6 packet
+ into an outer IPv6 header followed by an SRH.
+
+ The mechanism through which a Segment List is derived is outside of
+ the scope of this document. As an example, the Segment List may be
+ obtained through:
+
+ Local path computation.
+
+ Local configuration.
+
+ Interaction with a centralized controller delivering the path.
+
+ Any other mechanism.
+
+ The following are the steps for creating the SRH (a C sketch
+ follows the list):
+
+ Next Header and Hdr Ext Len fields are set according to [RFC2460].
+
+ Routing Type field is set as TBD (to be allocated by IANA,
+ suggested value 4).
+
+ The Segment List is built with the FIRST segment of the path
+ encoded in the LAST element of the Segment List. Subsequent
+ segments are encoded on top of the first segment. Finally, the
+ LAST segment of the path is encoded in the FIRST element of the
+ Segment List. In other words, the Segment List is encoded in the
+ reverse order of the path.
+
+ The final DA of the packet is encoded as the last segment of the
+ path (encoded in the first element of the Segment List).
+
+ The DA of the packet is set with the value of the first segment
+ (found in the last element of the segment list).
+
+ The Segments Left field is set to n-1 where n is the number of
+ elements in the Segment List.
+
+ The First Segment field is set to n-1 where n is the number of
+ elements in the Segment List.
+
+ The packet is sent out towards the first segment (i.e.: the one
+ represented in the packet DA).
+
+ HMAC TLV may be set according to Section 5.
+
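+ As an illustration, a minimal C sketch of this encoding, using the
+ srh_t sketched in Section 3 (names and helper are this document's
+ own; the VPP helper ip6_sr_compute_rewrite_string_insert later in
+ this patch precomputes the same layout):
+
+    #include <string.h>
+
+    /* Encode a path of n >= 1 segments (path[0] = first hop) plus the
+       final destination (the initial content of dst) into an SRH with
+       n+1 Segment List elements; dst is rewritten to the first
+       segment.  The srh buffer must have room for n+1 entries. */
+    static void
+    srh_encode_path (srh_t *srh, uint8_t dst[16],
+                     const uint8_t path[][16], unsigned int n)
+    {
+      unsigned int i;
+
+      memcpy (srh->segments[0], dst, 16);   /* final DA = last segment */
+      for (i = 0; i < n; i++)               /* reverse the path into the list */
+        memcpy (srh->segments[n - i], path[i], 16);
+      srh->segments_left = n;               /* n+1 elements, so n = n_elts - 1 */
+      srh->first_segment = n;
+      memcpy (dst, path[0], 16);            /* packet DA = first segment */
+    }
+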
+4.2. Transit Node
+
+ According to [RFC2460], the only node that is allowed to inspect the
+ Routing Extension Header (and therefore the SRH) is the node
+ corresponding to the DA of the packet. Any other transit node MUST
+ NOT inspect the underlying routing header and MUST forward the packet
+ towards the DA according to the IPv6 routing table.
+
+ In the example case described in Section 2.2.2, when SR-capable nodes
+ are connected through an overlay spanning multiple third-party
+ infrastructures, it is safe to send SRH packets (i.e.: packets having
+ a Segment Routing Header) between overlay/SR-capable nodes as long as
+ the segment list does not include any of the transit provider nodes.
+ In addition, as a generic security measure, any service provider will
+ block any packet destined to one of its internal routers, especially
+ if these packets have an extension header in them.
+
+4.3. SR Segment Endpoint Node
+
+ The SR segment endpoint node is the node whose address is in the DA.
+ The segment endpoint node inspects the SRH and proceeds as follows
+ (a C sketch follows the pseudocode):
+
+ 1. IF DA = myself (segment endpoint)
+ 2.   IF Segments Left > 0 THEN
+          decrement Segments Left
+          update DA with Segment List[Segments Left]
+ 3.   ELSE continue IPv6 processing of the packet
+          End of processing.
+ 4. Forward the packet out
+
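+ As an illustration, the same procedure rendered in C, again using
+ the srh_t sketched in Section 3 (a sketch, not any particular
+ stack's code):
+
+    #include <string.h>
+
+    /* Returns 1 if the DA was updated and the packet must be
+       forwarded to the new DA, 0 if normal IPv6 processing of the
+       next header continues. */
+    static int
+    sr_endpoint_process (uint8_t dst[16], srh_t *srh)
+    {
+      if (srh->segments_left == 0)
+        return 0;               /* last segment reached */
+      srh->segments_left--;     /* step to the next segment */
+      memcpy (dst, srh->segments[srh->segments_left], 16);
+      return 1;                 /* forward towards the new DA */
+    }
+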
+5. Security Considerations
+
+ This section analyzes the security threat model, the security issues
+ and proposed solutions related to the new Segment Routing Header.
+
+ The Segment Routing Header (SRH) is simply another type of the
+ routing header as described in RFC 2460 [RFC2460] and is:
+
+ o Added by an SR edge router when entering the segment routing
+ domain or by the originating host itself. The source host can
+ even be outside the SR domain;
+
+ o inspected and acted upon when reaching the destination address of
+ the IP header per RFC 2460 [RFC2460].
+
+ Per RFC2460 [RFC2460], routers on the path that simply forward an
+ IPv6 packet (i.e. the IPv6 destination address is none of theirs)
+ will never inspect and process the content of the SRH. Routers with
+ an interface IPv6 address equal to the destination address field of
+ the IPv6 packet MUST parse the SRH and, if supported and if the local
+ configuration allows it, MUST act according to the SRH content.
+
+ According to RFC2460 [RFC2460], the default behavior of a non-SR-
+ capable router upon receipt of an IPv6 packet with an SRH destined to
+ an address of its own is to:
+
+ o ignore the SRH completely if the Segments Left field is 0 and
+ proceed to process the next header in the IPv6 packet;
+
+ o discard the IPv6 packet if the Segments Left field is greater than
+ 0; it MAY send a Parameter Problem ICMP message back to the Source
+ Address.
+
+5.1. Threat model
+
+5.1.1. Source routing threats
+
+ Using an SRH is similar to source routing; therefore, it has some
+ well-known security issues as described in RFC4942 [RFC4942] section
+ 2.1.1 and RFC5095 [RFC5095]:
+
+ o amplification attacks: where a packet could be forged in such a
+ way as to cause looping among a set of SR-enabled routers, causing
+ unnecessary traffic, hence a Denial of Service (DoS) against
+ bandwidth;
+
+ o reflection attack: where a hacker could force an intermediate node
+ to appear as the immediate attacker, hence hiding the real
+ attacker from naive forensics;
+
+ o bypass attack: where an intermediate node could be used as a
+ stepping stone (for example in a De-Militarized Zone) to attack
+ another host (for example in the datacenter or any back-end
+ server).
+
+5.1.2. Applicability of RFC 5095 to SRH
+
+ First of all, the reader must remember this specific part of section
+ 1 of RFC5095 [RFC5095]: "A side effect is that this also eliminates
+ benign RH0 use-cases; however, such applications may be facilitated
+ by future Routing Header specifications." In short, it is not
+ forbidden to create new, secure types of Routing Header; for example,
+ RFC 6554 (RPL) [RFC6554] also creates a new Routing Header type for a
+ specific application confined in a single network.
+
+ In the segment routing architecture described in
+ [I-D.ietf-spring-segment-routing] there are basically two kinds of
+ nodes (routers and hosts):
+
+ o nodes within the SR domain, which is within one single
+ administrative domain, i.e., where all nodes are trusted anyway;
+ otherwise, the damage caused by those nodes could be worse than
+ amplification attacks: traffic interception, man-in-the-middle
+ attacks, more severe DoS by dropping packets, and so on.
+
+ o nodes outside of the SR domain, which is outside of the
+ administrative segment routing domain; hence, they cannot be trusted
+ because there is no physical security for those nodes, i.e., they
+ can be replaced by hostile nodes or can be coerced into wrong
+ behaviors.
+
+ The main use case for SR consists of the single administrative domain
+ where only trusted nodes with SR enabled and configured participate
+ in SR: this is the same model as in RFC6554 [RFC6554]. Non-trusted
+ nodes do not participate, either because SR processing is not enabled
+ by default or because they only process SRHs from nodes within their
+ domain.
+
+ Moreover, all SR nodes ignore SRHs created by outsiders, based on
+ topology information (received on a peering or internal interface) or
+ on presence and validity of the HMAC field. Therefore, if
+ intermediate nodes ONLY act on valid and authorized SRH (such as
+ within a single administrative domain), then there is no security
+ threat similar to RH-0. Hence, the RFC 5095 [RFC5095] attacks are
+ not applicable.
+
+5.1.3. Service stealing threat
+
+ Segment routing is used for added-value services; there is also a
+ need to prevent non-participating nodes from using those services.
+ This is called 'service stealing prevention'.
+
+5.1.4. Topology disclosure
+
+ The SRH may also contain IPv6 addresses of some intermediate SR-
+ nodes in the path towards the destination. This obviously reveals
+ those addresses to potentially hostile attackers if those attackers
+ are able to intercept packets containing an SRH. On the other hand,
+ if the attacker can do a traceroute whose probes will be forwarded
+ along the SR path, then there is little to be learned by intercepting
+ the SRH itself.
+
+5.1.5. ICMP Generation
+
+ Per section 4.4 of RFC2460 [RFC2460], when destination nodes (i.e.
+ where the destination address is one of theirs) receive a Routing
+ Header with unsupported Routing Type, the required behavior is:
+
+ o If Segments Left is zero, the node must ignore the Routing header
+ and proceed to process the next header in the packet.
+
+ o If Segments Left is non-zero, the node must discard the packet and
+ send an ICMP Parameter Problem, Code 0, message to the packet's
+ Source Address, pointing to the unrecognized Routing Type.
+
+ This required behavior could be used by an attacker to force the
+ generation of ICMP messages by any node. The attacker could send
+ packets with an SRH (with Segments Left set to 0) destined to a node
+ not supporting the SRH. Per RFC2460 [RFC2460], the destination node
+ could generate an ICMP message, causing local CPU utilization, and if
+ the source of the offending packet was spoofed, this could lead to a
+ reflection attack without any amplification.
+
+ It must be noted that this is a required behavior for any unsupported
+ Routing Type and is not limited to SRH packets. So, it is not
+ specific to the SRH; the usual rate limiting for ICMP generation is
+ required anyway for any IPv6 implementation and has been implemented
+ and deployed for many years.
+
+5.2. Security fields in SRH
+
+ This section summarizes the use of specific fields in the SRH. They
+ are based on a keyed-hash message authentication code (HMAC).
+
+ The security-related fields in the SRH are instantiated by the HMAC
+ TLV, containing:
+
+ o HMAC Key-id, 32 bits wide;
+
+ o HMAC, 256 bits wide (optional, exists only if HMAC Key-id is not
+ 0).
+
+ The HMAC field is the output of the HMAC computation (per RFC 2104
+ [RFC2104]), using a pre-shared key identified by HMAC Key-id, over
+ the text consisting of the concatenation of (a C sketch follows the
+ list):
+
+ o the source IPv6 address;
+
+ o First Segment field;
+
+ o an octet of bit flags;
+
+ o HMAC Key-id;
+
+ o all addresses in the Segment List.
+
+
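+ As an illustration, a minimal sketch of this computation in C,
+ assuming OpenSSL's HMAC() and EVP_sha256() (the document mandates
+ no particular library), with key_id already in network byte order
+ and at most 128 segments:
+
+    #include <stdint.h>
+    #include <string.h>
+    #include <openssl/hmac.h>
+
+    /* Build the HMAC input text listed above and compute HMAC-SHA256
+       over it; out receives 32 octets. */
+    static void
+    srh_hmac_compute (const uint8_t src[16], uint8_t first_segment,
+                      uint8_t flags, uint32_t key_id,
+                      const uint8_t (*segments)[16], unsigned int n_seg,
+                      const uint8_t *key, int key_len, uint8_t out[32])
+    {
+      uint8_t text[22 + 16 * 128];  /* fixed fields + up to 128 segments */
+      unsigned int off = 0, i, md_len;
+
+      memcpy (text + off, src, 16);  off += 16;    /* source address */
+      text[off++] = first_segment;                 /* First Segment  */
+      text[off++] = flags;                         /* flags octet    */
+      memcpy (text + off, &key_id, 4);  off += 4;  /* HMAC Key-id    */
+      for (i = 0; i < n_seg; i++, off += 16)       /* segment list   */
+        memcpy (text + off, segments[i], 16);
+
+      HMAC (EVP_sha256 (), key, key_len, text, off, out, &md_len);
+    }
+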
+ The purpose of the HMAC TLV is to verify the validity, the integrity
+ and the authorization of the SRH itself. If an outsider of the SR
+ domain does not have access to a current pre-shared secret, then it
+ cannot compute the right HMAC field and the first SR router on the
+ path processing the SRH and configured to check the validity of the
+ HMAC will simply reject the packet.
+
+ The HMAC TLV is located at the end of the SRH simply because only the
+ router at the ingress of the SR domain needs to process it; all
+ other SR nodes can ignore it (based on local policy) because they
+ trust the upstream router. This is to speed up forwarding operations
+ because SR routers which do not validate the SRH do not need to parse
+ the SRH until the end.
+
+ The HMAC Key-id field allows for the simultaneous existence of
+ several hash algorithms (SHA-256, SHA3-256 ... or future ones) as
+ well as pre-shared keys. The HMAC Key-id field is opaque, i.e., it
+ has neither syntax nor semantic except as an index to the right
+ combination of pre-shared key and hash algorithm and except that a
+ value of 0 means that there is no HMAC field. Having an HMAC Key-id
+ field allows for pre-shared key roll-over: two pre-shared keys can
+ be supported for a while until all SR nodes converge to a fresher
+ pre-shared key. It could also allow for interoperation among
+ different SR domains if allowed by local policy and assuming a
+ collision-free HMAC Key Id allocation.
+
+ When a specific SRH is linked to a time-related service (such as
+ turbo-QoS for a 1-hour period) where the DA and Segment ID (SID) are
+ identical, then it is important to refresh the shared-secret
+ frequently, as the HMAC validity period expires only when the HMAC
+ Key-id and its associated shared-secret expire.
+
+5.2.1. Selecting a hash algorithm
+
+ The HMAC field in the HMAC TLV is 256 bit wide. Therefore, the HMAC
+ MUST be based on a hash function whose output is at least 256 bits.
+ If the output of the hash function is 256 bits, then this output is
+ simply inserted in the HMAC field. If the output is larger than 256
+ bits, then it is truncated to 256 bits by
+ taking the least-significant 256 bits and inserting them in the HMAC
+ field.
+
+ SRH implementations can support multiple hash functions but MUST
+ implement SHA-2 [FIPS180-4] in its SHA-256 variant.
+
+ NOTE: SHA-1 is currently used by some early implementations for
+ quick interoperability testing; the 160-bit hash value must then be
+ right-hand padded with 96 bits set to 0. The authors understand that
+ this is not secure but is acceptable for limited tests.
+
+5.2.2. Performance impact of HMAC
+
+ While adding an HMAC to each and every SR packet increases
+ security, it has a performance impact. Nevertheless, it must be
+ noted that:
+
+ o the HMAC field is used only when SRH is added by a device (such as
+ a home set-up box) which is outside of the segment routing domain.
+ If the SRH is added by a router in the trusted segment routing
+ domain, then there is no need for an HMAC field, hence no
+ performance impact.
+
+ o when present, the HMAC field MUST only be checked and validated by
+ the first router of the segment routing domain; this router is
+ named the 'validating SR router'. Downstream routers need not
+ inspect the HMAC field.
+
+ o this validating router can also have a cache of <IPv6 header +
+ SRH, HMAC field value> to improve performance. It is not the
+ same use case as in IPsec, where the HMAC value was unique per
+ packet; in the SRH, the HMAC value is unique per flow.
+
+ o lastly, hash functions such as SHA-2 have been optimized for
+ security and performance, and there are multiple implementations
+ with good performance.
+
+ With the above points in mind, the performance impact of using HMAC
+ is minimized.
+
+5.2.3. Pre-shared key management
+
+ The HMAC Key-id field allows for:
+
+ o key roll-over: when there is a need to change the key (the hash
+ pre-shared secret), then multiple pre-shared keys can be used
+ simultaneously. The validating router can have a table of <HMAC
+ Key-id, pre-shared secret> for the currently active and future
+ keys.
+
+ o different algorithms: by extending the previous table to <HMAC
+ Key-id, hash function, pre-shared secret>, the validating router
+ can also support simultaneously several hash algorithms (see
+ Section 5.2.1).
+
+ The pre-shared secret distribution can be done:
+
+ o in the configuration of the validating routers, either by static
+ configuration or any SDN-oriented approach;
+
+ o dynamically, using a trusted key distribution protocol such as
+ [RFC6407].
+
+ The intent of this document is NOT to define yet-another-key-
+ distribution-protocol.
+
+5.3. Deployment Models
+
+5.3.1. Nodes within the SR domain
+
+ An SR domain is defined as a set of interconnected routers where all
+ routers at the perimeter are configured to add and act on SRH. Some
+ routers inside the SR domain can also act on SRH or simply forward
+ IPv6 packets.
+
+ The routers inside an SR domain can be trusted to generate SRHs and
+ to process SRHs received on interfaces that are part of the SR
+ domain. These nodes MUST drop all SRH packets received on an
+ interface that is not part of the SR domain and containing an SRH
+ whose HMAC field cannot be validated by local policies. This
+ obviously includes packets with an SRH generated by a
+ non-cooperative SR domain.
+
+ If the validation fails, then these packets MUST be dropped, ICMP
+ error messages (parameter problem) SHOULD be generated (but rate
+ limited) and SHOULD be logged.
+
+5.3.2. Nodes outside of the SR domain
+
+ Nodes outside of the SR domain cannot be trusted for physical
+ security; hence, they need to request by some trusted means (outside
+ of the scope of this document) a complete SRH for each new connection
+ (i.e. new destination address). The received SRH MUST include an
+ HMAC TLV which is computed correctly (see Section 5.2).
+
+ When an outside node sends a packet with an SRH towards an SR
+ domain ingress node, the packet MUST contain the HMAC TLV (with the
+ Key-id and HMAC fields) and the destination address MUST be an
+ address of an SR domain ingress node.
+
+ The ingress SR router, i.e., the router with an interface address
+ equal to the destination address, MUST verify the HMAC TLV.
+
+ If the validation is successful, then the packet is simply forwarded
+ as usual for an SR packet. As long as the packet travels within the
+ SR domain, no further HMAC check needs to be done. Subsequent
+ routers in the SR domain MAY verify the HMAC TLV when they process
+ the SRH (i.e. when they are the destination).
+
+ If the validation fails, then this packet MUST be dropped, an ICMP
+ error message (parameter problem) SHOULD be generated (but rate
+ limited) and SHOULD be logged.
+
+5.3.3. SR path exposure
+
+ As the intermediate SR node addresses appear in the SRH, if this
+ SRH is visible to an outsider then he/she could reuse this knowledge
+ to launch an attack on the intermediate SR nodes or gain some insider
+ knowledge of the topology. This is especially applicable when the
+ path between the source node and the first SR domain ingress router
+ is on the public Internet.
+
+ The first remark is to state that 'security by obscurity' is never
+ enough; in other words, the security policy of the SR domain MUST
+ assume that the internal topology and addressing is known by the
+ attacker. A simple traceroute will also give the same information
+ (with even more information, as all intermediate nodes between SIDs
+ will also be exposed). IPsec Encapsulating Security Payload
+ [RFC4303] cannot be used to protect the SRH since, per RFC4303, the
+ ESP header must appear after any routing header (including the SRH).
+
+ To prevent a user from leveraging knowledge gained by intercepting
+ the SRH, it is recommended to apply an infrastructure Access Control
+ List (iACL) at the edge of the SR domain. This iACL will drop all
+ packets from outside the SR-domain whose destination is any address
+ of any router inside the domain. This security policy should be
+ tuned for local operations.
+
+5.3.4. Impact of BCP-38
+
+ BCP-38 [RFC2827], also known as "Network Ingress Filtering", checks
+ whether the source address of packets received on an interface is
+ valid for this interface. The use of loose source routing such as
+ SRH forces packets to follow a path which differs from the expected
+ routing. Therefore, if BCP-38 were implemented in all routers inside
+ the SR domain, then SR packets could be received by an interface
+ which is not the expected one and the packets could be dropped.
+
+ As an SR domain is usually a subset of one administrative domain, as
+ BCP-38 is only deployed at the ingress routers of this
+ administrative domain, and as packets arriving at those ingress
+ routers have normally been forwarded using the normal routing
+ information, there is no reason why these ingress routers should
+ drop SRH packets based on BCP-38. Routers inside the domain
+ commonly do not apply BCP-38; so, this is not a problem.
+
+6. IANA Considerations
+
+ This document makes the following registrations in the Internet
+ Protocol Version 6 (IPv6) Parameters "Routing Type" registry
+ maintained by IANA:
+
+ Suggested Description Reference
+ Value
+ ----------------------------------------------------------
+ 4 Segment Routing Header (SRH) This document
+
+ In addition, this document requests IANA to create and maintain a new
+ Registry: "Segment Routing Header Type-Value Objects". The following
+ code-points are requested from the registry:
+
+ Registry: Segment Routing Header Type-Value Objects
+
+ Suggested Description Reference
+ Value
+ -----------------------------------------------------
+ 1 Ingress Node TLV This document
+ 2 Egress Node TLV This document
+ 3 Opaque Container TLV This document
+ 4 Padding TLV This document
+ 5 HMAC TLV This document
+
+7. Manageability Considerations
+
+ TBD
+
+8. Contributors
+
+ Dave Barach, John Leddy, John Brzozowski, Pierre Francois, Nagendra
+ Kumar, Mark Townsley, Christian Martin, Roberta Maglione, James
+ Connolly, Aloys Augustin contributed to the content of this document.
+
+9. Acknowledgements
+
+ The authors would like to thank Ole Troan, Bob Hinden, Fred Baker,
+ Brian Carpenter, Alexandru Petrescu and Punit Kumar Jaiswal for their
+ comments to this document.
+
+10. References
+
+10.1. Normative References
+
+ [FIPS180-4]
+ National Institute of Standards and Technology, "FIPS
+ 180-4 Secure Hash Standard (SHS)", March 2012,
+ <http://csrc.nist.gov/publications/fips/fips180-4/
+ fips-180-4.pdf>.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119,
+ DOI 10.17487/RFC2119, March 1997,
+ <http://www.rfc-editor.org/info/rfc2119>.
+
+ [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6
+ (IPv6) Specification", RFC 2460, DOI 10.17487/RFC2460,
+ December 1998, <http://www.rfc-editor.org/info/rfc2460>.
+
+ [RFC4303] Kent, S., "IP Encapsulating Security Payload (ESP)",
+ RFC 4303, DOI 10.17487/RFC4303, December 2005,
+ <http://www.rfc-editor.org/info/rfc4303>.
+
+ [RFC5095] Abley, J., Savola, P., and G. Neville-Neil, "Deprecation
+ of Type 0 Routing Headers in IPv6", RFC 5095,
+ DOI 10.17487/RFC5095, December 2007,
+ <http://www.rfc-editor.org/info/rfc5095>.
+
+ [RFC6407] Weis, B., Rowles, S., and T. Hardjono, "The Group Domain
+ of Interpretation", RFC 6407, DOI 10.17487/RFC6407,
+ October 2011, <http://www.rfc-editor.org/info/rfc6407>.
+
+10.2. Informative References
+
+ [I-D.ietf-isis-segment-routing-extensions]
+ Previdi, S., Filsfils, C., Bashandy, A., Gredler, H.,
+ Litkowski, S., Decraene, B., and j. jefftant@gmail.com,
+ "IS-IS Extensions for Segment Routing", draft-ietf-isis-
+ segment-routing-extensions-09 (work in progress), October
+ 2016.
+
+ [I-D.ietf-ospf-ospfv3-segment-routing-extensions]
+ Psenak, P., Previdi, S., Filsfils, C., Gredler, H.,
+ Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3
+ Extensions for Segment Routing", draft-ietf-ospf-ospfv3-
+ segment-routing-extensions-07 (work in progress), October
+ 2016.
+
+ [I-D.ietf-spring-ipv6-use-cases]
+ Brzozowski, J., Leddy, J., Townsley, W., Filsfils, C., and
+ R. Maglione, "IPv6 SPRING Use Cases", draft-ietf-spring-
+ ipv6-use-cases-08 (work in progress), January 2017.
+
+ [I-D.ietf-spring-resiliency-use-cases]
+ Filsfils, C., Previdi, S., Decraene, B., and R. Shakir,
+ "Resiliency use cases in SPRING networks", draft-ietf-
+ spring-resiliency-use-cases-08 (work in progress), October
+ 2016.
+
+ [I-D.ietf-spring-segment-routing]
+ Filsfils, C., Previdi, S., Decraene, B., Litkowski, S.,
+ and R. Shakir, "Segment Routing Architecture", draft-ietf-
+ spring-segment-routing-10 (work in progress), November
+ 2016.
+
+ [I-D.ietf-spring-segment-routing-mpls]
+ Filsfils, C., Previdi, S., Bashandy, A., Decraene, B.,
+ Litkowski, S., Horneffer, M., Shakir, R.,
+ jefftant@gmail.com, j., and E. Crabbe, "Segment Routing
+ with MPLS data plane", draft-ietf-spring-segment-routing-
+ mpls-06 (work in progress), January 2017.
+
+ [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D.
+ Zappala, "Source Demand Routing: Packet Format and
+ Forwarding Specification (Version 1)", RFC 1940,
+ DOI 10.17487/RFC1940, May 1996,
+ <http://www.rfc-editor.org/info/rfc1940>.
+
+ [RFC2104] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed-
+ Hashing for Message Authentication", RFC 2104,
+ DOI 10.17487/RFC2104, February 1997,
+ <http://www.rfc-editor.org/info/rfc2104>.
+
+ [RFC2827] Ferguson, P. and D. Senie, "Network Ingress Filtering:
+ Defeating Denial of Service Attacks which employ IP Source
+ Address Spoofing", BCP 38, RFC 2827, DOI 10.17487/RFC2827,
+ May 2000, <http://www.rfc-editor.org/info/rfc2827>.
+
+ [RFC4942] Davies, E., Krishnan, S., and P. Savola, "IPv6 Transition/
+ Co-existence Security Considerations", RFC 4942,
+ DOI 10.17487/RFC4942, September 2007,
+ <http://www.rfc-editor.org/info/rfc4942>.
+
+ [RFC6554] Hui, J., Vasseur, JP., Culler, D., and V. Manral, "An IPv6
+ Routing Header for Source Routes with the Routing Protocol
+ for Low-Power and Lossy Networks (RPL)", RFC 6554,
+ DOI 10.17487/RFC6554, March 2012,
+ <http://www.rfc-editor.org/info/rfc6554>.
+
+ [RFC7855] Previdi, S., Ed., Filsfils, C., Ed., Decraene, B.,
+ Litkowski, S., Horneffer, M., and R. Shakir, "Source
+ Packet Routing in Networking (SPRING) Problem Statement
+ and Requirements", RFC 7855, DOI 10.17487/RFC7855, May
+ 2016, <http://www.rfc-editor.org/info/rfc7855>.
+
+Authors' Addresses
+
+ Stefano Previdi (editor)
+ Cisco Systems, Inc.
+ Via Del Serafico, 200
+ Rome 00142
+ Italy
+
+ Email: sprevidi@cisco.com
+
+
+ Clarence Filsfils
+ Cisco Systems, Inc.
+ Brussels
+ BE
+
+ Email: cfilsfil@cisco.com
+
+
+ Brian Field
+ Comcast
+ 4100 East Dry Creek Road
+ Centennial, CO 80122
+ US
+
+ Email: Brian_Field@cable.comcast.com
+
+
+ Ida Leung
+ Rogers Communications
+ 8200 Dixie Road
+ Brampton, ON L6T 0C1
+ CA
+
+ Email: Ida.Leung@rci.rogers.com
+
+ Jen Linkova
+ Google
+ 1600 Amphitheatre Parkway
+ Mountain View, CA 94043
+ US
+
+ Email: furry@google.com
+
+
+ Ebben Aries
+ Facebook
+ US
+
+ Email: exa@fb.com
+
+
+ Tomoya Kosugi
+ NTT
+ 3-9-11, Midori-Cho Musashino-Shi,
+ Tokyo 180-8585
+ JP
+
+ Email: kosugi.tomoya@lab.ntt.co.jp
+
+
+ Eric Vyncke
+ Cisco Systems, Inc.
+ De Kleetlaann 6A
+ Diegem 1831
+ Belgium
+
+ Email: evyncke@cisco.com
+
+
+ David Lebrun
+ Universite Catholique de Louvain
+ Place Ste Barbe, 2
+ Louvain-la-Neuve, 1348
+ Belgium
+
+ Email: david.lebrun@uclouvain.be
+
diff --git a/src/vnet/srv6/sr.api b/src/vnet/srv6/sr.api
new file mode 100644
index 00000000..9e900741
--- /dev/null
+++ b/src/vnet/srv6/sr.api
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief IPv6 SR LocalSID add/del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_del Boolean of whether it's a delete instruction
+ @param localsid_addr IPv6 address of the localsid
+ @param end_psp Boolean of whether decapsulation is allowed in this function
+ @param behavior Type of behavior (function) for this localsid
+ @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
+ @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+ @param fib_table FIB table in which we should install the localsid entry
+ @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+*/
+autoreply define sr_localsid_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_del;
+ u8 localsid_addr[16];
+ u8 end_psp;
+ u8 behavior;
+ u32 sw_if_index;
+ u32 vlan_index;
+ u32 fib_table;
+ u8 nh_addr[16];
+};
+
+/** \brief IPv6 SR policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param weight is the weight of the sid list. optional.
+ @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation)
+ @param type is the type of the SR policy. (0.Default // 1.Spray)
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param segments is a vector of IPv6 address composing the segment list
+*/
+autoreply define sr_policy_add
+{
+ u32 client_index;
+ u32 context;
+ u8 bsid_addr[16];
+ u32 weight;
+ u8 is_encap;
+ u8 type;
+ u32 fib_table;
+ u8 n_segments;
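+ /* variable length: n_segments 16-octet IPv6 addresses follow */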
+ u8 segments[0];
+};
+
+/** \brief IPv6 SR policy modification
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param sr_policy_index is the index of the SR policy
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param operation is the operation to perform (among the top ones)
+ @param segments is a vector of IPv6 address composing the segment list
+ @param sl_index is the index of the Segment List to modify/delete
+ @param weight is the weight of the sid list. optional.
+ @param is_encap Mode. Encapsulation or SRH insertion.
+*/
+autoreply define sr_policy_mod
+{
+ u32 client_index;
+ u32 context;
+ u8 bsid_addr[16];
+ u32 sr_policy_index;
+ u32 fib_table;
+ u8 operation;
+ u32 sl_index;
+ u32 weight;
+ u8 n_segments;
+ u8 segments[0];
+};
+
+/** \brief IPv6 SR policy deletion
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param index is the index of the SR policy
+*/
+autoreply define sr_policy_del
+{
+ u32 client_index;
+ u32 context;
+ u8 bsid_addr[16];
+ u32 sr_policy_index;
+};
+
+/** \brief IPv6 SR steering add/del
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_del Boolean of whether it's a delete instruction
+ @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ @param sr_policy is the index of the SR Policy (alt to bsid)
+ @param table_id is the VRF where to install the FIB entry for the BSID
+ @param prefix is the IPv4/v6 address for L3 traffic type
+ @param mask_width is the mask for L3 traffic type
+ @param sw_if_index is the incoming interface for L2 traffic
+ @param traffic_type describes the type of traffic
+*/
+autoreply define sr_steering_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_del;
+ u8 bsid_addr[16];
+ u32 sr_policy_index;
+ u32 table_id;
+ u8 prefix_addr[16];
+ u32 mask_width;
+ u32 sw_if_index;
+ u8 traffic_type;
+};
+
+/** \brief Dump the list of SR LocalSIDs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+/**define sr_localsids_dump
+{
+ u32 client_index;
+ u32 context;
+};*/
+
+/** \brief Details about a single SR LocalSID
+ @param context - returned sender context, to match reply w/ request
+ @param localsid_addr IPv6 address of the localsid
+ @param behavior Type of behavior (function) for this localsid
+ @param end_psp Boolean of whether decapsulation is allowed in this function
+ @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
+ @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+ @param fib_table FIB table in which we should install the localsid entry
+ @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+*/
+/**manual_endian define sr_localsid_details
+{
+ u32 context;
+ u8 localsid_addr[16];
+ u8 behavior;
+ u8 end_psp;
+ u32 sw_if_index;
+ u32 vlan_index;
+ u32 fib_table;
+ u8 nh_addr[16];
+};*/
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr.c b/src/vnet/srv6/sr.c
new file mode 100755
index 00000000..eb4f09e7
--- /dev/null
+++ b/src/vnet/srv6/sr.c
@@ -0,0 +1,57 @@
+/*
+ * sr.c: ipv6 segment routing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing initialization
+ *
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+ip6_sr_main_t sr_main;
+
+/**
+ * @brief no-op lock function.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+void
+sr_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+/**
+ * @brief no-op unlock function.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+void
+sr_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h
new file mode 100755
index 00000000..d0f42869
--- /dev/null
+++ b/src/vnet/srv6/sr.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing data structures definitions
+ *
+ */
+
+#ifndef included_vnet_srv6_h
+#define included_vnet_srv6_h
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#define IPv6_DEFAULT_HEADER_LENGTH 40
+#define IPv6_DEFAULT_HOP_LIMIT 64
+#define IPv6_DEFAULT_MAX_MASK_WIDTH 128
+
+#define SR_BEHAVIOR_END 1
+#define SR_BEHAVIOR_X 2
+#define SR_BEHAVIOR_T 3
+#define SR_BEHAVIOR_D_FIRST 4 /* Unused. Separator in between regular and D */
+#define SR_BEHAVIOR_DX2 5
+#define SR_BEHAVIOR_DX6 6
+#define SR_BEHAVIOR_DX4 7
+#define SR_BEHAVIOR_DT6 8
+#define SR_BEHAVIOR_DT4 9
+#define SR_BEHAVIOR_LAST 10 /* Must always be the last one */
+
+#define SR_STEER_L2 2
+#define SR_STEER_IPV4 4
+#define SR_STEER_IPV6 6
+
+#define SR_FUNCTION_SIZE 4
+#define SR_ARGUMENT_SIZE 4
+
+#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
+
+/**
+ * @brief SR Segment List (SID list)
+ */
+typedef struct
+{
+ ip6_address_t *segments; /**< SIDs (key) */
+
+ u32 weight; /**< SID list weight (wECMP / UCMP) */
+
+ u8 *rewrite; /**< Precomputed rewrite header */
+ u8 *rewrite_bsid; /**< Precomputed rewrite header for bindingSID */
+
+ dpo_id_t bsid_dpo; /**< DPO for Encaps/Insert for BSID */
+ dpo_id_t ip6_dpo; /**< DPO for Encaps/Insert IPv6 */
+ dpo_id_t ip4_dpo; /**< DPO for Encaps IPv4 */
+} ip6_sr_sl_t;
+
+/* SR policy types */
+#define SR_POLICY_TYPE_DEFAULT 0
+#define SR_POLICY_TYPE_SPRAY 1
+/**
+ * @brief SR Policy
+ */
+typedef struct
+{
+ u32 *segments_lists; /**< SID lists indexes (vector) */
+
+ ip6_address_t bsid; /**< BindingSID (key) */
+
+ u8 type; /**< Type (default is 0) */
+ /* SR Policy specific DPO */
+ /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */
+ /* IF Type = SPRAY then Spray DPO with all SID lists */
+ dpo_id_t bsid_dpo; /**< SR Policy specific DPO - BSID */
+ dpo_id_t ip4_dpo; /**< SR Policy specific DPO - IPv4 */
+ dpo_id_t ip6_dpo; /**< SR Policy specific DPO - IPv6 */
+
+ u32 fib_table; /**< FIB table */
+
+ u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */
+} ip6_sr_policy_t;
+
+/**
+ * @brief SR LocalSID
+ */
+typedef struct
+{
+ ip6_address_t localsid; /**< LocalSID IPv6 address */
+
+ char end_psp; /**< Combined with End.PSP? */
+
+ u16 behavior; /**< Behavior associated to this localsid */
+
+ union
+ {
+ u32 sw_if_index; /**< xconnect only */
+ u32 vrf_index; /**< vrf only */
+ };
+
+ u32 fib_table; /**< FIB table where localsid is registered */
+
+ u32 vlan_index; /**< VLAN tag (not an index) */
+
+ ip46_address_t next_hop; /**< Next_hop for xconnect usage only */
+
+ u32 nh_adj; /**< Next_adj for xconnect usage only */
+
+ void *plugin_mem; /**< Memory to be used by the plugin callback functions */
+} ip6_sr_localsid_t;
+
+typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid);
+
+/**
+ * @brief SR LocalSID behavior registration
+ */
+typedef struct
+{
+ u16 sr_localsid_function_number; /**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */
+
+ u8 *function_name; /**< Function name. (key). */
+
+ u8 *keyword_str; /**< Behavior keyword (i.e. End.X) */
+
+ u8 *def_str; /**< Behavior definition (i.e. Endpoint with cross-connect) */
+
+ u8 *params_str; /**< Behavior parameters (i.e. <oif> <IP46next_hop>) */
+
+ dpo_type_t dpo; /**< DPO type registration */
+
+ format_function_t *ls_format; /**< LocalSID format function */
+
+ unformat_function_t *ls_unformat; /**< LocalSID unformat function */
+
+ sr_plugin_callback_t *creation; /**< Function within plugin that will be called after localsid creation*/
+
+ sr_plugin_callback_t *removal; /**< Function within plugin that will be called before localsid removal */
+} sr_localsid_fn_registration_t;
+
+/**
+ * @brief Steering db key
+ *
+ * L3 is IPv4/IPv6 + mask
+ * L2 is sw_if_index + vlan
+ */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ ip46_address_t prefix; /**< IP address of the prefix */
+ u32 mask_width; /**< Mask width of the prefix */
+ u32 fib_table; /**< VRF of the prefix */
+ } l3;
+ struct
+ {
+ u32 sw_if_index; /**< Incoming software interface */
+ } l2;
+ };
+ u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */
+ u8 padding[3];
+} sr_steering_key_t;
+
+typedef struct
+{
+ sr_steering_key_t classify; /**< Traffic classification */
+ u32 sr_policy; /**< SR Policy index */
+} ip6_sr_steering_policy_t;
+
+/**
+ * @brief Segment Routing main datastructure
+ */
+typedef struct
+{
+ /* L2-input -> SR rewrite next index */
+ u32 l2_sr_policy_rewrite_index;
+
+ /* SR SID lists */
+ ip6_sr_sl_t *sid_lists;
+
+ /* SRv6 policies */
+ ip6_sr_policy_t *sr_policies;
+
+ /* Hash table mapping BindingSID to SRv6 policy */
+ mhash_t sr_policies_index_hash;
+
+ /* Pool of SR localsid instances */
+ ip6_sr_localsid_t *localsids;
+
+ /* Hash table mapping LOC:FUNC to SR LocalSID instance */
+ mhash_t sr_localsids_index_hash;
+
+ /* Pool of SR steer policies instances */
+ ip6_sr_steering_policy_t *steer_policies;
+
+ /* Hash table mapping steering rules to SR steer instance */
+ mhash_t sr_steer_policies_hash;
+
+ /* L2 steering ifaces - sr_policies */
+ u32 *sw_iface_sr_policies;
+
+ /* Spray DPO */
+ dpo_type_t sr_pr_spray_dpo_type;
+
+ /* Plugin functions */
+ sr_localsid_fn_registration_t *plugin_functions;
+
+ /* Find plugin function by name */
+ uword *plugin_functions_by_key;
+
+ /* Counters */
+ vlib_combined_counter_main_t sr_ls_valid_counters;
+ vlib_combined_counter_main_t sr_ls_invalid_counters;
+
+ /* SR Policies FIBs */
+ u32 fib_table_ip6;
+ u32 fib_table_ip4;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ip6_sr_main_t;
+
+extern ip6_sr_main_t sr_main;
+
+extern vlib_node_registration_t sr_policy_rewrite_encaps_node;
+extern vlib_node_registration_t sr_policy_rewrite_insert_node;
+extern vlib_node_registration_t sr_localsid_node;
+extern vlib_node_registration_t sr_localsid_d_node;
+
+extern void sr_dpo_lock (dpo_id_t * dpo);
+extern void sr_dpo_unlock (dpo_id_t * dpo);
+
+extern int
+sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
+ u8 * keyword_str, u8 * def_str,
+ u8 * params_str, dpo_type_t * dpo,
+ format_function_t * ls_format,
+ unformat_function_t * ls_unformat,
+ sr_plugin_callback_t * creation_fn,
+ sr_plugin_callback_t * removal_fn);
+
+extern int
+sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
+ u32 weight, u8 behavior, u32 fib_table, u8 is_encap);
+extern int
+sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
+ u8 operation, ip6_address_t * segments, u32 sl_index,
+ u32 weight);
+extern int sr_policy_del (ip6_address_t * bsid, u32 index);
+
+extern int
+sr_cli_localsid (char is_del, ip6_address_t * localsid_addr,
+ char end_psp, u8 behavior, u32 sw_if_index,
+ u32 vlan_index, u32 fib_table, ip46_address_t * nh_addr,
+ void *ls_plugin_mem);
+
+extern int
+sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
+ u32 table_id, ip46_address_t * prefix, u32 mask_width,
+ u32 sw_if_index, u8 traffic_type);
+
+/**
+ * @brief SR rewrite string computation for SRH insertion (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion
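+ *
+ * Example: a Segment List of 3 SIDs yields 8 + (3+1)*16 = 72 bytes
+ * (the 8-byte SRH base plus one address slot per SID plus one slot
+ * for the original destination address), so srh->length = 72/8 - 1 = 8.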
+ */
+static inline u8 *
+ip6_sr_compute_rewrite_string_insert (ip6_address_t * sl)
+{
+ ip6_sr_header_t *srh;
+ ip6_address_t *addrp, *this_address;
+ u32 header_length = 0;
+ u8 *rs = NULL;
+
+ header_length = 0;
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t);
+
+ vec_validate (rs, header_length - 1);
+
+ srh = (ip6_sr_header_t *) rs;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->segments_left = vec_len (sl);
+ srh->first_segment = vec_len (sl);
+ srh->length = ((sizeof (ip6_sr_header_t) +
+ ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1;
+ srh->flags = 0x00;
+ srh->reserved = 0x0000;
+ addrp = srh->segments + vec_len (sl);
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+ return rs;
+}
+
+
+#endif /* included_vnet_sr_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c
new file mode 100644
index 00000000..623f672a
--- /dev/null
+++ b/src/vnet/srv6/sr_api.c
@@ -0,0 +1,254 @@
+/*
+ *------------------------------------------------------------------
+ * sr_api.c - ipv6 segment routing api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del) \
+_(SR_POLICY_DEL, sr_policy_del) \
+_(SR_STEERING_ADD_DEL, sr_steering_add_del)
+//_(SR_LOCALSIDS, sr_localsids_dump)
+//_(SR_LOCALSID_BEHAVIORS, sr_localsid_behaviors_dump)
+
+static void vl_api_sr_localsid_add_del_t_handler
+ (vl_api_sr_localsid_add_del_t * mp)
+{
+ vl_api_sr_localsid_add_del_reply_t *rmp;
+ int rv = 0;
+/*
+ * int sr_cli_localsid (char is_del, ip6_address_t *localsid_addr,
+ * char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table,
+ * ip46_address_t *nh_addr, void *ls_plugin_mem)
+ */
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = sr_cli_localsid (mp->is_del,
+ (ip6_address_t *) & mp->localsid_addr,
+ mp->end_psp,
+ mp->behavior,
+ ntohl (mp->sw_if_index),
+ ntohl (mp->vlan_index),
+ ntohl (mp->fib_table),
+ (ip46_address_t *) & mp->nh_addr, NULL);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp)
+{
+ vl_api_sr_policy_add_reply_t *rmp;
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t *this_address = (ip6_address_t *) mp->segments;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address));
+ this_address++;
+ }
+
+/*
+ * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ * u32 weight, u8 behavior, u32 fib_table, u8 is_encap)
+ */
+ int rv = 0;
+ rv = sr_policy_add ((ip6_address_t *) & mp->bsid_addr,
+ segments,
+ ntohl (mp->weight),
+ mp->type, ntohl (mp->fib_table), mp->is_encap);
+
+ REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY);
+}
+
+static void
+vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp)
+{
+ vl_api_sr_policy_mod_reply_t *rmp;
+
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t *this_address = (ip6_address_t *) mp->segments;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address));
+ this_address++;
+ }
+
+ int rv = 0;
+/*
+ * int
+ * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
+ * u8 operation, ip6_address_t *segments, u32 sl_index,
+ * u32 weight)
+ */
+ rv = sr_policy_mod ((ip6_address_t *) & mp->bsid_addr,
+ ntohl (mp->sr_policy_index),
+ ntohl (mp->fib_table),
+ mp->operation,
+ segments, ntohl (mp->sl_index), ntohl (mp->weight));
+
+ REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
+}
+
+static void
+vl_api_sr_policy_del_t_handler (vl_api_sr_policy_del_t * mp)
+{
+ vl_api_sr_policy_del_reply_t *rmp;
+ int rv = 0;
+/*
+ * int
+ * sr_policy_del (ip6_address_t *bsid, u32 index)
+ */
+ rv = sr_policy_del ((ip6_address_t *) & mp->bsid_addr,
+ ntohl (mp->sr_policy_index));
+
+ REPLY_MACRO (VL_API_SR_POLICY_DEL_REPLY);
+}
+
+static void vl_api_sr_steering_add_del_t_handler
+ (vl_api_sr_steering_add_del_t * mp)
+{
+ vl_api_sr_steering_add_del_reply_t *rmp;
+ int rv = 0;
+/*
+ * int
+ * sr_steering_policy(int is_del, ip6_address_t *bsid, u32 sr_policy_index,
+ * u32 table_id, ip46_address_t *prefix, u32 mask_width, u32 sw_if_index,
+ * u8 traffic_type)
+ */
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = sr_steering_policy (mp->is_del,
+ (ip6_address_t *) & mp->bsid_addr,
+ ntohl (mp->sr_policy_index),
+ ntohl (mp->table_id),
+ (ip46_address_t *) & mp->prefix_addr,
+ ntohl (mp->mask_width),
+ ntohl (mp->sw_if_index), mp->traffic_type);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SR_STEERING_ADD_DEL_REPLY);
+}
+
+/*
+ * sr_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_sr;
+#undef _
+}
+
+static clib_error_t *
+sr_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Manually register the sr policy add msg, so we trace
+ * enough bytes to capture a typical segment list
+ */
+ vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD,
+ "sr_policy_add",
+ vl_api_sr_policy_add_t_handler,
+ vl_noop_handler,
+ vl_api_sr_policy_add_t_endian,
+ vl_api_sr_policy_add_t_print, 256, 1);
+
+ /*
+ * Manually register the sr policy mod msg, so we trace
+ * enough bytes to capture a typical segment list
+ */
+ vl_msg_api_set_handlers (VL_API_SR_POLICY_MOD,
+ "sr_policy_mod",
+ vl_api_sr_policy_mod_t_handler,
+ vl_noop_handler,
+ vl_api_sr_policy_mod_t_endian,
+ vl_api_sr_policy_mod_t_print, 256, 1);
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr_doc.md b/src/vnet/srv6/sr_doc.md
new file mode 100644
index 00000000..5cdfc906
--- /dev/null
+++ b/src/vnet/srv6/sr_doc.md
@@ -0,0 +1,55 @@
+# SRv6: Segment Routing for IPv6 {#srv6_doc}
+
+This is a memo intended to contain documentation of the VPP SRv6 implementation.
+Anything that is not directly obvious should be documented here.
+For feedback on content that should be explained further, please mailto:pcamaril@cisco.com
+
+## Segment Routing
+
+Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior.
+
+Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) and the MPLS data plane (SR-MPLS). This page contains the SRv6 documentation.**
+
+## Segment Routing terminology
+
+* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05)
+* SegmentID (SID): is an IPv6 address.
+* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse.
+* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH by IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or it could be inserted within an existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights.
+* Local SID: is a SID associated with a processing function on the local node, which may range from advancing to the next SID in the SRH to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed.
+* BindingSID: a BindingSID is a SID (only one) associated one-to-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy is applied to that packet.
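+
+As a rough sketch, the SRH mirrors the fields this implementation reads and writes (see `ip6_sr_header_t` in `sr_packet.h`; the layout below is an illustration, not a verbatim copy):
+
+    typedef struct
+    {
+      u8 protocol;       /* protocol of the header following the SRH */
+      u8 length;         /* SRH length in 8-octet units, not counting the first 8 octets */
+      u8 type;           /* routing header type: Segment Routing */
+      u8 segments_left;  /* index, in the segment list, of the next segment to process */
+      u8 first_segment;  /* index of the slot holding the first segment of the path */
+      u8 flags;
+      u16 reserved;
+      ip6_address_t segments[0]; /* segment list, encoded in reverse order of the path */
+    } ip6_sr_header_t;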
+
+## SRv6 Features in VPP
+
+The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/">SRv6 Network Programming (*draft-filsfils-spring-srv6-network-programming*)</a> defines the SRv6 architecture.
+
+VPP supports the following SRv6 LocalSID functions: End, End.X, End.DX6, End.DT6, End.DX4, End.DT4, End.DX2, End.B6, End.B6.Encaps.
+
+For further information and how to configure each specific function: @subpage srv6_localsid_doc
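+
+As a quick example (the interface name and addresses below are placeholders), a LocalSID with the End.DX6 behavior can be created through the `sr localsid` CLI introduced by this patch:
+
+    sr localsid address C1::1 behavior end.dx6 GigabitEthernet0/0/0 2001::1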
+
+
+The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-segment-routing-policy/">Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*)</a> defines SR Policies.
+
+VPP supports SRv6 Policies with T.Insert and T.Encaps behaviors.
+
+For further information on how to create SR Policies: @subpage srv6_policy_doc
+
+For further information on how to steer traffic into SR Policies: @subpage srv6_steering_doc
+
+## SRv6 LocalSID development framework
+
+One of the *key* concepts of SRv6 is network programmability. This is why an SRv6 LocalSID is associated with a specific function.
+
+However, truly enabling network programmability means allowing any developer to **easily** create their own SRv6 LocalSID functions. For that reason we have added API calls so that any developer can code their own SRv6 LocalSID behaviors as plugins and add them to the running SRv6 code.
+
+The principle is that the developer only codes the behavior (the graph node), while all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code.
+
+For more information please refer to: @subpage srv6_plugin_doc
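+
+As a minimal sketch of the registration call (`sr_localsid_register_function` is declared in `sr.h`; the `my_*` names and the `my_dpo_type` DPO registration below are hypothetical):
+
+    static int my_creation_fn (ip6_sr_localsid_t *ls) { /* per-localsid setup */ return 0; }
+    static int my_removal_fn (ip6_sr_localsid_t *ls) { /* per-localsid cleanup */ return 0; }
+
+    int rv = sr_localsid_register_function (vm, (u8 *) "my_behavior",
+                                            (u8 *) "end.my",
+                                            (u8 *) "Endpoint with custom processing",
+                                            (u8 *) "<parameters>",
+                                            &my_dpo_type,
+                                            format_my_localsid,
+                                            unformat_my_localsid,
+                                            my_creation_fn, my_removal_fn);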
diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c
new file mode 100755
index 00000000..1be68334
--- /dev/null
+++ b/src/vnet/srv6/sr_localsid.c
@@ -0,0 +1,1654 @@
+/*
+ * sr_localsid.c: ipv6 segment routing Endpoint behaviors
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Processing of packets with a SRH
+ *
+ * CLI to define new Segment Routing End processing functions.
+ * Graph node to support such functions.
+ *
+ * Each function associates an SRv6 segment (IPv6 address) with a specific
+ * Segment Routing function.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief Dynamically added SR localsid DPO type
+ */
+static dpo_type_t sr_localsid_dpo_type;
+static dpo_type_t sr_localsid_d_dpo_type;
+
+/**
+ * @brief SR localsid add/del
+ *
+ * Function to add or delete SR LocalSIDs.
+ *
+ * @param is_del Boolean of whether it is a delete instruction
+ * @param localsid_addr IPv6 address of the localsid
+ * @param end_psp Boolean of whether the End behavior should apply PSP (Penultimate Segment Pop)
+ * @param behavior Type of behavior (function) for this localsid
+ * @param sw_if_index Only for L2/L3 xconnect: the outgoing interface (OIF). For VRF variants, the FIB table.
+ * @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+ * @param fib_table FIB table in which we should install the localsid entry
+ * @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+ *
+ * @return 0 on success, error otherwise.
+ */
+int
+sr_cli_localsid (char is_del, ip6_address_t * localsid_addr,
+ char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index,
+ u32 fib_table, ip46_address_t * nh_addr, void *ls_plugin_mem)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ uword *p;
+ int rv;
+
+ ip6_sr_localsid_t *ls = 0;
+
+ dpo_id_t dpo = DPO_INVALID;
+
+ /* Search for the item */
+ p = mhash_get (&sm->sr_localsids_index_hash, localsid_addr);
+
+ if (p)
+ {
+ if (is_del)
+ {
+ /* Retrieve localsid */
+ ls = pool_elt_at_index (sm->localsids, p[0]);
+ /* Delete FIB entry */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = *localsid_addr,
+ }
+ };
+
+ fib_table_entry_delete (fib_table_find (FIB_PROTOCOL_IP6,
+ fib_table),
+ &pfx, FIB_SOURCE_SR);
+
+ /* In case it is a Xconnect iface remove the (OIF, NHOP) adj */
+ if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX6
+ || ls->behavior == SR_BEHAVIOR_DX4)
+ adj_unlock (ls->nh_adj);
+
+ if (ls->behavior >= SR_BEHAVIOR_LAST)
+ {
+ sr_localsid_fn_registration_t *plugin = 0;
+ plugin = pool_elt_at_index (sm->plugin_functions,
+ ls->behavior - SR_BEHAVIOR_LAST);
+
+ /* Callback plugin removal function */
+ rv = plugin->removal (ls);
+ }
+
+ /* Delete localsid registry */
+ pool_put (sm->localsids, ls);
+ mhash_unset (&sm->sr_localsids_index_hash, localsid_addr, NULL);
+ return 0;
+ }
+ else /* create with function already existing; complain */
+ return -1;
+ }
+ else
+ /* delete; localsid does not exist; complain */
+ if (is_del)
+ return -2;
+
+ /* Check whether there exists a FIB entry with such address */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ };
+
+ pfx.fp_addr.as_u64[0] = localsid_addr->as_u64[0];
+ pfx.fp_addr.as_u64[1] = localsid_addr->as_u64[1];
+
+ /* Lookup the FIB index associated to the table id provided */
+ u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, fib_table);
+ if (fib_index == ~0)
+ return -3;
+
+ /* Lookup the localsid in such FIB table */
+ fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx);
+ if (FIB_NODE_INDEX_INVALID != fei)
+ return -4; //There is an entry for such address (the localsid addr)
+
+ /* Create a new localsid registry */
+ pool_get (sm->localsids, ls);
+ memset (ls, 0, sizeof (*ls));
+
+ clib_memcpy (&ls->localsid, localsid_addr, sizeof (ip6_address_t));
+ ls->end_psp = end_psp;
+ ls->behavior = behavior;
+ ls->nh_adj = (u32) ~ 0;
+ ls->fib_table = fib_table;
+ switch (behavior)
+ {
+ case SR_BEHAVIOR_END:
+ break;
+ case SR_BEHAVIOR_X:
+ ls->sw_if_index = sw_if_index;
+ clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t));
+ break;
+ case SR_BEHAVIOR_T:
+ ls->vrf_index = sw_if_index;
+ break;
+ case SR_BEHAVIOR_DX4:
+ ls->sw_if_index = sw_if_index;
+ clib_memcpy (&ls->next_hop.ip4, &nh_addr->ip4, sizeof (ip4_address_t));
+ break;
+ case SR_BEHAVIOR_DX6:
+ ls->sw_if_index = sw_if_index;
+ clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t));
+ break;
+ case SR_BEHAVIOR_DT6:
+ ls->vrf_index = sw_if_index;
+ break;
+ case SR_BEHAVIOR_DT4:
+ ls->vrf_index = sw_if_index;
+ break;
+ case SR_BEHAVIOR_DX2:
+ ls->sw_if_index = sw_if_index;
+ ls->vlan_index = vlan_index;
+ break;
+ }
+
+ /* Figure out the adjacency magic for Xconnect variants */
+ if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4
+ || ls->behavior == SR_BEHAVIOR_DX6)
+ {
+ adj_index_t nh_adj_index = ADJ_INDEX_INVALID;
+
+ /* Retrieve the adjacency corresponding to the (OIF, next_hop) */
+ if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X)
+ nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
+ nh_addr, sw_if_index);
+
+ else if (ls->behavior == SR_BEHAVIOR_DX4)
+ nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
+ nh_addr, sw_if_index);
+
+ /* Check for ADJ creation error. If so panic */
+ if (nh_adj_index == ADJ_INDEX_INVALID)
+ {
+ pool_put (sm->localsids, ls);
+ return -5;
+ }
+
+ ls->nh_adj = nh_adj_index;
+ }
+
+ /* Set DPO */
+ if (ls->behavior == SR_BEHAVIOR_END || ls->behavior == SR_BEHAVIOR_X)
+ dpo_set (&dpo, sr_localsid_dpo_type, DPO_PROTO_IP6, ls - sm->localsids);
+ else if (ls->behavior > SR_BEHAVIOR_D_FIRST
+ && ls->behavior < SR_BEHAVIOR_LAST)
+ dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids);
+ else if (ls->behavior >= SR_BEHAVIOR_LAST)
+ {
+ sr_localsid_fn_registration_t *plugin = 0;
+ plugin = pool_elt_at_index (sm->plugin_functions,
+ ls->behavior - SR_BEHAVIOR_LAST);
+ /* Copy the unformat memory result */
+ ls->plugin_mem = ls_plugin_mem;
+ /* Callback plugin creation function */
+ rv = plugin->creation (ls);
+ if (rv)
+ {
+ pool_put (sm->localsids, ls);
+ return -6;
+ }
+ dpo_set (&dpo, plugin->dpo, DPO_PROTO_IP6, ls - sm->localsids);
+ }
+
+ /* Set hash key for searching localsid by address */
+ mhash_set (&sm->sr_localsids_index_hash, localsid_addr, ls - sm->localsids,
+ NULL);
+
+ fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
+
+ /* Set counter to zero */
+ vlib_validate_combined_counter (&(sm->sr_ls_valid_counters),
+ ls - sm->localsids);
+ vlib_validate_combined_counter (&(sm->sr_ls_invalid_counters),
+ ls - sm->localsids);
+
+ vlib_zero_combined_counter (&(sm->sr_ls_valid_counters),
+ ls - sm->localsids);
+ vlib_zero_combined_counter (&(sm->sr_ls_invalid_counters),
+ ls - sm->localsids);
+
+ return 0;
+}
+
+/**
+ * @brief SR LocalSID CLI function.
+ *
+ * @see sr_cli_localsid
+ */
+static clib_error_t *
+sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_sr_main_t *sm = &sr_main;
+ u32 sw_if_index = (u32) ~ 0, vlan_index = (u32) ~ 0, fib_index = 0;
+ int is_del = 0;
+ int end_psp = 0;
+ ip6_address_t resulting_address;
+ ip46_address_t next_hop;
+ char address_set = 0;
+ char behavior = 0;
+ void *ls_plugin_mem = 0;
+
+ int rv;
+
+ memset (&resulting_address, 0, sizeof (ip6_address_t));
+ ip46_address_reset (&next_hop);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (!address_set
+ && unformat (input, "address %U", unformat_ip6_address,
+ &resulting_address))
+ address_set = 1;
+ else if (!address_set
+ && unformat (input, "addr %U", unformat_ip6_address,
+ &resulting_address))
+ address_set = 1;
+ else if (unformat (input, "fib-table %u", &fib_index));
+ else if (vlan_index == (u32) ~ 0
+ && unformat (input, "vlan %u", &vlan_index));
+ else if (!behavior && unformat (input, "behavior"))
+ {
+ if (unformat (input, "end.x %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip6_address, &next_hop.ip6))
+ behavior = SR_BEHAVIOR_X;
+ else if (unformat (input, "end.t %u", &sw_if_index))
+ behavior = SR_BEHAVIOR_T;
+ else if (unformat (input, "end.dx6 %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip6_address, &next_hop.ip6))
+ behavior = SR_BEHAVIOR_DX6;
+ else if (unformat (input, "end.dx4 %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip4_address, &next_hop.ip4))
+ behavior = SR_BEHAVIOR_DX4;
+ else if (unformat (input, "end.dx2 %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ behavior = SR_BEHAVIOR_DX2;
+ else if (unformat (input, "end.dt6 %u", &sw_if_index))
+ behavior = SR_BEHAVIOR_DT6;
+ else if (unformat (input, "end.dt4 %u", &sw_if_index))
+ behavior = SR_BEHAVIOR_DT4;
+ else
+ {
+ /* Loop over all the plugin behavior format functions */
+ sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0;
+ sr_localsid_fn_registration_t **plugin_it = 0;
+
+ /* Create a vector out of the plugin pool as recommended */
+ /* *INDENT-OFF* */
+ pool_foreach (plugin, sm->plugin_functions,
+ {
+ vec_add1 (vec_plugins, plugin);
+ });
+ /* *INDENT-ON* */
+
+ vec_foreach (plugin_it, vec_plugins)
+ {
+ if (unformat
+ (input, "%U", (*plugin_it)->ls_unformat, &ls_plugin_mem))
+ {
+ behavior = (*plugin_it)->sr_localsid_function_number;
+ break;
+ }
+ }
+ }
+
+ if (!behavior)
+ {
+ if (unformat (input, "end"))
+ behavior = SR_BEHAVIOR_END;
+ else
+ break;
+ }
+ }
+ else if (!end_psp && unformat (input, "psp"))
+ end_psp = 1;
+ else
+ break;
+ }
+
+ if (!behavior && end_psp)
+ behavior = SR_BEHAVIOR_END;
+
+ if (!address_set)
+ return clib_error_return (0,
+ "Error: SRv6 LocalSID address is mandatory.");
+ if (!is_del && !behavior)
+ return clib_error_return (0,
+ "Error: SRv6 LocalSID behavior is mandatory.");
+ if (vlan_index != (u32) ~ 0)
+ return clib_error_return (0,
+ "Error: SRv6 End.DX2 with rewrite VLAN tag not supported by now.");
+ if (end_psp && !(behavior == SR_BEHAVIOR_END || behavior == SR_BEHAVIOR_X))
+ return clib_error_return (0,
+ "Error: SRv6 PSP only compatible with End and End.X");
+
+ rv = sr_cli_localsid (is_del, &resulting_address, end_psp, behavior,
+ sw_if_index, vlan_index, fib_index, &next_hop,
+ ls_plugin_mem);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -1:
+ return clib_error_return (0,
+ "Identical localsid already exists. Requested localsid not created.");
+ case -2:
+ return clib_error_return (0,
+ "The requested localsid could not be deleted. SR localsid not found");
+ case -3:
+ return clib_error_return (0, "FIB table %u does not exist", fib_index);
+ case -4:
+ return clib_error_return (0, "There is already one FIB entry for the"
+ "requested localsid non segment routing related");
+ case -5:
+ return clib_error_return (0,
+ "Could not create ARP/ND entry for such next_hop. Internal error.");
+ case -6:
+ return clib_error_return (0,
+ "Error on the plugin based localsid creation.");
+ default:
+ return clib_error_return (0, "BUG: sr localsid returns %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_localsid_command, static) = {
+ .path = "sr localsid",
+ .short_help = "sr localsid (del) address XX:XX::YY:YY"
+ "(fib-table 8) behavior STRING",
+ .long_help =
+ "Create SR LocalSID and binds it to a particular behavior\n"
+ "Arguments:\n"
+ "\tlocalSID IPv6_addr(128b) LocalSID IPv6 address\n"
+ "\t(fib-table X) Optional. VRF where to install SRv6 localsid\n"
+ "\tbehavior STRING Specifies the behavior\n"
+ "\n\tBehaviors:\n"
+ "\tEnd\t-> Endpoint.\n"
+ "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+ "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
+ "\t\tParameters: '<iface>'\n"
+ "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+ "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip4_next_hop>'\n"
+ "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
+ "\t\tParameters: '<ip6_fib_table>'\n"
+ "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
+ "\t\tParameters: '<ip4_fib_table>'\n",
+ .function = sr_cli_localsid_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI function to 'show' all SR LocalSIDs on console.
+ */
+static clib_error_t *
+show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_localsid_t **localsid_list = 0;
+ ip6_sr_localsid_t *ls;
+ int i;
+
+ vlib_cli_output (vm, "SRv6 - My LocalSID Table:");
+ vlib_cli_output (vm, "=========================");
+ /* *INDENT-OFF* */
+ pool_foreach (ls, sm->localsids, ({ vec_add1 (localsid_list, ls); }));
+ /* *INDENT-ON* */
+ for (i = 0; i < vec_len (localsid_list); i++)
+ {
+ ls = localsid_list[i];
+ switch (ls->behavior)
+ {
+ case SR_BEHAVIOR_END:
+ vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd",
+ format_ip6_address, &ls->localsid);
+ break;
+ case SR_BEHAVIOR_X:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tX (Endpoint with Layer-3 cross-connect)"
+ "\n\tIface: \t%U\n\tNext hop: \t%U",
+ format_ip6_address, &ls->localsid,
+ format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+ format_ip6_address, &ls->next_hop.ip6);
+ break;
+ case SR_BEHAVIOR_T:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tT (Endpoint with specific IPv6 table lookup)"
+ "\n\tTable: \t%u",
+ format_ip6_address, &ls->localsid, ls->vrf_index);
+ break;
+ case SR_BEHAVIOR_DX4:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDX4 (Endpoint with decapsulation and IPv4 cross-connect)"
+ "\n\tIface: \t%U\n\tNext hop: \t%U",
+ format_ip6_address, &ls->localsid,
+ format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+ format_ip4_address, &ls->next_hop.ip4);
+ break;
+ case SR_BEHAVIOR_DX6:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDX6 (Endpoint with decapsulation and IPv6 cross-connect)"
+ "\n\tIface: \t%U\n\tNext hop: \t%U",
+ format_ip6_address, &ls->localsid,
+ format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+ format_ip6_address, &ls->next_hop.ip6);
+ break;
+ case SR_BEHAVIOR_DX2:
+ if (ls->vlan_index == (u32) ~ 0)
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDX2 (Endpoint with decapulation and Layer-2 cross-connect)"
+ "\n\tIface: \t%U", format_ip6_address,
+ &ls->localsid, format_vnet_sw_if_index_name, vnm,
+ ls->sw_if_index);
+ else
+ vlib_cli_output (vm,
+ "Unsupported yet. (DX2 with egress VLAN rewrite)");
+ break;
+ case SR_BEHAVIOR_DT6:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDT6 (Endpoint with decapsulation and specific IPv6 table lookup)"
+ "\n\tTable: %u", format_ip6_address, &ls->localsid,
+ ls->vrf_index);
+ break;
+ case SR_BEHAVIOR_DT4:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDT4 (Endpoint with decapsulation and specific IPv4 table lookup)"
+ "\n\tTable: \t%u", format_ip6_address,
+ &ls->localsid, ls->vrf_index);
+ break;
+ default:
+ if (ls->behavior >= SR_BEHAVIOR_LAST)
+ {
+ sr_localsid_fn_registration_t *plugin =
+ pool_elt_at_index (sm->plugin_functions,
+ ls->behavior - SR_BEHAVIOR_LAST);
+
+ vlib_cli_output (vm, "\tAddress: \t%U\n"
+ "\tBehavior: \t%s (%s)\n\t%U",
+ format_ip6_address, &ls->localsid,
+ plugin->keyword_str, plugin->def_str,
+ plugin->ls_format, ls->plugin_mem);
+ }
+ else
+ //Should never get here...
+ vlib_cli_output (vm, "Internal error");
+ break;
+ }
+ if (ls->end_psp)
+ vlib_cli_output (vm, "\tPSP: \tTrue\n");
+
+ /* Print counters */
+ vlib_counter_t valid, invalid;
+ vlib_get_combined_counter (&(sm->sr_ls_valid_counters), i, &valid);
+ vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), i, &invalid);
+ vlib_cli_output (vm, "\tGood traffic: \t[%Ld packets : %Ld bytes]\n",
+ valid.packets, valid.bytes);
+ vlib_cli_output (vm, "\tBad traffic: \t[%Ld packets : %Ld bytes]\n",
+ invalid.packets, invalid.bytes);
+ vlib_cli_output (vm, "--------------------");
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_localsid_command, static) = {
+ .path = "show sr localsids",
+ .short_help = "show sr localsids",
+ .function = show_sr_localsid_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Function to 'clear' ALL SR localsid counters
+ */
+static clib_error_t *
+clear_sr_localsid_counters_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+
+ vlib_clear_combined_counters (&(sm->sr_ls_valid_counters));
+ vlib_clear_combined_counters (&(sm->sr_ls_invalid_counters));
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = {
+ .path = "clear sr localsid counters",
+ .short_help = "clear sr localsid counters",
+ .function = clear_sr_localsid_counters_command_fn,
+};
+/* *INDENT-ON* */
+
+/************************ SR LocalSID graphs node ****************************/
+/**
+ * @brief SR localsid node trace
+ */
+typedef struct
+{
+ ip6_address_t localsid;
+ u16 behavior;
+ u8 sr[256];
+ u8 num_segments;
+ u8 segments_left;
+} sr_localsid_trace_t;
+
+#define foreach_sr_localsid_error \
+_(NO_INNER_HEADER, "(SR-Error) No inner IP header") \
+_(NO_MORE_SEGMENTS, "(SR-Error) No more segments") \
+_(NO_SRH, "(SR-Error) No SR header") \
+_(NO_PSP, "(SR-Error) PSP Not available (segments left > 0)") \
+_(NOT_LS, "(SR-Error) Decaps not available (segments left > 0)") \
+_(L2, "(SR-Error) SRv6 decapsulated a L2 frame without dest")
+
+typedef enum
+{
+#define _(sym,str) SR_LOCALSID_ERROR_##sym,
+ foreach_sr_localsid_error
+#undef _
+ SR_LOCALSID_N_ERROR,
+} sr_localsid_error_t;
+
+static char *sr_localsid_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sr_localsid_error
+#undef _
+};
+
+#define foreach_sr_localsid_next \
+_(ERROR, "error-drop") \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(IP4_LOOKUP, "ip4-lookup") \
+_(IP6_REWRITE, "ip6-rewrite") \
+_(IP4_REWRITE, "ip4-rewrite") \
+_(INTERFACE_OUTPUT, "interface-output")
+
+typedef enum
+{
+#define _(s,n) SR_LOCALSID_NEXT_##s,
+ foreach_sr_localsid_next
+#undef _
+ SR_LOCALSID_N_NEXT,
+} sr_localsid_next_t;
+
+/**
+ * @brief SR LocalSID graph node trace function
+ *
+ * @see sr_localsid
+ */
+u8 *
+format_sr_localsid_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sr_localsid_trace_t *t = va_arg (*args, sr_localsid_trace_t *);
+
+ s =
+ format (s, "SR-LOCALSID:\n\tLocalsid: %U\n", format_ip6_address,
+ &t->localsid);
+ switch (t->behavior)
+ {
+ case SR_BEHAVIOR_END:
+ s = format (s, "\tBehavior: End\n");
+ break;
+ case SR_BEHAVIOR_DX6:
+ s = format (s, "\tBehavior: Decapsulation with IPv6 L3 xconnect\n");
+ break;
+ case SR_BEHAVIOR_DX4:
+ s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n");
+ break;
+ case SR_BEHAVIOR_X:
+ s = format (s, "\tBehavior: IPv6 L3 xconnect\n");
+ break;
+ case SR_BEHAVIOR_T:
+ s = format (s, "\tBehavior: IPv6 specific table lookup\n");
+ break;
+ case SR_BEHAVIOR_DT6:
+ s = format (s, "\tBehavior: Decapsulation with IPv6 Table lookup\n");
+ break;
+ case SR_BEHAVIOR_DT4:
+ s = format (s, "\tBehavior: Decapsulation with IPv4 Table lookup\n");
+ break;
+ case SR_BEHAVIOR_DX2:
+ s = format (s, "\tBehavior: Decapsulation with L2 xconnect\n");
+ break;
+ default:
+ s = format (s, "\tBehavior: defined in plugin\n"); //TODO
+ break;
+ }
+ if (t->num_segments != 0xFF)
+ {
+ if (t->num_segments > 0)
+ {
+ s = format (s, "\tSegments left: %d\n", t->segments_left);
+ s = format (s, "\tSID list: [in ietf order]");
+ int i = 0;
+ for (i = 0; i < t->num_segments; i++)
+ {
+ s = format (s, "\n\t-> %U", format_ip6_address,
+ (ip6_address_t *) & t->sr[i *
+ sizeof (ip6_address_t)]);
+ }
+ }
+ }
+ return s;
+}
+
+/**
+ * @brief Function doing End processing.
+ */
+static_always_inline void
+end_srh_processing (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_sr_header_t * sr0,
+ ip6_sr_localsid_t * ls0,
+ u32 * next0, u8 psp, ip6_ext_header_t * prev0)
+{
+ ip6_address_t *new_dst0;
+
+ if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
+ {
+ if (sr0->segments_left == 1 && psp)
+ {
+ u32 new_l0, sr_len;
+ u64 *copy_dst0, *copy_src0;
+ u32 copy_len_u64s0 = 0;
+
+ ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0];
+ ip0->dst_address.as_u64[1] = sr0->segments->as_u64[1];
+
+ /* Remove the SRH taking care of the rest of IPv6 ext header */
+ if (prev0)
+ prev0->next_hdr = sr0->protocol;
+ else
+ ip0->protocol = sr0->protocol;
+
+ sr_len = ip6_ext_header_len (sr0);
+ vlib_buffer_advance (b0, sr_len);
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ copy_src0 = (u64 *) ip0;
+ copy_dst0 = copy_src0 + (sr0->length + 1);
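+ /* Shift the outer IPv6 header (plus any ext headers preceding
+ * the SRH) forward by sr_len bytes so the SRH is elided; the
+ * copies below run from the last 8-octet unit backwards since
+ * source and destination overlap.
+ */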
+ /* Number of extra 8-octet units to copy beyond the fixed IPv6
+ * header (covered by the five unrolled copies below): zero when
+ * there are no extension headers, otherwise the length in
+ * 8-octet units of the ext headers preceding the SR header.
+ */
+ copy_len_u64s0 =
+ (((u8 *) sr0 - (u8 *) ip0) - sizeof (ip6_header_t)) >> 3;
+ copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+ copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+ copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+ copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+ copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
+
+ int i;
+ for (i = copy_len_u64s0 - 1; i >= 0; i--)
+ {
+ copy_dst0[i] = copy_src0[i];
+ }
+
+ if (ls0->behavior == SR_BEHAVIOR_X)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+ }
+ else if (ls0->behavior == SR_BEHAVIOR_T)
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->vrf_index;
+ }
+ }
+ else if (PREDICT_TRUE (sr0->segments_left > 0))
+ {
+ sr0->segments_left -= 1;
+ new_dst0 = (ip6_address_t *) (sr0->segments);
+ new_dst0 += sr0->segments_left;
+ ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+ ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+
+ if (ls0->behavior == SR_BEHAVIOR_X)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+ }
+ else if (ls0->behavior == SR_BEHAVIOR_T)
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->vrf_index;
+ }
+ }
+ else
+ {
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS];
+ }
+ }
+ else
+ {
+ /* Error. Routing header of type != SR */
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH];
+ }
+}
+
+/*
+ * @brief Function doing SRH processing for D* variants
+ */
+static_always_inline void
+end_decaps_srh_processing (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_sr_header_t * sr0,
+ ip6_sr_localsid_t * ls0, u32 * next0)
+{
+ /* Compute the size of the IPv6 header with all Ext. headers */
+ u8 next_proto;
+ ip6_ext_header_t *next_ext_header;
+ u16 total_size = 0;
+
+ next_proto = ip0->protocol;
+ next_ext_header = (void *) (ip0 + 1);
+ total_size = sizeof (ip6_header_t);
+ while (ip6_ext_hdr (next_proto))
+ {
+ total_size += ip6_ext_header_len (next_ext_header);
+ next_proto = next_ext_header->next_hdr;
+ next_ext_header = ip6_ext_next_header (next_ext_header);
+ }
+
+ /* Ensure this is the last segment. Otherwise drop. */
+ if (sr0 && sr0->segments_left != 0)
+ {
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NOT_LS];
+ return;
+ }
+
+ switch (next_proto)
+ {
+ case IP_PROTOCOL_IPV6:
+ /* Encap-End IPv6. Pop outer IPv6 header. */
+ if (ls0->behavior == SR_BEHAVIOR_DX6)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+ return;
+ }
+ else if (ls0->behavior == SR_BEHAVIOR_DT6)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->vrf_index;
+ return;
+ }
+ break;
+ case IP_PROTOCOL_IP_IN_IP:
+ /* Encap-End IPv4. Pop outer IPv6 header */
+ if (ls0->behavior == SR_BEHAVIOR_DX4)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP4_REWRITE;
+ return;
+ }
+ else if (ls0->behavior == SR_BEHAVIOR_DT4)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->vrf_index;
+ *next0 = SR_LOCALSID_NEXT_IP4_LOOKUP;
+ return;
+ }
+ break;
+ case IP_PROTOCOL_IP6_NONXT:
+ /* L2 encaps */
+ if (ls0->behavior == SR_BEHAVIOR_DX2)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->sw_if_index;
+ *next0 = SR_LOCALSID_NEXT_INTERFACE_OUTPUT;
+ return;
+ }
+ break;
+ }
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_INNER_HEADER];
+ return;
+}
+
+/**
+ * @brief SR LocalSID graph node. Supports all default SR Endpoint variants with decaps
+ */
+static uword
+sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ ip6_sr_main_t *sm = &sr_main;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+ ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+4 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ ls0 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls1 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ ls2 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ ls3 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE);
+
+ end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+ end_decaps_srh_processing (node, b1, ip1, sr1, ls1, &next1);
+ end_decaps_srh_processing (node, b2, ip2, sr2, ls2, &next2);
+ end_decaps_srh_processing (node, b3, ip3, sr3, ls3, &next3);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls0->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls0->behavior;
+ if (ip0 == vlib_buffer_get_current (b0))
+ {
+ if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr0->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr0->segments, sr0->length * 8);
+ tr->num_segments =
+ sr0->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr0->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls1->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls1->behavior;
+ if (ip1 == vlib_buffer_get_current (b1))
+ {
+ if (ip1->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr1->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr1->segments, sr1->length * 8);
+ tr->num_segments =
+ sr1->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr1->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls2->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls2->behavior;
+ if (ip2 == vlib_buffer_get_current (b2))
+ {
+ if (ip2->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr2->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr2->segments, sr2->length * 8);
+ tr->num_segments =
+ sr2->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr2->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls3->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls3->behavior;
+ if (ip3 == vlib_buffer_get_current (b3))
+ {
+ if (ip3->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr3->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr3->segments, sr3->length * 8);
+ tr->num_segments =
+ sr3->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr3->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ vlib_increment_combined_counter
+ (((next0 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_increment_combined_counter
+ (((next1 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b1));
+
+ vlib_increment_combined_counter
+ (((next2 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b2));
+
+ vlib_increment_combined_counter
+ (((next3 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b3));
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for potentially the last three packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0;
+ ip6_ext_header_t *prev0;
+ ip6_sr_header_t *sr0;
+ u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+ ip6_sr_localsid_t *ls0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Lookup the SR End behavior based on IP DA (adj) */
+ ls0 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ /* Find SRH as well as previous header */
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+
+ /* SRH processing and End variants */
+ end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls0->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls0->behavior;
+ if (ip0 == vlib_buffer_get_current (b0))
+ {
+ if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr0->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr0->segments, sr0->length * 8);
+ tr->num_segments =
+ sr0->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr0->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ /* Increase the counters */
+ vlib_increment_combined_counter
+ (((next0 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_localsid_d_node) = {
+ .function = sr_localsid_d_fn,
+ .name = "sr-localsid-d",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_localsid_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_LOCALSID_N_ERROR,
+ .error_strings = sr_localsid_error_strings,
+ .n_next_nodes = SR_LOCALSID_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_LOCALSID_NEXT_##s] = n,
+ foreach_sr_localsid_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SR LocalSID graph node. Supports all default SR Endpoint without decaps
+ */
+static uword
+sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ ip6_sr_main_t *sm = &sr_main;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+ ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+4 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE);
+
+ ls0 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls1 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ ls2 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ ls3 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+ end_srh_processing (node, b0, ip0, sr0, ls0, &next0, ls0->end_psp,
+ prev0);
+ end_srh_processing (node, b1, ip1, sr1, ls1, &next1, ls1->end_psp,
+ prev1);
+ end_srh_processing (node, b2, ip2, sr2, ls2, &next2, ls2->end_psp,
+ prev2);
+ end_srh_processing (node, b3, ip3, sr3, ls3, &next3, ls3->end_psp,
+ prev3);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls0->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls0->behavior;
+ if (ip0 == vlib_buffer_get_current (b0))
+ {
+ if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr0->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr0->segments, sr0->length * 8);
+ tr->num_segments =
+ sr0->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr0->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls1->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls1->behavior;
+ if (ip1 == vlib_buffer_get_current (b1))
+ {
+ if (ip1->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr1->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr1->segments, sr1->length * 8);
+ tr->num_segments =
+ sr1->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr1->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls2->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls2->behavior;
+ if (ip2 == vlib_buffer_get_current (b2))
+ {
+ if (ip2->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr2->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr2->segments, sr2->length * 8);
+ tr->num_segments =
+ sr2->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr2->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls3->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls3->behavior;
+ if (ip3 == vlib_buffer_get_current (b3))
+ {
+ if (ip3->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr3->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr3->segments, sr3->length * 8);
+ tr->num_segments =
+ sr3->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr3->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ vlib_increment_combined_counter
+ (((next0 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_increment_combined_counter
+ (((next1 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b1));
+
+ vlib_increment_combined_counter
+ (((next2 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b2));
+
+ vlib_increment_combined_counter
+ (((next3 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b3));
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for potentially the last three packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_ext_header_t *prev0;
+ ip6_sr_header_t *sr0;
+ u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+ ip6_sr_localsid_t *ls0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+
+ /* Lookup the SR End behavior based on IP DA (adj) */
+ ls0 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ /* SRH processing */
+ end_srh_processing (node, b0, ip0, sr0, ls0, &next0, ls0->end_psp,
+ prev0);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->num_segments = 0;
+ clib_memcpy (tr->localsid.as_u8, ls0->localsid.as_u8,
+ sizeof (tr->localsid.as_u8));
+ tr->behavior = ls0->behavior;
+ if (ip0 == vlib_buffer_get_current (b0))
+ {
+ if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr0->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr0->segments, sr0->length * 8);
+ tr->num_segments =
+ sr0->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr0->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ vlib_increment_combined_counter
+ (((next0 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_localsid_node) = {
+ .function = sr_localsid_fn,
+ .name = "sr-localsid",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_localsid_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_LOCALSID_N_ERROR,
+ .error_strings = sr_localsid_error_strings,
+ .n_next_nodes = SR_LOCALSID_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_LOCALSID_NEXT_##s] = n,
+ foreach_sr_localsid_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_sr_dpo (u8 * s, va_list * args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+
+ return (format (s, "SR: localsid_index:[%d]", index));
+}
+
+const static dpo_vft_t sr_loc_vft = {
+ .dv_lock = sr_dpo_lock,
+ .dv_unlock = sr_dpo_unlock,
+ .dv_format = format_sr_dpo,
+};
+
+const static char *const sr_loc_ip6_nodes[] = {
+ "sr-localsid",
+ NULL,
+};
+
+const static char *const *const sr_loc_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_loc_ip6_nodes,
+};
+
+const static char *const sr_loc_d_ip6_nodes[] = {
+ "sr-localsid-d",
+ NULL,
+};
+
+const static char *const *const sr_loc_d_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_loc_d_ip6_nodes,
+};
+
+
+/*************************** SR LocalSID plugins ******************************/
+/**
+ * @brief SR LocalSID plugin registry
+ */
+int
+sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
+ u8 * keyword_str, u8 * def_str,
+ u8 * params_str, dpo_type_t * dpo,
+ format_function_t * ls_format,
+ unformat_function_t * ls_unformat,
+ sr_plugin_callback_t * creation_fn,
+ sr_plugin_callback_t * removal_fn)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ uword *p;
+
+ sr_localsid_fn_registration_t *plugin;
+
+ /* Does this function already exist? If so, update it */
+ p = hash_get_mem (sm->plugin_functions_by_key, fn_name);
+ if (p)
+ {
+ plugin = pool_elt_at_index (sm->plugin_functions, p[0]);
+ }
+ /* Else create a new one and set hash key */
+ else
+ {
+ pool_get (sm->plugin_functions, plugin);
+ hash_set_mem (sm->plugin_functions_by_key, fn_name,
+ plugin - sm->plugin_functions);
+ }
+
+ memset (plugin, 0, sizeof (*plugin));
+
+ plugin->sr_localsid_function_number = (plugin - sm->plugin_functions);
+ plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST;
+ plugin->ls_format = ls_format;
+ plugin->ls_unformat = ls_unformat;
+ plugin->creation = creation_fn;
+ plugin->removal = removal_fn;
+ clib_memcpy (&plugin->dpo, dpo, sizeof (dpo_type_t));
+ plugin->function_name = format (0, "%s%c", fn_name, 0);
+ plugin->keyword_str = format (0, "%s%c", keyword_str, 0);
+ plugin->def_str = format (0, "%s%c", def_str, 0);
+ plugin->params_str = format (0, "%s%c", params_str, 0);
+
+ return plugin->sr_localsid_function_number;
+}
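+
+/*
+ * Hypothetical usage sketch (editor's illustration; all names below are
+ * invented): a localsid plugin would typically register its behavior from
+ * its own init function, roughly as follows:
+ *
+ * static int my_creation_fn (ip6_sr_localsid_t * ls) { return 0; }
+ * static int my_removal_fn (ip6_sr_localsid_t * ls) { return 0; }
+ *
+ * behavior_id = sr_localsid_register_function (vm,
+ * (u8 *) "my_behavior", (u8 *) "end.my", (u8 *) "My custom behavior",
+ * (u8 *) "<params>", &my_dpo_type, format_my_localsid,
+ * unformat_my_localsid, my_creation_fn, my_removal_fn);
+ */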
+
+/**
+ * @brief CLI function to 'show' all available SR LocalSID behaviors
+ */
+static clib_error_t *
+show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ sr_localsid_fn_registration_t *plugin;
+ sr_localsid_fn_registration_t **plugins_vec = 0;
+ int i;
+
+ vlib_cli_output (vm,
+ "SR LocalSIDs behaviors:\n-----------------------\n\n");
+
+ /* *INDENT-OFF* */
+ pool_foreach (plugin, sm->plugin_functions,
+ ({ vec_add1 (plugins_vec, plugin); }));
+ /* *INDENT-ON* */
+
+ /* Print static behaviors */
+ vlib_cli_output (vm, "Default behaviors:\n"
+ "\tEnd\t-> Endpoint.\n"
+ "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+ "\tEnd.T\t-> Endpoint with specific IPv6 table lookup.\n"
+ "\t\tParameters: '<fib_table>'\n"
+ "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
+ "\t\tParameters: '<iface>'\n"
+ "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+ "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip4_next_hop>'\n"
+ "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
+ "\t\tParameters: '<ip6_fib_table>'\n"
+ "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
+ "\t\tParameters: '<ip4_fib_table>'\n");
+ vlib_cli_output (vm, "Plugin behaviors:\n");
+ for (i = 0; i < vec_len (plugins_vec); i++)
+ {
+ plugin = plugins_vec[i];
+ vlib_cli_output (vm, "\t%s\t-> %s.\n", plugin->keyword_str,
+ plugin->def_str);
+ vlib_cli_output (vm, "\t\tParameters: '%s'\n", plugin->params_str);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = {
+ .path = "show sr localsids behaviors",
+ .short_help = "show sr localsids behaviors",
+ .function = show_sr_localsid_behaviors_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SR LocalSID initialization
+ */
+clib_error_t *
+sr_localsids_init (vlib_main_t * vm)
+{
+ /* Init memory for the localsid-by-address index */
+ ip6_sr_main_t *sm = &sr_main;
+ mhash_init (&sm->sr_localsids_index_hash, sizeof (uword),
+ sizeof (ip6_address_t));
+ /* Init SR behaviors DPO type */
+ sr_localsid_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes);
+ /* Init SR behaviors (decap variant) DPO type */
+ sr_localsid_d_dpo_type =
+ dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes);
+ /* Init memory for localsid plugins */
+ sm->plugin_functions_by_key = hash_create_string (0, sizeof (uword));
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_localsids_init);
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_localsid.md b/src/vnet/srv6/sr_localsid.md
new file mode 100644
index 00000000..340af4a3
--- /dev/null
+++ b/src/vnet/srv6/sr_localsid.md
@@ -0,0 +1,58 @@
+# SR LocalSIDs {#srv6_localsid_doc}
+
+A local SID is associated with a Segment Routing behavior (or function) on the current node.
+
+The most basic behavior is called END. It simply activates the next SID in the current packet by decrementing the Segments Left value and updating the IPv6 DA.
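+
+Conceptually, the End behavior amounts to the following (an editor's
+illustrative sketch using the ip6_sr_header_t field names, not the actual
+VPP data-path code, which also validates the SRH first):
+
+    /* Sketch only: advance to the next SID in the SRH */
+    static void
+    end_behavior_sketch (ip6_header_t * ip, ip6_sr_header_t * sr)
+    {
+      sr->segments_left -= 1;
+      ip->dst_address = sr->segments[sr->segments_left];
+    }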
+
+A local END SID is instantiated using the following CLI:
+
+ sr localsid (del) address XX::YY behavior end
+
+This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above.
+
+Other examples of local SIDs are the following:
+
+ sr localsid (del) address XX::YY behavior end
+ sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a
+ sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a
+ sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1
+ sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0
+ sr localsid (del) address XX::YY behavior end.dt6 5
+ sr localsid (del) address XX::YY behavior end.dt4 5
+
+Note that all of these behaviors match the definitions of the SRv6 architecture (*draft-filsfils-spring-srv6-network-programming*). Please refer to this document for a detailed description of each behavior.
+
+Note also that you can configure the PSP flavor of the End and End.X behaviors by typing:
+
+ sr localsid (del) address XX::YY behavior end psp
+ sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a psp
+
+Help on the available local SID behaviors and their usage can be obtained with:
+
+ help sr localsid
+
+Alternatively, they can be obtained with:
+
+ show sr localsids behaviors
+
+The difference between these two commands is that the former only displays the SR LocalSID behaviors built into VPP, while the latter also displays the behaviors added with the SR LocalSID Development Framework.
+
+
+VPP keeps a 'My LocalSID Table' where it stores all the instantiated SR local SIDs along with their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes.
+
+The contents of the 'My LocalSID Table' are shown with:
+
+ vpp# show sr localsid
+ SRv6 - My LocalSID Table:
+ =========================
+ Address: c3::1
+ Behavior: DX6 (Endpoint with decapsulation and IPv6 cross-connect)
+ Iface: GigabitEthernet0/5/0
+ Next hop: b:c3::b
+ Good traffic: [51277 packets : 5332808 bytes]
+ Bad traffic: [0 packets : 0 bytes]
+ --------------------
+
+The traffic counters can be reset with:
+
+ vpp# clear sr localsid counters
diff --git a/src/vnet/srv6/sr_packet.h b/src/vnet/srv6/sr_packet.h
new file mode 100755
index 00000000..7af4ad4d
--- /dev/null
+++ b/src/vnet/srv6/sr_packet.h
@@ -0,0 +1,159 @@
+#ifndef included_vnet_sr_packet_h
+#define included_vnet_sr_packet_h
+
+#include <vnet/ip/ip.h>
+
+/*
+ * ipv6 segment-routing header format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The Segment Routing Header (SRH) is defined as follows:
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Next Header | Hdr Ext Len | Routing Type | Segments Left |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | First Segment | Flags | RESERVED |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Segment List[0] (128 bits IPv6 address) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | |
+ * ...
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Segment List[n] (128 bits IPv6 address) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * // //
+ * // Optional Type Length Value objects (variable) //
+ * // //
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * where:
+ *
+ * o Next Header: 8-bit selector. Identifies the type of header
+ * immediately following the SRH.
+ *
+ * o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+ * header in 8-octet units, not including the first 8 octets.
+ *
+ * o Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+ *
+ * o Segments Left. Defined in [RFC2460], it contains the index, in
+ * the Segment List, of the next segment to inspect. Segments Left
+ * is decremented at each segment.
+ *
+ * o First Segment: contains the index, in the Segment List, of the
+ * first segment of the path which is in fact the last element of the
+ * Segment List.
+ *
+ * o Flags: 8 bits of flags. Following flags are defined:
+ *
+ * 0 1 2 3 4 5 6 7
+ * +-+-+-+-+-+-+-+-+
+ * |U|P|O|A|H| U |
+ * +-+-+-+-+-+-+-+-+
+ *
+ * U: Unused and for future use. SHOULD be unset on transmission
+ * and MUST be ignored on receipt.
+ *
+ * P-flag: Protected flag. Set when the packet has been rerouted
+ * through FRR mechanism by an SR endpoint node.
+ *
+ * O-flag: OAM flag. When set, it indicates that this packet is
+ * an operations and management (OAM) packet.
+ *
+ * A-flag: Alert flag. If present, it means important Type Length
+ * Value (TLV) objects are present. See Section 3.1 for details
+ * on TLVs objects.
+ *
+ * H-flag: HMAC flag. If set, the HMAC TLV is present and is
+ * encoded as the last TLV of the SRH. In other words, the last
+ * 36 octets of the SRH represent the HMAC information. See
+ * Section 3.1.5 for details on the HMAC TLV.
+ *
+ * o RESERVED: SHOULD be unset on transmission and MUST be ignored on
+ * receipt.
+ *
+ * o Segment List[n]: 128 bit IPv6 addresses representing the nth
+ * segment in the Segment List. The Segment List is encoded starting
+ * from the last segment of the path. I.e., the first element of the
+ * segment list (Segment List [0]) contains the last segment of the
+ * path while the last segment of the Segment List (Segment List[n])
+ * contains the first segment of the path. The index contained in
+ * "Segments Left" identifies the current active segment.
+ *
+ * o Type Length Value (TLV) are described in Section 3.1.
+ *
+ */
+
+#ifndef IPPROTO_IPV6_ROUTE
+#define IPPROTO_IPV6_ROUTE 43
+#endif
+
+#define ROUTING_HEADER_TYPE_SR 4
+
+typedef struct
+{
+ /* Protocol for next header. */
+ u8 protocol;
+ /*
+ * Length of routing header in 8 octet units,
+ * not including the first 8 octets
+ */
+ u8 length;
+
+ /* Type of routing header; type 4 = segment routing */
+ u8 type;
+
+ /* Next segment in the segment list */
+ u8 segments_left;
+
+ /* Pointer to the first segment in the header */
+ u8 first_segment;
+
+ /* Flag bits */
+#define IP6_SR_HEADER_FLAG_PROTECTED (0x40)
+#define IP6_SR_HEADER_FLAG_OAM (0x20)
+#define IP6_SR_HEADER_FLAG_ALERT (0x10)
+#define IP6_SR_HEADER_FLAG_HMAC (0x08)
+
+ /* values 0x0, 0x4 - 0x7 are reserved */
+ u8 flags;
+ u16 reserved;
+
+ /* The segment elts */
+ ip6_address_t segments[0];
+} __attribute__ ((packed)) ip6_sr_header_t;
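+
+/*
+ * Editor's sketch (an assumption, not part of the original header): per
+ * the Hdr Ext Len definition above, the total SRH size in bytes is
+ * (length + 1) * 8, since 'length' excludes the first 8 octets.
+ */
+always_inline u32
+ip6_sr_header_bytes (ip6_sr_header_t * srh)
+{
+ return ((u32) srh->length + 1) * 8;
+}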
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
+
+#endif /* included_vnet_sr_packet_h */
diff --git a/src/vnet/srv6/sr_policy.md b/src/vnet/srv6/sr_policy.md
new file mode 100644
index 00000000..521b8461
--- /dev/null
+++ b/src/vnet/srv6/sr_policy.md
@@ -0,0 +1,56 @@
+# Creating an SR Policy {#srv6_policy_doc}
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
+
+A new SR policy is created with a first SID list using:
+
+ sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3)
+
+* The weight parameter is only used if more than one SID list is associated with the policy.
+* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed.
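+
+For example (with illustrative addresses), the following creates a policy
+with two SID lists of weights 3 and 1, so that roughly 3 out of every 4
+flows are steered through the first SID list:
+
+    sr policy add bsid 2001::1 next A1:: weight 3
+    sr policy mod bsid 2001::1 add sl next A2:: weight 1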
+
+An SR policy is deleted with:
+
+ sr policy del bsid 2001::1
+ sr policy del index 1
+
+The existing SR policies are listed with:
+
+ show sr policies
+
+## Adding/Removing SID Lists from an SR policy
+
+An additional SID list is associated with an existing SR policy with:
+
+ sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3)
+ sr policy mod index 3 add sl next A2:: next B2:: next C2:: (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+ sr policy mod bsid 2001::1 del sl index 1
+ sr policy mod index 3 del sl index 1
+
+Note that this cannot be used to remove the last SID list of a policy.
+
+The weight of a SID list can also be modified with:
+
+ sr policy mod bsid 2001::1 mod sl index 1 weight 4
+ sr policy mod index 3 mod sl index 1 weight 4
+
+## SR Policies: Spray policies
+
+Spray policies are a specific type of SR policy in which the packet is replicated on all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored with this type of policy.
+
+A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in:
+
+ sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray
+
+Spray policies are used to remove multicast state from a network core domain: instead of multicast, a linear unicast copy is sent to every access node. The last SID in each list accesses the multicast tree within the access node.
+
+## Encapsulation SR policies
+
+When creating an encapsulation SR policy, an IPv6 source address must be specified for the encapsulated traffic. This is done with the following command:
+
+ set sr encaps source addr XXXX::YYYY
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
new file mode 100755
index 00000000..2f90993a
--- /dev/null
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -0,0 +1,3231 @@
+/*
+ * sr_policy_rewrite.c: ipv6 sr policy creation
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief SR policy creation and application
+ *
+ * Create an SR policy.
+ * An SR policy is either of 'default' type or of 'spray' type.
+ * An SR policy has a list of SID lists attached to it.
+ * If the SR policy is of 'default' type, it load-balances among them.
+ * An SR policy has an associated BindingSID.
+ * Whenever a packet arrives with IPv6 DA == BindingSID, the SR policy
+ * associated with that BindingSID is applied to the packet.
+ *
+ * SR policies can be applied either by IPv6 encapsulation or by
+ * SRH insertion. Both methods are implemented in this file.
+ *
+ * Input traffic is usually IPv6 packets, but IPv4 packets and L2 frames
+ * are also supported (they are encapsulated into IPv6 with an SRH).
+ *
+ * This file provides the appropriate VPP graph nodes for each of these
+ * methods.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief SR policy rewrite trace
+ */
+typedef struct
+{
+ ip6_address_t src, dst;
+} sr_policy_rewrite_trace_t;
+
+/* Graph arcs */
+#define foreach_sr_policy_rewrite_next \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(ERROR, "error-drop")
+
+typedef enum
+{
+#define _(s,n) SR_POLICY_REWRITE_NEXT_##s,
+ foreach_sr_policy_rewrite_next
+#undef _
+ SR_POLICY_REWRITE_N_NEXT,
+} sr_policy_rewrite_next_t;
+
+/* SR rewrite errors */
+#define foreach_sr_policy_rewrite_error \
+_(INTERNAL_ERROR, "Segment Routing undefined error") \
+_(BSID_ZERO, "BSID with SL = 0") \
+_(COUNTER_TOTAL, "SR steered IPv6 packets") \
+_(COUNTER_ENCAP, "SR: Encaps packets") \
+_(COUNTER_INSERT, "SR: SRH inserted packets") \
+_(COUNTER_BSID, "SR: BindingSID steered packets")
+
+typedef enum
+{
+#define _(sym,str) SR_POLICY_REWRITE_ERROR_##sym,
+ foreach_sr_policy_rewrite_error
+#undef _
+ SR_POLICY_REWRITE_N_ERROR,
+} sr_policy_rewrite_error_t;
+
+static char *sr_policy_rewrite_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sr_policy_rewrite_error
+#undef _
+};
+
+/**
+ * @brief Dynamically added SR SL DPO type
+ */
+static dpo_type_t sr_pr_encaps_dpo_type;
+static dpo_type_t sr_pr_insert_dpo_type;
+static dpo_type_t sr_pr_bsid_encaps_dpo_type;
+static dpo_type_t sr_pr_bsid_insert_dpo_type;
+
+/**
+ * @brief IPv6 SA for encapsulated packets
+ */
+static ip6_address_t sr_pr_encaps_src;
+
+/******************* SR rewrite set encaps IPv6 source addr *******************/
+/* Note: This is temporary. We have not yet decided whether to keep this
+ approach, use the IP address of a loopback interface, or even the OIF */
+
+static clib_error_t *
+set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "addr %U", unformat_ip6_address, &sr_pr_encaps_src))
+ return 0;
+ else
+ return clib_error_return (0, "No address specified");
+ }
+ return clib_error_return (0, "No address specified");
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_sr_src_command, static) = {
+ .path = "set sr encaps source",
+ .short_help = "set sr encaps source addr <ip6_addr>",
+ .function = set_sr_src_command_fn,
+};
+/* *INDENT-ON* */
+
+/*********************** SR rewrite string computation ************************/
+/**
+ * @brief SR rewrite string computation for IPv6 encapsulation (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for encapsulation
+ */
+static inline u8 *
+compute_rewrite_encaps (ip6_address_t * sl)
+{
+ ip6_header_t *iph;
+ ip6_sr_header_t *srh;
+ ip6_address_t *addrp, *this_address;
+ u32 header_length = 0;
+ u8 *rs = NULL;
+
+ header_length = 0;
+ header_length += IPv6_DEFAULT_HEADER_LENGTH;
+ if (vec_len (sl) > 1)
+ {
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += vec_len (sl) * sizeof (ip6_address_t);
+ }
+
+ vec_validate (rs, header_length - 1);
+
+ iph = (ip6_header_t *) rs;
+ iph->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0 | ((6 & 0xF) << 28));
+ iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0];
+ iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1];
+ iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH;
+ iph->protocol = IP_PROTOCOL_IPV6;
+ iph->hop_limit = IPv6_DEFAULT_HOP_LIMIT;
+
+ /* The SRH is needed only when there is more than one segment; this
+ matches the header_length computation above */
+ if (vec_len (sl) > 1)
+ {
+ srh = (ip6_sr_header_t *) (iph + 1);
+ iph->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ srh->protocol = IP_PROTOCOL_IPV6;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->segments_left = vec_len (sl) - 1;
+ srh->first_segment = vec_len (sl) - 1;
+ srh->length = ((sizeof (ip6_sr_header_t) +
+ (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1;
+ srh->flags = 0x00;
+ srh->reserved = 0x00;
+ addrp = srh->segments + vec_len (sl) - 1;
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+ }
+ iph->dst_address.as_u64[0] = sl->as_u64[0];
+ iph->dst_address.as_u64[1] = sl->as_u64[1];
+ return rs;
+}
+
+/**
+ * @brief SR rewrite string computation for SRH insertion (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion
+ */
+static inline u8 *
+compute_rewrite_insert (ip6_address_t * sl)
+{
+ ip6_sr_header_t *srh;
+ ip6_address_t *addrp, *this_address;
+ u32 header_length = 0;
+ u8 *rs = NULL;
+
+ header_length = 0;
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t);
+
+ vec_validate (rs, header_length - 1);
+
+ srh = (ip6_sr_header_t *) rs;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->segments_left = vec_len (sl);
+ srh->first_segment = vec_len (sl);
+ srh->length = ((sizeof (ip6_sr_header_t) +
+ ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1;
+ srh->flags = 0x00;
+ srh->reserved = 0x0000;
+ addrp = srh->segments + vec_len (sl);
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+ return rs;
+}
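+
+/*
+ * Editor's note: the insert rewrite deliberately leaves segments[0] empty
+ * and sets segments_left = first_segment = vec_len (sl); at apply time the
+ * original IPv6 DA is expected to fill that last slot, which is why the
+ * rewrite allocates vec_len (sl) + 1 segment entries.
+ */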
+
+/**
+ * @brief SR rewrite string computation for SRH insertion with BSID (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion with BSID
+ */
+static inline u8 *
+compute_rewrite_bsid (ip6_address_t * sl)
+{
+ ip6_sr_header_t *srh;
+ ip6_address_t *addrp, *this_address;
+ u32 header_length = 0;
+ u8 *rs = NULL;
+
+ header_length = 0;
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += vec_len (sl) * sizeof (ip6_address_t);
+
+ vec_validate (rs, header_length - 1);
+
+ srh = (ip6_sr_header_t *) rs;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->segments_left = vec_len (sl) - 1;
+ srh->first_segment = vec_len (sl) - 1;
+ srh->length = ((sizeof (ip6_sr_header_t) +
+ (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1;
+ srh->flags = 0x00;
+ srh->reserved = 0x0000;
+ addrp = srh->segments + vec_len (sl) - 1;
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+ return rs;
+}
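+
+/*
+ * Worked example (editor's note): for a 3-segment list the computed SRH
+ * length field is ((8 + 3 * 16) / 8) - 1 = 6, i.e. 56 bytes in total,
+ * matching the (length + 1) * 8 rule from sr_packet.h.
+ */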
+
+/*************************** SR LB helper functions **************************/
+/**
+ * @brief Creates a Segment List and adds it to an SR policy
+ *
+ * Creates a Segment List and adds it to the SR policy. Note that SLs are
+ * not necessarily unique: an SR policy may contain two Segment Lists with
+ * exactly the same segments and the same weight.
+ *
+ * @param sr_policy is the SR policy where the SL will be added
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param weight is the weight of the SegmentList (for load-balancing purposes)
+ * @param is_encap represents the mode (SRH insertion vs Encapsulation)
+ *
+ * @return pointer to the just created segment list
+ */
+static inline ip6_sr_sl_t *
+create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
+ u8 is_encap)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_sl_t *segment_list;
+
+ pool_get (sm->sid_lists, segment_list);
+ memset (segment_list, 0, sizeof (*segment_list));
+
+ vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);
+
+ /* Fill in segment list */
+ segment_list->weight =
+ (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
+ segment_list->segments = vec_dup (sl);
+
+ if (is_encap)
+ {
+ segment_list->rewrite = compute_rewrite_encaps (sl);
+ segment_list->rewrite_bsid = segment_list->rewrite;
+ }
+ else
+ {
+ segment_list->rewrite = compute_rewrite_insert (sl);
+ segment_list->rewrite_bsid = compute_rewrite_bsid (sl);
+ }
+
+ /* Create DPO */
+ dpo_reset (&segment_list->bsid_dpo);
+ dpo_reset (&segment_list->ip6_dpo);
+ dpo_reset (&segment_list->ip4_dpo);
+
+ if (is_encap)
+ {
+ dpo_set (&segment_list->ip6_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP6,
+ segment_list - sm->sid_lists);
+ dpo_set (&segment_list->ip4_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP4,
+ segment_list - sm->sid_lists);
+ dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_encaps_dpo_type,
+ DPO_PROTO_IP6, segment_list - sm->sid_lists);
+ }
+ else
+ {
+ dpo_set (&segment_list->ip6_dpo, sr_pr_insert_dpo_type, DPO_PROTO_IP6,
+ segment_list - sm->sid_lists);
+ dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_insert_dpo_type,
+ DPO_PROTO_IP6, segment_list - sm->sid_lists);
+ }
+
+ return segment_list;
+}
+
+/**
+ * @brief Updates the Load Balancer after an SR Policy change
+ *
+ * @param sr_policy is the modified SR Policy
+ */
+static inline void
+update_lb (ip6_sr_policy_t * sr_policy)
+{
+ flow_hash_config_t fhc;
+ u32 *sl_index;
+ ip6_sr_sl_t *segment_list;
+ ip6_sr_main_t *sm = &sr_main;
+ load_balance_path_t path;
+ path.path_index = FIB_NODE_INDEX_INVALID;
+ load_balance_path_t *ip4_path_vector = 0;
+ load_balance_path_t *ip6_path_vector = 0;
+ load_balance_path_t *b_path_vector = 0;
+
+ /* In case LB does not exist, create it */
+ if (!dpo_id_is_valid (&sr_policy->bsid_dpo))
+ {
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = sr_policy->bsid,
+ }
+ };
+
+ /* Add FIB entry for BSID */
+ fhc = fib_table_get_flow_hash_config (sr_policy->fib_table,
+ dpo_proto_to_fib (DPO_PROTO_IP6));
+
+ dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
+ load_balance_create (0, DPO_PROTO_IP6, fhc));
+
+ dpo_set (&sr_policy->ip6_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
+ load_balance_create (0, DPO_PROTO_IP6, fhc));
+
+ /* Update the FIB entries to point to the LB DPO in the main FIB and the hidden one */
+ fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6,
+ sr_policy->fib_table),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->bsid_dpo);
+
+ fib_table_entry_special_dpo_update (sm->fib_table_ip6,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->ip6_dpo);
+
+ if (sr_policy->is_encap)
+ {
+ dpo_set (&sr_policy->ip4_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP4,
+ load_balance_create (0, DPO_PROTO_IP4, fhc));
+
+ fib_table_entry_special_dpo_update (sm->fib_table_ip4,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->ip4_dpo);
+ }
+
+ }
+
+ /* Create the LB path vector */
+ //path_vector = vec_new(load_balance_path_t, vec_len(sr_policy->segments_lists));
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ path.path_dpo = segment_list->bsid_dpo;
+ path.path_weight = segment_list->weight;
+ vec_add1 (b_path_vector, path);
+ path.path_dpo = segment_list->ip6_dpo;
+ vec_add1 (ip6_path_vector, path);
+ if (sr_policy->is_encap)
+ {
+ path.path_dpo = segment_list->ip4_dpo;
+ vec_add1 (ip4_path_vector, path);
+ }
+ }
+
+ /* Update LB multipath */
+ load_balance_multipath_update (&sr_policy->bsid_dpo, b_path_vector,
+ LOAD_BALANCE_FLAG_NONE);
+ load_balance_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector,
+ LOAD_BALANCE_FLAG_NONE);
+ if (sr_policy->is_encap)
+ load_balance_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector,
+ LOAD_BALANCE_FLAG_NONE);
+
+ /* Cleanup */
+ vec_free (b_path_vector);
+ vec_free (ip6_path_vector);
+ vec_free (ip4_path_vector);
+
+}
+
+/**
+ * @brief Updates the Replicate DPO after an SR Policy change
+ *
+ * @param sr_policy is the modified SR Policy (type spray)
+ */
+static inline void
+update_replicate (ip6_sr_policy_t * sr_policy)
+{
+ u32 *sl_index;
+ ip6_sr_sl_t *segment_list;
+ ip6_sr_main_t *sm = &sr_main;
+ load_balance_path_t path;
+ path.path_index = FIB_NODE_INDEX_INVALID;
+ load_balance_path_t *b_path_vector = 0;
+ load_balance_path_t *ip6_path_vector = 0;
+ load_balance_path_t *ip4_path_vector = 0;
+
+ /* In case the replicate DPO does not exist, create it */
+ if (!dpo_id_is_valid (&sr_policy->bsid_dpo))
+ {
+ dpo_set (&sr_policy->bsid_dpo, DPO_REPLICATE,
+ DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6));
+
+ dpo_set (&sr_policy->ip6_dpo, DPO_REPLICATE,
+ DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6));
+
+ /* Update FIB entry's DPO to point to SR without LB */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = sr_policy->bsid,
+ }
+ };
+ fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6,
+ sr_policy->fib_table),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->bsid_dpo);
+
+ fib_table_entry_special_dpo_update (sm->fib_table_ip6,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->ip6_dpo);
+
+ if (sr_policy->is_encap)
+ {
+ dpo_set (&sr_policy->ip4_dpo, DPO_REPLICATE, DPO_PROTO_IP4,
+ replicate_create (0, DPO_PROTO_IP4));
+
+ fib_table_entry_special_dpo_update (sm->fib_table_ip4,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->ip4_dpo);
+ }
+
+ }
+
+ /* Create the replicate path vector */
+ path.path_weight = 1;
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ path.path_dpo = segment_list->bsid_dpo;
+ vec_add1 (b_path_vector, path);
+ path.path_dpo = segment_list->ip6_dpo;
+ vec_add1 (ip6_path_vector, path);
+ if (sr_policy->is_encap)
+ {
+ path.path_dpo = segment_list->ip4_dpo;
+ vec_add1 (ip4_path_vector, path);
+ }
+ }
+
+ /* Update replicate multipath */
+ replicate_multipath_update (&sr_policy->bsid_dpo, b_path_vector);
+ replicate_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector);
+ if (sr_policy->is_encap)
+ replicate_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector);
+}
+
+/******************************* SR rewrite API *******************************/
+/* Three functions for handling sr policies:
+ * -> sr_policy_add
+ * -> sr_policy_del
+ * -> sr_policy_mod
+ * All of them are API. CLI function on sr_policy_command_fn */
+
+/**
+ * @brief Create a new SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param segments is a vector of IPv6 addresses composing the segment list
+ * @param weight is the weight of the SID list (optional)
+ * @param behavior is the behavior of the SR policy (default/spray)
+ * @param fib_table is the VRF where to install the FIB entry for the BSID
+ * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
+ u32 weight, u8 behavior, u32 fib_table, u8 is_encap)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *sr_policy = 0;
+ uword *p;
+
+ /* Search for existing keys (BSID) */
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ {
+ /* An SR policy with this BSID already exists; complain */
+ return -12;
+ }
+
+ /* Search collision in FIB entries */
+ /* Explanation: Some other entity might have already created a route for
+ * the BSID. In theory this is impossible, but in practice we could see
+ * it. Assert it and scream if needed */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = *bsid,
+ }
+ };
+
+ /* Lookup the FIB index associated to the table selected */
+ u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6,
+ (fib_table != (u32) ~ 0 ? fib_table : 0));
+ if (fib_index == ~0)
+ return -13;
+
+ /* Lookup whether there exists an entry for the BSID */
+ fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx);
+ if (FIB_NODE_INDEX_INVALID != fei)
+ return -12; //There is an entry for such lookup
+
+ /* Add an SR policy object */
+ pool_get (sm->sr_policies, sr_policy);
+ memset (sr_policy, 0, sizeof (*sr_policy));
+ clib_memcpy (&sr_policy->bsid, bsid, sizeof (ip6_address_t));
+ sr_policy->type = behavior;
+ sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0); //Is default FIB 0 ?
+ sr_policy->is_encap = is_encap;
+
+ /* Copy the key */
+ mhash_set (&sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies,
+ NULL);
+
+ /* Create a segment list and add the index to the SR policy */
+ create_sl (sr_policy, segments, weight, is_encap);
+
+ /* If the FIB tables don't exist yet, create them */
+ if (sm->fib_table_ip6 == (u32) ~ 0)
+ {
+ sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
+ FIB_SOURCE_SR,
+ "SRv6 steering of IP6 prefixes through BSIDs");
+ sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
+ FIB_SOURCE_SR,
+ "SRv6 steering of IP4 prefixes through BSIDs");
+ }
+
+ /* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ update_lb (sr_policy);
+ else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+ update_replicate (sr_policy);
+ return 0;
+}
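+
+/*
+ * Hypothetical usage sketch (editor's illustration): creating a default
+ * encapsulation policy with a single segment, passing ~0 as weight and
+ * fib_table to request the defaults:
+ *
+ * ip6_address_t bsid, seg;
+ * ip6_address_t *segments = 0;
+ * ... fill in bsid and seg ...
+ * vec_add1 (segments, seg);
+ * rv = sr_policy_add (&bsid, segments, (u32) ~0,
+ * SR_POLICY_TYPE_DEFAULT, (u32) ~0, 1);
+ * vec_free (segments);
+ */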
+
+/**
+ * @brief Delete a SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param index is the index of the SR policy
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_policy_del (ip6_address_t * bsid, u32 index)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *sr_policy = 0;
+ ip6_sr_sl_t *segment_list;
+ u32 *sl_index;
+ uword *p;
+
+ if (bsid)
+ {
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+ }
+ else
+ {
+ sr_policy = pool_elt_at_index (sm->sr_policies, index);
+ if (!sr_policy)
+ return -1;
+ }
+
+ /* Remove BindingSID FIB entry */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = sr_policy->bsid,
+ }
+ ,
+ };
+
+ fib_table_entry_special_remove (fib_table_find (FIB_PROTOCOL_IP6,
+ sr_policy->fib_table),
+ &pfx, FIB_SOURCE_SR);
+
+ fib_table_entry_special_remove (sm->fib_table_ip6, &pfx, FIB_SOURCE_SR);
+
+ if (sr_policy->is_encap)
+ fib_table_entry_special_remove (sm->fib_table_ip4, &pfx, FIB_SOURCE_SR);
+
+ if (dpo_id_is_valid (&sr_policy->bsid_dpo))
+ {
+ dpo_reset (&sr_policy->bsid_dpo);
+ dpo_reset (&sr_policy->ip4_dpo);
+ dpo_reset (&sr_policy->ip6_dpo);
+ }
+
+ /* Clean SID Lists */
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ vec_free (segment_list->segments);
+ vec_free (segment_list->rewrite);
+ if (!sr_policy->is_encap)
+ vec_free (segment_list->rewrite_bsid);
+ pool_put_index (sm->sid_lists, *sl_index);
+ }
+
+ /* Remove SR policy entry */
+ mhash_unset (&sm->sr_policies_index_hash, &sr_policy->bsid, NULL);
+ pool_put (sm->sr_policies, sr_policy);
+
+ /* If FIB empty unlock it */
+ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
+ {
+ fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6, FIB_SOURCE_SR);
+ fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6, FIB_SOURCE_SR);
+ sm->fib_table_ip6 = (u32) ~ 0;
+ sm->fib_table_ip4 = (u32) ~ 0;
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Modify an existing SR policy
+ *
+ * The possible modifications are adding a new Segment List, modifying an
+ * existing Segment List (modify the weight only) and delete a given
+ * Segment List from the SR Policy.
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param index is the index of the SR policy
+ * @param fib_table is the VRF where to install the FIB entry for the BSID
+ * @param operation is the operation to perform (1 = add a new SL,
+ *        2 = delete an SL, 3 = modify the weight of an SL)
+ * @param segments is a vector of IPv6 addresses composing the segment list
+ * @param sl_index is the index of the Segment List to modify/delete
+ * @param weight is the weight of the SID list (optional)
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
+ u8 operation, ip6_address_t * segments, u32 sl_index,
+ u32 weight)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *sr_policy = 0;
+ ip6_sr_sl_t *segment_list;
+ u32 *sl_index_iterate;
+ uword *p;
+
+ if (bsid)
+ {
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+ }
+ else
+ {
+ sr_policy = pool_elt_at_index (sm->sr_policies, index);
+ if (!sr_policy)
+ return -1;
+ }
+
+ if (operation == 1) /* Add SR List to an existing SR policy */
+ {
+ /* Create the new SL */
+ segment_list =
+ create_sl (sr_policy, segments, weight, sr_policy->is_encap);
+
+ /* Create a new LB DPO */
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ update_lb (sr_policy);
+ else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+ update_replicate (sr_policy);
+ }
+ else if (operation == 2) /* Delete SR List from an existing SR policy */
+ {
+ /* Check that currently there are more than one SID list */
+ if (vec_len (sr_policy->segments_lists) == 1)
+ return -21;
+
+ /* Check that the SR list does exist and is assigned to the sr policy */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -22;
+
+ /* Remove the lucky SR list that is being kicked out */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+ vec_free (segment_list->segments);
+ vec_free (segment_list->rewrite);
+ if (!sr_policy->is_encap)
+ vec_free (segment_list->rewrite_bsid);
+ pool_put_index (sm->sid_lists, sl_index);
+ vec_del1 (sr_policy->segments_lists,
+ sl_index_iterate - sr_policy->segments_lists);
+
+ /* Create a new LB DPO */
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ update_lb (sr_policy);
+ else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+ update_replicate (sr_policy);
+ }
+ else if (operation == 3) /* Modify the weight of an existing SR List */
+ {
+ /* Find the corresponding SL */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -32;
+
+ /* Change the weight */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+ segment_list->weight = weight;
+
+ /* Update LB */
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ update_lb (sr_policy);
+ }
+ else /* Incorrect op. */
+ return -1;
+
+ return 0;
+}
+
+/**
+ * @brief CLI for 'sr policies' command family
+ */
+static clib_error_t *
+sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv = -1;
+ char is_del = 0, is_add = 0, is_mod = 0;
+ char policy_set = 0;
+ ip6_address_t bsid, next_address;
+ u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
+ u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0;
+ ip6_address_t *segments = 0, *this_seg;
+ u8 operation = 0;
+ char is_encap = 1;
+ char is_spray = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!is_add && !is_mod && !is_del && unformat (input, "add"))
+ is_add = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
+ is_del = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
+ is_mod = 1;
+ else if (!policy_set
+ && unformat (input, "bsid %U", unformat_ip6_address, &bsid))
+ policy_set = 1;
+ else if (!is_add && !policy_set
+ && unformat (input, "index %d", &sr_policy_index))
+ policy_set = 1;
+ else if (unformat (input, "weight %d", &weight));
+ else
+ if (unformat (input, "next %U", unformat_ip6_address, &next_address))
+ {
+ vec_add2 (segments, this_seg, 1);
+ clib_memcpy (this_seg->as_u8, next_address.as_u8,
+ sizeof (*this_seg));
+ }
+ else if (unformat (input, "add sl"))
+ operation = 1;
+ else if (unformat (input, "del sl index %d", &sl_index))
+ operation = 2;
+ else if (unformat (input, "mod sl index %d", &sl_index))
+ operation = 3;
+ else if (fib_table == (u32) ~ 0
+ && unformat (input, "fib-table %d", &fib_table));
+ else if (unformat (input, "encap"))
+ is_encap = 1;
+ else if (unformat (input, "insert"))
+ is_encap = 0;
+ else if (unformat (input, "spray"))
+ is_spray = 1;
+ else
+ break;
+ }
+
+ if (!is_add && !is_mod && !is_del)
+ return clib_error_return (0, "Incorrect CLI");
+
+ if (!policy_set)
+ return clib_error_return (0, "No SR policy BSID or index specified");
+
+ if (is_add)
+ {
+ if (vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+ rv = sr_policy_add (&bsid, segments, weight,
+ (is_spray ? SR_POLICY_TYPE_SPRAY :
+ SR_POLICY_TYPE_DEFAULT), fib_table, is_encap);
+ }
+ else if (is_del)
+ rv = sr_policy_del ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+ sr_policy_index);
+ else if (is_mod)
+ {
+ if (!operation)
+ return clib_error_return (0, "No SL modification specified");
+ if (operation != 1 && sl_index == (u32) ~ 0)
+ return clib_error_return (0, "No Segment List index specified");
+ if (operation == 1 && vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+ if (operation == 3 && weight == (u32) ~ 0)
+ return clib_error_return (0, "No new weight for the SL specified");
+ rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+ sr_policy_index, fib_table, operation, segments,
+ sl_index, weight);
+ }
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -12:
+ return clib_error_return (0,
+ "There is already a FIB entry for the BindingSID address.\n"
+ "The SR policy could not be created.");
+ case -13:
+ return clib_error_return (0, "The specified FIB table does not exist.");
+ case -21:
+ return clib_error_return (0,
+ "The selected SR policy only contains ONE segment list. "
+ "Please remove the SR policy instead");
+ case -22:
+ return clib_error_return (0,
+ "Could not delete the segment list. "
+ "It is not associated with that SR policy.");
+ case -32:
+ return clib_error_return (0,
+ "Could not modify the segment list. "
+ "The given SL is not associated with such SR policy.");
+ default:
+ return clib_error_return (0, "BUG: sr policy returns %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_policy_command, static) = {
+ .path = "sr policy",
+ .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] "
+ "next A:: next B:: next C:: (weight 1) (fib-table 2) (encap|insert)",
+ .long_help =
+ "Manipulation of SR policies.\n"
+ "A Segment Routing policy may contain several SID lists. Each SID list has\n"
+ "an associated weight (default 1), which will result in wECMP (uECMP).\n"
+ "Segment Routing policies might be of type encapsulation or srh insertion\n"
+ "Each SR policy will be associated with a unique BindingSID.\n"
+ "A BindingSID is a locally allocated SegmentID. For every packet that arrives\n"
+ "with IPv6_DA:BSID such traffic will be steered into the SR policy.\n"
+ "The add command will create a SR policy with its first segment list (sl)\n"
+ "The mod command allows you to add, remove, or modify the existing segment lists\n"
+ "within an SR policy.\n"
+ "The del command allows you to delete a SR policy along with all its associated\n"
+ "SID lists.\n",
+ .function = sr_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI to display onscreen all the SR policies
+ */
+static clib_error_t *
+show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 *sl_index;
+ ip6_sr_sl_t *segment_list = 0;
+ ip6_sr_policy_t *sr_policy = 0;
+ ip6_sr_policy_t **vec_policies = 0;
+ ip6_address_t *addr;
+ u8 *s;
+ int i = 0;
+
+ vlib_cli_output (vm, "SR policies:");
+
+ /* *INDENT-OFF* */
+ pool_foreach (sr_policy, sm->sr_policies,
+ {vec_add1 (vec_policies, sr_policy); } );
+ /* *INDENT-ON* */
+
+ vec_foreach_index (i, vec_policies)
+ {
+ sr_policy = vec_policies[i];
+ vlib_cli_output (vm, "[%u].-\tBSID: %U",
+ (u32) (sr_policy - sm->sr_policies),
+ format_ip6_address, &sr_policy->bsid);
+ vlib_cli_output (vm, "\tBehavior: %s",
+ (sr_policy->is_encap ? "Encapsulation" :
+ "SRH insertion"));
+ vlib_cli_output (vm, "\tType: %s",
+ (sr_policy->type ==
+ SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
+ vlib_cli_output (vm, "\tFIB table: %u",
+ (sr_policy->fib_table !=
+ (u32) ~ 0 ? sr_policy->fib_table : 0));
+ vlib_cli_output (vm, "\tSegment Lists:");
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ s = NULL;
+ s = format (s, "\t[%u].- ", *sl_index);
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ s = format (s, "< ");
+ vec_foreach (addr, segment_list->segments)
+ {
+ s = format (s, "%U, ", format_ip6_address, addr);
+ }
+ s = format (s, "\b\b > ");
+ s = format (s, "weight: %u", segment_list->weight);
+ vlib_cli_output (vm, " %s", s);
+ }
+ vlib_cli_output (vm, "-----------");
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_policies_command, static) = {
+ .path = "show sr policies",
+ .short_help = "show sr policies",
+ .function = show_sr_policies_command_fn,
+};
+/* *INDENT-ON* */
+
+/*************************** SR rewrite graph node ****************************/
+/**
+ * @brief Trace for the SR Policy Rewrite graph node
+ */
+static u8 *
+format_sr_policy_rewrite_trace (u8 * s, va_list * args)
+{
+ //TODO
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sr_policy_rewrite_trace_t *t = va_arg (*args, sr_policy_rewrite_trace_t *);
+
+ s = format
+ (s, "SR-policy-rewrite: src %U dst %U",
+ format_ip6_address, &t->src, format_ip6_address, &t->dst);
+
+ return s;
+}
+
+/**
+ * @brief IPv6 encapsulation processing as per RFC2473
+ */
+static_always_inline void
+encaps_processing_v6 (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0, ip6_header_t * ip0_encap)
+{
+ u32 new_l0;
+
+ ip0_encap->hop_limit -= 1;
+ new_l0 =
+ ip0->payload_length + sizeof (ip6_header_t) +
+ clib_net_to_host_u16 (ip0_encap->payload_length);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip0->ip_version_traffic_class_and_flow_label =
+ ip0_encap->ip_version_traffic_class_and_flow_label;
+}
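+
+/*
+ * Editor's note: the outer ip0->payload_length starts out as the SRH size
+ * (written without a byte swap in compute_rewrite_encaps), so for a
+ * 56-byte SRH and a 100-byte inner payload the outer payload length
+ * becomes 56 + 40 + 100 = 196 bytes before the final host-to-net swap.
+ */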
+
+/**
+ * @brief Graph node for applying a SR policy into an IPv6 packet. Encapsulation
+ */
+static uword
+sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int encap_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+ ip1_encap = vlib_buffer_get_current (b1);
+ ip2_encap = vlib_buffer_get_current (b2);
+ ip3_encap = vlib_buffer_get_current (b3);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+ sl1->rewrite, vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+ sl2->rewrite, vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+ sl3->rewrite, vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ encaps_processing_v6 (node, b0, ip0, ip0_encap);
+ encaps_processing_v6 (node, b1, ip1, ip1_encap);
+ encaps_processing_v6 (node, b2, ip2, ip2_encap);
+ encaps_processing_v6 (node, b3, ip3, ip3_encap);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ encap_pkts += 4;
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0, *ip0_encap = 0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ encaps_processing_v6 (node, b0, ip0, ip0_encap);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ encap_pkts++;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ encap_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
+ .function = sr_policy_rewrite_encaps,
+ .name = "sr-pl-rewrite-encaps",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief IPv4 encapsulation processing as per RFC2473
+ */
+static_always_inline void
+encaps_processing_v4 (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0, ip4_header_t * ip0_encap)
+{
+ u32 new_l0;
+ ip6_sr_header_t *sr0;
+
+ u32 checksum0;
+
+ /* Inner IPv4: Decrement TTL & update checksum */
+ ip0_encap->ttl -= 1;
+ checksum0 = ip0_encap->checksum + clib_host_to_net_u16 (0x0100);
+ checksum0 += checksum0 >= 0xffff;
+ ip0_encap->checksum = checksum0;
+
+ /* Outer IPv6: Update length, FL, proto */
+ new_l0 = ip0->payload_length + clib_net_to_host_u16 (ip0_encap->length);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0 | ((6 & 0xF) << 28) |
+ ((ip0_encap->tos & 0xFF) << 20));
+ sr0 = (void *) (ip0 + 1);
+ sr0->protocol = IP_PROTOCOL_IP_IN_IP;
+}
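+
+/*
+ * Illustrative sketch (assumption: this helper is not part of the original
+ * file): the TTL decrement above relies on the incremental checksum update
+ * of RFC 1624.  Since TTL occupies the high byte of a 16-bit header word,
+ * decrementing it by one is compensated by adding 0x0100 (byte-swapped to
+ * network order) to the stored checksum, then folding the carry:
+ */
+static_always_inline u16
+ip4_ttl_decrement_checksum_sketch (u16 old_checksum)
+{
+  u32 sum = old_checksum + clib_host_to_net_u16 (0x0100);
+  sum += sum >= 0xffff;		/* end-around carry fold */
+  return (u16) sum;
+}
+
+/*
+ * The outer ip_version_traffic_class_and_flow_label word built above packs,
+ * from the most significant bit down: version (4 bits, set to 6), traffic
+ * class (8 bits, copied from the inner TOS) and a zero flow label (20 bits).
+ */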
+
+/**
+ * @brief Graph node for applying an SR policy to an IPv4 packet. Encapsulation.
+ */
+static uword
+sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int encap_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip4_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet data for the next loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+ ip1_encap = vlib_buffer_get_current (b1);
+ ip2_encap = vlib_buffer_get_current (b2);
+ ip3_encap = vlib_buffer_get_current (b3);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+ sl1->rewrite, vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+ sl2->rewrite, vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+ sl3->rewrite, vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ encaps_processing_v4 (node, b0, ip0, ip0_encap);
+ encaps_processing_v4 (node, b1, ip1, ip1_encap);
+ encaps_processing_v4 (node, b2, ip2, ip2_encap);
+ encaps_processing_v4 (node, b3, ip3, ip3_encap);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ encap_pkts += 4;
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip4_header_t *ip0_encap = 0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ encaps_processing_v4 (node, b0, ip0, ip0_encap);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ encap_pkts++;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ encap_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
+ .function = sr_policy_rewrite_encaps_v4,
+ .name = "sr-pl-rewrite-encaps-v4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+always_inline u32
+ip_flow_hash (void *data)
+{
+ ip4_header_t *iph = (ip4_header_t *) data;
+
+ if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
+ return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT);
+ else
+ return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT);
+}
+
+always_inline u64
+mac_to_u64 (u8 * m)
+{
+ return (*((u64 *) m) & 0xffffffffffff);
+}
+
+always_inline u32
+l2_flow_hash (vlib_buffer_t * b0)
+{
+ ethernet_header_t *eh;
+ u64 a, b, c;
+ uword is_ip, eh_size;
+ u16 eh_type;
+
+ eh = vlib_buffer_get_current (b0);
+ eh_type = clib_net_to_host_u16 (eh->type);
+ eh_size = ethernet_buffer_header_size (b0);
+
+ is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);
+
+ /* the two prefetched cache lines are already resident, so use them all */
+ if (is_ip)
+ a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
+ else
+ a = eh->type;
+
+ b = mac_to_u64 ((u8 *) eh->dst_address);
+ c = mac_to_u64 ((u8 *) eh->src_address);
+ hash_mix64 (a, b, c);
+
+ return (u32) c;
+}
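+
+/*
+ * Illustrative sketch (assumption: this helper is not present in the
+ * original file): the nodes below select a segment list with
+ * flow_hash & (vec_len (segments_lists) - 1), which distributes flows
+ * uniformly only when the number of segment lists is a power of two.  A
+ * modulo-based variant covering arbitrary list counts would be:
+ */
+static_always_inline u32
+sr_sl_index_from_hash_sketch (u32 flow_hash, u32 n_segment_lists)
+{
+  ASSERT (n_segment_lists > 0);
+  return flow_hash % n_segment_lists;
+}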
+
+/**
+ * @brief Graph node for applying an SR policy to an L2 frame
+ */
+static uword
+sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int encap_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ethernet_header_t *en0, *en1, *en2, *en3;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_sr_policy_t *sp0, *sp1, *sp2, *sp3;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet data for the next loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sp0 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b0)->sw_if_index
+ [VLIB_RX]]);
+
+ sp1 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b1)->sw_if_index
+ [VLIB_RX]]);
+
+ sp2 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b2)->sw_if_index
+ [VLIB_RX]]);
+
+ sp3 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b3)->sw_if_index
+ [VLIB_RX]]);
+
+ if (vec_len (sp0->segments_lists) == 1)
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0];
+ else
+ {
+ vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash &
+ (vec_len (sp0->segments_lists) - 1))];
+ }
+
+ if (vec_len (sp1->segments_lists) == 1)
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] = sp1->segments_lists[0];
+ else
+ {
+ vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1);
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] =
+ sp1->segments_lists[(vnet_buffer (b1)->ip.flow_hash &
+ (vec_len (sp1->segments_lists) - 1))];
+ }
+
+ if (vec_len (sp2->segments_lists) == 1)
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] = sp2->segments_lists[0];
+ else
+ {
+ vnet_buffer (b2)->ip.flow_hash = l2_flow_hash (b2);
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] =
+ sp2->segments_lists[(vnet_buffer (b2)->ip.flow_hash &
+ (vec_len (sp2->segments_lists) - 1))];
+ }
+
+ if (vec_len (sp3->segments_lists) == 1)
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] = sp3->segments_lists[0];
+ else
+ {
+ vnet_buffer (b3)->ip.flow_hash = l2_flow_hash (b3);
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] =
+ sp3->segments_lists[(vnet_buffer (b3)->ip.flow_hash &
+ (vec_len (sp3->segments_lists) - 1))];
+ }
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ en0 = vlib_buffer_get_current (b0);
+ en1 = vlib_buffer_get_current (b1);
+ en2 = vlib_buffer_get_current (b2);
+ en3 = vlib_buffer_get_current (b3);
+
+ clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite,
+ vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) en1) - vec_len (sl1->rewrite), sl1->rewrite,
+ vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) en2) - vec_len (sl2->rewrite), sl2->rewrite,
+ vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) en3) - vec_len (sl3->rewrite), sl3->rewrite,
+ vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ ip0->payload_length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
+ ip1->payload_length =
+ clib_host_to_net_u16 (b1->current_length - sizeof (ip6_header_t));
+ ip2->payload_length =
+ clib_host_to_net_u16 (b2->current_length - sizeof (ip6_header_t));
+ ip3->payload_length =
+ clib_host_to_net_u16 (b3->current_length - sizeof (ip6_header_t));
+
+ sr0 = (void *) (ip0 + 1);
+ sr1 = (void *) (ip1 + 1);
+ sr2 = (void *) (ip2 + 1);
+ sr3 = (void *) (ip3 + 1);
+
+ sr0->protocol = sr1->protocol = sr2->protocol = sr3->protocol =
+ IP_PROTOCOL_IP6_NONXT;
+
+ /* TODO: which traffic class and flow label should be set? For an L2 payload there is no inner IP header to copy them from. */
+ //ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0|((6&0xF)<<28)|((ip0_encap->tos&0xFF)<<20));
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ encap_pkts += 4;
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_sr_header_t *sr0;
+ ethernet_header_t *en0;
+ ip6_sr_policy_t *sp0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* Find the SR policy */
+ sp0 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b0)->sw_if_index
+ [VLIB_RX]]);
+
+ /* In case there is more than one SL, LB among them */
+ if (vec_len (sp0->segments_lists) == 1)
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0];
+ else
+ {
+ vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash &
+ (vec_len (sp0->segments_lists) - 1))];
+ }
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ en0 = vlib_buffer_get_current (b0);
+
+ clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite,
+ vec_len (sl0->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ ip0->payload_length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
+
+ sr0 = (void *) (ip0 + 1);
+ sr0->protocol = IP_PROTOCOL_IP6_NONXT;
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ encap_pkts++;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ encap_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
+ .function = sr_policy_rewrite_encaps_l2,
+ .name = "sr-pl-rewrite-encaps-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Graph node for applying an SR policy to an IPv6 packet. SRH insertion.
+ */
+static uword
+sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int insert_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+ u16 new_l0, new_l1, new_l2, new_l3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet data for the next loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr0 =
+ (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+ ip6_ext_header_len (ip0 + 1));
+ else
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+ if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr1 =
+ (ip6_sr_header_t *) (((void *) (ip1 + 1)) +
+ ip6_ext_header_len (ip1 + 1));
+ else
+ sr1 = (ip6_sr_header_t *) (ip1 + 1);
+
+ if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr2 =
+ (ip6_sr_header_t *) (((void *) (ip2 + 1)) +
+ ip6_ext_header_len (ip2 + 1));
+ else
+ sr2 = (ip6_sr_header_t *) (ip2 + 1);
+
+ if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr3 =
+ (ip6_sr_header_t *) (((void *) (ip3 + 1)) +
+ ip6_ext_header_len (ip3 + 1));
+ else
+ sr3 = (ip6_sr_header_t *) (ip3 + 1);
+
+ clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0,
+ (void *) sr0 - (void *) ip0);
+ clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite), (u8 *) ip1,
+ (void *) sr1 - (void *) ip1);
+ clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite), (u8 *) ip2,
+ (void *) sr2 - (void *) ip2);
+ clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite), (u8 *) ip3,
+ (void *) sr3 - (void *) ip3);
+
+ clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite,
+ vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite)), sl1->rewrite,
+ vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite)), sl2->rewrite,
+ vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite)), sl3->rewrite,
+ vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = ((void *) ip0) - vec_len (sl0->rewrite);
+ ip1 = ((void *) ip1) - vec_len (sl1->rewrite);
+ ip2 = ((void *) ip2) - vec_len (sl2->rewrite);
+ ip3 = ((void *) ip3) - vec_len (sl3->rewrite);
+
+ ip0->hop_limit -= 1;
+ ip1->hop_limit -= 1;
+ ip2->hop_limit -= 1;
+ ip3->hop_limit -= 1;
+
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (sl0->rewrite);
+ new_l1 =
+ clib_net_to_host_u16 (ip1->payload_length) +
+ vec_len (sl1->rewrite);
+ new_l2 =
+ clib_net_to_host_u16 (ip2->payload_length) +
+ vec_len (sl2->rewrite);
+ new_l3 =
+ clib_net_to_host_u16 (ip3->payload_length) +
+ vec_len (sl3->rewrite);
+
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+ ip2->payload_length = clib_host_to_net_u16 (new_l2);
+ ip3->payload_length = clib_host_to_net_u16 (new_l3);
+
+ sr0 = ((void *) sr0) - vec_len (sl0->rewrite);
+ sr1 = ((void *) sr1) - vec_len (sl1->rewrite);
+ sr2 = ((void *) sr2) - vec_len (sl2->rewrite);
+ sr3 = ((void *) sr3) - vec_len (sl3->rewrite);
+
+ sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0];
+ sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1];
+ sr1->segments->as_u64[0] = ip1->dst_address.as_u64[0];
+ sr1->segments->as_u64[1] = ip1->dst_address.as_u64[1];
+ sr2->segments->as_u64[0] = ip2->dst_address.as_u64[0];
+ sr2->segments->as_u64[1] = ip2->dst_address.as_u64[1];
+ sr3->segments->as_u64[0] = ip3->dst_address.as_u64[0];
+ sr3->segments->as_u64[1] = ip3->dst_address.as_u64[1];
+
+ ip0->dst_address.as_u64[0] =
+ (sr0->segments + sr0->segments_left)->as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ (sr0->segments + sr0->segments_left)->as_u64[1];
+ ip1->dst_address.as_u64[0] =
+ (sr1->segments + sr1->segments_left)->as_u64[0];
+ ip1->dst_address.as_u64[1] =
+ (sr1->segments + sr1->segments_left)->as_u64[1];
+ ip2->dst_address.as_u64[0] =
+ (sr2->segments + sr2->segments_left)->as_u64[0];
+ ip2->dst_address.as_u64[1] =
+ (sr2->segments + sr2->segments_left)->as_u64[1];
+ ip3->dst_address.as_u64[0] =
+ (sr3->segments + sr3->segments_left)->as_u64[0];
+ ip3->dst_address.as_u64[1] =
+ (sr3->segments + sr3->segments_left)->as_u64[1];
+
+ ip6_ext_header_t *ip_ext;
+ if (ip0 + 1 == (void *) sr0)
+ {
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip0 + 1);
+ sr0->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip1 + 1 == (void *) sr1)
+ {
+ sr1->protocol = ip1->protocol;
+ ip1->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip1 + 1);
+ sr1->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip2 + 1 == (void *) sr2)
+ {
+ sr2->protocol = ip2->protocol;
+ ip2->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip2 + 1);
+ sr2->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip3 + 1 == (void *) sr3)
+ {
+ sr3->protocol = ip3->protocol;
+ ip3->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip3 + 1);
+ sr3->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ insert_pkts += 4;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_sr_header_t *sr0 = 0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ u16 new_l0 = 0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr0 =
+ (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+ ip6_ext_header_len (ip0 + 1));
+ else
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+ clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0,
+ (void *) sr0 - (void *) ip0);
+ clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite,
+ vec_len (sl0->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = ((void *) ip0) - vec_len (sl0->rewrite);
+ ip0->hop_limit -= 1;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (sl0->rewrite);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ sr0 = ((void *) sr0) - vec_len (sl0->rewrite);
+ sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0];
+ sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1];
+
+ ip0->dst_address.as_u64[0] =
+ (sr0->segments + sr0->segments_left)->as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ (sr0->segments + sr0->segments_left)->as_u64[1];
+
+ if (ip0 + 1 == (void *) sr0)
+ {
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip6_ext_header_t *ip_ext = (void *) (ip0 + 1);
+ sr0->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ insert_pkts++;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ insert_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
+ .function = sr_policy_rewrite_insert,
+ .name = "sr-pl-rewrite-insert",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
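+
+/*
+ * Illustrative sketch (not part of the node above): unlike encapsulation,
+ * SRH insertion keeps the original IPv6 header.  The header (and any
+ * leading hop-by-hop extension header) is shifted backwards by
+ * vec_len (sl->rewrite) bytes and the SRH is copied into the gap:
+ *
+ *   before:  | IPv6 [HBH] | payload |
+ *   after:   | IPv6 [HBH] | SRH | payload |
+ *
+ * The current destination address is saved into segments[0] of the new SRH
+ * and the destination is rewritten to segments[segments_left].
+ */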
+
+/**
+ * @brief Graph node for applying an SR policy to a packet via its BSID. SRH insertion.
+ */
+static uword
+sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int insert_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+ u16 new_l0, new_l1, new_l2, new_l3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet data for the next loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite_bsid));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite_bsid));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite_bsid));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite_bsid));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr0 =
+ (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+ ip6_ext_header_len (ip0 + 1));
+ else
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+ if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr1 =
+ (ip6_sr_header_t *) (((void *) (ip1 + 1)) +
+ ip6_ext_header_len (ip1 + 1));
+ else
+ sr1 = (ip6_sr_header_t *) (ip1 + 1);
+
+ if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr2 =
+ (ip6_sr_header_t *) (((void *) (ip2 + 1)) +
+ ip6_ext_header_len (ip2 + 1));
+ else
+ sr2 = (ip6_sr_header_t *) (ip2 + 1);
+
+ if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr3 =
+ (ip6_sr_header_t *) (((void *) (ip3 + 1)) +
+ ip6_ext_header_len (ip3 + 1));
+ else
+ sr3 = (ip6_sr_header_t *) (ip3 + 1);
+
+ clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0,
+ (void *) sr0 - (void *) ip0);
+ clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite_bsid), (u8 *) ip1,
+ (void *) sr1 - (void *) ip1);
+ clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite_bsid), (u8 *) ip2,
+ (void *) sr2 - (void *) ip2);
+ clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite_bsid), (u8 *) ip3,
+ (void *) sr3 - (void *) ip3);
+
+ clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)),
+ sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid));
+ clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite_bsid)),
+ sl1->rewrite_bsid, vec_len (sl1->rewrite_bsid));
+ clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite_bsid)),
+ sl2->rewrite_bsid, vec_len (sl2->rewrite_bsid));
+ clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite_bsid)),
+ sl3->rewrite_bsid, vec_len (sl3->rewrite_bsid));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite_bsid));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite_bsid));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite_bsid));
+
+ ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid);
+ ip1 = ((void *) ip1) - vec_len (sl1->rewrite_bsid);
+ ip2 = ((void *) ip2) - vec_len (sl2->rewrite_bsid);
+ ip3 = ((void *) ip3) - vec_len (sl3->rewrite_bsid);
+
+ ip0->hop_limit -= 1;
+ ip1->hop_limit -= 1;
+ ip2->hop_limit -= 1;
+ ip3->hop_limit -= 1;
+
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (sl0->rewrite_bsid);
+ new_l1 =
+ clib_net_to_host_u16 (ip1->payload_length) +
+ vec_len (sl1->rewrite_bsid);
+ new_l2 =
+ clib_net_to_host_u16 (ip2->payload_length) +
+ vec_len (sl2->rewrite_bsid);
+ new_l3 =
+ clib_net_to_host_u16 (ip3->payload_length) +
+ vec_len (sl3->rewrite_bsid);
+
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+ ip2->payload_length = clib_host_to_net_u16 (new_l2);
+ ip3->payload_length = clib_host_to_net_u16 (new_l3);
+
+ sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid);
+ sr1 = ((void *) sr1) - vec_len (sl1->rewrite_bsid);
+ sr2 = ((void *) sr2) - vec_len (sl2->rewrite_bsid);
+ sr3 = ((void *) sr3) - vec_len (sl3->rewrite_bsid);
+
+ ip0->dst_address.as_u64[0] =
+ (sr0->segments + sr0->segments_left)->as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ (sr0->segments + sr0->segments_left)->as_u64[1];
+ ip1->dst_address.as_u64[0] =
+ (sr1->segments + sr1->segments_left)->as_u64[0];
+ ip1->dst_address.as_u64[1] =
+ (sr1->segments + sr1->segments_left)->as_u64[1];
+ ip2->dst_address.as_u64[0] =
+ (sr2->segments + sr2->segments_left)->as_u64[0];
+ ip2->dst_address.as_u64[1] =
+ (sr2->segments + sr2->segments_left)->as_u64[1];
+ ip3->dst_address.as_u64[0] =
+ (sr3->segments + sr3->segments_left)->as_u64[0];
+ ip3->dst_address.as_u64[1] =
+ (sr3->segments + sr3->segments_left)->as_u64[1];
+
+ ip6_ext_header_t *ip_ext;
+ if (ip0 + 1 == (void *) sr0)
+ {
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip0 + 1);
+ sr0->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip1 + 1 == (void *) sr1)
+ {
+ sr1->protocol = ip1->protocol;
+ ip1->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip1 + 1);
+ sr1->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip2 + 1 == (void *) sr2)
+ {
+ sr2->protocol = ip2->protocol;
+ ip2->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip2 + 1);
+ sr2->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip3 + 1 == (void *) sr3)
+ {
+ sr3->protocol = ip3->protocol;
+ ip3->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip3 + 1);
+ sr3->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ insert_pkts += 4;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_sr_header_t *sr0 = 0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ u16 new_l0 = 0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite_bsid));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr0 =
+ (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+ ip6_ext_header_len (ip0 + 1));
+ else
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+ clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0,
+ (void *) sr0 - (void *) ip0);
+ clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)),
+ sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid));
+
+ ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid);
+ ip0->hop_limit -= 1;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (sl0->rewrite_bsid);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid);
+
+ ip0->dst_address.as_u64[0] =
+ (sr0->segments + sr0->segments_left)->as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ (sr0->segments + sr0->segments_left)->as_u64[1];
+
+ if (ip0 + 1 == (void *) sr0)
+ {
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip6_ext_header_t *ip_ext = (void *) (ip0 + 1);
+ sr0->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ insert_pkts++;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ insert_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
+ .function = sr_policy_rewrite_b_insert,
+ .name = "sr-pl-rewrite-b-insert",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SRH processing prior to BSID encapsulation
+ */
+static_always_inline void
+end_bsid_encaps_srh_processing (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_sr_header_t * sr0, u32 * next0)
+{
+ ip6_address_t *new_dst0;
+
+ if (PREDICT_FALSE (!sr0))
+ goto error_bsid_encaps;
+
+ if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
+ {
+ if (PREDICT_TRUE (sr0->segments_left != 0))
+ {
+ sr0->segments_left -= 1;
+ new_dst0 = (ip6_address_t *) (sr0->segments);
+ new_dst0 += sr0->segments_left;
+ ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+ ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+ return;
+ }
+ }
+
+error_bsid_encaps:
+ *next0 = SR_POLICY_REWRITE_NEXT_ERROR;
+ b0->error = node->errors[SR_POLICY_REWRITE_ERROR_BSID_ZERO];
+}
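+
+/*
+ * Example (sketch): for a packet whose SRH carries segments_left = 2, the
+ * helper above decrements segments_left to 1 and rewrites the IPv6
+ * destination address to sr->segments[1], so the inner packet already
+ * points at its next SID before the BSID policy encapsulates it.  Packets
+ * without a valid SRH, or with segments_left already at 0, are sent to
+ * SR_POLICY_REWRITE_NEXT_ERROR instead.
+ */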
+
+/**
+ * @brief Graph node for applying an SR policy via its BSID. Encapsulation.
+ */
+static uword
+sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int encap_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet data for the next loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+ ip1_encap = vlib_buffer_get_current (b1);
+ ip2_encap = vlib_buffer_get_current (b2);
+ ip3_encap = vlib_buffer_get_current (b3);
+
+ ip6_ext_header_find_t (ip0_encap, prev0, sr0,
+ IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip1_encap, prev1, sr1,
+ IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip2_encap, prev2, sr2,
+ IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip3_encap, prev3, sr3,
+ IP_PROTOCOL_IPV6_ROUTE);
+
+ end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
+ end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1);
+ end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2);
+ end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+ sl1->rewrite, vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+ sl2->rewrite, vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+ sl3->rewrite, vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ encaps_processing_v6 (node, b0, ip0, ip0_encap);
+ encaps_processing_v6 (node, b1, ip1, ip1_encap);
+ encaps_processing_v6 (node, b2, ip2, ip2_encap);
+ encaps_processing_v6 (node, b3, ip3, ip3_encap);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ encap_pkts += 4;
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0, *ip0_encap = 0;
+ ip6_ext_header_t *prev0;
+ ip6_sr_header_t *sr0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+ ip6_ext_header_find_t (ip0_encap, prev0, sr0,
+ IP_PROTOCOL_IPV6_ROUTE);
+ end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ encaps_processing_v6 (node, b0, ip0, ip0_encap);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ encap_pkts++;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ encap_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
+ .function = sr_policy_rewrite_b_encaps,
+ .name = "sr-pl-rewrite-b-encaps",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/*************************** SR Segment Lists DPOs ****************************/
+static u8 *
+format_sr_segment_list_dpo (u8 * s, va_list * args)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_address_t *addr;
+ ip6_sr_sl_t *sl;
+
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+ s = format (s, "SR: Segment List index:[%d]", index);
+ s = format (s, "\n\tSegments:");
+
+ sl = pool_elt_at_index (sm->sid_lists, index);
+
+ s = format (s, "< ");
+ vec_foreach (addr, sl->segments)
+ {
+ s = format (s, "%U, ", format_ip6_address, addr);
+ }
+ s = format (s, "\b\b > - ");
+ s = format (s, "Weight: %u", sl->weight);
+
+ return s;
+}
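+
+/*
+ * Note: the "\b\b" above emits two backspaces so that, on a terminal, the
+ * trailing ", " after the last segment is visually overwritten by " >".
+ * Illustrative sample output for a two-segment list (hedged, not taken
+ * from a real run):
+ *
+ *   SR: Segment List index:[4]
+ *       Segments:< 2001:db8::1, 2001:db8::2 > - Weight: 1
+ */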
+
+const static dpo_vft_t sr_policy_rewrite_vft = {
+ .dv_lock = sr_dpo_lock,
+ .dv_unlock = sr_dpo_unlock,
+ .dv_format = format_sr_segment_list_dpo,
+};
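+
+/*
+ * Hedged usage sketch (illustrative only, not code from this file): once
+ * the DPO types are registered in sr_policy_rewrite_init() below, a
+ * segment list can be bound to a DPO by its pool index, e.g.:
+ *
+ *   dpo_id_t dpo = DPO_INVALID;
+ *   dpo_set (&dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP6, segment_list_index);
+ *
+ * Packets hitting a FIB entry stacked on that DPO are then dispatched to
+ * the graph nodes named in the arrays below.
+ */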
+
+const static char *const sr_pr_encaps_ip6_nodes[] = {
+ "sr-pl-rewrite-encaps",
+ NULL,
+};
+
+const static char *const sr_pr_encaps_ip4_nodes[] = {
+ "sr-pl-rewrite-encaps-v4",
+ NULL,
+};
+
+const static char *const *const sr_pr_encaps_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_pr_encaps_ip6_nodes,
+ [DPO_PROTO_IP4] = sr_pr_encaps_ip4_nodes,
+};
+
+const static char *const sr_pr_insert_ip6_nodes[] = {
+ "sr-pl-rewrite-insert",
+ NULL,
+};
+
+const static char *const *const sr_pr_insert_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_pr_insert_ip6_nodes,
+};
+
+const static char *const sr_pr_bsid_insert_ip6_nodes[] = {
+ "sr-pl-rewrite-b-insert",
+ NULL,
+};
+
+const static char *const *const sr_pr_bsid_insert_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_pr_bsid_insert_ip6_nodes,
+};
+
+const static char *const sr_pr_bsid_encaps_ip6_nodes[] = {
+ "sr-pl-rewrite-b-encaps",
+ NULL,
+};
+
+const static char *const *const sr_pr_bsid_encaps_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_pr_bsid_encaps_ip6_nodes,
+};
+
+/********************* SR Policy Rewrite initialization ***********************/
+/**
+ * @brief SR Policy Rewrite initialization
+ */
+clib_error_t *
+sr_policy_rewrite_init (vlib_main_t * vm)
+{
+ ip6_sr_main_t *sm = &sr_main;
+
+ /* Init memory for sr policy keys (bsid <-> ip6_address_t) */
+ mhash_init (&sm->sr_policies_index_hash, sizeof (uword),
+ sizeof (ip6_address_t));
+
+ /* Register the SR policy DPO types */
+ sr_pr_encaps_dpo_type =
+ dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_encaps_nodes);
+
+ sr_pr_insert_dpo_type =
+ dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_insert_nodes);
+
+ sr_pr_bsid_encaps_dpo_type =
+ dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_encaps_nodes);
+
+ sr_pr_bsid_insert_dpo_type =
+ dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_insert_nodes);
+
+ /* Register the L2 encaps node used in HW redirect */
+ sm->l2_sr_policy_rewrite_index = sr_policy_rewrite_encaps_node.index;
+
+ sm->fib_table_ip6 = (u32) ~ 0;
+ sm->fib_table_ip4 = (u32) ~ 0;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_policy_rewrite_init);
+
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c
new file mode 100755
index 00000000..cf4e81ab
--- /dev/null
+++ b/src/vnet/srv6/sr_steering.c
@@ -0,0 +1,575 @@
+/*
+ * sr_steering.c: ipv6 segment routing steering into SR policy
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Packet steering into SR Policies
+ *
+ * This file is in charge of handling the FIB appropriately to steer packets
+ * through SR Policies as defined in 'sr_policy_rewrite.c'. Note that this
+ * file only performs steering; SR policy application itself is done in
+ * sr_policy_rewrite.c.
+ *
+ * Supports:
+ * - Steering of IPv6 traffic Destination Address based
+ * - Steering of IPv4 traffic Destination Address based
+ * - Steering of L2 frames, interface based (sw interface)
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief Steer L2 and L3 traffic through a given SR policy
+ *
+ * @param is_del is non-zero when removing an existing steering policy
+ * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ * @param sr_policy_index is the index of the SR Policy (alt to bsid)
+ * @param table_id is the VRF in which to install the FIB entry for the prefix
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param sw_if_index is the incoming interface for L2 traffic
+ * @param traffic_type describes the type of traffic
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
+ u32 table_id, ip46_address_t * prefix, u32 mask_width,
+ u32 sw_if_index, u8 traffic_type)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ sr_steering_key_t key;
+ ip6_sr_steering_policy_t *steer_pl;
+ fib_prefix_t pfx = { 0 };
+
+ ip6_sr_policy_t *sr_policy = 0;
+ uword *p = 0;
+
+ memset (&key, 0, sizeof (sr_steering_key_t));
+
+ /* Compute the steer policy key */
+ if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+ {
+ key.l3.prefix.as_u64[0] = prefix->as_u64[0];
+ key.l3.prefix.as_u64[1] = prefix->as_u64[1];
+ key.l3.mask_width = mask_width;
+ key.l3.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+ }
+ else if (traffic_type == SR_STEER_L2)
+ {
+ key.l2.sw_if_index = sw_if_index;
+
+ /* Sanitise the SW_IF_INDEX */
+ if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return -3;
+
+ vnet_sw_interface_t *sw =
+ vnet_get_sw_interface (sm->vnet_main, sw_if_index);
+ if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ return -3;
+ }
+ else
+ return -1;
+
+ key.traffic_type = traffic_type;
+
+ /* Search for the item */
+ p = mhash_get (&sm->sr_steer_policies_hash, &key);
+
+ if (p)
+ {
+ /* Retrieve Steer Policy function */
+ steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+
+ if (is_del)
+ {
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ steer_pl->classify.l3.fib_table),
+ &pfx, FIB_SOURCE_SR);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ steer_pl->classify.l3.fib_table), &pfx,
+ FIB_SOURCE_SR);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_L2)
+ {
+ /* Remove HW redirection */
+ vnet_feature_enable_disable ("device-input",
+ "sr-policy-rewrite-encaps-l2",
+ sw_if_index, 0, 0, 0);
+ sm->sw_iface_sr_policies[sw_if_index] = ~(u32) 0;
+
+ /* Remove promiscuous mode from interface */
+ vnet_main_t *vnm = vnet_get_main ();
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *eif =
+ ethernet_get_interface (em, sw_if_index);
+
+ if (!eif)
+ goto cleanup_error_redirection;
+
+ ethernet_set_flags (vnm, sw_if_index, 0);
+ }
+
+ /* Delete SR steering policy entry */
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+
+ /* If no SR policies nor steering policies remain, release the FIB table locks */
+ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
+ {
+ fib_table_unlock (sm->fib_table_ip6,
+ FIB_PROTOCOL_IP6, FIB_SOURCE_SR);
+ fib_table_unlock (sm->fib_table_ip4,
+ FIB_PROTOCOL_IP4, FIB_SOURCE_SR);
+ sm->fib_table_ip6 = (u32) ~ 0;
+ sm->fib_table_ip4 = (u32) ~ 0;
+ }
+
+ return 0;
+ }
+ else /* The user requested an update of an existing SR steering policy */
+ {
+ /* Retrieve SR steering policy */
+ if (bsid)
+ {
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -2;
+ }
+ else
+ sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+ if (!sr_policy)
+ return -2;
+
+ steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+ /* Remove old FIB/hw redirection and create a new one */
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ steer_pl->classify.l3.fib_table),
+ &pfx, FIB_SOURCE_SR);
+
+ /* Create a new one */
+ goto update_fib;
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ steer_pl->classify.l3.fib_table),
+ &pfx, FIB_SOURCE_SR);
+
+ /* Create a new one */
+ goto update_fib;
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_L2)
+ {
+ /* Update L2-HW redirection */
+ goto update_fib;
+ }
+ }
+ }
+ else if (is_del)
+ /* The user requested a delete, but no such steering policy exists */
+ return -4;
+
+ /* Retrieve SR policy */
+ if (bsid)
+ {
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -2;
+ }
+ else
+ sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+ /* Create a new steering policy */
+ pool_get (sm->steer_policies, steer_pl);
+ memset (steer_pl, 0, sizeof (*steer_pl));
+
+ if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+ {
+ clib_memcpy (&steer_pl->classify.l3.prefix, prefix,
+ sizeof (ip46_address_t));
+ steer_pl->classify.l3.mask_width = mask_width;
+ steer_pl->classify.l3.fib_table =
+ (table_id != (u32) ~ 0 ? table_id : 0);
+ steer_pl->classify.traffic_type = traffic_type;
+ }
+ else if (traffic_type == SR_STEER_L2)
+ {
+ steer_pl->classify.l2.sw_if_index = sw_if_index;
+ steer_pl->classify.traffic_type = traffic_type;
+ }
+ else
+ {
+ /* Incorrect API usage. Should never get here */
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+ return -1;
+ }
+ steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+ /* Create and store key */
+ mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
+ NULL);
+
+ if (traffic_type == SR_STEER_L2)
+ {
+ if (!sr_policy->is_encap)
+ goto cleanup_error_encap;
+
+ if (vnet_feature_enable_disable
+ ("device-input", "sr-pl-rewrite-encaps-l2", sw_if_index, 1, 0, 0))
+ goto cleanup_error_redirection;
+
+ /* Set promiscuous mode on interface */
+ vnet_main_t *vnm = vnet_get_main ();
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *eif = ethernet_get_interface (em, sw_if_index);
+
+ if (!eif)
+ goto cleanup_error_redirection;
+
+ ethernet_set_flags (vnm, sw_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+ }
+ else if (traffic_type == SR_STEER_IPV4)
+ if (!sr_policy->is_encap)
+ goto cleanup_error_encap;
+
+update_fib:
+ /* FIB API calls - Recursive route through the BindingSID */
+ if (traffic_type == SR_STEER_IPV6)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+ fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP6,
+ (table_id !=
+ (u32) ~ 0 ?
+ table_id : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+ DPO_PROTO_IP6,
+ (ip46_address_t *) & sr_policy->bsid, ~0,
+ sm->fib_table_ip6, 1, NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ else if (traffic_type == SR_STEER_IPV4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+ fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP4,
+ (table_id !=
+ (u32) ~ 0 ?
+ table_id : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+ DPO_PROTO_IP6,
+ (ip46_address_t *) & sr_policy->bsid, ~0,
+ sm->fib_table_ip4, 1, NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ else if (traffic_type == SR_STEER_L2)
+ {
+ if (sw_if_index < vec_len (sm->sw_iface_sr_policies))
+ sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy;
+ else
+ {
+ vec_resize (sm->sw_iface_sr_policies,
+ (pool_len (sm->vnet_main->interface_main.sw_interfaces)
+ - vec_len (sm->sw_iface_sr_policies)));
+ sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy;
+ }
+ }
+
+ return 0;
+
+cleanup_error_encap:
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+ return -5;
+
+cleanup_error_redirection:
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+ return -3;
+}
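+
+/*
+ * Illustrative usage sketch, not part of the original patch: steer an IPv6
+ * prefix through the SR policy identified by its BSID, in the default FIB.
+ * The helper name is hypothetical; the argument semantics are those of
+ * sr_steering_policy() above.
+ */
+static inline int
+sr_steer_ip6_prefix_via_bsid (ip6_address_t * bsid, ip46_address_t * prefix,
+ u32 mask_width)
+{
+ /* sr_policy_index and sw_if_index are ignored when steering L3 by BSID */
+ return sr_steering_policy (0 /* is_del */ , bsid, (u32) ~ 0,
+ (u32) ~ 0 /* default FIB table */ , prefix,
+ mask_width, (u32) ~ 0, SR_STEER_IPV6);
+}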
+
+static clib_error_t *
+sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ int is_del = 0;
+
+ ip46_address_t prefix;
+ u32 dst_mask_width = 0;
+ u32 sw_if_index = (u32) ~ 0;
+ u8 traffic_type = 0;
+ u32 fib_table = (u32) ~ 0;
+
+ ip6_address_t bsid;
+ u32 sr_policy_index = (u32) ~ 0;
+
+ u8 sr_policy_set = 0;
+
+ memset (&prefix, 0, sizeof (ip46_address_t));
+
+ int rv;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip6_address,
+ &prefix.ip6, &dst_mask_width))
+ traffic_type = SR_STEER_IPV6;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip4_address,
+ &prefix.ip4, &dst_mask_width))
+ traffic_type = SR_STEER_IPV4;
+ else if (!traffic_type
+ && unformat (input, "l2 %U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ traffic_type = SR_STEER_L2;
+ else if (!sr_policy_set
+ && unformat (input, "via sr policy index %d",
+ &sr_policy_index))
+ sr_policy_set = 1;
+ else if (!sr_policy_set
+ && unformat (input, "via sr policy bsid %U",
+ unformat_ip6_address, &bsid))
+ sr_policy_set = 1;
+ else if (fib_table == (u32) ~ 0
+ && unformat (input, "fib-table %d", &fib_table));
+ else
+ break;
+ }
+
+ if (!traffic_type)
+ return clib_error_return (0, "No L2/L3 traffic specified");
+ if (!sr_policy_set)
+ return clib_error_return (0, "No SR policy specified");
+
+ /* Make sure that the prefixes are clean */
+ if (traffic_type == SR_STEER_IPV4)
+ {
+ u32 mask =
+ (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0);
+ prefix.ip4.as_u32 &= mask;
+ }
+ else if (traffic_type == SR_STEER_IPV6)
+ {
+ ip6_address_t mask;
+ ip6_address_mask_from_width (&mask, dst_mask_width);
+ ip6_address_mask (&prefix.ip6, &mask);
+ }
+
+ rv =
+ sr_steering_policy (is_del, (sr_policy_index == ~(u32) 0 ? &bsid : NULL),
+ sr_policy_index, fib_table, &prefix, dst_mask_width,
+ sw_if_index, traffic_type);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -1:
+ return clib_error_return (0, "Incorrect API usage.");
+ case -2:
+ return clib_error_return (0,
+ "The requested SR policy could not be located. Review the BSID/index.");
+ case -3:
+ return clib_error_return (0,
+ "Unable to do SW redirect. Incorrect interface.");
+ case -4:
+ return clib_error_return (0,
+ "The requested SR steering policy could not be deleted.");
+ case -5:
+ return clib_error_return (0,
+ "The SR policy is not an encapsulation one.");
+ default:
+ return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
+ .path = "sr steer",
+ .short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sf_if>]"
+ "via sr policy [index <sr_policy_index>|bsid <bsid_ip6_addr>]"
+ "(fib-table <fib_table_index>)",
+ .long_help =
+ "\tSteer a L2 or L3 traffic through an existing SR policy.\n"
+ "\tExamples:\n"
+ "\t\tsr steer l3 2001::/64 via sr_policy index 5\n"
+ "\t\tsr steer l3 2001::/64 via sr_policy bsid 2010::9999:1\n"
+ "\t\tsr steer l2 GigabitEthernet0/5/0 via sr_policy index 5\n"
+ "\t\tsr steer del l3 2001::/64 via sr_policy index 5\n",
+ .function = sr_steer_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_sr_steering_policies_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_steering_policy_t **steer_policies = 0;
+ ip6_sr_steering_policy_t *steer_pl;
+
+ vnet_main_t *vnm = vnet_get_main ();
+
+ ip6_sr_policy_t *pl = 0;
+ int i;
+
+ vlib_cli_output (vm, "SR steering policies:");
+ /* *INDENT-OFF* */
+ pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);}));
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
+ for (i = 0; i < vec_len (steer_policies); i++)
+ {
+ steer_pl = steer_policies[i];
+ pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy);
+ if (steer_pl->classify.traffic_type == SR_STEER_L2)
+ {
+ vlib_cli_output (vm, "L2 %U\t%U",
+ format_vnet_sw_if_index_name, vnm,
+ steer_pl->classify.l2.sw_if_index,
+ format_ip6_address, &pl->bsid);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ vlib_cli_output (vm, "L3 %U/%d\t%U",
+ format_ip4_address,
+ &steer_pl->classify.l3.prefix.ip4,
+ steer_pl->classify.l3.mask_width,
+ format_ip6_address, &pl->bsid);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ vlib_cli_output (vm, "L3 %U/%d\t%U",
+ format_ip6_address,
+ &steer_pl->classify.l3.prefix.ip6,
+ steer_pl->classify.l3.mask_width,
+ format_ip6_address, &pl->bsid);
+ }
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = {
+ .path = "show sr steering policies",
+ .short_help = "show sr steering policies",
+ .function = show_sr_steering_policies_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+sr_steering_init (vlib_main_t * vm)
+{
+ ip6_sr_main_t *sm = &sr_main;
+
+ /* Init memory for function keys */
+ mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
+ sizeof (sr_steering_key_t));
+
+ sm->sw_iface_sr_policies = 0;
+
+ sm->vnet_main = vnet_get_main ();
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_INIT_FUNCTION (sr_steering_init);
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) =
+{
+ .arc_name = "device-input",
+ .node_name = "sr-pl-rewrite-encaps-l2",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+/* *INDENT-ON* */
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_steering.md b/src/vnet/srv6/sr_steering.md
new file mode 100644
index 00000000..cf446f81
--- /dev/null
+++ b/src/vnet/srv6/sr_steering.md
@@ -0,0 +1,11 @@
+# Steering packets into an SR Policy {#srv6_steering_doc}
+
+To steer packets in Transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'.
+
+ sr steer l3 2001::/64 via sr policy index 1
+ sr steer l3 2001::/64 via sr policy bsid cafe::1
+ sr steer l3 2001::/64 via sr policy bsid cafe::1 fib-table 3
+ sr steer l3 10.0.0.0/16 via sr policy bsid cafe::1
+ sr steer l2 TenGE0/1/0 via sr policy bsid cafe::1
+
+Disclaimer: T.Encaps.L2 steers L2 frames into an SR Policy. Note that creating an SR steering policy for L2 frames automatically *puts the interface into promiscuous mode*.
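+
+The same syntax with `del` removes a steering rule, reusing the examples above:
+
+    sr steer del l3 2001::/64 via sr policy index 1
+    sr steer del l2 TenGE0/1/0 via sr policy bsid cafe::1
+
+The installed steering rules can be listed at any time with `show sr steering policies`.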
diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c
new file mode 100644
index 00000000..527b3289
--- /dev/null
+++ b/src/vnet/tcp/builtin_client.c
@@ -0,0 +1,770 @@
+/*
+ * builtin_client.c - vpp built-in tcp client/connect code
+ *
+ * Copyright (c) 2017 by Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/tcp/builtin_client.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vpp/app/version.h>
+
+#define TCP_BUILTIN_CLIENT_DBG (0)
+
+static void
+signal_evt_to_cli_i (int *code)
+{
+ tclient_main_t *tm = &tclient_main;
+ ASSERT (vlib_get_thread_index () == 0);
+ vlib_process_signal_event (tm->vlib_main, tm->cli_node_index, *code, 0);
+}
+
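+/* Signal the CLI process from any thread: a worker cannot signal the
+ * main-thread process directly, so the event is bounced through the
+ * binary-API RPC mechanism */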
+static void
+signal_evt_to_cli (int code)
+{
+ if (vlib_get_thread_index () != 0)
+ vl_api_rpc_call_main_thread (signal_evt_to_cli_i, (u8 *) & code,
+ sizeof (code));
+ else
+ signal_evt_to_cli_i (&code);
+}
+
+static void
+send_test_chunk (tclient_main_t * tm, session_t * s)
+{
+ u8 *test_data = tm->connect_test_data;
+ int test_buf_offset;
+ u32 bytes_this_chunk;
+ session_fifo_event_t evt;
+ static int serial_number = 0;
+ svm_fifo_t *txf;
+ int rv;
+
+ ASSERT (vec_len (test_data) > 0);
+
+ test_buf_offset = s->bytes_sent % vec_len (test_data);
+ bytes_this_chunk = vec_len (test_data) - test_buf_offset;
+
+ bytes_this_chunk = bytes_this_chunk < s->bytes_to_send
+ ? bytes_this_chunk : s->bytes_to_send;
+
+ txf = s->server_tx_fifo;
+ rv = svm_fifo_enqueue_nowait (txf, bytes_this_chunk,
+ test_data + test_buf_offset);
+
+ /* If we managed to enqueue data... */
+ if (rv > 0)
+ {
+ /* Account for it... */
+ s->bytes_to_send -= rv;
+ s->bytes_sent += rv;
+
+ if (TCP_BUILTIN_CLIENT_DBG)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "tx-enq: xfer %d bytes, sent %u remain %u",
+ .format_args = "i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 data[3];
+ } *ed;
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->data[0] = rv;
+ ed->data[1] = s->bytes_sent;
+ ed->data[2] = s->bytes_to_send;
+ }
+
+ /* Poke the session layer */
+ if (svm_fifo_set_event (txf))
+ {
+ /* Fabricate TX event, send to vpp */
+ evt.fifo = txf;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ evt.event_id = serial_number++;
+
+ if (unix_shared_memory_queue_add
+ (tm->vpp_event_queue[txf->master_thread_index], (u8 *) & evt,
+ 0 /* do wait for mutex */ ))
+ clib_warning ("could not enqueue event");
+ }
+ }
+}
+
+static void
+receive_test_chunk (tclient_main_t * tm, session_t * s)
+{
+ svm_fifo_t *rx_fifo = s->server_rx_fifo;
+ int n_read, test_bytes = 0;
+ u32 my_thread_index = vlib_get_thread_index ();
+
+ /* Allow enqueuing of new event */
+ // svm_fifo_unset_event (rx_fifo);
+
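+ /* test_bytes is hardwired to 0: received data is normally dropped
+ * unverified. Set it to 1 to check the stream against the expected
+ * byte pattern. */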
+ if (test_bytes)
+ {
+ n_read = svm_fifo_dequeue_nowait (rx_fifo,
+ vec_len (tm->rx_buf[my_thread_index]),
+ tm->rx_buf[my_thread_index]);
+ }
+ else
+ {
+ n_read = svm_fifo_max_dequeue (rx_fifo);
+ svm_fifo_dequeue_drop (rx_fifo, n_read);
+ }
+
+ if (n_read > 0)
+ {
+ if (TCP_BUILTIN_CLIENT_DBG)
+ {
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "rx-deq: %d bytes",
+ .format_args = "i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 data[1];
+ } *ed;
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->data[0] = n_read;
+ }
+
+ if (test_bytes)
+ {
+ int i;
+ for (i = 0; i < n_read; i++)
+ {
+ if (tm->rx_buf[my_thread_index][i]
+ != ((s->bytes_received + i) & 0xff))
+ {
+ clib_warning ("read %d error at byte %lld, 0x%x not 0x%x",
+ n_read, s->bytes_received + i,
+ tm->rx_buf[my_thread_index][i],
+ ((s->bytes_received + i) & 0xff));
+ }
+ }
+ }
+ s->bytes_to_receive -= n_read;
+ s->bytes_received += n_read;
+ }
+}
+
+static uword
+builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ tclient_main_t *tm = &tclient_main;
+ int my_thread_index = vlib_get_thread_index ();
+ session_t *sp;
+ int i;
+ int delete_session;
+ u32 *connection_indices;
+ u32 *connections_this_batch;
+ u32 nconnections_this_batch;
+
+ connection_indices = tm->connection_index_by_thread[my_thread_index];
+ connections_this_batch =
+ tm->connections_this_batch_by_thread[my_thread_index];
+
+ if ((tm->run_test == 0) ||
+ ((vec_len (connection_indices) == 0)
+ && vec_len (connections_this_batch) == 0))
+ return 0;
+
+ /* Grab another pile of connections */
+ if (PREDICT_FALSE (vec_len (connections_this_batch) == 0))
+ {
+ nconnections_this_batch =
+ clib_min (tm->connections_per_batch, vec_len (connection_indices));
+
+ ASSERT (nconnections_this_batch > 0);
+ vec_validate (connections_this_batch, nconnections_this_batch - 1);
+ clib_memcpy (connections_this_batch,
+ connection_indices + vec_len (connection_indices)
+ - nconnections_this_batch,
+ nconnections_this_batch * sizeof (u32));
+ _vec_len (connection_indices) -= nconnections_this_batch;
+ }
+
+ if (PREDICT_FALSE (tm->prev_conns != tm->connections_per_batch
+ && tm->prev_conns == vec_len (connections_this_batch)))
+ {
+ tm->repeats++;
+ tm->prev_conns = vec_len (connections_this_batch);
+ if (tm->repeats == 500000)
+ {
+ clib_warning ("stuck clients");
+ }
+ }
+ else
+ {
+ tm->prev_conns = vec_len (connections_this_batch);
+ tm->repeats = 0;
+ }
+
+ for (i = 0; i < vec_len (connections_this_batch); i++)
+ {
+ delete_session = 1;
+
+ sp = pool_elt_at_index (tm->sessions, connections_this_batch[i]);
+
+ if (sp->bytes_to_send > 0)
+ {
+ send_test_chunk (tm, sp);
+ delete_session = 0;
+ }
+ if (sp->bytes_to_receive > 0)
+ {
+ receive_test_chunk (tm, sp);
+ delete_session = 0;
+ }
+ if (PREDICT_FALSE (delete_session == 1))
+ {
+ u32 index, thread_index;
+ stream_session_t *s;
+
+ __sync_fetch_and_add (&tm->tx_total, sp->bytes_sent);
+ __sync_fetch_and_add (&tm->rx_total, sp->bytes_received);
+
+ stream_session_parse_handle (sp->vpp_session_handle,
+ &index, &thread_index);
+ s = stream_session_get_if_valid (index, thread_index);
+
+ if (s)
+ {
+ vnet_disconnect_args_t _a, *a = &_a;
+ a->handle = stream_session_handle (s);
+ a->app_index = tm->app_index;
+ vnet_disconnect_session (a);
+
+ vec_delete (connections_this_batch, 1, i);
+ i--;
+ __sync_fetch_and_add (&tm->ready_connections, -1);
+ }
+ else
+ clib_warning ("session AWOL?");
+
+ /* Kick the debug CLI process */
+ if (tm->ready_connections == 0)
+ {
+ signal_evt_to_cli (2);
+ }
+ }
+ }
+
+ tm->connection_index_by_thread[my_thread_index] = connection_indices;
+ tm->connections_this_batch_by_thread[my_thread_index] =
+ connections_this_batch;
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (builtin_client_node) =
+{
+ .function = builtin_client_node_fn,
+ .name = "builtin-tcp-client",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+/* *INDENT-ON* */
+
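+/* Create a loopback binary-API client so the built-in app can attach to
+ * the session layer without an external API client process */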
+static int
+create_api_loopback (tclient_main_t * tm)
+{
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr;
+
+ shmem_hdr = am->shmem_hdr;
+ tm->vl_input_queue = shmem_hdr->vl_input_queue;
+ tm->my_client_index =
+ vl_api_memclnt_create_internal ("tcp_test_client", tm->vl_input_queue);
+ return 0;
+}
+
+static int
+tcp_test_clients_init (vlib_main_t * vm)
+{
+ tclient_main_t *tm = &tclient_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
+ int i;
+
+ if (create_api_loopback (tm))
+ return -1;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+
+ /* Init test data. Big buffer */
+ vec_validate (tm->connect_test_data, 1024 * 1024 - 1);
+ for (i = 0; i < vec_len (tm->connect_test_data); i++)
+ tm->connect_test_data[i] = i & 0xff;
+
+ vec_validate (tm->rx_buf, num_threads - 1);
+ for (i = 0; i < num_threads; i++)
+ vec_validate (tm->rx_buf[i], vec_len (tm->connect_test_data) - 1);
+
+ tm->is_init = 1;
+
+ vec_validate (tm->connection_index_by_thread, vtm->n_vlib_mains);
+ vec_validate (tm->connections_this_batch_by_thread, vtm->n_vlib_mains);
+ vec_validate (tm->vpp_event_queue, vtm->n_vlib_mains);
+
+ return 0;
+}
+
+static int
+builtin_session_connected_callback (u32 app_index, u32 api_context,
+ stream_session_t * s, u8 is_fail)
+{
+ tclient_main_t *tm = &tclient_main;
+ session_t *session;
+ u32 session_index;
+ u8 thread_index = vlib_get_thread_index ();
+
+ if (is_fail)
+ {
+ clib_warning ("connection %d failed!", api_context);
+ signal_evt_to_cli (-1);
+ return 0;
+ }
+
+ ASSERT (s->thread_index == thread_index);
+
+ if (!tm->vpp_event_queue[thread_index])
+ tm->vpp_event_queue[thread_index] =
+ session_manager_get_vpp_event_queue (thread_index);
+
+ /*
+ * Setup session
+ */
+ clib_spinlock_lock_if_init (&tm->sessions_lock);
+ pool_get (tm->sessions, session);
+ clib_spinlock_unlock_if_init (&tm->sessions_lock);
+
+ memset (session, 0, sizeof (*session));
+ session_index = session - tm->sessions;
+ session->bytes_to_send = tm->bytes_to_send;
+ session->bytes_to_receive = tm->no_return ? 0ULL : tm->bytes_to_send;
+ session->server_rx_fifo = s->server_rx_fifo;
+ session->server_rx_fifo->client_session_index = session_index;
+ session->server_tx_fifo = s->server_tx_fifo;
+ session->server_tx_fifo->client_session_index = session_index;
+ session->vpp_session_handle = stream_session_handle (s);
+
+ vec_add1 (tm->connection_index_by_thread[thread_index], session_index);
+ __sync_fetch_and_add (&tm->ready_connections, 1);
+ if (tm->ready_connections == tm->expected_connections)
+ {
+ tm->run_test = 1;
+ /* Signal the CLI process that the action is starting... */
+ signal_evt_to_cli (1);
+ }
+
+ return 0;
+}
+
+static void
+builtin_session_reset_callback (stream_session_t * s)
+{
+ if (s->session_state == SESSION_STATE_READY)
+ clib_warning ("Reset active connection %U", format_stream_session, s, 2);
+ stream_session_cleanup (s);
+ return;
+}
+
+static int
+builtin_session_create_callback (stream_session_t * s)
+{
+ return 0;
+}
+
+static void
+builtin_session_disconnect_callback (stream_session_t * s)
+{
+ tclient_main_t *tm = &tclient_main;
+ vnet_disconnect_args_t _a, *a = &_a;
+ a->handle = stream_session_handle (s);
+ a->app_index = tm->app_index;
+ vnet_disconnect_session (a);
+ return;
+}
+
+static int
+builtin_server_rx_callback (stream_session_t * s)
+{
+ return 0;
+}
+
+/* *INDENT-OFF* */
+static session_cb_vft_t builtin_clients = {
+ .session_reset_callback = builtin_session_reset_callback,
+ .session_connected_callback = builtin_session_connected_callback,
+ .session_accept_callback = builtin_session_create_callback,
+ .session_disconnect_callback = builtin_session_disconnect_callback,
+ .builtin_server_rx_callback = builtin_server_rx_callback
+};
+/* *INDENT-ON* */
+
+static int
+attach_builtin_test_clients_app (void)
+{
+ tclient_main_t *tm = &tclient_main;
+ vnet_app_attach_args_t _a, *a = &_a;
+ u8 segment_name[128];
+ u32 segment_name_length, prealloc_fifos;
+ u64 options[16];
+
+ segment_name_length = ARRAY_LEN (segment_name);
+
+ memset (a, 0, sizeof (*a));
+ memset (options, 0, sizeof (options));
+
+ a->api_client_index = tm->my_client_index;
+ a->segment_name = segment_name;
+ a->segment_name_length = segment_name_length;
+ a->session_cb_vft = &builtin_clients;
+
+ prealloc_fifos = tm->prealloc_fifos ? tm->expected_connections : 1;
+
+ options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
+ options[SESSION_OPTIONS_SEGMENT_SIZE] = (2ULL << 32);
+ options[SESSION_OPTIONS_RX_FIFO_SIZE] = tm->fifo_size;
+ options[SESSION_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size;
+ options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = tm->private_segment_count;
+ options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = tm->private_segment_size;
+ options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = prealloc_fifos;
+
+ options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+
+ a->options = options;
+
+ if (vnet_application_attach (a))
+ return -1;
+
+ tm->app_index = a->app_index;
+ return 0;
+}
+
+static void *
+tclient_thread_fn (void *arg)
+{
+ return 0;
+}
+
+/** Start a transmit thread */
+int
+start_tx_pthread (tclient_main_t * tm)
+{
+ if (tm->client_thread_handle == 0)
+ {
+ int rv = pthread_create (&tm->client_thread_handle,
+ NULL /*attr */ ,
+ tclient_thread_fn, 0);
+ if (rv)
+ {
+ tm->client_thread_handle = 0;
+ return -1;
+ }
+ }
+ return 0;
+}
+
+void
+clients_connect (vlib_main_t * vm, u8 * uri, u32 n_clients)
+{
+ tclient_main_t *tm = &tclient_main;
+ vnet_connect_args_t _a, *a = &_a;
+ int i;
+ for (i = 0; i < n_clients; i++)
+ {
+ memset (a, 0, sizeof (*a));
+
+ a->uri = (char *) uri;
+ a->api_context = i;
+ a->app_index = tm->app_index;
+ a->mp = 0;
+ vnet_connect_uri (a);
+
+ /* Crude pacing for call setups */
+ if ((i % 4) == 0)
+ vlib_process_suspend (vm, 10e-6);
+ ASSERT (i + 1 >= tm->ready_connections);
+ while (i + 1 - tm->ready_connections > 1000)
+ {
+ vlib_process_suspend (vm, 100e-6);
+ }
+ }
+}
+
+static clib_error_t *
+test_tcp_clients_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ tclient_main_t *tm = &tclient_main;
+ vlib_thread_main_t *thread_main = vlib_get_thread_main ();
+ uword *event_data = 0, event_type;
+ u8 *default_connect_uri = (u8 *) "tcp://6.0.1.1/1234", *uri;
+ u64 tmp, total_bytes;
+ f64 test_timeout = 20.0, syn_timeout = 20.0, delta;
+ f64 time_before_connects;
+ u32 n_clients = 1;
+ int preallocate_sessions = 0;
+ char *transfer_type;
+ int i;
+
+ tm->bytes_to_send = 8192;
+ tm->no_return = 0;
+ tm->fifo_size = 64 << 10;
+ tm->connections_per_batch = 1000;
+ tm->private_segment_count = 0;
+ tm->private_segment_size = 0;
+ tm->vlib_main = vm;
+ if (thread_main->n_vlib_mains > 1)
+ clib_spinlock_init (&tm->sessions_lock);
+ vec_free (tm->connect_uri);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "nclients %d", &n_clients))
+ ;
+ else if (unformat (input, "mbytes %lld", &tmp))
+ tm->bytes_to_send = tmp << 20;
+ else if (unformat (input, "gbytes %lld", &tmp))
+ tm->bytes_to_send = tmp << 30;
+ else if (unformat (input, "bytes %lld", &tm->bytes_to_send))
+ ;
+ else if (unformat (input, "uri %s", &tm->connect_uri))
+ ;
+ else if (unformat (input, "test-timeout %f", &test_timeout))
+ ;
+ else if (unformat (input, "syn-timeout %f", &syn_timeout))
+ ;
+ else if (unformat (input, "no-return"))
+ tm->no_return = 1;
+ else if (unformat (input, "fifo-size %d", &tm->fifo_size))
+ tm->fifo_size <<= 10;
+ else if (unformat (input, "private-segment-count %d",
+ &tm->private_segment_count))
+ ;
+ else if (unformat (input, "private-segment-size %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000ULL)
+ return clib_error_return
+ (0, "private segment size %lld (%llu) too large", tmp, tmp);
+ tm->private_segment_size = tmp;
+ }
+ else if (unformat (input, "preallocate-fifos"))
+ tm->prealloc_fifos = 1;
+ else if (unformat (input, "preallocate-sessions"))
+ preallocate_sessions = 1;
+ else
+ if (unformat (input, "client-batch %d", &tm->connections_per_batch))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ /* Store cli process node index for signalling */
+ tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+
+ if (tm->is_init == 0)
+ {
+ if (tcp_test_clients_init (vm))
+ return clib_error_return (0, "failed init");
+ }
+
+
+ tm->ready_connections = 0;
+ tm->expected_connections = n_clients;
+ tm->rx_total = 0;
+ tm->tx_total = 0;
+
+ uri = default_connect_uri;
+ if (tm->connect_uri)
+ uri = tm->connect_uri;
+
+#if TCP_BUILTIN_CLIENT_PTHREAD
+ start_tx_pthread (tm);
+#endif
+
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+ vlib_worker_thread_barrier_release (vm);
+
+ if (tm->test_client_attached == 0)
+ {
+ if (attach_builtin_test_clients_app ())
+ {
+ return clib_error_return (0, "app attach failed");
+ }
+ }
+ tm->test_client_attached = 1;
+
+ /* Turn on the builtin client input nodes */
+ for (i = 0; i < thread_main->n_vlib_mains; i++)
+ vlib_node_set_state (vlib_mains[i], builtin_client_node.index,
+ VLIB_NODE_STATE_POLLING);
+
+ if (preallocate_sessions)
+ {
+ session_t *sp __attribute__ ((unused));
+ for (i = 0; i < n_clients; i++)
+ pool_get (tm->sessions, sp);
+ for (i = 0; i < n_clients; i++)
+ pool_put_index (tm->sessions, i);
+ }
+
+ /* Fire off connect requests */
+ time_before_connects = vlib_time_now (vm);
+ clients_connect (vm, uri, n_clients);
+
+ /* Park until the sessions come up, or the syn timeout elapses... */
+ vlib_process_wait_for_event_or_clock (vm, syn_timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0:
+ vlib_cli_output (vm, "Timeout with only %d sessions active...",
+ tm->ready_connections);
+ goto cleanup;
+
+ case 1:
+ delta = vlib_time_now (vm) - time_before_connects;
+
+ if (delta != 0.0)
+ {
+ vlib_cli_output
+ (vm, "%d three-way handshakes in %.2f seconds, %.2f/sec",
+ n_clients, delta, ((f64) n_clients) / delta);
+ }
+
+ tm->test_start_time = vlib_time_now (tm->vlib_main);
+ vlib_cli_output (vm, "Test started at %.6f", tm->test_start_time);
+ break;
+
+ default:
+ vlib_cli_output (vm, "unexpected event(1): %d", event_type);
+ goto cleanup;
+ }
+
+ /* Now wait for the sessions to finish... */
+ vlib_process_wait_for_event_or_clock (vm, test_timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0:
+ vlib_cli_output (vm, "Timeout with %d sessions still active...",
+ tm->ready_connections);
+ goto cleanup;
+
+ case 2:
+ tm->test_end_time = vlib_time_now (vm);
+ vlib_cli_output (vm, "Test finished at %.6f", tm->test_end_time);
+ break;
+
+ default:
+ vlib_cli_output (vm, "unexpected event(2): %d", event_type);
+ goto cleanup;
+ }
+
+ delta = tm->test_end_time - tm->test_start_time;
+
+ if (delta != 0.0)
+ {
+ total_bytes = (tm->no_return ? tm->tx_total : tm->rx_total);
+ transfer_type = tm->no_return ? "half-duplex" : "full-duplex";
+ vlib_cli_output (vm,
+ "%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds",
+ total_bytes, total_bytes / (1ULL << 20),
+ total_bytes / (1ULL << 30), delta);
+ vlib_cli_output (vm, "%.2f bytes/second %s",
+ ((f64) total_bytes) / (delta), transfer_type);
+ vlib_cli_output (vm, "%.4f gbit/second %s",
+ (((f64) total_bytes * 8.0) / delta / 1e9),
+ transfer_type);
+ }
+ else
+ vlib_cli_output (vm, "zero delta-t?");
+
+cleanup:
+ tm->run_test = 0;
+ for (i = 0; i < vec_len (tm->connection_index_by_thread); i++)
+ {
+ vec_reset_length (tm->connection_index_by_thread[i]);
+ vec_reset_length (tm->connections_this_batch_by_thread[i]);
+ }
+
+ pool_free (tm->sessions);
+
+ /* Detach the application, so we can use different fifo sizes next time */
+ if (tm->test_client_attached)
+ {
+ vnet_app_detach_args_t _da, *da = &_da;
+ int rv;
+
+ da->app_index = tm->app_index;
+
+ rv = vnet_application_detach (da);
+ if (rv)
+ vlib_cli_output (vm, "WARNING: app detach failed...");
+ tm->test_client_attached = 0;
+ tm->app_index = ~0;
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_clients_command, static) =
+{
+ .path = "test tcp clients",
+ .short_help = "test tcp clients [nclients %d] [[m|g]bytes <bytes>] "
+ "[test-timeout <time>][syn-timeout <time>][no-return][fifo-size <size>]"
+ "[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
+ "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
+ "[uri <tcp://ip/port>]",
+ .function = test_tcp_clients_command_fn,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
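+
+/*
+ * Example debug CLI invocation (the address/port are hypothetical; the
+ * options are those parsed in test_tcp_clients_command_fn above): connect
+ * 100 clients and have each send 1 MByte, half-duplex:
+ *
+ *   test tcp clients nclients 100 mbytes 1 no-return uri tcp://6.0.1.1/1234
+ */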
+
+clib_error_t *
+tcp_test_clients_main_init (vlib_main_t * vm)
+{
+ tclient_main_t *tm = &tclient_main;
+ tm->is_init = 0;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (tcp_test_clients_main_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/builtin_client.h b/src/vnet/tcp/builtin_client.h
new file mode 100644
index 00000000..06d239ef
--- /dev/null
+++ b/src/vnet/tcp/builtin_client.h
@@ -0,0 +1,121 @@
+
+/*
+ * tclient.h - skeleton vpp engine plug-in header file
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_tclient_h__
+#define __included_tclient_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+#include <svm/svm_fifo_segment.h>
+#include <vnet/session/session.h>
+#include <vnet/session/application_interface.h>
+
+typedef struct
+{
+ u64 bytes_to_send;
+ u64 bytes_sent;
+ u64 bytes_to_receive;
+ u64 bytes_received;
+
+ svm_fifo_t *server_rx_fifo;
+ svm_fifo_t *server_tx_fifo;
+
+ u64 vpp_session_handle;
+} session_t;
+
+typedef struct
+{
+ /*
+ * Application setup parameters
+ */
+ unix_shared_memory_queue_t *vl_input_queue; /**< vpe input queue */
+ unix_shared_memory_queue_t **vpp_event_queue;
+
+ u32 cli_node_index; /**< cli process node index */
+ u32 my_client_index; /**< loopback API client handle */
+ u32 app_index; /**< app index after attach */
+
+ /*
+ * Configuration params
+ */
+ u8 *connect_uri; /**< URI for slave's connect */
+ u64 bytes_to_send; /**< Bytes to send */
+ u32 configured_segment_size;
+ u32 fifo_size;
+ u32 expected_connections; /**< Number of clients/connections */
+ u32 connections_per_batch; /**< Connections to rx/tx at once */
+ u32 private_segment_count; /**< Number of private fifo segs */
+ u32 private_segment_size; /**< size of private fifo segs */
+
+ /*
+ * Test state variables
+ */
+ session_t *sessions; /**< Session pool, shared */
+ clib_spinlock_t sessions_lock;
+ u8 **rx_buf; /**< intermediate rx buffers */
+ u8 *connect_test_data; /**< Pre-computed test data */
+ u32 **connection_index_by_thread;
+ u32 **connections_this_batch_by_thread; /**< active connection batch */
+ pthread_t client_thread_handle;
+
+ volatile u32 ready_connections;
+ volatile u32 finished_connections;
+ volatile u64 rx_total;
+ volatile u64 tx_total;
+ volatile int run_test; /**< Signal start of test */
+
+ f64 test_start_time;
+ f64 test_end_time;
+ u32 prev_conns;
+ u32 repeats;
+ /*
+ * Flags
+ */
+ u8 is_init;
+ u8 test_client_attached;
+ u8 no_return;
+ u8 test_return_packets;
+ int i_am_master;
+ int drop_packets; /**< drop all packets */
+ u8 prealloc_fifos; /**< Request fifo preallocation */
+
+ /*
+ * Convenience
+ */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ ethernet_main_t *ethernet_main;
+} tclient_main_t;
+
+tclient_main_t tclient_main;
+
+vlib_node_registration_t tclient_node;
+
+#endif /* __included_tclient_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/builtin_http_server.c b/src/vnet/tcp/builtin_http_server.c
new file mode 100644
index 00000000..9ba19ce9
--- /dev/null
+++ b/src/vnet/tcp/builtin_http_server.c
@@ -0,0 +1,564 @@
+/*
+* Copyright (c) 2015-2017 Cisco and/or its affiliates.
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <vnet/vnet.h>
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+
+typedef enum
+{
+ EVENT_WAKEUP = 1,
+} http_process_event_t;
+
+typedef struct
+{
+ u64 session_handle;
+ u64 node_index;
+ u8 *data;
+} builtin_http_server_args;
+
+typedef struct
+{
+ u8 *rx_buf;
+ unix_shared_memory_queue_t **vpp_queue;
+ u64 byte_index;
+
+ uword *handler_by_get_request;
+
+ u32 *free_http_cli_process_node_indices;
+
+ /* Server's event queue */
+ unix_shared_memory_queue_t *vl_input_queue;
+
+ /* API client handle */
+ u32 my_client_index;
+
+ u32 app_index;
+
+ /* process node index for event scheduling */
+ u32 node_index;
+ vlib_main_t *vlib_main;
+} http_server_main_t;
+
+http_server_main_t http_server_main;
+
+static void
+free_http_process (builtin_http_server_args * args)
+{
+ vlib_node_runtime_t *rt;
+ vlib_main_t *vm = &vlib_global_main;
+ http_server_main_t *hsm = &http_server_main;
+ vlib_node_t *n;
+ u32 node_index;
+ builtin_http_server_args **save_args;
+
+ node_index = args->node_index;
+ ASSERT (node_index != 0);
+
+ n = vlib_get_node (vm, node_index);
+ rt = vlib_node_get_runtime (vm, n->index);
+ save_args = vlib_node_get_runtime_data (vm, n->index);
+
+ /* Reset process session pointer */
+ clib_mem_free (*save_args);
+ *save_args = 0;
+
+ /* Turn off the process node */
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+
+ /* add node index to the freelist */
+ vec_add1 (hsm->free_http_cli_process_node_indices, node_index);
+}
+
+static const char
+ *http_response = "HTTP/1.1 200 OK\r\n"
+ "Content-Type: text/html\r\n"
+ "Expires: Mon, 11 Jan 1970 10:10:10 GMT\r\n"
+ "Connection: close\r\n"
+ "Pragma: no-cache\r\n" "Content-Length: %d\r\n\r\n%s";
+
+static const char
+ *http_error_template = "HTTP/1.1 %s\r\n"
+ "Content-Type: text/html\r\n"
+ "Expires: Mon, 11 Jan 1970 10:10:10 GMT\r\n"
+ "Connection: close\r\n" "Pragma: no-cache\r\n" "Content-Length: 0\r\n\r\n";
+
+/* Header, including incantation to suppress favicon.ico requests */
+static const char
+ *html_header_template = "<html><head><title>%v</title>"
+ "</head><link rel=\"icon\" href=\"data:,\"><body><pre>";
+
+static const char *html_footer = "</pre></body></html>\r\n";
+
+static void
+http_cli_output (uword arg, u8 * buffer, uword buffer_bytes)
+{
+ u8 **output_vecp = (u8 **) arg;
+ u8 *output_vec;
+ u32 offset;
+
+ output_vec = *output_vecp;
+
+ offset = vec_len (output_vec);
+ vec_validate (output_vec, offset + buffer_bytes - 1);
+ clib_memcpy (output_vec + offset, buffer, buffer_bytes);
+
+ *output_vecp = output_vec;
+}
+
+void
+send_data (builtin_http_server_args * args, u8 * data)
+{
+ session_fifo_event_t evt;
+ u32 offset, bytes_to_send;
+ f64 delay = 10e-3;
+ http_server_main_t *hsm = &http_server_main;
+ vlib_main_t *vm = hsm->vlib_main;
+ f64 last_sent_timer = vlib_time_now (vm);
+ stream_session_t *s;
+
+ s = stream_session_get_from_handle (args->session_handle);
+ ASSERT (s);
+ bytes_to_send = vec_len (data);
+ offset = 0;
+
+ while (bytes_to_send > 0)
+ {
+ int actual_transfer;
+
+ actual_transfer = svm_fifo_enqueue_nowait
+ (s->server_tx_fifo, bytes_to_send, data + offset);
+
+ /* Made any progress? */
+ if (actual_transfer <= 0)
+ {
+ vlib_process_suspend (vm, delay);
+ /* 10s deadman timer */
+ if (vlib_time_now (vm) > last_sent_timer + 10.0)
+ {
+ /* $$$$ FC: reset transport session here? */
+ break;
+ }
+ /* Exponential backoff, within reason */
+ if (delay < 1.0)
+ delay = delay * 2.0;
+ }
+ else
+ {
+ last_sent_timer = vlib_time_now (vm);
+ offset += actual_transfer;
+ bytes_to_send -= actual_transfer;
+
+ if (svm_fifo_set_event (s->server_tx_fifo))
+ {
+ /* Fabricate TX event, send to vpp */
+ evt.fifo = s->server_tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ evt.event_id = 0;
+
+ unix_shared_memory_queue_add (hsm->vpp_queue[s->thread_index],
+ (u8 *) & evt,
+ 0 /* do wait for mutex */ );
+ }
+ delay = 10e-3;
+ }
+ }
+}
+
+static void
+send_error (builtin_http_server_args * args, char *str)
+{
+ u8 *data;
+
+ data = format (0, http_error_template, str);
+ send_data (args, data);
+ vec_free (data);
+}
+
+static uword
+http_cli_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ http_server_main_t *hsm = &http_server_main;
+ u8 *request = 0, *reply = 0;
+ builtin_http_server_args **save_args;
+ builtin_http_server_args *args;
+ unformat_input_t input;
+ int i;
+ u8 *http = 0, *html = 0;
+
+ save_args = vlib_node_get_runtime_data (hsm->vlib_main, rt->node_index);
+ args = *save_args;
+
+ request = (u8 *) (void *) (args->data);
+ if (vec_len (request) < 7)
+ {
+ send_error (args, "400 Bad Request");
+ goto out;
+ }
+
+ for (i = 0; i < vec_len (request) - 4; i++)
+ {
+ if (request[i] == 'G' &&
+ request[i + 1] == 'E' &&
+ request[i + 2] == 'T' && request[i + 3] == ' ')
+ goto found;
+ }
+bad_request:
+ send_error (args, "400 Bad Request");
+ goto out;
+
+found:
+ /* Lose "GET " */
+ vec_delete (request, i + 5, 0);
+
+ /* Replace slashes with spaces, stop at the end of the path */
+ i = 0;
+ while (1)
+ {
+ if (request[i] == '/')
+ request[i] = ' ';
+ else if (request[i] == ' ')
+ {
+ /* vlib_cli_input is vector-based, no need for a NULL */
+ _vec_len (request) = i;
+ break;
+ }
+ i++;
+ /* Should never happen */
+ if (i == vec_len (request))
+ goto bad_request;
+ }
+
+ /* Generate the html header */
+ html = format (0, html_header_template, request /* title */ );
+
+ /* Run the command */
+ unformat_init_vector (&input, request);
+ vlib_cli_input (vm, &input, http_cli_output, (uword) & reply);
+ unformat_free (&input);
+ request = 0;
+
+ /* Generate the html page */
+ html = format (html, "%v", reply);
+ html = format (html, html_footer);
+ /* And the http reply */
+ http = format (0, http_response, vec_len (html), html);
+
+ /* Send it */
+ send_data (args, http);
+
+out:
+ /* Cleanup */
+ vec_free (request);
+ vec_free (reply);
+ vec_free (html);
+ vec_free (http);
+
+ free_http_process (args);
+ return (0);
+}
+
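+/* Run the request in a vlib process node: recycle a disabled http-cli
+ * process from the freelist when possible, otherwise register a new one */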
+static void
+alloc_http_process (builtin_http_server_args * args)
+{
+ char *name;
+ vlib_node_t *n;
+ http_server_main_t *hsm = &http_server_main;
+ vlib_main_t *vm = hsm->vlib_main;
+ uword l = vec_len (hsm->free_http_cli_process_node_indices);
+ builtin_http_server_args **save_args;
+
+ if (vec_len (hsm->free_http_cli_process_node_indices) > 0)
+ {
+ n = vlib_get_node (vm, hsm->free_http_cli_process_node_indices[l - 1]);
+ vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+ _vec_len (hsm->free_http_cli_process_node_indices) = l - 1;
+ }
+ else
+ {
+ static vlib_node_registration_t r = {
+ .function = http_cli_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 16,
+ .runtime_data_bytes = sizeof (void *),
+ };
+
+ name = (char *) format (0, "http-cli-%d", l);
+ r.name = name;
+ vlib_register_node (vm, &r);
+ vec_free (name);
+
+ n = vlib_get_node (vm, r.index);
+ }
+
+ /* Save the node index in the args. It won't be zero. */
+ args->node_index = n->index;
+
+ /* Save the args (pointer) in the node runtime */
+ save_args = vlib_node_get_runtime_data (vm, n->index);
+ *save_args = args;
+
+ vlib_start_process (vm, n->runtime_index);
+}
+
+static void
+alloc_http_process_callback (void *cb_args)
+{
+ alloc_http_process ((builtin_http_server_args *) cb_args);
+}
+
+static int
+http_server_rx_callback (stream_session_t * s)
+{
+ u32 max_dequeue;
+ int actual_transfer;
+ http_server_main_t *hsm = &http_server_main;
+ svm_fifo_t *rx_fifo;
+ builtin_http_server_args *args;
+
+ rx_fifo = s->server_rx_fifo;
+ max_dequeue = svm_fifo_max_dequeue (rx_fifo);
+ svm_fifo_unset_event (rx_fifo);
+ if (PREDICT_FALSE (max_dequeue == 0))
+ return 0;
+
+ vec_validate (hsm->rx_buf, max_dequeue - 1);
+ _vec_len (hsm->rx_buf) = max_dequeue;
+
+ actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, max_dequeue,
+ hsm->rx_buf);
+ ASSERT (actual_transfer > 0);
+ _vec_len (hsm->rx_buf) = actual_transfer;
+
+ /* send the command to a new/recycled vlib process */
+ args = clib_mem_alloc (sizeof (*args));
+ args->data = vec_dup (hsm->rx_buf);
+ args->session_handle = stream_session_handle (s);
+
+ /* Send an RPC request via the thread-0 input node */
+ if (vlib_get_thread_index () != 0)
+ {
+ session_fifo_event_t evt;
+ evt.rpc_args.fp = alloc_http_process_callback;
+ evt.rpc_args.arg = args;
+ evt.event_type = FIFO_EVENT_RPC;
+ unix_shared_memory_queue_add
+ (session_manager_get_vpp_event_queue (0 /* main thread */ ),
+ (u8 *) & evt, 0 /* do wait for mutex */ );
+ }
+ else
+ alloc_http_process (args);
+ return 0;
+}
+
+static int
+builtin_session_accept_callback (stream_session_t * s)
+{
+ http_server_main_t *bsm = &http_server_main;
+
+ bsm->vpp_queue[s->thread_index] =
+ session_manager_get_vpp_event_queue (s->thread_index);
+ s->session_state = SESSION_STATE_READY;
+ bsm->byte_index = 0;
+ return 0;
+}
+
+static void
+builtin_session_disconnect_callback (stream_session_t * s)
+{
+ http_server_main_t *bsm = &http_server_main;
+ vnet_disconnect_args_t _a, *a = &_a;
+
+ a->handle = stream_session_handle (s);
+ a->app_index = bsm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+builtin_session_reset_callback (stream_session_t * s)
+{
+ clib_warning ("called.. ");
+
+ stream_session_cleanup (s);
+}
+
+static int
+builtin_session_connected_callback (u32 app_index, u32 api_context,
+ stream_session_t * s, u8 is_fail)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static int
+builtin_add_segment_callback (u32 client_index,
+ const u8 * seg_name, u32 seg_size)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static int
+builtin_redirect_connect_callback (u32 client_index, void *mp)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static session_cb_vft_t builtin_session_cb_vft = {
+ .session_accept_callback = builtin_session_accept_callback,
+ .session_disconnect_callback = builtin_session_disconnect_callback,
+ .session_connected_callback = builtin_session_connected_callback,
+ .add_segment_callback = builtin_add_segment_callback,
+ .redirect_connect_callback = builtin_redirect_connect_callback,
+ .builtin_server_rx_callback = http_server_rx_callback,
+ .session_reset_callback = builtin_session_reset_callback
+};
+
+/* Abuse VPP's input queue */
+static int
+create_api_loopback (vlib_main_t * vm)
+{
+ http_server_main_t *hsm = &http_server_main;
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr;
+
+ shmem_hdr = am->shmem_hdr;
+ hsm->vl_input_queue = shmem_hdr->vl_input_queue;
+ hsm->my_client_index =
+ vl_api_memclnt_create_internal ("tcp_test_client", hsm->vl_input_queue);
+ return 0;
+}
+
+static int
+server_attach ()
+{
+ http_server_main_t *hsm = &http_server_main;
+ u8 segment_name[128];
+ u64 options[SESSION_OPTIONS_N_OPTIONS];
+ vnet_app_attach_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (*a));
+ memset (options, 0, sizeof (options));
+
+ a->api_client_index = hsm->my_client_index;
+ a->session_cb_vft = &builtin_session_cb_vft;
+ a->options = options;
+ a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 128 << 20;
+ a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 8 << 10;
+ a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 32 << 10;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 16;
+ a->segment_name = segment_name;
+ a->segment_name_length = ARRAY_LEN (segment_name);
+
+ if (vnet_application_attach (a))
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ hsm->app_index = a->app_index;
+ return 0;
+}
+
+static int
+server_listen ()
+{
+ http_server_main_t *hsm = &http_server_main;
+ vnet_bind_args_t _a, *a = &_a;
+ memset (a, 0, sizeof (*a));
+ a->app_index = hsm->app_index;
+ a->uri = "tcp://0.0.0.0/80";
+ return vnet_bind_uri (a);
+}
+
+static int
+server_create (vlib_main_t * vm)
+{
+ http_server_main_t *hsm = &http_server_main;
+ u32 num_threads;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+
+ ASSERT (hsm->my_client_index == (u32) ~ 0);
+ if (create_api_loopback (vm))
+ return -1;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (http_server_main.vpp_queue, num_threads - 1);
+
+ if (server_attach ())
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ if (server_listen ())
+ {
+ clib_warning ("failed to start listening");
+ return -1;
+ }
+ return 0;
+}
+
+static clib_error_t *
+server_create_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ http_server_main_t *hsm = &http_server_main;
+ int rv;
+
+ if (hsm->my_client_index != (u32) ~ 0)
+ return clib_error_return (0, "test http server is already running");
+
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+ rv = server_create (vm);
+ switch (rv)
+ {
+ case 0:
+ break;
+ default:
+ return clib_error_return (0, "server_create returned %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (server_create_command, static) =
+{
+ .path = "test http server",
+ .short_help = "test http server",
+ .function = server_create_command_fn,
+};
+/* *INDENT-ON* */
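+
+/*
+ * Once "test http server" is running, any debug CLI command can be issued
+ * over HTTP: http_cli_process above turns slashes in the GET path into
+ * spaces and hands the result to vlib_cli_input. For example, with a
+ * hypothetical VPP address:
+ *
+ *   curl http://192.0.2.1/show/version
+ *
+ * returns the output of the "show version" CLI command as an HTML page.
+ */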
+
+static clib_error_t *
+builtin_http_server_main_init (vlib_main_t * vm)
+{
+ http_server_main_t *hsm = &http_server_main;
+ hsm->my_client_index = ~0;
+ hsm->vlib_main = vm;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (builtin_http_server_main_init);
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/tcp/builtin_proxy.c b/src/vnet/tcp/builtin_proxy.c
new file mode 100644
index 00000000..91377e76
--- /dev/null
+++ b/src/vnet/tcp/builtin_proxy.c
@@ -0,0 +1,601 @@
+/*
+* Copyright (c) 2015-2017 Cisco and/or its affiliates.
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/tcp/builtin_proxy.h>
+
+builtin_proxy_main_t builtin_proxy_main;
+
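+/* Tear down both halves of a proxied connection: find the paired session
+ * from whichever side triggered the teardown, free the proxy_session_t,
+ * and disconnect the server and active-open sessions */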
+static void
+delete_proxy_session (stream_session_t * s, int is_active_open)
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ proxy_session_t *ps = 0;
+ vnet_disconnect_args_t _a, *a = &_a;
+ stream_session_t *active_open_session = 0;
+ stream_session_t *server_session = 0;
+ uword *p;
+ u64 handle;
+
+ handle = stream_session_handle (s);
+
+ clib_spinlock_lock_if_init (&bpm->sessions_lock);
+ if (is_active_open)
+ {
+ active_open_session = s;
+
+ p = hash_get (bpm->proxy_session_by_active_open_handle, handle);
+ if (p == 0)
+ {
+ clib_warning ("proxy session for %s handle %lld (%llx) AWOL",
+ is_active_open ? "active open" : "server",
+ handle, handle);
+ }
+ else
+ {
+ ps = pool_elt_at_index (bpm->sessions, p[0]);
+ if (ps->vpp_server_handle != ~0)
+ server_session = stream_session_get_from_handle
+ (ps->vpp_server_handle);
+ else
+ server_session = 0;
+ }
+ }
+ else
+ {
+ server_session = s;
+
+ p = hash_get (bpm->proxy_session_by_server_handle, handle);
+ if (p == 0)
+ {
+ clib_warning ("proxy session for %s handle %lld (%llx) AWOL",
+ is_active_open ? "active open" : "server",
+ handle, handle);
+ }
+ else
+ {
+ ps = pool_elt_at_index (bpm->sessions, p[0]);
+	  if (ps->vpp_active_open_handle != ~0)
+	    active_open_session = stream_session_get_from_handle
+	      (ps->vpp_active_open_handle);
+ else
+ active_open_session = 0;
+ }
+ }
+
+ if (ps)
+ {
+ if (CLIB_DEBUG > 0)
+ memset (ps, 0xFE, sizeof (*ps));
+ pool_put (bpm->sessions, ps);
+ }
+
+ clib_spinlock_unlock_if_init (&bpm->sessions_lock);
+
+ if (active_open_session)
+ {
+ a->handle = stream_session_handle (active_open_session);
+ a->app_index = bpm->active_open_app_index;
+ hash_unset (bpm->proxy_session_by_active_open_handle,
+ stream_session_handle (active_open_session));
+ vnet_disconnect_session (a);
+ }
+
+ if (server_session)
+ {
+ a->handle = stream_session_handle (server_session);
+ a->app_index = bpm->server_app_index;
+ hash_unset (bpm->proxy_session_by_server_handle,
+ stream_session_handle (server_session));
+ vnet_disconnect_session (a);
+ }
+}
+
+static int
+server_accept_callback (stream_session_t * s)
+{
+  s->session_state = SESSION_STATE_READY;
+ return 0;
+}
+
+static void
+server_disconnect_callback (stream_session_t * s)
+{
+ delete_proxy_session (s, 0 /* is_active_open */ );
+}
+
+static void
+server_reset_callback (stream_session_t * s)
+{
+ clib_warning ("Reset session %U", format_stream_session, s, 2);
+ delete_proxy_session (s, 0 /* is_active_open */ );
+}
+
+static int
+server_connected_callback (u32 app_index, u32 api_context,
+ stream_session_t * s, u8 is_fail)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static int
+server_add_segment_callback (u32 client_index,
+ const u8 * seg_name, u32 seg_size)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static int
+server_redirect_connect_callback (u32 client_index, void *mp)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static int
+server_rx_callback (stream_session_t * s)
+{
+ u32 max_dequeue;
+ int actual_transfer __attribute__ ((unused));
+ svm_fifo_t *tx_fifo, *rx_fifo;
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ u32 thread_index = vlib_get_thread_index ();
+ vnet_connect_args_t _a, *a = &_a;
+ proxy_session_t *ps;
+ int proxy_index;
+ uword *p;
+ svm_fifo_t *active_open_tx_fifo;
+ session_fifo_event_t evt;
+
+ ASSERT (s->thread_index == thread_index);
+
+ clib_spinlock_lock_if_init (&bpm->sessions_lock);
+ p =
+ hash_get (bpm->proxy_session_by_server_handle, stream_session_handle (s));
+
+ if (PREDICT_TRUE (p != 0))
+ {
+ clib_spinlock_unlock_if_init (&bpm->sessions_lock);
+ active_open_tx_fifo = s->server_rx_fifo;
+
+ /*
+ * Send event for active open tx fifo
+ */
+ if (svm_fifo_set_event (active_open_tx_fifo))
+ {
+ evt.fifo = active_open_tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ if (unix_shared_memory_queue_add
+ (bpm->active_open_event_queue[thread_index], (u8 *) & evt,
+ 0 /* do wait for mutex */ ))
+ clib_warning ("failed to enqueue tx evt");
+ }
+ }
+ else
+ {
+      clib_spinlock_unlock_if_init (&bpm->sessions_lock);
+
+      rx_fifo = s->server_rx_fifo;
+ tx_fifo = s->server_tx_fifo;
+
+ ASSERT (rx_fifo->master_thread_index == thread_index);
+ ASSERT (tx_fifo->master_thread_index == thread_index);
+
+ max_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo);
+
+ if (PREDICT_FALSE (max_dequeue == 0))
+ return 0;
+
+ actual_transfer = svm_fifo_peek (rx_fifo, 0 /* relative_offset */ ,
+ max_dequeue,
+ bpm->rx_buf[thread_index]);
+
+ /* $$$ your message in this space: parse url, etc. */
+
+ memset (a, 0, sizeof (*a));
+
+ clib_spinlock_lock_if_init (&bpm->sessions_lock);
+ pool_get (bpm->sessions, ps);
+ memset (ps, 0, sizeof (*ps));
+ ps->server_rx_fifo = rx_fifo;
+ ps->server_tx_fifo = tx_fifo;
+ ps->vpp_server_handle = stream_session_handle (s);
+
+ proxy_index = ps - bpm->sessions;
+
+ hash_set (bpm->proxy_session_by_server_handle, ps->vpp_server_handle,
+ proxy_index);
+
+ clib_spinlock_unlock_if_init (&bpm->sessions_lock);
+
+ a->uri = "tcp://6.0.2.2/23";
+ a->api_context = proxy_index;
+ a->app_index = bpm->active_open_app_index;
+ a->mp = 0;
+ vnet_connect_uri (a);
+ }
+
+ return 0;
+}
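+
+/*
+ * Note on the rx path above: once the proxy session exists, the server
+ * session's rx fifo doubles as the active-open session's tx fifo (see
+ * active_open_connected_callback below), so forwarding reduces to
+ * posting a FIFO_EVENT_APP_TX event. Only the first read takes the slow
+ * path that peeks the data, creates the proxy session and opens the
+ * outbound connection.
+ */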
+
+static session_cb_vft_t builtin_session_cb_vft = {
+ .session_accept_callback = server_accept_callback,
+ .session_disconnect_callback = server_disconnect_callback,
+ .session_connected_callback = server_connected_callback,
+ .add_segment_callback = server_add_segment_callback,
+ .redirect_connect_callback = server_redirect_connect_callback,
+ .builtin_server_rx_callback = server_rx_callback,
+ .session_reset_callback = server_reset_callback
+};
+
+static int
+active_open_connected_callback (u32 app_index, u32 opaque,
+ stream_session_t * s, u8 is_fail)
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ proxy_session_t *ps;
+ u8 thread_index = vlib_get_thread_index ();
+ session_fifo_event_t evt;
+
+ if (is_fail)
+ {
+ clib_warning ("connection %d failed!", opaque);
+ return 0;
+ }
+
+ /*
+ * Setup proxy session handle.
+ */
+ clib_spinlock_lock_if_init (&bpm->sessions_lock);
+
+ ps = pool_elt_at_index (bpm->sessions, opaque);
+ ps->vpp_active_open_handle = stream_session_handle (s);
+
+ s->server_tx_fifo = ps->server_rx_fifo;
+ s->server_rx_fifo = ps->server_tx_fifo;
+
+ /*
+ * Reset the active-open tx-fifo master indices so the active-open session
+ * will receive data, etc.
+ */
+ s->server_tx_fifo->master_session_index = s->session_index;
+ s->server_tx_fifo->master_thread_index = s->thread_index;
+
+ /*
+ * Account for the active-open session's use of the fifos
+ * so they won't disappear until the last session which uses
+ * them disappears
+ */
+ s->server_tx_fifo->refcnt++;
+ s->server_rx_fifo->refcnt++;
+
+ hash_set (bpm->proxy_session_by_active_open_handle,
+ ps->vpp_active_open_handle, opaque);
+
+ clib_spinlock_unlock_if_init (&bpm->sessions_lock);
+
+ /*
+ * Send event for active open tx fifo
+ */
+ if (svm_fifo_set_event (s->server_tx_fifo))
+ {
+ evt.fifo = s->server_tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ if (unix_shared_memory_queue_add
+ (bpm->active_open_event_queue[thread_index], (u8 *) & evt,
+ 0 /* do wait for mutex */ ))
+ clib_warning ("failed to enqueue tx evt");
+ }
+
+ return 0;
+}
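+
+/*
+ * After the fifo swap above, the two sessions share a single fifo pair:
+ * bytes the server session receives are, by construction, already in
+ * the active-open session's tx fifo and vice versa, so proxied payload
+ * is never copied between fifos. The refcnt increments keep the shared
+ * pair alive until the last of the two sessions is deleted.
+ */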
+
+static void
+active_open_reset_callback (stream_session_t * s)
+{
+ delete_proxy_session (s, 1 /* is_active_open */ );
+}
+
+static int
+active_open_create_callback (stream_session_t * s)
+{
+ return 0;
+}
+
+static void
+active_open_disconnect_callback (stream_session_t * s)
+{
+ delete_proxy_session (s, 1 /* is_active_open */ );
+}
+
+static int
+active_open_rx_callback (stream_session_t * s)
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ session_fifo_event_t evt;
+ svm_fifo_t *server_rx_fifo;
+ u32 thread_index = vlib_get_thread_index ();
+
+ server_rx_fifo = s->server_rx_fifo;
+
+ /*
+ * Send event for server tx fifo
+ */
+ if (svm_fifo_set_event (server_rx_fifo))
+ {
+ evt.fifo = server_rx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ if (unix_shared_memory_queue_add
+ (bpm->server_event_queue[thread_index], (u8 *) & evt,
+ 0 /* do wait for mutex */ ))
+ clib_warning ("failed to enqueue server rx evt");
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+static session_cb_vft_t builtin_clients = {
+ .session_reset_callback = active_open_reset_callback,
+ .session_connected_callback = active_open_connected_callback,
+ .session_accept_callback = active_open_create_callback,
+ .session_disconnect_callback = active_open_disconnect_callback,
+ .builtin_server_rx_callback = active_open_rx_callback
+};
+/* *INDENT-ON* */
+
+static void
+create_api_loopbacks (vlib_main_t * vm)
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr;
+
+ shmem_hdr = am->shmem_hdr;
+ bpm->vl_input_queue = shmem_hdr->vl_input_queue;
+ bpm->server_client_index =
+ vl_api_memclnt_create_internal ("proxy_server", bpm->vl_input_queue);
+ bpm->active_open_client_index =
+ vl_api_memclnt_create_internal ("proxy_active_open", bpm->vl_input_queue);
+}
+
+static int
+server_attach ()
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ u8 segment_name[128];
+ u64 options[SESSION_OPTIONS_N_OPTIONS];
+ vnet_app_attach_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (*a));
+ memset (options, 0, sizeof (options));
+
+ a->api_client_index = bpm->server_client_index;
+ a->session_cb_vft = &builtin_session_cb_vft;
+ a->options = options;
+ a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 512 << 20;
+ a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = bpm->fifo_size;
+ a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = bpm->fifo_size;
+ a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = bpm->private_segment_count;
+ a->options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = bpm->private_segment_size;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] =
+ bpm->prealloc_fifos ? bpm->prealloc_fifos : 1;
+
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+
+ a->segment_name = segment_name;
+ a->segment_name_length = ARRAY_LEN (segment_name);
+
+ if (vnet_application_attach (a))
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ bpm->server_app_index = a->app_index;
+
+ return 0;
+}
+
+static int
+active_open_attach (void)
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ vnet_app_attach_args_t _a, *a = &_a;
+ u8 segment_name[128];
+ u32 segment_name_length;
+  u64 options[SESSION_OPTIONS_N_OPTIONS];
+
+ segment_name_length = ARRAY_LEN (segment_name);
+
+ memset (a, 0, sizeof (*a));
+ memset (options, 0, sizeof (options));
+
+ a->api_client_index = bpm->active_open_client_index;
+ a->segment_name = segment_name;
+ a->segment_name_length = segment_name_length;
+ a->session_cb_vft = &builtin_clients;
+
+ options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
+ options[SESSION_OPTIONS_SEGMENT_SIZE] = 512 << 20;
+ options[SESSION_OPTIONS_RX_FIFO_SIZE] = bpm->fifo_size;
+ options[SESSION_OPTIONS_TX_FIFO_SIZE] = bpm->fifo_size;
+ options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = bpm->private_segment_count;
+ options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = bpm->private_segment_size;
+ options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] =
+ bpm->prealloc_fifos ? bpm->prealloc_fifos : 1;
+
+ options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP
+ | APP_OPTIONS_FLAGS_IS_PROXY;
+
+ a->options = options;
+
+ if (vnet_application_attach (a))
+ return -1;
+
+ bpm->active_open_app_index = a->app_index;
+
+ return 0;
+}
+
+static int
+server_listen ()
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ vnet_bind_args_t _a, *a = &_a;
+ memset (a, 0, sizeof (*a));
+ a->app_index = bpm->server_app_index;
+ a->uri = "tcp://0.0.0.0/23";
+ return vnet_bind_uri (a);
+}
+
+static int
+server_create (vlib_main_t * vm)
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
+ int i;
+
+ if (bpm->server_client_index == (u32) ~ 0)
+ create_api_loopbacks (vm);
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (builtin_proxy_main.server_event_queue, num_threads - 1);
+ vec_validate (builtin_proxy_main.active_open_event_queue, num_threads - 1);
+ vec_validate (bpm->rx_buf, num_threads - 1);
+
+ for (i = 0; i < num_threads; i++)
+ vec_validate (bpm->rx_buf[i], bpm->rcv_buffer_size);
+
+ if (server_attach ())
+ {
+ clib_warning ("failed to attach server app");
+ return -1;
+ }
+ if (server_listen ())
+ {
+ clib_warning ("failed to start listening");
+ return -1;
+ }
+ if (active_open_attach ())
+ {
+ clib_warning ("failed to attach active open app");
+ return -1;
+ }
+
+ for (i = 0; i < num_threads; i++)
+ {
+ bpm->active_open_event_queue[i] =
+ session_manager_get_vpp_event_queue (i);
+
+ ASSERT (bpm->active_open_event_queue[i]);
+
+ bpm->server_event_queue[i] = session_manager_get_vpp_event_queue (i);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ int rv;
+ u64 tmp;
+
+ bpm->fifo_size = 64 << 10;
+ bpm->rcv_buffer_size = 1024;
+ bpm->prealloc_fifos = 0;
+ bpm->private_segment_count = 0;
+ bpm->private_segment_size = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "fifo-size %d", &bpm->fifo_size))
+ bpm->fifo_size <<= 10;
+ else if (unformat (input, "rcv-buf-size %d", &bpm->rcv_buffer_size))
+ ;
+ else if (unformat (input, "prealloc-fifos %d", &bpm->prealloc_fifos))
+ ;
+ else if (unformat (input, "private-segment-count %d",
+ &bpm->private_segment_count))
+ ;
+ else if (unformat (input, "private-segment-size %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000ULL)
+ return clib_error_return
+ (0, "private segment size %lld (%llu) too large", tmp, tmp);
+ bpm->private_segment_size = tmp;
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+
+ rv = server_create (vm);
+ switch (rv)
+ {
+ case 0:
+ break;
+ default:
+ return clib_error_return (0, "server_create returned %d", rv);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (server_create_command, static) =
+{
+ .path = "test proxy server",
+ .short_help = "test proxy server",
+ .function = proxy_server_create_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+builtin_tcp_proxy_main_init (vlib_main_t * vm)
+{
+ builtin_proxy_main_t *bpm = &builtin_proxy_main;
+ bpm->server_client_index = ~0;
+ bpm->active_open_client_index = ~0;
+ bpm->proxy_session_by_active_open_handle = hash_create (0, sizeof (uword));
+ bpm->proxy_session_by_server_handle = hash_create (0, sizeof (uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (builtin_tcp_proxy_main_init);
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/tcp/builtin_proxy.h b/src/vnet/tcp/builtin_proxy.h
new file mode 100644
index 00000000..cf707a15
--- /dev/null
+++ b/src/vnet/tcp/builtin_proxy.h
@@ -0,0 +1,100 @@
+/*
+ * builtin_proxy.h - skeleton vpp engine plug-in header file
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_builtin_proxy_h__
+#define __included_builtin_proxy_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+#include <svm/svm_fifo_segment.h>
+#include <vnet/session/session.h>
+#include <vnet/session/application_interface.h>
+
+typedef struct
+{
+ svm_fifo_t *server_rx_fifo;
+ svm_fifo_t *server_tx_fifo;
+
+ u64 vpp_server_handle;
+ u64 vpp_active_open_handle;
+} proxy_session_t;
+
+typedef struct
+{
+ unix_shared_memory_queue_t *vl_input_queue; /**< vpe input queue */
+ /** per-thread vectors */
+ unix_shared_memory_queue_t **server_event_queue;
+ unix_shared_memory_queue_t **active_open_event_queue;
+ u8 **rx_buf; /**< intermediate rx buffers */
+
+ u32 cli_node_index; /**< cli process node index */
+ u32 server_client_index; /**< server API client handle */
+ u32 server_app_index; /**< server app index */
+ u32 active_open_client_index; /**< active open API client handle */
+ u32 active_open_app_index; /**< active open index after attach */
+
+ uword *proxy_session_by_server_handle;
+ uword *proxy_session_by_active_open_handle;
+
+ /*
+ * Configuration params
+ */
+ u8 *connect_uri; /**< URI for slave's connect */
+ u32 configured_segment_size;
+ u32 fifo_size;
+ u32 private_segment_count; /**< Number of private fifo segs */
+ u32 private_segment_size; /**< size of private fifo segs */
+ int rcv_buffer_size;
+
+ /*
+ * Test state variables
+ */
+ proxy_session_t *sessions; /**< Session pool, shared */
+ clib_spinlock_t sessions_lock;
+ u32 **connection_index_by_thread;
+ pthread_t client_thread_handle;
+
+ /*
+ * Flags
+ */
+ u8 is_init;
+ u8 prealloc_fifos; /**< Request fifo preallocation */
+
+ /*
+ * Convenience
+ */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ ethernet_main_t *ethernet_main;
+} builtin_proxy_main_t;
+
+extern builtin_proxy_main_t builtin_proxy_main;
+
+#endif /* __included_builtin_proxy_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
new file mode 100644
index 00000000..93314529
--- /dev/null
+++ b/src/vnet/tcp/builtin_server.c
@@ -0,0 +1,455 @@
+/*
+* Copyright (c) 2015-2017 Cisco and/or its affiliates.
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+
+typedef struct
+{
+ /*
+ * Server app parameters
+ */
+ unix_shared_memory_queue_t **vpp_queue;
+  unix_shared_memory_queue_t *vl_input_queue;	/**< Server's event queue */
+
+ u32 app_index; /**< Server app index */
+ u32 my_client_index; /**< API client handle */
+  u32 node_index;	/**< process node index for event scheduling */
+
+ /*
+ * Config params
+ */
+ u8 no_echo; /**< Don't echo traffic */
+ u32 fifo_size; /**< Fifo size */
+ u32 rcv_buffer_size; /**< Rcv buffer size */
+ u32 prealloc_fifos; /**< Preallocate fifos */
+ u32 private_segment_count; /**< Number of private segments */
+ u32 private_segment_size; /**< Size of private segments */
+ char *server_uri; /**< Server URI */
+
+ /*
+ * Test state
+ */
+ u8 **rx_buf; /**< Per-thread RX buffer */
+ u64 byte_index;
+ u32 **rx_retries;
+
+ vlib_main_t *vlib_main;
+} builtin_server_main_t;
+
+builtin_server_main_t builtin_server_main;
+
+int
+builtin_session_accept_callback (stream_session_t * s)
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+
+ bsm->vpp_queue[s->thread_index] =
+ session_manager_get_vpp_event_queue (s->thread_index);
+ s->session_state = SESSION_STATE_READY;
+ bsm->byte_index = 0;
+ vec_validate (bsm->rx_retries[s->thread_index], s->session_index);
+ bsm->rx_retries[s->thread_index][s->session_index] = 0;
+ return 0;
+}
+
+void
+builtin_session_disconnect_callback (stream_session_t * s)
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ vnet_disconnect_args_t _a, *a = &_a;
+
+ a->handle = stream_session_handle (s);
+ a->app_index = bsm->app_index;
+ vnet_disconnect_session (a);
+}
+
+void
+builtin_session_reset_callback (stream_session_t * s)
+{
+ clib_warning ("Reset session %U", format_stream_session, s, 2);
+ stream_session_cleanup (s);
+}
+
+
+int
+builtin_session_connected_callback (u32 app_index, u32 api_context,
+ stream_session_t * s, u8 is_fail)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+int
+builtin_add_segment_callback (u32 client_index,
+ const u8 * seg_name, u32 seg_size)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+int
+builtin_redirect_connect_callback (u32 client_index, void *mp)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+void
+test_bytes (builtin_server_main_t * bsm, int actual_transfer)
+{
+ int i;
+ u32 my_thread_id = vlib_get_thread_index ();
+
+ for (i = 0; i < actual_transfer; i++)
+ {
+ if (bsm->rx_buf[my_thread_id][i] != ((bsm->byte_index + i) & 0xff))
+ {
+ clib_warning ("at %lld expected %d got %d", bsm->byte_index + i,
+ (bsm->byte_index + i) & 0xff,
+ bsm->rx_buf[my_thread_id][i]);
+ }
+ }
+ bsm->byte_index += actual_transfer;
+}
+
+/*
+ * If no-echo, just read the data and be done with it
+ */
+int
+builtin_server_rx_callback_no_echo (stream_session_t * s)
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ u32 my_thread_id = vlib_get_thread_index ();
+ int actual_transfer;
+ svm_fifo_t *rx_fifo;
+
+ rx_fifo = s->server_rx_fifo;
+
+ do
+ {
+ actual_transfer =
+ svm_fifo_dequeue_nowait (rx_fifo, bsm->rcv_buffer_size,
+ bsm->rx_buf[my_thread_id]);
+ }
+ while (actual_transfer > 0);
+ return 0;
+}
+
+int
+builtin_server_rx_callback (stream_session_t * s)
+{
+ u32 n_written, max_dequeue, max_enqueue, max_transfer;
+ int actual_transfer;
+ svm_fifo_t *tx_fifo, *rx_fifo;
+ builtin_server_main_t *bsm = &builtin_server_main;
+ session_fifo_event_t evt;
+ static int serial_number = 0;
+ u32 thread_index = vlib_get_thread_index ();
+
+ ASSERT (s->thread_index == thread_index);
+
+ rx_fifo = s->server_rx_fifo;
+ tx_fifo = s->server_tx_fifo;
+
+ ASSERT (rx_fifo->master_thread_index == thread_index);
+ ASSERT (tx_fifo->master_thread_index == thread_index);
+
+ max_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo);
+ max_enqueue = svm_fifo_max_enqueue (s->server_tx_fifo);
+
+ if (PREDICT_FALSE (max_dequeue == 0))
+ return 0;
+
+ /* Number of bytes we're going to copy */
+ max_transfer = (max_dequeue < max_enqueue) ? max_dequeue : max_enqueue;
+
+ /* No space in tx fifo */
+ if (PREDICT_FALSE (max_transfer == 0))
+ {
+ /* XXX timeout for session that are stuck */
+
+ rx_event:
+ /* Program self-tap to retry */
+ if (svm_fifo_set_event (rx_fifo))
+ {
+ unix_shared_memory_queue_t *q;
+ evt.fifo = rx_fifo;
+ evt.event_type = FIFO_EVENT_BUILTIN_RX;
+ evt.event_id = 0;
+
+ q = bsm->vpp_queue[thread_index];
+ if (PREDICT_FALSE (q->cursize == q->maxsize))
+ clib_warning ("out of event queue space");
+ else if (unix_shared_memory_queue_add (q, (u8 *) & evt, 0))
+ clib_warning ("failed to enqueue self-tap");
+
+ if (bsm->rx_retries[thread_index][s->session_index] == 500000)
+ {
+ clib_warning ("session stuck: %U", format_stream_session, s, 2);
+ }
+ if (bsm->rx_retries[thread_index][s->session_index] < 500001)
+ bsm->rx_retries[thread_index][s->session_index]++;
+ }
+
+ return 0;
+ }
+
+ _vec_len (bsm->rx_buf[thread_index]) = max_transfer;
+
+ actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, max_transfer,
+ bsm->rx_buf[thread_index]);
+ ASSERT (actual_transfer == max_transfer);
+
+// test_bytes (bsm, actual_transfer);
+
+ /*
+ * Echo back
+ */
+
+ n_written = svm_fifo_enqueue_nowait (tx_fifo, actual_transfer,
+ bsm->rx_buf[thread_index]);
+
+ if (n_written != max_transfer)
+ clib_warning ("short trout!");
+
+ if (svm_fifo_set_event (tx_fifo))
+ {
+ /* Fabricate TX event, send to vpp */
+ evt.fifo = tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ evt.event_id = serial_number++;
+
+ if (unix_shared_memory_queue_add (bsm->vpp_queue[s->thread_index],
+ (u8 *) & evt,
+ 0 /* do wait for mutex */ ))
+ clib_warning ("failed to enqueue tx evt");
+ }
+
+ if (PREDICT_FALSE (n_written < max_dequeue))
+ goto rx_event;
+
+ return 0;
+}
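+
+/*
+ * The echo path above is flow controlled: at most
+ * min (max_dequeue, max_enqueue) bytes move from rx to tx fifo per
+ * callback, and when the tx fifo fills up a FIFO_EVENT_BUILTIN_RX
+ * self-tap re-schedules the session instead of busy-waiting.
+ */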
+
+static session_cb_vft_t builtin_session_cb_vft = {
+ .session_accept_callback = builtin_session_accept_callback,
+ .session_disconnect_callback = builtin_session_disconnect_callback,
+ .session_connected_callback = builtin_session_connected_callback,
+ .add_segment_callback = builtin_add_segment_callback,
+ .redirect_connect_callback = builtin_redirect_connect_callback,
+ .builtin_server_rx_callback = builtin_server_rx_callback,
+ .session_reset_callback = builtin_session_reset_callback
+};
+
+/* Abuse VPP's input queue */
+static int
+create_api_loopback (vlib_main_t * vm)
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr;
+
+ shmem_hdr = am->shmem_hdr;
+ bsm->vl_input_queue = shmem_hdr->vl_input_queue;
+ bsm->my_client_index =
+ vl_api_memclnt_create_internal ("tcp_test_server", bsm->vl_input_queue);
+ return 0;
+}
+
+static int
+server_attach ()
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ u8 segment_name[128];
+ u64 options[SESSION_OPTIONS_N_OPTIONS];
+ vnet_app_attach_args_t _a, *a = &_a;
+
+ memset (a, 0, sizeof (*a));
+ memset (options, 0, sizeof (options));
+
+ if (bsm->no_echo)
+ builtin_session_cb_vft.builtin_server_rx_callback =
+ builtin_server_rx_callback_no_echo;
+ else
+ builtin_session_cb_vft.builtin_server_rx_callback =
+ builtin_server_rx_callback;
+ a->api_client_index = bsm->my_client_index;
+ a->session_cb_vft = &builtin_session_cb_vft;
+ a->options = options;
+ a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 512 << 20;
+ a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = bsm->fifo_size;
+ a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = bsm->fifo_size;
+ a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = bsm->private_segment_count;
+ a->options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = bsm->private_segment_size;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] =
+ bsm->prealloc_fifos ? bsm->prealloc_fifos : 1;
+
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+
+ a->segment_name = segment_name;
+ a->segment_name_length = ARRAY_LEN (segment_name);
+
+ if (vnet_application_attach (a))
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ bsm->app_index = a->app_index;
+ return 0;
+}
+
+static int
+server_listen ()
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ vnet_bind_args_t _a, *a = &_a;
+ memset (a, 0, sizeof (*a));
+ a->app_index = bsm->app_index;
+ a->uri = bsm->server_uri;
+ return vnet_bind_uri (a);
+}
+
+static int
+server_create (vlib_main_t * vm)
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
+ int i;
+
+ if (bsm->my_client_index == (u32) ~ 0)
+ {
+ if (create_api_loopback (vm))
+ {
+ clib_warning ("failed to create api loopback");
+ return -1;
+ }
+ }
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (builtin_server_main.vpp_queue, num_threads - 1);
+ vec_validate (bsm->rx_buf, num_threads - 1);
+ vec_validate (bsm->rx_retries, num_threads - 1);
+
+ for (i = 0; i < num_threads; i++)
+ vec_validate (bsm->rx_buf[i], bsm->rcv_buffer_size);
+
+ if (server_attach ())
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ if (server_listen ())
+ {
+ clib_warning ("failed to start listening");
+ return -1;
+ }
+ return 0;
+}
+
+static clib_error_t *
+server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ u8 server_uri_set = 0;
+ int rv;
+ u64 tmp;
+
+ bsm->no_echo = 0;
+ bsm->fifo_size = 64 << 10;
+ bsm->rcv_buffer_size = 128 << 10;
+ bsm->prealloc_fifos = 0;
+ bsm->private_segment_count = 0;
+ bsm->private_segment_size = 0;
+ vec_free (bsm->server_uri);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "no-echo"))
+ bsm->no_echo = 1;
+ else if (unformat (input, "fifo-size %d", &bsm->fifo_size))
+ bsm->fifo_size <<= 10;
+ else if (unformat (input, "rcv-buf-size %d", &bsm->rcv_buffer_size))
+ ;
+ else if (unformat (input, "prealloc-fifos %d", &bsm->prealloc_fifos))
+ ;
+ else if (unformat (input, "private-segment-count %d",
+ &bsm->private_segment_count))
+ ;
+ else if (unformat (input, "private-segment-size %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000ULL)
+ return clib_error_return
+ (0, "private segment size %lld (%llu) too large", tmp, tmp);
+ bsm->private_segment_size = tmp;
+ }
+ else if (unformat (input, "uri %s", &bsm->server_uri))
+ server_uri_set = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+
+ if (!server_uri_set)
+ bsm->server_uri = (char *) format (0, "tcp://0.0.0.0/1234%c", 0);
+
+ rv = server_create (vm);
+ switch (rv)
+ {
+ case 0:
+ break;
+ default:
+ return clib_error_return (0, "server_create returned %d", rv);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (server_create_command, static) =
+{
+ .path = "test tcp server",
+ .short_help = "test tcp server [no echo][fifo-size <mbytes>] "
+ "[rcv-buf-size <bytes>][prealloc-fifos <count>]"
+ "[private-segment-count <count>][private-segment-size <bytes[m|g]>]"
+ "[uri <tcp://ip/port>]",
+ .function = server_create_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+builtin_tcp_server_main_init (vlib_main_t * vm)
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ bsm->my_client_index = ~0;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (builtin_tcp_server_main_init);
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/tcp/tcp.api b/src/vnet/tcp/tcp.api
new file mode 100644
index 00000000..093a5a89
--- /dev/null
+++ b/src/vnet/tcp/tcp.api
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Configure TCP source addresses for active-open TCP sessions
+
+ TCP src/dst ports are 16 bits, with the low-order 1024 ports
+ reserved. So, it's necessary to provide a considerable number of
+ source IP addresses if one wishes to initiate a large number of
+ connections.
+
+ Each of those addresses needs to have a receive adjacency -
+    either a /32 or a /128 - and vpp needs to answer (proxy) ARPs or
+ neighbor discovery requests for the addresses.
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - 1 for ipv6, 0 for ipv4
+ @param vrf_id - fib table / vrf id for local adjacencies
+ @param first_address - first address that TCP will use
+ @param last_address - last address that TCP will use
+*/
+autoreply define tcp_configure_src_addresses {
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+ u32 vrf_id;
+ u8 first_address[16];
+ u8 last_address[16];
+};
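+
+/*
+ * Sizing example (illustrative figures, not part of the API): each
+ * source address yields at most 65536 - 1024 = 64512 usable source
+ * ports, so on the order of one million concurrent active opens needs
+ * at least ceil (1000000 / 64512) = 16 configured source addresses.
+ */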
+
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
new file mode 100644
index 00000000..a365cb48
--- /dev/null
+++ b/src/vnet/tcp/tcp.c
@@ -0,0 +1,1943 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief TCP host stack utilities
+ */
+
+#include <vnet/tcp/tcp.h>
+#include <vnet/session/session.h>
+#include <vnet/fib/fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <math.h>
+
+tcp_main_t tcp_main;
+
+static u32
+tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl)
+{
+ tcp_main_t *tm = &tcp_main;
+ tcp_connection_t *listener;
+
+ pool_get (tm->listener_pool, listener);
+ memset (listener, 0, sizeof (*listener));
+
+ listener->c_c_index = listener - tm->listener_pool;
+ listener->c_lcl_port = lcl->port;
+
+ if (lcl->is_ip4)
+ {
+ listener->c_lcl_ip4.as_u32 = lcl->ip.ip4.as_u32;
+ listener->c_is_ip4 = 1;
+ }
+ else
+ {
+ clib_memcpy (&listener->c_lcl_ip6, &lcl->ip.ip6,
+ sizeof (ip6_address_t));
+ }
+ listener->c_transport_proto = TRANSPORT_PROTO_TCP;
+ listener->c_s_index = session_index;
+ listener->state = TCP_STATE_LISTEN;
+
+ tcp_connection_timers_init (listener);
+
+ TCP_EVT_DBG (TCP_EVT_BIND, listener);
+
+ return listener->c_c_index;
+}
+
+u32
+tcp_session_bind (u32 session_index, transport_endpoint_t * tep)
+{
+ return tcp_connection_bind (session_index, tep);
+}
+
+static void
+tcp_connection_unbind (u32 listener_index)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_connection_t *tc;
+
+ tc = pool_elt_at_index (tm->listener_pool, listener_index);
+
+ TCP_EVT_DBG (TCP_EVT_UNBIND, tc);
+
+ /* Poison the entry */
+ if (CLIB_DEBUG > 0)
+ memset (tc, 0xFA, sizeof (*tc));
+
+ pool_put_index (tm->listener_pool, listener_index);
+}
+
+u32
+tcp_session_unbind (u32 listener_index)
+{
+ tcp_connection_unbind (listener_index);
+ return 0;
+}
+
+transport_connection_t *
+tcp_session_get_listener (u32 listener_index)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_connection_t *tc;
+ tc = pool_elt_at_index (tm->listener_pool, listener_index);
+ return &tc->connection;
+}
+
+always_inline void
+transport_endpoint_del (u32 tepi)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ clib_spinlock_lock_if_init (&tm->local_endpoints_lock);
+ pool_put_index (tm->local_endpoints, tepi);
+ clib_spinlock_unlock_if_init (&tm->local_endpoints_lock);
+}
+
+always_inline transport_endpoint_t *
+transport_endpoint_new (void)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ transport_endpoint_t *tep;
+ pool_get (tm->local_endpoints, tep);
+ return tep;
+}
+
+/**
+ * Cleanup half-open connection
+ */
+void
+tcp_half_open_connection_del (tcp_connection_t * tc)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ clib_spinlock_lock_if_init (&tm->half_open_lock);
+ pool_put_index (tm->half_open_connections, tc->c_c_index);
+ if (CLIB_DEBUG)
+ memset (tc, 0xFA, sizeof (*tc));
+ clib_spinlock_unlock_if_init (&tm->half_open_lock);
+}
+
+/**
+ * Try to cleanup half-open connection
+ *
+ * If called from a thread that doesn't own tc, the call won't have any
+ * effect.
+ *
+ * @param tc - connection to be cleaned up
+ * @return non-zero if cleanup failed.
+ */
+int
+tcp_half_open_connection_cleanup (tcp_connection_t * tc)
+{
+ /* Make sure this is the owning thread */
+ if (tc->c_thread_index != vlib_get_thread_index ())
+ return 1;
+ tcp_timer_reset (tc, TCP_TIMER_ESTABLISH);
+ tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT_SYN);
+ tcp_half_open_connection_del (tc);
+ return 0;
+}
+
+tcp_connection_t *
+tcp_half_open_connection_new (void)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_connection_t *tc = 0;
+ ASSERT (vlib_get_thread_index () == 0);
+ pool_get (tm->half_open_connections, tc);
+ memset (tc, 0, sizeof (*tc));
+ tc->c_c_index = tc - tm->half_open_connections;
+ return tc;
+}
+
+/**
+ * Cleans up connection state.
+ *
+ * No notifications.
+ */
+void
+tcp_connection_cleanup (tcp_connection_t * tc)
+{
+ tcp_main_t *tm = &tcp_main;
+ u32 tepi;
+ transport_endpoint_t *tep;
+
+ /* Cleanup local endpoint if this was an active connect */
+ tepi = transport_endpoint_lookup (&tm->local_endpoints_table, &tc->c_lcl_ip,
+ clib_net_to_host_u16 (tc->c_lcl_port));
+ if (tepi != TRANSPORT_ENDPOINT_INVALID_INDEX)
+ {
+ tep = pool_elt_at_index (tm->local_endpoints, tepi);
+ transport_endpoint_table_del (&tm->local_endpoints_table, tep);
+ transport_endpoint_del (tepi);
+ }
+
+ /* Check if connection is not yet fully established */
+ if (tc->state == TCP_STATE_SYN_SENT)
+ {
+ /* Try to remove the half-open connection. If this is not the owning
+ * thread, tc won't be removed. Retransmit or establish timers will
+ * eventually expire and call again cleanup on the right thread. */
+ tcp_half_open_connection_cleanup (tc);
+ }
+ else
+ {
+ int thread_index = tc->c_thread_index;
+
+ /* Make sure all timers are cleared */
+ tcp_connection_timers_reset (tc);
+
+ /* Poison the entry */
+ if (CLIB_DEBUG > 0)
+ memset (tc, 0xFA, sizeof (*tc));
+ pool_put (tm->connections[thread_index], tc);
+ }
+}
+
+/**
+ * Connection removal.
+ *
+ * This should be called only once the connection enters CLOSED state. Note
+ * that it notifies the session of the removal event, so if the goal is to
+ * just remove the connection, call tcp_connection_cleanup instead.
+ */
+void
+tcp_connection_del (tcp_connection_t * tc)
+{
+ TCP_EVT_DBG (TCP_EVT_DELETE, tc);
+ stream_session_delete_notify (&tc->connection);
+ tcp_connection_cleanup (tc);
+}
+
+tcp_connection_t *
+tcp_connection_new (u8 thread_index)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_connection_t *tc;
+
+ pool_get (tm->connections[thread_index], tc);
+ memset (tc, 0, sizeof (*tc));
+ tc->c_c_index = tc - tm->connections[thread_index];
+ tc->c_thread_index = thread_index;
+ return tc;
+}
+
+/** Notify session that connection has been reset.
+ *
+ * Switch state to closed and wait for session to call cleanup.
+ */
+void
+tcp_connection_reset (tcp_connection_t * tc)
+{
+ TCP_EVT_DBG (TCP_EVT_RST_RCVD, tc);
+ switch (tc->state)
+ {
+ case TCP_STATE_SYN_RCVD:
+ /* Cleanup everything. App wasn't notified yet */
+ stream_session_delete_notify (&tc->connection);
+ tcp_connection_cleanup (tc);
+ break;
+ case TCP_STATE_SYN_SENT:
+ stream_session_connect_notify (&tc->connection, 1 /* fail */ );
+ tcp_connection_cleanup (tc);
+ break;
+ case TCP_STATE_ESTABLISHED:
+ case TCP_STATE_CLOSE_WAIT:
+ case TCP_STATE_FIN_WAIT_1:
+ case TCP_STATE_FIN_WAIT_2:
+ case TCP_STATE_CLOSING:
+ tc->state = TCP_STATE_CLOSED;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
+
+ /* Make sure all timers are cleared */
+ tcp_connection_timers_reset (tc);
+ stream_session_reset_notify (&tc->connection);
+
+ /* Wait for cleanup from session layer but not forever */
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ break;
+ case TCP_STATE_CLOSED:
+ return;
+ }
+}
+
+/**
+ * Begin connection closing procedure.
+ *
+ * If at the end the connection is not in CLOSED state, it is not removed.
+ * Instead, we rely on TCP to advance through the state machine to either
+ * 1) LAST_ACK (passive close) whereby when the last ACK is received
+ * tcp_connection_del is called. This notifies session of the delete and
+ * calls cleanup.
+ * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers
+ * and cleanup is called.
+ *
+ * N.B. Half-close connections are not supported
+ */
+void
+tcp_connection_close (tcp_connection_t * tc)
+{
+ TCP_EVT_DBG (TCP_EVT_CLOSE, tc);
+
+ /* Send/Program FIN if needed and switch state */
+ switch (tc->state)
+ {
+ case TCP_STATE_SYN_SENT:
+ tc->state = TCP_STATE_CLOSED;
+ break;
+ case TCP_STATE_SYN_RCVD:
+ tcp_send_fin (tc);
+ tc->state = TCP_STATE_FIN_WAIT_1;
+ break;
+ case TCP_STATE_ESTABLISHED:
+ if (!stream_session_tx_fifo_max_dequeue (&tc->connection))
+ tcp_send_fin (tc);
+ else
+ tc->flags |= TCP_CONN_FINPNDG;
+ tc->state = TCP_STATE_FIN_WAIT_1;
+ break;
+ case TCP_STATE_CLOSE_WAIT:
+ tcp_send_fin (tc);
+ tc->state = TCP_STATE_LAST_ACK;
+ break;
+ case TCP_STATE_FIN_WAIT_1:
+ break;
+ default:
+ clib_warning ("state: %u", tc->state);
+ }
+
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
+
+ /* If in CLOSED and WAITCLOSE timer is not set, delete connection now */
+ if (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID
+ && tc->state == TCP_STATE_CLOSED)
+ tcp_connection_del (tc);
+}
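+
+/*
+ * Close-path transitions implemented above:
+ *   SYN_SENT    -> CLOSED     (nothing to send)
+ *   SYN_RCVD    -> FIN_WAIT_1 (send FIN)
+ *   ESTABLISHED -> FIN_WAIT_1 (send FIN now, or set TCP_CONN_FINPNDG
+ *                              until the tx fifo drains)
+ *   CLOSE_WAIT  -> LAST_ACK   (send FIN)
+ *   FIN_WAIT_1  -> no-op      (FIN already sent)
+ */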
+
+void
+tcp_session_close (u32 conn_index, u32 thread_index)
+{
+ tcp_connection_t *tc;
+ tc = tcp_connection_get (conn_index, thread_index);
+ tcp_connection_close (tc);
+}
+
+void
+tcp_session_cleanup (u32 conn_index, u32 thread_index)
+{
+ tcp_connection_t *tc;
+ tc = tcp_connection_get (conn_index, thread_index);
+
+ /* Wait for the session tx events to clear */
+ tc->state = TCP_STATE_CLOSED;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+}
+
+void *
+ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
+{
+ ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
+ ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
+ ip_interface_address_t *ia = 0;
+
+ if (is_ip4)
+ {
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
+ ({
+ return ip_interface_address_get_address (lm4, ia);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
+ ({
+ ip6_address_t *rv;
+ rv = ip_interface_address_get_address (lm6, ia);
+ /* Trying to use a link-local ip6 src address is a fool's errand */
+ if (!ip6_address_is_link_local_unicast (rv))
+ return rv;
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+#define PORT_MASK ((1 << 16) - 1)
+/**
+ * Allocate a local port and, if successful, add an entry to the local
+ * endpoint table to mark the address/port pair as used.
+ */
+int
+tcp_allocate_local_port (ip46_address_t * ip)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ transport_endpoint_t *tep;
+ u32 tei;
+ u16 min = 1024, max = 65535; /* XXX configurable ? */
+ int tries, limit;
+
+ limit = max - min;
+
+ /* Only support active opens from thread 0 */
+ ASSERT (vlib_get_thread_index () == 0);
+
+ /* Search for first free slot */
+ for (tries = 0; tries < limit; tries++)
+ {
+ u16 port = 0;
+
+ /* Find a port in the specified range */
+ while (1)
+ {
+ port = random_u32 (&tm->port_allocator_seed) & PORT_MASK;
+ if (PREDICT_TRUE (port >= min && port < max))
+ break;
+ }
+
+ /* Look it up */
+ tei = transport_endpoint_lookup (&tm->local_endpoints_table, ip, port);
+ /* If not found, we're done */
+ if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX)
+ {
+ clib_spinlock_lock_if_init (&tm->local_endpoints_lock);
+ tep = transport_endpoint_new ();
+ clib_memcpy (&tep->ip, ip, sizeof (*ip));
+ tep->port = port;
+ transport_endpoint_table_add (&tm->local_endpoints_table, tep,
+ tep - tm->local_endpoints);
+ clib_spinlock_unlock_if_init (&tm->local_endpoints_lock);
+
+ return tep->port;
+ }
+ }
+ return -1;
+}
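+
+/*
+ * The random probe above stays cheap while the port space is sparsely
+ * used: with a fraction f of the (min, max) range already allocated,
+ * the expected number of lookups is roughly 1 / (1 - f) - about two at
+ * 50% occupancy - with the `tries < limit' loop as a backstop.
+ */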
+
+/**
+ * Initialize all connection timers as invalid
+ */
+void
+tcp_connection_timers_init (tcp_connection_t * tc)
+{
+ int i;
+
+ /* Set all to invalid */
+ for (i = 0; i < TCP_N_TIMERS; i++)
+ {
+ tc->timers[i] = TCP_TIMER_HANDLE_INVALID;
+ }
+
+ tc->rto = TCP_RTO_INIT;
+}
+
+/**
+ * Stop all connection timers
+ */
+void
+tcp_connection_timers_reset (tcp_connection_t * tc)
+{
+ int i;
+ for (i = 0; i < TCP_N_TIMERS; i++)
+ {
+ tcp_timer_reset (tc, i);
+ }
+}
+
+#if 0
+typedef struct ip4_tcp_hdr
+{
+ ip4_header_t ip;
+ tcp_header_t tcp;
+} ip4_tcp_hdr_t;
+
+typedef struct ip6_tcp_hdr
+{
+ ip6_header_t ip;
+ tcp_header_t tcp;
+} ip6_tcp_hdr_t;
+
+static void
+tcp_connection_select_lb_bucket (tcp_connection_t * tc, const dpo_id_t * dpo,
+ dpo_id_t * result)
+{
+ const dpo_id_t *choice;
+ load_balance_t *lb;
+ int hash;
+
+ lb = load_balance_get (dpo->dpoi_index);
+ if (tc->c_is_ip4)
+ {
+ ip4_tcp_hdr_t hdr;
+ memset (&hdr, 0, sizeof (hdr));
+ hdr.ip.protocol = IP_PROTOCOL_TCP;
+ hdr.ip.address_pair.src.as_u32 = tc->c_lcl_ip.ip4.as_u32;
+ hdr.ip.address_pair.dst.as_u32 = tc->c_rmt_ip.ip4.as_u32;
+ hdr.tcp.src_port = tc->c_lcl_port;
+ hdr.tcp.dst_port = tc->c_rmt_port;
+ hash = ip4_compute_flow_hash (&hdr.ip, lb->lb_hash_config);
+ }
+ else
+ {
+ ip6_tcp_hdr_t hdr;
+ memset (&hdr, 0, sizeof (hdr));
+ hdr.ip.protocol = IP_PROTOCOL_TCP;
+ clib_memcpy (&hdr.ip.src_address, &tc->c_lcl_ip.ip6,
+ sizeof (ip6_address_t));
+ clib_memcpy (&hdr.ip.dst_address, &tc->c_rmt_ip.ip6,
+ sizeof (ip6_address_t));
+ hdr.tcp.src_port = tc->c_lcl_port;
+ hdr.tcp.dst_port = tc->c_rmt_port;
+ hash = ip6_compute_flow_hash (&hdr.ip, lb->lb_hash_config);
+ }
+ choice = load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+ dpo_copy (result, choice);
+}
+
+fib_node_index_t
+tcp_lookup_rmt_in_fib (tcp_connection_t * tc)
+{
+ fib_prefix_t prefix;
+ u32 fib_index;
+
+ clib_memcpy (&prefix.fp_addr, &tc->c_rmt_ip, sizeof (prefix.fp_addr));
+ prefix.fp_proto = tc->c_is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+ prefix.fp_len = tc->c_is_ip4 ? 32 : 128;
+ fib_index = fib_table_find (prefix.fp_proto, tc->c_vrf);
+ return fib_table_lookup (fib_index, &prefix);
+}
+
+static int
+tcp_connection_stack_on_fib_entry (tcp_connection_t * tc)
+{
+ dpo_id_t choice = DPO_INVALID;
+ u32 output_node_index;
+ fib_entry_t *fe;
+
+ fe = fib_entry_get (tc->c_rmt_fei);
+ if (fe->fe_lb.dpoi_type != DPO_LOAD_BALANCE)
+ return -1;
+
+ tcp_connection_select_lb_bucket (tc, &fe->fe_lb, &choice);
+
+ output_node_index =
+ tc->c_is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
+ dpo_stack_from_node (output_node_index, &tc->c_rmt_dpo, &choice);
+ return 0;
+}
+
+/** Stack tcp connection on peer's fib entry.
+ *
+ * This ultimately populates the dpo the connection will use to send packets.
+ */
+static void
+tcp_connection_fib_attach (tcp_connection_t * tc)
+{
+ tc->c_rmt_fei = tcp_lookup_rmt_in_fib (tc);
+
+ ASSERT (tc->c_rmt_fei != FIB_NODE_INDEX_INVALID);
+
+ tcp_connection_stack_on_fib_entry (tc);
+}
+#endif /* 0 */
+
+/**
+ * Initialize connection send variables.
+ */
+void
+tcp_init_snd_vars (tcp_connection_t * tc)
+{
+ u32 time_now;
+
+ /*
+   * We use the current time both to randomize the iss and to seed the
+   * initial timestamp. Make sure it's updated, otherwise the syn and ack
+   * in the handshake may make it look as if time has flown backwards for
+   * us.
+ */
+ tcp_set_time_now (vlib_get_thread_index ());
+ time_now = tcp_time_now ();
+
+ tc->iss = random_u32 (&time_now);
+ tc->snd_una = tc->iss;
+ tc->snd_nxt = tc->iss + 1;
+ tc->snd_una_max = tc->snd_nxt;
+}
+
+/** Initialize tcp connection variables
+ *
+ * Should be called after having received a msg from the peer, i.e., a SYN or
+ * a SYNACK, such that connection options have already been exchanged. */
+void
+tcp_connection_init_vars (tcp_connection_t * tc)
+{
+ tcp_connection_timers_init (tc);
+ tcp_init_mss (tc);
+ scoreboard_init (&tc->sack_sb);
+ tcp_cc_init (tc);
+ if (tc->state == TCP_STATE_SYN_RCVD)
+ tcp_init_snd_vars (tc);
+
+ // tcp_connection_fib_attach (tc);
+}
+
+int
+tcp_connection_open (transport_endpoint_t * rmt)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_connection_t *tc;
+ fib_prefix_t prefix;
+ fib_node_index_t fei;
+ u32 sw_if_index, fib_index;
+ ip46_address_t lcl_addr;
+ int lcl_port;
+
+ /*
+ * Find the local address and allocate port
+ */
+ memset (&lcl_addr, 0, sizeof (lcl_addr));
+
+ /* Find a FIB path to the destination */
+ clib_memcpy (&prefix.fp_addr, &rmt->ip, sizeof (rmt->ip));
+ prefix.fp_proto = rmt->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+ prefix.fp_len = rmt->is_ip4 ? 32 : 128;
+
+ fib_index = fib_table_find (prefix.fp_proto, rmt->vrf);
+ if (fib_index == (u32) ~ 0)
+ {
+ clib_warning ("no fib table");
+ return -1;
+ }
+
+ fei = fib_table_lookup (fib_index, &prefix);
+
+ /* Couldn't find route to destination. Bail out. */
+ if (fei == FIB_NODE_INDEX_INVALID)
+ {
+ clib_warning ("no route to destination");
+ return -1;
+ }
+
+ sw_if_index = fib_entry_get_resolving_interface (fei);
+
+ if (sw_if_index == (u32) ~ 0)
+ {
+ clib_warning ("no resolving interface for %U", format_ip46_address,
+ &rmt->ip, IP46_TYPE_IP4);
+ return -1;
+ }
+
+ if (rmt->is_ip4)
+ {
+ ip4_address_t *ip4;
+ int index;
+ if (vec_len (tm->ip4_src_addresses))
+ {
+ index = tm->last_v4_address_rotor++;
+ if (tm->last_v4_address_rotor >= vec_len (tm->ip4_src_addresses))
+ tm->last_v4_address_rotor = 0;
+ lcl_addr.ip4.as_u32 = tm->ip4_src_addresses[index].as_u32;
+ }
+ else
+ {
+	  ip4 = ip_interface_get_first_ip (sw_if_index, 1);
+	  /* Guard against an interface with no ip4 address, as the
+	   * ip6 branch below does */
+	  if (ip4 == 0)
+	    return -1;
+	  lcl_addr.ip4.as_u32 = ip4->as_u32;
+ }
+ }
+ else
+ {
+ ip6_address_t *ip6;
+ int index;
+
+ if (vec_len (tm->ip6_src_addresses))
+ {
+ index = tm->last_v6_address_rotor++;
+ if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses))
+ tm->last_v6_address_rotor = 0;
+ clib_memcpy (&lcl_addr.ip6, &tm->ip6_src_addresses[index],
+ sizeof (*ip6));
+ }
+ else
+ {
+ ip6 = ip_interface_get_first_ip (sw_if_index, 0);
+ if (ip6 == 0)
+ {
+ clib_warning ("no routable ip6 addresses on %U",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ return -1;
+ }
+
+ clib_memcpy (&lcl_addr.ip6, ip6, sizeof (*ip6));
+ }
+ }
+
+ /* Allocate source port */
+ lcl_port = tcp_allocate_local_port (&lcl_addr);
+ if (lcl_port < 1)
+ {
+ clib_warning ("Failed to allocate src port");
+ return -1;
+ }
+
+ /*
+ * Create connection and send SYN
+ */
+ clib_spinlock_lock_if_init (&tm->half_open_lock);
+ tc = tcp_half_open_connection_new ();
+ clib_memcpy (&tc->c_rmt_ip, &rmt->ip, sizeof (ip46_address_t));
+ clib_memcpy (&tc->c_lcl_ip, &lcl_addr, sizeof (ip46_address_t));
+ tc->c_rmt_port = rmt->port;
+ tc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
+ tc->c_is_ip4 = rmt->is_ip4;
+ tc->c_transport_proto = TRANSPORT_PROTO_TCP;
+ tc->c_vrf = rmt->vrf;
+ /* The other connection vars will be initialized after SYN ACK */
+ tcp_connection_timers_init (tc);
+
+ TCP_EVT_DBG (TCP_EVT_OPEN, tc);
+ tc->state = TCP_STATE_SYN_SENT;
+ tcp_init_snd_vars (tc);
+ tcp_send_syn (tc);
+ clib_spinlock_unlock_if_init (&tm->half_open_lock);
+
+ return tc->c_c_index;
+}
+
+int
+tcp_session_open (transport_endpoint_t * tep)
+{
+ return tcp_connection_open (tep);
+}
+
+const char *tcp_dbg_evt_str[] = {
+#define _(sym, str) str,
+ foreach_tcp_dbg_evt
+#undef _
+};
+
+const char *tcp_fsm_states[] = {
+#define _(sym, str) str,
+ foreach_tcp_fsm_state
+#undef _
+};
+
+u8 *
+format_tcp_state (u8 * s, va_list * args)
+{
+ u32 state = va_arg (*args, u32);
+
+ if (state < TCP_N_STATES)
+ s = format (s, "%s", tcp_fsm_states[state]);
+ else
+ s = format (s, "UNKNOWN (%d (0x%x))", state, state);
+ return s;
+}
+
+const char *tcp_conn_timers[] = {
+#define _(sym, str) str,
+ foreach_tcp_timer
+#undef _
+};
+
+u8 *
+format_tcp_timers (u8 * s, va_list * args)
+{
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ int i, last = -1;
+
+ for (i = 0; i < TCP_N_TIMERS; i++)
+ if (tc->timers[i] != TCP_TIMER_HANDLE_INVALID)
+ last = i;
+
+ s = format (s, "[");
+ for (i = 0; i < last; i++)
+ {
+ if (tc->timers[i] != TCP_TIMER_HANDLE_INVALID)
+ s = format (s, "%s,", tcp_conn_timers[i]);
+ }
+
+ if (last >= 0)
+ s = format (s, "%s]", tcp_conn_timers[i]);
+ else
+ s = format (s, "]");
+
+ return s;
+}
+
+u8 *
+format_tcp_congestion_status (u8 * s, va_list * args)
+{
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ if (tcp_in_recovery (tc))
+ s = format (s, "recovery");
+ else if (tcp_in_fastrecovery (tc))
+ s = format (s, "fastrecovery");
+ else
+ s = format (s, "none");
+ return s;
+}
+
+u8 *
+format_tcp_vars (u8 * s, va_list * args)
+{
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ s = format (s, " snd_una %u snd_nxt %u snd_una_max %u",
+ tc->snd_una - tc->iss, tc->snd_nxt - tc->iss,
+ tc->snd_una_max - tc->iss);
+ s = format (s, " rcv_nxt %u rcv_las %u\n",
+ tc->rcv_nxt - tc->irs, tc->rcv_las - tc->irs);
+ s = format (s, " snd_wnd %u rcv_wnd %u snd_wl1 %u snd_wl2 %u\n",
+ tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs,
+ tc->snd_wl2 - tc->iss);
+ s = format (s, " flight size %u send space %u rcv_wnd_av %d\n",
+ tcp_flight_size (tc), tcp_available_output_snd_space (tc),
+ tcp_rcv_wnd_available (tc));
+ s = format (s, " cong %U ", format_tcp_congestion_status, tc);
+ s = format (s, "cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n",
+ tc->cwnd, tc->ssthresh, tc->snd_rxt_bytes, tc->bytes_acked);
+ s = format (s, " prev_ssthresh %u snd_congestion %u dupack %u",
+ tc->prev_ssthresh, tc->snd_congestion - tc->iss,
+ tc->rcv_dupacks);
+ s = format (s, " limited_transmit %u\n", tc->limited_transmit - tc->iss);
+ s = format (s, " tsecr %u tsecr_last_ack %u\n", tc->rcv_opts.tsecr,
+ tc->tsecr_last_ack);
+ s = format (s, " rto %u rto_boff %u srtt %u rttvar %u rtt_ts %u ", tc->rto,
+ tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts);
+ s = format (s, "rtt_seq %u\n", tc->rtt_seq);
+ s = format (s, " tsval_recent %u tsval_recent_age %u\n", tc->tsval_recent,
+ tcp_time_now () - tc->tsval_recent_age);
+ if (tc->state >= TCP_STATE_ESTABLISHED)
+ s = format (s, " scoreboard: %U\n", format_tcp_scoreboard, &tc->sack_sb,
+ tc);
+ if (vec_len (tc->snd_sacks))
+ s = format (s, " sacks tx: %U\n", format_tcp_sacks, tc);
+
+ return s;
+}
+
+u8 *
+format_tcp_connection_id (u8 * s, va_list * args)
+{
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ if (!tc)
+ return s;
+ if (tc->c_is_ip4)
+ {
+ s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T",
+ format_ip4_address, &tc->c_lcl_ip4,
+ clib_net_to_host_u16 (tc->c_lcl_port), format_ip4_address,
+ &tc->c_rmt_ip4, clib_net_to_host_u16 (tc->c_rmt_port));
+ }
+ else
+ {
+ s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T",
+ format_ip6_address, &tc->c_lcl_ip6,
+ clib_net_to_host_u16 (tc->c_lcl_port), format_ip6_address,
+ &tc->c_rmt_ip6, clib_net_to_host_u16 (tc->c_rmt_port));
+ }
+
+ return s;
+}
+
+u8 *
+format_tcp_connection (u8 * s, va_list * args)
+{
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ u32 verbose = va_arg (*args, u32);
+
+ if (!tc)
+ return s;
+ s = format (s, "%-50U", format_tcp_connection_id, tc);
+ if (verbose)
+ {
+ s = format (s, "%-15U", format_tcp_state, tc->state);
+ if (verbose > 1)
+ s = format (s, " %U\n%U", format_tcp_timers, tc, format_tcp_vars, tc);
+ }
+
+ return s;
+}
+
+u8 *
+format_tcp_session (u8 * s, va_list * args)
+{
+ u32 tci = va_arg (*args, u32);
+ u32 thread_index = va_arg (*args, u32);
+ u32 verbose = va_arg (*args, u32);
+ tcp_connection_t *tc;
+
+ tc = tcp_connection_get (tci, thread_index);
+ if (tc)
+ s = format (s, "%U", format_tcp_connection, tc, verbose);
+ else
+ s = format (s, "empty\n");
+ return s;
+}
+
+u8 *
+format_tcp_listener_session (u8 * s, va_list * args)
+{
+ u32 tci = va_arg (*args, u32);
+ tcp_connection_t *tc = tcp_listener_get (tci);
+ return format (s, "%U", format_tcp_connection_id, tc);
+}
+
+u8 *
+format_tcp_half_open_session (u8 * s, va_list * args)
+{
+ u32 tci = va_arg (*args, u32);
+ tcp_connection_t *tc = tcp_half_open_connection_get (tci);
+ return format (s, "%U", format_tcp_connection_id, tc);
+}
+
+u8 *
+format_tcp_sacks (u8 * s, va_list * args)
+{
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ sack_block_t *sacks = tc->snd_sacks;
+ sack_block_t *block;
+ int i, len = 0;
+
+ len = vec_len (sacks);
+ for (i = 0; i < len - 1; i++)
+ {
+ block = &sacks[i];
+ s = format (s, " start %u end %u\n", block->start - tc->irs,
+ block->end - tc->irs);
+ }
+ if (len)
+ {
+ block = &sacks[len - 1];
+ s = format (s, " start %u end %u", block->start - tc->irs,
+ block->end - tc->irs);
+ }
+ return s;
+}
+
+u8 *
+format_tcp_rcv_sacks (u8 * s, va_list * args)
+{
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ sack_block_t *sacks = tc->rcv_opts.sacks;
+ sack_block_t *block;
+ int i, len = 0;
+
+ len = vec_len (sacks);
+ for (i = 0; i < len - 1; i++)
+ {
+ block = &sacks[i];
+ s = format (s, " start %u end %u\n", block->start - tc->iss,
+ block->end - tc->iss);
+ }
+ if (len)
+ {
+ block = &sacks[len - 1];
+ s = format (s, " start %u end %u", block->start - tc->iss,
+ block->end - tc->iss);
+ }
+ return s;
+}
+
+u8 *
+format_tcp_sack_hole (u8 * s, va_list * args)
+{
+ sack_scoreboard_hole_t *hole = va_arg (*args, sack_scoreboard_hole_t *);
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ if (tc)
+ s = format (s, " [%u, %u]", hole->start - tc->iss, hole->end - tc->iss);
+ else
+ s = format (s, " [%u, %u]", hole->start, hole->end);
+ return s;
+}
+
+u8 *
+format_tcp_scoreboard (u8 * s, va_list * args)
+{
+ sack_scoreboard_t *sb = va_arg (*args, sack_scoreboard_t *);
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ sack_scoreboard_hole_t *hole;
+ s = format (s, "sacked_bytes %u last_sacked_bytes %u lost_bytes %u\n",
+ sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes);
+ s = format (s, " last_bytes_delivered %u high_sacked %u snd_una_adv %u\n",
+ sb->last_bytes_delivered, sb->high_sacked, sb->snd_una_adv);
+ s = format (s, " cur_rxt_hole %u high_rxt %u rescue_rxt %u",
+ sb->cur_rxt_hole, sb->high_rxt, sb->rescue_rxt);
+
+ hole = scoreboard_first_hole (sb);
+ if (hole)
+ s = format (s, "\n head %u tail %u holes:\n", sb->head, sb->tail);
+
+ while (hole)
+ {
+ s = format (s, "%U", format_tcp_sack_hole, hole, tc);
+ hole = scoreboard_next_hole (sb, hole);
+ }
+
+ return s;
+}
+
+transport_connection_t *
+tcp_session_get_transport (u32 conn_index, u32 thread_index)
+{
+ tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
+ return &tc->connection;
+}
+
+transport_connection_t *
+tcp_half_open_session_get_transport (u32 conn_index)
+{
+ tcp_connection_t *tc = tcp_half_open_connection_get (conn_index);
+ return &tc->connection;
+}
+
+/**
+ * Compute maximum segment size for session layer.
+ *
+ * Since the result needs to be the actual data length, it first computes
+ * the tcp options to be used in the next burst and subtracts their
+ * length from the connection's snd_mss.
+ */
+u16
+tcp_session_send_mss (transport_connection_t * trans_conn)
+{
+ tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
+
+  /* Ensure snd_mss accurately reflects the amount of data we can push
+   * in a segment. This also ensures that options are updated according
+   * to the current state of the connection. */
+ tcp_update_snd_mss (tc);
+
+ return tc->snd_mss;
+}
+
+always_inline u32
+tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space)
+{
+ if (PREDICT_FALSE (tc->snd_wnd < tc->snd_mss))
+ {
+ return tc->snd_wnd <= snd_space ? tc->snd_wnd : 0;
+ }
+
+  /* If we're not snd_wnd constrained but can't write at least a full
+   * segment, don't try at all */
+ if (PREDICT_FALSE (snd_space < tc->snd_mss))
+ return snd_space < tc->cwnd ? 0 : snd_space;
+
+ /* round down to mss multiple */
+ return snd_space - (snd_space % tc->snd_mss);
+}
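+
+/* Worked example: with snd_mss 1448 and 4000 bytes of available space,
+ * the rounding above yields 2896 (2 * mss). If snd_wnd were only 500,
+ * i.e. less than one mss, we'd return 500 when it fits in snd_space,
+ * else 0. */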
+
+/**
+ * Compute tx window session is allowed to fill.
+ *
+ * Takes into account available send space, snd_mss and the congestion
+ * state of the connection. If possible, the value returned is a multiple
+ * of snd_mss.
+ *
+ * @param tc tcp connection
+ * @return number of bytes session is allowed to write
+ */
+u32
+tcp_snd_space (tcp_connection_t * tc)
+{
+ int snd_space, snt_limited;
+
+ if (PREDICT_TRUE (tcp_in_cong_recovery (tc) == 0))
+ {
+ snd_space = tcp_available_output_snd_space (tc);
+
+      /* If we got dupacks but no newly sacked bytes, restrict what we
+       * send to at most 2 SMSS as per Limited Transmit (RFC3042) */
+ if (PREDICT_FALSE (tc->rcv_dupacks != 0
+ && (tcp_opts_sack_permitted (tc)
+ && tc->sack_sb.last_sacked_bytes == 0)))
+ {
+ if (tc->rcv_dupacks == 1 && tc->limited_transmit != tc->snd_nxt)
+ tc->limited_transmit = tc->snd_nxt;
+ ASSERT (seq_leq (tc->limited_transmit, tc->snd_nxt));
+
+ snt_limited = tc->snd_nxt - tc->limited_transmit;
+ snd_space = clib_max (2 * tc->snd_mss - snt_limited, 0);
+ }
+ return tcp_round_snd_space (tc, snd_space);
+ }
+
+ if (tcp_in_recovery (tc))
+ {
+ tc->snd_nxt = tc->snd_una_max;
+ snd_space = tcp_available_snd_wnd (tc) - tc->snd_rxt_bytes
+ - (tc->snd_una_max - tc->snd_congestion);
+ if (snd_space <= 0 || (tc->snd_una_max - tc->snd_una) >= tc->snd_wnd)
+ return 0;
+ return tcp_round_snd_space (tc, snd_space);
+ }
+
+ /* RFC 5681: When previously unsent data is available and the new value of
+ * cwnd and the receiver's advertised window allow, a TCP SHOULD send 1*SMSS
+ * bytes of previously unsent data. */
+ if (tcp_in_fastrecovery (tc) && !tcp_fastrecovery_sent_1_smss (tc))
+ {
+ if (tcp_available_output_snd_space (tc) < tc->snd_mss)
+ return 0;
+ tcp_fastrecovery_1_smss_on (tc);
+ return tc->snd_mss;
+ }
+
+ return 0;
+}
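+
+/* Limited transmit example: on the first dupack with no newly sacked
+ * bytes and snd_mss 1448, snt_limited starts at 0, so up to
+ * 2 * 1448 = 2896 bytes of new data may still be pushed while waiting
+ * to reach TCP_DUPACK_THRESHOLD. */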
+
+u32
+tcp_session_send_space (transport_connection_t * trans_conn)
+{
+ tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
+ return clib_min (tcp_snd_space (tc),
+ tc->snd_wnd - (tc->snd_nxt - tc->snd_una));
+}
+
+i32
+tcp_rcv_wnd_available (tcp_connection_t * tc)
+{
+ return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
+}
+
+u32
+tcp_session_tx_fifo_offset (transport_connection_t * trans_conn)
+{
+ tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
+
+ ASSERT (seq_geq (tc->snd_nxt, tc->snd_una));
+
+ /* This still works if fast retransmit is on */
+ return (tc->snd_nxt - tc->snd_una);
+}
+
+/* *INDENT-OFF* */
+static const transport_proto_vft_t tcp_proto = {
+ .bind = tcp_session_bind,
+ .unbind = tcp_session_unbind,
+ .push_header = tcp_push_header,
+ .get_connection = tcp_session_get_transport,
+ .get_listener = tcp_session_get_listener,
+ .get_half_open = tcp_half_open_session_get_transport,
+ .open = tcp_session_open,
+ .close = tcp_session_close,
+ .cleanup = tcp_session_cleanup,
+ .send_mss = tcp_session_send_mss,
+ .send_space = tcp_session_send_space,
+ .tx_fifo_offset = tcp_session_tx_fifo_offset,
+ .format_connection = format_tcp_session,
+ .format_listener = format_tcp_listener_session,
+ .format_half_open = format_tcp_half_open_session,
+};
+/* *INDENT-ON* */
+
+void
+tcp_timer_keep_handler (u32 conn_index)
+{
+ u32 thread_index = vlib_get_thread_index ();
+ tcp_connection_t *tc;
+
+ tc = tcp_connection_get (conn_index, thread_index);
+ tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID;
+
+ tcp_connection_close (tc);
+}
+
+void
+tcp_timer_establish_handler (u32 conn_index)
+{
+ tcp_connection_t *tc;
+
+ tc = tcp_half_open_connection_get (conn_index);
+ if (tc)
+ {
+ ASSERT (tc->state == TCP_STATE_SYN_SENT);
+ stream_session_connect_notify (&tc->connection, 1 /* fail */ );
+ TCP_DBG ("establish pop: %U", format_tcp_connection, tc, 2);
+ }
+ else
+ {
+ tc = tcp_connection_get (conn_index, vlib_get_thread_index ());
+ /* note: the connection may have already disappeared */
+ if (PREDICT_FALSE (tc == 0))
+ return;
+ TCP_DBG ("establish pop: %U", format_tcp_connection, tc, 2);
+ ASSERT (tc->state == TCP_STATE_SYN_RCVD);
+      /* Start cleanup. App wasn't notified yet so use delete notify as
+       * opposed to delete to clean up session layer state. */
+ stream_session_delete_notify (&tc->connection);
+ }
+ tc->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID;
+ tcp_connection_cleanup (tc);
+}
+
+void
+tcp_timer_waitclose_handler (u32 conn_index)
+{
+ u32 thread_index = vlib_get_thread_index ();
+ tcp_connection_t *tc;
+
+ tc = tcp_connection_get (conn_index, thread_index);
+ if (!tc)
+ return;
+ tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID;
+
+ /* Session didn't come back with a close(). Send FIN either way
+ * and switch to LAST_ACK. */
+ if (tc->state == TCP_STATE_CLOSE_WAIT)
+ {
+ if (tc->flags & TCP_CONN_FINSNT)
+ {
+ clib_warning ("FIN was sent and still in CLOSE WAIT. Weird!");
+ }
+
+ tcp_send_fin (tc);
+ tc->state = TCP_STATE_LAST_ACK;
+
+ /* Make sure we don't wait in LAST ACK forever */
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+
+ /* Don't delete the connection yet */
+ return;
+ }
+
+ tcp_connection_del (tc);
+}
+
+/* *INDENT-OFF* */
+static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
+{
+ tcp_timer_retransmit_handler,
+ tcp_timer_delack_handler,
+ tcp_timer_persist_handler,
+ tcp_timer_keep_handler,
+ tcp_timer_waitclose_handler,
+ tcp_timer_retransmit_syn_handler,
+ tcp_timer_establish_handler
+};
+/* *INDENT-ON* */
+
+static void
+tcp_expired_timers_dispatch (u32 * expired_timers)
+{
+ int i;
+ u32 connection_index, timer_id;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ /* Get session index and timer id */
+ connection_index = expired_timers[i] & 0x0FFFFFFF;
+ timer_id = expired_timers[i] >> 28;
+
+ TCP_EVT_DBG (TCP_EVT_TIMER_POP, connection_index, timer_id);
+
+ /* Handle expiration */
+ (*timer_expiration_handlers[timer_id]) (connection_index);
+ }
+}
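+
+/* An expired handle packs the timer id in its top 4 bits and the
+ * connection index in the low 28, e.g. 0x3000000a decodes to timer_id 3
+ * (TCP_TIMER_KEEP) and connection_index 10. */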
+
+void
+tcp_initialize_timer_wheels (tcp_main_t * tm)
+{
+ tw_timer_wheel_16t_2w_512sl_t *tw;
+ /* *INDENT-OFF* */
+ foreach_vlib_main (({
+ tw = &tm->timer_wheels[ii];
+ tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch,
+ 100e-3 /* timer period 100ms */ , ~0);
+ tw->last_run_time = vlib_time_now (this_vlib_main);
+ }));
+ /* *INDENT-ON* */
+}
+
+clib_error_t *
+tcp_main_enable (vlib_main_t * vm)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ ip_protocol_info_t *pi;
+ ip_main_t *im = &ip_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ clib_error_t *error = 0;
+ u32 num_threads;
+ int thread;
+ tcp_connection_t *tc __attribute__ ((unused));
+ u32 preallocated_connections_per_thread;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+ if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+ return error;
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ /*
+ * Registrations
+ */
+
+ /* Register with IP */
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP);
+ if (pi == 0)
+ return clib_error_return (0, "TCP protocol info AWOL");
+ pi->format_header = format_tcp_header;
+ pi->unformat_pg_edit = unformat_pg_tcp_header;
+
+ ip4_register_protocol (IP_PROTOCOL_TCP, tcp4_input_node.index);
+ ip6_register_protocol (IP_PROTOCOL_TCP, tcp6_input_node.index);
+
+ /* Register as transport with session layer */
+ session_register_transport (TRANSPORT_PROTO_TCP, 1, &tcp_proto);
+ session_register_transport (TRANSPORT_PROTO_TCP, 0, &tcp_proto);
+
+ /*
+ * Initialize data structures
+ */
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (tm->connections, num_threads - 1);
+
+ /*
+   * Preallocate connections. Assume that thread 0 won't
+   * use preallocated connections when running multi-core
+ */
+ if (num_threads == 1)
+ {
+ thread = 0;
+ preallocated_connections_per_thread = tm->preallocated_connections;
+ }
+ else
+ {
+ thread = 1;
+ preallocated_connections_per_thread =
+ tm->preallocated_connections / (num_threads - 1);
+ }
+ for (; thread < num_threads; thread++)
+ {
+ if (preallocated_connections_per_thread)
+ pool_init_fixed (tm->connections[thread],
+ preallocated_connections_per_thread);
+ }
+
+ /*
+ * Use a preallocated half-open connection pool?
+ */
+ if (tm->preallocated_half_open_connections)
+ pool_init_fixed (tm->half_open_connections,
+ tm->preallocated_half_open_connections);
+
+ /* Initialize per worker thread tx buffers (used for control messages) */
+ vec_validate (tm->tx_buffers, num_threads - 1);
+
+ /* Initialize timer wheels */
+ vec_validate (tm->timer_wheels, num_threads - 1);
+ tcp_initialize_timer_wheels (tm);
+
+ /* Initialize clocks per tick for TCP timestamp. Used to compute
+ * monotonically increasing timestamps. */
+ tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock
+ / TCP_TSTAMP_RESOLUTION;
+
+ if (tm->local_endpoints_table_buckets == 0)
+ tm->local_endpoints_table_buckets = 250000;
+ if (tm->local_endpoints_table_memory == 0)
+ tm->local_endpoints_table_memory = 512 << 20;
+
+ clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoint table",
+ tm->local_endpoints_table_buckets,
+ tm->local_endpoints_table_memory);
+
+ /* Initialize [port-allocator] random number seed */
+ tm->port_allocator_seed = (u32) clib_cpu_time_now ();
+
+ if (num_threads > 1)
+ {
+ clib_spinlock_init (&tm->half_open_lock);
+ clib_spinlock_init (&tm->local_endpoints_lock);
+ }
+
+ vec_validate (tm->tx_frames[0], num_threads - 1);
+ vec_validate (tm->tx_frames[1], num_threads - 1);
+ vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1);
+ vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1);
+
+ tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size
+ (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ vec_validate (tm->time_now, num_threads - 1);
+ return error;
+}
+
+clib_error_t *
+vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en)
+{
+ if (is_en)
+ {
+ if (tcp_main.is_enabled)
+ return 0;
+
+ return tcp_main_enable (vm);
+ }
+ else
+ {
+ tcp_main.is_enabled = 0;
+ }
+
+ return 0;
+}
+
+void
+tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add)
+{
+ tcp_main_t *tm = &tcp_main;
+ if (is_ip4)
+ tm->punt_unknown4 = is_add;
+ else
+ tm->punt_unknown6 = is_add;
+}
+
+clib_error_t *
+tcp_init (vlib_main_t * vm)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tm->is_enabled = 0;
+ tcp_api_reference ();
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (tcp_init);
+
+static clib_error_t *
+tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ u64 tmp;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "preallocated-connections %d",
+ &tm->preallocated_connections))
+ ;
+ else if (unformat (input, "preallocated-half-open-connections %d",
+ &tm->preallocated_half_open_connections))
+ ;
+ else if (unformat (input, "local-endpoints-table-memory %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000)
+ return clib_error_return (0, "memory size %llx (%lld) too large",
+ tmp, tmp);
+ tm->local_endpoints_table_memory = tmp;
+ }
+ else if (unformat (input, "local-endpoints-table-buckets %d",
+ &tm->local_endpoints_table_buckets))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (tcp_config_fn, "tcp");
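+
+/* Illustrative startup.conf stanza exercising the options parsed above
+ * (values are examples only, not recommendations):
+ *
+ *   tcp {
+ *     preallocated-connections 100000
+ *     preallocated-half-open-connections 10000
+ *     local-endpoints-table-buckets 250000
+ *     local-endpoints-table-memory 512m
+ *   }
+ */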
+
+/**
+ * @brief Configure an IPv4 source address range
+ * @param vm vlib_main_t pointer
+ * @param start first ipv4 address in the source address range
+ * @param end last ipv4 address in the source address range
+ * @param table_id VRF / table ID, 0 for the default FIB
+ * @return 0 if all OK, else an error indication from api_errno.h
+ */
+int
+tcp_configure_v4_source_address_range (vlib_main_t * vm,
+ ip4_address_t * start,
+ ip4_address_t * end, u32 table_id)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 start_host_byte_order, end_host_byte_order;
+ fib_prefix_t prefix;
+ vnet_sw_interface_t *si;
+ fib_node_index_t fei;
+ u32 fib_index = 0;
+ u32 sw_if_index;
+ int rv;
+ int vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
+ ip4_address_t * hi_addr, u32 fib_index,
+ int is_del);
+
+ memset (&prefix, 0, sizeof (prefix));
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
+
+ if (fib_index == ~0)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ start_host_byte_order = clib_net_to_host_u32 (start->as_u32);
+ end_host_byte_order = clib_net_to_host_u32 (end->as_u32);
+
+  /* sanity check: reject reversed args or ranges bigger than ~10K addresses */
+ if ((end_host_byte_order - start_host_byte_order) > (10 << 10))
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+
+ /* Lookup the last address, to identify the interface involved */
+ prefix.fp_len = 32;
+ prefix.fp_proto = FIB_PROTOCOL_IP4;
+ memcpy (&prefix.fp_addr.ip4, end, sizeof (ip4_address_t));
+
+ fei = fib_table_lookup (fib_index, &prefix);
+
+ /* Couldn't find route to destination. Bail out. */
+ if (fei == FIB_NODE_INDEX_INVALID)
+ return VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
+
+ sw_if_index = fib_entry_get_resolving_interface (fei);
+
+ /* Enable proxy arp on the interface */
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
+
+ /* Configure proxy arp across the range */
+ rv = vnet_proxy_arp_add_del (start, end, fib_index, 0 /* is_del */ );
+
+ if (rv)
+ return rv;
+
+ do
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ vec_add1 (tm->ip4_src_addresses, start[0]);
+
+ /* Add local adjacencies for the range */
+
+ receive_dpo_add_or_lock (DPO_PROTO_IP4, ~0 /* sw_if_index */ ,
+ NULL, &dpo);
+ prefix.fp_len = 32;
+ prefix.fp_proto = FIB_PROTOCOL_IP4;
+ prefix.fp_addr.ip4.as_u32 = start->as_u32;
+
+ fib_table_entry_special_dpo_update (fib_index,
+ &prefix,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
+
+ start_host_byte_order++;
+ start->as_u32 = clib_host_to_net_u32 (start_host_byte_order);
+ }
+ while (start_host_byte_order <= end_host_byte_order);
+
+ return 0;
+}
+
+/**
+ * @brief Configure an IPv6 source address range
+ * @param vm vlib_main_t pointer
+ * @param start first ipv6 address in the source address range
+ * @param end last ipv6 address in the source address range
+ * @param table_id VRF / table ID, 0 for the default FIB
+ * @return 0 if all OK, else an error indication from api_errno.h
+ */
+int
+tcp_configure_v6_source_address_range (vlib_main_t * vm,
+ ip6_address_t * start,
+ ip6_address_t * end, u32 table_id)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ fib_prefix_t prefix;
+ u32 fib_index = 0;
+ fib_node_index_t fei;
+ u32 sw_if_index;
+
+ memset (&prefix, 0, sizeof (prefix));
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
+
+ if (fib_index == ~0)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ while (1)
+ {
+ int i;
+ ip6_address_t tmp;
+ dpo_id_t dpo = DPO_INVALID;
+
+ /* Remember this address */
+ vec_add1 (tm->ip6_src_addresses, start[0]);
+
+ /* Lookup the prefix, to identify the interface involved */
+ prefix.fp_len = 128;
+ prefix.fp_proto = FIB_PROTOCOL_IP6;
+ memcpy (&prefix.fp_addr.ip6, start, sizeof (ip6_address_t));
+
+ fei = fib_table_lookup (fib_index, &prefix);
+
+ /* Couldn't find route to destination. Bail out. */
+ if (fei == FIB_NODE_INDEX_INVALID)
+ return VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
+
+ sw_if_index = fib_entry_get_resolving_interface (fei);
+
+ if (sw_if_index == (u32) ~ 0)
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+ /* Add a proxy neighbor discovery entry for this address */
+ ip6_neighbor_proxy_add_del (sw_if_index, start, 0 /* is_del */ );
+
+ /* Add a receive adjacency for this address */
+ receive_dpo_add_or_lock (DPO_PROTO_IP6, ~0 /* sw_if_index */ ,
+ NULL, &dpo);
+
+ fib_table_entry_special_dpo_update (fib_index,
+ &prefix,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
+
+ /* Done with the entire range? */
+ if (!memcmp (start, end, sizeof (start[0])))
+ break;
+
+      /* Increment the address, byte-wise with carry. */
+ tmp = start[0];
+ for (i = 15; i >= 0; i--)
+ {
+ tmp.as_u8[i] += 1;
+ if (tmp.as_u8[i] != 0)
+ break;
+ }
+ start[0] = tmp;
+ }
+ return 0;
+}
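+
+/* The byte-wise increment above carries across octets, e.g.
+ * 2001:db8::ff steps to 2001:db8::100, so the loop visits every
+ * address in the range exactly once. */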
+
+static clib_error_t *
+tcp_src_address (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd_arg)
+{
+ ip4_address_t v4start, v4end;
+ ip6_address_t v6start, v6end;
+ u32 table_id = 0;
+ int v4set = 0;
+ int v6set = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U - %U", unformat_ip4_address, &v4start,
+ unformat_ip4_address, &v4end))
+ v4set = 1;
+ else if (unformat (input, "%U", unformat_ip4_address, &v4start))
+ {
+ memcpy (&v4end, &v4start, sizeof (v4start));
+ v4set = 1;
+ }
+ else if (unformat (input, "%U - %U", unformat_ip6_address, &v6start,
+ unformat_ip6_address, &v6end))
+ v6set = 1;
+ else if (unformat (input, "%U", unformat_ip6_address, &v6start))
+ {
+ memcpy (&v6end, &v6start, sizeof (v6start));
+ v6set = 1;
+ }
+ else if (unformat (input, "fib-table %d", &table_id))
+ ;
+ else
+ break;
+ }
+
+ if (!v4set && !v6set)
+ return clib_error_return (0, "at least one v4 or v6 address required");
+
+ if (v4set)
+ {
+ rv = tcp_configure_v4_source_address_range (vm, &v4start, &v4end,
+ table_id);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "Invalid table-id %d", table_id);
+
+ case VNET_API_ERROR_INVALID_ARGUMENT:
+ return clib_error_return (0, "Invalid address range %U - %U",
+ format_ip4_address, &v4start,
+ format_ip4_address, &v4end);
+ default:
+ return clib_error_return (0, "error %d", rv);
+ break;
+ }
+ }
+ if (v6set)
+ {
+ rv = tcp_configure_v6_source_address_range (vm, &v6start, &v6end,
+ table_id);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "Invalid table-id %d", table_id);
+
+ default:
+ return clib_error_return (0, "error %d", rv);
+ break;
+ }
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tcp_src_address_command, static) =
+{
+ .path = "tcp src-address",
+  .short_help = "tcp src-address <ip-addr> [- <ip-addr>] [fib-table <id>]",
+ .function = tcp_src_address,
+};
+/* *INDENT-ON* */
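+
+/* Example invocations (addresses purely illustrative):
+ *   tcp src-address 192.0.2.1 - 192.0.2.64
+ *   tcp src-address 2001:db8::10 - 2001:db8::4f fib-table 0
+ */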
+
+static u8 *
+tcp_scoreboard_dump_trace (u8 * s, sack_scoreboard_t * sb)
+{
+#if TCP_SCOREBOARD_TRACE
+
+ scoreboard_trace_elt_t *block;
+ int i = 0;
+
+ if (!sb->trace)
+ return s;
+
+ s = format (s, "scoreboard trace:");
+ vec_foreach (block, sb->trace)
+ {
+ s = format (s, "{%u, %u, %u, %u, %u}, ", block->start, block->end,
+ block->ack, block->snd_una_max, block->group);
+ if ((++i % 3) == 0)
+ s = format (s, "\n");
+ }
+ return s;
+#else
+  return s;
+#endif
+}
+
+static clib_error_t *
+tcp_show_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg)
+{
+ transport_connection_t *tconn = 0;
+ tcp_connection_t *tc;
+ u8 *s = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_transport_connection, &tconn,
+ TRANSPORT_PROTO_TCP))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (!TCP_SCOREBOARD_TRACE)
+ {
+ vlib_cli_output (vm, "scoreboard tracing not enabled");
+ return 0;
+ }
+
+ tc = tcp_get_connection_from_transport (tconn);
+ s = tcp_scoreboard_dump_trace (s, &tc->sack_sb);
+ vlib_cli_output (vm, "%v", s);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tcp_show_scoreboard_trace_command, static) =
+{
+ .path = "show tcp scoreboard trace",
+ .short_help = "show tcp scoreboard trace <connection>",
+ .function = tcp_show_scoreboard_trace_fn,
+};
+/* *INDENT-ON* */
+
+u8 *
+tcp_scoreboard_replay (u8 * s, tcp_connection_t * tc, u8 verbose)
+{
+ int i, trace_len;
+ scoreboard_trace_elt_t *trace;
+  u32 next_ack = 0, left, group, has_new_ack = 0;
+ tcp_connection_t _dummy_tc, *dummy_tc = &_dummy_tc;
+ sack_block_t *block;
+
+ if (!tc)
+ return s;
+
+ memset (dummy_tc, 0, sizeof (*dummy_tc));
+ tcp_connection_timers_init (dummy_tc);
+ scoreboard_init (&dummy_tc->sack_sb);
+ dummy_tc->rcv_opts.flags |= TCP_OPTS_FLAG_SACK;
+
+#if TCP_SCOREBOARD_TRACE
+ trace = tc->sack_sb.trace;
+ trace_len = vec_len (tc->sack_sb.trace);
+#else
+ trace = 0;
+ trace_len = 0;
+#endif
+
+ for (i = 0; i < trace_len; i++)
+ {
+ if (trace[i].ack != 0)
+ {
+ dummy_tc->snd_una = trace[i].ack - 1448;
+ dummy_tc->snd_una_max = trace[i].ack;
+ }
+ }
+
+ left = 0;
+ while (left < trace_len)
+ {
+ group = trace[left].group;
+ vec_reset_length (dummy_tc->rcv_opts.sacks);
+ has_new_ack = 0;
+ while (trace[left].group == group)
+ {
+ if (trace[left].ack != 0)
+ {
+ if (verbose)
+ s = format (s, "Adding ack %u, snd_una_max %u, segs: ",
+ trace[left].ack, trace[left].snd_una_max);
+ dummy_tc->snd_una_max = trace[left].snd_una_max;
+ next_ack = trace[left].ack;
+ has_new_ack = 1;
+ }
+ else
+ {
+ if (verbose)
+ s = format (s, "[%u, %u], ", trace[left].start,
+ trace[left].end);
+ vec_add2 (dummy_tc->rcv_opts.sacks, block, 1);
+ block->start = trace[left].start;
+ block->end = trace[left].end;
+ }
+ left++;
+ }
+
+ /* Push segments */
+ tcp_rcv_sacks (dummy_tc, next_ack);
+ if (has_new_ack)
+ dummy_tc->snd_una = next_ack + dummy_tc->sack_sb.snd_una_adv;
+
+ if (verbose)
+ s = format (s, "result: %U", format_tcp_scoreboard,
+ &dummy_tc->sack_sb);
+
+ }
+ s = format (s, "result: %U", format_tcp_scoreboard, &dummy_tc->sack_sb);
+
+ return s;
+}
+
+static clib_error_t *
+tcp_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg)
+{
+ transport_connection_t *tconn = 0;
+ tcp_connection_t *tc = 0;
+ u8 *str = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_transport_connection, &tconn,
+ TRANSPORT_PROTO_TCP))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (!TCP_SCOREBOARD_TRACE)
+ {
+ vlib_cli_output (vm, "scoreboard tracing not enabled");
+ return 0;
+ }
+
+ tc = tcp_get_connection_from_transport (tconn);
+ if (!tc)
+ {
+ vlib_cli_output (vm, "connection not found");
+ return 0;
+ }
+ str = tcp_scoreboard_replay (str, tc, 1);
+ vlib_cli_output (vm, "%v", str);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tcp_replay_scoreboard_command, static) =
+{
+ .path = "tcp replay scoreboard",
+ .short_help = "tcp replay scoreboard <connection>",
+ .function = tcp_scoreboard_trace_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+ vlib_cli_output (vm, "IPv4 TCP punt: %s",
+ tm->punt_unknown4 ? "enabled" : "disabled");
+ vlib_cli_output (vm, "IPv6 TCP punt: %s",
+ tm->punt_unknown6 ? "enabled" : "disabled");
+ return 0;
+}
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
+{
+ .path = "show tcp punt",
+ .short_help = "show tcp punt",
+ .function = show_tcp_punt_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
new file mode 100644
index 00000000..259dbca1
--- /dev/null
+++ b/src/vnet/tcp/tcp.h
@@ -0,0 +1,985 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _vnet_tcp_h_
+#define _vnet_tcp_h_
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/tcp/tcp_timer.h>
+#include <vnet/session/transport.h>
+#include <vnet/session/session.h>
+#include <vnet/tcp/tcp_debug.h>
+
+#define TCP_TICK 0.001 /**< TCP tick period (s) */
+#define THZ (u32) (1/TCP_TICK) /**< TCP tick frequency */
+#define TCP_TSTAMP_RESOLUTION TCP_TICK /**< Time stamp resolution */
+#define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */
+#define TCP_FIB_RECHECK_PERIOD 1 * THZ /**< Recheck every 1s */
+#define TCP_MAX_OPTION_SPACE 40
+
+#define TCP_DUPACK_THRESHOLD 3
+#define TCP_MAX_RX_FIFO_SIZE 4 << 20
+#define TCP_MIN_RX_FIFO_SIZE 4 << 10
+#define TCP_IW_N_SEGMENTS 10
+#define TCP_ALWAYS_ACK 1 /**< On/off delayed acks */
+#define TCP_USE_SACKS 1 /**< Disable only for testing */
+
+/** TCP FSM state definitions as per RFC793. */
+#define foreach_tcp_fsm_state \
+ _(CLOSED, "CLOSED") \
+ _(LISTEN, "LISTEN") \
+ _(SYN_SENT, "SYN_SENT") \
+ _(SYN_RCVD, "SYN_RCVD") \
+ _(ESTABLISHED, "ESTABLISHED") \
+ _(CLOSE_WAIT, "CLOSE_WAIT") \
+ _(FIN_WAIT_1, "FIN_WAIT_1") \
+ _(LAST_ACK, "LAST_ACK") \
+ _(CLOSING, "CLOSING") \
+ _(FIN_WAIT_2, "FIN_WAIT_2") \
+ _(TIME_WAIT, "TIME_WAIT")
+
+typedef enum _tcp_state
+{
+#define _(sym, str) TCP_STATE_##sym,
+ foreach_tcp_fsm_state
+#undef _
+ TCP_N_STATES
+} tcp_state_t;
+
+format_function_t format_tcp_state;
+format_function_t format_tcp_flags;
+format_function_t format_tcp_sacks;
+format_function_t format_tcp_rcv_sacks;
+
+/** TCP timers */
+#define foreach_tcp_timer \
+ _(RETRANSMIT, "RETRANSMIT") \
+ _(DELACK, "DELAYED ACK") \
+ _(PERSIST, "PERSIST") \
+ _(KEEP, "KEEP") \
+ _(WAITCLOSE, "WAIT CLOSE") \
+ _(RETRANSMIT_SYN, "RETRANSMIT SYN") \
+ _(ESTABLISH, "ESTABLISH")
+
+typedef enum _tcp_timers
+{
+#define _(sym, str) TCP_TIMER_##sym,
+ foreach_tcp_timer
+#undef _
+ TCP_N_TIMERS
+} tcp_timers_e;
+
+typedef void (timer_expiration_handler) (u32 index);
+
+extern timer_expiration_handler tcp_timer_delack_handler;
+extern timer_expiration_handler tcp_timer_retransmit_handler;
+extern timer_expiration_handler tcp_timer_persist_handler;
+extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
+
+#define TCP_TIMER_HANDLE_INVALID ((u32) ~0)
+
+/* Timer delays as multiples of 100ms */
+#define TCP_TO_TIMER_TICK       TCP_TICK*10	/* Factor to convert TCP ticks
+						 * (1ms) to 100ms timer-wheel ticks */
+#define TCP_DELACK_TIME 1 /* 0.1s */
+#define TCP_ESTABLISH_TIME 750 /* 75s */
+#define TCP_SYN_RCVD_TIME 600 /* 60s */
+#define TCP_2MSL_TIME 300 /* 30s */
+#define TCP_CLOSEWAIT_TIME 20 /* 2s */
+#define TCP_TIMEWAIT_TIME 20 /* 2s */
+#define TCP_CLEANUP_TIME 10 /* 1s Time to wait before cleanup */
+#define TCP_TIMER_PERSIST_MIN 2 /* 0.2s */
+
+#define TCP_RTO_MAX 60 * THZ	/* Max RTO (60s), the minimum cap allowed by RFC6298 */
+#define TCP_RTO_MIN 0.2 * THZ /* Min RTO (200ms) - lower than standard */
+#define TCP_RTT_MAX 30 * THZ /* 30s (probably too much) */
+#define TCP_RTO_SYN_RETRIES 3 /* SYN retries without doubling RTO */
+#define TCP_RTO_INIT 1 * THZ /* Initial retransmit timer */
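+
+/* Example: an initial RTO of 1 * THZ = 1000 TCP ticks becomes
+ * 1000 * TCP_TO_TIMER_TICK = 10 timer-wheel ticks, i.e. 1s at the
+ * wheel's 100ms granularity (see tcp_retransmit_timer_set below). */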
+
+/** TCP connection flags */
+#define foreach_tcp_connection_flag \
+ _(SNDACK, "Send ACK") \
+ _(FINSNT, "FIN sent") \
+ _(SENT_RCV_WND0, "Sent 0 receive window") \
+ _(RECOVERY, "Recovery on") \
+ _(FAST_RECOVERY, "Fast Recovery on") \
+ _(FR_1_SMSS, "Sent 1 SMSS") \
+ _(HALF_OPEN_DONE, "Half-open completed") \
+ _(FINPNDG, "FIN pending")
+
+typedef enum _tcp_connection_flag_bits
+{
+#define _(sym, str) TCP_CONN_##sym##_BIT,
+ foreach_tcp_connection_flag
+#undef _
+ TCP_CONN_N_FLAG_BITS
+} tcp_connection_flag_bits_e;
+
+typedef enum _tcp_connection_flag
+{
+#define _(sym, str) TCP_CONN_##sym = 1 << TCP_CONN_##sym##_BIT,
+ foreach_tcp_connection_flag
+#undef _
+ TCP_CONN_N_FLAGS
+} tcp_connection_flags_e;
+
+/** TCP buffer flags */
+#define foreach_tcp_buf_flag \
+ _ (ACK) /**< Sending ACK. */ \
+  _ (DUPACK)  /**< Sending DUPACK. */
+
+enum
+{
+#define _(f) TCP_BUF_BIT_##f,
+ foreach_tcp_buf_flag
+#undef _
+ TCP_N_BUF_BITS,
+};
+
+enum
+{
+#define _(f) TCP_BUF_FLAG_##f = 1 << TCP_BUF_BIT_##f,
+ foreach_tcp_buf_flag
+#undef _
+};
+
+#define TCP_SCOREBOARD_TRACE (0)
+#define TCP_MAX_SACK_BLOCKS 15 /**< Max number of SACK blocks stored */
+#define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0)
+
+typedef struct _scoreboard_trace_elt
+{
+ u32 start;
+ u32 end;
+ u32 ack;
+ u32 snd_una_max;
+ u32 group;
+} scoreboard_trace_elt_t;
+
+typedef struct _sack_scoreboard_hole
+{
+ u32 next; /**< Index for next entry in linked list */
+ u32 prev; /**< Index for previous entry in linked list */
+ u32 start; /**< Start sequence number */
+ u32 end; /**< End sequence number */
+ u8 is_lost; /**< Mark hole as lost */
+} sack_scoreboard_hole_t;
+
+typedef struct _sack_scoreboard
+{
+ sack_scoreboard_hole_t *holes; /**< Pool of holes */
+ u32 head; /**< Index of first entry */
+ u32 tail; /**< Index of last entry */
+ u32 sacked_bytes; /**< Number of bytes sacked in sb */
+ u32 last_sacked_bytes; /**< Number of bytes last sacked */
+ u32 last_bytes_delivered; /**< Number of sack bytes delivered */
+ u32 snd_una_adv; /**< Bytes to add to snd_una */
+ u32 high_sacked; /**< Highest byte sacked (fack) */
+ u32 high_rxt; /**< Highest retransmitted sequence */
+ u32 rescue_rxt; /**< Rescue sequence number */
+ u32 lost_bytes; /**< Bytes lost as per RFC6675 */
+ u32 cur_rxt_hole; /**< Retransmitting from this hole */
+
+#if TCP_SCOREBOARD_TRACE
+ scoreboard_trace_elt_t *trace;
+#endif
+
+} sack_scoreboard_t;
+
+#if TCP_SCOREBOARD_TRACE
+#define tcp_scoreboard_trace_add(_tc, _ack) \
+{ \
+ static u64 _group = 0; \
+ sack_scoreboard_t *_sb = &_tc->sack_sb; \
+ sack_block_t *_sack, *_sacks; \
+ scoreboard_trace_elt_t *_elt; \
+ int i; \
+ _group++; \
+ _sacks = _tc->rcv_opts.sacks; \
+ for (i = 0; i < vec_len (_sacks); i++) \
+ { \
+ _sack = &_sacks[i]; \
+ vec_add2 (_sb->trace, _elt, 1); \
+ _elt->start = _sack->start; \
+ _elt->end = _sack->end; \
+ _elt->ack = _elt->end == _ack ? _ack : 0; \
+ _elt->snd_una_max = _elt->end == _ack ? _tc->snd_una_max : 0; \
+ _elt->group = _group; \
+ } \
+}
+#else
+#define tcp_scoreboard_trace_add(_tc, _ack)
+#endif
+
+typedef enum _tcp_cc_algorithm_type
+{
+ TCP_CC_NEWRENO,
+} tcp_cc_algorithm_type_e;
+
+typedef struct _tcp_cc_algorithm tcp_cc_algorithm_t;
+
+typedef enum _tcp_cc_ack_t
+{
+ TCP_CC_ACK,
+ TCP_CC_DUPACK,
+ TCP_CC_PARTIALACK
+} tcp_cc_ack_t;
+
+typedef struct _tcp_connection
+{
+ transport_connection_t connection; /**< Common transport data. First! */
+
+ u8 state; /**< TCP state as per tcp_state_t */
+ u16 flags; /**< Connection flags (see tcp_conn_flags_e) */
+ u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */
+
+ /* TODO RFC4898 */
+
+ /** Send sequence variables RFC793 */
+ u32 snd_una; /**< oldest unacknowledged sequence number */
+ u32 snd_una_max; /**< newest unacknowledged sequence number + 1*/
+ u32 snd_wnd; /**< send window */
+ u32 snd_wl1; /**< seq number used for last snd.wnd update */
+ u32 snd_wl2; /**< ack number used for last snd.wnd update */
+ u32 snd_nxt; /**< next seq number to be sent */
+ u16 snd_mss; /**< Effective send max seg (data) size */
+
+ /** Receive sequence variables RFC793 */
+ u32 rcv_nxt; /**< next sequence number expected */
+ u32 rcv_wnd; /**< receive window we expect */
+
+ u32 rcv_las; /**< rcv_nxt at last ack sent/rcv_wnd update */
+ u32 iss; /**< initial sent sequence */
+ u32 irs; /**< initial remote sequence */
+
+ /* Options */
+ tcp_options_t rcv_opts; /**< Rx options for connection */
+ tcp_options_t snd_opts; /**< Tx options for connection */
+ u8 snd_opts_len; /**< Tx options len */
+ u8 rcv_wscale; /**< Window scale to advertise to peer */
+ u8 snd_wscale; /**< Window scale to use when sending */
+ u32 tsval_recent; /**< Last timestamp received */
+ u32 tsval_recent_age; /**< When last updated tstamp_recent*/
+
+ sack_block_t *snd_sacks; /**< Vector of SACKs to send. XXX Fixed size? */
+ sack_scoreboard_t sack_sb; /**< SACK "scoreboard" that tracks holes */
+
+ u16 rcv_dupacks; /**< Number of DUPACKs received */
+ u8 snt_dupacks; /**< Number of DUPACKs sent in a burst */
+
+ /* Congestion control */
+ u32 cwnd; /**< Congestion window */
+ u32 ssthresh; /**< Slow-start threshold */
+ u32 prev_ssthresh; /**< ssthresh before congestion */
+  u32 prev_cwnd;	/**< cwnd before congestion */
+ u32 bytes_acked; /**< Bytes acknowledged by current segment */
+ u32 snd_rxt_bytes; /**< Retransmitted bytes */
+ u32 snd_rxt_ts; /**< Timestamp when first packet is retransmitted */
+ u32 tsecr_last_ack; /**< Timestamp echoed to us in last healthy ACK */
+ u32 snd_congestion; /**< snd_una_max when congestion is detected */
+ tcp_cc_algorithm_t *cc_algo; /**< Congestion control algorithm */
+
+ /* RTT and RTO */
+ u32 rto; /**< Retransmission timeout */
+ u32 rto_boff; /**< Index for RTO backoff */
+ u32 srtt; /**< Smoothed RTT */
+ u32 rttvar; /**< Smoothed mean RTT difference. Approximates variance */
+ u32 rtt_ts; /**< Timestamp for tracked ACK */
+ u32 rtt_seq; /**< Sequence number for tracked ACK */
+
+ u16 mss; /**< Our max seg size that includes options */
+ u32 limited_transmit; /**< snd_nxt when limited transmit starts */
+ u32 last_fib_check; /**< Last time we checked fib route for peer */
+} tcp_connection_t;
+
+struct _tcp_cc_algorithm
+{
+ void (*rcv_ack) (tcp_connection_t * tc);
+ void (*rcv_cong_ack) (tcp_connection_t * tc, tcp_cc_ack_t ack);
+ void (*congestion) (tcp_connection_t * tc);
+ void (*recovered) (tcp_connection_t * tc);
+ void (*init) (tcp_connection_t * tc);
+};
+
+#define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY
+#define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY
+#define tcp_recovery_on(tc) (tc)->flags |= TCP_CONN_RECOVERY
+#define tcp_recovery_off(tc) (tc)->flags &= ~TCP_CONN_RECOVERY
+#define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY)
+#define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_RECOVERY))
+#define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh)
+#define tcp_fastrecovery_sent_1_smss(tc) ((tc)->flags & TCP_CONN_FR_1_SMSS)
+#define tcp_fastrecovery_1_smss_on(tc) ((tc)->flags |= TCP_CONN_FR_1_SMSS)
+#define tcp_fastrecovery_1_smss_off(tc) ((tc)->flags &= ~TCP_CONN_FR_1_SMSS)
+
+#define tcp_in_cong_recovery(tc) ((tc)->flags & \
+ (TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY))
+
+always_inline void
+tcp_cong_recovery_off (tcp_connection_t * tc)
+{
+ tc->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY);
+ tcp_fastrecovery_1_smss_off (tc);
+}
+
+typedef enum
+{
+ TCP_IP4,
+ TCP_IP6,
+ TCP_N_AF,
+} tcp_af_t;
+
+typedef enum _tcp_error
+{
+#define tcp_error(n,s) TCP_ERROR_##n,
+#include <vnet/tcp/tcp_error.def>
+#undef tcp_error
+ TCP_N_ERROR,
+} tcp_error_t;
+
+typedef struct _tcp_lookup_dispatch
+{
+ u8 next, error;
+} tcp_lookup_dispatch_t;
+
+typedef struct _tcp_main
+{
+ /* Per-worker thread tcp connection pools */
+ tcp_connection_t **connections;
+
+ /* Pool of listeners. */
+ tcp_connection_t *listener_pool;
+
+ /** Dispatch table by state and flags */
+ tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64];
+
+ u8 log2_tstamp_clocks_per_tick;
+ f64 tstamp_ticks_per_clock;
+ u32 *time_now;
+
+ /** per-worker tx buffer free lists */
+ u32 **tx_buffers;
+ /** per-worker tx frames to tcp 4/6 output nodes */
+ vlib_frame_t **tx_frames[2];
+ /** per-worker tx frames to ip 4/6 lookup nodes */
+ vlib_frame_t **ip_lookup_tx_frames[2];
+
+ /* Per worker-thread timer wheel for connections timers */
+ tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
+
+ /* Pool of half-open connections on which we've sent a SYN */
+ tcp_connection_t *half_open_connections;
+ clib_spinlock_t half_open_lock;
+
+ /* Pool of local TCP endpoints */
+ transport_endpoint_t *local_endpoints;
+
+ /* Local endpoints lookup table */
+ transport_endpoint_table_t local_endpoints_table;
+ clib_spinlock_t local_endpoints_lock;
+
+ /* Congestion control algorithms registered */
+ tcp_cc_algorithm_t *cc_algos;
+
+ /* Flag that indicates if stack is on or off */
+ u8 is_enabled;
+
+ /** Number of preallocated connections */
+ u32 preallocated_connections;
+ u32 preallocated_half_open_connections;
+
+ /** Transport table (preallocation) size parameters */
+ u32 local_endpoints_table_memory;
+ u32 local_endpoints_table_buckets;
+
+ /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
+ ip4_address_t *ip4_src_addresses;
+ u32 last_v4_address_rotor;
+ u32 last_v6_address_rotor;
+ ip6_address_t *ip6_src_addresses;
+
+ /** Port allocator random number generator seed */
+ u32 port_allocator_seed;
+
+ /** vlib buffer size */
+ u32 bytes_per_buffer;
+
+ u8 punt_unknown4;
+ u8 punt_unknown6;
+} tcp_main_t;
+
+extern tcp_main_t tcp_main;
+extern vlib_node_registration_t tcp4_input_node;
+extern vlib_node_registration_t tcp6_input_node;
+extern vlib_node_registration_t tcp4_output_node;
+extern vlib_node_registration_t tcp6_output_node;
+
+always_inline tcp_main_t *
+vnet_get_tcp_main ()
+{
+ return &tcp_main;
+}
+
+always_inline tcp_header_t *
+tcp_buffer_hdr (vlib_buffer_t * b)
+{
+ ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+ return (tcp_header_t *) (b->data + b->current_data
+ + vnet_buffer (b)->tcp.hdr_offset);
+}
+
+clib_error_t *vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en);
+
+void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
+
+always_inline tcp_connection_t *
+tcp_connection_get (u32 conn_index, u32 thread_index)
+{
+ if (PREDICT_FALSE
+ (pool_is_free_index (tcp_main.connections[thread_index], conn_index)))
+ return 0;
+ return pool_elt_at_index (tcp_main.connections[thread_index], conn_index);
+}
+
+always_inline tcp_connection_t *
+tcp_connection_get_if_valid (u32 conn_index, u32 thread_index)
+{
+ if (tcp_main.connections[thread_index] == 0)
+ return 0;
+ if (pool_is_free_index (tcp_main.connections[thread_index], conn_index))
+ return 0;
+ return pool_elt_at_index (tcp_main.connections[thread_index], conn_index);
+}
+
+always_inline tcp_connection_t *
+tcp_get_connection_from_transport (transport_connection_t * tconn)
+{
+ return (tcp_connection_t *) tconn;
+}
+
+void tcp_connection_close (tcp_connection_t * tc);
+void tcp_connection_cleanup (tcp_connection_t * tc);
+void tcp_connection_del (tcp_connection_t * tc);
+int tcp_half_open_connection_cleanup (tcp_connection_t * tc);
+tcp_connection_t *tcp_connection_new (u8 thread_index);
+void tcp_connection_reset (tcp_connection_t * tc);
+int tcp_configure_v4_source_address_range (vlib_main_t * vm,
+ ip4_address_t * start,
+ ip4_address_t * end, u32 table_id);
+int tcp_configure_v6_source_address_range (vlib_main_t * vm,
+ ip6_address_t * start,
+ ip6_address_t * end, u32 table_id);
+void tcp_api_reference (void);
+u8 *format_tcp_connection_id (u8 * s, va_list * args);
+u8 *format_tcp_connection (u8 * s, va_list * args);
+u8 *format_tcp_scoreboard (u8 * s, va_list * args);
+
+u8 *tcp_scoreboard_replay (u8 * s, tcp_connection_t * tc, u8 verbose);
+
+always_inline tcp_connection_t *
+tcp_listener_get (u32 tli)
+{
+ return pool_elt_at_index (tcp_main.listener_pool, tli);
+}
+
+always_inline tcp_connection_t *
+tcp_half_open_connection_get (u32 conn_index)
+{
+ tcp_connection_t *tc = 0;
+ clib_spinlock_lock_if_init (&tcp_main.half_open_lock);
+ if (!pool_is_free_index (tcp_main.half_open_connections, conn_index))
+ tc = pool_elt_at_index (tcp_main.half_open_connections, conn_index);
+ clib_spinlock_unlock_if_init (&tcp_main.half_open_lock);
+ return tc;
+}
+
+void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b);
+void tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b);
+void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b);
+void tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
+ u8 is_ip4);
+void tcp_send_reset (tcp_connection_t * tc);
+void tcp_send_syn (tcp_connection_t * tc);
+void tcp_send_fin (tcp_connection_t * tc);
+void tcp_init_mss (tcp_connection_t * tc);
+void tcp_update_snd_mss (tcp_connection_t * tc);
+void tcp_update_rto (tcp_connection_t * tc);
+void tcp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4);
+void tcp_flush_frames_to_output (u8 thread_index);
+
+always_inline u32
+tcp_end_seq (tcp_header_t * th, u32 len)
+{
+ return th->seq_number + tcp_is_syn (th) + tcp_is_fin (th) + len;
+}
+
+/* Modulo arithmetic for TCP sequence numbers */
+#define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0)
+#define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0)
+#define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0)
+#define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0)
+#define seq_max(_s1, _s2) (seq_gt((_s1), (_s2)) ? (_s1) : (_s2))
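+
+/* These compare correctly across 2^32 wrap: e.g. seq_lt (0xfffffff0, 0x10)
+ * holds because (i32)(0xfffffff0 - 0x10) is negative, so 0xfffffff0 is
+ * treated as older than 0x10. */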
+
+/* Modulo arithmetic for timestamps */
+#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0)
+#define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0)
+
+/**
+ * Our estimate of the number of bytes that have left the network
+ */
+always_inline u32
+tcp_bytes_out (const tcp_connection_t * tc)
+{
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ return tc->sack_sb.sacked_bytes + tc->sack_sb.lost_bytes;
+ else
+ return tc->rcv_dupacks * tc->snd_mss;
+}
+
+/**
+ * Our estimate of the number of bytes in flight (pipe size)
+ */
+always_inline u32
+tcp_flight_size (const tcp_connection_t * tc)
+{
+ int flight_size;
+
+ flight_size = (int) (tc->snd_una_max - tc->snd_una) - tcp_bytes_out (tc)
+ + tc->snd_rxt_bytes;
+
+ if (flight_size < 0)
+ {
+ if (0)
+ clib_warning
+ ("Negative: %u %u %u dupacks %u sacked bytes %u flags %d",
+ tc->snd_una_max - tc->snd_una, tcp_bytes_out (tc),
+ tc->snd_rxt_bytes, tc->rcv_dupacks, tc->sack_sb.sacked_bytes,
+ tc->rcv_opts.flags);
+ return 0;
+ }
+
+ return flight_size;
+}
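+
+/* E.g. with 10000 un-acked bytes outstanding, 2896 bytes sacked or lost
+ * per the scoreboard and nothing retransmitted, the estimated flight
+ * size is 10000 - 2896 = 7104 bytes. */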
+
+/**
+ * Initial cwnd as per RFC5681
+ */
+always_inline u32
+tcp_initial_cwnd (const tcp_connection_t * tc)
+{
+ if (tc->snd_mss > 2190)
+ return 2 * tc->snd_mss;
+ else if (tc->snd_mss > 1095)
+ return 3 * tc->snd_mss;
+ else
+ return 4 * tc->snd_mss;
+}
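+
+/* E.g. a typical Ethernet-derived snd_mss of 1448 falls in the
+ * (1095, 2190] bucket above, giving an initial window of
+ * 3 * 1448 = 4344 bytes. */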
+
+always_inline u32
+tcp_loss_wnd (const tcp_connection_t * tc)
+{
+ return tc->snd_mss;
+}
+
+always_inline u32
+tcp_available_snd_wnd (const tcp_connection_t * tc)
+{
+ return clib_min (tc->cwnd, tc->snd_wnd);
+}
+
+always_inline u32
+tcp_available_output_snd_space (const tcp_connection_t * tc)
+{
+ u32 available_wnd = tcp_available_snd_wnd (tc);
+ int flight_size = (int) (tc->snd_nxt - tc->snd_una);
+
+ if (available_wnd <= flight_size)
+ return 0;
+
+ return available_wnd - flight_size;
+}
+
+/**
+ * Estimate of how many bytes we can still push into the network
+ */
+always_inline u32
+tcp_available_snd_space (const tcp_connection_t * tc)
+{
+ u32 available_wnd = tcp_available_snd_wnd (tc);
+ u32 flight_size = tcp_flight_size (tc);
+
+ if (available_wnd <= flight_size)
+ return 0;
+
+ return available_wnd - flight_size;
+}
+
+always_inline u8
+tcp_is_lost_fin (tcp_connection_t * tc)
+{
+ if ((tc->flags & TCP_CONN_FINSNT) && tc->snd_una_max - tc->snd_una == 1)
+ return 1;
+ return 0;
+}
+
+i32 tcp_rcv_wnd_available (tcp_connection_t * tc);
+u32 tcp_snd_space (tcp_connection_t * tc);
+void tcp_update_rcv_wnd (tcp_connection_t * tc);
+
+void tcp_retransmit_first_unacked (tcp_connection_t * tc);
+void tcp_fast_retransmit_no_sack (tcp_connection_t * tc);
+void tcp_fast_retransmit_sack (tcp_connection_t * tc);
+void tcp_fast_retransmit (tcp_connection_t * tc);
+void tcp_cc_init_congestion (tcp_connection_t * tc);
+int tcp_cc_recover (tcp_connection_t * tc);
+void tcp_cc_fastrecovery_exit (tcp_connection_t * tc);
+
+fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc);
+
+/* Made public for unit testing only */
+void tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end);
+
+always_inline u32
+tcp_time_now (void)
+{
+ return tcp_main.time_now[vlib_get_thread_index ()];
+}
+
+always_inline u32
+tcp_set_time_now (u32 thread_index)
+{
+ tcp_main.time_now[thread_index] = clib_cpu_time_now ()
+ * tcp_main.tstamp_ticks_per_clock;
+ return tcp_main.time_now[thread_index];
+}
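+
+/* tstamp_ticks_per_clock converts CPU cycles to 1ms TCP ticks. For
+ * illustration, on a hypothetical 2.5GHz CPU seconds_per_clock is
+ * 4e-10, so the factor is 4e-10 / TCP_TSTAMP_RESOLUTION = 4e-7 ticks
+ * per cycle. */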
+
+always_inline void
+tcp_update_time (f64 now, u32 thread_index)
+{
+ tcp_set_time_now (thread_index);
+ tw_timer_expire_timers_16t_2w_512sl (&tcp_main.timer_wheels[thread_index],
+ now);
+ tcp_flush_frames_to_output (thread_index);
+}
+
+u32 tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
+
+u32
+tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset,
+ u32 max_bytes, vlib_buffer_t ** b);
+
+void tcp_connection_timers_init (tcp_connection_t * tc);
+void tcp_connection_timers_reset (tcp_connection_t * tc);
+void tcp_init_snd_vars (tcp_connection_t * tc);
+void tcp_connection_init_vars (tcp_connection_t * tc);
+
+always_inline void
+tcp_connection_force_ack (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+ /* Reset flags, make sure ack is sent */
+ tc->flags = TCP_CONN_SNDACK;
+ vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK;
+}
+
+always_inline void
+tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval)
+{
+ ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID);
+ tc->timers[timer_id]
+ = tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index],
+ tc->c_c_index, timer_id, interval);
+}
+
+always_inline void
+tcp_timer_reset (tcp_connection_t * tc, u8 timer_id)
+{
+ ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID)
+ return;
+
+ tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index],
+ tc->timers[timer_id]);
+ tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID;
+}
+
+always_inline void
+tcp_timer_update (tcp_connection_t * tc, u8 timer_id, u32 interval)
+{
+ ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID)
+ tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index],
+ tc->timers[timer_id]);
+ tc->timers[timer_id] =
+ tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index],
+ tc->c_c_index, timer_id, interval);
+}
+
+always_inline void
+tcp_retransmit_timer_set (tcp_connection_t * tc)
+{
+ ASSERT (tc->snd_una != tc->snd_una_max);
+ tcp_timer_set (tc, TCP_TIMER_RETRANSMIT,
+ clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
+}
+
+always_inline void
+tcp_retransmit_timer_reset (tcp_connection_t * tc)
+{
+ tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT);
+}
+
+always_inline void
+tcp_retransmit_timer_force_update (tcp_connection_t * tc)
+{
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT,
+ clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
+}
+
+always_inline void
+tcp_persist_timer_set (tcp_connection_t * tc)
+{
+ /* Reuse RTO. It's backed off in handler */
+ tcp_timer_set (tc, TCP_TIMER_PERSIST,
+ clib_max (tc->rto * TCP_TO_TIMER_TICK,
+ TCP_TIMER_PERSIST_MIN));
+}
+
+always_inline void
+tcp_persist_timer_update (tcp_connection_t * tc)
+{
+ tcp_timer_update (tc, TCP_TIMER_PERSIST,
+ clib_max (tc->rto * TCP_TO_TIMER_TICK,
+ TCP_TIMER_PERSIST_MIN));
+}
+
+always_inline void
+tcp_persist_timer_reset (tcp_connection_t * tc)
+{
+ tcp_timer_reset (tc, TCP_TIMER_PERSIST);
+}
+
+always_inline void
+tcp_retransmit_timer_update (tcp_connection_t * tc)
+{
+ if (tc->snd_una == tc->snd_una_max)
+ {
+ tcp_retransmit_timer_reset (tc);
+ if (tc->snd_wnd < tc->snd_mss)
+ tcp_persist_timer_update (tc);
+ }
+ else
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT,
+ clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
+}
+
+always_inline u8
+tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer)
+{
+ return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID;
+}
+
+#define tcp_validate_txf_size(_tc, _a) \
+ ASSERT(_tc->state != TCP_STATE_ESTABLISHED \
+ || stream_session_tx_fifo_max_dequeue (&_tc->connection) >= _a)
+
+void
+scoreboard_remove_hole (sack_scoreboard_t * sb,
+ sack_scoreboard_hole_t * hole);
+void scoreboard_update_lost (tcp_connection_t * tc, sack_scoreboard_t * sb);
+sack_scoreboard_hole_t *scoreboard_insert_hole (sack_scoreboard_t * sb,
+ u32 prev_index, u32 start,
+ u32 end);
+sack_scoreboard_hole_t *scoreboard_next_rxt_hole (sack_scoreboard_t * sb,
+ sack_scoreboard_hole_t *
+ start, u8 have_sent_1_smss,
+ u8 * can_rescue,
+ u8 * snd_limited);
+void scoreboard_init_high_rxt (sack_scoreboard_t * sb, u32 seq);
+
+always_inline sack_scoreboard_hole_t *
+scoreboard_get_hole (sack_scoreboard_t * sb, u32 index)
+{
+ if (index != TCP_INVALID_SACK_HOLE_INDEX)
+ return pool_elt_at_index (sb->holes, index);
+ return 0;
+}
+
+always_inline sack_scoreboard_hole_t *
+scoreboard_next_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole)
+{
+ if (hole->next != TCP_INVALID_SACK_HOLE_INDEX)
+ return pool_elt_at_index (sb->holes, hole->next);
+ return 0;
+}
+
+always_inline sack_scoreboard_hole_t *
+scoreboard_prev_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole)
+{
+ if (hole->prev != TCP_INVALID_SACK_HOLE_INDEX)
+ return pool_elt_at_index (sb->holes, hole->prev);
+ return 0;
+}
+
+always_inline sack_scoreboard_hole_t *
+scoreboard_first_hole (sack_scoreboard_t * sb)
+{
+ if (sb->head != TCP_INVALID_SACK_HOLE_INDEX)
+ return pool_elt_at_index (sb->holes, sb->head);
+ return 0;
+}
+
+always_inline sack_scoreboard_hole_t *
+scoreboard_last_hole (sack_scoreboard_t * sb)
+{
+ if (sb->tail != TCP_INVALID_SACK_HOLE_INDEX)
+ return pool_elt_at_index (sb->holes, sb->tail);
+ return 0;
+}
+
+always_inline void
+scoreboard_clear (sack_scoreboard_t * sb)
+{
+ sack_scoreboard_hole_t *hole;
+ while ((hole = scoreboard_first_hole (sb)))
+ {
+ scoreboard_remove_hole (sb, hole);
+ }
+ ASSERT (sb->head == sb->tail && sb->head == TCP_INVALID_SACK_HOLE_INDEX);
+ ASSERT (pool_elts (sb->holes) == 0);
+ sb->sacked_bytes = 0;
+ sb->last_sacked_bytes = 0;
+ sb->last_bytes_delivered = 0;
+ sb->snd_una_adv = 0;
+ sb->high_sacked = 0;
+ sb->high_rxt = 0;
+ sb->lost_bytes = 0;
+ sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
+}
+
+always_inline u32
+scoreboard_hole_bytes (sack_scoreboard_hole_t * hole)
+{
+ return hole->end - hole->start;
+}
+
+always_inline u32
+scoreboard_hole_index (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole)
+{
+ ASSERT (!pool_is_free_index (sb->holes, hole - sb->holes));
+ return hole - sb->holes;
+}
+
+always_inline void
+scoreboard_init (sack_scoreboard_t * sb)
+{
+ sb->head = TCP_INVALID_SACK_HOLE_INDEX;
+ sb->tail = TCP_INVALID_SACK_HOLE_INDEX;
+ sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
+}
+
+void tcp_rcv_sacks (tcp_connection_t * tc, u32 ack);
+
+always_inline void
+tcp_cc_algo_register (tcp_cc_algorithm_type_e type,
+ const tcp_cc_algorithm_t * vft)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vec_validate (tm->cc_algos, type);
+
+ tm->cc_algos[type] = *vft;
+}
+
+always_inline tcp_cc_algorithm_t *
+tcp_cc_algo_get (tcp_cc_algorithm_type_e type)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ return &tm->cc_algos[type];
+}
+
+void tcp_cc_init (tcp_connection_t * tc);
+
+/**
+ * Push TCP header to buffer
+ *
+ * @param b - buffer to write the header to
+ * @param sp - source port, net order
+ * @param dp - destination port, net order
+ * @param seq - sequence number net order
+ * @param ack - ack number net order
+ * @param tcp_hdr_opts_len - header and options length in bytes
+ * @param flags - header flags
+ * @param wnd - window size
+ *
+ * @return - pointer to start of TCP header
+ */
+always_inline void *
+vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq,
+ u32 ack, u8 tcp_hdr_opts_len, u8 flags,
+ u16 wnd)
+{
+ tcp_header_t *th;
+
+ th = vlib_buffer_push_uninit (b, tcp_hdr_opts_len);
+
+ th->src_port = sp;
+ th->dst_port = dp;
+ th->seq_number = seq;
+ th->ack_number = ack;
+ th->data_offset_and_reserved = (tcp_hdr_opts_len >> 2) << 4;
+ th->flags = flags;
+ th->window = wnd;
+ th->checksum = 0;
+ th->urgent_pointer = 0;
+ return th;
+}
+
+/**
+ * Push TCP header to buffer
+ *
+ * @param b - buffer to write the header to
+ * @param sp_net - source port net order
+ * @param dp_net - destination port net order
+ * @param seq - sequence number host order
+ * @param ack - ack number host order
+ * @param tcp_hdr_opts_len - header and options length in bytes
+ * @param flags - header flags
+ * @param wnd - window size
+ *
+ * @return - pointer to start of TCP header
+ */
+always_inline void *
+vlib_buffer_push_tcp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, u32 seq,
+ u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
+{
+ return vlib_buffer_push_tcp_net_order (b, sp_net, dp_net,
+ clib_host_to_net_u32 (seq),
+ clib_host_to_net_u32 (ack),
+ tcp_hdr_opts_len, flags,
+ clib_host_to_net_u16 (wnd));
+}
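+
+/* Sketch of a typical call, assuming (as elsewhere in this stack) that
+ * the connection's ports are stored in network order:
+ *
+ *   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port,
+ *                              tc->snd_nxt, tc->rcv_nxt,
+ *                              sizeof (tcp_header_t) + tc->snd_opts_len,
+ *                              TCP_FLAG_ACK, tc->rcv_wnd >> tc->rcv_wscale);
+ */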
+
+#endif /* _vnet_tcp_h_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_api.c b/src/vnet/tcp/tcp_api.c
new file mode 100644
index 00000000..4c3e49ee
--- /dev/null
+++ b/src/vnet/tcp/tcp_api.c
@@ -0,0 +1,119 @@
+/*
+ *------------------------------------------------------------------
+ * tcp_api.c - vnet tcp-layer apis
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/tcp/tcp.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_tcp_api_msg \
+_(TCP_CONFIGURE_SRC_ADDRESSES, tcp_configure_src_addresses)
+
+static void
+ vl_api_tcp_configure_src_addresses_t_handler
+ (vl_api_tcp_configure_src_addresses_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_tcp_configure_src_addresses_reply_t *rmp;
+ u32 vrf_id;
+ int rv;
+
+ vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+
+ if (mp->is_ipv6)
+ rv = tcp_configure_v6_source_address_range
+ (vm,
+ (ip6_address_t *) mp->first_address,
+ (ip6_address_t *) mp->last_address, vrf_id);
+ else
+ rv = tcp_configure_v4_source_address_range
+ (vm,
+ (ip4_address_t *) mp->first_address,
+ (ip4_address_t *) mp->last_address, vrf_id);
+
+ REPLY_MACRO (VL_API_TCP_CONFIGURE_SRC_ADDRESSES_REPLY);
+}
+
+#define vl_msg_name_crc_list
+#include <vnet/tcp/tcp.api.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_tcp;
+#undef _
+}
+
+static clib_error_t *
+tcp_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_tcp_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (tcp_api_hookup);
+
+void
+tcp_api_reference (void)
+{
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
new file mode 100755
index 00000000..eb318cde
--- /dev/null
+++ b/src/vnet/tcp/tcp_debug.h
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_VNET_TCP_TCP_DEBUG_H_
+#define SRC_VNET_TCP_TCP_DEBUG_H_
+
+#include <vlib/vlib.h>
+
+#define TCP_DEBUG (1)
+#define TCP_DEBUG_SM (0)
+#define TCP_DEBUG_CC (0)
+#define TCP_DEBUG_CC_STAT (1)
+
+#define foreach_tcp_dbg_evt \
+ _(INIT, "") \
+ _(DEALLOC, "") \
+ _(OPEN, "open") \
+ _(CLOSE, "close") \
+ _(BIND, "bind") \
+ _(UNBIND, "unbind") \
+ _(DELETE, "delete") \
+ _(SYN_SENT, "SYN sent") \
+ _(SYNACK_SENT, "SYNACK sent") \
+ _(SYNACK_RCVD, "SYNACK rcvd") \
+ _(SYN_RXT, "SYN retransmit") \
+ _(FIN_SENT, "FIN sent") \
+ _(ACK_SENT, "ACK sent") \
+ _(DUPACK_SENT, "DUPACK sent") \
+ _(RST_SENT, "RST sent") \
+ _(SYN_RCVD, "SYN rcvd") \
+ _(ACK_RCVD, "ACK rcvd") \
+ _(DUPACK_RCVD, "DUPACK rcvd") \
+ _(FIN_RCVD, "FIN rcvd") \
+ _(RST_RCVD, "RST rcvd") \
+ _(STATE_CHANGE, "state change") \
+ _(PKTIZE, "packetize") \
+ _(INPUT, "in") \
+ _(SND_WND, "snd_wnd update") \
+ _(OUTPUT, "output") \
+ _(TIMER_POP, "timer pop") \
+ _(CC_RTX, "retransmit") \
+ _(CC_EVT, "cc event") \
+ _(CC_PACK, "cc partial ack") \
+ _(CC_STAT, "cc stats") \
+ _(CC_RTO_STAT, "cc rto stats") \
+ _(SEG_INVALID, "invalid segment") \
+ _(PAWS_FAIL, "failed paws check") \
+ _(ACK_RCV_ERR, "invalid ack") \
+ _(RCV_WND_SHRUNK, "shrunk rcv_wnd") \
+
+typedef enum _tcp_dbg
+{
+#define _(sym, str) TCP_DBG_##sym,
+ foreach_tcp_dbg_evt
+#undef _
+} tcp_dbg_e;
+
+typedef enum _tcp_dbg_evt
+{
+#define _(sym, str) TCP_EVT_##sym,
+ foreach_tcp_dbg_evt
+#undef _
+} tcp_dbg_evt_e;
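+
+/*
+ * The foreach list above is an X-macro: each _(sym, str) entry expands
+ * to one enum member, so the TCP_DBG_* and TCP_EVT_* enums stay in sync
+ * with the event list by construction.
+ */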
+
+#if TCP_DEBUG
+
+#define TRANSPORT_DEBUG (1)
+
+/*
+ * Infra and evt track setup
+ */
+
+#define TCP_DBG(_fmt, _args...) clib_warning (_fmt, ##_args)
+
+#define DECLARE_ETD(_tc, _e, _size) \
+ struct \
+ { \
+ u32 data[_size]; \
+ } * ed; \
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, \
+ _e, _tc->c_elog_track)
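+
+/*
+ * DECLARE_ETD declares an event-track-data struct with _size u32 slots
+ * bound to the connection's elog track; each handler below fills
+ * ed->data[] with the values consumed by its event's .format string.
+ */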
+
+#define TCP_DBG_IP_TAG_LCL(_tc) \
+{ \
+ if (_tc->c_is_ip4) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "lcl: %d.%d.%d.%d:%d", \
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->c_lcl_ip.ip4.as_u8[0]; \
+ ed->data[1] = _tc->c_lcl_ip.ip4.as_u8[1]; \
+ ed->data[2] = _tc->c_lcl_ip.ip4.as_u8[2]; \
+ ed->data[3] = _tc->c_lcl_ip.ip4.as_u8[3]; \
+ ed->data[4] = clib_net_to_host_u16(_tc->c_lcl_port); \
+ } \
+}
+
+#define TCP_DBG_IP_TAG_RMT(_tc) \
+{ \
+ if (_tc->c_is_ip4) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "rmt: %d.%d.%d.%d:%d", \
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->c_rmt_ip.ip4.as_u8[0]; \
+ ed->data[1] = _tc->c_rmt_ip.ip4.as_u8[1]; \
+ ed->data[2] = _tc->c_rmt_ip.ip4.as_u8[2]; \
+ ed->data[3] = _tc->c_rmt_ip.ip4.as_u8[3]; \
+ ed->data[4] = clib_net_to_host_u16(_tc->c_rmt_port); \
+ } \
+}
+
+#define TCP_EVT_INIT_HANDLER(_tc, _is_l, ...) \
+{ \
+ char *_fmt = _is_l ? "l[%d].%d:%d%c" : "[%d].%d:%d->.%d:%d%c"; \
+ if (_tc->c_is_ip4) \
+ { \
+ _tc->c_elog_track.name = \
+ (char *) format (0, _fmt, _tc->c_thread_index, \
+ _tc->c_lcl_ip.ip4.as_u8[3], \
+ clib_net_to_host_u16(_tc->c_lcl_port), \
+ _tc->c_rmt_ip.ip4.as_u8[3], \
+ clib_net_to_host_u16(_tc->c_rmt_port), 0); \
+ } \
+ else \
+ _tc->c_elog_track.name = \
+ (char *) format (0, _fmt, _tc->c_thread_index, \
+ _tc->c_lcl_ip.ip6.as_u8[15], \
+ clib_net_to_host_u16(_tc->c_lcl_port), \
+ _tc->c_rmt_ip.ip6.as_u8[15], \
+ clib_net_to_host_u16(_tc->c_rmt_port), 0); \
+ elog_track_register (&vlib_global_main.elog_main, &_tc->c_elog_track);\
+ TCP_DBG_IP_TAG_LCL(_tc); \
+ TCP_DBG_IP_TAG_RMT(_tc); \
+}
+
+#define TCP_EVT_DEALLOC_HANDLER(_tc, ...) \
+{ \
+ vec_free (_tc->c_elog_track.name); \
+}
+
+#define TCP_EVT_OPEN_HANDLER(_tc, ...) \
+{ \
+ TCP_EVT_INIT_HANDLER(_tc, 0); \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "open: index %d", \
+ .format_args = "i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->c_c_index; \
+}
+
+#define TCP_EVT_CLOSE_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "close: %d", \
+ .format_args = "i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->c_c_index; \
+}
+
+#define TCP_EVT_BIND_HANDLER(_tc, ...) \
+{ \
+ TCP_EVT_INIT_HANDLER(_tc, 1); \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "bind: listener %d", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->c_c_index; \
+}
+
+#define TCP_EVT_SYN_RCVD_HANDLER(_tc,_init, ...) \
+{ \
+ if (_init) \
+ TCP_EVT_INIT_HANDLER(_tc, 0); \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "syn-rx: irs %u", \
+ .format_args = "i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->irs; \
+ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \
+}
+
+#define TCP_EVT_UNBIND_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "unbind: listener %d", \
+ .format_args = "i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->c_c_index; \
+ TCP_EVT_DEALLOC_HANDLER(_tc); \
+}
+
+#define TCP_EVT_DELETE_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "delete: %d", \
+ .format_args = "i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->c_c_index; \
+ TCP_EVT_DEALLOC_HANDLER(_tc); \
+}
+
+#define CONCAT_HELPER(_a, _b) _a##_b
+#define CC(_a, _b) CONCAT_HELPER(_a, _b)
+#define TCP_EVT_DBG(_evt, _args...) CC(_evt, _HANDLER)(_args)
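+
+/*
+ * Expansion sketch: TCP_EVT_DBG (TCP_EVT_OPEN, tc) token-pastes the
+ * event name with _HANDLER and becomes TCP_EVT_OPEN_HANDLER (tc). When
+ * TCP_DEBUG is 0, the macro expands to nothing and call sites compile
+ * away entirely.
+ */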
+#else
+#define TCP_EVT_DBG(_evt, _args...)
+#define TCP_DBG(_fmt, _args...)
+#endif
+
+/*
+ * State machine
+ */
+#if TCP_DEBUG_SM
+
+#define TCP_EVT_STATE_CHANGE_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "state: %s", \
+ .format_args = "t4", \
+ .n_enum_strings = 11, \
+ .enum_strings = { \
+ "closed", \
+ "listen", \
+ "syn-sent", \
+ "syn-rcvd", \
+ "established", \
+ "close_wait", \
+ "fin-wait-1", \
+ "last-ack", \
+ "closing", \
+ "fin-wait-2", \
+ "time-wait", \
+ }, \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->state; \
+}
+
+#define TCP_EVT_SYN_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "syn-tx: iss %u snd_una %u snd_una_max %u snd_nxt %u", \
+ .format_args = "i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 4); \
+ ed->data[0] = _tc->iss; \
+ ed->data[1] = _tc->snd_una - _tc->iss; \
+ ed->data[2] = _tc->snd_una_max - _tc->iss; \
+ ed->data[3] = _tc->snd_nxt - _tc->iss; \
+ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \
+}
+
+#define TCP_EVT_SYNACK_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "synack-tx: iss %u irs %u snd_una %u snd_nxt %u rcv_nxt %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->iss; \
+ ed->data[1] = _tc->irs; \
+ ed->data[2] = _tc->snd_una - _tc->iss; \
+ ed->data[3] = _tc->snd_nxt - _tc->iss; \
+ ed->data[4] = _tc->rcv_nxt - _tc->irs; \
+}
+
+#define TCP_EVT_SYNACK_RCVD_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "synack-rx: iss %u irs %u snd_una %u snd_nxt %u rcv_nxt %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->iss; \
+ ed->data[1] = _tc->irs; \
+ ed->data[2] = _tc->snd_una - _tc->iss; \
+ ed->data[3] = _tc->snd_nxt - _tc->iss; \
+ ed->data[4] = _tc->rcv_nxt - _tc->irs; \
+ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \
+}
+
+#define TCP_EVT_FIN_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "fin-tx: snd_nxt %d rcv_nxt %d", \
+ .format_args = "i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = _tc->snd_nxt - _tc->iss; \
+ ed->data[1] = _tc->rcv_nxt - _tc->irs; \
+}
+
+#define TCP_EVT_RST_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "rst-tx: snd_nxt %d rcv_nxt %d", \
+ .format_args = "i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = _tc->snd_nxt - _tc->iss; \
+ ed->data[1] = _tc->rcv_nxt - _tc->irs; \
+ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \
+}
+
+#define TCP_EVT_FIN_RCVD_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "fin-rx: snd_nxt %d rcv_nxt %d", \
+ .format_args = "i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = _tc->snd_nxt - _tc->iss; \
+ ed->data[1] = _tc->rcv_nxt - _tc->irs; \
+}
+
+#define TCP_EVT_RST_RCVD_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "rst-rx: snd_nxt %d rcv_nxt %d", \
+ .format_args = "i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = _tc->snd_nxt - _tc->iss; \
+ ed->data[1] = _tc->rcv_nxt - _tc->irs; \
+}
+
+#define TCP_EVT_SYN_RXT_HANDLER(_tc, _type, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "%s-rxt: iss %u irs %u snd_nxt %u rcv_nxt %u", \
+ .format_args = "t4i4i4i4i4", \
+ .n_enum_strings = 2, \
+ .enum_strings = { \
+ "syn", \
+ "syn-ack", \
+ }, \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _type; \
+ ed->data[1] = _tc->iss; \
+ ed->data[2] = _tc->irs; \
+ ed->data[3] = _tc->snd_nxt - _tc->iss; \
+ ed->data[4] = _tc->rcv_nxt - _tc->irs; \
+}
+
+#else
+#define TCP_EVT_SYN_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_SYNACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_SYNACK_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_SYN_RXT_HANDLER(_tc, ...)
+#define TCP_EVT_FIN_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_RST_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_FIN_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_RST_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_STATE_CHANGE_HANDLER(_tc, ...)
+#endif
+
+#if TCP_DEBUG_SM > 1
+
+#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "ack-tx: acked %u rcv_nxt %u rcv_wnd %u snd_nxt %u snd_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->rcv_nxt - _tc->rcv_las; \
+ ed->data[1] = _tc->rcv_nxt - _tc->irs; \
+ ed->data[2] = _tc->rcv_wnd; \
+ ed->data[3] = _tc->snd_nxt - _tc->iss; \
+ ed->data[4] = _tc->snd_wnd; \
+}
+
+#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "ack-rx: %u snd_una %u snd_wnd %u cwnd %u inflight %u", \
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->bytes_acked; \
+ ed->data[1] = _tc->snd_una - _tc->iss; \
+ ed->data[2] = _tc->snd_wnd; \
+ ed->data[3] = _tc->cwnd; \
+ ed->data[4] = tcp_flight_size(_tc); \
+}
+
+#define TCP_EVT_PKTIZE_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "tx: una %u snd_nxt %u space %u flight %u rcv_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->snd_una - _tc->iss; \
+ ed->data[1] = _tc->snd_nxt - _tc->iss; \
+ ed->data[2] = tcp_available_output_snd_space (_tc); \
+ ed->data[3] = tcp_flight_size (_tc); \
+ ed->data[4] = _tc->rcv_wnd; \
+}
+
+#define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "in: %s len %u written %d rcv_nxt %u rcv_wnd(o) %d", \
+ .format_args = "t4i4i4i4i4", \
+ .n_enum_strings = 2, \
+ .enum_strings = { \
+ "order", \
+ "ooo", \
+ }, \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _type; \
+ ed->data[1] = _len; \
+ ed->data[2] = _written; \
+ ed->data[3] = (_tc->rcv_nxt - _tc->irs) + _written; \
+ ed->data[4] = _tc->rcv_wnd - (_tc->rcv_nxt - _tc->rcv_las); \
+}
+
+#define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...) \
+{ \
+ tcp_connection_t *_tc; \
+ if (_timer_id == TCP_TIMER_RETRANSMIT_SYN \
+ || _timer_id == TCP_TIMER_ESTABLISH) \
+ { \
+ _tc = tcp_half_open_connection_get (_tc_index); \
+ } \
+ else \
+ { \
+ u32 _thread_index = vlib_get_thread_index (); \
+ _tc = tcp_connection_get (_tc_index, _thread_index); \
+ } \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "timer-pop: %s (%d)", \
+ .format_args = "t4i4", \
+ .n_enum_strings = 7, \
+ .enum_strings = { \
+ "retransmit", \
+ "delack", \
+ "persist", \
+ "keep", \
+ "waitclose", \
+ "retransmit syn", \
+ "establish", \
+ }, \
+ }; \
+ if (_tc) \
+ { \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = _timer_id; \
+ ed->data[1] = _timer_id; \
+ } \
+ else \
+ { \
+ clib_warning ("pop %d for unexisting connection %d", _timer_id, \
+ _tc_index); \
+ } \
+}
+
+#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _seq, _end, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "seg-inv: seq %u end %u rcv_las %u rcv_nxt %u rcv_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _seq - _tc->irs; \
+ ed->data[1] = _end - _tc->irs; \
+ ed->data[2] = _tc->rcv_las - _tc->irs; \
+ ed->data[3] = _tc->rcv_nxt - _tc->irs; \
+ ed->data[4] = _tc->rcv_wnd; \
+}
+
+#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "paws-err: seq %u end %u tsval %u tsval_recent %u", \
+ .format_args = "i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 4); \
+ ed->data[0] = _seq - _tc->irs; \
+ ed->data[1] = _end - _tc->irs; \
+ ed->data[2] = _tc->rcv_opts.tsval; \
+ ed->data[3] = _tc->tsval_recent; \
+}
+
+#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "ack-err: %s ack %u snd_una %u snd_nxt %u una_max %u", \
+ .format_args = "t4i4i4i4i4", \
+ .n_enum_strings = 3, \
+ .enum_strings = { \
+ "invalid", \
+ "old", \
+ "future", \
+ }, \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _type; \
+ ed->data[1] = _ack - _tc->iss; \
+ ed->data[2] = _tc->snd_una - _tc->iss; \
+ ed->data[3] = _tc->snd_nxt - _tc->iss; \
+ ed->data[4] = _tc->snd_una_max - _tc->iss; \
+}
+
+#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...) \
+{ \
+if (_av > 0) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "huh?: rcv_wnd %u obsd %u av %u rcv_nxt %u rcv_las %u", \
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->rcv_wnd; \
+ ed->data[1] = _obs; \
+ ed->data[2] = _av; \
+ ed->data[3] = _tc->rcv_nxt - _tc->irs; \
+ ed->data[4] = _tc->rcv_las - _tc->irs; \
+} \
+}
+#else
+#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
+#define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...)
+#define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...)
+#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _seq, _end, ...)
+#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)
+#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)
+#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)
+#endif
+
+/*
+ * State machine verbose
+ */
+#if TCP_DEBUG_SM > 2
+#define TCP_EVT_SND_WND_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "snd-wnd update: %u ", \
+ .format_args = "i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->snd_wnd; \
+}
+
+#define TCP_EVT_OUTPUT_HANDLER(_tc, flags, n_bytes,...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "out: flags %x, bytes %u", \
+ .format_args = "i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = flags; \
+ ed->data[1] = n_bytes; \
+}
+#else
+#define TCP_EVT_SND_WND_HANDLER(_tc, ...)
+#define TCP_EVT_OUTPUT_HANDLER(_tc, flags, n_bytes,...)
+#endif
+
+/*
+ * Congestion Control
+ */
+
+#if TCP_DEBUG_CC
+
+#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \
+ .format_args = "t4i4i4i4", \
+ .n_enum_strings = 6, \
+ .enum_strings = { \
+ "fast-rxt", \
+ "rxt-timeout", \
+ "first-rxt", \
+ "recovered", \
+ "congestion", \
+ "undo", \
+ }, \
+ }; \
+ DECLARE_ETD(_tc, _e, 4); \
+ ed->data[0] = _sub_evt; \
+ ed->data[1] = tcp_available_snd_space (_tc); \
+ ed->data[2] = _tc->snd_congestion - _tc->iss; \
+ ed->data[3] = _tc->snd_rxt_bytes; \
+}
+
+#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "rxt: snd_nxt %u offset %u snd %u rxt %u", \
+ .format_args = "i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 4); \
+ ed->data[0] = _tc->snd_nxt - _tc->iss; \
+ ed->data[1] = offset; \
+ ed->data[2] = n_bytes; \
+ ed->data[3] = _tc->snd_rxt_bytes; \
+}
+
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->rcv_nxt - _tc->irs; \
+ ed->data[1] = _tc->rcv_wnd; \
+ ed->data[2] = _tc->snd_nxt - _tc->iss; \
+ ed->data[3] = tcp_available_snd_wnd(_tc); \
+ ed->data[4] = _tc->snd_wnd; \
+}
+
+#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->snd_una - _tc->iss; \
+ ed->data[1] = _tc->cwnd; \
+ ed->data[2] = _tc->snd_wnd; \
+ ed->data[3] = tcp_flight_size(_tc); \
+ ed->data[4] = _tc->rcv_wnd; \
+}
+
+#define TCP_EVT_CC_PACK_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "pack: snd_una %u snd_una_max %u", \
+ .format_args = "i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = _tc->snd_una - _tc->iss; \
+ ed->data[1] = _tc->snd_una_max - _tc->iss; \
+}
+#else
+#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
+#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
+#endif
+
+/*
+ * Congestion control stats
+ */
+#if TCP_DEBUG_CC_STAT
+
+#define STATS_INTERVAL 1
+
+#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \
+{ \
+if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "rto_stat: rto %u srtt %u rttvar %u ", \
+ .format_args = "i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 3); \
+ ed->data[0] = _tc->rto; \
+ ed->data[1] = _tc->srtt; \
+ ed->data[2] = _tc->rttvar; \
+} \
+}
+
+#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \
+{ \
+if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "cc_stat: cwnd %u flight %u space %u ssthresh %u snd_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->cwnd; \
+ ed->data[1] = tcp_flight_size (_tc); \
+ ed->data[2] = tcp_snd_space (_tc); \
+ ed->data[3] = _tc->ssthresh; \
+ ed->data[4] = _tc->snd_wnd; \
+ TCP_EVT_CC_RTO_STAT_HANDLER (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now(); \
+} \
+}
+
+#else
+#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
+#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...)
+#endif
+
+#endif /* SRC_VNET_TCP_TCP_DEBUG_H_ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def
new file mode 100644
index 00000000..a179717f
--- /dev/null
+++ b/src/vnet/tcp/tcp_error.def
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+tcp_error (NONE, "no error")
+tcp_error (LENGTH, "inconsistent ip/tcp lengths")
+tcp_error (NO_LISTENER, "no listener for dst port")
+tcp_error (LOOKUP_DROPS, "lookup drops")
+tcp_error (DISPATCH, "Dispatch error")
+tcp_error (ENQUEUED, "Packets pushed into rx fifo")
+tcp_error (PARTIALLY_ENQUEUED, "Packets partially pushed into rx fifo")
+tcp_error (PURE_ACK, "Pure acks")
+tcp_error (SYNS_RCVD, "SYNs received")
+tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received")
+tcp_error (NOT_READY, "Session not ready for packets")
+tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space")
+tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space")
+tcp_error (API_QUEUE_FULL, "Sessions not created for lack of API queue space")
+tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated")
+tcp_error (SEGMENT_INVALID, "Invalid segments")
+tcp_error (SEGMENT_OLD, "Old segment")
+tcp_error (ACK_INVALID, "Invalid ACK")
+tcp_error (ACK_DUP, "Duplicate ACK")
+tcp_error (ACK_OLD, "Old ACK")
+tcp_error (ACK_FUTURE, "Future ACK")
+tcp_error (PKTS_SENT, "Packets sent")
+tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs")
+tcp_error (RST_SENT, "Resets sent")
+tcp_error (INVALID_CONNECTION, "Invalid connection")
+tcp_error (NO_WND, "No window")
+tcp_error (CONNECTION_CLOSED, "Connection closed")
+tcp_error (CREATE_EXISTS, "Connection already exists")
+tcp_error (PUNT, "Packets punted")
\ No newline at end of file
diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c
new file mode 100644
index 00000000..1ca2f58e
--- /dev/null
+++ b/src/vnet/tcp/tcp_format.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * tcp/tcp_format.c: tcp formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/tcp/tcp.h>
+
+u8 *
+format_tcp_flags (u8 * s, va_list * args)
+{
+ int flags = va_arg (*args, int);
+
+ s = format (s, "0x%02x", flags);
+#define _(f) if (flags & TCP_FLAG_##f) s = format (s, " %s", #f);
+ foreach_tcp_flag
+#undef _
+ return s;
+}
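+
+/*
+ * Usage sketch: vppinfra format functions are invoked through %U, which
+ * passes the function pointer followed by its arguments:
+ *
+ *   u8 *s = format (0, "flags: %U", format_tcp_flags, (int) th->flags);
+ *   vec_free (s);
+ */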
+
+/* Format TCP header. */
+u8 *
+format_tcp_header (u8 * s, va_list * args)
+{
+ tcp_header_t *tcp = va_arg (*args, tcp_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 header_bytes;
+ uword indent;
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (tcp[0]))
+ return format (s, "TCP header truncated");
+
+ indent = format_get_indent (s);
+ indent += 2;
+ header_bytes = tcp_header_bytes (tcp);
+
+ s = format (s, "TCP: %d -> %d", clib_net_to_host_u16 (tcp->src),
+ clib_net_to_host_u16 (tcp->dst));
+
+ s = format (s, "\n%Useq. 0x%08x ack 0x%08x", format_white_space, indent,
+ clib_net_to_host_u32 (tcp->seq_number),
+ clib_net_to_host_u32 (tcp->ack_number));
+
+ s = format (s, "\n%Uflags %U, tcp header: %d bytes", format_white_space,
+ indent, format_tcp_flags, tcp->flags, header_bytes);
+
+ s = format (s, "\n%Uwindow %d, checksum 0x%04x", format_white_space, indent,
+ clib_net_to_host_u16 (tcp->window),
+ clib_net_to_host_u16 (tcp->checksum));
+
+
+#if 0
+ /* Format TCP options. */
+ {
+ u8 *o;
+ u8 *option_start = (void *) (tcp + 1);
+ u8 *option_end = (void *) tcp + header_bytes;
+
+ for (o = option_start; o < option_end;)
+ {
+ u32 length = o[1];
+ switch (o[0])
+ {
+ case TCP_OPTION_END:
+ length = 1;
+ o = option_end;
+ break;
+
+ case TCP_OPTION_NOOP:
+ length = 1;
+ break;
+
+ }
+ }
+ }
+#endif
+
+ /* Recurse into next protocol layer. */
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+
+ pi = ip_get_tcp_udp_port_info (im, tcp->dst);
+
+ if (pi && pi->format_header)
+ s = format (s, "\n%U%U", format_white_space, indent - 2,
+ pi->format_header,
+ /* next protocol header */ (void *) tcp + header_bytes,
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
new file mode 100644
index 00000000..63d6fd87
--- /dev/null
+++ b/src/vnet/tcp/tcp_input.c
@@ -0,0 +1,3215 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/sparse_vec.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/tcp/tcp.h>
+#include <vnet/session/session.h>
+#include <math.h>
+
+static char *tcp_error_strings[] = {
+#define tcp_error(n,s) s,
+#include <vnet/tcp/tcp_error.def>
+#undef tcp_error
+};
+
+/* All TCP nodes have the same outgoing arcs */
+#define foreach_tcp_state_next \
+ _ (DROP, "error-drop") \
+ _ (TCP4_OUTPUT, "tcp4-output") \
+ _ (TCP6_OUTPUT, "tcp6-output")
+
+typedef enum _tcp_established_next
+{
+#define _(s,n) TCP_ESTABLISHED_NEXT_##s,
+ foreach_tcp_state_next
+#undef _
+ TCP_ESTABLISHED_N_NEXT,
+} tcp_established_next_t;
+
+typedef enum _tcp_rcv_process_next
+{
+#define _(s,n) TCP_RCV_PROCESS_NEXT_##s,
+ foreach_tcp_state_next
+#undef _
+ TCP_RCV_PROCESS_N_NEXT,
+} tcp_rcv_process_next_t;
+
+typedef enum _tcp_syn_sent_next
+{
+#define _(s,n) TCP_SYN_SENT_NEXT_##s,
+ foreach_tcp_state_next
+#undef _
+ TCP_SYN_SENT_N_NEXT,
+} tcp_syn_sent_next_t;
+
+typedef enum _tcp_listen_next
+{
+#define _(s,n) TCP_LISTEN_NEXT_##s,
+ foreach_tcp_state_next
+#undef _
+ TCP_LISTEN_N_NEXT,
+} tcp_listen_next_t;
+
+/* Generic, state independent indices */
+typedef enum _tcp_state_next
+{
+#define _(s,n) TCP_NEXT_##s,
+ foreach_tcp_state_next
+#undef _
+ TCP_STATE_N_NEXT,
+} tcp_state_next_t;
+
+#define tcp_next_output(is_ip4) (is_ip4 ? TCP_NEXT_TCP4_OUTPUT \
+ : TCP_NEXT_TCP6_OUTPUT)
+
+vlib_node_registration_t tcp4_established_node;
+vlib_node_registration_t tcp6_established_node;
+
+/**
+ * Validate segment sequence number. As per RFC793:
+ *
+ * Segment Receive Test
+ * Length Window
+ * ------- ------- -------------------------------------------
+ * 0 0 SEG.SEQ = RCV.NXT
+ * 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+ * >0 0 not acceptable
+ * >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+ * or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
+ *
+ * This ultimately consists of checking whether the segment falls within
+ * the window. The one important difference from RFC793 is that we use
+ * rcv_las, i.e., the value of rcv_nxt when the last ack was sent, instead
+ * of rcv_nxt, since that is the peer's reference point when computing our
+ * receive window.
+ *
+ * The strict form of the test,
+ * seq_leq (end_seq, tc->rcv_las + tc->rcv_wnd) && seq_geq (seq, tc->rcv_las)
+ * is however too strict when we have retransmits. Instead we just check
+ * that the segment does not start beyond the right edge and that its end
+ * is not below the left edge.
+ *
+ * N.B. rcv_nxt and rcv_wnd are both updated in this node if acks are sent, so
+ * use rcv_nxt in the right edge window test instead of rcv_las.
+ *
+ */
+always_inline u8
+tcp_segment_in_rcv_wnd (tcp_connection_t * tc, u32 seq, u32 end_seq)
+{
+ return (seq_geq (end_seq, tc->rcv_las)
+ && seq_leq (seq, tc->rcv_nxt + tc->rcv_wnd));
+}
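+
+/*
+ * Worked example (hypothetical numbers): with rcv_las = 1000, rcv_nxt =
+ * 1500 and rcv_wnd = 1000, a retransmitted segment [900, 1100) is
+ * accepted: its end (1100) is not below the left edge (1000) and its
+ * start (900) is not beyond the right edge (1500 + 1000 = 2500). The
+ * strict RFC793 test would have rejected it, since 900 < rcv_las.
+ */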
+
+/**
+ * Parse TCP header options.
+ *
+ * @param th TCP header
+ * @param to TCP options data structure to be populated
+ * @return -1 if parsing failed
+ */
+int
+tcp_options_parse (tcp_header_t * th, tcp_options_t * to)
+{
+ const u8 *data;
+ u8 opt_len, opts_len, kind;
+ int j;
+ sack_block_t b;
+
+ opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t);
+ data = (const u8 *) (th + 1);
+
+ /* Zero out all flags but those set in SYN */
+ to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE);
+
+ for (; opts_len > 0; opts_len -= opt_len, data += opt_len)
+ {
+ kind = data[0];
+
+ /* Get options length */
+ if (kind == TCP_OPTION_EOL)
+ break;
+ else if (kind == TCP_OPTION_NOOP)
+ {
+ opt_len = 1;
+ continue;
+ }
+ else
+ {
+ /* broken options */
+ if (opts_len < 2)
+ return -1;
+ opt_len = data[1];
+
+ /* weird option length */
+ if (opt_len < 2 || opt_len > opts_len)
+ return -1;
+ }
+
+ /* Parse options */
+ switch (kind)
+ {
+ case TCP_OPTION_MSS:
+ if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th))
+ {
+ to->flags |= TCP_OPTS_FLAG_MSS;
+ to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2));
+ }
+ break;
+ case TCP_OPTION_WINDOW_SCALE:
+ if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th))
+ {
+ to->flags |= TCP_OPTS_FLAG_WSCALE;
+ to->wscale = data[2];
+ if (to->wscale > TCP_MAX_WND_SCALE)
+ {
+ clib_warning ("Illegal window scaling value: %d",
+ to->wscale);
+ to->wscale = TCP_MAX_WND_SCALE;
+ }
+ }
+ break;
+ case TCP_OPTION_TIMESTAMP:
+ if (opt_len == TCP_OPTION_LEN_TIMESTAMP)
+ {
+ to->flags |= TCP_OPTS_FLAG_TSTAMP;
+ to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2));
+ to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6));
+ }
+ break;
+ case TCP_OPTION_SACK_PERMITTED:
+ if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th))
+ to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
+ break;
+ case TCP_OPTION_SACK_BLOCK:
+ /* If SACK permitted was not advertised or a SYN, break */
+ if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th))
+ break;
+
+ /* If too short or not correctly formatted, break */
+ if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK))
+ break;
+
+ to->flags |= TCP_OPTS_FLAG_SACK;
+ to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK;
+ vec_reset_length (to->sacks);
+ for (j = 0; j < to->n_sack_blocks; j++)
+ {
+ b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 8 * j));
+ b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 8 * j));
+ vec_add1 (to->sacks, b);
+ }
+ break;
+ default:
+ /* Nothing to see here */
+ continue;
+ }
+ }
+ return 0;
+}
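+
+/*
+ * Layout sketch (hypothetical SYN options): options are kind/length/value
+ * runs, e.g. MSS 1460 (kind 2, len 4), two NOOP pads (kind 1) and SACK
+ * permitted (kind 4, len 2) occupy 8 bytes:
+ *
+ *   02 04 05 b4 01 01 04 02
+ *
+ * The loop above walks these bytes, stops at EOL, and treats any length
+ * that under- or over-runs opts_len as malformed.
+ */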
+
+/**
+ * RFC1323: Check against wrapped sequence numbers (PAWS). If we have a
+ * timestamp to echo and it's less than tsval_recent, drop the segment
+ * but still send an ACK in order to retain TCP's mechanism for detecting
+ * and recovering from half-open connections.
+ *
+ * Or at least that's what the theory says. It seems that this might not work
+ * very well with packet reordering and fast retransmit. XXX
+ */
+always_inline int
+tcp_segment_check_paws (tcp_connection_t * tc)
+{
+ return tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent
+ && timestamp_lt (tc->rcv_opts.tsval, tc->tsval_recent);
+}
+
+/**
+ * Update tsval recent
+ */
+always_inline void
+tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end)
+{
+ /*
+ * RFC1323: If Last.ACK.sent falls within the range of sequence numbers
+ * of an incoming segment:
+ * SEG.SEQ <= Last.ACK.sent < SEG.SEQ + SEG.LEN
+ * then the TSval from the segment is copied to TS.Recent;
+ * otherwise, the TSval is ignored.
+ */
+ if (tcp_opts_tstamp (&tc->rcv_opts) && seq_leq (seq, tc->rcv_las)
+ && seq_leq (tc->rcv_las, seq_end))
+ {
+ ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
+ tc->tsval_recent = tc->rcv_opts.tsval;
+ tc->tsval_recent_age = tcp_time_now ();
+ }
+}
+
+/**
+ * Validate incoming segment as per RFC793 p. 69 and RFC1323 p. 19
+ *
+ * It first verifies whether the segment has a wrapped sequence number (PAWS)
+ * and then does the processing associated with the first four steps (ignoring
+ * security and precedence): sequence number, rst bit and syn bit checks.
+ *
+ * @return 0 if the segment passes validation.
+ */
+static int
+tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0,
+ vlib_buffer_t * b0, tcp_header_t * th0, u32 * next0)
+{
+ if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0)))
+ return -1;
+
+ if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->rcv_opts)))
+ {
+ clib_warning ("options parse error");
+ return -1;
+ }
+
+ if (tcp_segment_check_paws (tc0))
+ {
+ if (CLIB_DEBUG > 2)
+ {
+ clib_warning ("paws failed\n%U", format_tcp_connection, tc0, 2);
+ clib_warning ("seq %u seq_end %u ack %u",
+ vnet_buffer (b0)->tcp.seq_number - tc0->irs,
+ vnet_buffer (b0)->tcp.seq_end - tc0->irs,
+ vnet_buffer (b0)->tcp.ack_number - tc0->iss);
+ }
+ TCP_EVT_DBG (TCP_EVT_PAWS_FAIL, tc0, vnet_buffer (b0)->tcp.seq_number,
+ vnet_buffer (b0)->tcp.seq_end);
+
+      /* If tsval_recent is over 24 days old, i.e., it hasn't been updated
+       * within TCP_PAWS_IDLE, invalidate it instead of dropping the
+       * segment. */
+ if (timestamp_lt (tc0->tsval_recent_age + TCP_PAWS_IDLE,
+ tcp_time_now ()))
+ {
+ /* Age isn't reset until we get a valid tsval (bsd inspired) */
+ tc0->tsval_recent = 0;
+ clib_warning ("paws failed - really old segment. REALLY?");
+ }
+ else
+ {
+ /* Drop after ack if not rst */
+ if (!tcp_rst (th0))
+ {
+ tcp_make_ack (tc0, b0);
+ *next0 = tcp_next_output (tc0->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0);
+ return -1;
+ }
+ }
+ }
+
+ /* 1st: check sequence number */
+ if (!tcp_segment_in_rcv_wnd (tc0, vnet_buffer (b0)->tcp.seq_number,
+ vnet_buffer (b0)->tcp.seq_end))
+ {
+ /* If our window is 0 and the packet is in sequence, let it pass
+ * through for ack processing. It should be dropped later.*/
+ if (tc0->rcv_wnd == 0
+ && tc0->rcv_nxt == vnet_buffer (b0)->tcp.seq_number)
+ {
+ /* TODO Should segment be tagged? */
+ }
+ else
+ {
+ /* If not RST, send dup ack */
+ if (!tcp_rst (th0))
+ {
+ tcp_make_ack (tc0, b0);
+ *next0 = tcp_next_output (tc0->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc0);
+ }
+ return -1;
+ }
+ }
+
+ /* 2nd: check the RST bit */
+ if (tcp_rst (th0))
+ {
+ tcp_connection_reset (tc0);
+ return -1;
+ }
+
+ /* 3rd: check security and precedence (skip) */
+
+ /* 4th: check the SYN bit */
+ if (tcp_syn (th0))
+ {
+ /* TODO implement RFC 5961 */
+ if (tc0->state == TCP_STATE_SYN_RCVD)
+ {
+ tcp_make_synack (tc0, b0);
+ TCP_EVT_DBG (TCP_EVT_SYN_RCVD, tc0, 0);
+ }
+ else
+ {
+ tcp_make_ack (tc0, b0);
+ TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, tc0);
+ }
+ *next0 = tcp_next_output (tc0->c_is_ip4);
+ return -1;
+ }
+
+ /* If segment in window, save timestamp */
+ tcp_update_timestamp (tc0, vnet_buffer (b0)->tcp.seq_number,
+ vnet_buffer (b0)->tcp.seq_end);
+ return 0;
+}
+
+always_inline int
+tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0)
+{
+ /* SND.UNA =< SEG.ACK =< SND.NXT */
+ return (seq_leq (tc0->snd_una, vnet_buffer (tb0)->tcp.ack_number)
+ && seq_leq (vnet_buffer (tb0)->tcp.ack_number, tc0->snd_nxt));
+}
+
+/**
+ * Compute smoothed RTT as per VJ's '88 SIGCOMM and RFC6298
+ *
+ * Note that although in the original article srtt and rttvar are scaled
+ * to minimize round-off errors, here they are not. Instead, we rely on
+ * higher-precision time measurements.
+ *
+ * TODO support us rtt resolution
+ */
+static void
+tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt)
+{
+ int err, diff;
+
+ if (tc->srtt != 0)
+ {
+ err = mrtt - tc->srtt;
+
+ /* XXX A drop in RTT results in a RTTVAR increase and a bigger RTO.
+ * The increase should be bounded */
+ tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1);
+ diff = (clib_abs (err) - (int) tc->rttvar) >> 2;
+ tc->rttvar = clib_max ((int) tc->rttvar + diff, 1);
+ }
+ else
+ {
+ /* First measurement. */
+ tc->srtt = mrtt;
+ tc->rttvar = mrtt >> 1;
+ }
+}
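+
+/*
+ * Worked example (hypothetical values): with srtt = 100, rttvar = 20 and
+ * a new measurement mrtt = 180, err = 80, so srtt becomes
+ * 100 + (80 >> 3) = 110 and rttvar becomes 20 + ((80 - 20) >> 2) = 35:
+ * 1/8 of the error moves into srtt and 1/4 of the deviation change into
+ * rttvar, as per RFC6298.
+ */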
+
+void
+tcp_update_rto (tcp_connection_t * tc)
+{
+ tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX);
+ tc->rto = clib_max (tc->rto, TCP_RTO_MIN);
+}
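+
+/*
+ * Continuing the example above: rto = srtt + 4 * rttvar = 110 + 140 =
+ * 250 ticks, then clamped to [TCP_RTO_MIN, TCP_RTO_MAX].
+ */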
+
+/**
+ * Update RTT estimate and RTO timer
+ *
+ * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK
+ * timing. Middle boxes are known to fiddle with TCP options so we
+ * should give higher priority to ACK timing.
+ *
+ * This should be called only if previously sent bytes have been acked.
+ *
+ * @return 1 if a valid rtt measurement was made, 0 otherwise
+ */
+static int
+tcp_update_rtt (tcp_connection_t * tc, u32 ack)
+{
+ u32 mrtt = 0;
+
+ /* Karn's rule, part 1. Don't use retransmitted segments to estimate
+ * RTT because they're ambiguous. */
+ if (tcp_in_cong_recovery (tc) || tc->sack_sb.sacked_bytes)
+ goto done;
+
+ if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq))
+ {
+ mrtt = tcp_time_now () - tc->rtt_ts;
+ }
+ /* As per RFC7323 TSecr can be used for RTTM only if the segment advances
+ * snd_una, i.e., the left side of the send window:
+ * seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
+ else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
+ {
+ mrtt = tcp_time_now () - tc->rcv_opts.tsecr;
+ }
+
+ /* Ignore dubious measurements */
+ if (mrtt == 0 || mrtt > TCP_RTT_MAX)
+ goto done;
+
+ tcp_estimate_rtt (tc, mrtt);
+
+done:
+
+ /* Allow measuring of a new RTT */
+ tc->rtt_ts = 0;
+
+ /* If we got here something must've been ACKed so make sure boff is 0,
+ * even if mrtt is not valid, since we update the rto below */
+ tc->rto_boff = 0;
+ tcp_update_rto (tc);
+
+ return mrtt != 0 && mrtt <= TCP_RTT_MAX;
+}
+
+/**
+ * Dequeue bytes that have been acked and while at it update RTT estimates.
+ */
+static void
+tcp_dequeue_acked (tcp_connection_t * tc, u32 ack)
+{
+ /* Dequeue the newly ACKed and SACKed bytes */
+ stream_session_dequeue_drop (&tc->connection,
+ tc->bytes_acked + tc->sack_sb.snd_una_adv);
+
+ tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
+
+ /* Update rtt and rto */
+ tcp_update_rtt (tc, ack);
+
+ /* If everything has been acked, stop retransmit timer
+ * otherwise update. */
+ tcp_retransmit_timer_update (tc);
+}
+
+/**
+ * Check if the ack is a duplicate, as per RFC5681 Sec. 2
+ */
+static u8
+tcp_ack_is_dupack (tcp_connection_t * tc, vlib_buffer_t * b, u32 prev_snd_wnd,
+ u32 prev_snd_una)
+{
+ return ((vnet_buffer (b)->tcp.ack_number == prev_snd_una)
+ && seq_gt (tc->snd_una_max, tc->snd_una)
+ && (vnet_buffer (b)->tcp.seq_end == vnet_buffer (b)->tcp.seq_number)
+ && (prev_snd_wnd == tc->snd_wnd));
+}
+
+/**
+ * Checks if ack is a congestion control event.
+ */
+static u8
+tcp_ack_is_cc_event (tcp_connection_t * tc, vlib_buffer_t * b,
+ u32 prev_snd_wnd, u32 prev_snd_una, u8 * is_dack)
+{
+ /* Check if ack is duplicate. Per RFC 6675, ACKs that SACK new data are
+ * defined to be 'duplicate' */
+ *is_dack = tc->sack_sb.last_sacked_bytes
+ || tcp_ack_is_dupack (tc, b, prev_snd_wnd, prev_snd_una);
+
+ return ((*is_dack || tcp_in_cong_recovery (tc)) && !tcp_is_lost_fin (tc));
+}
+
+void
+scoreboard_remove_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole)
+{
+ sack_scoreboard_hole_t *next, *prev;
+
+ if (hole->next != TCP_INVALID_SACK_HOLE_INDEX)
+ {
+ next = pool_elt_at_index (sb->holes, hole->next);
+ next->prev = hole->prev;
+ }
+ else
+ {
+ sb->tail = hole->prev;
+ }
+
+ if (hole->prev != TCP_INVALID_SACK_HOLE_INDEX)
+ {
+ prev = pool_elt_at_index (sb->holes, hole->prev);
+ prev->next = hole->next;
+ }
+ else
+ {
+ sb->head = hole->next;
+ }
+
+ if (scoreboard_hole_index (sb, hole) == sb->cur_rxt_hole)
+ sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
+
+ /* Poison the entry */
+ if (CLIB_DEBUG > 0)
+ memset (hole, 0xfe, sizeof (*hole));
+
+ pool_put (sb->holes, hole);
+}
+
+sack_scoreboard_hole_t *
+scoreboard_insert_hole (sack_scoreboard_t * sb, u32 prev_index,
+ u32 start, u32 end)
+{
+ sack_scoreboard_hole_t *hole, *next, *prev;
+ u32 hole_index;
+
+ pool_get (sb->holes, hole);
+ memset (hole, 0, sizeof (*hole));
+
+ hole->start = start;
+ hole->end = end;
+ hole_index = scoreboard_hole_index (sb, hole);
+
+ prev = scoreboard_get_hole (sb, prev_index);
+ if (prev)
+ {
+ hole->prev = prev_index;
+ hole->next = prev->next;
+
+ if ((next = scoreboard_next_hole (sb, hole)))
+ next->prev = hole_index;
+ else
+ sb->tail = hole_index;
+
+ prev->next = hole_index;
+ }
+ else
+ {
+ sb->head = hole_index;
+ hole->prev = TCP_INVALID_SACK_HOLE_INDEX;
+ hole->next = TCP_INVALID_SACK_HOLE_INDEX;
+ }
+
+ return hole;
+}
+
+void
+scoreboard_update_bytes (tcp_connection_t * tc, sack_scoreboard_t * sb)
+{
+ sack_scoreboard_hole_t *hole, *prev;
+ u32 bytes = 0, blks = 0;
+
+ sb->lost_bytes = 0;
+ sb->sacked_bytes = 0;
+ hole = scoreboard_last_hole (sb);
+ if (!hole)
+ return;
+
+ if (seq_gt (sb->high_sacked, hole->end))
+ {
+ bytes = sb->high_sacked - hole->end;
+ blks = 1;
+ }
+
+ while ((prev = scoreboard_prev_hole (sb, hole))
+ && (bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss
+ && blks < TCP_DUPACK_THRESHOLD))
+ {
+ bytes += hole->start - prev->end;
+ blks++;
+ hole = prev;
+ }
+
+ while (hole)
+ {
+ sb->lost_bytes += scoreboard_hole_bytes (hole);
+ hole->is_lost = 1;
+ prev = hole;
+ hole = scoreboard_prev_hole (sb, hole);
+ if (hole)
+ bytes += prev->start - hole->end;
+ }
+ sb->sacked_bytes = bytes;
+}
+
+/**
+ * Figure out the next hole to retransmit
+ *
+ * Follows logic proposed in RFC6675 Sec. 4, NextSeg()
+ */
+sack_scoreboard_hole_t *
+scoreboard_next_rxt_hole (sack_scoreboard_t * sb,
+ sack_scoreboard_hole_t * start,
+ u8 have_sent_1_smss,
+ u8 * can_rescue, u8 * snd_limited)
+{
+ sack_scoreboard_hole_t *hole = 0;
+
+ hole = start ? start : scoreboard_first_hole (sb);
+ while (hole && seq_leq (hole->end, sb->high_rxt) && hole->is_lost)
+ hole = scoreboard_next_hole (sb, hole);
+
+ /* Nothing, return */
+ if (!hole)
+ {
+ sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
+ return 0;
+ }
+
+ /* Rule (1): if higher than rxt, less than high_sacked and lost */
+ if (hole->is_lost && seq_lt (hole->start, sb->high_sacked))
+ {
+ sb->cur_rxt_hole = scoreboard_hole_index (sb, hole);
+ }
+ else
+ {
+ /* Rule (2): output takes care of transmitting new data */
+ if (!have_sent_1_smss)
+ {
+ hole = 0;
+ sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
+ }
+ /* Rule (3): if hole not lost */
+ else if (seq_lt (hole->start, sb->high_sacked))
+ {
+ *snd_limited = 1;
+ sb->cur_rxt_hole = scoreboard_hole_index (sb, hole);
+ }
+ /* Rule (4): if hole beyond high_sacked */
+ else
+ {
+ ASSERT (seq_geq (hole->start, sb->high_sacked));
+ *snd_limited = 1;
+ *can_rescue = 1;
+ /* HighRxt MUST NOT be updated */
+ return 0;
+ }
+ }
+
+ if (hole && seq_lt (sb->high_rxt, hole->start))
+ sb->high_rxt = hole->start;
+
+ return hole;
+}
+
+void
+scoreboard_init_high_rxt (sack_scoreboard_t * sb, u32 seq)
+{
+ sack_scoreboard_hole_t *hole;
+ hole = scoreboard_first_hole (sb);
+ if (hole)
+ {
+ seq = seq_gt (seq, hole->start) ? seq : hole->start;
+ sb->cur_rxt_hole = sb->head;
+ }
+ sb->high_rxt = seq;
+}
+
+/**
+ * Test that scoreboard is sane after recovery
+ *
+ * Returns 1 if the scoreboard is empty or if the first hole is beyond
+ * snd_una.
+ */
+u8
+tcp_scoreboard_is_sane_post_recovery (tcp_connection_t * tc)
+{
+ sack_scoreboard_hole_t *hole;
+ hole = scoreboard_first_hole (&tc->sack_sb);
+ return (!hole || seq_geq (hole->start, tc->snd_una));
+}
+
+void
+tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
+{
+ sack_scoreboard_t *sb = &tc->sack_sb;
+ sack_block_t *blk, tmp;
+ sack_scoreboard_hole_t *hole, *next_hole, *last_hole;
+ u32 blk_index = 0, old_sacked_bytes, hole_index;
+ int i, j;
+
+ sb->last_sacked_bytes = 0;
+ sb->snd_una_adv = 0;
+ old_sacked_bytes = sb->sacked_bytes;
+ sb->last_bytes_delivered = 0;
+
+ if (!tcp_opts_sack (&tc->rcv_opts)
+ && sb->head == TCP_INVALID_SACK_HOLE_INDEX)
+ return;
+
+ /* Remove invalid blocks */
+ blk = tc->rcv_opts.sacks;
+ while (blk < vec_end (tc->rcv_opts.sacks))
+ {
+ if (seq_lt (blk->start, blk->end)
+ && seq_gt (blk->start, tc->snd_una)
+ && seq_gt (blk->start, ack) && seq_leq (blk->end, tc->snd_una_max))
+ {
+ blk++;
+ continue;
+ }
+ vec_del1 (tc->rcv_opts.sacks, blk - tc->rcv_opts.sacks);
+ }
+
+ /* Add block for cumulative ack */
+ if (seq_gt (ack, tc->snd_una))
+ {
+ tmp.start = tc->snd_una;
+ tmp.end = ack;
+ vec_add1 (tc->rcv_opts.sacks, tmp);
+ }
+
+ if (vec_len (tc->rcv_opts.sacks) == 0)
+ return;
+
+ tcp_scoreboard_trace_add (tc, ack);
+
+ /* Make sure blocks are ordered */
+ for (i = 0; i < vec_len (tc->rcv_opts.sacks); i++)
+ for (j = i + 1; j < vec_len (tc->rcv_opts.sacks); j++)
+ if (seq_lt (tc->rcv_opts.sacks[j].start, tc->rcv_opts.sacks[i].start))
+ {
+ tmp = tc->rcv_opts.sacks[i];
+ tc->rcv_opts.sacks[i] = tc->rcv_opts.sacks[j];
+ tc->rcv_opts.sacks[j] = tmp;
+ }
+
+ if (sb->head == TCP_INVALID_SACK_HOLE_INDEX)
+ {
+ /* If no holes, insert the first that covers all outstanding bytes */
+ last_hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX,
+ tc->snd_una, tc->snd_una_max);
+ sb->tail = scoreboard_hole_index (sb, last_hole);
+ tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1];
+ sb->high_sacked = tmp.end;
+ }
+ else
+ {
+ /* If we have holes but snd_una_max is beyond the last hole, update
+ * last hole end */
+ tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1];
+ last_hole = scoreboard_last_hole (sb);
+ if (seq_gt (tc->snd_una_max, last_hole->end))
+ {
+ if (seq_geq (last_hole->start, sb->high_sacked))
+ {
+ last_hole->end = tc->snd_una_max;
+ }
+ /* New hole after high sacked block */
+ else if (seq_lt (sb->high_sacked, tc->snd_una_max))
+ {
+ scoreboard_insert_hole (sb, sb->tail, sb->high_sacked,
+ tc->snd_una_max);
+ }
+ }
+ /* Keep track of max byte sacked for when the last hole
+ * is acked */
+ if (seq_gt (tmp.end, sb->high_sacked))
+ sb->high_sacked = tmp.end;
+ }
+
+ /* Walk the holes with the SACK blocks */
+ hole = pool_elt_at_index (sb->holes, sb->head);
+ while (hole && blk_index < vec_len (tc->rcv_opts.sacks))
+ {
+ blk = &tc->rcv_opts.sacks[blk_index];
+ if (seq_leq (blk->start, hole->start))
+ {
+ /* Block covers hole. Remove hole */
+ if (seq_geq (blk->end, hole->end))
+ {
+ next_hole = scoreboard_next_hole (sb, hole);
+
+ /* Byte accounting: snd_una needs to be advanced */
+ if (blk->end == ack)
+ {
+ if (next_hole)
+ {
+ if (seq_lt (ack, next_hole->start))
+ sb->snd_una_adv = next_hole->start - ack;
+ sb->last_bytes_delivered +=
+ next_hole->start - hole->end;
+ }
+ else
+ {
+ ASSERT (seq_geq (sb->high_sacked, ack));
+ sb->snd_una_adv = sb->high_sacked - ack;
+ sb->last_bytes_delivered += sb->high_sacked - hole->end;
+ }
+ }
+
+ scoreboard_remove_hole (sb, hole);
+ hole = next_hole;
+ }
+ /* Partial 'head' overlap */
+ else
+ {
+ if (seq_gt (blk->end, hole->start))
+ {
+ hole->start = blk->end;
+ }
+ blk_index++;
+ }
+ }
+ else
+ {
+ /* Hole must be split */
+ if (seq_lt (blk->end, hole->end))
+ {
+ hole_index = scoreboard_hole_index (sb, hole);
+ next_hole = scoreboard_insert_hole (sb, hole_index, blk->end,
+ hole->end);
+
+ /* Pool might've moved */
+ hole = scoreboard_get_hole (sb, hole_index);
+ hole->end = blk->start;
+ blk_index++;
+ ASSERT (hole->next == scoreboard_hole_index (sb, next_hole));
+ }
+ else if (seq_lt (blk->start, hole->end))
+ {
+ hole->end = blk->start;
+ }
+ hole = scoreboard_next_hole (sb, hole);
+ }
+ }
+
+ scoreboard_update_bytes (tc, sb);
+ sb->last_sacked_bytes = sb->sacked_bytes
+ - (old_sacked_bytes - sb->last_bytes_delivered);
+ ASSERT (sb->last_sacked_bytes <= sb->sacked_bytes);
+ ASSERT (sb->sacked_bytes == 0
+ || sb->sacked_bytes < tc->snd_una_max - seq_max (tc->snd_una, ack));
+ ASSERT (sb->last_sacked_bytes + sb->lost_bytes <= tc->snd_una_max
+ - seq_max (tc->snd_una, ack));
+ ASSERT (sb->head == TCP_INVALID_SACK_HOLE_INDEX || tcp_in_recovery (tc)
+ || sb->holes[sb->head].start == ack + sb->snd_una_adv);
+}
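+
+/*
+ * Worked example (hypothetical sequence space): with snd_una = 0,
+ * snd_una_max = 3000 and an empty scoreboard, a SACK block [1000, 2000)
+ * first creates the hole [0, 3000) covering all outstanding bytes, then
+ * splits it into [0, 1000) and [2000, 3000), with high_sacked = 2000. A
+ * later cumulative ack of 1000 is turned into the block [0, 1000) above
+ * and removes the first hole entirely.
+ */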
+
+/**
+ * Try to update snd_wnd based on feedback received from peer.
+ *
+ * If successful, and new window is 'effectively' 0, activate persist
+ * timer.
+ */
+static void
+tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd)
+{
+ /* If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set
+ * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */
+ if (seq_lt (tc->snd_wl1, seq)
+ || (tc->snd_wl1 == seq && seq_leq (tc->snd_wl2, ack)))
+ {
+ tc->snd_wnd = snd_wnd;
+ tc->snd_wl1 = seq;
+ tc->snd_wl2 = ack;
+ TCP_EVT_DBG (TCP_EVT_SND_WND, tc);
+
+ if (tc->snd_wnd < tc->snd_mss)
+ {
+ /* Set persist timer if not set and we just got 0 wnd */
+ if (!tcp_timer_is_active (tc, TCP_TIMER_PERSIST)
+ && !tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT))
+ tcp_persist_timer_set (tc);
+ }
+ else
+ {
+ tcp_persist_timer_reset (tc);
+ if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
+ {
+ tc->rto_boff = 0;
+ tcp_update_rto (tc);
+ }
+ }
+ }
+}
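+
+/*
+ * Example: a segment with seq == snd_wl1 but a newer ack still updates
+ * the window (same data, fresher feedback), whereas one with
+ * seq < snd_wl1 carries older feedback and is ignored; this keeps stale
+ * segments from shrinking snd_wnd.
+ */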
+
+void
+tcp_cc_init_congestion (tcp_connection_t * tc)
+{
+ tcp_fastrecovery_on (tc);
+ tc->snd_congestion = tc->snd_una_max;
+ tc->cc_algo->congestion (tc);
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 4);
+}
+
+static void
+tcp_cc_recovery_exit (tcp_connection_t * tc)
+{
+ /* Deflate rto */
+ tc->rto_boff = 0;
+ tcp_update_rto (tc);
+ tc->snd_rxt_ts = 0;
+ tc->snd_nxt = tc->snd_una_max;
+ tcp_recovery_off (tc);
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
+}
+
+void
+tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
+{
+ tc->cc_algo->recovered (tc);
+ tc->snd_rxt_bytes = 0;
+ tc->rcv_dupacks = 0;
+ tc->snd_nxt = tc->snd_una_max;
+ tcp_fastrecovery_off (tc);
+ tcp_fastrecovery_1_smss_off (tc);
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
+}
+
+static void
+tcp_cc_congestion_undo (tcp_connection_t * tc)
+{
+ tc->cwnd = tc->prev_cwnd;
+ tc->ssthresh = tc->prev_ssthresh;
+ tc->snd_nxt = tc->snd_una_max;
+ tc->rcv_dupacks = 0;
+ if (tcp_in_recovery (tc))
+ tcp_cc_recovery_exit (tc);
+ ASSERT (tc->rto_boff == 0);
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 5);
+ /* TODO extend for fastrecovery */
+}
+
+static u8
+tcp_cc_is_spurious_retransmit (tcp_connection_t * tc)
+{
+ return (tcp_in_recovery (tc) && tc->rto_boff == 1
+ && tc->snd_rxt_ts
+ && tcp_opts_tstamp (&tc->rcv_opts)
+ && timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts));
+}
+
+int
+tcp_cc_recover (tcp_connection_t * tc)
+{
+ ASSERT (tcp_in_cong_recovery (tc));
+ if (tcp_cc_is_spurious_retransmit (tc))
+ {
+ tcp_cc_congestion_undo (tc);
+ return 1;
+ }
+
+ if (tcp_in_recovery (tc))
+ tcp_cc_recovery_exit (tc);
+ else if (tcp_in_fastrecovery (tc))
+ tcp_cc_fastrecovery_exit (tc);
+
+ ASSERT (tc->rto_boff == 0);
+ ASSERT (!tcp_in_cong_recovery (tc));
+ ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
+ return 0;
+}
+
+static void
+tcp_cc_update (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+ ASSERT (!tcp_in_cong_recovery (tc) || tcp_is_lost_fin (tc));
+
+ /* Congestion avoidance */
+ tc->cc_algo->rcv_ack (tc);
+ tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+
+ /* If a cumulative ack, make sure dupacks is 0 */
+ tc->rcv_dupacks = 0;
+
+ /* When dupacks hits the threshold we only enter fast retransmit if
+ * cumulative ack covers more than snd_congestion. Should snd_una
+ * wrap this test may fail under otherwise valid circumstances.
+ * Therefore, proactively update snd_congestion when wrap detected. */
+ if (PREDICT_FALSE
+ (seq_leq (tc->snd_congestion, tc->snd_una - tc->bytes_acked)
+ && seq_gt (tc->snd_congestion, tc->snd_una)))
+ tc->snd_congestion = tc->snd_una - 1;
+}
+
+static u8
+tcp_should_fastrecover_sack (tcp_connection_t * tc)
+{
+ return (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss < tc->sack_sb.sacked_bytes;
+}
+
+static u8
+tcp_should_fastrecover (tcp_connection_t * tc)
+{
+ return (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD
+ || tcp_should_fastrecover_sack (tc));
+}
+
+/**
+ * One function to rule them all ... and in the darkness bind them
+ */
+static void
+tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack)
+{
+ u32 rxt_delivered;
+
+ /*
+ * Duplicate ACK. Check if we should enter fast recovery, or if already in
+ * it account for the bytes that left the network.
+ */
+ if (is_dack)
+ {
+ ASSERT (tc->snd_una != tc->snd_una_max
+ || tc->sack_sb.last_sacked_bytes);
+
+ tc->rcv_dupacks++;
+
+ if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD && !tc->bytes_acked)
+ {
+ ASSERT (tcp_in_fastrecovery (tc));
+ /* Pure duplicate ack. If some data got acked, it's handled below */
+ tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
+ return;
+ }
+ else if (tcp_should_fastrecover (tc))
+ {
+ /* Things are already bad */
+ if (tcp_in_cong_recovery (tc))
+ {
+ tc->rcv_dupacks = 0;
+ goto partial_ack_test;
+ }
+
+ /* If either of the two conditions below holds, reset dupacks because
+ * we're probably after a timeout (RFC6582 heuristics):
+ * the cumulative ack does not cover more than the congestion threshold,
+ * and:
+ * 1) The following doesn't hold: the congestion window is greater
+ * than SMSS bytes and the difference between highest_ack
+ * and prev_highest_ack is at most 4*SMSS bytes
+ * 2) The echoed timestamp in the last non-dup ack does not equal the
+ * stored timestamp
+ */
+ if (seq_leq (tc->snd_una, tc->snd_congestion)
+ && ((!(tc->cwnd > tc->snd_mss
+ && tc->bytes_acked <= 4 * tc->snd_mss))
+ || (tc->rcv_opts.tsecr != tc->tsecr_last_ack)))
+ {
+ tc->rcv_dupacks = 0;
+ return;
+ }
+
+ tcp_cc_init_congestion (tc);
+ tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
+
+ /* The first segment MUST be retransmitted */
+ tcp_retransmit_first_unacked (tc);
+
+ /* Post retransmit update cwnd to ssthresh and account for the
+ * three segments that have left the network and should've been
+ * buffered at the receiver XXX */
+ tc->cwnd = tc->ssthresh + tc->rcv_dupacks * tc->snd_mss;
+ ASSERT (tc->cwnd >= tc->snd_mss);
+
+ /* If cwnd allows, send more data */
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ {
+ scoreboard_init_high_rxt (&tc->sack_sb,
+ tc->snd_una + tc->snd_mss);
+ tcp_fast_retransmit_sack (tc);
+ }
+ else
+ {
+ tcp_fast_retransmit_no_sack (tc);
+ }
+
+ return;
+ }
+ else if (!tc->bytes_acked
+ || (tc->bytes_acked && !tcp_in_cong_recovery (tc)))
+ {
+ tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
+ return;
+ }
+ else
+ goto partial_ack;
+ }
+
+partial_ack_test:
+
+ if (!tc->bytes_acked)
+ return;
+
+partial_ack:
+ /*
+ * Legitimate ACK. 1) See if we can exit recovery
+ */
+ /* XXX limit this only to first partial ack? */
+ tcp_retransmit_timer_update (tc);
+
+ if (seq_geq (tc->snd_una, tc->snd_congestion))
+ {
+ /* If spurious return, we've already updated everything */
+ if (tcp_cc_recover (tc))
+ {
+ tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+ return;
+ }
+
+ tc->snd_nxt = tc->snd_una_max;
+
+ /* Treat as congestion avoidance ack */
+ tc->cc_algo->rcv_ack (tc);
+ tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+ return;
+ }
+
+ /*
+ * Legitimate ACK. 2) If PARTIAL ACK try to retransmit
+ */
+ TCP_EVT_DBG (TCP_EVT_CC_PACK, tc);
+
+ /* RFC6675: If the incoming ACK is a cumulative acknowledgment,
+ * reset dupacks to 0 */
+ tc->rcv_dupacks = 0;
+
+ tcp_retransmit_first_unacked (tc);
+
+ /* Post RTO timeout don't try anything fancy */
+ if (tcp_in_recovery (tc))
+ return;
+
+ /* Remove retransmitted bytes that have been delivered */
+ ASSERT (tc->bytes_acked + tc->sack_sb.snd_una_adv
+ >= tc->sack_sb.last_bytes_delivered
+ || (tc->flags & TCP_CONN_FINSNT));
+
+ if (seq_lt (tc->snd_una, tc->sack_sb.high_rxt))
+ {
+ /* If we have sacks and we haven't gotten an ack beyond high_rxt,
+ * remove sacked bytes delivered */
+ rxt_delivered = tc->bytes_acked + tc->sack_sb.snd_una_adv
+ - tc->sack_sb.last_bytes_delivered;
+ ASSERT (tc->snd_rxt_bytes >= rxt_delivered);
+ tc->snd_rxt_bytes -= rxt_delivered;
+ }
+ else
+ {
+ /* Either all retransmitted holes have been acked, or we're
+ * "in the blind" and retransmitting segment by segment */
+ tc->snd_rxt_bytes = 0;
+ }
+
+ tc->cc_algo->rcv_cong_ack (tc, TCP_CC_PARTIALACK);
+
+ /*
+ * Since this was a partial ack, try to retransmit some more data
+ */
+ tcp_fast_retransmit (tc);
+}
+
+void
+tcp_cc_init (tcp_connection_t * tc)
+{
+ tc->cc_algo = tcp_cc_algo_get (TCP_CC_NEWRENO);
+ tc->cc_algo->init (tc);
+}
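+
+/* Note: only NewReno is wired up here; the callbacks registered under
+ * TCP_CC_NEWRENO live in tcp_newreno.c, added later in this patch. */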
+
+/**
+ * Process incoming ACK
+ */
+static int
+tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b,
+ tcp_header_t * th, u32 * next, u32 * error)
+{
+ u32 prev_snd_wnd, prev_snd_una;
+ u8 is_dack;
+
+ TCP_EVT_DBG (TCP_EVT_CC_STAT, tc);
+
+ /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */
+ if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
+ {
+ /* If we have outstanding data and this is within the window, accept it;
+ * probably a retransmit has timed out. Otherwise ack the segment and
+ * then drop it */
+ if (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una_max))
+ {
+ tcp_make_ack (tc, b);
+ *next = tcp_next_output (tc->c_is_ip4);
+ *error = TCP_ERROR_ACK_INVALID;
+ TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 0,
+ vnet_buffer (b)->tcp.ack_number);
+ return -1;
+ }
+
+ TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 2,
+ vnet_buffer (b)->tcp.ack_number);
+
+ tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
+ *error = TCP_ERROR_ACK_FUTURE;
+ }
+
+ /* If old ACK, probably it's an old dupack */
+ if (PREDICT_FALSE (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)))
+ {
+ *error = TCP_ERROR_ACK_OLD;
+ TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 1,
+ vnet_buffer (b)->tcp.ack_number);
+ if (tcp_in_fastrecovery (tc) && tc->rcv_dupacks == TCP_DUPACK_THRESHOLD)
+ {
+ TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc);
+ tcp_cc_handle_event (tc, 1);
+ }
+ /* Don't drop yet */
+ return 0;
+ }
+
+ /*
+ * Looks okay, process feedback
+ */
+
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number);
+
+ prev_snd_wnd = tc->snd_wnd;
+ prev_snd_una = tc->snd_una;
+ tcp_update_snd_wnd (tc, vnet_buffer (b)->tcp.seq_number,
+ vnet_buffer (b)->tcp.ack_number,
+ clib_net_to_host_u16 (th->window) << tc->snd_wscale);
+ tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una;
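+ /* snd_una may advance beyond the cumulative ack when contiguous sacked
+ * bytes sit directly above it; snd_una_adv is maintained by the SACK
+ * scoreboard (see tcp_rcv_sacks above). */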
+ tc->snd_una = vnet_buffer (b)->tcp.ack_number + tc->sack_sb.snd_una_adv;
+ tcp_validate_txf_size (tc, tc->bytes_acked);
+
+ if (tc->bytes_acked)
+ tcp_dequeue_acked (tc, vnet_buffer (b)->tcp.ack_number);
+
+ TCP_EVT_DBG (TCP_EVT_ACK_RCVD, tc);
+
+ /*
+ * Check if we have congestion event
+ */
+
+ if (tcp_ack_is_cc_event (tc, b, prev_snd_wnd, prev_snd_una, &is_dack))
+ {
+ tcp_cc_handle_event (tc, is_dack);
+ if (!tcp_in_cong_recovery (tc))
+ return 0;
+ *error = TCP_ERROR_ACK_DUP;
+ TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc, 1);
+ return vnet_buffer (b)->tcp.data_len ? 0 : -1;
+ }
+
+ /*
+ * Update congestion control (slow start/congestion avoidance)
+ */
+ tcp_cc_update (tc, b);
+
+ return 0;
+}
+
+static u8
+tcp_sack_vector_is_sane (sack_block_t * sacks)
+{
+ int i;
+ for (i = 1; i < vec_len (sacks); i++)
+ {
+ if (sacks[i - 1].end == sacks[i].start)
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * Build SACK list as per RFC2018.
+ *
+ * Makes sure the first block contains the segment that generated the current
+ * ACK and the following ones are the ones most recently reported in SACK
+ * blocks.
+ *
+ * @param tc TCP connection for which the SACK list is updated
+ * @param start Start sequence number of the newest SACK block
+ * @param end End sequence of the newest SACK block
+ */
+void
+tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end)
+{
+ sack_block_t *new_list = 0, *block = 0;
+ int i;
+
+ /* If the first segment is ooo add it to the list. Last write might've moved
+ * rcv_nxt over the first segment. */
+ if (seq_lt (tc->rcv_nxt, start))
+ {
+ vec_add2 (new_list, block, 1);
+ block->start = start;
+ block->end = end;
+ }
+
+ /* Find the blocks still worth keeping. */
+ for (i = 0; i < vec_len (tc->snd_sacks); i++)
+ {
+ /* Discard if rcv_nxt advanced beyond current block */
+ if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt))
+ continue;
+
+ /* Merge or drop if segment overlapped by the new segment */
+ if (block && (seq_geq (tc->snd_sacks[i].end, new_list[0].start)
+ && seq_leq (tc->snd_sacks[i].start, new_list[0].end)))
+ {
+ if (seq_lt (tc->snd_sacks[i].start, new_list[0].start))
+ new_list[0].start = tc->snd_sacks[i].start;
+ if (seq_lt (new_list[0].end, tc->snd_sacks[i].end))
+ new_list[0].end = tc->snd_sacks[i].end;
+ continue;
+ }
+
+ /* Save to new SACK list if we have space. */
+ if (vec_len (new_list) < TCP_MAX_SACK_BLOCKS)
+ {
+ vec_add1 (new_list, tc->snd_sacks[i]);
+ }
+ else
+ {
+ clib_warning ("sack discarded");
+ }
+ }
+
+ ASSERT (vec_len (new_list) <= TCP_MAX_SACK_BLOCKS);
+
+ /* Replace old vector with new one */
+ vec_free (tc->snd_sacks);
+ tc->snd_sacks = new_list;
+
+ /* Segments should not 'touch' */
+ ASSERT (tcp_sack_vector_is_sane (tc->snd_sacks));
+}
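+
+/* Worked example with illustrative values: rcv_nxt = 1000 and
+ * snd_sacks = [{1200,1500}, {2000,3000}]. A new ooo block [1500,2000)
+ * becomes the head of the list and both old blocks pass the overlap test
+ * above, so the function reports a single merged block {1200,3000}. */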
+
+/** Enqueue data for delivery to application */
+always_inline int
+tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
+ u16 data_len)
+{
+ int written, error = TCP_ERROR_ENQUEUED;
+
+ ASSERT (seq_geq (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt));
+
+ /* Pure ACK. Update rcv_nxt and be done. */
+ if (PREDICT_FALSE (data_len == 0))
+ {
+ return TCP_ERROR_PURE_ACK;
+ }
+
+ written = stream_session_enqueue_data (&tc->connection, b, 0,
+ 1 /* queue event */ , 1);
+
+ TCP_EVT_DBG (TCP_EVT_INPUT, tc, 0, data_len, written);
+
+ /* Update rcv_nxt */
+ if (PREDICT_TRUE (written == data_len))
+ {
+ tc->rcv_nxt += written;
+ }
+ /* If more data written than expected, account for out-of-order bytes. */
+ else if (written > data_len)
+ {
+ tc->rcv_nxt += written;
+
+ /* Send ACK confirming the update */
+ tc->flags |= TCP_CONN_SNDACK;
+ }
+ else if (written > 0)
+ {
+ /* We've written something but FIFO is probably full now */
+ tc->rcv_nxt += written;
+
+ /* Depending on how fast the app is, not all remaining buffers in the
+ * burst will be enqueued. Inform peer */
+ tc->flags |= TCP_CONN_SNDACK;
+
+ error = TCP_ERROR_PARTIALLY_ENQUEUED;
+ }
+ else
+ {
+ tc->flags |= TCP_CONN_SNDACK;
+ return TCP_ERROR_FIFO_FULL;
+ }
+
+ /* Update SACK list if need be */
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ {
+ /* Remove SACK blocks that have been delivered */
+ tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt);
+ }
+
+ return error;
+}
+
+/** Enqueue out-of-order data */
+always_inline int
+tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
+ u16 data_len)
+{
+ stream_session_t *s0;
+ int rv, offset;
+
+ ASSERT (seq_gt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt));
+
+ /* Pure ACK. Do nothing */
+ if (PREDICT_FALSE (data_len == 0))
+ {
+ return TCP_ERROR_PURE_ACK;
+ }
+
+ /* Enqueue out-of-order data with relative offset */
+ rv = stream_session_enqueue_data (&tc->connection, b,
+ vnet_buffer (b)->tcp.seq_number -
+ tc->rcv_nxt, 0 /* queue event */ , 0);
+
+ /* Nothing written */
+ if (rv)
+ {
+ TCP_EVT_DBG (TCP_EVT_INPUT, tc, 1, data_len, 0);
+ return TCP_ERROR_FIFO_FULL;
+ }
+
+ TCP_EVT_DBG (TCP_EVT_INPUT, tc, 1, data_len, data_len);
+
+ /* Update SACK list if in use */
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ {
+ ooo_segment_t *newest;
+ u32 start, end;
+
+ s0 = stream_session_get (tc->c_s_index, tc->c_thread_index);
+
+ /* Get the newest segment from the fifo */
+ newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo);
+ if (newest)
+ {
+ offset = ooo_segment_offset (s0->server_rx_fifo, newest);
+ ASSERT (offset <= vnet_buffer (b)->tcp.seq_number - tc->rcv_nxt);
+ start = tc->rcv_nxt + offset;
+ end = start + ooo_segment_length (s0->server_rx_fifo, newest);
+ tcp_update_sack_list (tc, start, end);
+ svm_fifo_newest_ooo_segment_reset (s0->server_rx_fifo);
+ }
+ }
+
+ return TCP_ERROR_ENQUEUED;
+}
+
+/**
+ * Check if the ACK can be delayed. Returns 1 if it can, and 0 if the ACK
+ * must be sent immediately, e.g., when TCP_ALWAYS_ACK is set.
+ */
+always_inline int
+tcp_can_delack (tcp_connection_t * tc)
+{
+ /* Send ack if ... */
+ if (TCP_ALWAYS_ACK
+ /* just sent a rcv wnd 0 */
+ || (tc->flags & TCP_CONN_SENT_RCV_WND0) != 0
+ /* constrained to send ack */
+ || (tc->flags & TCP_CONN_SNDACK) != 0
+ /* we're almost out of tx wnd */
+ || tcp_available_snd_space (tc) < 4 * tc->snd_mss)
+ return 0;
+
+ return 1;
+}
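+
+/* tcp_segment_rcv below uses this to choose between arming the
+ * TCP_TIMER_DELACK timer and acking the segment immediately. */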
+
+static int
+tcp_buffer_discard_bytes (vlib_buffer_t * b, u32 n_bytes_to_drop)
+{
+ u32 discard, first = b->current_length, to_drop = n_bytes_to_drop;
+ vlib_buffer_t *first_b = b;
+ vlib_main_t *vm = vlib_get_main ();
+
+ /* Handle multi-buffer segments. Walk the chain with a scratch counter so
+ * the totals below can still use the original n_bytes_to_drop. */
+ if (n_bytes_to_drop > b->current_length)
+ {
+ if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ return -1;
+ do
+ {
+ discard = clib_min (to_drop, b->current_length);
+ vlib_buffer_advance (b, discard);
+ b = vlib_get_buffer (vm, b->next_buffer);
+ to_drop -= discard;
+ }
+ while (to_drop);
+ /* Bytes dropped beyond the first buffer shrink the chained length */
+ if (n_bytes_to_drop > first)
+ first_b->total_length_not_including_first_buffer -=
+ n_bytes_to_drop - first;
+ }
+ else
+ vlib_buffer_advance (b, n_bytes_to_drop);
+ /* Adjust the data length on the head buffer of the chain */
+ vnet_buffer (first_b)->tcp.data_len -= n_bytes_to_drop;
+ return 0;
+}
+
+static int
+tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b,
+ u32 * next0)
+{
+ u32 error = 0, n_bytes_to_drop, n_data_bytes;
+
+ vlib_buffer_advance (b, vnet_buffer (b)->tcp.data_offset);
+ n_data_bytes = vnet_buffer (b)->tcp.data_len;
+ ASSERT (n_data_bytes);
+
+ /* Handle out-of-order data */
+ if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt))
+ {
+ /* Old sequence numbers allowed through because they overlapped
+ * the rx window */
+ if (seq_lt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt))
+ {
+ error = TCP_ERROR_SEGMENT_OLD;
+ *next0 = TCP_NEXT_DROP;
+
+ /* Completely in the past (possible retransmit) */
+ if (seq_leq (vnet_buffer (b)->tcp.seq_end, tc->rcv_nxt))
+ {
+ /* Ack retransmissions since we may not have any data to send */
+ tcp_make_ack (tc, b);
+ *next0 = tcp_next_output (tc->c_is_ip4);
+ goto done;
+ }
+
+ /* Chop off the bytes in the past */
+ n_bytes_to_drop = tc->rcv_nxt - vnet_buffer (b)->tcp.seq_number;
+ n_data_bytes -= n_bytes_to_drop;
+ vnet_buffer (b)->tcp.seq_number = tc->rcv_nxt;
+ if (tcp_buffer_discard_bytes (b, n_bytes_to_drop))
+ goto done;
+
+ goto in_order;
+ }
+
+ error = tcp_session_enqueue_ooo (tc, b, n_data_bytes);
+
+ /* N.B. We should not filter a burst of dupacks, for two reasons:
+ * 1) dupacks open the cwnd on the remote peer when it is congested
+ * 2) acks leaving should carry the latest rcv_wnd, since the burst may
+ * have eaten all of it up, so only the old ones could be filtered.
+ */
+
+ /* RFC2581: Send DUPACK for fast retransmit */
+ tcp_make_ack (tc, b);
+ *next0 = tcp_next_output (tc->c_is_ip4);
+
+ /* Mark as DUPACK. We may filter these in output if
+ * the burst fills the holes. */
+ if (n_data_bytes)
+ vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_DUPACK;
+
+ TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc);
+ goto done;
+ }
+
+in_order:
+
+ /* In order data, enqueue. Fifo figures out by itself if any out-of-order
+ * segments can be enqueued after fifo tail offset changes. */
+ error = tcp_session_enqueue_data (tc, b, n_data_bytes);
+
+ /* Check if ACK can be delayed */
+ if (tcp_can_delack (tc))
+ {
+ if (!tcp_timer_is_active (tc, TCP_TIMER_DELACK))
+ tcp_timer_set (tc, TCP_TIMER_DELACK, TCP_DELACK_TIME);
+ goto done;
+ }
+
+ *next0 = tcp_next_output (tc->c_is_ip4);
+ tcp_make_ack (tc, b);
+
+done:
+ return error;
+}
+
+typedef struct
+{
+ tcp_header_t tcp_header;
+ tcp_connection_t tcp_connection;
+} tcp_rx_trace_t;
+
+u8 *
+format_tcp_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%U\n%U%U",
+ format_tcp_header, &t->tcp_header, 128,
+ format_white_space, indent,
+ format_tcp_connection, &t->tcp_connection, 1);
+
+ return s;
+}
+
+u8 *
+format_tcp_rx_trace_short (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
+
+ s = format (s, "%d -> %d (%U)",
+ clib_net_to_host_u16 (t->tcp_header.src_port),
+ clib_net_to_host_u16 (t->tcp_header.dst_port), format_tcp_state,
+ t->tcp_connection.state);
+
+ return s;
+}
+
+void
+tcp_set_rx_trace_data (tcp_rx_trace_t * t0, tcp_connection_t * tc0,
+ tcp_header_t * th0, vlib_buffer_t * b0, u8 is_ip4)
+{
+ if (tc0)
+ {
+ clib_memcpy (&t0->tcp_connection, tc0, sizeof (t0->tcp_connection));
+ }
+ else
+ {
+ th0 = tcp_buffer_hdr (b0);
+ }
+ clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
+}
+
+always_inline void
+tcp_established_inc_counter (vlib_main_t * vm, u8 is_ip4, u8 evt, u8 val)
+{
+ if (PREDICT_TRUE (!val))
+ return;
+
+ if (is_ip4)
+ vlib_node_increment_counter (vm, tcp4_established_node.index, evt, val);
+ else
+ vlib_node_increment_counter (vm, tcp6_established_node.index, evt, val);
+}
+
+always_inline uword
+tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 my_thread_index = vm->thread_index, errors = 0;
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ u8 is_fin = 0;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ tcp_header_t *th0 = 0;
+ tcp_connection_t *tc0;
+ u32 next0 = TCP_ESTABLISHED_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
+ my_thread_index);
+
+ if (PREDICT_FALSE (tc0 == 0))
+ {
+ error0 = TCP_ERROR_INVALID_CONNECTION;
+ goto done;
+ }
+
+ th0 = tcp_buffer_hdr (b0);
+ /* N.B. buffer is rewritten if segment is ooo. Thus, th0 becomes a
+ * dangling reference. */
+ is_fin = tcp_is_fin (th0);
+
+ /* SYNs, FINs and data consume sequence numbers */
+ vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number
+ + tcp_is_syn (th0) + is_fin + vnet_buffer (b0)->tcp.data_len;
+
+ /* TODO header prediction fast path */
+
+ /* 1-4: check SEQ, RST, SYN */
+ if (PREDICT_FALSE (tcp_segment_validate (vm, tc0, b0, th0, &next0)))
+ {
+ error0 = TCP_ERROR_SEGMENT_INVALID;
+ TCP_EVT_DBG (TCP_EVT_SEG_INVALID, tc0,
+ vnet_buffer (b0)->tcp.seq_number,
+ vnet_buffer (b0)->tcp.seq_end);
+ goto done;
+ }
+
+ /* 5: check the ACK field */
+ if (tcp_rcv_ack (tc0, b0, th0, &next0, &error0))
+ goto done;
+
+ /* 6: check the URG bit TODO */
+
+ /* 7: process the segment text */
+ if (vnet_buffer (b0)->tcp.data_len)
+ error0 = tcp_segment_rcv (tm, tc0, b0, &next0);
+
+ /* 8: check the FIN bit */
+ if (PREDICT_FALSE (is_fin))
+ {
+ /* Enter CLOSE-WAIT and notify session. To avoid lingering
+ * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
+ /* Account for the FIN if nothing else was received */
+ if (vnet_buffer (b0)->tcp.data_len == 0)
+ tc0->rcv_nxt += 1;
+ tcp_make_ack (tc0, b0);
+ next0 = tcp_next_output (tc0->c_is_ip4);
+ tc0->state = TCP_STATE_CLOSE_WAIT;
+ stream_session_disconnect_notify (&tc0->connection);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
+ }
+
+ done:
+ b0->error = node->errors[error0];
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ tcp_rx_trace_t *t0 =
+ vlib_add_trace (vm, node, b0, sizeof (*t0));
+ tcp_set_rx_trace_data (t0, tc0, th0, b0, is_ip4);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ errors = session_manager_flush_enqueue_events (my_thread_index);
+ tcp_established_inc_counter (vm, is_ip4, TCP_ERROR_EVENT_FIFO_FULL, errors);
+ tcp_flush_frame_to_output (vm, my_thread_index, is_ip4);
+
+ return from_frame->n_vectors;
+}
+
+static uword
+tcp4_established (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_established_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+tcp6_established (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp4_established_node) =
+{
+ .function = tcp4_established,
+ .name = "tcp4-established",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n,
+ foreach_tcp_state_next
+#undef _
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp4_established_node, tcp4_established);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp6_established_node) =
+{
+ .function = tcp6_established,
+ .name = "tcp6-established",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n,
+ foreach_tcp_state_next
+#undef _
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp6_established_node, tcp6_established);
+
+vlib_node_registration_t tcp4_syn_sent_node;
+vlib_node_registration_t tcp6_syn_sent_node;
+
+static u8
+tcp_lookup_is_valid (tcp_connection_t * tc, tcp_header_t * hdr)
+{
+ transport_connection_t *tmp;
+ if (!tc)
+ return 1;
+
+ u8 is_valid = (tc->c_lcl_port == hdr->dst_port
+ && (tc->state == TCP_STATE_LISTEN
+ || tc->c_rmt_port == hdr->src_port));
+
+ if (!is_valid)
+ {
+ if ((tmp =
+ stream_session_half_open_lookup (&tc->c_lcl_ip, &tc->c_rmt_ip,
+ tc->c_lcl_port, tc->c_rmt_port,
+ tc->c_transport_proto)))
+ {
+ if (tmp->lcl_port == hdr->dst_port
+ && tmp->rmt_port == hdr->src_port)
+ {
+ clib_warning ("half-open is valid!");
+ }
+ }
+ }
+ return is_valid;
+}
+
+/**
+ * Lookup transport connection
+ */
+static tcp_connection_t *
+tcp_lookup_connection (vlib_buffer_t * b, u8 thread_index, u8 is_ip4)
+{
+ tcp_header_t *tcp;
+ transport_connection_t *tconn;
+ tcp_connection_t *tc;
+ if (is_ip4)
+ {
+ ip4_header_t *ip4;
+ ip4 = vlib_buffer_get_current (b);
+ tcp = ip4_next_header (ip4);
+ tconn = stream_session_lookup_transport_wt4 (&ip4->dst_address,
+ &ip4->src_address,
+ tcp->dst_port,
+ tcp->src_port,
+ SESSION_TYPE_IP4_TCP,
+ thread_index);
+ tc = tcp_get_connection_from_transport (tconn);
+ ASSERT (tcp_lookup_is_valid (tc, tcp));
+ }
+ else
+ {
+ ip6_header_t *ip6;
+ ip6 = vlib_buffer_get_current (b);
+ tcp = ip6_next_header (ip6);
+ tconn = stream_session_lookup_transport_wt6 (&ip6->dst_address,
+ &ip6->src_address,
+ tcp->dst_port,
+ tcp->src_port,
+ SESSION_TYPE_IP6_TCP,
+ thread_index);
+ tc = tcp_get_connection_from_transport (tconn);
+ ASSERT (tcp_lookup_is_valid (tc, tcp));
+ }
+ return tc;
+}
+
+always_inline uword
+tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 my_thread_index = vm->thread_index, errors = 0;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, ack0, seq0;
+ vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
+ tcp_header_t *tcp0 = 0;
+ tcp_connection_t *tc0;
+ tcp_connection_t *new_tc0;
+ u32 next0 = TCP_SYN_SENT_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ tc0 =
+ tcp_half_open_connection_get (vnet_buffer (b0)->
+ tcp.connection_index);
+ if (PREDICT_FALSE (tc0 == 0))
+ {
+ error0 = TCP_ERROR_INVALID_CONNECTION;
+ goto drop;
+ }
+
+ /* Half-open completed recently but the connection wasn't removed
+ * yet by the owning thread */
+ if (PREDICT_FALSE (tc0->flags & TCP_CONN_HALF_OPEN_DONE))
+ {
+ /* Make sure the connection actually exists */
+ ASSERT (tcp_lookup_connection (b0, my_thread_index, is_ip4));
+ goto drop;
+ }
+
+ ack0 = vnet_buffer (b0)->tcp.ack_number;
+ seq0 = vnet_buffer (b0)->tcp.seq_number;
+ tcp0 = tcp_buffer_hdr (b0);
+
+ /* Crude check for a connection handle that no longer matches the
+ * packet. The connection probably just switched to established */
+ if (PREDICT_FALSE (tcp0->dst_port != tc0->c_lcl_port
+ || tcp0->src_port != tc0->c_rmt_port))
+ goto drop;
+
+ if (PREDICT_FALSE
+ (!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0)))
+ goto drop;
+
+ /* SYNs, FINs and data consume sequence numbers */
+ vnet_buffer (b0)->tcp.seq_end = seq0 + tcp_is_syn (tcp0)
+ + tcp_is_fin (tcp0) + vnet_buffer (b0)->tcp.data_len;
+
+ /*
+ * 1. check the ACK bit
+ */
+
+ /*
+ * If the ACK bit is set
+ * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless
+ * the RST bit is set, if so drop the segment and return)
+ * <SEQ=SEG.ACK><CTL=RST>
+ * and discard the segment. Return.
+ * If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable.
+ */
+ if (tcp_ack (tcp0))
+ {
+ if (seq_leq (ack0, tc0->iss) || seq_gt (ack0, tc0->snd_nxt))
+ {
+ clib_warning ("ack not in rcv wnd");
+ if (!tcp_rst (tcp0))
+ tcp_send_reset_w_pkt (tc0, b0, is_ip4);
+ goto drop;
+ }
+
+ /* Make sure ACK is valid */
+ if (seq_gt (tc0->snd_una, ack0))
+ {
+ clib_warning ("ack invalid");
+ goto drop;
+ }
+ }
+
+ /*
+ * 2. check the RST bit
+ */
+
+ if (tcp_rst (tcp0))
+ {
+ /* If ACK is acceptable, signal client that peer is not
+ * willing to accept the connection and drop the connection */
+ if (tcp_ack (tcp0))
+ tcp_connection_reset (tc0);
+ goto drop;
+ }
+
+ /*
+ * 3. check the security and precedence (skipped)
+ */
+
+ /*
+ * 4. check the SYN bit
+ */
+
+ /* No SYN flag. Drop. */
+ if (!tcp_syn (tcp0))
+ {
+ clib_warning ("not synack");
+ goto drop;
+ }
+
+ /* Parse options */
+ if (tcp_options_parse (tcp0, &tc0->rcv_opts))
+ {
+ clib_warning ("options parse fail");
+ goto drop;
+ }
+
+ /* Valid SYN or SYN-ACK. Move connection from half-open pool to
+ * current thread pool. */
+ pool_get (tm->connections[my_thread_index], new_tc0);
+ clib_memcpy (new_tc0, tc0, sizeof (*new_tc0));
+ new_tc0->c_c_index = new_tc0 - tm->connections[my_thread_index];
+ new_tc0->c_thread_index = my_thread_index;
+ new_tc0->rcv_nxt = vnet_buffer (b0)->tcp.seq_end;
+ new_tc0->irs = seq0;
+ new_tc0->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID;
+ new_tc0->timers[TCP_TIMER_RETRANSMIT_SYN] =
+ TCP_TIMER_HANDLE_INVALID;
+
+ /* If this is not the owning thread, wait for the SYN retransmit
+ * timer to expire and clean up afterwards */
+ if (tcp_half_open_connection_cleanup (tc0))
+ tc0->flags |= TCP_CONN_HALF_OPEN_DONE;
+
+ if (tcp_opts_tstamp (&new_tc0->rcv_opts))
+ {
+ new_tc0->tsval_recent = new_tc0->rcv_opts.tsval;
+ new_tc0->tsval_recent_age = tcp_time_now ();
+ }
+
+ if (tcp_opts_wscale (&new_tc0->rcv_opts))
+ new_tc0->snd_wscale = new_tc0->rcv_opts.wscale;
+
+ /* RFC1323: SYN and SYN-ACK wnd not scaled */
+ new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window);
+ new_tc0->snd_wl1 = seq0;
+ new_tc0->snd_wl2 = ack0;
+
+ tcp_connection_init_vars (new_tc0);
+
+ /* SYN-ACK: See if we can switch to ESTABLISHED state */
+ if (PREDICT_TRUE (tcp_ack (tcp0)))
+ {
+ /* Our SYN is ACKed: we have iss < ack = snd_una */
+
+ /* TODO Dequeue acknowledged segments if we support Fast Open */
+ new_tc0->snd_una = ack0;
+ new_tc0->state = TCP_STATE_ESTABLISHED;
+
+ /* Make sure las is initialized for the wnd computation */
+ new_tc0->rcv_las = new_tc0->rcv_nxt;
+
+ /* Notify app that we have a connection. If the session layer
+ * can't allocate a session, send a reset */
+ if (stream_session_connect_notify (&new_tc0->connection, 0))
+ {
+ clib_warning ("connect notify fail");
+ tcp_send_reset_w_pkt (new_tc0, b0, is_ip4);
+ tcp_connection_cleanup (new_tc0);
+ goto drop;
+ }
+
+ /* Make sure after data segment processing ACK is sent */
+ new_tc0->flags |= TCP_CONN_SNDACK;
+
+ /* Update rtt with the syn-ack sample */
+ tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number);
+ TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, new_tc0);
+ }
+ /* SYN: Simultaneous open. Change state to SYN-RCVD and send SYN-ACK */
+ else
+ {
+ new_tc0->state = TCP_STATE_SYN_RCVD;
+
+ /* Notify app that we have a connection */
+ if (stream_session_connect_notify (&new_tc0->connection, 0))
+ {
+ tcp_connection_cleanup (new_tc0);
+ tcp_send_reset_w_pkt (tc0, b0, is_ip4);
+ TCP_EVT_DBG (TCP_EVT_RST_SENT, tc0);
+ goto drop;
+ }
+
+ tc0->rtt_ts = 0;
+ tcp_init_snd_vars (tc0);
+ tcp_make_synack (new_tc0, b0);
+ next0 = tcp_next_output (is_ip4);
+
+ goto drop;
+ }
+
+ /* Read data, if any */
+ if (PREDICT_FALSE (vnet_buffer (b0)->tcp.data_len))
+ {
+ ASSERT (0);
+ error0 = tcp_segment_rcv (tm, new_tc0, b0, &next0);
+ if (error0 == TCP_ERROR_PURE_ACK)
+ error0 = TCP_ERROR_SYN_ACKS_RCVD;
+ }
+ else
+ {
+ tcp_make_ack (new_tc0, b0);
+ next0 = tcp_next_output (new_tc0->c_is_ip4);
+ }
+
+ drop:
+
+ b0->error = error0 ? node->errors[error0] : 0;
+ if (PREDICT_FALSE
+ ((b0->flags & VLIB_BUFFER_IS_TRACED) && tcp0 != 0))
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, tcp0, sizeof (t0->tcp_header));
+ clib_memcpy (&t0->tcp_connection, tc0,
+ sizeof (t0->tcp_connection));
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ errors = session_manager_flush_enqueue_events (my_thread_index);
+ if (errors)
+ {
+ if (is_ip4)
+ vlib_node_increment_counter (vm, tcp4_established_node.index,
+ TCP_ERROR_EVENT_FIFO_FULL, errors);
+ else
+ vlib_node_increment_counter (vm, tcp6_established_node.index,
+ TCP_ERROR_EVENT_FIFO_FULL, errors);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+tcp4_syn_sent (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_syn_sent_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+tcp6_syn_sent_rcv (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp4_syn_sent_node) =
+{
+ .function = tcp4_syn_sent,
+ .name = "tcp4-syn-sent",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_SYN_SENT_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
+ foreach_tcp_state_next
+#undef _
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp4_syn_sent_node, tcp4_syn_sent);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
+{
+ .function = tcp6_syn_sent_rcv,
+ .name = "tcp6-syn-sent",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_SYN_SENT_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
+ foreach_tcp_state_next
+#undef _
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv);
+
+/**
+ * Handles reception for all states except LISTEN, SYN-SENT and ESTABLISHED
+ * as per RFC793 p. 64
+ */
+always_inline uword
+tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 my_thread_index = vm->thread_index, errors = 0;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ tcp_header_t *tcp0 = 0;
+ tcp_connection_t *tc0;
+ u32 next0 = TCP_RCV_PROCESS_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED;
+ u8 is_fin0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
+ my_thread_index);
+ if (PREDICT_FALSE (tc0 == 0))
+ {
+ error0 = TCP_ERROR_INVALID_CONNECTION;
+ goto drop;
+ }
+
+ tcp0 = tcp_buffer_hdr (b0);
+ is_fin0 = tcp_is_fin (tcp0);
+
+ /* SYNs, FINs and data consume sequence numbers */
+ vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number
+ + tcp_is_syn (tcp0) + is_fin0 + vnet_buffer (b0)->tcp.data_len;
+
+ if (CLIB_DEBUG)
+ {
+ tcp_connection_t *tmp;
+ tmp = tcp_lookup_connection (b0, my_thread_index, is_ip4);
+ if (tmp->state != tc0->state)
+ {
+ clib_warning ("state changed");
+ ASSERT (0);
+ goto drop;
+ }
+ }
+
+ /*
+ * Special treatment for CLOSED
+ */
+ switch (tc0->state)
+ {
+ case TCP_STATE_CLOSED:
+ goto drop;
+ break;
+ }
+
+ /*
+ * For all other states (except LISTEN)
+ */
+
+ /* 1-4: check SEQ, RST, SYN */
+ if (PREDICT_FALSE (tcp_segment_validate (vm, tc0, b0, tcp0,
+ &next0)))
+ {
+ error0 = TCP_ERROR_SEGMENT_INVALID;
+ goto drop;
+ }
+
+ /* 5: check the ACK field */
+ switch (tc0->state)
+ {
+ case TCP_STATE_SYN_RCVD:
+ /*
+ * If the segment acknowledgment is not acceptable, form a
+ * reset segment,
+ * <SEQ=SEG.ACK><CTL=RST>
+ * and send it.
+ */
+ if (!tcp_rcv_ack_is_acceptable (tc0, b0))
+ {
+ clib_warning ("connection not accepted");
+ tcp_send_reset_w_pkt (tc0, b0, is_ip4);
+ goto drop;
+ }
+
+ /* Update rtt and rto */
+ tcp_update_rtt (tc0, vnet_buffer (b0)->tcp.ack_number);
+
+ /* Switch state to ESTABLISHED */
+ tc0->state = TCP_STATE_ESTABLISHED;
+
+ /* Initialize session variables */
+ tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
+ tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window)
+ << tc0->rcv_opts.wscale;
+ tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
+ tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
+ stream_session_accept_notify (&tc0->connection);
+
+ /* Reset SYN-ACK retransmit and SYN_RCV establish timers */
+ tcp_retransmit_timer_reset (tc0);
+ tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH);
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+ break;
+ case TCP_STATE_ESTABLISHED:
+ /* We can get packets in established state here because they
+ * were enqueued before state change */
+ if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
+ goto drop;
+
+ break;
+ case TCP_STATE_FIN_WAIT_1:
+ /* In addition to the processing for the ESTABLISHED state, if
+ * our FIN is now acknowledged then enter FIN-WAIT-2 and
+ * continue processing in that state. */
+ if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
+ goto drop;
+
+ /* Still have to send the FIN */
+ if (tc0->flags & TCP_CONN_FINPNDG)
+ {
+ /* TX fifo finally drained */
+ if (!stream_session_tx_fifo_max_dequeue (&tc0->connection))
+ tcp_send_fin (tc0);
+ }
+ /* If FIN is ACKed */
+ else if (tc0->snd_una == tc0->snd_una_max)
+ {
+ tc0->state = TCP_STATE_FIN_WAIT_2;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+
+ /* Stop all retransmit timers because we have nothing more
+ * to send. Enable waitclose though because we're willing to
+ * wait for peer's FIN but not indefinitely. */
+ tcp_connection_timers_reset (tc0);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ }
+ break;
+ case TCP_STATE_FIN_WAIT_2:
+ /* In addition to the processing for the ESTABLISHED state, if
+ * the retransmission queue is empty, the user's CLOSE can be
+ * acknowledged ("ok") but do not delete the TCB. */
+ if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
+ goto drop;
+ break;
+ case TCP_STATE_CLOSE_WAIT:
+ /* Do the same processing as for the ESTABLISHED state. */
+ if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
+ goto drop;
+ break;
+ case TCP_STATE_CLOSING:
+ /* In addition to the processing for the ESTABLISHED state, if
+ * the ACK acknowledges our FIN then enter the TIME-WAIT state,
+ * otherwise ignore the segment. */
+ if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
+ goto drop;
+
+ tc0->state = TCP_STATE_TIME_WAIT;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ goto drop;
+
+ break;
+ case TCP_STATE_LAST_ACK:
+ /* The only thing that [should] arrive in this state is an
+ * acknowledgment of our FIN. If our FIN is now acknowledged,
+ * delete the TCB, enter the CLOSED state, and return. */
+
+ if (!tcp_rcv_ack_is_acceptable (tc0, b0))
+ goto drop;
+
+ tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
+ /* Apparently our FIN was lost */
+ if (is_fin0)
+ {
+ tcp_send_fin (tc0);
+ goto drop;
+ }
+
+ tc0->state = TCP_STATE_CLOSED;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+
+ /* Don't delete the connection/session yet. Instead, wait a
+ * reasonable amount of time until the pipes are cleared. In
+ * particular, this makes sure that we won't have dead sessions
+ * when processing events on the tx path */
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_retransmit_timer_reset (tc0);
+
+ goto drop;
+
+ break;
+ case TCP_STATE_TIME_WAIT:
+ /* The only thing that can arrive in this state is a
+ * retransmission of the remote FIN. Acknowledge it, and restart
+ * the 2 MSL timeout. */
+
+ if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
+ goto drop;
+
+ tcp_make_ack (tc0, b0);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+
+ goto drop;
+
+ break;
+ default:
+ ASSERT (0);
+ }
+
+ /* 6: check the URG bit TODO */
+
+ /* 7: process the segment text */
+ switch (tc0->state)
+ {
+ case TCP_STATE_ESTABLISHED:
+ case TCP_STATE_FIN_WAIT_1:
+ case TCP_STATE_FIN_WAIT_2:
+ if (vnet_buffer (b0)->tcp.data_len)
+ error0 = tcp_segment_rcv (tm, tc0, b0, &next0);
+ else if (is_fin0)
+ tc0->rcv_nxt += 1;
+ break;
+ case TCP_STATE_CLOSE_WAIT:
+ case TCP_STATE_CLOSING:
+ case TCP_STATE_LAST_ACK:
+ case TCP_STATE_TIME_WAIT:
+ /* This should not occur, since a FIN has been received from the
+ * remote side. Ignore the segment text. */
+ break;
+ }
+
+ /* 8: check the FIN bit */
+ if (!is_fin0)
+ goto drop;
+
+ switch (tc0->state)
+ {
+ case TCP_STATE_ESTABLISHED:
+ case TCP_STATE_SYN_RCVD:
+ /* Send FIN-ACK notify app and enter CLOSE-WAIT */
+ tcp_connection_timers_reset (tc0);
+ tcp_make_fin (tc0, b0);
+ tc0->snd_nxt += 1;
+ next0 = tcp_next_output (tc0->c_is_ip4);
+ stream_session_disconnect_notify (&tc0->connection);
+ tc0->state = TCP_STATE_CLOSE_WAIT;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+ break;
+ case TCP_STATE_CLOSE_WAIT:
+ case TCP_STATE_CLOSING:
+ case TCP_STATE_LAST_ACK:
+ /* move along .. */
+ break;
+ case TCP_STATE_FIN_WAIT_1:
+ tc0->state = TCP_STATE_CLOSING;
+ tcp_make_ack (tc0, b0);
+ next0 = tcp_next_output (is_ip4);
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+ /* Wait for ACK but not forever */
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ break;
+ case TCP_STATE_FIN_WAIT_2:
+ /* Got FIN, send ACK! Be more aggressive with resource cleanup */
+ tc0->state = TCP_STATE_TIME_WAIT;
+ tcp_connection_timers_reset (tc0);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
+ tcp_make_ack (tc0, b0);
+ next0 = tcp_next_output (is_ip4);
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+ break;
+ case TCP_STATE_TIME_WAIT:
+ /* Remain in the TIME-WAIT state. Restart the time-wait
+ * timeout.
+ */
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
+ break;
+ }
+ TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
+
+ drop:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ tcp_rx_trace_t *t0 =
+ vlib_add_trace (vm, node, b0, sizeof (*t0));
+ tcp_set_rx_trace_data (t0, tc0, tcp0, b0, is_ip4);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ errors = session_manager_flush_enqueue_events (my_thread_index);
+ if (errors)
+ {
+ if (is_ip4)
+ vlib_node_increment_counter (vm, tcp4_established_node.index,
+ TCP_ERROR_EVENT_FIFO_FULL, errors);
+ else
+ vlib_node_increment_counter (vm, tcp6_established_node.index,
+ TCP_ERROR_EVENT_FIFO_FULL, errors);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+tcp4_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_rcv_process_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+tcp6_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp4_rcv_process_node) =
+{
+ .function = tcp4_rcv_process,
+ .name = "tcp4-rcv-process",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_RCV_PROCESS_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n,
+ foreach_tcp_state_next
+#undef _
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp4_rcv_process_node, tcp4_rcv_process);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp6_rcv_process_node) =
+{
+ .function = tcp6_rcv_process,
+ .name = "tcp6-rcv-process",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_RCV_PROCESS_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n,
+ foreach_tcp_state_next
+#undef _
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp6_rcv_process_node, tcp6_rcv_process);
+
+vlib_node_registration_t tcp4_listen_node;
+vlib_node_registration_t tcp6_listen_node;
+
+/**
+ * LISTEN state processing as per RFC 793 p. 65
+ */
+always_inline uword
+tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 my_thread_index = vm->thread_index;
+ u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
+ tcp_header_t *th0 = 0;
+ tcp_connection_t *lc0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ tcp_connection_t *child0;
+ u32 error0 = TCP_ERROR_SYNS_RCVD, next0 = TCP_LISTEN_NEXT_DROP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ lc0 = tcp_listener_get (vnet_buffer (b0)->tcp.connection_index);
+
+ if (is_ip4)
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ th0 = ip4_next_header (ip40);
+ }
+ else
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ th0 = ip6_next_header (ip60);
+ }
+
+ /* Create child session. For syn-flood protection use filter */
+
+ /* 1. first check for an RST: handled in dispatch */
+ /* if (tcp_rst (th0))
+ goto drop; */
+
+ /* 2. second check for an ACK: handled in dispatch */
+ /* if (tcp_ack (th0))
+ {
+ tcp_send_reset (b0, is_ip4);
+ goto drop;
+ } */
+
+ /* 3. check for a SYN (did that already) */
+
+ /* Make sure connection wasn't just created */
+ child0 = tcp_lookup_connection (b0, my_thread_index, is_ip4);
+ if (PREDICT_FALSE (child0->state != TCP_STATE_LISTEN))
+ {
+ error0 = TCP_ERROR_CREATE_EXISTS;
+ goto drop;
+ }
+
+ /* Create child session and send SYN-ACK */
+ child0 = tcp_connection_new (my_thread_index);
+ child0->c_lcl_port = lc0->c_lcl_port;
+ child0->c_rmt_port = th0->src_port;
+ child0->c_is_ip4 = is_ip4;
+ child0->state = TCP_STATE_SYN_RCVD;
+
+ if (is_ip4)
+ {
+ child0->c_lcl_ip4.as_u32 = ip40->dst_address.as_u32;
+ child0->c_rmt_ip4.as_u32 = ip40->src_address.as_u32;
+ }
+ else
+ {
+ clib_memcpy (&child0->c_lcl_ip6, &ip60->dst_address,
+ sizeof (ip6_address_t));
+ clib_memcpy (&child0->c_rmt_ip6, &ip60->src_address,
+ sizeof (ip6_address_t));
+ }
+
+ if (stream_session_accept (&child0->connection, lc0->c_s_index, sst,
+ 0 /* notify */ ))
+ {
+ clib_warning ("session accept fail");
+ tcp_connection_cleanup (child0);
+ error0 = TCP_ERROR_CREATE_SESSION_FAIL;
+ goto drop;
+ }
+
+ if (tcp_options_parse (th0, &child0->rcv_opts))
+ {
+ clib_warning ("options parse fail");
+ goto drop;
+ }
+
+ child0->irs = vnet_buffer (b0)->tcp.seq_number;
+ child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1;
+ child0->rcv_las = child0->rcv_nxt;
+
+ /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK}
+ * segments are used to initialize PAWS. */
+ if (tcp_opts_tstamp (&child0->rcv_opts))
+ {
+ child0->tsval_recent = child0->rcv_opts.tsval;
+ child0->tsval_recent_age = tcp_time_now ();
+ }
+
+ if (tcp_opts_wscale (&child0->rcv_opts))
+ child0->snd_wscale = child0->rcv_opts.wscale;
+
+ child0->snd_wnd = clib_net_to_host_u16 (th0->window)
+ << child0->snd_wscale;
+ child0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
+ child0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
+
+ tcp_connection_init_vars (child0);
+ TCP_EVT_DBG (TCP_EVT_SYN_RCVD, child0, 1);
+
+ /* Reuse buffer to make syn-ack and send */
+ tcp_make_synack (child0, b0);
+ next0 = tcp_next_output (is_ip4);
+ tcp_timer_set (child0, TCP_TIMER_ESTABLISH, TCP_SYN_RCVD_TIME);
+
+ drop:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
+ clib_memcpy (&t0->tcp_connection, lc0,
+ sizeof (t0->tcp_connection));
+ }
+
+ b0->error = node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static uword
+tcp4_listen (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_listen_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+tcp6_listen (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp4_listen_node) =
+{
+ .function = tcp4_listen,
+ .name = "tcp4-listen",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_LISTEN_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_LISTEN_NEXT_##s] = n,
+ foreach_tcp_state_next
+#undef _
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp4_listen_node, tcp4_listen);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp6_listen_node) =
+{
+ .function = tcp6_listen,
+ .name = "tcp6-listen",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_LISTEN_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_LISTEN_NEXT_##s] = n,
+ foreach_tcp_state_next
+#undef _
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp6_listen_node, tcp6_listen);
+
+vlib_node_registration_t tcp4_input_node;
+vlib_node_registration_t tcp6_input_node;
+
+typedef enum _tcp_input_next
+{
+ TCP_INPUT_NEXT_DROP,
+ TCP_INPUT_NEXT_LISTEN,
+ TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_INPUT_NEXT_SYN_SENT,
+ TCP_INPUT_NEXT_ESTABLISHED,
+ TCP_INPUT_NEXT_RESET,
+ TCP_INPUT_NEXT_PUNT,
+ TCP_INPUT_N_NEXT
+} tcp_input_next_t;
+
+#define foreach_tcp4_input_next \
+ _ (DROP, "error-drop") \
+ _ (LISTEN, "tcp4-listen") \
+ _ (RCV_PROCESS, "tcp4-rcv-process") \
+ _ (SYN_SENT, "tcp4-syn-sent") \
+ _ (ESTABLISHED, "tcp4-established") \
+ _ (RESET, "tcp4-reset") \
+ _ (PUNT, "error-punt")
+
+#define foreach_tcp6_input_next \
+ _ (DROP, "error-drop") \
+ _ (LISTEN, "tcp6-listen") \
+ _ (RCV_PROCESS, "tcp6-rcv-process") \
+ _ (SYN_SENT, "tcp6-syn-sent") \
+ _ (ESTABLISHED, "tcp6-established") \
+ _ (RESET, "tcp6-reset") \
+ _ (PUNT, "error-punt")
+
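+/* Only these flags index the per-state dispatch table; other header bits
+ * (e.g. PSH, URG) are masked out before the lookup in the input node. */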
+#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
+
+always_inline uword
+tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 my_thread_index = vm->thread_index;
+ tcp_main_t *tm = vnet_get_tcp_main ();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ tcp_set_time_now (my_thread_index);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ int n_advance_bytes0, n_data_bytes0;
+ u32 bi0;
+ vlib_buffer_t *b0;
+ tcp_header_t *tcp0 = 0;
+ tcp_connection_t *tc0;
+ transport_connection_t *tconn;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ u32 error0 = TCP_ERROR_NO_LISTENER, next0 = TCP_INPUT_NEXT_DROP;
+ u8 flags0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ vnet_buffer (b0)->tcp.flags = 0;
+
+ /* Checksum computed by ipx_local, no need to compute it again */
+
+ if (is_ip4)
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ tcp0 = ip4_next_header (ip40);
+ n_advance_bytes0 = (ip4_header_bytes (ip40)
+ + tcp_header_bytes (tcp0));
+ n_data_bytes0 = clib_net_to_host_u16 (ip40->length)
+ - n_advance_bytes0;
+ tconn = stream_session_lookup_transport_wt4 (&ip40->dst_address,
+ &ip40->src_address,
+ tcp0->dst_port,
+ tcp0->src_port,
+ SESSION_TYPE_IP4_TCP,
+ my_thread_index);
+ tc0 = tcp_get_connection_from_transport (tconn);
+ ASSERT (tcp_lookup_is_valid (tc0, tcp0));
+ }
+ else
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ tcp0 = ip6_next_header (ip60);
+ n_advance_bytes0 = tcp_header_bytes (tcp0);
+ n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length)
+ - n_advance_bytes0;
+ n_advance_bytes0 += sizeof (ip60[0]);
+ tconn = stream_session_lookup_transport_wt6 (&ip60->dst_address,
+ &ip60->src_address,
+ tcp0->dst_port,
+ tcp0->src_port,
+ SESSION_TYPE_IP6_TCP,
+ my_thread_index);
+ tc0 = tcp_get_connection_from_transport (tconn);
+ ASSERT (tcp_lookup_is_valid (tc0, tcp0));
+ }
+
+ /* Length check */
+ if (PREDICT_FALSE (n_advance_bytes0 < 0))
+ {
+ error0 = TCP_ERROR_LENGTH;
+ goto done;
+ }
+
+ /* Session exists */
+ if (PREDICT_TRUE (0 != tc0))
+ {
+ /* Save connection index */
+ vnet_buffer (b0)->tcp.connection_index = tc0->c_c_index;
+ vnet_buffer (b0)->tcp.seq_number =
+ clib_net_to_host_u32 (tcp0->seq_number);
+ vnet_buffer (b0)->tcp.ack_number =
+ clib_net_to_host_u32 (tcp0->ack_number);
+
+ vnet_buffer (b0)->tcp.hdr_offset = (u8 *) tcp0
+ - (u8 *) vlib_buffer_get_current (b0);
+ vnet_buffer (b0)->tcp.data_offset = n_advance_bytes0;
+ vnet_buffer (b0)->tcp.data_len = n_data_bytes0;
+
+ flags0 = tcp0->flags & filter_flags;
+ next0 = tm->dispatch_table[tc0->state][flags0].next;
+ error0 = tm->dispatch_table[tc0->state][flags0].error;
+
+ if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH
+ || next0 == TCP_INPUT_NEXT_RESET))
+ {
+ /* Overload tcp flags to store state */
+ tcp_state_t state0 = tc0->state;
+ vnet_buffer (b0)->tcp.flags = tc0->state;
+
+ if (error0 == TCP_ERROR_DISPATCH)
+ clib_warning ("disp error state %U flags %U",
+ format_tcp_state, state0, format_tcp_flags,
+ (int) flags0);
+ }
+ }
+ else
+ {
+ if ((is_ip4 && tm->punt_unknown4) ||
+ (!is_ip4 && tm->punt_unknown6))
+ {
+ next0 = TCP_INPUT_NEXT_PUNT;
+ error0 = TCP_ERROR_PUNT;
+ }
+ else
+ {
+ /* Send reset */
+ next0 = TCP_INPUT_NEXT_RESET;
+ error0 = TCP_ERROR_NO_LISTENER;
+ }
+ }
+
+ done:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ tcp_rx_trace_t *t0 =
+ vlib_add_trace (vm, node, b0, sizeof (*t0));
+ tcp_set_rx_trace_data (t0, tc0, tcp0, b0, is_ip4);
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+tcp4_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+tcp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp4_input_node) =
+{
+ .function = tcp4_input,
+ .name = "tcp4-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_INPUT_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_INPUT_NEXT_##s] = n,
+ foreach_tcp4_input_next
+#undef _
+ },
+ .format_buffer = format_tcp_header,
+ .format_trace = format_tcp_rx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp4_input_node, tcp4_input);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp6_input_node) =
+{
+ .function = tcp6_input,
+ .name = "tcp6-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_INPUT_N_NEXT,
+ .next_nodes =
+ {
+#define _(s,n) [TCP_INPUT_NEXT_##s] = n,
+ foreach_tcp6_input_next
+#undef _
+ },
+ .format_buffer = format_tcp_header,
+ .format_trace = format_tcp_rx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp6_input_node, tcp6_input);
+
+static void
+tcp_dispatch_table_init (tcp_main_t * tm)
+{
+ int i, j;
+ for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++)
+ for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++)
+ {
+ tm->dispatch_table[i][j].next = TCP_INPUT_NEXT_DROP;
+ tm->dispatch_table[i][j].error = TCP_ERROR_DISPATCH;
+ }
+
+#define _(t,f,n,e) \
+do { \
+ tm->dispatch_table[TCP_STATE_##t][f].next = (n); \
+ tm->dispatch_table[TCP_STATE_##t][f].error = (e); \
+} while (0)
+
+ /* SYNs for new connections -> tcp-listen. */
+ _(LISTEN, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
+ _(LISTEN, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_NONE);
+ _(LISTEN, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_NONE);
+ _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
+ TCP_ERROR_NONE);
+ /* ACK for a SYN-ACK -> tcp-rcv-process. */
+ _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(SYN_RCVD, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(SYN_RCVD, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ /* SYN-ACK for a SYN */
+ _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT,
+ TCP_ERROR_NONE);
+ _(SYN_SENT, TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE);
+ _(SYN_SENT, TCP_FLAG_RST, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE);
+ _(SYN_SENT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT,
+ TCP_ERROR_NONE);
+ /* ACK for an established connection -> tcp-established. */
+ _(ESTABLISHED, TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
+ /* FIN for an established connection -> tcp-established. */
+ _(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
+ _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
+ TCP_ERROR_NONE);
+ _(ESTABLISHED, TCP_FLAG_RST, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
+ _(ESTABLISHED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
+ TCP_ERROR_NONE);
+ _(ESTABLISHED, TCP_FLAG_SYN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
+ _(ESTABLISHED, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
+ TCP_ERROR_NONE);
+ /* ACK or FIN-ACK to our FIN */
+ _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
+ /* FIN in reply to our FIN from the other side */
+ _(FIN_WAIT_1, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(FIN_WAIT_1, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ /* FIN confirming that the peer (app) has closed */
+ _(FIN_WAIT_2, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(FIN_WAIT_2, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
+ _(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
+ _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(LAST_ACK, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
+ _(LAST_ACK, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(TIME_WAIT, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(TIME_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
+ _(TIME_WAIT, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(TIME_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED);
+ _(CLOSED, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED);
+ _(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
+ TCP_ERROR_CONNECTION_CLOSED);
+#undef _
+}
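+
+/* The table is consumed in tcp46_input_inline above, where next0 and
+ * error0 are looked up as tm->dispatch_table[tc0->state][flags0]. */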
+
+clib_error_t *
+tcp_input_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+ tcp_main_t *tm = vnet_get_tcp_main ();
+
+ if ((error = vlib_call_init_function (vm, tcp_init)))
+ return error;
+
+ /* Initialize dispatch table. */
+ tcp_dispatch_table_init (tm);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (tcp_input_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c
new file mode 100644
index 00000000..103fea4c
--- /dev/null
+++ b/src/vnet/tcp/tcp_newreno.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/tcp/tcp.h>
+
+void
+newreno_congestion (tcp_connection_t * tc)
+{
+ tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
+}
+
+void
+newreno_recovered (tcp_connection_t * tc)
+{
+ tc->cwnd = tc->ssthresh;
+}
+
+void
+newreno_rcv_ack (tcp_connection_t * tc)
+{
+ if (tcp_in_slowstart (tc))
+ {
+ tc->cwnd += clib_min (tc->snd_mss, tc->bytes_acked);
+ }
+ else
+ {
+ /* Round up to 1 if needed */
+ tc->cwnd += clib_max ((tc->snd_mss * tc->snd_mss) / tc->cwnd, 1);
+ }
+}
+
+void
+newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type)
+{
+ if (ack_type == TCP_CC_DUPACK)
+ {
+ if (!tcp_opts_sack_permitted (&tc->rcv_opts))
+ tc->cwnd += tc->snd_mss;
+ }
+ else if (ack_type == TCP_CC_PARTIALACK)
+ {
+ /* RFC 6582 Sec. 3.2 */
+ if (!tcp_opts_sack_permitted (&tc->rcv_opts))
+ {
+ /* Deflate the congestion window by the amount of new data
+ * acknowledged by the Cumulative Acknowledgment field.
+ * If the partial ACK acknowledges at least one SMSS of new data,
+ * then add back SMSS bytes to the congestion window. This
+ * artificially inflates the congestion window in order to reflect
+ * the additional segment that has left the network. This "partial
+ * window deflation" attempts to ensure that, when fast recovery
+ * eventually ends, approximately ssthresh amount of data will be
+ * outstanding in the network.*/
+ tc->cwnd = (tc->cwnd > tc->bytes_acked + tc->snd_mss) ?
+ tc->cwnd - tc->bytes_acked : tc->snd_mss;
+ if (tc->bytes_acked > tc->snd_mss)
+ tc->cwnd += tc->snd_mss;
+ }
+ }
+}
+
+void
+newreno_conn_init (tcp_connection_t * tc)
+{
+ tc->ssthresh = tc->snd_wnd;
+ tc->cwnd = tcp_initial_cwnd (tc);
+}
+
+static const tcp_cc_algorithm_t tcp_newreno = {
+ .congestion = newreno_congestion,
+ .recovered = newreno_recovered,
+ .rcv_ack = newreno_rcv_ack,
+ .rcv_cong_ack = newreno_rcv_cong_ack,
+ .init = newreno_conn_init
+};
+
+clib_error_t *
+newreno_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+
+ tcp_cc_algo_register (TCP_CC_NEWRENO, &tcp_newreno);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (newreno_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
new file mode 100644
index 00000000..a954bfa7
--- /dev/null
+++ b/src/vnet/tcp/tcp_output.c
@@ -0,0 +1,2113 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/tcp/tcp.h>
+#include <vnet/lisp-cp/packets.h>
+#include <math.h>
+
+vlib_node_registration_t tcp4_output_node;
+vlib_node_registration_t tcp6_output_node;
+
+typedef enum _tcp_output_next
+{
+ TCP_OUTPUT_NEXT_DROP,
+ TCP_OUTPUT_NEXT_IP_LOOKUP,
+ TCP_OUTPUT_N_NEXT
+} tcp_output_next_t;
+
+#define foreach_tcp4_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip4-lookup")
+
+#define foreach_tcp6_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip6-lookup")
+
+static char *tcp_error_strings[] = {
+#define tcp_error(n,s) s,
+#include <vnet/tcp/tcp_error.def>
+#undef tcp_error
+};
+
+typedef struct
+{
+ tcp_header_t tcp_header;
+ tcp_connection_t tcp_connection;
+} tcp_tx_trace_t;
+
+u16 dummy_mtu = 1460;
+
+u8 *
+format_tcp_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%U\n%U%U",
+ format_tcp_header, &t->tcp_header, 128,
+ format_white_space, indent,
+ format_tcp_connection, &t->tcp_connection, 1);
+
+ return s;
+}
+
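+/**
+ * Compute the smallest shift such that window >> shift fits the 16-bit
+ * window field, capped at TCP_MAX_WND_SCALE (14, per RFC 7323). For
+ * example, assuming TCP_WND_MAX is 65535, a 1 MB window needs a scale of 5.
+ */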
+static u8
+tcp_window_compute_scale (u32 window)
+{
+ u8 wnd_scale = 0;
+ while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
+ wnd_scale++;
+ return wnd_scale;
+}
+
+/**
+ * Update max segment size we're able to process.
+ *
+ * The value is constrained by our interface's MTU and IP options. It is
+ * also what we advertise to our peer.
+ */
+void
+tcp_update_rcv_mss (tcp_connection_t * tc)
+{
+ /* TODO find our iface MTU */
+ tc->mss = dummy_mtu - sizeof (tcp_header_t);
+}
+
+/**
+ * TCP's initial window
+ */
+always_inline u32
+tcp_initial_wnd_unscaled (tcp_connection_t * tc)
+{
+ /* RFC 6928 recommends the larger value commented out below. However, at
+ * the time our connections are initialized, fifos may not be allocated.
+ * Therefore, advertise the smallest possible unscaled window size and
+ * update once fifos are assigned to the session.
+ */
+ /*
+ tcp_update_rcv_mss (tc);
+ TCP_IW_N_SEGMENTS * tc->mss;
+ */
+ return TCP_MIN_RX_FIFO_SIZE;
+}
+
+/**
+ * Compute initial window and scale factor. As per RFC1323, window field in
+ * SYN and SYN-ACK segments is never scaled.
+ */
+u32
+tcp_initial_window_to_advertise (tcp_connection_t * tc)
+{
+ u32 max_fifo;
+
+ /* Initial wnd for SYN. Fifos are not allocated yet.
+ * Use some predefined value. For SYN-ACK we still want the
+ * scale to be computed in the same way */
+ max_fifo = TCP_MAX_RX_FIFO_SIZE;
+
+ tc->rcv_wscale = tcp_window_compute_scale (max_fifo);
+ tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
+
+ return clib_min (tc->rcv_wnd, TCP_WND_MAX);
+}
+
+/**
+ * Compute and return window to advertise, scaled as per RFC1323
+ */
+u32
+tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
+{
+ if (state < TCP_STATE_ESTABLISHED)
+ return tcp_initial_window_to_advertise (tc);
+
+ tcp_update_rcv_wnd (tc);
+
+ if (tc->rcv_wnd == 0)
+ {
+ tc->flags |= TCP_CONN_SENT_RCV_WND0;
+ }
+ else
+ {
+ tc->flags &= ~TCP_CONN_SENT_RCV_WND0;
+ }
+
+ return tc->rcv_wnd >> tc->rcv_wscale;
+}
+
+void
+tcp_update_rcv_wnd (tcp_connection_t * tc)
+{
+ i32 observed_wnd;
+ u32 available_space, max_fifo, wnd;
+
+ /*
+ * Figure out how much space we have available
+ */
+ available_space = stream_session_max_rx_enqueue (&tc->connection);
+ max_fifo = stream_session_rx_fifo_size (&tc->connection);
+
+ ASSERT (tc->rcv_opts.mss < max_fifo);
+ if (available_space < tc->rcv_opts.mss && available_space < max_fifo >> 3)
+ available_space = 0;
+
+ /*
+ * Use the above and what we know about what we've previously advertised
+ * to compute the new window
+ */
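+ /* rcv_las is the ack point we last advertised, so rcv_nxt - rcv_las is
+ * the data accepted since then; what remains is what the peer still
+ * believes it may send. */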
+ observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
+ if (observed_wnd < 0)
+ observed_wnd = 0;
+
+ /* Bad. Thou shalt not shrink */
+ if (available_space < observed_wnd)
+ {
+ wnd = observed_wnd;
+ TCP_EVT_DBG (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
+ }
+ else
+ {
+ wnd = available_space;
+ }
+
+ /* Make sure we advertise a multiple of 1 << rcv_wscale */
+ if (wnd && tc->rcv_wscale)
+ {
+ wnd &= ~((1 << tc->rcv_wscale) - 1);
+ if (wnd == 0)
+ wnd = 1 << tc->rcv_wscale;
+ }
+
+ tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
+}
+
+/**
+ * Write TCP options to segment.
+ */
+u32
+tcp_options_write (u8 * data, tcp_options_t * opts)
+{
+ u32 opts_len = 0;
+ u32 buf, seq_len = 4;
+
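+ /* Each option is encoded as kind, length, value per RFC 793; the
+ * single-byte NOOP/EOL kinds carry no length or value. */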
+ if (tcp_opts_mss (opts))
+ {
+ *data++ = TCP_OPTION_MSS;
+ *data++ = TCP_OPTION_LEN_MSS;
+ buf = clib_host_to_net_u16 (opts->mss);
+ clib_memcpy (data, &buf, sizeof (opts->mss));
+ data += sizeof (opts->mss);
+ opts_len += TCP_OPTION_LEN_MSS;
+ }
+
+ if (tcp_opts_wscale (opts))
+ {
+ *data++ = TCP_OPTION_WINDOW_SCALE;
+ *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
+ *data++ = opts->wscale;
+ opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
+ }
+
+ if (tcp_opts_sack_permitted (opts))
+ {
+ *data++ = TCP_OPTION_SACK_PERMITTED;
+ *data++ = TCP_OPTION_LEN_SACK_PERMITTED;
+ opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
+ }
+
+ if (tcp_opts_tstamp (opts))
+ {
+ *data++ = TCP_OPTION_TIMESTAMP;
+ *data++ = TCP_OPTION_LEN_TIMESTAMP;
+ buf = clib_host_to_net_u32 (opts->tsval);
+ clib_memcpy (data, &buf, sizeof (opts->tsval));
+ data += sizeof (opts->tsval);
+ buf = clib_host_to_net_u32 (opts->tsecr);
+ clib_memcpy (data, &buf, sizeof (opts->tsecr));
+ data += sizeof (opts->tsecr);
+ opts_len += TCP_OPTION_LEN_TIMESTAMP;
+ }
+
+ if (tcp_opts_sack (opts))
+ {
+ int i;
+ u32 n_sack_blocks = clib_min (vec_len (opts->sacks),
+ TCP_OPTS_MAX_SACK_BLOCKS);
+
+ if (n_sack_blocks != 0)
+ {
+ *data++ = TCP_OPTION_SACK_BLOCK;
+ *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
+ for (i = 0; i < n_sack_blocks; i++)
+ {
+ buf = clib_host_to_net_u32 (opts->sacks[i].start);
+ clib_memcpy (data, &buf, seq_len);
+ data += seq_len;
+ buf = clib_host_to_net_u32 (opts->sacks[i].end);
+ clib_memcpy (data, &buf, seq_len);
+ data += seq_len;
+ }
+ opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
+ }
+ }
+
+ /* Terminate TCP options */
+ if (opts_len % 4)
+ {
+ *data++ = TCP_OPTION_EOL;
+ opts_len += TCP_OPTION_LEN_EOL;
+ }
+
+ /* Pad with NOOPs to a u32 boundary */
+ while (opts_len % 4)
+ {
+ *data++ = TCP_OPTION_NOOP;
+ opts_len += TCP_OPTION_LEN_NOOP;
+ }
+ return opts_len;
+}
+
+always_inline int
+tcp_make_syn_options (tcp_options_t * opts, u8 wnd_scale)
+{
+ u8 len = 0;
+
+ opts->flags |= TCP_OPTS_FLAG_MSS;
+ opts->mss = dummy_mtu; /* XXX: discover the real iface MTU */
+ len += TCP_OPTION_LEN_MSS;
+
+ opts->flags |= TCP_OPTS_FLAG_WSCALE;
+ opts->wscale = wnd_scale;
+ len += TCP_OPTION_LEN_WINDOW_SCALE;
+
+ opts->flags |= TCP_OPTS_FLAG_TSTAMP;
+ opts->tsval = tcp_time_now ();
+ opts->tsecr = 0;
+ len += TCP_OPTION_LEN_TIMESTAMP;
+
+ if (TCP_USE_SACKS)
+ {
+ opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
+ len += TCP_OPTION_LEN_SACK_PERMITTED;
+ }
+
+ /* Align to needed boundary */
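+ /* e.g., assuming TCP_OPTS_ALIGN is 4, a len of 18 rounds up to 20 */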
+ len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
+ return len;
+}
+
+always_inline int
+tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts)
+{
+ u8 len = 0;
+
+ opts->flags |= TCP_OPTS_FLAG_MSS;
+ opts->mss = tc->mss;
+ len += TCP_OPTION_LEN_MSS;
+
+ if (tcp_opts_wscale (&tc->rcv_opts))
+ {
+ opts->flags |= TCP_OPTS_FLAG_WSCALE;
+ opts->wscale = tc->rcv_wscale;
+ len += TCP_OPTION_LEN_WINDOW_SCALE;
+ }
+
+ if (tcp_opts_tstamp (&tc->rcv_opts))
+ {
+ opts->flags |= TCP_OPTS_FLAG_TSTAMP;
+ opts->tsval = tcp_time_now ();
+ opts->tsecr = tc->tsval_recent;
+ len += TCP_OPTION_LEN_TIMESTAMP;
+ }
+
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ {
+ opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
+ len += TCP_OPTION_LEN_SACK_PERMITTED;
+ }
+
+ /* Align to needed boundary */
+ len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
+ return len;
+}
+
+always_inline int
+tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts)
+{
+ u8 len = 0;
+
+ opts->flags = 0;
+
+ if (tcp_opts_tstamp (&tc->rcv_opts))
+ {
+ opts->flags |= TCP_OPTS_FLAG_TSTAMP;
+ opts->tsval = tcp_time_now ();
+ opts->tsecr = tc->tsval_recent;
+ len += TCP_OPTION_LEN_TIMESTAMP;
+ }
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ {
+ if (vec_len (tc->snd_sacks))
+ {
+ opts->flags |= TCP_OPTS_FLAG_SACK;
+ opts->sacks = tc->snd_sacks;
+ opts->n_sack_blocks = clib_min (vec_len (tc->snd_sacks),
+ TCP_OPTS_MAX_SACK_BLOCKS);
+ len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
+ }
+ }
+
+ /* Align to needed boundary */
+ len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
+ return len;
+}
+
+always_inline int
+tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
+ tcp_state_t state)
+{
+ switch (state)
+ {
+ case TCP_STATE_ESTABLISHED:
+ case TCP_STATE_FIN_WAIT_1:
+ return tcp_make_established_options (tc, opts);
+ case TCP_STATE_SYN_RCVD:
+ return tcp_make_synack_options (tc, opts);
+ case TCP_STATE_SYN_SENT:
+ return tcp_make_syn_options (opts, tc->rcv_wscale);
+ default:
+ clib_warning ("Not handled!");
+ return 0;
+ }
+}
+
+/**
+ * Update snd_mss to reflect the effective segment size that we can send
+ * by taking into account all TCP options, including SACKs
+ */
+void
+tcp_update_snd_mss (tcp_connection_t * tc)
+{
+ /* Compute options to be used for connection. These may be reused when
+ * sending data or to compute the effective mss (snd_mss) */
+ tc->snd_opts_len =
+ tcp_make_options (tc, &tc->snd_opts, TCP_STATE_ESTABLISHED);
+
+ /* XXX check if MTU has been updated */
+ tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
+ ASSERT (tc->snd_mss > 0);
+}
+
+void
+tcp_init_mss (tcp_connection_t * tc)
+{
+ u16 default_min_mss = 536;
+ tcp_update_rcv_mss (tc);
+
+ /* TODO cache mss and consider PMTU discovery */
+ tc->snd_mss = clib_min (tc->rcv_opts.mss, tc->mss);
+
+ if (tc->snd_mss < 45)
+ {
+ clib_warning ("snd mss is 0");
+ /* Assume that at least the min default mss works */
+ tc->snd_mss = default_min_mss;
+ tc->rcv_opts.mss = default_min_mss;
+ }
+
+ /* We should have enough space for 40 bytes of options */
+ ASSERT (tc->snd_mss > 45);
+
+ /* If we use timestamp option, account for it */
+ if (tcp_opts_tstamp (&tc->rcv_opts))
+ tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP;
+}
+
+always_inline int
+tcp_alloc_tx_buffers (tcp_main_t * tm, u8 thread_index, u32 n_free_buffers)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 current_length = vec_len (tm->tx_buffers[thread_index]);
+ u32 n_allocated;
+
+ vec_validate (tm->tx_buffers[thread_index],
+ current_length + n_free_buffers - 1);
+ n_allocated =
+ vlib_buffer_alloc (vm, &tm->tx_buffers[thread_index][current_length],
+ n_free_buffers);
+ _vec_len (tm->tx_buffers[thread_index]) = current_length + n_allocated;
+ /* buffer shortage, report failure */
+ if (vec_len (tm->tx_buffers[thread_index]) == 0)
+ {
+ clib_warning ("out of buffers");
+ return -1;
+ }
+ return 0;
+}
+
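+/**
+ * Pop a buffer index off the thread's tx cache, refilling a frame's worth
+ * from the allocator on a miss. This avoids per-packet allocator calls.
+ */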
+always_inline int
+tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx)
+{
+ u32 *my_tx_buffers;
+ u32 thread_index = vlib_get_thread_index ();
+ if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0))
+ {
+ if (tcp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE))
+ return -1;
+ }
+ my_tx_buffers = tm->tx_buffers[thread_index];
+ *bidx = my_tx_buffers[vec_len (my_tx_buffers) - 1];
+ _vec_len (my_tx_buffers) -= 1;
+ return 0;
+}
+
+always_inline void
+tcp_return_buffer (tcp_main_t * tm)
+{
+ _vec_len (tm->tx_buffers[vlib_get_thread_index ()]) += 1;
+}
+
+always_inline void *
+tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ vlib_buffer_free_one (vm, b->next_buffer);
+ /* Zero all flags but free list index and trace flag */
+ b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
+ b->current_data = 0;
+ b->current_length = 0;
+ b->total_length_not_including_first_buffer = 0;
+ vnet_buffer (b)->tcp.flags = 0;
+
+ /* Leave enough space for headers */
+ return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+}
+
+always_inline void *
+tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
+ b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK;
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b->total_length_not_including_first_buffer = 0;
+ vnet_buffer (b)->tcp.flags = 0;
+
+ /* Leave enough space for headers */
+ return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+}
+
+/**
+ * Prepare ACK
+ */
+void
+tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
+ u8 flags)
+{
+ tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
+ u8 tcp_opts_len, tcp_hdr_opts_len;
+ tcp_header_t *th;
+ u16 wnd;
+
+ wnd = tcp_window_to_advertise (tc, state);
+
+ /* Make and write options */
+ tcp_opts_len = tcp_make_established_options (tc, snd_opts);
+ tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
+
+ th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
+ tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
+
+ tcp_options_write ((u8 *) (th + 1), snd_opts);
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+}
+
+/**
+ * Convert buffer to ACK
+ */
+void
+tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ tcp_reuse_buffer (vm, b);
+ tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
+ TCP_EVT_DBG (TCP_EVT_ACK_SENT, tc);
+ vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
+ tc->rcv_las = tc->rcv_nxt;
+}
+
+/**
+ * Convert buffer to FIN-ACK
+ */
+void
+tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u8 flags = 0;
+
+ tcp_reuse_buffer (vm, b);
+
+ flags = TCP_FLAG_FIN | TCP_FLAG_ACK;
+ tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, flags);
+
+ /* Reset flags, make sure ack is sent */
+ vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK;
+}
+
+/**
+ * Convert buffer to SYN
+ */
+void
+tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+ u8 tcp_hdr_opts_len, tcp_opts_len;
+ tcp_header_t *th;
+ u16 initial_wnd;
+ tcp_options_t snd_opts;
+
+ initial_wnd = tcp_initial_window_to_advertise (tc);
+
+ /* Make and write options */
+ memset (&snd_opts, 0, sizeof (snd_opts));
+ tcp_opts_len = tcp_make_syn_options (&snd_opts, tc->rcv_wscale);
+ tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
+
+ th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
+ tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
+ initial_wnd);
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+ tcp_options_write ((u8 *) (th + 1), &snd_opts);
+
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
+ tc->rto * TCP_TO_TIMER_TICK);
+}
+
+/**
+ * Convert buffer to SYN-ACK
+ */
+void
+tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
+ u8 tcp_opts_len, tcp_hdr_opts_len;
+ tcp_header_t *th;
+ u16 initial_wnd;
+
+ memset (snd_opts, 0, sizeof (*snd_opts));
+ tcp_reuse_buffer (vm, b);
+
+ initial_wnd = tcp_initial_window_to_advertise (tc);
+ tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
+ tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
+
+ th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
+ tc->rcv_nxt, tcp_hdr_opts_len,
+ TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
+ tcp_options_write ((u8 *) (th + 1), snd_opts);
+
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+ vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
+
+ /* Init retransmit timer. Use update instead of set because of
+ * retransmissions */
+ tcp_retransmit_timer_force_update (tc);
+ TCP_EVT_DBG (TCP_EVT_SYNACK_SENT, tc);
+}
+
+always_inline void
+tcp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4, u8 flush)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ u32 thread_index = vlib_get_thread_index ();
+ u32 *to_next, next_index;
+ vlib_frame_t *f;
+
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b->error = 0;
+
+ /* Default FIB for now */
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+
+ /* Send to IP lookup */
+ next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
+ if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
+ {
+ b->pre_data[0] = 2;
+ b->pre_data[1] = next_index;
+ }
+
+ f = tm->ip_lookup_tx_frames[!is_ip4][thread_index];
+ if (!f)
+ {
+ f = vlib_get_frame_to_node (vm, next_index);
+ ASSERT (f);
+ tm->ip_lookup_tx_frames[!is_ip4][thread_index] = f;
+ }
+
+ to_next = vlib_frame_vector_args (f);
+ to_next[f->n_vectors] = bi;
+ f->n_vectors += 1;
+ if (flush || f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, next_index, f);
+ tm->ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+ }
+}
+
+always_inline void
+tcp_enqueue_to_ip_lookup_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4)
+{
+ tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 1);
+}
+
+always_inline void
+tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4)
+{
+ tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 0);
+}
+
+always_inline void
+tcp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4, u8 flush)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ u32 thread_index = vlib_get_thread_index ();
+ u32 *to_next, next_index;
+ vlib_frame_t *f;
+
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b->error = 0;
+
+ /* Decide where to send the packet */
+ next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
+ if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
+ {
+ b->pre_data[0] = 1;
+ b->pre_data[1] = next_index;
+ }
+
+ /* Get frame to v4/6 output node */
+ f = tm->tx_frames[!is_ip4][thread_index];
+ if (!f)
+ {
+ f = vlib_get_frame_to_node (vm, next_index);
+ ASSERT (f);
+ tm->tx_frames[!is_ip4][thread_index] = f;
+ }
+ to_next = vlib_frame_vector_args (f);
+ to_next[f->n_vectors] = bi;
+ f->n_vectors += 1;
+ if (flush || f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, next_index, f);
+ tm->tx_frames[!is_ip4][thread_index] = 0;
+ }
+}
+
+always_inline void
+tcp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u8 is_ip4)
+{
+ tcp_enqueue_to_output_i (vm, b, bi, is_ip4, 0);
+}
+
+always_inline void
+tcp_enqueue_to_output_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4)
+{
+ tcp_enqueue_to_output_i (vm, b, bi, is_ip4, 1);
+}
+
+int
+tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0,
+ tcp_state_t state, u8 thread_index, u8 is_ip4)
+{
+ ip4_header_t *ih4;
+ ip6_header_t *ih6;
+ tcp_header_t *th0;
+ ip4_address_t src_ip40, dst_ip40;
+ ip6_address_t src_ip60, dst_ip60;
+ u16 src_port, dst_port;
+ u32 tmp;
+ u32 seq, ack;
+ u8 flags;
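+
+ /* Per RFC 793 reset generation: if the offending segment carries an ack,
+ * the reset reuses that ack number as its seq; otherwise seq is 0 and we
+ * ack the segment's sequence number. */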
+
+ /* Find IP and TCP headers */
+ th0 = tcp_buffer_hdr (b0);
+
+ /* Save src and dst ip */
+ if (is_ip4)
+ {
+ ih4 = vlib_buffer_get_current (b0);
+ ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
+ src_ip40.as_u32 = ih4->src_address.as_u32;
+ dst_ip40.as_u32 = ih4->dst_address.as_u32;
+ }
+ else
+ {
+ ih6 = vlib_buffer_get_current (b0);
+ ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
+ clib_memcpy (&src_ip60, &ih6->src_address, sizeof (ip6_address_t));
+ clib_memcpy (&dst_ip60, &ih6->dst_address, sizeof (ip6_address_t));
+ }
+
+ src_port = th0->src_port;
+ dst_port = th0->dst_port;
+
+ /* Try to determine what/why we're actually resetting */
+ if (state == TCP_STATE_CLOSED)
+ {
+ if (!tcp_syn (th0))
+ return -1;
+
+ tmp = clib_net_to_host_u32 (th0->seq_number);
+
+ /* Got a SYN for no listener. */
+ flags = TCP_FLAG_RST | TCP_FLAG_ACK;
+ ack = clib_host_to_net_u32 (tmp + 1);
+ seq = 0;
+ }
+ else
+ {
+ flags = TCP_FLAG_RST;
+ seq = th0->ack_number;
+ ack = 0;
+ }
+
+ tcp_reuse_buffer (vm, b0);
+ th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack,
+ sizeof (tcp_header_t), flags, 0);
+
+ if (is_ip4)
+ {
+ ih4 = vlib_buffer_push_ip4 (vm, b0, &dst_ip40, &src_ip40,
+ IP_PROTOCOL_TCP, 1);
+ th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4);
+ }
+ else
+ {
+ int bogus = ~0;
+ ih6 = vlib_buffer_push_ip6 (vm, b0, &dst_ip60, &src_ip60,
+ IP_PROTOCOL_TCP);
+ th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus);
+ ASSERT (!bogus);
+ }
+
+ return 0;
+}
+
+/**
+ * Send reset without reusing an existing buffer
+ *
+ * Extracts the connection info from the original packet.
+ */
+void
+tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 is_ip4)
+{
+ vlib_buffer_t *b;
+ u32 bi;
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ u8 tcp_hdr_len, flags = 0;
+ tcp_header_t *th, *pkt_th;
+ u32 seq, ack;
+ ip4_header_t *ih4, *pkt_ih4;
+ ip6_header_t *ih6, *pkt_ih6;
+
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return;
+
+ b = vlib_get_buffer (vm, bi);
+ tcp_init_buffer (vm, b);
+
+ /* Make and write options */
+ tcp_hdr_len = sizeof (tcp_header_t);
+
+ if (is_ip4)
+ {
+ pkt_ih4 = vlib_buffer_get_current (pkt);
+ pkt_th = ip4_next_header (pkt_ih4);
+ }
+ else
+ {
+ pkt_ih6 = vlib_buffer_get_current (pkt);
+ pkt_th = ip6_next_header (pkt_ih6);
+ }
+
+ if (tcp_ack (pkt_th))
+ {
+ flags = TCP_FLAG_RST;
+ seq = pkt_th->ack_number;
+ ack = (tc && tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
+ }
+ else
+ {
+ flags = TCP_FLAG_RST | TCP_FLAG_ACK;
+ seq = 0;
+ ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
+ }
+
+ th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
+ seq, ack, tcp_hdr_len, flags, 0);
+
+ /* Swap src and dst ip */
+ if (is_ip4)
+ {
+ ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
+ ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
+ &pkt_ih4->src_address, IP_PROTOCOL_TCP, 1);
+ th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
+ }
+ else
+ {
+ int bogus = ~0;
+ ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
+ 0x60);
+ ih6 = vlib_buffer_push_ip6 (vm, b, &pkt_ih6->dst_address,
+ &pkt_ih6->src_address, IP_PROTOCOL_TCP);
+ th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
+ ASSERT (!bogus);
+ }
+
+ tcp_enqueue_to_ip_lookup_now (vm, b, bi, is_ip4);
+ TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
+}
+
+/**
+ * Build and set reset packet for connection
+ */
+void
+tcp_send_reset (tcp_connection_t * tc)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vlib_buffer_t *b;
+ u32 bi;
+ tcp_header_t *th;
+ u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
+ u8 flags;
+
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return;
+ b = vlib_get_buffer (vm, bi);
+ tcp_init_buffer (vm, b);
+
+ tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
+ tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
+ advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
+ flags = TCP_FLAG_RST;
+ th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
+ tc->rcv_nxt, tcp_hdr_opts_len, flags,
+ advertise_wnd);
+ opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
+ ASSERT (opts_write_len == tc->snd_opts_len);
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+ if (tc->c_is_ip4)
+ {
+ ip4_header_t *ih4;
+ ih4 = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip.ip4,
+ &tc->c_rmt_ip.ip4, IP_PROTOCOL_TCP, 0);
+ th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
+ }
+ else
+ {
+ int bogus = ~0;
+ ip6_header_t *ih6;
+ ih6 = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip.ip6,
+ &tc->c_rmt_ip.ip6, IP_PROTOCOL_TCP);
+ th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
+ ASSERT (!bogus);
+ }
+ tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
+}
+
+void
+tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b)
+{
+ tcp_header_t *th = vlib_buffer_get_current (b);
+ vlib_main_t *vm = vlib_get_main ();
+ if (tc->c_is_ip4)
+ {
+ ip4_header_t *ih;
+ ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4,
+ &tc->c_rmt_ip4, IP_PROTOCOL_TCP, 1);
+ th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih);
+ }
+ else
+ {
+ ip6_header_t *ih;
+ int bogus = ~0;
+
+ ih = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip6,
+ &tc->c_rmt_ip6, IP_PROTOCOL_TCP);
+ th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih, &bogus);
+ ASSERT (!bogus);
+ }
+}
+
+/**
+ * Send SYN
+ *
+ * Builds a SYN packet for a half-open connection and sends it to ipx_lookup.
+ * The packet is not forwarded through tcpx_output to avoid doing lookups
+ * in the half_open pool.
+ */
+void
+tcp_send_syn (tcp_connection_t * tc)
+{
+ vlib_buffer_t *b;
+ u32 bi;
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return;
+
+ b = vlib_get_buffer (vm, bi);
+ tcp_init_buffer (vm, b);
+ tcp_make_syn (tc, b);
+
+ /* Measure RTT with this */
+ tc->rtt_ts = tcp_time_now ();
+ tc->rtt_seq = tc->snd_nxt;
+ tc->rto_boff = 0;
+
+ /* Set the connection establishment timer */
+ tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
+
+ tcp_push_ip_hdr (tm, tc, b);
+ tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
+}
+
+/**
+ * Flush tx frame populated by retransmits and timer pops
+ */
+void
+tcp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
+{
+ if (tcp_main.tx_frames[!is_ip4][thread_index])
+ {
+ u32 next_index;
+ next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
+ vlib_put_frame_to_node (vm, next_index,
+ tcp_main.tx_frames[!is_ip4][thread_index]);
+ tcp_main.tx_frames[!is_ip4][thread_index] = 0;
+ }
+}
+
+/**
+ * Flush ip lookup tx frames populated by timer pops
+ */
+always_inline void
+tcp_flush_frame_to_ip_lookup (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
+{
+ if (tcp_main.ip_lookup_tx_frames[!is_ip4][thread_index])
+ {
+ u32 next_index;
+ next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
+ vlib_put_frame_to_node (vm, next_index,
+ tcp_main.ip_lookup_tx_frames[!is_ip4]
+ [thread_index]);
+ tcp_main.ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+ }
+}
+
+/**
+ * Flush v4 and v6 tcp and ip-lookup tx frames for thread index
+ */
+void
+tcp_flush_frames_to_output (u8 thread_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ tcp_flush_frame_to_output (vm, thread_index, 1);
+ tcp_flush_frame_to_output (vm, thread_index, 0);
+ tcp_flush_frame_to_ip_lookup (vm, thread_index, 1);
+ tcp_flush_frame_to_ip_lookup (vm, thread_index, 0);
+}
+
+/**
+ * Send FIN
+ */
+void
+tcp_send_fin (tcp_connection_t * tc)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_buffer_t *b;
+ u32 bi;
+ u8 fin_snt = 0;
+
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return;
+ b = vlib_get_buffer (vm, bi);
+ fin_snt = tc->flags & TCP_CONN_FINSNT;
+ if (fin_snt)
+ tc->snd_nxt = tc->snd_una;
+ tcp_make_fin (tc, b);
+ tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
+ if (!fin_snt)
+ {
+ tc->flags |= TCP_CONN_FINSNT;
+ tc->flags &= ~TCP_CONN_FINPNDG;
+ /* Account for the FIN */
+ tc->snd_una_max += 1;
+ tc->snd_nxt = tc->snd_una_max;
+ }
+ tcp_retransmit_timer_force_update (tc);
+ TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
+}
+
+always_inline u8
+tcp_make_state_flags (tcp_connection_t * tc, tcp_state_t next_state)
+{
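+ /* The FIN consumes one sequence number (see tcp_send_fin), so only the
+ * segment that reaches snd_una_max carries the FIN flag. */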
+ switch (next_state)
+ {
+ case TCP_STATE_ESTABLISHED:
+ return TCP_FLAG_ACK;
+ case TCP_STATE_SYN_RCVD:
+ return TCP_FLAG_SYN | TCP_FLAG_ACK;
+ case TCP_STATE_SYN_SENT:
+ return TCP_FLAG_SYN;
+ case TCP_STATE_LAST_ACK:
+ case TCP_STATE_FIN_WAIT_1:
+ if (tc->snd_nxt + 1 < tc->snd_una_max)
+ return TCP_FLAG_ACK;
+ else
+ return TCP_FLAG_FIN;
+ default:
+ clib_warning ("Shouldn't be here!");
+ }
+ return 0;
+}
+
+/**
+ * Push TCP header and update connection variables
+ */
+static void
+tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b,
+ tcp_state_t next_state, u8 compute_opts)
+{
+ u32 advertise_wnd, data_len;
+ u8 tcp_hdr_opts_len, opts_write_len, flags;
+ tcp_header_t *th;
+
+ data_len = b->current_length + b->total_length_not_including_first_buffer;
+ ASSERT (!b->total_length_not_including_first_buffer
+ || (b->flags & VLIB_BUFFER_NEXT_PRESENT));
+ vnet_buffer (b)->tcp.flags = 0;
+
+ if (compute_opts)
+ tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
+
+ tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
+ advertise_wnd = tcp_window_to_advertise (tc, next_state);
+ flags = tcp_make_state_flags (tc, next_state);
+
+ /* Push header and options */
+ th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
+ tc->rcv_nxt, tcp_hdr_opts_len, flags,
+ advertise_wnd);
+ opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
+
+ ASSERT (opts_write_len == tc->snd_opts_len);
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+
+ /*
+ * Update connection variables
+ */
+
+ tc->snd_nxt += data_len;
+ tc->rcv_las = tc->rcv_nxt;
+
+ /* TODO this is updated in output as well ... */
+ if (seq_gt (tc->snd_nxt, tc->snd_una_max))
+ {
+ tc->snd_una_max = tc->snd_nxt;
+ tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
+ }
+
+ TCP_EVT_DBG (TCP_EVT_PKTIZE, tc);
+}
+
+void
+tcp_send_ack (tcp_connection_t * tc)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ vlib_buffer_t *b;
+ u32 bi;
+
+ /* Get buffer */
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return;
+ b = vlib_get_buffer (vm, bi);
+
+ /* Fill in the ACK */
+ tcp_make_ack (tc, b);
+ tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+}
+
+/**
+ * Delayed ack timer handler
+ *
+ * Sends delayed ACK when timer expires
+ */
+void
+tcp_timer_delack_handler (u32 index)
+{
+ u32 thread_index = vlib_get_thread_index ();
+ tcp_connection_t *tc;
+
+ tc = tcp_connection_get (index, thread_index);
+ tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID;
+ tcp_send_ack (tc);
+}
+
+/**
+ * Build a retransmit segment
+ *
+ * @return the number of bytes in the segment or 0 if there's nothing to
+ * retransmit
+ */
+u32
+tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset,
+ u32 max_deq_bytes, vlib_buffer_t ** b)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ int n_bytes = 0;
+ u32 start, bi, available_bytes, seg_size;
+ u8 *data;
+
+ ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
+ ASSERT (max_deq_bytes != 0);
+
+ /*
+ * Make sure we can retransmit something
+ */
+ available_bytes = stream_session_tx_fifo_max_dequeue (&tc->connection);
+ ASSERT (available_bytes >= offset);
+ available_bytes -= offset;
+ if (!available_bytes)
+ return 0;
+ max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
+ max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
+
+ /* Start is beyond snd_congestion */
+ start = tc->snd_una + offset;
+ if (seq_geq (start, tc->snd_congestion))
+ goto done;
+
+ /* Don't overshoot snd_congestion */
+ if (seq_gt (start + max_deq_bytes, tc->snd_congestion))
+ {
+ max_deq_bytes = tc->snd_congestion - start;
+ if (max_deq_bytes == 0)
+ goto done;
+ }
+
+ seg_size = max_deq_bytes + MAX_HDRS_LEN;
+
+ /*
+ * Prepare options
+ */
+ tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
+
+ /*
+ * Allocate and fill in buffer(s)
+ */
+
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return 0;
+ *b = vlib_get_buffer (vm, bi);
+ data = tcp_init_buffer (vm, *b);
+
+ /* Easy case, buffer size greater than mss */
+ if (PREDICT_TRUE (seg_size <= tm->bytes_per_buffer))
+ {
+ n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
+ max_deq_bytes);
+ ASSERT (n_bytes == max_deq_bytes);
+ b[0]->current_length = n_bytes;
+ tcp_push_hdr_i (tc, *b, tc->state, 0);
+ }
+ /* Split mss into multiple buffers */
+ else
+ {
+ u32 chain_bi = ~0, n_bufs_per_seg;
+ u32 thread_index = vlib_get_thread_index ();
+ u16 n_peeked, len_to_deq, available_bufs;
+ vlib_buffer_t *chain_b, *prev_b;
+ int i;
+
+ n_bufs_per_seg = ceil ((double) seg_size / tm->bytes_per_buffer);
+
+ /* Make sure we have enough buffers */
+ available_bufs = vec_len (tm->tx_buffers[thread_index]);
+ if (n_bufs_per_seg > available_bufs)
+ {
+ if (tcp_alloc_tx_buffers (tm, thread_index,
+ VLIB_FRAME_SIZE - available_bufs))
+ {
+ tcp_return_buffer (tm);
+ *b = 0;
+ return 0;
+ }
+ }
+
+ n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
+ tm->bytes_per_buffer -
+ MAX_HDRS_LEN);
+ b[0]->current_length = n_bytes;
+ b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b[0]->total_length_not_including_first_buffer = 0;
+ max_deq_bytes -= n_bytes;
+
+ chain_b = *b;
+ for (i = 1; i < n_bufs_per_seg; i++)
+ {
+ prev_b = chain_b;
+ len_to_deq = clib_min (max_deq_bytes, tm->bytes_per_buffer);
+ tcp_get_free_buffer_index (tm, &chain_bi);
+ ASSERT (chain_bi != (u32) ~ 0);
+ chain_b = vlib_get_buffer (vm, chain_bi);
+ chain_b->current_data = 0;
+ data = vlib_buffer_get_current (chain_b);
+ n_peeked = stream_session_peek_bytes (&tc->connection, data,
+ offset + n_bytes, len_to_deq);
+ ASSERT (n_peeked == len_to_deq);
+ n_bytes += n_peeked;
+ chain_b->current_length = n_peeked;
+ chain_b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK;
+ chain_b->next_buffer = 0;
+
+ /* update previous buffer */
+ prev_b->next_buffer = chain_bi;
+ prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ max_deq_bytes -= n_peeked;
+ b[0]->total_length_not_including_first_buffer += n_peeked;
+ }
+
+ tcp_push_hdr_i (tc, *b, tc->state, 0);
+ }
+
+ ASSERT (n_bytes > 0);
+ ASSERT (((*b)->current_data + (*b)->current_length) <=
+ tm->bytes_per_buffer);
+
+ if (tcp_in_fastrecovery (tc))
+ tc->snd_rxt_bytes += n_bytes;
+
+done:
+ TCP_EVT_DBG (TCP_EVT_CC_RTX, tc, offset, n_bytes);
+ return n_bytes;
+}
+
+/**
+ * Reset congestion control, switch cwnd to loss window and try again.
+ */
+static void
+tcp_rtx_timeout_cc (tcp_connection_t * tc)
+{
+ tc->prev_ssthresh = tc->ssthresh;
+ tc->prev_cwnd = tc->cwnd;
+
+ /* Cleanly recover cc (also clears up fast retransmit) */
+ if (tcp_in_fastrecovery (tc))
+ tcp_cc_fastrecovery_exit (tc);
+
+ /* Start again from the beginning */
+ tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
+ tc->cwnd = tcp_loss_wnd (tc);
+ tc->snd_congestion = tc->snd_una_max;
+ tc->rtt_ts = 0;
+ tcp_recovery_on (tc);
+}
+
+static void
+tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ u32 thread_index = vlib_get_thread_index ();
+ tcp_connection_t *tc;
+ vlib_buffer_t *b = 0;
+ u32 bi, n_bytes;
+
+ if (is_syn)
+ {
+ tc = tcp_half_open_connection_get (index);
+ /* Note: the connection may have transitioned to ESTABLISHED... */
+ if (PREDICT_FALSE (tc == 0))
+ return;
+ tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID;
+ }
+ else
+ {
+ tc = tcp_connection_get (index, thread_index);
+ /* Note: the connection may have been closed and pool_put */
+ if (PREDICT_FALSE (tc == 0))
+ return;
+ tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
+ }
+
+ if (tc->state >= TCP_STATE_ESTABLISHED)
+ {
+ /* Lost FIN, retransmit and return */
+ if (tcp_is_lost_fin (tc))
+ {
+ tcp_send_fin (tc);
+ return;
+ }
+
+ /* We're not in recovery so make sure rto_boff is 0 */
+ if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
+ {
+ tc->rto_boff = 0;
+ tcp_update_rto (tc);
+ }
+
+ /* Increment RTO backoff (also equal to number of retries) and go back
+ * to first un-acked byte */
+ tc->rto_boff += 1;
+
+ /* First retransmit timeout */
+ if (tc->rto_boff == 1)
+ tcp_rtx_timeout_cc (tc);
+
+ tc->snd_nxt = tc->snd_una;
+ tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
+
+ /* Send one segment. Note that n_bytes may be zero due to buffer shortfall */
+ n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
+
+ /* TODO be less aggressive about this */
+ scoreboard_clear (&tc->sack_sb);
+
+ if (n_bytes == 0)
+ {
+ ASSERT (!b);
+ if (tc->snd_una == tc->snd_una_max)
+ return;
+ ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion);
+ clib_warning ("retransmit fail: %U", format_tcp_connection, tc, 2);
+ /* Try again eventually */
+ tcp_retransmit_timer_set (tc);
+ return;
+ }
+
+ bi = vlib_get_buffer_index (vm, b);
+
+ /* For first retransmit, record timestamp (Eifel detection RFC3522) */
+ if (tc->rto_boff == 1)
+ tc->snd_rxt_ts = tcp_time_now ();
+
+ tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+ tcp_retransmit_timer_update (tc);
+ }
+ /* Retransmit for SYN */
+ else if (tc->state == TCP_STATE_SYN_SENT)
+ {
+ /* Half-open connection actually moved to established but we were
+ * waiting for the syn retransmit timer to pop so cleanup could run
+ * from the right thread. */
+ if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
+ {
+ if (tcp_half_open_connection_cleanup (tc))
+ {
+ clib_warning ("could not remove half-open connection");
+ ASSERT (0);
+ }
+ return;
+ }
+
+ /* Try without increasing RTO a number of times. If this fails,
+ * start growing RTO exponentially */
+ tc->rto_boff += 1;
+ if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
+ tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return;
+
+ b = vlib_get_buffer (vm, bi);
+ tcp_init_buffer (vm, b);
+ tcp_make_syn (tc, b);
+
+ tc->rtt_ts = 0;
+ TCP_EVT_DBG (TCP_EVT_SYN_RXT, tc, 0);
+
+ /* This goes straight to ipx_lookup. Retransmit timer set already */
+ tcp_push_ip_hdr (tm, tc, b);
+ tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
+ }
+ /* Retransmit SYN-ACK */
+ else if (tc->state == TCP_STATE_SYN_RCVD)
+ {
+ tc->rto_boff += 1;
+ if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
+ tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+ tc->rtt_ts = 0;
+
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return;
+
+ b = vlib_get_buffer (vm, bi);
+ tcp_make_synack (tc, b);
+ TCP_EVT_DBG (TCP_EVT_SYN_RXT, tc, 1);
+
+ /* Retransmit timer already updated, just enqueue to output */
+ tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+ }
+ else
+ {
+ ASSERT (tc->state == TCP_STATE_CLOSED);
+ TCP_DBG ("connection state: %d", tc->state);
+ return;
+ }
+}
+
+void
+tcp_timer_retransmit_handler (u32 index)
+{
+ tcp_timer_retransmit_handler_i (index, 0);
+}
+
+void
+tcp_timer_retransmit_syn_handler (u32 index)
+{
+ tcp_timer_retransmit_handler_i (index, 1);
+}
+
+/**
+ * Persist timer handler
+ *
+ * Got 0 snd_wnd from the peer: periodically probe by forcing out a single
+ * segment until the window reopens.
+ */
+void
+tcp_timer_persist_handler (u32 index)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ u32 thread_index = vlib_get_thread_index ();
+ tcp_connection_t *tc;
+ vlib_buffer_t *b;
+ u32 bi, max_snd_bytes, available_bytes, offset;
+ int n_bytes = 0;
+ u8 *data;
+
+ tc = tcp_connection_get_if_valid (index, thread_index);
+
+ if (!tc)
+ return;
+
+ /* Make sure timer handle is set to invalid */
+ tc->timers[TCP_TIMER_PERSIST] = TCP_TIMER_HANDLE_INVALID;
+
+ /* Problem already solved or worse */
+ if (tc->state == TCP_STATE_CLOSED || tc->state > TCP_STATE_ESTABLISHED
+ || tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc))
+ return;
+
+ available_bytes = stream_session_tx_fifo_max_dequeue (&tc->connection);
+ offset = tc->snd_una_max - tc->snd_una;
+
+ /* Reprogram persist if no new bytes available to send. We may have data
+ * next time */
+ if (!available_bytes)
+ {
+ tcp_persist_timer_set (tc);
+ return;
+ }
+
+ if (available_bytes <= offset)
+ {
+ ASSERT (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT));
+ return;
+ }
+
+ /* Increment RTO backoff */
+ tc->rto_boff += 1;
+ tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+
+ /*
+ * Try to force the first unsent segment (or buffer)
+ */
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return;
+ b = vlib_get_buffer (vm, bi);
+ data = tcp_init_buffer (vm, b);
+
+ tcp_validate_txf_size (tc, offset);
+ tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
+ max_snd_bytes = clib_min (tc->snd_mss, tm->bytes_per_buffer - MAX_HDRS_LEN);
+ n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
+ max_snd_bytes);
+ b->current_length = n_bytes;
+ ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
+ || tc->snd_nxt == tc->snd_una_max
+ || tc->rto_boff > 1));
+
+ tcp_push_hdr_i (tc, b, tc->state, 0);
+ tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+
+ /* Just sent new data, enable retransmit */
+ tcp_retransmit_timer_update (tc);
+}
+
+/**
+ * Retransmit first unacked segment
+ */
+void
+tcp_retransmit_first_unacked (tcp_connection_t * tc)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_buffer_t *b;
+ u32 bi, old_snd_nxt, n_bytes;
+
+ old_snd_nxt = tc->snd_nxt;
+ tc->snd_nxt = tc->snd_una;
+
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2);
+ n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
+ if (!n_bytes)
+ return;
+ bi = vlib_get_buffer_index (vm, b);
+ tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+
+ tc->snd_nxt = old_snd_nxt;
+}
+
+/**
+ * Do fast retransmit with SACKs
+ */
+void
+tcp_fast_retransmit_sack (tcp_connection_t * tc)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 n_written = 0, offset, max_bytes;
+ vlib_buffer_t *b = 0;
+ sack_scoreboard_hole_t *hole;
+ sack_scoreboard_t *sb;
+ u32 bi, old_snd_nxt;
+ int snd_space;
+ u8 snd_limited = 0, can_rescue = 0;
+
+ ASSERT (tcp_in_fastrecovery (tc));
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0);
+
+ old_snd_nxt = tc->snd_nxt;
+ sb = &tc->sack_sb;
+ snd_space = tcp_available_snd_space (tc);
+
+ hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
+ while (hole && snd_space > 0)
+ {
+ hole = scoreboard_next_rxt_hole (sb, hole,
+ tcp_fastrecovery_sent_1_smss (tc),
+ &can_rescue, &snd_limited);
+ if (!hole)
+ {
+ if (!can_rescue || !(seq_lt (sb->rescue_rxt, tc->snd_una)
+ || seq_gt (sb->rescue_rxt,
+ tc->snd_congestion)))
+ break;
+
+ /* If rescue rxt undefined or less than snd_una then one segment of
+ * up to SMSS octets that MUST include the highest outstanding
+ * unSACKed sequence number SHOULD be returned, and RescueRxt set to
+ * RecoveryPoint. HighRxt MUST NOT be updated.
+ */
+ max_bytes = clib_min (tc->snd_mss,
+ tc->snd_congestion - tc->snd_una);
+ max_bytes = clib_min (max_bytes, snd_space);
+ offset = tc->snd_congestion - tc->snd_una - max_bytes;
+ sb->rescue_rxt = tc->snd_congestion;
+ tc->snd_nxt = tc->snd_una + offset;
+ n_written = tcp_prepare_retransmit_segment (tc, offset, max_bytes,
+ &b);
+ ASSERT (n_written);
+ bi = vlib_get_buffer_index (vm, b);
+ tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+ break;
+ }
+
+ max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
+ max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
+ if (max_bytes == 0)
+ break;
+ offset = sb->high_rxt - tc->snd_una;
+ tc->snd_nxt = sb->high_rxt;
+ n_written = tcp_prepare_retransmit_segment (tc, offset, max_bytes, &b);
+
+ /* Nothing left to retransmit */
+ if (n_written == 0)
+ break;
+
+ bi = vlib_get_buffer_index (vm, b);
+ sb->high_rxt += n_written;
+ tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+ ASSERT (n_written <= snd_space);
+ snd_space -= n_written;
+ }
+
+ /* If window allows, send 1 SMSS of new data */
+ tc->snd_nxt = old_snd_nxt;
+}
+
+/**
+ * Fast retransmit without SACK info
+ */
+void
+tcp_fast_retransmit_no_sack (tcp_connection_t * tc)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 n_written = 0, offset = 0, bi, old_snd_nxt;
+ int snd_space;
+ vlib_buffer_t *b;
+
+ ASSERT (tcp_in_fastrecovery (tc));
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0);
+
+ /* Start resending from first un-acked segment */
+ old_snd_nxt = tc->snd_nxt;
+ tc->snd_nxt = tc->snd_una;
+ snd_space = tcp_available_snd_space (tc);
+
+ while (snd_space > 0)
+ {
+ offset += n_written;
+ n_written = tcp_prepare_retransmit_segment (tc, offset, snd_space, &b);
+
+ /* Nothing left to retransmit */
+ if (n_written == 0)
+ break;
+
+ bi = vlib_get_buffer_index (vm, b);
+ tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+ snd_space -= n_written;
+ }
+
+ /* Restore snd_nxt. If window allows, send 1 SMSS of new data */
+ tc->snd_nxt = old_snd_nxt;
+}
+
+/**
+ * Do fast retransmit
+ */
+void
+tcp_fast_retransmit (tcp_connection_t * tc)
+{
+ if (tcp_opts_sack_permitted (&tc->rcv_opts)
+ && scoreboard_first_hole (&tc->sack_sb))
+ tcp_fast_retransmit_sack (tc);
+ else
+ tcp_fast_retransmit_no_sack (tc);
+}
+
+always_inline u32
+tcp_session_has_ooo_data (tcp_connection_t * tc)
+{
+ stream_session_t *s =
+ stream_session_get (tc->c_s_index, tc->c_thread_index);
+ return svm_fifo_has_ooo_data (s->server_rx_fifo);
+}
+
+always_inline uword
+tcp46_output_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 my_thread_index = vm->thread_index;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ tcp_set_time_now (my_thread_index);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ tcp_connection_t *tc0;
+ tcp_tx_trace_t *t0;
+ tcp_header_t *th0 = 0;
+ u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
+ my_thread_index);
+ if (PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED))
+ {
+ error0 = TCP_ERROR_INVALID_CONNECTION;
+ next0 = TCP_OUTPUT_NEXT_DROP;
+ goto done;
+ }
+
+ th0 = vlib_buffer_get_current (b0);
+ TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
+
+ if (is_ip4)
+ {
+ vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
+ IP_PROTOCOL_TCP, 1);
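+ /* Leave the checksum 0; the offload flag defers its computation to
+ * hardware or a later checksum node. */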
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+ th0->checksum = 0;
+ }
+ else
+ {
+ ip6_header_t *ih0;
+ ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6,
+ &tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+ th0->checksum = 0;
+ }
+
+ /* Filter out DUPACKs if there are no OOO segments left */
+ if (PREDICT_FALSE
+ (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
+ {
+ if (!tcp_session_has_ooo_data (tc0))
+ {
+ error0 = TCP_ERROR_FILTERED_DUPACKS;
+ next0 = TCP_OUTPUT_NEXT_DROP;
+ goto done;
+ }
+ }
+
+ /* Stop DELACK timer and fix flags */
+ tc0->flags &= ~(TCP_CONN_SNDACK);
+ tcp_timer_reset (tc0, TCP_TIMER_DELACK);
+
+ /* If not retransmitting
+ * 1) update snd_una_max (SYN, SYNACK, FIN)
+ * 2) If we're not tracking an ACK, start tracking */
+ if (seq_lt (tc0->snd_una_max, tc0->snd_nxt))
+ {
+ tc0->snd_una_max = tc0->snd_nxt;
+ if (tc0->rtt_ts == 0)
+ {
+ tc0->rtt_ts = tcp_time_now ();
+ tc0->rtt_seq = tc0->snd_nxt;
+ }
+ }
+
+ /* Set the retransmit timer if not set already and not
+ * doing a pure ACK */
+ if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT)
+ && tc0->snd_nxt != tc0->snd_una)
+ {
+ tcp_retransmit_timer_set (tc0);
+ tc0->rto_boff = 0;
+ }
+
+#if 0
+ /* Make sure we haven't lost route to our peer */
+ if (PREDICT_FALSE (tc0->last_fib_check
+ < tc0->snd_opts.tsval + TCP_FIB_RECHECK_PERIOD))
+ {
+ if (PREDICT_TRUE
+ (tc0->c_rmt_fei == tcp_lookup_rmt_in_fib (tc0)))
+ {
+ tc0->last_fib_check = tc0->snd_opts.tsval;
+ }
+ else
+ {
+ clib_warning ("lost connection to peer");
+ tcp_connection_reset (tc0);
+ goto done;
+ }
+ }
+
+ /* Use pre-computed dpo to set next node */
+ next0 = tc0->c_rmt_dpo.dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index;
+#endif
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ done:
+ b0->error = node->errors[error0];
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ if (th0)
+ {
+ clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
+ }
+ else
+ {
+ memset (&t0->tcp_header, 0, sizeof (t0->tcp_header));
+ }
+ clib_memcpy (&t0->tcp_connection, tc0,
+ sizeof (t0->tcp_connection));
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+tcp4_output (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+tcp6_output (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp4_output_node) =
+{
+ .function = tcp4_output,.name = "tcp4-output",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
+ foreach_tcp4_output_next
+#undef _
+ },
+ .format_buffer = format_tcp_header,
+ .format_trace = format_tcp_tx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp4_output_node, tcp4_output);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp6_output_node) =
+{
+ .function = tcp6_output,
+ .name = "tcp6-output",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
+ foreach_tcp6_output_next
+#undef _
+ },
+ .format_buffer = format_tcp_header,
+ .format_trace = format_tcp_tx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output);
+
+u32
+tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
+{
+ tcp_connection_t *tc;
+
+ tc = (tcp_connection_t *) tconn;
+ tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, 0);
+ ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
+
+ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
+ {
+ tc->rtt_ts = tcp_time_now ();
+ tc->rtt_seq = tc->snd_nxt;
+ }
+ return 0;
+}
+
+typedef enum _tcp_reset_next
+{
+ TCP_RESET_NEXT_DROP,
+ TCP_RESET_NEXT_IP_LOOKUP,
+ TCP_RESET_N_NEXT
+} tcp_reset_next_t;
+
+#define foreach_tcp4_reset_next \
+ _(DROP, "error-drop") \
+ _(IP_LOOKUP, "ip4-lookup")
+
+#define foreach_tcp6_reset_next \
+ _(DROP, "error-drop") \
+ _(IP_LOOKUP, "ip6-lookup")
+
+static uword
+tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, u8 is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 my_thread_index = vm->thread_index;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ tcp_tx_trace_t *t0;
+ tcp_header_t *th0;
+ u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags,
+ my_thread_index, is_ip4))
+ {
+ error0 = TCP_ERROR_LOOKUP_DROPS;
+ next0 = TCP_RESET_NEXT_DROP;
+ goto done;
+ }
+
+ /* Prepare to send to IP lookup */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0;
+ next0 = TCP_RESET_NEXT_IP_LOOKUP;
+
+ done:
+ b0->error = node->errors[error0];
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ th0 = vlib_buffer_get_current (b0);
+ if (is_ip4)
+ th0 = ip4_next_header ((ip4_header_t *) th0);
+ else
+ th0 = ip6_next_header ((ip6_header_t *) th0);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static uword
+tcp4_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_send_reset_inline (vm, node, from_frame, 1);
+}
+
+static uword
+tcp6_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return tcp46_send_reset_inline (vm, node, from_frame, 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp4_reset_node) = {
+ .function = tcp4_send_reset,
+ .name = "tcp4-reset",
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_RESET_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [TCP_RESET_NEXT_##s] = n,
+ foreach_tcp4_reset_next
+#undef _
+ },
+ .format_trace = format_tcp_tx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp4_reset_node, tcp4_send_reset);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (tcp6_reset_node) = {
+ .function = tcp6_send_reset,
+ .name = "tcp6-reset",
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+ .n_next_nodes = TCP_RESET_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [TCP_RESET_NEXT_##s] = n,
+ foreach_tcp6_reset_next
+#undef _
+ },
+ .format_trace = format_tcp_tx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (tcp6_reset_node, tcp6_send_reset);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h
new file mode 100644
index 00000000..9ccfe655
--- /dev/null
+++ b/src/vnet/tcp/tcp_packet.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_tcp_packet_h
+#define included_tcp_packet_h
+
+#include <vnet/vnet.h>
+
+/* TCP flags bit 0 first. */
+#define foreach_tcp_flag \
+ _ (FIN) /**< No more data from sender. */ \
+ _ (SYN) /**< Synchronize sequence numbers. */ \
+ _ (RST) /**< Reset the connection. */ \
+ _ (PSH) /**< Push function. */ \
+ _ (ACK) /**< Ack field significant. */ \
+ _ (URG) /**< Urgent pointer field significant. */ \
+ _ (ECE) /**< ECN-echo. Receiver got CE packet */ \
+ _ (CWR) /**< Sender reduced congestion window */
+
+enum
+{
+#define _(f) TCP_FLAG_BIT_##f,
+ foreach_tcp_flag
+#undef _
+ TCP_N_FLAG_BITS,
+};
+
+enum
+{
+#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f,
+ foreach_tcp_flag
+#undef _
+};
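
For reference, running foreach_tcp_flag through the two enums above assigns bit positions 0-7 and the matching masks (FIN = 0x01 through CWR = 0x80). A standalone sketch reusing the same X-macro expansion to print the masks:

    #include <stdio.h>

    #define foreach_tcp_flag \
      _ (FIN) _ (SYN) _ (RST) _ (PSH) _ (ACK) _ (URG) _ (ECE) _ (CWR)

    enum
    {
    #define _(f) TCP_FLAG_BIT_##f,
      foreach_tcp_flag
    #undef _
    };

    enum
    {
    #define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f,
      foreach_tcp_flag
    #undef _
    };

    int
    main (void)
    {
      /* FIN = 0x01, SYN = 0x02, ..., CWR = 0x80 */
    #define _(f) printf (#f " = 0x%02x\n", TCP_FLAG_##f);
      foreach_tcp_flag
    #undef _
      return 0;
    }
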
+
+typedef struct _tcp_header
+{
+ union
+ {
+ struct
+ {
+ u16 src_port; /**< Source port. */
+ u16 dst_port; /**< Destination port. */
+ };
+ struct
+ {
+ u16 src, dst;
+ };
+ };
+
+ u32 seq_number; /**< Sequence number of the first data octet in this
+ * segment, except when SYN is present. If SYN
+ * is present the seq number is the ISN and the
+ * first data octet is ISN+1 */
+ u32 ack_number; /**< Acknowledgement number if ACK is set. It contains
+ * the value of the next sequence number the sender
+ * of the segment is expecting to receive. */
+ u8 data_offset_and_reserved; /**< Data offset (upper 4 bits, in u32 words)
+ * and reserved bits. */
+ u8 flags; /**< Flags: see the macro above */
+ u16 window; /**< Number of bytes sender is willing to receive. */
+
+ u16 checksum; /**< Checksum of TCP pseudo header and data. */
+ u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */
+} __attribute__ ((packed)) tcp_header_t;
+
+/* Flag tests that return 0 or !0 */
+#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4)
+#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN)
+#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN)
+#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST)
+#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH)
+#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK)
+#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG)
+#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE)
+#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR)
+
+/* Flag tests that return 0 or 1 */
+#define tcp_is_syn(_th) !!((_th)->flags & TCP_FLAG_SYN)
+#define tcp_is_fin(_th) !!((_th)->flags & TCP_FLAG_FIN)
+
+always_inline int
+tcp_header_bytes (tcp_header_t * t)
+{
+ return tcp_doff (t) * sizeof (u32);
+}
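
tcp_header_bytes simply scales the data offset, which lives in the upper nibble of data_offset_and_reserved and counts 32-bit words. A quick standalone check of that arithmetic:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors tcp_doff()/tcp_header_bytes() above. */
    static int
    header_bytes (uint8_t data_offset_and_reserved)
    {
      return (data_offset_and_reserved >> 4) * sizeof (uint32_t);
    }

    int
    main (void)
    {
      printf ("%d\n", header_bytes (0x50)); /* doff 5  -> 20 bytes, no options */
      printf ("%d\n", header_bytes (0xf0)); /* doff 15 -> 60, TCP_HDR_LEN_MAX  */
      return 0;
    }
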
+
+/*
+ * TCP options.
+ */
+
+typedef enum tcp_option_type
+{
+ TCP_OPTION_EOL = 0, /**< End of options. */
+ TCP_OPTION_NOOP = 1, /**< No operation. */
+ TCP_OPTION_MSS = 2, /**< Limit MSS. */
+ TCP_OPTION_WINDOW_SCALE = 3, /**< Window scale. */
+ TCP_OPTION_SACK_PERMITTED = 4, /**< Selective Ack permitted. */
+ TCP_OPTION_SACK_BLOCK = 5, /**< Selective Ack block. */
+ TCP_OPTION_TIMESTAMP = 8, /**< Timestamps. */
+ TCP_OPTION_UTO = 28, /**< User timeout. */
+ TCP_OPTION_AO = 29, /**< Authentication Option. */
+} tcp_option_type_t;
+
+#define foreach_tcp_options_flag \
+ _ (MSS) /**< MSS advertised in SYN */ \
+ _ (TSTAMP) /**< Timestamp capability advertised in SYN */ \
+ _ (WSCALE) /**< Wnd scale capability advertised in SYN */ \
+ _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */ \
+ _ (SACK) /**< SACK present */
+
+enum
+{
+#define _(f) TCP_OPTS_FLAG_BIT_##f,
+ foreach_tcp_options_flag
+#undef _
+ TCP_OPTIONS_N_FLAG_BITS,
+};
+
+enum
+{
+#define _(f) TCP_OPTS_FLAG_##f = 1 << TCP_OPTS_FLAG_BIT_##f,
+ foreach_tcp_options_flag
+#undef _
+};
+
+typedef struct _sack_block
+{
+ u32 start; /**< Start sequence number */
+ u32 end; /**< End sequence number (first outside) */
+} sack_block_t;
+
+typedef struct
+{
+ u8 flags; /**< Option flags, see above */
+
+ u16 mss; /**< Maximum segment size advertised */
+ u8 wscale; /**< Window scale advertised */
+ u32 tsval; /**< Timestamp value */
+ u32 tsecr; /**< Echoed/reflected time stamp */
+ sack_block_t *sacks; /**< SACK blocks */
+ u8 n_sack_blocks; /**< Number of SACK blocks */
+} tcp_options_t;
+
+/* Flag tests that return 0 or !0 */
+#define tcp_opts_mss(_to) ((_to)->flags & TCP_OPTS_FLAG_MSS)
+#define tcp_opts_tstamp(_to) ((_to)->flags & TCP_OPTS_FLAG_TSTAMP)
+#define tcp_opts_wscale(_to) ((_to)->flags & TCP_OPTS_FLAG_WSCALE)
+#define tcp_opts_sack(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK)
+#define tcp_opts_sack_permitted(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED)
+
+/* TCP option lengths */
+#define TCP_OPTION_LEN_EOL 1
+#define TCP_OPTION_LEN_NOOP 1
+#define TCP_OPTION_LEN_MSS 4
+#define TCP_OPTION_LEN_WINDOW_SCALE 3
+#define TCP_OPTION_LEN_SACK_PERMITTED 2
+#define TCP_OPTION_LEN_TIMESTAMP 10
+#define TCP_OPTION_LEN_SACK_BLOCK 8
+
+#define TCP_HDR_LEN_MAX 60
+#define TCP_WND_MAX 65535U
+#define TCP_MAX_WND_SCALE 14 /* See RFC 1323 */
+#define TCP_OPTS_ALIGN 4
+#define TCP_OPTS_MAX_SACK_BLOCKS 3
+#endif /* included_tcp_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
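
The option-length constants above make it easy to budget SYN header space: MSS (4) + window scale (3) + SACK-permitted (2) + timestamp (10) = 19 bytes, padded to 20 by TCP_OPTS_ALIGN, i.e. a 40-byte SYN header out of the 60-byte TCP_HDR_LEN_MAX. A small sketch of that computation:

    #include <stdio.h>

    #define TCP_OPTION_LEN_MSS            4
    #define TCP_OPTION_LEN_WINDOW_SCALE   3
    #define TCP_OPTION_LEN_SACK_PERMITTED 2
    #define TCP_OPTION_LEN_TIMESTAMP      10
    #define TCP_OPTS_ALIGN                4

    int
    main (void)
    {
      int len = TCP_OPTION_LEN_MSS + TCP_OPTION_LEN_WINDOW_SCALE
        + TCP_OPTION_LEN_SACK_PERMITTED + TCP_OPTION_LEN_TIMESTAMP;

      /* Options are padded with NOOP/EOL up to a 4-byte boundary. */
      int padded = (len + TCP_OPTS_ALIGN - 1) & ~(TCP_OPTS_ALIGN - 1);

      printf ("raw %d, padded %d, header %d\n", len, padded, 20 + padded);
      return 0;
    }
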
diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c
new file mode 100644
index 00000000..3be4592c
--- /dev/null
+++ b/src/vnet/tcp/tcp_pg.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/tcp_pg: TCP packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+/* TCP flags bit 0 first. */
+#define foreach_tcp_flag \
+ _ (FIN) \
+ _ (SYN) \
+ _ (RST) \
+ _ (PSH) \
+ _ (ACK) \
+ _ (URG) \
+ _ (ECE) \
+ _ (CWR)
+
+static void
+tcp_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset, tcp_offset;
+
+ tcp_offset = g->start_byte_offset;
+ ip_offset = (g - 1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ tcp_header_t *tcp0;
+ ip_csum_t sum0;
+ u32 tcp_len0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+
+ ASSERT (p0->current_data == 0);
+ ip0 = (void *) (p0->data + ip_offset);
+ tcp0 = (void *) (p0->data + tcp_offset);
+ tcp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]);
+
+ /* Initialize checksum with header. */
+ if (BITS (sum0) == 32)
+ {
+ sum0 = clib_mem_unaligned (&ip0->src_address, u32);
+ sum0 =
+ ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->dst_address, u32));
+ }
+ else
+ sum0 = clib_mem_unaligned (&ip0->src_address, u64);
+
+ sum0 = ip_csum_with_carry
+ (sum0, clib_host_to_net_u32 (tcp_len0 + (ip0->protocol << 16)));
+
+ /* Invalidate possibly old checksum. */
+ tcp0->checksum = 0;
+
+ sum0 =
+ ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0);
+
+ tcp0->checksum = ~ip_csum_fold (sum0);
+ }
+}
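
tcp_pg_edit_function implements the standard TCP checksum: a 16-bit one's-complement sum over the IPv4 pseudo header (source, destination, protocol, TCP length) followed by the TCP header and payload, with the checksum field zeroed first. A minimal standalone version over a flat buffer, without the carry-folding helpers used above:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* One's-complement sum over a byte buffer (network order). */
    static uint32_t
    sum16 (uint32_t sum, const uint8_t * p, int len)
    {
      int i;
      for (i = 0; i + 1 < len; i += 2)
        sum += (p[i] << 8) | p[i + 1];
      if (len & 1)
        sum += p[len - 1] << 8;
      return sum;
    }

    static uint16_t
    tcp_checksum (uint32_t src, uint32_t dst, const uint8_t * tcp, int tcp_len)
    {
      uint8_t ph[12];
      uint32_t sum;

      /* IPv4 pseudo header: src, dst, zero, protocol (6), TCP length. */
      ph[0] = src >> 24; ph[1] = src >> 16; ph[2] = src >> 8; ph[3] = src;
      ph[4] = dst >> 24; ph[5] = dst >> 16; ph[6] = dst >> 8; ph[7] = dst;
      ph[8] = 0; ph[9] = 6;
      ph[10] = tcp_len >> 8; ph[11] = tcp_len;

      sum = sum16 (0, ph, sizeof (ph));
      sum = sum16 (sum, tcp, tcp_len);
      while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
      return ~sum & 0xffff;
    }

    int
    main (void)
    {
      uint8_t seg[20];
      memset (seg, 0, sizeof (seg)); /* checksum field left zero, as above */
      printf ("0x%04x\n",
              tcp_checksum (0x0a000001, 0x0a000002, seg, sizeof (seg)));
      return 0;
    }
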
+
+typedef struct
+{
+ pg_edit_t src, dst;
+ pg_edit_t seq_number, ack_number;
+ pg_edit_t data_offset_and_reserved;
+#define _(f) pg_edit_t f##_flag;
+ foreach_tcp_flag
+#undef _
+ pg_edit_t window;
+ pg_edit_t checksum;
+ pg_edit_t urgent_pointer;
+} pg_tcp_header_t;
+
+static inline void
+pg_tcp_header_init (pg_tcp_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the TCP header. */
+#define _(f) pg_edit_init (&p->f, tcp_header_t, f);
+ _(src);
+ _(dst);
+ _(seq_number);
+ _(ack_number);
+ _(window);
+ _(checksum);
+ _(urgent_pointer);
+#undef _
+
+ /* Initialize bit fields. */
+#define _(f) \
+ pg_edit_init_bitfield (&p->f##_flag, tcp_header_t, \
+ flags, \
+ TCP_FLAG_BIT_##f, 1);
+
+ foreach_tcp_flag
+#undef _
+ pg_edit_init_bitfield (&p->data_offset_and_reserved, tcp_header_t,
+ data_offset_and_reserved, 4, 4);
+}
+
+uword
+unformat_pg_tcp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_tcp_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t),
+ &group_index);
+ pg_tcp_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->seq_number, 0);
+ pg_edit_set_fixed (&p->ack_number, 0);
+
+ pg_edit_set_fixed (&p->data_offset_and_reserved,
+ sizeof (tcp_header_t) / sizeof (u32));
+
+ pg_edit_set_fixed (&p->window, 4096);
+ pg_edit_set_fixed (&p->urgent_pointer, 0);
+
+#define _(f) pg_edit_set_fixed (&p->f##_flag, 0);
+ foreach_tcp_flag
+#undef _
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ if (!unformat (input, "TCP: %U -> %U",
+ unformat_pg_edit,
+ unformat_tcp_udp_port, &p->src,
+ unformat_pg_edit, unformat_tcp_udp_port, &p->dst))
+ goto error;
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "window %U",
+ unformat_pg_edit, unformat_pg_number, &p->window))
+ ;
+
+ else if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ else if (unformat (input, "seqnum %U", unformat_pg_edit,
+ unformat_pg_number, &p->seq_number))
+ ;
+ else if (unformat (input, "acknum %U", unformat_pg_edit,
+ unformat_pg_number, &p->ack_number))
+ ;
+ /* Flags. */
+#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1);
+ foreach_tcp_flag
+#undef _
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ u16 dst_port;
+ tcp_udp_port_info_t *pi;
+
+ pi = 0;
+ if (p->dst.type == PG_EDIT_FIXED)
+ {
+ dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO);
+ pi = ip_get_tcp_udp_port_info (im, dst_port);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = tcp_pg_edit_function;
+ g->edit_function_opaque = 0;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
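
With unformat_pg_tcp_header registered, packet-generator streams can spell out TCP segments in text: "TCP: <src> -> <dst>" plus optional window/checksum/seqnum/acknum edits and bare flag keywords such as SYN. A hypothetical stream definition exercising that grammar; the surrounding packet-generator CLI syntax is assumed from the usual VPP pg usage and is not part of this patch:

    packet-generator new {
      name tcp-syn
      limit 1
      node ip4-input
      size 64-64
      data {
        IP4: 1.2.3.40 -> 1.2.3.4
        TCP: 1234 -> 80
        SYN
        window 8192
        seqnum 1000
        incrementing 100
      }
    }
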
diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c
new file mode 100644
index 00000000..9b2a8ac7
--- /dev/null
+++ b/src/vnet/tcp/tcp_syn_filter4.c
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/xxhash.h>
+
+typedef struct
+{
+ f64 next_reset;
+ f64 reset_interval;
+ u8 *syn_counts;
+} syn_filter4_runtime_t;
+
+typedef struct
+{
+ u32 next_index;
+ int not_a_syn;
+ u8 filter_value;
+} syn_filter4_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_syn_filter4_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ syn_filter4_trace_t *t = va_arg (*args, syn_filter4_trace_t *);
+
+ s = format (s, "SYN_FILTER4: next index %d, %s",
+ t->next_index, t->not_a_syn ? "not a syn" : "syn");
+ if (t->not_a_syn == 0)
+ s = format (s, ", filter value %d\n", t->filter_value);
+ else
+ s = format (s, "\n");
+ return s;
+}
+
+static vlib_node_registration_t syn_filter4_node;
+
+#define foreach_syn_filter_error \
+_(THROTTLED, "TCP SYN packet throttle drops") \
+_(OK, "TCP SYN packets passed")
+
+typedef enum
+{
+#define _(sym,str) SYN_FILTER_ERROR_##sym,
+ foreach_syn_filter_error
+#undef _
+ SYN_FILTER_N_ERROR,
+} syn_filter_error_t;
+
+static char *syn_filter4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_syn_filter_error
+#undef _
+};
+
+typedef enum
+{
+ SYN_FILTER_NEXT_DROP,
+ SYN_FILTER_N_NEXT,
+} syn_filter_next_t;
+
+extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
+
+static uword
+syn_filter4_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ syn_filter_next_t next_index;
+ u32 ok_syn_packets = 0;
+ vnet_feature_main_t *fm = &feature_main;
+ u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
+ vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
+ syn_filter4_runtime_t *rt = (syn_filter4_runtime_t *) node->runtime_data;
+ f64 now = vlib_time_now (vm);
+ /* Shut up spurious gcc warnings. */
+ u8 *c0 = 0, *c1 = 0, *c2 = 0, *c3 = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (now > rt->next_reset)
+ {
+ memset (rt->syn_counts, 0, vec_len (rt->syn_counts));
+ rt->next_reset = now + rt->reset_interval;
+ }
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ ip4_header_t *ip0, *ip1, *ip2, *ip3;
+ tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
+ u32 not_a_syn0 = 1, not_a_syn1 = 1, not_a_syn2 = 1, not_a_syn3 = 1;
+ u64 hash0, hash1, hash2, hash3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 through b3 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ vnet_get_config_data
+ (&cm->config_main, &b0->current_config_index,
+ &next0, 0 /* sizeof (c0[0]) */ );
+ vnet_get_config_data
+ (&cm->config_main, &b1->current_config_index,
+ &next1, 0 /* sizeof (c0[0]) */ );
+ vnet_get_config_data
+ (&cm->config_main, &b2->current_config_index,
+ &next2, 0 /* sizeof (c0[0]) */ );
+ vnet_get_config_data
+ (&cm->config_main, &b3->current_config_index,
+ &next3, 0 /* sizeof (c0[0]) */ );
+
+ /* Not TCP? */
+ ip0 = vlib_buffer_get_current (b0);
+ if (ip0->protocol != IP_PROTOCOL_TCP)
+ goto trace00;
+
+ tcp0 = ip4_next_header (ip0);
+ /*
+ * Not a SYN?
+ * $$$$ hack: the TCP bitfield flags seem not to compile into
+ * correct code, so test the SYN bit (0x2) directly.
+ */
+ if (PREDICT_TRUE (!(tcp0->flags & 0x2)))
+ goto trace00;
+
+ not_a_syn0 = 0;
+ hash0 = clib_xxhash ((u64) ip0->src_address.as_u32);
+ c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)];
+ if (PREDICT_FALSE (*c0 >= 0x80))
+ {
+ next0 = SYN_FILTER_NEXT_DROP;
+ b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED];
+ goto trace00;
+ }
+ *c0 += 1;
+ ok_syn_packets++;
+
+ trace00:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ syn_filter4_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->not_a_syn = not_a_syn0;
+ t->next_index = next0;
+ t->filter_value = not_a_syn0 ? 0 : *c0;
+ }
+
+ /* Not TCP? */
+ ip1 = vlib_buffer_get_current (b1);
+ if (ip1->protocol != IP_PROTOCOL_TCP)
+ goto trace01;
+
+ tcp1 = ip4_next_header (ip1);
+ /*
+ * Not a SYN?
+ * $$$$ hack: the TCP bitfield flags seem not to compile into
+ * correct code, so test the SYN bit (0x2) directly.
+ */
+ if (PREDICT_TRUE (!(tcp1->flags & 0x2)))
+ goto trace01;
+
+ not_a_syn1 = 0;
+ hash1 = clib_xxhash ((u64) ip1->src_address.as_u32);
+ c1 = &rt->syn_counts[hash1 & (_vec_len (rt->syn_counts) - 1)];
+ if (PREDICT_FALSE (*c1 >= 0x80))
+ {
+ next1 = SYN_FILTER_NEXT_DROP;
+ b1->error = node->errors[SYN_FILTER_ERROR_THROTTLED];
+ goto trace01;
+ }
+ *c1 += 1;
+ ok_syn_packets++;
+
+ trace01:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ syn_filter4_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->not_a_syn = not_a_syn1;
+ t->next_index = next1;
+ t->filter_value = not_a_syn1 ? 0 : *c1;
+ }
+
+ /* Not TCP? */
+ ip2 = vlib_buffer_get_current (b2);
+ if (ip2->protocol != IP_PROTOCOL_TCP)
+ goto trace02;
+
+ tcp2 = ip4_next_header (ip2);
+ /*
+ * Not a SYN?
+ * $$$$ hack: the TCP bitfield flags seem not to compile into
+ * correct code, so test the SYN bit (0x2) directly.
+ */
+ if (PREDICT_TRUE (!(tcp2->flags & 0x2)))
+ goto trace02;
+
+ not_a_syn2 = 0;
+ hash2 = clib_xxhash ((u64) ip2->src_address.as_u32);
+ c2 = &rt->syn_counts[hash2 & (_vec_len (rt->syn_counts) - 1)];
+ if (PREDICT_FALSE (*c2 >= 0x80))
+ {
+ next2 = SYN_FILTER_NEXT_DROP;
+ b2->error = node->errors[SYN_FILTER_ERROR_THROTTLED];
+ goto trace02;
+ }
+ *c2 += 1;
+ ok_syn_packets++;
+
+ trace02:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b2->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ syn_filter4_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->not_a_syn = not_a_syn2;
+ t->next_index = next2;
+ t->filter_value = not_a_syn2 ? 0 : *c2;
+ }
+
+ /* Not TCP? */
+ ip3 = vlib_buffer_get_current (b3);
+ if (ip3->protocol != IP_PROTOCOL_TCP)
+ goto trace03;
+
+ tcp3 = ip4_next_header (ip3);
+ /*
+ * Not a SYN?
+ * $$$$ hack: the TCP bitfield flags seem not to compile into
+ * correct code, so test the SYN bit (0x2) directly.
+ */
+ if (PREDICT_TRUE (!(tcp3->flags & 0x2)))
+ goto trace03;
+
+ not_a_syn3 = 0;
+ hash3 = clib_xxhash ((u64) ip3->src_address.as_u32);
+ c3 = &rt->syn_counts[hash3 & (_vec_len (rt->syn_counts) - 1)];
+ if (PREDICT_FALSE (*c3 >= 0x80))
+ {
+ next3 = SYN_FILTER_NEXT_DROP;
+ b3->error = node->errors[SYN_FILTER_ERROR_THROTTLED];
+ goto trace03;
+ }
+ *c3 += 1;
+ ok_syn_packets++;
+
+ trace03:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b3->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ syn_filter4_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->not_a_syn = not_a_syn3;
+ t->next_index = next3;
+ t->filter_value = not_a_syn3 ? 0 : *c3;
+ }
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip4_header_t *ip0;
+ tcp_header_t *tcp0;
+ u32 not_a_syn0 = 1;
+ u32 hash0;
+ u8 *c0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vnet_get_config_data
+ (&cm->config_main, &b0->current_config_index,
+ &next0, 0 /* sizeof (c0[0]) */ );
+
+ /* Not TCP? */
+ ip0 = vlib_buffer_get_current (b0);
+ if (ip0->protocol != IP_PROTOCOL_TCP)
+ goto trace0;
+
+ tcp0 = ip4_next_header (ip0);
+ /*
+ * Not a SYN?
+ * $$$$ hack: the TCP bitfield flags seem not to compile into
+ * correct code, so test the SYN bit (0x2) directly.
+ */
+ if (PREDICT_TRUE (!(tcp0->flags & 0x2)))
+ goto trace0;
+
+ not_a_syn0 = 0;
+ hash0 = clib_xxhash ((u64) ip0->src_address.as_u32);
+ c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)];
+ if (PREDICT_FALSE (*c0 >= 0x80))
+ {
+ next0 = SYN_FILTER_NEXT_DROP;
+ b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED];
+ goto trace0;
+ }
+ *c0 += 1;
+ ok_syn_packets++;
+
+ trace0:
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ syn_filter4_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->not_a_syn = not_a_syn0;
+ t->next_index = next0;
+ t->filter_value = not_a_syn0 ? 0 : *c0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, syn_filter4_node.index,
+ SYN_FILTER_ERROR_OK, ok_syn_packets);
+ return frame->n_vectors;
+}
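
The filter is a counting sketch: hash the source address into one of 1024 one-byte buckets and drop once a bucket reaches 0x80 within the current reset interval. A standalone model of the same admission policy, with a generic 64-bit mixer standing in for clib_xxhash:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define N_BUCKETS 1024  /* must stay a power of two for the mask */
    #define THRESHOLD 0x80

    static uint8_t syn_counts[N_BUCKETS];

    /* Hypothetical stand-in for clib_xxhash: any decent mixer works. */
    static uint64_t
    mix (uint64_t x)
    {
      x ^= x >> 33;
      x *= 0xff51afd7ed558ccdULL;
      x ^= x >> 33;
      return x;
    }

    /* Returns 1 if the SYN passes, 0 if it is throttled. */
    static int
    syn_admit (uint32_t src_address)
    {
      uint8_t *c = &syn_counts[mix (src_address) & (N_BUCKETS - 1)];
      if (*c >= THRESHOLD)
        return 0;
      *c += 1;
      return 1;
    }

    int
    main (void)
    {
      int i, passed = 0;
      for (i = 0; i < 200; i++)            /* one noisy source */
        passed += syn_admit (0x0a000001);
      printf ("passed %d of 200\n", passed);  /* expect 128 */

      memset (syn_counts, 0, sizeof (syn_counts)); /* the periodic reset */
      printf ("after reset: %d\n", syn_admit (0x0a000001));
      return 0;
    }
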
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (syn_filter4_node, static) =
+{
+ .function = syn_filter4_node_fn,
+ .name = "syn-filter-4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_syn_filter4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .runtime_data_bytes = sizeof (syn_filter4_runtime_t),
+ .n_errors = ARRAY_LEN(syn_filter4_error_strings),
+ .error_strings = syn_filter4_error_strings,
+
+ .n_next_nodes = SYN_FILTER_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SYN_FILTER_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (syn_filter4_node, syn_filter4_node_fn);
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (syn_filter_4, static) =
+{
+ .arc_name = "ip4-local",
+ .node_name = "syn-filter-4",
+ .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
+};
+/* *INDENT-ON* */
+
+int
+syn_filter_enable_disable (u32 sw_if_index, int enable_disable)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *sw;
+ int rv = 0;
+
+ /* Utterly wrong? */
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ /* Not a physical port? */
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (enable_disable)
+ {
+ syn_filter4_runtime_t *rt;
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main ({
+ rt = vlib_node_get_runtime_data (this_vlib_main, syn_filter4_node.index);
+ vec_validate (rt->syn_counts, 1023);
+ /*
+ * Given perfect dispersion / optimal hashing results:
+ * allow 128k (successful) SYNs/sec: 1024 buckets, each of which
+ * absorbs 128 SYNs before filtering (1024 * 128 = 131072), with
+ * the table reset once a second. Reality bites, let's try
+ * resetting once every 100ms instead.
+ */
+ rt->reset_interval = 0.1; /* reset interval in seconds */
+ });
+ /* *INDENT-ON* */
+ }
+
+ rv = vnet_feature_enable_disable ("ip4-local", "syn-filter-4",
+ sw_if_index, enable_disable, 0, 0);
+
+ return rv;
+}
+
+static clib_error_t *
+syn_filter_enable_disable_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ int enable_disable = 1;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "disable"))
+ enable_disable = 0;
+ else if (unformat (input, "%U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Please specify an interface...");
+
+ rv = syn_filter_enable_disable (sw_if_index, enable_disable);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return
+ (0, "Invalid interface, only works on physical ports");
+ break;
+
+ case VNET_API_ERROR_UNIMPLEMENTED:
+ return clib_error_return (0,
+ "Device driver doesn't support redirection");
+ break;
+
+ case VNET_API_ERROR_INVALID_VALUE:
+ return clib_error_return (0, "feature arc not found");
+
+ case VNET_API_ERROR_INVALID_VALUE_2:
+ return clib_error_return (0, "feature node not found");
+
+ default:
+ return clib_error_return (0, "syn_filter_enable_disable returned %d",
+ rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (syn_filter_command, static) =
+{
+ .path = "ip syn filter",
+ .short_help = "ip syn filter <interface-name> [disable]",
+ .function = syn_filter_enable_disable_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c
new file mode 100644
index 00000000..37640cc6
--- /dev/null
+++ b/src/vnet/tcp/tcp_test.c
@@ -0,0 +1,1764 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/tcp/tcp.h>
+
+#define TCP_TEST_I(_cond, _comment, _args...) \
+({ \
+ int _evald = (_cond); \
+ if (!(_evald)) { \
+ fformat(stderr, "FAIL:%d: " _comment "\n", \
+ __LINE__, ##_args); \
+ } else { \
+ fformat(stderr, "PASS:%d: " _comment "\n", \
+ __LINE__, ##_args); \
+ } \
+ _evald; \
+})
+
+#define TCP_TEST(_cond, _comment, _args...) \
+{ \
+ if (!TCP_TEST_I(_cond, _comment, ##_args)) { \
+ return 1; \
+ } \
+}
+
+/* *INDENT-OFF* */
+scoreboard_trace_elt_t sb_trace[] = {};
+/* *INDENT-ON* */
+
+static int
+tcp_test_scoreboard_replay (vlib_main_t * vm, unformat_input_t * input)
+{
+ int verbose = 0;
+ tcp_connection_t _tc, *tc = &_tc;
+ u8 *s = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "detail"))
+ verbose = 1;
+ else
+ {
+ clib_error_t *e = clib_error_return
+ (0, "unknown input `%U'", format_unformat_error, input);
+ clib_error_report (e);
+ return -1;
+ }
+ }
+
+#if TCP_SCOREBOARD_TRACE
+ tc->sack_sb.trace = sb_trace;
+#endif
+ s = tcp_scoreboard_replay (s, tc, verbose);
+ vlib_cli_output (vm, "%v", s);
+ return 0;
+}
+
+static int
+tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input)
+{
+ tcp_connection_t _tc, *tc = &_tc;
+ sack_scoreboard_t *sb = &tc->sack_sb;
+ sack_block_t *sacks = 0, block;
+ sack_scoreboard_hole_t *hole;
+ int i, verbose = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "replay"))
+ return tcp_test_scoreboard_replay (vm, input);
+ }
+
+ memset (tc, 0, sizeof (*tc));
+
+ tc->snd_una = 0;
+ tc->snd_una_max = 1000;
+ tc->snd_nxt = 1000;
+ tc->rcv_opts.flags |= TCP_OPTS_FLAG_SACK;
+ scoreboard_init (&tc->sack_sb);
+
+ for (i = 0; i < 1000 / 100; i++)
+ {
+ block.start = i * 100;
+ block.end = (i + 1) * 100;
+ vec_add1 (sacks, block);
+ }
+
+ /*
+ * Inject even blocks
+ */
+
+ for (i = 0; i < 1000 / 200; i++)
+ {
+ vec_add1 (tc->rcv_opts.sacks, sacks[i * 2]);
+ }
+ tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks);
+ tcp_rcv_sacks (tc, 0);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb after even blocks:\n%U", format_tcp_scoreboard,
+ sb);
+
+ TCP_TEST ((pool_elts (sb->holes) == 5),
+ "scoreboard has %d elements", pool_elts (sb->holes));
+
+ /* First SACK block should be rejected */
+ hole = scoreboard_first_hole (sb);
+ TCP_TEST ((hole->start == 0 && hole->end == 200),
+ "first hole start %u end %u", hole->start, hole->end);
+ hole = scoreboard_last_hole (sb);
+ TCP_TEST ((hole->start == 900 && hole->end == 1000),
+ "last hole start %u end %u", hole->start, hole->end);
+ TCP_TEST ((sb->sacked_bytes == 400), "sacked bytes %d", sb->sacked_bytes);
+ TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv);
+ TCP_TEST ((sb->last_sacked_bytes == 400),
+ "last sacked bytes %d", sb->last_sacked_bytes);
+ TCP_TEST ((sb->high_sacked == 900), "max byte sacked %u", sb->high_sacked);
+ /*
+ * Inject odd blocks
+ */
+
+ vec_reset_length (tc->rcv_opts.sacks);
+ for (i = 0; i < 1000 / 200; i++)
+ {
+ vec_add1 (tc->rcv_opts.sacks, sacks[i * 2 + 1]);
+ }
+ tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks);
+ tcp_rcv_sacks (tc, 0);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb after odd blocks:\n%U", format_tcp_scoreboard,
+ sb);
+
+ hole = scoreboard_first_hole (sb);
+ TCP_TEST ((pool_elts (sb->holes) == 1),
+ "scoreboard has %d holes", pool_elts (sb->holes));
+ TCP_TEST ((hole->start == 0 && hole->end == 100),
+ "first hole start %u end %u", hole->start, hole->end);
+ TCP_TEST ((sb->sacked_bytes == 900), "sacked bytes %d", sb->sacked_bytes);
+ TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv);
+ TCP_TEST ((sb->high_sacked == 1000), "max sacked byte %u", sb->high_sacked);
+ TCP_TEST ((sb->last_sacked_bytes == 500),
+ "last sacked bytes %d", sb->last_sacked_bytes);
+
+ /*
+ * Ack until byte 100, all bytes are now acked + sacked
+ */
+ tcp_rcv_sacks (tc, 100);
+ if (verbose)
+ vlib_cli_output (vm, "ack until byte 100:\n%U", format_tcp_scoreboard,
+ sb);
+
+ TCP_TEST ((pool_elts (sb->holes) == 0),
+ "scoreboard has %d elements", pool_elts (sb->holes));
+ TCP_TEST ((sb->snd_una_adv == 900),
+ "snd_una_adv after ack %u", sb->snd_una_adv);
+ TCP_TEST ((sb->high_sacked == 1000), "max sacked byte %u", sb->high_sacked);
+ TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes);
+ TCP_TEST ((sb->last_sacked_bytes == 0),
+ "last sacked bytes %d", sb->last_sacked_bytes);
+
+ /*
+ * Add new block
+ */
+
+ vec_reset_length (tc->rcv_opts.sacks);
+
+ block.start = 1200;
+ block.end = 1300;
+ vec_add1 (tc->rcv_opts.sacks, block);
+
+ if (verbose)
+ vlib_cli_output (vm, "add [1200, 1300]:\n%U", format_tcp_scoreboard, sb);
+ tc->snd_una_max = 1500;
+ tc->snd_una = 1000;
+ tc->snd_nxt = 1500;
+ tcp_rcv_sacks (tc, 1000);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb snd_una_max 1500, snd_una 1000:\n%U",
+ format_tcp_scoreboard, sb);
+
+ TCP_TEST ((sb->snd_una_adv == 0),
+ "snd_una_adv after ack %u", sb->snd_una_adv);
+ TCP_TEST ((pool_elts (sb->holes) == 2),
+ "scoreboard has %d holes", pool_elts (sb->holes));
+ hole = scoreboard_first_hole (sb);
+ TCP_TEST ((hole->start == 1000 && hole->end == 1200),
+ "first hole start %u end %u", hole->start, hole->end);
+ TCP_TEST ((sb->snd_una_adv == 0),
+ "snd_una_adv after ack %u", sb->snd_una_adv);
+ TCP_TEST ((sb->high_sacked == 1300), "max sacked byte %u", sb->high_sacked);
+ hole = scoreboard_last_hole (sb);
+ TCP_TEST ((hole->start == 1300 && hole->end == 1500),
+ "last hole start %u end %u", hole->start, hole->end);
+ TCP_TEST ((sb->sacked_bytes == 100), "sacked bytes %d", sb->sacked_bytes);
+
+ /*
+ * Ack first hole
+ */
+
+ vec_reset_length (tc->rcv_opts.sacks);
+ tcp_rcv_sacks (tc, 1200);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb ack up to byte 1200:\n%U", format_tcp_scoreboard,
+ sb);
+
+ TCP_TEST ((sb->snd_una_adv == 100),
+ "snd_una_adv after ack %u", sb->snd_una_adv);
+ TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes);
+ TCP_TEST ((pool_elts (sb->holes) == 1),
+ "scoreboard has %d elements", pool_elts (sb->holes));
+ hole = scoreboard_first_hole (sb);
+ TCP_TEST ((hole->prev == TCP_INVALID_SACK_HOLE_INDEX
+ && hole->next == TCP_INVALID_SACK_HOLE_INDEX), "hole is valid");
+ TCP_TEST ((sb->last_bytes_delivered == 100), "last bytes delivered %d",
+ sb->last_bytes_delivered);
+
+ /*
+ * Add some more blocks and then remove all
+ */
+ vec_reset_length (tc->rcv_opts.sacks);
+ tc->snd_una += sb->snd_una_adv;
+ tc->snd_una_max = 1900;
+ for (i = 0; i < 5; i++)
+ {
+ block.start = i * 100 + 1200;
+ block.end = (i + 1) * 100 + 1200;
+ vec_add1 (tc->rcv_opts.sacks, block);
+ }
+ tcp_rcv_sacks (tc, 1900);
+
+ scoreboard_clear (sb);
+ if (verbose)
+ vlib_cli_output (vm, "sb cleared all:\n%U", format_tcp_scoreboard, sb);
+
+ TCP_TEST ((pool_elts (sb->holes) == 0),
+ "number of holes %d", pool_elts (sb->holes));
+ TCP_TEST ((sb->head == TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head);
+ TCP_TEST ((sb->tail == TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail);
+
+ /*
+ * Re-inject odd blocks and ack them all
+ */
+
+ tc->snd_una = 0;
+ tc->snd_una_max = 1000;
+ tc->snd_nxt = 1000;
+ for (i = 0; i < 5; i++)
+ {
+ vec_add1 (tc->rcv_opts.sacks, sacks[i * 2 + 1]);
+ }
+ tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks);
+ tcp_rcv_sacks (tc, 0);
+ if (verbose)
+ vlib_cli_output (vm, "sb added odd blocks and ack [0, 950]:\n%U",
+ format_tcp_scoreboard, sb);
+
+ tcp_rcv_sacks (tc, 950);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb added odd blocks and ack [0, 950]:\n%U",
+ format_tcp_scoreboard, sb);
+
+ TCP_TEST ((pool_elts (sb->holes) == 0),
+ "scoreboard has %d elements", pool_elts (sb->holes));
+ TCP_TEST ((sb->snd_una_adv == 50), "snd_una_adv %u", sb->snd_una_adv);
+ TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes);
+ TCP_TEST ((sb->last_sacked_bytes == 0),
+ "last sacked bytes %d", sb->last_sacked_bytes);
+
+ /*
+ * Inject one block, ack it and overlap hole
+ */
+
+ tc->snd_una = 0;
+ tc->snd_una_max = 1000;
+ tc->snd_nxt = 1000;
+
+ block.start = 100;
+ block.end = 500;
+ vec_add1 (tc->rcv_opts.sacks, block);
+ tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks);
+
+ tcp_rcv_sacks (tc, 0);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb added [100, 500]:\n%U",
+ format_tcp_scoreboard, sb);
+
+ tcp_rcv_sacks (tc, 800);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb ack [0, 800]:\n%U", format_tcp_scoreboard, sb);
+
+ TCP_TEST ((pool_elts (sb->holes) == 1),
+ "scoreboard has %d elements", pool_elts (sb->holes));
+ TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv);
+ TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes);
+ TCP_TEST ((sb->last_sacked_bytes == 0),
+ "last sacked bytes %d", sb->last_sacked_bytes);
+ TCP_TEST ((sb->last_bytes_delivered == 400),
+ "last bytes delivered %d", sb->last_bytes_delivered);
+
+ /*
+ * One hole close to head, patch head, split in two and start acking
+ * the lowest part
+ */
+ scoreboard_clear (sb);
+ tc->snd_una = 0;
+ tc->snd_una_max = 1000;
+ tc->snd_nxt = 1000;
+
+ block.start = 500;
+ block.end = 1000;
+ vec_add1 (tc->rcv_opts.sacks, block);
+ tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks);
+
+ tcp_rcv_sacks (tc, 0);
+ if (verbose)
+ vlib_cli_output (vm, "sb added [500, 1000]:\n%U",
+ format_tcp_scoreboard, sb);
+
+ vec_reset_length (tc->rcv_opts.sacks);
+ block.start = 300;
+ block.end = 400;
+ vec_add1 (tc->rcv_opts.sacks, block);
+ tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks);
+ tcp_rcv_sacks (tc, 100);
+ if (verbose)
+ vlib_cli_output (vm, "sb added [0, 100] [300, 400]:\n%U",
+ format_tcp_scoreboard, sb);
+ TCP_TEST ((pool_elts (sb->holes) == 2),
+ "scoreboard has %d elements", pool_elts (sb->holes));
+
+ tc->snd_una = 100;
+ tcp_rcv_sacks (tc, 200);
+ tcp_rcv_sacks (tc, 300);
+ if (verbose)
+ vlib_cli_output (vm, "sb added [0, 300]:\n%U", format_tcp_scoreboard, sb);
+ TCP_TEST ((sb->sacked_bytes == 500), "sacked bytes %d", sb->sacked_bytes);
+
+ return 0;
+}
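
The expected values in the first part of this test follow from interval arithmetic: with snd_una = 0 and snd_nxt = 1000, the accepted even blocks [200,300) [400,500) [600,700) [800,900) leave the five holes [0,200) [300,400) [500,600) [700,800) [900,1000) and 4 * 100 = 400 sacked bytes. A standalone check of that hole computation, assuming sorted, non-overlapping blocks:

    #include <stdio.h>
    #include <stdint.h>

    typedef struct { uint32_t start, end; } block_t;

    int
    main (void)
    {
      block_t blocks[] = { {200, 300}, {400, 500}, {600, 700}, {800, 900} };
      uint32_t snd_una = 0, snd_nxt = 1000, cursor = snd_una;
      uint32_t sacked = 0, n_holes = 0;
      int i;

      for (i = 0; i < 4; i++)
        {
          if (blocks[i].start > cursor)
            {
              printf ("hole [%u, %u)\n", cursor, blocks[i].start);
              n_holes++;
            }
          sacked += blocks[i].end - blocks[i].start;
          cursor = blocks[i].end;
        }
      if (cursor < snd_nxt)
        {
          printf ("hole [%u, %u)\n", cursor, snd_nxt);
          n_holes++;
        }
      printf ("%u holes, %u sacked bytes\n", n_holes, sacked); /* 5, 400 */
      return 0;
    }
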
+
+static int
+tcp_test_sack_tx (vlib_main_t * vm, unformat_input_t * input)
+{
+ tcp_connection_t _tc, *tc = &_tc;
+ sack_block_t *sacks;
+ int i, verbose = 0, expected;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'", format_unformat_error,
+ input);
+ return -1;
+ }
+ }
+
+ memset (tc, 0, sizeof (*tc));
+
+ /*
+ * Add odd sack block pairs
+ */
+ for (i = 1; i < 10; i += 2)
+ {
+ tcp_update_sack_list (tc, i * 100, (i + 1) * 100);
+ }
+
+ TCP_TEST ((vec_len (tc->snd_sacks) == 5), "sack blocks %d expected %d",
+ vec_len (tc->snd_sacks), 5);
+ TCP_TEST ((tc->snd_sacks[0].start == 900),
+ "first sack block start %u expected %u", tc->snd_sacks[0].start,
+ 900);
+
+ /*
+ * Try to add one extra
+ */
+ sacks = vec_dup (tc->snd_sacks);
+
+ tcp_update_sack_list (tc, 1100, 1200);
+ if (verbose)
+ vlib_cli_output (vm, "add new segment [1100, 1200]\n%U",
+ format_tcp_sacks, tc);
+ expected = 5 < TCP_MAX_SACK_BLOCKS ? 6 : 5;
+ TCP_TEST ((vec_len (tc->snd_sacks) == expected),
+ "sack blocks %d expected %d", vec_len (tc->snd_sacks), expected);
+ TCP_TEST ((tc->snd_sacks[0].start == 1100),
+ "first sack block start %u expected %u", tc->snd_sacks[0].start,
+ 1100);
+
+ /* restore */
+ vec_free (tc->snd_sacks);
+ tc->snd_sacks = sacks;
+
+ /*
+ * Overlap first 2 segments
+ */
+ tc->rcv_nxt = 300;
+ tcp_update_sack_list (tc, 300, 300);
+ if (verbose)
+ vlib_cli_output (vm, "overlap first 2 segments:\n%U",
+ format_tcp_sacks, tc);
+ TCP_TEST ((vec_len (tc->snd_sacks) == 3), "sack blocks %d expected %d",
+ vec_len (tc->snd_sacks), 3);
+ TCP_TEST ((tc->snd_sacks[0].start == 900),
+ "first sack block start %u expected %u", tc->snd_sacks[0].start,
+ 900);
+
+ /*
+ * Add a new segment
+ */
+ tcp_update_sack_list (tc, 1100, 1200);
+ if (verbose)
+ vlib_cli_output (vm, "add new segment [1100, 1200]\n%U",
+ format_tcp_sacks, tc);
+ TCP_TEST ((vec_len (tc->snd_sacks) == 4), "sack blocks %d expected %d",
+ vec_len (tc->snd_sacks), 4);
+ TCP_TEST ((tc->snd_sacks[0].start == 1100),
+ "first sack block start %u expected %u", tc->snd_sacks[0].start,
+ 1100);
+
+ /*
+ * Join middle segments
+ */
+ tcp_update_sack_list (tc, 800, 900);
+ if (verbose)
+ vlib_cli_output (vm, "join middle segments [800, 900]\n%U",
+ format_tcp_sacks, tc);
+
+ TCP_TEST ((vec_len (tc->snd_sacks) == 3), "sack blocks %d expected %d",
+ vec_len (tc->snd_sacks), 3);
+ TCP_TEST ((tc->snd_sacks[0].start == 700),
+ "first sack block start %u expected %u", tc->snd_sacks[0].start,
+ 700);
+
+ /*
+ * Advance rcv_nxt to overlap all
+ */
+ tc->rcv_nxt = 1200;
+ tcp_update_sack_list (tc, 1200, 1200);
+ if (verbose)
+ vlib_cli_output (vm, "advance rcv_nxt to 1200\n%U", format_tcp_sacks, tc);
+ TCP_TEST ((vec_len (tc->snd_sacks) == 0), "sack blocks %d expected %d",
+ vec_len (tc->snd_sacks), 0);
+
+
+ /*
+ * Add 2 blocks, overwrite first and update rcv_nxt to also remove it
+ */
+
+ vec_reset_length (tc->snd_sacks);
+ tc->rcv_nxt = 0;
+
+ tcp_update_sack_list (tc, 100, 200);
+ tcp_update_sack_list (tc, 300, 400);
+
+ if (verbose)
+ vlib_cli_output (vm, "add [100, 200] [300, 400]\n%U",
+ format_tcp_sacks, tc);
+ TCP_TEST ((vec_len (tc->snd_sacks) == 2),
+ "sack blocks %d expected %d", vec_len (tc->snd_sacks), 2);
+ TCP_TEST ((tc->snd_sacks[0].start == 300),
+ "first sack block start %u expected %u", tc->snd_sacks[0].start,
+ 300);
+
+ tc->rcv_nxt = 100;
+ tcp_update_sack_list (tc, 100, 100);
+ if (verbose)
+ vlib_cli_output (vm, "add [100, 200] rcv_nxt = 100\n%U",
+ format_tcp_sacks, tc);
+ TCP_TEST ((vec_len (tc->snd_sacks) == 1),
+ "sack blocks %d expected %d", vec_len (tc->snd_sacks), 1);
+ TCP_TEST ((tc->snd_sacks[0].start == 300),
+ "first sack block start %u expected %u", tc->snd_sacks[0].start,
+ 300);
+ return 0;
+}
+
+static int
+tcp_test_sack (vlib_main_t * vm, unformat_input_t * input)
+{
+ int res = 0;
+
+ /* Run all tests */
+ if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+ {
+ if (tcp_test_sack_tx (vm, input))
+ {
+ return -1;
+ }
+
+ if (tcp_test_sack_rx (vm, input))
+ {
+ return -1;
+ }
+ }
+ else
+ {
+ if (unformat (input, "tx"))
+ {
+ res = tcp_test_sack_tx (vm, input);
+ }
+ else if (unformat (input, "rx"))
+ {
+ res = tcp_test_sack_rx (vm, input);
+ }
+ }
+
+ return res;
+}
+
+
+typedef struct
+{
+ u32 offset;
+ u32 len;
+} test_pattern_t;
+
+/* *INDENT-OFF* */
+test_pattern_t test_pattern[] = {
+ {380, 8}, {768, 8}, {1156, 8}, {1544, 8}, {1932, 8}, {2320, 8}, {2708, 8},
+ {2992, 8}, {372, 8}, {760, 8}, {1148, 8}, {1536, 8}, {1924, 8}, {2312, 8},
+ {2700, 8}, {2984, 8}, {364, 8}, {752, 8}, {1140, 8}, {1528, 8}, {1916, 8},
+ {2304, 8}, {2692, 8}, {2976, 8}, {356, 8}, {744, 8}, {1132, 8}, {1520, 8},
+ {1908, 8}, {2296, 8}, {2684, 8}, {2968, 8}, {348, 8}, {736, 8}, {1124, 8},
+ {1512, 8}, {1900, 8}, {2288, 8}, {2676, 8}, {2960, 8}, {340, 8}, {728, 8},
+ {1116, 8}, {1504, 8}, {1892, 8}, {2280, 8}, {2668, 8}, {2952, 8}, {332, 8},
+ {720, 8}, {1108, 8}, {1496, 8}, {1884, 8}, {2272, 8}, {2660, 8}, {2944, 8},
+ {324, 8}, {712, 8}, {1100, 8}, {1488, 8}, {1876, 8}, {2264, 8}, {2652, 8},
+ {2936, 8}, {316, 8}, {704, 8}, {1092, 8}, {1480, 8}, {1868, 8}, {2256, 8},
+ {2644, 8}, {2928, 8}, {308, 8}, {696, 8}, {1084, 8}, {1472, 8}, {1860, 8},
+ {2248, 8}, {2636, 8}, {2920, 8}, {300, 8}, {688, 8}, {1076, 8}, {1464, 8},
+ {1852, 8}, {2240, 8}, {2628, 8}, {2912, 8}, {292, 8}, {680, 8}, {1068, 8},
+ {1456, 8}, {1844, 8}, {2232, 8}, {2620, 8}, {2904, 8}, {284, 8}, {672, 8},
+ {1060, 8}, {1448, 8}, {1836, 8}, {2224, 8}, {2612, 8}, {2896, 8}, {276, 8},
+ {664, 8}, {1052, 8}, {1440, 8}, {1828, 8}, {2216, 8}, {2604, 8}, {2888, 8},
+ {268, 8}, {656, 8}, {1044, 8}, {1432, 8}, {1820, 8}, {2208, 8}, {2596, 8},
+ {2880, 8}, {260, 8}, {648, 8}, {1036, 8}, {1424, 8}, {1812, 8}, {2200, 8},
+ {2588, 8}, {2872, 8}, {252, 8}, {640, 8}, {1028, 8}, {1416, 8}, {1804, 8},
+ {2192, 8}, {2580, 8}, {2864, 8}, {244, 8}, {632, 8}, {1020, 8}, {1408, 8},
+ {1796, 8}, {2184, 8}, {2572, 8}, {2856, 8}, {236, 8}, {624, 8}, {1012, 8},
+ {1400, 8}, {1788, 8}, {2176, 8}, {2564, 8}, {2848, 8}, {228, 8}, {616, 8},
+ {1004, 8}, {1392, 8}, {1780, 8}, {2168, 8}, {2556, 8}, {2840, 8}, {220, 8},
+ {608, 8}, {996, 8}, {1384, 8}, {1772, 8}, {2160, 8}, {2548, 8}, {2832, 8},
+ {212, 8}, {600, 8}, {988, 8}, {1376, 8}, {1764, 8}, {2152, 8}, {2540, 8},
+ {2824, 8}, {204, 8}, {592, 8}, {980, 8}, {1368, 8}, {1756, 8}, {2144, 8},
+ {2532, 8}, {2816, 8}, {196, 8}, {584, 8}, {972, 8}, {1360, 8}, {1748, 8},
+ {2136, 8}, {2524, 8}, {2808, 8}, {188, 8}, {576, 8}, {964, 8}, {1352, 8},
+ {1740, 8}, {2128, 8}, {2516, 8}, {2800, 8}, {180, 8}, {568, 8}, {956, 8},
+ {1344, 8}, {1732, 8}, {2120, 8}, {2508, 8}, {2792, 8}, {172, 8}, {560, 8},
+ {948, 8}, {1336, 8}, {1724, 8}, {2112, 8}, {2500, 8}, {2784, 8}, {164, 8},
+ {552, 8}, {940, 8}, {1328, 8}, {1716, 8}, {2104, 8}, {2492, 8}, {2776, 8},
+ {156, 8}, {544, 8}, {932, 8}, {1320, 8}, {1708, 8}, {2096, 8}, {2484, 8},
+ {2768, 8}, {148, 8}, {536, 8}, {924, 8}, {1312, 8}, {1700, 8}, {2088, 8},
+ {2476, 8}, {2760, 8}, {140, 8}, {528, 8}, {916, 8}, {1304, 8}, {1692, 8},
+ {2080, 8}, {2468, 8}, {2752, 8}, {132, 8}, {520, 8}, {908, 8}, {1296, 8},
+ {1684, 8}, {2072, 8}, {2460, 8}, {2744, 8}, {124, 8}, {512, 8}, {900, 8},
+ {1288, 8}, {1676, 8}, {2064, 8}, {2452, 8}, {2736, 8}, {116, 8}, {504, 8},
+ {892, 8}, {1280, 8}, {1668, 8}, {2056, 8}, {2444, 8}, {2728, 8}, {108, 8},
+ {496, 8}, {884, 8}, {1272, 8}, {1660, 8}, {2048, 8}, {2436, 8}, {2720, 8},
+ {100, 8}, {488, 8}, {876, 8}, {1264, 8}, {1652, 8}, {2040, 8}, {2428, 8},
+ {2716, 4}, {92, 8}, {480, 8}, {868, 8}, {1256, 8}, {1644, 8}, {2032, 8},
+ {2420, 8}, {84, 8}, {472, 8}, {860, 8}, {1248, 8}, {1636, 8}, {2024, 8},
+ {2412, 8}, {76, 8}, {464, 8}, {852, 8}, {1240, 8}, {1628, 8}, {2016, 8},
+ {2404, 8}, {68, 8}, {456, 8}, {844, 8}, {1232, 8}, {1620, 8}, {2008, 8},
+ {2396, 8}, {60, 8}, {448, 8}, {836, 8}, {1224, 8}, {1612, 8}, {2000, 8},
+ {2388, 8}, {52, 8}, {440, 8}, {828, 8}, {1216, 8}, {1604, 8}, {1992, 8},
+ {2380, 8}, {44, 8}, {432, 8}, {820, 8}, {1208, 8}, {1596, 8}, {1984, 8},
+ {2372, 8}, {36, 8}, {424, 8}, {812, 8}, {1200, 8}, {1588, 8}, {1976, 8},
+ {2364, 8}, {28, 8}, {416, 8}, {804, 8}, {1192, 8}, {1580, 8}, {1968, 8},
+ {2356, 8}, {20, 8}, {408, 8}, {796, 8}, {1184, 8}, {1572, 8}, {1960, 8},
+ {2348, 8}, {12, 8}, {400, 8}, {788, 8}, {1176, 8}, {1564, 8}, {1952, 8},
+ {2340, 8}, {4, 8}, {392, 8}, {780, 8}, {1168, 8}, {1556, 8}, {1944, 8},
+ {2332, 8},
+ /* missing from original data set */
+ {388, 4}, {776, 4}, {1164, 4}, {1552, 4}, {1940, 4}, {2328, 4},
+};
+/* *INDENT-ON* */
+
+int
+pattern_cmp (const void *arg1, const void *arg2)
+{
+ test_pattern_t *a1 = (test_pattern_t *) arg1;
+ test_pattern_t *a2 = (test_pattern_t *) arg2;
+
+ if (a1->offset < a2->offset)
+ return -1;
+ else if (a1->offset > a2->offset)
+ return 1;
+ return 0;
+}
+
+static u8
+fifo_validate_pattern (vlib_main_t * vm, test_pattern_t * pattern,
+ u32 pattern_length)
+{
+ test_pattern_t *tp = pattern;
+ int i;
+
+ /* Go through the pattern and make 100% sure it's sane */
+ for (i = 0; i < pattern_length - 1; i++)
+ {
+ if (tp->offset + tp->len != (tp + 1)->offset)
+ {
+ vlib_cli_output (vm, "[%d] missing {%d, %d}", i,
+ (tp->offset + tp->len),
+ (tp + 1)->offset - (tp->offset + tp->len));
+ return 0;
+ }
+ tp++;
+ }
+ return 1;
+}
+
+static test_pattern_t *
+fifo_get_validate_pattern (vlib_main_t * vm, test_pattern_t * test_data,
+ u32 test_data_len)
+{
+ test_pattern_t *validate_pattern = 0;
+
+ /* Validate, and try segments in order... */
+ vec_validate (validate_pattern, test_data_len - 1);
+ memcpy (validate_pattern, test_data,
+ test_data_len * sizeof (test_pattern_t));
+ qsort ((u8 *) validate_pattern, test_data_len, sizeof (test_pattern_t),
+ pattern_cmp);
+
+ if (fifo_validate_pattern (vm, validate_pattern, test_data_len) == 0)
+ return 0;
+
+ return validate_pattern;
+}
+
+static svm_fifo_t *
+fifo_prepare (u32 fifo_size)
+{
+ svm_fifo_t *f;
+ f = svm_fifo_create (fifo_size);
+
+ /* Paint fifo data vector with -1's */
+ memset (f->data, 0xFF, fifo_size);
+
+ return f;
+}
+
+static int
+compare_data (u8 * data1, u8 * data2, u32 start, u32 len, u32 * index)
+{
+ int i;
+
+ for (i = start; i < len; i++)
+ {
+ if (data1[i] != data2[i])
+ {
+ *index = i;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int
+tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input)
+{
+ svm_fifo_t *f;
+ u32 fifo_size = 1 << 20;
+ u32 *test_data = 0;
+ u32 offset;
+ int i, rv, verbose = 0;
+ u32 data_word, test_data_len, j;
+ ooo_segment_t *ooo_seg;
+ u8 *data, *s, *data_buf = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ }
+
+ test_data_len = fifo_size / sizeof (u32);
+ vec_validate (test_data, test_data_len - 1);
+
+ for (i = 0; i < vec_len (test_data); i++)
+ test_data[i] = i;
+
+ f = fifo_prepare (fifo_size);
+
+ /*
+ * Enqueue an initial (un-dequeued) chunk
+ */
+ rv = svm_fifo_enqueue_nowait (f, sizeof (u32), (u8 *) test_data);
+ TCP_TEST ((rv == sizeof (u32)), "enqueued %d", rv);
+ TCP_TEST ((f->tail == 4), "fifo tail %u", f->tail);
+
+ /*
+ * Create 3 chunks in the future. The offsets are relative
+ * to the current fifo tail
+ */
+ for (i = 0; i < 3; i++)
+ {
+ offset = (2 * i + 1) * sizeof (u32) - f->tail;
+ data = (u8 *) (test_data + (2 * i + 1));
+ if (i == 0)
+ {
+ rv = svm_fifo_enqueue_nowait (f, sizeof (u32), data);
+ rv = rv > 0 ? 0 : rv;
+ }
+ else
+ rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
+ if (verbose)
+ vlib_cli_output (vm, "add [%d] [%d, %d]", 2 * i + 1, offset,
+ offset + sizeof (u32));
+ if (rv)
+ {
+ clib_warning ("enqueue returned %d", rv);
+ goto err;
+ }
+ }
+
+ if (verbose)
+ vlib_cli_output (vm, "fifo after odd segs: %U", format_svm_fifo, f, 1);
+
+ TCP_TEST ((f->tail == 8), "fifo tail %u", f->tail);
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 2),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+
+ /*
+ * Try adding a completely overlapped segment
+ */
+ offset = 3 * sizeof (u32) - f->tail;
+ data = (u8 *) (test_data + 3);
+ rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
+ if (rv)
+ {
+ clib_warning ("enqueue returned %d", rv);
+ goto err;
+ }
+
+ if (verbose)
+ vlib_cli_output (vm, "fifo after overlap seg: %U", format_svm_fifo, f, 1);
+
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 2),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+
+ /*
+ * Make sure format functions are not buggy
+ */
+ s = format (0, "%U", format_svm_fifo, f, 2);
+ vec_free (s);
+
+ /*
+ * Paint some of the missing data backwards
+ */
+ for (i = 3; i > 1; i--)
+ {
+ offset = (2 * i + 0) * sizeof (u32) - f->tail;
+ data = (u8 *) (test_data + (2 * i + 0));
+ rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
+ if (verbose)
+ vlib_cli_output (vm, "add [%d] [%d, %d]", 2 * i, offset,
+ offset + sizeof (u32));
+ if (rv)
+ {
+ clib_warning ("enqueue returned %d", rv);
+ goto err;
+ }
+ }
+
+ if (verbose)
+ vlib_cli_output (vm, "fifo before missing link: %U", format_svm_fifo, f,
+ 1);
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+ ooo_seg = svm_fifo_first_ooo_segment (f);
+ TCP_TEST ((ooo_seg->start == 12),
+ "first ooo seg position %u", ooo_seg->start);
+ TCP_TEST ((ooo_seg->length == 16),
+ "first ooo seg length %u", ooo_seg->length);
+
+ /*
+ * Enqueue the missing u32
+ */
+ rv = svm_fifo_enqueue_nowait (f, sizeof (u32), (u8 *) (test_data + 2));
+ if (verbose)
+ vlib_cli_output (vm, "fifo after missing link: %U", format_svm_fifo, f,
+ 1);
+ TCP_TEST ((rv == 20), "bytes to be enqueued %u", rv);
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 0),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+
+ /*
+ * Collect results
+ */
+ for (i = 0; i < 7; i++)
+ {
+ rv = svm_fifo_dequeue_nowait (f, sizeof (u32), (u8 *) & data_word);
+ if (rv != sizeof (u32))
+ {
+ clib_warning ("bytes dequeues %u", rv);
+ goto err;
+ }
+ if (data_word != test_data[i])
+ {
+ clib_warning ("recovered [%d] %d not %d", i, data_word,
+ test_data[i]);
+ goto err;
+ }
+ }
+
+ /*
+ * Test segment overlaps: last ooo segment overlaps all
+ */
+ svm_fifo_free (f);
+ f = fifo_prepare (fifo_size);
+
+ for (i = 0; i < 4; i++)
+ {
+ offset = (2 * i + 1) * sizeof (u32) - f->tail;
+ data = (u8 *) (test_data + (2 * i + 1));
+ rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
+ if (verbose)
+ vlib_cli_output (vm, "add [%d] [%d, %d]", 2 * i + 1, offset,
+ offset + sizeof (u32));
+ if (rv)
+ {
+ clib_warning ("enqueue returned %d", rv);
+ goto err;
+ }
+ }
+
+ rv = svm_fifo_enqueue_with_offset (f, 8 - f->tail, 21, data);
+ TCP_TEST ((rv == 0), "ooo enqueued %u", rv);
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+
+ vec_validate (data_buf, vec_len (data));
+ svm_fifo_peek (f, 0, vec_len (data), data_buf);
+ if (compare_data (data_buf, data, 8, vec_len (data), &j))
+ {
+ TCP_TEST (0, "[%d] peeked %u expected %u", j, data_buf[j], data[j]);
+ }
+ vec_reset_length (data_buf);
+
+ /*
+ * Test segment overlaps: enqueue and overlap ooo segments
+ */
+ svm_fifo_free (f);
+ f = fifo_prepare (fifo_size);
+
+ for (i = 0; i < 4; i++)
+ {
+ offset = (2 * i + 1) * sizeof (u32) - f->tail;
+ data = (u8 *) (test_data + (2 * i + 1));
+ rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
+ if (verbose)
+ vlib_cli_output (vm, "add [%d] [%d, %d]", 2 * i + 1, offset,
+ offset + sizeof (u32));
+ if (rv)
+ {
+ clib_warning ("enqueue returned %d", rv);
+ goto err;
+ }
+ }
+
+ if (verbose)
+ vlib_cli_output (vm, "fifo after enqueue: %U", format_svm_fifo, f, 1);
+
+ rv = svm_fifo_enqueue_nowait (f, 29, data);
+ if (verbose)
+ vlib_cli_output (vm, "fifo after enqueueing 29: %U", format_svm_fifo, f,
+ 1);
+ TCP_TEST ((rv == 32), "ooo enqueued %u", rv);
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 0),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+
+ vec_validate (data_buf, vec_len (data));
+ svm_fifo_peek (f, 0, vec_len (data), data_buf);
+ if (compare_data (data_buf, data, 0, vec_len (data), &j))
+ {
+ TCP_TEST (0, "[%d] peeked %u expected %u", j, data_buf[j], data[j]);
+ }
+
+ /* Try to peek beyond the data */
+ rv = svm_fifo_peek (f, svm_fifo_max_dequeue (f), vec_len (data), data_buf);
+ TCP_TEST ((rv == 0), "peeked %u expected 0", rv);
+
+ vec_free (data_buf);
+ svm_fifo_free (f);
+ vec_free (test_data);
+
+ return 0;
+
+err:
+ svm_fifo_free (f);
+ vec_free (test_data);
+ return -1;
+}
+
+static int
+tcp_test_fifo2 (vlib_main_t * vm)
+{
+ svm_fifo_t *f;
+ u32 fifo_size = 1 << 20;
+ int i, rv, test_data_len;
+ u64 data64;
+ test_pattern_t *tp, *vp, *test_data;
+ ooo_segment_t *ooo_seg;
+
+ test_data = test_pattern;
+ test_data_len = ARRAY_LEN (test_pattern);
+
+ vp = fifo_get_validate_pattern (vm, test_data, test_data_len);
+
+ /* Create a fifo */
+ f = fifo_prepare (fifo_size);
+
+ /*
+ * Try with sorted data
+ */
+ for (i = 0; i < test_data_len; i++)
+ {
+ tp = vp + i;
+ data64 = tp->offset;
+ svm_fifo_enqueue_with_offset (f, tp->offset - f->tail, tp->len,
+ (u8 *) & data64);
+ }
+
+ /* Expected result: one big fat chunk at offset 4 */
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+ ooo_seg = svm_fifo_first_ooo_segment (f);
+ TCP_TEST ((ooo_seg->start == 4),
+ "first ooo seg position %u", ooo_seg->start);
+ TCP_TEST ((ooo_seg->length == 2996),
+ "first ooo seg length %u", ooo_seg->length);
+
+ data64 = 0;
+ rv = svm_fifo_enqueue_nowait (f, sizeof (u32), (u8 *) & data64);
+ TCP_TEST ((rv == 3000), "bytes to be enqueued %u", rv);
+
+ svm_fifo_free (f);
+ vec_free (vp);
+
+ /*
+ * Now try it again w/ unsorted data...
+ */
+
+ f = fifo_prepare (fifo_size);
+
+ for (i = 0; i < test_data_len; i++)
+ {
+ tp = &test_data[i];
+ data64 = tp->offset;
+ rv = svm_fifo_enqueue_with_offset (f, tp->offset - f->tail, tp->len,
+ (u8 *) & data64);
+ if (rv)
+ {
+ clib_warning ("enqueue returned %d", rv);
+ }
+ }
+
+ /* Expecting the same result: one big fat chunk at offset 4 */
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+ ooo_seg = svm_fifo_first_ooo_segment (f);
+ TCP_TEST ((ooo_seg->start == 4),
+ "first ooo seg position %u", ooo_seg->start);
+ TCP_TEST ((ooo_seg->length == 2996),
+ "first ooo seg length %u", ooo_seg->length);
+
+ data64 = 0;
+ rv = svm_fifo_enqueue_nowait (f, sizeof (u32), (u8 *) & data64);
+
+ TCP_TEST ((rv == 3000), "bytes to be enqueued %u", rv);
+
+ svm_fifo_free (f);
+
+ return 0;
+}
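
Both fifo tests rely on the same out-of-order semantics: bytes enqueued at an offset past the tail stay invisible to dequeue until the gap at the tail is filled, at which point the tail jumps over every now-contiguous segment. A minimal standalone model of that behavior (flat buffer plus a written bitmap; wrap-around and the real ooo segment list are ignored):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    #define FIFO_SIZE 64

    static uint8_t data[FIFO_SIZE];
    static uint8_t written[FIFO_SIZE];
    static uint32_t tail;

    /* Hypothetical analogue of svm_fifo_enqueue_with_offset. */
    static void
    enqueue_with_offset (uint32_t offset, const uint8_t * src, uint32_t len)
    {
      memcpy (data + tail + offset, src, len);
      memset (written + tail + offset, 1, len);
    }

    /* Hypothetical analogue of svm_fifo_enqueue_nowait: write at the
       tail, then advance the tail over any now-contiguous bytes.
       Returns the total number of bytes that became dequeueable. */
    static uint32_t
    enqueue_nowait (const uint8_t * src, uint32_t len)
    {
      uint32_t old_tail = tail;
      enqueue_with_offset (0, src, len);
      while (tail < FIFO_SIZE && written[tail])
        tail++;
      return tail - old_tail;
    }

    int
    main (void)
    {
      uint8_t b[8] = "abcdefgh";

      enqueue_with_offset (4, b, 4);            /* ooo segment at [4, 8)  */
      printf ("%u\n", enqueue_nowait (b, 4));   /* fills [0, 4) -> returns 8 */
      return 0;
    }
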
+
+static int
+tcp_test_fifo3 (vlib_main_t * vm, unformat_input_t * input)
+{
+ svm_fifo_t *f;
+ u32 fifo_size = 4 << 10;
+ u32 fifo_initial_offset = 0;
+ u32 total_size = 2 << 10;
+ int overlap = 0, verbose = 0, randomize = 1, drop = 0, in_seq_all = 0;
+ u8 *data_pattern = 0, *data_buf = 0;
+ test_pattern_t *tp, *generate = 0;
+ u32 nsegs = 2, seg_size, length_so_far;
+ u32 current_offset, offset_increment, len_this_chunk;
+ u32 seed = 0xdeaddabe, j;
+ int i, rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "fifo-size %d", &fifo_size))
+ ;
+ else if (unformat (input, "total-size %d", &total_size))
+ ;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "overlap"))
+ overlap = 1;
+ else if (unformat (input, "initial-offset %d", &fifo_initial_offset))
+ ;
+ else if (unformat (input, "seed %d", &seed))
+ ;
+ else if (unformat (input, "nsegs %d", &nsegs))
+ ;
+ else if (unformat (input, "no-randomize"))
+ randomize = 0;
+ else if (unformat (input, "in-seq-all"))
+ in_seq_all = 1;
+ else if (unformat (input, "drop"))
+ drop = 1;
+ else
+ {
+ clib_error_t *e = clib_error_return
+ (0, "unknown input `%U'", format_unformat_error, input);
+ clib_error_report (e);
+ return -1;
+ }
+ }
+
+ if (total_size > fifo_size)
+ {
+ clib_warning ("total_size %d greater than fifo size %d", total_size,
+ fifo_size);
+ return -1;
+ }
+ if (overlap && randomize == 0)
+ {
+ clib_warning ("Can't enqueue in-order with overlap");
+ return -1;
+ }
+
+ /*
+ * Generate data
+ */
+ vec_validate (data_pattern, total_size - 1);
+ for (i = 0; i < vec_len (data_pattern); i++)
+ data_pattern[i] = i & 0xff;
+
+ /*
+ * Generate segments
+ */
+ seg_size = total_size / nsegs;
+ length_so_far = 0;
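+  /* Start at offset 1 when randomizing: byte 0 is deliberately left as a
+   * hole and is filled by the in-order enqueue further below */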
+ current_offset = randomize;
+ while (length_so_far < total_size)
+ {
+ vec_add2 (generate, tp, 1);
+ len_this_chunk = clib_min (seg_size, total_size - length_so_far);
+ tp->offset = current_offset;
+ tp->len = len_this_chunk;
+
+ if (overlap && (len_this_chunk == seg_size))
+ do
+ {
+ offset_increment = len_this_chunk
+ % (1 + (random_u32 (&seed) % len_this_chunk));
+ }
+ while (offset_increment == 0);
+ else
+ offset_increment = len_this_chunk;
+
+ current_offset += offset_increment;
+ length_so_far = tp->offset + tp->len;
+ }
+
+ /*
+ * Validate segment list. Only valid for non-overlap cases.
+ */
+ if (overlap == 0)
+ fifo_validate_pattern (vm, generate, vec_len (generate));
+
+ if (verbose)
+ {
+ vlib_cli_output (vm, "raw data pattern:");
+ for (i = 0; i < vec_len (generate); i++)
+ {
+ vlib_cli_output (vm, "[%d] offset %u len %u", i,
+ generate[i].offset, generate[i].len);
+ }
+ }
+
+ /* Randomize data pattern */
+ if (randomize)
+ {
+ for (i = 0; i < vec_len (generate) / 2; i++)
+ {
+ u32 src_index, dst_index;
+ test_pattern_t _tmp, *tmp = &_tmp;
+
+ src_index = random_u32 (&seed) % vec_len (generate);
+ dst_index = random_u32 (&seed) % vec_len (generate);
+
+ tmp[0] = generate[dst_index];
+ generate[dst_index] = generate[src_index];
+ generate[src_index] = tmp[0];
+ }
+ if (verbose)
+ {
+ vlib_cli_output (vm, "randomized data pattern:");
+ for (i = 0; i < vec_len (generate); i++)
+ {
+ vlib_cli_output (vm, "[%d] offset %u len %u", i,
+ generate[i].offset, generate[i].len);
+ }
+ }
+ }
+
+ /*
+ * Create a fifo and add segments
+ */
+ f = fifo_prepare (fifo_size);
+
+ /* manually set head and tail pointers to validate modular arithmetic */
+ fifo_initial_offset = fifo_initial_offset % fifo_size;
+ f->head = fifo_initial_offset;
+ f->tail = fifo_initial_offset;
+
+ for (i = !randomize; i < vec_len (generate); i++)
+ {
+ tp = generate + i;
+ svm_fifo_enqueue_with_offset (f,
+ fifo_initial_offset + tp->offset -
+ f->tail, tp->len,
+ (u8 *) data_pattern + tp->offset);
+ }
+
+ /* Add the first segment in order for non random data */
+ if (!randomize)
+ svm_fifo_enqueue_nowait (f, generate[0].len, (u8 *) data_pattern);
+
+ /*
+ * Expected result: one big fat chunk at offset 1 if randomize == 1
+ */
+
+ if (verbose)
+ vlib_cli_output (vm, "fifo before missing link: %U",
+ format_svm_fifo, f, 1 /* verbose */ );
+
+ /*
+ * Add the missing byte if segments were randomized
+ */
+ if (randomize)
+ {
+ u32 bytes_to_enq = 1;
+ if (in_seq_all)
+ bytes_to_enq = total_size;
+ rv = svm_fifo_enqueue_nowait (f, bytes_to_enq, data_pattern + 0);
+
+ if (verbose)
+ vlib_cli_output (vm, "in-order enqueue returned %d", rv);
+
+ TCP_TEST ((rv == total_size), "enqueued %u expected %u", rv,
+ total_size);
+
+ }
+
+ TCP_TEST ((svm_fifo_has_ooo_data (f) == 0), "number of ooo segments %u",
+ svm_fifo_number_ooo_segments (f));
+
+ /*
+ * Test if peeked data is the same as original data
+ */
+ vec_validate (data_buf, vec_len (data_pattern));
+ svm_fifo_peek (f, 0, vec_len (data_pattern), data_buf);
+ if (compare_data (data_buf, data_pattern, 0, vec_len (data_pattern), &j))
+ {
+ TCP_TEST (0, "[%d] peeked %u expected %u", j, data_buf[j],
+ data_pattern[j]);
+ }
+ vec_reset_length (data_buf);
+
+ /*
+ * Dequeue or drop all data
+ */
+ if (drop)
+ {
+ svm_fifo_dequeue_drop (f, vec_len (data_pattern));
+ }
+ else
+ {
+ svm_fifo_dequeue_nowait (f, vec_len (data_pattern), data_buf);
+ if (compare_data
+ (data_buf, data_pattern, 0, vec_len (data_pattern), &j))
+ {
+ TCP_TEST (0, "[%d] dequeued %u expected %u", j, data_buf[j],
+ data_pattern[j]);
+ }
+ }
+
+ TCP_TEST ((svm_fifo_max_dequeue (f) == 0), "fifo has %d bytes",
+ svm_fifo_max_dequeue (f));
+
+ svm_fifo_free (f);
+ vec_free (data_pattern);
+ vec_free (data_buf);
+
+ return 0;
+}
+
+static int
+tcp_test_fifo4 (vlib_main_t * vm, unformat_input_t * input)
+{
+ svm_fifo_t *f;
+ u32 fifo_size = 6 << 10;
+ u32 fifo_initial_offset = 1000000000;
+ u32 test_n_bytes = 5000, j;
+ u8 *test_data = 0, *data_buf = 0;
+ int i, rv, verbose = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ {
+ clib_error_t *e = clib_error_return
+ (0, "unknown input `%U'", format_unformat_error, input);
+ clib_error_report (e);
+ return -1;
+ }
+ }
+
+ /*
+ * Create a fifo and add segments
+ */
+ f = fifo_prepare (fifo_size);
+
+ /* Set head and tail pointers */
+ fifo_initial_offset = fifo_initial_offset % fifo_size;
+ svm_fifo_init_pointers (f, fifo_initial_offset);
+
+ vec_validate (test_data, test_n_bytes - 1);
+ for (i = 0; i < vec_len (test_data); i++)
+ test_data[i] = i;
+
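+  /* Enqueue bytes n-1..1 one at a time in reverse order, so every byte
+   * is out-of-order until byte 0 finally stitches them together */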
+ for (i = test_n_bytes - 1; i > 0; i--)
+ {
+ rv = svm_fifo_enqueue_with_offset (f, fifo_initial_offset + i - f->tail,
+ sizeof (u8), &test_data[i]);
+ if (verbose)
+ vlib_cli_output (vm, "add [%d] [%d, %d]", i, i, i + sizeof (u8));
+ if (rv)
+ {
+ clib_warning ("enqueue returned %d", rv);
+ svm_fifo_free (f);
+ vec_free (test_data);
+ return -1;
+ }
+ }
+
+ svm_fifo_enqueue_nowait (f, sizeof (u8), &test_data[0]);
+
+ vec_validate (data_buf, vec_len (test_data));
+
+ svm_fifo_dequeue_nowait (f, vec_len (test_data), data_buf);
+ rv = compare_data (data_buf, test_data, 0, vec_len (test_data), &j);
+ if (rv)
+ vlib_cli_output (vm, "[%d] dequeued %u expected %u", j, data_buf[j],
+ test_data[j]);
+ TCP_TEST ((rv == 0), "dequeued compared to original returned %d", rv);
+
+ svm_fifo_free (f);
+ vec_free (test_data);
+ return 0;
+}
+
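+/** Wrap an absolute fifo position into the fifo's [0, nitems) index space */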
+static u32
+fifo_pos (svm_fifo_t * f, u32 pos)
+{
+ return pos % f->nitems;
+}
+
+static int
+tcp_test_fifo5 (vlib_main_t * vm, unformat_input_t * input)
+{
+ svm_fifo_t *f;
+ u32 fifo_size = 400, j = 0, offset = 200;
+ int i, rv, verbose = 0;
+ u8 *test_data = 0, *data_buf = 0;
+ ooo_segment_t *ooo_seg;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ {
+ clib_error_t *e = clib_error_return
+ (0, "unknown input `%U'", format_unformat_error, input);
+ clib_error_report (e);
+ return -1;
+ }
+ }
+
+ f = fifo_prepare (fifo_size);
+ svm_fifo_init_pointers (f, offset);
+
+ vec_validate (test_data, 399);
+ for (i = 0; i < vec_len (test_data); i++)
+ test_data[i] = i % 0xff;
+
+ /*
+ * Start with [100, 200] and [300, 400]
+ */
+ svm_fifo_enqueue_with_offset (f, 100, 100, &test_data[100]);
+ svm_fifo_enqueue_with_offset (f, 300, 100, &test_data[300]);
+
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 2),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+ TCP_TEST ((f->ooos_newest == 1), "newest %u", f->ooos_newest);
+ if (verbose)
+ vlib_cli_output (vm, "fifo after [100, 200] and [300, 400] : %U",
+ format_svm_fifo, f, 2 /* verbose */ );
+
+ /*
+ * Add [225, 275]
+ */
+
+ rv = svm_fifo_enqueue_with_offset (f, 225, 50, &test_data[200]);
+ if (verbose)
+ vlib_cli_output (vm, "fifo after [225, 275] : %U",
+ format_svm_fifo, f, 2 /* verbose */ );
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 3),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+ ooo_seg = svm_fifo_first_ooo_segment (f);
+ TCP_TEST ((ooo_seg->start == fifo_pos (f, 100 + offset)),
+ "first seg start %u expected %u", ooo_seg->start,
+ fifo_pos (f, 100 + offset));
+ TCP_TEST ((ooo_seg->length == 100), "first seg length %u expected %u",
+ ooo_seg->length, 100);
+ ooo_seg = ooo_segment_next (f, ooo_seg);
+ TCP_TEST ((ooo_seg->start == fifo_pos (f, 225 + offset)),
+ "second seg start %u expected %u",
+ ooo_seg->start, fifo_pos (f, 225 + offset));
+ TCP_TEST ((ooo_seg->length == 50), "second seg length %u expected %u",
+ ooo_seg->length, 50);
+ ooo_seg = ooo_segment_next (f, ooo_seg);
+ TCP_TEST ((ooo_seg->start == fifo_pos (f, 300 + offset)),
+ "third seg start %u expected %u",
+ ooo_seg->start, fifo_pos (f, 300 + offset));
+ TCP_TEST ((ooo_seg->length == 100), "third seg length %u expected %u",
+ ooo_seg->length, 100);
+ TCP_TEST ((f->ooos_newest == 2), "newest %u", f->ooos_newest);
+ /*
+ * Add [190, 310]
+ */
+ rv = svm_fifo_enqueue_with_offset (f, 190, 120, &test_data[190]);
+ if (verbose)
+ vlib_cli_output (vm, "fifo after [190, 310] : %U",
+ format_svm_fifo, f, 1 /* verbose */ );
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+ ooo_seg = svm_fifo_first_ooo_segment (f);
+ TCP_TEST ((ooo_seg->start == fifo_pos (f, offset + 100)),
+ "first seg start %u expected %u",
+ ooo_seg->start, fifo_pos (f, offset + 100));
+ TCP_TEST ((ooo_seg->length == 300), "first seg length %u expected %u",
+ ooo_seg->length, 300);
+
+ /*
+ * Add [0, 150]
+ */
+ rv = svm_fifo_enqueue_nowait (f, 150, test_data);
+
+ if (verbose)
+ vlib_cli_output (vm, "fifo after [0 150] : %U", format_svm_fifo, f,
+ 2 /* verbose */ );
+
+ TCP_TEST ((rv == 400), "managed to enqueue %u expected %u", rv, 400);
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 0),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+
+ vec_validate (data_buf, 399);
+ svm_fifo_peek (f, 0, 400, data_buf);
+ if (compare_data (data_buf, test_data, 0, 400, &j))
+ {
+ TCP_TEST (0, "[%d] peeked %u expected %u", j, data_buf[j],
+ test_data[j]);
+ }
+
+ /*
+ * Add [100 200] and overlap it with [50 250]
+ */
+ svm_fifo_free (f);
+ f = fifo_prepare (fifo_size);
+
+ svm_fifo_enqueue_with_offset (f, 100, 100, &test_data[100]);
+ svm_fifo_enqueue_with_offset (f, 50, 200, &test_data[50]);
+ TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1),
+ "number of ooo segments %u", svm_fifo_number_ooo_segments (f));
+ ooo_seg = svm_fifo_first_ooo_segment (f);
+ TCP_TEST ((ooo_seg->start == 50), "first seg start %u expected %u",
+ ooo_seg->start, 50);
+ TCP_TEST ((ooo_seg->length == 200), "first seg length %u expected %u",
+ ooo_seg->length, 200);
+
+ svm_fifo_free (f);
+ vec_free (test_data);
+ return 0;
+}
+
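+/* Normally empty; paste captured fifo trace records here to replay them */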
+/* *INDENT-OFF* */
+svm_fifo_trace_elem_t fifo_trace[] = {};
+/* *INDENT-ON* */
+
+static int
+tcp_test_fifo_replay (vlib_main_t * vm, unformat_input_t * input)
+{
+ svm_fifo_t f;
+ int verbose = 0;
+ u8 no_read = 0, *str = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "no-read"))
+ no_read = 1;
+ else
+ {
+ clib_error_t *e = clib_error_return
+ (0, "unknown input `%U'", format_unformat_error, input);
+ clib_error_report (e);
+ return -1;
+ }
+ }
+
+  /* Zero the stack fifo so the replay starts from a clean state */
+  memset (&f, 0, sizeof (f));
+#if SVMF_FIFO_TRACE
+  f.trace = fifo_trace;
+#endif
+
+ str = svm_fifo_replay (str, &f, no_read, verbose);
+ vlib_cli_output (vm, "%v", str);
+ return 0;
+}
+
+static int
+tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input)
+{
+ int res = 0;
+ char *str;
+
+ /* Run all tests */
+ if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+ {
+ res = tcp_test_fifo1 (vm, input);
+ if (res)
+ return res;
+
+ res = tcp_test_fifo2 (vm);
+ if (res)
+ return res;
+
+ /*
+ * Run a number of fifo3 configs
+ */
+ str = "nsegs 10 overlap seed 123";
+ unformat_init_cstring (input, str);
+ if (tcp_test_fifo3 (vm, input))
+ return -1;
+ unformat_free (input);
+
+ str = "nsegs 10 overlap seed 123 in-seq-all";
+ unformat_init_cstring (input, str);
+ if (tcp_test_fifo3 (vm, input))
+ return -1;
+ unformat_free (input);
+
+ str = "nsegs 10 overlap seed 123 initial-offset 3917";
+ unformat_init_cstring (input, str);
+ if (tcp_test_fifo3 (vm, input))
+ return -1;
+ unformat_free (input);
+
+ str = "nsegs 10 overlap seed 123 initial-offset 3917 drop";
+ unformat_init_cstring (input, str);
+ if (tcp_test_fifo3 (vm, input))
+ return -1;
+ unformat_free (input);
+
+ str = "nsegs 10 seed 123 initial-offset 3917 drop no-randomize";
+ unformat_init_cstring (input, str);
+ if (tcp_test_fifo3 (vm, input))
+ return -1;
+ unformat_free (input);
+
+ res = tcp_test_fifo4 (vm, input);
+ if (res)
+ return res;
+
+ res = tcp_test_fifo5 (vm, input);
+ if (res)
+ return res;
+ }
+ else
+ {
+ if (unformat (input, "fifo3"))
+ {
+ res = tcp_test_fifo3 (vm, input);
+ }
+ else if (unformat (input, "fifo2"))
+ {
+ res = tcp_test_fifo2 (vm);
+ }
+ else if (unformat (input, "fifo1"))
+ {
+ res = tcp_test_fifo1 (vm, input);
+ }
+ else if (unformat (input, "fifo4"))
+ {
+ res = tcp_test_fifo4 (vm, input);
+ }
+ else if (unformat (input, "fifo5"))
+ {
+ res = tcp_test_fifo5 (vm, input);
+ }
+ else if (unformat (input, "replay"))
+ {
+ res = tcp_test_fifo_replay (vm, input);
+ }
+ }
+
+ return res;
+}
+
+static int
+tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ tcp_main_t *tm = &tcp_main;
+ transport_connection_t _tc1, *tc1 = &_tc1, _tc2, *tc2 = &_tc2, *tconn;
+ tcp_connection_t *tc;
+ stream_session_t *s;
+ u8 cmp = 0;
+
+ pool_get (smm->sessions[0], s);
+ memset (s, 0, sizeof (*s));
+ s->session_index = s - smm->sessions[0];
+
+ pool_get (tm->connections[0], tc);
+ memset (tc, 0, sizeof (*tc));
+ tc->connection.c_index = tc - tm->connections[0];
+ tc->connection.s_index = s->session_index;
+ s->connection_index = tc->connection.c_index;
+
+ tc->connection.lcl_ip.ip4.as_u32 = clib_host_to_net_u32 (0x06000101);
+ tc->connection.rmt_ip.ip4.as_u32 = clib_host_to_net_u32 (0x06000103);
+ tc->connection.lcl_port = 35051;
+ tc->connection.rmt_port = 53764;
+ tc->connection.transport_proto = 0;
+ clib_memcpy (tc1, &tc->connection, sizeof (*tc1));
+
+ pool_get (session_manager_main.sessions[0], s);
+ memset (s, 0, sizeof (*s));
+ s->session_index = s - smm->sessions[0];
+ pool_get (tm->connections[0], tc);
+ memset (tc, 0, sizeof (*tc));
+ tc->connection.c_index = tc - tm->connections[0];
+ tc->connection.s_index = s->session_index;
+ s->connection_index = tc->connection.c_index;
+
+ tc->connection.lcl_ip.ip4.as_u32 = clib_host_to_net_u32 (0x06000101);
+ tc->connection.rmt_ip.ip4.as_u32 = clib_host_to_net_u32 (0x06000102);
+ tc->connection.lcl_port = 38225;
+ tc->connection.rmt_port = 53764;
+ tc->connection.transport_proto = 0;
+ clib_memcpy (tc2, &tc->connection, sizeof (*tc2));
+
+ /*
+ * Confirm that connection lookup works
+ */
+
+ stream_session_table_add_for_tc (tc1, tc1->s_index);
+ tconn = stream_session_lookup_transport_wt4 (&tc1->lcl_ip.ip4,
+ &tc1->rmt_ip.ip4,
+ tc1->lcl_port, tc1->rmt_port,
+ tc1->transport_proto, 0);
+ cmp = (memcmp (&tconn->rmt_ip, &tc1->rmt_ip, sizeof (tc1->rmt_ip)) == 0);
+ TCP_TEST ((cmp), "rmt ip is identical %d", cmp);
+  TCP_TEST ((tconn->lcl_port == tc1->lcl_port),
+	    "lcl port is identical %d", tconn->lcl_port == tc1->lcl_port);
+
+ /*
+ * Non-existing connection lookup should not work
+ */
+
+ tconn = stream_session_lookup_transport_wt4 (&tc2->lcl_ip.ip4,
+ &tc2->rmt_ip.ip4,
+ tc2->lcl_port, tc2->rmt_port,
+ tc2->transport_proto, 0);
+ TCP_TEST ((tconn == 0), "lookup result should be null");
+
+ /*
+ * Delete and lookup again
+ */
+ stream_session_table_del_for_tc (tc1);
+ tconn = stream_session_lookup_transport_wt4 (&tc1->lcl_ip.ip4,
+ &tc1->rmt_ip.ip4,
+ tc1->lcl_port, tc1->rmt_port,
+ tc1->transport_proto, 0);
+ TCP_TEST ((tconn == 0), "lookup result should be null");
+ tconn = stream_session_lookup_transport_wt4 (&tc2->lcl_ip.ip4,
+ &tc2->rmt_ip.ip4,
+ tc2->lcl_port, tc2->rmt_port,
+ tc2->transport_proto, 0);
+ TCP_TEST ((tconn == 0), "lookup result should be null");
+
+  /*
+   * Re-add tc1; a lookup for tc2, which was never added, must still fail
+   */
+ stream_session_table_add_for_tc (tc1, tc1->s_index);
+ tconn = stream_session_lookup_transport_wt4 (&tc2->lcl_ip.ip4,
+ &tc2->rmt_ip.ip4,
+ tc2->lcl_port, tc2->rmt_port,
+ tc2->transport_proto, 0);
+ TCP_TEST ((tconn == 0), "lookup result should be null");
+
+ return 0;
+}
+
+static int
+tcp_test_session (vlib_main_t * vm, unformat_input_t * input)
+{
+ int rv = 0;
+ tcp_connection_t *tc0;
+ u8 sst = SESSION_TYPE_IP4_TCP;
+ ip4_address_t local, remote;
+ u16 local_port, remote_port;
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ int is_add = 1;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "add"))
+ is_add = 1;
+ else
+ break;
+ }
+
+ if (is_add)
+ {
+ local.as_u32 = clib_host_to_net_u32 (0x06000101);
+ remote.as_u32 = clib_host_to_net_u32 (0x06000102);
+ local_port = clib_host_to_net_u16 (1234);
+ remote_port = clib_host_to_net_u16 (11234);
+
+ pool_get (tm->connections[0], tc0);
+ memset (tc0, 0, sizeof (*tc0));
+
+ tc0->state = TCP_STATE_ESTABLISHED;
+ tc0->rcv_las = 1;
+ tc0->c_c_index = tc0 - tm->connections[0];
+ tc0->c_lcl_port = local_port;
+ tc0->c_rmt_port = remote_port;
+ tc0->c_is_ip4 = 1;
+ tc0->c_thread_index = 0;
+ tc0->c_lcl_ip4.as_u32 = local.as_u32;
+ tc0->c_rmt_ip4.as_u32 = remote.as_u32;
+ tc0->rcv_opts.mss = 1450;
+ tcp_connection_init_vars (tc0);
+
+ TCP_EVT_DBG (TCP_EVT_OPEN, tc0);
+
+ if (stream_session_accept (&tc0->connection, 0 /* listener index */ ,
+ sst, 0 /* notify */ ))
+ clib_warning ("stream_session_accept failed");
+
+ stream_session_accept_notify (&tc0->connection);
+ }
+ else
+ {
+ tc0 = tcp_connection_get (0 /* connection index */ , 0 /* thread */ );
+ tc0->state = TCP_STATE_CLOSED;
+ stream_session_disconnect_notify (&tc0->connection);
+ }
+
+ return rv;
+}
+
+static clib_error_t *
+tcp_test (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd_arg)
+{
+ int res = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "sack"))
+ {
+ res = tcp_test_sack (vm, input);
+ }
+ else if (unformat (input, "fifo"))
+ {
+ res = tcp_test_fifo (vm, input);
+ }
+ else if (unformat (input, "session"))
+ {
+ res = tcp_test_session (vm, input);
+ }
+ else if (unformat (input, "lookup"))
+ {
+ res = tcp_test_lookup (vm, input);
+ }
+ else
+ break;
+ }
+
+ if (res)
+ {
+ return clib_error_return (0, "TCP unit test failed");
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tcp_test_command, static) =
+{
+ .path = "test tcp",
+ .short_help = "internal tcp unit tests",
+ .function = tcp_test,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h
new file mode 100644
index 00000000..fa25268c
--- /dev/null
+++ b/src/vnet/tcp/tcp_timer.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_tcp_timer_h__
+#define __included_tcp_timer_h__
+
+#include <vppinfra/tw_timer_16t_2w_512sl.h>
+#include <vppinfra/tw_timer_16t_1w_2048sl.h>
+
+#endif /* __included_tcp_timer_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/udp/builtin_server.c b/src/vnet/udp/builtin_server.c
new file mode 100644
index 00000000..7dd03670
--- /dev/null
+++ b/src/vnet/udp/builtin_server.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file
+ udp builtin server
+*/
+
+#include <vnet/udp/udp.h>
+#include <vnet/session/session.h>
+#include <vnet/session/application_interface.h>
+
+/** per-worker built-in server copy buffers */
+u8 **copy_buffers;
+
+static int
+builtin_session_create_callback (stream_session_t * s)
+{
+ /* Simple version: declare session ready-to-go... */
+ s->session_state = SESSION_STATE_READY;
+ return 0;
+}
+
+static void
+builtin_session_disconnect_callback (stream_session_t * s)
+{
+ stream_session_disconnect (s);
+}
+
+static int
+builtin_server_rx_callback (stream_session_t * s)
+{
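+  /* Echo server: drain the rx fifo into a per-thread copy buffer and
+   * enqueue the same bytes on the tx fifo */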
+ svm_fifo_t *rx_fifo, *tx_fifo;
+ u32 this_transfer, max_deq, max_enq;
+ int actual_transfer;
+ u8 *my_copy_buffer;
+ session_fifo_event_t evt;
+ unix_shared_memory_queue_t *q;
+
+ my_copy_buffer = copy_buffers[s->thread_index];
+ rx_fifo = s->server_rx_fifo;
+ tx_fifo = s->server_tx_fifo;
+
+  max_deq = svm_fifo_max_dequeue (rx_fifo);
+  max_enq = svm_fifo_max_enqueue (tx_fifo);
+  this_transfer = clib_min (max_enq, max_deq);
+
+  /* Nothing to echo, or tx fifo full; wait for the next rx event */
+  if (this_transfer == 0)
+    return 0;
+
+  vec_validate (my_copy_buffer, this_transfer - 1);
+  _vec_len (my_copy_buffer) = this_transfer;
+
+ actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, this_transfer,
+ my_copy_buffer);
+ ASSERT (actual_transfer == this_transfer);
+ actual_transfer = svm_fifo_enqueue_nowait (tx_fifo, this_transfer,
+ my_copy_buffer);
+ ASSERT (actual_transfer == this_transfer);
+
+ copy_buffers[s->thread_index] = my_copy_buffer;
+
+ if (svm_fifo_set_event (tx_fifo))
+ {
+ /* Fabricate TX event, send to ourselves */
+ evt.fifo = tx_fifo;
+ evt.event_type = FIFO_EVENT_APP_TX;
+ evt.event_id = 0;
+ q = session_manager_get_vpp_event_queue (s->thread_index);
+ unix_shared_memory_queue_add (q, (u8 *) & evt,
+ 0 /* do wait for mutex */ );
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+static session_cb_vft_t builtin_server = {
+ .session_accept_callback = builtin_session_create_callback,
+ .session_disconnect_callback = builtin_session_disconnect_callback,
+ .builtin_server_rx_callback = builtin_server_rx_callback
+};
+/* *INDENT-ON* */
+
+static int
+attach_builtin_uri_server ()
+{
+ vnet_app_attach_args_t _a, *a = &_a;
+ u8 segment_name[128];
+ u32 segment_name_length;
+ u64 options[16];
+
+ segment_name_length = ARRAY_LEN (segment_name);
+
+ memset (a, 0, sizeof (*a));
+ memset (options, 0, sizeof (options));
+
+ a->api_client_index = ~0;
+ a->segment_name = segment_name;
+ a->segment_name_length = segment_name_length;
+ a->session_cb_vft = &builtin_server;
+
+ options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
+ options[SESSION_OPTIONS_SEGMENT_SIZE] = (2 << 30); /*$$$$ config / arg */
+ options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+ options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 1024;
+
+ a->options = options;
+
+ return vnet_application_attach (a);
+}
+
+static int
+bind_builtin_uri_server (u8 * uri)
+{
+ vnet_bind_args_t _a, *a = &_a;
+ int rv;
+
+ rv = attach_builtin_uri_server ();
+ if (rv)
+ return rv;
+
+ memset (a, 0, sizeof (*a));
+ a->uri = (char *) uri;
+ a->app_index = ~0; /* built-in server */
+
+ rv = vnet_bind_uri (a);
+
+ return rv;
+}
+
+static int
+unbind_builtin_uri_server (u8 * uri)
+{
+  vnet_unbind_args_t _a, *a = &_a;
+
+  memset (a, 0, sizeof (*a));
+  a->app_index = ~0;
+  a->uri = (char *) uri;
+
+ return vnet_unbind_uri (a);
+}
+
+static clib_error_t *
+builtin_server_init (vlib_main_t * vm)
+{
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+
+ vec_validate (copy_buffers, num_threads - 1);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (builtin_server_init);
+
+static clib_error_t *
+builtin_uri_bind_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *uri = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "uri %s", &uri))
+ ;
+ else
+ break;
+ }
+
+ if (uri == 0)
+ return clib_error_return (0, "uri to bind not specified...");
+
+ rv = bind_builtin_uri_server (uri);
+
+ vec_free (uri);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    default:
+      return clib_error_return (0, "bind_builtin_uri_server returned %d",
+                                rv);
+    }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (builtin_uri_bind_command, static) =
+{
+ .path = "builtin uri bind",
+ .short_help = "builtin uri bind",
+ .function = builtin_uri_bind_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+builtin_uri_unbind_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 *uri = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "uri %s", &uri))
+ ;
+ else
+ break;
+ }
+
+ if (uri == 0)
+ return clib_error_return (0, "uri to unbind not specified...");
+
+ rv = unbind_builtin_uri_server (uri);
+
+ vec_free (uri);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    default:
+      return clib_error_return (0, "unbind_builtin_uri_server returned %d",
+                                rv);
+    }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (builtin_uri_unbind_command, static) =
+{
+ .path = "builtin uri unbind",
+ .short_help = "builtin uri unbind",
+ .function = builtin_uri_unbind_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
new file mode 100644
index 00000000..0e0336b5
--- /dev/null
+++ b/src/vnet/udp/udp.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file
+ udp state machine, etc.
+*/
+
+#include <vnet/udp/udp.h>
+#include <vnet/session/session.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/ip4_fib.h>
+
+udp_uri_main_t udp_uri_main;
+
+u32
+udp_session_bind_ip4 (u32 session_index, transport_endpoint_t * lcl)
+{
+ udp_uri_main_t *um = vnet_get_udp_main ();
+ udp_connection_t *listener;
+
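+  /* Allocate a listener and point the udp-local dispatch table for this
+   * dst port at the udp4-uri-input node */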
+ pool_get (um->udp_listeners, listener);
+ memset (listener, 0, sizeof (udp_connection_t));
+ listener->c_lcl_port = lcl->port;
+ listener->c_lcl_ip4.as_u32 = lcl->ip.ip4.as_u32;
+ listener->c_transport_proto = TRANSPORT_PROTO_UDP;
+ udp_register_dst_port (um->vlib_main, clib_net_to_host_u16 (lcl->port),
+ udp4_uri_input_node.index, 1 /* is_ipv4 */ );
+ return 0;
+}
+
+u32
+udp_session_bind_ip6 (u32 session_index, transport_endpoint_t * lcl)
+{
+ udp_uri_main_t *um = vnet_get_udp_main ();
+ udp_connection_t *listener;
+
+  pool_get (um->udp_listeners, listener);
+  memset (listener, 0, sizeof (udp_connection_t));
+  listener->c_lcl_port = lcl->port;
+ clib_memcpy (&listener->c_lcl_ip6, &lcl->ip.ip6, sizeof (ip6_address_t));
+ listener->c_transport_proto = TRANSPORT_PROTO_UDP;
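+  /* Note: there is no udp6 uri input node yet, so the udp4 node is
+   * registered in the ip6 port table as well */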
+ udp_register_dst_port (um->vlib_main, clib_net_to_host_u16 (lcl->port),
+ udp4_uri_input_node.index, 0 /* is_ipv4 */ );
+ return 0;
+}
+
+u32
+udp_session_unbind_ip4 (u32 listener_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ udp_connection_t *listener;
+ listener = udp_listener_get (listener_index);
+
+ /* deregister the udp_local mapping */
+  udp_unregister_dst_port (vm, clib_net_to_host_u16 (listener->c_lcl_port),
+			   1 /* is_ipv4 */ );
+ return 0;
+}
+
+u32
+udp_session_unbind_ip6 (u32 listener_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ udp_connection_t *listener;
+
+ listener = udp_listener_get (listener_index);
+
+ /* deregister the udp_local mapping */
+  udp_unregister_dst_port (vm, clib_net_to_host_u16 (listener->c_lcl_port),
+			   0 /* is_ipv4 */ );
+ return 0;
+}
+
+transport_connection_t *
+udp_session_get_listener (u32 listener_index)
+{
+ udp_connection_t *us;
+
+ us = udp_listener_get (listener_index);
+ return &us->connection;
+}
+
+u32
+udp_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
+{
+ udp_connection_t *us;
+ u8 *data;
+ udp_header_t *udp;
+
+ us = (udp_connection_t *) tconn;
+
+ if (tconn->is_ip4)
+ {
+ ip4_header_t *ip;
+
+ data = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (data - sizeof (udp_header_t));
+ ip = (ip4_header_t *) ((u8 *) udp - sizeof (ip4_header_t));
+
+ /* Build packet header, swap rx key src + dst fields */
+ ip->src_address.as_u32 = us->c_lcl_ip4.as_u32;
+ ip->dst_address.as_u32 = us->c_rmt_ip4.as_u32;
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+      ip->length = clib_host_to_net_u16 (b->current_length + sizeof (*ip)
+					 + sizeof (*udp));
+      ip->checksum = ip4_header_checksum (ip);
+
+      udp->src_port = us->c_lcl_port;
+      udp->dst_port = us->c_rmt_port;
+      udp->length = clib_host_to_net_u16 (b->current_length + sizeof (*udp));
+      udp->checksum = 0;
+
+      /* Expose the freshly written headers to ip4-lookup */
+      vlib_buffer_advance (b, -(word) (sizeof (*ip) + sizeof (*udp)));
+      return SESSION_QUEUE_NEXT_IP4_LOOKUP;
+ }
+ else
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ ip6_header_t *ip;
+ u16 payload_length;
+ int bogus = ~0;
+
+ data = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (data - sizeof (udp_header_t));
+ ip = (ip6_header_t *) ((u8 *) udp - sizeof (ip6_header_t));
+
+ /* Build packet header, swap rx key src + dst fields */
+ clib_memcpy (&ip->src_address, &us->c_lcl_ip6, sizeof (ip6_address_t));
+ clib_memcpy (&ip->dst_address, &us->c_rmt_ip6, sizeof (ip6_address_t));
+
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ ip->hop_limit = 0xff;
+ ip->protocol = IP_PROTOCOL_UDP;
+
+      /* Prepend the headers before computing lengths and checksums */
+      vlib_buffer_advance (b, -(word) (sizeof (*ip) + sizeof (*udp)));
+
+      payload_length = vlib_buffer_length_in_chain (vm, b) - sizeof (*ip);
+      ip->payload_length = clib_host_to_net_u16 (payload_length);
+
+      udp->src_port = us->c_lcl_port;
+      udp->dst_port = us->c_rmt_port;
+      udp->length = ip->payload_length;
+      udp->checksum = 0;
+
+      /* Compute the checksum last, once all header fields are in place */
+      udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip, &bogus);
+      ASSERT (!bogus);
+
+      return SESSION_QUEUE_NEXT_IP6_LOOKUP;
+ }
+}
+
+transport_connection_t *
+udp_session_get (u32 connection_index, u32 my_thread_index)
+{
+ udp_uri_main_t *um = vnet_get_udp_main ();
+
+ udp_connection_t *us;
+ us =
+ pool_elt_at_index (um->udp_sessions[my_thread_index], connection_index);
+ return &us->connection;
+}
+
+void
+udp_session_close (u32 connection_index, u32 my_thread_index)
+{
+ udp_uri_main_t *um = vnet_get_udp_main ();
+ pool_put_index (um->udp_sessions[my_thread_index], connection_index);
+}
+
+u8 *
+format_udp_session_ip4 (u8 * s, va_list * args)
+{
+ u32 uci = va_arg (*args, u32);
+ u32 thread_index = va_arg (*args, u32);
+ udp_connection_t *u4;
+
+ u4 = udp_connection_get (uci, thread_index);
+
+ s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address,
+ &u4->c_lcl_ip4, clib_net_to_host_u16 (u4->c_lcl_port),
+ format_ip4_address, &u4->c_rmt_ip4,
+ clib_net_to_host_u16 (u4->c_rmt_port));
+ return s;
+}
+
+u8 *
+format_udp_session_ip6 (u8 * s, va_list * args)
+{
+ u32 uci = va_arg (*args, u32);
+ u32 thread_index = va_arg (*args, u32);
+ udp_connection_t *tc = udp_connection_get (uci, thread_index);
+ s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address,
+ &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port),
+ format_ip6_address, &tc->c_rmt_ip6,
+ clib_net_to_host_u16 (tc->c_rmt_port));
+ return s;
+}
+
+u8 *
+format_udp_listener_session_ip4 (u8 * s, va_list * args)
+{
+ u32 tci = va_arg (*args, u32);
+ udp_connection_t *tc = udp_listener_get (tci);
+ s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address,
+ &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port),
+ format_ip4_address, &tc->c_rmt_ip4,
+ clib_net_to_host_u16 (tc->c_rmt_port));
+ return s;
+}
+
+u8 *
+format_udp_listener_session_ip6 (u8 * s, va_list * args)
+{
+ u32 tci = va_arg (*args, u32);
+ udp_connection_t *tc = udp_listener_get (tci);
+ s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address,
+ &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port),
+ format_ip6_address, &tc->c_rmt_ip6,
+ clib_net_to_host_u16 (tc->c_rmt_port));
+ return s;
+}
+
+u16
+udp_send_mss_uri (transport_connection_t * t)
+{
+ /* TODO figure out MTU of output interface */
+ return 400;
+}
+
+u32
+udp_send_space_uri (transport_connection_t * t)
+{
+ /* No constraint on TX window */
+ return ~0;
+}
+
+int
+udp_open_connection (transport_endpoint_t * tep)
+{
+ clib_warning ("Not implemented");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+const static transport_proto_vft_t udp4_proto = {
+ .bind = udp_session_bind_ip4,
+ .open = udp_open_connection,
+ .unbind = udp_session_unbind_ip4,
+ .push_header = udp_push_header,
+ .get_connection = udp_session_get,
+ .get_listener = udp_session_get_listener,
+ .close = udp_session_close,
+ .send_mss = udp_send_mss_uri,
+ .send_space = udp_send_space_uri,
+ .format_connection = format_udp_session_ip4,
+ .format_listener = format_udp_listener_session_ip4
+};
+
+const static transport_proto_vft_t udp6_proto = {
+ .bind = udp_session_bind_ip6,
+ .open = udp_open_connection,
+ .unbind = udp_session_unbind_ip6,
+ .push_header = udp_push_header,
+ .get_connection = udp_session_get,
+ .get_listener = udp_session_get_listener,
+ .close = udp_session_close,
+ .send_mss = udp_send_mss_uri,
+ .send_space = udp_send_space_uri,
+ .format_connection = format_udp_session_ip6,
+ .format_listener = format_udp_listener_session_ip6
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+udp_init (vlib_main_t * vm)
+{
+ udp_uri_main_t *um = vnet_get_udp_main ();
+ ip_main_t *im = &ip_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 num_threads;
+ clib_error_t *error = 0;
+ ip_protocol_info_t *pi;
+
+ um->vlib_main = vm;
+ um->vnet_main = vnet_get_main ();
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+ if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+ return error;
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ /*
+ * Registrations
+ */
+
+ /* IP registration */
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP);
+ if (pi == 0)
+ return clib_error_return (0, "UDP protocol info AWOL");
+ pi->format_header = format_udp_header;
+ pi->unformat_pg_edit = unformat_pg_udp_header;
+
+ /* Register as transport with URI */
+ session_register_transport (TRANSPORT_PROTO_UDP, 1, &udp4_proto);
+ session_register_transport (TRANSPORT_PROTO_UDP, 0, &udp6_proto);
+
+ /*
+ * Initialize data structures
+ */
+
+ num_threads = 1 /* main thread */ + tm->n_threads;
+ vec_validate (um->udp_sessions, num_threads - 1);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (udp_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
new file mode 100644
index 00000000..a9a62c27
--- /dev/null
+++ b/src/vnet/udp/udp.h
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_udp_h__
+#define __included_udp_h__
+
+#include <vnet/vnet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/format.h>
+
+#include <vnet/session/transport.h>
+
+typedef struct
+{
+  transport_connection_t connection; /**< must be first */
+
+ /** ersatz MTU to limit fifo pushes to test data size */
+ u32 mtu;
+} udp_connection_t;
+
+typedef struct _udp_uri_main
+{
+ /* Per-worker thread udp connection pools */
+ udp_connection_t **udp_sessions;
+ udp_connection_t *udp_listeners;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ ip4_main_t *ip4_main;
+ ip6_main_t *ip6_main;
+} udp_uri_main_t;
+
+extern udp_uri_main_t udp_uri_main;
+extern vlib_node_registration_t udp4_uri_input_node;
+
+always_inline udp_uri_main_t *
+vnet_get_udp_main ()
+{
+ return &udp_uri_main;
+}
+
+always_inline udp_connection_t *
+udp_connection_get (u32 conn_index, u32 thread_index)
+{
+ return pool_elt_at_index (udp_uri_main.udp_sessions[thread_index],
+ conn_index);
+}
+
+always_inline udp_connection_t *
+udp_listener_get (u32 conn_index)
+{
+ return pool_elt_at_index (udp_uri_main.udp_listeners, conn_index);
+}
+
+typedef enum
+{
+#define udp_error(n,s) UDP_ERROR_##n,
+#include <vnet/udp/udp_error.def>
+#undef udp_error
+ UDP_N_ERROR,
+} udp_error_t;
+
+#define foreach_udp4_dst_port \
+_ (67, dhcp_to_server) \
+_ (68, dhcp_to_client) \
+_ (500, ikev2) \
+_ (2152, GTPU) \
+_ (3784, bfd4) \
+_ (3785, bfd_echo4) \
+_ (4341, lisp_gpe) \
+_ (4342, lisp_cp) \
+_ (4739, ipfix) \
+_ (4789, vxlan) \
+_ (4789, vxlan6) \
+_ (4790, VXLAN_GPE) \
+_ (6633, vpath_3)
+
+
+#define foreach_udp6_dst_port \
+_ (547, dhcpv6_to_server) \
+_ (546, dhcpv6_to_client) \
+_ (2152, GTPU6) \
+_ (3784, bfd6) \
+_ (3785, bfd_echo6) \
+_ (4341, lisp_gpe6) \
+_ (4342, lisp_cp6) \
+_ (4790, VXLAN6_GPE) \
+_ (6633, vpath6_3)
+
+typedef enum
+{
+#define _(n,f) UDP_DST_PORT_##f = n,
+ foreach_udp4_dst_port foreach_udp6_dst_port
+#undef _
+} udp_dst_port_t;
+
+typedef enum
+{
+#define _(n,f) UDP6_DST_PORT_##f = n,
+ foreach_udp6_dst_port
+#undef _
+} udp6_dst_port_t;
+
+typedef struct
+{
+ /* Name (a c string). */
+ char *name;
+
+  /* Dst port in host byte order. */
+ udp_dst_port_t dst_port;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} udp_dst_port_info_t;
+
+typedef enum
+{
+ UDP_IP6 = 0,
+ UDP_IP4, /* the code is full of is_ip4... */
+ N_UDP_AF,
+} udp_af_t;
+
+typedef struct
+{
+ udp_dst_port_info_t *dst_port_infos[N_UDP_AF];
+
+ /* Hash tables mapping name/protocol to protocol info index. */
+ uword *dst_port_info_by_name[N_UDP_AF];
+ uword *dst_port_info_by_dst_port[N_UDP_AF];
+
+ /* Sparse vector mapping udp dst_port in network byte order
+ to next index. */
+ u16 *next_by_dst_port4;
+ u16 *next_by_dst_port6;
+ u8 punt_unknown4;
+ u8 punt_unknown6;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+} udp_main_t;
+
+always_inline udp_dst_port_info_t *
+udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4)
+{
+ uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port);
+ return p ? vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0;
+}
+
+format_function_t format_udp_header;
+format_function_t format_udp_rx_trace;
+
+unformat_function_t unformat_udp_header;
+
+void udp_register_dst_port (vlib_main_t * vm,
+ udp_dst_port_t dst_port,
+ u32 node_index, u8 is_ip4);
+
+void
+udp_unregister_dst_port (vlib_main_t * vm,
+ udp_dst_port_t dst_port, u8 is_ip4);
+
+void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
+
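+/**
+ * Fix the outer IP and UDP length fields (and checksums) after an encap
+ * rewrite has been prepended; assumes the buffer's current data pointer
+ * sits on the outer IP header.
+ */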
+always_inline void
+ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
+{
+ u16 new_l0;
+ udp_header_t *udp0;
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip0;
+ ip_csum_t sum0;
+ u16 old_l0 = 0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+ ip0->checksum = ip_csum_fold (sum0);
+ ip0->length = new_l0;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *) (ip0 + 1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+ udp0->length = new_l0;
+ }
+ else
+ {
+ ip6_header_t *ip0;
+ int bogus0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+ ip0->payload_length = new_l0;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *) (ip0 + 1);
+ udp0->length = new_l0;
+
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
+ ASSERT (bogus0 == 0);
+
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
+}
+
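+/**
+ * Prepend the precomputed ip-udp encap string @a ec0 of @a ec_len bytes
+ * and fix up the length/checksum fields for the final packet size.
+ */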
+always_inline void
+ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
+ u8 is_ip4)
+{
+ vlib_buffer_advance (b0, -ec_len);
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Apply the encap string. */
+ clib_memcpy (ip0, ec0, ec_len);
+ ip_udp_fixup_one (vm, b0, 1);
+ }
+ else
+ {
+ ip6_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Apply the encap string. */
+ clib_memcpy (ip0, ec0, ec_len);
+ ip_udp_fixup_one (vm, b0, 0);
+ }
+}
+
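+/** Two-buffer variant of ip_udp_encap_one, for dual-loop encap nodes */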
+always_inline void
+ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u8 * ec0, u8 * ec1, word ec_len, u8 is_v4)
+{
+ u16 new_l0, new_l1;
+ udp_header_t *udp0, *udp1;
+
+ ASSERT (_vec_len (ec0) == _vec_len (ec1));
+
+ vlib_buffer_advance (b0, -ec_len);
+ vlib_buffer_advance (b1, -ec_len);
+
+ if (is_v4)
+ {
+ ip4_header_t *ip0, *ip1;
+ ip_csum_t sum0, sum1;
+ u16 old_l0 = 0, old_l1 = 0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* Apply the encap string */
+ clib_memcpy (ip0, ec0, ec_len);
+ clib_memcpy (ip1, ec1, ec_len);
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ sum1 = ip1->checksum;
+
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+ sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
+ length /* changed member */ );
+
+ ip0->checksum = ip_csum_fold (sum0);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ ip0->length = new_l0;
+ ip1->length = new_l1;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *) (ip0 + 1);
+ udp1 = (udp_header_t *) (ip1 + 1);
+
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
+ sizeof (*ip0));
+ new_l1 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) -
+ sizeof (*ip1));
+ udp0->length = new_l0;
+ udp1->length = new_l1;
+ }
+ else
+ {
+ ip6_header_t *ip0, *ip1;
+ int bogus0, bogus1;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* Apply the encap string. */
+ clib_memcpy (ip0, ec0, ec_len);
+ clib_memcpy (ip1, ec1, ec_len);
+
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
+ - sizeof (*ip1));
+ ip0->payload_length = new_l0;
+ ip1->payload_length = new_l1;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *) (ip0 + 1);
+ udp1 = (udp_header_t *) (ip1 + 1);
+
+ udp0->length = new_l0;
+ udp1->length = new_l1;
+
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
+ udp1->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1);
+ ASSERT (bogus0 == 0);
+ ASSERT (bogus1 == 0);
+
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ if (udp1->checksum == 0)
+ udp1->checksum = 0xffff;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif /* __included_udp_h__ */
diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def
new file mode 100644
index 00000000..bfdae0ac
--- /dev/null
+++ b/src/vnet/udp/udp_error.def
@@ -0,0 +1,21 @@
+/*
+ * udp_error.def: udp errors
+ *
+ * Copyright (c) 2013-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+udp_error (NONE, "no error")
+udp_error (NO_LISTENER, "no listener for dst port")
+udp_error (LENGTH_ERROR, "UDP packets with length errors")
+udp_error (PUNT, "no listener punt")
diff --git a/src/vnet/udp/udp_format.c b/src/vnet/udp/udp_format.c
new file mode 100644
index 00000000..abdf561e
--- /dev/null
+++ b/src/vnet/udp/udp_format.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_format.c: udp formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format UDP header. */
+u8 *
+format_udp_header (u8 * s, va_list * args)
+{
+ udp_header_t *udp = va_arg (*args, udp_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ uword indent;
+ u32 header_bytes = sizeof (udp[0]);
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (udp[0]))
+ return format (s, "UDP header truncated");
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ s = format (s, "UDP: %d -> %d",
+ clib_net_to_host_u16 (udp->src_port),
+ clib_net_to_host_u16 (udp->dst_port));
+
+ s = format (s, "\n%Ulength %d, checksum 0x%04x",
+ format_white_space, indent,
+ clib_net_to_host_u16 (udp->length),
+ clib_net_to_host_u16 (udp->checksum));
+
+ /* Recurse into next protocol layer. */
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+
+ pi = ip_get_tcp_udp_port_info (im, udp->dst_port);
+
+ if (pi && pi->format_header)
+ s = format (s, "\n%U%U",
+ format_white_space, indent - 2, pi->format_header,
+ /* next protocol header */ (udp + 1),
+ max_header_bytes - sizeof (udp[0]));
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
new file mode 100644
index 00000000..6ccb1e52
--- /dev/null
+++ b/src/vnet/udp/udp_input.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/udp/udp.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vnet/udp/udp_packet.h>
+
+#include <vlibmemory/api.h>
+#include "../session/application_interface.h"
+
+vlib_node_registration_t udp4_uri_input_node;
+
+typedef struct
+{
+ u32 session;
+ u32 disposition;
+ u32 thread_index;
+} udp4_uri_input_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_udp4_uri_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ udp4_uri_input_trace_t *t = va_arg (*args, udp4_uri_input_trace_t *);
+
+ s = format (s, "UDP4_URI_INPUT: session %d, disposition %d, thread %d",
+ t->session, t->disposition, t->thread_index);
+ return s;
+}
+
+typedef enum
+{
+ UDP4_URI_INPUT_NEXT_DROP,
+ UDP4_URI_INPUT_N_NEXT,
+} udp4_uri_input_next_t;
+
+static char *udp4_uri_input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_session_input_error
+#undef _
+};
+
+static uword
+udp4_uri_input_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ udp4_uri_input_next_t next_index;
+ udp_uri_main_t *um = vnet_get_udp_main ();
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ u32 my_thread_index = vm->thread_index;
+ u8 my_enqueue_epoch;
+ u32 *session_indices_to_enqueue;
+ static u32 serial_number;
+ int i;
+
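+  /* Bump the per-thread enqueue epoch; it ensures each session gets at
+   * most one rx event per frame, however many packets it receives */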
+ my_enqueue_epoch = ++smm->current_enqueue_epoch[my_thread_index];
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = UDP4_URI_INPUT_NEXT_DROP;
+ u32 error0 = SESSION_ERROR_ENQUEUED;
+ udp_header_t *udp0;
+ ip4_header_t *ip0;
+ stream_session_t *s0;
+ svm_fifo_t *f0;
+ u16 udp_len0;
+ u8 *data0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* udp_local hands us a pointer to the udp data */
+
+ data0 = vlib_buffer_get_current (b0);
+ udp0 = (udp_header_t *) (data0 - sizeof (*udp0));
+
+ /* $$$$ fixme: udp_local doesn't do ip options correctly anyhow */
+ ip0 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip0));
+ s0 = 0;
+
+ /* lookup session */
+ s0 = stream_session_lookup4 (&ip0->dst_address, &ip0->src_address,
+ udp0->dst_port, udp0->src_port,
+ SESSION_TYPE_IP4_UDP);
+
+ /* no listener */
+ if (PREDICT_FALSE (s0 == 0))
+ {
+ error0 = SESSION_ERROR_NO_LISTENER;
+ goto trace0;
+ }
+
+ f0 = s0->server_rx_fifo;
+
+ /* established hit */
+ if (PREDICT_TRUE (s0->session_state == SESSION_STATE_READY))
+ {
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+
+ if (PREDICT_FALSE (udp_len0 > svm_fifo_max_enqueue (f0)))
+ {
+ error0 = SESSION_ERROR_FIFO_FULL;
+ goto trace0;
+ }
+
+ svm_fifo_enqueue_nowait (f0, udp_len0 - sizeof (*udp0),
+ (u8 *) (udp0 + 1));
+
+ b0->error = node->errors[SESSION_ERROR_ENQUEUED];
+
+ /* We need to send an RX event on this fifo */
+ if (s0->enqueue_epoch != my_enqueue_epoch)
+ {
+ s0->enqueue_epoch = my_enqueue_epoch;
+
+ vec_add1 (smm->session_indices_to_enqueue_by_thread
+ [my_thread_index],
+ s0 - smm->sessions[my_thread_index]);
+ }
+ }
+ /* listener hit */
+ else if (s0->session_state == SESSION_STATE_LISTENING)
+ {
+ udp_connection_t *us;
+ int rv;
+
+ error0 = SESSION_ERROR_NOT_READY;
+
+ /*
+ * create udp transport session
+ */
+ pool_get (um->udp_sessions[my_thread_index], us);
+
+ us->mtu = 1024; /* $$$$ policy */
+
+ us->c_lcl_ip4.as_u32 = ip0->dst_address.as_u32;
+ us->c_rmt_ip4.as_u32 = ip0->src_address.as_u32;
+ us->c_lcl_port = udp0->dst_port;
+ us->c_rmt_port = udp0->src_port;
+ us->c_transport_proto = TRANSPORT_PROTO_UDP;
+ us->c_c_index = us - um->udp_sessions[my_thread_index];
+
+ /*
+ * create stream session and attach the udp session to it
+ */
+ rv = stream_session_accept (&us->connection, s0->session_index,
+ SESSION_TYPE_IP4_UDP,
+ 1 /*notify */ );
+ if (rv)
+ error0 = rv;
+
+ }
+	  else
+	    {
+	      error0 = SESSION_ERROR_NOT_READY;
+	      goto trace0;
+	    }
+
+ trace0:
+ b0->error = node->errors[error0];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ udp4_uri_input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+
+ t->session = ~0;
+ if (s0)
+ t->session = s0 - smm->sessions[my_thread_index];
+ t->disposition = error0;
+ t->thread_index = my_thread_index;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Send enqueue events */
+
+ session_indices_to_enqueue =
+ smm->session_indices_to_enqueue_by_thread[my_thread_index];
+
+ for (i = 0; i < vec_len (session_indices_to_enqueue); i++)
+ {
+ session_fifo_event_t evt;
+ unix_shared_memory_queue_t *q;
+ stream_session_t *s0;
+ application_t *server0;
+
+ /* Get session */
+ s0 = pool_elt_at_index (smm->sessions[my_thread_index],
+ session_indices_to_enqueue[i]);
+
+ /* Get session's server */
+ server0 = application_get (s0->app_index);
+
+ /* Built-in server? Deliver the goods... */
+ if (server0->cb_fns.builtin_server_rx_callback)
+ {
+ server0->cb_fns.builtin_server_rx_callback (s0);
+ continue;
+ }
+
+ if (svm_fifo_set_event (s0->server_rx_fifo))
+ {
+ /* Fabricate event */
+ evt.fifo = s0->server_rx_fifo;
+ evt.event_type = FIFO_EVENT_APP_RX;
+ evt.event_id = serial_number++;
+
+ /* Add event to server's event queue */
+ q = server0->event_queue;
+
+ /* Don't block for lack of space */
+ if (PREDICT_TRUE (q->cursize < q->maxsize))
+ {
+ unix_shared_memory_queue_add (server0->event_queue,
+ (u8 *) & evt,
+ 0 /* do wait for mutex */ );
+ }
+ else
+ {
+ vlib_node_increment_counter (vm, udp4_uri_input_node.index,
+ SESSION_ERROR_FIFO_FULL, 1);
+ }
+ }
+ /* *INDENT-OFF* */
+      if (1)
+	{
+	  ELOG_TYPE_DECLARE (e) =
+	    {
+	      .format = "evt-enqueue: id %d length %d",
+	      .format_args = "i4i4",
+	    };
+	  struct
+	  {
+	    u32 data[2];
+	  } *ed;
+	  ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+	  ed->data[0] = evt.event_id;
+	  ed->data[1] = svm_fifo_max_dequeue (s0->server_rx_fifo);
+	}
+ /* *INDENT-ON* */
+
+ }
+
+ vec_reset_length (session_indices_to_enqueue);
+
+ smm->session_indices_to_enqueue_by_thread[my_thread_index] =
+ session_indices_to_enqueue;
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_uri_input_node) =
+{
+  .function = udp4_uri_input_node_fn,
+  .name = "udp4-uri-input",
+  .vector_size = sizeof (u32),
+  .format_trace = format_udp4_uri_input_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (udp4_uri_input_error_strings),
+  .error_strings = udp4_uri_input_error_strings,
+  .n_next_nodes = UDP4_URI_INPUT_N_NEXT,
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [UDP4_URI_INPUT_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c
new file mode 100644
index 00000000..8c0ac465
--- /dev/null
+++ b/src/vnet/udp/udp_local.c
@@ -0,0 +1,656 @@
+/*
+ * node.c: udp packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/udp/udp.h>
+#include <vnet/udp/udp_packet.h>
+#include <vppinfra/sparse_vec.h>
+
+udp_main_t udp_main;
+
+#define foreach_udp_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop") \
+ _ (ICMP4_ERROR, "ip4-icmp-error") \
+ _ (ICMP6_ERROR, "ip6-icmp-error")
+
+typedef enum
+{
+#define _(s,n) UDP_INPUT_NEXT_##s,
+ foreach_udp_input_next
+#undef _
+ UDP_INPUT_N_NEXT,
+} udp_input_next_t;
+
+typedef struct
+{
+ u16 src_port;
+ u16 dst_port;
+ u8 bound;
+} udp_rx_trace_t;
+
+u8 *
+format_udp_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *);
+
+ s = format (s, "UDP: src-port %d dst-port %d%s",
+ clib_net_to_host_u16 (t->src_port),
+ clib_net_to_host_u16 (t->dst_port),
+ t->bound ? "" : " (no listener)");
+ return s;
+}
+
+vlib_node_registration_t udp4_input_node;
+vlib_node_registration_t udp6_input_node;
+
+always_inline uword
+udp46_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next;
+ word n_no_listener = 0;
+ u8 punt_unknown = is_ip4 ? um->punt_unknown4 : um->punt_unknown6;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
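+      /* Dual loop: process two packets per iteration while at least four
+         remain, so the buffer-header prefetches issued below stay ahead
+         of the work. */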
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ udp_header_t *h0 = 0, *h1 = 0;
+ u32 i0, i1, dst_port0, dst_port1;
+ u32 advance0, advance1;
+ u32 error0, next0, error1, next1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* ip4/6_local hands us the ip header, not the udp header */
+ if (is_ip4)
+ {
+ advance0 = sizeof (ip4_header_t);
+ advance1 = sizeof (ip4_header_t);
+ }
+ else
+ {
+ advance0 = sizeof (ip6_header_t);
+ advance1 = sizeof (ip6_header_t);
+ }
+
+ if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0)))
+ {
+ error0 = UDP_ERROR_LENGTH_ERROR;
+ next0 = UDP_INPUT_NEXT_DROP;
+ }
+ else
+ {
+ vlib_buffer_advance (b0, advance0);
+ h0 = vlib_buffer_get_current (b0);
+ error0 = next0 = 0;
+ if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) >
+ vlib_buffer_length_in_chain (vm, b0)))
+ {
+ error0 = UDP_ERROR_LENGTH_ERROR;
+ next0 = UDP_INPUT_NEXT_DROP;
+ }
+ }
+
+ if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1)))
+ {
+ error1 = UDP_ERROR_LENGTH_ERROR;
+ next1 = UDP_INPUT_NEXT_DROP;
+ }
+ else
+ {
+ vlib_buffer_advance (b1, advance1);
+ h1 = vlib_buffer_get_current (b1);
+ error1 = next1 = 0;
+ if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) >
+ vlib_buffer_length_in_chain (vm, b1)))
+ {
+ error1 = UDP_ERROR_LENGTH_ERROR;
+ next1 = UDP_INPUT_NEXT_DROP;
+ }
+ }
+
+ /* Index sparse array with network byte order. */
+ dst_port0 = (error0 == 0) ? h0->dst_port : 0;
+ dst_port1 = (error1 == 0) ? h1->dst_port : 0;
+ sparse_vec_index2 (is_ip4 ? um->next_by_dst_port4 :
+ um->next_by_dst_port6,
+ dst_port0, dst_port1, &i0, &i1);
+ next0 = (error0 == 0) ?
+ vec_elt (is_ip4 ? um->next_by_dst_port4 : um->next_by_dst_port6,
+ i0) : next0;
+ next1 = (error1 == 0) ?
+ vec_elt (is_ip4 ? um->next_by_dst_port4 : um->next_by_dst_port6,
+ i1) : next1;
+
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX))
+ {
+ // move the pointer back so icmp-error can find the
+ // ip packet header
+ vlib_buffer_advance (b0, -(word) advance0);
+
+ if (PREDICT_FALSE (punt_unknown))
+ {
+ b0->error = node->errors[UDP_ERROR_PUNT];
+ next0 = UDP_INPUT_NEXT_PUNT;
+ }
+ else if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (b0,
+ ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_port_unreachable,
+ 0);
+ next0 = UDP_INPUT_NEXT_ICMP4_ERROR;
+ n_no_listener++;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (b0,
+ ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_port_unreachable,
+ 0);
+ next0 = UDP_INPUT_NEXT_ICMP6_ERROR;
+ n_no_listener++;
+ }
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX))
+ {
+ // move the pointer back so icmp-error can find the
+ // ip packet header
+ vlib_buffer_advance (b1, -(word) advance1);
+
+ if (PREDICT_FALSE (punt_unknown))
+ {
+ b1->error = node->errors[UDP_ERROR_PUNT];
+ next1 = UDP_INPUT_NEXT_PUNT;
+ }
+ else if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (b1,
+ ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_port_unreachable,
+ 0);
+ next1 = UDP_INPUT_NEXT_ICMP4_ERROR;
+ n_no_listener++;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (b1,
+ ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_port_unreachable,
+ 0);
+ next1 = UDP_INPUT_NEXT_ICMP6_ERROR;
+ n_no_listener++;
+ }
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR])
+ {
+ tr->src_port = h0 ? h0->src_port : 0;
+ tr->dst_port = h0 ? h0->dst_port : 0;
+ tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR &&
+ next0 != UDP_INPUT_NEXT_ICMP6_ERROR);
+ }
+ }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR])
+ {
+ tr->src_port = h1 ? h1->src_port : 0;
+ tr->dst_port = h1 ? h1->dst_port : 0;
+ tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR &&
+ next1 != UDP_INPUT_NEXT_ICMP6_ERROR);
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
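+      /* Single loop: finish any remaining packets one at a time. */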
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ udp_header_t *h0 = 0;
+ u32 i0, next0;
+ u32 advance0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* ip4/6_local hands us the ip header, not the udp header */
+ if (is_ip4)
+ advance0 = sizeof (ip4_header_t);
+ else
+ advance0 = sizeof (ip6_header_t);
+
+ if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0)))
+ {
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
+ next0 = UDP_INPUT_NEXT_DROP;
+ goto trace_x1;
+ }
+
+ vlib_buffer_advance (b0, advance0);
+
+ h0 = vlib_buffer_get_current (b0);
+
+ if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <=
+ vlib_buffer_length_in_chain (vm, b0)))
+ {
+ i0 = sparse_vec_index (is_ip4 ? um->next_by_dst_port4 :
+ um->next_by_dst_port6, h0->dst_port);
+ next0 = vec_elt (is_ip4 ? um->next_by_dst_port4 :
+ um->next_by_dst_port6, i0);
+
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX))
+ {
+ // move the pointer back so icmp-error can find the
+ // ip packet header
+ vlib_buffer_advance (b0, -(word) advance0);
+
+ if (PREDICT_FALSE (punt_unknown))
+ {
+ b0->error = node->errors[UDP_ERROR_PUNT];
+ next0 = UDP_INPUT_NEXT_PUNT;
+ }
+ else if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (b0,
+ ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_port_unreachable,
+ 0);
+ next0 = UDP_INPUT_NEXT_ICMP4_ERROR;
+ n_no_listener++;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (b0,
+ ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_port_unreachable,
+ 0);
+ next0 = UDP_INPUT_NEXT_ICMP6_ERROR;
+ n_no_listener++;
+ }
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
+ next0 = UDP_INPUT_NEXT_DROP;
+ }
+
+ trace_x1:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR])
+ {
+ tr->src_port = h0->src_port;
+ tr->dst_port = h0->dst_port;
+ tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR &&
+ next0 != UDP_INPUT_NEXT_ICMP6_ERROR);
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER,
+ n_no_listener);
+ return from_frame->n_vectors;
+}
+
+static char *udp_error_strings[] = {
+#define udp_error(n,s) s,
+#include "udp_error.def"
+#undef udp_error
+};
+
+static uword
+udp4_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+udp6_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_input_node) = {
+ .function = udp4_input,
+ .name = "ip4-udp-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = UDP_N_ERROR,
+ .error_strings = udp_error_strings,
+
+ .n_next_nodes = UDP_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [UDP_INPUT_NEXT_##s] = n,
+ foreach_udp_input_next
+#undef _
+ },
+
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_rx_trace,
+ .unformat_buffer = unformat_udp_header,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp6_input_node) = {
+ .function = udp6_input,
+ .name = "ip6-udp-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = UDP_N_ERROR,
+ .error_strings = udp_error_strings,
+
+ .n_next_nodes = UDP_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [UDP_INPUT_NEXT_##s] = n,
+ foreach_udp_input_next
+#undef _
+ },
+
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_rx_trace,
+ .unformat_buffer = unformat_udp_header,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input);
+
+static void
+add_dst_port (udp_main_t * um,
+ udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4)
+{
+ udp_dst_port_info_t *pi;
+ u32 i;
+
+ vec_add2 (um->dst_port_infos[is_ip4], pi, 1);
+ i = pi - um->dst_port_infos[is_ip4];
+
+ pi->name = dst_port_name;
+ pi->dst_port = dst_port;
+ pi->next_index = pi->node_index = ~0;
+
+ hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i);
+
+ if (pi->name)
+ hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i);
+}
+
+void
+udp_register_dst_port (vlib_main_t * vm,
+ udp_dst_port_t dst_port, u32 node_index, u8 is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ udp_dst_port_info_t *pi;
+ u16 *n;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, udp_local_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = udp_get_dst_port_info (um, dst_port, is_ip4);
+ if (!pi)
+ {
+ add_dst_port (um, dst_port, 0, is_ip4);
+ pi = udp_get_dst_port_info (um, dst_port, is_ip4);
+ ASSERT (pi);
+ }
+
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm,
+ is_ip4 ? udp4_input_node.index
+ : udp6_input_node.index, node_index);
+
+ /* Setup udp protocol -> next index sparse vector mapping. */
+ if (is_ip4)
+ n = sparse_vec_validate (um->next_by_dst_port4,
+ clib_host_to_net_u16 (dst_port));
+ else
+ n = sparse_vec_validate (um->next_by_dst_port6,
+ clib_host_to_net_u16 (dst_port));
+
+ n[0] = pi->next_index;
+}
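+
+/*
+ * Usage sketch (illustrative; "my_node" is a hypothetical node, not part
+ * of this change): a graph node that wants IPv4 packets addressed to UDP
+ * port 4789 delivered to it would call
+ *
+ *     udp_register_dst_port (vm, 4789, my_node.index, 1);
+ *
+ * after which matching packets are dispatched directly from
+ * ip4-udp-lookup to that node.
+ */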
+
+void
+udp_unregister_dst_port (vlib_main_t * vm, udp_dst_port_t dst_port, u8 is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ udp_dst_port_info_t *pi;
+ u16 *n;
+
+ pi = udp_get_dst_port_info (um, dst_port, is_ip4);
+ /* Not registered? Fagedaboudit */
+ if (!pi)
+ return;
+
+ /* Kill the mapping. Don't bother killing the pi, it may be back. */
+ if (is_ip4)
+ n = sparse_vec_validate (um->next_by_dst_port4,
+ clib_host_to_net_u16 (dst_port));
+ else
+ n = sparse_vec_validate (um->next_by_dst_port6,
+ clib_host_to_net_u16 (dst_port));
+
+ n[0] = SPARSE_VEC_INVALID_INDEX;
+}
+
+void
+udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add)
+{
+ udp_main_t *um = &udp_main;
+ {
+ clib_error_t *error = vlib_call_init_function (vm, udp_local_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ if (is_ip4)
+ um->punt_unknown4 = is_add;
+ else
+ um->punt_unknown6 = is_add;
+}
+
+/* Parse a UDP header. */
+uword
+unformat_udp_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ udp_header_t *udp;
+ __attribute__ ((unused)) int old_length;
+ u16 src_port, dst_port;
+
+  /* Allocate space for UDP header. */
+  {
+    void *p;
+
+    old_length = vec_len (*result);
+    vec_add2 (*result, p, sizeof (udp_header_t));
+ udp = p;
+ }
+
+ memset (udp, 0, sizeof (udp[0]));
+ if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port))
+ {
+ udp->src_port = clib_host_to_net_u16 (src_port);
+ udp->dst_port = clib_host_to_net_u16 (dst_port);
+ return 1;
+ }
+ return 0;
+}
+
+static void
+udp_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_udp_header;
+ n->unformat_buffer = unformat_udp_header;
+ pn->unformat_edit = unformat_pg_udp_header;
+}
+
+clib_error_t *
+udp_local_init (vlib_main_t * vm)
+{
+ udp_main_t *um = &udp_main;
+ int i;
+
+ {
+ clib_error_t *error;
+ error = vlib_call_init_function (vm, udp_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+
+ for (i = 0; i < 2; i++)
+ {
+ um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword));
+ um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword));
+ }
+
+ udp_setup_node (vm, udp4_input_node.index);
+ udp_setup_node (vm, udp6_input_node.index);
+
+ um->punt_unknown4 = 0;
+ um->punt_unknown6 = 0;
+
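+  /* next_by_dst_port4/6 map network-byte-order dst ports to next-node
+     indices; sparse vectors keep the 64K-entry port space compact. */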
+ um->next_by_dst_port4 = sparse_vec_new
+ ( /* elt bytes */ sizeof (um->next_by_dst_port4[0]),
+ /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+ um->next_by_dst_port6 = sparse_vec_new
+ ( /* elt bytes */ sizeof (um->next_by_dst_port6[0]),
+ /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */);
+ foreach_udp4_dst_port
+#undef _
+#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */);
+ foreach_udp6_dst_port
+#undef _
+ ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index);
+ /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (udp_local_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/udp/udp_packet.h b/src/vnet/udp/udp_packet.h
new file mode 100644
index 00000000..beea3059
--- /dev/null
+++ b/src/vnet/udp/udp_packet.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/udp_packet.h: UDP packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_udp_packet_h
+#define included_udp_packet_h
+
+typedef struct
+{
+ /* Source and destination port. */
+ u16 src_port, dst_port;
+
+ /* Length of UDP header plus payload. */
+ u16 length;
+
+ /* Checksum of UDP pseudo-header and data or
+ zero if checksum is disabled. */
+ u16 checksum;
+} udp_header_t;
+
+#endif /* included_udp_packet_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/udp/udp_pg.c b/src/vnet/udp/udp_pg.c
new file mode 100644
index 00000000..c9d8d38c
--- /dev/null
+++ b/src/vnet/udp/udp_pg.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_pg: UDP packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>		/* for unformat_tcp_udp_port */
+
+#define UDP_PG_EDIT_LENGTH (1 << 0)
+#define UDP_PG_EDIT_CHECKSUM (1 << 1)
+
+always_inline void
+udp_pg_edit_function_inline (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g,
+ u32 * packets, u32 n_packets, u32 flags)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset, udp_offset;
+
+ udp_offset = g->start_byte_offset;
+ ip_offset = (g - 1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ udp_header_t *udp0;
+ u32 udp_len0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+
+ ip0 = (void *) (p0->data + ip_offset);
+ udp0 = (void *) (p0->data + udp_offset);
+ udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]);
+
+ if (flags & UDP_PG_EDIT_LENGTH)
+ udp0->length =
+	  clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)
+ - ip_offset);
+
+ /* Initialize checksum with header. */
+ if (flags & UDP_PG_EDIT_CHECKSUM)
+ {
+ ip_csum_t sum0;
+
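+	  /* Seed the checksum with the pseudo-header: src and dst
+	     addresses (adjacent in the IP header, loaded as a single
+	     u64), then protocol and UDP length. */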
+ sum0 = clib_mem_unaligned (&ip0->src_address, u64);
+
+ sum0 = ip_csum_with_carry
+ (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16)));
+
+ /* Invalidate possibly old checksum. */
+ udp0->checksum = 0;
+
+ sum0 =
+ ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0,
+ sum0);
+
+ sum0 = ~ip_csum_fold (sum0);
+
+ /* Zero checksum means checksumming disabled. */
+ sum0 = sum0 != 0 ? sum0 : 0xffff;
+
+ udp0->checksum = sum0;
+ }
+ }
+}
+
+static void
+udp_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ switch (g->edit_function_opaque)
+ {
+ case UDP_PG_EDIT_LENGTH:
+ udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+ UDP_PG_EDIT_LENGTH);
+ break;
+
+ case UDP_PG_EDIT_CHECKSUM:
+ udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+ UDP_PG_EDIT_CHECKSUM);
+ break;
+
+ case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH:
+ udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+ UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+}
+
+typedef struct
+{
+ pg_edit_t src_port, dst_port;
+ pg_edit_t length;
+ pg_edit_t checksum;
+} pg_udp_header_t;
+
+static inline void
+pg_udp_header_init (pg_udp_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, udp_header_t, f);
+ _(src_port);
+ _(dst_port);
+ _(length);
+ _(checksum);
+#undef _
+}
+
+uword
+unformat_pg_udp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_udp_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t),
+ &group_index);
+ pg_udp_header_init (p);
+
+ /* Defaults. */
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+ p->length.type = PG_EDIT_UNSPECIFIED;
+
+ if (!unformat (input, "UDP: %U -> %U",
+ unformat_pg_edit,
+ unformat_tcp_udp_port, &p->src_port,
+ unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port))
+ goto error;
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "length %U",
+ unformat_pg_edit, unformat_pg_number, &p->length))
+ ;
+
+ else if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ u16 dst_port;
+ tcp_udp_port_info_t *pi;
+
+ pi = 0;
+ if (p->dst_port.type == PG_EDIT_FIXED)
+ {
+ dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO);
+ pi = ip_get_tcp_udp_port_info (im, dst_port);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ p = pg_get_edit_group (s, group_index);
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED
+ || p->length.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = udp_pg_edit_function;
+ g->edit_function_opaque = 0;
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM;
+ if (p->length.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= UDP_PG_EDIT_LENGTH;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/unix/gdb_funcs.c b/src/vnet/unix/gdb_funcs.c
new file mode 100644
index 00000000..32e22d92
--- /dev/null
+++ b/src/vnet/unix/gdb_funcs.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Host utility functions
+ */
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+
+#include <vlib/threads.h>
+#include <vnet/vnet.h>
+#include <vppinfra/format.h>
+
+/**
+ * @brief GDB callable function: vl - Return vector length of vector
+ *
+ * @param *p - void - address of vector
+ *
+ * @return length - u32
+ *
+ */
+u32 vl(void *p)
+{
+ return vec_len (p);
+}
+
+/**
+ * @brief GDB callable function: pe - call pool_elts - number of elements in a pool
+ *
+ * @param *v - void - address of pool
+ *
+ * @return number - uword
+ *
+ */
+uword pe (void *v)
+{
+ return (pool_elts(v));
+}
+
+/**
+ * @brief GDB callable function: pifi - call pool_is_free_index - is passed index free?
+ *
+ * @param *p - void - address of pool
+ * @param *index - u32
+ *
+ * @return 0|1 - int
+ *
+ */
+int pifi (void *p, u32 index)
+{
+ return pool_is_free_index (p, index);
+}
+
+/**
+ * @brief GDB callable function: debug_hex_bytes - return formatted hex string
+ *
+ * @param *s - u8
+ * @param n - u32 - number of bytes to format
+ *
+ */
+void debug_hex_bytes (u8 *s, u32 n)
+{
+ fformat (stderr, "%U\n", format_hex_bytes, s, n);
+}
+
+/**
+ * @brief GDB callable function: vlib_dump_frame_ownership
+ *
+ */
+void vlib_dump_frame_ownership (void)
+{
+ vlib_main_t * vm = vlib_get_main();
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_runtime_t * this_node_runtime;
+ vlib_next_frame_t * nf;
+ u32 first_nf_index;
+ u32 index;
+
+ vec_foreach(this_node_runtime, nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
+ {
+ first_nf_index = this_node_runtime->next_frame_index;
+
+ for (index = first_nf_index; index < first_nf_index +
+ this_node_runtime->n_next_nodes; index++)
+ {
+ vlib_node_runtime_t * owned_runtime;
+ nf = vec_elt_at_index (vm->node_main.next_frames, index);
+ if (nf->flags & VLIB_FRAME_OWNER)
+ {
+ owned_runtime = vec_elt_at_index (nm->nodes_by_type[0],
+ nf->node_runtime_index);
+ fformat(stderr,
+ "%s next index %d owns enqueue rights to %s\n",
+ nm->nodes[this_node_runtime->node_index]->name,
+ index - first_nf_index,
+ nm->nodes[owned_runtime->node_index]->name);
+ fformat (stderr, " nf index %d nf->frame_index %d\n",
+ nf - vm->node_main.next_frames,
+ nf->frame_index);
+ }
+ }
+ }
+}
+
+/**
+ * @brief GDB callable function: vlib_runtime_index_to_node_name
+ *
+ * Takes node index and will return the node name.
+ *
+ * @param index - u32
+ */
+void vlib_runtime_index_to_node_name (u32 index)
+{
+ vlib_main_t * vm = vlib_get_main();
+ vlib_node_main_t * nm = &vm->node_main;
+
+  if (index >= vec_len (nm->nodes))
+    {
+      fformat(stderr, "%d out of range, max %d\n", index,
+              vec_len (nm->nodes) - 1);
+ return;
+ }
+
+ fformat(stderr, "node runtime index %d name %s\n", index, nm->nodes[index]->name);
+}
+
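+/**
+ * @brief GDB callable function: gdb_show_errors - run "show errors"
+ *
+ * @param verbose - int - 0 or 1
+ *
+ */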
+void gdb_show_errors (int verbose)
+{
+ extern vlib_cli_command_t vlib_cli_show_errors;
+ unformat_input_t input;
+ vlib_main_t * vm = vlib_get_main();
+
+ if (verbose == 0)
+ unformat_init_string (&input, "verbose 0", 9);
+ else if (verbose == 1)
+ unformat_init_string (&input, "verbose 1", 9);
+ else
+ {
+ fformat(stderr, "verbose not 0 or 1\n");
+ return;
+ }
+
+ vlib_cli_show_errors.function (vm, &input, 0 /* cmd */);
+ unformat_free (&input);
+}
+
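+/**
+ * @brief GDB callable function: gdb_show_session - run "show session"
+ *
+ * @param verbose - int - 0, 1 or 2
+ *
+ */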
+void gdb_show_session (int verbose)
+{
+ extern vlib_cli_command_t vlib_cli_show_session_command;
+ unformat_input_t input;
+ vlib_main_t * vm = vlib_get_main();
+
+ if (verbose == 0)
+ unformat_init_string (&input, "verbose 0", 9);
+ else if (verbose == 1)
+ unformat_init_string (&input, "verbose 1", 9);
+ else if (verbose == 2)
+ unformat_init_string (&input, "verbose 2", 9);
+ else
+ {
+ fformat(stderr, "verbose not 0 - 2\n");
+ return;
+ }
+
+ vlib_cli_show_session_command.function (vm, &input, 0 /* cmd */);
+ unformat_free (&input);
+}
+
+/**
+ * @brief GDB callable function: show_gdb_command_fn - show gdb
+ *
+ * Shows list of functions for VPP available in GDB
+ *
+ * @return error - clib_error_t
+ */
+static clib_error_t *
+show_gdb_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_cli_output (vm, "vl(p) returns vec_len(p)");
+ vlib_cli_output (vm, "pe(p) returns pool_elts(p)");
+ vlib_cli_output (vm, "pifi(p, i) returns pool_is_free_index(p, i)");
+ vlib_cli_output (vm, "gdb_show_errors(0|1) dumps error counters");
+ vlib_cli_output (vm, "gdb_show_session dumps session counters");
+ vlib_cli_output (vm, "debug_hex_bytes (ptr, n_bytes) dumps n_bytes in hex");
+ vlib_cli_output (vm, "vlib_dump_frame_ownership() does what it says");
+ vlib_cli_output (vm, "vlib_runtime_index_to_node_name (index) prints NN");
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_gdb_funcs_command, static) = {
+ .path = "show gdb",
+ .short_help = "Describe functions which can be called from gdb",
+ .function = show_gdb_command_fn,
+};
+
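+/**
+ * @brief GDB callable function: vb - return vnet opaque area of a buffer
+ *
+ * @param *vb_arg - void - address of a vlib_buffer_t
+ *
+ * @return rv - vnet_buffer_opaque_t
+ *
+ */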
+vnet_buffer_opaque_t *vb (void *vb_arg)
+{
+ vlib_buffer_t *b = (vlib_buffer_t *)vb_arg;
+ vnet_buffer_opaque_t *rv;
+
+ rv = vnet_buffer (b);
+
+ return rv;
+}
+
+/* Cafeteria plan, maybe you don't want these functions */
+clib_error_t *
+gdb_func_init (vlib_main_t * vm) { return 0; }
+
+VLIB_INIT_FUNCTION (gdb_func_init);
diff --git a/src/vnet/unix/pcap.c b/src/vnet/unix/pcap.c
new file mode 100644
index 00000000..bba225f7
--- /dev/null
+++ b/src/vnet/unix/pcap.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pcap.c: libpcap packet capture format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/unix/pcap.h>
+#include <sys/fcntl.h>
+
+/**
+ * @file
+ * @brief PCAP read/write utility functions.
+ *
+ * Usage:
+ *
+ * <code><pre>
+ * \#include <vnet/unix/pcap.h>
+ *
+ * static pcap_main_t pcap = {
+ * .file_name = "/tmp/ip4",
+ * .n_packets_to_capture = 2,
+ * .packet_type = PCAP_PACKET_TYPE_ip,
+ * };
+ * </pre></code>
+ *
+ * To add a buffer:
+ *
+ * <code><pre>pcap_add_buffer (&pcap, vm, pi0, 128);</pre></code>
+ *
+ * The file is written once @c n_packets_to_capture packets have been captured, or on an explicit call to pcap_write (&pcap).
+ *
+*/
+
+/**
+ * @brief Close PCAP file
+ *
+ * @return rc - clib_error_t
+ *
+ */
+clib_error_t *
+pcap_close (pcap_main_t * pm)
+{
+ close (pm->file_descriptor);
+ pm->flags &= ~PCAP_MAIN_INIT_DONE;
+ pm->file_descriptor = -1;
+ return 0;
+}
+
+/**
+ * @brief Write PCAP file
+ *
+ * @return rc - clib_error_t
+ *
+ */
+clib_error_t *
+pcap_write (pcap_main_t * pm)
+{
+ clib_error_t * error = 0;
+
+ if (! (pm->flags & PCAP_MAIN_INIT_DONE))
+ {
+ pcap_file_header_t fh;
+ int n;
+
+ if (! pm->file_name)
+ pm->file_name = "/tmp/vnet.pcap";
+
+ pm->file_descriptor = open (pm->file_name, O_CREAT | O_TRUNC | O_WRONLY, 0664);
+ if (pm->file_descriptor < 0)
+ {
+ error = clib_error_return_unix (0, "failed to open `%s'", pm->file_name);
+ goto done;
+ }
+
+ pm->flags |= PCAP_MAIN_INIT_DONE;
+ pm->n_packets_captured = 0;
+ pm->n_pcap_data_written = 0;
+
+ /* Write file header. */
+ memset (&fh, 0, sizeof (fh));
+ fh.magic = 0xa1b2c3d4;
+ fh.major_version = 2;
+ fh.minor_version = 4;
+ fh.time_zone = 0;
+ fh.max_packet_size_in_bytes = 1 << 16;
+ fh.packet_type = pm->packet_type;
+ n = write (pm->file_descriptor, &fh, sizeof (fh));
+ if (n != sizeof (fh))
+ {
+ if (n < 0)
+ error = clib_error_return_unix (0, "write file header `%s'", pm->file_name);
+ else
+ error = clib_error_return (0, "short write of file header `%s'", pm->file_name);
+ goto done;
+ }
+ }
+
+ while (vec_len (pm->pcap_data) > pm->n_pcap_data_written)
+ {
+ int n = vec_len (pm->pcap_data) - pm->n_pcap_data_written;
+
+ n = write (pm->file_descriptor,
+ vec_elt_at_index (pm->pcap_data, pm->n_pcap_data_written), n);
+
+ if (n < 0 && unix_error_is_fatal (errno))
+ {
+ error = clib_error_return_unix (0, "write `%s'", pm->file_name);
+ goto done;
+ }
+ pm->n_pcap_data_written += n;
+ }
+
+ if (pm->n_pcap_data_written >= vec_len (pm->pcap_data))
+ {
+ vec_reset_length (pm->pcap_data);
+ pm->n_pcap_data_written = 0;
+ }
+
+ if (pm->n_packets_captured >= pm->n_packets_to_capture)
+ pcap_close(pm);
+
+ done:
+ if (error)
+ {
+ if (pm->file_descriptor >= 0)
+ close (pm->file_descriptor);
+ }
+ return error;
+}
+
+/**
+ * @brief Read PCAP file
+ *
+ * @return rc - clib_error_t
+ *
+ */
+clib_error_t * pcap_read (pcap_main_t * pm)
+{
+ clib_error_t * error = 0;
+ int fd, need_swap, n;
+ pcap_file_header_t fh;
+ pcap_packet_header_t ph;
+
+ fd = open (pm->file_name, O_RDONLY);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", pm->file_name);
+ goto done;
+ }
+
+ if (read (fd, &fh, sizeof (fh)) != sizeof (fh))
+ {
+ error = clib_error_return_unix (0, "read file header `%s'", pm->file_name);
+ goto done;
+ }
+
+ need_swap = 0;
+ if (fh.magic == 0xd4c3b2a1)
+ {
+ need_swap = 1;
+#define _(t,f) fh.f = clib_byte_swap_##t (fh.f);
+ foreach_pcap_file_header;
+#undef _
+ }
+
+ if (fh.magic != 0xa1b2c3d4)
+ {
+ error = clib_error_return (0, "bad magic `%s'", pm->file_name);
+ goto done;
+ }
+
+ pm->min_packet_bytes = 0;
+ pm->max_packet_bytes = 0;
+ while ((n = read (fd, &ph, sizeof (ph))) != 0)
+ {
+ u8 * data;
+
+ if (need_swap)
+ {
+#define _(t,f) ph.f = clib_byte_swap_##t (ph.f);
+ foreach_pcap_packet_header;
+#undef _
+ }
+
+ data = vec_new (u8, ph.n_bytes_in_packet);
+ if (read (fd, data, ph.n_packet_bytes_stored_in_file) != ph.n_packet_bytes_stored_in_file)
+ {
+ error = clib_error_return (0, "short read `%s'", pm->file_name);
+ goto done;
+ }
+
+ if (vec_len (pm->packets_read) == 0)
+ pm->min_packet_bytes = pm->max_packet_bytes = ph.n_bytes_in_packet;
+ else
+ {
+ pm->min_packet_bytes = clib_min (pm->min_packet_bytes, ph.n_bytes_in_packet);
+ pm->max_packet_bytes = clib_max (pm->max_packet_bytes, ph.n_bytes_in_packet);
+ }
+
+ vec_add1 (pm->packets_read, data);
+ }
+
+ done:
+ if (fd >= 0)
+ close (fd);
+ return error;
+
+}
diff --git a/src/vnet/unix/pcap.h b/src/vnet/unix/pcap.h
new file mode 100644
index 00000000..6aaf32be
--- /dev/null
+++ b/src/vnet/unix/pcap.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pcap.h: libpcap packet capture format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+/**
+ * @file
+ * @brief PCAP utility definitions
+ */
+#ifndef included_vnet_pcap_h
+#define included_vnet_pcap_h
+
+#include <vlib/vlib.h>
+
+/**
+ * @brief Packet types supported by PCAP
+ *
+ * null 0
+ * ethernet 1
+ * ppp 9
+ * ip 12
+ * hdlc 104
+ */
+#define foreach_vnet_pcap_packet_type \
+ _ (null, 0) \
+ _ (ethernet, 1) \
+ _ (ppp, 9) \
+ _ (ip, 12) \
+ _ (hdlc, 104)
+
+typedef enum {
+#define _(f,n) PCAP_PACKET_TYPE_##f = (n),
+ foreach_vnet_pcap_packet_type
+#undef _
+} pcap_packet_type_t;
+
+#define foreach_pcap_file_header \
+ /** 0xa1b2c3d4 host byte order. \
+ 0xd4c3b2a1 => need to byte swap everything. */ \
+ _ (u32, magic) \
+ \
+ /** Currently major 2 minor 4. */ \
+ _ (u16, major_version) \
+ _ (u16, minor_version) \
+ \
+ /** 0 for GMT. */ \
+ _ (u32, time_zone) \
+ \
+ /** Accuracy of timestamps. Typically set to 0. */ \
+ _ (u32, sigfigs) \
+ \
+ /** Size of largest packet in file. */ \
+ _ (u32, max_packet_size_in_bytes) \
+ \
+ /** One of vnet_pcap_packet_type_t. */ \
+ _ (u32, packet_type)
+
+/** File header struct */
+typedef struct {
+#define _(t, f) t f;
+ foreach_pcap_file_header
+#undef _
+} pcap_file_header_t;
+
+#define foreach_pcap_packet_header \
+ /** Time stamp in seconds */ \
+ _ (u32, time_in_sec) \
+ /** Time stamp in microseconds. */ \
+ _ (u32, time_in_usec) \
+ \
+ /** Number of bytes stored in file. */ \
+ _ (u32, n_packet_bytes_stored_in_file) \
+ /** Number of bytes in actual packet. */ \
+ _ (u32, n_bytes_in_packet)
+
+/** Packet header. */
+typedef struct {
+#define _(t, f) t f;
+ foreach_pcap_packet_header
+#undef _
+
+ /** Packet data follows. */
+ u8 data[0];
+} pcap_packet_header_t;
+
+/**
+ * @brief PCAP main state data structure
+ */
+typedef struct {
+ /** File name of pcap output. */
+ char * file_name;
+
+ /** Number of packets to capture. */
+ u32 n_packets_to_capture;
+
+ /** Packet type */
+ pcap_packet_type_t packet_type;
+
+ /** Number of packets currently captured. */
+ u32 n_packets_captured;
+
+ /** flags */
+ u32 flags;
+#define PCAP_MAIN_INIT_DONE (1 << 0)
+
+ /** File descriptor for reading/writing. */
+ int file_descriptor;
+
+ /** Bytes written */
+ u32 n_pcap_data_written;
+
+ /** Vector of pcap data. */
+ u8 * pcap_data;
+
+ /** Packets read from file. */
+ u8 ** packets_read;
+
+ /** Min/Max Packet bytes */
+ u32 min_packet_bytes, max_packet_bytes;
+} pcap_main_t;
+
+/** Write out data to output file. */
+clib_error_t * pcap_write (pcap_main_t * pm);
+
+/** Read data from file. */
+clib_error_t * pcap_read (pcap_main_t * pm);
+
+/**
+ * @brief Add packet
+ *
+ * @param *pm - pcap_main_t
+ * @param time_now - f64
+ * @param n_bytes_in_trace - u32
+ * @param n_bytes_in_packet - u32
+ *
+ * @return Packet Data
+ *
+ */
+static inline void *
+pcap_add_packet (pcap_main_t * pm,
+ f64 time_now,
+ u32 n_bytes_in_trace,
+ u32 n_bytes_in_packet)
+{
+ pcap_packet_header_t * h;
+ u8 * d;
+
+ vec_add2 (pm->pcap_data, d, sizeof (h[0]) + n_bytes_in_trace);
+ h = (void *) (d);
+ h->time_in_sec = time_now;
+ h->time_in_usec = 1e6*(time_now - h->time_in_sec);
+ h->n_packet_bytes_stored_in_file = n_bytes_in_trace;
+ h->n_bytes_in_packet = n_bytes_in_packet;
+ pm->n_packets_captured++;
+ return h->data;
+}
+
+/**
+ * @brief Add buffer (vlib_buffer_t) to the trace
+ *
+ * @param *pm - pcap_main_t
+ * @param *vm - vlib_main_t
+ * @param buffer_index - u32
+ * @param n_bytes_in_trace - u32
+ *
+ */
+static inline void
+pcap_add_buffer (pcap_main_t * pm,
+ vlib_main_t * vm, u32 buffer_index,
+ u32 n_bytes_in_trace)
+{
+ vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index);
+ u32 n = vlib_buffer_length_in_chain (vm, b);
+ i32 n_left = clib_min (n_bytes_in_trace, n);
+ f64 time_now = vlib_time_now (vm);
+ void * d;
+
+ d = pcap_add_packet (pm, time_now, n_left, n);
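+  /* Copy up to n_left bytes of the packet, walking the buffer chain one
+     segment at a time. */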
+ while (1)
+ {
+ u32 copy_length = clib_min ((u32) n_left, b->current_length);
+ clib_memcpy (d, b->data + b->current_data, copy_length);
+ n_left -= b->current_length;
+ if (n_left <= 0)
+ break;
+ d += b->current_length;
+ ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ b = vlib_get_buffer (vm, b->next_buffer);
+ }
+
+ /** Flush output vector. */
+ if (vec_len (pm->pcap_data) >= 64*1024
+ || pm->n_packets_captured >= pm->n_packets_to_capture)
+ pcap_write (pm);
+}
+
+#endif /* included_vnet_pcap_h */
diff --git a/src/vnet/unix/pcap2pg.c b/src/vnet/unix/pcap2pg.c
new file mode 100644
index 00000000..217a61f4
--- /dev/null
+++ b/src/vnet/unix/pcap2pg.c
@@ -0,0 +1,182 @@
+/*
+ * pcap2pg.c: convert pcap input to pg input
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Functions to convert PCAP file format to VPP PG (Packet Generator)
+ *
+ */
+#include <vnet/unix/pcap.h>
+#include <vnet/ethernet/packet.h>
+#include <stdio.h>
+
+pcap_main_t pcap_main;
+
+/**
+ * @brief char * to seed a PG file
+ */
+static char * pg_fmt =
+ "packet-generator new {\n"
+ " name s%d\n"
+ " limit 1\n"
+ " size %d-%d\n"
+ " node ethernet-input\n";
+
+
+/**
+ * @brief Packet Generator Stream boilerplate
+ *
+ * @param *ofp - FILE
+ * @param i - int
+ * @param *pkt - u8
+ */
+void stream_boilerplate (FILE *ofp, int i, u8 * pkt)
+{
+ fformat(ofp, pg_fmt, i, vec_len(pkt), vec_len(pkt));
+}
+
+/**
+ * @brief Conversion of PCAP file to PG file format
+ *
+ * @param *pm - pcap_main_t
+ * @param *ofp - FILE
+ *
+ * @return rc - int
+ *
+ */
+int pcap2pg (pcap_main_t * pm, FILE *ofp)
+{
+ int i, j;
+ u8 *pkt;
+
+ for (i = 0; i < vec_len (pm->packets_read); i++)
+ {
+ int offset;
+ ethernet_header_t * h;
+ u64 ethertype;
+
+ pkt = pm->packets_read[i];
+ h = (ethernet_header_t *)pkt;
+
+ stream_boilerplate (ofp, i, pkt);
+
+ fformat (ofp, " data {\n");
+
+ ethertype = clib_net_to_host_u16 (h->type);
+
+ /**
+ * In vnet terms, packet generator interfaces are not ethernets.
+ * They don't have vlan tables.
+ * This transforms captured 802.1q VLAN packets into
+ * regular Ethernet packets.
+ */
+ if (ethertype == 0x8100 /* 802.1q vlan */)
+ {
+ u16 * vlan_ethertype = (u16 *)(h+1);
+ ethertype = clib_net_to_host_u16(vlan_ethertype[0]);
+ offset = 18;
+ }
+ else
+ offset = 14;
+
+ fformat (ofp,
+ " 0x%04x: %02x%02x.%02x%02x.%02x%02x"
+ " -> %02x%02x.%02x%02x.%02x%02x\n",
+ ethertype,
+ h->src_address[0],
+ h->src_address[1],
+ h->src_address[2],
+ h->src_address[3],
+ h->src_address[4],
+ h->src_address[5],
+ h->dst_address[0],
+ h->dst_address[1],
+ h->dst_address[2],
+ h->dst_address[3],
+ h->dst_address[4],
+ h->dst_address[5]);
+
+ fformat (ofp, " hex 0x");
+
+ for (j = offset; j < vec_len (pkt); j++)
+ fformat (ofp, "%02x", pkt[j]);
+
+ fformat (ofp, " }\n");
+ fformat (ofp, "}\n\n");
+ }
+ return 0;
+}
+
+/**
+ * @brief pcap2pg.
+ * usage: pcap2pg -i <input-file> [-o <output-file>]
+ */
+int main (int argc, char **argv)
+{
+ unformat_input_t input;
+ pcap_main_t * pm = &pcap_main;
+ u8 * input_file = 0, * output_file = 0;
+ FILE * ofp;
+ clib_error_t * error;
+
+ unformat_init_command_line (&input, argv);
+
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(&input, "-i %s", &input_file)
+ || unformat (&input, "input %s", &input_file))
+ ;
+ else if (unformat (&input, "-o %s", &output_file)
+ || unformat (&input, "output %s", &output_file))
+ ;
+ else
+ {
+ usage:
+ fformat(stderr,
+ "usage: pcap2pg -i <input-file> [-o <output-file>]\n");
+ exit (1);
+ }
+ }
+
+ if (input_file == 0)
+ goto usage;
+
+ pm->file_name = (char *)input_file;
+ error = pcap_read (pm);
+
+ if (error)
+ {
+ clib_error_report (error);
+ exit (1);
+ }
+
+  if (output_file)
+    {
+      ofp = fopen ((char *)output_file, "w");
+      if (ofp == NULL)
+	{
+	  clib_unix_warning ("Couldn't create '%s'", output_file);
+	  exit (1);
+	}
+    }
+ else
+ {
+ ofp = stdout;
+ }
+
+ pcap2pg (pm, ofp);
+
+ fclose (ofp);
+ exit (0);
+}
diff --git a/src/vnet/unix/tap.api b/src/vnet/unix/tap.api
new file mode 100644
index 00000000..d9fba371
--- /dev/null
+++ b/src/vnet/unix/tap.api
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+ This file defines vpe control-plane API messages for
+ the Linux kernel TAP device driver
+*/
+
+/** \brief Initialize a new tap interface with the given parameters
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param use_random_mac - let the system generate a unique mac address
+    @param tap_name - name to associate with the new interface
+    @param mac_address - mac addr to assign to the interface if use_random_mac is not set
+    @param renumber - use a custom device instance number
+    @param custom_dev_instance - custom device instance number
+    @param ip4_address_set - set an ip4 address on the interface
+    @param ip4_address - ip4 address
+    @param ip4_mask_width - ip4 address mask width
+    @param ip6_address_set - set an ip6 address on the interface
+    @param ip6_address - ip6 address
+    @param ip6_mask_width - ip6 address mask width
+    @param tag - opaque tag to associate with the interface
+*/
+define tap_connect
+{
+ u32 client_index;
+ u32 context;
+ u8 use_random_mac;
+ u8 tap_name[64];
+ u8 mac_address[6];
+ u8 renumber;
+ u32 custom_dev_instance;
+ u8 ip4_address_set;
+ u8 ip4_address[4];
+ u8 ip4_mask_width;
+ u8 ip6_address_set;
+ u8 ip6_address[16];
+ u8 ip6_mask_width;
+ u8 tag[64];
+};
+
+/** \brief Reply for tap connect request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index allocated for the new tap interface
+*/
+define tap_connect_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Modify a tap interface with the given parameters
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface index of existing tap interface
+    @param use_random_mac - let the system generate a unique mac address
+    @param tap_name - name to associate with the new interface
+    @param mac_address - mac addr to assign to the interface if use_random_mac is not set
+    @param renumber - use a custom device instance number
+    @param custom_dev_instance - custom device instance number
+*/
+define tap_modify
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 use_random_mac;
+ u8 tap_name[64];
+ u8 mac_address[6];
+ u8 renumber;
+ u32 custom_dev_instance;
+};
+
+/** \brief Reply for tap modify request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+    @param sw_if_index - software index of the modified tap interface
+*/
+define tap_modify_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Delete tap interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index of existing tap interface
+*/
+autoreply define tap_delete
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Dump tap interfaces request */
+define sw_interface_tap_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for tap dump request
+ @param sw_if_index - software index of tap interface
+ @param dev_name - Linux tap device name
+*/
+define sw_interface_tap_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 dev_name[64];
+};
diff --git a/src/vnet/unix/tap_api.c b/src/vnet/unix/tap_api.c
new file mode 100644
index 00000000..3a64e100
--- /dev/null
+++ b/src/vnet/unix/tap_api.c
@@ -0,0 +1,284 @@
+/*
+ *------------------------------------------------------------------
+ * tap_api.c - vnet tap device driver API support
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/unix/tuntap.h>
+#include <vnet/unix/tapcli.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_tap_api_msg \
+_(TAP_CONNECT, tap_connect) \
+_(TAP_MODIFY, tap_modify) \
+_(TAP_DELETE, tap_delete) \
+_(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump)
+
+#define vl_msg_name_crc_list
+#include <vnet/unix/tap.api.h>
+#undef vl_msg_name_crc_list
+
+/*
+ * WARNING: replicated pending api refactor completion
+ */
+static void
+send_sw_interface_event_deleted (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 sw_if_index)
+{
+ vl_api_sw_interface_event_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ mp->admin_up_down = 0;
+ mp->link_up_down = 0;
+ mp->deleted = 1;
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_tap_connect_t_handler (vl_api_tap_connect_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ int rv;
+ vl_api_tap_connect_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ unix_shared_memory_queue_t *q;
+ u32 sw_if_index = (u32) ~ 0;
+ u8 *tag;
+ vnet_tap_connect_args_t _a, *ap = &_a;
+
+ memset (ap, 0, sizeof (*ap));
+
+ ap->intfc_name = mp->tap_name;
+ if (!mp->use_random_mac)
+ ap->hwaddr_arg = mp->mac_address;
+ ap->renumber = mp->renumber;
+ ap->sw_if_indexp = &sw_if_index;
+ ap->custom_dev_instance = ntohl (mp->custom_dev_instance);
+ if (mp->ip4_address_set)
+ {
+ ap->ip4_address = (ip4_address_t *) mp->ip4_address;
+ ap->ip4_mask_width = mp->ip4_mask_width;
+ ap->ip4_address_set = 1;
+ }
+ if (mp->ip6_address_set)
+ {
+ ap->ip6_address = (ip6_address_t *) mp->ip6_address;
+ ap->ip6_mask_width = mp->ip6_mask_width;
+ ap->ip6_address_set = 1;
+ }
+
+ rv = vnet_tap_connect_renumber (vm, ap);
+
+ /* Add tag if supplied */
+ if (rv == 0 && mp->tag[0])
+ {
+ mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
+ tag = format (0, "%s%c", mp->tag, 0);
+ vnet_set_sw_interface_tag (vnm, tag, sw_if_index);
+ }
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_TAP_CONNECT_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = ntohl (rv);
+ rmp->sw_if_index = ntohl (sw_if_index);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_tap_modify_t_handler (vl_api_tap_modify_t * mp)
+{
+ int rv;
+ vl_api_tap_modify_reply_t *rmp;
+ unix_shared_memory_queue_t *q;
+ u32 sw_if_index = (u32) ~ 0;
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_tap_connect_args_t _a, *ap = &_a;
+
+ memset (ap, 0, sizeof (*ap));
+
+ ap->orig_sw_if_index = ntohl (mp->sw_if_index);
+ ap->intfc_name = mp->tap_name;
+ if (!mp->use_random_mac)
+ ap->hwaddr_arg = mp->mac_address;
+ ap->sw_if_indexp = &sw_if_index;
+ ap->renumber = mp->renumber;
+ ap->custom_dev_instance = ntohl (mp->custom_dev_instance);
+
+ rv = vnet_tap_modify (vm, ap);
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_TAP_MODIFY_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = ntohl (rv);
+ rmp->sw_if_index = ntohl (sw_if_index);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_tap_delete_t_handler (vl_api_tap_delete_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ int rv;
+ vpe_api_main_t *vam = &vpe_api_main;
+ vl_api_tap_delete_reply_t *rmp;
+ unix_shared_memory_queue_t *q;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ rv = vnet_tap_delete (vm, sw_if_index);
+ if (!rv)
+ {
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_clear_sw_interface_tag (vnm, sw_if_index);
+ }
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_TAP_DELETE_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = ntohl (rv);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+
+ if (!rv)
+ send_sw_interface_event_deleted (vam, q, sw_if_index);
+}
+
+static void
+send_sw_interface_tap_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ tapcli_interface_details_t * tap_if,
+ u32 context)
+{
+ vl_api_sw_interface_tap_details_t *mp;
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_TAP_DETAILS);
+ mp->sw_if_index = ntohl (tap_if->sw_if_index);
+ strncpy ((char *) mp->dev_name,
+ (char *) tap_if->dev_name, ARRAY_LEN (mp->dev_name) - 1);
+ mp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_sw_interface_tap_dump_t_handler (vl_api_sw_interface_tap_dump_t * mp)
+{
+ int rv = 0;
+ vpe_api_main_t *am = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ tapcli_interface_details_t *tapifs = NULL;
+ tapcli_interface_details_t *tap_if = NULL;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ rv = vnet_tap_dump_ifs (&tapifs);
+ if (rv)
+ return;
+
+ vec_foreach (tap_if, tapifs)
+ {
+ send_sw_interface_tap_details (am, q, tap_if, mp->context);
+ }
+
+ vec_free (tapifs);
+}
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_tap;
+#undef _
+}
+
+static clib_error_t *
+tap_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_tap_api_msg;
+#undef _
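+
+  /* The _(N,n) expansion above registers one handler per message in
+   * foreach_tap_api_msg, e.g. vl_api_tap_connect_t_handler for
+   * VL_API_TAP_CONNECT. */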
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (tap_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
new file mode 100644
index 00000000..d80cca3d
--- /dev/null
+++ b/src/vnet/unix/tapcli.c
@@ -0,0 +1,1491 @@
+/*
+ *------------------------------------------------------------------
+ * tapcli.c - dynamic tap interface hookup
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+/**
+ * @file
+ * @brief dynamic tap interface hookup
+ */
+
+#include <fcntl.h> /* for open */
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* for iovec */
+#include <netinet/in.h>
+
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/feature/feature.h>
+#include <vnet/devices/devices.h>
+#include <vnet/unix/tuntap.h>
+#include <vnet/unix/tapcli.h>
+
+static vnet_device_class_t tapcli_dev_class;
+static vnet_hw_interface_class_t tapcli_interface_class;
+static vlib_node_registration_t tapcli_rx_node;
+
+static void tapcli_nopunt_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame);
+/**
+ * @brief Struct for the tapcli interface
+ */
+typedef struct {
+ u32 unix_fd;
+ u32 clib_file_index;
+ u32 provision_fd;
+ /** For counters */
+ u32 sw_if_index;
+ u32 hw_if_index;
+ u32 is_promisc;
+ struct ifreq ifr;
+ u32 per_interface_next_index;
+ /** for delete */
+ u8 active;
+} tapcli_interface_t;
+
+/**
+ * @brief Struct for RX trace
+ */
+typedef struct {
+ u16 sw_if_index;
+} tapcli_rx_trace_t;
+
+/**
+ * @brief Function to format TAP CLI trace
+ *
+ * @param *s - u8 - formatting string
+ * @param *va - va_list
+ *
+ * @return *s - u8 - formatted string
+ *
+ */
+u8 * format_tapcli_rx_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ vnet_main_t * vnm = vnet_get_main();
+ tapcli_rx_trace_t * t = va_arg (*va, tapcli_rx_trace_t *);
+ s = format (s, "%U", format_vnet_sw_if_index_name,
+ vnm, t->sw_if_index);
+ return s;
+}
+
+/**
+ * @brief TAPCLI per thread struct
+ */
+typedef struct
+{
+ /** Vector of VLIB rx buffers to use. We allocate them in blocks
+ of VLIB_FRAME_SIZE (256). */
+ u32 * rx_buffers;
+
+ /** Vector of iovecs for readv/writev calls. */
+ struct iovec * iovecs;
+} tapcli_per_thread_t;
+
+/**
+ * @brief TAPCLI main state struct
+ */
+typedef struct {
+ /** per thread variables */
+ tapcli_per_thread_t * threads;
+
+ /** tap device destination MAC address. Required, or Linux drops pkts */
+ u8 ether_dst_mac[6];
+
+ /** Interface MTU in bytes and # of default sized buffers. */
+ u32 mtu_bytes, mtu_buffers;
+
+ /** Vector of tap interfaces */
+ tapcli_interface_t * tapcli_interfaces;
+
+ /** Vector of deleted tap interfaces */
+ u32 * tapcli_inactive_interfaces;
+
+ /** Bitmap of tap interfaces with pending reads */
+ uword * pending_read_bitmap;
+
+ /** Hash table to find tapcli interface given hw_if_index */
+ uword * tapcli_interface_index_by_sw_if_index;
+
+ /** Hash table to find tapcli interface given unix fd */
+ uword * tapcli_interface_index_by_unix_fd;
+
+ /** renumbering table */
+ u32 * show_dev_instance_by_real_dev_instance;
+
+ /** 1 => disable CLI */
+ int is_disabled;
+
+ /** convenience - vlib_main_t */
+ vlib_main_t * vlib_main;
+ /** convenience - vnet_main_t */
+ vnet_main_t * vnet_main;
+} tapcli_main_t;
+
+static tapcli_main_t tapcli_main;
+
+/**
+ * @brief tapcli TX node function
+ * @node tapcli-tx
+ *
+ * Output node, writes the buffers comprising the incoming frame
+ * to the tun/tap device, aka hands them to the Linux kernel stack.
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *frame - vlib_frame_t
+ *
+ * @return n_packets - uword
+ *
+ */
+static uword
+tapcli_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 * buffers = vlib_frame_args (frame);
+ uword n_packets = frame->n_vectors;
+ tapcli_main_t * tm = &tapcli_main;
+ tapcli_interface_t * ti;
+ int i;
+ u16 thread_index = vlib_get_thread_index ();
+
+ for (i = 0; i < n_packets; i++)
+ {
+ struct iovec * iov;
+ vlib_buffer_t * b;
+ uword l;
+ vnet_hw_interface_t * hw;
+ uword * p;
+ u32 tx_sw_if_index;
+
+ b = vlib_get_buffer (vm, buffers[i]);
+
+ tx_sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_TX];
+ if (tx_sw_if_index == (u32)~0)
+ tx_sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
+
+ ASSERT(tx_sw_if_index != (u32)~0);
+
+ /* Use the sup intfc to finesse vlan subifs */
+ hw = vnet_get_sup_hw_interface (tm->vnet_main, tx_sw_if_index);
+ tx_sw_if_index = hw->sw_if_index;
+
+ p = hash_get (tm->tapcli_interface_index_by_sw_if_index,
+ tx_sw_if_index);
+ if (p == 0)
+ {
+ clib_warning ("sw_if_index %d unknown", tx_sw_if_index);
+ /* $$$ leak, but this should never happen... */
+ continue;
+ }
+ else
+ ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]);
+
+ /* Re-set iovecs if present. */
+ if (tm->threads[thread_index].iovecs)
+ _vec_len (tm->threads[thread_index].iovecs) = 0;
+
+ /* VLIB buffer chain -> Unix iovec(s). */
+ vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
+ iov->iov_base = b->data + b->current_data;
+ iov->iov_len = l = b->current_length;
+
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ do {
+ b = vlib_get_buffer (vm, b->next_buffer);
+
+ vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
+
+ iov->iov_base = b->data + b->current_data;
+ iov->iov_len = b->current_length;
+ l += b->current_length;
+ } while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ }
+
+ if (writev (ti->unix_fd, tm->threads[thread_index].iovecs,
+ vec_len (tm->threads[thread_index].iovecs)) < l)
+ clib_unix_warning ("writev");
+ }
+
+ vlib_buffer_free(vm, vlib_frame_vector_args(frame), frame->n_vectors);
+
+ return n_packets;
+}
+
+VLIB_REGISTER_NODE (tapcli_tx_node,static) = {
+ .function = tapcli_tx,
+ .name = "tapcli-tx",
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .vector_size = 4,
+};
+
+/**
+ * @brief Per-interface RX dispatch for the tapcli-rx node
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *ti - tapcli_interface_t
+ *
+ * @return n_packets - uword
+ *
+ */
+static uword tapcli_rx_iface(vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ tapcli_interface_t * ti)
+{
+ tapcli_main_t * tm = &tapcli_main;
+ const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ u8 set_trace = 0;
+ u16 thread_index = vlib_get_thread_index ();
+ vnet_main_t *vnm;
+ vnet_sw_interface_t * si;
+ u8 admin_down;
+ u32 next = node->cached_next_index;
+ u32 n_left_to_next, next_index;
+ u32 *to_next;
+
+ vnm = vnet_get_main();
+ si = vnet_get_sw_interface (vnm, ti->sw_if_index);
+ admin_down = !(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ vlib_get_next_frame(vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_to_next) { // Fill at most one vector
+ vlib_buffer_t *b_first, *b, *prev;
+ u32 bi_first, bi;
+ word n_bytes_in_packet;
+ int j, n_bytes_left;
+
+ if (PREDICT_FALSE(vec_len(tm->threads[thread_index].rx_buffers) <
+ tm->mtu_buffers)) {
+ uword len = vec_len(tm->threads[thread_index].rx_buffers);
+ _vec_len(tm->threads[thread_index].rx_buffers) +=
+ vlib_buffer_alloc_from_free_list(vm, &tm->threads[thread_index].rx_buffers[len],
+ VLIB_FRAME_SIZE - len, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ if (PREDICT_FALSE(vec_len(tm->threads[thread_index].rx_buffers) <
+ tm->mtu_buffers)) {
+ vlib_node_increment_counter(vm, tapcli_rx_node.index,
+ TAPCLI_ERROR_BUFFER_ALLOC,
+ tm->mtu_buffers -
+ vec_len(tm->threads[thread_index].rx_buffers));
+ break;
+ }
+ }
+
+ uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1;
+
+ /* Allocate RX buffers from end of rx_buffers.
+ Turn them into iovecs to pass to readv. */
+ vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1);
+ for (j = 0; j < tm->mtu_buffers; j++) {
+ b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx - j]);
+ tm->threads[thread_index].iovecs[j].iov_base = b->data;
+ tm->threads[thread_index].iovecs[j].iov_len = buffer_size;
+ }
+
+ n_bytes_left = readv (ti->unix_fd, tm->threads[thread_index].iovecs,
+ tm->mtu_buffers);
+ n_bytes_in_packet = n_bytes_left;
+ if (n_bytes_left <= 0) {
+ if (errno != EAGAIN) {
+ vlib_node_increment_counter(vm, tapcli_rx_node.index,
+ TAPCLI_ERROR_READ, 1);
+ }
+ break;
+ }
+
+ bi_first = tm->threads[thread_index].rx_buffers[i_rx];
+ b = b_first = vlib_get_buffer (vm,
+ tm->threads[thread_index].rx_buffers[i_rx]);
+ prev = NULL;
+
+ while (1) {
+ b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
+ n_bytes_left -= buffer_size;
+
+ if (prev) {
+ prev->next_buffer = bi;
+ prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ }
+ prev = b;
+
+ /* last segment */
+ if (n_bytes_left <= 0)
+ break;
+
+ i_rx--;
+ bi = tm->threads[thread_index].rx_buffers[i_rx];
+ b = vlib_get_buffer (vm, bi);
+ }
+
+ _vec_len (tm->threads[thread_index].rx_buffers) = i_rx;
+
+ b_first->total_length_not_including_first_buffer =
+ (n_bytes_in_packet > buffer_size) ? n_bytes_in_packet - buffer_size : 0;
+ b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b_first);
+
+ vnet_buffer (b_first)->sw_if_index[VLIB_RX] = ti->sw_if_index;
+ vnet_buffer (b_first)->sw_if_index[VLIB_TX] = (u32)~0;
+
+ b_first->error = node->errors[TAPCLI_ERROR_NONE];
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ next_index = (ti->per_interface_next_index != ~0) ?
+ ti->per_interface_next_index : next_index;
+ next_index = admin_down ? VNET_DEVICE_INPUT_NEXT_DROP : next_index;
+
+ to_next[0] = bi_first;
+ to_next++;
+ n_left_to_next--;
+
+ vnet_feature_start_device_input_x1 (ti->sw_if_index, &next_index, b_first);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ bi_first, next_index);
+
+ /* Interface counters for tapcli interface. */
+ if (PREDICT_TRUE(!admin_down)) {
+ vlib_increment_combined_counter (
+ vnet_main.interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index, ti->sw_if_index,
+ 1, n_bytes_in_packet);
+
+ if (PREDICT_FALSE(n_trace > 0)) {
+ vlib_trace_buffer (vm, node, next_index,
+ b_first, /* follow_chain */ 1);
+ n_trace--;
+ set_trace = 1;
+ tapcli_rx_trace_t *t0 = vlib_add_trace (vm, node, b_first, sizeof (*t0));
+ t0->sw_if_index = si->sw_if_index;
+ }
+ }
+ }
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ if (set_trace)
+ vlib_set_trace_count (vm, node, n_trace);
+ return VLIB_FRAME_SIZE - n_left_to_next;
+}
+
+/**
+ * @brief tapcli RX node function
+ * @node tapcli-rx
+ *
+ * Input node from the Kernel tun/tap device
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *frame - vlib_frame_t
+ *
+ * @return n_packets - uword
+ *
+ */
+static uword
+tapcli_rx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ tapcli_main_t * tm = &tapcli_main;
+ static u32 * ready_interface_indices;
+ tapcli_interface_t * ti;
+ int i;
+ u32 total_count = 0;
+
+ vec_reset_length (ready_interface_indices);
+ clib_bitmap_foreach (i, tm->pending_read_bitmap,
+ ({
+ vec_add1 (ready_interface_indices, i);
+ }));
+
+ if (vec_len (ready_interface_indices) == 0)
+ return 0;
+
+ for (i = 0; i < vec_len(ready_interface_indices); i++)
+ {
+ tm->pending_read_bitmap =
+ clib_bitmap_set (tm->pending_read_bitmap,
+ ready_interface_indices[i], 0);
+
+ ti = vec_elt_at_index (tm->tapcli_interfaces, ready_interface_indices[i]);
+ total_count += tapcli_rx_iface(vm, node, ti);
+ }
+ return total_count; //This might return more than 256.
+}
+
+/** TAPCLI error strings */
+static char * tapcli_rx_error_strings[] = {
+#define _(sym,string) string,
+ foreach_tapcli_error
+#undef _
+};
+
+VLIB_REGISTER_NODE (tapcli_rx_node, static) = {
+ .function = tapcli_rx,
+ .name = "tapcli-rx",
+ .sibling_of = "device-input",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .vector_size = 4,
+ .n_errors = TAPCLI_N_ERROR,
+ .error_strings = tapcli_rx_error_strings,
+ .format_trace = format_tapcli_rx_trace,
+};
+
+
+/**
+ * @brief Gets called when file descriptor is ready from epoll.
+ *
+ * @param *uf - clib_file_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+static clib_error_t * tapcli_read_ready (clib_file_t * uf)
+{
+ vlib_main_t * vm = vlib_get_main();
+ tapcli_main_t * tm = &tapcli_main;
+ uword * p;
+
+ /** Schedule the rx node */
+ vlib_node_set_interrupt_pending (vm, tapcli_rx_node.index);
+
+ p = hash_get (tm->tapcli_interface_index_by_unix_fd, uf->file_descriptor);
+
+ /** Mark the specific tap interface ready-to-read */
+ if (p)
+ tm->pending_read_bitmap = clib_bitmap_set (tm->pending_read_bitmap,
+ p[0], 1);
+ else
+ clib_warning ("fd %d not in hash table", uf->file_descriptor);
+
+ return 0;
+}
+
+/**
+ * @brief CLI function for TAPCLI configuration
+ *
+ * @param *vm - vlib_main_t
+ * @param *input - unformat_input_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+static clib_error_t *
+tapcli_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ tapcli_main_t *tm = &tapcli_main;
+ const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "mtu %d", &tm->mtu_bytes))
+ ;
+ else if (unformat (input, "disable"))
+ tm->is_disabled = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (tm->is_disabled)
+ return 0;
+
+ if (geteuid())
+ {
+ clib_warning ("tapcli disabled: must be superuser");
+ tm->is_disabled = 1;
+ return 0;
+ }
+
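+  /* Round up to whole buffers: e.g. with a (typical) 2048-byte
+     VLIB_BUFFER_DATA_SIZE, mtu 1500 -> 1 buffer, mtu 9000 -> 5. */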
+ tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size;
+
+ return 0;
+}
+
+/**
+ * @brief Renumber TAPCLI interface
+ *
+ * @param *hi - vnet_hw_interface_t
+ * @param new_dev_instance - u32
+ *
+ * @return rc - int
+ *
+ */
+static int tap_name_renumber (vnet_hw_interface_t * hi,
+ u32 new_dev_instance)
+{
+ tapcli_main_t *tm = &tapcli_main;
+
+ vec_validate_init_empty (tm->show_dev_instance_by_real_dev_instance,
+ hi->dev_instance, ~0);
+
+ tm->show_dev_instance_by_real_dev_instance [hi->dev_instance] =
+ new_dev_instance;
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (tapcli_config, "tapcli");
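+
+/*
+ * Example startup.conf stanza (values are illustrative):
+ *
+ *   tapcli {
+ *     mtu 9000
+ *   }
+ *
+ * Use "disable" inside the stanza to turn the feature off.
+ */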
+
+/**
+ * @brief Free "no punt" frame
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *frame - vlib_frame_t
+ *
+ */
+static void
+tapcli_nopunt_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 * buffers = vlib_frame_args (frame);
+ uword n_packets = frame->n_vectors;
+ vlib_buffer_free (vm, buffers, n_packets);
+ vlib_frame_free (vm, node, frame);
+}
+
+VNET_HW_INTERFACE_CLASS (tapcli_interface_class,static) = {
+ .name = "tapcli",
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+/**
+ * @brief Formatter for TAPCLI interface name
+ *
+ * @param *s - formatter string
+ * @param *args - va_list
+ *
+ * @return *s - formatted string
+ *
+ */
+static u8 * format_tapcli_interface_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u32 show_dev_instance = ~0;
+ tapcli_main_t * tm = &tapcli_main;
+
+ if (i < vec_len (tm->show_dev_instance_by_real_dev_instance))
+ show_dev_instance = tm->show_dev_instance_by_real_dev_instance[i];
+
+ if (show_dev_instance != ~0)
+ i = show_dev_instance;
+
+ s = format (s, "tap-%d", i);
+ return s;
+}
+
+/**
+ * @brief Modify interface flags for TAPCLI interface
+ *
+ * @param *vnm - vnet_main_t
+ * @param *hw - vnet_hw_interface_t
+ * @param flags - u32
+ *
+ * @return rc - u32
+ *
+ */
+static u32 tapcli_flag_change (vnet_main_t * vnm,
+ vnet_hw_interface_t * hw,
+ u32 flags)
+{
+ tapcli_main_t *tm = &tapcli_main;
+ tapcli_interface_t *ti;
+
+ ti = vec_elt_at_index (tm->tapcli_interfaces, hw->dev_instance);
+
+ if (flags & ETHERNET_INTERFACE_FLAG_MTU)
+ {
+ const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
+ tm->mtu_bytes = hw->max_packet_bytes;
+ tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size;
+ }
+ else
+ {
+ struct ifreq ifr;
+ u32 want_promisc;
+
+ memcpy (&ifr, &ti->ifr, sizeof (ifr));
+
+ /* get flags, modify to bring up interface... */
+ if (ioctl (ti->provision_fd, SIOCGIFFLAGS, &ifr) < 0)
+ {
+ clib_unix_warning ("Couldn't get interface flags for %s", hw->name);
+ return 0;
+ }
+
+ want_promisc = (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
+
+ if (want_promisc == ti->is_promisc)
+ return 0;
+
+ if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
+ ifr.ifr_flags |= IFF_PROMISC;
+ else
+ ifr.ifr_flags &= ~(IFF_PROMISC);
+
+      /* set the (possibly modified) flags on the interface */
+ if (ioctl (ti->provision_fd, SIOCSIFFLAGS, &ifr) < 0)
+ {
+ clib_unix_warning ("Couldn't set interface flags for %s", hw->name);
+ return 0;
+ }
+
+ ti->is_promisc = want_promisc;
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Setting the TAP interface's next processing node
+ *
+ * @param *vnm - vnet_main_t
+ * @param hw_if_index - u32
+ * @param node_index - u32
+ *
+ */
+static void tapcli_set_interface_next_node (vnet_main_t *vnm,
+ u32 hw_if_index,
+ u32 node_index)
+{
+ tapcli_main_t *tm = &tapcli_main;
+ tapcli_interface_t *ti;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+ ti = vec_elt_at_index (tm->tapcli_interfaces, hw->dev_instance);
+
+ /** Shut off redirection */
+ if (node_index == ~0)
+ {
+ ti->per_interface_next_index = node_index;
+ return;
+ }
+
+ ti->per_interface_next_index =
+ vlib_node_add_next (tm->vlib_main, tapcli_rx_node.index, node_index);
+}
+
+/**
+ * @brief Set link_state == admin_state; otherwise things like IPv6 neighbor discovery break
+ *
+ * @param *vnm - vnet_main_t
+ * @param hw_if_index - u32
+ * @param flags - u32
+ *
+ * @return error - clib_error_t
+ */
+static clib_error_t *
+tapcli_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ uword is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+ u32 hw_flags;
+ u32 speed_duplex = VNET_HW_INTERFACE_FLAG_FULL_DUPLEX
+ | VNET_HW_INTERFACE_FLAG_SPEED_1G;
+
+ if (is_admin_up)
+ hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP | speed_duplex;
+ else
+ hw_flags = speed_duplex;
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+ return 0;
+}
+
+VNET_DEVICE_CLASS (tapcli_dev_class,static) = {
+ .name = "tapcli",
+ .tx_function = tapcli_tx,
+ .format_device_name = format_tapcli_interface_name,
+ .rx_redirect_to_node = tapcli_set_interface_next_node,
+ .name_renumber = tap_name_renumber,
+ .admin_up_down_function = tapcli_interface_admin_up_down,
+};
+
+/**
+ * @brief Dump TAP interfaces
+ *
+ * @param **out_tapids - tapcli_interface_details_t
+ *
+ * @return rc - int
+ *
+ */
+int vnet_tap_dump_ifs (tapcli_interface_details_t **out_tapids)
+{
+ tapcli_main_t * tm = &tapcli_main;
+ tapcli_interface_t * ti;
+
+ tapcli_interface_details_t * r_tapids = NULL;
+ tapcli_interface_details_t * tapid = NULL;
+
+ vec_foreach (ti, tm->tapcli_interfaces) {
+ if (!ti->active)
+ continue;
+ vec_add2(r_tapids, tapid, 1);
+ tapid->sw_if_index = ti->sw_if_index;
+ strncpy((char *)tapid->dev_name, ti->ifr.ifr_name, sizeof (ti->ifr.ifr_name)-1);
+ }
+
+ *out_tapids = r_tapids;
+
+ return 0;
+}
+
+/**
+ * @brief Get tap interface from inactive interfaces or create new
+ *
+ * @return interface - tapcli_interface_t
+ *
+ */
+static tapcli_interface_t *tapcli_get_new_tapif()
+{
+ tapcli_main_t * tm = &tapcli_main;
+ tapcli_interface_t *ti = NULL;
+
+ int inactive_cnt = vec_len(tm->tapcli_inactive_interfaces);
+ // if there are any inactive ifaces
+ if (inactive_cnt > 0) {
+ // take last
+ u32 ti_idx = tm->tapcli_inactive_interfaces[inactive_cnt - 1];
+ if (vec_len(tm->tapcli_interfaces) > ti_idx) {
+ ti = vec_elt_at_index (tm->tapcli_interfaces, ti_idx);
+ clib_warning("reusing tap interface");
+ }
+ // "remove" from inactive list
+ _vec_len(tm->tapcli_inactive_interfaces) -= 1;
+ }
+
+ // ti was not retrieved from inactive ifaces - create new
+ if (!ti)
+ vec_add2 (tm->tapcli_interfaces, ti, 1);
+
+ return ti;
+}
+
+typedef struct
+{
+ ip6_address_t addr;
+ u32 mask_width;
+ unsigned int ifindex;
+} ip6_ifreq_t;
+
+/**
+ * @brief Connect a TAP interface
+ *
+ * @param vm - vlib_main_t
+ * @param ap - vnet_tap_connect_args_t
+ *
+ * @return rc - int
+ *
+ */
+int vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t *ap)
+{
+ tapcli_main_t * tm = &tapcli_main;
+ tapcli_interface_t * ti = NULL;
+ struct ifreq ifr;
+ int flags;
+ int dev_net_tun_fd;
+ int dev_tap_fd = -1;
+ clib_error_t * error;
+ u8 hwaddr [6];
+ int rv = 0;
+
+ if (tm->is_disabled)
+ {
+ return VNET_API_ERROR_FEATURE_DISABLED;
+ }
+
+ flags = IFF_TAP | IFF_NO_PI;
+
+ if ((dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ memset (&ifr, 0, sizeof (ifr));
+ strncpy(ifr.ifr_name, (char *) ap->intfc_name, sizeof (ifr.ifr_name)-1);
+ ifr.ifr_flags = flags;
+ if (ioctl (dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_2;
+ goto error;
+ }
+
+ /* Open a provisioning socket */
+ if ((dev_tap_fd = socket(PF_PACKET, SOCK_RAW,
+ htons(ETH_P_ALL))) < 0 )
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+ goto error;
+ }
+
+ /* Find the interface index. */
+ {
+ struct ifreq ifr;
+ struct sockaddr_ll sll;
+
+ memset (&ifr, 0, sizeof(ifr));
+ strncpy (ifr.ifr_name, (char *) ap->intfc_name, sizeof (ifr.ifr_name)-1);
+ if (ioctl (dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 )
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_4;
+ goto error;
+ }
+
+ /* Bind the provisioning socket to the interface. */
+ memset(&sll, 0, sizeof(sll));
+ sll.sll_family = AF_PACKET;
+ sll.sll_ifindex = ifr.ifr_ifindex;
+ sll.sll_protocol = htons(ETH_P_ALL);
+
+ if (bind(dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_5;
+ goto error;
+ }
+ }
+
+ /* non-blocking I/O on /dev/tapX */
+ {
+ int one = 1;
+ if (ioctl (dev_net_tun_fd, FIONBIO, &one) < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_6;
+ goto error;
+ }
+ }
+ ifr.ifr_mtu = tm->mtu_bytes;
+ if (ioctl (dev_tap_fd, SIOCSIFMTU, &ifr) < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_7;
+ goto error;
+ }
+
+ /* get flags, modify to bring up interface... */
+ if (ioctl (dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_8;
+ goto error;
+ }
+
+ ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+
+ if (ioctl (dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_9;
+ goto error;
+ }
+
+ if (ap->ip4_address_set)
+ {
+ struct sockaddr_in sin;
+ /* ip4: mask defaults to /24 */
+ u32 mask = clib_host_to_net_u32 (0xFFFFFF00);
+
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ /* sin.sin_port = 0; */
+ sin.sin_addr.s_addr = ap->ip4_address->as_u32;
+ memcpy (&ifr.ifr_ifru.ifru_addr, &sin, sizeof (sin));
+
+ if (ioctl (dev_tap_fd, SIOCSIFADDR, &ifr) < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_10;
+ goto error;
+ }
+
+ if (ap->ip4_mask_width > 0 && ap->ip4_mask_width < 33)
+ {
+ mask = ~0;
+ mask <<= (32 - ap->ip4_mask_width);
+ }
+
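+      /* e.g. ip4_mask_width 16 -> mask 0xFFFF0000 in host byte order */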
+ mask = clib_host_to_net_u32(mask);
+ sin.sin_family = AF_INET;
+ sin.sin_port = 0;
+ sin.sin_addr.s_addr = mask;
+ memcpy (&ifr.ifr_ifru.ifru_addr, &sin, sizeof (sin));
+
+ if (ioctl (dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_10;
+ goto error;
+ }
+ }
+
+ if (ap->ip6_address_set)
+ {
+ struct ifreq ifr2;
+ ip6_ifreq_t ifr6;
+ int sockfd6;
+
+ sockfd6 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_IP);
+ if (sockfd6 < 0)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_10;
+ goto error;
+ }
+
+      memset (&ifr2, 0, sizeof(ifr2));
+ strncpy (ifr2.ifr_name, (char *) ap->intfc_name,
+ sizeof (ifr2.ifr_name)-1);
+ if (ioctl (sockfd6, SIOCGIFINDEX, &ifr2) < 0 )
+ {
+ close (sockfd6);
+ rv = VNET_API_ERROR_SYSCALL_ERROR_4;
+ goto error;
+ }
+
+ memcpy (&ifr6.addr, ap->ip6_address, sizeof (ip6_address_t));
+ ifr6.mask_width = ap->ip6_mask_width;
+ ifr6.ifindex = ifr2.ifr_ifindex;
+
+ if (ioctl (sockfd6, SIOCSIFADDR, &ifr6) < 0)
+ {
+ close (sockfd6);
+ clib_unix_warning ("ifr6");
+ rv = VNET_API_ERROR_SYSCALL_ERROR_10;
+ goto error;
+ }
+ close (sockfd6);
+ }
+
+ ti = tapcli_get_new_tapif();
+ ti->per_interface_next_index = ~0;
+
+ if (ap->hwaddr_arg != 0)
+ clib_memcpy(hwaddr, ap->hwaddr_arg, 6);
+ else
+ {
+ f64 now = vlib_time_now(vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+
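+      /* Mark the random address as locally administered (0x02 in the
+         first octet, unicast) so it cannot collide with a
+         vendor-assigned MAC. */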
+ memcpy (hwaddr+2, &rnd, sizeof(rnd));
+ hwaddr[0] = 2;
+ hwaddr[1] = 0xfe;
+ }
+
+ error = ethernet_register_interface
+ (tm->vnet_main,
+ tapcli_dev_class.index,
+ ti - tm->tapcli_interfaces /* device instance */,
+ hwaddr /* ethernet address */,
+ &ti->hw_if_index,
+ tapcli_flag_change);
+
+ if (error)
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_INVALID_REGISTRATION;
+ goto error;
+ }
+
+ {
+ clib_file_t template = {0};
+ template.read_function = tapcli_read_ready;
+ template.file_descriptor = dev_net_tun_fd;
+ ti->clib_file_index = clib_file_add (&file_main, &template);
+ ti->unix_fd = dev_net_tun_fd;
+ ti->provision_fd = dev_tap_fd;
+ clib_memcpy (&ti->ifr, &ifr, sizeof (ifr));
+ }
+
+ {
+ vnet_hw_interface_t * hw;
+ hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index);
+ hw->min_supported_packet_bytes = TAP_MTU_MIN;
+ hw->max_supported_packet_bytes = TAP_MTU_MAX;
+ hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = hw->max_supported_packet_bytes - sizeof(ethernet_header_t);
+ ti->sw_if_index = hw->sw_if_index;
+ if (ap->sw_if_indexp)
+ *(ap->sw_if_indexp) = hw->sw_if_index;
+ }
+
+ ti->active = 1;
+
+ hash_set (tm->tapcli_interface_index_by_sw_if_index, ti->sw_if_index,
+ ti - tm->tapcli_interfaces);
+
+ hash_set (tm->tapcli_interface_index_by_unix_fd, ti->unix_fd,
+ ti - tm->tapcli_interfaces);
+
+ return rv;
+
+ error:
+ close (dev_net_tun_fd);
+ if (dev_tap_fd >= 0)
+ close (dev_tap_fd);
+
+ return rv;
+}
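+
+/*
+ * Minimal usage sketch (illustrative; error handling omitted):
+ *
+ *   vnet_tap_connect_args_t a;
+ *   u32 sw_if_index = ~0;
+ *   memset (&a, 0, sizeof (a));
+ *   a.intfc_name = (u8 *) "mytap";
+ *   a.sw_if_indexp = &sw_if_index;
+ *   int rv = vnet_tap_connect (vlib_get_main (), &a);
+ */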
+
+/**
+ * @brief Connect a TAP interface and optionally renumber it
+ *
+ * @param *vm - vlib_main_t
+ * @param *ap - vnet_tap_connect_args_t
+ *
+ * @return rc - int
+ *
+ */
+int vnet_tap_connect_renumber (vlib_main_t * vm,
+ vnet_tap_connect_args_t *ap)
+{
+ int rv = vnet_tap_connect(vm, ap);
+
+ if (!rv && ap->renumber)
+ vnet_interface_name_renumber (*(ap->sw_if_indexp), ap->custom_dev_instance);
+
+ return rv;
+}
+
+/**
+ * @brief Disconnect TAP CLI interface
+ *
+ * @param *ti - tapcli_interface_t
+ *
+ * @return rc - int
+ *
+ */
+static int tapcli_tap_disconnect (tapcli_interface_t *ti)
+{
+ int rv = 0;
+ vnet_main_t * vnm = vnet_get_main();
+ tapcli_main_t * tm = &tapcli_main;
+ u32 sw_if_index = ti->sw_if_index;
+
+ // bring interface down
+ vnet_sw_interface_set_flags (vnm, sw_if_index, 0);
+
+ if (ti->clib_file_index != ~0) {
+ clib_file_del (&file_main, file_main.file_pool + ti->clib_file_index);
+ ti->clib_file_index = ~0;
+ }
+ else
+ close(ti->unix_fd);
+
+ hash_unset (tm->tapcli_interface_index_by_unix_fd, ti->unix_fd);
+ hash_unset (tm->tapcli_interface_index_by_sw_if_index, ti->sw_if_index);
+ close(ti->provision_fd);
+ ti->unix_fd = -1;
+ ti->provision_fd = -1;
+
+ return rv;
+}
+
+/**
+ * @brief Delete TAP interface
+ *
+ * @param *vm - vlib_main_t
+ * @param sw_if_index - u32
+ *
+ * @return rc - int
+ *
+ */
+int vnet_tap_delete(vlib_main_t *vm, u32 sw_if_index)
+{
+ int rv = 0;
+ tapcli_main_t * tm = &tapcli_main;
+ tapcli_interface_t *ti;
+ uword *p = NULL;
+
+ p = hash_get (tm->tapcli_interface_index_by_sw_if_index,
+ sw_if_index);
+ if (p == 0) {
+ clib_warning ("sw_if_index %d unknown", sw_if_index);
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+ ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]);
+
+ // inactive
+ ti->active = 0;
+ tapcli_tap_disconnect(ti);
+ // add to inactive list
+ vec_add1(tm->tapcli_inactive_interfaces, ti - tm->tapcli_interfaces);
+
+ // reset renumbered iface
+ if (p[0] < vec_len (tm->show_dev_instance_by_real_dev_instance))
+ tm->show_dev_instance_by_real_dev_instance[p[0]] = ~0;
+
+ ethernet_delete_interface (tm->vnet_main, ti->hw_if_index);
+ return rv;
+}
+
+/**
+ * @brief CLI function to delete TAP interface
+ *
+ * @param *vm - vlib_main_t
+ * @param *input - unformat_input_t
+ * @param *cmd - vlib_cli_command_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+static clib_error_t *
+tap_delete_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ tapcli_main_t * tm = &tapcli_main;
+ u32 sw_if_index = ~0;
+
+ if (tm->is_disabled)
+ {
+ return clib_error_return (0, "device disabled...");
+ }
+
+ if (unformat (input, "%U", unformat_vnet_sw_interface, tm->vnet_main,
+ &sw_if_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+
+ int rc = vnet_tap_delete (vm, sw_if_index);
+
+ if (!rc) {
+ vlib_cli_output (vm, "Deleted.");
+ } else {
+ vlib_cli_output (vm, "Error during deletion of tap interface. (rc: %d)", rc);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (tap_delete_command, static) = {
+ .path = "tap delete",
+ .short_help = "tap delete <vpp-tap-intfc-name>",
+ .function = tap_delete_command_fn,
+};
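+
+/*
+ * Example (interface name is illustrative):
+ *   vpp# tap delete tap-0
+ *   Deleted.
+ */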
+
+/**
+ * @brief Modifies tap interface - can result in new interface being created
+ *
+ * @param *vm - vlib_main_t
+ * @param *ap - vnet_tap_connect_args_t
+ *
+ * @return rc - int
+ *
+ */
+int vnet_tap_modify (vlib_main_t * vm, vnet_tap_connect_args_t *ap)
+{
+ int rv = vnet_tap_delete (vm, ap->orig_sw_if_index);
+
+ if (rv)
+ return rv;
+
+ rv = vnet_tap_connect_renumber(vm, ap);
+
+ return rv;
+}
+
+/**
+ * @brief CLI function to modify TAP interface
+ *
+ * @param *vm - vlib_main_t
+ * @param *input - unformat_input_t
+ * @param *cmd - vlib_cli_command_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+static clib_error_t *
+tap_modify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 * intfc_name;
+ tapcli_main_t * tm = &tapcli_main;
+ u32 sw_if_index = ~0;
+ u32 new_sw_if_index = ~0;
+ int user_hwaddr = 0;
+ u8 hwaddr[6];
+ vnet_tap_connect_args_t _a, *ap= &_a;
+
+ if (tm->is_disabled)
+ {
+ return clib_error_return (0, "device disabled...");
+ }
+
+ if (unformat (input, "%U", unformat_vnet_sw_interface, tm->vnet_main,
+ &sw_if_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ if (unformat (input, "%s", &intfc_name))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ if (unformat(input, "hwaddr %U", unformat_ethernet_address,
+ &hwaddr))
+ user_hwaddr = 1;
+
+
+ memset (ap, 0, sizeof(*ap));
+ ap->orig_sw_if_index = sw_if_index;
+ ap->intfc_name = intfc_name;
+ ap->sw_if_indexp = &new_sw_if_index;
+ if (user_hwaddr)
+ ap->hwaddr_arg = hwaddr;
+
+ int rc = vnet_tap_modify (vm, ap);
+
+ if (!rc) {
+ vlib_cli_output (vm, "Modified %U for Linux tap '%s'",
+ format_vnet_sw_if_index_name, tm->vnet_main,
+ *(ap->sw_if_indexp), ap->intfc_name);
+ } else {
+ vlib_cli_output (vm, "Error during modification of tap interface. (rc: %d)", rc);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (tap_modify_command, static) = {
+ .path = "tap modify",
+ .short_help = "tap modify <vpp-tap-intfc-name> <linux-intfc-name> [hwaddr <addr>]",
+ .function = tap_modify_command_fn,
+};
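+
+/*
+ * Example (names and hwaddr are illustrative):
+ *   vpp# tap modify tap-0 newtap hwaddr 02:fe:01:02:03:04
+ */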
+
+/**
+ * @brief CLI function to connect TAP interface
+ *
+ * @param *vm - vlib_main_t
+ * @param *input - unformat_input_t
+ * @param *cmd - vlib_cli_command_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+static clib_error_t *
+tap_connect_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 * intfc_name = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_tap_connect_args_t _a, *ap= &_a;
+ tapcli_main_t * tm = &tapcli_main;
+ u8 hwaddr[6];
+ u8 *hwaddr_arg = 0;
+ u32 sw_if_index;
+ ip4_address_t ip4_address;
+ int ip4_address_set = 0;
+ ip6_address_t ip6_address;
+ int ip6_address_set = 0;
+ u32 ip4_mask_width = 0;
+ u32 ip6_mask_width = 0;
+ clib_error_t *error = NULL;
+
+ if (tm->is_disabled)
+ return clib_error_return (0, "device disabled...");
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(line_input, "hwaddr %U", unformat_ethernet_address,
+ &hwaddr))
+ hwaddr_arg = hwaddr;
+
+      /* Accepted for backward compatibility; a random hwaddr is the default */
+ else if (unformat(line_input, "hwaddr random"))
+ ;
+
+ else if (unformat (line_input, "address %U/%d",
+ unformat_ip4_address, &ip4_address, &ip4_mask_width))
+ ip4_address_set = 1;
+
+ else if (unformat (line_input, "address %U/%d",
+ unformat_ip6_address, &ip6_address, &ip6_mask_width))
+ ip6_address_set = 1;
+
+ else if (unformat (line_input, "%s", &intfc_name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (intfc_name == 0)
+ {
+ error = clib_error_return (0, "interface name must be specified");
+ goto done;
+ }
+
+ memset (ap, 0, sizeof (*ap));
+
+ ap->intfc_name = intfc_name;
+ ap->hwaddr_arg = hwaddr_arg;
+ if (ip4_address_set)
+ {
+ ap->ip4_address = &ip4_address;
+ ap->ip4_mask_width = ip4_mask_width;
+ ap->ip4_address_set = 1;
+ }
+ if (ip6_address_set)
+ {
+ ap->ip6_address = &ip6_address;
+ ap->ip6_mask_width = ip6_mask_width;
+ ap->ip6_address_set = 1;
+ }
+
+ ap->sw_if_indexp = &sw_if_index;
+
+ int rv = vnet_tap_connect(vm, ap);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_SYSCALL_ERROR_1:
+ error = clib_error_return (0, "Couldn't open /dev/net/tun");
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_2:
+ error = clib_error_return (0, "Error setting flags on '%s'", intfc_name);
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_3:
+ error = clib_error_return (0, "Couldn't open provisioning socket");
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_4:
+ error = clib_error_return (0, "Couldn't get if_index");
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_5:
+ error = clib_error_return (0, "Couldn't bind provisioning socket");
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_6:
+ error = clib_error_return (0, "Couldn't set device non-blocking flag");
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_7:
+ error = clib_error_return (0, "Couldn't set device MTU");
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_8:
+ error = clib_error_return (0, "Couldn't get interface flags");
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_9:
+ error = clib_error_return (0, "Couldn't set intfc admin state up");
+ goto done;
+
+ case VNET_API_ERROR_SYSCALL_ERROR_10:
+ error = clib_error_return (0, "Couldn't set intfc address/mask");
+ goto done;
+
+ case VNET_API_ERROR_INVALID_REGISTRATION:
+ error = clib_error_return (0, "Invalid registration");
+ goto done;
+
+ case 0:
+ break;
+
+ default:
+ error = clib_error_return (0, "Unknown error: %d", rv);
+ goto done;
+ }
+
+ vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main(), sw_if_index);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (tap_connect_command, static) = {
+ .path = "tap connect",
+ .short_help =
+ "tap connect <intfc-name> [address <ip-addr>/mw] [hwaddr <addr>]",
+ .function = tap_connect_command_fn,
+};
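+
+/*
+ * Example (name and address are illustrative):
+ *   vpp# tap connect mytap address 10.0.0.1/24
+ *   tap-0
+ */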
+
+/**
+ * @brief TAPCLI main init
+ *
+ * @param *vm - vlib_main_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+clib_error_t *
+tapcli_init (vlib_main_t * vm)
+{
+ tapcli_main_t * tm = &tapcli_main;
+ vlib_thread_main_t * m = vlib_get_thread_main ();
+ tapcli_per_thread_t * thread;
+
+ tm->vlib_main = vm;
+ tm->vnet_main = vnet_get_main();
+ tm->mtu_bytes = TAP_MTU_DEFAULT;
+ tm->tapcli_interface_index_by_sw_if_index = hash_create (0, sizeof(uword));
+ tm->tapcli_interface_index_by_unix_fd = hash_create (0, sizeof (uword));
+ vm->os_punt_frame = tapcli_nopunt_frame;
+ vec_validate_aligned (tm->threads, m->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_foreach (thread, tm->threads)
+ {
+ thread->iovecs = 0;
+ thread->rx_buffers = 0;
+ vec_alloc(thread->rx_buffers, VLIB_FRAME_SIZE);
+ vec_reset_length(thread->rx_buffers);
+ }
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (tapcli_init);
diff --git a/src/vnet/unix/tapcli.h b/src/vnet/unix/tapcli.h
new file mode 100644
index 00000000..fcd82dbf
--- /dev/null
+++ b/src/vnet/unix/tapcli.h
@@ -0,0 +1,52 @@
+/*
+ * tapcli.h : tap support
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief TAPCLI definitions
+ */
+
+#ifndef __included_tapcli_h__
+#define __included_tapcli_h__
+
+/** TAP CLI errors */
+#define foreach_tapcli_error \
+ /* Must be first. */ \
+ _(NONE, "no error") \
+ _(READ, "read error") \
+ _(BUFFER_ALLOC, "buffer allocation error") \
+ _(UNKNOWN, "unknown error")
+
+typedef enum {
+#define _(sym,str) TAPCLI_ERROR_##sym,
+ foreach_tapcli_error
+#undef _
+ TAPCLI_N_ERROR,
+ } tapcli_error_t;
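+
+/*
+ * The X-macro expansion above yields:
+ *   TAPCLI_ERROR_NONE, TAPCLI_ERROR_READ,
+ *   TAPCLI_ERROR_BUFFER_ALLOC, TAPCLI_ERROR_UNKNOWN, TAPCLI_N_ERROR
+ */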
+
+/** TAP CLI interface details struct */
+typedef struct {
+ u32 sw_if_index;
+ u8 dev_name[64];
+} tapcli_interface_details_t;
+
+int vnet_tap_dump_ifs (tapcli_interface_details_t **out_tapids);
+
+#define TAP_MTU_MIN 68
+#define TAP_MTU_MAX 65535
+#define TAP_MTU_DEFAULT 1500
+
+#endif /* __included_tapcli_h__ */
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
new file mode 100644
index 00000000..6e2a53fe
--- /dev/null
+++ b/src/vnet/unix/tuntap.c
@@ -0,0 +1,1037 @@
+/*
+ *------------------------------------------------------------------
+ * tuntap.c - kernel stack (reverse) punt/inject path
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+/**
+ * @file
+ * @brief TunTap Kernel stack (reverse) punt/inject path.
+ *
+ * This driver runs in one of two distinct modes:
+ * - "punt/inject" mode, where we send pkts not otherwise processed
+ * by the forwarding to the Linux kernel stack, and
+ *
+ * - "normal interface" mode, where we treat the Linux kernel stack
+ * as a peer.
+ *
+ * By default, we select punt/inject mode.
+ */
+
+#include <fcntl.h> /* for open */
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* for iovec */
+#include <netinet/in.h>
+
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+
+static vnet_device_class_t tuntap_dev_class;
+static vnet_hw_interface_class_t tuntap_interface_class;
+
+static void tuntap_punt_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame);
+static void tuntap_nopunt_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame);
+
+typedef struct {
+ u32 sw_if_index;
+ u8 is_v6;
+ u8 addr[16];
+} subif_address_t;
+
+/**
+ * @brief TUNTAP per thread struct
+ */
+typedef struct
+{
+ /** Vector of VLIB rx buffers to use. We allocate them in blocks
+ of VLIB_FRAME_SIZE (256). */
+ u32 * rx_buffers;
+
+ /** Vector of iovecs for readv/writev calls. */
+ struct iovec * iovecs;
+} tuntap_per_thread_t;
+
+/**
+ * @brief TUNTAP node main state
+ */
+typedef struct {
+ /** per thread variables */
+ tuntap_per_thread_t * threads;
+
+ /** File descriptors for /dev/net/tun and provisioning socket. */
+ int dev_net_tun_fd, dev_tap_fd;
+
+ /** Create a "tap" [ethernet] encaps device */
+ int is_ether;
+
+  /** 1 if a "normal" routed intfc, 0 if a punt/inject interface */
+  int have_normal_interface;
+
+ /** tap device destination MAC address. Required, or Linux drops pkts */
+ u8 ether_dst_mac[6];
+
+ /** Interface MTU in bytes and # of default sized buffers. */
+ u32 mtu_bytes, mtu_buffers;
+
+ /** Linux interface name for tun device. */
+ char * tun_name;
+
+ /** Pool of subinterface addresses */
+ subif_address_t *subifs;
+
+ /** Hash for subif addresses */
+ mhash_t subif_mhash;
+
+ /** Unix file index */
+ u32 clib_file_index;
+
+ /** For the "normal" interface, if configured */
+ u32 hw_if_index, sw_if_index;
+
+} tuntap_main_t;
+
+static tuntap_main_t tuntap_main = {
+ .tun_name = "vnet",
+
+ /** Suitable defaults for an Ethernet-like tun/tap device */
+ .mtu_bytes = 4096 + 256,
+};
+
+/**
+ * @brief tuntap_tx
+ * @node tuntap-tx
+ *
+ * Output node, writes the buffers comprising the incoming frame
+ * to the tun/tap device, aka hands them to the Linux kernel stack.
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *frame - vlib_frame_t
+ *
+ * @return rc - uword
+ *
+ */
+static uword
+tuntap_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 * buffers = vlib_frame_args (frame);
+ uword n_packets = frame->n_vectors;
+ tuntap_main_t * tm = &tuntap_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u32 n_bytes = 0;
+ int i;
+ u16 thread_index = vlib_get_thread_index ();
+
+ for (i = 0; i < n_packets; i++)
+ {
+ struct iovec * iov;
+ vlib_buffer_t * b;
+ uword l;
+
+ b = vlib_get_buffer (vm, buffers[i]);
+
+ if (tm->is_ether && (!tm->have_normal_interface))
+ {
+ vlib_buffer_reset(b);
+ clib_memcpy (vlib_buffer_get_current (b), tm->ether_dst_mac, 6);
+ }
+
+ /* Re-set iovecs if present. */
+ if (tm->threads[thread_index].iovecs)
+ _vec_len (tm->threads[thread_index].iovecs) = 0;
+
+ /** VLIB buffer chain -> Unix iovec(s). */
+ vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
+ iov->iov_base = b->data + b->current_data;
+ iov->iov_len = l = b->current_length;
+
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ do {
+ b = vlib_get_buffer (vm, b->next_buffer);
+
+ vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
+
+ iov->iov_base = b->data + b->current_data;
+ iov->iov_len = b->current_length;
+ l += b->current_length;
+ } while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ }
+
+ if (writev (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs,
+ vec_len (tm->threads[thread_index].iovecs)) < l)
+ clib_unix_warning ("writev");
+
+ n_bytes += l;
+ }
+
+ /* Update tuntap interface output stats. */
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ vm->thread_index,
+ tm->sw_if_index, n_packets, n_bytes);
+
+
+ /** The normal interface path flattens the buffer chain */
+ if (tm->have_normal_interface)
+ vlib_buffer_free_no_next (vm, buffers, n_packets);
+ else
+ vlib_buffer_free (vm, buffers, n_packets);
+
+ return n_packets;
+}
+
+VLIB_REGISTER_NODE (tuntap_tx_node,static) = {
+ .function = tuntap_tx,
+ .name = "tuntap-tx",
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .vector_size = 4,
+};
+
+/**
+ * @brief TUNTAP receive node
+ * @node tuntap-rx
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *frame - vlib_frame_t
+ *
+ * @return rc - uword
+ *
+ */
+static uword
+tuntap_rx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ tuntap_main_t * tm = &tuntap_main;
+ vlib_buffer_t * b;
+ u32 bi;
+ const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
+ u16 thread_index = vlib_get_thread_index ();
+
+ /** Make sure we have some RX buffers. */
+ {
+ uword n_left = vec_len (tm->threads[thread_index].rx_buffers);
+ uword n_alloc;
+
+ if (n_left < VLIB_FRAME_SIZE / 2)
+ {
+ if (! tm->threads[thread_index].rx_buffers)
+ vec_alloc (tm->threads[thread_index].rx_buffers, VLIB_FRAME_SIZE);
+
+ n_alloc = vlib_buffer_alloc (vm, tm->threads[thread_index].rx_buffers + n_left, VLIB_FRAME_SIZE - n_left);
+ _vec_len (tm->threads[thread_index].rx_buffers) = n_left + n_alloc;
+ }
+ }
+
+ /** Allocate RX buffers from end of rx_buffers.
+ Turn them into iovecs to pass to readv. */
+ {
+ uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1;
+ vlib_buffer_t * b;
+ word i, n_bytes_left, n_bytes_in_packet;
+
+ /** We should have enough buffers left for an MTU sized packet. */
+ ASSERT (vec_len (tm->threads[thread_index].rx_buffers) >= tm->mtu_buffers);
+
+ vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1);
+ for (i = 0; i < tm->mtu_buffers; i++)
+ {
+ b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx - i]);
+ tm->threads[thread_index].iovecs[i].iov_base = b->data;
+ tm->threads[thread_index].iovecs[i].iov_len = buffer_size;
+ }
+
+ n_bytes_left = readv (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs,
+ tm->mtu_buffers);
+ n_bytes_in_packet = n_bytes_left;
+ if (n_bytes_left <= 0)
+ {
+ if (errno != EAGAIN)
+ clib_unix_warning ("readv %d", n_bytes_left);
+ return 0;
+ }
+
+ bi = tm->threads[thread_index].rx_buffers[i_rx];
+
+ while (1)
+ {
+ b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx]);
+ b->flags = 0;
+ b->current_data = 0;
+ b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
+
+ n_bytes_left -= buffer_size;
+
+ if (n_bytes_left <= 0)
+ {
+ break;
+ }
+
+ i_rx--;
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b->next_buffer = tm->threads[thread_index].rx_buffers[i_rx];
+ }
+
+ /** Interface counters for tuntap interface. */
+ vlib_increment_combined_counter
+ (vnet_main.interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ tm->sw_if_index,
+ 1, n_bytes_in_packet);
+
+ _vec_len (tm->threads[thread_index].rx_buffers) = i_rx;
+ }
+
+ b = vlib_get_buffer (vm, bi);
+
+ {
+ u32 next_index;
+ uword n_trace = vlib_get_trace_count (vm, node);
+
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = tm->sw_if_index;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0;
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited...
+ */
+ if (VLIB_BUFFER_TRACE_TRAJECTORY)
+ b->pre_data[0] = 0;
+
+ b->error = node->errors[0];
+
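+    /* A tap (ethernet) device hands frames to ethernet-input; a tun
+       (L3) device has no ethernet header, so dispatch on the IP
+       version nibble of the first byte: 0x4x -> IPv4, 0x6x -> IPv6,
+       anything else -> drop. */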
+ if (tm->is_ether)
+ {
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ }
+ else
+ switch (b->data[0] & 0xf0)
+ {
+ case 0x40:
+ next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ break;
+ case 0x60:
+ next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ break;
+ default:
+ next_index = VNET_DEVICE_INPUT_NEXT_DROP;
+ break;
+ }
+
+ /* The linux kernel couldn't care less if our interface is up */
+ if (tm->have_normal_interface)
+ {
+ vnet_main_t *vnm = vnet_get_main();
+ vnet_sw_interface_t * si;
+ si = vnet_get_sw_interface (vnm, tm->sw_if_index);
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ next_index = VNET_DEVICE_INPUT_NEXT_DROP;
+ }
+
+ vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b);
+
+ vlib_set_next_frame_buffer (vm, node, next_index, bi);
+
+ if (n_trace > 0)
+ {
+ vlib_trace_buffer (vm, node, next_index,
+ b, /* follow_chain */ 1);
+ vlib_set_trace_count (vm, node, n_trace - 1);
+ }
+ }
+
+ return 1;
+}
+
+/**
+ * @brief TUNTAP_RX error strings
+ */
+static char * tuntap_rx_error_strings[] = {
+ "unknown packet type",
+};
+
+VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
+ .function = tuntap_rx,
+ .name = "tuntap-rx",
+ .sibling_of = "device-input",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .vector_size = 4,
+ .n_errors = 1,
+ .error_strings = tuntap_rx_error_strings,
+};
+
+/**
+ * @brief Gets called when file descriptor is ready from epoll.
+ *
+ * @param *uf - clib_file_t
+ *
+ * @return error - clib_error_t
+ */
+static clib_error_t * tuntap_read_ready (clib_file_t * uf)
+{
+ vlib_main_t * vm = vlib_get_main();
+ vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index);
+ return 0;
+}
+
+/**
+ * @brief Clean up the tun/tap device
+ *
+ * @param *vm - vlib_main_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+static clib_error_t *
+tuntap_exit (vlib_main_t * vm)
+{
+ tuntap_main_t *tm = &tuntap_main;
+ struct ifreq ifr;
+ int sfd;
+
+ /* Not present. */
+ if (! tm->dev_net_tun_fd || tm->dev_net_tun_fd < 0)
+ return 0;
+
+ sfd = socket (AF_INET, SOCK_STREAM, 0);
+ if (sfd < 0)
+ clib_unix_warning("provisioning socket");
+
+ memset(&ifr, 0, sizeof (ifr));
+ strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name)-1);
+
+ /* get flags, modify to bring down interface... */
+ if (ioctl (sfd, SIOCGIFFLAGS, &ifr) < 0)
+ clib_unix_warning ("SIOCGIFFLAGS");
+
+ ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
+
+ if (ioctl (sfd, SIOCSIFFLAGS, &ifr) < 0)
+ clib_unix_warning ("SIOCSIFFLAGS");
+
+ /* Turn off persistence */
+ if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 0) < 0)
+ clib_unix_warning ("TUNSETPERSIST");
+ close(tm->dev_tap_fd);
+ if (tm->dev_net_tun_fd >= 0)
+ close(tm->dev_net_tun_fd);
+ if (sfd >= 0)
+ close (sfd);
+
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (tuntap_exit);
+
+/**
+ * @brief CLI function for tun/tap config
+ *
+ * @param *vm - vlib_main_t
+ * @param *input - unformat_input_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+static clib_error_t *
+tuntap_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ tuntap_main_t *tm = &tuntap_main;
+ clib_error_t * error = 0;
+ struct ifreq ifr;
+ u8 * name;
+ int flags = IFF_TUN | IFF_NO_PI;
+ int is_enabled = 0, is_ether = 0, have_normal_interface = 0;
+ const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "mtu %d", &tm->mtu_bytes))
+ ;
+ else if (unformat (input, "enable"))
+ is_enabled = 1;
+ else if (unformat (input, "disable"))
+ is_enabled = 0;
+ else if (unformat (input, "ethernet") ||
+ unformat (input, "ether"))
+ is_ether = 1;
+ else if (unformat (input, "have-normal-interface") ||
+ unformat (input, "have-normal"))
+ have_normal_interface = 1;
+ else if (unformat (input, "name %s", &name))
+ tm->tun_name = (char *) name;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ tm->dev_net_tun_fd = -1;
+ tm->dev_tap_fd = -1;
+
+ if (is_enabled == 0)
+ return 0;
+
+ if (geteuid())
+ {
+ clib_warning ("tuntap disabled: must be superuser");
+ return 0;
+ }
+
+ tm->is_ether = is_ether;
+ tm->have_normal_interface = have_normal_interface;
+
+ if (is_ether)
+ flags = IFF_TAP | IFF_NO_PI;
+
+ if ((tm->dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0)
+ {
+ error = clib_error_return_unix (0, "open /dev/net/tun");
+ goto done;
+ }
+
+ memset (&ifr, 0, sizeof (ifr));
+ strncpy(ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1);
+ ifr.ifr_flags = flags;
+ if (ioctl (tm->dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0)
+ {
+ error = clib_error_return_unix (0, "ioctl TUNSETIFF");
+ goto done;
+ }
+
+ /* Make it persistent, at least until we split. */
+ if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 1) < 0)
+ {
+ error = clib_error_return_unix (0, "TUNSETPERSIST");
+ goto done;
+ }
+
+ /* Open a provisioning socket */
+ if ((tm->dev_tap_fd = socket(PF_PACKET, SOCK_RAW,
+ htons(ETH_P_ALL))) < 0 )
+ {
+ error = clib_error_return_unix (0, "socket");
+ goto done;
+ }
+
+ /* Find the interface index. */
+ {
+ struct ifreq ifr;
+ struct sockaddr_ll sll;
+
+ memset (&ifr, 0, sizeof(ifr));
+ strncpy (ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1);
+ if (ioctl (tm->dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 )
+ {
+ error = clib_error_return_unix (0, "ioctl SIOCGIFINDEX");
+ goto done;
+ }
+
+ /* Bind the provisioning socket to the interface. */
+ memset(&sll, 0, sizeof(sll));
+ sll.sll_family = AF_PACKET;
+ sll.sll_ifindex = ifr.ifr_ifindex;
+ sll.sll_protocol = htons(ETH_P_ALL);
+
+ if (bind(tm->dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0)
+ {
+ error = clib_error_return_unix (0, "bind");
+ goto done;
+ }
+ }
+
+ /* non-blocking I/O on /dev/tapX */
+ {
+ int one = 1;
+ if (ioctl (tm->dev_net_tun_fd, FIONBIO, &one) < 0)
+ {
+ error = clib_error_return_unix (0, "ioctl FIONBIO");
+ goto done;
+ }
+ }
+
+ tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size;
+
+ ifr.ifr_mtu = tm->mtu_bytes;
+ if (ioctl (tm->dev_tap_fd, SIOCSIFMTU, &ifr) < 0)
+ {
+ error = clib_error_return_unix (0, "ioctl SIOCSIFMTU");
+ goto done;
+ }
+
+ /* get flags, modify to bring up interface... */
+ if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
+ {
+ error = clib_error_return_unix (0, "ioctl SIOCGIFFLAGS");
+ goto done;
+ }
+
+ ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+
+ if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
+ {
+ error = clib_error_return_unix (0, "ioctl SIOCSIFFLAGS");
+ goto done;
+ }
+
+ if (is_ether)
+ {
+ if (ioctl (tm->dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0)
+ {
+ error = clib_error_return_unix (0, "ioctl SIOCGIFHWADDR");
+ goto done;
+ }
+ else
+ clib_memcpy (tm->ether_dst_mac, ifr.ifr_hwaddr.sa_data, 6);
+ }
+
+ if (have_normal_interface)
+ {
+ vnet_main_t *vnm = vnet_get_main();
+ error = ethernet_register_interface
+ (vnm,
+ tuntap_dev_class.index,
+ 0 /* device instance */,
+ tm->ether_dst_mac /* ethernet address */,
+ &tm->hw_if_index,
+ 0 /* flag change */);
+ if (error)
+ clib_error_report (error);
+ tm->sw_if_index = tm->hw_if_index;
+ vm->os_punt_frame = tuntap_nopunt_frame;
+ }
+ else
+ {
+ vnet_main_t *vnm = vnet_get_main();
+ vnet_hw_interface_t * hi;
+
+ vm->os_punt_frame = tuntap_punt_frame;
+
+ tm->hw_if_index = vnet_register_interface
+ (vnm,
+ tuntap_dev_class.index, 0 /* device instance */,
+ tuntap_interface_class.index, 0);
+ hi = vnet_get_hw_interface (vnm, tm->hw_if_index);
+ tm->sw_if_index = hi->sw_if_index;
+
+ /* Interface is always up. */
+ vnet_hw_interface_set_flags (vnm, tm->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ vnet_sw_interface_set_flags (vnm, tm->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ }
+
+ {
+ clib_file_t template = {0};
+ template.read_function = tuntap_read_ready;
+ template.file_descriptor = tm->dev_net_tun_fd;
+ tm->clib_file_index = clib_file_add (&file_main, &template);
+ }
+
+ done:
+ if (error)
+ {
+ if (tm->dev_net_tun_fd >= 0)
+ close (tm->dev_net_tun_fd);
+ if (tm->dev_tap_fd >= 0)
+ close (tm->dev_tap_fd);
+ }
+
+ return error;
+}
+
+VLIB_CONFIG_FUNCTION (tuntap_config, "tuntap");
+
+/**
+ * @brief Add or Del IP4 address to tun/tap interface
+ *
+ * @param *im - ip4_main_t
+ * @param opaque - uword
+ * @param sw_if_index - u32
+ * @param *address - ip4_address_t
+ * @param address_length - u32
+ * @param if_address_index - u32
+ * @param is_delete - u32
+ *
+ */
+void
+tuntap_ip4_add_del_interface_address (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length,
+ u32 if_address_index,
+ u32 is_delete)
+{
+ tuntap_main_t * tm = &tuntap_main;
+ struct ifreq ifr;
+ subif_address_t subif_addr, * ap;
+ uword * p;
+
+ /* Tuntap disabled, or using a "normal" interface. */
+ if (tm->have_normal_interface || tm->dev_tap_fd < 0)
+ return;
+
+ /* If the address is being applied to an interface that is not in
+ * the same table/VRF as this tap, then ignore it.
+ * If we don't do this, overlapping address spaces in the different
+ * tables break the Linux host's routing tables. */
+ if (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
+ sw_if_index) !=
+ fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
+ tm->sw_if_index))
+ return;
+
+ /* See if we already know about this subif */
+ memset (&subif_addr, 0, sizeof (subif_addr));
+ subif_addr.sw_if_index = sw_if_index;
+ clib_memcpy (&subif_addr.addr, address, sizeof (*address));
+
+ p = mhash_get (&tm->subif_mhash, &subif_addr);
+
+ if (p)
+ ap = pool_elt_at_index (tm->subifs, p[0]);
+ else
+ {
+ pool_get (tm->subifs, ap);
+ *ap = subif_addr;
+ mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
+ }
+
+ /* Use subif pool index to select alias device. */
+ memset (&ifr, 0, sizeof (ifr));
+ snprintf (ifr.ifr_name, sizeof(ifr.ifr_name),
+ "%s:%d", tm->tun_name, (int)(ap - tm->subifs));
+
+ /* The tuntap punt/inject path is enabled for IPv4 RX so long as
+ * any vpp interface has an IPv4 address.
+ * The enable is also reference counted.
+ */
+ ip4_sw_interface_enable_disable (tm->sw_if_index, !is_delete);
+
+ if (! is_delete)
+ {
+ struct sockaddr_in * sin;
+
+ sin = (struct sockaddr_in *)&ifr.ifr_addr;
+
+ /* Set ipv4 address, netmask. */
+ sin->sin_family = AF_INET;
+ clib_memcpy (&sin->sin_addr.s_addr, address, 4);
+ if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0)
+ clib_unix_warning ("ioctl SIOCSIFADDR");
+
+ sin->sin_addr.s_addr = im->fib_masks[address_length];
+ if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0)
+ clib_unix_warning ("ioctl SIOCSIFNETMASK");
+ }
+ else
+ {
+ mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */);
+ pool_put (tm->subifs, ap);
+ }
+
+ /* get flags, modify to bring up interface... */
+ if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
+ clib_unix_warning ("ioctl SIOCGIFFLAGS");
+
+ if (is_delete)
+ ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
+ else
+ ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+
+ if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
+ clib_unix_warning ("ioctl SIOCSIFFLAGS");
+}
+
+/**
+ * @brief Workaround for a known include file bug.
+ * Including @c <linux/ipv6.h> causes multiple definitions if
+ * @c <netinet/in.h> is also included.
+ */
+struct in6_ifreq {
+ struct in6_addr ifr6_addr;
+ u32 ifr6_prefixlen;
+ int ifr6_ifindex;
+};
+
+/**
+ * @brief Add or Del tun/tap interface address.
+ *
+ * Both the v6 interface address API and the way ifconfig
+ * displays subinterfaces differ from their v4 counterparts.
+ * The code given here seems to work but YMMV.
+ *
+ * @param *im - ip6_main_t
+ * @param opaque - uword
+ * @param sw_if_index - u32
+ * @param *address - ip6_address_t
+ * @param address_length - u32
+ * @param if_address_index - u32
+ * @param is_delete - u32
+ */
+void
+tuntap_ip6_add_del_interface_address (ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length,
+ u32 if_address_index,
+ u32 is_delete)
+{
+ tuntap_main_t * tm = &tuntap_main;
+ struct ifreq ifr;
+ struct in6_ifreq ifr6;
+ subif_address_t subif_addr, * ap;
+ uword * p;
+
+ /* Tuntap disabled, or using a "normal" interface. */
+ if (tm->have_normal_interface || tm->dev_tap_fd < 0)
+ return;
+
+ /* If the address is being applied to an interface that is not in
+ * the same table/VRF as this tap, then ignore it.
+ * If we don't do this, overlapping address spaces in the different
+ * tables break the Linux host's routing tables. */
+ if (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP6,
+ sw_if_index) !=
+ fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP6,
+ tm->sw_if_index))
+ return;
+
+ /* See if we already know about this subif */
+ memset (&subif_addr, 0, sizeof (subif_addr));
+ subif_addr.sw_if_index = sw_if_index;
+ subif_addr.is_v6 = 1;
+ clib_memcpy (&subif_addr.addr, address, sizeof (*address));
+
+ p = mhash_get (&tm->subif_mhash, &subif_addr);
+
+ if (p)
+ ap = pool_elt_at_index (tm->subifs, p[0]);
+ else
+ {
+ pool_get (tm->subifs, ap);
+ *ap = subif_addr;
+ mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
+ }
+
+ /* Use subif pool index to select alias device. */
+ memset (&ifr, 0, sizeof (ifr));
+ memset (&ifr6, 0, sizeof (ifr6));
+ snprintf (ifr.ifr_name, sizeof(ifr.ifr_name),
+ "%s:%d", tm->tun_name, (int)(ap - tm->subifs));
+
+ /* The tuntap punt/inject path is enabled for IPv6 RX so long as
+ * any vpp interface has an IPv6 address.
+ * The enable is also reference counted.
+ */
+ ip6_sw_interface_enable_disable (tm->sw_if_index, !is_delete);
+
+ if (! is_delete)
+ {
+ int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
+ if (sockfd < 0)
+ clib_unix_warning ("get ifindex socket");
+
+ if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
+ clib_unix_warning ("get ifindex");
+
+ ifr6.ifr6_ifindex = ifr.ifr_ifindex;
+ ifr6.ifr6_prefixlen = address_length;
+ clib_memcpy (&ifr6.ifr6_addr, address, 16);
+
+ if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0)
+ clib_unix_warning ("set address");
+
+ if (sockfd >= 0)
+ close (sockfd);
+ }
+ else
+ {
+ int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
+ if (sockfd < 0)
+ clib_unix_warning ("get ifindex socket");
+
+ if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
+ clib_unix_warning ("get ifindex");
+
+ ifr6.ifr6_ifindex = ifr.ifr_ifindex;
+ ifr6.ifr6_prefixlen = address_length;
+ clib_memcpy (&ifr6.ifr6_addr, address, 16);
+
+ if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0)
+ clib_unix_warning ("del address");
+
+ if (sockfd >= 0)
+ close (sockfd);
+
+ mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */);
+ pool_put (tm->subifs, ap);
+ }
+}
+
+/**
+ * @brief TX the tun/tap frame
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *frame - vlib_frame_t
+ *
+ */
+static void
+tuntap_punt_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ tuntap_tx (vm, node, frame);
+ vlib_frame_free (vm, node, frame);
+}
+
+/**
+ * @brief Free the tun/tap frame
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *frame - vlib_frame_t
+ *
+ */
+static void
+tuntap_nopunt_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 * buffers = vlib_frame_args (frame);
+ uword n_packets = frame->n_vectors;
+ vlib_buffer_free (vm, buffers, n_packets);
+ vlib_frame_free (vm, node, frame);
+}
+
+VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
+ .name = "tuntap",
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+/**
+ * @brief Format tun/tap interface name
+ *
+ * @param *s - u8 - formatter string
+ * @param *args - va_list
+ *
+ * @return *s - u8 - formatted string
+ *
+ */
+static u8 * format_tuntap_interface_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+
+ s = format (s, "tuntap-%d", i);
+ return s;
+}
+
+/**
+ * @brief TX packet out tun/tap
+ *
+ * @param *vm - vlib_main_t
+ * @param *node - vlib_node_runtime_t
+ * @param *frame - vlib_frame_t
+ *
+ * @return n_buffers - uword - Packets transmitted
+ *
+ */
+static uword
+tuntap_intfc_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ tuntap_main_t * tm = &tuntap_main;
+ u32 * buffers = vlib_frame_args (frame);
+ uword n_buffers = frame->n_vectors;
+
+ /* Normal interface transmit happens only on the normal interface... */
+ if (tm->have_normal_interface)
+ return tuntap_tx (vm, node, frame);
+
+ vlib_buffer_free (vm, buffers, n_buffers);
+ return n_buffers;
+}
+
+VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
+ .name = "tuntap",
+ .tx_function = tuntap_intfc_tx,
+ .format_device_name = format_tuntap_interface_name,
+};
+
+/**
+ * @brief tun/tap node init
+ *
+ * @param *vm - vlib_main_t
+ *
+ * @return error - clib_error_t
+ *
+ */
+static clib_error_t *
+tuntap_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+ ip4_main_t * im4 = &ip4_main;
+ ip6_main_t * im6 = &ip6_main;
+ ip4_add_del_interface_address_callback_t cb4;
+ ip6_add_del_interface_address_callback_t cb6;
+ tuntap_main_t * tm = &tuntap_main;
+ vlib_thread_main_t * m = vlib_get_thread_main ();
+
+ error = vlib_call_init_function (vm, ip4_init);
+ if (error)
+ return error;
+
+ mhash_init (&tm->subif_mhash, sizeof (u32), sizeof(subif_address_t));
+
+ cb4.function = tuntap_ip4_add_del_interface_address;
+ cb4.function_opaque = 0;
+ vec_add1 (im4->add_del_interface_address_callbacks, cb4);
+
+ cb6.function = tuntap_ip6_add_del_interface_address;
+ cb6.function_opaque = 0;
+ vec_add1 (im6->add_del_interface_address_callbacks, cb6);
+ vec_validate_aligned (tm->threads, m->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (tuntap_init);
diff --git a/src/vnet/unix/tuntap.h b/src/vnet/unix/tuntap.h
new file mode 100644
index 00000000..7c2d5510
--- /dev/null
+++ b/src/vnet/unix/tuntap.h
@@ -0,0 +1,69 @@
+/*
+ *------------------------------------------------------------------
+ * tuntap.h - kernel stack (reverse) punt/inject path
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+/**
+ * @file
+ * @brief Call from VLIB_INIT_FUNCTION to set the Linux kernel inject node name.
+ */
+void register_tuntap_inject_node_name (char *name);
+
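+/**
+ * Illustrative usage sketch (hypothetical init function; "ip4-input" is
+ * just an example node name, not mandated by this API):
+ *
+ * @code
+ * static clib_error_t *
+ * my_tuntap_setup (vlib_main_t * vm)
+ * {
+ *   register_tuntap_inject_node_name ("ip4-input");
+ *   return 0;
+ * }
+ * VLIB_INIT_FUNCTION (my_tuntap_setup);
+ * @endcode
+ */
+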
+/** arguments structure for vnet_tap_connect, vnet_tap_connect_renumber, etc.
+ */
+
+typedef struct
+{
+ /** Interface name */
+ u8 *intfc_name;
+ /** Mac address */
+ u8 *hwaddr_arg;
+ /** Please set the indicated ip4 address/mask on the interface */
+ u8 ip4_address_set;
+ /** Please set the indicated ip6 address/mask on the interface */
+ u8 ip6_address_set;
+ /** Renumber the (existing) interface */
+ u8 renumber;
+ /** (optional) ip4 address to set */
+ ip4_address_t *ip4_address;
+ /** (optional) ip4 mask width to set */
+ u32 ip4_mask_width;
+ /** (optional) ip6 address to set */
+ ip6_address_t *ip6_address;
+ /** (optional) ip6 mask width to set */
+ u32 ip6_mask_width;
+ /** Output parameter: result sw_if_index */
+ u32 *sw_if_indexp;
+ /** Custom device instance */
+ u32 custom_dev_instance;
+ /** original sw_if_index (renumber) */
+ u32 orig_sw_if_index;
+} vnet_tap_connect_args_t;
+
+/** Connect a tap interface */
+int vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t *args);
+
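+/**
+ * Illustrative call sketch (hypothetical interface name, error handling
+ * elided), showing the minimal fields a caller fills in:
+ *
+ * @code
+ * vnet_tap_connect_args_t _a = { 0 }, *a = &_a;
+ * u32 sw_if_index = ~0;
+ * a->intfc_name = (u8 *) "tap0";
+ * a->sw_if_indexp = &sw_if_index;
+ * (void) vnet_tap_connect (vm, a);
+ * @endcode
+ */
+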
+/** Connect / renumber a tap interface */
+int vnet_tap_connect_renumber (vlib_main_t * vm,
+ vnet_tap_connect_args_t *args);
+
+/** Modify a tap interface */
+int vnet_tap_modify (vlib_main_t * vm, vnet_tap_connect_args_t *args);
+
+/** Delete a tap interface */
+int vnet_tap_delete(vlib_main_t *vm, u32 sw_if_index);
+
+
diff --git a/src/vnet/util/radix.c b/src/vnet/util/radix.c
new file mode 100644
index 00000000..ff0b0f7b
--- /dev/null
+++ b/src/vnet/util/radix.c
@@ -0,0 +1,1104 @@
+/* $NetBSD: radix.c,v 1.47 2016/12/12 03:55:57 ozaki-r Exp $ */
+
+/*
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.c 8.6 (Berkeley) 10/17/95
+ */
+
+/*
+ * Routines to build and maintain radix trees for routing lookups.
+ */
+
+#include <vnet/util/radix.h>
+
+typedef void (*rn_printer_t)(void *, const char *fmt, ...);
+
+static int max_keylen = 33;	/* maximum key length handled, in bytes */
+struct radix_mask *rn_mkfreelist;
+struct radix_node_head *mask_rnhead;
+static char *addmask_key;
+static const char normal_chars[] =
+ {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
+static char *rn_zeros, *rn_ones;
+
+#define rn_masktop (mask_rnhead->rnh_treetop)
+
+static int rn_satisfies_leaf(const char *, struct radix_node *, int);
+static int rn_lexobetter(const void *, const void *);
+static struct radix_mask *rn_new_radix_mask(struct radix_node *,
+ struct radix_mask *);
+static struct radix_node *rn_walknext(struct radix_node *, rn_printer_t,
+ void *);
+static struct radix_node *rn_walkfirst(struct radix_node *, rn_printer_t,
+ void *);
+static void rn_nodeprint(struct radix_node *, rn_printer_t, void *,
+ const char *);
+
+#define SUBTREE_OPEN "[ "
+#define SUBTREE_CLOSE " ]"
+
+#ifdef RN_DEBUG
+static void rn_treeprint(struct radix_node_head *, rn_printer_t, void *);
+#endif /* RN_DEBUG */
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+
+static struct radix_mask*
+rm_alloc (void)
+{
+ struct radix_mask *rm = clib_mem_alloc(sizeof(struct radix_mask));
+
+ memset(rm, 0, sizeof(*rm));
+
+ return (rm);
+}
+
+static void
+rm_free (struct radix_mask *rm)
+{
+ clib_mem_free(rm);
+}
+
+#define R_Malloc(p, t, n) \
+{ \
+ p = (t) clib_mem_alloc((unsigned int)(n)); \
+ memset(p, 0, n); \
+}
+#define Free(p) clib_mem_free((p))
+#define log(a,b, c...)
+#define bool i32
+
+/*
+ * The data structure for the keys is a radix tree with one way
+ * branching removed. The index rn_b at an internal node n represents a bit
+ * position to be tested. The tree is arranged so that all descendants
+ * of a node n have keys whose bits all agree up to position rn_b - 1.
+ * (We say the index of n is rn_b.)
+ *
+ * There is at least one descendant which has a one bit at position rn_b,
+ * and at least one with a zero there.
+ *
+ * A route is determined by a pair of key and mask. We require that the
+ * bit-wise logical and of the key and mask be the key.
+ * We define the index of a route associated with a mask to be
+ * the first bit number in the mask where 0 occurs (with bit number 0
+ * representing the highest order bit).
+ *
+ * We say a mask is normal if every bit past the index of the mask is 0.
+ * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
+ * and m is a normal mask, then the route applies to every descendant of n.
+ * If index(m) < rn_b, this implies that the last few bits of k before
+ * bit b are all 0 (and hence the same holds for every descendant of n),
+ * so the route applies to all descendants of the node as well.
+ *
+ * Similar logic shows that a non-normal mask m such that
+ * index(m) <= index(n) could potentially apply to many children of n.
+ * Thus, for each non-host route, we attach its mask to a list at an internal
+ * node as high in the tree as we can go.
+ *
+ * The present version of the code makes use of normal routes in short-
+ * circuiting an explicit mask and compare operation when testing whether
+ * a key satisfies a normal route, and also in remembering the unique leaf
+ * that governs a subtree.
+ */
+
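+/*
+ * Illustrative sketch (not from the original sources): the index of a
+ * mask, as defined above, is the number of its first 0 bit, with bit 0
+ * being the highest-order bit; an all-ones (host) mask of mlen bytes
+ * yields mlen << 3.
+ *
+ *   static int
+ *   mask_index (const u8 *mask, int mlen)
+ *   {
+ *     int i, j;
+ *     for (i = 0; i < mlen; i++)
+ *       for (j = 7; j >= 0; j--)
+ *         if (((mask[i] >> j) & 1) == 0)
+ *           return (i << 3) + (7 - j);
+ *     return mlen << 3;
+ *   }
+ */
+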
+struct radix_node *
+rn_search(
+ const void *v_arg,
+ struct radix_node *head)
+{
+ const u8 * const v = v_arg;
+ struct radix_node *x;
+
+ for (x = head; x->rn_b >= 0;) {
+ if (x->rn_bmask & v[x->rn_off])
+ x = x->rn_r;
+ else
+ x = x->rn_l;
+ }
+ return x;
+}
+
+struct radix_node *
+rn_search_m(
+ const void *v_arg,
+ struct radix_node *head,
+ const void *m_arg)
+{
+ struct radix_node *x;
+ const u8 * const v = v_arg;
+ const u8 * const m = m_arg;
+
+ for (x = head; x->rn_b >= 0;) {
+ if ((x->rn_bmask & m[x->rn_off]) &&
+ (x->rn_bmask & v[x->rn_off]))
+ x = x->rn_r;
+ else
+ x = x->rn_l;
+ }
+ return x;
+}
+
+int
+rn_refines(
+ const void *m_arg,
+ const void *n_arg)
+{
+ const char *m = m_arg;
+ const char *n = n_arg;
+ const char *lim = n + *(const u8 *)n;
+ const char *lim2 = lim;
+ int longer = (*(const u8 *)n++) - (int)(*(const u8 *)m++);
+ int masks_are_equal = 1;
+
+ if (longer > 0)
+ lim -= longer;
+ while (n < lim) {
+ if (*n & ~(*m))
+ return 0;
+ if (*n++ != *m++)
+ masks_are_equal = 0;
+ }
+ while (n < lim2)
+ if (*n++)
+ return 0;
+ if (masks_are_equal && (longer < 0))
+ for (lim2 = m - longer; m < lim2; )
+ if (*m++)
+ return 1;
+ return !masks_are_equal;
+}
+
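+/*
+ * Illustrative sketch (hypothetical byte layouts; byte 0 of each key is
+ * its total length, as elsewhere in this file): a /24-style mask refines
+ * a /16-style mask, but not the other way around.
+ *
+ *   static const u8 m24[] = { 5, 255, 255, 255, 0 };
+ *   static const u8 m16[] = { 5, 255, 255, 0, 0 };
+ *   ASSERT (rn_refines (m24, m16) == 1);
+ *   ASSERT (rn_refines (m16, m24) == 0);
+ */
+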
+struct radix_node *
+rn_lookup(
+ const void *v_arg,
+ const void *m_arg,
+ struct radix_node_head *head)
+{
+ struct radix_node *x;
+ const char *netmask = NULL;
+
+ if (m_arg) {
+ if ((x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off)) == 0)
+ return NULL;
+ netmask = x->rn_key;
+ }
+ x = rn_match(v_arg, head);
+ if (x != NULL && netmask != NULL) {
+ while (x != NULL && x->rn_mask != netmask)
+ x = x->rn_dupedkey;
+ }
+ return x;
+}
+
+static int
+rn_satisfies_leaf(
+ const char *trial,
+ struct radix_node *leaf,
+ int skip)
+{
+ const char *cp = trial;
+ const char *cp2 = leaf->rn_key;
+ const char *cp3 = leaf->rn_mask;
+ const char *cplim;
+ int length = MIN(*(const u8 *)cp, *(const u8 *)cp2);
+
+ if (cp3 == 0)
+ cp3 = rn_ones;
+ else
+ length = MIN(length, *(const u8 *)cp3);
+ cplim = cp + length; cp3 += skip; cp2 += skip;
+ for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
+ if ((*cp ^ *cp2) & *cp3)
+ return 0;
+ return 1;
+}
+
+struct radix_node *
+rn_match(
+ const void *v_arg,
+ struct radix_node_head *head)
+{
+ const char * const v = v_arg;
+ struct radix_node *t = head->rnh_treetop;
+ struct radix_node *top = t;
+ struct radix_node *x;
+ struct radix_node *saved_t;
+ const char *cp = v;
+ const char *cp2;
+ const char *cplim;
+ int off = t->rn_off;
+ int vlen = *(const u8 *)cp;
+ int matched_off;
+ int test, b, rn_b;
+
+ /*
+ * Open code rn_search(v, top) to avoid overhead of extra
+ * subroutine call.
+ */
+ for (; t->rn_b >= 0; ) {
+ if (t->rn_bmask & cp[t->rn_off])
+ t = t->rn_r;
+ else
+ t = t->rn_l;
+ }
+ /*
+ * See if we match exactly as a host destination
+ * or at least learn how many bits match, for normal mask finesse.
+ *
+ * It doesn't hurt us to limit how many bytes to check
+ * to the length of the mask, since if it matches we had a genuine
+ * match and the leaf we have is the most specific one anyway;
+ * if it didn't match with a shorter length it would fail
+ * with a long one. This wins big for class B&C netmasks which
+ * are probably the most common case...
+ */
+ if (t->rn_mask)
+ vlen = *(const u8 *)t->rn_mask;
+ cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
+ for (; cp < cplim; cp++, cp2++)
+ if (*cp != *cp2)
+ goto on1;
+ /*
+ * This extra grot is in case we are explicitly asked
+ * to look up the default. Ugh!
+ */
+ if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey)
+ t = t->rn_dupedkey;
+ return t;
+on1:
+ test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
+ for (b = 7; (test >>= 1) > 0;)
+ b--;
+ matched_off = cp - v;
+ b += matched_off << 3;
+ rn_b = -1 - b;
+ /*
+ * If there is a host route in a duped-key chain, it will be first.
+ */
+ if ((saved_t = t)->rn_mask == 0)
+ t = t->rn_dupedkey;
+ for (; t; t = t->rn_dupedkey)
+ /*
+ * Even if we don't match exactly as a host,
+ * we may match if the leaf we wound up at is
+ * a route to a net.
+ */
+ if (t->rn_flags & RNF_NORMAL) {
+ if (rn_b <= t->rn_b)
+ return t;
+ } else if (rn_satisfies_leaf(v, t, matched_off))
+ return t;
+ t = saved_t;
+ /* start searching up the tree */
+ do {
+ struct radix_mask *m;
+ t = t->rn_p;
+ m = t->rn_mklist;
+ if (m) {
+ /*
+ * If non-contiguous masks ever become important
+ * we can restore the masking and open coding of
+ * the search and satisfaction test and put the
+ * calculation of "off" back before the "do".
+ */
+ do {
+ if (m->rm_flags & RNF_NORMAL) {
+ if (rn_b <= m->rm_b)
+ return m->rm_leaf;
+ } else {
+ off = MIN(t->rn_off, matched_off);
+ x = rn_search_m(v, t, m->rm_mask);
+ while (x && x->rn_mask != m->rm_mask)
+ x = x->rn_dupedkey;
+ if (x && rn_satisfies_leaf(v, x, off))
+ return x;
+ }
+ m = m->rm_mklist;
+ } while (m);
+ }
+ } while (t != top);
+ return NULL;
+}
+
+static void
+rn_nodeprint(struct radix_node *rn, rn_printer_t printer, void *arg,
+ const char *delim)
+{
+ (*printer)(arg, "%s(%s%p: p<%p> l<%p> r<%p>)",
+ delim, ((void *)rn == arg) ? "*" : "", rn, rn->rn_p,
+ rn->rn_l, rn->rn_r);
+}
+
+#ifdef RN_DEBUG
+int rn_debug = 1;
+
+static void
+rn_dbg_print(void *arg, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vlog(LOG_DEBUG, fmt, ap);
+ va_end(ap);
+}
+
+static void
+rn_treeprint(struct radix_node_head *h, rn_printer_t printer, void *arg)
+{
+ struct radix_node *dup, *rn;
+ const char *delim;
+
+ if (printer == NULL)
+ return;
+
+ rn = rn_walkfirst(h->rnh_treetop, printer, arg);
+ for (;;) {
+ /* Process leaves */
+ delim = "";
+ for (dup = rn; dup != NULL; dup = dup->rn_dupedkey) {
+ if ((dup->rn_flags & RNF_ROOT) != 0)
+ continue;
+ rn_nodeprint(dup, printer, arg, delim);
+ delim = ", ";
+ }
+ rn = rn_walknext(rn, printer, arg);
+ if (rn->rn_flags & RNF_ROOT)
+ return;
+ }
+ /* NOTREACHED */
+}
+
+#define traverse(__head, __rn) rn_treeprint((__head), rn_dbg_print, (__rn))
+#endif /* RN_DEBUG */
+
+struct radix_node *
+rn_newpair(
+ const void *v,
+ int b,
+ struct radix_node nodes[2])
+{
+ struct radix_node *tt = nodes;
+ struct radix_node *t = tt + 1;
+ t->rn_b = b; t->rn_bmask = 0x80 >> (b & 7);
+ t->rn_l = tt; t->rn_off = b >> 3;
+ tt->rn_b = -1; tt->rn_key = v; tt->rn_p = t;
+ tt->rn_flags = t->rn_flags = RNF_ACTIVE;
+ return t;
+}
+
+struct radix_node *
+rn_insert(
+ const void *v_arg,
+ struct radix_node_head *head,
+ int *dupentry,
+ struct radix_node nodes[2])
+{
+ struct radix_node *top = head->rnh_treetop;
+ struct radix_node *t = rn_search(v_arg, top);
+ struct radix_node *tt;
+ const char *v = v_arg;
+ int head_off = top->rn_off;
+ int vlen = *((const u8 *)v);
+ const char *cp = v + head_off;
+ int b;
+ /*
+ * Find first bit at which v and t->rn_key differ
+ */
+ {
+ const char *cp2 = t->rn_key + head_off;
+ const char *cplim = v + vlen;
+ int cmp_res;
+
+ while (cp < cplim)
+ if (*cp2++ != *cp++)
+ goto on1;
+ *dupentry = 1;
+ return t;
+on1:
+ *dupentry = 0;
+ cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
+ for (b = (cp - v) << 3; cmp_res; b--)
+ cmp_res >>= 1;
+ }
+ {
+ struct radix_node *p, *x = top;
+ cp = v;
+ do {
+ p = x;
+ if (cp[x->rn_off] & x->rn_bmask)
+ x = x->rn_r;
+ else x = x->rn_l;
+ } while (b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */
+#ifdef RN_DEBUG
+ if (rn_debug)
+ log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, p);
+#endif
+ t = rn_newpair(v_arg, b, nodes); tt = t->rn_l;
+ if ((cp[p->rn_off] & p->rn_bmask) == 0)
+ p->rn_l = t;
+ else
+ p->rn_r = t;
+ x->rn_p = t; t->rn_p = p; /* frees x, p as temp vars below */
+ if ((cp[t->rn_off] & t->rn_bmask) == 0) {
+ t->rn_r = x;
+ } else {
+ t->rn_r = tt; t->rn_l = x;
+ }
+#ifdef RN_DEBUG
+ if (rn_debug) {
+ log(LOG_DEBUG, "%s: Coming Out:\n", __func__),
+ traverse(head, p);
+ }
+#endif /* RN_DEBUG */
+ }
+ return tt;
+}
+
+struct radix_node *
+rn_addmask(
+ const void *n_arg,
+ int search,
+ int skip)
+{
+ const char *netmask = n_arg;
+ const char *cp;
+ const char *cplim;
+ struct radix_node *x;
+ struct radix_node *saved_x;
+ int b = 0, mlen, j;
+ int maskduplicated, m0, isnormal;
+ static int last_zeroed = 0;
+
+ if ((mlen = *(const u8 *)netmask) > max_keylen)
+ mlen = max_keylen;
+ if (skip == 0)
+ skip = 1;
+ if (mlen <= skip)
+ return mask_rnhead->rnh_nodes;
+ if (skip > 1)
+ memmove(addmask_key + 1, rn_ones + 1, skip - 1);
+ if ((m0 = mlen) > skip)
+ memmove(addmask_key + skip, netmask + skip, mlen - skip);
+ /*
+ * Trim trailing zeroes.
+ */
+ for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
+ cp--;
+ mlen = cp - addmask_key;
+ if (mlen <= skip) {
+ if (m0 >= last_zeroed)
+ last_zeroed = mlen;
+ return mask_rnhead->rnh_nodes;
+ }
+ if (m0 < last_zeroed)
+ memset(addmask_key + m0, 0, last_zeroed - m0);
+ *addmask_key = last_zeroed = mlen;
+ x = rn_search(addmask_key, rn_masktop);
+ if (memcmp(addmask_key, x->rn_key, mlen) != 0)
+ x = 0;
+ if (x || search)
+ return x;
+ R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
+ if ((saved_x = x) == NULL)
+ return NULL;
+ memset(x, 0, max_keylen + 2 * sizeof (*x));
+ cp = netmask = (void *)(x + 2);
+ memmove(x + 2, addmask_key, mlen);
+ x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
+ if (maskduplicated) {
+ log(LOG_ERR, "rn_addmask: mask impossibly already in tree\n");
+ Free(saved_x);
+ return x;
+ }
+ /*
+ * Calculate index of mask, and check for normalcy.
+ */
+ cplim = netmask + mlen; isnormal = 1;
+ for (cp = netmask + skip; (cp < cplim) && *(const u8 *)cp == 0xff;)
+ cp++;
+ if (cp != cplim) {
+ for (j = 0x80; (j & *cp) != 0; j >>= 1)
+ b++;
+ if (*cp != normal_chars[b] || cp != (cplim - 1))
+ isnormal = 0;
+ }
+ b += (cp - netmask) << 3;
+ x->rn_b = -1 - b;
+ if (isnormal)
+ x->rn_flags |= RNF_NORMAL;
+ return x;
+}
+
+static int /* XXX: arbitrary ordering for non-contiguous masks */
+rn_lexobetter(
+ const void *m_arg,
+ const void *n_arg)
+{
+ const u8 *mp = m_arg;
+ const u8 *np = n_arg;
+ const u8 *lim;
+
+ if (*mp > *np)
+ return 1; /* not really, but need to check longer one first */
+ if (*mp == *np)
+ for (lim = mp + *mp; mp < lim;)
+ if (*mp++ > *np++)
+ return 1;
+ return 0;
+}
+
+static struct radix_mask *
+rn_new_radix_mask(
+ struct radix_node *tt,
+ struct radix_mask *next)
+{
+ struct radix_mask *m;
+
+ m = rm_alloc();
+ if (m == NULL) {
+ log(LOG_ERR, "Mask for route not entered\n");
+ return NULL;
+ }
+ memset(m, 0, sizeof(*m));
+ m->rm_b = tt->rn_b;
+ m->rm_flags = tt->rn_flags;
+ if (tt->rn_flags & RNF_NORMAL)
+ m->rm_leaf = tt;
+ else
+ m->rm_mask = tt->rn_mask;
+ m->rm_mklist = next;
+ tt->rn_mklist = m;
+ return m;
+}
+
+struct radix_node *
+rn_addroute(
+ const void *v_arg,
+ const void *n_arg,
+ struct radix_node_head *head,
+ struct radix_node treenodes[2])
+{
+ const char *v = v_arg, *netmask = n_arg;
+ struct radix_node *t, *x = NULL, *tt;
+ struct radix_node *saved_tt, *top = head->rnh_treetop;
+ short b = 0, b_leaf = 0;
+ int keyduplicated;
+ const char *mmask;
+ struct radix_mask *m, **mp;
+
+ /*
+ * In dealing with non-contiguous masks, there may be
+ * many different routes which have the same mask.
+ * We will find it useful to have a unique pointer to
+ * the mask to speed avoiding duplicate references at
+ * nodes and possibly save time in calculating indices.
+ */
+ if (netmask != NULL) {
+ if ((x = rn_addmask(netmask, 0, top->rn_off)) == NULL)
+ return NULL;
+ b_leaf = x->rn_b;
+ b = -1 - x->rn_b;
+ netmask = x->rn_key;
+ }
+ /*
+ * Deal with duplicated keys: attach node to previous instance
+ */
+ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
+ if (keyduplicated) {
+ for (t = tt; tt != NULL; t = tt, tt = tt->rn_dupedkey) {
+ if (tt->rn_mask == netmask)
+ return NULL;
+ if (netmask == NULL ||
+ (tt->rn_mask != NULL &&
+ (b_leaf < tt->rn_b || /* index(netmask) > node */
+ rn_refines(netmask, tt->rn_mask) ||
+ rn_lexobetter(netmask, tt->rn_mask))))
+ break;
+ }
+ /*
+ * If the mask is not duplicated, we wouldn't
+ * find it among possible duplicate key entries
+ * anyway, so the above test doesn't hurt.
+ *
+ * We sort the masks for a duplicated key the same way as
+ * in a masklist -- most specific to least specific.
+ * This may require the unfortunate nuisance of relocating
+ * the head of the list.
+ *
+ * We also reverse, or doubly link the list through the
+ * parent pointer.
+ */
+ if (tt == saved_tt) {
+ struct radix_node *xx = x;
+ /* link in at head of list */
+ (tt = treenodes)->rn_dupedkey = t;
+ tt->rn_flags = t->rn_flags;
+ tt->rn_p = x = t->rn_p;
+ t->rn_p = tt;
+ if (x->rn_l == t)
+ x->rn_l = tt;
+ else
+ x->rn_r = tt;
+ saved_tt = tt;
+ x = xx;
+ } else {
+ (tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
+ t->rn_dupedkey = tt;
+ tt->rn_p = t;
+ if (tt->rn_dupedkey)
+ tt->rn_dupedkey->rn_p = tt;
+ }
+ tt->rn_key = v;
+ tt->rn_b = -1;
+ tt->rn_flags = RNF_ACTIVE;
+ }
+ /*
+ * Put mask in tree.
+ */
+ if (netmask != NULL) {
+ tt->rn_mask = netmask;
+ tt->rn_b = x->rn_b;
+ tt->rn_flags |= x->rn_flags & RNF_NORMAL;
+ }
+ t = saved_tt->rn_p;
+ if (keyduplicated)
+ goto on2;
+ b_leaf = -1 - t->rn_b;
+ if (t->rn_r == saved_tt)
+ x = t->rn_l;
+ else
+ x = t->rn_r;
+ /* Promote general routes from below */
+ if (x->rn_b < 0) {
+ for (mp = &t->rn_mklist; x != NULL; x = x->rn_dupedkey) {
+ if (x->rn_mask != NULL && x->rn_b >= b_leaf &&
+ x->rn_mklist == NULL) {
+ *mp = m = rn_new_radix_mask(x, NULL);
+ if (m != NULL)
+ mp = &m->rm_mklist;
+ }
+ }
+ } else if (x->rn_mklist != NULL) {
+ /*
+ * Skip over masks whose index is > that of new node
+ */
+ for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
+ if (m->rm_b >= b_leaf)
+ break;
+ t->rn_mklist = m;
+ *mp = NULL;
+ }
+on2:
+ /* Add new route to highest possible ancestor's list */
+ if (netmask == NULL || b > t->rn_b)
+ return tt; /* can't lift at all */
+ b_leaf = tt->rn_b;
+ do {
+ x = t;
+ t = t->rn_p;
+ } while (b <= t->rn_b && x != top);
+ /*
+ * Search through routes associated with node to
+ * insert new route according to index.
+ * Need same criteria as when sorting dupedkeys to avoid
+ * double loop on deletion.
+ */
+ for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
+ if (m->rm_b < b_leaf)
+ continue;
+ if (m->rm_b > b_leaf)
+ break;
+ if (m->rm_flags & RNF_NORMAL) {
+ mmask = m->rm_leaf->rn_mask;
+ if (tt->rn_flags & RNF_NORMAL) {
+ log(LOG_ERR, "Non-unique normal route,"
+ " mask not entered\n");
+ return tt;
+ }
+ } else
+ mmask = m->rm_mask;
+ if (mmask == netmask) {
+ m->rm_refs++;
+ tt->rn_mklist = m;
+ return tt;
+ }
+ if (rn_refines(netmask, mmask) || rn_lexobetter(netmask, mmask))
+ break;
+ }
+ *mp = rn_new_radix_mask(tt, *mp);
+ return tt;
+}
+
+struct radix_node *
+rn_delete1(
+ const void *v_arg,
+ const void *netmask_arg,
+ struct radix_node_head *head,
+ struct radix_node *rn)
+{
+ struct radix_node *t, *p, *x, *tt;
+ struct radix_mask *m, *saved_m, **mp;
+ struct radix_node *dupedkey, *saved_tt, *top;
+ const char *v, *netmask;
+ int b, head_off, vlen;
+
+ v = v_arg;
+ netmask = netmask_arg;
+ x = head->rnh_treetop;
+ tt = rn_search(v, x);
+ head_off = x->rn_off;
+ vlen = *(const u8 *)v;
+ saved_tt = tt;
+ top = x;
+ if (tt == NULL ||
+ memcmp(v + head_off, tt->rn_key + head_off, vlen - head_off) != 0)
+ return NULL;
+ /*
+ * Delete our route from mask lists.
+ */
+ if (netmask != NULL) {
+ if ((x = rn_addmask(netmask, 1, head_off)) == NULL)
+ return NULL;
+ netmask = x->rn_key;
+ while (tt->rn_mask != netmask)
+ if ((tt = tt->rn_dupedkey) == NULL)
+ return NULL;
+ }
+ if (tt->rn_mask == NULL || (saved_m = m = tt->rn_mklist) == NULL)
+ goto on1;
+ if (tt->rn_flags & RNF_NORMAL) {
+ if (m->rm_leaf != tt || m->rm_refs > 0) {
+ log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+ return NULL; /* dangling ref could cause disaster */
+ }
+ } else {
+ if (m->rm_mask != tt->rn_mask) {
+ log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+ goto on1;
+ }
+ if (--m->rm_refs >= 0)
+ goto on1;
+ }
+ b = -1 - tt->rn_b;
+ t = saved_tt->rn_p;
+ if (b > t->rn_b)
+ goto on1; /* Wasn't lifted at all */
+ do {
+ x = t;
+ t = t->rn_p;
+ } while (b <= t->rn_b && x != top);
+ for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
+ if (m == saved_m) {
+ *mp = m->rm_mklist;
+ rm_free(m);
+ break;
+ }
+ }
+ if (m == NULL) {
+ log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
+ if (tt->rn_flags & RNF_NORMAL)
+ return NULL; /* Dangling ref to us */
+ }
+on1:
+ /*
+ * Eliminate us from tree
+ */
+ if (tt->rn_flags & RNF_ROOT)
+ return NULL;
+#ifdef RN_DEBUG
+ if (rn_debug)
+ log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, tt);
+#endif
+ t = tt->rn_p;
+ dupedkey = saved_tt->rn_dupedkey;
+ if (dupedkey != NULL) {
+ /*
+ * Here, tt is the deletion target, and
+ * saved_tt is the head of the dupedkey chain.
+ */
+ if (tt == saved_tt) {
+ x = dupedkey;
+ x->rn_p = t;
+ if (t->rn_l == tt)
+ t->rn_l = x;
+ else
+ t->rn_r = x;
+ } else {
+ /* find node in front of tt on the chain */
+ for (x = p = saved_tt;
+ p != NULL && p->rn_dupedkey != tt;)
+ p = p->rn_dupedkey;
+ if (p != NULL) {
+ p->rn_dupedkey = tt->rn_dupedkey;
+ if (tt->rn_dupedkey != NULL)
+ tt->rn_dupedkey->rn_p = p;
+ } else
+ log(LOG_ERR, "rn_delete: couldn't find us\n");
+ }
+ t = tt + 1;
+ if (t->rn_flags & RNF_ACTIVE) {
+ *++x = *t;
+ p = t->rn_p;
+ if (p->rn_l == t)
+ p->rn_l = x;
+ else
+ p->rn_r = x;
+ x->rn_l->rn_p = x;
+ x->rn_r->rn_p = x;
+ }
+ goto out;
+ }
+ if (t->rn_l == tt)
+ x = t->rn_r;
+ else
+ x = t->rn_l;
+ p = t->rn_p;
+ if (p->rn_r == t)
+ p->rn_r = x;
+ else
+ p->rn_l = x;
+ x->rn_p = p;
+ /*
+ * Demote routes attached to us.
+ */
+ if (t->rn_mklist == NULL)
+ ;
+ else if (x->rn_b >= 0) {
+ for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
+ ;
+ *mp = t->rn_mklist;
+ } else {
+ /* If there are any key,mask pairs in a sibling
+ duped-key chain, some subset will appear sorted
+ in the same order attached to our mklist */
+ for (m = t->rn_mklist;
+ m != NULL && x != NULL;
+ x = x->rn_dupedkey) {
+ if (m == x->rn_mklist) {
+ struct radix_mask *mm = m->rm_mklist;
+ x->rn_mklist = NULL;
+ if (--(m->rm_refs) < 0)
+ rm_free(m);
+ m = mm;
+ }
+ }
+ if (m != NULL) {
+ log(LOG_ERR, "rn_delete: Orphaned Mask %p at %p\n",
+ m, x);
+ }
+ }
+ /*
+ * We may be holding an active internal node in the tree.
+ */
+ x = tt + 1;
+ if (t != x) {
+ *t = *x;
+ t->rn_l->rn_p = t;
+ t->rn_r->rn_p = t;
+ p = x->rn_p;
+ if (p->rn_l == x)
+ p->rn_l = t;
+ else
+ p->rn_r = t;
+ }
+out:
+#ifdef RN_DEBUG
+ if (rn_debug) {
+ log(LOG_DEBUG, "%s: Coming Out:\n", __func__),
+ traverse(head, tt);
+ }
+#endif /* RN_DEBUG */
+ tt->rn_flags &= ~RNF_ACTIVE;
+ tt[1].rn_flags &= ~RNF_ACTIVE;
+ return tt;
+}
+
+struct radix_node *
+rn_delete(
+ const void *v_arg,
+ const void *netmask_arg,
+ struct radix_node_head *head)
+{
+ return rn_delete1(v_arg, netmask_arg, head, NULL);
+}
+
+static struct radix_node *
+rn_walknext(struct radix_node *rn, rn_printer_t printer, void *arg)
+{
+ /* If at right child go back up, otherwise, go right */
+ while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0) {
+ if (printer != NULL)
+ (*printer)(arg, SUBTREE_CLOSE);
+ rn = rn->rn_p;
+ }
+ if (printer)
+ rn_nodeprint(rn->rn_p, printer, arg, "");
+ /* Find the next *leaf* since next node might vanish, too */
+ for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;) {
+ if (printer != NULL)
+ (*printer)(arg, SUBTREE_OPEN);
+ rn = rn->rn_l;
+ }
+ return rn;
+}
+
+static struct radix_node *
+rn_walkfirst(struct radix_node *rn, rn_printer_t printer, void *arg)
+{
+ /* First time through node, go left */
+ while (rn->rn_b >= 0) {
+ if (printer != NULL)
+ (*printer)(arg, SUBTREE_OPEN);
+ rn = rn->rn_l;
+ }
+ return rn;
+}
+
+int
+rn_walktree(
+ struct radix_node_head *h,
+ int (*f)(struct radix_node *, void *),
+ void *w)
+{
+ int error;
+ struct radix_node *base, *next, *rn;
+ /*
+ * This gets complicated because we may delete the node
+ * while applying the function f to it, so we need to calculate
+ * the successor node in advance.
+ */
+ rn = rn_walkfirst(h->rnh_treetop, NULL, NULL);
+ for (;;) {
+ base = rn;
+ next = rn_walknext(rn, NULL, NULL);
+ /* Process leaves */
+ while ((rn = base) != NULL) {
+ base = rn->rn_dupedkey;
+ if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w)))
+ return error;
+ }
+ rn = next;
+ if (rn->rn_flags & RNF_ROOT)
+ return 0;
+ }
+ /* NOTREACHED */
+}
+
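+/*
+ * Illustrative sketch (hypothetical callback, not from the original
+ * sources): counting the leaves of a tree with rn_walktree().  A
+ * non-zero return from the callback aborts the walk.
+ *
+ *   static int
+ *   count_leaf (struct radix_node *rn, void *arg)
+ *   {
+ *     (*(u32 *) arg) += 1;
+ *     return 0;
+ *   }
+ *
+ *   u32 n_leaves = 0;
+ *   rn_walktree (head, count_leaf, &n_leaves);
+ */
+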
+struct radix_node *
+rn_search_matched(struct radix_node_head *h,
+ int (*matcher)(struct radix_node *, void *), void *w)
+{
+ bool matched;
+ struct radix_node *base, *next, *rn;
+ /*
+ * This gets complicated because we may delete the node
+ * while applying the matcher function to it, so we need to calculate
+ * the successor node in advance.
+ */
+ rn = rn_walkfirst(h->rnh_treetop, NULL, NULL);
+ for (;;) {
+ base = rn;
+ next = rn_walknext(rn, NULL, NULL);
+ /* Process leaves */
+ while ((rn = base) != NULL) {
+ base = rn->rn_dupedkey;
+ if (!(rn->rn_flags & RNF_ROOT)) {
+ matched = (*matcher)(rn, w);
+ if (matched)
+ return rn;
+ }
+ }
+ rn = next;
+ if (rn->rn_flags & RNF_ROOT)
+ return NULL;
+ }
+ /* NOTREACHED */
+}
+
+int
+rn_inithead(void **head, int off)
+{
+ struct radix_node_head *rnh;
+
+ if (*head != NULL)
+ return 1;
+ R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh));
+ if (rnh == NULL)
+ return 0;
+ *head = rnh;
+ return rn_inithead0(rnh, off);
+}
+
+int
+rn_inithead0(struct radix_node_head *rnh, int off)
+{
+ struct radix_node *t;
+ struct radix_node *tt;
+ struct radix_node *ttt;
+
+ memset(rnh, 0, sizeof(*rnh));
+ t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
+ ttt = rnh->rnh_nodes + 2;
+ t->rn_r = ttt;
+ t->rn_p = t;
+ tt = t->rn_l;
+ tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
+ tt->rn_b = -1 - off;
+ *ttt = *tt;
+ ttt->rn_key = rn_ones;
+ rnh->rnh_addaddr = rn_addroute;
+ rnh->rnh_deladdr = rn_delete;
+ rnh->rnh_matchaddr = rn_match;
+ rnh->rnh_lookup = rn_lookup;
+ rnh->rnh_treetop = t;
+ return 1;
+}
+
+static clib_error_t *
+rn_module_init (vlib_main_t * vm)
+{
+ char *cp, *cplim;
+
+ R_Malloc(rn_zeros, char *, 3 * max_keylen);
+ if (rn_zeros == NULL)
+ return (clib_error_return (0, "RN Zeros..."));
+
+ memset(rn_zeros, 0, 3 * max_keylen);
+ rn_ones = cp = rn_zeros + max_keylen;
+ addmask_key = cplim = rn_ones + max_keylen;
+ while (cp < cplim)
+ *cp++ = -1;
+ if (rn_inithead((void *)&mask_rnhead, 0) == 0)
+ return (clib_error_return (0, "RN Init 2"));
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION(rn_module_init);
diff --git a/src/vnet/util/radix.h b/src/vnet/util/radix.h
new file mode 100644
index 00000000..d9ba6659
--- /dev/null
+++ b/src/vnet/util/radix.h
@@ -0,0 +1,147 @@
+/* $NetBSD: radix.h,v 1.23 2016/11/15 01:50:06 ozaki-r Exp $ */
+
+/*
+ * Copyright (c) 1988, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)radix.h 8.2 (Berkeley) 10/31/94
+ */
+
+#ifndef _NET_RADIX_H_
+#define _NET_RADIX_H_
+
+#include <vlib/vlib.h>
+
+/*
+ * Radix search tree node layout.
+ */
+
+struct radix_node {
+ struct radix_mask *rn_mklist; /* list of masks contained in subtree */
+ struct radix_node *rn_p; /* parent */
+ i16 rn_b; /* bit offset; -1-index(netmask) */
+ u8 rn_bmask; /* node: mask for bit test*/
+ u8 rn_flags; /* enumerated next */
+#define RNF_NORMAL 1 /* leaf contains normal route */
+#define RNF_ROOT 2 /* leaf is root leaf for tree */
+#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */
+ union {
+ struct { /* leaf only data: */
+ const char *rn_Key; /* object of search */
+ const char *rn_Mask; /* netmask, if present */
+ struct radix_node *rn_Dupedkey;
+ } rn_leaf;
+ struct { /* node only data: */
+ int rn_Off; /* where to start compare */
+ struct radix_node *rn_L;/* progeny */
+ struct radix_node *rn_R;/* progeny */
+ } rn_node;
+ } rn_u;
+#ifdef RN_DEBUG
+ i32 rn_info;
+ struct radix_node *rn_twin;
+ struct radix_node *rn_ybro;
+#endif
+};
+
+#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey
+#define rn_key rn_u.rn_leaf.rn_Key
+#define rn_mask rn_u.rn_leaf.rn_Mask
+#define rn_off rn_u.rn_node.rn_Off
+#define rn_l rn_u.rn_node.rn_L
+#define rn_r rn_u.rn_node.rn_R
+
+/*
+ * Annotations to tree concerning potential routes applying to subtrees.
+ */
+
+struct radix_mask {
+ i16 rm_b; /* bit offset; -1-index(netmask) */
+ i8 rm_unused; /* cf. rn_bmask */
+ u8 rm_flags; /* cf. rn_flags */
+ struct radix_mask *rm_mklist; /* more masks to try */
+ union {
+ const char *rmu_mask; /* the mask */
+ struct radix_node *rmu_leaf; /* for normal routes */
+ } rm_rmu;
+ i32 rm_refs; /* # of references to this struct */
+};
+
+#define rm_mask rm_rmu.rmu_mask
+#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */
+
+struct radix_node_head {
+ struct radix_node *rnh_treetop;
+ i32 rnh_addrsize; /* permit, but not require fixed keys */
+ i32 rnh_pktsize; /* permit, but not require fixed keys */
+ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
+ (const void *v, const void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]);
+ struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
+ (const void *v, const void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]);
+ struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
+ (const void *v, const void *mask, struct radix_node_head *head);
+ struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
+ (const void *v, const void *mask, struct radix_node_head *head);
+ struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
+ (const void *v, struct radix_node_head *head);
+ struct radix_node *(*rnh_lookup) /* locate based on sockaddr */
+ (const void *v, const void *mask, struct radix_node_head *head);
+ struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
+ (const void *v, struct radix_node_head *head);
+ struct radix_node rnh_nodes[3]; /* empty tree for common case */
+};
+
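+/*
+ * Illustrative sketch (hypothetical rnh, key, mask and handle_leaf):
+ * callers normally invoke operations through these per-head function
+ * pointers rather than calling the rn_* functions directly.
+ *
+ *   struct radix_node *rn = rnh->rnh_lookup (key, mask, rnh);
+ *   if (rn != NULL && (rn->rn_flags & RNF_ROOT) == 0)
+ *     handle_leaf (rn);
+ */
+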
+void rn_init(void);
+int rn_inithead(void **, int);
+void rn_delayedinit(void **, int);
+int rn_inithead0(struct radix_node_head *, int);
+int rn_refines(const void *, const void *);
+int rn_walktree(struct radix_node_head *,
+ int (*)(struct radix_node *, void *),
+ void *);
+struct radix_node *
+ rn_search_matched(struct radix_node_head *,
+ int (*)(struct radix_node *, void *),
+ void *);
+struct radix_node
+ *rn_addmask(const void *, int, int),
+ *rn_addroute(const void *, const void *, struct radix_node_head *,
+ struct radix_node [2]),
+ *rn_delete1(const void *, const void *, struct radix_node_head *,
+ struct radix_node *),
+ *rn_delete(const void *, const void *, struct radix_node_head *),
+ *rn_insert(const void *, struct radix_node_head *, int *,
+ struct radix_node [2]),
+ *rn_lookup(const void *, const void *, struct radix_node_head *),
+ *rn_match(const void *, struct radix_node_head *),
+ *rn_newpair(const void *, int, struct radix_node[2]),
+ *rn_search(const void *, struct radix_node *),
+ *rn_search_m(const void *, struct radix_node *, const void *);
+
+#endif /* !_NET_RADIX_H_ */
diff --git a/src/vnet/vnet.h b/src/vnet/vnet.h
new file mode 100644
index 00000000..8405f61b
--- /dev/null
+++ b/src/vnet/vnet.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vnet.h: general networking definitions
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_vnet_h
+#define included_vnet_vnet_h
+
+#include <vppinfra/types.h>
+
+#include <vnet/unix/pcap.h>
+#include <vnet/buffer.h>
+#include <vnet/config.h>
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+
+typedef struct vnet_main_t
+{
+ u32 local_interface_hw_if_index;
+ u32 local_interface_sw_if_index;
+
+ vnet_interface_main_t interface_main;
+
+ /* set up by constructors */
+ vnet_device_class_t *device_class_registrations;
+ vnet_hw_interface_class_t *hw_interface_class_registrations;
+ _vnet_interface_function_list_elt_t
+ * hw_interface_add_del_functions[VNET_ITF_FUNC_N_PRIO];
+ _vnet_interface_function_list_elt_t
+ * hw_interface_link_up_down_functions[VNET_ITF_FUNC_N_PRIO];
+ _vnet_interface_function_list_elt_t
+ * sw_interface_add_del_functions[VNET_ITF_FUNC_N_PRIO];
+ _vnet_interface_function_list_elt_t
+ * sw_interface_admin_up_down_functions[VNET_ITF_FUNC_N_PRIO];
+
+ uword *interface_tag_by_sw_if_index;
+
+ /*
+ * Last "api" error, preserved so we can issue reasonable diagnostics
+ * at or near the top of the food chain
+ */
+ vnet_api_error_t api_errno;
+
+ vlib_main_t *vlib_main;
+} vnet_main_t;
+
+vnet_main_t vnet_main;
+vnet_main_t **vnet_mains;
+
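+/*
+ * Illustrative sketch: code elsewhere in this patch reaches this
+ * singleton through vnet_get_main (), e.g.
+ *
+ *   vnet_main_t *vnm = vnet_get_main ();
+ *   vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ */
+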
+#include <vnet/interface_funcs.h>
+#include <vnet/global_funcs.h>
+
+#endif /* included_vnet_vnet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h
new file mode 100644
index 00000000..0b225340
--- /dev/null
+++ b/src/vnet/vnet_all_api_h.h
@@ -0,0 +1,69 @@
+/*
+ *------------------------------------------------------------------
+ * vl_memory_api_h.h - memory API headers, in a specific order.
+ *
+ * Copyright (c) 2009-2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/*
+ * Add to the bottom of the #include list, or elves will steal your
+ * keyboard in the middle of the night!
+ *
+ * Include current layer (2) last, or an artistic disagreement
+ * about message numbering will occur
+ */
+
+#ifndef included_from_layer_3
+#include <vlibmemory/vl_memory_api_h.h>
+#endif /* included_from_layer_3 */
+
+#include <vnet/devices/af_packet/af_packet.api.h>
+#include <vnet/devices/netmap/netmap.api.h>
+#include <vnet/devices/virtio/vhost_user.api.h>
+#include <vnet/gre/gre.api.h>
+#include <vnet/interface.api.h>
+#include <vnet/map/map.api.h>
+#include <vnet/l2/l2.api.h>
+#include <vnet/l2tp/l2tp.api.h>
+#include <vnet/span/span.api.h>
+#include <vnet/ip/ip.api.h>
+#include <vnet/unix/tap.api.h>
+#include <vnet/vxlan/vxlan.api.h>
+#include <vnet/lldp/lldp.api.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.api.h>
+#include <vnet/bfd/bfd.api.h>
+#include <vnet/ipsec/ipsec.api.h>
+#include <vnet/ipsec-gre/ipsec_gre.api.h>
+#include <vnet/lisp-cp/lisp.api.h>
+#include <vnet/lisp-gpe/lisp_gpe.api.h>
+#include <vnet/lisp-cp/one.api.h>
+#include <vnet/session/session.api.h>
+#include <vnet/mpls/mpls.api.h>
+#include <vnet/srv6/sr.api.h>
+#include <vnet/classify/classify.api.h>
+#include <vnet/flow/flow.api.h>
+#include <vnet/dhcp/dhcp.api.h>
+#include <vnet/cop/cop.api.h>
+#include <vnet/policer/policer.api.h>
+#include <vnet/ethernet/p2p_ethernet.api.h>
+#include <vnet/tcp/tcp.api.h>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vnet_msg_enum.h b/src/vnet/vnet_msg_enum.h
new file mode 100644
index 00000000..9899471e
--- /dev/null
+++ b/src/vnet/vnet_msg_enum.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_msg_enum_h
+#define included_vnet_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+ VL_ILLEGAL_MESSAGE_ID = 0,
+#include <vnet/vnet_all_api_h.h>
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
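+/*
+ * Illustrative sketch (hypothetical message definition): each included
+ * .api.h file expands one vl_msg_id(n,h) entry per API message, so a
+ * definition such as
+ *
+ *   vl_msg_id(VL_API_EXAMPLE_MESSAGE, example_message)
+ *
+ * contributes the single enumerator VL_API_EXAMPLE_MESSAGE above.
+ */
+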
+#endif /* included_vnet_msg_enum_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c
new file mode 100644
index 00000000..1b3a8b00
--- /dev/null
+++ b/src/vnet/vxlan-gpe/decap.c
@@ -0,0 +1,1178 @@
+/*
+ * decap.c - decapsulate VXLAN GPE
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Functions for decapsulating VXLAN GPE tunnels
+ *
+*/
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+vlib_node_registration_t vxlan_gpe_input_node;
+
+/**
+ * @brief Struct for VXLAN GPE decap packet tracing
+ *
+ */
+typedef struct {
+ u32 next_index;
+ u32 tunnel_index;
+ u32 error;
+} vxlan_gpe_rx_trace_t;
+
+/**
+ * @brief Tracing function for VXLAN GPE packet decapsulation
+ *
+ * @param *s
+ * @param *args
+ *
+ * @return *s
+ *
+ */
+static u8 * format_vxlan_gpe_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_rx_trace_t * t = va_arg (*args, vxlan_gpe_rx_trace_t *);
+
+ if (t->tunnel_index != ~0)
+ {
+ s = format (s, "VXLAN-GPE: tunnel %d next %d error %d", t->tunnel_index,
+ t->next_index, t->error);
+ }
+ else
+ {
+ s = format (s, "VXLAN-GPE: no tunnel next %d error %d\n", t->next_index,
+ t->error);
+ }
+ return s;
+}
+
+/**
+ * @brief Tracing function for VXLAN GPE packet decapsulation including length
+ *
+ * @param *s
+ * @param *args
+ *
+ * @return *s
+ *
+ */
+static u8 * format_vxlan_gpe_with_length (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+
+ /* stub: no additional fields are formatted yet */
+ return s;
+}
+
+/**
+ * @brief Common processing for IPv4 and IPv6 VXLAN GPE decap dispatch functions
+ *
+ * It is worth noting that other than trivial UDP forwarding (transit), VXLAN GPE
+ * tunnels are "terminate local". This means that there is no "TX" interface for this
+ * decap case, so that field in the buffer_metadata can be "used for something else".
+ * The something else in this case is, for the IPv4/IPv6 inner-packet type case, the
+ * FIB index used to look up the inner-packet's adjacency.
+ *
+ * vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+ *
+ * @param *vm
+ * @param *node
+ * @param *from_frame
+ * @param is_ip4
+ *
+ * @return from_frame->n_vectors
+ *
+ */
+always_inline uword
+vxlan_gpe_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u8 is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t * nngm = &vxlan_gpe_main;
+ vnet_main_t * vnm = nngm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 last_tunnel_index = ~0;
+ vxlan4_gpe_tunnel_key_t last_key4;
+ vxlan6_gpe_tunnel_key_t last_key6;
+ u32 pkts_decapsulated = 0;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+
+ if (is_ip4)
+ memset (&last_key4, 0xff, sizeof(last_key4));
+ else
+ memset (&last_key6, 0xff, sizeof(last_key6));
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, *b1;
+ u32 next0, next1;
+ ip4_vxlan_gpe_header_t * iuvn4_0, *iuvn4_1;
+ ip6_vxlan_gpe_header_t * iuvn6_0, *iuvn6_1;
+ uword * p0, *p1;
+ u32 tunnel_index0, tunnel_index1;
+ vxlan_gpe_tunnel_t * t0, *t1;
+ vxlan4_gpe_tunnel_key_t key4_0, key4_1;
+ vxlan6_gpe_tunnel_key_t key6_0, key6_1;
+ u32 error0, error1;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header(p2, LOAD);
+ vlib_prefetch_buffer_header(p3, LOAD);
+
+ CLIB_PREFETCH(p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH(p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (is_ip4)
+ {
+ /* udp leaves current_data pointing at the vxlan-gpe header */
+ vlib_buffer_advance (b0, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t)));
+ vlib_buffer_advance (b1, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t)));
+
+ iuvn4_0 = vlib_buffer_get_current (b0);
+ iuvn4_1 = vlib_buffer_get_current (b1);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof(*iuvn4_0));
+ vlib_buffer_advance (b1, sizeof(*iuvn4_1));
+ }
+ else
+ {
+ /* udp leaves current_data pointing at the vxlan-gpe header */
+ vlib_buffer_advance (b0, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t)));
+ vlib_buffer_advance (b1, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t)));
+
+ iuvn6_0 = vlib_buffer_get_current (b0);
+ iuvn6_1 = vlib_buffer_get_current (b1);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof(*iuvn6_0));
+ vlib_buffer_advance (b1, sizeof(*iuvn6_1));
+ }
+
+ tunnel_index0 = ~0;
+ tunnel_index1 = ~0;
+ error0 = 0;
+ error1 = 0;
+
+ if (is_ip4)
+ {
+ next0 =
+ (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn4_0->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+ next1 =
+ (iuvn4_1->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn4_1->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key4_0.local = iuvn4_0->ip4.dst_address.as_u32;
+ key4_1.local = iuvn4_1->ip4.dst_address.as_u32;
+
+ key4_0.remote = iuvn4_0->ip4.src_address.as_u32;
+ key4_1.remote = iuvn4_1->ip4.src_address.as_u32;
+
+ key4_0.vni = iuvn4_0->vxlan.vni_res;
+ key4_1.vni = iuvn4_1->vxlan.vni_res;
+
+ key4_0.pad = 0;
+ key4_1.pad = 0;
+ }
+ else /* is_ip6 */
+ {
+              next0 =
+                  (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+                  nngm->decap_next_node_list[iuvn6_0->vxlan.protocol]: \
+                  VXLAN_GPE_INPUT_NEXT_DROP;
+              next1 =
+                  (iuvn6_1->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+                  nngm->decap_next_node_list[iuvn6_1->vxlan.protocol]: \
+                  VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key6_0.local.as_u64[0] = iuvn6_0->ip6.dst_address.as_u64[0];
+ key6_0.local.as_u64[1] = iuvn6_0->ip6.dst_address.as_u64[1];
+ key6_1.local.as_u64[0] = iuvn6_1->ip6.dst_address.as_u64[0];
+ key6_1.local.as_u64[1] = iuvn6_1->ip6.dst_address.as_u64[1];
+
+ key6_0.remote.as_u64[0] = iuvn6_0->ip6.src_address.as_u64[0];
+ key6_0.remote.as_u64[1] = iuvn6_0->ip6.src_address.as_u64[1];
+ key6_1.remote.as_u64[0] = iuvn6_1->ip6.src_address.as_u64[0];
+ key6_1.remote.as_u64[1] = iuvn6_1->ip6.src_address.as_u64[1];
+
+ key6_0.vni = iuvn6_0->vxlan.vni_res;
+ key6_1.vni = iuvn6_1->vxlan.vni_res;
+ }
+
+          /* Process packet 0 */
+ if (is_ip4)
+ {
+ /* Processing for key4_0 */
+ if (PREDICT_FALSE((key4_0.as_u64[0] != last_key4.as_u64[0])
+ || (key4_0.as_u64[1] != last_key4.as_u64[1])))
+ {
+ p0 = hash_get_mem(nngm->vxlan4_gpe_tunnel_by_key, &key4_0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace0;
+ }
+
+ last_key4.as_u64[0] = key4_0.as_u64[0];
+ last_key4.as_u64[1] = key4_0.as_u64[1];
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ }
+ else /* is_ip6 */
+ {
+ /* Processing for key6_0 */
+ if (PREDICT_FALSE(memcmp (&key6_0, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p0 = hash_get_mem(nngm->vxlan6_gpe_tunnel_by_key, &key6_0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace0;
+ }
+
+ memcpy (&last_key6, &key6_0, sizeof(key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ }
+
+ t0 = pool_elt_at_index(nngm->tunnels, tunnel_index0);
+
+
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+          /*
+           * ip[46] lookup in the configured FIB
+           */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
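+          /* Batch stats increment on the same vxlan-gpe tunnel so counter
+             is not incremented per packet */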
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace0: b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof(*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ }
+
+ /* Process packet 1 */
+ if (is_ip4)
+ {
+ /* Processing for key4_1 */
+ if (PREDICT_FALSE(
+ (key4_1.as_u64[0] != last_key4.as_u64[0])
+ || (key4_1.as_u64[1] != last_key4.as_u64[1])))
+ {
+ p1 = hash_get_mem(nngm->vxlan4_gpe_tunnel_by_key, &key4_1);
+
+ if (p1 == 0)
+ {
+ error1 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace1;
+ }
+
+ last_key4.as_u64[0] = key4_1.as_u64[0];
+ last_key4.as_u64[1] = key4_1.as_u64[1];
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ }
+ else /* is_ip6 */
+ {
+ /* Processing for key6_1 */
+ if (PREDICT_FALSE(memcmp (&key6_1, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p1 = hash_get_mem(nngm->vxlan6_gpe_tunnel_by_key, &key6_1);
+
+ if (p1 == 0)
+ {
+ error1 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace1;
+ }
+
+ memcpy (&last_key6, &key6_1, sizeof(key6_1));
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ }
+
+ t1 = pool_elt_at_index(nngm->tunnels, tunnel_index1);
+
+ sw_if_index1 = t1->sw_if_index;
+ len1 = vlib_buffer_length_in_chain (vm, b1);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b1);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ */
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len1;
+
+ /* Batch stats increment on the same vxlan tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE(sw_if_index1 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len1;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len1;
+ stats_sw_if_index = sw_if_index1;
+ }
+
+ trace1: b1->error = error1 ? node->errors[error1] : 0;
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b1, sizeof(*tr));
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->tunnel_index = tunnel_index1;
+ }
+
+ vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ ip4_vxlan_gpe_header_t * iuvn4_0;
+ ip6_vxlan_gpe_header_t * iuvn6_0;
+ uword * p0;
+ u32 tunnel_index0;
+ vxlan_gpe_tunnel_t * t0;
+ vxlan4_gpe_tunnel_key_t key4_0;
+ vxlan6_gpe_tunnel_key_t key6_0;
+ u32 error0;
+ u32 sw_if_index0, len0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (is_ip4)
+ {
+ /* udp leaves current_data pointing at the vxlan-gpe header */
+ vlib_buffer_advance (
+ b0, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t)));
+
+ iuvn4_0 = vlib_buffer_get_current (b0);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof(*iuvn4_0));
+ }
+ else
+ {
+ /* udp leaves current_data pointing at the vxlan-gpe header */
+ vlib_buffer_advance (
+ b0, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t)));
+
+ iuvn6_0 = vlib_buffer_get_current (b0);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof(*iuvn6_0));
+ }
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+
+ if (is_ip4)
+ {
+ next0 =
+ (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn4_0->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key4_0.local = iuvn4_0->ip4.dst_address.as_u32;
+ key4_0.remote = iuvn4_0->ip4.src_address.as_u32;
+ key4_0.vni = iuvn4_0->vxlan.vni_res;
+ key4_0.pad = 0;
+
+ /* Processing for key4_0 */
+ if (PREDICT_FALSE(
+ (key4_0.as_u64[0] != last_key4.as_u64[0])
+ || (key4_0.as_u64[1] != last_key4.as_u64[1])))
+ {
+ p0 = hash_get_mem(nngm->vxlan4_gpe_tunnel_by_key, &key4_0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace00;
+ }
+
+ last_key4.as_u64[0] = key4_0.as_u64[0];
+ last_key4.as_u64[1] = key4_0.as_u64[1];
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ }
+ else /* is_ip6 */
+ {
+ next0 =
+ (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn6_0->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key6_0.local.as_u64[0] = iuvn6_0->ip6.dst_address.as_u64[0];
+ key6_0.local.as_u64[1] = iuvn6_0->ip6.dst_address.as_u64[1];
+ key6_0.remote.as_u64[0] = iuvn6_0->ip6.src_address.as_u64[0];
+ key6_0.remote.as_u64[1] = iuvn6_0->ip6.src_address.as_u64[1];
+ key6_0.vni = iuvn6_0->vxlan.vni_res;
+
+ /* Processing for key6_0 */
+ if (PREDICT_FALSE(memcmp (&key6_0, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p0 = hash_get_mem(nngm->vxlan6_gpe_tunnel_by_key, &key6_0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace00;
+ }
+
+ memcpy (&last_key6, &key6_0, sizeof(key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ }
+
+ t0 = pool_elt_at_index(nngm->tunnels, tunnel_index0);
+
+
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan-gpe tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace00: b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof(*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ }
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+  vlib_node_increment_counter (vm, node->node_index,
+ VXLAN_GPE_ERROR_DECAPSULATED, pkts_decapsulated);
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, thread_index,
+ stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+ return from_frame->n_vectors;
+}
+
+/**
+ * @brief Graph processing dispatch function for IPv4 VXLAN GPE
+ *
+ * @node vxlan4-gpe-input
+ * @param *vm vlib main data structure
+ * @param *node node runtime for this dispatch function
+ * @param *from_frame frame of buffers to process
+ *
+ * @return from_frame->n_vectors
+ *
+ */
+static uword
+vxlan4_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_gpe_input (vm, node, from_frame, /* is_ip4 */1);
+}
+
+
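+/**
+ * @brief Register the decap next node for a VXLAN GPE next-protocol value
+ *
+ * @param protocol_id VXLAN GPE next-protocol value to claim
+ * @param next_node_index next-node slot on the vxlan[46]-gpe-input nodes
+ *        (e.g. from vlib_node_add_next) used to dispatch that payload type
+ *
+ */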
+void
+vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index)
+{
+ vxlan_gpe_main_t *hm = &vxlan_gpe_main;
+ hm->decap_next_node_list[protocol_id] = next_node_index;
+}
+
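+/**
+ * @brief Unregister the decap next node for a VXLAN GPE next-protocol value
+ *
+ * Resets the slot for protocol_id to VXLAN_GPE_INPUT_NEXT_DROP.
+ *
+ * @param protocol_id VXLAN GPE next-protocol value to release
+ * @param next_node_index unused, kept for symmetry with registration
+ *
+ */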
+void
+vxlan_gpe_unregister_decap_protocol (u8 protocol_id, uword next_node_index)
+{
+ vxlan_gpe_main_t *hm = &vxlan_gpe_main;
+ hm->decap_next_node_list[protocol_id] = VXLAN_GPE_INPUT_NEXT_DROP;
+}
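+
+/*
+ * Illustrative usage (a sketch, not part of this patch): a payload plugin
+ * would claim its GPE next-protocol slot at init time, e.g. with a
+ * hypothetical nsh_input_node as the dispatch target:
+ *
+ *   u32 slot = vlib_node_add_next (vm, vxlan4_gpe_input_node.index,
+ *                                  nsh_input_node.index);
+ *   vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_NSH, slot);
+ *
+ * Unregistering the protocol sends subsequent payloads of that type to
+ * VXLAN_GPE_INPUT_NEXT_DROP.
+ */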
+
+
+/**
+ * @brief Graph processing dispatch function for IPv6 VXLAN GPE
+ *
+ * @node vxlan6-gpe-input
+ * @param *vm vlib main data structure
+ * @param *node node runtime for this dispatch function
+ * @param *from_frame frame of buffers to process
+ *
+ * @return from_frame->n_vectors
+ *
+ */
+static uword
+vxlan6_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_gpe_input (vm, node, from_frame, /* is_ip4 */0);
+}
+
+/**
+ * @brief VXLAN GPE error strings
+ */
+static char * vxlan_gpe_error_strings[] = {
+#define vxlan_gpe_error(n,s) s,
+#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
+#undef vxlan_gpe_error
+};
+
+VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
+ .function = vxlan4_gpe_input,
+ .name = "vxlan4-gpe-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
+ .error_strings = vxlan_gpe_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
+ foreach_vxlan_gpe_input_next
+#undef _
+ },
+
+ .format_buffer = format_vxlan_gpe_with_length,
+ .format_trace = format_vxlan_gpe_rx_trace,
+ // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_gpe_input_node, vxlan4_gpe_input);
+
+VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
+ .function = vxlan6_gpe_input,
+ .name = "vxlan6-gpe-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
+ .error_strings = vxlan_gpe_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
+ foreach_vxlan_gpe_input_next
+#undef _
+ },
+
+ .format_buffer = format_vxlan_gpe_with_length,
+ .format_trace = format_vxlan_gpe_rx_trace,
+ // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_gpe_input_node, vxlan6_gpe_input);
+
+typedef enum {
+  IP_VXLAN_BYPASS_NEXT_DROP,
+  IP_VXLAN_BYPASS_NEXT_VXLAN,
+  IP_VXLAN_BYPASS_N_NEXT,
+} ip_vxlan_gpe_bypass_next_t;
+
+always_inline uword
+ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ u32 is_ip4)
+{
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ u32 * from, * to_next, n_left_from, n_left_to_next, next_index;
+  vlib_node_runtime_t * error_node = is_ip4 ?
+    vlib_node_get_runtime (vm, ip4_input_node.index) :
+    vlib_node_get_runtime (vm, ip6_input_node.index);
+ ip4_address_t addr4; /* last IPv4 address matching a local VTEP address */
+ ip6_address_t addr6; /* last IPv6 address matching a local VTEP address */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    {
+      if (is_ip4)
+        ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+      else
+        ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+    }
+
+ if (is_ip4) addr4.data_u32 = ~0;
+ else ip6_address_set_zero (&addr6);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ ip4_header_t * ip40, * ip41;
+ ip6_header_t * ip60, * ip61;
+ udp_header_t * udp0, * udp1;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ u32 bi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, good_udp0, proto0;
+ u8 error1, good_udp1, proto1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ if (is_ip4)
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ ip41 = vlib_buffer_get_current (b1);
+ }
+ else
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ ip61 = vlib_buffer_get_current (b1);
+ }
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
+ vnet_feature_next(vnet_buffer(b1)->sw_if_index[VLIB_RX], &next1, b1);
+
+ if (is_ip4)
+ {
+ proto0 = ip40->protocol;
+ proto1 = ip41->protocol;
+ }
+ else
+ {
+ proto0 = ip60->protocol;
+ proto1 = ip61->protocol;
+ }
+
+ /* Process packet 0 */
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit0; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
+ goto exit0; /* not VXLAN packet */
+
+          /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip40->dst_address.as_u32)
+ {
+ if (!hash_get (ngm->vtep4, ip40->dst_address.as_u32))
+ goto exit0; /* no local VTEP for VXLAN packet */
+ addr4 = ip40->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
+ {
+ if (!hash_get_mem (ngm->vtep6, &ip60->dst_address))
+ goto exit0; /* no local VTEP for VXLAN packet */
+ addr6 = ip60->dst_address;
+ }
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+          /* vxlan[46]-gpe-input nodes expect current_data at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit0:
+ /* Process packet 1 */
+ if (proto1 != IP_PROTOCOL_UDP)
+ goto exit1; /* not UDP packet */
+
+ if (is_ip4)
+ udp1 = ip4_next_header (ip41);
+ else
+ udp1 = ip6_next_header (ip61);
+
+ if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
+ goto exit1; /* not VXLAN packet */
+
+          /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip41->dst_address.as_u32)
+ {
+ if (!hash_get (ngm->vtep4, ip41->dst_address.as_u32))
+ goto exit1; /* no local VTEP for VXLAN packet */
+ addr4 = ip41->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip61->dst_address))
+ {
+ if (!hash_get_mem (ngm->vtep6, &ip61->dst_address))
+ goto exit1; /* no local VTEP for VXLAN packet */
+ addr6 = ip61->dst_address;
+ }
+ }
+
+ flags1 = b1->flags;
+ good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp1 |= udp1->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len1 = clib_net_to_host_u16 (ip41->length);
+ else
+ ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp1))
+ {
+ if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
+ else
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
+ good_udp1 =
+ (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next1 = error1 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b1->error = error1 ? error_node->errors[error1] : 0;
+
+          /* vxlan[46]-gpe-input nodes expect current_data at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b1, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b1, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit1:
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * b0;
+ ip4_header_t * ip40;
+ ip6_header_t * ip60;
+ udp_header_t * udp0;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, good_udp0, proto0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ if (is_ip4)
+ ip40 = vlib_buffer_get_current (b0);
+ else
+ ip60 = vlib_buffer_get_current (b0);
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
+
+ if (is_ip4)
+ proto0 = ip40->protocol;
+ else
+ proto0 = ip60->protocol;
+
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
+ goto exit; /* not VXLAN packet */
+
+          /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip40->dst_address.as_u32)
+ {
+ if (!hash_get (ngm->vtep4, ip40->dst_address.as_u32))
+ goto exit; /* no local VTEP for VXLAN packet */
+ addr4 = ip40->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
+ {
+ if (!hash_get_mem (ngm->vtep6, &ip60->dst_address))
+ goto exit; /* no local VTEP for VXLAN packet */
+ addr6 = ip60->dst_address;
+ }
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+          /* vxlan[46]-gpe-input nodes expect current_data at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit:
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_vxlan_gpe_bypass (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
+}
+
+VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
+ .function = ip4_vxlan_gpe_bypass,
+ .name = "ip4-vxlan-gpe-bypass",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-gpe-input",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_vxlan_gpe_bypass_node,ip4_vxlan_gpe_bypass)
+
+/* Dummy init function to get us linked in. */
+clib_error_t * ip4_vxlan_gpe_bypass_init (vlib_main_t * vm)
+{ return 0; }
+
+VLIB_INIT_FUNCTION (ip4_vxlan_gpe_bypass_init);
+
+static uword
+ip6_vxlan_gpe_bypass (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
+ .function = ip6_vxlan_gpe_bypass,
+ .name = "ip6-vxlan-gpe-bypass",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-gpe-input",
+ },
+
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_vxlan_gpe_bypass_node,ip6_vxlan_gpe_bypass)
+
+/* Dummy init function to get us linked in. */
+clib_error_t * ip6_vxlan_gpe_bypass_init (vlib_main_t * vm)
+{ return 0; }
+
+VLIB_INIT_FUNCTION (ip6_vxlan_gpe_bypass_init);
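+
+/*
+ * Illustrative note (a sketch, not part of this patch): the bypass nodes
+ * above are meant to be enabled as input features on the underlay
+ * interface, along the lines of:
+ *
+ *   vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-gpe-bypass",
+ *                                sw_if_index, 1, 0, 0);
+ *
+ * so VXLAN GPE packets addressed to a known local VTEP take the shortcut
+ * to vxlan4-gpe-input instead of the full ip4-lookup path (similarly
+ * "ip6-unicast" / "ip6-vxlan-gpe-bypass" for IPv6).
+ */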
diff --git a/src/vnet/vxlan-gpe/dir.dox b/src/vnet/vxlan-gpe/dir.dox
new file mode 100644
index 00000000..c154733b
--- /dev/null
+++ b/src/vnet/vxlan-gpe/dir.dox
@@ -0,0 +1,32 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief VXLAN GPE
+
+ Based on IETF: draft-quinn-vxlan-gpe-03.txt
+
+Abstract
+
+ This draft describes extending Virtual eXtensible Local Area Network
+ (VXLAN), via changes to the VXLAN header, with three new
+ capabilities: support for multi-protocol encapsulation, operations,
+ administration and management (OAM) signaling and explicit
+ versioning.
+
+ See file: vxlan-gpe-rfc.txt
+
+*/
\ No newline at end of file
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
new file mode 100644
index 00000000..67ed94b4
--- /dev/null
+++ b/src/vnet/vxlan-gpe/encap.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Functions for encapsulating VXLAN GPE tunnels
+ *
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+/** Statistics (not really errors) */
+#define foreach_vxlan_gpe_encap_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+/**
+ * @brief VXLAN GPE encap error strings
+ */
+static char * vxlan_gpe_encap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_encap_error
+#undef _
+};
+
+/**
+ * @brief Enum of VXLAN GPE encap error/counter values
+ */
+typedef enum {
+#define _(sym,str) VXLAN_GPE_ENCAP_ERROR_##sym,
+ foreach_vxlan_gpe_encap_error
+#undef _
+ VXLAN_GPE_ENCAP_N_ERROR,
+} vxlan_gpe_encap_error_t;
+
+/**
+ * @brief Struct for tracing VXLAN GPE encapsulated packets
+ */
+typedef struct {
+ u32 tunnel_index;
+} vxlan_gpe_encap_trace_t;
+
+/**
+ * @brief Trace of packets encapsulated in VXLAN GPE
+ *
+ * @param *s formatted string being appended to
+ * @param *args va_list holding vlib_main_t *, vlib_node_t * and the trace record
+ *
+ * @return *s the string with the trace appended
+ *
+ */
+u8 * format_vxlan_gpe_encap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_encap_trace_t * t
+ = va_arg (*args, vxlan_gpe_encap_trace_t *);
+
+ s = format (s, "VXLAN-GPE-ENCAP: tunnel %d", t->tunnel_index);
+ return s;
+}
+
+/**
+ * @brief Instantiates the UDP + VXLAN-GPE header, then sets the next node to IP4|6 lookup
+ *
+ * @param *ngm VXLAN GPE main data structure
+ * @param *b0 buffer to encapsulate
+ * @param *t0 tunnel; contains the rewrite header
+ * @param *next0 relative index of next dispatch function (next node)
+ * @param is_v4 Is this IPv4? (or IPv6)
+ *
+ */
+always_inline void
+vxlan_gpe_encap_one_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
+ vxlan_gpe_tunnel_t * t0, u32 * next0,
+ u8 is_v4)
+{
+ ASSERT(sizeof(ip4_vxlan_gpe_header_t) == 36);
+ ASSERT(sizeof(ip6_vxlan_gpe_header_t) == 56);
+
+ ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, is_v4);
+ next0[0] = t0->encap_next_node;
+}
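+
+/*
+ * For reference (a sketch based on vxlan_gpe_packet.h in this tree, not new
+ * code in this patch), the rewrite copied in front of the payload above is
+ * the precomputed outer header; its IPv4 form is essentially:
+ *
+ *   typedef CLIB_PACKED (struct {
+ *     ip4_header_t ip4;          // 20 bytes
+ *     udp_header_t udp;          //  8 bytes
+ *     vxlan_gpe_header_t vxlan;  //  8 bytes
+ *   }) ip4_vxlan_gpe_header_t;   // 36 bytes, matching the ASSERT above
+ *
+ * with the IPv6 variant substituting a 40-byte ip6_header_t, for 56 bytes
+ * total.
+ */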
+
+/**
+ * @brief Instantiates the UDP + VXLAN-GPE header, then sets the next node to IP4|6 lookup, for two packets
+ *
+ * @param *ngm
+ * @param *b0 Packet0
+ * @param *b1 Packet1
+ * @param *t0 contains rewrite header for Packet0
+ * @param *t1 contains rewrite header for Packet1
+ * @param *next0 relative index of next dispatch function (next node) for Packet0
+ * @param *next1 relative index of next dispatch function (next node) for Packet1
+ * @param is_v4 Is this IPv4? (or IPv6)
+ *
+ */
+always_inline void
+vxlan_gpe_encap_two_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
+ vlib_buffer_t * b1, vxlan_gpe_tunnel_t * t0,
+ vxlan_gpe_tunnel_t * t1, u32 * next0,
+ u32 * next1, u8 is_v4)
+{
+ ASSERT(sizeof(ip4_vxlan_gpe_header_t) == 36);
+ ASSERT(sizeof(ip6_vxlan_gpe_header_t) == 56);
+
+ ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, is_v4);
+ ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, is_v4);
+ next0[0] = next1[0] = t0->encap_next_node;
+}
+
+/**
+ * @brief Common processing for IPv4 and IPv6 VXLAN GPE encap dispatch functions
+ *
+ * It is worth noting that other than trivial UDP forwarding (transit), VXLAN GPE
+ * tunnels are "establish local". This means that we don't have a TX interface yet,
+ * since we still need to look up where the outer-header destination is. By setting
+ * the TX index in the buffer metadata to the encap FIB, we can do a lookup to get
+ * the adjacency and the real TX interface.
+ *
+ * vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ *
+ * @node vxlan-gpe-encap
+ * @param *vm vlib main data structure
+ * @param *node node runtime for this dispatch function
+ * @param *from_frame frame of buffers to process
+ *
+ * @return from_frame->n_vectors
+ *
+ */
+static uword
+vxlan_gpe_encap (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ vnet_main_t * vnm = ngm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 pkts_encapsulated = 0;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+ vnet_hw_interface_t * hi0, *hi1;
+ vxlan_gpe_tunnel_t * t0, *t1;
+ u8 is_ip4_0, is_ip4_1;
+
+ next0 = next1 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header(p2, LOAD);
+ vlib_prefetch_buffer_header(p3, LOAD);
+
+ CLIB_PREFETCH(p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH(p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* 1-wide cache? */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+ hi1 = vnet_get_sup_hw_interface (vnm, vnet_buffer(b1)->sw_if_index[VLIB_TX]);
+
+ t0 = pool_elt_at_index(ngm->tunnels, hi0->dev_instance);
+ t1 = pool_elt_at_index(ngm->tunnels, hi1->dev_instance);
+
+ is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+ is_ip4_1 = (t1->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+
+ if (PREDICT_TRUE(is_ip4_0 == is_ip4_1))
+ {
+ vxlan_gpe_encap_two_inline (ngm, b0, b1, t0, t1, &next0, &next1,is_ip4_0);
+ }
+ else
+ {
+ vxlan_gpe_encap_one_inline (ngm, b0, t0, &next0, is_ip4_0);
+ vxlan_gpe_encap_one_inline (ngm, b1, t1, &next1, is_ip4_1);
+ }
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+ pkts_encapsulated += 2;
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+          len1 = vlib_buffer_length_in_chain (vm, b1);
+ stats_n_packets += 2;
+ stats_n_bytes += len0 + len1;
+
+ /* Batch stats increment on the same vxlan tunnel so counter is not
+ incremented per packet. Note stats are still incremented for deleted
+ and admin-down tunnel where packets are dropped. It is not worthwhile
+ to check for this rare case and affect normal path performance. */
+ if (PREDICT_FALSE((sw_if_index0 != stats_sw_if_index)
+ || (sw_if_index1 != stats_sw_if_index)))
+ {
+ stats_n_packets -= 2;
+ stats_n_bytes -= len0 + len1;
+ if (sw_if_index0 == sw_if_index1)
+ {
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_sw_if_index = sw_if_index0;
+ stats_n_packets = 2;
+ stats_n_bytes = len0 + len1;
+ }
+ else
+ {
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index0, 1, len0);
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index1, 1, len1);
+ }
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof(*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b1,
+ sizeof(*tr));
+ tr->tunnel_index = t1 - ngm->tunnels;
+ }
+
+ vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+ u32 sw_if_index0, len0;
+ vnet_hw_interface_t * hi0;
+ vxlan_gpe_tunnel_t * t0;
+ u8 is_ip4_0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* 1-wide cache? */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+
+ t0 = pool_elt_at_index(ngm->tunnels, hi0->dev_instance);
+
+ is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+
+ vxlan_gpe_encap_one_inline (ngm, b0, t0, &next0, is_ip4_0);
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ pkts_encapsulated++;
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan tunnel so counter is not
+ * incremented per packet. Note stats are still incremented for deleted
+ * and admin-down tunnel where packets are dropped. It is not worthwhile
+ * to check for this rare case and affect normal path performance. */
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof(*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, node->node_index,
+ VXLAN_GPE_ENCAP_ERROR_ENCAPSULATED,
+ pkts_encapsulated);
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, thread_index,
+ stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
+ .function = vxlan_gpe_encap,
+ .name = "vxlan-gpe-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_encap_error_strings),
+ .error_strings = vxlan_gpe_encap_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_ENCAP_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
+
diff --git a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt b/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
new file mode 100644
index 00000000..35cee50f
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
@@ -0,0 +1,868 @@
+Network Working Group P. Quinn
+Internet-Draft Cisco Systems, Inc.
+Intended status: Experimental P. Agarwal
+Expires: January 4, 2015 Broadcom
+ R. Fernando
+ L. Kreeger
+ D. Lewis
+ F. Maino
+ M. Smith
+ N. Yadav
+ Cisco Systems, Inc.
+ L. Yong
+ Huawei USA
+ X. Xu
+ Huawei Technologies
+ U. Elzur
+ Intel
+ P. Garg
+ Microsoft
+ July 3, 2014
+
+
+ Generic Protocol Extension for VXLAN
+ draft-quinn-vxlan-gpe-03.txt
+
+Abstract
+
+ This draft describes extending Virtual eXtensible Local Area Network
+ (VXLAN), via changes to the VXLAN header, with three new
+ capabilities: support for multi-protocol encapsulation, operations,
+ administration and management (OAM) signaling and explicit
+ versioning.
+
+Status of this Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+ This Internet-Draft will expire on January 4, 2015.
+
+Copyright Notice
+
+ Copyright (c) 2014 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+Table of Contents
+
+ 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4
+ 2. VXLAN Without Protocol Extension . . . . . . . . . . . . . . . 5
+ 3. Generic Protocol Extension VXLAN (VXLAN-gpe) . . . . . . . . . 6
+ 3.1. Multi Protocol Support . . . . . . . . . . . . . . . . . . 6
+ 3.2. OAM Support . . . . . . . . . . . . . . . . . . . . . . . 7
+ 3.3. Version Bits . . . . . . . . . . . . . . . . . . . . . . . 7
+ 4. Backward Compatibility . . . . . . . . . . . . . . . . . . . . 8
+ 4.1. VXLAN VTEP to VXLAN-gpe VTEP . . . . . . . . . . . . . . . 8
+ 4.2. VXLAN-gpe VTEP to VXLAN VTEP . . . . . . . . . . . . . . . 8
+ 4.3. VXLAN-gpe UDP Ports . . . . . . . . . . . . . . . . . . . 8
+ 4.4. VXLAN-gpe and Encapsulated IP Header Fields . . . . . . . 8
+ 5. VXLAN-gpe Examples . . . . . . . . . . . . . . . . . . . . . . 9
+ 6. Security Considerations . . . . . . . . . . . . . . . . . . . 11
+ 7. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12
+ 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 13
+ 8.1. UDP Port . . . . . . . . . . . . . . . . . . . . . . . . . 13
+ 8.2. VXLAN-gpe Next Protocol . . . . . . . . . . . . . . . . . 13
+ 8.3. VXLAN-gpe Reserved Bits . . . . . . . . . . . . . . . . . 13
+ 9. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+ 9.1. Normative References . . . . . . . . . . . . . . . . . . . 14
+ 9.2. Informative References . . . . . . . . . . . . . . . . . . 14
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15
+
+1. Introduction
+
+ Virtual eXtensible Local Area Network [VXLAN] defines an
+ encapsulation format that encapsulates Ethernet frames in an outer
+ UDP/IP transport. As data centers evolve, the need to carry other
+ protocols encapsulated in an IP packet is required, as well as the
+ need to provide increased visibility and diagnostic capabilities
+ within the overlay. The VXLAN header does not specify the protocol
+ being encapsulated and therefore is currently limited to
+ encapsulating only Ethernet frame payload, nor does it provide the
+ ability to define OAM protocols. Rather than defining yet another
+ encapsulation, VXLAN is extended to provide protocol typing and OAM
+ capabilities.
+
+ This document describes extending VXLAN via the following changes:
+
+ Next Protocol Bit (P bit): A reserved flag bit is allocated, and set
+ in the VXLAN-gpe header to indicate that a next protocol field is
+ present.
+
+ OAM Flag Bit (O bit): A reserved flag bit is allocated, and set in
+ the VXLAN-gpe header, to indicate that the packet is an OAM
+ packet.
+
+ Version: Two reserved bits are allocated, and set in the VXLAN-gpe
+ header, to indicate VXLAN-gpe protocol version.
+
+   Next Protocol: An 8 bit next protocol field is present in the VXLAN-
+ gpe header.
+
+2. VXLAN Without Protocol Extension
+
+ As described in the introduction, the VXLAN header has no protocol
+ identifier that indicates the type of payload being carried by VXLAN.
+ Because of this, VXLAN is limited to an Ethernet payload.
+ Furthermore, the VXLAN header has no mechanism to signal OAM packets.
+
+ The VXLAN header defines bits 0-7 as flags (some defined, some
+ reserved), the VXLAN network identifier (VNI) field and several
+ reserved bits. The flags provide flexibility to define how the
+ reserved bits can be used to change the definition of the VXLAN
+ header.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|R|R|R| Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+ Figure 1: VXLAN Header
+
+3. Generic Protocol Extension VXLAN (VXLAN-gpe)
+
+3.1. Multi Protocol Support
+
+ This draft defines the following two changes to the VXLAN header in
+ order to support multi-protocol encapsulation:
+
+ P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ MUST be set to 1 to indicate the presence of the 8 bit next
+ protocol field.
+
+ P = 0 indicates that the payload MUST conform to VXLAN as defined
+ in [VXLAN].
+
+ Flag bit 5 was chosen as the P bit because this flag bit is
+ currently reserved in VXLAN.
+
+ Next Protocol Field: The lower 8 bits of the first word are used to
+ carry a next protocol. This next protocol field contains the
+ protocol of the encapsulated payload packet. A new protocol
+ registry will be requested from IANA.
+
+ This draft defines the following Next Protocol values:
+
+ 0x1 : IPv4
+ 0x2 : IPv6
+ 0x3 : Ethernet
+ 0x4 : Network Service Header [NSH]
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|R| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 2: VXLAN-gpe Next Protocol
+
+3.2. OAM Support
+
+ Flag bit 7 is defined as the O bit. When the O bit is set to 1, the
+ packet is an OAM packet and OAM processing MUST occur. The OAM
+ protocol details are out of scope for this document. As with the
+ P-bit, bit 7 is currently a reserved flag in VXLAN.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|O| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 3: VXLAN-gpe OAM Bit
+
+3.3. Version Bits
+
+ VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
+ reserved in VXLAN. The version field is used to ensure backward
+ compatibility going forward with future VXLAN-gpe updates.
+
+ The initial version for VXLAN-gpe is 0.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+ Figure 4: VXLAN-gpe Version Bits
+
+4. Backward Compatibility
+
+4.1. VXLAN VTEP to VXLAN-gpe VTEP
+
+ As per VXLAN, reserved bits 5 and 7, VXLAN-gpe P and O-bits
+ respectively must be set to zero. The remaining reserved bits must
+ be zero, including the VXLAN-gpe version field, bits 8 and 9. The
+ encapsulated payload MUST be Ethernet.
+
+4.2. VXLAN-gpe VTEP to VXLAN VTEP
+
+ A VXLAN-gpe VTEP MUST NOT encapsulate non-Ethernet frames to a VXLAN
+ VTEP. When encapsulating Ethernet frames to a VXLAN VTEP, the VXLAN-
+ gpe VTEP will set the P bit to 0, the Next Protocol to 0 and use UDP
+ destination port 4789. A VXLAN-gpe VTEP MUST also set O = 0 and Ver
+ = 0 when encapsulating Ethernet frames to VXLAN VTEP. The receiving
+   VXLAN VTEP will treat this packet as a VXLAN packet.
+
+ A method for determining the capabilities of a VXLAN VTEP (gpe or
+ non-gpe) is out of the scope of this draft.
+
+4.3. VXLAN-gpe UDP Ports
+
+ VXLAN-gpe uses a new UDP destination port (to be assigned by IANA)
+ when sending traffic to VXLAN-gpe VTEPs.
+
+4.4. VXLAN-gpe and Encapsulated IP Header Fields
+
+ When encapsulating and decapsulating IPv4 and IPv6 packets, certain
+ fields, such as IPv4 Time to Live (TTL) from the inner IP header need
+ to be considered. VXLAN-gpe IP encapsulation and decapsulation
+ utilizes the techniques described in [RFC6830], section 5.3.
+
+5. VXLAN-gpe Examples
+
+ This section provides three examples of protocols encapsulated using
+ the Generic Protocol Extension for VXLAN described in this document.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv4 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv4 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 5: IPv4 and VXLAN-gpe
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv6 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv6 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 6: IPv6 and VXLAN-gpe
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved |NP = Ethernet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original Ethernet Frame |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 7: Ethernet and VXLAN-gpe
+
+6. Security Considerations
+
+ VXLAN's security is focused on issues around L2 encapsulation into
+ L3. With VXLAN-gpe, issues such as spoofing, flooding, and traffic
+ redirection are dependent on the particular protocol payload
+ encapsulated.
+
+7. Acknowledgments
+
+ A special thank you goes to Dino Farinacci for his guidance and
+ detailed review.
+
+8. IANA Considerations
+
+8.1. UDP Port
+
+ A new UDP port will be requested from IANA.
+
+8.2. VXLAN-gpe Next Protocol
+
+ IANA is requested to set up a registry of "Next Protocol". These are
+ 8-bit values. Next Protocol values 0, 1, 2, 3 and 4 are defined in
+ this draft. New values are assigned via Standards Action [RFC5226].
+
+ +---------------+-------------+---------------+
+ | Next Protocol | Description | Reference |
+ +---------------+-------------+---------------+
+ | 0 | Reserved | This document |
+ | | | |
+ | 1 | IPv4 | This document |
+ | | | |
+ | 2 | IPv6 | This document |
+ | | | |
+ | 3 | Ethernet | This document |
+ | | | |
+ | 4 | NSH | This document |
+ | | | |
+ | 5..253 | Unassigned | |
+ +---------------+-------------+---------------+
+
+ Table 1
+
+8.3. VXLAN-gpe Reserved Bits
+
+ There are ten bits at the beginning of the VXLAN-gpe header. New
+ bits are assigned via Standards Action [RFC5226].
+
+ Bits 0-3 - Reserved
+ Bit 4 - Instance ID (I bit)
+ Bit 5 - Next Protocol (P bit)
+ Bit 6 - Reserved
+ Bit 7 - OAM (O bit)
+ Bits 8-9 - Version
+
+9. References
+
+9.1. Normative References
+
+ [RFC0768] Postel, J., "User Datagram Protocol", STD 6, RFC 768,
+ August 1980.
+
+ [RFC0791] Postel, J., "Internet Protocol", STD 5, RFC 791,
+ September 1981.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an
+ IANA Considerations Section in RFCs", BCP 26, RFC 5226,
+ May 2008.
+
+9.2. Informative References
+
+   [NSH]      Quinn, P., et al., "Network Service Header", 2014.
+
+ [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700,
+ October 1994.
+
+ [RFC6830] Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The
+ Locator/ID Separation Protocol (LISP)", RFC 6830,
+ January 2013.
+
+ [VXLAN] Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger,
+ L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A
+ Framework for Overlaying Virtualized Layer 2 Networks over
+ Layer 3 Networks", 2013.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 14]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+Authors' Addresses
+
+ Paul Quinn
+ Cisco Systems, Inc.
+
+ Email: paulq@cisco.com
+
+
+ Puneet Agarwal
+ Broadcom
+
+ Email: pagarwal@broadcom.com
+
+
+ Rex Fernando
+ Cisco Systems, Inc.
+
+ Email: rex@cisco.com
+
+
+ Larry Kreeger
+ Cisco Systems, Inc.
+
+ Email: kreeger@cisco.com
+
+
+ Darrel Lewis
+ Cisco Systems, Inc.
+
+ Email: darlewis@cisco.com
+
+
+ Fabio Maino
+ Cisco Systems, Inc.
+
+   Email: fmaino@cisco.com
+
+
+ Michael Smith
+ Cisco Systems, Inc.
+
+ Email: michsmit@cisco.com
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 15]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ Navindra Yadav
+ Cisco Systems, Inc.
+
+ Email: nyadav@cisco.com
+
+
+ Lucy Yong
+ Huawei USA
+
+ Email: lucy.yong@huawei.com
+
+
+ Xiaohu Xu
+ Huawei Technologies
+
+ Email: xuxiaohu@huawei.com
+
+
+ Uri Elzur
+ Intel
+
+ Email: uri.elzur@intel.com
+
+
+ Pankaj Garg
+ Microsoft
+
+ Email: Garg.Pankaj@microsoft.com
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.api b/src/vnet/vxlan-gpe/vxlan_gpe.api
new file mode 100644
index 00000000..04082d69
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.api
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
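+/** \brief Create or delete a VXLAN-GPE tunnel
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_ipv6 - if non-zero, local/remote hold IPv6 addresses, else IPv4
+    @param local - tunnel local (source) address, network byte order
+    @param remote - tunnel remote (destination) address, network byte order
+    @param mcast_sw_if_index - interface for mcast remote (used only if remote is multicast)
+    @param encap_vrf_id - VRF used when sending encapsulated packets
+    @param decap_vrf_id - VRF for decapsulated packets (opaque unless protocol is IPv4)
+    @param protocol - encapsulated protocol: 1 = IPv4, 2 = IPv6, 3 = Ethernet, 4 = NSH
+    @param vni - 24-bit VXLAN Network Identifier
+    @param is_add - non-zero to add the tunnel, zero to delete it
+*/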
+define vxlan_gpe_add_del_tunnel
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+ u8 local[16];
+ u8 remote[16];
+ u32 mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_vrf_id;
+ u8 protocol;
+ u32 vni;
+ u8 is_add;
+};
+
+define vxlan_gpe_add_del_tunnel_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+define vxlan_gpe_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+define vxlan_gpe_tunnel_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 local[16];
+ u8 remote[16];
+ u32 vni;
+ u8 protocol;
+ u32 mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_vrf_id;
+ u8 is_ipv6;
+};
+
+/** \brief Interface set vxlan-gpe-bypass request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+    @param is_ipv6 - if non-zero, enable ip6-vxlan-gpe-bypass, else ip4-vxlan-gpe-bypass
+ @param enable - if non-zero enable, else disable
+*/
+autoreply define sw_interface_set_vxlan_gpe_bypass
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u8 enable;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c
new file mode 100644
index 00000000..462c79a0
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.c
@@ -0,0 +1,1264 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4 and IPv6 VXLAN GPE tunnels
+ *
+*/
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/fib/fib.h>
+#include <vnet/ip/format.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/interface.h>
+#include <vlib/vlib.h>
+
+/**
+ * @file
+ * @brief VXLAN-GPE.
+ *
+ * VXLAN-GPE provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using VXLAN-GPE tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE segment.
+ */
+
+vxlan_gpe_main_t vxlan_gpe_main;
+
+/**
+ * @brief Format function for a VXLAN GPE tunnel
+ *
+ * @param *s u8 vector to append the formatted output to
+ * @param *args va_list carrying a vxlan_gpe_tunnel_t pointer
+ *
+ * @return *s the extended vector
+ *
+ */
+u8 * format_vxlan_gpe_tunnel (u8 * s, va_list * args)
+{
+ vxlan_gpe_tunnel_t * t = va_arg (*args, vxlan_gpe_tunnel_t *);
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+
+ s = format (s, "[%d] local: %U remote: %U ",
+ t - ngm->tunnels,
+ format_ip46_address, &t->local, IP46_TYPE_ANY,
+ format_ip46_address, &t->remote, IP46_TYPE_ANY);
+
+ s = format (s, " vxlan VNI %d ", t->vni);
+
+ switch (t->protocol)
+ {
+ case VXLAN_GPE_PROTOCOL_IP4:
+ s = format (s, "next-protocol ip4");
+ break;
+ case VXLAN_GPE_PROTOCOL_IP6:
+ s = format (s, "next-protocol ip6");
+ break;
+ case VXLAN_GPE_PROTOCOL_ETHERNET:
+ s = format (s, "next-protocol ethernet");
+ break;
+ case VXLAN_GPE_PROTOCOL_NSH:
+ s = format (s, "next-protocol nsh");
+ break;
+ default:
+ s = format (s, "next-protocol unknown %d", t->protocol);
+ }
+
+ if (ip46_address_is_multicast (&t->remote))
+ s = format (s, "mcast_sw_if_index %d ", t->mcast_sw_if_index);
+
+ s = format (s, " fibs: (encap %d, decap %d)",
+ t->encap_fib_index,
+ t->decap_fib_index);
+
+ return s;
+}
+
+/**
+ * @brief Naming for VXLAN GPE tunnel
+ *
+ * @param *s formatting string
+ * @param *args
+ *
+ * @return *s formatted string
+ *
+ */
+static u8 * format_vxlan_gpe_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "vxlan_gpe_tunnel%d", dev_instance);
+}
+
+static uword dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+/**
+ * @brief Interface admin up/down callback for VXLAN GPE tunnels
+ *
+ * @param *vnm
+ * @param hw_if_index
+ * @param flags
+ *
+ * @return error
+ *
+ */
+static clib_error_t *
+vxlan_gpe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return 0;
+}
+
+VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
+ .name = "VXLAN_GPE",
+ .format_device_name = format_vxlan_gpe_name,
+ .format_tx_trace = format_vxlan_gpe_encap_trace,
+ .tx_function = dummy_interface_tx,
+ .admin_up_down_function = vxlan_gpe_interface_admin_up_down,
+};
+
+
+/**
+ * @brief Format function for the VXLAN GPE header (placeholder, not yet implemented)
+ *
+ * @param *s
+ * @param *args
+ *
+ * @return *s
+ *
+ */
+static u8 * format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
+ .name = "VXLAN_GPE",
+ .format_header = format_vxlan_gpe_header_with_length,
+ .build_rewrite = default_build_rewrite,
+};
+
+static void
+vxlan_gpe_tunnel_restack_dpo(vxlan_gpe_tunnel_t * t)
+{
+ dpo_id_t dpo = DPO_INVALID;
+ u32 encap_index = vxlan_gpe_encap_node.index;
+ fib_forward_chain_type_t forw_type = ip46_address_is_ip4(&t->remote) ?
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
+
+ fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset(&dpo);
+}
+
+static vxlan_gpe_tunnel_t *
+vxlan_gpe_tunnel_from_fib_node (fib_node_t *node)
+{
+#if (CLIB_DEBUG > 0)
+ ASSERT(FIB_NODE_TYPE_VXLAN_GPE_TUNNEL == node->fn_type);
+#endif
+ return ((vxlan_gpe_tunnel_t*) (((char*)node) -
+ STRUCT_OFFSET_OF(vxlan_gpe_tunnel_t, node)));
+}
+
+/**
+ * Function definition to backwalk a FIB node -
+ * Here we will restack the new dpo of VXLAN_GPE DIP to encap node.
+ */
+static fib_node_back_walk_rc_t
+vxlan_gpe_tunnel_back_walk (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ vxlan_gpe_tunnel_restack_dpo(vxlan_gpe_tunnel_from_fib_node(node));
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
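+
+/*
+ * Illustrative trigger (not part of this patch): when the route covering
+ * t->remote changes (e.g. an ARP/ND entry resolves or the next hop moves),
+ * the FIB back-walks its child list; the walk above then restacks the
+ * encap DPO onto the new forwarding without touching the tunnel state.
+ */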
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t*
+vxlan_gpe_tunnel_fib_node_get (fib_node_index_t index)
+{
+ vxlan_gpe_tunnel_t * t;
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+
+ t = pool_elt_at_index(ngm->tunnels, index);
+
+ return (&t->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+vxlan_gpe_tunnel_last_lock_gone (fib_node_t *node)
+{
+ /*
+ * The VXLAN_GPE tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT(0);
+}
+
+/*
+ * Virtual function table registered by VXLAN_GPE tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t vxlan_gpe_vft = {
+ .fnv_get = vxlan_gpe_tunnel_fib_node_get,
+ .fnv_last_lock = vxlan_gpe_tunnel_last_lock_gone,
+ .fnv_back_walk = vxlan_gpe_tunnel_back_walk,
+};
+
+#define foreach_gpe_copy_field \
+_(vni) \
+_(protocol) \
+_(mcast_sw_if_index) \
+_(encap_fib_index) \
+_(decap_fib_index)
+
+#define foreach_copy_ipv4 { \
+ _(local.ip4.as_u32) \
+ _(remote.ip4.as_u32) \
+}
+
+#define foreach_copy_ipv6 { \
+ _(local.ip6.as_u64[0]) \
+ _(local.ip6.as_u64[1]) \
+ _(remote.ip6.as_u64[0]) \
+ _(remote.ip6.as_u64[1]) \
+}
+
+
+/**
+ * @brief Calculate IPv4 VXLAN GPE rewrite header
+ *
+ * @param *t
+ *
+ * @return rc
+ *
+ */
+int vxlan4_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
+ u8 protocol_override, uword encap_next_node)
+{
+ u8 *rw = 0;
+ ip4_header_t * ip0;
+ ip4_vxlan_gpe_header_t * h0;
+ int len;
+
+ len = sizeof (*h0) + extension_size;
+
+ vec_free(t->rewrite);
+ vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip4_vxlan_gpe_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip4 header */
+ ip0 = &h0->ip4;
+ ip0->ip_version_and_header_length = 0x45;
+ ip0->ttl = 254;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip0->src_address.as_u32 = t->local.ip4.as_u32;
+ ip0->dst_address.as_u32 = t->remote.ip4.as_u32;
+ ip0->checksum = ip4_header_checksum (ip0);
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4790);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
+
+ /* VXLAN header. Are we having fun yet? */
+ h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
+ h0->vxlan.ver_res = VXLAN_GPE_VERSION;
+ if (protocol_override)
+ {
+ h0->vxlan.protocol = protocol_override;
+ }
+ else
+ {
+ h0->vxlan.protocol = t->protocol;
+ }
+ t->rewrite_size = sizeof(ip4_vxlan_gpe_header_t) + extension_size;
+ h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni<<8);
+
+ t->rewrite = rw;
+ t->encap_next_node = encap_next_node;
+ return (0);
+}
+
+/**
+ * @brief Calculate IPv6 VXLAN GPE rewrite header
+ *
+ * @param *t
+ *
+ * @return rc
+ *
+ */
+int vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
+ u8 protocol_override, uword encap_next_node)
+{
+ u8 *rw = 0;
+ ip6_header_t * ip0;
+ ip6_vxlan_gpe_header_t * h0;
+ int len;
+
+ len = sizeof (*h0) + extension_size;
+
+ vec_free(t->rewrite);
+ vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip6_vxlan_gpe_header_t *) rw;
+
+  /* Fixed portion of the (outer) ip6 header */
+ ip0 = &h0->ip6;
+ ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(6 << 28);
+ ip0->hop_limit = 255;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ ip0->src_address.as_u64[0] = t->local.ip6.as_u64[0];
+ ip0->src_address.as_u64[1] = t->local.ip6.as_u64[1];
+ ip0->dst_address.as_u64[0] = t->remote.ip6.as_u64[0];
+ ip0->dst_address.as_u64[1] = t->remote.ip6.as_u64[1];
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4790);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
+
+ /* VXLAN header. Are we having fun yet? */
+ h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
+ h0->vxlan.ver_res = VXLAN_GPE_VERSION;
+  if (protocol_override)
+    {
+      h0->vxlan.protocol = protocol_override;
+    }
+  else
+    {
+      h0->vxlan.protocol = t->protocol;
+    }
+  t->rewrite_size = sizeof(ip6_vxlan_gpe_header_t) + extension_size;
+ h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni<<8);
+
+ t->rewrite = rw;
+ t->encap_next_node = encap_next_node;
+ return (0);
+}
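+
+/*
+ * Extension sketch (illustrative, not exercised by this patch): a plugin
+ * such as iOAM that needs 8 extra bytes after the GPE header could call
+ *
+ *   vxlan6_gpe_rewrite (t, 8, VXLAN_GPE_PROTOCOL_IOAM,
+ *                       VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP);
+ *
+ * which reserves extension_size bytes in the rewrite and makes IOAM the
+ * GPE next-protocol; the plugin's own header would then carry the
+ * original t->protocol.
+ */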
+
+static void
+hash_set_key_copy (uword ** h, void * key, uword v) {
+ size_t ksz = hash_header(*h)->user;
+ void * copy = clib_mem_alloc (ksz);
+ clib_memcpy (copy, key, ksz);
+ hash_set_mem (*h, copy, v);
+}
+
+static void
+hash_unset_key_free (uword ** h, void * key) {
+ hash_pair_t * hp = hash_get_pair_mem (*h, key);
+ ASSERT (hp);
+ key = uword_to_pointer (hp->key, void *);
+ hash_unset_mem (*h, key);
+ clib_mem_free (key);
+}
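+
+/*
+ * Usage sketch (illustrative): mem-keyed hashes store a pointer to the
+ * key, so a stack-allocated key must be heap-copied to outlive the
+ * caller. hash_set_key_copy makes that copy; hash_unset_key_free
+ * releases it when the entry is removed.
+ *
+ *   uword *h = hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword));
+ *   ip6_address_t a = { 0 };
+ *   hash_set_key_copy (&h, &a, 42);   // key copied; 'a' may go out of scope
+ *   hash_unset_key_free (&h, &a);     // removes the entry, frees the copy
+ */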
+
+static uword
+vtep_addr_ref(ip46_address_t *ip)
+{
+ uword *vtep = ip46_address_is_ip4(ip) ?
+ hash_get (vxlan_gpe_main.vtep4, ip->ip4.as_u32) :
+ hash_get_mem (vxlan_gpe_main.vtep6, &ip->ip6);
+ if (vtep)
+ return ++(*vtep);
+ ip46_address_is_ip4(ip) ?
+ hash_set (vxlan_gpe_main.vtep4, ip->ip4.as_u32, 1) :
+ hash_set_key_copy (&vxlan_gpe_main.vtep6, &ip->ip6, 1);
+ return 1;
+}
+
+static uword
+vtep_addr_unref(ip46_address_t *ip)
+{
+ uword *vtep = ip46_address_is_ip4(ip) ?
+ hash_get (vxlan_gpe_main.vtep4, ip->ip4.as_u32) :
+ hash_get_mem (vxlan_gpe_main.vtep6, &ip->ip6);
+ ASSERT(vtep);
+ if (--(*vtep) != 0)
+ return *vtep;
+ ip46_address_is_ip4(ip) ?
+ hash_unset (vxlan_gpe_main.vtep4, ip->ip4.as_u32) :
+ hash_unset_key_free (&vxlan_gpe_main.vtep6, &ip->ip6);
+ return 0;
+}
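+
+/*
+ * Refcount sketch (illustrative): the first ref creates the VTEP entry,
+ * later refs only bump the count, and the entry disappears on the last
+ * unref, so several tunnels can share one local VTEP or mcast group.
+ *
+ *   vtep_addr_ref (&ip);             // first tunnel: returns 1, entry added
+ *   vtep_addr_ref (&ip);             // second tunnel: returns 2
+ *   if (vtep_addr_unref (&ip) == 0)  // 0 only when the last user is gone
+ *     ;                              // shared state may be torn down here
+ */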
+
+typedef CLIB_PACKED(union {
+ struct {
+ fib_node_index_t mfib_entry_index;
+ adj_index_t mcast_adj_index;
+ };
+ u64 as_u64;
+}) mcast_shared_t;
+
+static inline mcast_shared_t
+mcast_shared_get(ip46_address_t * ip)
+{
+ ASSERT(ip46_address_is_multicast(ip));
+ uword * p = hash_get_mem (vxlan_gpe_main.mcast_shared, ip);
+ ASSERT(p);
+ return (mcast_shared_t) { .as_u64 = *p };
+}
+
+static inline void
+mcast_shared_add(ip46_address_t *remote,
+ fib_node_index_t mfei,
+ adj_index_t ai)
+{
+ mcast_shared_t new_ep = {
+ .mcast_adj_index = ai,
+ .mfib_entry_index = mfei,
+ };
+
+ hash_set_key_copy (&vxlan_gpe_main.mcast_shared, remote, new_ep.as_u64);
+}
+
+static inline void
+mcast_shared_remove(ip46_address_t *remote)
+{
+ mcast_shared_t ep = mcast_shared_get(remote);
+
+ adj_unlock(ep.mcast_adj_index);
+ mfib_table_entry_delete_index(ep.mfib_entry_index,
+ MFIB_SOURCE_VXLAN_GPE);
+
+ hash_unset_key_free (&vxlan_gpe_main.mcast_shared, remote);
+}
+
+static inline fib_protocol_t
+fib_ip_proto(bool is_ip6)
+{
+ return (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
+}
+
+/**
+ * @brief Add or delete a VXLAN GPE tunnel
+ *
+ * @param *a
+ * @param *sw_if_indexp
+ *
+ * @return rc
+ *
+ */
+int vnet_vxlan_gpe_add_del_tunnel
+(vnet_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
+{
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ vnet_main_t * vnm = ngm->vnet_main;
+ vnet_hw_interface_t * hi;
+ uword * p;
+ u32 hw_if_index = ~0;
+ u32 sw_if_index = ~0;
+ int rv;
+ vxlan4_gpe_tunnel_key_t key4, *key4_copy;
+ vxlan6_gpe_tunnel_key_t key6, *key6_copy;
+ u32 is_ip6 = a->is_ip6;
+
+ if (!is_ip6)
+ {
+ key4.local = a->local.ip4.as_u32;
+ key4.remote = a->remote.ip4.as_u32;
+ key4.vni = clib_host_to_net_u32 (a->vni << 8);
+ key4.pad = 0;
+
+ p = hash_get_mem(ngm->vxlan4_gpe_tunnel_by_key, &key4);
+ }
+ else
+ {
+ key6.local.as_u64[0] = a->local.ip6.as_u64[0];
+ key6.local.as_u64[1] = a->local.ip6.as_u64[1];
+ key6.remote.as_u64[0] = a->remote.ip6.as_u64[0];
+ key6.remote.as_u64[1] = a->remote.ip6.as_u64[1];
+ key6.vni = clib_host_to_net_u32 (a->vni << 8);
+
+ p = hash_get_mem(ngm->vxlan6_gpe_tunnel_by_key, &key6);
+ }
+
+ if (a->is_add)
+ {
+ l2input_main_t * l2im = &l2input_main;
+
+ /* adding a tunnel: tunnel must not already exist */
+ if (p)
+ return VNET_API_ERROR_TUNNEL_EXIST;
+
+ pool_get_aligned (ngm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ /* copy from arg structure */
+#define _(x) t->x = a->x;
+ foreach_gpe_copy_field;
+ if (!a->is_ip6) foreach_copy_ipv4
+ else foreach_copy_ipv6
+#undef _
+
+ if (!a->is_ip6) t->flags |= VXLAN_GPE_TUNNEL_IS_IPV4;
+
+ if (!a->is_ip6) {
+ rv = vxlan4_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP);
+ } else {
+ rv = vxlan6_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP);
+ }
+
+ if (rv)
+ {
+ pool_put (ngm->tunnels, t);
+ return rv;
+ }
+
+ if (!is_ip6)
+ {
+ key4_copy = clib_mem_alloc (sizeof (*key4_copy));
+ clib_memcpy (key4_copy, &key4, sizeof (*key4_copy));
+ hash_set_mem (ngm->vxlan4_gpe_tunnel_by_key, key4_copy,
+ t - ngm->tunnels);
+ }
+ else
+ {
+ key6_copy = clib_mem_alloc (sizeof (*key6_copy));
+ clib_memcpy (key6_copy, &key6, sizeof (*key6_copy));
+ hash_set_mem (ngm->vxlan6_gpe_tunnel_by_key, key6_copy,
+ t - ngm->tunnels);
+ }
+
+ if (vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) > 0)
+ {
+ vnet_interface_main_t * im = &vnm->interface_main;
+ hw_if_index = ngm->free_vxlan_gpe_tunnel_hw_if_indices
+ [vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices)-1];
+ _vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - ngm->tunnels;
+ hi->hw_instance = hi->dev_instance;
+ /* clear old stats of freed tunnel before reuse */
+ sw_if_index = hi->sw_if_index;
+ vnet_interface_counter_lock(im);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], sw_if_index);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], sw_if_index);
+ vlib_zero_simple_counter
+ (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
+ vnet_interface_counter_unlock(im);
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, vxlan_gpe_device_class.index, t - ngm->tunnels,
+ vxlan_gpe_hw_class.index, t - ngm->tunnels);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->output_node_index = vxlan_gpe_encap_node.index;
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->sw_if_index = sw_if_index = hi->sw_if_index;
+ vec_validate_init_empty (ngm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
+ ngm->tunnel_index_by_sw_if_index[sw_if_index] = t - ngm->tunnels;
+
+ /* setup l2 input config with l2 feature and bd 0 to drop packet */
+ vec_validate (l2im->configs, sw_if_index);
+ l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
+ l2im->configs[sw_if_index].bd_index = 0;
+
+ vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, sw_if_index);
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ fib_node_init(&t->node, FIB_NODE_TYPE_VXLAN_GPE_TUNNEL);
+ fib_prefix_t tun_remote_pfx;
+ u32 encap_index = vxlan_gpe_encap_node.index;
+ vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
+
+ fib_prefix_from_ip46_addr(&t->remote, &tun_remote_pfx);
+ if (!ip46_address_is_multicast(&t->remote))
+ {
+ /* Unicast tunnel -
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+ vtep_addr_ref(&t->local);
+ t->fib_entry_index = fib_table_entry_special_add
+ (t->encap_fib_index, &tun_remote_pfx, FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+ t->sibling_index = fib_entry_child_add
+ (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_GPE_TUNNEL, t - ngm->tunnels);
+ vxlan_gpe_tunnel_restack_dpo(t);
+ }
+ else
+ {
+ /* Multicast tunnel -
+       * as the same mcast group can be used for multiple mcast tunnels
+       * with different VNIs, create the output fib adjacency only if
+ * it does not already exist
+ */
+ fib_protocol_t fp = fib_ip_proto(is_ip6);
+
+ if (vtep_addr_ref(&t->remote) == 1)
+ {
+ fib_node_index_t mfei;
+ adj_index_t ai;
+ fib_route_path_t path = {
+ .frp_proto = fib_proto_to_dpo(fp),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ const mfib_prefix_t mpfx = {
+ .fp_proto = fp,
+ .fp_len = (is_ip6 ? 128 : 32),
+ .fp_grp_addr = tun_remote_pfx.fp_addr,
+ };
+
+ /*
+ * Setup the (*,G) to receive traffic on the mcast group
+ * - the forwarding interface is for-us
+ * - the accepting interface is that from the API
+ */
+ mfib_table_entry_path_update(t->encap_fib_index,
+ &mpfx,
+ MFIB_SOURCE_VXLAN_GPE,
+ &path,
+ MFIB_ITF_FLAG_FORWARD);
+
+ path.frp_sw_if_index = a->mcast_sw_if_index;
+ path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
+ mfei = mfib_table_entry_path_update(t->encap_fib_index,
+ &mpfx,
+ MFIB_SOURCE_VXLAN_GPE,
+ &path,
+ MFIB_ITF_FLAG_ACCEPT);
+
+ /*
+ * Create the mcast adjacency to send traffic to the group
+ */
+ ai = adj_mcast_add_or_lock(fp,
+ fib_proto_to_link(fp),
+ a->mcast_sw_if_index);
+
+ /*
+ * create a new end-point
+ */
+ mcast_shared_add(&t->remote, mfei, ai);
+ }
+
+ dpo_id_t dpo = DPO_INVALID;
+ mcast_shared_t ep = mcast_shared_get(&t->remote);
+
+ /* Stack shared mcast remote mac addr rewrite on encap */
+ dpo_set (&dpo, DPO_ADJACENCY_MCAST,
+ fib_proto_to_dpo(fp),
+ ep.mcast_adj_index);
+
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset (&dpo);
+ flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
+ }
+
+ /* Set vxlan tunnel output node */
+ hi->output_node_index = encap_index;
+
+ vnet_get_sw_interface (vnet_get_main(), sw_if_index)->flood_class = flood_class;
+ }
+ else
+ {
+ /* deleting a tunnel: tunnel must exist */
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ t = pool_elt_at_index (ngm->tunnels, p[0]);
+
+ sw_if_index = t->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */);
+ vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, t->sw_if_index);
+ si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;
+ set_int_l2_mode(ngm->vlib_main, vnm, MODE_L3, t->sw_if_index, 0, 0, 0, 0);
+ vec_add1 (ngm->free_vxlan_gpe_tunnel_hw_if_indices, t->hw_if_index);
+
+ ngm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
+
+ if (!is_ip6)
+ hash_unset (ngm->vxlan4_gpe_tunnel_by_key, key4.as_u64);
+ else
+ hash_unset_key_free (&ngm->vxlan6_gpe_tunnel_by_key, &key6);
+
+ if (!ip46_address_is_multicast(&t->remote))
+ {
+ vtep_addr_unref(&t->local);
+ fib_entry_child_remove(t->fib_entry_index, t->sibling_index);
+ fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR);
+ }
+ else if (vtep_addr_unref(&t->remote) == 0)
+ {
+ mcast_shared_remove(&t->remote);
+ }
+
+ fib_node_deinit(&t->node);
+ vec_free (t->rewrite);
+ pool_put (ngm->tunnels, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
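+
+/*
+ * Caller sketch (illustrative; mirrors what the CLI handler and the
+ * binary API handler do): create an IPv4 unicast tunnel in the default
+ * FIBs with VNI 13.
+ *
+ *   vnet_vxlan_gpe_add_del_tunnel_args_t a = {
+ *     .is_add = 1,
+ *     .protocol = VXLAN_GPE_PROTOCOL_IP4,
+ *     .vni = 13,
+ *     .mcast_sw_if_index = ~0,
+ *   };
+ *   a.local.ip4.as_u32 = clib_host_to_net_u32 (0x0a000301);   // 10.0.3.1
+ *   a.remote.ip4.as_u32 = clib_host_to_net_u32 (0x0a000303);  // 10.0.3.3
+ *   u32 sw_if_index;
+ *   int rv = vnet_vxlan_gpe_add_del_tunnel (&a, &sw_if_index);
+ */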
+
+static clib_error_t *
+vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ u8 is_add = 1;
+ ip46_address_t local, remote;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u8 grp_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 encap_fib_index = 0;
+ u32 decap_fib_index = 0;
+ u8 protocol = VXLAN_GPE_PROTOCOL_IP4;
+ u32 vni;
+ u8 vni_set = 0;
+ int rv;
+ u32 tmp;
+ vnet_vxlan_gpe_add_del_tunnel_args_t _a, * a = &_a;
+ u32 sw_if_index;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "local %U",
+ unformat_ip4_address, &local.ip4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip4_address, &remote.ip4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "local %U",
+ unformat_ip6_address, &local.ip6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip6_address, &remote.ip6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip4_address, &remote.ip4,
+ unformat_vnet_sw_interface,
+ vnet_get_main(), &mcast_sw_if_index))
+ {
+ grp_set = remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip6_address, &remote.ip6,
+ unformat_vnet_sw_interface,
+ vnet_get_main(), &mcast_sw_if_index))
+ {
+ grp_set = remote_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "encap-vrf-id %d", &tmp))
+ {
+ if (ipv6_set)
+ encap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp);
+ else
+ encap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp);
+
+ if (encap_fib_index == ~0)
+ {
+ error = clib_error_return (0, "nonexistent encap fib id %d", tmp);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "decap-vrf-id %d", &tmp))
+ {
+ if (ipv6_set)
+ decap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp);
+ else
+ decap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp);
+
+ if (decap_fib_index == ~0)
+ {
+ error = clib_error_return (0, "nonexistent decap fib id %d", tmp);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else if (unformat(line_input, "next-ip4"))
+ protocol = VXLAN_GPE_PROTOCOL_IP4;
+ else if (unformat(line_input, "next-ip6"))
+ protocol = VXLAN_GPE_PROTOCOL_IP6;
+ else if (unformat(line_input, "next-ethernet"))
+ protocol = VXLAN_GPE_PROTOCOL_ETHERNET;
+ else if (unformat(line_input, "next-nsh"))
+ protocol = VXLAN_GPE_PROTOCOL_NSH;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ error = clib_error_return (0, "tunnel local address not specified");
+ goto done;
+ }
+
+ if (remote_set == 0)
+ {
+ error = clib_error_return (0, "tunnel remote address not specified");
+ goto done;
+ }
+
+ if (grp_set && !ip46_address_is_multicast(&remote))
+ {
+ error = clib_error_return (0, "tunnel group address not multicast");
+ goto done;
+ }
+
+ if (grp_set == 0 && ip46_address_is_multicast(&remote))
+ {
+ error = clib_error_return (0, "remote address must be unicast");
+ goto done;
+ }
+
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ error = clib_error_return (0, "tunnel nonexistent multicast device");
+ goto done;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ error = clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ goto done;
+ }
+
+ if ((ipv4_set && memcmp(&local.ip4, &remote.ip4, sizeof(local.ip4)) == 0) ||
+ (ipv6_set && memcmp(&local.ip6, &remote.ip6, sizeof(local.ip6)) == 0))
+ {
+ error = clib_error_return (0, "src and remote addresses are identical");
+ goto done;
+ }
+
+ if (vni_set == 0)
+ {
+ error = clib_error_return (0, "vni not specified");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = is_add;
+ a->is_ip6 = ipv6_set;
+
+#define _(x) a->x = x;
+ foreach_gpe_copy_field;
+ if (ipv4_set) foreach_copy_ipv4
+ else foreach_copy_ipv6
+#undef _
+
+ rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
+
+ switch(rv)
+ {
+ case 0:
+ vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index);
+ break;
+ case VNET_API_ERROR_INVALID_DECAP_NEXT:
+ error = clib_error_return (0, "invalid decap-next...");
+ goto done;
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ error = clib_error_return (0, "tunnel already exists...");
+ goto done;
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "tunnel does not exist...");
+ goto done;
+
+ default:
+ error = clib_error_return
+ (0, "vnet_vxlan_gpe_add_del_tunnel returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Add or delete a VXLAN-GPE Tunnel.
+ *
+ * VXLAN-GPE provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using VXLAN-GPE tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE segment.
+ *
+ * @cliexpar
+ * Example of how to create a VXLAN-GPE Tunnel:
+ * @cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 encap-vrf-id 7}
+ * Example of how to delete a VXLAN-GPE Tunnel:
+ * @cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 del}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
+ .path = "create vxlan-gpe tunnel",
+ .short_help =
+ "create vxlan-gpe tunnel local <local-addr> "
+ " {remote <remote-addr>|group <mcast-addr> <intf-name>}"
+ " vni <nn> [next-ip4][next-ip6][next-ethernet][next-nsh]"
+ " [encap-vrf-id <nn>] [decap-vrf-id <nn>] [del]\n",
+ .function = vxlan_gpe_add_del_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI function for showing VXLAN GPE tunnels
+ *
+ * @param *vm
+ * @param *input
+ * @param *cmd
+ *
+ * @return error
+ *
+ */
+static clib_error_t *
+show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t * t;
+
+ if (pool_elts (ngm->tunnels) == 0)
+ vlib_cli_output (vm, "No vxlan-gpe tunnels configured.");
+
+ pool_foreach (t, ngm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_vxlan_gpe_tunnel, t);
+ }));
+
+ return 0;
+}
+
+/*?
+ * Display all the VXLAN-GPE Tunnel entries.
+ *
+ * @cliexpar
+ * Example of how to display the VXLAN-GPE Tunnel entries:
+ * @cliexstart{show vxlan-gpe tunnel}
+ * [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_vxlan_gpe_tunnel_command, static) = {
+ .path = "show vxlan-gpe",
+ .function = show_vxlan_gpe_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index,
+ u8 is_ip6,
+ u8 is_enable)
+{
+ if (is_ip6)
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-gpe-bypass",
+ sw_if_index, is_enable, 0, 0);
+ else
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-gpe-bypass",
+ sw_if_index, is_enable, 0, 0);
+}
+
+
+static clib_error_t *
+set_ip_vxlan_gpe_bypass (u32 is_ip6,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index, is_enable;
+
+ sw_if_index = ~0;
+ is_enable = 1;
+
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_enable = 0;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == sw_if_index)
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ vnet_int_vxlan_gpe_bypass_mode (sw_if_index, is_ip6, is_enable);
+
+ done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+set_ip4_vxlan_gpe_bypass (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return set_ip_vxlan_gpe_bypass (0, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip4-vxlan-gpe-bypass' graph node for a given interface.
+ * By adding the IPv4 vxlan-gpe-bypass graph node to an interface, the node
+ * checks for and validates input vxlan_gpe packets, letting them bypass the
+ * ip4-lookup, ip4-local and ip4-udp-lookup nodes and thereby speeding up
+ * vxlan_gpe packet forwarding. The extra overhead this node adds for
+ * non-vxlan_gpe packets is kept to a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip4-vxlan-gpe-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-vxlan-gpe-bypass}
+ * Name Next Previous
+ * ip4-vxlan-gpe-bypass error-drop [0]
+ * vxlan4-gpe-input [1]
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip4-vxlan-gpe-bypass on an interface:
+ * @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip4-vxlan-gpe-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-vxlan-gpe-bypass}
+ * Name Next Previous
+ * ip4-vxlan-gpe-bypass error-drop [0] ip4-input
+ * vxlan4-gpe-input [1] ip4-input-no-checksum
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv4 unicast:
+ * ip4-vxlan-gpe-bypass
+ * ip4-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip4-vxlan-gpe-bypass on an interface:
+ * @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_vxlan_gpe_bypass_command, static) = {
+ .path = "set interface ip vxlan-gpe-bypass",
+ .function = set_ip4_vxlan_gpe_bypass,
+ .short_help = "set interface ip vxlan-gpe-bypass <interface> [del]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return set_ip_vxlan_gpe_bypass (1, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip6-vxlan-gpe-bypass' graph node for a given interface.
+ * By adding the IPv6 vxlan-gpe-bypass graph node to an interface, the node
+ * checks for and validates input vxlan_gpe packets, letting them bypass the
+ * ip6-lookup, ip6-local and ip6-udp-lookup nodes and thereby speeding up
+ * vxlan_gpe packet forwarding. The extra overhead this node adds for
+ * non-vxlan_gpe packets is kept to a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip6-vxlan-gpe-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-vxlan-gpe-bypass}
+ * Name Next Previous
+ * ip6-vxlan-gpe-bypass error-drop [0]
+ * vxlan6-gpe-input [1]
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip6-vxlan-gpe-bypass on an interface:
+ * @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip6-vxlan-gpe-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-vxlan-gpe-bypass}
+ * Name Next Previous
+ * ip6-vxlan-gpe-bypass error-drop [0] ip6-input
+ *                             vxlan6-gpe-input [1]
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv6 unicast:
+ * ip6-vxlan-gpe-bypass
+ * ip6-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip6-vxlan-gpe-bypass on an interface:
+ * @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gpe_bypass_command, static) = {
+ .path = "set interface ip6 vxlan-gpe-bypass",
+ .function = set_ip6_vxlan_gpe_bypass,
+ .short_help = "set interface ip vxlan-gpe-bypass <interface> [del]",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (ip4_vxlan_gpe_bypass, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-vxlan-gpe-bypass",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-vxlan-gpe-bypass",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Feature init function for VXLAN GPE
+ *
+ * @param *vm
+ *
+ * @return error
+ *
+ */
+clib_error_t *vxlan_gpe_init (vlib_main_t *vm)
+{
+ vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+
+ ngm->vnet_main = vnet_get_main();
+ ngm->vlib_main = vm;
+
+ ngm->vxlan4_gpe_tunnel_by_key
+ = hash_create_mem (0, sizeof(vxlan4_gpe_tunnel_key_t), sizeof (uword));
+
+ ngm->vxlan6_gpe_tunnel_by_key
+ = hash_create_mem (0, sizeof(vxlan6_gpe_tunnel_key_t), sizeof (uword));
+
+
+ ngm->mcast_shared = hash_create_mem(0,
+ sizeof(ip46_address_t),
+ sizeof(mcast_shared_t));
+
+ udp_register_dst_port (vm, UDP_DST_PORT_VXLAN_GPE,
+ vxlan4_gpe_input_node.index, 1 /* is_ip4 */);
+ udp_register_dst_port (vm, UDP_DST_PORT_VXLAN6_GPE,
+ vxlan6_gpe_input_node.index, 0 /* is_ip4 */);
+
+ /* Register the list of standard decap protocols supported */
+ vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IP4,
+ VXLAN_GPE_INPUT_NEXT_IP4_INPUT);
+ vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IP6,
+ VXLAN_GPE_INPUT_NEXT_IP6_INPUT);
+ vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_ETHERNET,
+ VXLAN_GPE_INPUT_NEXT_ETHERNET_INPUT);
+
+ fib_node_register_type(FIB_NODE_TYPE_VXLAN_GPE_TUNNEL, &vxlan_gpe_vft);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(vxlan_gpe_init);
+
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h
new file mode 100644
index 00000000..c348b5d5
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.h
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief VXLAN GPE definitions
+ *
+*/
+#ifndef included_vnet_vxlan_gpe_h
+#define included_vnet_vxlan_gpe_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief VXLAN GPE header struct
+ *
+ */
+typedef CLIB_PACKED (struct {
+ /** 20 bytes */
+ ip4_header_t ip4;
+ /** 8 bytes */
+ udp_header_t udp;
+ /** 8 bytes */
+ vxlan_gpe_header_t vxlan;
+}) ip4_vxlan_gpe_header_t;
+
+typedef CLIB_PACKED (struct {
+ /** 40 bytes */
+ ip6_header_t ip6;
+ /** 8 bytes */
+ udp_header_t udp;
+ /** 8 bytes */
+ vxlan_gpe_header_t vxlan;
+}) ip6_vxlan_gpe_header_t;
+
+/**
+ * @brief Key struct for IPv4 VXLAN GPE tunnel.
+ *  Key fields: local, remote, vni
+ * all fields in NET byte order
+ * VNI shifted 8 bits
+ */
+typedef CLIB_PACKED(struct {
+ union {
+ struct {
+ u32 local;
+ u32 remote;
+
+ u32 vni;
+ u32 pad;
+ };
+ u64 as_u64[2];
+ };
+}) vxlan4_gpe_tunnel_key_t;
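+
+/*
+ * Key construction sketch (matches vnet_vxlan_gpe_add_del_tunnel):
+ *
+ *   vxlan4_gpe_tunnel_key_t key4;
+ *   key4.local = local_ip4.as_u32;               // already net byte order
+ *   key4.remote = remote_ip4.as_u32;
+ *   key4.vni = clib_host_to_net_u32 (vni << 8);  // VNI in the upper 24 bits
+ *   key4.pad = 0;                                // keep both u64 halves defined
+ */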
+
+/**
+ * @brief Key struct for IPv6 VXLAN GPE tunnel.
+ *  Key fields: local, remote, vni
+ * all fields in NET byte order
+ * VNI shifted 8 bits
+ */
+typedef CLIB_PACKED(struct {
+ ip6_address_t local;
+ ip6_address_t remote;
+ u32 vni;
+}) vxlan6_gpe_tunnel_key_t;
+
+/**
+ * @brief Struct for VXLAN GPE tunnel
+ */
+typedef struct {
+ /** Rewrite string. $$$$ embed vnet_rewrite header */
+ u8 * rewrite;
+
+ /** encapsulated protocol */
+ u8 protocol;
+
+ /* FIB DPO for IP forwarding of VXLAN-GPE encap packet */
+ dpo_id_t next_dpo;
+ /** tunnel local address */
+ ip46_address_t local;
+ /** tunnel remote address */
+ ip46_address_t remote;
+
+ /* mcast packet output intfc index (used only if dst is mcast) */
+ u32 mcast_sw_if_index;
+
+ /** FIB indices - tunnel partner lookup here */
+ u32 encap_fib_index;
+ /** FIB indices - inner IP packet lookup here */
+ u32 decap_fib_index;
+
+ /** VXLAN GPE VNI in HOST byte order, shifted left 8 bits */
+ u32 vni;
+
+ /** vnet intfc hw_if_index */
+ u32 hw_if_index;
+ /** vnet intfc sw_if_index */
+ u32 sw_if_index;
+
+ /** flags */
+ u32 flags;
+
+ /** rewrite size for dynamic plugins like iOAM */
+ u8 rewrite_size;
+
+ /** Next node after VxLAN-GPE encap */
+ uword encap_next_node;
+
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /*
+   * The FIB entry used either to send unicast VXLAN-GPE encap packets or
+   * to receive mcast VXLAN-GPE packets, depending on whether the tunnel
+   * is unicast or mcast
+ */
+ fib_node_index_t fib_entry_index;
+ adj_index_t mcast_adj_index;
+
+ /**
+   * The tunnel is a child of the FIB entry for its destination. This is
+   * so it receives updates when the forwarding information for that entry
+   * changes.
+   * The tunnel's sibling index on the FIB entry's dependency list.
+ */
+ u32 sibling_index;
+
+} vxlan_gpe_tunnel_t;
+
+/** Flags for vxlan_gpe_tunnel_t */
+#define VXLAN_GPE_TUNNEL_IS_IPV4 1
+
+/** next nodes for VXLAN GPE input */
+#define foreach_vxlan_gpe_input_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input") \
+_(ETHERNET_INPUT, "ethernet-input")
+
+/** struct for next nodes for VXLAN GPE input */
+typedef enum {
+#define _(s,n) VXLAN_GPE_INPUT_NEXT_##s,
+ foreach_vxlan_gpe_input_next
+#undef _
+ VXLAN_GPE_INPUT_N_NEXT,
+} vxlan_gpe_input_next_t;
+
+/** struct for VXLAN GPE errors */
+typedef enum {
+#define vxlan_gpe_error(n,s) VXLAN_GPE_ERROR_##n,
+#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
+#undef vxlan_gpe_error
+ VXLAN_GPE_N_ERROR,
+} vxlan_gpe_input_error_t;
+
+/** Struct for VXLAN GPE node state */
+typedef struct {
+ /** vector of encap tunnel instances */
+ vxlan_gpe_tunnel_t *tunnels;
+
+ /** lookup IPv4 VXLAN GPE tunnel by key */
+ uword * vxlan4_gpe_tunnel_by_key;
+ /** lookup IPv6 VXLAN GPE tunnel by key */
+ uword * vxlan6_gpe_tunnel_by_key;
+
+ /* local VTEP IPs ref count used by vxlan-bypass node to check if
+ received VXLAN packet DIP matches any local VTEP address */
+ uword * vtep4; /* local ip4 VTEPs keyed on their ip4 addr */
+ uword * vtep6; /* local ip6 VTEPs keyed on their ip6 addr */
+ /* mcast shared info */
+ uword * mcast_shared; /* keyed on mcast ip46 addr */
+ /** Free vlib hw_if_indices */
+ u32 * free_vxlan_gpe_tunnel_hw_if_indices;
+
+ /** Mapping from sw_if_index to tunnel index */
+ u32 * tunnel_index_by_sw_if_index;
+
+ /** State convenience vlib_main_t */
+ vlib_main_t * vlib_main;
+ /** State convenience vnet_main_t */
+ vnet_main_t * vnet_main;
+
+ /** List of next nodes for the decap indexed on protocol */
+ uword decap_next_node_list[VXLAN_GPE_PROTOCOL_MAX];
+} vxlan_gpe_main_t;
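+
+/*
+ * Lookup sketch (illustrative; mirrors the binary API dump handler):
+ * map an interface back to its tunnel, guarding against sw_if_indices
+ * that were never mapped.
+ *
+ *   vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ *   vxlan_gpe_tunnel_t *t = 0;
+ *   if (sw_if_index < vec_len (gm->tunnel_index_by_sw_if_index)
+ *       && ~0 != gm->tunnel_index_by_sw_if_index[sw_if_index])
+ *     t = pool_elt_at_index (gm->tunnels,
+ *                            gm->tunnel_index_by_sw_if_index[sw_if_index]);
+ */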
+
+vxlan_gpe_main_t vxlan_gpe_main;
+
+extern vlib_node_registration_t vxlan_gpe_encap_node;
+extern vlib_node_registration_t vxlan4_gpe_input_node;
+extern vlib_node_registration_t vxlan6_gpe_input_node;
+
+u8 * format_vxlan_gpe_encap_trace (u8 * s, va_list * args);
+
+/** Struct for VXLAN GPE add/del args */
+typedef struct {
+ u8 is_add;
+ u8 is_ip6;
+ ip46_address_t local, remote;
+ u8 protocol;
+ u32 mcast_sw_if_index;
+ u32 encap_fib_index;
+ u32 decap_fib_index;
+ u32 vni;
+} vnet_vxlan_gpe_add_del_tunnel_args_t;
+
+
+int vnet_vxlan_gpe_add_del_tunnel
+(vnet_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp);
+
+
+int vxlan4_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
+ u8 protocol_override, uword encap_next_node);
+int vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
+ u8 protocol_override, uword encap_next_node);
+
+/**
+ * @brief Struct for defining VXLAN GPE next nodes
+ */
+typedef enum {
+ VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP,
+ VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP,
+ VXLAN_GPE_ENCAP_NEXT_DROP,
+ VXLAN_GPE_ENCAP_N_NEXT
+} vxlan_gpe_encap_next_t;
+
+
+void vxlan_gpe_unregister_decap_protocol (u8 protocol_id, uword next_node_index);
+
+void vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index);
+
+void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable);
+
+#endif /* included_vnet_vxlan_gpe_h */
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
new file mode 100644
index 00000000..8e268418
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
@@ -0,0 +1,272 @@
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_api.c - vxlan_gpe api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SW_INTERFACE_SET_VXLAN_GPE_BYPASS, sw_interface_set_vxlan_gpe_bypass) \
+_(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \
+_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump)
+
+static void
+ vl_api_sw_interface_set_vxlan_gpe_bypass_t_handler
+ (vl_api_sw_interface_set_vxlan_gpe_bypass_t * mp)
+{
+ vl_api_sw_interface_set_vxlan_gpe_bypass_reply_t *rmp;
+ int rv = 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vnet_int_vxlan_gpe_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_GPE_BYPASS_REPLY);
+}
+
+static void
+ vl_api_vxlan_gpe_add_del_tunnel_t_handler
+ (vl_api_vxlan_gpe_add_del_tunnel_t * mp)
+{
+ vl_api_vxlan_gpe_add_del_tunnel_reply_t *rmp;
+ int rv = 0;
+ vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a;
+ u32 encap_fib_index, decap_fib_index;
+ u8 protocol;
+ uword *p;
+ ip4_main_t *im = &ip4_main;
+ u32 sw_if_index = ~0;
+
+
+ p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+ encap_fib_index = p[0];
+
+ protocol = mp->protocol;
+
+  /* Interpret decap_vrf_id as opaque unless the inner protocol is IPv4 */
+  if (protocol == VXLAN_GPE_PROTOCOL_IP4)
+ {
+ p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_INNER_FIB;
+ goto out;
+ }
+ decap_fib_index = p[0];
+ }
+ else
+ {
+ decap_fib_index = ntohl (mp->decap_vrf_id);
+ }
+
+ /* Check src & dst are different */
+ if ((mp->is_ipv6 && memcmp (mp->local, mp->remote, 16) == 0) ||
+ (!mp->is_ipv6 && memcmp (mp->local, mp->remote, 4) == 0))
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = mp->is_add;
+ a->is_ip6 = mp->is_ipv6;
+ /* ip addresses sent in network byte order */
+ if (a->is_ip6)
+ {
+ clib_memcpy (&(a->local.ip6), mp->local, 16);
+ clib_memcpy (&(a->remote.ip6), mp->remote, 16);
+ }
+ else
+ {
+ clib_memcpy (&(a->local.ip4), mp->local, 4);
+ clib_memcpy (&(a->remote.ip4), mp->remote, 4);
+ }
+ a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index);
+ a->encap_fib_index = encap_fib_index;
+ a->decap_fib_index = decap_fib_index;
+ a->protocol = protocol;
+ a->vni = ntohl (mp->vni);
+ rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void send_vxlan_gpe_tunnel_details
+ (vxlan_gpe_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_vxlan_gpe_tunnel_details_t *rmp;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ u8 is_ipv6 = !(t->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_VXLAN_GPE_TUNNEL_DETAILS);
+ if (is_ipv6)
+ {
+ memcpy (rmp->local, &(t->local.ip6.as_u8), 16);
+ memcpy (rmp->remote, &(t->remote.ip6.as_u8), 16);
+ rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
+ rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id);
+ }
+ else
+ {
+ memcpy (rmp->local, &(t->local.ip4.as_u8), 4);
+ memcpy (rmp->remote, &(t->remote.ip4.as_u8), 4);
+ rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id);
+ }
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->vni = htonl (t->vni);
+ rmp->protocol = t->protocol;
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->is_ipv6 = is_ipv6;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void vl_api_vxlan_gpe_tunnel_dump_t_handler
+ (vl_api_vxlan_gpe_tunnel_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ vxlan_gpe_main_t *vgm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t;
+ u32 sw_if_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (t, vgm->tunnels,
+ ({
+ send_vxlan_gpe_tunnel_details(t, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) ||
+ (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_vxlan_gpe_tunnel_details (t, q, mp->context);
+ }
+}
+
+
+/*
+ * vpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_vxlan_gpe;
+#undef _
+}
+
+static clib_error_t *
+vxlan_gpe_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ am->api_trace_cfg[VL_API_VXLAN_GPE_ADD_DEL_TUNNEL].size +=
+ 17 * sizeof (u32);
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vxlan_gpe_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_error.def b/src/vnet/vxlan-gpe/vxlan_gpe_error.def
new file mode 100644
index 00000000..9cf1b1cb
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_error.def
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+vxlan_gpe_error (DECAPSULATED, "good packets decapsulated")
+vxlan_gpe_error (NO_SUCH_TUNNEL, "no such tunnel packets")
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h b/src/vnet/vxlan-gpe/vxlan_gpe_packet.h
new file mode 100644
index 00000000..ec3c2e58
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_packet.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief VXLAN GPE packet header structure
+ *
+*/
+#ifndef included_vxlan_gpe_packet_h
+#define included_vxlan_gpe_packet_h
+
+/**
+ * From draft-quinn-vxlan-gpe-03.txt
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * I Bit: Flag bit 4 indicates that the VNI is valid.
+ *
+ * P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ * MUST be set to 1 to indicate the presence of the 8 bit next
+ * protocol field.
+ *
+ * O Bit: Flag bit 7 is defined as the O bit. When the O bit is set to 1,
+ *
+ * the packet is an OAM packet and OAM processing MUST occur. The OAM
+ * protocol details are out of scope for this document. As with the
+ * P-bit, bit 7 is currently a reserved flag in VXLAN.
+ *
+ * VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
+ * reserved in VXLAN. The version field is used to ensure backward
+ * compatibility going forward with future VXLAN-gpe updates.
+ *
+ * The initial version for VXLAN-gpe is 0.
+ *
+ * This draft defines the following Next Protocol values:
+ *
+ * 0x1 : IPv4
+ * 0x2 : IPv6
+ * 0x3 : Ethernet
+ * 0x4 : Network Service Header [NSH]
+ */
+
+/**
+ * @brief VXLAN GPE supported inner protocol definitions.
+ * 1 - IP4
+ * 2 - IP6
+ * 3 - ETHERNET
+ * 4 - NSH
+ * 5 - IOAM
+ */
+#define foreach_vxlan_gpe_protocol \
+_ (0x01, IP4) \
+_ (0x02, IP6) \
+_ (0x03, ETHERNET) \
+_ (0x04, NSH) \
+_ (0x05, IOAM)
+
+
+/**
+ * @brief Enum of VXLAN GPE supported inner protocols.
+ * 1 - IP4
+ * 2 - IP6
+ * 3 - ETHERNET
+ * 4 - NSH
+ * 5 - IOAM
+ */
+typedef enum {
+#define _(n,f) VXLAN_GPE_PROTOCOL_##f = n,
+ foreach_vxlan_gpe_protocol
+#undef _
+ VXLAN_GPE_PROTOCOL_MAX,
+} vxlan_gpe_protocol_t;
+
+/**
+ * @brief VXLAN GPE Header definition
+ */
+typedef struct {
+ u8 flags;
+ /** Version and Reserved */
+ u8 ver_res;
+ /** Reserved */
+ u8 res;
+ /** see vxlan_gpe_protocol_t */
+ u8 protocol;
+ /** VNI and Reserved */
+ u32 vni_res;
+} vxlan_gpe_header_t;
+
+#define VXLAN_GPE_FLAGS_I 0x08
+#define VXLAN_GPE_FLAGS_P 0x04
+#define VXLAN_GPE_FLAGS_O 0x01
+#define VXLAN_GPE_VERSION 0x0
+
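+/*
+ * Usage sketch (illustrative, not part of the data plane): the VNI sits
+ * in the upper 24 bits of vni_res, carried in network byte order, so a
+ * receiver byte-swaps, shifts out the low reserved octet, and checks the
+ * I bit before trusting the VNI:
+ *
+ *   vxlan_gpe_header_t *h = vlib_buffer_get_current (b);
+ *   u32 vni = clib_net_to_host_u32 (h->vni_res) >> 8;
+ *   int vni_valid = (h->flags & VXLAN_GPE_FLAGS_I) != 0;
+ */
+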
+#endif /* included_vxlan_gpe_packet_h */
diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c
new file mode 100644
index 00000000..0dc89d3f
--- /dev/null
+++ b/src/vnet/vxlan/decap.c
@@ -0,0 +1,1239 @@
+/*
+ * decap.c: vxlan tunnel decap packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/vxlan/vxlan.h>
+
+vlib_node_registration_t vxlan4_input_node;
+vlib_node_registration_t vxlan6_input_node;
+
+typedef struct {
+ u32 next_index;
+ u32 tunnel_index;
+ u32 error;
+ u32 vni;
+} vxlan_rx_trace_t;
+
+static u8 * format_vxlan_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_rx_trace_t * t = va_arg (*args, vxlan_rx_trace_t *);
+
+ if (t->tunnel_index != ~0)
+ {
+ s = format (s, "VXLAN decap from vxlan_tunnel%d vni %d next %d error %d",
+ t->tunnel_index, t->vni, t->next_index, t->error);
+ }
+ else
+ {
+ s = format (s, "VXLAN decap error - tunnel for vni %d does not exist",
+ t->vni);
+ }
+ return s;
+}
+
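+/* Check that the packet arrived in the tunnel's encap FIB: take the FIB
+ * index bound to the RX interface unless a TX FIB was set in the buffer
+ * metadata, and require it to match the tunnel's encap_fib_index. */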
+always_inline u32
+validate_vxlan_fib (vlib_buffer_t *b, vxlan_tunnel_t *t, u32 is_ip4)
+{
+ u32 fib_index, sw_if_index;
+
+ sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+
+ if (is_ip4)
+ fib_index = (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ vec_elt (ip4_main.fib_index_by_sw_if_index, sw_if_index) :
+ vnet_buffer (b)->sw_if_index[VLIB_TX];
+ else
+ fib_index = (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ vec_elt (ip6_main.fib_index_by_sw_if_index, sw_if_index) :
+ vnet_buffer (b)->sw_if_index[VLIB_TX];
+
+ return (fib_index == t->encap_fib_index);
+}
+
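+/* Shared ip4/ip6 decap worker. is_ip4 is a compile-time constant in the
+ * two wrappers below, so the branches on it are folded away. Buffers are
+ * handled two at a time while the frame allows, then singly; the last
+ * matched (SIP, VNI) key is cached so back-to-back packets from the same
+ * tunnel skip the hash lookup. */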
+always_inline uword
+vxlan_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u32 is_ip4)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ vxlan_main_t * vxm = &vxlan_main;
+ vnet_main_t * vnm = vxm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 last_tunnel_index = ~0;
+ vxlan4_tunnel_key_t last_key4;
+ vxlan6_tunnel_key_t last_key6;
+ u32 pkts_decapsulated = 0;
+ u32 thread_index = vlib_get_thread_index();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+
+ if (is_ip4)
+ last_key4.as_u64 = ~0;
+ else
+ memset (&last_key6, 0xff, sizeof (last_key6));
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ ip4_header_t * ip4_0, * ip4_1;
+ ip6_header_t * ip6_0, * ip6_1;
+ vxlan_header_t * vxlan0, * vxlan1;
+ uword * p0, * p1;
+ u32 tunnel_index0, tunnel_index1;
+ vxlan_tunnel_t * t0, * t1, * mt0 = NULL, * mt1 = NULL;
+ vxlan4_tunnel_key_t key4_0, key4_1;
+ vxlan6_tunnel_key_t key6_0, key6_1;
+ u32 error0, error1;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* udp leaves current_data pointing at the vxlan header */
+ vxlan0 = vlib_buffer_get_current (b0);
+ vxlan1 = vlib_buffer_get_current (b1);
+ if (is_ip4) {
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+ vlib_buffer_advance
+ (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+ ip4_0 = vlib_buffer_get_current (b0);
+ ip4_1 = vlib_buffer_get_current (b1);
+ } else {
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip6_header_t)));
+ vlib_buffer_advance
+ (b1, -(word)(sizeof(udp_header_t)+sizeof(ip6_header_t)));
+ ip6_0 = vlib_buffer_get_current (b0);
+ ip6_1 = vlib_buffer_get_current (b1);
+ }
+
+ /* pop (ip, udp, vxlan) */
+ if (is_ip4) {
+ vlib_buffer_advance
+ (b0, sizeof(*ip4_0)+sizeof(udp_header_t)+sizeof(*vxlan0));
+ vlib_buffer_advance
+ (b1, sizeof(*ip4_1)+sizeof(udp_header_t)+sizeof(*vxlan1));
+ } else {
+ vlib_buffer_advance
+ (b0, sizeof(*ip6_0)+sizeof(udp_header_t)+sizeof(*vxlan0));
+ vlib_buffer_advance
+ (b1, sizeof(*ip6_1)+sizeof(udp_header_t)+sizeof(*vxlan1));
+ }
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+
+ tunnel_index1 = ~0;
+ error1 = 0;
+
+ if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
+ {
+ error0 = VXLAN_ERROR_BAD_FLAGS;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ if (is_ip4) {
+ key4_0.src = ip4_0->src_address.as_u32;
+ key4_0.vni = vxlan0->vni_reserved;
+
+	  /* Make sure a VXLAN tunnel exists for the packet's SIP and VNI */
+ if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
+ {
+ p0 = hash_get (vxm->vxlan4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ last_key4.as_u64 = key4_0.as_u64;
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0);
+
+	  /* Validate VXLAN tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_vxlan_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ /* Validate VXLAN tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
+ goto next0; /* valid packet */
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_0->dst_address)))
+ {
+ key4_0.src = ip4_0->dst_address.as_u32;
+ key4_0.vni = vxlan0->vni_reserved;
+	      /* Make sure an mcast VXLAN tunnel exists for the packet's DIP and VNI */
+ p0 = hash_get (vxm->vxlan4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (vxm->tunnels, p0[0]);
+ goto next0; /* valid packet */
+ }
+ }
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace0;
+
+ } else /* !is_ip4 */ {
+ key6_0.src.as_u64[0] = ip6_0->src_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->src_address.as_u64[1];
+ key6_0.vni = vxlan0->vni_reserved;
+
+	  /* Make sure a VXLAN tunnel exists for the packet's SIP and VNI */
+ if (PREDICT_FALSE (memcmp(&key6_0, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p0 = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6_0);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ clib_memcpy (&last_key6, &key6_0, sizeof(key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0);
+
+	  /* Validate VXLAN tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_vxlan_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ /* Validate VXLAN tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address,
+ &t0->src.ip6)))
+ goto next0; /* valid packet */
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip6_0->dst_address)))
+ {
+ key6_0.src.as_u64[0] = ip6_0->dst_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->dst_address.as_u64[1];
+ key6_0.vni = vxlan0->vni_reserved;
+ p0 = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6_0);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (vxm->tunnels, p0[0]);
+ goto next0; /* valid packet */
+ }
+ }
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ next0:
+ next0 = t0->decap_next_index;
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ if (PREDICT_TRUE(next0 == VXLAN_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b0);
+
+ /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ sw_if_index0 = (mt0) ? mt0->sw_if_index : sw_if_index0;
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace0:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ tr->vni = vnet_get_vni (vxlan0);
+ }
+
+ if (PREDICT_FALSE (vxlan1->flags != VXLAN_FLAGS_I))
+ {
+ error1 = VXLAN_ERROR_BAD_FLAGS;
+ next1 = VXLAN_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ if (is_ip4) {
+ key4_1.src = ip4_1->src_address.as_u32;
+ key4_1.vni = vxlan1->vni_reserved;
+
+	  /* Make sure a unicast VXLAN tunnel exists for the packet's SIP and VNI */
+ if (PREDICT_FALSE (key4_1.as_u64 != last_key4.as_u64))
+ {
+ p1 = hash_get (vxm->vxlan4_tunnel_by_key, key4_1.as_u64);
+ if (PREDICT_FALSE (p1 == NULL))
+ {
+ error1 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next1 = VXLAN_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+ last_key4.as_u64 = key4_1.as_u64;
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ t1 = pool_elt_at_index (vxm->tunnels, tunnel_index1);
+
+	  /* Validate VXLAN tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_vxlan_fib (b1, t1, is_ip4) == 0))
+ {
+ error1 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next1 = VXLAN_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ /* Validate VXLAN tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip4_1->dst_address.as_u32 == t1->src.ip4.as_u32))
+ goto next1; /* valid packet */
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_1->dst_address)))
+ {
+ key4_1.src = ip4_1->dst_address.as_u32;
+ key4_1.vni = vxlan1->vni_reserved;
+	      /* Make sure an mcast VXLAN tunnel exists for the packet's DIP and VNI */
+ p1 = hash_get (vxm->vxlan4_tunnel_by_key, key4_1.as_u64);
+ if (PREDICT_TRUE (p1 != NULL))
+ {
+ mt1 = pool_elt_at_index (vxm->tunnels, p1[0]);
+ goto next1; /* valid packet */
+ }
+ }
+ error1 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next1 = VXLAN_INPUT_NEXT_DROP;
+ goto trace1;
+
+ } else /* !is_ip4 */ {
+ key6_1.src.as_u64[0] = ip6_1->src_address.as_u64[0];
+ key6_1.src.as_u64[1] = ip6_1->src_address.as_u64[1];
+ key6_1.vni = vxlan1->vni_reserved;
+
+	  /* Make sure a VXLAN tunnel exists for the packet's SIP and VNI */
+ if (PREDICT_FALSE (memcmp(&key6_1, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p1 = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6_1);
+
+ if (PREDICT_FALSE (p1 == NULL))
+ {
+ error1 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next1 = VXLAN_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ clib_memcpy (&last_key6, &key6_1, sizeof(key6_1));
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ t1 = pool_elt_at_index (vxm->tunnels, tunnel_index1);
+
+	  /* Validate VXLAN tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_vxlan_fib (b1, t1, is_ip4) == 0))
+ {
+ error1 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next1 = VXLAN_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ /* Validate VXLAN tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip6_address_is_equal (&ip6_1->dst_address,
+ &t1->src.ip6)))
+ goto next1; /* valid packet */
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip6_1->dst_address)))
+ {
+ key6_1.src.as_u64[0] = ip6_1->dst_address.as_u64[0];
+ key6_1.src.as_u64[1] = ip6_1->dst_address.as_u64[1];
+ key6_1.vni = vxlan1->vni_reserved;
+ p1 = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6_1);
+ if (PREDICT_TRUE (p1 != NULL))
+ {
+ mt1 = pool_elt_at_index (vxm->tunnels, p1[0]);
+ goto next1; /* valid packet */
+ }
+ }
+ error1 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next1 = VXLAN_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ next1:
+ next1 = t1->decap_next_index;
+ sw_if_index1 = t1->sw_if_index;
+ len1 = vlib_buffer_length_in_chain (vm, b1);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ if (PREDICT_TRUE(next1 == VXLAN_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b1);
+
+ /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+ sw_if_index1 = (mt1) ? mt1->sw_if_index : sw_if_index1;
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len1;
+
+ /* Batch stats increment on the same vxlan tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len1;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len1;
+ stats_sw_if_index = sw_if_index1;
+ }
+
+ trace1:
+ b1->error = error1 ? node->errors[error1] : 0;
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->tunnel_index = tunnel_index1;
+ tr->vni = vnet_get_vni (vxlan1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ ip4_header_t * ip4_0;
+ ip6_header_t * ip6_0;
+ vxlan_header_t * vxlan0;
+ uword * p0;
+ u32 tunnel_index0;
+ vxlan_tunnel_t * t0, * mt0 = NULL;
+ vxlan4_tunnel_key_t key4_0;
+ vxlan6_tunnel_key_t key6_0;
+ u32 error0;
+ u32 sw_if_index0, len0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* udp leaves current_data pointing at the vxlan header */
+ vxlan0 = vlib_buffer_get_current (b0);
+ if (is_ip4) {
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+ ip4_0 = vlib_buffer_get_current (b0);
+ } else {
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip6_header_t)));
+ ip6_0 = vlib_buffer_get_current (b0);
+ }
+
+ /* pop (ip, udp, vxlan) */
+ if (is_ip4) {
+ vlib_buffer_advance
+ (b0, sizeof(*ip4_0)+sizeof(udp_header_t)+sizeof(*vxlan0));
+ } else {
+ vlib_buffer_advance
+ (b0, sizeof(*ip6_0)+sizeof(udp_header_t)+sizeof(*vxlan0));
+ }
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+
+ if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
+ {
+ error0 = VXLAN_ERROR_BAD_FLAGS;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ if (is_ip4) {
+ key4_0.src = ip4_0->src_address.as_u32;
+ key4_0.vni = vxlan0->vni_reserved;
+
+	  /* Make sure a unicast VXLAN tunnel exists for the packet's SIP and VNI */
+ if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
+ {
+ p0 = hash_get (vxm->vxlan4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ last_key4.as_u64 = key4_0.as_u64;
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0);
+
+	  /* Validate VXLAN tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_vxlan_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Validate VXLAN tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
+ goto next00; /* valid packet */
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_0->dst_address)))
+ {
+ key4_0.src = ip4_0->dst_address.as_u32;
+ key4_0.vni = vxlan0->vni_reserved;
+	      /* Make sure an mcast VXLAN tunnel exists for the packet's DIP and VNI */
+ p0 = hash_get (vxm->vxlan4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (vxm->tunnels, p0[0]);
+ goto next00; /* valid packet */
+ }
+ }
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace00;
+
+ } else /* !is_ip4 */ {
+ key6_0.src.as_u64[0] = ip6_0->src_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->src_address.as_u64[1];
+ key6_0.vni = vxlan0->vni_reserved;
+
+	  /* Make sure a VXLAN tunnel exists for the packet's SIP and VNI */
+ if (PREDICT_FALSE (memcmp(&key6_0, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p0 = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6_0);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ clib_memcpy (&last_key6, &key6_0, sizeof(key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0);
+
+	  /* Validate VXLAN tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_vxlan_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Validate VXLAN tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address,
+ &t0->src.ip6)))
+ goto next00; /* valid packet */
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip6_0->dst_address)))
+ {
+ key6_0.src.as_u64[0] = ip6_0->dst_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->dst_address.as_u64[1];
+ key6_0.vni = vxlan0->vni_reserved;
+ p0 = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6_0);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (vxm->tunnels, p0[0]);
+ goto next00; /* valid packet */
+ }
+ }
+ error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
+ next0 = VXLAN_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ next00:
+ next0 = t0->decap_next_index;
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ if (PREDICT_TRUE(next0 == VXLAN_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b0);
+
+ /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ sw_if_index0 = (mt0) ? mt0->sw_if_index : sw_if_index0;
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace00:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ tr->vni = vnet_get_vni (vxlan0);
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+  /* Do we still need this now that tunnel rx stats are kept? */
+ vlib_node_increment_counter (vm, is_ip4?
+ vxlan4_input_node.index:vxlan6_input_node.index,
+ VXLAN_ERROR_DECAPSULATED,
+ pkts_decapsulated);
+
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+vxlan4_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_input(vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+static uword
+vxlan6_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_input(vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+static char * vxlan_error_strings[] = {
+#define vxlan_error(n,s) s,
+#include <vnet/vxlan/vxlan_error.def>
+#undef vxlan_error
+};
+
+VLIB_REGISTER_NODE (vxlan4_input_node) = {
+ .function = vxlan4_input,
+ .name = "vxlan4-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = VXLAN_N_ERROR,
+ .error_strings = vxlan_error_strings,
+
+ .n_next_nodes = VXLAN_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
+ foreach_vxlan_input_next
+#undef _
+ },
+
+//temp .format_buffer = format_vxlan_header,
+ .format_trace = format_vxlan_rx_trace,
+ // $$$$ .unformat_buffer = unformat_vxlan_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_input_node, vxlan4_input)
+
+VLIB_REGISTER_NODE (vxlan6_input_node) = {
+ .function = vxlan6_input,
+ .name = "vxlan6-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = VXLAN_N_ERROR,
+ .error_strings = vxlan_error_strings,
+
+ .n_next_nodes = VXLAN_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
+ foreach_vxlan_input_next
+#undef _
+ },
+
+//temp .format_buffer = format_vxlan_header,
+ .format_trace = format_vxlan_rx_trace,
+ // $$$$ .unformat_buffer = unformat_vxlan_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_input_node, vxlan6_input)
+
+
+typedef enum {
+ IP_VXLAN_BYPASS_NEXT_DROP,
+ IP_VXLAN_BYPASS_NEXT_VXLAN,
+ IP_VXLAN_BYPASS_N_NEXT,
+} ip_vxlan_bypass_next_t;
+
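+/* Bypass worker: inspect ip4/ip6 packets ahead of the full FIB lookup
+ * and, when a well-formed VXLAN packet is addressed to a local VTEP,
+ * steer it straight to vxlan4-input / vxlan6-input. Everything else
+ * continues down the configured feature arc via vnet_feature_next. */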
+always_inline uword
+ip_vxlan_bypass_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ u32 is_ip4)
+{
+ vxlan_main_t * vxm = &vxlan_main;
+ u32 * from, * to_next, n_left_from, n_left_to_next, next_index;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+ ip4_address_t addr4; /* last IPv4 address matching a local VTEP address */
+ ip6_address_t addr6; /* last IPv6 address matching a local VTEP address */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ if (is_ip4) addr4.data_u32 = ~0;
+ else ip6_address_set_zero (&addr6);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ ip4_header_t * ip40, * ip41;
+ ip6_header_t * ip60, * ip61;
+ udp_header_t * udp0, * udp1;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ u32 bi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, good_udp0, proto0;
+ u8 error1, good_udp1, proto1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ if (is_ip4)
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ ip41 = vlib_buffer_get_current (b1);
+ }
+ else
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ ip61 = vlib_buffer_get_current (b1);
+ }
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
+ vnet_feature_next(vnet_buffer(b1)->sw_if_index[VLIB_RX], &next1, b1);
+
+ if (is_ip4)
+ {
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment(ip40) ? 0xfe : ip40->protocol;
+ proto1 = ip4_is_fragment(ip41) ? 0xfe : ip41->protocol;
+ }
+ else
+ {
+ proto0 = ip60->protocol;
+ proto1 = ip61->protocol;
+ }
+
+ /* Process packet 0 */
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit0; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan))
+ goto exit0; /* not VXLAN packet */
+
+	  /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip40->dst_address.as_u32)
+ {
+ if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32))
+ goto exit0; /* no local VTEP for VXLAN packet */
+ addr4 = ip40->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
+ {
+ if (!hash_get_mem (vxm->vtep6, &ip60->dst_address))
+ goto exit0; /* no local VTEP for VXLAN packet */
+ addr6 = ip60->dst_address;
+ }
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+	  /* Don't verify UDP checksum for packets with explicit zero checksum
+	     (a zero UDP checksum means the checksum was not computed). */
+	  good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+	  /* vxlan-input node expects current_data at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit0:
+ /* Process packet 1 */
+ if (proto1 != IP_PROTOCOL_UDP)
+ goto exit1; /* not UDP packet */
+
+ if (is_ip4)
+ udp1 = ip4_next_header (ip41);
+ else
+ udp1 = ip6_next_header (ip61);
+
+ if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan))
+ goto exit1; /* not VXLAN packet */
+
+	  /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip41->dst_address.as_u32)
+ {
+ if (!hash_get (vxm->vtep4, ip41->dst_address.as_u32))
+ goto exit1; /* no local VTEP for VXLAN packet */
+ addr4 = ip41->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip61->dst_address))
+ {
+ if (!hash_get_mem (vxm->vtep6, &ip61->dst_address))
+ goto exit1; /* no local VTEP for VXLAN packet */
+ addr6 = ip61->dst_address;
+ }
+ }
+
+ flags1 = b1->flags;
+ good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp1 |= udp1->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len1 = clib_net_to_host_u16 (ip41->length);
+ else
+ ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp1))
+ {
+ if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
+ else
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
+ good_udp1 =
+ (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next1 = error1 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b1->error = error1 ? error_node->errors[error1] : 0;
+
+	  /* vxlan-input node expects current_data at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b1, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b1, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit1:
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * b0;
+ ip4_header_t * ip40;
+ ip6_header_t * ip60;
+ udp_header_t * udp0;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, good_udp0, proto0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ if (is_ip4)
+ ip40 = vlib_buffer_get_current (b0);
+ else
+ ip60 = vlib_buffer_get_current (b0);
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
+
+ if (is_ip4)
+ /* Treat IP4 frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment(ip40) ? 0xfe : ip40->protocol;
+ else
+ proto0 = ip60->protocol;
+
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan))
+ goto exit; /* not VXLAN packet */
+
+	  /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip40->dst_address.as_u32)
+ {
+ if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32))
+ goto exit; /* no local VTEP for VXLAN packet */
+ addr4 = ip40->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
+ {
+ if (!hash_get_mem (vxm->vtep6, &ip60->dst_address))
+ goto exit; /* no local VTEP for VXLAN packet */
+ addr6 = ip60->dst_address;
+ }
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+	  /* vxlan-input node expects current_data at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit:
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_vxlan_bypass (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
+}
+
+VLIB_REGISTER_NODE (ip4_vxlan_bypass_node) = {
+ .function = ip4_vxlan_bypass,
+ .name = "ip4-vxlan-bypass",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-input",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_vxlan_bypass_node,ip4_vxlan_bypass)
+
+/* Dummy init function to get us linked in. */
+clib_error_t * ip4_vxlan_bypass_init (vlib_main_t * vm)
+{ return 0; }
+
+VLIB_INIT_FUNCTION (ip4_vxlan_bypass_init);
+
+static uword
+ip6_vxlan_bypass (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (ip6_vxlan_bypass_node) = {
+ .function = ip6_vxlan_bypass,
+ .name = "ip6-vxlan-bypass",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-input",
+ },
+
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_vxlan_bypass_node,ip6_vxlan_bypass)
+
+/* Dummy init function to get us linked in. */
+clib_error_t * ip6_vxlan_bypass_init (vlib_main_t * vm)
+{ return 0; }
+
+VLIB_INIT_FUNCTION (ip6_vxlan_bypass_init);
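+
+/*
+ * Sketch (assuming the standard vnet feature API): the bypass nodes are
+ * enabled per interface along the lines of
+ *
+ *   vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass",
+ *                                sw_if_index, 1, 0, 0);
+ *
+ * with "ip6-unicast" / "ip6-vxlan-bypass" as the ipv6 counterpart.
+ */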
diff --git a/src/vnet/vxlan/dir.dox b/src/vnet/vxlan/dir.dox
new file mode 100644
index 00000000..31a9e2b6
--- /dev/null
+++ b/src/vnet/vxlan/dir.dox
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+@dir
+@brief VXLAN Code.
+
+This directory contains source code to support VXLAN.
+
+*/
+/*? %%clicmd:group_label VXLAN CLI %% ?*/
diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c
new file mode 100644
index 00000000..87e75e5d
--- /dev/null
+++ b/src/vnet/vxlan/encap.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan/vxlan.h>
+
+/* Statistics (not all errors) */
+#define foreach_vxlan_encap_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char * vxlan_encap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_encap_error
+#undef _
+};
+
+typedef enum {
+#define _(sym,str) VXLAN_ENCAP_ERROR_##sym,
+ foreach_vxlan_encap_error
+#undef _
+ VXLAN_ENCAP_N_ERROR,
+} vxlan_encap_error_t;
+
+typedef enum {
+ VXLAN_ENCAP_NEXT_DROP,
+ VXLAN_ENCAP_N_NEXT,
+} vxlan_encap_next_t;
+
+typedef struct {
+ u32 tunnel_index;
+ u32 vni;
+} vxlan_encap_trace_t;
+
+u8 * format_vxlan_encap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_encap_trace_t * t
+ = va_arg (*args, vxlan_encap_trace_t *);
+
+ s = format (s, "VXLAN encap to vxlan_tunnel%d vni %d",
+ t->tunnel_index, t->vni);
+ return s;
+}
+
+
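+/* The precomputed rewrite is copied in u64 words: the ip4 header stack
+ * (ip4 20 + udp 8 + vxlan 8 = 36 octets) takes four words plus one
+ * trailing u32; the ip6 stack (ip6 40 + udp 8 + vxlan 8 = 56 octets) is
+ * exactly seven words. */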
+#define foreach_fixed_header4_offset \
+ _(0) _(1) _(2) _(3)
+
+#define foreach_fixed_header6_offset \
+ _(0) _(1) _(2) _(3) _(4) _(5) _(6)
+
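+/* Shared encap worker: prepend the tunnel's precomputed (ip, udp, vxlan)
+ * rewrite, patch the ip/udp lengths (and checksums, or offload flags)
+ * for this packet, set the udp source port from the inner flow hash, and
+ * forward to the tunnel's next DPO. */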
+always_inline uword
+vxlan_encap_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u8 is_ip4, u8 csum_offload)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ vxlan_main_t * vxm = &vxlan_main;
+ vnet_main_t * vnm = vxm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 pkts_encapsulated = 0;
+ u16 old_l0 = 0, old_l1 = 0;
+ u32 thread_index = vlib_get_thread_index();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+ u32 sw_if_index0 = 0, sw_if_index1 = 0;
+ u32 next0 = 0, next1 = 0;
+ vnet_hw_interface_t * hi0, * hi1;
+ vxlan_tunnel_t * t0 = NULL, * t1 = NULL;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 flow_hash0, flow_hash1;
+ u32 len0, len1;
+ ip4_header_t * ip4_0, * ip4_1;
+ ip6_header_t * ip6_0, * ip6_1;
+ udp_header_t * udp0, * udp1;
+ u64 * copy_src0, * copy_dst0;
+ u64 * copy_src1, * copy_dst1;
+ u32 * copy_src_last0, * copy_dst_last0;
+ u32 * copy_src_last1, * copy_dst_last1;
+ u16 new_l0, new_l1;
+ ip_csum_t sum0, sum1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ flow_hash0 = vnet_l2_compute_flow_hash (b0);
+ flow_hash1 = vnet_l2_compute_flow_hash (b1);
+
+ /* Get next node index and adj index from tunnel next_dpo */
+ if (sw_if_index0 != vnet_buffer(b0)->sw_if_index[VLIB_TX])
+ {
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ t0 = &vxm->tunnels[hi0->dev_instance];
+ /* Note: change to always set next0 if it may be set to drop */
+ next0 = t0->next_dpo.dpoi_next_node;
+ }
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
+
+ /* Get next node index and adj index from tunnel next_dpo */
+ if (sw_if_index1 != vnet_buffer(b1)->sw_if_index[VLIB_TX])
+ {
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+ hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1);
+ t1 = &vxm->tunnels[hi1->dev_instance];
+ /* Note: change to always set next1 if it may be set to drop */
+ next1 = t1->next_dpo.dpoi_next_node;
+ }
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = t1->next_dpo.dpoi_index;
+
+ /* Apply the rewrite string. $$$$ vnet_rewrite? */
+ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+ vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite));
+
+ if (is_ip4)
+ {
+	      /* IP4 VXLAN header should be 36 octets */
+ ASSERT(sizeof(ip4_vxlan_header_t) == 36);
+ ASSERT(vec_len(t0->rewrite) == sizeof(ip4_vxlan_header_t));
+ ASSERT(vec_len(t1->rewrite) == sizeof(ip4_vxlan_header_t));
+
+ ip4_0 = vlib_buffer_get_current(b0);
+ ip4_1 = vlib_buffer_get_current(b1);
+
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip4_0;
+ copy_src0 = (u64 *) t0->rewrite;
+ copy_dst1 = (u64 *) ip4_1;
+ copy_src1 = (u64 *) t1->rewrite;
+ /* Copy first 32 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header4_offset;
+#undef _
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ foreach_fixed_header4_offset;
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[4]);
+ copy_src_last0 = (u32 *)(&copy_src0[4]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ copy_dst_last1 = (u32 *)(&copy_dst1[4]);
+ copy_src_last1 = (u32 *)(&copy_src1[4]);
+ copy_dst_last1[0] = copy_src_last1[0];
+
+ /* Fix the IP4 checksum and length */
+ if (csum_offload)
+ {
+ ip4_0->length = clib_host_to_net_u16
+ (vlib_buffer_length_in_chain (vm, b0));
+ b0->flags |=
+ VNET_BUFFER_F_OFFLOAD_IP_CKSUM | VNET_BUFFER_F_IS_IP4;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip4_0 - b0->data;
+ ip4_1->length = clib_host_to_net_u16
+ (vlib_buffer_length_in_chain (vm, b1));
+ b1->flags |=
+ VNET_BUFFER_F_OFFLOAD_IP_CKSUM | VNET_BUFFER_F_IS_IP4;
+ vnet_buffer (b1)->l3_hdr_offset = (u8 *) ip4_1 - b1->data;
+ }
+ else
+ {
+ sum0 = ip4_0->checksum;
+ new_l0 = /* old_l0 always 0, see the rewrite setup */
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip4_0->checksum = ip_csum_fold (sum0);
+ ip4_0->length = new_l0;
+ sum1 = ip4_1->checksum;
+ new_l1 = /* old_l1 always 0, see the rewrite setup */
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+ sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
+ length /* changed member */);
+ ip4_1->checksum = ip_csum_fold (sum1);
+ ip4_1->length = new_l1;
+ }
+
+ /* Fix UDP length and set source port */
+ udp0 = (udp_header_t *)(ip4_0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+ - sizeof (*ip4_0));
+ udp0->length = new_l0;
+ udp0->src_port = flow_hash0;
+ udp1 = (udp_header_t *)(ip4_1+1);
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
+ - sizeof (*ip4_1));
+ udp1->length = new_l1;
+ udp1->src_port = flow_hash1;
+
+ /* UDP checksum only if checksum offload is used */
+ if (csum_offload)
+ {
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) udp0 - b0->data;
+ b1->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ vnet_buffer (b1)->l4_hdr_offset = (u8 *) udp1 - b1->data;
+ }
+ }
+ else /* ipv6 */
+ {
+ int bogus = 0;
+
+	      /* IP6 VXLAN header should be 56 octets */
+ ASSERT(sizeof(ip6_vxlan_header_t) == 56);
+ ASSERT(vec_len(t0->rewrite) == sizeof(ip6_vxlan_header_t));
+ ASSERT(vec_len(t1->rewrite) == sizeof(ip6_vxlan_header_t));
+ ip6_0 = vlib_buffer_get_current(b0);
+ ip6_1 = vlib_buffer_get_current(b1);
+
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip6_0;
+ copy_src0 = (u64 *) t0->rewrite;
+ copy_dst1 = (u64 *) ip6_1;
+ copy_src1 = (u64 *) t1->rewrite;
+ /* Copy first 56 (ip6) octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header6_offset;
+#undef _
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ foreach_fixed_header6_offset;
+#undef _
+ /* Fix IP6 payload length */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof(*ip6_0));
+ ip6_0->payload_length = new_l0;
+ new_l1 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
+ - sizeof(*ip6_1));
+ ip6_1->payload_length = new_l1;
+
+ /* Fix UDP length and set source port */
+ udp0 = (udp_header_t *)(ip6_0+1);
+ udp0->length = new_l0;
+ udp0->src_port = flow_hash0;
+ udp1 = (udp_header_t *)(ip6_1+1);
+ udp1->length = new_l1;
+ udp1->src_port = flow_hash1;
+
+ /* IPv6 UDP checksum is mandatory */
+ if (csum_offload)
+ {
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip6_0 - b0->data;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) udp0 - b0->data;
+ b1->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ vnet_buffer (b1)->l3_hdr_offset = (u8 *) ip6_1 - b1->data;
+ vnet_buffer (b1)->l4_hdr_offset = (u8 *) udp1 - b1->data;
+ }
+ else
+ {
+ udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
+ (vm, b0, ip6_0, &bogus);
+ ASSERT(bogus == 0);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ udp1->checksum = ip6_tcp_udp_icmp_compute_checksum
+ (vm, b1, ip6_1, &bogus);
+ ASSERT(bogus == 0);
+ if (udp1->checksum == 0)
+ udp1->checksum = 0xffff;
+ }
+ }
+
+ pkts_encapsulated += 2;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ len1 = vlib_buffer_length_in_chain (vm, b1);
+ stats_n_packets += 2;
+ stats_n_bytes += len0 + len1;
+
+ /* Batch stats increment on the same vxlan tunnel so counter is not
+ incremented per packet. Note stats are still incremented for deleted
+ and admin-down tunnel where packets are dropped. It is not worthwhile
+ to check for this rare case and affect normal path performance. */
+ if (PREDICT_FALSE ((sw_if_index0 != stats_sw_if_index) ||
+ (sw_if_index1 != stats_sw_if_index)))
+ {
+ stats_n_packets -= 2;
+ stats_n_bytes -= len0 + len1;
+ if (sw_if_index0 == sw_if_index1)
+ {
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_sw_if_index = sw_if_index0;
+ stats_n_packets = 2;
+ stats_n_bytes = len0 + len1;
+ }
+ else
+ {
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index0, 1, len0);
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index1, 1, len1);
+ }
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - vxm->tunnels;
+ tr->vni = t0->vni;
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->tunnel_index = t1 - vxm->tunnels;
+ tr->vni = t1->vni;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 flow_hash0;
+ u32 len0;
+ ip4_header_t * ip4_0;
+ ip6_header_t * ip6_0;
+ udp_header_t * udp0;
+ u64 * copy_src0, * copy_dst0;
+ u32 * copy_src_last0, * copy_dst_last0;
+ u16 new_l0;
+ ip_csum_t sum0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ flow_hash0 = vnet_l2_compute_flow_hash(b0);
+
+ /* Get next node index and adj index from tunnel next_dpo */
+ if (sw_if_index0 != vnet_buffer(b0)->sw_if_index[VLIB_TX])
+ {
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ t0 = &vxm->tunnels[hi0->dev_instance];
+ /* Note: change to always set next0 if it may be set to drop */
+ next0 = t0->next_dpo.dpoi_next_node;
+ }
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
+
+ /* Apply the rewrite string. $$$$ vnet_rewrite? */
+ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+
+ if (is_ip4)
+ {
+	      /* IP4 VXLAN header should be 36 octets */
+ ASSERT(sizeof(ip4_vxlan_header_t) == 36);
+ ASSERT(vec_len(t0->rewrite) == sizeof(ip4_vxlan_header_t));
+ ip4_0 = vlib_buffer_get_current(b0);
+
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip4_0;
+ copy_src0 = (u64 *) t0->rewrite;
+ /* Copy first 32 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header4_offset;
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[4]);
+ copy_src_last0 = (u32 *)(&copy_src0[4]);
+ copy_dst_last0[0] = copy_src_last0[0];
+
+ /* Fix the IP4 checksum and length */
+ if (csum_offload)
+ {
+ ip4_0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ b0->flags |=
+ VNET_BUFFER_F_OFFLOAD_IP_CKSUM | VNET_BUFFER_F_IS_IP4;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip4_0 - b0->data;
+ }
+ else
+ {
+ sum0 = ip4_0->checksum;
+ new_l0 = /* old_l0 always 0, see the rewrite setup */
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip4_0->checksum = ip_csum_fold (sum0);
+ ip4_0->length = new_l0;
+ }
+
+ /* Fix UDP length and set source port */
+ udp0 = (udp_header_t *)(ip4_0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+ - sizeof (*ip4_0));
+ udp0->length = new_l0;
+ udp0->src_port = flow_hash0;
+
+ /* UDP checksum only if checksum offload is used */
+ if (csum_offload)
+ {
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) udp0 - b0->data;
+ }
+ }
+
+ else /* ip6 path */
+ {
+ int bogus = 0;
+
+	      /* IP6 VXLAN header should be 56 octets */
+ ASSERT(sizeof(ip6_vxlan_header_t) == 56);
+ ASSERT(vec_len(t0->rewrite) == sizeof(ip6_vxlan_header_t));
+ ip6_0 = vlib_buffer_get_current(b0);
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip6_0;
+ copy_src0 = (u64 *) t0->rewrite;
+ /* Copy first 56 (ip6) octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header6_offset;
+#undef _
+ /* Fix IP6 payload length */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof(*ip6_0));
+ ip6_0->payload_length = new_l0;
+
+ /* Fix UDP length and set source port */
+ udp0 = (udp_header_t *)(ip6_0+1);
+ udp0->length = new_l0;
+ udp0->src_port = flow_hash0;
+
+ /* IPv6 UDP checksum is mandatory */
+ if (csum_offload)
+ {
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip6_0 - b0->data;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) udp0 - b0->data;
+ }
+ else
+ {
+ udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
+ (vm, b0, ip6_0, &bogus);
+ ASSERT(bogus == 0);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
+ }
+
+ pkts_encapsulated ++;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan tunnel so counter is not
+ incremented per packet. Note stats are still incremented for deleted
+ and admin-down tunnel where packets are dropped. It is not worthwhile
+ to check for this rare case and affect normal path performance. */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - vxm->tunnels;
+ tr->vni = t0->vni;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+  /* Do we still need this now that tunnel tx stats are kept? */
+ vlib_node_increment_counter (vm, node->node_index,
+ VXLAN_ENCAP_ERROR_ENCAPSULATED,
+ pkts_encapsulated);
+
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+static uword
+vxlan4_encap (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+  /* Disable checksum offload: the setup overhead in the tx node is not
+     worthwhile for the ip4 header checksum alone, unless the udp checksum
+     is also required */
+ return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1,
+ /* csum_offload */ 0);
+}
+
+static uword
+vxlan6_encap (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+  /* Enable checksum offload for ip6, as the udp checksum is mandatory */
+ return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0,
+ /* csum_offload */ 1);
+}
+
+VLIB_REGISTER_NODE (vxlan4_encap_node) = {
+ .function = vxlan4_encap,
+ .name = "vxlan4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
+ .error_strings = vxlan_encap_error_strings,
+ .n_next_nodes = VXLAN_ENCAP_N_NEXT,
+ .next_nodes = {
+ [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_encap_node, vxlan4_encap)
+
+VLIB_REGISTER_NODE (vxlan6_encap_node) = {
+ .function = vxlan6_encap,
+ .name = "vxlan6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
+ .error_strings = vxlan_encap_error_strings,
+ .n_next_nodes = VXLAN_ENCAP_N_NEXT,
+ .next_nodes = {
+ [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_encap_node, vxlan6_encap)
+
diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api
new file mode 100644
index 00000000..6c331a58
--- /dev/null
+++ b/src/vnet/vxlan/vxlan.api
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
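+/** \brief Create or delete a VXLAN tunnel
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - non-zero to add the tunnel, zero to delete it
+    @param is_ipv6 - src/dst addresses are ipv6 if non-zero, else ipv4
+    @param src_address - tunnel source address
+    @param dst_address - tunnel destination address (multicast for a mcast tunnel)
+    @param mcast_sw_if_index - interface for the multicast destination, ~0 otherwise
+    @param encap_vrf_id - fib table id used for the encapsulated packets
+    @param decap_next_index - index of the next node after decap, ~0 for the default
+    @param vni - 24 bit VXLAN network identifier
+*/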
+define vxlan_add_del_tunnel
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 src_address[16];
+ u8 dst_address[16];
+ u32 mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 vni;
+};
+
+define vxlan_add_del_tunnel_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
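+/** \brief Dump VXLAN tunnel details
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - tunnel interface to dump, ~0 to dump all tunnels
+*/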
+define vxlan_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+define vxlan_tunnel_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 src_address[16];
+ u8 dst_address[16];
+ u32 mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 vni;
+ u8 is_ipv6;
+};
+
+/** \brief Interface set vxlan-bypass request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+ @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass
+ @param enable - if non-zero enable, else disable
+*/
+autoreply define sw_interface_set_vxlan_bypass
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u8 enable;
+};
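+
+/** A sketch of a client-side fill for vxlan_add_del_tunnel (illustrative
+    only, not part of the API definition; the values and the src4/dst4
+    locals are made up). Integer fields travel in network byte order, and
+    an ip4 address occupies the first 4 bytes of the 16-byte arrays:
+
+      vl_api_vxlan_add_del_tunnel_t *mp = vl_msg_api_alloc (sizeof (*mp));
+      memset (mp, 0, sizeof (*mp));
+      mp->_vl_msg_id = ntohs (VL_API_VXLAN_ADD_DEL_TUNNEL);
+      mp->is_add = 1;
+      mp->vni = ntohl (13);
+      mp->encap_vrf_id = ntohl (0);
+      mp->decap_next_index = ntohl (~0);   // ~0 selects the l2 default
+      mp->mcast_sw_if_index = ntohl (~0);  // unused for a unicast dst
+      clib_memcpy (mp->src_address, &src4, 4);
+      clib_memcpy (mp->dst_address, &dst4, 4);
+*/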
diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c
new file mode 100644
index 00000000..dc973372
--- /dev/null
+++ b/src/vnet/vxlan/vxlan.c
@@ -0,0 +1,1113 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vxlan/vxlan.h>
+#include <vnet/ip/format.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/interface.h>
+#include <vlib/vlib.h>
+
+/**
+ * @file
+ * @brief VXLAN.
+ *
+ * VXLAN provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using VXLAN tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * You can refer to this kind of L2 overlay bridge domain as a VXLAN
+ * (Virtual eXtensible LAN) segment.
+ */
+
+
+vxlan_main_t vxlan_main;
+
+static u8 * format_decap_next (u8 * s, va_list * args)
+{
+ u32 next_index = va_arg (*args, u32);
+
+ switch (next_index)
+ {
+ case VXLAN_INPUT_NEXT_DROP:
+ return format (s, "drop");
+ case VXLAN_INPUT_NEXT_L2_INPUT:
+ return format (s, "l2");
+ default:
+ return format (s, "index %d", next_index);
+ }
+ return s;
+}
+
+u8 * format_vxlan_tunnel (u8 * s, va_list * args)
+{
+ vxlan_tunnel_t * t = va_arg (*args, vxlan_tunnel_t *);
+ vxlan_main_t * ngm = &vxlan_main;
+
+ s = format (s, "[%d] src %U dst %U vni %d sw_if_index %d ",
+ t - ngm->tunnels,
+ format_ip46_address, &t->src, IP46_TYPE_ANY,
+ format_ip46_address, &t->dst, IP46_TYPE_ANY,
+ t->vni, t->sw_if_index);
+
+ if (ip46_address_is_multicast (&t->dst))
+ s = format (s, "mcast_sw_if_index %d ", t->mcast_sw_if_index);
+
+ s = format (s, "encap_fib_index %d fib_entry_index %d decap_next %U\n",
+ t->encap_fib_index, t->fib_entry_index,
+ format_decap_next, t->decap_next_index);
+ return s;
+}
+
+static u8 * format_vxlan_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "vxlan_tunnel%d", dev_instance);
+}
+
+static uword dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static clib_error_t *
+vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return /* no error */ 0;
+}
+
+VNET_DEVICE_CLASS (vxlan_device_class,static) = {
+ .name = "VXLAN",
+ .format_device_name = format_vxlan_name,
+ .format_tx_trace = format_vxlan_encap_trace,
+ .tx_function = dummy_interface_tx,
+ .admin_up_down_function = vxlan_interface_admin_up_down,
+};
+
+static u8 * format_vxlan_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
+ .name = "VXLAN",
+ .format_header = format_vxlan_header_with_length,
+ .build_rewrite = default_build_rewrite,
+};
+
+static void
+vxlan_tunnel_restack_dpo(vxlan_tunnel_t * t)
+{
+ dpo_id_t dpo = DPO_INVALID;
+ u32 encap_index = ip46_address_is_ip4(&t->dst) ?
+ vxlan4_encap_node.index : vxlan6_encap_node.index;
+ fib_forward_chain_type_t forw_type = ip46_address_is_ip4(&t->dst) ?
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
+
+ fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset(&dpo);
+}
+
+static vxlan_tunnel_t *
+vxlan_tunnel_from_fib_node (fib_node_t *node)
+{
+#if (CLIB_DEBUG > 0)
+ ASSERT(FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type);
+#endif
+ return ((vxlan_tunnel_t*) (((char*)node) -
+ STRUCT_OFFSET_OF(vxlan_tunnel_t, node)));
+}
+
+/**
+ * Function definition to back-walk a FIB node -
+ * here we restack the tunnel's DPO on the encap node whenever the
+ * forwarding to the VXLAN DIP changes.
+ */
+static fib_node_back_walk_rc_t
+vxlan_tunnel_back_walk (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ vxlan_tunnel_restack_dpo(vxlan_tunnel_from_fib_node(node));
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t*
+vxlan_tunnel_fib_node_get (fib_node_index_t index)
+{
+ vxlan_tunnel_t * t;
+ vxlan_main_t * vxm = &vxlan_main;
+
+ t = pool_elt_at_index(vxm->tunnels, index);
+
+ return (&t->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+vxlan_tunnel_last_lock_gone (fib_node_t *node)
+{
+ /*
+ * The VXLAN tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT(0);
+}
+
+/*
+ * Virtual function table registered by VXLAN tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t vxlan_vft = {
+ .fnv_get = vxlan_tunnel_fib_node_get,
+ .fnv_last_lock = vxlan_tunnel_last_lock_gone,
+ .fnv_back_walk = vxlan_tunnel_back_walk,
+};
+
+
+#define foreach_copy_field \
+_(vni) \
+_(mcast_sw_if_index) \
+_(encap_fib_index) \
+_(decap_next_index) \
+_(src) \
+_(dst)
+
+static int
+vxlan_rewrite (vxlan_tunnel_t * t, bool is_ip6)
+{
+ union {
+ ip4_vxlan_header_t * h4;
+ ip6_vxlan_header_t * h6;
+ u8 *rw;
+ } r = { .rw = 0 };
+ int len = is_ip6 ? sizeof *r.h6 : sizeof *r.h4;
+
+ vec_validate_aligned (r.rw, len-1, CLIB_CACHE_LINE_BYTES);
+
+ udp_header_t * udp;
+ vxlan_header_t * vxlan;
+ /* Fixed portion of the (outer) ip header */
+ if (!is_ip6)
+ {
+ ip4_header_t * ip = &r.h4->ip4;
+ udp = &r.h4->udp, vxlan = &r.h4->vxlan;
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+
+ ip->src_address = t->src.ip4;
+ ip->dst_address = t->dst.ip4;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip->checksum = ip4_header_checksum (ip);
+ }
+ else
+ {
+ ip6_header_t * ip = &r.h6->ip6;
+ udp = &r.h6->udp, vxlan = &r.h6->vxlan;
+ ip->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(6 << 28);
+ ip->hop_limit = 255;
+ ip->protocol = IP_PROTOCOL_UDP;
+
+ ip->src_address = t->src.ip6;
+ ip->dst_address = t->dst.ip6;
+ }
+
+ /* UDP header, randomize src port on something, maybe? */
+ udp->src_port = clib_host_to_net_u16 (4789);
+ udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan);
+
+ /* VXLAN header */
+ vnet_set_vni_and_flags(vxlan, t->vni);
+
+ t->rewrite = r.rw;
+ return (0);
+}
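+
+/* A sketch of how the precomputed rewrite above is consumed in the encap
+ * fast path (simplified from vxlan_encap_inline; per-packet length and
+ * checksum fixups, tracing and error handling omitted):
+ *
+ *   u16 len = vec_len (t->rewrite);
+ *   vlib_buffer_advance (b0, -(word) len);      // make room for the encap
+ *   u8 * encap0 = vlib_buffer_get_current (b0);
+ *   clib_memcpy (encap0, t->rewrite, len);      // prepend ip + udp + vxlan
+ */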
+
+static bool
+vxlan_decap_next_is_valid (vxlan_main_t * vxm, u32 is_ip6, u32 decap_next_index)
+{
+ vlib_main_t * vm = vxm->vlib_main;
+ u32 input_idx = (!is_ip6) ? vxlan4_input_node.index : vxlan6_input_node.index;
+ vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx);
+
+ return decap_next_index < r->n_next_nodes;
+}
+
+static void
+hash_set_key_copy (uword ** h, void * key, uword v) {
+ size_t ksz = hash_header(*h)->user;
+ void * copy = clib_mem_alloc (ksz);
+ clib_memcpy (copy, key, ksz);
+ hash_set_mem (*h, copy, v);
+}
+
+static void
+hash_unset_key_free (uword ** h, void * key) {
+ hash_pair_t * hp = hash_get_pair_mem (*h, key);
+ ASSERT (hp);
+ key = uword_to_pointer (hp->key, void *);
+ hash_unset_mem (*h, key);
+ clib_mem_free (key);
+}
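+
+/* These two helpers pair up: a key stored via hash_set_key_copy is a heap
+ * copy owned by the hash table, so it must be released with
+ * hash_unset_key_free. An illustrative round-trip:
+ *
+ *   hash_set_key_copy (&h, &key6, value);   // hash owns a copy of key6
+ *   ...
+ *   hash_unset_key_free (&h, &key6);        // unsets and frees that copy
+ */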
+
+static uword
+vtep_addr_ref(ip46_address_t *ip)
+{
+ uword *vtep = ip46_address_is_ip4(ip) ?
+ hash_get (vxlan_main.vtep4, ip->ip4.as_u32) :
+ hash_get_mem (vxlan_main.vtep6, &ip->ip6);
+ if (vtep)
+ return ++(*vtep);
+ ip46_address_is_ip4(ip) ?
+ hash_set (vxlan_main.vtep4, ip->ip4.as_u32, 1) :
+ hash_set_key_copy (&vxlan_main.vtep6, &ip->ip6, 1);
+ return 1;
+}
+
+static uword
+vtep_addr_unref(ip46_address_t *ip)
+{
+ uword *vtep = ip46_address_is_ip4(ip) ?
+ hash_get (vxlan_main.vtep4, ip->ip4.as_u32) :
+ hash_get_mem (vxlan_main.vtep6, &ip->ip6);
+ ASSERT(vtep);
+ if (--(*vtep) != 0)
+ return *vtep;
+ ip46_address_is_ip4(ip) ?
+ hash_unset (vxlan_main.vtep4, ip->ip4.as_u32) :
+ hash_unset_key_free (&vxlan_main.vtep6, &ip->ip6);
+ return 0;
+}
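+
+/* Illustrative ref-count behaviour of the VTEP helpers above, as used by
+ * the tunnel add/del paths below:
+ *
+ *   vtep_addr_ref (&a);     // first tunnel using VTEP a -> returns 1
+ *   vtep_addr_ref (&a);     // second tunnel sharing it  -> returns 2
+ *   vtep_addr_unref (&a);   // returns 1, hash entry kept
+ *   vtep_addr_unref (&a);   // returns 0, hash entry removed and freed
+ */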
+
+typedef CLIB_PACKED(union {
+ struct {
+ fib_node_index_t mfib_entry_index;
+ adj_index_t mcast_adj_index;
+ };
+ u64 as_u64;
+}) mcast_shared_t;
+
+static inline mcast_shared_t
+mcast_shared_get(ip46_address_t * ip)
+{
+ ASSERT(ip46_address_is_multicast(ip));
+ uword * p = hash_get_mem (vxlan_main.mcast_shared, ip);
+ ASSERT(p);
+ return (mcast_shared_t) { .as_u64 = *p };
+}
+
+static inline void
+mcast_shared_add(ip46_address_t *dst,
+ fib_node_index_t mfei,
+ adj_index_t ai)
+{
+ mcast_shared_t new_ep = {
+ .mcast_adj_index = ai,
+ .mfib_entry_index = mfei,
+ };
+
+ hash_set_key_copy (&vxlan_main.mcast_shared, dst, new_ep.as_u64);
+}
+
+static inline void
+mcast_shared_remove(ip46_address_t *dst)
+{
+ mcast_shared_t ep = mcast_shared_get(dst);
+
+ adj_unlock(ep.mcast_adj_index);
+ mfib_table_entry_delete_index(ep.mfib_entry_index,
+ MFIB_SOURCE_VXLAN);
+
+ hash_unset_key_free (&vxlan_main.mcast_shared, dst);
+}
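+
+/* mcast_shared_t packs both indices into one u64 so the pair fits directly
+ * in a uword-valued hash. A sketch of the round-trip used above:
+ *
+ *   mcast_shared_t in = { .mfib_entry_index = mfei, .mcast_adj_index = ai };
+ *   hash_set_key_copy (&vxlan_main.mcast_shared, &dst, in.as_u64);
+ *   mcast_shared_t out = mcast_shared_get (&dst); // out.as_u64 == in.as_u64
+ */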
+
+static inline fib_protocol_t
+fib_ip_proto(bool is_ip6)
+{
+ return (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
+}
+
+int vnet_vxlan_add_del_tunnel
+(vnet_vxlan_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
+{
+ vxlan_main_t * vxm = &vxlan_main;
+ vxlan_tunnel_t *t = 0;
+ vnet_main_t * vnm = vxm->vnet_main;
+ uword * p;
+ u32 hw_if_index = ~0;
+ u32 sw_if_index = ~0;
+ int rv;
+ vxlan4_tunnel_key_t key4;
+ vxlan6_tunnel_key_t key6;
+ u32 is_ip6 = a->is_ip6;
+
+ if (!is_ip6)
+ {
+ key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */
+ key4.vni = clib_host_to_net_u32 (a->vni << 8);
+ p = hash_get (vxm->vxlan4_tunnel_by_key, key4.as_u64);
+ }
+ else
+ {
+ key6.src = a->dst.ip6;
+ key6.vni = clib_host_to_net_u32 (a->vni << 8);
+ p = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6);
+ }
+
+ if (a->is_add)
+ {
+ l2input_main_t * l2im = &l2input_main;
+
+ /* adding a tunnel: tunnel must not already exist */
+ if (p)
+ return VNET_API_ERROR_TUNNEL_EXIST;
+
+      /* if not set explicitly, default to l2 */
+ if(a->decap_next_index == ~0)
+ a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
+ if (!vxlan_decap_next_is_valid(vxm, is_ip6, a->decap_next_index))
+ return VNET_API_ERROR_INVALID_DECAP_NEXT;
+
+ pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ /* copy from arg structure */
+#define _(x) t->x = a->x;
+ foreach_copy_field;
+#undef _
+
+ rv = vxlan_rewrite (t, is_ip6);
+ if (rv)
+ {
+ pool_put (vxm->tunnels, t);
+ return rv;
+ }
+
+ /* copy the key */
+ if (is_ip6)
+ hash_set_key_copy (&vxm->vxlan6_tunnel_by_key, &key6, t - vxm->tunnels);
+ else
+ hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
+
+ vnet_hw_interface_t * hi;
+ if (vec_len (vxm->free_vxlan_tunnel_hw_if_indices) > 0)
+ {
+ vnet_interface_main_t * im = &vnm->interface_main;
+ hw_if_index = vxm->free_vxlan_tunnel_hw_if_indices
+ [vec_len (vxm->free_vxlan_tunnel_hw_if_indices)-1];
+ _vec_len (vxm->free_vxlan_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - vxm->tunnels;
+ hi->hw_instance = hi->dev_instance;
+
+ /* clear old stats of freed tunnel before reuse */
+ sw_if_index = hi->sw_if_index;
+ vnet_interface_counter_lock(im);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], sw_if_index);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], sw_if_index);
+ vlib_zero_simple_counter
+ (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
+ vnet_interface_counter_unlock(im);
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, vxlan_device_class.index, t - vxm->tunnels,
+ vxlan_hw_class.index, t - vxm->tunnels);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->sw_if_index = sw_if_index = hi->sw_if_index;
+
+ vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
+ vxm->tunnel_index_by_sw_if_index[sw_if_index] = t - vxm->tunnels;
+
+ /* setup l2 input config with l2 feature and bd 0 to drop packet */
+ vec_validate (l2im->configs, sw_if_index);
+ l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
+ l2im->configs[sw_if_index].bd_index = 0;
+
+ vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, sw_if_index);
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
+ vnet_sw_interface_set_flags (vnm, sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ fib_node_init(&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL);
+ fib_prefix_t tun_dst_pfx;
+ u32 encap_index = !is_ip6 ?
+ vxlan4_encap_node.index : vxlan6_encap_node.index;
+ vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
+
+ fib_prefix_from_ip46_addr(&t->dst, &tun_dst_pfx);
+ if (!ip46_address_is_multicast(&t->dst))
+ {
+ /* Unicast tunnel -
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+ vtep_addr_ref(&t->src);
+ t->fib_entry_index = fib_table_entry_special_add
+ (t->encap_fib_index, &tun_dst_pfx, FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+ t->sibling_index = fib_entry_child_add
+ (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels);
+ vxlan_tunnel_restack_dpo(t);
+ }
+ else
+ {
+      /* Multicast tunnel -
+       * as the same mcast group can be used for multiple mcast tunnels
+       * with different VNIs, create the output fib adjacency only if
+       * it does not already exist
+       */
+ fib_protocol_t fp = fib_ip_proto(is_ip6);
+
+ if (vtep_addr_ref(&t->dst) == 1)
+ {
+ fib_node_index_t mfei;
+ adj_index_t ai;
+ fib_route_path_t path = {
+ .frp_proto = fib_proto_to_dpo(fp),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ const mfib_prefix_t mpfx = {
+ .fp_proto = fp,
+ .fp_len = (is_ip6 ? 128 : 32),
+ .fp_grp_addr = tun_dst_pfx.fp_addr,
+ };
+
+ /*
+ * Setup the (*,G) to receive traffic on the mcast group
+ * - the forwarding interface is for-us
+ * - the accepting interface is that from the API
+ */
+ mfib_table_entry_path_update(t->encap_fib_index,
+ &mpfx,
+ MFIB_SOURCE_VXLAN,
+ &path,
+ MFIB_ITF_FLAG_FORWARD);
+
+ path.frp_sw_if_index = a->mcast_sw_if_index;
+ path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
+ mfei = mfib_table_entry_path_update(t->encap_fib_index,
+ &mpfx,
+ MFIB_SOURCE_VXLAN,
+ &path,
+ MFIB_ITF_FLAG_ACCEPT);
+
+ /*
+ * Create the mcast adjacency to send traffic to the group
+ */
+ ai = adj_mcast_add_or_lock(fp,
+ fib_proto_to_link(fp),
+ a->mcast_sw_if_index);
+
+ /*
+ * create a new end-point
+ */
+ mcast_shared_add(&t->dst, mfei, ai);
+ }
+
+ dpo_id_t dpo = DPO_INVALID;
+ mcast_shared_t ep = mcast_shared_get(&t->dst);
+
+ /* Stack shared mcast dst mac addr rewrite on encap */
+ dpo_set (&dpo, DPO_ADJACENCY_MCAST,
+ fib_proto_to_dpo(fp),
+ ep.mcast_adj_index);
+
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset (&dpo);
+ flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
+ }
+
+ /* Set vxlan tunnel output node */
+ hi->output_node_index = encap_index;
+
+ vnet_get_sw_interface (vnet_get_main(), sw_if_index)->flood_class = flood_class;
+ }
+ else
+ {
+ /* deleting a tunnel: tunnel must exist */
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ t = pool_elt_at_index (vxm->tunnels, p[0]);
+
+ sw_if_index = t->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */);
+ vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, t->sw_if_index);
+ si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;
+
+ /* make sure tunnel is removed from l2 bd or xconnect */
+ set_int_l2_mode(vxm->vlib_main, vnm, MODE_L3, t->sw_if_index, 0, 0, 0, 0);
+ vec_add1 (vxm->free_vxlan_tunnel_hw_if_indices, t->hw_if_index);
+
+ vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
+
+ if (!is_ip6)
+ hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
+ else
+ hash_unset_key_free (&vxm->vxlan6_tunnel_by_key, &key6);
+
+ if (!ip46_address_is_multicast(&t->dst))
+ {
+ vtep_addr_unref(&t->src);
+ fib_entry_child_remove(t->fib_entry_index, t->sibling_index);
+ fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR);
+ }
+ else if (vtep_addr_unref(&t->dst) == 0)
+ {
+ mcast_shared_remove(&t->dst);
+ }
+
+ fib_node_deinit(&t->node);
+ vec_free (t->rewrite);
+ pool_put (vxm->tunnels, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
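+
+/* A minimal programmatic use of the add/del call above (an illustrative
+ * sketch; the addresses and vni are made up):
+ *
+ *   vnet_vxlan_add_del_tunnel_args_t a = {
+ *     .is_add = 1, .is_ip6 = 0,
+ *     .vni = 13, .encap_fib_index = 0,
+ *     .decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT,
+ *     .mcast_sw_if_index = ~0,
+ *   };
+ *   a.src.ip4.as_u32 = clib_host_to_net_u32 (0x0a000301);  // 10.0.3.1
+ *   a.dst.ip4.as_u32 = clib_host_to_net_u32 (0x0a000303);  // 10.0.3.3
+ *   u32 sw_if_index;
+ *   int rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
+ */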
+
+static uword get_decap_next_for_node(u32 node_index, u32 ipv4_set)
+{
+ vxlan_main_t * vxm = &vxlan_main;
+ vlib_main_t * vm = vxm->vlib_main;
+ uword input_node = (ipv4_set) ? vxlan4_input_node.index :
+ vxlan6_input_node.index;
+
+ return vlib_node_add_next (vm, input_node, node_index);
+}
+
+static uword unformat_decap_next (unformat_input_t * input, va_list * args)
+{
+ u32 * result = va_arg (*args, u32 *);
+ u32 ipv4_set = va_arg (*args, int);
+ vxlan_main_t * vxm = &vxlan_main;
+ vlib_main_t * vm = vxm->vlib_main;
+ u32 node_index;
+ u32 tmp;
+
+ if (unformat (input, "l2"))
+ *result = VXLAN_INPUT_NEXT_L2_INPUT;
+ else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
+ *result = get_decap_next_for_node(node_index, ipv4_set);
+ else if (unformat (input, "%d", &tmp))
+ *result = tmp;
+ else
+ return 0;
+ return 1;
+}
+
+static clib_error_t *
+vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ ip46_address_t src , dst;
+ u8 is_add = 1;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u8 grp_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u32 encap_fib_index = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
+ u32 vni = 0;
+ u32 tmp;
+ int rv;
+ vnet_vxlan_add_del_tunnel_args_t _a, * a = &_a;
+ u32 tunnel_sw_if_index;
+ clib_error_t *error = NULL;
+
+  /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ memset(&src, 0, sizeof src);
+ memset(&dst, 0, sizeof dst);
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (line_input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (line_input, "src %U",
+ unformat_ip4_address, &src.ip4))
+ {
+ src_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst %U",
+ unformat_ip4_address, &dst.ip4))
+ {
+ dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "src %U",
+ unformat_ip6_address, &src.ip6))
+ {
+ src_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "dst %U",
+ unformat_ip6_address, &dst.ip6))
+ {
+ dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip4_address, &dst.ip4,
+ unformat_vnet_sw_interface,
+ vnet_get_main(), &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip6_address, &dst.ip6,
+ unformat_vnet_sw_interface,
+ vnet_get_main(), &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "encap-vrf-id %d", &tmp))
+ {
+ encap_fib_index = fib_table_find (fib_ip_proto (ipv6_set), tmp);
+ if (encap_fib_index == ~0)
+ {
+ error = clib_error_return (0, "nonexistent encap-vrf-id %d", tmp);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "decap-next %U", unformat_decap_next,
+ &decap_next_index, ipv4_set))
+ ;
+ else if (unformat (line_input, "vni %d", &vni))
+ {
+ if (vni >> 24)
+ {
+ error = clib_error_return (0, "vni %d out of range", vni);
+ goto done;
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (src_set == 0)
+ {
+ error = clib_error_return (0, "tunnel src address not specified");
+ goto done;
+ }
+
+ if (dst_set == 0)
+ {
+ error = clib_error_return (0, "tunnel dst address not specified");
+ goto done;
+ }
+
+ if (grp_set && !ip46_address_is_multicast(&dst))
+ {
+ error = clib_error_return (0, "tunnel group address not multicast");
+ goto done;
+ }
+
+ if (grp_set == 0 && ip46_address_is_multicast(&dst))
+ {
+ error = clib_error_return (0, "dst address must be unicast");
+ goto done;
+ }
+
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ error = clib_error_return (0, "tunnel nonexistent multicast device");
+ goto done;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ error = clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ goto done;
+ }
+
+ if (ip46_address_cmp(&src, &dst) == 0)
+ {
+ error = clib_error_return (0, "src and dst addresses are identical");
+ goto done;
+ }
+
+ if (decap_next_index == ~0)
+ {
+ error = clib_error_return (0, "next node not found");
+ goto done;
+ }
+
+ if (vni == 0)
+ {
+ error = clib_error_return (0, "vni not specified");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = is_add;
+ a->is_ip6 = ipv6_set;
+
+#define _(x) a->x = x;
+ foreach_copy_field;
+#undef _
+
+ rv = vnet_vxlan_add_del_tunnel (a, &tunnel_sw_if_index);
+
+ switch(rv)
+ {
+ case 0:
+ if (is_add)
+ vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main(), tunnel_sw_if_index);
+ break;
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ error = clib_error_return (0, "tunnel already exists...");
+ goto done;
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "tunnel does not exist...");
+ goto done;
+
+ default:
+ error = clib_error_return
+ (0, "vnet_vxlan_add_del_tunnel returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Add or delete a VXLAN Tunnel.
+ *
+ * VXLAN provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using VXLAN tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * You can refer to this kind of L2 overlay bridge domain as a VXLAN
+ * (Virtual eXtensible LAN) segment.
+ *
+ * @cliexpar
+ * Example of how to create a VXLAN Tunnel:
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7}
+ * Example of how to delete a VXLAN Tunnel:
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
+ .path = "create vxlan tunnel",
+ .short_help =
+ "create vxlan tunnel src <local-vtep-addr>"
+ " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>"
+ " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del]",
+ .function = vxlan_add_del_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_vxlan_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vxlan_main_t * vxm = &vxlan_main;
+ vxlan_tunnel_t * t;
+
+ if (pool_elts (vxm->tunnels) == 0)
+ vlib_cli_output (vm, "No vxlan tunnels configured...");
+
+ pool_foreach (t, vxm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_vxlan_tunnel, t);
+ }));
+
+ return 0;
+}
+
+/*?
+ * Display all the VXLAN Tunnel entries.
+ *
+ * @cliexpar
+ * Example of how to display the VXLAN Tunnel entries:
+ * @cliexstart{show vxlan tunnel}
+ * [0] src 10.0.3.1 dst 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = {
+ .path = "show vxlan tunnel",
+ .short_help = "show vxlan tunnel",
+ .function = show_vxlan_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+
+void vnet_int_vxlan_bypass_mode (u32 sw_if_index,
+ u8 is_ip6,
+ u8 is_enable)
+{
+ if (is_ip6)
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-bypass",
+ sw_if_index, is_enable, 0, 0);
+ else
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass",
+ sw_if_index, is_enable, 0, 0);
+}
+
+
+static clib_error_t *
+set_ip_vxlan_bypass (u32 is_ip6,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index, is_enable;
+
+ sw_if_index = ~0;
+ is_enable = 1;
+
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_enable = 0;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == sw_if_index)
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable);
+
+ done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+set_ip4_vxlan_bypass (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return set_ip_vxlan_bypass (0, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip4-vxlan-bypass' graph node for a given interface.
+ * With the IPv4 vxlan-bypass graph node enabled on an interface, incoming
+ * vxlan packets are checked and validated there, bypassing the ip4-lookup,
+ * ip4-local and ip4-udp-lookup nodes to speed up vxlan packet forwarding.
+ * The node adds a small amount of extra overhead for non-vxlan packets,
+ * which is kept to a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip4-vxlan-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-vxlan-bypass}
+ * Name Next Previous
+ * ip4-vxlan-bypass error-drop [0]
+ * vxlan4-input [1]
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip4-vxlan-bypass on an interface:
+ * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip4-vxlan-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-vxlan-bypass}
+ * Name Next Previous
+ * ip4-vxlan-bypass error-drop [0] ip4-input
+ * vxlan4-input [1] ip4-input-no-checksum
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv4 unicast:
+ * ip4-vxlan-bypass
+ * ip4-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip4-vxlan-bypass on an interface:
+ * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_vxlan_bypass_command, static) = {
+ .path = "set interface ip vxlan-bypass",
+ .function = set_ip4_vxlan_bypass,
+ .short_help = "set interface ip vxlan-bypass <interface> [del]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_ip6_vxlan_bypass (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return set_ip_vxlan_bypass (1, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip6-vxlan-bypass' graph node for a given interface.
+ * With the IPv6 vxlan-bypass graph node enabled on an interface, incoming
+ * vxlan packets are checked and validated there, bypassing the ip6-lookup,
+ * ip6-local and ip6-udp-lookup nodes to speed up vxlan packet forwarding.
+ * The node adds a small amount of extra overhead for non-vxlan packets,
+ * which is kept to a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip6-vxlan-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-vxlan-bypass}
+ * Name Next Previous
+ * ip6-vxlan-bypass error-drop [0]
+ * vxlan6-input [1]
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip6-vxlan-bypass on an interface:
+ * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip6-vxlan-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-vxlan-bypass}
+ * Name Next Previous
+ * ip6-vxlan-bypass error-drop [0] ip6-input
+ * vxlan6-input [1] ip4-input-no-checksum
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv6 unicast:
+ * ip6-vxlan-bypass
+ * ip6-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip6-vxlan-bypass on an interface:
+ * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_vxlan_bypass_command, static) = {
+ .path = "set interface ip6 vxlan-bypass",
+ .function = set_ip6_vxlan_bypass,
+  .short_help = "set interface ip6 vxlan-bypass <interface> [del]",
+};
+/* *INDENT-ON* */
+
+clib_error_t *vxlan_init (vlib_main_t *vm)
+{
+ vxlan_main_t * vxm = &vxlan_main;
+
+ vxm->vnet_main = vnet_get_main();
+ vxm->vlib_main = vm;
+
+ /* initialize the ip6 hash */
+ vxm->vxlan6_tunnel_by_key = hash_create_mem(0,
+ sizeof(vxlan6_tunnel_key_t),
+ sizeof(uword));
+ vxm->vtep6 = hash_create_mem(0,
+ sizeof(ip6_address_t),
+ sizeof(uword));
+ vxm->mcast_shared = hash_create_mem(0,
+ sizeof(ip46_address_t),
+ sizeof(mcast_shared_t));
+
+ udp_register_dst_port (vm, UDP_DST_PORT_vxlan,
+ vxlan4_input_node.index, /* is_ip4 */ 1);
+ udp_register_dst_port (vm, UDP_DST_PORT_vxlan6,
+ vxlan6_input_node.index, /* is_ip4 */ 0);
+
+ fib_node_register_type(FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(vxlan_init);
diff --git a/src/vnet/vxlan/vxlan.h b/src/vnet/vxlan/vxlan.h
new file mode 100644
index 00000000..dca1cd12
--- /dev/null
+++ b/src/vnet/vxlan/vxlan.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_vxlan_h
+#define included_vnet_vxlan_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan/vxlan_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj_types.h>
+
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4; /* 20 bytes */
+ udp_header_t udp; /* 8 bytes */
+ vxlan_header_t vxlan; /* 8 bytes */
+}) ip4_vxlan_header_t;
+
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6; /* 40 bytes */
+ udp_header_t udp; /* 8 bytes */
+ vxlan_header_t vxlan; /* 8 bytes */
+}) ip6_vxlan_header_t;
+
+typedef CLIB_PACKED(struct {
+ /*
+ * Key fields: ip src and vxlan vni on incoming VXLAN packet
+ * all fields in NET byte order
+ */
+ union {
+ struct {
+ u32 src;
+ u32 vni; /* shifted left 8 bits */
+ };
+ u64 as_u64;
+ };
+}) vxlan4_tunnel_key_t;
+
+typedef CLIB_PACKED(struct {
+ /*
+ * Key fields: ip src and vxlan vni on incoming VXLAN packet
+ * all fields in NET byte order
+ */
+ ip6_address_t src;
+ u32 vni; /* shifted left 8 bits */
+}) vxlan6_tunnel_key_t;
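+
+/* Key construction sketch for the lookup tables (mirrors
+ * vnet_vxlan_add_del_tunnel in vxlan.c; the decap key's src is the
+ * tunnel's encap dst):
+ *
+ *   vxlan4_tunnel_key_t key4;
+ *   key4.src = t->dst.ip4.as_u32;                    // already net order
+ *   key4.vni = clib_host_to_net_u32 (t->vni << 8);
+ *   p = hash_get (vxm->vxlan4_tunnel_by_key, key4.as_u64);
+ */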
+
+typedef struct {
+ /* Rewrite string. $$$$ embed vnet_rewrite header */
+ u8 * rewrite;
+
+ /* FIB DPO for IP forwarding of VXLAN encap packet */
+ dpo_id_t next_dpo;
+
+ /* vxlan VNI in HOST byte order */
+ u32 vni;
+
+ /* tunnel src and dst addresses */
+ ip46_address_t src;
+ ip46_address_t dst;
+
+ /* mcast packet output intfc index (used only if dst is mcast) */
+ u32 mcast_sw_if_index;
+
+ /* decap next index */
+ u32 decap_next_index;
+
+ /* The FIB index for src/dst addresses */
+ u32 encap_fib_index;
+
+ /* vnet intfc index */
+ u32 sw_if_index;
+ u32 hw_if_index;
+
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /*
+   * The FIB entry used either for sending unicast VXLAN encap packets
+   * (when the tunnel is unicast) or for receiving mcast VXLAN packets
+   * (when the tunnel is mcast)
+ */
+ fib_node_index_t fib_entry_index;
+ adj_index_t mcast_adj_index;
+
+ /**
+   * The tunnel is a child of the FIB entry for its destination. This is
+   * so it receives updates when the forwarding information for that entry
+   * changes.
+   * The tunnel's sibling index on the FIB entry's dependency list.
+ */
+ u32 sibling_index;
+} vxlan_tunnel_t;
+
+#define foreach_vxlan_input_next \
+_(DROP, "error-drop") \
+_(L2_INPUT, "l2-input")
+
+typedef enum {
+#define _(s,n) VXLAN_INPUT_NEXT_##s,
+ foreach_vxlan_input_next
+#undef _
+ VXLAN_INPUT_N_NEXT,
+} vxlan_input_next_t;
+
+typedef enum {
+#define vxlan_error(n,s) VXLAN_ERROR_##n,
+#include <vnet/vxlan/vxlan_error.def>
+#undef vxlan_error
+ VXLAN_N_ERROR,
+} vxlan_input_error_t;
+
+typedef struct {
+ /* vector of encap tunnel instances */
+ vxlan_tunnel_t * tunnels;
+
+ /* lookup tunnel by key */
+ uword * vxlan4_tunnel_by_key; /* keyed on ipv4.dst + vni */
+ uword * vxlan6_tunnel_by_key; /* keyed on ipv6.dst + vni */
+
+  /* ref counts of local VTEP IPs, used by the vxlan-bypass node to check
+     whether a received VXLAN packet's DIP matches any local VTEP address */
+ uword * vtep4; /* local ip4 VTEPs keyed on their ip4 addr */
+ uword * vtep6; /* local ip6 VTEPs keyed on their ip6 addr */
+
+ /* mcast shared info */
+ uword * mcast_shared; /* keyed on mcast ip46 addr */
+
+ /* Free vlib hw_if_indices */
+ u32 * free_vxlan_tunnel_hw_if_indices;
+
+ /* Mapping from sw_if_index to tunnel index */
+ u32 * tunnel_index_by_sw_if_index;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} vxlan_main_t;
+
+vxlan_main_t vxlan_main;
+
+extern vlib_node_registration_t vxlan4_input_node;
+extern vlib_node_registration_t vxlan6_input_node;
+extern vlib_node_registration_t vxlan4_encap_node;
+extern vlib_node_registration_t vxlan6_encap_node;
+
+u8 * format_vxlan_encap_trace (u8 * s, va_list * args);
+
+typedef struct {
+ u8 is_add;
+
+ /* we normally use is_ip4, but since this adds to the
+   * structure, this seems less of a breaking change */
+ u8 is_ip6;
+ ip46_address_t src, dst;
+ u32 mcast_sw_if_index;
+ u32 encap_fib_index;
+ u32 decap_next_index;
+ u32 vni;
+} vnet_vxlan_add_del_tunnel_args_t;
+
+int vnet_vxlan_add_del_tunnel
+(vnet_vxlan_add_del_tunnel_args_t *a, u32 * sw_if_indexp);
+
+void vnet_int_vxlan_bypass_mode
+(u32 sw_if_index, u8 is_ip6, u8 is_enable);
+#endif /* included_vnet_vxlan_h */
diff --git a/src/vnet/vxlan/vxlan_api.c b/src/vnet/vxlan/vxlan_api.c
new file mode 100644
index 00000000..a2d41232
--- /dev/null
+++ b/src/vnet/vxlan/vxlan_api.c
@@ -0,0 +1,240 @@
+/*
+ *------------------------------------------------------------------
+ * vxlan_api.c - vxlan api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/vxlan/vxlan.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \
+_(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \
+_(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump)
+
+static void
+ vl_api_sw_interface_set_vxlan_bypass_t_handler
+ (vl_api_sw_interface_set_vxlan_bypass_t * mp)
+{
+ vl_api_sw_interface_set_vxlan_bypass_reply_t *rmp;
+ int rv = 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vnet_int_vxlan_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_BYPASS_REPLY);
+}
+
+static void vl_api_vxlan_add_del_tunnel_t_handler
+ (vl_api_vxlan_add_del_tunnel_t * mp)
+{
+ vl_api_vxlan_add_del_tunnel_reply_t *rmp;
+  int rv = 0;
+  u32 sw_if_index = ~0;	/* set before any goto so the reply is defined */
+ ip4_main_t *im = &ip4_main;
+
+ uword *p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+
+ vnet_vxlan_add_del_tunnel_args_t a = {
+ .is_add = mp->is_add,
+ .is_ip6 = mp->is_ipv6,
+ .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
+ .encap_fib_index = p[0],
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .vni = ntohl (mp->vni),
+ .dst = to_ip46 (mp->is_ipv6, mp->dst_address),
+ .src = to_ip46 (mp->is_ipv6, mp->src_address),
+ };
+
+ /* Check src & dst are different */
+ if (ip46_address_cmp (&a.dst, &a.src) == 0)
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+ if (ip46_address_is_multicast (&a.dst) &&
+ !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index))
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto out;
+ }
+
+  rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void send_vxlan_tunnel_details
+ (vxlan_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_vxlan_tunnel_details_t *rmp;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ u8 is_ipv6 = !ip46_address_is_ip4 (&t->dst);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_VXLAN_TUNNEL_DETAILS);
+ if (is_ipv6)
+ {
+ memcpy (rmp->src_address, t->src.ip6.as_u8, 16);
+ memcpy (rmp->dst_address, t->dst.ip6.as_u8, 16);
+ rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
+ }
+ else
+ {
+ memcpy (rmp->src_address, t->src.ip4.as_u8, 4);
+ memcpy (rmp->dst_address, t->dst.ip4.as_u8, 4);
+ rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ }
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->vni = htonl (t->vni);
+ rmp->decap_next_index = htonl (t->decap_next_index);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->is_ipv6 = is_ipv6;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void vl_api_vxlan_tunnel_dump_t_handler
+ (vl_api_vxlan_tunnel_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ vxlan_main_t *vxm = &vxlan_main;
+ vxlan_tunnel_t *t;
+ u32 sw_if_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (t, vxm->tunnels,
+ ({
+ send_vxlan_tunnel_details(t, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
+ (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_vxlan_tunnel_details (t, q, mp->context);
+ }
+}
+
+/*
+ * vpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_vxlan;
+#undef _
+}
+
+static clib_error_t *
+vxlan_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ am->api_trace_cfg[VL_API_VXLAN_ADD_DEL_TUNNEL].size += 16 * sizeof (u32);
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vxlan_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vxlan/vxlan_error.def b/src/vnet/vxlan/vxlan_error.def
new file mode 100644
index 00000000..17f90595
--- /dev/null
+++ b/src/vnet/vxlan/vxlan_error.def
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+vxlan_error (DECAPSULATED, "good packets decapsulated")
+vxlan_error (NO_SUCH_TUNNEL, "no such tunnel packets")
+vxlan_error (BAD_FLAGS, "packets with bad flags field in vxlan header")
diff --git a/src/vnet/vxlan/vxlan_packet.h b/src/vnet/vxlan/vxlan_packet.h
new file mode 100644
index 00000000..5f93a36f
--- /dev/null
+++ b/src/vnet/vxlan/vxlan_packet.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vxlan_packet_h__
+#define __included_vxlan_packet_h__ 1
+
+/*
+ * From RFC 7348 (VXLAN)
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|I|R|R|R| Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * VXLAN Header: This is an 8-byte field that has:
+ *
+ * - Flags (8 bits): where the I flag MUST be set to 1 for a valid
+ * VXLAN Network ID (VNI). The other 7 bits (designated "R") are
+ * reserved fields and MUST be set to zero on transmission and
+ * ignored on receipt.
+ *
+ * - VXLAN Segment ID/VXLAN Network Identifier (VNI): this is a
+ * 24-bit value used to designate the individual VXLAN overlay
+ * network on which the communicating VMs are situated. VMs in
+ * different VXLAN overlay networks cannot communicate with each
+ * other.
+ *
+ * - Reserved fields (24 bits and 8 bits): MUST be set to zero on
+ * transmission and ignored on receipt.
+ *
+ */
+
+typedef struct {
+ u8 flags;
+ u8 res1;
+ u8 res2;
+ u8 res3;
+ u32 vni_reserved;
+} vxlan_header_t;
+
+#define VXLAN_FLAGS_I 0x08
+
+static inline u32 vnet_get_vni (vxlan_header_t * h)
+{
+ u32 vni_reserved_host_byte_order;
+
+ vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved);
+ return vni_reserved_host_byte_order >> 8;
+}
+
+static inline void vnet_set_vni_and_flags (vxlan_header_t * h, u32 vni)
+{
+  h->vni_reserved = clib_host_to_net_u32 (vni << 8);
+  *(u32 *) h = 0;		/* clears only flags + res1..res3 */
+  h->flags = VXLAN_FLAGS_I;
+}
+
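+/* Encoding sketch for the helpers above (illustrative): for vni = 13 the
+ * second word carries 13 << 8 in network order, i.e. the 24-bit VNI sits
+ * in its upper three bytes, and the round-trip recovers it:
+ *
+ *   vxlan_header_t h;
+ *   vnet_set_vni_and_flags (&h, 13);
+ *   // h.flags == VXLAN_FLAGS_I, all reserved bytes zero
+ *   // vnet_get_vni (&h) == 13
+ */
+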
+#endif /* __included_vxlan_packet_h__ */
diff --git a/src/vpp-api-test.am b/src/vpp-api-test.am
new file mode 100644
index 00000000..9465dc68
--- /dev/null
+++ b/src/vpp-api-test.am
@@ -0,0 +1,62 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+libvatplugin_la_LIBADD = libvppinfra.la
+libvatplugin_la_DEPENDENCIES = libvppinfra.la
+libvatplugin_la_SOURCES = \
+ vat/plugin_api.c
+
+lib_LTLIBRARIES += libvatplugin.la
+
+bin_PROGRAMS += vpp_api_test vpp_json_test vpp_restart
+
+vpp_api_test_SOURCES = \
+ vat/api_format.c \
+ vat/main.c \
+ vat/plugin.c \
+ vat/vat.h \
+ vat/json_format.h \
+ vat/json_format.c
+
+vpp_json_test_SOURCES = \
+ vat/json_format.h \
+ vat/json_format.c \
+ vat/json_test.c
+
+vpp_api_test_LDADD = \
+ libvlibmemoryclient.la \
+ libsvm.la \
+ libvatplugin.la \
+ libvppinfra.la \
+ -lpthread -lm -lrt -ldl -lcrypto
+
+vpp_api_test_LDFLAGS = -Wl,--export-dynamic
+
+vpp_json_test_LDADD = libvppinfra.la -lm
+vpp_json_test_LDFLAGS = -Wl,--export-dynamic
+
+nobase_include_HEADERS += \
+ vat/vat.h \
+ vat/json_format.h
+
+vpp_restart_SOURCES = \
+ vat/restart.c
+vpp_restart_LDADD = \
+ libsvmdb.la \
+ libsvm.la \
+ libvppinfra.la \
+ -lpthread -lrt
+
+dist_bin_SCRIPTS += scripts/vppctl
+
+# vi:syntax=automake
diff --git a/src/vpp-api.am b/src/vpp-api.am
new file mode 100644
index 00000000..553eafa8
--- /dev/null
+++ b/src/vpp-api.am
@@ -0,0 +1,46 @@
+# Copyright (c) 2017 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# VPP API C wrapper extension
+#
+lib_LTLIBRARIES += libvppapiclient.la
+libvppapiclient_la_SOURCES = \
+ vpp-api/client/client.c \
+ vpp-api/client/libvppapiclient.map
+
+libvppapiclient_la_LIBADD = \
+ -lpthread -lm -lrt
+
+libvppapiclient_la_LDFLAGS = \
+ -Wl,-L$(top_builddir)/.libs,--whole-archive,-l:libsvm.a,-l:libvppinfra.a,-l:libvlibmemoryclient.a,--no-whole-archive \
+ -Wl,--version-script=$(srcdir)/vpp-api/client/libvppapiclient.map,-lrt
+
+libvppapiclient_la_DEPENDENCIES = libvppinfra.la libvlibmemoryclient.la libsvm.la
+
+libvppapiclient_la_CPPFLAGS =
+
+nobase_include_HEADERS += vpp-api/client/vppapiclient.h
+
+#
+# Test client
+#
+if ENABLE_TESTS
+noinst_PROGRAMS += vac_test
+vac_test_SOURCES = vpp-api/client/test.c
+vac_test_LDADD = \
+ $(builddir)/libvppapiclient.la \
+ -lpthread -lm -lrt
+endif
+
+# vi:syntax=automake
diff --git a/src/vpp-api/client/client.c b/src/vpp-api/client/client.c
new file mode 100644
index 00000000..8bdcda01
--- /dev/null
+++ b/src/vpp-api/client/client.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+
+#include "vppapiclient.h"
+
+/*
+ * Asynchronous mode:
+ * Client registers a callback. All messages are sent to the callback.
+ * Synchronous mode:
+ * Client calls blocking read().
+ * Clients are expected to collate events on a queue.
+ * vac_write() -> suspends RX thread
+ * vac_read() -> resumes RX thread
+ */
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+vlib_main_t vlib_global_main;
+vlib_main_t **vlib_mains;
+
+typedef struct {
+ u8 connected_to_vlib;
+ pthread_t rx_thread_handle;
+ pthread_t timeout_thread_handle;
+ pthread_mutex_t queue_lock;
+ pthread_cond_t suspend_cv;
+ pthread_cond_t resume_cv;
+ pthread_mutex_t timeout_lock;
+ pthread_cond_t timeout_cv;
+ pthread_cond_t timeout_cancel_cv;
+ pthread_cond_t terminate_cv;
+} vac_main_t;
+
+vac_main_t vac_main;
+vac_callback_t vac_callback;
+u16 read_timeout = 0;
+bool rx_is_running = false;
+
+static void
+init (void)
+{
+ vac_main_t *pm = &vac_main;
+ memset(pm, 0, sizeof(*pm));
+ pthread_mutex_init(&pm->queue_lock, NULL);
+ pthread_cond_init(&pm->suspend_cv, NULL);
+ pthread_cond_init(&pm->resume_cv, NULL);
+ pthread_mutex_init(&pm->timeout_lock, NULL);
+ pthread_cond_init(&pm->timeout_cv, NULL);
+ pthread_cond_init(&pm->timeout_cancel_cv, NULL);
+ pthread_cond_init(&pm->terminate_cv, NULL);
+}
+
+static void
+cleanup (void)
+{
+ vac_main_t *pm = &vac_main;
+ pthread_cond_destroy(&pm->suspend_cv);
+ pthread_cond_destroy(&pm->resume_cv);
+ pthread_cond_destroy(&pm->timeout_cv);
+ pthread_cond_destroy(&pm->timeout_cancel_cv);
+ pthread_cond_destroy(&pm->terminate_cv);
+ pthread_mutex_destroy(&pm->queue_lock);
+ pthread_mutex_destroy(&pm->timeout_lock);
+ memset (pm, 0, sizeof (*pm));
+}
+
+/*
+ * Satisfy external references when -lvlib is not available.
+ */
+void vlib_cli_output (struct vlib_main_t * vm, char * fmt, ...)
+{
+ clib_warning ("vlib_cli_output called...");
+}
+
+void
+vac_free (void * msg)
+{
+ vl_msg_api_free (msg);
+}
+
+static void
+vac_api_handler (void *msg)
+{
+ u16 id = ntohs(*((u16 *)msg));
+ msgbuf_t *msgbuf = (msgbuf_t *)(((u8 *)msg) - offsetof(msgbuf_t, data));
+ int l = ntohl(msgbuf->data_len);
+ if (l == 0)
+ clib_warning("Message ID %d has wrong length: %d\n", id, l);
+
+ /* Call Python callback */
+ ASSERT(vac_callback);
+ (vac_callback)(msg, l);
+ vac_free(msg);
+}
+
+static void *
+vac_rx_thread_fn (void *arg)
+{
+ unix_shared_memory_queue_t *q;
+ vac_main_t *pm = &vac_main;
+ api_main_t *am = &api_main;
+ uword msg;
+
+ q = am->vl_input_queue;
+
+ while (1)
+ while (!unix_shared_memory_queue_sub(q, (u8 *)&msg, 0))
+ {
+ u16 id = ntohs(*((u16 *)msg));
+ switch (id) {
+ case VL_API_RX_THREAD_EXIT:
+ vl_msg_api_free((void *) msg);
+ /* signal waiting threads that this thread is about to terminate */
+ pthread_mutex_lock(&pm->queue_lock);
+ pthread_cond_signal(&pm->terminate_cv);
+ pthread_mutex_unlock(&pm->queue_lock);
+ pthread_exit(0);
+ return 0;
+ break;
+
+ case VL_API_MEMCLNT_RX_THREAD_SUSPEND:
+        vl_msg_api_free((void *) msg);
+ /* Suspend thread and signal reader */
+ pthread_mutex_lock(&pm->queue_lock);
+ pthread_cond_signal(&pm->suspend_cv);
+ /* Wait for the resume signal */
+ pthread_cond_wait (&pm->resume_cv, &pm->queue_lock);
+ pthread_mutex_unlock(&pm->queue_lock);
+ break;
+
+ case VL_API_MEMCLNT_READ_TIMEOUT:
+ clib_warning("Received read timeout in async thread\n");
+ vl_msg_api_free((void *) msg);
+ break;
+
+ default:
+ vac_api_handler((void *)msg);
+ }
+ }
+}
+
+static void *
+vac_timeout_thread_fn (void *arg)
+{
+ vl_api_memclnt_read_timeout_t *ep;
+ vac_main_t *pm = &vac_main;
+ api_main_t *am = &api_main;
+ struct timespec ts;
+ struct timeval tv;
+ u16 timeout;
+ int rv;
+
+ while (1)
+ {
+ /* Wait for poke */
+ pthread_mutex_lock(&pm->timeout_lock);
+ pthread_cond_wait (&pm->timeout_cv, &pm->timeout_lock);
+ timeout = read_timeout;
+ gettimeofday(&tv, NULL);
+ ts.tv_sec = tv.tv_sec + timeout;
+ ts.tv_nsec = 0;
+ rv = pthread_cond_timedwait (&pm->timeout_cancel_cv,
+ &pm->timeout_lock, &ts);
+ pthread_mutex_unlock(&pm->timeout_lock);
+ if (rv == ETIMEDOUT)
+ {
+ ep = vl_msg_api_alloc (sizeof (*ep));
+ ep->_vl_msg_id = ntohs(VL_API_MEMCLNT_READ_TIMEOUT);
+ vl_msg_api_send_shmem(am->vl_input_queue, (u8 *)&ep);
+ }
+ }
+ pthread_exit(0);
+}
+
+void
+vac_rx_suspend (void)
+{
+ api_main_t *am = &api_main;
+ vac_main_t *pm = &vac_main;
+ vl_api_memclnt_rx_thread_suspend_t *ep;
+
+ if (!pm->rx_thread_handle) return;
+ pthread_mutex_lock(&pm->queue_lock);
+ if (rx_is_running)
+ {
+ ep = vl_msg_api_alloc (sizeof (*ep));
+ ep->_vl_msg_id = ntohs(VL_API_MEMCLNT_RX_THREAD_SUSPEND);
+ vl_msg_api_send_shmem(am->vl_input_queue, (u8 *)&ep);
+      /* Wait for RX thread to tell us it has suspended */
+ pthread_cond_wait(&pm->suspend_cv, &pm->queue_lock);
+ rx_is_running = false;
+ }
+ pthread_mutex_unlock(&pm->queue_lock);
+}
+
+void
+vac_rx_resume (void)
+{
+ vac_main_t *pm = &vac_main;
+ if (!pm->rx_thread_handle) return;
+ pthread_mutex_lock(&pm->queue_lock);
+ if (rx_is_running) goto unlock;
+ pthread_cond_signal(&pm->resume_cv);
+ rx_is_running = true;
+ unlock:
+ pthread_mutex_unlock(&pm->queue_lock);
+}
+
+static uword *
+vac_msg_table_get_hash (void)
+{
+ api_main_t *am = &api_main;
+ return (am->msg_index_by_name_and_crc);
+}
+
+int
+vac_msg_table_size(void)
+{
+ api_main_t *am = &api_main;
+ return hash_elts(am->msg_index_by_name_and_crc);
+}
+
+int
+vac_connect (char * name, char * chroot_prefix, vac_callback_t cb,
+ int rx_qlen)
+{
+ int rv = 0;
+ vac_main_t *pm = &vac_main;
+
+ init();
+ if (chroot_prefix != NULL)
+ vl_set_memory_root_path (chroot_prefix);
+
+ if ((rv = vl_client_api_map("/vpe-api"))) {
+ clib_warning ("vl_client_api map rv %d", rv);
+ return rv;
+ }
+
+ if (vl_client_connect(name, 0, rx_qlen) < 0) {
+ vl_client_api_unmap();
+ return (-1);
+ }
+
+ if (cb) {
+ /* Start the rx queue thread */
+ rv = pthread_create(&pm->rx_thread_handle, NULL, vac_rx_thread_fn, 0);
+ if (rv) {
+ clib_warning("pthread_create returned %d", rv);
+ vl_client_api_unmap();
+ return (-1);
+ }
+ vac_callback = cb;
+ rx_is_running = true;
+ }
+
+ /* Start read timeout thread */
+ rv = pthread_create(&pm->timeout_thread_handle, NULL,
+ vac_timeout_thread_fn, 0);
+ if (rv) {
+ clib_warning("pthread_create returned %d", rv);
+ vl_client_api_unmap();
+ return (-1);
+ }
+
+ pm->connected_to_vlib = 1;
+
+ return (0);
+}
+
+int
+vac_disconnect (void)
+{
+ api_main_t *am = &api_main;
+ vac_main_t *pm = &vac_main;
+
+ if (!pm->connected_to_vlib) return 0;
+
+ if (pm->rx_thread_handle) {
+ vl_api_rx_thread_exit_t *ep;
+ uword junk;
+ ep = vl_msg_api_alloc (sizeof (*ep));
+ ep->_vl_msg_id = ntohs(VL_API_RX_THREAD_EXIT);
+ vl_msg_api_send_shmem(am->vl_input_queue, (u8 *)&ep);
+
+ /* wait (with timeout) until RX thread has finished */
+ struct timespec ts;
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ ts.tv_sec = tv.tv_sec + 5;
+ ts.tv_nsec = 0;
+ pthread_mutex_lock(&pm->queue_lock);
+ int rv = pthread_cond_timedwait(&pm->terminate_cv, &pm->queue_lock, &ts);
+ pthread_mutex_unlock(&pm->queue_lock);
+ /* now join so we wait until thread has -really- finished */
+ if (rv == ETIMEDOUT)
+ pthread_cancel(pm->rx_thread_handle);
+ else
+ pthread_join(pm->rx_thread_handle, (void **) &junk);
+ }
+ if (pm->timeout_thread_handle)
+ pthread_cancel(pm->timeout_thread_handle);
+
+ vl_client_disconnect();
+ vl_client_api_unmap();
+ vac_callback = 0;
+
+ cleanup();
+
+ return (0);
+}
+
+static void
+set_timeout (unsigned short timeout)
+{
+ vac_main_t *pm = &vac_main;
+ pthread_mutex_lock(&pm->timeout_lock);
+ read_timeout = timeout;
+ pthread_cond_signal(&pm->timeout_cv);
+ pthread_mutex_unlock(&pm->timeout_lock);
+}
+
+static void
+unset_timeout (void)
+{
+ vac_main_t *pm = &vac_main;
+ pthread_mutex_lock(&pm->timeout_lock);
+ pthread_cond_signal(&pm->timeout_cancel_cv);
+ pthread_mutex_unlock(&pm->timeout_lock);
+}
+
+int
+vac_read (char **p, int *l, u16 timeout)
+{
+ unix_shared_memory_queue_t *q;
+ api_main_t *am = &api_main;
+ vac_main_t *pm = &vac_main;
+ uword msg;
+ msgbuf_t *msgbuf;
+
+ if (!pm->connected_to_vlib) return -1;
+
+ *l = 0;
+
+ if (am->our_pid == 0) return (-1);
+
+ /* Poke timeout thread */
+ if (timeout)
+ set_timeout(timeout);
+
+ q = am->vl_input_queue;
+ int rv = unix_shared_memory_queue_sub(q, (u8 *)&msg, 0);
+ if (rv == 0) {
+ u16 msg_id = ntohs(*((u16 *)msg));
+ switch (msg_id) {
+ case VL_API_RX_THREAD_EXIT:
+ printf("Received thread exit\n");
+ return -1;
+ case VL_API_MEMCLNT_RX_THREAD_SUSPEND:
+ printf("Received thread suspend\n");
+ goto error;
+ case VL_API_MEMCLNT_READ_TIMEOUT:
+ printf("Received read timeout %ds\n", timeout);
+ goto error;
+
+ default:
+ msgbuf = (msgbuf_t *)(((u8 *)msg) - offsetof(msgbuf_t, data));
+ *l = ntohl(msgbuf->data_len);
+ if (*l == 0) {
+ printf("Unregistered API message: %d\n", msg_id);
+ goto error;
+ }
+ }
+ *p = (char *)msg;
+
+ /* Let timeout notification thread know we're done */
+ unset_timeout();
+
+ } else {
+ printf("Read failed with %d\n", rv);
+ }
+ return (rv);
+
+ error:
+ vl_msg_api_free((void *) msg);
+ /* Client might forget to resume RX thread on failure */
+ vac_rx_resume ();
+ return -1;
+}
+
+/*
+ * XXX: Makes the assumption that client_index is the first member
+ */
+typedef VL_API_PACKED(struct _vl_api_header {
+ u16 _vl_msg_id;
+ u32 client_index;
+}) vl_api_header_t;
+
+static unsigned int
+vac_client_index (void)
+{
+ return (api_main.my_client_index);
+}
+
+int
+vac_write (char *p, int l)
+{
+ int rv = -1;
+ api_main_t *am = &api_main;
+ vl_api_header_t *mp = vl_msg_api_alloc(l);
+ unix_shared_memory_queue_t *q;
+ vac_main_t *pm = &vac_main;
+
+ if (!pm->connected_to_vlib) return -1;
+ if (!mp) return (-1);
+
+ memcpy(mp, p, l);
+ mp->client_index = vac_client_index();
+ q = am->shmem_hdr->vl_input_queue;
+ rv = unix_shared_memory_queue_add(q, (u8 *)&mp, 0);
+ if (rv != 0) {
+ clib_warning("vpe_api_write fails: %d\n", rv);
+ /* Clear message */
+ vac_free(mp);
+ }
+ return (rv);
+}
+
+int
+vac_get_msg_index (unsigned char * name)
+{
+ return vl_api_get_msg_index (name);
+}
+
+int
+vac_msg_table_max_index(void)
+{
+ int max = 0;
+ hash_pair_t *hp;
+ uword *h = vac_msg_table_get_hash();
+ hash_foreach_pair (hp, h,
+ ({
+ if (hp->value[0] > max)
+ max = hp->value[0];
+ }));
+
+ return max;
+}
+
+void
+vac_set_error_handler (vac_error_callback_t cb)
+{
+ if (cb) clib_error_register_handler (cb, 0);
+}
diff --git a/src/vpp-api/client/libvppapiclient.map b/src/vpp-api/client/libvppapiclient.map
new file mode 100644
index 00000000..a9d8f7dd
--- /dev/null
+++ b/src/vpp-api/client/libvppapiclient.map
@@ -0,0 +1,19 @@
+
+VPPAPICLIENT_17.07 {
+ global:
+ vac_read;
+ vac_write;
+ vac_connect;
+ vac_disconnect;
+ vac_set_error_handler;
+ vac_msg_table_max_index;
+ vac_get_msg_index;
+ vac_rx_suspend;
+ vac_rx_resume;
+ vac_free;
+ vac_msg_table_size;
+
+ api_main;
+
+ local: *;
+};
diff --git a/src/vpp-api/client/test.c b/src/vpp-api/client/test.c
new file mode 100644
index 00000000..020115d9
--- /dev/null
+++ b/src/vpp-api/client/test.c
@@ -0,0 +1,140 @@
+/*
+ *------------------------------------------------------------------
+ * test.c
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#include <time.h> /* time_t, time (for timestamp in second) */
+#include <sys/timeb.h> /* ftime, timeb (for timestamp in millisecond) */
+#include <sys/time.h> /* gettimeofday, timeval (for timestamp in microsecond) */
+
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+#include <signal.h>
+#include "vppapiclient.h"
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+/* we are not linking with vlib */
+vlib_main_t vlib_global_main;
+vlib_main_t **vlib_mains;
+
+volatile int sigterm_received = 0;
+volatile u32 result_ready;
+volatile u16 result_msg_id;
+
+/* M_NOALLOC: construct, but don't yet send a message */
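+/* note: the lowercase argument (t) is unused in this variant */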
+
+#define M_NOALLOC(T,t) \
+ do { \
+ result_ready = 0; \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T); \
+ mp->client_index = am->my_client_index; \
+  } while(0)
+
+
+
+static void
+wrap_vac_callback (unsigned char *data, int len)
+{
+  /* record the reply so the polling loops in main() can make progress */
+  result_msg_id = ntohs(*((u16 *)data));
+  result_ready = 1;
+}
+
+int main (int argc, char ** argv)
+{
+ api_main_t * am = &api_main;
+ vl_api_show_version_t message;
+ vl_api_show_version_t *mp;
+ int async = 1;
+ int rv = vac_connect("vac_client", NULL, NULL, 32 /* rx queue-length*/);
+
+ if (rv != 0) {
+ printf("Connect failed: %d\n", rv);
+ exit(rv);
+ }
+
+ struct timeb timer_msec;
+ long long int timestamp_msec_start; /* timestamp in millisecond. */
+ if (!ftime(&timer_msec)) {
+ timestamp_msec_start = ((long long int) timer_msec.time) * 1000ll +
+ (long long int) timer_msec.millitm;
+ }
+ else {
+ timestamp_msec_start = -1;
+ }
+
+
+ /*
+ * Test vpe_api_write and vpe_api_read to send and recv message for an
+ * API
+ */
+ int i;
+ long int no_msgs = 10000;
+ mp = &message;
+
+ for (i = 0; i < no_msgs; i++) {
+ /* Construct the API message */
+ M_NOALLOC(SHOW_VERSION, show_version);
+ vac_write((char *)mp, sizeof(*mp));
+#ifndef __COVERITY__
+ /* As given, async is always 1. Shut up Coverity about it */
+ if (!async)
+ while (result_ready == 0);
+#endif
+ }
+ if (async) {
+ vl_api_control_ping_t control;
+ vl_api_control_ping_t *mp;
+ mp = &control;
+ M_NOALLOC(CONTROL_PING, control_ping);
+ vac_write((char *)mp, sizeof(*mp));
+
+ while (result_msg_id != VL_API_CONTROL_PING_REPLY);
+ }
+
+ long long int timestamp_msec_end; /* timestamp in millisecond. */
+ if (!ftime(&timer_msec)) {
+ timestamp_msec_end = ((long long int) timer_msec.time) * 1000ll +
+ (long long int) timer_msec.millitm;
+ }
+ else {
+ timestamp_msec_end = -1;
+ }
+
+ printf("Took %lld msec, %lld msgs/msec \n", (timestamp_msec_end - timestamp_msec_start),
+ no_msgs/(timestamp_msec_end - timestamp_msec_start));
+ printf("Exiting...\n");
+ vac_disconnect();
+ exit (0);
+}
diff --git a/src/vpp-api/client/vppapiclient.h b/src/vpp-api/client/vppapiclient.h
new file mode 100644
index 00000000..839ec1f8
--- /dev/null
+++ b/src/vpp-api/client/vppapiclient.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vppapiclient_h
+#define included_vppapiclient_h
+
+#include <stdint.h>
+
+typedef void (*vac_callback_t)(unsigned char * data, int len);
+typedef void (*vac_error_callback_t)(void *, unsigned char *, int);
+int vac_connect(char * name, char * chroot_prefix, vac_callback_t cb,
+ int rx_qlen);
+int vac_disconnect(void);
+int vac_read(char **data, int *l, unsigned short timeout);
+int vac_write(char *data, int len);
+void vac_free(void * msg);
+
+int vac_get_msg_index(unsigned char * name);
+int vac_msg_table_size(void);
+int vac_msg_table_max_index(void);
+
+void vac_rx_suspend (void);
+void vac_rx_resume (void);
+void vac_set_error_handler(vac_error_callback_t);
+#endif
diff --git a/src/vpp-api/java/Makefile.am b/src/vpp-api/java/Makefile.am
new file mode 100644
index 00000000..637bb774
--- /dev/null
+++ b/src/vpp-api/java/Makefile.am
@@ -0,0 +1,263 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+AUTOMAKE_OPTIONS = foreign
+ACLOCAL_AMFLAGS = -I m4
+AM_LIBTOOLFLAGS = --quiet
+
+AM_CFLAGS = -Wall -I${top_srcdir} -I${top_builddir} \
+ -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux \
+ -I@top_srcdir@/plugins -I@top_builddir@/plugins
+
+AM_LDFLAGS = -shared -avoid-version -rpath /none -no-undefined
+
+BUILT_SOURCES =
+bin_PROGRAMS =
+noinst_LTLIBRARIES =
+JAR_FILES =
+CLEANDIRS =
+
+#
+# jvpp-common
+#
+
+nobase_include_HEADERS = \
+ jvpp-common/jvpp_common.h
+
+lib_LTLIBRARIES = libjvpp_common.la
+libjvpp_common_la_SOURCES = jvpp-common/jvpp_common.c
+libjvpp_common_la_LDFLAGS = -shared -rpath /none -no-undefined
+
+JVPP_LIBS = \
+ libjvpp_common.la \
+ $(top_builddir)/libvppinfra.la \
+ $(top_builddir)/libvlibmemoryclient.la \
+ $(top_builddir)/libsvm.la \
+ -lpthread -lm -lrt
+
+#
+# jvpp-registry (connection management + plugin registry)
+#
+
+noinst_LTLIBRARIES += libjvpp_registry.la
+
+libjvpp_registry_la_SOURCES = jvpp-registry/jvpp_registry.c
+libjvpp_registry_la_CPPFLAGS = -Ijvpp-registry
+libjvpp_registry_la_LIBADD = $(JVPP_LIBS)
+libjvpp_registry_la_DEPENDENCIES = libjvpp_common.la
+
+packagedir_jvpp_registry = io/fd/vpp/jvpp
+jvpp_registry_src_files := \
+ $(wildcard @srcdir@/jvpp-registry/$(packagedir_jvpp_registry)/*.java) \
+ $(wildcard @srcdir@/jvpp-registry/$(packagedir_jvpp_registry)/**/*.java)
+
+BUILT_SOURCES += jvpp-registry/io_fd_vpp_jvpp_VppJNIConnection.h
+CLEANDIRS += jvpp-registry/target
+JAR_FILES += jvpp-registry-$(PACKAGE_VERSION).jar
+
+jvpp_registry_ok = jvpp-registry/io_fd_vpp_jvpp_VppJNIConnection.h
+
+jvpp-registry/io_fd_vpp_jvpp_VppJNIConnection.h: $(jvpp_registry_src_files)
+ @echo " JAPIGEN $@"
+ @rm -rf jvpp-registry/target
+ @mkdir -p jvpp-registry/target
+ @$(JAVAC) -d jvpp-registry/target $^
+ @$(JAVAH) -force -classpath jvpp-registry/target -d jvpp-registry io.fd.vpp.jvpp.VppJNIConnection
+ @$(JAVAH) -force -classpath jvpp-registry/target -d jvpp-registry io.fd.vpp.jvpp.JVppRegistryImpl
+ @touch jvpp-registry.ok
+
+define japigen
+ @echo " JAPIGEN $@"
+ @rm -rf jvpp-$(1)/target
+ @ @srcdir@/jvpp/gen/jvpp_gen.py --plugin_name $(1) --root_dir jvpp-$(1) \
+ -i $(jvpp_$(1)_json_files) > /dev/null
+ @find jvpp-$(1)/target -name \*.java > jvpp-$(1).generated.files
+ @find @srcdir@/jvpp-$(1) -name \*.java > jvpp-$(1).static.files
+ @$(JAVAC) -classpath jvpp-registry/target \
+ -d jvpp-$(1)/target @jvpp-$(1).generated.files @jvpp-$(1).static.files
+ @$(JAVAH) -force \
+ -classpath jvpp-registry/target:jvpp-$(1)/target \
+ -d jvpp-$(1) io.fd.vpp.jvpp.$(1).$(2)
+endef
+
+#
+# jvpp-core (Java wrapper for vpe.api)
+#
+noinst_LTLIBRARIES += libjvpp_core.la
+libjvpp_core_la_SOURCES = jvpp-core/jvpp_core.c jvpp-core/jvpp_core_gen.h
+libjvpp_core_la_CPPFLAGS = -Ijvpp-registry -Ijvpp-core
+libjvpp_core_la_LIBADD = $(JVPP_LIBS)
+libjvpp_core_la_DEPENDENCIES = libjvpp_common.la
+
+BUILT_SOURCES += jvpp-core/io_fd_vpp_jvpp_core_JVppCoreImpl.h
+JAR_FILES += jvpp-core-$(PACKAGE_VERSION).jar
+CLEANDIRS += jvpp-core/target
+jvpp_core_json_files = $(shell find @top_builddir@/vnet/ -type f -name '*.api.json')
+jvpp_core_json_files += @top_builddir@/vpp/api/vpe.api.json
+
+jvpp-core/io_fd_vpp_jvpp_core_JVppCoreImpl.h: $(jvpp_registry_ok) $(jvpp_core_json_files)
+ $(call japigen,core,JVppCoreImpl)
+
+#
+# ACL Plugin
+#
+if ENABLE_ACL_PLUGIN
+noinst_LTLIBRARIES += libjvpp_acl.la
+libjvpp_acl_la_SOURCES = jvpp-acl/jvpp_acl.c
+libjvpp_acl_la_CPPFLAGS = -Ijvpp-acl
+libjvpp_acl_la_LIBADD = $(JVPP_LIBS)
+libjvpp_acl_la_DEPENDENCIES = libjvpp_common.la
+
+BUILT_SOURCES += jvpp-acl/io_fd_vpp_jvpp_acl_JVppAclImpl.h
+JAR_FILES += jvpp-acl-$(PACKAGE_VERSION).jar
+CLEANDIRS += jvpp-acl/target
+
+jvpp_acl_json_files = @top_builddir@/plugins/acl/acl.api.json
+
+jvpp-acl/io_fd_vpp_jvpp_acl_JVppAclImpl.h: $(jvpp_registry_ok) $(jvpp_acl_json_files)
+ $(call japigen,acl,JVppAclImpl)
+endif
+
+#
+# GTPU Plugin
+#
+if ENABLE_GTPU_PLUGIN
+noinst_LTLIBRARIES += libjvpp_gtpu.la
+libjvpp_gtpu_la_SOURCES = jvpp-gtpu/jvpp_gtpu.c
+libjvpp_gtpu_la_CPPFLAGS = -Ijvpp-gtpu
+libjvpp_gtpu_la_LIBADD = $(JVPP_LIBS)
+libjvpp_gtpu_la_DEPENDENCIES = libjvpp_common.la
+
+BUILT_SOURCES += jvpp-gtpu/io_fd_vpp_jvpp_gtpu_JVppGtpuImpl.h
+JAR_FILES += jvpp-gtpu-$(PACKAGE_VERSION).jar
+CLEANDIRS += jvpp-gtpu/target
+
+jvpp_gtpu_json_files = @top_builddir@/plugins/gtpu/gtpu.api.json
+
+jvpp-gtpu/io_fd_vpp_jvpp_gtpu_JVppGtpuImpl.h: $(jvpp_registry_ok) $(jvpp_gtpu_json_files)
+ $(call japigen,gtpu,JVppGtpuImpl)
+endif
+
+#
+# PPPOE Plugin
+#
+if ENABLE_PPPOE_PLUGIN
+noinst_LTLIBRARIES += libjvpp_pppoe.la
+libjvpp_pppoe_la_SOURCES = jvpp-pppoe/jvpp_pppoe.c
+libjvpp_pppoe_la_CPPFLAGS = -Ijvpp-pppoe
+libjvpp_pppoe_la_LIBADD = $(JVPP_LIBS)
+libjvpp_pppoe_la_DEPENDENCIES = libjvpp_common.la
+
+BUILT_SOURCES += jvpp-pppoe/io_fd_vpp_jvpp_pppoe_JVppPppoeImpl.h
+JAR_FILES += jvpp-pppoe-$(PACKAGE_VERSION).jar
+CLEANDIRS += jvpp-pppoe/target
+
+jvpp_pppoe_json_files = @top_builddir@/plugins/pppoe/pppoe.api.json
+
+jvpp-pppoe/io_fd_vpp_jvpp_pppoe_JVppPppoeImpl.h: $(jvpp_registry_ok) $(jvpp_pppoe_json_files)
+ $(call japigen,pppoe,JVppPppoeImpl)
+endif
+
+#
+# NAT Plugin
+#
+if ENABLE_NAT_PLUGIN
+noinst_LTLIBRARIES += libjvpp_nat.la
+libjvpp_nat_la_SOURCES = jvpp-nat/jvpp_nat.c
+libjvpp_nat_la_CPPFLAGS = -Ijvpp-nat
+libjvpp_nat_la_LIBADD = $(JVPP_LIBS)
+libjvpp_nat_la_DEPENDENCIES = libjvpp_common.la
+
+BUILT_SOURCES += jvpp-nat/io_fd_vpp_jvpp_nat_JVppNatImpl.h
+JAR_FILES += jvpp-nat-$(PACKAGE_VERSION).jar
+CLEANDIRS += jvpp-nat/target
+
+jvpp_nat_json_files = @top_builddir@/plugins/nat/nat.api.json
+
+jvpp-nat/io_fd_vpp_jvpp_nat_JVppNatImpl.h: $(jvpp_registry_ok) $(jvpp_nat_json_files)
+ $(call japigen,nat,JVppNatImpl)
+endif
+
+#
+# iOAM Trace Plugin
+#
+if ENABLE_IOAM_PLUGIN
+noinst_LTLIBRARIES += libjvpp_ioamtrace.la
+libjvpp_ioamtrace_la_SOURCES = jvpp-ioamtrace/jvpp_ioam_trace.c
+libjvpp_ioamtrace_la_LIBADD = $(JVPP_LIBS)
+libjvpp_ioamtrace_la_DEPENDENCIES = libjvpp_common.la
+
+BUILT_SOURCES += jvpp-ioamtrace/io_fd_vpp_jvpp_ioamtrace_JVppIoamtraceImpl.h
+JAR_FILES += jvpp-ioamtrace-$(PACKAGE_VERSION).jar
+CLEANDIRS += jvpp-ioamtrace/target
+
+jvpp_ioamtrace_json_files = @top_builddir@/plugins/ioam/lib-trace/trace.api.json
+
+jvpp-ioamtrace/io_fd_vpp_jvpp_ioamtrace_JVppIoamtraceImpl.h: $(jvpp_registry_ok) $(jvpp_ioamtrace_json_files)
+ $(call japigen,ioamtrace,JVppIoamtraceImpl)
+
+#
+# iOAM POT Plugin
+#
+noinst_LTLIBRARIES += libjvpp_ioampot.la
+libjvpp_ioampot_la_SOURCES = jvpp-ioampot/jvpp_ioam_pot.c
+libjvpp_ioampot_la_LIBADD = $(JVPP_LIBS)
+libjvpp_ioampot_la_DEPENDENCIES = libjvpp_common.la
+
+BUILT_SOURCES += jvpp-ioampot/io_fd_vpp_jvpp_ioampot_JVppIoampotImpl.h
+JAR_FILES += jvpp-ioampot-$(PACKAGE_VERSION).jar
+CLEANDIRS += jvpp-ioampot/target
+
+jvpp_ioampot_json_files = @top_builddir@/plugins/ioam/lib-pot/pot.api.json
+
+jvpp-ioampot/io_fd_vpp_jvpp_ioampot_JVppIoampotImpl.h: $(jvpp_registry_ok) $(jvpp_ioampot_json_files)
+ $(call japigen,ioampot,JVppIoampotImpl)
+
+#
+# iOAM Export Plugin
+#
+noinst_LTLIBRARIES += libjvpp_ioamexport.la
+libjvpp_ioamexport_la_SOURCES = jvpp-ioamexport/jvpp_ioam_export.c
+libjvpp_ioamexport_la_LIBADD = $(JVPP_LIBS)
+libjvpp_ioamexport_la_DEPENDENCIES = libjvpp_common.la
+
+BUILT_SOURCES += jvpp-ioamexport/io_fd_vpp_jvpp_ioamexport_JVppIoamexportImpl.h
+JAR_FILES += jvpp-ioamexport-$(PACKAGE_VERSION).jar
+CLEANDIRS += jvpp-ioamexport/target
+
+jvpp_ioamexport_json_files = @top_builddir@/plugins/ioam/export/ioam_export.api.json
+
+jvpp-ioamexport/io_fd_vpp_jvpp_ioamexport_JVppIoamexportImpl.h: $(jvpp_registry_ok) $(jvpp_ioamexport_json_files)
+ $(call japigen,ioamexport,JVppIoamexportImpl)
+endif
+
+#
+# JAR creation
+#
+jvpp-%-$(PACKAGE_VERSION).jar: libjvpp_%.la
+ @echo " JAR $@"
+ @cp .libs/libjvpp_$*.so jvpp-$*/target
+ @$(JAR) cf $(JARFLAGS) $@ -C jvpp-$*/target .
+
+jardir = $(prefix)/share/java
+jar_DATA = $(JAR_FILES)
+
+all-local: $(JAR_FILES)
+
+#
+# Cleanup
+#
+CLEANFILES = jvpp-registry.ok $(JAR_FILES) $(BUILT_SOURCES) *.files */*.h
+
+clean-local:
+ rm -rf $(CLEANDIRS)
diff --git a/src/vpp-api/java/Readme.txt b/src/vpp-api/java/Readme.txt
new file mode 100644
index 00000000..689b9b37
--- /dev/null
+++ b/src/vpp-api/java/Readme.txt
@@ -0,0 +1,236 @@
+= JVpp
+
+JVpp is a JNI-based Java API for VPP.
+
+== Features
+It is:
+
+* Asynchronous
+* Fully generated
+* Lightweight
+
+== Architecture
+
+=== Plugin support
+
+ /-------------\ /--------------\ /---------------\
+ | JvppPlugin1 +<-------+ JVppRegistry +--------->+ VppConnection |
+ \-------------/ inits \--+-----------/ uses \---------------/
+ |
+ /-------------\ |
+ | JvppPlugin2 +<----------+ inits
+ \-------------/ |
+ |
+ ... |
+ |
+ /----------\ |
+ | JVppCore +<-------------+
+ \----------/
+
+
+JVppRegistry opens the connection to VPP (VppConnection) and manages JVpp plugins.
+Each plugin needs to be registered with the JVppRegistry. Registration involves
+plugin initialization (providing the JNI implementation with a JVppCallback
+reference, the VPP client identifier and the VPP shared memory queue address).
+
+An API user sends a message by calling a method of the appropriate plugin
+interface. The call is delegated to the JNI implementation provided by that
+plugin. When the JNI code receives a reply, it invokes the JVppCallback method
+that corresponds to the received reply message.
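+
+For illustration, a minimal connection sketch in Java, mirroring the ACL example
+code shipped under jvpp-acl (the client name is arbitrary; the facade
+constructor performs the plugin registration):
+
+[source,java]
+----
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.acl.JVppAclImpl;
+import io.fd.vpp.jvpp.acl.future.FutureJVppAclFacade;
+
+public final class ConnectExample {
+    public static void main(String[] args) throws Exception {
+        // try-with-resources closes the facade and the registry, which in
+        // turn disconnects from VPP
+        try (JVppRegistry registry = new JVppRegistryImpl("example-client");
+             FutureJVppAclFacade jvpp = new FutureJVppAclFacade(registry, new JVppAclImpl())) {
+            // send requests through jvpp here
+        }
+    }
+}
+----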
+
+=== JVppCore as an example of JVpp plugin architecture
+
+ JVpp Java
+
+ /--------------\ /----------\ /------------\ /------\
+ | JVppRegistry | | JVppCore | | Callbacks | | DTOs |
+ \----+---------/ \----+-----/ \------+-----/ \------/
+ ^ ^ ^
+ | implements | implements | implements
+ /----+--------------\ /---+----------\ /-----+---------\
+ | JVppRegistryImpl* +-------->+ JVppCoreImpl | | JVppCallback |
+ \-------+-----------/ inits \---+----------/ \-------+-------/
+ | | ^
+ | | uses | calls back
+ | | |
+----------|--------------------------|-----------------------|---------------------
+ | | |
+ C JNI | +-------------------+ | /-----------------\
+ v | | +-->+ jvpp_core_gen.h |
+ /--------+--------\ | | | \-----------------/
+ | jvpp_registry.c* +--+  /--------+----+----\    |  |       |
+ \-----------------/ | | << shared lib >> | /-+--+---+------\
+ + ->+ jvpp_common* <--------+ jvpp_core.c* |
+ uses \------------------/ uses \---------------/
+
+
+* Components marked with an asterisk contain manually crafted code which,
+together with the generated classes, forms JVpp. The exception is Callbacks and
+DTOs: the callback and dto packages also contain hand-crafted marker interfaces
+(dto/JVppRequest, dto/JVppReply, dto/JVppDump, dto/JVppReplyDump, callback/JVppCallback).
+
+Note: jvpp_core.c calls back the JVppCallback instance with every response. An instance of the
+JVppCallback is provided to jvpp_core.c by JVppRegistryImpl on JVppCoreImpl initialization.
+
+JVpp also includes a Future facade. It is an asynchronous API on top of
+low-level JVpp that returns Future objects, and it wraps all replies to a dump
+request in one DTO using the control_ping message (provided by JVppRegistry).
+
+
+Future facade
+
+ /----------------\ /---------------\
+ | FutureJVppCore | +-->+ JVppRegistry* |
+ \-----+----------/ | \---------------/
+ ^ |
+ | implements | uses
+ | |
+ /--------+-------------\ | /------------------------------\
+ | FutureJVppCoreFacade +---+--->+ FutureJVppCoreFacadeCallback |
+ \---------+------------/ uses \-------+----------------------/
+ | |
+---------------|-----------------------------|-------------------------------
+ | uses | implements
+JVpp Java | |
+ | |
+ /----------\ | |
+ | JVppCore +<-+ |
+ \----+-----/ |
+ ^ |
+ | implements v
+ /----+---------\ /--------+---------------\
+ | JVppCoreImpl | | JVppCoreGlobalCallback |
+ \--------------/ \------------------------/
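+
+A minimal sketch of Future facade usage, based on the FutureApiTest example
+under jvpp-acl (all class names as used there):
+
+[source,java]
+----
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.acl.JVppAclImpl;
+import io.fd.vpp.jvpp.acl.dto.AclDetailsReplyDump;
+import io.fd.vpp.jvpp.acl.dto.AclDump;
+import io.fd.vpp.jvpp.acl.future.FutureJVppAclFacade;
+
+public final class FutureFacadeExample {
+    public static void main(String[] args) throws Exception {
+        try (JVppRegistry registry = new JVppRegistryImpl("future-example");
+             FutureJVppAclFacade jvpp = new FutureJVppAclFacade(registry, new JVppAclImpl())) {
+            // the dump future completes once the control_ping reply arrives;
+            // all AclDetails replies are wrapped in a single ReplyDump DTO
+            AclDetailsReplyDump dump = jvpp.aclDump(new AclDump()).toCompletableFuture().get();
+            System.out.println("Received " + dump.aclDetails.size() + " ACLs");
+        }
+    }
+}
+----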
+
+
+
+Another useful JVpp utility is the Callback facade. It is an asynchronous API
+that invokes a specific callback instance per call; the callback is provided
+when performing the call.
+
+
+Callback facade
+
+ /------------------\ /---------------\
+ | CallbackJVppCore | +-->+ JVppRegistry* |
+ \-----+------------/ | \---------------/
+ ^ |
+ | implements | uses
+ | |
+ /--------+---------------\ | /--------------------------\
+ | CallbackJVppCoreFacade +---+--->+ CallbackJVppCoreCallback |
+ \---------+--------------/ uses \-----+--------------------/
+ | |
+---------------|-----------------------------|-------------------------------
+ | uses | implements
+JVpp Java | |
+ | |
+ /----------\ | |
+ | JVppCore +<-+ |
+ \----+-----/ |
+ ^ |
+ | implements v
+ /----+---------\ /----------+-------------\
+ | JVppCoreImpl | | JVppCoreGlobalCallback |
+ \--------------/ \------------------------/
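+
+A sketch of Callback facade usage (the method and callback names below are
+assumptions for illustration; the authoritative signatures are generated per
+plugin by jvpp_callback_facade_gen.py, and registry/jvpp are assumed to come
+from a connection set up as shown earlier):
+
+[source,java]
+----
+// CallbackJVppCoreFacade is named in the diagram above; showVersion,
+// ShowVersionCallback and ShowVersionReply are illustrative names only
+CallbackJVppCoreFacade callbackFacade = new CallbackJVppCoreFacade(registry, jvpp);
+callbackFacade.showVersion(new ShowVersionCallback() {
+    @Override
+    public void onShowVersionReply(final ShowVersionReply reply) {
+        System.out.println("Got reply: " + reply); // runs on the RX thread
+    }
+
+    @Override
+    public void onError(final VppCallbackException ex) {
+        ex.printStackTrace();
+    }
+});
+----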
+
+
+== Package structure
+
+* *io.fd.vpp.jvpp* - top level package for the generated JVpp interface + implementation and the hand-crafted
+VppConnection interface + implementation - packaged as jvpp-registry-version.jar
+
+* *io.fd.vpp.jvpp.[plugin]* - top level package for generated JVpp interface + implementation
++ plugin's API tests - packaged as jvpp-[plugin]-version.jar
+
+** *dto* - package for DTOs generated from VPP API structures + base/marker hand-crafted interfaces
+(in case of jvpp-registry)
+** *callback* - package for low-level JVpp callbacks and a global callback interface aggregating all of
+the low-level JVpp callbacks
+** *future* - package for future based facade on top of JVpp and callbacks
+** *callfacade* - package for a callback based facade on top of JVpp and callbacks, allowing
+users to provide a callback per request
+** *test* - package for JVpp standalone tests. Can also serve as samples for JVpp.
+
+C code is structured into modules:
+
+* *jvpp_common* - shared library that provides jvpp_main_t reference used by jvpp_registry and plugins.
+
+* *jvpp_registry* - native library used by JVppRegistryImpl, responsible for:
+
+** VPP connection open/close
+** Rx thread to java thread attach
+** control ping message handling
+
+* *jvpp_core* - native library used by jvpp core plugin:
+** *jvpp_core.c* - contains hand crafted code for core plugin initialization
+** *jvpp_core_gen.h* - contains generated JNI compatible handlers for all requests and replies defined in vpe.api
+
+== Code generators
+All of the required code except the base/marker interfaces is generated using
+simple python2 code generators. The generators use the __defs_vpp_papi.py__ input
+file produced by __vppapigen__ from the vpe.api file.
+
+=== JNI compatible C code
+Produces a __jvpp_[plugin]_gen.h__ file containing JNI-compatible handlers for each
+VPP request and reply.
+
+[NOTE]
+====
+Source: jvpp_c_gen.py
+====
+
+=== Request/Reply DTOs
+For every structure in __defs_vpp_papi.py__ a POJO DTO is produced. Logically,
+there are the following types of DTOs:
+
+* Request - requests that can be sent to VPP and only a single response is expected
+* DumpRequest - requests that can be sent to VPP and a stream of responses is expected
+* Reply - reply to a simple request, or a single response from a dump-triggered response stream
+* ReplyDump - collection of replies from a single dump request
+* Notifications/Events - Not implemented yet
+
+[NOTE]
+====
+Source: dto_gen.py
+====
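+
+As an example, using request/reply DTO names from the ACL example code in this
+tree (and assuming a Future facade instance named jvpp):
+
+[source,java]
+----
+// Request DTO: aclIndex -1 asks VPP to create a new ACL rather than replace one
+AclAddReplace request = new AclAddReplace();
+request.aclIndex = -1;
+request.count = 2;
+request.r = AclTestData.createAclRules();
+
+// Reply DTO, matched to the request by the facade
+AclAddReplaceReply reply = jvpp.aclAddReplace(request).toCompletableFuture().get();
+----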
+
+=== JVpp
+Produces __JVpp.java__ and __JVppImpl.java__. This is the layer directly above the
+JNI-compatible C code.
+
+[NOTE]
+====
+Source: jvpp_impl_gen.py
+====
+
+=== Callbacks
+Produces a callback interface for each VPP reply, plus a global callback interface
+called __JVpp[plugin]GlobalCallback.java__ aggregating all of the callback
+interfaces. The JNI-compatible C code expects a single instance of this global
+callback and calls it with every reply.
+
+[NOTE]
+====
+Source: callback_gen.py
+====
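+
+A hypothetical shape of one generated per-reply callback (illustrative only; the
+authoritative output comes from callback_gen.py):
+
+[source,java]
+----
+// one interface per reply message; the global callback aggregates them all
+public interface AclDetailsCallback {
+    void onAclDetails(AclDetails reply);
+}
+----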
+
+=== Future facade
+Produces an asynchronous facade on top of JVpp and callbacks, which returns a Future
+that provides the matching reply once the VPP invocation finishes. Sources produced:
+__FutureJVpp[plugin].java, FutureJVpp[plugin]Facade.java and FutureJVpp[plugin]Callback.java__
+
+[NOTE]
+====
+Source: jvpp_future_facade_gen.py
+====
+
+=== Callback facade
+Similar to the Future facade, except that this facade takes a callback object as part
+of the invocation, and the callback is invoked with the result once the VPP invocation
+finishes. Sources produced:
+__CallbackJVpp[plugin].java, CallbackJVpp[plugin]Facade.java and CallbackJVpp[plugin]Callback.java__
+
+[NOTE]
+====
+Source: jvpp_callback_facade_gen.py
+====
diff --git a/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclExpectedDumpData.java b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclExpectedDumpData.java
new file mode 100644
index 00000000..4806052f
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclExpectedDumpData.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.acl.examples;
+
+
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_ADDRESS_2_AS_ARRAY;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_ADDRESS_AS_ARRAY;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_DST_ICMP_TYPE_END;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_DST_ICMP_TYPE_START;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_MAC;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_MAC_MASK;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_PREFIX;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_PREFIX_2;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_SRC_ICMP_TYPE_END;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.FIRST_RULE_SRC_ICMP_TYPE_START;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.ICMP_PROTOCOL;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_ADDRESS_2_AS_ARRAY;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_ADDRESS_AS_ARRAY;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_DST_PORT_RANGE_END;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_DST_PORT_RANGE_START;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_MAC;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_MAC_MASK;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_PREFIX;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_PREFIX_2;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_SRC_PORT_RANGE_END;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.SECOND_RULE_SRC_PORT_RANGE_START;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.UDP_PROTOCOL;
+
+import io.fd.vpp.jvpp.acl.dto.AclDetails;
+import io.fd.vpp.jvpp.acl.dto.AclInterfaceListDetails;
+import io.fd.vpp.jvpp.acl.dto.MacipAclDetails;
+import io.fd.vpp.jvpp.acl.types.AclRule;
+import io.fd.vpp.jvpp.acl.types.MacipAclRule;
+import java.util.Arrays;
+
+class AclExpectedDumpData {
+
+ static void verifyMacIpDump(final MacipAclDetails macipAclDetails) {
+        // asserting data created by the previous call
+ assertEquals(0, macipAclDetails.aclIndex);
+ assertEquals(2, macipAclDetails.count);
+
+ final MacipAclRule currentIpv4Rule = macipAclDetails.r[0];
+ final MacipAclRule currentIpv6Rule = macipAclDetails.r[1];
+
+        // Comparing one property at a time to better pinpoint what is wrong
+        // Ipv4 rule
+ assertEquals(0, currentIpv4Rule.isIpv6);
+ assertEquals(1, currentIpv4Rule.isPermit);
+
+        // cutting expected ipv4 to 4 bytes; vpp always sends it as 16
+ assertArrays(FIRST_RULE_ADDRESS_AS_ARRAY, Arrays.copyOfRange(currentIpv4Rule.srcIpAddr, 0, 4));
+ assertEquals(FIRST_RULE_PREFIX, currentIpv4Rule.srcIpPrefixLen);
+ assertArrays(FIRST_RULE_MAC, currentIpv4Rule.srcMac);
+ assertArrays(FIRST_RULE_MAC_MASK, currentIpv4Rule.srcMacMask);
+
+ //Ipv6 rule
+ assertEquals(1, currentIpv6Rule.isIpv6);
+ assertEquals(0, currentIpv6Rule.isPermit);
+ assertArrays(SECOND_RULE_ADDRESS_AS_ARRAY, currentIpv6Rule.srcIpAddr);
+ assertEquals(SECOND_RULE_PREFIX, currentIpv6Rule.srcIpPrefixLen);
+ assertArrays(SECOND_RULE_MAC, currentIpv6Rule.srcMac);
+ assertArrays(SECOND_RULE_MAC_MASK, currentIpv6Rule.srcMacMask);
+ }
+
+ static void verifyAclDump(final AclDetails aclDetails) {
+ assertEquals(0, aclDetails.aclIndex);
+ assertEquals(2, aclDetails.count);
+
+ final AclRule currentIpv4Rule = aclDetails.r[0];
+ final AclRule currentIpv6Rule = aclDetails.r[1];
+
+        // Comparing one property at a time to better pinpoint what is wrong
+        // Ipv4 rule
+ assertEquals(0, currentIpv4Rule.isIpv6);
+ assertEquals(1, currentIpv4Rule.isPermit);
+
+        // cutting expected ipv4 to 4 bytes; vpp always sends it as 16
+ assertArrays(FIRST_RULE_ADDRESS_AS_ARRAY, Arrays.copyOfRange(currentIpv4Rule.srcIpAddr, 0, 4));
+ assertEquals(FIRST_RULE_PREFIX, currentIpv4Rule.srcIpPrefixLen);
+ assertArrays(FIRST_RULE_ADDRESS_2_AS_ARRAY, Arrays.copyOfRange(currentIpv4Rule.dstIpAddr, 0, 4));
+ assertEquals(FIRST_RULE_PREFIX_2, currentIpv4Rule.dstIpPrefixLen);
+
+ assertEquals(ICMP_PROTOCOL, currentIpv4Rule.proto);
+ assertEquals(FIRST_RULE_SRC_ICMP_TYPE_START, currentIpv4Rule.srcportOrIcmptypeFirst);
+ assertEquals(FIRST_RULE_SRC_ICMP_TYPE_END, currentIpv4Rule.srcportOrIcmptypeLast);
+ assertEquals(FIRST_RULE_DST_ICMP_TYPE_START, currentIpv4Rule.dstportOrIcmpcodeFirst);
+ assertEquals(FIRST_RULE_DST_ICMP_TYPE_END, currentIpv4Rule.dstportOrIcmpcodeLast);
+
+ assertArrays(SECOND_RULE_ADDRESS_AS_ARRAY, currentIpv6Rule.srcIpAddr);
+ assertEquals(SECOND_RULE_PREFIX, currentIpv6Rule.srcIpPrefixLen);
+ assertArrays(SECOND_RULE_ADDRESS_2_AS_ARRAY, currentIpv6Rule.dstIpAddr);
+ assertEquals(SECOND_RULE_PREFIX_2, currentIpv6Rule.dstIpPrefixLen);
+
+ assertEquals(UDP_PROTOCOL, currentIpv6Rule.proto);
+ assertEquals(SECOND_RULE_SRC_PORT_RANGE_START, currentIpv6Rule.srcportOrIcmptypeFirst);
+ assertEquals(SECOND_RULE_SRC_PORT_RANGE_END, currentIpv6Rule.srcportOrIcmptypeLast);
+ assertEquals(SECOND_RULE_DST_PORT_RANGE_START, currentIpv6Rule.dstportOrIcmpcodeFirst);
+ assertEquals(SECOND_RULE_DST_PORT_RANGE_END, currentIpv6Rule.dstportOrIcmpcodeLast);
+ }
+
+ static void verifyAclInterfaceList(final AclInterfaceListDetails aclInterfaceListDetails) {
+ assertEquals(1, aclInterfaceListDetails.count);
+ assertEquals(1, aclInterfaceListDetails.acls[0]);
+ assertEquals(0, aclInterfaceListDetails.nInput);
+ assertEquals(0, aclInterfaceListDetails.swIfIndex);
+ }
+
+ private static void assertArrays(final byte[] expected, final byte[] actual) {
+ if (!Arrays.equals(expected, actual)) {
+ throw new IllegalArgumentException(
+ String.format("Expected[%s]/Actual[%s]", Arrays.toString(expected), Arrays.toString(actual)));
+ }
+ }
+
+ private static void assertEquals(final int expected, final int actual) {
+ if (expected != actual) {
+ throw new IllegalArgumentException(String.format("Expected[%s]/Actual[%s]", expected, actual));
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclTestData.java b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclTestData.java
new file mode 100644
index 00000000..199b1b6b
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclTestData.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.acl.examples;
+
+
+import io.fd.vpp.jvpp.acl.types.AclRule;
+import io.fd.vpp.jvpp.acl.types.MacipAclRule;
+
+class AclTestData {
+
+ static final byte[] FIRST_RULE_ADDRESS_AS_ARRAY = {-64, -88, 2, 1};
+ static final byte[] FIRST_RULE_ADDRESS_2_AS_ARRAY = {-64, -88, 2, 3};
+ static final byte[] SECOND_RULE_ADDRESS_AS_ARRAY =
+ {32, 1, 13, -72, 10, 11, 18, -16, 0, 0, 0, 0, 0, 0, 0, 1};
+ static final byte[] SECOND_RULE_ADDRESS_2_AS_ARRAY =
+ {32, 1, 13, -72, 10, 11, 18, -16, 0, 0, 0, 0, 0, 0, 0, 1};
+ static final byte[] FIRST_RULE_MAC = {11, 11, 11, 11, 11, 11};
+ static final byte[] FIRST_RULE_MAC_MASK = {0, 0, 0, 0, 0, 0};
+ static final byte[] SECOND_RULE_MAC = {11, 12, 11, 11, 12, 11};
+ static final byte[] SECOND_RULE_MAC_MASK = {(byte) 170, 0, 0, 0, 0, 0};
+ static final int FIRST_RULE_PREFIX = 32;
+ static final int FIRST_RULE_PREFIX_2 = 24;
+ static final int SECOND_RULE_PREFIX = 64;
+ static final int SECOND_RULE_PREFIX_2 = 62;
+ static final int FIRST_RULE_DST_ICMP_TYPE_START = 0;
+ static final int FIRST_RULE_DST_ICMP_TYPE_END = 8;
+ static final int FIRST_RULE_SRC_ICMP_TYPE_START = 1;
+ static final int FIRST_RULE_SRC_ICMP_TYPE_END = 7;
+ static final int ICMP_PROTOCOL = 1;
+ static final int SECOND_RULE_DST_PORT_RANGE_START = 2000;
+ static final int SECOND_RULE_DST_PORT_RANGE_END = 6000;
+ static final int SECOND_RULE_SRC_PORT_RANGE_START = 400;
+ static final int SECOND_RULE_SRC_PORT_RANGE_END = 2047;
+ static final int UDP_PROTOCOL = 17;
+
+
+ static MacipAclRule[] createMacipRules() {
+ MacipAclRule ruleOne = new MacipAclRule();
+ ruleOne.isIpv6 = 0;
+ ruleOne.isPermit = 1;
+ ruleOne.srcIpAddr = FIRST_RULE_ADDRESS_AS_ARRAY;
+ ruleOne.srcIpPrefixLen = FIRST_RULE_PREFIX;
+ ruleOne.srcMac = FIRST_RULE_MAC;
+        ruleOne.srcMacMask = FIRST_RULE_MAC_MASK; // no mask
+
+ MacipAclRule ruleTwo = new MacipAclRule();
+ ruleTwo.isIpv6 = 1;
+ ruleTwo.isPermit = 0;
+ ruleTwo.srcIpAddr = SECOND_RULE_ADDRESS_AS_ARRAY;
+ ruleTwo.srcIpPrefixLen = SECOND_RULE_PREFIX;
+ ruleTwo.srcMac = SECOND_RULE_MAC;
+ ruleTwo.srcMacMask = SECOND_RULE_MAC_MASK;
+
+ return new MacipAclRule[]{ruleOne, ruleTwo};
+ }
+
+ static AclRule[] createAclRules() {
+ AclRule ruleOne = new AclRule();
+
+ ruleOne.isIpv6 = 0;
+ ruleOne.isPermit = 1;
+ ruleOne.srcIpAddr = FIRST_RULE_ADDRESS_AS_ARRAY;
+ ruleOne.srcIpPrefixLen = FIRST_RULE_PREFIX;
+ ruleOne.dstIpAddr = FIRST_RULE_ADDRESS_2_AS_ARRAY;
+ ruleOne.dstIpPrefixLen = FIRST_RULE_PREFIX_2;
+ ruleOne.dstportOrIcmpcodeFirst = FIRST_RULE_DST_ICMP_TYPE_START;
+ ruleOne.dstportOrIcmpcodeLast = FIRST_RULE_DST_ICMP_TYPE_END;
+ ruleOne.srcportOrIcmptypeFirst = FIRST_RULE_SRC_ICMP_TYPE_START;
+ ruleOne.srcportOrIcmptypeLast = FIRST_RULE_SRC_ICMP_TYPE_END;
+ ruleOne.proto = ICMP_PROTOCOL; //ICMP
+
+ AclRule ruleTwo = new AclRule();
+ ruleTwo.isIpv6 = 1;
+ ruleTwo.isPermit = 0;
+ ruleTwo.srcIpAddr = SECOND_RULE_ADDRESS_AS_ARRAY;
+ ruleTwo.srcIpPrefixLen = SECOND_RULE_PREFIX;
+ ruleTwo.dstIpAddr = SECOND_RULE_ADDRESS_2_AS_ARRAY;
+ ruleTwo.dstIpPrefixLen = SECOND_RULE_PREFIX_2;
+ ruleTwo.dstportOrIcmpcodeFirst = SECOND_RULE_DST_PORT_RANGE_START;
+ ruleTwo.dstportOrIcmpcodeLast = SECOND_RULE_DST_PORT_RANGE_END;
+ ruleTwo.srcportOrIcmptypeFirst = SECOND_RULE_SRC_PORT_RANGE_START;
+ ruleTwo.srcportOrIcmptypeLast = SECOND_RULE_SRC_PORT_RANGE_END;
+ ruleTwo.proto = UDP_PROTOCOL; //UDP
+
+ return new AclRule[]{ruleOne, ruleTwo};
+ }
+}
diff --git a/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclTestRequests.java b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclTestRequests.java
new file mode 100644
index 00000000..149ea46e
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/AclTestRequests.java
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.acl.examples;
+
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.createAclRules;
+import static io.fd.vpp.jvpp.acl.examples.AclTestData.createMacipRules;
+
+import io.fd.vpp.jvpp.VppInvocationException;
+import io.fd.vpp.jvpp.acl.dto.AclAddReplace;
+import io.fd.vpp.jvpp.acl.dto.AclAddReplaceReply;
+import io.fd.vpp.jvpp.acl.dto.AclDel;
+import io.fd.vpp.jvpp.acl.dto.AclDelReply;
+import io.fd.vpp.jvpp.acl.dto.AclDetailsReplyDump;
+import io.fd.vpp.jvpp.acl.dto.AclDump;
+import io.fd.vpp.jvpp.acl.dto.AclInterfaceListDetailsReplyDump;
+import io.fd.vpp.jvpp.acl.dto.AclInterfaceListDump;
+import io.fd.vpp.jvpp.acl.dto.AclInterfaceSetAclList;
+import io.fd.vpp.jvpp.acl.dto.AclInterfaceSetAclListReply;
+import io.fd.vpp.jvpp.acl.dto.MacipAclAdd;
+import io.fd.vpp.jvpp.acl.dto.MacipAclAddReply;
+import io.fd.vpp.jvpp.acl.dto.MacipAclAddReplace;
+import io.fd.vpp.jvpp.acl.dto.MacipAclAddReplaceReply;
+import io.fd.vpp.jvpp.acl.dto.MacipAclDel;
+import io.fd.vpp.jvpp.acl.dto.MacipAclDelReply;
+import io.fd.vpp.jvpp.acl.dto.MacipAclDetailsReplyDump;
+import io.fd.vpp.jvpp.acl.dto.MacipAclDump;
+import io.fd.vpp.jvpp.acl.future.FutureJVppAclFacade;
+import java.util.concurrent.ExecutionException;
+
+class AclTestRequests {
+
+ static MacipAclDetailsReplyDump sendMacIpDumpRequest(final FutureJVppAclFacade jvpp)
+ throws ExecutionException, InterruptedException {
+ System.out.println("Sending MacipAclDump request...");
+ MacipAclDetailsReplyDump dump = jvpp.macipAclDump(new MacipAclDump()).toCompletableFuture().get();
+ System.out.println("MacipAclDump returned");
+ return dump;
+ }
+
+ static void sendMacIpAddRequest(final FutureJVppAclFacade jvpp) throws InterruptedException, ExecutionException {
+ final MacipAclAdd request = createMacIpAddRequest();
+ System.out.printf("Sending MacipAclAdd request %s%n", request.toString());
+        final MacipAclAddReply reply = jvpp.macipAclAdd(request).toCompletableFuture().get();
+ System.out.printf("MacipAclAdd send result = %s%n", reply);
+ }
+
+ static void sendMacIpAddReplaceRequest(final FutureJVppAclFacade jvpp) throws InterruptedException, ExecutionException {
+ final MacipAclAddReplace request = createMacIpAddReplaceRequest();
+ System.out.printf("Sending MacipAclAddReplace request %s%n", request.toString());
+        final MacipAclAddReplaceReply reply = jvpp.macipAclAddReplace(request).toCompletableFuture().get();
+ System.out.printf("MacipAclAddReplace send result = %s%n", reply);
+ }
+
+ static void sendMacIpDelRequest(final FutureJVppAclFacade jvpp) throws InterruptedException, ExecutionException {
+ final MacipAclDel request = new MacipAclDel();
+ request.aclIndex = 0;
+ System.out.printf("Sending MacipAclDel request %s%n", request.toString());
+ final MacipAclDelReply reply = jvpp.macipAclDel(request).toCompletableFuture().get();
+ System.out.printf("MacipAclDel send result = %s%n", reply);
+ }
+
+ static void sendAclAddRequest(final FutureJVppAclFacade jvpp) throws InterruptedException, ExecutionException {
+ final AclAddReplace request = createAclAddRequest();
+ System.out.printf("Sending AclAddReplace request %s%n", request.toString());
+ final AclAddReplaceReply reply = jvpp.aclAddReplace(request).toCompletableFuture().get();
+ System.out.printf("AclAddReplace send result = %s%n", reply);
+ }
+
+ static AclDetailsReplyDump sendAclDumpRequest(final FutureJVppAclFacade jvpp)
+ throws InterruptedException, VppInvocationException, ExecutionException {
+ System.out.println("Sending AclDump request...");
+ final AclDetailsReplyDump dump = jvpp.aclDump(new AclDump()).toCompletableFuture().get();
+ System.out.printf("AclDump send result = %s%n", dump);
+ return dump;
+ }
+
+ static void sendAclDelRequest(final FutureJVppAclFacade jvpp) throws InterruptedException, ExecutionException {
+ final AclDel request = new AclDel();
+ request.aclIndex = 0;
+ System.out.printf("Sending AclDel request %s%n", request.toString());
+ final AclDelReply reply = jvpp.aclDel(request).toCompletableFuture().get();
+ System.out.printf("AclDel send result = %s%n", reply);
+ }
+
+ static AclInterfaceListDetailsReplyDump sendAclInterfaceListDumpRequest(final FutureJVppAclFacade jvpp)
+ throws InterruptedException, ExecutionException {
+ final AclInterfaceListDump request = new AclInterfaceListDump();
+ request.swIfIndex = 0;
+ System.out.printf("Sending AclInterfaceListDump request %s%n", request.toString());
+ final AclInterfaceListDetailsReplyDump dump = jvpp.aclInterfaceListDump(request).toCompletableFuture().get();
+ System.out.printf("AclInterfaceListDump send result = %s%n", dump);
+ return dump;
+ }
+
+ static void sendAclInterfaceSetAclList(final FutureJVppAclFacade jvpp)
+ throws InterruptedException, ExecutionException {
+ final AclInterfaceSetAclList request = new AclInterfaceSetAclList();
+ request.count = 1;
+ request.acls = new int[]{1};
+ request.swIfIndex = 0;
+ request.nInput = 0;
+ System.out.printf("Sending AclInterfaceSetAclList request %s%n", request.toString());
+ final AclInterfaceSetAclListReply reply = jvpp.aclInterfaceSetAclList(request).toCompletableFuture().get();
+ System.out.printf("AclInterfaceSetAclList send result = %s%n", reply);
+ }
+
+ static void sendAclInterfaceDeleteList(final FutureJVppAclFacade jvpp)
+ throws InterruptedException, ExecutionException {
+        // uses the same api but sets the list to empty
+ final AclInterfaceSetAclList request = new AclInterfaceSetAclList();
+ request.count = 0;
+ request.acls = new int[]{};
+ request.swIfIndex = 0;
+ request.nInput = 0;
+ System.out.printf("Sending AclInterfaceSetAclList(Delete) request %s%n", request.toString());
+ final AclInterfaceSetAclListReply reply = jvpp.aclInterfaceSetAclList(request).toCompletableFuture().get();
+ System.out.printf("AclInterfaceSetAclList(Delete) send result = %s%n", reply);
+ }
+
+ private static MacipAclAdd createMacIpAddRequest() {
+ MacipAclAdd request = new MacipAclAdd();
+
+ request.count = 2;
+ request.r = createMacipRules();
+ return request;
+ }
+
+ private static MacipAclAddReplace createMacIpAddReplaceRequest() {
+ MacipAclAddReplace request = new MacipAclAddReplace();
+
+ request.count = 2;
+ request.aclIndex = 0;
+ request.r = createMacipRules();
+ return request;
+ }
+
+ private static AclAddReplace createAclAddRequest() {
+ AclAddReplace request = new AclAddReplace();
+
+        request.aclIndex = -1; // -1 requests creation of a new ACL
+ request.count = 2;
+ request.r = createAclRules();
+ return request;
+ }
+}
diff --git a/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/FutureApiExample.java b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/FutureApiExample.java
new file mode 100644
index 00000000..862df8df
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/FutureApiExample.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.acl.examples;
+
+import static io.fd.vpp.jvpp.acl.examples.AclExpectedDumpData.verifyAclDump;
+import static io.fd.vpp.jvpp.acl.examples.AclExpectedDumpData.verifyAclInterfaceList;
+import static io.fd.vpp.jvpp.acl.examples.AclExpectedDumpData.verifyMacIpDump;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendAclAddRequest;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendAclDelRequest;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendAclDumpRequest;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendAclInterfaceDeleteList;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendAclInterfaceListDumpRequest;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendAclInterfaceSetAclList;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendMacIpAddRequest;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendMacIpDelRequest;
+import static io.fd.vpp.jvpp.acl.examples.AclTestRequests.sendMacIpDumpRequest;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.acl.JVppAclImpl;
+import io.fd.vpp.jvpp.acl.future.FutureJVppAclFacade;
+
+public class FutureApiExample {
+
+ public static void main(String[] args) throws Exception {
+        testFutureApi();
+ }
+
+    private static void testFutureApi() throws Exception {
+        System.out.println("Testing Java future API for acl plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("macipAclAddTest");
+ final FutureJVppAclFacade jvpp = new FutureJVppAclFacade(registry, new JVppAclImpl())) {
+
+            // adds, dumps and verifies a Mac-Ip acl
+ sendMacIpAddRequest(jvpp);
+ verifyMacIpDump(sendMacIpDumpRequest(jvpp).macipAclDetails.get(0));
+
+            // adds, dumps and verifies an Acl
+ sendAclAddRequest(jvpp);
+ verifyAclDump(sendAclDumpRequest(jvpp).aclDetails.get(0));
+
+            // assigns, dumps and verifies an interface ACL list
+ sendAclInterfaceSetAclList(jvpp);
+ verifyAclInterfaceList(sendAclInterfaceListDumpRequest(jvpp).aclInterfaceListDetails.get(0));
+
+ // deletes all created data
+ sendAclInterfaceDeleteList(jvpp);
+ sendAclDelRequest(jvpp);
+ sendMacIpDelRequest(jvpp);
+
+ System.out.println("Disconnecting...");
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/Readme.txt b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/Readme.txt
new file mode 100644
index 00000000..d17fbfc2
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/examples/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-acl-17.10.jar io.fd.vpp.jvpp.acl.examples.FutureApiExample
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-acl-17.10.jar io.fd.vpp.jvpp.acl.examples.FutureApiExample
diff --git a/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/CallbackApiTest.java b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/CallbackApiTest.java
new file mode 100644
index 00000000..a7bbb7f4
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/CallbackApiTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.acl.test;
+
+import io.fd.vpp.jvpp.AbstractCallbackApiTest;
+import io.fd.vpp.jvpp.acl.JVppAclImpl;
+
+import java.util.logging.Logger;
+
+public class CallbackApiTest extends AbstractCallbackApiTest {
+
+ private static Logger LOG = Logger.getLogger(CallbackApiTest.class.getName());
+
+
+ public static void main(String[] args) throws Exception {
+ LOG.info("Testing ControlPing using Java callback API for core plugin");
+ testControlPing(args[0], new JVppAclImpl());
+ }
+}
diff --git a/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/FutureApiTest.java b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/FutureApiTest.java
new file mode 100644
index 00000000..ff1c73c4
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/FutureApiTest.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.acl.test;
+
+import io.fd.vpp.jvpp.Assertions;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.acl.JVppAclImpl;
+import io.fd.vpp.jvpp.acl.dto.AclDetailsReplyDump;
+import io.fd.vpp.jvpp.acl.dto.AclDump;
+import io.fd.vpp.jvpp.acl.future.FutureJVppAclFacade;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.logging.Logger;
+
+public class FutureApiTest {
+
+ private static final Logger LOG = Logger.getLogger(FutureApiTest.class.getName());
+
+ public static void main(String[] args) throws Exception {
+ testFutureApi(args);
+ }
+
+ private static void testFutureApi(String[] args) throws Exception {
+ LOG.info("Testing Java future API for core plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("FutureApiTest", args[0]);
+ final FutureJVppAclFacade jvppFacade = new FutureJVppAclFacade(registry, new JVppAclImpl())) {
+ LOG.info("Successfully connected to VPP");
+
+ testAclDump(jvppFacade);
+
+ LOG.info("Disconnecting...");
+ }
+ }
+
+ private static void testAclDump(final FutureJVppAclFacade jvpp) throws Exception {
+ LOG.info("Sending AclDump request...");
+ final AclDump request = new AclDump();
+
+ final CompletableFuture<AclDetailsReplyDump>
+ replyFuture = jvpp.aclDump(request).toCompletableFuture();
+ final AclDetailsReplyDump reply = replyFuture.get();
+
+ Assertions.assertNotNull(reply);
+ }
+
+}
diff --git a/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/Readme.txt b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/Readme.txt
new file mode 100644
index 00000000..1b465851
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/io/fd/vpp/jvpp/acl/test/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-acl-17.10.jar io.fd.vpp.jvpp.acl.test.[test-name]
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-acl-17.10.jar io.fd.vpp.jvpp.acl.test.[test-name]
diff --git a/src/vpp-api/java/jvpp-acl/jvpp_acl.c b/src/vpp-api/java/jvpp-acl/jvpp_acl.c
new file mode 100644
index 00000000..f5467e99
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/jvpp_acl.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <acl/acl_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <acl/acl_all_api_h.h>
+#undef vl_typedefs
+
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#if VPPJNI_DEBUG == 1
+ #define DEBUG_LOG(...) clib_warning(__VA_ARGS__)
+#else
+ #define DEBUG_LOG(...)
+#endif
+
+#include <jvpp-common/jvpp_common.h>
+
+#include "jvpp-acl/io_fd_vpp_jvpp_acl_JVppAclImpl.h"
+#include "jvpp_acl.h"
+#include "jvpp-acl/jvpp_acl_gen.h"
+
+/*
+ * Class:     io_fd_vpp_jvpp_acl_JVppAclImpl
+ * Method: init0
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_acl_JVppAclImpl_init0
+ (JNIEnv *env, jclass clazz, jobject callback, jlong queue_address, jint my_client_index) {
+ acl_main_t * plugin_main = &acl_main;
+ clib_warning ("Java_io_fd_vpp_jvpp_acl_JVppAclImpl_init0");
+
+ plugin_main->my_client_index = my_client_index;
+ plugin_main->vl_input_queue = uword_to_pointer (queue_address, unix_shared_memory_queue_t *);
+
+ plugin_main->callbackObject = (*env)->NewGlobalRef(env, callback);
+ plugin_main->callbackClass = (jclass)(*env)->NewGlobalRef(env, (*env)->GetObjectClass(env, callback));
+
+ // verify API has not changed since jar generation
+    #define _(N) \
+        get_message_id(env, #N);
+        foreach_supported_api_message;
+ #undef _
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(get_message_id(env, #N), #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_reply_handler;
+ #undef _
+}
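+
+/*
+ * Editor's sketch (illustrative, not part of the generated lists): the first
+ * macro block above expands foreach_supported_api_message into one
+ * get_message_id() call per "name_crc" key, roughly
+ *
+ *   get_message_id(env, "acl_dump_<crc>");  // throws IllegalStateException
+ *                                           // on API mismatch
+ *
+ * while the second block registers vl_api_*_t_handler for every reply listed
+ * in foreach_api_reply_handler. The real entries live in the generated
+ * jvpp_acl_gen.h; the message name and crc above are placeholders.
+ */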
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_acl_JVppAclImpl_close0
+(JNIEnv *env, jclass clazz) {
+ acl_main_t * plugin_main = &acl_main;
+
+ // cleanup:
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackClass);
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackObject);
+
+ plugin_main->callbackClass = NULL;
+ plugin_main->callbackObject = NULL;
+}
+
+/* Attach thread to JVM and cache class references when initiating JVPP ACL */
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ if (cache_class_references(env) != 0) {
+ clib_warning ("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ return JNI_VERSION_1_8;
+}
+
+/* Clean up cached references when disposing JVPP ACL */
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+ delete_class_references(env);
+}
diff --git a/src/vpp-api/java/jvpp-acl/jvpp_acl.h b/src/vpp-api/java/jvpp-acl/jvpp_acl.h
new file mode 100644
index 00000000..726f7298
--- /dev/null
+++ b/src/vpp-api/java/jvpp-acl/jvpp_acl.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_jvpp_acl_h__
+#define __included_jvpp_acl_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+/* Global state for JVPP-acl */
+typedef struct {
+ /* Pointer to shared memory queue */
+ unix_shared_memory_queue_t * vl_input_queue;
+
+ /* VPP api client index */
+ u32 my_client_index;
+
+ /* Callback object and class references enabling asynchronous Java calls */
+ jobject callbackObject;
+ jclass callbackClass;
+
+} acl_main_t;
+
+acl_main_t acl_main __attribute__((aligned (64)));
+
+
+#endif /* __included_jvpp_acl_h__ */
diff --git a/src/vpp-api/java/jvpp-common/jvpp_common.c b/src/vpp-api/java/jvpp-common/jvpp_common.c
new file mode 100644
index 00000000..c00298bf
--- /dev/null
+++ b/src/vpp-api/java/jvpp-common/jvpp_common.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _GNU_SOURCE /* for strcasestr(3) */
+
+#include <vnet/api_errno.h>
+#include "jvpp_common.h"
+
+#ifndef JVPP_DEBUG
+#define JVPP_DEBUG 0
+#endif
+
+#if JVPP_DEBUG == 1
+#define DEBUG_LOG(...) clib_warning(__VA_ARGS__)
+#else
+#define DEBUG_LOG(...)
+#endif
+
+#define _(error,errorCode,msg) \
+if (errorCode == code) \
+ message = msg; \
+else
+
+#define get_error_message(errno) \
+int code = errno; \
+foreach_vnet_api_error \
+ message = "Reason unknown";
+
+/* shared jvpp main structure */
+jvpp_main_t jvpp_main __attribute__((aligned (64)));
+
+void call_on_error(const char* callName, int contextId, int retval,
+ jclass callbackClass, jobject callbackObject,
+ jclass callbackExceptionClass) {
+ DEBUG_LOG("\nCallOnError : callback=%s, retval=%d, context=%d\n", callName,
+ clib_net_to_host_u32(retval), clib_net_to_host_u32(context));
+ JNIEnv *env = jvpp_main.jenv;
+ if (!callbackClass) {
+ DEBUG_LOG("CallOnError : jm->callbackClass is null!\n");
+ return;
+ }
+ jmethodID excConstructor = (*env)->GetMethodID(env, callbackExceptionClass,
+ "<init>", "(Ljava/lang/String;Ljava/lang/String;II)V");
+ if (!excConstructor) {
+ DEBUG_LOG("CallOnError : excConstructor is null!\n");
+ return;
+ }
+ jmethodID callbackExcMethod = (*env)->GetMethodID(env, callbackClass,
+ "onError", "(Lio/fd/vpp/jvpp/VppCallbackException;)V");
+ if (!callbackExcMethod) {
+ DEBUG_LOG("CallOnError : callbackExcMethod is null!\n");
+ return;
+ }
+
+ char *message;
+ get_error_message(clib_net_to_host_u32(retval));
+ jobject excObject = (*env)->NewObject(env, callbackExceptionClass,
+ excConstructor, (*env)->NewStringUTF(env, callName),
+ (*env)->NewStringUTF(env, message),
+ clib_net_to_host_u32(contextId), clib_net_to_host_u32(retval));
+ if (!excObject) {
+ DEBUG_LOG("CallOnError : excObject is null!\n");
+ return;
+ }
+
+ (*env)->CallVoidMethod(env, callbackObject, callbackExcMethod, excObject);
+ DEBUG_LOG("CallOnError : Response sent\n");
+}
+#undef _
+
+u32 get_message_id(JNIEnv *env, const char *key) {
+ uword *p = hash_get_mem(jvpp_main.messages_hash, key);
+ if (!p) {
+ jclass exClass = (*env)->FindClass(env, "java/lang/IllegalStateException");
+ char *msgBuf = clib_mem_alloc(strlen(key) + 40);
+ strcpy(msgBuf, "API mismatch detected: ");
+ strcat(msgBuf, key);
+ strcat(msgBuf, " is missing");
+ DEBUG_LOG("get_message_id : %s\n", msgBuf);
+ (*env)->ThrowNew(env, exClass, msgBuf);
+ clib_mem_free(msgBuf);
+ return 0;
+ }
+ return (u32) p[0];
+}
diff --git a/src/vpp-api/java/jvpp-common/jvpp_common.h b/src/vpp-api/java/jvpp-common/jvpp_common.h
new file mode 100644
index 00000000..34502d04
--- /dev/null
+++ b/src/vpp-api/java/jvpp-common/jvpp_common.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_jvpp_common_h__
+#define __included_jvpp_common_h__
+#include <vppinfra/types.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+typedef struct {
+ /* Unique identifier used for matching replies with requests */
+ volatile u32 context_id;
+
+ /* Spinlock */
+ volatile u32 lock;
+ u32 tag;
+
+ /* JNI Native Method Interface pointer for message handlers */
+ JNIEnv *jenv;
+
+ /* JNI Invoke Interface pointer for attachment of rx thread to java thread */
+ JavaVM *jvm;
+
+ /* Convenience */
+ unix_shared_memory_queue_t * vl_input_queue;
+ u32 my_client_index;
+ uword *messages_hash;
+} jvpp_main_t;
+
+extern jvpp_main_t jvpp_main __attribute__((aligned (64)));
+
+static_always_inline u32 vppjni_get_context_id(jvpp_main_t * jm) {
+ return __sync_add_and_fetch(&jm->context_id, 1);
+}
+
+static_always_inline void vppjni_lock(jvpp_main_t * jm, u32 tag) {
+ while (__sync_lock_test_and_set(&jm->lock, 1))
+ ;
+ jm->tag = tag;
+}
+
+static_always_inline void vppjni_unlock(jvpp_main_t * jm) {
+ jm->tag = 0;
+ CLIB_MEMORY_BARRIER();
+ jm->lock = 0;
+}
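+
+/*
+ * Editor's sketch (assumes a request mp already allocated with
+ * vl_msg_api_alloc): senders typically pair a fresh context id with the
+ * spinlock around the shared memory queue, e.g.
+ *
+ *   u32 ctx = vppjni_get_context_id (&jvpp_main);
+ *   vppjni_lock (&jvpp_main, 1);
+ *   mp->context = clib_host_to_net_u32 (ctx);
+ *   vl_msg_api_send_shmem (jvpp_main.vl_input_queue, (u8 *) &mp);
+ *   vppjni_unlock (&jvpp_main);
+ */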
+
+/**
+ * Calls onError callback on callbackObject reference. Passes instance of callbackExceptionClass as parameter.
+ */
+void call_on_error(const char* callName, int contextId, int retval,
+ jclass callbackClass, jobject callbackObject,
+ jclass callbackExceptionClass);
+
+/**
+ * Retrieves message id based on message name and crc (key format: name_crc).
+ * Throws java/lang/IllegalStateException on failure.
+ */
+u32 get_message_id(JNIEnv *env, const char* key);
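+
+/* Example key (message name and crc purely illustrative): "acl_dump_<crc>". */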
+
+#endif /* __included_jvpp_common_h__ */
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackApiExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackApiExample.java
new file mode 100644
index 00000000..554a21bd
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackApiExample.java
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.VppCallbackException;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.callback.GetNodeIndexCallback;
+import io.fd.vpp.jvpp.core.callback.ShowVersionCallback;
+import io.fd.vpp.jvpp.core.callback.SwInterfaceCallback;
+import io.fd.vpp.jvpp.core.dto.GetNodeIndex;
+import io.fd.vpp.jvpp.core.dto.GetNodeIndexReply;
+import io.fd.vpp.jvpp.core.dto.ShowVersion;
+import io.fd.vpp.jvpp.core.dto.ShowVersionReply;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceDetails;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceDump;
+import java.nio.charset.StandardCharsets;
+
+public class CallbackApiExample {
+
+ public static void main(String[] args) throws Exception {
+ testCallbackApi();
+ }
+
+ private static void testCallbackApi() throws Exception {
+ System.out.println("Testing Java callback API with JVppRegistry");
+ try (final JVppRegistry registry = new JVppRegistryImpl("CallbackApiExample");
+ final JVpp jvpp = new JVppCoreImpl()) {
+ registry.register(jvpp, new TestCallback());
+
+ System.out.println("Sending ShowVersion request...");
+ final int result = jvpp.send(new ShowVersion());
+ System.out.printf("ShowVersion send result = %d%n", result);
+
+ System.out.println("Sending GetNodeIndex request...");
+ GetNodeIndex getNodeIndexRequest = new GetNodeIndex();
+ getNodeIndexRequest.nodeName = "non-existing-node".getBytes(StandardCharsets.UTF_8);
+ jvpp.send(getNodeIndexRequest);
+
+ System.out.println("Sending SwInterfaceDump request...");
+ SwInterfaceDump swInterfaceDumpRequest = new SwInterfaceDump();
+ swInterfaceDumpRequest.nameFilterValid = 0;
+ swInterfaceDumpRequest.nameFilter = "".getBytes(StandardCharsets.UTF_8);
+ jvpp.send(swInterfaceDumpRequest);
+
+ Thread.sleep(1000);
+ System.out.println("Disconnecting...");
+ }
+ Thread.sleep(1000);
+ }
+
+ static class TestCallback implements GetNodeIndexCallback, ShowVersionCallback, SwInterfaceCallback {
+
+ @Override
+ public void onGetNodeIndexReply(final GetNodeIndexReply msg) {
+ System.out.printf("Received GetNodeIndexReply: %s%n", msg);
+ }
+
+ @Override
+ public void onShowVersionReply(final ShowVersionReply msg) {
+ System.out.printf("Received ShowVersionReply: context=%d, program=%s, version=%s, "
+ + "buildDate=%s, buildDirectory=%s%n",
+ msg.context,
+ new String(msg.program, StandardCharsets.UTF_8),
+ new String(msg.version, StandardCharsets.UTF_8),
+ new String(msg.buildDate, StandardCharsets.UTF_8),
+ new String(msg.buildDirectory, StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void onSwInterfaceDetails(final SwInterfaceDetails msg) {
+ System.out.printf("Received SwInterfaceDetails: interfaceName=%s, l2AddressLength=%d, adminUpDown=%d, "
+ + "linkUpDown=%d, linkSpeed=%d, linkMtu=%d%n",
+ new String(msg.interfaceName, StandardCharsets.UTF_8), msg.l2AddressLength, msg.adminUpDown,
+ msg.linkUpDown, msg.linkSpeed, (int) msg.linkMtu);
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception: call=%s, context=%d, retval=%d%n", ex.getMethodName(),
+ ex.getCtxId(), ex.getErrorCode());
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeExample.java
new file mode 100644
index 00000000..2f77f0f1
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeExample.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.VppCallbackException;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.callback.GetNodeIndexCallback;
+import io.fd.vpp.jvpp.core.callback.ShowVersionCallback;
+import io.fd.vpp.jvpp.core.callfacade.CallbackJVppCoreFacade;
+import io.fd.vpp.jvpp.core.dto.GetNodeIndex;
+import io.fd.vpp.jvpp.core.dto.GetNodeIndexReply;
+import io.fd.vpp.jvpp.core.dto.ShowVersionReply;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * CallbackJVppFacade together with CallbackJVppFacadeCallback allow setting a different callback for each request.
+ * This is more convenient than the approach shown in CallbackApiExample.
+ */
+public class CallbackJVppFacadeExample {
+
+ private static ShowVersionCallback showVersionCallback1 = new ShowVersionCallback() {
+ @Override
+ public void onShowVersionReply(final ShowVersionReply msg) {
+ System.out.printf("ShowVersionCallback1 received ShowVersionReply: context=%d, program=%s,"
+ + "version=%s, buildDate=%s, buildDirectory=%s%n", msg.context,
+ new String(msg.program, StandardCharsets.UTF_8),
+ new String(msg.version, StandardCharsets.UTF_8),
+ new String(msg.buildDate, StandardCharsets.UTF_8),
+ new String(msg.buildDirectory, StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception in showVersionCallback1: call=%s, reply=%d, context=%d%n",
+ ex.getMethodName(), ex.getErrorCode(), ex.getCtxId());
+ }
+ };
+
+ private static ShowVersionCallback showVersionCallback2 = new ShowVersionCallback() {
+ @Override
+ public void onShowVersionReply(final ShowVersionReply msg) {
+ System.out.printf("ShowVersionCallback2 received ShowVersionReply: context=%d, program=%s,"
+ + "version=%s, buildDate=%s, buildDirectory=%s%n", msg.context,
+ new String(msg.program, StandardCharsets.UTF_8),
+ new String(msg.version, StandardCharsets.UTF_8),
+ new String(msg.buildDate, StandardCharsets.UTF_8),
+ new String(msg.buildDirectory, StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception in showVersionCallback2: call=%s, reply=%d, context=%d%n",
+ ex.getMethodName(), ex.getErrorCode(), ex.getCtxId());
+ }
+
+ };
+
+ private static GetNodeIndexCallback getNodeIndexCallback = new GetNodeIndexCallback() {
+ @Override
+ public void onGetNodeIndexReply(final GetNodeIndexReply msg) {
+ System.out.printf("Received GetNodeIndexReply: %s%n", msg);
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception in getNodeIndexCallback: call=%s, reply=%d, context=%d%n",
+ ex.getMethodName(), ex.getErrorCode(), ex.getCtxId());
+ }
+ };
+
+ private static void testCallbackFacade() throws Exception {
+ System.out.println("Testing CallbackJVppFacade");
+
+ try (final JVppRegistry registry = new JVppRegistryImpl("CallbackFacadeExample");
+ final CallbackJVppCoreFacade callbackFacade = new CallbackJVppCoreFacade(registry, new JVppCoreImpl())) {
+ System.out.println("Successfully connected to VPP");
+
+ callbackFacade.showVersion(showVersionCallback1);
+ callbackFacade.showVersion(showVersionCallback2);
+
+ GetNodeIndex getNodeIndexRequest = new GetNodeIndex();
+ getNodeIndexRequest.nodeName = "dummyNode".getBytes(StandardCharsets.UTF_8);
+ callbackFacade.getNodeIndex(getNodeIndexRequest, getNodeIndexCallback);
+
+ Thread.sleep(2000);
+ System.out.println("Disconnecting...");
+ }
+ Thread.sleep(1000);
+ }
+
+ public static void main(String[] args) throws Exception {
+ testCallbackFacade();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java
new file mode 100644
index 00000000..308dad9f
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.VppCallbackException;
+import io.fd.vpp.jvpp.core.JVppCore;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.callback.WantInterfaceEventsCallback;
+import io.fd.vpp.jvpp.core.callfacade.CallbackJVppCoreFacade;
+import io.fd.vpp.jvpp.core.dto.WantInterfaceEventsReply;
+
+public class CallbackJVppFacadeNotificationExample {
+
+ private static void testCallbackFacade() throws Exception {
+ System.out.println("Testing CallbackJVppFacade for notifications");
+
+ try (final JVppRegistry registry = new JVppRegistryImpl("CallbackFacadeExample");
+ final JVppCore jvpp = new JVppCoreImpl()) {
+ final CallbackJVppCoreFacade jvppCallbackFacade = new CallbackJVppCoreFacade(registry, jvpp);
+ System.out.println("Successfully connected to VPP");
+
+ final AutoCloseable notificationListenerReg =
+ jvppCallbackFacade.getNotificationRegistry().registerSwInterfaceEventNotificationCallback(
+ NotificationUtils::printNotification
+ );
+
+ jvppCallbackFacade.wantInterfaceEvents(NotificationUtils.getEnableInterfaceNotificationsReq(),
+ new WantInterfaceEventsCallback() {
+ @Override
+ public void onWantInterfaceEventsReply(final WantInterfaceEventsReply reply) {
+ System.out.println("Interface events started");
+ }
+
+ @Override
+ public void onError(final VppCallbackException ex) {
+ System.out.printf("Received onError exception: call=%s, context=%d, retval=%d%n",
+ ex.getMethodName(), ex.getCtxId(), ex.getErrorCode());
+ }
+ });
+
+ System.out.println("Changing interface configuration");
+ NotificationUtils.getChangeInterfaceState().send(jvpp);
+
+ Thread.sleep(1000);
+
+ jvppCallbackFacade.wantInterfaceEvents(NotificationUtils.getDisableInterfaceNotificationsReq(),
+ new WantInterfaceEventsCallback() {
+ @Override
+ public void onWantInterfaceEventsReply(final WantInterfaceEventsReply reply) {
+ System.out.println("Interface events stopped");
+ }
+
+ @Override
+ public void onError(final VppCallbackException ex) {
+ System.out.printf("Received onError exception: call=%s, context=%d, retval=%d%n",
+ ex.getMethodName(), ex.getCtxId(), ex.getErrorCode());
+ }
+ });
+
+ notificationListenerReg.close();
+
+ Thread.sleep(2000);
+ System.out.println("Disconnecting...");
+ }
+ Thread.sleep(1000);
+ }
+
+ public static void main(String[] args) throws Exception {
+ testCallbackFacade();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java
new file mode 100644
index 00000000..7d56b7ea
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import static io.fd.vpp.jvpp.core.examples.NotificationUtils.getChangeInterfaceState;
+import static io.fd.vpp.jvpp.core.examples.NotificationUtils.getDisableInterfaceNotificationsReq;
+import static io.fd.vpp.jvpp.core.examples.NotificationUtils.getEnableInterfaceNotificationsReq;
+import static io.fd.vpp.jvpp.core.examples.NotificationUtils.printNotification;
+
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.VppCallbackException;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.callback.SwInterfaceEventNotificationCallback;
+import io.fd.vpp.jvpp.core.callback.WantInterfaceEventsCallback;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceEventNotification;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlagsReply;
+import io.fd.vpp.jvpp.core.dto.WantInterfaceEventsReply;
+
+public class CallbackNotificationApiExample {
+
+ private static void testCallbackApi() throws Exception {
+ System.out.println("Testing Java callback API for notifications");
+ try (final JVppRegistry registry = new JVppRegistryImpl("CallbackNotificationApiExample");
+ final JVpp jvpp = new JVppCoreImpl()) {
+ registry.register(jvpp, new TestCallback());
+ System.out.println("Successfully connected to VPP");
+
+ getEnableInterfaceNotificationsReq().send(jvpp);
+ System.out.println("Interface notifications started");
+ // TODO test ifc dump which also triggers interface flags send
+
+ System.out.println("Changing interface configuration");
+ getChangeInterfaceState().send(jvpp);
+
+ // Notifications are received
+ Thread.sleep(500);
+
+ getDisableInterfaceNotificationsReq().send(jvpp);
+ System.out.println("Interface events stopped");
+
+ Thread.sleep(2000);
+ System.out.println("Disconnecting...");
+ }
+ Thread.sleep(1000);
+ }
+
+ public static void main(String[] args) throws Exception {
+ testCallbackApi();
+ }
+
+ private static class TestCallback implements SwInterfaceEventNotificationCallback,
+ SwInterfaceSetFlagsCallback, WantInterfaceEventsCallback {
+
+ @Override
+ public void onSwInterfaceEventNotification(
+ final SwInterfaceEventNotification msg) {
+ printNotification(msg);
+ }
+
+ @Override
+ public void onSwInterfaceSetFlagsReply(final SwInterfaceSetFlagsReply msg) {
+ System.out.printf("Received SwInterfaceSetFlagsReply: %s%n", msg);
+ }
+
+ @Override
+ public void onWantInterfaceEventsReply(final WantInterfaceEventsReply wantInterfaceEventsReply) {
+ System.out.println("Interface notification stream updated");
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception in getNodeIndexCallback: call=%s, reply=%d, context=%d%n",
+ ex.getMethodName(), ex.getErrorCode(), ex.getCtxId());
+
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CreateSubInterfaceExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CreateSubInterfaceExample.java
new file mode 100644
index 00000000..3db6d30a
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CreateSubInterfaceExample.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import static java.util.Objects.requireNonNull;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.dto.CreateSubif;
+import io.fd.vpp.jvpp.core.dto.CreateSubifReply;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceDetailsReplyDump;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceDump;
+import io.fd.vpp.jvpp.core.future.FutureJVppCoreFacade;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * <p>Tests sub-interface creation.<br> Equivalent to:<br>
+ *
+ * <pre>{@code
+ * vppctl create sub GigabitEthernet0/8/0 1 dot1q 100 inner-dot1q any
+ * }
+ * </pre>
+ *
+ * To verify, invoke:<br>
+ * <pre>{@code
+ * vpp_api_test json
+ * vat# sw_interface_dump
+ * }
+ * </pre>
+ */
+public class CreateSubInterfaceExample {
+
+ private static SwInterfaceDump createSwInterfaceDumpRequest(final String ifaceName) {
+ SwInterfaceDump request = new SwInterfaceDump();
+ request.nameFilter = ifaceName.getBytes(StandardCharsets.UTF_8);
+ request.nameFilterValid = 1;
+ return request;
+ }
+
+ private static void requireSingleIface(final SwInterfaceDetailsReplyDump response, final String ifaceName) {
+ if (response.swInterfaceDetails.size() != 1) {
+ throw new IllegalStateException(
+ String.format("Expected one interface matching filter %s but was %d", ifaceName,
+ response.swInterfaceDetails.size()));
+ }
+ }
+
+ private static CreateSubif createSubifRequest(final int swIfIndex, final int subId) {
+ CreateSubif request = new CreateSubif();
+ request.swIfIndex = swIfIndex; // super interface id
+ request.subId = subId;
+ request.noTags = 0;
+ request.oneTag = 0;
+ request.twoTags = 1;
+ request.dot1Ad = 0;
+ request.exactMatch = 1;
+ request.defaultSub = 0;
+ request.outerVlanIdAny = 0;
+ request.innerVlanIdAny = 1;
+ request.outerVlanId = 100;
+ request.innerVlanId = 0;
+ return request;
+ }
+
+ private static void print(CreateSubifReply reply) {
+ System.out.printf("CreateSubifReply: %s%n", reply);
+ }
+
+ private static void testCreateSubInterface() throws Exception {
+ System.out.println("Testing sub-interface creation using Java callback API");
+ try (final JVppRegistry registry = new JVppRegistryImpl("CreateSubInterfaceExample");
+ final FutureJVppCoreFacade jvppFacade = new FutureJVppCoreFacade(registry, new JVppCoreImpl())) {
+ System.out.println("Successfully connected to VPP");
+ Thread.sleep(1000);
+
+ final String ifaceName = "Gigabitethernet0/8/0";
+
+ final SwInterfaceDetailsReplyDump swInterfaceDetails =
+ jvppFacade.swInterfaceDump(createSwInterfaceDumpRequest(ifaceName)).toCompletableFuture().get();
+
+ requireNonNull(swInterfaceDetails, "swInterfaceDump returned null");
+ requireNonNull(swInterfaceDetails.swInterfaceDetails, "swInterfaceDetails is null");
+ requireSingleIface(swInterfaceDetails, ifaceName);
+
+ final int swIfIndex = swInterfaceDetails.swInterfaceDetails.get(0).swIfIndex;
+ final int subId = 1;
+
+ final CreateSubifReply createSubifReply =
+ jvppFacade.createSubif(createSubifRequest(swIfIndex, subId)).toCompletableFuture().get();
+ print(createSubifReply);
+
+ final String subIfaceName = "Gigabitethernet0/8/0." + subId;
+ final SwInterfaceDetailsReplyDump subIface =
+ jvppFacade.swInterfaceDump(createSwInterfaceDumpRequest(subIfaceName)).toCompletableFuture().get();
+ requireNonNull(subIface, "swInterfaceDump returned null");
+ requireNonNull(subIface.swInterfaceDetails, "swInterfaceDetails is null");
+ requireSingleIface(subIface, subIfaceName);
+
+ System.out.println("Disconnecting...");
+ }
+ Thread.sleep(1000);
+ }
+
+ public static void main(String[] args) throws Exception {
+ testCreateSubInterface();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiExample.java
new file mode 100644
index 00000000..931c9b33
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiExample.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.dto.BridgeDomainDetailsReplyDump;
+import io.fd.vpp.jvpp.core.dto.BridgeDomainDump;
+import io.fd.vpp.jvpp.core.dto.GetNodeIndex;
+import io.fd.vpp.jvpp.core.dto.GetNodeIndexReply;
+import io.fd.vpp.jvpp.core.dto.ShowVersion;
+import io.fd.vpp.jvpp.core.dto.ShowVersionReply;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceDetails;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceDetailsReplyDump;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceDump;
+import io.fd.vpp.jvpp.core.future.FutureJVppCoreFacade;
+import java.nio.charset.StandardCharsets;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Future;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public class FutureApiExample {
+
+ private static final Logger LOG = Logger.getLogger(FutureApiExample.class.getName());
+
+ private static void testShowVersion(final FutureJVppCoreFacade jvpp) throws Exception {
+ LOG.info("Sending ShowVersion request...");
+ final Future<ShowVersionReply> replyFuture = jvpp.showVersion(new ShowVersion()).toCompletableFuture();
+ final ShowVersionReply reply = replyFuture.get();
+ LOG.info(
+ String.format(
+ "Received ShowVersionReply: context=%d, program=%s, version=%s, buildDate=%s, buildDirectory=%s%n",
+ reply.context, new String(reply.program, StandardCharsets.UTF_8),
+ new String(reply.version, StandardCharsets.UTF_8),
+ new String(reply.buildDate, StandardCharsets.UTF_8),
+ new String(reply.buildDirectory, StandardCharsets.UTF_8)));
+ }
+
+ private static void testEmptyBridgeDomainDump(final FutureJVppCoreFacade jvpp) throws Exception {
+ LOG.info("Sending ShowVersion request...");
+ final BridgeDomainDump request = new BridgeDomainDump();
+ request.bdId = -1; // dump call
+
+ final CompletableFuture<BridgeDomainDetailsReplyDump>
+ replyFuture = jvpp.bridgeDomainDump(request).toCompletableFuture();
+ final BridgeDomainDetailsReplyDump reply = replyFuture.get();
+
+ if (reply == null || reply.bridgeDomainDetails == null) {
+ LOG.severe("Received null response for empty dump: " + reply);
+ } else {
+ LOG.info(
+ String.format(
+ "Received bridge-domain dump reply with list of bridge-domains: %s",
+ reply.bridgeDomainDetails));
+ }
+ }
+
+ private static void testGetNodeIndex(final FutureJVppCoreFacade jvpp) {
+ LOG.info("Sending GetNodeIndex request...");
+ final GetNodeIndex request = new GetNodeIndex();
+ request.nodeName = "non-existing-node".getBytes(StandardCharsets.UTF_8);
+ final Future<GetNodeIndexReply> replyFuture = jvpp.getNodeIndex(request).toCompletableFuture();
+ try {
+ final GetNodeIndexReply reply = replyFuture.get();
+ LOG.info(
+ String.format(
+ "Received GetNodeIndexReply: context=%d, nodeIndex=%d%n", reply.context, reply.nodeIndex));
+ } catch (Exception e) {
+ LOG.log(Level.SEVERE, "GetNodeIndex request failed", e);
+ }
+ }
+
+ private static void testSwInterfaceDump(final FutureJVppCoreFacade jvpp) throws Exception {
+ LOG.info("Sending SwInterfaceDump request...");
+ final SwInterfaceDump request = new SwInterfaceDump();
+ request.nameFilterValid = 0;
+ request.nameFilter = "".getBytes(StandardCharsets.UTF_8);
+
+ final Future<SwInterfaceDetailsReplyDump> replyFuture = jvpp.swInterfaceDump(request).toCompletableFuture();
+ final SwInterfaceDetailsReplyDump reply = replyFuture.get();
+ for (SwInterfaceDetails details : reply.swInterfaceDetails) {
+ Objects.requireNonNull(details, "reply.swInterfaceDetails contains null element!");
+ LOG.info(
+ String.format("Received SwInterfaceDetails: interfaceName=%s, l2AddressLength=%d, adminUpDown=%d, "
+ + "linkUpDown=%d, linkSpeed=%d, linkMtu=%d%n",
+ new String(details.interfaceName, StandardCharsets.UTF_8),
+ details.l2AddressLength, details.adminUpDown,
+ details.linkUpDown, details.linkSpeed, (int) details.linkMtu));
+ }
+ }
+
+ private static void testFutureApi() throws Exception {
+ LOG.info("Testing Java future API");
+ try (final JVppRegistry registry = new JVppRegistryImpl("FutureApiExample");
+ final FutureJVppCoreFacade jvppFacade = new FutureJVppCoreFacade(registry, new JVppCoreImpl())) {
+ LOG.info("Successfully connected to VPP");
+
+ testEmptyBridgeDomainDump(jvppFacade);
+ testShowVersion(jvppFacade);
+ testGetNodeIndex(jvppFacade);
+ testSwInterfaceDump(jvppFacade);
+
+ LOG.info("Disconnecting...");
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ testFutureApi();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java
new file mode 100644
index 00000000..7460401e
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import static io.fd.vpp.jvpp.core.examples.NotificationUtils.getChangeInterfaceState;
+import static io.fd.vpp.jvpp.core.examples.NotificationUtils.getDisableInterfaceNotificationsReq;
+import static io.fd.vpp.jvpp.core.examples.NotificationUtils.getEnableInterfaceNotificationsReq;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.future.FutureJVppCoreFacade;
+
+public class FutureApiNotificationExample {
+
+ private static void testFutureApi() throws Exception {
+ System.out.println("Testing Java future API for notifications");
+ try (final JVppRegistry registry = new JVppRegistryImpl("FutureApiNotificationExample");
+ final FutureJVppCoreFacade jvppFacade = new FutureJVppCoreFacade(registry, new JVppCoreImpl());
+ final AutoCloseable notificationListenerReg =
+ jvppFacade.getNotificationRegistry()
+ .registerSwInterfaceEventNotificationCallback(NotificationUtils::printNotification)) {
+ System.out.println("Successfully connected to VPP");
+ jvppFacade.wantInterfaceEvents(getEnableInterfaceNotificationsReq()).toCompletableFuture().get();
+ System.out.println("Interface events started");
+
+ System.out.println("Changing interface configuration");
+ jvppFacade.swInterfaceSetFlags(getChangeInterfaceState()).toCompletableFuture().get();
+
+ Thread.sleep(1000);
+
+ jvppFacade.wantInterfaceEvents(getDisableInterfaceNotificationsReq()).toCompletableFuture().get();
+ System.out.println("Interface events stopped");
+ System.out.println("Disconnecting...");
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ testFutureApi();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/L2AclExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/L2AclExample.java
new file mode 100644
index 00000000..f89043a3
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/L2AclExample.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.dto.ClassifyAddDelSession;
+import io.fd.vpp.jvpp.core.dto.ClassifyAddDelSessionReply;
+import io.fd.vpp.jvpp.core.dto.ClassifyAddDelTable;
+import io.fd.vpp.jvpp.core.dto.ClassifyAddDelTableReply;
+import io.fd.vpp.jvpp.core.dto.ClassifySessionDetailsReplyDump;
+import io.fd.vpp.jvpp.core.dto.ClassifySessionDump;
+import io.fd.vpp.jvpp.core.dto.ClassifyTableByInterface;
+import io.fd.vpp.jvpp.core.dto.ClassifyTableByInterfaceReply;
+import io.fd.vpp.jvpp.core.dto.ClassifyTableIds;
+import io.fd.vpp.jvpp.core.dto.ClassifyTableIdsReply;
+import io.fd.vpp.jvpp.core.dto.ClassifyTableInfo;
+import io.fd.vpp.jvpp.core.dto.ClassifyTableInfoReply;
+import io.fd.vpp.jvpp.core.dto.InputAclSetInterface;
+import io.fd.vpp.jvpp.core.dto.InputAclSetInterfaceReply;
+import io.fd.vpp.jvpp.core.future.FutureJVppCoreFacade;
+import javax.xml.bind.DatatypeConverter;
+
+/**
+ * <p>Tests L2 ACL creation and read.<br> Equivalent to the following vppctl commands:<br>
+ *
+ * <pre>{@code
+ * vppctl classify table mask l2 src
+ * vppctl classify session acl-hit-next deny opaque-index 0 table-index 0 match l2 src 01:02:03:04:05:06
+ * vppctl set int input acl intfc local0 l2-table 0
+ * vppctl sh class table verbose
+ * }
+ * </pre>
+ */
+public class L2AclExample {
+
+ private static final int LOCAL0_IFACE_ID = 0;
+
+ private static ClassifyAddDelTable createClassifyTable() {
+ ClassifyAddDelTable request = new ClassifyAddDelTable();
+ request.isAdd = 1;
+ request.tableIndex = ~0; // default
+ request.nbuckets = 2;
+ request.memorySize = 2 << 20;
+ request.nextTableIndex = ~0; // default
+ request.missNextIndex = ~0; // default
+ request.skipNVectors = 0;
+ request.matchNVectors = 1;
+ request.mask =
+ new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff,
+ (byte) 0xff, (byte) 0xff, 0x00, 0x00, 0x00, 0x00};
+ return request;
+ }
+
+ private static ClassifyTableInfo createClassifyTableInfoRequest(final int tableId) {
+ ClassifyTableInfo request = new ClassifyTableInfo();
+ request.tableId = tableId;
+ return request;
+ }
+
+ private static ClassifyAddDelSession createClassifySession(final int tableIndex) {
+ ClassifyAddDelSession request = new ClassifyAddDelSession();
+ request.isAdd = 1;
+ request.tableIndex = tableIndex;
+ request.hitNextIndex = 0; // deny
+ request.opaqueIndex = 0;
+ request.advance = 0; // default
+ // match 01:02:03:04:05:06 mac address
+ request.match =
+ new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, (byte) 0x01, (byte) 0x02, (byte) 0x03, (byte) 0x04,
+ (byte) 0x05, (byte) 0x06, 0x00, 0x00, 0x00, 0x00};
+ return request;
+ }
+
+ private static ClassifySessionDump createClassifySessionDumpRequest(final int newTableIndex) {
+ ClassifySessionDump request = new ClassifySessionDump();
+ request.tableId = newTableIndex;
+ return request;
+ }
+
+ private static InputAclSetInterface aclSetInterface() {
+ InputAclSetInterface request = new InputAclSetInterface();
+ request.isAdd = 1;
+ request.swIfIndex = LOCAL0_IFACE_ID;
+ request.ip4TableIndex = ~0; // skip
+ request.ip6TableIndex = ~0; // skip
+ request.l2TableIndex = 0;
+ return request;
+ }
+
+ private static ClassifyTableByInterface createClassifyTableByInterfaceRequest() {
+ ClassifyTableByInterface request = new ClassifyTableByInterface();
+ request.swIfIndex = LOCAL0_IFACE_ID;
+ return request;
+ }
+
+ private static void print(ClassifyAddDelTableReply reply) {
+ System.out.printf("ClassifyAddDelTableReply: %s%n", reply);
+ }
+
+ private static void print(ClassifyTableIdsReply reply) {
+ System.out.printf("ClassifyTableIdsReply: %s%n", reply);
+ }
+
+ private static void print(final ClassifyTableInfoReply reply) {
+ System.out.println(reply);
+ if (reply != null) {
+ System.out.println("Mask hex: " + DatatypeConverter.printHexBinary(reply.mask));
+ }
+ }
+
+ private static void print(ClassifyAddDelSessionReply reply) {
+ System.out.printf("ClassifyAddDelSessionReply: context=%s%n", reply);
+ }
+
+ private static void print(final ClassifySessionDetailsReplyDump reply) {
+ System.out.println(reply);
+ reply.classifySessionDetails.forEach(detail -> {
+ System.out.println(detail);
+ System.out.println("Match hex: " + DatatypeConverter.printHexBinary(detail.match));
+ });
+ }
+
+ private static void print(final InputAclSetInterfaceReply reply) {
+ System.out.printf("InputAclSetInterfaceReply: context=%s%n", reply);
+ }
+
+ private static void print(final ClassifyTableByInterfaceReply reply) {
+ System.out.printf("ClassifyAddDelTableReply: %s%n", reply);
+ }
+
+ private static void testL2Acl() throws Exception {
+ System.out.println("Testing L2 ACLs using Java callback API");
+ try (final JVppRegistry registry = new JVppRegistryImpl("L2AclExample");
+ final FutureJVppCoreFacade jvppFacade = new FutureJVppCoreFacade(registry, new JVppCoreImpl())) {
+
+ System.out.println("Successfully connected to VPP");
+ Thread.sleep(1000);
+
+ final ClassifyAddDelTableReply classifyAddDelTableReply =
+ jvppFacade.classifyAddDelTable(createClassifyTable()).toCompletableFuture().get();
+ print(classifyAddDelTableReply);
+
+ final ClassifyTableIdsReply classifyTableIdsReply =
+ jvppFacade.classifyTableIds(new ClassifyTableIds()).toCompletableFuture().get();
+ print(classifyTableIdsReply);
+
+ final ClassifyTableInfoReply classifyTableInfoReply =
+ jvppFacade.classifyTableInfo(createClassifyTableInfoRequest(classifyAddDelTableReply.newTableIndex))
+ .toCompletableFuture().get();
+ print(classifyTableInfoReply);
+
+ final ClassifyAddDelSessionReply classifyAddDelSessionReply =
+ jvppFacade.classifyAddDelSession(createClassifySession(classifyAddDelTableReply.newTableIndex))
+ .toCompletableFuture().get();
+ print(classifyAddDelSessionReply);
+
+ final ClassifySessionDetailsReplyDump classifySessionDetailsReplyDump =
+ jvppFacade.classifySessionDump(createClassifySessionDumpRequest(classifyAddDelTableReply.newTableIndex))
+ .toCompletableFuture().get();
+ print(classifySessionDetailsReplyDump);
+
+ final InputAclSetInterfaceReply inputAclSetInterfaceReply =
+ jvppFacade.inputAclSetInterface(aclSetInterface()).toCompletableFuture().get();
+ print(inputAclSetInterfaceReply);
+
+ final ClassifyTableByInterfaceReply classifyTableByInterfaceReply =
+ jvppFacade.classifyTableByInterface(createClassifyTableByInterfaceRequest()).toCompletableFuture()
+ .get();
+ print(classifyTableByInterfaceReply);
+
+ System.out.println("Disconnecting...");
+ }
+ Thread.sleep(1000);
+ }
+
+ public static void main(String[] args) throws Exception {
+ testL2Acl();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/LispAdjacencyExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/LispAdjacencyExample.java
new file mode 100644
index 00000000..f637669d
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/LispAdjacencyExample.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.dto.LispAddDelAdjacency;
+import io.fd.vpp.jvpp.core.dto.LispAddDelLocalEid;
+import io.fd.vpp.jvpp.core.dto.LispAddDelLocatorSet;
+import io.fd.vpp.jvpp.core.dto.LispAddDelRemoteMapping;
+import io.fd.vpp.jvpp.core.dto.LispAdjacenciesGet;
+import io.fd.vpp.jvpp.core.dto.LispAdjacenciesGetReply;
+import io.fd.vpp.jvpp.core.dto.LispEnableDisable;
+import io.fd.vpp.jvpp.core.future.FutureJVppCoreFacade;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.ExecutionException;
+import java.util.logging.Logger;
+
+/**
+ * Tests lisp adjacency creation and read (custom vpe.api type support showcase).
+ */
+public class LispAdjacencyExample {
+
+ private static final Logger LOG = Logger.getLogger(LispAdjacencyExample.class.getName());
+
+ private static void enableLisp(final FutureJVppCoreFacade jvpp) throws ExecutionException, InterruptedException {
+ final LispEnableDisable request = new LispEnableDisable();
+ request.isEn = 1;
+ jvpp.lispEnableDisable(request).toCompletableFuture().get();
+ LOG.info("Lisp enabled successfully");
+ }
+
+ private static void addLocatorSet(final FutureJVppCoreFacade jvpp) throws ExecutionException, InterruptedException {
+ final LispAddDelLocatorSet request = new LispAddDelLocatorSet();
+ request.isAdd = 1;
+ request.locatorSetName = "ls1".getBytes(StandardCharsets.UTF_8);
+ jvpp.lispAddDelLocatorSet(request).toCompletableFuture().get();
+ LOG.info("Locator set created successfully:" + request.toString());
+ }
+
+ private static void addLocalEid(final FutureJVppCoreFacade jvpp) throws ExecutionException, InterruptedException {
+ final LispAddDelLocalEid request = new LispAddDelLocalEid();
+ request.isAdd = 1;
+ request.locatorSetName = "ls1".getBytes(StandardCharsets.UTF_8);
+ request.eid = new byte[] {1, 2, 1, 10};
+ request.eidType = 0; // ip4
+ request.vni = 0;
+ request.prefixLen = 32;
+ jvpp.lispAddDelLocalEid(request).toCompletableFuture().get();
+ LOG.info("Local EID created successfully:" + request.toString());
+ }
+
+ private static void addRemoteMapping(final FutureJVppCoreFacade jvpp)
+ throws ExecutionException, InterruptedException {
+ final LispAddDelRemoteMapping request = new LispAddDelRemoteMapping();
+ request.isAdd = 1;
+ request.vni = 0;
+ request.eid = new byte[] {1, 2, 1, 20};
+ request.eidLen = 32;
+ request.rlocNum = 1;
+ // FIXME!!!!
+ //request.rlocs = new byte[] {1, 1, 1, 1, 2, 1, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ jvpp.lispAddDelRemoteMapping(request).toCompletableFuture().get();
+ LOG.info("Remote mapping created successfully:" + request.toString());
+ }
+
+ private static void addAdjacency(final FutureJVppCoreFacade jvpp) throws ExecutionException, InterruptedException {
+ final LispAddDelAdjacency request = new LispAddDelAdjacency();
+ request.isAdd = 1;
+ request.leid = new byte[] {1, 2, 1, 10};
+ request.leidLen = 32;
+ request.reid = new byte[] {1, 2, 1, 20};
+ request.reidLen = 32;
+ request.eidType = 0; // ip4
+ request.vni = 0;
+ jvpp.lispAddDelAdjacency(request).toCompletableFuture().get();
+ LOG.info("Lisp adjacency created successfully:" + request.toString());
+ }
+
+ private static void showAdjacencies(final FutureJVppCoreFacade jvpp)
+ throws ExecutionException, InterruptedException {
+ final LispAdjacenciesGetReply reply =
+ jvpp.lispAdjacenciesGet(new LispAdjacenciesGet()).toCompletableFuture().get();
+ LOG.info("Lisp adjacency received successfully:" + reply.toString());
+ }
+
+ private static void testAdjacency(final FutureJVppCoreFacade jvpp) throws Exception {
+ enableLisp(jvpp);
+ addLocatorSet(jvpp);
+ addLocalEid(jvpp);
+ addRemoteMapping(jvpp);
+ addAdjacency(jvpp);
+ showAdjacencies(jvpp);
+ }
+
+ private static void testFutureApi() throws Exception {
+ LOG.info("Create lisp adjacency test");
+ try (final JVppRegistry registry = new JVppRegistryImpl("LispAdjacencyExample");
+ final FutureJVppCoreFacade jvppFacade = new FutureJVppCoreFacade(registry, new JVppCoreImpl())) {
+ LOG.info("Successfully connected to VPP");
+
+ testAdjacency(jvppFacade);
+ LOG.info("Disconnecting...");
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ testFutureApi();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java
new file mode 100644
index 00000000..d3f9dd2c
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.examples;
+
+import java.io.PrintStream;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlags;
+import io.fd.vpp.jvpp.core.dto.SwInterfaceEventNotification;
+import io.fd.vpp.jvpp.core.dto.WantInterfaceEvents;
+
+final class NotificationUtils {
+
+ private NotificationUtils() {}
+
+ static PrintStream printNotification(final SwInterfaceEventNotification msg) {
+ return System.out.printf("Received interface notification: ifc: %s%n", msg);
+ }
+
+ static SwInterfaceSetFlags getChangeInterfaceState() {
+ final SwInterfaceSetFlags swInterfaceSetFlags = new SwInterfaceSetFlags();
+ swInterfaceSetFlags.swIfIndex = 0;
+ swInterfaceSetFlags.adminUpDown = 1;
+ return swInterfaceSetFlags;
+ }
+
+ static WantInterfaceEvents getEnableInterfaceNotificationsReq() {
+ WantInterfaceEvents wantInterfaceEvents = new WantInterfaceEvents();
+ wantInterfaceEvents.pid = 1;
+ wantInterfaceEvents.enableDisable = 1;
+ return wantInterfaceEvents;
+ }
+
+ static WantInterfaceEvents getDisableInterfaceNotificationsReq() {
+ WantInterfaceEvents wantInterfaceEvents = new WantInterfaceEvents();
+ wantInterfaceEvents.pid = 1;
+ wantInterfaceEvents.enableDisable = 0;
+ return wantInterfaceEvents;
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/Readme.txt b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/Readme.txt
new file mode 100644
index 00000000..10c603f5
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/Readme.txt
@@ -0,0 +1,17 @@
+This package contains basic examples for jvpp. To run the examples:
+
+- Make sure VPP is running
+- From VPP's build-root/ folder execute:
+ - release version: sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-core-17.10.jar io.fd.vpp.jvpp.core.examples.[test name]
+ - debug version: sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-core-17.10.jar io.fd.vpp.jvpp.core.examples.[test name]
+
+Available examples:
+CallbackApiExample - Similar to ControlPingTest, invokes more complex calls (e.g. interface dump) using low level JVpp APIs
+CallbackJVppFacadeNotificationExample - Example of interface notifications using Callback based JVpp facade
+CallbackJVppFacadeExample - Execution of more complex calls using Callback based JVpp facade
+CallbackNotificationApiExample - Example of interface notifications using low level JVpp APIs
+CreateSubInterfaceExample - Example of sub-interface creation
+FutureApiNotificationExample - Example of interface notifications using Future based JVpp facade
+FutureApiExample - Execution of more complex calls using Future based JVpp facade
+L2AclExample - Example of L2 ACL creation
+LispAdjacencyExample - Example of lisp adjacency creation and read (custom vpe.api type support showcase)
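+
+For example, to run FutureApiExample against a release build:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-core-17.10.jar io.fd.vpp.jvpp.core.examples.FutureApiExample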
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/CallbackApiTest.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/CallbackApiTest.java
new file mode 100644
index 00000000..493116c8
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/CallbackApiTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.test;
+
+import io.fd.vpp.jvpp.AbstractCallbackApiTest;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+
+import java.util.logging.Logger;
+
+public class CallbackApiTest extends AbstractCallbackApiTest {
+
+ private static Logger LOG = Logger.getLogger(CallbackApiTest.class.getName());
+
+
+ public static void main(String[] args) throws Exception {
+ LOG.info("Testing ControlPing using Java callback API for core plugin");
+ testControlPing(args[0], new JVppCoreImpl());
+ }
+}
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/FutureApiTest.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/FutureApiTest.java
new file mode 100644
index 00000000..d3acecc2
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/FutureApiTest.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.core.test;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.core.JVppCoreImpl;
+import io.fd.vpp.jvpp.core.dto.BridgeDomainDetailsReplyDump;
+import io.fd.vpp.jvpp.core.dto.BridgeDomainDump;
+import io.fd.vpp.jvpp.core.future.FutureJVppCoreFacade;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.logging.Logger;
+
+public class FutureApiTest {
+
+ private static final Logger LOG = Logger.getLogger(FutureApiTest.class.getName());
+
+ public static void main(String[] args) throws Exception {
+ testFutureApi(args);
+ }
+
+ private static void testFutureApi(String[] args) throws Exception {
+ LOG.info("Testing Java future API for core plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("FutureApiTest", args[0]);
+ final FutureJVppCoreFacade jvppFacade = new FutureJVppCoreFacade(registry, new JVppCoreImpl())) {
+ LOG.info("Successfully connected to VPP");
+
+ testEmptyBridgeDomainDump(jvppFacade);
+
+ LOG.info("Disconnecting...");
+ }
+ }
+
+ private static void testEmptyBridgeDomainDump(final FutureJVppCoreFacade jvpp) throws Exception {
+ LOG.info("Sending BridgeDomainDump request...");
+ final BridgeDomainDump request = new BridgeDomainDump();
+ request.bdId = -1; // dump call
+
+ final CompletableFuture<BridgeDomainDetailsReplyDump>
+ replyFuture = jvpp.bridgeDomainDump(request).toCompletableFuture();
+ final BridgeDomainDetailsReplyDump reply = replyFuture.get();
+
+ if (reply == null || reply.bridgeDomainDetails == null) {
+ throw new IllegalStateException("Received null response for empty dump: " + reply);
+ } else {
+ LOG.info(
+ String.format(
+ "Received bridge-domain dump reply with list of bridge-domains: %s",
+ reply.bridgeDomainDetails));
+ }
+ }
+}
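The test above blocks on replyFuture.get(). A minimal sketch of consuming the same dump asynchronously instead (reusing the FutureJVppCoreFacade and bridge-domain DTOs from the file above; the class and method names here are hypothetical):

    import io.fd.vpp.jvpp.core.dto.BridgeDomainDump;
    import io.fd.vpp.jvpp.core.future.FutureJVppCoreFacade;

    public class AsyncBridgeDomainDump {
        // Requests the dump and logs the reply on whichever thread completes the
        // future, instead of blocking the calling thread with get().
        static void dumpAsync(final FutureJVppCoreFacade jvpp) {
            final BridgeDomainDump request = new BridgeDomainDump();
            request.bdId = -1; // dump all bridge domains, as in the test above
            jvpp.bridgeDomainDump(request)
                .toCompletableFuture()
                .thenAccept(reply -> System.out.println(
                    "bridge domains: " + reply.bridgeDomainDetails));
        }
    }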
diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/Readme.txt b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/Readme.txt
new file mode 100644
index 00000000..b74cf60a
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/Readme.txt
@@ -0,0 +1,18 @@
+This package contains basic tests for jvpp. To run the tests:
+
+- Make sure VPP is running
+- From VPP's build-root/ folder execute:
+ - release version: sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-core-17.10.jar io.fd.vpp.jvpp.core.test.[test-name]
+ - debug version: sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-core-17.10.jar io.fd.vpp.jvpp.core.test.[test-name]
+
+Available tests:
+CallbackApiTest - Tests ControlPing using low level JVpp APIs
+CallbackJVppFacadeNotificationTest - Tests interface notifications using Callback based JVpp facade
+CallbackJVppFacadeTest - Execution of more complex calls using Callback based JVpp facade
+CallbackNotificationApiTest - Tests interface notifications using low level JVpp APIs
+ControlPingTest - Simple test executing a single control ping using low level JVpp APIs
+CreateSubInterfaceTest - Tests sub-interface creation
+FutureApiNotificationTest - Tests interface notifications using Future based JVpp facade
+FutureApiTest - Execution of more complex calls using Future based JVpp facade
+L2AclTest - Tests L2 ACL creation
+LispAdjacencyTest - Tests LISP adjacency creation and read (custom vpe.api type support showcase)
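Note: each test passes its first program argument to JVppRegistryImpl as the shared memory prefix used to connect to VPP, so a prefix must be appended to the commands above. A hypothetical FutureApiTest invocation (the actual prefix value depends on the api-segment configuration of the running VPP):

  sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-core-17.10.jar io.fd.vpp.jvpp.core.test.FutureApiTest /vpe-api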
diff --git a/src/vpp-api/java/jvpp-core/jvpp_core.c b/src/vpp-api/java/jvpp-core/jvpp_core.c
new file mode 100644
index 00000000..e57c62a3
--- /dev/null
+++ b/src/vpp-api/java/jvpp-core/jvpp_core.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+#include <jvpp-common/jvpp_common.h>
+
+// TODO: generate jvpp_plugin_name.c files (or at least reuse plugin's main structure)
+typedef struct {
+ /* Pointer to shared memory queue */
+ unix_shared_memory_queue_t * vl_input_queue;
+
+ /* VPP api client index */
+ u32 my_client_index;
+
+ /* Callback object and class references enabling asynchronous Java calls */
+ jobject callbackObject;
+ jclass callbackClass;
+
+} core_main_t;
+
+core_main_t core_main __attribute__((aligned (64)));
+
+#include "io_fd_vpp_jvpp_core_JVppCoreImpl.h"
+#include "jvpp_core_gen.h"
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_core_JVppCoreImpl_init0
+(JNIEnv * env, jclass clazz, jobject callback, jlong queue_address, jint my_client_index) {
+ core_main_t * plugin_main = &core_main;
+ plugin_main->my_client_index = my_client_index;
+ plugin_main->vl_input_queue = uword_to_pointer (queue_address, unix_shared_memory_queue_t *);
+
+ plugin_main->callbackObject = (*env)->NewGlobalRef(env, callback);
+ plugin_main->callbackClass = (jclass)(*env)->NewGlobalRef(env, (*env)->GetObjectClass(env, callback));
+
+ // verify API has not changed since jar generation
+  #define _(N) \
+  get_message_id(env, #N);
+  foreach_supported_api_message;
+ #undef _
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(get_message_id(env, #N), #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_reply_handler;
+ #undef _
+}
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_core_JVppCoreImpl_close0
+(JNIEnv *env, jclass clazz) {
+ core_main_t * plugin_main = &core_main;
+
+ // cleanup:
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackClass);
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackObject);
+
+ plugin_main->callbackClass = NULL;
+ plugin_main->callbackObject = NULL;
+}
+
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ if (cache_class_references(env) != 0) {
+ clib_warning ("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ return JNI_VERSION_1_8;
+}
+
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+ delete_class_references(env);
+}
diff --git a/src/vpp-api/java/jvpp-gtpu/jvpp_gtpu.c b/src/vpp-api/java/jvpp-gtpu/jvpp_gtpu.c
new file mode 100644
index 00000000..12b3090e
--- /dev/null
+++ b/src/vpp-api/java/jvpp-gtpu/jvpp_gtpu.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <gtpu/gtpu_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <gtpu/gtpu_all_api_h.h>
+#undef vl_typedefs
+
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#if VPPJNI_DEBUG == 1
+ #define DEBUG_LOG(...) clib_warning(__VA_ARGS__)
+#else
+ #define DEBUG_LOG(...)
+#endif
+
+#include <jvpp-common/jvpp_common.h>
+
+#include "jvpp-gtpu/io_fd_vpp_jvpp_gtpu_JVppGtpuImpl.h"
+#include "jvpp_gtpu.h"
+#include "jvpp-gtpu/jvpp_gtpu_gen.h"
+
+/*
+ * Class:     io_fd_vpp_jvpp_gtpu_JVppGtpuImpl
+ * Method: init0
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_gtpu_JVppGtpuImpl_init0
+ (JNIEnv *env, jclass clazz, jobject callback, jlong queue_address, jint my_client_index) {
+ gtpu_main_t * plugin_main = &gtpu_main;
+ clib_warning ("Java_io_fd_vpp_jvpp_gtpu_JVppGtpuImpl_init0");
+
+ plugin_main->my_client_index = my_client_index;
+    plugin_main->vl_input_queue = uword_to_pointer (queue_address, unix_shared_memory_queue_t *);
+
+ plugin_main->callbackObject = (*env)->NewGlobalRef(env, callback);
+ plugin_main->callbackClass = (jclass)(*env)->NewGlobalRef(env, (*env)->GetObjectClass(env, callback));
+
+ // verify API has not changed since jar generation
+  #define _(N) \
+  get_message_id(env, #N);
+  foreach_supported_api_message;
+ #undef _
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(get_message_id(env, #N), #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_reply_handler;
+ #undef _
+}
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_gtpu_JVppGtpuImpl_close0
+(JNIEnv *env, jclass clazz) {
+ gtpu_main_t * plugin_main = &gtpu_main;
+
+ // cleanup:
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackClass);
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackObject);
+
+ plugin_main->callbackClass = NULL;
+ plugin_main->callbackObject = NULL;
+}
+
+/* Attach thread to JVM and cache class references when initiating JVPP ACL */
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ if (cache_class_references(env) != 0) {
+ clib_warning ("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ return JNI_VERSION_1_8;
+}
+
+/* Clean up cached references when disposing JVPP ACL */
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+ delete_class_references(env);
+}
diff --git a/src/vpp-api/java/jvpp-gtpu/jvpp_gtpu.h b/src/vpp-api/java/jvpp-gtpu/jvpp_gtpu.h
new file mode 100644
index 00000000..447776ce
--- /dev/null
+++ b/src/vpp-api/java/jvpp-gtpu/jvpp_gtpu.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_jvpp_gtpu_h__
+#define __included_jvpp_gtpu_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+/* Global state for JVPP-gtpu */
+typedef struct {
+ /* Pointer to shared memory queue */
+ unix_shared_memory_queue_t * vl_input_queue;
+
+ /* VPP api client index */
+ u32 my_client_index;
+
+ /* Callback object and class references enabling asynchronous Java calls */
+ jobject callbackObject;
+ jclass callbackClass;
+
+} gtpu_main_t;
+
+gtpu_main_t gtpu_main __attribute__((aligned (64)));
+
+
+#endif /* __included_jvpp_gtpu_h__ */
diff --git a/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/examples/IoamExportApiExample.java b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/examples/IoamExportApiExample.java
new file mode 100644
index 00000000..2f5b7dbb
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/examples/IoamExportApiExample.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioamexport.examples;
+
+import java.net.InetAddress;
+
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.ioamexport.JVppIoamexportImpl;
+import io.fd.vpp.jvpp.ioamexport.future.FutureJVppIoamexportFacade;
+import io.fd.vpp.jvpp.ioamexport.dto.IoamExportIp6EnableDisable;
+import io.fd.vpp.jvpp.ioamexport.dto.IoamExportIp6EnableDisableReply;
+
+public class IoamExportApiExample {
+
+ public static void main(String[] args) throws Exception {
+ ioamExportTestApi();
+ }
+
+ private static void ioamExportTestApi() throws Exception {
+ System.out.println("Testing Java API for ioam export plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("ioamExportApiExample");
+ final JVpp jvpp = new JVppIoamexportImpl()) {
+            FutureJVppIoamexportFacade ioamexportJvpp = new FutureJVppIoamexportFacade(registry, jvpp);
+ System.out.println("Sending ioam export request...");
+ IoamExportIp6EnableDisable request = new IoamExportIp6EnableDisable();
+ request.isDisable = 0;
+ InetAddress collectorAddress = InetAddress.getByName("2001:0DB8:AC10:FE01:0000:0000:0000:0000");
+ InetAddress srcAddress = InetAddress.getByName("2001:0DB8:AC10:FE01:0000:0000:0000:0001");
+ request.collectorAddress = collectorAddress.getAddress();
+ request.srcAddress = srcAddress.getAddress();
+ IoamExportIp6EnableDisableReply reply = ioamexportJvpp.ioamExportIp6EnableDisable(request).toCompletableFuture().get();
+ System.out.printf("IoamExportIp6EnableDisableReply = "+reply.toString()+"%n");
+
+ Thread.sleep(1000);
+
+ System.out.println("Disconnecting...");
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/examples/Readme.txt b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/examples/Readme.txt
new file mode 100644
index 00000000..f2dfe917
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/examples/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-ioamexport-17.10.jar io.fd.vpp.jvpp.ioamexport.examples.IoamExportApiExample
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-ioamexport-17.10.jar io.fd.vpp.jvpp.ioamexport.examples.IoamExportApiExample
diff --git a/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/CallbackApiTest.java b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/CallbackApiTest.java
new file mode 100644
index 00000000..ba49d77d
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/CallbackApiTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioamexport.test;
+
+import io.fd.vpp.jvpp.AbstractCallbackApiTest;
+import io.fd.vpp.jvpp.ioamexport.JVppIoamexportImpl;
+
+import java.util.logging.Logger;
+
+
+public class CallbackApiTest extends AbstractCallbackApiTest {
+
+    private static final Logger LOG = Logger.getLogger(CallbackApiTest.class.getName());
+
+ public static void main(String[] args) throws Exception {
+ LOG.info("Testing ControlPing using Java callback API for ioamexport plugin");
+ testControlPing(args[0], new JVppIoamexportImpl());
+ }
+}
diff --git a/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/FutureApiTest.java b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/FutureApiTest.java
new file mode 100644
index 00000000..048d2445
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/FutureApiTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioamexport.test;
+
+
+import io.fd.vpp.jvpp.Assertions;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.ioamexport.JVppIoamexportImpl;
+import io.fd.vpp.jvpp.ioamexport.dto.IoamExportIp6EnableDisable;
+import io.fd.vpp.jvpp.ioamexport.dto.IoamExportIp6EnableDisableReply;
+import io.fd.vpp.jvpp.ioamexport.future.FutureJVppIoamexportFacade;
+
+import java.util.concurrent.Future;
+import java.util.logging.Logger;
+
+public class FutureApiTest {
+
+ private static final Logger LOG = Logger.getLogger(FutureApiTest.class.getName());
+
+ public static void main(String[] args) throws Exception {
+        testFutureApi(args);
+    }
+
+    private static void testFutureApi(String[] args) throws Exception {
+        LOG.info("Testing Java future API for ioamexport plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("FutureApiTest", args[0]);
+ final FutureJVppIoamexportFacade jvpp = new FutureJVppIoamexportFacade(registry, new JVppIoamexportImpl())) {
+ LOG.info("Successfully connected to VPP");
+
+ testIoamExportIp6EnableDisable(jvpp);
+
+ LOG.info("Disconnecting...");
+ }
+ }
+
+ private static void testIoamExportIp6EnableDisable(FutureJVppIoamexportFacade jvpp) throws Exception {
+ LOG.info("Sending IoamExportIp6EnableDisable request...");
+ final IoamExportIp6EnableDisable request = new IoamExportIp6EnableDisable();
+
+ final Future<IoamExportIp6EnableDisableReply> replyFuture = jvpp.ioamExportIp6EnableDisable(request).toCompletableFuture();
+ final IoamExportIp6EnableDisableReply reply = replyFuture.get();
+
+ Assertions.assertNotNull(reply);
+ }
+}
diff --git a/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/Readme.txt b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/Readme.txt
new file mode 100644
index 00000000..820071a8
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamexport/io/fd/vpp/jvpp/ioamexport/test/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-ioamexport-17.10.jar io.fd.vpp.jvpp.ioamexport.test.[test-name]
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-ioamexport-17.10.jar io.fd.vpp.jvpp.ioamexport.test.[test-name]
diff --git a/src/vpp-api/java/jvpp-ioamexport/jvpp_ioam_export.c b/src/vpp-api/java/jvpp-ioamexport/jvpp_ioam_export.c
new file mode 100644
index 00000000..cf4499d5
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamexport/jvpp_ioam_export.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <ioam/export/ioam_export_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#if VPPJNI_DEBUG == 1
+ #define DEBUG_LOG(...) clib_warning(__VA_ARGS__)
+#else
+ #define DEBUG_LOG(...)
+#endif
+
+#include <jvpp-common/jvpp_common.h>
+
+#include "jvpp-ioamexport/io_fd_vpp_jvpp_ioamexport_JVppIoamexportImpl.h"
+#include "jvpp_ioam_export.h"
+#include "jvpp-ioamexport/jvpp_ioamexport_gen.h"
+
+/*
+ * Class: io_fd_vpp_jvpp_ioamexport_JVppIoamexportImpl
+ * Method: init0
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_ioamexport_JVppIoamexportImpl_init0
+ (JNIEnv *env, jclass clazz, jobject callback, jlong queue_address, jint my_client_index) {
+ ioamexport_main_t * plugin_main = &ioamexport_main;
+ clib_warning ("Java_io_fd_vpp_jvpp_ioamexport_JVppIoamexportImpl_init0");
+
+ plugin_main->my_client_index = my_client_index;
+ plugin_main->vl_input_queue = uword_to_pointer (queue_address, unix_shared_memory_queue_t *);
+
+ plugin_main->callbackObject = (*env)->NewGlobalRef(env, callback);
+ plugin_main->callbackClass = (jclass)(*env)->NewGlobalRef(env, (*env)->GetObjectClass(env, callback));
+
+ // verify API has not changed since jar generation
+ #define _(N) \
+ get_message_id(env, #N);
+ foreach_supported_api_message;
+ #undef _
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(get_message_id(env, #N), #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_reply_handler;
+ #undef _
+}
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_ioamexport_JVppIoamexportImpl_close0
+(JNIEnv *env, jclass clazz) {
+ ioamexport_main_t * plugin_main = &ioamexport_main;
+
+ // cleanup:
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackClass);
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackObject);
+
+ plugin_main->callbackClass = NULL;
+ plugin_main->callbackObject = NULL;
+}
+
+/* Attach thread to JVM and cache class references when initiating JVPP iOAM EXPORT */
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ if (cache_class_references(env) != 0) {
+ clib_warning ("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ return JNI_VERSION_1_8;
+}
+
+/* Clean up cached references when disposing JVPP iOAM EXPORT */
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+ delete_class_references(env);
+}
diff --git a/src/vpp-api/java/jvpp-ioamexport/jvpp_ioam_export.h b/src/vpp-api/java/jvpp-ioamexport/jvpp_ioam_export.h
new file mode 100644
index 00000000..8b243def
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamexport/jvpp_ioam_export.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_jvpp_ioam_export_h__
+#define __included_jvpp_ioam_export_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+/* Global state for JVPP-IOAM-EXPORT */
+typedef struct {
+ /* Pointer to shared memory queue */
+ unix_shared_memory_queue_t * vl_input_queue;
+
+ /* VPP api client index */
+ u32 my_client_index;
+
+ /* Callback object and class references enabling asynchronous Java calls */
+ jobject callbackObject;
+ jclass callbackClass;
+
+} ioamexport_main_t;
+
+ioamexport_main_t ioamexport_main __attribute__((aligned (64)));
+
+
+#endif /* __included_jvpp_ioam_export_h__ */
diff --git a/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/examples/IoamPotApiExample.java b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/examples/IoamPotApiExample.java
new file mode 100644
index 00000000..e97d24fb
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/examples/IoamPotApiExample.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioampot.examples;
+
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.VppCallbackException;
+import io.fd.vpp.jvpp.ioampot.JVppIoampotImpl;
+import io.fd.vpp.jvpp.ioampot.callback.PotProfileAddCallback;
+import io.fd.vpp.jvpp.ioampot.dto.PotProfileAdd;
+import io.fd.vpp.jvpp.ioampot.dto.PotProfileAddReply;
+import java.nio.charset.StandardCharsets;
+
+public class IoamPotApiExample {
+
+ static class IoamPotTestCallback implements PotProfileAddCallback {
+
+ @Override
+ public void onPotProfileAddReply(final PotProfileAddReply reply) {
+ System.out.printf("Received PotProfileAddReply reply: context=%d%n",
+ reply.context);
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception: call=%s, context=%d, retval=%d%n", ex.getMethodName(),
+ ex.getCtxId(), ex.getErrorCode());
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ ioamPotTestApi();
+ }
+
+ private static void ioamPotTestApi() throws Exception {
+ System.out.println("Testing Java API for ioam pot plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("ioamPotApiExample");
+ final JVpp jvpp = new JVppIoampotImpl()) {
+ registry.register(jvpp, new IoamPotTestCallback());
+
+ System.out.println("Sending ioam pot profile add request...");
+ PotProfileAdd request = new PotProfileAdd();
+ request.id = 0;
+ request.validator = 4;
+ request.secretKey = 1;
+ request.secretShare = 2;
+ request.prime = 1234;
+ request.maxBits = 53;
+ request.lpc = 1234;
+ request.polynomialPublic = 1234;
+ request.listNameLen = (byte)"test pot profile".getBytes(StandardCharsets.UTF_8).length;
+ request.listName = "test pot profile".getBytes(StandardCharsets.UTF_8);
+ final int result = jvpp.send(request);
+ System.out.printf("PotProfileAdd send result = %d%n", result);
+
+ Thread.sleep(1000);
+
+ System.out.println("Disconnecting...");
+ }
+ }
+}
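PotProfileAdd carries both the list name bytes and their length, and the example above fills the two fields from the same string by hand. A small sketch (helper class and method names hypothetical) that derives both fields from a single conversion so they cannot drift apart:

    import java.nio.charset.StandardCharsets;

    import io.fd.vpp.jvpp.ioampot.dto.PotProfileAdd;

    public class PotProfileNames {
        // Fills listName and listNameLen together from one byte[] conversion.
        static void setListName(final PotProfileAdd request, final String name) {
            final byte[] bytes = name.getBytes(StandardCharsets.UTF_8);
            request.listName = bytes;
            request.listNameLen = (byte) bytes.length;
        }
    }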
diff --git a/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/examples/Readme.txt b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/examples/Readme.txt
new file mode 100644
index 00000000..e91550bf
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/examples/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-ioampot-17.10.jar io.fd.vpp.jvpp.ioampot.examples.IoamPotApiExample
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-ioampot-17.10.jar io.fd.vpp.jvpp.ioampot.examples.IoamPotApiExample
diff --git a/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/CallbackApiTest.java b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/CallbackApiTest.java
new file mode 100644
index 00000000..20b85d89
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/CallbackApiTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioampot.test;
+
+import io.fd.vpp.jvpp.AbstractCallbackApiTest;
+import io.fd.vpp.jvpp.ioampot.JVppIoampotImpl;
+
+import java.util.logging.Logger;
+
+public class CallbackApiTest extends AbstractCallbackApiTest {
+
+    private static final Logger LOG = Logger.getLogger(CallbackApiTest.class.getName());
+
+ public static void main(String[] args) throws Exception {
+ LOG.info("Testing ControlPing using Java callback API for ioampot plugin");
+ testControlPing(args[0], new JVppIoampotImpl());
+ }
+}
diff --git a/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/FutureApiTest.java b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/FutureApiTest.java
new file mode 100644
index 00000000..6401c678
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/FutureApiTest.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioampot.test;
+
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.ioampot.JVppIoampotImpl;
+import io.fd.vpp.jvpp.ioampot.dto.PotProfileShowConfigDetailsReplyDump;
+import io.fd.vpp.jvpp.ioampot.dto.PotProfileShowConfigDump;
+import io.fd.vpp.jvpp.ioampot.future.FutureJVppIoampotFacade;
+
+import java.util.concurrent.Future;
+import java.util.logging.Logger;
+
+public class FutureApiTest {
+
+    private static final Logger LOG = Logger.getLogger(FutureApiTest.class.getName());
+
+    public static void main(String[] args) throws Exception {
+        testFutureApi(args);
+    }
+
+    private static void testFutureApi(String[] args) throws Exception {
+        LOG.info("Testing Java future API for ioampot plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("FutureApiTest", args[0]);
+ final FutureJVppIoampotFacade jvpp = new FutureJVppIoampotFacade(registry, new JVppIoampotImpl())) {
+ LOG.info("Successfully connected to VPP");
+
+ testPotProfileShowConfigDump(jvpp);
+
+ LOG.info("Disconnecting...");
+ }
+ }
+
+ private static void testPotProfileShowConfigDump(FutureJVppIoampotFacade jvpp) throws Exception {
+ LOG.info("Sending PotProfileShowConfigDump request...");
+ final PotProfileShowConfigDump request = new PotProfileShowConfigDump();
+
+ final Future<PotProfileShowConfigDetailsReplyDump> replyFuture = jvpp.potProfileShowConfigDump(request).toCompletableFuture();
+ final PotProfileShowConfigDetailsReplyDump reply = replyFuture.get();
+
+ if (reply == null || reply.potProfileShowConfigDetails == null) {
+ throw new IllegalStateException("Received null response for empty dump: " + reply);
+ } else {
+ LOG.info(
+ String.format(
+ "Received pot profile show config dump reply: %s",
+ reply.potProfileShowConfigDetails));
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/Readme.txt b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/Readme.txt
new file mode 100644
index 00000000..f3cae262
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioampot/io/fd/vpp/jvpp/ioampot/test/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-ioampot-17.10.jar io.fd.vpp.jvpp.ioampot.test.[test-name]
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-ioampot-17.10.jar io.fd.vpp.jvpp.ioampot.test.[test-name]
diff --git a/src/vpp-api/java/jvpp-ioampot/jvpp_ioam_pot.c b/src/vpp-api/java/jvpp-ioampot/jvpp_ioam_pot.c
new file mode 100644
index 00000000..8f396989
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioampot/jvpp_ioam_pot.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <ioam/lib-pot/pot_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_typedefs
+
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#if VPPJNI_DEBUG == 1
+ #define DEBUG_LOG(...) clib_warning(__VA_ARGS__)
+#else
+ #define DEBUG_LOG(...)
+#endif
+
+#include <jvpp-common/jvpp_common.h>
+
+#include "jvpp-ioampot/io_fd_vpp_jvpp_ioampot_JVppIoampotImpl.h"
+#include "jvpp_ioam_pot.h"
+#include "jvpp-ioampot/jvpp_ioampot_gen.h"
+
+/*
+ * Class: io_fd_vpp_jvpp_ioampot_JVppIoampotImpl
+ * Method: init0
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_ioampot_JVppIoampotImpl_init0
+ (JNIEnv *env, jclass clazz, jobject callback, jlong queue_address, jint my_client_index) {
+ ioampot_main_t * plugin_main = &ioampot_main;
+ clib_warning ("Java_io_fd_vpp_jvpp_ioampot_JVppIoampotImpl_init0");
+
+ plugin_main->my_client_index = my_client_index;
+ plugin_main->vl_input_queue = uword_to_pointer (queue_address, unix_shared_memory_queue_t *);
+
+ plugin_main->callbackObject = (*env)->NewGlobalRef(env, callback);
+ plugin_main->callbackClass = (jclass)(*env)->NewGlobalRef(env, (*env)->GetObjectClass(env, callback));
+
+ // verify API has not changed since jar generation
+ #define _(N) \
+ get_message_id(env, #N);
+ foreach_supported_api_message;
+ #undef _
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(get_message_id(env, #N), #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_reply_handler;
+ #undef _
+}
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_ioampot_JVppIoampotImpl_close0
+(JNIEnv *env, jclass clazz) {
+ ioampot_main_t * plugin_main = &ioampot_main;
+
+ // cleanup:
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackClass);
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackObject);
+
+ plugin_main->callbackClass = NULL;
+ plugin_main->callbackObject = NULL;
+}
+
+/* Attach thread to JVM and cache class references when initiating JVPP iOAM POT */
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ if (cache_class_references(env) != 0) {
+ clib_warning ("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ return JNI_VERSION_1_8;
+}
+
+/* Clean up cached references when disposing JVPP iOAM POT */
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+ delete_class_references(env);
+}
diff --git a/src/vpp-api/java/jvpp-ioampot/jvpp_ioam_pot.h b/src/vpp-api/java/jvpp-ioampot/jvpp_ioam_pot.h
new file mode 100644
index 00000000..81e2a1bb
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioampot/jvpp_ioam_pot.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_jvpp_ioam_pot_h__
+#define __included_jvpp_ioam_pot_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+/* Global state for JVPP-IOAM-POT */
+typedef struct {
+ /* Pointer to shared memory queue */
+ unix_shared_memory_queue_t * vl_input_queue;
+
+ /* VPP api client index */
+ u32 my_client_index;
+
+ /* Callback object and class references enabling asynchronous Java calls */
+ jobject callbackObject;
+ jclass callbackClass;
+
+} ioampot_main_t;
+
+ioampot_main_t ioampot_main __attribute__((aligned (64)));
+
+
+#endif /* __included_jvpp_ioam_pot_h__ */
diff --git a/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/examples/IoamTraceApiExample.java b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/examples/IoamTraceApiExample.java
new file mode 100644
index 00000000..827466bd
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/examples/IoamTraceApiExample.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioamtrace.examples;
+
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.VppCallbackException;
+import io.fd.vpp.jvpp.ioamtrace.future.FutureJVppIoamtraceFacade;
+import io.fd.vpp.jvpp.ioamtrace.JVppIoamtraceImpl;
+import io.fd.vpp.jvpp.ioamtrace.callback.TraceProfileAddCallback;
+import io.fd.vpp.jvpp.ioamtrace.dto.TraceProfileAdd;
+import io.fd.vpp.jvpp.ioamtrace.dto.TraceProfileAddReply;
+import io.fd.vpp.jvpp.ioamtrace.dto.TraceProfileShowConfig;
+import io.fd.vpp.jvpp.ioamtrace.dto.TraceProfileShowConfigReply;
+
+public class IoamTraceApiExample {
+
+ static class IoamTraceTestCallback implements TraceProfileAddCallback {
+
+ @Override
+ public void onTraceProfileAddReply(final TraceProfileAddReply reply) {
+ System.out.printf("Received TraceProfileAddReply reply: context=%d%n",
+ reply.context);
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception: call=%s, context=%d, retval=%d%n", ex.getMethodName(),
+ ex.getCtxId(), ex.getErrorCode());
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ ioamTraceTestApi();
+ }
+
+ private static void ioamTraceTestApi() throws Exception {
+ System.out.println("Testing Java API for ioam trace plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("ioamTraceApiTest");
+ final JVpp jvpp = new JVppIoamtraceImpl()) {
+            FutureJVppIoamtraceFacade ioamtraceJvpp = new FutureJVppIoamtraceFacade(registry, jvpp);
+
+ System.out.println("Sending ioam trace profile add request...");
+ TraceProfileAdd request = new TraceProfileAdd();
+ request.traceType = 0x1f;
+ request.numElts = 4;
+ request.nodeId = 1;
+ request.traceTsp = 2;
+ request.appData = 1234;
+ final int result = jvpp.send(request);
+ System.out.printf("TraceProfileAdd send result = %d%n", result);
+
+ Thread.sleep(1000);
+
+ TraceProfileShowConfig showRequest = new TraceProfileShowConfig();
+ TraceProfileShowConfigReply reply = ioamtraceJvpp.traceProfileShowConfig(showRequest).toCompletableFuture().get();
+ System.out.printf("TraceProfileShowConfig result = "+ reply.toString());
+
+ System.out.println("Disconnecting...");
+ }
+ }
+}
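The example above sends TraceProfileAdd fire-and-forget via jvpp.send() and then sleeps for a fixed second. A sketch that waits on the generated future instead, assuming the facade exposes traceProfileAdd analogously to the traceProfileShowConfig call used above:

    import io.fd.vpp.jvpp.ioamtrace.dto.TraceProfileAdd;
    import io.fd.vpp.jvpp.ioamtrace.dto.TraceProfileAddReply;
    import io.fd.vpp.jvpp.ioamtrace.future.FutureJVppIoamtraceFacade;

    public class TraceProfileAddSketch {
        // Blocks until VPP acknowledges the profile instead of sleeping a fixed time.
        static TraceProfileAddReply addProfile(final FutureJVppIoamtraceFacade jvpp,
                                               final TraceProfileAdd request) throws Exception {
            return jvpp.traceProfileAdd(request).toCompletableFuture().get();
        }
    }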
diff --git a/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/examples/Readme.txt b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/examples/Readme.txt
new file mode 100644
index 00000000..e8c3907e
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/examples/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-ioamtrace-17.10.jar io.fd.vpp.jvpp.ioamtrace.examples.IoamTraceApiExample
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-ioamtrace-17.10.jar io.fd.vpp.jvpp.ioamtrace.examples.IoamTraceApiExample
diff --git a/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/CallbackApiTest.java b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/CallbackApiTest.java
new file mode 100644
index 00000000..4a71db52
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/CallbackApiTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioamtrace.test;
+
+import io.fd.vpp.jvpp.AbstractCallbackApiTest;
+import io.fd.vpp.jvpp.ioamtrace.JVppIoamtraceImpl;
+
+import java.util.logging.Logger;
+
+public class CallbackApiTest extends AbstractCallbackApiTest {
+
+    private static final Logger LOG = Logger.getLogger(CallbackApiTest.class.getName());
+
+ public static void main(String[] args) throws Exception {
+ LOG.info("Testing ControlPing using Java callback API for ioamtrace plugin");
+ testControlPing(args[0], new JVppIoamtraceImpl());
+ }
+}
diff --git a/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/FutureApiTest.java b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/FutureApiTest.java
new file mode 100644
index 00000000..4e13ed1f
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/FutureApiTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.ioamtrace.test;
+
+
+import io.fd.vpp.jvpp.Assertions;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.ioamtrace.JVppIoamtraceImpl;
+import io.fd.vpp.jvpp.ioamtrace.dto.TraceProfileShowConfig;
+import io.fd.vpp.jvpp.ioamtrace.dto.TraceProfileShowConfigReply;
+import io.fd.vpp.jvpp.ioamtrace.future.FutureJVppIoamtraceFacade;
+
+import java.util.concurrent.Future;
+import java.util.logging.Logger;
+
+public class FutureApiTest {
+
+    private static final Logger LOG = Logger.getLogger(FutureApiTest.class.getName());
+
+    public static void main(String[] args) throws Exception {
+        testFutureApi(args);
+    }
+
+    private static void testFutureApi(String[] args) throws Exception {
+        LOG.info("Testing Java future API for ioamtrace plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("FutureApiTest", args[0]);
+ final FutureJVppIoamtraceFacade jvpp = new FutureJVppIoamtraceFacade(registry, new JVppIoamtraceImpl())) {
+ LOG.info("Successfully connected to VPP");
+
+ testTraceProfileShowConfig(jvpp);
+
+ LOG.info("Disconnecting...");
+ }
+ }
+
+ private static void testTraceProfileShowConfig(FutureJVppIoamtraceFacade jvpp) throws Exception {
+ LOG.info("Sending TraceProfileShowConfig request...");
+ final TraceProfileShowConfig request = new TraceProfileShowConfig();
+
+ final Future<TraceProfileShowConfigReply> replyFuture = jvpp.traceProfileShowConfig(request).toCompletableFuture();
+ final TraceProfileShowConfigReply reply = replyFuture.get();
+
+ Assertions.assertNotNull(reply);
+ }
+}
diff --git a/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/Readme.txt b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/Readme.txt
new file mode 100644
index 00000000..9a1ba829
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamtrace/io/fd/vpp/jvpp/ioamtrace/test/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-ioamtrace-17.10.jar io.fd.vpp.jvpp.ioamtrace.test.[test-name]
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-ioamtrace-17.10.jar io.fd.vpp.jvpp.ioamtrace.test.[test-name]
diff --git a/src/vpp-api/java/jvpp-ioamtrace/jvpp_ioam_trace.c b/src/vpp-api/java/jvpp-ioamtrace/jvpp_ioam_trace.c
new file mode 100644
index 00000000..f53937e7
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamtrace/jvpp_ioam_trace.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <ioam/lib-trace/trace_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_typedefs
+
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#if VPPJNI_DEBUG == 1
+ #define DEBUG_LOG(...) clib_warning(__VA_ARGS__)
+#else
+ #define DEBUG_LOG(...)
+#endif
+
+#include <jvpp-common/jvpp_common.h>
+
+#include "jvpp-ioamtrace/io_fd_vpp_jvpp_ioamtrace_JVppIoamtraceImpl.h"
+#include "jvpp_ioam_trace.h"
+#include "jvpp-ioamtrace/jvpp_ioamtrace_gen.h"
+
+/*
+ * Class: io_fd_vpp_jvpp_ioamtrace_JVppIoamtraceImpl
+ * Method: init0
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_ioamtrace_JVppIoamtraceImpl_init0
+ (JNIEnv *env, jclass clazz, jobject callback, jlong queue_address, jint my_client_index) {
+ ioamtrace_main_t * plugin_main = &ioamtrace_main;
+ clib_warning ("Java_io_fd_vpp_jvpp_ioamtrace_JVppIoamtraceImpl_init0");
+
+ plugin_main->my_client_index = my_client_index;
+ plugin_main->vl_input_queue = uword_to_pointer (queue_address, unix_shared_memory_queue_t *);
+
+ plugin_main->callbackObject = (*env)->NewGlobalRef(env, callback);
+ plugin_main->callbackClass = (jclass)(*env)->NewGlobalRef(env, (*env)->GetObjectClass(env, callback));
+
+ // verify API has not changed since jar generation
+ #define _(N) \
+ get_message_id(env, #N);
+ foreach_supported_api_message;
+ #undef _
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(get_message_id(env, #N), #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_reply_handler;
+ #undef _
+}
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_ioamtrace_JVppIoamtraceImpl_close0
+(JNIEnv *env, jclass clazz) {
+ ioamtrace_main_t * plugin_main = &ioamtrace_main;
+
+ // cleanup:
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackClass);
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackObject);
+
+ plugin_main->callbackClass = NULL;
+ plugin_main->callbackObject = NULL;
+}
+
+/* Attach thread to JVM and cache class references when initiating JVPP iOAM Trace */
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ if (cache_class_references(env) != 0) {
+ clib_warning ("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ return JNI_VERSION_1_8;
+}
+
+/* Clean up cached references when disposing JVPP iOAM Trace */
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+ delete_class_references(env);
+}
diff --git a/src/vpp-api/java/jvpp-ioamtrace/jvpp_ioam_trace.h b/src/vpp-api/java/jvpp-ioamtrace/jvpp_ioam_trace.h
new file mode 100644
index 00000000..cb0b27e1
--- /dev/null
+++ b/src/vpp-api/java/jvpp-ioamtrace/jvpp_ioam_trace.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_jvpp_ioam_trace_h__
+#define __included_jvpp_ioam_trace_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+/* Global state for JVPP-IOAM-TRACE */
+typedef struct {
+ /* Pointer to shared memory queue */
+ unix_shared_memory_queue_t * vl_input_queue;
+
+ /* VPP api client index */
+ u32 my_client_index;
+
+ /* Callback object and class references enabling asynchronous Java calls */
+ jobject callbackObject;
+ jclass callbackClass;
+
+} ioamtrace_main_t;
+
+ioamtrace_main_t ioamtrace_main __attribute__((aligned (64)));
+
+
+#endif /* __included_jvpp_ioam_trace_h__ */
diff --git a/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/examples/CallbackApiExample.java b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/examples/CallbackApiExample.java
new file mode 100644
index 00000000..e4d5cb33
--- /dev/null
+++ b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/examples/CallbackApiExample.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.nat.examples;
+
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.VppCallbackException;
+import io.fd.vpp.jvpp.nat.JVppNatImpl;
+import io.fd.vpp.jvpp.nat.callback.Nat44InterfaceAddDelFeatureCallback;
+import io.fd.vpp.jvpp.nat.dto.Nat44InterfaceAddDelFeature;
+import io.fd.vpp.jvpp.nat.dto.Nat44InterfaceAddDelFeatureReply;
+
+public class CallbackApiExample {
+
+ static class TestCallback implements Nat44InterfaceAddDelFeatureCallback {
+
+ @Override
+ public void onNat44InterfaceAddDelFeatureReply(final Nat44InterfaceAddDelFeatureReply msg) {
+ System.out.printf("Received Nat44InterfaceAddDelFeatureReply: context=%d%n",
+ msg.context);
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception: call=%s, context=%d, retval=%d%n", ex.getMethodName(),
+ ex.getCtxId(), ex.getErrorCode());
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ testCallbackApi();
+ }
+
+ private static void testCallbackApi() throws Exception {
+ System.out.println("Testing Java callback API for nat plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("NatCallbackApiTest");
+ final JVpp jvpp = new JVppNatImpl()) {
+ registry.register(jvpp, new TestCallback());
+
+ System.out.println("Sending Nat44InterfaceAddDelFeature request...");
+ Nat44InterfaceAddDelFeature request = new Nat44InterfaceAddDelFeature();
+ request.isAdd = 1;
+ request.isInside = 1;
+ request.swIfIndex = 1;
+ final int result = jvpp.send(request);
+ System.out.printf("Nat44InterfaceAddDelFeature send result = %d%n", result);
+
+ Thread.sleep(1000);
+
+ System.out.println("Disconnecting...");
+ }
+ }
+}
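The callback example above waits a fixed Thread.sleep(1000) for the reply. A sketch (class name hypothetical, built on the same Nat44InterfaceAddDelFeatureCallback interface from the file above) that releases the waiting thread as soon as the reply or an error actually arrives:

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.TimeUnit;

    import io.fd.vpp.jvpp.VppCallbackException;
    import io.fd.vpp.jvpp.nat.callback.Nat44InterfaceAddDelFeatureCallback;
    import io.fd.vpp.jvpp.nat.dto.Nat44InterfaceAddDelFeatureReply;

    public class LatchedNatCallback implements Nat44InterfaceAddDelFeatureCallback {
        private final CountDownLatch latch = new CountDownLatch(1);

        @Override
        public void onNat44InterfaceAddDelFeatureReply(final Nat44InterfaceAddDelFeatureReply msg) {
            System.out.printf("Received Nat44InterfaceAddDelFeatureReply: context=%d%n", msg.context);
            latch.countDown(); // release the waiting thread on success
        }

        @Override
        public void onError(final VppCallbackException ex) {
            System.out.printf("Received onError: call=%s, retval=%d%n", ex.getMethodName(), ex.getErrorCode());
            latch.countDown(); // also release on failure so the caller cannot hang
        }

        // Returns true if a reply or error arrived within the timeout.
        public boolean awaitReply(final long seconds) throws InterruptedException {
            return latch.await(seconds, TimeUnit.SECONDS);
        }
    }

Registering an instance of this class instead of TestCallback and calling awaitReply(5) after jvpp.send(request) replaces the fixed sleep with an exact wait.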
diff --git a/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/examples/Readme.txt b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/examples/Readme.txt
new file mode 100644
index 00000000..ac75e04e
--- /dev/null
+++ b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/examples/Readme.txt
@@ -0,0 +1 @@
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-nat-17.10.jar io.fd.vpp.jvpp.nat.examples.CallbackApiExample
diff --git a/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/CallbackApiTest.java b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/CallbackApiTest.java
new file mode 100644
index 00000000..a6f82148
--- /dev/null
+++ b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/CallbackApiTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.nat.test;
+
+import io.fd.vpp.jvpp.AbstractCallbackApiTest;
+import io.fd.vpp.jvpp.nat.JVppNatImpl;
+
+import java.util.logging.Logger;
+
+public class CallbackApiTest extends AbstractCallbackApiTest {
+
+    private static final Logger LOG = Logger.getLogger(CallbackApiTest.class.getName());
+
+    public static void main(String[] args) throws Exception {
+        LOG.info("Testing ControlPing using Java callback API for nat plugin");
+ testControlPing(args[0], new JVppNatImpl());
+ }
+}
diff --git a/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/FutureApiTest.java b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/FutureApiTest.java
new file mode 100644
index 00000000..8643dcf4
--- /dev/null
+++ b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/FutureApiTest.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.nat.test;
+
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+import io.fd.vpp.jvpp.nat.JVppNatImpl;
+import io.fd.vpp.jvpp.nat.dto.SnatAddressDetailsReplyDump;
+import io.fd.vpp.jvpp.nat.dto.SnatAddressDump;
+import io.fd.vpp.jvpp.nat.future.FutureJVppNatFacade;
+
+import java.util.concurrent.Future;
+import java.util.logging.Logger;
+
+public class FutureApiTest {
+
+    private static final Logger LOG = Logger.getLogger(FutureApiTest.class.getName());
+
+ public static void main(String[] args) throws Exception {
+        testFutureApi(args);
+ }
+
+    private static void testFutureApi(String[] args) throws Exception {
+        LOG.info("Testing Java future API for nat plugin");
+ try (final JVppRegistry registry = new JVppRegistryImpl("FutureApiTest", args[0]);
+ final FutureJVppNatFacade jvpp = new FutureJVppNatFacade(registry, new JVppNatImpl())) {
+ LOG.info("Successfully connected to VPP");
+
+            testSnatAddressDump(jvpp);
+
+ LOG.info("Disconnecting...");
+ }
+ }
+
+    private static void testSnatAddressDump(FutureJVppNatFacade jvpp) throws Exception {
+ LOG.info("Sending SnatAddressDump request...");
+ final SnatAddressDump request = new SnatAddressDump();
+
+ final Future<SnatAddressDetailsReplyDump> replyFuture = jvpp.snatAddressDump(request).toCompletableFuture();
+ final SnatAddressDetailsReplyDump reply = replyFuture.get();
+
+        if (reply == null || reply.snatAddressDetails == null) {
+            throw new IllegalStateException("Received null reply; even an empty dump should yield an empty list: " + reply);
+ } else {
+ LOG.info(
+ String.format(
+ "Received snat address dump reply with list of snat address: %s",
+ reply.snatAddressDetails));
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/Readme.txt b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/Readme.txt
new file mode 100644
index 00000000..6f758089
--- /dev/null
+++ b/src/vpp-api/java/jvpp-nat/io/fd/vpp/jvpp/nat/test/Readme.txt
@@ -0,0 +1,4 @@
+release version:
+sudo java -cp build-vpp-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp-native/vpp/vpp-api/java/jvpp-nat-17.10.jar io.fd.vpp.jvpp.nat.test.[test-name]
+debug version:
+sudo java -cp build-vpp_debug-native/vpp/vpp-api/java/jvpp-registry-17.10.jar:build-vpp_debug-native/vpp/vpp-api/java/jvpp-nat-17.10.jar io.fd.vpp.jvpp.nat.test.[test-name]
diff --git a/src/vpp-api/java/jvpp-nat/jvpp_nat.c b/src/vpp-api/java/jvpp-nat/jvpp_nat.c
new file mode 100644
index 00000000..85217f04
--- /dev/null
+++ b/src/vpp-api/java/jvpp-nat/jvpp_nat.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <nat/nat_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <nat/nat_all_api_h.h>
+#undef vl_typedefs
+
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#if VPPJNI_DEBUG == 1
+ #define DEBUG_LOG(...) clib_warning(__VA_ARGS__)
+#else
+ #define DEBUG_LOG(...)
+#endif
+
+#include <jvpp-common/jvpp_common.h>
+
+#include "jvpp-nat/io_fd_vpp_jvpp_nat_JVppNatImpl.h"
+#include "jvpp_nat.h"
+#include "jvpp-nat/jvpp_nat_gen.h"
+
+/*
+ * Class: io_fd_vpp_jvpp_nat_JVppNatImpl
+ * Method: init0
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_nat_JVppNatImpl_init0
+ (JNIEnv *env, jclass clazz, jobject callback, jlong queue_address, jint my_client_index) {
+ nat_main_t * plugin_main = &nat_main;
+ clib_warning ("Java_io_fd_vpp_jvpp_nat_JVppNatImpl_init0");
+
+ plugin_main->my_client_index = my_client_index;
+ plugin_main->vl_input_queue = uword_to_pointer (queue_address, unix_shared_memory_queue_t *);
+
+ plugin_main->callbackObject = (*env)->NewGlobalRef(env, callback);
+ plugin_main->callbackClass = (jclass)(*env)->NewGlobalRef(env, (*env)->GetObjectClass(env, callback));
+
+ // verify API has not changed since jar generation
+ #define _(N) \
+ get_message_id(env, #N);
+ foreach_supported_api_message;
+ #undef _
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(get_message_id(env, #N), #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_reply_handler;
+ #undef _
+}
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_nat_JVppNatImpl_close0
+(JNIEnv *env, jclass clazz) {
+ nat_main_t * plugin_main = &nat_main;
+
+ // cleanup:
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackClass);
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackObject);
+
+ plugin_main->callbackClass = NULL;
+ plugin_main->callbackObject = NULL;
+}
+
+/* Attach thread to JVM and cache class references when initiating JVPP NAT */
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ if (cache_class_references(env) != 0) {
+ clib_warning ("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ return JNI_VERSION_1_8;
+}
+
+/* Clean up cached references when disposing JVPP NAT */
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+ delete_class_references(env);
+}
diff --git a/src/vpp-api/java/jvpp-nat/jvpp_nat.h b/src/vpp-api/java/jvpp-nat/jvpp_nat.h
new file mode 100644
index 00000000..c8f6b683
--- /dev/null
+++ b/src/vpp-api/java/jvpp-nat/jvpp_nat.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_jvpp_nat_h__
+#define __included_jvpp_nat_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+/* Global state for JVPP-NAT */
+typedef struct {
+ /* Pointer to shared memory queue */
+ unix_shared_memory_queue_t * vl_input_queue;
+
+ /* VPP api client index */
+ u32 my_client_index;
+
+ /* Callback object and class references enabling asynchronous Java calls */
+ jobject callbackObject;
+ jclass callbackClass;
+
+} nat_main_t;
+
+nat_main_t nat_main __attribute__((aligned (64)));
+
+
+#endif /* __included_jvpp_nat_h__ */
diff --git a/src/vpp-api/java/jvpp-pppoe/jvpp_pppoe.c b/src/vpp-api/java/jvpp-pppoe/jvpp_pppoe.c
new file mode 100644
index 00000000..c9c30305
--- /dev/null
+++ b/src/vpp-api/java/jvpp-pppoe/jvpp_pppoe.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <pppoe/pppoe_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <pppoe/pppoe_all_api_h.h>
+#undef vl_typedefs
+
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#if VPPJNI_DEBUG == 1
+ #define DEBUG_LOG(...) clib_warning(__VA_ARGS__)
+#else
+ #define DEBUG_LOG(...)
+#endif
+
+#include <jvpp-common/jvpp_common.h>
+
+#include "jvpp-pppoe/io_fd_vpp_jvpp_pppoe_JVppPppoeImpl.h"
+#include "jvpp_pppoe.h"
+#include "jvpp-pppoe/jvpp_pppoe_gen.h"
+
+/*
+ * Class: io_fd_vpp_jvpp_pppoe_JVppPppoeImpl
+ * Method: init0
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_pppoe_JVppPppoeImpl_init0
+ (JNIEnv *env, jclass clazz, jobject callback, jlong queue_address, jint my_client_index) {
+ pppoe_main_t * plugin_main = &pppoe_main;
+ clib_warning ("Java_io_fd_vpp_jvpp_pppoe_JVppPppoeImpl_init0");
+
+ plugin_main->my_client_index = my_client_index;
+    plugin_main->vl_input_queue = uword_to_pointer (queue_address, unix_shared_memory_queue_t *);
+
+ plugin_main->callbackObject = (*env)->NewGlobalRef(env, callback);
+ plugin_main->callbackClass = (jclass)(*env)->NewGlobalRef(env, (*env)->GetObjectClass(env, callback));
+
+ // verify API has not changed since jar generation
+ #define _(N) \
+        get_message_id(env, #N);
+    foreach_supported_api_message;
+ #undef _
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(get_message_id(env, #N), #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ vl_noop_handler, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_reply_handler;
+ #undef _
+}
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_pppoe_JVppPppoeImpl_close0
+(JNIEnv *env, jclass clazz) {
+ pppoe_main_t * plugin_main = &pppoe_main;
+
+ // cleanup:
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackClass);
+ (*env)->DeleteGlobalRef(env, plugin_main->callbackObject);
+
+ plugin_main->callbackClass = NULL;
+ plugin_main->callbackObject = NULL;
+}
+
+/* Attach thread to JVM and cache class references when initiating JVPP PPPoE */
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ if (cache_class_references(env) != 0) {
+ clib_warning ("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ return JNI_VERSION_1_8;
+}
+
+/* Clean up cached references when disposing JVPP PPPoE */
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+ delete_class_references(env);
+}
diff --git a/src/vpp-api/java/jvpp-pppoe/jvpp_pppoe.h b/src/vpp-api/java/jvpp-pppoe/jvpp_pppoe.h
new file mode 100644
index 00000000..4523ba9c
--- /dev/null
+++ b/src/vpp-api/java/jvpp-pppoe/jvpp_pppoe.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_jvpp_pppoe_h__
+#define __included_jvpp_pppoe_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <jni.h>
+
+/* Global state for JVPP-pppoe */
+typedef struct {
+ /* Pointer to shared memory queue */
+ unix_shared_memory_queue_t * vl_input_queue;
+
+ /* VPP api client index */
+ u32 my_client_index;
+
+ /* Callback object and class references enabling asynchronous Java calls */
+ jobject callbackObject;
+ jclass callbackClass;
+
+} pppoe_main_t;
+
+pppoe_main_t pppoe_main __attribute__((aligned (64)));
+
+
+#endif /* __included_jvpp_pppoe_h__ */
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/AbstractCallbackApiTest.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/AbstractCallbackApiTest.java
new file mode 100644
index 00000000..d221d1e0
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/AbstractCallbackApiTest.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+import io.fd.vpp.jvpp.callback.ControlPingCallback;
+import io.fd.vpp.jvpp.dto.ControlPing;
+import io.fd.vpp.jvpp.dto.ControlPingReply;
+
+public abstract class AbstractCallbackApiTest {
+
+ private static int receivedPingCount = 0;
+ private static int errorPingCount = 0;
+
+    public static void testControlPing(String shmPrefix, JVpp jvpp) throws Exception {
+        try (JVppRegistry registry = new JVppRegistryImpl("CallbackApiTest", shmPrefix)) {
+
+ registry.register(jvpp, new ControlPingCallback() {
+ @Override
+ public void onControlPingReply(final ControlPingReply reply) {
+ System.out.printf("Received ControlPingReply: %s%n", reply);
+ receivedPingCount++;
+ }
+
+ @Override
+ public void onError(VppCallbackException ex) {
+ System.out.printf("Received onError exception: call=%s, reply=%d, context=%d ", ex.getMethodName(),
+ ex.getErrorCode(), ex.getCtxId());
+ errorPingCount++;
+ }
+
+ });
+ System.out.println("Successfully connected to VPP");
+ Thread.sleep(1000);
+
+ System.out.println("Sending control ping using JVppRegistry");
+ registry.controlPing(jvpp.getClass());
+
+ Thread.sleep(2000);
+
+ System.out.println("Sending control ping using JVpp plugin");
+ jvpp.send(new ControlPing());
+
+ Thread.sleep(2000);
+ System.out.println("Disconnecting...");
+ Assertions.assertEquals(2, receivedPingCount);
+ Assertions.assertEquals(0, errorPingCount);
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/Assertions.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/Assertions.java
new file mode 100644
index 00000000..f8b591f5
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/Assertions.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+public class Assertions {
+
+ public static void assertEquals(final int expected, final int actual) {
+ if (expected != actual) {
+ throw new IllegalArgumentException(String.format("Expected[%s]/Actual[%s]", expected, actual));
+ }
+ }
+
+ public static void assertNotNull(final Object value) {
+ if (value == null) {
+ throw new IllegalArgumentException("Variable is null");
+ }
+ }
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVpp.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVpp.java
new file mode 100644
index 00000000..55f25a7b
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVpp.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+import io.fd.vpp.jvpp.callback.JVppCallback;
+import io.fd.vpp.jvpp.dto.ControlPing;
+import io.fd.vpp.jvpp.dto.JVppRequest;
+
+/**
+ * Base interface for plugin's Java API.
+ */
+public interface JVpp extends AutoCloseable {
+
+ /**
+ * Sends request to vpp.
+ *
+ * @param request request to be sent
+     * @return unique identifier of the message in the message queue
+ * @throws VppInvocationException when message could not be sent
+ */
+ int send(final JVppRequest request) throws VppInvocationException;
+
+ /**
+ * Initializes plugin's Java API.
+ *
+ * @param registry plugin registry
+ * @param callback called by vpe.api message handlers
+ * @param queueAddress address of vpp shared memory queue
+ * @param clientIndex vpp client identifier
+ */
+ void init(final JVppRegistry registry, final JVppCallback callback, final long queueAddress,
+ final int clientIndex);
+
+ /**
+ * Sends control_ping message.
+ *
+ * @param controlPing request DTO
+     * @return unique identifier of the message in the message queue
+ * @throws VppInvocationException when message could not be sent
+ */
+ int controlPing(final ControlPing controlPing) throws VppInvocationException;
+}
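
Note: the send/init contract above is easiest to read from the caller's side. A minimal sketch (the client id string and the callback instance are illustrative; it mirrors the CallbackApiExample added earlier in this patch):

    // assumes some JVppCallback implementation, e.g. the TestCallback shown in CallbackApiExample
    try (JVppRegistry registry = new JVppRegistryImpl("JVppDocExample");
         JVpp jvpp = new JVppNatImpl()) {
        registry.register(jvpp, callback);         // register() invokes jvpp.init() internally
        Nat44InterfaceAddDelFeature request = new Nat44InterfaceAddDelFeature();
        int context = jvpp.send(request);          // returns the message's context id
    }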
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistry.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistry.java
new file mode 100644
index 00000000..6535db02
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistry.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+import io.fd.vpp.jvpp.callback.JVppCallback;
+
+/**
+ * Manages VPP connection and stores plugin callbacks.
+ */
+public interface JVppRegistry extends AutoCloseable {
+
+ /**
+ * Vpp connection managed by the registry.
+ *
+ * @return representation of vpp connection
+ */
+ VppConnection getConnection();
+
+ /**
+ * Registers callback and initializes Java API for given plugin.
+ *
+     * @param jvpp     plugin's Java API instance
+ * @param callback callback provided by the plugin
+ * @throws NullPointerException if name or callback is null
+ * @throws IllegalArgumentException if plugin was already registered
+ */
+ void register(final JVpp jvpp, final JVppCallback callback);
+
+ /**
+ * Unregisters callback for the given plugin.
+ *
+ * @param name plugin name
+ * @throws NullPointerException if name is null
+ * @throws IllegalArgumentException if plugin was not registered
+ */
+ void unregister(final String name);
+
+ /**
+ * Returns callback registered for the plugin.
+ *
+ * @param name plugin name
+ * @return callback provided by the plugin
+ * @throws NullPointerException if name is null
+ * @throws IllegalArgumentException if plugin was not registered
+ */
+ JVppCallback get(final String name);
+
+ /**
+     * Sends control ping. The reply handler calls the callback registered for the given plugin.
+     *
+     * Control ping is used to perform the initial attachment of the VPP RX thread
+     * to the JVM (which takes place in the plugin's JNI lib) and to delimit dump
+     * message replies so they can be wrapped in one list.
+     *
+     * VPP plugins don't have to provide a special control ping message, so control
+     * ping support is provided centrally by JVppRegistry.
+     *
+ * @param clazz identifies plugin that should receive ping callback
+ * @return unique identifier of message in message queue
+ */
+ int controlPing(final Class<? extends JVpp> clazz) throws VppInvocationException;
+}
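
Note: a condensed view of the control-ping round trip described in the javadoc above, following the pattern used by AbstractCallbackApiTest in this patch:

    registry.register(jvpp, new ControlPingCallback() {
        @Override
        public void onControlPingReply(final ControlPingReply reply) {
            // reply.context matches the id returned by controlPing() below
            System.out.println("Ping reply: " + reply);
        }

        @Override
        public void onError(final VppCallbackException ex) {
            System.err.println("Ping failed: " + ex.getMessage());
        }
    });
    int context = registry.controlPing(jvpp.getClass());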
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistryImpl.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistryImpl.java
new file mode 100644
index 00000000..6e938ae3
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistryImpl.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+import static java.util.Objects.requireNonNull;
+
+import io.fd.vpp.jvpp.callback.ControlPingCallback;
+import io.fd.vpp.jvpp.callback.JVppCallback;
+import io.fd.vpp.jvpp.dto.ControlPingReply;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Default implementation of JVppRegistry.
+ */
+public final class JVppRegistryImpl implements JVppRegistry, ControlPingCallback {
+
+ private static final Logger LOG = Logger.getLogger(JVppRegistryImpl.class.getName());
+
+ private final VppJNIConnection connection;
+    // Concurrent map; registration, lookup and removal need no additional guarding
+ private final Map<String, JVppCallback> pluginRegistry;
+ // Guarded by self
+ private final Map<Integer, ControlPingCallback> pingCalls;
+
+ public JVppRegistryImpl(final String clientName) throws IOException {
+ connection = new VppJNIConnection(clientName);
+ connection.connect();
+ pluginRegistry = new ConcurrentHashMap<>();
+ pingCalls = new HashMap<>();
+ }
+
+ public JVppRegistryImpl(final String clientName, final String shmPrefix) throws IOException {
+ connection = new VppJNIConnection(clientName, shmPrefix);
+ connection.connect();
+ pluginRegistry = new ConcurrentHashMap<>();
+ pingCalls = new HashMap<>();
+ }
+
+ @Override
+ public VppConnection getConnection() {
+ return connection;
+ }
+
+ @Override
+ public void register(final JVpp jvpp, final JVppCallback callback) {
+ requireNonNull(jvpp, "jvpp should not be null");
+ requireNonNull(callback, "Callback should not be null");
+ final String name = jvpp.getClass().getName();
+ if (pluginRegistry.containsKey(name)) {
+ throw new IllegalArgumentException(
+ String.format("Callback for plugin %s was already registered", name));
+ }
+ jvpp.init(this, callback, connection.getConnectionInfo().queueAddress,
+ connection.getConnectionInfo().clientIndex);
+ pluginRegistry.put(name, callback);
+ }
+
+ @Override
+ public void unregister(final String name) {
+ requireNonNull(name, "Plugin name should not be null");
+ final JVppCallback previous = pluginRegistry.remove(name);
+ assertPluginWasRegistered(name, previous);
+ }
+
+ @Override
+ public JVppCallback get(final String name) {
+ requireNonNull(name, "Plugin name should not be null");
+ JVppCallback value = pluginRegistry.get(name);
+ assertPluginWasRegistered(name, value);
+ return value;
+ }
+
+ private native int controlPing0() throws VppInvocationException;
+
+ @Override
+ public int controlPing(final Class<? extends JVpp> clazz) throws VppInvocationException {
+ connection.checkActive();
+ final String name = clazz.getName();
+
+        final ControlPingCallback callback = (ControlPingCallback) pluginRegistry.get(name);
+ assertPluginWasRegistered(name, callback);
+
+ synchronized (pingCalls) {
+ int context = controlPing0();
+ if (context < 0) {
+ throw new VppInvocationException("controlPing", context);
+ }
+
+ pingCalls.put(context, callback);
+ return context;
+ }
+ }
+
+ @Override
+ public void onControlPingReply(final ControlPingReply reply) {
+ final ControlPingCallback callback;
+ synchronized (pingCalls) {
+ callback = pingCalls.remove(reply.context);
+ if (callback == null) {
+ LOG.log(Level.WARNING, "No callback was registered for reply context=" + reply.context + " Contexts waiting="
+ + pingCalls.keySet());
+ return;
+ }
+ }
+ // pass the reply to the callback registered by the ping caller
+ callback.onControlPingReply(reply);
+ }
+
+ @Override
+ public void onError(final VppCallbackException ex) {
+ final int ctxId = ex.getCtxId();
+ final ControlPingCallback callback;
+
+ synchronized (pingCalls) {
+ callback = pingCalls.get(ctxId);
+ }
+ if (callback == null) {
+ LOG.log(Level.WARNING, "No callback was registered for reply id={0} ", ctxId);
+ return;
+ }
+ // pass the error to the callback registered by the ping caller
+ callback.onError(ex);
+ }
+
+ private static void assertPluginWasRegistered(final String name, final JVppCallback value) {
+ if (value == null) {
+ throw new IllegalArgumentException(String.format("Callback for plugin %s is not registered", name));
+ }
+ }
+
+ @Override
+ public void close() throws Exception {
+ connection.close();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/NativeLibraryLoader.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/NativeLibraryLoader.java
new file mode 100644
index 00000000..ce6d1bfc
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/NativeLibraryLoader.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.attribute.PosixFilePermission;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Utility class for loading JNI libraries.
+ */
+public final class NativeLibraryLoader {
+
+ private static final Logger LOG = Logger.getLogger(NativeLibraryLoader.class.getName());
+
+ private NativeLibraryLoader() {
+ throw new UnsupportedOperationException("This utility class cannot be instantiated.");
+ }
+
+ /**
+     * Loads JNI library using the class loader of the given class.
+     *
+     * @param libName name of the library to be loaded
+     * @param clazz   class whose class loader is used to locate the library resource
+     */
+    public static void loadLibrary(final String libName, final Class<?> clazz) throws IOException {
+ java.util.Objects.requireNonNull(libName, "libName should not be null");
+ java.util.Objects.requireNonNull(clazz, "clazz should not be null");
+ try (final InputStream is = clazz.getResourceAsStream('/' + libName)) {
+ if (is == null) {
+ throw new IOException("Failed to open library resource " + libName);
+ }
+ loadStream(libName, is);
+ }
+ }
+
+ private static void loadStream(final String libName, final InputStream is) throws IOException {
+ final Set<PosixFilePermission> perms = PosixFilePermissions.fromString("rwxr-x---");
+ final Path p = Files.createTempFile(libName, null, PosixFilePermissions.asFileAttribute(perms));
+ try {
+ Files.copy(is, p, StandardCopyOption.REPLACE_EXISTING);
+ Runtime.getRuntime().load(p.toString());
+ } catch (Exception e) {
+ throw new IOException("Failed to load library " + p, e);
+ } finally {
+ try {
+ Files.deleteIfExists(p);
+ } catch (IOException e) {
+ LOG.log(Level.WARNING, String.format("Failed to delete temporary file %s.", p), e);
+ }
+ }
+ }
+}
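
Note: loadLibrary() copies the bundled .so out of the jar into a temp file before loading it, since JNI cannot load directly from a classpath resource. Its intended call site is the static initializer of VppJNIConnection (next file):

    NativeLibraryLoader.loadLibrary("libjvpp_registry.so", VppJNIConnection.class);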
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppBaseCallException.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppBaseCallException.java
new file mode 100644
index 00000000..7fc1682b
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppBaseCallException.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+
+/**
+ * Base exception representing a failed JVpp request call.
+ */
+public abstract class VppBaseCallException extends Exception {
+ private final String methodName;
+ private final int errorCode;
+
+ /**
+     * Constructs a VppBaseCallException with the specified api method name and error code.
+     *
+     * @param methodName name of the method whose invocation or execution failed
+     * @param errorCode negative error code value associated with this failure
+     * @throws NullPointerException if methodName is null
+     */
+    public VppBaseCallException(final String methodName, final int errorCode) {
+        super(String.format("vppApi.%s failed with error code: %d", methodName, errorCode));
+        this.methodName = java.util.Objects.requireNonNull(methodName, "methodName is null!");
+        this.errorCode = errorCode;
+        if (errorCode >= 0) {
+            throw new IllegalArgumentException("Error code must be < 0. Was " + errorCode + " for " + methodName);
+        }
+ }
+
+ /**
+     * Constructs a VppBaseCallException with the specified api method name, error description and error code.
+     *
+     * @param methodName name of the method whose invocation or execution failed
+     * @param message description of the error reason
+     * @param errorCode negative error code value associated with this failure
+     * @throws NullPointerException if methodName is null
+     */
+    public VppBaseCallException(final String methodName, final String message, final int errorCode) {
+        super(String.format("vppApi.%s failed: %s (error code: %d)", methodName, message, errorCode));
+        this.methodName = java.util.Objects.requireNonNull(methodName, "methodName is null!");
+        this.errorCode = errorCode;
+        if (errorCode >= 0) {
+            throw new IllegalArgumentException("Error code must be < 0. Was " + errorCode + " for " + methodName);
+        }
+ }
+
+ /**
+ * Returns name of a method, which invocation failed.
+ *
+ * @return method name
+ */
+ public String getMethodName() {
+ return methodName;
+ }
+
+ /**
+ * Returns the error code associated with this failure.
+ *
+ * @return a negative integer error code
+ */
+ public int getErrorCode() {
+ return errorCode;
+ }
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppCallbackException.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppCallbackException.java
new file mode 100644
index 00000000..adcc5d26
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppCallbackException.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+/**
+ * Callback Exception representing failed operation of JVpp request call
+ */
+public class VppCallbackException extends VppBaseCallException {
+ private final int ctxId;
+
+ /**
+     * Constructs a VppCallbackException with the specified api method name, error description,
+     * context identifier and error code.
+     *
+     * @param methodName name of the method whose invocation failed
+     * @param message description of the error reason
+     * @param ctxId api request context identifier
+     * @param errorCode negative error code value associated with this failure
+     * @throws NullPointerException if methodName is null
+     */
+    public VppCallbackException(final String methodName, final String message, final int ctxId, final int errorCode) {
+ super(methodName, message, errorCode);
+ this.ctxId = ctxId;
+ }
+
+ /**
+ * Returns api request context identifier.
+ *
+ * @return value of context identifier
+ */
+ public int getCtxId() {
+ return ctxId;
+ }
+
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppConnection.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppConnection.java
new file mode 100644
index 00000000..e6fd3bdb
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppConnection.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+import java.io.IOException;
+
+/**
+ * Representation of a management connection to VPP.
+ */
+public interface VppConnection extends AutoCloseable {
+
+ /**
+ * Opens VppConnection for communication with VPP.
+ *
+ * @throws IOException if connection is not established
+ */
+ void connect() throws IOException;
+
+ /**
+ * Checks if this instance connection is active.
+ *
+ * @throws IllegalStateException if this instance was disconnected.
+ */
+ void checkActive();
+
+ /**
+ * Closes Vpp connection.
+ */
+ @Override
+ void close();
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppInvocationException.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppInvocationException.java
new file mode 100644
index 00000000..a7ccb197
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppInvocationException.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+/**
+ * Exception thrown when Vpp jAPI method invocation failed.
+ */
+public class VppInvocationException extends VppBaseCallException {
+ /**
+     * Constructs a VppInvocationException with the specified api method name and error code.
+     *
+     * @param methodName name of the method whose invocation failed
+     * @param errorCode negative error code value associated with this failure
+     * @throws NullPointerException if methodName is null
+ */
+ public VppInvocationException(final String methodName, final int errorCode) {
+ super(methodName, errorCode);
+ }
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppJNIConnection.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppJNIConnection.java
new file mode 100644
index 00000000..6a414f36
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppJNIConnection.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp;
+
+import static io.fd.vpp.jvpp.NativeLibraryLoader.loadLibrary;
+import static java.lang.String.format;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * JNI based representation of a management connection to VPP.
+ */
+public final class VppJNIConnection implements VppConnection {
+ private static final Logger LOG = Logger.getLogger(VppJNIConnection.class.getName());
+ private static final String DEFAULT_SHM_PREFIX = "/vpe-api";
+
+ static {
+ final String libName = "libjvpp_registry.so";
+ try {
+ loadLibrary(libName, VppJNIConnection.class);
+ } catch (IOException e) {
+ LOG.log(Level.SEVERE, format("Can't find vpp jni library: %s", libName), e);
+ throw new ExceptionInInitializerError(e);
+ }
+ }
+
+ private ConnectionInfo connectionInfo;
+
+ private final String clientName;
+ private final String shmPrefix;
+ private volatile boolean disconnected = false;
+
+ /**
+ * Create VPPJNIConnection instance for client connecting to VPP.
+ *
+     * @param clientName client name to be used for communication; a single connection per clientName is
+     *                   allowed.
+ */
+ public VppJNIConnection(final String clientName) {
+ this.clientName = Objects.requireNonNull(clientName, "Null clientName");
+ this.shmPrefix = DEFAULT_SHM_PREFIX;
+ }
+
+ public VppJNIConnection(final String clientName, final String shmPrefix) {
+ this.clientName = Objects.requireNonNull(clientName, "Null clientName");
+ this.shmPrefix = Objects.requireNonNull(shmPrefix, "Null shmPrefix");
+ }
+
+ /**
+ * Guarded by VppJNIConnection.class
+ */
+ private static final Map<String, VppJNIConnection> connections = new HashMap<>();
+
+ /**
+     * Initiates VPP connection for the current instance.
+     *
+     * Multiple instances are allowed since this class is not a singleton (VPP allows multiple management connections).
+     *
+     * However, only a single connection per clientName is allowed.
+     *
+     * @throws IOException in case the connection could not be established
+     */
+ @Override
+ public void connect() throws IOException {
+        connectInternal(shmPrefix);
+ }
+
+    private void connectInternal(final String shmPrefix) throws IOException {
+ Objects.requireNonNull(shmPrefix, "Shared memory prefix must be defined");
+
+ synchronized (VppJNIConnection.class) {
+ if (connections.containsKey(clientName)) {
+ throw new IOException("Client " + clientName + " already connected");
+ }
+
+ connectionInfo = clientConnect(shmPrefix, clientName);
+ if (connectionInfo.status != 0) {
+ throw new IOException("Connection returned error " + connectionInfo.status);
+ }
+ connections.put(clientName, this);
+ }
+ }
+
+ @Override
+ public final void checkActive() {
+ if (disconnected) {
+ throw new IllegalStateException("Disconnected client " + clientName);
+ }
+ }
+
+ @Override
+ public final synchronized void close() {
+ if (!disconnected) {
+ disconnected = true;
+ try {
+ clientDisconnect();
+ } finally {
+ synchronized (VppJNIConnection.class) {
+ connections.remove(clientName);
+ }
+ }
+ }
+ }
+
+ public ConnectionInfo getConnectionInfo() {
+ return connectionInfo;
+ }
+
+ /**
+ * VPP connection information used by plugins to reuse the connection.
+ */
+ public static final class ConnectionInfo {
+ public final long queueAddress;
+ public final int clientIndex;
+ public final int status; // FIXME throw exception instead
+ public final int pid;
+
+ public ConnectionInfo(long queueAddress, int clientIndex, int status, int pid) {
+ this.queueAddress = queueAddress;
+ this.clientIndex = clientIndex;
+ this.status = status;
+ this.pid = pid;
+ }
+ }
+
+ private static native ConnectionInfo clientConnect(String shmPrefix, String clientName);
+
+ private static native void clientDisconnect();
+
+}
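
Note: the two-argument constructor lets clients target a VPP instance whose api-segment prefix differs from the default /vpe-api. A sketch; the prefix value below is hypothetical and must match the api-segment configuration of the target VPP:

    JVppRegistry registry = new JVppRegistryImpl("my-client", "/custom-vpe-api");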
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/ControlPingCallback.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/ControlPingCallback.java
new file mode 100644
index 00000000..efddfdbb
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/ControlPingCallback.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.callback;
+
+import io.fd.vpp.jvpp.dto.ControlPingReply;
+
+/**
+ * Represents callback for control_ping message.
+ */
+public interface ControlPingCallback extends JVppCallback {
+
+ void onControlPingReply(ControlPingReply reply);
+
+}
+
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/JVppCallback.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/JVppCallback.java
new file mode 100644
index 00000000..ae02063b
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/JVppCallback.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.callback;
+import io.fd.vpp.jvpp.VppCallbackException;
+
+/**
+ * Base JVppCallback interface
+ */
+public interface JVppCallback {
+ /**
+ * onError callback handler used to report failing operation
+ * @param ex VppCallbackException object containing details about failing operation
+ */
+ void onError(VppCallbackException ex);
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/JVppNotificationCallback.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/JVppNotificationCallback.java
new file mode 100644
index 00000000..8ab0cb21
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/callback/JVppNotificationCallback.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.callback;
+
+/**
+* Notification callback
+*/
+public interface JVppNotificationCallback {
+
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/coverity/SuppressFBWarnings.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/coverity/SuppressFBWarnings.java
new file mode 100644
index 00000000..1e780bb4
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/coverity/SuppressFBWarnings.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.coverity;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+/**
+ * Used to suppress FindBugs warnings found by Coverity. <br>
+ * We don't want extra dependency, so we define our own annotation version.
+ *
+ * @see <a href="https://sourceforge.net/p/findbugs/feature-requests/298/#5e88"/>Findbugs sourceforge</a>
+ */
+@Retention(RetentionPolicy.CLASS)
+public @interface SuppressFBWarnings {
+ /**
+ * The set of FindBugs warnings that are to be suppressed in annotated element. The value can be a bug category,
+ * kind or pattern.
+ */
+ String[] value() default {};
+
+ /**
+ * Optional documentation of the reason why the warning is suppressed
+ */
+ String justification() default "";
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/ControlPing.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/ControlPing.java
new file mode 100644
index 00000000..984e1674
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/ControlPing.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.dto;
+
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.VppInvocationException;
+
+/**
+ * Represents request DTO for control_ping message.
+ */
+public final class ControlPing implements JVppRequest {
+
+ @Override
+ public int send(final JVpp jvpp) throws VppInvocationException {
+ return jvpp.controlPing(this);
+ }
+
+}
+
+
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/ControlPingReply.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/ControlPingReply.java
new file mode 100644
index 00000000..61e4d0e4
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/ControlPingReply.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.dto;
+
+import java.util.Objects;
+
+/**
+ * Represents reply DTO for control_ping message.
+ */
+public final class ControlPingReply implements JVppReply<ControlPing> {
+
+ public int context;
+ public int clientIndex;
+ public int vpePid;
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ final ControlPingReply that = (ControlPingReply) o;
+ return context == that.context &&
+ clientIndex == that.clientIndex &&
+ vpePid == that.vpePid;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(context, clientIndex, vpePid);
+ }
+
+ @Override
+ public String toString() {
+ return "ControlPingReply{" +
+ "context=" + context +
+ ", clientIndex=" + clientIndex +
+ ", vpePid=" + vpePid +
+ '}';
+ }
+}
+
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppDump.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppDump.java
new file mode 100644
index 00000000..60b98984
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppDump.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.dto;
+
+/**
+* Base interface for all dump requests
+*/
+public interface JVppDump extends JVppRequest {
+
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppNotification.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppNotification.java
new file mode 100644
index 00000000..5554f501
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppNotification.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.dto;
+
+/**
+* Base interface for all notification DTOs
+*/
+public interface JVppNotification {
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppReply.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppReply.java
new file mode 100644
index 00000000..73f512d4
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppReply.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.dto;
+
+/**
+* Base interface for all reply DTOs
+*/
+public interface JVppReply<REQ extends JVppRequest> {
+
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppReplyDump.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppReplyDump.java
new file mode 100644
index 00000000..15111395
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppReplyDump.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.dto;
+
+/**
+* Base interface for all dump replies
+*/
+public interface JVppReplyDump<REQ extends JVppRequest, RESP extends JVppReply<REQ>>
+ extends JVppReply<REQ> {
+
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppRequest.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppRequest.java
new file mode 100644
index 00000000..9b301da2
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/dto/JVppRequest.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.dto;
+
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.VppInvocationException;
+
+/**
+ * Base interface for all request DTOs.
+ */
+public interface JVppRequest {
+
+ /**
+ * Invoke current operation asynchronously on VPP
+ *
+ * @return context id of this request. Can be used to track incoming response
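+     *
+     * <p>Usage sketch (hypothetical generated {@code GetNodeIndex} request DTO; the
+     * name is illustrative, not part of this package):
+     * <pre>
+     * GetNodeIndex request = new GetNodeIndex();
+     * int contextId = request.send(jvpp); // async; the reply arrives via callback
+     * </pre>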
+ */
+ int send(JVpp jvpp) throws VppInvocationException;
+
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/future/AbstractFutureJVppInvoker.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/future/AbstractFutureJVppInvoker.java
new file mode 100644
index 00000000..e7df528a
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/future/AbstractFutureJVppInvoker.java
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.future;
+
+
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionStage;
+import io.fd.vpp.jvpp.JVpp;
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.VppInvocationException;
+import io.fd.vpp.jvpp.dto.JVppDump;
+import io.fd.vpp.jvpp.dto.JVppReply;
+import io.fd.vpp.jvpp.dto.JVppReplyDump;
+import io.fd.vpp.jvpp.dto.JVppRequest;
+
+/**
+ * Future facade on top of JVpp
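+ *
+ * <p>Flow sketch: {@code send} forwards the request via {@link JVpp#send}, stores the
+ * returned context id together with a new {@link java.util.concurrent.CompletableFuture}
+ * in the shared request map, and the generated callback completes and removes that
+ * future when the matching reply arrives.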
+ */
+public abstract class AbstractFutureJVppInvoker implements FutureJVppInvoker {
+
+ private final JVpp jvpp;
+ private final JVppRegistry registry;
+
+ /**
+ * Guarded by self
+ */
+ private final Map<Integer, CompletableFuture<? extends JVppReply<?>>> requests;
+
+ protected AbstractFutureJVppInvoker(final JVpp jvpp, final JVppRegistry registry,
+ final Map<Integer, CompletableFuture<? extends JVppReply<?>>> requestMap) {
+ this.jvpp = Objects.requireNonNull(jvpp, "jvpp should not be null");
+ this.registry = Objects.requireNonNull(registry, "registry should not be null");
+        // The request map is the shared state between this facade and its callback:
+        // the facade puts futures in, and the callback completes and removes them.
+ this.requests = Objects.requireNonNull(requestMap, "Null requestMap");
+ }
+
+ protected final Map<Integer, CompletableFuture<? extends JVppReply<?>>> getRequests() {
+ synchronized (requests) {
+ return requests;
+ }
+ }
+
+ // TODO use Optional in Future, java8
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public <REQ extends JVppRequest, REPLY extends JVppReply<REQ>> CompletionStage<REPLY> send(REQ req) {
+ synchronized(requests) {
+ try {
+                if (req instanceof JVppDump) {
+                    throw new IllegalArgumentException("Dump requests must be sent with an empty reply dump: use send(request, emptyReplyDump)");
+                }
+
+                final int contextId = jvpp.send(req);
+                final CompletableFuture<REPLY> replyCompletableFuture = new CompletableFuture<>();
+                requests.put(contextId, replyCompletableFuture);
+
+ // TODO in case of timeouts/missing replies, requests from the map are not removed
+ // consider adding cancel method, that would remove requests from the map and cancel
+ // associated replyCompletableFuture
+
+ return replyCompletableFuture;
+ } catch (VppInvocationException ex) {
+ final CompletableFuture<REPLY> replyCompletableFuture = new CompletableFuture<>();
+ replyCompletableFuture.completeExceptionally(ex);
+ return replyCompletableFuture;
+ }
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public <REQ extends JVppRequest, REPLY extends JVppReply<REQ>, DUMP extends JVppReplyDump<REQ, REPLY>> CompletionStage<DUMP> send(
+ REQ req, DUMP emptyReplyDump) {
+ synchronized(requests) {
+ try {
+                if (!(req instanceof JVppDump)) {
+                    throw new IllegalArgumentException("Only dump requests may be sent with an empty reply dump: use send(request) for regular calls");
+                }
+
+                final int contextId = jvpp.send(req);
+                final CompletableDumpFuture<DUMP> replyCompletableFuture =
+                        new CompletableDumpFuture<>(contextId, emptyReplyDump);
+
+                requests.put(contextId, replyCompletableFuture);
+ requests.put(registry.controlPing(jvpp.getClass()), replyCompletableFuture);
+
+ // TODO in case of timeouts/missing replies, requests from the map are not removed
+ // consider adding cancel method, that would remove requests from the map and cancel
+ // associated replyCompletableFuture
+
+ return replyCompletableFuture;
+ } catch (VppInvocationException ex) {
+ final CompletableFuture<DUMP> replyCompletableFuture = new CompletableFuture<>();
+ replyCompletableFuture.completeExceptionally(ex);
+ return replyCompletableFuture;
+ }
+ }
+ }
+
+ public static final class CompletableDumpFuture<T extends JVppReplyDump<?, ?>> extends CompletableFuture<T> {
+ private final T replyDump;
+ private final int contextId;
+
+ public CompletableDumpFuture(final int contextId, final T emptyDump) {
+ this.contextId = contextId;
+ this.replyDump = emptyDump;
+ }
+
+ public int getContextId() {
+ return contextId;
+ }
+
+ public T getReplyDump() {
+ return replyDump;
+ }
+ }
+
+ @Override
+ public void close() throws Exception {
+ jvpp.close();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/future/FutureJVppInvoker.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/future/FutureJVppInvoker.java
new file mode 100644
index 00000000..7a48e418
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/future/FutureJVppInvoker.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.future;
+
+
+import io.fd.vpp.jvpp.dto.JVppReply;
+import io.fd.vpp.jvpp.dto.JVppReplyDump;
+import io.fd.vpp.jvpp.dto.JVppRequest;
+
+import java.util.concurrent.CompletionStage;
+import io.fd.vpp.jvpp.notification.NotificationRegistryProvider;
+
+/**
+ * Future facade on top of JVpp.
+ */
+public interface FutureJVppInvoker extends NotificationRegistryProvider, AutoCloseable {
+
+ /**
+ * Invoke asynchronous operation on VPP
+ *
+ * @return CompletionStage with future result of an async VPP call
+     * @throws io.fd.vpp.jvpp.VppInvocationException if sending the request fails
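+     *
+     * <p>Usage sketch (hypothetical {@code ShowVersion} request DTO; the name is
+     * illustrative):
+     * <pre>
+     * futureJVpp.send(new ShowVersion())
+     *     .thenAccept(System.out::println);
+     * </pre>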
+ */
+ <REQ extends JVppRequest, REPLY extends JVppReply<REQ>> CompletionStage<REPLY> send(REQ req);
+
+
+ /**
+ * Invoke asynchronous dump operation on VPP
+ *
+ * @return CompletionStage with aggregated future result of an async VPP dump call
+     * @throws io.fd.vpp.jvpp.VppInvocationException if sending the request fails
+ */
+ <REQ extends JVppRequest, REPLY extends JVppReply<REQ>, DUMP extends JVppReplyDump<REQ, REPLY>> CompletionStage<DUMP> send(
+ REQ req, DUMP emptyReplyDump);
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/notification/NotificationRegistry.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/notification/NotificationRegistry.java
new file mode 100644
index 00000000..3c72ff79
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/notification/NotificationRegistry.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.notification;
+
+/**
+ * Base registry for notification callbacks.
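+ *
+ * <p>Plugin-specific registries (generated by notification_gen.py) are expected to
+ * extend this interface with typed registration methods for each notification.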
+ */
+public interface NotificationRegistry extends AutoCloseable {
+
+ void close();
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/notification/NotificationRegistryProvider.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/notification/NotificationRegistryProvider.java
new file mode 100644
index 00000000..4a6e06b7
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/notification/NotificationRegistryProvider.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.notification;
+
+/**
+ * Provides access to the notification registry.
+ */
+public interface NotificationRegistryProvider {
+
+ /**
+ * Get current notification registry instance
+ */
+ NotificationRegistry getNotificationRegistry();
+}
diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/test/ConnectionTest.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/test/ConnectionTest.java
new file mode 100644
index 00000000..27b4d29f
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/test/ConnectionTest.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.fd.vpp.jvpp.test;
+
+import io.fd.vpp.jvpp.JVppRegistry;
+import io.fd.vpp.jvpp.JVppRegistryImpl;
+
+/**
+ * Run using:
+ * sudo java -cp build-vpp-native/vpp-api/java/jvpp-registry-16.09.jar io.fd.vpp.jvpp.test.ConnectionTest
+ */
+public class ConnectionTest {
+
+ private static void testConnect() throws Exception {
+ System.out.println("Testing JNI connection with JVppRegistry");
+ final JVppRegistry registry = new JVppRegistryImpl("ConnectionTest");
+ try {
+ System.out.println("Successfully connected to vpp");
+ Thread.sleep(5000);
+ System.out.println("Disconnecting...");
+ Thread.sleep(1000);
+ } finally {
+ registry.close();
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ testConnect();
+ }
+}
diff --git a/src/vpp-api/java/jvpp-registry/jvpp_registry.c b/src/vpp-api/java/jvpp-registry/jvpp_registry.c
new file mode 100644
index 00000000..c90822d7
--- /dev/null
+++ b/src/vpp-api/java/jvpp-registry/jvpp_registry.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _GNU_SOURCE /* for strcasestr(3) */
+#include <vnet/vnet.h>
+
+#define vl_api_version(n,v) static u32 vpe_api_version = (v);
+#include <vpp/api/vpe.api.h>
+#undef vl_api_version
+
+
+#include <jni.h>
+#include <jvpp-common/jvpp_common.h>
+#include "io_fd_vpp_jvpp_VppJNIConnection.h"
+#include "io_fd_vpp_jvpp_JVppRegistryImpl.h"
+
+#include <vpp/api/vpe_msg_enum.h>
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+
+vlib_main_t vlib_global_main;
+vlib_main_t **vlib_mains;
+
+/*
+ * The Java runtime isn't compiled with -fstack-protector, so we have to
+ * supply the missing external references needed by the regular vpp libraries.
+ */
+void __stack_chk_guard(void) __attribute__((weak));
+void __stack_chk_guard(void) {
+}
+
+#define CONTROL_PING_MESSAGE "control_ping"
+#define CONTROL_PING_REPLY_MESSAGE "control_ping_reply"
+
+typedef struct {
+ /* UThread attachment */
+ volatile u32 control_ping_result_ready;
+ volatile i32 control_ping_retval;
+
+ /* Control ping callback */
+ jobject registryObject;
+ jclass registryClass;
+ jclass controlPingReplyClass;
+ jclass callbackExceptionClass;
+ int control_ping_msg_id;
+ int control_ping_reply_msg_id;
+
+ /* Thread cleanup */
+ pthread_key_t cleanup_rx_thread_key;
+
+ /* Connected indication */
+ volatile u8 is_connected;
+ u32 vpe_pid;
+} jvpp_registry_main_t;
+
+jvpp_registry_main_t jvpp_registry_main __attribute__((aligned (64)));
+
+void vl_client_add_api_signatures(vl_api_memclnt_create_t *mp) {
+ /*
+ * Send the main API signature in slot 0. This bit of code must
+ * match the checks in ../vpe/api/api.c: vl_msg_api_version_check().
+ */
+ mp->api_versions[0] = clib_host_to_net_u32(vpe_api_version);
+}
+
+/* cleanup handler for RX thread */
+static_always_inline void cleanup_rx_thread(void *arg) {
+ jvpp_main_t * jm = &jvpp_main;
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+
+ vppjni_lock(jm, 99);
+
+ int getEnvStat = (*jm->jvm)->GetEnv(jm->jvm, (void **) &(jm->jenv),
+ JNI_VERSION_1_8);
+ if (getEnvStat == JNI_EVERSION) {
+ clib_warning("Unsupported JNI version\n");
+ rm->control_ping_retval = VNET_API_ERROR_UNSUPPORTED_JNI_VERSION;
+ goto out;
+ } else if (getEnvStat != JNI_EDETACHED) {
+ (*jm->jvm)->DetachCurrentThread(jm->jvm);
+ }
+ out: vppjni_unlock(jm);
+}
+
+static void vl_api_control_ping_reply_t_handler(
+ vl_api_control_ping_reply_t * mp) {
+ jvpp_main_t * jm = &jvpp_main;
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+ char was_thread_connected = 0;
+
+ // attach to java thread if not attached
+ int getEnvStat = (*jm->jvm)->GetEnv(jm->jvm, (void **) &(jm->jenv),
+ JNI_VERSION_1_8);
+ if (getEnvStat == JNI_EDETACHED) {
+ if ((*jm->jvm)->AttachCurrentThread(jm->jvm, (void **) &(jm->jenv),
+ NULL) != 0) {
+ clib_warning("Failed to attach thread\n");
+ rm->control_ping_retval =
+ VNET_API_ERROR_FAILED_TO_ATTACH_TO_JAVA_THREAD;
+ goto out;
+ }
+
+ // workaround as we can't use pthread_cleanup_push
+ pthread_key_create(&rm->cleanup_rx_thread_key, cleanup_rx_thread);
+        // the destructor is only called if the value of the key is non-null
+ pthread_setspecific(rm->cleanup_rx_thread_key, (void *) 1);
+ was_thread_connected = 1;
+ } else if (getEnvStat == JNI_EVERSION) {
+ clib_warning("Unsupported JNI version\n");
+ rm->control_ping_retval = VNET_API_ERROR_UNSUPPORTED_JNI_VERSION;
+ goto out;
+ }
+
+ if (was_thread_connected == 0) {
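+        /* Build the ControlPingReply DTO via JNI reflection and hand it to the
+         * registry callback; integer fields are converted to host byte order first. */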
+ JNIEnv *env = jm->jenv;
+ if (mp->retval < 0) {
+ call_on_error("controlPing", mp->context, mp->retval,
+ rm->registryClass, rm->registryObject,
+ rm->callbackExceptionClass);
+ } else {
+ jmethodID constructor = (*env)->GetMethodID(env,
+ rm->controlPingReplyClass, "<init>", "()V");
+ jmethodID callbackMethod = (*env)->GetMethodID(env,
+ rm->registryClass, "onControlPingReply",
+ "(Lio/fd/vpp/jvpp/dto/ControlPingReply;)V");
+
+ jobject dto = (*env)->NewObject(env, rm->controlPingReplyClass,
+ constructor);
+
+ jfieldID contextFieldId = (*env)->GetFieldID(env,
+ rm->controlPingReplyClass, "context", "I");
+ (*env)->SetIntField(env, dto, contextFieldId,
+ clib_net_to_host_u32(mp->context));
+
+ jfieldID clientIndexFieldId = (*env)->GetFieldID(env,
+ rm->controlPingReplyClass, "clientIndex", "I");
+ (*env)->SetIntField(env, dto, clientIndexFieldId,
+ clib_net_to_host_u32(mp->client_index));
+
+ jfieldID vpePidFieldId = (*env)->GetFieldID(env,
+ rm->controlPingReplyClass, "vpePid", "I");
+ (*env)->SetIntField(env, dto, vpePidFieldId,
+ clib_net_to_host_u32(mp->vpe_pid));
+
+ (*env)->CallVoidMethod(env, rm->registryObject, callbackMethod,
+ dto);
+ (*env)->DeleteLocalRef(env, dto);
+ }
+ }
+
+ out: rm->vpe_pid = clib_net_to_host_u32(mp->vpe_pid);
+ rm->control_ping_result_ready = 1;
+}
+
+static int find_ping_id() {
+ int rv = 0;
+ jvpp_main_t * jm = &jvpp_main;
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+ api_main_t *am = &api_main;
+ hash_pair_t *hp;
+ jm->messages_hash = am->msg_index_by_name_and_crc;
+
+ rm->control_ping_msg_id = -1;
+ rm->control_ping_reply_msg_id = -1;
+
+ hash_foreach_pair (hp, jm->messages_hash,
+ ({
+ char *key = (char *)hp->key; // key format: name_crc
+ int msg_name_len = strlen(key) - 9; // ignore crc
+ if (strlen(CONTROL_PING_MESSAGE) == msg_name_len &&
+ strncmp(CONTROL_PING_MESSAGE, (char *)hp->key, msg_name_len) == 0) {
+ rm->control_ping_msg_id = (u32)hp->value[0];
+ }
+ if (strlen(CONTROL_PING_REPLY_MESSAGE) == msg_name_len &&
+ strncmp(CONTROL_PING_REPLY_MESSAGE, (char *)hp->key, msg_name_len) == 0) {
+ rm->control_ping_reply_msg_id = (u32)hp->value[0];
+ }
+ }));
+ if (rm->control_ping_msg_id == -1) {
+ clib_warning("failed to find id for %s", CONTROL_PING_MESSAGE);
+ rv = -1;
+ }
+ if (rm->control_ping_reply_msg_id == -1) {
+ clib_warning("failed to find id for %s", CONTROL_PING_REPLY_MESSAGE);
+ rv = -1;
+ }
+ return rv;
+}
+
+static int send_initial_control_ping() {
+ f64 timeout;
+ clib_time_t clib_time;
+ vl_api_control_ping_t * mp;
+ jvpp_main_t * jm = &jvpp_main;
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+
+ clib_time_init(&clib_time);
+
+ rm->control_ping_result_ready = 0;
+ mp = vl_msg_api_alloc(sizeof(*mp));
+ memset(mp, 0, sizeof(*mp));
+ mp->_vl_msg_id = ntohs(rm->control_ping_msg_id);
+ mp->client_index = jm->my_client_index;
+
+ // send message:
+ vl_msg_api_send_shmem(jm->vl_input_queue, (u8 *) &mp);
+
+ // wait for results: Current time + 10 seconds is the timeout
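+    // Note: this is a busy-wait; the reply handler running on the RX thread sets
+    // control_ping_result_ready once the control ping reply has been processed.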
+ timeout = clib_time_now(&clib_time) + 10.0;
+ int rv = VNET_API_ERROR_RESPONSE_NOT_READY;
+ while (clib_time_now(&clib_time) < timeout) {
+ if (rm->control_ping_result_ready == 1) {
+ rv = rm->control_ping_retval;
+ break;
+ }
+ }
+
+ if (rv != 0) {
+ vl_msg_api_clean_handlers(rm->control_ping_reply_msg_id);
+ clib_warning("first control ping failed: %d", rv);
+ }
+ return rv;
+}
+
+static int connect_to_vpe(char *shm_prefix, char *name) {
+ jvpp_main_t * jm = &jvpp_main;
+ api_main_t * am = &api_main;
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+
+ if (vl_client_connect_to_vlib(shm_prefix, name, 32) < 0)
+ return -1;
+ jm->my_client_index = am->my_client_index;
+
+ jm->vl_input_queue = am->shmem_hdr->vl_input_queue;
+
+ if (find_ping_id() < 0)
+ return -1;
+
+ vl_msg_api_set_handlers(rm->control_ping_reply_msg_id, CONTROL_PING_REPLY_MESSAGE,
+ vl_api_control_ping_reply_t_handler, vl_noop_handler,
+ vl_api_control_ping_reply_t_endian,
+ vl_api_control_ping_reply_t_print,
+ sizeof(vl_api_control_ping_reply_t), 1);
+
+ return send_initial_control_ping();
+}
+
+JNIEXPORT jobject JNICALL Java_io_fd_vpp_jvpp_VppJNIConnection_clientConnect(
+ JNIEnv *env, jclass obj, jstring shmPrefix, jstring clientName) {
+ /*
+ * TODO introducing memory prefix as variable can be used in hc2vpp
+ * to be able to run without root privileges
+ * https://jira.fd.io/browse/HC2VPP-176
+ */
+ int rv;
+ const char *client_name;
+ const char *shm_prefix;
+ void vl_msg_reply_handler_hookup(void);
+ jvpp_main_t * jm = &jvpp_main;
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+
+ jclass connectionInfoClass = (*env)->FindClass(env,
+ "io/fd/vpp/jvpp/VppJNIConnection$ConnectionInfo");
+ jmethodID connectionInfoConstructor = (*env)->GetMethodID(env,
+ connectionInfoClass, "<init>", "(JIII)V");
+
+ if (rm->is_connected) {
+ return (*env)->NewObject(env, connectionInfoClass,
+ connectionInfoConstructor, 0, 0,
+ VNET_API_ERROR_ALREADY_CONNECTED, 0);
+ }
+
+ client_name = (*env)->GetStringUTFChars(env, clientName, 0);
+ shm_prefix = (*env)->GetStringUTFChars(env, shmPrefix, 0);
+
+    if (!client_name) {
+        if (shm_prefix)
+            (*env)->ReleaseStringUTFChars(env, shmPrefix, shm_prefix);
+        return (*env)->NewObject(env, connectionInfoClass,
+                connectionInfoConstructor, 0, 0, VNET_API_ERROR_INVALID_VALUE, 0);
+    }
+
+    if (!shm_prefix) {
+        (*env)->ReleaseStringUTFChars(env, clientName, client_name);
+        return (*env)->NewObject(env, connectionInfoClass,
+                connectionInfoConstructor, 0, 0, VNET_API_ERROR_INVALID_VALUE, 0);
+    }
+
+ rv = connect_to_vpe((char *) shm_prefix, (char *) client_name);
+
+ if (rv < 0)
+ clib_warning("connection failed, rv %d", rv);
+
+ (*env)->ReleaseStringUTFChars(env, clientName, client_name);
+ (*env)->ReleaseStringUTFChars(env, shmPrefix, shm_prefix);
+
+ return (*env)->NewObject(env, connectionInfoClass,
+ connectionInfoConstructor, (jlong) pointer_to_uword (jm->vl_input_queue),
+ (jint) jm->my_client_index, (jint) rv, (jint) rm->vpe_pid);
+}
+
+JNIEXPORT jint JNICALL Java_io_fd_vpp_jvpp_JVppRegistryImpl_controlPing0(
+        JNIEnv *env, jobject registryObject) {
+ jvpp_main_t * jm = &jvpp_main;
+ vl_api_control_ping_t * mp;
+ u32 my_context_id = vppjni_get_context_id(&jvpp_main);
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+
+    if (rm->registryObject == 0) {
+        rm->registryObject = (*env)->NewGlobalRef(env, registryObject);
+    }
+    if (rm->registryClass == 0) {
+        rm->registryClass = (jclass) (*env)->NewGlobalRef(env,
+                (*env)->GetObjectClass(env, registryObject));
+    }
+
+ mp = vl_msg_api_alloc(sizeof(*mp));
+ memset(mp, 0, sizeof(*mp));
+ mp->_vl_msg_id = ntohs(rm->control_ping_msg_id);
+ mp->client_index = jm->my_client_index;
+ mp->context = clib_host_to_net_u32(my_context_id);
+
+ // send message:
+ vl_msg_api_send_shmem(jm->vl_input_queue, (u8 *) &mp);
+ return my_context_id;
+}
+
+JNIEXPORT void JNICALL Java_io_fd_vpp_jvpp_VppJNIConnection_clientDisconnect(
+ JNIEnv *env, jclass clazz) {
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+ rm->is_connected = 0; // TODO make thread safe
+ vl_client_disconnect_from_vlib();
+
+ // cleanup:
+ if (rm->registryObject) {
+ (*env)->DeleteGlobalRef(env, rm->registryObject);
+ rm->registryObject = 0;
+ }
+ if (rm->registryClass) {
+ (*env)->DeleteGlobalRef(env, rm->registryClass);
+ rm->registryClass = 0;
+ }
+}
+
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+ jvpp_main_t * jm = &jvpp_main;
+ jvpp_registry_main_t * rm = &jvpp_registry_main;
+ JNIEnv* env;
+
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return JNI_EVERSION;
+ }
+
+ rm->controlPingReplyClass = (jclass) (*env)->NewGlobalRef(env,
+ (*env)->FindClass(env, "io/fd/vpp/jvpp/dto/ControlPingReply"));
+ if ((*env)->ExceptionCheck(env)) {
+ (*env)->ExceptionDescribe(env);
+ clib_warning("Failed to cache class references\n");
+ return JNI_ERR;
+ }
+
+ rm->callbackExceptionClass = (jclass) (*env)->NewGlobalRef(env,
+ (*env)->FindClass(env, "io/fd/vpp/jvpp/VppCallbackException"));
+ if ((*env)->ExceptionCheck(env)) {
+ (*env)->ExceptionDescribe(env);
+ return JNI_ERR;
+ }
+
+ jm->jvm = vm;
+ return JNI_VERSION_1_8;
+}
+
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+ jvpp_main_t * jm = &jvpp_main;
+ JNIEnv* env;
+ if ((*vm)->GetEnv(vm, (void**) &env, JNI_VERSION_1_8) != JNI_OK) {
+ return;
+ }
+
+ jm->jenv = NULL;
+ jm->jvm = NULL;
+}
diff --git a/src/vpp-api/java/jvpp/gen/jvpp_gen.py b/src/vpp-api/java/jvpp/gen/jvpp_gen.py
new file mode 100755
index 00000000..6648a4f7
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvpp_gen.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import importlib
+import sys
+import os
+import json
+
+from jvppgen import types_gen
+from jvppgen import callback_gen
+from jvppgen import notification_gen
+from jvppgen import dto_gen
+from jvppgen import jvpp_callback_facade_gen
+from jvppgen import jvpp_future_facade_gen
+from jvppgen import jvpp_impl_gen
+from jvppgen import jvpp_c_gen
+from jvppgen import util
+
+blacklist = [ "memclnt.api", "flowprobe.api" ]
+
+# Invocation:
+# ~/Projects/vpp/vpp-api/jvpp/gen$ mkdir -p java/io/fd/vpp/jvpp && cd java/io/fd/vpp/jvpp
+# ~/Projects/vpp/vpp-api/jvpp/gen/java/io/fd/vpp/jvpp$ ../../../../jvpp_gen.py -idefs_api_vpp_papi.py
+#
+# Compilation:
+# ~/Projects/vpp/vpp-api/jvpp/gen/java/io/fd/vpp/jvpp$ javac *.java dto/*.java callback/*.java
+#
+# where
+# defs_api_vpp_papi.py - vpe.api in python format (generated by vppapigen)
+
+parser = argparse.ArgumentParser(description='VPP Java API generator')
+parser.add_argument('-i', action="store", dest="inputfiles", nargs='+')
+parser.add_argument('--plugin_name', action="store", dest="plugin_name")
+parser.add_argument('--root_dir', action="store", dest="root_dir")
+args = parser.parse_args()
+
+sys.path.append(".")
+cwd = os.getcwd()
+
+print "Generating Java API for %s" % args.inputfiles
+print "inputfiles %s" % args.inputfiles
+plugin_name = args.plugin_name
+print "plugin_name %s" % plugin_name
+
+cfg = {}
+
+base_package = 'io.fd.vpp.jvpp'
+plugin_package = base_package + '.' + plugin_name
+root_dir = os.path.abspath(args.root_dir)
+print "root_dir %s" % root_dir
+work_dir = root_dir + "/target/" + plugin_package.replace(".","/")
+
+try:
+ os.makedirs(work_dir)
+except OSError:
+ if not os.path.isdir(work_dir):
+ raise
+
+os.chdir(work_dir)
+
+for inputfile in args.inputfiles:
+ if any(substring in inputfile for substring in blacklist):
+ print "WARNING: Imput file %s blacklisted" % inputfile
+ continue
+ _cfg = json.load(open(cwd + "/" + inputfile, 'r'))
+ if 'types' in cfg:
+ cfg['types'].extend(_cfg['types'])
+ else:
+ cfg['types'] = _cfg['types']
+ if 'messages' in cfg:
+ cfg['messages'].extend(_cfg['messages'])
+ else:
+ cfg['messages'] = _cfg['messages']
+
+
+def is_request_field(field_name):
+ return field_name not in {'_vl_msg_id', 'client_index', 'context'}
+
+
+def is_response_field(field_name):
+ return field_name not in {'_vl_msg_id'}
+
+
+def get_args(t, filter):
+ arg_names = []
+ arg_types = []
+ for i in t:
+ if is_crc(i):
+ continue
+ if not filter(i[1]):
+ continue
+ arg_types.append(i[0])
+ arg_names.append(i[1])
+ return arg_types, arg_names
+
+
+def get_types(t, filter):
+ types_list = []
+ lengths_list = []
+ crc = None
+ for i in t:
+ if is_crc(i):
+ crc = ('crc', i['crc'][2:])
+ continue
+ if not filter(i[1]):
+ continue
+        if len(i) == 3: # array type
+            types_list.append(i[0] + '[]')
+            lengths_list.append((i[2], False))
+        elif len(i) == 4: # variable length array type
+ types_list.append(i[0] + '[]')
+ lengths_list.append((i[3], True))
+ else: # primitive type
+ types_list.append(i[0])
+ lengths_list.append((0, False))
+ return types_list, lengths_list, crc
+
+
+def is_crc(arg):
+ """ Check whether the argument inside message definition is just crc """
+ return 'crc' in arg
+
+
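+# Each message definition in cfg['messages'] / cfg['types'] is assumed to be a list
+# shaped like (sketch):
+#   ["show_version_reply",
+#    ["u16", "_vl_msg_id"], ["u32", "context"], ["i32", "retval"],
+#    ["u8", "version", 64],
+#    {"crc": "0x..."}]
+# i.e. the message name, then [type, name(, length(, var))] argument entries, then
+# the crc dict consumed by is_crc()/get_types() above.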
+def get_definitions(defs):
+ # Pass 1
+ func_list = []
+ func_name = {}
+ for a in defs:
+ java_name = util.underscore_to_camelcase(a[0])
+
+ # For replies include all the arguments except message_id
+ if util.is_reply(java_name):
+ types, lengths, crc = get_types(a[1:], is_response_field)
+ args = get_args(a[1:], is_response_field)
+ func_name[a[0]] = dict(
+ [('name', a[0]), ('java_name', java_name),
+ ('args', args[1]), ('arg_types', args[0]),
+ ('types', types), ('lengths', lengths), crc])
+ # For requests skip message_id, client_id and context
+ else:
+ types, lengths, crc = get_types(a[1:], is_request_field)
+ args = get_args(a[1:], is_request_field)
+ func_name[a[0]] = dict(
+ [('name', a[0]), ('java_name', java_name),
+ ('args', args[1]), ('arg_types', args[0]),
+ ('types', types), ('lengths', lengths), crc])
+
+ # Indexed by name
+ func_list.append(func_name[a[0]])
+ return func_list, func_name
+
+
+types_package = 'types'
+dto_package = 'dto'
+callback_package = 'callback'
+notification_package = 'notification'
+future_package = 'future'
+# TODO find better package name
+callback_facade_package = 'callfacade'
+
+types_list, types_name = get_definitions(cfg['types'])
+
+types_gen.generate_types(types_list, plugin_package, types_package, args.inputfiles)
+
+func_list, func_name = get_definitions(cfg['messages'])
+
+dto_gen.generate_dtos(func_list, base_package, plugin_package, plugin_name.title(), dto_package, args.inputfiles)
+jvpp_impl_gen.generate_jvpp(func_list, base_package, plugin_package, plugin_name, dto_package, args.inputfiles)
+callback_gen.generate_callbacks(func_list, base_package, plugin_package, plugin_name.title(), callback_package, dto_package, args.inputfiles)
+notification_gen.generate_notification_registry(func_list, base_package, plugin_package, plugin_name.title(), notification_package, callback_package, dto_package, args.inputfiles)
+jvpp_c_gen.generate_jvpp(func_list, plugin_name, args.inputfiles, root_dir)
+jvpp_future_facade_gen.generate_jvpp(func_list, base_package, plugin_package, plugin_name.title(), dto_package, callback_package, notification_package, future_package, args.inputfiles)
+jvpp_callback_facade_gen.generate_jvpp(func_list, base_package, plugin_package, plugin_name.title(), dto_package, callback_package, notification_package, callback_facade_package, args.inputfiles)
+
+print "Java API for %s generated successfully" % args.inputfiles
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/__init__.py b/src/vpp-api/java/jvpp/gen/jvppgen/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/__init__.py
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/callback_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/callback_gen.py
new file mode 100644
index 00000000..b3024b9c
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/callback_gen.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import util
+from string import Template
+
+from util import remove_suffix
+
+callback_suffix = "Callback"
+
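+# Naming sketch: a reply message "show_version_reply" yields the interface
+# ShowVersionCallback with method onShowVersionReply(ShowVersionReply reply);
+# notification messages get the notification suffix added instead.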
+callback_template = Template("""
+package $plugin_package.$callback_package;
+
+/**
+ * <p>Represents callback for plugin's api file message.
+ * <br>It was generated by callback_gen.py based on $inputfile preparsed data:
+ * <pre>
+$docs
+ * </pre>
+ */
+public interface $cls_name extends $base_package.$callback_package.$callback_type {
+
+ $callback_method
+
+}
+""")
+
+global_callback_template = Template("""
+package $plugin_package.$callback_package;
+
+/**
+ * <p>Global aggregated callback interface.
+ * <br>It was generated by callback_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public interface JVpp${plugin_name}GlobalCallback extends $base_package.$callback_package.ControlPingCallback, $callbacks {
+}
+""")
+
+
+def generate_callbacks(func_list, base_package, plugin_package, plugin_name, callback_package, dto_package, inputfile):
+ """ Generates callback interfaces """
+ print "Generating Callback interfaces"
+
+ if not os.path.exists(callback_package):
+ os.mkdir(callback_package)
+
+ callbacks = []
+ for func in func_list:
+
+ camel_case_name_with_suffix = util.underscore_to_camelcase_upper(func['name'])
+
+ if util.is_ignored(func['name']) or util.is_control_ping(camel_case_name_with_suffix):
+ continue
+ if not util.is_reply(camel_case_name_with_suffix) and not util.is_notification(func['name']):
+ continue
+
+ if util.is_reply(camel_case_name_with_suffix):
+ camel_case_name = util.remove_reply_suffix(camel_case_name_with_suffix)
+ callback_type = "JVppCallback"
+ else:
+ camel_case_name_with_suffix = util.add_notification_suffix(camel_case_name_with_suffix)
+ camel_case_name = camel_case_name_with_suffix
+ callback_type = "JVppNotificationCallback"
+
+ callbacks.append("{0}.{1}.{2}".format(plugin_package, callback_package, camel_case_name + callback_suffix))
+ callback_path = os.path.join(callback_package, camel_case_name + callback_suffix + ".java")
+ callback_file = open(callback_path, 'w')
+
+ reply_type = "%s.%s.%s" % (plugin_package, dto_package, camel_case_name_with_suffix)
+ method = "void on{0}({1} reply);".format(camel_case_name_with_suffix, reply_type)
+ callback_file.write(
+ callback_template.substitute(inputfile=inputfile,
+ docs=util.api_message_to_javadoc(func),
+ cls_name=camel_case_name + callback_suffix,
+ callback_method=method,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ callback_package=callback_package,
+ callback_type=callback_type))
+ callback_file.flush()
+ callback_file.close()
+
+ callback_file = open(os.path.join(callback_package, "JVpp%sGlobalCallback.java" % plugin_name), 'w')
+ callback_file.write(global_callback_template.substitute(inputfile=inputfile,
+ callbacks=", ".join(callbacks),
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ callback_package=callback_package))
+ callback_file.flush()
+ callback_file.close()
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/dto_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/dto_gen.py
new file mode 100644
index 00000000..e831557c
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/dto_gen.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from string import Template
+
+import util
+
+dto_template = Template("""
+package $plugin_package.$dto_package;
+
+/**
+ * <p>This class represents $description.
+ * <br>It was generated by dto_gen.py based on $inputfile preparsed data:
+ * <pre>
+$docs
+ * </pre>
+ */
+public final class $cls_name implements $base_package.$dto_package.$base_type {
+
+$fields
+$methods
+}
+""")
+
+field_template = Template(""" public $type $name;\n""")
+
+send_template = Template(""" @Override
+ public int send(final $base_package.JVpp jvpp) throws io.fd.vpp.jvpp.VppInvocationException {
+ return (($plugin_package.JVpp${plugin_name})jvpp).$method_name($args);
+ }""")
+
+
+def generate_dtos(func_list, base_package, plugin_package, plugin_name, dto_package, inputfile):
+ """ Generates dto objects in a dedicated package """
+ print "Generating DTOs"
+
+ if not os.path.exists(dto_package):
+ os.mkdir(dto_package)
+
+ for func in func_list:
+ camel_case_dto_name = util.underscore_to_camelcase_upper(func['name'])
+ camel_case_method_name = util.underscore_to_camelcase(func['name'])
+ dto_path = os.path.join(dto_package, camel_case_dto_name + ".java")
+
+ if util.is_ignored(func['name']) or util.is_control_ping(camel_case_dto_name):
+ continue
+
+ fields = generate_dto_fields(camel_case_dto_name, func)
+ methods = generate_dto_base_methods(camel_case_dto_name, func)
+ base_type = ""
+
+        # Generate request/reply or dump/dumpReply even if the structure can also be used as a notification
+ if not util.is_just_notification(func["name"]):
+ if util.is_reply(camel_case_dto_name):
+ description = "reply DTO"
+ request_dto_name = get_request_name(camel_case_dto_name, func['name'])
+ if util.is_details(camel_case_dto_name):
+ # FIXME assumption that dump calls end with "Dump" suffix. Not enforced in vpe.api
+ base_type += "JVppReply<%s.%s.%s>" % (plugin_package, dto_package, request_dto_name + "Dump")
+ generate_dump_reply_dto(request_dto_name, base_package, plugin_package, dto_package,
+ camel_case_dto_name, camel_case_method_name, func)
+ else:
+ base_type += "JVppReply<%s.%s.%s>" % (plugin_package, dto_package, request_dto_name)
+ else:
+ args = "" if fields is "" else "this"
+ methods += send_template.substitute(method_name=camel_case_method_name,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ args=args)
+ if util.is_dump(camel_case_dto_name):
+ base_type += "JVppDump"
+ description = "dump request DTO"
+ else:
+ base_type += "JVppRequest"
+ description = "request DTO"
+
+ write_dto_file(base_package, plugin_package, base_type, camel_case_dto_name, description, dto_package,
+ dto_path, fields, func, inputfile, methods)
+
+ # for structures that are also used as notifications, generate dedicated notification DTO
+ if util.is_notification(func["name"]):
+ base_type = "JVppNotification"
+ description = "notification DTO"
+ camel_case_dto_name = util.add_notification_suffix(camel_case_dto_name)
+ dto_path = os.path.join(dto_package, camel_case_dto_name + ".java")
+ methods = generate_dto_base_methods(camel_case_dto_name, func)
+ write_dto_file(base_package, plugin_package, base_type, camel_case_dto_name, description, dto_package,
+ dto_path, fields, func, inputfile, methods)
+
+ flush_dump_reply_dtos(inputfile)
+
+
+def generate_dto_base_methods(camel_case_dto_name, func):
+ methods = generate_dto_hash(func)
+ methods += generate_dto_equals(camel_case_dto_name, func)
+ methods += generate_dto_tostring(camel_case_dto_name, func)
+ return methods
+
+
+def generate_dto_fields(camel_case_dto_name, func):
+ fields = ""
+ for t in zip(func['types'], func['args']):
+ # for retval don't generate dto field in Reply
+ field_name = util.underscore_to_camelcase(t[1])
+ if util.is_reply(camel_case_dto_name) and util.is_retval_field(field_name):
+ continue
+ fields += field_template.substitute(type=util.jni_2_java_type_mapping[t[0]],
+ name=field_name)
+ return fields
+
+
+tostring_field_template = Template(""" \"$field_name=\" + $field_name + ", " +\n""")
+tostring_array_field_template = Template(""" \"$field_name=\" + java.util.Arrays.toString($field_name) + ", " +\n""")
+tostring_template = Template(""" @Override
+ public String toString() {
+ return "$cls_name{" +
+$fields_tostring "}";
+ }\n\n""")
+
+
+def generate_dto_tostring(camel_case_dto_name, func):
+ tostring_fields = ""
+ for t in zip(func['types'], func['args']):
+
+ field_name = util.underscore_to_camelcase(t[1])
+ # for retval don't generate dto field in Reply
+ if util.is_retval_field(field_name):
+ continue
+
+ # handle array types
+ if util.is_array(util.jni_2_java_type_mapping[t[0]]):
+ tostring_fields += tostring_array_field_template.substitute(field_name=field_name)
+ else:
+ tostring_fields += tostring_field_template.substitute(field_name=field_name)
+
+ return tostring_template.substitute(cls_name=camel_case_dto_name,
+ fields_tostring=tostring_fields[:-8])
+
+equals_other_template = Template("""
+ final $cls_name other = ($cls_name) o;
+\n""")
+equals_field_template = Template(""" if (!java.util.Objects.equals(this.$field_name, other.$field_name)) {
+ return false;
+ }\n""")
+equals_array_field_template = Template(""" if (!java.util.Arrays.equals(this.$field_name, other.$field_name)) {
+ return false;
+ }\n""")
+equals_template = Template(""" @Override
+ public boolean equals(final Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+$comparisons
+ return true;
+ }\n\n""")
+
+
+def generate_dto_equals(camel_case_dto_name, func):
+ equals_fields = ""
+ for t in zip(func['types'], func['args']):
+ field_name = util.underscore_to_camelcase(t[1])
+ # for retval don't generate dto field in Reply
+ if util.is_retval_field(field_name):
+ continue
+
+ # handle array types
+ if util.is_array(util.jni_2_java_type_mapping[t[0]]):
+ equals_fields += equals_array_field_template.substitute(field_name=field_name)
+ else:
+ equals_fields += equals_field_template.substitute(field_name=field_name)
+
+ if equals_fields != "":
+ equals_fields = equals_other_template.substitute(cls_name=camel_case_dto_name) + equals_fields
+
+ return equals_template.substitute(comparisons=equals_fields)
+
+
+hash_template = Template(""" @Override
+ @io.fd.vpp.jvpp.coverity.SuppressFBWarnings("UWF_UNWRITTEN_PUBLIC_OR_PROTECTED_FIELD")
+ public int hashCode() {
+ return java.util.Objects.hash($fields);
+ }\n\n""")
+hash_single_array_type_template = Template(""" @Override
+ @io.fd.vpp.jvpp.coverity.SuppressFBWarnings("UWF_UNWRITTEN_PUBLIC_OR_PROTECTED_FIELD")
+ public int hashCode() {
+ return java.util.Arrays.hashCode($fields);
+ }\n\n""")
+
+
+def generate_dto_hash(func):
+ hash_fields = ""
+
+    # Special handling for hashCode when just a single array field is present: Objects.hash would
+    # mistake the array for a varargs parameter, so use Arrays.hashCode in that case.
+ if len(func['args']) == 1:
+ single_type = func['types'][0]
+ single_type_name = func['args'][0]
+ if util.is_array(util.jni_2_java_type_mapping[single_type]):
+ return hash_single_array_type_template.substitute(fields=util.underscore_to_camelcase(single_type_name))
+
+ for t in zip(func['types'], func['args']):
+ field_name = util.underscore_to_camelcase(t[1])
+ # for retval don't generate dto field in Reply
+ if util.is_retval_field(field_name):
+ continue
+
+ hash_fields += field_name + ", "
+
+ return hash_template.substitute(fields=hash_fields[:-2])
+
+
+def write_dto_file(base_package, plugin_package, base_type, camel_case_dto_name, description, dto_package, dto_path,
+ fields, func, inputfile, methods):
+ dto_file = open(dto_path, 'w')
+ dto_file.write(dto_template.substitute(inputfile=inputfile,
+ description=description,
+ docs=util.api_message_to_javadoc(func),
+ cls_name=camel_case_dto_name,
+ fields=fields,
+ methods=methods,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ base_type=base_type,
+ dto_package=dto_package))
+ dto_file.flush()
+ dto_file.close()
+
+
+dump_dto_suffix = "ReplyDump"
+dump_reply_artificial_dtos = {}
+
+
+# Returns request name or special one from unconventional_naming_rep_req map
+def get_request_name(camel_case_dto_name, func_name):
+ return util.underscore_to_camelcase_upper(
+ util.unconventional_naming_rep_req[func_name]) if func_name in util.unconventional_naming_rep_req \
+ else util.remove_reply_suffix(camel_case_dto_name)
+
+
+def flush_dump_reply_dtos(inputfile):
+ for dump_reply_artificial_dto in dump_reply_artificial_dtos.values():
+ dto_path = os.path.join(dump_reply_artificial_dto['dto_package'],
+ dump_reply_artificial_dto['cls_name'] + ".java")
+ dto_file = open(dto_path, 'w')
+ dto_file.write(dto_template.substitute(inputfile=inputfile,
+ description="dump reply wrapper",
+ docs=dump_reply_artificial_dto['docs'],
+ cls_name=dump_reply_artificial_dto['cls_name'],
+ fields=dump_reply_artificial_dto['fields'],
+ methods=dump_reply_artificial_dto['methods'],
+ plugin_package=dump_reply_artificial_dto['plugin_package'],
+ base_package=dump_reply_artificial_dto['base_package'],
+ base_type=dump_reply_artificial_dto['base_type'],
+ dto_package=dump_reply_artificial_dto['dto_package']))
+ dto_file.flush()
+ dto_file.close()
+
+
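+# Builds (or updates) an aggregating <Details>ReplyDump DTO that wraps the list of
+# details replies, e.g. (sketch) SwInterfaceDetailsReplyDump holding a
+# java.util.List<SwInterfaceDetails>.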
+def generate_dump_reply_dto(request_dto_name, base_package, plugin_package, dto_package, camel_case_dto_name,
+ camel_case_method_name, func):
+ base_type = "JVppReplyDump<%s.%s.%s, %s.%s.%s>" % (
+ plugin_package, dto_package, util.remove_reply_suffix(camel_case_dto_name) + "Dump",
+ plugin_package, dto_package, camel_case_dto_name)
+ fields = " public java.util.List<%s> %s = new java.util.ArrayList<>();" % (camel_case_dto_name, camel_case_method_name)
+ cls_name = camel_case_dto_name + dump_dto_suffix
+ # using artificial type for fields, just to bypass the is_array check in base methods generators
+ # the type is not really used
+ artificial_type = 'u8'
+
+ # In case of already existing artificial reply dump DTO, just update it
+ # Used for sub-dump dtos
+ if request_dto_name in dump_reply_artificial_dtos.keys():
+ dump_reply_artificial_dtos[request_dto_name]['fields'] += '\n' + fields
+ dump_reply_artificial_dtos[request_dto_name]['field_names'].append(func['name'])
+ dump_reply_artificial_dtos[request_dto_name]['field_types'].append(artificial_type)
+ methods = '\n' + generate_dto_base_methods(dump_reply_artificial_dtos[request_dto_name]['cls_name'],
+ {'args': dump_reply_artificial_dtos[request_dto_name]['field_names'],
+ 'types': dump_reply_artificial_dtos[request_dto_name]['field_types']})
+ dump_reply_artificial_dtos[request_dto_name]['methods'] = methods
+ else:
+ methods = '\n' + generate_dto_base_methods(cls_name, {'args': [func['name']],
+ 'types': [artificial_type]})
+ dump_reply_artificial_dtos[request_dto_name] = ({'docs': util.api_message_to_javadoc(func),
+ 'cls_name': cls_name,
+ 'fields': fields,
+ 'field_names': [func['name']],
+ 'field_types': [artificial_type],
+ # strip too many newlines at the end of base method block
+ 'methods': methods,
+ 'plugin_package': plugin_package,
+ 'base_package': base_package,
+ 'base_type': base_type,
+ 'dto_package': dto_package})
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/jni_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/jni_gen.py
new file mode 100644
index 00000000..cb0d66e8
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/jni_gen.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from string import Template
+
+import util
+
+variable_length_array_value_template = Template("""mp->${length_var_name}""")
+variable_length_array_template = Template("""clib_net_to_host_${length_field_type}(${value})""")
+
+dto_field_id_template = Template("""
+ jfieldID ${field_reference_name}FieldId = (*env)->GetFieldID(env, ${class_ref_name}Class, "${field_name}", "${jni_signature}");""")
+
+default_dto_field_setter_template = Template("""
+ (*env)->Set${jni_setter}(env, ${object_name}, ${field_reference_name}FieldId, mp->${c_name});
+""")
+
+u16_dto_field_setter_template = Template("""
+ (*env)->Set${jni_setter}(env, ${object_name}, ${field_reference_name}FieldId, clib_net_to_host_u16(mp->${c_name}));
+""")
+
+u32_dto_field_setter_template = Template("""
+ (*env)->Set${jni_setter}(env, ${object_name}, ${field_reference_name}FieldId, clib_net_to_host_u32(mp->${c_name}));
+""")
+
+u64_dto_field_setter_template = Template("""
+ (*env)->Set${jni_setter}(env, ${object_name}, ${field_reference_name}FieldId, clib_net_to_host_u64(mp->${c_name}));
+""")
+
+u8_array_dto_field_setter_template = Template("""
+ jbyteArray ${field_reference_name} = (*env)->NewByteArray(env, ${field_length});
+ (*env)->SetByteArrayRegion(env, ${field_reference_name}, 0, ${field_length}, (const jbyte*)mp->${c_name});
+ (*env)->SetObjectField(env, ${object_name}, ${field_reference_name}FieldId, ${field_reference_name});
+ (*env)->DeleteLocalRef(env, ${field_reference_name});
+""")
+
+u16_array_dto_field_setter_template = Template("""
+ {
+ jshortArray ${field_reference_name} = (*env)->NewShortArray(env, ${field_length});
+ jshort * ${field_reference_name}ArrayElements = (*env)->GetShortArrayElements(env, ${field_reference_name}, NULL);
+ unsigned int _i;
+ for (_i = 0; _i < ${field_length}; _i++) {
+ ${field_reference_name}ArrayElements[_i] = clib_net_to_host_u16(mp->${c_name}[_i]);
+ }
+
+ (*env)->ReleaseShortArrayElements(env, ${field_reference_name}, ${field_reference_name}ArrayElements, 0);
+ (*env)->SetObjectField(env, ${object_name}, ${field_reference_name}FieldId, ${field_reference_name});
+ (*env)->DeleteLocalRef(env, ${field_reference_name});
+ }
+""")
+
+u32_array_dto_field_setter_template = Template("""
+ {
+ jintArray ${field_reference_name} = (*env)->NewIntArray(env, ${field_length});
+ jint * ${field_reference_name}ArrayElements = (*env)->GetIntArrayElements(env, ${field_reference_name}, NULL);
+ unsigned int _i;
+ for (_i = 0; _i < ${field_length}; _i++) {
+ ${field_reference_name}ArrayElements[_i] = clib_net_to_host_u32(mp->${c_name}[_i]);
+ }
+
+ (*env)->ReleaseIntArrayElements(env, ${field_reference_name}, ${field_reference_name}ArrayElements, 0);
+ (*env)->SetObjectField(env, ${object_name}, ${field_reference_name}FieldId, ${field_reference_name});
+ (*env)->DeleteLocalRef(env, ${field_reference_name});
+ }
+""")
+
+# For each u64 array we get its elements. Then we convert values to host byte order.
+# All changes to jlong* buffer are written to jlongArray (isCopy is set to NULL)
+u64_array_dto_field_setter_template = Template("""
+ {
+ jlongArray ${field_reference_name} = (*env)->NewLongArray(env, ${field_length});
+ jlong * ${field_reference_name}ArrayElements = (*env)->GetLongArrayElements(env, ${field_reference_name}, NULL);
+ unsigned int _i;
+ for (_i = 0; _i < ${field_length}; _i++) {
+ ${field_reference_name}ArrayElements[_i] = clib_net_to_host_u64(mp->${c_name}[_i]);
+ }
+
+ (*env)->ReleaseLongArrayElements(env, ${field_reference_name}, ${field_reference_name}ArrayElements, 0);
+ (*env)->SetObjectField(env, ${object_name}, ${field_reference_name}FieldId, ${field_reference_name});
+ (*env)->DeleteLocalRef(env, ${field_reference_name});
+ }
+""")
+
+dto_field_setter_templates = {'u8': default_dto_field_setter_template,
+ 'u16': u16_dto_field_setter_template,
+ 'u32': u32_dto_field_setter_template,
+ 'i32': u32_dto_field_setter_template,
+ 'u64': u64_dto_field_setter_template,
+ 'f64': default_dto_field_setter_template, # fixme
+ 'u8[]': u8_array_dto_field_setter_template,
+ 'u16[]': u16_array_dto_field_setter_template,
+ 'u32[]': u32_array_dto_field_setter_template,
+ 'u64[]': u64_array_dto_field_setter_template
+ }
+
+
+def jni_reply_handler_for_type(handler_name, ref_name, field_type, c_name, field_reference_name,
+ field_name, field_length, is_variable_len_array, length_field_type,
+ object_name="dto"):
+ """
+    Generates jni code that initializes a field of a java object (dto or custom type).
+    To be used in reply message handlers.
+    :param handler_name: name of the reply handler (used in validation messages)
+    :param ref_name: name of the cached class reference (emitted as ${ref_name}Class)
+    :param field_type: type of the field to be initialized (as defined in vpe.api)
+    :param c_name: name of the message struct member that stores the initialization value
+    :param field_reference_name: name of the field reference in generated code
+    :param field_name: name of the field (camelcase)
+    :param field_length: integer or name of variable that stores field length
+    :param is_variable_len_array: True if the field is a variable-length array
+    :param length_field_type: type of the member that stores the variable array length
+    :param object_name: name of the object to be initialized
+    """
+
+ # todo move validation to vppapigen
+ if field_type.endswith('[]') and field_length == '0':
+ raise Exception('Variable array \'%s\' defined in \'%s\' '
+ 'should have defined length (e.g. \'%s[%s_length]\''
+ % (c_name, handler_name, c_name, c_name))
+
+ if is_variable_len_array:
+ length_var_name = field_length
+ field_length = variable_length_array_value_template.substitute(length_var_name=length_var_name)
+ if length_field_type != 'u8': # we need net to host conversion:
+ field_length = variable_length_array_template.substitute(
+ length_field_type=length_field_type, value=field_length)
+
+ # for retval don't generate setters
+ if util.is_retval_field(c_name):
+ return ""
+
+ jni_signature = util.jni_2_signature_mapping[field_type]
+ jni_setter = util.jni_field_accessors[field_type]
+
+ result = dto_field_id_template.substitute(
+ field_reference_name=field_reference_name,
+ field_name=field_name,
+ class_ref_name=ref_name,
+ jni_signature=jni_signature)
+
+ dto_setter_template = dto_field_setter_templates[field_type]
+
+ result += dto_setter_template.substitute(
+ jni_signature=jni_signature,
+ object_name=object_name,
+ field_reference_name=field_reference_name,
+ c_name=c_name,
+ jni_setter=jni_setter,
+ field_length=field_length)
+ return result
+
+
+request_field_identifier_template = Template("""
+ jfieldID ${field_reference_name}FieldId = (*env)->GetFieldID(env, ${object_name}Class, "${field_name}", "${jni_signature}");
+ ${jni_type} ${field_reference_name} = (*env)->Get${jni_getter}(env, ${object_name}, ${field_reference_name}FieldId);
+ """)
+
+array_length_enforcement_template = Template("""
+ size_t max_size = ${field_length};
+ if (cnt > max_size) cnt = max_size;""")
+
+u8_struct_setter_template = Template("""
+ mp->${c_name} = ${field_reference_name};""")
+
+u16_struct_setter_template = Template("""
+ mp->${c_name} = clib_host_to_net_u16(${field_reference_name});""")
+
+u32_struct_setter_template = Template("""
+ mp->${c_name} = clib_host_to_net_u32(${field_reference_name});""")
+
+i32_struct_setter_template = Template("""
+    mp->${c_name} = clib_host_to_net_i32(${field_reference_name});""")
+
+u64_struct_setter_template = Template("""
+ mp->${c_name} = clib_host_to_net_u64(${field_reference_name});""")
+
+u8_array_struct_setter_template = Template("""
+ if (${field_reference_name}) {
+ jsize cnt = (*env)->GetArrayLength (env, ${field_reference_name});
+ ${field_length_check}
+ (*env)->GetByteArrayRegion(env, ${field_reference_name}, 0, cnt, (jbyte *)mp->${c_name});
+ }
+""")
+
+u16_array_struct_setter_template = Template("""
+ if (${field_reference_name}) {
+ jshort * ${field_reference_name}ArrayElements = (*env)->GetShortArrayElements(env, ${field_reference_name}, NULL);
+ size_t _i;
+ jsize cnt = (*env)->GetArrayLength (env, ${field_reference_name});
+ ${field_length_check}
+ for (_i = 0; _i < cnt; _i++) {
+ mp->${c_name}[_i] = clib_host_to_net_u16(${field_reference_name}ArrayElements[_i]);
+ }
+ (*env)->ReleaseShortArrayElements (env, ${field_reference_name}, ${field_reference_name}ArrayElements, 0);
+ }
+ """)
+
+u32_array_struct_setter_template = Template("""
+ if (${field_reference_name}) {
+ jint * ${field_reference_name}ArrayElements = (*env)->GetIntArrayElements(env, ${field_reference_name}, NULL);
+ size_t _i;
+ jsize cnt = (*env)->GetArrayLength (env, ${field_reference_name});
+ ${field_length_check}
+ for (_i = 0; _i < cnt; _i++) {
+ mp->${c_name}[_i] = clib_host_to_net_u32(${field_reference_name}ArrayElements[_i]);
+ }
+ (*env)->ReleaseIntArrayElements (env, ${field_reference_name}, ${field_reference_name}ArrayElements, 0);
+ }
+ """)
+
+u64_array_struct_setter_template = Template("""
+ if (${field_reference_name}) {
+ jlong * ${field_reference_name}ArrayElements = (*env)->GetLongArrayElements(env, ${field_reference_name}, NULL);
+ size_t _i;
+ jsize cnt = (*env)->GetArrayLength (env, ${field_reference_name});
+ ${field_length_check}
+ for (_i = 0; _i < cnt; _i++) {
+ mp->${c_name}[_i] = clib_host_to_net_u64(${field_reference_name}ArrayElements[_i]);
+ }
+ (*env)->ReleaseLongArrayElements (env, ${field_reference_name}, ${field_reference_name}ArrayElements, 0);
+ }
+ """)
+
+struct_setter_templates = {'u8': u8_struct_setter_template,
+ 'u16': u16_struct_setter_template,
+ 'u32': u32_struct_setter_template,
+ 'i32': u32_struct_setter_template,
+ 'u64': u64_struct_setter_template,
+ 'u8[]': u8_array_struct_setter_template,
+ 'u16[]': u16_array_struct_setter_template,
+ 'u32[]': u32_array_struct_setter_template,
+ 'u64[]': u64_array_struct_setter_template
+ }
+
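+# Dispatch is keyed by the vpe.api type name. Note that 'i32' currently maps
+# to the u32 setter (the dedicated i32 template above is unused); the byte
+# swap is identical for both signednesses. Minimal substitution sketch
+# (field names hypothetical):
+#
+# struct_setter_templates['u16'].substitute(
+# c_name='vlan_id', field_reference_name='vlanId')
+# # -> "\n mp->vlan_id = clib_host_to_net_u16(vlanId);"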
+
+def jni_request_identifiers_for_type(field_type, field_reference_name, field_name, object_name="request"):
+ """
+ Generates jni code that defines C variable corresponding to field of java object
+ (dto or custom type). To be used in request message handlers.
+ :param field_type: type of the field to be initialized (as defined in vpe.api)
+ :param field_reference_name: name of the field reference in generated code
+ :param field_name: name of the field (camelcase)
+ :param object_name: name of the object to be initialized
+ """
+ # field identifiers
+ jni_type = util.vpp_2_jni_type_mapping[field_type]
+ jni_signature = util.jni_2_signature_mapping[field_type]
+ jni_getter = util.jni_field_accessors[field_type]
+
+ # field identifier
+ return request_field_identifier_template.substitute(
+ jni_type=jni_type,
+ field_reference_name=field_reference_name,
+ field_name=field_name,
+ jni_signature=jni_signature,
+ jni_getter=jni_getter,
+ object_name=object_name)
+
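+# Illustrative expansion (assumes util maps u32 to jint / "I" / IntField;
+# field name hypothetical):
+#
+# jfieldID swIfIndexFieldId = (*env)->GetFieldID(env, requestClass, "swIfIndex", "I");
+# jint swIfIndex = (*env)->GetIntField(env, request, swIfIndexFieldId);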
+
+def jni_request_binding_for_type(field_type, c_name, field_reference_name, field_length, is_variable_len_array):
+ """
+ Generates jni code that initializes C structure that corresponds to a field of java object
+ (dto or custom type). To be used in request message handlers.
+ :param field_type: type of the field to be initialized (as defined in vpe.api)
+ :param c_name: name of the message struct member to be initialized
+ :param field_reference_name: name of the field reference in generated code
+ :param field_length: integer or name of variable that stores field length
+ """
+
+ # field setter
+ field_length_check = ""
+
+ # check if we are processing variable length array:
+ if is_variable_len_array:
+ field_length = util.underscore_to_camelcase(field_length)
+
+ # enforce max length if array has fixed length or uses variable length syntax
+ if str(field_length) != "0":
+ field_length_check = array_length_enforcement_template.substitute(field_length=field_length)
+
+ struct_setter_template = struct_setter_templates[field_type]
+
+ msg_initialization = struct_setter_template.substitute(
+ c_name=c_name,
+ field_reference_name=field_reference_name,
+ field_length_check=field_length_check)
+
+ return msg_initialization
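+
+# Sketch of the emitted initialization for a fixed-length u8[6] field
+# (names hypothetical): the fragment clamps the Java array length before
+# copying it into the message struct, roughly
+#
+# if (macAddress) {
+# jsize cnt = (*env)->GetArrayLength (env, macAddress);
+# size_t max_size = 6;
+# if (cnt > max_size) cnt = max_size;
+# (*env)->GetByteArrayRegion(env, macAddress, 0, cnt, (jbyte *)mp->mac_address);
+# }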
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_c_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_c_gen.py
new file mode 100644
index 00000000..8761eb13
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_c_gen.py
@@ -0,0 +1,392 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os, util
+from string import Template
+
+import jni_gen
+
+
+def is_manually_generated(f_name, plugin_name):
+ return f_name in {'control_ping_reply'}
+
+
+class_reference_template = Template("""jclass ${ref_name}Class;
+""")
+
+find_class_invocation_template = Template("""
+ ${ref_name}Class = (jclass)(*env)->NewGlobalRef(env, (*env)->FindClass(env, "io/fd/vpp/jvpp/${plugin_name}/dto/${class_name}"));
+ if ((*env)->ExceptionCheck(env)) {
+ (*env)->ExceptionDescribe(env);
+ return JNI_ERR;
+ }""")
+
+find_class_template = Template("""
+ ${ref_name}Class = (jclass)(*env)->NewGlobalRef(env, (*env)->FindClass(env, "${class_name}"));
+ if ((*env)->ExceptionCheck(env)) {
+ (*env)->ExceptionDescribe(env);
+ return JNI_ERR;
+ }""")
+
+delete_class_invocation_template = Template("""
+ if (${ref_name}Class) {
+ (*env)->DeleteGlobalRef(env, ${ref_name}Class);
+ }""")
+
+class_cache_template = Template("""
+$class_references
+static int cache_class_references(JNIEnv* env) {
+ $find_class_invocations
+ return 0;
+}
+
+static void delete_class_references(JNIEnv* env) {
+ $delete_class_invocations
+}""")
+
+
+def generate_class_cache(func_list, plugin_name):
+ class_references = []
+ find_class_invocations = []
+ delete_class_invocations = []
+ for f in func_list:
+ c_name = f['name']
+ class_name = util.underscore_to_camelcase_upper(c_name)
+ ref_name = util.underscore_to_camelcase(c_name)
+
+ if util.is_ignored(c_name) or util.is_control_ping(class_name):
+ continue
+
+ if util.is_reply(class_name):
+ class_references.append(class_reference_template.substitute(
+ ref_name=ref_name))
+ find_class_invocations.append(find_class_invocation_template.substitute(
+ plugin_name=plugin_name,
+ ref_name=ref_name,
+ class_name=class_name))
+ delete_class_invocations.append(delete_class_invocation_template.substitute(ref_name=ref_name))
+ elif util.is_notification(c_name):
+ class_references.append(class_reference_template.substitute(
+ ref_name=util.add_notification_suffix(ref_name)))
+ find_class_invocations.append(find_class_invocation_template.substitute(
+ plugin_name=plugin_name,
+ ref_name=util.add_notification_suffix(ref_name),
+ class_name=util.add_notification_suffix(class_name)))
+ delete_class_invocations.append(delete_class_invocation_template.substitute(
+ ref_name=util.add_notification_suffix(ref_name)))
+
+ # add exception class to class cache
+ ref_name = 'callbackException'
+ class_name = 'io/fd/vpp/jvpp/VppCallbackException'
+ class_references.append(class_reference_template.substitute(
+ ref_name=ref_name))
+ find_class_invocations.append(find_class_template.substitute(
+ ref_name=ref_name,
+ class_name=class_name))
+ delete_class_invocations.append(delete_class_invocation_template.substitute(ref_name=ref_name))
+
+ return class_cache_template.substitute(
+ class_references="".join(class_references), find_class_invocations="".join(find_class_invocations),
+ delete_class_invocations="".join(delete_class_invocations))
+
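+# For a reply such as "show_version_reply" in a hypothetical "core" plugin,
+# the generated cache declares "jclass showVersionReplyClass;",
+# cache_class_references() resolves io/fd/vpp/jvpp/core/dto/ShowVersionReply
+# into that global reference, and delete_class_references() releases it.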
+
+# TODO: cache method and field identifiers to achieve better performance
+# https://jira.fd.io/browse/HONEYCOMB-42
+request_class_template = Template("""
+ jclass requestClass = (*env)->FindClass(env, "io/fd/vpp/jvpp/${plugin_name}/dto/${java_name_upper}");""")
+
+request_field_identifier_template = Template("""
+ jfieldID ${field_reference_name}FieldId = (*env)->GetFieldID(env, ${object_name}Class, "${field_name}", "${jni_signature}");
+ ${jni_type} ${field_reference_name} = (*env)->Get${jni_getter}(env, ${object_name}, ${field_reference_name}FieldId);
+ """)
+
+jni_msg_size_template = Template(""" + ${array_length}*sizeof(${element_type})""")
+
+jni_impl_template = Template("""
+/**
+ * JNI binding for sending ${c_name} message.
+ * Generated based on $inputfile preparsed data:
+$api_data
+ */
+JNIEXPORT jint JNICALL Java_io_fd_vpp_jvpp_${plugin_name}_JVpp${java_plugin_name}Impl_${field_name}0
+(JNIEnv * env, jclass clazz$args) {
+ ${plugin_name}_main_t *plugin_main = &${plugin_name}_main;
+ vl_api_${c_name}_t * mp;
+ u32 my_context_id = vppjni_get_context_id (&jvpp_main);
+ $request_class
+
+ $jni_identifiers
+
+ // create message:
+ mp = vl_msg_api_alloc(${msg_size});
+ memset (mp, 0, ${msg_size});
+ mp->_vl_msg_id = ntohs (get_message_id(env, "${c_name}_${crc}"));
+ mp->client_index = plugin_main->my_client_index;
+ mp->context = clib_host_to_net_u32 (my_context_id);
+
+ $msg_initialization
+
+ // send message:
+ vl_msg_api_send_shmem (plugin_main->vl_input_queue, (u8 *)&mp);
+ if ((*env)->ExceptionCheck(env)) {
+ return JNI_ERR;
+ }
+ return my_context_id;
+}""")
+
+def generate_jni_impl(func_list, plugin_name, inputfile):
+ jni_impl = []
+ for f in func_list:
+ f_name = f['name']
+ camel_case_function_name = util.underscore_to_camelcase(f_name)
+ if is_manually_generated(f_name, plugin_name) or util.is_reply(camel_case_function_name) \
+ or util.is_ignored(f_name) or util.is_just_notification(f_name):
+ continue
+
+ arguments = ''
+ request_class = ''
+ jni_identifiers = ''
+ msg_initialization = ''
+ f_name_uppercase = f_name.upper()
+ msg_size = 'sizeof(*mp)'
+
+ if f['args']:
+ arguments = ', jobject request'
+ camel_case_function_name_upper = util.underscore_to_camelcase_upper(f_name)
+
+ request_class = request_class_template.substitute(
+ java_name_upper=camel_case_function_name_upper,
+ plugin_name=plugin_name)
+
+ for t in zip(f['types'], f['args'], f['lengths'], f['arg_types']):
+ field_name = util.underscore_to_camelcase(t[1])
+ is_variable_len_array = t[2][1]
+ if is_variable_len_array:
+ msg_size += jni_msg_size_template.substitute(array_length=util.underscore_to_camelcase(t[2][0]),
+ element_type=t[3])
+ jni_identifiers += jni_gen.jni_request_identifiers_for_type(field_type=t[0],
+ field_reference_name=field_name,
+ field_name=field_name)
+ msg_initialization += jni_gen.jni_request_binding_for_type(field_type=t[0], c_name=t[1],
+ field_reference_name=field_name,
+ field_length=t[2][0],
+ is_variable_len_array=is_variable_len_array)
+
+ jni_impl.append(jni_impl_template.substitute(
+ inputfile=inputfile,
+ api_data=util.api_message_to_javadoc(f),
+ field_reference_name=camel_case_function_name,
+ field_name=camel_case_function_name,
+ c_name_uppercase=f_name_uppercase,
+ c_name=f_name,
+ crc=f['crc'],
+ plugin_name=plugin_name,
+ java_plugin_name=plugin_name.title(),
+ request_class=request_class,
+ jni_identifiers=jni_identifiers,
+ msg_size=msg_size,
+ msg_initialization=msg_initialization,
+ args=arguments))
+
+ return "\n".join(jni_impl)
+
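+# Note on ${msg_size}: it starts at sizeof(*mp) and grows by one
+# " + <length>*sizeof(<element type>)" term per variable-length array, so a
+# hypothetical request with a u32 array sized by "count" allocates with
+#
+# mp = vl_msg_api_alloc(sizeof(*mp) + count*sizeof(u32));
+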
+# code fragment for checking result of the operation before sending request reply
+callback_err_handler_template = Template("""
+ // for negative result don't send callback message but send error callback
+ if (mp->retval<0) {
+ call_on_error("${handler_name}", mp->context, mp->retval, plugin_main->callbackClass, plugin_main->callbackObject, callbackExceptionClass);
+ return;
+ }
+ if (mp->retval == VNET_API_ERROR_IN_PROGRESS) {
+ clib_warning("Result in progress");
+ return;
+ }
+""")
+
+msg_handler_template = Template("""
+/**
+ * Handler for ${handler_name} message.
+ * Generated based on $inputfile preparsed data:
+$api_data
+ */
+static void vl_api_${handler_name}_t_handler (vl_api_${handler_name}_t * mp)
+{
+ ${plugin_name}_main_t *plugin_main = &${plugin_name}_main;
+ JNIEnv *env = jvpp_main.jenv;
+ jthrowable exc;
+ $err_handler
+
+ jmethodID constructor = (*env)->GetMethodID(env, ${class_ref_name}Class, "<init>", "()V");
+
+ // User does not have to provide callbacks for all VPP messages.
+ // We are ignoring messages that are not supported by user.
+ (*env)->ExceptionClear(env); // just in case exception occurred in different place and was not properly cleared
+ jmethodID callbackMethod = (*env)->GetMethodID(env, plugin_main->callbackClass, "on${dto_name}", "(Lio/fd/vpp/jvpp/${plugin_name}/dto/${dto_name};)V");
+ exc = (*env)->ExceptionOccurred(env);
+ if (exc) {
+ clib_warning("Unable to extract on${dto_name} method reference from ${plugin_name} plugin's callbackClass. Ignoring message.\\n");
+ (*env)->ExceptionDescribe(env);
+ (*env)->ExceptionClear(env);
+ return;
+ }
+
+ jobject dto = (*env)->NewObject(env, ${class_ref_name}Class, constructor);
+ $dto_setters
+
+ (*env)->CallVoidMethod(env, plugin_main->callbackObject, callbackMethod, dto);
+ // free DTO as per http://stackoverflow.com/questions/1340938/memory-leak-when-calling-java-code-from-c-using-jni
+ (*env)->DeleteLocalRef(env, dto);
+}""")
+
+
+def generate_msg_handlers(func_list, plugin_name, inputfile):
+ handlers = []
+ for f in func_list:
+ handler_name = f['name']
+ dto_name = util.underscore_to_camelcase_upper(handler_name)
+ ref_name = util.underscore_to_camelcase(handler_name)
+
+ if is_manually_generated(handler_name, plugin_name) or util.is_ignored(handler_name):
+ continue
+
+ if not util.is_reply(dto_name) and not util.is_notification(handler_name):
+ continue
+
+ if util.is_notification(handler_name):
+ dto_name = util.add_notification_suffix(dto_name)
+ ref_name = util.add_notification_suffix(ref_name)
+
+ dto_setters = ''
+ err_handler = ''
+ # dto setters
+ for t in zip(f['types'], f['args'], f['lengths']):
+ c_name = t[1]
+ java_name = util.underscore_to_camelcase(c_name)
+ field_length = t[2][0]
+ is_variable_len_array = t[2][1]
+ length_field_type = None
+ if is_variable_len_array:
+ length_field_type = f['types'][f['args'].index(field_length)]
+ dto_setters += jni_gen.jni_reply_handler_for_type(handler_name=handler_name, ref_name=ref_name,
+ field_type=t[0], c_name=t[1],
+ field_reference_name=java_name,
+ field_name=java_name, field_length=field_length,
+ is_variable_len_array=is_variable_len_array,
+ length_field_type=length_field_type)
+
+ # for retval don't generate setters and generate retval check
+ if util.is_retval_field(c_name):
+ err_handler = callback_err_handler_template.substitute(
+ handler_name=handler_name
+ )
+ continue
+
+ handlers.append(msg_handler_template.substitute(
+ inputfile=inputfile,
+ api_data=util.api_message_to_javadoc(f),
+ handler_name=handler_name,
+ plugin_name=plugin_name,
+ dto_name=dto_name,
+ class_ref_name=ref_name,
+ dto_setters=dto_setters,
+ err_handler=err_handler))
+
+ return "\n".join(handlers)
+
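+# Only messages carrying a "retval" member get the error guard prepended:
+# the handler first routes a negative retval to call_on_error() and returns,
+# and only then builds the DTO and invokes the user's on<Dto> callback.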
+
+handler_registration_template = Template("""_(${name}_${crc}, ${name}) \\
+""")
+
+
+def generate_handler_registration(func_list):
+ handler_registration = ["#define foreach_api_reply_handler \\\n"]
+ for f in func_list:
+ name = f['name']
+ camelcase_name = util.underscore_to_camelcase(f['name'])
+
+ if (not util.is_reply(camelcase_name) and not util.is_notification(name)) or util.is_ignored(name) \
+ or util.is_control_ping(camelcase_name):
+ continue
+
+ handler_registration.append(handler_registration_template.substitute(
+ name=name,
+ crc=f['crc']))
+
+ return "".join(handler_registration)
+
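+# Example of the emitted macro (crc value hypothetical):
+#
+# #define foreach_api_reply_handler \
+# _(show_version_reply_a9bbb9b7, show_version_reply) \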
+
+api_verification_template = Template("""_(${name}_${crc}) \\
+""")
+
+
+def generate_api_verification(func_list):
+ api_verification = ["#define foreach_supported_api_message \\\n"]
+ for f in func_list:
+ name = f['name']
+
+ if util.is_ignored(name):
+ continue
+
+ api_verification.append(api_verification_template.substitute(
+ name=name,
+ crc=f['crc']))
+
+ return "".join(api_verification)
+
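+# Example of the emitted verification list (crc values hypothetical):
+#
+# #define foreach_supported_api_message \
+# _(show_version_b9831ad8) \
+# _(show_version_reply_a9bbb9b7) \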
+
+jvpp_c_template = Template("""/**
+ * This file contains JNI bindings for jvpp Java API.
+ * It was generated by jvpp_c_gen.py based on $inputfile
+ * (python representation of api file generated by vppapigen).
+ */
+
+// JAVA class reference cache
+$class_cache
+
+// List of supported API messages used for verification
+$api_verification
+
+// JNI bindings
+$jni_implementations
+
+// Message handlers
+$msg_handlers
+
+// Registration of message handlers in vlib
+$handler_registration
+""")
+
+def generate_jvpp(func_list, plugin_name, inputfile, path):
+ """ Generates jvpp C file """
+ print "Generating jvpp C"
+
+ class_cache = generate_class_cache(func_list, plugin_name)
+ jni_impl = generate_jni_impl(func_list, plugin_name, inputfile)
+ msg_handlers = generate_msg_handlers(func_list, plugin_name, inputfile)
+ handler_registration = generate_handler_registration(func_list)
+ api_verification = generate_api_verification(func_list)
+
+ jvpp_c_file = open("%s/jvpp_%s_gen.h" % (path, plugin_name), 'w')
+ jvpp_c_file.write(jvpp_c_template.substitute(
+ inputfile=inputfile,
+ class_cache=class_cache,
+ api_verification=api_verification,
+ jni_implementations=jni_impl,
+ msg_handlers=msg_handlers,
+ handler_registration=handler_registration))
+ jvpp_c_file.flush()
+ jvpp_c_file.close()
+
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_callback_facade_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_callback_facade_gen.py
new file mode 100644
index 00000000..9aaa4c64
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_callback_facade_gen.py
@@ -0,0 +1,326 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, util
+from string import Template
+
+import callback_gen
+import dto_gen
+
+jvpp_ifc_template = Template("""
+package $plugin_package.$callback_facade_package;
+
+/**
+ * <p>Callback Java API representation of $plugin_package plugin.
+ * <br>It was generated by jvpp_callback_facade_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public interface CallbackJVpp${plugin_name} extends $base_package.$notification_package.NotificationRegistryProvider, java.lang.AutoCloseable {
+
+ // TODO add send
+
+$methods
+}
+""")
+
+jvpp_impl_template = Template("""
+package $plugin_package.$callback_facade_package;
+
+/**
+ * <p>Default implementation of Callback${plugin_name}JVpp interface.
+ * <br>It was generated by jvpp_callback_facade_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public final class CallbackJVpp${plugin_name}Facade implements CallbackJVpp${plugin_name} {
+
+ private final $plugin_package.JVpp${plugin_name} jvpp;
+ private final java.util.Map<Integer, $base_package.$callback_package.JVppCallback> callbacks;
+ private final $plugin_package.$notification_package.${plugin_name}NotificationRegistryImpl notificationRegistry = new $plugin_package.$notification_package.${plugin_name}NotificationRegistryImpl();
+ /**
+ * <p>Create CallbackJVpp${plugin_name}Facade object for provided JVpp instance.
+ * Constructor internally creates CallbackJVppFacadeCallback class for processing callbacks
+ * and then connects to provided JVpp instance
+ *
+ * @param jvpp provided $base_package.JVpp instance
+ *
+ * @throws java.io.IOException in case instance cannot connect to JVPP
+ */
+ public CallbackJVpp${plugin_name}Facade(final $base_package.JVppRegistry registry, final $plugin_package.JVpp${plugin_name} jvpp) throws java.io.IOException {
+ this.jvpp = java.util.Objects.requireNonNull(jvpp,"jvpp is null");
+ this.callbacks = new java.util.HashMap<>();
+ java.util.Objects.requireNonNull(registry, "JVppRegistry should not be null");
+ registry.register(jvpp, new CallbackJVpp${plugin_name}FacadeCallback(this.callbacks, notificationRegistry));
+ }
+
+ @Override
+ public $plugin_package.$notification_package.${plugin_name}NotificationRegistry getNotificationRegistry() {
+ return notificationRegistry;
+ }
+
+ @Override
+ public void close() throws Exception {
+ jvpp.close();
+ }
+
+ // TODO add send()
+
+$methods
+}
+""")
+
+method_template = Template(
+ """ void $name($plugin_package.$dto_package.$request request, $plugin_package.$callback_package.$callback callback) throws $base_package.VppInvocationException;""")
+
+method_impl_template = Template(""" public final void $name($plugin_package.$dto_package.$request request, $plugin_package.$callback_package.$callback callback) throws $base_package.VppInvocationException {
+ synchronized (callbacks) {
+ callbacks.put(jvpp.$name(request), callback);
+ }
+ }
+""")
+
+no_arg_method_template = Template(""" void $name($plugin_package.$callback_package.$callback callback) throws $base_package.VppInvocationException;""")
+no_arg_method_impl_template = Template(""" public final void $name($plugin_package.$callback_package.$callback callback) throws $base_package.VppInvocationException {
+ synchronized (callbacks) {
+ callbacks.put(jvpp.$name(), callback);
+ }
+ }
+""")
+
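+# For a hypothetical request "show_version" the facade pair expands to
+#
+# void showVersion(...dto.ShowVersion request, ...callback.ShowVersionCallback callback)
+# throws ...VppInvocationException;
+#
+# with the implementation registering the callback in the shared map under
+# the context id returned by jvpp.showVersion(request).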
+
+def generate_jvpp(func_list, base_package, plugin_package, plugin_name, dto_package, callback_package, notification_package, callback_facade_package, inputfile):
+ """ Generates callback facade """
+ print "Generating JVpp callback facade"
+
+ if os.path.exists(callback_facade_package):
+ util.remove_folder(callback_facade_package)
+
+ os.mkdir(callback_facade_package)
+
+ methods = []
+ methods_impl = []
+ for func in func_list:
+
+ if util.is_notification(func['name']) or util.is_ignored(func['name']):
+ continue
+
+ camel_case_name = util.underscore_to_camelcase(func['name'])
+ camel_case_name_upper = util.underscore_to_camelcase_upper(func['name'])
+ if util.is_reply(camel_case_name) or util.is_control_ping(camel_case_name):
+ continue
+
+ # Strip suffix for dump calls
+ callback_type = get_request_name(camel_case_name_upper, func['name']) + callback_gen.callback_suffix
+
+ if len(func['args']) == 0:
+ methods.append(no_arg_method_template.substitute(name=camel_case_name,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_package=callback_package,
+ callback=callback_type))
+ methods_impl.append(no_arg_method_impl_template.substitute(name=camel_case_name,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_package=callback_package,
+ callback=callback_type))
+ else:
+ methods.append(method_template.substitute(name=camel_case_name,
+ request=camel_case_name_upper,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_package=callback_package,
+ callback=callback_type))
+ methods_impl.append(method_impl_template.substitute(name=camel_case_name,
+ request=camel_case_name_upper,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_package=callback_package,
+ callback=callback_type))
+
+ join = os.path.join(callback_facade_package, "CallbackJVpp%s.java" % plugin_name)
+ jvpp_file = open(join, 'w')
+ jvpp_file.write(
+ jvpp_ifc_template.substitute(inputfile=inputfile,
+ methods="\n".join(methods),
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ dto_package=dto_package,
+ notification_package=notification_package,
+ callback_facade_package=callback_facade_package))
+ jvpp_file.flush()
+ jvpp_file.close()
+
+ jvpp_file = open(os.path.join(callback_facade_package, "CallbackJVpp%sFacade.java" % plugin_name), 'w')
+ jvpp_file.write(jvpp_impl_template.substitute(inputfile=inputfile,
+ methods="\n".join(methods_impl),
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ dto_package=dto_package,
+ notification_package=notification_package,
+ callback_package=callback_package,
+ callback_facade_package=callback_facade_package))
+ jvpp_file.flush()
+ jvpp_file.close()
+
+ generate_callback(func_list, base_package, plugin_package, plugin_name, dto_package, callback_package, notification_package, callback_facade_package, inputfile)
+
+
+jvpp_facade_callback_template = Template("""
+package $plugin_package.$callback_facade_package;
+
+/**
+ * <p>Implementation of JVppGlobalCallback interface for Java Callback API.
+ * <br>It was generated by jvpp_callback_facade_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public final class CallbackJVpp${plugin_name}FacadeCallback implements $plugin_package.$callback_package.JVpp${plugin_name}GlobalCallback {
+
+ private final java.util.Map<Integer, $base_package.$callback_package.JVppCallback> requests;
+ private final $plugin_package.$notification_package.Global${plugin_name}NotificationCallback notificationCallback;
+ private static final java.util.logging.Logger LOG = java.util.logging.Logger.getLogger(CallbackJVpp${plugin_name}FacadeCallback.class.getName());
+
+ public CallbackJVpp${plugin_name}FacadeCallback(final java.util.Map<Integer, $base_package.$callback_package.JVppCallback> requestMap,
+ final $plugin_package.$notification_package.Global${plugin_name}NotificationCallback notificationCallback) {
+ this.requests = requestMap;
+ this.notificationCallback = notificationCallback;
+ }
+
+ @Override
+ public void onError($base_package.VppCallbackException reply) {
+
+ $base_package.$callback_package.JVppCallback failedCall;
+ synchronized(requests) {
+ failedCall = requests.remove(reply.getCtxId());
+ }
+
+ if(failedCall != null) {
+ try {
+ failedCall.onError(reply);
+ } catch(RuntimeException ex) {
+ ex.addSuppressed(reply);
+ LOG.log(java.util.logging.Level.WARNING, String.format("Callback: %s failed while handling exception: %s", failedCall, reply), ex);
+ }
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void onControlPingReply(final $base_package.$dto_package.ControlPingReply reply) {
+
+ $base_package.$callback_package.ControlPingCallback callback;
+ final int replyId = reply.context;
+ synchronized(requests) {
+ callback = ($base_package.$callback_package.ControlPingCallback) requests.remove(replyId);
+ }
+
+ if(callback != null) {
+ callback.onControlPingReply(reply);
+ }
+ }
+
+$methods
+}
+""")
+
+jvpp_facade_callback_method_template = Template("""
+ @Override
+ @SuppressWarnings("unchecked")
+ public void on$callback_dto(final $plugin_package.$dto_package.$callback_dto reply) {
+
+ $plugin_package.$callback_package.$callback callback;
+ final int replyId = reply.context;
+ synchronized(requests) {
+ callback = ($plugin_package.$callback_package.$callback) requests.remove(replyId);
+ }
+
+ if(callback != null) {
+ callback.on$callback_dto(reply);
+ }
+ }
+""")
+
+jvpp_facade_callback_notification_method_template = Template("""
+ @Override
+ @SuppressWarnings("unchecked")
+ public void on$callback_dto($plugin_package.$dto_package.$callback_dto notification) {
+ notificationCallback.on$callback_dto(notification);
+ }
+""")
+
+
+def generate_callback(func_list, base_package, plugin_package, plugin_name, dto_package, callback_package, notification_package, callback_facade_package, inputfile):
+ callbacks = []
+ for func in func_list:
+
+ camel_case_name_with_suffix = util.underscore_to_camelcase_upper(func['name'])
+
+ if util.is_ignored(func['name']) or util.is_control_ping(camel_case_name_with_suffix):
+ continue
+
+ if util.is_reply(camel_case_name_with_suffix):
+ callbacks.append(jvpp_facade_callback_method_template.substitute(plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_package=callback_package,
+ callback=util.remove_reply_suffix(camel_case_name_with_suffix) + callback_gen.callback_suffix,
+ callback_dto=camel_case_name_with_suffix))
+
+ if util.is_notification(func["name"]):
+ with_notification_suffix = util.add_notification_suffix(camel_case_name_with_suffix)
+ callbacks.append(jvpp_facade_callback_notification_method_template.substitute(plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_package=callback_package,
+ callback=with_notification_suffix + callback_gen.callback_suffix,
+ callback_dto=with_notification_suffix))
+
+ jvpp_file = open(os.path.join(callback_facade_package, "CallbackJVpp%sFacadeCallback.java" % plugin_name), 'w')
+ jvpp_file.write(jvpp_facade_callback_template.substitute(inputfile=inputfile,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ dto_package=dto_package,
+ notification_package=notification_package,
+ callback_package=callback_package,
+ methods="".join(callbacks),
+ callback_facade_package=callback_facade_package))
+ jvpp_file.flush()
+ jvpp_file.close()
+
+
+# Returns request name or special one from unconventional_naming_rep_req map
+def get_request_name(camel_case_dto_name, func_name):
+ rev_rep_req = reverse_dict(util.unconventional_naming_rep_req)
+ if func_name in rev_rep_req:
+ request_name = util.underscore_to_camelcase_upper(rev_rep_req[func_name])
+ else:
+ request_name = camel_case_dto_name
+ return remove_suffix(request_name)
+
+
+def reverse_dict(map):
+ return dict((v, k) for k, v in map.iteritems())
+
+
+def remove_suffix(name):
+ if util.is_reply(name):
+ return util.remove_reply_suffix(name)
+ if util.is_dump(name):
+ return util.remove_suffix(name, util.dump_suffix)
+ return name
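+
+# Name-mapping sketch (behaviour depends on util's suffix helpers):
+# remove_suffix("SwInterfaceDump") -> "SwInterface",
+# remove_suffix("CreateLoopbackReply") -> "CreateLoopback"; names that are
+# neither replies nor dumps pass through unchanged.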
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_future_facade_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_future_facade_gen.py
new file mode 100644
index 00000000..07947e30
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_future_facade_gen.py
@@ -0,0 +1,331 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from string import Template
+
+import dto_gen
+import util
+
+jvpp_facade_callback_template = Template("""
+package $plugin_package.$future_package;
+
+/**
+ * <p>Async facade callback setting values to future objects
+ * <br>It was generated by jvpp_future_facade_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public final class FutureJVpp${plugin_name}FacadeCallback implements $plugin_package.$callback_package.JVpp${plugin_name}GlobalCallback {
+
+ private final java.util.Map<java.lang.Integer, java.util.concurrent.CompletableFuture<? extends $base_package.$dto_package.JVppReply<?>>> requests;
+ private final $plugin_package.$notification_package.Global${plugin_name}NotificationCallback notificationCallback;
+
+ public FutureJVpp${plugin_name}FacadeCallback(
+ final java.util.Map<java.lang.Integer, java.util.concurrent.CompletableFuture<? extends $base_package.$dto_package.JVppReply<?>>> requestMap,
+ final $plugin_package.$notification_package.Global${plugin_name}NotificationCallback notificationCallback) {
+ this.requests = requestMap;
+ this.notificationCallback = notificationCallback;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void onError($base_package.VppCallbackException reply) {
+ final java.util.concurrent.CompletableFuture<$base_package.$dto_package.JVppReply<?>> completableFuture;
+
+ synchronized(requests) {
+ completableFuture = (java.util.concurrent.CompletableFuture<$base_package.$dto_package.JVppReply<?>>) requests.get(reply.getCtxId());
+ }
+
+ if(completableFuture != null) {
+ completableFuture.completeExceptionally(reply);
+
+ synchronized(requests) {
+ requests.remove(reply.getCtxId());
+ }
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void onControlPingReply(final $base_package.$dto_package.ControlPingReply reply) {
+ final java.util.concurrent.CompletableFuture<$base_package.$dto_package.JVppReply<?>> completableFuture;
+
+ final int replyId = reply.context;
+ synchronized(requests) {
+ completableFuture = (java.util.concurrent.CompletableFuture<$base_package.$dto_package.JVppReply<?>>) requests.get(replyId);
+ }
+
+ if(completableFuture != null) {
+ // Finish dump call
+ if (completableFuture instanceof $base_package.$future_package.AbstractFutureJVppInvoker.CompletableDumpFuture) {
+ completableFuture.complete((($base_package.$future_package.AbstractFutureJVppInvoker.CompletableDumpFuture) completableFuture).getReplyDump());
+ // Remove future mapped to dump call context id
+ synchronized(requests) {
+ requests.remove((($base_package.$future_package.AbstractFutureJVppInvoker.CompletableDumpFuture) completableFuture).getContextId());
+ }
+ } else {
+ completableFuture.complete(reply);
+ }
+ synchronized(requests) {
+ requests.remove(replyId);
+ }
+ }
+ }
+
+$methods
+}
+""")
+
+jvpp_facade_callback_method_template = Template("""
+ @Override
+ @SuppressWarnings("unchecked")
+ public void on$callback_dto(final $plugin_package.$dto_package.$callback_dto reply) {
+ final java.util.concurrent.CompletableFuture<$base_package.$dto_package.JVppReply<?>> completableFuture;
+ final int replyId = reply.context;
+ synchronized(requests) {
+ completableFuture = (java.util.concurrent.CompletableFuture<$base_package.$dto_package.JVppReply<?>>) requests.get(replyId);
+ }
+
+ if(completableFuture != null) {
+ completableFuture.complete(reply);
+
+ synchronized(requests) {
+ requests.remove(replyId);
+ }
+ }
+ }
+""")
+
+jvpp_facade_callback_notification_method_template = Template("""
+ @Override
+ public void on$callback_dto($plugin_package.$dto_package.$callback_dto notification) {
+ notificationCallback.on$callback_dto(notification);
+ }
+""")
+
+jvpp_facade_details_callback_method_template = Template("""
+ @Override
+ @SuppressWarnings("unchecked")
+ public void on$callback_dto(final $plugin_package.$dto_package.$callback_dto reply) {
+ final $base_package.$future_package.AbstractFutureJVppInvoker.CompletableDumpFuture<$plugin_package.$dto_package.$callback_dto_reply_dump> completableFuture;
+ final int replyId = reply.context;
+ synchronized(requests) {
+ completableFuture = ($base_package.$future_package.AbstractFutureJVppInvoker.CompletableDumpFuture<$plugin_package.$dto_package.$callback_dto_reply_dump>) requests.get(replyId);
+ }
+
+ if(completableFuture != null) {
+ completableFuture.getReplyDump().$callback_dto_field.add(reply);
+ }
+ }
+""")
+
+
+def generate_jvpp(func_list, base_package, plugin_package, plugin_name, dto_package, callback_package, notification_package, future_facade_package, inputfile):
+ """ Generates JVpp interface and JNI implementation """
+ print "Generating JVpp future facade"
+
+ if not os.path.exists(future_facade_package):
+ os.mkdir(future_facade_package)
+
+ methods = []
+ methods_impl = []
+ callbacks = []
+ for func in func_list:
+ camel_case_name_with_suffix = util.underscore_to_camelcase_upper(func['name'])
+
+ if util.is_ignored(func['name']) or util.is_control_ping(camel_case_name_with_suffix):
+ continue
+
+ if not util.is_reply(camel_case_name_with_suffix) and not util.is_notification(func['name']):
+ continue
+
+ camel_case_method_name = util.underscore_to_camelcase(func['name'])
+
+ if not util.is_notification(func["name"]):
+ camel_case_request_method_name = util.remove_reply_suffix(util.underscore_to_camelcase(func['name']))
+ if util.is_details(camel_case_name_with_suffix):
+ camel_case_reply_name = get_standard_dump_reply_name(util.underscore_to_camelcase_upper(func['name']),
+ func['name'])
+ callbacks.append(jvpp_facade_details_callback_method_template.substitute(base_package=base_package,
+ plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_dto=camel_case_name_with_suffix,
+ callback_dto_field=camel_case_method_name,
+ callback_dto_reply_dump=camel_case_reply_name + dto_gen.dump_dto_suffix,
+ future_package=future_facade_package))
+
+ methods.append(future_jvpp_method_template.substitute(plugin_package=plugin_package,
+ dto_package=dto_package,
+ method_name=camel_case_request_method_name +
+ util.underscore_to_camelcase_upper(util.dump_suffix),
+ reply_name=camel_case_reply_name + dto_gen.dump_dto_suffix,
+ request_name=util.remove_reply_suffix(camel_case_reply_name) +
+ util.underscore_to_camelcase_upper(util.dump_suffix)))
+ methods_impl.append(future_jvpp_dump_method_impl_template.substitute(plugin_package=plugin_package,
+ dto_package=dto_package,
+ method_name=camel_case_request_method_name +
+ util.underscore_to_camelcase_upper(util.dump_suffix),
+ reply_name=camel_case_reply_name + dto_gen.dump_dto_suffix,
+ request_name=util.remove_reply_suffix(camel_case_reply_name) +
+ util.underscore_to_camelcase_upper(util.dump_suffix)))
+ else:
+ request_name = util.underscore_to_camelcase_upper(util.unconventional_naming_rep_req[func['name']]) \
+ if func['name'] in util.unconventional_naming_rep_req else util.remove_reply_suffix(camel_case_name_with_suffix)
+
+ methods.append(future_jvpp_method_template.substitute(plugin_package=plugin_package,
+ dto_package=dto_package,
+ method_name=camel_case_request_method_name,
+ reply_name=camel_case_name_with_suffix,
+ request_name=request_name))
+ methods_impl.append(future_jvpp_method_impl_template.substitute(plugin_package=plugin_package,
+ dto_package=dto_package,
+ method_name=camel_case_request_method_name,
+ reply_name=camel_case_name_with_suffix,
+ request_name=request_name))
+
+ callbacks.append(jvpp_facade_callback_method_template.substitute(base_package=base_package,
+ plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_dto=camel_case_name_with_suffix))
+
+ if util.is_notification(func["name"]):
+ callbacks.append(jvpp_facade_callback_notification_method_template.substitute(plugin_package=plugin_package,
+ dto_package=dto_package,
+ callback_dto=util.add_notification_suffix(camel_case_name_with_suffix)))
+
+ jvpp_file = open(os.path.join(future_facade_package, "FutureJVpp%sFacadeCallback.java" % plugin_name), 'w')
+ jvpp_file.write(jvpp_facade_callback_template.substitute(inputfile=inputfile,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ dto_package=dto_package,
+ notification_package=notification_package,
+ callback_package=callback_package,
+ methods="".join(callbacks),
+ future_package=future_facade_package))
+ jvpp_file.flush()
+ jvpp_file.close()
+
+ jvpp_file = open(os.path.join(future_facade_package, "FutureJVpp%s.java" % plugin_name), 'w')
+ jvpp_file.write(future_jvpp_template.substitute(inputfile=inputfile,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ notification_package=notification_package,
+ methods="".join(methods),
+ future_package=future_facade_package))
+ jvpp_file.flush()
+ jvpp_file.close()
+
+ jvpp_file = open(os.path.join(future_facade_package, "FutureJVpp%sFacade.java" % plugin_name), 'w')
+ jvpp_file.write(future_jvpp_facade_template.substitute(inputfile=inputfile,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ dto_package=dto_package,
+ notification_package=notification_package,
+ methods="".join(methods_impl),
+ future_package=future_facade_package))
+ jvpp_file.flush()
+ jvpp_file.close()
+
+
+future_jvpp_template = Template('''
+package $plugin_package.$future_package;
+
+/**
+ * <p>Async facade extension adding specific methods for each request invocation
+ * <br>It was generated by jvpp_future_facade_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public interface FutureJVpp${plugin_name} extends $base_package.$future_package.FutureJVppInvoker {
+$methods
+
+ @Override
+ public $plugin_package.$notification_package.${plugin_name}NotificationRegistry getNotificationRegistry();
+
+}
+''')
+
+future_jvpp_method_template = Template('''
+ java.util.concurrent.CompletionStage<$plugin_package.$dto_package.$reply_name> $method_name($plugin_package.$dto_package.$request_name request);
+''')
+
+
+future_jvpp_facade_template = Template('''
+package $plugin_package.$future_package;
+
+/**
+ * <p>Implementation of FutureJVpp based on AbstractFutureJVppInvoker
+ * <br>It was generated by jvpp_future_facade_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public class FutureJVpp${plugin_name}Facade extends $base_package.$future_package.AbstractFutureJVppInvoker implements FutureJVpp${plugin_name} {
+
+ private final $plugin_package.$notification_package.${plugin_name}NotificationRegistryImpl notificationRegistry = new $plugin_package.$notification_package.${plugin_name}NotificationRegistryImpl();
+
+ /**
+ * <p>Create FutureJVpp${plugin_name}Facade object for provided JVpp instance.
+ * Constructor internally creates FutureJVppFacadeCallback class for processing callbacks
+ * and then connects to provided JVpp instance
+ *
+ * @param jvpp provided $base_package.JVpp instance
+ *
+ * @throws java.io.IOException in case instance cannot connect to JVPP
+ */
+ public FutureJVpp${plugin_name}Facade(final $base_package.JVppRegistry registry, final $base_package.JVpp jvpp) throws java.io.IOException {
+ super(jvpp, registry, new java.util.HashMap<>());
+ java.util.Objects.requireNonNull(registry, "JVppRegistry should not be null");
+ registry.register(jvpp, new FutureJVpp${plugin_name}FacadeCallback(getRequests(), notificationRegistry));
+ }
+
+ @Override
+ public $plugin_package.$notification_package.${plugin_name}NotificationRegistry getNotificationRegistry() {
+ return notificationRegistry;
+ }
+
+$methods
+}
+''')
+
+future_jvpp_method_impl_template = Template('''
+ @Override
+ public java.util.concurrent.CompletionStage<$plugin_package.$dto_package.$reply_name> $method_name($plugin_package.$dto_package.$request_name request) {
+ return send(request);
+ }
+''')
+
+future_jvpp_dump_method_impl_template = Template('''
+ @Override
+ public java.util.concurrent.CompletionStage<$plugin_package.$dto_package.$reply_name> $method_name($plugin_package.$dto_package.$request_name request) {
+ return send(request, new $plugin_package.$dto_package.$reply_name());
+ }
+''')
+
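+# Plain requests complete their CompletableFuture with the single reply;
+# dump requests instead return send(request, new <Reply>Dump()) and are
+# completed later by onControlPingReply once all details have been
+# accumulated into the dump DTO (see FutureJVpp<plugin>FacadeCallback above).
+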
+
+# Returns request name or special one from unconventional_naming_rep_req map
+def get_standard_dump_reply_name(camel_case_dto_name, func_name):
+ # FIXME this is a hotfix for sub-details callbacks
+ # FIXME also for L2FibTableEntry
+ # It's all because of the unclear mapping between
+ # request -> reply,
+ # dump -> reply, details,
+ # notification_start -> reply, notifications
+
+ # vpe.api needs to be "standardized" so we can parse the information and create maps before generating java code
+ suffix = func_name.split("_")[-1]
+ if func_name in util.unconventional_naming_rep_req:
+ return util.underscore_to_camelcase_upper(
+ util.unconventional_naming_rep_req[func_name]) + util.underscore_to_camelcase_upper(suffix)
+ return camel_case_dto_name
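+
+# Fallback sketch: a details message whose request is listed in
+# util.unconventional_naming_rep_req has its dump-reply DTO name rebuilt
+# from the map entry plus its own trailing suffix; every other details
+# message simply keeps camel_case_dto_name.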
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_impl_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_impl_gen.py
new file mode 100644
index 00000000..7bf91138
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/jvpp_impl_gen.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, util
+from string import Template
+
+jvpp_ifc_template = Template("""
+package $plugin_package;
+
+/**
+ * <p>Java representation of plugin's api file.
+ * <br>It was generated by jvpp_impl_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public interface JVpp${plugin_name} extends $base_package.JVpp {
+
+ /**
+ * Generic dispatch method for sending requests to VPP
+ *
+ * @throws io.fd.vpp.jvpp.VppInvocationException if sending the request fails
+ */
+ int send($base_package.$dto_package.JVppRequest request) throws io.fd.vpp.jvpp.VppInvocationException;
+
+$methods
+}
+""")
+
+jvpp_impl_template = Template("""
+package $plugin_package;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.attribute.PosixFilePermission;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.util.Set;
+import java.util.logging.Logger;
+import $base_package.callback.JVppCallback;
+import $base_package.VppConnection;
+import $base_package.JVppRegistry;
+
+/**
+ * <p>Default implementation of JVpp interface.
+ * <br>It was generated by jvpp_impl_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public final class JVpp${plugin_name}Impl implements $plugin_package.JVpp${plugin_name} {
+
+ private final static Logger LOG = Logger.getLogger(JVpp${plugin_name}Impl.class.getName());
+ private static final String LIBNAME = "libjvpp_${plugin_name_underscore}.so";
+
+ // FIXME: using NativeLibraryLoader makes the load fail (could not find WantInterfaceEventsReply).
+ static {
+ try {
+ loadLibrary();
+ } catch (Exception e) {
+ LOG.severe("Can't find jvpp jni library: " + LIBNAME);
+ throw new ExceptionInInitializerError(e);
+ }
+ }
+
+ private static void loadStream(final InputStream is) throws IOException {
+ final Set<PosixFilePermission> perms = PosixFilePermissions.fromString("rwxr-x---");
+ final Path p = Files.createTempFile(LIBNAME, null, PosixFilePermissions.asFileAttribute(perms));
+ try {
+ Files.copy(is, p, StandardCopyOption.REPLACE_EXISTING);
+
+ try {
+ Runtime.getRuntime().load(p.toString());
+ } catch (UnsatisfiedLinkError e) {
+ throw new IOException("Failed to load library " + p, e);
+ }
+ } finally {
+ try {
+ Files.deleteIfExists(p);
+ } catch (IOException e) {
+ }
+ }
+ }
+
+ private static void loadLibrary() throws IOException {
+ try (final InputStream is = JVpp${plugin_name}Impl.class.getResourceAsStream('/' + LIBNAME)) {
+ if (is == null) {
+ throw new IOException("Failed to open library resource " + LIBNAME);
+ }
+ loadStream(is);
+ }
+ }
+
+ private VppConnection connection;
+ private JVppRegistry registry;
+
+ private static native void init0(final JVppCallback callback, final long queueAddress, final int clientIndex);
+ @Override
+ public void init(final JVppRegistry registry, final JVppCallback callback, final long queueAddress, final int clientIndex) {
+ this.registry = java.util.Objects.requireNonNull(registry, "registry should not be null");
+ this.connection = java.util.Objects.requireNonNull(registry.getConnection(), "connection should not be null");
+ connection.checkActive();
+ init0(callback, queueAddress, clientIndex);
+ }
+
+ private static native void close0();
+ @Override
+ public void close() {
+ close0();
+ }
+
+ @Override
+ public int send($base_package.$dto_package.JVppRequest request) throws io.fd.vpp.jvpp.VppInvocationException {
+ return request.send(this);
+ }
+
+ @Override
+ public final int controlPing(final io.fd.vpp.jvpp.dto.ControlPing controlPing) throws io.fd.vpp.jvpp.VppInvocationException {
+ return registry.controlPing(JVpp${plugin_name}Impl.class);
+ }
+
+$methods
+}
+""")
+
+method_template = Template(""" int $name($plugin_package.$dto_package.$request request) throws io.fd.vpp.jvpp.VppInvocationException;""")
+method_native_template = Template(
+ """ private static native int ${name}0($plugin_package.$dto_package.$request request);""")
+method_impl_template = Template(""" public final int $name($plugin_package.$dto_package.$request request) throws io.fd.vpp.jvpp.VppInvocationException {
+ java.util.Objects.requireNonNull(request,"Null request object");
+ connection.checkActive();
+ int result=${name}0(request);
+ if(result<0){
+ throw new io.fd.vpp.jvpp.VppInvocationException("${name}",result);
+ }
+ return result;
+ }
+""")
+
+no_arg_method_template = Template(""" int $name() throws io.fd.vpp.jvpp.VppInvocationException;""")
+no_arg_method_native_template = Template(""" private static native int ${name}0() throws io.fd.vpp.jvpp.VppInvocationException;""")
+no_arg_method_impl_template = Template(""" public final int $name() throws io.fd.vpp.jvpp.VppInvocationException {
+ connection.checkActive();
+ int result=${name}0();
+ if(result<0){
+ throw new io.fd.vpp.jvpp.VppInvocationException("${name}",result);
+ }
+ return result;
+ }
+""")
+
+
+def generate_jvpp(func_list, base_package, plugin_package, plugin_name_underscore, dto_package, inputfile):
+ """ Generates JVpp interface and JNI implementation """
+ print "Generating JVpp"
+ plugin_name = util.underscore_to_camelcase_upper(plugin_name_underscore)
+
+ methods = []
+ methods_impl = []
+ for func in func_list:
+
+ # Skip structures that are used only as notifications
+ if util.is_just_notification(func['name']) or util.is_ignored(func['name']):
+ continue
+
+ camel_case_name = util.underscore_to_camelcase(func['name'])
+ camel_case_name_upper = util.underscore_to_camelcase_upper(func['name'])
+ if util.is_reply(camel_case_name):
+ continue
+
+ if len(func['args']) == 0:
+ methods.append(no_arg_method_template.substitute(name=camel_case_name))
+ methods_impl.append(no_arg_method_native_template.substitute(name=camel_case_name))
+ methods_impl.append(no_arg_method_impl_template.substitute(name=camel_case_name))
+ else:
+ methods.append(method_template.substitute(name=camel_case_name,
+ request=camel_case_name_upper,
+ plugin_package=plugin_package,
+ dto_package=dto_package))
+ methods_impl.append(method_native_template.substitute(name=camel_case_name,
+ request=camel_case_name_upper,
+ plugin_package=plugin_package,
+ dto_package=dto_package))
+ methods_impl.append(method_impl_template.substitute(name=camel_case_name,
+ request=camel_case_name_upper,
+ plugin_package=plugin_package,
+ dto_package=dto_package))
+
+ jvpp_file = open("JVpp%s.java" % plugin_name, 'w')
+ jvpp_file.write(
+ jvpp_ifc_template.substitute(inputfile=inputfile,
+ methods="\n".join(methods),
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ dto_package=dto_package))
+ jvpp_file.flush()
+ jvpp_file.close()
+
+ jvpp_file = open("JVpp%sImpl.java" % plugin_name, 'w')
+ jvpp_file.write(jvpp_impl_template.substitute(inputfile=inputfile,
+ methods="\n".join(methods_impl),
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ plugin_name_underscore=plugin_name_underscore,
+ dto_package=dto_package))
+ jvpp_file.flush()
+ jvpp_file.close()
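+
+# For a hypothetical request "create_loopback" the three templates above
+# expand to: the interface method
+# int createLoopback(...dto.CreateLoopback request) throws ...VppInvocationException;
+# a private native stub createLoopback0(request), and a public wrapper that
+# null-checks the request, verifies the connection is active and turns a
+# negative return code into VppInvocationException.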
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/notification_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/notification_gen.py
new file mode 100644
index 00000000..94302d56
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/notification_gen.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import callback_gen
+import util
+from string import Template
+
+notification_registry_template = Template("""
+package $plugin_package.$notification_package;
+
+/**
+ * <p>Registry for notification callbacks defined in ${plugin_name}.
+ * <br>It was generated by notification_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public interface ${plugin_name}NotificationRegistry extends $base_package.$notification_package.NotificationRegistry {
+
+ $register_callback_methods
+
+ @Override
+ void close();
+}
+""")
+
+global_notification_callback_template = Template("""
+package $plugin_package.$notification_package;
+
+/**
+ * <p>Aggregated callback interface for notifications only.
+ * <br>It was generated by notification_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public interface Global${plugin_name}NotificationCallback$callbacks {
+
+}
+""")
+
+notification_registry_impl_template = Template("""
+package $plugin_package.$notification_package;
+
+/**
+ * <p>Notification registry delegating notification processing to registered callbacks.
+ * <br>It was generated by notification_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public final class ${plugin_name}NotificationRegistryImpl implements ${plugin_name}NotificationRegistry, Global${plugin_name}NotificationCallback {
+
+ // TODO add a special NotificationCallback interface and only allow those to be registered
+ private final java.util.concurrent.ConcurrentMap<Class<? extends $base_package.$dto_package.JVppNotification>, $base_package.$callback_package.JVppNotificationCallback> registeredCallbacks =
+ new java.util.concurrent.ConcurrentHashMap<>();
+
+ $register_callback_methods
+ $handler_methods
+
+ @Override
+ public void close() {
+ registeredCallbacks.clear();
+ }
+}
+""")
+
+register_callback_impl_template = Template("""
+ public java.lang.AutoCloseable register$callback(final $plugin_package.$callback_package.$callback callback){
+ if(null != registeredCallbacks.putIfAbsent($plugin_package.$dto_package.$notification.class, callback)){
+ throw new IllegalArgumentException("Callback for " + $plugin_package.$dto_package.$notification.class +
+ "notification already registered");
+ }
+ return () -> registeredCallbacks.remove($plugin_package.$dto_package.$notification.class);
+ }
+""")
+
+handler_impl_template = Template("""
+ @Override
+ public void on$notification(
+ final $plugin_package.$dto_package.$notification notification) {
+ final $base_package.$callback_package.JVppNotificationCallback jVppNotificationCallback = registeredCallbacks.get($plugin_package.$dto_package.$notification.class);
+ if (null != jVppNotificationCallback) {
+ (($plugin_package.$callback_package.$callback) jVppNotificationCallback)
+ .on$notification(notification);
+ }
+ }
+""")
+
+notification_provider_template = Template("""
+package $plugin_package.$notification_package;
+
+ /**
+ * Provides ${plugin_name}NotificationRegistry.
+ * <br>The file was generated by notification_gen.py based on $inputfile
+ * <br>(python representation of api file generated by vppapigen).
+ */
+public interface ${plugin_name}NotificationRegistryProvider extends $base_package.$notification_package.NotificationRegistryProvider {
+
+ @Override
+ public ${plugin_name}NotificationRegistry getNotificationRegistry();
+}
+""")
+
+
+def generate_notification_registry(func_list, base_package, plugin_package, plugin_name, notification_package, callback_package, dto_package, inputfile):
+ """ Generates notification registry interface and implementation """
+ print "Generating Notification interfaces and implementation"
+
+ if not os.path.exists(notification_package):
+ os.mkdir(notification_package)
+
+ callbacks = []
+ register_callback_methods = []
+ register_callback_methods_impl = []
+ handler_methods = []
+ for func in func_list:
+
+ if not util.is_notification(func['name']):
+ continue
+
+ camel_case_name_with_suffix = util.underscore_to_camelcase_upper(func['name'])
+ notification_dto = util.add_notification_suffix(camel_case_name_with_suffix)
+ callback_ifc = notification_dto + callback_gen.callback_suffix
+ fully_qualified_callback_ifc = "{0}.{1}.{2}".format(plugin_package, callback_package, callback_ifc)
+ callbacks.append(fully_qualified_callback_ifc)
+
+ # TODO create NotificationListenerRegistration and return that instead of AutoCloseable to better indicate
+ # that the registration should be closed
+ register_callback_methods.append("java.lang.AutoCloseable register{0}({1} callback);"
+ .format(callback_ifc, fully_qualified_callback_ifc))
+ register_callback_methods_impl.append(register_callback_impl_template.substitute(plugin_package=plugin_package,
+ callback_package=callback_package,
+ dto_package=dto_package,
+ notification=notification_dto,
+ callback=callback_ifc))
+ handler_methods.append(handler_impl_template.substitute(base_package=base_package,
+ plugin_package=plugin_package,
+ callback_package=callback_package,
+ dto_package=dto_package,
+ notification=notification_dto,
+ callback=callback_ifc))
+
+
+ callback_file = open(os.path.join(notification_package, "%sNotificationRegistry.java" % plugin_name), 'w')
+ callback_file.write(notification_registry_template.substitute(inputfile=inputfile,
+ register_callback_methods="\n ".join(register_callback_methods),
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ notification_package=notification_package))
+ callback_file.flush()
+ callback_file.close()
+
+ callback_file = open(os.path.join(notification_package, "Global%sNotificationCallback.java" % plugin_name), 'w')
+
+ global_notification_callback_callbacks = ""
+ if callbacks:
+ global_notification_callback_callbacks = " extends " + ", ".join(callbacks)
+
+ callback_file.write(global_notification_callback_template.substitute(inputfile=inputfile,
+ callbacks=global_notification_callback_callbacks,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ notification_package=notification_package))
+ callback_file.flush()
+ callback_file.close()
+
+ callback_file = open(os.path.join(notification_package, "%sNotificationRegistryImpl.java" % plugin_name), 'w')
+ callback_file.write(notification_registry_impl_template.substitute(inputfile=inputfile,
+ callback_package=callback_package,
+ dto_package=dto_package,
+ register_callback_methods="".join(register_callback_methods_impl),
+ handler_methods="".join(handler_methods),
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ notification_package=notification_package))
+ callback_file.flush()
+ callback_file.close()
+
+ callback_file = open(os.path.join(notification_package, "%sNotificationRegistryProvider.java" % plugin_name), 'w')
+ callback_file.write(notification_provider_template.substitute(inputfile=inputfile,
+ base_package=base_package,
+ plugin_package=plugin_package,
+ plugin_name=plugin_name,
+ notification_package=notification_package))
+ callback_file.flush()
+ callback_file.close()
+
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/types_gen.py b/src/vpp-api/java/jvpp/gen/jvppgen/types_gen.py
new file mode 100644
index 00000000..858ea8ba
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/types_gen.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from string import Template
+
+import util
+import jni_gen
+import dto_gen
+
+type_template = Template("""
+package $plugin_package.$type_package;
+
+/**
+ * <p>This class represents $c_type_name type definition.
+ * <br>It was generated by types_gen.py based on $inputfile preparsed data:
+ * <pre>
+$docs
+ * </pre>
+ */
+public final class $java_type_name {
+$fields
+$methods
+}
+""")
+
+field_template = Template(""" public $type $name;\n""")
+
+
+def generate_type_fields(type_definition):
+ """
+ Generates fields for class representing typeonly definition
+ :param type_definition: python representation of typeonly definition
+ :return: string representing class fields
+ """
+ fields = ""
+ for t in zip(type_definition['types'], type_definition['args']):
+ field_name = util.underscore_to_camelcase(t[1])
+ fields += field_template.substitute(type=util.jni_2_java_type_mapping[t[0]],
+ name=field_name)
+ return fields
+
+object_struct_setter_template = Template("""
+ {
+ jclass ${field_reference_name}Class = (*env)->FindClass(env, "${class_FQN}");
+ memset (&(mp->${c_name}), 0, sizeof (mp->${c_name}));
+ ${struct_initialization}
+ }
+""")
+
+object_array_struct_setter_template = Template("""
+ {
+ jclass ${field_reference_name}ArrayElementClass = (*env)->FindClass(env, "${class_FQN}");
+ if (${field_reference_name}) {
+ size_t _i;
+ jsize cnt = (*env)->GetArrayLength (env, ${field_reference_name});
+ ${field_length_check}
+ for (_i = 0; _i < cnt; _i++) {
+ jobject ${field_reference_name}ArrayElement = (*env)->GetObjectArrayElement(env, ${field_reference_name}, _i);
+ memset (&(mp->${c_name}[_i]), 0, sizeof (mp->${c_name}[_i]));
+ ${struct_initialization}
+ }
+ }
+ }
+""")
+
+object_dto_field_setter_template = Template("""
+ {
+ jclass ${field_reference_name}Class = (*env)->FindClass(env, "${class_FQN}");
+ jmethodID ${field_reference_name}Constructor = (*env)->GetMethodID(env, ${field_reference_name}Class, "<init>", "()V");
+ jobject ${field_reference_name} = (*env)->NewObject(env, ${field_reference_name}Class, ${field_reference_name}Constructor);
+ ${type_initialization}
+ (*env)->SetObjectField(env, dto, ${field_reference_name}FieldId, ${field_reference_name});
+ (*env)->DeleteLocalRef(env, ${field_reference_name});
+ }
+""")
+
+object_array_dto_field_setter_template = Template("""
+ {
+ jclass ${field_reference_name}Class = (*env)->FindClass(env, "${class_FQN}");
+ jobjectArray ${field_reference_name} = (*env)->NewObjectArray(env, ${field_length}, ${field_reference_name}Class, 0);
+ jmethodID ${field_reference_name}Constructor = (*env)->GetMethodID(env, ${field_reference_name}Class, "<init>", "()V");
+ unsigned int _i;
+ for (_i = 0; _i < ${field_length}; _i++) {
+ jobject ${field_reference_name}ArrayElement = (*env)->NewObject(env, ${field_reference_name}Class, ${field_reference_name}Constructor);
+ ${type_initialization}
+ (*env)->SetObjectArrayElement(env, ${field_reference_name}, _i, ${field_reference_name}ArrayElement);
+ (*env)->DeleteLocalRef(env, ${field_reference_name}ArrayElement);
+ }
+ (*env)->SetObjectField(env, dto, ${field_reference_name}FieldId, ${field_reference_name});
+ (*env)->DeleteLocalRef(env, ${field_reference_name});
+ }
+""")
+
+
+def generate_struct_initialization(type_def, c_name_prefix, object_name, indent):
+ struct_initialization = ""
+ # field identifiers
+ for t in zip(type_def['types'], type_def['args'], type_def['lengths']):
+ field_reference_name = "${c_name}" + util.underscore_to_camelcase_upper(t[1])
+ field_name = util.underscore_to_camelcase(t[1])
+ struct_initialization += jni_gen.jni_request_identifiers_for_type(field_type=t[0],
+ field_reference_name=field_reference_name,
+ field_name=field_name,
+ object_name=object_name)
+ struct_initialization += jni_gen.jni_request_binding_for_type(field_type=t[0], c_name=c_name_prefix + t[1],
+ field_reference_name=field_reference_name,
+ field_length=t[2][0],
+ is_variable_len_array=t[2][1])
+ return indent + struct_initialization.replace('\n', '\n' + indent)
+
+
+def generate_type_setter(handler_name, type_def, c_name_prefix, object_name, indent):
+ type_initialization = ""
+ for t in zip(type_def['types'], type_def['args'], type_def['lengths']):
+ field_length = t[2][0]
+ is_variable_len_array = t[2][1]
+ length_field_type = None
+ if is_variable_len_array:
+ length_field_type = type_def['types'][type_def['args'].index(field_length)]
+ type_initialization += jni_gen.jni_reply_handler_for_type(handler_name=handler_name,
+ ref_name="${field_reference_name}",
+ field_type=t[0], c_name=c_name_prefix + t[1],
+ field_reference_name="${c_name}" + util.underscore_to_camelcase_upper(t[1]),
+ field_name=util.underscore_to_camelcase(t[1]),
+ field_length=field_length,
+ is_variable_len_array=is_variable_len_array,
+ length_field_type=length_field_type,
+ object_name=object_name)
+ return indent + type_initialization.replace('\n', '\n' + indent)
+
+
+def generate_types(types_list, plugin_package, types_package, inputfile):
+ """
+ Generates Java representation of custom types defined in api file.
+ """
+
+ if not types_list:
+ print "Skipping custom types generation (%s does not define custom types)." % inputfile
+ return
+
+ print "Generating custom types"
+
+ if not os.path.exists(types_package):
+ os.mkdir(types_package)
+
+ for type_def in types_list:
+ c_type_name = type_def['name']
+ java_type_name = util.underscore_to_camelcase_upper(type_def['name'])
+ dto_path = os.path.join(types_package, java_type_name + ".java")
+
+ fields = generate_type_fields(type_def)
+
+ dto_file = open(dto_path, 'w')
+ dto_file.write(type_template.substitute(plugin_package=plugin_package,
+ type_package=types_package,
+ c_type_name=c_type_name,
+ inputfile=inputfile,
+ docs=util.api_message_to_javadoc(type_def),
+ java_type_name=java_type_name,
+ fields=fields,
+ methods=dto_gen.generate_dto_base_methods(java_type_name, type_def)
+ ))
+
+ # update type mappings:
+ # todo fix vpe.api to use type_name instead of vl_api_type_name_t
+ type_name = "vl_api_" + c_type_name + "_t"
+ java_fqn = "%s.%s.%s" % (plugin_package, types_package, java_type_name)
+ util.vpp_2_jni_type_mapping[type_name] = "jobject"
+ util.vpp_2_jni_type_mapping[type_name + "[]"] = "jobjectArray"
+ util.jni_2_java_type_mapping[type_name] = java_fqn
+ util.jni_2_java_type_mapping[type_name + "[]"] = java_fqn + "[]"
+ jni_name = java_fqn.replace('.', "/")
+ jni_signature = "L" + jni_name + ";"
+ util.jni_2_signature_mapping[type_name] = "L" + jni_name + ";"
+ util.jni_2_signature_mapping[type_name + "[]"] = "[" + jni_signature
+ util.jni_field_accessors[type_name] = "ObjectField"
+ util.jni_field_accessors[type_name + "[]"] = "ObjectField"
+
+ jni_gen.struct_setter_templates[type_name] = Template(
+ object_struct_setter_template.substitute(
+ c_name="${c_name}",
+ field_reference_name="${field_reference_name}",
+ class_FQN=jni_name,
+ struct_initialization=generate_struct_initialization(type_def, "${c_name}.",
+ "${field_reference_name}", ' ' * 4))
+ )
+
+ jni_gen.struct_setter_templates[type_name + "[]"] = Template(
+ object_array_struct_setter_template.substitute(
+ c_name="${c_name}",
+ field_reference_name="${field_reference_name}",
+ field_length_check="${field_length_check}",
+ class_FQN=jni_name,
+ struct_initialization=generate_struct_initialization(type_def, "${c_name}[_i].",
+ "${field_reference_name}ArrayElement", ' ' * 8))
+ )
+
+ jni_gen.dto_field_setter_templates[type_name] = Template(
+ object_dto_field_setter_template.substitute(
+ field_reference_name="${field_reference_name}",
+ field_length="${field_length}",
+ class_FQN=jni_name,
+ type_initialization=generate_type_setter(c_type_name, type_def, "${c_name}.",
+ "${field_reference_name}", ' ' * 4))
+ )
+
+ jni_gen.dto_field_setter_templates[type_name + "[]"] = Template(
+ object_array_dto_field_setter_template.substitute(
+ field_reference_name="${field_reference_name}",
+ field_length="${field_length}",
+ class_FQN=jni_name,
+ type_initialization=generate_type_setter(c_type_name, type_def, "${c_name}[_i].",
+ "${field_reference_name}ArrayElement", ' ' * 8))
+ )
+
+ dto_file.flush()
+ dto_file.close()
+
diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/util.py b/src/vpp-api/java/jvpp/gen/jvppgen/util.py
new file mode 100644
index 00000000..42394419
--- /dev/null
+++ b/src/vpp-api/java/jvpp/gen/jvppgen/util.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pprint
+
+
+def underscore_to_camelcase(name):
+ name = name.title().replace("_", "")
+ return name[0].lower() + name[1:]
+
+
+def underscore_to_camelcase_upper(name):
+ name = name.title().replace("_", "")
+ return name[0].upper() + name[1:]
+
+
+def remove_folder(folder):
+ """ Remove folder with all its files """
+ for root, dirs, files in os.walk(folder, topdown=False):
+ for name in files:
+ os.remove(os.path.join(root, name))
+ os.removedirs(folder)
+
+
+reply_suffixes = ("reply", "details", "l2fibtableentry")
+
+
+def is_reply(name):
+ return name.lower().endswith(reply_suffixes)
+
+
+def is_details(name):
+ return name.lower().endswith(reply_suffixes[1]) or name.lower().endswith(reply_suffixes[2])
+
+
+def is_retval_field(name):
+ return name == 'retval'
+
+dump_suffix = "dump"
+
+
+def is_dump(name):
+ return name.lower().endswith(dump_suffix)
+
+
+def get_reply_suffix(name):
+ for reply_suffix in reply_suffixes:
+ if name.lower().endswith(reply_suffix):
+ if reply_suffix == reply_suffixes[2]:
+ # FIXME workaround for l2_fib_table_entry
+ return 'entry'
+ else:
+ return reply_suffix
+
+# Mapping according to:
+# http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/types.html
+#
+# Unsigned types are mapped to signed Java types of the same size.
+# It is the API user's responsibility to interpret them correctly.
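+# For example, a u8 field holding 200 arrives in Java as byte -56 (two's
+# complement); the Java caller can recover the unsigned value as (b & 0xFF).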
+jni_2_java_type_mapping = {'u8': 'byte',
+ 'u8[]': 'byte[]',
+ 'i8': 'byte',
+ 'i8[]': 'byte[]',
+ 'u16': 'short',
+ 'u16[]': 'short[]',
+ 'i16': 'short',
+ 'i16[]': 'short[]',
+ 'u32': 'int',
+ 'u32[]': 'int[]',
+ 'i32': 'int',
+ 'i32[]': 'int[]',
+ 'u64': 'long',
+ 'u64[]': 'long[]',
+ 'i64': 'long',
+ 'i64[]': 'long[]',
+ 'f64': 'double',
+ 'f64[]': 'double[]'
+ }
+
+vpp_2_jni_type_mapping = {'u8': 'jbyte',
+ 'u8[]': 'jbyteArray',
+ 'i8': 'jbyte',
+ 'i8[]': 'jbyteArray',
+ 'u16': 'jshort',
+ 'u16[]': 'jshortArray',
+ 'i16': 'jshort',
+ 'i16[]': 'jshortArray',
+ 'u32': 'jint',
+ 'u32[]': 'jintArray',
+ 'i32': 'jint',
+ 'i32[]': 'jintArray',
+ 'u64': 'jlong',
+ 'u64[]': 'jlongArray',
+ 'i64': 'jlong',
+ 'i64[]': 'jlongArray',
+ 'f64': 'jdouble',
+ 'f64[]': 'jdoubleArray'
+ }
+
+# https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/types.html#type_signatures
+jni_2_signature_mapping = {'u8': 'B',
+ 'u8[]': '[B',
+ 'i8': 'B',
+ 'i8[]': '[B',
+ 'u16': 'S',
+ 'u16[]': '[S',
+ 'i16': 'S',
+ 'i16[]': '[S',
+ 'u32': 'I',
+ 'u32[]': '[I',
+ 'i32': 'I',
+ 'i32[]': '[I',
+ 'u64': 'J',
+ 'u64[]': '[J',
+ 'i64': 'J',
+ 'i64[]': '[J',
+ 'f64': 'D',
+ 'f64[]': '[D'
+ }
+
+# https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/functions.html#Get_type_Field_routines
+jni_field_accessors = {'u8': 'ByteField',
+ 'u8[]': 'ObjectField',
+ 'i8': 'ByteField',
+ 'i8[]': 'ObjectField',
+ 'u16': 'ShortField',
+ 'u16[]': 'ObjectField',
+ 'i16': 'ShortField',
+ 'i16[]': 'ObjectField',
+ 'u32': 'IntField',
+ 'u32[]': 'ObjectField',
+ 'i32': 'IntField',
+ 'i32[]': 'ObjectField',
+ 'u64': 'LongField',
+ 'u64[]': 'ObjectField',
+ 'i64': 'LongField',
+ 'i64[]': 'ObjectField',
+ 'f64': 'DoubleField',
+ 'f64[]': 'ObjectField'
+ }
+
+
+# vpe.api calls that do not follow naming conventions and have to be handled as exceptions when finding the reply -> request mapping
+# FIXME in vpe.api
+unconventional_naming_rep_req = {
+ }
+
+# FIXME: no convention in the naming of events (notifications) in vpe.api
+notifications_message_suffixes = ("event", "counters")
+
+# messages that must be ignored. These messages are insufficiently marked as disabled in vpe.api
+# FIXME
+ignored_messages = []
+
+
+def is_notification(name):
+ """ Returns true if the structure is a notification regardless of its no other use """
+ return is_just_notification(name)
+
+
+def is_just_notification(name):
+ """ Returns true if the structure is just a notification and has no other use """
+ return name.lower().endswith(notifications_message_suffixes)
+
+
+def is_ignored(param):
+ return param.lower() in ignored_messages
+
+
+def remove_reply_suffix(camel_case_name_with_suffix):
+ return remove_suffix(camel_case_name_with_suffix, get_reply_suffix(camel_case_name_with_suffix))
+
+
+def remove_suffix(camel_case_name_with_suffix, suffix):
+ suffix_length = len(suffix)
+ return camel_case_name_with_suffix[:-suffix_length] if suffix_length != 0 else camel_case_name_with_suffix
+
+
+def is_control_ping(camel_case_name_with_suffix):
+ return camel_case_name_with_suffix.lower().startswith("controlping");
+
+
+def api_message_to_javadoc(api_message):
+ """ Converts vpe.api message description to javadoc """
+ formatted = pprint.pformat(api_message, indent=4, width=120, depth=None)
+ return " * " + formatted.replace("\n", "\n * ")
+
+
+notification_dto_suffix = "Notification"
+
+
+def add_notification_suffix(camel_case_dto_name):
+ camel_case_dto_name += notification_dto_suffix
+ return camel_case_dto_name
+
+
+def is_array(java_type_as_string):
+ return java_type_as_string.endswith("[]")
diff --git a/src/vpp-api/lua/README.md b/src/vpp-api/lua/README.md
new file mode 100644
index 00000000..4ecdb34d
--- /dev/null
+++ b/src/vpp-api/lua/README.md
@@ -0,0 +1,50 @@
+This is an experimental version of the Lua API, aimed at luajit use.
+
+Please take a look and send feedback to ayourtch@gmail.com.
+
+To run the examples here:
+
+1) install luajit - "sudo apt-get install luajit" on ubuntu
+
+2) "make build-vpp-api" in the top VPP directory
+
+3) "make run" in a separate terminal window
+ This ensures you have an instance of VPP running
+
+4) sudo luajit examples/example-cli.lua
+
+This will result in something like this:
+
+Version:
+00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+
+{ [1] = { ["luaapi_message_name"] = show_version_reply,["program"] = vpe,["version"] = ,["build_date"] = Fri Nov 25 10:58:48 UTC 2016,["retval"] = 0,["build_directory"] = /home/ubuntu/vpp,["_vl_msg_id"] = 170,["context"] = 0,} ,}
+---
+{ [1] = { ["luaapi_message_name"] = cli_inband_reply,["_vl_msg_id"] = 94,["length"] = 66,["reply"] = vpp v built by ubuntu on vpp-toys at Fri Nov 25 10:58:48 UTC 2016
+,["retval"] = 0,["context"] = 0,} ,}
+---
+
+5) You can also run the performance test bench:
+
+$ sudo luajit bench.lua
+10001 iterations, average speed 5624LL per second
+10001 iterations, average speed 6650LL per second
+10001 iterations, average speed 6053LL per second
+10001 iterations, average speed 7056LL per second
+10001 iterations, average speed 6388LL per second
+10001 iterations, average speed 5849LL per second
+10001 iterations, average speed 6321LL per second
+10001 iterations, average speed 6368LL per second
+10001 iterations, average speed 5958LL per second
+10001 iterations, average speed 6482LL per second
+Average tps across the tests: 6274LL
+
+Note: the above was run in an lxd container inside a 2-core
+xhyve VM on a MacBook Pro, so I would not take the performance numbers for granted :)
+
+The "examples" directory contains a few naive examples, as well as a couple of more
+advanced ones - a tab-completing CLI for VPP that can call both the APIs and CLI,
+and also a small test utility which I use for automating some small tests using
+VPP.
+
diff --git a/src/vpp-api/lua/bench.lua b/src/vpp-api/lua/bench.lua
new file mode 100644
index 00000000..c7231b90
--- /dev/null
+++ b/src/vpp-api/lua/bench.lua
@@ -0,0 +1,70 @@
+--[[
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+]]
+
+local vpp = require "vpp-lapi"
+
+local ffi = require "ffi"
+
+ffi.cdef([[
+ struct timespec {
+ long tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+ };
+
+ int clock_gettime(int clk_id, struct timespec *tp);
+]])
+
+
+local time_cache = ffi.new("struct timespec[1]")
+local time_cache_1 = time_cache[0]
+function get_ns()
+ ffi.C.clock_gettime(0, time_cache)
+ return time_cache_1.tv_nsec + 1000000000 * time_cache_1.tv_sec
+end
+
+function do_bench()
+ local cycle_start = get_ns()
+ local n_iterations = 10000
+ local count = 0
+ for i = 1,n_iterations do
+ -- print(i)
+ vpp:api_call("show_version")
+ count = count + 1
+ -- print(i, "done")
+ end
+ local cycle_end = get_ns()
+ local tps = n_iterations*1000000000LL/(cycle_end - cycle_start)
+ print (tostring(count) .. " iterations, average speed " .. tostring(tps) .. " per second")
+ return tps
+end
+
+root_dir = "/home/ubuntu/vpp"
+pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+vpp:init({ pneum_path = pneum_path })
+vpp:json_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api.json")
+
+vpp:connect("lua-bench")
+local n_tests = 10
+local tps_acc = 0LL
+for i=1,n_tests do
+ tps_acc = tps_acc + do_bench()
+end
+print("Average tps across the tests: " .. tostring(tps_acc/n_tests))
+
+vpp:disconnect()
+
+
diff --git a/src/vpp-api/lua/examples/cli/README.md b/src/vpp-api/lua/examples/cli/README.md
new file mode 100644
index 00000000..3a5f8ee9
--- /dev/null
+++ b/src/vpp-api/lua/examples/cli/README.md
@@ -0,0 +1,5 @@
+This is a small experiment: a wrapper CLI that can call both API functions and the debug CLI.
+
+To facilitate tab completion and help, API call names are entered with spaces in place of underscores.
+
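+For example, the API message "show_version" becomes the command "call show version".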
+
diff --git a/src/vpp-api/lua/examples/cli/lua-cli.lua b/src/vpp-api/lua/examples/cli/lua-cli.lua
new file mode 100644
index 00000000..4a27af53
--- /dev/null
+++ b/src/vpp-api/lua/examples/cli/lua-cli.lua
@@ -0,0 +1,747 @@
+--[[
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+]]
+
+-- Experimental prototype CLI using API to VPP, with tab completion
+--
+-- Written by Andrew Yourtchenko (ayourtch@cisco.com) 2010,2016
+--
+
+vpp = require "vpp-lapi"
+
+
+local dotdotdot = "..."
+
+-- First the "readline" routine
+
+readln = {
+split = function(str, pat)
+ local t = {} -- NOTE: use {n = 0} in Lua-5.0
+ local fpat = "(.-)" .. pat
+ local last_end = 1
+ if str then
+ local s, e, cap = str:find(fpat, 1)
+ while s do
+ if s ~= 1 or cap ~= "" then
+ table.insert(t,cap)
+ end
+ last_end = e+1
+ s, e, cap = str:find(fpat, last_end)
+ end
+ if last_end <= #str then
+ cap = str:sub(last_end)
+ table.insert(t, cap)
+ end
+ end
+ return t
+end,
+
+reader = function()
+ local rl = {}
+
+ rl.init = function()
+ os.execute("stty -icanon min 1 -echo")
+ rl.rawmode = true
+ end
+
+ rl.done = function()
+ os.execute("stty icanon echo")
+ rl.rawmode = false
+ end
+
+ rl.prompt = ">"
+ rl.history = { "" }
+ rl.history_index = 1
+ rl.history_length = 1
+
+ rl.hide_cmd = function()
+ local bs = string.char(8) .. " " .. string.char(8)
+ for i = 1, #rl.command do
+ io.stdout:write(bs)
+ end
+ end
+
+ rl.show_cmd = function()
+ if rl.command then
+ io.stdout:write(rl.command)
+ end
+ end
+
+ rl.store_history = function(cmd)
+ if cmd == "" then
+ return
+ end
+ rl.history[rl.history_length] = cmd
+ rl.history_length = rl.history_length + 1
+ rl.history_index = rl.history_length
+ rl.history[rl.history_length] = ""
+ end
+
+ rl.readln = function()
+ local done = false
+ local need_prompt = true
+ rl.command = ""
+
+ if not rl.rawmode then
+ rl.init()
+ end
+
+ while not done do
+ if need_prompt then
+ io.stdout:write(rl.prompt)
+ io.stdout:write(rl.command)
+ need_prompt = false
+ end
+
+ local ch = io.stdin:read(1)
+ if ch:byte(1) == 27 then
+ -- CONTROL
+ local ch2 = io.stdin:read(1)
+ -- arrows
+ if ch2:byte(1) == 91 then
+ local ch3 = io.stdin:read(1)
+ local b = ch3:byte(1)
+ if b == 65 then
+ ch = "UP"
+ elseif b == 66 then
+ ch = "DOWN"
+ elseif b == 67 then
+ ch = "RIGHT"
+ elseif b == 68 then
+ ch = "LEFT"
+ end
+ -- print("Byte: " .. ch3:byte(1))
+ -- if ch3:byte(1)
+ end
+ end
+
+ if ch == "?" then
+ io.stdout:write(ch)
+ io.stdout:write("\n")
+ if rl.help then
+ rl.help(rl)
+ end
+ need_prompt = true
+ elseif ch == "\t" then
+ if rl.tab_complete then
+ rl.tab_complete(rl)
+ end
+ io.stdout:write("\n")
+ need_prompt = true
+ elseif ch == "\n" then
+ io.stdout:write(ch)
+ done = true
+ elseif ch == "\004" then
+ io.stdout:write("\n")
+ rl.command = nil
+ done = true
+ elseif ch == string.char(127) then
+ if rl.command ~= "" then
+ io.stdout:write(string.char(8) .. " " .. string.char(8))
+ rl.command = string.sub(rl.command, 1, -2)
+ end
+ elseif #ch > 1 then
+ -- control char
+ if ch == "UP" then
+ rl.hide_cmd()
+ if rl.history_index == #rl.history then
+ rl.history[rl.history_index] = rl.command
+ end
+ if rl.history_index > 1 then
+ rl.history_index = rl.history_index - 1
+ rl.command = rl.history[rl.history_index]
+ end
+ rl.show_cmd()
+ elseif ch == "DOWN" then
+ rl.hide_cmd()
+ if rl.history_index < rl.history_length then
+ rl.history_index = rl.history_index + 1
+ rl.command = rl.history[rl.history_index]
+ end
+ rl.show_cmd()
+ end
+ else
+ io.stdout:write(ch)
+ rl.command = rl.command .. ch
+ end
+ end
+ if rl.command then
+ rl.store_history(rl.command)
+ end
+ return rl.command
+ end
+ return rl
+end
+
+}
+
+--[[
+
+r = reader()
+
+local done = false
+
+while not done do
+ local cmd = r.readln()
+ print("Command: " .. tostring(cmd))
+ if not cmd or cmd == "quit" then
+ done = true
+ end
+end
+
+r.done()
+
+]]
+
+--------- MDS show tech parser
+
+local print_section = nil
+local list_sections = false
+
+local curr_section = "---"
+local curr_parser = nil
+
+-- by default operate in batch mode
+local batch_mode = true
+
+local db = {}
+local device = {}
+device.output = {}
+local seen_section = {}
+
+function start_collection(name)
+ device = {}
+ seen_section = {}
+end
+
+function print_error(errmsg)
+ print("@#$:" .. errmsg)
+end
+
+function keys(tbl)
+ local t = {}
+ for k, v in pairs(tbl) do
+ table.insert(t, k)
+ end
+ return t
+end
+
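+-- tset(tbl, k1, k2, ..., kn, value): set a value in a nested table,
+-- creating intermediate tables along the path; a nil value deletes the
+-- leaf and prunes any branches that become empty as a result.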
+function tset (parent, ...)
+
+ -- print ('set', ...)
+
+ local len = select ('#', ...)
+ local key, value = select (len-1, ...)
+ local cutpoint, cutkey
+
+ for i=1,len-2 do
+
+ local key = select (i, ...)
+ local child = parent[key]
+
+ if value == nil then
+ if child == nil then return
+ elseif next (child, next (child)) then cutpoint = nil cutkey = nil
+ elseif cutpoint == nil then cutpoint = parent cutkey = key end
+
+ elseif child == nil then child = {} parent[key] = child end
+
+ parent = child
+ end
+
+ if value == nil and cutpoint then cutpoint[cutkey] = nil
+ else parent[key] = value return value end
+ end
+
+
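+-- tget(tbl, k1, k2, ..., kn): walk a nested table along the given key
+-- path, returning the value found or nil if any intermediate key is missing.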
+function tget (parent, ...)
+ local len = select ('#', ...)
+ for i=1,len do
+ parent = parent[select (i, ...)]
+ if parent == nil then break end
+ end
+ return parent
+ end
+
+
+local pager_lines = 23
+local pager_printed = 0
+local pager_skipping = false
+local pager_filter_pipe = nil
+
+function pager_reset()
+ pager_printed = 0
+ pager_skipping = false
+ if pager_filter_pipe then
+ pager_filter_pipe:close()
+ pager_filter_pipe = nil
+ end
+end
+
+
+function print_more()
+ io.stdout:write(" --More-- ")
+end
+
+function print_nomore()
+ local bs = string.char(8)
+ local bs10 = string.rep(bs, 10)
+ io.stdout:write(bs10 .. " " .. bs10)
+end
+
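+-- print_line: emit one line through the pager. Honors an active "| cmd"
+-- filter pipe, prompts --More-- every pager_lines lines, and at the prompt
+-- accepts space (next page), newline (next line) and q (skip the rest).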
+function print_line(txt)
+ if pager_filter_pipe then
+ pager_filter_pipe:write(txt .. "\n")
+ return
+ end
+ if pager_printed >= pager_lines then
+ print_more()
+ local ch = io.stdin:read(1)
+ if ch == " " then
+ pager_printed = 0
+ elseif ch == "\n" then
+ pager_printed = pager_printed - 1
+ elseif ch == "q" then
+ pager_printed = 0
+ pager_skipping = true
+ end
+ print_nomore()
+ end
+ if not pager_skipping then
+ print(txt)
+ pager_printed = pager_printed + 1
+ else
+ -- skip printing
+ end
+end
+
+function paged_write(text)
+ local t = readln.split(text, "[\n]")
+ if string.sub(text, -1) == "\n" then
+ table.insert(t, "")
+ end
+ for i, v in ipairs(t) do
+ if i < #t then
+ print_line(v)
+ else
+ if pager_filter_pipe then
+ pager_filter_pipe:write(v)
+ else
+ io.stdout:write(v)
+ end
+ end
+ end
+end
+
+
+
+
+
+function get_choices(tbl, key)
+ local res = {}
+ for k, v in pairs(tbl) do
+ if string.sub(k, 1, #key) == key then
+ table.insert(res, k)
+ elseif 0 < #key and dotdotdot == k then
+ table.insert(res, k)
+ end
+ end
+ return res
+end
+
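+-- get_exact_choice: return the index of the choice matching val, preferring
+-- an exact match over an unambiguous prefix match (the "..." wildcard counts
+-- as a candidate); returns nil when the input is ambiguous or matches nothing.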
+function get_exact_choice(choices, val)
+ local exact_idx = nil
+ local substr_idx = nil
+ local substr_seen = false
+
+ if #choices == 1 then
+ if choices[1] == dotdotdot then
+ return 1
+ elseif string.sub(choices[1], 1, #val) == val then
+ return 1
+ else
+ return nil
+ end
+ else
+ for i, v in ipairs(choices) do
+ if v == val then
+ exact_idx = i
+ substr_seen = true
+ elseif choices[i] ~= dotdotdot and string.sub(choices[i], 1, #val) == val then
+ if substr_seen then
+ substr_idx = nil
+ else
+ substr_idx = i
+ substr_seen = true
+ end
+ elseif choices[i] == dotdotdot then
+ if substr_seen then
+ substr_idx = nil
+ else
+ substr_idx = i
+ substr_seen = true
+ end
+ end
+ end
+ end
+ return exact_idx or substr_idx
+end
+
+function device_cli_help(rl)
+ local key = readln.split(rl.command, "[ ]+")
+ local tree = rl.tree
+ local keylen = #key
+ local fullcmd = ""
+ local error = false
+ local terse = true
+
+ if ((#rl.command >= 1) and (string.sub(rl.command, -1) == " ")) or (#rl.command == 0) then
+ table.insert(key, "")
+ terse = false
+ end
+
+ for i, v in ipairs(key) do
+ local choices = get_choices(tree, v)
+ local idx = get_exact_choice(choices, v)
+ if idx then
+ local choice = choices[idx]
+ tree = tree[choice]
+ fullcmd = fullcmd .. choice .. " "
+ else
+ if i < #key then
+ error = true
+ end
+ end
+
+ if i == #key and not error then
+ for j, w in ipairs(choices) do
+ if terse then
+ paged_write(w .. "\t")
+ else
+ paged_write(" " .. w .. "\n")
+ end
+ end
+ paged_write("\n")
+ if terse then
+ paged_write(" \n")
+ end
+ end
+ end
+ pager_reset()
+end
+
+function device_cli_tab_complete(rl)
+ local key = readln.split(rl.command, "[ ]+")
+ local tree = rl.tree
+ local keylen = #key
+ local fullcmd = ""
+ local error = false
+
+ for i, v in ipairs(key) do
+ local choices = get_choices(tree, v)
+ local idx = get_exact_choice(choices, v)
+ if idx and choices[idx] ~= dotdotdot then
+ local choice = choices[idx]
+ tree = tree[choice]
+ -- print("level " .. i .. " '" .. choice .. "'")
+ fullcmd = fullcmd .. choice .. " "
+ else
+ -- print("level " .. i .. " : " .. table.concat(choices, " ") .. " ")
+ error = true
+ end
+ end
+ if not error then
+ rl.command = fullcmd
+ else
+ -- print("\n\nerror\n")
+ end
+ pager_reset()
+end
+
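+-- device_cli_exec: split an optional "| shell-cmd" pager filter off the
+-- command line, then walk the command tree to resolve and return the
+-- handler function for the command (or nil on a parse error).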
+function device_cli_exec(rl)
+
+ local cmd_nopipe = rl.command
+ local cmd_pipe = nil
+
+ local pipe1, pipe2 = string.find(rl.command, "[|]")
+ if pipe1 then
+ cmd_nopipe = string.sub(rl.command, 1, pipe1-1)
+ cmd_pipe = string.sub(rl.command, pipe2+1, -1)
+ end
+
+ local key = readln.split(cmd_nopipe .. " <cr>", "[ ]+")
+ local tree = rl.tree
+ local keylen = #key
+ local fullcmd = ""
+ local error = false
+ local func = nil
+
+ if cmd_pipe then
+ pager_filter_pipe = io.popen(cmd_pipe, "w")
+ end
+
+
+ rl.choices = {}
+
+ for i, v in ipairs(key) do
+ local choices = get_choices(tree, v)
+ local idx = get_exact_choice(choices, v)
+ if idx then
+ local choice = choices[idx]
+ if i == #key then
+ func = tree[choice]
+ else
+ if choice == dotdotdot then
+ -- keep the tree the same, update the choice value to match the input string
+ choices[idx] = v
+ choice = v
+ else
+ tree = tree[choice]
+ end
+ end
+ -- print("level " .. i .. " '" .. choice .. "'")
+ table.insert(rl.choices, choice)
+ else
+ -- print("level " .. i .. " : " .. table.concat(choices, " ") .. " ")
+ error = true
+ return nil
+ end
+ end
+ return func
+end
+
+function populate_tree(commands)
+ local tree = {}
+
+ for k, v in pairs(commands) do
+ local key = readln.split(k .. " <cr>", "[ ]+")
+ local xtree = tree
+ for i, kk in ipairs(key) do
+ if i == 1 and kk == "sh" then
+ kk = "show"
+ end
+ if i == #key then
+ if type(v) == "function" then
+ xtree[kk] = v
+ else
+ xtree[kk] = function(rl) paged_write(table.concat(v, "\n") .. "\n") end
+ end
+ else
+ if not xtree[kk] then
+ xtree[kk] = {}
+ end
+ xtree = xtree[kk]
+ end
+ end
+ end
+ return tree
+end
+
+function trim (s)
+ return (string.gsub(s, "^%s*(.-)%s*$", "%1"))
+end
+
+
+function init_vpp(vpp)
+ local root_dir = "/home/ubuntu/vpp"
+ local pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+
+ vpp:init({ pneum_path = pneum_path })
+ vpp:json_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api.json")
+
+ vpp:connect("lua_cli")
+end
+
+function run_cli(vpp, cli)
+ local reply = vpp:api_call("cli_inband", { cmd = cli })
+ if reply and #reply == 1 then
+ local rep = reply[1]
+ if 0 == rep.retval then
+ return rep.reply
+ else
+ return "XXXXXLUACLI: API RETVAL ERROR : " .. tostring(rep.retval)
+ end
+ else
+ return "XXXXXLUACLI ERROR, RAW REPLY: " .. vpp.dump(reply)
+ end
+end
+
+
+function toprintablestring(s)
+ if type(s) == "string" then
+ return "\n"..vpp.hex_dump(s)
+ else
+ return tostring(s)
+ end
+end
+
+function interactive_cli(r)
+ while not done do
+ pager_reset()
+ local cmd = r.readln()
+ if not cmd then
+ done = true
+ elseif cmd == "quit" or cmd == "exit" then
+ done = true
+ else
+ local func = device_cli_exec(r)
+ if func then
+ func(r)
+ else
+ if trim(cmd) == "" then
+ else
+ for i = 1, #r.prompt do
+ paged_write(" ")
+ end
+ paged_write("^\n% Invalid input detected at '^' marker.\n\n")
+ end
+ end
+ end
+ end
+end
+
+device = {}
+device.output = {}
+
+init_vpp(vpp)
+cmds_str = run_cli(vpp, "?")
+vpp_cmds = readln.split(cmds_str, "\n")
+vpp_clis = {}
+
+for linenum, line in ipairs(vpp_cmds) do
+ local m,h = string.match(line, "^ (.-) (.*)$")
+ if m and #m > 0 then
+ table.insert(vpp_clis, m)
+ device.output["vpp debug cli " .. m] = function(rl)
+ -- print("ARBITRARY CLI" .. vpp.dump(rl.choices))
+ print("LUACLI command: " .. table.concat(rl.choices, " "))
+ local sub = {}
+ --
+ for i=4, #rl.choices -1 do
+ table.insert(sub, rl.choices[i])
+ end
+ local cli = table.concat(sub, " ")
+ print("Running CLI: " .. tostring(cli))
+ paged_write(run_cli(vpp, cli))
+ end
+ device.output["vpp debug cli " .. m .. " " .. dotdotdot] = function(rl)
+ print("ARGH")
+ end
+
+ local ret = run_cli(vpp, "help " .. m)
+ device.output["help vpp debug cli " .. m] = { ret }
+ end
+end
+
+for linenum, line in ipairs(vpp_clis) do
+ -- print(line, ret)
+end
+
+for msgnum, msgname in pairs(vpp.msg_number_to_name) do
+ local cli, numspaces = string.gsub(msgname, "_", " ")
+ device.output["call " .. cli .. " " .. dotdotdot] = function(rl)
+ print("ARGH")
+ end
+ device.output["call " .. cli] = function(rl)
+ print("LUACLI command: " .. table.concat(rl.choices, " "))
+ print("Running API: " .. msgname) -- vpp.dump(rl.choices))
+ local out = {}
+ local args = {}
+ local ntaken = 0
+ local argname = ""
+ for i=(1+1+numspaces+1), #rl.choices-1 do
+ -- print(i, rl.choices[i])
+ if ntaken > 0 then
+ ntaken = ntaken -1
+ else
+ local fieldname = rl.choices[i]
+ local field = vpp.msg_name_to_fields[msgname][fieldname]
+ if field then
+ local s = rl.choices[i+1]
+ s=s:gsub("\\x(%x%x)",function (x) return string.char(tonumber(x,16)) end)
+ args[fieldname] = s
+ ntaken = 1
+ end
+ end
+ end
+ -- print("ARGS: ", vpp.dump(args))
+ local ret = vpp:api_call(msgname, args)
+ for i, reply in ipairs(ret) do
+ table.insert(out, "=================== Entry #" .. tostring(i))
+ for k, v in pairs(reply) do
+ table.insert(out, " " .. tostring(k) .. " : " .. toprintablestring(v))
+ end
+ end
+ -- paged_write(vpp.dump(ret) .. "\n\n")
+ paged_write(table.concat(out, "\n").."\n\n")
+ end
+ device.output["call " .. cli .. " help"] = function(rl)
+ local out = {}
+ for k, v in pairs(vpp.msg_name_to_fields[msgname]) do
+ table.insert(out, tostring(k) .. " : " .. v["ctype"] .. " ; " .. tostring(vpp.dump(v)) )
+ end
+ -- paged_write(vpp.dump(vpp.msg_name_to_fields[msgname]) .. "\n\n")
+ paged_write(table.concat(out, "\n").."\n\n")
+ end
+-- vpp.msg_name_to_number = {}
+end
+
+
+
+local r = readln.reader()
+local done = false
+
+r.prompt = "VPP(luaCLI)#"
+
+r.help = device_cli_help
+r.tab_complete = device_cli_tab_complete
+print("===== CLI view, use ^D to end =====")
+
+r.tree = populate_tree(device.output)
+-- readln.pretty("xxxx", r.tree)
+
+
+for idx, an_arg in ipairs(arg) do
+ local fname = an_arg
+ if fname == "-i" then
+ pager_lines = 23
+ interactive_cli(r)
+ else
+ pager_lines = 100000000
+ for line in io.lines(fname) do
+ r.command = line
+ local func = device_cli_exec(r)
+ if func then
+ func(r)
+ end
+ end
+ end
+end
+
+if #arg == 0 then
+ print("You should specify '-i' as an argument for the interactive session,")
+ print("but with no other sources of commands, we start interactive session now anyway")
+ interactive_cli(r)
+end
+
+vpp:disconnect()
+r.done()
+
+
diff --git a/src/vpp-api/lua/examples/example-acl-plugin.lua b/src/vpp-api/lua/examples/example-acl-plugin.lua
new file mode 100644
index 00000000..ca01f18d
--- /dev/null
+++ b/src/vpp-api/lua/examples/example-acl-plugin.lua
@@ -0,0 +1,110 @@
+--[[
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+]]
+
+
+vpp = require "vpp-lapi"
+
+root_dir = "/home/ubuntu/vpp"
+pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+
+vpp:init({ pneum_path = pneum_path })
+
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vlib-api/vlibmemory/memclnt.api")
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api")
+vpp:connect("aytest")
+vpp:consume_api(root_dir .. "/plugins/acl-plugin/acl/acl.api", "acl")
+
+-- api calls
+reply = vpp:api_call("show_version")
+print("Version: ", reply[1].version)
+print(vpp.hex_dump(reply[1].version))
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 230 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 8 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 15 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add", { context = 42, count = 2, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add", { context = 42, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add", { context = 42, count = 0 })
+print(vpp.dump(reply))
+print("---")
+
+acl_index_to_delete = reply[1].acl_index
+print("Deleting " .. tostring(acl_index_to_delete))
+reply = vpp:api_call("acl_del", { context = 42, acl_index = acl_index_to_delete })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+for ri, rv in ipairs(reply) do
+ print("Reply message #" .. tostring(ri))
+ print(vpp.dump(rv))
+ for ai, av in ipairs(rv.r) do
+ print("ACL rule #" .. tostring(ai) .. " : " .. vpp.dump(av))
+ end
+
+end
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+print(vpp.dump(reply))
+print("---")
+
+
+vpp:disconnect()
+
+
diff --git a/src/vpp-api/lua/examples/example-classifier.lua b/src/vpp-api/lua/examples/example-classifier.lua
new file mode 100644
index 00000000..b1270757
--- /dev/null
+++ b/src/vpp-api/lua/examples/example-classifier.lua
@@ -0,0 +1,51 @@
+--[[
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+]]
+
+
+local vpp = require "vpp-lapi"
+local bit = require("bit")
+
+root_dir = "/home/ubuntu/vpp"
+pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+
+
+vpp:init({ pneum_path = pneum_path })
+
+vpp:json_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api.json")
+
+vpp:connect("aytest")
+
+-- api calls
+
+print("Calling API to add a new classifier table")
+reply = vpp:api_call("classify_add_del_table", {
+ context = 43,
+ memory_size = bit.lshift(2, 20),
+ client_index = 42,
+ is_add = 1,
+ nbuckets = 32,
+ skip_n_vectors = 0,
+ match_n_vectors = 1,
+ mask = "\255\255\255\255\255\255\255\255" .. "\255\255\255\255\255\255\255\255"
+})
+print(vpp.dump(reply))
+print("---")
+
+
+vpp:disconnect()
+
+
diff --git a/src/vpp-api/lua/examples/example-cli.lua b/src/vpp-api/lua/examples/example-cli.lua
new file mode 100644
index 00000000..85425caf
--- /dev/null
+++ b/src/vpp-api/lua/examples/example-cli.lua
@@ -0,0 +1,44 @@
+--[[
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+]]
+
+vpp = require "vpp-lapi"
+
+root_dir = "/home/ubuntu/vpp"
+pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+
+vpp:init({ pneum_path = pneum_path })
+
+vpp:json_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api.json")
+
+vpp:connect("aytest")
+
+-- api calls
+reply = vpp:api_call("show_version")
+print("Version: ", reply[1].version)
+print(vpp.hex_dump(reply[1].version))
+print(vpp.dump(reply))
+print("---")
+
+
+reply = vpp:api_call("cli_inband", { cmd = "show vers" })
+print(vpp.dump(reply))
+print("---")
+
+
+vpp:disconnect()
+
+
diff --git a/src/vpp-api/lua/examples/lute/README.md b/src/vpp-api/lua/examples/lute/README.md
new file mode 100644
index 00000000..8d37250a
--- /dev/null
+++ b/src/vpp-api/lua/examples/lute/README.md
@@ -0,0 +1,66 @@
+LUTE: Lua Unit Test Environment
+
+This is a small helper utility to automate some simple tests
+that one might need to do.
+
+Think of it as a hybrid of screen and expect that also
+picked up some habits from inline HTML code.
+
+It is quite probably useless for building anything serious,
+but practice shows it is quite efficient for convenient
+quick temporary tests, and for something that was written
+over the course of a couple of evenings it is quite a nice
+little helper tool.
+
+It allows you to launch and drive multiple shell sessions,
+and, by virtue of being written in Lua, it of course also
+lets you add business logic in Lua code.
+
+If you launch lute without parameters, it gives you
+an interactive shell in which to execute commands.
+
+If you launch it with an argument, it will attempt to
+read and execute the commands from the file.
+
+Commands:
+
+shell FOO
+
+ Spawn a shell in a new PTY under the label FOO.
+
+run FOO bar
+
+ Send "bar" keystrokes followed by "ENTER" to the session FOO
+
+ Special case: the word "break" on its own is translated into ^C being sent.
+
+cd FOO
+
+ "change domain" into session FOO. All subsequent inputs will go,
+ line-buffered, into the session FOO. To jump back up, use ^D (Control-D),
+ or within the file, use ^D^D^D (caret D caret D caret D on its own line)
+
+expect FOO blablabla
+
+ Pause further interpretation of the batch mode until you see "blablabla"
+ in the output of session FOO, or until timeout happens.
+
+sleep N
+
+ Sleep an integer N seconds, if you are in batch mode.
+
+echo blabla
+
+ Echo the remainder of the line to standard output.
+
+For Lua code, there is a pre-existing pseudo-session called "lua",
+which accepts the "run lua" command and does what you would expect:
+it evaluates the rest of the string in the Lua context of lute itself.
+You can also do "cd lua" to get into a multiline-enabled interpreter shell.
+
+This way, in the VPP case, you can automate some routine things
+that you would otherwise have done manually, test-drive the API, and
+use realistic native OS components to create the environment around it.
+
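+As an illustration, a minimal batch file (the session label "s1" and the
+shell command are arbitrary) could look like this:
+
+shell s1
+run s1 uname -a
+expect s1 Linux
+echo all done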
+
diff --git a/src/vpp-api/lua/examples/lute/lute.lua b/src/vpp-api/lua/examples/lute/lute.lua
new file mode 100644
index 00000000..89b9924b
--- /dev/null
+++ b/src/vpp-api/lua/examples/lute/lute.lua
@@ -0,0 +1,777 @@
+--[[
+version = 1
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+]]
+
+-- LUTE: Lua Unit Test Environment
+-- AKA what happens when screen tries to marry with lua and expect,
+-- but escapes mid-ceremony.
+--
+-- comments: @ayourtch
+
+ffi = require("ffi")
+
+vpp = {}
+function vpp.dump(o)
+ if type(o) == 'table' then
+ local s = '{ '
+ for k,v in pairs(o) do
+ if type(k) ~= 'number' then k = '"'..k..'"' end
+ s = s .. '['..k..'] = ' .. vpp.dump(v) .. ','
+ end
+ return s .. '} '
+ else
+ return tostring(o)
+ end
+end
+
+
+ffi.cdef([[
+
+int posix_openpt(int flags);
+int grantpt(int fd);
+int unlockpt(int fd);
+char *ptsname(int fd);
+
+typedef long pid_t;
+typedef long ssize_t;
+typedef long size_t;
+typedef int nfds_t;
+typedef long time_t;
+typedef long suseconds_t;
+
+pid_t fork(void);
+pid_t setsid(void);
+
+int close(int fd);
+int open(char *pathname, int flags);
+
+int dup2(int oldfd, int newfd);
+
+ssize_t read(int fd, void *buf, size_t count);
+ssize_t write(int fd, const void *buf, size_t count);
+
+struct pollfd {
+ int fd; /* file descriptor */
+ short events; /* requested events */
+ short revents; /* returned events */
+ };
+
+int poll(struct pollfd *fds, nfds_t nfds, int timeout);
+
+struct timeval {
+ time_t tv_sec; /* seconds */
+ suseconds_t tv_usec; /* microseconds */
+ };
+
+int gettimeofday(struct timeval *tv, struct timezone *tz);
+
+int inet_pton(int af, const char *src, void *dst);
+
+]])
+
+ffi.cdef([[
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dest, void *src, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
+void *memmem(const void *haystack, size_t haystacklen,
+ const void *needle, size_t needlelen);
+]])
+
+
+
+local O_RDWR = 2
+
+
+function os_time()
+ local tv = ffi.new("struct timeval[1]")
+ local ret = ffi.C.gettimeofday(tv, nil)
+ return tonumber(tv[0].tv_sec) + (tonumber(tv[0].tv_usec)/1000000.0)
+end
+
+function sleep(n)
+ local when_wakeup = os_time() + n
+ while os_time() <= when_wakeup do
+ ffi.C.poll(nil, 0, 10)
+ end
+end
+
+
+function c_str(text_in)
+ local text = text_in
+ local c_str = ffi.new("char[?]", #text+1)
+ ffi.copy(c_str, text)
+ return c_str
+end
+
+function ip46(addr_text)
+ local out = ffi.new("char [200]")
+ local AF_INET6 = 10
+ local AF_INET = 2
+ local is_ip6 = ffi.C.inet_pton(AF_INET6, c_str(addr_text), out)
+ if is_ip6 == 1 then
+ return ffi.string(out, 16), true
+ end
+ local is_ip4 = ffi.C.inet_pton(AF_INET, c_str(addr_text), out)
+ if is_ip4 == 1 then
+ return (string.rep("4", 12).. ffi.string(out, 4)), false
+ end
+end
+
+function pty_master_open()
+ local fd = ffi.C.posix_openpt(O_RDWR)
+ ffi.C.grantpt(fd)
+ ffi.C.unlockpt(fd)
+ local p = ffi.C.ptsname(fd)
+ print("PTS:" .. ffi.string(p))
+ return fd, ffi.string(p)
+end
+
+function pty_run(cmd)
+ local master_fd, pts_name = pty_master_open()
+ local child_pid = ffi.C.fork()
+ if (child_pid == -1) then
+ print("Error fork()ing")
+ return -1
+ end
+
+ if child_pid ~= 0 then
+ -- print("Parent")
+ return master_fd, child_pid
+ end
+
+ -- print("Child")
+ if (ffi.C.setsid() == -1) then
+ print("Child error setsid")
+ os.exit(-1)
+ end
+
+ ffi.C.close(master_fd)
+
+ local slave_fd = ffi.C.open(c_str(pts_name), O_RDWR)
+ if slave_fd == -1 then
+ print("Child can not open slave fd")
+ os.exit(-2)
+ end
+
+ ffi.C.dup2(slave_fd, 0)
+ ffi.C.dup2(slave_fd, 1)
+ ffi.C.dup2(slave_fd, 2)
+ os.execute(cmd)
+end
+
+function readch()
+ local buf = ffi.new("char[1]")
+ local nread= ffi.C.read(0, buf, 1)
+ -- print("\nREADCH : " .. string.char(buf[0]))
+ return string.char(buf[0])
+end
+
+function stdout_write(str)
+ ffi.C.write(1, c_str(str), #str)
+end
+
+
+readln = {
+split = function(str, pat)
+ local t = {} -- NOTE: use {n = 0} in Lua-5.0
+ local fpat = "(.-)" .. pat
+ local last_end = 1
+ if str then
+ local s, e, cap = str:find(fpat, 1)
+ while s do
+ if s ~= 1 or cap ~= "" then
+ table.insert(t,cap)
+ end
+ last_end = e+1
+ s, e, cap = str:find(fpat, last_end)
+ end
+ if last_end <= #str then
+ cap = str:sub(last_end)
+ table.insert(t, cap)
+ end
+ end
+ return t
+end,
+
+reader = function()
+ local rl = {}
+
+ rl.init = function()
+ os.execute("stty -icanon min 1 -echo")
+ rl.rawmode = true
+ end
+
+ rl.done = function()
+ os.execute("stty icanon echo")
+ rl.rawmode = false
+ end
+
+ rl.prompt = ">"
+ rl.history = { "" }
+ rl.history_index = 1
+ rl.history_length = 1
+
+ rl.hide_cmd = function()
+ local bs = string.char(8) .. " " .. string.char(8)
+ for i = 1, #rl.command do
+ stdout_write(bs)
+ end
+ end
+
+ rl.show_cmd = function()
+ if rl.command then
+ stdout_write(rl.command)
+ end
+ end
+
+ rl.store_history = function(cmd)
+ if cmd == "" then
+ return
+ end
+ rl.history[rl.history_length] = cmd
+ rl.history_length = rl.history_length + 1
+ rl.history_index = rl.history_length
+ rl.history[rl.history_length] = ""
+ end
+
+ rl.readln = function(stdin_select_fn, batch_cmd, batch_when, batch_expect)
+ local done = false
+ local need_prompt = true
+ rl.command = ""
+
+ if not rl.rawmode then
+ rl.init()
+ end
+
+ while not done do
+ local indent_value = #rl.prompt + #rl.command
+ if need_prompt then
+ stdout_write(rl.prompt)
+ stdout_write(rl.command)
+ need_prompt = false
+ end
+ if type(stdin_select_fn) == "function" then
+ while not stdin_select_fn(indent_value, batch_cmd, batch_when, batch_expect) do
+ stdout_write(rl.prompt)
+ stdout_write(rl.command)
+ indent_value = #rl.prompt + #rl.command
+ end
+ if batch_cmd and ((os_time() > batch_when) or (batch_expect and expect_success(batch_expect, buf, 0))) then
+ stdout_write("\n" .. rl.prompt .. batch_cmd .. "\n")
+ if batch_expect then
+ expect_done(batch_expect)
+ end
+ return batch_cmd, batch_expect
+ end
+ end
+ local ch = readch()
+ if ch:byte(1) == 27 then
+ -- CONTROL
+ local ch2 = readch()
+ -- arrows
+ if ch2:byte(1) == 91 then
+ local ch3 = readch()
+ local b = ch3:byte(1)
+ if b == 65 then
+ ch = "UP"
+ elseif b == 66 then
+ ch = "DOWN"
+ elseif b == 67 then
+ ch = "RIGHT"
+ elseif b == 68 then
+ ch = "LEFT"
+ end
+ -- print("Byte: " .. ch3:byte(1))
+ -- if ch3:byte(1)
+ end
+ end
+
+ if ch == "?" then
+ stdout_write(ch)
+ stdout_write("\n")
+ if rl.help then
+ rl.help(rl)
+ end
+ need_prompt = true
+ elseif ch == "\t" then
+ if rl.tab_complete then
+ rl.tab_complete(rl)
+ end
+ stdout_write("\n")
+ need_prompt = true
+ elseif ch == "\n" then
+ stdout_write(ch)
+ done = true
+ elseif ch == "\004" then
+ stdout_write("\n")
+ rl.command = nil
+ done = true
+ elseif ch == string.char(127) then
+ if rl.command ~= "" then
+ stdout_write(string.char(8) .. " " .. string.char(8))
+ rl.command = string.sub(rl.command, 1, -2)
+ end
+ elseif #ch > 1 then
+ -- control char
+ if ch == "UP" then
+ rl.hide_cmd()
+ if rl.history_index == #rl.history then
+ rl.history[rl.history_index] = rl.command
+ end
+ if rl.history_index > 1 then
+ rl.history_index = rl.history_index - 1
+ rl.command = rl.history[rl.history_index]
+ end
+ rl.show_cmd()
+ elseif ch == "DOWN" then
+ rl.hide_cmd()
+ if rl.history_index < rl.history_length then
+ rl.history_index = rl.history_index + 1
+ rl.command = rl.history[rl.history_index]
+ end
+ rl.show_cmd()
+ end
+ else
+ stdout_write(ch)
+ rl.command = rl.command .. ch
+ end
+ end
+ if rl.command then
+ rl.store_history(rl.command)
+ end
+ return rl.command
+ end
+ return rl
+end
+
+}
+
+local select_fds = {}
+local sessions = {}
+
+local line_erased = false
+
+function erase_line(indent)
+ if not line_erased then
+ line_erased = true
+ stdout_write(string.rep(string.char(8), indent)..string.rep(" ", indent)..string.rep(string.char(8), indent))
+ end
+end
+
+function do_select_stdin(indent, batch_cmd, batch_when, batch_expect)
+ while true do
+ local nfds = 1+#select_fds
+ local pfds = ffi.new("struct pollfd[?]", nfds)
+ pfds[0].fd = 0;
+ pfds[0].events = 1;
+ pfds[0].revents = 0;
+ for i = 1,#select_fds do
+ pfds[i].fd = select_fds[i].fd
+ pfds[i].events = 1
+ pfds[i].revents = 0
+ end
+ if batch_cmd and ((os_time() > batch_when) or (batch_expect and expect_success(batch_expect, buf, 0))) then
+ return true
+ end
+ while ffi.C.poll(pfds, nfds, 10) == 0 do
+ if batch_cmd and ((os_time() > batch_when) or (batch_expect and expect_success(batch_expect, buf, 0))) then
+ return true
+ end
+ if line_erased then
+ line_erased = false
+ return false
+ end
+ end
+ if pfds[0].revents == 1 then
+ return true
+ end
+ for i = 1,#select_fds do
+ if(pfds[i].revents > 0) then
+ if pfds[i].fd ~= select_fds[i].fd then
+ print("File descriptors unequal", pfds[i].fd, select_fds[i].fd)
+ end
+ select_fds[i].cb(select_fds[i], pfds[i].revents, indent)
+ end
+ end
+ end
+end
+
+-- shared read buffer; deliberately a global rather than a local, so the
+-- functions defined earlier (readln's reader, do_select_stdin) resolve it
+-- at call time instead of seeing an unset name
+buf = ffi.new("char [32768]")
+
+function session_stdout_write(prefix, data)
+ data = prefix .. data:gsub("\n", "\n"..prefix):gsub("\n"..prefix.."$", "\n")
+
+ stdout_write(data)
+end
+
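+-- Append the freshly read bytes to the session's sliding expect buffer
+-- (discarding the oldest data when space runs out) and report whether the
+-- session's current expect string occurs anywhere in the buffered output.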
+function expect_success(sok, buf, nread)
+ local expect_buf_sz = ffi.sizeof(sok.expect_buf) - 128
+ local expect_buf_avail = expect_buf_sz - sok.expect_buf_idx
+ -- print("EXPECT_SUCCESS: nread ".. tostring(nread).. " expect_buf_idx: " .. tostring(sok.expect_buf_idx) .. " expect_buf_avail: " .. tostring(expect_buf_avail) )
+ if expect_buf_avail < 0 then
+ print "EXPECT BUFFER OVERRUN ALREADY"
+ os.exit(1)
+ end
+ if expect_buf_avail < nread then
+ if (nread >= ffi.sizeof(sok.expect_buf)) then
+ print("Read too large of a chunk to fit into expect buffer")
+ return nil
+ end
+ local delta = nread - expect_buf_avail
+
+ ffi.C.memmove(sok.expect_buf, sok.expect_buf + delta, expect_buf_sz - delta)
+ sok.expect_buf_idx = sok.expect_buf_idx - delta
+ expect_buf_avail = nread
+ end
+ if sok.expect_buf_idx + nread > expect_buf_sz then
+ print("ERROR, I have just overrun the buffer !")
+ os.exit(1)
+ end
+ ffi.C.memcpy(sok.expect_buf + sok.expect_buf_idx, buf, nread)
+ sok.expect_buf_idx = sok.expect_buf_idx + nread
+ if sok.expect_str == nil then
+ return true
+ end
+ local match_p = ffi.C.memmem(sok.expect_buf, sok.expect_buf_idx, sok.expect_str, sok.expect_str_len)
+ if match_p ~= nil then
+ return true
+ end
+ return false
+end
+
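+-- Consume a successful match: fire the optional expect_cb, drop everything
+-- up to and including the matched string, and clear the expect state so the
+-- session is ready for the next expect.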
+function expect_done(sok)
+ local expect_buf_sz = ffi.sizeof(sok.expect_buf) - 128
+ if not sok.expect_str then
+ return false
+ end
+ local match_p = ffi.C.memmem(sok.expect_buf, sok.expect_buf_idx, sok.expect_str, sok.expect_str_len)
+ if match_p ~= nil then
+ if sok.expect_cb then
+ sok.expect_cb(sok)
+ end
+ local match_idx = ffi.cast("char *", match_p) - ffi.cast("char *", sok.expect_buf)
+ local tail_len = sok.expect_buf_idx - match_idx - sok.expect_str_len
+ -- keep only the bytes that follow the matched string
+ ffi.C.memmove(sok.expect_buf, ffi.cast("char *", match_p) + sok.expect_str_len, tail_len)
+ sok.expect_buf_idx = tail_len
+ sok.expect_success = true
+
+ sok.expect_str = nil
+ sok.expect_str_len = 0
+ return true
+ end
+end
+
+function slave_events(sok, revents, indent)
+ local fd = sok.fd
+ local nread = ffi.C.read(fd, buf, ffi.sizeof(buf)-128)
+ local idx = nread - 1
+ while idx >= 0 and buf[idx] ~= 10 do
+ idx = idx - 1
+ end
+ if idx >= 0 then
+ erase_line(indent)
+ session_stdout_write(sok.prefix, sok.buf .. ffi.string(buf, idx+1))
+ sok.buf = ""
+ end
+ sok.buf = sok.buf .. ffi.string(buf+idx+1, nread-idx-1)
+ -- print("\nRead: " .. tostring(nread))
+ -- stdout_write(ffi.string(buf, nread))
+ if expect_success(sok, buf, nread) then
+ return true
+ end
+ return false
+end
+
+
+function start_session(name)
+ local mfd, cpid = pty_run("/bin/bash")
+ local sok = { ["fd"] = mfd, ["cb"] = slave_events, ["buf"] = "", ["prefix"] = name .. ":", ["expect_buf"] = ffi.new("char [165536]"), ["expect_buf_idx"] = 0, ["expect_str"] = nil }
+ table.insert(select_fds, sok)
+ sessions[name] = sok
+end
+
+function command_transform(exe)
+ if exe == "break" then
+ exe = string.char(3)
+ end
+ return exe
+end
+
+function session_write(a_session, a_str)
+ if has_session(a_session) then
+ return tonumber(ffi.C.write(sessions[a_session].fd, c_str(a_str), #a_str))
+ else
+ return 0
+ end
+end
+
+function session_exec(a_session, a_cmd)
+ local exe = command_transform(a_cmd) .. "\n"
+ session_write(a_session, exe)
+end
+
+function session_cmd(ui, a_session, a_cmd)
+ if not has_session(a_session) then
+ stdout_write("ERR: No such session '" .. tostring(a_session) .. "'\n")
+ return nil
+ end
+ if a_session == "lua" then
+ local func, msg = loadstring(ui.lua_acc .. a_cmd)
+ -- stdout_write("LOADSTR: " .. vpp.dump({ ret, msg }) .. "\n")
+ if not func and string.match(msg, "<eof>") then
+ if a_session ~= ui.in_session then
+ stdout_write("ERR LOADSTR: " .. tostring(msg) .. "\n")
+ return nil
+ end
+ ui.lua_acc = ui.lua_acc .. a_cmd .. "\n"
+ return true
+ end
+ ui.lua_acc = ""
+ local ret, msg = pcall(func)
+ if ret then
+ return true
+ else
+ stdout_write("ERR: " .. msg .. "\n")
+ return nil
+ end
+ else
+ session_exec(a_session, a_cmd)
+ if ui.session_cmd_delay then
+ return { "delay", ui.session_cmd_delay }
+ end
+ return true
+ end
+end
+
+function has_session(a_session)
+ if a_session == "lua" then
+ return true
+ end
+ return (sessions[a_session] ~= nil)
+end
+
+function command_match(list, input, output)
+ for i, v in ipairs(list) do
+ local m = {}
+ m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9] = string.match(input, v[1])
+ -- print("MATCH: ", vpp.dump(m))
+ if m[1] then
+ output["result"] = m
+ output["result_index"] = i
+ return m
+ end
+ end
+ return nil
+end
+
+function cmd_spawn_shell(ui, a_arg)
+ start_session(a_arg[1])
+ return true
+end
+
+function cmd_run_cmd(ui, a_arg)
+ local a_sess = a_arg[1]
+ local a_cmd = a_arg[2]
+ return session_cmd(ui, a_sess, a_cmd)
+end
+
+function cmd_cd(ui, a_arg)
+ local a_sess = a_arg[1]
+ if has_session(a_sess) then
+ ui.in_session = a_sess
+ return true
+ else
+ stdout_write("ERR: Unknown session '".. tostring(a_sess) .. "'\n")
+ return nil
+ end
+end
+
+function cmd_sleep(ui, a_arg)
+ return { "delay", tonumber(a_arg[1]) }
+end
+
+function cmd_expect(ui, a_arg)
+ local a_sess = a_arg[1]
+ local a_expect = a_arg[2]
+ local sok = sessions[a_sess]
+ if not sok then
+ stdout_write("ERR: unknown session '" .. tostring(a_sess) .. "'\n")
+ return nil
+ end
+ sok.expect_str = c_str(a_expect)
+ sok.expect_str_len = #a_expect
+ return { "expect", a_sess }
+end
+
+function cmd_info(ui, a_arg)
+ local a_sess = a_arg[1]
+ local sok = sessions[a_sess]
+ if not sok then
+ stdout_write("ERR: unknown session '" .. tostring(a_sess) .. "'\n")
+ return nil
+ end
+ print("Info for session " .. tostring(a_sess) .. "\n")
+ print("Expect buffer index: " .. tostring(sok.expect_buf_idx))
+ print("Expect buffer: '" .. tostring(ffi.string(sok.expect_buf, sok.expect_buf_idx)) .. "'\n")
+ if sok.expect_str then
+ print("Expect string: '" .. tostring(ffi.string(sok.expect_str, sok.expect_str_len)) .. "'\n")
+ else
+ print("Expect string not set\n")
+ end
+end
+
+function cmd_echo(ui, a_arg)
+ local a_data = a_arg[1]
+ print("ECHO: " .. tostring(a_data))
+end
+
+main_command_table = {
+ { "^shell ([a-zA-Z0-9_]+)$", cmd_spawn_shell },
+ { "^run ([a-zA-Z0-9_]+) (.+)$", cmd_run_cmd },
+ { "^cd ([a-zA-Z0-9_]+)$", cmd_cd },
+ { "^sleep ([0-9]+)$", cmd_sleep },
+ { "^expect ([a-zA-Z0-9_]+) (.-)$", cmd_expect },
+ { "^info ([a-zA-Z0-9_]+)$", cmd_info },
+ { "^echo (.-)$", cmd_echo }
+}
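+-- The table above defines the whole batch-script grammar; a few examples of
+-- the commands it accepts (see the *.lute scripts):
+--   shell s1              spawn a bash session on a pty, named "s1"
+--   run s1 ip addr        type a command into session s1
+--   expect s1 $           wait for "$" to appear in s1's output
+--   sleep 2               delay the next batch command by 2 seconds
+--   cd s1                 switch interactive input into session s1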
+
+
+
+function ui_set_prompt(ui)
+ if ui.in_session then
+ if ui.in_session == "lua" then
+ if #ui.lua_acc > 0 then
+ ui.r.prompt = ui.in_session .. ">>"
+ else
+ ui.r.prompt = ui.in_session .. ">"
+ end
+ else
+ ui.r.prompt = ui.in_session .. "> "
+ end
+ else
+ ui.r.prompt = "> "
+ end
+ return ui.r.prompt
+end
+
+function ui_run_command(ui, cmd)
+ -- stdout_write("Command: " .. tostring(cmd) .. "\n")
+ local ret = false
+ if ui.in_session then
+ if cmd then
+ if cmd == "^D^D^D" then
+ ui.in_session = nil
+ ret = true
+ else
+ ret = session_cmd(ui, ui.in_session, cmd)
+ end
+ else
+ ui.in_session = nil
+ ret = true
+ end
+ else
+ if cmd then
+ local out = {}
+ if cmd == "" then
+ ret = true
+ end
+ if command_match(main_command_table, cmd, out) then
+ local i = out.result_index
+ local m = out.result
+ if main_command_table[i][2] then
+ ret = main_command_table[i][2](ui, m)
+ end
+ end
+ end
+ if not cmd or cmd == "quit" then
+ return "quit"
+ end
+ end
+ return ret
+end
+
+local ui = {}
+ui.in_session = nil
+ui.r = readln.reader()
+ui.lua_acc = ""
+ui.session_cmd_delay = 0.3
+
+local lines = ""
+
+local done = false
+-- a helper function which always returns nil
+local no_next_line = function() return nil end
+
+-- a function which returns the next batch line
+local next_line = no_next_line
+
+local batchfile = arg[1]
+
+if batchfile then
+ local f = io.lines(batchfile)
+ next_line = function()
+ local line = f()
+ if line then
+ return line
+ else
+ next_line = no_next_line
+ session_stdout_write(batchfile .. ":", "End of batch\n")
+ return nil
+ end
+ end
+end
+
+
+local batch_when = 0
+local batch_expect = nil
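+-- Main loop: fetch the next line, interactively or from the batch file.
+-- A command handler may return a table to steer batching: { "delay", secs }
+-- postpones the next batch command, and { "expect", session } holds it until
+-- that session's expect string matches (with a 15-second timeout).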
+while not done do
+ local prompt = ui_set_prompt(ui)
+ local batch_cmd = next_line()
+ local cmd, expect_sok = ui.r.readln(do_select_stdin, batch_cmd, batch_when, batch_expect)
+ if expect_sok and not expect_success(expect_sok, buf, 0) then
+ if next_line ~= no_next_line then
+ print("ERR: expect timeout\n")
+ next_line = no_next_line
+ end
+ else
+ local cmd_ret = ui_run_command(ui, cmd)
+ if not cmd_ret and next_line ~= no_next_line then
+ print("ERR: Error during batch execution\n")
+ next_line = no_next_line
+ end
+
+ if cmd_ret == "quit" then
+ done = true
+ end
+ batch_expect = nil
+ batch_when = 0
+ if type(cmd_ret) == "table" then
+ if cmd_ret[1] == "delay" then
+ batch_when = os_time() + tonumber(cmd_ret[2])
+ end
+ if cmd_ret[1] == "expect" then
+ batch_expect = sessions[cmd_ret[2]]
+ batch_when = os_time() + 15
+ end
+ end
+ end
+end
+ui.r.done()
+
+os.exit(1)
+
+
+
diff --git a/src/vpp-api/lua/examples/lute/script-inout-acl-noacl.lute b/src/vpp-api/lua/examples/lute/script-inout-acl-noacl.lute
new file mode 100644
index 00000000..a24d04bf
--- /dev/null
+++ b/src/vpp-api/lua/examples/lute/script-inout-acl-noacl.lute
@@ -0,0 +1,329 @@
+shell vppbuild
+run vppbuild stty -echo
+run vppbuild sudo -u ubuntu -i bash -c "(cd vpp && make plugins && echo ALLGOOD)"
+expect vppbuild ALLGOOD
+
+shell s0
+shell s1
+shell s2
+
+
+cd s1
+unshare -n /bin/bash
+/sbin/ifconfig -a
+^D^D^D
+
+cd s2
+unshare -n /bin/bash
+/sbin/ifconfig -a
+^D^D^D
+
+
+cd lua
+
+function session_get_bash_pid(s)
+ if not has_session(s) then
+ return nil
+ end
+ local fname = "/tmp/lute-"..s.."-pid.txt"
+
+ session_exec(s, "echo $$ >" .. fname)
+ -- it's a dirty hack but it's quick
+ sleep(0.5)
+ local pid = io.lines(fname)()
+ print("Got pid for " .. s .. " : " .. tostring(pid))
+ return(tonumber(pid))
+end
+
+function session_connect_with(s0, s1)
+ -- local pid0 = tostring(session_get_bash_pid(s0))
+ local pid1 = tostring(session_get_bash_pid(s1))
+ local eth_options = { "rx", "tx", "sg", "tso", "ufo", "gso", "gro", "lro", "rxvlan", "txvlan", "rxhash" }
+ local this_end = s0 .. "_" .. s1
+ local other_end = s1 .. "_" .. s0
+ session_exec(s0, "ip link add name " .. this_end .. " type veth peer name " .. other_end)
+ session_exec(s0, "ip link set dev " .. this_end .. " up promisc on")
+ for i, option in ipairs(eth_options) do
+ session_exec(s0, "/sbin/ethtool --offload " .. this_end .. " " .. option .. " off")
+ session_exec(s0, "/sbin/ethtool --offload " .. other_end .. " " .. option .. " off")
+ end
+ session_exec(s0, "ip link set dev " .. other_end .. " up promisc on netns /proc/" .. pid1 .. "/ns/net")
+ sleep(0.5)
+end
+
+^D^D^D
+run lua session_connect_with("s0", "s1")
+run lua session_connect_with("s0", "s2")
+
+cd s1
+ip -6 addr add dev s1_s0 2001:db8:1::1/64
+ip -4 addr add dev s1_s0 192.0.2.1/24
+ip link set dev s1_s0 up promisc on
+^D^D^D
+
+cd s2
+ip -6 addr add dev s2_s0 2001:db8:1::2/64
+ip -6 addr add dev s2_s0 2001:db8:1::3/64
+ip -6 addr add dev s2_s0 2001:db8:1::4/64
+ip -4 addr add dev s2_s0 192.0.2.2/24
+ip -4 addr add dev s2_s0:1 192.0.2.3/24
+ip -4 addr add dev s2_s0:2 192.0.2.4/24
+ip link set dev s2_s0 up promisc on
+^D^D^D
+
+run s1 ip addr
+run s2 ip addr
+shell VPP
+cd VPP
+cd /home/ubuntu/vpp
+make debug
+r
+^D^D^D
+expect VPP DBGvpp#
+
+cd lua
+-- Initialization of the Lua environment for talking to VPP
+vpp = require("vpp-lapi")
+root_dir = "/home/ubuntu/vpp"
+pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+vpp:init({ pneum_path = pneum_path })
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vlib-api/vlibmemory/memclnt.api")
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api")
+vpp:connect("aytest")
+vpp:consume_api(root_dir .. "/plugins/acl-plugin/acl/acl.api", "acl")
+
+^D^D^D
+
+cd lua
+
+reply = vpp:api_call("af_packet_create", { host_if_name = "s0_s1", hw_addr = "AAAAAA" })
+vpp_if_to_s1 = reply[1].sw_if_index
+
+reply = vpp:api_call("af_packet_create", { host_if_name = "s0_s2", hw_addr = "AAAAAA" })
+vpp_if_to_s2 = reply[1].sw_if_index
+
+ifaces = { vpp_if_to_s1, vpp_if_to_s2 }
+
+reply = vpp:api_call("sw_interface_set_flags", { sw_if_index = vpp_if_to_s1, admin_up_down = 1, link_up_down = 1 })
+print(vpp.dump(reply))
+reply = vpp:api_call("sw_interface_set_flags", { sw_if_index = vpp_if_to_s2, admin_up_down = 1, link_up_down = 1 })
+print(vpp.dump(reply))
+
+bd_id = 42
+
+reply = vpp:api_call("bridge_domain_add_del", { bd_id = bd_id, flood = 1, uu_flood = 1, forward = 1, learn = 1, arp_term = 0, is_add = 1 })
+print(vpp.dump(reply))
+
+for i, v in ipairs(ifaces) do
+ reply = vpp:api_call("sw_interface_set_l2_bridge", { rx_sw_if_index = v, bd_id = bd_id, shg = 0, bvi = 0, enable = 1 } )
+ print(vpp.dump(reply))
+end
+
+^D^D^D
+
+run s1 ping -c 3 192.0.2.2
+expect s1 packet loss
+run s1 ping -c 3 192.0.2.3
+expect s1 packet loss
+run s1 ping -c 3 192.0.2.4
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+
+
+cd lua
+--- ACL testing
+
+--[[ temporary comment out
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 230 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 8 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 15 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add_replace", { context = 42, count = 2, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add_replace", { context = 42, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add_replace", { context = 42, count = 0 })
+print(vpp.dump(reply))
+print("---")
+
+acl_index_to_delete = reply[1].acl_index
+print("Deleting " .. tostring(acl_index_to_delete))
+reply = vpp:api_call("acl_del", { context = 42, acl_index = acl_index_to_delete })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+for ri, rv in ipairs(reply) do
+ print("Reply message #" .. tostring(ri))
+ print(vpp.dump(rv))
+ for ai, av in ipairs(rv.r) do
+ print("ACL rule #" .. tostring(ai) .. " : " .. vpp.dump(av))
+ end
+
+end
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 4294967295 })
+print(vpp.dump(reply))
+print("---")
+
+
+]] -- end of comment out
+
+---- Should be nothing ^^
+r = {
+ { is_permit = 1, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8:1::2"), dst_ip_prefix_len = 128 },
+ { is_permit = 0, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8:1::3"), dst_ip_prefix_len = 128 },
+ { is_permit = 1, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8::"), dst_ip_prefix_len = 32 },
+ { is_permit = 1, is_ipv6 = 0, dst_ip_addr = ip46("192.0.2.2"), dst_ip_prefix_len = 32},
+ { is_permit = 0, is_ipv6 = 0, dst_ip_addr = ip46("192.0.2.3"), dst_ip_prefix_len = 32 },
+}
+
+reply = vpp:api_call("acl_add_replace", { context = 42, count = 5, r = r })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add_replace", { context = 42, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s1, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+--reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s2, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+-- print(vpp.dump(reply))
+--print("---")
+
+^D^D^D
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 0
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 1
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 2
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping -c 3 192.0.2.2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 3
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping -c 3 192.0.2.3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 4
+
+
+cd lua
+
+--- TEST OUTBOUND ACL
+
+r1 = {
+ { is_permit = 1, is_ipv6 = 1, src_ip_addr = ip46("2001:db8:1::1"), src_ip_prefix_len = 128, dst_ip_addr = ip46("2001:db8:1::2"), dst_ip_prefix_len = 128 },
+ { is_permit = 0, is_ipv6 = 1, src_ip_addr = ip46("2001:db8:1::1"), src_ip_prefix_len = 128, dst_ip_addr = ip46("2001:db8:1::4"), dst_ip_prefix_len = 128 }
+}
+
+reply = vpp:api_call("acl_add_replace", { context = 42, count = 3, r = r1 })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s2, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+
+^D^D^D
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: outacl 2 rule 0
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 1
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+run VPP show trace
+expect VPP match: outacl 2 rule 1
+
+run lua print("ALL GOOD!")
+
diff --git a/src/vpp-api/lua/examples/lute/script-inout-acl-old.lute b/src/vpp-api/lua/examples/lute/script-inout-acl-old.lute
new file mode 100644
index 00000000..9edebf02
--- /dev/null
+++ b/src/vpp-api/lua/examples/lute/script-inout-acl-old.lute
@@ -0,0 +1,329 @@
+shell vppbuild
+run vppbuild stty -echo
+run vppbuild sudo -u ubuntu -i bash -c "(cd vpp && make plugins && echo ALLGOOD)"
+expect vppbuild ALLGOOD
+
+shell s0
+shell s1
+shell s2
+
+
+cd s1
+unshare -n /bin/bash
+/sbin/ifconfig -a
+^D^D^D
+
+cd s2
+unshare -n /bin/bash
+/sbin/ifconfig -a
+^D^D^D
+
+
+cd lua
+
+function session_get_bash_pid(s)
+ if not has_session(s) then
+ return nil
+ end
+ local fname = "/tmp/lute-"..s.."-pid.txt"
+
+ session_exec(s, "echo $$ >" .. fname)
+ -- it's a dirty hack but it's quick
+ sleep(0.5)
+ local pid = io.lines(fname)()
+ print("Got pid for " .. s .. " : " .. tostring(pid))
+ return(tonumber(pid))
+end
+
+function session_connect_with(s0, s1)
+ -- local pid0 = tostring(session_get_bash_pid(s0))
+ local pid1 = tostring(session_get_bash_pid(s1))
+ local eth_options = { "rx", "tx", "sg", "tso", "ufo", "gso", "gro", "lro", "rxvlan", "txvlan", "rxhash" }
+ local this_end = s0 .. "_" .. s1
+ local other_end = s1 .. "_" .. s0
+ session_exec(s0, "ip link add name " .. this_end .. " type veth peer name " .. other_end)
+ session_exec(s0, "ip link set dev " .. this_end .. " up promisc on")
+ for i, option in ipairs(eth_options) do
+ session_exec(s0, "/sbin/ethtool --offload " .. this_end .. " " .. option .. " off")
+ session_exec(s0, "/sbin/ethtool --offload " .. other_end .. " " .. option .. " off")
+ end
+ session_exec(s0, "ip link set dev " .. other_end .. " up promisc on netns /proc/" .. pid1 .. "/ns/net")
+ sleep(0.5)
+end
+
+^D^D^D
+run lua session_connect_with("s0", "s1")
+run lua session_connect_with("s0", "s2")
+
+cd s1
+ip -6 addr add dev s1_s0 2001:db8:1::1/64
+ip -4 addr add dev s1_s0 192.0.2.1/24
+ip link set dev s1_s0 up promisc on
+^D^D^D
+
+cd s2
+ip -6 addr add dev s2_s0 2001:db8:1::2/64
+ip -6 addr add dev s2_s0 2001:db8:1::3/64
+ip -6 addr add dev s2_s0 2001:db8:1::4/64
+ip -4 addr add dev s2_s0 192.0.2.2/24
+ip -4 addr add dev s2_s0:1 192.0.2.3/24
+ip -4 addr add dev s2_s0:2 192.0.2.4/24
+ip link set dev s2_s0 up promisc on
+^D^D^D
+
+run s1 ip addr
+run s2 ip addr
+shell VPP
+cd VPP
+cd /home/ubuntu/vpp
+make debug
+r
+^D^D^D
+expect VPP DBGvpp#
+
+cd lua
+-- Initialization of the Lua environment for talking to VPP
+vpp = require("vpp-lapi")
+root_dir = "/home/ubuntu/vpp"
+pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+vpp:init({ pneum_path = pneum_path })
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vlib-api/vlibmemory/memclnt.api")
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api")
+vpp:connect("aytest")
+vpp:consume_api(root_dir .. "/plugins/acl-plugin/acl/acl.api", "acl")
+
+^D^D^D
+
+cd lua
+
+reply = vpp:api_call("af_packet_create", { host_if_name = "s0_s1", hw_addr = "AAAAAA" })
+vpp_if_to_s1 = reply[1].sw_if_index
+
+reply = vpp:api_call("af_packet_create", { host_if_name = "s0_s2", hw_addr = "AAAAAA" })
+vpp_if_to_s2 = reply[1].sw_if_index
+
+ifaces = { vpp_if_to_s1, vpp_if_to_s2 }
+
+reply = vpp:api_call("sw_interface_set_flags", { sw_if_index = vpp_if_to_s1, admin_up_down = 1, link_up_down = 1 })
+print(vpp.dump(reply))
+reply = vpp:api_call("sw_interface_set_flags", { sw_if_index = vpp_if_to_s2, admin_up_down = 1, link_up_down = 1 })
+print(vpp.dump(reply))
+
+bd_id = 42
+
+reply = vpp:api_call("bridge_domain_add_del", { bd_id = bd_id, flood = 1, uu_flood = 1, forward = 1, learn = 1, arp_term = 0, is_add = 1 })
+print(vpp.dump(reply))
+
+for i, v in ipairs(ifaces) do
+ reply = vpp:api_call("sw_interface_set_l2_bridge", { rx_sw_if_index = v, bd_id = bd_id, shg = 0, bvi = 0, enable = 1 } )
+ print(vpp.dump(reply))
+end
+
+^D^D^D
+
+run s1 ping -c 3 192.0.2.2
+expect s1 packet loss
+run s1 ping -c 3 192.0.2.3
+expect s1 packet loss
+run s1 ping -c 3 192.0.2.4
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+
+
+cd lua
+--- ACL testing
+
+--[[ temporary comment out
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 230 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 8 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 15 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add", { context = 42, count = 2, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add", { context = 42, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add", { context = 42, count = 0 })
+print(vpp.dump(reply))
+print("---")
+
+acl_index_to_delete = reply[1].acl_index
+print("Deleting " .. tostring(acl_index_to_delete))
+reply = vpp:api_call("acl_del", { context = 42, acl_index = acl_index_to_delete })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+for ri, rv in ipairs(reply) do
+ print("Reply message #" .. tostring(ri))
+ print(vpp.dump(rv))
+ for ai, av in ipairs(rv.r) do
+ print("ACL rule #" .. tostring(ai) .. " : " .. vpp.dump(av))
+ end
+
+end
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 4294967295 })
+print(vpp.dump(reply))
+print("---")
+
+
+]] -- end of comment out
+
+---- Should be nothing ^^
+r = {
+ { is_permit = 1, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8:1::2"), dst_ip_prefix_len = 128 },
+ { is_permit = 0, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8:1::3"), dst_ip_prefix_len = 128 },
+ { is_permit = 1, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8::"), dst_ip_prefix_len = 32 },
+ { is_permit = 1, is_ipv6 = 0, dst_ip_addr = ip46("192.0.2.2"), dst_ip_prefix_len = 32},
+ { is_permit = 0, is_ipv6 = 0, dst_ip_addr = ip46("192.0.2.3"), dst_ip_prefix_len = 32 },
+}
+
+reply = vpp:api_call("acl_add", { context = 42, count = 5, r = r })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add", { context = 42, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s1, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+--reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s2, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+-- print(vpp.dump(reply))
+--print("---")
+
+^D^D^D
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 0
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 1
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 2
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping -c 3 192.0.2.2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 3
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping -c 3 192.0.2.3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 4
+
+
+cd lua
+
+--- TEST OUTBOUND ACL
+
+r1 = {
+ { is_permit = 1, is_ipv6 = 1, src_ip_addr = ip46("2001:db8:1::1"), src_ip_prefix_len = 128, dst_ip_addr = ip46("2001:db8:1::2"), dst_ip_prefix_len = 128 },
+ { is_permit = 0, is_ipv6 = 1, src_ip_addr = ip46("2001:db8:1::1"), src_ip_prefix_len = 128, dst_ip_addr = ip46("2001:db8:1::4"), dst_ip_prefix_len = 128 }
+}
+
+reply = vpp:api_call("acl_add", { context = 42, count = 3, r = r1 })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s2, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+
+^D^D^D
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: outacl 2 rule 0
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 1
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+run VPP show trace
+expect VPP match: outacl 2 rule 1
+
+run lua print("ALL GOOD!")
+
diff --git a/src/vpp-api/lua/examples/lute/script-inout-acl.lute b/src/vpp-api/lua/examples/lute/script-inout-acl.lute
new file mode 100644
index 00000000..d7e7423c
--- /dev/null
+++ b/src/vpp-api/lua/examples/lute/script-inout-acl.lute
@@ -0,0 +1,329 @@
+shell vppbuild
+run vppbuild stty -echo
+run vppbuild sudo -u ubuntu -i bash -c "(cd vpp && make plugins && echo ALLGOOD)"
+expect vppbuild ALLGOOD
+
+shell s0
+shell s1
+shell s2
+
+
+cd s1
+unshare -n /bin/bash
+/sbin/ifconfig -a
+^D^D^D
+
+cd s2
+unshare -n /bin/bash
+/sbin/ifconfig -a
+^D^D^D
+
+
+cd lua
+
+function session_get_bash_pid(s)
+ if not has_session(s) then
+ return nil
+ end
+ local fname = "/tmp/lute-"..s.."-pid.txt"
+
+ session_exec(s, "echo $$ >" .. fname)
+ -- it's a dirty hack but it's quick
+ sleep(0.5)
+ local pid = io.lines(fname)()
+ print("Got pid for " .. s .. " : " .. tostring(pid))
+ return(tonumber(pid))
+end
+
+function session_connect_with(s0, s1)
+ -- local pid0 = tostring(session_get_bash_pid(s0))
+ local pid1 = tostring(session_get_bash_pid(s1))
+ local eth_options = { "rx", "tx", "sg", "tso", "ufo", "gso", "gro", "lro", "rxvlan", "txvlan", "rxhash" }
+ local this_end = s0 .. "_" .. s1
+ local other_end = s1 .. "_" .. s0
+ session_exec(s0, "ip link add name " .. this_end .. " type veth peer name " .. other_end)
+ session_exec(s0, "ip link set dev " .. this_end .. " up promisc on")
+ for i, option in ipairs(eth_options) do
+ session_exec(s0, "/sbin/ethtool --offload " .. this_end .. " " .. option .. " off")
+ session_exec(s0, "/sbin/ethtool --offload " .. other_end .. " " .. option .. " off")
+ end
+ session_exec(s0, "ip link set dev " .. other_end .. " up promisc on netns /proc/" .. pid1 .. "/ns/net")
+ sleep(0.5)
+end
+
+^D^D^D
+run lua session_connect_with("s0", "s1")
+run lua session_connect_with("s0", "s2")
+
+cd s1
+ip -6 addr add dev s1_s0 2001:db8:1::1/64
+ip -4 addr add dev s1_s0 192.0.2.1/24
+ip link set dev s1_s0 up promisc on
+^D^D^D
+
+cd s2
+ip -6 addr add dev s2_s0 2001:db8:1::2/64
+ip -6 addr add dev s2_s0 2001:db8:1::3/64
+ip -6 addr add dev s2_s0 2001:db8:1::4/64
+ip -4 addr add dev s2_s0 192.0.2.2/24
+ip -4 addr add dev s2_s0:1 192.0.2.3/24
+ip -4 addr add dev s2_s0:2 192.0.2.4/24
+ip link set dev s2_s0 up promisc on
+^D^D^D
+
+run s1 ip addr
+run s2 ip addr
+shell VPP
+cd VPP
+cd /home/ubuntu/vpp
+make debug
+r
+^D^D^D
+expect VPP DBGvpp#
+
+cd lua
+-- Initialization of the Lua environment for talking to VPP
+vpp = require("vpp-lapi")
+root_dir = "/home/ubuntu/vpp"
+pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+vpp:init({ pneum_path = pneum_path })
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vlib-api/vlibmemory/memclnt.api")
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api")
+vpp:connect("aytest")
+vpp:consume_api(root_dir .. "/plugins/acl-plugin/acl/acl.api", "acl")
+
+^D^D^D
+
+cd lua
+
+reply = vpp:api_call("af_packet_create", { host_if_name = "s0_s1", hw_addr = "AAAAAA" })
+vpp_if_to_s1 = reply[1].sw_if_index
+
+reply = vpp:api_call("af_packet_create", { host_if_name = "s0_s2", hw_addr = "AAAAAA" })
+vpp_if_to_s2 = reply[1].sw_if_index
+
+ifaces = { vpp_if_to_s1, vpp_if_to_s2 }
+
+reply = vpp:api_call("sw_interface_set_flags", { sw_if_index = vpp_if_to_s1, admin_up_down = 1, link_up_down = 1 })
+print(vpp.dump(reply))
+reply = vpp:api_call("sw_interface_set_flags", { sw_if_index = vpp_if_to_s2, admin_up_down = 1, link_up_down = 1 })
+print(vpp.dump(reply))
+
+bd_id = 42
+
+reply = vpp:api_call("bridge_domain_add_del", { bd_id = bd_id, flood = 1, uu_flood = 1, forward = 1, learn = 1, arp_term = 0, is_add = 1 })
+print(vpp.dump(reply))
+
+for i, v in ipairs(ifaces) do
+ reply = vpp:api_call("sw_interface_set_l2_bridge", { rx_sw_if_index = v, bd_id = bd_id, shg = 0, bvi = 0, enable = 1 } )
+ print(vpp.dump(reply))
+end
+
+^D^D^D
+
+run s1 ping -c 3 192.0.2.2
+expect s1 packet loss
+run s1 ping -c 3 192.0.2.3
+expect s1 packet loss
+run s1 ping -c 3 192.0.2.4
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+
+
+cd lua
+--- ACL testing
+
+--[[ temporary comment out
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 230 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 8 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 15 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add_replace", { context = 42, acl_index = -1, count = 2, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add_replace", { context = 42, acl_index = -1, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add_replace", { context = 42, acl_index = -1, count = 0 })
+print(vpp.dump(reply))
+print("---")
+
+acl_index_to_delete = reply[1].acl_index
+print("Deleting " .. tostring(acl_index_to_delete))
+reply = vpp:api_call("acl_del", { context = 42, acl_index = acl_index_to_delete })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+for ri, rv in ipairs(reply) do
+ print("Reply message #" .. tostring(ri))
+ print(vpp.dump(rv))
+ for ai, av in ipairs(rv.r) do
+ print("ACL rule #" .. tostring(ai) .. " : " .. vpp.dump(av))
+ end
+
+end
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 4294967295 })
+print(vpp.dump(reply))
+print("---")
+
+
+]] -- end of comment out
+
+---- Should be nothing ^^
+r = {
+ { is_permit = 1, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8:1::2"), dst_ip_prefix_len = 128 },
+ { is_permit = 0, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8:1::3"), dst_ip_prefix_len = 128 },
+ { is_permit = 1, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8::"), dst_ip_prefix_len = 32 },
+ { is_permit = 1, is_ipv6 = 0, dst_ip_addr = ip46("192.0.2.2"), dst_ip_prefix_len = 32},
+ { is_permit = 0, is_ipv6 = 0, dst_ip_addr = ip46("192.0.2.3"), dst_ip_prefix_len = 32 },
+}
+
+reply = vpp:api_call("acl_add_replace", { context = 42, acl_index = -1, count = 5, r = r })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add_replace", { context = 42, acl_index = -1, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s1, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+--reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s2, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+-- print(vpp.dump(reply))
+--print("---")
+
+^D^D^D
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 0
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 1
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 2
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping -c 3 192.0.2.2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 3
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping -c 3 192.0.2.3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 4
+
+
+cd lua
+
+--- TEST OUTBOUND ACL
+
+r1 = {
+ { is_permit = 1, is_ipv6 = 1, src_ip_addr = ip46("2001:db8:1::1"), src_ip_prefix_len = 128, dst_ip_addr = ip46("2001:db8:1::2"), dst_ip_prefix_len = 128 },
+ { is_permit = 0, is_ipv6 = 1, src_ip_addr = ip46("2001:db8:1::1"), src_ip_prefix_len = 128, dst_ip_addr = ip46("2001:db8:1::4"), dst_ip_prefix_len = 128 }
+}
+
+reply = vpp:api_call("acl_add_replace", { context = 42, acl_index = -1, count = 3, r = r1 })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s2, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+
+^D^D^D
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run VPP show trace
+expect VPP match: outacl 2 rule 0
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run VPP show trace
+expect VPP match: inacl 0 rule 1
+
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+run VPP show trace
+expect VPP match: outacl 2 rule 1
+
+run lua print("ALL GOOD!")
+
diff --git a/src/vpp-api/lua/examples/lute/script.lute b/src/vpp-api/lua/examples/lute/script.lute
new file mode 100644
index 00000000..c3dd90f2
--- /dev/null
+++ b/src/vpp-api/lua/examples/lute/script.lute
@@ -0,0 +1,7 @@
+shell s1
+expect s1 $
+run s1 echo testing123
+expect s1 $
+run s1 echo done
+quit
+
diff --git a/src/vpp-api/lua/examples/lute/sessions-acl.lute b/src/vpp-api/lua/examples/lute/sessions-acl.lute
new file mode 100644
index 00000000..ac237ef9
--- /dev/null
+++ b/src/vpp-api/lua/examples/lute/sessions-acl.lute
@@ -0,0 +1,308 @@
+run lua -- collectgarbage("stop")
+
+shell vppbuild
+run vppbuild stty -echo
+run vppbuild sudo -u ubuntu -i bash -c "(cd vpp && make plugins && echo ALLGOOD)"
+expect vppbuild ALLGOOD
+
+shell s0
+shell s1
+shell s2
+
+
+cd s1
+unshare -n /bin/bash
+/sbin/ifconfig -a
+^D^D^D
+
+cd s2
+unshare -n /bin/bash
+/sbin/ifconfig -a
+^D^D^D
+
+
+cd lua
+
+function session_get_bash_pid(s)
+ if not has_session(s) then
+ return nil
+ end
+ local fname = "/tmp/lute-"..s.."-pid.txt"
+
+ session_exec(s, "echo $$ >" .. fname)
+ -- it's a dirty hack but it's quick
+ sleep(0.5)
+ local pid = io.lines(fname)()
+ print("Got pid for " .. s .. " : " .. tostring(pid))
+ return(tonumber(pid))
+end
+
+function session_connect_with(s0, s1)
+ -- local pid0 = tostring(session_get_bash_pid(s0))
+ local pid1 = tostring(session_get_bash_pid(s1))
+ local eth_options = { "rx", "tx", "sg", "tso", "ufo", "gso", "gro", "lro", "rxvlan", "txvlan", "rxhash" }
+ local this_end = s0 .. "_" .. s1
+ local other_end = s1 .. "_" .. s0
+ session_exec(s0, "ip link add name " .. this_end .. " type veth peer name " .. other_end)
+ session_exec(s0, "ip link set dev " .. this_end .. " up promisc on")
+ for i, option in ipairs(eth_options) do
+ session_exec(s0, "/sbin/ethtool --offload " .. this_end .. " " .. option .. " off")
+ session_exec(s0, "/sbin/ethtool --offload " .. other_end .. " " .. option .. " off")
+ end
+ session_exec(s0, "ip link set dev " .. other_end .. " up promisc on netns /proc/" .. pid1 .. "/ns/net")
+ sleep(0.5)
+end
+
+^D^D^D
+run lua session_connect_with("s0", "s1")
+run lua session_connect_with("s0", "s2")
+
+cd s1
+ip -6 addr add dev s1_s0 2001:db8:1::1/64
+ip -4 addr add dev s1_s0 192.0.2.1/24
+ip link set dev s1_s0 up promisc on
+^D^D^D
+
+cd s2
+ip -6 addr add dev s2_s0 2001:db8:1::2/64
+ip -6 addr add dev s2_s0 2001:db8:1::3/64
+ip -6 addr add dev s2_s0 2001:db8:1::4/64
+ip -4 addr add dev s2_s0 192.0.2.2/24
+ip -4 addr add dev s2_s0:1 192.0.2.3/24
+ip -4 addr add dev s2_s0:2 192.0.2.4/24
+ip link set dev s2_s0 up promisc on
+^D^D^D
+
+run s1 ip addr
+run s2 ip addr
+shell VPP
+cd VPP
+cd /home/ubuntu/vpp
+make debug
+r
+^D^D^D
+expect VPP DBGvpp#
+
+cd lua
+-- Initialization of the Lua environment for talking to VPP
+vpp = require("vpp-lapi")
+root_dir = "/home/ubuntu/vpp"
+pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so"
+vpp:init({ pneum_path = pneum_path })
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vlib-api/vlibmemory/memclnt.api")
+vpp:consume_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api")
+vpp:connect("aytest")
+vpp:consume_api(root_dir .. "/plugins/acl-plugin/acl/acl.api", "acl")
+
+^D^D^D
+
+cd lua
+
+reply = vpp:api_call("af_packet_create", { host_if_name = "s0_s1", hw_addr = "AAAAAA" })
+vpp_if_to_s1 = reply[1].sw_if_index
+
+reply = vpp:api_call("af_packet_create", { host_if_name = "s0_s2", hw_addr = "AAAAAA" })
+vpp_if_to_s2 = reply[1].sw_if_index
+
+ifaces = { vpp_if_to_s1, vpp_if_to_s2 }
+
+reply = vpp:api_call("sw_interface_set_flags", { sw_if_index = vpp_if_to_s1, admin_up_down = 1, link_up_down = 1 })
+print(vpp.dump(reply))
+reply = vpp:api_call("sw_interface_set_flags", { sw_if_index = vpp_if_to_s2, admin_up_down = 1, link_up_down = 1 })
+print(vpp.dump(reply))
+
+bd_id = 42
+
+reply = vpp:api_call("bridge_domain_add_del", { bd_id = bd_id, flood = 1, uu_flood = 1, forward = 1, learn = 1, arp_term = 0, is_add = 1 })
+print(vpp.dump(reply))
+
+for i, v in ipairs(ifaces) do
+ reply = vpp:api_call("sw_interface_set_l2_bridge", { rx_sw_if_index = v, bd_id = bd_id, shg = 0, bvi = 0, enable = 1 } )
+ print(vpp.dump(reply))
+end
+
+^D^D^D
+
+run s1 ping -c 3 192.0.2.2
+expect s1 packet loss
+run s1 ping -c 3 192.0.2.3
+expect s1 packet loss
+run s1 ping -c 3 192.0.2.4
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::2
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::3
+expect s1 packet loss
+run s1 ping6 -c 3 2001:db8:1::4
+expect s1 packet loss
+
+
+cd lua
+--- ACL testing
+
+--[[ temporary comment out
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 230 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 8 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = 15 })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add", { context = 42, count = 2, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add", { context = 42, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = 0, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_add", { context = 42, count = 0 })
+print(vpp.dump(reply))
+print("---")
+
+acl_index_to_delete = reply[1].acl_index
+print("Deleting " .. tostring(acl_index_to_delete))
+reply = vpp:api_call("acl_del", { context = 42, acl_index = acl_index_to_delete })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+for ri, rv in ipairs(reply) do
+ print("Reply message #" .. tostring(ri))
+ print(vpp.dump(rv))
+ for ai, av in ipairs(rv.r) do
+ print("ACL rule #" .. tostring(ai) .. " : " .. vpp.dump(av))
+ end
+
+end
+print("---")
+
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+reply = vpp:api_call("acl_del", { context = 42, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 0})
+print(vpp.dump(reply))
+print("---")
+
+reply = vpp:api_call("acl_dump", { context = 42, sw_if_index = 4294967295 })
+print(vpp.dump(reply))
+print("---")
+
+
+]] -- end of comment out
+
+---- Should be nothing ^^
+r = {
+ { is_permit = 1, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8:1::2"), dst_ip_prefix_len = 128 },
+ { is_permit = 0, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8:1::3"), dst_ip_prefix_len = 128 },
+ { is_permit = 1, is_ipv6 = 1, dst_ip_addr = ip46("2001:db8::"), dst_ip_prefix_len = 32 },
+ { is_permit = 1, is_ipv6 = 0, dst_ip_addr = ip46("192.0.2.2"), dst_ip_prefix_len = 32},
+ { is_permit = 0, is_ipv6 = 0, dst_ip_addr = ip46("192.0.2.3"), dst_ip_prefix_len = 32 },
+}
+
+reply = vpp:api_call("acl_add", { context = 42, count = 5, r = r })
+print(vpp.dump(reply))
+print("---")
+interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_add", { context = 42, count = 3, r = { { is_permit = 1, is_ipv6 = 1 }, { is_permit = 0, is_ipv6 = 1 }, { is_permit = 1, is_ipv6 = 0 } } })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s1, is_add = 1, is_input = 1, acl_index = interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+--- TEST OUTBOUND ACL
+
+r1 = {
+ { is_permit = 1, is_ipv6 = 1, src_ip_addr = ip46("2001:db8:1::1"), src_ip_prefix_len = 128, dst_ip_addr = ip46("2001:db8:1::2"), dst_ip_prefix_len = 128 },
+ { is_permit = 0, is_ipv6 = 1, src_ip_addr = ip46("2001:db8:1::1"), src_ip_prefix_len = 128, dst_ip_addr = ip46("2001:db8:1::4"), dst_ip_prefix_len = 128 },
+ { is_permit = 2, is_ipv6 = 0 }
+}
+
+reply = vpp:api_call("acl_add", { context = 42, count = 3, r = r1 })
+print(vpp.dump(reply))
+print("---")
+interface_acl_out = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s2, is_add = 1, is_input = 0, acl_index = interface_acl_out })
+print(vpp.dump(reply))
+print("---")
+
+r2 = {
+ { is_permit = 1, is_ipv6 = 1 },
+ { is_permit = 0, is_ipv6 = 0 }
+}
+
+reply = vpp:api_call("acl_add", { context = 42, count = 2, r = r2 })
+print(vpp.dump(reply))
+print("---")
+second_interface_acl_in = reply[1].acl_index
+
+reply = vpp:api_call("acl_interface_add_del", { context = 42, sw_if_index = vpp_if_to_s2, is_add = 1, is_input = 1, acl_index = second_interface_acl_in })
+print(vpp.dump(reply))
+print("---")
+
+^D^D^D
+
+run VPP show classify tables
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s2 nc -v -l -p 22
+run s1 nc 192.0.2.2 22
+run s1 echo
+sleep 1
+run s1 break
+sleep 1
+run VPP show trace
+expect VPP match: outacl 2 rule 2
+run VPP show classify tables
+
+
+run VPP show classify tables
+run VPP clear trace
+run VPP trace add af-packet-input 100
+run s2 nc -v -l -p 22
+run s1 nc 192.0.2.2 22
+run s1 echo
+sleep 1
+run s1 break
+sleep 1
+run VPP show trace
+expect VPP match: outacl 2 rule 2
+run VPP show classify tables
+
+
+run lua print("ALL GOOD!")
+
diff --git a/src/vpp-api/lua/vpp-lapi.lua b/src/vpp-api/lua/vpp-lapi.lua
new file mode 100644
index 00000000..587eb110
--- /dev/null
+++ b/src/vpp-api/lua/vpp-lapi.lua
@@ -0,0 +1,989 @@
+--[[
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+]]
+
+-- json decode/encode from https://gist.github.com/tylerneylon/59f4bcf316be525b30ab
+-- licensed by the author tylerneylon into public domain. Thanks!
+
+local json = {}
+
+-- Internal functions.
+
+local function kind_of(obj)
+ if type(obj) ~= 'table' then return type(obj) end
+ local i = 1
+ for _ in pairs(obj) do
+ if obj[i] ~= nil then i = i + 1 else return 'table' end
+ end
+ if i == 1 then return 'table' else return 'array' end
+end
+
+local function escape_str(s)
+ local in_char = {'\\', '"', '/', '\b', '\f', '\n', '\r', '\t'}
+ local out_char = {'\\', '"', '/', 'b', 'f', 'n', 'r', 't'}
+ for i, c in ipairs(in_char) do
+ s = s:gsub(c, '\\' .. out_char[i])
+ end
+ return s
+end
+
+-- Returns pos, did_find; there are two cases:
+-- 1. Delimiter found: pos = pos after leading space + delim; did_find = true.
+-- 2. Delimiter not found: pos = pos after leading space; did_find = false.
+-- This throws an error if err_if_missing is true and the delim is not found.
+local function skip_delim(str, pos, delim, err_if_missing)
+ pos = pos + #str:match('^%s*', pos)
+ if str:sub(pos, pos) ~= delim then
+ if err_if_missing then
+ error('Expected ' .. delim .. ' near position ' .. pos)
+ end
+ return pos, false
+ end
+ return pos + 1, true
+end
+
+-- Expects the given pos to be the first character after the opening quote.
+-- Returns val, pos; the returned pos is after the closing quote character.
+local function parse_str_val(str, pos, val)
+ val = val or ''
+ local early_end_error = 'End of input found while parsing string.'
+ if pos > #str then error(early_end_error) end
+ local c = str:sub(pos, pos)
+ if c == '"' then return val, pos + 1 end
+ if c ~= '\\' then return parse_str_val(str, pos + 1, val .. c) end
+ -- We must have a \ character.
+ local esc_map = {b = '\b', f = '\f', n = '\n', r = '\r', t = '\t'}
+ local nextc = str:sub(pos + 1, pos + 1)
+ if not nextc then error(early_end_error) end
+ return parse_str_val(str, pos + 2, val .. (esc_map[nextc] or nextc))
+end
+
+-- Returns val, pos; the returned pos is after the number's final character.
+local function parse_num_val(str, pos)
+ local num_str = str:match('^-?%d+%.?%d*[eE]?[+-]?%d*', pos)
+ local val = tonumber(num_str)
+ if not val then error('Error parsing number at position ' .. pos .. '.') end
+ return val, pos + #num_str
+end
+
+
+-- Public values and functions.
+
+function json.stringify(obj, as_key)
+ local s = {} -- We'll build the string as an array of strings to be concatenated.
+ local kind = kind_of(obj) -- This is 'array' if it's an array or type(obj) otherwise.
+ if kind == 'array' then
+ if as_key then error('Can\'t encode array as key.') end
+ s[#s + 1] = '['
+ for i, val in ipairs(obj) do
+ if i > 1 then s[#s + 1] = ', ' end
+ s[#s + 1] = json.stringify(val)
+ end
+ s[#s + 1] = ']'
+ elseif kind == 'table' then
+ if as_key then error('Can\'t encode table as key.') end
+ s[#s + 1] = '{'
+ for k, v in pairs(obj) do
+ if #s > 1 then s[#s + 1] = ', ' end
+ s[#s + 1] = json.stringify(k, true)
+ s[#s + 1] = ':'
+ s[#s + 1] = json.stringify(v)
+ end
+ s[#s + 1] = '}'
+ elseif kind == 'string' then
+ return '"' .. escape_str(obj) .. '"'
+ elseif kind == 'number' then
+ if as_key then return '"' .. tostring(obj) .. '"' end
+ return tostring(obj)
+ elseif kind == 'boolean' then
+ return tostring(obj)
+ elseif kind == 'nil' then
+ return 'null'
+ else
+ error('Unjsonifiable type: ' .. kind .. '.')
+ end
+ return table.concat(s)
+end
+
+json.null = {} -- This is a one-off table to represent the null value.
+
+function json.parse(str, pos, end_delim)
+ pos = pos or 1
+ if pos > #str then error('Reached unexpected end of input.') end
+ local pos = pos + #str:match('^%s*', pos) -- Skip whitespace.
+ local first = str:sub(pos, pos)
+ if first == '{' then -- Parse an object.
+ local obj, key, delim_found = {}, true, true
+ pos = pos + 1
+ while true do
+ key, pos = json.parse(str, pos, '}')
+ if key == nil then return obj, pos end
+ if not delim_found then error('Comma missing between object items.') end
+ pos = skip_delim(str, pos, ':', true) -- true -> error if missing.
+ obj[key], pos = json.parse(str, pos)
+ pos, delim_found = skip_delim(str, pos, ',')
+ end
+ elseif first == '[' then -- Parse an array.
+ local arr, val, delim_found = {}, true, true
+ pos = pos + 1
+ while true do
+ val, pos = json.parse(str, pos, ']')
+ if val == nil then return arr, pos end
+ if not delim_found then error('Comma missing between array items.') end
+ arr[#arr + 1] = val
+ pos, delim_found = skip_delim(str, pos, ',')
+ end
+ elseif first == '"' then -- Parse a string.
+ return parse_str_val(str, pos + 1)
+ elseif first == '-' or first:match('%d') then -- Parse a number.
+ return parse_num_val(str, pos)
+ elseif first == end_delim then -- End of an object or array.
+ return nil, pos + 1
+ else -- Parse true, false, or null.
+ local literals = {['true'] = true, ['false'] = false, ['null'] = json.null}
+ for lit_str, lit_val in pairs(literals) do
+ local lit_end = pos + #lit_str - 1
+ if str:sub(pos, lit_end) == lit_str then return lit_val, lit_end + 1 end
+ end
+ local pos_info_str = 'position ' .. pos .. ': ' .. str:sub(pos, pos + 10)
+ error('Invalid json syntax starting at ' .. pos_info_str)
+ end
+end
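+-- Minimal usage sketch of the module above (illustrative, not executed):
+--   local obj = json.parse('{"name": "vpp", "ids": [1, 2, 3]}')
+--   print(obj.name, obj.ids[2])   --> vpp     2
+--   print(json.stringify(obj))    --> e.g. {"name":"vpp", "ids":[1, 2, 3]}
+--                                     (key order may vary)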
+
+
+local vpp = {}
+
+local ffi = require("ffi")
+
+--[[
+
+The basic type definitions. The somewhat odd gymnastics of
+wrapping the results of the hton* and ntoh* functions in unions
+make the handling of signed and unsigned types a bit cleaner,
+essentially building the typecast into a C union.
+
+The vl_api_opaque_message_t is a synthetic type assumed to have
+enough storage to hold the entire API message regardless of the type.
+During operation it is cast to the specific message struct types.
+
+]]
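+
+-- A sketch of that cast (the reply type here is hypothetical; concrete
+-- message types are declared later via ffi.cdef from the API definitions):
+--
+--   local buf = ffi.new("vl_api_opaque_message_t[1]")
+--   local rep = ffi.cast("vl_api_some_reply_t *", buf)
+--   print(ffi.C.ntohs(rep._vl_msg_id))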
+
+
+ffi.cdef([[
+
+typedef uint8_t u8;
+typedef int8_t i8;
+typedef uint16_t u16;
+typedef int16_t i16;
+typedef uint32_t u32;
+typedef int32_t i32;
+typedef uint64_t u64;
+typedef int64_t i64;
+typedef double f64;
+typedef float f32;
+
+#pragma pack(1)
+typedef union {
+ u16 u16;
+ i16 i16;
+} lua_ui16t;
+
+#pragma pack(1)
+typedef union {
+ u32 u32;
+ i32 i32;
+} lua_ui32t;
+
+u16 ntohs(uint16_t hostshort);
+u16 htons(uint16_t hostshort);
+u32 htonl(uint32_t along);
+u32 ntohl(uint32_t along);
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dest, void *src, size_t n);
+
+#pragma pack(1)
+typedef struct _vl_api_opaque_message {
+ u16 _vl_msg_id;
+ u8 data[65536];
+} vl_api_opaque_message_t;
+]])
+
+
+-- CRC-based version stuff
+
+local crc32c_table = ffi.new('const uint32_t[256]',
+ { 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+ 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+ 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+ 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+ 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+ 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+ 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+ 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+ 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+ 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+ 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+ 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+ 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+ 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+ 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+ 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+ 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+ 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+ 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+ 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+ 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+ 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+ 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+ 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+ 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+ 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+ 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+ 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+ 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+ 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+ 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+ 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+ 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 }
+);
+
+local function CRC8(crc, d)
+ return bit.bxor(bit.rshift(crc, 8), crc32c_table[bit.band(0xff, bit.bxor(crc, d))])
+end
+
+local function CRC16(crc, d)
+ crc = CRC8(crc, bit.band(d, 0xFF))
+ d = bit.rshift(d, 8)
+ crc = CRC8(crc, bit.band(d, 0xFF))
+ return crc
+end
+
+local function string_crc(str, crc)
+ for i=1,#str do
+ -- print("S", i, string.byte(str, i), string.char(string.byte(str, i)))
+ crc = CRC8(crc, string.byte(str, i))
+ end
+ return crc
+end
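+
+-- Illustrative composition of the helpers above (values depend on input):
+--
+--   local crc = CRC16(0, 267)                 -- fold the DEFINE token code
+--   crc = string_crc("show_version", crc)     -- fold a token's literal text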
+
+local tokens = {
+ { ["match"] =' ', ["act"] = { } },
+ { ["match"] ='\n', ["act"] = { } },
+ { ["match"] ="manual_endian", ["act"] = { "NODE_MANUAL_ENDIAN", "MANUAL_ENDIAN", 276 } },
+ { ["match"] ="define", ["act"] = { "NODE_DEFINE", "DEFINE", 267 } },
+ { ["match"] ="dont_trace", ["act"] = { "NODE_DONT_TRACE", "DONT_TRACE", 279 } },
+ { ["match"] ="f64", ["act"] = { "NODE_F64", "PRIMTYPE", string_crc } },
+ { ["match"] ="i16", ["act"] = { "NODE_I16", "PRIMTYPE", string_crc } },
+ { ["match"] ="i32", ["act"] = { "NODE_I32", "PRIMTYPE", string_crc } },
+ { ["match"] ="i64", ["act"] = { "NODE_I64", "PRIMTYPE", string_crc } },
+ { ["match"] ="i8", ["act"] = { "NODE_I8", "PRIMTYPE", string_crc } },
+ { ["match"] ="manual_print", ["act"] = { "NODE_MANUAL_PRINT", "MANUAL_PRINT", 275 } },
+ { ["match"] ="noversion", ["act"] = { "NODE_NOVERSION", "NOVERSION", 274 } },
+ { ["match"] ="packed", ["act"] = { "NODE_PACKED", "TPACKED", 266 } },
+ { ["match"] ="typeonly", ["act"] = { "NODE_TYPEONLY", "TYPEONLY", 278 } },
+ { ["match"] ="u16", ["act"] = { "NODE_U16", "PRIMTYPE", string_crc } },
+ { ["match"] ="u32", ["act"] = { "NODE_U32", "PRIMTYPE", string_crc } },
+ { ["match"] ="u64", ["act"] = { "NODE_U64", "PRIMTYPE", string_crc } },
+ { ["match"] ="u8", ["act"] = { "NODE_U8", "PRIMTYPE", string_crc } },
+ { ["match"] ="union", ["act"] = { "NODE_UNION", "UNION", 271 } },
+ { ["match"] ="uword", ["act"] = { "NODE_UWORD", "PRIMTYPE", string_crc } },
+ { ["match"] ="%(", ["act"] = { "NODE_LPAR", "LPAR", 259 } },
+ { ["match"] ="%)", ["act"] = { "NODE_RPAR", "RPAR", 258 } },
+ { ["match"] =";", ["act"] = { "NODE_SEMI", "SEMI", 260 } },
+ { ["match"] ="%[", ["act"] = { "NODE_LBRACK", "LBRACK", 261 } },
+ { ["match"] ="%]", ["act"] = { "NODE_RBRACK", "RBRACK", 262 } },
+ { ["match"] ="%{", ["act"] = { "NODE_LCURLY", "LCURLY", 268 } },
+ { ["match"] ="%}", ["act"] = { "NODE_RCURLY", "RCURLY", 269 } },
+ { ["match"] ='%b""', ["act"] = { "NODE_STRING", "STRING", string_crc } },
+ { ["match"] ='%b@@', ["act"] = { "NODE_HELPER", "HELPER_STRING", string_crc } },
+ -- TODO: \ must be consumed
+ { ["match"] ='[_a-zA-Z][_a-zA-Z0-9]*',
+ ["act"] = { "NODE_NAME", "NAME", string_crc } },
+ { ["match"] ='[0-9]+', ["act"] = { "NODE_NUMBER", "NUMBER", string_crc } },
+ { ["match"] ='#[^\n]+', ["act"] = { "NODE_PRAGMA", "PRAGMA", nil } },
+}
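+
+-- Matching note: the tokens are tried in list order and the first pattern
+-- whose match starts exactly at the current offset wins, so the fixed
+-- keywords above must stay ahead of the generic NAME/NUMBER patterns.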
+
+
+function vpp.crc_version_string(data)
+ local input_crc = 0
+ -- Get rid of comments
+ data = data:gsub("/%*.-%*/", "")
+ data = data:gsub("//[^\n]+", "")
+ -- print(data)
+ local idx = 1
+ while (true) do
+ local matched = nil
+ for k, v in ipairs(tokens) do
+ if not matched then
+ local x, y, cap = string.find(data, v["match"], idx)
+ if x == idx then
+ matched = { ["node"] = v["act"], ["x"] = x, ["y"] = y, ["cap"] = cap, ["chars"] = string.sub(data, x, y) }
+ -- print(k, v, x, y, cap, matched.chars, matched.node[0] )
+ end
+ end
+ end
+ if matched then
+ idx = idx + (matched.y - matched.x + 1)
+ if matched.node[1] then
+ local act = matched.node[3]
+ if type(act) == "function" then
+ input_crc = act(matched.chars, input_crc)
+ elseif type(act) == "number" then
+ input_crc = CRC16(input_crc, act)
+ end
+ -- print(vpp.dump(matched))
+ end
+ else
+ -- print("NOT MATCHED!")
+ local crc = CRC16(input_crc, 0xFFFFFFFF)
+ return string.sub(string.format("%x", crc), -8)
+ end
+ end
+end
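+
+-- Usage sketch (the .api fragment is made up; the result is the last
+-- 8 hex digits of the finalized CRC over the token stream):
+--
+--   local ver = vpp.crc_version_string("define foo { u32 bar; };")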
+
+
+function vpp.dump(o)
+ if type(o) == 'table' then
+ local s = '{ '
+ for k,v in pairs(o) do
+ if type(k) ~= 'number' then k = '"'..k..'"' end
+ s = s .. '['..k..'] = ' .. vpp.dump(v) .. ','
+ end
+ return s .. '} '
+ else
+ return tostring(o)
+ end
+end
+
+function vpp.hex_dump(buf)
+ local ret = {}
+ for i=1,math.ceil(#buf/16) * 16 do
+ if (i-1) % 16 == 0 then table.insert(ret, string.format('%08X ', i-1)) end
+ table.insert(ret, ( i > #buf and ' ' or string.format('%02X ', buf:byte(i)) ))
+ if i % 8 == 0 then table.insert(ret, ' ') end
+ if i % 16 == 0 then table.insert(ret, buf:sub(i-16+1, i):gsub('%c','.')..'\n' ) end
+ end
+ return table.concat(ret)
+end
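+
+-- Example: vpp.hex_dump("ABC") renders a single 16-byte-wide row, roughly
+-- "00000000 41 42 43 ...   ABC" (offset, hex bytes, then printable text).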
+
+
+function vpp.c_str(text_in)
+ local text = text_in -- \000 will be helpfully added by ffi.copy
+ local c_str = ffi.new("char[?]", #text+1)
+ ffi.copy(c_str, text)
+ return c_str
+end
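+
+-- e.g. ffi.string(vpp.c_str("abc")) == "abc"; the buffer is one byte longer
+-- than the Lua string so the zero byte written by ffi.copy fits.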
+
+
+function vpp.init(vpp, args)
+ local vac_api = args.vac_api or [[
+ int cough_vac_attach(char *vac_path, char *cough_path);
+ int vac_connect(char *name, char *chroot_prefix, void *cb);
+ int vac_disconnect(void);
+ int vac_read(char **data, int *l);
+ int vac_write(char *data, int len);
+ void vac_free(char *data);
+ uint32_t vac_get_msg_index(unsigned char * name);
+]]
+
+ vpp.vac_path = args.vac_path
+ ffi.cdef(vac_api)
+ local init_res = 0
+ vpp.vac = ffi.load(vpp.vac_path)
+ if (init_res < 0) then
+ return nil
+ end
+
+ vpp.next_msg_num = 1
+ vpp.msg_name_to_number = {}
+ vpp.msg_name_to_fields = {}
+ vpp.msg_number_to_name = {}
+ vpp.msg_number_to_type = {}
+ vpp.msg_number_to_pointer_type = {}
+ vpp.msg_name_to_crc = {}
+ vpp.c_type_to_fields = {}
+ vpp.events = {}
+ vpp.plugin_version = {}
+ vpp.is_connected = false
+
+
+ vpp.t_lua2c = {}
+ vpp.t_c2lua = {}
+ vpp.t_lua2c["u8"] = function(c_type, src, dst_c_ptr)
+ if type(src) == "string" then
+ -- ffi.copy adds a zero byte at the end. Grrr.
+ -- ffi.copy(dst_c_ptr, src)
+ ffi.C.memcpy(dst_c_ptr, vpp.c_str(src), #src)
+ return(#src)
+ elseif type(src) == "table" then
+ for i,v in ipairs(src) do
+ ffi.cast("u8 *", dst_c_ptr)[i-1] = v
+ end
+ return(#src)
+ else
+ return 1, src -- ffi.cast("u8", src)
+ end
+ end
+ vpp.t_c2lua["u8"] = function(c_type, src_ptr, src_len)
+ if src_len then
+ return ffi.string(src_ptr, src_len)
+ else
+ return (tonumber(src_ptr))
+ end
+ end
+
+ vpp.t_lua2c["u16"] = function(c_type, src, dst_c_ptr)
+ if type(src) == "table" then
+ for i,v in ipairs(src) do
+ ffi.cast("u16 *", dst_c_ptr)[i-1] = ffi.C.htons(v)
+ end
+ return(2 * #src)
+ else
+ return 2, (ffi.C.htons(src))
+ end
+ end
+ vpp.t_c2lua["u16"] = function(c_type, src_ptr, src_len)
+ if src_len then
+ local out = {}
+ for i = 0,src_len-1 do
+ out[i+1] = tonumber(ffi.C.ntohs(src_ptr[i]))
+ end
+ return out
+ else
+ return (tonumber(ffi.C.ntohs(src_ptr)))
+ end
+ end
+
+ vpp.t_lua2c["u32"] = function(c_type, src, dst_c_ptr)
+ if type(src) == "table" then
+ for i,v in ipairs(src) do
+ ffi.cast("u32 *", dst_c_ptr)[i-1] = ffi.C.htonl(v)
+ end
+ return(4 * #src)
+ else
+ return 4, (ffi.C.htonl(src))
+ end
+ end
+ vpp.t_c2lua["u32"] = function(c_type, src_ptr, src_len)
+ if src_len then
+ local out = {}
+ for i = 0,src_len-1 do
+ out[i+1] = tonumber(ffi.C.ntohl(src_ptr[i]))
+ end
+ return out
+ else
+ return (tonumber(ffi.C.ntohl(src_ptr)))
+ end
+ end
+ vpp.t_lua2c["i32"] = function(c_type, src, dst_c_ptr)
+ if type(src) == "table" then
+ for i,v in ipairs(src) do
+ ffi.cast("i32 *", dst_c_ptr)[i-1] = ffi.C.htonl(v)
+ end
+ return(4 * #src)
+ else
+ return 4, (ffi.C.htonl(src))
+ end
+ end
+ vpp.t_c2lua["i32"] = function(c_type, src_ptr, src_len)
+ local ntohl = function(src)
+ local u32val = ffi.cast("u32", src)
+ local ntohlval = (ffi.C.ntohl(u32val))
+ local out = tonumber(ffi.cast("i32", ntohlval + 0LL))
+ return out
+ end
+ if src_len then
+ local out = {}
+ for i = 0,src_len-1 do
+ out[i+1] = tonumber(ntohl(src_ptr[i]))
+ end
+ return out -- this return was missing, so the array case yielded nil
+ else
+ return (tonumber(ntohl(src_ptr)))
+ end
+ end
+
+ vpp.t_lua2c["u64"] = function(c_type, src, dst_c_ptr)
+ if type(src) == "table" then
+ for i,v in ipairs(src) do
+ ffi.cast("u64 *", dst_c_ptr)[i-1] = v --- FIXME ENDIAN
+ end
+ return(8 * #src)
+ else
+ return 8, ffi.cast("u64", src) --- FIXME ENDIAN
+ end
+ end
+ vpp.t_c2lua["u64"] = function(c_type, src_ptr, src_len)
+ if src_len then
+ local out = {}
+ for i = 0,src_len-1 do
+ out[i+1] = tonumber(src_ptr[i]) -- FIXME ENDIAN
+ end
+ return out
+ else
+ return (tonumber(src_ptr)) --FIXME ENDIAN
+ end
+ end
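+
+ -- Shared convention for the converters above (a summary, not new code):
+ -- t_lua2c[T] returns the byte count written and, for scalars, the value
+ -- to store; t_c2lua[T] returns a Lua value, or an array when src_len is
+ -- given, with u8 arrays special-cased into Lua strings.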
+
+
+
+
+ vpp.t_lua2c["__MSG__"] = function(c_type, src, dst_c_ptr)
+ local dst = ffi.cast(c_type .. " *", dst_c_ptr)
+ local additional_len = 0
+ local fields_info = vpp.c_type_to_fields[c_type]
+ -- print("__MSG__ type: " .. tostring(c_type))
+ ffi.C.memset(dst_c_ptr, 0, ffi.sizeof(dst[0]))
+ -- print(vpp.dump(fields_info))
+ -- print(vpp.dump(src))
+ for k,v in pairs(src) do
+ local field = fields_info[k]
+ if not field then
+ print("ERROR: field " .. tostring(k) .. " in message " .. tostring(c_type) .. " is unknown")
+ end
+ local lua2c = vpp.t_lua2c[field.c_type]
+ -- print("__MSG__ field " .. tostring(k) .. " : " .. vpp.dump(field))
+ -- if the field is not an array type, try to coerce the argument to a number
+ if not field.array and type(v) == "string" then
+ v = tonumber(v)
+ end
+ if not lua2c then
+ print("__MSG__ " .. tostring(c_type) .. " t_lua2c: can not store field " .. field.name ..
+ " type " .. field.c_type .. " dst " .. tostring(dst[k]))
+ return 0
+ end
+ local len = 0
+ local val = nil
+ if field.array and (type(v) == "table") then
+ -- print("NTFY: field " .. tostring(k) .. " in message " .. tostring(c_type) .. " is an array")
+ for field_i, field_v in ipairs(v) do
+ -- print("NTFY: setting member#" .. tostring(field_i) .. " to value " .. vpp.dump(field_v))
+ local field_len, field_val = lua2c(field.c_type, field_v, dst[k][field_i-1])
+ len = len + field_len
+ end
+ else
+ len, val = lua2c(field.c_type, v, dst[k])
+ end
+ if not field.array then
+ dst[k] = val
+ else
+ if 0 == field.array then
+ additional_len = additional_len + len
+ -- print("Adding " .. tostring(len) .. " bytes due to field " .. tostring(field.name))
+ -- If there is a variable storing the length
+ -- and the input table does not set it, do magic
+ if field.array_size and not src[field.array_size] then
+ local size_field = fields_info[field.array_size]
+ if size_field then
+ dst[field.array_size] = vpp.t_c2lua[size_field.c_type](size_field.c_type, len)
+ end
+ end
+ end
+ end
+ -- print("Full message:\n" .. vpp.hex_dump(ffi.string(ffi.cast('void *', req_store_cache), 64)))
+ end
+ return (ffi.sizeof(dst[0])+additional_len)
+ end
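+
+ -- Example of the auto-length behavior above, using the cli_inband message
+ -- (u32 length; u8 cmd[0] sized by "length"): encoding { cmd = "show int" }
+ -- stores the command bytes and, because "length" was not supplied, fills
+ -- it in from the number of bytes actually written.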
+
+ vpp.t_c2lua["__MSG__"] = function(c_type, src_ptr, src_len)
+ local out = {}
+ local reply_typed_ptr = ffi.cast(c_type .. " *", src_ptr)
+ local field_desc = vpp.c_type_to_fields[c_type]
+ if src_len then
+ for i = 0,src_len-1 do
+ out[i+1] = vpp.t_c2lua[c_type](c_type, src_ptr[i])
+ end
+ return out
+ end
+
+ for k, v in pairs(field_desc) do
+ local v_c2lua = vpp.t_c2lua[v.c_type]
+ if v_c2lua then
+ local len = v.array
+ -- print(dump(v))
+ if len then
+ local len_field_name = k .. "_length"
+ local len_field = field_desc[len_field_name]
+ if (len_field) then
+ local real_len = vpp.t_c2lua[len_field.c_type](len_field.c_type, reply_typed_ptr[len_field_name])
+ out[k] = v_c2lua(v.c_type, reply_typed_ptr[k], real_len)
+ elseif len == 0 then
+ -- check if len = 0, then must be a field which contains the size
+ len_field = field_desc[v.array_size]
+ local real_len = vpp.t_c2lua[len_field.c_type](len_field.c_type, reply_typed_ptr[v.array_size])
+ -- print("REAL length: " .. vpp.dump(v) .. " : " .. tostring(real_len))
+ out[k] = v_c2lua(v.c_type, reply_typed_ptr[k], real_len)
+ else
+ -- alas, just stuff the entire array
+ out[k] = v_c2lua(v.c_type, reply_typed_ptr[k], len)
+ end
+ else
+ out[k] = v_c2lua(v.c_type, reply_typed_ptr[k])
+ end
+ else
+ out[k] = "<no accessor function for type " .. tostring(v.c_type) .. ">"
+ end
+ -- print(k, out[k])
+ end
+ return out
+ end
+
+ return vpp
+end
+
+function vpp.resolve_message_number(msgname)
+ local name = msgname .. "_" .. vpp.msg_name_to_crc[msgname]
+ local idx = vpp.vac.vac_get_msg_index(vpp.c_str(name))
+ if vpp.debug_dump then
+ print("Index for " .. tostring(name) .. " is " .. tostring(idx))
+ end
+ vpp.msg_name_to_number[msgname] = idx
+ vpp.msg_number_to_name[idx] = msgname
+ vpp.msg_number_to_type[idx] = "vl_api_" .. msgname .. "_t"
+ vpp.msg_number_to_pointer_type[idx] = vpp.msg_number_to_type[idx] .. " *"
+ ffi.cdef("\n\n enum { vl_msg_" .. msgname .. " = " .. idx .. " };\n\n")
+end
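+
+-- e.g. "show_version" with CRC f18f9480 is registered with VAC as
+-- "show_version_f18f9480", and vac_get_msg_index() returns its numeric id.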
+
+function vpp.connect(vpp, client_name)
+ local name = "lua_client"
+ if client_name then
+ name = client_name
+ end
+ local ret = vpp.vac.vac_connect(vpp.c_str(name), nil, nil) -- use the defaulted name, not the possibly-nil argument
+ if tonumber(ret) == 0 then
+ vpp.is_connected = true
+ end
+ for k, v in pairs(vpp.msg_name_to_number) do
+ vpp.resolve_message_number(k)
+ end
+ end
+
+function vpp.disconnect(vpp)
+ vpp.vac.vac_disconnect()
+ end
+
+function vpp.json_api(vpp, path, plugin_name)
+ -- print("Consuming the VPP api from "..path)
+ local ffii = {}
+ local f = io.open(path, "r")
+ if not f then
+ print("Could not open " .. path)
+ return nil
+ end
+ local data = f:read("*all")
+ local json = json.parse(data)
+ if not (json.types or json.messages) then
+ print("Can not parse " .. path)
+ return nil
+ end
+
+ local all_types = {}
+
+ for i, v in ipairs(json.types) do
+ table.insert(all_types, { typeonly = 1, desc = v })
+ end
+ for i, v in ipairs(json.messages) do
+ table.insert(all_types, { typeonly = 0, desc = v })
+ end
+ for i, v in ipairs(all_types) do
+ local typeonly = v.typeonly
+ local name = v.desc[1]
+ local c_type = "vl_api_" .. name .. "_t"
+
+ local fields = {}
+ -- vpp.msg_name_to_fields[name] = fields
+ -- print("CTYPE " .. c_type)
+ vpp.c_type_to_fields[c_type] = fields
+ vpp.t_lua2c[c_type] = vpp.t_lua2c["__MSG__"]
+ vpp.t_c2lua[c_type] = vpp.t_c2lua["__MSG__"]
+
+ local cdef = { "\n\n#pragma pack(1)\ntypedef struct _vl_api_", name, " {\n" }
+ for ii, vv in ipairs(v.desc) do
+ if type(vv) == "table" then
+ if vv.crc then
+ vpp.msg_name_to_crc[name] = string.sub(vv.crc, 3) -- strip the leading 0x
+ else
+ local fieldtype = vv[1]
+ local fieldname = vv[2]
+ local fieldcount = vv[3]
+ local fieldcountvar = vv[4]
+ local fieldrec = { name = fieldname, c_type = fieldtype, array = fieldcount, array_size = fieldcountvar }
+ if fieldcount then
+ table.insert(cdef, " " .. fieldtype .. " " .. fieldname .. "[" .. fieldcount .. "];\n")
+ if fieldtype == "u8" then
+ -- any array of bytes is treated as a string
+ elseif vpp.t_lua2c[fieldtype] then
+ -- print("Array of " .. fieldtype .. " is ok!")
+ else
+ print("Unknown array type: ", name, " : " , fieldname, " : ", fieldtype, ":", fieldcount, ":", fieldcountvar)
+ end
+ else
+ table.insert(cdef, " " .. fieldtype .. " " .. fieldname .. ";\n")
+ end
+ fields[fieldname] = fieldrec
+ end
+ end
+ end
+
+ table.insert(cdef, "} vl_api_" .. name .. "_t;")
+ table.insert(ffii, table.concat(cdef))
+
+ if typeonly == 0 then
+ -- we will want to resolve this later
+ if vpp.debug_dump then
+ print("Remember to resolve " .. name)
+ end
+ vpp.msg_name_to_number[name] = -1
+ if vpp.is_connected then
+ vpp.resolve_message_number(name)
+ end
+ end
+
+ end
+ local cdef_full = table.concat(ffii)
+ ffi.cdef(cdef_full)
+end
+
+function vpp.consume_api(vpp, path, plugin_name)
+ -- print("Consuming the VPP api from "..path)
+ local ffii = {}
+ local f = io.open(path, "r")
+ if not f then
+ print("Could not open " .. path)
+ return nil
+ end
+ local data = f:read("*all")
+ -- Remove all C comments
+ data = data:gsub("/%*.-%*/", "")
+ if vpp.is_connected and not plugin_name then
+ print(path .. ": must specify plugin name!")
+ return
+ end
+ if plugin_name then
+ vpp.plugin_version[plugin_name] = vpp.crc_version_string(data)
+ local full_plugin_name = plugin_name .. "_" .. vpp.plugin_version[plugin_name]
+ local reply = vpp:api_call("get_first_msg_id", { name = full_plugin_name } )
+ vpp.next_msg_num = tonumber(reply[1].first_msg_id)
+ print("Plugin " .. full_plugin_name .. " first message is " .. tostring(vpp.next_msg_num))
+ end
+ -- print ("data len: ", #data)
+ data = data:gsub("\n(.-)(%S+)%s*{([^}]*)}", function (preamble, name, members)
+ local _, typeonly = preamble:gsub("typeonly", "")
+ local maybe_msg_id_field = { [0] = "u16 _vl_msg_id;", "" }
+ local onedef = "\n\n#pragma pack(1)\ntypedef struct _vl_api_"..name.. " {\n" ..
+ -- " u16 _vl_msg_id;" ..
+ maybe_msg_id_field[typeonly] ..
+ members:gsub("%[[a-zA-Z_]+]", "[0]") ..
+ "} vl_api_" .. name .. "_t;"
+
+ local c_type = "vl_api_" .. name .. "_t"
+
+ local fields = {}
+ -- vpp.msg_name_to_fields[name] = fields
+ -- print("CTYPE " .. c_type)
+ vpp.c_type_to_fields[c_type] = fields
+ vpp.t_lua2c[c_type] = vpp.t_lua2c["__MSG__"]
+ vpp.t_c2lua[c_type] = vpp.t_c2lua["__MSG__"]
+ local mirec = { name = "_vl_msg_id", c_type = "u16", array = nil, array_size = nil }
+ if typeonly == 0 then
+ fields[mirec.name] = mirec
+ end
+
+ -- populate the field reflection table for the message
+ -- sets the various type information as well as the accessors for lua<->C conversion
+ members:gsub("(%S+)%s+(%S+);", function (fieldtype, fieldname)
+ local fieldcount = nil
+ local fieldcountvar = nil
+ -- data = data:gsub("%[[a-zA-Z_]+]", "[0]")
+ fieldname = fieldname:gsub("(%b[])", function(cnt)
+ fieldcount = tonumber(cnt:sub(2, -2));
+ if not fieldcount then
+ fieldcount = 0
+ fieldcountvar = cnt:sub(2, -2)
+ end
+ return ""
+ end)
+ local fieldrec = { name = fieldname, c_type = fieldtype, array = fieldcount, array_size = fieldcountvar }
+ if fieldcount then
+ if fieldtype == "u8" then
+ -- any array of bytes is treated as a string
+ elseif vpp.t_lua2c[fieldtype] then
+ -- print("Array of " .. fieldtype .. " is ok!")
+ else
+ print("Unknown array type: ", name, " : " , fieldname, " : ", fieldtype, ":", fieldcount, ":", fieldcountvar)
+ end
+ end
+ fields[fieldname] = fieldrec
+ end)
+
+ -- print(dump(fields))
+
+ if typeonly == 0 then
+ local this_message_number = vpp.next_msg_num
+ vpp.next_msg_num = vpp.next_msg_num + 1
+ vpp.msg_name_to_number[name] = this_message_number
+ vpp.msg_number_to_name[this_message_number] = name
+ vpp.msg_number_to_type[this_message_number] = "vl_api_" .. name .. "_t"
+ vpp.msg_number_to_pointer_type[this_message_number] = vpp.msg_number_to_type[this_message_number] .. " *"
+ onedef = onedef .. "\n\n enum { vl_msg_" .. name .. " = " .. this_message_number .. " };\n\n"
+ end
+ table.insert(ffii, onedef);
+ return "";
+ end)
+ local cdef = table.concat(ffii)
+ -- print(cdef)
+ ffi.cdef(cdef)
+ end
+
+
+function vpp.lua2c(vpp, c_type, src, dst_c_ptr)
+ -- returns the number of bytes written to memory pointed by dst
+ local lua2c = vpp.t_lua2c[c_type]
+ if lua2c then
+ return(lua2c(c_type, src, dst_c_ptr))
+ else
+ print("vpp.lua2c: do not know how to store type " .. tostring(c_type))
+ local x = "a" .. nil
+ return 0
+ end
+end
+
+function vpp.c2lua(vpp, c_type, src_ptr, src_len)
+ -- returns the lua data structure
+ local c2lua = vpp.t_c2lua[c_type]
+ if c2lua then
+ return(c2lua(c_type, src_ptr, src_len))
+ else
+ print("vpp.c2lua: do not know how to load type " .. c_type)
+ return nil
+ end
+end
+
+local req_store_cache = ffi.new("vl_api_opaque_message_t[1]")
+
+function vpp.api_write(vpp, api_name, req_table)
+ local msg_num = vpp.msg_name_to_number[api_name]
+ if not msg_num then
+ print ("API call "..api_name.." is not known")
+ return nil
+ end
+
+ if not req_table then
+ req_table = {}
+ end
+ req_table._vl_msg_id = msg_num
+
+ local packed_len = vpp:lua2c(vpp.msg_number_to_type[msg_num], req_table, req_store_cache)
+ if vpp.debug_dump then
+ print("Write Message length: " .. tostring(packed_len) .. "\n" .. vpp.hex_dump(ffi.string(ffi.cast('void *', req_store_cache), packed_len)))
+ end
+
+ local res = vpp.vac.vac_write(ffi.cast('void *', req_store_cache), packed_len)
+ return res
+ end
+
+local rep_store_cache = ffi.new("vl_api_opaque_message_t *[1]")
+local rep_len_cache = ffi.new("int[1]")
+
+function vpp.api_read(vpp)
+ local rep_type = "vl_api_opaque_message_t"
+ local rep = rep_store_cache
+ local replen = rep_len_cache
+ local res = vpp.vac.vac_read(ffi.cast("void *", rep), replen)
+ if vpp.debug_dump then
+ print("Read Message length: " .. tostring(replen[0]) .. "\n" .. vpp.hex_dump(ffi.string(ffi.cast('void *', rep[0]), replen[0])))
+ end
+
+ local reply_msg_num = ffi.C.ntohs(rep[0]._vl_msg_id)
+ local reply_msg_name = vpp.msg_number_to_name[reply_msg_num]
+
+ local reply_typed_ptr = ffi.cast(vpp.msg_number_to_pointer_type[reply_msg_num], rep[0])
+ local out = vpp:c2lua(vpp.msg_number_to_type[reply_msg_num], rep[0], nil, replen[0])
+ if type(out) == "table" then
+ out["luaapi_message_name"] = reply_msg_name
+ end
+
+ vpp.vac.vac_free(ffi.cast('void *',rep[0]))
+
+ return reply_msg_name, out
+ end
+
+function vpp.api_call(vpp, api_name, req_table, options_in)
+ local msg_num = vpp.msg_name_to_number[api_name]
+ local end_message_name = api_name .."_reply"
+ local replies = {}
+ local cstruct = ""
+ local options = options_in or {}
+ if msg_num then
+ if vpp.debug_dump then
+ print("Message #" .. tostring(msg_num) .. " for name " .. tostring(api_name))
+ end
+ vpp:api_write(api_name, req_table)
+ if not vpp.msg_name_to_number[end_message_name] or options.force_ping then
+ end_message_name = "control_ping_reply"
+ vpp:api_write("control_ping")
+ end
+ repeat
+ local reply_message_name, reply = vpp:api_read()
+ if reply and not reply.context then
+ -- there may be async events inbetween
+ table.insert(vpp.events, reply)
+ else
+ if reply_message_name ~= "control_ping_reply" then
+ -- do not insert the control ping encapsulation
+ table.insert(replies, reply)
+ end
+ end
+ -- print(reply)
+ until reply_message_name == end_message_name
+ else
+ print(api_name .. " is an unknown API call")
+ return nil
+ end
+ return replies
+ end
+
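+--[[
+Usage sketch. The module name and file paths below are hypothetical;
+substitute the actual install locations:
+
+  local vpp = require("vpp-lapi")
+  vpp:init({ vac_path = "/usr/lib/libvppapiclient.so" })
+  vpp:consume_api("/usr/share/vpp/api/vpe.api")
+  vpp:connect("lua-client")
+  for _, reply in ipairs(vpp:api_call("show_version")) do
+    print(vpp.dump(reply))
+  end
+  vpp:disconnect()
+]]
+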
+return vpp
diff --git a/src/vpp-api/python/LICENSE.txt b/src/vpp-api/python/LICENSE.txt
new file mode 100644
index 00000000..8f71f43f
--- /dev/null
+++ b/src/vpp-api/python/LICENSE.txt
@@ -0,0 +1,202 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/src/vpp-api/python/Makefile.am b/src/vpp-api/python/Makefile.am
new file mode 100644
index 00000000..e6c064e1
--- /dev/null
+++ b/src/vpp-api/python/Makefile.am
@@ -0,0 +1,23 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+install-exec-local:
+ (cd $(srcdir) ; $(PYTHON) $(srcdir)/setup.py build \
+ --build-base $(shell readlink -f $(builddir))/build \
+ install \
+ --root / \
+ --prefix $(DESTDIR)$(prefix) \
+ --single-version-externally-managed \
+ --verbose \
+ bdist_egg \
+ --dist-dir=$(DESTDIR)$(prefix))
diff --git a/src/vpp-api/python/README.rst b/src/vpp-api/python/README.rst
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/vpp-api/python/README.rst
diff --git a/src/vpp-api/python/setup.cfg b/src/vpp-api/python/setup.cfg
new file mode 100644
index 00000000..79bc6784
--- /dev/null
+++ b/src/vpp-api/python/setup.cfg
@@ -0,0 +1,5 @@
+[bdist_wheel]
+# This flag says that the code is written to work on both Python 2 and Python
+# 3. If at all possible, it is good practice to do this. If you cannot, you
+# will need to generate wheels for each Python version that you support.
+universal=1
diff --git a/src/vpp-api/python/setup.py b/src/vpp-api/python/setup.py
new file mode 100644
index 00000000..626dddee
--- /dev/null
+++ b/src/vpp-api/python/setup.py
@@ -0,0 +1,33 @@
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+ from setuptools import setup
+except ImportError:
+ from distutils.core import setup
+
+setup (name = 'vpp_papi',
+ version = '1.4',
+ description = 'VPP Python binding',
+ author = 'Ole Troan',
+ author_email = 'ot@cisco.com',
+ url = 'https://wiki.fd.io/view/VPP/Python_API',
+ python_requires='>=2.7, >=3.3',
+ license = 'Apache-2.0',
+ test_suite = 'tests',
+ install_requires=['cffi >= 1.10'],
+ py_modules=['vpp_papi'],
+ long_description = '''VPP Python language binding.''',
+ zip_safe = True,
+)
diff --git a/src/vpp-api/python/tests/test_cli.py b/src/vpp-api/python/tests/test_cli.py
new file mode 100755
index 00000000..66fb6943
--- /dev/null
+++ b/src/vpp-api/python/tests/test_cli.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import unittest, sys, time, threading, struct
+import test_base
+import vpp_papi
+from ipaddress import *
+
+import glob, subprocess
+class TestPAPI(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ #
+ # Start main VPP process
+ cls.vpp_bin = glob.glob(test_base.scriptdir+'/../../../build-root/install-vpp*-native/vpp/bin/vpp')[0]
+ print("VPP BIN:", cls.vpp_bin)
+ cls.vpp = subprocess.Popen([cls.vpp_bin, "unix", "nodaemon"], stderr=subprocess.PIPE)
+ print('Started VPP')
+ # For some reason the API cannot connect unless we give VPP time to start up.
+ time.sleep(0.3)
+ @classmethod
+ def tearDownClass(cls):
+ cls.vpp.terminate()
+
+ def setUp(self):
+ print("Connecting API")
+ r = vpp_papi.connect("test_papi")
+ self.assertEqual(r, 0)
+
+ def tearDown(self):
+ r = vpp_papi.disconnect()
+ self.assertEqual(r, 0)
+
+ #
+ # The tests themselves
+ #
+
+ #
+ # Basic request / reply
+ #
+ def test_cli_request(self):
+ print(vpp_papi.cli_exec('show version verbose'))
+ #t = vpp_papi.cli_inband_request(len(cmd), cmd)
+ #print('T:',t)
+ #reply = t.reply[0].decode().rstrip('\x00')
+ #print(reply)
+ #program = t.program.decode().rstrip('\x00')
+ #self.assertEqual('vpe', program)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/vpp-api/python/tests/test_modules.py b/src/vpp-api/python/tests/test_modules.py
new file mode 100755
index 00000000..fdcd092c
--- /dev/null
+++ b/src/vpp-api/python/tests/test_modules.py
@@ -0,0 +1,18 @@
+from __future__ import print_function
+import unittest
+import vpp_papi
+import pot, snat
+print('Plugins:')
+vpp_papi.plugin_show()
+r = vpp_papi.connect('ole')
+
+r = vpp_papi.show_version()
+print('R:', r)
+
+r = snat.snat_interface_add_del_feature(1, 1, 1)
+print('R:', r)
+
+list_name = 'foobar'
+r = pot.pot_profile_add(0, 1, 123, 123, 0, 12, 0, 23, len(list_name), list_name)
+print('R:', r)
+vpp_papi.disconnect()
diff --git a/src/vpp-api/python/tests/test_papi.py b/src/vpp-api/python/tests/test_papi.py
new file mode 100755
index 00000000..8cbbfc59
--- /dev/null
+++ b/src/vpp-api/python/tests/test_papi.py
@@ -0,0 +1,119 @@
+from __future__ import print_function
+import unittest, sys, time, threading, struct, logging, os
+import vpp_papi
+from ipaddress import *
+scriptdir = os.path.dirname(os.path.realpath(__file__))
+papi_event = threading.Event()
+print(vpp_papi.vpe.VL_API_SW_INTERFACE_SET_FLAGS)
+def papi_event_handler(result):
+ if result.vl_msg_id == vpp_papi.vpe.VL_API_SW_INTERFACE_SET_FLAGS:
+ return
+ if result.vl_msg_id == vpp_papi.vpe.VL_API_VNET_INTERFACE_COUNTERS:
+ print('Interface counters', result)
+ return
+ if result.vl_msg_id == vpp_papi.vpe.VL_API_VNET_IP6_FIB_COUNTERS:
+ print('IPv6 FIB counters', result)
+ papi_event.set()
+ return
+
+ print('Unknown message id:', result.vl_msg_id)
+
+import glob, subprocess
+class TestPAPI(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ #
+ # Start main VPP process
+ cls.vpp_bin = glob.glob(scriptdir+'/../../../build-root/install-vpp*-native/vpp/bin/vpp')[0]
+ print("VPP BIN:", cls.vpp_bin)
+ cls.vpp = subprocess.Popen([cls.vpp_bin, "unix", "nodaemon"], stderr=subprocess.PIPE)
+ print('Started VPP')
+ # For some reason the API cannot connect unless we give VPP time to start up.
+ time.sleep(0.3)
+ @classmethod
+ def tearDownClass(cls):
+ cls.vpp.terminate()
+
+ def setUp(self):
+ print("Connecting API")
+ r = vpp_papi.connect("test_papi")
+ self.assertEqual(r, 0)
+
+ def tearDown(self):
+ r = vpp_papi.disconnect()
+ self.assertEqual(r, 0)
+
+ #
+ # The tests themselves
+ #
+
+ #
+ # Basic request / reply
+ #
+ def test_show_version(self):
+ t = vpp_papi.show_version()
+ print('T', t)
+ program = t.program.decode().rstrip('\x00')
+ self.assertEqual('vpe', program)
+
+ #
+ # Details / Dump
+ #
+ def test_details_dump(self):
+ t = vpp_papi.sw_interface_dump(0, b'')
+ print('Dump/details T', t)
+
+ #
+ # Arrays
+ #
+ def test_arrays(self):
+ t = vpp_papi.vnet_get_summary_stats()
+ print('Summary stats', t)
+ print('Packets:', t.total_pkts[0])
+ print('Packets:', t.total_pkts[1])
+ #
+ # Variable sized arrays and counters
+ #
+ #@unittest.skip("stats")
+ def test_want_stats(self):
+ pid = 123
+ vpp_papi.register_event_callback(papi_event_handler)
+ papi_event.clear()
+
+ # Need to configure IPv6 to get some IPv6 FIB stats
+ t = vpp_papi.create_loopback('')
+ print(t)
+ self.assertEqual(t.retval, 0)
+
+ ifindex = t.sw_if_index
+ addr = str(IPv6Address(u'1::1').packed)
+ t = vpp_papi.sw_interface_add_del_address(ifindex, 1, 1, 0, 16, addr)
+ print(t)
+ self.assertEqual(t.retval, 0)
+
+ # Check if interface is up
+ # XXX: Add new API to query interface state based on ifindex, instead of dump all.
+ t = vpp_papi.sw_interface_set_flags(ifindex, 1, 1, 0)
+ self.assertEqual(t.retval, 0)
+
+ t = vpp_papi.want_stats(True, pid)
+
+ print (t)
+
+ #
+ # Wait for some stats
+ #
+ self.assertEqual(papi_event.wait(15), True)
+ t = vpp_papi.want_stats(False, pid)
+ print (t)
+
+
+ #
+ # Plugins?
+ #
+
+if __name__ == '__main__':
+ #logging.basicConfig(level=logging.DEBUG)
+ unittest.main()
+def test_papi():
+ print('test')
diff --git a/src/vpp-api/python/tests/test_version.py b/src/vpp-api/python/tests/test_version.py
new file mode 100755
index 00000000..de39cc24
--- /dev/null
+++ b/src/vpp-api/python/tests/test_version.py
@@ -0,0 +1,35 @@
+from __future__ import print_function
+import unittest, sys, time, threading, struct
+
+import vpp_papi
+from ipaddress import *
+import glob, subprocess
+class TestPAPI(unittest.TestCase):
+ def setUp(self):
+ print("Connecting API")
+ r = vpp_papi.connect("test_papi")
+ self.assertEqual(r, 0)
+
+ def tearDown(self):
+ r = vpp_papi.disconnect()
+ self.assertEqual(r, 0)
+
+ #
+ # The tests themselves
+ #
+
+ #
+ # Basic request / reply
+ #
+ def test_show_version(self):
+ print(vpp_papi.show_version())
+
+ #
+ # Details / Dump
+ #
+ def test_details_dump(self):
+ t = vpp_papi.sw_interface_dump(0, b'')
+ print('Dump/details T', t)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/vpp-api/python/tests/test_vpp_papi2.py b/src/vpp-api/python/tests/test_vpp_papi2.py
new file mode 100755
index 00000000..f45f791e
--- /dev/null
+++ b/src/vpp-api/python/tests/test_vpp_papi2.py
@@ -0,0 +1,487 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import unittest, sys, threading, struct, logging, os
+from vpp_papi import VPP
+from ipaddress import *
+import glob, json
+
+papi_event = threading.Event()
+import glob
+
+import fnmatch
+import os
+
+jsonfiles = []
+for root, dirnames, filenames in os.walk('../../../build-root/'):
+ if root.find('install-') == -1: continue
+ for filename in fnmatch.filter(filenames, '*.api.json'):
+ jsonfiles.append(os.path.join(root, filename))
+
+class TestPAPI(unittest.TestCase):
+ show_version_msg = '''["show_version",
+ ["u16", "_vl_msg_id"],
+ ["u32", "client_index"],
+ ["u32", "context"],
+ {"crc" : "0xf18f9480"}
+ ]'''
+
+ ip_address_details_msg = '''["ip_address_details",
+ ["u16", "_vl_msg_id"],
+ ["u32", "client_index"],
+ ["u32", "context"],
+ ["u8", "ip", 16],
+ ["u8", "prefix_length"],
+ {"crc" : "0x87d522a1"}
+ ]'''
+
+ cli_inband_msg = '''["cli_inband",
+ ["u16", "_vl_msg_id"],
+ ["u32", "client_index"],
+ ["u32", "context"],
+ ["u32", "length"],
+ ["u8", "cmd", 0, "length"],
+ {"crc" : "0x22345937"}
+ ]'''
+
+ def test_adding_new_message_object(self):
+ p = json.loads(TestPAPI.show_version_msg)
+ msglist = VPP(testmode=json)
+ msgdef = msglist.add_message(p[0], p[1:])
+
+ # Verify that message can be retrieved
+ self.assertTrue(msglist['show_version'])
+ self.assertFalse(msglist['foobar'])
+
+ # Test duplicate
+ self.assertRaises(ValueError, msglist.add_message, p[0], p[1:])
+
+ # Look at return tuple
+ self.assertTrue(msglist.ret_tup('show_version'))
+
+ def test_adding_new_message_object_with_array(self):
+ p = json.loads(TestPAPI.ip_address_details_msg)
+ msglist = VPP(testmode=True)
+ msglist.add_message(p[0], p[1:])
+
+ self.assertTrue(msglist['ip_address_details'])
+
+ def test_message_to_bytes(self):
+ msglist = VPP(testmode=True)
+ p = json.loads(TestPAPI.show_version_msg)
+ msgdef = msglist.add_message(p[0], p[1:])
+
+ # Give me a byte string for given message and given arguments
+
+ b = msglist.encode(msgdef, {'_vl_msg_id' : 50, 'context' : 123 })
+ self.assertEqual(10, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(rv._0, 50)
+ self.assertEqual(rv.context, 123)
+
+
+ p = json.loads(TestPAPI.ip_address_details_msg)
+ msgdef = msglist.add_message(p[0], p[1:])
+
+ # Give me a byte string for given message and given arguments
+ b = msglist.encode(msgdef, {'_vl_msg_id' : 50, 'context' : 123,
+ 'ip' : b'\xf0\xf1\xf2',
+ 'prefix_length' : 12})
+ self.assertEqual(27, len(b))
+ rv = msglist.decode(msgdef, b)
+
+ self.assertEqual(rv.context, 123)
+ self.assertEqual(rv.ip, b'\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+ self.assertEqual(rv.prefix_length, 12)
+
+ p = json.loads(TestPAPI.cli_inband_msg)
+ msgdef = msglist.add_message(p[0], p[1:])
+
+ # Give me a byte string for given message and given arguments
+ b = msglist.encode(msgdef, { '_vl_msg_id' : 50, 'context' : 123,
+ 'length' : 20, 'cmd' : 'show version verbose'})
+ self.assertEqual(34, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(rv._0, 50)
+ self.assertEqual(rv.context, 123)
+ self.assertEqual(rv.cmd.decode('ascii'), 'show version verbose')
+
+ variable_array_16_msg = '''["variable_array_16",
+ ["u32", "length"],
+ ["u16", "list", 0, "length"]
+ ]'''
+
+ p = json.loads(variable_array_16_msg)
+ msgdef = msglist.add_message(p[0], p[1:])
+
+ # Give me a byte string for given message and given arguments
+ b = msglist.encode(msgdef, { 'list' : [1, 2], 'length' :2})
+ self.assertEqual(8, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(2, rv.length)
+ self.assertEqual([1,2], rv.list)
+
+ def test_add_new_types(self):
+ counter_type = '''["ip4_fib_counter",
+ ["u32", "address"],
+ ["u8", "address_length"],
+ ["u64", "packets"],
+ ["u64", "bytes"],
+ {"crc" : "0xb2739495"}
+ ]'''
+
+ with_type_msg = '''["with_type_msg",
+ ["u32", "length"],
+ ["u16", "list", 0, "length"],
+ ["vl_api_ip4_fib_counter_t", "counter"]
+ ]'''
+
+ # Add new type
+ msglist = VPP(testmode=True)
+ p = json.loads(counter_type)
+ msglist.add_type(p[0], p[1:])
+ p = json.loads(with_type_msg)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'length' : 2, 'list' : [1,2],
+ 'counter' : { 'address' : 4, 'address_length' : 12,
+ 'packets': 1235, 'bytes' : 5678}})
+ self.assertEqual(29, len(b)) # wrong
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(2, rv.length)
+ self.assertEqual(5678, rv.counter.bytes)
+
+ def test_add_new_compound_type_with_array(self):
+ counter_type = '''["ip4_fib_counter",
+ ["u32", "address"],
+ ["u8", "address_length"],
+ ["u64", "packets"],
+ ["u64", "bytes"],
+ {"crc" : "0xb2739495"}
+ ]'''
+
+ with_type_msg = '''["with_type_msg",
+ ["u32", "length"],
+ ["u16", "list", 0, "length"],
+ ["vl_api_ip4_fib_counter_t", "counter", 2]
+
+ ]'''
+
+ # Add new type
+ msglist = VPP(testmode=True)
+ p = json.loads(counter_type)
+ msglist.add_type(p[0], p[1:])
+ p = json.loads(with_type_msg)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'length' : 2, 'list' : [1,2],
+ 'counter' : [{ 'address' : 4, 'address_length' : 12,
+ 'packets': 1235, 'bytes' : 5678},
+ { 'address' : 111, 'address_length' : 222,
+ 'packets': 333, 'bytes' : 444}]})
+ self.assertEqual(50, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual([1,2], rv.list)
+ self.assertEqual(1235, rv.counter[0].packets)
+
+ with_type_variable_msg = '''["with_type_variable_msg",
+ ["u32", "length"],
+ ["vl_api_ip4_fib_counter_t", "counter", 0, "length"]
+
+ ]'''
+
+ p = json.loads(with_type_variable_msg)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'length' : 2,
+ 'counter' : [{ 'address' : 4, 'address_length' : 12,
+ 'packets': 1235, 'bytes' : 5678},
+ { 'address' : 111, 'address_length' : 222,
+ 'packets': 333, 'bytes' : 444}]})
+ self.assertEqual(46, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(2, rv.length)
+ self.assertEqual(1235, rv.counter[0].packets)
+ self.assertEqual(333, rv.counter[1].packets)
+
+ def test_simple_array(self):
+ msglist = VPP(testmode=True)
+
+ simple_byte_array = '''["simple_byte_array",
+ ["u32", "length"],
+ ["u8", "namecommand", 64]
+
+ ]'''
+ p = json.loads(simple_byte_array)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'length': 2, 'namecommand': 'foobar'})
+ self.assertEqual(68, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(2, rv.length)
+
+ simple_array = '''["simple_array",
+ ["u32", "length"],
+ ["u32", "list", 2]
+
+ ]'''
+ p = json.loads(simple_array)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'length': 2, 'list': [1,2]})
+ self.assertEqual(12, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(2, rv.length)
+ self.assertEqual([1,2], rv.list)
+
+ simple_variable_array = '''["simple_variable_array",
+ ["u32", "length"],
+ ["u32", "list", 0, "length"]
+
+ ]'''
+ p = json.loads(simple_variable_array)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'length':2, 'list': [1,2]})
+ self.assertEqual(12, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(2, rv.length)
+ self.assertEqual([1,2], rv.list)
+
+ simple_variable_byte_array = '''["simple_variable_byte_array",
+ ["u32", "length"],
+ ["u8", "list", 0, "length"]
+ ]'''
+ p = json.loads(simple_variable_byte_array)
+ msgdef =msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'length': 6, 'list' : 'foobar'})
+ self.assertEqual(10, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(6, rv.length)
+ self.assertEqual('foobar', rv.list)
+
+ def test_old_vla_array(self):
+ msglist = VPP(testmode = True)
+
+ # VLA
+ vla_byte_array = '''["vla_byte_array",
+ ["u32", "foobar"],
+ ["u32", "list", 2],
+ ["u32", "propercount"],
+ ["u8", "propermask", 0, "propercount"],
+ ["u8", "oldmask", 0],
+ {"crc" : "0xb2739495"}
+ ]'''
+ p = json.loads(vla_byte_array)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'list' : [123, 456], 'oldmask': b'foobar',
+ 'propercount' : 2,
+ 'propermask' : [8,9]})
+ self.assertEqual(24, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(b'foobar', rv.oldmask)
+
+ def test_old_vla_array_not_last_member(self):
+ msglist = VPP(testmode = True)
+
+ # VLA
+ vla_byte_array = '''["vla_byte_array",
+ ["u8", "oldmask", 0],
+ ["u32", "foobar"],
+ {"crc" : "0xb2739495"}
+ ]'''
+ p = json.loads(vla_byte_array)
+ self.assertRaises(ValueError, msglist.add_message, p[0], p[1:])
+
+ def test_old_vla_array_u32(self):
+ msglist = VPP(testmode = True)
+
+ # VLA
+ vla_byte_array = '''["vla_byte_array",
+ ["u32", "foobar"],
+ ["u32", "oldmask", 0],
+ {"crc" : "0xb2739495"}
+ ]'''
+ p = json.loads(vla_byte_array)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'foobar' : 123, 'oldmask': [123, 456, 789]})
+ self.assertEqual(16, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual([123, 456, 789], rv.oldmask)
+
+ def test_old_vla_array_compound(self):
+ msglist = VPP(testmode = True)
+
+ # VLA
+ counter_type = '''["ip4_fib_counter",
+ ["u32", "address"],
+ ["u8", "address_length"],
+ ["u64", "packets"],
+ ["u64", "bytes"],
+ {"crc" : "0xb2739495"}
+ ]'''
+
+ vla_byte_array = '''["vla_byte_array",
+ ["vl_api_ip4_fib_counter_t", "counter", 0],
+ {"crc" : "0xb2739495"}
+ ]'''
+
+ p = json.loads(counter_type)
+ msglist.add_type(p[0], p[1:])
+
+ p = json.loads(vla_byte_array)
+ with self.assertRaises(NotImplementedError):
+ msgdef = msglist.add_message(p[0], p[1:])
+
+ def test_array_count_not_previous(self):
+ msglist = VPP(testmode = True)
+
+ # VLA
+ vla_byte_array = '''["vla_byte_array",
+ ["u32", "count"],
+ ["u32", "filler"],
+ ["u32", "lst", 0, "count"],
+ {"crc" : "0xb2739495"}
+ ]'''
+
+ p = json.loads(vla_byte_array)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'count': 3, 'lst': [1,2,3], 'filler' : 1 })
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(rv.lst, [1,2,3])
+
+ def test_argument_name(self):
+ msglist = VPP(testmode=True)
+
+
+ simple_name = '''["simple_name",
+ ["u32", "length"],
+ ["u8", "name"]
+ ]'''
+ p = json.loads(simple_name)
+ msgdef = msglist.add_message(p[0], p[1:])
+ b = msglist.encode(msgdef, {'length': 6, 'name': 1})
+ self.assertEqual(5, len(b))
+ rv = msglist.decode(msgdef, b)
+ self.assertEqual(6, rv.length)
+ self.assertEqual(1, rv.name)
+
+class TestConnectedPAPI(unittest.TestCase):
+ def test_request_reply_function(self):
+ vpp = VPP(jsonfiles)
+
+ vpp.connect('test_vpp_papi2')
+
+ rv = vpp.show_version()
+ self.assertEqual(0, rv.retval)
+ self.assertEqual('vpe', rv.program.decode().rstrip('\x00'))
+ vpp.disconnect()
+
+
+ def test_dump_details_function(self):
+ vpp = VPP(jsonfiles)
+ vpp.connect('test_vpp_papi3')
+
+ rv = vpp.sw_interface_dump()
+ #self.assertEqual(0, rv.retval)
+ print('RV', rv)
+ vpp.disconnect()
+
+ def test_vla(self):
+ vpp = VPP(jsonfiles)
+
+ vpp.connect('test_vpp_papi3')
+
+ cmd = 'show version verbose'
+ rv = vpp.cli_inband(length=len(cmd), cmd=cmd)
+ self.assertEqual(0, rv.retval)
+ print('RV', rv.reply)
+
+ cmd = 'show vlib graph'
+ rv = vpp.cli_inband(length=len(cmd), cmd=cmd)
+ self.assertEqual(0, rv.retval)
+ print('RV', rv.reply)
+ vpp.disconnect()
+
+ def test_events(self):
+ vpp = VPP(jsonfiles)
+
+ vpp.connect('test_vpp_papi3')
+
+ vpp.register_event_callback(event_handler)
+
+ rv = vpp.want_interface_events(enable_disable = True)
+ self.assertEqual(0, rv.retval)
+ print('RV', rv)
+
+ rv = vpp.create_loopback()
+ print('RV', rv)
+ self.assertEqual(0, rv.retval)
+
+ rv = vpp.sw_interface_set_flags(sw_if_index = 1, admin_up_down = 1)
+ print('RV', rv)
+ self.assertEqual(0, rv.retval)
+ rv = vpp.sw_interface_set_flags(sw_if_index = 1, admin_up_down = 0)
+ print('RV', rv)
+ self.assertEqual(0, rv.retval)
+ self.assertEqual(papi_event.wait(10), True)
+
+ vpp.disconnect()
+
+def event_handler(msgname, result):
+ print('IN EVENT HANDLER:', msgname, result)
+ papi_event.set()
+
+class TestACL(unittest.TestCase):
+ def test_acl_create(self):
+ vpp = VPP(jsonfiles)
+
+ vpp.connect('acl-test')
+
+ rv = vpp.acl_plugin_get_version()
+ print('RV', rv)
+ self.assertEqual(rv.major, 1)
+ self.assertEqual(rv.minor, 1)
+
+ rv = vpp.acl_add_replace(acl_index = 0xFFFFFFFF,
+ r = [{
+ "is_permit" : 1,
+ "is_ipv6" : 0,
+ "proto" : 6,
+ "srcport_or_icmptype_first" : 80,
+ }],
+ count = 1)
+ print('RV', rv)
+ rv = vpp.acl_add_replace(acl_index = 0xFFFFFFFF,
+ r = [{
+ "is_permit" : 1,
+ "is_ipv6" : 0,
+ "proto" : 6,
+ "srcport_or_icmptype_first" : 81,
+ }],
+ count = 1)
+ self.assertEqual(rv.retval, 0)
+ print('RV', rv)
+ ai = rv.acl_index
+ rv = vpp.acl_dump()
+ print('RV', rv)
+
+ #rv = vpp.acl_del(acl_index = ai)
+ #self.assertEqual(rv.retval, 0)
+
+ #rv = vpp.acl_dump()
+ #self.assertEqual([], vpp.acl_dump())
+
+ vpp.disconnect()
+
+ def test_status(self):
+ vpp = VPP(jsonfiles)
+ vpp.status()
+
+ def test_acl_interface_get(self):
+ vpp = VPP(jsonfiles)
+
+ vpp.connect('test_vpp_papi2')
+
+ rv = vpp.macip_acl_interface_get()
+
+ print('RV', rv)
+
+ vpp.disconnect()
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/vpp-api/python/vpp_papi.py b/src/vpp-api/python/vpp_papi.py
new file mode 100644
index 00000000..7b66c0f4
--- /dev/null
+++ b/src/vpp-api/python/vpp_papi.py
@@ -0,0 +1,706 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import sys
+import os
+import logging
+import collections
+import struct
+import json
+import threading
+import glob
+import atexit
+from cffi import FFI
+
+if sys.version[0] == '2':
+ import Queue as queue
+else:
+ import queue
+
+ffi = FFI()
+ffi.cdef("""
+typedef void (*vac_callback_t)(unsigned char * data, int len);
+typedef void (*vac_error_callback_t)(void *, unsigned char *, int);
+int vac_connect(char * name, char * chroot_prefix, vac_callback_t cb,
+ int rx_qlen);
+int vac_disconnect(void);
+int vac_read(char **data, int *l, unsigned short timeout);
+int vac_write(char *data, int len);
+void vac_free(void * msg);
+
+int vac_get_msg_index(unsigned char * name);
+int vac_msg_table_size(void);
+int vac_msg_table_max_index(void);
+
+void vac_rx_suspend (void);
+void vac_rx_resume (void);
+void vac_set_error_handler(vac_error_callback_t);
+ """)
+
+# Barfs on failure, no need to check success.
+vpp_api = ffi.dlopen('libvppapiclient.so')
+
+
+def vpp_atexit(self):
+ """Clean up VPP connection on shutdown."""
+ if self.connected:
+ self.logger.debug('Cleaning up VPP on exit')
+ self.disconnect()
+
+vpp_object = None
+
+
+def vpp_iterator(d):
+ if sys.version[0] == '2':
+ return d.iteritems()
+ else:
+ return d.items()
+
+
+@ffi.callback("void(unsigned char *, int)")
+def vac_callback_sync(data, len):
+ vpp_object.msg_handler_sync(ffi.buffer(data, len))
+
+
+@ffi.callback("void(unsigned char *, int)")
+def vac_callback_async(data, len):
+ vpp_object.msg_handler_async(ffi.buffer(data, len))
+
+
+@ffi.callback("void(void *, unsigned char *, int)")
+def vac_error_handler(arg, msg, msg_len):
+ vpp_object.logger.warning("VPP API client: %s", ffi.string(msg, msg_len))
+
+
+class Empty(object):
+ pass
+
+
+class FuncWrapper(object):
+ def __init__(self, func):
+ self._func = func
+ self.__name__ = func.__name__
+
+ def __call__(self, **kwargs):
+ return self._func(**kwargs)
+
+
+class VPP():
+ """VPP interface.
+
+ This class provides the APIs to VPP. The APIs are loaded
+ from the provided .api.json files and functions are created
+ accordingly. Since these functions are created dynamically,
+ they are documented in the VPP .api files rather than here.
+
+ Additionally, VPP can send callback messages; this class
+ provides a means to register a callback function to receive
+ these messages in a background thread.
+ """
+ def __init__(self, apifiles=None, testmode=False, async_thread=True,
+ logger=logging.getLogger('vpp_papi'), loglevel='debug'):
+ """Create a VPP API object.
+
+ apifiles is a list of files containing API
+ descriptions that will be loaded - methods will be
+ dynamically created reflecting these APIs. If not
+ provided this will load the API files from VPP's
+ default install location.
+ """
+ global vpp_object
+ vpp_object = self
+ self.logger = logger
+ logging.basicConfig(level=getattr(logging, loglevel.upper()))
+
+ self.messages = {}
+ self.id_names = []
+ self.id_msgdef = []
+ self.connected = False
+ self.header = struct.Struct('>HI')
+ self.apifiles = []
+ self.event_callback = None
+ self.message_queue = queue.Queue()
+ self.read_timeout = 0
+ self.vpp_api = vpp_api
+ if async_thread:
+ self.event_thread = threading.Thread(
+ target=self.thread_msg_handler)
+ self.event_thread.daemon = True
+ self.event_thread.start()
+
+ if not apifiles:
+ # Pick up API definitions from default directory
+ apifiles = glob.glob('/usr/share/vpp/api/*.api.json')
+
+ for file in apifiles:
+ with open(file) as apidef_file:
+ api = json.load(apidef_file)
+ for t in api['types']:
+ self.add_type(t[0], t[1:])
+
+ for m in api['messages']:
+ self.add_message(m[0], m[1:])
+ self.apifiles = apifiles
+
+ # Basic sanity check
+ if len(self.messages) == 0 and not testmode:
+ raise ValueError(1, 'Missing JSON message definitions')
+
+ # Make sure we allow VPP to clean up the message rings.
+ atexit.register(vpp_atexit, self)
+
+ # Register error handler
+ vpp_api.vac_set_error_handler(vac_error_handler)
+
+ class ContextId(object):
+ """Thread-safe provider of unique context IDs."""
+ def __init__(self):
+ self.context = 0
+ self.lock = threading.Lock()
+
+ def __call__(self):
+ """Get a new unique (or, at least, not recently used) context."""
+ with self.lock:
+ self.context += 1
+ return self.context
+ get_context = ContextId()
+
+ def status(self):
+ """Debug function: report current VPP API status to stdout."""
+ print('Connected' if self.connected else 'Not Connected')
+ print('Read API definitions from', ', '.join(self.apifiles))
+
+ def __struct(self, t, n=None, e=-1, vl=None):
+ """Create a packing structure for a message."""
+ base_types = {'u8': 'B',
+ 'u16': 'H',
+ 'u32': 'I',
+ 'i32': 'i',
+ 'u64': 'Q',
+ 'f64': 'd', }
+ pack = None
+ if t in base_types:
+ pack = base_types[t]
+ if not vl:
+ if e > 0 and t == 'u8':
+ # Fixed byte array
+ s = struct.Struct('>' + str(e) + 's')
+ return s.size, s
+ if e > 0:
+ # Fixed array of base type
+ s = struct.Struct('>' + base_types[t])
+ return s.size, [e, s]
+ elif e == 0:
+ # Old style variable array
+ s = struct.Struct('>' + base_types[t])
+ return s.size, [-1, s]
+ else:
+ # Variable length array
+ if t == 'u8':
+ s = struct.Struct('>s')
+ return s.size, [vl, s]
+ else:
+ s = struct.Struct('>' + base_types[t])
+ return s.size, [vl, s]
+
+ s = struct.Struct('>' + base_types[t])
+ return s.size, s
+
+ if t in self.messages:
+ size = self.messages[t]['sizes'][0]
+
+ # Return a list in case of array
+ if e > 0 and not vl:
+ return size, [e, lambda self, encode, buf, offset, args: (
+ self.__struct_type(encode, self.messages[t], buf, offset,
+ args))]
+ if vl:
+ return size, [vl, lambda self, encode, buf, offset, args: (
+ self.__struct_type(encode, self.messages[t], buf, offset,
+ args))]
+ elif e == 0:
+ # Old style VLA
+ raise NotImplementedError(1,
+ 'No support for compound types ' + t)
+ return size, lambda self, encode, buf, offset, args: (
+ self.__struct_type(encode, self.messages[t], buf, offset, args)
+ )
+
+ raise ValueError(1, 'Invalid message type: ' + t)
+
+ def __struct_type(self, encode, msgdef, buf, offset, kwargs):
+ """Get a message packer or unpacker."""
+ if encode:
+ return self.__struct_type_encode(msgdef, buf, offset, kwargs)
+ else:
+ return self.__struct_type_decode(msgdef, buf, offset)
+
+ def __struct_type_encode(self, msgdef, buf, offset, kwargs):
+ off = offset
+ size = 0
+
+ for k in kwargs:
+ if k not in msgdef['args']:
+ raise ValueError(1, 'Non-existent argument [' + k + ']' +
+ ' used in call to: ' +
+ self.id_names[kwargs['_vl_msg_id']] + '()')
+
+ for k, v in vpp_iterator(msgdef['args']):
+ off += size
+ if k in kwargs:
+ if type(v) is list:
+ if callable(v[1]):
+ e = kwargs[v[0]] if v[0] in kwargs else v[0]
+ if e != len(kwargs[k]):
+ raise ValueError(1, 'Input list length mismatch: %s (%s != %s)' % (k, e, len(kwargs[k])))
+ size = 0
+ for i in range(e):
+ size += v[1](self, True, buf, off + size,
+ kwargs[k][i])
+ else:
+ if v[0] in kwargs:
+ l = kwargs[v[0]]
+ if l != len(kwargs[k]):
+ raise ValueError(1, 'Input list length mismatch: %s (%s != %s)' % (k, l, len(kwargs[k])))
+ else:
+ l = len(kwargs[k])
+ if v[1].size == 1:
+ buf[off:off + l] = bytearray(kwargs[k])
+ size = l
+ else:
+ size = 0
+ for i in kwargs[k]:
+ v[1].pack_into(buf, off + size, i)
+ size += v[1].size
+ else:
+ if callable(v):
+ size = v(self, True, buf, off, kwargs[k])
+ else:
+ if type(kwargs[k]) is str and v.size < len(kwargs[k]):
+ raise ValueError(1, 'Input list length mismatch: %s (%s < %s)' % (k, v.size, len(kwargs[k])))
+ v.pack_into(buf, off, kwargs[k])
+ size = v.size
+ else:
+ size = v.size if not type(v) is list else 0
+
+ return off + size - offset
+
+ def __getitem__(self, name):
+ if name in self.messages:
+ return self.messages[name]
+ return None
+
+ def get_size(self, sizes, kwargs):
+ total_size = sizes[0]
+ for e in sizes[1]:
+ if e in kwargs and type(kwargs[e]) is list:
+ total_size += len(kwargs[e]) * sizes[1][e]
+ return total_size
+
+ def encode(self, msgdef, kwargs):
+ # Make suitably large buffer
+ size = self.get_size(msgdef['sizes'], kwargs)
+ buf = bytearray(size)
+ offset = 0
+ size = self.__struct_type(True, msgdef, buf, offset, kwargs)
+ return buf[:offset + size]
+
+ def decode(self, msgdef, buf):
+ return self.__struct_type(False, msgdef, buf, 0, None)[1]
+
+ def __struct_type_decode(self, msgdef, buf, offset):
+ res = []
+ off = offset
+ size = 0
+ for k, v in vpp_iterator(msgdef['args']):
+ off += size
+ if type(v) is list:
+ lst = []
+ if callable(v[1]): # compound type
+ size = 0
+ if v[0] in msgdef['args']: # vla
+ e = res[v[2]]
+ else: # fixed array
+ e = v[0]
+ res.append(lst)
+ for i in range(e):
+ (s, l) = v[1](self, False, buf, off + size, None)
+ lst.append(l)
+ size += s
+ continue
+ if v[1].size == 1:
+ if type(v[0]) is int:
+ size = len(buf) - off
+ else:
+ size = res[v[2]]
+ res.append(buf[off:off + size])
+ else:
+ e = v[0] if type(v[0]) is int else res[v[2]]
+ if e == -1:
+ # integer division: e is used as a range() bound below
+ e = (len(buf) - off) // v[1].size
+ lst = []
+ res.append(lst)
+ size = 0
+ for i in range(e):
+ lst.append(v[1].unpack_from(buf, off + size)[0])
+ size += v[1].size
+ else:
+ if callable(v):
+ (s, l) = v(self, False, buf, off, None)
+ res.append(l)
+ size += s
+ else:
+ res.append(v.unpack_from(buf, off)[0])
+ size = v.size
+
+ return off + size - offset, msgdef['return_tuple']._make(res)
+
+ def ret_tup(self, name):
+ if name in self.messages and 'return_tuple' in self.messages[name]:
+ return self.messages[name]['return_tuple']
+ return None
+
+ def add_message(self, name, msgdef, typeonly=False):
+ if name in self.messages:
+ raise ValueError('Duplicate message name: ' + name)
+
+ args = collections.OrderedDict()
+ argtypes = collections.OrderedDict()
+ fields = []
+ msg = {}
+ total_size = 0
+ sizes = {}
+ for i, f in enumerate(msgdef):
+ if type(f) is dict and 'crc' in f:
+ msg['crc'] = f['crc']
+ continue
+ field_type = f[0]
+ field_name = f[1]
+ if len(f) == 3 and f[2] == 0 and i != len(msgdef) - 2:
+ raise ValueError('Variable Length Array must be last: ' + name)
+ size, s = self.__struct(*f)
+ args[field_name] = s
+ if type(s) == list and type(s[0]) == int and type(s[1]) == struct.Struct:
+ if s[0] < 0:
+ sizes[field_name] = size
+ else:
+ sizes[field_name] = size
+ total_size += s[0] * size
+ else:
+ sizes[field_name] = size
+ total_size += size
+
+ argtypes[field_name] = field_type
+ if len(f) == 4: # Find offset to # elements field
+ idx = list(args.keys()).index(f[3]) - i
+ args[field_name].append(idx)
+ fields.append(field_name)
+ msg['return_tuple'] = collections.namedtuple(name, fields,
+ rename=True)
+ self.messages[name] = msg
+ self.messages[name]['args'] = args
+ self.messages[name]['argtypes'] = argtypes
+ self.messages[name]['typeonly'] = typeonly
+ self.messages[name]['sizes'] = [total_size, sizes]
+ return self.messages[name]
+
+ def add_type(self, name, typedef):
+ return self.add_message('vl_api_' + name + '_t', typedef,
+ typeonly=True)
+
+ def make_function(self, name, i, msgdef, multipart, async):
+ if (async):
+ f = lambda **kwargs: (self._call_vpp_async(i, msgdef, **kwargs))
+ else:
+ f = lambda **kwargs: (self._call_vpp(i, msgdef, multipart,
+ **kwargs))
+ args = self.messages[name]['args']
+ argtypes = self.messages[name]['argtypes']
+ f.__name__ = str(name)
+ f.__doc__ = ", ".join(["%s %s" %
+ (argtypes[k], k) for k in args.keys()])
+ return f
+
+ @property
+ def api(self):
+ if not hasattr(self, "_api"):
+ raise Exception("Not connected, api definitions not available")
+ return self._api
+
+ def _register_functions(self, async=False):
+ self.id_names = [None] * (self.vpp_dictionary_maxid + 1)
+ self.id_msgdef = [None] * (self.vpp_dictionary_maxid + 1)
+ self._api = Empty()
+ for name, msgdef in vpp_iterator(self.messages):
+ if self.messages[name]['typeonly']:
+ continue
+ crc = self.messages[name]['crc']
+ n = name + '_' + crc[2:]
+ i = vpp_api.vac_get_msg_index(n.encode())
+ if i > 0:
+ self.id_msgdef[i] = msgdef
+ self.id_names[i] = name
+ multipart = name.find('_dump') > 0
+ f = self.make_function(name, i, msgdef, multipart, async)
+ setattr(self._api, name, FuncWrapper(f))
+
+ # old API stuff starts here - will be removed in 17.07
+ if hasattr(self, name):
+ raise NameError(
+ 3, "Conflicting name in JSON definition: `%s'" % name)
+ setattr(self, name, f)
+ # old API stuff ends here
+ else:
+ self.logger.debug(
+ 'No such message type or failed CRC checksum: %s', n)
+
+ def _write(self, buf):
+ """Send a binary-packed message to VPP."""
+ if not self.connected:
+ raise IOError(1, 'Not connected')
+ return vpp_api.vac_write(ffi.from_buffer(buf), len(buf))
+
+ def _read(self):
+ if not self.connected:
+ raise IOError(1, 'Not connected')
+ mem = ffi.new("char **")
+ size = ffi.new("int *")
+ rv = vpp_api.vac_read(mem, size, self.read_timeout)
+ if rv:
+ raise IOError(rv, 'vac_read failed')
+ msg = bytes(ffi.buffer(mem[0], size[0]))
+ vpp_api.vac_free(mem[0])
+ return msg
+
+ def connect_internal(self, name, msg_handler, chroot_prefix, rx_qlen,
+ async):
+ pfx = chroot_prefix.encode() if chroot_prefix else ffi.NULL
+ rv = vpp_api.vac_connect(name.encode(), pfx, msg_handler, rx_qlen)
+ if rv != 0:
+ raise IOError(2, 'Connect failed')
+ self.connected = True
+
+ self.vpp_dictionary_maxid = vpp_api.vac_msg_table_max_index()
+ self._register_functions(async=async)
+
+ # Initialise control ping
+ crc = self.messages['control_ping']['crc']
+ self.control_ping_index = vpp_api.vac_get_msg_index(
+ ('control_ping' + '_' + crc[2:]).encode())
+ self.control_ping_msgdef = self.messages['control_ping']
+ return rv
+
+ def connect(self, name, chroot_prefix=None, async=False, rx_qlen=32):
+ """Attach to VPP.
+
+ name - the name of the client.
+ chroot_prefix - if VPP is chroot'ed, the prefix of the jail
+ async - if true, messages are sent without waiting for a reply
+ rx_qlen - the length of the VPP message receive queue between
+ client and server.
+ """
+ msg_handler = vac_callback_sync if not async else vac_callback_async
+ return self.connect_internal(name, msg_handler, chroot_prefix, rx_qlen,
+ async)
+
+ def connect_sync(self, name, chroot_prefix=None, rx_qlen=32):
+ """Attach to VPP in synchronous mode. Application must poll for events.
+
+ name - the name of the client.
+ chroot_prefix - if VPP is chroot'ed, the prefix of the jail
+ rx_qlen - the length of the VPP message receive queue between
+ client and server.
+ """
+
+ return self.connect_internal(name, ffi.NULL, chroot_prefix, rx_qlen,
+ async=False)
+
+ def disconnect(self):
+ """Detach from VPP."""
+ rv = vpp_api.vac_disconnect()
+ self.connected = False
+ return rv
+
+ def msg_handler_sync(self, msg):
+ """Process an incoming message from VPP in sync mode.
+
+ The message may be a reply or it may be an async notification.
+ """
+ r = self.decode_incoming_msg(msg)
+ if r is None:
+ return
+
+ # If we have a context, then use the context to find any
+ # request waiting for a reply
+ context = 0
+ if hasattr(r, 'context') and r.context > 0:
+ context = r.context
+
+ msgname = type(r).__name__
+
+ if context == 0:
+ # No context -> async notification that we feed to the callback
+ self.message_queue.put_nowait(r)
+ else:
+ raise IOError(2, 'RPC reply message received in event handler')
+
+ def decode_incoming_msg(self, msg):
+ if not msg:
+ self.logger.warning('vpp_api.read failed')
+ return
+
+ i, ci = self.header.unpack_from(msg, 0)
+ if self.id_names[i] == 'rx_thread_exit':
+ return
+
+ #
+ # Decode message and returns a tuple.
+ #
+ msgdef = self.id_msgdef[i]
+ if not msgdef:
+ raise IOError(2, 'Reply message undefined')
+
+ r = self.decode(msgdef, msg)
+
+ return r
+
+ def msg_handler_async(self, msg):
+ """Process a message from VPP in async mode.
+
+ In async mode, all messages are returned to the callback.
+ """
+ r = self.decode_incoming_msg(msg)
+ if r is None:
+ return
+
+ msgname = type(r).__name__
+
+ if self.event_callback:
+ self.event_callback(msgname, r)
+
+ def _control_ping(self, context):
+ """Send a ping command."""
+ self._call_vpp_async(self.control_ping_index,
+ self.control_ping_msgdef,
+ context=context)
+
+ def _call_vpp(self, i, msgdef, multipart, **kwargs):
+ """Given a message, send the message and await a reply.
+
+ msgdef - the message packing definition
+ i - the message type index
+ multipart - True if the request elicits multiple
+ reply messages.
+ context - context number - chosen at random if not
+ supplied.
+ The remainder of the kwargs are the arguments to the API call.
+
+ The return value is the message or message array containing
+ the response. It will raise an IOError exception if there was
+ no response within the timeout window.
+ """
+
+ if 'context' not in kwargs:
+ context = self.get_context()
+ kwargs['context'] = context
+ else:
+ context = kwargs['context']
+ kwargs['_vl_msg_id'] = i
+ b = self.encode(msgdef, kwargs)
+
+ vpp_api.vac_rx_suspend()
+ self._write(b)
+
+ if multipart:
+ # Send a ping after the request - we use its response
+ # to detect that we have seen all results.
+ self._control_ping(context)
+
+ # Block until we get a reply.
+ rl = []
+ while True:
+ msg = self._read()
+ if not msg:
+ raise IOError(2, 'VPP API client: read failed')
+
+ r = self.decode_incoming_msg(msg)
+ msgname = type(r).__name__
+ if not hasattr(r, 'context') or r.context == 0 or context != r.context:
+ self.message_queue.put_nowait(r)
+ continue
+
+ if not multipart:
+ rl = r
+ break
+ if msgname == 'control_ping_reply':
+ break
+
+ rl.append(r)
+
+ vpp_api.vac_rx_resume()
+
+ return rl
+
+ def _call_vpp_async(self, i, msgdef, **kwargs):
+ """Given a message, send the message and await a reply.
+
+ msgdef - the message packing definition
+ i - the message type index
+ context - context number - chosen at random if not
+ supplied.
+ The remainder of the kwargs are the arguments to the API call.
+ """
+ if 'context' not in kwargs:
+ context = self.get_context()
+ kwargs['context'] = context
+ else:
+ context = kwargs['context']
+ kwargs['_vl_msg_id'] = i
+ b = self.encode(msgdef, kwargs)
+
+ self._write(b)
+
+ def register_event_callback(self, callback):
+ """Register a callback for async messages.
+
+ This will be called for async notifications in sync mode,
+ and all messages in async mode. In sync mode, replies to
+ requests will not come here.
+
+ callback is a fn(msg_type_name, msg_type) that will be
+ called when a message comes in. While this function is
+ executing, note that (a) you are in a background thread and
+ may wish to use threading.Lock to protect your datastructures,
+ and (b) message processing from VPP will stop (so if you take
+ a long while about it you may provoke reply timeouts or cause
+ VPP to fill the RX buffer). Passing None will disable the
+ callback.
+ """
+ self.event_callback = callback
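+
+ # A sketch of registration, mirroring test_events in this patch
+ # (want_interface_events enables VPP interface notifications):
+ #
+ # def event_handler(msgname, result):
+ # print(msgname, result)
+ #
+ # vpp.register_event_callback(event_handler)
+ # vpp.want_interface_events(enable_disable=True)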
+
+ def thread_msg_handler(self):
+ """Python thread calling the user registerd message handler.
+
+ This is to emulate the old style event callback scheme. Modern
+ clients should provide their own thread to poll the event
+ queue.
+ """
+ while True:
+ r = self.message_queue.get()
+ msgname = type(r).__name__
+ if self.event_callback:
+ self.event_callback(msgname, r)
diff --git a/src/vpp-api/vapi/Makefile.am b/src/vpp-api/vapi/Makefile.am
new file mode 100644
index 00000000..74b2b47e
--- /dev/null
+++ b/src/vpp-api/vapi/Makefile.am
@@ -0,0 +1,74 @@
+# Copyright (c) 2017 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+AUTOMAKE_OPTIONS = foreign
+ACLOCAL_AMFLAGS = -I m4
+AM_LIBTOOLFLAGS = --quiet
+
+AM_CFLAGS = -Wall -I${top_srcdir} -I${top_builddir} -I. -I$(top_srcdir)/vpp-api/
+
+AM_LDFLAGS = -shared -avoid-version -rpath /none -no-undefined
+
+bin_PROGRAMS =
+noinst_LTLIBRARIES =
+CLEANDIRS =
+
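+# The pattern rules below run the VAPI code generators over each .api.json
+# file (symlinked into this directory by the %.api.json rule further down)
+# to produce the corresponding vapi/*.api.vapi.h and vapi/*.api.vapi.hpp
+# headers.
+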
+vapi/%.api.vapi.h: %.api.json vapi_c_gen.py vapi_json_parser.py
+ @echo " VAPI C GEN $< " $@ ; \
+ mkdir -p `dirname $@` ; \
+ $(top_srcdir)/vpp-api/vapi/vapi_c_gen.py --prefix=vapi $<
+
+vapi/%.api.vapi.hpp: %.api.json vapi_cpp_gen.py vapi_c_gen.py vapi_json_parser.py
+ @echo " VAPI CPP GEN $< " $@ ; \
+ mkdir -p `dirname $@` ; \
+ $(top_srcdir)/vpp-api/vapi/vapi_cpp_gen.py --prefix=vapi --gen-h-prefix=vapi $<
+
+%.api.json:
+ find $(top_builddir) -name '$@' | xargs ln -s
+
+BUILT_SOURCES = $(shell find $(top_builddir) -name '*.api.json' | xargs -n1 basename) \
+ $(patsubst %.api.json,vapi/%.api.vapi.h,$(JSON_FILES)) \
+ $(patsubst %.api.json,vapi/%.api.vapi.hpp,$(JSON_FILES))
+
+vapi.c: $(BUILT_SOURCES)
+
+JSON_FILES = $(wildcard *.api.json)
+
+lib_LTLIBRARIES = libvapiclient.la
+
+libvapiclient_la_SOURCES = vapi.c
+
+libvapiclient_la_DEPENDENCIES = libvapiclient.map
+
+libvapiclient_la_LIBADD = -lpthread -lm -lrt \
+ $(top_builddir)/libvppinfra.la \
+ $(top_builddir)/libvlibmemoryclient.la \
+ $(top_builddir)/libsvm.la
+
+libvapiclient_la_LDFLAGS = \
+ -Wl,-L$(top_builddir)/.libs,--whole-archive,--no-whole-archive \
+ -Wl,--version-script=$(srcdir)/libvapiclient.map,-lrt
+
+libvapiclient_la_CPPFLAGS = -I. -I$(top_builddir)/vpp-api/vapi
+
+vapiincludedir = $(includedir)/vapi
+
+vapiinclude_HEADERS = vapi.h \
+ vapi.hpp \
+ vapi_dbg.h \
+ vapi_common.h \
+ vapi_internal.h \
+ $(patsubst %.api.json,vapi/%.api.vapi.h,$(JSON_FILES)) \
+ $(patsubst %.api.json,vapi/%.api.vapi.hpp,$(JSON_FILES))
+
+# vi:syntax=automake
diff --git a/src/vpp-api/vapi/libvapiclient.map b/src/vpp-api/vapi/libvapiclient.map
new file mode 100644
index 00000000..6b58d1e9
--- /dev/null
+++ b/src/vpp-api/vapi/libvapiclient.map
@@ -0,0 +1,44 @@
+
+VAPICLIENT_17.07 {
+ global:
+ vapi_msg_alloc;
+ vapi_msg_free;
+ vapi_ctx_alloc;
+ vapi_ctx_free;
+ vapi_is_msg_available;
+ vapi_connect;
+ vapi_disconnect;
+ vapi_get_fd;
+ vapi_send;
+ vapi_send2;
+ vapi_recv;
+ vapi_wait;
+ vapi_dispatch_one;
+ vapi_dispatch;
+ vapi_set_event_cb;
+ vapi_clear_event_cb;
+ vapi_set_generic_event_cb;
+ vapi_clear_generic_event_cb;
+ vapi_get_client_index;
+ vapi_register_msg;
+ vapi_is_nonblocking;
+ vapi_requests_empty;
+ vapi_requests_full;
+ vapi_gen_req_context;
+ vapi_producer_lock;
+ vapi_send_with_control_ping;
+ vapi_store_request;
+ vapi_producer_unlock;
+ vapi_lookup_vl_msg_id;
+ vapi_lookup_vapi_msg_id_t;
+ vapi_msg_is_with_context;
+ vapi_get_context_offset;
+ vapi_msg_id_control_ping;
+ vapi_msg_id_control_ping_reply;
+ vapi_get_message_count;
+ vapi_get_msg_name;
+
+ local: *;
+};
diff --git a/src/vpp-api/vapi/vapi.c b/src/vpp-api/vapi/vapi.c
new file mode 100644
index 00000000..3150d2b4
--- /dev/null
+++ b/src/vpp-api/vapi/vapi.c
@@ -0,0 +1,933 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <arpa/inet.h>
+#include <stddef.h>
+#include <assert.h>
+
+#include <vpp-api/vapi/vapi_dbg.h>
+#include <vpp-api/vapi/vapi.h>
+#include <vpp-api/vapi/vapi_internal.h>
+#include <vppinfra/types.h>
+#include <vlibapi/api_common.h>
+#include <vlibmemory/api_common.h>
+
+/* control pings are used internally (e.g. to terminate dump requests) and,
+ * since the generated code lives in headers, their message ids must be
+ * reachable from one place - so define them here (the headers declare them
+ * extern) */
+vapi_msg_id_t vapi_msg_id_control_ping = 0;
+vapi_msg_id_t vapi_msg_id_control_ping_reply = 0;
+
+struct
+{
+ size_t count;
+ vapi_message_desc_t **msgs;
+ size_t max_len_name_with_crc;
+} __vapi_metadata;
+
+typedef struct
+{
+ u32 context;
+ vapi_cb_t callback;
+ void *callback_ctx;
+ bool is_dump;
+} vapi_req_t;
+
+static const u32 context_counter_mask = (1 << 31);
+
+typedef struct
+{
+ vapi_error_e (*cb) (vapi_ctx_t ctx, void *callback_ctx, vapi_msg_id_t id,
+ void *payload);
+ void *ctx;
+} vapi_generic_cb_with_ctx;
+
+typedef struct
+{
+ vapi_error_e (*cb) (vapi_ctx_t ctx, void *callback_ctx, void *payload);
+ void *ctx;
+} vapi_event_cb_with_ctx;
+
+struct vapi_ctx_s
+{
+ vapi_mode_e mode;
+ int requests_size; /* size of the requests array (circular queue) */
+ int requests_start; /* index of first request */
+ int requests_count; /* number of used slots */
+ vapi_req_t *requests;
+ u32 context_counter;
+ vapi_generic_cb_with_ctx generic_cb;
+ vapi_event_cb_with_ctx *event_cbs;
+ u16 *vapi_msg_id_t_to_vl_msg_id;
+ u16 vl_msg_id_max;
+ vapi_msg_id_t *vl_msg_id_to_vapi_msg_t;
+ bool connected;
+ pthread_mutex_t requests_mutex;
+};
+
+u32
+vapi_gen_req_context (vapi_ctx_t ctx)
+{
+ ++ctx->context_counter;
+ ctx->context_counter %= context_counter_mask;
+ return ctx->context_counter | context_counter_mask;
+}
+
+size_t
+vapi_get_request_count (vapi_ctx_t ctx)
+{
+ return ctx->requests_count;
+}
+
+bool
+vapi_requests_full (vapi_ctx_t ctx)
+{
+ return (ctx->requests_count == ctx->requests_size);
+}
+
+bool
+vapi_requests_empty (vapi_ctx_t ctx)
+{
+ return (0 == ctx->requests_count);
+}
+
+static int
+vapi_requests_end (vapi_ctx_t ctx)
+{
+ return (ctx->requests_start + ctx->requests_count) % ctx->requests_size;
+}
+
+void
+vapi_store_request (vapi_ctx_t ctx, u32 context, bool is_dump,
+ vapi_cb_t callback, void *callback_ctx)
+{
+ assert (!vapi_requests_full (ctx));
+ /* if the mutex is not held, bad things will happen */
+ assert (0 != pthread_mutex_trylock (&ctx->requests_mutex));
+ const int requests_end = vapi_requests_end (ctx);
+ vapi_req_t *slot = &ctx->requests[requests_end];
+ slot->is_dump = is_dump;
+ slot->context = context;
+ slot->callback = callback;
+ slot->callback_ctx = callback_ctx;
+ VAPI_DBG ("stored@%d: context:%x (start is @%d)", requests_end, context,
+ ctx->requests_start);
+ ++ctx->requests_count;
+ assert (!vapi_requests_empty (ctx));
+}
+
+#if VAPI_DEBUG_ALLOC
+struct to_be_freed_s;
+struct to_be_freed_s
+{
+ void *v;
+ struct to_be_freed_s *next;
+};
+
+static struct to_be_freed_s *to_be_freed = NULL;
+
+void
+vapi_add_to_be_freed (void *v)
+{
+ struct to_be_freed_s *prev = NULL;
+ struct to_be_freed_s *tmp;
+ tmp = to_be_freed;
+ while (tmp && tmp->v)
+ {
+ prev = tmp;
+ tmp = tmp->next;
+ }
+ if (!tmp)
+ {
+ if (!prev)
+ {
+ tmp = to_be_freed = calloc (1, sizeof (*to_be_freed));
+ }
+ else
+ {
+ tmp = prev->next = calloc (1, sizeof (*to_be_freed));
+ }
+ }
+ VAPI_DBG ("To be freed %p", v);
+ tmp->v = v;
+}
+
+void
+vapi_trace_free (void *v)
+{
+ struct to_be_freed_s *tmp = to_be_freed;
+ while (tmp && tmp->v != v)
+ {
+ tmp = tmp->next;
+ }
+ if (tmp && tmp->v == v)
+ {
+ VAPI_DBG ("Freed %p", v);
+ tmp->v = NULL;
+ }
+ else
+ {
+ VAPI_ERR ("Trying to free untracked pointer %p", v);
+ abort ();
+ }
+}
+
+void
+vapi_to_be_freed_validate ()
+{
+ struct to_be_freed_s *tmp = to_be_freed;
+ while (tmp)
+ {
+ if (tmp->v)
+ {
+ VAPI_ERR ("Unfreed msg %p!", tmp->v);
+ }
+ tmp = tmp->next;
+ }
+}
+
+#endif
+
+void *
+vapi_msg_alloc (vapi_ctx_t ctx, size_t size)
+{
+ if (!ctx->connected)
+ {
+ return NULL;
+ }
+ void *rv = vl_msg_api_alloc_or_null (size);
+ return rv;
+}
+
+void
+vapi_msg_free (vapi_ctx_t ctx, void *msg)
+{
+ if (!ctx->connected)
+ {
+ return;
+ }
+#if VAPI_DEBUG_ALLOC
+ vapi_trace_free (msg);
+#endif
+ vl_msg_api_free (msg);
+}
+
+vapi_msg_id_t
+vapi_lookup_vapi_msg_id_t (vapi_ctx_t ctx, u16 vl_msg_id)
+{
+ if (vl_msg_id <= ctx->vl_msg_id_max)
+ {
+ return ctx->vl_msg_id_to_vapi_msg_t[vl_msg_id];
+ }
+ return ~0;
+}
+
+vapi_error_e
+vapi_ctx_alloc (vapi_ctx_t * result)
+{
+ vapi_ctx_t ctx = calloc (1, sizeof (struct vapi_ctx_s));
+ if (!ctx)
+ {
+ return VAPI_ENOMEM;
+ }
+ ctx->context_counter = 0;
+ ctx->vapi_msg_id_t_to_vl_msg_id =
+ malloc (__vapi_metadata.count *
+ sizeof (*ctx->vapi_msg_id_t_to_vl_msg_id));
+ if (!ctx->vapi_msg_id_t_to_vl_msg_id)
+ {
+ goto fail;
+ }
+ ctx->event_cbs = calloc (__vapi_metadata.count, sizeof (*ctx->event_cbs));
+ if (!ctx->event_cbs)
+ {
+ goto fail;
+ }
+ pthread_mutex_init (&ctx->requests_mutex, NULL);
+ *result = ctx;
+ return VAPI_OK;
+fail:
+ vapi_ctx_free (ctx);
+ return VAPI_ENOMEM;
+}
+
+void
+vapi_ctx_free (vapi_ctx_t ctx)
+{
+ assert (!ctx->connected);
+ free (ctx->requests);
+ free (ctx->vapi_msg_id_t_to_vl_msg_id);
+ free (ctx->event_cbs);
+ free (ctx->vl_msg_id_to_vapi_msg_t);
+ pthread_mutex_destroy (&ctx->requests_mutex);
+ free (ctx);
+}
+
+bool
+vapi_is_msg_available (vapi_ctx_t ctx, vapi_msg_id_t id)
+{
+ return vapi_lookup_vl_msg_id (ctx, id) != UINT16_MAX;
+}
+
+vapi_error_e
+vapi_connect (vapi_ctx_t ctx, const char *name,
+ const char *chroot_prefix,
+ int max_outstanding_requests,
+ int response_queue_size, vapi_mode_e mode)
+{
+ if (response_queue_size <= 0 || max_outstanding_requests <= 0)
+ {
+ return VAPI_EINVAL;
+ }
+ ctx->requests_size = max_outstanding_requests;
+ const size_t size = ctx->requests_size * sizeof (*ctx->requests);
+ void *tmp = realloc (ctx->requests, size);
+ if (!tmp)
+ {
+ return VAPI_ENOMEM;
+ }
+ ctx->requests = tmp;
+ memset (ctx->requests, 0, size);
+ /* coverity[MISSING_LOCK] - 177211 requests_mutex is not needed here */
+ ctx->requests_start = ctx->requests_count = 0;
+ if (chroot_prefix)
+ {
+ VAPI_DBG ("set memory root path `%s'", chroot_prefix);
+ vl_set_memory_root_path ((char *) chroot_prefix);
+ }
+ static char api_map[] = "/vpe-api";
+ VAPI_DBG ("client api map `%s'", api_map);
+ if ((vl_client_api_map (api_map)) < 0)
+ {
+ return VAPI_EMAP_FAIL;
+ }
+ VAPI_DBG ("connect client `%s'", name);
+ if (vl_client_connect ((char *) name, 0, response_queue_size) < 0)
+ {
+ vl_client_api_unmap ();
+ return VAPI_ECON_FAIL;
+ }
+#if VAPI_DEBUG_CONNECT
+ VAPI_DBG ("start probing messages");
+#endif
+ int rv;
+ int i;
+ for (i = 0; i < __vapi_metadata.count; ++i)
+ {
+ vapi_message_desc_t *m = __vapi_metadata.msgs[i];
+ u8 scratch[m->name_with_crc_len + 1];
+ memcpy (scratch, m->name_with_crc, m->name_with_crc_len + 1);
+ u32 id = vl_api_get_msg_index (scratch);
+ if (~0 != id)
+ {
+ if (id > UINT16_MAX)
+ {
+ VAPI_ERR ("Returned vl_msg_id `%u' > UINT16MAX `%u'!", id,
+ UINT16_MAX);
+ rv = VAPI_EINVAL;
+ goto fail;
+ }
+ if (id > ctx->vl_msg_id_max)
+ {
+ vapi_msg_id_t *tmp = realloc (ctx->vl_msg_id_to_vapi_msg_t,
+ sizeof
+ (*ctx->vl_msg_id_to_vapi_msg_t) *
+ (id + 1));
+ if (!tmp)
+ {
+ rv = VAPI_ENOMEM;
+ goto fail;
+ }
+ ctx->vl_msg_id_to_vapi_msg_t = tmp;
+ ctx->vl_msg_id_max = id;
+ }
+ ctx->vl_msg_id_to_vapi_msg_t[id] = m->id;
+ ctx->vapi_msg_id_t_to_vl_msg_id[m->id] = id;
+#if VAPI_DEBUG_CONNECT
+ VAPI_DBG ("Message `%s' has vl_msg_id `%u'", m->name_with_crc,
+ (unsigned) id);
+#endif
+ }
+ else
+ {
+ ctx->vapi_msg_id_t_to_vl_msg_id[m->id] = UINT16_MAX;
+ VAPI_DBG ("Message `%s' not available", m->name_with_crc);
+ }
+ }
+#if VAPI_DEBUG_CONNECT
+ VAPI_DBG ("finished probing messages");
+#endif
+ if (!vapi_is_msg_available (ctx, vapi_msg_id_control_ping) ||
+ !vapi_is_msg_available (ctx, vapi_msg_id_control_ping_reply))
+ {
+ VAPI_ERR
+ ("control ping or control ping reply not available, cannot connect");
+ rv = VAPI_EINCOMPATIBLE;
+ goto fail;
+ }
+ ctx->mode = mode;
+ ctx->connected = true;
+ return VAPI_OK;
+fail:
+ vl_client_disconnect ();
+ vl_client_api_unmap ();
+ return rv;
+}
+
+vapi_error_e
+vapi_disconnect (vapi_ctx_t ctx)
+{
+ if (!ctx->connected)
+ {
+ return VAPI_EINVAL;
+ }
+ vl_client_disconnect ();
+ vl_client_api_unmap ();
+#if VAPI_DEBUG_ALLOC
+ vapi_to_be_freed_validate ();
+#endif
+ ctx->connected = false;
+ return VAPI_OK;
+}
+
+vapi_error_e
+vapi_get_fd (vapi_ctx_t ctx, int *fd)
+{
+ return VAPI_ENOTSUP;
+}
+
+vapi_error_e
+vapi_send (vapi_ctx_t ctx, void *msg)
+{
+ vapi_error_e rv = VAPI_OK;
+ if (!ctx || !msg || !ctx->connected)
+ {
+ rv = VAPI_EINVAL;
+ goto out;
+ }
+ int tmp;
+ unix_shared_memory_queue_t *q = api_main.shmem_hdr->vl_input_queue;
+#if VAPI_DEBUG
+ unsigned msgid = be16toh (*(u16 *) msg);
+ if (msgid <= ctx->vl_msg_id_max)
+ {
+ vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid];
+ if (id < __vapi_metadata.count)
+ {
+ VAPI_DBG ("send msg@%p:%u[%s]", msg, msgid,
+ __vapi_metadata.msgs[id]->name);
+ }
+ else
+ {
+ VAPI_DBG ("send msg@%p:%u[UNKNOWN]", msg, msgid);
+ }
+ }
+ else
+ {
+ VAPI_DBG ("send msg@%p:%u[UNKNOWN]", msg, msgid);
+ }
+#endif
+ tmp = unix_shared_memory_queue_add (q, (u8 *) & msg,
+ VAPI_MODE_BLOCKING ==
+ ctx->mode ? 0 : 1);
+ if (tmp < 0)
+ {
+ rv = VAPI_EAGAIN;
+ }
+out:
+ VAPI_DBG ("vapi_send() rv = %d", rv);
+ return rv;
+}
+
+vapi_error_e
+vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2)
+{
+ vapi_error_e rv = VAPI_OK;
+ if (!ctx || !msg1 || !msg2 || !ctx->connected)
+ {
+ rv = VAPI_EINVAL;
+ goto out;
+ }
+ unix_shared_memory_queue_t *q = api_main.shmem_hdr->vl_input_queue;
+#if VAPI_DEBUG
+ unsigned msgid1 = be16toh (*(u16 *) msg1);
+ unsigned msgid2 = be16toh (*(u16 *) msg2);
+ const char *name1 = "UNKNOWN";
+ const char *name2 = "UNKNOWN";
+ if (msgid1 <= ctx->vl_msg_id_max)
+ {
+ vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid1];
+ if (id < __vapi_metadata.count)
+ {
+ name1 = __vapi_metadata.msgs[id]->name;
+ }
+ }
+ if (msgid2 <= ctx->vl_msg_id_max)
+ {
+ vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid2];
+ if (id < __vapi_metadata.count)
+ {
+ name2 = __vapi_metadata.msgs[id]->name;
+ }
+ }
+ VAPI_DBG ("send two: %u[%s], %u[%s]", msgid1, name1, msgid2, name2);
+#endif
+ int tmp = unix_shared_memory_queue_add2 (q, (u8 *) & msg1, (u8 *) & msg2,
+ VAPI_MODE_BLOCKING ==
+ ctx->mode ? 0 : 1);
+ if (tmp < 0)
+ {
+ rv = VAPI_EAGAIN;
+ }
+out:
+ VAPI_DBG ("vapi_send() rv = %d", rv);
+ return rv;
+}
+
+vapi_error_e
+vapi_recv (vapi_ctx_t ctx, void **msg, size_t * msg_size)
+{
+ if (!ctx || !ctx->connected || !msg || !msg_size)
+ {
+ return VAPI_EINVAL;
+ }
+ vapi_error_e rv = VAPI_OK;
+ api_main_t *am = &api_main;
+ uword data;
+
+ if (am->our_pid == 0)
+ {
+ return VAPI_EINVAL;
+ }
+
+ unix_shared_memory_queue_t *q = am->vl_input_queue;
+ VAPI_DBG ("doing shm queue sub");
+ int tmp = unix_shared_memory_queue_sub (q, (u8 *) & data, 0);
+ if (tmp == 0)
+ {
+#if VAPI_DEBUG_ALLOC
+ vapi_add_to_be_freed ((void *) data);
+#endif
+ msgbuf_t *msgbuf =
+ (msgbuf_t *) ((u8 *) data - offsetof (msgbuf_t, data));
+ if (!msgbuf->data_len)
+ {
+ vapi_msg_free (ctx, (u8 *) data);
+ return VAPI_EAGAIN;
+ }
+ *msg = (u8 *) data;
+ *msg_size = ntohl (msgbuf->data_len);
+#if VAPI_DEBUG
+ unsigned msgid = be16toh (*(u16 *) * msg);
+ if (msgid <= ctx->vl_msg_id_max)
+ {
+ vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid];
+ if (id < __vapi_metadata.count)
+ {
+ VAPI_DBG ("recv msg@%p:%u[%s]", *msg, msgid,
+ __vapi_metadata.msgs[id]->name);
+ }
+ else
+ {
+ VAPI_DBG ("recv msg@%p:%u[UNKNOWN]", *msg, msgid);
+ }
+ }
+ else
+ {
+ VAPI_DBG ("recv msg@%p:%u[UNKNOWN]", *msg, msgid);
+ }
+#endif
+ }
+ else
+ {
+ rv = VAPI_EAGAIN;
+ }
+ return rv;
+}
+
+vapi_error_e
+vapi_wait (vapi_ctx_t ctx, vapi_wait_mode_e mode)
+{
+ return VAPI_ENOTSUP;
+}
+
+static vapi_error_e
+vapi_dispatch_response (vapi_ctx_t ctx, vapi_msg_id_t id,
+ u32 context, void *msg)
+{
+ int mrv;
+ if (0 != (mrv = pthread_mutex_lock (&ctx->requests_mutex)))
+ {
+ VAPI_DBG ("pthread_mutex_lock() failed, rv=%d:%s", mrv, strerror (mrv));
+ return VAPI_MUTEX_FAILURE;
+ }
+ int tmp = ctx->requests_start;
+ const int requests_end = vapi_requests_end (ctx);
+ while (ctx->requests[tmp].context != context && tmp != requests_end)
+ {
+ ++tmp;
+ if (tmp == ctx->requests_size)
+ {
+ tmp = 0;
+ }
+ }
+ VAPI_DBG ("dispatch, search from %d, %s at %d", ctx->requests_start,
+ ctx->requests[tmp].context == context ? "matched" : "stopped",
+ tmp);
+ vapi_error_e rv = VAPI_OK;
+ if (ctx->requests[tmp].context == context)
+ {
+ while (ctx->requests_start != tmp)
+ {
+ VAPI_ERR ("No response to req with context=%u",
+ (unsigned) ctx->requests[tmp].context);
+ ctx->requests[ctx->requests_start].callback (ctx,
+ ctx->requests
+ [ctx->
+ requests_start].callback_ctx,
+ VAPI_ENORESP, true,
+ NULL);
+ memset (&ctx->requests[ctx->requests_start], 0,
+ sizeof (ctx->requests[ctx->requests_start]));
+ ++ctx->requests_start;
+ --ctx->requests_count;
+ if (ctx->requests_start == ctx->requests_size)
+ {
+ ctx->requests_start = 0;
+ }
+ }
+ // now ctx->requests_start == tmp
+ int payload_offset = vapi_get_payload_offset (id);
+ void *payload = ((u8 *) msg) + payload_offset;
+ bool is_last = true;
+ if (ctx->requests[tmp].is_dump)
+ {
+ if (vapi_msg_id_control_ping_reply == id)
+ {
+ payload = NULL;
+ }
+ else
+ {
+ is_last = false;
+ }
+ }
+ if (payload_offset != -1)
+ {
+ rv =
+ ctx->requests[tmp].callback (ctx, ctx->requests[tmp].callback_ctx,
+ VAPI_OK, is_last, payload);
+ }
+ else
+ {
+ /* this is a message without payload, so bend the callback a little
+ */
+ rv =
+ ((vapi_error_e (*)(vapi_ctx_t, void *, vapi_error_e, bool))
+ ctx->requests[tmp].callback) (ctx,
+ ctx->requests[tmp].callback_ctx,
+ VAPI_OK, is_last);
+ }
+ if (is_last)
+ {
+ memset (&ctx->requests[ctx->requests_start], 0,
+ sizeof (ctx->requests[ctx->requests_start]));
+ ++ctx->requests_start;
+ --ctx->requests_count;
+ if (ctx->requests_start == ctx->requests_size)
+ {
+ ctx->requests_start = 0;
+ }
+ }
+ VAPI_DBG ("after dispatch, req start = %d, end = %d, count = %d",
+ ctx->requests_start, requests_end, ctx->requests_count);
+ }
+ if (0 != (mrv = pthread_mutex_unlock (&ctx->requests_mutex)))
+ {
+ VAPI_DBG ("pthread_mutex_unlock() failed, rv=%d:%s", mrv,
+ strerror (mrv));
+ abort (); /* this really shouldn't happen */
+ }
+ return rv;
+}
+
+static vapi_error_e
+vapi_dispatch_event (vapi_ctx_t ctx, vapi_msg_id_t id, void *msg)
+{
+ if (ctx->event_cbs[id].cb)
+ {
+ return ctx->event_cbs[id].cb (ctx, ctx->event_cbs[id].ctx, msg);
+ }
+ else if (ctx->generic_cb.cb)
+ {
+ return ctx->generic_cb.cb (ctx, ctx->generic_cb.ctx, id, msg);
+ }
+ else
+ {
+ VAPI_DBG
+ ("No handler/generic handler for msg id %u[%s], message ignored",
+ (unsigned) id, __vapi_metadata.msgs[id]->name);
+ }
+ return VAPI_OK;
+}
+
+bool
+vapi_msg_is_with_context (vapi_msg_id_t id)
+{
+ assert (id < __vapi_metadata.count);
+ return __vapi_metadata.msgs[id]->has_context;
+}
+
+vapi_error_e
+vapi_dispatch_one (vapi_ctx_t ctx)
+{
+ VAPI_DBG ("vapi_dispatch_one()");
+ void *msg;
+ size_t size;
+ vapi_error_e rv = vapi_recv (ctx, &msg, &size);
+ if (VAPI_OK != rv)
+ {
+ VAPI_DBG ("vapi_recv failed with rv=%d", rv);
+ return rv;
+ }
+ u16 vpp_id = be16toh (*(u16 *) msg);
+ if (vpp_id > ctx->vl_msg_id_max)
+ {
+ VAPI_ERR ("Unknown msg ID received, id `%u', out of range <0,%u>",
+ (unsigned) vpp_id, (unsigned) ctx->vl_msg_id_max);
+ vapi_msg_free (ctx, msg);
+ return VAPI_EINVAL;
+ }
+ if (~0 == (unsigned) ctx->vl_msg_id_to_vapi_msg_t[vpp_id])
+ {
+ VAPI_ERR ("Unknown msg ID received, id `%u' marked as not supported",
+ (unsigned) vpp_id);
+ vapi_msg_free (ctx, msg);
+ return VAPI_EINVAL;
+ }
+ const vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[vpp_id];
+ const size_t expect_size = vapi_get_message_size (id);
+ if (size < expect_size)
+ {
+ VAPI_ERR
+ ("Invalid msg received, unexpected size `%zu' < expected min `%zu'",
+ size, expect_size);
+ vapi_msg_free (ctx, msg);
+ return VAPI_EINVAL;
+ }
+ u32 context;
+ vapi_get_swap_to_host_func (id) (msg);
+ if (vapi_msg_is_with_context (id))
+ {
+ context = *(u32 *) (((u8 *) msg) + vapi_get_context_offset (id));
+ /* is this a message originating from VAPI? */
+ VAPI_DBG ("dispatch, context is %x", context);
+ if (context & context_counter_mask)
+ {
+ rv = vapi_dispatch_response (ctx, id, context, msg);
+ goto done;
+ }
+ }
+ rv = vapi_dispatch_event (ctx, id, msg);
+
+done:
+ vapi_msg_free (ctx, msg);
+ return rv;
+}
+
+vapi_error_e
+vapi_dispatch (vapi_ctx_t ctx)
+{
+ vapi_error_e rv = VAPI_OK;
+ while (!vapi_requests_empty (ctx))
+ {
+ rv = vapi_dispatch_one (ctx);
+ if (VAPI_OK != rv)
+ {
+ return rv;
+ }
+ }
+ return rv;
+}
+
+void
+vapi_set_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id,
+ vapi_event_cb callback, void *callback_ctx)
+{
+ vapi_event_cb_with_ctx *c = &ctx->event_cbs[id];
+ c->cb = callback;
+ c->ctx = callback_ctx;
+}
+
+void
+vapi_clear_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id)
+{
+ vapi_set_event_cb (ctx, id, NULL, NULL);
+}
+
+void
+vapi_set_generic_event_cb (vapi_ctx_t ctx, vapi_generic_event_cb callback,
+ void *callback_ctx)
+{
+ ctx->generic_cb.cb = callback;
+ ctx->generic_cb.ctx = callback_ctx;
+}
+
+void
+vapi_clear_generic_event_cb (vapi_ctx_t ctx)
+{
+ ctx->generic_cb.cb = NULL;
+ ctx->generic_cb.ctx = NULL;
+}
+
+u16
+vapi_lookup_vl_msg_id (vapi_ctx_t ctx, vapi_msg_id_t id)
+{
+ assert (id < __vapi_metadata.count);
+ return ctx->vapi_msg_id_t_to_vl_msg_id[id];
+}
+
+int
+vapi_get_client_index (vapi_ctx_t ctx)
+{
+ return api_main.my_client_index;
+}
+
+bool
+vapi_is_nonblocking (vapi_ctx_t ctx)
+{
+ return (VAPI_MODE_NONBLOCKING == ctx->mode);
+}
+
+size_t
+vapi_get_max_request_count (vapi_ctx_t ctx)
+{
+ return ctx->requests_size - 1;
+}
+
+int
+vapi_get_payload_offset (vapi_msg_id_t id)
+{
+ assert (id < __vapi_metadata.count);
+ return __vapi_metadata.msgs[id]->payload_offset;
+}
+
+void (*vapi_get_swap_to_host_func (vapi_msg_id_t id)) (void *msg)
+{
+ assert (id < __vapi_metadata.count);
+ return __vapi_metadata.msgs[id]->swap_to_host;
+}
+
+void (*vapi_get_swap_to_be_func (vapi_msg_id_t id)) (void *msg)
+{
+ assert (id < __vapi_metadata.count);
+ return __vapi_metadata.msgs[id]->swap_to_be;
+}
+
+size_t
+vapi_get_message_size (vapi_msg_id_t id)
+{
+ assert (id < __vapi_metadata.count);
+ return __vapi_metadata.msgs[id]->size;
+}
+
+size_t
+vapi_get_context_offset (vapi_msg_id_t id)
+{
+ assert (id < __vapi_metadata.count);
+ return __vapi_metadata.msgs[id]->context_offset;
+}
+
+vapi_msg_id_t
+vapi_register_msg (vapi_message_desc_t * msg)
+{
+ int i = 0;
+ for (i = 0; i < __vapi_metadata.count; ++i)
+ {
+ if (!strcmp
+ (msg->name_with_crc, __vapi_metadata.msgs[i]->name_with_crc))
+ {
+ /* this happens if somebody is linking together several objects while
+ * using the static inline headers, just fill in the already
+ * assigned id here so that all the objects are in sync */
+ msg->id = __vapi_metadata.msgs[i]->id;
+ return msg->id;
+ }
+ }
+ vapi_msg_id_t id = __vapi_metadata.count;
+ ++__vapi_metadata.count;
+ __vapi_metadata.msgs =
+ realloc (__vapi_metadata.msgs,
+ sizeof (*__vapi_metadata.msgs) * __vapi_metadata.count);
+ __vapi_metadata.msgs[id] = msg;
+ size_t s = strlen (msg->name_with_crc);
+ if (s > __vapi_metadata.max_len_name_with_crc)
+ {
+ __vapi_metadata.max_len_name_with_crc = s;
+ }
+ msg->id = id;
+ return id;
+}
+
+vapi_error_e
+vapi_producer_lock (vapi_ctx_t ctx)
+{
+ int mrv;
+ if (0 != (mrv = pthread_mutex_lock (&ctx->requests_mutex)))
+ {
+ VAPI_DBG ("pthread_mutex_lock() failed, rv=%d:%s", mrv, strerror (mrv));
+ (void) mrv; /* avoid warning if the above debug is not enabled */
+ return VAPI_MUTEX_FAILURE;
+ }
+ return VAPI_OK;
+}
+
+vapi_error_e
+vapi_producer_unlock (vapi_ctx_t ctx)
+{
+ int mrv;
+ if (0 != (mrv = pthread_mutex_unlock (&ctx->requests_mutex)))
+ {
+ VAPI_DBG ("pthread_mutex_unlock() failed, rv=%d:%s", mrv,
+ strerror (mrv));
+ (void) mrv; /* avoid warning if the above debug is not enabled */
+ return VAPI_MUTEX_FAILURE;
+ }
+ return VAPI_OK;
+}
+
+size_t
+vapi_get_message_count ()
+{
+ return __vapi_metadata.count;
+}
+
+const char *
+vapi_get_msg_name (vapi_msg_id_t id)
+{
+ return __vapi_metadata.msgs[id]->name;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp-api/vapi/vapi.h b/src/vpp-api/vapi/vapi.h
new file mode 100644
index 00000000..245bf654
--- /dev/null
+++ b/src/vpp-api/vapi/vapi.h
@@ -0,0 +1,263 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef vpp_api_h_included
+#define vpp_api_h_included
+
+#include <string.h>
+#include <stdbool.h>
+#include <vppinfra/types.h>
+#include <vapi/vapi_common.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @file vapi.h
+ *
+ * common vpp api C declarations
+ *
+ * This file declares the common C API functions. These include connect,
+ * disconnect and utility functions as well as the low-level vapi_send and
+ * vapi_recv API. This is only the transport layer.
+ *
+ * Message formats and higher-level APIs are generated by running the
+ * vapi_c_gen.py script (which is run for in-tree APIs as part of the build
+ * process). It's not recommended to mix the higher and lower level APIs. Due
+ * to version issues, the higher-level APIs are not part of the shared library.
+ */
+ typedef struct vapi_ctx_s *vapi_ctx_t;
+
+/**
+ * @brief allocate vapi message of given size
+ *
+ * @note message must be freed by vapi_msg_free if not consumed by vapi_send
+ * call
+ *
+ * @param ctx opaque vapi context
+ *
+ * @return pointer to message or NULL if out of memory
+ */
+ void *vapi_msg_alloc (vapi_ctx_t ctx, size_t size);
+
+/**
+ * @brief free a vapi message
+ *
+ * @note messages received by vapi_recv must be freed when no longer needed
+ *
+ * @param ctx opaque vapi context
+ * @param msg message to be freed
+ */
+ void vapi_msg_free (vapi_ctx_t ctx, void *msg);
+
+/**
+ * @brief allocate vapi context
+ *
+ * @param[out] result pointer to result variable
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_ctx_alloc (vapi_ctx_t * result);
+
+/**
+ * @brief free vapi context
+ */
+ void vapi_ctx_free (vapi_ctx_t ctx);
+
+/**
+ * @brief check if message identified by its message id is known by the vpp to
+ * which the connection is open
+ */
+ bool vapi_is_msg_available (vapi_ctx_t ctx, vapi_msg_id_t type);
+
+/**
+ * @brief connect to vpp
+ *
+ * @param ctx opaque vapi context, must be allocated using vapi_ctx_alloc first
+ * @param name application name
+ * @param chroot_prefix shared memory prefix
+ * @param max_outstanding_requests max number of outstanding requests queued
+ * @param response_queue_size size of the response queue
+ * @param mode mode of operation - blocking or nonblocking
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_connect (vapi_ctx_t ctx, const char *name,
+ const char *chroot_prefix,
+ int max_outstanding_requests,
+ int response_queue_size, vapi_mode_e mode);
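+
+/*
+ * A hedged connect/teardown sketch (error handling trimmed; the client name
+ * and queue sizes are illustrative, not prescriptive):
+ *
+ * vapi_ctx_t ctx;
+ * if (VAPI_OK != vapi_ctx_alloc (&ctx))
+ * return;
+ * if (VAPI_OK != vapi_connect (ctx, "example-client", NULL, 64, 32,
+ * VAPI_MODE_BLOCKING))
+ * return;
+ * ... use the generated message APIs or vapi_send/vapi_recv ...
+ * vapi_disconnect (ctx);
+ * vapi_ctx_free (ctx);
+ */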
+
+/**
+ * @brief disconnect from vpp
+ *
+ * @param ctx opaque vapi context
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_disconnect (vapi_ctx_t ctx);
+
+/**
+ * @brief get event file descriptor
+ *
+ * @note this file descriptor becomes readable when messages (from vpp)
+ * are waiting in queue
+ *
+ * @param ctx opaque vapi context
+ * @param[out] fd pointer to result variable
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_get_fd (vapi_ctx_t ctx, int *fd);
+
+/**
+ * @brief low-level api for sending messages to vpp
+ *
+ * @note it is not recommended to use this api directly, use generated api
+ * instead
+ *
+ * @param ctx opaque vapi context
+ * @param msg message to send
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_send (vapi_ctx_t ctx, void *msg);
+
+/**
+ * @brief low-level api for atomically sending two messages to vpp - either
+ * both messages are sent or neither one is
+ *
+ * @note it is not recommended to use this api directly, use generated api
+ * instead
+ *
+ * @param ctx opaque vapi context
+ * @param msg1 first message to send
+ * @param msg2 second message to send
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2);
+
+/**
+ * @brief low-level api for reading messages from vpp
+ *
+ * @note it is not recommended to use this api directly, use generated api
+ * instead
+ *
+ * @param ctx opaque vapi context
+ * @param[out] msg pointer to result variable containing message
+ * @param[out] msg_size pointer to result variable containing message size
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_recv (vapi_ctx_t ctx, void **msg, size_t * msg_size);
+
+/**
+ * @brief wait for connection to become readable or writable
+ *
+ * @param ctx opaque vapi context
+ * @param mode type of property to wait for - readability, writability or both
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_wait (vapi_ctx_t ctx, vapi_wait_mode_e mode);
+
+/**
+ * @brief pick next message sent by vpp and call the appropriate callback
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_dispatch_one (vapi_ctx_t ctx);
+
+/**
+ * @brief loop vapi_dispatch_one until responses to all currently outstanding
+ * requests have been received and their callbacks called
+ *
+ * @note the dispatch loop is interrupted if any error is encountered or
+ * returned from the callback, in which case this error is returned as the
+ * result of vapi_dispatch. In this case it might be necessary to call dispatch
+ * again to process the remaining messages. Returning VAPI_EUSER from
+ * a callback allows the user to break the dispatch loop (and distinguish
+ * this case in the calling code from other failures). VAPI never returns
+ * VAPI_EUSER on its own.
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e vapi_dispatch (vapi_ctx_t ctx);
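+
+/*
+ * A minimal sketch of draining outstanding requests (assumes a connected
+ * ctx; in nonblocking mode a real client would also expect and handle
+ * VAPI_EAGAIN from the underlying receive):
+ *
+ * vapi_error_e rv = vapi_dispatch (ctx);
+ * if (VAPI_OK != rv)
+ * ... either a failure or VAPI_EUSER propagated from a callback ...
+ */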
+
+/** vapi event callback, invoked for messages with a specific id */
+ typedef vapi_error_e (*vapi_event_cb) (vapi_ctx_t ctx, void *callback_ctx,
+ void *payload);
+
+/**
+ * @brief set event callback to call when message with given id is dispatched
+ *
+ * @param ctx opaque vapi context
+ * @param id message id
+ * @param callback callback
+ * @param callback_ctx context pointer stored and passed to callback
+ */
+ void vapi_set_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id,
+ vapi_event_cb callback, void *callback_ctx);
+
+/**
+ * @brief clear event callback for given message id
+ *
+ * @param ctx opaque vapi context
+ * @param id message id
+ */
+ void vapi_clear_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id);
+
+/** generic vapi event callback */
+ typedef vapi_error_e (*vapi_generic_event_cb) (vapi_ctx_t ctx,
+ void *callback_ctx,
+ vapi_msg_id_t id, void *msg);
+/**
+ * @brief set generic event callback
+ *
+ * @note this callback is called by dispatch if no message-type specific
+ * callback is set (so it's a fallback callback)
+ *
+ * @param ctx opaque vapi context
+ * @param callback callback
+ * @param callback_ctx context pointer stored and passed to callback
+ */
+ void vapi_set_generic_event_cb (vapi_ctx_t ctx,
+ vapi_generic_event_cb callback,
+ void *callback_ctx);
+
+/**
+ * @brief clear generic event callback
+ *
+ * @param ctx opaque vapi context
+ */
+ void vapi_clear_generic_event_cb (vapi_ctx_t ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp-api/vapi/vapi.hpp b/src/vpp-api/vapi/vapi.hpp
new file mode 100644
index 00000000..3be78b41
--- /dev/null
+++ b/src/vpp-api/vapi/vapi.hpp
@@ -0,0 +1,905 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef vapi_hpp_included
+#define vapi_hpp_included
+
+#include <cstddef>
+#include <vector>
+#include <mutex>
+#include <queue>
+#include <deque>
+#include <tuple>
+#include <cassert>
+#include <functional>
+#include <algorithm>
+#include <atomic>
+#include <vppinfra/types.h>
+#include <vapi/vapi.h>
+#include <vapi/vapi_internal.h>
+#include <vapi/vapi_dbg.h>
+#include <vapi/vpe.api.vapi.h>
+
+#if VAPI_CPP_DEBUG_LEAKS
+#include <unordered_set>
+#endif
+
+/**
+ * @file
+ * @brief C++ VPP API
+ */
+
+namespace vapi
+{
+
+class Connection;
+
+template <typename Req, typename Resp, typename... Args> class Request;
+template <typename M> class Msg;
+template <typename M> void vapi_swap_to_be (M *msg);
+template <typename M> void vapi_swap_to_host (M *msg);
+template <typename M, typename... Args>
+M *vapi_alloc (Connection &con, Args...);
+template <typename M> vapi_msg_id_t vapi_get_msg_id_t ();
+template <typename M> class Event_registration;
+
+class Unexpected_msg_id_exception : public std::exception
+{
+public:
+ virtual const char *what () const throw ()
+ {
+ return "unexpected message id";
+ }
+};
+
+class Msg_not_available_exception : public std::exception
+{
+public:
+ virtual const char *what () const throw ()
+ {
+ return "message unavailable";
+ }
+};
+
+typedef enum {
+ /** response not ready yet */
+ RESPONSE_NOT_READY,
+
+ /** response to request is ready */
+ RESPONSE_READY,
+
+ /** no response to request (will never come) */
+ RESPONSE_NO_RESPONSE,
+} vapi_response_state_e;
+
+/**
+ * Class representing common functionality of a request - response state
+ * and context
+ */
+class Common_req
+{
+public:
+ virtual ~Common_req (){};
+
+ Connection &get_connection ()
+ {
+ return con;
+ };
+
+ vapi_response_state_e get_response_state (void) const
+ {
+ return response_state;
+ }
+
+private:
+ Connection &con;
+ Common_req (Connection &con) : con{con}, response_state{RESPONSE_NOT_READY}
+ {
+ }
+
+ void set_response_state (vapi_response_state_e state)
+ {
+ response_state = state;
+ }
+
+ virtual std::tuple<vapi_error_e, bool> assign_response (vapi_msg_id_t id,
+ void *shm_data) = 0;
+
+ void set_context (u32 context)
+ {
+ this->context = context;
+ }
+
+ u32 get_context ()
+ {
+ return context;
+ }
+
+ u32 context;
+ vapi_response_state_e response_state;
+
+ friend class Connection;
+
+ template <typename M> friend class Msg;
+
+ template <typename Req, typename Resp, typename... Args>
+ friend class Request;
+
+ template <typename Req, typename Resp, typename... Args> friend class Dump;
+
+ template <typename M> friend class Event_registration;
+};
+
+/**
+ * Class representing a connection to VPP
+ *
+ * After creating a Connection object, call connect() to actually connect
+ * to VPP. Use is_msg_available to discover whether a specific message is known
+ * and supported by the connected VPP instance.
+ */
+class Connection
+{
+public:
+ Connection (void) : vapi_ctx{0}, req_context_counter{0}, event_count{0}
+ {
+
+ vapi_error_e rv = VAPI_OK;
+ if (!vapi_ctx)
+ {
+ if (VAPI_OK != (rv = vapi_ctx_alloc (&vapi_ctx)))
+ {
+ throw std::bad_alloc ();
+ }
+ }
+ /* resize (not reserve): operator[] below requires the elements to exist */
+ events.resize (vapi_get_message_count () + 1);
+ }
+
+ Connection (const Connection &) = delete;
+
+ ~Connection (void)
+ {
+ vapi_ctx_free (vapi_ctx);
+#if VAPI_CPP_DEBUG_LEAKS
+ for (auto x : shm_data_set)
+ {
+ printf ("Leaked shm_data@%p!\n", x);
+ }
+#endif
+ }
+
+ /**
+ * @brief check if message identified by its message id is known by the
+ * vpp to which the connection is open
+ */
+ bool is_msg_available (vapi_msg_id_t type)
+ {
+ return vapi_is_msg_available (vapi_ctx, type);
+ }
+
+ /**
+ * @brief connect to vpp
+ *
+ * @param name application name
+ * @param chroot_prefix shared memory prefix
+ * @param max_outstanding_requests maximum number of outstanding requests
+ * @param response_queue_size size of the response queue
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e connect (const char *name, const char *chroot_prefix,
+ int max_outstanding_requests, int response_queue_size)
+ {
+ return vapi_connect (vapi_ctx, name, chroot_prefix,
+ max_outstanding_requests, response_queue_size,
+ VAPI_MODE_BLOCKING);
+ }
+
+ /**
+ * @brief disconnect from vpp
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e disconnect ()
+ {
+ auto x = requests.size ();
+ while (x > 0)
+ {
+ VAPI_DBG ("popping request @%p", requests.front ());
+ requests.pop_front ();
+ --x;
+ }
+ return vapi_disconnect (vapi_ctx);
+ };
+
+ /**
+ * @brief get event file descriptor
+ *
+ * @note this file descriptor becomes readable when messages (from vpp)
+ * are waiting in the queue
+ *
+ * @param[out] fd pointer to result variable
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e get_fd (int *fd)
+ {
+ return vapi_get_fd (vapi_ctx, fd);
+ }
+
+ /**
+ * @brief wait for responses from vpp and assign them to appropriate objects
+ *
+ * @param limit stop dispatch after the limit object has received its response
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e dispatch (const Common_req *limit = nullptr)
+ {
+ std::lock_guard<std::mutex> lock (dispatch_mutex);
+ vapi_error_e rv = VAPI_OK;
+ bool loop_again = true;
+ while (loop_again)
+ {
+ void *shm_data;
+ size_t shm_data_size;
+ rv = vapi_recv (vapi_ctx, &shm_data, &shm_data_size);
+ if (VAPI_OK != rv)
+ {
+ return rv;
+ }
+#if VAPI_CPP_DEBUG_LEAKS
+ on_shm_data_alloc (shm_data);
+#endif
+ std::lock_guard<std::recursive_mutex> requests_lock (requests_mutex);
+ std::lock_guard<std::recursive_mutex> events_lock (events_mutex);
+ vapi_msg_id_t id = vapi_lookup_vapi_msg_id_t (
+ vapi_ctx, be16toh (*static_cast<u16 *> (shm_data)));
+ bool has_context = vapi_msg_is_with_context (id);
+ bool break_dispatch = false;
+ Common_req *matching_req = nullptr;
+ if (has_context)
+ {
+ u32 context = *reinterpret_cast<u32 *> (
+ (static_cast<u8 *> (shm_data) + vapi_get_context_offset (id)));
+ const auto x = requests.front ();
+ matching_req = x;
+ if (context == x->context)
+ {
+ std::tie (rv, break_dispatch) =
+ x->assign_response (id, shm_data);
+ }
+ else
+ {
+ std::tie (rv, break_dispatch) =
+ x->assign_response (id, nullptr);
+ }
+ if (break_dispatch)
+ {
+ requests.pop_front ();
+ }
+ }
+ else
+ {
+ if (events[id])
+ {
+ std::tie (rv, break_dispatch) =
+ events[id]->assign_response (id, shm_data);
+ matching_req = events[id];
+ }
+ else
+ {
+ msg_free (shm_data);
+ }
+ }
+ if ((matching_req && matching_req == limit && break_dispatch) ||
+ VAPI_OK != rv)
+ {
+ return rv;
+ }
+ loop_again = !requests.empty () || (event_count > 0);
+ }
+ return rv;
+ }
+
+ /**
+ * @brief convenience wrapper function
+ */
+ vapi_error_e dispatch (const Common_req &limit)
+ {
+ return dispatch (&limit);
+ }
+
+ /**
+ * @brief wait for response to a specific request
+ *
+ * @param req request to wait for response for
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+ vapi_error_e wait_for_response (const Common_req &req)
+ {
+ if (RESPONSE_READY == req.get_response_state ())
+ {
+ return VAPI_OK;
+ }
+ return dispatch (req);
+ }
+
+private:
+ void msg_free (void *shm_data)
+ {
+#if VAPI_CPP_DEBUG_LEAKS
+ on_shm_data_free (shm_data);
+#endif
+ vapi_msg_free (vapi_ctx, shm_data);
+ }
+
+ template <template <typename XReq, typename XResp, typename... XArgs>
+ class X,
+ typename Req, typename Resp, typename... Args>
+ vapi_error_e send (X<Req, Resp, Args...> *req)
+ {
+ if (!req)
+ {
+ return VAPI_EINVAL;
+ }
+ u32 req_context =
+ req_context_counter.fetch_add (1, std::memory_order_relaxed);
+ req->request.shm_data->header.context = req_context;
+ vapi_swap_to_be<Req> (req->request.shm_data);
+ std::lock_guard<std::recursive_mutex> lock (requests_mutex);
+ vapi_error_e rv = vapi_send (vapi_ctx, req->request.shm_data);
+ if (VAPI_OK == rv)
+ {
+ VAPI_DBG ("Push %p", req);
+ requests.emplace_back (req);
+ req->set_context (req_context);
+#if VAPI_CPP_DEBUG_LEAKS
+ on_shm_data_free (req->request.shm_data);
+#endif
+ req->request.shm_data = nullptr; /* consumed by vapi_send */
+ }
+ else
+ {
+ vapi_swap_to_host<Req> (req->request.shm_data);
+ }
+ return rv;
+ }
+
+ template <template <typename XReq, typename XResp, typename... XArgs>
+ class X,
+ typename Req, typename Resp, typename... Args>
+ vapi_error_e send_with_control_ping (X<Req, Resp, Args...> *req)
+ {
+ if (!req)
+ {
+ return VAPI_EINVAL;
+ }
+ u32 req_context =
+ req_context_counter.fetch_add (1, std::memory_order_relaxed);
+ req->request.shm_data->header.context = req_context;
+ vapi_swap_to_be<Req> (req->request.shm_data);
+ std::lock_guard<std::recursive_mutex> lock (requests_mutex);
+ vapi_error_e rv = vapi_send_with_control_ping (
+ vapi_ctx, req->request.shm_data, req_context);
+ if (VAPI_OK == rv)
+ {
+ VAPI_DBG ("Push %p", req);
+ requests.emplace_back (req);
+ req->set_context (req_context);
+#if VAPI_CPP_DEBUG_LEAKS
+ on_shm_data_free (req->request.shm_data);
+#endif
+ req->request.shm_data = nullptr; /* consumed by vapi_send */
+ }
+ else
+ {
+ vapi_swap_to_host<Req> (req->request.shm_data);
+ }
+ return rv;
+ }
+
+ void unregister_request (Common_req *request)
+ {
+ std::lock_guard<std::recursive_mutex> lock (requests_mutex);
+ /* erase-remove: std::remove alone does not shrink the container */
+ requests.erase (std::remove (requests.begin (), requests.end (), request),
+ requests.end ());
+ }
+
+ template <typename M> void register_event (Event_registration<M> *event)
+ {
+ const vapi_msg_id_t id = M::get_msg_id ();
+ std::lock_guard<std::recursive_mutex> lock (events_mutex);
+ events[id] = event;
+ ++event_count;
+ }
+
+ template <typename M> void unregister_event (Event_registration<M> *event)
+ {
+ const vapi_msg_id_t id = M::get_msg_id ();
+ std::lock_guard<std::recursive_mutex> lock (events_mutex);
+ events[id] = nullptr;
+ --event_count;
+ }
+
+ vapi_ctx_t vapi_ctx;
+ std::atomic_ulong req_context_counter;
+ std::mutex dispatch_mutex;
+
+ std::recursive_mutex requests_mutex;
+ std::recursive_mutex events_mutex;
+ std::deque<Common_req *> requests;
+ std::vector<Common_req *> events;
+ int event_count;
+
+ template <typename Req, typename Resp, typename... Args>
+ friend class Request;
+
+ template <typename Req, typename Resp, typename... Args> friend class Dump;
+
+ template <typename M> friend class Result_set;
+
+ template <typename M> friend class Event_registration;
+
+ template <typename M, typename... Args>
+ friend M *vapi_alloc (Connection &con, Args...);
+
+ template <typename M> friend class Msg;
+
+#if VAPI_CPP_DEBUG_LEAKS
+ void on_shm_data_alloc (void *shm_data)
+ {
+ if (shm_data)
+ {
+ auto pos = shm_data_set.find (shm_data);
+ if (pos == shm_data_set.end ())
+ {
+ shm_data_set.insert (shm_data);
+ }
+ else
+ {
+ printf ("Double-add shm_data @%p!\n", shm_data);
+ }
+ }
+ }
+
+ void on_shm_data_free (void *shm_data)
+ {
+ auto pos = shm_data_set.find (shm_data);
+ if (pos == shm_data_set.end ())
+ {
+ printf ("Freeing untracked shm_data @%p!\n", shm_data);
+ }
+ else
+ {
+ shm_data_set.erase (pos);
+ }
+ }
+ std::unordered_set<void *> shm_data_set;
+#endif
+};
+
+template <typename Req, typename Resp, typename... Args> class Request;
+
+template <typename Req, typename Resp, typename... Args> class Dump;
+
+template <class, class = void> struct vapi_has_payload_trait : std::false_type
+{
+};
+
+template <class... T> using vapi_void_t = void;
+
+template <class T>
+struct vapi_has_payload_trait<T, vapi_void_t<decltype (&T::payload)>>
+ : std::true_type
+{
+};
+
+template <typename M> void vapi_msg_set_msg_id (vapi_msg_id_t id)
+{
+ Msg<M>::set_msg_id (id);
+}
+
+/**
+ * Class representing a message stored in shared memory
+ */
+template <typename M> class Msg
+{
+public:
+ Msg (const Msg &) = delete;
+
+ ~Msg ()
+ {
+ VAPI_DBG ("Destroy Msg<%s>@%p, shm_data@%p",
+ vapi_get_msg_name (get_msg_id ()), this, shm_data);
+ if (shm_data)
+ {
+ con.get ().msg_free (shm_data);
+ shm_data = nullptr;
+ }
+ }
+
+ static vapi_msg_id_t get_msg_id ()
+ {
+ return *msg_id_holder ();
+ }
+
+ template <typename X = M>
+ typename std::enable_if<vapi_has_payload_trait<X>::value,
+ decltype (X::payload) &>::type
+ get_payload () const
+ {
+ return shm_data->payload;
+ }
+
+private:
+ Msg (Msg<M> &&msg) : con{msg.con}
+ {
+ VAPI_DBG ("Move construct Msg<%s> from msg@%p to msg@%p, shm_data@%p",
+ vapi_get_msg_name (get_msg_id ()), &msg, this, msg.shm_data);
+ shm_data = msg.shm_data;
+ msg.shm_data = nullptr;
+ }
+
+ Msg<M> &operator= (Msg<M> &&msg)
+ {
+ VAPI_DBG ("Move assign Msg<%s> from msg@%p to msg@%p, shm_data@%p",
+ vapi_get_msg_name (get_msg_id ()), &msg, this, msg.shm_data);
+ con.get ().msg_free (shm_data);
+ con = msg.con;
+ shm_data = msg.shm_data;
+ msg.shm_data = nullptr;
+ return *this;
+ }
+
+ struct Msg_allocator : std::allocator<Msg<M>>
+ {
+ template <class U, class... Args> void construct (U *p, Args &&... args)
+ {
+ ::new ((void *)p) U (std::forward<Args> (args)...);
+ }
+
+ template <class U> struct rebind
+ {
+ typedef Msg_allocator other;
+ };
+ };
+
+ static void set_msg_id (vapi_msg_id_t id)
+ {
+ assert ((~0 == *msg_id_holder ()) || (id == *msg_id_holder ()));
+ *msg_id_holder () = id;
+ }
+
+ static vapi_msg_id_t *msg_id_holder ()
+ {
+ static vapi_msg_id_t my_id{~0};
+ return &my_id;
+ }
+
+ Msg (Connection &con, void *shm_data) throw (Msg_not_available_exception)
+ : con{con}
+ {
+ if (!con.is_msg_available (get_msg_id ()))
+ {
+ throw Msg_not_available_exception ();
+ }
+ this->shm_data = static_cast<shm_data_type *> (shm_data);
+ VAPI_DBG ("New Msg<%s>@%p shm_data@%p", vapi_get_msg_name (get_msg_id ()),
+ this, shm_data);
+ }
+
+ void assign_response (vapi_msg_id_t resp_id,
+ void *shm_data) throw (Unexpected_msg_id_exception)
+ {
+ assert (nullptr == this->shm_data);
+ if (resp_id != get_msg_id ())
+ {
+ throw Unexpected_msg_id_exception ();
+ }
+ this->shm_data = static_cast<M *> (shm_data);
+ vapi_swap_to_host<M> (this->shm_data);
+ VAPI_DBG ("Assign response to Msg<%s>@%p shm_data@%p",
+ vapi_get_msg_name (get_msg_id ()), this, shm_data);
+ }
+
+ std::reference_wrapper<Connection> con;
+ using shm_data_type = M;
+ shm_data_type *shm_data;
+
+ friend class Connection;
+
+ template <typename Req, typename Resp, typename... Args>
+ friend class Request;
+
+ template <typename Req, typename Resp, typename... Args> friend class Dump;
+
+ template <typename X> friend class Event_registration;
+
+ template <typename X> friend class Result_set;
+
+ friend struct Msg_allocator;
+
+ template <typename X> friend void vapi_msg_set_msg_id (vapi_msg_id_t id);
+};
+
+/**
+ * Class representing a simple request - with a single response message
+ */
+template <typename Req, typename Resp, typename... Args>
+class Request : public Common_req
+{
+public:
+ Request (Connection &con, Args... args,
+ std::function<vapi_error_e (Request<Req, Resp, Args...> &)>
+ callback = nullptr)
+ : Common_req{con}, callback{callback},
+ request{con, vapi_alloc<Req> (con, args...)}, response{con, nullptr}
+ {
+ }
+
+ Request (const Request &) = delete;
+
+ virtual ~Request ()
+ {
+ if (RESPONSE_NOT_READY == get_response_state ())
+ {
+ con.unregister_request (this);
+ }
+ }
+
+ vapi_error_e execute ()
+ {
+ return con.send (this);
+ }
+
+ const Msg<Req> &get_request (void) const
+ {
+ return request;
+ }
+
+ const Msg<Resp> &get_response (void)
+ {
+ return response;
+ }
+
+private:
+ virtual std::tuple<vapi_error_e, bool> assign_response (vapi_msg_id_t id,
+ void *shm_data)
+ {
+ assert (RESPONSE_NOT_READY == get_response_state ());
+ response.assign_response (id, shm_data);
+ set_response_state (RESPONSE_READY);
+ if (nullptr != callback)
+ {
+ return std::make_pair (callback (*this), true);
+ }
+ return std::make_pair (VAPI_OK, true);
+ }
+ std::function<vapi_error_e (Request<Req, Resp, Args...> &)> callback;
+ Msg<Req> request;
+ Msg<Resp> response;
+
+ friend class Connection;
+};
+
+/**
+ * Class representing iterable set of responses of the same type
+ */
+template <typename M> class Result_set
+{
+public:
+ ~Result_set ()
+ {
+ }
+
+ Result_set (const Result_set &) = delete;
+
+ bool is_complete () const
+ {
+ return complete;
+ }
+
+ size_t size () const
+ {
+ return set.size ();
+ }
+
+ using const_iterator =
+ typename std::vector<Msg<M>,
+ typename Msg<M>::Msg_allocator>::const_iterator;
+
+ const_iterator begin () const
+ {
+ return set.begin ();
+ }
+
+ const_iterator end () const
+ {
+ return set.end ();
+ }
+
+ void free_response (const_iterator pos)
+ {
+ set.erase (pos);
+ }
+
+ void free_all_responses ()
+ {
+ set.clear ();
+ }
+
+private:
+ void mark_complete ()
+ {
+ complete = true;
+ }
+
+ void assign_response (vapi_msg_id_t resp_id,
+ void *shm_data) throw (Unexpected_msg_id_exception)
+ {
+ if (resp_id != Msg<M>::get_msg_id ())
+ {
+ throw Unexpected_msg_id_exception ();
+ }
+ else if (shm_data)
+ {
+ vapi_swap_to_host<M> (static_cast<M *> (shm_data));
+ set.emplace_back (con, shm_data);
+ VAPI_DBG ("Result_set@%p emplace_back shm_data@%p", this, shm_data);
+ }
+ }
+
+ Result_set (Connection &con) : con{con}, complete{false}
+ {
+ }
+
+ Connection &con;
+ bool complete;
+ std::vector<Msg<M>, typename Msg<M>::Msg_allocator> set;
+
+ template <typename Req, typename Resp, typename... Args> friend class Dump;
+
+ template <typename X> friend class Event_registration;
+};
+
+/**
+ * Class representing a dump request - zero or more identical responses to a
+ * single request message
+ */
+template <typename Req, typename Resp, typename... Args>
+class Dump : public Common_req
+{
+public:
+ Dump (Connection &con, Args... args,
+ std::function<vapi_error_e (Dump<Req, Resp, Args...> &)> callback =
+ nullptr)
+ : Common_req{con}, request{con, vapi_alloc<Req> (con, args...)},
+ result_set{con}, callback{callback}
+ {
+ }
+
+ Dump (const Dump &) = delete;
+
+ virtual ~Dump ()
+ {
+ }
+
+ virtual std::tuple<vapi_error_e, bool> assign_response (vapi_msg_id_t id,
+ void *shm_data)
+ {
+ if (id == vapi_msg_id_control_ping_reply)
+ {
+ con.msg_free (shm_data);
+ result_set.mark_complete ();
+ set_response_state (RESPONSE_READY);
+ if (nullptr != callback)
+ {
+ return std::make_pair (callback (*this), true);
+ }
+ return std::make_pair (VAPI_OK, true);
+ }
+ else
+ {
+ result_set.assign_response (id, shm_data);
+ }
+ return std::make_pair (VAPI_OK, false);
+ }
+
+ vapi_error_e execute ()
+ {
+ return con.send_with_control_ping (this);
+ }
+
+ Msg<Req> &get_request (void)
+ {
+ return request;
+ }
+
+ using resp_type = typename Msg<Resp>::shm_data_type;
+
+ const Result_set<Resp> &get_result_set (void) const
+ {
+ return result_set;
+ }
+
+private:
+ Msg<Req> request;
+ Result_set<resp_type> result_set;
+ std::function<vapi_error_e (Dump<Req, Resp, Args...> &)> callback;
+
+ friend class Connection;
+};
+
+/**
+ * Class representing event registration - incoming events (messages) from
+ * vpp as a result of a subscription (typically a want_* simple request)
+ */
+template <typename M> class Event_registration : public Common_req
+{
+public:
+ Event_registration (
+ Connection &con,
+ std::function<vapi_error_e (Event_registration<M> &)> callback =
+ nullptr) throw (Msg_not_available_exception)
+ : Common_req{con}, result_set{con}, callback{callback}
+ {
+ if (!con.is_msg_available (M::get_msg_id ()))
+ {
+ throw Msg_not_available_exception ();
+ }
+ con.register_event (this);
+ }
+
+ Event_registration (const Event_registration &) = delete;
+
+ virtual ~Event_registration ()
+ {
+ con.unregister_event (this);
+ }
+
+ virtual std::tuple<vapi_error_e, bool> assign_response (vapi_msg_id_t id,
+ void *shm_data)
+ {
+ result_set.assign_response (id, shm_data);
+ if (nullptr != callback)
+ {
+ return std::make_pair (callback (*this), true);
+ }
+ return std::make_pair (VAPI_OK, true);
+ }
+
+ using resp_type = typename M::shm_data_type;
+
+ Result_set<resp_type> &get_result_set (void)
+ {
+ return result_set;
+ }
+
+private:
+ Result_set<resp_type> result_set;
+ std::function<vapi_error_e (Event_registration<M> &)> callback;
+};
+} // namespace vapi
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp-api/vapi/vapi_c_gen.py b/src/vpp-api/vapi/vapi_c_gen.py
new file mode 100755
index 00000000..d7a7272a
--- /dev/null
+++ b/src/vpp-api/vapi/vapi_c_gen.py
@@ -0,0 +1,693 @@
+#!/usr/bin/env python2
+
+import argparse
+import os
+import sys
+import logging
+from vapi_json_parser import Field, Struct, Message, JsonParser,\
+ SimpleType, StructType
+
+
+class CField(Field):
+ def __init__(
+ self,
+ field_name,
+ field_type,
+ array_len=None,
+ nelem_field=None):
+ super(CField, self).__init__(
+ field_name, field_type, array_len, nelem_field)
+
+ def get_c_def(self):
+ if self.len is not None:
+ return "%s %s[%d]" % (self.type.get_c_name(), self.name, self.len)
+ else:
+ return "%s %s" % (self.type.get_c_name(), self.name)
+
+ def get_swap_to_be_code(self, struct, var):
+ if self.len is not None:
+ if self.len > 0:
+ return "do { unsigned i; for (i = 0; i < %d; ++i) { %s } }"\
+ " while(0);" % (
+ self.len,
+ self.type.get_swap_to_be_code(struct, "%s[i]" % var))
+ else:
+ if self.nelem_field.needs_byte_swap():
+ nelem_field = "%s(%s%s)" % (
+ self.nelem_field.type.get_swap_to_host_func_name(),
+ struct, self.nelem_field.name)
+ else:
+ nelem_field = "%s%s" % (struct, self.nelem_field.name)
+ return (
+ "do { unsigned i; for (i = 0; i < %s; ++i) { %s } }"
+ " while(0);" %
+ (nelem_field, self.type.get_swap_to_be_code(
+ struct, "%s[i]" % var)))
+ return self.type.get_swap_to_be_code(struct, "%s" % var)
+
+ def get_swap_to_host_code(self, struct, var):
+ if self.len is not None:
+ if self.len > 0:
+ return "do { unsigned i; for (i = 0; i < %d; ++i) { %s } }"\
+ " while(0);" % (
+ self.len,
+ self.type.get_swap_to_host_code(struct, "%s[i]" % var))
+ else:
+ # nelem_field already swapped to host here...
+ return (
+ "do { unsigned i; for (i = 0; i < %s%s; ++i) { %s } }"
+ " while(0);" %
+ (struct, self.nelem_field.name,
+ self.type.get_swap_to_host_code(
+ struct, "%s[i]" % var)))
+ return self.type.get_swap_to_host_code(struct, "%s" % var)
+
+ def needs_byte_swap(self):
+ return self.type.needs_byte_swap()
+
+
+class CStruct(Struct):
+ def __init__(self, name, fields):
+ super(CStruct, self).__init__(name, fields)
+
+ def get_c_def(self):
+ return "\n".join([
+ "typedef struct __attribute__((__packed__)) {",
+ "%s;" % ";\n".join([" %s" % x.get_c_def()
+ for x in self.fields]),
+ "} %s;" % self.get_c_name()])
+
+
+class CSimpleType (SimpleType):
+
+ swap_to_be_dict = {
+ 'i16': 'htobe16', 'u16': 'htobe16',
+ 'i32': 'htobe32', 'u32': 'htobe32',
+ 'i64': 'htobe64', 'u64': 'htobe64',
+ }
+
+ swap_to_host_dict = {
+ 'i16': 'be16toh', 'u16': 'be16toh',
+ 'i32': 'be32toh', 'u32': 'be32toh',
+ 'i64': 'be64toh', 'u64': 'be64toh',
+ }
+
+ def __init__(self, name):
+ super(CSimpleType, self).__init__(name)
+
+ def get_c_name(self):
+ return self.name
+
+ def get_swap_to_be_func_name(self):
+ return self.swap_to_be_dict[self.name]
+
+ def get_swap_to_host_func_name(self):
+ return self.swap_to_host_dict[self.name]
+
+ def get_swap_to_be_code(self, struct, var):
+ x = "%s%s" % (struct, var)
+ return "%s = %s(%s);" % (x, self.get_swap_to_be_func_name(), x)
+
+ def get_swap_to_host_code(self, struct, var):
+ x = "%s%s" % (struct, var)
+ return "%s = %s(%s);" % (x, self.get_swap_to_host_func_name(), x)
+
+ def needs_byte_swap(self):
+ try:
+ self.get_swap_to_host_func_name()
+ return True
+ except KeyError:
+ pass
+ return False
+
+
+class CStructType (StructType, CStruct):
+ def __init__(self, definition, typedict, field_class):
+ super(CStructType, self).__init__(definition, typedict, field_class)
+
+ def get_c_name(self):
+ return "vapi_type_%s" % self.name
+
+ def get_swap_to_be_func_name(self):
+ return "%s_hton" % self.get_c_name()
+
+ def get_swap_to_host_func_name(self):
+ return "%s_ntoh" % self.get_c_name()
+
+ def get_swap_to_be_func_decl(self):
+ return "void %s(%s *msg)" % (
+ self.get_swap_to_be_func_name(), self.get_c_name())
+
+ def get_swap_to_be_func_def(self):
+ return "%s\n{\n%s\n}" % (
+ self.get_swap_to_be_func_decl(),
+ "\n".join([
+ " %s" % p.get_swap_to_be_code("msg->", "%s" % p.name)
+ for p in self.fields if p.needs_byte_swap()]),
+ )
+
+ def get_swap_to_host_func_decl(self):
+ return "void %s(%s *msg)" % (
+ self.get_swap_to_host_func_name(), self.get_c_name())
+
+ def get_swap_to_host_func_def(self):
+ return "%s\n{\n%s\n}" % (
+ self.get_swap_to_host_func_decl(),
+ "\n".join([
+ " %s" % p.get_swap_to_host_code("msg->", "%s" % p.name)
+ for p in self.fields if p.needs_byte_swap()]),
+ )
+
+ def get_swap_to_be_code(self, struct, var):
+ return "%s(&%s%s);" % (self.get_swap_to_be_func_name(), struct, var)
+
+ def get_swap_to_host_code(self, struct, var):
+ return "%s(&%s%s);" % (self.get_swap_to_host_func_name(), struct, var)
+
+ def needs_byte_swap(self):
+ for f in self.fields:
+ if f.needs_byte_swap():
+ return True
+ return False
+
+
+class CMessage (Message):
+ def __init__(self, logger, definition, typedict,
+ struct_type_class, simple_type_class, field_class):
+ super(CMessage, self).__init__(logger, definition, typedict,
+ struct_type_class, simple_type_class,
+ field_class)
+ self.payload_members = [
+ " %s" % p.get_c_def()
+ for p in self.fields
+ if p.type != self.header
+ ]
+
+ def has_payload(self):
+ return len(self.payload_members) > 0
+
+ def get_msg_id_name(self):
+ return "vapi_msg_id_%s" % self.name
+
+ def get_c_name(self):
+ return "vapi_msg_%s" % self.name
+
+ def get_payload_struct_name(self):
+ return "vapi_payload_%s" % self.name
+
+ def get_alloc_func_vla_field_length_name(self, field):
+ return "%s_array_size" % field.name
+
+ def get_alloc_func_name(self):
+ return "vapi_alloc_%s" % self.name
+
+ def get_alloc_vla_param_names(self):
+ return [self.get_alloc_func_vla_field_length_name(f)
+ for f in self.fields
+ if f.nelem_field is not None]
+
+ def get_alloc_func_decl(self):
+ return "%s* %s(struct vapi_ctx_s *ctx%s)" % (
+ self.get_c_name(),
+ self.get_alloc_func_name(),
+ "".join([", size_t %s" % n for n in
+ self.get_alloc_vla_param_names()]))
+
+ def get_alloc_func_def(self):
+ extra = []
+ if self.header.has_field('client_index'):
+ extra.append(
+ " msg->header.client_index = vapi_get_client_index(ctx);")
+ if self.header.has_field('context'):
+ extra.append(" msg->header.context = 0;")
+ return "\n".join([
+ "%s" % self.get_alloc_func_decl(),
+ "{",
+ " %s *msg = NULL;" % self.get_c_name(),
+ " const size_t size = sizeof(%s)%s;" % (
+ self.get_c_name(),
+ "".join([
+ " + sizeof(msg->payload.%s[0]) * %s" % (
+ f.name,
+ self.get_alloc_func_vla_field_length_name(f))
+ for f in self.fields
+ if f.nelem_field is not None
+ ])),
+ " /* cast here required to play nicely with C++ world ... */",
+ " msg = (%s*)vapi_msg_alloc(ctx, size);" % self.get_c_name(),
+ " if (!msg) {",
+ " return NULL;",
+ " }",
+ ] + extra + [
+ " msg->header._vl_msg_id = vapi_lookup_vl_msg_id(ctx, %s);" %
+ self.get_msg_id_name(),
+ "\n".join([" msg->payload.%s = %s;" % (
+ f.nelem_field.name,
+ self.get_alloc_func_vla_field_length_name(f))
+ for f in self.fields
+ if f.nelem_field is not None]),
+ " return msg;",
+ "}"])
+
+ def get_calc_msg_size_func_name(self):
+ return "vapi_calc_%s_msg_size" % self.name
+
+ def get_calc_msg_size_func_decl(self):
+ return "uword %s(%s *msg)" % (
+ self.get_calc_msg_size_func_name(),
+ self.get_c_name())
+
+ def get_calc_msg_size_func_def(self):
+ return "\n".join([
+ "%s" % self.get_calc_msg_size_func_decl(),
+ "{",
+ " return sizeof(*msg)%s;" %
+ "".join(["+ msg->payload.%s * sizeof(msg->payload.%s[0])" % (
+ f.nelem_field.name,
+ f.name)
+ for f in self.fields
+ if f.nelem_field is not None
+ ]),
+ "}",
+ ])
+
+ def get_c_def(self):
+ if self.has_payload():
+ return "\n".join([
+ "typedef struct __attribute__ ((__packed__)) {",
+ "%s; " %
+ ";\n".join(self.payload_members),
+ "} %s;" % self.get_payload_struct_name(),
+ "",
+ "typedef struct __attribute__ ((__packed__)) {",
+ (" %s %s;" % (self.header.get_c_name(),
+ self.fields[0].name)
+ if self.header is not None else ""),
+ " %s payload;" % self.get_payload_struct_name(),
+ "} %s;" % self.get_c_name(), ])
+ else:
+ return "\n".join([
+ "typedef struct __attribute__ ((__packed__)) {",
+ (" %s %s;" % (self.header.get_c_name(),
+ self.fields[0].name)
+ if self.header is not None else ""),
+ "} %s;" % self.get_c_name(), ])
+
+ def get_swap_payload_to_host_func_name(self):
+ return "%s_payload_ntoh" % self.get_c_name()
+
+ def get_swap_payload_to_be_func_name(self):
+ return "%s_payload_hton" % self.get_c_name()
+
+ def get_swap_payload_to_host_func_decl(self):
+ return "void %s(%s *payload)" % (
+ self.get_swap_payload_to_host_func_name(),
+ self.get_payload_struct_name())
+
+ def get_swap_payload_to_be_func_decl(self):
+ return "void %s(%s *payload)" % (
+ self.get_swap_payload_to_be_func_name(),
+ self.get_payload_struct_name())
+
+ def get_swap_payload_to_be_func_def(self):
+ return "%s\n{\n%s\n}" % (
+ self.get_swap_payload_to_be_func_decl(),
+ "\n".join([
+ " %s" % p.get_swap_to_be_code("payload->", "%s" % p.name)
+ for p in self.fields
+ if p.needs_byte_swap() and p.type != self.header]),
+ )
+
+ def get_swap_payload_to_host_func_def(self):
+ return "%s\n{\n%s\n}" % (
+ self.get_swap_payload_to_host_func_decl(),
+ "\n".join([
+ " %s" % p.get_swap_to_host_code("payload->", "%s" % p.name)
+ for p in self.fields
+ if p.needs_byte_swap() and p.type != self.header]),
+ )
+
+ def get_swap_to_host_func_name(self):
+ return "%s_ntoh" % self.get_c_name()
+
+ def get_swap_to_be_func_name(self):
+ return "%s_hton" % self.get_c_name()
+
+ def get_swap_to_host_func_decl(self):
+ return "void %s(%s *msg)" % (
+ self.get_swap_to_host_func_name(), self.get_c_name())
+
+ def get_swap_to_be_func_decl(self):
+ return "void %s(%s *msg)" % (
+ self.get_swap_to_be_func_name(), self.get_c_name())
+
+ def get_swap_to_be_func_def(self):
+ return "\n".join([
+ "%s" % self.get_swap_to_be_func_decl(),
+ "{",
+ (" VAPI_DBG(\"Swapping `%s'@%%p to big endian\", msg);" %
+ self.get_c_name()),
+ " %s(&msg->header);" % self.header.get_swap_to_be_func_name()
+ if self.header is not None else "",
+ " %s(&msg->payload);" % self.get_swap_payload_to_be_func_name()
+ if self.has_payload() else "",
+ "}",
+ ])
+
+ def get_swap_to_host_func_def(self):
+ return "\n".join([
+ "%s" % self.get_swap_to_host_func_decl(),
+ "{",
+ (" VAPI_DBG(\"Swapping `%s'@%%p to host byte order\", msg);" %
+ self.get_c_name()),
+ " %s(&msg->header);" % self.header.get_swap_to_host_func_name()
+ if self.header is not None else "",
+ " %s(&msg->payload);" % self.get_swap_payload_to_host_func_name()
+ if self.has_payload() else "",
+ "}",
+ ])
+
+ def get_op_func_name(self):
+ return "vapi_%s" % self.name
+
+ def get_op_func_decl(self):
+ if self.reply.has_payload():
+ return "vapi_error_e %s(%s)" % (
+ self.get_op_func_name(),
+ ",\n ".join([
+ 'struct vapi_ctx_s *ctx',
+ '%s *msg' % self.get_c_name(),
+ 'vapi_error_e (*callback)(struct vapi_ctx_s *ctx',
+ ' void *callback_ctx',
+ ' vapi_error_e rv',
+ ' bool is_last',
+ ' %s *reply)' %
+ self.reply.get_payload_struct_name(),
+ 'void *callback_ctx',
+ ])
+ )
+ else:
+ return "vapi_error_e %s(%s)" % (
+ self.get_op_func_name(),
+ ",\n ".join([
+ 'struct vapi_ctx_s *ctx',
+ '%s *msg' % self.get_c_name(),
+ 'vapi_error_e (*callback)(struct vapi_ctx_s *ctx',
+ ' void *callback_ctx',
+ ' vapi_error_e rv',
+ ' bool is_last)',
+ 'void *callback_ctx',
+ ])
+ )
+
+ def get_op_func_def(self):
+ return "\n".join([
+ "%s" % self.get_op_func_decl(),
+ "{",
+ " if (!msg || !callback) {",
+ " return VAPI_EINVAL;",
+ " }",
+ " if (vapi_is_nonblocking(ctx) && vapi_requests_full(ctx)) {",
+ " return VAPI_EAGAIN;",
+ " }",
+ " vapi_error_e rv;",
+ " if (VAPI_OK != (rv = vapi_producer_lock (ctx))) {",
+ " return rv;",
+ " }",
+ " u32 req_context = vapi_gen_req_context(ctx);",
+ " msg->header.context = req_context;",
+ " %s(msg);" % self.get_swap_to_be_func_name(),
+ (" if (VAPI_OK == (rv = vapi_send_with_control_ping "
+ "(ctx, msg, req_context))) {"
+ if self.is_dump() else
+ " if (VAPI_OK == (rv = vapi_send (ctx, msg))) {"
+ ),
+ (" vapi_store_request(ctx, req_context, %s, "
+ "(vapi_cb_t)callback, callback_ctx);" %
+ ("true" if self.is_dump() else "false")),
+ " if (VAPI_OK != vapi_producer_unlock (ctx)) {",
+ " abort (); /* this really shouldn't happen */",
+ " }",
+ " if (vapi_is_nonblocking(ctx)) {",
+ " rv = VAPI_OK;",
+ " } else {",
+ " rv = vapi_dispatch(ctx);",
+ " }",
+ " } else {",
+ " %s(msg);" % self.get_swap_to_host_func_name(),
+ " if (VAPI_OK != vapi_producer_unlock (ctx)) {",
+ " abort (); /* this really shouldn't happen */",
+ " }",
+ " }",
+ " return rv;",
+ "}",
+ "",
+ ])
+
+ def get_event_cb_func_decl(self):
+ if not self.is_reply():
+ raise Exception(
+ "Cannot register event callback for non-reply message")
+ if self.has_payload():
+ return "\n".join([
+ "void vapi_set_%s_event_cb (" %
+ self.get_c_name(),
+ " struct vapi_ctx_s *ctx, ",
+ (" vapi_error_e (*callback)(struct vapi_ctx_s *ctx, "
+ "void *callback_ctx, %s *payload)," %
+ self.get_payload_struct_name()),
+ " void *callback_ctx)",
+ ])
+ else:
+ return "\n".join([
+ "void vapi_set_%s_event_cb (" %
+ self.get_c_name(),
+ " struct vapi_ctx_s *ctx, ",
+ " vapi_error_e (*callback)(struct vapi_ctx_s *ctx, "
+ "void *callback_ctx),",
+ " void *callback_ctx)",
+ ])
+
+ def get_event_cb_func_def(self):
+ if not self.is_reply():
+ raise Exception(
+ "Cannot register event callback for non-reply function")
+ return "\n".join([
+ "%s" % self.get_event_cb_func_decl(),
+ "{",
+ (" vapi_set_event_cb(ctx, %s, (vapi_event_cb)callback, "
+ "callback_ctx);" %
+ self.get_msg_id_name()),
+ "}"])
+
+ def get_c_metadata_struct_name(self):
+ return "__vapi_metadata_%s" % self.name
+
+ def get_c_constructor(self):
+ has_context = False
+ if self.header is not None:
+ has_context = self.header.has_field('context')
+ return '\n'.join([
+ 'static void __attribute__((constructor)) __vapi_constructor_%s()'
+ % self.name,
+ '{',
+ ' static const char name[] = "%s";' % self.name,
+ ' static const char name_with_crc[] = "%s_%s";'
+ % (self.name, self.crc[2:]),
+ ' static vapi_message_desc_t %s = {' %
+ self.get_c_metadata_struct_name(),
+ ' name,',
+ ' sizeof(name) - 1,',
+ ' name_with_crc,',
+ ' sizeof(name_with_crc) - 1,',
+ ' true,' if has_context else ' false,',
+ ' offsetof(%s, context),' % self.header.get_c_name()
+ if has_context else ' 0,',
+ (' offsetof(%s, payload),' % self.get_c_name())
+ if self.has_payload() else ' ~0,',
+ ' sizeof(%s),' % self.get_c_name(),
+ ' (generic_swap_fn_t)%s,' % self.get_swap_to_be_func_name(),
+ ' (generic_swap_fn_t)%s,' % self.get_swap_to_host_func_name(),
+ ' ~0,',
+ ' };',
+ '',
+ ' %s = vapi_register_msg(&%s);' %
+ (self.get_msg_id_name(), self.get_c_metadata_struct_name()),
+ ' VAPI_DBG("Assigned msg id %%d to %s", %s);' %
+ (self.name, self.get_msg_id_name()),
+ '}',
+ ])
+
+
+vapi_send_with_control_ping = """
+static inline vapi_error_e
+vapi_send_with_control_ping (vapi_ctx_t ctx, void *msg, u32 context)
+{
+ vapi_msg_control_ping *ping = vapi_alloc_control_ping (ctx);
+ if (!ping)
+ {
+ return VAPI_ENOMEM;
+ }
+ ping->header.context = context;
+ vapi_msg_control_ping_hton (ping);
+ return vapi_send2 (ctx, msg, ping);
+}
+"""
+
+
+def gen_json_unified_header(parser, logger, j, io, name):
+ logger.info("Generating header `%s'" % name)
+ orig_stdout = sys.stdout
+ sys.stdout = io
+ include_guard = "__included_%s" % (
+ j.replace(".", "_").replace("/", "_").replace("-", "_"))
+ print("#ifndef %s" % include_guard)
+ print("#define %s" % include_guard)
+ print("")
+ print("#include <stdlib.h>")
+ print("#include <stddef.h>")
+ print("#include <arpa/inet.h>")
+ print("#include <vapi/vapi_internal.h>")
+ print("#include <vapi/vapi.h>")
+ print("#include <vapi/vapi_dbg.h>")
+ print("")
+ print("#ifdef __cplusplus")
+ print("extern \"C\" {")
+ print("#endif")
+ if name == "vpe.api.vapi.h":
+ print("")
+ print("static inline vapi_error_e vapi_send_with_control_ping "
+ "(vapi_ctx_t ctx, void * msg, u32 context);")
+ else:
+ print("#include <vapi/vpe.api.vapi.h>")
+ print("")
+ for m in parser.messages_by_json[j].values():
+ print("extern vapi_msg_id_t %s;" % m.get_msg_id_name())
+ print("")
+ print("#define DEFINE_VAPI_MSG_IDS_%s\\" %
+ j.replace(".", "_").replace("/", "_").replace("-", "_").upper())
+ print("\\\n".join([
+ " vapi_msg_id_t %s;" % m.get_msg_id_name()
+ for m in parser.messages_by_json[j].values()
+ ]))
+ print("")
+ print("")
+ for t in parser.types_by_json[j].values():
+ try:
+ print("%s" % t.get_c_def())
+ print("")
+ except:
+ pass
+ for m in parser.messages_by_json[j].values():
+ print("%s" % m.get_c_def())
+ print("")
+
+ print("")
+ function_attrs = "static inline "
+ for t in parser.types_by_json[j].values():
+ print("%s%s" % (function_attrs, t.get_swap_to_be_func_def()))
+ print("")
+ print("%s%s" % (function_attrs, t.get_swap_to_host_func_def()))
+ print("")
+ for m in parser.messages_by_json[j].values():
+ if m.has_payload():
+ print("%s%s" % (function_attrs,
+ m.get_swap_payload_to_be_func_def()))
+ print("")
+ print("%s%s" % (function_attrs,
+ m.get_swap_payload_to_host_func_def()))
+ print("")
+ print("%s%s" % (function_attrs, m.get_calc_msg_size_func_def()))
+ print("")
+ print("%s%s" % (function_attrs, m.get_swap_to_be_func_def()))
+ print("")
+ print("%s%s" % (function_attrs, m.get_swap_to_host_func_def()))
+ print("")
+ for m in parser.messages_by_json[j].values():
+ if m.is_reply():
+ continue
+ print("%s%s" % (function_attrs, m.get_alloc_func_def()))
+ print("")
+ print("%s%s" % (function_attrs, m.get_op_func_def()))
+ print("")
+ print("")
+ for m in parser.messages_by_json[j].values():
+ print("%s" % m.get_c_constructor())
+ print("")
+ print("")
+ for m in parser.messages_by_json[j].values():
+ if not m.is_reply():
+ continue
+ print("%s%s;" % (function_attrs, m.get_event_cb_func_def()))
+ print("")
+ print("")
+
+ if name == "vpe.api.vapi.h":
+ print("%s" % vapi_send_with_control_ping)
+ print("")
+
+ print("#ifdef __cplusplus")
+ print("}")
+ print("#endif")
+ print("")
+ print("#endif")
+ sys.stdout = orig_stdout
+
+
+def json_to_c_header_name(json_name):
+ if json_name.endswith(".json"):
+ return "%s.vapi.h" % os.path.splitext(json_name)[0]
+ raise Exception("Unexpected json name `%s'!" % json_name)
+
+
+def gen_c_unified_headers(parser, logger, prefix):
+ if prefix == "" or prefix is None:
+ prefix = ""
+ else:
+ prefix = "%s/" % prefix
+ for j in parser.json_files:
+ with open('%s%s' % (prefix, json_to_c_header_name(j)), "w") as io:
+ gen_json_unified_header(
+ parser, logger, j, io, json_to_c_header_name(j))
+
+
+if __name__ == '__main__':
+ try:
+ verbose = int(os.getenv("V", 0))
+ except ValueError:
+ verbose = 0
+
+ if verbose >= 2:
+ log_level = 10
+ elif verbose == 1:
+ log_level = 20
+ else:
+ log_level = 40
+
+ logging.basicConfig(stream=sys.stdout, level=log_level)
+ logger = logging.getLogger("VAPI C GEN")
+ logger.setLevel(log_level)
+
+ argparser = argparse.ArgumentParser(description="VPP C API generator")
+ argparser.add_argument('files', metavar='api-file', action='append',
+ type=str, help='json api file '
+ '(may be specified multiple times)')
+ argparser.add_argument('--prefix', action='store', default=None,
+ help='path prefix')
+ args = argparser.parse_args()
+
+ jsonparser = JsonParser(logger, args.files,
+ simple_type_class=CSimpleType,
+ struct_type_class=CStructType,
+ field_class=CField,
+ message_class=CMessage)
+
+ # not using the model of having separate generated header and code files
+ # with generated symbols present in shared library (per discussion with
+ # Damjan), to avoid symbol version issues in .so
+ # gen_c_headers_and_code(jsonparser, logger, args.prefix)
+
+ gen_c_unified_headers(jsonparser, logger, args.prefix)
+
+ for e in jsonparser.exceptions:
+ logger.error(e)
diff --git a/src/vpp-api/vapi/vapi_common.h b/src/vpp-api/vapi/vapi_common.h
new file mode 100644
index 00000000..ce64469d
--- /dev/null
+++ b/src/vpp-api/vapi/vapi_common.h
@@ -0,0 +1,61 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef vapi_common_h_included
+#define vapi_common_h_included
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum
+{
+ VAPI_OK = 0, /**< success */
+ VAPI_EINVAL, /**< invalid value encountered */
+ VAPI_EAGAIN, /**< operation would block */
+ VAPI_ENOTSUP, /**< operation not supported */
+ VAPI_ENOMEM, /**< out of memory */
+ VAPI_ENORESP, /**< no response to request */
+ VAPI_EMAP_FAIL, /**< failure while mapping api */
+ VAPI_ECON_FAIL, /**< failure while connecting to vpp */
+ VAPI_EINCOMPATIBLE, /**< fundamental incompatibility while connecting to vpp
+ (control ping/control ping reply mismatch) */
+ VAPI_MUTEX_FAILURE, /**< failure manipulating internal mutex(es) */
+ VAPI_EUSER, /**< user error used for breaking dispatch,
+ never used by VAPI */
+} vapi_error_e;
+
+typedef enum
+{
+ VAPI_MODE_BLOCKING = 1, /**< operations block until response received */
+ VAPI_MODE_NONBLOCKING = 2, /**< operations never block */
+} vapi_mode_e;
+
+typedef enum
+{
+ VAPI_WAIT_FOR_READ, /**< wait until some message is readable */
+ VAPI_WAIT_FOR_WRITE, /**< wait until a message can be written */
+ VAPI_WAIT_FOR_READ_WRITE, /**< wait until a read or write can be done */
+} vapi_wait_mode_e;
+
+typedef int vapi_msg_id_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/vpp-api/vapi/vapi_cpp_gen.py b/src/vpp-api/vapi/vapi_cpp_gen.py
new file mode 100755
index 00000000..3010f3e1
--- /dev/null
+++ b/src/vpp-api/vapi/vapi_cpp_gen.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python2
+
+import argparse
+import os
+import sys
+import logging
+from vapi_c_gen import CField, CStruct, CSimpleType, CStructType, CMessage, \
+ json_to_c_header_name
+from vapi_json_parser import JsonParser
+
+
+class CppField(CField):
+ def __init__(
+ self,
+ field_name,
+ field_type,
+ array_len=None,
+ nelem_field=None):
+ super(CppField, self).__init__(
+ field_name, field_type, array_len, nelem_field)
+
+
+class CppStruct(CStruct):
+ def __init__(self, name, fields):
+ super(CppStruct, self).__init__(name, fields)
+
+
+class CppSimpleType (CSimpleType):
+ def __init__(self, name):
+ super(CppSimpleType, self).__init__(name)
+
+
+class CppStructType (CStructType, CppStruct):
+ def __init__(self, definition, typedict, field_class):
+ super(CppStructType, self).__init__(definition, typedict, field_class)
+
+
+class CppMessage (CMessage):
+ def __init__(self, logger, definition, typedict,
+ struct_type_class, simple_type_class, field_class):
+ super(CppMessage, self).__init__(
+ logger, definition, typedict, struct_type_class,
+ simple_type_class, field_class)
+
+ def get_swap_to_be_template_instantiation(self):
+ return "\n".join([
+ "template <> inline void vapi_swap_to_be<%s>(%s *msg)" %
+ (self.get_c_name(), self.get_c_name()),
+ "{",
+ " %s(msg);" % self.get_swap_to_be_func_name(),
+ "}",
+ ])
+
+ def get_swap_to_host_template_instantiation(self):
+ return "\n".join([
+ "template <> inline void vapi_swap_to_host<%s>(%s *msg)" %
+ (self.get_c_name(), self.get_c_name()),
+ "{",
+ " %s(msg);" % self.get_swap_to_host_func_name(),
+ "}",
+ ])
+
+ def get_alloc_template_instantiation(self):
+ return "\n".join([
+ "template <> inline %s* vapi_alloc<%s%s>"
+ "(Connection &con%s)" %
+ (self.get_c_name(), self.get_c_name(),
+ ", size_t" * len(self.get_alloc_vla_param_names()),
+ "".join([", size_t %s" % n for n in
+ self.get_alloc_vla_param_names()])
+ ),
+ "{",
+ " %s* result = %s(con.vapi_ctx%s);" %
+ (self.get_c_name(), self.get_alloc_func_name(),
+ "".join([", %s" % n
+ for n in self.get_alloc_vla_param_names()])),
+ "#if VAPI_CPP_DEBUG_LEAKS",
+ " con.on_shm_data_alloc(result);",
+ "#endif",
+ " return result;",
+ "}",
+ ])
+
+ def get_cpp_name(self):
+ return "%s%s" % (self.name[0].upper(), self.name[1:])
+
+ def get_req_template_name(self):
+ if self.is_dump():
+ template = "Dump"
+ else:
+ template = "Request"
+
+ return "%s<%s, %s%s>" % (
+ template,
+ self.get_c_name(),
+ self.reply.get_c_name(),
+ "".join([", size_t"] * len(self.get_alloc_vla_param_names()))
+ )
+
+ def get_req_template_instantiation(self):
+ return "template class %s;" % self.get_req_template_name()
+
+ def get_type_alias(self):
+ return "using %s = %s;" % (
+ self.get_cpp_name(), self.get_req_template_name())
+
+ def get_reply_template_name(self):
+ return "Msg<%s>" % (self.get_c_name())
+
+ def get_reply_type_alias(self):
+ return "using %s = %s;" % (
+ self.get_cpp_name(), self.get_reply_template_name())
+
+ def get_msg_class_instantiation(self):
+ return "template class Msg<%s>;" % self.get_c_name()
+
+ def get_get_msg_id_t_instantiation(self):
+ return "\n".join([
+ ("template <> inline vapi_msg_id_t vapi_get_msg_id_t<%s>()"
+ % self.get_c_name()),
+ "{",
+ " return ::%s; " % self.get_msg_id_name(),
+ "}",
+ "",
+ ("template <> inline vapi_msg_id_t "
+ "vapi_get_msg_id_t<Msg<%s>>()" % self.get_c_name()),
+ "{",
+ " return ::%s; " % self.get_msg_id_name(),
+ "}",
+ ])
+
+ def get_cpp_constructor(self):
+ return '\n'.join([
+ ('static void __attribute__((constructor)) '
+ '__vapi_cpp_constructor_%s()'
+ % self.name),
+ '{',
+ (' vapi::vapi_msg_set_msg_id<%s>(%s);' % (
+ self.get_c_name(), self.get_msg_id_name())),
+ '}',
+ ])
+
+
+def gen_json_header(parser, logger, j, io, gen_h_prefix, add_debug_comments):
+ logger.info("Generating header `%s'" % io.name)
+ orig_stdout = sys.stdout
+ sys.stdout = io
+ include_guard = "__included_hpp_%s" % (
+ j.replace(".", "_").replace("/", "_").replace("-", "_"))
+ print("#ifndef %s" % include_guard)
+ print("#define %s" % include_guard)
+ print("")
+ print("#include <vapi/vapi.hpp>")
+ print("#include <%s%s>" % (gen_h_prefix, json_to_c_header_name(j)))
+ print("")
+ print("namespace vapi {")
+ print("")
+ for m in parser.messages_by_json[j].values():
+ # utility functions need to go first, otherwise internal instantiation
+ # causes headaches ...
+ if add_debug_comments:
+ print("/* m.get_swap_to_be_template_instantiation() */")
+ print("%s" % m.get_swap_to_be_template_instantiation())
+ print("")
+ if add_debug_comments:
+ print("/* m.get_swap_to_host_template_instantiation() */")
+ print("%s" % m.get_swap_to_host_template_instantiation())
+ print("")
+ if add_debug_comments:
+ print("/* m.get_get_msg_id_t_instantiation() */")
+ print("%s" % m.get_get_msg_id_t_instantiation())
+ print("")
+ if add_debug_comments:
+ print("/* m.get_cpp_constructor() */")
+ print("%s" % m.get_cpp_constructor())
+ print("")
+ if not m.is_reply():
+ if add_debug_comments:
+ print("/* m.get_alloc_template_instantiation() */")
+ print("%s" % m.get_alloc_template_instantiation())
+ print("")
+ if add_debug_comments:
+ print("/* m.get_msg_class_instantiation() */")
+ print("%s" % m.get_msg_class_instantiation())
+ print("")
+ if m.is_reply():
+ if add_debug_comments:
+ print("/* m.get_reply_type_alias() */")
+ print("%s" % m.get_reply_type_alias())
+ continue
+ if add_debug_comments:
+ print("/* m.get_req_template_instantiation() */")
+ print("%s" % m.get_req_template_instantiation())
+ print("")
+ if add_debug_comments:
+ print("/* m.get_type_alias() */")
+ print("%s" % m.get_type_alias())
+ print("")
+ print("}") # namespace vapi
+
+ print("#endif")
+ sys.stdout = orig_stdout
+
+
+def json_to_cpp_header_name(json_name):
+ if json_name.endswith(".json"):
+ return "%s.vapi.hpp" % os.path.splitext(json_name)[0]
+ raise Exception("Unexpected json name `%s'!" % json_name)
+
+
+def gen_cpp_headers(parser, logger, prefix, gen_h_prefix,
+ add_debug_comments=False):
+ if prefix == "" or prefix is None:
+ prefix = ""
+ else:
+ prefix = "%s/" % prefix
+ if gen_h_prefix is None:
+ gen_h_prefix = ""
+ else:
+ gen_h_prefix = "%s/" % gen_h_prefix
+ for j in parser.json_files:
+ with open('%s%s' % (prefix, json_to_cpp_header_name(j)), "w") as io:
+ gen_json_header(parser, logger, j, io,
+ gen_h_prefix, add_debug_comments)
+
+
+if __name__ == '__main__':
+ try:
+ verbose = int(os.getenv("V", 0))
+ except ValueError:
+ verbose = 0
+
+ if verbose >= 2:
+ log_level = 10
+ elif verbose == 1:
+ log_level = 20
+ else:
+ log_level = 40
+
+ logging.basicConfig(stream=sys.stdout, level=log_level)
+ logger = logging.getLogger("VAPI CPP GEN")
+ logger.setLevel(log_level)
+
+ argparser = argparse.ArgumentParser(description="VPP C++ API generator")
+ argparser.add_argument('files', metavar='api-file', action='append',
+ type=str, help='json api file '
+ '(may be specified multiple times)')
+ argparser.add_argument('--prefix', action='store', default=None,
+ help='path prefix')
+ argparser.add_argument('--gen-h-prefix', action='store', default=None,
+ help='generated C header prefix')
+ args = argparser.parse_args()
+
+ jsonparser = JsonParser(logger, args.files,
+ simple_type_class=CppSimpleType,
+ struct_type_class=CppStructType,
+ field_class=CppField,
+ message_class=CppMessage)
+
+ gen_cpp_headers(jsonparser, logger, args.prefix, args.gen_h_prefix)
+
+ for e in jsonparser.exceptions:
+ logger.error(e)
diff --git a/src/vpp-api/vapi/vapi_dbg.h b/src/vpp-api/vapi/vapi_dbg.h
new file mode 100644
index 00000000..ec3a3006
--- /dev/null
+++ b/src/vpp-api/vapi/vapi_dbg.h
@@ -0,0 +1,77 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __included_vapi_debug_h__
+#define __included_vapi_debug_h__
+
+/* controls debug prints */
+#define VAPI_DEBUG (0)
+#define VAPI_DEBUG_CONNECT (0)
+#define VAPI_DEBUG_ALLOC (0)
+#define VAPI_CPP_DEBUG_LEAKS (0)
+
+#if VAPI_DEBUG
+#include <stdio.h>
+#define VAPI_DEBUG_FILE_DEF \
+ static const char *__file = NULL; \
+ { \
+ __file = strrchr (__FILE__, '/'); \
+ if (__file) \
+ { \
+ ++__file; \
+ } \
+ else \
+ { \
+ __file = __FILE__; \
+ } \
+ }
+
+#define VAPI_DBG(fmt, ...) \
+ do \
+ { \
+ VAPI_DEBUG_FILE_DEF \
+ printf ("DBG:%s:%d:%s():" fmt, __file, __LINE__, __func__, \
+ ##__VA_ARGS__); \
+ printf ("\n"); \
+ fflush (stdout); \
+ } \
+ while (0)
+
+#define VAPI_ERR(fmt, ...) \
+ do \
+ { \
+ VAPI_DEBUG_FILE_DEF \
+ printf ("ERR:%s:%d:%s():" fmt, __file, __LINE__, __func__, \
+ ##__VA_ARGS__); \
+ printf ("\n"); \
+ fflush (stdout); \
+ } \
+ while (0)
+#else
+#define VAPI_DBG(...)
+#define VAPI_ERR(...)
+#endif
+
+#endif /* __included_vapi_debug_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp-api/vapi/vapi_doc.md b/src/vpp-api/vapi/vapi_doc.md
new file mode 100644
index 00000000..0e7e29dd
--- /dev/null
+++ b/src/vpp-api/vapi/vapi_doc.md
@@ -0,0 +1,155 @@
+# VPP API module {#vapi_doc}
+
+## Overview
+
+The VPP API module allows communicating with VPP over a shared memory
+interface. The API consists of 3 parts:
+
+* common code - low-level API
+* generated code - high-level API
+* code generator - to generate your own high-level API e.g. for custom plugins
+
+### Common code
+
+#### C common code
+
+C common code represents the basic, low-level API, providing functions to
+connect/disconnect, perform message discovery and send/receive messages.
+The C variant is in vapi.h.
+
+#### C++ common code
+
+C++ common code is provided by vapi.hpp and contains high-level API templates,
+which are specialized by generated code.
+
+### Generated code
+
+Each API file present in the source tree is automatically translated to a JSON
+file, which the code generator parses to generate either C (`vapi_c_gen.py`)
+or C++ (`vapi_cpp_gen.py`) code.
+
+This can then be included in the client application and provides a convenient
+way to interact with VPP. This includes:
+
+* automatic byte-swapping
+* automatic request-response matching based on context
+* automatic casts to appropriate types (type-safety) when calling callbacks
+* automatic sending of control-pings for dump messages
+
+The API supports two modes of operation:
+
+* blocking
+* non-blocking
+
+In blocking mode, whenever an operation is initiated, the code waits until it
+can finish. This means that when sending a message, the call blocks until
+the message can be written to shared memory. Similarly, receiving a message
+blocks until a message becomes available. At a higher level, this also means
+that when making a request (e.g. `show_version`), the call blocks until the
+response comes back (e.g. `show_version_reply`).
+
+In non-blocking mode, these are decoupled: the API returns VAPI_EAGAIN whenever
+an operation cannot be performed, and after sending a request it is up to
+the client to wait for and process the response.
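+
+As a minimal sketch of choosing the mode (assuming the low-level C API from
+`vapi.h`; the client name and queue sizes below are arbitrary):
+
+```
+vapi_ctx_t ctx;
+if (VAPI_OK != vapi_ctx_alloc (&ctx))
+  return -1;
+/* the last argument selects VAPI_MODE_BLOCKING or VAPI_MODE_NONBLOCKING */
+vapi_error_e rv = vapi_connect (ctx, "example-client", NULL, 64, 32,
+                                VAPI_MODE_BLOCKING);
+```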
+
+### Code generator
+
+The Python code generator comes in two flavors (C and C++) and generates
+high-level API headers. All the generated code is contained in the headers.
+
+## Usage
+
+### Low-level API
+
+Refer to the inline API documentation in doxygen format in the `vapi.h` header
+for a description of the functions. It is recommended to use the safer,
+high-level API provided by the specialized headers (e.g. `vpe.api.vapi.h`
+or `vpe.api.vapi.hpp`).
+
+#### C high-level API
+
+##### Callbacks
+
+The C high-level API is strictly callback-based for maximum efficiency.
+Whenever an operation is initiated, a callback and a callback context are
+supplied as part of that operation. The callback is then invoked once the
+response (or multiple responses) tied to the request arrive. Callbacks are
+also invoked whenever an event arrives, if such a callback is registered.
+All pointers to responses/events point into shared memory and are freed
+immediately after the callback finishes, so the client needs to extract/copy
+any data in which it is interested.
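+
+A minimal sketch of this flow, assuming the generated `show_version` API from
+`vpe.api.vapi.h` (the callback name is hypothetical; the signatures follow the
+generator's `vapi_<name>` conventions):
+
+```
+vapi_error_e
+show_version_cb (struct vapi_ctx_s *ctx, void *callback_ctx,
+                 vapi_error_e rv, bool is_last,
+                 vapi_payload_show_version_reply *reply)
+{
+  if (VAPI_OK == rv && reply)
+    {
+      /* reply points into shared memory and is freed once this callback
+       * returns, so copy out anything needed later */
+    }
+  return VAPI_OK;
+}
+
+vapi_msg_show_version *msg = vapi_alloc_show_version (ctx);
+vapi_show_version (ctx, msg, show_version_cb, NULL); /* msg is consumed */
+```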
+
+#### Blocking mode
+
+In simple blocking mode, the whole operation (be it a simple request or a dump)
+is finished and its callback is called (potentially multiple times for dumps)
+during the function call.
+
+Example pseudo-code for a simple request in this mode:
+
+```
+vapi_show_version(message, callback, callback_context)
+
+1. generate unique internal context and assign it to message.header.context
+2. byteswap the message to network byte order
+3. send message to vpp (message is now consumed and vpp will free it)
+4. create internal "outstanding request context" which stores the callback,
+ callback context and the internal context value
+5. call dispatch, which in this mode receives and processes responses until
+ the internal "outstanding requests" queue is empty. In blocking mode, this
+ queue always contains at most one item.
+```
+
+**Note**: it is possible for different, unrelated callbacks to be called before
+the response callback is called, in cases where e.g. events are stored
+in the shared memory queue.
+
+#### Non-blocking mode
+
+In non-blocking mode, all the requests are only byte-swapped and the context
+information along with callbacks is stored locally (so in the above example,
+only steps 1-4 are executed and step 5 is skipped). Calling dispatch is up to
+the client application. This allows the client to alternate between sending
+and receiving messages, or to use a dedicated thread which calls dispatch.
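+
+As a sketch, a dedicated dispatch loop in non-blocking mode might poll the
+event file descriptor returned by `vapi_get_fd` (`<poll.h>` is assumed and
+the `keep_running` flag is hypothetical):
+
+```
+int fd;
+if (VAPI_OK == vapi_get_fd (ctx, &fd))
+  {
+    struct pollfd pfd;
+    pfd.fd = fd;
+    pfd.events = POLLIN;
+    while (keep_running)
+      {
+        /* fd becomes readable when messages from vpp are waiting in queue */
+        if (poll (&pfd, 1, 100) > 0)
+          (void) vapi_dispatch (ctx); /* invokes the stored callbacks */
+      }
+  }
+```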
+
+### C++ high-level API
+
+#### Callbacks
+
+In the C++ API, the response is automatically tied to the corresponding
+`Request`, `Dump` or `Event_registration` object. Optionally, a callback may
+be specified, which then gets called when the response is received.
+
+**Note**: responses take up shared memory space and should be freed either
+manually (in case of result sets) or automatically (by destroying the object
+owning them) when no longer needed. Once a `Request` or `Dump` object has been
+executed, it cannot be re-sent, since the request itself (stored in shared
+memory) is consumed by vpp and is no longer accessible (the internal pointer
+is set to nullptr).
+
+#### Usage
+
+##### Requests & dumps
+
+0. Create an object of `Connection` type and call `connect()` to connect to vpp.
+1. Create an object of `Request` or `Dump` type using its typedef (e.g.
+ `Show_version`).
+2. Use `get_request()` to obtain and manipulate the underlying request if
+ required.
+3. Issue `execute()` to send the request.
+4. Use either `wait_for_response()` or `dispatch()` to wait for the response.
+5. Use `get_response_state()` to get the state and `get_response()` to read
+ the response.
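+
+A minimal sketch of these steps for `show_version` (type and method names as
+generated into `vpe.api.vapi.hpp`; checking `get_response_state()` and error
+codes is omitted for brevity):
+
+```
+#include <cstdio>
+#include <vapi/vapi.hpp>
+#include <vapi/vpe.api.vapi.hpp>
+
+DEFINE_VAPI_MSG_IDS_VPE_API_JSON;  /* message-id bookkeeping, once per binary */
+
+int
+main ()
+{
+  vapi::Connection con;
+  con.connect ("example_client");                     /* step 0 */
+
+  vapi::Show_version sv (con);                        /* step 1 */
+  /* step 2 skipped - show_version has no request fields to set */
+  sv.execute ();                                      /* step 3 */
+  con.wait_for_response (sv);                         /* step 4 */
+
+  auto &payload = sv.get_response ().get_payload ();  /* step 5 */
+  printf ("version: %s\n", (char *) payload.version);
+
+  con.disconnect ();
+  return 0;
+}
+```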
+
+##### Events
+
+0. Create a `Connection` and execute the appropriate `Request` to subscribe to
+ events (e.g. `Want_stats`).
+1. Create an `Event_registration` with a template argument being the type of
+ event you are interested in.
+2. Call `dispatch()` or `wait_for_response()` to wait for the event. A callback
+ will be called when an event occurs (if passed to `Event_registration()`
+ constructor). Alternatively, read the result set.
+
+**Note**: events stored in the result set take up space in shared memory
+and should be freed regularly (e.g. in the callback, once the event is
+processed).
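+
+A minimal sketch of an event registration (assuming the generated event type
+`vapi::Ip4_arp_event`, enabled by a `Want_ip4_arp_events` request;
+`process_arp_event()` is hypothetical client code and the exact result-set
+interface is defined in `vapi.hpp`):
+
+```
+vapi::Event_registration<vapi::Ip4_arp_event> er (con);
+con.wait_for_response (er);             /* returns once an event has arrived */
+auto &rs = er.get_result_set ();
+for (auto &e : rs)
+  process_arp_event (e.get_payload ()); /* copy out whatever is needed */
+rs.free_all_responses ();               /* release the shared memory */
+```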
diff --git a/src/vpp-api/vapi/vapi_internal.h b/src/vpp-api/vapi/vapi_internal.h
new file mode 100644
index 00000000..2c51c673
--- /dev/null
+++ b/src/vpp-api/vapi/vapi_internal.h
@@ -0,0 +1,138 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef VAPI_INTERNAL_H
+#define VAPI_INTERNAL_H
+
+#include <endian.h>
+#include <string.h>
+#include <vppinfra/types.h>
+
+/**
+ * @file vapi_internal.h
+ *
+ * internal vpp api C declarations
+ *
+ * This file contains internal vpp api C declarations. It's not intended to be
+ * used by the client programmer and the API defined here might change at any
+ * time..
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct vapi_ctx_s;
+
+typedef struct __attribute__ ((__packed__))
+{
+ u16 _vl_msg_id;
+ u32 context;
+} vapi_type_msg_header1_t;
+
+typedef struct __attribute__ ((__packed__))
+{
+ u16 _vl_msg_id;
+ u32 client_index;
+ u32 context;
+} vapi_type_msg_header2_t;
+
+static inline void
+vapi_type_msg_header1_t_hton (vapi_type_msg_header1_t * h)
+{
+ h->_vl_msg_id = htobe16 (h->_vl_msg_id);
+}
+
+static inline void
+vapi_type_msg_header1_t_ntoh (vapi_type_msg_header1_t * h)
+{
+ h->_vl_msg_id = be16toh (h->_vl_msg_id);
+}
+
+static inline void
+vapi_type_msg_header2_t_hton (vapi_type_msg_header2_t * h)
+{
+ h->_vl_msg_id = htobe16 (h->_vl_msg_id);
+}
+
+static inline void
+vapi_type_msg_header2_t_ntoh (vapi_type_msg_header2_t * h)
+{
+ h->_vl_msg_id = be16toh (h->_vl_msg_id);
+}
+
+
+#include <vapi/vapi.h>
+
+typedef vapi_error_e (*vapi_cb_t) (struct vapi_ctx_s *, void *, vapi_error_e,
+ bool, void *);
+
+typedef void (*generic_swap_fn_t) (void *payload);
+
+typedef struct
+{
+ const char *name;
+ size_t name_len;
+ const char *name_with_crc;
+ size_t name_with_crc_len;
+ bool has_context;
+ int context_offset;
+ int payload_offset;
+ size_t size;
+ generic_swap_fn_t swap_to_be;
+ generic_swap_fn_t swap_to_host;
+ vapi_msg_id_t id; /* assigned at run-time */
+} vapi_message_desc_t;
+
+typedef struct
+{
+ const char *name;
+ int payload_offset;
+ size_t size;
+ void (*swap_to_be) (void *payload);
+ void (*swap_to_host) (void *payload);
+} vapi_event_desc_t;
+
+vapi_msg_id_t vapi_register_msg (vapi_message_desc_t * msg);
+u16 vapi_lookup_vl_msg_id (vapi_ctx_t ctx, vapi_msg_id_t id);
+vapi_msg_id_t vapi_lookup_vapi_msg_id_t (vapi_ctx_t ctx, u16 vl_msg_id);
+int vapi_get_client_index (vapi_ctx_t ctx);
+bool vapi_is_nonblocking (vapi_ctx_t ctx);
+bool vapi_requests_empty (vapi_ctx_t ctx);
+bool vapi_requests_full (vapi_ctx_t ctx);
+size_t vapi_get_request_count (vapi_ctx_t ctx);
+size_t vapi_get_max_request_count (vapi_ctx_t ctx);
+u32 vapi_gen_req_context (vapi_ctx_t ctx);
+void vapi_store_request (vapi_ctx_t ctx, u32 context, bool is_dump,
+ vapi_cb_t callback, void *callback_ctx);
+int vapi_get_payload_offset (vapi_msg_id_t id);
+void (*vapi_get_swap_to_host_func (vapi_msg_id_t id)) (void *payload);
+void (*vapi_get_swap_to_be_func (vapi_msg_id_t id)) (void *payload);
+size_t vapi_get_message_size (vapi_msg_id_t id);
+size_t vapi_get_context_offset (vapi_msg_id_t id);
+bool vapi_msg_is_with_context (vapi_msg_id_t id);
+size_t vapi_get_message_count ();
+const char *vapi_get_msg_name (vapi_msg_id_t id);
+
+vapi_error_e vapi_producer_lock (vapi_ctx_t ctx);
+vapi_error_e vapi_producer_unlock (vapi_ctx_t ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/vpp-api/vapi/vapi_json_parser.py b/src/vpp-api/vapi/vapi_json_parser.py
new file mode 100644
index 00000000..4e62720d
--- /dev/null
+++ b/src/vpp-api/vapi/vapi_json_parser.py
@@ -0,0 +1,305 @@
+#!/usr/bin/env python2
+
+import json
+
+
+def msg_is_reply(name):
+ return name.endswith('_reply') or name.endswith('_details') \
+ or name.endswith('_event') or name.endswith('_counters')
+
+
+class ParseError (Exception):
+ pass
+
+
+magic_prefix = "vl_api_"
+magic_suffix = "_t"
+
+
+def remove_magic(what):
+ if what.startswith(magic_prefix) and what.endswith(magic_suffix):
+ return what[len(magic_prefix): - len(magic_suffix)]
+ return what
+
+
+class Field(object):
+
+ def __init__(
+ self,
+ field_name,
+ field_type,
+ array_len=None,
+ nelem_field=None):
+ self.name = field_name
+ self.type = field_type
+ self.len = array_len
+ self.nelem_field = nelem_field
+
+ def __str__(self):
+ if self.len is None:
+ return "name: %s, type: %s" % (self.name, self.type)
+ elif self.len > 0:
+ return "name: %s, type: %s, length: %s" % (self.name, self.type,
+ self.len)
+ else:
+ return ("name: %s, type: %s, variable length stored in: %s" %
+ (self.name, self.type, self.nelem_field))
+
+
+class Type(object):
+ def __init__(self, name):
+ self.name = name
+
+
+class SimpleType (Type):
+
+ def __init__(self, name):
+ super(SimpleType, self).__init__(name)
+
+ def __str__(self):
+ return self.name
+
+
+def get_msg_header_defs(struct_type_class, field_class, typedict):
+ return [
+ struct_type_class(['msg_header1_t',
+ ['u16', '_vl_msg_id'],
+ ['u32', 'context'],
+ ],
+ typedict, field_class
+ ),
+ struct_type_class(['msg_header2_t',
+ ['u16', '_vl_msg_id'],
+ ['u32', 'client_index'],
+ ['u32', 'context'],
+ ],
+ typedict, field_class
+ ),
+ ]
+
+
+class Struct(object):
+
+ def __init__(self, name, fields):
+ self.name = name
+ self.fields = fields
+ self.field_names = [n.name for n in self.fields]
+
+
+class Message(object):
+
+ def __init__(self, logger, definition, typedict,
+ struct_type_class, simple_type_class, field_class):
+ self.request = None
+ self.logger = logger
+ m = definition
+ logger.debug("Parsing message definition `%s'" % m)
+ name = m[0]
+ self.name = name
+ logger.debug("Message name is `%s'" % name)
+ ignore = True
+ self.header = None
+ fields = []
+ for header in get_msg_header_defs(struct_type_class, field_class,
+ typedict):
+ logger.debug("Probing header `%s'" % header.name)
+ if header.is_part_of_def(m[1:]):
+ self.header = header
+ logger.debug("Found header `%s'" % header.name)
+ fields.append(field_class(field_name='header',
+ field_type=self.header))
+ ignore = False
+ break
+ if ignore and not msg_is_reply(name):
+ raise ParseError("While parsing message `%s': could not find all "
+ "common header fields" % name)
+ for field in m[1:]:
+ if len(field) == 1 and 'crc' in field:
+ self.crc = field['crc']
+ logger.debug("Found CRC `%s'" % self.crc)
+ continue
+ else:
+ field_type = field[0]
+ if field_type in typedict:
+ field_type = typedict[field_type]
+ else:
+ field_type = typedict[remove_magic(field_type)]
+ if len(field) == 2:
+ if self.header is not None and\
+ self.header.has_field(field[1]):
+ continue
+ p = field_class(field_name=field[1],
+ field_type=field_type)
+ elif len(field) == 3:
+ if field[2] == 0:
+ raise ParseError(
+ "While parsing message `%s': variable length "
+ "array `%s' doesn't have reference to member "
+ "containing the actual length" % (
+ name, field[1]))
+ p = field_class(
+ field_name=field[1],
+ field_type=field_type,
+ array_len=field[2])
+ elif len(field) == 4:
+ nelem_field = None
+ for f in fields:
+ if f.name == field[3]:
+ nelem_field = f
+ if nelem_field is None:
+ raise ParseError(
+ "While parsing message `%s': couldn't find "
+ "variable length array `%s' member containing "
+ "the actual length `%s'" % (
+ name, field[1], field[3]))
+ p = field_class(
+ field_name=field[1],
+ field_type=field_type,
+ array_len=field[2],
+ nelem_field=nelem_field)
+ else:
+ raise ParseError("Don't know how to parse message "
+ "definition for message `%s': `%s'" %
+ (name, field))
+ logger.debug("Parsed field `%s'" % p)
+ fields.append(p)
+ self.fields = fields
+
+ def is_dump(self):
+ return self.name.endswith('_dump')
+
+ def is_reply(self):
+ return msg_is_reply(self.name)
+
+
+class StructType (Type, Struct):
+
+ def __init__(self, definition, typedict, field_class):
+ t = definition
+ name = t[0]
+ fields = []
+ for field in t[1:]:
+ if len(field) == 1 and 'crc' in field:
+ self.crc = field['crc']
+ continue
+ elif len(field) == 2:
+ p = field_class(field_name=field[1],
+ field_type=typedict[field[0]])
+ elif len(field) == 3:
+ if field[2] == 0:
+ raise ParseError("While parsing type `%s': array `%s' has "
+ "variable length" % (name, field[1]))
+ p = field_class(field_name=field[1],
+ field_type=typedict[field[0]],
+ array_len=field[2])
+ else:
+ raise ParseError(
+ "Don't know how to parse type definition for "
+ "type `%s': `%s'" % (t, t[1:]))
+ fields.append(p)
+ Type.__init__(self, name)
+ Struct.__init__(self, name, fields)
+
+ def has_field(self, name):
+ return name in self.field_names
+
+ def is_part_of_def(self, definition):
+ for idx in range(len(self.fields)):
+ field = definition[idx]
+ p = self.fields[idx]
+ if field[1] != p.name:
+ return False
+ if field[0] != p.type.name:
+ raise ParseError(
+ "Unexpected field type `%s' (should be `%s'), "
+ "while parsing msg/def/field `%s/%s/%s'" %
+ (field[0], p.type.name, p.name, definition, field))
+ return True
+
+
+class JsonParser(object):
+ def __init__(self, logger, files, simple_type_class=SimpleType,
+ struct_type_class=StructType, field_class=Field,
+ message_class=Message):
+ self.messages = {}
+ self.types = {
+ x: simple_type_class(x) for x in [
+ 'i8', 'i16', 'i32', 'i64',
+ 'u8', 'u16', 'u32', 'u64',
+ 'f64'
+ ]
+ }
+
+ self.simple_type_class = simple_type_class
+ self.struct_type_class = struct_type_class
+ self.field_class = field_class
+ self.message_class = message_class
+
+ self.exceptions = []
+ self.json_files = []
+ self.types_by_json = {}
+ self.messages_by_json = {}
+ self.logger = logger
+ for f in files:
+ self.parse_json_file(f)
+ self.finalize_parsing()
+
+ def parse_json_file(self, path):
+ self.logger.info("Parsing json api file: `%s'" % path)
+ self.json_files.append(path)
+ self.types_by_json[path] = {}
+ self.messages_by_json[path] = {}
+ with open(path) as f:
+ j = json.load(f)
+ for t in j['types']:
+ try:
+ type_ = self.struct_type_class(t, self.types,
+ self.field_class)
+ if type_.name in self.types:
+ raise ParseError("Duplicate type `%s'" % type_.name)
+ except ParseError as e:
+ self.exceptions.append(e)
+ continue
+ self.types[type_.name] = type_
+ self.types_by_json[path][type_.name] = type_
+ for m in j['messages']:
+ try:
+ msg = self.message_class(self.logger, m, self.types,
+ self.struct_type_class,
+ self.simple_type_class,
+ self.field_class)
+ if msg.name in self.messages:
+ raise ParseError("Duplicate message `%s'" % msg.name)
+ except ParseError as e:
+ self.exceptions.append(e)
+ continue
+ self.messages[msg.name] = msg
+ self.messages_by_json[path][msg.name] = msg
+
+ def get_reply(self, message):
+ if self.messages[message].is_dump():
+ return self.messages["%s_details" % message[:-len("_dump")]]
+ return self.messages["%s_reply" % message]
+
+ def finalize_parsing(self):
+ if len(self.messages) == 0:
+ for e in self.exceptions:
+ self.logger.error(e)
+ raise Exception("No messages parsed.")
+ for jn, j in self.messages_by_json.items():
+ remove = []
+ for n, m in j.items():
+ try:
+ if not m.is_reply():
+ try:
+ m.reply = self.get_reply(n)
+ m.reply.request = m
+ except KeyError:
+ raise ParseError(
+ "Cannot find reply to message `%s'" % n)
+ except ParseError as e:
+ self.exceptions.append(e)
+ remove.append(n)
+
+ self.messages_by_json[jn] = {
+ k: v for k, v in j.items() if k not in remove}
diff --git a/src/vpp.am b/src/vpp.am
new file mode 100644
index 00000000..9a07cefe
--- /dev/null
+++ b/src/vpp.am
@@ -0,0 +1,142 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+bin_PROGRAMS += bin/vpp
+
+bin_vpp_SOURCES = \
+ vpp/vnet/main.c \
+ vpp/app/vpe_cli.c \
+ vpp/app/version.c \
+ vpp/oam/oam.c \
+ vpp/stats/stats.c
+
+bin_vpp_SOURCES += \
+ vpp/api/api.c \
+ vpp/api/custom_dump.c \
+ vpp/api/json_format.c
+
+if WITH_APICLI
+ bin_vpp_SOURCES += \
+ vpp/api/api_format.c \
+ vpp/api/api_main.c \
+ vpp/api/plugin.c \
+ vpp/api/plugin.h
+endif
+
+# uncomment to enable stats upload to gmond
+# bin_vpp_SOURCES += \
+# vpp/api/gmon.c
+
+bin_vpp_CFLAGS = @APICLI@
+
+nobase_include_HEADERS += \
+ vpp/api/vpe_all_api_h.h \
+ vpp/api/vpe_msg_enum.h \
+ vpp/stats/stats.api.h \
+ vpp/api/vpe.api.h
+
+API_FILES += vpp/api/vpe.api
+API_FILES += vpp/stats/stats.api
+
+BUILT_SOURCES += .version
+
+vpp/app/version.o: vpp/app/version.h
+
+.PHONY: .version
+
+VPP_VERSION = $(shell $(srcdir)/scripts/version)
+
+VPP_BUILD_DATE ?= $$(date)
+VPP_BUILD_USER ?= $$(whoami)
+VPP_BUILD_HOST ?= $$(hostname)
+
+# update version.h only when version changes, to avoid
+# unnecessary re-linking of vpp binary
+
+.version:
+ @if [ "$$(cat .version 2> /dev/null)" != "$(VPP_VERSION)" ] ; then \
+ f="vpp/app/version.h" ;\
+ echo " VERSION $$f ($(VPP_VERSION))" ;\
+ echo $(VPP_VERSION) > .version ;\
+ echo "#define VPP_BUILD_DATE \"$(VPP_BUILD_DATE)\"" > $$f ;\
+ echo "#define VPP_BUILD_USER \"$(VPP_BUILD_USER)\"" >> $$f ;\
+ echo "#define VPP_BUILD_HOST \"$(VPP_BUILD_HOST)\"" >> $$f ;\
+ echo -n "#define VPP_BUILD_TOPDIR " >> $$f ;\
+ echo "\"$$(cd $(srcdir) && git rev-parse --show-toplevel)\"" >> $$f ;\
+ echo "#define VPP_BUILD_VER \"$(VPP_VERSION)\"" >> $$f ;\
+ fi
+
+bin_vpp_LDADD = \
+ libvlibmemory.la \
+ libvlib.la \
+ libvnet.la \
+ libsvm.la \
+ libsvmdb.la \
+ libvppinfra.la \
+ -lrt -lm -lpthread -ldl
+
+bin_vpp_LDFLAGS = -Wl,--export-dynamic
+
+bin_PROGRAMS += bin/vppctl
+bin_vppctl_SOURCES = vpp/app/vppctl.c
+bin_vppctl_LDADD = libvppinfra.la
+
+if ENABLE_TESTS
+noinst_PROGRAMS += bin/test_client
+
+bin_test_client_SOURCES = \
+ vpp/api/test_client.c
+
+bin_test_client_LDADD = \
+ libvlibmemoryclient.la \
+ libsvm.la \
+ libvppinfra.la \
+ -lpthread -lm -lrt
+
+noinst_PROGRAMS += bin/test_ha
+
+bin_test_ha_SOURCES = \
+ vpp/api/test_ha.c
+
+bin_test_ha_LDADD = \
+ libvlibmemoryclient.la \
+ libsvm.la \
+ libvppinfra.la \
+ -lpthread -lm -lrt
+endif
+
+noinst_PROGRAMS += bin/summary_stats_client
+
+bin_summary_stats_client_SOURCES = \
+ vpp/api/summary_stats_client.c
+
+bin_summary_stats_client_LDADD = \
+ libvlibmemoryclient.la \
+ libsvm.la \
+ libvppinfra.la \
+ -lpthread -lm -lrt
+
+bin_PROGRAMS += bin/vpp_get_metrics
+
+bin_vpp_get_metrics_SOURCES = \
+ vpp/api/vpp_get_metrics.c
+
+bin_vpp_get_metrics_LDADD = \
+ libsvmdb.la \
+ libsvm.la \
+ libvppinfra.la \
+ -lpthread -lm -lrt
+
+CLEANFILES += vpp/app/version.h
+
+# vi:syntax=automake
diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c
new file mode 100644
index 00000000..d020314b
--- /dev/null
+++ b/src/vpp/api/api.c
@@ -0,0 +1,2468 @@
+/*
+ *------------------------------------------------------------------
+ * api.c - message handler registration
+ *
+ * Copyright (c) 2010-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <pwd.h>
+#include <grp.h>
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+
+#include <vnet/api_errno.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_neighbor.h>
+#if WITH_LIBSSL > 0
+#include <vnet/srv6/sr.h>
+#endif
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/classify/input_acl.h>
+#include <vnet/l2/l2_classify.h>
+#include <vnet/map/map.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip_source_and_port_range_check.h>
+#include <vnet/ip/punt.h>
+#include <vnet/feature/feature.h>
+
+#undef BIHASH_TYPE
+#undef __included_bihash_template_h__
+#include <vnet/l2/l2_fib.h>
+
+#include <vpp/stats/stats.h>
+#include <vpp/oam/oam.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/interface.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_bd.h>
+#include <vpp/api/vpe_msg_enum.h>
+#include <vnet/span/span.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+#define vl_endianfun /* define endian swap functions */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+#include <vlibapi/api_helper_macros.h>
+#define foreach_vpe_api_msg \
+_(WANT_OAM_EVENTS, want_oam_events) \
+_(OAM_ADD_DEL, oam_add_del) \
+_(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \
+_(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \
+_(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \
+_(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \
+_(CREATE_VLAN_SUBIF, create_vlan_subif) \
+_(CREATE_SUBIF, create_subif) \
+_(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \
+_(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \
+_(RESET_FIB, reset_fib) \
+_(CREATE_LOOPBACK, create_loopback) \
+_(CREATE_LOOPBACK_INSTANCE, create_loopback_instance) \
+_(CONTROL_PING, control_ping) \
+_(CLI, cli) \
+_(CLI_INBAND, cli_inband) \
+_(SET_ARP_NEIGHBOR_LIMIT, set_arp_neighbor_limit) \
+_(L2_PATCH_ADD_DEL, l2_patch_add_del) \
+_(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \
+_(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \
+_(GET_NODE_INDEX, get_node_index) \
+_(ADD_NODE_NEXT, add_node_next) \
+_(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \
+_(SHOW_VERSION, show_version) \
+_(INTERFACE_NAME_RENUMBER, interface_name_renumber) \
+_(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \
+_(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \
+_(INPUT_ACL_SET_INTERFACE, input_acl_set_interface) \
+_(DELETE_LOOPBACK, delete_loopback) \
+_(BD_IP_MAC_ADD_DEL, bd_ip_mac_add_del) \
+_(GET_NODE_GRAPH, get_node_graph) \
+_(IOAM_ENABLE, ioam_enable) \
+_(IOAM_DISABLE, ioam_disable) \
+_(GET_NEXT_INDEX, get_next_index) \
+_(PG_CREATE_INTERFACE, pg_create_interface) \
+_(PG_CAPTURE, pg_capture) \
+_(PG_ENABLE_DISABLE, pg_enable_disable) \
+_(IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL, \
+ ip_source_and_port_range_check_add_del) \
+_(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, \
+ ip_source_and_port_range_check_interface_add_del) \
+_(DELETE_SUBIF, delete_subif) \
+_(PUNT, punt) \
+_(PUNT_SOCKET_REGISTER, punt_socket_register) \
+_(PUNT_SOCKET_DEREGISTER, punt_socket_deregister) \
+_(FEATURE_ENABLE_DISABLE, feature_enable_disable)
+
+#define QUOTE_(x) #x
+#define QUOTE(x) QUOTE_(x)
+typedef enum
+{
+ RESOLVE_IP4_ADD_DEL_ROUTE = 1,
+ RESOLVE_IP6_ADD_DEL_ROUTE,
+} resolve_t;
+
+static vlib_node_registration_t vpe_resolver_process_node;
+extern vpe_api_main_t vpe_api_main;
+
+static int arp_change_delete_callback (u32 pool_index, u8 * notused);
+static int nd_change_delete_callback (u32 pool_index, u8 * notused);
+
+/* Clean up all registrations belonging to the indicated client */
+static clib_error_t *
+memclnt_delete_callback (u32 client_index)
+{
+ vpe_api_main_t *vam = &vpe_api_main;
+ vpe_client_registration_t *rp;
+ uword *p;
+ int stats_memclnt_delete_callback (u32 client_index);
+
+ stats_memclnt_delete_callback (client_index);
+
+#define _(a) \
+ p = hash_get (vam->a##_registration_hash, client_index); \
+ if (p) { \
+ rp = pool_elt_at_index (vam->a##_registrations, p[0]); \
+ pool_put (vam->a##_registrations, rp); \
+ hash_unset (vam->a##_registration_hash, client_index); \
+ }
+ foreach_registration_hash;
+#undef _
+ return 0;
+}
+
+VL_MSG_API_REAPER_FUNCTION (memclnt_delete_callback);
+
+pub_sub_handler (oam_events, OAM_EVENTS);
+
+#define RESOLUTION_EVENT 1
+#define RESOLUTION_PENDING_EVENT 2
+#define IP4_ARP_EVENT 3
+#define IP6_ND_EVENT 4
+
+int ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp);
+
+int ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp);
+
+static void
+handle_ip4_arp_event (u32 pool_index)
+{
+ vpe_api_main_t *vam = &vpe_api_main;
+ vnet_main_t *vnm = vam->vnet_main;
+ vlib_main_t *vm = vam->vlib_main;
+ vl_api_ip4_arp_event_t *event;
+ vl_api_ip4_arp_event_t *mp;
+ unix_shared_memory_queue_t *q;
+
+ /* Client can cancel, die, etc. */
+ if (pool_is_free_index (vam->arp_events, pool_index))
+ return;
+
+ event = pool_elt_at_index (vam->arp_events, pool_index);
+
+ q = vl_api_client_index_to_input_queue (event->client_index);
+ if (!q)
+ {
+ (void) vnet_add_del_ip4_arp_change_event
+ (vnm, arp_change_delete_callback,
+ event->pid, &event->address,
+ vpe_resolver_process_node.index, IP4_ARP_EVENT,
+ ~0 /* pool index, notused */ , 0 /* is_add */ );
+ return;
+ }
+
+ if (q->cursize < q->maxsize)
+ {
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memcpy (mp, event, sizeof (*mp));
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ }
+ else
+ {
+ static f64 last_time;
+ /*
+ * Throttle syslog msgs.
+ * It's pretty tempting to just revoke the registration...
+ */
+ if (vlib_time_now (vm) > last_time + 10.0)
+ {
+ clib_warning ("arp event for %U to pid %d: queue stuffed!",
+ format_ip4_address, &event->address, event->pid);
+ last_time = vlib_time_now (vm);
+ }
+ }
+}
+
+void
+handle_ip6_nd_event (u32 pool_index)
+{
+ vpe_api_main_t *vam = &vpe_api_main;
+ vnet_main_t *vnm = vam->vnet_main;
+ vlib_main_t *vm = vam->vlib_main;
+ vl_api_ip6_nd_event_t *event;
+ vl_api_ip6_nd_event_t *mp;
+ unix_shared_memory_queue_t *q;
+
+ /* Client can cancel, die, etc. */
+ if (pool_is_free_index (vam->nd_events, pool_index))
+ return;
+
+ event = pool_elt_at_index (vam->nd_events, pool_index);
+
+ q = vl_api_client_index_to_input_queue (event->client_index);
+ if (!q)
+ {
+ (void) vnet_add_del_ip6_nd_change_event
+ (vnm, nd_change_delete_callback,
+ event->pid, &event->address,
+ vpe_resolver_process_node.index, IP6_ND_EVENT,
+ ~0 /* pool index, notused */ , 0 /* is_add */ );
+ return;
+ }
+
+ if (q->cursize < q->maxsize)
+ {
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memcpy (mp, event, sizeof (*mp));
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ }
+ else
+ {
+ static f64 last_time;
+ /*
+ * Throttle syslog msgs.
+ * It's pretty tempting to just revoke the registration...
+ */
+ if (vlib_time_now (vm) > last_time + 10.0)
+ {
+ clib_warning ("ip6 nd event for %U to pid %d: queue stuffed!",
+ format_ip6_address, &event->address, event->pid);
+ last_time = vlib_time_now (vm);
+ }
+ }
+}
+
+static uword
+resolver_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ volatile f64 timeout = 100.0;
+ volatile uword *event_data = 0;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+
+ uword event_type =
+ vlib_process_get_events (vm, (uword **) & event_data);
+
+ int i;
+ switch (event_type)
+ {
+ case RESOLUTION_PENDING_EVENT:
+ timeout = 1.0;
+ break;
+
+ case RESOLUTION_EVENT:
+ clib_warning ("resolver: BOGUS TYPE");
+ break;
+
+ case IP4_ARP_EVENT:
+ for (i = 0; i < vec_len (event_data); i++)
+ handle_ip4_arp_event (event_data[i]);
+ break;
+
+ case IP6_ND_EVENT:
+ for (i = 0; i < vec_len (event_data); i++)
+ handle_ip6_nd_event (event_data[i]);
+ break;
+
+ case ~0: /* timeout */
+ break;
+ }
+
+ vec_reset_length (event_data);
+ }
+ return 0; /* or not */
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vpe_resolver_process_node,static) = {
+ .function = resolver_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vpe-route-resolver-process",
+};
+/* *INDENT-ON* */
+
+static void
+vl_api_sw_interface_set_vpath_t_handler (vl_api_sw_interface_set_vpath_t * mp)
+{
+ vl_api_sw_interface_set_vpath_reply_t *rmp;
+ int rv = 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VPATH, mp->enable);
+ vnet_feature_enable_disable ("ip4-unicast", "vpath-input-ip4",
+ sw_if_index, mp->enable, 0, 0);
+ vnet_feature_enable_disable ("ip4-multicast", "vpath-input-ip4",
+ sw_if_index, mp->enable, 0, 0);
+ vnet_feature_enable_disable ("ip6-unicast", "vpath-input-ip6",
+ sw_if_index, mp->enable, 0, 0);
+ vnet_feature_enable_disable ("ip6-multicast", "vpath-input-ip6",
+ sw_if_index, mp->enable, 0, 0);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_VPATH_REPLY);
+}
+
+static void
+ vl_api_sw_interface_set_l2_xconnect_t_handler
+ (vl_api_sw_interface_set_l2_xconnect_t * mp)
+{
+ vl_api_sw_interface_set_l2_xconnect_reply_t *rmp;
+ int rv = 0;
+ u32 rx_sw_if_index = ntohl (mp->rx_sw_if_index);
+ u32 tx_sw_if_index = ntohl (mp->tx_sw_if_index);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+
+ VALIDATE_RX_SW_IF_INDEX (mp);
+
+ if (mp->enable)
+ {
+ VALIDATE_TX_SW_IF_INDEX (mp);
+ rv = set_int_l2_mode (vm, vnm, MODE_L2_XC,
+ rx_sw_if_index, 0, 0, 0, tx_sw_if_index);
+ }
+ else
+ {
+ rv = set_int_l2_mode (vm, vnm, MODE_L3, rx_sw_if_index, 0, 0, 0, 0);
+ }
+
+ BAD_RX_SW_IF_INDEX_LABEL;
+ BAD_TX_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_L2_XCONNECT_REPLY);
+}
+
+static void
+ vl_api_sw_interface_set_l2_bridge_t_handler
+ (vl_api_sw_interface_set_l2_bridge_t * mp)
+{
+ bd_main_t *bdm = &bd_main;
+ vl_api_sw_interface_set_l2_bridge_reply_t *rmp;
+ int rv = 0;
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+
+ VALIDATE_RX_SW_IF_INDEX (mp);
+ u32 rx_sw_if_index = ntohl (mp->rx_sw_if_index);
+
+
+ if (mp->enable)
+ {
+ VALIDATE_BD_ID (mp);
+ u32 bd_id = ntohl (mp->bd_id);
+ u32 bd_index = bd_find_or_add_bd_index (bdm, bd_id);
+ u32 bvi = mp->bvi;
+ u8 shg = mp->shg;
+ rv = set_int_l2_mode (vm, vnm, MODE_L2_BRIDGE,
+ rx_sw_if_index, bd_index, bvi, shg, 0);
+ }
+ else
+ {
+ rv = set_int_l2_mode (vm, vnm, MODE_L3, rx_sw_if_index, 0, 0, 0, 0);
+ }
+
+ BAD_RX_SW_IF_INDEX_LABEL;
+ BAD_BD_ID_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_L2_BRIDGE_REPLY);
+}
+
+static void
+vl_api_bd_ip_mac_add_del_t_handler (vl_api_bd_ip_mac_add_del_t * mp)
+{
+ bd_main_t *bdm = &bd_main;
+ vl_api_bd_ip_mac_add_del_reply_t *rmp;
+ int rv = 0;
+ u32 bd_id = ntohl (mp->bd_id);
+ u32 bd_index;
+ uword *p;
+
+ if (bd_id == 0)
+ {
+ rv = VNET_API_ERROR_BD_NOT_MODIFIABLE;
+ goto out;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
+ }
+
+ bd_index = p[0];
+ if (bd_add_del_ip_mac (bd_index, mp->ip_address,
+ mp->mac_address, mp->is_ipv6, mp->is_add))
+ rv = VNET_API_ERROR_UNSPECIFIED;
+
+out:
+ REPLY_MACRO (VL_API_BD_IP_MAC_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp)
+{
+ vl_api_create_vlan_subif_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = (u32) ~ 0;
+ vnet_hw_interface_t *hi;
+ int rv = 0;
+ u32 id;
+ vnet_sw_interface_t template;
+ uword *p;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u64 sup_and_sub_key;
+ unix_shared_memory_queue_t *q;
+ clib_error_t *error;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ hi = vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index));
+
+ if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE)
+ {
+ rv = VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED;
+ goto out;
+ }
+
+ id = ntohl (mp->vlan_id);
+ if (id == 0 || id > 4095)
+ {
+ rv = VNET_API_ERROR_INVALID_VLAN;
+ goto out;
+ }
+
+ sup_and_sub_key = ((u64) (hi->sw_if_index) << 32) | (u64) id;
+
+ p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
+ if (p)
+ {
+ rv = VNET_API_ERROR_VLAN_ALREADY_EXISTS;
+ goto out;
+ }
+
+ memset (&template, 0, sizeof (template));
+ template.type = VNET_SW_INTERFACE_TYPE_SUB;
+ template.sup_sw_if_index = hi->sw_if_index;
+ template.sub.id = id;
+ template.sub.eth.raw_flags = 0;
+ template.sub.eth.flags.one_tag = 1;
+ template.sub.eth.outer_vlan_id = id;
+ template.sub.eth.flags.exact_match = 1;
+
+ error = vnet_create_sw_interface (vnm, &template, &sw_if_index);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_INVALID_REGISTRATION;
+ goto out;
+ }
+
+ u64 *kp = clib_mem_alloc (sizeof (*kp));
+ *kp = sup_and_sub_key;
+
+ hash_set (hi->sub_interface_sw_if_index_by_id, id, sw_if_index);
+ hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, sw_if_index);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+out:
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = htons (VL_API_CREATE_VLAN_SUBIF_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = htonl (rv);
+ rmp->sw_if_index = htonl (sw_if_index);
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_create_subif_t_handler (vl_api_create_subif_t * mp)
+{
+ vl_api_create_subif_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ int rv = 0;
+ u32 sub_id;
+ vnet_sw_interface_t *si;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t template;
+ uword *p;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u64 sup_and_sub_key;
+ clib_error_t *error;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ si = vnet_get_sup_sw_interface (vnm, ntohl (mp->sw_if_index));
+ hi = vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index));
+
+ if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE)
+ {
+ rv = VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED;
+ goto out;
+ }
+
+ sw_if_index = si->sw_if_index;
+ sub_id = ntohl (mp->sub_id);
+
+ sup_and_sub_key = ((u64) (sw_if_index) << 32) | (u64) sub_id;
+
+ p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
+ if (p)
+ {
+ if (CLIB_DEBUG > 0)
+ clib_warning ("sup sw_if_index %d, sub id %d already exists\n",
+ sw_if_index, sub_id);
+ rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ goto out;
+ }
+
+ memset (&template, 0, sizeof (template));
+ template.type = VNET_SW_INTERFACE_TYPE_SUB;
+ template.sup_sw_if_index = sw_if_index;
+ template.sub.id = sub_id;
+ template.sub.eth.flags.no_tags = mp->no_tags;
+ template.sub.eth.flags.one_tag = mp->one_tag;
+ template.sub.eth.flags.two_tags = mp->two_tags;
+ template.sub.eth.flags.dot1ad = mp->dot1ad;
+ template.sub.eth.flags.exact_match = mp->exact_match;
+ template.sub.eth.flags.default_sub = mp->default_sub;
+ template.sub.eth.flags.outer_vlan_id_any = mp->outer_vlan_id_any;
+ template.sub.eth.flags.inner_vlan_id_any = mp->inner_vlan_id_any;
+ template.sub.eth.outer_vlan_id = ntohs (mp->outer_vlan_id);
+ template.sub.eth.inner_vlan_id = ntohs (mp->inner_vlan_id);
+
+ error = vnet_create_sw_interface (vnm, &template, &sw_if_index);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_SUBIF_CREATE_FAILED;
+ goto out;
+ }
+
+ u64 *kp = clib_mem_alloc (sizeof (*kp));
+ *kp = sup_and_sub_key;
+
+ hash_set (hi->sub_interface_sw_if_index_by_id, sub_id, sw_if_index);
+ hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, sw_if_index);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+out:
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_CREATE_SUBIF_REPLY,
+ ({
+ rmp->sw_if_index = ntohl(sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_proxy_arp_add_del_t_handler (vl_api_proxy_arp_add_del_t * mp)
+{
+ vl_api_proxy_arp_add_del_reply_t *rmp;
+ u32 fib_index;
+ int rv;
+ ip4_main_t *im = &ip4_main;
+ stats_main_t *sm = &stats_main;
+ int vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
+ ip4_address_t * hi_addr,
+ u32 fib_index, int is_del);
+ uword *p;
+
+ dslock (sm, 1 /* release hint */ , 6 /* tag */ );
+
+ p = hash_get (im->fib_index_by_table_id, ntohl (mp->vrf_id));
+
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+
+ fib_index = p[0];
+
+ rv = vnet_proxy_arp_add_del ((ip4_address_t *) mp->low_address,
+ (ip4_address_t *) mp->hi_address,
+ fib_index, mp->is_add == 0);
+
+out:
+ dsunlock (sm);
+ REPLY_MACRO (VL_API_PROXY_ARP_ADD_DEL_REPLY);
+}
+
+static void
+ vl_api_proxy_arp_intfc_enable_disable_t_handler
+ (vl_api_proxy_arp_intfc_enable_disable_t * mp)
+{
+ int rv = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_proxy_arp_intfc_enable_disable_reply_t *rmp;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vnet_sw_interface_t *si =
+ vnet_get_sw_interface (vnm, ntohl (mp->sw_if_index));
+
+ ASSERT (si);
+
+ if (mp->enable_disable)
+ si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
+ else
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_PROXY_ARP_INTFC_ENABLE_DISABLE_REPLY);
+}
+
+static void
+ vl_api_sw_interface_set_mpls_enable_t_handler
+ (vl_api_sw_interface_set_mpls_enable_t * mp)
+{
+ vl_api_sw_interface_set_mpls_enable_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = mpls_sw_interface_enable_disable (&mpls_main,
+ ntohl (mp->sw_if_index),
+ mp->enable, 1);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_MPLS_ENABLE_REPLY);
+}
+
+void
+send_oam_event (oam_target_t * t)
+{
+ vpe_api_main_t *vam = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ vpe_client_registration_t *reg;
+ vl_api_oam_event_t *mp;
+
+ /* *INDENT-OFF* */
+ pool_foreach(reg, vam->oam_events_registrations,
+ ({
+ q = vl_api_client_index_to_input_queue (reg->client_index);
+ if (q)
+ {
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_OAM_EVENT);
+ clib_memcpy (mp->dst_address, &t->dst_address,
+ sizeof (mp->dst_address));
+ mp->state = t->state;
+ vl_msg_api_send_shmem (q, (u8 *)&mp);
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_oam_add_del_t_handler (vl_api_oam_add_del_t * mp)
+{
+ vl_api_oam_add_del_reply_t *rmp;
+ int rv;
+
+ rv = vpe_oam_add_del_target ((ip4_address_t *) mp->src_address,
+ (ip4_address_t *) mp->dst_address,
+ ntohl (mp->vrf_id), (int) (mp->is_add));
+
+ REPLY_MACRO (VL_API_OAM_ADD_DEL_REPLY);
+}
+
+
+static int
+ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ ip4_main_t *im4 = &ip4_main;
+ static u32 *sw_if_indices_to_shut;
+ stats_main_t *sm = &stats_main;
+ fib_table_t *fib_table;
+ ip4_fib_t *fib;
+ u32 sw_if_index;
+ int i;
+ int rv = VNET_API_ERROR_NO_SUCH_FIB;
+ u32 target_fib_id = ntohl (mp->vrf_id);
+
+ dslock (sm, 1 /* release hint */ , 8 /* tag */ );
+
+ /* *INDENT-OFF* */
+ pool_foreach (fib_table, im4->fibs,
+ ({
+ vnet_sw_interface_t * si;
+
+ fib = pool_elt_at_index (im4->v4_fibs, fib_table->ft_index);
+
+ if (fib->table_id != target_fib_id)
+ continue;
+
+ /* remove any mpls encap/decap labels */
+ mpls_fib_reset_labels (fib->table_id);
+
+ /* remove any proxy arps in this fib */
+ vnet_proxy_arp_fib_reset (fib->table_id);
+
+ /* Set the flow hash for this fib to the default */
+ vnet_set_ip4_flow_hash (fib->table_id, IP_FLOW_HASH_DEFAULT);
+
+ vec_reset_length (sw_if_indices_to_shut);
+
+ /* Shut down interfaces in this FIB / clean out intfc routes */
+ pool_foreach (si, im->sw_interfaces,
+ ({
+ u32 sw_if_index = si->sw_if_index;
+
+ if (sw_if_index < vec_len (im4->fib_index_by_sw_if_index)
+ && (im4->fib_index_by_sw_if_index[si->sw_if_index] ==
+ fib->index))
+ vec_add1 (sw_if_indices_to_shut, si->sw_if_index);
+ }));
+
+ for (i = 0; i < vec_len (sw_if_indices_to_shut); i++) {
+ sw_if_index = sw_if_indices_to_shut[i];
+
+ u32 flags = vnet_sw_interface_get_flags (vnm, sw_if_index);
+ flags &= ~(VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_sw_interface_set_flags (vnm, sw_if_index, flags);
+ }
+
+ fib_table_flush(fib->index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
+
+ rv = 0;
+ break;
+ })); /* pool_foreach (fib) */
+ /* *INDENT-ON* */
+
+ dsunlock (sm);
+ return rv;
+}
+
+static int
+ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ ip6_main_t *im6 = &ip6_main;
+ stats_main_t *sm = &stats_main;
+ static u32 *sw_if_indices_to_shut;
+ fib_table_t *fib_table;
+ ip6_fib_t *fib;
+ u32 sw_if_index;
+ int i;
+ int rv = VNET_API_ERROR_NO_SUCH_FIB;
+ u32 target_fib_id = ntohl (mp->vrf_id);
+
+ dslock (sm, 1 /* release hint */ , 9 /* tag */ );
+
+ /* *INDENT-OFF* */
+ pool_foreach (fib_table, im6->fibs,
+ ({
+ vnet_sw_interface_t * si;
+
+ fib = pool_elt_at_index (im6->v6_fibs, fib_table->ft_index);
+
+ if (fib->table_id != target_fib_id)
+ continue;
+
+ vec_reset_length (sw_if_indices_to_shut);
+
+ /* Set the flow hash for this fib to the default */
+ vnet_set_ip6_flow_hash (fib->table_id, IP_FLOW_HASH_DEFAULT);
+
+ /* Shut down interfaces in this FIB / clean out intfc routes */
+ pool_foreach (si, im->sw_interfaces,
+ ({
+ if (im6->fib_index_by_sw_if_index[si->sw_if_index] ==
+ fib->index)
+ vec_add1 (sw_if_indices_to_shut, si->sw_if_index);
+ }));
+
+ for (i = 0; i < vec_len (sw_if_indices_to_shut); i++) {
+ sw_if_index = sw_if_indices_to_shut[i];
+
+ u32 flags = vnet_sw_interface_get_flags (vnm, sw_if_index);
+ flags &= ~(VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_sw_interface_set_flags (vnm, sw_if_index, flags);
+ }
+
+ fib_table_flush(fib->index, FIB_PROTOCOL_IP6, FIB_SOURCE_API);
+
+ rv = 0;
+ break;
+ })); /* pool_foreach (fib) */
+ /* *INDENT-ON* */
+
+ dsunlock (sm);
+ return rv;
+}
+
+static void
+vl_api_reset_fib_t_handler (vl_api_reset_fib_t * mp)
+{
+ int rv;
+ vl_api_reset_fib_reply_t *rmp;
+
+ if (mp->is_ipv6)
+ rv = ip6_reset_fib_t_handler (mp);
+ else
+ rv = ip4_reset_fib_t_handler (mp);
+
+ REPLY_MACRO (VL_API_RESET_FIB_REPLY);
+}
+
+static void
+vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp)
+{
+ vl_api_create_loopback_reply_t *rmp;
+ u32 sw_if_index;
+ int rv;
+
+ rv = vnet_create_loopback_interface (&sw_if_index, mp->mac_address, 0, 0);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_CREATE_LOOPBACK_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void vl_api_create_loopback_instance_t_handler
+ (vl_api_create_loopback_instance_t * mp)
+{
+ vl_api_create_loopback_instance_reply_t *rmp;
+ u32 sw_if_index;
+ u8 is_specified = mp->is_specified;
+ u32 user_instance = ntohl (mp->user_instance);
+ int rv;
+
+ rv = vnet_create_loopback_interface (&sw_if_index, mp->mac_address,
+ is_specified, user_instance);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_CREATE_LOOPBACK_INSTANCE_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_delete_loopback_t_handler (vl_api_delete_loopback_t * mp)
+{
+ vl_api_delete_loopback_reply_t *rmp;
+ u32 sw_if_index;
+ int rv;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+ rv = vnet_delete_loopback_interface (sw_if_index);
+
+ REPLY_MACRO (VL_API_DELETE_LOOPBACK_REPLY);
+}
+
+static void
+vl_api_control_ping_t_handler (vl_api_control_ping_t * mp)
+{
+ vl_api_control_ping_reply_t *rmp;
+ int rv = 0;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_CONTROL_PING_REPLY,
+ ({
+ rmp->vpe_pid = ntohl (getpid());
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+shmem_cli_output (uword arg, u8 * buffer, uword buffer_bytes)
+{
+ u8 **shmem_vecp = (u8 **) arg;
+ u8 *shmem_vec;
+ void *oldheap;
+ api_main_t *am = &api_main;
+ u32 offset;
+
+ shmem_vec = *shmem_vecp;
+
+ offset = vec_len (shmem_vec);
+
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ vec_validate (shmem_vec, offset + buffer_bytes - 1);
+
+ clib_memcpy (shmem_vec + offset, buffer, buffer_bytes);
+
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+
+ *shmem_vecp = shmem_vec;
+}
+
+
+static void
+vl_api_cli_t_handler (vl_api_cli_t * mp)
+{
+ vl_api_cli_reply_t *rp;
+ unix_shared_memory_queue_t *q;
+ vlib_main_t *vm = vlib_get_main ();
+ api_main_t *am = &api_main;
+ unformat_input_t input;
+ u8 *shmem_vec = 0;
+ void *oldheap;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rp = vl_msg_api_alloc (sizeof (*rp));
+ rp->_vl_msg_id = ntohs (VL_API_CLI_REPLY);
+ rp->context = mp->context;
+
+ unformat_init_vector (&input, (u8 *) (uword) mp->cmd_in_shmem);
+
+ vlib_cli_input (vm, &input, shmem_cli_output, (uword) & shmem_vec);
+
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ vec_add1 (shmem_vec, 0);
+
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+
+ rp->reply_in_shmem = (uword) shmem_vec;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rp);
+}
+
+static void
+inband_cli_output (uword arg, u8 * buffer, uword buffer_bytes)
+{
+ u8 **mem_vecp = (u8 **) arg;
+ u8 *mem_vec = *mem_vecp;
+ u32 offset = vec_len (mem_vec);
+
+ vec_validate (mem_vec, offset + buffer_bytes - 1);
+ clib_memcpy (mem_vec + offset, buffer, buffer_bytes);
+ *mem_vecp = mem_vec;
+}
+
+static void
+vl_api_cli_inband_t_handler (vl_api_cli_inband_t * mp)
+{
+ vl_api_cli_inband_reply_t *rmp;
+ int rv = 0;
+ unix_shared_memory_queue_t *q;
+ vlib_main_t *vm = vlib_get_main ();
+ unformat_input_t input;
+ u8 *out_vec = 0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ unformat_init_string (&input, (char *) mp->cmd, ntohl (mp->length));
+ vlib_cli_input (vm, &input, inband_cli_output, (uword) & out_vec);
+
+ u32 len = vec_len (out_vec);
+ /* *INDENT-OFF* */
+ REPLY_MACRO3(VL_API_CLI_INBAND_REPLY, len,
+ ({
+ rmp->length = htonl (len);
+ clib_memcpy (rmp->reply, out_vec, len);
+ }));
+ /* *INDENT-ON* */
+ vec_free (out_vec);
+}
+
+static void
+vl_api_set_arp_neighbor_limit_t_handler (vl_api_set_arp_neighbor_limit_t * mp)
+{
+ int rv;
+ vl_api_set_arp_neighbor_limit_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error;
+
+ vnm->api_errno = 0;
+
+ if (mp->is_ipv6)
+ error = ip6_set_neighbor_limit (ntohl (mp->arp_neighbor_limit));
+ else
+ error = ip4_set_arp_limit (ntohl (mp->arp_neighbor_limit));
+
+ if (error)
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ else
+ {
+ rv = vnm->api_errno;
+ }
+
+ REPLY_MACRO (VL_API_SET_ARP_NEIGHBOR_LIMIT_REPLY);
+}
+
+static void vl_api_classify_set_interface_ip_table_t_handler
+ (vl_api_classify_set_interface_ip_table_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_classify_set_interface_ip_table_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u32 table_index = ntohl (mp->table_index);
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ if (mp->is_ipv6)
+ rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
+ else
+ rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_CLASSIFY_SET_INTERFACE_IP_TABLE_REPLY);
+}
+
+static void vl_api_classify_set_interface_l2_tables_t_handler
+ (vl_api_classify_set_interface_l2_tables_t * mp)
+{
+ vl_api_classify_set_interface_l2_tables_reply_t *rmp;
+ int rv;
+ u32 sw_if_index, ip4_table_index, ip6_table_index, other_table_index;
+ int enable;
+
+ ip4_table_index = ntohl (mp->ip4_table_index);
+ ip6_table_index = ntohl (mp->ip6_table_index);
+ other_table_index = ntohl (mp->other_table_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ if (mp->is_input)
+ rv = vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index,
+ other_table_index);
+ else
+ rv = vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index,
+ other_table_index);
+
+ if (rv == 0)
+ {
+ if (ip4_table_index != ~0 || ip6_table_index != ~0
+ || other_table_index != ~0)
+ enable = 1;
+ else
+ enable = 0;
+
+ if (mp->is_input)
+ vnet_l2_input_classify_enable_disable (sw_if_index, enable);
+ else
+ vnet_l2_output_classify_enable_disable (sw_if_index, enable);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_CLASSIFY_SET_INTERFACE_L2_TABLES_REPLY);
+}
+
+extern void l2_efp_filter_configure (vnet_main_t * vnet_main,
+ u32 sw_if_index, u32 enable);
+
+static void
+vl_api_l2_interface_efp_filter_t_handler (vl_api_l2_interface_efp_filter_t *
+ mp)
+{
+ int rv;
+ vl_api_l2_interface_efp_filter_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ // enable/disable the feature
+ l2_efp_filter_configure (vnm, mp->sw_if_index, mp->enable_disable);
+ rv = vnm->api_errno;
+
+ REPLY_MACRO (VL_API_L2_INTERFACE_EFP_FILTER_REPLY);
+}
+
+static void
+vl_api_show_version_t_handler (vl_api_show_version_t * mp)
+{
+ vl_api_show_version_reply_t *rmp;
+ int rv = 0;
+ char *vpe_api_get_build_directory (void);
+ char *vpe_api_get_version (void);
+ char *vpe_api_get_build_date (void);
+
+ unix_shared_memory_queue_t *q =
+ vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ return;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_SHOW_VERSION_REPLY,
+ ({
+ strncpy ((char *) rmp->program, "vpe", ARRAY_LEN(rmp->program)-1);
+ strncpy ((char *) rmp->build_directory, vpe_api_get_build_directory(),
+ ARRAY_LEN(rmp->build_directory)-1);
+ strncpy ((char *) rmp->version, vpe_api_get_version(),
+ ARRAY_LEN(rmp->version)-1);
+ strncpy ((char *) rmp->build_date, vpe_api_get_build_date(),
+ ARRAY_LEN(rmp->build_date)-1);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_get_node_index_t_handler (vl_api_get_node_index_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_get_node_index_reply_t *rmp;
+ vlib_node_t *n;
+ int rv = 0;
+ u32 node_index = ~0;
+
+ n = vlib_get_node_by_name (vm, mp->node_name);
+
+ if (n == 0)
+ rv = VNET_API_ERROR_NO_SUCH_NODE;
+ else
+ node_index = n->index;
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_GET_NODE_INDEX_REPLY,
+ ({
+ rmp->node_index = ntohl(node_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_get_next_index_t_handler (vl_api_get_next_index_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_get_next_index_reply_t *rmp;
+ vlib_node_t *node, *next_node;
+ int rv = 0;
+ u32 next_node_index = ~0, next_index = ~0;
+ uword *p;
+
+ node = vlib_get_node_by_name (vm, mp->node_name);
+
+ if (node == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_NODE;
+ goto out;
+ }
+
+ next_node = vlib_get_node_by_name (vm, mp->next_name);
+
+ if (next_node == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_NODE2;
+ goto out;
+ }
+ else
+ next_node_index = next_node->index;
+
+ p = hash_get (node->next_slot_by_node, next_node_index);
+
+ if (p == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
+ }
+ else
+ next_index = p[0];
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_GET_NEXT_INDEX_REPLY,
+ ({
+ rmp->next_index = ntohl(next_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_add_node_next_t_handler (vl_api_add_node_next_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_add_node_next_reply_t *rmp;
+ vlib_node_t *n, *next;
+ int rv = 0;
+ u32 next_index = ~0;
+
+ n = vlib_get_node_by_name (vm, mp->node_name);
+
+ if (n == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_NODE;
+ goto out;
+ }
+
+ next = vlib_get_node_by_name (vm, mp->next_name);
+
+ if (next == 0)
+ rv = VNET_API_ERROR_NO_SUCH_NODE2;
+ else
+ next_index = vlib_node_add_next (vm, n->index, next->index);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_ADD_NODE_NEXT_REPLY,
+ ({
+ rmp->next_index = ntohl(next_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp)
+{
+ extern int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index,
+ int is_add);
+ vl_api_l2_patch_add_del_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_RX_SW_IF_INDEX (mp);
+ VALIDATE_TX_SW_IF_INDEX (mp);
+
+ rv = vnet_l2_patch_add_del (ntohl (mp->rx_sw_if_index),
+ ntohl (mp->tx_sw_if_index),
+ (int) (mp->is_add != 0));
+
+ BAD_RX_SW_IF_INDEX_LABEL;
+ BAD_TX_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_L2_PATCH_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_interface_name_renumber_t_handler (vl_api_interface_name_renumber_t *
+ mp)
+{
+ vl_api_interface_name_renumber_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = vnet_interface_name_renumber
+ (ntohl (mp->sw_if_index), ntohl (mp->new_show_dev_instance));
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_INTERFACE_NAME_RENUMBER_REPLY);
+}
+
+static int
+arp_change_data_callback (u32 pool_index, u8 * new_mac,
+ u32 sw_if_index, u32 address)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vlib_main_t *vm = am->vlib_main;
+ vl_api_ip4_arp_event_t *event;
+
+ if (pool_is_free_index (am->arp_events, pool_index))
+ return 1;
+
+ event = pool_elt_at_index (am->arp_events, pool_index);
+ if (eth_mac_equal (event->new_mac, new_mac) &&
+ sw_if_index == ntohl (event->sw_if_index))
+ {
+ return 1;
+ }
+
+ clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac));
+ event->sw_if_index = htonl (sw_if_index);
+ return 0;
+}
+
+static int
+nd_change_data_callback (u32 pool_index, u8 * new_mac,
+ u32 sw_if_index, ip6_address_t * address)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vlib_main_t *vm = am->vlib_main;
+ vl_api_ip6_nd_event_t *event;
+
+ if (pool_is_free_index (am->nd_events, pool_index))
+ return 1;
+
+ event = pool_elt_at_index (am->nd_events, pool_index);
+ if (eth_mac_equal (event->new_mac, new_mac) &&
+ sw_if_index == ntohl (event->sw_if_index))
+ {
+ return 1;
+ }
+
+ clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac));
+ event->sw_if_index = htonl (sw_if_index);
+ return 0;
+}
+
+static int
+arp_change_delete_callback (u32 pool_index, u8 * notused)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+
+ if (pool_is_free_index (am->arp_events, pool_index))
+ return 1;
+
+ pool_put_index (am->arp_events, pool_index);
+ return 0;
+}
+
+static int
+nd_change_delete_callback (u32 pool_index, u8 * notused)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+
+ if (pool_is_free_index (am->nd_events, pool_index))
+ return 1;
+
+ pool_put_index (am->nd_events, pool_index);
+ return 0;
+}
+
+static vlib_node_registration_t wc_arp_process_node;
+
+enum
+{ WC_ARP_REPORT, WC_ND_REPORT };
+
+static uword
+wc_arp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ /* These cross the longjmp boundary (vlib_process_wait_for_event)
+ * and need to be volatile - to prevent them from being optimized into
+ * a register - which could change during suspension */
+
+ volatile wc_arp_report_t arp_prev = { 0 };
+ volatile wc_nd_report_t nd_prev = { 0 };
+ volatile f64 last_arp = vlib_time_now (vm);
+ volatile f64 last_nd = vlib_time_now (vm);
+
+ while (1)
+ {
+ vlib_process_wait_for_event (vm);
+ uword event_type;
+ void *event_data = vlib_process_get_event_data (vm, &event_type);
+
+ f64 now = vlib_time_now (vm);
+ int i;
+ if (event_type == WC_ARP_REPORT)
+ {
+ wc_arp_report_t *arp_events = event_data;
+ for (i = 0; i < vec_len (arp_events); i++)
+ {
+ /* discard dup event */
+ if (arp_prev.ip4 == arp_events[i].ip4 &&
+ eth_mac_equal ((u8 *) arp_prev.mac, arp_events[i].mac) &&
+ arp_prev.sw_if_index == arp_events[i].sw_if_index &&
+ (now - last_arp) < 10.0)
+ {
+ continue;
+ }
+ arp_prev = arp_events[i];
+ last_arp = now;
+ vpe_client_registration_t *reg;
+ /* *INDENT-OFF* */
+ pool_foreach(reg, vpe_api_main.wc_ip4_arp_events_registrations,
+ ({
+ unix_shared_memory_queue_t *q;
+ q = vl_api_client_index_to_input_queue (reg->client_index);
+ if (q && q->cursize < q->maxsize)
+ {
+ vl_api_ip4_arp_event_t * event = vl_msg_api_alloc (sizeof *event);
+ memset (event, 0, sizeof *event);
+ event->_vl_msg_id = htons (VL_API_IP4_ARP_EVENT);
+ event->client_index = reg->client_index;
+ event->pid = reg->client_pid;
+ event->mac_ip = 1;
+ event->address = arp_events[i].ip4;
+ event->sw_if_index = htonl(arp_events[i].sw_if_index);
+ memcpy(event->new_mac, arp_events[i].mac, sizeof event->new_mac);
+ vl_msg_api_send_shmem (q, (u8 *) &event);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ else if (event_type == WC_ND_REPORT)
+ {
+ wc_nd_report_t *nd_events = event_data;
+ for (i = 0; i < vec_len (nd_events); i++)
+ {
+ /* discard dup event */
+ if (ip6_address_is_equal
+ ((ip6_address_t *) & nd_prev.ip6, &nd_events[i].ip6)
+ && eth_mac_equal ((u8 *) nd_prev.mac, nd_events[i].mac)
+ && nd_prev.sw_if_index == nd_events[i].sw_if_index
+ && (now - last_nd) < 10.0)
+ {
+ continue;
+ }
+ nd_prev = nd_events[i];
+ last_nd = now;
+ vpe_client_registration_t *reg;
+ /* *INDENT-OFF* */
+ pool_foreach(reg, vpe_api_main.wc_ip6_nd_events_registrations,
+ ({
+ unix_shared_memory_queue_t *q;
+ q = vl_api_client_index_to_input_queue (reg->client_index);
+ if (q && q->cursize < q->maxsize)
+ {
+ vl_api_ip6_nd_event_t * event = vl_msg_api_alloc (sizeof *event);
+ memset (event, 0, sizeof *event);
+ event->_vl_msg_id = htons (VL_API_IP6_ND_EVENT);
+ event->client_index = reg->client_index;
+ event->pid = reg->client_pid;
+ event->mac_ip = 1;
+ memcpy(event->address, nd_events[i].ip6.as_u8, sizeof event->address);
+ event->sw_if_index = htonl(nd_events[i].sw_if_index);
+ memcpy(event->new_mac, nd_events[i].mac, sizeof event->new_mac);
+ vl_msg_api_send_shmem (q, (u8 *) &event);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ vlib_process_put_event_data (vm, event_data);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (wc_arp_process_node,static) = {
+ .function = wc_arp_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "wildcard-ip4-arp-publisher-process",
+};
+/* *INDENT-ON* */
+
+static void
+vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_want_ip4_arp_events_reply_t *rmp;
+ int rv = 0;
+
+ if (mp->address == 0)
+ {
+ uword *p =
+ hash_get (am->wc_ip4_arp_events_registration_hash, mp->client_index);
+ vpe_client_registration_t *rp;
+ if (p)
+ {
+ if (mp->enable_disable)
+ {
+ clib_warning ("pid %d: already enabled...", mp->pid);
+ rv = VNET_API_ERROR_INVALID_REGISTRATION;
+ goto reply;
+ }
+ else
+ {
+ rp =
+ pool_elt_at_index (am->wc_ip4_arp_events_registrations, p[0]);
+ pool_put (am->wc_ip4_arp_events_registrations, rp);
+ hash_unset (am->wc_ip4_arp_events_registration_hash,
+ mp->client_index);
+ if (pool_elts (am->wc_ip4_arp_events_registrations) == 0)
+ wc_arp_set_publisher_node (~0, WC_ARP_REPORT);
+ goto reply;
+ }
+ }
+ if (mp->enable_disable == 0)
+ {
+ clib_warning ("pid %d: already disabled...", mp->pid);
+ rv = VNET_API_ERROR_INVALID_REGISTRATION;
+ goto reply;
+ }
+ pool_get (am->wc_ip4_arp_events_registrations, rp);
+ rp->client_index = mp->client_index;
+ rp->client_pid = mp->pid;
+ hash_set (am->wc_ip4_arp_events_registration_hash, rp->client_index,
+ rp - am->wc_ip4_arp_events_registrations);
+ wc_arp_set_publisher_node (wc_arp_process_node.index, WC_ARP_REPORT);
+ goto reply;
+ }
+
+ if (mp->enable_disable)
+ {
+ vl_api_ip4_arp_event_t *event;
+ pool_get (am->arp_events, event);
+ rv = vnet_add_del_ip4_arp_change_event
+ (vnm, arp_change_data_callback,
+ mp->pid, &mp->address /* addr, in net byte order */ ,
+ vpe_resolver_process_node.index,
+ IP4_ARP_EVENT, event - am->arp_events, 1 /* is_add */ );
+
+ if (rv)
+ {
+ pool_put (am->arp_events, event);
+ goto reply;
+ }
+ memset (event, 0, sizeof (*event));
+
+ /* Python API expects events to have no context */
+ event->_vl_msg_id = htons (VL_API_IP4_ARP_EVENT);
+ event->client_index = mp->client_index;
+ event->address = mp->address;
+ event->pid = mp->pid;
+ if (mp->address == 0)
+ event->mac_ip = 1;
+ }
+ else
+ {
+ rv = vnet_add_del_ip4_arp_change_event
+ (vnm, arp_change_delete_callback,
+ mp->pid, &mp->address /* addr, in net byte order */ ,
+ vpe_resolver_process_node.index,
+ IP4_ARP_EVENT, ~0 /* pool index */ , 0 /* is_add */ );
+ }
+reply:
+ REPLY_MACRO (VL_API_WANT_IP4_ARP_EVENTS_REPLY);
+}
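+
+/*
+ * Note: address == 0 selects the wildcard mac/ip4 binding event stream
+ * published by wc_arp_process above; a non-zero address registers for
+ * resolution events on that specific IPv4 address via the resolver
+ * process node.
+ */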
+
+static void
+vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_want_ip6_nd_events_reply_t *rmp;
+ int rv = 0;
+
+ if (ip6_address_is_zero ((ip6_address_t *) mp->address))
+ {
+ uword *p =
+ hash_get (am->wc_ip6_nd_events_registration_hash, mp->client_index);
+ vpe_client_registration_t *rp;
+ if (p)
+ {
+ if (mp->enable_disable)
+ {
+ clib_warning ("pid %d: already enabled...", mp->pid);
+ rv = VNET_API_ERROR_INVALID_REGISTRATION;
+ goto reply;
+ }
+ else
+ {
+ rp =
+ pool_elt_at_index (am->wc_ip6_nd_events_registrations, p[0]);
+ pool_put (am->wc_ip6_nd_events_registrations, rp);
+ hash_unset (am->wc_ip6_nd_events_registration_hash,
+ mp->client_index);
+ if (pool_elts (am->wc_ip6_nd_events_registrations) == 0)
+ wc_nd_set_publisher_node (~0, WC_ND_REPORT);
+ goto reply;
+ }
+ }
+ if (mp->enable_disable == 0)
+ {
+ clib_warning ("pid %d: already disabled...", mp->pid);
+ rv = VNET_API_ERROR_INVALID_REGISTRATION;
+ goto reply;
+ }
+ pool_get (am->wc_ip6_nd_events_registrations, rp);
+ rp->client_index = mp->client_index;
+ rp->client_pid = mp->pid;
+ hash_set (am->wc_ip6_nd_events_registration_hash, rp->client_index,
+ rp - am->wc_ip6_nd_events_registrations);
+ wc_nd_set_publisher_node (wc_arp_process_node.index, WC_ND_REPORT);
+ goto reply;
+ }
+
+ if (mp->enable_disable)
+ {
+ vl_api_ip6_nd_event_t *event;
+ pool_get (am->nd_events, event);
+
+ rv = vnet_add_del_ip6_nd_change_event
+ (vnm, nd_change_data_callback,
+ mp->pid, mp->address /* addr, in net byte order */ ,
+ vpe_resolver_process_node.index,
+ IP6_ND_EVENT, event - am->nd_events, 1 /* is_add */ );
+
+ if (rv)
+ {
+ pool_put (am->nd_events, event);
+ goto reply;
+ }
+ memset (event, 0, sizeof (*event));
+
+ event->_vl_msg_id = htons (VL_API_IP6_ND_EVENT);
+ event->client_index = mp->client_index;
+ clib_memcpy (event->address, mp->address, sizeof event->address);
+ event->pid = mp->pid;
+ }
+ else
+ {
+ rv = vnet_add_del_ip6_nd_change_event
+ (vnm, nd_change_delete_callback,
+ mp->pid, mp->address /* addr, in net byte order */ ,
+ vpe_resolver_process_node.index,
+ IP6_ND_EVENT, ~0 /* pool index */ , 0 /* is_add */ );
+ }
+reply:
+ REPLY_MACRO (VL_API_WANT_IP6_ND_EVENTS_REPLY);
+}
+
+static void vl_api_input_acl_set_interface_t_handler
+ (vl_api_input_acl_set_interface_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_input_acl_set_interface_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u32 ip4_table_index = ntohl (mp->ip4_table_index);
+ u32 ip6_table_index = ntohl (mp->ip6_table_index);
+ u32 l2_table_index = ntohl (mp->l2_table_index);
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ rv = vnet_set_input_acl_intfc (vm, sw_if_index, ip4_table_index,
+ ip6_table_index, l2_table_index, mp->is_add);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_INPUT_ACL_SET_INTERFACE_REPLY);
+}
+
+static void
+vl_api_get_node_graph_t_handler (vl_api_get_node_graph_t * mp)
+{
+ int rv = 0;
+ u8 *vector = 0;
+ api_main_t *am = &api_main;
+ vlib_main_t *vm = vlib_get_main ();
+ void *oldheap;
+ vl_api_get_node_graph_reply_t *rmp;
+
+ pthread_mutex_lock (&am->vlib_rp->mutex);
+ oldheap = svm_push_data_heap (am->vlib_rp);
+
+ /*
+ * Keep the number of memcpy ops to a minimum (e.g. 1).
+ */
+ vec_validate (vector, 16384);
+ vec_reset_length (vector);
+
+ /* $$$$ FIXME */
+ vector = vlib_node_serialize (&vm->node_main, vector,
+ (u32) ~ 0 /* all threads */ ,
+ 1 /* include nexts */ ,
+ 1 /* include stats */ );
+
+ svm_pop_heap (oldheap);
+ pthread_mutex_unlock (&am->vlib_rp->mutex);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_GET_NODE_GRAPH_REPLY,
+ ({
+ rmp->reply_in_shmem = (uword) vector;
+ }));
+ /* *INDENT-ON* */
+}
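+
+/*
+ * Note: the serialized node graph is built on the shared-memory API
+ * heap (svm_push_data_heap above) so the client can read it in place
+ * through the reply_in_shmem pointer.
+ */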
+
+static void
+vl_api_ioam_enable_t_handler (vl_api_ioam_enable_t * mp)
+{
+ int rv = 0;
+ vl_api_ioam_enable_reply_t *rmp;
+ clib_error_t *error;
+
+ /* Ignoring the profile id as currently a single profile
+ * is supported */
+ error = ip6_ioam_enable (mp->trace_enable, mp->pot_enable,
+ mp->seqno, mp->analyse);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+ REPLY_MACRO (VL_API_IOAM_ENABLE_REPLY);
+}
+
+static void
+vl_api_ioam_disable_t_handler (vl_api_ioam_disable_t * mp)
+{
+ int rv = 0;
+ vl_api_ioam_disable_reply_t *rmp;
+ clib_error_t *error;
+
+ error = clear_ioam_rewrite_fn ();
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+ REPLY_MACRO (VL_API_IOAM_DISABLE_REPLY);
+}
+
+static void
+vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp)
+{
+ vl_api_pg_create_interface_reply_t *rmp;
+ int rv = 0;
+
+ pg_main_t *pg = &pg_main;
+ u32 pg_if_id = pg_interface_add_or_get (pg, ntohl (mp->interface_id));
+ pg_interface_t *pi = pool_elt_at_index (pg->interfaces, pg_if_id);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_PG_CREATE_INTERFACE_REPLY,
+ ({
+ rmp->sw_if_index = ntohl(pi->sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_pg_capture_t_handler (vl_api_pg_capture_t * mp)
+{
+ vl_api_pg_capture_reply_t *rmp;
+ int rv = 0;
+
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_hw_interface_t *hi = 0;
+
+ u8 *intf_name = format (0, "pg%d", ntohl (mp->interface_id), 0);
+ u32 hw_if_index = ~0;
+ uword *p = hash_get_mem (im->hw_interface_by_name, intf_name);
+ if (p)
+ hw_if_index = *p;
+ vec_free (intf_name);
+
+ if (hw_if_index != ~0)
+ {
+ pg_capture_args_t _a, *a = &_a;
+
+ u32 len = ntohl (mp->pcap_name_length);
+ u8 *pcap_file_name = vec_new (u8, len);
+ clib_memcpy (pcap_file_name, mp->pcap_file_name, len);
+
+ hi = vnet_get_sup_hw_interface (vnm, hw_if_index);
+ a->hw_if_index = hw_if_index;
+ a->dev_instance = hi->dev_instance;
+ a->is_enabled = mp->is_enabled;
+ a->pcap_file_name = pcap_file_name;
+ a->count = ntohl (mp->count);
+
+ clib_error_t *e = pg_capture (a);
+ if (e)
+ {
+ clib_error_report (e);
+ rv = VNET_API_ERROR_CANNOT_CREATE_PCAP_FILE;
+ }
+
+ vec_free (pcap_file_name);
+ }
+ REPLY_MACRO (VL_API_PG_CAPTURE_REPLY);
+}
+
+static void
+vl_api_pg_enable_disable_t_handler (vl_api_pg_enable_disable_t * mp)
+{
+ vl_api_pg_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ pg_main_t *pg = &pg_main;
+ u32 stream_index = ~0;
+
+ int is_enable = mp->is_enabled != 0;
+ u32 len = ntohl (mp->stream_name_length);
+
+ /* stream_name_length counts the trailing NUL; check before subtracting
+ so the unsigned length cannot underflow */
+ if (len > 1)
+ {
+ len -= 1;
+ u8 *stream_name = vec_new (u8, len);
+ clib_memcpy (stream_name, mp->stream_name, len);
+ uword *p = hash_get_mem (pg->stream_index_by_name, stream_name);
+ if (p)
+ stream_index = *p;
+ vec_free (stream_name);
+ }
+
+ pg_enable_disable (stream_index, is_enable);
+
+ REPLY_MACRO (VL_API_PG_ENABLE_DISABLE_REPLY);
+}
+
+static void
+ vl_api_ip_source_and_port_range_check_add_del_t_handler
+ (vl_api_ip_source_and_port_range_check_add_del_t * mp)
+{
+ vl_api_ip_source_and_port_range_check_add_del_reply_t *rmp;
+ int rv = 0;
+
+ u8 is_ipv6 = mp->is_ipv6;
+ u8 is_add = mp->is_add;
+ u8 mask_length = mp->mask_length;
+ ip4_address_t ip4_addr;
+ ip6_address_t ip6_addr;
+ u16 *low_ports = 0;
+ u16 *high_ports = 0;
+ u32 vrf_id;
+ u16 tmp_low, tmp_high;
+ u8 num_ranges;
+ int i;
+
+ // Validate port range
+ num_ranges = mp->number_of_ranges;
+ if (num_ranges > 32)
+ { /* 32 = size of the low/high port arrays in vpe.api */
+ rv = VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY;
+ goto reply;
+ }
+
+ vec_reset_length (low_ports);
+ vec_reset_length (high_ports);
+
+ for (i = 0; i < num_ranges; i++)
+ {
+ tmp_low = mp->low_ports[i];
+ tmp_high = mp->high_ports[i];
+ // Reject inverted ranges, port 0, and out-of-range high ports
+ if (tmp_low > tmp_high || tmp_low == 0 || tmp_high > 65535)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto reply;
+ }
+ vec_add1 (low_ports, tmp_low);
+ vec_add1 (high_ports, tmp_high + 1);
+ }
+
+ // Validate mask_length
+ if ((is_ipv6 && mask_length > 128) || (!is_ipv6 && mask_length > 32))
+ {
+ rv = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
+ goto reply;
+ }
+
+ vrf_id = ntohl (mp->vrf_id);
+
+ if (vrf_id == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto reply;
+ }
+
+
+ if (is_ipv6)
+ {
+ clib_memcpy (ip6_addr.as_u8, mp->address, sizeof (ip6_addr.as_u8));
+ rv = ip6_source_and_port_range_check_add_del (&ip6_addr,
+ mask_length,
+ vrf_id,
+ low_ports,
+ high_ports, is_add);
+ }
+ else
+ {
+ clib_memcpy (ip4_addr.data, mp->address, sizeof (ip4_addr));
+ rv = ip4_source_and_port_range_check_add_del (&ip4_addr,
+ mask_length,
+ vrf_id,
+ low_ports,
+ high_ports, is_add);
+ }
+
+reply:
+ vec_free (low_ports);
+ vec_free (high_ports);
+ REPLY_MACRO (VL_API_IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL_REPLY);
+}
+
+static void
+ vl_api_ip_source_and_port_range_check_interface_add_del_t_handler
+ (vl_api_ip_source_and_port_range_check_interface_add_del_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_ip_source_and_port_range_check_interface_add_del_reply_t *rmp;
+ ip4_main_t *im = &ip4_main;
+ int rv;
+ u32 sw_if_index;
+ u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+ u32 vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+ uword *p = 0;
+ int i;
+
+ vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT] =
+ ntohl (mp->tcp_out_vrf_id);
+ vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT] =
+ ntohl (mp->udp_out_vrf_id);
+ vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN] =
+ ntohl (mp->tcp_in_vrf_id);
+ vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN] =
+ ntohl (mp->udp_in_vrf_id);
+
+
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+ if (vrf_id[i] != 0 && vrf_id[i] != ~0)
+ {
+ p = hash_get (im->fib_index_by_table_id, vrf_id[i]);
+
+ if (p == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto reply;
+ }
+
+ fib_index[i] = p[0];
+ }
+ else
+ fib_index[i] = ~0;
+ }
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv =
+ set_ip_source_and_port_range_check (vm, fib_index, sw_if_index,
+ mp->is_add);
+
+ BAD_SW_IF_INDEX_LABEL;
+reply:
+
+ REPLY_MACRO (VL_API_IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_delete_subif_t_handler (vl_api_delete_subif_t * mp)
+{
+ vl_api_delete_subif_reply_t *rmp;
+ int rv;
+
+ rv = vnet_delete_sub_interface (ntohl (mp->sw_if_index));
+
+ REPLY_MACRO (VL_API_DELETE_SUBIF_REPLY);
+}
+
+static void
+vl_api_punt_t_handler (vl_api_punt_t * mp)
+{
+ vl_api_punt_reply_t *rmp;
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+ clib_error_t *error;
+
+ error = vnet_punt_add_del (vm, mp->ipv, mp->l4_protocol,
+ ntohs (mp->l4_port), mp->is_add);
+ if (error)
+ {
+ rv = -1;
+ clib_error_report (error);
+ }
+
+ REPLY_MACRO (VL_API_PUNT_REPLY);
+}
+
+static void
+vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp)
+{
+ vl_api_punt_socket_register_reply_t *rmp;
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+ clib_error_t *error;
+ unix_shared_memory_queue_t *q;
+
+ error = vnet_punt_socket_add (vm, ntohl (mp->header_version),
+ mp->is_ip4, mp->l4_protocol,
+ ntohs (mp->l4_port), (char *) mp->pathname);
+ if (error)
+ {
+ rv = -1;
+ clib_error_report (error);
+ }
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = htons (VL_API_PUNT_SOCKET_REGISTER_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = htonl (rv);
+ char *p = vnet_punt_get_server_pathname ();
+ /* Abstract pathnames start with \0 */
+ memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname));
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_punt_socket_deregister_t_handler (vl_api_punt_socket_deregister_t * mp)
+{
+ vl_api_punt_socket_deregister_reply_t *rmp;
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+ clib_error_t *error;
+ unix_shared_memory_queue_t *q;
+
+ error = vnet_punt_socket_del (vm, mp->is_ip4, mp->l4_protocol,
+ ntohs (mp->l4_port));
+ if (error)
+ {
+ rv = -1;
+ clib_error_report (error);
+ }
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = htons (VL_API_PUNT_SOCKET_DEREGISTER_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = htonl (rv);
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_feature_enable_disable_t_handler (vl_api_feature_enable_disable_t * mp)
+{
+ vl_api_feature_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u8 *arc_name = format (0, "%s%c", mp->arc_name, 0);
+ u8 *feature_name = format (0, "%s%c", mp->feature_name, 0);
+
+ vnet_feature_registration_t *reg =
+ vnet_get_feature_reg ((const char *) arc_name,
+ (const char *) feature_name);
+ if (reg == 0)
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ else
+ {
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ clib_error_t *error = 0;
+
+ if (reg->enable_disable_cb)
+ error = reg->enable_disable_cb (sw_if_index, mp->enable);
+ if (!error)
+ vnet_feature_enable_disable ((const char *) arc_name,
+ (const char *) feature_name,
+ sw_if_index, mp->enable, 0, 0);
+ else
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_CANNOT_ENABLE_DISABLE_FEATURE;
+ }
+ }
+
+ vec_free (feature_name);
+ vec_free (arc_name);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_FEATURE_ENABLE_DISABLE_REPLY);
+}
+
+#define BOUNCE_HANDLER(nn) \
+static void vl_api_##nn##_t_handler ( \
+ vl_api_##nn##_t *mp) \
+{ \
+ vpe_client_registration_t *reg; \
+ vpe_api_main_t * vam = &vpe_api_main; \
+ unix_shared_memory_queue_t * q; \
+ \
+ /* One registration only... */ \
+ pool_foreach(reg, vam->nn##_registrations, \
+ ({ \
+ q = vl_api_client_index_to_input_queue (reg->client_index); \
+ if (q) { \
+ /* \
+ * If the queue is stuffed, turf the msg and complain \
+ * It's unlikely that the intended recipient is \
+ * alive; avoid deadlock at all costs. \
+ */ \
+ if (q->cursize == q->maxsize) { \
+ clib_warning ("ERROR: receiver queue full, drop msg"); \
+ vl_msg_api_free (mp); \
+ return; \
+ } \
+ vl_msg_api_send_shmem (q, (u8 *)&mp); \
+ return; \
+ } \
+ })); \
+ vl_msg_api_free (mp); \
+}
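+
+/*
+ * Illustrative expansion (the message name is hypothetical):
+ * BOUNCE_HANDLER (oam_event) would generate vl_api_oam_event_t_handler(),
+ * which relays the message unchanged to the one registered client queue,
+ * or frees it when no client is reachable or its queue is full.
+ */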
+
+static void setup_message_id_table (api_main_t * am);
+
+/*
+ * vpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../open-repo/vlib/memclnt_vlib.c:memclnt_process()
+ */
+static clib_error_t *
+vpe_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
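+
+/*
+ * Illustrative expansion of the _() macro above, for a hypothetical
+ * (SHOW_VERSION, show_version) pair: it registers
+ * vl_api_show_version_t_handler together with the generated
+ * endian-swap and print routines under message id VL_API_SHOW_VERSION.
+ */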
+
+ /*
+ * Trace space for classifier mask+match
+ */
+ am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_TABLE].size += 5 * sizeof (u32x4);
+ am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_SESSION].size
+ += 5 * sizeof (u32x4);
+
+ /*
+ * Thread-safe API messages
+ */
+ am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1;
+ am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1;
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vpe_api_hookup);
+
+static clib_error_t *
+vpe_api_init (vlib_main_t * vm)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+
+ am->vlib_main = vm;
+ am->vnet_main = vnet_get_main ();
+#define _(a) \
+ am->a##_registration_hash = hash_create (0, sizeof (uword));
+ foreach_registration_hash;
+#undef _
+
+ vl_set_memory_region_name ("/vpe-api");
+ vl_enable_disable_memory_api (vm, 1 /* enable it */ );
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vpe_api_init);
+
+
+static clib_error_t *
+api_segment_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ u8 *chroot_path;
+ u64 baseva, size, pvt_heap_size;
+ int uid, gid, rv;
+ const int max_buf_size = 4096;
+ char *s, *buf;
+ struct passwd _pw, *pw;
+ struct group _grp, *grp;
+ clib_error_t *e;
+ buf = vec_new (char, 128);
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "prefix %s", &chroot_path))
+ {
+ vec_add1 (chroot_path, 0);
+ vl_set_memory_root_path ((char *) chroot_path);
+ }
+ else if (unformat (input, "uid %d", &uid))
+ vl_set_memory_uid (uid);
+ else if (unformat (input, "gid %d", &gid))
+ vl_set_memory_gid (gid);
+ else if (unformat (input, "baseva %llx", &baseva))
+ vl_set_global_memory_baseva (baseva);
+ else if (unformat (input, "global-size %lldM", &size))
+ vl_set_global_memory_size (size * (1ULL << 20));
+ else if (unformat (input, "global-size %lldG", &size))
+ vl_set_global_memory_size (size * (1ULL << 30));
+ else if (unformat (input, "global-size %lld", &size))
+ vl_set_global_memory_size (size);
+ else if (unformat (input, "global-pvt-heap-size %lldM", &pvt_heap_size))
+ vl_set_global_pvt_heap_size (pvt_heap_size * (1ULL << 20));
+ else if (unformat (input, "global-pvt-heap-size size %lld",
+ &pvt_heap_size))
+ vl_set_global_pvt_heap_size (pvt_heap_size);
+ else if (unformat (input, "api-pvt-heap-size %lldM", &pvt_heap_size))
+ vl_set_api_pvt_heap_size (pvt_heap_size * (1ULL << 20));
+ else if (unformat (input, "api-pvt-heap-size size %lld",
+ &pvt_heap_size))
+ vl_set_api_pvt_heap_size (pvt_heap_size);
+ else if (unformat (input, "api-size %lldM", &size))
+ vl_set_api_memory_size (size * (1ULL << 20));
+ else if (unformat (input, "api-size %lldG", &size))
+ vl_set_api_memory_size (size * (1ULL << 30));
+ else if (unformat (input, "api-size %lld", &size))
+ vl_set_api_memory_size (size);
+ else if (unformat (input, "uid %s", &s))
+ {
+ /* lookup the username */
+ pw = NULL;
+ while (((rv =
+ getpwnam_r (s, &_pw, buf, vec_len (buf), &pw)) == ERANGE)
+ && (vec_len (buf) <= max_buf_size))
+ {
+ vec_resize (buf, vec_len (buf) * 2);
+ }
+ /* getpwnam_r returns a positive errno value on failure */
+ if (rv != 0)
+ {
+ e = clib_error_return_code (0, rv,
+ CLIB_ERROR_ERRNO_VALID |
+ CLIB_ERROR_FATAL,
+ "cannot fetch username %s", s);
+ vec_free (s);
+ vec_free (buf);
+ return e;
+ }
+ if (pw == NULL)
+ {
+ e =
+ clib_error_return_fatal (0, "username %s does not exist", s);
+ vec_free (s);
+ vec_free (buf);
+ return e;
+ }
+ vec_free (s);
+ vl_set_memory_uid (pw->pw_uid);
+ }
+ else if (unformat (input, "gid %s", &s))
+ {
+ /* lookup the group name */
+ grp = NULL;
+ while (((rv =
+ getgrnam_r (s, &_grp, buf, vec_len (buf), &grp)) == ERANGE)
+ && (vec_len (buf) <= max_buf_size))
+ {
+ vec_resize (buf, vec_len (buf) * 2);
+ }
+ if (rv != 0)
+ {
+ e = clib_error_return_code (0, rv,
+ CLIB_ERROR_ERRNO_VALID |
+ CLIB_ERROR_FATAL,
+ "cannot fetch group %s", s);
+ vec_free (s);
+ vec_free (buf);
+ return e;
+ }
+ if (grp == NULL)
+ {
+ e = clib_error_return_fatal (0, "group %s does not exist", s);
+ vec_free (s);
+ vec_free (buf);
+ return e;
+ }
+ vec_free (s);
+ /* keep buf: later uid/gid lookups in this loop reuse it */
+ vl_set_memory_gid (grp->gr_gid);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ vec_free (buf);
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (api_segment_config, "api-segment");
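+
+/*
+ * Illustrative startup.conf stanza consumed by the parser above
+ * (all values are examples, not defaults):
+ *
+ *   api-segment {
+ *     prefix vpp1
+ *     uid vpp
+ *     gid vpp
+ *     global-size 64M
+ *     api-size 16M
+ *   }
+ */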
+
+void *
+get_unformat_vnet_sw_interface (void)
+{
+ return (void *) &unformat_vnet_sw_interface;
+}
+
+static u8 *
+format_arp_event (u8 * s, va_list * args)
+{
+ vl_api_ip4_arp_event_t *event = va_arg (*args, vl_api_ip4_arp_event_t *);
+
+ s = format (s, "pid %d: ", ntohl (event->pid));
+ s = format (s, "resolution for %U", format_ip4_address, &event->address);
+ return s;
+}
+
+static u8 *
+format_nd_event (u8 * s, va_list * args)
+{
+ vl_api_ip6_nd_event_t *event = va_arg (*args, vl_api_ip6_nd_event_t *);
+
+ s = format (s, "pid %d: ", ntohl (event->pid));
+ s = format (s, "resolution for %U", format_ip6_address, event->address);
+ return s;
+}
+
+static clib_error_t *
+show_ip_arp_nd_events_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vl_api_ip4_arp_event_t *arp_event;
+ vl_api_ip6_nd_event_t *nd_event;
+
+ if (pool_elts (am->arp_events) == 0 && pool_elts (am->nd_events) == 0 &&
+ pool_elts (am->wc_ip4_arp_events_registrations) == 0 &&
+ pool_elts (am->wc_ip6_nd_events_registrations) == 0)
+ {
+ vlib_cli_output (vm, "No active arp or nd event registrations");
+ return 0;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach (arp_event, am->arp_events,
+ ({
+ vlib_cli_output (vm, "%U", format_arp_event, arp_event);
+ }));
+
+ vpe_client_registration_t *reg;
+ pool_foreach(reg, am->wc_ip4_arp_events_registrations,
+ ({
+ vlib_cli_output (vm, "pid %d: bd mac/ip4 binding events",
+ ntohl (reg->client_pid));
+ }));
+
+ pool_foreach (nd_event, am->nd_events,
+ ({
+ vlib_cli_output (vm, "%U", format_nd_event, nd_event);
+ }));
+
+ pool_foreach(reg, am->wc_ip6_nd_events_registrations,
+ ({
+ vlib_cli_output (vm, "pid %d: bd mac/ip6 binding events",
+ ntohl (reg->client_pid));
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip_arp_nd_events, static) = {
+ .path = "show arp-nd-event registrations",
+ .function = show_ip_arp_nd_events_fn,
+ .short_help = "Show ip4 arp and ip6 nd event registrations",
+};
+/* *INDENT-ON* */
+
+#define vl_msg_name_crc_list
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_memclnt;
+ foreach_vl_msg_name_crc_vpe;
+#undef _
+}
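+
+/*
+ * The (name, crc) pairs registered above let a client resolve message
+ * ids by "<name>_<crc>" at connect time; a lookup miss signals that the
+ * client's view of a message layout no longer matches this image.
+ */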
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/api_format.c b/src/vpp/api/api_format.c
new file mode 120000
index 00000000..cec72cc0
--- /dev/null
+++ b/src/vpp/api/api_format.c
@@ -0,0 +1 @@
+../../vat/api_format.c \ No newline at end of file
diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c
new file mode 100644
index 00000000..c355a5fd
--- /dev/null
+++ b/src/vpp/api/api_main.c
@@ -0,0 +1,250 @@
+#include "vat.h"
+
+vat_main_t vat_main;
+
+void
+vat_suspend (vlib_main_t * vm, f64 interval)
+{
+ vlib_process_suspend (vm, interval);
+}
+
+static u8 *
+format_api_error (u8 * s, va_list * args)
+{
+ vat_main_t *vam = va_arg (*args, vat_main_t *);
+ i32 error = va_arg (*args, u32);
+ uword *p;
+
+ p = hash_get (vam->error_string_by_error_number, -error);
+
+ if (p)
+ s = format (s, "%s", p[0]);
+ else
+ s = format (s, "%d", error);
+ return s;
+}
+
+
+static void
+init_error_string_table (vat_main_t * vam)
+{
+
+ vam->error_string_by_error_number = hash_create (0, sizeof (uword));
+
+#define _(n,v,s) hash_set (vam->error_string_by_error_number, -v, s);
+ foreach_vnet_api_error;
+#undef _
+
+ hash_set (vam->error_string_by_error_number, 99, "Misc");
+}
+
+static clib_error_t *
+api_main_init (vlib_main_t * vm)
+{
+ vat_main_t *vam = &vat_main;
+ int rv;
+ int vat_plugin_init (vat_main_t * vam);
+
+ vam->vlib_main = vm;
+ vam->my_client_index = (u32) ~ 0;
+ /* Ensure that vam->inbuf is never NULL */
+ vec_validate (vam->inbuf, 0);
+ init_error_string_table (vam);
+ rv = vat_plugin_init (vam);
+ if (rv)
+ clib_warning ("vat_plugin_init returned %d", rv);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (api_main_init);
+
+void
+vat_plugin_hash_create (void)
+{
+ vat_main_t *vam = &vat_main;
+
+ vam->sw_if_index_by_interface_name = hash_create_string (0, sizeof (uword));
+ vam->function_by_name = hash_create_string (0, sizeof (uword));
+ vam->help_by_name = hash_create_string (0, sizeof (uword));
+}
+
+static clib_error_t *
+api_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vat_main_t *vam = &vat_main;
+ unformat_input_t _input;
+ uword c;
+ u8 *cmdp, *argsp, *this_cmd;
+ uword *p;
+ u32 arg_len;
+ int rv;
+ int (*fp) (vat_main_t *);
+ api_main_t *am = &api_main;
+
+ vam->vl_input_queue = am->shmem_hdr->vl_input_queue;
+
+ /* vec_validated in the init routine */
+ _vec_len (vam->inbuf) = 0;
+
+ vam->input = &_input;
+
+ while (((c = unformat_get_input (input)) != '\n') &&
+ (c != UNFORMAT_END_OF_INPUT))
+ vec_add1 (vam->inbuf, c);
+
+ /* Null-terminate the command */
+ vec_add1 (vam->inbuf, 0);
+
+ /* In case no args given */
+ vec_add1 (vam->inbuf, 0);
+
+ /* Split input into cmd + args */
+ this_cmd = cmdp = vam->inbuf;
+
+ /* Skip leading whitespace */
+ while (cmdp < (this_cmd + vec_len (this_cmd)))
+ {
+ if (*cmdp == ' ' || *cmdp == '\t' || *cmdp == '\n')
+ {
+ cmdp++;
+ }
+ else
+ break;
+ }
+
+ argsp = cmdp;
+
+ /* Advance past the command */
+ while (argsp < (this_cmd + vec_len (this_cmd)))
+ {
+ if (*argsp != ' ' && *argsp != '\t' && *argsp != '\n' && *argsp != 0)
+ {
+ argsp++;
+ }
+ else
+ break;
+ }
+ /* NULL terminate the command */
+ *argsp++ = 0;
+
+ /* No arguments? Ensure that argsp points to a proper (empty) string */
+ if (argsp == (this_cmd + vec_len (this_cmd) - 1))
+ argsp[0] = 0;
+ else
+ while (argsp < (this_cmd + vec_len (this_cmd)))
+ {
+ if (*argsp == ' ' || *argsp == '\t' || *argsp == '\n')
+ {
+ argsp++;
+ }
+ else
+ break;
+ }
+
+ /* Blank input line? */
+ if (*cmdp == 0)
+ return 0;
+
+ p = hash_get_mem (vam->function_by_name, cmdp);
+ if (p == 0)
+ {
+ return clib_error_return (0, "'%s': function not found\n", cmdp);
+ }
+
+ arg_len = strlen ((char *) argsp);
+
+ unformat_init_string (vam->input, (char *) argsp, arg_len);
+ fp = (void *) p[0];
+
+ rv = (*fp) (vam);
+
+ if (rv < 0)
+ {
+ unformat_free (vam->input);
+ return clib_error_return (0,
+ "%s error: %U\n", cmdp,
+ format_api_error, vam, rv);
+
+ }
+ if (vam->regenerate_interface_table)
+ {
+ vam->regenerate_interface_table = 0;
+ api_sw_interface_dump (vam);
+ }
+ unformat_free (vam->input);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (api_command, static) =
+{
+ .path = "binary-api",
+ .short_help = "binary-api [help] <name> [<args>]",
+ .function = api_command_fn,
+};
+/* *INDENT-ON* */
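+
+/*
+ * Example (illustrative): at the VPP CLI prompt,
+ *   vpp# binary-api show_version
+ * looks up "show_version" in vam->function_by_name and runs the
+ * corresponding VAT handler in-process.
+ */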
+
+void
+api_cli_output (void *notused, const char *fmt, ...)
+{
+ va_list va;
+ vat_main_t *vam = &vat_main;
+ vlib_main_t *vm = vam->vlib_main;
+ vlib_process_t *cp = vlib_get_current_process (vm);
+ u8 *s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ /* Terminate with \n if not present. */
+ if (vec_len (s) > 0 && s[vec_len (s) - 1] != '\n')
+ vec_add1 (s, '\n');
+
+ if ((!cp) || (!cp->output_function))
+ fformat (stdout, "%v", s);
+ else
+ cp->output_function (cp->output_function_arg, s, vec_len (s));
+
+ vec_free (s);
+}
+
+u16
+vl_client_get_first_plugin_msg_id (const char *plugin_name)
+{
+ api_main_t *am = &api_main;
+ vl_api_msg_range_t *rp;
+ uword *p;
+
+ p = hash_get_mem (am->msg_range_by_name, plugin_name);
+ if (p == 0)
+ return ~0;
+
+ rp = vec_elt_at_index (am->msg_ranges, p[0]);
+
+ return (rp->first_msg_id);
+}
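+
+/*
+ * Illustrative use from a VAT plugin (name/version string hypothetical):
+ *   u16 base = vl_client_get_first_plugin_msg_id ("acl_<api-version>");
+ *   if (base != (u16) ~0)
+ *     ... offset the plugin's message ids by base ...
+ */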
+
+uword
+unformat_sw_if_index (unformat_input_t * input, va_list * args)
+{
+ u32 *result = va_arg (*args, u32 *);
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ *result = sw_if_index;
+ return 1;
+ }
+ return 0;
+}
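+
+/*
+ * Illustrative use: parse an interface name from CLI/VAT input with
+ *   u32 sw_if_index;
+ *   if (unformat (input, "%U", unformat_sw_if_index, &sw_if_index))
+ *     ...
+ */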
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c
new file mode 100644
index 00000000..2e1f980e
--- /dev/null
+++ b/src/vpp/api/custom_dump.c
@@ -0,0 +1,3231 @@
+/*
+ *------------------------------------------------------------------
+ * custom_dump.c - pretty-print API messages for replay
+ *
+ * Copyright (c) 2014-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/unix/tuntap.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/dhcp/dhcp_proxy.h>
+#include <vnet/l2tp/l2tp.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/classify/policer_classify.h>
+#include <vnet/policer/xlate.h>
+#include <vnet/policer/policer.h>
+#include <vnet/classify/flow_classify.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/lisp-cp/lisp_types.h>
+
+#include <vpp/stats/stats.h>
+#include <vpp/oam/oam.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/l2/l2_vtr.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+
+#define FINISH \
+ vec_add1 (s, 0); \
+ vl_print (handle, (char *)s); \
+ vec_free (s); \
+ return handle;
+
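+/*
+ * Each print routine below renders one API message in its script-replay
+ * form into the u8 * vector 's'; FINISH NUL-terminates the vector,
+ * prints it via vl_print (vlib_cli_output) and frees it.
+ */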
+
+static void *vl_api_create_loopback_t_print
+ (vl_api_create_loopback_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: create_loopback ");
+ s = format (s, "mac %U ", format_ethernet_address, &mp->mac_address);
+
+ FINISH;
+}
+
+static void *vl_api_create_loopback_instance_t_print
+ (vl_api_create_loopback_instance_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: create_loopback ");
+ s = format (s, "mac %U ", format_ethernet_address, &mp->mac_address);
+ s = format (s, "instance %d ", ntohl (mp->user_instance));
+
+ FINISH;
+}
+
+static void *vl_api_delete_loopback_t_print
+ (vl_api_delete_loopback_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: delete_loopback ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_flags_t_print
+ (vl_api_sw_interface_set_flags_t * mp, void *handle)
+{
+ u8 *s;
+ s = format (0, "SCRIPT: sw_interface_set_flags ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->admin_up_down)
+ s = format (s, "admin-up ");
+ else
+ s = format (s, "admin-down ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_event_t_print
+ (vl_api_sw_interface_event_t * mp, void *handle)
+{
+ u8 *s;
+ s = format (0, "SCRIPT: sw_interface_event ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->admin_up_down)
+ s = format (s, "admin-up ");
+ else
+ s = format (s, "admin-down ");
+
+ if (mp->link_up_down)
+ s = format (s, "link-up");
+ else
+ s = format (s, "link-down");
+
+ if (mp->deleted)
+ s = format (s, " deleted");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_add_del_address_t_print
+ (vl_api_sw_interface_add_del_address_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_add_del_address ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->is_ipv6)
+ s = format (s, "%U/%d ", format_ip6_address,
+ (ip6_address_t *) mp->address, mp->address_length);
+ else
+ s = format (s, "%U/%d ", format_ip4_address,
+ (ip4_address_t *) mp->address, mp->address_length);
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+ if (mp->del_all)
+ s = format (s, "del-all ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_table_t_print
+ (vl_api_sw_interface_set_table_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_table ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->vrf_id)
+ s = format (s, "vrf %d ", ntohl (mp->vrf_id));
+
+ if (mp->is_ipv6)
+ s = format (s, "ipv6 ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_mpls_enable_t_print
+ (vl_api_sw_interface_set_mpls_enable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_mpls_enable ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->enable == 0)
+ s = format (s, "disable");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_vpath_t_print
+ (vl_api_sw_interface_set_vpath_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_vpath ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->enable)
+ s = format (s, "enable ");
+ else
+ s = format (s, "disable ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_vxlan_bypass_t_print
+ (vl_api_sw_interface_set_vxlan_bypass_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_vxlan_bypass ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->is_ipv6)
+ s = format (s, "ip6 ");
+
+ if (mp->enable)
+ s = format (s, "enable ");
+ else
+ s = format (s, "disable ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_l2_xconnect_t_print
+ (vl_api_sw_interface_set_l2_xconnect_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_l2_xconnect ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->rx_sw_if_index));
+
+ if (mp->enable)
+ {
+ s = format (s, "tx_sw_if_index %d ", ntohl (mp->tx_sw_if_index));
+ }
+ else
+ s = format (s, "delete ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_l2_bridge_t_print
+ (vl_api_sw_interface_set_l2_bridge_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_l2_bridge ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->rx_sw_if_index));
+
+ if (mp->enable)
+ {
+ s = format (s, "bd_id %d shg %d %senable ", ntohl (mp->bd_id),
+ mp->shg, ((mp->bvi) ? "bvi " : " "));
+ }
+ else
+ s = format (s, "disable ");
+
+ FINISH;
+}
+
+static void *vl_api_bridge_domain_add_del_t_print
+ (vl_api_bridge_domain_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: bridge_domain_add_del ");
+
+ s = format (s, "bd_id %d ", ntohl (mp->bd_id));
+
+ if (mp->is_add)
+ {
+ if (mp->bd_tag[0])
+ s = format (s, "bd_tag %s ", mp->bd_tag);
+ s = format (s, "flood %d uu-flood %d ", mp->flood, mp->uu_flood);
+ s = format (s, "forward %d learn %d ", mp->forward, mp->learn);
+ s = format (s, "arp-term %d mac-age %d", mp->arp_term, mp->mac_age);
+ }
+ else
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_bridge_domain_set_mac_age_t_print
+ (vl_api_bridge_domain_set_mac_age_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: bridge_domain_set_mac_age ");
+
+ s = format (s, "bd_id %d ", ntohl (mp->bd_id));
+
+ s = format (s, "mac-age %d", mp->mac_age);
+
+ FINISH;
+}
+
+static void *vl_api_bridge_domain_dump_t_print
+ (vl_api_bridge_domain_dump_t * mp, void *handle)
+{
+ u8 *s;
+ u32 bd_id = ntohl (mp->bd_id);
+
+ s = format (0, "SCRIPT: bridge_domain_dump ");
+
+ if (bd_id != ~0)
+ s = format (s, "bd_id %d ", bd_id);
+
+ FINISH;
+}
+
+static void *vl_api_l2fib_flush_all_t_print
+ (vl_api_l2fib_flush_all_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2fib_flush_all ");
+
+ FINISH;
+}
+
+
+static void *vl_api_l2fib_flush_bd_t_print
+ (vl_api_l2fib_flush_bd_t * mp, void *handle)
+{
+ u8 *s;
+ u32 bd_id = ntohl (mp->bd_id);
+
+ s = format (0, "SCRIPT: l2fib_flush_bd ");
+ s = format (s, "bd_id %d ", bd_id);
+
+ FINISH;
+}
+
+static void *vl_api_l2fib_flush_int_t_print
+ (vl_api_l2fib_flush_int_t * mp, void *handle)
+{
+ u8 *s;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ s = format (0, "SCRIPT: l2fib_flush_int ");
+ s = format (s, "sw_if_index %d ", sw_if_index);
+
+ FINISH;
+}
+
+static void *vl_api_l2fib_add_del_t_print
+ (vl_api_l2fib_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2fib_add_del ");
+
+ s = format (s, "mac %U ", format_ethernet_address, &mp->mac);
+
+ s = format (s, "bd_id %d ", ntohl (mp->bd_id));
+
+
+ if (mp->is_add)
+ {
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ if (mp->static_mac)
+ s = format (s, "%s", "static ");
+ if (mp->filter_mac)
+ s = format (s, "%s", "filter ");
+ if (mp->bvi_mac)
+ s = format (s, "%s", "bvi ");
+ }
+ else
+ {
+ s = format (s, "del ");
+ }
+
+ FINISH;
+}
+
+static void *
+vl_api_l2_flags_t_print (vl_api_l2_flags_t * mp, void *handle)
+{
+ u8 *s;
+ u32 flags = ntohl (mp->feature_bitmap);
+
+ s = format (0, "SCRIPT: l2_flags ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (flags & L2_LEARN)
+ s = format (s, "learn ");
+ if (flags & L2_FWD)
+ s = format (s, "forward ");
+ if (flags & L2_FLOOD)
+ s = format (s, "flood ");
+ if (flags & L2_UU_FLOOD)
+ s = format (s, "uu-flood ");
+ if (flags & L2_ARP_TERM)
+ s = format (s, "arp-term ");
+
+ if (mp->is_set == 0)
+ s = format (s, "clear ");
+
+ FINISH;
+}
+
+static void *vl_api_bridge_flags_t_print
+ (vl_api_bridge_flags_t * mp, void *handle)
+{
+ u8 *s;
+ u32 flags = ntohl (mp->feature_bitmap);
+
+ s = format (0, "SCRIPT: bridge_flags ");
+
+ s = format (s, "bd_id %d ", ntohl (mp->bd_id));
+
+ if (flags & L2_LEARN)
+ s = format (s, "learn ");
+ if (flags & L2_FWD)
+ s = format (s, "forward ");
+ if (flags & L2_FLOOD)
+ s = format (s, "flood ");
+ if (flags & L2_UU_FLOOD)
+ s = format (s, "uu-flood ");
+ if (flags & L2_ARP_TERM)
+ s = format (s, "arp-term ");
+
+ if (mp->is_set == 0)
+ s = format (s, "clear ");
+
+ FINISH;
+}
+
+static void *vl_api_bd_ip_mac_add_del_t_print
+ (vl_api_bd_ip_mac_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: bd_ip_mac_add_del ");
+ s = format (s, "bd_id %d ", ntohl (mp->bd_id));
+
+ if (mp->is_ipv6)
+ s = format (s, "%U ", format_ip6_address,
+ (ip6_address_t *) mp->ip_address);
+ else
+ s = format (s, "%U ", format_ip4_address,
+ (ip4_address_t *) mp->ip_address);
+
+ s = format (s, "%U ", format_ethernet_address, mp->mac_address);
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_tap_connect_t_print
+ (vl_api_tap_connect_t * mp, void *handle)
+{
+ u8 *s;
+ u8 null_mac[6];
+
+ memset (null_mac, 0, sizeof (null_mac));
+
+ s = format (0, "SCRIPT: tap_connect ");
+ s = format (s, "tapname %s ", mp->tap_name);
+ if (mp->use_random_mac)
+ s = format (s, "random-mac ");
+ if (mp->tag[0])
+ s = format (s, "tag %s ", mp->tag);
+ if (memcmp (mp->mac_address, null_mac, 6))
+ s = format (s, "mac %U ", format_ethernet_address, mp->mac_address);
+ if (mp->ip4_address_set)
+ s = format (s, "address %U/%d ", format_ip4_address, mp->ip4_address,
+ mp->ip4_mask_width);
+ if (mp->ip6_address_set)
+ s = format (s, "address %U/%d ", format_ip6_address, mp->ip6_address,
+ mp->ip6_mask_width);
+ FINISH;
+}
+
+static void *vl_api_tap_modify_t_print
+ (vl_api_tap_modify_t * mp, void *handle)
+{
+ u8 *s;
+ u8 null_mac[6];
+
+ memset (null_mac, 0, sizeof (null_mac));
+
+ s = format (0, "SCRIPT: tap_modify ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "tapname %s ", mp->tap_name);
+ if (mp->use_random_mac)
+ s = format (s, "random-mac ");
+
+ if (memcmp (mp->mac_address, null_mac, 6))
+ s = format (s, "mac %U ", format_ethernet_address, mp->mac_address);
+
+ FINISH;
+}
+
+static void *vl_api_tap_delete_t_print
+ (vl_api_tap_delete_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: tap_delete ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_tap_dump_t_print
+ (vl_api_sw_interface_tap_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_tap_dump ");
+
+ FINISH;
+}
+
+
+static void *vl_api_ip_add_del_route_t_print
+ (vl_api_ip_add_del_route_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ip_add_del_route ");
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ if (mp->next_hop_sw_if_index)
+ s = format (s, "sw_if_index %d ", ntohl (mp->next_hop_sw_if_index));
+
+ if (mp->is_ipv6)
+ s = format (s, "%U/%d ", format_ip6_address, mp->dst_address,
+ mp->dst_address_length);
+ else
+ s = format (s, "%U/%d ", format_ip4_address, mp->dst_address,
+ mp->dst_address_length);
+ if (mp->is_local)
+ s = format (s, "local ");
+ else if (mp->is_drop)
+ s = format (s, "drop ");
+ else if (mp->is_classify)
+ s = format (s, "classify %d", ntohl (mp->classify_table_index));
+ else
+ {
+ if (mp->is_ipv6)
+ s = format (s, "via %U ", format_ip6_address, mp->next_hop_address);
+ else
+ s = format (s, "via %U ", format_ip4_address, mp->next_hop_address);
+ }
+
+ if (mp->table_id != 0)
+ s = format (s, "vrf %d ", ntohl (mp->table_id));
+
+ if (mp->next_hop_weight != 1)
+ s = format (s, "weight %d ", mp->next_hop_weight);
+
+ if (mp->not_last)
+ s = format (s, "not-last ");
+
+ if (mp->is_multipath)
+ s = format (s, "multipath ");
+
+ if (mp->next_hop_table_id)
+ s = format (s, "lookup-in-vrf %d ", ntohl (mp->next_hop_table_id));
+
+ FINISH;
+}
+
+static void *vl_api_proxy_arp_add_del_t_print
+ (vl_api_proxy_arp_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: proxy_arp_add_del ");
+
+ s = format (s, "%U - %U ", format_ip4_address, mp->low_address,
+ format_ip4_address, mp->hi_address);
+
+ if (mp->vrf_id)
+ s = format (s, "vrf %d ", ntohl (mp->vrf_id));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_proxy_arp_intfc_enable_disable_t_print
+ (vl_api_proxy_arp_intfc_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: proxy_arp_intfc_enable_disable ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "enable %d ", mp->enable_disable);
+
+ FINISH;
+}
+
+static void *vl_api_mpls_tunnel_add_del_t_print
+ (vl_api_mpls_tunnel_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: mpls_tunnel_add_del ");
+
+ if (mp->mt_next_hop_sw_if_index)
+ s = format (s, "sw_if_index %d ", ntohl (mp->mt_next_hop_sw_if_index));
+
+ if (mp->mt_next_hop_proto_is_ip4)
+ s = format (s, "%U ", format_ip4_address, mp->mt_next_hop);
+ else
+ s = format (s, "%U ", format_ip6_address, mp->mt_next_hop);
+
+ if (mp->mt_l2_only)
+ s = format (s, "l2-only ");
+
+ if (mp->mt_is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_unnumbered_t_print
+ (vl_api_sw_interface_set_unnumbered_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_unnumbered ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "unnum_if_index %d ", ntohl (mp->unnumbered_sw_if_index));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_ip_neighbor_add_del_t_print
+ (vl_api_ip_neighbor_add_del_t * mp, void *handle)
+{
+ u8 *s;
+ u8 null_mac[6];
+
+ memset (null_mac, 0, sizeof (null_mac));
+
+ s = format (0, "SCRIPT: ip_neighbor_add_del ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->is_static)
+ s = format (s, "is_static ");
+
+ if (mp->is_no_adj_fib)
+ s = format (s, "is_no_fib_entry ");
+
+ if (memcmp (mp->mac_address, null_mac, 6))
+ s = format (s, "mac %U ", format_ethernet_address, mp->mac_address);
+
+ if (mp->is_ipv6)
+ s =
+ format (s, "dst %U ", format_ip6_address,
+ (ip6_address_t *) mp->dst_address);
+ else
+ s =
+ format (s, "dst %U ", format_ip4_address,
+ (ip4_address_t *) mp->dst_address);
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *
+vl_api_reset_vrf_t_print (vl_api_reset_vrf_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: reset_vrf ");
+
+ if (mp->vrf_id)
+ s = format (s, "vrf %d ", ntohl (mp->vrf_id));
+
+ if (mp->is_ipv6 != 0)
+ s = format (s, "ipv6 ");
+
+ FINISH;
+}
+
+static void *vl_api_create_vlan_subif_t_print
+ (vl_api_create_vlan_subif_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: create_vlan_subif ");
+
+ if (mp->sw_if_index)
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->vlan_id)
+ s = format (s, "vlan_id %d ", ntohl (mp->vlan_id));
+
+ FINISH;
+}
+
+#define foreach_create_subif_bit \
+_(no_tags) \
+_(one_tag) \
+_(two_tags) \
+_(dot1ad) \
+_(exact_match) \
+_(default_sub) \
+_(outer_vlan_id_any) \
+_(inner_vlan_id_any)
+
+static void *vl_api_create_subif_t_print
+ (vl_api_create_subif_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: create_subif ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "sub_id %d ", ntohl (mp->sub_id));
+
+ if (mp->outer_vlan_id)
+ s = format (s, "outer_vlan_id %d ", ntohs (mp->outer_vlan_id));
+
+ if (mp->inner_vlan_id)
+ s = format (s, "inner_vlan_id %d ", ntohs (mp->inner_vlan_id));
+
+#define _(a) if (mp->a) s = format (s, "%s ", #a);
+ foreach_create_subif_bit;
+#undef _
+
+ FINISH;
+}
+
+static void *vl_api_delete_subif_t_print
+ (vl_api_delete_subif_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: delete_subif ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_oam_add_del_t_print
+ (vl_api_oam_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: oam_add_del ");
+
+ if (mp->vrf_id)
+ s = format (s, "vrf %d ", ntohl (mp->vrf_id));
+
+ s = format (s, "src %U ", format_ip4_address, mp->src_address);
+
+ s = format (s, "dst %U ", format_ip4_address, mp->dst_address);
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *
+vl_api_reset_fib_t_print (vl_api_reset_fib_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: reset_fib ");
+
+ if (mp->vrf_id)
+ s = format (s, "vrf %d ", ntohl (mp->vrf_id));
+
+ if (mp->is_ipv6 != 0)
+ s = format (s, "ipv6 ");
+
+ FINISH;
+}
+
+static void *vl_api_dhcp_proxy_config_t_print
+ (vl_api_dhcp_proxy_config_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: dhcp_proxy_config ");
+
+ s = format (s, "rx_vrf_id %d ", ntohl (mp->rx_vrf_id));
+ s = format (s, "server_vrf_id %d ", ntohl (mp->server_vrf_id));
+
+ if (mp->is_ipv6)
+ {
+ s = format (s, "svr %U ", format_ip6_address,
+ (ip6_address_t *) mp->dhcp_server);
+ s = format (s, "src %U ", format_ip6_address,
+ (ip6_address_t *) mp->dhcp_src_address);
+ }
+ else
+ {
+ s = format (s, "svr %U ", format_ip4_address,
+ (ip4_address_t *) mp->dhcp_server);
+ s = format (s, "src %U ", format_ip4_address,
+ (ip4_address_t *) mp->dhcp_src_address);
+ }
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_dhcp_proxy_set_vss_t_print
+ (vl_api_dhcp_proxy_set_vss_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: dhcp_proxy_set_vss ");
+
+ s = format (s, "tbl_id %d ", ntohl (mp->tbl_id));
+
+ s = format (s, "fib_id %d ", ntohl (mp->fib_id));
+
+ s = format (s, "oui %d ", ntohl (mp->oui));
+
+ if (mp->is_ipv6 != 0)
+ s = format (s, "ipv6 ");
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_dhcp_client_config_t_print
+ (vl_api_dhcp_client_config_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: dhcp_client_config ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "hostname %s ", mp->hostname);
+
+ s = format (s, "want_dhcp_event %d ", mp->want_dhcp_event);
+
+ s = format (s, "pid %d ", ntohl (mp->pid));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+
+static void *vl_api_set_ip_flow_hash_t_print
+ (vl_api_set_ip_flow_hash_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: set_ip_flow_hash ");
+
+ s = format (s, "vrf_id %d ", ntohl (mp->vrf_id));
+
+ if (mp->src)
+ s = format (s, "src ");
+
+ if (mp->dst)
+ s = format (s, "dst ");
+
+ if (mp->sport)
+ s = format (s, "sport ");
+
+ if (mp->dport)
+ s = format (s, "dport ");
+
+ if (mp->proto)
+ s = format (s, "proto ");
+
+ if (mp->reverse)
+ s = format (s, "reverse ");
+
+ if (mp->is_ipv6 != 0)
+ s = format (s, "ipv6 ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_ip6_set_link_local_address_t_print
+ (vl_api_sw_interface_ip6_set_link_local_address_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_ip6_set_link_local_address ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "%U ", format_ip6_address, mp->address);
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_ip6nd_ra_prefix_t_print
+ (vl_api_sw_interface_ip6nd_ra_prefix_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_ip6nd_ra_prefix ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "%U/%d ", format_ip6_address, mp->address,
+ mp->address_length);
+
+ s = format (s, "val_life %d ", ntohl (mp->val_lifetime));
+
+ s = format (s, "pref_life %d ", ntohl (mp->pref_lifetime));
+
+ if (mp->use_default)
+ s = format (s, "def ");
+
+ if (mp->no_advertise)
+ s = format (s, "noadv ");
+
+ if (mp->off_link)
+ s = format (s, "offl ");
+
+ if (mp->no_autoconfig)
+ s = format (s, "noauto ");
+
+ if (mp->no_onlink)
+ s = format (s, "nolink ");
+
+ if (mp->is_no)
+ s = format (s, "isno ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_ip6nd_ra_config_t_print
+ (vl_api_sw_interface_ip6nd_ra_config_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_ip6nd_ra_config ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "maxint %d ", ntohl (mp->max_interval));
+
+ s = format (s, "minint %d ", ntohl (mp->min_interval));
+
+ s = format (s, "life %d ", ntohl (mp->lifetime));
+
+ s = format (s, "count %d ", ntohl (mp->initial_count));
+
+ s = format (s, "interval %d ", ntohl (mp->initial_interval));
+
+ if (mp->suppress)
+ s = format (s, "suppress ");
+
+ if (mp->managed)
+ s = format (s, "managed ");
+
+ if (mp->other)
+ s = format (s, "other ");
+
+ if (mp->ll_option)
+ s = format (s, "ll ");
+
+ if (mp->send_unicast)
+ s = format (s, "send ");
+
+ if (mp->cease)
+ s = format (s, "cease ");
+
+ if (mp->is_no)
+ s = format (s, "isno ");
+
+ if (mp->default_router)
+ s = format (s, "def ");
+
+ FINISH;
+}
+
+static void *vl_api_set_arp_neighbor_limit_t_print
+ (vl_api_set_arp_neighbor_limit_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: set_arp_neighbor_limit ");
+
+ s = format (s, "arp_nbr_limit %d ", ntohl (mp->arp_neighbor_limit));
+
+ if (mp->is_ipv6 != 0)
+ s = format (s, "ipv6 ");
+
+ FINISH;
+}
+
+static void *vl_api_l2_patch_add_del_t_print
+ (vl_api_l2_patch_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2_patch_add_del ");
+
+ s = format (s, "rx_sw_if_index %d ", ntohl (mp->rx_sw_if_index));
+
+ s = format (s, "tx_sw_if_index %d ", ntohl (mp->tx_sw_if_index));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_sr_localsid_add_del_t_print
+ (vl_api_sr_localsid_add_del_t * mp, void *handle)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u8 *s;
+
+ s = format (0, "SCRIPT: sr_localsid_add_del ");
+
+ switch (mp->behavior)
+ {
+ case SR_BEHAVIOR_END:
+ s = format (s, "Address: %U\nBehavior: End",
+ format_ip6_address, (ip6_address_t *) mp->localsid_addr);
+ s = format (s, (mp->end_psp ? "End.PSP: True" : "End.PSP: False"));
+ break;
+ case SR_BEHAVIOR_X:
+ s =
+ format (s,
+ "Address: %U\nBehavior: X (Endpoint with Layer-3 cross-connect)"
+ "\nIface: %U\nNext hop: %U", format_ip6_address,
+ (ip6_address_t *) mp->localsid_addr,
+ format_vnet_sw_if_index_name, vnm, ntohl (mp->sw_if_index),
+ format_ip6_address, (ip6_address_t *) mp->nh_addr);
+ s = format (s, (mp->end_psp ? "End.PSP: True" : "End.PSP: False"));
+ break;
+ case SR_BEHAVIOR_DX4:
+ s =
+ format (s,
+ "Address: %U\nBehavior: DX4 (Endpoint with decapsulation with IPv4 cross-connect)"
+ "\nIface: %U\nNext hop: %U", format_ip6_address,
+ (ip6_address_t *) mp->localsid_addr,
+ format_vnet_sw_if_index_name, vnm, ntohl (mp->sw_if_index),
+ format_ip4_address, (ip4_address_t *) mp->nh_addr);
+ break;
+ case SR_BEHAVIOR_DX6:
+ s =
+ format (s,
+ "Address: %U\nBehavior: DX6 (Endpoint with decapsulation with IPv6 cross-connect)"
+ "\nIface: %U\nNext hop: %U", format_ip6_address,
+ (ip6_address_t *) mp->localsid_addr,
+ format_vnet_sw_if_index_name, vnm, ntohl (mp->sw_if_index),
+ format_ip6_address, (ip6_address_t *) mp->nh_addr);
+ break;
+ case SR_BEHAVIOR_DX2:
+ s =
+ format (s,
+ "Address: %U\nBehavior: DX2 (Endpoint with decapsulation and Layer-2 cross-connect)"
+ "\nIface: %U", format_ip6_address,
+ (ip6_address_t *) mp->localsid_addr,
+ format_vnet_sw_if_index_name, vnm, ntohl (mp->sw_if_index));
+ break;
+ case SR_BEHAVIOR_DT6:
+ s =
+ format (s,
+ "Address: %U\nBehavior: DT6 (Endpoint with decapsulation and specific IPv6 table lookup)"
+ "\nTable: %u", format_ip6_address,
+ (ip6_address_t *) mp->localsid_addr, ntohl (mp->fib_table));
+ break;
+ case SR_BEHAVIOR_DT4:
+ s =
+ format (s,
+ "Address: %U\nBehavior: DT4 (Endpoint with decapsulation and specific IPv4 table lookup)"
+ "\nTable: %u", format_ip6_address,
+ (ip6_address_t *) mp->localsid_addr, ntohl (mp->fib_table));
+ break;
+ default:
+ if (mp->behavior >= SR_BEHAVIOR_LAST)
+ {
+ s = format (s, "Address: %U\n Behavior: %u",
+ format_ip6_address, (ip6_address_t *) mp->localsid_addr,
+ mp->behavior);
+ }
+ else
+ //Should never get here...
+ s = format (s, "Internal error");
+ break;
+ }
+ FINISH;
+}
+
+static void *vl_api_sr_steering_add_del_t_print
+ (vl_api_sr_steering_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sr_steering_add_del ");
+
+ s = format (s, (mp->is_del ? "Del: True" : "Del: False"));
+
+ switch (mp->traffic_type)
+ {
+ case SR_STEER_L2:
+ s = format (s, "Traffic type: L2 iface: %u", ntohl (mp->sw_if_index));
+ break;
+ case SR_STEER_IPV4:
+ s = format (s, "Traffic type: IPv4 %U/%u", format_ip4_address,
+ (ip4_address_t *) mp->prefix_addr, ntohl (mp->mask_width));
+ break;
+ case SR_STEER_IPV6:
+ s = format (s, "Traffic type: IPv6 %U/%u", format_ip6_address,
+ (ip6_address_t *) mp->prefix_addr, ntohl (mp->mask_width));
+ break;
+ default:
+ s = format (s, "Traffic type: Unknown(%u)", mp->traffic_type);
+ break;
+ }
+ s = format (s, "BindingSID: %U", format_ip6_address,
+ (ip6_address_t *) mp->bsid_addr);
+
+ s = format (s, "SR Policy Index: %u", ntohl (mp->sr_policy_index));
+
+ s = format (s, "FIB_table: %u", ntohl (mp->table_id));
+
+ FINISH;
+}
+
+static void *vl_api_sr_policy_add_t_print
+ (vl_api_sr_policy_add_t * mp, void *handle)
+{
+ u8 *s;
+
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t *this_address = (ip6_address_t *) mp->segments;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address));
+ this_address++;
+ }
+
+ s = format (0, "SCRIPT: sr_policy_add ");
+
+ s = format (s, "BSID: %U", format_ip6_address,
+ (ip6_address_t *) mp->bsid_addr);
+
+ s =
+ format (s,
+ (mp->is_encap ? "Behavior: Encapsulation" :
+ "Behavior: SRH insertion"));
+
+ s = format (s, "FIB_table: %u", ntohl (mp->fib_table));
+
+ s = format (s, (mp->type ? "Type: Spray" : "Type: Default"));
+
+ s = format (s, "SID list weight: %u", ntohl (mp->weight));
+
+ s = format (s, "{");
+ vec_foreach (seg, segments)
+ {
+ s = format (s, "%U, ", format_ip6_address, seg);
+ }
+ s = format (s, "\b\b } ");
+
+ vec_free (segments);
+
+ FINISH;
+}
+
+static void *vl_api_sr_policy_mod_t_print
+ (vl_api_sr_policy_mod_t * mp, void *handle)
+{
+ u8 *s;
+
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t *this_address = (ip6_address_t *) mp->segments;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address));
+ this_address++;
+ }
+
+ s = format (0, "SCRIPT: sr_policy_mod ");
+
+ s = format (s, "BSID: %U", format_ip6_address,
+ (ip6_address_t *) mp->bsid_addr);
+
+ s = format (s, "SR Policy index: %u", ntohl (mp->sr_policy_index));
+
+ s = format (s, "Operation: %u", mp->operation);
+
+ s = format (s, "SID list index: %u", ntohl (mp->sl_index));
+
+ s = format (s, "SID list weight: %u", ntohl (mp->weight));
+
+ s = format (s, "{");
+ vec_foreach (seg, segments)
+ {
+ s = format (s, "%U, ", format_ip6_address, seg);
+ }
+ s = format (s, "\b\b } ");
+
+ FINISH;
+}
+
+static void *vl_api_sr_policy_del_t_print
+ (vl_api_sr_policy_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sr_policy_del ");
+  /* print the identifiers carried in the delete request */
+  s = format (s, "BSID: %U ", format_ip6_address,
+	      (ip6_address_t *) mp->bsid_addr);
+  s = format (s, "SR Policy index: %u ", ntohl (mp->sr_policy_index));
+ FINISH;
+}
+
+static void *vl_api_classify_add_del_table_t_print
+ (vl_api_classify_add_del_table_t * mp, void *handle)
+{
+ u8 *s;
+ int i;
+
+ s = format (0, "SCRIPT: classify_add_del_table ");
+
+ if (mp->is_add == 0)
+ {
+ s = format (s, "table %d ", ntohl (mp->table_index));
+ s = format (s, "%s ", mp->del_chain ? "del-chain" : "del");
+ }
+ else
+ {
+ s = format (s, "nbuckets %d ", ntohl (mp->nbuckets));
+ s = format (s, "memory_size %d ", ntohl (mp->memory_size));
+ s = format (s, "skip %d ", ntohl (mp->skip_n_vectors));
+ s = format (s, "match %d ", ntohl (mp->match_n_vectors));
+ s = format (s, "next-table %d ", ntohl (mp->next_table_index));
+ s = format (s, "miss-next %d ", ntohl (mp->miss_next_index));
+ s = format (s, "current-data-flag %d ", ntohl (mp->current_data_flag));
+ if (mp->current_data_flag)
+ s = format (s, "current-data-offset %d ",
+ ntohl (mp->current_data_offset));
+ s = format (s, "mask hex ");
+ for (i = 0; i < ntohl (mp->match_n_vectors) * sizeof (u32x4); i++)
+ s = format (s, "%02x", mp->mask[i]);
+ vec_add1 (s, ' ');
+ }
+
+ FINISH;
+}
+
+static void *vl_api_classify_add_del_session_t_print
+ (vl_api_classify_add_del_session_t * mp, void *handle)
+{
+ u8 *s;
+ int i, limit = 0;
+
+ s = format (0, "SCRIPT: classify_add_del_session ");
+
+ s = format (s, "table_index %d ", ntohl (mp->table_index));
+ s = format (s, "hit_next_index %d ", ntohl (mp->hit_next_index));
+ s = format (s, "opaque_index %d ", ntohl (mp->opaque_index));
+ s = format (s, "advance %d ", ntohl (mp->advance));
+ s = format (s, "action %d ", mp->action);
+ if (mp->action)
+ s = format (s, "metadata %d ", ntohl (mp->metadata));
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
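+  /* Dump the match blob as hex: scan backwards across the five u32x4
+     vectors to find the last non-zero byte, then print bytes 0..limit-1
+     so trailing zeros are not emitted. */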
+ s = format (s, "match hex ");
+ for (i = 5 * sizeof (u32x4) - 1; i > 0; i--)
+ {
+ if (mp->match[i] != 0)
+ {
+ limit = i + 1;
+ break;
+ }
+ }
+
+ for (i = 0; i < limit; i++)
+ s = format (s, "%02x", mp->match[i]);
+
+ FINISH;
+}
+
+static void *vl_api_classify_set_interface_ip_table_t_print
+ (vl_api_classify_set_interface_ip_table_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: classify_set_interface_ip_table ");
+
+ if (mp->is_ipv6)
+ s = format (s, "ipv6 ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "table %d ", ntohl (mp->table_index));
+
+ FINISH;
+}
+
+static void *vl_api_classify_set_interface_l2_tables_t_print
+ (vl_api_classify_set_interface_l2_tables_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: classify_set_interface_l2_tables ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "ip4-table %d ", ntohl (mp->ip4_table_index));
+ s = format (s, "ip6-table %d ", ntohl (mp->ip6_table_index));
+ s = format (s, "other-table %d ", ntohl (mp->other_table_index));
+ s = format (s, "is-input %d ", mp->is_input);
+
+ FINISH;
+}
+
+static void *vl_api_add_node_next_t_print
+ (vl_api_add_node_next_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: add_node_next ");
+
+  s = format (s, "node %s next %s ", mp->node_name, mp->next_name);
+
+ FINISH;
+}
+
+static void *vl_api_l2tpv3_create_tunnel_t_print
+ (vl_api_l2tpv3_create_tunnel_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2tpv3_create_tunnel ");
+
+ s = format (s, "client_address %U our_address %U ",
+ format_ip6_address, (ip6_address_t *) (mp->client_address),
+ format_ip6_address, (ip6_address_t *) (mp->our_address));
+ s = format (s, "local_session_id %d ", ntohl (mp->local_session_id));
+ s = format (s, "remote_session_id %d ", ntohl (mp->remote_session_id));
+ s = format (s, "local_cookie %lld ",
+ clib_net_to_host_u64 (mp->local_cookie));
+ s = format (s, "remote_cookie %lld ",
+ clib_net_to_host_u64 (mp->remote_cookie));
+ if (mp->l2_sublayer_present)
+ s = format (s, "l2-sublayer-present ");
+
+ FINISH;
+}
+
+static void *vl_api_l2tpv3_set_tunnel_cookies_t_print
+ (vl_api_l2tpv3_set_tunnel_cookies_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2tpv3_set_tunnel_cookies ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "new_local_cookie %llu ",
+ clib_net_to_host_u64 (mp->new_local_cookie));
+
+ s = format (s, "new_remote_cookie %llu ",
+ clib_net_to_host_u64 (mp->new_remote_cookie));
+
+ FINISH;
+}
+
+static void *vl_api_l2tpv3_interface_enable_disable_t_print
+ (vl_api_l2tpv3_interface_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2tpv3_interface_enable_disable ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->enable_disable == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_l2tpv3_set_lookup_key_t_print
+ (vl_api_l2tpv3_set_lookup_key_t * mp, void *handle)
+{
+ u8 *s;
+ char *str = "unknown";
+
+ s = format (0, "SCRIPT: l2tpv3_set_lookup_key ");
+
+ switch (mp->key)
+ {
+ case L2T_LOOKUP_SRC_ADDRESS:
+ str = "lookup_v6_src";
+ break;
+ case L2T_LOOKUP_DST_ADDRESS:
+ str = "lookup_v6_dst";
+ break;
+ case L2T_LOOKUP_SESSION_ID:
+ str = "lookup_session_id";
+ break;
+ default:
+ break;
+ }
+
+ s = format (s, "%s ", str);
+
+ FINISH;
+}
+
+static void *vl_api_sw_if_l2tpv3_tunnel_dump_t_print
+ (vl_api_sw_if_l2tpv3_tunnel_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_if_l2tpv3_tunnel_dump ");
+
+ FINISH;
+}
+
+static void *vl_api_vxlan_add_del_tunnel_t_print
+ (vl_api_vxlan_add_del_tunnel_t * mp, void *handle)
+{
+ u8 *s;
+ s = format (0, "SCRIPT: vxlan_add_del_tunnel ");
+
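+  /* Convert the wire-format addresses to ip46 form; a multicast
+     destination is a group address and is printed as "group" together
+     with its mcast_sw_if_index. */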
+ ip46_address_t src = to_ip46 (mp->is_ipv6, mp->src_address);
+ ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->dst_address);
+
+ u8 is_grp = ip46_address_is_multicast (&dst);
+ char *dst_name = is_grp ? "group" : "dst";
+
+ s = format (s, "src %U ", format_ip46_address, &src, IP46_TYPE_ANY);
+ s = format (s, "%s %U ", dst_name, format_ip46_address,
+ &dst, IP46_TYPE_ANY);
+
+ if (is_grp)
+ s = format (s, "mcast_sw_if_index %d ", ntohl (mp->mcast_sw_if_index));
+
+ if (mp->encap_vrf_id)
+ s = format (s, "encap-vrf-id %d ", ntohl (mp->encap_vrf_id));
+
+ s = format (s, "decap-next %d ", ntohl (mp->decap_next_index));
+
+ s = format (s, "vni %d ", ntohl (mp->vni));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_vxlan_tunnel_dump_t_print
+ (vl_api_vxlan_tunnel_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: vxlan_tunnel_dump ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_gre_add_del_tunnel_t_print
+ (vl_api_gre_add_del_tunnel_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: gre_add_del_tunnel ");
+
+ s = format (s, "dst %U ", format_ip46_address,
+ (ip46_address_t *) & (mp->dst_address),
+ mp->is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4);
+
+ s = format (s, "src %U ", format_ip46_address,
+ (ip46_address_t *) & (mp->src_address),
+ mp->is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4);
+
+ if (mp->teb)
+ s = format (s, "teb ");
+
+ if (mp->outer_fib_id)
+ s = format (s, "outer-fib-id %d ", ntohl (mp->outer_fib_id));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_gre_tunnel_dump_t_print
+ (vl_api_gre_tunnel_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: gre_tunnel_dump ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_l2_fib_clear_table_t_print
+ (vl_api_l2_fib_clear_table_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2_fib_clear_table ");
+
+ FINISH;
+}
+
+static void *vl_api_l2_interface_efp_filter_t_print
+ (vl_api_l2_interface_efp_filter_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2_interface_efp_filter ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ if (mp->enable_disable)
+ s = format (s, "enable ");
+ else
+ s = format (s, "disable ");
+
+ FINISH;
+}
+
+static void *vl_api_l2_interface_vlan_tag_rewrite_t_print
+ (vl_api_l2_interface_vlan_tag_rewrite_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2_interface_vlan_tag_rewrite ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "vtr_op %d ", ntohl (mp->vtr_op));
+ s = format (s, "push_dot1q %d ", ntohl (mp->push_dot1q));
+ s = format (s, "tag1 %d ", ntohl (mp->tag1));
+ s = format (s, "tag2 %d ", ntohl (mp->tag2));
+
+ FINISH;
+}
+
+static void *vl_api_create_vhost_user_if_t_print
+ (vl_api_create_vhost_user_if_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: create_vhost_user_if ");
+
+ s = format (s, "socket %s ", mp->sock_filename);
+ if (mp->is_server)
+ s = format (s, "server ");
+ if (mp->renumber)
+ s = format (s, "renumber %d ", ntohl (mp->custom_dev_instance));
+ if (mp->tag[0])
+ s = format (s, "tag %s", mp->tag);
+
+ FINISH;
+}
+
+static void *vl_api_modify_vhost_user_if_t_print
+ (vl_api_modify_vhost_user_if_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: modify_vhost_user_if ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "socket %s ", mp->sock_filename);
+ if (mp->is_server)
+ s = format (s, "server ");
+ if (mp->renumber)
+ s = format (s, "renumber %d ", ntohl (mp->custom_dev_instance));
+
+ FINISH;
+}
+
+static void *vl_api_delete_vhost_user_if_t_print
+ (vl_api_delete_vhost_user_if_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: delete_vhost_user_if ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_vhost_user_dump_t_print
+ (vl_api_sw_interface_vhost_user_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_vhost_user_dump ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_dump_t_print
+ (vl_api_sw_interface_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_dump ");
+
+ if (mp->name_filter_valid)
+ s = format (s, "name_filter %s ", mp->name_filter);
+ else
+ s = format (s, "all ");
+
+ FINISH;
+}
+
+static void *vl_api_l2_fib_table_dump_t_print
+ (vl_api_l2_fib_table_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: l2_fib_table_dump ");
+
+ s = format (s, "bd_id %d ", ntohl (mp->bd_id));
+
+ FINISH;
+}
+
+static void *vl_api_control_ping_t_print
+ (vl_api_control_ping_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: control_ping ");
+
+ FINISH;
+}
+
+static void *vl_api_want_interface_events_t_print
+ (vl_api_want_interface_events_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: want_interface_events pid %d enable %d ",
+ ntohl (mp->pid), ntohl (mp->enable_disable));
+
+ FINISH;
+}
+
+static void *
+vl_api_cli_t_print (vl_api_cli_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: cli ");
+
+ FINISH;
+}
+
+static void *vl_api_cli_inband_t_print
+ (vl_api_cli_inband_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: cli_inband ");
+
+ FINISH;
+}
+
+static void *vl_api_memclnt_create_t_print
+ (vl_api_memclnt_create_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: memclnt_create name %s ", mp->name);
+
+ FINISH;
+}
+
+static void *vl_api_show_version_t_print
+ (vl_api_show_version_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: show_version ");
+
+ FINISH;
+}
+
+static void *vl_api_vxlan_gpe_add_del_tunnel_t_print
+ (vl_api_vxlan_gpe_add_del_tunnel_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: vxlan_gpe_add_del_tunnel ");
+
+ ip46_address_t local = to_ip46 (mp->is_ipv6, mp->local);
+ ip46_address_t remote = to_ip46 (mp->is_ipv6, mp->remote);
+
+ u8 is_grp = ip46_address_is_multicast (&remote);
+ char *remote_name = is_grp ? "group" : "remote";
+
+ s = format (s, "local %U ", format_ip46_address, &local, IP46_TYPE_ANY);
+ s = format (s, "%s %U ", remote_name, format_ip46_address,
+ &remote, IP46_TYPE_ANY);
+
+ if (is_grp)
+ s = format (s, "mcast_sw_if_index %d ", ntohl (mp->mcast_sw_if_index));
+ s = format (s, "protocol %d ", ntohl (mp->protocol));
+
+ s = format (s, "vni %d ", ntohl (mp->vni));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ if (mp->encap_vrf_id)
+ s = format (s, "encap-vrf-id %d ", ntohl (mp->encap_vrf_id));
+
+ if (mp->decap_vrf_id)
+ s = format (s, "decap-vrf-id %d ", ntohl (mp->decap_vrf_id));
+
+ FINISH;
+}
+
+static void *vl_api_vxlan_gpe_tunnel_dump_t_print
+ (vl_api_vxlan_gpe_tunnel_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: vxlan_gpe_tunnel_dump ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_interface_name_renumber_t_print
+ (vl_api_interface_name_renumber_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: interface_renumber ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ s = format (s, "new_show_dev_instance %d ",
+ ntohl (mp->new_show_dev_instance));
+
+ FINISH;
+}
+
+static void *vl_api_want_ip4_arp_events_t_print
+ (vl_api_want_ip4_arp_events_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: want_ip4_arp_events ");
+ s = format (s, "pid %d address %U ", ntohl (mp->pid),
+ format_ip4_address, &mp->address);
+ if (mp->enable_disable == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_want_ip6_nd_events_t_print
+ (vl_api_want_ip6_nd_events_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: want_ip6_nd_events ");
+ s = format (s, "pid %d address %U ", ntohl (mp->pid),
+ format_ip6_address, mp->address);
+ if (mp->enable_disable == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_want_l2_macs_events_t_print
+ (vl_api_want_l2_macs_events_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: want_l2_macs_events ");
+ s = format (s, "learn-limit %d ", ntohl (mp->learn_limit));
+ s = format (s, "scan-delay %d ", (u32) mp->scan_delay);
+ s = format (s, "max-entries %d ", (u32) mp->max_macs_in_event * 10);
+ if (mp->enable_disable == 0)
+ s = format (s, "disable");
+
+ FINISH;
+}
+
+static void *vl_api_input_acl_set_interface_t_print
+ (vl_api_input_acl_set_interface_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: input_acl_set_interface ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "ip4-table %d ", ntohl (mp->ip4_table_index));
+ s = format (s, "ip6-table %d ", ntohl (mp->ip6_table_index));
+ s = format (s, "l2-table %d ", ntohl (mp->l2_table_index));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_ip_address_dump_t_print
+ (vl_api_ip_address_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+  s = format (0, "SCRIPT: ip_address_dump ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "is_ipv6 %d ", mp->is_ipv6 != 0);
+
+ FINISH;
+}
+
+static void *
+vl_api_ip_dump_t_print (vl_api_ip_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ip_dump ");
+ s = format (s, "is_ipv6 %d ", mp->is_ipv6 != 0);
+
+ FINISH;
+}
+
+static void *vl_api_cop_interface_enable_disable_t_print
+ (vl_api_cop_interface_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: cop_interface_enable_disable ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ if (mp->enable_disable)
+ s = format (s, "enable ");
+ else
+ s = format (s, "disable ");
+
+ FINISH;
+}
+
+static void *vl_api_cop_whitelist_enable_disable_t_print
+ (vl_api_cop_whitelist_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: cop_whitelist_enable_disable ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "fib-id %d ", ntohl (mp->fib_id));
+ if (mp->ip4)
+ s = format (s, "ip4 ");
+ if (mp->ip6)
+ s = format (s, "ip6 ");
+ if (mp->default_cop)
+ s = format (s, "default ");
+
+ FINISH;
+}
+
+static void *vl_api_af_packet_create_t_print
+ (vl_api_af_packet_create_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: af_packet_create ");
+ s = format (s, "host_if_name %s ", mp->host_if_name);
+ if (mp->use_random_hw_addr)
+ s = format (s, "hw_addr random ");
+ else
+ s = format (s, "hw_addr %U ", format_ethernet_address, mp->hw_addr);
+
+ FINISH;
+}
+
+static void *vl_api_af_packet_delete_t_print
+ (vl_api_af_packet_delete_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: af_packet_delete ");
+ s = format (s, "host_if_name %s ", mp->host_if_name);
+
+ FINISH;
+}
+
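+/* Render a policer action; for mark-and-transmit the DSCP value is
+   mapped to its symbolic name via the foreach_vnet_dscp x-macro. */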
+static u8 *
+format_policer_action (u8 * s, va_list * va)
+{
+ u32 action = va_arg (*va, u32);
+ u32 dscp = va_arg (*va, u32);
+ char *t = 0;
+
+ if (action == SSE2_QOS_ACTION_DROP)
+ s = format (s, "drop");
+ else if (action == SSE2_QOS_ACTION_TRANSMIT)
+ s = format (s, "transmit");
+ else if (action == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)
+ {
+ s = format (s, "mark-and-transmit ");
+ switch (dscp)
+ {
+#define _(v,f,str) case VNET_DSCP_##f: t = str; break;
+ foreach_vnet_dscp
+#undef _
+ default:
+ break;
+ }
+ s = format (s, "%s", t);
+ }
+
+ return s;
+}
+
+static void *vl_api_policer_add_del_t_print
+ (vl_api_policer_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: policer_add_del ");
+ s = format (s, "name %s ", mp->name);
+  s = format (s, "cir %d ", ntohl (mp->cir));
+  s = format (s, "eir %d ", ntohl (mp->eir));
+  s = format (s, "cb %lld ", clib_net_to_host_u64 (mp->cb));
+  s = format (s, "eb %lld ", clib_net_to_host_u64 (mp->eb));
+
+ switch (mp->rate_type)
+ {
+ case SSE2_QOS_RATE_KBPS:
+ s = format (s, "rate_type kbps ");
+ break;
+ case SSE2_QOS_RATE_PPS:
+ s = format (s, "rate_type pps ");
+ break;
+ default:
+ break;
+ }
+
+ switch (mp->round_type)
+ {
+ case SSE2_QOS_ROUND_TO_CLOSEST:
+ s = format (s, "round_type closest ");
+ break;
+ case SSE2_QOS_ROUND_TO_UP:
+ s = format (s, "round_type up ");
+ break;
+ case SSE2_QOS_ROUND_TO_DOWN:
+ s = format (s, "round_type down ");
+ break;
+ default:
+ break;
+ }
+
+ switch (mp->type)
+ {
+ case SSE2_QOS_POLICER_TYPE_1R2C:
+ s = format (s, "type 1r2c ");
+ break;
+ case SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697:
+ s = format (s, "type 1r3c ");
+ break;
+ case SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698:
+ s = format (s, "type 2r3c-2698 ");
+ break;
+ case SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115:
+ s = format (s, "type 2r3c-4115 ");
+ break;
+ case SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1:
+ s = format (s, "type 2r3c-mef5cf1 ");
+ break;
+ default:
+ break;
+ }
+
+ s = format (s, "conform_action %U ", format_policer_action,
+ mp->conform_action_type, mp->conform_dscp);
+ s = format (s, "exceed_action %U ", format_policer_action,
+ mp->exceed_action_type, mp->exceed_dscp);
+ s = format (s, "violate_action %U ", format_policer_action,
+ mp->violate_action_type, mp->violate_dscp);
+
+ if (mp->color_aware)
+ s = format (s, "color-aware ");
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_policer_dump_t_print
+ (vl_api_policer_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: policer_dump ");
+ if (mp->match_name_valid)
+ s = format (s, "name %s ", mp->match_name);
+
+ FINISH;
+}
+
+static void *vl_api_policer_classify_set_interface_t_print
+ (vl_api_policer_classify_set_interface_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: policer_classify_set_interface ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ if (mp->ip4_table_index != ~0)
+ s = format (s, "ip4-table %d ", ntohl (mp->ip4_table_index));
+ if (mp->ip6_table_index != ~0)
+ s = format (s, "ip6-table %d ", ntohl (mp->ip6_table_index));
+ if (mp->l2_table_index != ~0)
+ s = format (s, "l2-table %d ", ntohl (mp->l2_table_index));
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_policer_classify_dump_t_print
+ (vl_api_policer_classify_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: policer_classify_dump ");
+ switch (mp->type)
+ {
+ case POLICER_CLASSIFY_TABLE_IP4:
+ s = format (s, "type ip4 ");
+ break;
+ case POLICER_CLASSIFY_TABLE_IP6:
+ s = format (s, "type ip6 ");
+ break;
+ case POLICER_CLASSIFY_TABLE_L2:
+ s = format (s, "type l2 ");
+ break;
+ default:
+ break;
+ }
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_clear_stats_t_print
+ (vl_api_sw_interface_clear_stats_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_clear_stats ");
+ if (mp->sw_if_index != ~0)
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_mpls_tunnel_dump_t_print
+ (vl_api_mpls_tunnel_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: mpls_tunnel_dump ");
+
+ s = format (s, "tunnel_index %d ", ntohl (mp->tunnel_index));
+
+ FINISH;
+}
+
+static void *vl_api_mpls_fib_dump_t_print
+ (vl_api_mpls_fib_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+  s = format (0, "SCRIPT: mpls_fib_dump ");
+
+ FINISH;
+}
+
+static void *vl_api_ip_fib_dump_t_print
+ (vl_api_ip_fib_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ip_fib_dump ");
+
+ FINISH;
+}
+
+static void *vl_api_ip6_fib_dump_t_print
+ (vl_api_ip6_fib_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ip6_fib_dump ");
+
+ FINISH;
+}
+
+static void *vl_api_classify_table_ids_t_print
+ (vl_api_classify_table_ids_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: classify_table_ids ");
+
+ FINISH;
+}
+
+static void *vl_api_classify_table_by_interface_t_print
+ (vl_api_classify_table_by_interface_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: classify_table_by_interface ");
+ if (mp->sw_if_index != ~0)
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_classify_table_info_t_print
+ (vl_api_classify_table_info_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: classify_table_info ");
+ if (mp->table_id != ~0)
+ s = format (s, "table_id %d ", ntohl (mp->table_id));
+
+ FINISH;
+}
+
+static void *vl_api_classify_session_dump_t_print
+ (vl_api_classify_session_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: classify_session_dump ");
+ if (mp->table_id != ~0)
+ s = format (s, "table_id %d ", ntohl (mp->table_id));
+
+ FINISH;
+}
+
+static void *vl_api_set_ipfix_exporter_t_print
+ (vl_api_set_ipfix_exporter_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: set_ipfix_exporter ");
+
+ s = format (s, "collector-address %U ", format_ip4_address,
+ (ip4_address_t *) mp->collector_address);
+ s = format (s, "collector-port %d ", ntohs (mp->collector_port));
+ s = format (s, "src-address %U ", format_ip4_address,
+ (ip4_address_t *) mp->src_address);
+ s = format (s, "vrf-id %d ", ntohl (mp->vrf_id));
+ s = format (s, "path-mtu %d ", ntohl (mp->path_mtu));
+ s = format (s, "template-interval %d ", ntohl (mp->template_interval));
+ s = format (s, "udp-checksum %d ", mp->udp_checksum);
+
+ FINISH;
+}
+
+static void *vl_api_ipfix_exporter_dump_t_print
+ (vl_api_ipfix_exporter_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ipfix_exporter_dump ");
+
+ FINISH;
+}
+
+static void *vl_api_set_ipfix_classify_stream_t_print
+ (vl_api_set_ipfix_classify_stream_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: set_ipfix_classify_stream ");
+
+ s = format (s, "domain-id %d ", ntohl (mp->domain_id));
+ s = format (s, "src-port %d ", ntohs (mp->src_port));
+
+ FINISH;
+}
+
+static void *vl_api_ipfix_classify_stream_dump_t_print
+ (vl_api_ipfix_classify_stream_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ipfix_classify_stream_dump ");
+
+ FINISH;
+}
+
+static void *vl_api_ipfix_classify_table_add_del_t_print
+ (vl_api_ipfix_classify_table_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ipfix_classify_table_add_del ");
+
+ s = format (s, "table-id %d ", ntohl (mp->table_id));
+ s = format (s, "ip-version %d ", mp->ip_version);
+ s = format (s, "transport-protocol %d ", mp->transport_protocol);
+
+ FINISH;
+}
+
+static void *vl_api_ipfix_classify_table_dump_t_print
+ (vl_api_ipfix_classify_table_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ipfix_classify_table_dump ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_span_enable_disable_t_print
+ (vl_api_sw_interface_span_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_span_enable_disable ");
+ s = format (s, "src_sw_if_index %u ", ntohl (mp->sw_if_index_from));
+ s = format (s, "dst_sw_if_index %u ", ntohl (mp->sw_if_index_to));
+
+ if (mp->is_l2)
+ s = format (s, "l2 ");
+
+ switch (mp->state)
+ {
+ case 0:
+ s = format (s, "disable ");
+ break;
+ case 1:
+ s = format (s, "rx ");
+ break;
+ case 2:
+ s = format (s, "tx ");
+ break;
+ case 3:
+ default:
+ s = format (s, "both ");
+ break;
+ }
+
+ FINISH;
+}
+
+static void *
+vl_api_sw_interface_span_dump_t_print (vl_api_sw_interface_span_dump_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_span_dump ");
+  for (i = 5 * sizeof (u32x4) - 1; i >= 0; i--)
+ if (mp->is_l2)
+ s = format (s, "l2 ");
+
+ FINISH;
+}
+
+static void *vl_api_get_next_index_t_print
+ (vl_api_get_next_index_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: get_next_index ");
+ s = format (s, "node-name %s ", mp->node_name);
+ s = format (s, "next-node-name %s ", mp->next_name);
+
+ FINISH;
+}
+
+static void *vl_api_pg_create_interface_t_print
+ (vl_api_pg_create_interface_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: pg_create_interface ");
+  s = format (s, "if_id %d", ntohl (mp->interface_id));
+
+ FINISH;
+}
+
+static void *vl_api_pg_capture_t_print
+ (vl_api_pg_capture_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: pg_capture ");
+  s = format (s, "if_id %d ", ntohl (mp->interface_id));
+  s = format (s, "pcap %s", mp->pcap_file_name);
+ if (mp->count != ~0)
+ s = format (s, "count %d ", ntohl (mp->count));
+ if (!mp->is_enabled)
+ s = format (s, "disable");
+
+ FINISH;
+}
+
+static void *vl_api_pg_enable_disable_t_print
+ (vl_api_pg_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: pg_enable_disable ");
+ if (ntohl (mp->stream_name_length) > 0)
+ s = format (s, "stream %s", mp->stream_name);
+ if (!mp->is_enabled)
+ s = format (s, "disable");
+
+ FINISH;
+}
+
+static void *vl_api_ip_source_and_port_range_check_add_del_t_print
+ (vl_api_ip_source_and_port_range_check_add_del_t * mp, void *handle)
+{
+ u8 *s;
+ int i;
+
+ s = format (0, "SCRIPT: ip_source_and_port_range_check_add_del ");
+ if (mp->is_ipv6)
+ s = format (s, "%U/%d ", format_ip6_address, mp->address,
+ mp->mask_length);
+ else
+ s = format (s, "%U/%d ", format_ip4_address, mp->address,
+ mp->mask_length);
+
+ for (i = 0; i < mp->number_of_ranges; i++)
+ {
+ s = format (s, "range %d - %d ", mp->low_ports[i], mp->high_ports[i]);
+ }
+
+ s = format (s, "vrf %d ", ntohl (mp->vrf_id));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_ip_source_and_port_range_check_interface_add_del_t_print
+ (vl_api_ip_source_and_port_range_check_interface_add_del_t * mp,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ip_source_and_port_range_check_interface_add_del ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ if (mp->tcp_out_vrf_id != ~0)
+ s = format (s, "tcp-out-vrf %d ", ntohl (mp->tcp_out_vrf_id));
+
+ if (mp->udp_out_vrf_id != ~0)
+ s = format (s, "udp-out-vrf %d ", ntohl (mp->udp_out_vrf_id));
+
+ if (mp->tcp_in_vrf_id != ~0)
+ s = format (s, "tcp-in-vrf %d ", ntohl (mp->tcp_in_vrf_id));
+
+ if (mp->udp_in_vrf_id != ~0)
+ s = format (s, "udp-in-vrf %d ", ntohl (mp->udp_in_vrf_id));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_lisp_enable_disable_t_print
+ (vl_api_lisp_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_enable_disable %s",
+ mp->is_en ? "enable" : "disable");
+
+ FINISH;
+}
+
+static void *vl_api_gpe_add_del_iface_t_print
+ (vl_api_gpe_add_del_iface_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: gpe_add_del_iface ");
+
+ s = format (s, "%s ", mp->is_add ? "up" : "down");
+ s = format (s, "vni %d ", mp->vni);
+ s = format (s, "%s %d ", mp->is_l2 ? "bd_id" : "table_id", mp->dp_table);
+
+ FINISH;
+}
+
+static void *vl_api_lisp_pitr_set_locator_set_t_print
+ (vl_api_lisp_pitr_set_locator_set_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_pitr_set_locator_set ");
+
+ if (mp->is_add)
+ s = format (s, "locator-set %s ", mp->ls_name);
+ else
+ s = format (s, "del");
+
+ FINISH;
+}
+
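+/* Render a flat EID keyed on the LISP API eid_type:
+   0 = IPv4 prefix, 1 = IPv6 prefix, 2 = MAC address. */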
+static u8 *
+format_lisp_flat_eid (u8 * s, va_list * args)
+{
+ u32 type = va_arg (*args, u32);
+ u8 *eid = va_arg (*args, u8 *);
+ u32 eid_len = va_arg (*args, u32);
+
+ switch (type)
+ {
+ case 0:
+ return format (s, "%U/%d", format_ip4_address, eid, eid_len);
+ case 1:
+ return format (s, "%U/%d", format_ip6_address, eid, eid_len);
+    case 2:
+ return format (s, "%U", format_ethernet_address, eid);
+ }
+  return s;
+}
+
+static void *vl_api_lisp_add_del_remote_mapping_t_print
+ (vl_api_lisp_add_del_remote_mapping_t * mp, void *handle)
+{
+ u8 *s;
+ u32 rloc_num = 0;
+
+ s = format (0, "SCRIPT: lisp_add_del_remote_mapping ");
+
+ if (mp->del_all)
+ s = format (s, "del-all ");
+
+ s = format (s, "%s ", mp->is_add ? "add" : "del");
+ s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni));
+
+ s = format (s, "eid %U ", format_lisp_flat_eid,
+ mp->eid_type, mp->eid, mp->eid_len);
+
+ if (mp->is_src_dst)
+ {
+ s = format (s, "seid %U ", format_lisp_flat_eid,
+ mp->eid_type, mp->seid, mp->seid_len);
+ }
+
+ rloc_num = clib_net_to_host_u32 (mp->rloc_num);
+
+ if (0 == rloc_num)
+ s = format (s, "action %d", mp->action);
+
+ FINISH;
+}
+
+static void *vl_api_lisp_add_del_adjacency_t_print
+ (vl_api_lisp_add_del_adjacency_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_add_del_adjacency ");
+
+ s = format (s, "%s ", mp->is_add ? "add" : "del");
+ s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni));
+ s = format (s, "reid %U leid %U ",
+ format_lisp_flat_eid, mp->eid_type, mp->reid, mp->reid_len,
+ format_lisp_flat_eid, mp->eid_type, mp->leid, mp->leid_len);
+
+ FINISH;
+}
+
+static void *vl_api_lisp_add_del_map_request_itr_rlocs_t_print
+ (vl_api_lisp_add_del_map_request_itr_rlocs_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_add_del_map_request_itr_rlocs ");
+
+ if (mp->is_add)
+ s = format (s, "%s", mp->locator_set_name);
+ else
+ s = format (s, "del");
+
+ FINISH;
+}
+
+static void *vl_api_lisp_eid_table_add_del_map_t_print
+ (vl_api_lisp_eid_table_add_del_map_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_eid_table_add_del_map ");
+
+ if (!mp->is_add)
+ s = format (s, "del ");
+
+ s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni));
+ s = format (s, "%s %d ",
+ mp->is_l2 ? "bd_index" : "vrf",
+ clib_net_to_host_u32 (mp->dp_table));
+ FINISH;
+}
+
+static void *vl_api_lisp_add_del_local_eid_t_print
+ (vl_api_lisp_add_del_local_eid_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_add_del_local_eid ");
+
+ if (!mp->is_add)
+ s = format (s, "del ");
+
+ s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni));
+ s = format (s, "eid %U ", format_lisp_flat_eid, mp->eid_type, mp->eid,
+ mp->prefix_len);
+ s = format (s, "locator-set %s ", mp->locator_set_name);
+ if (*mp->key)
+ {
+ u32 key_id = mp->key_id;
+ s = format (s, "key-id %U", format_hmac_key_id, key_id);
+ s = format (s, "secret-key %s", mp->key);
+ }
+ FINISH;
+}
+
+static void *vl_api_gpe_add_del_fwd_entry_t_print
+ (vl_api_gpe_add_del_fwd_entry_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: gpe_add_del_fwd_entry TODO");
+
+ FINISH;
+}
+
+static void *vl_api_lisp_add_del_map_resolver_t_print
+ (vl_api_lisp_add_del_map_resolver_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_add_del_map_resolver ");
+
+ if (!mp->is_add)
+ s = format (s, "del ");
+
+ if (mp->is_ipv6)
+ s = format (s, "%U ", format_ip6_address, mp->ip_address);
+ else
+ s = format (s, "%U ", format_ip4_address, mp->ip_address);
+
+ FINISH;
+}
+
+static void *vl_api_gpe_enable_disable_t_print
+ (vl_api_gpe_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: gpe_enable_disable ");
+
+ s = format (s, "%s ", mp->is_en ? "enable" : "disable");
+
+ FINISH;
+}
+
+static void *vl_api_lisp_add_del_locator_set_t_print
+ (vl_api_lisp_add_del_locator_set_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_add_del_locator_set ");
+
+ if (!mp->is_add)
+ s = format (s, "del ");
+
+ s = format (s, "locator-set %s ", mp->locator_set_name);
+
+ FINISH;
+}
+
+static void *vl_api_lisp_add_del_locator_t_print
+ (vl_api_lisp_add_del_locator_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_add_del_locator ");
+
+ if (!mp->is_add)
+ s = format (s, "del ");
+
+ s = format (s, "locator-set %s ", mp->locator_set_name);
+  s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "p %d w %d ", mp->priority, mp->weight);
+
+ FINISH;
+}
+
+static void *vl_api_lisp_locator_set_dump_t_print
+ (vl_api_lisp_locator_set_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_locator_set_dump ");
+ if (mp->filter == 1)
+ s = format (s, "local");
+ else if (mp->filter == 2)
+ s = format (s, "remote");
+
+ FINISH;
+}
+
+static void *vl_api_lisp_locator_dump_t_print
+ (vl_api_lisp_locator_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_locator_dump ");
+ if (mp->is_index_set)
+ s = format (s, "ls_index %d", clib_net_to_host_u32 (mp->ls_index));
+ else
+ s = format (s, "ls_name %s", mp->ls_name);
+
+ FINISH;
+}
+
+static void *vl_api_lisp_map_request_mode_t_print
+ (vl_api_lisp_map_request_mode_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_map_request_mode ");
+
+ switch (mp->mode)
+ {
+ case 0:
+ s = format (s, "dst-only");
+ break;
+    case 1:
+      s = format (s, "src-dst");
+      break;
+    default:
+      break;
+ }
+
+ FINISH;
+}
+
+static void *vl_api_lisp_eid_table_dump_t_print
+ (vl_api_lisp_eid_table_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_eid_table_dump ");
+
+ if (mp->eid_set)
+ {
+ s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni));
+ s = format (s, "eid %U ", format_lisp_flat_eid, mp->eid_type,
+ mp->eid, mp->prefix_length);
+ switch (mp->filter)
+ {
+ case 1:
+ s = format (s, "local ");
+ break;
+ case 2:
+ s = format (s, "remote ");
+ break;
+ }
+ }
+
+ FINISH;
+}
+
+static void *vl_api_lisp_rloc_probe_enable_disable_t_print
+ (vl_api_lisp_rloc_probe_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_rloc_probe_enable_disable ");
+ if (mp->is_enabled)
+ s = format (s, "enable");
+ else
+ s = format (s, "disable");
+
+ FINISH;
+}
+
+static void *vl_api_lisp_map_register_enable_disable_t_print
+ (vl_api_lisp_map_register_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_map_register_enable_disable ");
+ if (mp->is_enabled)
+ s = format (s, "enable");
+ else
+ s = format (s, "disable");
+
+ FINISH;
+}
+
+static void *vl_api_lisp_adjacencies_get_t_print
+ (vl_api_lisp_adjacencies_get_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_adjacencies_get ");
+ s = format (s, "vni %d", clib_net_to_host_u32 (mp->vni));
+
+ FINISH;
+}
+
+static void *vl_api_lisp_eid_table_map_dump_t_print
+ (vl_api_lisp_eid_table_map_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: lisp_eid_table_map_dump ");
+
+ if (mp->is_l2)
+ s = format (s, "l2");
+ else
+ s = format (s, "l3");
+
+ FINISH;
+}
+
+static void *vl_api_ipsec_gre_add_del_tunnel_t_print
+ (vl_api_ipsec_gre_add_del_tunnel_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ipsec_gre_add_del_tunnel ");
+
+ s = format (s, "dst %U ", format_ip4_address,
+ (ip4_address_t *) & (mp->dst_address));
+
+ s = format (s, "src %U ", format_ip4_address,
+ (ip4_address_t *) & (mp->src_address));
+
+ s = format (s, "local_sa %d ", ntohl (mp->local_sa_id));
+
+ s = format (s, "remote_sa %d ", ntohl (mp->remote_sa_id));
+
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_ipsec_gre_tunnel_dump_t_print
+ (vl_api_ipsec_gre_tunnel_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ipsec_gre_tunnel_dump ");
+
+ if (mp->sw_if_index != ~0)
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+
+ FINISH;
+}
+
+static void *vl_api_l2_interface_pbb_tag_rewrite_t_print
+ (vl_api_l2_interface_pbb_tag_rewrite_t * mp, void *handle)
+{
+ u8 *s;
+ u32 vtr_op = ntohl (mp->vtr_op);
+
+ s = format (0, "SCRIPT: l2_interface_pbb_tag_rewrite ");
+
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ s = format (s, "vtr_op %d ", vtr_op);
+ if (vtr_op != L2_VTR_DISABLED && vtr_op != L2_VTR_POP_2)
+ {
+ if (vtr_op == L2_VTR_TRANSLATE_2_2)
+ s = format (s, "%d ", ntohs (mp->outer_tag));
+ s = format (s, "dmac %U ", format_ethernet_address, &mp->b_dmac);
+ s = format (s, "smac %U ", format_ethernet_address, &mp->b_smac);
+ s = format (s, "sid %d ", ntohl (mp->i_sid));
+ s = format (s, "vlanid %d ", ntohs (mp->b_vlanid));
+ }
+
+ FINISH;
+}
+
+static void *vl_api_flow_classify_set_interface_t_print
+ (vl_api_flow_classify_set_interface_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: flow_classify_set_interface ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ if (mp->ip4_table_index != ~0)
+ s = format (s, "ip4-table %d ", ntohl (mp->ip4_table_index));
+ if (mp->ip6_table_index != ~0)
+ s = format (s, "ip6-table %d ", ntohl (mp->ip6_table_index));
+ if (mp->is_add == 0)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *
+vl_api_punt_t_print (vl_api_punt_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: punt ");
+
+ if (mp->ipv != (u8) ~ 0)
+ s = format (s, "ip %d ", mp->ipv);
+
+ s = format (s, "protocol %d ", mp->l4_protocol);
+
+ if (mp->l4_port != (u16) ~ 0)
+ s = format (s, "port %d ", ntohs (mp->l4_port));
+
+ if (!mp->is_add)
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_flow_classify_dump_t_print
+ (vl_api_flow_classify_dump_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: flow_classify_dump ");
+ switch (mp->type)
+ {
+ case FLOW_CLASSIFY_TABLE_IP4:
+ s = format (s, "type ip4 ");
+ break;
+ case FLOW_CLASSIFY_TABLE_IP6:
+ s = format (s, "type ip6 ");
+ break;
+ default:
+ break;
+ }
+
+ FINISH;
+}
+
+static void *vl_api_get_first_msg_id_t_print
+ (vl_api_get_first_msg_id_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: get_first_msg_id %s ", mp->name);
+
+ FINISH;
+}
+
+static void *vl_api_ioam_enable_t_print
+ (vl_api_ioam_enable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ioam_enable ");
+
+  if (mp->trace_enable)
+    s = format (s, "trace enabled ");
+
+  if (mp->pot_enable)
+    s = format (s, "POT enabled ");
+
+  if (mp->seqno)
+    s = format (s, "Seqno enabled ");
+
+  if (mp->analyse)
+    s = format (s, "Analyse enabled ");
+
+ FINISH;
+}
+
+static void *vl_api_ioam_disable_t_print
+ (vl_api_ioam_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: ioam_disable ");
+  s = format (s, "trace disabled ");
+  s = format (s, "POT disabled ");
+  s = format (s, "Seqno disabled ");
+  s = format (s, "Analyse disabled ");
+
+ FINISH;
+}
+
+static void *vl_api_feature_enable_disable_t_print
+ (vl_api_feature_enable_disable_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: feature_enable_disable ");
+ s = format (s, "arc_name %s ", mp->arc_name);
+ s = format (s, "feature_name %s ", mp->feature_name);
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ if (!mp->enable)
+ s = format (s, "disable");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_tag_add_del_t_print
+ (vl_api_sw_interface_tag_add_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_tag_add_del ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+ if (mp->is_add)
+ s = format (s, "tag %s ", mp->tag);
+ else
+ s = format (s, "del ");
+
+ FINISH;
+}
+
+static void *vl_api_sw_interface_set_mtu_t_print
+ (vl_api_sw_interface_set_mtu_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: sw_interface_set_mtu ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+  s = format (s, "mtu %d ", ntohs (mp->mtu));
+
+ FINISH;
+}
+
+static void *vl_api_p2p_ethernet_add_t_print
+ (vl_api_p2p_ethernet_add_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: p2p_ethernet_add ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->parent_if_index));
+ s = format (s, "remote_mac %U ", format_ethernet_address, mp->remote_mac);
+
+ FINISH;
+}
+
+static void *vl_api_p2p_ethernet_del_t_print
+ (vl_api_p2p_ethernet_del_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: p2p_ethernet_del ");
+ s = format (s, "sw_if_index %d ", ntohl (mp->parent_if_index));
+ s = format (s, "remote_mac %U ", format_ethernet_address, mp->remote_mac);
+
+ FINISH;
+}
+
+static void *vl_api_tcp_configure_src_addresses_t_print
+ (vl_api_tcp_configure_src_addresses_t * mp, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: tcp_configure_src_addresses ");
+ if (mp->is_ipv6)
+ s = format (s, "%U - %U ",
+ format_ip6_address, (ip6_address_t *) mp->first_address,
+ format_ip6_address, (ip6_address_t *) mp->last_address);
+ else
+ s = format (s, "%U - %U ",
+ format_ip4_address, (ip4_address_t *) mp->first_address,
+ format_ip4_address, (ip4_address_t *) mp->last_address);
+
+ if (mp->vrf_id)
+ s = format (s, "vrf %d ", ntohl (mp->vrf_id));
+
+ FINISH;
+}
+
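+/*
+ * The following x-macro stamps out a trivial print handler for each
+ * dump/request message that carries no arguments beyond the header.
+ */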
+#define foreach_custom_print_no_arg_function \
+_(lisp_eid_table_vni_dump) \
+_(lisp_map_resolver_dump) \
+_(lisp_map_server_dump) \
+_(show_lisp_rloc_probe_state) \
+_(show_lisp_map_register_state) \
+_(show_lisp_map_request_mode)
+
+#define _(f) \
+static void * vl_api_ ## f ## _t_print \
+ (vl_api_ ## f ## _t * mp, void * handle) \
+{ \
+ u8 * s; \
+ s = format (0, "SCRIPT: " #f ); \
+ FINISH; \
+}
+foreach_custom_print_no_arg_function
+#undef _
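+
+/*
+ * Master table mapping each binary-API message id (VL_API_<NAME>) to its
+ * vl_api_<name>_t_print handler defined above.
+ */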
+#define foreach_custom_print_function \
+_(CREATE_LOOPBACK, create_loopback) \
+_(CREATE_LOOPBACK_INSTANCE, create_loopback_instance) \
+_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \
+_(SW_INTERFACE_EVENT, sw_interface_event) \
+_(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \
+_(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \
+_(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \
+_(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \
+_(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \
+_(TAP_CONNECT, tap_connect) \
+_(TAP_MODIFY, tap_modify) \
+_(TAP_DELETE, tap_delete) \
+_(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump) \
+_(IP_ADD_DEL_ROUTE, ip_add_del_route) \
+_(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \
+_(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \
+_(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \
+_(SW_INTERFACE_SET_UNNUMBERED, sw_interface_set_unnumbered) \
+_(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \
+_(RESET_VRF, reset_vrf) \
+_(CREATE_VLAN_SUBIF, create_vlan_subif) \
+_(CREATE_SUBIF, create_subif) \
+_(OAM_ADD_DEL, oam_add_del) \
+_(RESET_FIB, reset_fib) \
+_(DHCP_PROXY_CONFIG, dhcp_proxy_config) \
+_(DHCP_PROXY_SET_VSS, dhcp_proxy_set_vss) \
+_(SET_IP_FLOW_HASH, set_ip_flow_hash) \
+_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS, \
+ sw_interface_ip6_set_link_local_address) \
+_(SW_INTERFACE_IP6ND_RA_PREFIX, sw_interface_ip6nd_ra_prefix) \
+_(SW_INTERFACE_IP6ND_RA_CONFIG, sw_interface_ip6nd_ra_config) \
+_(SET_ARP_NEIGHBOR_LIMIT, set_arp_neighbor_limit) \
+_(L2_PATCH_ADD_DEL, l2_patch_add_del) \
+_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del) \
+_(SR_STEERING_ADD_DEL, sr_steering_add_del) \
+_(SR_POLICY_ADD, sr_policy_add) \
+_(SR_POLICY_MOD, sr_policy_mod) \
+_(SR_POLICY_DEL, sr_policy_del) \
+_(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \
+_(L2FIB_ADD_DEL, l2fib_add_del) \
+_(L2FIB_FLUSH_ALL, l2fib_flush_all) \
+_(L2FIB_FLUSH_BD, l2fib_flush_bd) \
+_(L2FIB_FLUSH_INT, l2fib_flush_int) \
+_(L2_FLAGS, l2_flags) \
+_(BRIDGE_FLAGS, bridge_flags) \
+_(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \
+_(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \
+_(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \
+_(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \
+_(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \
+_(BRIDGE_DOMAIN_SET_MAC_AGE, bridge_domain_set_mac_age) \
+_(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \
+_(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \
+_(ADD_NODE_NEXT, add_node_next) \
+_(DHCP_CLIENT_CONFIG, dhcp_client_config) \
+_(L2TPV3_CREATE_TUNNEL, l2tpv3_create_tunnel) \
+_(L2TPV3_SET_TUNNEL_COOKIES, l2tpv3_set_tunnel_cookies) \
+_(L2TPV3_INTERFACE_ENABLE_DISABLE, l2tpv3_interface_enable_disable) \
+_(L2TPV3_SET_LOOKUP_KEY, l2tpv3_set_lookup_key) \
+_(SW_IF_L2TPV3_TUNNEL_DUMP, sw_if_l2tpv3_tunnel_dump) \
+_(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \
+_(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump) \
+_(GRE_ADD_DEL_TUNNEL, gre_add_del_tunnel) \
+_(GRE_TUNNEL_DUMP, gre_tunnel_dump) \
+_(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \
+_(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \
+_(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \
+_(CREATE_VHOST_USER_IF, create_vhost_user_if) \
+_(MODIFY_VHOST_USER_IF, modify_vhost_user_if) \
+_(DELETE_VHOST_USER_IF, delete_vhost_user_if) \
+_(SW_INTERFACE_DUMP, sw_interface_dump) \
+_(CONTROL_PING, control_ping) \
+_(WANT_INTERFACE_EVENTS, want_interface_events) \
+_(CLI, cli) \
+_(CLI_INBAND, cli_inband) \
+_(MEMCLNT_CREATE, memclnt_create) \
+_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) \
+_(SHOW_VERSION, show_version) \
+_(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \
+_(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \
+_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \
+_(INTERFACE_NAME_RENUMBER, interface_name_renumber) \
+_(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \
+_(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \
+_(WANT_L2_MACS_EVENTS, want_l2_macs_events) \
+_(INPUT_ACL_SET_INTERFACE, input_acl_set_interface) \
+_(IP_ADDRESS_DUMP, ip_address_dump) \
+_(IP_DUMP, ip_dump) \
+_(DELETE_LOOPBACK, delete_loopback) \
+_(BD_IP_MAC_ADD_DEL, bd_ip_mac_add_del) \
+_(COP_INTERFACE_ENABLE_DISABLE, cop_interface_enable_disable) \
+_(COP_WHITELIST_ENABLE_DISABLE, cop_whitelist_enable_disable) \
+_(AF_PACKET_CREATE, af_packet_create) \
+_(AF_PACKET_DELETE, af_packet_delete) \
+_(SW_INTERFACE_CLEAR_STATS, sw_interface_clear_stats) \
+_(MPLS_FIB_DUMP, mpls_fib_dump) \
+_(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \
+_(CLASSIFY_TABLE_IDS,classify_table_ids) \
+_(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \
+_(CLASSIFY_TABLE_INFO,classify_table_info) \
+_(CLASSIFY_SESSION_DUMP,classify_session_dump) \
+_(SET_IPFIX_EXPORTER, set_ipfix_exporter) \
+_(IPFIX_EXPORTER_DUMP, ipfix_exporter_dump) \
+_(SET_IPFIX_CLASSIFY_STREAM, set_ipfix_classify_stream) \
+_(IPFIX_CLASSIFY_STREAM_DUMP, ipfix_classify_stream_dump) \
+_(IPFIX_CLASSIFY_TABLE_ADD_DEL, ipfix_classify_table_add_del) \
+_(IPFIX_CLASSIFY_TABLE_DUMP, ipfix_classify_table_dump) \
+_(SW_INTERFACE_SPAN_ENABLE_DISABLE, sw_interface_span_enable_disable) \
+_(SW_INTERFACE_SPAN_DUMP, sw_interface_span_dump) \
+_(GET_NEXT_INDEX, get_next_index) \
+_(PG_CREATE_INTERFACE,pg_create_interface) \
+_(PG_CAPTURE, pg_capture) \
+_(PG_ENABLE_DISABLE, pg_enable_disable) \
+_(POLICER_ADD_DEL, policer_add_del) \
+_(POLICER_DUMP, policer_dump) \
+_(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \
+_(POLICER_CLASSIFY_DUMP, policer_classify_dump) \
+_(IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL, \
+ ip_source_and_port_range_check_add_del) \
+_(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, \
+ ip_source_and_port_range_check_interface_add_del) \
+_(LISP_ENABLE_DISABLE, lisp_enable_disable) \
+_(GPE_ENABLE_DISABLE, gpe_enable_disable) \
+_(GPE_ADD_DEL_IFACE, gpe_add_del_iface) \
+_(LISP_PITR_SET_LOCATOR_SET, lisp_pitr_set_locator_set) \
+_(LISP_MAP_REQUEST_MODE, lisp_map_request_mode) \
+_(SHOW_LISP_MAP_REQUEST_MODE, show_lisp_map_request_mode) \
+_(LISP_ADD_DEL_REMOTE_MAPPING, lisp_add_del_remote_mapping) \
+_(LISP_ADD_DEL_ADJACENCY, lisp_add_del_adjacency) \
+_(LISP_ADD_DEL_MAP_REQUEST_ITR_RLOCS, \
+ lisp_add_del_map_request_itr_rlocs) \
+_(LISP_EID_TABLE_ADD_DEL_MAP, lisp_eid_table_add_del_map) \
+_(LISP_ADD_DEL_LOCAL_EID, lisp_add_del_local_eid) \
+_(GPE_ADD_DEL_FWD_ENTRY, gpe_add_del_fwd_entry) \
+_(LISP_ADD_DEL_LOCATOR_SET, lisp_add_del_locator_set) \
+_(LISP_ADD_DEL_MAP_RESOLVER, lisp_add_del_map_resolver) \
+_(LISP_ADD_DEL_LOCATOR, lisp_add_del_locator) \
+_(LISP_EID_TABLE_DUMP, lisp_eid_table_dump) \
+_(LISP_EID_TABLE_MAP_DUMP, lisp_eid_table_map_dump) \
+_(LISP_EID_TABLE_VNI_DUMP, lisp_eid_table_vni_dump) \
+_(LISP_MAP_RESOLVER_DUMP, lisp_map_resolver_dump) \
+_(LISP_MAP_SERVER_DUMP, lisp_map_server_dump) \
+_(LISP_LOCATOR_SET_DUMP, lisp_locator_set_dump) \
+_(LISP_LOCATOR_DUMP, lisp_locator_dump) \
+_(LISP_ADJACENCIES_GET, lisp_adjacencies_get) \
+_(SHOW_LISP_RLOC_PROBE_STATE, show_lisp_rloc_probe_state) \
+_(SHOW_LISP_MAP_REGISTER_STATE, show_lisp_map_register_state) \
+_(LISP_RLOC_PROBE_ENABLE_DISABLE, lisp_rloc_probe_enable_disable) \
+_(LISP_MAP_REGISTER_ENABLE_DISABLE, lisp_map_register_enable_disable) \
+_(IPSEC_GRE_ADD_DEL_TUNNEL, ipsec_gre_add_del_tunnel) \
+_(IPSEC_GRE_TUNNEL_DUMP, ipsec_gre_tunnel_dump) \
+_(DELETE_SUBIF, delete_subif) \
+_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) \
+_(PUNT, punt) \
+_(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \
+_(FLOW_CLASSIFY_DUMP, flow_classify_dump) \
+_(GET_FIRST_MSG_ID, get_first_msg_id) \
+_(IOAM_ENABLE, ioam_enable) \
+_(IOAM_DISABLE, ioam_disable) \
+_(IP_FIB_DUMP, ip_fib_dump) \
+_(IP6_FIB_DUMP, ip6_fib_dump) \
+_(FEATURE_ENABLE_DISABLE, feature_enable_disable) \
+_(SW_INTERFACE_TAG_ADD_DEL, sw_interface_tag_add_del) \
+_(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \
+_(P2P_ETHERNET_ADD, p2p_ethernet_add) \
+_(P2P_ETHERNET_DEL, p2p_ethernet_del) \
+_(TCP_CONFIGURE_SRC_ADDRESSES, tcp_configure_src_addresses)
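+
+/*
+ * Walk the table above and install each custom print handler into
+ * am->msg_print_handlers, indexed by binary-API message id.
+ */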
+void
+vl_msg_api_custom_dump_configure (api_main_t * am)
+{
+#define _(n,f) am->msg_print_handlers[VL_API_##n] \
+ = (void *) vl_api_##f##_t_print;
+ foreach_custom_print_function;
+#undef _
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c
new file mode 100644
index 00000000..6b333730
--- /dev/null
+++ b/src/vpp/api/gmon.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/api_errno.h>
+
+#include <svm/svmdb.h>
+
+typedef struct
+{
+ svmdb_client_t *svmdb_client;
+ f64 *vector_rate_ptr;
+ f64 *input_rate_ptr;
+ f64 *sig_error_rate_ptr;
+ pid_t *vpef_pid_ptr;
+ u64 last_sig_errors;
+ u64 current_sig_errors;
+ uword *sig_error_bitmap;
+ vlib_main_t *vlib_main;
+ vlib_main_t **my_vlib_mains;
+
+} gmon_main_t;
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/devices/devices.h>
+
+gmon_main_t gmon_main;
+
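+/* Sum the error counters flagged in sig_error_bitmap across all
+   vlib_mains (main thread and workers), subtracting any
+   counters_last_clear baseline so "clear errors" is honoured. */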
+static u64
+get_significant_errors (gmon_main_t * gm)
+{
+ vlib_main_t *this_vlib_main;
+ vlib_error_main_t *em;
+ uword code;
+ int vm_index;
+ u64 significant_errors = 0;
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (code, gm->sig_error_bitmap,
+ ({
+ for (vm_index = 0; vm_index < vec_len (gm->my_vlib_mains); vm_index++)
+ {
+ this_vlib_main = gm->my_vlib_mains[vm_index];
+ em = &this_vlib_main->error_main;
+ significant_errors += em->counters[code] -
+ ((vec_len(em->counters_last_clear) > code) ?
+ em->counters_last_clear[code] : 0);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ return (significant_errors);
+}
+
+static clib_error_t *
+publish_pid (vlib_main_t * vm)
+{
+ gmon_main_t *gm = &gmon_main;
+
+ *gm->vpef_pid_ptr = getpid ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (publish_pid);
+
+
+static uword
+gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ f64 vector_rate;
+ u64 input_packets, last_input_packets, new_sig_errors;
+ f64 last_runtime, dt, now;
+ gmon_main_t *gm = &gmon_main;
+ int i;
+
+  last_runtime = 0.0;
+  last_input_packets = 0;
+
+ /* Initial wait for the world to settle down */
+ vlib_process_suspend (vm, 5.0);
+
+ for (i = 0; i < vec_len (vlib_mains); i++)
+ vec_add1 (gm->my_vlib_mains, vlib_mains[i]);
+
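+  /* Main sampling loop: every 5 seconds publish the vector rate, the
+     aggregate input packet rate and the significant-error rate (both
+     derived over the elapsed interval) through the svmdb value cells. */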
+ while (1)
+ {
+ vlib_process_suspend (vm, 5.0);
+ vector_rate = vlib_last_vector_length_per_node (vm);
+ *gm->vector_rate_ptr = vector_rate;
+ now = vlib_time_now (vm);
+ dt = now - last_runtime;
+ input_packets = vnet_get_aggregate_rx_packets ();
+ *gm->input_rate_ptr = (f64) (input_packets - last_input_packets) / dt;
+ last_runtime = now;
+ last_input_packets = input_packets;
+
+ new_sig_errors = get_significant_errors (gm);
+ *gm->sig_error_rate_ptr =
+ ((f64) (new_sig_errors - gm->last_sig_errors)) / dt;
+ gm->last_sig_errors = new_sig_errors;
+ }
+
+ return 0; /* not so much */
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (gmon_process_node,static) = {
+ .function = gmon_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "gmon-process",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+gmon_init (vlib_main_t * vm)
+{
+ gmon_main_t *gm = &gmon_main;
+ api_main_t *am = &api_main;
+ pid_t *swp = 0;
+ f64 *v = 0;
+ clib_error_t *error;
+ svmdb_map_args_t _ma, *ma = &_ma;
+
+ if ((error = vlib_call_init_function (vm, vpe_api_init)))
+ return (error);
+
+ if ((error = vlib_call_init_function (vm, vlibmemory_init)))
+ return (error);
+
+ gm->vlib_main = vm;
+
+ memset (ma, 0, sizeof (*ma));
+ ma->root_path = am->root_path;
+ ma->uid = am->api_uid;
+ ma->gid = am->api_gid;
+
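+  /* Map the shared-memory database through which the rates are
+     published; external monitoring clients can then look up the value
+     cells by name. */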
+ gm->svmdb_client = svmdb_map (ma);
+
+ /* Find or create, set to zero */
+ vec_add1 (v, 0.0);
+ svmdb_local_set_vec_variable (gm->svmdb_client,
+ "vpp_vector_rate", (char *) v, sizeof (*v));
+ vec_free (v);
+ vec_add1 (v, 0.0);
+ svmdb_local_set_vec_variable (gm->svmdb_client,
+ "vpp_input_rate", (char *) v, sizeof (*v));
+ vec_free (v);
+ vec_add1 (v, 0.0);
+ svmdb_local_set_vec_variable (gm->svmdb_client,
+ "vpp_sig_error_rate",
+ (char *) v, sizeof (*v));
+ vec_free (v);
+
+  vec_add1 (swp, 0);
+ svmdb_local_set_vec_variable (gm->svmdb_client,
+ "vpp_pid", (char *) swp, sizeof (*swp));
+ vec_free (swp);
+
+ /* the value cells will never move, so acquire references to them */
+ gm->vector_rate_ptr =
+ svmdb_local_get_variable_reference (gm->svmdb_client,
+ SVMDB_NAMESPACE_VEC,
+ "vpp_vector_rate");
+ gm->input_rate_ptr =
+ svmdb_local_get_variable_reference (gm->svmdb_client,
+ SVMDB_NAMESPACE_VEC,
+ "vpp_input_rate");
+ gm->sig_error_rate_ptr =
+ svmdb_local_get_variable_reference (gm->svmdb_client,
+ SVMDB_NAMESPACE_VEC,
+ "vpp_sig_error_rate");
+ gm->vpef_pid_ptr =
+ svmdb_local_get_variable_reference (gm->svmdb_client,
+ SVMDB_NAMESPACE_VEC, "vpp_pid");
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (gmon_init);
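+
+/*
+ * A minimal sketch of an external monitoring client (illustrative
+ * only, not part of this file), assuming the same svmdb root path:
+ *
+ *   svmdb_map_args_t ma = { 0 };
+ *   svmdb_client_t *c = svmdb_map (&ma);
+ *   f64 *rate = svmdb_local_get_variable_reference
+ *     (c, SVMDB_NAMESPACE_VEC, "vpp_vector_rate");
+ *   if (rate)
+ *     printf ("vector rate %.2f\n", *rate);
+ */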
+
+static clib_error_t *
+gmon_exit (vlib_main_t * vm)
+{
+ gmon_main_t *gm = &gmon_main;
+
+ if (gm->vector_rate_ptr)
+ {
+ *gm->vector_rate_ptr = 0.0;
+ *gm->vpef_pid_ptr = 0;
+ *gm->input_rate_ptr = 0.0;
+ *gm->sig_error_rate_ptr = 0.0;
+ svm_region_unmap ((void *) gm->svmdb_client->db_rp);
+ vec_free (gm->svmdb_client);
+ }
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (gmon_exit);
+
+static int
+significant_error_enable_disable (gmon_main_t * gm, u32 index, int enable)
+{
+ vlib_main_t *vm = gm->vlib_main;
+ vlib_error_main_t *em = &vm->error_main;
+
+ if (index >= vec_len (em->counters))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ gm->sig_error_bitmap =
+ clib_bitmap_set (gm->sig_error_bitmap, index, enable);
+ return 0;
+}
+
+static clib_error_t *
+set_significant_error_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 index;
+ int enable = 1;
+ int rv;
+ gmon_main_t *gm = &gmon_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &index))
+ ;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ rv = significant_error_enable_disable (gm, index, enable);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return
+ (0, "significant_error_enable_disable returned %d", rv);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_significant_error_command, static) = {
+ .path = "set significant error",
+ .short_help = "set significant error <counter-index-nnn> [disable]",
+ .function = set_significant_error_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/json_format.c b/src/vpp/api/json_format.c
new file mode 100644
index 00000000..63454b87
--- /dev/null
+++ b/src/vpp/api/json_format.c
@@ -0,0 +1,304 @@
+/*
+ *------------------------------------------------------------------
+ * json_format.c
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <inttypes.h>
+#include "json_format.h"
+#include <vnet/ip/ip.h>
+#include <vppinfra/vec.h>
+
+#define VAT_TAB_WIDTH 2
+
+typedef struct vat_print_ctx_s
+{
+ FILE *ofp;
+ u32 indent;
+} vat_print_ctx_t;
+
+/* Format an IP4 address. */
+static u8 *
+vat_json_format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+/* Format an IP6 address. */
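+/* The longest run of zero 16-bit groups is collapsed to "::",
+   e.g. db01:0:0:0:0:0:0:11 is rendered as "db01::11". */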
+static u8 *
+vat_json_format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+ i_max_n_zero = ARRAY_LEN (a->as_u16);
+ max_n_zeros = 0;
+ i_first_zero = i_max_n_zero;
+ n_zeros = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ u32 is_zero = a->as_u16[i] == 0;
+ if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+ {
+ i_first_zero = i;
+ n_zeros = 0;
+ }
+ n_zeros += is_zero;
+ if ((!is_zero && n_zeros > max_n_zeros)
+ || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+ {
+ i_max_n_zero = i_first_zero;
+ max_n_zeros = n_zeros;
+ i_first_zero = ARRAY_LEN (a->as_u16);
+ n_zeros = 0;
+ }
+ }
+
+ last_double_colon = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == i_max_n_zero && max_n_zeros > 1)
+ {
+ s = format (s, "::");
+ i += max_n_zeros - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+static void
+vat_json_indent_print (vat_print_ctx_t * ctx)
+{
+ int i;
+ for (i = 0; i < ctx->indent * VAT_TAB_WIDTH; i++)
+ {
+ fformat (ctx->ofp, " ");
+ }
+}
+
+static void
+vat_json_indent_line (vat_print_ctx_t * ctx, char *fmt, ...)
+{
+ va_list va;
+
+ vat_json_indent_print (ctx);
+ va_start (va, fmt);
+ va_fformat (ctx->ofp, fmt, &va);
+ va_end (va);
+}
+
+static u8
+is_num_only (vat_json_node_t * p)
+{
+ vat_json_node_t *elem;
+ vec_foreach (elem, p)
+ {
+ if (VAT_JSON_INT != elem->type && VAT_JSON_UINT != elem->type)
+ {
+ return 0;
+ }
+ }
+ return 1;
+}
+
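+/*
+ * Recursive pretty-printer: objects and mixed arrays print one
+ * element per line at increasing indent, while arrays holding only
+ * numeric elements print inline, e.g. "[1, 2, 3]".
+ */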
+static void
+vat_json_print_internal (vat_print_ctx_t * ctx, vat_json_node_t * node)
+{
+#define P(fmt,...) fformat(ctx->ofp, fmt, ##__VA_ARGS__)
+#define PL(fmt,...) fformat(ctx->ofp, fmt"\n", ##__VA_ARGS__)
+#define PPL(fmt,...) vat_json_indent_line(ctx, fmt"\n", ##__VA_ARGS__)
+#define PP(fmt,...) vat_json_indent_line(ctx, fmt, ##__VA_ARGS__)
+#define INCR (ctx->indent++)
+#define DECR (ctx->indent--)
+
+ vat_json_pair_t *pair;
+ u32 i, count;
+ vat_json_node_t *elem;
+ u8 num_only = 0;
+
+ if (!node)
+ {
+ return;
+ }
+
+ switch (node->type)
+ {
+ case VAT_JSON_OBJECT:
+ count = vec_len (node->pairs);
+ if (count >= 1)
+ {
+ PL ("{");
+ INCR;
+ for (i = 0; i < count; i++)
+ {
+ pair = &node->pairs[i];
+ PP ("\"%s\": ", pair->name);
+ vat_json_print_internal (ctx, &pair->value);
+ if (i < count - 1)
+ {
+ P (",");
+ }
+ PL ();
+ }
+ DECR;
+ PP ("}");
+ }
+ else
+ {
+ P ("{}");
+ }
+ break;
+ case VAT_JSON_ARRAY:
+ num_only = is_num_only (node->array);
+ count = vec_len (node->array);
+ if (count >= 1)
+ {
+ if (num_only)
+ P ("[");
+ else
+ PL ("[ ");
+ INCR;
+ for (i = 0; i < count; i++)
+ {
+ elem = &node->array[i];
+ if (!num_only)
+ {
+ vat_json_indent_print (ctx);
+ }
+ vat_json_print_internal (ctx, elem);
+ if (i < count - 1)
+ {
+ if (num_only)
+ {
+ P (", ");
+ }
+ else
+ {
+ P (",");
+ }
+ }
+ if (!num_only)
+ PL ();
+ }
+ DECR;
+ if (!num_only)
+ PP ("]");
+ else
+ P ("]");
+ }
+ else
+ {
+ P ("[]");
+ }
+ break;
+ case VAT_JSON_INT:
+ P ("%d", node->sint);
+ break;
+ case VAT_JSON_UINT:
+ P ("%" PRIu64, node->uint);
+ break;
+ case VAT_JSON_REAL:
+ P ("%f", node->real);
+ break;
+ case VAT_JSON_STRING:
+ P ("\"%s\"", node->string);
+ break;
+ case VAT_JSON_IPV4:
+ P ("\"%U\"", vat_json_format_ip4_address, &node->ip4);
+ break;
+ case VAT_JSON_IPV6:
+ P ("\"%U\"", vat_json_format_ip6_address, &node->ip6);
+ break;
+ default:
+ break;
+ }
+#undef PPL
+#undef PP
+#undef PL
+#undef P
+}
+
+void
+vat_json_print (FILE * ofp, vat_json_node_t * node)
+{
+ vat_print_ctx_t ctx;
+ memset (&ctx, 0, sizeof ctx);
+ ctx.indent = 0;
+ ctx.ofp = ofp;
+ fformat (ofp, "\n");
+ vat_json_print_internal (&ctx, node);
+ fformat (ofp, "\n");
+}
+
+void
+vat_json_free (vat_json_node_t * node)
+{
+ int i = 0;
+
+ if (NULL == node)
+ {
+ return;
+ }
+ switch (node->type)
+ {
+ case VAT_JSON_OBJECT:
+ for (i = 0; i < vec_len (node->pairs); i++)
+ {
+ vat_json_free (&node->pairs[i].value);
+ }
+ if (NULL != node->pairs)
+ {
+ vec_free (node->pairs);
+ }
+ break;
+ case VAT_JSON_ARRAY:
+ for (i = 0; i < vec_len (node->array); i++)
+ {
+ vat_json_free (&node->array[i]);
+ }
+ if (NULL != node->array)
+ {
+ vec_free (node->array);
+ }
+ break;
+ case VAT_JSON_STRING:
+ if (NULL != node->string)
+ {
+ vec_free (node->string);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/json_format.h b/src/vpp/api/json_format.h
new file mode 100644
index 00000000..154fb3df
--- /dev/null
+++ b/src/vpp/api/json_format.h
@@ -0,0 +1,254 @@
+/*
+ *------------------------------------------------------------------
+ * json_format.h
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __JSON_FORMAT_H__
+#define __JSON_FORMAT_H__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <netinet/ip.h>
+
+/* JSON value type */
+typedef enum
+{
+ VAT_JSON_NONE,
+ VAT_JSON_OBJECT,
+ VAT_JSON_ARRAY,
+ VAT_JSON_STRING,
+ VAT_JSON_REAL,
+ VAT_JSON_UINT,
+ VAT_JSON_INT,
+ VAT_JSON_IPV4,
+ VAT_JSON_IPV6,
+ VAT_JSON_MAX
+} vat_json_val_type_t;
+
+typedef struct vat_json_node_s vat_json_node_t;
+typedef struct vat_json_pair_s vat_json_pair_t;
+
+/* JSON object structure */
+struct vat_json_node_s
+{
+ vat_json_val_type_t type;
+ union
+ {
+ vat_json_pair_t *pairs;
+ vat_json_node_t *array;
+ u8 *string;
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ u64 uint;
+ i64 sint;
+ f64 real;
+ };
+};
+
+struct vat_json_pair_s
+{
+ const char *name;
+ vat_json_node_t value;
+};
+
+void vat_json_print (FILE * ofp, vat_json_node_t * node);
+void vat_json_free (vat_json_node_t * node);
+
+static_always_inline void
+vat_json_init_object (vat_json_node_t * json)
+{
+ json->type = VAT_JSON_OBJECT;
+ json->pairs = NULL;
+}
+
+static_always_inline void
+vat_json_init_array (vat_json_node_t * json)
+{
+ json->type = VAT_JSON_ARRAY;
+ json->array = NULL;
+}
+
+static_always_inline void
+vat_json_set_string (vat_json_node_t * json, u8 * str)
+{
+ json->type = VAT_JSON_STRING;
+ json->string = str;
+}
+
+static_always_inline void
+vat_json_set_string_copy (vat_json_node_t * json, const u8 * str)
+{
+ u8 *ns = NULL;
+ vec_validate (ns, strlen ((const char *) str));
+ strcpy ((char *) ns, (const char *) str);
+ vec_add1 (ns, '\0');
+ vat_json_set_string (json, ns);
+}
+
+static_always_inline void
+vat_json_set_int (vat_json_node_t * json, i64 num)
+{
+ json->type = VAT_JSON_INT;
+ json->sint = num;
+}
+
+static_always_inline void
+vat_json_set_uint (vat_json_node_t * json, u64 num)
+{
+ json->type = VAT_JSON_UINT;
+ json->uint = num;
+}
+
+static_always_inline void
+vat_json_set_real (vat_json_node_t * json, f64 real)
+{
+ json->type = VAT_JSON_REAL;
+ json->real = real;
+}
+
+static_always_inline void
+vat_json_set_ip4 (vat_json_node_t * json, struct in_addr ip4)
+{
+ json->type = VAT_JSON_IPV4;
+ json->ip4 = ip4;
+}
+
+static_always_inline void
+vat_json_set_ip6 (vat_json_node_t * json, struct in6_addr ip6)
+{
+ json->type = VAT_JSON_IPV6;
+ json->ip6 = ip6;
+}
+
+static_always_inline vat_json_node_t *
+vat_json_object_add (vat_json_node_t * json, const char *name)
+{
+ ASSERT (VAT_JSON_OBJECT == json->type);
+ uword pos = vec_len (json->pairs);
+ vec_validate (json->pairs, pos);
+ json->pairs[pos].name = name;
+ return &json->pairs[pos].value;
+}
+
+static_always_inline vat_json_node_t *
+vat_json_array_add (vat_json_node_t * json)
+{
+ ASSERT (VAT_JSON_ARRAY == json->type);
+ uword pos = vec_len (json->array);
+ vec_validate (json->array, pos);
+ return &json->array[pos];
+}
+
+static_always_inline vat_json_node_t *
+vat_json_object_add_list (vat_json_node_t * json, const char *name)
+{
+ vat_json_node_t *array_node = vat_json_object_add (json, name);
+ vat_json_init_array (array_node);
+ return array_node;
+}
+
+static_always_inline void
+vat_json_object_add_string_copy (vat_json_node_t * json,
+ const char *name, u8 * str)
+{
+ vat_json_set_string_copy (vat_json_object_add (json, name), str);
+}
+
+static_always_inline void
+vat_json_object_add_uint (vat_json_node_t * json,
+ const char *name, u64 number)
+{
+ vat_json_set_uint (vat_json_object_add (json, name), number);
+}
+
+static_always_inline void
+vat_json_object_add_int (vat_json_node_t * json, const char *name, i64 number)
+{
+ vat_json_set_int (vat_json_object_add (json, name), number);
+}
+
+static_always_inline void
+vat_json_object_add_real (vat_json_node_t * json, const char *name, f64 real)
+{
+ vat_json_set_real (vat_json_object_add (json, name), real);
+}
+
+static_always_inline void
+vat_json_object_add_ip4 (vat_json_node_t * json,
+ const char *name, struct in_addr ip4)
+{
+ vat_json_set_ip4 (vat_json_object_add (json, name), ip4);
+}
+
+static_always_inline void
+vat_json_object_add_ip6 (vat_json_node_t * json,
+ const char *name, struct in6_addr ip6)
+{
+ vat_json_set_ip6 (vat_json_object_add (json, name), ip6);
+}
+
+static_always_inline void
+vat_json_array_add_int (vat_json_node_t * json, i64 number)
+{
+ vat_json_set_int (vat_json_array_add (json), number);
+}
+
+static_always_inline void
+vat_json_array_add_uint (vat_json_node_t * json, u64 number)
+{
+ vat_json_set_uint (vat_json_array_add (json), number);
+}
+
+static_always_inline void
+vat_json_object_add_bytes (vat_json_node_t * json,
+ const char *name, u8 * array, uword size)
+{
+ ASSERT (VAT_JSON_OBJECT == json->type);
+ vat_json_node_t *json_array = vat_json_object_add (json, name);
+ vat_json_init_array (json_array);
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ vat_json_array_add_uint (json_array, array[i]);
+ }
+}
+
+static_always_inline vat_json_node_t *
+vat_json_object_get_element (vat_json_node_t * json, const char *name)
+{
+ int i = 0;
+
+ ASSERT (VAT_JSON_OBJECT == json->type);
+ for (i = 0; i < vec_len (json->pairs); i++)
+ {
+ if (0 == strcmp (json->pairs[i].name, name))
+ {
+ return &json->pairs[i].value;
+ }
+ }
+ return NULL;
+}
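+
+/*
+ * Typical usage of this API (illustrative sketch):
+ *
+ *   vat_json_node_t root;
+ *   vat_json_init_object (&root);
+ *   vat_json_object_add_uint (&root, "retval", 0);
+ *   vat_json_object_add_string_copy (&root, "name", (u8 *) "lo0");
+ *   vat_json_print (stdout, &root);
+ *   vat_json_free (&root);
+ *
+ * Pair names are referenced, not copied, so they must outlive the
+ * tree; vat_json_free releases strings, pairs and array storage.
+ */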
+
+#endif /* __JSON_FORMAT_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/plugin.c b/src/vpp/api/plugin.c
new file mode 100644
index 00000000..c1cc928c
--- /dev/null
+++ b/src/vpp/api/plugin.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * plugin.c: plugin handling
+ */
+
+#include <vat/vat.h>
+#include <vat/plugin.h>
+#include <dlfcn.h>
+#include <dirent.h>
+
+plugin_main_t vat_plugin_main;
+
+static int
+load_one_plugin (plugin_main_t * pm, plugin_info_t * pi)
+{
+ void *handle, *register_handle;
+ clib_error_t *(*fp) (vat_main_t *);
+ clib_error_t *error;
+
+ handle = dlopen ((char *) pi->name, RTLD_LAZY);
+
+ /*
+ * Note: this can happen if the plugin has an undefined symbol reference,
+ * so print a warning. Otherwise, the poor slob won't know what happened.
+ * Ask me how I know that...
+ */
+ if (handle == 0)
+ {
+ clib_warning ("%s", dlerror ());
+ return -1;
+ }
+
+ pi->handle = handle;
+
+ register_handle = dlsym (pi->handle, "vat_plugin_register");
+ if (register_handle == 0)
+ return 0;
+
+ fp = register_handle;
+
+ error = (*fp) (pm->vat_main);
+
+ if (error)
+ {
+ clib_error_report (error);
+ dlclose (handle);
+ return 1;
+ }
+
+ clib_warning ("Loaded plugin: %s", pi->name);
+
+ return 0;
+}
+
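+/*
+ * Split the colon-separated plugin path into a vector of
+ * NUL-terminated strings, e.g. "/usr/lib/a:/usr/lib/b" yields
+ * { "/usr/lib/a", "/usr/lib/b" }.
+ */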
+static u8 **
+split_plugin_path (plugin_main_t * pm)
+{
+ int i;
+ u8 **rv = 0;
+ u8 *path = pm->plugin_path;
+ u8 *this = 0;
+
+ for (i = 0; i < vec_len (pm->plugin_path); i++)
+ {
+ if (path[i] != ':')
+ {
+ vec_add1 (this, path[i]);
+ continue;
+ }
+ vec_add1 (this, 0);
+ vec_add1 (rv, this);
+ this = 0;
+ }
+ if (this)
+ {
+ vec_add1 (this, 0);
+ vec_add1 (rv, this);
+ }
+ return rv;
+}
+
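+/*
+ * Scan each directory in the plugin path; skip entries which fail
+ * the optional name-filter prefix match or are not regular files,
+ * and dlopen() anything not already present in the name hash.
+ */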
+int
+vat_load_new_plugins (plugin_main_t * pm)
+{
+ DIR *dp;
+ struct dirent *entry;
+ struct stat statb;
+ uword *p;
+ plugin_info_t *pi;
+ u8 **plugin_path;
+ int i;
+
+ plugin_path = split_plugin_path (pm);
+
+ for (i = 0; i < vec_len (plugin_path); i++)
+ {
+ dp = opendir ((char *) plugin_path[i]);
+
+ if (dp == 0)
+ continue;
+
+ while ((entry = readdir (dp)))
+ {
+ u8 *plugin_name;
+
+ if (pm->plugin_name_filter)
+ {
+ int j;
+ for (j = 0; j < vec_len (pm->plugin_name_filter); j++)
+ if (entry->d_name[j] != pm->plugin_name_filter[j])
+ goto next;
+ }
+
+ plugin_name = format (0, "%s/%s%c", plugin_path[i],
+ entry->d_name, 0);
+
+ /* unreadable */
+ if (stat ((char *) plugin_name, &statb) < 0)
+ {
+ ignore:
+ vec_free (plugin_name);
+ continue;
+ }
+
+ /* a dir or other things which aren't plugins */
+ if (!S_ISREG (statb.st_mode))
+ goto ignore;
+
+ p = hash_get_mem (pm->plugin_by_name_hash, plugin_name);
+ if (p == 0)
+ {
+ vec_add2 (pm->plugin_info, pi, 1);
+ pi->name = plugin_name;
+ pi->file_info = statb;
+
+ if (load_one_plugin (pm, pi))
+ {
+ vec_free (plugin_name);
+ _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1;
+ continue;
+ }
+ memset (pi, 0, sizeof (*pi));
+ hash_set_mem (pm->plugin_by_name_hash, plugin_name,
+ pi - pm->plugin_info);
+ }
+ next:
+ ;
+ }
+ closedir (dp);
+ vec_free (plugin_path[i]);
+ }
+ vec_free (plugin_path);
+ return 0;
+}
+
+#define QUOTE_(x) #x
+#define QUOTE(x) QUOTE_(x)
+
+/*
+ * Load plugins from /usr/lib/vpp_api_test_plugins by default
+ */
+char *vat_plugin_path = "/usr/lib/vpp_api_test_plugins";
+
+char *vat_plugin_name_filter = 0;
+
+int
+vat_plugin_init (vat_main_t * vam)
+{
+ plugin_main_t *pm = &vat_plugin_main;
+
+ pm->plugin_path = format (0, "%s%c", vat_plugin_path, 0);
+ if (vat_plugin_name_filter)
+ pm->plugin_name_filter = format (0, "%s%c", vat_plugin_name_filter, 0);
+
+ pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword));
+ pm->vat_main = vam;
+
+ return vat_load_new_plugins (pm);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/plugin.h b/src/vpp/api/plugin.h
new file mode 100644
index 00000000..559ec52f
--- /dev/null
+++ b/src/vpp/api/plugin.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * plugin.h: plugin handling
+ */
+
+#ifndef __included_plugin_h__
+#define __included_plugin_h__
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+typedef struct
+{
+ u8 *name;
+ struct stat file_info;
+ void *handle;
+} plugin_info_t;
+
+typedef struct
+{
+ /* loaded plugin info */
+ plugin_info_t *plugin_info;
+ uword *plugin_by_name_hash;
+
+ /* path and name filter */
+ u8 *plugin_path;
+ u8 *plugin_name_filter;
+
+ /* convenience */
+ vat_main_t *vat_main;
+
+} plugin_main_t;
+
+extern plugin_main_t vat_plugin_main;
+
+int vat_plugin_init (vat_main_t * vam);
+int vat_load_new_plugins (plugin_main_t * pm);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/summary_stats_client.c b/src/vpp/api/summary_stats_client.c
new file mode 100644
index 00000000..2c81d667
--- /dev/null
+++ b/src/vpp/api/summary_stats_client.c
@@ -0,0 +1,302 @@
+/*
+ *------------------------------------------------------------------
+ * summary_stats_client -
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+
+#include <vnet/ip/ip.h>
+
+#define f64_endian(a)
+#define f64_print(a,b)
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+
+vl_shmem_hdr_t *shmem_hdr;
+
+typedef struct
+{
+ volatile int sigterm_received;
+
+ struct sockaddr_in send_data_addr;
+ int send_data_socket;
+ u8 *display_name;
+
+ /* convenience */
+ unix_shared_memory_queue_t *vl_input_queue;
+ u32 my_client_index;
+} test_main_t;
+
+test_main_t test_main;
+
+/*
+ * Satisfy external references when -lvlib is not available.
+ */
+vlib_main_t vlib_global_main;
+vlib_main_t **vlib_mains;
+
+void
+vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
+{
+  clib_warning ("vlib_cli_output called...");
+}
+
+
+static void
+ vl_api_vnet_get_summary_stats_reply_t_handler
+ (vl_api_vnet_get_summary_stats_reply_t * mp)
+{
+ test_main_t *tm = &test_main;
+ static u8 *sb;
+ int n;
+
+ printf ("total rx pkts %llu, total rx bytes %llu\n",
+ (unsigned long long) mp->total_pkts[0],
+ (unsigned long long) mp->total_bytes[0]);
+ printf ("total tx pkts %llu, total tx bytes %llu\n",
+ (unsigned long long) mp->total_pkts[1],
+ (unsigned long long) mp->total_bytes[1]);
+ printf ("vector rate %.2f\n", mp->vector_rate);
+
+ vec_reset_length (sb);
+ sb = format (sb, "%v,%.0f,%llu,%llu,%llu,%llu\n%c",
+ tm->display_name, mp->vector_rate,
+ (unsigned long long) mp->total_pkts[0],
+ (unsigned long long) mp->total_bytes[0],
+ (unsigned long long) mp->total_pkts[1],
+ (unsigned long long) mp->total_bytes[1], 0);
+
+ n = sendto (tm->send_data_socket, sb, vec_len (sb),
+ 0, (struct sockaddr *) &tm->send_data_addr,
+ sizeof (tm->send_data_addr));
+
+ if (n != vec_len (sb))
+ clib_unix_warning ("sendto");
+
+}
+
+#define foreach_api_msg \
+_(VNET_GET_SUMMARY_STATS_REPLY, vnet_get_summary_stats_reply)
+
+int
+connect_to_vpe (char *name)
+{
+ int rv = 0;
+
+ rv = vl_client_connect_to_vlib ("/vpe-api", name, 32);
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_msg;
+#undef _
+
+ shmem_hdr = api_main.shmem_hdr;
+
+ return rv;
+}
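+
+/*
+ * For reference, the single foreach_api_msg entry above expands (via
+ * the _() macro in connect_to_vpe) to one registration call:
+ *
+ *   vl_msg_api_set_handlers (VL_API_VNET_GET_SUMMARY_STATS_REPLY,
+ *                            "vnet_get_summary_stats_reply",
+ *                            vl_api_vnet_get_summary_stats_reply_t_handler,
+ *                            vl_noop_handler,
+ *                            vl_api_vnet_get_summary_stats_reply_t_endian,
+ *                            vl_api_vnet_get_summary_stats_reply_t_print,
+ *                            sizeof (vl_api_vnet_get_summary_stats_reply_t),
+ *                            1);
+ */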
+
+int
+disconnect_from_vpe (void)
+{
+ vl_client_disconnect_from_vlib ();
+ return 0;
+}
+
+static void
+sigterm_handler (int sig)
+{
+ test_main_t *tm = &test_main;
+ tm->sigterm_received = 1;
+}
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+uword
+unformat_ip4_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ unsigned a[4];
+
+ if (!unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3]))
+ return 0;
+
+ if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256)
+ return 0;
+
+ result[0] = a[0];
+ result[1] = a[1];
+ result[2] = a[2];
+ result[3] = a[3];
+
+ return 1;
+}
+
+int
+main (int argc, char **argv)
+{
+ api_main_t *am = &api_main;
+ test_main_t *tm = &test_main;
+ vl_api_vnet_get_summary_stats_t *mp;
+ unformat_input_t _input, *input = &_input;
+ clib_error_t *error = 0;
+ ip4_address_t collector_ip;
+ u8 *display_name = 0;
+ u16 collector_port = 7654;
+
+ collector_ip.as_u32 = (u32) ~ 0;
+
+ unformat_init_command_line (input, argv);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "collector-ip %U",
+ unformat_ip4_address, &collector_ip))
+ ;
+ else if (unformat (input, "display-name %v", &display_name))
+ ;
+ else if (unformat (input, "collector-port %d", &collector_port))
+ ;
+ else
+ {
+ error =
+ clib_error_return
+ (0, "Usage: %s collector-ip <ip>\n"
+ " [display-name <string>] [collector-port <num>]\n"
+ " port defaults to 7654", argv[0]);
+ break;
+ }
+ }
+
+ if (error == 0 && collector_ip.as_u32 == (u32) ~ 0)
+ error = clib_error_return (0, "collector-ip not set...\n");
+
+
+ if (error)
+ {
+ clib_error_report (error);
+ exit (1);
+ }
+
+ if (display_name == 0)
+ {
+ display_name = format (0, "vpe-to-%d.%d.%d.%d",
+ collector_ip.as_u8[0],
+ collector_ip.as_u8[1],
+ collector_ip.as_u8[2], collector_ip.as_u8[3]);
+ }
+
+
+ connect_to_vpe ("test_client");
+
+ tm->vl_input_queue = shmem_hdr->vl_input_queue;
+ tm->my_client_index = am->my_client_index;
+ tm->display_name = display_name;
+
+ signal (SIGTERM, sigterm_handler);
+ signal (SIGINT, sigterm_handler);
+ signal (SIGQUIT, sigterm_handler);
+
+  /* data (UDP) TX socket toward the collector */
+ tm->send_data_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (tm->send_data_socket < 0)
+ {
+      clib_unix_warning ("send_data_socket");
+ exit (1);
+ }
+
+ memset (&tm->send_data_addr, 0, sizeof (tm->send_data_addr));
+ tm->send_data_addr.sin_family = AF_INET;
+ tm->send_data_addr.sin_addr.s_addr = collector_ip.as_u32;
+ tm->send_data_addr.sin_port = htons (collector_port);
+
+ fformat (stdout, "Send SIGINT or SIGTERM to quit...\n");
+
+ while (1)
+ {
+ sleep (5);
+
+ if (tm->sigterm_received)
+ break;
+ /* Poll for stats */
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_GET_SUMMARY_STATS);
+ mp->client_index = tm->my_client_index;
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+ }
+
+ fformat (stdout, "Exiting...\n");
+
+ disconnect_from_vpe ();
+ exit (0);
+}
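+
+/*
+ * Example invocation (addresses are hypothetical):
+ *
+ *   summary_stats_client collector-ip 192.0.2.1 \
+ *     display-name vpp1 collector-port 7654
+ *
+ * One CSV record is sent to the collector per 5-second poll.
+ */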
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/test_client.c b/src/vpp/api/test_client.c
new file mode 100644
index 00000000..844b9702
--- /dev/null
+++ b/src/vpp/api/test_client.c
@@ -0,0 +1,1526 @@
+/*
+ *------------------------------------------------------------------
+ * test_client.c - binary API test client
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+
+#include <vnet/ip/ip.h>
+#include <vnet/interface.h>
+
+#define f64_endian(a)
+#define f64_print(a,b)
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+
+vl_shmem_hdr_t *shmem_hdr;
+
+typedef struct
+{
+ int link_events_on;
+ int stats_on;
+ int oam_events_on;
+
+ /* convenience */
+ unix_shared_memory_queue_t *vl_input_queue;
+ u32 my_client_index;
+} test_main_t;
+
+test_main_t test_main;
+
+/*
+ * Satisfy external references when -lvlib is not available.
+ */
+vlib_main_t vlib_global_main;
+vlib_main_t **vlib_mains;
+
+void
+vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
+{
+  clib_warning ("vlib_cli_output called...");
+}
+
+u8 *
+format_ethernet_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
+
+static void
+vl_api_sw_interface_details_t_handler (vl_api_sw_interface_details_t * mp)
+{
+ char *duplex, *speed;
+
+ switch (mp->link_duplex << VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT)
+ {
+ case VNET_HW_INTERFACE_FLAG_HALF_DUPLEX:
+ duplex = "half";
+ break;
+ case VNET_HW_INTERFACE_FLAG_FULL_DUPLEX:
+ duplex = "full";
+ break;
+ default:
+ duplex = "bogus";
+ break;
+ }
+ switch (mp->link_speed << VNET_HW_INTERFACE_FLAG_SPEED_SHIFT)
+ {
+ case VNET_HW_INTERFACE_FLAG_SPEED_10M:
+ speed = "10Mbps";
+ break;
+ case VNET_HW_INTERFACE_FLAG_SPEED_100M:
+ speed = "100Mbps";
+ break;
+ case VNET_HW_INTERFACE_FLAG_SPEED_1G:
+ speed = "1Gbps";
+ break;
+ case VNET_HW_INTERFACE_FLAG_SPEED_10G:
+ speed = "10Gbps";
+ break;
+ case VNET_HW_INTERFACE_FLAG_SPEED_40G:
+ speed = "40Gbps";
+ break;
+ case VNET_HW_INTERFACE_FLAG_SPEED_100G:
+ speed = "100Gbps";
+ break;
+ default:
+ speed = "bogus";
+ break;
+ }
+ fformat (stdout, "details: %s sw_if_index %d sup_sw_if_index %d "
+ "link_duplex %s link_speed %s",
+ mp->interface_name, ntohl (mp->sw_if_index),
+ ntohl (mp->sup_sw_if_index), duplex, speed);
+
+ if (mp->l2_address_length)
+ fformat (stdout, " l2 address: %U\n",
+ format_ethernet_address, mp->l2_address);
+ else
+ fformat (stdout, "\n");
+}
+
+static void
+vl_api_sw_interface_set_flags_t_handler (vl_api_sw_interface_set_flags_t * mp)
+{
+ fformat (stdout, "set flags: sw_if_index %d, admin %s link %s\n",
+ ntohl (mp->sw_if_index),
+ mp->admin_up_down ? "up" : "down",
+ mp->link_up_down ? "up" : "down");
+}
+
+static void
+ vl_api_sw_interface_set_flags_reply_t_handler
+ (vl_api_sw_interface_set_flags_reply_t * mp)
+{
+ fformat (stdout, "set flags reply: reply %d\n", ntohl (mp->retval));
+}
+
+static void
+ vl_api_want_interface_events_reply_t_handler
+ (vl_api_want_interface_events_reply_t * mp)
+{
+}
+
+static void
+vl_api_want_stats_reply_t_handler (vl_api_want_stats_reply_t * mp)
+{
+ fformat (stdout, "want stats reply %d\n", ntohl (mp->retval));
+}
+
+static void
+vl_api_want_oam_events_reply_t_handler (vl_api_want_oam_events_reply_t * mp)
+{
+ fformat (stdout, "want oam reply %d\n", ntohl (mp->retval));
+}
+
+static void
+vl_api_ip_add_del_route_reply_t_handler (vl_api_ip_add_del_route_reply_t * mp)
+{
+ fformat (stdout, "add_route reply %d\n", ntohl (mp->retval));
+}
+
+static void
+ vl_api_sw_interface_add_del_address_reply_t_handler
+ (vl_api_sw_interface_add_del_address_reply_t * mp)
+{
+ fformat (stdout, "add_del_address reply %d\n", ntohl (mp->retval));
+}
+
+static void
+ vl_api_sw_interface_set_table_reply_t_handler
+ (vl_api_sw_interface_set_table_reply_t * mp)
+{
+ fformat (stdout, "set_table reply %d\n", ntohl (mp->retval));
+}
+
+static void
+vl_api_tap_connect_reply_t_handler (vl_api_tap_connect_reply_t * mp)
+{
+ fformat (stdout, "tap connect reply %d, sw_if_index %d\n",
+ ntohl (mp->retval), ntohl (mp->sw_if_index));
+}
+
+static void
+vl_api_create_vlan_subif_reply_t_handler (vl_api_create_vlan_subif_reply_t *
+ mp)
+{
+ fformat (stdout, "create vlan subif reply %d, sw_if_index %d\n",
+ ntohl (mp->retval), ntohl (mp->sw_if_index));
+}
+
+static void vl_api_proxy_arp_add_del_reply_t_handler
+ (vl_api_proxy_arp_add_del_reply_t * mp)
+{
+ fformat (stdout, "add del proxy arp reply %d\n", ntohl (mp->retval));
+}
+
+static void vl_api_proxy_arp_intfc_enable_disable_reply_t_handler
+ (vl_api_proxy_arp_intfc_enable_disable_reply_t * mp)
+{
+ fformat (stdout, "proxy arp intfc ena/dis reply %d\n", ntohl (mp->retval));
+}
+
+static void vl_api_ip_neighbor_add_del_reply_t_handler
+ (vl_api_ip_neighbor_add_del_reply_t * mp)
+{
+ fformat (stdout, "ip neighbor add del reply %d\n", ntohl (mp->retval));
+}
+
+#if 0
+static void
+vl_api_vnet_interface_counters_t_handler (vl_api_vnet_interface_counters_t *
+ mp)
+{
+ char *counter_name;
+ u32 count, sw_if_index;
+ int i;
+
+ count = ntohl (mp->count);
+ sw_if_index = ntohl (mp->first_sw_if_index);
+ if (mp->is_combined == 0)
+ {
+ u64 *vp, v;
+ vp = (u64 *) mp->data;
+
+ switch (mp->vnet_counter_type)
+ {
+ case VNET_INTERFACE_COUNTER_DROP:
+ counter_name = "drop";
+ break;
+ case VNET_INTERFACE_COUNTER_PUNT:
+ counter_name = "punt";
+ break;
+ case VNET_INTERFACE_COUNTER_IP4:
+ counter_name = "ip4";
+ break;
+ case VNET_INTERFACE_COUNTER_IP6:
+ counter_name = "ip6";
+ break;
+ case VNET_INTERFACE_COUNTER_RX_NO_BUF:
+ counter_name = "rx-no-buf";
+ break;
+ case VNET_INTERFACE_COUNTER_RX_MISS:
+ counter_name = "rx-miss";
+ break;
+ case VNET_INTERFACE_COUNTER_RX_ERROR:
+ counter_name = "rx-error";
+ break;
+ case VNET_INTERFACE_COUNTER_TX_ERROR:
+ counter_name = "tx-error (fifo-full)";
+ break;
+ default:
+ counter_name = "bogus";
+ break;
+ }
+ for (i = 0; i < count; i++)
+ {
+ v = clib_mem_unaligned (vp, u64);
+ v = clib_net_to_host_u64 (v);
+ vp++;
+ fformat (stdout, "%d.%s %lld\n", sw_if_index, counter_name, v);
+ sw_if_index++;
+ }
+ }
+ else
+ {
+ vlib_counter_t *vp;
+ u64 packets, bytes;
+ vp = (vlib_counter_t *) mp->data;
+
+ switch (mp->vnet_counter_type)
+ {
+ case VNET_INTERFACE_COUNTER_RX:
+ counter_name = "rx";
+ break;
+ case VNET_INTERFACE_COUNTER_TX:
+ counter_name = "tx";
+ break;
+ default:
+ counter_name = "bogus";
+ break;
+ }
+ for (i = 0; i < count; i++)
+ {
+ packets = clib_mem_unaligned (&vp->packets, u64);
+ packets = clib_net_to_host_u64 (packets);
+ bytes = clib_mem_unaligned (&vp->bytes, u64);
+ bytes = clib_net_to_host_u64 (bytes);
+ vp++;
+ fformat (stdout, "%d.%s.packets %lld\n",
+ sw_if_index, counter_name, packets);
+ fformat (stdout, "%d.%s.bytes %lld\n",
+ sw_if_index, counter_name, bytes);
+ sw_if_index++;
+ }
+ }
+}
+#endif
+
+/* Format an IP4 address. */
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+/* Format an IP4 route destination and length. */
+u8 *
+format_ip4_address_and_length (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ u8 l = va_arg (*args, u32);
+ return format (s, "%U/%d", format_ip4_address, a, l);
+}
+
+static void
+vl_api_vnet_ip4_fib_counters_t_handler (vl_api_vnet_ip4_fib_counters_t * mp)
+{
+ int i;
+ vl_api_ip4_fib_counter_t *ctrp;
+ u32 count;
+
+ count = ntohl (mp->count);
+
+ fformat (stdout, "fib id %d, count this msg %d\n",
+ ntohl (mp->vrf_id), count);
+
+ ctrp = mp->c;
+ for (i = 0; i < count; i++)
+ {
+ fformat (stdout, "%U: %lld packets, %lld bytes\n",
+ format_ip4_address_and_length, &ctrp->address,
+ (u32) ctrp->address_length,
+ clib_net_to_host_u64 (ctrp->packets),
+ clib_net_to_host_u64 (ctrp->bytes));
+ ctrp++;
+ }
+}
+
+/* Format an IP6 address. */
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+ i_max_n_zero = ARRAY_LEN (a->as_u16);
+ max_n_zeros = 0;
+ i_first_zero = i_max_n_zero;
+ n_zeros = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ u32 is_zero = a->as_u16[i] == 0;
+ if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+ {
+ i_first_zero = i;
+ n_zeros = 0;
+ }
+ n_zeros += is_zero;
+ if ((!is_zero && n_zeros > max_n_zeros)
+ || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+ {
+ i_max_n_zero = i_first_zero;
+ max_n_zeros = n_zeros;
+ i_first_zero = ARRAY_LEN (a->as_u16);
+ n_zeros = 0;
+ }
+ }
+
+ last_double_colon = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == i_max_n_zero && max_n_zeros > 1)
+ {
+ s = format (s, "::");
+ i += max_n_zeros - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+/* Format an IP6 route destination and length. */
+u8 *
+format_ip6_address_and_length (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u8 l = va_arg (*args, u32);
+ return format (s, "%U/%d", format_ip6_address, a, l);
+}
+
+static void
+vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp)
+{
+ int i;
+ vl_api_ip6_fib_counter_t *ctrp;
+ u32 count;
+
+ count = ntohl (mp->count);
+
+ fformat (stdout, "fib id %d, count this msg %d\n",
+ ntohl (mp->vrf_id), count);
+
+ ctrp = mp->c;
+ for (i = 0; i < count; i++)
+ {
+ fformat (stdout, "%U: %lld packets, %lld bytes\n",
+ format_ip6_address_and_length, &ctrp->address,
+ (u32) ctrp->address_length,
+ clib_net_to_host_u64 (ctrp->packets),
+ clib_net_to_host_u64 (ctrp->bytes));
+ ctrp++;
+ }
+}
+
+static void
+vl_api_oam_event_t_handler (vl_api_oam_event_t * mp)
+{
+ fformat (stdout, "OAM: %U now %s\n",
+ format_ip4_address, &mp->dst_address,
+ mp->state == 1 ? "alive" : "dead");
+}
+
+static void
+vl_api_oam_add_del_reply_t_handler (vl_api_oam_add_del_reply_t * mp)
+{
+ fformat (stdout, "oam add del reply %d\n", ntohl (mp->retval));
+}
+
+static void
+vl_api_reset_fib_reply_t_handler (vl_api_reset_fib_reply_t * mp)
+{
+ fformat (stdout, "fib reset reply %d\n", ntohl (mp->retval));
+}
+
+static void
+vl_api_dhcp_proxy_set_vss_reply_t_handler (vl_api_dhcp_proxy_set_vss_reply_t *
+ mp)
+{
+ fformat (stdout, "dhcp proxy set vss reply %d\n", ntohl (mp->retval));
+}
+
+static void
+vl_api_dhcp_proxy_config_reply_t_handler (vl_api_dhcp_proxy_config_reply_t *
+ mp)
+{
+ fformat (stdout, "dhcp proxy config reply %d\n", ntohl (mp->retval));
+}
+
+static void
+vl_api_set_ip_flow_hash_reply_t_handler (vl_api_set_ip_flow_hash_reply_t * mp)
+{
+ fformat (stdout, "set ip flow hash reply %d\n", ntohl (mp->retval));
+}
+
+static void
+ vl_api_sw_interface_ip6nd_ra_config_reply_t_handler
+ (vl_api_sw_interface_ip6nd_ra_config_reply_t * mp)
+{
+ fformat (stdout, "ip6 nd ra-config reply %d\n", ntohl (mp->retval));
+}
+
+static void
+ vl_api_sw_interface_ip6nd_ra_prefix_reply_t_handler
+ (vl_api_sw_interface_ip6nd_ra_prefix_reply_t * mp)
+{
+ fformat (stdout, "ip6 nd ra-prefix reply %d\n", ntohl (mp->retval));
+}
+
+static void
+ vl_api_sw_interface_ip6_enable_disable_reply_t_handler
+ (vl_api_sw_interface_ip6_enable_disable_reply_t * mp)
+{
+ fformat (stdout, "ip6 enable/disable reply %d\n", ntohl (mp->retval));
+}
+
+static void
+ vl_api_sw_interface_ip6_set_link_local_address_reply_t_handler
+ (vl_api_sw_interface_ip6_set_link_local_address_reply_t * mp)
+{
+ fformat (stdout, "ip6 set link-local address reply %d\n",
+ ntohl (mp->retval));
+}
+
+static void vl_api_create_loopback_reply_t_handler
+ (vl_api_create_loopback_reply_t * mp)
+{
+ fformat (stdout, "create loopback status %d, sw_if_index %d\n",
+ ntohl (mp->retval), ntohl (mp->sw_if_index));
+}
+
+static void vl_api_create_loopback_instance_reply_t_handler
+ (vl_api_create_loopback_instance_reply_t * mp)
+{
+ fformat (stdout, "create loopback status %d, sw_if_index %d\n",
+ ntohl (mp->retval), ntohl (mp->sw_if_index));
+}
+
+static void vl_api_l2_patch_add_del_reply_t_handler
+ (vl_api_l2_patch_add_del_reply_t * mp)
+{
+ fformat (stdout, "l2 patch reply %d\n", ntohl (mp->retval));
+}
+
+static void vl_api_sw_interface_set_l2_xconnect_reply_t_handler
+ (vl_api_sw_interface_set_l2_xconnect_reply_t * mp)
+{
+ fformat (stdout, "l2_xconnect reply %d\n", ntohl (mp->retval));
+}
+
+static void vl_api_sw_interface_set_l2_bridge_reply_t_handler
+ (vl_api_sw_interface_set_l2_bridge_reply_t * mp)
+{
+ fformat (stdout, "l2_bridge reply %d\n", ntohl (mp->retval));
+}
+
+static void
+noop_handler (void *notused)
+{
+}
+
+#define vl_api_vnet_ip4_fib_counters_t_endian noop_handler
+#define vl_api_vnet_ip4_fib_counters_t_print noop_handler
+#define vl_api_vnet_ip6_fib_counters_t_endian noop_handler
+#define vl_api_vnet_ip6_fib_counters_t_print noop_handler
+
+#define foreach_api_msg \
+_(SW_INTERFACE_DETAILS, sw_interface_details) \
+_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \
+_(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) \
+_(WANT_INTERFACE_EVENTS_REPLY, want_interface_events_reply) \
+_(WANT_STATS_REPLY, want_stats_reply) \
+_(WANT_OAM_EVENTS_REPLY, want_oam_events_reply) \
+_(OAM_EVENT, oam_event) \
+_(OAM_ADD_DEL_REPLY, oam_add_del_reply) \
+_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \
+_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \
+_(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply) \
+_(SW_INTERFACE_ADD_DEL_ADDRESS_REPLY, sw_interface_add_del_address_reply) \
+_(SW_INTERFACE_SET_TABLE_REPLY, sw_interface_set_table_reply) \
+_(TAP_CONNECT_REPLY, tap_connect_reply) \
+_(CREATE_VLAN_SUBIF_REPLY, create_vlan_subif_reply) \
+_(PROXY_ARP_ADD_DEL_REPLY, proxy_arp_add_del_reply) \
+_(PROXY_ARP_INTFC_ENABLE_DISABLE_REPLY, proxy_arp_intfc_enable_disable_reply) \
+_(IP_NEIGHBOR_ADD_DEL_REPLY, ip_neighbor_add_del_reply) \
+_(RESET_FIB_REPLY, reset_fib_reply) \
+_(DHCP_PROXY_CONFIG_REPLY, dhcp_proxy_config_reply) \
+_(DHCP_PROXY_SET_VSS_REPLY, dhcp_proxy_set_vss_reply) \
+_(SET_IP_FLOW_HASH_REPLY, set_ip_flow_hash_reply) \
+_(SW_INTERFACE_IP6ND_RA_CONFIG_REPLY, sw_interface_ip6nd_ra_config_reply) \
+_(SW_INTERFACE_IP6ND_RA_PREFIX_REPLY, sw_interface_ip6nd_ra_prefix_reply) \
+_(SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY, sw_interface_ip6_enable_disable_reply) \
+_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY, sw_interface_ip6_set_link_local_address_reply) \
+_(CREATE_LOOPBACK_REPLY, create_loopback_reply) \
+_(CREATE_LOOPBACK_INSTANCE_REPLY, create_loopback_instance_reply) \
+_(L2_PATCH_ADD_DEL_REPLY, l2_patch_add_del_reply) \
+_(SW_INTERFACE_SET_L2_XCONNECT_REPLY, sw_interface_set_l2_xconnect_reply) \
+_(SW_INTERFACE_SET_L2_BRIDGE_REPLY, sw_interface_set_l2_bridge_reply)
+
+int
+connect_to_vpe (char *name)
+{
+ int rv = 0;
+
+ rv = vl_client_connect_to_vlib ("/vpe-api", name, 32);
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_msg;
+#undef _
+
+ shmem_hdr = api_main.shmem_hdr;
+
+ return rv;
+}
+
+int
+disconnect_from_vpe (void)
+{
+ vl_client_disconnect_from_vlib ();
+ return 0;
+}
+
+void
+link_up_down_enable_disable (test_main_t * tm, int enable)
+{
+ vl_api_want_interface_events_t *mp;
+
+ /* Request admin / link up down messages */
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_WANT_INTERFACE_EVENTS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->enable_disable = enable;
+ mp->pid = getpid ();
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+ tm->link_events_on = enable;
+}
+
+void
+stats_enable_disable (test_main_t * tm, int enable)
+{
+ vl_api_want_stats_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_WANT_STATS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->enable_disable = enable;
+ mp->pid = getpid ();
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+ tm->stats_on = enable;
+}
+
+void
+oam_events_enable_disable (test_main_t * tm, int enable)
+{
+ vl_api_want_oam_events_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_WANT_OAM_EVENTS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->enable_disable = enable;
+ mp->pid = getpid ();
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+ tm->oam_events_on = enable;
+}
+
+void
+oam_add_del (test_main_t * tm, int is_add)
+{
+ vl_api_oam_add_del_t *mp;
+ ip4_address_t tmp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_OAM_ADD_DEL);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->is_add = is_add;
+
+ tmp.as_u32 = ntohl (0xc0a80101); /* 192.168.1.1 */
+ clib_memcpy (mp->src_address, tmp.as_u8, 4);
+
+ tmp.as_u32 = ntohl (0xc0a80103); /* 192.168.1.3 */
+ clib_memcpy (mp->dst_address, tmp.as_u8, 4);
+
+ mp->vrf_id = 0;
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+dump (test_main_t * tm)
+{
+ vl_api_sw_interface_dump_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_DUMP);
+ mp->client_index = tm->my_client_index;
+ mp->name_filter_valid = 1;
+ strncpy ((char *) mp->name_filter, "eth", sizeof (mp->name_filter) - 1);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+add_del_ip4_route (test_main_t * tm, int enable_disable)
+{
+ vl_api_ip_add_del_route_t *mp;
+ u32 tmp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_ADD_DEL_ROUTE);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->table_id = ntohl (0);
+ mp->create_vrf_if_needed = 1;
+
+ mp->next_hop_sw_if_index = ntohl (5);
+ mp->is_add = enable_disable;
+ mp->next_hop_weight = 1;
+
+ /* Next hop: 6.0.0.1 */
+ tmp = ntohl (0x06000001);
+ clib_memcpy (mp->next_hop_address, &tmp, sizeof (tmp));
+
+  /* Destination: 0.0.0.0/0 (the default route) */
+ tmp = ntohl (0x0);
+ clib_memcpy (mp->dst_address, &tmp, sizeof (tmp));
+ mp->dst_address_length = 0;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+add_del_ip6_route (test_main_t * tm, int enable_disable)
+{
+ vl_api_ip_add_del_route_t *mp;
+ u64 tmp[2];
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_ADD_DEL_ROUTE);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->next_hop_sw_if_index = ntohl (5);
+ mp->is_add = enable_disable;
+ mp->is_ipv6 = 1;
+ mp->next_hop_weight = 1;
+ mp->dst_address_length = 64;
+
+ /* add/del dabe::/64 via db01::11 */
+
+ tmp[0] = clib_host_to_net_u64 (0xdabe000000000000ULL);
+ tmp[1] = clib_host_to_net_u64 (0x0ULL);
+ clib_memcpy (mp->dst_address, &tmp[0], 8);
+ clib_memcpy (&mp->dst_address[8], &tmp[1], 8);
+
+ tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL);
+ tmp[1] = clib_host_to_net_u64 (0x11ULL);
+ clib_memcpy (mp->next_hop_address, &tmp[0], 8);
+ clib_memcpy (&mp->next_hop_address[8], &tmp[1], 8);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+add_del_interface_address (test_main_t * tm, int enable_disable)
+{
+ vl_api_sw_interface_add_del_address_t *mp;
+ u32 tmp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+ mp->is_add = enable_disable;
+ mp->address_length = 8;
+
+ tmp = ntohl (0x01020304);
+ clib_memcpy (mp->address, &tmp, 4);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+add_del_v6_interface_address (test_main_t * tm, int enable_disable)
+{
+ vl_api_sw_interface_add_del_address_t *mp;
+ u64 tmp[2];
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->is_ipv6 = 1;
+ mp->sw_if_index = ntohl (5);
+ mp->is_add = enable_disable;
+ mp->address_length = 64;
+
+ tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL);
+ tmp[1] = clib_host_to_net_u64 (0x11ULL);
+
+ clib_memcpy (mp->address, &tmp[0], 8);
+ clib_memcpy (&mp->address[8], &tmp[1], 8);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+del_all_interface_addresses (test_main_t * tm)
+{
+ vl_api_sw_interface_add_del_address_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+ mp->del_all = 1;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+set_interface_table (test_main_t * tm, int is_ipv6, u32 vrf_id)
+{
+ vl_api_sw_interface_set_table_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_TABLE);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+ mp->is_ipv6 = is_ipv6;
+ mp->vrf_id = ntohl (vrf_id);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+connect_unix_tap (test_main_t * tm, char *name)
+{
+ vl_api_tap_connect_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_TAP_CONNECT);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ strncpy ((char *) mp->tap_name, name, sizeof (mp->tap_name) - 1);
+ mp->use_random_mac = 1;
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+create_vlan_subif (test_main_t * tm, u32 vlan_id)
+{
+ vl_api_create_vlan_subif_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_CREATE_VLAN_SUBIF);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+ mp->vlan_id = ntohl (vlan_id);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+add_del_proxy_arp (test_main_t * tm, int is_add)
+{
+ vl_api_proxy_arp_add_del_t *mp;
+ u32 tmp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_PROXY_ARP_ADD_DEL);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->vrf_id = ntohl (11);
+ mp->is_add = is_add;
+
+ /* proxy fib 11, 1.1.1.1 -> 1.1.1.10 */
+ tmp = ntohl (0x01010101);
+ clib_memcpy (mp->low_address, &tmp, 4);
+
+ tmp = ntohl (0x0101010a);
+ clib_memcpy (mp->hi_address, &tmp, 4);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+proxy_arp_intfc_enable_disable (test_main_t * tm, int enable_disable)
+{
+ vl_api_proxy_arp_intfc_enable_disable_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_PROXY_ARP_INTFC_ENABLE_DISABLE);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (6);
+ mp->enable_disable = enable_disable;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+add_ip4_neighbor (test_main_t * tm, int add_del)
+{
+ vl_api_ip_neighbor_add_del_t *mp;
+ u32 tmp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_ADD_DEL);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (6);
+ mp->is_add = add_del;
+
+ memset (mp->mac_address, 0xbe, sizeof (mp->mac_address));
+
+ tmp = ntohl (0x0101010a);
+ clib_memcpy (mp->dst_address, &tmp, 4);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+add_ip6_neighbor (test_main_t * tm, int add_del)
+{
+ vl_api_ip_neighbor_add_del_t *mp;
+ u64 tmp[2];
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_ADD_DEL);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (6);
+ mp->is_add = add_del;
+ mp->is_ipv6 = 1;
+
+ memset (mp->mac_address, 0xbe, sizeof (mp->mac_address));
+
+ tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL);
+ tmp[1] = clib_host_to_net_u64 (0x11ULL);
+
+ clib_memcpy (mp->dst_address, &tmp[0], 8);
+ clib_memcpy (&mp->dst_address[8], &tmp[1], 8);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+reset_fib (test_main_t * tm, u8 is_ip6)
+{
+ vl_api_reset_fib_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_RESET_FIB);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->vrf_id = ntohl (11);
+ mp->is_ipv6 = is_ip6;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+dhcpv6_set_vss (test_main_t * tm)
+{
+ vl_api_dhcp_proxy_set_vss_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_SET_VSS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->oui = ntohl (6);
+ mp->fib_id = ntohl (60);
+ mp->is_add = 1;
+ mp->is_ipv6 = 1;
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+dhcpv4_set_vss (test_main_t * tm)
+{
+ vl_api_dhcp_proxy_set_vss_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_SET_VSS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->oui = ntohl (4);
+ mp->fib_id = ntohl (40);
+ mp->is_add = 1;
+ mp->is_ipv6 = 0;
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+dhcp_set_vss (test_main_t * tm)
+{
+ dhcpv4_set_vss (tm);
+ dhcpv6_set_vss (tm);
+}
+
+void
+dhcp_set_proxy (test_main_t * tm, int ipv6)
+{
+ vl_api_dhcp_proxy_config_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_CONFIG);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->is_ipv6 = ipv6;
+ mp->is_add = 1;
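+  /* server 2001:abcd:1234:fedc::2, src 2001:abcd:1234:5678::2 (ipv6 case) */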
+ mp->dhcp_server[0] = 0x20;
+ mp->dhcp_server[1] = 0x01;
+ mp->dhcp_server[2] = 0xab;
+ mp->dhcp_server[3] = 0xcd;
+ mp->dhcp_server[4] = 0x12;
+ mp->dhcp_server[5] = 0x34;
+ mp->dhcp_server[6] = 0xfe;
+ mp->dhcp_server[7] = 0xdc;
+ mp->dhcp_server[14] = 0;
+ mp->dhcp_server[15] = 0x2;
+
+ mp->dhcp_src_address[0] = 0x20;
+ mp->dhcp_src_address[1] = 0x01;
+ mp->dhcp_src_address[2] = 0xab;
+ mp->dhcp_src_address[3] = 0xcd;
+ mp->dhcp_src_address[4] = 0x12;
+ mp->dhcp_src_address[5] = 0x34;
+ mp->dhcp_src_address[6] = 0x56;
+ mp->dhcp_src_address[7] = 0x78;
+ mp->dhcp_src_address[14] = 0;
+ mp->dhcp_src_address[15] = 0x2;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+set_ip_flow_hash (test_main_t * tm, u8 is_ip6)
+{
+ vl_api_set_ip_flow_hash_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SET_IP_FLOW_HASH);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->vrf_id = 0;
+ mp->is_ipv6 = is_ip6;
+ mp->dst = 1;
+ mp->reverse = 1;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+ip6nd_ra_config (test_main_t * tm, int is_no)
+{
+ vl_api_sw_interface_ip6nd_ra_config_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+ mp->is_no = is_no;
+
+ mp->suppress = 1;
+
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6ND_RA_CONFIG);
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+ip6nd_ra_prefix (test_main_t * tm, int is_no)
+{
+ vl_api_sw_interface_ip6nd_ra_prefix_t *mp;
+ u64 tmp[2];
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+ mp->is_no = is_no;
+
+ mp->use_default = 1;
+
+ tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL);
+ tmp[1] = clib_host_to_net_u64 (0x11ULL);
+
+ clib_memcpy (mp->address, &tmp[0], 8);
+ clib_memcpy (&mp->address[8], &tmp[1], 8);
+
+ mp->address_length = 64;
+
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6ND_RA_PREFIX);
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+ip6_enable_disable (test_main_t * tm, int enable)
+{
+ vl_api_sw_interface_ip6_enable_disable_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+ mp->enable = (enable == 1);
+
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6_ENABLE_DISABLE);
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+loop_create (test_main_t * tm)
+{
+ vl_api_create_loopback_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->_vl_msg_id = ntohs (VL_API_CREATE_LOOPBACK);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+ip6_set_link_local_address (test_main_t * tm)
+{
+ vl_api_sw_interface_ip6_set_link_local_address_t *mp;
+ u64 tmp[2];
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+
+ tmp[0] = clib_host_to_net_u64 (0xfe80000000000000ULL);
+ tmp[1] = clib_host_to_net_u64 (0x11ULL);
+
+ clib_memcpy (mp->address, &tmp[0], 8);
+ clib_memcpy (&mp->address[8], &tmp[1], 8);
+
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+set_flags (test_main_t * tm, int up_down)
+{
+ vl_api_sw_interface_set_flags_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->sw_if_index = ntohl (5);
+ mp->admin_up_down = up_down;
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+l2_patch_add_del (test_main_t * tm, int is_add)
+{
+ vl_api_l2_patch_add_del_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_L2_PATCH_ADD_DEL);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->is_add = is_add;
+ mp->rx_sw_if_index = ntohl (1);
+ mp->tx_sw_if_index = ntohl (2);
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+l2_xconnect (test_main_t * tm)
+{
+ vl_api_sw_interface_set_l2_xconnect_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_L2_XCONNECT);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->rx_sw_if_index = ntohl (5);
+ mp->tx_sw_if_index = ntohl (6);
+ mp->enable = 1;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+void
+l2_bridge (test_main_t * tm)
+{
+ vl_api_sw_interface_set_l2_bridge_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_L2_BRIDGE);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+ mp->rx_sw_if_index = ntohl (5);
+ mp->bd_id = ntohl (6);
+ mp->bvi = 1;
+ mp->shg = 0;
+ mp->enable = 1;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+int
+main (int argc, char **argv)
+{
+ api_main_t *am = &api_main;
+ test_main_t *tm = &test_main;
+ int ch;
+
+ connect_to_vpe ("test_client");
+
+ tm->vl_input_queue = shmem_hdr->vl_input_queue;
+ tm->my_client_index = am->my_client_index;
+
+ fformat (stdout, "Type 'h' for help, 'q' to quit...\n");
+
+ while (1)
+ {
+ ch = getchar ();
+ switch (ch)
+ {
+ case 'q':
+ goto done;
+ case 'd':
+ dump (tm);
+ break;
+ case 'L':
+ link_up_down_enable_disable (tm, 1 /* enable_disable */ );
+ break;
+ case 'l':
+ link_up_down_enable_disable (tm, 0 /* enable_disable */ );
+ break;
+ case 'S':
+ stats_enable_disable (tm, 1 /* enable_disable */ );
+ break;
+ case 's':
+ stats_enable_disable (tm, 0 /* enable_disable */ );
+ break;
+ case '3':
+ add_del_ip4_route (tm, 0 /* add */ );
+ break;
+ case '4':
+ add_del_ip4_route (tm, 1 /* add */ );
+ break;
+ case '5':
+ add_del_ip6_route (tm, 0 /* add */ );
+ break;
+ case '6':
+ add_del_ip6_route (tm, 1 /* add */ );
+ break;
+ case 'A':
+ add_del_interface_address (tm, 1 /* add */ );
+ break;
+ case 'a':
+ add_del_interface_address (tm, 0 /* add */ );
+ break;
+ case 'B':
+ add_del_v6_interface_address (tm, 1 /* add */ );
+ break;
+ case 'b':
+ add_del_v6_interface_address (tm, 0 /* add */ );
+ break;
+ case 'E':
+ l2_patch_add_del (tm, 1 /* is_add */ );
+ break;
+ case 'e':
+ l2_patch_add_del (tm, 0 /* is_add */ );
+ break;
+ case 'z':
+ del_all_interface_addresses (tm);
+ break;
+ case 't':
+ set_interface_table (tm, 0 /* is_ipv6 */ ,
+ 11 /* my amp goes to 11 */ );
+ break;
+ case 'T':
+ set_interface_table (tm, 1 /* is_ipv6 */ ,
+ 12 /* my amp goes to 12 */ );
+ break;
+
+ case 'u':
+ create_vlan_subif (tm, 123);
+ break;
+
+ case 'c':
+ connect_unix_tap (tm, "foo");
+ break;
+
+ case 'n':
+ add_ip4_neighbor (tm, 1 /* is_add */ );
+ add_ip6_neighbor (tm, 1 /* is_add */ );
+ break;
+
+ case 'N':
+ add_ip4_neighbor (tm, 0 /* is_add */ );
+ add_ip6_neighbor (tm, 0 /* is_add */ );
+ break;
+
+ case 'p':
+ add_del_proxy_arp (tm, 1 /* add */ );
+ break;
+
+ case 'i':
+ proxy_arp_intfc_enable_disable (tm, 1 /* enable */ );
+ break;
+
+ case 'O':
+ oam_events_enable_disable (tm, 0 /* enable */ );
+ break;
+
+ case 'o':
+ oam_events_enable_disable (tm, 1 /* enable */ );
+ break;
+
+ case '0':
+ oam_add_del (tm, 0 /* is_add */ );
+ break;
+
+ case '1':
+ oam_add_del (tm, 1 /* is_add */ );
+ break;
+
+ case 'r':
+ reset_fib (tm, 0 /* is_ip6 */ );
+ break;
+
+ case 'R':
+ reset_fib (tm, 1 /* is_ip6 */ );
+ break;
+
+ case 'j':
+ dhcp_set_vss (tm);
+ break;
+
+ case 'k':
+ dhcp_set_proxy (tm, 0);
+ break;
+
+ case 'K':
+ dhcp_set_proxy (tm, 1 /*ipv6 */ );
+ break;
+
+ case 'v':
+ set_ip_flow_hash (tm, 0 /* is_ip6 */ );
+ break;
+
+ case 'V':
+ ip6_set_link_local_address (tm);
+ break;
+
+ case 'w':
+ ip6_enable_disable (tm, 1 /* enable */ );
+ break;
+
+ case 'W':
+ ip6_enable_disable (tm, 0 /* disable */ );
+ break;
+
+ case 'x':
+ ip6nd_ra_config (tm, 0 /* is_no */ );
+ break;
+ case 'X':
+ ip6nd_ra_config (tm, 1 /* is_no */ );
+ break;
+ case 'y':
+ ip6nd_ra_prefix (tm, 0 /* is_no */ );
+ break;
+ case 'Y':
+ ip6nd_ra_prefix (tm, 1 /* is_no */ );
+ break;
+
+ case '7':
+ loop_create (tm);
+ break;
+
+ case 'F':
+ set_flags (tm, 1 /* up_down */ );
+ break;
+
+ case 'f':
+ set_flags (tm, 0 /* up_down */ );
+ break;
+
+ case '@':
+ l2_xconnect (tm);
+ break;
+
+ case '#':
+ l2_bridge (tm);
+ break;
+
+ case 'h':
+ fformat (stdout, "q=quit,d=dump,L=link evts on,l=link evts off\n");
+ fformat (stdout, "S=stats on,s=stats off\n");
+ fformat (stdout, "4=add v4 route, 3=del v4 route\n");
+ fformat (stdout, "6=add v6 route, 5=del v6 route\n");
+ fformat (stdout, "A=add v4 intfc route, a=del v4 intfc route\n");
+ fformat (stdout, "B=add v6 intfc route, b=del v6 intfc route\n");
+ fformat (stdout, "z=del all intfc routes\n");
+ fformat (stdout, "t=set v4 intfc table, T=set v6 intfc table\n");
+ fformat (stdout, "c=connect unix tap\n");
+ fformat (stdout,
+ "j=set dhcpv4 and v6 link-address/option-82 params\n");
+ fformat (stdout, "k=set dhcpv4 relay agent params\n");
+ fformat (stdout, "K=set dhcpv6 relay agent params\n");
+ fformat (stdout, "E=add l2 patch, e=del l2 patch\n");
+ fformat (stdout, "V=ip6 set link-local address \n");
+ fformat (stdout, "w=ip6 enable \n");
+ fformat (stdout, "W=ip6 disable \n");
+ fformat (stdout, "x=ip6 nd config \n");
+ fformat (stdout, "X=no ip6 nd config\n");
+ fformat (stdout, "y=ip6 nd prefix \n");
+ fformat (stdout, "Y=no ip6 nd prefix\n");
+ fformat (stdout, "@=l2 xconnect\n");
+ fformat (stdout, "#=l2 bridge\n");
+ break;
+
+ default:
+ break;
+ }
+
+ }
+
+done:
+
+ if (tm->link_events_on)
+ link_up_down_enable_disable (tm, 0 /* enable */ );
+ if (tm->stats_on)
+ stats_enable_disable (tm, 0 /* enable */ );
+ if (tm->oam_events_on)
+ oam_events_enable_disable (tm, 0 /* enable */ );
+
+ disconnect_from_vpe ();
+ exit (0);
+}
+
+#undef vl_api_version
+#define vl_api_version(n,v) static u32 vpe_api_version = v;
+#include <vpp/api/vpe.api.h>
+#undef vl_api_version
+
+void
+vl_client_add_api_signatures (vl_api_memclnt_create_t * mp)
+{
+ /*
+ * Send the main API signature in slot 0. This bit of code must
+ * match the checks in ../vpe/api/api.c: vl_msg_api_version_check().
+ */
+ mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/test_ha.c b/src/vpp/api/test_ha.c
new file mode 100644
index 00000000..3264d5f9
--- /dev/null
+++ b/src/vpp/api/test_ha.c
@@ -0,0 +1,249 @@
+/*
+ *------------------------------------------------------------------
+ * test_ha.c - simple HA ping test client
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <svm/svm.h>
+#include <svm/svmdb.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+
+#include <vnet/ip/ip.h>
+
+#define f64_endian(a)
+#define f64_print(a,b)
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define endian-swap functions */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+
+vl_shmem_hdr_t *shmem_hdr;
+
+typedef struct
+{
+ u32 pings_sent;
+ u32 pings_replied;
+ volatile u32 signal_received;
+
+ /* convenience */
+ unix_shared_memory_queue_t *vl_input_queue;
+ u32 my_client_index;
+ svmdb_client_t *svmdb_client;
+} test_main_t;
+
+test_main_t test_main;
+
+static void vl_api_control_ping_reply_t_handler
+ (vl_api_control_ping_reply_t * mp)
+{
+ test_main_t *tm = &test_main;
+
+ fformat (stdout, "control ping reply from pid %d\n", ntohl (mp->vpe_pid));
+ tm->pings_replied++;
+}
+
+vlib_main_t vlib_global_main;
+vlib_main_t **vlib_mains;
+
+void
+vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
+{
+ clib_warning ("BUG: vlib_cli_output callled...");
+}
+
+#define foreach_api_msg \
+_(CONTROL_PING_REPLY,control_ping_reply)
+
+void
+ping (test_main_t * tm)
+{
+ vl_api_control_ping_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_CONTROL_PING);
+ mp->client_index = tm->my_client_index;
+ mp->context = 0xdeadbeef;
+
+ vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
+}
+
+static void
+noop_handler (void *notused)
+{
+}
+
+int
+connect_to_vpe (char *name)
+{
+ int rv = 0;
+ test_main_t *tm = &test_main;
+ api_main_t *am = &api_main;
+
+ rv = vl_client_connect_to_vlib ("/vpe-api", name, 32);
+ if (rv < 0)
+ return rv;
+
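+ /* Hook up a reply handler for each message in foreach_api_msg */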
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_api_msg;
+#undef _
+
+ shmem_hdr = api_main.shmem_hdr;
+ tm->vl_input_queue = shmem_hdr->vl_input_queue;
+ tm->my_client_index = am->my_client_index;
+ return 0;
+}
+
+int
+disconnect_from_vpe (void)
+{
+ vl_client_disconnect_from_vlib ();
+
+ return 0;
+}
+
+void
+signal_handler (int signo)
+{
+ test_main_t *tm = &test_main;
+
+ tm->signal_received = 1;
+}
+
+
+int
+main (int argc, char **argv)
+{
+ test_main_t *tm = &test_main;
+ api_main_t *am = &api_main;
+ u32 swt_pid = 0;
+ int connected = 0;
+
+ signal (SIGINT, signal_handler);
+
+ while (1)
+ {
+ if (tm->signal_received)
+ break;
+
+ if (am->shmem_hdr)
+ swt_pid = am->shmem_hdr->vl_pid;
+
+ /* If kill returns 0, the vpe-f process is alive */
+ if (kill (swt_pid, 0) == 0)
+ {
+ /* Try to connect */
+ if (connected == 0)
+ {
+ fformat (stdout, "Connect to VPE-f\n");
+ if (connect_to_vpe ("test_ha_client") >= 0)
+ {
+ tm->pings_sent = 0;
+ tm->pings_replied = 0;
+ connected = 1;
+ }
+ else
+ {
+ fformat (stdout, "Connect failed, sleep and retry...\n");
+ sleep (1);
+ continue;
+ }
+ }
+ tm->pings_sent++;
+ ping (tm);
+
+ sleep (1);
+
+ /* haven't heard back in 3 seconds, disconnect / reconnect */
+ if ((tm->pings_replied + 3) <= tm->pings_sent)
+ {
+ fformat (stdout, "VPE-f pid %d not responding\n", swt_pid);
+ swt_pid = 0;
+ disconnect_from_vpe ();
+ connected = 0;
+ }
+ }
+ else
+ {
+ if (connected)
+ {
+ fformat (stdout, "VPE-f pid %d died\n", swt_pid);
+ swt_pid = 0;
+ disconnect_from_vpe ();
+ connected = 0;
+ }
+ sleep (1);
+ }
+ }
+
+ fformat (stdout, "Signal received, graceful exit\n");
+ disconnect_from_vpe ();
+ exit (0);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/vat.h b/src/vpp/api/vat.h
new file mode 120000
index 00000000..3adbdbae
--- /dev/null
+++ b/src/vpp/api/vat.h
@@ -0,0 +1 @@
+../../vat/vat.h \ No newline at end of file
diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api
new file mode 100644
index 00000000..d68beae1
--- /dev/null
+++ b/src/vpp/api/vpe.api
@@ -0,0 +1,936 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+ This file defines vpe control-plane API messages which are generally
+ called through a shared memory interface.
+*/
+
+/*
+ * Note: API placement cleanup in progress
+ * If you're looking for interface APIs, please
+ * see .../src/vnet/{interface.api,interface_api.c}
+ * IP APIs: see .../src/vnet/ip/{ip.api, ip_api.c}
+ * TAP APIs: see .../src/vnet/unix/{tap.api, tap_api.c}
+ * VXLAN APIs: see .../src/vnet/vxlan/{vxlan.api, vxlan_api.c}
+ * LLDP APIs: see .../src/vnet/lldp/{lldp.api, lldp_api.c}
+ * AF-PACKET APIs: see ... /vnet/devices/af_packet/{af_packet.api, af_packet_api.c}
+ * NETMAP APIs: see ... /src/vnet/devices/netmap/{netmap.api, netmap_api.c}
+ * VHOST-USER APIs: see .../vnet/devices/virtio/{vhost_user.api, vhost_user_api.c}
+ * VXLAN GPE APIs: see .../src/vnet/vxlan-gpe/{vxlan_gpe.api, vxlan_gpe_api.c}
+ * GRE APIs: see .../src/vnet/gre/{gre.api, gre_api.c}
+ * L2 APIs: see .../src/vnet/l2/{l2.api, l2_api.c}
+ * L2TP APIs: see .../src/vnet/l2tp/{l2tp.api, l2tp_api.c}
+ * BFD APIs: see .../src/vnet/bfd/{bfd.api, bfd_api.c}
+ * IPSEC APIs: see .../src/vnet/ipsec/{ipsec.api, ipsec_api.c}
+ * IPSEC-GRE APIs: see .../src/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c}
+ * LISP APIs: see .../src/vnet/lisp/{lisp.api, lisp_api.c}
+ * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c}
+ * SESSION APIs: .../vnet/session/{session.api session_api.c}
+ * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c}
+ * SR APIs: see .../src/vnet/srv6/{sr.api, sr_api.c}
+ * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c}
+ * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c}
+ * DHCP APIs: see ... /src/vnet/dhcp/{dhcp.api, dhcp_api.c}
+ * COP APIs: see ... /src/vnet/cop/{cop.api, cop_api.c}
+ * POLICER APIs: see ... /src/vnet/policer/{policer.api, policer_api.c}
+ * STATS APIs: see .../src/vpp/stats/{stats.api, stats.c}
+ */
+
+/** \brief Create a new subinterface with the given vlan id
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the new vlan's parent interface
+ @param vlan_id - vlan tag of the new interface
+*/
+define create_vlan_subif
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 vlan_id;
+};
+
+/** \brief Reply for the vlan subinterface create request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index allocated for the new subinterface
+*/
+define create_vlan_subif_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Enable or Disable MPLS on an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface
+ @param enable - if non-zero enable, else disable
+*/
+autoreply define sw_interface_set_mpls_enable
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 enable;
+};
+
+/** \brief Proxy ARP add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - VRF / Fib table ID
+ @param is_add - 1 if adding the Proxy ARP range, 0 if deleting
+ @param low_address[4] - Low address of the Proxy ARP range
+ @param hi_address[4] - High address of the Proxy ARP range
+*/
+autoreply define proxy_arp_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 vrf_id;
+ u8 is_add;
+ u8 low_address[4];
+ u8 hi_address[4];
+};
+
+/** \brief Proxy ARP add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - Which interface to enable / disable Proxy Arp on
+ @param enable_disable - 1 to enable Proxy ARP on interface, 0 to disable
+*/
+autoreply define proxy_arp_intfc_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* 1 = on, 0 = off */
+ u8 enable_disable;
+};
+
+/** \brief Reset VRF (remove all routes etc) request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - 1 to reset an IPv6 VRF, 0 for IPv4
+ @param vrf_id - ID of the FIB table / VRF to reset
+*/
+autoreply define reset_vrf
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+ u32 vrf_id;
+};
+
+/** \brief OAM event structure
+ @param dst_address[] - address of the target the event refers to
+ @param state - new OAM state of the target
+*/
+define oam_event
+{
+ u8 dst_address[4];
+ u8 state;
+};
+
+/** \brief Want OAM events request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - enable if non-zero, else disable
+ @param pid - pid of the requesting process
+*/
+autoreply define want_oam_events
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief OAM add / del target request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - vrf_id of the target
+ @param src_address[] - source address to use for the updates
+ @param dst_address[] - destination address of the target
+ @param is_add - add target if non-zero, else delete
+*/
+autoreply define oam_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 vrf_id;
+ u8 src_address[4];
+ u8 dst_address[4];
+ u8 is_add;
+};
+
+/** \brief Reset fib table request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - vrf/table id of the fib table to reset
+ @param is_ipv6 - an ipv6 fib to reset if non-zero, else ipv4
+*/
+autoreply define reset_fib
+{
+ u32 client_index;
+ u32 context;
+ u32 vrf_id;
+ u8 is_ipv6;
+};
+
+/** \brief Create loopback interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mac_address - mac addr to assign to the interface if non-zero
+*/
+define create_loopback
+{
+ u32 client_index;
+ u32 context;
+ u8 mac_address[6];
+};
+
+/** \brief Create loopback interface response
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface that was created
+ @param retval - return code for the request
+*/
+define create_loopback_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Create loopback interface instance request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mac_address - mac addr to assign to the interface if non-zero
+ @param is_specified - if non-0, a specific user_instance is being requested
+ @param user_instance - requested instance, ~0 => dynamically allocate
+*/
+define create_loopback_instance
+{
+ u32 client_index;
+ u32 context;
+ u8 mac_address[6];
+ u8 is_specified;
+ u32 user_instance;
+};
+
+/** \brief Create loopback interface instance response
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface that was created
+ @param retval - return code for the request
+*/
+define create_loopback_instance_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Delete loopback interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface that was created
+*/
+autoreply define delete_loopback
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Control ping from client to api server request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define control_ping
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Control ping from the client to the server response
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param vpe_pid - the pid of the vpe, returned by the server
+*/
+define control_ping_reply
+{
+ u32 context;
+ i32 retval;
+ u32 client_index;
+ u32 vpe_pid;
+};
+
+/** \brief Process a vpe parser cli string request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param cmd_in_shmem - pointer to cli command string
+*/
+define cli
+{
+ u32 client_index;
+ u32 context;
+ u64 cmd_in_shmem;
+};
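+
+/** \brief Process a vpe parser cli string request, in-band version
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param length - length of the cmd string
+ @param cmd - the cli command string to process
+*/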
+define cli_inband
+{
+ u32 client_index;
+ u32 context;
+ u32 length;
+ u8 cmd[length];
+};
+
+/** \brief vpe parser cli string response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for request
+ @param reply_in_shmem - Reply string from cli processing if any
+*/
+define cli_reply
+{
+ u32 context;
+ i32 retval;
+ u64 reply_in_shmem;
+};
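+
+/** \brief vpe parser cli string response, in-band version
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for request
+ @param length - length of the reply string
+ @param reply - reply string from cli processing if any
+*/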
+define cli_inband_reply
+{
+ u32 context;
+ i32 retval;
+ u32 length;
+ u8 reply[length];
+};
+
+/** \brief Set max allowed ARP or ip6 neighbor entries request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - if non-zero set the ip6 neighbor limit, else the ip4 ARP limit
+ @param arp_neighbor_limit - the new limit, defaults are ~ 50k
+*/
+autoreply define set_arp_neighbor_limit
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+ u32 arp_neighbor_limit;
+};
+
+/** \brief L2 interface patch add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param rx_sw_if_index - receive side interface
+ @param tx_sw_if_index - transmit side interface
+ @param is_add - if non-zero set up the interface patch, else remove it
+*/
+autoreply define l2_patch_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+ u8 is_add;
+};
+
+/** \brief Interface set vpath request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+ @param enable - if non-zero enable, else disable
+*/
+autoreply define sw_interface_set_vpath
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 enable;
+};
+
+/** \brief Set L2 XConnect between two interfaces request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param rx_sw_if_index - Receive interface index
+ @param tx_sw_if_index - Transmit interface index
+ @param enable - enable xconnect if not 0, else set to L3 mode
+*/
+autoreply define sw_interface_set_l2_xconnect
+{
+ u32 client_index;
+ u32 context;
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+ u8 enable;
+};
+
+/** \brief Interface bridge mode request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param rx_sw_if_index - the interface
+ @param bd_id - bridge domain id
+ @param bvi - Setup interface as a bvi, bridge mode only
+ @param shg - Shared horizon group, for bridge mode only
+ @param enable - Enable bridge mode if not 0, else set to L3 mode
+*/
+autoreply define sw_interface_set_l2_bridge
+{
+ u32 client_index;
+ u32 context;
+ u32 rx_sw_if_index;
+ u32 bd_id;
+ u8 shg;
+ u8 bvi;
+ u8 enable;
+};
+
+/** \brief Set bridge domain ip to mac entry request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd_id - the bridge domain the entry belongs to
+ @param is_add - if non-zero, add the entry, else clear it
+ @param is_ipv6 - if non-zero, ipv6 address, else ipv4 address
+ @param ip_address - IP address of the entry
+ @param mac_address - MAC address of the entry
+*/
+autoreply define bd_ip_mac_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 ip_address[16];
+ u8 mac_address[6];
+};
+
+/** \brief Set/unset the classification table for an interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - ipv6 if non-zero, else ipv4
+ @param sw_if_index - interface to associate with the table
+ @param table_index - index of the table, if ~0 unset the table
+*/
+autoreply define classify_set_interface_ip_table
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+ u32 sw_if_index;
+ u32 table_index; /* ~0 => off */
+};
+
+/** \brief Set/unset l2 classification tables for an interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to set/unset tables for
+ @param ip4_table_index - ip4 index, use ~0 for all 3 indexes to unset
+ @param ip6_table_index - ip6 index
+ @param other_table_index - other index
+ @param is_input - if non-zero, tables are applied to input packets, else output
+*/
+autoreply define classify_set_interface_l2_tables
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* 3 x ~0 => off */
+ u32 ip4_table_index;
+ u32 ip6_table_index;
+ u32 other_table_index;
+ u8 is_input;
+};
+
+/** \brief Get node index using name request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param node_name[] - name of the node
+*/
+define get_node_index
+{
+ u32 client_index;
+ u32 context;
+ u8 node_name[64];
+};
+
+/** \brief Get node index using name reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param node_index - index of the desired node if found, else ~0
+*/
+define get_node_index_reply
+{
+ u32 context;
+ i32 retval;
+ u32 node_index;
+};
+
+/** \brief Set the next node for a given node request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param node_name[] - node to add the next node to
+ @param next_name[] - node to add as the next node
+*/
+define add_node_next
+{
+ u32 client_index;
+ u32 context;
+ u8 node_name[64];
+ u8 next_name[64];
+};
+
+/** \brief Set the next node for a given node response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the add next node request
+ @param next_index - the index of the next node if success, else ~0
+*/
+define add_node_next_reply
+{
+ u32 context;
+ i32 retval;
+ u32 next_index;
+};
+
+/** \brief L2 interface ethernet flow point filtering enable/disable request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to enable/disable filtering on
+ @param enable_disable - if non-zero enable filtering, else disable
+*/
+autoreply define l2_interface_efp_filter
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 enable_disable;
+};
+
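+/** \brief Create a new subinterface from the given template
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the parent interface
+ @param sub_id - id of the new subinterface
+*/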
+define create_subif
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 sub_id;
+
+ /* These fields map directly onto the subif template */
+ u8 no_tags;
+ u8 one_tag;
+ u8 two_tags;
+ u8 dot1ad; // 0 = dot1q, 1=dot1ad
+ u8 exact_match;
+ u8 default_sub;
+ u8 outer_vlan_id_any;
+ u8 inner_vlan_id_any;
+ u16 outer_vlan_id;
+ u16 inner_vlan_id;
+};
+
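+/** \brief Create subinterface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param sw_if_index - software index allocated for the new subinterface
+*/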
+define create_subif_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief show version
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define show_version
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief show version response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param program - name of the program (vpe)
+ @param version - version of the program
+ @param build_date - date and time the program was built
+ @param build_directory - root of the workspace where the program was built
+*/
+define show_version_reply
+{
+ u32 context;
+ i32 retval;
+ u8 program[32];
+ u8 version[32];
+ u8 build_date[32];
+ u8 build_directory[256];
+};
+
+/* Gross kludge, DGMS */
+autoreply define interface_name_renumber
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 new_show_dev_instance;
+};
+
+/** \brief Register for ip4 arp resolution events
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 => register for events, 0 => cancel registration
+ @param pid - sender's pid
+ @param address - the exact ip4 address of interest
+*/
+autoreply define want_ip4_arp_events
+{
+ u32 client_index;
+ u32 context;
+ u8 enable_disable;
+ u32 pid;
+ u32 address;
+};
+
+/** \brief Tell client about an ip4 arp resolution event
+ @param client_index - opaque cookie to identify the sender
+ @param address - the exact ip4 address of interest
+ @param pid - client pid registered to receive notification
+ @param sw_if_index - interface which received ARP packet
+ @param new_mac - the new mac address
+ @param mac_ip - 0: resolution event, 1: mac/ip binding in bd
+*/
+define ip4_arp_event
+{
+ u32 client_index;
+ u32 address;
+ u32 pid;
+ u32 sw_if_index;
+ u8 new_mac[6];
+ u8 mac_ip;
+};
+
+/** \brief Register for ip6 nd resolution events
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 => register for events, 0 => cancel registration
+ @param pid - sender's pid
+ @param address - the exact ip6 address of interest
+*/
+autoreply define want_ip6_nd_events
+{
+ u32 client_index;
+ u32 context;
+ u8 enable_disable;
+ u32 pid;
+ u8 address[16];
+};
+
+/** \brief Tell client about an ip6 nd resolution or mac/ip event
+ @param client_index - opaque cookie to identify the sender
+ @param pid - client pid registered to receive notification
+ @param sw_if_index - interface which received ARP packet
+ @param address - the exact ip6 address of interest
+ @param new_mac - the new mac address
+ @param mac_ip - 0: resolution event, 1: mac/ip binding in bd
+*/
+define ip6_nd_event
+{
+ u32 client_index;
+ u32 pid;
+ u32 sw_if_index;
+ u8 address[16];
+ u8 new_mac[6];
+ u8 mac_ip;
+};
+
+/** \brief Set/unset input ACL interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to set/unset input ACL
+ @param ip4_table_index - ip4 classify table index (~0 for skip)
+ @param ip6_table_index - ip6 classify table index (~0 for skip)
+ @param l2_table_index - l2 classify table index (~0 for skip)
+ @param is_add - Set input ACL if non-zero, else unset
+ Note: User is recommended to use just one valid table_index per call.
+ (ip4_table_index, ip6_table_index, or l2_table_index)
+*/
+autoreply define input_acl_set_interface
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 ip4_table_index;
+ u32 ip6_table_index;
+ u32 l2_table_index;
+ u8 is_add;
+};
+
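+/** \brief Get the vlib node graph request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/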
+define get_node_graph
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief get_node_graph_reply
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param reply_in_shmem - result from vlib_node_serialize, in shared
+ memory. Process with vlib_node_unserialize, remember to switch
+ heaps and free the result.
+*/
+
+define get_node_graph_reply
+{
+ u32 context;
+ i32 retval;
+ u64 reply_in_shmem;
+};
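+
+/*
+ * Client-side sketch of consuming this reply (illustrative only, not
+ * part of the API definition; assumes the usual vlibmemory client
+ * globals am / oldheap):
+ *
+ * u8 *reply = uword_to_pointer (mp->reply_in_shmem, u8 *);
+ * u8 *copy = vec_dup (reply); // private copy of the serialized graph
+ * oldheap = svm_push_data_heap (am->vlib_rp); // switch to the shared heap
+ * vec_free (reply); // free the shmem vector there
+ * svm_pop_heap (oldheap); // and switch back
+ * nodes = vlib_node_unserialize (copy); // process on our own heap
+ */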
+
+/** \brief IOAM enable : Enable in-band OAM
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - profile id
+ @param seqno - enable seqno processing if non-zero
+ @param analyse - enable analysis of iOAM at the decap node
+ @param pot_enable - Proof of Transit enabled or not flag
+ @param trace_enable - iOAM Trace enabled or not flag
+ @param node_id - iOAM node identifier
+*/
+autoreply define ioam_enable
+{
+ u32 client_index;
+ u32 context;
+ u16 id;
+ u8 seqno;
+ u8 analyse;
+ u8 pot_enable;
+ u8 trace_enable;
+ u32 node_id;
+};
+
+/** \brief iOAM disable
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - iOAM profile id
+*/
+autoreply define ioam_disable
+{
+ u32 client_index;
+ u32 context;
+ u16 id;
+};
+
+/** \brief Query relative index via node names
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param node_name - name of node to find relative index from
+ @param next_name - next node from node_name to find relative index of
+*/
+define get_next_index
+{
+ u32 client_index;
+ u32 context;
+ u8 node_name[64];
+ u8 next_name[64];
+};
+
+/** \brief Reply for get next node index
+ @param context - sender context which was passed in the request
+ @param retval - return value
+ @param next_index - index of the next_node
+*/
+define get_next_index_reply
+{
+ u32 context;
+ i32 retval;
+ u32 next_index;
+};
+
+/** \brief PacketGenerator create interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param interface_id - interface index
+*/
+define pg_create_interface
+{
+ u32 client_index;
+ u32 context;
+ u32 interface_id;
+};
+
+/** \brief PacketGenerator create interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - sw index of the created pg interface
+*/
+define pg_create_interface_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief PacketGenerator capture packets on given interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param interface_id - pg interface index
+ @param is_enabled - 1 if enabling streams, 0 if disabling
+ @param count - number of packets to be captured
+ @param pcap_name_length - length of the pcap file name
+ @param pcap_file_name - pcap file name to store captured packets
+*/
+autoreply define pg_capture
+{
+ u32 client_index;
+ u32 context;
+ u32 interface_id;
+ u8 is_enabled;
+ u32 count;
+ u32 pcap_name_length;
+ u8 pcap_file_name[pcap_name_length];
+};
+
+/** \brief Enable / disable packet generator request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_enabled - 1 if enabling streams, 0 if disabling
+ @param stream_name_length - length of the stream name
+ @param stream_name - stream to be enabled/disabled, if not specified handle all streams
+*/
+autoreply define pg_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u8 is_enabled;
+ u32 stream_name_length;
+ u8 stream_name[stream_name_length];
+};
+
+/** \brief Configure IP source and L4 port-range check
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - 1 if source address type is IPv6
+ @param is_add - 1 if add, 0 if delete
+ @param mask_length - mask length for address entry
+ @param address - array of address bytes
+ @param number_of_ranges - length of low_port and high_port arrays (must match)
+ @param low_ports[32] - up to 32 low end of port range entries (must have corresponding high_ports entry)
+ @param high_ports[32] - up to 32 high end of port range entries (must have corresponding low_ports entry)
+ @param vrf_id - fib table/vrf id to associate the source and port-range check with
+ @note To specify a single port set low_port and high_port entry the same
+*/
+autoreply define ip_source_and_port_range_check_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+ u8 is_add;
+ u8 mask_length;
+ u8 address[16];
+ u8 number_of_ranges;
+ u16 low_ports[32];
+ u16 high_ports[32];
+ u32 vrf_id;
+};
+
+/** \brief Set interface source and L4 port-range request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param sw_if_index - interface index
+ @param tcp_in_vrf_id - VRF associated with the ingress TCP port-range check
+ @param tcp_out_vrf_id - VRF associated with the egress TCP port-range check
+ @param udp_in_vrf_id - VRF associated with the ingress UDP port-range check
+ @param udp_out_vrf_id - VRF associated with the egress UDP port-range check
+*/
+autoreply define ip_source_and_port_range_check_interface_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u32 sw_if_index;
+ u32 tcp_in_vrf_id;
+ u32 tcp_out_vrf_id;
+ u32 udp_in_vrf_id;
+ u32 udp_out_vrf_id;
+};
+
+/** \brief Delete sub interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - sw index of the interface that was created by create_subif
+*/
+autoreply define delete_subif {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Punt traffic to the host
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add punt if non-zero, else delete
+ @param ipv - L3 protocol 4 - IPv4, 6 - IPv6, ~0 - All
+ @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported
+ @param l4_port - TCP/UDP port to be punted
+*/
+autoreply define punt {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 ipv;
+ u8 l4_protocol;
+ u16 l4_port;
+};
+
+/** \brief Punt traffic to the host via socket
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param header_version - expected meta data header version (currently 1)
+ @param is_ip4 - L3 protocol 1 - IPv4, 0 - IPv6
+ @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported
+ @param l4_port - TCP/UDP port to be punted
+ @param pathname - client socket pathname (Linux sun_path, see unix(7))
+*/
+define punt_socket_register {
+ u32 client_index;
+ u32 context;
+ u32 header_version;
+ u8 is_ip4;
+ u8 l4_protocol;
+ u16 l4_port;
+ u8 pathname[108]; /* Linux sun_path defined to be 108 bytes, see unix(7) */
+};
+
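+/** \brief Punt socket register response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param pathname - socket pathname
+*/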
+define punt_socket_register_reply
+{
+ u32 context;
+ i32 retval;
+ u8 pathname[64];
+};
+
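+/** \brief Deregister a punt-to-host socket
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - L3 protocol 1 - IPv4, 0 - IPv6
+ @param l4_protocol - L4 protocol of the punt to remove
+ @param l4_port - TCP/UDP port of the punt to remove
+*/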
+autoreply define punt_socket_deregister {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 l4_protocol;
+ u16 l4_port;
+};
+
+/** \brief Feature path enable/disable request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface
+ @param enable - 1 = on, 0 = off
+*/
+autoreply define feature_enable_disable {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 enable;
+ u8 arc_name[64];
+ u8 feature_name[64];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/vpe_all_api_h.h b/src/vpp/api/vpe_all_api_h.h
new file mode 100644
index 00000000..d35a0535
--- /dev/null
+++ b/src/vpp/api/vpe_all_api_h.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Add to the bottom of the #include list, or elves will steal your
+ * keyboard in the middle of the night!
+ */
+
+/* Include the (first) vlib-api API definition layer */
+#include <vlibmemory/vl_memory_api_h.h>
+
+/* Include the (second) vnet API definition layer */
+#define included_from_layer_3
+#include <vnet/vnet_all_api_h.h>
+#undef included_from_layer_3
+
+/* Include the current layer (third) vpp API definition layer */
+#include <vpp/api/vpe.api.h>
+
+/* Include stats APIs */
+#include <vpp/stats/stats.api.h>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/vpe_msg_enum.h b/src/vpp/api/vpe_msg_enum.h
new file mode 100644
index 00000000..4fcc1c8c
--- /dev/null
+++ b/src/vpp/api/vpe_msg_enum.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vpe_msg_enum_h
+#define included_vpe_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
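+/*
+ * Expand each vl_msg_id(VL_API_FOO, handler) entry contributed by the
+ * included API headers into a bare enum member, producing one message
+ * id per API message in definition order.
+ */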
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+ VL_ILLEGAL_MESSAGE_ID = 0,
+#include <vpp/api/vpe_all_api_h.h>
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_vpe_msg_enum_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/vpp_get_metrics.c b/src/vpp/api/vpp_get_metrics.c
new file mode 100644
index 00000000..3474133d
--- /dev/null
+++ b/src/vpp/api/vpp_get_metrics.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <pwd.h>
+#include <grp.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/api_errno.h>
+
+#include <svm/svmdb.h>
+
+svmdb_client_t *c;
+volatile int signal_received;
+
+static void
+unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
+{
+ static int once;
+
+ if (once)
+ exit (1);
+
+ once = 1;
+ signal_received = 1;
+}
+
+static void
+setup_signal_handlers (void)
+{
+ uword i;
+ struct sigaction sa;
+
+ for (i = 1; i < 32; i++)
+ {
+ memset (&sa, 0, sizeof (sa));
+ sa.sa_sigaction = (void *) unix_signal_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ switch (i)
+ {
+ /* these signals take the default action */
+ case SIGABRT:
+ case SIGKILL:
+ case SIGSTOP:
+ case SIGUSR1:
+ case SIGUSR2:
+ continue;
+
+ /* ignore SIGPIPE, SIGCHLD */
+ case SIGPIPE:
+ case SIGCHLD:
+ sa.sa_sigaction = (void *) SIG_IGN;
+ break;
+
+ /* catch and handle all other signals */
+ default:
+ break;
+ }
+
+ if (sigaction (i, &sa, 0) < 0)
+ {
+ clib_unix_warning ("sigaction %U", format_signal, i);
+ return;
+ }
+ }
+}
+
+int
+main (int argc, char **argv)
+{
+ unformat_input_t input;
+ char *chroot_path = 0;
+ u8 *chroot_path_u8;
+ int interval = 0;
+ f64 *vector_ratep, *rx_ratep, *sig_error_ratep;
+ pid_t *vpp_pidp;
+ svmdb_map_args_t _ma, *ma = &_ma;
+ int uid, gid, rv;
+ struct passwd _pw, *pw;
+ struct group _grp, *grp;
+ char *s, buf[128];
+
+ unformat_init_command_line (&input, argv);
+
+ uid = geteuid ();
+ gid = getegid ();
+
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&input, "chroot %s", &chroot_path_u8))
+ {
+ chroot_path = (char *) chroot_path_u8;
+ }
+ else if (unformat (&input, "interval %d", &interval))
+ ;
+ else if (unformat (&input, "uid %d", &uid))
+ ;
+ else if (unformat (&input, "gid %d", &gid))
+ ;
+ else if (unformat (&input, "uid %s", &s))
+ {
+ /* lookup the username */
+ pw = NULL;
+ rv = getpwnam_r (s, &_pw, buf, sizeof (buf), &pw);
+ if (rv != 0)
+ {
+ fformat (stderr, "cannot fetch username %s", s);
+ exit (1);
+ }
+ if (pw == NULL)
+ {
+ fformat (stderr, "username %s does not exist", s);
+ exit (1);
+ }
+ vec_free (s);
+ uid = pw->pw_uid;
+ }
+ else if (unformat (&input, "gid %s", &s))
+ {
+ /* lookup the group name */
+ grp = NULL;
+ rv = getgrnam_r (s, &_grp, buf, sizeof (buf), &grp);
+ if (rv != 0)
+ {
+ fformat (stderr, "cannot fetch group %s", s);
+ exit (1);
+ }
+ if (grp == NULL)
+ {
+ fformat (stderr, "group %s does not exist", s);
+ exit (1);
+ }
+ vec_free (s);
+ gid = grp->gr_gid;
+ }
+ else
+ {
+ fformat (stderr,
+ "usage: vpp_get_metrics [chroot <path>] [interval <nn>]\n");
+ exit (1);
+ }
+ }
+
+ setup_signal_handlers ();
+
+ memset (ma, 0, sizeof (*ma));
+ ma->root_path = chroot_path;
+ ma->uid = uid;
+ ma->gid = gid;
+
+ c = svmdb_map (ma);
+
+ vpp_pidp =
+ svmdb_local_get_variable_reference (c, SVMDB_NAMESPACE_VEC, "vpp_pid");
+ vector_ratep =
+ svmdb_local_get_variable_reference (c, SVMDB_NAMESPACE_VEC,
+ "vpp_vector_rate");
+ rx_ratep =
+ svmdb_local_get_variable_reference (c, SVMDB_NAMESPACE_VEC,
+ "vpp_input_rate");
+ sig_error_ratep =
+ svmdb_local_get_variable_reference (c, SVMDB_NAMESPACE_VEC,
+ "vpp_sig_error_rate");
+
+ /*
+ * Make sure vpp is actually running. Otherwise, there's every
+ * chance that the database region will be wiped out by the
+ * process monitor script
+ */
+
+ if (vpp_pidp == 0 || vector_ratep == 0 || rx_ratep == 0
+ || sig_error_ratep == 0)
+ {
+ fformat (stdout, "vpp not running\n");
+ exit (1);
+ }
+
+ do
+ {
+ /*
+ * Once vpp exits, the svm db region will be recreated...
+ * Can't use kill (*vpp_pidp, 0) if running as non-root /
+ * accessing the shared-VM database via group perms.
+ */
+ if (*vpp_pidp == 0)
+ {
+ fformat (stdout, "vpp not running\n");
+ exit (1);
+ }
+ fformat (stdout,
+ "%d: vpp_vector_rate=%.2f, vpp_input_rate=%f, vpp_sig_error_rate=%f\n",
+ *vpp_pidp, *vector_ratep, *rx_ratep, *sig_error_ratep);
+
+ if (interval)
+ sleep (interval);
+ if (signal_received)
+ break;
+ }
+ while (interval);
+
+ svmdb_unmap (c);
+ exit (0);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/app/sticky_hash.c b/src/vpp/app/sticky_hash.c
new file mode 100644
index 00000000..5569c677
--- /dev/null
+++ b/src/vpp/app/sticky_hash.c
@@ -0,0 +1,581 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/l2/l2_classify.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vppinfra/error.h>
+
+typedef struct
+{
+ u32 fwd_entry_index;
+ u32 rev_entry_index;
+ /* Not strictly needed, for show command */
+ u32 fib_index;
+} sticky_hash_session_t;
+
+typedef struct
+{
+ u32 cached_next_index;
+
+ /* next index added to l2_classify */
+ u32 fwd_miss_next_index;
+
+ /* session pool */
+ sticky_hash_session_t *sessions;
+
+ /* Forward and reverse data session setup buffers */
+ u8 fdata[3 * sizeof (u32x4)];
+ u8 rdata[3 * sizeof (u32x4)];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ vnet_classify_main_t *vnet_classify_main;
+ l2_input_classify_main_t *l2_input_classify_main;
+}
+sticky_hash_main_t;
+
+typedef struct
+{
+ /* $$$$ fill in with per-pkt trace data */
+ u32 next_index;
+ u32 sw_if_index;
+} sticky_hash_miss_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_sticky_hash_miss_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sticky_hash_miss_trace_t *t = va_arg (*args, sticky_hash_miss_trace_t *);
+
+ s = format (s, "STICKY_HASH_MISS: sw_if_index %d", t->sw_if_index);
+ return s;
+}
+
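+/* Classify match data / mask layout: an ethernet header followed by an ip4 header */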
+typedef CLIB_PACKED (struct
+ {
+ ethernet_header_t eh; ip4_header_t ip;
+ }) classify_data_or_mask_t;
+
+sticky_hash_main_t sticky_hash_main;
+
+vlib_node_registration_t sticky_hash_miss_node;
+
+#define foreach_sticky_hash_miss_error \
+_(MISSES, "forward flow classify misses")
+
+typedef enum
+{
+#define _(sym,str) STICKY_HASH_MISS_ERROR_##sym,
+ foreach_sticky_hash_miss_error
+#undef _
+ STICKY_HASH_MISS_N_ERROR,
+} sticky_hash_miss_error_t;
+
+static char *sticky_hash_miss_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sticky_hash_miss_error
+#undef _
+};
+
+/*
+ * To drop a pkt and increment one of the previous counters:
+ *
+ * set b0->error = error_node->errors[STICKY_HASH_MISS_ERROR_EXAMPLE];
+ * set next0 to a disposition index bound to "error-drop".
+ *
+ * To manually increment the specific counter STICKY_HASH_MISS_ERROR_EXAMPLE:
+ *
+ * vlib_node_t *n = vlib_get_node (vm, sticky_hash_miss.index);
+ * u32 node_counter_base_index = n->error_heap_index;
+ * vlib_error_main_t * em = &vm->error_main;
+ * em->counters[node_counter_base_index + STICKY_HASH_MISS_ERROR_EXAMPLE] += 1;
+ *
+ */
+
+typedef enum
+{
+ STICKY_HASH_MISS_NEXT_IP4_INPUT,
+ STICKY_HASH_MISS_N_NEXT,
+} sticky_hash_miss_next_t;
+
+static uword
+sticky_hash_miss_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ sticky_hash_miss_next_t next_index;
+ sticky_hash_main_t *mp = &sticky_hash_main;
+ vlib_node_t *n = vlib_get_node (vm, sticky_hash_miss_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ vnet_classify_main_t *cm = mp->vnet_classify_main;
+ ip4_main_t *im = &ip4_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ u32 fib_index0, ft_index0, rt_index0;
+ vnet_classify_table_3_t *ft0, *rt0;
+ vnet_classify_entry_3_t *fe0, *re0;
+ classify_data_or_mask_t *h0;
+ u8 was_found0;
+ ip4_fib_t *fib0;
+ sticky_hash_session_t *s;
+ u32 tmp;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ next0 = mp->cached_next_index;
+
+ h0 = vlib_buffer_get_current (b0);
+
+ /* Add forward and reverse entries for this flow */
+ clib_memcpy (mp->fdata, h0, sizeof (mp->fdata));
+ clib_memcpy (mp->rdata, h0, sizeof (mp->rdata));
+
+ h0 = (classify_data_or_mask_t *) (mp->rdata);
+
+ /* swap src + dst addresses to form reverse data */
+ tmp = h0->ip.src_address.as_u32;
+ h0->ip.src_address.as_u32 = h0->ip.dst_address.as_u32;
+ h0->ip.dst_address.as_u32 = tmp;
+
+ /* dig up fwd + rev tables */
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+ fib0 = vec_elt_at_index (im->fibs, fib_index0);
+
+ ft_index0 = fib0->fwd_classify_table_index;
+ rt_index0 = fib0->rev_classify_table_index;
+
+ ft0 = (vnet_classify_table_3_t *)
+ pool_elt_at_index (cm->tables, ft_index0);
+ rt0 = (vnet_classify_table_3_t *)
+ pool_elt_at_index (cm->tables, rt_index0);
+
+ fe0 =
+ vnet_classify_find_or_add_entry_3 (ft0, mp->fdata, &was_found0);
+ fe0->next_index = L2_INPUT_CLASSIFY_NEXT_IP4_INPUT;
+ fe0->advance = sizeof (ethernet_header_t);
+
+ re0 = vnet_classify_find_or_add_entry_3 (rt0, mp->rdata, 0);
+ re0->next_index = L2_INPUT_CLASSIFY_NEXT_IP4_INPUT; /* $$$ FIXME */
+ re0->advance = sizeof (ethernet_header_t);
+
+ /* Note: we could get a whole vector of misses for the same sess */
+ if (was_found0 == 0)
+ {
+ pool_get (mp->sessions, s);
+
+ fe0->opaque_index = s - mp->sessions;
+ re0->opaque_index = s - mp->sessions;
+
+ s->fwd_entry_index = fe0 - ft0->entries;
+ s->rev_entry_index = re0 - rt0->entries;
+ s->fib_index = fib_index0;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ sticky_hash_miss_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ em->counters[node_counter_base_index +
+ STICKY_HASH_MISS_ERROR_MISSES] += 1;
+
+ vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sticky_hash_miss_node) = {
+ .function = sticky_hash_miss_node_fn,
+ .name = "sticky-hash-miss",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sticky_hash_miss_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(sticky_hash_miss_error_strings),
+ .error_strings = sticky_hash_miss_error_strings,
+
+ .n_next_nodes = STICKY_HASH_MISS_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [STICKY_HASH_MISS_NEXT_IP4_INPUT] = "ip4-input",
+ },
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+sticky_hash_miss_init (vlib_main_t * vm)
+{
+ sticky_hash_main_t *mp = &sticky_hash_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+ mp->vnet_classify_main = &vnet_classify_main;
+ mp->l2_input_classify_main = &l2_input_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sticky_hash_miss_init);
+
+static int ip4_sticky_hash_enable_disable
+ (sticky_hash_main_t * mp,
+ u32 fwd_sw_if_index, u8 * fwd_mask,
+ u32 rev_sw_if_index, u8 * rev_mask, u32 nbuckets, int enable_disable)
+{
+ ip4_main_t *im = &ip4_main;
+ u32 fib_index;
+ ip4_fib_t *fib;
+ vnet_classify_main_t *cm = mp->vnet_classify_main;
+ l2_input_classify_main_t *l2cm = mp->l2_input_classify_main;
+ vnet_classify_table_3_t *ft, *rt;
+
+ fib_index = vec_elt (im->fib_index_by_sw_if_index, fwd_sw_if_index);
+ fib = vec_elt_at_index (im->fibs, fib_index);
+
+ if (fib->fwd_classify_table_index == ~0)
+ {
+ /* Set up forward table */
+ ft = (vnet_classify_table_3_t *)
+ vnet_classify_new_table (cm, fwd_mask, nbuckets,
+ 0 /* skip */ , 3 /* match */ );
+ fib->fwd_classify_table_index
+ = ft - (vnet_classify_table_3_t *) cm->tables;
+ mp->fwd_miss_next_index =
+ vlib_node_add_next (mp->vlib_main, l2_input_classify_node.index,
+ sticky_hash_miss_node.index);
+ ft->miss_next_index = mp->fwd_miss_next_index;
+
+ /* Set up reverse table */
+ rt = (vnet_classify_table_3_t *)
+ vnet_classify_new_table (cm, rev_mask, nbuckets,
+ 0 /* skip */ , 3 /* match */ );
+ fib->rev_classify_table_index
+ = rt - (vnet_classify_table_3_t *) cm->tables;
+ }
+
+ vec_validate
+ (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4],
+ fwd_sw_if_index);
+
+ vec_validate
+ (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6],
+ fwd_sw_if_index);
+
+ vec_validate
+ (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER],
+ fwd_sw_if_index);
+
+ l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4]
+ [fwd_sw_if_index] = fib->fwd_classify_table_index;
+
+ l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6]
+ [fwd_sw_if_index] = ~0;
+
+ l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER]
+ [fwd_sw_if_index] = ~0;
+
+
+ vec_validate
+ (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4],
+ rev_sw_if_index);
+
+ vec_validate
+ (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6],
+ rev_sw_if_index);
+
+ vec_validate
+ (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER],
+ rev_sw_if_index);
+
+
+ l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4]
+ [rev_sw_if_index] = fib->rev_classify_table_index;
+
+ l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6]
+ [rev_sw_if_index] = ~0;
+
+ l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER]
+ [rev_sw_if_index] = ~0;
+
+ vnet_l2_input_classify_enable_disable (fwd_sw_if_index, enable_disable);
+ vnet_l2_input_classify_enable_disable (rev_sw_if_index, enable_disable);
+ return 0;
+}
+
+static clib_error_t *
+ip4_sticky_hash_init_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 fwd_sw_if_index = ~0, rev_sw_if_index = ~0;
+ int enable_disable = 1;
+ u32 nbuckets = 2;
+ int rv;
+ sticky_hash_main_t *mp = &sticky_hash_main;
+ classify_data_or_mask_t fwd_mask, rev_mask;
+ u8 *fm = 0, *rm = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "fwd %U", unformat_vnet_sw_interface, mp->vnet_main,
+ &fwd_sw_if_index))
+ ;
+      else if (unformat
+ (input, "rev %U", unformat_vnet_sw_interface, mp->vnet_main,
+ &rev_sw_if_index))
+ ;
+ else if (unformat (input, "nbuckets %d", &nbuckets))
+ ;
+ else if (unformat (input, "disable"))
+ enable_disable = 0;
+
+ else
+ break;
+ }
+
+ nbuckets = 1 << max_log2 (nbuckets);
+
+ if (fwd_sw_if_index == ~0)
+ return clib_error_return (0, "fwd interface not set");
+
+ if (rev_sw_if_index == ~0)
+ return clib_error_return (0, "rev interface not set");
+
+ if (!is_pow2 (nbuckets))
+ return clib_error_return (0, "nbuckets %d not a power of 2", nbuckets);
+
+ ASSERT (sizeof (fwd_mask) <= 3 * sizeof (u32x4));
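+  /* The classifier matches in 16-byte (u32x4) units: skip 0 vectors,
+     match 3, i.e. the first 48 bytes of the packet, which covers the
+     34-byte ethernet + ip4 region laid out by classify_data_or_mask_t. */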
+
+ /* Mask on src/dst address, depending on direction */
+ memset (&fwd_mask, 0, sizeof (fwd_mask));
+ memset (&fwd_mask.ip.src_address, 0xff, 4);
+
+ memset (&rev_mask, 0, sizeof (rev_mask));
+ memset (&rev_mask.ip.dst_address, 0xff, 4);
+
+ vec_validate (fm, 3 * sizeof (u32x4) - 1);
+ vec_validate (rm, 3 * sizeof (u32x4) - 1);
+
+ clib_memcpy (fm, &fwd_mask, sizeof (fwd_mask));
+ clib_memcpy (rm, &rev_mask, sizeof (rev_mask));
+
+ rv = ip4_sticky_hash_enable_disable (mp, fwd_sw_if_index, fm,
+ rev_sw_if_index, rm,
+ nbuckets, enable_disable);
+
+ vec_free (fm);
+ vec_free (rm);
+ switch (rv)
+ {
+ case 0:
+ return 0;
+
+ default:
+ return clib_error_return (0,
+ "ip4_sticky_hash_enable_disable returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sticky_hash_init_command, static) = {
+ .path = "ip sticky classify",
+ .short_help = "ip sticky classify fwd <intfc> rev <intfc> "
+ "[nbuckets <nn>][disable]",
+ .function = ip4_sticky_hash_init_command_fn,
+};
+/* *INDENT-ON* */
+
+
+u8 *
+format_sticky_hash_session (u8 * s, va_list * args)
+{
+ sticky_hash_main_t *mp = va_arg (*args, sticky_hash_main_t *);
+ sticky_hash_session_t *session = va_arg (*args, sticky_hash_session_t *);
+ vnet_classify_table_3_t *t;
+ vnet_classify_entry_3_t *e;
+ ip4_main_t *im = &ip4_main;
+ vnet_classify_main_t *cm = mp->vnet_classify_main;
+ ip4_fib_t *fib;
+ classify_data_or_mask_t *match;
+
+ fib = vec_elt_at_index (im->fibs, session->fib_index);
+
+ t = (vnet_classify_table_3_t *)
+ pool_elt_at_index (cm->tables, fib->fwd_classify_table_index);
+ e = pool_elt_at_index (t->entries, session->fwd_entry_index);
+ match = (classify_data_or_mask_t *) (e->key);
+
+ s = format
+ (s,
+ "[%6d] fwd src %U next index %d session %d fib %d\n"
+ " hits %lld last-heard %.6f\n",
+ e - t->entries,
+ format_ip4_address, &match->ip.src_address,
+ e->next_index, e->opaque_index, fib->table_id, e->hits, e->last_heard);
+
+ if (e->opaque_index != session - mp->sessions)
+ s = format (s, "WARNING: forward session index mismatch!\n");
+
+ t = (vnet_classify_table_3_t *)
+ pool_elt_at_index (cm->tables, fib->rev_classify_table_index);
+ e = pool_elt_at_index (t->entries, session->rev_entry_index);
+ match = (classify_data_or_mask_t *) (e->key);
+
+ s = format
+ (s,
+ "[%6d] rev dst %U next index %d session %d\n"
+ " hits %lld last-heard %.6f\n",
+ e - t->entries,
+ format_ip4_address, &match->ip.dst_address,
+ e->next_index, e->opaque_index, e->hits, e->last_heard);
+
+ if (e->opaque_index != session - mp->sessions)
+ s = format (s, "WARNING: reverse session index mismatch!\n");
+ s = format (s, "---------\n");
+
+ return s;
+}
+
+static clib_error_t *
+show_ip4_sticky_hash_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ sticky_hash_main_t *mp = &sticky_hash_main;
+ sticky_hash_session_t *s;
+ int verbose = 0;
+ int dump_classifier_tables = 0;
+ ip4_fib_t *fib;
+ ip4_main_t *im4 = &ip4_main;
+ vnet_classify_main_t *cm = mp->vnet_classify_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "dump-tables")
+ || unformat (input, "dump-classifier-tables"))
+ dump_classifier_tables = 1;
+ else
+ break;
+ }
+
+  if (pool_elts (mp->sessions) == 0)
+    vlib_cli_output (vm, "No ip sticky hash sessions");
+  else
+    vlib_cli_output (vm, "%d active sessions\n", pool_elts (mp->sessions));
+
+ vec_foreach (fib, im4->fibs)
+ {
+ if (fib->fwd_classify_table_index != ~0)
+ vlib_cli_output (vm, "fib %d fwd table: \n%U",
+ fib->table_id,
+ format_classify_table,
+ cm,
+ pool_elt_at_index
+ (cm->tables, fib->fwd_classify_table_index),
+ dump_classifier_tables);
+ if (fib->rev_classify_table_index != ~0)
+ vlib_cli_output (vm, "fib %d rev table: \n%U",
+ fib->table_id,
+ format_classify_table,
+ cm,
+ pool_elt_at_index
+ (cm->tables, fib->rev_classify_table_index),
+ dump_classifier_tables);
+ }
+
+ if (verbose)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (s, mp->sessions,
+ ({
+ vlib_cli_output (vm, "%U", format_sticky_hash_session, mp, s);
+ }));
+ /* *INDENT-ON* */
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sticky_hash_command, static) = {
+ .path = "show sticky classify",
+    .short_help = "show sticky classify [verbose] [dump-tables]",
+ .function = show_ip4_sticky_hash_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/app/version.c b/src/vpp/app/version.c
new file mode 100644
index 00000000..0a2c7fd4
--- /dev/null
+++ b/src/vpp/app/version.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vppinfra/cpu.h>
+#include <vpp/app/version.h>
+
+static char *vpe_version_string =
+ "vpp v" VPP_BUILD_VER
+ " built by " VPP_BUILD_USER " on " VPP_BUILD_HOST " at " VPP_BUILD_DATE;
+
+static char *vpe_compiler =
+#if defined(__INTEL_COMPILER)
+#define __(x) #x
+#define _(x) __(x)
+ "icc " _(__INTEL_COMPILER) " (" __VERSION__ ")";
+#undef _
+#undef __
+#elif defined(__clang__)
+ "Clang/LLVM " __clang_version__;
+#elif defined (__GNUC__)
+ "GCC " __VERSION__;
+#else
+ "unknown compiler";
+#endif
+
+static clib_error_t *
+show_vpe_version_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ if (unformat (input, "verbose"))
+ {
+#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c);
+ _("Version", "%s", "v" VPP_BUILD_VER);
+ _("Compiled by", "%s", VPP_BUILD_USER);
+ _("Compile host", "%s", VPP_BUILD_HOST);
+ _("Compile date", "%s", VPP_BUILD_DATE);
+ _("Compile location", "%s", VPP_BUILD_TOPDIR);
+ _("Compiler", "%s", vpe_compiler);
+ _("Current PID", "%d", getpid ());
+#undef _
+ }
+ else
+ vlib_cli_output (vm, "%s", vpe_version_string);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
+ .path = "show version",
+ .short_help = "show version information",
+ .function = show_vpe_version_command_fn,
+};
+/* *INDENT-ON* */
+
+char *
+vpe_api_get_build_directory (void)
+{
+ return VPP_BUILD_TOPDIR;
+}
+
+char *
+vpe_api_get_version (void)
+{
+ return VPP_BUILD_VER;
+}
+
+char *
+vpe_api_get_build_date (void)
+{
+ return VPP_BUILD_DATE;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c
new file mode 100644
index 00000000..e19c23de
--- /dev/null
+++ b/src/vpp/app/vpe_cli.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_table.h>
+
+typedef struct
+{
+ u8 mac_addr[6];
+} mac_addr_t;
+
+static clib_error_t *
+virtual_ip_cmd_fn_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip46_address_t next_hop, *next_hops;
+ fib_route_path_t *rpaths;
+ fib_prefix_t prefix;
+ u8 mac_addr[6];
+ mac_addr_t *mac_addrs = 0;
+ u32 sw_if_index;
+ u32 i;
+ clib_error_t *error = NULL;
+
+ next_hops = NULL;
+ rpaths = NULL;
+ prefix.fp_len = 32;
+ prefix.fp_proto = FIB_PROTOCOL_IP4;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (!unformat (line_input, "%U %U",
+ unformat_ip4_address, &prefix.fp_addr.ip4,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "mac %U",
+ unformat_ethernet_address, &mac_addr))
+ {
+ mac_addr_t *ma;
+ vec_add2 (mac_addrs, ma, 1);
+ clib_memcpy (ma, mac_addr, sizeof (mac_addr));
+ }
+ else if (unformat (line_input, "next-hop %U",
+ unformat_ip4_address, &next_hop.ip4))
+ {
+ vec_add1 (next_hops, next_hop);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (vec_len (mac_addrs) == 0 || vec_len (mac_addrs) != vec_len (next_hops))
+ {
+      error = clib_error_return
+	(0, "mac and next-hop addresses must be given in pairs");
+ goto done;
+ }
+
+  /* Create special interface route /32's */
+
+ for (i = 0; i < vec_len (mac_addrs); i++)
+ {
+ fib_route_path_t *rpath;
+
+ adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4,
+ VNET_LINK_IP4,
+ &next_hops[i],
+ sw_if_index, mac_addrs[i].mac_addr);
+
+ vec_add2 (rpaths, rpath, 1);
+
+ rpath->frp_proto = DPO_PROTO_IP4;
+ rpath->frp_addr = next_hops[i];
+ rpath->frp_sw_if_index = sw_if_index;
+ rpath->frp_fib_index = ~0;
+ rpath->frp_weight = 1;
+ rpath->frp_label_stack = NULL;
+ }
+
+ fib_table_entry_path_add2 (0, // default FIB table
+ &prefix,
+ FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, rpaths);
+
+done:
+ vec_free (mac_addrs);
+ vec_free (next_hops);
+ vec_free (rpaths);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (virtual_ip_cmd_fn_command, static) = {
+ .path = "ip virtual",
+  .short_help = "ip virtual <addr> <interface> [mac <Mi> next-hop <Ni>]+",
+ .function = virtual_ip_cmd_fn_command_fn,
+};
+/* *INDENT-ON* */
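+
+/*
+ * Example (hypothetical addresses and interface name):
+ *
+ *   ip virtual 10.0.0.100 GigabitEthernet0/8/0
+ *      mac 00:01:02:03:04:05 next-hop 10.0.0.1
+ *
+ * installs 10.0.0.100/32 in the default FIB with a path through
+ * 10.0.0.1 on that interface, rewriting to the given MAC.
+ */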
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/app/vppctl.c b/src/vpp/app/vppctl.c
new file mode 100644
index 00000000..66fe00ab
--- /dev/null
+++ b/src/vpp/app/vppctl.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <signal.h>
+#include <termios.h>
+#include <unistd.h>
+#include <string.h>
+
+#define DEBUG 0
+
+#if DEBUG
+#define TELCMDS
+#define TELOPTS
+#endif
+
+#include <arpa/telnet.h>
+
+#include <vppinfra/mem.h>
+#include <vppinfra/format.h>
+#include <vppinfra/socket.h>
+
+#define SOCKET_FILE "/run/vpp/cli.sock"
+
+volatile int window_resized = 0;
+struct termios orig_tio;
+
+static void
+send_ttype (clib_socket_t * s, int is_interactive)
+{
+ char *term;
+
+ term = is_interactive ? getenv ("TERM") : "vppctl";
+ if (term == NULL)
+ term = "dumb";
+
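+  /* RFC 1091 terminal-type reply: IAC SB TTYPE IS(0) <name> IAC SE */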
+ clib_socket_tx_add_formatted (s, "%c%c%c" "%c%s" "%c%c",
+ IAC, SB, TELOPT_TTYPE, 0, term, IAC, SE);
+ clib_socket_tx (s);
+}
+
+static void
+send_naws (clib_socket_t * s)
+{
+ struct winsize ws;
+
+ if (ioctl (STDIN_FILENO, TIOCGWINSZ, &ws) < 0)
+ {
+ clib_unix_warning ("ioctl(TIOCGWINSZ)");
+ return;
+ }
+
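+  /* RFC 1073 window-size report: IAC SB NAWS <cols:u16> <rows:u16> IAC SE */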
+ clib_socket_tx_add_formatted (s, "%c%c%c" "%c%c%c%c" "%c%c",
+ IAC, SB, TELOPT_NAWS,
+ ws.ws_col >> 8, ws.ws_col & 0xff,
+ ws.ws_row >> 8, ws.ws_row & 0xff, IAC, SE);
+ clib_socket_tx (s);
+}
+
+static void
+signal_handler_winch (int signum)
+{
+ window_resized = 1;
+}
+
+static void
+signal_handler_term (int signum)
+{
+ tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_tio);
+}
+
+static u8 *
+process_input (u8 * str, clib_socket_t * s, int is_interactive,
+ int *sent_ttype)
+{
+ int i = 0;
+
+ while (i < vec_len (s->rx_buffer))
+ {
+ if (s->rx_buffer[i] == IAC)
+ {
+ if (s->rx_buffer[i + 1] == SB)
+ {
+ u8 *sb = 0;
+ char opt = s->rx_buffer[i + 2];
+ i += 3;
+ while (s->rx_buffer[i] != IAC)
+ vec_add1 (sb, s->rx_buffer[i++]);
+
+#if DEBUG
+ clib_warning ("SB %s\n %U", TELOPT (opt),
+ format_hexdump, sb, vec_len (sb));
+#endif
+ vec_free (sb);
+ i += 2;
+ if (opt == TELOPT_TTYPE)
+ {
+ send_ttype (s, is_interactive);
+ *sent_ttype = 1;
+ }
+ else if (is_interactive && opt == TELOPT_NAWS)
+ send_naws (s);
+ }
+ else
+ {
+#if DEBUG
+ clib_warning ("IAC at %d, IAC %s %s", i,
+ TELCMD (s->rx_buffer[i + 1]),
+ TELOPT (s->rx_buffer[i + 2]));
+#endif
+ i += 3;
+ }
+ }
+ else
+ vec_add1 (str, s->rx_buffer[i++]);
+ }
+ vec_reset_length (s->rx_buffer);
+ return str;
+}
+
+
+int
+main (int argc, char *argv[])
+{
+ clib_socket_t _s = { 0 }, *s = &_s;
+ clib_error_t *error = 0;
+ struct epoll_event event;
+ struct sigaction sa;
+ struct termios tio;
+ int efd = -1;
+ u8 *str = 0;
+ u8 *cmd = 0;
+ int do_quit = 0;
+ int is_interactive = 0;
+ int acked = 1; /* counts messages from VPP; starts at 1 */
+ int sent_ttype = 0;
+
+
+ clib_mem_init (0, 64ULL << 10);
+
+ /* process command line */
+ argc--;
+ argv++;
+
+ if (argc > 1 && strcmp (argv[0], "-s") == 0)
+ {
+ s->config = argv[1];
+ argc -= 2;
+ argv += 2;
+ }
+ else
+ s->config = SOCKET_FILE;
+
+ while (argc--)
+ cmd = format (cmd, "%s%c", (argv++)[0], argc ? ' ' : 0);
+
+ s->flags = CLIB_SOCKET_F_IS_CLIENT;
+
+ error = clib_socket_init (s);
+ if (error)
+ goto done;
+
+ is_interactive = isatty (STDIN_FILENO) && cmd == 0;
+
+ if (is_interactive)
+ {
+ /* Capture terminal resize events */
+ memset (&sa, 0, sizeof (struct sigaction));
+ sa.sa_handler = signal_handler_winch;
+ if (sigaction (SIGWINCH, &sa, 0) < 0)
+ {
+ error = clib_error_return_unix (0, "sigaction");
+ goto done;
+ }
+
+ /* Capture SIGTERM to reset tty settings */
+ sa.sa_handler = signal_handler_term;
+ if (sigaction (SIGTERM, &sa, 0) < 0)
+ {
+ error = clib_error_return_unix (0, "sigaction");
+ goto done;
+ }
+
+ /* Save the original tty state so we can restore it later */
+ if (tcgetattr (STDIN_FILENO, &orig_tio) < 0)
+ {
+ error = clib_error_return_unix (0, "tcgetattr");
+ goto done;
+ }
+
+ /* Tweak the tty settings */
+ tio = orig_tio;
+ /* echo off, canonical mode off, ext'd input processing off */
+ tio.c_lflag &= ~(ECHO | ICANON | IEXTEN);
+ tio.c_cc[VMIN] = 1; /* 1 byte at a time */
+ tio.c_cc[VTIME] = 0; /* no timer */
+
+ if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &tio) < 0)
+ {
+ error = clib_error_return_unix (0, "tcsetattr");
+ goto done;
+ }
+ }
+
+ efd = epoll_create1 (0);
+
+ /* register STDIN */
+ event.events = EPOLLIN | EPOLLPRI | EPOLLERR;
+ event.data.fd = STDIN_FILENO;
+ if (epoll_ctl (efd, EPOLL_CTL_ADD, STDIN_FILENO, &event) != 0)
+ {
+ /* ignore EPERM; it means stdin is something like /dev/null */
+ if (errno != EPERM)
+ {
+ error = clib_error_return_unix (0, "epoll_ctl[%d]", STDIN_FILENO);
+ goto done;
+ }
+ }
+
+ /* register socket */
+ event.events = EPOLLIN | EPOLLPRI | EPOLLERR;
+ event.data.fd = s->fd;
+ if (epoll_ctl (efd, EPOLL_CTL_ADD, s->fd, &event) != 0)
+ {
+ error = clib_error_return_unix (0, "epoll_ctl[%d]", s->fd);
+ goto done;
+ }
+
+ while (1)
+ {
+ int n;
+
+ if (window_resized)
+ {
+ window_resized = 0;
+ send_naws (s);
+ }
+
+ if ((n = epoll_wait (efd, &event, 1, -1)) < 0)
+ {
+ /* maybe we received signal */
+ if (errno == EINTR)
+ continue;
+
+ error = clib_error_return_unix (0, "epoll_wait");
+ goto done;
+ }
+
+ if (n == 0)
+ continue;
+
+ if (event.data.fd == STDIN_FILENO)
+ {
+ int n;
+ char c[100];
+
+ if (!sent_ttype)
+ continue; /* not ready for this yet */
+
+ n = read (STDIN_FILENO, c, sizeof (c));
+ if (n > 0)
+ {
+ memcpy (clib_socket_tx_add (s, n), c, n);
+ error = clib_socket_tx (s);
+ if (error)
+ goto done;
+ }
+ else if (n < 0)
+ clib_warning ("read rv=%d", n);
+ else /* EOF */
+ do_quit = 1;
+ }
+ else if (event.data.fd == s->fd)
+ {
+ error = clib_socket_rx (s, 100);
+ if (error)
+ break;
+
+ if (clib_socket_rx_end_of_file (s))
+ break;
+
+ str = process_input (str, s, is_interactive, &sent_ttype);
+
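+	  /* The CLI server NUL-terminates each response; write the
+	     printable bytes to stdout and count every NUL as an
+	     acknowledgement so the quit logic can wait for the reply
+	     to the last command sent. */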
+ if (vec_len (str) > 0)
+ {
+ int len = vec_len (str);
+ u8 *p = str, *q = str;
+
+ while (len)
+ {
+ /* Search for and skip NUL bytes */
+ while (q < (p + len) && *q)
+ q++;
+
+ n = write (STDOUT_FILENO, p, q - p);
+ if (n < 0)
+ {
+ error = clib_error_return_unix (0, "write");
+ goto done;
+ }
+
+ while (q < (p + len) && !*q)
+ {
+ q++;
+ acked++; /* every NUL is an acknowledgement */
+ }
+ len -= q - p;
+ p = q;
+ }
+
+ vec_reset_length (str);
+ }
+
+ if (do_quit && do_quit < acked)
+ {
+ /* Ask the other end to close the connection */
+ clib_socket_tx_add_formatted (s, "quit\n");
+ clib_socket_tx (s);
+ do_quit = 0;
+ }
+ if (cmd && sent_ttype)
+ {
+ /* We wait until after the TELNET TTYPE option has been sent.
+ * That is to make sure the session at the VPP end has switched
+ * to line-by-line mode, and thus avoid prompts and echoing.
+		 * Note that this also disables further TELNET option processing.
+ */
+ clib_socket_tx_add_formatted (s, "%s\n", cmd);
+ clib_socket_tx (s);
+ vec_free (cmd);
+ do_quit = acked; /* quit after the next response */
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown fd");
+ goto done;
+ }
+ }
+
+ error = clib_socket_close (s);
+
+done:
+ vec_free (cmd);
+ vec_free (str);
+ if (efd > -1)
+ close (efd);
+
+ if (is_interactive)
+ tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_tio);
+
+ if (error)
+ {
+ clib_error_report (error);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/conf/80-vpp.conf b/src/vpp/conf/80-vpp.conf
new file mode 100644
index 00000000..8fdf184c
--- /dev/null
+++ b/src/vpp/conf/80-vpp.conf
@@ -0,0 +1,15 @@
+# Number of 2MB hugepages desired
+vm.nr_hugepages=1024
+
+# Must be greater than or equal to (2 * vm.nr_hugepages).
+vm.max_map_count=3096
+
+# All groups allowed to access hugepages
+vm.hugetlb_shm_group=0
+
+# Shared Memory Max must be greater than or equal to the total size of hugepages.
+# For 2MB pages, TotalHugepageSize = vm.nr_hugepages * 2 * 1024 * 1024
+# If the existing kernel.shmmax setting (cat /proc/sys/kernel/shmmax)
+# is greater than the calculated TotalHugepageSize, then set this parameter
+# to the current shmmax value.
+kernel.shmmax=2147483648
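+
+# Worked example with the defaults above: 1024 hugepages * 2MB/page =
+# 2147483648 bytes, which matches the kernel.shmmax value set here.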
diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf
new file mode 100644
index 00000000..c3b9872e
--- /dev/null
+++ b/src/vpp/conf/startup.conf
@@ -0,0 +1,133 @@
+
+unix {
+ nodaemon
+ log /tmp/vpp.log
+ full-coredump
+ cli-listen /run/vpp/cli.sock
+ gid vpp
+}
+
+api-trace {
+## This stanza controls binary API tracing. Unless there is a very strong reason,
+## please leave this feature enabled.
+ on
+## Additional parameters:
+##
+## To set the number of binary API trace records in the circular buffer, configure nitems
+##
+## nitems <nnn>
+##
+## To save the API message decode tables, configure a filename; the result is written to /tmp/<filename>.
+## Very handy for understanding api message changes between versions, identifying missing
+## plugins, and so forth.
+##
+## save-api-table <filename>
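+##
+## Example (values are illustrative):
+##
+## nitems 65536
+## save-api-table api-msg-table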
+}
+
+api-segment {
+ gid vpp
+}
+
+cpu {
+  ## In VPP there is one main thread, and the user can optionally create worker thread(s)
+ ## The main thread and worker thread(s) can be pinned to CPU core(s) manually or automatically
+
+ ## Manual pinning of thread(s) to CPU core(s)
+
+ ## Set logical CPU core where main thread runs
+ # main-core 1
+
+ ## Set logical CPU core(s) where worker threads are running
+ # corelist-workers 2-3,18-19
+
+ ## Automatic pinning of thread(s) to CPU core(s)
+
+ ## Sets number of CPU core(s) to be skipped (1 ... N-1)
+  ## Skipped CPU core(s) are not used for pinning the main thread and worker thread(s).
+  ## The main thread is automatically pinned to the first available CPU core and worker(s)
+  ## are pinned to the next free CPU core(s) after the core assigned to the main thread
+ # skip-cores 4
+
+ ## Specify a number of workers to be created
+ ## Workers are pinned to N consecutive CPU cores while skipping "skip-cores" CPU core(s)
+ ## and main thread's CPU core
+ # workers 2
+
+ ## Set scheduling policy and priority of main and worker threads
+
+ ## Scheduling policy options are: other (SCHED_OTHER), batch (SCHED_BATCH)
+ ## idle (SCHED_IDLE), fifo (SCHED_FIFO), rr (SCHED_RR)
+ # scheduler-policy fifo
+
+  ## Scheduling priority is used only for "real-time" policies (fifo and rr),
+ ## and has to be in the range of priorities supported for a particular policy
+ # scheduler-priority 50
+}
+
+# dpdk {
+	## Change default settings for all interfaces
+ # dev default {
+ ## Number of receive queues, enables RSS
+ ## Default is 1
+ # num-rx-queues 3
+
+	## Number of transmit queues. Default is equal
+	## to the number of worker threads, or 1 if there are no worker threads
+ # num-tx-queues 3
+
+	## Number of descriptors in transmit and receive rings;
+	## increasing or reducing the number can impact performance.
+	## Default is 1024 for both rx and tx
+ # num-rx-desc 512
+ # num-tx-desc 512
+
+ ## VLAN strip offload mode for interface
+ ## Default is off
+ # vlan-strip-offload on
+ # }
+
+ ## Whitelist specific interface by specifying PCI address
+ # dev 0000:02:00.0
+
+ ## Whitelist specific interface by specifying PCI address and in
+ ## addition specify custom parameters for this interface
+ # dev 0000:02:00.1 {
+ # num-rx-queues 2
+ # }
+
+ ## Specify bonded interface and its slaves via PCI addresses
+ ##
+ ## Bonded interface in XOR load balance mode (mode 2) with L3 and L4 headers
+ # vdev eth_bond0,mode=2,slave=0000:02:00.0,slave=0000:03:00.0,xmit_policy=l34
+ # vdev eth_bond1,mode=2,slave=0000:02:00.1,slave=0000:03:00.1,xmit_policy=l34
+ ##
+ ## Bonded interface in Active-Back up mode (mode 1)
+ # vdev eth_bond0,mode=1,slave=0000:02:00.0,slave=0000:03:00.0
+ # vdev eth_bond1,mode=1,slave=0000:02:00.1,slave=0000:03:00.1
+
+	## Change UIO driver used by VPP. Options are: igb_uio, vfio-pci
+ ## and uio_pci_generic (default)
+ # uio-driver vfio-pci
+
+	## Disable multi-segment buffers; improves performance but
+	## disables Jumbo MTU support
+ # no-multi-seg
+
+	## Increase the number of buffers allocated; needed only in scenarios with
+	## a large number of interfaces and worker threads. Value is per CPU socket.
+ ## Default is 16384
+ # num-mbufs 128000
+
+	## Change hugepage allocation per-socket; needed only if a larger
+	## number of mbufs is required. Default is 256M on each detected CPU socket
+ # socket-mem 2048,2048
+# }
+
+# Adjusting the plugin path depending on where the VPP plugins are:
+#plugins
+#{
+# path /home/bms/vpp/build-root/install-vpp-native/vpp/lib64/vpp_plugins
+#}
+
+# Alternate syntax to choose plugin path
+#plugin_path /home/bms/vpp/build-root/install-vpp-native/vpp/lib64/vpp_plugins
diff --git a/src/vpp/oam/oam.c b/src/vpp/oam/oam.c
new file mode 100644
index 00000000..ef061207
--- /dev/null
+++ b/src/vpp/oam/oam.c
@@ -0,0 +1,644 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vpp/oam/oam.h>
+
+oam_main_t oam_main;
+
+static vlib_node_registration_t oam_node;
+
+static void
+init_oam_packet_template (oam_main_t * om, oam_target_t * t)
+{
+ oam_template_t *h;
+ int i;
+ ip_csum_t sum;
+ u16 csum;
+
+ vec_validate_aligned (t->template, 0, CLIB_CACHE_LINE_BYTES);
+
+ h = t->template;
+ memset (h, 0, sizeof (*h));
+
+ h->ip4.src_address.as_u32 = t->src_address.as_u32;
+ h->ip4.dst_address.as_u32 = t->dst_address.as_u32;
+ h->ip4.ip_version_and_header_length = 0x45;
+ h->ip4.length = clib_host_to_net_u16 (sizeof (*h));
+ h->ip4.ttl = 64; /* as in linux */
+ h->ip4.protocol = IP_PROTOCOL_ICMP;
+ h->ip4.checksum = ip4_header_checksum (&h->ip4);
+
+ /*
+ * Template has seq = 0. Each time we send one of these puppies,
+ * change the sequence number and fix the execrated checksum
+ */
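+  /*
+   * (Incremental update per RFC 1624: when a 16-bit field changes from
+   * m to m', the new checksum is C' = ~(~C + ~m + m'). The
+   * ip_csum_update()/ip_csum_fold() calls in oam_process() perform
+   * exactly this when rewriting the sequence number.)
+   */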
+ h->icmp.type = ICMP4_echo_request;
+ h->id = clib_host_to_net_u16 (t->id);
+
+ for (i = 0; i < ARRAY_LEN (h->data); i++)
+ h->data[i] = 'A' + i;
+
+ sum = ip_incremental_checksum (0, &h->icmp,
+ sizeof (h->icmp) + sizeof (h->id) +
+ sizeof (h->seq) + sizeof (h->data));
+ csum = ~ip_csum_fold (sum);
+ h->icmp.checksum = csum;
+}
+
+int
+vpe_oam_add_del_target (ip4_address_t * src_address,
+ ip4_address_t * dst_address, u32 fib_id, int is_add)
+{
+ u64 key;
+ uword *p;
+ oam_main_t *om = &oam_main;
+ oam_target_t *t;
+ ip4_main_t *im = &ip4_main;
+ u32 fib_index;
+
+ /* Make sure the FIB actually exists */
+ p = hash_get (im->fib_index_by_table_id, fib_id);
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ fib_index = p[0];
+
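+  /* Targets are hashed on a u64 key: fib_index in the upper 32 bits,
+     the IPv4 destination address in the lower 32. Replies are looked
+     up by their source address in oam_node_fn() using the same scheme. */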
+ key = ((u64) fib_index << 32) | (dst_address->as_u32);
+ p = hash_get (om->target_by_address_and_fib_id, key);
+
+ if (is_add)
+ {
+ if (p)
+ return VNET_API_ERROR_INVALID_REGISTRATION; /* already there... */
+
+ pool_get (om->targets, t);
+ memset (t, 0, sizeof (*t));
+ t->src_address.as_u32 = src_address->as_u32;
+ t->dst_address.as_u32 = dst_address->as_u32;
+ t->fib_id = fib_id;
+ t->fib_index = fib_index;
+ t->state = OAM_STATE_DEAD;
+ t->last_heard_time = vlib_time_now (om->vlib_main);
+ t->last_heard_seq = (u16) ~ om->misses_allowed;
+ t->id = (u16) random_u32 (&om->random_seed);
+ t->seq = 1;
+ init_oam_packet_template (om, t);
+ hash_set (om->target_by_address_and_fib_id, key, t - om->targets);
+ }
+ else
+ {
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY; /* no such oam target */
+ t = pool_elt_at_index (om->targets, p[0]);
+ vec_free (t->template);
+ hash_unset (om->target_by_address_and_fib_id, key);
+ pool_put (om->targets, t);
+ }
+ return 0;
+}
+
+static clib_error_t *
+oam_add_del_target_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_add = -1;
+ ip4_address_t src_address;
+ int src_set = 0;
+ ip4_address_t dst_address;
+ int dst_set = 0;
+ u32 fib_id = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "add"))
+ is_add = 1;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src_address))
+ src_set = 1;
+ else if (unformat (input, "dst %U", unformat_ip4_address, &dst_address))
+ dst_set = 1;
+ else if (unformat (input, "fib %d", &fib_id))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (is_add == -1)
+ return clib_error_return (0, "missing add / del qualifier");
+ if (src_set == 0)
+ return clib_error_return (0, "src address not set");
+ if (dst_set == 0)
+ return clib_error_return (0, "dst address not set");
+
+ (void) vpe_oam_add_del_target (&src_address, &dst_address, fib_id, is_add);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (oam_add_del_target_command, static) = {
+ .path = "oam",
+  .short_help = "oam [add|del] src <ip4-address> dst <ip4-address> fib <fib-id>",
+ .function = oam_add_del_target_command_fn,
+};
+/* *INDENT-ON* */
+
+static uword
+oam_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f_arg)
+{
+ oam_main_t *om = &oam_main;
+ uword *event_data = 0;
+ oam_target_t *t;
+ oam_template_t *h0;
+ u32 bi0;
+ u16 new_seq;
+ ip_csum_t sum0;
+ vlib_frame_t *f;
+ u32 *to_next, *from;
+ u32 ip4_lookup_node_index;
+ vlib_node_t *ip4_lookup_node;
+ vlib_buffer_t *b0;
+ static u32 *buffers;
+ oam_template_copy_t *copy_src, *copy_dst;
+ void send_oam_event (oam_target_t * t);
+ u32 nalloc;
+
+ /* Enqueue pkts to ip4-lookup */
+ ip4_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup");
+ ip4_lookup_node_index = ip4_lookup_node->index;
+
+ while (1)
+ {
+ /* Only timeout events at the moment */
+ vlib_process_wait_for_event_or_clock (vm, om->interval);
+ vec_reset_length (event_data);
+
+ if (pool_elts (om->targets) == 0)
+ continue;
+
+ if (vec_len (buffers) < pool_elts (om->targets))
+ vec_validate (buffers, pool_elts (om->targets) - 1);
+
+ nalloc = vlib_buffer_alloc (vm, buffers, pool_elts (om->targets));
+ if (nalloc < pool_elts (om->targets))
+ {
+ vlib_buffer_free (vm, buffers, nalloc);
+ continue;
+ }
+
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node_index);
+ f->n_vectors = 0;
+ to_next = vlib_frame_vector_args (f);
+ from = buffers;
+
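+      /* Liveness rule: a target goes DEAD once its transmit sequence
+         number is om->misses_allowed ahead of the last sequence heard,
+         and comes back ALIVE when replies catch up. */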
+ /* *INDENT-OFF* */
+ pool_foreach (t, om->targets,
+ ({
+ /* State transition announcement... */
+ if ((t->seq - t->last_heard_seq) >= om->misses_allowed)
+ {
+ if (t->state == OAM_STATE_ALIVE)
+ {
+ if (CLIB_DEBUG > 0)
+ clib_warning ("oam target %U now DEAD",
+ format_ip4_address, &t->dst_address);
+ t->state = OAM_STATE_DEAD;
+ send_oam_event (t);
+ }
+ }
+ else
+ {
+ if (t->state == OAM_STATE_DEAD)
+ {
+ if (CLIB_DEBUG > 0)
+ clib_warning ("oam target %U now ALIVE",
+ format_ip4_address, &t->dst_address);
+ t->state = OAM_STATE_ALIVE;
+ send_oam_event (t);
+ }
+ }
+
+ /* Send a new icmp */
+ t->seq++;
+ new_seq = clib_host_to_net_u16 (t->seq);
+
+ bi0 = from[0];
+ from++;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index [VLIB_TX] = t->fib_index;
+
+ /* Marginally faster than memcpy, probably */
+ copy_dst = (oam_template_copy_t *) b0->data;
+ copy_src = (oam_template_copy_t *) t->template;
+
+ copy_dst->v8[0] = copy_src->v8[0];
+ copy_dst->v8[1] = copy_src->v8[1];
+ copy_dst->v8[2] = copy_src->v8[2];
+ copy_dst->v8[3] = copy_src->v8[3];
+ copy_dst->v4 = copy_src->v4;
+
+ b0->current_data = 0;
+ b0->current_length = sizeof (*t->template);
+ h0 = vlib_buffer_get_current (b0);
+
+ sum0 = h0->icmp.checksum;
+ sum0 = ip_csum_update(sum0, 0 /* old seq */,
+ new_seq, oam_template_t, seq);
+ h0->seq = new_seq;
+ h0->icmp.checksum = ip_csum_fold (sum0);
+
+ to_next[0] = bi0;
+ to_next++;
+ f->n_vectors++;
+ if (f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ clib_warning ("Too many OAM clients...");
+ goto out;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ out:
+ vlib_put_frame_to_node (vm, ip4_lookup_node_index, f);
+ }
+ return 0; /* not so much */
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (oam_process_node,static) = {
+ .function = oam_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vpe-oam-process",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+oam_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ oam_main_t *om = &oam_main;
+ f64 interval;
+ u32 misses_allowed;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "interval %f", &interval))
+ om->interval = interval;
+ else if (unformat (input, "misses-allowed %d", &misses_allowed))
+    om->misses_allowed = misses_allowed;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (oam_config, "oam");
+
+static clib_error_t *
+oam_init (vlib_main_t * vm)
+{
+ oam_main_t *om = &oam_main;
+
+ om->vlib_main = vm;
+ om->vnet_main = vnet_get_main ();
+ om->interval = 2.04;
+ om->misses_allowed = 3;
+ om->random_seed = (u32) (vlib_time_now (vm) * 1e6);
+ om->target_by_address_and_fib_id = hash_create (0, sizeof (uword));
+ om->icmp_id = random_u32 (&om->random_seed);
+
+ ip4_icmp_register_type (vm, ICMP4_echo_reply, oam_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (oam_init);
+
+static u8 *
+format_oam_target (u8 * s, va_list * args)
+{
+ oam_target_t *t = va_arg (*args, oam_target_t *);
+ int verbose = va_arg (*args, int);
+
+ if (t == 0)
+ return format (s, "%=6s%=14s%=14s%=12s%=10s",
+ "Fib", "Src", "Dst", "Last Heard", "State");
+
+ s = format (s, "%=6d%=14U%=14U%=12.2f%=10s",
+ t->fib_id,
+ format_ip4_address, &t->src_address,
+ format_ip4_address, &t->dst_address,
+ t->last_heard_time,
+ (t->state == OAM_STATE_ALIVE) ? "alive" : "dead");
+ if (verbose)
+ s = format (s, " seq %d last_heard_seq %d", t->seq, t->last_heard_seq);
+
+ return s;
+}
+
+static clib_error_t *
+show_oam_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ oam_main_t *om = &oam_main;
+ oam_target_t *t;
+ int verbose = 0;
+
+ if (unformat (input, "verbose") || unformat (input, "v"))
+ verbose = 1;
+
+ /* print header */
+ vlib_cli_output (vm, "%U", format_oam_target, 0, verbose);
+
+ /* *INDENT-OFF* */
+ pool_foreach (t, om->targets,
+ ({
+ vlib_cli_output (vm, "%U", format_oam_target, t, verbose);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_oam_command, static) = {
+ .path = "show oam",
+  .short_help = "show oam [verbose]",
+ .function = show_oam_command_fn,
+};
+/* *INDENT-ON* */
+
+typedef struct
+{
+ u32 target_pool_index;
+ ip4_address_t address;
+} oam_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_swap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ oam_trace_t *t = va_arg (*args, oam_trace_t *);
+
+ s = format (s, "OAM: rx from address %U, target index %d",
+ format_ip4_address, &t->address, t->target_pool_index);
+ return s;
+}
+
+
+#define foreach_oam_error \
+_(PROCESSED, "vpe icmp4 oam replies processed") \
+_(DROPPED, "icmp4 replies dropped (no registration)")
+
+typedef enum
+{
+#define _(sym,str) OAM_ERROR_##sym,
+ foreach_oam_error
+#undef _
+ OAM_N_ERROR,
+} oam_error_t;
+
+static char *oam_error_strings[] = {
+#define _(sym,string) string,
+ foreach_oam_error
+#undef _
+};
+
+/*
+ * To drop a pkt and increment one of the previous counters:
+ *
+ * set b0->error = error_node->errors[OAM_ERROR_EXAMPLE];
+ * set next0 to a disposition index bound to "error-drop".
+ *
+ * To manually increment the specific counter OAM_ERROR_EXAMPLE:
+ *
+ * vlib_node_t *n = vlib_get_node (vm, oam_node.index);
+ * u32 node_counter_base_index = n->error_heap_index;
+ * vlib_error_main_t * em = &vm->error_main;
+ * em->counters[node_counter_base_index + OAM_ERROR_EXAMPLE] += 1;
+ *
+ */
+
+typedef enum
+{
+ OAM_NEXT_DROP,
+ OAM_NEXT_PUNT,
+ OAM_N_NEXT,
+} oam_next_t;
+
+static uword
+oam_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ oam_next_t next_index;
+ oam_main_t *om = &oam_main;
+ u32 next0 = OAM_NEXT_DROP; /* all pkts go to the hopper... */
+ u32 next1 = OAM_NEXT_DROP;
+ uword *u0, *u1;
+ oam_template_t *oam0, *oam1;
+ u32 fib_index0, fib_index1;
+ u64 key0, key1;
+ oam_target_t *t0, *t1;
+ ip4_main_t *im = &ip4_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ oam0 = vlib_buffer_get_current (b0);
+ oam1 = vlib_buffer_get_current (b1);
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
+
+ key0 = ((u64) fib_index0 << 32) | oam0->ip4.src_address.as_u32;
+ u0 = hash_get (om->target_by_address_and_fib_id, key0);
+ if (u0)
+ {
+ t0 = pool_elt_at_index (om->targets, u0[0]);
+ t0->last_heard_time = vlib_time_now (vm);
+ t0->last_heard_seq = clib_net_to_host_u16 (oam0->seq);
+ b0->error = node->errors[OAM_ERROR_PROCESSED];
+ }
+ else
+ b0->error = node->errors[OAM_ERROR_DROPPED];
+
+ key1 = ((u64) fib_index1 << 32) | oam1->ip4.src_address.as_u32;
+ u1 = hash_get (om->target_by_address_and_fib_id, key1);
+ if (u1)
+ {
+ t1 = pool_elt_at_index (om->targets, u1[0]);
+ t1->last_heard_time = vlib_time_now (vm);
+ t1->last_heard_seq = clib_net_to_host_u16 (oam1->seq);
+ b1->error = node->errors[OAM_ERROR_PROCESSED];
+ }
+ else
+ b1->error = node->errors[OAM_ERROR_DROPPED];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ oam_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->target_pool_index = u0 ? u0[0] : (u32) ~ 0;
+ t->address.as_u32 = oam0->ip4.src_address.as_u32;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ oam_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->target_pool_index = u1 ? u1[0] : (u32) ~ 0;
+ t->address.as_u32 = oam1->ip4.src_address.as_u32;
+
+ }
+ }
+
+ if (vm->os_punt_frame)
+ next0 = next1 = OAM_NEXT_PUNT;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, sw_if_index0;
+ vlib_buffer_t *b0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ oam0 = vlib_buffer_get_current (b0);
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+
+ key0 = ((u64) fib_index0 << 32) | oam0->ip4.src_address.as_u32;
+ u0 = hash_get (om->target_by_address_and_fib_id, key0);
+ if (u0)
+ {
+ t0 = pool_elt_at_index (om->targets, u0[0]);
+ t0->last_heard_time = vlib_time_now (vm);
+ t0->last_heard_seq = clib_net_to_host_u16 (oam0->seq);
+ b0->error = node->errors[OAM_ERROR_PROCESSED];
+ }
+ else
+ b0->error = node->errors[OAM_ERROR_DROPPED];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ oam_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->target_pool_index = u0 ? u0[0] : (u32) ~ 0;
+ t->address.as_u32 = oam0->ip4.src_address.as_u32;
+ }
+
+ if (vm->os_punt_frame)
+ next0 = OAM_NEXT_PUNT;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (oam_node,static) = {
+ .function = oam_node_fn,
+ .name = "vpe-icmp4-oam",
+ .vector_size = sizeof (u32),
+ .format_trace = format_swap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(oam_error_strings),
+ .error_strings = oam_error_strings,
+
+ .n_next_nodes = OAM_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [OAM_NEXT_DROP] = "error-drop",
+ [OAM_NEXT_PUNT] = "error-punt",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/oam/oam.h b/src/vpp/oam/oam.h
new file mode 100644
index 00000000..f6af9788
--- /dev/null
+++ b/src/vpp/oam/oam.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_oam_h__
+#define __included_oam_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/interface.h>
+
+/* 36 octets (20 ip4 + 4 icmp + 2 id + 2 seq + 8 data), make a note of it... */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ icmp46_header_t icmp;
+ u16 id;
+ u16 seq;
+ u8 data[8];
+}) oam_template_t;
+/* *INDENT-ON* */
+
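+/* The same 36 octets viewed as 4 u64s + 1 u32, so the template can be
+   copied a word at a time (see oam_process()) */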
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u64 v8[4];
+ u32 v4;
+}) oam_template_copy_t;
+/* *INDENT-ON* */
+
+typedef enum
+{
+ OAM_STATE_UNKNOWN = 0,
+ OAM_STATE_ALIVE,
+ OAM_STATE_DEAD,
+} oam_state_t;
+
+typedef struct
+{
+ ip4_address_t src_address;
+ ip4_address_t dst_address;
+ u32 fib_id;
+ u32 fib_index;
+ f64 last_heard_time;
+ u16 seq;
+ u16 last_heard_seq;
+ u16 id;
+ u8 state;
+ oam_template_t *template;
+} oam_target_t;
+
+typedef struct
+{
+ /* OAM targets */
+ oam_target_t *targets;
+ uword *target_by_address_and_fib_id;
+
+ /* Config parameters */
+ f64 interval;
+ u32 misses_allowed;
+
+ /* random number seed */
+ u32 random_seed;
+ u16 icmp_id;
+
+ /* oam packet template */
+ vlib_packet_template_t packet_template;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} oam_main_t;
+
+int vpe_oam_add_del_target (ip4_address_t * src_address,
+ ip4_address_t * dst_address,
+ u32 fib_id, int is_add);
+
+#endif /* __included_oam_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/stats/stats.api b/src/vpp/stats/stats.api
new file mode 100644
index 00000000..caf8f515
--- /dev/null
+++ b/src/vpp/stats/stats.api
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+ This file defines the stats API
+*/
+
+
+/** \brief Want Stats, enable/disable ALL stats updates
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+*/
+autoreply define want_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief Want Interface Simple Stats, register for simple interface stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+
+ Please consider using want_per_interface_simple_stats with sw_if_index=~0
+*/
+autoreply define want_interface_simple_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief Want Per Interface Simple Stats, register for continuous stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+ @param num - number of sw_if_indexes
+ @param sw_ifs - array of sw_if_index
+*/
+autoreply define want_per_interface_simple_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+ u32 num;
+ u32 sw_ifs[num];
+};
+
+/** \brief Want Interface Combined Stats, register for continuous stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+
+ Please consider using want_per_interface_combined_stats with sw_if_index=~0
+
+*/
+autoreply define want_interface_combined_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief Want Per Interface Combined Stats, register for continuous stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+ @param num - number of sw_if_indexes
+ @param sw_ifs - array of sw_if_index
+*/
+autoreply define want_per_interface_combined_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+ u32 num;
+ u32 sw_ifs[num];
+};
+
+/** \brief Want IP4 FIB Stats, register for continuous stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+*/
+autoreply define want_ip4_fib_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief Want IP6 FIB Stats, register for continuous stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+*/
+autoreply define want_ip6_fib_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief Want IP4 NBR Stats, register for continuous stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+*/
+autoreply define want_ip4_nbr_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+/** \brief Want IP6 NBR Stats, register for continuous stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 = enable stats, 0 = disable
+ @param pid - pid of process requesting stats updates
+*/
+autoreply define want_ip6_nbr_stats
+{
+ u32 client_index;
+ u32 context;
+ u32 enable_disable;
+ u32 pid;
+};
+
+typeonly manual_print manual_endian define ip4_fib_counter
+{
+ u32 address;
+ u8 address_length;
+ u64 packets;
+ u64 bytes;
+};
+
+manual_print manual_endian define vnet_ip4_fib_counters
+{
+ u32 vrf_id;
+ u32 count;
+ vl_api_ip4_fib_counter_t c[count];
+};
+
+typeonly manual_print manual_endian define ip4_nbr_counter
+{
+ u32 address;
+ u8 link_type;
+ u64 packets;
+ u64 bytes;
+};
+
+/**
+ * @brief Per-neighbour (i.e. per-adjacency) counters
+ * @param count The size of the array of counters
+ * @param sw_if_index The interface the adjacency is on
+ * @param begin Flag to indicate this is the first set of stats for this
+ *        interface. If this flag is not set, then it is a continuation of
+ * stats for this interface
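+ *        (a large dump for one interface may therefore arrive split
+ *        across several messages, only the first of which has begin set)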
+ * @param c counters
+ */
+manual_print manual_endian define vnet_ip4_nbr_counters
+{
+ u32 count;
+ u32 sw_if_index;
+ u8 begin;
+ vl_api_ip4_nbr_counter_t c[count];
+};
+
+typeonly manual_print manual_endian define ip6_fib_counter
+{
+ u64 address[2];
+ u8 address_length;
+ u64 packets;
+ u64 bytes;
+};
+
+manual_print manual_endian define vnet_ip6_fib_counters
+{
+ u32 vrf_id;
+ u32 count;
+ vl_api_ip6_fib_counter_t c[count];
+};
+
+typeonly manual_print manual_endian define ip6_nbr_counter
+{
+ u64 address[2];
+ u8 link_type;
+ u64 packets;
+ u64 bytes;
+};
+
+manual_print manual_endian define vnet_ip6_nbr_counters
+{
+ u32 count;
+ u32 sw_if_index;
+ u8 begin;
+ vl_api_ip6_nbr_counter_t c[count];
+};
+
+
+/** \brief Request for a single block of summary stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define vnet_get_summary_stats
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for vnet_get_summary_stats request
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for request
+    @param total_pkts - total packet counters; [0] is rx, [1] is tx
+    @param total_bytes - total byte counters; [0] is rx, [1] is tx
+    @param vector_rate - average number of packets per vector (frame)
+*/
+define vnet_get_summary_stats_reply
+{
+ u32 context;
+ i32 retval;
+ u64 total_pkts[2];
+ u64 total_bytes[2];
+ f64 vector_rate;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c
new file mode 100644
index 00000000..ac364e88
--- /dev/null
+++ b/src/vpp/stats/stats.c
@@ -0,0 +1,2410 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vpp/stats/stats.h>
+#include <signal.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/dpo/load_balance.h>
+
+#define STATS_DEBUG 0
+
+stats_main_t stats_main;
+
+#include <vnet/ip/ip.h>
+
+#include <vpp/api/vpe_msg_enum.h>
+
+#define f64_endian(a)
+#define f64_print(a,b)
+
+#define vl_typedefs /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_printfun
+
+#define foreach_stats_msg \
+_(WANT_STATS, want_stats) \
+_(VNET_INTERFACE_SIMPLE_COUNTERS, vnet_interface_simple_counters) \
+_(WANT_INTERFACE_SIMPLE_STATS, want_interface_simple_stats) \
+_(VNET_INTERFACE_COMBINED_COUNTERS, vnet_interface_combined_counters) \
+_(WANT_INTERFACE_COMBINED_STATS, want_interface_combined_stats) \
+_(WANT_PER_INTERFACE_COMBINED_STATS, want_per_interface_combined_stats) \
+_(WANT_PER_INTERFACE_SIMPLE_STATS, want_per_interface_simple_stats) \
+_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \
+_(WANT_IP4_FIB_STATS, want_ip4_fib_stats) \
+_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \
+_(WANT_IP6_FIB_STATS, want_ip6_fib_stats) \
+_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \
+_(WANT_IP4_NBR_STATS, want_ip4_nbr_stats) \
+_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) \
+_(WANT_IP6_NBR_STATS, want_ip6_nbr_stats) \
+_(VNET_GET_SUMMARY_STATS, vnet_get_summary_stats)
+
+
+#define vl_msg_name_crc_list
+#include <vpp/stats/stats.api.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_stats;
+#undef _
+}
+
+/* These constants ensure msg sizes <= 1024, aka ring allocation */
+#define SIMPLE_COUNTER_BATCH_SIZE 126
+#define COMBINED_COUNTER_BATCH_SIZE 63
+#define IP4_FIB_COUNTER_BATCH_SIZE 48
+#define IP6_FIB_COUNTER_BATCH_SIZE 30
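+
+/*
+ * A worked check of the batch sizes above (assuming packed on-the-wire
+ * layouts as defined in stats.api): 126 * 8B = 1008B of simple counters,
+ * 63 * 16B = 1008B of combined counters, 48 * 21B = 1008B of ip4 fib
+ * counters and 30 * 33B = 990B of ip6 fib counters -- each leaving room
+ * for the fixed message header inside a 1024-byte ring slot.
+ */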
+
+/* 5ms */
+#define STATS_RELEASE_DELAY_NS (1000 * 1000 * 5)
+/* ns/us us/ms */
+
+u8 *
+format_vnet_interface_combined_counters (u8 * s, va_list * args)
+{
+ stats_main_t *sm = &stats_main;
+ vl_api_vnet_interface_combined_counters_t *mp =
+ va_arg (*args, vl_api_vnet_interface_combined_counters_t *);
+
+ char *counter_name;
+ u32 count, sw_if_index;
+ int i;
+ count = ntohl (mp->count);
+ sw_if_index = ntohl (mp->first_sw_if_index);
+
+ vlib_counter_t *vp;
+ u64 packets, bytes;
+ vp = (vlib_counter_t *) mp->data;
+
+ switch (mp->vnet_counter_type)
+ {
+ case VNET_INTERFACE_COUNTER_RX:
+ counter_name = "rx";
+ break;
+ case VNET_INTERFACE_COUNTER_TX:
+ counter_name = "tx";
+ break;
+ default:
+ counter_name = "bogus";
+ break;
+ }
+ for (i = 0; i < count; i++)
+ {
+ packets = clib_mem_unaligned (&vp->packets, u64);
+ packets = clib_net_to_host_u64 (packets);
+ bytes = clib_mem_unaligned (&vp->bytes, u64);
+ bytes = clib_net_to_host_u64 (bytes);
+ vp++;
+ s = format (s, "%U.%s.packets %lld\n",
+ format_vnet_sw_if_index_name,
+ sm->vnet_main, sw_if_index, counter_name, packets);
+ s = format (s, "%U.%s.bytes %lld\n",
+ format_vnet_sw_if_index_name,
+ sm->vnet_main, sw_if_index, counter_name, bytes);
+ sw_if_index++;
+ }
+ return s;
+}
+
+u8 *
+format_vnet_interface_simple_counters (u8 * s, va_list * args)
+{
+ stats_main_t *sm = &stats_main;
+ vl_api_vnet_interface_simple_counters_t *mp =
+ va_arg (*args, vl_api_vnet_interface_simple_counters_t *);
+ char *counter_name;
+ u32 count, sw_if_index;
+ count = ntohl (mp->count);
+ sw_if_index = ntohl (mp->first_sw_if_index);
+ u64 *vp, v;
+ vp = (u64 *) mp->data;
+ int i;
+
+ switch (mp->vnet_counter_type)
+ {
+ case VNET_INTERFACE_COUNTER_DROP:
+ counter_name = "drop";
+ break;
+ case VNET_INTERFACE_COUNTER_PUNT:
+ counter_name = "punt";
+ break;
+ case VNET_INTERFACE_COUNTER_IP4:
+ counter_name = "ip4";
+ break;
+ case VNET_INTERFACE_COUNTER_IP6:
+ counter_name = "ip6";
+ break;
+ case VNET_INTERFACE_COUNTER_RX_NO_BUF:
+ counter_name = "rx-no-buff";
+ break;
+ case VNET_INTERFACE_COUNTER_RX_MISS:
+ counter_name = "rx-miss";
+ break;
+ case VNET_INTERFACE_COUNTER_RX_ERROR:
+ counter_name = "rx-error (fifo-full)";
+ break;
+ case VNET_INTERFACE_COUNTER_TX_ERROR:
+ counter_name = "tx-error (fifo-full)";
+ break;
+ default:
+ counter_name = "bogus";
+ break;
+ }
+ for (i = 0; i < count; i++)
+ {
+ v = clib_mem_unaligned (vp, u64);
+ v = clib_net_to_host_u64 (v);
+ vp++;
+ s = format (s, "%U.%s %lld\n", format_vnet_sw_if_index_name,
+ sm->vnet_main, sw_if_index, counter_name, v);
+ sw_if_index++;
+ }
+
+ return s;
+}
+
+void
+dslock (stats_main_t * sm, int release_hint, int tag)
+{
+ u32 thread_index;
+ data_structure_lock_t *l = sm->data_structure_lock;
+
+ if (PREDICT_FALSE (l == 0))
+ return;
+
+ thread_index = vlib_get_thread_index ();
+ if (l->lock && l->thread_index == thread_index)
+ {
+ l->count++;
+ return;
+ }
+
+ if (release_hint)
+ l->release_hint++;
+
+ while (__sync_lock_test_and_set (&l->lock, 1))
+ /* zzzz */ ;
+ l->tag = tag;
+ l->thread_index = thread_index;
+ l->count = 1;
+}
+
+void
+stats_dslock_with_hint (int hint, int tag)
+{
+ stats_main_t *sm = &stats_main;
+ dslock (sm, hint, tag);
+}
+
+void
+dsunlock (stats_main_t * sm)
+{
+ u32 thread_index;
+ data_structure_lock_t *l = sm->data_structure_lock;
+
+ if (PREDICT_FALSE (l == 0))
+ return;
+
+ thread_index = vlib_get_thread_index ();
+ ASSERT (l->lock && l->thread_index == thread_index);
+ l->count--;
+ if (l->count == 0)
+ {
+ l->tag = -l->tag;
+ l->release_hint = 0;
+ CLIB_MEMORY_BARRIER ();
+ l->lock = 0;
+ }
+}
+
+void
+stats_dsunlock (int hint, int tag)
+{
+ stats_main_t *sm = &stats_main;
+ dsunlock (sm);
+}
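+
+/*
+ * Typical use of the lock pair above, as exercised by the FIB and
+ * neighbour collectors later in this file (illustrative sketch only):
+ *
+ *   dslock (sm, 0, 1);      release_hint = 0, tag = 1
+ *   ... walk shared FIB / adjacency data structures ...
+ *   dsunlock (sm);
+ *
+ * Long-running walkers poll sm->data_structure_lock->release_hint and
+ * bail out (dropping the lock and pausing) when a writer wants in; see
+ * do_ip4_fib_counters below for the full pattern.
+ */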
+
+static vpe_client_registration_t *
+get_client_for_stat (u32 reg, u32 item, u32 client_index)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_stats_registration_t *registration;
+ uword *p;
+
+ /* Is there anything listening for item in that reg */
+ p = hash_get (sm->stats_registration_hash[reg], item);
+
+ if (!p)
+ return 0; // Fail
+
+ /* If there is, is our client_index one of them */
+ registration = pool_elt_at_index (sm->stats_registrations[reg], p[0]);
+ p = hash_get (registration->client_hash, client_index);
+
+ if (!p)
+ return 0; // Fail
+
+ return pool_elt_at_index (registration->clients, p[0]);
+
+}
+
+static int
+set_client_for_stat (u32 reg, u32 item, vpe_client_registration_t * client)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_stats_registration_t *registration;
+ vpe_client_registration_t *cr;
+ uword *p;
+
+ /* Is there anything listening for item in that reg */
+ p = hash_get (sm->stats_registration_hash[reg], item);
+
+ if (!p)
+ {
+ pool_get (sm->stats_registrations[reg], registration);
+ registration->item = item;
+ hash_set (sm->stats_registration_hash[reg], item,
+ registration - sm->stats_registrations[reg]);
+ }
+ else
+ {
+ registration = pool_elt_at_index (sm->stats_registrations[reg], p[0]);
+ }
+
+ p = hash_get (registration->client_hash, client->client_index);
+
+ if (!p)
+ {
+ pool_get (registration->clients, cr);
+ cr->client_index = client->client_index;
+ cr->client_pid = client->client_pid;
+ hash_set (registration->client_hash, cr->client_index,
+ cr - registration->clients);
+ }
+
+ return 1; //At least one client is doing something ... poll
+}
+
+int
+clear_client_for_stat (u32 reg, u32 item, u32 client_index)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_stats_registration_t *registration;
+ vpe_client_registration_t *client;
+ uword *p;
+ int i, elts;
+
+ /* Clear the client first */
+ /* Is there anything listening for item in that reg */
+ p = hash_get (sm->stats_registration_hash[reg], item);
+
+ if (!p)
+ goto exit;
+
+ /* If there is, is our client_index one of them */
+ registration = pool_elt_at_index (sm->stats_registrations[reg], p[0]);
+ p = hash_get (registration->client_hash, client_index);
+
+ if (!p)
+ goto exit;
+
+ client = pool_elt_at_index (registration->clients, p[0]);
+ hash_unset (registration->client_hash, client->client_index);
+ pool_put (registration->clients, client);
+
+ /* Now check if that was the last client for that item */
+ if (0 == pool_elts (registration->clients))
+ {
+ hash_unset (sm->stats_registration_hash[reg], item);
+ pool_put (sm->stats_registrations[reg], registration);
+ }
+
+exit:
+ elts = 0;
+ /* Now check if that was the last item in any of the listened to stats */
+ for (i = 0; i < STATS_REG_N_IDX; i++)
+ {
+ elts += pool_elts (sm->stats_registrations[i]);
+ }
+ return elts;
+}
+
+vpe_client_registration_t *
+get_clients_for_stat (u32 reg, u32 item)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t *client, *clients = 0;
+ vpe_client_stats_registration_t *registration;
+ uword *p;
+
+ /* Is there anything listening for item in that reg */
+ p = hash_get (sm->stats_registration_hash[reg], item);
+
+ if (!p)
+ return 0; // Fail
+
+ /* If there is, collect all of its clients */
+ registration = pool_elt_at_index (sm->stats_registrations[reg], p[0]);
+
+ vec_reset_length (clients);
+ pool_foreach (client, registration->clients, (
+ {
+ vec_add1 (clients, *client);}
+ ));
+ return clients;
+}
+
+
+static void
+clear_client_reg (u32 ** registrations)
+{
+ /* When registrations[x] is a vector of pool indices,
+ this is a good place to clean up the pools.
+ */
+#define stats_reg(n) vec_free(registrations[IDX_##n]);
+#include <vpp/stats/stats.reg>
+#undef stats_reg
+
+ vec_free (registrations);
+}
+
+u32 **
+init_client_reg (u32 ** registrations)
+{
+
+ /*
+ Initialise the stats registrations for each
+ type of stat a client can register for as well as
+ a vector of "interested" indexes.
+ Initially this is a u32 of either sw_if_index or fib_index
+ but eventually this should migrate to a pool_index (u32)
+ with a type specific pool that can include more complex things
+ such as timing and structured events.
+ */
+ vec_validate (registrations, STATS_REG_N_IDX);
+#define stats_reg(n) \
+ vec_reset_length(registrations[IDX_##n]);
+#include <vpp/stats/stats.reg>
+#undef stats_reg
+
+ /*
+ When registrations[x] is a vector of pool indices, here
+ is a good place to init the pools.
+ */
+ return registrations;
+}
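+
+/*
+ * A sketch of how the two-level registration store above is populated
+ * (hypothetical values; the real entry point is handle_client_registration
+ * below):
+ *
+ *   vpe_client_registration_t rp = {
+ *     .client_index = mp->client_index,
+ *     .client_pid = mp->pid,
+ *   };
+ *   set_client_for_stat (IDX_IP4_FIB_COUNTERS, ~0, &rp);
+ *   ...
+ *   clear_client_for_stat (IDX_IP4_FIB_COUNTERS, ~0, rp.client_index);
+ */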
+
+u32 **
+enable_all_client_reg (u32 ** registrations)
+{
+
+ /*
+ Enable all stats known by adding
+ ~0 to the index vector. Eventually this
+ should be deprecated.
+ */
+#define stats_reg(n) \
+ vec_add1(registrations[IDX_##n], ~0);
+#include <vpp/stats/stats.reg>
+#undef stats_reg
+ return registrations;
+}
+
+static void
+do_simple_interface_counters (stats_main_t * sm)
+{
+ vl_api_vnet_interface_simple_counters_t *mp = 0;
+ vnet_interface_main_t *im = sm->interface_main;
+ api_main_t *am = sm->api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue;
+ vlib_simple_counter_main_t *cm;
+ u32 items_this_message = 0;
+ u64 v, *vp = 0;
+ int i, n_counts;
+
+ /*
+ * Prevent interface registration from expanding / moving the vectors...
+ * That tends never to happen, so we can hold this lock for a while.
+ */
+ vnet_interface_counter_lock (im);
+
+ vec_foreach (cm, im->sw_if_counters)
+ {
+ n_counts = vlib_simple_counter_n_counters (cm);
+ for (i = 0; i < n_counts; i++)
+ {
+ if (mp == 0)
+ {
+ items_this_message = clib_min (SIMPLE_COUNTER_BATCH_SIZE,
+ n_counts - i);
+
+ mp = vl_msg_api_alloc_as_if_client
+ (sizeof (*mp) + items_this_message * sizeof (v));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_INTERFACE_SIMPLE_COUNTERS);
+ mp->vnet_counter_type = cm - im->sw_if_counters;
+ mp->first_sw_if_index = htonl (i);
+ mp->count = 0;
+ vp = (u64 *) mp->data;
+ }
+ v = vlib_get_simple_counter (cm, i);
+ clib_mem_unaligned (vp, u64) = clib_host_to_net_u64 (v);
+ vp++;
+ mp->count++;
+ if (mp->count == items_this_message)
+ {
+ mp->count = htonl (items_this_message);
+ /* Send to the main thread... */
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ mp = 0;
+ }
+ }
+ ASSERT (mp == 0);
+ }
+ vnet_interface_counter_unlock (im);
+}
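+
+/*
+ * Worked example: with 300 counters in a vector and
+ * SIMPLE_COUNTER_BATCH_SIZE == 126, the loop above ships three messages
+ * of 126, 126 and 48 counters, each sized to fit one ring slot.
+ */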
+
+void
+handle_client_registration (vpe_client_registration_t * client, u32 stat,
+ u32 item, int enable_disable)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t *rp, _rp;
+
+ rp = get_client_for_stat (stat, item, client->client_index);
+
+ /* Disable case */
+ if (enable_disable == 0)
+ {
+ if (!rp) // No client to disable
+ {
+ clib_warning ("pid %d: already disabled for stats...",
+ client->client_pid);
+ return;
+ }
+ sm->enable_poller =
+ clear_client_for_stat (stat, item, client->client_index);
+ return;
+ }
+ /* Enable case */
+ if (!rp)
+ {
+ rp = &_rp;
+ rp->client_index = client->client_index;
+ rp->client_pid = client->client_pid;
+ sm->enable_poller = set_client_for_stat (stat, item, rp);
+ }
+}
+
+
+/**********************************
+ * ALL Interface Combined stats - to be deprecated
+ **********************************/
+
+/*
+ * This API should be deprecated as _per_interface_ works with ~0 as sw_if_index.
+ */
+static void
+ vl_api_want_interface_combined_stats_t_handler
+ (vl_api_want_interface_combined_stats_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t rp;
+ vl_api_want_interface_combined_stats_reply_t *rmp;
+ uword *p;
+ i32 retval = 0;
+ unix_shared_memory_queue_t *q;
+ u32 swif;
+
+ swif = ~0; //Using same mechanism as _per_interface_
+ rp.client_index = mp->client_index;
+ rp.client_pid = mp->pid;
+
+ handle_client_registration (&rp, IDX_PER_INTERFACE_COMBINED_COUNTERS, swif,
+ mp->enable_disable);
+
+reply:
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ {
+ sm->enable_poller =
+ clear_client_for_stat (IDX_PER_INTERFACE_COMBINED_COUNTERS, swif,
+ mp->client_index);
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_WANT_INTERFACE_COMBINED_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = retval;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_vnet_interface_combined_counters_t_handler
+ (vl_api_vnet_interface_combined_counters_t * mp)
+{
+ vpe_client_registration_t *clients, client;
+ stats_main_t *sm = &stats_main;
+ unix_shared_memory_queue_t *q, *q_prev = NULL;
+ vl_api_vnet_interface_combined_counters_t *mp_copy = NULL;
+ u32 mp_size;
+ int i;
+
+ mp_size = sizeof (*mp) + (ntohl (mp->count) * sizeof (vlib_counter_t));
+
+ clients =
+ get_clients_for_stat (IDX_PER_INTERFACE_COMBINED_COUNTERS,
+ ~0 /*flag for all */ );
+
+ for (i = 0; i < vec_len (clients); i++)
+ {
+ client = clients[i];
+ q = vl_api_client_index_to_input_queue (client.client_index);
+ if (q)
+ {
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ mp_copy = vl_msg_api_alloc_as_if_client (mp_size);
+ clib_memcpy (mp_copy, mp, mp_size);
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ mp = mp_copy;
+ }
+ q_prev = q;
+ }
+ }
+#if STATS_DEBUG > 0
+ fformat (stdout, "%U\n", format_vnet_combined_counters, mp);
+#endif
+
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ }
+ else
+ {
+ vl_msg_api_free (mp);
+ }
+}
+
+static void
+do_combined_interface_counters (stats_main_t * sm)
+{
+ vl_api_vnet_interface_combined_counters_t *mp = 0;
+ vnet_interface_main_t *im = sm->interface_main;
+ api_main_t *am = sm->api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue;
+ vlib_combined_counter_main_t *cm;
+ u32 items_this_message = 0;
+ vlib_counter_t v, *vp = 0;
+ int i, n_counts;
+
+ vnet_interface_counter_lock (im);
+
+ vec_foreach (cm, im->combined_sw_if_counters)
+ {
+ n_counts = vlib_combined_counter_n_counters (cm);
+ for (i = 0; i < n_counts; i++)
+ {
+ if (mp == 0)
+ {
+ items_this_message = clib_min (COMBINED_COUNTER_BATCH_SIZE,
+ n_counts - i);
+
+ mp = vl_msg_api_alloc_as_if_client
+ (sizeof (*mp) + items_this_message * sizeof (v));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_INTERFACE_COMBINED_COUNTERS);
+ mp->vnet_counter_type = cm - im->combined_sw_if_counters;
+ mp->first_sw_if_index = htonl (i);
+ mp->count = 0;
+ vp = (vlib_counter_t *) mp->data;
+ }
+ vlib_get_combined_counter (cm, i, &v);
+ clib_mem_unaligned (&vp->packets, u64)
+ = clib_host_to_net_u64 (v.packets);
+ clib_mem_unaligned (&vp->bytes, u64) = clib_host_to_net_u64 (v.bytes);
+ vp++;
+ mp->count++;
+ if (mp->count == items_this_message)
+ {
+ mp->count = htonl (items_this_message);
+ /* Send to the main thread... */
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ mp = 0;
+ }
+ }
+ ASSERT (mp == 0);
+ }
+ vnet_interface_counter_unlock (im);
+}
+
+/**********************************
+ * Per Interface Combined stats
+ **********************************/
+
+/* Request from client registering interfaces it wants */
+static void
+ vl_api_want_per_interface_combined_stats_t_handler
+ (vl_api_want_per_interface_combined_stats_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t rp;
+ vl_api_want_per_interface_combined_stats_reply_t *rmp;
+ vlib_combined_counter_main_t *cm;
+ uword *p;
+ i32 retval = 0;
+ unix_shared_memory_queue_t *q;
+ int i;
+ u32 swif;
+
+ // Validate we have good sw_if_indexes before registering
+ for (i = 0; i < mp->num; i++)
+ {
+ swif = mp->sw_ifs[i];
+
+ /* Check it's a real sw_if_index that the client is allowed to see */
+ if (swif != ~0)
+ {
+ if (pool_is_free_index (sm->interface_main->sw_interfaces, swif))
+ {
+ retval = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto reply;
+ }
+ }
+ }
+
+ for (i = 0; i < mp->num; i++)
+ {
+ swif = mp->sw_ifs[i];
+
+ rp.client_index = mp->client_index;
+ rp.client_pid = mp->pid;
+ handle_client_registration (&rp, IDX_PER_INTERFACE_COMBINED_COUNTERS,
+ swif, mp->enable_disable);
+ }
+
+reply:
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ {
+ for (i = 0; i < mp->num; i++)
+ {
+ swif = mp->sw_ifs[i];
+ sm->enable_poller =
+ clear_client_for_stat (IDX_PER_INTERFACE_COMBINED_COUNTERS, swif,
+ mp->client_index);
+ }
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_WANT_PER_INTERFACE_COMBINED_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = retval;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/* Per Interface Combined distribution to client */
+static void
+do_combined_per_interface_counters (stats_main_t * sm)
+{
+ vl_api_vnet_per_interface_combined_counters_t *mp = 0;
+ vnet_interface_main_t *im = sm->interface_main;
+ api_main_t *am = sm->api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q = NULL;
+ vlib_combined_counter_main_t *cm;
+ /*
+ * items_this_message will eventually be used to optimise the batching
+ * of per-client messages for each stat. For now we set it to 1 and
+ * iterate. This does not affect the API.
+ *
+ * FIXME instead of enqueueing here, this should be sent to a per-client
+ * batcher for transmission. Each "mp" sent would be a single entry, and
+ * if a client is listening to other sw_if_indexes for the same stat, the
+ * entry would be appended to that client's pending *mp.
+ */
+ u32 items_this_message = 1;
+ vnet_combined_counter_t *vp = 0;
+ vlib_counter_t v;
+ int i, j;
+ u32 timestamp;
+ vpe_client_stats_registration_t *reg;
+ vpe_client_registration_t *client;
+ u32 *sw_if_index = 0;
+
+ /*
+ FIXME(s):
+ - capturing the timestamp of the counters "when VPP knew them" is important.
+ It matters less that the delivery to the control plane happens on the
+ same timescale.
+
+ i.e. As long as the control plane can delta messages from VPP and work out
+ velocity etc. based on the timestamp, it can do so in a more "batch mode".
+
+ It would be beneficial to keep a "per-client" message queue, and then
+ batch all the stat messages for a client into one message, with
+ discrete timestamps.
+
+ Given this particular API is per-interface, one assumes the scale is
+ smaller than the ~0 (all interfaces) case, for which the prior API is suited.
+ */
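+ /*
+ * One possible (hypothetical) shape for that per-client batching, not
+ * part of this patch: hang a pending message off each client, e.g.
+ *
+ *   typedef struct {
+ *     u32 client_index;
+ *     vl_api_vnet_per_interface_combined_counters_t *pending_mp;
+ *   } per_client_batch_t;
+ *
+ * append one entry per (sw_if_index, timestamp) and flush pending_mp
+ * once per poll interval instead of sending one message per item.
+ */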
+ vnet_interface_counter_lock (im);
+
+ timestamp = vlib_time_now (sm->vlib_main);
+
+ vec_reset_length (sm->regs_tmp);
+ pool_foreach (reg,
+ sm->stats_registrations[IDX_PER_INTERFACE_COMBINED_COUNTERS],
+ (
+ {
+ vec_add1 (sm->regs_tmp, reg);}));
+
+ for (i = 0; i < vec_len (sm->regs_tmp); i++)
+ {
+ reg = sm->regs_tmp[i];
+ if (reg->item == ~0)
+ {
+ vnet_interface_counter_unlock (im);
+ do_combined_interface_counters (sm);
+ vnet_interface_counter_lock (im);
+ continue;
+ }
+ vec_reset_length (sm->clients_tmp);
+ pool_foreach (client, reg->clients, (
+ {
+ vec_add1 (sm->clients_tmp,
+ client);}
+ ));
+
+ //FIXME - should be doing non-variant part of mp here and managing
+ // any alloc per client in that vec_foreach
+ for (j = 0; j < vec_len (sm->clients_tmp); j++)
+ {
+ client = sm->clients_tmp[j];
+ q = vl_api_client_index_to_input_queue (client->client_index);
+
+ //Client may have disconnected abruptly; clean up so we don't poll for nothing.
+ if (!q)
+ {
+ sm->enable_poller =
+ clear_client_for_stat (IDX_PER_INTERFACE_COMBINED_COUNTERS,
+ reg->item, client->client_index);
+ continue;
+ }
+
+ mp = vl_msg_api_alloc (sizeof (*mp) +
+ (items_this_message *
+ (sizeof (*vp) /* rx */ )));
+
+ // FIXME when optimising for items_this_message > 1 need to include a
+ // SIMPLE_INTERFACE_BATCH_SIZE check.
+ mp->_vl_msg_id =
+ ntohs (VL_API_VNET_PER_INTERFACE_COMBINED_COUNTERS);
+
+ mp->count = items_this_message;
+ mp->timestamp = timestamp;
+ vp = (vnet_combined_counter_t *) mp->data;
+
+ vp->sw_if_index = htonl (reg->item);
+
+ cm = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
+ vlib_get_combined_counter (cm, reg->item, &v);
+ clib_mem_unaligned (&vp->rx_packets, u64)
+ = clib_host_to_net_u64 (v.packets);
+ clib_mem_unaligned (&vp->rx_bytes, u64) =
+ clib_host_to_net_u64 (v.bytes);
+
+
+ /* TX vlib_counter_t packets/bytes */
+ cm = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
+ vlib_get_combined_counter (cm, reg->item, &v);
+ clib_mem_unaligned (&vp->tx_packets, u64)
+ = clib_host_to_net_u64 (v.packets);
+ clib_mem_unaligned (&vp->tx_bytes, u64) =
+ clib_host_to_net_u64 (v.bytes);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ }
+ }
+
+ vnet_interface_counter_unlock (im);
+}
+
+/**********************************
+ * Per Interface simple stats
+ **********************************/
+
+/* Request from client registering interfaces it wants */
+static void
+ vl_api_want_per_interface_simple_stats_t_handler
+ (vl_api_want_per_interface_simple_stats_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t rp;
+ vl_api_want_per_interface_simple_stats_reply_t *rmp;
+ vlib_simple_counter_main_t *cm;
+ uword *p;
+ i32 retval = 0;
+ unix_shared_memory_queue_t *q;
+ int i;
+ u32 swif;
+
+ for (i = 0; i < mp->num; i++)
+ {
+ swif = mp->sw_ifs[i];
+
+ /* Check it's a real sw_if_index that the client is allowed to see */
+ if (swif != ~0)
+ {
+ if (pool_is_free_index (sm->interface_main->sw_interfaces, swif))
+ {
+ retval = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto reply;
+ }
+ }
+ }
+
+ for (i = 0; i < mp->num; i++)
+ {
+ swif = mp->sw_ifs[i];
+
+ rp.client_index = mp->client_index;
+ rp.client_pid = mp->pid;
+ handle_client_registration (&rp, IDX_PER_INTERFACE_SIMPLE_COUNTERS,
+ swif, mp->enable_disable);
+ }
+
+reply:
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ //Client may have disconnected abruptly; clean up so we don't poll for nothing.
+ if (!q)
+ {
+ for (i = 0; i < mp->num; i++)
+ {
+ swif = mp->sw_ifs[i];
+ sm->enable_poller =
+ clear_client_for_stat (IDX_PER_INTERFACE_SIMPLE_COUNTERS, swif,
+ mp->client_index);
+ }
+
+ return;
+ }
+
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_WANT_PER_INTERFACE_SIMPLE_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = retval;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/* Per Interface Simple distribution to client */
+static void
+do_simple_per_interface_counters (stats_main_t * sm)
+{
+ vl_api_vnet_per_interface_simple_counters_t *mp = 0;
+ vnet_interface_main_t *im = sm->interface_main;
+ api_main_t *am = sm->api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q = NULL;
+ vlib_simple_counter_main_t *cm;
+ /*
+ * items_this_message will eventually be used to optimise the batching
+ * of per-client messages for each stat. For now we set it to 1 and
+ * iterate. This does not affect the API.
+ *
+ * FIXME instead of enqueueing here, this should be sent to a per-client
+ * batcher for transmission. Each "mp" sent would be a single entry, and
+ * if a client is listening to other sw_if_indexes for the same stat, the
+ * entry would be appended to that client's pending *mp.
+ */
+ u32 items_this_message = 1;
+ int i, j, size;
+ vpe_client_stats_registration_t *reg;
+ vpe_client_registration_t *client;
+ u32 timestamp;
+ u32 count;
+ vnet_simple_counter_t *vp = 0;
+ counter_t v;
+
+ /*
+ FIXME(s):
+ - capturing the timestamp of the counters "when VPP knew them" is important.
+ It matters less that the delivery to the control plane happens on the
+ same timescale.
+
+ i.e. As long as the control plane can delta messages from VPP and work out
+ velocity etc. based on the timestamp, it can do so in a more "batch mode".
+
+ It would be beneficial to keep a "per-client" message queue, and then
+ batch all the stat messages for a client into one message, with
+ discrete timestamps.
+
+ Given this particular API is per-interface, one assumes the scale is
+ smaller than the ~0 (all interfaces) case, for which the prior API is suited.
+ */
+ vnet_interface_counter_lock (im);
+
+ timestamp = vlib_time_now (sm->vlib_main);
+
+ vec_reset_length (sm->regs_tmp);
+ pool_foreach (reg,
+ sm->stats_registrations[IDX_PER_INTERFACE_SIMPLE_COUNTERS], (
+ {
+ vec_add1
+ (sm->regs_tmp,
+ reg);}));
+
+ for (i = 0; i < vec_len (sm->regs_tmp); i++)
+ {
+ reg = sm->regs_tmp[i];
+ if (reg->item == ~0)
+ {
+ vnet_interface_counter_unlock (im);
+ do_simple_interface_counters (sm);
+ vnet_interface_counter_lock (im);
+ continue;
+ }
+ vec_reset_length (sm->clients_tmp);
+ pool_foreach (client, reg->clients, (
+ {
+ vec_add1 (sm->clients_tmp,
+ client);}
+ ));
+
+ //FIXME - should be doing non-variant part of mp here and managing
+ // any alloc per client in that vec_foreach
+ for (j = 0; j < vec_len (sm->clients_tmp); j++)
+ {
+ client = sm->clients_tmp[j];
+ q = vl_api_client_index_to_input_queue (client->client_index);
+
+ //Client may have disconnected abruptly; clean up so we don't poll for nothing.
+ if (!q)
+ {
+ sm->enable_poller =
+ clear_client_for_stat (IDX_PER_INTERFACE_SIMPLE_COUNTERS,
+ reg->item, client->client_index);
+ continue;
+ }
+
+ size = (sizeof (*mp) + (items_this_message * (sizeof (u64) * 10)));
+ mp = vl_msg_api_alloc (size);
+ // FIXME when optimising for items_this_message > 1 need to include a
+ // SIMPLE_INTERFACE_BATCH_SIZE check.
+ mp->_vl_msg_id = ntohs (VL_API_VNET_PER_INTERFACE_SIMPLE_COUNTERS);
+
+ mp->count = items_this_message;
+ mp->timestamp = timestamp;
+ vp = (vnet_simple_counter_t *) mp->data;
+
+ vp->sw_if_index = htonl (reg->item);
+
+ //FIXME will be simpler with a preprocessor macro
+ // VNET_INTERFACE_COUNTER_DROP
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_DROP;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->drop, u64) = clib_host_to_net_u64 (v);
+
+ // VNET_INTERFACE_COUNTER_PUNT
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_PUNT;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->punt, u64) = clib_host_to_net_u64 (v);
+
+ // VNET_INTERFACE_COUNTER_IP4
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_IP4;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->rx_ip4, u64) = clib_host_to_net_u64 (v);
+
+ //VNET_INTERFACE_COUNTER_IP6
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_IP6;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->rx_ip6, u64) = clib_host_to_net_u64 (v);
+
+ //VNET_INTERFACE_COUNTER_RX_NO_BUF
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_RX_NO_BUF;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->rx_no_buffer, u64) =
+ clib_host_to_net_u64 (v);
+
+ //VNET_INTERFACE_COUNTER_RX_MISS
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_RX_MISS;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->rx_miss, u64) = clib_host_to_net_u64 (v);
+
+ //VNET_INTERFACE_COUNTER_RX_ERROR
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_RX_ERROR;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->rx_error, u64) = clib_host_to_net_u64 (v);
+
+ //VNET_INTERFACE_COUNTER_TX_ERROR
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_TX_ERROR;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->tx_error, u64) = clib_host_to_net_u64 (v);
+
+ //VNET_INTERFACE_COUNTER_MPLS
+ cm = im->sw_if_counters + VNET_INTERFACE_COUNTER_MPLS;
+ v = vlib_get_simple_counter (cm, reg->item);
+ clib_mem_unaligned (&vp->rx_mpls, u64) = clib_host_to_net_u64 (v);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ }
+ }
+
+ vnet_interface_counter_unlock (im);
+}
+
+/**********************************
+ * Per FIB IP4 stats
+ **********************************/
+
+static void
+ip46_fib_stats_delay (stats_main_t * sm, u32 sec, u32 nsec)
+{
+ struct timespec _req, *req = &_req;
+ struct timespec _rem, *rem = &_rem;
+
+ req->tv_sec = sec;
+ req->tv_nsec = nsec;
+ while (1)
+ {
+ if (nanosleep (req, rem) == 0)
+ break;
+ *req = *rem;
+ if (errno == EINTR)
+ continue;
+ clib_unix_warning ("nanosleep");
+ break;
+ }
+}
+
+/**
+ * @brief The context passed when collecting adjacency counters
+ */
+typedef struct ip4_nbr_stats_ctx_t_
+{
+ /**
+ * The SW IF index all these adjs belong to
+ */
+ u32 sw_if_index;
+
+ /**
+ * A vector of ip4 nbr counters
+ */
+ vl_api_ip4_nbr_counter_t *counters;
+} ip4_nbr_stats_ctx_t;
+
+static adj_walk_rc_t
+ip4_nbr_stats_cb (adj_index_t ai, void *arg)
+{
+ vl_api_ip4_nbr_counter_t *vl_counter;
+ vlib_counter_t adj_counter;
+ ip4_nbr_stats_ctx_t *ctx;
+ ip_adjacency_t *adj;
+
+ ctx = arg;
+ vlib_get_combined_counter (&adjacency_counters, ai, &adj_counter);
+
+ if (0 != adj_counter.packets)
+ {
+ vec_add2 (ctx->counters, vl_counter, 1);
+ adj = adj_get (ai);
+
+ vl_counter->packets = clib_host_to_net_u64 (adj_counter.packets);
+ vl_counter->bytes = clib_host_to_net_u64 (adj_counter.bytes);
+ vl_counter->address = adj->sub_type.nbr.next_hop.ip4.as_u32;
+ vl_counter->link_type = adj->ia_link;
+ }
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+
+static void
+ip4_nbr_ship (stats_main_t * sm, ip4_nbr_stats_ctx_t * ctx)
+{
+ api_main_t *am = sm->api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue;
+ vl_api_vnet_ip4_nbr_counters_t *mp = 0;
+ int first = 0;
+
+ /*
+ * If the walk context has counters, which may be left over from the last
+ * suspend, then we continue from there.
+ */
+ while (0 != vec_len (ctx->counters))
+ {
+ u32 n_items = MIN (vec_len (ctx->counters),
+ IP4_FIB_COUNTER_BATCH_SIZE);
+ u8 pause = 0;
+
+ dslock (sm, 0 /* release hint */ , 1 /* tag */ );
+
+ mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) +
+ (n_items *
+ sizeof
+ (vl_api_ip4_nbr_counter_t)));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_IP4_NBR_COUNTERS);
+ mp->count = ntohl (n_items);
+ mp->sw_if_index = ntohl (ctx->sw_if_index);
+ mp->begin = first;
+ first = 0;
+
+ /*
+ * copy the counters from the back of the context, then we can easily
+ * 'erase' them by resetting the vector length.
+ * The order we push the stats to the caller is not important.
+ */
+ clib_memcpy (mp->c,
+ &ctx->counters[vec_len (ctx->counters) - n_items],
+ n_items * sizeof (*ctx->counters));
+
+ _vec_len (ctx->counters) = vec_len (ctx->counters) - n_items;
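+
+ /*
+ * Worked example: with 100 pending counters and a batch size of 48,
+ * successive passes copy counters[52..99], then [4..51], then the
+ * remaining [0..3], shrinking the vector after each send.
+ */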
+
+ /*
+ * send to the shm q
+ */
+ unix_shared_memory_queue_lock (q);
+ pause = unix_shared_memory_queue_is_full (q);
+
+ vl_msg_api_send_shmem_nolock (q, (u8 *) & mp);
+ unix_shared_memory_queue_unlock (q);
+ dsunlock (sm);
+
+ if (pause)
+ ip46_fib_stats_delay (sm, 0 /* sec */ ,
+ STATS_RELEASE_DELAY_NS);
+ }
+}
+
+static void
+do_ip4_nbr_counters (stats_main_t * sm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *si;
+
+ ip4_nbr_stats_ctx_t ctx = {
+ .sw_if_index = 0,
+ .counters = NULL,
+ };
+
+ /* *INDENT-OFF* */
+ pool_foreach (si, im->sw_interfaces,
+ ({
+ /*
+ * update the interface we are now concerned with
+ */
+ ctx.sw_if_index = si->sw_if_index;
+
+ /*
+ * we are about to walk another interface, so we shouldn't have any pending
+ * stats to export.
+ */
+ ASSERT(ctx.counters == NULL);
+
+ /*
+ * visit each neighbour adjacency on the interface and collect
+ * its current stats.
+ * Because we hold the lock, the walk is synchronous and therefore safe
+ * with respect to routing updates. The work is bounded by the number of
+ * adjacencies on an interface, which is typically not huge.
+ */
+ dslock (sm, 0 /* release hint */ , 1 /* tag */ );
+ adj_nbr_walk (si->sw_if_index,
+ FIB_PROTOCOL_IP4,
+ ip4_nbr_stats_cb,
+ &ctx);
+ dsunlock (sm);
+
+ /*
+ * if this interface has some adjacencies with counters then ship them,
+ * else continue to the next interface.
+ */
+ if (NULL != ctx.counters)
+ {
+ ip4_nbr_ship(sm, &ctx);
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+/**
+ * @brief The context passed when collecting adjacency counters
+ */
+typedef struct ip6_nbr_stats_ctx_t_
+{
+ /**
+ * The SW IF index all these adjs belong to
+ */
+ u32 sw_if_index;
+
+ /**
+ * A vector of ip6 nbr counters
+ */
+ vl_api_ip6_nbr_counter_t *counters;
+} ip6_nbr_stats_ctx_t;
+
+static adj_walk_rc_t
+ip6_nbr_stats_cb (adj_index_t ai,
+ void *arg)
+{
+ vl_api_ip6_nbr_counter_t *vl_counter;
+ vlib_counter_t adj_counter;
+ ip6_nbr_stats_ctx_t *ctx;
+ ip_adjacency_t *adj;
+
+ ctx = arg;
+ vlib_get_combined_counter(&adjacency_counters, ai, &adj_counter);
+
+ if (0 != adj_counter.packets)
+ {
+ vec_add2(ctx->counters, vl_counter, 1);
+ adj = adj_get(ai);
+
+ vl_counter->packets = clib_host_to_net_u64(adj_counter.packets);
+ vl_counter->bytes = clib_host_to_net_u64(adj_counter.bytes);
+ vl_counter->address[0] = adj->sub_type.nbr.next_hop.ip6.as_u64[0];
+ vl_counter->address[1] = adj->sub_type.nbr.next_hop.ip6.as_u64[1];
+ vl_counter->link_type = adj->ia_link;
+ }
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+
+static void
+ip6_nbr_ship (stats_main_t * sm,
+ ip6_nbr_stats_ctx_t *ctx)
+{
+ api_main_t *am = sm->api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue;
+ vl_api_vnet_ip6_nbr_counters_t *mp = 0;
+ int first = 0;
+
+ /*
+ * If the walk context has counters, which may be left over from the last
+ * suspend, then we continue from there.
+ */
+ while (0 != vec_len(ctx->counters))
+ {
+ u32 n_items = MIN (vec_len (ctx->counters),
+ IP6_FIB_COUNTER_BATCH_SIZE);
+ u8 pause = 0;
+
+ dslock (sm, 0 /* release hint */ , 1 /* tag */ );
+
+ mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) +
+ (n_items *
+ sizeof
+ (vl_api_ip6_nbr_counter_t)));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_IP6_NBR_COUNTERS);
+ mp->count = ntohl (n_items);
+ mp->sw_if_index = ntohl (ctx->sw_if_index);
+ mp->begin = first;
+ first = 0;
+
+ /*
+ * copy the counters from the back of the context, then we can easily
+ * 'erase' them by resetting the vector length.
+ * The order we push the stats to the caller is not important.
+ */
+ clib_memcpy (mp->c,
+ &ctx->counters[vec_len (ctx->counters) - n_items],
+ n_items * sizeof (*ctx->counters));
+
+ _vec_len (ctx->counters) = vec_len (ctx->counters) - n_items;
+
+ /*
+ * send to the shm q
+ */
+ unix_shared_memory_queue_lock (q);
+ pause = unix_shared_memory_queue_is_full (q);
+
+ vl_msg_api_send_shmem_nolock (q, (u8 *) & mp);
+ unix_shared_memory_queue_unlock (q);
+ dsunlock (sm);
+
+ if (pause)
+ ip46_fib_stats_delay (sm, 0 /* sec */ ,
+ STATS_RELEASE_DELAY_NS);
+ }
+}
+
+static void
+do_ip6_nbr_counters (stats_main_t * sm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *si;
+
+ ip6_nbr_stats_ctx_t ctx = {
+ .sw_if_index = 0,
+ .counters = NULL,
+ };
+
+ /* *INDENT-OFF* */
+ pool_foreach (si, im->sw_interfaces,
+ ({
+ /*
+ * update the interface we are now concerned with
+ */
+ ctx.sw_if_index = si->sw_if_index;
+
+ /*
+ * we are about to walk another interface, so we shouldn't have any pending
+ * stats to export.
+ */
+ ASSERT(ctx.counters == NULL);
+
+ /*
+ * visit each neighbour adjacency on the interface and collect
+ * its current stats.
+ * Because we hold the lock, the walk is synchronous and therefore safe
+ * with respect to routing updates. The work is bounded by the number of
+ * adjacencies on an interface, which is typically not huge.
+ */
+ dslock (sm, 0 /* release hint */ , 1 /* tag */ );
+ adj_nbr_walk (si->sw_if_index,
+ FIB_PROTOCOL_IP6,
+ ip6_nbr_stats_cb,
+ &ctx);
+ dsunlock (sm);
+
+ /*
+ * if this interface has some adjacencies with counters then ship them,
+ * else continue to the next interface.
+ */
+ if (NULL != ctx.counters)
+ {
+ ip6_nbr_ship(sm, &ctx);
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+do_ip4_fib_counters (stats_main_t * sm)
+{
+ ip4_main_t *im4 = &ip4_main;
+ api_main_t *am = sm->api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue;
+ ip4_route_t *r;
+ fib_table_t *fib;
+ ip4_fib_t *v4_fib;
+ do_ip46_fibs_t *do_fibs;
+ vl_api_vnet_ip4_fib_counters_t *mp = 0;
+ u32 items_this_message;
+ vl_api_ip4_fib_counter_t *ctrp = 0;
+ u32 start_at_fib_index = 0;
+ int i, j, k;
+
+ do_fibs = &sm->do_ip46_fibs;
+
+again:
+ vec_reset_length (do_fibs->fibs);
+ /* *INDENT-OFF* */
+ pool_foreach (fib, im4->fibs,
+ ({vec_add1(do_fibs->fibs,fib);}));
+
+ /* *INDENT-ON* */
+
+ for (j = 0; j < vec_len (do_fibs->fibs); j++)
+ {
+ fib = do_fibs->fibs[j];
+ /* We may have bailed out due to control-plane activity */
+ if ((fib - im4->fibs) < start_at_fib_index)
+ continue;
+
+ v4_fib = pool_elt_at_index (im4->v4_fibs, fib->ft_index);
+
+ if (mp == 0)
+ {
+ items_this_message = IP4_FIB_COUNTER_BATCH_SIZE;
+ mp = vl_msg_api_alloc_as_if_client
+ (sizeof (*mp) +
+ items_this_message * sizeof (vl_api_ip4_fib_counter_t));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_IP4_FIB_COUNTERS);
+ mp->count = 0;
+ mp->vrf_id = ntohl (fib->ft_table_id);
+ ctrp = (vl_api_ip4_fib_counter_t *) mp->c;
+ }
+ else
+ {
+ /* happens if the last FIB was empty... */
+ ASSERT (mp->count == 0);
+ mp->vrf_id = ntohl (fib->ft_table_id);
+ }
+
+ dslock (sm, 0 /* release hint */ , 1 /* tag */ );
+
+ vec_reset_length (do_fibs->ip4routes);
+ vec_reset_length (do_fibs->results);
+
+ for (i = 0; i < ARRAY_LEN (v4_fib->fib_entry_by_dst_address); i++)
+ {
+ uword *hash = v4_fib->fib_entry_by_dst_address[i];
+ hash_pair_t *p;
+ ip4_route_t x;
+
+ vec_reset_length (do_fibs->pvec);
+
+ x.address_length = i;
+
+ hash_foreach_pair (p, hash, (
+ {
+ vec_add1 (do_fibs->pvec, p);}
+ ));
+ for (k = 0; k < vec_len (do_fibs->pvec); k++)
+ {
+ p = do_fibs->pvec[k];
+ x.address.data_u32 = p->key;
+ x.index = p->value[0];
+
+ vec_add1 (do_fibs->ip4routes, x);
+ if (sm->data_structure_lock->release_hint)
+ {
+ start_at_fib_index = fib - im4->fibs;
+ dsunlock (sm);
+ ip46_fib_stats_delay (sm, 0 /* sec */ ,
+ STATS_RELEASE_DELAY_NS);
+ mp->count = 0;
+ ctrp = (vl_api_ip4_fib_counter_t *) mp->c;
+ goto again;
+ }
+ }
+ }
+
+ vec_foreach (r, do_fibs->ip4routes)
+ {
+ vlib_counter_t c;
+ const dpo_id_t *dpo_id;
+ u32 index;
+
+ dpo_id = fib_entry_contribute_ip_forwarding (r->index);
+ index = (u32) dpo_id->dpoi_index;
+
+ vlib_get_combined_counter (&load_balance_main.lbm_to_counters,
+ index, &c);
+ /*
+ * If it has actually
+ * seen at least one packet, send it.
+ */
+ if (c.packets > 0)
+ {
+
+ /* already in net byte order */
+ ctrp->address = r->address.as_u32;
+ ctrp->address_length = r->address_length;
+ ctrp->packets = clib_host_to_net_u64 (c.packets);
+ ctrp->bytes = clib_host_to_net_u64 (c.bytes);
+ mp->count++;
+ ctrp++;
+
+ if (mp->count == items_this_message)
+ {
+ mp->count = htonl (items_this_message);
+ /*
+ * If the main thread's input queue is stuffed,
+ * drop the data structure lock (which the main thread
+ * may want), and take a pause.
+ */
+ unix_shared_memory_queue_lock (q);
+ if (unix_shared_memory_queue_is_full (q))
+ {
+ dsunlock (sm);
+ vl_msg_api_send_shmem_nolock (q, (u8 *) & mp);
+ unix_shared_memory_queue_unlock (q);
+ mp = 0;
+ ip46_fib_stats_delay (sm, 0 /* sec */ ,
+ STATS_RELEASE_DELAY_NS);
+ goto again;
+ }
+ vl_msg_api_send_shmem_nolock (q, (u8 *) & mp);
+ unix_shared_memory_queue_unlock (q);
+
+ items_this_message = IP4_FIB_COUNTER_BATCH_SIZE;
+ mp = vl_msg_api_alloc_as_if_client
+ (sizeof (*mp) +
+ items_this_message * sizeof (vl_api_ip4_fib_counter_t));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_IP4_FIB_COUNTERS);
+ mp->count = 0;
+ mp->vrf_id = ntohl (fib->ft_table_id);
+ ctrp = (vl_api_ip4_fib_counter_t *) mp->c;
+ }
+ } /* for each (mp or single) adj */
+ if (sm->data_structure_lock->release_hint)
+ {
+ start_at_fib_index = fib - im4->fibs;
+ dsunlock (sm);
+ ip46_fib_stats_delay (sm, 0 /* sec */ , STATS_RELEASE_DELAY_NS);
+ mp->count = 0;
+ ctrp = (vl_api_ip4_fib_counter_t *) mp->c;
+ goto again;
+ }
+ } /* vec_foreach (routes) */
+
+ dsunlock (sm);
+
+ /* Flush any data from this fib */
+ if (mp->count)
+ {
+ mp->count = htonl (mp->count);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ mp = 0;
+ }
+ }
+
+ /* If e.g. the last FIB had no reportable routes, free the buffer */
+ if (mp)
+ vl_msg_api_free (mp);
+}
+
+typedef struct
+{
+ u32 fib_index;
+ ip6_route_t **routep;
+ stats_main_t *sm;
+} add_routes_in_fib_arg_t;
+
+static void
+add_routes_in_fib (BVT (clib_bihash_kv) * kvp, void *arg)
+{
+ add_routes_in_fib_arg_t *ap = arg;
+ stats_main_t *sm = ap->sm;
+
+ if (sm->data_structure_lock->release_hint)
+ clib_longjmp (&sm->jmp_buf, 1);
+
+ if (kvp->key[2] >> 32 == ap->fib_index)
+ {
+ ip6_address_t *addr;
+ ip6_route_t *r;
+ addr = (ip6_address_t *) kvp;
+ vec_add2 (*ap->routep, r, 1);
+ r->address = addr[0];
+ r->address_length = kvp->key[2] & 0xFF;
+ r->index = kvp->value;
+ }
+}
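+
+/*
+ * The clib_longjmp above aborts the bihash walk mid-flight; the caller
+ * arms the jump buffer first. The pattern, as used verbatim in
+ * do_ip6_fib_counters below:
+ *
+ *   if (clib_setjmp (&sm->jmp_buf, 0) == 0)
+ *     BV (clib_bihash_foreach_key_value_pair) (h, add_routes_in_fib, a);
+ *   else
+ *     ... drop the lock, pause, and restart the scan ...
+ */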
+
+static void
+do_ip6_fib_counters (stats_main_t * sm)
+{
+ ip6_main_t *im6 = &ip6_main;
+ api_main_t *am = sm->api_main;
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue;
+ ip6_route_t *r;
+ fib_table_t *fib;
+ do_ip46_fibs_t *do_fibs;
+ vl_api_vnet_ip6_fib_counters_t *mp = 0;
+ u32 items_this_message;
+ vl_api_ip6_fib_counter_t *ctrp = 0;
+ u32 start_at_fib_index = 0;
+ BVT (clib_bihash) * h = &im6->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash;
+ add_routes_in_fib_arg_t _a, *a = &_a;
+ int i;
+
+ do_fibs = &sm->do_ip46_fibs;
+again:
+ vec_reset_length (do_fibs->fibs);
+ /* *INDENT-OFF* */
+ pool_foreach (fib, im6->fibs,
+ ({vec_add1(do_fibs->fibs,fib);}));
+ /* *INDENT-ON* */
+
+
+ for (i = 0; i < vec_len (do_fibs->fibs); i++)
+ {
+ fib = do_fibs->fibs[i];
+ /* We may have bailed out due to control-plane activity */
+ if ((fib - im6->fibs) < start_at_fib_index)
+ continue;
+
+ if (mp == 0)
+ {
+ items_this_message = IP6_FIB_COUNTER_BATCH_SIZE;
+ mp = vl_msg_api_alloc_as_if_client
+ (sizeof (*mp) +
+ items_this_message * sizeof (vl_api_ip6_fib_counter_t));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_IP6_FIB_COUNTERS);
+ mp->count = 0;
+ mp->vrf_id = ntohl (fib->ft_table_id);
+ ctrp = (vl_api_ip6_fib_counter_t *) mp->c;
+ }
+
+ dslock (sm, 0 /* release hint */ , 1 /* tag */ );
+
+ vec_reset_length (do_fibs->ip6routes);
+ vec_reset_length (do_fibs->results);
+
+ a->fib_index = fib - im6->fibs;
+ a->routep = &do_fibs->ip6routes;
+ a->sm = sm;
+
+ if (clib_setjmp (&sm->jmp_buf, 0) == 0)
+ {
+ start_at_fib_index = fib - im6->fibs;
+ BV (clib_bihash_foreach_key_value_pair) (h, add_routes_in_fib, a);
+ }
+ else
+ {
+ dsunlock (sm);
+ ip46_fib_stats_delay (sm, 0 /* sec */ ,
+ STATS_RELEASE_DELAY_NS);
+ mp->count = 0;
+ ctrp = (vl_api_ip6_fib_counter_t *) mp->c;
+ goto again;
+ }
+
+ vec_foreach (r, do_fibs->ip6routes)
+ {
+ vlib_counter_t c;
+
+ vlib_get_combined_counter (&load_balance_main.lbm_to_counters,
+ r->index, &c);
+ /*
+ * If it has actually
+ * seen at least one packet, send it.
+ */
+ if (c.packets > 0)
+ {
+ /* already in net byte order */
+ ctrp->address[0] = r->address.as_u64[0];
+ ctrp->address[1] = r->address.as_u64[1];
+ ctrp->address_length = (u8) r->address_length;
+ ctrp->packets = clib_host_to_net_u64 (c.packets);
+ ctrp->bytes = clib_host_to_net_u64 (c.bytes);
+ mp->count++;
+ ctrp++;
+
+ if (mp->count == items_this_message)
+ {
+ mp->count = htonl (items_this_message);
+ /*
+ * If the main thread's input queue is stuffed,
+ * drop the data structure lock (which the main thread
+ * may want), and take a pause.
+ */
+ unix_shared_memory_queue_lock (q);
+ if (unix_shared_memory_queue_is_full (q))
+ {
+ dsunlock (sm);
+ vl_msg_api_send_shmem_nolock (q, (u8 *) & mp);
+ unix_shared_memory_queue_unlock (q);
+ mp = 0;
+ ip46_fib_stats_delay (sm, 0 /* sec */ ,
+ STATS_RELEASE_DELAY_NS);
+ goto again;
+ }
+ vl_msg_api_send_shmem_nolock (q, (u8 *) & mp);
+ unix_shared_memory_queue_unlock (q);
+
+ items_this_message = IP6_FIB_COUNTER_BATCH_SIZE;
+ mp = vl_msg_api_alloc_as_if_client
+ (sizeof (*mp) +
+ items_this_message * sizeof (vl_api_ip6_fib_counter_t));
+ mp->_vl_msg_id = ntohs (VL_API_VNET_IP6_FIB_COUNTERS);
+ mp->count = 0;
+ mp->vrf_id = ntohl (fib->ft_table_id);
+ ctrp = (vl_api_ip6_fib_counter_t *) mp->c;
+ }
+ }
+
+ if (sm->data_structure_lock->release_hint)
+ {
+ start_at_fib_index = fib - im6->fibs;
+ dsunlock (sm);
+ ip46_fib_stats_delay (sm, 0 /* sec */ , STATS_RELEASE_DELAY_NS);
+ mp->count = 0;
+ ctrp = (vl_api_ip6_fib_counter_t *) mp->c;
+ goto again;
+ }
+ } /* vec_foreach (routes) */
+
+ dsunlock (sm);
+
+ /* Flush any data from this fib */
+ if (mp->count)
+ {
+ mp->count = htonl (mp->count);
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+ mp = 0;
+ }
+ }
+
+ /* If e.g. the last FIB had no reportable routes, free the buffer */
+ if (mp)
+ vl_msg_api_free (mp);
+}
+
+static void
+stats_thread_fn (void *arg)
+{
+ stats_main_t *sm = &stats_main;
+ vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ /* stats thread wants no signals. */
+ {
+ sigset_t s;
+ sigfillset (&s);
+ pthread_sigmask (SIG_SETMASK, &s, 0);
+ }
+
+ if (vec_len (tm->thread_prefix))
+ vlib_set_thread_name ((char *)
+ format (0, "%v_stats%c", tm->thread_prefix, '\0'));
+
+ clib_mem_set_heap (w->thread_mheap);
+
+ while (1)
+ {
+ /* 10 second poll interval */
+ ip46_fib_stats_delay (sm, 10 /* secs */ , 0 /* nsec */ );
+
+ if (!(sm->enable_poller))
+ {
+ continue;
+ }
+ if (pool_elts
+ (sm->stats_registrations[IDX_PER_INTERFACE_COMBINED_COUNTERS]))
+ do_combined_per_interface_counters (sm);
+
+ if (pool_elts
+ (sm->stats_registrations[IDX_PER_INTERFACE_SIMPLE_COUNTERS]))
+ do_simple_per_interface_counters (sm);
+
+ if (pool_elts (sm->stats_registrations[IDX_IP4_FIB_COUNTERS]))
+ do_ip4_fib_counters (sm);
+
+ if (pool_elts (sm->stats_registrations[IDX_IP6_FIB_COUNTERS]))
+ do_ip6_fib_counters (sm);
+
+ if (pool_elts (sm->stats_registrations[IDX_IP4_NBR_COUNTERS]))
+ do_ip4_nbr_counters (sm);
+
+ if (pool_elts (sm->stats_registrations[IDX_IP6_NBR_COUNTERS]))
+ do_ip6_nbr_counters (sm);
+ }
+}
+
+static void
+ vl_api_vnet_interface_simple_counters_t_handler
+ (vl_api_vnet_interface_simple_counters_t * mp)
+{
+ vpe_client_registration_t *clients, client;
+ stats_main_t *sm = &stats_main;
+ unix_shared_memory_queue_t *q, *q_prev = NULL;
+ vl_api_vnet_interface_simple_counters_t *mp_copy = NULL;
+ u32 mp_size;
+ int i;
+
+ mp_size = sizeof (*mp) + (ntohl (mp->count) * sizeof (u64));
+
+ clients =
+ get_clients_for_stat (IDX_PER_INTERFACE_SIMPLE_COUNTERS,
+ ~0 /*flag for all */ );
+
+ for (i = 0; i < vec_len (clients); i++)
+ {
+ client = clients[i];
+ q = vl_api_client_index_to_input_queue (client.client_index);
+ if (q)
+ {
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ mp_copy = vl_msg_api_alloc_as_if_client (mp_size);
+ clib_memcpy (mp_copy, mp, mp_size);
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ mp = mp_copy;
+ }
+ q_prev = q;
+ }
+ else
+ {
+ sm->enable_poller =
+ clear_client_for_stat (IDX_PER_INTERFACE_SIMPLE_COUNTERS, ~0,
+ client.client_index);
+ continue;
+ }
+ }
+
+#if STATS_DEBUG > 0
+ fformat (stdout, "%U\n", format_vnet_simple_counters, mp);
+#endif
+
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ }
+ else
+ {
+ vl_msg_api_free (mp);
+ }
+}
+
+static void
+vl_api_vnet_ip4_fib_counters_t_handler (vl_api_vnet_ip4_fib_counters_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ unix_shared_memory_queue_t *q, *q_prev = NULL;
+ vl_api_vnet_ip4_fib_counters_t *mp_copy = NULL;
+ u32 mp_size;
+ vpe_client_registration_t *clients, client;
+ int i;
+
+ mp_size = sizeof (*mp_copy) +
+ ntohl (mp->count) * sizeof (vl_api_ip4_fib_counter_t);
+
+ clients =
+ get_clients_for_stat (IDX_IP4_FIB_COUNTERS, ~0 /*flag for all */ );
+
+ for (i = 0; i < vec_len (clients); i++)
+ {
+ client = clients[i];
+ q = vl_api_client_index_to_input_queue (client.client_index);
+ if (q)
+ {
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ mp_copy = vl_msg_api_alloc_as_if_client (mp_size);
+ clib_memcpy (mp_copy, mp, mp_size);
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ mp = mp_copy;
+ }
+ q_prev = q;
+ }
+ else
+ {
+ sm->enable_poller = clear_client_for_stat (IDX_IP4_FIB_COUNTERS,
+ ~0, client.client_index);
+ continue;
+ }
+ }
+
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ }
+ else
+ {
+ vl_msg_api_free (mp);
+ }
+}
+
+static void
+vl_api_vnet_ip4_nbr_counters_t_handler (vl_api_vnet_ip4_nbr_counters_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ unix_shared_memory_queue_t *q, *q_prev = NULL;
+ vl_api_vnet_ip4_nbr_counters_t *mp_copy = NULL;
+ u32 mp_size;
+ vpe_client_registration_t *clients, client;
+ int i;
+
+ mp_size = sizeof (*mp_copy) +
+ ntohl (mp->count) * sizeof (vl_api_ip4_nbr_counter_t);
+
+ clients =
+ get_clients_for_stat (IDX_IP4_NBR_COUNTERS, ~0 /*flag for all */ );
+
+ for (i = 0; i < vec_len (clients); i++)
+ {
+ client = clients[i];
+ q = vl_api_client_index_to_input_queue (client.client_index);
+ if (q)
+ {
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ mp_copy = vl_msg_api_alloc_as_if_client (mp_size);
+ clib_memcpy (mp_copy, mp, mp_size);
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ mp = mp_copy;
+ }
+ q_prev = q;
+ }
+ else
+ {
+ sm->enable_poller = clear_client_for_stat (IDX_IP4_NBR_COUNTERS,
+ ~0, client.client_index);
+ continue;
+ }
+ }
+
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ }
+ else
+ {
+ vl_msg_api_free (mp);
+ }
+}
+
+static void
+vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ unix_shared_memory_queue_t *q, *q_prev = NULL;
+ vl_api_vnet_ip6_fib_counters_t *mp_copy = NULL;
+ u32 mp_size;
+ vpe_client_registration_t *clients, client;
+ int i;
+
+ mp_size = sizeof (*mp_copy) +
+ ntohl (mp->count) * sizeof (vl_api_ip6_fib_counter_t);
+
+ clients =
+ get_clients_for_stat (IDX_IP6_FIB_COUNTERS, ~0 /*flag for all */ );
+
+ for (i = 0; i < vec_len (clients); i++)
+ {
+ client = clients[i];
+ q = vl_api_client_index_to_input_queue (client.client_index);
+ if (q)
+ {
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ mp_copy = vl_msg_api_alloc_as_if_client (mp_size);
+ clib_memcpy (mp_copy, mp, mp_size);
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ mp = mp_copy;
+ }
+ q_prev = q;
+ }
+ else
+ {
+ sm->enable_poller = clear_client_for_stat (IDX_IP6_FIB_COUNTERS,
+ ~0, client.client_index);
+ continue;
+ }
+ }
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ }
+ else
+ {
+ vl_msg_api_free (mp);
+ }
+}
+
+static void
+vl_api_vnet_ip6_nbr_counters_t_handler (vl_api_vnet_ip6_nbr_counters_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ unix_shared_memory_queue_t *q, *q_prev = NULL;
+ vl_api_vnet_ip6_nbr_counters_t *mp_copy = NULL;
+ u32 mp_size;
+ vpe_client_registration_t *clients, client;
+ int i;
+
+ mp_size = sizeof (*mp_copy) +
+ ntohl (mp->count) * sizeof (vl_api_ip6_nbr_counter_t);
+
+ clients =
+ get_clients_for_stat (IDX_IP6_NBR_COUNTERS, ~0 /*flag for all */ );
+
+ for (i = 0; i < vec_len (clients); i++)
+ {
+ client = clients[i];
+ q = vl_api_client_index_to_input_queue (client.client_index);
+ if (q)
+ {
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ mp_copy = vl_msg_api_alloc_as_if_client (mp_size);
+ clib_memcpy (mp_copy, mp, mp_size);
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ mp = mp_copy;
+ }
+ q_prev = q;
+ }
+ else
+ {
+ sm->enable_poller = clear_client_for_stat (IDX_IP6_NBR_COUNTERS,
+ ~0, client.client_index);
+ continue;
+ }
+ }
+ if (q_prev && (q_prev->cursize < q_prev->maxsize))
+ {
+ vl_msg_api_send_shmem (q_prev, (u8 *) & mp);
+ }
+ else
+ {
+ vl_msg_api_free (mp);
+ }
+}
+
+static void
+vl_api_want_stats_t_handler (vl_api_want_stats_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t rp;
+ vl_api_want_stats_reply_t *rmp;
+ uword *p;
+ i32 retval = 0;
+ u32 item;
+ unix_shared_memory_queue_t *q;
+
+ item = ~0; // ~0 => "all the things": register for every item
+ rp.client_index = mp->client_index;
+ rp.client_pid = mp->pid;
+
+ handle_client_registration (&rp, IDX_PER_INTERFACE_SIMPLE_COUNTERS,
+ item, mp->enable_disable);
+
+ handle_client_registration (&rp, IDX_PER_INTERFACE_COMBINED_COUNTERS,
+ item, mp->enable_disable);
+
+ handle_client_registration (&rp, IDX_IP4_FIB_COUNTERS,
+ item, mp->enable_disable);
+
+ handle_client_registration (&rp, IDX_IP4_NBR_COUNTERS,
+ item, mp->enable_disable);
+
+ handle_client_registration (&rp, IDX_IP6_FIB_COUNTERS,
+ item, mp->enable_disable);
+
+ handle_client_registration (&rp, IDX_IP6_NBR_COUNTERS,
+ item, mp->enable_disable);
+
+reply:
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_WANT_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = retval;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+ vl_api_want_interface_simple_stats_t_handler
+ (vl_api_want_interface_simple_stats_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t rp;
+ vl_api_want_interface_simple_stats_reply_t *rmp;
+ uword *p;
+ i32 retval = 0;
+ u32 swif;
+ unix_shared_memory_queue_t *q;
+
+ swif = ~0; //Using same mechanism as _per_interface_
+ rp.client_index = mp->client_index;
+ rp.client_pid = mp->pid;
+
+ handle_client_registration (&rp, IDX_PER_INTERFACE_SIMPLE_COUNTERS, swif,
+ mp->enable_disable);
+
+reply:
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ {
+ sm->enable_poller =
+ clear_client_for_stat (IDX_PER_INTERFACE_SIMPLE_COUNTERS, swif,
+ mp->client_index);
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_WANT_INTERFACE_SIMPLE_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = retval;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+
+static void
+vl_api_want_ip4_fib_stats_t_handler (vl_api_want_ip4_fib_stats_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t rp;
+ vl_api_want_ip4_fib_stats_reply_t *rmp;
+ uword *p;
+ i32 retval = 0;
+ unix_shared_memory_queue_t *q;
+ u32 fib;
+
+ fib = ~0; //Using same mechanism as _per_interface_
+ rp.client_index = mp->client_index;
+ rp.client_pid = mp->pid;
+
+ handle_client_registration (&rp, IDX_IP4_FIB_COUNTERS, fib,
+ mp->enable_disable);
+
+reply:
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ {
+ sm->enable_poller = clear_client_for_stat (IDX_IP4_FIB_COUNTERS,
+ fib, mp->client_index);
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_WANT_IP4_FIB_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = retval;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_want_ip6_fib_stats_t_handler (vl_api_want_ip6_fib_stats_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ vpe_client_registration_t rp;
+ vl_api_want_ip6_fib_stats_reply_t *rmp;
+ uword *p;
+ i32 retval = 0;
+ unix_shared_memory_queue_t *q;
+ u32 fib;
+
+ fib = ~0; //Using same mechanism as _per_interface_
+ rp.client_index = mp->client_index;
+ rp.client_pid = mp->pid;
+
+ handle_client_registration (&rp, IDX_IP6_FIB_COUNTERS, fib,
+ mp->enable_disable);
+
+reply:
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ {
+ sm->enable_poller = clear_client_for_stat (IDX_IP6_FIB_COUNTERS,
+ fib, mp->client_index);
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_WANT_IP6_FIB_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = retval;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/* FIXME - NBR stats broken - this will be fixed in subsequent patch */
+static void
+vl_api_want_ip4_nbr_stats_t_handler (vl_api_want_ip4_nbr_stats_t * mp)
+{
+}
+
+static void
+vl_api_want_ip6_nbr_stats_t_handler (vl_api_want_ip6_nbr_stats_t * mp)
+{
+}
+
+static void
+vl_api_vnet_get_summary_stats_t_handler (vl_api_vnet_get_summary_stats_t * mp)
+{
+ stats_main_t *sm = &stats_main;
+ vnet_interface_main_t *im = sm->interface_main;
+ vl_api_vnet_get_summary_stats_reply_t *rmp;
+ vlib_combined_counter_main_t *cm;
+ vlib_counter_t v;
+ int i, which;
+ u64 total_pkts[VLIB_N_RX_TX];
+ u64 total_bytes[VLIB_N_RX_TX];
+
+ unix_shared_memory_queue_t *q =
+ vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ {
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_VNET_GET_SUMMARY_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = 0;
+
+ memset (total_pkts, 0, sizeof (total_pkts));
+ memset (total_bytes, 0, sizeof (total_bytes));
+
+ vnet_interface_counter_lock (im);
+
+ vec_foreach (cm, im->combined_sw_if_counters)
+ {
+ which = cm - im->combined_sw_if_counters;
+
+ for (i = 0; i < vlib_combined_counter_n_counters (cm); i++)
+ {
+ vlib_get_combined_counter (cm, i, &v);
+ total_pkts[which] += v.packets;
+ total_bytes[which] += v.bytes;
+ }
+ }
+ vnet_interface_counter_unlock (im);
+
+ rmp->total_pkts[VLIB_RX] = clib_host_to_net_u64 (total_pkts[VLIB_RX]);
+ rmp->total_bytes[VLIB_RX] = clib_host_to_net_u64 (total_bytes[VLIB_RX]);
+ rmp->total_pkts[VLIB_TX] = clib_host_to_net_u64 (total_pkts[VLIB_TX]);
+ rmp->total_bytes[VLIB_TX] = clib_host_to_net_u64 (total_bytes[VLIB_TX]);
+ rmp->vector_rate =
+ clib_host_to_net_u64 (vlib_last_vector_length_per_node (sm->vlib_main));
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+int
+stats_memclnt_delete_callback (u32 client_index)
+{
+ vpe_client_stats_registration_t *rp;
+ stats_main_t *sm = &stats_main;
+ uword *p;
+
+ // FIXME
+ /* p = hash_get (sm->stats_registration_hash, client_index); */
+ /* if (p) */
+ /* { */
+ /* rp = pool_elt_at_index (sm->stats_registrations, p[0]); */
+ /* pool_put (sm->stats_registrations, rp); */
+ /* hash_unset (sm->stats_registration_hash, client_index); */
+ /* } */
+
+ return 0;
+}
+
+#define vl_api_vnet_interface_simple_counters_t_endian vl_noop_handler
+#define vl_api_vnet_interface_simple_counters_t_print vl_noop_handler
+#define vl_api_vnet_interface_combined_counters_t_endian vl_noop_handler
+#define vl_api_vnet_interface_combined_counters_t_print vl_noop_handler
+#define vl_api_vnet_ip4_fib_counters_t_endian vl_noop_handler
+#define vl_api_vnet_ip4_fib_counters_t_print vl_noop_handler
+#define vl_api_vnet_ip6_fib_counters_t_endian vl_noop_handler
+#define vl_api_vnet_ip6_fib_counters_t_print vl_noop_handler
+#define vl_api_vnet_ip4_nbr_counters_t_endian vl_noop_handler
+#define vl_api_vnet_ip4_nbr_counters_t_print vl_noop_handler
+#define vl_api_vnet_ip6_nbr_counters_t_endian vl_noop_handler
+#define vl_api_vnet_ip6_nbr_counters_t_print vl_noop_handler
+
+static clib_error_t *
+stats_init (vlib_main_t * vm)
+{
+ stats_main_t *sm = &stats_main;
+ api_main_t *am = &api_main;
+ void *vlib_worker_thread_bootstrap_fn (void *arg);
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main ();
+ sm->interface_main = &vnet_get_main ()->interface_main;
+ sm->api_main = am;
+ sm->stats_poll_interval_in_seconds = 10;
+ sm->data_structure_lock =
+ clib_mem_alloc_aligned (sizeof (data_structure_lock_t),
+ CLIB_CACHE_LINE_BYTES);
+ memset (sm->data_structure_lock, 0, sizeof (*sm->data_structure_lock));
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 0 /* do NOT trace! */);
+ foreach_stats_msg;
+#undef _
+
+ /* tell the msg infra not to free these messages... */
+ am->message_bounce[VL_API_VNET_INTERFACE_SIMPLE_COUNTERS] = 1;
+ am->message_bounce[VL_API_VNET_INTERFACE_COMBINED_COUNTERS] = 1;
+ am->message_bounce[VL_API_VNET_IP4_FIB_COUNTERS] = 1;
+ am->message_bounce[VL_API_VNET_IP6_FIB_COUNTERS] = 1;
+ am->message_bounce[VL_API_VNET_IP4_NBR_COUNTERS] = 1;
+ am->message_bounce[VL_API_VNET_IP6_NBR_COUNTERS] = 1;
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ vec_validate (sm->stats_registrations, STATS_REG_N_IDX);
+ vec_validate (sm->stats_registration_hash, STATS_REG_N_IDX);
+#define stats_reg(n) \
+ sm->stats_registrations[IDX_##n] = 0; \
+ sm->stats_registration_hash[IDX_##n] = 0;
+#include <vpp/stats/stats.reg>
+#undef stats_reg
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (stats_init);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_THREAD (stats_thread_reg, static) = {
+ .name = "stats",
+ .function = stats_thread_fn,
+ .fixed_count = 1,
+ .count = 1,
+ .no_data_structure_clone = 1,
+ .use_pthreads = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/stats/stats.h b/src/vpp/stats/stats.h
new file mode 100644
index 00000000..042bcb65
--- /dev/null
+++ b/src/vpp/stats/stats.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_stats_h__
+#define __included_stats_h__
+
+#include <time.h>
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/interface.h>
+#include <pthread.h>
+#include <vlib/threads.h>
+#include <vnet/fib/fib_table.h>
+#include <vlib/unix/unix.h>
+#include <vlibmemory/api.h>
+#include <vlibmemory/unix_shared_memory_queue.h>
+#include <vlibapi/api_helper_macros.h>
+
+typedef struct
+{
+ volatile u32 lock;
+ volatile u32 release_hint;
+ u32 thread_index;
+ u32 count;
+ int tag;
+} data_structure_lock_t;
+
+/**
+ * @brief stats request registration indexes
+ *
+ */
+/* from .../vnet/vnet/ip/lookup.c. Yuck */
+typedef CLIB_PACKED (struct
+ {
+ ip4_address_t address;
+ u32 address_length : 6;
+ u32 index : 26;
+ }) ip4_route_t;
+
+/* see interface.api */
+typedef struct
+{
+ u32 sw_if_index;
+ u64 drop;
+ u64 punt;
+ u64 rx_ip4;
+ u64 rx_ip6;
+ u64 rx_no_buffer;
+ u64 rx_miss;
+ u64 rx_error;
+ u64 tx_error;
+ u64 rx_mpls;
+} vnet_simple_counter_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u64 rx_packets; /**< packet counter */
+ u64 rx_bytes; /**< byte counter */
+ u64 tx_packets; /**< packet counter */
+ u64 tx_bytes; /**< byte counter */
+} vnet_combined_counter_t;
+
+typedef struct
+{
+ ip6_address_t address;
+ u32 address_length;
+ u32 index;
+} ip6_route_t;
+
+
+typedef struct
+{
+ ip4_route_t *ip4routes;
+ ip6_route_t *ip6routes;
+ fib_table_t **fibs;
+ hash_pair_t **pvec;
+ uword *results;
+} do_ip46_fibs_t;
+
+typedef struct
+{
+ u16 msg_id;
+ u32 size;
+ u32 client_index;
+ u32 context;
+ i32 retval;
+} client_registration_reply_t;
+
+typedef enum
+{
+#define stats_reg(n) IDX_##n,
+#include <vpp/stats/stats.reg>
+#undef stats_reg
+ STATS_REG_N_IDX,
+} stats_reg_index_t;
+
+typedef struct
+{
+ //Standard client information
+ uword *client_hash;
+ vpe_client_registration_t *clients;
+ u32 item;
+
+} vpe_client_stats_registration_t;
+
+
+typedef struct
+{
+ void *mheap;
+ pthread_t thread_self;
+ pthread_t thread_handle;
+
+ u32 stats_poll_interval_in_seconds;
+ u32 enable_poller;
+
+ /*
+ * stats_registrations is a vector, indexed by
+ * IDX_xxxx_COUNTER generated for each streaming
+ * stat a client can register for. (see stats.reg)
+ *
+ * The values in the vector refer to pools.
+ *
+ * The pool is of type vpe_client_stats_registration_t
+ *
+ * This typedef consists of:
+ *
+ * u32 item: This is the instance of the IDX_xxxx_COUNTER a
+ * client is interested in.
+ * vpe_client_registration_t *clients: The list of clients interested.
+ *
+ * e.g.
+ * stats_registrations[IDX_INTERFACE_SIMPLE_COUNTERS] refers to a pool
+ * containing elements:
+ *
+ * u32 item = sw_if_index1
+ * clients = ["clienta","clientb"]
+ *
+ * When clients == NULL the pool element is freed. When the pool is
+ * empty, i.e.
+ *
+ * 0 == pool_elts (stats_registrations[IDX_INTERFACE_SIMPLE_COUNTERS]),
+ *
+ * there is no need to process INTERFACE_SIMPLE_COUNTERS at all.
+ *
+ * Note that u32 item = ~0 is the simple case for ALL interfaces or fibs.
+ *
+ */
+
+ uword **stats_registration_hash;
+ vpe_client_stats_registration_t **stats_registrations;
+
+ /* control-plane data structure lock */
+ data_structure_lock_t *data_structure_lock;
+
+ /* bail out of FIB walk if set */
+ clib_longjmp_t jmp_buf;
+
+ /* Vectors for Distribution funcs: do_ip4_fibs and do_ip6_fibs. */
+ do_ip46_fibs_t do_ip46_fibs;
+
+ /*
+ * Working vectors, kept across calls so as not to thrash the
+ * memory allocator; this effectively makes them "static".
+ */
+ vpe_client_stats_registration_t **regs_tmp;
+ vpe_client_registration_t **clients_tmp;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ vnet_interface_main_t *interface_main;
+ api_main_t *api_main;
+} stats_main_t;
+
+stats_main_t stats_main;
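+
+/*
+ * A lookup sketch (illustrative only; assumes, per the comment in
+ * stats_main_t above, that stats_registration_hash[reg_idx] maps an
+ * item to its index in the stats_registrations[reg_idx] pool):
+ */
+#ifdef STATS_REGISTRATION_EXAMPLE
+static inline vpe_client_stats_registration_t *
+stats_registration_lookup (stats_main_t * sm, u32 reg_idx, u32 item)
+{
+  uword *p = hash_get (sm->stats_registration_hash[reg_idx], item);
+  if (p == 0)
+    return 0;
+  return pool_elt_at_index (sm->stats_registrations[reg_idx], p[0]);
+}
+#endif /* STATS_REGISTRATION_EXAMPLE */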
+
+void dslock (stats_main_t * sm, int release_hint, int tag);
+void dsunlock (stats_main_t * sm);
+
+#endif /* __included_stats_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/stats/stats.reg b/src/vpp/stats/stats.reg
new file mode 100644
index 00000000..d76443c4
--- /dev/null
+++ b/src/vpp/stats/stats.reg
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Client stats registrations
+ *
+ * Each entry is a specific REGISTRATION as
+ * opposed to an API call.
+ *
+ * For instance:
+ * want_stats()
+ * enables/disables ALL stats entries below for all
+ * ITEM.
+ *
+ * An item is an instance of the thing we want stats for,
+ * such as a FIB table or an interface's sw_if_index. In each
+ * case the value ~0 is treated as ALL items.
+ *
+ * As such want_interface_simple_counters() is translated to
+ * want_per_interface_simple_counters(item=~0)
+ */
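+
+/*
+ * This file is consumed via the stats_reg() macro. For example,
+ * stats.h builds the registration-index enum with:
+ *
+ * #define stats_reg(n) IDX_##n,
+ * #include <vpp/stats/stats.reg>
+ * #undef stats_reg
+ */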
+
+
+stats_reg (IP4_FIB_COUNTERS)
+stats_reg (IP4_NBR_COUNTERS)
+stats_reg (IP6_FIB_COUNTERS)
+stats_reg (IP6_NBR_COUNTERS)
+stats_reg (PER_INTERFACE_COMBINED_COUNTERS)
+stats_reg (PER_INTERFACE_SIMPLE_COUNTERS)
+
diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c
new file mode 100644
index 00000000..b330f60f
--- /dev/null
+++ b/src/vpp/vnet/main.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/cpu.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vpp/app/version.h>
+#include <vpp/api/vpe_msg_enum.h>
+
+
+static void
+vpe_main_init (vlib_main_t * vm)
+{
+ void vat_plugin_hash_create (void);
+
+ if (CLIB_DEBUG > 0)
+ vlib_unix_cli_set_prompt ("DBGvpp# ");
+ else
+ vlib_unix_cli_set_prompt ("vpp# ");
+
+ /* Turn off network stack components which we don't want */
+ vlib_mark_init_function_complete (vm, srp_init);
+
+ /*
+ * Create the binary api plugin hashes before loading plugins
+ */
+ vat_plugin_hash_create ();
+}
+
+/*
+ * Default path for runtime data
+ */
+char *vlib_default_runtime_dir = "vpp";
+
+/*
+ * Load plugins from /usr/lib/vpp_plugins by default
+ */
+char *vlib_plugin_path = "/usr/lib/vpp_plugins";
+char *vlib_plugin_app_version = VPP_BUILD_VER;
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+ vlib_main_t *vm = &vlib_global_main;
+ void vl_msg_api_set_first_available_msg_id (u16);
+ uword main_heap_size = (1ULL << 30);
+ u8 *sizep;
+ u32 size;
+
+#if __x86_64__
+ CLIB_UNUSED (const char *msg)
+ = "ERROR: This binary requires CPU with %s extensions.\n";
+#define _(a,b) \
+ if (!clib_cpu_supports_ ## a ()) \
+ { \
+ fprintf(stderr, msg, b); \
+ exit(1); \
+ }
+
+#if __AVX2__
+ _(avx2, "AVX2")
+#endif
+#if __AVX__
+ _(avx, "AVX")
+#endif
+#if __SSE4_2__
+ _(sse42, "SSE4.2")
+#endif
+#if __SSE4_1__
+ _(sse41, "SSE4.1")
+#endif
+#if __SSSE3__
+ _(ssse3, "SSSE3")
+#endif
+#if __SSE3__
+ _(sse3, "SSE3")
+#endif
+#undef _
+#endif
+ /*
+ * Load startup config from file.
+ * usage: vpp -c /etc/vpp/startup.conf
+ */
+ if ((argc == 3) && !strncmp (argv[1], "-c", 2))
+ {
+ FILE *fp;
+ char inbuf[4096];
+ int argc_ = 1;
+ char **argv_ = NULL;
+ char *arg = NULL;
+ char *p;
+
+ fp = fopen (argv[2], "r");
+ if (fp == NULL)
+ {
+ fprintf (stderr, "open configuration file '%s' failed\n", argv[2]);
+ return 1;
+ }
+ argv_ = calloc (1, sizeof (char *));
+ if (argv_ == NULL)
+ return 1;
+ arg = strndup (argv[0], 1024);
+ if (arg == NULL)
+ return 1;
+ argv_[0] = arg;
+
+ while (1)
+ {
+ if (fgets (inbuf, 4096, fp) == 0)
+ break;
+ p = strtok (inbuf, " \t\n");
+ while (p != NULL)
+ {
+ if (*p == '#')
+ break;
+ argc_++;
+ char **tmp = realloc (argv_, argc_ * sizeof (char *));
+ if (tmp == NULL)
+ return 1;
+ argv_ = tmp;
+ arg = strndup (p, 1024);
+ if (arg == NULL)
+ return 1;
+ argv_[argc_ - 1] = arg;
+ p = strtok (NULL, " \t\n");
+ }
+ }
+
+ fclose (fp);
+
+ char **tmp = realloc (argv_, (argc_ + 1) * sizeof (char *));
+ if (tmp == NULL)
+ return 1;
+ argv_ = tmp;
+ argv_[argc_] = NULL;
+
+ argc = argc_;
+ argv = argv_;
+ }
+
+ /*
+ * Look for and parse the "heapsize" config parameter.
+ * Manual since none of the clib infra has been bootstrapped yet.
+ *
+ * Format: heapsize <nn>[mM][gG]
+ */
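+ /* e.g. a startup.conf line "heapsize 2G" yields main_heap_size = 2ULL << 30 */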
+
+ for (i = 1; i < (argc - 1); i++)
+ {
+ if (!strncmp (argv[i], "plugin_path", 11))
+ {
+ if (i < (argc - 1))
+ vlib_plugin_path = argv[++i];
+ }
+ else if (!strncmp (argv[i], "heapsize", 8))
+ {
+ sizep = (u8 *) argv[i + 1];
+ size = 0;
+ while (*sizep >= '0' && *sizep <= '9')
+ {
+ size *= 10;
+ size += *sizep++ - '0';
+ }
+ if (size == 0)
+ {
+ fprintf
+ (stderr,
+ "warning: heapsize parse error '%s', use default %lld\n",
+ argv[i], (long long int) main_heap_size);
+ goto defaulted;
+ }
+
+ main_heap_size = size;
+
+ if (*sizep == 'g' || *sizep == 'G')
+ main_heap_size <<= 30;
+ else if (*sizep == 'm' || *sizep == 'M')
+ main_heap_size <<= 20;
+ }
+ }
+
+defaulted:
+
+ /* Set up the plugin message ID allocator right now... */
+ vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE);
+
+ /* Allocate main heap */
+ if (clib_mem_init (0, main_heap_size))
+ {
+ vm->init_functions_called = hash_create (0, /* value bytes */ 0);
+ vpe_main_init (vm);
+ return vlib_unix_main (argc, argv);
+ }
+ else
+ {
+ {
+ int rv __attribute__ ((unused)) =
+ write (2, "Main heap allocation failure!\r\n", 31);
+ }
+ return 1;
+ }
+}
+
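+/*
+ * "heapsize" is parsed manually in main() above, before the clib
+ * infrastructure exists. This config function only validates and
+ * consumes the token so vlib does not reject it as unknown input.
+ */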
+static clib_error_t *
+heapsize_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ u32 junk;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%dm", &junk)
+ || unformat (input, "%dM", &junk)
+ || unformat (input, "%dg", &junk) || unformat (input, "%dG", &junk))
+ return 0;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (heapsize_config, "heapsize");
+
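+/*
+ * Likewise, "plugin_path" is consumed manually in main() above; this
+ * handler just keeps the vlib config parser satisfied.
+ */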
+static clib_error_t *
+plugin_path_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ u8 *junk;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%s", &junk))
+ {
+ vec_free (junk);
+ return 0;
+ }
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (plugin_path_config, "plugin_path");
+
+void vl_msg_api_post_mortem_dump (void);
+void elog_post_mortem_dump (void);
+
+void
+os_panic (void)
+{
+ vl_msg_api_post_mortem_dump ();
+ elog_post_mortem_dump ();
+ abort ();
+}
+
+void vhost_user_unmap_all (void) __attribute__ ((weak));
+void
+vhost_user_unmap_all (void)
+{
+}
+
+void
+os_exit (int code)
+{
+ static int recursion_block;
+
+ if (code)
+ {
+ if (recursion_block)
+ abort ();
+
+ recursion_block = 1;
+
+ vl_msg_api_post_mortem_dump ();
+ elog_post_mortem_dump ();
+ vhost_user_unmap_all ();
+ abort ();
+ }
+ exit (code);
+}
+
+#ifdef BARRIER_TRACING
+void
+vl_msg_api_barrier_trace_context (const char *context)
+{
+ vlib_worker_threads[0].barrier_context = context;
+}
+#endif
+
+void
+vl_msg_api_barrier_sync (void)
+{
+ vlib_worker_thread_barrier_sync (vlib_get_main ());
+}
+
+void
+vl_msg_api_barrier_release (void)
+{
+ vlib_worker_thread_barrier_release (vlib_get_main ());
+}
+
+/* This application needs 1 thread stack for the stats pthread */
+u32
+vlib_app_num_thread_stacks_needed (void)
+{
+ return 1;
+}
+
+/*
+ * Depending on the configuration selected above,
+ * it may be necessary to generate stub graph nodes.
+ * It is never OK to ignore "node 'x' refers to unknown node 'y'
+ * messages!
+ */
+
+#if CLIB_DEBUG > 0
+
+static clib_error_t *
+test_crash_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u64 *p = (u64 *) 0xdefec8ed;
+
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "deliberate crash: touching %x",
+ .format_args = "i4",
+ };
+
+ elog (&vm->elog_main, &e, 0xdefec8ed);
+
+ *p = 0xdeadbeef;
+
+ /* Not so much... */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_crash_command, static) = {
+ .path = "test crash",
+ .short_help = "crash the bus!",
+ .function = test_crash_command_fn,
+};
+/* *INDENT-ON* */
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppapigen.am b/src/vppapigen.am
new file mode 100644
index 00000000..fe9ff03e
--- /dev/null
+++ b/src/vppapigen.am
@@ -0,0 +1,29 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+bin_PROGRAMS += vppapigen
+
+tools/vppapigen/gram.h: tools/vppapigen/gram.y
+ @$(YACC) -d @srcdir@/tools/vppapigen/gram.y
+ @mv y.tab.h tools/vppapigen/gram.h
+ @rm y.tab.c
+
+tools/vppapigen/lex.o: tools/vppapigen/gram.h
+tools/vppapigen/node.o: tools/vppapigen/gram.h
+
+vppapigen_SOURCES = tools/vppapigen/gram.y tools/vppapigen/lex.c tools/vppapigen/node.c
+vppapigen_LDADD = libvppinfra.la
+vppapigen_LDFLAGS = -static
+
+CLEANFILES += tools/vppapigen/gram.c tools/vppapigen/gram.h
+# vi:syntax=automake
diff --git a/src/vppinfra.am b/src/vppinfra.am
new file mode 100644
index 00000000..daca9954
--- /dev/null
+++ b/src/vppinfra.am
@@ -0,0 +1,308 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+lib_LTLIBRARIES += libvppinfra.la
+
+TESTS =
+
+if ENABLE_TESTS
+TESTS += test_bihash_template \
+ test_dlist \
+ test_elf \
+ test_elog \
+ test_fifo \
+ test_format \
+ test_fpool \
+ test_hash \
+ test_heap \
+ test_longjmp \
+ test_macros \
+ test_md5 \
+ test_mheap \
+ test_pool_iterate \
+ test_ptclosure \
+ test_random \
+ test_random_isaac \
+ test_serialize \
+ test_slist \
+ test_socket \
+ test_time \
+ test_timing_wheel \
+ test_tw_timer \
+ test_vec \
+ test_zvec
+endif
+
+noinst_PROGRAMS = $(TESTS)
+check_PROGRAMS = $(TESTS)
+
+test_bihash_template_SOURCES = vppinfra/test_bihash_template.c
+test_dlist_SOURCES = vppinfra/test_dlist.c
+test_elf_SOURCES = vppinfra/test_elf.c
+test_elog_SOURCES = vppinfra/test_elog.c
+test_fifo_SOURCES = vppinfra/test_fifo.c
+test_format_SOURCES = vppinfra/test_format.c
+test_fpool_SOURCES = vppinfra/test_fpool.c
+test_hash_SOURCES = vppinfra/test_hash.c
+test_heap_SOURCES = vppinfra/test_heap.c
+test_longjmp_SOURCES = vppinfra/test_longjmp.c
+test_macros_SOURCES = vppinfra/test_macros.c
+test_md5_SOURCES = vppinfra/test_md5.c
+test_mheap_SOURCES = vppinfra/test_mheap.c
+test_pool_iterate_SOURCES = vppinfra/test_pool_iterate.c
+test_ptclosure_SOURCES = vppinfra/test_ptclosure.c
+test_random_isaac_SOURCES = vppinfra/test_random_isaac.c
+test_random_SOURCES = vppinfra/test_random.c
+test_serialize_SOURCES = vppinfra/test_serialize.c
+test_slist_SOURCES = vppinfra/test_slist.c
+test_socket_SOURCES = vppinfra/test_socket.c
+test_time_SOURCES = vppinfra/test_time.c
+test_timing_wheel_SOURCES = vppinfra/test_timing_wheel.c
+test_tw_timer_SOURCES = vppinfra/test_tw_timer.c
+test_vec_SOURCES = vppinfra/test_vec.c
+test_zvec_SOURCES = vppinfra/test_zvec.c
+
+# All unit tests use ASSERT for failure
+# So we'll need -DDEBUG to enable ASSERTs
+test_bihash_template_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_dlist_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_elf_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_elog_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_fifo_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_format_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_fpool_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_hash_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_heap_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_longjmp_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_macros_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_md5_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_mheap_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_pool_iterate_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_ptclosure_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_random_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_random_isaac_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_serialize_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_slist_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_socket_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_time_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_timing_wheel_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_tw_timer_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_vec_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+test_zvec_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+
+test_bihash_template_LDADD = libvppinfra.la
+test_dlist_LDADD = libvppinfra.la
+test_elf_LDADD = libvppinfra.la
+test_elog_LDADD = libvppinfra.la
+test_fifo_LDADD = libvppinfra.la
+test_format_LDADD = libvppinfra.la
+test_fpool_LDADD = libvppinfra.la
+test_hash_LDADD = libvppinfra.la
+test_heap_LDADD = libvppinfra.la
+test_longjmp_LDADD = libvppinfra.la
+test_macros_LDADD = libvppinfra.la
+test_md5_LDADD = libvppinfra.la
+test_mheap_LDADD = libvppinfra.la
+test_pool_iterate_LDADD = libvppinfra.la
+test_ptclosure_LDADD = libvppinfra.la
+test_random_isaac_LDADD = libvppinfra.la
+test_random_LDADD = libvppinfra.la
+test_serialize_LDADD = libvppinfra.la
+test_slist_LDADD = libvppinfra.la
+test_socket_LDADD = libvppinfra.la
+test_time_LDADD = libvppinfra.la -lm
+test_timing_wheel_LDADD = libvppinfra.la -lm
+test_tw_timer_LDADD = libvppinfra.la
+test_vec_LDADD = libvppinfra.la
+test_zvec_LDADD = libvppinfra.la
+
+test_bihash_template_LDFLAGS = -static
+test_dlist_LDFLAGS = -static
+test_elf_LDFLAGS = -static
+test_elog_LDFLAGS = -static
+test_fifo_LDFLAGS = -static
+test_format_LDFLAGS = -static
+test_fpool_LDFLAGS = -static
+test_hash_LDFLAGS = -static
+test_heap_LDFLAGS = -static
+test_longjmp_LDFLAGS = -static
+test_macros_LDFLAGS = -static
+test_md5_LDFLAGS = -static
+test_mheap_LDFLAGS = -static
+test_pool_iterate_LDFLAGS = -static
+test_ptclosure_LDFLAGS = -static
+test_random_isaac_LDFLAGS = -static
+test_random_LDFLAGS = -static
+test_serialize_LDFLAGS = -static
+test_slist_LDFLAGS = -static
+test_socket_LDFLAGS = -static
+test_time_LDFLAGS = -static
+test_timing_wheel_LDFLAGS = -static
+test_tw_timer_LDFLAGS = -static
+test_vec_LDFLAGS = -static
+test_zvec_LDFLAGS = -static
+
+# noinst_PROGRAMS += test_vhash
+# test_vhash_SOURCES = vppinfra/test_vhash.c vppinfra/vhash.c
+# test_vhash_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG
+# test_vhash_LDADD = libvppinfra.la
+# test_vhash_LDFLAGS = -static
+
+nobase_include_HEADERS = \
+ vppinfra/asm_mips.h \
+ vppinfra/asm_x86.h \
+ vppinfra/bihash_8_8.h \
+ vppinfra/bihash_16_8.h \
+ vppinfra/bihash_24_8.h \
+ vppinfra/bihash_48_8.h \
+ vppinfra/bihash_template.h \
+ vppinfra/bihash_template.c \
+ vppinfra/bitmap.h \
+ vppinfra/bitops.h \
+ vppinfra/byte_order.h \
+ vppinfra/cache.h \
+ vppinfra/clib.h \
+ vppinfra/clib_error.h \
+ vppinfra/cpu.h \
+ vppinfra/crc32.h \
+ vppinfra/dlist.h \
+ vppinfra/elf.h \
+ vppinfra/elf_clib.h \
+ vppinfra/elog.h \
+ vppinfra/fheap.h \
+ vppinfra/error.h \
+ vppinfra/error_bootstrap.h \
+ vppinfra/fifo.h \
+ vppinfra/file.h \
+ vppinfra/format.h \
+ vppinfra/graph.h \
+ vppinfra/hash.h \
+ vppinfra/heap.h \
+ vppinfra/linux/sysfs.h \
+ vppinfra/linux/syscall.h \
+ vppinfra/lock.h \
+ vppinfra/longjmp.h \
+ vppinfra/macros.h \
+ vppinfra/math.h \
+ vppinfra/md5.h \
+ vppinfra/mem.h \
+ vppinfra/memcpy_sse3.h \
+ vppinfra/memcpy_avx.h \
+ vppinfra/mhash.h \
+ vppinfra/mheap.h \
+ vppinfra/mheap_bootstrap.h \
+ vppinfra/os.h \
+ vppinfra/pipeline.h \
+ vppinfra/pool.h \
+ vppinfra/ptclosure.h \
+ vppinfra/random.h \
+ vppinfra/random_buffer.h \
+ vppinfra/random_isaac.h \
+ vppinfra/serialize.h \
+ vppinfra/slist.h \
+ vppinfra/smp.h \
+ vppinfra/socket.h \
+ vppinfra/sparse_vec.h \
+ vppinfra/string.h \
+ vppinfra/time.h \
+ vppinfra/timing_wheel.h \
+ vppinfra/timer.h \
+ vppinfra/tw_timer_2t_1w_2048sl.h \
+ vppinfra/tw_timer_16t_2w_512sl.h \
+ vppinfra/tw_timer_16t_1w_2048sl.h \
+ vppinfra/tw_timer_4t_3w_256sl.h \
+ vppinfra/tw_timer_1t_3w_1024sl_ov.h \
+ vppinfra/tw_timer_template.h \
+ vppinfra/tw_timer_template.c \
+ vppinfra/types.h \
+ vppinfra/unix.h \
+ vppinfra/vec.h \
+ vppinfra/vec_bootstrap.h \
+ vppinfra/vector.h \
+ vppinfra/vector_altivec.h \
+ vppinfra/vector_funcs.h \
+ vppinfra/vector_iwmmxt.h \
+ vppinfra/vector_neon.h \
+ vppinfra/vector_sse2.h \
+ vppinfra/valgrind.h \
+ vppinfra/xxhash.h \
+ vppinfra/xy.h \
+ vppinfra/zvec.h
+
+CLIB_CORE = \
+ vppinfra/asm_x86.c \
+ vppinfra/backtrace.c \
+ vppinfra/bihash_8_8.h \
+ vppinfra/bihash_24_8.h \
+ vppinfra/bihash_template.h \
+ vppinfra/cpu.c \
+ vppinfra/elf.c \
+ vppinfra/elog.c \
+ vppinfra/error.c \
+ vppinfra/fifo.c \
+ vppinfra/fheap.c \
+ vppinfra/format.c \
+ vppinfra/pool.c \
+ vppinfra/graph.c \
+ vppinfra/hash.c \
+ vppinfra/heap.c \
+ vppinfra/longjmp.S \
+ vppinfra/macros.c \
+ vppinfra/mhash.c \
+ vppinfra/mheap.c \
+ vppinfra/md5.c \
+ vppinfra/mem_mheap.c \
+ vppinfra/ptclosure.c \
+ vppinfra/random.c \
+ vppinfra/random_buffer.c \
+ vppinfra/random_isaac.c \
+ vppinfra/serialize.c \
+ vppinfra/slist.c \
+ vppinfra/std-formats.c \
+ vppinfra/string.c \
+ vppinfra/time.c \
+ vppinfra/timing_wheel.c \
+ vppinfra/tw_timer_template.h \
+ vppinfra/tw_timer_2t_1w_2048sl.h \
+ vppinfra/tw_timer_2t_1w_2048sl.c \
+ vppinfra/tw_timer_16t_2w_512sl.h \
+ vppinfra/tw_timer_16t_2w_512sl.c \
+ vppinfra/tw_timer_16t_1w_2048sl.h \
+ vppinfra/tw_timer_16t_1w_2048sl.c \
+ vppinfra/tw_timer_4t_3w_256sl.h \
+ vppinfra/tw_timer_4t_3w_256sl.c \
+ vppinfra/tw_timer_1t_3w_1024sl_ov.h \
+ vppinfra/tw_timer_1t_3w_1024sl_ov.c \
+ vppinfra/unformat.c \
+ vppinfra/vec.c \
+ vppinfra/vector.c \
+ vppinfra/zvec.c
+
+# Core plus Unix additions
+libvppinfra_la_SOURCES = \
+ $(CLIB_CORE) \
+ vppinfra/elf_clib.c \
+ vppinfra/linux/mem.c \
+ vppinfra/linux/sysfs.c \
+ vppinfra/socket.c \
+ vppinfra/timer.c \
+ vppinfra/unix-formats.c \
+ vppinfra/unix-misc.c
+
+bin_PROGRAMS = elftool
+
+elftool_SOURCES = tools/elftool/elftool.c
+elftool_CPPFLAGS = $(AM_CPPFLAGS)
+elftool_LDADD = libvppinfra.la -lpthread -lrt -lm
+
+# vi:syntax=automake
diff --git a/src/vppinfra/README b/src/vppinfra/README
new file mode 100644
index 00000000..579696b6
--- /dev/null
+++ b/src/vppinfra/README
@@ -0,0 +1,43 @@
+Welcome to vppinfra, a programming library of basic data structures.
+
+vec.c dynamic vectors
+bitmap.h dynamic bitmaps
+heap.c allocation heap of objects (sub-objects have variable size)
+pool.h allocation pool (like heap with size always 1)
+hash.c dynamic hash tables
+mheap.c memory allocator (a la dlmalloc)
+
+format.c extendable printf-like thing built on top of vectors
+std-formats.c formats for unix data structures, networking stuff, ...
+timer.c arrange for functions to be called at given times.
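+
+Example (illustrative): format() builds a byte vector rather than a
+NUL-terminated C string:
+
+ u8 *s = format (0, "count: %d", 42);
+ /* vec_len (s) gives the length; vec_free (s) releases it */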
+
+
+Build, Test, Install, Use...
+----------------------------
+ If this package came from the distribution tarball, skip to the
+ Build section. If this was a Gentoo ebuild, after emerge/ebuild,
+ skip to the Use section; otherwise, start with Pre-Build.
+
+Pre-Build
+-----------
+ 1) svn checkout svn://teaktechnologies.com/fn/trunk/clib clib
+ 2) autoreconf [-v][-f][-i] # regenerate configuration files
+
+Build
+-----
+ 1) cd BUILD # which may be different than this SRC dir
+ 2) ${SRC}/configure [--host=CHOST]
+ 3) make
+
+Test
+----
+ If not cross-compiling (i.e. CBUILD == CHOST), use "make check" to
+ run the validation programs.
+
+Install
+-------
+ With the root effective user ID (i.e. su or sudo), run "make install".
+
+Use
+---
+ We need to reference man pages and theory of operation.
diff --git a/src/vppinfra/anneal.c b/src/vppinfra/anneal.c
new file mode 100644
index 00000000..35d10946
--- /dev/null
+++ b/src/vppinfra/anneal.c
@@ -0,0 +1,172 @@
+/*
+ Copyright (c) 2011 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/anneal.h>
+
+/*
+ * Optimize an objective function by simulated annealing
+ *
+ * Here are a couple of short, easily-understood
+ * descriptions of simulated annealing:
+ *
+ * http://www.cs.sandia.gov/opt/survey/sa.html
+ * Numerical Recipes in C, 2nd ed., 444ff
+ *
+ * The description on Wikipedia is not helpful.
+ *
+ * The algorithm tries to produce a decent answer to combinatorially
+ * explosive optimization problems by analogy to slow cooling
+ * of hot metal, aka annealing.
+ *
+ * There are (at least) three problem-dependent annealing parameters
+ * to consider:
+ *
+ * t0, the initial "temperature. Should be set so that the probability
+ * of accepting a transition to a higher cost configuration is
+ * initially about 0.8.
+ *
+ * ntemps, the number of temperatures to use. Each successive temperature
+ * is some fraction of the previous temperature.
+ *
+ * nmoves_per_temp, the number of configurations to try at each temperature
+ *
+ * It is a black art to set ntemps, nmoves_per_temp, and the rate
+ * at which the temperature drops. Go too fast with too few iterations,
+ * and the computation falls into a local minimum instead of the
+ * (desired) global minimum.
+ */
+
+void
+clib_anneal (clib_anneal_param_t * p)
+{
+ f64 t;
+ f64 cost, prev_cost, delta_cost, initial_cost, best_cost;
+ f64 random_accept, delta_cost_over_t;
+ f64 total_increase = 0.0, average_increase;
+ u32 i, j;
+ u32 number_of_increases = 0;
+ u32 accepted_this_temperature;
+ u32 best_saves_this_temperature;
+ int accept;
+
+ t = p->initial_temperature;
+ best_cost = initial_cost = prev_cost = p->anneal_metric (p->opaque);
+ p->anneal_save_best_configuration (p->opaque);
+
+ if (p->flags & CLIB_ANNEAL_VERBOSE)
+ fformat (stdout, "Initial cost %.2f\n", initial_cost);
+
+ for (i = 0; i < p->number_of_temperatures; i++)
+ {
+ accepted_this_temperature = 0;
+ best_saves_this_temperature = 0;
+
+ p->anneal_restore_best_configuration (p->opaque);
+ cost = best_cost;
+
+ for (j = 0; j < p->number_of_configurations_per_temperature; j++)
+ {
+ p->anneal_new_configuration (p->opaque);
+ cost = p->anneal_metric (p->opaque);
+
+ delta_cost = cost - prev_cost;
+
+ /* cost function looks better, accept this move */
+ if (p->flags & CLIB_ANNEAL_MINIMIZE)
+ accept = delta_cost < 0.0;
+ else
+ accept = delta_cost > 0.0;
+
+ if (accept)
+ {
+ if (p->flags & CLIB_ANNEAL_MINIMIZE)
+ if (cost < best_cost)
+ {
+ if (p->flags & CLIB_ANNEAL_VERBOSE)
+ fformat (stdout, "New best cost %.2f\n", cost);
+ best_cost = cost;
+ p->anneal_save_best_configuration (p->opaque);
+ best_saves_this_temperature++;
+ }
+
+ accepted_this_temperature++;
+ prev_cost = cost;
+ continue;
+ }
+
+ /* cost function worse, keep stats to suggest t0 */
+ total_increase += (p->flags & CLIB_ANNEAL_MINIMIZE) ?
+ delta_cost : -delta_cost;
+
+ number_of_increases++;
+
+ /*
+ * Accept a higher cost with Pr { e^(-(delta_cost / T)) },
+ * equivalent to rnd[0,1] < e^(-(delta_cost / T))
+ *
+ * AKA, the Boltzmann factor.
+ */
+ random_accept = random_f64 (&p->random_seed);
+
+ delta_cost_over_t = delta_cost / t;
+
+ if (random_accept < exp (-delta_cost_over_t))
+ {
+ accepted_this_temperature++;
+ prev_cost = cost;
+ continue;
+ }
+ p->anneal_restore_previous_configuration (p->opaque);
+ }
+
+ if (p->flags & CLIB_ANNEAL_VERBOSE)
+ {
+ fformat (stdout, "Temp %.2f, cost %.2f, accepted %d, bests %d\n", t,
+ prev_cost, accepted_this_temperature,
+ best_saves_this_temperature);
+ fformat (stdout, "Improvement %.2f\n", initial_cost - prev_cost);
+ fformat (stdout, "-------------\n");
+ }
+
+ t = t * p->temperature_step;
+ }
+
+ /*
+ * Empirically, one wants the probability of accepting a move
+ * at the initial temperature to be about 0.8.
+ */
+ average_increase = total_increase / (f64) number_of_increases;
+ p->suggested_initial_temperature = average_increase / 0.22; /* 0.22 = -ln (0.8) */
+
+ p->final_temperature = t;
+ p->final_metric = p->anneal_metric (p->opaque);
+
+ if (p->flags & CLIB_ANNEAL_VERBOSE)
+ {
+ fformat (stdout, "Average cost increase from a bad move: %.2f\n",
+ average_increase);
+ fformat (stdout, "Suggested t0 = %.2f\n",
+ p->suggested_initial_temperature);
+ }
+}
+
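+/*
+ * A minimal usage sketch (illustrative only; every name below except
+ * clib_anneal and its parameter block is hypothetical): minimize
+ * f(x) = (x - 3)^2 by random perturbation of x.
+ */
+#ifdef CLIB_ANNEAL_EXAMPLE
+typedef struct
+{
+  f64 x, prev_x, best_x;
+  u32 seed;
+} anneal_example_t;
+
+static f64
+example_metric (void *opaque)
+{
+  anneal_example_t *s = opaque;
+  return (s->x - 3.0) * (s->x - 3.0);
+}
+
+static void
+example_new_configuration (void *opaque)
+{
+  anneal_example_t *s = opaque;
+  s->prev_x = s->x;
+  /* random step in [-1, 1) */
+  s->x += 2.0 * random_f64 (&s->seed) - 1.0;
+}
+
+static void
+example_restore_previous (void *opaque)
+{
+  anneal_example_t *s = opaque;
+  s->x = s->prev_x;
+}
+
+static void
+example_save_best (void *opaque)
+{
+  anneal_example_t *s = opaque;
+  s->best_x = s->x;
+}
+
+static void
+example_restore_best (void *opaque)
+{
+  anneal_example_t *s = opaque;
+  s->x = s->best_x;
+}
+
+static void
+anneal_example (void)
+{
+  anneal_example_t s = { .x = 100.0, .seed = 1 };
+  clib_anneal_param_t p = {
+    .initial_temperature = 100.0,
+    .temperature_step = 0.95,
+    .number_of_temperatures = 100,
+    .number_of_configurations_per_temperature = 100,
+    .flags = CLIB_ANNEAL_MINIMIZE,
+    .random_seed = 1,
+    .opaque = &s,
+    .anneal_metric = example_metric,
+    .anneal_new_configuration = example_new_configuration,
+    .anneal_restore_previous_configuration = example_restore_previous,
+    .anneal_save_best_configuration = example_save_best,
+    .anneal_restore_best_configuration = example_restore_best,
+  };
+  clib_anneal (&p);
+  /* s.best_x should now be close to 3.0 */
+}
+#endif /* CLIB_ANNEAL_EXAMPLE */
+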
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/anneal.h b/src/vppinfra/anneal.h
new file mode 100644
index 00000000..148d38ba
--- /dev/null
+++ b/src/vppinfra/anneal.h
@@ -0,0 +1,89 @@
+/*
+ Copyright (c) 2011 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef __included_anneal_h__
+#define __included_anneal_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+#include <math.h>
+
+typedef struct
+{
+ /* Initial temperature */
+ f64 initial_temperature;
+
+ /* Temperature fraction at each step, 0.95 is reasonable */
+ f64 temperature_step;
+
+ /* Number of temperatures used */
+ u32 number_of_temperatures;
+
+ /* Number of configurations tried at each temperature */
+ u32 number_of_configurations_per_temperature;
+
+ u32 flags;
+#define CLIB_ANNEAL_VERBOSE (1<<0)
+#define CLIB_ANNEAL_MINIMIZE (1<<1) /* mutually exclusive */
+#define CLIB_ANNEAL_MAXIMIZE (1<<2) /* mutually exclusive */
+
+ /* Random number seed, set to ensure repeatable results */
+ u32 random_seed;
+
+ /* Opaque data passed to callbacks */
+ void *opaque;
+
+ /* Final temperature (output) */
+ f64 final_temperature;
+
+ /* Final metric (output) */
+ f64 final_metric;
+
+ /* Suggested initial temperature (output) */
+ f64 suggested_initial_temperature;
+
+
+ /*--- Callbacks ---*/
+
+ /* objective function to minimize */
+ f64 (*anneal_metric) (void *opaque);
+
+ /* Generate a new configuration */
+ void (*anneal_new_configuration) (void *opaque);
+
+ /* Restore the previous configuration */
+ void (*anneal_restore_previous_configuration) (void *opaque);
+
+ /* Save best configuration found e.g at a certain temperature */
+ void (*anneal_save_best_configuration) (void *opaque);
+
+ /* restore best configuration found e.g at a certain temperature */
+ void (*anneal_restore_best_configuration) (void *opaque);
+
+} clib_anneal_param_t;
+
+void clib_anneal (clib_anneal_param_t * p);
+
+#endif /* __included_anneal_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/asm_mips.h b/src/vppinfra/asm_mips.h
new file mode 100644
index 00000000..7c9e6958
--- /dev/null
+++ b/src/vppinfra/asm_mips.h
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2004 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_asm_mips_h
+#define included_asm_mips_h
+
+/* Encoding of MIPS instructions. */
+/* Encoding of opcode field (op). */
+#define mips_foreach_opcode \
+ _(SPECIAL) _(REGIMM) _(j) _(jal) _(beq) _(bne) _(blez) _(bgtz) \
+ _(addi) _(addiu) _(slti) _(sltiu) _(andi) _(ori) _(xori) _(lui) \
+ _(COP0) _(COP1) _(COP2) _(COP1X) _(beql) _(bnel) _(blezl) _(bgtzl) \
+ _(daddi) _(daddiu) _(ldl) _(ldr) _(SPECIAL2) _(jalx) _(MDMX) _(O37) \
+ _(lb) _(lh) _(lwl) _(lw) _(lbu) _(lhu) _(lwr) _(lwu) \
+ _(sb) _(sh) _(swl) _(sw) _(sdl) _(sdr) _(swr) _(cache) \
+ _(ll) _(lwc1) _(lwc2) _(pref) _(lld) _(ldc1) _(ldc2) _(ld) \
+ _(sc) _(swc1) _(swc2) _(o73) _(scd) _(sdc1) _(sdc2) _(sd)
+
+/* Encoding of funct field. */
+#define mips_foreach_special_funct \
+ _(sll) _(MOVCI) _(srl) _(sra) _(sllv) _(o05) _(srlv) _(srav) \
+ _(jr) _(jalr) _(movz) _(movn) _(syscall) _(break) _(o16) _(sync) \
+ _(mfhi) _(mthi) _(mflo) _(mtlo) _(dsllv) _(o25) _(dsrlv) _(dsrav) \
+ _(mult) _(multu) _(div) _(divu) _(dmult) _(dmultu) _(ddiv) _(ddivu) \
+ _(add) _(addu) _(sub) _(subu) _(and) _(or) _(xor) _(nor) \
+ _(o50) _(o51) _(slt) _(sltu) _(dadd) _(daddu) _(dsub) _(dsubu) \
+ _(tge) _(tgeu) _(tlt) _(tltu) _(teq) _(o65) _(tne) _(o67) \
+ _(dsll) _(o71) _(dsrl) _(dsra) _(dsll32) _(o75) _(dsrl32) _(dsra32)
+
+/* SPECIAL2 encoding of funct field. */
+#define mips_foreach_special2_funct \
+ _(madd) _(maddu) _(mul) _(o03) _(msub) _(msubu) _(o06) _(o07) \
+ _(o10) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \
+ _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \
+ _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \
+ _(clz) _(clo) _(o42) _(o43) _(dclz) _(dclo) _(o46) _(o47) \
+ _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
+ _(o60) _(o61) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \
+ _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(o76) _(sdbbp)
+
+/* REGIMM encoding of rt field. */
+#define mips_foreach_regimm_rt \
+ _(bltz) _(bgez) _(bltzl) _(bgezl) _(o04) _(o05) _(o06) _(o07) \
+ _(tgei) _(tgeiu) _(tltiu) _(teqi) _(o14) _(tnei) _(o16) _(o17) \
+ _(bltzal) _(bgezal) _(bltzall) _(bgezall) _(o24) _(o25) _(o26) _(o27) \
+ _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37)
+
+/* COP0 encoding of rs field. */
+#define mips_foreach_cop0_rs \
+ _(mfc0) _(dmfc0) _(o02) _(o03) _(mtc0) _(dmtc0) _(o06) _(o07) \
+ _(o10) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \
+ _(C0) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \
+ _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37)
+
+/* COP0 encoding of funct when rs == RS_CO */
+#define mips_foreach_cop0_funct \
+ _(o00) _(tlbr) _(tlbwi) _(o03) _(o04) _(o05) _(tlbwr) _(o07) \
+ _(tlbp) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \
+ _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \
+ _(eret) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(deret) \
+ _(wait) _(o41) _(o42) _(o43) _(o44) _(o45) _(o46) _(o47) \
+ _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
+ _(o60) _(o61) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \
+ _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(o76) _(o77)
+
+/* COP1 encoding of rs field. */
+#define mips_foreach_cop1_rs \
+ _(mfc1) _(dmfc1) _(cfc1) _(o03) _(mtc1) _(dmtc1) _(ctc1) _(o07) \
+ _(BC1) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \
+ _(S) _(D) _(o22) _(o23) _(W) _(L) _(o26) _(o27) \
+ _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37)
+
+/* COP1 encoding of funct for S and D */
+#define mips_foreach_cop1_funct \
+ _(add) _(sub) _(mul) _(div) _(sqrt) _(abs) _(mov) _(neg) \
+ _(roundl) _(truncl) _(ceill) _(floorl) _(roundw) _(truncw) _(ceilw) _(floorw) \
+ _(o20) _(MOVCF) _(movz) _(movn) _(o24) _(recip) _(rsqrt) _(o27) \
+ _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \
+ _(cvts) _(cvtd) _(o42) _(o43) _(cvtw) _(cvtl) _(o46) _(o47) \
+ _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
+ _(cf) _(cun) _(ceq) _(cueq) _(colt) _(cult) _(cole) _(cule) \
+ _(csf) _(cngle) _(cseq) _(cngl) _(clt) _(cnge) _(cle) _(cngt)
+
+/* COP1X encoding of funct */
+#define mips_foreach_cop1x_funct \
+ _(lwxc1) _(ldxc1) _(o02) _(o03) _(o04) _(luxc1) _(o06) _(o07) \
+ _(swxc1) _(sdxc1) _(o12) _(o13) _(o14) _(suxc1) _(o16) _(prefx) \
+ _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \
+ _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \
+ _(madds) _(maddd) _(o42) _(o43) _(o44) _(o45) _(o46) _(o47) \
+ _(msubs) _(msubd) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
+ _(nmadds) _(nmaddd) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \
+ _(nmsubs) _(nmsubd) _(o72) _(o73) _(o74) _(o75) _(o76) _(o77)
+
+#define mips_foreach_mdmx_funct \
+ _(msgn) _(ceq) _(pickf) _(pickt) _(clt) _(cle) _(min) _(max) \
+ _(o10) _(o11) _(sub) _(add) _(and) _(xor) _(or) _(nor) \
+ _(sll) _(o21) _(srl) _(sra) _(o24) _(o25) _(o26) _(o27) \
+ _(alniob) _(alnvob) _(alniqh) _(alnvqh) _(o34) _(o35) _(o36) _(shfl) \
+ _(rzu) _(rnau) _(rneu) _(o43) _(rzs) _(rnas) _(rnes) _(o47) \
+ _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
+ _(mul) _(o61) _(muls) _(mula) _(o64) _(o65) _(suba) _(adda) \
+ _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(wac) _(rac)
+
+#define _(f) MIPS_OPCODE_##f,
+typedef enum
+{
+ mips_foreach_opcode
+} mips_insn_opcode_t;
+#undef _
+
+#define _(f) MIPS_SPECIAL_FUNCT_##f,
+typedef enum
+{
+ mips_foreach_special_funct
+} mips_insn_special_funct_t;
+#undef _
+
+#define _(f) MIPS_SPECIAL2_FUNCT_##f,
+typedef enum
+{
+ mips_foreach_special2_funct
+} mips_insn_special2_funct_t;
+#undef _
+
+#define _(f) MIPS_REGIMM_RT_##f,
+typedef enum
+{
+ mips_foreach_regimm_rt
+} mips_insn_regimm_rt_t;
+#undef _
+
+#define _(f) MIPS_COP0_RS_##f,
+typedef enum
+{
+ mips_foreach_cop0_rs
+} mips_insn_cop0_rs_t;
+#undef _
+
+#define _(f) MIPS_COP0_FUNCT_##f,
+typedef enum
+{
+ mips_foreach_cop0_funct
+} mips_insn_cop0_funct_t;
+#undef _
+
+#define _(f) MIPS_COP1_RS_##f,
+typedef enum
+{
+ mips_foreach_cop1_rs
+} mips_insn_cop1_rs_t;
+#undef _
+
+#define _(f) MIPS_COP1_FUNCT_##f,
+typedef enum
+{
+ mips_foreach_cop1_funct
+} mips_insn_cop1_funct_t;
+#undef _
+
+#define _(f) MIPS_COP1X_FUNCT_##f,
+typedef enum
+{
+ mips_foreach_cop1x_funct
+} mips_insn_cop1x_funct_t;
+#undef _
+
+#define _(f) MIPS_MDMX_FUNCT_##f,
+typedef enum
+{
+ mips_foreach_mdmx_funct
+} mips_insn_mdmx_funct_t;
+#undef _
+
+always_inline mips_insn_opcode_t
+mips_insn_get_op (u32 insn)
+{
+ return (insn >> 26) & 0x3f;
+}
+
+always_inline u32
+mips_insn_get_rs (u32 insn)
+{
+ return (insn >> 21) & 0x1f;
+}
+
+always_inline u32
+mips_insn_get_rt (u32 insn)
+{
+ return (insn >> 16) & 0x1f;
+}
+
+always_inline u32
+mips_insn_get_rd (u32 insn)
+{
+ return (insn >> 11) & 0x1f;
+}
+
+always_inline u32
+mips_insn_get_sa (u32 insn)
+{
+ return (insn >> 6) & 0x1f;
+}
+
+always_inline u32
+mips_insn_get_funct (u32 insn)
+{
+ return (insn >> 0) & 0x3f;
+}
+
+always_inline i32
+mips_insn_get_immediate (u32 insn)
+{
+ return (((i32) insn) << 16) >> 16;
+}
+
+always_inline u32
+mips_insn_encode_i_type (int op, int rs, int rt, int immediate)
+{
+ u32 insn;
+ insn = immediate;
+ insn |= rt << 16;
+ insn |= rs << 21;
+ insn |= op << 26;
+
+ ASSERT (mips_insn_get_immediate (insn) == immediate);
+ ASSERT (mips_insn_get_rt (insn) == rt);
+ ASSERT (mips_insn_get_rs (insn) == rs);
+ ASSERT (mips_insn_get_op (insn) == op);
+
+ return insn;
+}
+
+always_inline u32
+mips_insn_encode_j_type (int op, u32 addr)
+{
+ u32 insn;
+
+ insn = (addr & ((1 << 28) - 1)) / 4;
+ insn |= op << 26;
+
+ return insn;
+}
+
+always_inline u32
+mips_insn_encode_r_type (int op, int rs, int rt, int rd, int sa, int funct)
+{
+ u32 insn;
+ insn = funct;
+ insn |= sa << 6;
+ insn |= rd << 11;
+ insn |= rt << 16;
+ insn |= rs << 21;
+ insn |= op << 26;
+
+ ASSERT (mips_insn_get_funct (insn) == funct);
+ ASSERT (mips_insn_get_sa (insn) == sa);
+ ASSERT (mips_insn_get_rd (insn) == rd);
+ ASSERT (mips_insn_get_rt (insn) == rt);
+ ASSERT (mips_insn_get_rs (insn) == rs);
+ ASSERT (mips_insn_get_op (insn) == op);
+
+ return insn;
+}
+
+#define mips_insn_r(op,funct,rd,rs,rt,sa) \
+ mips_insn_encode_r_type (MIPS_OPCODE_##op, \
+ (rs), (rt), (rd), (sa), \
+ MIPS_##op##_FUNCT_##funct)
+
+#define mips_insn_i(op,rs,rt,imm) \
+ mips_insn_encode_i_type (MIPS_OPCODE_##op, (rs), (rt), (imm))
+
+#define mips_insn_j(op,target) \
+ mips_insn_encode_j_type (MIPS_OPCODE_##op, (target))
+
+/* Generate unsigned load instructions of data of various sizes. */
+always_inline u32
+mips_insn_load (u32 rd, i32 offset, u32 base, u32 log2_bytes)
+{
+ int op;
+
+ ASSERT (log2_bytes < 4);
+ switch (log2_bytes)
+ {
+ case 0:
+ op = MIPS_OPCODE_lbu;
+ break;
+ case 1:
+ op = MIPS_OPCODE_lhu;
+ break;
+ case 2:
+ op = MIPS_OPCODE_lwu;
+ break;
+ case 3:
+ op = MIPS_OPCODE_ld;
+ break;
+ }
+
+ return mips_insn_encode_i_type (op, base, rd, offset);
+}
+
+typedef enum
+{
+ MIPS_REG_SP = 29,
+ MIPS_REG_RA = 31,
+} mips_reg_t;
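+
+/*
+ * Illustrative encodings (examples only; nothing below depends on them):
+ *
+ * lw $4, 8($29): mips_insn_i (lw, MIPS_REG_SP, 4, 8)
+ * jr $31: mips_insn_r (SPECIAL, jr, 0, MIPS_REG_RA, 0, 0)
+ */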
+
+#endif /* included_asm_mips_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/asm_x86.c b/src/vppinfra/asm_x86.c
new file mode 100644
index 00000000..16e41c24
--- /dev/null
+++ b/src/vppinfra/asm_x86.c
@@ -0,0 +1,1947 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* FIXME
+ remove opcode name strings to save table space (use an enum);
+ x87 instructions;
+ 3dnow instructions;
+ cbw naming.
+*/
+
+#include <vppinfra/error.h>
+#include <vppinfra/byte_order.h>
+#include <vppinfra/asm_x86.h>
+
+#define foreach_x86_gp_register \
+ _ (AX) _ (CX) _ (DX) _ (BX) \
+ _ (SP) _ (BP) _ (SI) _ (DI)
+
+typedef enum {
+#define _(r) X86_INSN_GP_REG_##r,
+ foreach_x86_gp_register
+#undef _
+} x86_insn_gp_register_t;
+
+typedef union {
+ struct {
+ u8 rm : 3;
+ u8 reg : 3;
+ u8 mode : 2;
+ };
+ u8 byte;
+} x86_insn_modrm_byte_t;
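+
+/*
+ * Example (illustrative): modrm byte 0xd8 == binary 11 011 000, i.e.
+ * mode = 3 (register direct), reg = 3 (BX), rm = 0 (AX).
+ */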
+
+typedef union {
+ struct {
+ u8 base : 3;
+ u8 index : 3;
+ u8 log2_scale : 2;
+ };
+ u8 byte;
+} x86_insn_sib_byte_t;
+
+always_inline uword
+x86_insn_has_modrm_byte (x86_insn_t * insn)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (insn->operands); i++)
+ switch (insn->operands[i].code)
+ {
+ case 'G': case 'E': case 'M': case 'R':
+ return 1;
+ }
+ return 0;
+}
+
+always_inline uword
+x86_insn_immediate_type (x86_insn_t * insn)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (insn->operands); i++)
+ switch (insn->operands[i].code)
+ {
+ case 'J':
+ case 'I':
+ case 'O':
+ return insn->operands[i].type;
+ }
+ return 0;
+}
+
+/* Opcode extension in modrm byte reg field. */
+#define foreach_x86_insn_modrm_reg_group \
+ _ (1) _ (1a) _ (2) _ (3) _ (4) _ (5) _ (6) _ (7) \
+ _ (8) _ (9) _ (10) _ (11) _ (12) _ (13) _ (14) \
+ _ (15) _ (16) _ (p)
+
+#define foreach_x86_insn_sse_group \
+ _ (10) _ (28) _ (50) _ (58) _ (60) _ (68) _ (70) _ (78) \
+ _ (c0) _ (d0) _ (d8) _ (e0) _ (e8) _ (f0) _ (f8)
+
+enum {
+#define _(x) X86_INSN_MODRM_REG_GROUP_##x,
+ foreach_x86_insn_modrm_reg_group
+#undef _
+#define _(x) X86_INSN_SSE_GROUP_##x,
+ foreach_x86_insn_sse_group
+#undef _
+};
+
+enum {
+#define _(x) \
+ X86_INSN_FLAG_MODRM_REG_GROUP_##x \
+ = X86_INSN_FLAG_SET_MODRM_REG_GROUP (1 + X86_INSN_MODRM_REG_GROUP_##x),
+ foreach_x86_insn_modrm_reg_group
+#undef _
+
+#define _(x) \
+ X86_INSN_FLAG_SSE_GROUP_##x \
+ = X86_INSN_FLAG_SET_SSE_GROUP (1 + X86_INSN_SSE_GROUP_##x),
+ foreach_x86_insn_sse_group
+#undef _
+};
+
+#define foreach_x86_gp_reg \
+ _ (AX) _ (CX) _ (DX) _ (BX) \
+ _ (SP) _ (BP) _ (SI) _ (DI)
+
+#define foreach_x86_condition \
+ _ (o) _ (no) _ (b) _ (nb) \
+ _ (z) _ (nz) _ (be) _ (nbe) \
+ _ (s) _ (ns) _ (p) _ (np) \
+ _ (l) _ (nl) _ (le) _ (nle)
+
+#define _3f(x,f,o0,o1,o2) \
+{ \
+ .name = #x, \
+ .flags = (f), \
+ .operands[0] = { .data = #o0 }, \
+ .operands[1] = { .data = #o1 }, \
+ .operands[2] = { .data = #o2 }, \
+}
+
+#define _2f(x,f,o0,o1) _3f(x,f,o0,o1,__)
+#define _1f(x,f,o0) _2f(x,f,o0,__)
+#define _0f(x,f) _1f(x,f,__)
+
+#define _3(x,o0,o1,o2) _3f(x,0,o0,o1,o2)
+#define _2(x,o0,o1) _2f(x,0,o0,o1)
+#define _1(x,o0) _1f(x,0,o0)
+#define _0(x) _0f(x,0)
+
+static x86_insn_t x86_insns_one_byte[256] = {
+
+#define _(x) \
+ _2 (x, Eb, Gb), \
+ _2 (x, Ev, Gv), \
+ _2 (x, Gb, Eb), \
+ _2 (x, Gv, Ev), \
+ _2 (x, AL, Ib), \
+ _2 (x, AX, Iz)
+
+ /* 0x00 */
+ _ (add),
+ _0 (push_es),
+ _0 (pop_es),
+ _ (or),
+ _0 (push_cs),
+ _0 (escape_two_byte),
+
+ /* 0x10 */
+ _ (adc),
+ _0 (push_ss),
+ _0 (pop_ss),
+ _ (sbb),
+ _0 (push_ds),
+ _0 (pop_ds),
+
+ /* 0x20 */
+ _ (and),
+ _0 (segment_es),
+ _0 (daa),
+ _ (sub),
+ _0 (segment_cs),
+ _0 (das),
+
+ /* 0x30 */
+ _ (xor),
+ _0 (segment_ss),
+ _0 (aaa),
+ _ (cmp),
+ _0 (segment_ds),
+ _0 (aas),
+
+#undef _
+
+ /* 0x40 */
+#define _(r) _1 (inc, r),
+ foreach_x86_gp_reg
+#undef _
+#define _(r) _1 (dec, r),
+ foreach_x86_gp_reg
+#undef _
+
+ /* 0x50 */
+#define _(r) _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, r),
+ foreach_x86_gp_reg
+#undef _
+#define _(r) _1f (pop, X86_INSN_FLAG_DEFAULT_64_BIT, r),
+ foreach_x86_gp_reg
+#undef _
+
+ /* 0x60 */
+ _0 (pusha),
+ _0 (popa),
+ _2 (bound, Gv, Ma),
+ _2 (movsxd, Gv, Ed),
+ _0 (segment_fs),
+ _0 (segment_gs),
+ _0 (operand_type),
+ _0 (address_size),
+ _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Iz),
+ _3 (imul, Gv, Ev, Iz),
+ _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Ib),
+ _3 (imul, Gv, Ev, Ib),
+ _1 (insb, DX),
+ _1 (insw, DX),
+ _1 (outsb, DX),
+ _1 (outsw, DX),
+
+ /* 0x70 */
+#define _(x) _1 (j##x, Jb),
+ foreach_x86_condition
+#undef _
+
+ /* 0x80 */
+ _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Eb, Ib),
+ _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Ev, Iz),
+ _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Eb, Ib),
+ _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Ev, Ib),
+ _2 (test, Eb, Gb),
+ _2 (test, Ev, Gv),
+ _2 (xchg, Eb, Gb),
+ _2 (xchg, Ev, Gv),
+ _2 (mov, Eb, Gb),
+ _2 (mov, Ev, Gv),
+ _2 (mov, Gb, Eb),
+ _2 (mov, Gv, Ev),
+ _2 (mov, Ev, Sw),
+ _2 (lea, Gv, Ev),
+ _2 (mov, Sw, Ew),
+ _1f (modrm_group_1a, X86_INSN_FLAG_MODRM_REG_GROUP_1a, Ev),
+
+ /* 0x90 */
+ _0 (nop),
+ _1 (xchg, CX),
+ _1 (xchg, DX),
+ _1 (xchg, BX),
+ _1 (xchg, SP),
+ _1 (xchg, BP),
+ _1 (xchg, SI),
+ _1 (xchg, DI),
+ _0 (cbw),
+ _0 (cwd),
+ _1 (call, Ap),
+ _0 (wait),
+ _0 (pushf),
+ _0 (popf),
+ _0 (sahf),
+ _0 (lahf),
+
+ /* 0xa0 */
+ _2 (mov, AL, Ob),
+ _2 (mov, AX, Ov),
+ _2 (mov, Ob, AL),
+ _2 (mov, Ov, AX),
+ _0 (movsb),
+ _0 (movsw),
+ _0 (cmpsb),
+ _0 (cmpsw),
+ _2 (test, AL, Ib),
+ _2 (test, AX, Iz),
+ _1 (stosb, AL),
+ _1 (stosw, AX),
+ _1 (lodsb, AL),
+ _1 (lodsw, AX),
+ _1 (scasb, AL),
+ _1 (scasw, AX),
+
+ /* 0xb0 */
+ _2 (mov, AL, Ib),
+ _2 (mov, CL, Ib),
+ _2 (mov, DL, Ib),
+ _2 (mov, BL, Ib),
+ _2 (mov, AH, Ib),
+ _2 (mov, CH, Ib),
+ _2 (mov, DH, Ib),
+ _2 (mov, BH, Ib),
+#define _(r) _2 (mov, r, Iv),
+ foreach_x86_gp_reg
+#undef _
+
+ /* 0xc0 */
+ _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, Ib),
+ _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, Ib),
+ _1 (ret, Iw),
+ _0 (ret),
+ _2 (les, Gz, Mp),
+ _2 (lds, Gz, Mp),
+ _2f (modrm_group_11, X86_INSN_FLAG_MODRM_REG_GROUP_11, Eb, Ib),
+ _2f (modrm_group_11, X86_INSN_FLAG_MODRM_REG_GROUP_11, Ev, Iz),
+ _2 (enter, Iw, Ib),
+ _0 (leave),
+ _1 (ret, Iw),
+ _0 (ret),
+ _0 (int3),
+ _1 (int, Ib),
+ _0 (into),
+ _0 (iret),
+
+ /* 0xd0 */
+ _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, 1b),
+ _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, 1b),
+ _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, CL),
+ _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, CL),
+ _0 (aam),
+ _0 (aad),
+ _0 (salc),
+ _0 (xlat),
+ /* FIXME x87 */
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+
+ /* 0xe0 */
+ _1 (loopnz, Jb),
+ _1 (loopz, Jb),
+ _1 (loop, Jb),
+ _1 (jcxz, Jb),
+ _2 (in, AL, Ib),
+ _2 (in, AX, Ib),
+ _2 (out, Ib, AL),
+ _2 (out, Ib, AX),
+ _1f (call, X86_INSN_FLAG_DEFAULT_64_BIT, Jz),
+  _1f (jmp, X86_INSN_FLAG_DEFAULT_64_BIT, Jz),
+ _1 (jmp, Ap),
+ _1 (jmp, Jb),
+ _2 (in, AL, DX),
+ _2 (in, AX, DX),
+ _2 (out, DX, AL),
+ _2 (out, DX, AX),
+
+ /* 0xf0 */
+ _0 (lock),
+ _0 (int1),
+ _0 (repne),
+ _0 (rep),
+ _0 (hlt),
+ _0 (cmc),
+ _0f (modrm_group_3, X86_INSN_FLAG_MODRM_REG_GROUP_3),
+ _0f (modrm_group_3, X86_INSN_FLAG_MODRM_REG_GROUP_3),
+ _0 (clc),
+ _0 (stc),
+ _0 (cli),
+ _0 (sti),
+ _0 (cld),
+ _0 (std),
+ _1f (modrm_group_4, X86_INSN_FLAG_MODRM_REG_GROUP_4, Eb),
+ _0f (modrm_group_5, X86_INSN_FLAG_MODRM_REG_GROUP_5),
+};
+
+static x86_insn_t x86_insns_two_byte[256] = {
+ /* 0x00 */
+ _0f (modrm_group_6, X86_INSN_FLAG_MODRM_REG_GROUP_6),
+ _0f (modrm_group_7, X86_INSN_FLAG_MODRM_REG_GROUP_7),
+ _2 (lar, Gv, Ew),
+ _2 (lsl, Gv, Ew),
+ _0 (bad),
+ _0 (syscall),
+ _0 (clts),
+ _0 (sysret),
+ _0 (invd),
+ _0 (wbinvd),
+ _0 (bad),
+ _0 (ud2),
+ _0 (bad),
+ _0f (modrm_group_p, X86_INSN_FLAG_MODRM_REG_GROUP_p),
+ _0 (femms),
+ _0 (escape_3dnow),
+
+ /* 0x10 */
+ _2f (movups, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
+ _2f (movups, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx),
+ _2f (movlps, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx),
+ _2f (movlps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
+ _2f (unpcklps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
+ _2f (unpckhps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
+ _2f (movhps, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx),
+ _2f (movhps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
+ _0f (modrm_group_16, X86_INSN_FLAG_MODRM_REG_GROUP_16),
+ _0 (nop),
+ _0 (nop),
+ _0 (nop),
+ _0 (nop),
+ _0 (nop),
+ _0 (nop),
+ _0 (nop),
+
+ /* 0x20 */
+ _2 (mov, Rv, Cv),
+ _2 (mov, Rv, Dv),
+ _2 (mov, Cv, Rv),
+ _2 (mov, Dv, Rv),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2f (movaps, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
+ _2f (movaps, X86_INSN_FLAG_SSE_GROUP_28, Ex, Gx),
+ _2f (cvtpi2ps, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
+ _2f (movntps, X86_INSN_FLAG_SSE_GROUP_28, Mx, Gx),
+ _2f (cvttps2pi, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
+ _2f (cvtps2pi, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
+ _2f (ucomiss, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
+ _2f (comiss, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
+
+ /* 0x30 */
+ _0 (wrmsr),
+ _0 (rdtsc),
+ _0 (rdmsr),
+ _0 (rdpmc),
+ _0 (sysenter),
+ _0 (sysexit),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+
+ /* 0x40 */
+#define _(x) _2 (cmov##x, Gv, Ev),
+ foreach_x86_condition
+#undef _
+
+ /* 0x50 */
+ _2f (movmskps, X86_INSN_FLAG_SSE_GROUP_50, Gd, Rx),
+ _2f (sqrtps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
+ _2f (rsqrtps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
+ _2f (rcpps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
+ _2f (andps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
+ _2f (andnps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
+ _2f (orps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
+ _2f (xorps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
+ _2f (addps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
+ _2f (mulps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
+ _2f (cvtps2pd, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
+ _2f (cvtdq2ps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
+ _2f (subps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
+ _2f (minps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
+ _2f (divps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
+ _2f (maxps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
+
+ /* 0x60 */
+ _2f (punpcklbw, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
+ _2f (punpcklwd, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
+ _2f (punpckldq, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
+ _2f (packsswb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
+ _2f (pcmpgtb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
+ _2f (pcmpgtw, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
+ _2f (pcmpgtd, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
+ _2f (packuswb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
+ _2f (punpckhbw, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
+ _2f (punpckhwd, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
+ _2f (punpckhdq, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
+ _2f (packssdw, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_68),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_68),
+ _2f (movd, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
+ _2f (movq, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
+
+ /* 0x70 */
+ _3f (pshufw, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em, Ib),
+ _0f (modrm_group_12, X86_INSN_FLAG_MODRM_REG_GROUP_12),
+ _0f (modrm_group_13, X86_INSN_FLAG_MODRM_REG_GROUP_13),
+ _0f (modrm_group_14, X86_INSN_FLAG_MODRM_REG_GROUP_14),
+ _2f (pcmpeqb, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em),
+ _2f (pcmpeqw, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em),
+ _2f (pcmpeqd, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em),
+ _0f (emms, X86_INSN_FLAG_SSE_GROUP_70),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
+ _2f (movd, X86_INSN_FLAG_SSE_GROUP_78, Em, Gm),
+ _2f (movq, X86_INSN_FLAG_SSE_GROUP_78, Em, Gm),
+
+ /* 0x80 */
+#define _(x) _1 (j##x, Jz),
+ foreach_x86_condition
+#undef _
+
+ /* 0x90 */
+#define _(x) _1 (set##x, Eb),
+ foreach_x86_condition
+#undef _
+
+ /* 0xa0 */
+ _0 (push_fs),
+ _0 (pop_fs),
+ _0 (cpuid),
+ _2 (bt, Ev, Gv),
+ _3 (shld, Ev, Gv, Ib),
+ _3 (shld, Ev, Gv, CL),
+ _0 (bad),
+ _0 (bad),
+ _0 (push_gs),
+ _0 (pop_gs),
+ _0 (rsm),
+ _2 (bts, Ev, Gv),
+ _3 (shrd, Ev, Gv, Ib),
+ _3 (shrd, Ev, Gv, CL),
+ _0f (modrm_group_15, X86_INSN_FLAG_MODRM_REG_GROUP_15),
+ _2 (imul, Gv, Ev),
+
+ /* 0xb0 */
+ _2 (cmpxchg, Eb, Gb),
+ _2 (cmpxchg, Ev, Gv),
+ _2 (lss, Gz, Mp),
+ _2 (btr, Ev, Gv),
+ _2 (lfs, Gz, Mp),
+ _2 (lgs, Gz, Mp),
+ _2 (movzbl, Gv, Eb),
+ _2 (movzwl, Gv, Ew),
+ _0 (bad),
+ _0f (modrm_group_10, X86_INSN_FLAG_MODRM_REG_GROUP_10),
+ _2f (modrm_group_8, X86_INSN_FLAG_MODRM_REG_GROUP_8, Ev, Ib),
+ _2 (btc, Ev, Gv),
+ _2 (bsf, Gv, Ev),
+ _2 (bsr, Gv, Ev),
+ _2 (movsx, Gv, Eb),
+ _2 (movsx, Gv, Ew),
+
+ /* 0xc0 */
+ _2 (xadd, Eb, Gb),
+ _2 (xadd, Ev, Gv),
+ _3f (cmpps, X86_INSN_FLAG_SSE_GROUP_c0, Gx, Ex, Ib),
+ _2 (movnti, Mv, Gv),
+ _3f (pinsrw, X86_INSN_FLAG_SSE_GROUP_c0, Gm, Ew, Ib),
+ _3f (pextrw, X86_INSN_FLAG_SSE_GROUP_c0, Gd, Rm, Ib),
+ _3f (shufps, X86_INSN_FLAG_SSE_GROUP_c0, Gx, Ex, Ib),
+ _1f (modrm_group_9, X86_INSN_FLAG_MODRM_REG_GROUP_9, Mx),
+#define _(r) _1 (bswap, r),
+ foreach_x86_gp_reg
+#undef _
+
+ /* 0xd0 */
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_d0),
+ _2f (psrlw, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
+ _2f (psrld, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
+ _2f (psrlq, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
+ _2f (paddq, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
+ _2f (pmullw, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_d0),
+ _2f (pmovmskb, X86_INSN_FLAG_SSE_GROUP_d0, Gd, Rm),
+ _2f (psubusb, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
+ _2f (psubusw, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
+ _2f (pminub, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
+ _2f (pand, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
+ _2f (paddusb, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
+ _2f (paddusw, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
+ _2f (pmaxub, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
+ _2f (pandn, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
+
+ /* 0xe0 */
+ _2f (pavgb, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
+ _2f (psraw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
+ _2f (psrad, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
+ _2f (pavgw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
+ _2f (pmulhuw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
+ _2f (pmulhw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
+  _0f (bad, X86_INSN_FLAG_SSE_GROUP_e0),
+ _2f (movntq, X86_INSN_FLAG_SSE_GROUP_e0, Mm, Gm),
+ _2f (psubsb, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
+ _2f (psubsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
+ _2f (pminsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
+ _2f (por, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
+ _2f (paddsb, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
+ _2f (paddsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
+ _2f (pmaxsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
+ _2f (pxor, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
+
+ /* 0xf0 */
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_f0),
+ _2f (psllw, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
+ _2f (pslld, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
+ _2f (psllq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
+ _2f (pmuludq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
+ _2f (pmaddwd, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
+ _2f (psadbw, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
+ _2f (maskmovq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
+ _2f (psubb, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
+ _2f (psubw, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
+ _2f (psubd, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
+ _2f (psubq, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
+ _2f (paddb, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
+ _2f (paddw, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
+ _2f (paddd, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
+ _0f (bad, X86_INSN_FLAG_SSE_GROUP_f8),
+};
+
+typedef struct {
+ x86_insn_t insns[8];
+} x86_insn_group8_t;
+
+/* Escape groups are indexed by modrm reg field. */
+static x86_insn_group8_t x86_insn_modrm_reg_groups[] = {
+ [X86_INSN_MODRM_REG_GROUP_1].insns = {
+ _0 (add), _0 ( or), _0 (adc), _0 (sbb),
+ _0 (and), _0 (sub), _0 (xor), _0 (cmp),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_1a].insns = {
+ _0f (pop, X86_INSN_FLAG_DEFAULT_64_BIT),
+ _0 (bad), _0 (bad), _0 (bad),
+ _0 (bad), _0 (bad), _0 (bad), _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_2].insns = {
+ _0 (rol), _0 (ror), _0 (rcl), _0 (rcr),
+ _0 (shl), _0 (shr), _0 (sal), _0 (sar),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_3].insns = {
+ _0 (test), _0 (test), _0 (not), _0 (neg),
+ _0 (mul), _0 (imul), _0 (div), _0 (idiv),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_4].insns = {
+ _0 (inc), _0 (dec), _0 (bad), _0 (bad),
+ _0 (bad), _0 (bad), _0 (bad), _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_5].insns = {
+ _1 (inc, Ev),
+ _1 (dec, Ev),
+ _1f (call, X86_INSN_FLAG_DEFAULT_64_BIT, Ev),
+ _1 (call, Mp),
+ _1f (jmp, X86_INSN_FLAG_DEFAULT_64_BIT, Ev),
+ _1 (jmp, Mp),
+ _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Ev),
+ _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_6].insns = {
+ _1 (sldt, Ev),
+ _1 (str, Ev),
+ _1 (lldt, Ev),
+ _1 (ltr, Ev),
+ _1 (verr, Ev),
+ _1 (verw, Ev),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_7].insns = {
+ _1 (sgdt, Mv),
+ _1 (sidt, Mv),
+ _1 (lgdt, Mv),
+ _1 (lidt, Mv),
+ _1 (smsw, Ev),
+ _0 (bad),
+ _1 (lmsw, Ew),
+ _1 (invlpg, Mv),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_8].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (bt, Ev, Ib),
+ _2 (bts, Ev, Ib),
+ _2 (btr, Ev, Ib),
+ _2 (btc, Ev, Ib),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_9].insns = {
+ _0 (bad),
+ _1 (cmpxchg, Mx),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_10].insns = {
+ _0 (bad), _0 (bad), _0 (bad), _0 (bad),
+ _0 (bad), _0 (bad), _0 (bad), _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_11].insns = {
+ _0 (mov), _0 (bad), _0 (bad), _0 (bad),
+ _0 (bad), _0 (bad), _0 (bad), _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_12].insns = {
+ _0 (bad),
+ _0 (bad),
+ _2 (psrlw, Rm, Ib),
+ _0 (bad),
+ _2 (psraw, Rm, Ib),
+ _0 (bad),
+ _2 (psllw, Rm, Ib),
+ _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_13].insns = {
+ _0 (bad),
+ _0 (bad),
+ _2 (psrld, Rm, Ib),
+ _0 (bad),
+ _2 (psrad, Rm, Ib),
+ _0 (bad),
+ _2 (pslld, Rm, Ib),
+ _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_14].insns = {
+ _0 (bad),
+ _0 (bad),
+ _2 (psrlq, Rm, Ib),
+    _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (psllq, Rm, Ib),
+    _0 (bad),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_15].insns = {
+ _1 (fxsave, Mv),
+ _1 (fxrstor, Mv),
+ _1 (ldmxcsr, Mv),
+ _1 (stmxcsr, Mv),
+ _0 (bad),
+ _1 (lfence, Mv),
+ _1 (mfence, Mv),
+ _1 (sfence, Mv),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_16].insns = {
+ _1 (prefetch_nta, Mv),
+ _1 (prefetch_t0, Mv),
+ _1 (prefetch_t1, Mv),
+ _1 (prefetch_t2, Mv),
+ _1 (prefetch_nop, Mv),
+ _1 (prefetch_nop, Mv),
+ _1 (prefetch_nop, Mv),
+ _1 (prefetch_nop, Mv),
+ },
+
+ [X86_INSN_MODRM_REG_GROUP_p].insns = {
+ _1 (prefetch_exclusive, Mv),
+ _1 (prefetch_modified, Mv),
+ _1 (prefetch_nop, Mv),
+ _1 (prefetch_modified, Mv),
+ _1 (prefetch_nop, Mv),
+ _1 (prefetch_nop, Mv),
+ _1 (prefetch_nop, Mv),
+ _1 (prefetch_nop, Mv),
+ },
+};
+
+static x86_insn_group8_t x86_insn_sse_groups_repz[] = {
+ [X86_INSN_SSE_GROUP_10].insns = {
+ _2 (movss, Gx, Ex),
+ _2 (movss, Ex, Gx),
+ _2 (movsldup, Gx, Ex),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (movshdup, Gx, Ex),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_28].insns = {
+ _0 (bad),
+ _0 (bad),
+ _2 (cvtsi2ss, Gx, Ev),
+ _0 (bad),
+ _2 (cvttss2si, Gv, Ex),
+ _2 (cvtss2si, Gv, Ex),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_50].insns = {
+ _0 (bad),
+ _2 (sqrtss, Gx, Ex),
+ _2 (rsqrtps, Gx, Ex),
+ _2 (rcpss, Gx, Ex),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_58].insns = {
+ _2 (addss, Gx, Ex),
+ _2 (mulss, Gx, Ex),
+ _2 (cvtss2sd, Gx, Ex),
+ _2 (cvttps2dq, Gx, Ex),
+ _2 (subss, Gx, Ex),
+ _2 (minss, Gx, Ex),
+ _2 (divss, Gx, Ex),
+ _2 (maxss, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_60].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_68].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (movdqu, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_70].insns = {
+ _3 (pshufhw, Gx, Ex, Ib),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_78].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (movq, Gx, Ex),
+ _2 (movdqu, Ex, Gx),
+ },
+
+ [X86_INSN_SSE_GROUP_c0].insns = {
+ _0 (bad),
+ _0 (bad),
+ _3 (cmpss, Gx, Ex, Ib),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_d0].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (movq2dq, Gx, Em),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_d8].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_e0].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (cvtdq2pd, Gx, Ex),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_e8].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_f0].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_f8].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+};
+
+static x86_insn_group8_t x86_insn_sse_groups_operand_size[] = {
+ [X86_INSN_SSE_GROUP_10].insns = {
+ _2 (movupd, Gx, Ex),
+ _2 (movupd, Ex, Gx),
+ _2 (movlpd, Gx, Ex),
+ _2 (movlpd, Ex, Gx),
+ _2 (unpcklpd, Gx, Ex),
+ _2 (unpckhpd, Gx, Ex),
+ _2 (movhpd, Gx, Mx),
+ _2 (movhpd, Mx, Gx),
+ },
+
+ [X86_INSN_SSE_GROUP_28].insns = {
+ _2 (movapd, Gx, Ex),
+ _2 (movapd, Ex, Gx),
+ _2 (cvtpi2pd, Gx, Ex),
+ _2 (movntpd, Mx, Gx),
+ _2 (cvttpd2pi, Gx, Mx),
+ _2 (cvtpd2pi, Gx, Mx),
+ _2 (ucomisd, Gx, Ex),
+ _2 (comisd, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_50].insns = {
+ _2 (movmskpd, Gd, Rx),
+ _2 (sqrtpd, Gx, Ex),
+ _0 (bad),
+ _0 (bad),
+ _2 (andpd, Gx, Ex),
+ _2 (andnpd, Gx, Ex),
+ _2 (orpd, Gx, Ex),
+ _2 (xorpd, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_58].insns = {
+ _2 (addpd, Gx, Ex),
+ _2 (mulpd, Gx, Ex),
+ _2 (cvtpd2ps, Gx, Ex),
+ _2 (cvtps2dq, Gx, Ex),
+ _2 (subpd, Gx, Ex),
+ _2 (minpd, Gx, Ex),
+ _2 (divpd, Gx, Ex),
+ _2 (maxpd, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_60].insns = {
+ _2 (punpcklbw, Gx, Ex),
+ _2 (punpcklwd, Gx, Ex),
+ _2 (punpckldq, Gx, Ex),
+ _2 (packsswb, Gx, Ex),
+ _2 (pcmpgtb, Gx, Ex),
+ _2 (pcmpgtw, Gx, Ex),
+ _2 (pcmpgtd, Gx, Ex),
+ _2 (packuswb, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_68].insns = {
+ _2 (punpckhbw, Gx, Ex),
+ _2 (punpckhwd, Gx, Ex),
+ _2 (punpckhdq, Gx, Ex),
+ _2 (packssdw, Gx, Ex),
+ _2 (punpcklqdq, Gx, Ex),
+ _2 (punpckhqdq, Gx, Ex),
+ _2 (movd, Gx, Ev),
+ _2 (movdqa, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_70].insns = {
+ _3 (pshufd, Gx, Ex, Ib),
+ _0f (modrm_group_12, X86_INSN_FLAG_MODRM_REG_GROUP_12),
+ _0f (modrm_group_13, X86_INSN_FLAG_MODRM_REG_GROUP_13),
+ _0f (modrm_group_14, X86_INSN_FLAG_MODRM_REG_GROUP_14),
+ _2 (pcmpeqb, Gx, Ex),
+ _2 (pcmpeqw, Gx, Ex),
+ _2 (pcmpeqd, Gx, Ex),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_78].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (haddpd, Gx, Ex),
+ _2 (hsubpd, Gx, Ex),
+ _2 (movd, Ev, Gx),
+ _2 (movdqa, Ex, Gx),
+ },
+
+ [X86_INSN_SSE_GROUP_c0].insns = {
+ _0 (bad),
+ _0 (bad),
+ _3 (cmppd, Gx, Ex, Ib),
+ _0 (bad),
+ _3 (pinsrw, Gx, Ew, Ib),
+ _3 (pextrw, Gd, Gx, Ib),
+ _3 (shufpd, Gx, Ex, Ib),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_d0].insns = {
+ _2 (addsubpd, Gx, Ex),
+ _2 (psrlw, Gx, Ex),
+ _2 (psrld, Gx, Ex),
+ _2 (psrlq, Gx, Ex),
+ _2 (paddq, Gx, Ex),
+ _2 (pmullw, Gx, Ex),
+ _2 (movq, Ex, Gx),
+ _2 (pmovmskb, Gd, Rx),
+ },
+
+ [X86_INSN_SSE_GROUP_d8].insns = {
+ _2 (psubusb, Gx, Ex),
+ _2 (psubusw, Gx, Ex),
+ _2 (pminub, Gx, Ex),
+ _2 (pand, Gx, Ex),
+ _2 (paddusb, Gx, Ex),
+ _2 (paddusw, Gx, Ex),
+ _2 (pmaxub, Gx, Ex),
+ _2 (pandn, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_e0].insns = {
+ _2 (pavgb, Gx, Ex),
+ _2 (psraw, Gx, Ex),
+ _2 (psrad, Gx, Ex),
+ _2 (pavgw, Gx, Ex),
+ _2 (pmulhuw, Gx, Ex),
+ _2 (pmulhw, Gx, Ex),
+ _2 (cvttpd2dq, Gx, Ex),
+ _2 (movntdq, Mx, Gx),
+ },
+
+ [X86_INSN_SSE_GROUP_e8].insns = {
+ _2 (psubsb, Gx, Ex),
+ _2 (psubsw, Gx, Ex),
+ _2 (pminsw, Gx, Ex),
+ _2 (por, Gx, Ex),
+ _2 (paddsb, Gx, Ex),
+ _2 (paddsw, Gx, Ex),
+ _2 (pmaxsw, Gx, Ex),
+ _2 (pxor, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_f0].insns = {
+ _0 (bad),
+ _2 (psllw, Gx, Ex),
+ _2 (pslld, Gx, Ex),
+ _2 (psllq, Gx, Ex),
+ _2 (pmuludq, Gx, Ex),
+ _2 (pmaddwd, Gx, Ex),
+ _2 (psadbw, Gx, Ex),
+ _2 (maskmovdqu, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_f8].insns = {
+ _2 (psubb, Gx, Ex),
+ _2 (psubw, Gx, Ex),
+ _2 (psubd, Gx, Ex),
+ _2 (psubq, Gx, Ex),
+ _2 (paddb, Gx, Ex),
+ _2 (paddw, Gx, Ex),
+ _2 (paddd, Gx, Ex),
+ _0 (bad),
+ },
+};
+
+static x86_insn_group8_t x86_insn_sse_groups_repnz[] = {
+ [X86_INSN_SSE_GROUP_10].insns = {
+ _2 (movsd, Gx, Ex),
+ _2 (movsd, Ex, Gx),
+ _2 (movddup, Gx, Ex),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_28].insns = {
+ _0 (bad),
+ _0 (bad),
+ _2 (cvtsi2sd, Gx, Ev),
+ _0 (bad),
+ _2 (cvttsd2si, Gv, Ex),
+ _2 (cvtsd2si, Gv, Ex),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_50].insns = {
+ _0 (bad),
+ _2 (sqrtsd, Gx, Ex),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_58].insns = {
+ _2 (addsd, Gx, Ex),
+ _2 (mulsd, Gx, Ex),
+ _2 (cvtsd2ss, Gx, Ex),
+ _0 (bad),
+ _2 (subsd, Gx, Ex),
+ _2 (minsd, Gx, Ex),
+ _2 (divsd, Gx, Ex),
+ _2 (maxsd, Gx, Ex),
+ },
+
+ [X86_INSN_SSE_GROUP_60].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_68].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_70].insns = {
+ _3 (pshuflw, Gx, Ex, Ib),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_78].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (haddps, Gx, Ex),
+ _2 (hsubps, Gx, Ex),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_c0].insns = {
+ _0 (bad),
+ _0 (bad),
+ _3 (cmpsd, Gx, Ex, Ib),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_d0].insns = {
+ _2 (addsubps, Gx, Ex),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (movdq2q, Gm, Ex),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_d8].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_e0].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _2 (cvtpd2dq, Gx, Ex),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_e8].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_f0].insns = {
+ _2 (lddqu, Gx, Mx),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+
+ [X86_INSN_SSE_GROUP_f8].insns = {
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ _0 (bad),
+ },
+};
+
+#undef _
+
+/* Parses memory displacements and immediates. */
+static u8 * x86_insn_parse_number (u32 log2_n_bytes,
+ u8 * code, u8 * code_end,
+ i64 * result)
+{
+ i64 x = 0;
+
+ if (code + (1 << log2_n_bytes) > code_end)
+ return 0;
+
+ switch (log2_n_bytes)
+ {
+ case 3:
+ x = clib_little_to_host_unaligned_mem_u64 ((u64 *) code);
+ break;
+
+ case 2:
+ x = (i32) clib_little_to_host_unaligned_mem_u32 ((u32 *) code);
+ break;
+
+ case 1:
+ x = (i16) clib_little_to_host_unaligned_mem_u16 ((u16 *) code);
+ break;
+
+ case 0:
+ x = (i8) code[0];
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ *result = x;
+ return code + (1 << log2_n_bytes);
+}
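+
+/* Worked example: with log2_n_bytes == 1, the code bytes 0x34 0x12
+   are read little-endian as 0x1234 and widened through i16, so
+   *result becomes 0x1234; the bytes 0xff 0xff would yield -1. */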
+
+static u32
+x86_insn_log2_immediate_bytes (x86_insn_parse_t * p, x86_insn_t * insn)
+{
+ u32 i = ~0;
+ switch (x86_insn_immediate_type (insn))
+ {
+ case 'b': i = 0; break;
+ case 'w': i = 1; break;
+ case 'd': i = 2; break;
+ case 'q': i = 3; break;
+
+ case 'z':
+ i = p->log2_effective_operand_bytes;
+ if (i > 2) i = 2;
+ break;
+
+ case 'v':
+ i = p->log2_effective_operand_bytes;
+ break;
+
+ default:
+ i = ~0;
+ break;
+ }
+
+ return i;
+}
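+
+/* For example, an Iz immediate parsed with a 64-bit effective operand
+   size yields 2 (a 4-byte immediate), matching the x86-64 rule that
+   most immediates are capped at 32 bits. */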
+
+static u8 *
+x86_insn_parse_modrm_byte (x86_insn_parse_t * x,
+ x86_insn_modrm_byte_t modrm,
+ u32 parse_flags,
+ u8 * code,
+ u8 * code_end)
+{
+ u8 effective_address_bits;
+
+ if (parse_flags & X86_INSN_PARSE_64_BIT)
+ effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 32 : 64;
+ else if (parse_flags & X86_INSN_PARSE_32_BIT)
+ effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 16 : 32;
+ else
+ effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 32 : 16;
+
+ x->log2_effective_address_bytes = 1;
+ x->log2_effective_address_bytes += effective_address_bits > 16;
+ x->log2_effective_address_bytes += effective_address_bits > 32;
+
+ x->regs[0] |= modrm.reg;
+ if (modrm.mode == 3)
+ x->regs[1] |= modrm.rm;
+ else
+ {
+ u32 log2_disp_bytes = ~0;
+
+ x->flags |= X86_INSN_IS_ADDRESS;
+
+ if (effective_address_bits != 16)
+ {
+ u8 has_sib_byte = 0;
+
+ switch (modrm.mode)
+ {
+ case 0:
+              /* Mode 0 with rm == bp means displacement only: no base
+                 register is encoded (RIP-relative in 64-bit mode). */
+ if (modrm.rm == X86_INSN_GP_REG_BP)
+ {
+ log2_disp_bytes = x->log2_effective_address_bytes;
+ break;
+ }
+ else if (modrm.rm == X86_INSN_GP_REG_SP
+ && effective_address_bits != 16)
+ {
+ has_sib_byte = 1;
+ break;
+ }
+ /* fall through */
+ case 1:
+ case 2:
+ x->regs[1] |= modrm.rm;
+ x->flags |= X86_INSN_HAS_BASE;
+ if (modrm.mode != 0)
+ {
+ log2_disp_bytes = (modrm.mode == 1
+ ? 0
+ : x->log2_effective_address_bytes);
+ if (log2_disp_bytes > 2)
+ log2_disp_bytes = 2;
+ }
+ break;
+ }
+
+ if (has_sib_byte)
+ {
+ x86_insn_sib_byte_t sib;
+
+ if (code >= code_end)
+ return 0;
+ sib.byte = *code++;
+
+              /* sib.log2_scale already holds the log2 of the index
+                 scale, so store it directly. */
+              x->log2_index_scale = sib.log2_scale;
+ x->regs[1] |= sib.base;
+ x->flags |= X86_INSN_HAS_BASE;
+
+ if (sib.index != X86_INSN_GP_REG_SP)
+ {
+ x->regs[2] |= sib.index;
+ x->flags |= X86_INSN_HAS_INDEX;
+ }
+ }
+ }
+ else
+ {
+ /* effective_address_bits == 16 */
+ switch (modrm.mode)
+ {
+ case 0:
+ if (modrm.rm == 6)
+ {
+ /* [disp16] */
+ log2_disp_bytes = 1;
+ break;
+ }
+ /* fall through */
+ case 1:
+ case 2:
+ switch (modrm.rm)
+ {
+ case 0: /* [bx + si/di] */
+ case 1:
+ x->regs[1] = X86_INSN_GP_REG_BX;
+ x->regs[2] = X86_INSN_GP_REG_SI + (modrm.rm & 1);
+ x->flags |= X86_INSN_HAS_BASE | X86_INSN_HAS_INDEX;
+ break;
+
+ case 2: /* [bp + si/di] */
+ case 3:
+ x->regs[1] = X86_INSN_GP_REG_BP;
+ x->regs[2] = X86_INSN_GP_REG_SI + (modrm.rm & 1);
+ x->flags |= X86_INSN_HAS_BASE | X86_INSN_HAS_INDEX;
+ break;
+
+ case 4: /* [si/di] */
+ case 5:
+ x->regs[1] = X86_INSN_GP_REG_SI + (modrm.rm & 1);
+ x->flags |= X86_INSN_HAS_BASE;
+ break;
+
+ case 6: /* [bp + disp] */
+ x->regs[1] = X86_INSN_GP_REG_BP;
+ x->flags |= X86_INSN_HAS_BASE;
+ break;
+
+ case 7: /* [bx + disp] */
+ x->regs[1] = X86_INSN_GP_REG_BX;
+ x->flags |= X86_INSN_HAS_BASE;
+ break;
+ }
+
+ if (modrm.mode != 0)
+ log2_disp_bytes = modrm.mode == 1 ? 0 : 1;
+ break;
+ }
+ }
+
+ if (log2_disp_bytes != ~0)
+ {
+ i64 disp;
+ code = x86_insn_parse_number (log2_disp_bytes, code, code_end,
+ &disp);
+ if (code)
+ x->displacement = disp;
+ }
+ }
+
+ return code;
+}
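+
+/* Worked example: in 32-bit mode the modrm byte 0x45 decodes as
+   mode 1, reg 0, rm 5, so the code above records register 5 (ebp) as
+   the base, sets X86_INSN_HAS_BASE, and reads a 1-byte displacement,
+   i.e. an operand of the form disp8(%ebp). */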
+
+u8 * x86_insn_parse (x86_insn_parse_t * p, u8 * code_start)
+{
+ u8 i, * code, * code_end;
+ x86_insn_t * insn, * group_insn;
+ u8 default_operand_bits, effective_operand_bits;
+ u32 opcode, parse_flags;
+
+ /* Preserve global parse flags. */
+ parse_flags = p->flags & (X86_INSN_PARSE_32_BIT | X86_INSN_PARSE_64_BIT);
+ memset (p, 0, sizeof (p[0]));
+ p->flags = parse_flags;
+
+ /* 64 implies 32 bit parsing. */
+ if (parse_flags & X86_INSN_PARSE_64_BIT)
+ parse_flags |= X86_INSN_PARSE_32_BIT;
+
+ /* Instruction must be <= 15 bytes. */
+ code = code_start;
+ code_end = code + 15;
+
+ /* Parse legacy prefixes. */
+ while (1)
+ {
+ if (code >= code_end)
+ goto insn_too_long;
+ i = code[0];
+ code++;
+ switch (i)
+ {
+ default: goto prefix_done;
+
+ /* Set flags based on prefix. */
+#define _(x,o) case o: p->flags |= X86_INSN_##x; break;
+ foreach_x86_legacy_prefix;
+#undef _
+ }
+ }
+ prefix_done:
+
+ /* REX prefix. */
+ if ((parse_flags & X86_INSN_PARSE_64_BIT) && i >= 0x40 && i <= 0x4f)
+ {
+ p->regs[0] |= ((i & (1 << 2)) != 0) << 3; /* r bit */
+ p->regs[1] |= ((i & (1 << 0)) != 0) << 3; /* b bit */
+ p->regs[2] |= ((i & (1 << 1)) != 0) << 3; /* x bit */
+ p->flags |= ((i & (1 << 3)) /* w bit */
+ ? X86_INSN_OPERAND_SIZE_64 : 0);
+ if (code >= code_end)
+ goto insn_too_long;
+ i = *code++;
+ }
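+
+  /* For example, a REX byte of 0x48 sets only the w bit, selecting a
+     64-bit effective operand size without extending any register
+     fields. */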
+
+ opcode = i;
+ if (opcode == 0x0f)
+ {
+      /* Two byte opcode. */
+ if (code >= code_end)
+ goto insn_too_long;
+ i = *code++;
+ opcode = (opcode << 8) | i;
+ insn = x86_insns_two_byte + i;
+ }
+ else
+ {
+ static x86_insn_t arpl = {
+ .name = "arpl",
+ .operands[0].data = "Ew",
+ .operands[1].data = "Gw",
+ };
+
+ if (PREDICT_FALSE (i == 0x63
+ && ! (parse_flags & X86_INSN_PARSE_64_BIT)))
+ insn = &arpl;
+ else
+ insn = x86_insns_one_byte + i;
+ }
+
+ if ((i = X86_INSN_FLAG_GET_SSE_GROUP (insn->flags)) != 0)
+ {
+ x86_insn_group8_t * g8;
+
+ if (p->flags & X86_INSN_OPERAND_SIZE)
+ g8 = x86_insn_sse_groups_operand_size;
+ else if (p->flags & X86_INSN_REPZ)
+ g8 = x86_insn_sse_groups_repz;
+ else if (p->flags & X86_INSN_REPNZ)
+ g8 = x86_insn_sse_groups_repnz;
+ else
+ g8 = 0;
+
+      /* Flags store group index + 1 so the != 0 test above works. */
+ ASSERT ((i - 1) < ARRAY_LEN (x86_insn_sse_groups_operand_size));
+ if (g8)
+ insn = g8[i - 1].insns + (opcode & 7);
+ }
+
+ /* Parse modrm and displacement if present. */
+ if (x86_insn_has_modrm_byte (insn))
+ {
+ x86_insn_modrm_byte_t modrm;
+
+ if (code >= code_end)
+ goto insn_too_long;
+ modrm.byte = *code++;
+
+ /* Handle special 0x0f01 and 0x0fae encodings. */
+ if (PREDICT_FALSE (modrm.mode == 3
+ && (opcode == 0x0f01
+ || opcode == 0x0fae)))
+ {
+ static x86_insn_t x86_insns_0f01_special[] = {
+ _0 (swapgs), _0 (rdtscp), _0 (bad), _0 (bad),
+ _0 (bad), _0 (bad), _0 (bad), _0 (bad),
+ };
+ static x86_insn_t x86_insns_0fae_special[] = {
+ _0 (vmrun), _0 (vmmcall), _0 (vmload), _0 (vmsave),
+ _0 (stgi), _0 (clgi), _0 (skinit), _0 (invlpga),
+ };
+
+ if (opcode == 0x0f01)
+ insn = x86_insns_0f01_special;
+ else
+ insn = x86_insns_0fae_special;
+ insn += modrm.rm;
+ opcode = (opcode << 8) | modrm.byte;
+ }
+ else
+ {
+ code = x86_insn_parse_modrm_byte (p, modrm, parse_flags,
+ code, code_end);
+ if (! code)
+ goto insn_too_long;
+ }
+ }
+
+ group_insn = 0;
+ if ((i = X86_INSN_FLAG_GET_MODRM_REG_GROUP (insn->flags)) != 0)
+ {
+ u32 g = i - 1;
+ ASSERT (g < ARRAY_LEN (x86_insn_modrm_reg_groups));
+ group_insn = x86_insn_modrm_reg_groups[g].insns + (p->regs[0] & 7);
+ }
+
+ p->insn = insn[0];
+ if (group_insn)
+ {
+ u32 k;
+ p->insn.name = group_insn->name;
+ p->insn.flags |= group_insn->flags;
+ for (k = 0; k < ARRAY_LEN (group_insn->operands); k++)
+ if (x86_insn_operand_is_valid (group_insn, k))
+ p->insn.operands[k] = group_insn->operands[k];
+ }
+
+ default_operand_bits
+ = ((((parse_flags & X86_INSN_PARSE_32_BIT) != 0)
+ ^ ((p->flags & X86_INSN_OPERAND_SIZE) != 0))
+ ? BITS (u32) : BITS (u16));
+
+ if ((parse_flags & X86_INSN_PARSE_64_BIT)
+ && (p->insn.flags & X86_INSN_FLAG_DEFAULT_64_BIT))
+ default_operand_bits = BITS (u64);
+
+ effective_operand_bits = default_operand_bits;
+ if (p->flags & X86_INSN_OPERAND_SIZE_64)
+ effective_operand_bits = BITS (u64);
+
+ p->log2_effective_operand_bytes = 1;
+ p->log2_effective_operand_bytes += effective_operand_bits > 16;
+ p->log2_effective_operand_bytes += effective_operand_bits > 32;
+
+ /* Parse immediate if present. */
+ {
+ u32 l = x86_insn_log2_immediate_bytes (p, insn);
+ if (l <= 3)
+ {
+ code = x86_insn_parse_number (l, code, code_end, &p->immediate);
+ if (! code)
+ goto insn_too_long;
+ }
+ }
+
+ return code;
+
+ insn_too_long:
+ return 0;
+}
+
+static u8 * format_x86_gp_reg_operand (u8 * s, va_list * va)
+{
+ u32 r = va_arg (*va, u32);
+ u32 log2_n_bytes = va_arg (*va, u32);
+
+  /* First and second letters of the 16-bit register names
+     ax, cx, dx, bx, sp, bp, si, di. */
+  const char names8[8] = "acdbsbsd";
+  const char names16[8] = "xxxxppii";
+
+ ASSERT (r < 16);
+
+ /* Add % register prefix. */
+ vec_add1 (s, '%');
+
+ switch (log2_n_bytes)
+ {
+    case 0:
+      /* Encodings 0-3 are the low-byte registers (al, cl, dl, bl);
+         4-7 are the high-byte registers (ah, ch, dh, bh). */
+      if (r < 8)
+        s = format (s, "%c%c", names8[r & 3], (r >> 2) ? 'h' : 'l');
+      else
+        s = format (s, "r%db", r);
+      break;
+
+ case 2:
+ case 3:
+ s = format (s, "%c", log2_n_bytes == 2 ? 'e' : 'r');
+ /* fall through */
+ case 1:
+ if (r < 8)
+ s = format (s, "%c%c", names8[r], names16[r]);
+ else
+ {
+ s = format (s, "%d", r);
+ if (log2_n_bytes != 3)
+ s = format (s, "%c", log2_n_bytes == 1 ? 'w' : 'd');
+ }
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ return s;
+}
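+
+/* For example, r == 0 formats as %al, %ax, %eax or %rax as
+   log2_n_bytes runs 0..3, and r == 8 with log2_n_bytes == 0 formats
+   as %r8b. */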
+
+static u8 * format_x86_reg_operand (u8 * s, va_list * va)
+{
+ u32 reg = va_arg (*va, u32);
+ u32 log2_n_bytes = va_arg (*va, u32);
+ u32 type = va_arg (*va, u32);
+
+ switch (type)
+ {
+ default:
+ ASSERT (0);
+ break;
+
+ case 'x':
+ ASSERT (reg < 16);
+ return format (s, "%%xmm%d", reg);
+
+ case 'm':
+ ASSERT (reg < 8);
+ return format (s, "%%mm%d", reg);
+
+ /* Explicit byte/word/double-word/quad-word */
+ case 'b': log2_n_bytes = 0; break;
+ case 'w': log2_n_bytes = 1; break;
+ case 'd': log2_n_bytes = 2; break;
+ case 'q': log2_n_bytes = 3; break;
+
+ /* Use effective operand size. */
+ case 'v': break;
+
+ /* word or double-word depending on effective operand size. */
+ case 'z':
+ log2_n_bytes = clib_min (log2_n_bytes, 2);
+ break;
+ }
+
+ s = format (s, "%U", format_x86_gp_reg_operand, reg, log2_n_bytes);
+ return s;
+}
+
+static u8 * format_x86_mem_operand (u8 * s, va_list * va)
+{
+ x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *);
+
+ if (p->displacement != 0)
+ s = format (s, "0x%x", p->displacement);
+
+ if (p->flags & X86_INSN_HAS_BASE)
+ {
+ s = format (s, "(%U",
+ format_x86_gp_reg_operand, p->regs[1],
+ p->log2_effective_address_bytes);
+ if (p->flags & X86_INSN_HAS_INDEX)
+ {
+ s = format (s, ",%U",
+ format_x86_gp_reg_operand, p->regs[2],
+ p->log2_effective_address_bytes);
+ if (p->log2_index_scale != 0)
+ s = format (s, ",%d", 1 << p->log2_index_scale);
+ }
+ s = format (s, ")");
+ }
+
+ /* [RIP+disp] PC relative addressing in 64 bit mode. */
+ else if (p->flags & X86_INSN_PARSE_64_BIT)
+ s = format (s, "(%%rip)");
+
+ return s;
+}
+
+static u8 * format_x86_insn_operand (u8 * s, va_list * va)
+{
+ x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *);
+ x86_insn_t * insn = &p->insn;
+ u32 o = va_arg (*va, u32);
+ u8 c, t;
+
+ ASSERT (o < ARRAY_LEN (insn->operands));
+ c = insn->operands[o].code;
+ t = insn->operands[o].type;
+
+ /* Register encoded in instruction. */
+ if (c < 8)
+ return format (s, "%U",
+ format_x86_gp_reg_operand, c,
+ p->log2_effective_operand_bytes);
+
+ switch (c)
+ {
+ /* Memory or reg field from modrm byte. */
+ case 'M':
+ ASSERT (p->flags & X86_INSN_IS_ADDRESS);
+ /* FALLTHROUGH */
+ case 'E':
+ if (p->flags & X86_INSN_IS_ADDRESS)
+ s = format (s, "%U", format_x86_mem_operand, p);
+ else
+ s = format (s, "%U",
+ format_x86_reg_operand, p->regs[1],
+ p->log2_effective_operand_bytes, t);
+ break;
+
+ /* reg field from modrm byte. */
+ case 'R':
+ case 'G':
+ s = format (s, "%U",
+ format_x86_reg_operand, p->regs[0],
+ p->log2_effective_operand_bytes, t);
+ break;
+
+ case 'I':
+ {
+ u32 l = x86_insn_log2_immediate_bytes (p, insn);
+ i64 mask = pow2_mask (8ULL << l);
+ s = format (s, "$0x%Lx", p->immediate & mask);
+ }
+ break;
+
+ case 'J':
+ if (p->immediate < 0)
+ s = format (s, "- 0x%Lx", -p->immediate);
+ else
+ s = format (s, "+ 0x%Lx", p->immediate);
+ break;
+
+ case 'O':
+ s = format (s, "0x%Lx", p->immediate);
+ break;
+
+ case 'A':
+ /* AX/AL */
+ s = format (s, "%U",
+ format_x86_gp_reg_operand, X86_INSN_GP_REG_AX,
+ t == 'L' ? 0 : p->log2_effective_operand_bytes);
+ break;
+
+ case 'B':
+ /* BX/BL/BP */
+ s = format (s, "%U",
+ format_x86_gp_reg_operand,
+ t == 'P' ? X86_INSN_GP_REG_BP : X86_INSN_GP_REG_BX,
+ t == 'L' ? 0 : p->log2_effective_operand_bytes);
+ break;
+
+ case 'C':
+ /* CX/CL */
+ s = format (s, "%U",
+ format_x86_gp_reg_operand, X86_INSN_GP_REG_CX,
+ t == 'L' ? 0 : p->log2_effective_operand_bytes);
+ break;
+
+ case 'D':
+ /* DX/DL/DI */
+ s = format (s, "%U",
+ format_x86_gp_reg_operand,
+ t == 'I' ? X86_INSN_GP_REG_DI : X86_INSN_GP_REG_DX,
+ t == 'L' ? 0 : p->log2_effective_operand_bytes);
+ break;
+
+ case 'S':
+ /* SI/SP */
+ s = format (s, "%U",
+ format_x86_gp_reg_operand,
+ t == 'I' ? X86_INSN_GP_REG_SI : X86_INSN_GP_REG_SP,
+ p->log2_effective_operand_bytes);
+ break;
+
+ case '1':
+ s = format (s, "1");
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ return s;
+}
+
+u8 * format_x86_insn_parse (u8 * s, va_list * va)
+{
+ x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *);
+ x86_insn_t * insn = &p->insn;
+ u32 o, i, is_src_dst;
+
+ s = format (s, "%s", insn->name);
+
+ if (! x86_insn_operand_is_valid (insn, 0))
+ goto done;
+
+ is_src_dst = x86_insn_operand_is_valid (insn, 1);
+
+  /* If the instruction has an immediate narrower than its memory
+     operand, append an operand-size suffix (b/w/l/q) to the opcode
+     name, AT&T style. */
+ if (is_src_dst)
+ {
+ u32 b;
+
+ b = x86_insn_log2_immediate_bytes (p, insn);
+ if (b < p->log2_effective_operand_bytes
+ && (p->flags & X86_INSN_IS_ADDRESS))
+ s = format (s, "%c", "bwlq"[b]);
+ }
+
+ for (i = 0; i < ARRAY_LEN (insn->operands); i++)
+ {
+ o = is_src_dst + i;
+ if (! x86_insn_operand_is_valid (insn, o))
+ break;
+ s = format (s, "%s%U",
+ i == 0 ? " " : ", ",
+ format_x86_insn_operand, p, o);
+ }
+
+ if (is_src_dst)
+ s = format (s, ", %U",
+ format_x86_insn_operand, p, 0);
+
+ done:
+ return s;
+}
diff --git a/src/vppinfra/asm_x86.h b/src/vppinfra/asm_x86.h
new file mode 100644
index 00000000..dacef617
--- /dev/null
+++ b/src/vppinfra/asm_x86.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_asm_x86_h
+#define included_asm_x86_h
+
+#include <vppinfra/format.h>
+
+typedef union
+{
+ struct
+ {
+ u8 code;
+ u8 type;
+ };
+ u8 data[2];
+} x86_insn_operand_t;
+
+typedef struct
+{
+ /* Instruction name. */
+ char *name;
+
+ /* X86 instructions may have up to 3 operands. */
+ x86_insn_operand_t operands[3];
+
+ u16 flags;
+#define X86_INSN_FLAG_DEFAULT_64_BIT (1 << 0)
+#define X86_INSN_FLAG_SET_SSE_GROUP(n) ((n) << 5)
+#define X86_INSN_FLAG_GET_SSE_GROUP(f) (((f) >> 5) & 0x1f)
+#define X86_INSN_FLAG_SET_MODRM_REG_GROUP(n) (((n) & 0x3f) << 10)
+#define X86_INSN_FLAG_GET_MODRM_REG_GROUP(f) (((f) >> 10) & 0x3f)
+} x86_insn_t;
+
+always_inline uword
+x86_insn_operand_is_valid (x86_insn_t * i, uword o)
+{
+ ASSERT (o < ARRAY_LEN (i->operands));
+ return i->operands[o].code != '_';
+}
+
+#define foreach_x86_legacy_prefix \
+ _ (OPERAND_SIZE, 0x66) \
+ _ (ADDRESS_SIZE, 0x67) \
+ _ (SEGMENT_CS, 0x2e) \
+ _ (SEGMENT_DS, 0x3e) \
+ _ (SEGMENT_ES, 0x26) \
+ _ (SEGMENT_FS, 0x64) \
+ _ (SEGMENT_GS, 0x65) \
+ _ (SEGMENT_SS, 0x36) \
+ _ (LOCK, 0xf0) \
+ _ (REPZ, 0xf3) \
+ _ (REPNZ, 0xf2)
+
+#define foreach_x86_insn_parse_flag \
+ /* Parse in 32/64-bit mode. */ \
+ _ (PARSE_32_BIT, 0) \
+ _ (PARSE_64_BIT, 0) \
+ _ (IS_ADDRESS, 0) \
+ /* regs[1/2] is a valid base/index register */ \
+ _ (HAS_BASE, 0) \
+ _ (HAS_INDEX, 0) \
+ /* rex w bit */ \
+ _ (OPERAND_SIZE_64, 0)
+
+typedef enum
+{
+#define _(f,o) X86_INSN_FLAG_BIT_##f,
+ foreach_x86_insn_parse_flag foreach_x86_legacy_prefix
+#undef _
+} x86_insn_parse_flag_bit_t;
+
+typedef enum
+{
+#define _(f,o) X86_INSN_##f = 1 << X86_INSN_FLAG_BIT_##f,
+ foreach_x86_insn_parse_flag foreach_x86_legacy_prefix
+#undef _
+} x86_insn_parse_flag_t;
+
+typedef struct
+{
+ /* Registers in instruction.
+ [0] is modrm reg field
+ [1] is base reg
+ [2] is index reg. */
+ u8 regs[3];
+
+ /* Scale for index register. */
+ u8 log2_index_scale:2;
+ u8 log2_effective_operand_bytes:3;
+ u8 log2_effective_address_bytes:3;
+
+ i32 displacement;
+
+ /* Parser flags: set of x86_insn_parse_flag_t enums. */
+ u32 flags;
+
+ i64 immediate;
+
+ x86_insn_t insn;
+} x86_insn_parse_t;
+
+u8 *x86_insn_parse (x86_insn_parse_t * p, u8 * code_start);
+format_function_t format_x86_insn_parse;
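+
+/* Usage sketch (illustrative only; `code' is assumed to point at
+   valid instruction bytes):
+
+     x86_insn_parse_t p = { .flags = X86_INSN_PARSE_64_BIT };
+     u8 *next = x86_insn_parse (&p, code);
+     if (next)
+       fformat (stdout, "%U\n", format_x86_insn_parse, &p);
+
+   x86_insn_parse returns a pointer just past the parsed instruction,
+   or 0 if the bytes do not decode within the 15-byte limit. */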
+
+#endif /* included_asm_x86_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/backtrace.c b/src/vppinfra/backtrace.c
new file mode 100644
index 00000000..bbfb792c
--- /dev/null
+++ b/src/vppinfra/backtrace.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2004 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error.h>
+
+#ifdef __mips__
+
+/* Let code below know we've defined clib_backtrace. */
+#define clib_backtrace_defined
+
+#include <vppinfra/asm_mips.h>
+
+uword
+clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip)
+{
+ u32 *pc;
+ void *sp;
+ uword i, saved_pc;
+
+ /* Figure current PC, saved PC and stack pointer. */
+ asm volatile (".set push\n"
+ ".set noat\n" "move %[saved_pc], $31\n" "move %[sp], $29\n"
+ /* Fetches current PC. */
+ "la $at, 1f\n"
+ "jalr %[pc], $at\n"
+ "nop\n"
+ "1:\n"
+ ".set pop\n":[pc] "=r" (pc),
+ [saved_pc] "=r" (saved_pc),[sp] "=r" (sp));
+
+ /* Also skip current frame. */
+ n_frames_to_skip += 1;
+
+ for (i = 0; i < max_callers + n_frames_to_skip; i++)
+ {
+ mips_insn_opcode_t op;
+ mips_insn_special_funct_t funct;
+ i32 insn, rs, rt, rd, immediate, found_saved_pc;
+ u32 *start_pc;
+
+ /* Parse instructions until we reach prologue for this
+ stack frame. We'll need to figure out where saved
+ PC is and where previous stack frame lives. */
+ start_pc = pc;
+ found_saved_pc = 0;
+ while (1)
+ {
+ insn = *--pc;
+ op = mips_insn_get_op (insn);
+ funct = mips_insn_get_funct (insn);
+ rs = mips_insn_get_rs (insn);
+ rt = mips_insn_get_rt (insn);
+ rd = mips_insn_get_rd (insn);
+ immediate = mips_insn_get_immediate (insn);
+
+ switch (op)
+ {
+ default:
+ break;
+
+ case MIPS_OPCODE_sd:
+ case MIPS_OPCODE_sw:
+ /* Trace stores of return address. */
+ if (rt == MIPS_REG_RA)
+ {
+ void *addr = sp + immediate;
+
+ /* If RA is stored somewhere other than in the
+ stack frame, give up. */
+ if (rs != MIPS_REG_SP)
+ goto backtrace_done;
+
+ ASSERT (immediate % 4 == 0);
+ if (op == MIPS_OPCODE_sw)
+ saved_pc = ((u32 *) addr)[0];
+ else
+ saved_pc = ((u64 *) addr)[0];
+ found_saved_pc = 1;
+ }
+ break;
+
+ case MIPS_OPCODE_addiu:
+ case MIPS_OPCODE_daddiu:
+ case MIPS_OPCODE_addi:
+ case MIPS_OPCODE_daddi:
+ if (rt == MIPS_REG_SP)
+ {
+ if (rs != MIPS_REG_SP)
+ goto backtrace_done;
+
+ ASSERT (immediate % 4 == 0);
+
+ /* Assume positive offset is part of the epilogue.
+ E.g.
+ jr ra
+ add sp,sp,100
+ */
+ if (immediate > 0)
+ continue;
+
+ /* Negative offset means allocate stack space.
+ This could either be the prologue or could be due to
+ alloca. */
+ sp -= immediate;
+
+ /* This frame will not save RA. */
+ if (i == 0)
+ goto found_prologue;
+
+ /* Assume that addiu sp,sp,-N without store of ra means
+ that we have not found the prologue yet. */
+ if (found_saved_pc)
+ goto found_prologue;
+ }
+ break;
+
+ case MIPS_OPCODE_slti:
+ case MIPS_OPCODE_sltiu:
+ case MIPS_OPCODE_andi:
+ case MIPS_OPCODE_ori:
+ case MIPS_OPCODE_xori:
+ case MIPS_OPCODE_lui:
+ case MIPS_OPCODE_ldl:
+ case MIPS_OPCODE_ldr:
+ case MIPS_OPCODE_lb:
+ case MIPS_OPCODE_lh:
+ case MIPS_OPCODE_lwl:
+ case MIPS_OPCODE_lw:
+ case MIPS_OPCODE_lbu:
+ case MIPS_OPCODE_lhu:
+ case MIPS_OPCODE_lwr:
+ case MIPS_OPCODE_lwu:
+ case MIPS_OPCODE_ld:
+ /* Give up when we find anyone setting the stack pointer. */
+ if (rt == MIPS_REG_SP)
+ goto backtrace_done;
+ break;
+
+ case MIPS_OPCODE_SPECIAL:
+ if (rd == MIPS_REG_SP)
+ switch (funct)
+ {
+ default:
+ /* Give up when we find anyone setting the stack pointer. */
+ goto backtrace_done;
+
+ case MIPS_SPECIAL_FUNCT_break:
+ case MIPS_SPECIAL_FUNCT_jr:
+ case MIPS_SPECIAL_FUNCT_sync:
+ case MIPS_SPECIAL_FUNCT_syscall:
+ case MIPS_SPECIAL_FUNCT_tge:
+ case MIPS_SPECIAL_FUNCT_tgeu:
+ case MIPS_SPECIAL_FUNCT_tlt:
+ case MIPS_SPECIAL_FUNCT_tltu:
+ case MIPS_SPECIAL_FUNCT_teq:
+ case MIPS_SPECIAL_FUNCT_tne:
+ /* These instructions can validly have rd == MIPS_REG_SP */
+ break;
+ }
+ break;
+ }
+ }
+
+ found_prologue:
+ /* Check sanity of saved pc. */
+ if (saved_pc & 3)
+ goto backtrace_done;
+ if (saved_pc == 0)
+ goto backtrace_done;
+
+ if (i >= n_frames_to_skip)
+ callers[i - n_frames_to_skip] = saved_pc;
+ pc = uword_to_pointer (saved_pc, u32 *);
+ }
+
+backtrace_done:
+ if (i < n_frames_to_skip)
+ return 0;
+ else
+ return i - n_frames_to_skip;
+}
+#endif /* __mips__ */
+
+#ifndef clib_backtrace_defined
+#define clib_backtrace_defined
+
+typedef struct clib_generic_stack_frame_t
+{
+ struct clib_generic_stack_frame_t *prev;
+ void *return_address;
+} clib_generic_stack_frame_t;
+
+/* This will only work if we have a frame pointer.
+   Without a frame pointer we would have to parse the machine code
+   to walk the stack frames. */
+uword
+clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip)
+{
+ clib_generic_stack_frame_t *f;
+ uword i;
+
+ f = __builtin_frame_address (0);
+
+ /* Also skip current frame. */
+ n_frames_to_skip += 1;
+
+ for (i = 0; i < max_callers + n_frames_to_skip; i++)
+ {
+ f = f->prev;
+ if (!f)
+ goto backtrace_done;
+ if (clib_abs ((void *) f - (void *) f->prev) > (64 * 1024))
+ goto backtrace_done;
+ if (i >= n_frames_to_skip)
+ callers[i - n_frames_to_skip] = pointer_to_uword (f->return_address);
+ }
+
+backtrace_done:
+ if (i < n_frames_to_skip)
+ return 0;
+ else
+ return i - n_frames_to_skip;
+}
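+
+/* Usage sketch: collect up to 8 return addresses, skipping no extra
+   frames beyond clib_backtrace itself:
+
+     uword callers[8];
+     uword n = clib_backtrace (callers, ARRAY_LEN (callers), 0);
+
+   n is the number of entries actually written to callers. */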
+#endif /* clib_backtrace_defined */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h
new file mode 100644
index 00000000..361665be
--- /dev/null
+++ b/src/vppinfra/bihash_16_8.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#undef BIHASH_TYPE
+#undef BIHASH_KVP_CACHE_SIZE
+#undef BIHASH_KVP_PER_PAGE
+
+#define BIHASH_TYPE _16_8
+#define BIHASH_KVP_PER_PAGE 4
+#define BIHASH_KVP_CACHE_SIZE 5
+
+#ifndef __included_bihash_16_8_h__
+#define __included_bihash_16_8_h__
+
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/xxhash.h>
+#include <vppinfra/crc32.h>
+
+typedef struct
+{
+ u64 key[2];
+ u64 value;
+} clib_bihash_kv_16_8_t;
+
+static inline int
+clib_bihash_is_free_16_8 (clib_bihash_kv_16_8_t * v)
+{
+ /* Free values are memset to 0xff, check a bit... */
+ if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ return 1;
+ return 0;
+}
+
+static inline u64
+clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) v->key, 16);
+#else
+ u64 tmp = v->key[0] ^ v->key[1];
+ return clib_xxhash (tmp);
+#endif
+}
+
+static inline u8 *
+format_bihash_kvp_16_8 (u8 * s, va_list * args)
+{
+ clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
+
+ s = format (s, "key %llu %llu value %llu", v->key[0], v->key[1], v->value);
+ return s;
+}
+
+static inline int
+clib_bihash_key_compare_16_8 (u64 * a, u64 * b)
+{
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1])) == 0;
+}
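+
+/* The xor/or folding above is a branch-free compare: any bit that
+   differs in either 64-bit lane makes the or-sum non-zero, so the
+   expression is 1 exactly when both lanes match. */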
+
+#undef __included_bihash_template_h__
+#include <vppinfra/bihash_template.h>
+
+#endif /* __included_bihash_16_8_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/bihash_24_8.h b/src/vppinfra/bihash_24_8.h
new file mode 100644
index 00000000..173168fe
--- /dev/null
+++ b/src/vppinfra/bihash_24_8.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#undef BIHASH_TYPE
+#undef BIHASH_KVP_CACHE_SIZE
+#undef BIHASH_KVP_PER_PAGE
+
+#define BIHASH_TYPE _24_8
+#define BIHASH_KVP_PER_PAGE 4
+#define BIHASH_KVP_CACHE_SIZE 0
+
+#ifndef __included_bihash_24_8_h__
+#define __included_bihash_24_8_h__
+
+#include <vppinfra/crc32.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/xxhash.h>
+
+typedef struct
+{
+ u64 key[3];
+ u64 value;
+} clib_bihash_kv_24_8_t;
+
+static inline int
+clib_bihash_is_free_24_8 (const clib_bihash_kv_24_8_t * v)
+{
+ /* Free values are memset to 0xff, check a bit... */
+ if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ return 1;
+ return 0;
+}
+
+static inline u64
+clib_bihash_hash_24_8 (const clib_bihash_kv_24_8_t * v)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) v->key, 24);
+#else
+ u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2];
+ return clib_xxhash (tmp);
+#endif
+}
+
+static inline u8 *
+format_bihash_kvp_24_8 (u8 * s, va_list * args)
+{
+ clib_bihash_kv_24_8_t *v = va_arg (*args, clib_bihash_kv_24_8_t *);
+
+ s = format (s, "key %llu %llu %llu value %llu",
+ v->key[0], v->key[1], v->key[2], v->value);
+ return s;
+}
+
+static inline int
+clib_bihash_key_compare_24_8 (const u64 * a, const u64 * b)
+{
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
+}
+
+#undef __included_bihash_template_h__
+#include <vppinfra/bihash_template.h>
+
+#endif /* __included_bihash_24_8_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/bihash_40_8.h b/src/vppinfra/bihash_40_8.h
new file mode 100644
index 00000000..974a78d8
--- /dev/null
+++ b/src/vppinfra/bihash_40_8.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#undef BIHASH_TYPE
+#undef BIHASH_KVP_CACHE_SIZE
+#undef BIHASH_KVP_PER_PAGE
+
+#define BIHASH_TYPE _40_8
+#define BIHASH_KVP_PER_PAGE 4
+#define BIHASH_KVP_CACHE_SIZE 2
+
+#ifndef __included_bihash_40_8_h__
+#define __included_bihash_40_8_h__
+
+#include <vppinfra/crc32.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/xxhash.h>
+
+typedef struct
+{
+ u64 key[5];
+ u64 value;
+} clib_bihash_kv_40_8_t;
+
+static inline int
+clib_bihash_is_free_40_8 (const clib_bihash_kv_40_8_t * v)
+{
+ /* Free values are memset to 0xff, check a bit... */
+ if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ return 1;
+ return 0;
+}
+
+static inline u64
+clib_bihash_hash_40_8 (const clib_bihash_kv_40_8_t * v)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) v->key, 40);
+#else
+ u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4];
+ return clib_xxhash (tmp);
+#endif
+}
+
+static inline u8 *
+format_bihash_kvp_40_8 (u8 * s, va_list * args)
+{
+ clib_bihash_kv_40_8_t *v = va_arg (*args, clib_bihash_kv_40_8_t *);
+
+ s = format (s, "key %llu %llu %llu %llu %llu value %llu", v->key[0],
+ v->key[1], v->key[2], v->key[3], v->key[4], v->value);
+ return s;
+}
+
+static inline int
+clib_bihash_key_compare_40_8 (const u64 * a, const u64 * b)
+{
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])
+ | (a[4] ^ b[4])) == 0;
+}
+
+#undef __included_bihash_template_h__
+#include <vppinfra/bihash_template.h>
+
+#endif /* __included_bihash_40_8_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h
new file mode 100644
index 00000000..107bcace
--- /dev/null
+++ b/src/vppinfra/bihash_48_8.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#undef BIHASH_TYPE
+#undef BIHASH_KVP_CACHE_SIZE
+#undef BIHASH_KVP_PER_PAGE
+
+#define BIHASH_TYPE _48_8
+#define BIHASH_KVP_PER_PAGE 4
+#define BIHASH_KVP_CACHE_SIZE 2
+
+#ifndef __included_bihash_48_8_h__
+#define __included_bihash_48_8_h__
+
+#include <vppinfra/crc32.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/xxhash.h>
+
+typedef struct
+{
+ u64 key[6];
+ u64 value;
+} clib_bihash_kv_48_8_t;
+
+static inline int
+clib_bihash_is_free_48_8 (const clib_bihash_kv_48_8_t * v)
+{
+ /* Free values are memset to 0xff, check a bit... */
+ if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ return 1;
+ return 0;
+}
+
+static inline u64
+clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) v->key, 48);
+#else
+ u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4]
+ ^ v->key[5];
+ return clib_xxhash (tmp);
+#endif
+}
+
+static inline u8 *
+format_bihash_kvp_48_8 (u8 * s, va_list * args)
+{
+ clib_bihash_kv_48_8_t *v = va_arg (*args, clib_bihash_kv_48_8_t *);
+
+ s = format (s, "key %llu %llu %llu %llu %llu %llu value %llu", v->key[0],
+ v->key[1], v->key[2], v->key[3], v->key[4], v->key[5],
+ v->value);
+ return s;
+}
+
+static inline int
+clib_bihash_key_compare_48_8 (const u64 * a, const u64 * b)
+{
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])
+ | (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0;
+}
+
+#undef __included_bihash_template_h__
+#include <vppinfra/bihash_template.h>
+
+#endif /* __included_bihash_48_8_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/bihash_8_8.h b/src/vppinfra/bihash_8_8.h
new file mode 100644
index 00000000..2deb64ef
--- /dev/null
+++ b/src/vppinfra/bihash_8_8.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#undef BIHASH_TYPE
+#undef BIHASH_KVP_CACHE_SIZE
+#undef BIHASH_KVP_PER_PAGE
+
+#define BIHASH_TYPE _8_8
+#define BIHASH_KVP_PER_PAGE 4
+#define BIHASH_KVP_CACHE_SIZE 0
+
+#ifndef __included_bihash_8_8_h__
+#define __included_bihash_8_8_h__
+
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/xxhash.h>
+#include <vppinfra/crc32.h>
+
+/** 8 octet key, 8 octet key value pair */
+typedef struct
+{
+ u64 key; /**< the key */
+ u64 value; /**< the value */
+} clib_bihash_kv_8_8_t;
+
+/** Decide if a clib_bihash_kv_8_8_t instance is free
+ @param v- pointer to the (key,value) pair
+*/
+static inline int
+clib_bihash_is_free_8_8 (clib_bihash_kv_8_8_t * v)
+{
+ if (v->key == ~0ULL && v->value == ~0ULL)
+ return 1;
+ return 0;
+}
+
+/** Hash a clib_bihash_kv_8_8_t instance
+ @param v - pointer to the (key,value) pair, hash the key (only)
+*/
+static inline u64
+clib_bihash_hash_8_8 (clib_bihash_kv_8_8_t * v)
+{
+ /* Note: to torture-test linear scan, make this fn return a constant */
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) & v->key, 8);
+#else
+ return clib_xxhash (v->key);
+#endif
+}
+
+/** Format a clib_bihash_kv_8_8_t instance
+ @param s - u8 * vector under construction
+ @param args (vararg) - the (key,value) pair to format
+ @return s - the u8 * vector under construction
+*/
+static inline u8 *
+format_bihash_kvp_8_8 (u8 * s, va_list * args)
+{
+ clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
+
+ s = format (s, "key %llu value %llu", v->key, v->value);
+ return s;
+}
+
+/** Compare two clib_bihash_kv_8_8_t instances
+ @param a - first key
+ @param b - second key
+*/
+static inline int
+clib_bihash_key_compare_8_8 (u64 a, u64 b)
+{
+ return a == b;
+}
+
+#undef __included_bihash_template_h__
+#include <vppinfra/bihash_template.h>
+
+#endif /* __included_bihash_8_8_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
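A minimal usage sketch of the _8_8 flavor declared above, assuming the conventional BV()/BVT() name expansion (clib_bihash_8_8_t, clib_bihash_init_8_8, ...) produced by bihash_template.h:

    #include <vppinfra/bihash_8_8.h>

    static void
    bihash_8_8_usage_sketch (void)
    {
      clib_bihash_8_8_t h;
      clib_bihash_kv_8_8_t kv;

      /* 32 buckets (rounded up to a power of two), 1 MB private mheap */
      clib_bihash_init_8_8 (&h, "example", 32, 1 << 20);

      kv.key = 42;
      kv.value = 0xfeedfacefeedfaceULL;
      clib_bihash_add_del_8_8 (&h, &kv, 1 /* is_add */ );

      kv.key = 42;
      if (clib_bihash_search_8_8 (&h, &kv, &kv) == 0)
        ASSERT (kv.value == 0xfeedfacefeedfaceULL);

      clib_bihash_free_8_8 (&h);
    }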
diff --git a/src/vppinfra/bihash_doc.h b/src/vppinfra/bihash_doc.h
new file mode 100644
index 00000000..e6ab9db6
--- /dev/null
+++ b/src/vppinfra/bihash_doc.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#error do not #include this file!
+
+/** \file
+
+ Bounded-index extensible hashing. The basic algorithm performs
+ thread-safe constant-time lookups in the face of a modest number
+ of hash collisions. The computed hash code h(k) must have
+ reasonable statistics with respect to the key space. It won't do
+ to have h(k) = 0 or 1 for all values of k.
+
+ Each bucket in the power-of-two bucket array contains the index
+ (in a private vppinfra memory heap) of the "backing store" for the
+ bucket, as well as a size field. The size field (log2_pages)
+ corresponds to 1, 2, 4, ... contiguous "pages" containing the
+ (key,value) pairs in the bucket.
+
+ When a single page fills, we allocate two contiguous pages. We
+ recompute h(k) for each (key,value) pair, using an additional bit
+ to deal the (key, value) pairs into the "top" and "bottom" pages.
+
+ At lookup time, we compute h(k), using lg(bucket-array-size) to
+ pick the bucket. We read the bucket to find the base of the
+ backing pages. We use an additional log2_pages' worth of bits
+ from h(k) to compute the offset of the page which will contain the
+ (key,value) pair we're trying to find.
+*/
+
+/** template key/value backing page structure */
+typedef struct clib_bihash_value
+{
+ union
+ {
+
+ clib_bihash_kv kvp[BIHASH_KVP_PER_PAGE]; /**< the actual key/value pairs */
+ clib_bihash_value *next_free; /**< used when a KVP page (or block thereof) is on a freelist */
+ };
+} clib_bihash_value_t;
+
+/** bihash bucket structure */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u32 offset; /**< backing page offset in the clib memory heap */
+ u8 pad[3]; /**< padding */
+ u8 log2_pages; /**< log2 (size of the backing page block) */
+ };
+ u64 as_u64;
+ };
+} clib_bihash_bucket_t;
+
+/** A bounded index extensible hash table */
+typedef struct
+{
+ clib_bihash_bucket_t *buckets; /**< Hash bucket vector, power-of-two in size */
+ volatile u32 *writer_lock; /**< Writer lock, in its own cache line */
+ clib_bihash_value_t **working_copies;
+ /**< Working copies (various sizes), to avoid locking against readers */
+ clib_bihash_bucket_t saved_bucket; /**< Saved bucket pointer */
+ u32 nbuckets; /**< Number of hash buckets */
+ u32 log2_nbuckets; /**< lg(nbuckets) */
+ u8 *name; /**< hash table name */
+ clib_bihash_value_t **freelists;
+ /**< power of two freelist vector */
+ void *mheap; /**< clib memory heap */
+} clib_bihash_t;
+
+/** Get pointer to value page given its clib mheap offset */
+static inline void *clib_bihash_get_value (clib_bihash_t * h, uword offset);
+
+/** Get clib mheap offset given a pointer */
+static inline uword clib_bihash_get_offset (clib_bihash_t * h, void *v);
+
+/** Initialize a bounded index extensible hash table
+
+ @param h - the bi-hash table to initialize
+ @param name - name of the hash table
+ @param nbuckets - the number of buckets, which will be rounded up to
+ a power of two
+ @param memory_size - clib mheap size, in bytes
+*/
+
+void clib_bihash_init
+ (clib_bihash_t * h, char *name, u32 nbuckets, uword memory_size);
+
+/** Destroy a bounded index extensible hash table
+ @param h - the bi-hash table to free
+*/
+
+void clib_bihash_free (clib_bihash_t * h);
+
+/** Add or delete a (key,value) pair from a bi-hash table
+
+ @param h - the bi-hash table to search
+ @param add_v - the (key,value) pair to add
+ @param is_add - add=1, delete=0
+ @returns 0 on success, < 0 on error
+ @note This function will replace an existing (key,value) pair if the
+ new key matches an existing key
+*/
+int clib_bihash_add_del (clib_bihash_t * h, clib_bihash_kv * add_v, int is_add);
+
+
+/** Search a bi-hash table
+
+ @param h - the bi-hash table to search
+ @param search_v - (key,value) pair containing the search key
+ @param return_v - (key,value) pair which matches search_v.key
+ @returns 0 on success (with return_v set), < 0 on error
+*/
+int clib_bihash_search (clib_bihash_t * h,
+ clib_bihash_kv * search_v, clib_bihash_kv * return_v);
+
+
+/** Visit active (key,value) pairs in a bi-hash table
+
+ @param h - the bi-hash table to search
+ @param callback - function to call with each active (key,value) pair
+ @param arg - arbitrary second argument passed to the callback function
+ First argument is the (key,value) pair to visit
+ @note Trying to supply a proper function prototype for the
+ callback function appears to be a fool's errand.
+*/
+void clib_bihash_foreach_key_value_pair (clib_bihash_t * h,
+ void *callback, void *arg);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
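The foreach callback is deliberately passed as void * (see the note on the prototype above); a sketch of the expected callback shape, again assuming the conventional expanded _8_8 names:

    static void
    count_pair_cb (clib_bihash_kv_8_8_t * kvp, void *arg)
    {
      u64 *n_pairs = arg;
      (*n_pairs)++;
    }

    static u64
    count_active_pairs (clib_bihash_8_8_t * h)
    {
      u64 n_pairs = 0;
      /* the callback is handed over as a bare void *, per the prototype */
      clib_bihash_foreach_key_value_pair_8_8 (h, (void *) count_pair_cb,
                                              &n_pairs);
      return n_pairs;
    }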
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
new file mode 100644
index 00000000..56c410b5
--- /dev/null
+++ b/src/vppinfra/bihash_template.c
@@ -0,0 +1,624 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */
+
+void BV (clib_bihash_init)
+ (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size)
+{
+ void *oldheap;
+ int i;
+
+ nbuckets = 1 << (max_log2 (nbuckets));
+
+ h->name = (u8 *) name;
+ h->nbuckets = nbuckets;
+ h->log2_nbuckets = max_log2 (nbuckets);
+ h->cache_hits = 0;
+ h->cache_misses = 0;
+
+ h->mheap = mheap_alloc (0 /* use VM */ , memory_size);
+
+ oldheap = clib_mem_set_heap (h->mheap);
+ vec_validate_aligned (h->buckets, nbuckets - 1, CLIB_CACHE_LINE_BYTES);
+ h->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ h->writer_lock[0] = 0;
+
+ for (i = 0; i < nbuckets; i++)
+ BV (clib_bihash_reset_cache) (h->buckets + i);
+
+ clib_mem_set_heap (oldheap);
+}
+
+void BV (clib_bihash_free) (BVT (clib_bihash) * h)
+{
+ mheap_free (h->mheap);
+ memset (h, 0, sizeof (*h));
+}
+
+static
+BVT (clib_bihash_value) *
+BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
+{
+ BVT (clib_bihash_value) * rv = 0;
+ void *oldheap;
+
+ ASSERT (h->writer_lock[0]);
+ if (log2_pages >= vec_len (h->freelists) || h->freelists[log2_pages] == 0)
+ {
+ oldheap = clib_mem_set_heap (h->mheap);
+
+ vec_validate (h->freelists, log2_pages);
+ rv = clib_mem_alloc_aligned ((sizeof (*rv) * (1 << log2_pages)),
+ CLIB_CACHE_LINE_BYTES);
+ clib_mem_set_heap (oldheap);
+ goto initialize;
+ }
+ rv = h->freelists[log2_pages];
+ h->freelists[log2_pages] = rv->next_free;
+
+initialize:
+ ASSERT (rv);
+ /*
+ * Latest gcc complains that the length arg is zero
+ * if we replace (1<<log2_pages) with vec_len(rv).
+ * No clue.
+ */
+ memset (rv, 0xff, sizeof (*rv) * (1 << log2_pages));
+ return rv;
+}
+
+static void
+BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * v,
+ u32 log2_pages)
+{
+ ASSERT (h->writer_lock[0]);
+
+ ASSERT (vec_len (h->freelists) > log2_pages);
+
+ v->next_free = h->freelists[log2_pages];
+ h->freelists[log2_pages] = v;
+}
+
+static inline void
+BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b)
+{
+ BVT (clib_bihash_value) * v;
+ BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8)));
+ void *oldheap;
+ BVT (clib_bihash_value) * working_copy;
+ u32 thread_index = os_get_thread_index ();
+ int log2_working_copy_length;
+
+ if (thread_index >= vec_len (h->working_copies))
+ {
+ oldheap = clib_mem_set_heap (h->mheap);
+ vec_validate (h->working_copies, thread_index);
+ vec_validate_init_empty (h->working_copy_lengths, thread_index, ~0);
+ clib_mem_set_heap (oldheap);
+ }
+
+ /*
+ * working_copies are per-cpu so that near-simultaneous
+ * updates from multiple threads will not result in sporadic, spurious
+ * lookup failures.
+ */
+ working_copy = h->working_copies[thread_index];
+ log2_working_copy_length = h->working_copy_lengths[thread_index];
+
+ h->saved_bucket.as_u64 = b->as_u64;
+ oldheap = clib_mem_set_heap (h->mheap);
+
+ if (b->log2_pages > log2_working_copy_length)
+ {
+ if (working_copy)
+ clib_mem_free (working_copy);
+
+ working_copy = clib_mem_alloc_aligned
+ (sizeof (working_copy[0]) * (1 << b->log2_pages),
+ CLIB_CACHE_LINE_BYTES);
+ h->working_copy_lengths[thread_index] = b->log2_pages;
+ h->working_copies[thread_index] = working_copy;
+ }
+
+ clib_mem_set_heap (oldheap);
+
+ /* Lock the bucket... */
+ while (BV (clib_bihash_lock_bucket) (b) == 0)
+ ;
+
+ v = BV (clib_bihash_get_value) (h, b->offset);
+
+ clib_memcpy (working_copy, v, sizeof (*v) * (1 << b->log2_pages));
+ working_bucket.as_u64 = b->as_u64;
+ working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy);
+ CLIB_MEMORY_BARRIER ();
+ b->as_u64 = working_bucket.as_u64;
+ h->working_copies[thread_index] = working_copy;
+}
+
+static
+BVT (clib_bihash_value) *
+BV (split_and_rehash)
+ (BVT (clib_bihash) * h,
+ BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
+ u32 new_log2_pages)
+{
+ BVT (clib_bihash_value) * new_values, *new_v;
+ int i, j, length_in_kvs;
+
+ new_values = BV (value_alloc) (h, new_log2_pages);
+ length_in_kvs = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
+
+ for (i = 0; i < length_in_kvs; i++)
+ {
+ u64 new_hash;
+
+ /* Entry not in use? Forget it */
+ if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
+ continue;
+
+ /* rehash the item onto its new home-page */
+ new_hash = BV (clib_bihash_hash) (&(old_values->kvp[i]));
+ new_hash >>= h->log2_nbuckets;
+ new_hash &= (1 << new_log2_pages) - 1;
+ new_v = &new_values[new_hash];
+
+ /* Across the new home-page */
+ for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
+ {
+ /* Empty slot */
+ if (BV (clib_bihash_is_free) (&(new_v->kvp[j])))
+ {
+ clib_memcpy (&(new_v->kvp[j]), &(old_values->kvp[i]),
+ sizeof (new_v->kvp[j]));
+ goto doublebreak;
+ }
+ }
+ /* Crap. Tell caller to try again */
+ BV (value_free) (h, new_values, new_log2_pages);
+ return 0;
+ doublebreak:;
+ }
+
+ return new_values;
+}
+
+static
+BVT (clib_bihash_value) *
+BV (split_and_rehash_linear)
+ (BVT (clib_bihash) * h,
+ BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
+ u32 new_log2_pages)
+{
+ BVT (clib_bihash_value) * new_values;
+ int i, j, new_length, old_length;
+
+ new_values = BV (value_alloc) (h, new_log2_pages);
+ new_length = (1 << new_log2_pages) * BIHASH_KVP_PER_PAGE;
+ old_length = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
+
+ j = 0;
+ /* Across the old value array */
+ for (i = 0; i < old_length; i++)
+ {
+ /* Find a free slot in the new linear scan bucket */
+ for (; j < new_length; j++)
+ {
+ /* Old value not in use? Forget it. */
+ if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
+ goto doublebreak;
+
+ /* New value should never be in use */
+ if (BV (clib_bihash_is_free) (&(new_values->kvp[j])))
+ {
+ /* Copy the old value and move along */
+ clib_memcpy (&(new_values->kvp[j]), &(old_values->kvp[i]),
+ sizeof (new_values->kvp[j]));
+ j++;
+ goto doublebreak;
+ }
+ }
+ /* This should never happen... */
+ clib_warning ("BUG: linear rehash failed!");
+ BV (value_free) (h, new_values, new_log2_pages);
+ return 0;
+
+ doublebreak:;
+ }
+ return new_values;
+}
+
+int BV (clib_bihash_add_del)
+ (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add)
+{
+ u32 bucket_index;
+ BVT (clib_bihash_bucket) * b, tmp_b;
+ BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy;
+ int rv = 0;
+ int i, limit;
+ u64 hash, new_hash;
+ u32 new_log2_pages, old_log2_pages;
+ u32 thread_index = os_get_thread_index ();
+ int mark_bucket_linear;
+ int resplit_once;
+
+ hash = BV (clib_bihash_hash) (add_v);
+
+ bucket_index = hash & (h->nbuckets - 1);
+ b = &h->buckets[bucket_index];
+
+ hash >>= h->log2_nbuckets;
+
+ tmp_b.linear_search = 0;
+
+ while (__sync_lock_test_and_set (h->writer_lock, 1))
+ ;
+
+ /* First elt in the bucket? */
+ if (b->offset == 0)
+ {
+ if (is_add == 0)
+ {
+ rv = -1;
+ goto unlock;
+ }
+
+ v = BV (value_alloc) (h, 0);
+
+ *v->kvp = *add_v;
+ tmp_b.as_u64 = 0;
+ tmp_b.offset = BV (clib_bihash_get_offset) (h, v);
+
+ b->as_u64 = tmp_b.as_u64;
+ goto unlock;
+ }
+
+ /* Note: this leaves the cache disabled */
+ BV (make_working_copy) (h, b);
+
+ v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
+
+ limit = BIHASH_KVP_PER_PAGE;
+ v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
+ if (b->linear_search)
+ limit <<= b->log2_pages;
+
+ if (is_add)
+ {
+ /*
+ * For obvious (in hindsight) reasons, see if we're supposed to
+ * replace an existing key, then look for an empty slot.
+ */
+ for (i = 0; i < limit; i++)
+ {
+ if (!memcmp (&(v->kvp[i]), &add_v->key, sizeof (add_v->key)))
+ {
+ clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
+ CLIB_MEMORY_BARRIER ();
+ /* Restore the previous (k,v) pairs */
+ b->as_u64 = h->saved_bucket.as_u64;
+ goto unlock;
+ }
+ }
+ for (i = 0; i < limit; i++)
+ {
+ if (BV (clib_bihash_is_free) (&(v->kvp[i])))
+ {
+ clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
+ CLIB_MEMORY_BARRIER ();
+ b->as_u64 = h->saved_bucket.as_u64;
+ goto unlock;
+ }
+ }
+ /* no room at the inn... split case... */
+ }
+ else
+ {
+ for (i = 0; i < limit; i++)
+ {
+ if (!memcmp (&(v->kvp[i]), &add_v->key, sizeof (add_v->key)))
+ {
+ memset (&(v->kvp[i]), 0xff, sizeof (*(add_v)));
+ CLIB_MEMORY_BARRIER ();
+ b->as_u64 = h->saved_bucket.as_u64;
+ goto unlock;
+ }
+ }
+ rv = -3;
+ b->as_u64 = h->saved_bucket.as_u64;
+ goto unlock;
+ }
+
+ old_log2_pages = h->saved_bucket.log2_pages;
+ new_log2_pages = old_log2_pages + 1;
+ mark_bucket_linear = 0;
+
+ working_copy = h->working_copies[thread_index];
+ resplit_once = 0;
+
+ new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
+ new_log2_pages);
+ if (new_v == 0)
+ {
+ try_resplit:
+ resplit_once = 1;
+ new_log2_pages++;
+ /* Try re-splitting. If that fails, fall back to linear search */
+ new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
+ new_log2_pages);
+ if (new_v == 0)
+ {
+ mark_linear:
+ new_log2_pages--;
+ /* pinned collisions, use linear search */
+ new_v =
+ BV (split_and_rehash_linear) (h, working_copy, old_log2_pages,
+ new_log2_pages);
+ mark_bucket_linear = 1;
+ }
+ }
+
+ /* Try to add the new entry */
+ save_new_v = new_v;
+ new_hash = BV (clib_bihash_hash) (add_v);
+ limit = BIHASH_KVP_PER_PAGE;
+ if (mark_bucket_linear)
+ limit <<= new_log2_pages;
+ new_hash >>= h->log2_nbuckets;
+ new_hash &= (1 << new_log2_pages) - 1;
+ new_v += mark_bucket_linear ? 0 : new_hash;
+
+ for (i = 0; i < limit; i++)
+ {
+ if (BV (clib_bihash_is_free) (&(new_v->kvp[i])))
+ {
+ clib_memcpy (&(new_v->kvp[i]), add_v, sizeof (*add_v));
+ goto expand_ok;
+ }
+ }
+
+ /* Crap. Try again */
+ BV (value_free) (h, save_new_v, new_log2_pages);
+ /*
+ * If we've already doubled the size of the bucket once,
+ * fall back to linear search now.
+ */
+ if (resplit_once)
+ goto mark_linear;
+ else
+ goto try_resplit;
+
+expand_ok:
+ /* Keep track of the number of linear-scan buckets */
+ if (tmp_b.linear_search ^ mark_bucket_linear)
+ h->linear_buckets += (mark_bucket_linear == 1) ? 1 : -1;
+
+ tmp_b.log2_pages = new_log2_pages;
+ tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v);
+ tmp_b.linear_search = mark_bucket_linear;
+
+ CLIB_MEMORY_BARRIER ();
+ b->as_u64 = tmp_b.as_u64;
+ v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
+ BV (value_free) (h, v, old_log2_pages);
+
+unlock:
+ BV (clib_bihash_reset_cache) (b);
+ BV (clib_bihash_unlock_bucket) (b);
+ CLIB_MEMORY_BARRIER ();
+ h->writer_lock[0] = 0;
+ return rv;
+}
+
+int BV (clib_bihash_search)
+ (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
+{
+ u64 hash;
+ u32 bucket_index;
+ BVT (clib_bihash_value) * v;
+#if BIHASH_KVP_CACHE_SIZE > 0
+ BVT (clib_bihash_kv) * kvp;
+#endif
+ BVT (clib_bihash_bucket) * b;
+ int i, limit;
+
+ ASSERT (valuep);
+
+ hash = BV (clib_bihash_hash) (search_key);
+
+ bucket_index = hash & (h->nbuckets - 1);
+ b = &h->buckets[bucket_index];
+
+ if (b->offset == 0)
+ return -1;
+
+#if BIHASH_KVP_CACHE_SIZE > 0
+ /* Check the cache, if currently enabled */
+ if (PREDICT_TRUE ((b->cache_lru & (1 << 15)) == 0))
+ {
+ limit = BIHASH_KVP_CACHE_SIZE;
+ kvp = b->cache;
+ for (i = 0; i < limit; i++)
+ {
+ if (BV (clib_bihash_key_compare) (kvp[i].key, search_key->key))
+ {
+ *valuep = kvp[i];
+ h->cache_hits++;
+ return 0;
+ }
+ }
+ }
+#endif
+
+ hash >>= h->log2_nbuckets;
+
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ limit = BIHASH_KVP_PER_PAGE;
+ v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
+ if (PREDICT_FALSE (b->linear_search))
+ limit <<= b->log2_pages;
+
+ for (i = 0; i < limit; i++)
+ {
+ if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key))
+ {
+ *valuep = v->kvp[i];
+
+#if BIHASH_KVP_CACHE_SIZE > 0
+ u8 cache_slot;
+ /* Shut off the cache */
+ if (BV (clib_bihash_lock_bucket) (b))
+ {
+ cache_slot = BV (clib_bihash_get_lru) (b);
+ b->cache[cache_slot] = v->kvp[i];
+ BV (clib_bihash_update_lru) (b, cache_slot);
+
+ /* Reenable the cache */
+ BV (clib_bihash_unlock_bucket) (b);
+ h->cache_misses++;
+ }
+#endif
+ return 0;
+ }
+ }
+ return -1;
+}
+
+u8 *BV (format_bihash_lru) (u8 * s, va_list * args)
+{
+#if BIHASH_KVP_CACHE_SIZE > 0
+ int i;
+ BVT (clib_bihash_bucket) * b = va_arg (*args, BVT (clib_bihash_bucket) *);
+ u16 cache_lru = b->cache_lru;
+
+ s = format (s, "cache %s, order ", cache_lru & (1 << 15) ? "on" : "off");
+
+ for (i = 0; i < BIHASH_KVP_CACHE_SIZE; i++)
+ s = format (s, "[%d] ", ((cache_lru >> (3 * i)) & 7));
+
+ return (s);
+#else
+ return format (s, "cache not configured");
+#endif
+}
+
+void
+BV (clib_bihash_update_lru_not_inline) (BVT (clib_bihash_bucket) * b, u8 slot)
+{
+#if BIHASH_KVP_CACHE_SIZE > 0
+ BV (clib_bihash_update_lru) (b, slot);
+#endif
+}
+
+u8 *BV (format_bihash) (u8 * s, va_list * args)
+{
+ BVT (clib_bihash) * h = va_arg (*args, BVT (clib_bihash) *);
+ int verbose = va_arg (*args, int);
+ BVT (clib_bihash_bucket) * b;
+ BVT (clib_bihash_value) * v;
+ int i, j, k;
+ u64 active_elements = 0;
+
+ s = format (s, "Hash table %s\n", h->name ? h->name : (u8 *) "(unnamed)");
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ {
+ if (verbose > 1)
+ s = format (s, "[%d]: empty\n", i);
+ continue;
+ }
+
+ if (verbose)
+ {
+ s = format (s, "[%d]: heap offset %d, len %d, linear %d\n", i,
+ b->offset, (1 << b->log2_pages), b->linear_search);
+ }
+
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (BV (clib_bihash_is_free) (&v->kvp[k]))
+ {
+ if (verbose > 1)
+ s = format (s, " %d: empty\n",
+ j * BIHASH_KVP_PER_PAGE + k);
+ continue;
+ }
+ if (verbose)
+ {
+ s = format (s, " %d: %U\n",
+ j * BIHASH_KVP_PER_PAGE + k,
+ BV (format_bihash_kvp), &(v->kvp[k]));
+ }
+ active_elements++;
+ }
+ v++;
+ }
+ }
+
+ s = format (s, " %lld active elements\n", active_elements);
+ s = format (s, " %d free lists\n", vec_len (h->freelists));
+ s = format (s, " %d linear search buckets\n", h->linear_buckets);
+ s = format (s, " %lld cache hits, %lld cache misses\n",
+ h->cache_hits, h->cache_misses);
+ return s;
+}
+
+void BV (clib_bihash_foreach_key_value_pair)
+ (BVT (clib_bihash) * h, void *callback, void *arg)
+{
+ int i, j, k;
+ BVT (clib_bihash_bucket) * b;
+ BVT (clib_bihash_value) * v;
+ void (*fp) (BVT (clib_bihash_kv) *, void *) = callback;
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (BV (clib_bihash_is_free) (&v->kvp[k]))
+ continue;
+
+ (*fp) (&v->kvp[k], arg);
+ }
+ v++;
+ }
+ }
+}
+
+/** @endcond */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
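Restating the return-code conventions implemented above as a short sketch: add_del returns 0 on success and a negative value on failure (-1 when deleting from an empty bucket, -3 when the key to delete is absent); search returns 0 with the result filled in, or -1 on a miss. Concrete _8_8 names assumed as before:

    static void
    check_rv_sketch (clib_bihash_8_8_t * h)
    {
      clib_bihash_kv_8_8_t kv = {.key = 7,.value = 9 };

      if (clib_bihash_add_del_8_8 (h, &kv, 1 /* is_add */ ) < 0)
        clib_warning ("add failed");

      /* fills kv.value on success */
      if (clib_bihash_search_8_8 (h, &kv, &kv) < 0)
        clib_warning ("key 7 not found");

      if (clib_bihash_add_del_8_8 (h, &kv, 0 /* is_add */ ) < 0)
        clib_warning ("delete failed");
    }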
diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h
new file mode 100644
index 00000000..ea1b6f7b
--- /dev/null
+++ b/src/vppinfra/bihash_template.h
@@ -0,0 +1,419 @@
+/*
+ Copyright (c) 2014 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+/** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */
+
+/*
+ * Note: to instantiate the template multiple times in a single file,
+ * #undef __included_bihash_template_h__...
+ */
+#ifndef __included_bihash_template_h__
+#define __included_bihash_template_h__
+
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+
+#ifndef BIHASH_TYPE
+#error BIHASH_TYPE not defined
+#endif
+
+#define _bv(a,b) a##b
+#define __bv(a,b) _bv(a,b)
+#define BV(a) __bv(a,BIHASH_TYPE)
+
+#define _bvt(a,b) a##b##_t
+#define __bvt(a,b) _bvt(a,b)
+#define BVT(a) __bvt(a,BIHASH_TYPE)
+
+typedef struct BV (clib_bihash_value)
+{
+ union
+ {
+ BVT (clib_bihash_kv) kvp[BIHASH_KVP_PER_PAGE];
+ struct BV (clib_bihash_value) * next_free;
+ };
+} BVT (clib_bihash_value);
+
+#if BIHASH_KVP_CACHE_SIZE > 5
+#error Requested KVP cache LRU data exceeds 16 bits
+#endif
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u32 offset;
+ u8 linear_search;
+ u8 log2_pages;
+ u16 cache_lru;
+ };
+ u64 as_u64;
+ };
+#if BIHASH_KVP_CACHE_SIZE > 0
+ BVT (clib_bihash_kv) cache[BIHASH_KVP_CACHE_SIZE];
+#endif
+} BVT (clib_bihash_bucket);
+
+typedef struct
+{
+ BVT (clib_bihash_value) * values;
+ BVT (clib_bihash_bucket) * buckets;
+ volatile u32 *writer_lock;
+
+ BVT (clib_bihash_value) ** working_copies;
+ int *working_copy_lengths;
+ BVT (clib_bihash_bucket) saved_bucket;
+
+ u32 nbuckets;
+ u32 log2_nbuckets;
+ u32 linear_buckets;
+ u8 *name;
+
+ u64 cache_hits;
+ u64 cache_misses;
+
+ BVT (clib_bihash_value) ** freelists;
+ void *mheap;
+
+} BVT (clib_bihash);
+
+
+static inline void
+BV (clib_bihash_update_lru) (BVT (clib_bihash_bucket) * b, u8 slot)
+{
+ u16 value, tmp, mask;
+ u8 found_lru_pos;
+ u16 save_hi;
+
+ if (BIHASH_KVP_CACHE_SIZE < 2)
+ return;
+
+ ASSERT (slot < BIHASH_KVP_CACHE_SIZE);
+
+ /* First, find the slot in cache_lru */
+ mask = slot;
+ if (BIHASH_KVP_CACHE_SIZE > 1)
+ mask |= slot << 3;
+ if (BIHASH_KVP_CACHE_SIZE > 2)
+ mask |= slot << 6;
+ if (BIHASH_KVP_CACHE_SIZE > 3)
+ mask |= slot << 9;
+ if (BIHASH_KVP_CACHE_SIZE > 4)
+ mask |= slot << 12;
+
+ value = b->cache_lru;
+ tmp = value ^ mask;
+
+ /* Already the most-recently used? */
+ if ((tmp & 7) == 0)
+ return;
+
+ found_lru_pos = ((tmp & (7 << 3)) == 0) ? 1 : 0;
+ if (BIHASH_KVP_CACHE_SIZE > 2)
+ found_lru_pos = ((tmp & (7 << 6)) == 0) ? 2 : found_lru_pos;
+ if (BIHASH_KVP_CACHE_SIZE > 3)
+ found_lru_pos = ((tmp & (7 << 9)) == 0) ? 3 : found_lru_pos;
+ if (BIHASH_KVP_CACHE_SIZE > 4)
+ found_lru_pos = ((tmp & (7 << 12)) == 0) ? 4 : found_lru_pos;
+
+ ASSERT (found_lru_pos);
+
+ /* create a mask to kill bits in or above slot */
+ mask = 0xFFFF << found_lru_pos;
+ mask <<= found_lru_pos;
+ mask <<= found_lru_pos;
+ mask ^= 0xFFFF;
+ tmp = value & mask;
+
+ /* Save bits above slot */
+ mask ^= 0xFFFF;
+ mask <<= 3;
+ save_hi = value & mask;
+
+ value = save_hi | (tmp << 3) | slot;
+
+ b->cache_lru = value;
+}
+
+void
+BV (clib_bihash_update_lru_not_inline) (BVT (clib_bihash_bucket) * b,
+ u8 slot);
+
+static inline u8 BV (clib_bihash_get_lru) (BVT (clib_bihash_bucket) * b)
+{
+#if BIHASH_KVP_CACHE_SIZE > 0
+ return (b->cache_lru >> (3 * (BIHASH_KVP_CACHE_SIZE - 1))) & 7;
+#else
+ return 0;
+#endif
+}
+
+static inline void BV (clib_bihash_reset_cache) (BVT (clib_bihash_bucket) * b)
+{
+#if BIHASH_KVP_CACHE_SIZE > 0
+ u16 initial_lru_value;
+
+ memset (b->cache, 0xff, sizeof (b->cache));
+
+ /*
+ * We'll want the cache to be loaded from slot 0 -> slot N, so
+ * the initial LRU order is reverse index order.
+ */
+ if (BIHASH_KVP_CACHE_SIZE == 1)
+ initial_lru_value = 0;
+ else if (BIHASH_KVP_CACHE_SIZE == 2)
+ initial_lru_value = (0 << 3) | (1 << 0);
+ else if (BIHASH_KVP_CACHE_SIZE == 3)
+ initial_lru_value = (0 << 6) | (1 << 3) | (2 << 0);
+ else if (BIHASH_KVP_CACHE_SIZE == 4)
+ initial_lru_value = (0 << 9) | (1 << 6) | (2 << 3) | (3 << 0);
+ else if (BIHASH_KVP_CACHE_SIZE == 5)
+ initial_lru_value = (0 << 12) | (1 << 9) | (2 << 6) | (3 << 3) | (4 << 0);
+
+ b->cache_lru = initial_lru_value;
+#endif
+}
+
+static inline int BV (clib_bihash_lock_bucket) (BVT (clib_bihash_bucket) * b)
+{
+ BVT (clib_bihash_bucket) tmp_b;
+ u64 rv;
+
+ tmp_b.as_u64 = 0;
+ tmp_b.cache_lru = 1 << 15;
+
+ rv = __sync_fetch_and_or (&b->as_u64, tmp_b.as_u64);
+ tmp_b.as_u64 = rv;
+ /* Was already locked? */
+ if (tmp_b.cache_lru & (1 << 15))
+ return 0;
+ return 1;
+}
+
+static inline void BV (clib_bihash_unlock_bucket)
+ (BVT (clib_bihash_bucket) * b)
+{
+ BVT (clib_bihash_bucket) tmp_b;
+
+ tmp_b.as_u64 = b->as_u64;
+ tmp_b.cache_lru &= ~(1 << 15);
+ b->as_u64 = tmp_b.as_u64;
+}
+
+static inline void *BV (clib_bihash_get_value) (BVT (clib_bihash) * h,
+ uword offset)
+{
+ u8 *hp = h->mheap;
+ u8 *vp = hp + offset;
+
+ return (void *) vp;
+}
+
+static inline uword BV (clib_bihash_get_offset) (BVT (clib_bihash) * h,
+ void *v)
+{
+ u8 *hp, *vp;
+
+ hp = (u8 *) h->mheap;
+ vp = (u8 *) v;
+
+ ASSERT ((vp - hp) < 0x100000000ULL);
+ return vp - hp;
+}
+
+void BV (clib_bihash_init)
+ (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size);
+
+void BV (clib_bihash_free) (BVT (clib_bihash) * h);
+
+int BV (clib_bihash_add_del) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * add_v, int is_add);
+int BV (clib_bihash_search) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * search_v,
+ BVT (clib_bihash_kv) * return_v);
+
+void BV (clib_bihash_foreach_key_value_pair) (BVT (clib_bihash) * h,
+ void *callback, void *arg);
+
+format_function_t BV (format_bihash);
+format_function_t BV (format_bihash_kvp);
+format_function_t BV (format_bihash_lru);
+
+static inline int BV (clib_bihash_search_inline)
+ (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * key_result)
+{
+ u64 hash;
+ u32 bucket_index;
+ BVT (clib_bihash_value) * v;
+ BVT (clib_bihash_bucket) * b;
+#if BIHASH_KVP_CACHE_SIZE > 0
+ BVT (clib_bihash_kv) * kvp;
+#endif
+ int i, limit;
+
+ hash = BV (clib_bihash_hash) (key_result);
+
+ bucket_index = hash & (h->nbuckets - 1);
+ b = &h->buckets[bucket_index];
+
+ if (b->offset == 0)
+ return -1;
+
+#if BIHASH_KVP_CACHE_SIZE > 0
+ /* Check the cache, if not currently locked */
+ if (PREDICT_TRUE ((b->cache_lru & (1 << 15)) == 0))
+ {
+ limit = BIHASH_KVP_CACHE_SIZE;
+ kvp = b->cache;
+ for (i = 0; i < limit; i++)
+ {
+ if (BV (clib_bihash_key_compare) (kvp[i].key, key_result->key))
+ {
+ *key_result = kvp[i];
+ h->cache_hits++;
+ return 0;
+ }
+ }
+ }
+#endif
+
+ hash >>= h->log2_nbuckets;
+
+ v = BV (clib_bihash_get_value) (h, b->offset);
+
+ /* If the bucket has unresolvable collisions, use linear search */
+ limit = BIHASH_KVP_PER_PAGE;
+ v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
+ if (PREDICT_FALSE (b->linear_search))
+ limit <<= b->log2_pages;
+
+ for (i = 0; i < limit; i++)
+ {
+ if (BV (clib_bihash_key_compare) (v->kvp[i].key, key_result->key))
+ {
+ *key_result = v->kvp[i];
+
+#if BIHASH_KVP_CACHE_SIZE > 0
+ u8 cache_slot;
+ /* Try to lock the bucket */
+ if (BV (clib_bihash_lock_bucket) (b))
+ {
+ cache_slot = BV (clib_bihash_get_lru) (b);
+ b->cache[cache_slot] = v->kvp[i];
+ BV (clib_bihash_update_lru) (b, cache_slot);
+
+ /* Unlock the bucket */
+ BV (clib_bihash_unlock_bucket) (b);
+ h->cache_misses++;
+ }
+#endif
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static inline int BV (clib_bihash_search_inline_2)
+ (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
+{
+ u64 hash;
+ u32 bucket_index;
+ BVT (clib_bihash_value) * v;
+ BVT (clib_bihash_bucket) * b;
+#if BIHASH_KVP_CACHE_SIZE > 0
+ BVT (clib_bihash_kv) * kvp;
+#endif
+ int i, limit;
+
+ ASSERT (valuep);
+
+ hash = BV (clib_bihash_hash) (search_key);
+
+ bucket_index = hash & (h->nbuckets - 1);
+ b = &h->buckets[bucket_index];
+
+ if (b->offset == 0)
+ return -1;
+
+ /* Check the cache, if currently unlocked */
+#if BIHASH_KVP_CACHE_SIZE > 0
+ if (PREDICT_TRUE ((b->cache_lru & (1 << 15)) == 0))
+ {
+ limit = BIHASH_KVP_CACHE_SIZE;
+ kvp = b->cache;
+ for (i = 0; i < limit; i++)
+ {
+ if (BV (clib_bihash_key_compare) (kvp[i].key, search_key->key))
+ {
+ *valuep = kvp[i];
+ h->cache_hits++;
+ return 0;
+ }
+ }
+ }
+#endif
+
+ hash >>= h->log2_nbuckets;
+ v = BV (clib_bihash_get_value) (h, b->offset);
+
+ /* If the bucket has unresolvable collisions, use linear search */
+ limit = BIHASH_KVP_PER_PAGE;
+ v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
+ if (PREDICT_FALSE (b->linear_search))
+ limit <<= b->log2_pages;
+
+ for (i = 0; i < limit; i++)
+ {
+ if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key))
+ {
+ *valuep = v->kvp[i];
+
+#if BIHASH_KVP_CACHE_SIZE > 0
+ u8 cache_slot;
+
+ /* Try to lock the bucket */
+ if (BV (clib_bihash_lock_bucket) (b))
+ {
+ cache_slot = BV (clib_bihash_get_lru) (b);
+ b->cache[cache_slot] = v->kvp[i];
+ BV (clib_bihash_update_lru) (b, cache_slot);
+
+ /* Reenable the cache */
+ BV (clib_bihash_unlock_bucket) (b);
+ h->cache_misses++;
+ }
+#endif
+ return 0;
+ }
+ }
+ return -1;
+}
+
+#endif /* __included_bihash_template_h__ */
+
+/** @endcond */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
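Minting a new flavor means defining the (key,value) type plus the is_free / hash / format / key_compare helpers, then pulling in the template, exactly as bihash_8_8.h does. A condensed sketch for a hypothetical _16_8 flavor (16-octet key, 8-octet value); the helper bodies are elided but follow the bihash_8_8.h pattern:

    #undef BIHASH_TYPE
    #undef BIHASH_KVP_CACHE_SIZE
    #undef BIHASH_KVP_PER_PAGE

    #define BIHASH_TYPE _16_8
    #define BIHASH_KVP_PER_PAGE 4
    #define BIHASH_KVP_CACHE_SIZE 0

    typedef struct
    {
      u64 key[2];
      u64 value;
    } clib_bihash_kv_16_8_t;

    /* clib_bihash_is_free_16_8, clib_bihash_hash_16_8,
       format_bihash_kvp_16_8 and clib_bihash_key_compare_16_8
       go here, keyed on key[0] and key[1] */

    #undef __included_bihash_template_h__
    #include <vppinfra/bihash_template.h>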
diff --git a/src/vppinfra/bitmap.h b/src/vppinfra/bitmap.h
new file mode 100644
index 00000000..9e1ae493
--- /dev/null
+++ b/src/vppinfra/bitmap.h
@@ -0,0 +1,774 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_bitmap_h
+#define included_clib_bitmap_h
+
+/** \file
+ Bitmaps built as vectors of machine words
+*/
+
+#include <vppinfra/vec.h>
+#include <vppinfra/random.h>
+#include <vppinfra/error.h>
+#include <vppinfra/bitops.h> /* for count_set_bits */
+
+typedef uword clib_bitmap_t;
+
+/** predicate function; is an entire bitmap empty?
+ @param ai - pointer to a bitmap
+ @returns 1 if the entire bitmap is zero, 0 otherwise
+*/
+always_inline uword
+clib_bitmap_is_zero (uword * ai)
+{
+ uword i;
+ for (i = 0; i < vec_len (ai); i++)
+ if (ai[i] != 0)
+ return 0;
+ return 1;
+}
+
+/** predicate function; are two bitmaps equal?
+ @param a - pointer to a bitmap
+ @param b - pointer to a bitmap
+ @returns 1 if the bitmaps are equal, 0 otherwise
+*/
+always_inline uword
+clib_bitmap_is_equal (uword * a, uword * b)
+{
+ uword i;
+ if (vec_len (a) != vec_len (b))
+ return 0;
+ for (i = 0; i < vec_len (a); i++)
+ if (a[i] != b[i])
+ return 0;
+ return 1;
+}
+
+/** Duplicate a bitmap
+ @param v - pointer to a bitmap
+ @returns a duplicate of the bitmap
+*/
+#define clib_bitmap_dup(v) vec_dup(v)
+
+/** Free a bitmap
+ @param v - pointer to the bitmap to free
+*/
+#define clib_bitmap_free(v) vec_free(v)
+
+/** Number of bytes in a bitmap
+ @param v - pointer to the bitmap
+*/
+#define clib_bitmap_bytes(v) vec_bytes(v)
+
+/** Clear a bitmap
+ @param v - pointer to the bitmap to clear
+*/
+#define clib_bitmap_zero(v) vec_zero(v)
+
+/** Allocate a bitmap with the supplied number of bits
+ @param [out] v - the resulting bitmap
+ @param n_bits - the required number of bits
+*/
+
+#define clib_bitmap_alloc(v,n_bits) \
+ v = vec_new (uword, ((n_bits) + BITS (uword) - 1) / BITS (uword))
+
+#define clib_bitmap_vec_validate(v,i) vec_validate_aligned((v),(i),sizeof(uword))
+
+/* Make sure that a bitmap is at least n_bits in size */
+#define clib_bitmap_validate(v,n_bits) \
+ clib_bitmap_vec_validate ((v), ((n_bits) - 1) / BITS (uword))
+
+/* low-level routine to remove trailing zeros from a bitmap */
+always_inline uword *
+_clib_bitmap_remove_trailing_zeros (uword * a)
+{
+ word i;
+ if (a)
+ {
+ for (i = _vec_len (a) - 1; i >= 0; i--)
+ if (a[i] != 0)
+ break;
+ _vec_len (a) = i + 1;
+ }
+ return a;
+}
+
+/** Sets the ith bit of a bitmap to new_value.
+ No sanity checking. Be careful.
+ @param a - pointer to the bitmap
+ @param i - the bit position to set
+ @param new_value - new value for the bit
+ @returns the old value of the bit
+*/
+always_inline uword
+clib_bitmap_set_no_check (uword * a, uword i, uword new_value)
+{
+ uword i0 = i / BITS (a[0]);
+ uword i1 = i % BITS (a[0]);
+ uword bit = (uword) 1 << i1;
+ uword ai, old_value;
+
+ /* Removed ASSERT since uword * a may not be a vector. */
+ /* ASSERT (i0 < vec_len (a)); */
+
+ ai = a[i0];
+ old_value = (ai & bit) != 0;
+ ai &= ~bit;
+ ai |= ((uword) (new_value != 0)) << i1;
+ a[i0] = ai;
+ return old_value;
+}
+
+/** Sets the ith bit of a bitmap to value
+ Removes trailing zeros from the bitmap
+ @param ai - pointer to the bitmap
+ @param i - the bit position to set
+ @param value - new value for the bit
+ @returns the old value of the bit
+*/
+always_inline uword *
+clib_bitmap_set (uword * ai, uword i, uword value)
+{
+ uword i0 = i / BITS (ai[0]);
+ uword i1 = i % BITS (ai[0]);
+ uword a;
+
+ /* Check for writing a zero to beyond end of bitmap. */
+ if (value == 0 && i0 >= vec_len (ai))
+ return ai; /* Implied trailing zeros. */
+
+ clib_bitmap_vec_validate (ai, i0);
+
+ a = ai[i0];
+ a &= ~((uword) 1 << i1);
+ a |= ((uword) (value != 0)) << i1;
+ ai[i0] = a;
+
+ /* If bits have been cleared, test for zero. */
+ if (a == 0)
+ ai = _clib_bitmap_remove_trailing_zeros (ai);
+
+ return ai;
+}
+
+/** Gets the ith bit value from a bitmap
+ @param ai - pointer to the bitmap
+ @param i - the bit position to interrogate
+ @returns the indicated bit value
+*/
+always_inline uword
+clib_bitmap_get (uword * ai, uword i)
+{
+ uword i0 = i / BITS (ai[0]);
+ uword i1 = i % BITS (ai[0]);
+ return i0 < vec_len (ai) && 0 != ((ai[i0] >> i1) & 1);
+}
+
+/** Gets the ith bit value from a bitmap
+ Does not sanity-check the bit position. Be careful.
+ @param ai - pointer to the bitmap
+ @param i - the bit position to interrogate
+ @returns the indicated bit value, or garbage if the bit position is
+ out of range.
+*/
+always_inline uword
+clib_bitmap_get_no_check (uword * ai, uword i)
+{
+ uword i0 = i / BITS (ai[0]);
+ uword i1 = i % BITS (ai[0]);
+ return 0 != ((ai[i0] >> i1) & 1);
+}
+
+always_inline uword
+clib_bitmap_get_multiple_no_check (uword * ai, uword i, uword n_bits)
+{
+ uword i0 = i / BITS (ai[0]);
+ uword i1 = i % BITS (ai[0]);
+ ASSERT (i1 + n_bits <= BITS (uword));
+ return 0 != ((ai[i0] >> i1) & pow2_mask (n_bits));
+}
+
+/** Gets n_bits consecutive bit values starting at bit i from a bitmap
+ @param bitmap - pointer to the bitmap
+ @param i - the first bit position to retrieve
+ @param n_bits - the number of bit positions to retrieve
+ @returns the indicated range of bits
+*/
+always_inline uword
+clib_bitmap_get_multiple (uword * bitmap, uword i, uword n_bits)
+{
+ uword i0, i1, result;
+ uword l = vec_len (bitmap);
+
+ ASSERT (n_bits <= BITS (result));
+
+ i0 = i / BITS (bitmap[0]);
+ i1 = i % BITS (bitmap[0]);
+
+ /* Check first word. */
+ result = 0;
+ if (i0 < l)
+ {
+ result |= (bitmap[i0] >> i1);
+ if (n_bits < BITS (bitmap[0]))
+ result &= (((uword) 1 << n_bits) - 1);
+ }
+
+ /* Check for overlap into next word. */
+ i0++;
+ if (i1 + n_bits > BITS (bitmap[0]) && i0 < l)
+ {
+ n_bits -= BITS (bitmap[0]) - i1;
+ result |=
+ (bitmap[i0] & (((uword) 1 << n_bits) - 1)) << (BITS (bitmap[0]) - i1);
+ }
+
+ return result;
+}
+
+/** Sets n_bits consecutive bits starting at bit i in a bitmap
+ @param bitmap - pointer to the bitmap
+ @param i - the first bit position to set
+ @param value - the values to set
+ @param n_bits - the number of bit positions to set
+ @returns a pointer to the updated bitmap, which may expand and move
+*/
+
+always_inline uword *
+clib_bitmap_set_multiple (uword * bitmap, uword i, uword value, uword n_bits)
+{
+ uword i0, i1, l, t, m;
+
+ ASSERT (n_bits <= BITS (value));
+
+ i0 = i / BITS (bitmap[0]);
+ i1 = i % BITS (bitmap[0]);
+
+ /* Allocate bitmap. */
+ clib_bitmap_vec_validate (bitmap, (i + n_bits) / BITS (bitmap[0]));
+ l = vec_len (bitmap);
+
+ m = ~0;
+ if (n_bits < BITS (value))
+ m = (((uword) 1 << n_bits) - 1);
+ value &= m;
+
+ /* Insert into first word. */
+ t = bitmap[i0];
+ t &= ~(m << i1);
+ t |= value << i1;
+ bitmap[i0] = t;
+
+ /* Insert into second word. */
+ i0++;
+ if (i1 + n_bits > BITS (bitmap[0]) && i0 < l)
+ {
+ t = BITS (bitmap[0]) - i1;
+ value >>= t;
+ n_bits -= t;
+ t = bitmap[i0];
+ m = ((uword) 1 << n_bits) - 1;
+ t &= ~m;
+ t |= value;
+ bitmap[i0] = t;
+ }
+
+ return bitmap;
+}
+
+always_inline uword *
+clib_bitmap_set_region (uword * bitmap, uword i, uword value, uword n_bits)
+{
+ uword a0, a1, b0;
+ uword i_end, mask;
+
+ a0 = i / BITS (bitmap[0]);
+ a1 = i % BITS (bitmap[0]);
+
+ i_end = i + n_bits;
+ b0 = i_end / BITS (bitmap[0]);
+
+ clib_bitmap_vec_validate (bitmap, b0);
+
+ /* First word. */
+ mask = n_bits < BITS (bitmap[0]) ? pow2_mask (n_bits) : ~0;
+ mask <<= a1;
+
+ if (value)
+ bitmap[a0] |= mask;
+ else
+ bitmap[a0] &= ~mask;
+
+ for (a0++; a0 < b0; a0++)
+ bitmap[a0] = value ? ~0 : 0;
+
+ if (a0 == b0)
+ {
+ /* bits that land in the final word; computing this from i_end
+ also handles regions spanning three or more words */
+ word n_bits_left = i_end % BITS (bitmap[0]);
+ mask = pow2_mask (n_bits_left);
+ if (value)
+ bitmap[a0] |= mask;
+ else
+ bitmap[a0] &= ~mask;
+ }
+
+ return bitmap;
+}
+
+/** Macro to iterate across set bits in a bitmap
+
+ @param i - the current set bit
+ @param ai - the bitmap
+ @param body - the expression to evaluate for each set bit
+*/
+#define clib_bitmap_foreach(i,ai,body) \
+do { \
+ uword __bitmap_i, __bitmap_ai, __bitmap_len, __bitmap_first_set; \
+ __bitmap_len = vec_len ((ai)); \
+ for (__bitmap_i = 0; __bitmap_i < __bitmap_len; __bitmap_i++) \
+ { \
+ __bitmap_ai = (ai)[__bitmap_i]; \
+ while (__bitmap_ai != 0) \
+ { \
+ __bitmap_first_set = first_set (__bitmap_ai); \
+ (i) = (__bitmap_i * BITS ((ai)[0]) \
+ + min_log2 (__bitmap_first_set)); \
+ do { body; } while (0); \
+ __bitmap_ai ^= __bitmap_first_set; \
+ } \
+ } \
+} while (0)
+
+
+/** Return the lowest numbered set bit in a bitmap
+ @param ai - pointer to the bitmap
+ @returns lowest numbered set bit, or ~0 if the entire bitmap is zero
+*/
+always_inline uword
+clib_bitmap_first_set (uword * ai)
+{
+ uword i;
+ for (i = 0; i < vec_len (ai); i++)
+ {
+ uword x = ai[i];
+ if (x != 0)
+ return i * BITS (ai[0]) + log2_first_set (x);
+ }
+ return ~0;
+}
+
+/** Return the highest numbered set bit in a bitmap
+ @param ai - pointer to the bitmap
+ @returns highest numbered set bit, or ~0 if the entire bitmap is zero
+*/
+always_inline uword
+clib_bitmap_last_set (uword * ai)
+{
+ uword i;
+
+ for (i = vec_len (ai); i > 0; i--)
+ {
+ uword x = ai[i - 1];
+ if (x != 0)
+ {
+ uword first_bit;
+ count_leading_zeros (first_bit, x);
+ return (i) * BITS (ai[0]) - first_bit - 1;
+ }
+ }
+ return ~0;
+}
+
+/** Return the lowest numbered clear bit in a bitmap
+ @param ai - pointer to the bitmap
+ @returns lowest numbered clear bit
+*/
+always_inline uword
+clib_bitmap_first_clear (uword * ai)
+{
+ uword i;
+ for (i = 0; i < vec_len (ai); i++)
+ {
+ uword x = ~ai[i];
+ if (x != 0)
+ return i * BITS (ai[0]) + log2_first_set (x);
+ }
+ return i * BITS (ai[0]);
+}
+
+/** Return the number of set bits in a bitmap
+ @param ai - pointer to the bitmap
+ @returns the number of set bits in the bitmap
+*/
+always_inline uword
+clib_bitmap_count_set_bits (uword * ai)
+{
+ uword i;
+ uword n_set = 0;
+ for (i = 0; i < vec_len (ai); i++)
+ n_set += count_set_bits (ai[i]);
+ return n_set;
+}
+
+/** Logical operator across two bitmaps
+
+ @param ai - pointer to the destination bitmap
+ @param bi - pointer to the source bitmap
+ @returns ai = ai and bi. ai is modified, bi is not modified
+*/
+always_inline uword *clib_bitmap_and (uword * ai, uword * bi);
+
+/** Logical operator across two bitmaps
+
+ @param ai - pointer to the destination bitmap
+ @param bi - pointer to the source bitmap
+ @returns ai = ai & ~bi. ai is modified, bi is not modified
+*/
+always_inline uword *clib_bitmap_andnot (uword * ai, uword * bi);
+
+/** Logical operator across two bitmaps
+
+ @param ai - pointer to the destination bitmap
+ @param bi - pointer to the source bitmap
+ @returns ai = ai or bi. ai is modified, bi is not modified
+*/
+always_inline uword *clib_bitmap_or (uword * ai, uword * bi);
+
+/** Logical operator across two bitmaps
+
+ @param ai - pointer to the destination bitmap
+ @param bi - pointer to the source bitmap
+ @returns ai = ai xor bi. ai is modified, bi is not modified
+*/
+always_inline uword *clib_bitmap_xor (uword * ai, uword * bi);
+
+/* ALU function definition macro for functions taking two bitmaps. */
+#define _(name, body, check_zero) \
+always_inline uword * \
+clib_bitmap_##name (uword * ai, uword * bi) \
+{ \
+ uword i, a, b, bi_len, n_trailing_zeros; \
+ \
+ n_trailing_zeros = 0; \
+ bi_len = vec_len (bi); \
+ if (bi_len > 0) \
+ clib_bitmap_vec_validate (ai, bi_len - 1); \
+ for (i = 0; i < vec_len (ai); i++) \
+ { \
+ a = ai[i]; \
+ b = i < bi_len ? bi[i] : 0; \
+ do { body; } while (0); \
+ ai[i] = a; \
+ if (check_zero) \
+ n_trailing_zeros = a ? 0 : (n_trailing_zeros + 1); \
+ } \
+ if (check_zero) \
+ _vec_len (ai) -= n_trailing_zeros; \
+ return ai; \
+}
+
+/* ALU functions: */
+_(and, a = a & b, 1)
+_(andnot, a = a & ~b, 1) _(or, a = a | b, 0) _(xor, a = a ^ b, 1)
+#undef _
+/** Logical operator across two bitmaps which duplicates the first bitmap
+
+ @param ai - pointer to the destination bitmap
+ @param bi - pointer to the source bitmap
+ @returns aiDup = ai and bi. Neither ai nor bi are modified
+*/
+ always_inline uword *
+ clib_bitmap_dup_and (uword * ai, uword * bi);
+
+/** Logical operator across two bitmaps which duplicates the first bitmap
+
+ @param ai - pointer to the destination bitmap
+ @param bi - pointer to the source bitmap
+ @returns aiDup = ai & ~bi. Neither ai nor bi are modified
+*/
+ always_inline uword *
+ clib_bitmap_dup_andnot (uword * ai, uword * bi);
+
+/** Logical operator across two bitmaps which duplicates the first bitmap
+
+ @param ai - pointer to the destination bitmap
+ @param bi - pointer to the source bitmap
+ @returns aiDup = ai or bi. Neither ai nor bi are modified
+*/
+ always_inline uword *
+ clib_bitmap_dup_or (uword * ai, uword * bi);
+
+/** Logical operator across two bitmaps which duplicates the first bitmap
+
+ @param ai - pointer to the destination bitmap
+ @param bi - pointer to the source bitmap
+ @returns aiDup = ai xor bi. Neither ai nor bi are modified
+*/
+ always_inline uword *
+ clib_bitmap_dup_xor (uword * ai, uword * bi);
+
+#define _(name) \
+ always_inline uword * \
+ clib_bitmap_dup_##name (uword * ai, uword * bi) \
+{ return clib_bitmap_##name (clib_bitmap_dup (ai), bi); }
+
+_(and);
+_(andnot);
+_(or);
+_(xor);
+
+#undef _
+
+/* ALU function definition macro for functions taking one bitmap and an immediate. */
+#define _(name, body, check_zero) \
+always_inline uword * \
+clib_bitmap_##name (uword * ai, uword i) \
+{ \
+ uword i0 = i / BITS (ai[0]); \
+ uword i1 = i % BITS (ai[0]); \
+ uword a, b; \
+ clib_bitmap_vec_validate (ai, i0); \
+ a = ai[i0]; \
+ b = (uword) 1 << i1; \
+ do { body; } while (0); \
+ ai[i0] = a; \
+ if (check_zero && a == 0) \
+ ai = _clib_bitmap_remove_trailing_zeros (ai); \
+ return ai; \
+}
+
+/* ALU functions immediate: */
+_(andi, a = a & b, 1)
+_(andnoti, a = a & ~b, 1) _(ori, a = a | b, 0) _(xori, a = a ^ b, 1)
+#undef _
+/** Return a random bitmap of the requested length
+ @param ai - pointer to the destination bitmap
+ @param n_bits - number of bits to allocate
+ @param [in,out] seed - pointer to the random number seed
+ @returns a reasonably random bitmap; see random.h.
+*/
+ always_inline uword *
+ clib_bitmap_random (uword * ai, uword n_bits, u32 * seed)
+{
+ vec_reset_length (ai);
+
+ if (n_bits > 0)
+ {
+ uword i = n_bits - 1;
+ uword i0, i1;
+ uword log2_rand_max;
+
+ log2_rand_max = min_log2 (random_u32_max ());
+
+ i0 = i / BITS (ai[0]);
+ i1 = i % BITS (ai[0]);
+
+ clib_bitmap_vec_validate (ai, i0);
+ for (i = 0; i <= i0; i++)
+ {
+ uword n;
+ for (n = 0; n < BITS (ai[i]); n += log2_rand_max)
+ ai[i] |= random_u32 (seed) << n;
+ }
+ if (i1 + 1 < BITS (ai[0]))
+ ai[i0] &= (((uword) 1 << (i1 + 1)) - 1);
+ }
+ return ai;
+}
+
+/** Return the next set bit in a bitmap starting at bit i
+ @param ai - pointer to the bitmap
+ @param i - first bit position to test
+ @returns first set bit position at or after i,
+ ~0 if no further set bits are found
+*/
+always_inline uword
+clib_bitmap_next_set (uword * ai, uword i)
+{
+ uword i0 = i / BITS (ai[0]);
+ uword i1 = i % BITS (ai[0]);
+ uword t;
+
+ if (i0 < vec_len (ai))
+ {
+ t = (ai[i0] >> i1) << i1;
+ if (t)
+ return log2_first_set (t) + i0 * BITS (ai[0]);
+
+ for (i0++; i0 < vec_len (ai); i0++)
+ {
+ t = ai[i0];
+ if (t)
+ return log2_first_set (t) + i0 * BITS (ai[0]);
+ }
+ }
+
+ return ~0;
+}
+
+/** Return the next clear bit in a bitmap starting at bit i
+ @param ai - pointer to the bitmap
+ @param i - first bit position to test
+ @returns first clear bit position at or after i
+*/
+always_inline uword
+clib_bitmap_next_clear (uword * ai, uword i)
+{
+ uword i0 = i / BITS (ai[0]);
+ uword i1 = i % BITS (ai[0]);
+ uword t;
+
+ if (i0 < vec_len (ai))
+ {
+ t = (~ai[i0] >> i1) << i1;
+ if (t)
+ return log2_first_set (t) + i0 * BITS (ai[0]);
+
+ for (i0++; i0 < vec_len (ai); i0++)
+ {
+ t = ~ai[i0];
+ if (t)
+ return log2_first_set (t) + i0 * BITS (ai[0]);
+ }
+ }
+ return i;
+}
+
+/** unformat a list of bit ranges into a bitmap (e.g. "0-3,5-7,11")
+
+ uword * bitmap;
+ rv = unformat (input, "%U", unformat_bitmap_list, &bitmap);
+
+ Standard unformat_function_t arguments
+
+ @param input - pointer an unformat_input_t
+ @param va - varargs list comprising a single uword **
+ @returns 1 on success, 0 on failure
+*/
+static inline uword
+unformat_bitmap_list (unformat_input_t * input, va_list * va)
+{
+ uword **bitmap_return = va_arg (*va, uword **);
+ uword *bitmap = 0;
+
+ u32 a, b;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ int i;
+ if (unformat (input, "%u-%u,", &a, &b))
+ ;
+ else if (unformat (input, "%u,", &a))
+ b = a;
+ else if (unformat (input, "%u-%u", &a, &b))
+ ;
+ else if (unformat (input, "%u", &a))
+ b = a;
+ else if (bitmap)
+ {
+ unformat_put_input (input);
+ break;
+ }
+ else
+ goto error;
+
+ if (b < a)
+ goto error;
+
+ for (i = a; i <= b; i++)
+ bitmap = clib_bitmap_set (bitmap, i, 1);
+ }
+ *bitmap_return = bitmap;
+ return 1;
+error:
+ clib_bitmap_free (bitmap);
+ return 0;
+}
+
+/** Format a bitmap as a string of hex digits
+
+ uword * bitmap;
+ s = format (0, "%U", format_bitmap_hex, bitmap);
+
+ Standard format_function_t arguments
+
+ @param s - string under construction
+ @param args - varargs list comprising a single uword *
+ @returns string under construction
+*/
+static inline u8 *
+format_bitmap_hex (u8 * s, va_list * args)
+{
+ uword *bitmap = va_arg (*args, uword *);
+ int i, is_trailing_zero = 1;
+
+ if (!bitmap)
+ return format (s, "0");
+
+ i = vec_bytes (bitmap) * 2;
+
+ while (i > 0)
+ {
+ u8 x = clib_bitmap_get_multiple (bitmap, --i * 4, 4);
+
+ if (x && is_trailing_zero)
+ is_trailing_zero = 0;
+
+ if (x || !is_trailing_zero)
+ s = format (s, "%x", x);
+ }
+ return s;
+}
+#endif /* included_clib_bitmap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
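A minimal sketch of the vector-backed bitmap API above. A bitmap starts life as a NULL vector, and clib_bitmap_set may reallocate it, so callers must keep the returned pointer:

    uword *bm = 0;
    uword bit, n_visited = 0;

    bm = clib_bitmap_set (bm, 3, 1);
    bm = clib_bitmap_set (bm, 67, 1);   /* grows the underlying vector */

    ASSERT (clib_bitmap_get (bm, 3) == 1);
    ASSERT (clib_bitmap_first_set (bm) == 3);
    ASSERT (clib_bitmap_count_set_bits (bm) == 2);

    /* bit takes the values 3, then 67 */
    clib_bitmap_foreach (bit, bm, ({
      n_visited++;
    }));
    ASSERT (n_visited == 2);

    clib_bitmap_free (bm);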
diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h
new file mode 100644
index 00000000..ab91b8ae
--- /dev/null
+++ b/src/vppinfra/bitops.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_bitops_h
+#define included_clib_bitops_h
+
+#include <vppinfra/clib.h>
+
+/* Population count from Hacker's Delight. */
+always_inline uword
+count_set_bits (uword x)
+{
+#if uword_bits == 64
+ const uword c1 = 0x5555555555555555;
+ const uword c2 = 0x3333333333333333;
+ const uword c3 = 0x0f0f0f0f0f0f0f0f;
+#else
+ const uword c1 = 0x55555555;
+ const uword c2 = 0x33333333;
+ const uword c3 = 0x0f0f0f0f;
+#endif
+
+ /* Sum 1 bit at a time. */
+ x = x - ((x >> (uword) 1) & c1);
+
+ /* 2 bits at a time. */
+ x = (x & c2) + ((x >> (uword) 2) & c2);
+
+ /* 4 bits at a time. */
+ x = (x + (x >> (uword) 4)) & c3;
+
+ /* 8, 16, 32 bits at a time. */
+ x = x + (x >> (uword) 8);
+ x = x + (x >> (uword) 16);
+#if uword_bits == 64
+ x = x + (x >> (uword) 32);
+#endif
+
+ return x & (2 * BITS (uword) - 1);
+}
+
+/* Based on "Hacker's Delight" code from GLS. */
+typedef struct
+{
+ uword masks[1 + log2_uword_bits];
+} compress_main_t;
+
+always_inline void
+compress_init (compress_main_t * cm, uword mask)
+{
+ uword q, m, zm, n, i;
+
+ m = ~mask;
+ zm = mask;
+
+ cm->masks[0] = mask;
+ for (i = 0; i < log2_uword_bits; i++)
+ {
+ q = m;
+ m ^= m << 1;
+ m ^= m << 2;
+ m ^= m << 4;
+ m ^= m << 8;
+ m ^= m << 16;
+#if uword_bits > 32
+ m ^= m << (uword) 32;
+#endif
+ cm->masks[1 + i] = n = (m << 1) & zm;
+ m = q & ~m;
+ q = zm & n;
+ zm = zm ^ q ^ (q >> (1 << i));
+ }
+}
+
+always_inline uword
+compress_bits (compress_main_t * cm, uword x)
+{
+ uword q, r;
+
+ r = x & cm->masks[0];
+ q = r & cm->masks[1];
+ r ^= q ^ (q >> 1);
+ q = r & cm->masks[2];
+ r ^= q ^ (q >> 2);
+ q = r & cm->masks[3];
+ r ^= q ^ (q >> 4);
+ q = r & cm->masks[4];
+ r ^= q ^ (q >> 8);
+ q = r & cm->masks[5];
+ r ^= q ^ (q >> 16);
+#if uword_bits > 32
+ q = r & cm->masks[6];
+ r ^= q ^ (q >> (uword) 32);
+#endif
+
+ return r;
+}
+
+always_inline uword
+rotate_left (uword x, uword i)
+{
+ return (x << i) | (x >> (BITS (i) - i));
+}
+
+always_inline uword
+rotate_right (uword x, uword i)
+{
+ return (x >> i) | (x << (BITS (i) - i));
+}
+
+/* Returns snoob from Hacker's Delight. Next highest number
+ with same number of set bits. */
+always_inline uword
+next_with_same_number_of_set_bits (uword x)
+{
+ uword smallest, ripple, ones;
+ smallest = x & -x;
+ ripple = x + smallest;
+ ones = x ^ ripple;
+ ones = ones >> (2 + log2_first_set (x));
+ return ripple | ones;
+}
+
+#define foreach_set_bit(var,mask,body) \
+do { \
+ uword _foreach_set_bit_m_##var = (mask); \
+ uword _foreach_set_bit_f_##var; \
+ while (_foreach_set_bit_m_##var != 0) \
+ { \
+ _foreach_set_bit_f_##var = first_set (_foreach_set_bit_m_##var); \
+ _foreach_set_bit_m_##var ^= _foreach_set_bit_f_##var; \
+ (var) = min_log2 (_foreach_set_bit_f_##var); \
+ do { body; } while (0); \
+ } \
+} while (0)
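+
+/* Illustrative usage (not part of the original header): visit each
+   set-bit index of a mask, lowest first:
+
+     uword i;
+     foreach_set_bit (i, 0x29, { clib_warning ("bit %wd set", i); });
+     // visits i = 0, 3, 5
+*/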
+
+#endif /* included_clib_bitops_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/byte_order.h b/src/vppinfra/byte_order.h
new file mode 100644
index 00000000..b263538c
--- /dev/null
+++ b/src/vppinfra/byte_order.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2004 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_byte_order_h
+#define included_clib_byte_order_h
+
+#include <vppinfra/clib.h>
+
+#if (__BYTE_ORDER__) == (__ORDER_LITTLE_ENDIAN__)
+#define CLIB_ARCH_IS_BIG_ENDIAN (0)
+#define CLIB_ARCH_IS_LITTLE_ENDIAN (1)
+#else
+/* Default is big endian. */
+#define CLIB_ARCH_IS_BIG_ENDIAN (1)
+#define CLIB_ARCH_IS_LITTLE_ENDIAN (0)
+#endif
+
+/* Big/little endian. */
+#define clib_arch_is_big_endian CLIB_ARCH_IS_BIG_ENDIAN
+#define clib_arch_is_little_endian CLIB_ARCH_IS_LITTLE_ENDIAN
+
+always_inline u16
+clib_byte_swap_u16 (u16 x)
+{
+ return (x >> 8) | (x << 8);
+}
+
+always_inline i16
+clib_byte_swap_i16 (i16 x)
+{
+ return clib_byte_swap_u16 (x);
+}
+
+always_inline u32
+clib_byte_swap_u32 (u32 x)
+{
+#if defined (i386) || defined (__x86_64__)
+ if (!__builtin_constant_p (x))
+ {
+ asm volatile ("bswap %0":"=r" (x):"0" (x));
+ return x;
+ }
+#endif
+ return ((x << 24) | ((x & 0xff00) << 8) | ((x >> 8) & 0xff00) | (x >> 24));
+}
+
+always_inline i32
+clib_byte_swap_i32 (i32 x)
+{
+ return clib_byte_swap_u32 (x);
+}
+
+always_inline u64
+clib_byte_swap_u64 (u64 x)
+{
+#if defined (__x86_64__)
+ if (!__builtin_constant_p (x))
+ {
+ asm volatile ("bswapq %0":"=r" (x):"0" (x));
+ return x;
+ }
+#endif
+#define _(x,n,i) \
+ ((((x) >> (8*(i))) & 0xff) << (8*((n)-(i)-1)))
+ return (_(x, 8, 0) | _(x, 8, 1)
+ | _(x, 8, 2) | _(x, 8, 3)
+ | _(x, 8, 4) | _(x, 8, 5) | _(x, 8, 6) | _(x, 8, 7));
+#undef _
+}
+
+always_inline i64
+clib_byte_swap_i64 (i64 x)
+{
+ return clib_byte_swap_u64 (x);
+}
+
+#define _(sex,type) \
+/* HOST -> SEX */ \
+always_inline type \
+clib_host_to_##sex##_##type (type x) \
+{ \
+ if (! clib_arch_is_##sex##_endian) \
+ x = clib_byte_swap_##type (x); \
+ return x; \
+} \
+ \
+always_inline type \
+clib_host_to_##sex##_mem_##type (type * x) \
+{ \
+ type v = x[0]; \
+ return clib_host_to_##sex##_##type (v); \
+} \
+ \
+always_inline type \
+clib_host_to_##sex##_unaligned_mem_##type (type * x) \
+{ \
+ type v = clib_mem_unaligned (x, type); \
+ return clib_host_to_##sex##_##type (v); \
+} \
+ \
+/* SEX -> HOST */ \
+always_inline type \
+clib_##sex##_to_host_##type (type x) \
+{ return clib_host_to_##sex##_##type (x); } \
+ \
+always_inline type \
+clib_##sex##_to_host_mem_##type (type * x) \
+{ return clib_host_to_##sex##_mem_##type (x); } \
+ \
+always_inline type \
+clib_##sex##_to_host_unaligned_mem_##type (type * x) \
+{ return clib_host_to_##sex##_unaligned_mem_##type (x); }
+
+#ifndef __cplusplus
+_(little, u16)
+_(little, u32)
+_(little, u64)
+_(little, i16)
+_(little, i32)
+_(little, i64)
+_(big, u16)
+_(big, u32)
+_(big, u64)
+_(big, i16)
+_(big, i32)
+_(big, i64)
+#endif
+#undef _
+/* Network "net" alias for "big". */
+#define _(type) \
+always_inline type \
+clib_net_to_host_##type (type x) \
+{ return clib_big_to_host_##type (x); } \
+ \
+always_inline type \
+clib_net_to_host_mem_##type (type * x) \
+{ return clib_big_to_host_mem_##type (x); } \
+ \
+always_inline type \
+clib_net_to_host_unaligned_mem_##type (type * x) \
+{ return clib_big_to_host_unaligned_mem_##type (x); } \
+ \
+always_inline type \
+clib_host_to_net_##type (type x) \
+{ return clib_host_to_big_##type (x); } \
+ \
+always_inline type \
+clib_host_to_net_mem_##type (type * x) \
+{ return clib_host_to_big_mem_##type (x); } \
+ \
+always_inline type \
+clib_host_to_net_unaligned_mem_##type (type * x) \
+{ return clib_host_to_big_unaligned_mem_##type (x); }
+#ifndef __cplusplus
+_(u16);
+_(i16);
+_(u32);
+_(i32);
+_(u64);
+_(i64);
+#endif
+
+#undef _
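+
+/* Illustrative usage (not part of the original header): on a
+   little-endian host these byte-swap; on a big-endian host they are
+   the identity, mirroring htons()/ntohl():
+
+     u16 port = clib_host_to_net_u16 (8080);
+     u32 addr = clib_net_to_host_u32 (hdr_src_address);  // hypothetical variable
+*/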
+
+#endif /* included_clib_byte_order_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/cache.h b/src/vppinfra/cache.h
new file mode 100644
index 00000000..7464b77a
--- /dev/null
+++ b/src/vppinfra/cache.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_cache_h
+#define included_clib_cache_h
+
+#include <vppinfra/error_bootstrap.h>
+
+/*
+ * Allow CFLAGS to override the arch-specific cache line size
+ */
+#ifndef CLIB_LOG2_CACHE_LINE_BYTES
+
+#if defined(__x86_64__) || defined(__ARM_ARCH_7A__) || defined(__i386__)
+#define CLIB_LOG2_CACHE_LINE_BYTES 6
+#endif
+
+#ifdef __aarch64__
+#define CLIB_LOG2_CACHE_LINE_BYTES 7
+#endif
+
+/* Default cache line size of 32 bytes. */
+#ifndef CLIB_LOG2_CACHE_LINE_BYTES
+#define CLIB_LOG2_CACHE_LINE_BYTES 5
+#endif
+
+#endif /* CLIB_LOG2_CACHE_LINE_BYTES defined */
+
+#if (CLIB_LOG2_CACHE_LINE_BYTES >= 9)
+#error Cache line sizes of 512 bytes or greater are not supported
+#endif
+
+#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES)
+#define CLIB_CACHE_LINE_ALIGN_MARK(mark) u8 mark[0] __attribute__((aligned(CLIB_CACHE_LINE_BYTES)))
+
+/* Read/write arguments to __builtin_prefetch. */
+#define CLIB_PREFETCH_READ 0
+#define CLIB_PREFETCH_LOAD 0 /* alias for read */
+#define CLIB_PREFETCH_WRITE 1
+#define CLIB_PREFETCH_STORE 1 /* alias for write */
+
+#define _CLIB_PREFETCH(n,size,type) \
+ if ((size) > (n)*CLIB_CACHE_LINE_BYTES) \
+ __builtin_prefetch (_addr + (n)*CLIB_CACHE_LINE_BYTES, \
+ CLIB_PREFETCH_##type, \
+ /* locality */ 3);
+
+#define CLIB_PREFETCH(addr,size,type) \
+do { \
+ void * _addr = (addr); \
+ \
+ ASSERT ((size) <= 4*CLIB_CACHE_LINE_BYTES); \
+ _CLIB_PREFETCH (0, size, type); \
+ _CLIB_PREFETCH (1, size, type); \
+ _CLIB_PREFETCH (2, size, type); \
+ _CLIB_PREFETCH (3, size, type); \
+} while (0)
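+
+/* Illustrative usage (hypothetical pointer, not part of the original
+   header): prefetch the first two cache lines of a buffer for
+   writing:
+
+     CLIB_PREFETCH (b->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+   Only the READ/LOAD/WRITE/STORE tokens defined above are valid as
+   the type argument. */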
+
+#undef _
+
+#endif /* included_clib_cache_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h
new file mode 100644
index 00000000..fbb2a21c
--- /dev/null
+++ b/src/vppinfra/clib.h
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_h
+#define included_clib_h
+
+/* Standalone means we do not assume we are running on a Unix box. */
+#if ! defined (CLIB_STANDALONE) && ! defined (CLIB_LINUX_KERNEL)
+#define CLIB_UNIX
+#endif
+
+#include <vppinfra/types.h>
+
+/* Global DEBUG flag.  Setting this to 1 enables ASSERT
+   (see vppinfra/error.h) & other debugging code; 0 disables them. */
+#ifndef CLIB_DEBUG
+#define CLIB_DEBUG 0
+#endif
+
+#ifndef NULL
+#define NULL ((void *) 0)
+#endif
+
+#define BITS(x) (8*sizeof(x))
+#define ARRAY_LEN(x) (sizeof (x)/sizeof (x[0]))
+
+#define _STRUCT_FIELD(t,f) (((t *) 0)->f)
+#define STRUCT_OFFSET_OF(t,f) ((uword) & _STRUCT_FIELD (t, f))
+#define STRUCT_BIT_OFFSET_OF(t,f) (BITS(u8) * (uword) & _STRUCT_FIELD (t, f))
+#define STRUCT_SIZE_OF(t,f) (sizeof (_STRUCT_FIELD (t, f)))
+#define STRUCT_BITS_OF(t,f) (BITS (_STRUCT_FIELD (t, f)))
+#define STRUCT_ARRAY_LEN(t,f) ARRAY_LEN (_STRUCT_FIELD (t, f))
+#define STRUCT_MARK(mark) u8 mark[0]
+#define STRUCT_MARK_PTR(v, f) &(v)->f
+
+/* Stride in bytes between struct array elements. */
+#define STRUCT_STRIDE_OF(t,f) \
+ ( ((uword) & (((t *) 0)[1].f)) \
+ - ((uword) & (((t *) 0)[0].f)))
+
+#define STRUCT_OFFSET_OF_VAR(v,f) ((uword) (&(v)->f) - (uword) (v))
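+
+/* Illustrative usage of the STRUCT_* helpers (hypothetical type, not
+   part of the original header):
+
+     typedef struct { u8 flags; u32 addr; } my_hdr_t;
+     STRUCT_OFFSET_OF (my_hdr_t, addr)  // byte offset of addr
+     STRUCT_SIZE_OF (my_hdr_t, addr)    // sizeof (u32) == 4
+     STRUCT_STRIDE_OF (my_hdr_t, addr)  // bytes between addr fields in an array
+*/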
+
+/* Used to pack structure elements. */
+#define CLIB_PACKED(x) x __attribute__ ((packed))
+#define CLIB_UNUSED(x) x __attribute__ ((unused))
+
+#define never_inline __attribute__ ((__noinline__))
+
+#if CLIB_DEBUG > 0
+#define always_inline static inline
+#define static_always_inline static inline
+#else
+#define always_inline static inline __attribute__ ((__always_inline__))
+#define static_always_inline static inline __attribute__ ((__always_inline__))
+#endif
+
+
+/* Reserved (unused) structure element with address offset between
+ from and to. */
+#define CLIB_PAD_FROM_TO(from,to) u8 pad_##from[(to) - (from)]
+
+/* Hints to compiler about hot/cold code. */
+#define PREDICT_FALSE(x) __builtin_expect((x),0)
+#define PREDICT_TRUE(x) __builtin_expect((x),1)
+
+/* Full memory barrier (read and write). */
+#define CLIB_MEMORY_BARRIER() __sync_synchronize ()
+
+#if __x86_64__
+#define CLIB_MEMORY_STORE_BARRIER() __builtin_ia32_sfence ()
+#else
+#define CLIB_MEMORY_STORE_BARRIER() __sync_synchronize ()
+#endif
+
+/* Arranges for function to be called before main. */
+#define INIT_FUNCTION(decl) \
+ decl __attribute ((constructor)); \
+ decl
+
+/* Arranges for function to be called before exit. */
+#define EXIT_FUNCTION(decl) \
+ decl __attribute ((destructor)); \
+ decl
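+
+/* Illustrative usage (hypothetical function, not part of the
+   original header):
+
+     INIT_FUNCTION (static void my_init (void))
+     {
+       // runs before main()
+     }
+*/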
+
+/* Use __builtin_clz if available. */
+#ifdef __GNUC__
+#include <features.h>
+#if __GNUC_PREREQ(3, 4)
+#if uword_bits == 64
+#define count_leading_zeros(count,x) count = __builtin_clzll (x)
+#define count_trailing_zeros(count,x) count = __builtin_ctzll (x)
+#else
+#define count_leading_zeros(count,x) count = __builtin_clzl (x)
+#define count_trailing_zeros(count,x) count = __builtin_ctzl (x)
+#endif
+#endif
+#endif
+
+#ifndef count_leading_zeros
+
+/* Misc. integer arithmetic functions. */
+#if defined (i386)
+#define count_leading_zeros(count, x) \
+ do { \
+ word _clz; \
+ __asm__ ("bsrl %1,%0" \
+ : "=r" (_clz) : "rm" ((word) (x)));\
+ (count) = _clz ^ 31; \
+ } while (0)
+
+#define count_trailing_zeros(count, x) \
+ __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((word)(x)))
+#endif /* i386 */
+
+#if defined (__alpha__) && defined (HAVE_CIX)
+#define count_leading_zeros(count, x) \
+ __asm__ ("ctlz %1,%0" \
+ : "=r" ((word) (count)) \
+ : "r" ((word) (x)))
+#define count_trailing_zeros(count, x) \
+ __asm__ ("cttz %1,%0" \
+ : "=r" ((word) (count)) \
+ : "r" ((word) (x)))
+#endif /* alpha && HAVE_CIX */
+
+#if __mips >= 4
+
+/* Select between 32/64 opcodes. */
+#if uword_bits == 32
+#define count_leading_zeros(_count, _x) \
+ __asm__ ("clz %[count],%[x]" \
+ : [count] "=r" ((word) (_count)) \
+ : [x] "r" ((word) (_x)))
+#else
+#define count_leading_zeros(_count, _x) \
+ __asm__ ("dclz %[count],%[x]" \
+ : [count] "=r" ((word) (_count)) \
+ : [x] "r" ((word) (_x)))
+#endif
+
+#endif /* __mips >= 4 */
+
+#endif /* count_leading_zeros */
+
+#if defined (count_leading_zeros)
+always_inline uword
+min_log2 (uword x)
+{
+ uword n;
+ count_leading_zeros (n, x);
+ return BITS (uword) - n - 1;
+}
+#else
+always_inline uword
+min_log2 (uword x)
+{
+ uword a = x, b = BITS (uword) / 2, c = 0, r = 0;
+
+ /* Reduce x to 4 bit result. */
+#define _ \
+{ \
+ c = a >> b; \
+ if (c) a = c; \
+ if (c) r += b; \
+ b /= 2; \
+}
+
+ if (BITS (uword) > 32)
+ _;
+ _;
+ _;
+ _;
+#undef _
+
+ /* Do table lookup on 4 bit partial. */
+ if (BITS (uword) > 32)
+ {
+ const u64 table = 0x3333333322221104LL;
+ uword t = (table >> (4 * a)) & 0xf;
+ r = t < 4 ? r + t : ~0;
+ }
+ else
+ {
+ const u32 table = 0x22221104;
+ uword t = (a & 8) ? 3 : ((table >> (4 * a)) & 0xf);
+ r = t < 4 ? r + t : ~0;
+ }
+
+ return r;
+}
+#endif
+
+always_inline uword
+max_log2 (uword x)
+{
+ uword l = min_log2 (x);
+ if (x > ((uword) 1 << l))
+ l++;
+ return l;
+}
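+
+/* Illustrative values (not part of the original header):
+
+     min_log2 (1) == 0     max_log2 (1) == 0
+     min_log2 (9) == 3     max_log2 (9) == 4   // rounds up
+     min_log2 (16) == 4    max_log2 (16) == 4
+*/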
+
+always_inline u64
+min_log2_u64 (u64 x)
+{
+ if (BITS (uword) == 64)
+ return min_log2 (x);
+ else
+ {
+ uword l, y;
+ y = x;
+ l = 0;
+ if (y == 0)
+ {
+ l += 32;
+ x >>= 32;
+ }
+ l += min_log2 (x);
+ return l;
+ }
+}
+
+always_inline uword
+pow2_mask (uword x)
+{
+ return ((uword) 1 << x) - (uword) 1;
+}
+
+always_inline uword
+max_pow2 (uword x)
+{
+ word y = (word) 1 << min_log2 (x);
+ if (x > y)
+ y *= 2;
+ return y;
+}
+
+always_inline uword
+is_pow2 (uword x)
+{
+ return 0 == (x & (x - 1));
+}
+
+always_inline uword
+round_pow2 (uword x, uword pow2)
+{
+ return (x + pow2 - 1) & ~(pow2 - 1);
+}
+
+always_inline u64
+round_pow2_u64 (u64 x, u64 pow2)
+{
+ return (x + pow2 - 1) & ~(pow2 - 1);
+}
+
+always_inline uword
+first_set (uword x)
+{
+ return x & -x;
+}
+
+always_inline uword
+log2_first_set (uword x)
+{
+ uword result;
+#ifdef count_trailing_zeros
+ count_trailing_zeros (result, x);
+#else
+ result = min_log2 (first_set (x));
+#endif
+ return result;
+}
+
+always_inline f64
+flt_round_down (f64 x)
+{
+ return (int) x;
+}
+
+always_inline word
+flt_round_nearest (f64 x)
+{
+ return (word) (x + .5);
+}
+
+always_inline f64
+flt_round_to_multiple (f64 x, f64 f)
+{
+ return f * flt_round_nearest (x / f);
+}
+
+#define clib_max(x,y) \
+({ \
+ __typeof__ (x) _x = (x); \
+ __typeof__ (y) _y = (y); \
+ _x > _y ? _x : _y; \
+})
+
+#define clib_min(x,y) \
+({ \
+ __typeof__ (x) _x = (x); \
+ __typeof__ (y) _y = (y); \
+ _x < _y ? _x : _y; \
+})
+
+#define clib_abs(x) \
+({ \
+ __typeof__ (x) _x = (x); \
+ _x < 0 ? -_x : _x; \
+})
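+
+/* As statement expressions, these evaluate each argument exactly
+   once. Illustrative values (not part of the original header):
+
+     clib_max (3, 7)  // 7
+     clib_min (3, 7)  // 3
+     clib_abs (-5)    // 5
+*/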
+
+/* Standard standalone-only function declarations. */
+#ifndef CLIB_UNIX
+void clib_standalone_init (void *memory, uword memory_bytes);
+
+void qsort (void *base, uword n, uword size,
+ int (*)(const void *, const void *));
+#endif
+
+/* Stack backtrace. */
+uword
+clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip);
+
+#endif /* included_clib_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/clib_error.h b/src/vppinfra/clib_error.h
new file mode 100644
index 00000000..45f18eb1
--- /dev/null
+++ b/src/vppinfra/clib_error.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_clib_error_h
+#define included_clib_error_h
+
+#include <vppinfra/types.h>
+
+typedef struct
+{
+ /* Error message. */
+ u8 *what;
+
+ /* Where error occurred (e.g. __FUNCTION__ __LINE__) */
+ const u8 *where;
+
+ uword flags;
+
+ /* Error code (e.g. errno for Unix errors). */
+ any code;
+} clib_error_t;
+
+#endif
diff --git a/src/vppinfra/cpu.c b/src/vppinfra/cpu.c
new file mode 100644
index 00000000..a26d5c9a
--- /dev/null
+++ b/src/vppinfra/cpu.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vppinfra/cpu.h>
+
+#define foreach_x86_cpu_uarch \
+ _(0x06, 0x4f, "Broadwell", "Broadwell-EP/EX") \
+ _(0x06, 0x3d, "Broadwell", "Broadwell") \
+ _(0x06, 0x3f, "Haswell", "Haswell-E") \
+ _(0x06, 0x3c, "Haswell", "Haswell") \
+ _(0x06, 0x3e, "IvyBridge", "IvyBridge-E/EN/EP") \
+ _(0x06, 0x3a, "IvyBridge", "IvyBridge") \
+ _(0x06, 0x2a, "SandyBridge", "SandyBridge") \
+ _(0x06, 0x2d, "SandyBridge", "SandyBridge-E/EN/EP") \
+ _(0x06, 0x25, "Westmere", "Arrandale,Clarksdale") \
+ _(0x06, 0x2c, "Westmere", "Westmere-EP/EX,Gulftown") \
+ _(0x06, 0x2f, "Westmere", "Westmere-EX") \
+ _(0x06, 0x1e, "Nehalem", "Clarksfield,Lynnfield,Jasper Forest") \
+  _(0x06, 0x1a, "Nehalem", "Nehalem-EP,Bloomfield") \
+ _(0x06, 0x2e, "Nehalem", "Nehalem-EX") \
+ _(0x06, 0x17, "Penryn", "Yorkfield,Wolfdale,Penryn,Harpertown (DP)") \
+ _(0x06, 0x1d, "Penryn", "Dunnington (MP)") \
+ _(0x06, 0x37, "Atom", "Bay Trail") \
+ _(0x06, 0x36, "Atom", "Cedarview") \
+ _(0x06, 0x26, "Atom", "Lincroft") \
+ _(0x06, 0x1c, "Atom", "Pineview/Silverthorne")
+
+u8 *
+format_cpu_uarch (u8 * s, va_list * args)
+{
+#if __x86_64__
+ u32 __attribute__ ((unused)) eax, ebx, ecx, edx;
+ u8 model, family;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0)
+ return format (s, "unknown (missing cpuid)");
+
+ model = ((eax >> 4) & 0x0f) | ((eax >> 12) & 0xf0);
+ family = (eax >> 8) & 0x0f;
+
+#define _(f,m,a,c) if ((model == m) && (family == f)) return format(s, "%s (%s)", a, c);
+ foreach_x86_cpu_uarch
+#undef _
+ return format (s, "unknown (family 0x%02x model 0x%02x)", family, model);
+
+#else /* ! __x86_64__ */
+ return format (s, "unknown");
+#endif
+}
+
+u8 *
+format_cpu_model_name (u8 * s, va_list * args)
+{
+#if __x86_64__
+ u32 __attribute__ ((unused)) eax, ebx, ecx, edx;
+ u8 *name = 0;
+ u32 *name_u32;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0)
+ return format (s, "unknown (missing cpuid)");
+
+ __get_cpuid (0x80000000, &eax, &ebx, &ecx, &edx);
+ if (eax < 0x80000004)
+ return format (s, "unknown (missing ext feature)");
+
+ vec_validate (name, 48);
+ name_u32 = (u32 *) name;
+
+ __get_cpuid (0x80000002, &eax, &ebx, &ecx, &edx);
+ name_u32[0] = eax;
+ name_u32[1] = ebx;
+ name_u32[2] = ecx;
+ name_u32[3] = edx;
+
+ __get_cpuid (0x80000003, &eax, &ebx, &ecx, &edx);
+ name_u32[4] = eax;
+ name_u32[5] = ebx;
+ name_u32[6] = ecx;
+ name_u32[7] = edx;
+
+ __get_cpuid (0x80000004, &eax, &ebx, &ecx, &edx);
+ name_u32[8] = eax;
+ name_u32[9] = ebx;
+ name_u32[10] = ecx;
+ name_u32[11] = edx;
+
+ s = format (s, "%s", name);
+ vec_free (name);
+ return s;
+
+#elif defined(__aarch64__)
+ return format (s, "armv8");
+#else /* ! __x86_64__ */
+ return format (s, "unknown");
+#endif
+}
+
+u8 *
+format_cpu_flags (u8 * s, va_list * args)
+{
+#if defined(__x86_64__)
+#define _(flag, func, reg, bit) \
+ if (clib_cpu_supports_ ## flag()) \
+ s = format (s, #flag " ");
+  foreach_x86_64_flags
+#undef _
+  return s;
+#else /* ! __x86_64__ */
+ return format (s, "unknown");
+#endif
+}
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
new file mode 100644
index 00000000..9c149f3f
--- /dev/null
+++ b/src/vppinfra/cpu.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_clib_cpu_h
+#define included_clib_cpu_h
+
+#include <vppinfra/format.h>
+
+/*
+ * Multi-architecture support. Adding a new entry produces a new
+ * graph node function variant optimized for the specified CPU
+ * microarchitecture. Order matters for runtime selection: the
+ * first match wins.
+ */
+
+#if __x86_64__ && CLIB_DEBUG == 0
+#define foreach_march_variant(macro, x) \
+ macro(avx2, x, "arch=core-avx2")
+#else
+#define foreach_march_variant(macro, x)
+#endif
+
+
+#if __GNUC__ > 4 && !__clang__
+#define CLIB_CPU_OPTIMIZED __attribute__ ((optimize ("tree-vectorize")))
+#else
+#define CLIB_CPU_OPTIMIZED
+#endif
+
+
+#define CLIB_MULTIARCH_ARCH_CHECK(arch, fn, tgt) \
+ if (clib_cpu_supports_ ## arch()) \
+ return & fn ## _ ##arch;
+
+#define CLIB_MULTIARCH_SELECT_FN(fn,...) \
+ __VA_ARGS__ void * fn ## _multiarch_select(void) \
+{ \
+ foreach_march_variant(CLIB_MULTIARCH_ARCH_CHECK, fn) \
+ return & fn; \
+}
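+
+/* A minimal usage sketch (hypothetical function names, not part of
+   the original header): given my_node_fn and a compiled
+   my_node_fn_avx2 variant,
+
+     CLIB_MULTIARCH_SELECT_FN (my_node_fn);
+
+   defines my_node_fn_multiarch_select(), which returns
+   &my_node_fn_avx2 on CPUs supporting AVX2 and &my_node_fn
+   otherwise. */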
+
+
+#define foreach_x86_64_flags \
+_ (sse3, 1, ecx, 0) \
+_ (ssse3, 1, ecx, 9) \
+_ (sse41, 1, ecx, 19) \
+_ (sse42, 1, ecx, 20) \
+_ (avx, 1, ecx, 28) \
+_ (avx2, 7, ebx, 5) \
+_ (avx512f, 7, ebx, 16) \
+_ (aes, 1, ecx, 25) \
+_ (sha, 7, ebx, 29) \
+_ (invariant_tsc, 0x80000007, edx, 8)
+
+#if defined(__x86_64__)
+#include "cpuid.h"
+
+static inline int
+clib_get_cpuid (const u32 lev, u32 * eax, u32 * ebx, u32 * ecx, u32 * edx)
+{
+ if ((u32) __get_cpuid_max (0x80000000 & lev, 0) < lev)
+ return 0;
+ if (lev == 7)
+ __cpuid_count (lev, 0, *eax, *ebx, *ecx, *edx);
+ else
+ __cpuid (lev, *eax, *ebx, *ecx, *edx);
+ return 1;
+}
+
+
+#define _(flag, func, reg, bit) \
+static inline int \
+clib_cpu_supports_ ## flag() \
+{ \
+ u32 __attribute__((unused)) eax, ebx = 0, ecx = 0, edx = 0; \
+ clib_get_cpuid (func, &eax, &ebx, &ecx, &edx); \
+ \
+ return ((reg & (1 << bit)) != 0); \
+}
+foreach_x86_64_flags
+#undef _
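+
+/* One clib_cpu_supports_<flag>() predicate is generated per
+   foreach_x86_64_flags entry above. Illustrative usage (not part of
+   the original header):
+
+     if (clib_cpu_supports_avx2 ())
+       ;  // take an AVX2-optimized path
+*/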
+#else
+
+#define _(flag, func, reg, bit) \
+static inline int clib_cpu_supports_ ## flag() { return 0; }
+foreach_x86_64_flags
+#undef _
+#endif
+
+format_function_t format_cpu_uarch;
+format_function_t format_cpu_model_name;
+format_function_t format_cpu_flags;
+
+#endif /* included_clib_cpu_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/crc32.h b/src/vppinfra/crc32.h
new file mode 100644
index 00000000..bbfc41cc
--- /dev/null
+++ b/src/vppinfra/crc32.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_crc32_h__
+#define __included_crc32_h__
+
+#include <vppinfra/clib.h>
+
+#if __SSE4_2__
+#define clib_crc32c_uses_intrinsics
+#include <x86intrin.h>
+
+static_always_inline u32
+clib_crc32c (u8 * s, int len)
+{
+ u32 v = 0;
+
+#if __x86_64__
+ for (; len >= 8; len -= 8, s += 8)
+ v = _mm_crc32_u64 (v, *((u64 *) s));
+#else
+  /* Workaround for a weird GCC bug with _mm_crc32_u32
+     that appears at -O2 optimization. */
+  asm volatile ("":::"memory");
+#endif
+
+ for (; len >= 4; len -= 4, s += 4)
+ v = _mm_crc32_u32 (v, *((u32 *) s));
+
+ for (; len >= 2; len -= 2, s += 2)
+ v = _mm_crc32_u16 (v, *((u16 *) s));
+
+ for (; len >= 1; len -= 1, s += 1)
+    v = _mm_crc32_u8 (v, *((u8 *) s));
+
+ return v;
+}
+
+#elif __ARM_FEATURE_CRC32
+#define clib_crc32c_uses_intrinsics
+#include <arm_acle.h>
+
+static_always_inline u32
+clib_crc32c (u8 * s, int len)
+{
+ u32 v = 0;
+
+ for (; len >= 8; len -= 8, s += 8)
+ v = __crc32cd (v, *((u64 *) s));
+
+ for (; len >= 4; len -= 4, s += 4)
+ v = __crc32cw (v, *((u32 *) s));
+
+ for (; len >= 2; len -= 2, s += 2)
+ v = __crc32ch (v, *((u16 *) s));
+
+ for (; len >= 1; len -= 1, s += 1)
+ v = __crc32cb (v, *((u8 *) s));
+
+ return v;
+}
+
+#endif
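+
+/* Illustrative usage (not part of the original header), available
+   only when clib_crc32c_uses_intrinsics is defined:
+
+     u8 key[] = "abc";
+     u32 hash = clib_crc32c (key, 3);  // CRC32-C of the 3-byte key
+*/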
+#endif /* __included_crc32_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/dir.dox b/src/vppinfra/dir.dox
new file mode 100644
index 00000000..440c44e8
--- /dev/null
+++ b/src/vppinfra/dir.dox
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016 Comcast Cable Communications Management, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/** @dir
+ * @brief VPP infrastructure library source.
+ */
diff --git a/src/vppinfra/dlist.h b/src/vppinfra/dlist.h
new file mode 100644
index 00000000..e445b39f
--- /dev/null
+++ b/src/vppinfra/dlist.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef included_dlist_h
+#define included_dlist_h
+
+#include <stdarg.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/cache.h>
+
+typedef struct
+{
+ u32 next;
+ u32 prev;
+ u32 value;
+} dlist_elt_t;
+
+static inline void
+clib_dlist_init (dlist_elt_t * pool, u32 index)
+{
+ dlist_elt_t *head = pool_elt_at_index (pool, index);
+ memset (head, 0xFF, sizeof (*head));
+}
+
+static inline void
+clib_dlist_addtail (dlist_elt_t * pool, u32 head_index, u32 new_index)
+{
+ dlist_elt_t *head = pool_elt_at_index (pool, head_index);
+ u32 old_last_index;
+ dlist_elt_t *old_last;
+ dlist_elt_t *new;
+
+ ASSERT (head->value == ~0);
+
+ new = pool_elt_at_index (pool, new_index);
+
+ if (PREDICT_FALSE (head->next == ~0))
+ {
+ head->next = head->prev = new_index;
+ new->next = new->prev = head_index;
+ return;
+ }
+
+ old_last_index = head->prev;
+ old_last = pool_elt_at_index (pool, old_last_index);
+
+ new->next = old_last->next;
+ new->prev = old_last_index;
+ old_last->next = new_index;
+ head->prev = new_index;
+}
+
+static inline void
+clib_dlist_addhead (dlist_elt_t * pool, u32 head_index, u32 new_index)
+{
+ dlist_elt_t *head = pool_elt_at_index (pool, head_index);
+ dlist_elt_t *old_first;
+ u32 old_first_index;
+ dlist_elt_t *new;
+
+ ASSERT (head->value == ~0);
+
+ new = pool_elt_at_index (pool, new_index);
+
+ if (PREDICT_FALSE (head->next == ~0))
+ {
+ head->next = head->prev = new_index;
+ new->next = new->prev = head_index;
+ return;
+ }
+
+ old_first_index = head->next;
+ old_first = pool_elt_at_index (pool, old_first_index);
+
+ new->next = old_first_index;
+ new->prev = old_first->prev;
+ old_first->prev = new_index;
+ head->next = new_index;
+}
+
+static inline void
+clib_dlist_remove (dlist_elt_t * pool, u32 index)
+{
+ dlist_elt_t *elt = pool_elt_at_index (pool, index);
+ dlist_elt_t *next_elt, *prev_elt;
+
+  /* elements may be removed, but never the list head itself */
+ ASSERT (elt->value != ~0);
+
+ next_elt = pool_elt_at_index (pool, elt->next);
+ prev_elt = pool_elt_at_index (pool, elt->prev);
+
+ next_elt->prev = elt->prev;
+ prev_elt->next = elt->next;
+
+ elt->prev = elt->next = ~0;
+}
+
+static inline u32
+clib_dlist_remove_head (dlist_elt_t * pool, u32 head_index)
+{
+ dlist_elt_t *head = pool_elt_at_index (pool, head_index);
+ u32 rv;
+
+ ASSERT (head->value == ~0);
+
+ if (head->next == ~0 || (head->next == head_index))
+ return ~0;
+
+ rv = head->next;
+ clib_dlist_remove (pool, rv);
+ return rv;
+}
+
+static inline u32
+clib_dlist_remove_tail (dlist_elt_t * pool, u32 head_index)
+{
+ dlist_elt_t *head = pool_elt_at_index (pool, head_index);
+ u32 rv;
+
+ ASSERT (head->value == ~0);
+
+  if (head->prev == ~0 || (head->prev == head_index))
+    return ~0;
+
+ rv = head->prev;
+ clib_dlist_remove (pool, rv);
+ return rv;
+}
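+
+/* A minimal usage sketch (hypothetical values, not part of the
+   original header). Head and elements share one pool; the head is
+   marked by value == ~0, which clib_dlist_init sets:
+
+     dlist_elt_t *pool = 0, *head, *elt;
+     u32 head_index, elt_index;
+
+     pool_get (pool, head);
+     head_index = head - pool;
+     clib_dlist_init (pool, head_index);
+
+     pool_get (pool, elt);
+     elt_index = elt - pool;
+     clib_dlist_init (pool, elt_index);
+     pool[elt_index].value = 42;
+     clib_dlist_addtail (pool, head_index, elt_index);
+
+     elt_index = clib_dlist_remove_head (pool, head_index);  // the element just added
+*/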
+
+#endif /* included_dlist_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/elf.c b/src/vppinfra/elf.c
new file mode 100644
index 00000000..931fbccc
--- /dev/null
+++ b/src/vppinfra/elf.c
@@ -0,0 +1,2040 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/bitmap.h>
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/elf.h>
+
+always_inline void
+elf_swap_first_header (elf_main_t * em, elf_first_header_t * h)
+{
+ h->architecture = elf_swap_u16 (em, h->architecture);
+ h->file_type = elf_swap_u16 (em, h->file_type);
+ h->file_version = elf_swap_u32 (em, h->file_version);
+}
+
+always_inline void
+elf_swap_verneed (elf_dynamic_version_need_t * n)
+{
+#define _(t,f) n->f = clib_byte_swap_##t (n->f);
+ foreach_elf_dynamic_version_need_field
+#undef _
+}
+
+always_inline void
+elf_swap_verneed_aux (elf_dynamic_version_need_aux_t * n)
+{
+#define _(t,f) n->f = clib_byte_swap_##t (n->f);
+ foreach_elf_dynamic_version_need_aux_field
+#undef _
+}
+
+clib_error_t *
+elf_get_section_by_name (elf_main_t * em, char *section_name,
+ elf_section_t ** result)
+{
+ uword *p;
+
+ p = hash_get_mem (em->section_by_name, section_name);
+ if (!p)
+ return clib_error_return (0, "no such section `%s'", section_name);
+
+ *result = vec_elt_at_index (em->sections, p[0]);
+ return 0;
+}
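+
+/* Illustrative usage (not part of the original file):
+
+     elf_section_t *s;
+     clib_error_t *err = elf_get_section_by_name (em, ".text", &s);
+     if (err)
+       clib_error_report (err);
+*/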
+
+elf_section_t *
+elf_get_section_by_start_address_no_check (elf_main_t * em,
+ uword start_address)
+{
+ uword *p = hash_get (em->section_by_start_address, start_address);
+ return p ? vec_elt_at_index (em->sections, p[0]) : 0;
+}
+
+clib_error_t *
+elf_get_section_by_start_address (elf_main_t * em, uword start_address,
+ elf_section_t ** result)
+{
+ elf_section_t *s =
+ elf_get_section_by_start_address_no_check (em, start_address);
+ if (!s)
+ return clib_error_return (0, "no section with address 0x%wx",
+ start_address);
+ *result = s;
+ return 0;
+}
+
+static u8 *
+format_elf_section_type (u8 * s, va_list * args)
+{
+ elf_section_type_t type = va_arg (*args, elf_section_type_t);
+ char *t = 0;
+
+ switch (type)
+ {
+#define _(f,i) case ELF_SECTION_##f: t = #f; break;
+ foreach_elf_section_type
+#undef _
+ }
+
+ if (!t)
+ s = format (s, "unknown 0x%x", type);
+ else
+ s = format (s, "%s", t);
+ return s;
+}
+
+static u8 *
+format_elf_section (u8 * s, va_list * args)
+{
+ elf_main_t *em = va_arg (*args, elf_main_t *);
+ elf_section_t *es = va_arg (*args, elf_section_t *);
+ elf64_section_header_t *h = &es->header;
+
+ if (!h)
+ return format (s, "%=40s%=10s%=20s%=8s%=16s%=16s%=16s",
+ "Name", "Index", "Type", "Size", "Align", "Address",
+ "File offset");
+
+ s = format (s, "%-40s%10d%=20U%8Lx%16d%16Lx %Lx-%Lx",
+ elf_section_name (em, es),
+ es->index,
+ format_elf_section_type, h->type,
+ h->file_size,
+ h->align,
+ h->exec_address, h->file_offset, h->file_offset + h->file_size);
+
+ if (h->flags != 0)
+ {
+#define _(f,i) \
+ if (h->flags & ELF_SECTION_FLAG_##f) s = format (s, " %s", #f);
+ foreach_elf_section_flag;
+#undef _
+ }
+
+ return s;
+}
+
+static u8 *
+format_elf_segment_type (u8 * s, va_list * args)
+{
+ elf_segment_type_t type = va_arg (*args, elf_segment_type_t);
+ char *t = 0;
+
+ switch (type)
+ {
+#define _(f,i) case ELF_SEGMENT_##f: t = #f; break;
+ foreach_elf_segment_type
+#undef _
+ }
+
+ if (!t)
+ s = format (s, "unknown 0x%x", type);
+ else
+ s = format (s, "%s", t);
+ return s;
+}
+
+static u8 *
+format_elf_segment (u8 * s, va_list * args)
+{
+ elf_segment_t *es = va_arg (*args, elf_segment_t *);
+ elf64_segment_header_t *h = &es->header;
+
+ if (!h)
+ return format (s, "%=16s%=16s%=16s%=16s",
+ "Type", "Virt. Address", "Phys. Address", "Size");
+
+ s = format (s, "%=16U%16Lx%16Lx%16Lx%16Lx",
+ format_elf_segment_type, h->type,
+ h->virtual_address,
+ h->physical_address, h->memory_size, h->file_offset);
+
+ if (h->flags != 0)
+ {
+#define _(f,i) \
+ if (h->flags & ELF_SEGMENT_FLAG_##f) s = format (s, " %s", #f);
+ foreach_elf_segment_flag;
+#undef _
+ }
+
+ return s;
+}
+
+static u8 *
+format_elf_symbol_binding_and_type (u8 * s, va_list * args)
+{
+ int bt = va_arg (*args, int);
+ int b, t;
+ char *type_string = 0;
+ char *binding_string = 0;
+
+ switch ((b = ((bt >> 4) & 0xf)))
+ {
+#define _(f,n) case n: binding_string = #f; break;
+ foreach_elf_symbol_binding;
+#undef _
+ default:
+ break;
+ }
+
+ switch ((t = ((bt >> 0) & 0xf)))
+ {
+#define _(f,n) case n: type_string = #f; break;
+ foreach_elf_symbol_type;
+#undef _
+ default:
+ break;
+ }
+
+ if (binding_string)
+ s = format (s, "%s", binding_string);
+ else
+ s = format (s, "binding 0x%x", b);
+
+ if (type_string)
+ s = format (s, " %s", type_string);
+ else
+ s = format (s, " type 0x%x", t);
+
+ return s;
+}
+
+static u8 *
+format_elf_symbol_visibility (u8 * s, va_list * args)
+{
+ int visibility = va_arg (*args, int);
+ char *t = 0;
+
+ switch (visibility)
+ {
+#define _(f,n) case n: t = #f; break;
+ foreach_elf_symbol_visibility
+#undef _
+ }
+
+ if (t)
+ return format (s, "%s", t);
+ else
+ return format (s, "unknown 0x%x", visibility);
+}
+
+static u8 *
+format_elf_symbol_section_name (u8 * s, va_list * args)
+{
+ elf_main_t *em = va_arg (*args, elf_main_t *);
+ int si = va_arg (*args, int);
+ char *t = 0;
+
+ if (si < vec_len (em->sections))
+ {
+ elf_section_t *es = vec_elt_at_index (em->sections, si);
+ return format (s, "%s", elf_section_name (em, es));
+ }
+
+ if (si >= ELF_SYMBOL_SECTION_RESERVED_LO
+ && si <= ELF_SYMBOL_SECTION_RESERVED_HI)
+ {
+ switch (si)
+ {
+#define _(f,n) case n: t = #f; break;
+ foreach_elf_symbol_reserved_section_index
+#undef _
+ default:
+ break;
+ }
+ }
+
+ if (t)
+ return format (s, "%s", t);
+ else
+ return format (s, "unknown 0x%x", si);
+}
+
+u8 *
+format_elf_symbol (u8 * s, va_list * args)
+{
+ elf_main_t *em = va_arg (*args, elf_main_t *);
+ elf_symbol_table_t *t = va_arg (*args, elf_symbol_table_t *);
+ elf64_symbol_t *sym = va_arg (*args, elf64_symbol_t *);
+
+ if (!sym)
+ return format (s, "%=32s%=16s%=16s%=16s%=16s%=16s",
+ "Symbol", "Size", "Value", "Type", "Visibility",
+ "Section");
+
+ s = format (s, "%-32s%16Ld%16Lx%=16U%=16U%U",
+ elf_symbol_name (t, sym),
+ sym->size, sym->value,
+ format_elf_symbol_binding_and_type, sym->binding_and_type,
+ format_elf_symbol_visibility, sym->visibility,
+ format_elf_symbol_section_name, em, sym->section_index);
+
+ return s;
+}
+
+static u8 *
+format_elf_relocation_type (u8 * s, va_list * args)
+{
+ elf_main_t *em = va_arg (*args, elf_main_t *);
+ int type = va_arg (*args, int);
+ char *t = 0;
+
+ switch (em->first_header.architecture)
+ {
+#define _(f,i) [i] = #f,
+
+ case ELF_ARCH_X86_64:
+ {
+ static char *tab[] = {
+ foreach_elf_x86_64_relocation_type
+ };
+
+#undef _
+ if (type < ARRAY_LEN (tab))
+ t = tab[type];
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ if (!t)
+ s = format (s, "0x%02x", type);
+ else
+ s = format (s, "%s", t);
+
+ return s;
+}
+
+static u8 *
+format_elf_relocation (u8 * s, va_list * args)
+{
+ elf_main_t *em = va_arg (*args, elf_main_t *);
+ elf_relocation_with_addend_t *r =
+ va_arg (*args, elf_relocation_with_addend_t *);
+ elf_symbol_table_t *t;
+ elf64_symbol_t *sym;
+
+ if (!r)
+ return format (s, "%=16s%=16s%=16s", "Address", "Type", "Symbol");
+
+ t = vec_elt_at_index (em->symbol_tables, 0);
+ sym = vec_elt_at_index (t->symbols, r->symbol_and_type >> 32);
+
+ s = format (s, "%16Lx%16U",
+ r->address,
+ format_elf_relocation_type, em, r->symbol_and_type & 0xff);
+
+ if (sym->section_index != 0)
+ {
+ elf_section_t *es;
+ es = vec_elt_at_index (em->sections, sym->section_index);
+ s = format (s, " (section %s)", elf_section_name (em, es));
+ }
+
+ if (sym->name != 0)
+ s = format (s, " %s", elf_symbol_name (t, sym));
+
+ {
+ i64 a = r->addend;
+ if (a != 0)
+ s = format (s, " %c 0x%Lx", a > 0 ? '+' : '-', a > 0 ? a : -a);
+ }
+
+ return s;
+}
+
+static u8 *
+format_elf_dynamic_entry_type (u8 * s, va_list * args)
+{
+ u32 type = va_arg (*args, u32);
+ char *t = 0;
+ switch (type)
+ {
+#define _(f,n) case n: t = #f; break;
+ foreach_elf_dynamic_entry_type;
+#undef _
+ default:
+ break;
+ }
+ if (t)
+ return format (s, "%s", t);
+ else
+ return format (s, "unknown 0x%x", type);
+}
+
+static u8 *
+format_elf_dynamic_entry (u8 * s, va_list * args)
+{
+ elf_main_t *em = va_arg (*args, elf_main_t *);
+ elf64_dynamic_entry_t *e = va_arg (*args, elf64_dynamic_entry_t *);
+
+ if (!e)
+ return format (s, "%=40s%=16s", "Type", "Data");
+
+ s = format (s, "%=40U", format_elf_dynamic_entry_type, (u32) e->type);
+ switch (e->type)
+ {
+ case ELF_DYNAMIC_ENTRY_NEEDED_LIBRARY:
+ case ELF_DYNAMIC_ENTRY_RPATH:
+ case ELF_DYNAMIC_ENTRY_RUN_PATH:
+ s = format (s, "%s", em->dynamic_string_table + e->data);
+ break;
+
+ case ELF_DYNAMIC_ENTRY_INIT_FUNCTION:
+ case ELF_DYNAMIC_ENTRY_FINI_FUNCTION:
+ case ELF_DYNAMIC_ENTRY_SYMBOL_HASH:
+ case ELF_DYNAMIC_ENTRY_GNU_HASH:
+ case ELF_DYNAMIC_ENTRY_STRING_TABLE:
+ case ELF_DYNAMIC_ENTRY_SYMBOL_TABLE:
+ case ELF_DYNAMIC_ENTRY_PLT_GOT:
+ case ELF_DYNAMIC_ENTRY_PLT_RELOCATION_ADDRESS:
+ case ELF_DYNAMIC_ENTRY_RELA_ADDRESS:
+ case ELF_DYNAMIC_ENTRY_VERSION_NEED:
+ case ELF_DYNAMIC_ENTRY_VERSYM:
+ {
+ elf_section_t *es =
+ elf_get_section_by_start_address_no_check (em, e->data);
+ if (es)
+ s = format (s, "section %s", elf_section_name (em, es));
+ else
+ s = format (s, "0x%Lx", e->data);
+ break;
+ }
+
+ default:
+ s = format (s, "0x%Lx", e->data);
+ break;
+ }
+
+ return s;
+}
+
+static u8 *
+format_elf_architecture (u8 * s, va_list * args)
+{
+ int a = va_arg (*args, int);
+ char *t;
+
+ switch (a)
+ {
+#define _(f,n) case n: t = #f; break;
+ foreach_elf_architecture;
+#undef _
+ default:
+ return format (s, "unknown 0x%x", a);
+ }
+
+ return format (s, "%s", t);
+}
+
+static u8 *
+format_elf_abi (u8 * s, va_list * args)
+{
+ int a = va_arg (*args, int);
+ char *t;
+
+ switch (a)
+ {
+#define _(f,n) case n: t = #f; break;
+ foreach_elf_abi;
+#undef _
+ default:
+ return format (s, "unknown 0x%x", a);
+ }
+
+ return format (s, "%s", t);
+}
+
+static u8 *
+format_elf_file_class (u8 * s, va_list * args)
+{
+ int a = va_arg (*args, int);
+ char *t;
+
+ switch (a)
+ {
+#define _(f) case ELF_##f: t = #f; break;
+ foreach_elf_file_class;
+#undef _
+ default:
+ return format (s, "unknown 0x%x", a);
+ }
+
+ return format (s, "%s", t);
+}
+
+static u8 *
+format_elf_file_type (u8 * s, va_list * args)
+{
+ int a = va_arg (*args, int);
+ char *t;
+
+ if (a >= ELF_ARCH_SPECIFIC_LO && a <= ELF_ARCH_SPECIFIC_HI)
+ return format (s, "arch-specific 0x%x", a - ELF_ARCH_SPECIFIC_LO);
+
+ if (a >= ELF_OS_SPECIFIC_LO && a <= ELF_OS_SPECIFIC_HI)
+ return format (s, "os-specific 0x%x", a - ELF_OS_SPECIFIC_LO);
+
+ switch (a)
+ {
+#define _(f,n) case n: t = #f; break;
+ foreach_elf_file_type;
+#undef _
+ default:
+ return format (s, "unknown 0x%x", a);
+ }
+
+ return format (s, "%s", t);
+}
+
+static u8 *
+format_elf_data_encoding (u8 * s, va_list * args)
+{
+ int a = va_arg (*args, int);
+ char *t;
+
+ switch (a)
+ {
+#define _(f) case ELF_##f: t = #f; break;
+ foreach_elf_data_encoding;
+#undef _
+ default:
+ return format (s, "unknown 0x%x", a);
+ }
+
+ return format (s, "%s", t);
+}
+
+static int
+elf_section_offset_compare (void *a1, void *a2)
+{
+ elf_section_t *s1 = a1;
+ elf_section_t *s2 = a2;
+
+ return ((i64) s1->header.file_offset - (i64) s2->header.file_offset);
+}
+
+static int
+elf_segment_va_compare (void *a1, void *a2)
+{
+ elf_segment_t *s1 = a1;
+ elf_segment_t *s2 = a2;
+
+ return ((i64) s1->header.virtual_address -
+ (i64) s2->header.virtual_address);
+}
+
+u8 *
+format_elf_main (u8 * s, va_list * args)
+{
+ elf_main_t *em = va_arg (*args, elf_main_t *);
+ u32 verbose = va_arg (*args, u32);
+ elf64_file_header_t *fh = &em->file_header;
+
+ s =
+ format (s,
+ "File header: machine: %U, file type/class %U/%U, data-encoding: %U, abi: %U version %d\n",
+ format_elf_architecture, em->first_header.architecture,
+ format_elf_file_type, em->first_header.file_type,
+ format_elf_file_class, em->first_header.file_class,
+ format_elf_data_encoding, em->first_header.data_encoding,
+ format_elf_abi, em->first_header.abi,
+ em->first_header.abi_version);
+
+ s = format (s, " entry 0x%Lx, arch-flags 0x%x",
+ em->file_header.entry_point, em->file_header.flags);
+
+ if (em->interpreter)
+ s = format (s, "\n interpreter: %s", em->interpreter);
+
+ {
+ elf_section_t *h, *copy;
+
+ copy = 0;
+ vec_foreach (h, em->sections) if (h->header.type != ~0)
+ vec_add1 (copy, h[0]);
+
+ vec_sort_with_function (copy, elf_section_offset_compare);
+
+ s = format (s, "\nSections %d at file offset 0x%Lx-0x%Lx:\n",
+ fh->section_header_count,
+ fh->section_header_file_offset,
+ fh->section_header_file_offset +
+ (u64) fh->section_header_count * fh->section_header_size);
+ s = format (s, "%U\n", format_elf_section, em, 0);
+ vec_foreach (h, copy) s = format (s, "%U\n", format_elf_section, em, h);
+
+ vec_free (copy);
+ }
+
+ {
+ elf_segment_t *h, *copy;
+
+ copy = 0;
+ vec_foreach (h, em->segments)
+ if (h->header.type != ELF_SEGMENT_UNUSED && h->header.type != ~0)
+ vec_add1 (copy, h[0]);
+
+ /* Sort segments by address. */
+ vec_sort_with_function (copy, elf_segment_va_compare);
+
+ s = format (s, "\nSegments: %d at file offset 0x%Lx-0x%Lx:\n",
+ fh->segment_header_count,
+ fh->segment_header_file_offset,
+ (u64) fh->segment_header_file_offset +
+ (u64) fh->segment_header_count *
+ (u64) fh->segment_header_size);
+
+ s = format (s, "%U\n", format_elf_segment, 0);
+ vec_foreach (h, copy) s = format (s, "%U\n", format_elf_segment, h);
+
+ vec_free (copy);
+ }
+
+ if ((verbose & FORMAT_ELF_MAIN_SYMBOLS) && vec_len (em->symbol_tables) > 0)
+ {
+ elf_symbol_table_t *t;
+ elf64_symbol_t *sym;
+ elf_section_t *es;
+
+ vec_foreach (t, em->symbol_tables)
+ {
+ es = vec_elt_at_index (em->sections, t->section_index);
+ s =
+ format (s, "\nSymbols for section %s:\n",
+ elf_section_name (em, es));
+
+ s = format (s, "%U\n", format_elf_symbol, em, 0, 0);
+ vec_foreach (sym, t->symbols)
+ s = format (s, "%U\n", format_elf_symbol, em, t, sym);
+ }
+ }
+
+ if ((verbose & FORMAT_ELF_MAIN_RELOCATIONS)
+ && vec_len (em->relocation_tables) > 0)
+ {
+ elf_relocation_table_t *t;
+ elf_relocation_with_addend_t *r;
+ elf_section_t *es;
+
+ vec_foreach (t, em->relocation_tables)
+ {
+ es = vec_elt_at_index (em->sections, t->section_index);
+ r = t->relocations;
+ s = format (s, "\nRelocations for section %s:\n",
+ elf_section_name (em, es));
+
+ s = format (s, "%U\n", format_elf_relocation, em, 0);
+ vec_foreach (r, t->relocations)
+ {
+ s = format (s, "%U\n", format_elf_relocation, em, r);
+ }
+ }
+ }
+
+ if ((verbose & FORMAT_ELF_MAIN_DYNAMIC)
+ && vec_len (em->dynamic_entries) > 0)
+ {
+ elf64_dynamic_entry_t *es, *e;
+ s = format (s, "\nDynamic linker information:\n");
+ es = vec_dup (em->dynamic_entries);
+ s = format (s, "%U\n", format_elf_dynamic_entry, em, 0);
+ vec_foreach (e, es)
+ s = format (s, "%U\n", format_elf_dynamic_entry, em, e);
+ }
+
+ return s;
+}
+
+static void
+elf_parse_segments (elf_main_t * em, void *data)
+{
+ void *d = data + em->file_header.segment_header_file_offset;
+ uword n = em->file_header.segment_header_count;
+ uword i;
+
+ vec_resize (em->segments, n);
+
+ for (i = 0; i < n; i++)
+ {
+ em->segments[i].index = i;
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+ elf64_segment_header_t *h = d;
+#define _(t,f) em->segments[i].header.f = elf_swap_##t (em, h->f);
+ foreach_elf64_segment_header
+#undef _
+ d = (h + 1);
+ }
+ else
+ {
+ elf32_segment_header_t *h = d;
+#define _(t,f) em->segments[i].header.f = elf_swap_##t (em, h->f);
+ foreach_elf32_segment_header
+#undef _
+ d = (h + 1);
+ }
+ }
+}
+
+static void
+elf_parse_sections (elf_main_t * em, void *data)
+{
+ elf64_file_header_t *fh = &em->file_header;
+ elf_section_t *s;
+ void *d = data + fh->section_header_file_offset;
+ uword n = fh->section_header_count;
+ uword i;
+
+ vec_resize (em->sections, n);
+
+ for (i = 0; i < n; i++)
+ {
+ s = em->sections + i;
+
+ s->index = i;
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+ elf64_section_header_t *h = d;
+#define _(t,f) em->sections[i].header.f = elf_swap_##t (em, h->f);
+ foreach_elf64_section_header
+#undef _
+ d = (h + 1);
+ }
+ else
+ {
+ elf32_section_header_t *h = d;
+#define _(t,f) em->sections[i].header.f = elf_swap_##t (em, h->f);
+ foreach_elf32_section_header
+#undef _
+ d = (h + 1);
+ }
+
+ if (s->header.type != ELF_SECTION_NO_BITS)
+ vec_add (s->contents, data + s->header.file_offset,
+ s->header.file_size);
+ }
+
+ s = vec_elt_at_index (em->sections, fh->section_header_string_table_index);
+
+ em->section_by_name
+ = hash_create_string ( /* # elts */ vec_len (em->sections),
+ /* sizeof of value */ sizeof (uword));
+
+ vec_foreach (s, em->sections)
+ {
+ hash_set_mem (em->section_by_name,
+ elf_section_name (em, s), s - em->sections);
+ hash_set (em->section_by_start_address,
+ s->header.exec_address, s - em->sections);
+ }
+}
+
+static void
+add_symbol_table (elf_main_t * em, elf_section_t * s)
+{
+ elf_symbol_table_t *tab;
+ elf32_symbol_t *sym32;
+ elf64_symbol_t *sym64;
+ uword i;
+
+ if (s->header.type == ELF_SECTION_DYNAMIC_SYMBOL_TABLE)
+ em->dynamic_symbol_table_index = vec_len (em->symbol_tables);
+
+ vec_add2 (em->symbol_tables, tab, 1);
+
+ tab->section_index = s->index;
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+ tab->symbols =
+ elf_get_section_contents (em, s - em->sections,
+ sizeof (tab->symbols[0]));
+ for (i = 0; i < vec_len (tab->symbols); i++)
+ {
+#define _(t,f) tab->symbols[i].f = elf_swap_##t (em, tab->symbols[i].f);
+ foreach_elf64_symbol_header;
+#undef _
+ }
+ }
+ else
+ {
+ sym32 =
+ elf_get_section_contents (em, s - em->sections, sizeof (sym32[0]));
+ vec_clone (tab->symbols, sym32);
+ for (i = 0; i < vec_len (tab->symbols); i++)
+ {
+#define _(t,f) tab->symbols[i].f = elf_swap_##t (em, sym32[i].f);
+ foreach_elf32_symbol_header;
+#undef _
+ }
+ }
+
+ if (s->header.link == 0)
+ return;
+
+ tab->string_table =
+ elf_get_section_contents (em, s->header.link,
+ sizeof (tab->string_table[0]));
+ tab->symbol_by_name =
+ hash_create_string ( /* # elts */ vec_len (tab->symbols),
+ /* sizeof of value */ sizeof (uword));
+
+ vec_foreach (sym64, tab->symbols)
+ {
+ if (sym64->name != 0)
+ hash_set_mem (tab->symbol_by_name,
+ tab->string_table + sym64->name, sym64 - tab->symbols);
+ }
+}
+
+static void
+add_relocation_table (elf_main_t * em, elf_section_t * s)
+{
+ uword has_addend = s->header.type == ELF_SECTION_RELOCATION_ADD;
+ elf_relocation_table_t *t;
+ uword i;
+
+ vec_add2 (em->relocation_tables, t, 1);
+ t->section_index = s - em->sections;
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+ elf64_relocation_t *r, *rs;
+
+ rs = elf_get_section_contents (em, t->section_index,
+ sizeof (rs[0]) +
+ has_addend * sizeof (rs->addend[0]));
+
+ if (em->need_byte_swap)
+ {
+ r = rs;
+	  for (i = 0; i < vec_len (rs); i++)
+ {
+ r->address = elf_swap_u64 (em, r->address);
+ r->symbol_and_type = elf_swap_u32 (em, r->symbol_and_type);
+ if (has_addend)
+ r->addend[0] = elf_swap_u64 (em, r->addend[0]);
+ r = elf_relocation_next (r, s->header.type);
+ }
+ }
+
+ vec_resize (t->relocations, vec_len (rs));
+ clib_memcpy (t->relocations, rs, vec_bytes (t->relocations));
+ vec_free (rs);
+ }
+ else
+ {
+ elf_relocation_with_addend_t *r;
+ elf32_relocation_t *r32, *r32s;
+
+ r32s = elf_get_section_contents (em, t->section_index,
+ sizeof (r32s[0]) +
+ has_addend * sizeof (r32s->addend[0]));
+ vec_resize (t->relocations, vec_len (r32s));
+
+ r32 = r32s;
+ vec_foreach (r, t->relocations)
+ {
+ r->address = elf_swap_u32 (em, r32->address);
+      r->symbol_and_type = elf_swap_u32 (em, r32->symbol_and_type);
+ r->addend = has_addend ? elf_swap_u32 (em, r32->addend[0]) : 0;
+ r32 = elf_relocation_next (r32, s->header.type);
+ }
+
+ vec_free (r32s);
+ }
+}
+
+void
+elf_parse_symbols (elf_main_t * em)
+{
+ elf_section_t *s;
+
+ /* No need to parse symbols twice. */
+ if (em->parsed_symbols)
+ return;
+ em->parsed_symbols = 1;
+
+ vec_foreach (s, em->sections)
+ {
+ switch (s->header.type)
+ {
+ case ELF_SECTION_SYMBOL_TABLE:
+ case ELF_SECTION_DYNAMIC_SYMBOL_TABLE:
+ add_symbol_table (em, s);
+ break;
+
+ case ELF_SECTION_RELOCATION_ADD:
+ case ELF_SECTION_RELOCATION:
+ add_relocation_table (em, s);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+void
+elf_set_dynamic_entries (elf_main_t * em)
+{
+ uword i;
+
+ /* Start address for sections may have changed. */
+ {
+ elf64_dynamic_entry_t *e;
+
+ vec_foreach (e, em->dynamic_entries)
+ {
+ switch (e->type)
+ {
+ case ELF_DYNAMIC_ENTRY_INIT_FUNCTION:
+ case ELF_DYNAMIC_ENTRY_FINI_FUNCTION:
+ case ELF_DYNAMIC_ENTRY_SYMBOL_HASH:
+ case ELF_DYNAMIC_ENTRY_GNU_HASH:
+ case ELF_DYNAMIC_ENTRY_STRING_TABLE:
+ case ELF_DYNAMIC_ENTRY_SYMBOL_TABLE:
+ case ELF_DYNAMIC_ENTRY_PLT_GOT:
+ case ELF_DYNAMIC_ENTRY_PLT_RELOCATION_ADDRESS:
+ case ELF_DYNAMIC_ENTRY_RELA_ADDRESS:
+ case ELF_DYNAMIC_ENTRY_VERSION_NEED:
+ case ELF_DYNAMIC_ENTRY_VERSYM:
+ {
+ elf_section_t *es =
+ elf_get_section_by_start_address_no_check (em, e->data);
+ /* If section is not found just leave e->data alone. */
+ if (es)
+ e->data = es->header.exec_address;
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+ }
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+ elf64_dynamic_entry_t *e, *es;
+
+ es = em->dynamic_entries;
+ if (em->need_byte_swap)
+ {
+ es = vec_dup (es);
+ vec_foreach (e, es)
+ {
+ e->type = elf_swap_u64 (em, e->type);
+ e->data = elf_swap_u64 (em, e->data);
+ }
+ }
+
+ elf_set_section_contents (em, em->dynamic_section_index, es,
+ vec_bytes (es));
+ if (es != em->dynamic_entries)
+ vec_free (es);
+ }
+ else
+ {
+ elf32_dynamic_entry_t *es;
+
+ vec_clone (es, em->dynamic_entries);
+ if (em->need_byte_swap)
+ {
+ for (i = 0; i < vec_len (es); i++)
+ {
+ es[i].type = elf_swap_u32 (em, em->dynamic_entries[i].type);
+ es[i].data = elf_swap_u32 (em, em->dynamic_entries[i].data);
+ }
+ }
+
+ elf_set_section_contents (em, em->dynamic_section_index, es,
+ vec_bytes (es));
+ vec_free (es);
+ }
+}
+
+clib_error_t *
+elf_parse (elf_main_t * em, void *data, uword data_bytes)
+{
+ elf_first_header_t *h = data;
+ elf64_file_header_t *fh = &em->file_header;
+ clib_error_t *error = 0;
+
+ {
+ char *save = em->file_name;
+ memset (em, 0, sizeof (em[0]));
+ em->file_name = save;
+ }
+
+ em->first_header = h[0];
+ em->need_byte_swap =
+ CLIB_ARCH_IS_BIG_ENDIAN != (h->data_encoding ==
+ ELF_TWOS_COMPLEMENT_BIG_ENDIAN);
+ elf_swap_first_header (em, &em->first_header);
+
+ if (!(h->magic[0] == 0x7f
+ && h->magic[1] == 'E' && h->magic[2] == 'L' && h->magic[3] == 'F'))
+ return clib_error_return (0, "`%s': bad magic", em->file_name);
+
+ if (h->file_class == ELF_64BIT)
+ {
+ elf64_file_header_t *h64 = (void *) (h + 1);
+#define _(t,f) fh->f = elf_swap_##t (em, h64->f);
+ foreach_elf64_file_header
+#undef _
+ }
+ else
+ {
+ elf32_file_header_t *h32 = (void *) (h + 1);
+
+#define _(t,f) fh->f = elf_swap_##t (em, h32->f);
+ foreach_elf32_file_header
+#undef _
+ }
+
+ elf_parse_segments (em, data);
+ elf_parse_sections (em, data);
+
+ /* Figure which sections are contained in each segment. */
+ {
+ elf_segment_t *g;
+ elf_section_t *s;
+ vec_foreach (g, em->segments)
+ {
+ u64 g_lo, g_hi;
+ u64 s_lo, s_hi;
+
+ if (g->header.memory_size == 0)
+ continue;
+
+ g_lo = g->header.virtual_address;
+ g_hi = g_lo + g->header.memory_size;
+
+ vec_foreach (s, em->sections)
+ {
+ s_lo = s->header.exec_address;
+ s_hi = s_lo + s->header.file_size;
+
+ if (s_lo >= g_lo && s_hi <= g_hi)
+ {
+ g->section_index_bitmap =
+ clib_bitmap_ori (g->section_index_bitmap, s->index);
+ s->segment_index_bitmap =
+ clib_bitmap_ori (s->segment_index_bitmap, g->index);
+ }
+ }
+ }
+ }
+
+ return error;
+}
+
+#ifdef CLIB_UNIX
+
+static void
+add_dynamic_entries (elf_main_t * em, elf_section_t * s)
+{
+ uword i;
+
+ /* Can't have more than one dynamic section. */
+ ASSERT (em->dynamic_section_index == 0);
+ em->dynamic_section_index = s->index;
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+ elf64_dynamic_entry_t *e;
+
+ e = elf_get_section_contents (em, s - em->sections, sizeof (e[0]));
+ if (em->need_byte_swap)
+ for (i = 0; i < vec_len (e); i++)
+ {
+ e[i].type = elf_swap_u64 (em, e[i].type);
+ e[i].data = elf_swap_u64 (em, e[i].data);
+ }
+
+ em->dynamic_entries = e;
+ }
+ else
+ {
+ elf32_dynamic_entry_t *e;
+
+ e = elf_get_section_contents (em, s - em->sections, sizeof (e[0]));
+ vec_clone (em->dynamic_entries, e);
+ /* Widen 32 -> 64 bit unconditionally: vec_clone only sizes the
+ new vector (it does not copy), and elf_swap_u32 is the identity
+ when no byte swap is needed. */
+ for (i = 0; i < vec_len (e); i++)
+ {
+ em->dynamic_entries[i].type = elf_swap_u32 (em, e[i].type);
+ em->dynamic_entries[i].data = elf_swap_u32 (em, e[i].data);
+ }
+
+ vec_free (e);
+ }
+}
+
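+/* verneed sections are chains of fixed-size records reached via byte
+ offsets: each elf_dynamic_version_need_t points at a list of
+ elf_dynamic_version_need_aux_t records (first_aux_offset to reach
+ the first, next_offset to follow the chain). Both record types are
+ covered by the same union, so offsets must be multiples of the
+ union size (the ASSERTs below check this); a bitmap guards against
+ swapping an entry twice. */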
+static void
+byte_swap_verneed (elf_main_t * em, elf_dynamic_version_need_union_t * vus)
+{
+ uword *entries_swapped = 0;
+ uword i, j;
+
+ for (i = 0; i < vec_len (vus); i++)
+ {
+ elf_dynamic_version_need_union_t *n = vec_elt_at_index (vus, i);
+ elf_dynamic_version_need_union_t *a;
+
+ if (clib_bitmap_get (entries_swapped, i))
+ continue;
+
+ elf_swap_verneed (&n->need);
+ entries_swapped = clib_bitmap_set (entries_swapped, i, 1);
+
+ if (n->need.first_aux_offset != 0)
+ {
+ ASSERT (n->need.first_aux_offset % sizeof (n[0]) == 0);
+ j = i + (n->need.first_aux_offset / sizeof (n[0]));
+ while (1)
+ {
+ a = vec_elt_at_index (vus, j);
+ if (!clib_bitmap_get (entries_swapped, j))
+ {
+ entries_swapped = clib_bitmap_set (entries_swapped, j, 1);
+ elf_swap_verneed_aux (&a->aux);
+ }
+ if (a->aux.next_offset == 0)
+ break;
+ ASSERT (a->aux.next_offset % sizeof (a->aux) == 0);
+ j += (a->aux.next_offset / sizeof (a->aux));
+ }
+ }
+ }
+
+ clib_bitmap_free (entries_swapped);
+}
+
+static void set_dynamic_verneed (elf_main_t * em) __attribute__ ((unused));
+static void
+set_dynamic_verneed (elf_main_t * em)
+{
+ elf_dynamic_version_need_union_t *vus = em->verneed;
+
+ if (em->need_byte_swap)
+ {
+ vus = vec_dup (vus);
+ byte_swap_verneed (em, vus);
+ }
+
+ elf_set_section_contents (em, em->verneed_section_index, vus,
+ vec_bytes (vus));
+ if (vus != em->verneed)
+ vec_free (vus);
+}
+
+static void
+set_symbol_table (elf_main_t * em, u32 table_index) __attribute__ ((unused));
+static void
+set_symbol_table (elf_main_t * em, u32 table_index)
+{
+ elf_symbol_table_t *tab = vec_elt_at_index (em->symbol_tables, table_index);
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+ elf64_symbol_t *s, *syms;
+
+ syms = vec_dup (tab->symbols);
+ vec_foreach (s, syms)
+ {
+#define _(t,f) s->f = elf_swap_##t (em, s->f);
+ foreach_elf64_symbol_header;
+#undef _
+ }
+
+ elf_set_section_contents (em, tab->section_index,
+ syms, vec_bytes (syms));
+ }
+ else
+ {
+ elf32_symbol_t *syms;
+ uword i;
+ vec_clone (syms, tab->symbols);
+ for (i = 0; i < vec_len (tab->symbols); i++)
+ {
+#define _(t,f) syms[i].f = elf_swap_##t (em, tab->symbols[i].f);
+ foreach_elf32_symbol_header;
+#undef _
+ }
+
+ elf_set_section_contents (em, tab->section_index,
+ syms, vec_bytes (syms));
+ }
+}
+
+static char *
+elf_find_interpreter (elf_main_t * em, void *data)
+{
+ elf_segment_t *g;
+ elf_section_t *s;
+ uword *p;
+
+ vec_foreach (g, em->segments)
+ {
+ if (g->header.type == ELF_SEGMENT_INTERP)
+ break;
+ }
+
+ if (g >= vec_end (em->segments))
+ return 0;
+
+ p = hash_get (em->section_by_start_address, g->header.virtual_address);
+ if (!p)
+ return 0;
+
+ s = vec_elt_at_index (em->sections, p[0]);
+ return (char *) vec_dup (s->contents);
+}
+
+static void *
+elf_get_section_contents_with_starting_address (elf_main_t * em,
+ uword start_address,
+ uword elt_size,
+ u32 * section_index_result)
+{
+ elf_section_t *s = 0;
+ clib_error_t *error;
+
+ error = elf_get_section_by_start_address (em, start_address, &s);
+ if (error)
+ {
+ clib_error_report (error);
+ return 0;
+ }
+
+ if (section_index_result)
+ *section_index_result = s->index;
+
+ return elf_get_section_contents (em, s->index, elt_size);
+}
+
+static void
+elf_parse_dynamic (elf_main_t * em)
+{
+ elf_section_t *s;
+ elf64_dynamic_entry_t *e;
+
+ vec_foreach (s, em->sections)
+ {
+ switch (s->header.type)
+ {
+ case ELF_SECTION_DYNAMIC:
+ add_dynamic_entries (em, s);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ em->dynamic_string_table_section_index = ~0;
+ em->dynamic_string_table = 0;
+
+ vec_foreach (e, em->dynamic_entries)
+ {
+ switch (e->type)
+ {
+ case ELF_DYNAMIC_ENTRY_STRING_TABLE:
+ ASSERT (vec_len (em->dynamic_string_table) == 0);
+ em->dynamic_string_table =
+ elf_get_section_contents_with_starting_address
+ (em, e->data, sizeof (u8),
+ &em->dynamic_string_table_section_index);
+ break;
+
+ case ELF_DYNAMIC_ENTRY_SYMBOL_TABLE:
+ {
+ elf_section_t *s = 0;
+ clib_error_t *error;
+
+ error = elf_get_section_by_start_address (em, e->data, &s);
+ if (error)
+ {
+ clib_error_report (error);
+ return;
+ }
+
+ em->dynamic_symbol_table_section_index = s - em->sections;
+ }
+ break;
+
+ case ELF_DYNAMIC_ENTRY_VERSYM:
+ em->versym =
+ elf_get_section_contents_with_starting_address
+ (em, e->data, sizeof (em->versym[0]),
+ &em->versym_section_index);
+ if (em->need_byte_swap)
+ {
+ uword i;
+ for (i = 0; i < vec_len (em->versym); i++)
+ em->versym[i] = clib_byte_swap_u16 (em->versym[i]);
+ }
+ break;
+
+ case ELF_DYNAMIC_ENTRY_VERSION_NEED:
+ em->verneed =
+ elf_get_section_contents_with_starting_address
+ (em, e->data, sizeof (em->verneed[0]),
+ &em->verneed_section_index);
+ if (em->need_byte_swap)
+ byte_swap_verneed (em, em->verneed);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+clib_error_t *
+elf_read_file (elf_main_t * em, char *file_name)
+{
+ int fd;
+ struct stat fd_stat;
+ uword mmap_length = 0;
+ void *data = 0;
+ clib_error_t *error = 0;
+
+ elf_main_init (em);
+
+ fd = open (file_name, 0);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", file_name);
+ goto done;
+ }
+
+ if (fstat (fd, &fd_stat) < 0)
+ {
+ error = clib_error_return_unix (0, "fstat `%s'", file_name);
+ goto done;
+ }
+ mmap_length = fd_stat.st_size;
+
+ data = mmap (0, mmap_length, PROT_READ, MAP_SHARED, fd, /* offset */ 0);
+ if (~pointer_to_uword (data) == 0)
+ {
+ error = clib_error_return_unix (0, "mmap `%s'", file_name);
+ goto done;
+ }
+
+ em->file_name = file_name;
+
+ error = elf_parse (em, data, mmap_length);
+ if (error)
+ goto done;
+
+ elf_parse_symbols (em);
+ elf_parse_dynamic (em);
+
+ em->interpreter = elf_find_interpreter (em, data);
+
+ munmap (data, mmap_length);
+ close (fd);
+
+ return /* no error */ 0;
+
+done:
+ elf_main_free (em);
+ if (fd >= 0)
+ close (fd);
+ if (data)
+ munmap (data, mmap_length);
+ return error;
+}
+
+typedef struct
+{
+ u8 *new_table;
+
+ u8 *old_table;
+
+ uword *hash;
+} string_table_builder_t;
+
+static u32
+string_table_add_name (string_table_builder_t * b, u8 * n)
+{
+ uword *p, i, j, l;
+
+ p = hash_get_mem (b->hash, n);
+ if (p)
+ return p[0];
+
+ l = strlen ((char *) n);
+ i = vec_len (b->new_table);
+ vec_add (b->new_table, n, l + 1);
+
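+ /* Hash every suffix of the name so that a later name equal to a
+ suffix of this one (e.g. "printf" after "vprintf") reuses the
+ tail of the entry just added -- the usual ELF string-table
+ sharing trick. */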
+ for (j = 0; j <= l; j++)
+ {
+ if (j > 0)
+ {
+ p = hash_get_mem (b->hash, n + j);
+
+ /* Sub-string already in table? */
+ if (p)
+ continue;
+ }
+
+ hash_set_mem (b->hash, n + j, i + j);
+ }
+
+ return i;
+}
+
+static u32 string_table_add_name_index (string_table_builder_t * b, u32 index)
+ __attribute__ ((unused));
+static u32
+string_table_add_name_index (string_table_builder_t * b, u32 index)
+{
+ u8 *n = b->old_table + index;
+ return string_table_add_name (b, n);
+}
+
+static void string_table_init (string_table_builder_t * b, u8 * old_table)
+ __attribute__ ((unused));
+static void
+string_table_init (string_table_builder_t * b, u8 * old_table)
+{
+ memset (b, 0, sizeof (b[0]));
+ b->old_table = old_table;
+ b->hash = hash_create_string (0, sizeof (uword));
+}
+
+static u8 *string_table_done (string_table_builder_t * b)
+ __attribute__ ((unused));
+static u8 *
+string_table_done (string_table_builder_t * b)
+{
+ hash_free (b->hash);
+ return b->new_table;
+}
+
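+/* Assign file offsets and exec addresses to every live section:
+ symbol and string tables are deferred to the end of the file,
+ section headers land after the last ordinary section, and segment
+ extents are then recomputed from the sections they contain. */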
+static void
+layout_sections (elf_main_t * em)
+{
+ elf_section_t *s;
+ u32 n_sections_with_changed_exec_address = 0;
+ u32 *deferred_symbol_and_string_sections = 0;
+ u32 n_deleted_sections = 0;
+ /* note: rebuild is always zero. Intent lost in the sands of time */
+#if 0
+ int rebuild = 0;
+
+ /* Re-build section string table (sections may have been deleted). */
+ if (rebuild)
+ {
+ u8 *st = 0;
+
+ vec_foreach (s, em->sections)
+ {
+ u8 *name;
+ if (s->header.type == ~0)
+ continue;
+ name = elf_section_name (em, s);
+ s->header.name = vec_len (st);
+ vec_add (st, name, strlen ((char *) name) + 1);
+ }
+
+ s =
+ vec_elt_at_index (em->sections,
+ em->file_header.section_header_string_table_index);
+
+ vec_free (s->contents);
+ s->contents = st;
+ }
+
+ /* Re-build dynamic string table. */
+ if (rebuild && em->dynamic_string_table_section_index != ~0)
+ {
+ string_table_builder_t b;
+
+ string_table_init (&b, em->dynamic_string_table);
+
+ /* Add all dynamic symbols. */
+ {
+ elf_symbol_table_t *symtab;
+ elf64_symbol_t *sym;
+
+ symtab =
+ vec_elt_at_index (em->symbol_tables,
+ em->dynamic_symbol_table_index);
+ vec_foreach (sym, symtab->symbols)
+ {
+ u8 *name = elf_symbol_name (symtab, sym);
+ sym->name = string_table_add_name (&b, name);
+ }
+
+ set_symbol_table (em, em->dynamic_symbol_table_index);
+ }
+
+ /* Add all dynamic entries. */
+ {
+ elf64_dynamic_entry_t *e;
+
+ vec_foreach (e, em->dynamic_entries)
+ {
+ switch (e->type)
+ {
+ case ELF_DYNAMIC_ENTRY_NEEDED_LIBRARY:
+ case ELF_DYNAMIC_ENTRY_RPATH:
+ case ELF_DYNAMIC_ENTRY_RUN_PATH:
+ e->data = string_table_add_name_index (&b, e->data);
+ break;
+ }
+ }
+ }
+
+ /* Add all version needs. */
+ if (vec_len (em->verneed) > 0)
+ {
+ elf_dynamic_version_need_union_t *n, *a;
+
+ n = em->verneed;
+ while (1)
+ {
+ n->need.file_name_offset =
+ string_table_add_name_index (&b, n->need.file_name_offset);
+
+ if (n->need.first_aux_offset != 0)
+ {
+ a = n + n->need.first_aux_offset / sizeof (n[0]);
+ while (1)
+ {
+ a->aux.name =
+ string_table_add_name_index (&b, a->aux.name);
+ if (a->aux.next_offset == 0)
+ break;
+ a += a->aux.next_offset / sizeof (a[0]);
+ }
+ }
+
+ if (n->need.next_offset == 0)
+ break;
+
+ n += n->need.next_offset / sizeof (n[0]);
+ }
+
+ set_dynamic_verneed (em);
+ }
+
+ s =
+ vec_elt_at_index (em->sections,
+ em->dynamic_string_table_section_index);
+
+ vec_free (s->contents);
+ s->contents = string_table_done (&b);
+ }
+#endif /* dead code */
+
+ /* Figure file offsets and exec addresses for sections. */
+ {
+ u64 exec_address = 0, file_offset = 0;
+ u64 file_size, align_size;
+
+ vec_foreach (s, em->sections)
+ {
+ /* Ignore deleted and unused sections. */
+ switch (s->header.type)
+ {
+ case ~0:
+ n_deleted_sections++;
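+ /* fall through */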
+ case ELF_SECTION_UNUSED:
+ continue;
+
+ case ELF_SECTION_STRING_TABLE:
+ case ELF_SECTION_SYMBOL_TABLE:
+ if (!(s->index == em->dynamic_string_table_section_index
+ || s->index ==
+ em->file_header.section_header_string_table_index))
+ {
+ vec_add1 (deferred_symbol_and_string_sections, s->index);
+ continue;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ exec_address = round_pow2_u64 (exec_address, s->header.align);
+
+ /* Put sections we added at end of file. */
+ if (s->header.file_offset == ~0)
+ s->header.file_offset = file_offset;
+
+ /* Follow gaps in original file. */
+ if (s->header.exec_address > exec_address)
+ {
+ exec_address = s->header.exec_address;
+ file_offset = s->header.file_offset;
+ }
+
+ if (s->header.flags & ELF_SECTION_FLAG_ALLOC)
+ {
+ s->exec_address_change = exec_address - s->header.exec_address;
+ n_sections_with_changed_exec_address += s->exec_address_change != 0;
+ s->header.exec_address = exec_address;
+ }
+
+ if (s->header.type == ELF_SECTION_NO_BITS)
+ file_size = s->header.file_size;
+ else
+ file_size = vec_len (s->contents);
+
+ {
+ u64 align;
+
+ if (s + 1 >= vec_end (em->sections))
+ align = 16;
+ else if (s[1].header.type == ELF_SECTION_NO_BITS)
+ align = 8;
+ else
+ align = s[1].header.align;
+
+ if (s->header.flags & ELF_SECTION_FLAG_ALLOC)
+ {
+ u64 v = round_pow2_u64 (exec_address + file_size, align);
+ align_size = v - exec_address;
+ }
+ else
+ {
+ u64 v = round_pow2_u64 (file_offset + file_size, align);
+ align_size = v - file_offset;
+ }
+ }
+
+ s->header.file_offset = file_offset;
+ s->header.file_size = file_size;
+ s->align_size = align_size;
+
+ if (s->header.type != ELF_SECTION_NO_BITS)
+ file_offset += align_size;
+ exec_address += align_size;
+ }
+
+ /* Section headers go after last section but before symbol/string
+ tables. */
+ {
+ elf64_file_header_t *fh = &em->file_header;
+
+ fh->section_header_file_offset = file_offset;
+ fh->section_header_count = vec_len (em->sections) - n_deleted_sections;
+ file_offset += (u64) fh->section_header_count * fh->section_header_size;
+ }
+
+ {
+ int i;
+ for (i = 0; i < vec_len (deferred_symbol_and_string_sections); i++)
+ {
+ s =
+ vec_elt_at_index (em->sections,
+ deferred_symbol_and_string_sections[i]);
+
+ s->header.file_offset = file_offset;
+ s->header.file_size = vec_len (s->contents);
+
+ align_size = round_pow2 (vec_len (s->contents), 16);
+ s->align_size = align_size;
+ file_offset += align_size;
+ }
+ vec_free (deferred_symbol_and_string_sections);
+ }
+ }
+
+ /* Update dynamic entries now that sections have been assigned
+ possibly new addresses. */
+#if 0
+ if (rebuild)
+ elf_set_dynamic_entries (em);
+#endif
+
+ /* Update segments for changed section addresses. */
+ {
+ elf_segment_t *g;
+ uword si;
+
+ vec_foreach (g, em->segments)
+ {
+ u64 s_lo, s_hi, f_lo = 0;
+ u32 n_sections = 0;
+
+ if (g->header.memory_size == 0)
+ continue;
+
+ s_lo = s_hi = 0;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (si, g->section_index_bitmap, ({
+ u64 lo, hi;
+
+ s = vec_elt_at_index (em->sections, si);
+ lo = s->header.exec_address;
+ hi = lo + s->align_size;
+ if (n_sections == 0)
+ {
+ s_lo = lo;
+ s_hi = hi;
+ f_lo = s->header.file_offset;
+ n_sections++;
+ }
+ else
+ {
+ if (lo < s_lo)
+ {
+ s_lo = lo;
+ f_lo = s->header.file_offset;
+ }
+ if (hi > s_hi)
+ s_hi = hi;
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (n_sections == 0)
+ continue;
+
+ /* File offset zero includes ELF headers/segment headers.
+ Don't change that. */
+ if (g->header.file_offset == 0 && g->header.type == ELF_SEGMENT_LOAD)
+ {
+ s_lo = g->header.virtual_address;
+ f_lo = g->header.file_offset;
+ }
+
+ g->header.virtual_address = s_lo;
+ g->header.physical_address = s_lo;
+ g->header.file_offset = f_lo;
+ g->header.memory_size = s_hi - s_lo;
+ }
+ }
+}
+
+clib_error_t *
+elf_write_file (elf_main_t * em, char *file_name)
+{
+ int fd;
+ FILE *f;
+ clib_error_t *error = 0;
+
+ fd = open (file_name, O_CREAT | O_RDWR | O_TRUNC, 0755);
+ if (fd < 0)
+ return clib_error_return_unix (0, "open `%s'", file_name);
+
+ f = fdopen (fd, "w");
+
+ /* Section contents may have changed. So, we need to update
+ stuff to reflect this. */
+ layout_sections (em);
+
+ /* Write first header. */
+ {
+ elf_first_header_t h = em->first_header;
+
+ elf_swap_first_header (em, &h);
+ if (fwrite (&h, sizeof (h), 1, f) != 1)
+ {
+ error = clib_error_return_unix (0, "write first header");
+ goto error;
+ }
+ }
+
+ /* Write file header. */
+ {
+ elf64_file_header_t h = em->file_header;
+
+ /* Segment headers are after first header. */
+ h.segment_header_file_offset = sizeof (elf_first_header_t);
+ if (em->first_header.file_class == ELF_64BIT)
+ h.segment_header_file_offset += sizeof (elf64_file_header_t);
+ else
+ h.segment_header_file_offset += sizeof (elf32_file_header_t);
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+#define _(t,field) h.field = elf_swap_##t (em, h.field);
+ foreach_elf64_file_header;
+#undef _
+
+ if (fwrite (&h, sizeof (h), 1, f) != 1)
+ {
+ error = clib_error_return_unix (0, "write file header");
+ goto error;
+ }
+ }
+ else
+ {
+ elf32_file_header_t h32;
+
+#define _(t,field) h32.field = elf_swap_##t (em, h.field);
+ foreach_elf32_file_header;
+#undef _
+
+ if (fwrite (&h32, sizeof (h32), 1, f) != 1)
+ {
+ error = clib_error_return_unix (0, "write file header");
+ goto error;
+ }
+ }
+ }
+
+ /* Write segment headers. */
+ {
+ elf_segment_t *s;
+
+ vec_foreach (s, em->segments)
+ {
+ elf64_segment_header_t h;
+
+ if (s->header.type == ~0)
+ continue;
+
+ h = s->header;
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+#define _(t,field) h.field = elf_swap_##t (em, h.field);
+ foreach_elf64_segment_header;
+#undef _
+
+ if (fwrite (&h, sizeof (h), 1, f) != 1)
+ {
+ error =
+ clib_error_return_unix (0, "write segment header %U",
+ format_elf_segment, em, s);
+ goto error;
+ }
+ }
+ else
+ {
+ elf32_segment_header_t h32;
+
+#define _(t,field) h32.field = elf_swap_##t (em, h.field);
+ foreach_elf32_segment_header;
+#undef _
+
+ if (fwrite (&h32, sizeof (h32), 1, f) != 1)
+ {
+ error =
+ clib_error_return_unix (0, "write segment header %U",
+ format_elf_segment, em, s);
+ goto error;
+ }
+ }
+ }
+ }
+
+ /* Write contents for all sections. */
+ {
+ elf_section_t *s;
+
+ vec_foreach (s, em->sections)
+ {
+ if (s->header.file_size == 0)
+ continue;
+
+ if (fseek (f, s->header.file_offset, SEEK_SET) < 0)
+ {
+ fclose (f);
+ return clib_error_return_unix (0, "fseek 0x%Lx",
+ s->header.file_offset);
+ }
+
+ if (s->header.type == ELF_SECTION_NO_BITS)
+ /* don't write for .bss sections */ ;
+ else if (fwrite (s->contents, vec_len (s->contents), 1, f) != 1)
+ {
+ error =
+ clib_error_return_unix (0, "write %s section contents",
+ elf_section_name (em, s));
+ goto error;
+ }
+ }
+
+ /* Finally write section headers. */
+ if (fseek (f, em->file_header.section_header_file_offset, SEEK_SET) < 0)
+ {
+ fclose (f);
+ return clib_error_return_unix
+ (0, "fseek 0x%Lx", em->file_header.section_header_file_offset);
+ }
+
+ vec_foreach (s, em->sections)
+ {
+ elf64_section_header_t h;
+
+ if (s->header.type == ~0)
+ continue;
+
+ h = s->header;
+
+ if (em->first_header.file_class == ELF_64BIT)
+ {
+#define _(t,field) h.field = elf_swap_##t (em, h.field);
+ foreach_elf64_section_header;
+#undef _
+
+ if (fwrite (&h, sizeof (h), 1, f) != 1)
+ {
+ error =
+ clib_error_return_unix (0, "write %s section header",
+ elf_section_name (em, s));
+ goto error;
+ }
+ }
+ else
+ {
+ elf32_section_header_t h32;
+
+#define _(t,field) h32.field = elf_swap_##t (em, h.field);
+ foreach_elf32_section_header;
+#undef _
+
+ if (fwrite (&h32, sizeof (h32), 1, f) != 1)
+ {
+ error =
+ clib_error_return_unix (0, "write %s section header",
+ elf_section_name (em, s));
+ goto error;
+ }
+ }
+ }
+ }
+
+error:
+ fclose (f);
+ return error;
+}
+
+clib_error_t *
+elf_delete_named_section (elf_main_t * em, char *section_name)
+{
+ elf_section_t *s = 0;
+ clib_error_t *error;
+
+ error = elf_get_section_by_name (em, section_name, &s);
+ if (error)
+ return error;
+
+ s->header.type = ~0;
+
+ return 0;
+}
+
+void
+elf_create_section_with_contents (elf_main_t * em,
+ char *section_name,
+ elf64_section_header_t * header,
+ void *contents, uword n_content_bytes)
+{
+ elf_section_t *s, *sts;
+ u8 *st, *c;
+ uword *p, is_new_section;
+
+ /* See if section already exists with given name.
+ If so, just replace contents. */
+ is_new_section = 0;
+ if ((p = hash_get_mem (em->section_by_name, section_name)))
+ {
+ s = vec_elt_at_index (em->sections, p[0]);
+ _vec_len (s->contents) = 0;
+ c = s->contents;
+ }
+ else
+ {
+ vec_add2 (em->sections, s, 1);
+ is_new_section = 1;
+ c = 0;
+ }
+
+ sts =
+ vec_elt_at_index (em->sections,
+ em->file_header.section_header_string_table_index);
+ st = sts->contents;
+
+ s->header = header[0];
+
+ s->header.file_offset = ~0;
+ s->header.file_size = n_content_bytes;
+ s->index = s - em->sections;
+
+ /* Add name to string table. */
+ s->header.name = vec_len (st);
+ vec_add (st, section_name, strlen (section_name));
+ vec_add1 (st, 0);
+ sts->contents = st;
+
+ vec_resize (c, n_content_bytes);
+ clib_memcpy (c, contents, n_content_bytes);
+ s->contents = c;
+
+ em->file_header.section_header_count += is_new_section
+ && s->header.type != ~0;
+}
+
+uword
+elf_delete_segment_with_type (elf_main_t * em,
+ elf_segment_type_t segment_type)
+{
+ uword n_deleted = 0;
+ elf_segment_t *s;
+
+ vec_foreach (s, em->segments)
+ if (s->header.type == segment_type)
+ {
+ s->header.type = ~0;
+ n_deleted += 1;
+ }
+
+ ASSERT (em->file_header.segment_header_count >= n_deleted);
+ em->file_header.segment_header_count -= n_deleted;
+
+ return n_deleted;
+}
+
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/elf.h b/src/vppinfra/elf.h
new file mode 100644
index 00000000..008ea284
--- /dev/null
+++ b/src/vppinfra/elf.h
@@ -0,0 +1,1062 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_elf_h
+#define included_clib_elf_h
+
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/byte_order.h>
+
+#define foreach_elf_file_class \
+ _ (CLASS_NONE) _ (32BIT) _ (64BIT)
+
+#define foreach_elf_data_encoding \
+ _ (ENCODING_NONE) \
+ _ (TWOS_COMPLEMENT_LITTLE_ENDIAN) \
+ _ (TWOS_COMPLEMENT_BIG_ENDIAN)
+
+#define ELF_VERSION_NONE (0)
+#define ELF_VERSION_CURRENT (1)
+
+#define foreach_elf_abi \
+ _ (SYSV, 0) \
+ _ (HPUX, 1) \
+ _ (NETBSD, 2) \
+ _ (LINUX, 3) \
+ _ (SOLARIS, 6) \
+ _ (AIX, 7) \
+ _ (IRIX, 8) \
+ _ (FREEBSD, 9) \
+ _ (COMPAQ_TRU64, 10) \
+ _ (MODESTO, 11) \
+ _ (OPENBSD, 12) \
+ _ (ARM, 97) \
+ _ (STANDALONE, 255)
+
+/* Legal values for type (object file type). */
+#define foreach_elf_file_type \
+ _ (NONE, 0) \
+ _ (RELOC, 1) \
+ _ (EXEC, 2) \
+ _ (SHARED, 3) \
+ _ (CORE, 4) \
+ _ (OS_SPECIFIC_LO, 0xfe00) \
+ _ (OS_SPECIFIC_HI, 0xfeff) \
+ _ (ARCH_SPECIFIC_LO, 0xff00) \
+ _ (ARCH_SPECIFIC_HI, 0xffff)
+
+/* Legal values for architecture. */
+#define foreach_elf_architecture \
+ _ (NONE, 0) /* No machine */ \
+ _ (M32, 1) /* AT&T WE 32100 */ \
+ _ (SPARC, 2) /* SUN SPARC */ \
+ _ (386, 3) /* Intel 80386 */ \
+ _ (68K, 4) /* Motorola m68k family */ \
+ _ (88K, 5) /* Motorola m88k family */ \
+ _ (860, 7) /* Intel 80860 */ \
+ _ (MIPS, 8) /* MIPS R3000 big-endian */ \
+ _ (S370, 9) /* IBM System/370 */ \
+ _ (MIPS_RS3_LE, 10) /* MIPS R3000 little-endian */ \
+ _ (PARISC, 15) /* HPPA */ \
+ _ (VPP500, 17) /* Fujitsu VPP500 */ \
+ _ (SPARC32PLUS, 18) /* Sun's "v8plus" */ \
+ _ (960, 19) /* Intel 80960 */ \
+ _ (PPC, 20) /* PowerPC */ \
+ _ (PPC64, 21) /* PowerPC 64-bit */ \
+ _ (S390, 22) /* IBM S390 */ \
+ _ (V800, 36) /* NEC V800 series */ \
+ _ (FR20, 37) /* Fujitsu FR20 */ \
+ _ (RH32, 38) /* TRW RH-32 */ \
+ _ (RCE, 39) /* Motorola RCE */ \
+ _ (ARM, 40) /* ARM */ \
+ _ (FAKE_ALPHA, 41) /* Digital Alpha */ \
+ _ (SH, 42) /* Hitachi SH */ \
+ _ (SPARCV9, 43) /* SPARC v9 64-bit */ \
+ _ (TRICORE, 44) /* Siemens Tricore */ \
+ _ (ARC, 45) /* Argonaut RISC Core */ \
+ _ (H8_300, 46) /* Hitachi H8/300 */ \
+ _ (H8_300H, 47) /* Hitachi H8/300H */ \
+ _ (H8S, 48) /* Hitachi H8S */ \
+ _ (H8_500, 49) /* Hitachi H8/500 */ \
+ _ (IA_64, 50) /* Intel Merced */ \
+ _ (MIPS_X, 51) /* Stanford MIPS-X */ \
+ _ (COLDFIRE, 52) /* Motorola Coldfire */ \
+ _ (68HC12, 53) /* Motorola M68HC12 */ \
+ _ (MMA, 54) /* Fujitsu MMA Multimedia Accel. */ \
+ _ (PCP, 55) /* Siemens PCP */ \
+ _ (NCPU, 56) /* Sony nCPU embedded RISC */ \
+ _ (NDR1, 57) /* Denso NDR1 microprocessor */ \
+ _ (STARCORE, 58) /* Motorola Start*Core processor */ \
+ _ (ME16, 59) /* Toyota ME16 processor */ \
+ _ (ST100, 60) /* STMicroelectronic ST100 */ \
+ _ (TINYJ, 61) /* Advanced Logic Corp. Tinyj */ \
+ _ (X86_64, 62) /* AMD x86-64 architecture */ \
+ _ (PDSP, 63) /* Sony DSP Processor */ \
+ _ (FX66, 66) /* Siemens FX66 microcontroller */ \
+ _ (ST9PLUS, 67) /* STMicroelectronics ST9+ 8/16 mc */ \
+ _ (ST7, 68) /* STMicroelectronics ST7 8 bit mc */ \
+ _ (68HC16, 69) /* Motorola MC68HC16 */ \
+ _ (68HC11, 70) /* Motorola MC68HC11 */ \
+ _ (68HC08, 71) /* Motorola MC68HC08 */ \
+ _ (68HC05, 72) /* Motorola MC68HC05 */ \
+ _ (SVX, 73) /* Silicon Graphics SVx */ \
+ _ (ST19, 74) /* STMicroelectronics ST19 8 bit mc */ \
+ _ (VAX, 75) /* Digital VAX */ \
+ _ (CRIS, 76) /* Axis 32-bit embedded proc. */ \
+ _ (JAVELIN, 77) /* Infineon 32-bit embedded proc. */ \
+ _ (FIREPATH, 78) /* Element 14 64-bit DSP Processor */ \
+ _ (ZSP, 79) /* LSI Logic 16-bit DSP Processor */ \
+ _ (MMIX, 80) /* Knuth's 64-bit processor */ \
+ _ (HUANY, 81) /* Harvard machine-independent */ \
+ _ (PRISM, 82) /* SiTera Prism */ \
+ _ (AVR, 83) /* Atmel AVR 8-bit microcontroller */ \
+ _ (FR30, 84) /* Fujitsu FR30 */ \
+ _ (D10V, 85) /* Mitsubishi D10V */ \
+ _ (D30V, 86) /* Mitsubishi D30V */ \
+ _ (V850, 87) /* NEC v850 */ \
+ _ (M32R, 88) /* Mitsubishi M32R */ \
+ _ (MN10300, 89) /* Matsushita MN10300 */ \
+ _ (MN10200, 90) /* Matsushita MN10200 */ \
+ _ (PJ, 91) /* picoJava */ \
+ _ (OPENRISC, 92) /* OpenRISC 32-bit processor */ \
+ _ (ARC_A5, 93) /* ARC Cores Tangent-A5 */ \
+ _ (XTENSA, 94) /* Tensilica Xtensa Architecture */ \
+ _ (ALPHA, 0x9026)
+
+#define _(f) ELF_##f,
+
+typedef enum
+{
+ foreach_elf_file_class ELF_N_FILE_CLASS,
+} elf_file_class_t;
+
+typedef enum
+{
+ foreach_elf_data_encoding ELF_N_DATA_ENCODING,
+} elf_data_encoding_t;
+
+#undef _
+
+#define _(f,i) ELF_##f = i,
+
+typedef enum
+{
+ foreach_elf_abi
+} elf_abi_t;
+
+typedef enum
+{
+ foreach_elf_file_type
+} elf_file_type_t;
+
+#undef _
+
+typedef enum
+{
+#define _(f,i) ELF_ARCH_##f = i,
+ foreach_elf_architecture
+#undef _
+} elf_architecture_t;
+
+typedef struct
+{
+ /* 0x7f ELF */
+ u8 magic[4];
+
+ elf_file_class_t file_class:8;
+ elf_data_encoding_t data_encoding:8;
+ u8 file_version_ident;
+ elf_abi_t abi:8;
+ u8 abi_version;
+
+ u8 pad[7];
+
+ elf_file_type_t file_type:16;
+ elf_architecture_t architecture:16;
+
+ u32 file_version;
+} elf_first_header_t;
+
+/* 32/64 bit file header following basic file header. */
+#define foreach_elf32_file_header \
+ _ (u32, entry_point) \
+ _ (u32, segment_header_file_offset) \
+ _ (u32, section_header_file_offset) \
+ _ (u32, flags) \
+ _ (u16, n_bytes_this_header) \
+ _ (u16, segment_header_size) \
+ _ (u16, segment_header_count) \
+ _ (u16, section_header_size) \
+ _ (u16, section_header_count) \
+ _ (u16, section_header_string_table_index)
+
+#define foreach_elf64_file_header \
+ _ (u64, entry_point) \
+ _ (u64, segment_header_file_offset) \
+ _ (u64, section_header_file_offset) \
+ _ (u32, flags) \
+ _ (u16, n_bytes_this_header) \
+ _ (u16, segment_header_size) \
+ _ (u16, segment_header_count) \
+ _ (u16, section_header_size) \
+ _ (u16, section_header_count) \
+ _ (u16, section_header_string_table_index)
+
+/* Section header. */
+#define foreach_elf32_section_header \
+ _ (u32, name) \
+ _ (u32, type) \
+ _ (u32, flags) \
+ _ (u32, exec_address) \
+ _ (u32, file_offset) \
+ _ (u32, file_size) \
+ _ (u32, link) \
+ _ (u32, additional_info) \
+ _ (u32, align) \
+ _ (u32, entry_size)
+
+#define foreach_elf64_section_header \
+ _ (u32, name) \
+ _ (u32, type) \
+ _ (u64, flags) \
+ _ (u64, exec_address) \
+ _ (u64, file_offset) \
+ _ (u64, file_size) \
+ _ (u32, link) \
+ _ (u32, additional_info) \
+ _ (u64, align) \
+ _ (u64, entry_size)
+
+/* Program segment header. */
+#define foreach_elf32_segment_header \
+ _ (u32, type) \
+ _ (u32, file_offset) \
+ _ (u32, virtual_address) \
+ _ (u32, physical_address) \
+ _ (u32, file_size) \
+ _ (u32, memory_size) \
+ _ (u32, flags) \
+ _ (u32, align)
+
+#define foreach_elf64_segment_header \
+ _ (u32, type) \
+ _ (u32, flags) \
+ _ (u64, file_offset) \
+ _ (u64, virtual_address) \
+ _ (u64, physical_address) \
+ _ (u64, file_size) \
+ _ (u64, memory_size) \
+ _ (u64, align)
+
+/* Symbol table. */
+#define foreach_elf32_symbol_header \
+ _ (u32, name) \
+ _ (u32, value) \
+ _ (u32, size) \
+ /* binding upper 4 bits; type lower 4 bits */ \
+ _ (u8, binding_and_type) \
+ _ (u8, visibility) \
+ _ (u16, section_index)
+
+#define foreach_elf64_symbol_header \
+ _ (u32, name) \
+ _ (u8, binding_and_type) \
+ _ (u8, visibility) \
+ _ (u16, section_index) \
+ _ (u64, value) \
+ _ (u64, size)
+
+#define _(t,f) t f;
+
+typedef struct
+{
+foreach_elf32_file_header} elf32_file_header_t;
+
+typedef struct
+{
+foreach_elf64_file_header} elf64_file_header_t;
+
+typedef struct
+{
+foreach_elf32_section_header} elf32_section_header_t;
+
+typedef struct
+{
+foreach_elf64_section_header} elf64_section_header_t;
+
+typedef struct
+{
+foreach_elf32_segment_header} elf32_segment_header_t;
+
+typedef struct
+{
+foreach_elf64_segment_header} elf64_segment_header_t;
+
+typedef struct
+{
+foreach_elf32_symbol_header} elf32_symbol_t;
+
+typedef struct
+{
+foreach_elf64_symbol_header} elf64_symbol_t;
+#undef _
+
+/* Special section names. */
+#define foreach_elf_symbol_reserved_section_index \
+ _ (ABSOLUTE, 0xfff1) /* Associated symbol is absolute */ \
+ _ (COMMON, 0xfff2) /* Associated symbol is common */ \
+ _ (XINDEX, 0xffff) /* Index is in extra table. */
+
+#define ELF_SYMBOL_SECTION_RESERVED_LO 0xff00
+#define ELF_SYMBOL_SECTION_RESERVED_HI 0xffff
+#define ELF_SYMBOL_SECTION_ARCH_SPECIFIC_LO 0xff00
+#define ELF_SYMBOL_SECTION_ARCH_SPECIFIC_HI 0xff1f
+#define ELF_SYMBOL_SECTION_OS_SPECIFIC_LO 0xff20
+#define ELF_SYMBOL_SECTION_OS_SPECIFIC_HI 0xff3f
+
+/* Section types. */
+#define foreach_elf_section_type \
+ _ (UNUSED, 0) \
+ _ (PROGRAM_DATA, 1) \
+ _ (SYMBOL_TABLE, 2) \
+ _ (STRING_TABLE, 3) \
+ _ (RELOCATION_ADD, 4) \
+ _ (SYMBOL_TABLE_HASH, 5) \
+ _ (DYNAMIC, 6) /* Dynamic linking information */ \
+ _ (NOTE, 7) /* Notes */ \
+ _ (NO_BITS, 8) /* Program space with no data (bss) */ \
+ _ (RELOCATION, 9) /* Relocation entries, no addends */ \
+ _ (DYNAMIC_SYMBOL_TABLE, 11) /* Dynamic linker symbol table */ \
+ _ (INIT_ARRAY, 14) /* Array of constructors */ \
+ _ (FINI_ARRAY, 15) /* Array of destructors */ \
+ _ (PREINIT_ARRAY, 16) /* Array of pre-constructors */ \
+ _ (GROUP, 17) /* Section group */ \
+ _ (SYMTAB_SHNDX, 18) /* Extended section indices */ \
+ _ (OS_SPECIFIC_LO, 0x60000000) /* Start OS-specific */ \
+ _ (GNU_LIBLIST, 0x6ffffff7) /* Prelink library list */ \
+ _ (CHECKSUM, 0x6ffffff8) /* Checksum for DSO content. */ \
+ _ (SUNW_MOVE, 0x6ffffffa) \
+ _ (SUNW_COMDAT, 0x6ffffffb) \
+ _ (SUNW_SYMINFO, 0x6ffffffc) \
+ _ (GNU_VERDEF, 0x6ffffffd) /* Version definition section. */ \
+ _ (GNU_VERNEED, 0x6ffffffe) /* Version needs section. */ \
+ _ (GNU_VERSYM, 0x6fffffff) /* Version symbol table. */ \
+ _ (ARCH_SPECIFIC_LO, 0x70000000) /* Start of processor-specific */ \
+ _ (ARCH_SPECIFIC_HI, 0x7fffffff) /* End of processor-specific */ \
+ _ (APP_SPECIFIC_LO, 0x80000000) /* Start of application-specific */ \
+ _ (APP_SPECIFIC_HI, 0x8fffffff) /* End of application-specific */
+
+/* Section flags. */
+#define foreach_elf_section_flag \
+ _ (WRITE, 0) \
+ _ (ALLOC, 1) \
+ _ (EXEC, 2) \
+ _ (MERGE, 3) \
+ _ (STRING_TABLE, 5) \
+ _ (INFO_LINK, 6) \
+ _ (PRESERVE_LINK_ORDER, 7) \
+ _ (OS_NON_CONFORMING, 8) \
+ _ (GROUP, 9) \
+ _ (TLS, 10) \
+ _ (OS_SPECIFIC_LO, 20) \
+ _ (OS_SPECIFIC_HI, 27) \
+ _ (ARCH_SPECIFIC_LO, 28) \
+ _ (ARCH_SPECIFIC_HI, 31)
+
+typedef enum
+{
+#define _(f,i) ELF_SECTION_##f = i,
+ foreach_elf_section_type
+#undef _
+ ELF_SECTION_OS_SPECIFIC_HI = 0x6fffffff,
+} elf_section_type_t;
+
+typedef enum
+{
+#define _(f,i) ELF_SECTION_FLAG_BIT_##f = i,
+ foreach_elf_section_flag
+#undef _
+} elf_section_flag_bit_t;
+
+typedef enum
+{
+#define _(f,i) ELF_SECTION_FLAG_##f = 1 << ELF_SECTION_FLAG_BIT_##f,
+ foreach_elf_section_flag
+#undef _
+} elf_section_flag_t;
+
+/* Symbol bindings (upper 4 bits of binding_and_type). */
+#define foreach_elf_symbol_binding \
+ _ (LOCAL, 0) /* Local symbol */ \
+ _ (GLOBAL, 1) /* Global symbol */ \
+ _ (WEAK, 2) /* Weak symbol */ \
+ _ (OS_SPECIFIC_LO, 10) /* Start of OS-specific */ \
+ _ (OS_SPECIFIC_HI, 12) /* End of OS-specific */ \
+ _ (ARCH_SPECIFIC_LO, 13) /* Start of processor-specific */ \
+ _ (ARCH_SPECIFIC_HI, 15) /* End of processor-specific */
+
+/* Symbol types (lower 4 bits of binding_and_type). */
+#define foreach_elf_symbol_type \
+ _ (NONE, 0) \
+ _ (DATA, 1) /* Symbol is a data object */ \
+ _ (CODE, 2) /* Symbol is a code object */ \
+ _ (SECTION, 3) /* Symbol associated with a section */ \
+ _ (FILE, 4) /* Symbol's name is file name */ \
+ _ (COMMON, 5) /* Symbol is a common data object */ \
+ _ (TLS, 6) /* Symbol is thread-local data */ \
+ _ (OS_SPECIFIC_LO, 10) /* Start of OS-specific */ \
+ _ (OS_SPECIFIC_HI, 12) /* End of OS-specific */ \
+ _ (ARCH_SPECIFIC_LO, 13) /* Start of processor-specific */ \
+ _ (ARCH_SPECIFIC_HI, 15) /* End of processor-specific */
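+
+/* Binding occupies the upper 4 bits of binding_and_type, type the
+ lower 4. Illustrative accessors (a sketch; these helpers are not
+ part of the API declared in this header):
+
+ binding = (sym->binding_and_type >> 4) & 0xf;
+ type = sym->binding_and_type & 0xf;
+*/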
+
+/* Symbol visibility. */
+#define foreach_elf_symbol_visibility \
+ _ (DEFAULT, 0) /* Default symbol visibility rules */ \
+ _ (INTERNAL, 1) /* Processor specific hidden class */ \
+ _ (HIDDEN, 2) /* Unavailable in other modules */ \
+ _ (PROTECTED, 3) /* Not preemptible, not exported */
+
+/* The syminfo section if available contains additional
+ information about every dynamic symbol. */
+typedef struct
+{
+ u16 bound_to;
+ u16 flags;
+} elf_symbol_info_t;
+
+/* Possible values for bound_to. */
+#define foreach_elf_symbol_info_bound_to \
+ _ (SELF, 0xffff) /* Symbol bound to self */ \
+ _ (PARENT, 0xfffe) /* Symbol bound to parent */ \
+ _ (RESERVED_LO, 0xff00) \
+ _ (RESERVED_HI, 0xffff)
+
+/* Symbol info flags. */
+#define foreach_elf_symbol_info_flags \
+ _ (DIRECT) /* Direct bound symbol */ \
+ _ (PASS_THRU) /* Pass-thru symbol for translator */ \
+ _ (COPY) /* Symbol is a copy-reloc */ \
+ _ (LAZY_LOAD) /* Symbol bound to object to be lazy loaded */
+
+/* Relocation table entry with/without addend. */
+typedef struct
+{
+ u32 address;
+ u32 symbol_and_type; /* high 24 symbol, low 8 type. */
+ i32 addend[0];
+} elf32_relocation_t;
+
+typedef struct
+{
+ u64 address;
+ u64 symbol_and_type; /* high 32 symbol, low 32 type. */
+ i64 addend[0];
+} elf64_relocation_t;
+
+typedef struct
+{
+ u64 address;
+ u64 symbol_and_type;
+ u64 addend;
+} elf_relocation_with_addend_t;
+
+#define elf_relocation_next(r,type) \
+ ((void *) ((r) + 1) \
+ + ((type) == ELF_SECTION_RELOCATION_ADD ? sizeof ((r)->addend[0]) : 0))
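+
+/* Usage sketch: walking a raw 64-bit relocation section in place, as
+ add_relocation_table does before converting entries to
+ elf_relocation_with_addend_t. Illustrative only; `contents' and
+ `n_bytes' are assumed to come from elf_get_section_contents, and
+ `section_header_type' is the section's header.type:
+
+ elf64_relocation_t *r = contents;
+ while ((void *) r < contents + n_bytes)
+ {
+ u32 symbol = r->symbol_and_type >> 32;
+ u32 type = r->symbol_and_type & 0xffffffff;
+ r = elf_relocation_next (r, section_header_type);
+ }
+*/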
+
+/* Segment type. */
+#define foreach_elf_segment_type \
+ _ (UNUSED, 0) \
+ _ (LOAD, 1) /* Loadable program segment */ \
+ _ (DYNAMIC, 2) /* Dynamic linking information */ \
+ _ (INTERP, 3) /* Program interpreter */ \
+ _ (NOTE, 4) /* Auxiliary information */ \
+ _ (SEGMENT_TABLE, 6) /* Entry for header table itself */ \
+ _ (TLS, 7) /* Thread-local storage segment */ \
+ _ (OS_SPECIFIC_LO, 0x60000000) /* Start of OS-specific */ \
+ _ (GNU_EH_FRAME, 0x6474e550) /* GCC .eh_frame_hdr segment */ \
+ _ (GNU_STACK, 0x6474e551) /* Indicates stack executability */ \
+ _ (GNU_RELRO, 0x6474e552) /* Read-only after relocation */ \
+ _ (SUNW_BSS, 0x6ffffffa) /* Sun specific BSS */ \
+ _ (SUNW_STACK, 0x6ffffffb) /* Sun specific stack */ \
+ _ (OS_SPECIFIC_HI, 0x6fffffff) /* End of OS-specific */ \
+ _ (ARCH_SPECIFIC_LO, 0x70000000) /* Start of processor-specific */ \
+ _ (ARCH_SPECIFIC_HI, 0x7fffffff) /* End of processor-specific */
+
+/* Segment flags. */
+#define foreach_elf_segment_flag \
+ _ (EXEC, 0) \
+ _ (WRITE, 1) \
+ _ (READ, 2) \
+ _ (OS_SPECIFIC_LO, 20) \
+ _ (OS_SPECIFIC_HI, 27) \
+ _ (ARCH_SPECIFIC_LO, 28) \
+ _ (ARCH_SPECIFIC_HI, 31)
+
+typedef enum
+{
+#define _(f,i) ELF_SEGMENT_##f = i,
+ foreach_elf_segment_type
+#undef _
+} elf_segment_type_t;
+
+typedef enum
+{
+#define _(f,i) ELF_SEGMENT_FLAG_BIT_##f = i,
+ foreach_elf_segment_flag
+#undef _
+} elf_segment_flag_bit_t;
+
+typedef enum
+{
+#define _(f,i) ELF_SEGMENT_FLAG_##f = 1 << ELF_SEGMENT_FLAG_BIT_##f,
+ foreach_elf_segment_flag
+#undef _
+} elf_segment_flag_t;
+
+#define foreach_elf32_dynamic_entry_header \
+ _ (u32, type) \
+ _ (u32, data)
+
+#define foreach_elf64_dynamic_entry_header \
+ _ (u64, type) \
+ _ (u64, data)
+
+#define _(t,f) t f;
+
+typedef struct
+{
+foreach_elf32_dynamic_entry_header} elf32_dynamic_entry_t;
+
+typedef struct
+{
+foreach_elf64_dynamic_entry_header} elf64_dynamic_entry_t;
+
+#undef _
+
+#define foreach_elf_dynamic_entry_type \
+ _ (END, 0) /* Marks end of dynamic section */ \
+ _ (NEEDED_LIBRARY, 1) /* Name of needed library */ \
+ _ (PLT_RELOCATION_SIZE, 2) /* Size in bytes of PLT relocs */ \
+ _ (PLT_GOT, 3) /* Processor defined value */ \
+ _ (SYMBOL_HASH, 4) /* Address of symbol hash table */ \
+ _ (STRING_TABLE, 5) /* Address of string table */ \
+ _ (SYMBOL_TABLE, 6) /* Address of symbol table */ \
+ _ (RELA_ADDRESS, 7) /* Address of Rela relocs */ \
+ _ (RELA_SIZE, 8) /* Total size of Rela relocs */ \
+ _ (RELA_ENTRY_SIZE, 9) /* Size of one Rela reloc */ \
+ _ (STRING_TABLE_SIZE, 10) /* Size of string table */ \
+ _ (SYMBOL_TABLE_ENTRY_SIZE, 11) /* Size of one symbol table entry */ \
+ _ (INIT_FUNCTION, 12) /* Address of init function */ \
+ _ (FINI_FUNCTION, 13) /* Address of termination function */ \
+ _ (SONAME, 14) /* Name of shared object */ \
+ _ (RPATH, 15) /* Library search path (deprecated) */ \
+ _ (SYMBOLIC, 16) /* Start symbol search here */ \
+ _ (REL, 17) /* Address of Rel relocs */ \
+ _ (RELSZ, 18) /* Total size of Rel relocs */ \
+ _ (RELENT, 19) /* Size of one Rel reloc */ \
+ _ (PLT_RELOCATION_TYPE, 20) /* Type of reloc in PLT */ \
+ _ (DEBUG, 21) /* For debugging; unspecified */ \
+ _ (TEXTREL, 22) /* Reloc might modify .text */ \
+ _ (PLT_RELOCATION_ADDRESS, 23) /* Address of PLT relocs */ \
+ _ (BIND_NOW, 24) /* Process relocations of object */ \
+ _ (INIT_ARRAY, 25) /* Array with addresses of init fct */ \
+ _ (FINI_ARRAY, 26) /* Array with addresses of fini fct */ \
+ _ (INIT_ARRAYSZ, 27) /* Size in bytes of DT_INIT_ARRAY */ \
+ _ (FINI_ARRAYSZ, 28) /* Size in bytes of DT_FINI_ARRAY */ \
+ _ (RUN_PATH, 29) /* Library search path */ \
+ _ (FLAGS, 30) /* Flags for object being loaded */ \
+ _ (ENCODING, 31) /* Start of encoded range */ \
+ _ (PREINIT_ARRAY, 32) /* Array with addresses of fns */ \
+ _ (PREINIT_ARRAY_SIZE, 33) /* Size of PREINIT_ARRAY in bytes. */ \
+ _ (GNU_PRELINKED, 0x6ffffdf5) /* Prelinking timestamp */ \
+ _ (GNU_CONFLICTSZ, 0x6ffffdf6) /* Size of conflict section */ \
+ _ (GNU_LIBLISTSZ, 0x6ffffdf7) /* Size of library list */ \
+ _ (CHECKSUM, 0x6ffffdf8) \
+ _ (PLTPADSZ, 0x6ffffdf9) \
+ _ (MOVEENT, 0x6ffffdfa) \
+ _ (MOVESZ, 0x6ffffdfb) \
+ _ (FEATURE_1, 0x6ffffdfc) /* Feature selection (DTF_*). */ \
+ _ (POSFLAG_1, 0x6ffffdfd) /* Flags for following entries. */ \
+ _ (SYMINSZ, 0x6ffffdfe) /* Size of syminfo table (in bytes) */ \
+ _ (SYMINENT, 0x6ffffdff) /* Entry size of syminfo */ \
+ _ (GNU_HASH, 0x6ffffef5) \
+ _ (GNU_CONFLICT, 0x6ffffef8) /* Start of conflict section */ \
+ _ (GNU_LIBLIST, 0x6ffffef9) /* Library list */ \
+ _ (CONFIG, 0x6ffffefa) /* Configuration information. */ \
+ _ (DEPAUDIT, 0x6ffffefb) /* Dependency auditing. */ \
+ _ (AUDIT, 0x6ffffefc) /* Object auditing. */ \
+ _ (PLTPAD, 0x6ffffefd) /* PLT padding. */ \
+ _ (MOVETAB, 0x6ffffefe) /* Move table. */ \
+ _ (SYMINFO, 0x6ffffeff) /* Syminfo table. */ \
+ _ (VERSYM, 0x6ffffff0) \
+ _ (RELACOUNT, 0x6ffffff9) \
+ _ (RELCOUNT, 0x6ffffffa) \
+ _ (FLAGS_1, 0x6ffffffb) /* State flags, see DF_1_* below. */ \
+ _ (VERSION_DEF, 0x6ffffffc) /* Address of version definition table */ \
+ _ (VERSION_DEF_COUNT, 0x6ffffffd) /* Number of version definitions */ \
+ _ (VERSION_NEED, 0x6ffffffe) /* Address of table with needed versions */ \
+ _ (VERSION_NEED_COUNT, 0x6fffffff) /* Number of needed versions */ \
+ _ (AUXILIARY, 0x7ffffffd) /* Shared object to load before self */ \
+ _ (FILTER, 0x7fffffff) /* Shared object to get values from */
+
+typedef enum
+{
+#define _(f,n) ELF_DYNAMIC_ENTRY_##f = (n),
+ foreach_elf_dynamic_entry_type
+#undef _
+} elf_dynamic_entry_type_t;
+
+/* Values of `d_un.d_val' in the DT_FLAGS entry. */
+#define ELF_DYNAMIC_FLAGS_ORIGIN (1 << 0) /* Object may use DF_ORIGIN */
+#define ELF_DYNAMIC_FLAGS_SYMBOLIC (1 << 1) /* Symbol resolutions starts here */
+#define ELF_DYNAMIC_FLAGS_TEXT_RELOCATIONS (1 << 2) /* Object contains text relocations */
+#define ELF_DYNAMIC_FLAGS_BIND_NOW (1 << 3) /* No lazy binding for this object */
+#define ELF_DYNAMIC_FLAGS_STATIC_TLS (1 << 4) /* Module uses the static TLS model */
+
+/* State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1
+ entry in the dynamic section. */
+#define DF_1_NOW 0x00000001 /* Set RTLD_NOW for this object. */
+#define DF_1_GLOBAL 0x00000002 /* Set RTLD_GLOBAL for this object. */
+#define DF_1_GROUP 0x00000004 /* Set RTLD_GROUP for this object. */
+#define DF_1_NODELETE 0x00000008 /* Set RTLD_NODELETE for this object. */
+#define DF_1_LOADFLTR 0x00000010 /* Trigger filtee loading at runtime. */
+#define DF_1_INITFIRST 0x00000020 /* Set RTLD_INITFIRST for this object */
+#define DF_1_NOOPEN 0x00000040 /* Set RTLD_NOOPEN for this object. */
+#define DF_1_ORIGIN 0x00000080 /* $ORIGIN must be handled. */
+#define DF_1_DIRECT 0x00000100 /* Direct binding enabled. */
+#define DF_1_TRANS 0x00000200
+#define DF_1_INTERPOSE 0x00000400 /* Object is used to interpose. */
+#define DF_1_NODEFLIB 0x00000800 /* Ignore default lib search path. */
+#define DF_1_NODUMP 0x00001000 /* Object can't be dldump'ed. */
+#define DF_1_CONFALT 0x00002000 /* Configuration alternative created. */
+#define DF_1_ENDFILTEE 0x00004000 /* Filtee terminates filters search. */
+#define DF_1_DISPRELDNE 0x00008000 /* Disp reloc applied at build time. */
+#define DF_1_DISPRELPND 0x00010000 /* Disp reloc applied at run-time. */
+
+/* Flags for the feature selection in DT_FEATURE_1. */
+#define DTF_1_PARINIT 0x00000001
+#define DTF_1_CONFEXP 0x00000002
+
+/* Flags in the DT_POSFLAG_1 entry affecting only the next DT_* entry. */
+#define DF_P1_LAZYLOAD 0x00000001 /* Lazyload following object. */
+#define DF_P1_GROUPPERM 0x00000002 /* Symbols from next object are not
+ generally available. */
+
+/* Version definition sections. */
+typedef struct
+{
+ u16 version;
+ u16 flags;
+ u16 index;
+ u16 aux_count;
+ u32 name_hash;
+ u32 aux_byte_offset;
+ u32 byte_offset_next_version_definition;
+} elf_dynamic_version_definition_t;
+
+typedef struct
+{
+ u32 name;
+ u32 next_offset; /* byte offset of ver def aux next entry */
+} elf_dynamic_version_definition_aux_t;
+
+/* Version definition flags. */
+#define ELF_DYNAMIC_VERSION_FILE (1 << 0) /* Version definition of file itself */
+#define ELF_DYNAMIC_VERSION_WEAK (1 << 1) /* Weak version identifier */
+
+/* Version symbol index. */
+#define ELF_DYNAMIC_VERSYM_LOCAL 0 /* Symbol is local. */
+#define ELF_DYNAMIC_VERSYM_GLOBAL 1 /* Symbol is global. */
+#define ELF_DYNAMIC_VERSYM_RESERVED_LO 0xff00 /* Beginning of reserved entries. */
+#define ELF_DYNAMIC_VERSYM_ELIMINATE 0xff01 /* Symbol is to be eliminated. */
+
+/* Version dependency section. */
+#define foreach_elf_dynamic_version_need_field \
+ _ (u16, version) \
+ _ (u16, aux_count) \
+ _ (u32, file_name_offset) \
+ _ (u32, first_aux_offset) \
+ _ (u32, next_offset)
+
+#define foreach_elf_dynamic_version_need_aux_field \
+ _ (u32, hash) \
+ _ (u16, flags) \
+ _ (u16, versym_index) \
+ _ (u32, name) \
+ _ (u32, next_offset)
+
+typedef struct
+{
+#define _(t,f) t f;
+ foreach_elf_dynamic_version_need_field
+#undef _
+} elf_dynamic_version_need_t;
+
+typedef struct
+{
+#define _(t,f) t f;
+ foreach_elf_dynamic_version_need_aux_field
+#undef _
+} elf_dynamic_version_need_aux_t;
+
+typedef union
+{
+ elf_dynamic_version_need_t need;
+ elf_dynamic_version_need_aux_t aux;
+} elf_dynamic_version_need_union_t;
+
+/* Note section contents. Each entry in the note section begins with
+ a header of a fixed form. */
+
+typedef struct
+{
+ u32 name_size;
+ u32 descriptor_size;
+ u32 type;
+} elf_note_t;
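+
+/* The header is followed by name_size bytes of name and
+ descriptor_size bytes of descriptor, each padded to 4-byte
+ alignment. */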
+
+/* Known names of notes. */
+
+/* Solaris entries in the note section have this name. */
+#define ELF_NOTE_SOLARIS "SUNW Solaris"
+
+/* Note entries for GNU systems have this name. */
+#define ELF_NOTE_GNU "GNU"
+
+
+/* Defined types of notes for Solaris. */
+
+/* Value of descriptor (one word) is desired pagesize for the binary. */
+#define ELF_NOTE_PAGESIZE_HINT 1
+
+
+/* Defined note types for GNU systems. */
+
+/* ABI information. The descriptor consists of words:
+ word 0: OS descriptor
+ word 1: major version of the ABI
+ word 2: minor version of the ABI
+ word 3: subminor version of the ABI
+*/
+#ifndef ELF_NOTE_ABI
+#define ELF_NOTE_ABI 1
+#endif
+
+/* Known OSes. These value can appear in word 0 of an ELF_NOTE_ABI
+ note section entry. */
+#define ELF_NOTE_OS_LINUX 0
+#define ELF_NOTE_OS_GNU 1
+#define ELF_NOTE_OS_SOLARIS2 2
+#define ELF_NOTE_OS_FREEBSD 3
+
+/* AMD x86-64 relocations. */
+#define foreach_elf_x86_64_relocation_type \
+ _ (NONE, 0) /* No reloc */ \
+ _ (DIRECT_64, 1) /* Direct 64 bit */ \
+ _ (PC_REL_I32, 2) /* PC relative 32 bit signed */ \
+ _ (GOT_REL_32, 3) /* 32 bit GOT entry */ \
+ _ (PLT_REL_32, 4) /* 32 bit PLT address */ \
+ _ (COPY, 5) /* Copy symbol at runtime */ \
+ _ (CREATE_GOT, 6) /* Create GOT entry */ \
+ _ (CREATE_PLT, 7) /* Create PLT entry */ \
+ _ (RELATIVE, 8) /* Adjust by program base */ \
+ _ (PC_REL_I32_GOT, 9) /* 32 bit PC relative offset to GOT */ \
+ _ (DIRECT_U32, 10) /* Direct 32 bit zero extended */ \
+ _ (DIRECT_I32, 11) /* Direct 32 bit sign extended */ \
+ _ (DIRECT_U16, 12) /* Direct 16 bit zero extended */ \
+ _ (PC_REL_I16, 13) /* 16 bit sign extended pc relative */ \
+ _ (DIRECT_I8, 14) /* Direct 8 bit sign extended */ \
+ _ (PC_REL_I8, 15) /* 8 bit sign extended pc relative */ \
+ _ (DTPMOD64, 16) /* ID of module containing symbol */ \
+ _ (DTPOFF64, 17) /* Offset in module's TLS block */ \
+ _ (TPOFF64, 18) /* Offset in initial TLS block */ \
+ _ (TLSGD, 19) /* 32 bit signed PC relative offset to two GOT entries for GD symbol */ \
+ _ (TLSLD, 20) /* 32 bit signed PC relative offset to two GOT entries for LD symbol */ \
+ _ (DTPOFF32, 21) /* Offset in TLS block */ \
+ _ (GOTTPOFF, 22) /* 32 bit signed PC relative offset to GOT entry for IE symbol */ \
+ _ (TPOFF32, 23) /* Offset in initial TLS block */
+
+typedef struct
+{
+ elf64_symbol_t *symbols;
+
+ u32 section_index;
+
+ u8 *string_table;
+
+ uword *symbol_by_name;
+} elf_symbol_table_t;
+
+always_inline void
+elf_symbol_table_free (elf_symbol_table_t * s)
+{
+ vec_free (s->symbols);
+ hash_free (s->symbol_by_name);
+}
+
+always_inline u8 *
+elf_symbol_name (elf_symbol_table_t * t, elf64_symbol_t * sym)
+{
+ return vec_elt_at_index (t->string_table, sym->name);
+}
+
+typedef struct
+{
+ elf_relocation_with_addend_t *relocations;
+
+ u32 section_index;
+} elf_relocation_table_t;
+
+always_inline void
+elf_relocation_table_free (elf_relocation_table_t * r)
+{
+ vec_free (r->relocations);
+}
+
+typedef struct
+{
+ elf64_section_header_t header;
+
+ u32 index;
+
+ /* Index of segments containing this section. */
+ uword *segment_index_bitmap;
+
+ /* Aligned size (includes padding not counted in
+ header.file_size). */
+ u64 align_size;
+
+ i64 exec_address_change;
+
+ u8 *contents;
+} elf_section_t;
+
+typedef struct
+{
+ elf64_segment_header_t header;
+
+ /* Sections contained in this segment. */
+ uword *section_index_bitmap;
+
+ u32 index;
+
+ u8 *contents;
+} elf_segment_t;
+
+typedef struct
+{
+ u8 need_byte_swap;
+
+ u8 parsed_symbols;
+
+ char *file_name;
+
+ elf_first_header_t first_header;
+
+ elf64_file_header_t file_header;
+
+ elf_segment_t *segments;
+
+ elf_section_t *sections;
+
+ uword *section_by_name;
+ uword *section_by_start_address;
+
+ elf_symbol_table_t *symbol_tables;
+ elf_relocation_table_t *relocation_tables;
+
+ char *interpreter;
+
+ elf64_dynamic_entry_t *dynamic_entries;
+ u8 *dynamic_string_table;
+ u32 dynamic_string_table_section_index;
+ u32 dynamic_symbol_table_section_index;
+ u32 dynamic_symbol_table_index;
+ u32 dynamic_section_index;
+ u16 *versym;
+ u32 versym_section_index;
+ elf_dynamic_version_need_union_t *verneed;
+ u32 verneed_section_index;
+} elf_main_t;
+
+always_inline void
+elf_main_init (elf_main_t * em)
+{
+ memset (em, 0, sizeof (em[0]));
+}
+
+always_inline void
+elf_main_free (elf_main_t * em)
+{
+ uword i;
+
+ for (i = 0; i < vec_len (em->segments); i++)
+ vec_free (em->segments[i].contents);
+ vec_free (em->segments);
+
+ for (i = 0; i < vec_len (em->sections); i++)
+ vec_free (em->sections[i].contents);
+ vec_free (em->sections);
+
+ hash_free (em->section_by_name);
+ hash_free (em->section_by_start_address);
+ for (i = 0; i < vec_len (em->symbol_tables); i++)
+ elf_symbol_table_free (em->symbol_tables + i);
+ vec_free (em->symbol_tables);
+ for (i = 0; i < vec_len (em->relocation_tables); i++)
+ elf_relocation_table_free (em->relocation_tables + i);
+ vec_free (em->relocation_tables);
+
+ /* Also release the dynamic-section copies made by elf_parse_dynamic. */
+ vec_free (em->dynamic_entries);
+ vec_free (em->dynamic_string_table);
+ vec_free (em->versym);
+ vec_free (em->verneed);
+ vec_free (em->interpreter);
+}
+
+always_inline void
+elf_get_segment_contents (elf_main_t * em, void *data, uword segment_index)
+{
+ elf_segment_t *g = vec_elt_at_index (em->segments, segment_index);
+ if (!g->contents)
+ vec_add (g->contents, data + g->header.file_offset,
+ g->header.memory_size);
+}
+
+always_inline void *
+elf_get_section_contents (elf_main_t * em,
+ uword section_index, uword elt_size)
+{
+ elf_section_t *s;
+ void *result;
+
+ s = vec_elt_at_index (em->sections, section_index);
+
+ result = 0;
+ if (vec_len (s->contents) > 0)
+ {
+ /* Make vector copy of contents with given element size. */
+ result = _vec_resize (result,
+ vec_len (s->contents) / elt_size,
+ vec_len (s->contents),
+ /* header_bytes */ 0,
+ /* align */ 0);
+ clib_memcpy (result, s->contents, vec_len (s->contents));
+ }
+
+ return result;
+}
+
+always_inline void
+elf_set_section_contents (elf_main_t * em,
+ uword section_index,
+ void *new_contents, uword n_content_bytes)
+{
+ elf_section_t *s;
+
+ s = vec_elt_at_index (em->sections, section_index);
+ vec_free (s->contents);
+ vec_add (s->contents, new_contents, n_content_bytes);
+}
+
+always_inline u8 *
+elf_section_name (elf_main_t * em, elf_section_t * s)
+{
+ elf_section_t *es =
+ vec_elt_at_index (em->sections,
+ em->file_header.section_header_string_table_index);
+ return vec_elt_at_index (es->contents, s->header.name);
+}
+
+always_inline u8
+elf_swap_u8 (elf_main_t * em, u8 x)
+{
+ return x;
+}
+
+always_inline u16
+elf_swap_u16 (elf_main_t * em, u16 x)
+{
+ return em->need_byte_swap ? clib_byte_swap_u16 (x) : x;
+}
+
+always_inline u32
+elf_swap_u32 (elf_main_t * em, u32 x)
+{
+ return em->need_byte_swap ? clib_byte_swap_u32 (x) : x;
+}
+
+always_inline u64
+elf_swap_u64 (elf_main_t * em, u64 x)
+{
+ return em->need_byte_swap ? clib_byte_swap_u64 (x) : x;
+}
+
+#define FORMAT_ELF_MAIN_SYMBOLS (1 << 0)
+#define FORMAT_ELF_MAIN_RELOCATIONS (1 << 1)
+#define FORMAT_ELF_MAIN_DYNAMIC (1 << 2)
+
+format_function_t format_elf_main;
+format_function_t format_elf_symbol;
+
+clib_error_t *elf_read_file (elf_main_t * em, char *file_name);
+clib_error_t *elf_write_file (elf_main_t * em, char *file_name);
+clib_error_t *elf_delete_named_section (elf_main_t * em, char *section_name);
+clib_error_t *elf_parse (elf_main_t * em, void *data, uword data_bytes);
+void elf_parse_symbols (elf_main_t * em);
+
+clib_error_t *elf_get_section_by_name (elf_main_t * em, char *section_name,
+ elf_section_t ** result);
+clib_error_t *elf_get_section_by_start_address (elf_main_t * em,
+ uword start_address,
+ elf_section_t ** result);
+
+void
+elf_create_section_with_contents (elf_main_t * em,
+ char *section_name,
+ elf64_section_header_t * header,
+ void *contents, uword n_content_bytes);
+uword elf_delete_segment_with_type (elf_main_t * em,
+ elf_segment_type_t segment_type);
+void elf_set_dynamic_entries (elf_main_t * em);
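+
+/* Typical read/modify/write cycle (a sketch with error reporting
+ elided; file and section names are placeholders):
+
+ elf_main_t em;
+ clib_error_t *e = elf_read_file (&em, "a.out");
+ if (!e)
+ {
+ e = elf_delete_named_section (&em, ".comment");
+ if (!e)
+ e = elf_write_file (&em, "a.out.stripped");
+ elf_main_free (&em);
+ }
+*/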
+
+#endif /* included_clib_elf_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/elf_clib.c b/src/vppinfra/elf_clib.c
new file mode 100644
index 00000000..7bb72ee3
--- /dev/null
+++ b/src/vppinfra/elf_clib.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/elf_clib.h>
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+typedef struct
+{
+ char **path;
+} path_search_t;
+
+always_inline void
+path_search_free (path_search_t * p)
+{
+ uword i;
+ for (i = 0; i < vec_len (p->path); i++)
+ vec_free (p->path[i]);
+ vec_free (p->path);
+}
+
+static char **
+split_string (char *string, u8 delimiter)
+{
+ char **result = 0;
+ char *p, *start, *s;
+
+ p = string;
+ while (1)
+ {
+ start = p;
+ while (*p != 0 && *p != delimiter)
+ p++;
+ s = 0;
+ vec_add (s, start, p - start);
+ vec_add1 (s, 0);
+ vec_add1 (result, s);
+ if (*p == 0)
+ break;
+ p++;
+ }
+
+ return result;
+}
+
+static int
+file_exists_and_is_executable (char *dir, char *file)
+{
+ char *path = (char *) format (0, "%s/%s%c", dir, file, 0);
+ struct stat s;
+ uword yes;
+
+ yes = (stat (path, &s) >= 0
+ && S_ISREG (s.st_mode)
+ && 0 != (s.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)));
+
+ vec_free (path);
+
+ return yes;
+}
+
+static char *
+path_search (char *file)
+{
+ path_search_t ps;
+ uword i;
+ char *result;
+
+ /* Relative or absolute path. */
+ if (file[0] == '.' || file[0] == '/')
+ return file;
+
+ if (getenv ("PATH") == 0)
+ return file;
+
+ ps.path = split_string (getenv ("PATH"), ':');
+
+ for (i = 0; i < vec_len (ps.path); i++)
+ if (file_exists_and_is_executable (ps.path[i], file))
+ break;
+
+ result = 0;
+ if (i < vec_len (ps.path))
+ result = (char *) format (0, "%s/%s%c", ps.path[i], file, 0);
+
+ path_search_free (&ps);
+
+ return result;
+}
+
+static clib_error_t *
+clib_elf_parse_file (clib_elf_main_t * cem,
+ char *file_name, void *link_address)
+{
+ elf_main_t *em;
+ elf_section_t *s;
+ int fd;
+ struct stat fd_stat;
+ uword mmap_length = 0;
+ void *data = 0;
+ clib_error_t *error = 0;
+
+ vec_add2 (cem->elf_mains, em, 1);
+
+ fd = open (file_name, 0);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", file_name);
+ goto done;
+ }
+
+ if (fstat (fd, &fd_stat) < 0)
+ {
+ error = clib_error_return_unix (0, "fstat `%s'", file_name);
+ goto done;
+ }
+ mmap_length = fd_stat.st_size;
+
+ data = mmap (0, mmap_length, PROT_READ, MAP_SHARED, fd, /* offset */ 0);
+ if (~pointer_to_uword (data) == 0) /* MAP_FAILED, i.e. all-ones */
+ {
+ data = 0;
+ error = clib_error_return_unix (0, "mmap `%s'", file_name);
+ goto done;
+ }
+
+ error = elf_parse (em, data, mmap_length);
+ if (error)
+ goto done;
+
+ /* Look for CLIB special sections. */
+ {
+ char *section_name_start = CLIB_ELF_SECTION_ADD_PREFIX ();
+ uword section_name_start_len = strlen (section_name_start);
+
+ vec_foreach (s, em->sections)
+ {
+ u8 *name = elf_section_name (em, s);
+ uword *p;
+ clib_elf_section_t *vs;
+ clib_elf_section_bounds_t *b;
+
+ /* Section name must begin with the CLIB_ELF_SECTION prefix. */
+ if (memcmp (name, section_name_start, section_name_start_len))
+ continue;
+
+ name += section_name_start_len;
+ p = hash_get_mem (cem->section_by_name, name);
+ if (p)
+ vs = vec_elt_at_index (cem->sections, p[0]);
+ else
+ {
+ name = format (0, "%s%c", name, 0);
+ if (!cem->section_by_name)
+ cem->section_by_name = hash_create_string (0, sizeof (uword));
+ hash_set_mem (cem->section_by_name, name, vec_len (cem->sections));
+ vec_add2 (cem->sections, vs, 1);
+ vs->name = name;
+ }
+
+ vec_add2 (vs->bounds, b, 1);
+ b->lo = link_address + s->header.exec_address;
+ b->hi = b->lo + s->header.file_size;
+ }
+ }
+
+ /* Parse symbols for this file. */
+ {
+ elf_symbol_table_t *t;
+ elf64_symbol_t *s;
+
+ elf_parse_symbols (em);
+ vec_foreach (t, em->symbol_tables)
+ {
+ vec_foreach (s, t->symbols)
+ {
+ s->value += pointer_to_uword (link_address);
+ }
+ }
+ }
+
+ /* No need to keep section contents around. */
+ {
+ elf_section_t *s;
+ vec_foreach (s, em->sections)
+ {
+ if (s->header.type != ELF_SECTION_STRING_TABLE)
+ vec_free (s->contents);
+ }
+ }
+
+done:
+ if (error)
+ elf_main_free (em);
+ if (fd >= 0)
+ close (fd);
+ if (data)
+ munmap (data, mmap_length);
+ return error;
+}
+
+#define __USE_GNU
+#include <link.h>
+
+static int
+add_section (struct dl_phdr_info *info, size_t size, void *opaque)
+{
+ clib_elf_main_t *cem = opaque;
+ clib_error_t *error;
+ char *name = (char *) info->dlpi_name;
+ void *addr = (void *) info->dlpi_addr;
+ uword is_main;
+
+ is_main = strlen (name) == 0;
+ if (is_main)
+ {
+ static int done;
+
+ /* Only do main program once. */
+ if (done++)
+ return 0;
+
+ name = path_search (cem->exec_path);
+ if (!name)
+ {
+ clib_error ("failed to find %s on PATH", cem->exec_path);
+ return 0;
+ }
+ addr = 0;
+ }
+
+ error = clib_elf_parse_file (cem, name, addr);
+ if (error)
+ clib_error_report (error);
+
+ if (is_main && name != cem->exec_path)
+ vec_free (name);
+
+ return 0;
+}
+
+static clib_elf_main_t clib_elf_main;
+
+void
+clib_elf_main_init (char *exec_path)
+{
+ clib_elf_main_t *cem = &clib_elf_main;
+
+ cem->exec_path = exec_path;
+
+ dl_iterate_phdr (add_section, cem);
+}
+
+clib_elf_section_bounds_t *
+clib_elf_get_section_bounds (char *name)
+{
+ clib_elf_main_t *em = &clib_elf_main;
+ uword *p = hash_get (em->section_by_name, name);
+ return p ? vec_elt_at_index (em->sections, p[0])->bounds : 0;
+}
+
+static uword
+symbol_by_address_or_name (char *by_name,
+ uword by_address, clib_elf_symbol_t * s)
+{
+ clib_elf_main_t *cem = &clib_elf_main;
+ elf_main_t *em;
+
+ vec_foreach (em, cem->elf_mains)
+ {
+ elf_symbol_table_t *t;
+ s->elf_main_index = em - cem->elf_mains;
+ vec_foreach (t, em->symbol_tables)
+ {
+ s->symbol_table_index = t - em->symbol_tables;
+ if (by_name)
+ {
+ uword *p = hash_get (t->symbol_by_name, by_name);
+ if (p)
+ {
+ s->symbol = vec_elt (t->symbols, p[0]);
+ return 1;
+ }
+ }
+ else
+ {
+ elf64_symbol_t *x;
+ /* FIXME linear search. */
+ vec_foreach (x, t->symbols)
+ {
+ if (by_address >= x->value && by_address < x->value + x->size)
+ {
+ s->symbol = x[0];
+ return 1;
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+uword
+clib_elf_symbol_by_name (char *by_name, clib_elf_symbol_t * s)
+{
+ return symbol_by_address_or_name (by_name, /* by_address */ 0, s);
+}
+
+uword
+clib_elf_symbol_by_address (uword by_address, clib_elf_symbol_t * s)
+{
+ return symbol_by_address_or_name ( /* by_name */ 0, by_address, s);
+}
+
+u8 *
+format_clib_elf_symbol (u8 * s, va_list * args)
+{
+ clib_elf_main_t *cem = &clib_elf_main;
+ clib_elf_symbol_t *sym = va_arg (*args, clib_elf_symbol_t *);
+ elf_main_t *em;
+ elf_symbol_table_t *t;
+
+ if (!sym)
+ /* Just print table headings. */
+ return format (s, "%U", format_elf_symbol, 0, 0, 0);
+
+ else
+ {
+ em = vec_elt_at_index (cem->elf_mains, sym->elf_main_index);
+ t = vec_elt_at_index (em->symbol_tables, sym->symbol_table_index);
+ return format (s, "%U", format_elf_symbol, em, t, &sym->symbol);
+ }
+}
+
+u8 *
+format_clib_elf_symbol_with_address (u8 * s, va_list * args)
+{
+ uword address = va_arg (*args, uword);
+ clib_elf_main_t *cem = &clib_elf_main;
+ clib_elf_symbol_t sym;
+ elf_main_t *em;
+ elf_symbol_table_t *t;
+
+ if (clib_elf_symbol_by_address (address, &sym))
+ {
+ em = vec_elt_at_index (cem->elf_mains, sym.elf_main_index);
+ t = vec_elt_at_index (em->symbol_tables, sym.symbol_table_index);
+ s = format (s, "%s + 0x%wx",
+ elf_symbol_name (t, &sym.symbol),
+ address - sym.symbol.value);
+ }
+ else
+ s = format (s, "0x%wx", address);
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/elf_clib.h b/src/vppinfra/elf_clib.h
new file mode 100644
index 00000000..25b928c2
--- /dev/null
+++ b/src/vppinfra/elf_clib.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2012 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_elf_self_h
+#define included_clib_elf_self_h
+
+#include <vppinfra/elf.h>
+#include <vppinfra/hash.h>
+
+#define CLIB_ELF_SECTION_DATA_ALIGN 32
+
+#define CLIB_ELF_SECTION_ADD_PREFIX(n) "clib_elf_section_" n
+
+/* The 'used' attribute ensures that static registrations work even if
+ the variable is not referenced. */
+#define CLIB_ELF_SECTION(SECTION) \
+ __attribute__ ((used, \
+ aligned (CLIB_ELF_SECTION_DATA_ALIGN), \
+ section (CLIB_ELF_SECTION_ADD_PREFIX (SECTION))))
+
+/* Given pointer to previous data A get next pointer. EXTRA gives extra
+ space beyond A + 1 used in object. */
+#define clib_elf_section_data_next(a,extra) \
+ uword_to_pointer (round_pow2 (pointer_to_uword (a + 1) + (extra), \
+ CLIB_ELF_SECTION_DATA_ALIGN), \
+ void *)
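+
+/* Hedged usage sketch (all names below are illustrative, not part of
+   this patch): place a registration in a named ELF section at compile
+   time, then walk its instances at run time.
+
+     typedef struct { char *name; } my_reg_t;
+
+     static my_reg_t my_reg CLIB_ELF_SECTION ("my_regs") = { "example" };
+
+     clib_elf_section_bounds_t *b, *bounds =
+       clib_elf_get_section_bounds ("my_regs");
+     vec_foreach (b, bounds)
+       {
+         my_reg_t *r = b->lo;
+         while ((void *) r < b->hi)
+           {
+             process (r->name);   // 'process' is hypothetical
+             r = clib_elf_section_data_next (r, 0);
+           }
+       }
+*/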
+
+typedef struct
+{
+ void *lo, *hi;
+} clib_elf_section_bounds_t;
+
+typedef struct
+{
+ /* Vector of bounds for this section. Multiple shared objects may each
+ contain an instance of the same section. */
+ clib_elf_section_bounds_t *bounds;
+
+ /* Name of ELF section (e.g. .text). */
+ u8 *name;
+} clib_elf_section_t;
+
+typedef struct
+{
+ /* Vector of sections. */
+ clib_elf_section_t *sections;
+
+ /* Hash map of name to section index. */
+ uword *section_by_name;
+
+ /* Unix path that we were exec()ed with. */
+ char *exec_path;
+
+ elf_main_t *elf_mains;
+} clib_elf_main_t;
+
+always_inline void
+clib_elf_main_free (clib_elf_main_t * m)
+{
+ clib_elf_section_t *s;
+ vec_foreach (s, m->sections)
+ {
+ vec_free (s->bounds);
+ vec_free (s->name);
+ }
+ vec_free (m->sections);
+ hash_free (m->section_by_name);
+
+ {
+ elf_main_t *em;
+ vec_foreach (em, m->elf_mains)
+ {
+ elf_main_free (em);
+ }
+ vec_free (m->elf_mains);
+ }
+}
+
+/* Call with exec_path equal to argv[0] from C main. */
+void clib_elf_main_init (char *exec_path);
+
+clib_elf_section_bounds_t *clib_elf_get_section_bounds (char *name);
+
+typedef struct
+{
+ /* The symbol. */
+ elf64_symbol_t symbol;
+
+ /* elf_main_t where symbol came from. */
+ u32 elf_main_index;
+
+ /* Symbol table in elf_main_t where this symbol came from. */
+ u32 symbol_table_index;
+} clib_elf_symbol_t;
+
+/* Returns 1 if found; otherwise zero. */
+uword clib_elf_symbol_by_name (char *name, clib_elf_symbol_t * result);
+uword clib_elf_symbol_by_address (uword address, clib_elf_symbol_t * result);
+
+format_function_t format_clib_elf_symbol, format_clib_elf_symbol_with_address;
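+
+/* Hedged usage sketch: resolve a code address to "symbol + offset" for
+   display (assumes clib_elf_main_init was called with argv[0] first).
+
+     clib_elf_main_init (argv[0]);
+     fformat (stdout, "%U\n",
+              format_clib_elf_symbol_with_address, (uword) &main);
+*/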
+
+#endif /* included_clib_elf_self_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/elog.c b/src/vppinfra/elog.c
new file mode 100644
index 00000000..182ca127
--- /dev/null
+++ b/src/vppinfra/elog.c
@@ -0,0 +1,1113 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005,2009 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/elog.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/math.h>
+
+static inline void
+elog_lock (elog_main_t * em)
+{
+ if (PREDICT_FALSE (em->lock != 0))
+ while (__sync_lock_test_and_set (em->lock, 1))
+ ;
+}
+
+static inline void
+elog_unlock (elog_main_t * em)
+{
+ if (PREDICT_FALSE (em->lock != 0))
+ {
+ CLIB_MEMORY_BARRIER ();
+ *em->lock = 0;
+ }
+}
+
+/* Non-inline version. */
+void *
+elog_event_data (elog_main_t * em,
+ elog_event_type_t * type, elog_track_t * track, u64 cpu_time)
+{
+ return elog_event_data_inline (em, type, track, cpu_time);
+}
+
+static void
+new_event_type (elog_main_t * em, uword i)
+{
+ elog_event_type_t *t = vec_elt_at_index (em->event_types, i);
+
+ if (!em->event_type_by_format)
+ em->event_type_by_format =
+ hash_create_vec ( /* size */ 0, sizeof (u8), sizeof (uword));
+
+ t->type_index_plus_one = i + 1;
+ hash_set_mem (em->event_type_by_format, t->format, i);
+}
+
+static uword
+find_or_create_type (elog_main_t * em, elog_event_type_t * t)
+{
+ uword *p = hash_get_mem (em->event_type_by_format, t->format);
+ uword i;
+
+ if (p)
+ i = p[0];
+ else
+ {
+ i = vec_len (em->event_types);
+ vec_add1 (em->event_types, t[0]);
+ new_event_type (em, i);
+ }
+
+ return i;
+}
+
+/* External function to register types. */
+word
+elog_event_type_register (elog_main_t * em, elog_event_type_t * t)
+{
+ elog_event_type_t *static_type = t;
+ word l;
+
+ elog_lock (em);
+
+ /* Guard against multiple simultaneous registration attempts. */
+ if (t->type_index_plus_one > 0)
+ {
+ elog_unlock (em);
+ return t->type_index_plus_one - 1;
+ }
+
+ l = vec_len (em->event_types);
+
+ t->type_index_plus_one = 1 + l;
+
+ ASSERT (t->format);
+
+ /* If format args are not specified, try to infer sensible defaults
+ so that most of the time the user does not have to specify them. */
+ if (!t->format_args)
+ {
+ uword i, l;
+ char *this_arg;
+
+ l = strlen (t->format);
+ for (i = 0; i < l; i++)
+ {
+ if (t->format[i] != '%')
+ continue;
+ if (i + 1 >= l)
+ continue;
+ if (t->format[i + 1] == '%') /* %% */
+ continue;
+
+ switch (t->format[i + 1])
+ {
+ default:
+ case 'd':
+ case 'x':
+ case 'u':
+ this_arg = "i4"; /* size of u32 */
+ break;
+ case 'f':
+ this_arg = "f8"; /* defaults to f64 */
+ break;
+ case 's':
+ this_arg = "s0"; /* defaults to null terminated string. */
+ break;
+ }
+
+ t->format_args =
+ (char *) format ((u8 *) t->format_args, "%s", this_arg);
+ }
+
+ /* Null terminate. */
+ vec_add1 (t->format_args, 0);
+ }
+
+ vec_add1 (em->event_types, t[0]);
+
+ t = em->event_types + l;
+
+ /* Make copies of strings for hashing etc. */
+ if (t->function)
+ t->format = (char *) format (0, "%s %s%c", t->function, t->format, 0);
+ else
+ t->format = (char *) format (0, "%s%c", t->format, 0);
+
+ t->format_args = (char *) format (0, "%s%c", t->format_args, 0);
+
+ /* Construct string table. */
+ {
+ uword i;
+ t->n_enum_strings = static_type->n_enum_strings;
+ for (i = 0; i < t->n_enum_strings; i++)
+ {
+ if (!static_type->enum_strings[i])
+ static_type->enum_strings[i] = "MISSING";
+ vec_add1 (t->enum_strings_vector,
+ (char *) format (0, "%s%c", static_type->enum_strings[i],
+ 0));
+ }
+ }
+
+ new_event_type (em, l);
+ elog_unlock (em);
+
+ return l;
+}
+
+word
+elog_track_register (elog_main_t * em, elog_track_t * t)
+{
+ word l;
+
+ elog_lock (em);
+
+ l = vec_len (em->tracks);
+
+ t->track_index_plus_one = 1 + l;
+
+ ASSERT (t->name);
+
+ vec_add1 (em->tracks, t[0]);
+
+ t = em->tracks + l;
+
+ t->name = (char *) format (0, "%s%c", t->name, 0);
+
+ elog_unlock (em);
+
+ return l;
+}
+
+static uword
+parse_2digit_decimal (char *p, uword * number)
+{
+ uword i = 0;
+ u8 digits[2];
+
+ digits[0] = digits[1] = 0;
+ while (p[i] >= '0' && p[i] <= '9')
+ {
+ if (i >= 2)
+ break;
+ digits[i] = p[i] - '0';
+ i++;
+ }
+
+ if (i >= 1 && i <= 2)
+ {
+ if (i == 1)
+ *number = digits[0];
+ else
+ *number = 10 * digits[0] + digits[1];
+ return i;
+ }
+ else
+ return 0;
+}
+
+static u8 *
+fixed_format (u8 * s, char *fmt, char *result, uword * result_len)
+{
+ char *f = fmt;
+ char *percent;
+ uword l = 0;
+
+ while (1)
+ {
+ if (f[0] == 0)
+ break;
+ if (f[0] == '%' && f[1] != '%')
+ break;
+ f++;
+ }
+ if (f > fmt)
+ vec_add (s, fmt, f - fmt);
+
+ if (f[0] != '%')
+ goto done;
+
+ /* Skip percent. */
+ percent = f++;
+
+ /* Skip possible +-= justification. */
+ f += f[0] == '+' || f[0] == '-' || f[0] == '=';
+
+ /* Skip possible X.Y width. */
+ while ((f[0] >= '0' && f[0] <= '9') || f[0] == '.')
+ f++;
+
+ /* Skip wlL as in e.g. %Ld. */
+ f += f[0] == 'w' || f[0] == 'l' || f[0] == 'L';
+
+ /* Finally skip format letter. */
+ f += f[0] != 0;
+
+ ASSERT (*result_len > f - percent);
+ l = clib_min (f - percent, *result_len - 1);
+ clib_memcpy (result, percent, l);
+ result[l] = 0;
+
+done:
+ *result_len = f - fmt;
+ return s;
+}
+
+u8 *
+format_elog_event (u8 * s, va_list * va)
+{
+ elog_main_t *em = va_arg (*va, elog_main_t *);
+ elog_event_t *e = va_arg (*va, elog_event_t *);
+ elog_event_type_t *t;
+ char *a, *f;
+ void *d = (u8 *) e->data;
+ char arg_format[64];
+
+ t = vec_elt_at_index (em->event_types, e->type);
+
+ f = t->format;
+ a = t->format_args;
+ while (1)
+ {
+ uword n_bytes = 0, n_digits, f_bytes = 0;
+
+ f_bytes = sizeof (arg_format);
+ s = fixed_format (s, f, arg_format, &f_bytes);
+ f += f_bytes;
+
+ if (a == 0 || a[0] == 0)
+ {
+ /* Format string must be exhausted here as well. */
+ ASSERT (f[0] == 0);
+ break;
+ }
+
+ /* Don't go past end of event data. */
+ ASSERT (d < (void *) (e->data + sizeof (e->data)));
+
+ n_digits = parse_2digit_decimal (a + 1, &n_bytes);
+ switch (a[0])
+ {
+ case 'i':
+ case 't':
+ case 'T':
+ {
+ u32 i = 0;
+ u64 l = 0;
+
+ if (n_bytes == 1)
+ i = ((u8 *) d)[0];
+ else if (n_bytes == 2)
+ i = clib_mem_unaligned (d, u16);
+ else if (n_bytes == 4)
+ i = clib_mem_unaligned (d, u32);
+ else if (n_bytes == 8)
+ l = clib_mem_unaligned (d, u64);
+ else
+ ASSERT (0);
+ if (a[0] == 't')
+ {
+ char *e =
+ vec_elt (t->enum_strings_vector, n_bytes == 8 ? l : i);
+ s = format (s, arg_format, e);
+ }
+ else if (a[0] == 'T')
+ {
+ char *e =
+ vec_elt_at_index (em->string_table, n_bytes == 8 ? l : i);
+ s = format (s, arg_format, e);
+ }
+ else if (n_bytes == 8)
+ s = format (s, arg_format, l);
+ else
+ s = format (s, arg_format, i);
+ }
+ break;
+
+ case 'f':
+ {
+ f64 x = 0;
+ if (n_bytes == 4)
+ x = clib_mem_unaligned (d, f32);
+ else if (n_bytes == 8)
+ x = clib_mem_unaligned (d, f64);
+ else
+ ASSERT (0);
+ s = format (s, arg_format, x);
+ }
+ break;
+
+ case 's':
+ s = format (s, arg_format, d);
+ if (n_bytes == 0)
+ n_bytes = strlen (d) + 1;
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+
+ ASSERT (n_digits > 0 && n_digits <= 2);
+ a += 1 + n_digits;
+ d += n_bytes;
+ }
+
+ return s;
+}
+
+u8 *
+format_elog_track (u8 * s, va_list * va)
+{
+ elog_main_t *em = va_arg (*va, elog_main_t *);
+ elog_event_t *e = va_arg (*va, elog_event_t *);
+ elog_track_t *t = vec_elt_at_index (em->tracks, e->track);
+ return format (s, "%s", t->name);
+}
+
+void
+elog_time_now (elog_time_stamp_t * et)
+{
+ u64 cpu_time_now, os_time_now_nsec;
+ struct timespec ts;
+
+#ifdef CLIB_UNIX
+ {
+#include <sys/syscall.h>
+ syscall (SYS_clock_gettime, CLOCK_REALTIME, &ts);
+ cpu_time_now = clib_cpu_time_now ();
+ /* Subtract the 3/30/2017 epoch offset (1490885108 seconds) to retain precision */
+ os_time_now_nsec = 1e9 * (ts.tv_sec - 1490885108) + ts.tv_nsec;
+ }
+#else
+ cpu_time_now = clib_cpu_time_now ();
+ os_time_now_nsec = 0;
+#endif
+
+ et->cpu = cpu_time_now;
+ et->os_nsec = os_time_now_nsec;
+}
+
+always_inline i64
+elog_time_stamp_diff_os_nsec (elog_time_stamp_t * t1, elog_time_stamp_t * t2)
+{
+ return (i64) t1->os_nsec - (i64) t2->os_nsec;
+}
+
+always_inline i64
+elog_time_stamp_diff_cpu (elog_time_stamp_t * t1, elog_time_stamp_t * t2)
+{
+ return (i64) t1->cpu - (i64) t2->cpu;
+}
+
+always_inline f64
+elog_nsec_per_clock (elog_main_t * em)
+{
+ return ((f64) elog_time_stamp_diff_os_nsec (&em->serialize_time,
+ &em->init_time)
+ / (f64) elog_time_stamp_diff_cpu (&em->serialize_time,
+ &em->init_time));
+}
+
+void
+elog_alloc (elog_main_t * em, u32 n_events)
+{
+ if (em->event_ring)
+ vec_free (em->event_ring);
+
+ /* Ring size must be a power of 2. */
+ em->event_ring_size = n_events = max_pow2 (n_events);
+
+ /* Leave an empty event at the end so we can always speculatively
+ write an event there (possibly a long-form event). */
+ vec_resize_aligned (em->event_ring, n_events, CLIB_CACHE_LINE_BYTES);
+}
+
+void
+elog_init (elog_main_t * em, u32 n_events)
+{
+ memset (em, 0, sizeof (em[0]));
+
+ em->lock = 0;
+
+ if (n_events > 0)
+ elog_alloc (em, n_events);
+
+ clib_time_init (&em->cpu_timer);
+
+ em->n_total_events_disable_limit = ~0;
+
+ /* Make track 0. */
+ em->default_track.name = "default";
+ elog_track_register (em, &em->default_track);
+
+ elog_time_now (&em->init_time);
+}
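+
+/* Hedged usage sketch: the ring holds a power-of-2 number of events;
+   a typical embedding initializes once at startup (size illustrative).
+
+     static elog_main_t em;
+     elog_init (&em, 64 << 10);
+*/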
+
+/* Returns number of events in ring and start index. */
+static uword
+elog_event_range (elog_main_t * em, uword * lo)
+{
+ uword l = em->event_ring_size;
+ u64 i = em->n_total_events;
+
+ /* Ring never wrapped? */
+ if (i <= (u64) l)
+ {
+ if (lo)
+ *lo = 0;
+ return i;
+ }
+ else
+ {
+ if (lo)
+ *lo = i & (l - 1);
+ return l;
+ }
+}
+
+elog_event_t *
+elog_peek_events (elog_main_t * em)
+{
+ elog_event_t *e, *f, *es = 0;
+ uword i, j, n;
+
+ n = elog_event_range (em, &j);
+ for (i = 0; i < n; i++)
+ {
+ vec_add2 (es, e, 1);
+ f = vec_elt_at_index (em->event_ring, j);
+ e[0] = f[0];
+
+ /* Convert absolute time from cycles to seconds from start. */
+ e->time =
+ (e->time_cycles -
+ em->init_time.cpu) * em->cpu_timer.seconds_per_clock;
+
+ j = (j + 1) & (em->event_ring_size - 1);
+ }
+
+ return es;
+}
+
+/* Add a formatted string to the string table. */
+u32
+elog_string (elog_main_t * em, char *fmt, ...)
+{
+ u32 offset;
+ va_list va;
+
+ va_start (va, fmt);
+ offset = vec_len (em->string_table);
+ em->string_table = (char *) va_format ((u8 *) em->string_table, fmt, &va);
+ va_end (va);
+
+ /* Null terminate string if it is not already. */
+ if (vec_end (em->string_table)[-1] != 0)
+ vec_add1 (em->string_table, 0);
+
+ return offset;
+}
+
+elog_event_t *
+elog_get_events (elog_main_t * em)
+{
+ if (!em->events)
+ em->events = elog_peek_events (em);
+ return em->events;
+}
+
+static void
+maybe_fix_string_table_offset (elog_event_t * e,
+ elog_event_type_t * t, u32 offset)
+{
+ void *d = (u8 *) e->data;
+ char *a;
+
+ if (offset == 0)
+ return;
+
+ a = t->format_args;
+
+ while (1)
+ {
+ uword n_bytes = 0, n_digits;
+
+ if (a[0] == 0)
+ break;
+
+ /* Don't go past end of event data. */
+ ASSERT (d < (void *) (e->data + sizeof (e->data)));
+
+ n_digits = parse_2digit_decimal (a + 1, &n_bytes);
+ switch (a[0])
+ {
+ case 'T':
+ ASSERT (n_bytes == 4);
+ clib_mem_unaligned (d, u32) += offset;
+ break;
+
+ case 'i':
+ case 't':
+ case 'f':
+ case 's':
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+
+ ASSERT (n_digits > 0 && n_digits <= 2);
+ a += 1 + n_digits;
+ d += n_bytes;
+ }
+}
+
+static int
+elog_cmp (void *a1, void *a2)
+{
+ elog_event_t *e1 = a1;
+ elog_event_t *e2 = a2;
+
+ if (e1->time < e2->time)
+ return -1;
+
+ if (e1->time > e2->time)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Merge two event logs. Complicated and cranky.
+ */
+void
+elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag,
+ f64 align_tweak)
+{
+ elog_event_t *e;
+ uword l;
+ u32 string_table_offset_for_src_events;
+ u32 track_offset_for_src_tracks;
+ elog_track_t newt;
+ int i;
+
+ memset (&newt, 0, sizeof (newt));
+
+ /* Acquire src and dst events */
+ elog_get_events (src);
+ elog_get_events (dst);
+
+ string_table_offset_for_src_events = vec_len (dst->string_table);
+ vec_append (dst->string_table, src->string_table);
+
+ l = vec_len (dst->events);
+ vec_append (dst->events, src->events);
+
+ /* Prepend the supplied tag (if any) to all dst track names */
+ if (dst_tag)
+ {
+ for (i = 0; i < vec_len (dst->tracks); i++)
+ {
+ elog_track_t *t = vec_elt_at_index (dst->tracks, i);
+ char *new_name;
+
+ new_name = (char *) format (0, "%s:%s%c", dst_tag, t->name, 0);
+ vec_free (t->name);
+ t->name = new_name;
+ }
+ }
+
+ /*
+ * Remember where we started allocating new tracks while merging
+ */
+ track_offset_for_src_tracks = vec_len (dst->tracks);
+
+ /* Copy / tag source tracks */
+ for (i = 0; i < vec_len (src->tracks); i++)
+ {
+ elog_track_t *t = vec_elt_at_index (src->tracks, i);
+ if (src_tag)
+ newt.name = (char *) format (0, "%s:%s%c", src_tag, t->name, 0);
+ else
+ newt.name = (char *) format (0, "%s%c", t->name, 0);
+ (void) elog_track_register (dst, &newt);
+ vec_free (newt.name);
+ }
+
+ /* Across all (copied) src events... */
+ for (e = dst->events + l; e < vec_end (dst->events); e++)
+ {
+ elog_event_type_t *t = vec_elt_at_index (src->event_types, e->type);
+
+ /* Remap type from src -> dst. */
+ e->type = find_or_create_type (dst, t);
+
+ /* Remap string table offsets for 'T' format args */
+ maybe_fix_string_table_offset (e, t,
+ string_table_offset_for_src_events);
+
+ /* Remap track */
+ e->track += track_offset_for_src_tracks;
+ }
+
+ /* Adjust event times for relative starting times of event streams. */
+ {
+ f64 dt_event, dt_os_nsec, dt_clock_nsec;
+
+ /* Set clock parameters if dst was not generated by unserialize. */
+ if (dst->serialize_time.cpu == 0)
+ {
+ dst->init_time = src->init_time;
+ dst->serialize_time = src->serialize_time;
+ dst->nsec_per_cpu_clock = src->nsec_per_cpu_clock;
+ }
+
+ dt_os_nsec =
+ elog_time_stamp_diff_os_nsec (&src->init_time, &dst->init_time);
+
+ dt_event = dt_os_nsec;
+ dt_clock_nsec =
+ (elog_time_stamp_diff_cpu (&src->init_time, &dst->init_time) * .5 *
+ (dst->nsec_per_cpu_clock + src->nsec_per_cpu_clock));
+
+ /*
+ * Heuristic to see if src/dst came from same time source.
+ * If frequencies are "the same" and os clock and cpu clock agree
+ * to within 100e-9 secs about time difference between src/dst
+ * init_time, then we use cpu clock. Otherwise we use OS clock.
+ *
+ * When merging event logs from different systems, time paradoxes
+ * at the O(1ms) level are to be expected. Hence, the "align_tweak"
+ * parameter. If two events logged on different processors are known
+ * to occur in a specific order - and with a reasonably-estimated
+ * interval - supply a non-zero "align_tweak" parameter.
+ */
+ if (fabs (src->nsec_per_cpu_clock - dst->nsec_per_cpu_clock) < 1e-2
+ && fabs (dt_os_nsec - dt_clock_nsec) < 100)
+ dt_event = dt_clock_nsec;
+
+ /* Convert to seconds. */
+ dt_event *= 1e-9;
+
+ /*
+ * Move the earlier set of events later, to avoid creating
+ * events which precede the Big Bang (aka have negative timestamps).
+ *
+ * Not drawn to scale, we have something like the following picture:
+ *
+ * DST capture start point
+ * ^
+ * +--- dt_event --+
+ * v
+ * SRC capture start point
+ *
+ * In this case dt_event is positive, src started after dst,
+ * to put src events onto a common timebase we have to move them
+ * forward in time. Naturally, the opposite case is
+ * possible, too: dt_event will be negative, and so we have to
+ * move dst events forward in time by the |dt_event|.
+ * In both cases, we add align_tweak.
+ */
+ if (dt_event > 0)
+ {
+ /* Src started after dst. */
+ for (e = dst->events + l; e < vec_end (dst->events); e++)
+ e->time += dt_event + align_tweak;
+ }
+ else
+ {
+ /* Dst started after src. */
+ dt_event = -dt_event;
+ for (e = dst->events + 0; e < dst->events + l; e++)
+ e->time += dt_event + align_tweak;
+ }
+ }
+
+ /* Sort events by increasing time. */
+ vec_sort_with_function (dst->events, elog_cmp);
+
+ dst->n_total_events = vec_len (dst->events);
+
+ /* Recreate the event ring or the results won't serialize */
+ {
+ int i;
+
+ ASSERT (dst->cpu_timer.seconds_per_clock);
+
+ elog_alloc (dst, vec_len (dst->events));
+ for (i = 0; i < vec_len (dst->events); i++)
+ {
+ elog_event_t *es, *ed;
+
+ es = dst->events + i;
+ ed = dst->event_ring + i;
+
+ ed[0] = es[0];
+ }
+ }
+}
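+
+/* Hedged usage sketch: merge a worker log into a main log, tagging each
+   side's tracks; the 10us align_tweak is an illustrative value.
+
+     elog_merge (&main_elog, (u8 *) "main",
+                 &worker_elog, (u8 *) "wrk0", 10e-6);
+*/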
+
+static void
+serialize_elog_event (serialize_main_t * m, va_list * va)
+{
+ elog_main_t *em = va_arg (*va, elog_main_t *);
+ elog_event_t *e = va_arg (*va, elog_event_t *);
+ elog_event_type_t *t = vec_elt_at_index (em->event_types, e->type);
+ u8 *d = e->data;
+ u8 *p = (u8 *) t->format_args;
+
+ serialize_integer (m, e->type, sizeof (e->type));
+ serialize_integer (m, e->track, sizeof (e->track));
+ serialize (m, serialize_f64, e->time);
+
+ while (*p)
+ {
+ uword n_digits, n_bytes = 0;
+
+ n_digits = parse_2digit_decimal ((char *) p + 1, &n_bytes);
+
+ switch (p[0])
+ {
+ case 'i':
+ case 't':
+ case 'T':
+ if (n_bytes == 1)
+ serialize_integer (m, d[0], sizeof (u8));
+ else if (n_bytes == 2)
+ serialize_integer (m, clib_mem_unaligned (d, u16), sizeof (u16));
+ else if (n_bytes == 4)
+ serialize_integer (m, clib_mem_unaligned (d, u32), sizeof (u32));
+ else if (n_bytes == 8)
+ serialize (m, serialize_64, clib_mem_unaligned (d, u64));
+ else
+ ASSERT (0);
+ break;
+
+ case 's':
+ serialize_cstring (m, (char *) d);
+ if (n_bytes == 0)
+ n_bytes = strlen ((char *) d) + 1;
+ break;
+
+ case 'f':
+ if (n_bytes == 4)
+ serialize (m, serialize_f32, clib_mem_unaligned (d, f32));
+ else if (n_bytes == 8)
+ serialize (m, serialize_f64, clib_mem_unaligned (d, f64));
+ else
+ ASSERT (0);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+
+ p += 1 + n_digits;
+ d += n_bytes;
+ }
+}
+
+static void
+unserialize_elog_event (serialize_main_t * m, va_list * va)
+{
+ elog_main_t *em = va_arg (*va, elog_main_t *);
+ elog_event_t *e = va_arg (*va, elog_event_t *);
+ elog_event_type_t *t;
+ u8 *p, *d;
+
+ {
+ u16 tmp[2];
+
+ unserialize_integer (m, &tmp[0], sizeof (e->type));
+ unserialize_integer (m, &tmp[1], sizeof (e->track));
+
+ e->type = tmp[0];
+ e->track = tmp[1];
+
+ /* Make sure it fits. */
+ ASSERT (e->type == tmp[0]);
+ ASSERT (e->track == tmp[1]);
+ }
+
+ t = vec_elt_at_index (em->event_types, e->type);
+
+ unserialize (m, unserialize_f64, &e->time);
+
+ d = e->data;
+ p = (u8 *) t->format_args;
+
+ while (p && *p)
+ {
+ uword n_digits, n_bytes = 0;
+ u32 tmp;
+
+ n_digits = parse_2digit_decimal ((char *) p + 1, &n_bytes);
+
+ switch (p[0])
+ {
+ case 'i':
+ case 't':
+ case 'T':
+ if (n_bytes == 1)
+ {
+ unserialize_integer (m, &tmp, sizeof (u8));
+ d[0] = tmp;
+ }
+ else if (n_bytes == 2)
+ {
+ unserialize_integer (m, &tmp, sizeof (u16));
+ clib_mem_unaligned (d, u16) = tmp;
+ }
+ else if (n_bytes == 4)
+ {
+ unserialize_integer (m, &tmp, sizeof (u32));
+ clib_mem_unaligned (d, u32) = tmp;
+ }
+ else if (n_bytes == 8)
+ {
+ u64 x;
+ unserialize (m, unserialize_64, &x);
+ clib_mem_unaligned (d, u64) = x;
+ }
+ else
+ ASSERT (0);
+ break;
+
+ case 's':
+ {
+ char *t;
+ unserialize_cstring (m, &t);
+ if (n_bytes == 0)
+ n_bytes = strlen (t) + 1;
+ clib_memcpy (d, t, clib_min (n_bytes, vec_len (t)));
+ vec_free (t);
+ break;
+ }
+
+ case 'f':
+ if (n_bytes == 4)
+ {
+ f32 x;
+ unserialize (m, unserialize_f32, &x);
+ clib_mem_unaligned (d, f32) = x;
+ }
+ else if (n_bytes == 8)
+ {
+ f64 x;
+ unserialize (m, unserialize_f64, &x);
+ clib_mem_unaligned (d, f64) = x;
+ }
+ else
+ ASSERT (0);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+
+ p += 1 + n_digits;
+ d += n_bytes;
+ }
+}
+
+static void
+serialize_elog_event_type (serialize_main_t * m, va_list * va)
+{
+ elog_event_type_t *t = va_arg (*va, elog_event_type_t *);
+ int n = va_arg (*va, int);
+ int i, j;
+ for (i = 0; i < n; i++)
+ {
+ serialize_cstring (m, t[i].format);
+ serialize_cstring (m, t[i].format_args);
+ serialize_integer (m, t[i].type_index_plus_one,
+ sizeof (t->type_index_plus_one));
+ serialize_integer (m, t[i].n_enum_strings,
+ sizeof (t[i].n_enum_strings));
+ for (j = 0; j < t[i].n_enum_strings; j++)
+ serialize_cstring (m, t[i].enum_strings_vector[j]);
+ }
+}
+
+static void
+unserialize_elog_event_type (serialize_main_t * m, va_list * va)
+{
+ elog_event_type_t *t = va_arg (*va, elog_event_type_t *);
+ int n = va_arg (*va, int);
+ int i, j;
+ for (i = 0; i < n; i++)
+ {
+ unserialize_cstring (m, &t[i].format);
+ unserialize_cstring (m, &t[i].format_args);
+ unserialize_integer (m, &t[i].type_index_plus_one,
+ sizeof (t->type_index_plus_one));
+ unserialize_integer (m, &t[i].n_enum_strings,
+ sizeof (t[i].n_enum_strings));
+ vec_resize (t[i].enum_strings_vector, t[i].n_enum_strings);
+ for (j = 0; j < t[i].n_enum_strings; j++)
+ unserialize_cstring (m, &t[i].enum_strings_vector[j]);
+ }
+}
+
+static void
+serialize_elog_track (serialize_main_t * m, va_list * va)
+{
+ elog_track_t *t = va_arg (*va, elog_track_t *);
+ int n = va_arg (*va, int);
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ serialize_cstring (m, t[i].name);
+ }
+}
+
+static void
+unserialize_elog_track (serialize_main_t * m, va_list * va)
+{
+ elog_track_t *t = va_arg (*va, elog_track_t *);
+ int n = va_arg (*va, int);
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ unserialize_cstring (m, &t[i].name);
+ }
+}
+
+static void
+serialize_elog_time_stamp (serialize_main_t * m, va_list * va)
+{
+ elog_time_stamp_t *st = va_arg (*va, elog_time_stamp_t *);
+ serialize (m, serialize_64, st->os_nsec);
+ serialize (m, serialize_64, st->cpu);
+}
+
+static void
+unserialize_elog_time_stamp (serialize_main_t * m, va_list * va)
+{
+ elog_time_stamp_t *st = va_arg (*va, elog_time_stamp_t *);
+ unserialize (m, unserialize_64, &st->os_nsec);
+ unserialize (m, unserialize_64, &st->cpu);
+}
+
+static char *elog_serialize_magic = "elog v0";
+
+void
+serialize_elog_main (serialize_main_t * m, va_list * va)
+{
+ elog_main_t *em = va_arg (*va, elog_main_t *);
+ int flush_ring = va_arg (*va, int);
+ elog_event_t *e;
+
+ serialize_magic (m, elog_serialize_magic, strlen (elog_serialize_magic));
+
+ serialize_integer (m, em->event_ring_size, sizeof (u32));
+
+ elog_time_now (&em->serialize_time);
+ serialize (m, serialize_elog_time_stamp, &em->serialize_time);
+ serialize (m, serialize_elog_time_stamp, &em->init_time);
+
+ vec_serialize (m, em->event_types, serialize_elog_event_type);
+ vec_serialize (m, em->tracks, serialize_elog_track);
+ vec_serialize (m, em->string_table, serialize_vec_8);
+
+ /* Free old events (cached) in case they have changed. */
+ if (flush_ring)
+ {
+ vec_free (em->events);
+ elog_get_events (em);
+ }
+
+ serialize_integer (m, vec_len (em->events), sizeof (u32));
+
+ /* SMP logs can easily have local time paradoxes... */
+ vec_sort_with_function (em->events, elog_cmp);
+
+ vec_foreach (e, em->events) serialize (m, serialize_elog_event, em, e);
+}
+
+void
+unserialize_elog_main (serialize_main_t * m, va_list * va)
+{
+ elog_main_t *em = va_arg (*va, elog_main_t *);
+ uword i;
+ u32 rs;
+
+ unserialize_check_magic (m, elog_serialize_magic,
+ strlen (elog_serialize_magic));
+
+ unserialize_integer (m, &rs, sizeof (u32));
+ em->event_ring_size = rs;
+ elog_init (em, em->event_ring_size);
+
+ unserialize (m, unserialize_elog_time_stamp, &em->serialize_time);
+ unserialize (m, unserialize_elog_time_stamp, &em->init_time);
+ em->nsec_per_cpu_clock = elog_nsec_per_clock (em);
+
+ vec_unserialize (m, &em->event_types, unserialize_elog_event_type);
+ for (i = 0; i < vec_len (em->event_types); i++)
+ new_event_type (em, i);
+
+ vec_unserialize (m, &em->tracks, unserialize_elog_track);
+ vec_unserialize (m, &em->string_table, unserialize_vec_8);
+
+ {
+ u32 ne;
+ elog_event_t *e;
+
+ unserialize_integer (m, &ne, sizeof (u32));
+ vec_resize (em->events, ne);
+ vec_foreach (e, em->events)
+ unserialize (m, unserialize_elog_event, em, e);
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/elog.h b/src/vppinfra/elog.h
new file mode 100644
index 00000000..05085b26
--- /dev/null
+++ b/src/vppinfra/elog.h
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005,2009 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/* High speed event logger */
+
+/** \file
+ The fine-grained event logger allows lightweight, thread-safe
+ event logging at minimum cost. In typical operation, logging
+ a single event costs around 80ns on x86_64. It's appropriate
+ for event-logging at least once per frame in vector packet processing.
+
+ See https://wiki.fd.io/view/VPP/elog for more information.
+*/
+
+#ifndef included_clib_elog_h
+#define included_clib_elog_h
+
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h> /* for ASSERT */
+#include <vppinfra/serialize.h>
+#include <vppinfra/time.h> /* for clib_cpu_time_now */
+#include <vppinfra/mhash.h>
+
+typedef struct
+{
+ union
+ {
+ /** Absolute time stamp in CPU clock cycles. */
+ u64 time_cycles;
+
+ /** Absolute time as floating point number in seconds. */
+ f64 time;
+ };
+
+ /** Event type index. */
+ u16 type;
+
+ /** Track for this event. Tracks allow events to be sorted and
+ displayed by track. Think of 2 dimensional display with time and
+ track being the x and y axes. */
+ u16 track;
+
+ /** 20 bytes of data follow, padding the event to 32 bytes. */
+ u8 data[20];
+} elog_event_t;
+
+typedef struct
+{
+ /** Type index plus one assigned to this type.
+ This is used to mark type as seen. */
+ u32 type_index_plus_one;
+
+ /** String table as a vector constructed when type is registered. */
+ char **enum_strings_vector;
+
+ /** Format string. (example: "my-event (%d,%d)"). */
+ char *format;
+
+ /** Specifies how arguments to format are parsed from event data.
+ A string of (letter, size) pairs: the letter gives the argument
+ kind, the 1- or 2-digit decimal size gives its width in bytes
+ (e.g. "i4" for a u32, "f8" for an f64).
+ 'i' is an unsigned integer of 1, 2, 4 or 8 bytes,
+ 't' is an index into the enum string table for this type,
+ 'T' is an offset into the event-log string table,
+ 'f' is a float ("f4") or double ("f8"),
+ 's' is a C string; size 0 means null-terminated. */
+ char *format_args;
+
+ /** Function name generating event. */
+ char *function;
+
+ /** Number of elements in string enum table. */
+ u32 n_enum_strings;
+
+ /** String table for enum/number to string formatting. */
+ char *enum_strings[];
+} elog_event_type_t;
+
+typedef struct
+{
+ /** Track name vector. */
+ char *name;
+
+ /** Track index plus one; non-zero once the track has been
+ registered with the main structure. */
+ u32 track_index_plus_one;
+} elog_track_t;
+
+typedef struct
+{
+ /** CPU cycle counter. */
+ u64 cpu;
+
+ /** OS timer in nano secs since epoch 3/30/2017, see elog_time_now() */
+ u64 os_nsec;
+} elog_time_stamp_t;
+
+typedef struct
+{
+ /** Total number of events in buffer. */
+ u32 n_total_events;
+
+ /** When count reaches limit logging is disabled. This is
+ used for event triggers. */
+ u32 n_total_events_disable_limit;
+
+ /** Dummy event to use when logger is disabled. */
+ elog_event_t dummy_event;
+
+ /** Power of 2 number of elements in ring. */
+ uword event_ring_size;
+
+ /** Vector of events (circular buffer). Power of 2 size.
+ Used when events are being collected. */
+ elog_event_t *event_ring;
+
+ /** Vector of event types. */
+ elog_event_type_t *event_types;
+
+ /** Hash table mapping type format to type index. */
+ uword *event_type_by_format;
+
+ /** Events may refer to strings in string table. */
+ char *string_table;
+
+ /** Vector of tracks. */
+ elog_track_t *tracks;
+
+ /** Default track. */
+ elog_track_t default_track;
+
+ /** Placeholder for CPU clock frequency. */
+ clib_time_t cpu_timer;
+
+ /** Timestamps */
+ elog_time_stamp_t init_time, serialize_time;
+
+ /** SMP lock, non-zero means locking required */
+ uword *lock;
+
+ /** Estimate of nanoseconds per CPU clock, derived from
+ serialize_time and init_time. */
+ f64 nsec_per_cpu_clock;
+
+ /** Vector of events converted to generic form after collection. */
+ elog_event_t *events;
+} elog_main_t;
+
+/** @brief Return number of events in the event-log buffer
+ @param em elog_main_t *
+ @return number of events in the buffer
+*/
+
+always_inline uword
+elog_n_events_in_buffer (elog_main_t * em)
+{
+ return clib_min (em->n_total_events, em->event_ring_size);
+}
+
+/** @brief Return number of events which can fit in the event buffer
+ @param em elog_main_t *
+ @return number of events which can fit in the buffer
+*/
+always_inline uword
+elog_buffer_capacity (elog_main_t * em)
+{
+ return em->event_ring_size;
+}
+
+/** @brief Reset the event buffer
+ @param em elog_main_t *
+*/
+always_inline void
+elog_reset_buffer (elog_main_t * em)
+{
+ em->n_total_events = 0;
+ em->n_total_events_disable_limit = ~0;
+}
+
+/** @brief Enable or disable event logging
+ @param em elog_main_t *
+*/
+always_inline void
+elog_enable_disable (elog_main_t * em, int is_enabled)
+{
+ em->n_total_events = 0;
+ em->n_total_events_disable_limit = is_enabled ? ~0 : 0;
+}
+
+/** @brief Disable logging after the specified number of events have been logged.
+
+ This is used as a "debug trigger" when a certain event has occurred.
+ Events will be logged both before and after the "event" but the
+ event will not be lost as long as N < RING_SIZE.
+
+ @param em elog_main_t *
+ @param n uword number of events before disabling event logging
+*/
+always_inline void
+elog_disable_after_events (elog_main_t * em, uword n)
+{
+ em->n_total_events_disable_limit = em->n_total_events + n;
+}
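+
+/* Hedged usage sketch: arm a "debug trigger" so the ring keeps events
+   both before and after a rare condition (condition is illustrative).
+
+     if (PREDICT_FALSE (saw_rare_condition))
+       elog_disable_after_events (em, elog_buffer_capacity (em) / 2);
+*/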
+
+/** @brief Mid-buffer logic-analyzer trigger
+
+ Currently, only midpoint triggering is supported, but it's pretty obvious
+ how to generalize the scheme.
+ @param em elog_main_t *
+*/
+always_inline void
+elog_disable_trigger (elog_main_t * em)
+{
+ em->n_total_events_disable_limit =
+ em->n_total_events + vec_len (em->event_ring) / 2;
+}
+
+/** @brief register an event type
+ @param em elog_main_t *
+ @param t elog_event_type_t * event to register
+ @return type index
+ @warning Typically not called directly
+*/
+
+word elog_event_type_register (elog_main_t * em, elog_event_type_t * t);
+
+/** @brief register an event track
+ @param em elog_main_t *
+ @param t elog_track_t * track to register
+ @return track index
+ @note this function is often called directly
+*/
+word elog_track_register (elog_main_t * em, elog_track_t * t);
+
+/** @brief event logging enabled predicate
+ @param em elog_main_t *
+ @return 1 if enabled, 0 if not enabled
+*/
+always_inline uword
+elog_is_enabled (elog_main_t * em)
+{
+ return em->n_total_events < em->n_total_events_disable_limit;
+}
+
+/** @brief Allocate an event to be filled in by the caller
+
+ Not normally called directly; this function underlies the
+ ELOG_DATA and ELOG_TRACK_DATA macros
+
+ @param em elog_main_t *
+ @param type elog_event_type_t * type
+ @param track elog_track_t * track
+ @param cpu_time u64 current cpu tick value
+ @returns event to be filled in
+*/
+always_inline void *
+elog_event_data_inline (elog_main_t * em,
+ elog_event_type_t * type,
+ elog_track_t * track, u64 cpu_time)
+{
+ elog_event_t *e;
+ uword ei;
+ word type_index, track_index;
+
+ /* Return the user dummy memory to scribble data into. */
+ if (PREDICT_FALSE (!elog_is_enabled (em)))
+ return em->dummy_event.data;
+
+ type_index = (word) type->type_index_plus_one - 1;
+ track_index = (word) track->track_index_plus_one - 1;
+ if (PREDICT_FALSE ((type_index | track_index) < 0))
+ {
+ if (type_index < 0)
+ type_index = elog_event_type_register (em, type);
+ if (track_index < 0)
+ track_index = elog_track_register (em, track);
+ }
+
+ ASSERT (track_index < vec_len (em->tracks));
+ ASSERT (is_pow2 (vec_len (em->event_ring)));
+
+ if (em->lock)
+ ei = clib_smp_atomic_add (&em->n_total_events, 1);
+ else
+ ei = em->n_total_events++;
+
+ ei &= em->event_ring_size - 1;
+ e = vec_elt_at_index (em->event_ring, ei);
+
+ e->time_cycles = cpu_time;
+ e->type = type_index;
+ e->track = track_index;
+
+ /* Return user data for caller to fill in. */
+ return e->data;
+}
+
+/* External version of inline. */
+void *elog_event_data (elog_main_t * em,
+ elog_event_type_t * type,
+ elog_track_t * track, u64 cpu_time);
+
+/** @brief Allocate an event to be filled in by the caller, non-inline
+
+ Not normally called directly; this function underlies the
+ ELOG_DATA and ELOG_TRACK_DATA macros
+
+ @param em elog_main_t *
+ @param type elog_event_type_t * type
+ @param track elog_track_t * track
+ @param cpu_time u64 current cpu tick value
+ @returns event to be filled in
+*/
+always_inline void *
+elog_event_data_not_inline (elog_main_t * em,
+ elog_event_type_t * type,
+ elog_track_t * track, u64 cpu_time)
+{
+ /* Return the user dummy memory to scribble data into. */
+ if (PREDICT_FALSE (!elog_is_enabled (em)))
+ return em->dummy_event.data;
+ return elog_event_data (em, type, track, cpu_time);
+}
+
+/** @brief Log a single-datum event
+ @param em elog_main_t *
+ @param type elog_event_type_t * type
+ @param data u32 single datum to capture
+*/
+always_inline void
+elog (elog_main_t * em, elog_event_type_t * type, u32 data)
+{
+ u32 *d = elog_event_data_not_inline (em,
+ type,
+ &em->default_track,
+ clib_cpu_time_now ());
+ d[0] = data;
+}
+
+/** @brief Log a single-datum event, inline version
+ @param em elog_main_t *
+ @param type elog_event_type_t * type
+ @param data u32 single datum to capture
+*/
+always_inline void
+elog_inline (elog_main_t * em, elog_event_type_t * type, u32 data)
+{
+ u32 *d = elog_event_data_inline (em,
+ type,
+ &em->default_track,
+ clib_cpu_time_now ());
+ d[0] = data;
+}
+
+/** @brief Log a single-datum event to a specific track, non-inline version
+ @param em elog_main_t *
+ @param type elog_event_type_t * type
+ @param track elog_track_t * track
+ @param data u32 single datum to capture
+*/
+always_inline void
+elog_track (elog_main_t * em, elog_event_type_t * type, elog_track_t * track,
+ u32 data)
+{
+ u32 *d = elog_event_data_not_inline (em,
+ type,
+ track,
+ clib_cpu_time_now ());
+ d[0] = data;
+}
+
+/** @brief Log a single-datum event to a specific track, inline version
+ @param em elog_main_t *
+ @param type elog_event_type_t * type
+ @param track elog_track_t * track
+ @param data u32 single datum to capture
+*/
+always_inline void
+elog_track_inline (elog_main_t * em, elog_event_type_t * type,
+ elog_track_t * track, u32 data)
+{
+ u32 *d = elog_event_data_inline (em,
+ type,
+ track,
+ clib_cpu_time_now ());
+ d[0] = data;
+}
+
+always_inline void *
+elog_data (elog_main_t * em, elog_event_type_t * type, elog_track_t * track)
+{
+ return elog_event_data_not_inline (em, type, track, clib_cpu_time_now ());
+}
+
+always_inline void *
+elog_data_inline (elog_main_t * em, elog_event_type_t * type,
+ elog_track_t * track)
+{
+ return elog_event_data_inline (em, type, track, clib_cpu_time_now ());
+}
+
+/* Macro shorthands for generating/declaring events. */
+#define __ELOG_TYPE_VAR(f) f
+#define __ELOG_TRACK_VAR(f) f
+
+#define ELOG_TYPE_DECLARE(f) static elog_event_type_t __ELOG_TYPE_VAR(f)
+
+#define ELOG_TYPE_INIT_FORMAT_AND_FUNCTION(fmt,func) \
+ { .format = fmt, .function = func, }
+
+#define ELOG_TYPE_INIT(fmt) \
+ ELOG_TYPE_INIT_FORMAT_AND_FUNCTION(fmt,(char *) __FUNCTION__)
+
+#define ELOG_TYPE_DECLARE_HELPER(f,fmt,func) \
+ static elog_event_type_t __ELOG_TYPE_VAR(f) = \
+ ELOG_TYPE_INIT_FORMAT_AND_FUNCTION (fmt, func)
+
+#define ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f,fmt) \
+ ELOG_TYPE_DECLARE_HELPER (f, fmt, (char *) __FUNCTION__)
+
+#define ELOG_TYPE_DECLARE_FORMAT(f,fmt) \
+ ELOG_TYPE_DECLARE_HELPER (f, fmt, 0)
+
+/* Shorthands with and without __FUNCTION__.
+ D for decimal; X for hex. F for __FUNCTION__. */
+#define ELOG_TYPE(f,fmt) ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f,fmt)
+#define ELOG_TYPE_D(f) ELOG_TYPE_DECLARE_FORMAT (f, #f " %d")
+#define ELOG_TYPE_X(f) ELOG_TYPE_DECLARE_FORMAT (f, #f " 0x%x")
+#define ELOG_TYPE_DF(f) ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION (f, #f " %d")
+#define ELOG_TYPE_XF(f) ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION (f, #f " 0x%x")
+#define ELOG_TYPE_FD(f) ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION (f, #f " %d")
+#define ELOG_TYPE_FX(f) ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION (f, #f " 0x%x")
+
+#define ELOG_TRACK_DECLARE(f) static elog_track_t __ELOG_TRACK_VAR(f)
+#define ELOG_TRACK(f) ELOG_TRACK_DECLARE(f) = { .name = #f, }
+
+/* Log 32 bits of data. */
+#define ELOG(em,f,data) elog ((em), &__ELOG_TYPE_VAR(f), data)
+#define ELOG_INLINE(em,f,data) elog_inline ((em), &__ELOG_TYPE_VAR(f), data)
+
+/* Return data pointer to fill in. */
+#define ELOG_TRACK_DATA(em,f,track) \
+ elog_data ((em), &__ELOG_TYPE_VAR(f), &__ELOG_TRACK_VAR(track))
+#define ELOG_TRACK_DATA_INLINE(em,f,track) \
+ elog_data_inline ((em), &__ELOG_TYPE_VAR(f), &__ELOG_TRACK_VAR(track))
+
+/* Shorthand with default track. */
+#define ELOG_DATA(em,f) elog_data ((em), &__ELOG_TYPE_VAR (f), &(em)->default_track)
+#define ELOG_DATA_INLINE(em,f) elog_data_inline ((em), &__ELOG_TYPE_VAR (f), &(em)->default_track)
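+
+/* Hedged usage sketch (names below are illustrative, not part of this
+   patch): declare a type and a track once, then log a 32-bit datum
+   from fast-path code.
+
+     static void
+     log_rx_burst (elog_main_t * em, u32 n_packets)
+     {
+       ELOG_TYPE_DF (rx_burst);
+       ELOG_TRACK (rx_track);
+       elog_track (em, &rx_burst, &rx_track, n_packets);
+     }
+*/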
+
+/** @brief add a string to the event-log string table
+
+ Often combined with hashing and the T4 elog format specifier to
+ display complex strings in offline tooling.
+
+ @param em elog_main_t *
+ @param format char *
+ @param VARARGS
+ @return u32 index to add to event log
+*/
+u32 elog_string (elog_main_t * em, char *format, ...);
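+
+/* Hedged usage sketch: pair elog_string with the 'T' format_args
+   specifier so the event stores a 4-byte string-table offset
+   (names illustrative).
+
+     ELOG_TYPE_DECLARE (e) = {
+       .format = "worker says %s",
+       .format_args = "T4",
+     };
+     struct { u32 str_offset; } *ed;
+     ed = ELOG_DATA (em, e);
+     ed->str_offset = elog_string (em, "hello from worker %d", 7);
+*/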
+
+void elog_time_now (elog_time_stamp_t * et);
+
+/** @brief Convert event-ring entries to generic events and return them as a vector.
+ @param em elog_main_t *
+ @return event vector with timestamps in f64 seconds
+ @note sets em->events to resulting vector.
+*/
+elog_event_t *elog_get_events (elog_main_t * em);
+
+/** @brief Convert event-ring entries to generic events and return them as a vector.
+ @param em elog_main_t *
+ @return event vector with timestamps in f64 seconds
+ @note no side effects
+*/
+elog_event_t *elog_peek_events (elog_main_t * em);
+
+/* Merge two logs, add supplied track tags. */
+void elog_merge (elog_main_t * dst, u8 * dst_tag,
+ elog_main_t * src, u8 * src_tag, f64 align_tweak);
+
+/* 2 arguments elog_main_t and elog_event_t to format event or track name. */
+u8 *format_elog_event (u8 * s, va_list * va);
+u8 *format_elog_track (u8 * s, va_list * va);
+
+void serialize_elog_main (serialize_main_t * m, va_list * va);
+void unserialize_elog_main (serialize_main_t * m, va_list * va);
+
+void elog_init (elog_main_t * em, u32 n_events);
+void elog_alloc (elog_main_t * em, u32 n_events);
+
+#ifdef CLIB_UNIX
+always_inline clib_error_t *
+elog_write_file (elog_main_t * em, char *unix_file, int flush_ring)
+{
+ serialize_main_t m;
+ clib_error_t *error;
+
+ error = serialize_open_unix_file (&m, unix_file);
+ if (error)
+ return error;
+ error = serialize (&m, serialize_elog_main, em, flush_ring);
+ if (!error)
+ serialize_close (&m);
+ return error;
+}
+
+always_inline clib_error_t *
+elog_read_file (elog_main_t * em, char *unix_file)
+{
+ serialize_main_t m;
+ clib_error_t *error;
+
+ error = unserialize_open_unix_file (&m, unix_file);
+ if (error)
+ return error;
+ error = unserialize (&m, unserialize_elog_main, em);
+ if (!error)
+ unserialize_close (&m);
+ return error;
+}
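+
+/* Hedged usage sketch: snapshot the log to a file for offline tooling
+   (path illustrative), reporting any error.
+
+     clib_error_t *e = elog_write_file (em, "/tmp/event.log",
+                                        /* flush_ring *\/ 1);
+     if (e)
+       clib_error_report (e);
+*/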
+
+#endif /* CLIB_UNIX */
+
+#endif /* included_clib_elog_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/error.c b/src/vppinfra/error.c
new file mode 100644
index 00000000..2722fb7b
--- /dev/null
+++ b/src/vppinfra/error.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/* Error reporting. */
+#include <stdarg.h>
+
+#include <vppinfra/clib.h> /* for HAVE_ERRNO */
+
+#ifdef CLIB_LINUX_KERNEL
+#include <linux/unistd.h> /* for write */
+#include <linux/kernel.h> /* for printk */
+#endif
+
+#ifdef CLIB_UNIX
+#include <unistd.h> /* for write */
+#include <stdio.h> /* for printf */
+#define HAVE_ERRNO
+#endif
+
+#ifdef CLIB_STANDALONE
+#include <vppinfra/standalone_stdio.h> /* for printf */
+#endif
+
+#include <vppinfra/string.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/os.h> /* for os_panic/os_exit/os_puts */
+
+typedef struct
+{
+ clib_error_handler_func_t *func;
+ void *arg;
+} clib_error_handler_t;
+
+static clib_error_handler_t *handlers = 0;
+
+void
+clib_error_register_handler (clib_error_handler_func_t func, void *arg)
+{
+ clib_error_handler_t h = {.func = func,.arg = arg, };
+ vec_add1 (handlers, h);
+}
+
+static void
+debugger (void)
+{
+ os_panic ();
+}
+
+static void
+error_exit (int code)
+{
+ os_exit (code);
+}
+
+static u8 *
+dispatch_message (u8 * msg)
+{
+ word i;
+
+ if (!msg)
+ return msg;
+
+ for (i = 0; i < vec_len (handlers); i++)
+ handlers[i].func (handlers[i].arg, msg, vec_len (msg));
+
+ /* If no message handler is specified, provide a default one. */
+ if (vec_len (handlers) == 0)
+ os_puts (msg, vec_len (msg), /* is_error */ 1);
+
+ return msg;
+}
+
+void
+_clib_error (int how_to_die,
+ char *function_name, uword line_number, char *fmt, ...)
+{
+ u8 *msg = 0;
+ va_list va;
+
+ if (function_name)
+ {
+ msg = format (msg, "%s:", function_name);
+ if (line_number > 0)
+ msg = format (msg, "%wd:", line_number);
+ msg = format (msg, " ");
+ }
+
+ va_start (va, fmt);
+ msg = va_format (msg, fmt, &va);
+ va_end (va);
+
+#ifdef HAVE_ERRNO
+ if (how_to_die & CLIB_ERROR_ERRNO_VALID)
+ msg = format (msg, ": %s (errno %d)", strerror (errno), errno);
+#endif
+
+ if (vec_end (msg)[-1] != '\n')
+ vec_add1 (msg, '\n');
+
+ msg = dispatch_message (msg);
+
+ vec_free (msg);
+
+ if (how_to_die & CLIB_ERROR_ABORT)
+ debugger ();
+ if (how_to_die & CLIB_ERROR_FATAL)
+ error_exit (1);
+}
+
+clib_error_t *
+_clib_error_return (clib_error_t * errors,
+ any code, uword flags, char *where, char *fmt, ...)
+{
+ clib_error_t *e;
+ va_list va;
+
+#ifdef HAVE_ERRNO
+ /* Save errno since it may be re-set before we'll need it. */
+ word errno_save = errno;
+#endif
+
+ va_start (va, fmt);
+ vec_add2 (errors, e, 1);
+ if (fmt)
+ e->what = va_format (0, fmt, &va);
+
+#ifdef HAVE_ERRNO
+ if (flags & CLIB_ERROR_ERRNO_VALID)
+ {
+ if (e->what)
+ e->what = format (e->what, ": ");
+ e->what = format (e->what, "%s", strerror (errno_save));
+ }
+#endif
+
+ e->where = (u8 *) where;
+ e->code = code;
+ e->flags = flags;
+ va_end (va);
+ return errors;
+}
+
+void *
+clib_error_free_vector (clib_error_t * errors)
+{
+ clib_error_t *e;
+ vec_foreach (e, errors) vec_free (e->what);
+ vec_free (errors);
+ return 0;
+}
+
+u8 *
+format_clib_error (u8 * s, va_list * va)
+{
+ clib_error_t *errors = va_arg (*va, clib_error_t *);
+ clib_error_t *e;
+
+ vec_foreach (e, errors)
+ {
+ if (!e->what)
+ continue;
+
+ if (e->where)
+ {
+ u8 *where = 0;
+
+ if (e > errors)
+ where = format (where, "from ");
+ where = format (where, "%s", e->where);
+
+ s = format (s, "%v: ", where);
+ vec_free (where);
+ }
+
+ s = format (s, "%v\n", e->what);
+ }
+
+ return s;
+}
+
+clib_error_t *
+_clib_error_report (clib_error_t * errors)
+{
+ if (errors)
+ {
+ u8 *msg = format (0, "%U", format_clib_error, errors);
+
+ msg = dispatch_message (msg);
+ vec_free (msg);
+
+ if (errors->flags & CLIB_ERROR_ABORT)
+ debugger ();
+ if (errors->flags & CLIB_ERROR_FATAL)
+ error_exit (1);
+
+ clib_error_free (errors);
+ }
+ return 0;
+}
+
+#ifdef TEST
+
+static clib_error_t *
+foo1 (int x)
+{
+  return clib_error_return (0, "x is odd %d", x);
+}
+
+static clib_error_t *
+foo2 (int x)
+{
+  return clib_error_return (0, "x is even %d", x);
+}
+
+static clib_error_t *
+foo (int x)
+{
+  clib_error_t *e;
+  if (x & 1)
+    e = foo1 (x);
+  else
+    e = foo2 (x);
+  if (e)
+    return clib_error_return (e, 0);
+  return 0;
+}
+
+static void
+error_handler (void *arg, u8 * msg, int msg_len)
+{
+  write (2, msg, msg_len);
+}
+
+int
+main (int argc, char *argv[])
+{
+  clib_error_t *e;
+
+  clib_error_register_handler (error_handler, 0);
+
+  e = foo (getpid ());
+  if (e)
+    clib_error_report (e);
+  return 0;
+}
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/error.h b/src/vppinfra/error.h
new file mode 100644
index 00000000..e0e2d472
--- /dev/null
+++ b/src/vppinfra/error.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_error_h
+#define included_error_h
+
+#include <vppinfra/clib.h> /* for CLIB_LINUX_KERNEL */
+#include <vppinfra/error_bootstrap.h>
+
+#ifdef CLIB_UNIX
+#include <errno.h>
+#endif
+
+#ifdef CLIB_LINUX_KERNEL
+#include <linux/errno.h>
+#endif
+
+#include <stdarg.h>
+#include <vppinfra/vec.h>
+
+/* Callback functions for error reporting. */
+typedef void clib_error_handler_func_t (void *arg, u8 * msg, int msg_len);
+void clib_error_register_handler (clib_error_handler_func_t func, void *arg);
+
+#define clib_warning(format,args...) \
+ _clib_error (CLIB_ERROR_WARNING, clib_error_function, __LINE__, format, ## args)
+
+#define clib_error(format,args...) \
+ _clib_error (CLIB_ERROR_FATAL, clib_error_function, __LINE__, format, ## args)
+
+#define clib_unix_error(format,args...) \
+ _clib_error (CLIB_ERROR_FATAL | CLIB_ERROR_ERRNO_VALID, clib_error_function, __LINE__, format, ## args)
+
+#define clib_unix_warning(format,args...) \
+ _clib_error (CLIB_ERROR_WARNING | CLIB_ERROR_ERRNO_VALID, clib_error_function, __LINE__, format, ## args)
+
+/* For programming errors and assert. */
+#define clib_panic(format,args...) \
+ _clib_error (CLIB_ERROR_ABORT, (char *) clib_error_function, __LINE__, format, ## args)
+
+#include <vppinfra/clib_error.h>
+
+#define clib_error_get_code(err) ((err) ? (err)->code : 0)
+#define clib_error_set_code(err, c) \
+do { \
+ if (err) \
+ (err)->code = (c); \
+} while (0)
+
+extern void *clib_error_free_vector (clib_error_t * errors);
+
+#define clib_error_free(e) e = clib_error_free_vector(e)
+
+extern clib_error_t *_clib_error_return (clib_error_t * errors,
+ any code,
+ uword flags,
+ char *where, char *fmt, ...);
+
+#define clib_error_return_code(e,code,flags,args...) \
+ _clib_error_return((e),(code),(flags),(char *)clib_error_function,args)
+
+#define clib_error_create(args...) \
+ clib_error_return_code(0,0,0,args)
+
+#define clib_error_return(e,args...) \
+ clib_error_return_code(e,0,0,args)
+
+#define clib_error_return_unix(e,args...) \
+ clib_error_return_code(e,errno,CLIB_ERROR_ERRNO_VALID,args)
+
+#define clib_error_return_fatal(e,args...) \
+ clib_error_return_code(e,0,CLIB_ERROR_FATAL,args)
+
+#define clib_error_return_unix_fatal(e,args...) \
+ clib_error_return_code(e,errno,CLIB_ERROR_ERRNO_VALID|CLIB_ERROR_FATAL,args)
+
+extern clib_error_t *_clib_error_report (clib_error_t * errors);
+
+#define clib_error_report(e) do { (e) = _clib_error_report (e); } while (0)
+
+u8 *format_clib_error (u8 * s, va_list * va);
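+
+/* Usage sketch (illustrative only; the clib_error_example_* names are
+   hypothetical, not part of the API): a callee builds an error with
+   clib_error_return (); the caller prints and frees it with
+   clib_error_report (). */
+always_inline clib_error_t *
+clib_error_example_check (int x)
+{
+  if (x < 0)
+    return clib_error_return (0, "bad value %d", x);
+  return 0;
+}
+
+always_inline void
+clib_error_example_caller (void)
+{
+  clib_error_t *e = clib_error_example_check (-1);
+  if (e)
+    clib_error_report (e);	/* reports, frees and zeroes e */
+}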
+
+always_inline word
+unix_error_is_fatal (word error)
+{
+#ifdef CLIB_UNIX
+ switch (error)
+ {
+ case EWOULDBLOCK:
+ case EINTR:
+ return 0;
+ }
+#endif
+ return 1;
+}
+
+#define IF_ERROR_IS_FATAL_RETURN_ELSE_FREE(e) \
+do { \
+ if (e) \
+ { \
+ if (unix_error_is_fatal (clib_error_get_code (e))) \
+ return (e); \
+ else \
+ clib_error_free (e); \
+ } \
+} while (0)
+
+#define ERROR_RETURN_IF(x) \
+do { \
+ clib_error_t * _error_return_if = (x); \
+ if (_error_return_if) \
+ return clib_error_return (_error_return_if, 0); \
+} while (0)
+
+#define ERROR_ASSERT(truth) \
+({ \
+ clib_error_t * _error_assert = 0; \
+ if (CLIB_DEBUG > 0 && ! (truth)) \
+ { \
+ _error_assert = clib_error_return_fatal \
+ (0, "%s:%d (%s) assertion `%s' fails", \
+ __FILE__, \
+ (uword) __LINE__, \
+ clib_error_function, \
+ # truth); \
+ } \
+ _error_assert; \
+})
+
+/* Assert that remains even if CLIB_DEBUG is set to 0. */
+#define CLIB_ERROR_ASSERT(truth) \
+({ \
+ clib_error_t * _error_assert = 0; \
+ if (! (truth)) \
+ { \
+ _error_assert = \
+ clib_error_return_fatal \
+ (0, "%s:%d (%s) assertion `%s' fails", \
+ __FILE__, \
+ (uword) __LINE__, \
+ clib_error_function, \
+ # truth); \
+ } \
+ _error_assert; \
+})
+
+/*
+ * If we're running under Coverity, don't die on
+ * failed static assertions.
+ */
+#ifdef __COVERITY__
+#ifndef _Static_assert
+#define _Static_assert(x,y)
+#endif
+#endif
+
+#endif /* included_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/error_bootstrap.h b/src/vppinfra/error_bootstrap.h
new file mode 100644
index 00000000..3416c2f9
--- /dev/null
+++ b/src/vppinfra/error_bootstrap.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_error_bootstrap_h
+#define included_error_bootstrap_h
+
+/* Bootstrap include so that #include <vppinfra/mem.h> can include e.g.
+ <vppinfra/mheap.h> which depends on <vppinfra/vec.h>. */
+
+#include <vppinfra/clib.h> /* for uword */
+
+enum
+{
+ CLIB_ERROR_FATAL = 1 << 0,
+ CLIB_ERROR_ABORT = 1 << 1,
+ CLIB_ERROR_WARNING = 1 << 2,
+ CLIB_ERROR_ERRNO_VALID = 1 << 16,
+ CLIB_ERROR_NO_RATE_LIMIT = 1 << 17,
+};
+
+/* Current function name. Need (char *) cast to silence gcc4 pointer signedness warning. */
+#define clib_error_function ((char *) __FUNCTION__)
+
+#ifndef CLIB_ASSERT_ENABLE
+#define CLIB_ASSERT_ENABLE (CLIB_DEBUG > 0)
+#endif
+
+/* Low level error reporting function.
+ Code specifies whether to call exit, abort or nothing at
+ all (for non-fatal warnings). */
+extern void _clib_error (int code,
+ char *function_name,
+ uword line_number, char *format, ...);
+
+#define ASSERT(truth) \
+do { \
+ if (CLIB_ASSERT_ENABLE && ! (truth)) \
+ { \
+ _clib_error (CLIB_ERROR_ABORT, 0, 0, \
+ "%s:%d (%s) assertion `%s' fails", \
+ __FILE__, \
+ (uword) __LINE__, \
+ clib_error_function, \
+ # truth); \
+ } \
+} while (0)
+
+#if defined(__clang__)
+#define STATIC_ASSERT(truth,...)
+#else
+#define STATIC_ASSERT(truth,...) _Static_assert(truth, __VA_ARGS__)
+#endif
+
+#define STATIC_ASSERT_SIZEOF(d, s) \
+ STATIC_ASSERT (sizeof (d) == s, "Size of " #d " must be " # s " bytes")
+
+/* Assert without allocating memory. */
+#define ASSERT_AND_PANIC(truth) \
+do { \
+ if (CLIB_ASSERT_ENABLE && ! (truth)) \
+ os_panic (); \
+} while (0)
+
+#endif /* included_error_bootstrap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/fheap.c b/src/vppinfra/fheap.c
new file mode 100644
index 00000000..13692456
--- /dev/null
+++ b/src/vppinfra/fheap.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/fheap.h>
+
+/* Fibonacci heaps. */
+always_inline fheap_node_t *
+fheap_get_node (fheap_t * f, u32 i)
+{
+ return i != ~0 ? vec_elt_at_index (f->nodes, i) : 0;
+}
+
+always_inline fheap_node_t *
+fheap_get_root (fheap_t * f)
+{
+ return fheap_get_node (f, f->min_root);
+}
+
+static void
+fheap_validate (fheap_t * f)
+{
+ fheap_node_t *n, *m;
+ uword ni, si;
+
+ if (!CLIB_DEBUG || !f->enable_validate)
+ return;
+
+ vec_foreach_index (ni, f->nodes)
+ {
+ n = vec_elt_at_index (f->nodes, ni);
+
+ if (!n->is_valid)
+ continue;
+
+ /* Min root must have minimal key. */
+ m = vec_elt_at_index (f->nodes, f->min_root);
+ ASSERT (n->key >= m->key);
+
+ /* Min root must have no parent. */
+ if (ni == f->min_root)
+ ASSERT (n->parent == ~0);
+
+ /* Check sibling linkages. */
+ if (n->next_sibling == ~0)
+ ASSERT (n->prev_sibling == ~0);
+ else if (n->prev_sibling == ~0)
+ ASSERT (n->next_sibling == ~0);
+ else
+ {
+ fheap_node_t *prev, *next;
+ u32 si = n->next_sibling, si_start = si;
+ do
+ {
+ m = vec_elt_at_index (f->nodes, si);
+ prev = vec_elt_at_index (f->nodes, m->prev_sibling);
+ next = vec_elt_at_index (f->nodes, m->next_sibling);
+ ASSERT (prev->next_sibling == si);
+ ASSERT (next->prev_sibling == si);
+ si = m->next_sibling;
+ }
+ while (si != si_start);
+ }
+
+ /* Loop through all siblings. */
+ {
+ u32 n_siblings = 0;
+
+      foreach_fheap_node_sibling (f, si, n->next_sibling, ({
+        m = vec_elt_at_index (f->nodes, si);
+        /* All siblings must have same parent. */
+        ASSERT (m->parent == n->parent);
+        n_siblings += 1;
+      }));
+
+ /* Either parent is non-empty or there are siblings present. */
+ if (n->parent == ~0 && ni != f->min_root)
+ ASSERT (n_siblings > 0);
+ }
+
+ /* Loop through all children. */
+ {
+ u32 found_first_child = n->first_child == ~0;
+ u32 n_children = 0;
+
+      foreach_fheap_node_sibling (f, si, n->first_child, ({
+        m = vec_elt_at_index (f->nodes, si);
+        /* Children must have larger keys than their parent. */
+        ASSERT (m->key >= n->key);
+        if (!found_first_child)
+          found_first_child = si == n->first_child;
+        n_children += 1;
+      }));
+
+ /* Check that first child is present on list. */
+ ASSERT (found_first_child);
+
+ /* Make sure rank is correct. */
+ ASSERT (n->rank == n_children);
+ }
+ }
+
+ /* Increment serial number for each successful validate.
+ Failure can be used as condition for gdb breakpoints. */
+ f->validate_serial++;
+}
+
+always_inline void
+fheap_node_add_sibling (fheap_t * f, u32 ni, u32 ni_to_add)
+{
+ fheap_node_t *n = vec_elt_at_index (f->nodes, ni);
+ fheap_node_t *n_to_add = vec_elt_at_index (f->nodes, ni_to_add);
+ fheap_node_t *n_next = fheap_get_node (f, n->next_sibling);
+ fheap_node_t *parent;
+
+ /* Empty list? */
+ if (n->next_sibling == ~0)
+ {
+ ASSERT (n->prev_sibling == ~0);
+ n->next_sibling = n->prev_sibling = ni_to_add;
+ n_to_add->next_sibling = n_to_add->prev_sibling = ni;
+ }
+ else
+ {
+ /* Add node after existing node. */
+ n_to_add->prev_sibling = ni;
+ n_to_add->next_sibling = n->next_sibling;
+
+ n->next_sibling = ni_to_add;
+ n_next->prev_sibling = ni_to_add;
+ }
+
+ n_to_add->parent = n->parent;
+ parent = fheap_get_node (f, n->parent);
+ if (parent)
+ parent->rank += 1;
+}
+
+void
+fheap_add (fheap_t * f, u32 ni, u32 key)
+{
+ fheap_node_t *r, *n;
+ u32 ri;
+
+ n = vec_elt_at_index (f->nodes, ni);
+
+ memset (n, 0, sizeof (n[0]));
+ n->parent = n->first_child = n->next_sibling = n->prev_sibling = ~0;
+ n->key = key;
+
+ r = fheap_get_root (f);
+ ri = f->min_root;
+ if (!r)
+ {
+ /* No root? Add node as new root. */
+ f->min_root = ni;
+ }
+ else
+ {
+ /* Add node as sibling of current root. */
+ fheap_node_add_sibling (f, ri, ni);
+
+ /* New node may become new root. */
+ if (r->key > n->key)
+ f->min_root = ni;
+ }
+
+ fheap_validate (f);
+}
+
+always_inline u32
+fheap_node_remove_internal (fheap_t * f, u32 ni, u32 invalidate)
+{
+ fheap_node_t *n = vec_elt_at_index (f->nodes, ni);
+ u32 prev_ni = n->prev_sibling;
+ u32 next_ni = n->next_sibling;
+ u32 list_has_single_element = prev_ni == ni;
+ fheap_node_t *prev = fheap_get_node (f, prev_ni);
+ fheap_node_t *next = fheap_get_node (f, next_ni);
+ fheap_node_t *p = fheap_get_node (f, n->parent);
+
+ if (p)
+ {
+ ASSERT (p->rank > 0);
+ p->rank -= 1;
+ p->first_child = list_has_single_element ? ~0 : next_ni;
+ }
+
+ if (prev)
+ {
+ ASSERT (prev->next_sibling == ni);
+ prev->next_sibling = next_ni;
+ }
+ if (next)
+ {
+ ASSERT (next->prev_sibling == ni);
+ next->prev_sibling = prev_ni;
+ }
+
+ n->prev_sibling = n->next_sibling = ni;
+ n->parent = ~0;
+ n->is_valid = invalidate == 0;
+
+ return list_has_single_element ? ~0 : next_ni;
+}
+
+always_inline u32
+fheap_node_remove (fheap_t * f, u32 ni)
+{
+ return fheap_node_remove_internal (f, ni, /* invalidate */ 0);
+}
+
+always_inline u32
+fheap_node_remove_and_invalidate (fheap_t * f, u32 ni)
+{
+ return fheap_node_remove_internal (f, ni, /* invalidate */ 1);
+}
+
+static void
+fheap_link_root (fheap_t * f, u32 ni)
+{
+ fheap_node_t *n = vec_elt_at_index (f->nodes, ni);
+ fheap_node_t *r, *lo, *hi;
+ u32 ri, lo_i, hi_i, k;
+
+ while (1)
+ {
+ k = n->rank;
+ vec_validate_init_empty (f->root_list_by_rank, k, ~0);
+ ri = f->root_list_by_rank[k];
+ r = fheap_get_node (f, ri);
+ if (!r)
+ {
+ f->root_list_by_rank[k] = ni;
+ return;
+ }
+
+ f->root_list_by_rank[k] = ~0;
+
+ /* Sort n/r into lo/hi by their keys. */
+ lo = r, lo_i = ri;
+ hi = n, hi_i = ni;
+ if (hi->key < lo->key)
+ {
+ u32 ti;
+ fheap_node_t *tn;
+ ti = lo_i, tn = lo;
+ lo = hi, lo_i = hi_i;
+ hi = tn, hi_i = ti;
+ }
+
+ /* Remove larger key. */
+ fheap_node_remove (f, hi_i);
+
+ /* Add larger key as child of smaller one. */
+ if (lo->first_child == ~0)
+ {
+ hi->parent = lo_i;
+ lo->first_child = hi_i;
+ lo->rank = 1;
+ }
+ else
+ fheap_node_add_sibling (f, lo->first_child, hi_i);
+
+      /* Following Fredman & Tarjan: "When making a root node X a child of
+         another node in a linking step, we unmark X". */
+ hi->is_marked = 0;
+
+ ni = lo_i;
+ n = lo;
+ }
+}
+
+u32
+fheap_del_min (fheap_t * f, u32 * min_key)
+{
+ fheap_node_t *r = fheap_get_root (f);
+ u32 to_delete_min_ri = f->min_root;
+ u32 ri, ni;
+
+ /* Empty heap? */
+ if (!r)
+ return ~0;
+
+ /* Root's children become siblings. Call this step a; see below. */
+ if (r->first_child != ~0)
+ {
+ u32 ci, cni, rni;
+ fheap_node_t *c, *cn, *rn;
+
+ /* Splice child & root circular lists together. */
+ ci = r->first_child;
+ c = vec_elt_at_index (f->nodes, ci);
+
+ cni = c->next_sibling;
+ rni = r->next_sibling;
+ cn = vec_elt_at_index (f->nodes, cni);
+ rn = vec_elt_at_index (f->nodes, rni);
+
+ r->next_sibling = cni;
+ c->next_sibling = rni;
+ cn->prev_sibling = to_delete_min_ri;
+ rn->prev_sibling = ci;
+ }
+
+ /* Remove min root. */
+ ri = fheap_node_remove_and_invalidate (f, to_delete_min_ri);
+
+ /* Find new min root from among siblings including the ones we've just added. */
+ f->min_root = ~0;
+ if (ri != ~0)
+ {
+ u32 ri_last, ri_next, i, min_ds;
+
+ r = fheap_get_node (f, ri);
+ ri_last = r->prev_sibling;
+ while (1)
+ {
+ /* Step a above can put children (with r->parent != ~0) on root list. */
+ r->parent = ~0;
+
+ ri_next = r->next_sibling;
+ fheap_link_root (f, ri);
+ if (ri == ri_last)
+ break;
+ ri = ri_next;
+ r = fheap_get_node (f, ri);
+ }
+
+ min_ds = ~0;
+ vec_foreach_index (i, f->root_list_by_rank)
+ {
+ ni = f->root_list_by_rank[i];
+ if (ni == ~0)
+ continue;
+ f->root_list_by_rank[i] = ~0;
+ r = fheap_get_node (f, ni);
+ if (r->key < min_ds)
+ {
+ f->min_root = ni;
+ min_ds = r->key;
+ ASSERT (r->parent == ~0);
+ }
+ }
+ }
+
+ /* Return deleted min root. */
+ r = vec_elt_at_index (f->nodes, to_delete_min_ri);
+ if (min_key)
+ *min_key = r->key;
+
+ fheap_validate (f);
+
+ return to_delete_min_ri;
+}
+
+static void
+fheap_mark_parent (fheap_t * f, u32 pi)
+{
+ fheap_node_t *p = vec_elt_at_index (f->nodes, pi);
+
+ /* Parent is a root: do nothing. */
+ if (p->parent == ~0)
+ return;
+
+ /* If not marked, mark it. */
+ if (!p->is_marked)
+ {
+ p->is_marked = 1;
+ return;
+ }
+
+  /* It's a previously marked, non-root parent.
+     Cut the edge to its parent and add it to the root list. */
+ fheap_node_remove (f, pi);
+ fheap_node_add_sibling (f, f->min_root, pi);
+
+  /* Unmark it since it's now a root node. */
+ p->is_marked = 0;
+
+ /* "Cascading cuts": check parent. */
+ if (p->parent != ~0)
+ fheap_mark_parent (f, p->parent);
+}
+
+/* Set key to new smaller value. */
+void
+fheap_decrease_key (fheap_t * f, u32 ni, u32 new_key)
+{
+ fheap_node_t *n = vec_elt_at_index (f->nodes, ni);
+ fheap_node_t *r = fheap_get_root (f);
+
+ n->key = new_key;
+
+ if (n->parent != ~0)
+ {
+ fheap_mark_parent (f, n->parent);
+
+ /* Remove node and add to root list. */
+ fheap_node_remove (f, ni);
+ fheap_node_add_sibling (f, f->min_root, ni);
+ }
+
+ if (n->key < r->key)
+ f->min_root = ni;
+
+ fheap_validate (f);
+}
+
+void
+fheap_del (fheap_t * f, u32 ni)
+{
+ fheap_node_t *n;
+
+ n = vec_elt_at_index (f->nodes, ni);
+
+ if (n->parent == ~0)
+ {
+ ASSERT (ni == f->min_root);
+ fheap_del_min (f, 0);
+ }
+ else
+ {
+ u32 ci;
+
+ fheap_mark_parent (f, n->parent);
+
+ /* Add children to root list. */
+      foreach_fheap_node_sibling (f, ci, n->first_child, ({
+        fheap_node_remove (f, ci);
+        fheap_node_add_sibling (f, f->min_root, ci);
+      }));
+
+ fheap_node_remove_and_invalidate (f, ni);
+ }
+
+ fheap_validate (f);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/fheap.h b/src/vppinfra/fheap.h
new file mode 100644
index 00000000..6d4965f1
--- /dev/null
+++ b/src/vppinfra/fheap.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_clib_fheap_h
+#define included_clib_fheap_h
+
+/* Fibonacci heaps: Fredman, M. L.; Tarjan, R. E. (1987),
+   "Fibonacci heaps and their uses in improved network optimization algorithms". */
+
+#include <vppinfra/vec.h>
+
+typedef struct
+{
+ /* Node index of parent. */
+ u32 parent;
+
+ /* Node index of first child. */
+ u32 first_child;
+
+ /* Next and previous nodes in doubly linked list of siblings. */
+ u32 next_sibling, prev_sibling;
+
+  /* Key (distance) for this node. A parent's key is always
+     <= the keys of its children. */
+ u32 key;
+
+  /* Number of children (as opposed to descendants). */
+ u32 rank;
+
+ u32 is_marked;
+
+ /* Set to one when node is inserted; zero when deleted. */
+ u32 is_valid;
+} fheap_node_t;
+
+#define foreach_fheap_node_sibling(f,ni,first_ni,body) \
+do { \
+ u32 __fheap_foreach_first_ni = (first_ni); \
+ u32 __fheap_foreach_ni = __fheap_foreach_first_ni; \
+ u32 __fheap_foreach_next_ni; \
+ fheap_node_t * __fheap_foreach_n; \
+ if (__fheap_foreach_ni != ~0) \
+ while (1) \
+ { \
+ __fheap_foreach_n = fheap_get_node ((f), __fheap_foreach_ni); \
+ __fheap_foreach_next_ni = __fheap_foreach_n -> next_sibling; \
+ (ni) = __fheap_foreach_ni; \
+ \
+ body; \
+ \
+ /* End of circular list? */ \
+ if (__fheap_foreach_next_ni == __fheap_foreach_first_ni) \
+ break; \
+ \
+ __fheap_foreach_ni = __fheap_foreach_next_ni; \
+ \
+ } \
+} while (0)
+
+typedef struct
+{
+ u32 min_root;
+
+ /* Vector of nodes. */
+ fheap_node_t *nodes;
+
+ u32 *root_list_by_rank;
+
+ u32 enable_validate;
+
+ u32 validate_serial;
+} fheap_t;
+
+/* Initialize empty heap. */
+always_inline void
+fheap_init (fheap_t * f, u32 n_nodes)
+{
+ fheap_node_t *save_nodes = f->nodes;
+ u32 *save_root_list = f->root_list_by_rank;
+
+ memset (f, 0, sizeof (f[0]));
+
+ f->nodes = save_nodes;
+ f->root_list_by_rank = save_root_list;
+
+ vec_validate (f->nodes, n_nodes - 1);
+ vec_reset_length (f->root_list_by_rank);
+
+ f->min_root = ~0;
+}
+
+always_inline void
+fheap_free (fheap_t * f)
+{
+ vec_free (f->nodes);
+ vec_free (f->root_list_by_rank);
+}
+
+always_inline u32
+fheap_find_min (fheap_t * f)
+{
+ return f->min_root;
+}
+
+always_inline u32
+fheap_is_empty (fheap_t * f)
+{
+ return f->min_root == ~0;
+}
+
+/* Add/delete nodes. */
+void fheap_add (fheap_t * f, u32 ni, u32 key);
+void fheap_del (fheap_t * f, u32 ni);
+
+/* Delete and return minimum. */
+u32 fheap_del_min (fheap_t * f, u32 * min_key);
+
+/* Change key value. */
+void fheap_decrease_key (fheap_t * f, u32 ni, u32 new_key);
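+
+/* Usage sketch (illustrative; not used by the library): node indices are
+   caller-chosen and index f->nodes; keys are u32 priorities. Nodes come
+   back from fheap_del_min () in nondecreasing key order. */
+always_inline void
+fheap_usage_example (void)
+{
+  fheap_t f = { 0 };
+  u32 ni, key;
+
+  fheap_init (&f, 3);		/* room for node indices 0..2 */
+  fheap_add (&f, 0, 10);
+  fheap_add (&f, 1, 5);
+  fheap_add (&f, 2, 7);
+
+  fheap_decrease_key (&f, 0, 1);	/* node 0 becomes the new min */
+
+  while ((ni = fheap_del_min (&f, &key)) != ~0)
+    ;				/* yields 0 (key 1), then 1 (key 5), then 2 (key 7) */
+
+  fheap_free (&f);
+}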
+
+#endif /* included_clib_fheap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/fifo.c b/src/vppinfra/fifo.c
new file mode 100644
index 00000000..5b4c76d1
--- /dev/null
+++ b/src/vppinfra/fifo.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/cache.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/error.h>
+#include <vppinfra/string.h>
+
+/*
+  General first in/first out queues.
+  FIFOs can have arbitrary size and type.
+  Let T be any type (i.e. char, int, struct foo, etc.).
+
+  A null fifo is initialized:
+
+    T * f = 0;
+
+  For example, typedef struct { int a, b; } T;
+
+  Elements can be added in 3 ways.
+
+  #1 1 element is added:
+     T x;
+     x.a = 10; x.b = 20;
+     clib_fifo_add1 (f, x);
+
+  #2 n elements are added:
+     T buf[10];
+     initialize buf[0] .. buf[9];
+     clib_fifo_add (f, buf, 10);
+
+  #3 1 element is added, pointer is returned:
+     T * x;
+     clib_fifo_add2 (f, x);
+     x->a = 10;
+     x->b = 20;
+
+  Elements are removed 1 at a time:
+     T x;
+     clib_fifo_sub1 (f, x);
+
+  clib_fifo_free (f) frees the fifo.
+*/
+
+void *
+_clib_fifo_resize (void *v_old, uword n_new_elts, uword elt_bytes)
+{
+ void *v_new, *end, *head;
+ uword n_old_elts, header_bytes;
+ uword n_copy_bytes, n_zero_bytes;
+ clib_fifo_header_t *f_new, *f_old;
+
+ n_old_elts = clib_fifo_elts (v_old);
+ n_new_elts += n_old_elts;
+ if (n_new_elts < 32)
+ n_new_elts = 32;
+ else
+ n_new_elts = max_pow2 (n_new_elts);
+
+ header_bytes = vec_header_bytes (sizeof (clib_fifo_header_t));
+
+ v_new = clib_mem_alloc_no_fail (n_new_elts * elt_bytes + header_bytes);
+ v_new += header_bytes;
+
+ f_new = clib_fifo_header (v_new);
+ f_new->head_index = 0;
+ f_new->tail_index = n_old_elts;
+ _vec_len (v_new) = n_new_elts;
+
+ /* Copy old -> new. */
+ n_copy_bytes = n_old_elts * elt_bytes;
+ if (n_copy_bytes > 0)
+ {
+ f_old = clib_fifo_header (v_old);
+ end = v_old + _vec_len (v_old) * elt_bytes;
+ head = v_old + f_old->head_index * elt_bytes;
+
+ if (head + n_copy_bytes >= end)
+ {
+ uword n = end - head;
+ clib_memcpy (v_new, head, n);
+ clib_memcpy (v_new + n, v_old, n_copy_bytes - n);
+ }
+ else
+ clib_memcpy (v_new, head, n_copy_bytes);
+ }
+
+ /* Zero empty space. */
+ n_zero_bytes = (n_new_elts - n_old_elts) * elt_bytes;
+ memset (v_new + n_copy_bytes, 0, n_zero_bytes);
+
+ clib_fifo_free (v_old);
+
+ return v_new;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/fifo.h b/src/vppinfra/fifo.h
new file mode 100644
index 00000000..b0b35e25
--- /dev/null
+++ b/src/vppinfra/fifo.h
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_fifo_h
+#define included_fifo_h
+
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h> /* for ASSERT */
+#include <vppinfra/vec.h>
+
+typedef struct
+{
+ /* First index of valid data in fifo. */
+ u32 head_index;
+
+ /* One beyond last index in fifo. */
+ u32 tail_index;
+} clib_fifo_header_t;
+
+always_inline clib_fifo_header_t *
+clib_fifo_header (void *f)
+{
+ return vec_header (f, sizeof (clib_fifo_header_t));
+}
+
+/* Aliases. */
+#define clib_fifo_len(v) vec_len(v)
+#define _clib_fifo_len(v) _vec_len(v)
+#define clib_fifo_end(v) vec_end(v)
+
+always_inline uword
+clib_fifo_elts (void *v)
+{
+ word l, r;
+ clib_fifo_header_t *f = clib_fifo_header (v);
+
+ if (!v)
+ return 0;
+
+ l = _clib_fifo_len (v);
+ r = (word) f->tail_index - (word) f->head_index;
+ r = r < 0 ? r + l : r;
+ ASSERT (r >= 0 && r <= l);
+ return r;
+}
+
+always_inline uword
+clib_fifo_free_elts (void *v)
+{
+ return clib_fifo_len (v) - clib_fifo_elts (v);
+}
+
+always_inline void
+clib_fifo_reset (void *v)
+{
+ clib_fifo_header_t *f = clib_fifo_header (v);
+ if (v)
+ {
+ f->head_index = f->tail_index = 0;
+ _vec_len (v) = 0;
+ }
+}
+
+/* External resize function. */
+void *_clib_fifo_resize (void *v, uword n_elts, uword elt_bytes);
+
+#define clib_fifo_resize(f,n_elts) \
+ f = _clib_fifo_resize ((f), (n_elts), sizeof ((f)[0]))
+
+always_inline void *
+_clib_fifo_validate (void *v, uword n_elts, uword elt_bytes)
+{
+ if (clib_fifo_free_elts (v) < n_elts)
+ v = _clib_fifo_resize (v, n_elts, elt_bytes);
+ return v;
+}
+
+#define clib_fifo_validate(f,n_elts) \
+ f = _clib_fifo_validate ((f), (n_elts), sizeof (f[0]))
+
+/* Advance tail pointer by N_ELTS which can be either positive or negative. */
+always_inline void *
+_clib_fifo_advance_tail (void *v, word n_elts, uword elt_bytes,
+ uword * tail_return)
+{
+ word i, l, n_free;
+ clib_fifo_header_t *f;
+
+ n_free = clib_fifo_free_elts (v);
+ if (n_free < n_elts)
+ {
+ v = _clib_fifo_resize (v, n_elts, elt_bytes);
+ n_free = clib_fifo_free_elts (v);
+ }
+
+ ASSERT (n_free >= n_elts);
+ n_free -= n_elts;
+
+ f = clib_fifo_header (v);
+ l = _clib_fifo_len (v);
+ i = f->tail_index;
+
+ if (n_free == 0)
+ {
+ /* Mark fifo full. */
+ f->tail_index = f->head_index + l;
+ }
+ else
+ {
+ word n = f->tail_index + n_elts;
+ if (n >= l)
+ n -= l;
+ else if (n < 0)
+ n += l;
+ ASSERT (n >= 0 && n < l);
+ f->tail_index = n;
+ }
+
+ ASSERT (clib_fifo_free_elts (v) == n_free);
+
+ if (tail_return)
+ *tail_return = n_elts > 0 ? i : f->tail_index;
+
+ return v;
+}
+
+#define clib_fifo_advance_tail(f,n_elts) \
+({ \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), (n_elts), sizeof ((f)[0]), &_i); \
+ (f) + _i; \
+})
+
+always_inline uword
+clib_fifo_advance_head (void *v, uword n_elts)
+{
+ clib_fifo_header_t *f;
+ uword l, i, n;
+
+ ASSERT (clib_fifo_elts (v) >= n_elts);
+ f = clib_fifo_header (v);
+ l = _clib_fifo_len (v);
+
+ /* If fifo was full, restore tail pointer. */
+ if (f->tail_index == f->head_index + l)
+ f->tail_index = f->head_index;
+
+ n = i = f->head_index;
+ n += n_elts;
+ n = n >= l ? n - l : n;
+ ASSERT (n < l);
+ f->head_index = n;
+
+ return i;
+}
+
+/* Add given element to fifo. */
+#define clib_fifo_add1(f,e) \
+do { \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), 1, sizeof ((f)[0]), &_i); \
+ (f)[_i] = (e); \
+} while (0)
+
+/* Add element to fifo; return pointer to new element. */
+#define clib_fifo_add2(f,p) \
+do { \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), 1, sizeof ((f)[0]), &_i); \
+ (p) = (f) + _i; \
+} while (0)
+
+/* Add several elements to fifo. */
+#define clib_fifo_add(f,e,n) \
+do { \
+ uword _i, _l; word _n0, _n1; \
+ \
+ _n0 = (n); \
+ (f) = _clib_fifo_advance_tail ((f), _n0, sizeof ((f)[0]), &_i); \
+ _l = clib_fifo_len (f); \
+ _n1 = _i + _n0 - _l; \
+ _n1 = _n1 < 0 ? 0 : _n1; \
+ _n0 -= _n1; \
+ clib_memcpy ((f) + _i, (e), _n0 * sizeof ((f)[0])); \
+ if (_n1) \
+ clib_memcpy ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \
+} while (0)
+
+/* Subtract element from fifo. */
+#define clib_fifo_sub1(f,e) \
+do { \
+ uword _i; \
+ ASSERT (clib_fifo_elts (f) >= 1); \
+ _i = clib_fifo_advance_head ((f), 1); \
+ (e) = (f)[_i]; \
+} while (0)
+
+#define clib_fifo_sub2(f,p) \
+do { \
+ uword _i; \
+ ASSERT (clib_fifo_elts (f) >= 1); \
+ _i = clib_fifo_advance_head ((f), 1); \
+ (p) = (f) + _i; \
+} while (0)
+
+always_inline uword
+clib_fifo_head_index (void *v)
+{
+ clib_fifo_header_t *f = clib_fifo_header (v);
+ return v ? f->head_index : 0;
+}
+
+always_inline uword
+clib_fifo_tail_index (void *v)
+{
+ clib_fifo_header_t *f = clib_fifo_header (v);
+ return v ? f->tail_index : 0;
+}
+
+#define clib_fifo_head(v) ((v) + clib_fifo_head_index (v))
+#define clib_fifo_tail(v) ((v) + clib_fifo_tail_index (v))
+
+#define clib_fifo_free(f) vec_free_h((f),sizeof(clib_fifo_header_t))
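+
+/* Usage sketch (illustrative; not used by the library): a fifo of u32s
+   grows on demand and yields elements in insertion order. */
+always_inline void
+clib_fifo_usage_example (void)
+{
+  u32 *f = 0, x, *p;
+
+  clib_fifo_add1 (f, 1);	/* enqueue 1 */
+  clib_fifo_add2 (f, p);	/* enqueue via returned pointer */
+  p[0] = 2;
+
+  clib_fifo_sub1 (f, x);	/* x = 1 */
+  clib_fifo_sub1 (f, x);	/* x = 2 */
+
+  clib_fifo_free (f);
+}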
+
+always_inline uword
+clib_fifo_elt_index (void *v, uword i)
+{
+ clib_fifo_header_t *f = clib_fifo_header (v);
+ uword result = 0;
+
+ ASSERT (i < clib_fifo_elts (v));
+
+ if (v)
+ {
+ result = f->head_index + i;
+ if (result >= _vec_len (v))
+ result -= _vec_len (v);
+ }
+
+ return result;
+}
+
+#define clib_fifo_elt_at_index(v,i) ((v) + clib_fifo_elt_index (v, (i)))
+
+#define clib_fifo_foreach(v,f,body) \
+do { \
+ uword _i, _l, _n; \
+ \
+ _i = clib_fifo_head_index (f); \
+ _l = clib_fifo_len (f); \
+ _n = clib_fifo_elts (f); \
+ while (_n > 0) \
+ { \
+ (v) = (f) + _i; \
+ do { body; } while (0); \
+ _n--; \
+ _i++; \
+ _i = _i >= _l ? 0 : _i; \
+ } \
+} while (0)
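+
+/* e.g., with u32 *f and u32 *p, sum all queued elements:
+     u32 sum = 0;
+     clib_fifo_foreach (p, f, ({ sum += p[0]; }));
+*/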
+
+#endif /* included_fifo_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h
new file mode 100644
index 00000000..69facea9
--- /dev/null
+++ b/src/vppinfra/file.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * file.h: unix file handling
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_clib_file_h
+#define included_clib_file_h
+
+#include <vppinfra/socket.h>
+#include <termios.h>
+
+
+struct clib_file;
+typedef clib_error_t *(clib_file_function_t) (struct clib_file * f);
+
+typedef struct clib_file
+{
+ /* Unix file descriptor from open/socket. */
+ u32 file_descriptor;
+
+ u32 flags;
+#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0)
+#define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1)
+
+ /* Data available for function's use. */
+ uword private_data;
+
+ /* Functions to be called when read/write data becomes ready. */
+ clib_file_function_t *read_function, *write_function, *error_function;
+} clib_file_t;
+
+typedef enum
+{
+ UNIX_FILE_UPDATE_ADD,
+ UNIX_FILE_UPDATE_MODIFY,
+ UNIX_FILE_UPDATE_DELETE,
+} unix_file_update_type_t;
+
+typedef struct
+{
+ /* Pool of files to poll for input/output. */
+ clib_file_t *file_pool;
+
+ void (*file_update) (clib_file_t * file,
+ unix_file_update_type_t update_type);
+
+} clib_file_main_t;
+
+always_inline uword
+clib_file_add (clib_file_main_t * um, clib_file_t * template)
+{
+ clib_file_t *f;
+ pool_get (um->file_pool, f);
+ f[0] = template[0];
+ um->file_update (f, UNIX_FILE_UPDATE_ADD);
+ return f - um->file_pool;
+}
+
+always_inline void
+clib_file_del (clib_file_main_t * um, clib_file_t * f)
+{
+ um->file_update (f, UNIX_FILE_UPDATE_DELETE);
+ close (f->file_descriptor);
+ f->file_descriptor = ~0;
+ pool_put (um->file_pool, f);
+}
+
+always_inline void
+clib_file_del_by_index (clib_file_main_t * um, uword index)
+{
+ clib_file_t *uf;
+ uf = pool_elt_at_index (um->file_pool, index);
+ clib_file_del (um, uf);
+}
+
+always_inline uword
+clib_file_set_data_available_to_write (clib_file_main_t * um,
+ u32 clib_file_index,
+ uword is_available)
+{
+ clib_file_t *uf = pool_elt_at_index (um->file_pool, clib_file_index);
+ uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ if ((was_available != 0) != (is_available != 0))
+ {
+ uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ um->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+ return was_available != 0;
+}
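+
+/* Usage sketch (illustrative; the example_* names are hypothetical):
+   register an already-open descriptor with a read handler. The
+   file_update callback supplied by the main loop wires the fd into
+   epoll/select. */
+static clib_error_t *
+example_read_ready (clib_file_t * f)
+{
+  char buf[256];
+  (void) read (f->file_descriptor, buf, sizeof (buf));
+  return 0;
+}
+
+always_inline uword
+example_register_fd (clib_file_main_t * fm, int fd)
+{
+  clib_file_t template = { 0 };
+
+  template.file_descriptor = fd;
+  template.read_function = example_read_ready;
+  return clib_file_add (fm, &template);	/* returns pool index */
+}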
+
+
+#endif /* included_clib_file_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/format.c b/src/vppinfra/format.c
new file mode 100644
index 00000000..70292c04
--- /dev/null
+++ b/src/vppinfra/format.c
@@ -0,0 +1,819 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*------------------------------------------------------------------
+ * format.c -- see notice below
+ *
+ * October 2003, Eliot Dresselhaus
+ *
+ * Modifications to this file Copyright (c) 2003 by cisco Systems, Inc.
+ * All rights reserved.
+ *------------------------------------------------------------------
+ */
+
+/*
+ Copyright (c) 2001, 2002, 2003, 2006 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <stdarg.h> /* va_start, etc */
+
+#ifdef CLIB_UNIX
+#include <unistd.h>
+#include <stdio.h>
+#endif
+
+#ifdef CLIB_STANDALONE
+#include <vppinfra/standalone_stdio.h>
+#endif
+
+#include <vppinfra/mem.h>
+#include <vppinfra/format.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h>
+#include <vppinfra/string.h>
+#include <vppinfra/os.h> /* os_puts */
+#include <vppinfra/math.h>
+
+typedef struct
+{
+ /* Output number in this base. */
+ u8 base;
+
+  /* Number of bits of the 64-bit number to print. */
+ u8 n_bits;
+
+ /* Signed or unsigned. */
+ u8 is_signed;
+
+ /* Output digits uppercase (not lowercase) %X versus %x. */
+ u8 uppercase_digits;
+} format_integer_options_t;
+
+static u8 *format_integer (u8 * s, u64 number,
+ format_integer_options_t * options);
+static u8 *format_float (u8 * s, f64 x, uword n_digits_to_print,
+ uword output_style);
+
+typedef struct
+{
+ /* String justification: + => right, - => left, = => center. */
+ uword justify;
+
+ /* Width of string (before and after decimal point for numbers).
+ 0 => natural width. */
+ uword width[2];
+
+  /* Length modifier: 'l' => long, 'L' => long long, 0 => int. */
+ uword how_long;
+
+ /* Pad character. Defaults to space. */
+ uword pad_char;
+} format_info_t;
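+
+/* e.g. "%10d" right-justifies in a 10-character field ('+' is the
+   default), "%-10d" left-justifies and "%=10d" centers. */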
+
+static u8 *
+justify (u8 * s, format_info_t * fi, uword s_len_orig)
+{
+ uword i0, l0, l1;
+
+ i0 = s_len_orig;
+ l0 = i0 + fi->width[0];
+ l1 = vec_len (s);
+
+  /* If no width was specified, use the natural width of the result. */
+ if (l0 == i0)
+ l0 = l1;
+
+ if (l1 > l0)
+ _vec_len (s) = l0;
+ else if (l0 > l1)
+ {
+ uword n = l0 - l1;
+ uword n_left = 0, n_right = 0;
+
+ switch (fi->justify)
+ {
+ case '-':
+ n_right = n;
+ break;
+
+ case '+':
+ n_left = n;
+ break;
+
+ case '=':
+ n_right = n_left = n / 2;
+ if (n % 2)
+ n_left++;
+ break;
+ }
+ if (n_left > 0)
+ {
+ vec_insert (s, n_left, i0);
+ memset (s + i0, fi->pad_char, n_left);
+ l1 = vec_len (s);
+ }
+ if (n_right > 0)
+ {
+ vec_resize (s, n_right);
+ memset (s + l1, fi->pad_char, n_right);
+ }
+ }
+ return s;
+}
+
+static const u8 *
+do_percent (u8 ** _s, const u8 * fmt, va_list * va)
+{
+ u8 *s = *_s;
+ uword c;
+
+ const u8 *f = fmt;
+
+ format_info_t fi = {
+ .justify = '+',
+ .width = {0},
+ .pad_char = ' ',
+ .how_long = 0,
+ };
+
+ uword i;
+
+ ASSERT (f[0] == '%');
+
+ switch (c = *++f)
+ {
+ case '%':
+ /* %% => % */
+ vec_add1 (s, c);
+ f++;
+ goto done;
+
+ case '-':
+ case '+':
+ case '=':
+ fi.justify = c;
+ c = *++f;
+ break;
+ }
+
+ /* Parse width0 . width1. */
+ {
+ uword is_first_digit = 1;
+
+ fi.width[0] = fi.width[1] = 0;
+ for (i = 0; i < 2; i++)
+ {
+ if (c == '0' && i == 0 && is_first_digit)
+ fi.pad_char = '0';
+ is_first_digit = 0;
+ if (c == '*')
+ {
+ fi.width[i] = va_arg (*va, int);
+ c = *++f;
+ }
+ else
+ {
+ while (c >= '0' && c <= '9')
+ {
+ fi.width[i] = 10 * fi.width[i] + (c - '0');
+ c = *++f;
+ }
+ }
+ if (c != '.')
+ break;
+ c = *++f;
+ }
+ }
+
+ /* Parse %l* and %L* */
+ switch (c)
+ {
+ case 'w':
+ /* word format. */
+ fi.how_long = 'w';
+ c = *++f;
+ break;
+
+ case 'L':
+ case 'l':
+ fi.how_long = c;
+ c = *++f;
+ if (c == 'l' && *f == 'l')
+ {
+ fi.how_long = 'L';
+ c = *++f;
+ }
+ break;
+ }
+
+  /* Finally we are ready for the format letter. */
+ if (c != 0)
+ {
+ uword s_initial_len = vec_len (s);
+ format_integer_options_t o = {
+ .is_signed = 0,
+ .base = 10,
+ .n_bits = BITS (uword),
+ .uppercase_digits = 0,
+ };
+
+ f++;
+
+ switch (c)
+ {
+ default:
+ {
+ /* Try to give a helpful error message. */
+ vec_free (s);
+ s = format (s, "**** CLIB unknown format `%%%c' ****", c);
+ goto done;
+ }
+
+ case 'c':
+ vec_add1 (s, va_arg (*va, int));
+ break;
+
+ case 'p':
+ vec_add1 (s, '0');
+ vec_add1 (s, 'x');
+
+ o.is_signed = 0;
+ o.n_bits = BITS (uword *);
+ o.base = 16;
+ o.uppercase_digits = 0;
+
+ s = format_integer (s, pointer_to_uword (va_arg (*va, void *)), &o);
+ break;
+
+ case 'x':
+ case 'X':
+ case 'u':
+ case 'd':
+ {
+ u64 number;
+
+ o.base = 10;
+ if (c == 'x' || c == 'X')
+ o.base = 16;
+ o.is_signed = c == 'd';
+ o.uppercase_digits = c == 'X';
+
+ switch (fi.how_long)
+ {
+ case 'L':
+ number = va_arg (*va, unsigned long long);
+ o.n_bits = BITS (unsigned long long);
+ break;
+
+ case 'l':
+ number = va_arg (*va, long);
+ o.n_bits = BITS (long);
+ break;
+
+ case 'w':
+ number = va_arg (*va, word);
+ o.n_bits = BITS (uword);
+ break;
+
+ default:
+ number = va_arg (*va, int);
+ o.n_bits = BITS (int);
+ break;
+ }
+
+ s = format_integer (s, number, &o);
+ }
+ break;
+
+ case 's':
+ case 'S':
+ {
+ char *cstring = va_arg (*va, char *);
+ uword len;
+
+ if (!cstring)
+ {
+ cstring = "(nil)";
+ len = 5;
+ }
+ else if (fi.width[1] != 0)
+ len = clib_min (strlen (cstring), fi.width[1]);
+ else
+ len = strlen (cstring);
+
+ /* %S => format string as C identifier (replace _ with space). */
+ if (c == 'S')
+ {
+ for (i = 0; i < len; i++)
+ vec_add1 (s, cstring[i] == '_' ? ' ' : cstring[i]);
+ }
+ else
+ vec_add (s, cstring, len);
+ }
+ break;
+
+ case 'v':
+ {
+ u8 *v = va_arg (*va, u8 *);
+ uword len;
+
+ if (fi.width[1] != 0)
+ len = clib_min (vec_len (v), fi.width[1]);
+ else
+ len = vec_len (v);
+
+ vec_add (s, v, len);
+ }
+ break;
+
+ case 'f':
+ case 'g':
+ case 'e':
+ /* Floating point. */
+ ASSERT (fi.how_long == 0 || fi.how_long == 'l');
+ s = format_float (s, va_arg (*va, double), fi.width[1], c);
+ break;
+
+ case 'U':
+ /* User defined function. */
+ {
+ typedef u8 *(user_func_t) (u8 * s, va_list * args);
+ user_func_t *u = va_arg (*va, user_func_t *);
+
+ s = (*u) (s, va);
+ }
+ break;
+ }
+
+ s = justify (s, &fi, s_initial_len);
+ }
+
+done:
+ *_s = s;
+ return f;
+}
+
+u8 *
+va_format (u8 * s, const char *fmt, va_list * va)
+{
+ const u8 *f = (u8 *) fmt, *g;
+ u8 c;
+
+ g = f;
+ while (1)
+ {
+ c = *f;
+
+ if (!c)
+ break;
+
+ if (c == '%')
+ {
+ if (f > g)
+ vec_add (s, g, f - g);
+ f = g = do_percent (&s, f, va);
+ }
+ else
+ {
+ f++;
+ }
+ }
+
+ if (f > g)
+ vec_add (s, g, f - g);
+
+ return s;
+}
+
+u8 *
+format (u8 * s, const char *fmt, ...)
+{
+ va_list va;
+ va_start (va, fmt);
+ s = va_format (s, fmt, &va);
+ va_end (va);
+ return s;
+}
+
+word
+va_fformat (FILE * f, char *fmt, va_list * va)
+{
+ word ret;
+ u8 *s;
+
+ s = va_format (0, fmt, va);
+
+#ifdef CLIB_UNIX
+ if (f)
+ {
+ ret = fwrite (s, vec_len (s), 1, f);
+ }
+ else
+#endif /* CLIB_UNIX */
+ {
+ ret = 0;
+ os_puts (s, vec_len (s), /* is_error */ 0);
+ }
+
+ vec_free (s);
+ return ret;
+}
+
+word
+fformat (FILE * f, char *fmt, ...)
+{
+ va_list va;
+ word ret;
+
+ va_start (va, fmt);
+ ret = va_fformat (f, fmt, &va);
+ va_end (va);
+
+ return (ret);
+}
+
+#ifdef CLIB_UNIX
+word
+fdformat (int fd, char *fmt, ...)
+{
+ word ret;
+ u8 *s;
+ va_list va;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ ret = write (fd, s, vec_len (s));
+ vec_free (s);
+ return ret;
+}
+#endif
+
+/* Format integral type. */
+static u8 *
+format_integer (u8 * s, u64 number, format_integer_options_t * options)
+{
+ u64 q;
+ u32 r;
+ u8 digit_buffer[128];
+ u8 *d = digit_buffer + sizeof (digit_buffer);
+ word c, base;
+
+ if (options->is_signed && (i64) number < 0)
+ {
+ number = -number;
+ vec_add1 (s, '-');
+ }
+
+ if (options->n_bits < BITS (number))
+ number &= ((u64) 1 << options->n_bits) - 1;
+
+ base = options->base;
+
+ while (1)
+ {
+ q = number / base;
+ r = number % base;
+
+ if (r < 10 + 26 + 26)
+ {
+ if (r < 10)
+ c = '0' + r;
+ else if (r < 10 + 26)
+ c = 'a' + (r - 10);
+ else
+ c = 'A' + (r - 10 - 26);
+
+ if (options->uppercase_digits
+ && base <= 10 + 26 && c >= 'a' && c <= 'z')
+ c += 'A' - 'a';
+
+ *--d = c;
+ }
+      else /* unreachable for base <= 62; silences a compiler warning */
+ {
+ *--d = '?';
+ }
+
+ if (q == 0)
+ break;
+
+ number = q;
+ }
+
+ vec_add (s, d, digit_buffer + sizeof (digit_buffer) - d);
+ return s;
+}
+
+/* Floating point formatting. */
+/* Deconstruct IEEE 64 bit number into sign, exponent and fraction. */
+#define f64_down(f,sign,expon,fraction) \
+do { \
+ union { u64 u; f64 f; } _f64_down_tmp; \
+ _f64_down_tmp.f = (f); \
+ (sign) = (_f64_down_tmp.u >> 63); \
+ (expon) = ((_f64_down_tmp.u >> 52) & 0x7ff) - 1023; \
+ (fraction) = ((_f64_down_tmp.u << 12) >> 12) | ((u64) 1 << 52); \
+} while (0)
+
+/* Construct IEEE 64 bit number. */
+static f64
+f64_up (uword sign, word expon, u64 fraction)
+{
+ union
+ {
+ u64 u;
+ f64 f;
+ } tmp;
+
+ tmp.u = (u64) ((sign) != 0) << 63;
+
+ expon += 1023;
+ if (expon > 1023)
+ expon = 1023;
+ if (expon < 0)
+ expon = 0;
+ tmp.u |= (u64) expon << 52;
+
+ tmp.u |= fraction & (((u64) 1 << 52) - 1);
+
+ return tmp.f;
+}
+
+/* Returns approximate precision of number given its exponent. */
+static f64
+f64_precision (int base2_expon)
+{
+ static int n_bits = 0;
+
+ if (!n_bits)
+ {
+ /* Compute number of significant bits in floating point representation. */
+ f64 one = 0;
+ f64 small = 1;
+
+ while (one != 1)
+ {
+ small *= .5;
+ n_bits++;
+ one = 1 + small;
+ }
+ }
+
+ return f64_up (0, base2_expon - n_bits, 0);
+}
+
+/* Return x * 10^n. */
+static f64
+times_power_of_ten (f64 x, int n)
+{
+ if (n >= 0)
+ {
+ static f64 t[8] = { 1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, };
+ while (n >= 8)
+ {
+ x *= 1e+8;
+ n -= 8;
+ }
+ return x * t[n];
+ }
+ else
+ {
+ static f64 t[8] = { 1e-0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, };
+ while (n <= -8)
+ {
+ x *= 1e-8;
+ n += 8;
+ }
+ return x * t[-n];
+ }
+}
+
+/* Write x = y * 10^expon with 1 <= y < 10. */
+static f64
+normalize (f64 x, word * expon_return, f64 * prec_return)
+{
+ word expon2, expon10;
+ CLIB_UNUSED (u64 fraction);
+ CLIB_UNUSED (word sign);
+ f64 prec;
+
+ f64_down (x, sign, expon2, fraction);
+
+ expon10 =
+ .5 +
+ expon2 * .301029995663981195213738894724493 /* Log (2) / Log (10) */ ;
+
+ prec = f64_precision (expon2);
+ x = times_power_of_ten (x, -expon10);
+ prec = times_power_of_ten (prec, -expon10);
+
+ while (x < 1)
+ {
+ x *= 10;
+ prec *= 10;
+ expon10--;
+ }
+
+ while (x > 10)
+ {
+ x *= .1;
+ prec *= .1;
+ expon10++;
+ }
+
+ if (x + prec >= 10)
+ {
+ x = 1;
+ expon10++;
+ }
+
+ *expon_return = expon10;
+ *prec_return = prec;
+
+ return x;
+}
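+
+/* e.g. normalize (1234.5, &e, &p) returns 1.2345 with e = 3; p is the
+   (scaled) precision of the mantissa, used to decide when further
+   digits are meaningless. */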
+
+static u8 *
+add_some_zeros (u8 * s, uword n_zeros)
+{
+ while (n_zeros > 0)
+ {
+ vec_add1 (s, '0');
+ n_zeros--;
+ }
+ return s;
+}
+
+/* Format a floating point number with the given number of fractional
+ digits (e.g. 1.2345 with 2 fraction digits yields "1.23") and output style. */
+static u8 *
+format_float (u8 * s, f64 x, uword n_fraction_digits, uword output_style)
+{
+ f64 prec;
+ word sign, expon, n_fraction_done, added_decimal_point;
+ /* Position of decimal point relative to where we are. */
+ word decimal_point;
+
+  /* Default number of digits to print when it's not specified. */
+ if (n_fraction_digits == ~0)
+ n_fraction_digits = 7;
+ n_fraction_done = 0;
+ decimal_point = 0;
+ added_decimal_point = 0;
+ sign = expon = 0;
+
+ /* Special case: zero. */
+ if (x == 0)
+ {
+ do_zero:
+ vec_add1 (s, '0');
+ goto done;
+ }
+
+ if (x < 0)
+ {
+ x = -x;
+ sign = 1;
+ }
+
+ /* Check for not-a-number. */
+ if (isnan (x))
+ return format (s, "%cNaN", sign ? '-' : '+');
+
+ /* Check for infinity. */
+ if (isinf (x))
+ return format (s, "%cinfinity", sign ? '-' : '+');
+
+ x = normalize (x, &expon, &prec);
+
+ /* Not enough digits to print anything: so just print 0 */
+ if ((word) - expon > (word) n_fraction_digits
+ && (output_style == 'f' || (output_style == 'g')))
+ goto do_zero;
+
+ if (sign)
+ vec_add1 (s, '-');
+
+ if (output_style == 'f'
+ || (output_style == 'g' && expon > -10 && expon < 10))
+ {
+ if (expon < 0)
+ {
+ /* Add decimal point and leading zeros. */
+ vec_add1 (s, '.');
+ n_fraction_done = clib_min (-(expon + 1), n_fraction_digits);
+ s = add_some_zeros (s, n_fraction_done);
+ decimal_point = -n_fraction_done;
+ added_decimal_point = 1;
+ }
+ else
+ decimal_point = expon + 1;
+ }
+ else
+ {
+ /* Exponential output style. */
+ decimal_point = 1;
+ output_style = 'e';
+ }
+
+ while (1)
+ {
+ uword digit;
+
+ /* Number is smaller than precision: call it zero. */
+ if (x < prec)
+ break;
+
+ digit = x;
+ x -= digit;
+ if (x + prec >= 1)
+ {
+ digit++;
+ x -= 1;
+ }
+
+ /* Round last printed digit. */
+ if (decimal_point <= 0
+ && n_fraction_done + 1 == n_fraction_digits && digit < 9)
+ digit += x >= .5;
+
+ vec_add1 (s, '0' + digit);
+
+ /* Move rightwards towards/away from decimal point. */
+ decimal_point--;
+
+ n_fraction_done += decimal_point < 0;
+ if (decimal_point <= 0 && n_fraction_done >= n_fraction_digits)
+ break;
+
+ if (decimal_point == 0 && x != 0)
+ {
+ vec_add1 (s, '.');
+ added_decimal_point = 1;
+ }
+
+ x *= 10;
+ prec *= 10;
+ }
+
+done:
+ if (decimal_point > 0)
+ {
+ s = add_some_zeros (s, decimal_point);
+ decimal_point = 0;
+ }
+
+ if (n_fraction_done < n_fraction_digits)
+ {
+ if (!added_decimal_point)
+ vec_add1 (s, '.');
+ s = add_some_zeros (s, n_fraction_digits - n_fraction_done);
+ }
+
+ if (output_style == 'e')
+ s = format (s, "e%wd", expon);
+
+ return s;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/format.h b/src/vppinfra/format.h
new file mode 100644
index 00000000..5b7023a3
--- /dev/null
+++ b/src/vppinfra/format.h
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_format_h
+#define included_format_h
+
+#include <stdarg.h>
+
+#include <vppinfra/clib.h> /* for CLIB_UNIX, etc. */
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h> /* for ASSERT */
+#include <vppinfra/string.h>
+
+typedef u8 *(format_function_t) (u8 * s, va_list * args);
+
+u8 *va_format (u8 * s, const char *format, va_list * args);
+u8 *format (u8 * s, const char *format, ...);
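+
+/* Illustrative sketch (not part of the original patch): a user-defined
+   format_function_t plugged in through the %U specifier; the helper
+   name format_point is hypothetical.
+
+     static u8 *
+     format_point (u8 * s, va_list * args)
+     {
+       int x = va_arg (*args, int);
+       int y = va_arg (*args, int);
+       return format (s, "(%d, %d)", x, y);
+     }
+
+     u8 *s = format (0, "p = %U", format_point, 3, 4);
+     // s is now the vector "p = (3, 4)"; release it with vec_free (s).
+*/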
+
+#ifdef CLIB_UNIX
+
+#include <stdio.h>
+
+#else /* ! CLIB_UNIX */
+
+/* We're not on Unix and don't have stdio.h. */
+#define FILE void
+#define stdin ((FILE *) 0)
+#define stdout ((FILE *) 1)
+#define stderr ((FILE *) 2)
+
+#endif
+
+word va_fformat (FILE * f, char *fmt, va_list * va);
+word fformat (FILE * f, char *fmt, ...);
+word fdformat (int fd, char *fmt, ...);
+
+always_inline uword
+format_get_indent (u8 * s)
+{
+ uword indent = 0;
+ u8 *nl;
+
+ if (!s)
+ return indent;
+
+ nl = vec_end (s) - 1;
+ while (nl >= s)
+ {
+ if (*nl-- == '\n')
+ break;
+ indent++;
+ }
+ return indent;
+}
+
+#define _(f) u8 * f (u8 * s, va_list * va)
+
+/* Standard user-defined formats. */
+_(format_vec32);
+_(format_vec_uword);
+_(format_ascii_bytes);
+_(format_hex_bytes);
+_(format_white_space);
+_(format_f64);
+_(format_time_interval);
+
+#ifdef CLIB_UNIX
+/* Unix specific formats. */
+_(format_address_family);
+_(format_unix_arphrd);
+_(format_unix_interface_flags);
+_(format_network_address);
+_(format_network_protocol);
+_(format_network_port);
+_(format_sockaddr);
+_(format_ip4_tos_byte);
+_(format_ip4_packet);
+_(format_icmp4_type_and_code);
+_(format_ethernet_packet);
+_(format_hostname);
+_(format_timeval);
+_(format_time_float);
+_(format_signal);
+_(format_ucontext_pc);
+#endif
+
+#undef _
+
+/* Unformat. */
+
+typedef struct _unformat_input_t
+{
+ /* Input buffer (vector). */
+ u8 *buffer;
+
+ /* Current index in input buffer. */
+ uword index;
+
+ /* Vector of buffer marks. Used to delineate pieces of the buffer
+ for error reporting and for parse recovery. */
+ uword *buffer_marks;
+
+ /* User's function to fill the buffer when it's empty
+ (and its argument). */
+ uword (*fill_buffer) (struct _unformat_input_t * i);
+
+ /* Return values for the fill buffer function which indicate whether or
+ not input has been exhausted. */
+#define UNFORMAT_END_OF_INPUT (~0)
+#define UNFORMAT_MORE_INPUT 0
+
+ /* User controlled argument to fill buffer function. */
+ void *fill_buffer_arg;
+} unformat_input_t;
+
+always_inline void
+unformat_init (unformat_input_t * i,
+ uword (*fill_buffer) (unformat_input_t *),
+ void *fill_buffer_arg)
+{
+ memset (i, 0, sizeof (i[0]));
+ i->fill_buffer = fill_buffer;
+ i->fill_buffer_arg = fill_buffer_arg;
+}
+
+always_inline void
+unformat_free (unformat_input_t * i)
+{
+ vec_free (i->buffer);
+ vec_free (i->buffer_marks);
+ memset (i, 0, sizeof (i[0]));
+}
+
+always_inline uword
+unformat_check_input (unformat_input_t * i)
+{
+ /* Low level fill input function. */
+ extern uword _unformat_fill_input (unformat_input_t * i);
+
+ if (i->index >= vec_len (i->buffer) && i->index != UNFORMAT_END_OF_INPUT)
+ _unformat_fill_input (i);
+
+ return i->index;
+}
+
+/* Return true if input is exhausted */
+always_inline uword
+unformat_is_eof (unformat_input_t * input)
+{
+ return unformat_check_input (input) == UNFORMAT_END_OF_INPUT;
+}
+
+/* Return next element in input vector,
+ possibly calling fill input to get more. */
+always_inline uword
+unformat_get_input (unformat_input_t * input)
+{
+ uword i = unformat_check_input (input);
+ if (i < vec_len (input->buffer))
+ {
+ input->index = i + 1;
+ i = input->buffer[i];
+ }
+ return i;
+}
+
+/* Back up input pointer by one. */
+always_inline void
+unformat_put_input (unformat_input_t * input)
+{
+ input->index -= 1;
+}
+
+/* Peek current input character without advancing. */
+always_inline uword
+unformat_peek_input (unformat_input_t * input)
+{
+ uword c = unformat_get_input (input);
+ if (c != UNFORMAT_END_OF_INPUT)
+ unformat_put_input (input);
+ return c;
+}
+
+/* Skip current input line. */
+always_inline void
+unformat_skip_line (unformat_input_t * i)
+{
+ uword c;
+
+ while ((c = unformat_get_input (i)) != UNFORMAT_END_OF_INPUT && c != '\n')
+ ;
+}
+
+uword unformat_skip_white_space (unformat_input_t * input);
+
+/* Unformat function. */
+typedef uword (unformat_function_t) (unformat_input_t * input,
+ va_list * args);
+
+/* External functions. */
+
+/* General unformatting function with programmable input stream. */
+uword unformat (unformat_input_t * i, const char *fmt, ...);
+
+/* Call user defined parse function.
+ unformat_user (i, f, ...) is equivalent to unformat (i, "%U", f, ...) */
+uword unformat_user (unformat_input_t * input, unformat_function_t * func,
+ ...);
+
+/* Alternate version which allows for extensions. */
+uword va_unformat (unformat_input_t * i, const char *fmt, va_list * args);
+
+/* Setup for unformat of Unix style command line. */
+void unformat_init_command_line (unformat_input_t * input, char *argv[]);
+
+/* Setup for unformat of given string. */
+void unformat_init_string (unformat_input_t * input,
+ char *string, int string_len);
+
+always_inline void
+unformat_init_cstring (unformat_input_t * input, char *string)
+{
+ unformat_init_string (input, string, strlen (string));
+}
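+
+/* Illustrative sketch (not part of the original patch): parsing a C
+   string with the unformat machinery declared above.
+
+     unformat_input_t in;
+     u32 port = 0;
+
+     unformat_init_cstring (&in, "port 8080");
+     if (unformat (&in, "port %d", &port))
+       ;                        // port == 8080
+     unformat_free (&in);
+*/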
+
+/* Setup for unformat of given vector string; vector will be freed by unformat_string. */
+void unformat_init_vector (unformat_input_t * input, u8 * vector_string);
+
+/* Format function for unformat input usable when an unformat error
+ has occurred. */
+u8 *format_unformat_error (u8 * s, va_list * va);
+
+#define unformat_parse_error(input) \
+ clib_error_return (0, "parse error `%U'", format_unformat_error, input)
+
+/* Print all input: not just error context. */
+u8 *format_unformat_input (u8 * s, va_list * va);
+
+/* Unformat (parse) function which reads a %s string and converts it
+ to an unformat_input_t. */
+unformat_function_t unformat_input;
+
+/* Parse a line ending with \n and return it. */
+unformat_function_t unformat_line;
+
+/* Parse a line ending with \n and return it as an unformat_input_t. */
+unformat_function_t unformat_line_input;
+
+/* Parse a token containing given set of characters. */
+unformat_function_t unformat_token;
+
+/* Parses a hexstring into a vector of bytes. */
+unformat_function_t unformat_hex_string;
+
+/* Returns non-zero match if input is exhausted.
+ Useful to ensure that the entire input matches with no trailing junk. */
+unformat_function_t unformat_eof;
+
+/* Parse memory size e.g. 100, 100k, 100m, 100g. */
+unformat_function_t unformat_memory_size;
+
+/* Unparse memory size e.g. 100, 100k, 100m, 100g. */
+u8 *format_memory_size (u8 * s, va_list * va);
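+
+/* Example (illustrative, assuming the usual 1024-based multipliers):
+   unformat (&in, "%U", unformat_memory_size, &bytes) parses "100k"
+   into 102400 and "2m" into 2097152; format_memory_size renders such
+   values back in the same shorthand. */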
+
+/* Format C identifier: e.g. a_name -> "a name". */
+u8 *format_c_identifier (u8 * s, va_list * va);
+
+/* Format hexdump with both hex and printable chars - compatible with text2pcap */
+u8 *format_hexdump (u8 * s, va_list * va);
+
+/* Unix specific formats. */
+#ifdef CLIB_UNIX
+/* Setup input from Unix file. */
+void unformat_init_unix_file (unformat_input_t * input, int file_descriptor);
+
+/* Take input from a Unix environment variable; returns
+   1 if the variable exists, zero otherwise. */
+uword unformat_init_unix_env (unformat_input_t * input, char *var);
+
+/* Unformat unix group id (gid) specified as integer or string */
+unformat_function_t unformat_unix_gid;
+#endif /* CLIB_UNIX */
+
+/* Test code. */
+int test_format_main (unformat_input_t * input);
+int test_unformat_main (unformat_input_t * input);
+
+/* This is not the right place for this, but putting it in vec.h
+   created circular dependency problems. */
+int test_vec_main (unformat_input_t * input);
+
+#endif /* included_format_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/graph.c b/src/vppinfra/graph.c
new file mode 100644
index 00000000..98a29046
--- /dev/null
+++ b/src/vppinfra/graph.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/graph.h>
+
+/* Set link distance, creating link if not found. */
+u32
+graph_set_link (graph_t * g, u32 src, u32 dst, u32 distance)
+{
+ graph_node_t *src_node, *dst_node;
+ graph_link_t *l;
+ u32 old_distance;
+
+ /* The following validate will not work if src or dst are on the
+ pool free list. */
+ if (src < vec_len (g->nodes))
+ ASSERT (!pool_is_free_index (g->nodes, src));
+ if (dst < vec_len (g->nodes))
+ ASSERT (!pool_is_free_index (g->nodes, dst));
+
+ /* Make new (empty) nodes to make src and dst valid. */
+ pool_validate_index (g->nodes, clib_max (src, dst));
+
+ src_node = pool_elt_at_index (g->nodes, src);
+ dst_node = pool_elt_at_index (g->nodes, dst);
+
+ l = graph_dir_get_link_to_node (&src_node->next, dst);
+ if (l)
+ {
+ old_distance = l->distance;
+ l->distance = distance;
+
+ l = graph_dir_get_link_to_node (&dst_node->prev, src);
+ l->distance = distance;
+ }
+ else
+ {
+ uword li_next, li_prev;
+
+ old_distance = ~0;
+
+ li_next = graph_dir_add_link (&src_node->next, dst, distance);
+ li_prev = graph_dir_add_link (&dst_node->prev, src, distance);
+
+ l = vec_elt_at_index (src_node->next.links, li_next);
+ l->link_to_self_index = li_prev;
+
+ l = vec_elt_at_index (dst_node->prev.links, li_prev);
+ l->link_to_self_index = li_next;
+ }
+
+ return old_distance;
+}
+
+void
+graph_del_link (graph_t * g, u32 src, u32 dst)
+{
+ graph_node_t *src_node, *dst_node;
+
+ src_node = pool_elt_at_index (g->nodes, src);
+ dst_node = pool_elt_at_index (g->nodes, dst);
+
+ graph_dir_del_link (&src_node->next, dst);
+ /* The mirror of the src->next link lives in dst->prev
+ (see graph_set_link), not in dst->next. */
+ graph_dir_del_link (&dst_node->prev, src);
+}
+
+/* Delete source node and all links from other nodes from/to source. */
+uword
+graph_del_node (graph_t * g, u32 src)
+{
+ graph_node_t *src_node, *n;
+ uword index;
+ graph_link_t *l;
+
+ src_node = pool_elt_at_index (g->nodes, src);
+
+ vec_foreach (l, src_node->next.links)
+ {
+ n = pool_elt_at_index (g->nodes, l->node_index);
+ graph_dir_del_link (&n->prev, src);
+ }
+
+ vec_foreach (l, src_node->prev.links)
+ {
+ n = pool_elt_at_index (g->nodes, l->node_index);
+ graph_dir_del_link (&n->next, src);
+ }
+
+ graph_dir_free (&src_node->next);
+ graph_dir_free (&src_node->prev);
+
+ index = src_node - g->nodes;
+ pool_put (g->nodes, src_node);
+ memset (src_node, ~0, sizeof (src_node[0]));
+
+ return index;
+}
+
+uword
+unformat_graph (unformat_input_t * input, va_list * args)
+{
+ graph_t *g = va_arg (*args, graph_t *);
+ typedef struct
+ {
+ u32 src, dst, distance;
+ } T;
+ T *links = 0, *l;
+ uword result;
+
+ while (1)
+ {
+ vec_add2 (links, l, 1);
+ if (!unformat (input, "%d%d%d", &l->src, &l->dst, &l->distance))
+ break;
+ }
+ _vec_len (links) -= 1;
+ result = vec_len (links) > 0;
+ vec_foreach (l, links)
+ {
+ graph_set_link (g, l->src, l->dst, l->distance);
+ graph_set_link (g, l->dst, l->src, l->distance);
+ }
+
+ vec_free (links);
+ return result;
+}
+
+u8 *
+format_graph_node (u8 * s, va_list * args)
+{
+ graph_t *g = va_arg (*args, graph_t *);
+ u32 node_index = va_arg (*args, u32);
+
+ if (g->format_node)
+ s = format (s, "%U", g->format_node, g, node_index);
+ else
+ s = format (s, "%d", node_index);
+
+ return s;
+}
+
+u8 *
+format_graph (u8 * s, va_list * args)
+{
+ graph_t *g = va_arg (*args, graph_t *);
+ graph_node_t *n;
+ graph_link_t *l;
+ uword indent = format_get_indent (s);
+
+ s = format (s, "graph %d nodes", pool_elts (g->nodes));
+ /* *INDENT-OFF* */
+ pool_foreach (n, g->nodes, ({
+ s = format (s, "\n%U", format_white_space, indent + 2);
+ s = format (s, "%U -> ", format_graph_node, g, n - g->nodes);
+ vec_foreach (l, n->next.links)
+ s = format (s, "%U (%d), ",
+ format_graph_node, g, l->node_index,
+ l->distance);
+ }));
+ /* *INDENT-ON* */
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/graph.h b/src/vppinfra/graph.h
new file mode 100644
index 00000000..1c26118f
--- /dev/null
+++ b/src/vppinfra/graph.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_clib_graph_h
+#define included_clib_graph_h
+
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/pool.h>
+
+/* Generic graphs. */
+typedef struct
+{
+ /* Next node along this link. */
+ u32 node_index;
+
+ /* Other direction link index to reach back to current node. */
+ u32 link_to_self_index;
+
+ /* Distance to next node. */
+ u32 distance;
+} graph_link_t;
+
+/* Direction on graph: either next or previous. */
+typedef struct
+{
+ /* Vector of links. */
+ graph_link_t *links;
+
+ /* Hash mapping node index to link which visits this node. */
+ uword *link_index_by_node_index;
+} graph_dir_t;
+
+always_inline void
+graph_dir_free (graph_dir_t * d)
+{
+ vec_free (d->links);
+ hash_free (d->link_index_by_node_index);
+}
+
+always_inline graph_link_t *
+graph_dir_get_link_to_node (graph_dir_t * d, u32 node_index)
+{
+ uword *p = hash_get (d->link_index_by_node_index, node_index);
+ return p ? vec_elt_at_index (d->links, p[0]) : 0;
+}
+
+always_inline uword
+graph_dir_add_link (graph_dir_t * d, u32 node_index, u32 distance)
+{
+ graph_link_t *l;
+ ASSERT (!graph_dir_get_link_to_node (d, node_index));
+ vec_add2 (d->links, l, 1);
+ l->node_index = node_index;
+ l->distance = distance;
+ hash_set (d->link_index_by_node_index, node_index, l - d->links);
+ return l - d->links;
+}
+
+always_inline void
+graph_dir_del_link (graph_dir_t * d, u32 node_index)
+{
+ graph_link_t *l = graph_dir_get_link_to_node (d, node_index);
+ uword li = l - d->links;
+ uword n_links = vec_len (d->links);
+
+ ASSERT (l != 0);
+ hash_unset (d->link_index_by_node_index, node_index);
+ n_links -= 1;
+ if (li < n_links)
+ d->links[li] = d->links[n_links];
+ _vec_len (d->links) = n_links;
+}
+
+typedef struct
+{
+ /* Nodes we are connected to plus distances. */
+ graph_dir_t next, prev;
+} graph_node_t;
+
+typedef struct
+{
+ /* Pool of nodes. */
+ graph_node_t *nodes;
+
+ void *opaque;
+
+ format_function_t *format_node;
+} graph_t;
+
+/* Set link distance, creating link if not found. */
+u32 graph_set_link (graph_t * g, u32 src, u32 dst, u32 distance);
+
+always_inline void
+graph_set_bidirectional_link (graph_t * g, u32 src, u32 dst, u32 distance)
+{
+ graph_set_link (g, src, dst, distance);
+ graph_set_link (g, dst, src, distance);
+}
+
+void graph_del_link (graph_t * g, u32 src, u32 dst);
+uword graph_del_node (graph_t * g, u32 src);
+
+unformat_function_t unformat_graph;
+format_function_t format_graph;
+format_function_t format_graph_node;
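+
+/* Illustrative sketch (not part of the original patch): building and
+   printing a small graph with the API above.
+
+     graph_t g = { 0 };
+     u8 *s;
+
+     graph_set_bidirectional_link (&g, 0, 1, 10);
+     graph_set_bidirectional_link (&g, 1, 2, 20);
+
+     s = format (0, "%U", format_graph, &g);   // "graph 3 nodes ..."
+     vec_free (s);
+*/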
+
+#endif /* included_clib_graph_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/hash.c b/src/vppinfra/hash.c
new file mode 100644
index 00000000..062ad882
--- /dev/null
+++ b/src/vppinfra/hash.c
@@ -0,0 +1,1095 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001-2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/byte_order.h> /* for clib_arch_is_big_endian */
+
+always_inline void
+zero_pair (hash_t * h, hash_pair_t * p)
+{
+ memset (p, 0, hash_pair_bytes (h));
+}
+
+always_inline void
+init_pair (hash_t * h, hash_pair_t * p)
+{
+ memset (p->value, ~0, hash_value_bytes (h));
+}
+
+always_inline hash_pair_union_t *
+get_pair (void *v, uword i)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_t *p;
+ ASSERT (i < vec_len (v));
+ p = v;
+ p += i << h->log2_pair_size;
+ return (hash_pair_union_t *) p;
+}
+
+always_inline void
+set_is_user (void *v, uword i, uword is_user)
+{
+ hash_t *h = hash_header (v);
+ uword i0 = i / BITS (h->is_user[0]);
+ uword i1 = (uword) 1 << (i % BITS (h->is_user[0]));
+ if (is_user)
+ h->is_user[i0] |= i1;
+ else
+ h->is_user[i0] &= ~i1;
+}
+
+static u8 *hash_format_pair_default (u8 * s, va_list * args);
+
+#if uword_bits == 64
+
+static inline u64
+zap64 (u64 x, word n)
+{
+#define _(n) (((u64) 1 << (u64) (8*(n))) - (u64) 1)
+ static u64 masks_little_endian[] = {
+ 0, _(1), _(2), _(3), _(4), _(5), _(6), _(7),
+ };
+ static u64 masks_big_endian[] = {
+ 0, ~_(7), ~_(6), ~_(5), ~_(4), ~_(3), ~_(2), ~_(1),
+ };
+#undef _
+ if (clib_arch_is_big_endian)
+ return x & masks_big_endian[n];
+ else
+ return x & masks_little_endian[n];
+}
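+
+/* Example: on a little-endian machine, zap64 (0x1122334455667788, 3)
+   masks with _(3) = 0x00ffffff and yields 0x667788, so only the low
+   3 valid bytes of a partially-filled word contribute to the hash. */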
+
+static inline u64
+hash_memory64 (void *p, word n_bytes, u64 state)
+{
+ u64 *q = p;
+ u64 a, b, c, n;
+
+ a = b = 0x9e3779b97f4a7c13LL;
+ c = state;
+ n = n_bytes;
+
+ while (n >= 3 * sizeof (u64))
+ {
+ a += clib_mem_unaligned (q + 0, u64);
+ b += clib_mem_unaligned (q + 1, u64);
+ c += clib_mem_unaligned (q + 2, u64);
+ hash_mix64 (a, b, c);
+ n -= 3 * sizeof (u64);
+ q += 3;
+ }
+
+ c += n_bytes;
+ switch (n / sizeof (u64))
+ {
+ case 2:
+ a += clib_mem_unaligned (q + 0, u64);
+ b += clib_mem_unaligned (q + 1, u64);
+ if (n % sizeof (u64))
+ c += zap64 (clib_mem_unaligned (q + 2, u64), n % sizeof (u64)) << 8;
+ break;
+
+ case 1:
+ a += clib_mem_unaligned (q + 0, u64);
+ if (n % sizeof (u64))
+ b += zap64 (clib_mem_unaligned (q + 1, u64), n % sizeof (u64));
+ break;
+
+ case 0:
+ if (n % sizeof (u64))
+ a += zap64 (clib_mem_unaligned (q + 0, u64), n % sizeof (u64));
+ break;
+ }
+
+ hash_mix64 (a, b, c);
+
+ return c;
+}
+
+#else /* if uword_bits == 64 */
+
+static inline u32
+zap32 (u32 x, word n)
+{
+#define _(n) (((u32) 1 << (u32) (8*(n))) - (u32) 1)
+ static u32 masks_little_endian[] = {
+ 0, _(1), _(2), _(3),
+ };
+ static u32 masks_big_endian[] = {
+ 0, ~_(3), ~_(2), ~_(1),
+ };
+#undef _
+ if (clib_arch_is_big_endian)
+ return x & masks_big_endian[n];
+ else
+ return x & masks_little_endian[n];
+}
+
+static inline u32
+hash_memory32 (void *p, word n_bytes, u32 state)
+{
+ u32 *q = p;
+ u32 a, b, c, n;
+
+ a = b = 0x9e3779b9;
+ c = state;
+ n = n_bytes;
+
+ while (n >= 3 * sizeof (u32))
+ {
+ a += clib_mem_unaligned (q + 0, u32);
+ b += clib_mem_unaligned (q + 1, u32);
+ c += clib_mem_unaligned (q + 2, u32);
+ hash_mix32 (a, b, c);
+ n -= 3 * sizeof (u32);
+ q += 3;
+ }
+
+ c += n_bytes;
+ switch (n / sizeof (u32))
+ {
+ case 2:
+ a += clib_mem_unaligned (q + 0, u32);
+ b += clib_mem_unaligned (q + 1, u32);
+ if (n % sizeof (u32))
+ c += zap32 (clib_mem_unaligned (q + 2, u32), n % sizeof (u32)) << 8;
+ break;
+
+ case 1:
+ a += clib_mem_unaligned (q + 0, u32);
+ if (n % sizeof (u32))
+ b += zap32 (clib_mem_unaligned (q + 1, u32), n % sizeof (u32));
+ break;
+
+ case 0:
+ if (n % sizeof (u32))
+ a += zap32 (clib_mem_unaligned (q + 0, u32), n % sizeof (u32));
+ break;
+ }
+
+ hash_mix32 (a, b, c);
+
+ return c;
+}
+#endif
+
+uword
+hash_memory (void *p, word n_bytes, uword state)
+{
+ uword *q = p;
+
+#if uword_bits == 64
+ return hash_memory64 (q, n_bytes, state);
+#else
+ return hash_memory32 (q, n_bytes, state);
+#endif
+}
+
+#if uword_bits == 64
+always_inline uword
+hash_uword (uword x)
+{
+ u64 a, b, c;
+
+ a = b = 0x9e3779b97f4a7c13LL;
+ c = 0;
+ a += x;
+ hash_mix64 (a, b, c);
+ return c;
+}
+#else
+always_inline uword
+hash_uword (uword x)
+{
+ u32 a, b, c;
+
+ a = b = 0x9e3779b9;
+ c = 0;
+ a += x;
+ hash_mix32 (a, b, c);
+ return c;
+}
+#endif
+
+/* Call sum function. Hash code will be the sum function value
+ masked by the (power of two) length of the hash table. */
+always_inline uword
+key_sum (hash_t * h, uword key)
+{
+ uword sum;
+ switch (pointer_to_uword ((void *) h->key_sum))
+ {
+ case KEY_FUNC_NONE:
+ sum = hash_uword (key);
+ break;
+
+ case KEY_FUNC_POINTER_UWORD:
+ sum = hash_uword (*uword_to_pointer (key, uword *));
+ break;
+
+ case KEY_FUNC_POINTER_U32:
+ sum = hash_uword (*uword_to_pointer (key, u32 *));
+ break;
+
+ case KEY_FUNC_STRING:
+ sum = string_key_sum (h, key);
+ break;
+
+ default:
+ sum = h->key_sum (h, key);
+ break;
+ }
+
+ return sum;
+}
+
+always_inline uword
+key_equal1 (hash_t * h, uword key1, uword key2, uword e)
+{
+ switch (pointer_to_uword ((void *) h->key_equal))
+ {
+ case KEY_FUNC_NONE:
+ break;
+
+ case KEY_FUNC_POINTER_UWORD:
+ e =
+ *uword_to_pointer (key1, uword *) == *uword_to_pointer (key2,
+ uword *);
+ break;
+
+ case KEY_FUNC_POINTER_U32:
+ e = *uword_to_pointer (key1, u32 *) == *uword_to_pointer (key2, u32 *);
+ break;
+
+ case KEY_FUNC_STRING:
+ e = string_key_equal (h, key1, key2);
+ break;
+
+ default:
+ e = h->key_equal (h, key1, key2);
+ break;
+ }
+ return e;
+}
+
+/* Compares two keys: returns 1 if equal, 0 if not. */
+always_inline uword
+key_equal (hash_t * h, uword key1, uword key2)
+{
+ uword e = key1 == key2;
+ if (CLIB_DEBUG > 0 && key1 == key2)
+ ASSERT (key_equal1 (h, key1, key2, e));
+ if (!e)
+ e = key_equal1 (h, key1, key2, e);
+ return e;
+}
+
+static hash_pair_union_t *
+get_indirect (void *v, hash_pair_indirect_t * pi, uword key)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_t *p0, *p1;
+
+ p0 = p1 = pi->pairs;
+ if (h->log2_pair_size > 0)
+ p1 = hash_forward (h, p0, indirect_pair_get_len (pi));
+ else
+ p1 += vec_len (p0);
+
+ while (p0 < p1)
+ {
+ if (key_equal (h, p0->key, key))
+ return (hash_pair_union_t *) p0;
+ p0 = hash_forward1 (h, p0);
+ }
+
+ return (hash_pair_union_t *) 0;
+}
+
+static hash_pair_union_t *
+set_indirect_is_user (void *v, uword i, hash_pair_union_t * p, uword key)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_t *q;
+ hash_pair_indirect_t *pi = &p->indirect;
+ uword log2_bytes = 0;
+
+ if (h->log2_pair_size == 0)
+ q = vec_new (hash_pair_t, 2);
+ else
+ {
+ log2_bytes = 1 + hash_pair_log2_bytes (h);
+ q = clib_mem_alloc (1ULL << log2_bytes);
+ }
+ clib_memcpy (q, &p->direct, hash_pair_bytes (h));
+
+ pi->pairs = q;
+ if (h->log2_pair_size > 0)
+ indirect_pair_set (pi, log2_bytes, 2);
+
+ set_is_user (v, i, 0);
+
+ /* First element is used by existing pair, second will be used by caller. */
+ q = hash_forward1 (h, q);
+ q->key = key;
+ init_pair (h, q);
+ return (hash_pair_union_t *) q;
+}
+
+static hash_pair_union_t *
+set_indirect (void *v, hash_pair_indirect_t * pi, uword key,
+ uword * found_key)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_t *new_pair;
+ hash_pair_union_t *q;
+
+ q = get_indirect (v, pi, key);
+ if (q)
+ {
+ *found_key = 1;
+ return q;
+ }
+
+ if (h->log2_pair_size == 0)
+ vec_add2 (pi->pairs, new_pair, 1);
+ else
+ {
+ uword len, new_len, log2_bytes;
+
+ len = indirect_pair_get_len (pi);
+ log2_bytes = indirect_pair_get_log2_bytes (pi);
+
+ new_len = len + 1;
+ if (new_len * hash_pair_bytes (h) > (1ULL << log2_bytes))
+ {
+ pi->pairs = clib_mem_realloc (pi->pairs,
+ 1ULL << (log2_bytes + 1),
+ 1ULL << log2_bytes);
+ log2_bytes++;
+ }
+
+ indirect_pair_set (pi, log2_bytes, new_len);
+ new_pair = pi->pairs + (len << h->log2_pair_size);
+ }
+ new_pair->key = key;
+ init_pair (h, new_pair);
+ *found_key = 0;
+ return (hash_pair_union_t *) new_pair;
+}
+
+static void
+unset_indirect (void *v, uword i, hash_pair_t * q)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_union_t *p = get_pair (v, i);
+ hash_pair_t *e;
+ hash_pair_indirect_t *pi = &p->indirect;
+ uword len, is_vec;
+
+ is_vec = h->log2_pair_size == 0;
+
+ ASSERT (!hash_is_user (v, i));
+ len = is_vec ? vec_len (pi->pairs) : indirect_pair_get_len (pi);
+ e = hash_forward (h, pi->pairs, len - 1);
+ ASSERT (q >= pi->pairs && q <= e);
+
+ /* We have two or fewer pairs and we are deleting one pair.
+ Make the indirect pointer direct and free the indirect memory. */
+ if (len <= 2)
+ {
+ hash_pair_t *r = pi->pairs;
+
+ if (len == 2)
+ {
+ clib_memcpy (p, q == r ? hash_forward1 (h, r) : r,
+ hash_pair_bytes (h));
+ set_is_user (v, i, 1);
+ }
+ else
+ zero_pair (h, &p->direct);
+
+ if (is_vec)
+ vec_free (r);
+ else if (r)
+ clib_mem_free (r);
+ }
+ else
+ {
+ /* If deleting a pair we need to keep non-null pairs together. */
+ if (q < e)
+ clib_memcpy (q, e, hash_pair_bytes (h));
+ else
+ zero_pair (h, q);
+ if (is_vec)
+ _vec_len (pi->pairs) -= 1;
+ else
+ indirect_pair_set (pi, indirect_pair_get_log2_bytes (pi), len - 1);
+ }
+}
+
+enum lookup_opcode
+{
+ GET = 1,
+ SET = 2,
+ UNSET = 3,
+};
+
+static hash_pair_t *
+lookup (void *v, uword key, enum lookup_opcode op,
+ void *new_value, void *old_value)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_union_t *p = 0;
+ uword found_key = 0;
+ uword i;
+
+ if (!v)
+ return 0;
+
+ i = key_sum (h, key) & (_vec_len (v) - 1);
+ p = get_pair (v, i);
+
+ if (hash_is_user (v, i))
+ {
+ found_key = key_equal (h, p->direct.key, key);
+ if (found_key)
+ {
+ if (op == UNSET)
+ {
+ set_is_user (v, i, 0);
+ if (old_value)
+ clib_memcpy (old_value, p->direct.value,
+ hash_value_bytes (h));
+ zero_pair (h, &p->direct);
+ }
+ }
+ else
+ {
+ if (op == SET)
+ p = set_indirect_is_user (v, i, p, key);
+ else
+ p = 0;
+ }
+ }
+ else
+ {
+ hash_pair_indirect_t *pi = &p->indirect;
+
+ if (op == SET)
+ {
+ if (!pi->pairs)
+ {
+ p->direct.key = key;
+ set_is_user (v, i, 1);
+ }
+ else
+ p = set_indirect (v, pi, key, &found_key);
+ }
+ else
+ {
+ p = get_indirect (v, pi, key);
+ found_key = p != 0;
+ if (found_key && op == UNSET)
+ {
+ if (old_value)
+ clib_memcpy (old_value, &p->direct.value,
+ hash_value_bytes (h));
+
+ unset_indirect (v, i, &p->direct);
+
+ /* Nullify p (since it's just been deleted).
+ Otherwise we might be tempted to play with it. */
+ p = 0;
+ }
+ }
+ }
+
+ if (op == SET && p != 0)
+ {
+ /* Save away old value for caller. */
+ if (old_value && found_key)
+ clib_memcpy (old_value, &p->direct.value, hash_value_bytes (h));
+ clib_memcpy (&p->direct.value, new_value, hash_value_bytes (h));
+ }
+
+ if (op == SET)
+ h->elts += !found_key;
+ if (op == UNSET)
+ h->elts -= found_key;
+
+ return &p->direct;
+}
+
+/* Fetch value of key. */
+uword *
+_hash_get (void *v, uword key)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_t *p;
+
+ /* Don't even search the table if it's empty. */
+ if (!v || h->elts == 0)
+ return 0;
+
+ p = lookup (v, key, GET, 0, 0);
+ if (!p)
+ return 0;
+ if (h->log2_pair_size == 0)
+ return &p->key;
+ else
+ return &p->value[0];
+}
+
+hash_pair_t *
+_hash_get_pair (void *v, uword key)
+{
+ return lookup (v, key, GET, 0, 0);
+}
+
+hash_pair_t *
+hash_next (void *v, hash_next_t * hn)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_t *p;
+
+ while (1)
+ {
+ if (hn->i == 0 && hn->j == 0)
+ {
+ /* Save flags. */
+ hn->f = h->flags;
+
+ /* Prevent others from re-sizing hash table. */
+ h->flags |=
+ (HASH_FLAG_NO_AUTO_GROW
+ | HASH_FLAG_NO_AUTO_SHRINK | HASH_FLAG_HASH_NEXT_IN_PROGRESS);
+ }
+ else if (hn->i >= hash_capacity (v))
+ {
+ /* Restore flags. */
+ h->flags = hn->f;
+ memset (hn, 0, sizeof (hn[0]));
+ return 0;
+ }
+
+ p = hash_forward (h, v, hn->i);
+ if (hash_is_user (v, hn->i))
+ {
+ hn->i++;
+ return p;
+ }
+ else
+ {
+ hash_pair_indirect_t *pi = (void *) p;
+ uword n;
+
+ if (h->log2_pair_size > 0)
+ n = indirect_pair_get_len (pi);
+ else
+ n = vec_len (pi->pairs);
+
+ if (hn->j >= n)
+ {
+ hn->i++;
+ hn->j = 0;
+ }
+ else
+ return hash_forward (h, pi->pairs, hn->j++);
+ }
+ }
+}
+
+/* Remove key from table. */
+void *
+_hash_unset (void *v, uword key, void *old_value)
+{
+ hash_t *h;
+
+ if (!v)
+ return v;
+
+ (void) lookup (v, key, UNSET, 0, old_value);
+
+ h = hash_header (v);
+ if (!(h->flags & HASH_FLAG_NO_AUTO_SHRINK))
+ {
+ /* Shrink when the table is less than 1/4 full. */
+ if (h->elts > 32 && 4 * (h->elts + 1) < vec_len (v))
+ v = hash_resize (v, vec_len (v) / 2);
+ }
+
+ return v;
+}
+
+void *
+_hash_create (uword elts, hash_t * h_user)
+{
+ hash_t *h;
+ uword log2_pair_size;
+ void *v;
+
+ /* Size of hash is power of 2 >= ELTS and larger than
+ number of bits in is_user bitmap elements. */
+ elts = clib_max (elts, BITS (h->is_user[0]));
+ elts = 1ULL << max_log2 (elts);
+
+ log2_pair_size = 1;
+ if (h_user)
+ log2_pair_size = h_user->log2_pair_size;
+
+ v = _vec_resize (0,
+ /* vec len: */ elts,
+ /* data bytes: */
+ (elts << log2_pair_size) * sizeof (hash_pair_t),
+ /* header bytes: */
+ sizeof (h[0]) +
+ (elts / BITS (h->is_user[0])) * sizeof (h->is_user[0]),
+ /* alignment */ sizeof (hash_pair_t));
+ h = hash_header (v);
+
+ if (h_user)
+ h[0] = h_user[0];
+
+ h->log2_pair_size = log2_pair_size;
+ h->elts = 0;
+
+ /* Default flags to never shrinking hash tables.
+ Shrinking tables can cause "jackpot" cases. */
+ if (!h_user)
+ h->flags = HASH_FLAG_NO_AUTO_SHRINK;
+
+ if (!h->format_pair)
+ {
+ h->format_pair = hash_format_pair_default;
+ h->format_pair_arg = 0;
+ }
+
+ return v;
+}
+
+void *
+_hash_free (void *v)
+{
+ hash_t *h = hash_header (v);
+ hash_pair_union_t *p;
+ uword i;
+
+ if (!v)
+ return v;
+
+ /* We zero all freed memory in case the user is tempted to use it. */
+ for (i = 0; i < hash_capacity (v); i++)
+ {
+ if (hash_is_user (v, i))
+ continue;
+ p = get_pair (v, i);
+ if (h->log2_pair_size == 0)
+ vec_free (p->indirect.pairs);
+ else if (p->indirect.pairs)
+ clib_mem_free (p->indirect.pairs);
+ }
+
+ vec_free_header (h);
+
+ return 0;
+}
+
+static void *
+hash_resize_internal (void *old, uword new_size, uword free_old)
+{
+ void *new;
+ hash_pair_t *p;
+
+ new = 0;
+ if (new_size > 0)
+ {
+ hash_t *h = old ? hash_header (old) : 0;
+ new = _hash_create (new_size, h);
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, old, {
+ new = _hash_set3 (new, p->key, &p->value[0], 0);
+ });
+ /* *INDENT-ON* */
+ }
+
+ if (free_old)
+ hash_free (old);
+ return new;
+}
+
+void *
+hash_resize (void *old, uword new_size)
+{
+ return hash_resize_internal (old, new_size, 1);
+}
+
+void *
+hash_dup (void *old)
+{
+ return hash_resize_internal (old, vec_len (old), 0);
+}
+
+void *
+_hash_set3 (void *v, uword key, void *value, void *old_value)
+{
+ hash_t *h;
+
+ if (!v)
+ v = hash_create (0, sizeof (uword));
+
+ h = hash_header (v);
+ (void) lookup (v, key, SET, value, old_value);
+
+ if (!(h->flags & HASH_FLAG_NO_AUTO_GROW))
+ {
+ /* Grow when the table is more than 3/4 full. */
+ if (4 * (h->elts + 1) > 3 * vec_len (v))
+ v = hash_resize (v, 2 * vec_len (v));
+ }
+
+ return v;
+}
+
+uword
+vec_key_sum (hash_t * h, uword key)
+{
+ void *v = uword_to_pointer (key, void *);
+ return hash_memory (v, vec_len (v) * h->user, 0);
+}
+
+uword
+vec_key_equal (hash_t * h, uword key1, uword key2)
+{
+ void *v1 = uword_to_pointer (key1, void *);
+ void *v2 = uword_to_pointer (key2, void *);
+ uword l1 = vec_len (v1);
+ uword l2 = vec_len (v2);
+ return l1 == l2 && 0 == memcmp (v1, v2, l1 * h->user);
+}
+
+u8 *
+vec_key_format_pair (u8 * s, va_list * args)
+{
+ void *CLIB_UNUSED (user_arg) = va_arg (*args, void *);
+ void *v = va_arg (*args, void *);
+ hash_pair_t *p = va_arg (*args, hash_pair_t *);
+ hash_t *h = hash_header (v);
+ void *u = uword_to_pointer (p->key, void *);
+ int i;
+
+ switch (h->user)
+ {
+ case 1:
+ s = format (s, "%v", u);
+ break;
+
+ case 2:
+ {
+ u16 *w = u;
+ for (i = 0; i < vec_len (w); i++)
+ s = format (s, "0x%x, ", w[i]);
+ break;
+ }
+
+ case 4:
+ {
+ u32 *w = u;
+ for (i = 0; i < vec_len (w); i++)
+ s = format (s, "0x%x, ", w[i]);
+ break;
+ }
+
+ case 8:
+ {
+ u64 *w = u;
+ for (i = 0; i < vec_len (w); i++)
+ s = format (s, "0x%Lx, ", w[i]);
+ break;
+ }
+
+ default:
+ s = format (s, "0x%U", format_hex_bytes, u, vec_len (u) * h->user);
+ break;
+ }
+
+ if (hash_value_bytes (h) > 0)
+ s = format (s, " -> 0x%wx", p->value[0]);
+
+ return s;
+}
+
+uword
+mem_key_sum (hash_t * h, uword key)
+{
+ uword *v = uword_to_pointer (key, void *);
+ return hash_memory (v, h->user, 0);
+}
+
+uword
+mem_key_equal (hash_t * h, uword key1, uword key2)
+{
+ void *v1 = uword_to_pointer (key1, void *);
+ void *v2 = uword_to_pointer (key2, void *);
+ return v1 && v2 && 0 == memcmp (v1, v2, h->user);
+}
+
+uword
+string_key_sum (hash_t * h, uword key)
+{
+ char *v = uword_to_pointer (key, char *);
+ return hash_memory (v, strlen (v), 0);
+}
+
+uword
+string_key_equal (hash_t * h, uword key1, uword key2)
+{
+ void *v1 = uword_to_pointer (key1, void *);
+ void *v2 = uword_to_pointer (key2, void *);
+ return v1 && v2 && 0 == strcmp (v1, v2);
+}
+
+u8 *
+string_key_format_pair (u8 * s, va_list * args)
+{
+ void *CLIB_UNUSED (user_arg) = va_arg (*args, void *);
+ void *v = va_arg (*args, void *);
+ hash_pair_t *p = va_arg (*args, hash_pair_t *);
+ hash_t *h = hash_header (v);
+ void *u = uword_to_pointer (p->key, void *);
+
+ s = format (s, "%s", u);
+
+ if (hash_value_bytes (h) > 0)
+ s =
+ format (s, " -> 0x%8U", format_hex_bytes, &p->value[0],
+ hash_value_bytes (h));
+
+ return s;
+}
+
+static u8 *
+hash_format_pair_default (u8 * s, va_list * args)
+{
+ void *CLIB_UNUSED (user_arg) = va_arg (*args, void *);
+ void *v = va_arg (*args, void *);
+ hash_pair_t *p = va_arg (*args, hash_pair_t *);
+ hash_t *h = hash_header (v);
+
+ s = format (s, "0x%08x", p->key);
+ if (hash_value_bytes (h) > 0)
+ s =
+ format (s, " -> 0x%8U", format_hex_bytes, &p->value[0],
+ hash_value_bytes (h));
+ return s;
+}
+
+uword
+hash_bytes (void *v)
+{
+ uword i, bytes;
+ hash_t *h = hash_header (v);
+
+ if (!v)
+ return 0;
+
+ bytes = vec_capacity (v, hash_header_bytes (v));
+
+ for (i = 0; i < hash_capacity (v); i++)
+ {
+ if (!hash_is_user (v, i))
+ {
+ hash_pair_union_t *p = get_pair (v, i);
+ if (h->log2_pair_size > 0)
+ bytes += 1 << indirect_pair_get_log2_bytes (&p->indirect);
+ else
+ bytes += vec_capacity (p->indirect.pairs, 0);
+ }
+ }
+ return bytes;
+}
+
+u8 *
+format_hash (u8 * s, va_list * va)
+{
+ void *v = va_arg (*va, void *);
+ int verbose = va_arg (*va, int);
+ hash_pair_t *p;
+ hash_t *h = hash_header (v);
+ uword i;
+
+ s = format (s, "hash %p, %wd elts, capacity %wd, %wd bytes used,\n",
+ v, hash_elts (v), hash_capacity (v), hash_bytes (v));
+
+ {
+ uword *occupancy = 0;
+
+ /* Count number of buckets with each occupancy. */
+ for (i = 0; i < hash_capacity (v); i++)
+ {
+ uword j;
+
+ if (hash_is_user (v, i))
+ {
+ j = 1;
+ }
+ else
+ {
+ hash_pair_union_t *p = get_pair (v, i);
+ if (h->log2_pair_size > 0)
+ j = indirect_pair_get_len (&p->indirect);
+ else
+ j = vec_len (p->indirect.pairs);
+ }
+
+ vec_validate (occupancy, j);
+ occupancy[j]++;
+ }
+
+ s = format (s, " profile ");
+ for (i = 0; i < vec_len (occupancy); i++)
+ s = format (s, "%wd%c", occupancy[i],
+ i + 1 == vec_len (occupancy) ? '\n' : ' ');
+
+ s = format (s, " lookup # of compares: ");
+ for (i = 1; i < vec_len (occupancy); i++)
+ s = format (s, "%wd: .%03d%c", i,
+ (1000 * i * occupancy[i]) / hash_elts (v),
+ i + 1 == vec_len (occupancy) ? '\n' : ' ');
+
+ vec_free (occupancy);
+ }
+
+ if (verbose)
+ {
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, v, {
+ s = format (s, " %U\n", h->format_pair, h->format_pair_arg, v, p);
+ });
+ /* *INDENT-ON* */
+ }
+
+ return s;
+}
+
+static uword
+unformat_hash_string_internal (unformat_input_t * input,
+ va_list * va, int is_vec)
+{
+ uword *hash = va_arg (*va, uword *);
+ int *result = va_arg (*va, int *);
+ u8 *string = 0;
+ uword *p;
+
+ if (!unformat (input, is_vec ? "%v%_" : "%s%_", &string))
+ return 0;
+
+ p = hash_get_mem (hash, string);
+ if (p)
+ *result = *p;
+
+ vec_free (string);
+ return p ? 1 : 0;
+}
+
+uword
+unformat_hash_vec_string (unformat_input_t * input, va_list * va)
+{
+ return unformat_hash_string_internal (input, va, /* is_vec */ 1);
+}
+
+uword
+unformat_hash_string (unformat_input_t * input, va_list * va)
+{
+ return unformat_hash_string_internal (input, va, /* is_vec */ 0);
+}
+
+clib_error_t *
+hash_validate (void *v)
+{
+ hash_t *h = hash_header (v);
+ uword i, j;
+ uword *keys = 0;
+ clib_error_t *error = 0;
+
+#define CHECK(x) if ((error = ERROR_ASSERT (x))) goto done;
+
+ for (i = 0; i < hash_capacity (v); i++)
+ {
+ hash_pair_union_t *pu = get_pair (v, i);
+
+ if (hash_is_user (v, i))
+ {
+ CHECK (pu->direct.key != 0);
+ vec_add1 (keys, pu->direct.key);
+ }
+ else
+ {
+ hash_pair_t *p;
+ hash_pair_indirect_t *pi = &pu->indirect;
+ uword n;
+
+ n = h->log2_pair_size > 0
+ ? indirect_pair_get_len (pi) : vec_len (pi->pairs);
+
+ for (p = pi->pairs; n-- > 0; p = hash_forward1 (h, p))
+ {
+ /* Assert key uniqueness. */
+ for (j = 0; j < vec_len (keys); j++)
+ CHECK (keys[j] != p->key);
+ vec_add1 (keys, p->key);
+ }
+ }
+ }
+
+ CHECK (vec_len (keys) == h->elts);
+
+ vec_free (keys);
+done:
+ return error;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/hash.h b/src/vppinfra/hash.h
new file mode 100644
index 00000000..4db5a576
--- /dev/null
+++ b/src/vppinfra/hash.h
@@ -0,0 +1,694 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001-2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_hash_h
+#define included_hash_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/vector.h>
+
+struct hash_header;
+
+typedef uword (hash_key_sum_function_t) (struct hash_header *, uword key);
+typedef uword (hash_key_equal_function_t)
+ (struct hash_header *, uword key1, uword key2);
+
+/* Vector header for hash tables. */
+typedef struct hash_header
+{
+ /* Number of elements in hash table. */
+ uword elts;
+
+ /* Flags as follows. */
+ u32 flags;
+
+ /* Set if user does not want table to auto-resize when sufficiently full. */
+#define HASH_FLAG_NO_AUTO_GROW (1 << 0)
+ /* Set if user does not want table to auto-resize when sufficiently empty. */
+#define HASH_FLAG_NO_AUTO_SHRINK (1 << 1)
+ /* Set when hash_next is in the process of iterating through this hash table. */
+#define HASH_FLAG_HASH_NEXT_IN_PROGRESS (1 << 2)
+
+ u32 log2_pair_size;
+
+ /* Function to compute the "sum" of a hash key.
+ The hash function is this sum masked by the power-of-two
+ size of the hash table (vec_len (v)). */
+ hash_key_sum_function_t *key_sum;
+
+ /* Special values for key_sum "function". */
+#define KEY_FUNC_NONE (0) /*< sum = key */
+#define KEY_FUNC_POINTER_UWORD (1) /*< sum = *(uword *) key */
+#define KEY_FUNC_POINTER_U32 (2) /*< sum = *(u32 *) key */
+#define KEY_FUNC_STRING (3) /*< sum = string_key_sum, etc. */
+
+ /* key comparison function */
+ hash_key_equal_function_t *key_equal;
+
+ /* Hook for user's data. Used to parameterize sum/equal functions. */
+ any user;
+
+ /* Format a (k,v) pair */
+ format_function_t *format_pair;
+
+ /* Format function arg */
+ void *format_pair_arg;
+
+ /* Bit i is set if pair i is a user object (as opposed to being
+ either zero or an indirect array of pairs). */
+ uword is_user[0];
+} hash_t;
+
+/* Hash header size in bytes */
+always_inline uword
+hash_header_bytes (void *v)
+{
+ hash_t *h;
+ uword is_user_bytes =
+ (sizeof (h->is_user[0]) * vec_len (v)) / BITS (h->is_user[0]);
+ return sizeof (h[0]) + is_user_bytes;
+}
+
+/* Returns a pointer to the hash header given the vector pointer */
+always_inline hash_t *
+hash_header (void *v)
+{
+ return vec_header (v, hash_header_bytes (v));
+}
+
+/* Number of elements in the hash table */
+always_inline uword
+hash_elts (void *v)
+{
+ hash_t *h = hash_header (v);
+ return v ? h->elts : 0;
+}
+
+/* Number of elements the hash table can hold */
+always_inline uword
+hash_capacity (void *v)
+{
+ return vec_len (v);
+}
+
+/* Returns 1 if the hash pair contains user data */
+always_inline uword
+hash_is_user (void *v, uword i)
+{
+ hash_t *h = hash_header (v);
+ uword i0 = i / BITS (h->is_user[0]);
+ uword i1 = i % BITS (h->is_user[0]);
+ return (h->is_user[i0] & ((uword) 1 << i1)) != 0;
+}
+
+/* Set the format function and format argument for a hash table */
+always_inline void
+hash_set_pair_format (void *v,
+ format_function_t * format_pair, void *format_pair_arg)
+{
+ hash_t *h = hash_header (v);
+ h->format_pair = format_pair;
+ h->format_pair_arg = format_pair_arg;
+}
+
+/* Set hash table flags */
+always_inline void
+hash_set_flags (void *v, uword flags)
+{
+ hash_header (v)->flags |= flags;
+}
+
+/* Key value pairs. */
+typedef struct
+{
+ /* The Key */
+ uword key;
+
+ /* The Value. Length is 2^log2_pair_size - 1. */
+ uword value[0];
+} hash_pair_t;
+
+/* The indirect pair structure
+
+ If log2_pair_size > 0 we overload hash pairs
+ with indirect pairs for buckets with more than one
+ pair. */
+typedef struct
+{
+ /* pair vector */
+ hash_pair_t *pairs;
+ /* padding */
+ u8 pad[sizeof (uword) - sizeof (hash_pair_t *)];
+ /* allocated length */
+ uword alloc_len;
+}
+hash_pair_indirect_t;
+
+/* Direct / Indirect pair union */
+typedef union
+{
+ hash_pair_t direct;
+ hash_pair_indirect_t indirect;
+} hash_pair_union_t;
+
+#define LOG2_ALLOC_BITS (5)
+#define PAIR_BITS (BITS (uword) - LOG2_ALLOC_BITS)
+
+/* Log2 number of bytes allocated in pairs array. */
+always_inline uword
+indirect_pair_get_log2_bytes (hash_pair_indirect_t * p)
+{
+ return p->alloc_len >> PAIR_BITS;
+}
+
+/* Get the length of an indirect pair */
+always_inline uword
+indirect_pair_get_len (hash_pair_indirect_t * p)
+{
+ if (!p->pairs)
+ return 0;
+ else
+ return p->alloc_len & (((uword) 1 << PAIR_BITS) - 1);
+}
+
+/* Set the length of an indirect pair */
+always_inline void
+indirect_pair_set (hash_pair_indirect_t * p, uword log2_alloc, uword len)
+{
+ ASSERT (len < ((uword) 1 << PAIR_BITS));
+ ASSERT (log2_alloc < ((uword) 1 << LOG2_ALLOC_BITS));
+ p->alloc_len = (log2_alloc << PAIR_BITS) | len;
+}
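+
+/* Example: with LOG2_ALLOC_BITS = 5, indirect_pair_set (pi, 6, 3)
+   stores alloc_len = (6 << PAIR_BITS) | 3, i.e. a 64-byte allocation
+   currently holding 3 pairs; the getters above unpack those fields. */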
+
+/* internal routine to fetch value for given key */
+uword *_hash_get (void *v, uword key);
+
+/* internal routine to fetch value (key, value) pair for given key */
+hash_pair_t *_hash_get_pair (void *v, uword key);
+
+/* internal routine to unset a (key, value) pair */
+void *_hash_unset (void *v, uword key, void *old_value);
+
+/* internal routine to set a (key, value) pair, return the old value */
+void *_hash_set3 (void *v, uword key, void *value, void *old_value);
+
+/* Resize a hash table */
+void *hash_resize (void *old, uword new_size);
+
+/* duplicate a hash table */
+void *hash_dup (void *old);
+
+/* Returns the number of bytes used by a hash table */
+uword hash_bytes (void *v);
+
+/* Public macro to set a (key, value) pair, return the old value */
+#define hash_set3(h,key,value,old_value) \
+({ \
+ uword _v = (uword) (value); \
+ (h) = _hash_set3 ((h), (uword) (key), (void *) &_v, (old_value)); \
+})
+
+/* Public macro to fetch value for given key */
+#define hash_get(h,key) _hash_get ((h), (uword) (key))
+
+/* Public macro to fetch value (key, value) pair for given key */
+#define hash_get_pair(h,key) _hash_get_pair ((h), (uword) (key))
+
+/* Public macro to set a (key, value) pair */
+#define hash_set(h,key,value) hash_set3(h,key,value,0)
+
+/* Public macro to set (key, 0) pair */
+#define hash_set1(h,key) (h) = _hash_set3(h,(uword) (key),0,0)
+
+/* Public macro to unset a (key, value) pair */
+#define hash_unset(h,key) ((h) = _hash_unset ((h), (uword) (key),0))
+
+/* Public macro to unset a (key, value) pair, return the old value */
+#define hash_unset3(h,key,old_value) ((h) = _hash_unset ((h), (uword) (key), (void *) (old_value)))
+
+/* get/set/unset for pointer keys. */
+
+/* Public macro to fetch value for given pointer key */
+#define hash_get_mem(h,key) _hash_get ((h), pointer_to_uword (key))
+
+/* Public macro to fetch (key, value) for given pointer key */
+#define hash_get_pair_mem(h,key) _hash_get_pair ((h), pointer_to_uword (key))
+
+/* Public macro to set (key, value) for pointer key */
+#define hash_set_mem(h,key,value) hash_set3 (h, pointer_to_uword (key), (value), 0)
+
+/* Public macro to set (key, 0) for pointer key */
+#define hash_set1_mem(h,key) hash_set3 ((h), pointer_to_uword (key), 0, 0)
+
+/* Public macro to unset (key, value) for pointer key */
+#define hash_unset_mem(h,key) ((h) = _hash_unset ((h), pointer_to_uword (key),0))
+
+/* internal routine to free a hash table */
+extern void *_hash_free (void *v);
+
+/* Public macro to free a hash table */
+#define hash_free(h) (h) = _hash_free ((h))
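+
+/* Illustrative sketch (not part of the original patch): the basic
+   lifecycle using the macros above, assuming the hash_create wrapper
+   defined further down in this header.
+
+     uword *t = hash_create (0, sizeof (uword));
+     uword *p;
+
+     hash_set (t, 42, 100);
+     p = hash_get (t, 42);        // p != 0 and *p == 100
+     hash_unset (t, 42);
+     hash_free (t);               // t is reset to 0
+*/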
+
+clib_error_t *hash_validate (void *v);
+
+/* Public inline function to get the number of value bytes for a hash table */
+always_inline uword
+hash_value_bytes (hash_t * h)
+{
+ hash_pair_t *p;
+ return (sizeof (p->value[0]) << h->log2_pair_size) - sizeof (p->key);
+}
+
+/* Public inline function to get log2(size of a (key,value) pair) */
+always_inline uword
+hash_pair_log2_bytes (hash_t * h)
+{
+ uword log2_bytes = h->log2_pair_size;
+ ASSERT (BITS (hash_pair_t) == 32 || BITS (hash_pair_t) == 64);
+ if (BITS (hash_pair_t) == 32)
+ log2_bytes += 2;
+ else if (BITS (hash_pair_t) == 64)
+ log2_bytes += 3;
+ return log2_bytes;
+}
+
+/* Public inline function to get size of a (key,value) pair */
+always_inline uword
+hash_pair_bytes (hash_t * h)
+{
+ return (uword) 1 << hash_pair_log2_bytes (h);
+}
+
+/* Public inline function to advance a pointer past one (key,value) pair */
+always_inline void *
+hash_forward1 (hash_t * h, void *v)
+{
+ return (u8 *) v + hash_pair_bytes (h);
+}
+
+/* Public inline function to advance a pointer past N (key,value) pairs */
+always_inline void *
+hash_forward (hash_t * h, void *v, uword n)
+{
+ return (u8 *) v + ((n * sizeof (hash_pair_t)) << h->log2_pair_size);
+}
+
+/** Iterate over hash pairs.
+
+ @param p The current (key,value) pair. This should be of type
+ <code>(hash_pair_t *)</code>.
+ @param v The hash table to iterate.
+ @param body The operation to perform on each (key,value) pair.
+
+ Executes the expression or code block @c body with each active hash pair.
+*/
+/* A previous version of this macro made use of the hash_pair_union_t
+ * structure; this version does not since that approach mightily upset
+ * the static analysis tool. In the rare chance someone is reading this
+ * code, pretend that _p below is of type hash_pair_union_t and that when
+ * used as an rvalue it's really using one of the union members as the
+ * rvalue. If you were confused before you might be marginally less
+ * confused after.
+ */
+#define hash_foreach_pair(p,v,body) \
+do { \
+ __label__ _hash_foreach_done; \
+ hash_t * _h = hash_header (v); \
+ void * _p; \
+ hash_pair_t * _q, * _q_end; \
+ uword _i, _i1, _id, _pair_increment; \
+ \
+ _p = (v); \
+ _i = 0; \
+ _pair_increment = 1; \
+ if ((v)) \
+ _pair_increment = 1 << _h->log2_pair_size; \
+ while (_i < hash_capacity (v)) \
+ { \
+ _id = _h->is_user[_i / BITS (_h->is_user[0])]; \
+ _i1 = _i + BITS (_h->is_user[0]); \
+ \
+ do { \
+ if (_id & 1) \
+ { \
+ _q = _p; \
+ _q_end = _q + _pair_increment; \
+ } \
+ else \
+ { \
+ hash_pair_indirect_t * _pi = _p; \
+ _q = _pi->pairs; \
+ if (_h->log2_pair_size > 0) \
+ _q_end = hash_forward (_h, _q, indirect_pair_get_len (_pi)); \
+ else \
+ _q_end = vec_end (_q); \
+ } \
+ \
+ /* Loop through all elements in bucket. \
+ Bucket may have 0 1 or more (indirect case) pairs. */ \
+ while (_q < _q_end) \
+ { \
+ uword _break_in_body = 1; \
+ (p) = _q; \
+ do { \
+ body; \
+ _break_in_body = 0; \
+ } while (0); \
+ if (_break_in_body) \
+ goto _hash_foreach_done; \
+ _q += _pair_increment; \
+ } \
+ \
+ _p = (hash_pair_t *)_p + _pair_increment; \
+ _id = _id / 2; \
+ _i++; \
+ } while (_i < _i1); \
+ } \
+ _hash_foreach_done: \
+ /* Be silent Mr. Compiler-Warning. */ \
+ ; \
+ } while (0)
+
+/* Iterate over key/value pairs
+
+ @param key_var the current key
+ @param value_var the current value
+ @param h the hash table to iterate across
+ @param body the operation to perform on each (key_var,value_var) pair.
+
+ calls body with each active hash pair
+*/
+#define hash_foreach(key_var,value_var,h,body) \
+do { \
+ hash_pair_t * _r; \
+ hash_foreach_pair (_r, (h), { \
+ (key_var) = (__typeof__ (key_var)) _r->key; \
+ (value_var) = (__typeof__ (value_var)) _r->value[0]; \
+ do { body; } while (0); \
+ }); \
+} while (0)
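+
+/* Illustrative sketch (not part of the original patch): walking a
+   table with uword keys and values.
+
+     uword key, value;
+     hash_foreach (key, value, t, ({
+       fformat (stdout, "%wd -> %wd\n", key, value);
+     }));
+*/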
+
+/* Iterate over key/value pairs for pointer key hash tables
+
+ @param key_var the current key
+ @param value_var the current value
+ @param h the hash table to iterate across
+ @param body the operation to perform on each (key_var,value_var) pair.
+
+ calls body with each active hash pair
+*/
+#define hash_foreach_mem(key_var,value_var,h,body) \
+do { \
+ hash_pair_t * _r; \
+ hash_foreach_pair (_r, (h), { \
+ (key_var) = (__typeof__ (key_var)) uword_to_pointer (_r->key, void *); \
+ (value_var) = (__typeof__ (value_var)) _r->value[0]; \
+ do { body; } while (0); \
+ }); \
+} while (0)
+
+/* Support for iteration through hash table. */
+
+/* This struct saves iteration state for hash_next.
+ None of these fields are meant to be visible to the user.
+ Hence, the cryptic short-hand names. */
+typedef struct
+{
+ uword i, j, f;
+} hash_next_t;
+
+hash_pair_t *hash_next (void *v, hash_next_t * hn);
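+
+/* Illustrative sketch (not part of the original patch): incremental
+   iteration with hash_next. A zeroed hash_next_t starts the walk;
+   hash_next returns 0 and restores the auto-resize flags when the
+   table is exhausted. The process_pair helper is hypothetical.
+
+     hash_next_t hn = { 0 };
+     hash_pair_t *p;
+
+     while ((p = hash_next (t, &hn)))
+       process_pair (p);
+*/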
+
+void *_hash_create (uword elts, hash_t * h);
+
+always_inline void
+hash_set_value_bytes (hash_t * h, uword value_bytes)
+{
+ hash_pair_t *p;
+ h->log2_pair_size =
+ max_log2 ((sizeof (p->key) + value_bytes + sizeof (p->key) -
+ 1) / sizeof (p->key));
+}
+
+#define hash_create2(_elts,_user,_value_bytes, \
+ _key_sum,_key_equal, \
+ _format_pair,_format_pair_arg) \
+({ \
+ hash_t _h; \
+ memset (&_h, 0, sizeof (_h)); \
+ _h.user = (_user); \
+ _h.key_sum = (hash_key_sum_function_t *) (_key_sum); \
+ _h.key_equal = (_key_equal); \
+ hash_set_value_bytes (&_h, (_value_bytes)); \
+ _h.format_pair = (format_function_t *) (_format_pair); \
+ _h.format_pair_arg = (_format_pair_arg); \
+ _hash_create ((_elts), &_h); \
+})
+
+/* Hash function based on that of Bob Jenkins (bob_jenkins@compuserve.com).
+ Public domain per: http://www.burtleburtle.net/bob/hash/doobs.html
+ Thanks, Bob. */
+
+#define hash_mix_step(a,b,c,s0,s1,s2) \
+do { \
+ (a) -= (b) + (c); (a) ^= (c) >> (s0); \
+ (b) -= (c) + (a); (b) ^= (a) << (s1); \
+ (c) -= (a) + (b); (c) ^= (b) >> (s2); \
+} while (0)
+
+#define hash_mix32_step_1(a,b,c) hash_mix_step(a,b,c,13,8,13)
+#define hash_mix32_step_2(a,b,c) hash_mix_step(a,b,c,12,16,5)
+#define hash_mix32_step_3(a,b,c) hash_mix_step(a,b,c,3,10,15)
+
+#define hash_mix64_step_1(a,b,c) hash_mix_step(a,b,c,43,9,8)
+#define hash_mix64_step_2(a,b,c) hash_mix_step(a,b,c,38,23,5)
+#define hash_mix64_step_3(a,b,c) hash_mix_step(a,b,c,35,49,11)
+#define hash_mix64_step_4(a,b,c) hash_mix_step(a,b,c,12,18,22)
+
+/* Hash function based on that of Bob Jenkins (bob_jenkins@compuserve.com).
+ Thanks, Bob. */
+#define hash_mix64(a0,b0,c0) \
+do { \
+ hash_mix64_step_1 (a0, b0, c0); \
+ hash_mix64_step_2 (a0, b0, c0); \
+ hash_mix64_step_3 (a0, b0, c0); \
+ hash_mix64_step_4 (a0, b0, c0); \
+} while (0) \
+
+#define hash_mix32(a0,b0,c0) \
+do { \
+ hash_mix32_step_1 (a0, b0, c0); \
+ hash_mix32_step_2 (a0, b0, c0); \
+ hash_mix32_step_3 (a0, b0, c0); \
+} while (0) \
+
+/* Finalize from Bob Jenkins lookup3.c */
+
+always_inline uword
+hash32_rotate_left (u32 x, u32 i)
+{
+ return (x << i) | (x >> (BITS (i) - i));
+}
+
+#define hash_v3_mix32(a,b,c) \
+do { \
+ (a) -= (c); (a) ^= hash32_rotate_left ((c), 4); (c) += (b); \
+ (b) -= (a); (b) ^= hash32_rotate_left ((a), 6); (a) += (c); \
+ (c) -= (b); (c) ^= hash32_rotate_left ((b), 8); (b) += (a); \
+ (a) -= (c); (a) ^= hash32_rotate_left ((c),16); (c) += (b); \
+ (b) -= (a); (b) ^= hash32_rotate_left ((a),19); (a) += (c); \
+ (c) -= (b); (c) ^= hash32_rotate_left ((b), 4); (b) += (a); \
+} while (0)
+
+#define hash_v3_finalize32(a,b,c) \
+do { \
+ (c) ^= (b); (c) -= hash32_rotate_left ((b), 14); \
+ (a) ^= (c); (a) -= hash32_rotate_left ((c), 11); \
+ (b) ^= (a); (b) -= hash32_rotate_left ((a), 25); \
+ (c) ^= (b); (c) -= hash32_rotate_left ((b), 16); \
+ (a) ^= (c); (a) -= hash32_rotate_left ((c), 4); \
+ (b) ^= (a); (b) -= hash32_rotate_left ((a), 14); \
+ (c) ^= (b); (c) -= hash32_rotate_left ((b), 24); \
+} while (0)
+
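+/* Example (illustrative sketch): one way to hash three words with the v3
+ primitives above; the seed constant is arbitrary.
+
+ static u32
+ hash3_u32 (u32 x, u32 y, u32 z)
+ {
+ u32 a = x + 0xdeadbeef, b = y + 0xdeadbeef, c = z;
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+ return c;
+ }
+*/
+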
+/* 32 bit mixing/finalize in steps. */
+
+#define hash_v3_mix32_step1(a,b,c) \
+do { \
+ (a) -= (c); (a) ^= hash32_rotate_left ((c), 4); (c) += (b); \
+ (b) -= (a); (b) ^= hash32_rotate_left ((a), 6); (a) += (c); \
+} while (0)
+
+#define hash_v3_mix32_step2(a,b,c) \
+do { \
+ (c) -= (b); (c) ^= hash32_rotate_left ((b), 8); (b) += (a); \
+ (a) -= (c); (a) ^= hash32_rotate_left ((c),16); (c) += (b); \
+} while (0)
+
+#define hash_v3_mix32_step3(a,b,c) \
+do { \
+ (b) -= (a); (b) ^= hash32_rotate_left ((a),19); (a) += (c); \
+ (c) -= (b); (c) ^= hash32_rotate_left ((b), 4); (b) += (a); \
+} while (0)
+
+#define hash_v3_finalize32_step1(a,b,c) \
+do { \
+ (c) ^= (b); (c) -= hash32_rotate_left ((b), 14); \
+ (a) ^= (c); (a) -= hash32_rotate_left ((c), 11); \
+} while (0)
+
+#define hash_v3_finalize32_step2(a,b,c) \
+do { \
+ (b) ^= (a); (b) -= hash32_rotate_left ((a), 25); \
+ (c) ^= (b); (c) -= hash32_rotate_left ((b), 16); \
+} while (0)
+
+#define hash_v3_finalize32_step3(a,b,c) \
+do { \
+ (a) ^= (c); (a) -= hash32_rotate_left ((c), 4); \
+ (b) ^= (a); (b) -= hash32_rotate_left ((a), 14); \
+ (c) ^= (b); (c) -= hash32_rotate_left ((b), 24); \
+} while (0)
+
+/* Vector v3 mixing/finalize. */
+#define hash_v3_mix_step_1_u32x(a,b,c) \
+do { \
+ (a) -= (c); (a) ^= u32x_irotate_left ((c), 4); (c) += (b); \
+ (b) -= (a); (b) ^= u32x_irotate_left ((a), 6); (a) += (c); \
+ (c) -= (b); (c) ^= u32x_irotate_left ((b), 8); (b) += (a); \
+} while (0)
+
+#define hash_v3_mix_step_2_u32x(a,b,c) \
+do { \
+ (a) -= (c); (a) ^= u32x_irotate_left ((c),16); (c) += (b); \
+ (b) -= (a); (b) ^= u32x_irotate_left ((a),19); (a) += (c); \
+ (c) -= (b); (c) ^= u32x_irotate_left ((b), 4); (b) += (a); \
+} while (0)
+
+#define hash_v3_finalize_step_1_u32x(a,b,c) \
+do { \
+ (c) ^= (b); (c) -= u32x_irotate_left ((b), 14); \
+ (a) ^= (c); (a) -= u32x_irotate_left ((c), 11); \
+ (b) ^= (a); (b) -= u32x_irotate_left ((a), 25); \
+} while (0)
+
+#define hash_v3_finalize_step_2_u32x(a,b,c) \
+do { \
+ (c) ^= (b); (c) -= u32x_irotate_left ((b), 16); \
+ (a) ^= (c); (a) -= u32x_irotate_left ((c), 4); \
+ (b) ^= (a); (b) -= u32x_irotate_left ((a), 14); \
+ (c) ^= (b); (c) -= u32x_irotate_left ((b), 24); \
+} while (0)
+
+#define hash_v3_mix_u32x(a,b,c) \
+do { \
+ hash_v3_mix_step_1_u32x(a,b,c); \
+ hash_v3_mix_step_2_u32x(a,b,c); \
+} while (0)
+
+#define hash_v3_finalize_u32x(a,b,c) \
+do { \
+ hash_v3_finalize_step_1_u32x(a,b,c); \
+ hash_v3_finalize_step_2_u32x(a,b,c); \
+} while (0)
+
+extern uword hash_memory (void *p, word n_bytes, uword state);
+
+extern uword mem_key_sum (hash_t * h, uword key);
+extern uword mem_key_equal (hash_t * h, uword key1, uword key2);
+
+#define hash_create_mem(elts,key_bytes,value_bytes) \
+ hash_create2((elts),(key_bytes),(value_bytes),mem_key_sum,mem_key_equal,0,0)
+
+extern uword vec_key_sum (hash_t * h, uword key);
+extern uword vec_key_equal (hash_t * h, uword key1, uword key2);
+extern u8 *vec_key_format_pair (u8 * s, va_list * args);
+
+#define hash_create_vec(elts,key_bytes,value_bytes) \
+ hash_create2((elts),(key_bytes),(value_bytes),\
+ vec_key_sum,vec_key_equal,vec_key_format_pair,0)
+
+extern uword string_key_sum (hash_t * h, uword key);
+extern uword string_key_equal (hash_t * h, uword key1, uword key2);
+extern u8 *string_key_format_pair (u8 * s, va_list * args);
+
+#define hash_create_string(elts,value_bytes) \
+ hash_create2((elts),0,(value_bytes), \
+ (hash_key_sum_function_t *) KEY_FUNC_STRING, \
+ (hash_key_equal_function_t *)KEY_FUNC_STRING, \
+ 0, 0)
+
+#define hash_create(elts,value_bytes) \
+ hash_create2((elts),0,(value_bytes), \
+ (hash_key_sum_function_t *) KEY_FUNC_NONE, \
+ (hash_key_equal_function_t *) KEY_FUNC_NONE, \
+ 0,0)
+
+#define hash_create_uword(elts,value_bytes) \
+ hash_create2((elts),0,(value_bytes), \
+ (hash_key_sum_function_t *) KEY_FUNC_POINTER_UWORD, \
+ (hash_key_equal_function_t *) KEY_FUNC_POINTER_UWORD, \
+ 0,0)
+
+#define hash_create_u32(elts,value_bytes) \
+ hash_create2((elts),0,(value_bytes), \
+ (hash_key_sum_function_t *) KEY_FUNC_POINTER_U32, \
+ (hash_key_equal_function_t *) KEY_FUNC_POINTER_U32, \
+ 0,0)
+
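+/* Example (illustrative): a uword -> uword table built with the wrappers
+ above. hash_set, hash_get, hash_foreach and hash_free are defined earlier
+ in this header; do_something is a placeholder for user code.
+
+ uword *t = hash_create (0, sizeof (uword));
+ uword key, value;
+ hash_set (t, 1, 100);
+ hash_set (t, 2, 200);
+ hash_foreach (key, value, t, ({ do_something (key, value); }));
+ hash_free (t);
+*/
+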
+u8 *format_hash (u8 * s, va_list * va);
+
+/* Look up input in a hash table keyed by either a vector string or a
+ NUL-terminated C string. */
+unformat_function_t unformat_hash_vec_string;
+unformat_function_t unformat_hash_string;
+
+/* Main test routine. */
+int test_hash_main (unformat_input_t * input);
+
+#endif /* included_hash_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/heap.c b/src/vppinfra/heap.c
new file mode 100644
index 00000000..2a5fb5c8
--- /dev/null
+++ b/src/vppinfra/heap.c
@@ -0,0 +1,828 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/cache.h> /* for CLIB_CACHE_LINE_BYTES */
+#include <vppinfra/mem.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/error.h>
+
+always_inline heap_elt_t *
+elt_at (heap_header_t * h, uword i)
+{
+ ASSERT (i < vec_len (h->elts));
+ return h->elts + i;
+}
+
+always_inline heap_elt_t *
+last (heap_header_t * h)
+{
+ return elt_at (h, h->tail);
+}
+
+always_inline heap_elt_t *
+first (heap_header_t * h)
+{
+ return elt_at (h, h->head);
+}
+
+/* Object sizes are binned into N_BINS bins.
+ Objects with size <= SMALL_BINS have their own bins.
+ Larger objects are grouped together in power-of-2 sized
+ bins.
+
+ Sizes are in units of elt_bytes bytes. */
+
+/* Convert size to bin. */
+always_inline uword
+size_to_bin (uword size)
+{
+ uword bin;
+
+ ASSERT (size > 0);
+
+ if (size <= HEAP_SMALL_BINS)
+ {
+ bin = size - 1;
+ if (size == 0)
+ bin = 0;
+ }
+ else
+ {
+ bin = HEAP_SMALL_BINS + max_log2 (size) - (HEAP_LOG2_SMALL_BINS + 1);
+ if (bin >= HEAP_N_BINS)
+ bin = HEAP_N_BINS - 1;
+ }
+
+ return bin;
+}
+
+/* Convert bin to size. */
+always_inline __attribute__ ((unused))
+ uword bin_to_size (uword bin)
+{
+ uword size;
+
+ if (bin <= HEAP_SMALL_BINS - 1)
+ size = bin + 1;
+ else
+ size = (uword) 1 << ((bin - HEAP_SMALL_BINS) + HEAP_LOG2_SMALL_BINS + 1);
+
+ return size;
+}
+
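+/* Worked example: with HEAP_LOG2_SMALL_BINS = 5, sizes 1..32 map one-to-one
+ to bins 0..31; sizes 33..64 share bin 32, sizes 65..128 share bin 33, and
+ so on. bin_to_size returns the largest size in a bin, e.g.
+ bin_to_size (32) == 64. */
+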
+static void
+elt_delete (heap_header_t * h, heap_elt_t * e)
+{
+ heap_elt_t *l = vec_end (h->elts) - 1;
+
+ ASSERT (e >= h->elts && e <= l);
+
+ /* Update doubly linked pointers. */
+ {
+ heap_elt_t *p = heap_prev (e);
+ heap_elt_t *n = heap_next (e);
+
+ if (p == e)
+ {
+ n->prev = 0;
+ h->head = n - h->elts;
+ }
+ else if (n == e)
+ {
+ p->next = 0;
+ h->tail = p - h->elts;
+ }
+ else
+ {
+ p->next = n - p;
+ n->prev = p - n;
+ }
+ }
+
+ /* Add to index free list or delete from end. */
+ if (e < l)
+ vec_add1 (h->free_elts, e - h->elts);
+ else
+ _vec_len (h->elts)--;
+}
+
+/*
+ Before: P ... E
+ After : P ... NEW ... E
+*/
+always_inline void
+elt_insert_before (heap_header_t * h, heap_elt_t * e, heap_elt_t * new)
+{
+ heap_elt_t *p = heap_prev (e);
+
+ if (p == e)
+ {
+ new->prev = 0;
+ new->next = e - new;
+ p->prev = new - p;
+ h->head = new - h->elts;
+ }
+ else
+ {
+ new->prev = p - new;
+ new->next = e - new;
+ e->prev = new - e;
+ p->next = new - p;
+ }
+}
+
+/*
+ Before: E ... N
+ After : E ... NEW ... N
+*/
+always_inline void
+elt_insert_after (heap_header_t * h, heap_elt_t * e, heap_elt_t * new)
+{
+ heap_elt_t *n = heap_next (e);
+
+ if (n == e)
+ {
+ new->next = 0;
+ new->prev = e - new;
+ e->next = new - e;
+ h->tail = new - h->elts;
+ }
+ else
+ {
+ new->prev = e - new;
+ new->next = n - new;
+ e->next = new - e;
+ n->prev = new - n;
+ }
+}
+
+always_inline heap_elt_t *
+elt_new (heap_header_t * h)
+{
+ heap_elt_t *e;
+ uword l;
+ if ((l = vec_len (h->free_elts)) > 0)
+ {
+ e = elt_at (h, h->free_elts[l - 1]);
+ _vec_len (h->free_elts) -= 1;
+ }
+ else
+ vec_add2 (h->elts, e, 1);
+ return e;
+}
+
+/* Return pointer to object at given offset.
+ Used to write free list index of free objects. */
+always_inline u32 *
+elt_data (void *v, heap_elt_t * e)
+{
+ heap_header_t *h = heap_header (v);
+ return v + heap_offset (e) * h->elt_bytes;
+}
+
+always_inline void
+set_free_elt (void *v, heap_elt_t * e, uword fi)
+{
+ heap_header_t *h = heap_header (v);
+
+ e->offset |= HEAP_ELT_FREE_BIT;
+ if (h->elt_bytes >= sizeof (u32))
+ {
+ *elt_data (v, e) = fi;
+ }
+ else
+ {
+ /* For elt_bytes < 4 we must store free index in separate
+ vector. */
+ uword elt_index = e - h->elts;
+ vec_validate (h->small_free_elt_free_index, elt_index);
+ h->small_free_elt_free_index[elt_index] = fi;
+ }
+}
+
+always_inline uword
+get_free_elt (void *v, heap_elt_t * e, uword * bin_result)
+{
+ heap_header_t *h = heap_header (v);
+ uword fb, fi;
+
+ ASSERT (heap_is_free (e));
+ fb = size_to_bin (heap_elt_size (v, e));
+
+ if (h->elt_bytes >= sizeof (u32))
+ {
+ fi = *elt_data (v, e);
+ }
+ else
+ {
+ uword elt_index = e - h->elts;
+ fi = vec_elt (h->small_free_elt_free_index, elt_index);
+ }
+
+ *bin_result = fb;
+ return fi;
+}
+
+always_inline void
+remove_free_block (void *v, uword b, uword i)
+{
+ heap_header_t *h = heap_header (v);
+ uword l;
+
+ ASSERT (b < vec_len (h->free_lists));
+ ASSERT (i < vec_len (h->free_lists[b]));
+
+ l = vec_len (h->free_lists[b]);
+
+ if (i < l - 1)
+ {
+ uword t = h->free_lists[b][l - 1];
+ h->free_lists[b][i] = t;
+ set_free_elt (v, elt_at (h, t), i);
+ }
+ _vec_len (h->free_lists[b]) = l - 1;
+}
+
+static heap_elt_t *
+search_free_list (void *v, uword size)
+{
+ heap_header_t *h = heap_header (v);
+ heap_elt_t *f, *u;
+ uword b, fb, f_size, f_index;
+ word s, l;
+
+ if (!v)
+ return 0;
+
+ /* Search free lists for bins >= given size. */
+ for (b = size_to_bin (size); b < vec_len (h->free_lists); b++)
+ if ((l = vec_len (h->free_lists[b])) > 0)
+ {
+ /* Find an object that is large enough.
+ Search list in reverse so that more recently freed objects will be
+ allocated again sooner. */
+ do
+ {
+ l--;
+ f_index = h->free_lists[b][l];
+ f = elt_at (h, f_index);
+ f_size = heap_elt_size (v, f);
+ if ((s = f_size - size) >= 0)
+ break;
+ }
+ while (l > 0);
+
+ /* If we fail to find a large enough object, try the next larger size.
+ Stopping at l == 0 (instead of letting l go negative and reading
+ free_lists[b][-1]) means s < 0 here exactly when nothing in this
+ bin fits. */
+ if (s < 0)
+ continue;
+
+ ASSERT (heap_is_free (f));
+
+ /* Link in used object (u) after free object (f). */
+ if (s == 0)
+ {
+ u = f;
+ fb = HEAP_N_BINS;
+ }
+ else
+ {
+ u = elt_new (h);
+ f = elt_at (h, f_index);
+ elt_insert_after (h, f, u);
+ fb = size_to_bin (s);
+ }
+
+ u->offset = heap_offset (f) + s;
+
+ if (fb != b)
+ {
+ if (fb < HEAP_N_BINS)
+ {
+ uword i;
+ vec_validate (h->free_lists, fb);
+ i = vec_len (h->free_lists[fb]);
+ vec_add1 (h->free_lists[fb], f - h->elts);
+ set_free_elt (v, f, i);
+ }
+
+ remove_free_block (v, b, l);
+ }
+
+ return u;
+ }
+
+ return 0;
+}
+
+static void combine_free_blocks (void *v, heap_elt_t * e0, heap_elt_t * e1);
+
+static inline void
+dealloc_elt (void *v, heap_elt_t * e)
+{
+ heap_header_t *h = heap_header (v);
+ uword b, l;
+ heap_elt_t *n, *p;
+
+ b = size_to_bin (heap_elt_size (v, e));
+ vec_validate (h->free_lists, b);
+ l = vec_len (h->free_lists[b]);
+ vec_add1 (h->free_lists[b], e - h->elts);
+ set_free_elt (v, e, l);
+
+ /* See if we can combine the block we just freed with neighboring free blocks. */
+ p = heap_prev (e);
+ if (!heap_is_free (p))
+ p = e;
+
+ n = heap_next (e);
+ if (!heap_is_free (n))
+ n = e;
+
+ if (p != n)
+ combine_free_blocks (v, p, n);
+}
+
+void *
+_heap_alloc (void *v,
+ uword size,
+ uword align,
+ uword elt_bytes, uword * offset_return, uword * handle_return)
+{
+ uword offset = 0, align_size;
+ heap_header_t *h;
+ heap_elt_t *e;
+
+ if (size == 0)
+ goto error;
+
+ /* Round up alignment to power of 2. */
+ if (align <= 1)
+ {
+ align = 0;
+ align_size = size;
+ }
+ else
+ {
+ align = max_pow2 (align);
+ align_size = size + align - 1;
+ }
+
+ e = search_free_list (v, align_size);
+
+ /* If nothing found on free list, allocate object from end of vector. */
+ if (!e)
+ {
+ uword max_len;
+
+ offset = vec_len (v);
+ max_len = heap_get_max_len (v);
+
+ if (max_len && offset + align_size > max_len)
+ goto error;
+
+ h = heap_header (v);
+ if (!v || !(h->flags & HEAP_IS_STATIC))
+ v = _vec_resize (v,
+ align_size,
+ (offset + align_size) * elt_bytes,
+ sizeof (h[0]), HEAP_DATA_ALIGN);
+ else
+ _vec_len (v) += align_size;
+
+ if (offset == 0)
+ {
+ h = heap_header (v);
+ h->elt_bytes = elt_bytes;
+ }
+ }
+
+ h = heap_header (v);
+
+ /* Add new element to doubly linked chain of elements. */
+ if (!e)
+ {
+ e = elt_new (h);
+ e->offset = offset;
+ elt_insert_after (h, last (h), e);
+ }
+
+ if (align > 0)
+ {
+ uword e_index;
+ uword new_offset, old_offset;
+
+ old_offset = e->offset;
+ new_offset = (old_offset + align - 1) & ~(align - 1);
+ e->offset = new_offset;
+ e_index = e - h->elts;
+
+ /* Free fragments before and after aligned object. */
+ if (new_offset > old_offset)
+ {
+ heap_elt_t *before_e = elt_new (h);
+ before_e->offset = old_offset;
+ elt_insert_before (h, h->elts + e_index, before_e);
+ dealloc_elt (v, before_e);
+ }
+
+ if (new_offset + size < old_offset + align_size)
+ {
+ heap_elt_t *after_e = elt_new (h);
+ after_e->offset = new_offset + size;
+ elt_insert_after (h, h->elts + e_index, after_e);
+ dealloc_elt (v, after_e);
+ }
+
+ e = h->elts + e_index;
+ }
+
+ h->used_count++;
+
+ /* Keep track of used elements when debugging.
+ This allows deallocation to check that passed objects are valid. */
+ if (CLIB_DEBUG > 0)
+ {
+ uword handle = e - h->elts;
+ ASSERT (!clib_bitmap_get (h->used_elt_bitmap, handle));
+ h->used_elt_bitmap = clib_bitmap_ori (h->used_elt_bitmap, handle);
+ }
+
+ *offset_return = e->offset;
+ *handle_return = e - h->elts;
+ return v;
+
+error:
+ *offset_return = *handle_return = ~0;
+ return v;
+}
+
+void
+heap_dealloc (void *v, uword handle)
+{
+ heap_header_t *h = heap_header (v);
+ heap_elt_t *e;
+
+ ASSERT (handle < vec_len (h->elts));
+
+ /* For debugging we keep track of the indices of valid objects,
+ making sure the user is not trying to free an object with an invalid index. */
+ if (CLIB_DEBUG > 0)
+ {
+ ASSERT (clib_bitmap_get (h->used_elt_bitmap, handle));
+ h->used_elt_bitmap = clib_bitmap_andnoti (h->used_elt_bitmap, handle);
+ }
+
+ h->used_count--;
+
+ e = h->elts + handle;
+ ASSERT (!heap_is_free (e));
+
+ dealloc_elt (v, e);
+}
+
+/* While freeing the object at INDEX we noticed neighboring free blocks
+ e0 (at or before index) and e1 (at or after index). Combine these two or
+ three blocks into one big free block. */
+static void
+combine_free_blocks (void *v, heap_elt_t * e0, heap_elt_t * e1)
+{
+ heap_header_t *h = heap_header (v);
+ uword total_size, i, b, tb, ti, i_last, g_offset;
+ heap_elt_t *e;
+
+ struct
+ {
+ u32 index;
+ u32 bin;
+ u32 bin_index;
+ } f[3], g;
+
+ /* Compute total size of free objects i0 through i1. */
+ total_size = 0;
+ for (i = 0, e = e0; 1; e = heap_next (e), i++)
+ {
+ ASSERT (i < ARRAY_LEN (f));
+
+ ti = get_free_elt (v, e, &tb);
+
+ ASSERT (tb < vec_len (h->free_lists));
+ ASSERT (ti < vec_len (h->free_lists[tb]));
+
+ f[i].index = h->free_lists[tb][ti];
+ f[i].bin = tb;
+ f[i].bin_index = ti;
+
+ total_size += heap_elt_size (v, elt_at (h, f[i].index));
+
+ if (e == e1)
+ {
+ i_last = i;
+ break;
+ }
+ }
+
+ /* Compute combined bin. See if all objects can be
+ combined into existing bin. */
+ b = size_to_bin (total_size);
+ g.index = g.bin_index = 0;
+ for (i = 0; i <= i_last; i++)
+ if (b == f[i].bin)
+ {
+ g = f[i];
+ break;
+ }
+
+ /* Make sure we found a bin. */
+ if (i > i_last)
+ {
+ g.index = elt_new (h) - h->elts;
+ vec_validate (h->free_lists, b);
+ g.bin_index = vec_len (h->free_lists[b]);
+ vec_add1 (h->free_lists[b], g.index);
+ elt_insert_before (h, elt_at (h, f[0].index), elt_at (h, g.index));
+ }
+
+ g_offset = elt_at (h, f[0].index)->offset;
+
+ /* Delete unused bins. */
+ for (i = 0; i <= i_last; i++)
+ if (g.index != f[i].index)
+ {
+ ti = get_free_elt (v, elt_at (h, f[i].index), &tb);
+ remove_free_block (v, tb, ti);
+ elt_delete (h, elt_at (h, f[i].index));
+ }
+
+ /* Initialize new element. */
+ elt_at (h, g.index)->offset = g_offset;
+ set_free_elt (v, elt_at (h, g.index), g.bin_index);
+}
+
+uword
+heap_len (void *v, word handle)
+{
+ heap_header_t *h = heap_header (v);
+
+ if (CLIB_DEBUG > 0)
+ ASSERT (clib_bitmap_get (h->used_elt_bitmap, handle));
+ return heap_elt_size (v, elt_at (h, handle));
+}
+
+void *
+_heap_free (void *v)
+{
+ heap_header_t *h = heap_header (v);
+ uword b;
+
+ if (!v)
+ return v;
+
+ clib_bitmap_free (h->used_elt_bitmap);
+ for (b = 0; b < vec_len (h->free_lists); b++)
+ vec_free (h->free_lists[b]);
+ vec_free (h->free_lists);
+ vec_free (h->elts);
+ vec_free (h->free_elts);
+ vec_free (h->small_free_elt_free_index);
+ if (!(h->flags & HEAP_IS_STATIC))
+ vec_free_h (v, sizeof (h[0]));
+ return v;
+}
+
+uword
+heap_bytes (void *v)
+{
+ heap_header_t *h = heap_header (v);
+ uword bytes, b;
+
+ if (!v)
+ return 0;
+
+ bytes = sizeof (h[0]);
+ bytes += vec_len (v) * sizeof (h->elt_bytes);
+ for (b = 0; b < vec_len (h->free_lists); b++)
+ bytes += vec_capacity (h->free_lists[b], 0);
+ bytes += vec_bytes (h->free_lists);
+ bytes += vec_capacity (h->elts, 0);
+ bytes += vec_capacity (h->free_elts, 0);
+ bytes += vec_bytes (h->used_elt_bitmap);
+
+ return bytes;
+}
+
+static u8 *
+debug_elt (u8 * s, void *v, word i, word n)
+{
+ heap_elt_t *e, *e0, *e1;
+ heap_header_t *h = heap_header (v);
+ word j;
+
+ if (vec_len (h->elts) == 0)
+ return s;
+
+ if (i < 0)
+ e0 = first (h);
+ else
+ {
+ e0 = h->elts + i;
+ for (j = 0; j < n / 2; j++)
+ e0 = heap_prev (e0);
+ }
+
+ if (n < 0)
+ e1 = h->elts + h->tail;
+ else
+ {
+ e1 = h->elts + i;
+ for (j = 0; j < n / 2; j++)
+ e1 = heap_next (e1);
+ }
+
+ i = -n / 2;
+ for (e = e0; 1; e = heap_next (e))
+ {
+ if (heap_is_free (e))
+ s = format (s, "index %4d, free\n", e - h->elts);
+ else if (h->format_elt)
+ s = format (s, "%U", h->format_elt, v, elt_data (v, e));
+ else
+ s = format (s, "index %4d, used\n", e - h->elts);
+ i++;
+ if (e == e1)
+ break;
+ }
+
+ return s;
+}
+
+u8 *
+format_heap (u8 * s, va_list * va)
+{
+ void *v = va_arg (*va, void *);
+ uword verbose = va_arg (*va, uword);
+ heap_header_t *h = heap_header (v);
+ heap_header_t zero;
+
+ memset (&zero, 0, sizeof (zero));
+
+ if (!v)
+ h = &zero;
+
+ {
+ f64 elt_bytes = vec_len (v) * h->elt_bytes;
+ f64 overhead_bytes = heap_bytes (v);
+
+ s = format (s, "heap %p, %6d objects, size %.1fk + overhead %.1fk\n",
+ v, h->used_count, elt_bytes / 1024,
+ (overhead_bytes - elt_bytes) / 1024);
+ }
+
+ if (v && verbose)
+ s = debug_elt (s, v, -1, -1);
+
+ return s;
+}
+
+void
+heap_validate (void *v)
+{
+ heap_header_t *h = heap_header (v);
+ uword i, o, s;
+ u8 *free_map;
+ heap_elt_t *e, *n;
+
+ uword used_count, total_size;
+ uword free_count, free_size;
+
+ ASSERT (h->used_count == clib_bitmap_count_set_bits (h->used_elt_bitmap));
+
+ ASSERT (first (h)->prev == 0);
+ ASSERT (last (h)->next == 0);
+
+ /* Validate number of elements and size. */
+ free_size = free_count = 0;
+ for (i = 0; i < vec_len (h->free_lists); i++)
+ {
+ free_count += vec_len (h->free_lists[i]);
+ for (o = 0; o < vec_len (h->free_lists[i]); o++)
+ {
+ e = h->elts + h->free_lists[i][o];
+ s = heap_elt_size (v, e);
+ ASSERT (size_to_bin (s) == i);
+ ASSERT (heap_is_free (e));
+ free_size += s;
+ }
+ }
+
+ {
+ uword elt_free_size, elt_free_count;
+
+ used_count = total_size = elt_free_size = elt_free_count = 0;
+ for (e = first (h); 1; e = n)
+ {
+ int is_free = heap_is_free (e);
+ used_count++;
+ s = heap_elt_size (v, e);
+ total_size += s;
+ ASSERT (is_free ==
+ !clib_bitmap_get (h->used_elt_bitmap, e - h->elts));
+ if (is_free)
+ {
+ elt_free_count++;
+ elt_free_size += s;
+ }
+ n = heap_next (e);
+ if (e == n)
+ {
+ ASSERT (last (h) == n);
+ break;
+ }
+
+ /* We should never have two free adjacent elements. */
+ ASSERT (!(heap_is_free (e) && heap_is_free (n)));
+ }
+
+ ASSERT (free_count == elt_free_count);
+ ASSERT (free_size == elt_free_size);
+ ASSERT (used_count == h->used_count + free_count);
+ ASSERT (total_size == vec_len (v));
+ }
+
+ free_map = vec_new (u8, used_count);
+
+ e = first (h);
+ for (i = o = 0; 1; i++)
+ {
+ ASSERT (heap_offset (e) == o);
+ s = heap_elt_size (v, e);
+
+ if (heap_is_free (e))
+ {
+ uword fb, fi;
+
+ fi = get_free_elt (v, e, &fb);
+
+ ASSERT (fb < vec_len (h->free_lists));
+ ASSERT (fi < vec_len (h->free_lists[fb]));
+ ASSERT (h->free_lists[fb][fi] == e - h->elts);
+
+ ASSERT (!free_map[i]);
+ free_map[i] = 1;
+ }
+
+ n = heap_next (e);
+
+ if (e == n)
+ break;
+
+ ASSERT (heap_prev (n) == e);
+
+ o += s;
+ e = n;
+ }
+
+ vec_free (free_map);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/heap.h b/src/vppinfra/heap.h
new file mode 100644
index 00000000..8c1aae46
--- /dev/null
+++ b/src/vppinfra/heap.h
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/* Heaps of objects of type T (e.g. int, struct foo, ...).
+
+ Usage. To declare a null heap:
+
+ T * heap = 0;
+
+ To allocate:
+
+ offset = heap_alloc (heap, size, handle);
+
+ The new object occupies heap[offset] ... heap[offset + size - 1].
+ The handle is used to free/query the object.
+
+ To free object:
+
+ heap_dealloc (heap, handle);
+
+ To query the size of an object:
+
+ heap_size (heap, handle)
+
+*/
+
+#ifndef included_heap_h
+#define included_heap_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/format.h>
+#include <vppinfra/bitmap.h>
+
+/* Doubly linked list of elements. */
+typedef struct
+{
+ /* Offset of this element (plus free bit).
+ If element is free, data at offset contains pointer to free list. */
+ u32 offset;
+
+ /* Index of next and previous elements relative to current element. */
+ i32 next, prev;
+} heap_elt_t;
+
+/* Use high bit of offset as free bit. */
+#define HEAP_ELT_FREE_BIT (1 << 31)
+
+always_inline uword
+heap_is_free (heap_elt_t * e)
+{
+ return (e->offset & HEAP_ELT_FREE_BIT) != 0;
+}
+
+always_inline uword
+heap_offset (heap_elt_t * e)
+{
+ return e->offset & ~HEAP_ELT_FREE_BIT;
+}
+
+always_inline heap_elt_t *
+heap_next (heap_elt_t * e)
+{
+ return e + e->next;
+}
+
+always_inline heap_elt_t *
+heap_prev (heap_elt_t * e)
+{
+ return e + e->prev;
+}
+
+always_inline uword
+heap_elt_size (void *v, heap_elt_t * e)
+{
+ heap_elt_t *n = heap_next (e);
+ uword next_offset = n != e ? heap_offset (n) : vec_len (v);
+ return next_offset - heap_offset (e);
+}
+
+/* Sizes are binned. Sizes 1 to 2^log2_small_bins have their
+ own free lists. Larger sizes are grouped in powers of two. */
+#define HEAP_LOG2_SMALL_BINS (5)
+#define HEAP_SMALL_BINS (1 << HEAP_LOG2_SMALL_BINS)
+#define HEAP_N_BINS (2 * HEAP_SMALL_BINS)
+
+/* Header for heaps. */
+typedef struct
+{
+ /* Vector of used and free elements. */
+ heap_elt_t *elts;
+
+ /* For elt_bytes < sizeof (u32) we need some extra space
+ per elt to store free list index. */
+ u32 *small_free_elt_free_index;
+
+ /* Vector of free indices of elts array. */
+ u32 *free_elts;
+
+ /* Indices of free elts indexed by size bin. */
+ u32 **free_lists;
+
+ format_function_t *format_elt;
+
+ /* Used for validation/debugging. */
+ uword *used_elt_bitmap;
+
+ /* First and last element of doubly linked chain of elements. */
+ u32 head, tail;
+
+ u32 used_count, max_len;
+
+ /* Number of bytes in a heap element. */
+ u32 elt_bytes;
+
+ u32 flags;
+ /* Static heaps are made from external memory given to
+ us by user and are not re-sizeable vectors. */
+#define HEAP_IS_STATIC (1)
+} heap_header_t;
+
+/* Start of heap elements is always cache aligned. */
+#define HEAP_DATA_ALIGN (CLIB_CACHE_LINE_BYTES)
+
+always_inline heap_header_t *
+heap_header (void *v)
+{
+ return vec_header (v, sizeof (heap_header_t));
+}
+
+always_inline uword
+heap_header_bytes ()
+{
+ return vec_header_bytes (sizeof (heap_header_t));
+}
+
+always_inline void
+heap_dup_header (heap_header_t * old, heap_header_t * new)
+{
+ uword i;
+
+ new[0] = old[0];
+ new->elts = vec_dup (new->elts);
+ new->free_elts = vec_dup (new->free_elts);
+ new->free_lists = vec_dup (new->free_lists);
+ for (i = 0; i < vec_len (new->free_lists); i++)
+ new->free_lists[i] = vec_dup (new->free_lists[i]);
+ new->used_elt_bitmap = clib_bitmap_dup (new->used_elt_bitmap);
+ new->small_free_elt_free_index = vec_dup (new->small_free_elt_free_index);
+}
+
+/* Make a duplicate copy of a heap. */
+#define heap_dup(v) _heap_dup(v, vec_len (v) * sizeof (v[0]))
+
+always_inline void *
+_heap_dup (void *v_old, uword v_bytes)
+{
+ heap_header_t *h_old, *h_new;
+ void *v_new;
+
+ h_old = heap_header (v_old);
+
+ if (!v_old)
+ return v_old;
+
+ v_new = 0;
+ v_new =
+ _vec_resize (v_new, _vec_len (v_old), v_bytes, sizeof (heap_header_t),
+ HEAP_DATA_ALIGN);
+ h_new = heap_header (v_new);
+ heap_dup_header (h_old, h_new);
+ clib_memcpy (v_new, v_old, v_bytes);
+ return v_new;
+}
+
+always_inline uword
+heap_elts (void *v)
+{
+ heap_header_t *h = heap_header (v);
+ return h->used_count;
+}
+
+uword heap_bytes (void *v);
+
+always_inline void *
+_heap_new (u32 len, u32 n_elt_bytes)
+{
+ void *v = _vec_resize (0, len, (uword) len * n_elt_bytes,
+ sizeof (heap_header_t),
+ HEAP_DATA_ALIGN);
+ heap_header (v)->elt_bytes = n_elt_bytes;
+ return v;
+}
+
+#define heap_new(v) (v) = _heap_new (0, sizeof ((v)[0]))
+
+always_inline void
+heap_set_format (void *v, format_function_t * format_elt)
+{
+ ASSERT (v);
+ heap_header (v)->format_elt = format_elt;
+}
+
+always_inline void
+heap_set_max_len (void *v, uword max_len)
+{
+ ASSERT (v);
+ heap_header (v)->max_len = max_len;
+}
+
+always_inline uword
+heap_get_max_len (void *v)
+{
+ return v ? heap_header (v)->max_len : 0;
+}
+
+/* Create fixed size heap with given block of memory. */
+always_inline void *
+heap_create_from_memory (void *memory, uword max_len, uword elt_bytes)
+{
+ heap_header_t *h;
+ void *v;
+
+ if (max_len * elt_bytes < sizeof (h[0]))
+ return 0;
+
+ h = memory;
+ memset (h, 0, sizeof (h[0]));
+ h->max_len = max_len;
+ h->elt_bytes = elt_bytes;
+ h->flags = HEAP_IS_STATIC;
+
+ v = (void *) (memory + heap_header_bytes ());
+ _vec_len (v) = 0;
+ return v;
+}
+
+/* Execute BODY for each allocated heap element. */
+#define heap_foreach(var,len,heap,body) \
+do { \
+ if (vec_len (heap) > 0) \
+ { \
+ heap_header_t * _h = heap_header (heap); \
+ heap_elt_t * _e = _h->elts + _h->head; \
+ heap_elt_t * _end = _h->elts + _h->tail; \
+ while (1) \
+ { \
+ if (! heap_is_free (_e)) \
+ { \
+ (var) = (heap) + heap_offset (_e); \
+ (len) = heap_elt_size ((heap), _e); \
+ do { body; } while (0); \
+ } \
+ if (_e == _end) \
+ break; \
+ _e = heap_next (_e); \
+ } \
+ } \
+} while (0)
+
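+/* Example (illustrative): visiting every allocated object in a heap of
+ ints; int_heap and process are placeholders for user code.
+
+ int *p;
+ uword len;
+ heap_foreach (p, len, int_heap, ({
+ process (p, len); // p points at len consecutive ints
+ }));
+*/
+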
+#define heap_elt_at_index(v,index) vec_elt_at_index(v,index)
+
+always_inline heap_elt_t *
+heap_get_elt (void *v, uword handle)
+{
+ heap_header_t *h = heap_header (v);
+ heap_elt_t *e = vec_elt_at_index (h->elts, handle);
+ ASSERT (!heap_is_free (e));
+ return e;
+}
+
+#define heap_elt_with_handle(v,handle) \
+({ \
+ heap_elt_t * _e = heap_get_elt ((v), (handle)); \
+ (v) + heap_offset (_e); \
+})
+
+always_inline uword
+heap_is_free_handle (void *v, uword heap_handle)
+{
+ heap_header_t *h = heap_header (v);
+ heap_elt_t *e = vec_elt_at_index (h->elts, heap_handle);
+ return heap_is_free (e);
+}
+
+extern uword heap_len (void *v, word handle);
+
+/* Low level allocation call. */
+extern void *_heap_alloc (void *v, uword size, uword alignment,
+ uword elt_bytes, uword * offset, uword * handle);
+
+#define heap_alloc_aligned(v,size,align,handle) \
+({ \
+ uword _o, _h; \
+ uword _a = (align); \
+ uword _s = (size); \
+ (v) = _heap_alloc ((v), _s, _a, sizeof ((v)[0]), &_o, &_h); \
+ (handle) = _h; \
+ _o; \
+})
+
+#define heap_alloc(v,size,handle) heap_alloc_aligned((v),(size),0,(handle))
+
+extern void heap_dealloc (void *v, uword handle);
+extern void heap_validate (void *v);
+
+/* Format heap internal data structures as string. */
+extern u8 *format_heap (u8 * s, va_list * va);
+
+void *_heap_free (void *v);
+
+#define heap_free(v) (v)=_heap_free(v)
+
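+/* Example (illustrative sketch of the allocation API above):
+
+ u8 *h = 0;
+ uword offset, handle;
+ offset = heap_alloc (h, 256, handle); // 256 u8 objects
+ memset (h + offset, 0, 256);
+ ASSERT (heap_len (h, handle) == 256);
+ heap_dealloc (h, handle);
+ heap_free (h);
+*/
+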
+#endif /* included_heap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c
new file mode 100644
index 00000000..2d8f593d
--- /dev/null
+++ b/src/vppinfra/linux/mem.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <linux/mempolicy.h>
+#include <linux/memfd.h>
+
+#include <vppinfra/clib.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/format.h>
+#include <vppinfra/clib_error.h>
+#include <vppinfra/linux/syscall.h>
+#include <vppinfra/linux/sysfs.h>
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+int
+clib_mem_vm_get_log2_page_size (int fd)
+{
+ struct stat st = { 0 };
+ if (fstat (fd, &st) == -1)
+ return 0;
+ return min_log2 (st.st_blksize);
+}
+
+clib_error_t *
+clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a)
+{
+ int fd = -1;
+ clib_error_t *err = 0;
+ void *addr = 0;
+ u8 *filename = 0;
+ int mmap_flags = MAP_SHARED;
+ int log2_page_size;
+ int n_pages;
+ int old_mpol = -1;
+ u64 old_mask[16] = { 0 };
+
+ /* Save the old NUMA memory policy if needed. */
+ if (a->flags & (CLIB_MEM_VM_F_NUMA_PREFER | CLIB_MEM_VM_F_NUMA_FORCE))
+ {
+ int rv;
+ rv =
+ get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0);
+
+ if (rv == -1)
+ {
+ if ((a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0)
+ {
+ err = clib_error_return_unix (0, "get_mempolicy");
+ goto error;
+ }
+ else
+ old_mpol = -1;
+ }
+ }
+
+ /* If we are creating a shared segment, we need a file descriptor. */
+ if (a->flags & CLIB_MEM_VM_F_SHARED)
+ {
+ /* If hugepages are needed we must create a hugetlbfs mount point. */
+ if (a->flags & CLIB_MEM_VM_F_HUGETLB)
+ {
+ char *mount_dir;
+ char template[] = "/tmp/hugepage_mount.XXXXXX";
+
+ mount_dir = mkdtemp (template);
+ if (mount_dir == 0)
+ return clib_error_return_unix (0, "mkdtemp \'%s\'", template);
+
+ if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL))
+ {
+ err = clib_error_return_unix (0, "mount hugetlb directory '%s'",
+ mount_dir);
+ goto error;
+ }
+
+ filename = format (0, "%s/%s%c", mount_dir, a->name, 0);
+
+ if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
+ {
+ err = clib_error_return_unix (0, "open");
+ goto error;
+ }
+ umount2 ((char *) mount_dir, MNT_DETACH);
+ rmdir ((char *) mount_dir);
+ mmap_flags |= MAP_LOCKED;
+ }
+ else
+ {
+ if ((fd = memfd_create (a->name, MFD_ALLOW_SEALING)) == -1)
+ {
+ err = clib_error_return_unix (0, "memfd_create");
+ goto error;
+ }
+
+ if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
+ {
+ err = clib_error_return_unix (0, "fcntl (F_ADD_SEALS)");
+ goto error;
+ }
+ }
+ log2_page_size = clib_mem_vm_get_log2_page_size (fd);
+
+ if (log2_page_size == 0)
+ {
+ err = clib_error_return_unix (0, "cannot determine page size");
+ goto error;
+ }
+ }
+ else /* not CLIB_MEM_VM_F_SHARED */
+ {
+ if (a->flags & CLIB_MEM_VM_F_HUGETLB)
+ {
+ mmap_flags |= MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS;
+ log2_page_size = 21;
+ }
+ else
+ {
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+ log2_page_size = min_log2 (sysconf (_SC_PAGESIZE));
+ }
+ }
+
+ n_pages = ((a->size - 1) >> log2_page_size) + 1;
+
+ if (a->flags & CLIB_MEM_VM_F_HUGETLB_PREALLOC)
+ {
+ err = clib_sysfs_prealloc_hugepages (a->numa_node,
+ 1 << (log2_page_size - 10),
+ n_pages);
+ if (err)
+ goto error;
+
+ }
+
+ if (fd != -1)
+ if ((ftruncate (fd, a->size)) == -1)
+ {
+ err = clib_error_return_unix (0, "ftruncate");
+ goto error;
+ }
+
+ if (old_mpol != -1)
+ {
+ int rv;
+ u64 mask[16] = { 0 };
+ mask[0] = 1 << a->numa_node;
+ rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
+ if (rv)
+ {
+ err = clib_error_return_unix (0, "set_mempolicy");
+ goto error;
+ }
+ }
+
+ addr = mmap (0, a->size, (PROT_READ | PROT_WRITE), mmap_flags, fd, 0);
+ if (addr == MAP_FAILED)
+ {
+ err = clib_error_return_unix (0, "mmap");
+ goto error;
+ }
+
+ /* Re-apply the old NUMA memory policy. */
+ if (old_mpol != -1 &&
+ set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1)
+ {
+ err = clib_error_return_unix (0, "set_mempolicy");
+ goto error;
+ }
+
+ a->log2_page_size = log2_page_size;
+ a->n_pages = n_pages;
+ a->addr = addr;
+ a->fd = fd;
+ goto done;
+
+error:
+ if (fd != -1)
+ close (fd);
+
+done:
+ vec_free (filename);
+ return err;
+}
+
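+/* Example (illustrative sketch): requesting a 64 MB shared, hugepage-backed
+ segment on numa node 0; use () is a placeholder for user code.
+
+ clib_mem_vm_alloc_t alloc = { 0 };
+ clib_error_t *err;
+ alloc.name = "my-segment";
+ alloc.size = 64 << 20;
+ alloc.numa_node = 0;
+ alloc.flags = CLIB_MEM_VM_F_SHARED | CLIB_MEM_VM_F_HUGETLB;
+ err = clib_mem_vm_ext_alloc (&alloc);
+ if (err == 0)
+ use (alloc.addr, alloc.size, alloc.fd);
+*/
+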
+u64 *
+clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages)
+{
+ int pagesize = sysconf (_SC_PAGESIZE);
+ int fd;
+ int i;
+ u64 *r = 0;
+
+ if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
+ return 0;
+
+ for (i = 0; i < n_pages; i++)
+ {
+ u64 seek, pagemap = 0;
+ uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
+ seek = ((u64) vaddr / pagesize) * sizeof (u64);
+ if (lseek (fd, seek, SEEK_SET) != seek)
+ goto done;
+
+ if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap)))
+ goto done;
+
+ if ((pagemap & (1ULL << 63)) == 0)
+ goto done;
+
+ pagemap &= pow2_mask (55);
+ vec_add1 (r, pagemap * pagesize);
+ }
+
+done:
+ close (fd);
+ if (vec_len (r) != n_pages)
+ {
+ vec_free (r);
+ return 0;
+ }
+ return r;
+}
+
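+/* Example (illustrative): translating the pages of a mapping (such as one
+ returned by clib_mem_vm_ext_alloc above) into physical addresses. A null
+ return means /proc/self/pagemap was unreadable or a page was not present.
+
+ u64 *pa = clib_mem_vm_get_paddr (alloc.addr, alloc.log2_page_size,
+ alloc.n_pages);
+ if (pa)
+ {
+ // pa[i] is the physical address of page i
+ vec_free (pa);
+ }
+*/
+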
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/linux/syscall.h b/src/vppinfra/linux/syscall.h
new file mode 100644
index 00000000..f8ec5919
--- /dev/null
+++ b/src/vppinfra/linux/syscall.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_linux_syscall_h
+#define included_linux_syscall_h
+
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline long
+set_mempolicy (int mode, const unsigned long *nodemask, unsigned long maxnode)
+{
+ return syscall (__NR_set_mempolicy, mode, nodemask, maxnode);
+}
+
+static inline int
+get_mempolicy (int *mode, unsigned long *nodemask, unsigned long maxnode,
+ void *addr, unsigned long flags)
+{
+ return syscall (__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags);
+}
+
+static inline long
+move_pages (int pid, unsigned long count, void **pages, const int *nodes,
+ int *status, int flags)
+{
+ return syscall (__NR_move_pages, pid, count, pages, nodes, status, flags);
+}
+
+static inline int
+memfd_create (const char *name, unsigned int flags)
+{
+ return syscall (__NR_memfd_create, name, flags);
+}
+
+#endif /* included_linux_syscall_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/linux/sysfs.c b/src/vppinfra/linux/sysfs.c
new file mode 100644
index 00000000..5f611e6a
--- /dev/null
+++ b/src/vppinfra/linux/sysfs.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/clib.h>
+#include <vppinfra/clib_error.h>
+#include <vppinfra/format.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+clib_error_t *
+clib_sysfs_write (char *file_name, char *fmt, ...)
+{
+ u8 *s;
+ int fd;
+ clib_error_t *error = 0;
+
+ fd = open (file_name, O_WRONLY);
+ if (fd < 0)
+ return clib_error_return_unix (0, "open `%s'", file_name);
+
+ va_list va;
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ if (write (fd, s, vec_len (s)) < 0)
+ error = clib_error_return_unix (0, "write `%s'", file_name);
+
+ vec_free (s);
+ close (fd);
+ return error;
+}
+
+clib_error_t *
+clib_sysfs_read (char *file_name, char *fmt, ...)
+{
+ unformat_input_t input;
+ u8 *s = 0;
+ int fd;
+ ssize_t sz;
+ uword result;
+
+ fd = open (file_name, O_RDONLY);
+ if (fd < 0)
+ return clib_error_return_unix (0, "open `%s'", file_name);
+
+ vec_validate (s, 4095);
+
+ sz = read (fd, s, vec_len (s));
+ if (sz < 0)
+ {
+ close (fd);
+ vec_free (s);
+ return clib_error_return_unix (0, "read `%s'", file_name);
+ }
+
+ _vec_len (s) = sz;
+ unformat_init_vector (&input, s);
+
+ va_list va;
+ va_start (va, fmt);
+ result = va_unformat (&input, fmt, &va);
+ va_end (va);
+
+ vec_free (s);
+ close (fd);
+
+ if (result == 0)
+ return clib_error_return (0, "unformat error");
+
+ return 0;
+}
+
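+/* Example (illustrative): both helpers take a format string, so reading one
+ integer back from sysfs is a single call.
+
+ int n = 0;
+ clib_error_t *err;
+ err = clib_sysfs_read ("/sys/kernel/mm/hugepages/hugepages-2048kB/"
+ "nr_hugepages", "%d", &n);
+*/
+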
+u8 *
+clib_sysfs_link_to_name (char *link)
+{
+ char *p, buffer[64];
+ unformat_input_t in;
+ u8 *s = 0;
+ int r;
+
+ r = readlink (link, buffer, sizeof (buffer) - 1);
+
+ if (r < 0)
+ return 0;
+
+ buffer[r] = 0;
+ p = strrchr (buffer, '/');
+
+ if (!p)
+ return 0;
+
+ unformat_init_string (&in, p + 1, strlen (p + 1));
+ if (unformat (&in, "%s", &s) != 1)
+ clib_unix_warning ("no string?");
+ unformat_free (&in);
+
+ return s;
+}
+
+clib_error_t *
+clib_sysfs_set_nr_hugepages (int numa_node, int page_size, int nr)
+{
+ clib_error_t *error = 0;
+ struct stat sb;
+ u8 *p = 0;
+
+ p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0);
+
+ if (stat ((char *) p, &sb) == 0)
+ {
+ if (S_ISDIR (sb.st_mode) == 0)
+ {
+ error = clib_error_return (0, "'%s' is not directory", p);
+ goto done;
+ }
+ }
+ else if (numa_node == 0)
+ {
+ vec_reset_length (p);
+ p = format (p, "/sys/kernel/mm%c", 0);
+ if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0)
+ {
+ error = clib_error_return (0, "'%s' does not exist or it is not "
+ "directory", p);
+ goto done;
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "'%s' does not exist", p);
+ goto done;
+ }
+
+ _vec_len (p) -= 1;
+ p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0);
+ error = clib_sysfs_write ((char *) p, "%d", nr);
+
+done:
+ vec_free (p);
+ return error;
+}
+
+
+static clib_error_t *
+clib_sysfs_get_xxx_hugepages (char *type, int numa_node,
+ int page_size, int *val)
+{
+ clib_error_t *error = 0;
+ struct stat sb;
+ u8 *p = 0;
+
+ p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0);
+
+ if (stat ((char *) p, &sb) == 0)
+ {
+ if (S_ISDIR (sb.st_mode) == 0)
+ {
+ error = clib_error_return (0, "'%s' is not directory", p);
+ goto done;
+ }
+ }
+ else if (numa_node == 0)
+ {
+ vec_reset_length (p);
+ p = format (p, "/sys/kernel/mm%c", 0);
+ if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0)
+ {
+ error = clib_error_return (0, "'%s' does not exist or it is not "
+ "directory", p);
+ goto done;
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "'%s' does not exist", p);
+ goto done;
+ }
+
+ _vec_len (p) -= 1;
+ p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size,
+ type, 0);
+ error = clib_sysfs_read ((char *) p, "%d", val);
+
+done:
+ vec_free (p);
+ return error;
+}
+
+clib_error_t *
+clib_sysfs_get_free_hugepages (int numa_node, int page_size, int *v)
+{
+ return clib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v);
+}
+
+clib_error_t *
+clib_sysfs_get_nr_hugepages (int numa_node, int page_size, int *v)
+{
+ return clib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v);
+}
+
+clib_error_t *
+clib_sysfs_get_surplus_hugepages (int numa_node, int page_size, int *v)
+{
+ return clib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v);
+}
+
+clib_error_t *
+clib_sysfs_prealloc_hugepages (int numa_node, int page_size, int nr)
+{
+ clib_error_t *error = 0;
+ int n, needed;
+ error = clib_sysfs_get_free_hugepages (numa_node, page_size, &n);
+ if (error)
+ return error;
+ needed = nr - n;
+ if (needed <= 0)
+ return 0;
+
+ error = clib_sysfs_get_nr_hugepages (numa_node, page_size, &n);
+ if (error)
+ return error;
+ clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u",
+ needed, page_size, numa_node);
+ return clib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed);
+}
+
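+/* Example (illustrative): ensure at least sixteen 2 MB hugepages are free
+ on numa node 0. page_size is in kB, matching the sysfs directory names.
+
+ clib_error_t *err = clib_sysfs_prealloc_hugepages (0, 2048, 16);
+*/
+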
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/linux/sysfs.h b/src/vppinfra/linux/sysfs.h
new file mode 100644
index 00000000..6c80cf95
--- /dev/null
+++ b/src/vppinfra/linux/sysfs.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_linux_sysfs_h
+#define included_linux_sysfs_h
+
+#include <vppinfra/error.h>
+
+clib_error_t *clib_sysfs_write (char *file_name, char *fmt, ...);
+
+clib_error_t *clib_sysfs_read (char *file_name, char *fmt, ...);
+
+u8 *clib_sysfs_link_to_name (char *link);
+
+clib_error_t *clib_sysfs_set_nr_hugepages (int numa_node,
+ int page_size, int nr);
+clib_error_t *clib_sysfs_get_nr_hugepages (int numa_node,
+ int page_size, int *v);
+clib_error_t *clib_sysfs_get_free_hugepages (int numa_node,
+ int page_size, int *v);
+clib_error_t *clib_sysfs_get_surplus_hugepages (int numa_node,
+ int page_size, int *v);
+clib_error_t *clib_sysfs_prealloc_hugepages (int numa_node,
+ int page_size, int nr);
+
+#endif /* included_linux_sysfs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/lock.h b/src/vppinfra/lock.h
new file mode 100644
index 00000000..7d241675
--- /dev/null
+++ b/src/vppinfra/lock.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_clib_lock_h
+#define included_clib_lock_h
+
+#include <vppinfra/clib.h>
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 lock;
+#if CLIB_DEBUG > 0
+ pid_t pid;
+ uword thread_index;
+ void *frame_address;
+#endif
+} *clib_spinlock_t;
+
+static inline void
+clib_spinlock_init (clib_spinlock_t * p)
+{
+ *p = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ memset ((void *) *p, 0, CLIB_CACHE_LINE_BYTES);
+}
+
+static inline void
+clib_spinlock_free (clib_spinlock_t * p)
+{
+ if (*p)
+ {
+ clib_mem_free ((void *) *p);
+ *p = 0;
+ }
+}
+
+static_always_inline void
+clib_spinlock_lock (clib_spinlock_t * p)
+{
+ while (__sync_lock_test_and_set (&(*p)->lock, 1))
+#if __x86_64__
+ __builtin_ia32_pause ()
+#endif
+ ;
+#if CLIB_DEBUG > 0
+ (*p)->frame_address = __builtin_frame_address (0);
+ (*p)->pid = getpid ();
+ (*p)->thread_index = os_get_thread_index ();
+#endif
+}
+
+static_always_inline void
+clib_spinlock_lock_if_init (clib_spinlock_t * p)
+{
+ if (PREDICT_FALSE (*p != 0))
+ clib_spinlock_lock (p);
+}
+
+static_always_inline void
+clib_spinlock_unlock (clib_spinlock_t * p)
+{
+#if CLIB_DEBUG > 0
+ (*p)->frame_address = 0;
+ (*p)->pid = 0;
+ (*p)->thread_index = 0;
+#endif
+ /* Make sure all writes are complete before releasing the lock */
+ CLIB_MEMORY_BARRIER ();
+ (*p)->lock = 0;
+}
+
+static_always_inline void
+clib_spinlock_unlock_if_init (clib_spinlock_t * p)
+{
+ if (PREDICT_FALSE (*p != 0))
+ clib_spinlock_unlock (p);
+}
+
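+/* Example (illustrative): protecting a shared counter with the lock above.
+
+ static clib_spinlock_t counter_lock;
+ static u64 counter;
+
+ void counter_init (void) { clib_spinlock_init (&counter_lock); }
+
+ void counter_bump (void)
+ {
+ clib_spinlock_lock (&counter_lock);
+ counter++;
+ clib_spinlock_unlock (&counter_lock);
+ }
+*/
+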
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/longjmp.S b/src/vppinfra/longjmp.S
new file mode 100644
index 00000000..d4dd4c7d
--- /dev/null
+++ b/src/vppinfra/longjmp.S
@@ -0,0 +1,690 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#if defined(__x86_64__)
+ .global clib_setjmp
+ .align 4
+ .type clib_setjmp, @function
+clib_setjmp:
+ movq %rbx, 8*0(%rdi)
+ movq %rbp, 8*1(%rdi)
+ movq %r12, 8*2(%rdi)
+ movq %r13, 8*3(%rdi)
+ movq %r14, 8*4(%rdi)
+ movq %r15, 8*5(%rdi)
+
+ /* Save SP after return. */
+ leaq 8(%rsp), %rdx
+ movq %rdx, 8*6(%rdi)
+
+ /* Save PC we are returning to from stack frame. */
+ movq 0(%rsp), %rax
+ movq %rax, 8*7(%rdi)
+
+ /* Give back user's return value. */
+ movq %rsi, %rax
+ ret
+
+ .global clib_longjmp
+ .align 4
+ .type clib_longjmp, @function
+clib_longjmp:
+ /* Restore regs. */
+ movq 8*0(%rdi), %rbx
+ movq 8*1(%rdi), %rbp
+ movq 8*2(%rdi), %r12
+ movq 8*3(%rdi), %r13
+ movq 8*4(%rdi), %r14
+ movq 8*5(%rdi), %r15
+ movq 8*6(%rdi), %rsp
+ movq 8*7(%rdi), %rdx
+
+ /* Give back user's return value. */
+ movq %rsi, %rax
+
+ /* Away we go. */
+ jmpq *%rdx
+
+ .global clib_calljmp
+ .align 4
+ .type clib_calljmp, @function
+clib_calljmp:
+ /* Make sure stack is 16-byte aligned. */
+ movq %rdx, %rax
+ andq $0xf, %rax
+ subq %rax, %rdx
+
+ /* Get return address. */
+ pop %rax
+
+ /* Switch to new stack. */
+ xchgq %rsp, %rdx
+
+ /* Save return address on new stack. */
+ push %rax
+
+ /* Save old stack pointer on new stack. */
+ push %rdx
+
+ /* Get function. */
+ movq %rdi, %rdx
+
+ /* Move argument into place. */
+ movq %rsi, %rdi
+
+ /* Away we go. */
+ callq *%rdx
+
+ /* Switch back to old stack. */
+ movq 8(%rsp), %rdx
+ movq 0(%rsp), %rcx
+ xchgq %rcx, %rsp
+
+ /* Return to caller. */
+ jmpq *%rdx
+
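+/* Example (illustrative sketch): clib_calljmp is normally reached through
+ its C declaration in longjmp.h, roughly
+ uword clib_calljmp (uword (*func) (uword arg), uword arg, void *stack);
+ so running f on a private stack looks like
+ u8 *stack = clib_mem_alloc (STACK_BYTES);
+ uword rv = clib_calljmp (f, (uword) arg, stack + STACK_BYTES);
+ Stacks grow down, hence passing the top of the allocation; STACK_BYTES,
+ f and arg are placeholders. */
+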
+#elif defined(i386)
+ .global clib_setjmp
+ .align 4
+ .type clib_setjmp, @function
+clib_setjmp:
+ movl 4(%esp), %ecx
+
+ movl %ebp, 4*0(%ecx)
+ movl %ebx, 4*1(%ecx)
+ movl %edi, 4*2(%ecx)
+ movl %esi, 4*3(%ecx)
+
+ /* Save SP after return. */
+ leal 4(%esp), %edx
+ movl %edx, 4*4(%ecx)
+
+ /* Save PC we are returning to from stack frame. */
+ movl 0(%esp), %eax
+ movl %eax, 4*5(%ecx)
+
+ /* Give back user's return value. */
+ movl 8(%esp), %eax
+ ret
+
+ .global clib_longjmp
+ .align 4
+ .type clib_longjmp, @function
+clib_longjmp:
+ movl 4(%esp), %ecx
+
+ /* Give back user's return value. */
+ movl 8(%esp), %eax
+
+ /* Restore regs. */
+ movl 4*0(%ecx), %ebp
+ movl 4*1(%ecx), %ebx
+ movl 4*2(%ecx), %edi
+ movl 4*3(%ecx), %esi
+ movl 4*4(%ecx), %esp
+ movl 4*5(%ecx), %edx
+
+ /* Away we go. */
+ jmp *%edx
+
+ .global clib_calljmp
+ .align 4
+ .type clib_calljmp, @function
+clib_calljmp:
+ /* Get new stack pointer. */
+ movl 12(%esp), %edx
+
+ /* Switch stacks. */
+ xchgl %esp, %edx
+
+ /* Save old stack pointer on new stack. */
+ sub $8, %esp
+ movl %edx, 4(%esp)
+
+ /* Put function argument in stack frame. */
+ movl 8(%edx), %eax
+ movl %eax, 0(%esp)
+
+ /* Get function. */
+ movl 4(%edx), %eax
+
+ /* Away we go. */
+ call *%eax
+
+ /* Switch back to old stack. */
+ movl 4(%esp), %edx
+ xchgl %edx, %esp
+
+ /* Return to caller. */
+ ret
+
+#elif defined(__SPU__)
+
+#elif defined(__powerpc64__)
+
+ .text
+
+#define _prologue(n) \
+ .align 2 ; \
+ .globl n, .##n ; \
+ .section ".opd", "aw" ; \
+ .align 3 ; \
+n: .quad .##n, .TOC.@tocbase, 0 ; \
+ .previous ; \
+ .size n, 24 ; \
+ .type .##n, @function ; \
+.##n:
+
+#define _foreach_14_31 \
+_ (14, 0) _ (15, 1) _ (16, 2) _ (17, 3) _ (18, 4) _ (19, 5) \
+_ (20, 6) _ (21, 7) _ (22, 8) _ (23, 9) _ (24, 10) _ (25, 11) \
+_ (26, 12) _ (27, 13) _ (28, 14) _ (29, 15) _ (30, 16) _ (31, 17)
+
+#define _foreach_20_31 \
+_ (20, 0) _ (21, 1) _ (22, 2) _ (23, 3) _ (24, 4) _ (25, 5) \
+_ (26, 6) _ (27, 7) _ (28, 8) _ (29, 9) _ (30, 10) _ (31, 11)
+
+#ifdef __ALTIVEC__
+#define CLIB_POWERPC_ALTIVEC_N_REGS 12
+#else
+#define CLIB_POWERPC_ALTIVEC_N_REGS 0
+#endif
+
+_prologue (clib_setjmp)
+ mflr 0
+ std 0, 8*0(3)
+ std 1, 8*1(3)
+ std 2, 8*2(3)
+ mfcr 0
+ std 0, 8*3(3)
+ mfspr 0, 256
+ stw 0, 8*4(3)
+
+ /* gprs 14 - 31 */
+#define _(a,b) std a, 8*((b) + 4 + 18*0)(3) ;
+ _foreach_14_31
+#undef _
+
+ /* fprs 14 - 31 */
+#define _(a,b) stfd a, 8*((b) + 4 + 18*1)(3) ;
+ _foreach_14_31
+#undef _
+
+#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
+ /* vrs 20 - 31 */
+ li 5, 8*(4 + 18*2)
+#define _(a,b) stvx a, 5, 3 ; addi 5, 5, 16 ;
+ _foreach_20_31
+#undef _
+#endif /* CLIB_POWERPC_ALTIVEC_N_REGS > 0 */
+
+ /* Return value. */
+ mr 3, 4
+
+ blr
+
+_prologue (clib_longjmp)
+ ld 0, 8*0(3)
+ mtlr 0
+ ld 1, 8*1(3)
+ ld 2, 8*2(3)
+ ld 0, 8*3(3)
+ mtcrf 0xff, 0
+ lwz 0, 8*4(3) /* VRSAVE was saved at 8*4 by clib_setjmp */
+ mtspr 256, 0
+
+ /* gprs 14 - 31 */
+#define _(a,b) ld a, 8*((b) + 4 + 18*0)(3) ;
+ _foreach_14_31
+#undef _
+
+ /* fprs 14 - 31 */
+#define _(a,b) lfd a, 8*((b) + 4 + 18*1)(3) ;
+ _foreach_14_31
+#undef _
+
+#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
+ /* vrs 20 - 31 */
+ li 5, 8*(4 + 18*2)
+#define _(a,b) lvx a, 5, 3 ; addi 5, 5, 16 ;
+ _foreach_20_31
+#undef _
+#endif /* CLIB_POWERPC_ALTIVEC_N_REGS > 0 */
+
+ /* Return value. */
+ mr 3, 4
+
+ blr
+
+ .globl clib_calljmp
+ .section ".opd","aw"
+ .align 3
+clib_calljmp:
+ .quad .L.clib_calljmp,.TOC.@tocbase,0
+ .previous
+ .type clib_calljmp, @function
+.L.clib_calljmp:
+ mflr 0
+ mr 9,3
+ std 0,16(1)
+ stdu 1,-112(1)
+#APP
+ std 1,-8(5)
+ addi 5,5,-256
+ mr 1,5
+#NO_APP
+ ld 10,0(9)
+ std 2,40(1)
+ mr 3,4
+ mtctr 10
+ ld 11,16(9)
+ ld 2,8(9)
+ bctrl
+ ld 2,40(1)
+#APP
+ addi 1,1,256
+ ld 1,-8(1)
+#NO_APP
+ addi 1,1,112
+ ld 0,16(1)
+ mtlr 0
+ blr
+ .long 0
+ .byte 0,0,0,1,128,0,0,0
+ .size clib_calljmp,.-.L.clib_calljmp
+
+#elif defined(__powerpc__)
+
+#define _foreach_14_31 \
+_ (14, 0) _ (15, 1) _ (16, 2) _ (17, 3) _ (18, 4) _ (19, 5) \
+_ (20, 6) _ (21, 7) _ (22, 8) _ (23, 9) _ (24, 10) _ (25, 11) \
+_ (26, 12) _ (27, 13) _ (28, 14) _ (29, 15) _ (30, 16) _ (31, 17)
+
+#define _foreach_20_31 \
+_ (20, 0) _ (21, 1) _ (22, 2) _ (23, 3) _ (24, 4) _ (25, 5) \
+_ (26, 6) _ (27, 7) _ (28, 8) _ (29, 9) _ (30, 10) _ (31, 11)
+
+#ifdef __ALTIVEC__
+#define CLIB_POWERPC_ALTIVEC_N_REGS 12
+#else
+#define CLIB_POWERPC_ALTIVEC_N_REGS 0
+#endif
+
+ .global clib_setjmp
+ .align 4
+ .type clib_setjmp, @function
+clib_setjmp:
+ mflr 0
+ stw 0, 4*0(3)
+ stw 1, 4*1(3)
+ mfcr 0
+ stw 0, 4*2(3)
+#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
+ mfspr 0, 256
+#endif
+ stw 0, 4*3(3)
+
+#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
+ li 5, 4*4
+#define _(a,b) stvx a, 3, 5 ; addi 5, 5, 16 ;
+ _foreach_20_31
+#undef _
+#endif /* CLIB_POWERPC_ALTIVEC_N_REGS > 0 */
+
+ /* gp 14 - 31 */
+#define _(a,b) stw a, 4*(1*(b) + 4 + 4*CLIB_POWERPC_ALTIVEC_N_REGS + 0*18)(3) ;
+ _foreach_14_31
+#undef _
+
+ /* fp 14 - 31 */
+#define _(a,b) stfd a, 4*(2*(b) + 4 + 4*CLIB_POWERPC_ALTIVEC_N_REGS + 1*18)(3) ;
+ _foreach_14_31
+#undef _
+
+ /* Return value. */
+ mr 3, 4
+
+ blr
+
+ .global clib_longjmp
+ .align 4
+ .type clib_longjmp, @function
+clib_longjmp:
+
+ lwz 0, 4*0(3)
+ mtlr 0
+ lwz 1, 4*1(3)
+ lwz 0, 4*2(3)
+ mtcr 0
+ lwz 0, 4*3(3)
+#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
+ mtspr 256, 0
+#endif
+
+#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
+ li 5, 4*4
+#define _(a,b) lvx a, 3, 5 ; addi 5, 5, 16 ;
+ _foreach_20_31
+#undef _
+#endif /* CLIB_POWERPC_ALTIVEC_N_REGS > 0 */
+
+ /* gp 14 - 31 */
+#define _(a,b) lwz a, 4*(1*(b) + 4 + 4*CLIB_POWERPC_ALTIVEC_N_REGS + 0*18)(3) ;
+ _foreach_14_31
+#undef _
+
+ /* fp 14 - 31 */
+#define _(a,b) lfd a, 4*(2*(b) + 4 + 4*CLIB_POWERPC_ALTIVEC_N_REGS + 1*18)(3) ;
+ _foreach_14_31
+#undef _
+
+ /* Return value. */
+ mr 3, 4
+
+ blr
+
+ .global clib_calljmp
+ .align 4
+ .type clib_calljmp, @function
+clib_calljmp:
+ /* Make sure stack is 16 byte aligned. */
+ andi. 0, 5, 0xf
+ sub 5, 5, 0
+ addi 5, 5, -16
+
+ /* Save old stack/link pointer on new stack. */
+ stw 1, 0(5)
+ mflr 0
+ stw 0, 4(5)
+
+ /* account for (sp, lr) tuple, and keep aligned */
+ addi 5, 5, -16
+
+ /* Switch stacks. */
+ mr 1, 5
+
+ /* Move argument into place. */
+ mtctr 3
+ mr 3, 4
+
+ /* Away we go. */
+ bctrl
+
+ /* back to our synthetic frame */
+ addi 1,1,16
+
+ /* Switch back to old stack. */
+ lwz 0, 4(1)
+ mtlr 0
+ lwz 0, 0(1)
+ mr 1, 0
+
+ /* Return to caller. */
+ blr
+
+#elif defined(__arm__)
+
+ .global clib_setjmp
+ .align 4
+ .type clib_setjmp, %function
+clib_setjmp:
+ mov ip, r0 /* jmp buffer */
+
+ /* Save integer registers */
+ stmia ip!, {v1-v6, sl, fp, sp, lr}
+
+#ifdef __IWMMXT__
+ /* Save the call-preserved iWMMXt registers. */
+ wstrd wr10, [ip], #8
+ wstrd wr11, [ip], #8
+ wstrd wr12, [ip], #8
+ wstrd wr13, [ip], #8
+ wstrd wr14, [ip], #8
+ wstrd wr15, [ip], #8
+#endif
+
+ /* Give back user's return value. */
+ mov r0, r1
+ bx lr
+
+ .global clib_longjmp
+ .align 4
+ .type clib_longjmp, %function
+clib_longjmp:
+ mov ip, r0 /* jmp buffer */
+
+ /* Restore integer registers. */
+ ldmia ip!, {v1-v6, sl, fp, sp, lr}
+
+#ifdef __IWMMXT__
+	/* Restore the call-preserved iWMMXt registers.  */
+ wldrd wr10, [ip], #8
+ wldrd wr11, [ip], #8
+ wldrd wr12, [ip], #8
+ wldrd wr13, [ip], #8
+ wldrd wr14, [ip], #8
+ wldrd wr15, [ip], #8
+#endif
+
+ /* Give back user's return value. */
+ mov r0, r1
+ bx lr
+
+ .global clib_calljmp
+ .align 4
+ .type clib_calljmp, %function
+clib_calljmp:
+ /* Make sure stack is 8 byte aligned. */
+ bic r2, r2, #7
+
+ /* Allocate space for stack/link pointer on new stack. */
+ sub r2, r2, #8
+
+ /* Save old stack/link pointer on new stack. */
+ str sp, [r2, #0]
+ str lr, [r2, #4]
+
+ /* Switch stacks. */
+ mov sp, r2
+
+ /* Save function to call. */
+ mov ip, r0
+
+ /* Move argument into place. */
+ mov r0, r1
+
+ /* Away we go. */
+ bx ip
+
+ /* Switch back to old stack. */
+ ldr lr, [sp, #4]
+ ldr ip, [sp, #0]
+ mov sp, ip
+
+ /* Return to caller. */
+ bx lr
+
+#elif defined(__xtensa__)
+
+ /* FIXME implement if needed. */
+ .global clib_setjmp
+ .align 4
+ .type clib_setjmp, %function
+clib_setjmp:
+1: j 1b
+
+ .global clib_longjmp
+ .align 4
+	.type clib_longjmp, %function
+clib_longjmp:
+1: j 1b
+
+ .global clib_calljmp
+ .align 4
+ .type clib_calljmp, %function
+clib_calljmp:
+1: j 1b
+
+#elif defined(__TMS320C6X__)
+
+ /* FIXME implement if needed. */
+ .global clib_setjmp
+ .align 4
+ .type clib_setjmp, %function
+clib_setjmp:
+1: B .S1 1b
+
+ .global clib_longjmp
+ .align 4
+	.type clib_longjmp, %function
+clib_longjmp:
+1: B .S1 1b
+
+ .global clib_calljmp
+ .align 4
+ .type clib_calljmp, %function
+clib_calljmp:
+1: B .S1 1b
+
+#elif defined (__aarch64__)
+/*
+ Copyright (c) 2011, 2012 ARM Ltd
+ All rights reserved.
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. The name of the company may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+ THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define GPR_LAYOUT \
+ REG_PAIR (x19, x20, 0); \
+ REG_PAIR (x21, x22, 16); \
+ REG_PAIR (x23, x24, 32); \
+ REG_PAIR (x25, x26, 48); \
+ REG_PAIR (x27, x28, 64); \
+ REG_PAIR (x29, x30, 80); \
+ REG_ONE (x16, 96)
+#define FPR_LAYOUT \
+ REG_PAIR ( d8, d9, 112); \
+ REG_PAIR (d10, d11, 128); \
+ REG_PAIR (d12, d13, 144); \
+ REG_PAIR (d14, d15, 160);
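+// Note: GPR_LAYOUT/FPR_LAYOUT expand to stores in clib_setjmp and to
+// loads in clib_longjmp; each function redefines REG_PAIR/REG_ONE just
+// before expanding them, so one layout definition serves both.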
+// int clib_setjmp (jmp_buf)
+ .global clib_setjmp
+ .type clib_setjmp, %function
+clib_setjmp:
+ mov x16, sp
+#define REG_PAIR(REG1, REG2, OFFS) stp REG1, REG2, [x0, OFFS]
+#define REG_ONE(REG1, OFFS) str REG1, [x0, OFFS]
+ GPR_LAYOUT
+ FPR_LAYOUT
+#undef REG_PAIR
+#undef REG_ONE
+ mov x0, x1
+ ret
+ .size clib_setjmp, .-clib_setjmp
+// void clib_longjmp (jmp_buf, int) __attribute__ ((noreturn))
+ .global clib_longjmp
+ .type clib_longjmp, %function
+clib_longjmp:
+#define REG_PAIR(REG1, REG2, OFFS) ldp REG1, REG2, [x0, OFFS]
+#define REG_ONE(REG1, OFFS) ldr REG1, [x0, OFFS]
+ GPR_LAYOUT
+ FPR_LAYOUT
+#undef REG_PAIR
+#undef REG_ONE
+ mov sp, x16
+ mov x0, x1
+ // cmp w1, #0
+ // cinc w0, w1, eq
+ // use br not ret, as ret is guaranteed to mispredict
+ br x30
+ .size clib_longjmp, .-clib_longjmp
+
+
+// void clib_calljmp (x0=function, x1=arg, x2=new_stack)
+ .global clib_calljmp
+ .type clib_calljmp, %function
+clib_calljmp:
+ // save fn ptr
+ mov x3, x0
+ // set up fn arg
+ mov x0, x1
+ // switch stacks
+ mov x4, sp
+
+ // space for saved sp, lr on new stack
+ sub x2, x2, #16
+ mov sp, x2
+
+ // save old sp and link register on new stack
+ str x4, [sp]
+ str x30,[sp,#8]
+ mov x4, sp
+
+ // go there
+ blr x3
+
+ // restore old sp and link register
+ mov x4, sp
+
+ ldr x3, [x4]
+ ldr x30,[x4, #8]
+ mov sp, x3
+ ret
+ .size clib_calljmp, .-clib_calljmp
+#else
+#error "unknown machine"
+#endif
+
+.section .note.GNU-stack,"",%progbits
diff --git a/src/vppinfra/longjmp.h b/src/vppinfra/longjmp.h
new file mode 100644
index 00000000..8d83203e
--- /dev/null
+++ b/src/vppinfra/longjmp.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_longjmp_h
+#define included_clib_longjmp_h
+
+#include <vppinfra/types.h>
+
+#if defined(__x86_64__)
+/* rbx, rbp, r12, r13, r14, r15, rip, rsp */
+#define CLIB_ARCH_LONGJMP_REGS 8
+
+#elif defined(i386)
+/* ebx, ebp, esi, edi, eip, esp */
+#define CLIB_ARCH_LONGJMP_REGS 6
+
+#elif (defined(__powerpc64__) || defined(__powerpc__))
+
+#ifdef __ALTIVEC__
+#define CLIB_POWERPC_ALTIVEC_N_REGS 12
+#else
+#define CLIB_POWERPC_ALTIVEC_N_REGS 0
+#endif
+
+/* lr, r1, r2, cr + vrsave; gp regs 14-31; fp regs 14-31; vector regs 20-31 */
+#define CLIB_ARCH_LONGJMP_REGS \
+ (/* r1 lr cr vrsave */ \
+ 4 \
+ /* gp */ \
+ + (31 - 14 + 1) \
+ /* fp */ \
+ + (sizeof (f64) / sizeof (uword)) * (31 - 14 + 1) \
+ /* vector regs */ \
+ + (16 / sizeof (uword)) * CLIB_POWERPC_ALTIVEC_N_REGS)
+
+#elif defined(__SPU__)
+/* FIXME */
+#define CLIB_ARCH_LONGJMP_REGS (10)
+
+#elif defined(__arm__)
+
+#ifndef __IWMMXT__
+/* v1-v6 sl fp sp lr */
+#define CLIB_ARCH_LONGJMP_REGS (10)
+#else
+/* For iwmmxt we save 6 extra 8 byte registers. */
+#define CLIB_ARCH_LONGJMP_REGS (10 + (6*2))
+#endif
+
+#elif defined(__xtensa__)
+
+/* setjmp/longjmp not supported for the moment. */
+#define CLIB_ARCH_LONGJMP_REGS 0
+
+#elif defined(__TMS320C6X__)
+
+/* setjmp/longjmp not supported for the moment. */
+#define CLIB_ARCH_LONGJMP_REGS 0
+
+#elif defined(__aarch64__)
+#define CLIB_ARCH_LONGJMP_REGS (22)
+#else
+#error "unknown machine"
+#endif
+
+typedef struct
+{
+ uword regs[CLIB_ARCH_LONGJMP_REGS];
+} clib_longjmp_t __attribute__ ((aligned (16)));
+
+/* Return given value to saved context. */
+void clib_longjmp (clib_longjmp_t * save, uword return_value);
+
+/* Save context. Returns given value if jump is not taken;
+ otherwise returns value from clib_longjmp if long jump is taken. */
+uword clib_setjmp (clib_longjmp_t * save, uword return_value_not_taken);
+
+/* Call function on given stack. */
+uword clib_calljmp (uword (*func) (uword func_arg),
+ uword func_arg, void *stack);
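+
+/* Usage sketch (illustrative only; stack sizing, alignment and error
+   handling are the caller's responsibility).  clib_calljmp runs a
+   function on a caller-supplied stack, and clib_setjmp/clib_longjmp
+   can unwind across the stack switch.  On descending-stack targets
+   the pointer passed to clib_calljmp is the high end of the region:
+
+     static clib_longjmp_t unwind;
+
+     static uword
+     run (uword arg)
+     {
+       clib_longjmp (&unwind, 42);	// unwinds across the stack switch
+       return 0;				// not reached
+     }
+
+     u8 *stack = clib_mem_alloc_aligned (64 << 10, 16);
+     uword rv = clib_setjmp (&unwind, 0);
+     if (rv == 0)
+       rv = clib_calljmp (run, 0, stack + (64 << 10));
+     // rv is now 42
+*/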
+
+#endif /* included_clib_longjmp_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/macros.c b/src/vppinfra/macros.c
new file mode 100644
index 00000000..ce4cc9bc
--- /dev/null
+++ b/src/vppinfra/macros.c
@@ -0,0 +1,266 @@
+/*
+ macros.c - a simple macro expander
+
+ Copyright (c) 2010, 2014 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/macros.h>
+
+static inline int
+macro_isalnum (i8 c)
+{
+ if ((c >= 'A' && c <= 'Z')
+ || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '_'))
+ return 1;
+ return 0;
+}
+
+static i8 *
+builtin_eval (macro_main_t * mm, i8 * varname, i32 complain)
+{
+ uword *p;
+ i8 *(*fp) (macro_main_t *, i32);
+
+ p = hash_get_mem (mm->the_builtin_eval_hash, varname);
+ if (p == 0)
+ return 0;
+ fp = (void *) (p[0]);
+ return (*fp) (mm, complain);
+}
+
+int
+clib_macro_unset (macro_main_t * mm, char *name)
+{
+ hash_pair_t *p;
+ u8 *key, *value;
+
+ p = hash_get_pair (mm->the_value_table_hash, name);
+
+ if (p == 0)
+ return 1;
+
+ key = (u8 *) (p->key);
+ value = (u8 *) (p->value[0]);
+ hash_unset_mem (mm->the_value_table_hash, name);
+
+ vec_free (value);
+ vec_free (key);
+ return 0;
+}
+
+int
+clib_macro_set_value (macro_main_t * mm, char *name, char *value)
+{
+ u8 *key_copy, *value_copy;
+ int rv;
+
+ rv = clib_macro_unset (mm, name);
+
+ key_copy = format (0, "%s%c", name, 0);
+ value_copy = format (0, "%s%c", value, 0);
+
+ hash_set_mem (mm->the_value_table_hash, key_copy, value_copy);
+ return rv;
+}
+
+i8 *
+clib_macro_get_value (macro_main_t * mm, char *name)
+{
+ uword *p;
+
+ p = hash_get_mem (mm->the_value_table_hash, name);
+ if (p)
+ return (i8 *) (p[0]);
+ else
+ return 0;
+}
+
+/*
+ * eval: takes a string, returns a vector.
+ * looks up $foobar in the variable table.
+ */
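+/*
+ * For example (illustrative), with variable "foo" set to "bar":
+ *   "$foo"     expands to "bar"
+ *   "x=$(foo)" expands to "x=bar"
+ * Undefined references expand to "UNSET", with a warning if
+ * complain is set.
+ */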
+i8 *
+clib_macro_eval (macro_main_t * mm, i8 * s, i32 complain)
+{
+ i8 *rv = 0;
+ i8 *varname, *varvalue;
+ i8 *ts;
+
+ while (*s)
+ {
+ switch (*s)
+ {
+ case '\\':
+ s++;
+ /* fallthrough */
+
+ default:
+ vec_add1 (rv, *s);
+ s++;
+ break;
+
+ case '$':
+ s++;
+ varname = 0;
+ /*
+ * Make vector with variable name in it.
+ */
+ while (*s && (macro_isalnum (*s) || (*s == '_') || (*s == '(')))
+ {
+
+ /* handle $(foo) */
+ if (*s == '(')
+ {
+ s++; /* skip '(' */
+ while (*s && *s != ')')
+ {
+ vec_add1 (varname, *s);
+ s++;
+ }
+ if (*s)
+ s++; /* skip ')' */
+ break;
+ }
+ vec_add1 (varname, *s);
+ s++;
+ }
+ /* null terminate */
+ vec_add1 (varname, 0);
+	  /* Look for a builtin, e.g. $hostname */
+ if (!(varvalue = builtin_eval (mm, varname, complain)))
+ {
+ /* Look in value table */
+ if (!varvalue)
+ {
+ char *tmp = clib_macro_get_value (mm, varname);
+ if (tmp)
+ varvalue = (i8 *) format (0, "%s%c", tmp, 0);
+ }
+#ifdef CLIB_UNIX
+ /* Look in environment. */
+ if (!varvalue)
+ {
+ char *tmp = getenv (varname);
+ if (tmp)
+ varvalue = (i8 *) format (0, "%s%c", tmp, 0);
+ }
+#endif /* CLIB_UNIX */
+ }
+ if (varvalue)
+ {
+ /* recursively evaluate */
+ ts = clib_macro_eval (mm, varvalue, complain);
+ vec_free (varvalue);
+ /* add results to answer */
+ vec_append (rv, ts);
+ /* Remove NULL termination or the results are sad */
+ _vec_len (rv) = vec_len (rv) - 1;
+ vec_free (ts);
+ }
+ else
+ {
+ if (complain)
+ clib_warning ("Undefined Variable Reference: %s\n", varname);
+	      /* Append "UNSET"; free the temporary vector so it is
+	         not leaked. */
+	      ts = (i8 *) format (0, "UNSET ");
+	      vec_append (rv, ts);
+	      _vec_len (rv) = vec_len (rv) - 1;
+	      vec_free (ts);
+	    }
+ vec_free (varname);
+ }
+ }
+ vec_add1 (rv, 0);
+ return (rv);
+}
+
+/*
+ * eval_dollar: takes a bare variable name, wraps it as "$(name)",
+ * evaluates it, and returns the expansion as a vector.
+ */
+i8 *
+clib_macro_eval_dollar (macro_main_t * mm, i8 * s, i32 complain)
+{
+ i8 *s2;
+ i8 *rv;
+
+ s2 = (i8 *) format (0, "$(%s)%c", s, 0);
+ rv = clib_macro_eval (mm, s2, complain);
+ vec_free (s2);
+ return (rv);
+}
+
+void
+clib_macro_add_builtin (macro_main_t * mm, char *name, void *eval_fn)
+{
+ hash_set_mem (mm->the_builtin_eval_hash, name, (uword) eval_fn);
+}
+
+#ifdef CLIB_UNIX
+static i8 *
+eval_hostname (macro_main_t * mm, i32 complain)
+{
+ char tmp[128];
+ if (gethostname (tmp, sizeof (tmp)))
+ return ((i8 *) format (0, "gethostname-error%c", 0));
+ return ((i8 *) format (0, "%s%c", tmp, 0));
+}
+#endif
+
+void
+clib_macro_init (macro_main_t * mm)
+{
+ if (mm->the_builtin_eval_hash != 0)
+ {
+ clib_warning ("mm %p already initialized", mm);
+ return;
+ }
+
+ mm->the_builtin_eval_hash = hash_create_string (0, sizeof (uword));
+ mm->the_value_table_hash = hash_create_string (0, sizeof (uword));
+
+#ifdef CLIB_UNIX
+ hash_set_mem (mm->the_builtin_eval_hash, "hostname", (uword) eval_hostname);
+#endif
+}
+
+void
+clib_macro_free (macro_main_t * mm)
+{
+ hash_pair_t *p;
+ u8 **strings_to_free = 0;
+ int i;
+
+ hash_free (mm->the_builtin_eval_hash);
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, mm->the_value_table_hash,
+ ({
+ vec_add1 (strings_to_free, (u8 *) (p->key));
+ vec_add1 (strings_to_free, (u8 *) (p->value[0]));
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (strings_to_free); i++)
+ vec_free (strings_to_free[i]);
+ vec_free (strings_to_free);
+ hash_free (mm->the_value_table_hash);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/macros.h b/src/vppinfra/macros.h
new file mode 100644
index 00000000..5c2e7033
--- /dev/null
+++ b/src/vppinfra/macros.h
@@ -0,0 +1,54 @@
+/*
+ macros.h - definitions for a simple macro expander
+
+ Copyright (c) 2010, 2014 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef included_macros_h
+#define included_macros_h
+
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/format.h>
+
+#ifdef CLIB_UNIX
+#include <stdlib.h>
+#include <unistd.h>
+#endif
+
+typedef struct
+{
+ uword *the_builtin_eval_hash;
+ uword *the_value_table_hash;
+} macro_main_t;
+
+int clib_macro_unset (macro_main_t * mm, char *name);
+int clib_macro_set_value (macro_main_t * mm, char *name, char *value);
+void clib_macro_add_builtin (macro_main_t * mm, char *name, void *eval_fn);
+i8 *clib_macro_get_value (macro_main_t * mm, char *name);
+i8 *clib_macro_eval (macro_main_t * mm, i8 * s, i32 complain);
+i8 *clib_macro_eval_dollar (macro_main_t * mm, i8 * s, i32 complain);
+void clib_macro_init (macro_main_t * mm);
+void clib_macro_free (macro_main_t * mm);
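+
+/* Usage sketch (illustrative; the variable name and value are made
+   up):
+
+     macro_main_t mm = { 0 };
+     clib_macro_init (&mm);
+     clib_macro_set_value (&mm, "build_dir", "/tmp/build");
+     i8 *s = clib_macro_eval (&mm, (i8 *) "dir=$(build_dir)", 1);
+     // s is a NUL-terminated vector: "dir=/tmp/build"
+     vec_free (s);
+     clib_macro_free (&mm);
+*/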
+
+#endif /* included_macros_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/math.h b/src/vppinfra/math.h
new file mode 100644
index 00000000..cafa1cb3
--- /dev/null
+++ b/src/vppinfra/math.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_math_h
+#define included_math_h
+
+#include <vppinfra/clib.h>
+
+always_inline f64
+sqrt (f64 x)
+{
+ return __builtin_sqrt (x);
+}
+
+always_inline f64
+fabs (f64 x)
+{
+ return __builtin_fabs (x);
+}
+
+#ifndef isnan
+#define isnan(x) __builtin_isnan(x)
+#endif
+
+#ifndef isinf
+#define isinf(x) __builtin_isinf(x)
+#endif
+
+#endif /* included_math_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/md5.c b/src/vppinfra/md5.c
new file mode 100644
index 00000000..9ac1efc7
--- /dev/null
+++ b/src/vppinfra/md5.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm */
+
+/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+rights reserved.
+
+License to copy and use this software is granted provided that it
+is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+Algorithm" in all material mentioning or referencing this software
+or this function.
+
+License is also granted to make and use derivative works provided
+that such works are identified as "derived from the RSA Data
+Security, Inc. MD5 Message-Digest Algorithm" in all material
+mentioning or referencing the derived work.
+
+RSA Data Security, Inc. makes no representations concerning either
+the merchantability of this software or the suitability of this
+software for any particular purpose. It is provided "as is"
+without express or implied warranty of any kind.
+
+These notices must be retained in any copies of any part of this
+documentation and/or software.
+ */
+
+#include <vppinfra/string.h> /* for memset */
+#include <vppinfra/byte_order.h>
+#include <vppinfra/md5.h>
+
+/* F, G, H and I are basic MD5 functions. */
+#define F(b, c, d) (d ^ (b & (c ^ d)))
+#define G(b, c, d) F (d, b, c)
+#define H(b, c, d) (b ^ c ^ d)
+#define I(b, c, d) (c ^ (b | ~d))
+
+/* ROTATE_LEFT rotates x left n bits. */
+#define ROTATE_LEFT(x,n) \
+ (((x) << (n)) | ((x) >> (32 - (n))))
+
+/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+ Rotation is separate from addition to prevent recomputation. */
+#define FF(a,b,c,d,x,s,ac) \
+do { \
+ a += F (b, c, d) + x + ac; \
+ a = ROTATE_LEFT (a, s); \
+ a += b; \
+} while (0)
+
+#define GG(a,b,c,d,x,s,ac) \
+do { \
+ a += G (b, c, d) + x + ac; \
+ a = ROTATE_LEFT (a, s); \
+ a += b; \
+} while (0)
+
+#define HH(a,b,c,d,x,s,ac) \
+do { \
+ a += H (b, c, d) + x + ac; \
+ a = ROTATE_LEFT (a, s); \
+ a += b; \
+} while (0)
+
+#define II(a,b,c,d,x,s,ac) \
+do { \
+ a += I (b, c, d) + x + ac; \
+ a = ROTATE_LEFT (a, s); \
+ a += b; \
+} while (0)
+
+#undef _
+
+/* MD5 basic transformation. Transforms state based on block. */
+static void
+md5_transform (md5_context_t * m, u32 * data, u32 * result, int zero_buffer)
+{
+  u32 a = m->state[0], b = m->state[1], c = m->state[2], d = m->state[3];
+  u32 x[16];
+  int i;
+
+  /* MD5 consumes the block as 16 little-endian 32-bit words; convert
+     once up front so that all four rounds see the same byte order on
+     big-endian hosts (each round reuses every word of the block). */
+  for (i = 0; i < 16; i++)
+    x[i] = clib_host_to_little_u32 (data[i]);
+
+/* Constants for MD5Transform routine. */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+  /* Round 1 */
+  FF (a, b, c, d, x[0], S11, 0xd76aa478);	/* 1 */
+  FF (d, a, b, c, x[1], S12, 0xe8c7b756);	/* 2 */
+  FF (c, d, a, b, x[2], S13, 0x242070db);	/* 3 */
+  FF (b, c, d, a, x[3], S14, 0xc1bdceee);	/* 4 */
+  FF (a, b, c, d, x[4], S11, 0xf57c0faf);	/* 5 */
+  FF (d, a, b, c, x[5], S12, 0x4787c62a);	/* 6 */
+  FF (c, d, a, b, x[6], S13, 0xa8304613);	/* 7 */
+  FF (b, c, d, a, x[7], S14, 0xfd469501);	/* 8 */
+  FF (a, b, c, d, x[8], S11, 0x698098d8);	/* 9 */
+  FF (d, a, b, c, x[9], S12, 0x8b44f7af);	/* 10 */
+  FF (c, d, a, b, x[10], S13, 0xffff5bb1);	/* 11 */
+  FF (b, c, d, a, x[11], S14, 0x895cd7be);	/* 12 */
+  FF (a, b, c, d, x[12], S11, 0x6b901122);	/* 13 */
+  FF (d, a, b, c, x[13], S12, 0xfd987193);	/* 14 */
+  FF (c, d, a, b, x[14], S13, 0xa679438e);	/* 15 */
+  FF (b, c, d, a, x[15], S14, 0x49b40821);	/* 16 */
+
+ /* Round 2 */
+ GG (a, b, c, d, x[1], S21, 0xf61e2562); /* 17 */
+ GG (d, a, b, c, x[6], S22, 0xc040b340); /* 18 */
+ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
+ GG (b, c, d, a, x[0], S24, 0xe9b6c7aa); /* 20 */
+ GG (a, b, c, d, x[5], S21, 0xd62f105d); /* 21 */
+ GG (d, a, b, c, x[10], S22, 0x02441453); /* 22 */
+ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
+ GG (b, c, d, a, x[4], S24, 0xe7d3fbc8); /* 24 */
+ GG (a, b, c, d, x[9], S21, 0x21e1cde6); /* 25 */
+ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
+ GG (c, d, a, b, x[3], S23, 0xf4d50d87); /* 27 */
+ GG (b, c, d, a, x[8], S24, 0x455a14ed); /* 28 */
+ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
+ GG (d, a, b, c, x[2], S22, 0xfcefa3f8); /* 30 */
+ GG (c, d, a, b, x[7], S23, 0x676f02d9); /* 31 */
+ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
+
+ /* Round 3 */
+ HH (a, b, c, d, x[5], S31, 0xfffa3942); /* 33 */
+ HH (d, a, b, c, x[8], S32, 0x8771f681); /* 34 */
+ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
+ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
+ HH (a, b, c, d, x[1], S31, 0xa4beea44); /* 37 */
+ HH (d, a, b, c, x[4], S32, 0x4bdecfa9); /* 38 */
+ HH (c, d, a, b, x[7], S33, 0xf6bb4b60); /* 39 */
+ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
+ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
+ HH (d, a, b, c, x[0], S32, 0xeaa127fa); /* 42 */
+ HH (c, d, a, b, x[3], S33, 0xd4ef3085); /* 43 */
+ HH (b, c, d, a, x[6], S34, 0x04881d05); /* 44 */
+ HH (a, b, c, d, x[9], S31, 0xd9d4d039); /* 45 */
+ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
+ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
+ HH (b, c, d, a, x[2], S34, 0xc4ac5665); /* 48 */
+
+ /* Round 4 */
+ II (a, b, c, d, x[0], S41, 0xf4292244); /* 49 */
+ II (d, a, b, c, x[7], S42, 0x432aff97); /* 50 */
+ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
+ II (b, c, d, a, x[5], S44, 0xfc93a039); /* 52 */
+ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
+ II (d, a, b, c, x[3], S42, 0x8f0ccc92); /* 54 */
+ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
+ II (b, c, d, a, x[1], S44, 0x85845dd1); /* 56 */
+ II (a, b, c, d, x[8], S41, 0x6fa87e4f); /* 57 */
+ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
+ II (c, d, a, b, x[6], S43, 0xa3014314); /* 59 */
+ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
+ II (a, b, c, d, x[4], S41, 0xf7537e82); /* 61 */
+ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
+ II (c, d, a, b, x[2], S43, 0x2ad7d2bb); /* 63 */
+ II (b, c, d, a, x[9], S44, 0xeb86d391); /* 64 */
+
+ a += m->state[0];
+ b += m->state[1];
+ c += m->state[2];
+ d += m->state[3];
+
+ if (result)
+ {
+ result[0] = clib_host_to_little_u32 (a);
+ result[1] = clib_host_to_little_u32 (b);
+ result[2] = clib_host_to_little_u32 (c);
+ result[3] = clib_host_to_little_u32 (d);
+ }
+ else
+ {
+ m->state[0] = a;
+ m->state[1] = b;
+ m->state[2] = c;
+ m->state[3] = d;
+ }
+
+ /* Zero sensitive information. */
+ if (result)
+ memset (m, ~0, sizeof (m[0]));
+ else if (zero_buffer)
+ memset (m->input_buffer.b8, 0, sizeof (m->input_buffer));
+}
+
+/* MD5 initialization. Begins an MD5 operation, writing a new context. */
+void
+md5_init (md5_context_t * c)
+{
+ memset (c, 0, sizeof (c[0]));
+
+ /* Load magic initialization constants. */
+ c->state[0] = 0x67452301;
+ c->state[1] = 0xefcdab89;
+ c->state[2] = 0x98badcfe;
+ c->state[3] = 0x10325476;
+}
+
+always_inline void __attribute__ ((unused))
+md5_fill_buffer_aligned (md5_context_t * c, u32 * d32)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (c->input_buffer.b32); i++)
+ c->input_buffer.b32[i] = d32[i];
+}
+
+/* MD5 block update operation. Continues an MD5 message-digest
+ operation, processing another message block, and updating the
+ context.
+ */
+void
+md5_add (md5_context_t * c, void *data, int data_bytes)
+{
+ u32 data_bytes_left;
+ void *d;
+
+ if (data_bytes == 0)
+ return;
+
+ d = data;
+ data_bytes_left = data_bytes;
+
+ if ((pointer_to_uword (d) % sizeof (u32)) == 0
+ && (c->n_bits % BITS (c->input_buffer)) == 0
+ && data_bytes >= sizeof (c->input_buffer))
+ {
+ int is_last_iteration;
+ /* Fast aligned version. */
+ do
+ {
+ data_bytes_left -= sizeof (c->input_buffer);
+ is_last_iteration = data_bytes_left < sizeof (c->input_buffer);
+ md5_transform (c, d, /* result */ 0, /* zero_buffer */
+ is_last_iteration);
+ d += sizeof (c->input_buffer);
+ }
+ while (!is_last_iteration);
+ }
+
+ /* Slow unaligned version. */
+ {
+ int bi;
+ u8 *d8 = d;
+
+ bi = (c->n_bits / BITS (u8)) % ARRAY_LEN (c->input_buffer.b8);
+
+ while (data_bytes_left > 0)
+ {
+ c->input_buffer.b8[bi] = d8[0];
+ data_bytes_left -= 1;
+ d8++;
+ bi++;
+ if (bi == ARRAY_LEN (c->input_buffer.b8))
+ {
+ bi = 0;
+ md5_transform (c, c->input_buffer.b32,
+ /* result */ 0,
+ /* zero_buffer */ 1);
+ }
+ }
+ }
+
+ c->n_bits += data_bytes * BITS (u8);
+}
+
+void
+md5_finish (md5_context_t * c, u8 * digest)
+{
+ u64 n_bits_save;
+ int bi, n_pad;
+ static u8 padding[sizeof (c->input_buffer)] = { 0x80, 0, };
+
+ n_bits_save = c->n_bits;
+ bi = (n_bits_save / BITS (u8)) % ARRAY_LEN (c->input_buffer.b8);
+
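+  /* Pad so the final block ends with exactly 8 bytes reserved for
+     the little-endian 64-bit bit count, as the MD5 padding rule
+     requires; at least 1 and at most 64 padding bytes are added. */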
+ n_pad = sizeof (c->input_buffer) - (bi + sizeof (u64));
+ if (n_pad <= 0)
+ n_pad += sizeof (c->input_buffer);
+ md5_add (c, padding, n_pad);
+
+ c->input_buffer.b64[ARRAY_LEN (c->input_buffer.b64) - 1]
+ = clib_host_to_little_u64 (n_bits_save);
+
+ md5_transform (c, c->input_buffer.b32, (u32 *) digest,
+ /* zero_buffer */ 1);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/md5.h b/src/vppinfra/md5.h
new file mode 100644
index 00000000..52123886
--- /dev/null
+++ b/src/vppinfra/md5.h
@@ -0,0 +1,57 @@
+/*
+ Copyright (c) 2004 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_md5_h
+#define included_md5_h
+
+#include <vppinfra/clib.h>
+
+typedef struct
+{
+ u64 n_bits;
+
+ u32 state[4];
+
+ union
+ {
+ u64 b64[8];
+ u32 b32[16];
+ u8 b8[16 * 4];
+ } input_buffer;
+
+  /* md5_finish writes the resulting 16-byte message digest to a
+     caller-supplied buffer; it is not stored in this context. */
+} md5_context_t;
+
+void md5_init (md5_context_t * c);
+void md5_add (md5_context_t * c, void *data, int data_bytes);
+void md5_finish (md5_context_t * c, u8 digest[16]);
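+
+/* Usage sketch (illustrative):
+
+     md5_context_t ctx;
+     u8 digest[16];
+     char msg[] = "abc";
+
+     md5_init (&ctx);
+     md5_add (&ctx, msg, 3);
+     md5_finish (&ctx, digest);
+     // digest now holds the 16-byte MD5 of "abc"
+*/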
+
+#endif /* included_md5_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h
new file mode 100644
index 00000000..69ab8803
--- /dev/null
+++ b/src/vppinfra/mem.h
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef _included_clib_mem_h
+#define _included_clib_mem_h
+
+#include <stdarg.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <vppinfra/clib.h> /* uword, etc */
+#include <vppinfra/clib_error.h>
+#include <vppinfra/mheap_bootstrap.h>
+#include <vppinfra/os.h>
+#include <vppinfra/string.h> /* memcpy, memset */
+#include <vppinfra/valgrind.h>
+
+#define CLIB_MAX_MHEAPS 256
+
+/* Per CPU heaps. */
+extern void *clib_per_cpu_mheaps[CLIB_MAX_MHEAPS];
+
+always_inline void *
+clib_mem_get_per_cpu_heap (void)
+{
+ int cpu = os_get_thread_index ();
+ return clib_per_cpu_mheaps[cpu];
+}
+
+always_inline void *
+clib_mem_set_per_cpu_heap (u8 * new_heap)
+{
+ int cpu = os_get_thread_index ();
+ void *old = clib_per_cpu_mheaps[cpu];
+ clib_per_cpu_mheaps[cpu] = new_heap;
+ return old;
+}
+
+/* Memory allocator which may call os_out_of_memory() if it fails */
+always_inline void *
+clib_mem_alloc_aligned_at_offset (uword size, uword align, uword align_offset,
+ int os_out_of_memory_on_failure)
+{
+ void *heap, *p;
+ uword offset, cpu;
+
+ if (align_offset > align)
+ {
+ if (align > 0)
+ align_offset %= align;
+ else
+ align_offset = align;
+ }
+
+ cpu = os_get_thread_index ();
+ heap = clib_per_cpu_mheaps[cpu];
+ heap = mheap_get_aligned (heap, size, align, align_offset, &offset);
+ clib_per_cpu_mheaps[cpu] = heap;
+
+ if (offset != ~0)
+ {
+ p = heap + offset;
+#if CLIB_DEBUG > 0
+ VALGRIND_MALLOCLIKE_BLOCK (p, mheap_data_bytes (heap, offset), 0, 0);
+#endif
+ return p;
+ }
+ else
+ {
+ if (os_out_of_memory_on_failure)
+ os_out_of_memory ();
+ return 0;
+ }
+}
+
+/* Memory allocator which calls os_out_of_memory() when it fails */
+always_inline void *
+clib_mem_alloc (uword size)
+{
+ return clib_mem_alloc_aligned_at_offset (size, /* align */ 1,
+ /* align_offset */ 0,
+ /* os_out_of_memory */ 1);
+}
+
+always_inline void *
+clib_mem_alloc_aligned (uword size, uword align)
+{
+ return clib_mem_alloc_aligned_at_offset (size, align, /* align_offset */ 0,
+ /* os_out_of_memory */ 1);
+}
+
+/* Memory allocator which returns 0 instead of calling
+   os_out_of_memory() on failure */
+always_inline void *
+clib_mem_alloc_or_null (uword size)
+{
+ return clib_mem_alloc_aligned_at_offset (size, /* align */ 1,
+ /* align_offset */ 0,
+ /* os_out_of_memory */ 0);
+}
+
+always_inline void *
+clib_mem_alloc_aligned_or_null (uword size, uword align)
+{
+ return clib_mem_alloc_aligned_at_offset (size, align, /* align_offset */ 0,
+ /* os_out_of_memory */ 0);
+}
+
+
+
+/* Memory allocator which panics when it fails.
+ Use macro so that clib_panic macro can expand __FUNCTION__ and __LINE__. */
+#define clib_mem_alloc_aligned_no_fail(size,align) \
+({ \
+ uword _clib_mem_alloc_size = (size); \
+ void * _clib_mem_alloc_p; \
+ _clib_mem_alloc_p = clib_mem_alloc_aligned (_clib_mem_alloc_size, (align)); \
+ if (! _clib_mem_alloc_p) \
+ clib_panic ("failed to allocate %d bytes", _clib_mem_alloc_size); \
+ _clib_mem_alloc_p; \
+})
+
+#define clib_mem_alloc_no_fail(size) clib_mem_alloc_aligned_no_fail(size,1)
+
+/* Alias to stack allocator for naming consistency. */
+#define clib_mem_alloc_stack(bytes) __builtin_alloca(bytes)
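+
+/* Usage sketch (illustrative): allocations come from the per-thread
+   mheap and are returned with clib_mem_free:
+
+     u8 *p = clib_mem_alloc_aligned (128, CLIB_CACHE_LINE_BYTES);
+     ...
+     clib_mem_free (p);
+*/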
+
+always_inline uword
+clib_mem_is_heap_object (void *p)
+{
+ void *heap = clib_mem_get_per_cpu_heap ();
+ uword offset = (uword) p - (uword) heap;
+ mheap_elt_t *e, *n;
+
+ if (offset >= vec_len (heap))
+ return 0;
+
+ e = mheap_elt_at_uoffset (heap, offset);
+ n = mheap_next_elt (e);
+
+ /* Check that heap forward and reverse pointers agree. */
+ return e->n_user_data == n->prev_n_user_data;
+}
+
+always_inline void
+clib_mem_free (void *p)
+{
+ u8 *heap = clib_mem_get_per_cpu_heap ();
+
+ /* Make sure object is in the correct heap. */
+ ASSERT (clib_mem_is_heap_object (p));
+
+ mheap_put (heap, (u8 *) p - heap);
+
+#if CLIB_DEBUG > 0
+ VALGRIND_FREELIKE_BLOCK (p, 0);
+#endif
+}
+
+always_inline void *
+clib_mem_realloc (void *p, uword new_size, uword old_size)
+{
+ /* By default use alloc, copy and free to emulate realloc. */
+ void *q = clib_mem_alloc (new_size);
+ if (q)
+ {
+ uword copy_size;
+ if (old_size < new_size)
+ copy_size = old_size;
+ else
+ copy_size = new_size;
+ clib_memcpy (q, p, copy_size);
+ clib_mem_free (p);
+ }
+ return q;
+}
+
+always_inline uword
+clib_mem_size (void *p)
+{
+ ASSERT (clib_mem_is_heap_object (p));
+ mheap_elt_t *e = mheap_user_pointer_to_elt (p);
+ return mheap_elt_data_bytes (e);
+}
+
+always_inline void *
+clib_mem_get_heap (void)
+{
+ return clib_mem_get_per_cpu_heap ();
+}
+
+always_inline void *
+clib_mem_set_heap (void *heap)
+{
+ return clib_mem_set_per_cpu_heap (heap);
+}
+
+void *clib_mem_init (void *heap, uword size);
+
+void clib_mem_exit (void);
+
+uword clib_mem_get_page_size (void);
+
+void clib_mem_validate (void);
+
+void clib_mem_trace (int enable);
+
+typedef struct
+{
+ /* Total number of objects allocated. */
+ uword object_count;
+
+ /* Total allocated bytes. Bytes used and free.
+ used + free = total */
+ uword bytes_total, bytes_used, bytes_free;
+
+ /* Number of bytes used by mheap data structure overhead
+ (e.g. free lists, mheap header). */
+ uword bytes_overhead;
+
+ /* Amount of free space returned to operating system. */
+ uword bytes_free_reclaimed;
+
+ /* For malloc which puts small objects in sbrk region and
+ large objects in mmap'ed regions. */
+ uword bytes_used_sbrk;
+ uword bytes_used_mmap;
+
+ /* Max. number of bytes in this heap. */
+ uword bytes_max;
+} clib_mem_usage_t;
+
+void clib_mem_usage (clib_mem_usage_t * usage);
+
+u8 *format_clib_mem_usage (u8 * s, va_list * args);
+
+/* Allocate virtual address space. */
+always_inline void *
+clib_mem_vm_alloc (uword size)
+{
+ void *mmap_addr;
+ uword flags = MAP_PRIVATE;
+
+#ifdef MAP_ANONYMOUS
+ flags |= MAP_ANONYMOUS;
+#endif
+
+ mmap_addr = mmap (0, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (mmap_addr == (void *) -1)
+ mmap_addr = 0;
+
+ return mmap_addr;
+}
+
+always_inline void
+clib_mem_vm_free (void *addr, uword size)
+{
+ munmap (addr, size);
+}
+
+always_inline void *
+clib_mem_vm_unmap (void *addr, uword size)
+{
+ void *mmap_addr;
+ uword flags = MAP_PRIVATE | MAP_FIXED;
+
+ /* To unmap we "map" with no protection. If we actually called
+ munmap then other callers could steal the address space. By
+ changing to PROT_NONE the kernel can free up the pages which is
+ really what we want "unmap" to mean. */
+ mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0);
+ if (mmap_addr == (void *) -1)
+ mmap_addr = 0;
+
+ return mmap_addr;
+}
+
+always_inline void *
+clib_mem_vm_map (void *addr, uword size)
+{
+ void *mmap_addr;
+ uword flags = MAP_PRIVATE | MAP_FIXED;
+
+ mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0);
+ if (mmap_addr == (void *) -1)
+ mmap_addr = 0;
+
+ return mmap_addr;
+}
+
+typedef struct
+{
+#define CLIB_MEM_VM_F_SHARED (1 << 0)
+#define CLIB_MEM_VM_F_HUGETLB (1 << 1)
+#define CLIB_MEM_VM_F_NUMA_PREFER (1 << 2)
+#define CLIB_MEM_VM_F_NUMA_FORCE (1 << 3)
+#define CLIB_MEM_VM_F_HUGETLB_PREALLOC (1 << 4)
+ u32 flags; /**< vm allocation flags:
+ <br> CLIB_MEM_VM_F_SHARED: request shared memory, file
+                    descriptor will be provided on successful allocation.
+ <br> CLIB_MEM_VM_F_HUGETLB: request hugepages.
+ <br> CLIB_MEM_VM_F_NUMA_PREFER: numa_node field contains valid
+ numa node preference.
+ <br> CLIB_MEM_VM_F_NUMA_FORCE: fail if setting numa policy fails.
+ <br> CLIB_MEM_VM_F_HUGETLB_PREALLOC: pre-allocate hugepages if
+ number of available pages is not sufficient.
+ */
+ char *name; /**< Name for memory allocation, set by caller. */
+ uword size; /**< Allocation size, set by caller. */
+ int numa_node; /**< numa node preference. Valid if CLIB_MEM_VM_F_NUMA_PREFER set. */
+ void *addr; /**< Pointer to allocated memory, set on successful allocation. */
+  int fd;	/**< File descriptor, set on successful allocation if CLIB_MEM_VM_F_SHARED is set. */
+  int log2_page_size;		/**< Page size in log2 format, set on successful allocation. */
+  int n_pages;			/**< Number of pages. */
+} clib_mem_vm_alloc_t;
+
+clib_error_t *clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a);
+int clib_mem_vm_get_log2_page_size (int fd);
+u64 *clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages);
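+
+/* Usage sketch (illustrative; the name and size are made up):
+   request 16 MB of shared hugepage memory.  On success, a->addr and
+   a->fd are filled in by the allocator:
+
+     clib_mem_vm_alloc_t _a = { 0 }, *a = &_a;
+     clib_error_t *err;
+
+     a->name = "my-region";
+     a->size = 16 << 20;
+     a->flags = CLIB_MEM_VM_F_SHARED | CLIB_MEM_VM_F_HUGETLB;
+     if ((err = clib_mem_vm_ext_alloc (a)))
+       clib_error_report (err);
+*/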
+
+
+#include <vppinfra/error.h> /* clib_panic */
+
+#endif /* _included_clib_mem_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/mem_mheap.c b/src/vppinfra/mem_mheap.c
new file mode 100644
index 00000000..9b2af520
--- /dev/null
+++ b/src/vppinfra/mem_mheap.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/os.h>
+
+/* Valgrind stuff. */
+#include <vppinfra/memcheck.h>
+#include <vppinfra/valgrind.h>
+
+void *clib_per_cpu_mheaps[CLIB_MAX_MHEAPS];
+
+void
+clib_mem_exit (void)
+{
+ u8 *heap = clib_mem_get_per_cpu_heap ();
+ if (heap)
+ mheap_free (heap);
+ clib_mem_set_per_cpu_heap (0);
+}
+
+/* Initialize CLIB heap based on memory/size given by user.
+ Set memory to 0 and CLIB will try to allocate its own heap. */
+void *
+clib_mem_init (void *memory, uword memory_size)
+{
+ u8 *heap;
+
+ if (memory || memory_size)
+ heap = mheap_alloc (memory, memory_size);
+ else
+ {
+ /* Allocate lots of address space since this will limit
+ the amount of memory the program can allocate.
+ In the kernel we're more conservative since some architectures
+ (e.g. mips) have pretty small kernel virtual address spaces. */
+#ifdef __KERNEL__
+#define MAX_VM_MEG 64
+#else
+#define MAX_VM_MEG 1024
+#endif
+
+ uword alloc_size = MAX_VM_MEG << 20;
+ uword tries = 16;
+
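+      /* Shrink the request to 3/4 of the previous size after each
+         failed attempt, giving up after 16 tries. */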
+ while (1)
+ {
+ heap = mheap_alloc (0, alloc_size);
+ if (heap)
+ break;
+ alloc_size = (alloc_size * 3) / 4;
+ tries--;
+ if (tries == 0)
+ break;
+ }
+ }
+
+ clib_mem_set_heap (heap);
+
+ return heap;
+}
+
+#ifdef CLIB_LINUX_KERNEL
+#include <asm/page.h>
+
+uword
+clib_mem_get_page_size (void)
+{
+ return PAGE_SIZE;
+}
+#endif
+
+#ifdef CLIB_UNIX
+uword
+clib_mem_get_page_size (void)
+{
+ return getpagesize ();
+}
+#endif
+
+/* Make a guess for standalone. */
+#ifdef CLIB_STANDALONE
+uword
+clib_mem_get_page_size (void)
+{
+ return 4096;
+}
+#endif
+
+u8 *
+format_clib_mem_usage (u8 * s, va_list * va)
+{
+ int verbose = va_arg (*va, int);
+ return format (s, "%U", format_mheap, clib_mem_get_heap (), verbose);
+}
+
+void
+clib_mem_usage (clib_mem_usage_t * u)
+{
+ mheap_usage (clib_mem_get_heap (), u);
+}
+
+/* Call serial number for debugger breakpoints. */
+uword clib_mem_validate_serial = 0;
+
+void
+clib_mem_validate (void)
+{
+ if (MHEAP_HAVE_SMALL_OBJECT_CACHE)
+ clib_warning ("clib_mem_validate disabled (small object cache is ON)");
+ else
+ {
+ mheap_validate (clib_mem_get_heap ());
+ clib_mem_validate_serial++;
+ }
+}
+
+void
+clib_mem_trace (int enable)
+{
+ mheap_trace (clib_mem_get_heap (), enable);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/memcheck.h b/src/vppinfra/memcheck.h
new file mode 100644
index 00000000..44db3a8a
--- /dev/null
+++ b/src/vppinfra/memcheck.h
@@ -0,0 +1,317 @@
+
+/*
+ ----------------------------------------------------------------
+
+ Notice that the following BSD-style license applies to this one
+ file (memcheck.h) only. The rest of Valgrind is licensed under the
+ terms of the GNU General Public License, version 2, unless
+ otherwise indicated. See the COPYING file in the source
+ distribution for details.
+
+ ----------------------------------------------------------------
+
+ This file is part of MemCheck, a heavyweight Valgrind tool for
+ detecting memory errors.
+
+ Copyright (C) 2000-2009 Julian Seward. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. The origin of this software must not be misrepresented; you must
+ not claim that you wrote the original software. If you use this
+ software in a product, an acknowledgment in the product
+ documentation would be appreciated but is not required.
+
+ 3. Altered source versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+
+ 4. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ----------------------------------------------------------------
+
+ Notice that the above BSD-style license applies to this one file
+ (memcheck.h) only. The entire rest of Valgrind is licensed under
+ the terms of the GNU General Public License, version 2. See the
+ COPYING file in the source distribution for details.
+
+ ----------------------------------------------------------------
+*/
+
+
+#ifndef __MEMCHECK_H
+#define __MEMCHECK_H
+
+
+/* This file is for inclusion into client (your!) code.
+
+ You can use these macros to manipulate and query memory permissions
+ inside your own programs.
+
+ See comment near the top of valgrind.h on how to use them.
+*/
+
+#include "valgrind.h"
+
+/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !!
+ This enum comprises an ABI exported by Valgrind to programs
+ which use client requests. DO NOT CHANGE THE ORDER OF THESE
+ ENTRIES, NOR DELETE ANY -- add new ones at the end. */
+typedef enum
+{
+ VG_USERREQ__MAKE_MEM_NOACCESS = VG_USERREQ_TOOL_BASE ('M', 'C'),
+ VG_USERREQ__MAKE_MEM_UNDEFINED,
+ VG_USERREQ__MAKE_MEM_DEFINED,
+ VG_USERREQ__DISCARD,
+ VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE,
+ VG_USERREQ__CHECK_MEM_IS_DEFINED,
+ VG_USERREQ__DO_LEAK_CHECK,
+ VG_USERREQ__COUNT_LEAKS,
+
+ VG_USERREQ__GET_VBITS,
+ VG_USERREQ__SET_VBITS,
+
+ VG_USERREQ__CREATE_BLOCK,
+
+ VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE,
+
+ /* Not next to VG_USERREQ__COUNT_LEAKS because it was added later. */
+ VG_USERREQ__COUNT_LEAK_BLOCKS,
+
+ /* This is just for memcheck's internal use - don't use it */
+ _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR
+ = VG_USERREQ_TOOL_BASE ('M', 'C') + 256
+} Vg_MemCheckClientRequest;
+
+
+
+/* Client-code macros to manipulate the state of memory. */
+
+/* Mark memory at _qzz_addr as unaddressable for _qzz_len bytes. */
+#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr,_qzz_len) \
+ (__extension__({unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
+ VG_USERREQ__MAKE_MEM_NOACCESS, \
+ _qzz_addr, _qzz_len, 0, 0, 0); \
+ _qzz_res; \
+ }))
+
+/* Similarly, mark memory at _qzz_addr as addressable but undefined
+ for _qzz_len bytes. */
+#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr,_qzz_len) \
+ (__extension__({unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
+ VG_USERREQ__MAKE_MEM_UNDEFINED, \
+ _qzz_addr, _qzz_len, 0, 0, 0); \
+ _qzz_res; \
+ }))
+
+/* Similarly, mark memory at _qzz_addr as addressable and defined
+ for _qzz_len bytes. */
+#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr,_qzz_len) \
+ (__extension__({unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
+ VG_USERREQ__MAKE_MEM_DEFINED, \
+ _qzz_addr, _qzz_len, 0, 0, 0); \
+ _qzz_res; \
+ }))
+
+/* Similar to VALGRIND_MAKE_MEM_DEFINED except that addressability is
+ not altered: bytes which are addressable are marked as defined,
+ but those which are not addressable are left unchanged. */
+#define VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(_qzz_addr,_qzz_len) \
+ (__extension__({unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
+ VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE, \
+ _qzz_addr, _qzz_len, 0, 0, 0); \
+ _qzz_res; \
+ }))
+
+/* Create a block-description handle. The description is an ascii
+ string which is included in any messages pertaining to addresses
+ within the specified memory range. Has no other effect on the
+ properties of the memory range. */
+#define VALGRIND_CREATE_BLOCK(_qzz_addr,_qzz_len, _qzz_desc) \
+ (__extension__({unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
+ VG_USERREQ__CREATE_BLOCK, \
+ _qzz_addr, _qzz_len, _qzz_desc, \
+ 0, 0); \
+ _qzz_res; \
+ }))
+
+/* Discard a block-description-handle. Returns 1 for an
+ invalid handle, 0 for a valid handle. */
+#define VALGRIND_DISCARD(_qzz_blkindex) \
+ (__extension__ ({unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* default return */, \
+ VG_USERREQ__DISCARD, \
+ 0, _qzz_blkindex, 0, 0, 0); \
+ _qzz_res; \
+ }))
+
+
+/* Client-code macros to check the state of memory. */
+
+/* Check that memory at _qzz_addr is addressable for _qzz_len bytes.
+   If suitable addressability is not established, Valgrind prints an
+ error message and returns the address of the first offending byte.
+ Otherwise it returns zero. */
+#define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(_qzz_addr,_qzz_len) \
+ (__extension__({unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE,\
+ _qzz_addr, _qzz_len, 0, 0, 0); \
+ _qzz_res; \
+ }))
+
+/* Check that memory at _qzz_addr is addressable and defined for
+   _qzz_len bytes.  If suitable addressability and definedness are not
+ established, Valgrind prints an error message and returns the
+ address of the first offending byte. Otherwise it returns zero. */
+#define VALGRIND_CHECK_MEM_IS_DEFINED(_qzz_addr,_qzz_len) \
+ (__extension__({unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__CHECK_MEM_IS_DEFINED, \
+ _qzz_addr, _qzz_len, 0, 0, 0); \
+ _qzz_res; \
+ }))
+
+/* Use this macro to force the definedness and addressability of an
+   lvalue to be checked.  If suitable addressability and definedness
+ are not established, Valgrind prints an error message and returns
+ the address of the first offending byte. Otherwise it returns
+ zero. */
+#define VALGRIND_CHECK_VALUE_IS_DEFINED(__lvalue) \
+ VALGRIND_CHECK_MEM_IS_DEFINED( \
+ (volatile unsigned char *)&(__lvalue), \
+ (unsigned long)(sizeof (__lvalue)))
+
+
+/* Do a full memory leak check (like --leak-check=full) mid-execution. */
+#define VALGRIND_DO_LEAK_CHECK \
+ {unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__DO_LEAK_CHECK, \
+ 0, 0, 0, 0, 0); \
+ }
+
+/* Do a summary memory leak check (like --leak-check=summary) mid-execution. */
+#define VALGRIND_DO_QUICK_LEAK_CHECK \
+ {unsigned long _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__DO_LEAK_CHECK, \
+ 1, 0, 0, 0, 0); \
+ }
+
+/* Return number of leaked, dubious, reachable and suppressed bytes found by
+ all previous leak checks. They must be lvalues. */
+#define VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed) \
+ /* For safety on 64-bit platforms we assign the results to private
+ unsigned long variables, then assign these to the lvalues the user
+ specified, which works no matter what type 'leaked', 'dubious', etc
+ are. We also initialise '_qzz_leaked', etc because
+ VG_USERREQ__COUNT_LEAKS doesn't mark the values returned as
+ defined. */ \
+ {unsigned long _qzz_res; \
+ unsigned long _qzz_leaked = 0, _qzz_dubious = 0; \
+ unsigned long _qzz_reachable = 0, _qzz_suppressed = 0; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__COUNT_LEAKS, \
+ &_qzz_leaked, &_qzz_dubious, \
+ &_qzz_reachable, &_qzz_suppressed, 0); \
+ leaked = _qzz_leaked; \
+ dubious = _qzz_dubious; \
+ reachable = _qzz_reachable; \
+ suppressed = _qzz_suppressed; \
+ }
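+
+/* Illustrative pairing of the two requests above (variable names are
+   hypothetical):
+
+     unsigned long leaked, dubious, reachable, suppressed;
+     VALGRIND_DO_LEAK_CHECK;
+     VALGRIND_COUNT_LEAKS (leaked, dubious, reachable, suppressed);
+
+   Afterwards the four lvalues hold cumulative byte counts from all
+   leak checks run so far. */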
+
+/* Return number of leaked, dubious, reachable and suppressed blocks found by
+ all previous leak checks. They must be lvalues. */
+#define VALGRIND_COUNT_LEAK_BLOCKS(leaked, dubious, reachable, suppressed) \
+ /* For safety on 64-bit platforms we assign the results to private
+ unsigned long variables, then assign these to the lvalues the user
+ specified, which works no matter what type 'leaked', 'dubious', etc
+ are. We also initialise '_qzz_leaked', etc because
+ VG_USERREQ__COUNT_LEAK_BLOCKS doesn't mark the values returned as
+ defined. */ \
+ {unsigned long _qzz_res; \
+ unsigned long _qzz_leaked = 0, _qzz_dubious = 0; \
+ unsigned long _qzz_reachable = 0, _qzz_suppressed = 0; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__COUNT_LEAK_BLOCKS, \
+ &_qzz_leaked, &_qzz_dubious, \
+ &_qzz_reachable, &_qzz_suppressed, 0); \
+ leaked = _qzz_leaked; \
+ dubious = _qzz_dubious; \
+ reachable = _qzz_reachable; \
+ suppressed = _qzz_suppressed; \
+ }
+
+
+/* Get the validity data for addresses [zza..zza+zznbytes-1] and copy it
+ into the provided zzvbits array. Return values:
+ 0 if not running on valgrind
+ 1 success
+ 2 [previously indicated unaligned arrays; these are now allowed]
+ 3 if any parts of zza/zzvbits are not addressable.
+ The metadata is not copied in cases 0, 2 or 3 so it should be
+ impossible to segfault your system by using this call.
+*/
+#define VALGRIND_GET_VBITS(zza,zzvbits,zznbytes) \
+ (__extension__({unsigned long _qzz_res; \
+ char* czza = (char*)zza; \
+ char* czzvbits = (char*)zzvbits; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__GET_VBITS, \
+ czza, czzvbits, zznbytes, 0, 0 ); \
+ _qzz_res; \
+ }))
+
+/* Set the validity data for addresses [zza..zza+zznbytes-1], copying it
+ from the provided zzvbits array. Return values:
+ 0 if not running on valgrind
+ 1 success
+ 2 [previously indicated unaligned arrays; these are now allowed]
+ 3 if any parts of zza/zzvbits are not addressable.
+ The metadata is not copied in cases 0, 2 or 3 so it should be
+ impossible to segfault your system by using this call.
+*/
+#define VALGRIND_SET_VBITS(zza,zzvbits,zznbytes) \
+ (__extension__({unsigned int _qzz_res; \
+ char* czza = (char*)zza; \
+ char* czzvbits = (char*)zzvbits; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__SET_VBITS, \
+ czza, czzvbits, zznbytes, 0, 0 ); \
+ _qzz_res; \
+ }))
+
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/memcpy_avx.h b/src/vppinfra/memcpy_avx.h
new file mode 100644
index 00000000..e987d044
--- /dev/null
+++ b/src/vppinfra/memcpy_avx.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef included_clib_memcpy_avx_h
+#define included_clib_memcpy_avx_h
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+static inline void
+clib_mov16 (u8 * dst, const u8 * src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128 ((const __m128i *) src);
+ _mm_storeu_si128 ((__m128i *) dst, xmm0);
+}
+
+static inline void
+clib_mov32 (u8 * dst, const u8 * src)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_loadu_si256 ((const __m256i *) src);
+ _mm256_storeu_si256 ((__m256i *) dst, ymm0);
+}
+
+static inline void
+clib_mov64 (u8 * dst, const u8 * src)
+{
+ clib_mov32 ((u8 *) dst + 0 * 32, (const u8 *) src + 0 * 32);
+ clib_mov32 ((u8 *) dst + 1 * 32, (const u8 *) src + 1 * 32);
+}
+
+static inline void
+clib_mov128 (u8 * dst, const u8 * src)
+{
+ clib_mov64 ((u8 *) dst + 0 * 64, (const u8 *) src + 0 * 64);
+ clib_mov64 ((u8 *) dst + 1 * 64, (const u8 *) src + 1 * 64);
+}
+
+static inline void
+clib_mov256 (u8 * dst, const u8 * src)
+{
+ clib_mov128 ((u8 *) dst + 0 * 128, (const u8 *) src + 0 * 128);
+ clib_mov128 ((u8 *) dst + 1 * 128, (const u8 *) src + 1 * 128);
+}
+
+static inline void
+clib_mov64blocks (u8 * dst, const u8 * src, size_t n)
+{
+ __m256i ymm0, ymm1;
+
+ while (n >= 64)
+ {
+ ymm0 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 0 * 32));
+ n -= 64;
+ ymm1 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 1 * 32));
+ src = (const u8 *) src + 64;
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 0 * 32), ymm0);
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 1 * 32), ymm1);
+ dst = (u8 *) dst + 64;
+ }
+}
+
+static inline void
+clib_mov256blocks (u8 * dst, const u8 * src, size_t n)
+{
+ __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
+
+ while (n >= 256)
+ {
+ ymm0 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 0 * 32));
+ n -= 256;
+ ymm1 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 1 * 32));
+ ymm2 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 2 * 32));
+ ymm3 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 3 * 32));
+ ymm4 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 4 * 32));
+ ymm5 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 5 * 32));
+ ymm6 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 6 * 32));
+ ymm7 =
+ _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 7 * 32));
+ src = (const u8 *) src + 256;
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 0 * 32), ymm0);
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 1 * 32), ymm1);
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 2 * 32), ymm2);
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 3 * 32), ymm3);
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 4 * 32), ymm4);
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 5 * 32), ymm5);
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 6 * 32), ymm6);
+ _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 7 * 32), ymm7);
+ dst = (u8 *) dst + 256;
+ }
+}
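+
+/* Note: the two block copiers above deliberately leave any tail
+   shorter than one block (64 or 256 bytes) uncopied; clib_memcpy
+   below handles the remainder. */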
+
+static inline void *
+clib_memcpy (void *dst, const void *src, size_t n)
+{
+ uword dstu = (uword) dst;
+ uword srcu = (uword) src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16)
+ {
+ if (n & 0x01)
+ {
+ *(u8 *) dstu = *(const u8 *) srcu;
+ srcu = (uword) ((const u8 *) srcu + 1);
+ dstu = (uword) ((u8 *) dstu + 1);
+ }
+ if (n & 0x02)
+ {
+ *(u16 *) dstu = *(const u16 *) srcu;
+ srcu = (uword) ((const u16 *) srcu + 1);
+ dstu = (uword) ((u16 *) dstu + 1);
+ }
+ if (n & 0x04)
+ {
+ *(u32 *) dstu = *(const u32 *) srcu;
+ srcu = (uword) ((const u32 *) srcu + 1);
+ dstu = (uword) ((u32 *) dstu + 1);
+ }
+ if (n & 0x08)
+ {
+ *(u64 *) dstu = *(const u64 *) srcu;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32)
+ {
+ clib_mov16 ((u8 *) dst, (const u8 *) src);
+ clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
+ return ret;
+ }
+ if (n <= 64)
+ {
+ clib_mov32 ((u8 *) dst, (const u8 *) src);
+ clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
+ return ret;
+ }
+ if (n <= 512)
+ {
+ if (n >= 256)
+ {
+ n -= 256;
+ clib_mov256 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + 256;
+ dst = (u8 *) dst + 256;
+ }
+ if (n >= 128)
+ {
+ n -= 128;
+ clib_mov128 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + 128;
+ dst = (u8 *) dst + 128;
+ }
+ if (n >= 64)
+ {
+ n -= 64;
+ clib_mov64 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + 64;
+ dst = (u8 *) dst + 64;
+ }
+ COPY_BLOCK_64_BACK31:
+ if (n > 32)
+ {
+ clib_mov32 ((u8 *) dst, (const u8 *) src);
+ clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
+ return ret;
+ }
+ if (n > 0)
+ {
+ clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes
+ */
+ dstofss = (uword) dst & 0x1F;
+ if (dstofss > 0)
+ {
+ dstofss = 32 - dstofss;
+ n -= dstofss;
+ clib_mov32 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + dstofss;
+ dst = (u8 *) dst + dstofss;
+ }
+
+ /**
+ * Copy 256-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ clib_mov256blocks ((u8 *) dst, (const u8 *) src, n);
+ bits = n;
+ n = n & 255;
+ bits -= n;
+ src = (const u8 *) src + bits;
+ dst = (u8 *) dst + bits;
+
+ /**
+ * Copy 64-byte blocks.
+ * Use the block-copy function for better instruction-order control,
+ * which is important when the load is unaligned.
+ */
+ if (n >= 64)
+ {
+ clib_mov64blocks ((u8 *) dst, (const u8 *) src, n);
+ bits = n;
+ n = n & 63;
+ bits -= n;
+ src = (const u8 *) src + bits;
+ dst = (u8 *) dst + bits;
+ }
+
+ /**
+ * Copy whatever is left
+ */
+ goto COPY_BLOCK_64_BACK31;
+}
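+
+/* Usage matches memcpy(3); an illustrative call:
+
+     u8 to[1024], from[1024];
+     clib_memcpy (to, from, sizeof (from));
+
+   Copies under 16 bytes take the scalar path above; larger ones use
+   32-byte AVX loads/stores, aligning the store side past 512 bytes. */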
+
+
+#endif /* included_clib_memcpy_avx_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/memcpy_sse3.h b/src/vppinfra/memcpy_sse3.h
new file mode 100644
index 00000000..f61396c8
--- /dev/null
+++ b/src/vppinfra/memcpy_sse3.h
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef included_clib_memcpy_sse3_h
+#define included_clib_memcpy_sse3_h
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+static inline void
+clib_mov16 (u8 * dst, const u8 * src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128 ((const __m128i *) src);
+ _mm_storeu_si128 ((__m128i *) dst, xmm0);
+}
+
+static inline void
+clib_mov32 (u8 * dst, const u8 * src)
+{
+ clib_mov16 ((u8 *) dst + 0 * 16, (const u8 *) src + 0 * 16);
+ clib_mov16 ((u8 *) dst + 1 * 16, (const u8 *) src + 1 * 16);
+}
+
+static inline void
+clib_mov64 (u8 * dst, const u8 * src)
+{
+ clib_mov32 ((u8 *) dst + 0 * 32, (const u8 *) src + 0 * 32);
+ clib_mov32 ((u8 *) dst + 1 * 32, (const u8 *) src + 1 * 32);
+}
+
+static inline void
+clib_mov128 (u8 * dst, const u8 * src)
+{
+ clib_mov64 ((u8 *) dst + 0 * 64, (const u8 *) src + 0 * 64);
+ clib_mov64 ((u8 *) dst + 1 * 64, (const u8 *) src + 1 * 64);
+}
+
+static inline void
+clib_mov256 (u8 * dst, const u8 * src)
+{
+ clib_mov128 ((u8 *) dst + 0 * 128, (const u8 *) src + 0 * 128);
+ clib_mov128 ((u8 *) dst + 1 * 128, (const u8 *) src + 1 * 128);
+}
+
+/**
+ * Macro for copying an unaligned block from one location to another
+ * with a constant load offset, leaving at most 47 bytes uncopied;
+ * the locations must not overlap.
+ * Requirements:
+ * - The store address is aligned
+ * - The load offset is <offset>, which must be an immediate value within [1, 15]
+ * - For <src>, make sure <offset> bytes backwards and <16 - offset> bytes forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> must be pre-defined
+ */
+#define CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, offset) \
+({ \
+ int tmp; \
+ while (len >= 128 + 16 - offset) { \
+ xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \
+ len -= 128; \
+ xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \
+ xmm3 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 3 * 16)); \
+ xmm4 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 4 * 16)); \
+ xmm5 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 5 * 16)); \
+ xmm6 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 6 * 16)); \
+ xmm7 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 7 * 16)); \
+ xmm8 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 8 * 16)); \
+ src = (const u8 *)src + 128; \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \
+ dst = (u8 *)dst + 128; \
+ } \
+ tmp = len; \
+ len = ((len - 16 + offset) & 127) + 16 - offset; \
+ tmp -= len; \
+ src = (const u8 *)src + tmp; \
+ dst = (u8 *)dst + tmp; \
+ if (len >= 32 + 16 - offset) { \
+ while (len >= 32 + 16 - offset) { \
+ xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \
+ len -= 32; \
+ xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \
+ src = (const u8 *)src + 32; \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ dst = (u8 *)dst + 32; \
+ } \
+ tmp = len; \
+ len = ((len - 16 + offset) & 31) + 16 - offset; \
+ tmp -= len; \
+ src = (const u8 *)src + tmp; \
+ dst = (u8 *)dst + tmp; \
+ } \
+})
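+
+/* Note on the realignment above: _mm_alignr_epi8 (hi, lo, off)
+   concatenates hi:lo and extracts 16 bytes starting <off> bytes in,
+   e.g. (illustrative) _mm_alignr_epi8 (xmm1, xmm0, 3) yields bytes
+   3..15 of xmm0 followed by bytes 0..2 of xmm1. This is how the
+   aligned store stream is assembled from offset loads. */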
+
+/**
+ * Macro for copying an unaligned block from one location to another,
+ * leaving at most 47 bytes uncopied;
+ * the locations must not overlap.
+ * A switch is used because the aligning instruction (palignr) requires an immediate value for the shift count.
+ * Requirements:
+ * - The store address is aligned
+ * - The load offset is <offset>, which must be within [1, 15]
+ * - For <src>, make sure <offset> bytes backwards and <16 - offset> bytes forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> used in CLIB_MVUNALIGN_LEFT47_IMM must be pre-defined
+ */
+#define CLIB_MVUNALIGN_LEFT47(dst, src, len, offset) \
+({ \
+ switch (offset) { \
+ case 0x01: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x01); break; \
+ case 0x02: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x02); break; \
+ case 0x03: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x03); break; \
+ case 0x04: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x04); break; \
+ case 0x05: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x05); break; \
+ case 0x06: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x06); break; \
+ case 0x07: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x07); break; \
+ case 0x08: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x08); break; \
+ case 0x09: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x09); break; \
+ case 0x0A: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x0A); break; \
+ case 0x0B: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x0B); break; \
+ case 0x0C: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x0C); break; \
+ case 0x0D: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x0D); break; \
+ case 0x0E: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x0E); break; \
+ case 0x0F: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, 0x0F); break; \
+ default:; \
+ } \
+})
+
+static inline void *
+clib_memcpy (void *dst, const void *src, size_t n)
+{
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
+ uword dstu = (uword) dst;
+ uword srcu = (uword) src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t srcofs;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16)
+ {
+ if (n & 0x01)
+ {
+ *(u8 *) dstu = *(const u8 *) srcu;
+ srcu = (uword) ((const u8 *) srcu + 1);
+ dstu = (uword) ((u8 *) dstu + 1);
+ }
+ if (n & 0x02)
+ {
+ *(u16 *) dstu = *(const u16 *) srcu;
+ srcu = (uword) ((const u16 *) srcu + 1);
+ dstu = (uword) ((u16 *) dstu + 1);
+ }
+ if (n & 0x04)
+ {
+ *(u32 *) dstu = *(const u32 *) srcu;
+ srcu = (uword) ((const u32 *) srcu + 1);
+ dstu = (uword) ((u32 *) dstu + 1);
+ }
+ if (n & 0x08)
+ {
+ *(u64 *) dstu = *(const u64 *) srcu;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32)
+ {
+ clib_mov16 ((u8 *) dst, (const u8 *) src);
+ clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
+ return ret;
+ }
+ if (n <= 48)
+ {
+ clib_mov32 ((u8 *) dst, (const u8 *) src);
+ clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
+ return ret;
+ }
+ if (n <= 64)
+ {
+ clib_mov32 ((u8 *) dst, (const u8 *) src);
+ clib_mov16 ((u8 *) dst + 32, (const u8 *) src + 32);
+ clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
+ return ret;
+ }
+ if (n <= 128)
+ {
+ goto COPY_BLOCK_128_BACK15;
+ }
+ if (n <= 512)
+ {
+ if (n >= 256)
+ {
+ n -= 256;
+ clib_mov128 ((u8 *) dst, (const u8 *) src);
+ clib_mov128 ((u8 *) dst + 128, (const u8 *) src + 128);
+ src = (const u8 *) src + 256;
+ dst = (u8 *) dst + 256;
+ }
+ COPY_BLOCK_255_BACK15:
+ if (n >= 128)
+ {
+ n -= 128;
+ clib_mov128 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + 128;
+ dst = (u8 *) dst + 128;
+ }
+ COPY_BLOCK_128_BACK15:
+ if (n >= 64)
+ {
+ n -= 64;
+ clib_mov64 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + 64;
+ dst = (u8 *) dst + 64;
+ }
+ COPY_BLOCK_64_BACK15:
+ if (n >= 32)
+ {
+ n -= 32;
+ clib_mov32 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + 32;
+ dst = (u8 *) dst + 32;
+ }
+ if (n > 16)
+ {
+ clib_mov16 ((u8 *) dst, (const u8 *) src);
+ clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
+ return ret;
+ }
+ if (n > 0)
+ {
+ clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes,
+ * and make sure the first 15 bytes are copied, because
+ * the unaligned copy functions require up to 15 bytes of
+ * backwards access.
+ */
+ dstofss = 16 - ((uword) dst & 0x0F) + 16;
+ n -= dstofss;
+ clib_mov32 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + dstofss;
+ dst = (u8 *) dst + dstofss;
+ srcofs = ((uword) src & 0x0F);
+
+ /**
+ * For aligned copy
+ */
+ if (srcofs == 0)
+ {
+ /**
+ * Copy 256-byte blocks
+ */
+ for (; n >= 256; n -= 256)
+ {
+ clib_mov256 ((u8 *) dst, (const u8 *) src);
+ dst = (u8 *) dst + 256;
+ src = (const u8 *) src + 256;
+ }
+
+ /**
+ * Copy whatever is left
+ */
+ goto COPY_BLOCK_255_BACK15;
+ }
+
+ /**
+ * For copy with unaligned load
+ */
+ CLIB_MVUNALIGN_LEFT47 (dst, src, n, srcofs);
+
+ /**
+ * Copy whatever is left
+ */
+ goto COPY_BLOCK_64_BACK15;
+}
+
+
+#undef CLIB_MVUNALIGN_LEFT47_IMM
+#undef CLIB_MVUNALIGN_LEFT47
+
+#endif /* included_clib_memcpy_sse3_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/mhash.c b/src/vppinfra/mhash.c
new file mode 100644
index 00000000..00b67c49
--- /dev/null
+++ b/src/vppinfra/mhash.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2010 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/mhash.h>
+
+always_inline u32
+load_partial_u32 (void *d, uword n)
+{
+ if (n == 4)
+ return ((u32 *) d)[0];
+ if (n == 3)
+ return ((u16 *) d)[0] | (((u8 *) d)[2] << 16);
+ if (n == 2)
+ return ((u16 *) d)[0];
+ if (n == 1)
+ return ((u8 *) d)[0];
+ ASSERT (0);
+ return 0;
+}
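+
+/* E.g. (illustrative): for d = {0x11, 0x22, 0x33} and n = 3 this
+   returns 0x00332211 on a little-endian machine. */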
+
+always_inline u32
+mhash_key_sum_inline (void *data, uword n_data_bytes, u32 seed)
+{
+ u32 *d32 = data;
+ u32 a, b, c, n_left;
+
+ a = b = c = seed;
+ n_left = n_data_bytes;
+ a ^= n_data_bytes;
+
+ while (n_left > 12)
+ {
+ a += d32[0];
+ b += d32[1];
+ c += d32[2];
+ hash_v3_mix32 (a, b, c);
+ n_left -= 12;
+ d32 += 3;
+ }
+
+ if (n_left > 8)
+ {
+ c += load_partial_u32 (d32 + 2, n_left - 8);
+ n_left = 8;
+ }
+ if (n_left > 4)
+ {
+ b += load_partial_u32 (d32 + 1, n_left - 4);
+ n_left = 4;
+ }
+ if (n_left > 0)
+ a += load_partial_u32 (d32 + 0, n_left - 0);
+
+ hash_v3_finalize32 (a, b, c);
+
+ return c;
+}
+
+#define foreach_mhash_key_size \
+ _ (2) _ (3) _ (4) _ (5) _ (6) _ (7) \
+ _ (8) _ (12) _ (16) _ (20) \
+ _ (24) _ (28) _ (32) _ (36) \
+ _ (40) _ (44) _ (48) _ (52) \
+ _ (56) _ (60) _ (64)
+
+#define _(N_KEY_BYTES) \
+ static uword \
+ mhash_key_sum_##N_KEY_BYTES (hash_t * h, uword key) \
+ { \
+ mhash_t * hv = uword_to_pointer (h->user, mhash_t *); \
+ return mhash_key_sum_inline (mhash_key_to_mem (hv, key), \
+ (N_KEY_BYTES), \
+ hv->hash_seed); \
+ } \
+ \
+ static uword \
+ mhash_key_equal_##N_KEY_BYTES (hash_t * h, uword key1, uword key2) \
+ { \
+ mhash_t * hv = uword_to_pointer (h->user, mhash_t *); \
+ void * k1 = mhash_key_to_mem (hv, key1); \
+ void * k2 = mhash_key_to_mem (hv, key2); \
+ return ! memcmp (k1, k2, (N_KEY_BYTES)); \
+ }
+
+foreach_mhash_key_size
+#undef _
+static uword
+mhash_key_sum_c_string (hash_t * h, uword key)
+{
+ mhash_t *hv = uword_to_pointer (h->user, mhash_t *);
+ void *k = mhash_key_to_mem (hv, key);
+ return mhash_key_sum_inline (k, strlen (k), hv->hash_seed);
+}
+
+static uword
+mhash_key_equal_c_string (hash_t * h, uword key1, uword key2)
+{
+ mhash_t *hv = uword_to_pointer (h->user, mhash_t *);
+ void *k1 = mhash_key_to_mem (hv, key1);
+ void *k2 = mhash_key_to_mem (hv, key2);
+ return strcmp (k1, k2) == 0;
+}
+
+static uword
+mhash_key_sum_vec_string (hash_t * h, uword key)
+{
+ mhash_t *hv = uword_to_pointer (h->user, mhash_t *);
+ void *k = mhash_key_to_mem (hv, key);
+ return mhash_key_sum_inline (k, vec_len (k), hv->hash_seed);
+}
+
+static uword
+mhash_key_equal_vec_string (hash_t * h, uword key1, uword key2)
+{
+ mhash_t *hv = uword_to_pointer (h->user, mhash_t *);
+ void *k1 = mhash_key_to_mem (hv, key1);
+ void *k2 = mhash_key_to_mem (hv, key2);
+ return vec_len (k1) == vec_len (k2) && memcmp (k1, k2, vec_len (k1)) == 0;
+}
+
+/* The CLIB hash user pointer must always point to a valid mhash_t.
+ Now, the address of mhash_t can change (think vec_resize).
+ So we must always be careful that it points to the correct
+ address. */
+always_inline void
+mhash_sanitize_hash_user (mhash_t * mh)
+{
+ uword *hash = mh->hash;
+ hash_t *h = hash_header (hash);
+ h->user = pointer_to_uword (mh);
+}
+
+void
+mhash_init (mhash_t * h, uword n_value_bytes, uword n_key_bytes)
+{
+ static struct
+ {
+ hash_key_sum_function_t *key_sum;
+ hash_key_equal_function_t *key_equal;
+ } t[] =
+ {
+#define _(N_KEY_BYTES) \
+ [N_KEY_BYTES] = { \
+ .key_sum = mhash_key_sum_##N_KEY_BYTES, \
+ .key_equal = mhash_key_equal_##N_KEY_BYTES, \
+ },
+
+ foreach_mhash_key_size
+#undef _
+ [MHASH_C_STRING_KEY] = {
+ .key_sum = mhash_key_sum_c_string,
+ .key_equal = mhash_key_equal_c_string,
+ },
+ [MHASH_VEC_STRING_KEY] = {
+ .key_sum = mhash_key_sum_vec_string,
+ .key_equal = mhash_key_equal_vec_string,
+ },
+ };
+
+ if (mhash_key_vector_is_heap (h))
+ heap_free (h->key_vector_or_heap);
+ else
+ vec_free (h->key_vector_or_heap);
+ vec_free (h->key_vector_free_indices);
+ {
+ int i;
+ for (i = 0; i < vec_len (h->key_tmps); i++)
+ vec_free (h->key_tmps[i]);
+ }
+ vec_free (h->key_tmps);
+ hash_free (h->hash);
+
+ memset (h, 0, sizeof (h[0]));
+ h->n_key_bytes = n_key_bytes;
+
+#if 0
+ if (h->n_key_bytes > 0)
+ {
+ vec_validate (h->key_tmp, h->n_key_bytes - 1);
+ _vec_len (h->key_tmp) = 0;
+ }
+#endif
+
+ ASSERT (n_key_bytes < ARRAY_LEN (t));
+ h->hash = hash_create2 ( /* elts */ 0,
+ /* user */ pointer_to_uword (h),
+ /* value_bytes */ n_value_bytes,
+ t[n_key_bytes].key_sum, t[n_key_bytes].key_equal,
+ /* format pair/arg */
+ 0, 0);
+}
+
+static uword
+mhash_set_tmp_key (mhash_t * h, const void *key)
+{
+ u8 *key_tmp;
+ int my_cpu = os_get_thread_index ();
+
+ vec_validate (h->key_tmps, my_cpu);
+ key_tmp = h->key_tmps[my_cpu];
+
+ vec_reset_length (key_tmp);
+
+ if (mhash_key_vector_is_heap (h))
+ {
+ uword is_c_string = h->n_key_bytes == MHASH_C_STRING_KEY;
+
+ if (is_c_string)
+ vec_add (key_tmp, key, strlen (key) + 1);
+ else
+ vec_add (key_tmp, key, vec_len (key));
+ }
+ else
+ vec_add (key_tmp, key, h->n_key_bytes);
+
+ h->key_tmps[my_cpu] = key_tmp;
+
+ return ~0;
+}
+
+hash_pair_t *
+mhash_get_pair (mhash_t * h, const void *key)
+{
+ uword ikey;
+ mhash_sanitize_hash_user (h);
+ ikey = mhash_set_tmp_key (h, key);
+ return hash_get_pair (h->hash, ikey);
+}
+
+typedef struct
+{
+ u32 heap_handle;
+
+ /* Must coincide with vec_header. */
+ vec_header_t vec;
+} mhash_string_key_t;
+
+uword
+mhash_set_mem (mhash_t * h, void *key, uword * new_value, uword * old_value)
+{
+ u8 *k;
+ uword ikey, i, l = 0, n_key_bytes, old_n_elts, key_alloc_from_free_list = 0;
+
+ mhash_sanitize_hash_user (h);
+
+ if (mhash_key_vector_is_heap (h))
+ {
+ mhash_string_key_t *sk;
+ uword is_c_string = h->n_key_bytes == MHASH_C_STRING_KEY;
+ uword handle;
+
+ n_key_bytes = is_c_string ? (strlen (key) + 1) : vec_len (key);
+ i = heap_alloc (h->key_vector_or_heap,
+ n_key_bytes + sizeof (sk[0]), handle);
+
+ sk = (void *) (h->key_vector_or_heap + i);
+ sk->heap_handle = handle;
+ sk->vec.len = n_key_bytes;
+ clib_memcpy (sk->vec.vector_data, key, n_key_bytes);
+
+ /* Advance key past vector header. */
+ i += sizeof (sk[0]);
+ }
+ else
+ {
+ l = vec_len (h->key_vector_free_indices);
+ key_alloc_from_free_list = l > 0;
+ if (key_alloc_from_free_list)
+ {
+ i = h->key_vector_free_indices[l - 1];
+ k = vec_elt_at_index (h->key_vector_or_heap, i);
+ _vec_len (h->key_vector_free_indices) = l - 1;
+ }
+ else
+ {
+ vec_add2 (h->key_vector_or_heap, k, h->n_key_bytes);
+ i = k - h->key_vector_or_heap;
+ }
+
+ n_key_bytes = h->n_key_bytes;
+ clib_memcpy (k, key, n_key_bytes);
+ }
+ ikey = i;
+
+ old_n_elts = hash_elts (h->hash);
+ h->hash = _hash_set3 (h->hash, ikey, new_value, old_value);
+
+ /* If element already existed remove duplicate key. */
+ if (hash_elts (h->hash) == old_n_elts)
+ {
+ hash_pair_t *p;
+
+ /* Fetch old key for return value. */
+ p = hash_get_pair (h->hash, ikey);
+ ikey = p->key;
+
+ /* Remove duplicate key. */
+ if (mhash_key_vector_is_heap (h))
+ {
+ mhash_string_key_t *sk;
+ sk = (void *) (h->key_vector_or_heap + i - sizeof (sk[0]));
+ heap_dealloc (h->key_vector_or_heap, sk->heap_handle);
+ }
+ else
+ {
+ if (key_alloc_from_free_list)
+ {
+ /* Slot l - 1 still holds i; just restore the vector length. */
+ h->key_vector_free_indices[l - 1] = i;
+ _vec_len (h->key_vector_free_indices) = l;
+ }
+ else
+ _vec_len (h->key_vector_or_heap) -= h->n_key_bytes;
+ }
+ }
+
+ return ikey;
+}
+
+uword
+mhash_unset (mhash_t * h, void *key, uword * old_value)
+{
+ hash_pair_t *p;
+ uword i;
+
+ mhash_sanitize_hash_user (h);
+ i = mhash_set_tmp_key (h, key);
+
+ p = hash_get_pair (h->hash, i);
+ if (!p)
+ return 0;
+
+ ASSERT (p->key != ~0);
+ i = p->key;
+
+ if (mhash_key_vector_is_heap (h))
+ {
+ mhash_string_key_t *sk;
+ sk = (void *) (h->key_vector_or_heap + i) - sizeof (sk[0]);
+ heap_dealloc (h->key_vector_or_heap, sk->heap_handle);
+ }
+ else
+ vec_add1 (h->key_vector_free_indices, i);
+
+ hash_unset3 (h->hash, i, old_value);
+ return 1;
+}
+
+u8 *
+format_mhash_key (u8 * s, va_list * va)
+{
+ mhash_t *h = va_arg (*va, mhash_t *);
+ u32 ki = va_arg (*va, u32);
+ void *k = mhash_key_to_mem (h, ki);
+
+ if (mhash_key_vector_is_heap (h))
+ {
+ uword is_c_string = h->n_key_bytes == MHASH_C_STRING_KEY;
+ u32 l = is_c_string ? strlen (k) : vec_len (k);
+ vec_add (s, k, l);
+ }
+ else if (h->format_key)
+ s = format (s, "%U", h->format_key, k);
+ else
+ s = format (s, "%U", format_hex_bytes, k, h->n_key_bytes);
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/mhash.h b/src/vppinfra/mhash.h
new file mode 100644
index 00000000..7eb19183
--- /dev/null
+++ b/src/vppinfra/mhash.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_clib_mhash_h
+#define included_clib_mhash_h
+
+/*
+ Copyright (c) 2010 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/heap.h>
+
+/* Hash table plus vector of keys. */
+typedef struct
+{
+ /* Vector or heap used to store keys. Hash table stores keys as byte
+ offsets into this vector. */
+ u8 *key_vector_or_heap;
+
+ /* Byte offsets of free keys in vector (used to store free keys when
+ n_key_bytes > 1). */
+ u32 *key_vector_free_indices;
+
+ u8 **key_tmps;
+
+ /* Possibly fixed size of key.
+ 0 means keys are vectors of u8's.
+ 1 means keys are null-terminated C strings. */
+#define MHASH_VEC_STRING_KEY 0
+#define MHASH_C_STRING_KEY 1
+ u32 n_key_bytes;
+
+ /* Seed value for Jenkins hash. */
+ u32 hash_seed;
+
+ /* Hash table mapping key -> value. */
+ uword *hash;
+
+ /* Format function for keys. */
+ format_function_t *format_key;
+} mhash_t;
+
+void mhash_init (mhash_t * h, uword n_value_bytes, uword n_key_bytes);
+
+always_inline void
+mhash_init_c_string (mhash_t * h, uword n_value_bytes)
+{
+ mhash_init (h, n_value_bytes, MHASH_C_STRING_KEY);
+}
+
+always_inline void
+mhash_init_vec_string (mhash_t * h, uword n_value_bytes)
+{
+ mhash_init (h, n_value_bytes, MHASH_VEC_STRING_KEY);
+}
+
+always_inline void *
+mhash_key_to_mem (mhash_t * h, uword key)
+{
+ if (key == ~0)
+ {
+ u8 *key_tmp;
+
+ int my_cpu = os_get_thread_index ();
+ vec_validate (h->key_tmps, my_cpu);
+ key_tmp = h->key_tmps[my_cpu];
+ return key_tmp;
+ }
+ return vec_elt_at_index (h->key_vector_or_heap, key);
+}
+
+hash_pair_t *mhash_get_pair (mhash_t * h, const void *key);
+uword mhash_set_mem (mhash_t * h, void *key, uword * new_value,
+ uword * old_value);
+uword mhash_unset (mhash_t * h, void *key, uword * old_value);
+
+always_inline uword *
+mhash_get (mhash_t * h, const void *key)
+{
+ hash_pair_t *p = mhash_get_pair (h, key);
+ return p ? &p->value[0] : 0;
+}
+
+always_inline uword
+mhash_set (mhash_t * h, void *key, uword new_value, uword * old_value)
+{
+ return mhash_set_mem (h, key, &new_value, old_value);
+}
+
+always_inline uword
+mhash_unset_key (mhash_t * h, uword key, uword * old_value)
+{
+ void *k = mhash_key_to_mem (h, key);
+ return mhash_unset (h, k, old_value);
+}
+
+always_inline uword
+mhash_value_bytes (mhash_t * m)
+{
+ hash_t *h = hash_header (m->hash);
+ return hash_value_bytes (h);
+}
+
+always_inline uword
+mhash_elts (mhash_t * m)
+{
+ return hash_elts (m->hash);
+}
+
+always_inline uword
+mhash_key_vector_is_heap (mhash_t * h)
+{
+ return h->n_key_bytes <= 1;
+}
+
+always_inline void
+mhash_free (mhash_t * h)
+{
+ if (mhash_key_vector_is_heap (h))
+ heap_free (h->key_vector_or_heap);
+ else
+ vec_free (h->key_vector_or_heap);
+ vec_free (h->key_vector_free_indices);
+ hash_free (h->hash);
+}
+
+#define mhash_foreach(k,v,mh,body) \
+do { \
+ hash_pair_t * _mhash_foreach_p; \
+ hash_foreach_pair (_mhash_foreach_p, (mh)->hash, ({ \
+ (k) = mhash_key_to_mem ((mh), _mhash_foreach_p->key); \
+ (v) = &_mhash_foreach_p->value[0]; \
+ body; \
+ })); \
+} while (0)
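+
+/* Illustrative usage sketch (names are hypothetical):
+
+     mhash_t mh;
+     u32 key = 42;
+     uword *p, old;
+
+     memset (&mh, 0, sizeof (mh));
+     mhash_init (&mh, sizeof (uword), sizeof (key));
+     mhash_set (&mh, &key, 17, &old);
+     p = mhash_get (&mh, &key);
+     mhash_unset (&mh, &key, &old);
+     mhash_free (&mh);
+
+   mhash_get returns a pointer to the stored value, or 0 when the key
+   is absent. */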
+
+format_function_t format_mhash_key;
+
+#endif /* included_clib_mhash_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c
new file mode 100644
index 00000000..5bbbc65f
--- /dev/null
+++ b/src/vppinfra/mheap.c
@@ -0,0 +1,1643 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/bitops.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/format.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/os.h>
+#include <vppinfra/time.h>
+
+#ifdef CLIB_UNIX
+#include <vppinfra/elf_clib.h>
+#endif
+
+static void mheap_get_trace (void *v, uword offset, uword size);
+static void mheap_put_trace (void *v, uword offset, uword size);
+static int mheap_trace_sort (const void *t1, const void *t2);
+
+always_inline void
+mheap_maybe_lock (void *v)
+{
+ mheap_t *h = mheap_header (v);
+ if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE))
+ {
+ u32 my_cpu = os_get_thread_index ();
+ if (h->owner_cpu == my_cpu)
+ {
+ h->recursion_count++;
+ return;
+ }
+
+ while (__sync_lock_test_and_set (&h->lock, 1))
+ ;
+
+ h->owner_cpu = my_cpu;
+ h->recursion_count = 1;
+ }
+}
+
+always_inline void
+mheap_maybe_unlock (void *v)
+{
+ mheap_t *h = mheap_header (v);
+ if (v && h->flags & MHEAP_FLAG_THREAD_SAFE)
+ {
+ ASSERT (os_get_thread_index () == h->owner_cpu);
+ if (--h->recursion_count == 0)
+ {
+ h->owner_cpu = ~0;
+ CLIB_MEMORY_BARRIER ();
+ h->lock = 0;
+ }
+ }
+}
+
+/* Find bin for objects with size at least n_user_data_bytes. */
+always_inline uword
+user_data_size_to_bin_index (uword n_user_data_bytes)
+{
+ uword n_user_data_words;
+ word small_bin, large_bin;
+
+ /* User size must be at least big enough to hold free elt. */
+ n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
+
+ /* Round to words. */
+ n_user_data_words =
+ (round_pow2 (n_user_data_bytes, MHEAP_USER_DATA_WORD_BYTES) /
+ MHEAP_USER_DATA_WORD_BYTES);
+
+ ASSERT (n_user_data_words > 0);
+ small_bin =
+ n_user_data_words -
+ (MHEAP_MIN_USER_DATA_BYTES / MHEAP_USER_DATA_WORD_BYTES);
+ ASSERT (small_bin >= 0);
+
+ large_bin =
+ MHEAP_N_SMALL_OBJECT_BINS + max_log2 (n_user_data_bytes) -
+ MHEAP_LOG2_N_SMALL_OBJECT_BINS;
+
+ return small_bin < MHEAP_N_SMALL_OBJECT_BINS ? small_bin : large_bin;
+}
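+
+/* Worked example (illustrative, with hypothetical constants): if user
+   data words are 8 bytes and the minimum object holds 4 of them, a
+   40-byte request rounds to 5 words and lands in small bin 5 - 4 = 1;
+   a request past the last small bin falls through to a log2-spaced
+   large bin starting at MHEAP_N_SMALL_OBJECT_BINS. */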
+
+always_inline uword
+mheap_elt_size_to_user_n_bytes (uword n_bytes)
+{
+ ASSERT (n_bytes >= sizeof (mheap_elt_t));
+ return (n_bytes - STRUCT_OFFSET_OF (mheap_elt_t, user_data));
+}
+
+always_inline uword __attribute__ ((unused))
+mheap_elt_size_to_user_n_words (uword n_bytes)
+{
+ ASSERT (n_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
+ return mheap_elt_size_to_user_n_bytes (n_bytes) /
+ MHEAP_USER_DATA_WORD_BYTES;
+}
+
+always_inline void
+mheap_elt_set_size (void *v,
+ uword uoffset, uword n_user_data_bytes, uword is_free)
+{
+ mheap_elt_t *e, *n;
+
+ e = mheap_elt_at_uoffset (v, uoffset);
+
+ ASSERT (n_user_data_bytes % MHEAP_USER_DATA_WORD_BYTES == 0);
+
+ e->n_user_data = n_user_data_bytes / MHEAP_USER_DATA_WORD_BYTES;
+ e->is_free = is_free;
+ ASSERT (e->prev_n_user_data * sizeof (e->user_data[0]) >=
+ MHEAP_MIN_USER_DATA_BYTES);
+
+ n = mheap_next_elt (e);
+ n->prev_n_user_data = e->n_user_data;
+ n->prev_is_free = is_free;
+}
+
+always_inline void
+set_first_free_elt_offset (mheap_t * h, uword bin, uword uoffset)
+{
+ uword i0, i1;
+
+ h->first_free_elt_uoffset_by_bin[bin] = uoffset;
+
+ i0 = bin / BITS (h->non_empty_free_elt_heads[0]);
+ i1 = (uword) 1 << (uword) (bin % BITS (h->non_empty_free_elt_heads[0]));
+
+ ASSERT (i0 < ARRAY_LEN (h->non_empty_free_elt_heads));
+ if (h->first_free_elt_uoffset_by_bin[bin] == MHEAP_GROUNDED)
+ h->non_empty_free_elt_heads[i0] &= ~i1;
+ else
+ h->non_empty_free_elt_heads[i0] |= i1;
+}
+
+always_inline void
+set_free_elt (void *v, uword uoffset, uword n_user_data_bytes)
+{
+ mheap_t *h = mheap_header (v);
+ mheap_elt_t *e = mheap_elt_at_uoffset (v, uoffset);
+ mheap_elt_t *n = mheap_next_elt (e);
+ uword bin = user_data_size_to_bin_index (n_user_data_bytes);
+
+ ASSERT (n->prev_is_free);
+ ASSERT (e->is_free);
+
+ e->free_elt.prev_uoffset = MHEAP_GROUNDED;
+ e->free_elt.next_uoffset = h->first_free_elt_uoffset_by_bin[bin];
+
+ /* Fill in next free elt's previous pointer. */
+ if (e->free_elt.next_uoffset != MHEAP_GROUNDED)
+ {
+ mheap_elt_t *nf = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
+ ASSERT (nf->is_free);
+ nf->free_elt.prev_uoffset = uoffset;
+ }
+
+ set_first_free_elt_offset (h, bin, uoffset);
+}
+
+always_inline void
+new_free_elt (void *v, uword uoffset, uword n_user_data_bytes)
+{
+ mheap_elt_set_size (v, uoffset, n_user_data_bytes, /* is_free */ 1);
+ set_free_elt (v, uoffset, n_user_data_bytes);
+}
+
+always_inline void
+remove_free_elt (void *v, mheap_elt_t * e, uword bin)
+{
+ mheap_t *h = mheap_header (v);
+ mheap_elt_t *p, *n;
+#if CLIB_VEC64 > 0
+ u64 no, po;
+#else
+ u32 no, po;
+#endif
+
+ no = e->free_elt.next_uoffset;
+
+ n = no != MHEAP_GROUNDED ? mheap_elt_at_uoffset (v, no) : 0;
+ po = e->free_elt.prev_uoffset;
+ p = po != MHEAP_GROUNDED ? mheap_elt_at_uoffset (v, po) : 0;
+
+ if (!p)
+ set_first_free_elt_offset (h, bin, no);
+ else
+ p->free_elt.next_uoffset = no;
+
+ if (n)
+ n->free_elt.prev_uoffset = po;
+}
+
+always_inline void
+remove_free_elt2 (void *v, mheap_elt_t * e)
+{
+ uword bin;
+ bin = user_data_size_to_bin_index (mheap_elt_data_bytes (e));
+ remove_free_elt (v, e, bin);
+}
+
+#define MHEAP_VM_MAP (1 << 0)
+#define MHEAP_VM_UNMAP (1 << 1)
+#define MHEAP_VM_NOMAP (0 << 1)
+#define MHEAP_VM_ROUND (1 << 2)
+#define MHEAP_VM_ROUND_UP MHEAP_VM_ROUND
+#define MHEAP_VM_ROUND_DOWN (0 << 2)
+
+static uword mheap_page_size;
+
+static_always_inline uword
+mheap_page_round (uword addr)
+{
+ return (addr + mheap_page_size - 1) & ~(mheap_page_size - 1);
+}
+
+static_always_inline uword
+mheap_page_truncate (uword addr)
+{
+ return addr & ~(mheap_page_size - 1);
+}
+
+static_always_inline uword
+mheap_vm (void *v, uword flags, clib_address_t start_addr, uword size)
+{
+ mheap_t *h = mheap_header (v);
+ clib_address_t start_page, end_page, end_addr;
+ uword mapped_bytes;
+
+ ASSERT (!(h->flags & MHEAP_FLAG_DISABLE_VM));
+
+ end_addr = start_addr + size;
+
+ /* Round start/end address up to page boundary. */
+ start_page = mheap_page_round (start_addr);
+
+ if ((flags & MHEAP_VM_ROUND) == MHEAP_VM_ROUND_UP)
+ end_page = mheap_page_round (end_addr);
+ else
+ end_page = mheap_page_truncate (end_addr);
+
+ mapped_bytes = 0;
+ if (end_page > start_page)
+ {
+ mapped_bytes = end_page - start_page;
+ if (flags & MHEAP_VM_MAP)
+ clib_mem_vm_map ((void *) start_page, end_page - start_page);
+ else if (flags & MHEAP_VM_UNMAP)
+ clib_mem_vm_unmap ((void *) start_page, end_page - start_page);
+ }
+
+ return mapped_bytes;
+}
+
+static_always_inline uword
+mheap_vm_elt (void *v, uword flags, uword offset)
+{
+ mheap_elt_t *e;
+ clib_address_t start_addr, end_addr;
+
+ e = mheap_elt_at_uoffset (v, offset);
+ start_addr = (clib_address_t) ((void *) e->user_data);
+ end_addr = (clib_address_t) mheap_next_elt (e);
+ return mheap_vm (v, flags, start_addr, end_addr - start_addr);
+}
+
+always_inline uword
+mheap_small_object_cache_mask (mheap_small_object_cache_t * c, uword bin)
+{
+ uword mask;
+
+/* $$$$ ELIOT FIXME: add Altivec version of this routine */
+#if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__) || defined (__i386__)
+ mask = 0;
+#else
+ u8x16 b = u8x16_splat (bin);
+
+ ASSERT (bin < 256);
+
+#define _(i) ((uword) u8x16_compare_byte_mask (u8x16_is_equal (b, c->bins.as_u8x16[i])) << (uword) ((i)*16))
+ mask = _(0) | _(1);
+ if (BITS (uword) > 32)
+ mask |= _(2) | _(3);
+#undef _
+
+#endif
+ return mask;
+}
+
+always_inline uword
+mheap_get_small_object (mheap_t * h, uword bin)
+{
+ mheap_small_object_cache_t *c = &h->small_object_cache;
+ uword mask = mheap_small_object_cache_mask (c, bin + 1);
+ uword offset = MHEAP_GROUNDED;
+
+ if (mask)
+ {
+ uword i = min_log2 (mask);
+ uword o = c->offsets[i];
+ ASSERT (o != MHEAP_GROUNDED);
+ c->bins.as_u8[i] = 0;
+ offset = o;
+ }
+
+ return offset;
+}
+
+always_inline uword
+mheap_put_small_object (mheap_t * h, uword bin, uword offset)
+{
+ mheap_small_object_cache_t *c = &h->small_object_cache;
+ uword free_mask = mheap_small_object_cache_mask (c, 0);
+ uword b = bin + 1;
+ uword i;
+
+ if (free_mask != 0)
+ {
+ i = min_log2 (free_mask);
+ c->bins.as_u8[i] = b;
+ c->offsets[i] = offset;
+ return 0;
+ }
+ else
+ /* Nothing free with the right size: cyclic replacement. */
+ {
+ uword old_offset;
+
+ i = c->replacement_index++;
+ i %= BITS (uword);
+ c->bins.as_u8[i] = b;
+ old_offset = c->offsets[i];
+ c->offsets[i] = offset;
+
+ /* Return old offset so it can be freed. */
+ return old_offset;
+ }
+}
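+
+/* The cache above is a tiny associative store: a put first looks for
+   an empty slot (bin byte 0); when none is free, the slot at
+   replacement_index modulo BITS (uword) is evicted cyclically and its
+   old offset is handed back so the caller can free it normally. */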
+
+static uword
+mheap_get_search_free_bin (void *v,
+ uword bin,
+ uword * n_user_data_bytes_arg,
+ uword align, uword align_offset)
+{
+ mheap_t *h = mheap_header (v);
+ mheap_elt_t *e;
+
+ /* Free object is at offset f0 ... f1;
+ allocated object is at offset o0 ... o1. */
+ word o0, o1, f0, f1, search_n_user_data_bytes;
+ word lo_free_usize, hi_free_usize;
+
+ ASSERT (h->first_free_elt_uoffset_by_bin[bin] != MHEAP_GROUNDED);
+ e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[bin]);
+
+ search_n_user_data_bytes = *n_user_data_bytes_arg;
+
+ /* Silence compiler warning. */
+ o0 = o1 = f0 = f1 = 0;
+
+ h->stats.free_list.n_search_attempts += 1;
+
+ /* Find an object that is large enough, with correct alignment at the given alignment offset. */
+ while (1)
+ {
+ uword this_object_n_user_data_bytes = mheap_elt_data_bytes (e);
+
+ ASSERT (e->is_free);
+ if (bin < MHEAP_N_SMALL_OBJECT_BINS)
+ ASSERT (this_object_n_user_data_bytes >= search_n_user_data_bytes);
+
+ h->stats.free_list.n_objects_searched += 1;
+
+ if (this_object_n_user_data_bytes < search_n_user_data_bytes)
+ goto next;
+
+ /* Bounds of free object: from f0 to f1. */
+ f0 = ((void *) e->user_data - v);
+ f1 = f0 + this_object_n_user_data_bytes;
+
+ /* Place candidate object at end of free block and align as requested. */
+ o0 = ((f1 - search_n_user_data_bytes) & ~(align - 1)) - align_offset;
+ while (o0 < f0)
+ o0 += align;
+
+ /* Make sure that first free fragment is either empty or
+ large enough to be valid. */
+ while (1)
+ {
+ lo_free_usize = o0 != f0 ? o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES : 0;
+ if (o0 <= f0 || lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES)
+ break;
+ o0 -= align;
+ }
+
+ o1 = o0 + search_n_user_data_bytes;
+
+ /* Does it fit? */
+ if (o0 >= f0 && o1 <= f1)
+ goto found;
+
+ next:
+ /* Reached end of free list without finding large enough object. */
+ if (e->free_elt.next_uoffset == MHEAP_GROUNDED)
+ return MHEAP_GROUNDED;
+
+ /* Otherwise keep searching for large enough object. */
+ e = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
+ }
+
+found:
+ /* Free fragment at end. */
+ hi_free_usize = f1 != o1 ? f1 - o1 - MHEAP_ELT_OVERHEAD_BYTES : 0;
+
+ /* If fragment at end is too small to be a new object,
+ give user's object a bit more space than requested. */
+ if (hi_free_usize < (word) MHEAP_MIN_USER_DATA_BYTES)
+ {
+ search_n_user_data_bytes += f1 - o1;
+ o1 = f1;
+ hi_free_usize = 0;
+ }
+
+ /* Need to make sure that relevant memory areas are mapped. */
+ if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
+ {
+ mheap_elt_t *f0_elt = mheap_elt_at_uoffset (v, f0);
+ mheap_elt_t *f1_elt = mheap_elt_at_uoffset (v, f1);
+ mheap_elt_t *o0_elt = mheap_elt_at_uoffset (v, o0);
+ mheap_elt_t *o1_elt = mheap_elt_at_uoffset (v, o1);
+
+ uword f0_page_start, f0_page_end;
+ uword o0_page_start, o0_page_end;
+
+ /* Free elt is mapped. Addresses after that may not be mapped. */
+ f0_page_start = mheap_page_round (pointer_to_uword (f0_elt->user_data));
+ f0_page_end = mheap_page_truncate (pointer_to_uword (f1_elt));
+
+ o0_page_start = mheap_page_truncate (pointer_to_uword (o0_elt));
+ o0_page_end = mheap_page_round (pointer_to_uword (o1_elt->user_data));
+
+ if (o0_page_start < f0_page_start)
+ o0_page_start = f0_page_start;
+ if (o0_page_end > f0_page_end)
+ o0_page_end = f0_page_end;
+
+ if (o0_page_end > o0_page_start)
+ clib_mem_vm_map (uword_to_pointer (o0_page_start, void *),
+ o0_page_end - o0_page_start);
+ }
+
+ /* Remove free object from free list. */
+ remove_free_elt (v, e, bin);
+
+ /* Free fragment at the beginning. */
+ if (lo_free_usize > 0)
+ {
+ ASSERT (lo_free_usize >= (word) MHEAP_MIN_USER_DATA_BYTES);
+ mheap_elt_set_size (v, f0, lo_free_usize, /* is_free */ 1);
+ new_free_elt (v, f0, lo_free_usize);
+ }
+
+ mheap_elt_set_size (v, o0, search_n_user_data_bytes, /* is_free */ 0);
+
+ if (hi_free_usize > 0)
+ {
+ uword uo = o1 + MHEAP_ELT_OVERHEAD_BYTES;
+ mheap_elt_set_size (v, uo, hi_free_usize, /* is_free */ 1);
+ new_free_elt (v, uo, hi_free_usize);
+ }
+
+ /* Return actual size of block. */
+ *n_user_data_bytes_arg = search_n_user_data_bytes;
+
+ h->stats.free_list.n_objects_found += 1;
+
+ return o0;
+}
+
+/* Search free lists for object with given size and alignment. */
+static uword
+mheap_get_search_free_list (void *v,
+ uword * n_user_bytes_arg,
+ uword align, uword align_offset)
+{
+ mheap_t *h = mheap_header (v);
+ uword bin, n_user_bytes, i, bi;
+
+ n_user_bytes = *n_user_bytes_arg;
+ bin = user_data_size_to_bin_index (n_user_bytes);
+
+ if (MHEAP_HAVE_SMALL_OBJECT_CACHE
+ && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE)
+ && bin < 255
+ && align == STRUCT_SIZE_OF (mheap_elt_t, user_data[0])
+ && align_offset == 0)
+ {
+ uword r = mheap_get_small_object (h, bin);
+ h->stats.n_small_object_cache_attempts += 1;
+ if (r != MHEAP_GROUNDED)
+ {
+ h->stats.n_small_object_cache_hits += 1;
+ return r;
+ }
+ }
+
+ for (i = bin / BITS (uword); i < ARRAY_LEN (h->non_empty_free_elt_heads);
+ i++)
+ {
+ uword non_empty_bin_mask = h->non_empty_free_elt_heads[i];
+
+ /* No need to search smaller bins. */
+ if (i == bin / BITS (uword))
+ non_empty_bin_mask &= ~pow2_mask (bin % BITS (uword));
+
+ /* Search each occupied free bin which is large enough. */
+ /* *INDENT-OFF* */
+ foreach_set_bit (bi, non_empty_bin_mask,
+ ({
+ uword r =
+ mheap_get_search_free_bin (v, bi + i * BITS (uword),
+ n_user_bytes_arg,
+ align,
+ align_offset);
+ if (r != MHEAP_GROUNDED) return r;
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return MHEAP_GROUNDED;
+}
+
+static never_inline void *
+mheap_get_extend_vector (void *v,
+ uword n_user_data_bytes,
+ uword align,
+ uword align_offset, uword * offset_return)
+{
+ /* Bounds of free and allocated objects (as above). */
+ uword f0, f1, o0, o1;
+ word free_size;
+ mheap_t *h = mheap_header (v);
+ mheap_elt_t *e;
+
+ if (_vec_len (v) == 0)
+ {
+ _vec_len (v) = MHEAP_ELT_OVERHEAD_BYTES;
+
+ /* Create first element of heap. */
+ e = mheap_elt_at_uoffset (v, _vec_len (v));
+ e->prev_n_user_data = MHEAP_N_USER_DATA_INVALID;
+ }
+
+ f0 = _vec_len (v);
+
+ o0 = round_pow2 (f0, align) - align_offset;
+ while (1)
+ {
+ free_size = o0 - f0 - MHEAP_ELT_OVERHEAD_BYTES;
+ if (o0 == f0 || free_size >= (word) sizeof (mheap_elt_t))
+ break;
+
+ o0 += align;
+ }
+
+ o1 = o0 + n_user_data_bytes;
+ f1 = o1 + MHEAP_ELT_OVERHEAD_BYTES;
+
+ ASSERT (v != 0);
+ h = mheap_header (v);
+
+ /* Make sure we have space for object plus overhead. */
+ if (f1 > h->max_size)
+ {
+ *offset_return = MHEAP_GROUNDED;
+ return v;
+ }
+
+ _vec_len (v) = f1;
+
+ if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
+ {
+ mheap_elt_t *f0_elt = mheap_elt_at_uoffset (v, f0);
+ mheap_elt_t *f1_elt = mheap_elt_at_uoffset (v, f1);
+
+ uword f0_page = mheap_page_round (pointer_to_uword (f0_elt->user_data));
+ uword f1_page = mheap_page_round (pointer_to_uword (f1_elt->user_data));
+
+ if (f1_page > f0_page)
+ mheap_vm (v, MHEAP_VM_MAP, f0_page, f1_page - f0_page);
+ }
+
+ if (free_size > 0)
+ new_free_elt (v, f0, free_size);
+
+ mheap_elt_set_size (v, o0, n_user_data_bytes, /* is_free */ 0);
+
+ /* Mark last element. */
+ e = mheap_elt_at_uoffset (v, f1);
+ e->n_user_data = MHEAP_N_USER_DATA_INVALID;
+
+ *offset_return = o0;
+
+ return v;
+}
+
+void *
+mheap_get_aligned (void *v,
+ uword n_user_data_bytes,
+ uword align, uword align_offset, uword * offset_return)
+{
+ mheap_t *h;
+ uword offset;
+ u64 cpu_times[2];
+
+ cpu_times[0] = clib_cpu_time_now ();
+
+ align = clib_max (align, STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
+ align = max_pow2 (align);
+
+ /* Correct align offset to be smaller than alignment. */
+ align_offset &= (align - 1);
+
+ /* Align offset must be multiple of minimum object size. */
+ if (align_offset % STRUCT_SIZE_OF (mheap_elt_t, user_data[0]) != 0)
+ {
+ *offset_return = MHEAP_GROUNDED;
+ return v;
+ }
+
+ /* Round requested size. */
+ n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
+ n_user_data_bytes =
+ round_pow2 (n_user_data_bytes,
+ STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
+
+ if (!v)
+ v = mheap_alloc (0, 64 << 20);
+
+ mheap_maybe_lock (v);
+
+ h = mheap_header (v);
+
+ if (h->flags & MHEAP_FLAG_VALIDATE)
+ mheap_validate (v);
+
+ /* First search free lists for object. */
+ offset =
+ mheap_get_search_free_list (v, &n_user_data_bytes, align, align_offset);
+
+ h = mheap_header (v);
+
+ /* If that fails allocate object at end of heap by extending vector. */
+ if (offset == MHEAP_GROUNDED && _vec_len (v) < h->max_size)
+ {
+ v =
+ mheap_get_extend_vector (v, n_user_data_bytes, align, align_offset,
+ &offset);
+ h = mheap_header (v);
+ h->stats.n_vector_expands += offset != MHEAP_GROUNDED;
+ }
+
+ *offset_return = offset;
+ if (offset != MHEAP_GROUNDED)
+ {
+ h->n_elts += 1;
+
+ if (h->flags & MHEAP_FLAG_TRACE)
+ {
+ /* Recursion block for the case when we are tracing the main clib heap. */
+ h->flags &= ~MHEAP_FLAG_TRACE;
+
+ mheap_get_trace (v, offset, n_user_data_bytes);
+
+ h->flags |= MHEAP_FLAG_TRACE;
+ }
+ }
+
+ if (h->flags & MHEAP_FLAG_VALIDATE)
+ mheap_validate (v);
+
+ mheap_maybe_unlock (v);
+
+ cpu_times[1] = clib_cpu_time_now ();
+ h->stats.n_clocks_get += cpu_times[1] - cpu_times[0];
+ h->stats.n_gets += 1;
+
+ return v;
+}
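
A minimal usage sketch for the entry point above, assuming <vppinfra/mheap.h> is included: results come back as heap-relative offsets, and the heap vector itself may move, so callers must keep the returned vector pointer.

    static void *
    alloc_example (void **vp)
    {
      uword offset;

      /* 64 bytes, aligned to 64; *vp == 0 lets the call create a heap. */
      *vp = mheap_get_aligned (*vp, 64, 64, 0, &offset);
      return offset == MHEAP_GROUNDED ? 0 : *vp + offset;
    }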
+
+static void
+free_last_elt (void *v, mheap_elt_t * e)
+{
+ mheap_t *h = mheap_header (v);
+
+ /* Possibly also delete the preceding free element. */
+ if (e->prev_is_free)
+ {
+ e = mheap_prev_elt (e);
+ remove_free_elt2 (v, e);
+ }
+
+ if (e->prev_n_user_data == MHEAP_N_USER_DATA_INVALID)
+ {
+ if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
+ mheap_vm_elt (v, MHEAP_VM_UNMAP, mheap_elt_uoffset (v, e));
+ _vec_len (v) = 0;
+ }
+ else
+ {
+ uword uo = mheap_elt_uoffset (v, e);
+ if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
+ mheap_vm_elt (v, MHEAP_VM_UNMAP, uo);
+ e->n_user_data = MHEAP_N_USER_DATA_INVALID;
+ _vec_len (v) = uo;
+ }
+}
+
+void
+mheap_put (void *v, uword uoffset)
+{
+ mheap_t *h;
+ uword n_user_data_bytes, bin;
+ mheap_elt_t *e, *n;
+ uword trace_uoffset, trace_n_user_data_bytes;
+ u64 cpu_times[2];
+
+ cpu_times[0] = clib_cpu_time_now ();
+
+ h = mheap_header (v);
+
+ mheap_maybe_lock (v);
+
+ if (h->flags & MHEAP_FLAG_VALIDATE)
+ mheap_validate (v);
+
+ ASSERT (h->n_elts > 0);
+ h->n_elts--;
+ h->stats.n_puts += 1;
+
+ e = mheap_elt_at_uoffset (v, uoffset);
+ n = mheap_next_elt (e);
+ n_user_data_bytes = mheap_elt_data_bytes (e);
+
+ trace_uoffset = uoffset;
+ trace_n_user_data_bytes = n_user_data_bytes;
+
+ bin = user_data_size_to_bin_index (n_user_data_bytes);
+ if (MHEAP_HAVE_SMALL_OBJECT_CACHE
+ && bin < 255 && (h->flags & MHEAP_FLAG_SMALL_OBJECT_CACHE))
+ {
+ uoffset = mheap_put_small_object (h, bin, uoffset);
+ if (uoffset == 0)
+ goto done;
+
+ e = mheap_elt_at_uoffset (v, uoffset);
+ n = mheap_next_elt (e);
+ n_user_data_bytes = mheap_elt_data_bytes (e);
+ }
+
+ /* Assert that forward and back pointers are equal. */
+ if (e->n_user_data != n->prev_n_user_data)
+ os_panic ();
+
+ /* Forward and backwards is_free must agree. */
+ if (e->is_free != n->prev_is_free)
+ os_panic ();
+
+ /* Object was already freed. */
+ if (e->is_free)
+ os_panic ();
+
+ /* Special case: delete last element in heap. */
+ if (n->n_user_data == MHEAP_N_USER_DATA_INVALID)
+ free_last_elt (v, e);
+
+ else
+ {
+ uword f0, f1, n_combine;
+
+ f0 = uoffset;
+ f1 = f0 + n_user_data_bytes;
+ n_combine = 0;
+
+ if (e->prev_is_free)
+ {
+ mheap_elt_t *p = mheap_prev_elt (e);
+ f0 = mheap_elt_uoffset (v, p);
+ remove_free_elt2 (v, p);
+ n_combine++;
+ }
+
+ if (n->is_free)
+ {
+ mheap_elt_t *m = mheap_next_elt (n);
+ f1 = (void *) m - v;
+ remove_free_elt2 (v, n);
+ n_combine++;
+ }
+
+ if (n_combine)
+ mheap_elt_set_size (v, f0, f1 - f0, /* is_free */ 1);
+ else
+ e->is_free = n->prev_is_free = 1;
+ set_free_elt (v, f0, f1 - f0);
+
+ if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
+ mheap_vm_elt (v, MHEAP_VM_UNMAP, f0);
+ }
+
+done:
+ h = mheap_header (v);
+
+ if (h->flags & MHEAP_FLAG_TRACE)
+ {
+ /* Recursion block for the case when we are tracing the main clib heap. */
+ h->flags &= ~MHEAP_FLAG_TRACE;
+
+ mheap_put_trace (v, trace_uoffset, trace_n_user_data_bytes);
+
+ h->flags |= MHEAP_FLAG_TRACE;
+ }
+
+ if (h->flags & MHEAP_FLAG_VALIDATE)
+ mheap_validate (v);
+
+ mheap_maybe_unlock (v);
+
+ cpu_times[1] = clib_cpu_time_now ();
+ h->stats.n_clocks_put += cpu_times[1] - cpu_times[0];
+}
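
Freeing hands back the same heap-relative offset, not a pointer; a sketch pairing with the allocation example above:

    static void
    free_example (void *v, void *obj)
    {
      /* uoffset of the user data, as computed by mheap_elt_uoffset (). */
      mheap_put (v, (u8 *) obj - (u8 *) v);
    }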
+
+void *
+mheap_alloc_with_flags (void *memory, uword memory_size, uword flags)
+{
+ mheap_t *h;
+ void *v;
+ uword size;
+
+ if (!mheap_page_size)
+ mheap_page_size = clib_mem_get_page_size ();
+
+ if (!memory)
+ {
+ /* No memory given, try to VM allocate some. */
+ memory = clib_mem_vm_alloc (memory_size);
+ if (!memory)
+ return 0;
+
+ /* No memory region implies we have virtual memory. */
+ flags &= ~MHEAP_FLAG_DISABLE_VM;
+ }
+
+ /* Make sure that given memory is page aligned. */
+ {
+ uword am, av, ah;
+
+ am = pointer_to_uword (memory);
+ av = mheap_page_round (am);
+ v = uword_to_pointer (av, void *);
+ h = mheap_header (v);
+ ah = pointer_to_uword (h);
+ while (ah < am)
+ ah += mheap_page_size;
+
+ h = uword_to_pointer (ah, void *);
+ v = mheap_vector (h);
+
+ if (PREDICT_FALSE (memory + memory_size < v))
+ {
+ /*
+ * This will happen when the requested memory_size is too
+ * small to cope with the heap header and/or memory alignment.
+ */
+ clib_mem_vm_free (memory, memory_size);
+ return 0;
+ }
+
+ size = memory + memory_size - v;
+ }
+
+ /* VM map header so we can use memory. */
+ if (!(flags & MHEAP_FLAG_DISABLE_VM))
+ clib_mem_vm_map (h, sizeof (h[0]));
+
+ /* Zero vector header: both heap header and vector length. */
+ memset (h, 0, sizeof (h[0]));
+ _vec_len (v) = 0;
+
+ h->vm_alloc_offset_from_header = (void *) h - memory;
+ h->vm_alloc_size = memory_size;
+
+ h->max_size = size;
+ h->owner_cpu = ~0;
+
+ /* Set flags from those given, excluding the trace flag. */
+ h->flags |= (flags & ~MHEAP_FLAG_TRACE);
+
+ /* Unmap the remainder of the heap until we are ready to use it. */
+ if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
+ mheap_vm (v, MHEAP_VM_UNMAP | MHEAP_VM_ROUND_UP,
+ (clib_address_t) v, h->max_size);
+
+ /* Initialize free list heads to empty. */
+ memset (h->first_free_elt_uoffset_by_bin, 0xFF,
+ sizeof (h->first_free_elt_uoffset_by_bin));
+
+ return v;
+}
+
+void *
+mheap_alloc (void *memory, uword size)
+{
+ uword flags = 0;
+
+ if (memory != 0)
+ flags |= MHEAP_FLAG_DISABLE_VM;
+
+#ifdef CLIB_HAVE_VEC128
+ flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
+#endif
+
+ return mheap_alloc_with_flags (memory, size, flags);
+}
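
The two construction modes follow directly from the code above: caller-supplied memory disables the VM machinery, while memory == 0 lets the heap VM-allocate its own backing store. A usage sketch with arbitrary sizes:

    static u8 backing[256 << 10];

    static void
    create_example (void)
    {
      /* Caller-supplied memory: MHEAP_FLAG_DISABLE_VM is set. */
      void *fixed_heap = mheap_alloc (backing, sizeof (backing));

      /* memory == 0: backing store is VM-allocated. */
      void *vm_heap = mheap_alloc (0, 16 << 20);

      (void) fixed_heap;
      mheap_free (vm_heap); /* releases the VM backing */
    }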
+
+void *
+_mheap_free (void *v)
+{
+ mheap_t *h = mheap_header (v);
+
+ if (v)
+ clib_mem_vm_free ((void *) h - h->vm_alloc_offset_from_header,
+ h->vm_alloc_size);
+
+ return 0;
+}
+
+/* Call user's function with each object in heap. */
+void
+mheap_foreach (void *v,
+ uword (*func) (void *arg, void *v, void *elt_data,
+ uword elt_size), void *arg)
+{
+ mheap_elt_t *e;
+ u8 *stack_heap, *clib_mem_mheap_save;
+ u8 tmp_heap_memory[16 * 1024];
+
+ mheap_maybe_lock (v);
+
+ if (vec_len (v) == 0)
+ goto done;
+
+ clib_mem_mheap_save = 0;
+ stack_heap = 0;
+
+ /* Allocate a new temporary heap on the stack.
+ This is so that our hash table & user's callback function can
+ themselves allocate memory somewhere without getting in the way
+ of the heap we are looking at. */
+ if (v == clib_mem_get_heap ())
+ {
+ stack_heap = mheap_alloc (tmp_heap_memory, sizeof (tmp_heap_memory));
+ clib_mem_mheap_save = v;
+ clib_mem_set_heap (stack_heap);
+ }
+
+ for (e = v;
+ e->n_user_data != MHEAP_N_USER_DATA_INVALID; e = mheap_next_elt (e))
+ {
+ void *p = mheap_elt_data (v, e);
+ if (e->is_free)
+ continue;
+ if ((*func) (arg, v, p, mheap_elt_data_bytes (e)))
+ break;
+ }
+
+ /* Restore main CLIB heap. */
+ if (clib_mem_mheap_save)
+ clib_mem_set_heap (clib_mem_mheap_save);
+
+done:
+ mheap_maybe_unlock (v);
+}
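
A usage sketch for the iterator above: the callback sees each live object in address order and stops the walk by returning nonzero.

    static uword
    count_cb (void *arg, void *heap, void *obj, uword n_bytes)
    {
      *(uword *) arg += 1;
      return 0; /* nonzero would stop the walk */
    }

    static uword
    count_live_objects (void *v)
    {
      uword n = 0;
      mheap_foreach (v, count_cb, &n);
      return n;
    }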
+
+/* Bytes of mheap overhead, not including user data bytes. */
+always_inline uword
+mheap_bytes_overhead (void *v)
+{
+ mheap_t *h = mheap_header (v);
+ return v ? sizeof (h[0]) + h->n_elts * sizeof (mheap_elt_t) : 0;
+}
+
+/* Total number of bytes including both data and overhead. */
+uword
+mheap_bytes (void *v)
+{
+ return mheap_bytes_overhead (v) + vec_bytes (v);
+}
+
+static void
+mheap_usage_no_lock (void *v, clib_mem_usage_t * usage)
+{
+ mheap_t *h = mheap_header (v);
+ uword used = 0, free = 0, free_vm_unmapped = 0;
+
+ if (vec_len (v) > 0)
+ {
+ mheap_elt_t *e;
+
+ for (e = v;
+ e->n_user_data != MHEAP_N_USER_DATA_INVALID;
+ e = mheap_next_elt (e))
+ {
+ uword size = mheap_elt_data_bytes (e);
+ if (e->is_free)
+ {
+ free += size;
+ if (!(h->flags & MHEAP_FLAG_DISABLE_VM))
+ free_vm_unmapped +=
+ mheap_vm_elt (v, MHEAP_VM_NOMAP, mheap_elt_uoffset (v, e));
+ }
+ else
+ used += size;
+ }
+ }
+
+ usage->object_count = mheap_elts (v);
+ usage->bytes_total = mheap_bytes (v);
+ usage->bytes_overhead = mheap_bytes_overhead (v);
+ usage->bytes_max = mheap_max_size (v);
+ usage->bytes_used = used;
+ usage->bytes_free = free;
+ usage->bytes_free_reclaimed = free_vm_unmapped;
+}
+
+void
+mheap_usage (void *v, clib_mem_usage_t * usage)
+{
+ mheap_maybe_lock (v);
+ mheap_usage_no_lock (v, usage);
+ mheap_maybe_unlock (v);
+}
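
A snapshot sketch using the accessor above (fformat and stderr as provided by vppinfra's format layer):

    static void
    usage_example (void *v)
    {
      clib_mem_usage_t u;

      mheap_usage (v, &u);
      fformat (stderr, "%wd objects, %wd bytes used, %wd bytes free\n",
               u.object_count, u.bytes_used, u.bytes_free);
    }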
+
+static u8 *
+format_mheap_byte_count (u8 * s, va_list * va)
+{
+ uword n_bytes = va_arg (*va, uword);
+ if (n_bytes < 1024)
+ return format (s, "%wd", n_bytes);
+ else
+ return format (s, "%wdk", n_bytes / 1024);
+}
+
+/* Returns first corrupt heap element. */
+static mheap_elt_t *
+mheap_first_corrupt (void *v)
+{
+ mheap_elt_t *e, *n;
+
+ if (vec_len (v) == 0)
+ return 0;
+
+ e = v;
+ while (1)
+ {
+ if (e->n_user_data == MHEAP_N_USER_DATA_INVALID)
+ break;
+
+ n = mheap_next_elt (e);
+
+ if (e->n_user_data != n->prev_n_user_data)
+ return e;
+
+ if (e->is_free != n->prev_is_free)
+ return e;
+
+ e = n;
+ }
+
+ return 0;
+}
+
+static u8 *
+format_mheap_stats (u8 * s, va_list * va)
+{
+ mheap_t *h = va_arg (*va, mheap_t *);
+ mheap_stats_t *st = &h->stats;
+ uword indent = format_get_indent (s);
+
+ s =
+ format (s,
+ "alloc. from small object cache: %Ld hits %Ld attempts (%.2f%%) replacements %d",
+ st->n_small_object_cache_hits, st->n_small_object_cache_attempts,
+ (st->n_small_object_cache_attempts !=
+ 0 ? 100. * (f64) st->n_small_object_cache_hits /
+ (f64) st->n_small_object_cache_attempts : 0.),
+ h->small_object_cache.replacement_index);
+
+ s =
+ format (s,
+ "\n%Ualloc. from free-list: %Ld attempts, %Ld hits (%.2f%%), %Ld considered (per-attempt %.2f)",
+ format_white_space, indent, st->free_list.n_search_attempts,
+ st->free_list.n_objects_found,
+ (st->free_list.n_search_attempts !=
+ 0 ? 100. * (f64) st->free_list.n_objects_found /
+ (f64) st->free_list.n_search_attempts : 0.),
+ st->free_list.n_objects_searched,
+ (st->free_list.n_search_attempts !=
+ 0 ? (f64) st->free_list.n_objects_searched /
+ (f64) st->free_list.n_search_attempts : 0.));
+
+ s = format (s, "\n%Ualloc. from vector-expand: %Ld",
+ format_white_space, indent, st->n_vector_expands);
+
+ s = format (s, "\n%Uallocs: %Ld %.2f clocks/call",
+ format_white_space, indent,
+ st->n_gets, (f64) st->n_clocks_get / (f64) st->n_gets);
+
+ s = format (s, "\n%Ufrees: %Ld %.2f clocks/call",
+ format_white_space, indent,
+ st->n_puts, (f64) st->n_clocks_put / (f64) st->n_puts);
+
+ return s;
+}
+
+u8 *
+format_mheap (u8 * s, va_list * va)
+{
+ void *v = va_arg (*va, u8 *);
+ int verbose = va_arg (*va, int);
+
+ mheap_t *h;
+ uword i, size, indent;
+ clib_mem_usage_t usage;
+ mheap_elt_t *first_corrupt;
+
+ mheap_maybe_lock (v);
+
+ h = mheap_header (v);
+
+ mheap_usage_no_lock (v, &usage);
+
+ indent = format_get_indent (s);
+
+ s =
+ format (s,
+ "%d objects, %U of %U used, %U free, %U reclaimed, %U overhead",
+ usage.object_count, format_mheap_byte_count, usage.bytes_used,
+ format_mheap_byte_count, usage.bytes_total,
+ format_mheap_byte_count, usage.bytes_free,
+ format_mheap_byte_count, usage.bytes_free_reclaimed,
+ format_mheap_byte_count, usage.bytes_overhead);
+
+ if (usage.bytes_max != ~0)
+ s = format (s, ", %U capacity", format_mheap_byte_count, usage.bytes_max);
+
+ /* Show histogram of sizes. */
+ if (verbose > 1)
+ {
+ uword hist[MHEAP_N_BINS];
+ mheap_elt_t *e;
+ uword i, n_hist;
+
+ memset (hist, 0, sizeof (hist));
+
+ n_hist = 0;
+ for (e = v;
+ e->n_user_data != MHEAP_N_USER_DATA_INVALID;
+ e = mheap_next_elt (e))
+ {
+ uword n_user_data_bytes = mheap_elt_data_bytes (e);
+ uword bin = user_data_size_to_bin_index (n_user_data_bytes);
+ if (!e->is_free)
+ {
+ hist[bin] += 1;
+ n_hist += 1;
+ }
+ }
+
+ s = format (s, "\n%U%=12s%=12s%=16s",
+ format_white_space, indent + 2,
+ "Size", "Count", "Fraction");
+
+ for (i = 0; i < ARRAY_LEN (hist); i++)
+ {
+ if (hist[i] == 0)
+ continue;
+ s = format (s, "\n%U%12d%12wd%16.4f",
+ format_white_space, indent + 2,
+ MHEAP_MIN_USER_DATA_BYTES +
+ i * MHEAP_USER_DATA_WORD_BYTES, hist[i],
+ (f64) hist[i] / (f64) n_hist);
+ }
+ }
+
+ if (verbose)
+ s = format (s, "\n%U%U",
+ format_white_space, indent + 2, format_mheap_stats, h);
+
+ if ((h->flags & MHEAP_FLAG_TRACE) && vec_len (h->trace_main.traces) > 0)
+ {
+ /* Make a copy of traces since we'll be sorting them. */
+ mheap_trace_t *t, *traces_copy;
+ uword indent, total_objects_traced;
+
+ traces_copy = vec_dup (h->trace_main.traces);
+ qsort (traces_copy, vec_len (traces_copy), sizeof (traces_copy[0]),
+ mheap_trace_sort);
+
+ total_objects_traced = 0;
+ s = format (s, "\n");
+ vec_foreach (t, traces_copy)
+ {
+ /* Skip over free elements. */
+ if (t->n_allocations == 0)
+ continue;
+
+ total_objects_traced += t->n_allocations;
+
+ /* When not verbose, only report allocations of more than 1k. */
+ if (!verbose && t->n_bytes < 1024)
+ continue;
+
+ if (t == traces_copy)
+ s = format (s, "%=9s%=9s %=10s Traceback\n", "Bytes", "Count",
+ "Sample");
+ s = format (s, "%9d%9d %p", t->n_bytes, t->n_allocations,
+ t->offset + v);
+ indent = format_get_indent (s);
+ for (i = 0; i < ARRAY_LEN (t->callers) && t->callers[i]; i++)
+ {
+ if (i > 0)
+ s = format (s, "%U", format_white_space, indent);
+#ifdef CLIB_UNIX
+ s =
+ format (s, " %U\n", format_clib_elf_symbol_with_address,
+ t->callers[i]);
+#else
+ s = format (s, " %p\n", t->callers[i]);
+#endif
+ }
+ }
+
+ s = format (s, "%d total traced objects\n", total_objects_traced);
+
+ vec_free (traces_copy);
+ }
+
+ first_corrupt = mheap_first_corrupt (v);
+ if (first_corrupt)
+ {
+ size = mheap_elt_data_bytes (first_corrupt);
+ s = format (s, "\n first corrupt object: %p, size %wd\n %U",
+ first_corrupt, size, format_hex_bytes, first_corrupt, size);
+ }
+
+ /* FIXME. This output could be wrong in the unlikely case that format
+ uses the same mheap as we are currently inspecting. */
+ if (verbose > 1)
+ {
+ mheap_elt_t *e;
+ uword i, o;
+
+ s = format (s, "\n");
+
+ e = mheap_elt_at_uoffset (v, 0);
+ i = 0;
+ while (e->n_user_data != MHEAP_N_USER_DATA_INVALID)
+ {
+ if ((i % 8) == 0)
+ s = format (s, "%8d: ", i);
+
+ o = mheap_elt_uoffset (v, e);
+
+ if (e->is_free)
+ s = format (s, "(%8d) ", o);
+ else
+ s = format (s, " %8d ", o);
+
+ if ((i % 8) == 7 || (i + 1) >= h->n_elts)
+ s = format (s, "\n");
+
+ /* Advance to the next element. */
+ e = mheap_next_elt (e);
+ i++;
+ }
+ }
+
+ mheap_maybe_unlock (v);
+
+ return s;
+}
+
+void
+dmh (void *v)
+{
+ fformat (stderr, "%U", format_mheap, v, 1);
+}
+
+static void
+mheap_validate_breakpoint ()
+{
+ os_panic ();
+}
+
+void
+mheap_validate (void *v)
+{
+ mheap_t *h = mheap_header (v);
+ uword i, s;
+
+ uword elt_count, elt_size;
+ uword free_count_from_free_lists, free_size_from_free_lists;
+ uword small_elt_free_count, small_elt_free_size;
+
+#define CHECK(x) if (! (x)) { mheap_validate_breakpoint (); os_panic (); }
+
+ if (vec_len (v) == 0)
+ return;
+
+ mheap_maybe_lock (v);
+
+ /* Validate number of elements and size. */
+ free_size_from_free_lists = free_count_from_free_lists = 0;
+ for (i = 0; i < ARRAY_LEN (h->first_free_elt_uoffset_by_bin); i++)
+ {
+ mheap_elt_t *e, *n;
+ uword is_first;
+
+ CHECK ((h->first_free_elt_uoffset_by_bin[i] != MHEAP_GROUNDED)
+ == ((h->non_empty_free_elt_heads[i / BITS (uword)]
+ & ((uword) 1 << (uword) (i % BITS (uword)))) != 0));
+
+ if (h->first_free_elt_uoffset_by_bin[i] == MHEAP_GROUNDED)
+ continue;
+
+ e = mheap_elt_at_uoffset (v, h->first_free_elt_uoffset_by_bin[i]);
+ is_first = 1;
+ while (1)
+ {
+ uword s;
+
+ n = mheap_next_elt (e);
+
+ /* Object must be marked free. */
+ CHECK (e->is_free);
+
+ /* Next object's previous free bit must also be set. */
+ CHECK (n->prev_is_free);
+
+ if (is_first)
+ CHECK (e->free_elt.prev_uoffset == MHEAP_GROUNDED);
+ is_first = 0;
+
+ s = mheap_elt_data_bytes (e);
+ CHECK (user_data_size_to_bin_index (s) == i);
+
+ free_count_from_free_lists += 1;
+ free_size_from_free_lists += s;
+
+ if (e->free_elt.next_uoffset == MHEAP_GROUNDED)
+ break;
+
+ n = mheap_elt_at_uoffset (v, e->free_elt.next_uoffset);
+
+ /* Check free element linkages. */
+ CHECK (n->free_elt.prev_uoffset == mheap_elt_uoffset (v, e));
+
+ e = n;
+ }
+ }
+
+ /* Go through small object cache. */
+ small_elt_free_count = small_elt_free_size = 0;
+ for (i = 0; i < ARRAY_LEN (h->small_object_cache.bins.as_u8); i++)
+ {
+ if (h->small_object_cache.bins.as_u8[i] != 0)
+ {
+ mheap_elt_t *e;
+ uword b = h->small_object_cache.bins.as_u8[i] - 1;
+ uword o = h->small_object_cache.offsets[i];
+ uword s;
+
+ e = mheap_elt_at_uoffset (v, o);
+
+ /* Object must be allocated. */
+ CHECK (!e->is_free);
+
+ s = mheap_elt_data_bytes (e);
+ CHECK (user_data_size_to_bin_index (s) == b);
+
+ small_elt_free_count += 1;
+ small_elt_free_size += s;
+ }
+ }
+
+ {
+ mheap_elt_t *e, *n;
+ uword elt_free_size, elt_free_count;
+
+ elt_count = elt_size = elt_free_size = elt_free_count = 0;
+ for (e = v; e->n_user_data != MHEAP_N_USER_DATA_INVALID; e = n)
+ {
+ if (e->prev_n_user_data != MHEAP_N_USER_DATA_INVALID)
+ CHECK (e->prev_n_user_data * sizeof (e->user_data[0]) >=
+ MHEAP_MIN_USER_DATA_BYTES);
+
+ CHECK (e->n_user_data * sizeof (e->user_data[0]) >=
+ MHEAP_MIN_USER_DATA_BYTES);
+
+ n = mheap_next_elt (e);
+
+ CHECK (e->is_free == n->prev_is_free);
+
+ elt_count++;
+ s = mheap_elt_data_bytes (e);
+ elt_size += s;
+
+ if (e->is_free)
+ {
+ elt_free_count++;
+ elt_free_size += s;
+ }
+
+ /* Consecutive free objects should have been combined. */
+ CHECK (!(e->prev_is_free && n->prev_is_free));
+ }
+
+ CHECK (free_count_from_free_lists == elt_free_count);
+ CHECK (free_size_from_free_lists == elt_free_size);
+ CHECK (elt_count == h->n_elts + elt_free_count + small_elt_free_count);
+ CHECK (elt_size + (elt_count + 1) * MHEAP_ELT_OVERHEAD_BYTES ==
+ vec_len (v));
+ }
+
+ {
+ mheap_elt_t *e, *n;
+
+ for (e = v; e->n_user_data != MHEAP_N_USER_DATA_INVALID; e = n)
+ {
+ n = mheap_next_elt (e);
+ CHECK (e->n_user_data == n->prev_n_user_data);
+ }
+ }
+
+#undef CHECK
+
+ mheap_maybe_unlock (v);
+
+ h->validate_serial += 1;
+}
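
When chasing corruption, MHEAP_FLAG_VALIDATE makes every subsequent get/put re-run the full check above, and validate_serial gives a hook for conditional GDB breakpoints. A debug-build sketch:

    static void
    validate_around (void *v, void (*suspect_code) (void *))
    {
      mheap_t *h = mheap_header (v);

      h->flags |= MHEAP_FLAG_VALIDATE; /* every get/put now self-checks */
      suspect_code (v);
      h->flags &= ~MHEAP_FLAG_VALIDATE;
    }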
+
+static void
+mheap_get_trace (void *v, uword offset, uword size)
+{
+ mheap_t *h;
+ mheap_trace_main_t *tm;
+ mheap_trace_t *t;
+ uword i, n_callers, trace_index, *p;
+ mheap_trace_t trace;
+
+ /* Spurious Coverity warnings be gone. */
+ memset (&trace, 0, sizeof (trace));
+
+ n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers),
+ /* Skip mheap_get_aligned's frame */ 1);
+ if (n_callers == 0)
+ return;
+
+ for (i = n_callers; i < ARRAY_LEN (trace.callers); i++)
+ trace.callers[i] = 0;
+
+ h = mheap_header (v);
+ tm = &h->trace_main;
+
+ if (!tm->trace_by_callers)
+ tm->trace_by_callers =
+ hash_create_mem (0, sizeof (trace.callers), sizeof (uword));
+
+ p = hash_get_mem (tm->trace_by_callers, &trace.callers);
+ if (p)
+ {
+ trace_index = p[0];
+ t = tm->traces + trace_index;
+ }
+ else
+ {
+ i = vec_len (tm->trace_free_list);
+ if (i > 0)
+ {
+ trace_index = tm->trace_free_list[i - 1];
+ _vec_len (tm->trace_free_list) = i - 1;
+ }
+ else
+ {
+ mheap_trace_t *old_start = tm->traces;
+ mheap_trace_t *old_end = vec_end (tm->traces);
+
+ vec_add2 (tm->traces, t, 1);
+
+ if (tm->traces != old_start)
+ {
+ hash_pair_t *p;
+ mheap_trace_t *q;
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, tm->trace_by_callers,
+ ({
+ q = uword_to_pointer (p->key, mheap_trace_t *);
+ ASSERT (q >= old_start && q < old_end);
+ p->key = pointer_to_uword (tm->traces + (q - old_start));
+ }));
+ /* *INDENT-ON* */
+ }
+ trace_index = t - tm->traces;
+ }
+
+ t = tm->traces + trace_index;
+ t[0] = trace;
+ t->n_allocations = 0;
+ t->n_bytes = 0;
+ hash_set_mem (tm->trace_by_callers, t->callers, trace_index);
+ }
+
+ t->n_allocations += 1;
+ t->n_bytes += size;
+ t->offset = offset; /* keep a sample to autopsy */
+ hash_set (tm->trace_index_by_offset, offset, t - tm->traces);
+}
+
+static void
+mheap_put_trace (void *v, uword offset, uword size)
+{
+ mheap_t *h;
+ mheap_trace_main_t *tm;
+ mheap_trace_t *t;
+ uword trace_index, *p;
+
+ h = mheap_header (v);
+ tm = &h->trace_main;
+ p = hash_get (tm->trace_index_by_offset, offset);
+ if (!p)
+ return;
+
+ trace_index = p[0];
+ hash_unset (tm->trace_index_by_offset, offset);
+ ASSERT (trace_index < vec_len (tm->traces));
+
+ t = tm->traces + trace_index;
+ ASSERT (t->n_allocations > 0);
+ ASSERT (t->n_bytes >= size);
+ t->n_allocations -= 1;
+ t->n_bytes -= size;
+ if (t->n_allocations == 0)
+ {
+ hash_unset_mem (tm->trace_by_callers, t->callers);
+ vec_add1 (tm->trace_free_list, trace_index);
+ memset (t, 0, sizeof (t[0]));
+ }
+}
+
+static int
+mheap_trace_sort (const void *_t1, const void *_t2)
+{
+ const mheap_trace_t *t1 = _t1;
+ const mheap_trace_t *t2 = _t2;
+ word cmp;
+
+ cmp = (word) t2->n_bytes - (word) t1->n_bytes;
+ if (!cmp)
+ cmp = (word) t2->n_allocations - (word) t1->n_allocations;
+ return cmp;
+}
+
+always_inline void
+mheap_trace_main_free (mheap_trace_main_t * tm)
+{
+ vec_free (tm->traces);
+ vec_free (tm->trace_free_list);
+ hash_free (tm->trace_by_callers);
+ hash_free (tm->trace_index_by_offset);
+}
+
+void
+mheap_trace (void *v, int enable)
+{
+ mheap_t *h;
+
+ h = mheap_header (v);
+
+ if (enable)
+ {
+ h->flags |= MHEAP_FLAG_TRACE;
+ }
+ else
+ {
+ mheap_trace_main_free (&h->trace_main);
+ h->flags &= ~MHEAP_FLAG_TRACE;
+ }
+}
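
A usage sketch tying tracing to the verbose formatter shown earlier:

    static void
    trace_example (void *v, void (*workload) (void *))
    {
      mheap_trace (v, 1); /* start recording allocation backtraces */
      workload (v);
      fformat (stderr, "%U", format_mheap, v, 1 /* verbose */);
      mheap_trace (v, 0); /* stop and free trace state */
    }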
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/mheap.h b/src/vppinfra/mheap.h
new file mode 100644
index 00000000..5b7cdfba
--- /dev/null
+++ b/src/vppinfra/mheap.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_mheap_h
+#define included_mheap_h
+
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h> /* clib_error_t */
+#include <vppinfra/mem.h> /* clib_mem_usage_t */
+#include <vppinfra/format.h> /* for unformat_input_t */
+
+/* Allocate size bytes. New heap and offset are returned.
+ offset == ~0 means allocation failed. */
+always_inline void *
+mheap_get (void *v, uword size, uword * offset_return)
+{
+ return mheap_get_aligned (v, size, 0, 0, offset_return);
+}
+
+/* Create allocation heap of given size.
+ * The actual usable size is smaller than the requested size.
+ * memory_bytes must be greater than mheap_page_size + sizeof (mheap_t) + 16.
+ * Otherwise, allocation may fail and return 0.
+ */
+void *mheap_alloc (void *memory, uword memory_bytes);
+void *mheap_alloc_with_flags (void *memory, uword memory_bytes, uword flags);
+
+#define mheap_free(v) (v) = _mheap_free(v)
+void *_mheap_free (void *v);
+
+void mheap_foreach (void *v,
+ uword (*func) (void *arg, void *v, void *elt_data,
+ uword elt_size), void *arg);
+
+/* Format mheap data structures as string. */
+u8 *format_mheap (u8 * s, va_list * va);
+
+/* Validate internal consistency. */
+void mheap_validate (void *h);
+
+/* Query bytes used. */
+uword mheap_bytes (void *v);
+
+void mheap_usage (void *v, clib_mem_usage_t * usage);
+
+/* Enable/disable allocation tracing. */
+void mheap_trace (void *v, int enable);
+
+/* Test routine. */
+int test_mheap_main (unformat_input_t * input);
+
+#endif /* included_mheap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/mheap_bootstrap.h b/src/vppinfra/mheap_bootstrap.h
new file mode 100644
index 00000000..38f0ac84
--- /dev/null
+++ b/src/vppinfra/mheap_bootstrap.h
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_mem_mheap_h
+#define included_mem_mheap_h
+
+/* Bootstrap include so that #include <vppinfra/mem.h> can include e.g.
+ <vppinfra/mheap.h> which depends on <vppinfra/vec.h>. */
+
+#include <vppinfra/vec_bootstrap.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/os.h>
+#include <vppinfra/vector.h>
+
+/* Each element in heap is immediately followed by this struct. */
+typedef struct
+{
+ /* Number of user data words in the previous object.
+ Used to find the mheap_elt_t for the previous object. */
+#if CLIB_VEC64 > 0
+ u64 prev_n_user_data:63;
+
+ /* Used to mark end/start of doubly-linked list of mheap_elt_t's. */
+#define MHEAP_N_USER_DATA_INVALID (0x7fffffffffffffffULL)
+#define MHEAP_GROUNDED (~0ULL)
+
+ /* Set if previous object is free. */
+ u64 prev_is_free:1;
+
+ /* Number of user data words that follow this object. */
+ u64 n_user_data:63;
+
+ /* Set if this object is on free list (and therefore following free_elt
+ is valid). */
+ u64 is_free:1;
+
+#else
+ u32 prev_n_user_data:31;
+
+ /* Used to mark end/start of doubly-linked list of mheap_elt_t's. */
+#define MHEAP_N_USER_DATA_INVALID (0x7fffffff)
+#define MHEAP_GROUNDED (~0)
+
+ /* Set if previous object is free. */
+ u32 prev_is_free:1;
+
+ /* Number of user data words that follow this object. */
+ u32 n_user_data:31;
+
+ /* Set if this object is on free list (and therefore following free_elt
+ is valid). */
+ u32 is_free:1;
+#endif
+
+ union
+ {
+#if CLIB_VEC64 > 0
+ /* For allocated objects: user data follows.
+ User data is allocated in units of typeof (user_data[0]). */
+ u64 user_data[0];
+
+ /* For free objects, offsets of next and previous free objects of this size;
+ ~0 means end of doubly-linked list.
+ This is stored in user data (guaranteed to be at least 8 bytes)
+ but only for *free* objects. */
+ struct
+ {
+ u64 next_uoffset, prev_uoffset;
+ } free_elt;
+#else
+ /* For allocated objects: user data follows.
+ User data is allocated in units of typeof (user_data[0]). */
+ u32 user_data[0];
+
+ /* For free objects, offsets of next and previous free objects of this size;
+ ~0 means end of doubly-linked list.
+ This is stored in user data (guaranteed to be at least 8 bytes)
+ but only for *free* objects. */
+ struct
+ {
+ u32 next_uoffset, prev_uoffset;
+ } free_elt;
+#endif
+ };
+} mheap_elt_t;
+
+/* Number of bytes of "overhead": e.g. not user data. */
+#define MHEAP_ELT_OVERHEAD_BYTES (sizeof (mheap_elt_t) - STRUCT_OFFSET_OF (mheap_elt_t, user_data))
+
+/* User objects must be large enough to hold 2 x u32 free offsets in free elt. */
+#define MHEAP_MIN_USER_DATA_BYTES MHEAP_ELT_OVERHEAD_BYTES
+
+/* Number of bytes in a user data "word". */
+#define MHEAP_USER_DATA_WORD_BYTES STRUCT_SIZE_OF (mheap_elt_t, user_data[0])
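
To make these constants concrete in the 32-bit (CLIB_VEC64 == 0) layout: the header packs two 31+1-bit words into 8 bytes, so user_data starts at offset 8 and the overhead works out to 8 bytes, exactly the two u32 free-list offsets that MHEAP_MIN_USER_DATA_BYTES must accommodate. A C11 compile-time check sketch:

    #include <stddef.h>

    _Static_assert (offsetof (mheap_elt_t, user_data) == 8,
                    "8-byte header before user data");
    _Static_assert (sizeof (((mheap_elt_t *) 0)->free_elt) == 8,
                    "free elt fits in minimum user data");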
+
+typedef struct
+{
+ /* Address of callers: outer first, inner last. */
+ uword callers[12];
+
+ /* Count of allocations with this traceback. */
+#if CLIB_VEC64 > 0
+ u64 n_allocations;
+#else
+ u32 n_allocations;
+#endif
+
+ /* Count of bytes allocated with this traceback. */
+ u32 n_bytes;
+
+ /* Offset of this item */
+ uword offset;
+} mheap_trace_t;
+
+typedef struct
+{
+ mheap_trace_t *traces;
+
+ /* Indices of free traces. */
+ u32 *trace_free_list;
+
+ /* Hash table mapping callers to trace index. */
+ uword *trace_by_callers;
+
+ /* Hash table mapping mheap offset to trace index. */
+ uword *trace_index_by_offset;
+} mheap_trace_main_t;
+
+/* Without vector instructions, don't bother with the small object cache. */
+#ifdef CLIB_HAVE_VEC128
+#define MHEAP_HAVE_SMALL_OBJECT_CACHE 1
+#else
+#define MHEAP_HAVE_SMALL_OBJECT_CACHE 0
+#endif
+
+ /* Small object bin i is for objects with
+ user_size > sizeof (mheap_elt_t) + MHEAP_USER_DATA_WORD_BYTES * (i - 1)
+ and user_size <= sizeof (mheap_elt_t) + MHEAP_USER_DATA_WORD_BYTES * i. */
+#if MHEAP_HAVE_SMALL_OBJECT_CACHE > 0
+#define MHEAP_LOG2_N_SMALL_OBJECT_BINS 8
+#define MHEAP_N_SMALL_OBJECT_BINS (1 << MHEAP_LOG2_N_SMALL_OBJECT_BINS)
+#else
+#define MHEAP_LOG2_N_SMALL_OBJECT_BINS 0
+#define MHEAP_N_SMALL_OBJECT_BINS 0
+#endif
+
+#define MHEAP_N_BINS \
+ (MHEAP_N_SMALL_OBJECT_BINS \
+ + (STRUCT_BITS_OF (mheap_elt_t, user_data[0]) - MHEAP_LOG2_N_SMALL_OBJECT_BINS))
+
+typedef struct
+{
+ struct
+ {
+ u64 n_search_attempts;
+ u64 n_objects_searched;
+ u64 n_objects_found;
+ } free_list;
+
+ u64 n_vector_expands;
+
+ u64 n_small_object_cache_hits;
+ u64 n_small_object_cache_attempts;
+
+ u64 n_gets, n_puts;
+ u64 n_clocks_get, n_clocks_put;
+} mheap_stats_t;
+
+/* For objects with align == 4 and align_offset == 0 (e.g. vector strings). */
+typedef struct
+{
+ union
+ {
+#ifdef CLIB_HAVE_VEC128
+ u8x16 as_u8x16[BITS (uword) / 16];
+#endif
+
+ /* Store bin + 1; zero means unused. */
+ u8 as_u8[BITS (uword)];
+ } bins;
+
+ uword offsets[BITS (uword)];
+
+ u32 replacement_index;
+} mheap_small_object_cache_t;
+
+/* Vec header for heaps. */
+typedef struct
+{
+ /* User offsets for head of doubly-linked list of free objects of this size. */
+#if CLIB_VEC64 > 0
+ u64 first_free_elt_uoffset_by_bin[MHEAP_N_BINS];
+#else
+ u32 first_free_elt_uoffset_by_bin[MHEAP_N_BINS];
+#endif
+
+ /* Bitmap of non-empty free list bins. */
+ uword non_empty_free_elt_heads[(MHEAP_N_BINS + BITS (uword) - 1) /
+ BITS (uword)];
+
+ mheap_small_object_cache_t small_object_cache;
+
+ u32 flags;
+#define MHEAP_FLAG_TRACE (1 << 0)
+#define MHEAP_FLAG_DISABLE_VM (1 << 1)
+#define MHEAP_FLAG_THREAD_SAFE (1 << 2)
+#define MHEAP_FLAG_SMALL_OBJECT_CACHE (1 << 3)
+#define MHEAP_FLAG_VALIDATE (1 << 4)
+
+ /* Lock used when MHEAP_FLAG_THREAD_SAFE is set. */
+ volatile u32 lock;
+ volatile u32 owner_cpu;
+ int recursion_count;
+
+ /* Number of allocated objects. */
+ u64 n_elts;
+
+ /* Maximum size (in bytes) this heap is allowed to grow to.
+ Set to ~0 to grow heap (via vec_resize) arbitrarily. */
+ u64 max_size;
+
+ uword vm_alloc_offset_from_header;
+ uword vm_alloc_size;
+
+ /* Each successful mheap_validate call increments this serial number.
+ Used to debug heap corruption problems. GDB breakpoints can be
+ made conditional on validate_serial. */
+ u64 validate_serial;
+
+ mheap_trace_main_t trace_main;
+
+ mheap_stats_t stats;
+} mheap_t;
+
+always_inline mheap_t *
+mheap_header (u8 * v)
+{
+ return vec_aligned_header (v, sizeof (mheap_t), 16);
+}
+
+always_inline u8 *
+mheap_vector (mheap_t * h)
+{
+ return vec_aligned_header_end (h, sizeof (mheap_t), 16);
+}
+
+always_inline uword
+mheap_elt_uoffset (void *v, mheap_elt_t * e)
+{
+ return (uword) e->user_data - (uword) v;
+}
+
+always_inline mheap_elt_t *
+mheap_user_pointer_to_elt (void *v)
+{
+ return v - STRUCT_OFFSET_OF (mheap_elt_t, user_data);
+}
+
+/* For debugging, we keep track of offsets for valid objects and make
+ sure the user is not trying to free an object with an invalid offset. */
+always_inline uword
+mheap_offset_is_valid (void *v, uword uo)
+{
+ return uo >= MHEAP_ELT_OVERHEAD_BYTES && uo <= vec_len (v);
+}
+
+always_inline mheap_elt_t *
+mheap_elt_at_uoffset (void *v, uword uo)
+{
+ ASSERT (mheap_offset_is_valid (v, uo));
+ return (mheap_elt_t *) (v + uo - STRUCT_OFFSET_OF (mheap_elt_t, user_data));
+}
+
+always_inline void *
+mheap_elt_data (void *v, mheap_elt_t * e)
+{
+ return v + mheap_elt_uoffset (v, e);
+}
+
+always_inline uword
+mheap_elt_data_bytes (mheap_elt_t * e)
+{
+ return e->n_user_data * sizeof (e->user_data[0]);
+}
+
+always_inline uword
+mheap_data_bytes (void *v, uword uo)
+{
+ mheap_elt_t *e = mheap_elt_at_uoffset (v, uo);
+ return mheap_elt_data_bytes (e);
+}
+
+#define mheap_len(v,d) (mheap_data_bytes((v),(void *) (d) - (void *) (v)) / sizeof ((d)[0]))
+
+always_inline mheap_elt_t *
+mheap_next_elt (mheap_elt_t * e)
+{
+ ASSERT (e->n_user_data < MHEAP_N_USER_DATA_INVALID);
+ return (mheap_elt_t *) (e->user_data + e->n_user_data);
+}
+
+always_inline mheap_elt_t *
+mheap_prev_elt (mheap_elt_t * e)
+{
+ ASSERT (e->prev_n_user_data < MHEAP_N_USER_DATA_INVALID);
+ return ((void *) e
+ - e->prev_n_user_data * sizeof (e->user_data[0])
+ - MHEAP_ELT_OVERHEAD_BYTES);
+}
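
The accessors above give forward and backward traversal over the implicit boundary tags; a sketch of the address-order walk used by mheap_foreach and mheap_validate:

    static uword
    count_free_spans (void *v)
    {
      mheap_elt_t *e;
      uword n_free = 0;

      for (e = v; e->n_user_data != MHEAP_N_USER_DATA_INVALID;
           e = mheap_next_elt (e))
        n_free += e->is_free;

      return n_free;
    }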
+
+/* Exported operations. */
+
+always_inline uword
+mheap_elts (void *v)
+{
+ return v ? mheap_header (v)->n_elts : 0;
+}
+
+always_inline uword
+mheap_max_size (void *v)
+{
+ return v ? mheap_header (v)->max_size : ~0;
+}
+
+/* Free previously allocated offset. */
+void mheap_put (void *v, uword offset);
+
+/* Allocate object from mheap. */
+void *mheap_get_aligned (void *v, uword size, uword align, uword align_offset,
+ uword * offset_return);
+
+#endif /* included_mem_mheap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/mod_test_hash.c b/src/vppinfra/mod_test_hash.c
new file mode 100644
index 00000000..b3fa676d
--- /dev/null
+++ b/src/vppinfra/mod_test_hash.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/linux_kernel_init.h>
+#include <vppinfra/hash.h>
+
+CLIB_LINUX_KERNEL_MODULE ("test_hash", test_hash_main,
+ /* kernel-thread flags */ 0 & CLONE_KERNEL);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/os.h b/src/vppinfra/os.h
new file mode 100644
index 00000000..33300716
--- /dev/null
+++ b/src/vppinfra/os.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001-2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_os_h
+#define included_os_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/types.h>
+
+/* External panic function. */
+void os_panic (void);
+
+/* External exit function analogous to unix exit. */
+void os_exit (int code);
+
+/* External function to print a line. */
+void os_puts (u8 * string, uword length, uword is_error);
+
+/* External function to handle out of memory. */
+void os_out_of_memory (void);
+
+/* Estimate, measure or divine CPU timestamp clock frequency. */
+f64 os_cpu_clock_frequency (void);
+
+extern __thread uword __os_thread_index;
+
+static_always_inline uword
+os_get_thread_index (void)
+{
+ return __os_thread_index;
+}
+
+static_always_inline uword
+os_get_cpu_number (void) __attribute__ ((deprecated));
+
+static_always_inline uword
+os_get_cpu_number (void)
+{
+ return __os_thread_index;
+}
+
+uword os_get_nthreads (void);
+
+#include <vppinfra/smp.h>
+
+#endif /* included_os_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/pfhash.c b/src/vppinfra/pfhash.c
new file mode 100644
index 00000000..3b9fa8f3
--- /dev/null
+++ b/src/vppinfra/pfhash.c
@@ -0,0 +1,689 @@
+/*
+ Copyright (c) 2013 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/pfhash.h>
+#include <vppinfra/format.h>
+
+/* This is incredibly handy when debugging */
+u32 vl (void *v) __attribute__ ((weak));
+u32
+vl (void *v)
+{
+ return vec_len (v);
+}
+
+#if defined(CLIB_HAVE_VEC128) && ! defined (__ALTIVEC__)
+
+typedef struct
+{
+ u8 *key[16];
+ u64 value;
+} pfhash_show_t;
+
+static int
+sh_compare (pfhash_show_t * sh0, pfhash_show_t * sh1)
+{
+ return ((i32) (sh0->value) - ((i32) sh1->value));
+}
+
+u8 *
+format_pfhash (u8 * s, va_list * args)
+{
+ pfhash_t *p = va_arg (*args, pfhash_t *);
+ int verbose = va_arg (*args, int);
+
+ if (p == 0 || p->overflow_hash == 0 || p->buckets == 0)
+ {
+ s = format (s, "*** uninitialized ***");
+ return s;
+ }
+
+ s = format (s, "Prefetch hash '%s'\n", p->name);
+ s =
+ format (s, " %d buckets, %u bucket overflows, %.1f%% bucket overflow \n",
+ vec_len (p->buckets), p->overflow_count,
+ 100.0 * ((f64) p->overflow_count) / ((f64) vec_len (p->buckets)));
+ if (p->nitems)
+ s =
+ format (s,
+ " %u items, %u items in overflow, %.1f%% items in overflow\n",
+ p->nitems, p->nitems_in_overflow,
+ 100.0 * ((f64) p->nitems_in_overflow) / ((f64) p->nitems));
+
+ if (verbose)
+ {
+ pfhash_show_t *shs = 0, *sh;
+ hash_pair_t *hp;
+ int i, j;
+
+ for (i = 0; i < vec_len (p->buckets); i++)
+ {
+ pfhash_kv_t *kv;
+ pfhash_kv_16_t *kv16;
+ pfhash_kv_8_t *kv8;
+ pfhash_kv_8v8_t *kv8v8;
+ pfhash_kv_4_t *kv4;
+
+ if (p->buckets[i] == 0 || p->buckets[i] == PFHASH_BUCKET_OVERFLOW)
+ continue;
+
+ kv = pool_elt_at_index (p->kvp, p->buckets[i]);
+
+ switch (p->key_size)
+ {
+ case 16:
+ kv16 = &kv->kv16;
+ for (j = 0; j < 3; j++)
+ {
+ if (kv16->values[j] != (u32) ~ 0)
+ {
+ vec_add2 (shs, sh, 1);
+ clib_memcpy (sh->key, &kv16->kb.k_u32x4[j],
+ p->key_size);
+ sh->value = kv16->values[j];
+ }
+ }
+ break;
+ case 8:
+ if (p->value_size == 4)
+ {
+ kv8 = &kv->kv8;
+ for (j = 0; j < 5; j++)
+ {
+ if (kv8->values[j] != (u32) ~ 0)
+ {
+ vec_add2 (shs, sh, 1);
+ clib_memcpy (sh->key, &kv8->kb.k_u64[j],
+ p->key_size);
+ sh->value = kv8->values[j];
+ }
+ }
+ }
+ else
+ {
+ kv8v8 = &kv->kv8v8;
+ for (j = 0; j < 4; j++)
+ {
+ if (kv8v8->values[j] != (u64) ~ 0)
+ {
+ vec_add2 (shs, sh, 1);
+ clib_memcpy (sh->key, &kv8v8->kb.k_u64[j],
+ p->key_size);
+ sh->value = kv8v8->values[j];
+ }
+ }
+
+ }
+ break;
+ case 4:
+ kv4 = &kv->kv4;
+ for (j = 0; j < 8; j++)
+ {
+ if (kv4->values[j] != (u32) ~ 0)
+ {
+ vec_add2 (shs, sh, 1);
+ clib_memcpy (sh->key, &kv4->kb.kb[j], p->key_size);
+ sh->value = kv4->values[j];
+ }
+ }
+ break;
+ }
+ }
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (hp, p->overflow_hash,
+ ({
+ vec_add2 (shs, sh, 1);
+ clib_memcpy (sh->key, (u8 *)hp->key, p->key_size);
+ sh->value = hp->value[0];
+ }));
+ /* *INDENT-ON* */
+
+ vec_sort_with_function (shs, sh_compare);
+
+ for (i = 0; i < vec_len (shs); i++)
+ {
+ sh = vec_elt_at_index (shs, i);
+ s = format (s, " %U value %u\n", format_hex_bytes, sh->key,
+ p->key_size, sh->value);
+ }
+ vec_free (shs);
+ }
+ return s;
+}
+
+
+void abort (void);
+
+void
+pfhash_init (pfhash_t * p, char *name, u32 key_size, u32 value_size,
+ u32 nbuckets)
+{
+ pfhash_kv_t *kv;
+ memset (p, 0, sizeof (*p));
+ u32 key_bytes;
+
+ switch (key_size)
+ {
+ case 4:
+ key_bytes = 4;
+ break;
+ case 8:
+ key_bytes = 8;
+ break;
+ case 16:
+ key_bytes = 16;
+ break;
+ default:
+ ASSERT (0);
+ abort ();
+ }
+
+ switch (value_size)
+ {
+ case 4:
+ case 8:
+ break;
+ default:
+ ASSERT (0);
+ abort ();
+ }
+
+
+ p->name = format (0, "%s", name);
+ vec_add1 (p->name, 0);
+ p->overflow_hash = hash_create_mem (0, key_bytes, sizeof (uword));
+
+ nbuckets = 1 << (max_log2 (nbuckets));
+
+ /* This sets the entire bucket array to zero */
+ vec_validate (p->buckets, nbuckets - 1);
+ p->key_size = key_size;
+ p->value_size = value_size;
+
+ /*
+ * Unset buckets implicitly point at the 0th pool elt.
+ * All search routines will return ~0 if they go there.
+ */
+ pool_get_aligned (p->kvp, kv, 16);
+ memset (kv, 0xff, sizeof (*kv));
+}
+
+static pfhash_kv_16_t *
+pfhash_get_kv_16 (pfhash_t * p, u32 bucket_contents,
+ u32x4 * key, u32 * match_index)
+{
+ u32x4 diff[3];
+ u32 is_equal[3];
+ pfhash_kv_16_t *kv = 0;
+
+ *match_index = (u32) ~ 0;
+
+ kv = &p->kvp[bucket_contents].kv16;
+
+ diff[0] = u32x4_sub (kv->kb.k_u32x4[0], key[0]);
+ diff[1] = u32x4_sub (kv->kb.k_u32x4[1], key[0]);
+ diff[2] = u32x4_sub (kv->kb.k_u32x4[2], key[0]);
+
+ is_equal[0] = u32x4_zero_byte_mask (diff[0]) == 0xffff;
+ is_equal[1] = u32x4_zero_byte_mask (diff[1]) == 0xffff;
+ is_equal[2] = u32x4_zero_byte_mask (diff[2]) == 0xffff;
+
+ if (is_equal[0])
+ *match_index = 0;
+ if (is_equal[1])
+ *match_index = 1;
+ if (is_equal[2])
+ *match_index = 2;
+
+ return kv;
+}
+
+static pfhash_kv_8_t *
+pfhash_get_kv_8 (pfhash_t * p, u32 bucket_contents,
+ u64 * key, u32 * match_index)
+{
+ pfhash_kv_8_t *kv;
+
+ *match_index = (u32) ~ 0;
+
+ kv = &p->kvp[bucket_contents].kv8;
+
+ if (kv->kb.k_u64[0] == key[0])
+ *match_index = 0;
+ if (kv->kb.k_u64[1] == key[0])
+ *match_index = 1;
+ if (kv->kb.k_u64[2] == key[0])
+ *match_index = 2;
+ if (kv->kb.k_u64[3] == key[0])
+ *match_index = 3;
+ if (kv->kb.k_u64[4] == key[0])
+ *match_index = 4;
+
+ return kv;
+}
+
+static pfhash_kv_8v8_t *
+pfhash_get_kv_8v8 (pfhash_t * p,
+ u32 bucket_contents, u64 * key, u32 * match_index)
+{
+ pfhash_kv_8v8_t *kv;
+
+ *match_index = (u32) ~ 0;
+
+ kv = &p->kvp[bucket_contents].kv8v8;
+
+ if (kv->kb.k_u64[0] == key[0])
+ *match_index = 0;
+ if (kv->kb.k_u64[1] == key[0])
+ *match_index = 1;
+ if (kv->kb.k_u64[2] == key[0])
+ *match_index = 2;
+ if (kv->kb.k_u64[3] == key[0])
+ *match_index = 3;
+
+ return kv;
+}
+
+static pfhash_kv_4_t *
+pfhash_get_kv_4 (pfhash_t * p, u32 bucket_contents,
+ u32 * key, u32 * match_index)
+{
+ u32x4 vector_key;
+ u32x4 is_equal[2];
+ u32 zbm[2], winner_index;
+ pfhash_kv_4_t *kv;
+
+ *match_index = (u32) ~ 0;
+
+ kv = &p->kvp[bucket_contents].kv4;
+
+ vector_key = u32x4_splat (key[0]);
+
+ is_equal[0] = u32x4_is_equal (kv->kb.k_u32x4[0], vector_key);
+ is_equal[1] = u32x4_is_equal (kv->kb.k_u32x4[1], vector_key);
+ zbm[0] = ~u32x4_zero_byte_mask (is_equal[0]) & 0xFFFF;
+ zbm[1] = ~u32x4_zero_byte_mask (is_equal[1]) & 0xFFFF;
+
+ if (PREDICT_FALSE ((zbm[0] == 0) && (zbm[1] == 0)))
+ return kv;
+
+ winner_index = min_log2 (zbm[0]) >> 2;
+ winner_index = zbm[1] ? (4 + (min_log2 (zbm[1]) >> 2)) : winner_index;
+
+ *match_index = winner_index;
+ return kv;
+}
+
+static pfhash_kv_t *
+pfhash_get_internal (pfhash_t * p, u32 bucket_contents,
+ void *key, u32 * match_index)
+{
+ pfhash_kv_t *kv = 0;
+
+ switch (p->key_size)
+ {
+ case 16:
+ kv =
+ (pfhash_kv_t *) pfhash_get_kv_16 (p, bucket_contents, key,
+ match_index);
+ break;
+ case 8:
+ if (p->value_size == 4)
+ kv = (pfhash_kv_t *) pfhash_get_kv_8 (p, bucket_contents,
+ key, match_index);
+ else
+ kv = (pfhash_kv_t *) pfhash_get_kv_8v8 (p, bucket_contents,
+ key, match_index);
+ break;
+ case 4:
+ kv =
+ (pfhash_kv_t *) pfhash_get_kv_4 (p, bucket_contents, key,
+ match_index);
+ break;
+ default:
+ ASSERT (0);
+ }
+ return kv;
+}
+
+u64
+pfhash_get (pfhash_t * p, u32 bucket, void *key)
+{
+ pfhash_kv_t *kv;
+ u32 match_index = ~0;
+ pfhash_kv_16_t *kv16;
+ pfhash_kv_8_t *kv8;
+ pfhash_kv_8v8_t *kv8v8;
+ pfhash_kv_4_t *kv4;
+
+ u32 bucket_contents = pfhash_read_bucket_prefetch_kv (p, bucket);
+
+ if (bucket_contents == PFHASH_BUCKET_OVERFLOW)
+ {
+ uword *hp;
+
+ hp = hash_get_mem (p->overflow_hash, key);
+ if (hp)
+ return hp[0];
+ return (u64) ~ 0;
+ }
+
+ kv = pfhash_get_internal (p, bucket_contents, key, &match_index);
+ if (match_index == (u32) ~ 0)
+ return (u64) ~ 0;
+
+ kv16 = (void *) kv;
+ kv8 = (void *) kv;
+ kv4 = (void *) kv;
+ kv8v8 = (void *) kv;
+
+ switch (p->key_size)
+ {
+ case 16:
+ return (kv16->values[match_index] == (u32) ~ 0)
+ ? (u64) ~ 0 : (u64) kv16->values[match_index];
+ case 8:
+ if (p->value_size == 4)
+ return (kv8->values[match_index] == (u32) ~ 0)
+ ? (u64) ~ 0 : (u64) kv8->values[match_index];
+ else
+ return kv8v8->values[match_index];
+ case 4:
+ return (kv4->values[match_index] == (u32) ~ 0)
+ ? (u64) ~ 0 : (u64) kv4->values[match_index];
+ default:
+ ASSERT (0);
+ }
+ return (u64) ~ 0;
+}
+
+void
+pfhash_set (pfhash_t * p, u32 bucket, void *key, void *value)
+{
+ u32 bucket_contents = pfhash_read_bucket_prefetch_kv (p, bucket);
+ u32 match_index = (u32) ~ 0;
+ pfhash_kv_t *kv;
+ pfhash_kv_16_t *kv16;
+ pfhash_kv_8_t *kv8;
+ pfhash_kv_8v8_t *kv8v8;
+ pfhash_kv_4_t *kv4;
+ int i;
+ u8 *kcopy;
+
+ if (bucket_contents == PFHASH_BUCKET_OVERFLOW)
+ {
+ hash_pair_t *hp;
+ hp = hash_get_pair_mem (p->overflow_hash, key);
+ if (hp)
+ {
+ clib_warning ("replace value 0x%08x with value 0x%08x",
+ hp->value[0], (u64) value);
+ hp->value[0] = (u64) value;
+ return;
+ }
+ kcopy = clib_mem_alloc (p->key_size);
+ clib_memcpy (kcopy, key, p->key_size);
+ hash_set_mem (p->overflow_hash, kcopy, value);
+ p->nitems++;
+ p->nitems_in_overflow++;
+ return;
+ }
+
+ if (bucket_contents == 0)
+ {
+ pool_get_aligned (p->kvp, kv, 16);
+ memset (kv, 0xff, sizeof (*kv));
+ p->buckets[bucket] = kv - p->kvp;
+ }
+ else
+ kv = pfhash_get_internal (p, bucket_contents, key, &match_index);
+
+ kv16 = (void *) kv;
+ kv8 = (void *) kv;
+ kv8v8 = (void *) kv;
+ kv4 = (void *) kv;
+
+ p->nitems++;
+
+ if (match_index != (u32) ~ 0)
+ {
+ switch (p->key_size)
+ {
+ case 16:
+ kv16->values[match_index] = (u32) (u64) value;
+ return;
+
+ case 8:
+ if (p->value_size == 4)
+ kv8->values[match_index] = (u32) (u64) value;
+ else
+ kv8v8->values[match_index] = (u64) value;
+ return;
+
+ case 4:
+ kv4->values[match_index] = (u64) value;
+ return;
+
+ default:
+ ASSERT (0);
+ }
+ }
+
+ switch (p->key_size)
+ {
+ case 16:
+ for (i = 0; i < 3; i++)
+ {
+ if (kv16->values[i] == (u32) ~ 0)
+ {
+ clib_memcpy (&kv16->kb.k_u32x4[i], key, p->key_size);
+ kv16->values[i] = (u32) (u64) value;
+ return;
+ }
+ }
+ /* copy bucket contents to overflow hash tbl */
+ for (i = 0; i < 3; i++)
+ {
+ kcopy = clib_mem_alloc (p->key_size);
+ clib_memcpy (kcopy, &kv16->kb.k_u32x4[i], p->key_size);
+ hash_set_mem (p->overflow_hash, kcopy, kv16->values[i]);
+ p->nitems_in_overflow++;
+ }
+ /* Add new key to overflow */
+ kcopy = clib_mem_alloc (p->key_size);
+ clib_memcpy (kcopy, key, p->key_size);
+ hash_set_mem (p->overflow_hash, kcopy, value);
+ p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW;
+ p->overflow_count++;
+ p->nitems_in_overflow++;
+ return;
+
+ case 8:
+ if (p->value_size == 4)
+ {
+ for (i = 0; i < 5; i++)
+ {
+ if (kv8->values[i] == (u32) ~ 0)
+ {
+ clib_memcpy (&kv8->kb.k_u64[i], key, 8);
+ kv8->values[i] = (u32) (u64) value;
+ return;
+ }
+ }
+ /* copy bucket contents to overflow hash tbl */
+ for (i = 0; i < 5; i++)
+ {
+ kcopy = clib_mem_alloc (p->key_size);
+ clib_memcpy (kcopy, &kv8->kb.k_u64[i], 8);
+ hash_set_mem (p->overflow_hash, kcopy, kv8->values[i]);
+ p->nitems_in_overflow++;
+ }
+ }
+ else
+ {
+ for (i = 0; i < 4; i++)
+ {
+ if (kv8v8->values[i] == (u64) ~ 0)
+ {
+ clib_memcpy (&kv8v8->kb.k_u64[i], key, 8);
+ kv8v8->values[i] = (u64) value;
+ return;
+ }
+ }
+ /* copy bucket contents to overflow hash tbl */
+ for (i = 0; i < 4; i++)
+ {
+ kcopy = clib_mem_alloc (p->key_size);
+ clib_memcpy (kcopy, &kv8v8->kb.k_u64[i], 8);
+ hash_set_mem (p->overflow_hash, kcopy, kv8v8->values[i]);
+ p->nitems_in_overflow++;
+ }
+
+ }
+ /* Add new key to overflow */
+ kcopy = clib_mem_alloc (p->key_size);
+ clib_memcpy (kcopy, key, p->key_size);
+ hash_set_mem (p->overflow_hash, kcopy, value);
+ p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW;
+ p->overflow_count++;
+ p->nitems_in_overflow++;
+ return;
+
+ case 4:
+ for (i = 0; i < 8; i++)
+ {
+ if (kv4->values[i] == (u32) ~ 0)
+ {
+ clib_memcpy (&kv4->kb.kb[i], key, 4);
+ kv4->values[i] = (u32) (u64) value;
+ return;
+ }
+ }
+ /* copy bucket contents to overflow hash tbl */
+ for (i = 0; i < 8; i++)
+ {
+ kcopy = clib_mem_alloc (p->key_size);
+ clib_memcpy (kcopy, &kv4->kb.kb[i], 4);
+ hash_set_mem (p->overflow_hash, kcopy, kv4->values[i]);
+ p->nitems_in_overflow++;
+ }
+ /* Add new key to overflow */
+ kcopy = clib_mem_alloc (p->key_size);
+ clib_memcpy (kcopy, key, p->key_size);
+ hash_set_mem (p->overflow_hash, kcopy, value);
+ p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW;
+ p->overflow_count++;
+ p->nitems_in_overflow++;
+ return;
+
+ default:
+ ASSERT (0);
+ }
+}
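
A usage sketch for the prefetch hash with 4-byte keys and values, on targets where it is compiled in. Bucket selection is deliberately the caller's job so it can be prefetched early in a packet loop; the mixing expression here is a hypothetical placeholder, not part of the API:

    static void
    pfhash_example (void)
    {
      pfhash_t ph;
      u32 key = 0x12345678;
      u32 bucket;

      pfhash_init (&ph, "demo", 4 /* key bytes */, 4 /* value bytes */, 64);
      bucket = (key ^ (key >> 16)) & (vec_len (ph.buckets) - 1);

      pfhash_set (&ph, bucket, &key, (void *) (u64) 42);
      ASSERT (pfhash_get (&ph, bucket, &key) == 42);
      pfhash_unset (&ph, bucket, &key);
      pfhash_free (&ph);
    }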
+
+void
+pfhash_unset (pfhash_t * p, u32 bucket, void *key)
+{
+ u32 bucket_contents = pfhash_read_bucket_prefetch_kv (p, bucket);
+ u32 match_index = (u32) ~ 0;
+ pfhash_kv_t *kv;
+ pfhash_kv_16_t *kv16;
+ pfhash_kv_8_t *kv8;
+ pfhash_kv_8v8_t *kv8v8;
+ pfhash_kv_4_t *kv4;
+ void *oldkey;
+
+ if (bucket_contents == PFHASH_BUCKET_OVERFLOW)
+ {
+ hash_pair_t *hp;
+ hp = hash_get_pair_mem (p->overflow_hash, key);
+ if (hp)
+ {
+ oldkey = (void *) hp->key;
+ hash_unset_mem (p->overflow_hash, key);
+ clib_mem_free (oldkey);
+ p->nitems--;
+ p->nitems_in_overflow--;
+ }
+ return;
+ }
+
+ kv = pfhash_get_internal (p, bucket_contents, key, &match_index);
+ if (match_index == (u32) ~ 0)
+ return;
+
+ p->nitems--;
+
+ kv16 = (void *) kv;
+ kv8 = (void *) kv;
+ kv8v8 = (void *) kv;
+ kv4 = (void *) kv;
+
+ switch (p->key_size)
+ {
+ case 16:
+ kv16->values[match_index] = (u32) ~ 0;
+ return;
+
+ case 8:
+ if (p->value_size == 4)
+ kv8->values[match_index] = (u32) ~ 0;
+ else
+ kv8v8->values[match_index] = (u64) ~ 0;
+ return;
+
+ case 4:
+ kv4->values[match_index] = (u32) ~ 0;
+ return;
+
+ default:
+ ASSERT (0);
+ }
+}
+
+void
+pfhash_free (pfhash_t * p)
+{
+ hash_pair_t *hp;
+ int i;
+ u8 **keys = 0;
+
+ vec_free (p->name);
+
+ pool_free (p->kvp);
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (hp, p->overflow_hash,
+ ({
+ vec_add1 (keys, (u8 *)hp->key);
+ }));
+ /* *INDENT-ON* */
+ hash_free (p->overflow_hash);
+ for (i = 0; i < vec_len (keys); i++)
+ vec_free (keys[i]);
+ vec_free (keys);
+}
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/pfhash.h b/src/vppinfra/pfhash.h
new file mode 100644
index 00000000..e054c668
--- /dev/null
+++ b/src/vppinfra/pfhash.h
@@ -0,0 +1,276 @@
+/*
+ Copyright (c) 2013 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef included_clib_pfhash_h
+#define included_clib_pfhash_h
+
+
+#include <vppinfra/clib.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/pool.h>
+
+#if defined(CLIB_HAVE_VEC128) && ! defined (__ALTIVEC__)
+
+typedef struct
+{
+ /* 3 x 16 = 48 key bytes */
+ union
+ {
+ u32x4 k_u32x4[3];
+ u64 k_u64[6];
+ } kb;
+ /* 3 x 4 = 12 value bytes */
+ u32 values[3];
+ u32 pad;
+} pfhash_kv_16_t;
+
+typedef struct
+{
+ /* 5 x 8 = 40 key bytes */
+ union
+ {
+ u64 k_u64[5];
+ } kb;
+
+ /* 5 x 4 = 20 value bytes */
+ u32 values[5];
+ u32 pad;
+} pfhash_kv_8_t;
+
+typedef struct
+{
+ /* 4 x 8 = 32 key bytes */
+ union
+ {
+ u64 k_u64[4];
+ } kb;
+
+ /* 4 x 8 = 32 value bytes */
+ u64 values[4];
+} pfhash_kv_8v8_t;
+
+typedef struct
+{
+ /* 8 x 4 = 32 key bytes */
+ union
+ {
+ u32x4 k_u32x4[2];
+ u32 kb[8];
+ } kb;
+
+ /* 8 x 4 = 32 value bytes */
+ u32 values[8];
+} pfhash_kv_4_t;
+
+typedef union
+{
+ pfhash_kv_16_t kv16;
+ pfhash_kv_8_t kv8;
+ pfhash_kv_8v8_t kv8v8;
+ pfhash_kv_4_t kv4;
+} pfhash_kv_t;
+
+typedef struct
+{
+ /* Bucket vector */
+ u32 *buckets;
+#define PFHASH_BUCKET_OVERFLOW (u32)~0
+
+ /* Pool of key/value pairs */
+ pfhash_kv_t *kvp;
+
+ /* overflow plain-o-hash */
+ uword *overflow_hash;
+
+ /* Pretty-print name */
+ u8 *name;
+
+ u32 key_size;
+ u32 value_size;
+
+ u32 overflow_count;
+ u32 nitems;
+ u32 nitems_in_overflow;
+} pfhash_t;
+
+void pfhash_init (pfhash_t * p, char *name, u32 key_size, u32 value_size,
+ u32 nbuckets);
+void pfhash_free (pfhash_t * p);
+u64 pfhash_get (pfhash_t * p, u32 bucket, void *key);
+void pfhash_set (pfhash_t * p, u32 bucket, void *key, void *value);
+void pfhash_unset (pfhash_t * p, u32 bucket, void *key);
+
+format_function_t format_pfhash;
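
The declarations above form the whole public API. A minimal usage sketch (editorial; assumes 4-byte keys and values, a power-of-two bucket count, and that the value rides in the void * argument, as the overflow path in pfhash.c suggests):

    pfhash_t ph;
    u32 nbuckets = 64;                  /* assumed power of two */
    u32 key = 0x12345678;
    u32 bucket = key & (nbuckets - 1);  /* bucket selection is the caller's job */

    pfhash_init (&ph, "demo", 4 /* key bytes */, 4 /* value bytes */, nbuckets);
    pfhash_set (&ph, bucket, &key, (void *) (uword) 42);
    ASSERT (pfhash_get (&ph, bucket, &key) == 42);
    pfhash_unset (&ph, bucket, &key);
    pfhash_free (&ph);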
+
+static inline void
+pfhash_prefetch_bucket (pfhash_t * p, u32 bucket)
+{
+ CLIB_PREFETCH (&p->buckets[bucket], CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+static inline u32
+pfhash_read_bucket_prefetch_kv (pfhash_t * p, u32 bucket)
+{
+ u32 bucket_contents = p->buckets[bucket];
+  if (PREDICT_TRUE (bucket_contents != PFHASH_BUCKET_OVERFLOW))
+ CLIB_PREFETCH (&p->kvp[bucket_contents], CLIB_CACHE_LINE_BYTES, LOAD);
+ return bucket_contents;
+}
+
+/*
+ * pfhash_search_kv_16
+ * See if the supplied 16-byte key matches one of three 16-byte (key,value) pairs.
+ * Return the indicated value, or ~0 if no match
+ *
+ * Note: including the overflow test, the fast path is 35 instrs
+ * on x86_64. Elves will steal your keyboard in the middle of the night if
+ * you "improve" it without checking the generated code!
+ */
+static inline u32
+pfhash_search_kv_16 (pfhash_t * p, u32 bucket_contents, u32x4 * key)
+{
+ u32x4 diff0, diff1, diff2;
+ u32 is_equal0, is_equal1, is_equal2;
+ u32 no_match;
+ pfhash_kv_16_t *kv;
+ u32 rv;
+
+ if (PREDICT_FALSE (bucket_contents == PFHASH_BUCKET_OVERFLOW))
+ {
+ uword *hp;
+ hp = hash_get_mem (p->overflow_hash, key);
+ if (hp)
+ return hp[0];
+ return (u32) ~ 0;
+ }
+
+ kv = &p->kvp[bucket_contents].kv16;
+
+ diff0 = u32x4_sub (kv->kb.k_u32x4[0], key[0]);
+ diff1 = u32x4_sub (kv->kb.k_u32x4[1], key[0]);
+ diff2 = u32x4_sub (kv->kb.k_u32x4[2], key[0]);
+
+ no_match = is_equal0 = (i16) u32x4_zero_byte_mask (diff0);
+ is_equal1 = (i16) u32x4_zero_byte_mask (diff1);
+ no_match |= is_equal1;
+ is_equal2 = (i16) u32x4_zero_byte_mask (diff2);
+ no_match |= is_equal2;
+ /* If any of the three items matched, no_match will be zero after this line */
+ no_match = ~no_match;
+
+ rv = (is_equal0 & kv->values[0])
+ | (is_equal1 & kv->values[1]) | (is_equal2 & kv->values[2]) | no_match;
+
+ return rv;
+}
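
The (i16) casts are what make this select branch-free: u32x4_zero_byte_mask() returns 0xFFFF when all sixteen byte lanes are zero, and pushing that through i16 sign-extends it into an all-ones u32. A scalar model of the trick (matchedN and vN are placeholder names, not code from the patch):

    u32 mask0 = matched0 ? 0xFFFFFFFF : 0;   /* what the (i16) sign extension yields */
    u32 mask1 = matched1 ? 0xFFFFFFFF : 0;
    u32 mask2 = matched2 ? 0xFFFFFFFF : 0;
    u32 none  = ~(mask0 | mask1 | mask2);    /* all-ones iff nothing matched */
    u32 rv    = (mask0 & v0) | (mask1 & v1) | (mask2 & v2) | none;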
+
+static inline u32
+pfhash_search_kv_8 (pfhash_t * p, u32 bucket_contents, u64 * key)
+{
+ pfhash_kv_8_t *kv;
+ u32 rv = (u32) ~ 0;
+
+ if (PREDICT_FALSE (bucket_contents == PFHASH_BUCKET_OVERFLOW))
+ {
+ uword *hp;
+ hp = hash_get_mem (p->overflow_hash, key);
+ if (hp)
+ return hp[0];
+ return (u32) ~ 0;
+ }
+
+ kv = &p->kvp[bucket_contents].kv8;
+
+ rv = (kv->kb.k_u64[0] == key[0]) ? kv->values[0] : rv;
+ rv = (kv->kb.k_u64[1] == key[0]) ? kv->values[1] : rv;
+ rv = (kv->kb.k_u64[2] == key[0]) ? kv->values[2] : rv;
+ rv = (kv->kb.k_u64[3] == key[0]) ? kv->values[3] : rv;
+ rv = (kv->kb.k_u64[4] == key[0]) ? kv->values[4] : rv;
+
+ return rv;
+}
+
+static inline u64
+pfhash_search_kv_8v8 (pfhash_t * p, u32 bucket_contents, u64 * key)
+{
+ pfhash_kv_8v8_t *kv;
+ u64 rv = (u64) ~ 0;
+
+ if (PREDICT_FALSE (bucket_contents == PFHASH_BUCKET_OVERFLOW))
+ {
+ uword *hp;
+ hp = hash_get_mem (p->overflow_hash, key);
+ if (hp)
+ return hp[0];
+ return (u64) ~ 0;
+ }
+
+ kv = &p->kvp[bucket_contents].kv8v8;
+
+ rv = (kv->kb.k_u64[0] == key[0]) ? kv->values[0] : rv;
+ rv = (kv->kb.k_u64[1] == key[0]) ? kv->values[1] : rv;
+ rv = (kv->kb.k_u64[2] == key[0]) ? kv->values[2] : rv;
+ rv = (kv->kb.k_u64[3] == key[0]) ? kv->values[3] : rv;
+
+ return rv;
+}
+
+static inline u32
+pfhash_search_kv_4 (pfhash_t * p, u32 bucket_contents, u32 * key)
+{
+ u32x4 vector_key;
+ u32x4 is_equal[2];
+ u32 zbm[2], winner_index;
+ pfhash_kv_4_t *kv;
+
+ if (PREDICT_FALSE (bucket_contents == PFHASH_BUCKET_OVERFLOW))
+ {
+ uword *hp;
+ hp = hash_get_mem (p->overflow_hash, key);
+ if (hp)
+ return hp[0];
+ return (u32) ~ 0;
+ }
+
+ kv = &p->kvp[bucket_contents].kv4;
+
+ vector_key = u32x4_splat (key[0]);
+
+ is_equal[0] = u32x4_is_equal (kv->kb.k_u32x4[0], vector_key);
+ is_equal[1] = u32x4_is_equal (kv->kb.k_u32x4[1], vector_key);
+ zbm[0] = ~u32x4_zero_byte_mask (is_equal[0]) & 0xFFFF;
+ zbm[1] = ~u32x4_zero_byte_mask (is_equal[1]) & 0xFFFF;
+
+ if (PREDICT_FALSE ((zbm[0] == 0) && (zbm[1] == 0)))
+ return (u32) ~ 0;
+
+ winner_index = min_log2 (zbm[0]) >> 2;
+ winner_index = zbm[1] ? (4 + (min_log2 (zbm[1]) >> 2)) : winner_index;
+
+ return kv->values[winner_index];
+}
+
+#endif /* CLIB_HAVE_VEC128 */
+
+#endif /* included_clib_pfhash_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/phash.c b/src/vppinfra/phash.c
new file mode 100644
index 00000000..14da5225
--- /dev/null
+++ b/src/vppinfra/phash.c
@@ -0,0 +1,1017 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/* This is all stolen from Bob Jenkins and reworked for clib. Thanks
+ once again Bob for the great work. */
+
+/*
+------------------------------------------------------------------------------
+perfect.c: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, September 1996, December 1999
+You may use this code in any way you wish, and it is free. No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfect.c
+
+This generates a minimal perfect hash function. That means, given a
+set of n keys, this determines a hash function that maps each of
+those keys into a value in 0..n-1 with no collisions.
+
+The perfect hash function first uses a normal hash function on the key
+to determine (a,b) such that the pair (a,b) is distinct for all
+keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
+tab[] is an array of 1-byte values and scramble[] is a 256-term array of
+2-byte or 4-byte values. If there are n keys, the length of tab[] is a
+power of two between n/3 and n.
+
+I found the idea of computing distinct (a,b) values in "Practical minimal
+perfect hash functions for large databases", Fox, Heath, Chen, and Daoud,
+Communications of the ACM, January 1992. They found the idea in Chichelli
+(CACM Jan 1980). Beyond that, our methods differ.
+
+The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
+0..*blen*-1. A fast hash function determines both a and b
+simultaneously. Any decent hash function is likely to produce
+hashes so that (a,b) is distinct for all pairs. I try the hash
+using different values of *salt* until all pairs are distinct.
+
+The final hash is (a XOR scramble[tab[b]]). *scramble* is a
+predetermined mapping of 0..255 into 0..smax-1. *tab* is an
+array that we fill in in such a way as to make the hash perfect.
+
+First we fill in all values of *tab* that are used by more than one
+key. We try all possible values for each position until one works.
+
+This leaves m unmapped keys and m values that something could hash to.
+If you treat unmapped keys as lefthand nodes and unused hash values
+as righthand nodes, and draw a line connecting each key to each hash
+value it could map to, you get a bipartite graph. We attempt to
+find a perfect matching in this graph. If we succeed, we have
+determined a perfect hash for the whole set of keys.
+
+*scramble* is used because (a^tab[i]) clusters keys around *a*.
+------------------------------------------------------------------------------
+*/
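
Once tab[] and scramble[] are built, a lookup is two table reads and an xor. A sketch in the terms used above (initial_hash, salt, a_shift and b_mask stand in for the mixing step; compare phash_hash_slow() below):

    u32 z = initial_hash (key, salt);   /* one mix of the key */
    u32 a = z >> a_shift;               /* a in 0..alen-1 */
    u32 b = z & b_mask;                 /* b in 0..blen-1 */
    u32 h = a ^ scramble[tab[b]];       /* perfect hash value */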
+
+#include <vppinfra/bitmap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/phash.h>
+#include <vppinfra/random.h>
+
+static void
+init_keys_direct_u32 (phash_main_t * pm)
+{
+ int n_keys_left, b_mask, a_shift;
+ u32 seed;
+ phash_key_t *k;
+
+ seed = pm->hash_seed;
+ b_mask = (1 << pm->b_bits) - 1;
+ a_shift = BITS (seed) - pm->a_bits;
+
+ k = pm->keys;
+ n_keys_left = vec_len (pm->keys);
+
+ while (n_keys_left >= 2)
+ {
+ u32 x0, y0, z0;
+ u32 x1, y1, z1;
+
+ x0 = y0 = z0 = seed;
+ x1 = y1 = z1 = seed;
+ x0 += (u32) k[0].key;
+ x1 += (u32) k[1].key;
+
+ hash_mix32 (x0, y0, z0);
+ hash_mix32 (x1, y1, z1);
+
+ k[0].b = z0 & b_mask;
+ k[1].b = z1 & b_mask;
+ k[0].a = z0 >> a_shift;
+ k[1].a = z1 >> a_shift;
+ if (PREDICT_FALSE (a_shift >= BITS (z0)))
+ k[0].a = k[1].a = 0;
+
+ k += 2;
+ n_keys_left -= 2;
+ }
+
+ if (n_keys_left >= 1)
+ {
+ u32 x0, y0, z0;
+
+ x0 = y0 = z0 = seed;
+ x0 += k[0].key;
+
+ hash_mix32 (x0, y0, z0);
+
+ k[0].b = z0 & b_mask;
+ k[0].a = z0 >> a_shift;
+ if (PREDICT_FALSE (a_shift >= BITS (z0)))
+ k[0].a = 0;
+
+ k += 1;
+ n_keys_left -= 1;
+ }
+}
+
+static void
+init_keys_direct_u64 (phash_main_t * pm)
+{
+ int n_keys_left, b_mask, a_shift;
+ u64 seed;
+ phash_key_t *k;
+
+ seed = pm->hash_seed;
+ b_mask = (1 << pm->b_bits) - 1;
+ a_shift = BITS (seed) - pm->a_bits;
+
+ k = pm->keys;
+ n_keys_left = vec_len (pm->keys);
+
+ while (n_keys_left >= 2)
+ {
+ u64 x0, y0, z0;
+ u64 x1, y1, z1;
+
+ x0 = y0 = z0 = seed;
+ x1 = y1 = z1 = seed;
+ x0 += (u64) k[0].key;
+ x1 += (u64) k[1].key;
+
+ hash_mix64 (x0, y0, z0);
+ hash_mix64 (x1, y1, z1);
+
+ k[0].b = z0 & b_mask;
+ k[1].b = z1 & b_mask;
+ k[0].a = z0 >> a_shift;
+ k[1].a = z1 >> a_shift;
+ if (PREDICT_FALSE (a_shift >= BITS (z0)))
+ k[0].a = k[1].a = 0;
+
+ k += 2;
+ n_keys_left -= 2;
+ }
+
+ if (n_keys_left >= 1)
+ {
+ u64 x0, y0, z0;
+
+ x0 = y0 = z0 = seed;
+ x0 += k[0].key;
+
+ hash_mix64 (x0, y0, z0);
+
+ k[0].b = z0 & b_mask;
+ k[0].a = z0 >> a_shift;
+ if (PREDICT_FALSE (a_shift >= BITS (z0)))
+ k[0].a = 0;
+
+ k += 1;
+ n_keys_left -= 1;
+ }
+}
+
+static void
+init_keys_indirect_u32 (phash_main_t * pm)
+{
+ int n_keys_left, b_mask, a_shift;
+ u32 seed;
+ phash_key_t *k;
+
+ seed = pm->hash_seed;
+ b_mask = (1 << pm->b_bits) - 1;
+ a_shift = BITS (seed) - pm->a_bits;
+
+ k = pm->keys;
+ n_keys_left = vec_len (pm->keys);
+
+ while (n_keys_left >= 2)
+ {
+ u32 xyz[6];
+ u32 x0, y0, z0;
+ u32 x1, y1, z1;
+
+ pm->key_seed2 (pm->private, k[0].key, k[1].key, &xyz);
+
+ x0 = y0 = z0 = seed;
+ x1 = y1 = z1 = seed;
+ x0 += xyz[0];
+ y0 += xyz[1];
+ z0 += xyz[2];
+ x1 += xyz[3];
+ y1 += xyz[4];
+ z1 += xyz[5];
+
+ hash_mix32 (x0, y0, z0);
+ hash_mix32 (x1, y1, z1);
+
+ k[0].b = z0 & b_mask;
+ k[1].b = z1 & b_mask;
+ k[0].a = z0 >> a_shift;
+ k[1].a = z1 >> a_shift;
+ if (PREDICT_FALSE (a_shift >= BITS (z0)))
+ k[0].a = k[1].a = 0;
+
+ k += 2;
+ n_keys_left -= 2;
+ }
+
+ if (n_keys_left >= 1)
+ {
+ u32 xyz[3];
+ u32 x0, y0, z0;
+
+ pm->key_seed1 (pm->private, k[0].key, &xyz);
+
+ x0 = y0 = z0 = seed;
+ x0 += xyz[0];
+ y0 += xyz[1];
+ z0 += xyz[2];
+
+ hash_mix32 (x0, y0, z0);
+
+ k[0].b = z0 & b_mask;
+ k[0].a = z0 >> a_shift;
+ if (PREDICT_FALSE (a_shift >= BITS (z0)))
+ k[0].a = 0;
+
+ k += 1;
+ n_keys_left -= 1;
+ }
+}
+
+static void
+init_keys_indirect_u64 (phash_main_t * pm)
+{
+ int n_keys_left, b_mask, a_shift;
+ u64 seed;
+ phash_key_t *k;
+
+ seed = pm->hash_seed;
+ b_mask = (1 << pm->b_bits) - 1;
+ a_shift = BITS (seed) - pm->a_bits;
+
+ k = pm->keys;
+ n_keys_left = vec_len (pm->keys);
+
+ while (n_keys_left >= 2)
+ {
+ u64 xyz[6];
+ u64 x0, y0, z0;
+ u64 x1, y1, z1;
+
+ pm->key_seed2 (pm->private, k[0].key, k[1].key, &xyz);
+
+ x0 = y0 = z0 = seed;
+ x1 = y1 = z1 = seed;
+ x0 += xyz[0];
+ y0 += xyz[1];
+ z0 += xyz[2];
+ x1 += xyz[3];
+ y1 += xyz[4];
+ z1 += xyz[5];
+
+ hash_mix64 (x0, y0, z0);
+ hash_mix64 (x1, y1, z1);
+
+ k[0].b = z0 & b_mask;
+ k[1].b = z1 & b_mask;
+ k[0].a = z0 >> a_shift;
+ k[1].a = z1 >> a_shift;
+ if (PREDICT_FALSE (a_shift >= BITS (z0)))
+ k[0].a = k[1].a = 0;
+
+ k += 2;
+ n_keys_left -= 2;
+ }
+
+ if (n_keys_left >= 1)
+ {
+ u64 xyz[3];
+ u64 x0, y0, z0;
+
+ pm->key_seed1 (pm->private, k[0].key, &xyz);
+
+ x0 = y0 = z0 = seed;
+ x0 += xyz[0];
+ y0 += xyz[1];
+ z0 += xyz[2];
+
+ hash_mix64 (x0, y0, z0);
+
+ k[0].b = z0 & b_mask;
+ k[0].a = z0 >> a_shift;
+ if (PREDICT_FALSE (a_shift >= BITS (z0)))
+ k[0].a = 0;
+
+ k += 1;
+ n_keys_left -= 1;
+ }
+}
+
+/*
+ * insert keys into table according to key->b
+ * check if the initial hash might work
+ */
+static int
+init_tabb (phash_main_t * pm)
+{
+ int no_collisions;
+ phash_tabb_t *tb;
+ phash_key_t *k, *l;
+
+ if (pm->key_seed1)
+ {
+ if (pm->flags & PHASH_FLAG_MIX64)
+ init_keys_indirect_u64 (pm);
+ else
+ init_keys_indirect_u32 (pm);
+ }
+ else
+ {
+ if (pm->flags & PHASH_FLAG_MIX64)
+ init_keys_direct_u64 (pm);
+ else
+ init_keys_direct_u32 (pm);
+ }
+
+ if (!pm->tabb)
+ vec_resize (pm->tabb, 1 << pm->b_bits);
+ else
+ vec_foreach (tb, pm->tabb) phash_tabb_free (tb);
+
+ /* Two keys with the same (a,b) guarantees a collision */
+ no_collisions = 1;
+ vec_foreach (k, pm->keys)
+ {
+ u32 i, *ki;
+
+ tb = pm->tabb + k->b;
+ ki = tb->keys;
+ for (i = 0; i < vec_len (ki); i++)
+ {
+ l = pm->keys + ki[i];
+ if (k->a == l->a)
+ {
+ /* Given keys are supposed to be unique. */
+ if (pm->key_is_equal
+ && pm->key_is_equal (pm->private, l->key, k->key))
+ clib_error ("duplicate keys");
+ no_collisions = 0;
+ goto done;
+ }
+ }
+
+ vec_add1 (tb->keys, k - pm->keys);
+ }
+
+done:
+ return no_collisions;
+}
+
+/* Try to apply an augmenting list */
+static int
+apply (phash_main_t * pm, u32 tail, u32 rollback)
+{
+ phash_key_t *k;
+ phash_tabb_t *pb;
+ phash_tabq_t *q_child, *q_parent;
+ u32 ki, i, hash, child, parent;
+ u32 stabb; /* scramble[tab[b]] */
+ int no_collision;
+
+ no_collision = 1;
+
+ /* Walk from child to parent until root is reached. */
+ for (child = tail - 1; child; child = parent)
+ {
+ q_child = &pm->tabq[child];
+ parent = q_child->parent_q;
+ q_parent = &pm->tabq[parent];
+
+ /* find parent's list of siblings */
+ ASSERT (q_parent->b_q < vec_len (pm->tabb));
+ pb = pm->tabb + q_parent->b_q;
+
+ /* erase old hash values */
+ stabb = pm->scramble[pb->val_b];
+ for (i = 0; i < vec_len (pb->keys); i++)
+ {
+ ki = pb->keys[i];
+ k = pm->keys + ki;
+ hash = k->a ^ stabb;
+
+ /* Erase hash for all of child's siblings. */
+ if (ki == pm->tabh[hash])
+ pm->tabh[hash] = ~0;
+ }
+
+ /* change pb->val_b, which will change the hashes of all parent siblings */
+ pb->val_b = rollback ? q_child->oldval_q : q_child->newval_q;
+
+ /* set new hash values */
+ stabb = pm->scramble[pb->val_b];
+ for (i = 0; i < vec_len (pb->keys); i++)
+ {
+ ki = pb->keys[i];
+ k = pm->keys + ki;
+
+ hash = k->a ^ stabb;
+ if (rollback)
+ {
+ if (parent == 0)
+ continue; /* root never had a hash */
+ }
+ else if (pm->tabh[hash] != ~0)
+ {
+ /* Very rare case: roll back any changes. */
+ apply (pm, tail, /* rollback changes */ 1);
+ no_collision = 0;
+ goto done;
+ }
+ pm->tabh[hash] = ki;
+ }
+ }
+
+done:
+ return no_collision;
+}
+
+
+/*
+-------------------------------------------------------------------------------
+augment(): Add item to the mapping.
+
+Construct a spanning tree of *b*s with *item* as root, where each
+parent can have all its hashes changed (by some new val_b) with
+at most one collision, and each child is the b of that collision.
+
+I got this from Tarjan's "Data Structures and Network Algorithms". The
+path from *item* to a *b* that can be remapped with no collision is
+an "augmenting path". Change values of tab[b] along the path so that
+the unmapped key gets mapped and the unused hash value gets used.
+
+Assuming 1 key per b, if m out of n hash values are still unused,
+you should expect the transitive closure to cover n/m nodes before
+an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect
+this approach to take about nlogn time to map all single-key b's.
+-------------------------------------------------------------------------------
+
+high_water: a value higher than any now in tabb[].water_b.
+*/
+static int
+augment (phash_main_t * pm, u32 b_root, u32 high_water)
+{
+ u32 q; /* current position walking through the queue */
+ u32 tail; /* tail of the queue. 0 is the head of the queue. */
+ phash_tabb_t *tb_parent, *tb_child, *tb_hit;
+ phash_key_t *k_parent, *k_child;
+ u32 v, v_limit; /* possible value for myb->val_b */
+ u32 i, ki, hash;
+
+ v_limit =
+ 1 << ((pm->flags & PHASH_FLAG_USE_SCRAMBLE) ? pm->s_bits : BITS (u8));
+
+ /* Initialize the root of the spanning tree. */
+ pm->tabq[0].b_q = b_root;
+ tail = 1;
+
+ /* construct the spanning tree by walking the queue, add children to tail */
+ for (q = 0; q < tail; q++)
+ {
+ if ((pm->flags & PHASH_FLAG_FAST_MODE)
+ && !(pm->flags & PHASH_FLAG_MINIMAL) && q == 1)
+ break; /* don't do transitive closure */
+
+ tb_parent = pm->tabb + pm->tabq[q].b_q; /* the b for this node */
+
+ for (v = 0; v < v_limit; v++)
+ {
+ tb_child = 0;
+
+ for (i = 0; i < vec_len (tb_parent->keys); i++)
+ {
+ ki = tb_parent->keys[i];
+ k_parent = pm->keys + ki;
+
+ hash = k_parent->a ^ pm->scramble[v];
+ if (hash >= pm->hash_max)
+ goto try_next_v; /* hash code out of bounds => we can't use this v */
+
+ ki = pm->tabh[hash];
+ if (ki == ~0)
+ continue;
+
+ k_child = pm->keys + ki;
+ tb_hit = pm->tabb + k_child->b;
+
+ if (tb_child)
+ {
+ /* Hit at most one child b. */
+ if (tb_child == tb_hit)
+ goto try_next_v;
+ }
+ else
+ {
+ /* Remember this as child b. */
+ tb_child = tb_hit;
+ if (tb_hit->water_b == high_water)
+ goto try_next_v; /* already explored */
+ }
+ }
+
+ /* tb_parent with v has either one or zero collisions. */
+
+ /* add childb to the queue of reachable things */
+ if (tb_child)
+ tb_child->water_b = high_water;
+ pm->tabq[tail].b_q = tb_child ? tb_child - pm->tabb : ~0;
+ pm->tabq[tail].newval_q = v; /* how to make parent (myb) use this hash */
+ pm->tabq[tail].oldval_q = tb_parent->val_b; /* need this for rollback */
+ pm->tabq[tail].parent_q = q;
+ ++tail;
+
+ /* Found a v with no collisions? */
+ if (!tb_child)
+ {
+ /* Try to apply the augmenting path. */
+ if (apply (pm, tail, /* rollback */ 0))
+ return 1; /* success, item was added to the perfect hash */
+ --tail; /* don't know how to handle such a child! */
+ }
+
+ try_next_v:
+ ;
+ }
+ }
+ return 0;
+}
+
+
+static phash_tabb_t *sort_tabb;
+
+static int
+phash_tabb_compare (void *a1, void *a2)
+{
+ u32 *b1 = a1;
+ u32 *b2 = a2;
+ phash_tabb_t *tb1, *tb2;
+
+ tb1 = sort_tabb + b1[0];
+ tb2 = sort_tabb + b2[0];
+
+ return ((int) vec_len (tb2->keys) - (int) vec_len (tb1->keys));
+}
+
+/* find a mapping that makes this a perfect hash */
+static int
+perfect (phash_main_t * pm)
+{
+ u32 i;
+
+ /* clear any state from previous attempts */
+ if (vec_bytes (pm->tabh))
+ memset (pm->tabh, ~0, vec_bytes (pm->tabh));
+
+ vec_validate (pm->tabb_sort, vec_len (pm->tabb) - 1);
+ for (i = 0; i < vec_len (pm->tabb_sort); i++)
+ pm->tabb_sort[i] = i;
+
+ sort_tabb = pm->tabb;
+
+ vec_sort_with_function (pm->tabb_sort, phash_tabb_compare);
+
+ /* In descending order by number of keys, map all *b*s */
+ for (i = 0; i < vec_len (pm->tabb_sort); i++)
+ {
+ if (!augment (pm, pm->tabb_sort[i], i + 1))
+ return 0;
+ }
+
+ /* Success! We found a perfect hash of all keys into 0..nkeys-1. */
+ return 1;
+}
+
+
+/*
+ * Find initial a_bits = log2 (a_max), b_bits = log2 (b_max).
+ * Initial a_max and b_max values were found empirically. Some factors:
+ *
+ * If s_max<256 there is no scramble, so tab[b] needs to cover 0..s_max-1.
+ *
+ * a_max and b_max must be powers of 2 because the values in 0..a_max-1 and
+ * 0..b_max-1 are produced by applying a bitmask to the initial hash function.
+ *
+ * a_max must be less than s_max, in fact less than n_keys, because otherwise
+ * there would often be no i such that a^scramble[i] is in 0..n_keys-1 for
+ * all the *a*s associated with a given *b*, so there would be no legal
+ * value to assign to tab[b]. This only matters when we're doing a minimal
+ * perfect hash.
+ *
+ * It takes around 800 trials to find distinct (a,b) with nkey=s_max*(5/8)
+ * and a_max*b_max = s_max*s_max/32.
+ *
+ * Values of b_max less than s_max/4 never work, and s_max/2 always works.
+ *
+ * We want b_max as small as possible because it is the number of bytes in
+ * the huge array we must create for the perfect hash.
+ *
+ * When nkey <= s_max*(5/8), b_max=s_max/4 works much more often with
+ * a_max=s_max/8 than with a_max=s_max/4. Above s_max*(5/8), b_max=s_max/4
+ * doesn't seem to care whether a_max=s_max/8 or a_max=s_max/4. I think it
+ * has something to do with 5/8 = 1/8 * 5. For example examine 80000,
+ * 85000, and 90000 keys with different values of a_max. This only matters
+ * if we're doing a minimal perfect hash.
+ *
+ * When a_max*b_max <= 1<<U32BITS, the initial hash must produce one integer.
+ * Bigger than that it must produce two integers, which increases the
+ * cost of the hash per character hashed.
+ */
+static void
+guess_initial_parameters (phash_main_t * pm)
+{
+ u32 s_bits, s_max, a_max, b_max, n_keys;
+ int is_minimal, is_fast_mode;
+ const u32 b_max_use_scramble_threshold = 4096;
+
+ is_minimal = (pm->flags & PHASH_FLAG_MINIMAL) != 0;
+ is_fast_mode = (pm->flags & PHASH_FLAG_FAST_MODE) != 0;
+
+ n_keys = vec_len (pm->keys);
+ s_bits = max_log2 (n_keys);
+ s_max = 1 << s_bits;
+ a_max = 0;
+
+ if (is_minimal)
+ {
+ switch (s_bits)
+ {
+ case 0:
+ a_max = 1;
+	  b_max = 1;
+	  break;
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ /*
+ * Was: a_max = is_minimal ? s_max / 2 : s_max;
+ * However, we know that is_minimal must be true, so the
+ * if-arm of the ternary expression is always executed.
+ */
+ a_max = s_max / 2;
+ b_max = s_max / 2;
+ break;
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ case 16:
+ case 17:
+ if (is_fast_mode)
+ {
+ a_max = s_max / 2;
+ b_max = s_max / 4;
+ }
+ else if (s_max / 4 < b_max_use_scramble_threshold)
+ {
+ if (n_keys <= s_max * 0.52)
+ a_max = b_max = s_max / 8;
+ else
+ a_max = b_max = s_max / 4;
+ }
+ else
+ {
+ a_max = ((n_keys <= s_max * (5.0 / 8.0)) ? s_max / 8 :
+ (n_keys <=
+ s_max * (3.0 / 4.0)) ? s_max / 4 : s_max / 2);
+ b_max = s_max / 4; /* always give the small size a shot */
+ }
+ break;
+ case 18:
+ if (is_fast_mode)
+ a_max = b_max = s_max / 2;
+ else
+ {
+ a_max = s_max / 8; /* never require the multiword hash */
+ b_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 4 : s_max / 2;
+ }
+ break;
+ case 19:
+ case 20:
+ a_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 8 : s_max / 2;
+ b_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 4 : s_max / 2;
+ break;
+ default:
+ /* Just find a hash as quick as possible.
+ We'll be thrashing virtual memory at this size. */
+ a_max = b_max = s_max / 2;
+ break;
+ }
+ }
+ else
+ {
+ /* Non-minimal perfect hash. */
+ if (is_fast_mode && n_keys > s_max * 0.8)
+ {
+ s_max *= 2;
+ s_bits += 1;
+ }
+
+ if (s_max / 4 <= (1 << 14))
+ b_max = ((n_keys <= s_max * 0.56) ? s_max / 32 :
+ (n_keys <= s_max * 0.74) ? s_max / 16 : s_max / 8);
+ else
+ b_max = ((n_keys <= s_max * 0.6) ? s_max / 16 :
+ (n_keys <= s_max * 0.8) ? s_max / 8 : s_max / 4);
+
+ if (is_fast_mode && b_max < s_max / 8)
+ b_max = s_max / 8;
+
+ if (a_max < 1)
+ a_max = 1;
+ if (b_max < 1)
+ b_max = 1;
+ }
+
+ ASSERT (s_max == (1 << s_bits));
+ ASSERT (is_pow2 (a_max));
+ ASSERT (is_pow2 (b_max));
+ pm->s_bits = s_bits;
+ pm->a_bits = min_log2 (a_max);
+ pm->b_bits = min_log2 (b_max);
+ if (b_max >= b_max_use_scramble_threshold)
+ pm->flags |= PHASH_FLAG_USE_SCRAMBLE;
+}
+
+/* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
+/* permute(0)=0. This is intended and useful. */
+always_inline u32
+scramble_permute (u32 x, u32 nbits)
+{
+ int i;
+ int mask = (1 << nbits) - 1;
+ int const2 = 1 + nbits / 2;
+ int const3 = 1 + nbits / 3;
+ int const4 = 1 + nbits / 4;
+ int const5 = 1 + nbits / 5;
+ for (i = 0; i < 20; i++)
+ {
+ x = (x + (x << const2)) & mask;
+ x = (x ^ (x >> const3));
+ x = (x + (x << const4)) & mask;
+ x = (x ^ (x >> const5));
+ }
+ return x;
+}
+
+/* initialize scramble[] with distinct random values in 0..smax-1 */
+static void
+scramble_init (phash_main_t * pm)
+{
+ u32 i;
+
+ /* fill scramble[] with distinct random integers in 0..smax-1 */
+ vec_validate (pm->scramble, (1 << (pm->s_bits < 8 ? 8 : pm->s_bits)) - 1);
+ for (i = 0; i < vec_len (pm->scramble); i++)
+ pm->scramble[i] = scramble_permute (i, pm->s_bits);
+}
+
+/* Try to find a perfect hash function. */
+clib_error_t *
+phash_find_perfect_hash (phash_main_t * pm)
+{
+ clib_error_t *error = 0;
+ u32 max_a_bits, n_tries_this_a_b, want_minimal;
+
+ /* guess initial values for s_max, a_max and b_max */
+ guess_initial_parameters (pm);
+
+ want_minimal = pm->flags & PHASH_FLAG_MINIMAL;
+
+new_s:
+ if (pm->b_bits == 0)
+ pm->a_bits = pm->s_bits;
+
+ max_a_bits = pm->s_bits - want_minimal;
+ if (max_a_bits < 1)
+ max_a_bits = 1;
+
+ pm->hash_max = want_minimal ? vec_len (pm->keys) : (1 << pm->s_bits);
+
+ scramble_init (pm);
+
+ /* Allocate working memory. */
+ vec_free (pm->tabh);
+ vec_validate_init_empty (pm->tabh, pm->hash_max - 1, ~0);
+ vec_free (pm->tabq);
+ vec_validate (pm->tabq, 1 << pm->b_bits);
+
+ /* Actually find the perfect hash */
+ n_tries_this_a_b = 0;
+ while (1)
+ {
+ /* Choose random hash seeds until keys become unique. */
+ pm->hash_seed = random_u64 (&pm->random_seed);
+ pm->n_seed_trials++;
+ if (init_tabb (pm))
+ {
+ /* Found unique (A, B). */
+
+ /* Hash may already be perfect. */
+ if (pm->b_bits == 0)
+ goto done;
+
+ pm->n_perfect_calls++;
+ if (perfect (pm))
+ goto done;
+
+ goto increase_b;
+ }
+
+ /* Keep trying with different seed value. */
+ n_tries_this_a_b++;
+ if (n_tries_this_a_b < 2048)
+ continue;
+
+ /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
+ if (pm->a_bits < max_a_bits)
+ pm->a_bits++;
+ else if (pm->b_bits < pm->s_bits)
+ {
+ increase_b:
+ vec_resize (pm->tabb, vec_len (pm->tabb));
+ vec_resize (pm->tabq, vec_len (pm->tabq));
+ pm->b_bits++;
+ }
+ else
+ {
+	  /* Can't increase (A, B) any more, so try increasing S. */
+	  pm->s_bits++;
+	  goto new_s;
+ }
+ }
+
+done:
+ /* Construct mapping table for hash lookups. */
+ if (!error)
+ {
+ u32 b, v;
+
+ pm->a_shift = ((pm->flags & PHASH_FLAG_MIX64) ? 64 : 32) - pm->a_bits;
+ pm->b_mask = (1 << pm->b_bits) - 1;
+
+ vec_resize (pm->tab, vec_len (pm->tabb));
+ for (b = 0; b < vec_len (pm->tabb); b++)
+ {
+ v = pm->tabb[b].val_b;
+
+ /* Apply scramble now for small enough value of b_bits. */
+ if (!(pm->flags & PHASH_FLAG_USE_SCRAMBLE))
+ v = pm->scramble[v];
+
+ pm->tab[b] = v;
+ }
+ }
+
+ /* Free working memory. */
+ phash_main_free_working_memory (pm);
+
+ return error;
+}
+
+/* Slow hash computation for general keys. */
+uword
+phash_hash_slow (phash_main_t * pm, uword key)
+{
+ u32 a, b, v;
+
+ if (pm->flags & PHASH_FLAG_MIX64)
+ {
+ u64 x0, y0, z0;
+
+ x0 = y0 = z0 = pm->hash_seed;
+
+ if (pm->key_seed1)
+ {
+ u64 xyz[3];
+ pm->key_seed1 (pm->private, key, &xyz);
+ x0 += xyz[0];
+ y0 += xyz[1];
+ z0 += xyz[2];
+ }
+ else
+ x0 += key;
+
+ hash_mix64 (x0, y0, z0);
+
+ a = z0 >> pm->a_shift;
+ b = z0 & pm->b_mask;
+ }
+ else
+ {
+ u32 x0, y0, z0;
+
+ x0 = y0 = z0 = pm->hash_seed;
+
+ if (pm->key_seed1)
+ {
+ u32 xyz[3];
+ pm->key_seed1 (pm->private, key, &xyz);
+ x0 += xyz[0];
+ y0 += xyz[1];
+ z0 += xyz[2];
+ }
+ else
+ x0 += key;
+
+ hash_mix32 (x0, y0, z0);
+
+ a = z0 >> pm->a_shift;
+ b = z0 & pm->b_mask;
+ }
+
+ v = pm->tab[b];
+ if (pm->flags & PHASH_FLAG_USE_SCRAMBLE)
+ v = pm->scramble[v];
+ return a ^ v;
+}
+
+/* Verify that perfect hash is perfect. */
+clib_error_t *
+phash_validate (phash_main_t * pm)
+{
+ phash_key_t *k;
+ uword *unique_bitmap = 0;
+ clib_error_t *error = 0;
+
+ vec_foreach (k, pm->keys)
+ {
+ uword h = phash_hash_slow (pm, k->key);
+
+ if (h >= pm->hash_max)
+ {
+ error = clib_error_return (0, "hash out of range %wd", h);
+ goto done;
+ }
+
+ if (clib_bitmap_get (unique_bitmap, h))
+ {
+ error = clib_error_return (0, "hash non-unique");
+ goto done;
+ }
+
+ unique_bitmap = clib_bitmap_ori (unique_bitmap, h);
+ }
+
+done:
+ clib_bitmap_free (unique_bitmap);
+ return error;
+}
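
A minimal end-to-end sketch of the API (assumes inline word-sized keys, so none of the key_seed1/key_seed2/key_is_equal callbacks are needed):

    phash_main_t pm;
    clib_error_t *error;
    phash_key_t *k;
    uword i;

    memset (&pm, 0, sizeof (pm));
    pm.flags = PHASH_FLAG_MINIMAL | PHASH_FLAG_FAST_MODE;

    for (i = 0; i < 100; i++)
      {
        vec_add2 (pm.keys, k, 1);
        k->key = 1000 + 7 * i;        /* any set of distinct keys */
      }

    error = phash_find_perfect_hash (&pm);
    if (!error)
      error = phash_validate (&pm);
    if (error)
      clib_error_report (error);
    else
      {
        /* Each key now maps to a distinct slot in 0..99. */
        uword slot = phash_hash_slow (&pm, pm.keys[0].key);
        (void) slot;
      }
    phash_main_free (&pm);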
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/phash.h b/src/vppinfra/phash.h
new file mode 100644
index 00000000..746a0fdd
--- /dev/null
+++ b/src/vppinfra/phash.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_phash_h
+#define included_phash_h
+
+#include <vppinfra/hash.h> /* for Bob's mixing functions */
+
+typedef struct
+{
+  /* May be either a pointer to a vector or an inline word. */
+ uword key;
+
+ /* Hash code (A, B). */
+ u32 a, b;
+} phash_key_t;
+
+/* Table indexed by B. */
+typedef struct
+{
+ /* Vector of key indices with this same value of B. */
+ u32 *keys;
+
+ /* hash=a^tabb[b].val_b */
+ u32 val_b;
+
+ /* High watermark of who has visited this map node. */
+ u32 water_b;
+} phash_tabb_t;
+
+always_inline void
+phash_tabb_free (phash_tabb_t * b)
+{
+ vec_free (b->keys);
+ b->val_b = b->water_b = 0;
+}
+
+typedef struct
+{
+ /* b that currently occupies this hash */
+ u32 b_q;
+
+ /* Queue position of parent that could use this hash. */
+ u32 parent_q;
+
+ /* What to change parent tab[b] to use this hash. */
+ u32 newval_q;
+
+ /* Original value of tab[b]. */
+ u32 oldval_q;
+} phash_tabq_t;
+
+typedef struct
+{
+ u8 a_bits, b_bits, s_bits, a_shift;
+ u32 b_mask;
+ u32 *tab;
+ u32 *scramble;
+
+ /* Seed value for hash mixer. */
+ u64 hash_seed;
+
+ u32 flags;
+
+ /* Key functions want 64 bit keys.
+ Use hash_mix64 rather than hash_mix32. */
+#define PHASH_FLAG_MIX64 (1 << 0)
+#define PHASH_FLAG_MIX32 (0 << 0)
+
+ /* When b_bits is large enough (>= 12) we scramble. */
+#define PHASH_FLAG_USE_SCRAMBLE (1 << 1)
+
+ /* Slow mode gives smaller tables but at the expense of more run time. */
+#define PHASH_FLAG_SLOW_MODE (0 << 2)
+#define PHASH_FLAG_FAST_MODE (1 << 2)
+
+ /* Generate minimal perfect hash instead of perfect hash. */
+#define PHASH_FLAG_NON_MINIMAL (0 << 3)
+#define PHASH_FLAG_MINIMAL (1 << 3)
+
+ /* vec_len (keys) for minimal hash;
+ 1 << s_bits for non-minimal hash. */
+ u32 hash_max;
+
+ /* Vector of keys. */
+ phash_key_t *keys;
+
+ /* Used by callbacks to identify keys. */
+ void *private;
+
+ /* Key comparison callback. */
+ int (*key_is_equal) (void *private, uword key1, uword key2);
+
+ /* Callback to reduce single key -> hash seeds. */
+ void (*key_seed1) (void *private, uword key, void *seed);
+
+  /* Callback to reduce two keys -> hash seeds. */
+ void (*key_seed2) (void *private, uword key1, uword key2, void *seed);
+
+ /* Stuff used to compute perfect hash. */
+ u32 random_seed;
+
+ /* Stuff indexed by B. */
+ phash_tabb_t *tabb;
+
+ /* Table of B ordered by number of keys in tabb[b]. */
+ u32 *tabb_sort;
+
+ /* Unique key (or ~0 if none) for a given hash
+ H = A ^ scramble[tab[B].val_b]. */
+ u32 *tabh;
+
+ /* Stuff indexed by q. */
+ phash_tabq_t *tabq;
+
+ /* Stats. */
+ u32 n_seed_trials, n_perfect_calls;
+} phash_main_t;
+
+always_inline void
+phash_main_free_working_memory (phash_main_t * pm)
+{
+ vec_free (pm->tabb);
+ vec_free (pm->tabq);
+ vec_free (pm->tabh);
+ vec_free (pm->tabb_sort);
+ if (!(pm->flags & PHASH_FLAG_USE_SCRAMBLE))
+ vec_free (pm->scramble);
+}
+
+always_inline void
+phash_main_free (phash_main_t * pm)
+{
+ phash_main_free_working_memory (pm);
+ vec_free (pm->tab);
+ vec_free (pm->keys);
+ memset (pm, 0, sizeof (pm[0]));
+}
+
+/* Slow hash computation for general keys. */
+uword phash_hash_slow (phash_main_t * pm, uword key);
+
+/* Main routine to compute perfect hash. */
+clib_error_t *phash_find_perfect_hash (phash_main_t * pm);
+
+/* Validates that hash is indeed perfect. */
+clib_error_t *phash_validate (phash_main_t * pm);
+
+/* Unit test. */
+int phash_test_main (unformat_input_t * input);
+
+#endif /* included_phash_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/pipeline.h b/src/vppinfra/pipeline.h
new file mode 100644
index 00000000..5a9799b4
--- /dev/null
+++ b/src/vppinfra/pipeline.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pipeline.h: software pipeline infrastructure
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_clib_pipeline_h
+#define included_clib_pipeline_h
+
+#define clib_pipeline_stage(F,TYPE,ARG,I,BODY) \
+ always_inline void F##_inline (void * _, u32 I) \
+ { TYPE ARG = _; { BODY; } } \
+ never_inline void F##_no_inline (TYPE ARG, u32 I) \
+ { F##_inline (ARG, I); }
+
+#define clib_pipeline_stage_static(F,TYPE,ARG,I,BODY) \
+ static_always_inline void F##_inline (void * _, u32 I) \
+ { TYPE ARG = _; { BODY; } } \
+ never_inline void F##_no_inline (TYPE ARG, u32 I) \
+ { F##_inline (ARG, I); }
+
+#define clib_pipeline_stage_no_inline(F,TYPE,ARG,I,BODY) \
+ never_inline void F##_no_inline (void * _, u32 I) \
+ { TYPE ARG = _; { BODY; } } \
+ never_inline void F##_inline (TYPE ARG, u32 I) \
+ { F##_no_inline (ARG, I); }
+
+#define _clib_pipeline_var(v) _clib_pipeline_##v
+
+#define clib_pipeline_stage_execute(F,A,I,S) \
+ F##_##S (A, _clib_pipeline_var(i) - (I))
+
+#define clib_pipeline_main_stage(F,A,I) \
+ clib_pipeline_stage_execute (F, A, I, inline)
+#define clib_pipeline_init_stage(F,A,I) \
+ if (_clib_pipeline_var(i) >= (I)) clib_pipeline_stage_execute (F, A, I, no_inline)
+#define clib_pipeline_exit_stage(F,A,I) \
+ if (_clib_pipeline_var(i) >= (I) && _clib_pipeline_var(i) - (I) < _clib_pipeline_var(n_vectors)) \
+ clib_pipeline_stage_execute (F, A, I, no_inline)
+
+#define clib_pipeline_init_loop \
+ for (_clib_pipeline_var(i) = 0; \
+ _clib_pipeline_var(i) < \
+ clib_min (_clib_pipeline_var(n_stages) - 1, \
+ _clib_pipeline_var(n_vectors)); \
+ _clib_pipeline_var(i)++)
+
+#define clib_pipeline_main_loop \
+ for (; _clib_pipeline_var(i) < _clib_pipeline_var(n_vectors); \
+ _clib_pipeline_var(i)++)
+
+#define clib_pipeline_exit_loop \
+ for (; _clib_pipeline_var(i) < (_clib_pipeline_var(n_vectors) \
+ + _clib_pipeline_var(n_stages) - 1); \
+ _clib_pipeline_var(i)++)
+
+#define clib_pipeline_run_2_stage(N,ARG,STAGE0,STAGE1) \
+do { \
+ uword _clib_pipeline_var(n_vectors) = (N); \
+ uword _clib_pipeline_var(n_stages) = 2; \
+ uword _clib_pipeline_var(i); \
+ \
+ clib_pipeline_init_loop \
+ { \
+ clib_pipeline_init_stage (STAGE0, ARG, 0); \
+ } \
+ \
+ clib_pipeline_main_loop \
+ { \
+ clib_pipeline_main_stage (STAGE0, ARG, 0); \
+ clib_pipeline_main_stage (STAGE1, ARG, 1); \
+ } \
+ \
+ clib_pipeline_exit_loop \
+ { \
+ clib_pipeline_exit_stage (STAGE1, ARG, 1); \
+ } \
+} while (0)
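
As an illustration of how the pieces compose, a hypothetical two-stage pipeline that prefetches in stage 0 and touches the element in stage 1 (the demo_* names are invented for this sketch):

    typedef struct { u32 *counters; } demo_ctx_t;

    clib_pipeline_stage (demo_prefetch, demo_ctx_t *, ctx, i,
    {
      CLIB_PREFETCH (&ctx->counters[i], CLIB_CACHE_LINE_BYTES, LOAD);
    })

    clib_pipeline_stage (demo_bump, demo_ctx_t *, ctx, i,
    {
      ctx->counters[i] += 1;    /* stand-in for real per-element work */
    })

    void
    demo_run (demo_ctx_t * ctx, u32 n_elts)
    {
      clib_pipeline_run_2_stage (n_elts, ctx, demo_prefetch, demo_bump);
    }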
+
+#define clib_pipeline_run_3_stage(N,ARG,STAGE0,STAGE1,STAGE2) \
+do { \
+ uword _clib_pipeline_var(n_vectors) = (N); \
+ uword _clib_pipeline_var(n_stages) = 3; \
+ uword _clib_pipeline_var(i); \
+ \
+ clib_pipeline_init_loop \
+ { \
+ clib_pipeline_init_stage (STAGE0, ARG, 0); \
+ clib_pipeline_init_stage (STAGE1, ARG, 1); \
+ } \
+ \
+ clib_pipeline_main_loop \
+ { \
+ clib_pipeline_main_stage (STAGE0, ARG, 0); \
+ clib_pipeline_main_stage (STAGE1, ARG, 1); \
+ clib_pipeline_main_stage (STAGE2, ARG, 2); \
+ } \
+ \
+ clib_pipeline_exit_loop \
+ { \
+ clib_pipeline_exit_stage (STAGE1, ARG, 1); \
+ clib_pipeline_exit_stage (STAGE2, ARG, 2); \
+ } \
+} while (0)
+
+#define clib_pipeline_run_4_stage(N,ARG,STAGE0,STAGE1,STAGE2,STAGE3) \
+do { \
+ uword _clib_pipeline_var(n_vectors) = (N); \
+ uword _clib_pipeline_var(n_stages) = 4; \
+ uword _clib_pipeline_var(i); \
+ \
+ clib_pipeline_init_loop \
+ { \
+ clib_pipeline_init_stage (STAGE0, ARG, 0); \
+ clib_pipeline_init_stage (STAGE1, ARG, 1); \
+ clib_pipeline_init_stage (STAGE2, ARG, 2); \
+ } \
+ \
+ clib_pipeline_main_loop \
+ { \
+ clib_pipeline_main_stage (STAGE0, ARG, 0); \
+ clib_pipeline_main_stage (STAGE1, ARG, 1); \
+ clib_pipeline_main_stage (STAGE2, ARG, 2); \
+ clib_pipeline_main_stage (STAGE3, ARG, 3); \
+ } \
+ \
+ clib_pipeline_exit_loop \
+ { \
+ clib_pipeline_exit_stage (STAGE1, ARG, 1); \
+ clib_pipeline_exit_stage (STAGE2, ARG, 2); \
+ clib_pipeline_exit_stage (STAGE3, ARG, 3); \
+ } \
+} while (0)
+
+#endif /* included_clib_pipeline_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/pool.c b/src/vppinfra/pool.c
new file mode 100644
index 00000000..ed83b41a
--- /dev/null
+++ b/src/vppinfra/pool.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003, 2004 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/pool.h>
+
+void
+_pool_init_fixed (void **pool_ptr, u32 elt_size, u32 max_elts)
+{
+ u8 *mmap_base;
+ u64 vector_size;
+ u64 free_index_size;
+ u64 total_size;
+ u64 page_size;
+ pool_header_t *fh;
+ vec_header_t *vh;
+ u8 *v;
+ u32 *fi;
+ u32 i;
+ u32 set_bits;
+
+ ASSERT (elt_size);
+ ASSERT (max_elts);
+
+ vector_size = pool_aligned_header_bytes + vec_header_bytes (0)
+ + (u64) elt_size *max_elts;
+
+ free_index_size = vec_header_bytes (0) + sizeof (u32) * max_elts;
+
+ /* Round up to a cache line boundary */
+ vector_size = (vector_size + CLIB_CACHE_LINE_BYTES - 1)
+ & ~(CLIB_CACHE_LINE_BYTES - 1);
+
+ free_index_size = (free_index_size + CLIB_CACHE_LINE_BYTES - 1)
+ & ~(CLIB_CACHE_LINE_BYTES - 1);
+
+ total_size = vector_size + free_index_size;
+
+ /* Round up to an even number of pages */
+ page_size = clib_mem_get_page_size ();
+ total_size = (total_size + page_size - 1) & ~(page_size - 1);
+
+ /* mmap demand zero memory */
+
+ mmap_base = mmap (0, total_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (mmap_base == MAP_FAILED)
+ {
+ clib_unix_warning ("mmap");
+      *pool_ptr = 0;
+      return;
+    }
+
+ /* First comes the pool header */
+ fh = (pool_header_t *) mmap_base;
+ /* Find the user vector pointer */
+ v = (u8 *) (mmap_base + pool_aligned_header_bytes);
+ /* Finally, the vector header */
+ vh = _vec_find (v);
+
+ fh->free_bitmap = 0; /* No free elts (yet) */
+ fh->max_elts = max_elts;
+ fh->mmap_base = mmap_base;
+ fh->mmap_size = total_size;
+
+ vh->len = max_elts;
+
+ /* Build the free-index vector */
+ vh = (vec_header_t *) (v + vector_size);
+ vh->len = max_elts;
+ fi = (u32 *) (vh + 1);
+
+ fh->free_indices = fi;
+
+ /* Set the entire free bitmap */
+ clib_bitmap_alloc (fh->free_bitmap, max_elts);
+ memset (fh->free_bitmap, 0xff, vec_len (fh->free_bitmap) * sizeof (uword));
+
+ /* Clear any extraneous set bits */
+ set_bits = vec_len (fh->free_bitmap) * BITS (uword);
+
+ for (i = max_elts; i < set_bits; i++)
+ fh->free_bitmap = clib_bitmap_set (fh->free_bitmap, i, 0);
+
+ /* Create the initial free vector */
+ for (i = 0; i < max_elts; i++)
+ fi[i] = (max_elts - 1) - i;
+
+ *pool_ptr = v;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h
new file mode 100644
index 00000000..62d5b54e
--- /dev/null
+++ b/src/vppinfra/pool.h
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003, 2004 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+/** @file
+ * @brief Fixed length block allocator.
+ Pools are built from clib vectors and bitmaps. Use pools when
+ repeatedly allocating and freeing fixed-size data. Pools are
+ fast, and avoid memory fragmentation.
+ */
+
+#ifndef included_pool_h
+#define included_pool_h
+
+#include <vppinfra/bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/mheap.h>
+
+
+typedef struct
+{
+ /** Bitmap of indices of free objects. */
+ uword *free_bitmap;
+
+ /** Vector of free indices. One element for each set bit in bitmap. */
+ u32 *free_indices;
+
+ /* The following fields are set for fixed-size, preallocated pools */
+
+ /** Maximum size of the pool, in elements */
+ u32 max_elts;
+
+ /** mmap segment info: base + length */
+ u8 *mmap_base;
+ u64 mmap_size;
+
+} pool_header_t;
+
+/** Align pool header so that pointers are naturally aligned. */
+#define pool_aligned_header_bytes \
+ vec_aligned_header_bytes (sizeof (pool_header_t), sizeof (void *))
+
+/** Get pool header from user pool pointer */
+always_inline pool_header_t *
+pool_header (void *v)
+{
+ return vec_aligned_header (v, sizeof (pool_header_t), sizeof (void *));
+}
+
+extern void _pool_init_fixed (void **, u32, u32);
+extern void fpool_free (void *);
+
+/** initialize a fixed-size, preallocated pool */
+#define pool_init_fixed(pool,max_elts) \
+{ \
+ _pool_init_fixed((void **)&(pool),sizeof(pool[0]),max_elts); \
+}
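
A short sketch of the fixed-pool flavor (session_t is a hypothetical element type). pool_get()/pool_put(), defined below, then draw from the preallocated free list and never remap or move the pool:

    typedef struct { u64 a; } session_t;
    session_t *sessions = 0;
    session_t *s;

    pool_init_fixed (sessions, 1024);   /* mmaps and preallocates all 1024 slots */
    pool_get (sessions, s);             /* cannot expand; dies if all 1024 in use */
    pool_put (sessions, s);
    pool_free (sessions);               /* unmaps the whole segment */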
+
+/** Validate a pool */
+always_inline void
+pool_validate (void *v)
+{
+ pool_header_t *p = pool_header (v);
+ uword i, n_free_bitmap;
+
+ if (!v)
+ return;
+
+ n_free_bitmap = clib_bitmap_count_set_bits (p->free_bitmap);
+ ASSERT (n_free_bitmap == vec_len (p->free_indices));
+ for (i = 0; i < vec_len (p->free_indices); i++)
+ ASSERT (clib_bitmap_get (p->free_bitmap, p->free_indices[i]) == 1);
+}
+
+always_inline void
+pool_header_validate_index (void *v, uword index)
+{
+ pool_header_t *p = pool_header (v);
+
+ if (v)
+ vec_validate (p->free_bitmap, index / BITS (uword));
+}
+
+#define pool_validate_index(v,i) \
+do { \
+ uword __pool_validate_index = (i); \
+ vec_validate_ha ((v), __pool_validate_index, \
+ pool_aligned_header_bytes, /* align */ 0); \
+ pool_header_validate_index ((v), __pool_validate_index); \
+} while (0)
+
+/** Number of active elements in a pool.
+ * @return Number of active elements in a pool
+ */
+always_inline uword
+pool_elts (void *v)
+{
+ uword ret = vec_len (v);
+ if (v)
+ ret -= vec_len (pool_header (v)->free_indices);
+ return ret;
+}
+
+/** Number of elements in pool vector.
+
+ @note You probably want to call pool_elts() instead.
+*/
+#define pool_len(p) vec_len(p)
+
+/** Number of elements in pool vector (usable as an lvalue)
+
+ @note You probably don't want to use this macro.
+*/
+#define _pool_len(p) _vec_len(p)
+
+/** Memory usage of pool header. */
+always_inline uword
+pool_header_bytes (void *v)
+{
+ pool_header_t *p = pool_header (v);
+
+ if (!v)
+ return 0;
+
+ return vec_bytes (p->free_bitmap) + vec_bytes (p->free_indices);
+}
+
+/** Memory usage of pool. */
+#define pool_bytes(P) (vec_bytes (P) + pool_header_bytes (P))
+
+/** Local variable naming macro. */
+#define _pool_var(v) _pool_##v
+
+/** Return the number of free elements in a pool: the free list plus unused space at the end of the vector. */
+always_inline uword
+pool_free_elts (void *v)
+{
+ pool_header_t *p = pool_header (v);
+ uword n_free = 0;
+
+ if (v)
+ {
+ n_free += vec_len (p->free_indices);
+
+ /* Space left at end of vector? */
+ n_free += vec_capacity (v, sizeof (p[0])) - vec_len (v);
+ }
+
+ return n_free;
+}
+
+/** Allocate an object E from a pool P (general version).
+
+    First search the free list. If nothing is free, extend the vector of objects.
+*/
+#define pool_get_aligned(P,E,A) \
+do { \
+ pool_header_t * _pool_var (p) = pool_header (P); \
+ uword _pool_var (l); \
+ \
+ _pool_var (l) = 0; \
+ if (P) \
+ _pool_var (l) = vec_len (_pool_var (p)->free_indices); \
+ \
+ if (_pool_var (l) > 0) \
+ { \
+ /* Return free element from free list. */ \
+ uword _pool_var (i) = _pool_var (p)->free_indices[_pool_var (l) - 1]; \
+ (E) = (P) + _pool_var (i); \
+ _pool_var (p)->free_bitmap = \
+ clib_bitmap_andnoti (_pool_var (p)->free_bitmap, _pool_var (i)); \
+ _vec_len (_pool_var (p)->free_indices) = _pool_var (l) - 1; \
+ } \
+ else \
+ { \
+ /* fixed-size, preallocated pools cannot expand */ \
+ if ((P) && _pool_var(p)->max_elts) \
+ { \
+ clib_warning ("can't expand fixed-size pool"); \
+ os_out_of_memory(); \
+ } \
+ /* Nothing on free list, make a new element and return it. */ \
+ P = _vec_resize (P, \
+ /* length_increment */ 1, \
+ /* new size */ (vec_len (P) + 1) * sizeof (P[0]), \
+ pool_aligned_header_bytes, \
+ /* align */ (A)); \
+ E = vec_end (P) - 1; \
+ } \
+} while (0)
+
+/** Allocate an object E from a pool P (unspecified alignment). */
+#define pool_get(P,E) pool_get_aligned(P,E,0)
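
Typical usage, sketched with a hypothetical element type. Callers keep element indices rather than pointers, since a growable pool (like any vector) may move when it expands:

    typedef struct { u32 value; } thing_t;
    thing_t *things = 0;    /* empty pool; grows on demand */
    thing_t *t;
    u32 ti;

    pool_get (things, t);               /* free list first, then extend */
    t->value = 42;
    ti = t - things;                    /* store the index, not the pointer */

    t = pool_elt_at_index (things, ti); /* re-resolve before each use; see below */
    pool_put (things, t);               /* back onto the free list */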
+
+/** See if pool_get will expand the pool or not */
+#define pool_get_aligned_will_expand(P,YESNO,A) \
+do { \
+ pool_header_t * _pool_var (p) = pool_header (P); \
+ uword _pool_var (l); \
+ \
+ _pool_var (l) = 0; \
+  if (P) \
+    _pool_var (l) = vec_len (_pool_var (p)->free_indices); \
+  \
+  /* Fixed-size, preallocated pools never expand */ \
+  if ((P) && _pool_var (p)->max_elts) \
+    YESNO = 0; \
+  /* Free elements, certainly won't expand */ \
+  else if (_pool_var (l) > 0) \
+    YESNO = 0; \
+ else \
+ { \
+ /* Nothing on free list, make a new element and return it. */ \
+ YESNO = _vec_resize_will_expand \
+ (P, \
+ /* length_increment */ 1, \
+ /* new size */ (vec_len (P) + 1) * sizeof (P[0]), \
+ pool_aligned_header_bytes, \
+ /* align */ (A)); \
+ } \
+} while (0)
+
+#define pool_get_will_expand(P,YESNO) pool_get_aligned_will_expand(P,YESNO,0)
+
+/** Use free bitmap to query whether given element is free. */
+#define pool_is_free(P,E) \
+({ \
+ pool_header_t * _pool_var (p) = pool_header (P); \
+ uword _pool_var (i) = (E) - (P); \
+  (_pool_var (i) < vec_len (P)) ? clib_bitmap_get (_pool_var (p)->free_bitmap, _pool_var (i)) : 1; \
+})
+
+/** Use free bitmap to query whether given index is free */
+#define pool_is_free_index(P,I) pool_is_free((P),(P)+(I))
+
+/** Free an object E in pool P. */
+#define pool_put(P,E) \
+do { \
+ pool_header_t * _pool_var (p) = pool_header (P); \
+ uword _pool_var (l) = (E) - (P); \
+ ASSERT (vec_is_member (P, E)); \
+ ASSERT (! pool_is_free (P, E)); \
+ \
+ /* Add element to free bitmap and to free list. */ \
+ _pool_var (p)->free_bitmap = \
+ clib_bitmap_ori (_pool_var (p)->free_bitmap, _pool_var (l)); \
+ /* Preallocated pool? */ \
+ if (_pool_var (p)->max_elts) \
+ { \
+ ASSERT(_pool_var(l) < _pool_var (p)->max_elts); \
+ _pool_var(p)->free_indices[_vec_len(_pool_var(p)->free_indices)] = \
+ _pool_var(l); \
+ _vec_len(_pool_var(p)->free_indices) += 1; \
+ } \
+ else \
+ vec_add1 (_pool_var (p)->free_indices, _pool_var (l)); \
+} while (0)
+
+/** Free pool element with given index. */
+#define pool_put_index(p,i) \
+do { \
+ typeof (p) _e = (p) + (i); \
+ pool_put (p, _e); \
+} while (0)
+
+/** Allocate N more free elements to pool (general version). */
+#define pool_alloc_aligned(P,N,A) \
+do { \
+ pool_header_t * _p; \
+ \
+ if ((P)) \
+ { \
+ _p = pool_header (P); \
+ if (_p->max_elts) \
+ { \
+ clib_warning ("Can't expand fixed-size pool"); \
+ os_out_of_memory(); \
+ } \
+ } \
+ \
+ (P) = _vec_resize ((P), 0, (vec_len (P) + (N)) * sizeof (P[0]), \
+ pool_aligned_header_bytes, \
+ (A)); \
+ _p = pool_header (P); \
+ vec_resize (_p->free_indices, (N)); \
+ _vec_len (_p->free_indices) -= (N); \
+} while (0)
+
+/** Allocate N more free elements to pool (unspecified alignment). */
+#define pool_alloc(P,N) pool_alloc_aligned(P,N,0)
+
+/** Low-level free pool operator (do not call directly). */
+always_inline void *
+_pool_free (void *v)
+{
+ pool_header_t *p = pool_header (v);
+ if (!v)
+ return v;
+ clib_bitmap_free (p->free_bitmap);
+
+ if (p->max_elts)
+ {
+ int rv;
+
+ rv = munmap (p->mmap_base, p->mmap_size);
+ if (rv)
+ clib_unix_warning ("munmap");
+ }
+ else
+ {
+ vec_free (p->free_indices);
+ vec_free_h (v, pool_aligned_header_bytes);
+ }
+ return 0;
+}
+
+/** Free a pool. */
+#define pool_free(p) (p) = _pool_free(p)
+
+/** Optimized iteration through pool.
+
+    @param LO index of the first element in the chunk
+    @param HI index one past the last element in the chunk
+ @param POOL pool to iterate across
+ @param BODY operation to perform
+
+ Optimized version which assumes that BODY is smart enough to
+    process multiple (LO,HI) chunks. See also pool_foreach().
+ */
+#define pool_foreach_region(LO,HI,POOL,BODY) \
+do { \
+ uword _pool_var (i), _pool_var (lo), _pool_var (hi), _pool_var (len); \
+ uword _pool_var (bl), * _pool_var (b); \
+ pool_header_t * _pool_var (p); \
+ \
+ _pool_var (p) = pool_header (POOL); \
+ _pool_var (b) = (POOL) ? _pool_var (p)->free_bitmap : 0; \
+ _pool_var (bl) = vec_len (_pool_var (b)); \
+ _pool_var (len) = vec_len (POOL); \
+ _pool_var (lo) = 0; \
+ \
+ for (_pool_var (i) = 0; \
+ _pool_var (i) <= _pool_var (bl); \
+ _pool_var (i)++) \
+ { \
+ uword _pool_var (m), _pool_var (f); \
+ _pool_var (m) = (_pool_var (i) < _pool_var (bl) \
+ ? _pool_var (b) [_pool_var (i)] \
+ : 1); \
+ while (_pool_var (m) != 0) \
+ { \
+ _pool_var (f) = first_set (_pool_var (m)); \
+ _pool_var (hi) = (_pool_var (i) * BITS (_pool_var (b)[0]) \
+ + min_log2 (_pool_var (f))); \
+ _pool_var (hi) = (_pool_var (i) < _pool_var (bl) \
+ ? _pool_var (hi) : _pool_var (len)); \
+ _pool_var (m) ^= _pool_var (f); \
+ if (_pool_var (hi) > _pool_var (lo)) \
+ { \
+ (LO) = _pool_var (lo); \
+ (HI) = _pool_var (hi); \
+ do { BODY; } while (0); \
+ } \
+ _pool_var (lo) = _pool_var (hi) + 1; \
+ } \
+ } \
+} while (0)
+
+/** Iterate through pool.
+
+ @param VAR A variable of same type as pool vector to be used as an
+ iterator.
+ @param POOL The pool to iterate across.
+ @param BODY The operation to perform, typically a code block. See
+ the example below.
+
+ This macro will call @c BODY with each active pool element.
+
+    It is a bad idea to allocate or free pool elements from within
+    @c pool_foreach. Build a vector of indices and dispose of them later,
+    or call pool_flush.
+
+
+ @par Example
+ @code{.c}
+ proc_t *procs; // a pool of processes.
+ proc_t *proc; // pointer to one process; used as the iterator.
+
+ pool_foreach (proc, procs, ({
+ if (proc->state != PROC_STATE_RUNNING)
+ continue;
+
+ // check a running proc in some way
+ ...
+ }));
+ @endcode
+
+ @warning Because @c pool_foreach is a macro, syntax errors can be
+ difficult to find inside @c BODY, let alone actual code bugs. One
+ can temporarily split a complex @c pool_foreach into a trivial
+ @c pool_foreach which builds a vector of active indices, and a
+ vec_foreach() (or plain for-loop) to walk the active index vector.
+ */
+#define pool_foreach(VAR,POOL,BODY) \
+do { \
+ uword _pool_foreach_lo, _pool_foreach_hi; \
+ pool_foreach_region (_pool_foreach_lo, _pool_foreach_hi, (POOL), \
+ ({ \
+ for ((VAR) = (POOL) + _pool_foreach_lo; \
+ (VAR) < (POOL) + _pool_foreach_hi; \
+ (VAR)++) \
+ do { BODY; } while (0); \
+ })); \
+} while (0)
+
+/** Returns pointer to element at given index.
+
+ ASSERTs that the supplied index is valid.
+ Even though one can write correct code of the form
+ @code
+ p = pool_base + index;
+ @endcode
+ use of @c pool_elt_at_index is strongly suggested.
+ */
+#define pool_elt_at_index(p,i) \
+({ \
+ typeof (p) _e = (p) + (i); \
+ ASSERT (! pool_is_free (p, _e)); \
+ _e; \
+})
+
+/** Return next occupied pool index after @c I; useful for safe iteration. */
+#define pool_next_index(P,I) \
+({ \
+ pool_header_t * _pool_var (p) = pool_header (P); \
+ uword _pool_var (rv) = (I) + 1; \
+ \
+ _pool_var(rv) = \
+ (_pool_var (rv) < vec_len (P) ? \
+ clib_bitmap_next_clear (_pool_var (p)->free_bitmap, _pool_var(rv)) \
+ : ~0); \
+ _pool_var(rv); \
+})
+
+/** Iterate pool by index. */
+#define pool_foreach_index(i,v,body) \
+ for ((i) = 0; (i) < vec_len (v); (i)++) \
+ { \
+ if (! pool_is_free_index ((v), (i))) \
+ do { body; } while (0); \
+ }
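+
+/* Usage sketch: iterate occupied slots by index instead of by pointer;
+   "do_something" is a placeholder:
+
+     uword i;
+     pool_foreach_index (i, my_pool,
+     ({
+       do_something (pool_elt_at_index (my_pool, i));
+     }));
+*/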
+
+/**
+ * @brief Remove all elements from a pool in a safe way
+ *
+ * @param VAR each element in the pool
+ * @param POOL The pool to flush
+ * @param BODY The actions to perform on each element before it is returned to
+ *             the pool, i.e. before it is 'freed'.
+ */
+#define pool_flush(VAR, POOL, BODY) \
+{ \
+ uword *_pool_var(ii), *_pool_var(dv) = NULL; \
+ \
+ pool_foreach((VAR), (POOL), \
+ ({ \
+ vec_add1(_pool_var(dv), (VAR) - (POOL)); \
+ })); \
+ vec_foreach(_pool_var(ii), _pool_var(dv)) \
+ { \
+ (VAR) = pool_elt_at_index((POOL), *_pool_var(ii)); \
+ do { BODY; } while (0); \
+ pool_put((POOL), (VAR)); \
+ } \
+ vec_free(_pool_var(dv)); \
+}
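+
+/* Usage sketch: release per-element state, then return every element
+   to the pool ("my_elt_free" is a placeholder):
+
+     my_elt_t *e;
+     pool_flush (e, my_pool,
+     ({
+       my_elt_free (e);
+     }));
+*/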
+
+#endif /* included_pool_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/ptclosure.c b/src/vppinfra/ptclosure.c
new file mode 100644
index 00000000..cda873ef
--- /dev/null
+++ b/src/vppinfra/ptclosure.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/ptclosure.h>
+
+u8 **
+clib_ptclosure_alloc (int n)
+{
+ u8 **rv = 0;
+ u8 *row;
+ int i;
+
+ ASSERT (n > 0);
+
+ vec_validate (rv, n - 1);
+ for (i = 0; i < n; i++)
+ {
+ row = 0;
+ vec_validate (row, n - 1);
+
+ rv[i] = row;
+ }
+ return rv;
+}
+
+void
+clib_ptclosure_free (u8 ** ptc)
+{
+ u8 *row;
+ int n = vec_len (ptc);
+ int i;
+
+ ASSERT (n > 0);
+
+ for (i = 0; i < n; i++)
+ {
+ row = ptc[i];
+ vec_free (row);
+ }
+ vec_free (ptc);
+}
+
+void
+clib_ptclosure_copy (u8 ** dst, u8 ** src)
+{
+ int i, n;
+ u8 *src_row, *dst_row;
+
+ n = vec_len (dst);
+
+  for (i = 0; i < n; i++)
+ {
+ src_row = src[i];
+ dst_row = dst[i];
+ clib_memcpy (dst_row, src_row, n);
+ }
+}
+
+/*
+ * compute the positive transitive closure
+ * of a relation via Warshall's algorithm.
+ *
+ * Ref:
+ * Warshall, Stephen (January 1962). "A theorem on Boolean matrices".
+ * Journal of the ACM 9 (1): 11–12.
+ *
+ * foo[i][j] = 1 means that item i
+ * "bears the relation" to item j.
+ *
+ * For example: "item i must be before item j"
+ *
+ * You could use a bitmap, but since the algorithm is
+ * O(n**3) in the first place, large N is inadvisable...
+ *
+ */
+
+u8 **
+clib_ptclosure (u8 ** orig)
+{
+ int i, j, k;
+ int n;
+ u8 **prev, **cur;
+
+ n = vec_len (orig);
+ prev = clib_ptclosure_alloc (n);
+ cur = clib_ptclosure_alloc (n);
+
+ clib_ptclosure_copy (prev, orig);
+
+ for (k = 0; k < n; k++)
+ {
+ for (i = 0; i < n; i++)
+ {
+ for (j = 0; j < n; j++)
+ {
+ cur[i][j] = prev[i][j] || (prev[i][k] && prev[k][j]);
+ }
+ }
+ clib_ptclosure_copy (prev, cur);
+ }
+ clib_ptclosure_free (prev);
+ return cur;
+}
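+
+/*
+ * Usage sketch: a 3-item "must precede" relation. Given 0->1 and 1->2,
+ * the closure also contains the implied 0->2:
+ *
+ *   u8 **r = clib_ptclosure_alloc (3);
+ *   r[0][1] = 1;
+ *   r[1][2] = 1;
+ *   u8 **c = clib_ptclosure (r);
+ *   ASSERT (c[0][2] == 1);
+ *   clib_ptclosure_free (r);
+ *   clib_ptclosure_free (c);
+ */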
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/ptclosure.h b/src/vppinfra/ptclosure.h
new file mode 100644
index 00000000..ee1609a1
--- /dev/null
+++ b/src/vppinfra/ptclosure.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_clib_ptclosure_h
+#define included_clib_ptclosure_h
+
+#include <vppinfra/vec.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+
+/*
+ * set r[i][j] if item i "bears the relation to" item j
+ *
+ */
+
+u8 **clib_ptclosure_alloc (int n);
+void clib_ptclosure_free (u8 ** ptc);
+void clib_ptclosure_copy (u8 ** dst, u8 ** src);
+u8 **clib_ptclosure (u8 ** orig);
+
+#endif /* included_clib_ptclosure_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/qhash.c b/src/vppinfra/qhash.c
new file mode 100644
index 00000000..f4e38c4a
--- /dev/null
+++ b/src/vppinfra/qhash.c
@@ -0,0 +1,858 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2006 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/qhash.h>
+
+#define QHASH_ALL_VALID ((1 << QHASH_KEYS_PER_BUCKET) - 1)
+
+void *
+_qhash_resize (void *v, uword length, uword elt_bytes)
+{
+ qhash_t *h;
+ uword l;
+
+ l = clib_max (max_log2 (length), 2 + QHASH_LOG2_KEYS_PER_BUCKET);
+
+ /* Round up if less than 1/2 full. */
+ l += ((f64) length / (f64) (1 << l)) < .5;
+
+ v = _vec_resize (0, 1 << l, elt_bytes << l, sizeof (h[0]),
+ /* align */ sizeof (uword));
+
+ h = qhash_header (v);
+ h->n_elts = 0;
+ h->log2_hash_size = l;
+ h->hash_keys =
+ clib_mem_alloc_aligned_no_fail (sizeof (h->hash_keys[0]) << l,
+ CLIB_CACHE_LINE_BYTES);
+ vec_resize (h->hash_key_valid_bitmap,
+ 1 << (l - QHASH_LOG2_KEYS_PER_BUCKET));
+ memset (v, ~0, elt_bytes << l);
+
+ return v;
+}
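+
+/* Sizing sketch: for length = 100, max_log2 gives l = 7 (128 slots)
+   and 100/128 >= 1/2, so l stays 7. For length = 7, the floor
+   2 + QHASH_LOG2_KEYS_PER_BUCKET gives l = 4, and 7/16 < 1/2 bumps
+   l to 5 (32 slots). */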
+
+static u8 min_log2_table[256];
+
+static inline uword
+qhash_min_log2 (uword x)
+{
+ ASSERT (is_pow2 (x));
+ ASSERT (x < 256);
+ return min_log2_table[x];
+}
+
+static void
+qhash_min_log2_init ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ min_log2_table[i] = min_log2 (i);
+}
+
+always_inline uword
+qhash_get_valid_elt_mask (qhash_t * h, uword i)
+{
+ return h->hash_key_valid_bitmap[i / QHASH_KEYS_PER_BUCKET];
+}
+
+always_inline void
+qhash_set_valid_elt_mask (qhash_t * h, uword i, uword mask)
+{
+ h->hash_key_valid_bitmap[i / QHASH_KEYS_PER_BUCKET] = mask;
+}
+
+always_inline uword
+qhash_search_bucket (uword * hash_keys, uword search_key, uword m)
+{
+ uword t;
+#define _(i) ((hash_keys[i] == search_key) << i)
+ t = (_(0) | _(1) | _(2) | _(3));
+ if (QHASH_KEYS_PER_BUCKET > 4)
+ t |= (_(4) | _(5) | _(6) | _(7));
+ if (QHASH_KEYS_PER_BUCKET > 8)
+ t |= (_(8) | _(9) | _(10) | _(11) | _(12) | _(13) | _(14) | _(15));
+#undef _
+ return m & t;
+}
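+
+/* Worked example (QHASH_KEYS_PER_BUCKET == 4): for bucket keys
+   {7, 3, 7, 9}, search_key 7 and valid mask m = 0xb (slots 0, 1, 3),
+   t = 0x5 (slots 0 and 2 hold the key), so the function returns
+   m & t = 0x1: only the valid matching slot survives. */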
+
+/* Lookup multiple keys in the same hash table. */
+void
+qhash_get_multiple (void *v,
+ uword * search_keys,
+ uword n_search_keys, u32 * result_indices)
+{
+ qhash_t *h = qhash_header (v);
+ uword *k, *hash_keys;
+ uword n_left, bucket_mask;
+ u32 *r;
+
+ if (!v)
+ {
+ memset (result_indices, ~0, sizeof (result_indices[0]) * n_search_keys);
+ return;
+ }
+
+ bucket_mask = pow2_mask (h->log2_hash_size) & ~(QHASH_KEYS_PER_BUCKET - 1);
+
+ k = search_keys;
+ n_left = n_search_keys;
+ hash_keys = h->hash_keys;
+ r = result_indices;
+
+ while (n_left >= 2)
+ {
+ u32 a0, b0, c0, bi0, valid0, match0;
+ u32 a1, b1, c1, bi1, valid1, match1;
+ uword k0, k1, *h0, *h1;
+
+ k0 = k[0];
+ k1 = k[1];
+ n_left -= 2;
+ k += 2;
+
+ a0 = a1 = h->hash_seeds[0];
+ b0 = b1 = h->hash_seeds[1];
+ c0 = c1 = h->hash_seeds[2];
+ a0 ^= k0;
+ a1 ^= k1;
+#if uword_bits == 64
+ b0 ^= k0 >> 32;
+ b1 ^= k1 >> 32;
+#endif
+
+ hash_mix32_step_1 (a0, b0, c0);
+ hash_mix32_step_1 (a1, b1, c1);
+ hash_mix32_step_2 (a0, b0, c0);
+ hash_mix32_step_2 (a1, b1, c1);
+ hash_mix32_step_3 (a0, b0, c0);
+ hash_mix32_step_3 (a1, b1, c1);
+
+ bi0 = c0 & bucket_mask;
+ bi1 = c1 & bucket_mask;
+
+ h0 = hash_keys + bi0;
+ h1 = hash_keys + bi1;
+
+ /* Search two buckets. */
+ valid0 = qhash_get_valid_elt_mask (h, bi0);
+ valid1 = qhash_get_valid_elt_mask (h, bi1);
+
+ match0 = qhash_search_bucket (h0, k0, valid0);
+ match1 = qhash_search_bucket (h1, k1, valid1);
+
+ bi0 += qhash_min_log2 (match0);
+ bi1 += qhash_min_log2 (match1);
+
+ r[0] = match0 ? bi0 : ~0;
+ r[1] = match1 ? bi1 : ~0;
+ r += 2;
+
+ /* Full buckets trigger search of overflow hash. */
+ if (PREDICT_FALSE (!match0 && valid0 == QHASH_ALL_VALID))
+ {
+ uword *p = hash_get (h->overflow_hash, k0);
+ r[-2] = p ? p[0] : ~0;
+ }
+
+ /* Full buckets trigger search of overflow hash. */
+ if (PREDICT_FALSE (!match1 && valid1 == QHASH_ALL_VALID))
+ {
+ uword *p = hash_get (h->overflow_hash, k1);
+ r[-1] = p ? p[0] : ~0;
+ }
+ }
+
+ while (n_left >= 1)
+ {
+ u32 a0, b0, c0, bi0, valid0, match0;
+ uword k0, *h0;
+
+ k0 = k[0];
+ n_left -= 1;
+ k += 1;
+
+ a0 = h->hash_seeds[0];
+ b0 = h->hash_seeds[1];
+ c0 = h->hash_seeds[2];
+ a0 ^= k0;
+#if uword_bits == 64
+ b0 ^= k0 >> 32;
+#endif
+
+ hash_mix32 (a0, b0, c0);
+
+ bi0 = c0 & bucket_mask;
+
+ h0 = hash_keys + bi0;
+
+ /* Search one bucket. */
+ valid0 = qhash_get_valid_elt_mask (h, bi0);
+ match0 = qhash_search_bucket (h0, k0, valid0);
+
+ bi0 += qhash_min_log2 (match0);
+
+ r[0] = match0 ? bi0 : ~0;
+ r += 1;
+
+ /* Full buckets trigger search of overflow hash. */
+ if (PREDICT_FALSE (!match0 && valid0 == QHASH_ALL_VALID))
+ {
+ uword *p = hash_get (h->overflow_hash, k0);
+ r[-1] = p ? p[0] : ~0;
+ }
+ }
+}
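+
+/* Usage sketch: batch-resolve several keys in one call; a result of
+   (u32) ~0 means the corresponding key is absent:
+
+     uword keys[2] = { 10, 20 };
+     u32 ri[2];
+     qhash_get_multiple (v, keys, 2, ri);
+*/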
+
+/* Lookup multiple keys in the same hash table.
+ Returns index of first matching key. */
+u32
+qhash_get_first_match (void *v,
+ uword * search_keys,
+ uword n_search_keys, uword * matching_key)
+{
+ qhash_t *h = qhash_header (v);
+ uword *k, *hash_keys;
+ uword n_left, match_mask, bucket_mask;
+
+ if (!v)
+ return ~0;
+
+ match_mask = 0;
+ bucket_mask = pow2_mask (h->log2_hash_size) & ~(QHASH_KEYS_PER_BUCKET - 1);
+
+ k = search_keys;
+ n_left = n_search_keys;
+ hash_keys = h->hash_keys;
+ while (n_left >= 2)
+ {
+ u32 a0, b0, c0, bi0, valid0;
+ u32 a1, b1, c1, bi1, valid1;
+ uword k0, k1, *h0, *h1;
+
+ k0 = k[0];
+ k1 = k[1];
+ n_left -= 2;
+ k += 2;
+
+ a0 = a1 = h->hash_seeds[0];
+ b0 = b1 = h->hash_seeds[1];
+ c0 = c1 = h->hash_seeds[2];
+ a0 ^= k0;
+ a1 ^= k1;
+#if uword_bits == 64
+ b0 ^= k0 >> 32;
+ b1 ^= k1 >> 32;
+#endif
+
+ hash_mix32_step_1 (a0, b0, c0);
+ hash_mix32_step_1 (a1, b1, c1);
+ hash_mix32_step_2 (a0, b0, c0);
+ hash_mix32_step_2 (a1, b1, c1);
+ hash_mix32_step_3 (a0, b0, c0);
+ hash_mix32_step_3 (a1, b1, c1);
+
+ bi0 = c0 & bucket_mask;
+ bi1 = c1 & bucket_mask;
+
+ h0 = hash_keys + bi0;
+ h1 = hash_keys + bi1;
+
+ /* Search two buckets. */
+ valid0 = qhash_get_valid_elt_mask (h, bi0);
+ valid1 = qhash_get_valid_elt_mask (h, bi1);
+ match_mask = qhash_search_bucket (h0, k0, valid0);
+ match_mask |= (qhash_search_bucket (h1, k1, valid1)
+ << QHASH_KEYS_PER_BUCKET);
+ if (match_mask)
+ {
+ uword bi, is_match1;
+
+ bi = qhash_min_log2 (match_mask);
+ is_match1 = bi >= QHASH_KEYS_PER_BUCKET;
+
+ bi += ((is_match1 ? bi1 : bi0)
+ - (is_match1 << QHASH_LOG2_KEYS_PER_BUCKET));
+ *matching_key = (k - 2 - search_keys) + is_match1;
+ return bi;
+ }
+
+ /* Full buckets trigger search of overflow hash. */
+ if (PREDICT_FALSE (valid0 == QHASH_ALL_VALID
+ || valid1 == QHASH_ALL_VALID))
+ {
+ uword *p = 0;
+ uword ki = k - 2 - search_keys;
+
+ if (valid0 == QHASH_ALL_VALID)
+ p = hash_get (h->overflow_hash, k0);
+
+ if (!p && valid1 == QHASH_ALL_VALID)
+ {
+ p = hash_get (h->overflow_hash, k1);
+ ki++;
+ }
+
+ if (p)
+ {
+ *matching_key = ki;
+ return p[0];
+ }
+ }
+ }
+
+ while (n_left >= 1)
+ {
+ u32 a0, b0, c0, bi0, valid0;
+ uword k0, *h0;
+
+ k0 = k[0];
+ n_left -= 1;
+ k += 1;
+
+ a0 = h->hash_seeds[0];
+ b0 = h->hash_seeds[1];
+ c0 = h->hash_seeds[2];
+ a0 ^= k0;
+#if uword_bits == 64
+ b0 ^= k0 >> 32;
+#endif
+
+ hash_mix32 (a0, b0, c0);
+
+ bi0 = c0 & bucket_mask;
+
+ h0 = hash_keys + bi0;
+
+ /* Search one bucket. */
+ valid0 = qhash_get_valid_elt_mask (h, bi0);
+ match_mask = qhash_search_bucket (h0, k0, valid0);
+ if (match_mask)
+ {
+ uword bi;
+ bi = bi0 + qhash_min_log2 (match_mask);
+ *matching_key = (k - 1 - search_keys);
+ return bi;
+ }
+
+ /* Full buckets trigger search of overflow hash. */
+ if (PREDICT_FALSE (valid0 == QHASH_ALL_VALID))
+ {
+ uword *p = hash_get (h->overflow_hash, k0);
+ if (p)
+ {
+ *matching_key = (k - 1 - search_keys);
+ return p[0];
+ }
+ }
+ }
+
+ return ~0;
+}
+
+static void *
+qhash_set_overflow (void *v, uword elt_bytes,
+ uword key, uword bi, uword * n_elts, u32 * result)
+{
+ qhash_t *h = qhash_header (v);
+ uword *p = hash_get (h->overflow_hash, key);
+ uword i;
+
+ bi /= QHASH_KEYS_PER_BUCKET;
+
+ if (p)
+ i = p[0];
+ else
+ {
+ uword l = vec_len (h->overflow_free_indices);
+ if (l > 0)
+ {
+ i = h->overflow_free_indices[l - 1];
+ _vec_len (h->overflow_free_indices) = l - 1;
+ }
+ else
+ i = (1 << h->log2_hash_size) + hash_elts (h->overflow_hash);
+ hash_set (h->overflow_hash, key, i);
+ vec_validate (h->overflow_counts, bi);
+ h->overflow_counts[bi] += 1;
+ *n_elts += 1;
+
+ l = vec_len (v);
+ if (i >= l)
+ {
+ uword dl = round_pow2 (1 + i - l, 8);
+ v = _vec_resize (v, dl, (l + dl) * elt_bytes, sizeof (h[0]),
+ /* align */ sizeof (uword));
+ memset (v + l * elt_bytes, ~0, dl * elt_bytes);
+ }
+ }
+
+ *result = i;
+
+ return v;
+}
+
+static uword
+qhash_unset_overflow (void *v, uword key, uword bi, uword * n_elts)
+{
+ qhash_t *h = qhash_header (v);
+ uword *p = hash_get (h->overflow_hash, key);
+ uword result;
+
+ bi /= QHASH_KEYS_PER_BUCKET;
+
+ if (p)
+ {
+ result = p[0];
+ hash_unset (h->overflow_hash, key);
+ ASSERT (bi < vec_len (h->overflow_counts));
+ ASSERT (h->overflow_counts[bi] > 0);
+ ASSERT (*n_elts > 0);
+ vec_add1 (h->overflow_free_indices, result);
+ h->overflow_counts[bi] -= 1;
+ *n_elts -= 1;
+ }
+ else
+ result = ~0;
+
+ return result;
+}
+
+/* Return a one-hot mask of the first free slot in a bucket.
+   The hash-offset argument I is currently unused. */
+always_inline uword
+qhash_find_free (uword i, uword valid_mask)
+{
+ return first_set (~valid_mask & pow2_mask (QHASH_KEYS_PER_BUCKET));
+}
+
+void *
+_qhash_set_multiple (void *v,
+ uword elt_bytes,
+ uword * search_keys,
+ uword n_search_keys, u32 * result_indices)
+{
+ qhash_t *h = qhash_header (v);
+ uword *k, *hash_keys;
+ uword n_left, n_elts, bucket_mask;
+ u32 *r;
+
+ if (vec_len (v) < n_search_keys)
+ v = _qhash_resize (v, n_search_keys, elt_bytes);
+
+ if (qhash_min_log2 (2) != 1)
+ {
+ qhash_min_log2_init ();
+ ASSERT (qhash_min_log2 (2) == 1);
+ }
+
+ ASSERT (v != 0);
+
+ bucket_mask = pow2_mask (h->log2_hash_size) & ~(QHASH_KEYS_PER_BUCKET - 1);
+
+ hash_keys = h->hash_keys;
+ k = search_keys;
+ r = result_indices;
+ n_left = n_search_keys;
+ n_elts = h->n_elts;
+
+ while (n_left >= 2)
+ {
+ u32 a0, b0, c0, bi0, match0, valid0, free0;
+ u32 a1, b1, c1, bi1, match1, valid1, free1;
+ uword k0, *h0;
+ uword k1, *h1;
+
+ k0 = k[0];
+ k1 = k[1];
+
+ /* Keys must be unique. */
+ ASSERT (k0 != k1);
+
+ n_left -= 2;
+ k += 2;
+
+ a0 = a1 = h->hash_seeds[0];
+ b0 = b1 = h->hash_seeds[1];
+ c0 = c1 = h->hash_seeds[2];
+ a0 ^= k0;
+ a1 ^= k1;
+#if uword_bits == 64
+ b0 ^= k0 >> 32;
+ b1 ^= k1 >> 32;
+#endif
+
+ hash_mix32_step_1 (a0, b0, c0);
+ hash_mix32_step_1 (a1, b1, c1);
+ hash_mix32_step_2 (a0, b0, c0);
+ hash_mix32_step_2 (a1, b1, c1);
+ hash_mix32_step_3 (a0, b0, c0);
+ hash_mix32_step_3 (a1, b1, c1);
+
+ bi0 = c0 & bucket_mask;
+ bi1 = c1 & bucket_mask;
+
+ h0 = hash_keys + bi0;
+ h1 = hash_keys + bi1;
+
+ /* Search two buckets. */
+ valid0 = qhash_get_valid_elt_mask (h, bi0);
+ valid1 = qhash_get_valid_elt_mask (h, bi1);
+
+ match0 = qhash_search_bucket (h0, k0, valid0);
+ match1 = qhash_search_bucket (h1, k1, valid1);
+
+      /* Find first free slot in bucket. */
+ free0 = qhash_find_free (c0 & (QHASH_KEYS_PER_BUCKET - 1), valid0);
+
+ valid1 = valid1 | (bi0 == bi1 ? free0 : 0);
+ free1 = qhash_find_free (c1 & (QHASH_KEYS_PER_BUCKET - 1), valid1);
+
+ n_elts += (match0 == 0) + (match1 == 0);
+
+ match0 = match0 ? match0 : free0;
+ match1 = match1 ? match1 : free1;
+
+ valid0 |= match0;
+ valid1 |= match1;
+
+ h0 += qhash_min_log2 (match0);
+ h1 += qhash_min_log2 (match1);
+
+ if (PREDICT_FALSE (!match0 || !match1))
+ goto slow_path2;
+
+ h0[0] = k0;
+ h1[0] = k1;
+ r[0] = h0 - hash_keys;
+ r[1] = h1 - hash_keys;
+ r += 2;
+ qhash_set_valid_elt_mask (h, bi0, valid0);
+ qhash_set_valid_elt_mask (h, bi1, valid1);
+ continue;
+
+ slow_path2:
+ if (!match0)
+ {
+ n_elts -= 1;
+ v = qhash_set_overflow (v, elt_bytes, k0, bi0, &n_elts, &r[0]);
+ }
+ else
+ {
+ h0[0] = k0;
+ r[0] = h0 - hash_keys;
+ qhash_set_valid_elt_mask (h, bi0, valid0);
+ }
+ if (!match1)
+ {
+ n_elts -= 1;
+ v = qhash_set_overflow (v, elt_bytes, k1, bi1, &n_elts, &r[1]);
+ }
+ else
+ {
+ h1[0] = k1;
+ r[1] = h1 - hash_keys;
+ qhash_set_valid_elt_mask (h, bi1, valid1);
+ }
+ r += 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 a0, b0, c0, bi0, match0, valid0, free0;
+ uword k0, *h0;
+
+ k0 = k[0];
+ n_left -= 1;
+ k += 1;
+
+ a0 = h->hash_seeds[0];
+ b0 = h->hash_seeds[1];
+ c0 = h->hash_seeds[2];
+ a0 ^= k0;
+#if uword_bits == 64
+ b0 ^= k0 >> 32;
+#endif
+
+ hash_mix32 (a0, b0, c0);
+
+ bi0 = c0 & bucket_mask;
+
+ h0 = hash_keys + bi0;
+
+ valid0 = qhash_get_valid_elt_mask (h, bi0);
+
+      /* Find first free slot in bucket. */
+ free0 = qhash_find_free (c0 & (QHASH_KEYS_PER_BUCKET - 1), valid0);
+
+ match0 = qhash_search_bucket (h0, k0, valid0);
+
+ n_elts += (match0 == 0);
+
+ match0 = match0 ? match0 : free0;
+
+ valid0 |= match0;
+
+ h0 += qhash_min_log2 (match0);
+
+ if (PREDICT_FALSE (!match0))
+ goto slow_path1;
+
+ h0[0] = k0;
+ r[0] = h0 - hash_keys;
+ r += 1;
+ qhash_set_valid_elt_mask (h, bi0, valid0);
+ continue;
+
+ slow_path1:
+ n_elts -= 1;
+ v = qhash_set_overflow (v, elt_bytes, k0, bi0, &n_elts, &r[0]);
+ r += 1;
+ }
+
+ h = qhash_header (v);
+ h->n_elts = n_elts;
+
+ return v;
+}
+
+static uword
+unset_slow_path (void *v, uword elt_bytes,
+ uword k0, uword bi0, uword valid0, uword match0,
+ uword * n_elts)
+{
+ qhash_t *h = qhash_header (v);
+ uword i, j = 0, k, l, t = ~0;
+ hash_pair_t *p, *found;
+
+ if (!match0)
+ {
+ if (valid0 == QHASH_ALL_VALID)
+ t = qhash_unset_overflow (v, k0, bi0, n_elts);
+ return t;
+ }
+
+ i = bi0 / QHASH_KEYS_PER_BUCKET;
+ t = bi0 + qhash_min_log2 (match0);
+
+ if (valid0 == QHASH_ALL_VALID
+ && i < vec_len (h->overflow_counts) && h->overflow_counts[i] > 0)
+ {
+ found = 0;
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, h->overflow_hash, ({
+ j = qhash_hash_mix (h, p->key) / QHASH_KEYS_PER_BUCKET;
+ if (j == i)
+ {
+ found = p;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ ASSERT (found != 0);
+ ASSERT (j == i);
+
+ l = found->value[0];
+ k = found->key;
+ hash_unset3 (h->overflow_hash, k, &j);
+ vec_add1 (h->overflow_free_indices, j);
+ h->overflow_counts[i] -= 1;
+
+ qhash_set_valid_elt_mask (h, bi0, valid0);
+
+ h->hash_keys[t] = k;
+ clib_memswap (v + t * elt_bytes, v + l * elt_bytes, elt_bytes);
+ t = l;
+ }
+ else
+ qhash_set_valid_elt_mask (h, bi0, valid0 ^ match0);
+
+ return t;
+}
+
+void
+_qhash_unset_multiple (void *v,
+ uword elt_bytes,
+ uword * search_keys,
+ uword n_search_keys, u32 * result_indices)
+{
+ qhash_t *h = qhash_header (v);
+ uword *k, *hash_keys;
+ uword n_left, n_elts, bucket_mask;
+ u32 *r;
+
+ if (!v)
+ {
+ uword i;
+ for (i = 0; i < n_search_keys; i++)
+ result_indices[i] = ~0;
+      return;
+    }
+
+ bucket_mask = pow2_mask (h->log2_hash_size) & ~(QHASH_KEYS_PER_BUCKET - 1);
+
+ hash_keys = h->hash_keys;
+ k = search_keys;
+ r = result_indices;
+ n_left = n_search_keys;
+ n_elts = h->n_elts;
+
+ while (n_left >= 2)
+ {
+ u32 a0, b0, c0, bi0, match0, valid0;
+ u32 a1, b1, c1, bi1, match1, valid1;
+ uword k0, *h0;
+ uword k1, *h1;
+
+ k0 = k[0];
+ k1 = k[1];
+
+ /* Keys must be unique. */
+ ASSERT (k0 != k1);
+
+ n_left -= 2;
+ k += 2;
+
+ a0 = a1 = h->hash_seeds[0];
+ b0 = b1 = h->hash_seeds[1];
+ c0 = c1 = h->hash_seeds[2];
+ a0 ^= k0;
+ a1 ^= k1;
+#if uword_bits == 64
+ b0 ^= k0 >> 32;
+ b1 ^= k1 >> 32;
+#endif
+
+ hash_mix32_step_1 (a0, b0, c0);
+ hash_mix32_step_1 (a1, b1, c1);
+ hash_mix32_step_2 (a0, b0, c0);
+ hash_mix32_step_2 (a1, b1, c1);
+ hash_mix32_step_3 (a0, b0, c0);
+ hash_mix32_step_3 (a1, b1, c1);
+
+ bi0 = c0 & bucket_mask;
+ bi1 = c1 & bucket_mask;
+
+ h0 = hash_keys + bi0;
+ h1 = hash_keys + bi1;
+
+ /* Search two buckets. */
+ valid0 = qhash_get_valid_elt_mask (h, bi0);
+ valid1 = qhash_get_valid_elt_mask (h, bi1);
+
+ match0 = qhash_search_bucket (h0, k0, valid0);
+ match1 = qhash_search_bucket (h1, k1, valid1);
+
+ n_elts -= (match0 != 0) + (match1 != 0);
+
+ if (PREDICT_FALSE (valid0 == QHASH_ALL_VALID
+ || valid1 == QHASH_ALL_VALID))
+ goto slow_path2;
+
+ valid0 ^= match0;
+ qhash_set_valid_elt_mask (h, bi0, valid0);
+
+ valid1 = bi0 == bi1 ? valid0 : valid1;
+ valid1 ^= match1;
+
+ qhash_set_valid_elt_mask (h, bi1, valid1);
+
+ r[0] = match0 ? bi0 + qhash_min_log2 (match0) : ~0;
+ r[1] = match1 ? bi1 + qhash_min_log2 (match1) : ~0;
+ r += 2;
+ continue;
+
+ slow_path2:
+ r[0] = unset_slow_path (v, elt_bytes, k0, bi0, valid0, match0, &n_elts);
+ if (bi0 == bi1)
+ {
+ /* Search again in same bucket to test new overflow element. */
+ valid1 = qhash_get_valid_elt_mask (h, bi0);
+ if (!match1)
+ {
+ match1 = qhash_search_bucket (h1, k1, valid1);
+ n_elts -= (match1 != 0);
+ }
+ }
+ r[1] = unset_slow_path (v, elt_bytes, k1, bi1, valid1, match1, &n_elts);
+ r += 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 a0, b0, c0, bi0, match0, valid0;
+ uword k0, *h0;
+
+ k0 = k[0];
+ n_left -= 1;
+ k += 1;
+
+ a0 = h->hash_seeds[0];
+ b0 = h->hash_seeds[1];
+ c0 = h->hash_seeds[2];
+ a0 ^= k0;
+#if uword_bits == 64
+ b0 ^= k0 >> 32;
+#endif
+
+ hash_mix32 (a0, b0, c0);
+
+ bi0 = c0 & bucket_mask;
+
+ h0 = hash_keys + bi0;
+
+ valid0 = qhash_get_valid_elt_mask (h, bi0);
+
+ match0 = qhash_search_bucket (h0, k0, valid0);
+ n_elts -= (match0 != 0);
+ qhash_set_valid_elt_mask (h, bi0, valid0 ^ match0);
+
+ r[0] = match0 ? bi0 + qhash_min_log2 (match0) : ~0;
+ r += 1;
+
+ if (PREDICT_FALSE (valid0 == QHASH_ALL_VALID))
+ r[-1] = unset_slow_path (v, elt_bytes, k0, bi0, valid0, match0,
+ &n_elts);
+ }
+
+ h->n_elts = n_elts;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/qhash.h b/src/vppinfra/qhash.h
new file mode 100644
index 00000000..9dbbd971
--- /dev/null
+++ b/src/vppinfra/qhash.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2006 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_qhash_h
+#define included_qhash_h
+
+#include <vppinfra/cache.h>
+#include <vppinfra/hash.h>
+
+/* Word hash tables. */
+typedef struct
+{
+ /* Number of elements in hash. */
+ u32 n_elts;
+
+ u32 log2_hash_size;
+
+ /* Jenkins hash seeds. */
+ u32 hash_seeds[3];
+
+  /* Fallback CLIB hash for overflow from fixed-size buckets. */
+ uword *overflow_hash;
+
+ u32 *overflow_counts, *overflow_free_indices;
+
+ u8 *hash_key_valid_bitmap;
+
+ uword *hash_keys;
+} qhash_t;
+
+always_inline qhash_t *
+qhash_header (void *v)
+{
+ return vec_header (v, sizeof (qhash_t));
+}
+
+always_inline uword
+qhash_elts (void *v)
+{
+ return v ? qhash_header (v)->n_elts : 0;
+}
+
+always_inline uword
+qhash_n_overflow (void *v)
+{
+ return v ? hash_elts (qhash_header (v)->overflow_hash) : 0;
+}
+
+#define QHASH_LOG2_KEYS_PER_BUCKET 2
+#define QHASH_KEYS_PER_BUCKET (1 << QHASH_LOG2_KEYS_PER_BUCKET)
+
+always_inline uword
+qhash_hash_mix (qhash_t * h, uword key)
+{
+ u32 a, b, c;
+
+ a = h->hash_seeds[0];
+ b = h->hash_seeds[1];
+ c = h->hash_seeds[2];
+
+ a ^= key;
+#if uword_bits == 64
+ b ^= key >> 32;
+#endif
+
+ hash_mix32 (a, b, c);
+
+ return c & pow2_mask (h->log2_hash_size);
+}
+
+#define qhash_resize(v,n) (v) = _qhash_resize ((v), (n), sizeof ((v)[0]))
+
+/* Not yet implemented: qhash_foreach currently expands to nothing. */
+#define qhash_foreach(var,v,body)
+
+#define qhash_set_multiple(v,keys,n,results) \
+ (v) = _qhash_set_multiple ((v), sizeof ((v)[0]), (keys), (n), (results))
+
+#define qhash_unset_multiple(v,keys,n,results) \
+ _qhash_unset_multiple ((v), sizeof ((v)[0]), (keys), (n), (results))
+
+#define qhash_get(v,key) \
+({ \
+ uword _qhash_get_k = (key); \
+ qhash_get_first_match ((v), &_qhash_get_k, 1, &_qhash_get_k); \
+})
+
+#define qhash_set(v,k) \
+({ \
+ uword _qhash_set_k = (k); \
+ qhash_set_multiple ((v), &_qhash_set_k, 1, &_qhash_set_k); \
+ _qhash_set_k; \
+})
+
+#define qhash_unset(v,k) \
+({ \
+ uword _qhash_unset_k = (k); \
+ qhash_unset_multiple ((v), &_qhash_unset_k, 1, &_qhash_unset_k); \
+ _qhash_unset_k; \
+})
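+
+/* Usage sketch: a qhash maps uword keys to slots of the companion
+   vector v; the returned index addresses v directly:
+
+     u32 *v = 0;
+     uword i = qhash_set (v, 0x1234);   // may resize v
+     v[i] = 42;
+     u32 gi = qhash_get (v, 0x1234);    // (u32) ~0 if absent
+     qhash_unset (v, 0x1234);
+*/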
+
+void *_qhash_resize (void *v, uword length, uword elt_bytes);
+
+/* Lookup multiple keys in the same hash table. */
+void
+qhash_get_multiple (void *v,
+ uword * search_keys,
+ uword n_search_keys, u32 * result_indices);
+
+/* Lookup multiple keys in the same hash table.
+ Returns index of first matching key. */
+u32
+qhash_get_first_match (void *v,
+ uword * search_keys,
+ uword n_search_keys, uword * matching_key);
+
+/* Set/unset helper functions. */
+void *_qhash_set_multiple (void *v,
+ uword elt_bytes,
+ uword * search_keys,
+ uword n_search_keys, u32 * result_indices);
+void
+_qhash_unset_multiple (void *v,
+ uword elt_bytes,
+ uword * search_keys,
+ uword n_search_keys, u32 * result_indices);
+
+#endif /* included_qhash_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/qsort.c b/src/vppinfra/qsort.c
new file mode 100644
index 00000000..2faa5897
--- /dev/null
+++ b/src/vppinfra/qsort.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Imported into CLIB by Eliot Dresselhaus from:
+ *
+ * This file is part of
+ * MakeIndex - A formatter and format independent index processor
+ *
+ * This file is public domain software donated by
+ * Nelson Beebe (beebe@science.utah.edu).
+ *
+ * modifications copyright (c) 2003 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/clib.h>
+
+/*
+ * qsort.c: Our own version of the system qsort routine which is faster by an
+ * average of 25%, with lows and highs of 10% and 50%. The THRESHold below is
+ * the insertion sort threshold, and has been adjusted for records of size 48
+ * bytes. The MTHREShold is where we stop finding a better median.
+ */
+
+#define THRESH 4 /* threshold for insertion */
+#define MTHRESH 6 /* threshold for median */
+
+typedef struct
+{
+ word qsz; /* size of each record */
+ word thresh; /* THRESHold in chars */
+ word mthresh; /* MTHRESHold in chars */
+ int (*qcmp) (const void *, const void *); /* the comparison routine */
+} qst_t;
+
+static void qst (qst_t * q, char *base, char *max);
+
+/*
+ * qsort: First, set up some parameters for qst to share.
+ * Then, quicksort with qst(), and then a cleanup insertion sort ourselves.
+ * Sound simple? It's not...
+ */
+
+void
+qsort (void *base, uword n, uword size,
+ int (*compar) (const void *, const void *))
+{
+ char *i;
+ char *j;
+ char *lo;
+ char *hi;
+ char *min;
+ char c;
+ char *max;
+ qst_t _q, *q = &_q;
+
+ if (n <= 1)
+ return;
+
+ q->qsz = size;
+ q->qcmp = compar;
+ q->thresh = q->qsz * THRESH;
+ q->mthresh = q->qsz * MTHRESH;
+ max = base + n * q->qsz;
+ if (n >= THRESH)
+ {
+ qst (q, base, max);
+ hi = base + q->thresh;
+ }
+ else
+ {
+ hi = max;
+ }
+ /*
+ * First put smallest element, which must be in the first THRESH, in the
+ * first position as a sentinel. This is done just by searching the
+ * first THRESH elements (or the first n if n < THRESH), finding the min,
+ * and swapping it into the first position.
+ */
+ for (j = lo = base; (lo += q->qsz) < hi;)
+ {
+ if ((*compar) (j, lo) > 0)
+ j = lo;
+ }
+ if (j != base)
+ { /* swap j into place */
+ for (i = base, hi = base + q->qsz; i < hi;)
+ {
+ c = *j;
+ *j++ = *i;
+ *i++ = c;
+ }
+ }
+ /*
+ * With our sentinel in place, we now run the following hyper-fast
+ * insertion sort. For each remaining element, min, from [1] to [n-1],
+ * set hi to the index of the element AFTER which this one goes. Then, do
+ * the standard insertion sort shift on a character at a time basis for
+ * each element in the frob.
+ */
+ for (min = base; (hi = min += q->qsz) < max;)
+ {
+ while ((*q->qcmp) (hi -= q->qsz, min) > 0);
+ if ((hi += q->qsz) != min)
+ {
+ for (lo = min + q->qsz; --lo >= min;)
+ {
+ c = *lo;
+ for (i = j = lo; (j -= q->qsz) >= hi; i = j)
+ *i = *j;
+ *i = c;
+ }
+ }
+ }
+}
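+
+/* Usage sketch: sort an int array with a three-way comparator
+   ("int_cmp" is a placeholder name):
+
+     static int
+     int_cmp (const void *a, const void *b)
+     {
+       const int *x = a, *y = b;
+       return (*x > *y) - (*x < *y);
+     }
+
+     int a[] = { 3, 1, 2 };
+     qsort (a, ARRAY_LEN (a), sizeof (a[0]), int_cmp);
+*/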
+
+
+
+/*
+ * qst: Do a quicksort. First, find the median element, and put that one in
+ * the first place as the discriminator. (This "median" is just the median
+ * of the first, last and middle elements). (Using this median instead of
+ * the first element is a big win). Then, the usual partitioning/swapping,
+ * followed by moving the discriminator into the right place. Then, figure
+ * out the sizes of the two partions, do the smaller one recursively and the
+ * larger one via a repeat of this code. Stopping when there are less than
+ * THRESH elements in a partition and cleaning up with an insertion sort (in
+ * our caller) is a huge win. All data swaps are done in-line, which is
+ * space-losing but time-saving. (And there are only three places where this
+ * is done).
+ */
+
+static void
+qst (qst_t * q, char *base, char *max)
+{
+ char *i;
+ char *j;
+ char *jj;
+ char *mid;
+ int ii;
+ char c;
+ char *tmp;
+ int lo;
+ int hi;
+ int qsz = q->qsz;
+
+ lo = (int) (max - base); /* number of elements as chars */
+ do
+ {
+ /*
+ * At the top here, lo is the number of characters of elements in the
+ * current partition. (Which should be max - base). Find the median
+ * of the first, last, and middle element and make that the middle
+ * element. Set j to largest of first and middle. If max is larger
+ * than that guy, then it's that guy, else compare max with loser of
+ * first and take larger. Things are set up to prefer the middle,
+ * then the first in case of ties.
+ */
+ mid = i = base + qsz * ((unsigned) (lo / qsz) >> 1);
+ if (lo >= q->mthresh)
+ {
+ j = ((*q->qcmp) ((jj = base), i) > 0 ? jj : i);
+ if ((*q->qcmp) (j, (tmp = max - qsz)) > 0)
+ {
+ /* switch to first loser */
+ j = (j == jj ? i : jj);
+ if ((*q->qcmp) (j, tmp) < 0)
+ j = tmp;
+ }
+ if (j != i)
+ {
+ ii = qsz;
+ do
+ {
+ c = *i;
+ *i++ = *j;
+ *j++ = c;
+ }
+ while (--ii);
+ }
+ }
+ /* Semi-standard quicksort partitioning/swapping */
+ for (i = base, j = max - qsz;;)
+ {
+ while (i < mid && (*q->qcmp) (i, mid) <= 0)
+ i += qsz;
+ while (j > mid)
+ {
+ if ((*q->qcmp) (mid, j) <= 0)
+ {
+ j -= qsz;
+ continue;
+ }
+ tmp = i + qsz; /* value of i after swap */
+ if (i == mid)
+ { /* j <-> mid, new mid is j */
+ mid = jj = j;
+ }
+ else
+ { /* i <-> j */
+ jj = j;
+ j -= qsz;
+ }
+ goto swap;
+ }
+ if (i == mid)
+ {
+ break;
+ }
+ else
+ { /* i <-> mid, new mid is i */
+ jj = mid;
+ tmp = mid = i; /* value of i after swap */
+ j -= qsz;
+ }
+ swap:
+ ii = qsz;
+ do
+ {
+ c = *i;
+ *i++ = *jj;
+ *jj++ = c;
+ }
+ while (--ii);
+ i = tmp;
+ }
+ /*
+ * Look at sizes of the two partitions, do the smaller one first by
+ * recursion, then do the larger one by making sure lo is its size,
+       * base and max are updated correctly, and branching back. But only
+ * repeat (recursively or by branching) if the partition is of at
+ * least size THRESH.
+ */
+ i = (j = mid) + qsz;
+ if ((lo = (int) (j - base)) <= (hi = (int) (max - i)))
+ {
+ if (lo >= q->thresh)
+ qst (q, base, j);
+ base = i;
+ lo = hi;
+ }
+ else
+ {
+ if (hi >= q->thresh)
+ qst (q, i, max);
+ max = j;
+ }
+ }
+ while (lo >= q->thresh);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/random.c b/src/vppinfra/random.c
new file mode 100644
index 00000000..fa5bcc8c
--- /dev/null
+++ b/src/vppinfra/random.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/random.h>
+
+/* Default random seed for standalone version of library.
+ Value can be overridden by platform code from e.g.
+ machine's clock count register. */
+u32 standalone_random_default_seed = 1;
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/random.h b/src/vppinfra/random.h
new file mode 100644
index 00000000..5c139d05
--- /dev/null
+++ b/src/vppinfra/random.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_random_h
+#define included_random_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h> /* for vec_resize */
+#include <vppinfra/format.h> /* for unformat_input_t */
+
+/** \file
+ Linear Congruential Random Number Generator
+
+ This specific random number generator is described in
+ "Numerical Recipes in C", 2nd edition, page 284. If you need
+ random numbers with really excellent statistics, take a look
+ at Chapter 7...
+
+ By definition, a linear congruential random number generator
+ is of the form: rand[i+1] = a*rand[i] + c (mod m) for specific
+ values of (a,c,m).
+
+ In this case, choose m = 2**32 and use the low-order 32-bits of
+   the 64-bit product a*N[i]. Knuth suggests a=1664525; H.W. Lewis
+   has tested c=1013904223 extensively. This routine is
+ reputedly as good as any 32-bit LCRN, and costs only a single
+ multiply-add.
+
+ Several variants: 32/64-bit, machine word width,
+ f64 on the closed interval [0,1].
+*/
+
+/** \brief 32-bit random number generator */
+always_inline u32
+random_u32 (u32 * seed)
+{
+ *seed = (1664525 * *seed) + 1013904223;
+ return *seed;
+}
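+
+/* Usage sketch: seed once, then draw; the seed advances on every call:
+
+     u32 seed = random_default_seed ();
+     u32 r1 = random_u32 (&seed);
+     u32 r2 = random_u32 (&seed);
+*/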
+
+/* External test routine. */
+int test_random_main (unformat_input_t * input);
+
+/** \brief Maximum value returned by random_u32() */
+always_inline u32
+random_u32_max (void)
+{
+ return 0xffffffff;
+}
+
+#ifdef CLIB_UNIX
+
+#include <unistd.h> /* for getpid */
+
+/** \brief Default random seed (unix/linux user-mode) */
+always_inline uword
+random_default_seed (void)
+{
+ return getpid ();
+}
+
+#endif
+
+#ifdef CLIB_LINUX_KERNEL
+
+#include <linux/sched.h> /* for jiffies */
+
+/** \brief Default random seed (Linux kernel) */
+always_inline uword
+random_default_seed (void)
+{
+ return jiffies;
+}
+
+#endif
+
+#ifdef CLIB_STANDALONE
+extern u32 standalone_random_default_seed;
+
+always_inline u32
+random_default_seed (void)
+{
+ return standalone_random_default_seed;
+}
+#endif
+
+/** \brief 64-bit random number generator
+ * Again, constants courtesy of Donald Knuth.
+ */
+always_inline u64
+random_u64 (u64 * seed)
+{
+ *seed = 6364136223846793005ULL * *seed + 1442695040888963407ULL;
+ return *seed;
+}
+
+/** \brief Machine word size random number generator */
+always_inline uword
+random_uword (u32 * seed)
+{
+ if (sizeof (uword) == sizeof (u64))
+ return random_u64 ((u64 *) seed);
+ else
+ return random_u32 (seed);
+}
+
+/** \brief Generate f64 random number in the interval [0,1] */
+always_inline f64
+random_f64 (u32 * seed)
+{
+ return (f64) random_u32 (seed) / (f64) random_u32_max ();
+}
+
+/** \brief Generate random character vector
+
+ From the alphabet a-z, lower case.
+ Returns a vector of the supplied length which is NOT guaranteed to be
+ NULL-terminated. FIXME?
+*/
+always_inline u8 *
+random_string (u32 * seed, uword len)
+{
+ u8 *alphabet = (u8 *) "abcdefghijklmnopqrstuvwxyz";
+ u8 *s = 0;
+ word i;
+
+ vec_resize (s, len);
+ for (i = 0; i < len; i++)
+ s[i] = alphabet[random_u32 (seed) % 26];
+
+ return s;
+}
+
+#endif /* included_random_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/random_buffer.c b/src/vppinfra/random_buffer.c
new file mode 100644
index 00000000..df036980
--- /dev/null
+++ b/src/vppinfra/random_buffer.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/random_buffer.h>
+
+/* Fill random buffer. */
+void
+clib_random_buffer_fill (clib_random_buffer_t * b, uword n_words)
+{
+ uword *w, n = n_words;
+
+ if (n < 256)
+ n = 256;
+
+ n = round_pow2 (n, 2 << ISAAC_LOG2_SIZE);
+
+ vec_add2 (b->buffer, w, n);
+ do
+ {
+ isaac2 (b->ctx, w);
+ w += 2 * ISAAC_SIZE;
+ n -= 2 * ISAAC_SIZE;
+ }
+ while (n > 0);
+}
+
+void
+clib_random_buffer_init (clib_random_buffer_t * b, uword seed)
+{
+ uword i, j;
+
+ memset (b, 0, sizeof (b[0]));
+
+ /* Seed ISAAC. */
+ for (i = 0; i < ARRAY_LEN (b->ctx); i++)
+ {
+ uword s[ISAAC_SIZE];
+
+ for (j = 0; j < ARRAY_LEN (s); j++)
+ s[j] = ARRAY_LEN (b->ctx) * (seed + j) + i;
+
+ isaac_init (&b->ctx[i], s);
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/random_buffer.h b/src/vppinfra/random_buffer.h
new file mode 100644
index 00000000..eb318548
--- /dev/null
+++ b/src/vppinfra/random_buffer.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_random_buffer_h
+#define included_clib_random_buffer_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/random_isaac.h>
+
+typedef struct
+{
+ /* Two parallel ISAAC contexts for speed. */
+ isaac_t ctx[2];
+
+ /* Random buffer. */
+ uword *buffer;
+
+ /* Cache up to 1 word worth of bytes for random data
+ less than one word at a time. */
+ uword n_cached_bytes;
+
+ union
+ {
+ u8 cached_bytes[sizeof (uword)];
+ uword cached_word;
+ };
+}
+clib_random_buffer_t;
+
+always_inline void
+clib_random_buffer_free (clib_random_buffer_t * b)
+{
+ vec_free (b->buffer);
+}
+
+/* Fill random buffer. */
+void clib_random_buffer_fill (clib_random_buffer_t * b, uword n_words);
+
+/* Initialize random buffer. */
+void clib_random_buffer_init (clib_random_buffer_t * b, uword seed);
+
+/* Returns word aligned random data, possibly filling buffer. */
+always_inline void *
+clib_random_buffer_get_data (clib_random_buffer_t * b, uword n_bytes)
+{
+ uword n_words, i, l;
+
+ l = b->n_cached_bytes;
+ if (n_bytes <= l)
+ {
+      b->n_cached_bytes = l - n_bytes;
+      /* Cached bytes are consumed from the front of cached_word;
+         the first unconsumed byte is at offset sizeof (uword) - l. */
+      return &b->cached_bytes[sizeof (b->cached_word) - l];
+ }
+
+ n_words = n_bytes / sizeof (uword);
+ if (n_bytes % sizeof (uword))
+ n_words++;
+
+ /* Enough random words left? */
+ if (PREDICT_FALSE (n_words > vec_len (b->buffer)))
+ clib_random_buffer_fill (b, n_words);
+
+ i = vec_len (b->buffer) - n_words;
+ _vec_len (b->buffer) = i;
+
+ if (n_bytes < sizeof (uword))
+ {
+ b->cached_word = b->buffer[i];
+ b->n_cached_bytes = sizeof (uword) - n_bytes;
+ return b->cached_bytes;
+ }
+ else
+ return b->buffer + i;
+}
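+
+/* Usage sketch: draw 16 random bytes. The returned pointer aliases the
+   buffer's internal storage, so consume or copy the bytes before a
+   later call can trigger a refill:
+
+     clib_random_buffer_t rb;
+     clib_random_buffer_init (&rb, random_default_seed ());
+     u8 *bytes = clib_random_buffer_get_data (&rb, 16);
+*/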
+
+#endif /* included_clib_random_buffer_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/random_isaac.c b/src/vppinfra/random_isaac.c
new file mode 100644
index 00000000..6f00fc32
--- /dev/null
+++ b/src/vppinfra/random_isaac.c
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ ------------------------------------------------------------------------------
+ By Bob Jenkins, 1996, Public Domain
+ MODIFIED:
+ 960327: Creation (addition of randinit, really)
+ 970719: use context, not global variables, for internal state
+ 980324: renamed seed to flag
+ 980605: recommend ISAAC_LOG2_SIZE=4 for noncryptography.
+ 010626: note this is public domain
+ ------------------------------------------------------------------------------
+
+ Modified for CLIB by Eliot Dresselhaus.
+ Dear Bob, Thanks for all the great work. - Eliot
+
+ modifications copyright (c) 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/* ISAAC is Bob Jenkins' random number generator.
+ http://burtleburtle.net/bob/rand/isaacafa.html */
+
+#include <vppinfra/random_isaac.h>
+
+#if uword_bits != 32 && uword_bits != 64
+#error "isaac only works for 32 or 64 bit words"
+#endif
+
+#if uword_bits == 32
+
+#define ind32(mm,x) (*(u32 *)((u8 *)(mm) + ((x) & ((ISAAC_SIZE-1)<<2))))
+#define rngstep32(mix,a,b,mm,m,m2,r,x,y) \
+{ \
+ x = *m; \
+ a = (a^(mix)) + *(m2++); \
+ *(m++) = y = ind32(mm,x) + a + b; \
+ *(r++) = b = ind32(mm,y>>ISAAC_LOG2_SIZE) + x; \
+}
+
+void
+isaac (isaac_t * ctx, uword * results)
+{
+ u32 a, b, c, x, y, *m, *mm, *m2, *r, *mend;
+
+ mm = ctx->memory;
+ r = results;
+ a = ctx->a;
+ b = ctx->b;
+ c = ctx->c;
+
+ b += ++c;
+ mend = m2 = mm + ARRAY_LEN (ctx->memory) / 2;
+ m = mm;
+ while (m < mend)
+ {
+ rngstep32 (a << 13, a, b, mm, m, m2, r, x, y);
+ rngstep32 (a >> 6, a, b, mm, m, m2, r, x, y);
+ rngstep32 (a << 2, a, b, mm, m, m2, r, x, y);
+ rngstep32 (a >> 16, a, b, mm, m, m2, r, x, y);
+ }
+
+ m2 = mm;
+ while (m2 < mend)
+ {
+ rngstep32 (a << 13, a, b, mm, m, m2, r, x, y);
+ rngstep32 (a >> 6, a, b, mm, m, m2, r, x, y);
+ rngstep32 (a << 2, a, b, mm, m, m2, r, x, y);
+ rngstep32 (a >> 16, a, b, mm, m, m2, r, x, y);
+ }
+
+ ctx->a = a;
+ ctx->b = b;
+ ctx->c = c;
+}
+
+/* Perform 2 isaac runs with different contexts simultaneously. */
+void
+isaac2 (isaac_t * ctx, uword * results)
+{
+#define _(n) \
+ u32 a##n, b##n, c##n, x##n, y##n, * m##n, * mm##n, * m2##n, * r##n, * mend##n
+
+ _(0);
+ _(1);
+ (void) mend1; /* "set but unused variable" error on mend1 with gcc 4.9 */
+#undef _
+
+#define _(n) \
+do { \
+ mm##n = ctx[(n)].memory; \
+ r##n = results + (n) * ISAAC_SIZE; \
+ a##n = ctx[(n)].a; \
+ b##n = ctx[(n)].b; \
+ c##n = ctx[(n)].c; \
+ b##n += ++c##n; \
+ mend##n = m2##n = mm##n + ARRAY_LEN (ctx[(n)].memory) / 2; \
+ m##n = mm##n; \
+} while (0)
+
+ _(0);
+ _(1);
+
+#undef _
+
+ while (m0 < mend0)
+ {
+ rngstep32 (a0 << 13, a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep32 (a1 << 13, a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep32 (a0 >> 6, a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep32 (a1 >> 6, a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep32 (a0 << 2, a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep32 (a1 << 2, a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep32 (a0 >> 16, a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep32 (a1 >> 16, a1, b1, mm1, m1, m21, r1, x1, y1);
+ }
+
+ m20 = mm0;
+ m21 = mm1;
+ while (m20 < mend0)
+ {
+ rngstep32 (a0 << 13, a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep32 (a1 << 13, a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep32 (a0 >> 6, a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep32 (a1 >> 6, a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep32 (a0 << 2, a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep32 (a1 << 2, a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep32 (a0 >> 16, a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep32 (a1 >> 16, a1, b1, mm1, m1, m21, r1, x1, y1);
+ }
+
+ ctx[0].a = a0;
+ ctx[0].b = b0;
+ ctx[0].c = c0;
+ ctx[1].a = a1;
+ ctx[1].b = b1;
+ ctx[1].c = c1;
+}
+
+#define mix32(a,b,c,d,e,f,g,h) \
+{ \
+ a^=b<<11; d+=a; b+=c; \
+ b^=c>>2; e+=b; c+=d; \
+ c^=d<<8; f+=c; d+=e; \
+ d^=e>>16; g+=d; e+=f; \
+ e^=f<<10; h+=e; f+=g; \
+ f^=g>>4; a+=f; g+=h; \
+ g^=h<<8; b+=g; h+=a; \
+ h^=a>>9; c+=h; a+=b; \
+}
+
+void
+isaac_init (isaac_t * ctx, uword * seeds)
+{
+ word i;
+ u32 a, b, c, d, e, f, g, h, *m, *r;
+
+ ctx->a = ctx->b = ctx->c = 0;
+ m = ctx->memory;
+ r = seeds;
+
+ a = b = c = d = e = f = g = h = 0x9e3779b9; /* the golden ratio */
+
+ for (i = 0; i < 4; ++i) /* scramble it */
+ mix32 (a, b, c, d, e, f, g, h);
+
+ /* initialize using the contents of r[] as the seed */
+ for (i = 0; i < ISAAC_SIZE; i += 8)
+ {
+ a += r[i];
+ b += r[i + 1];
+ c += r[i + 2];
+ d += r[i + 3];
+ e += r[i + 4];
+ f += r[i + 5];
+ g += r[i + 6];
+ h += r[i + 7];
+ mix32 (a, b, c, d, e, f, g, h);
+ m[i] = a;
+ m[i + 1] = b;
+ m[i + 2] = c;
+ m[i + 3] = d;
+ m[i + 4] = e;
+ m[i + 5] = f;
+ m[i + 6] = g;
+ m[i + 7] = h;
+ }
+
+ /* do a second pass to make all of the seed affect all of m */
+ for (i = 0; i < ISAAC_SIZE; i += 8)
+ {
+ a += m[i];
+ b += m[i + 1];
+ c += m[i + 2];
+ d += m[i + 3];
+ e += m[i + 4];
+ f += m[i + 5];
+ g += m[i + 6];
+ h += m[i + 7];
+ mix32 (a, b, c, d, e, f, g, h);
+ m[i] = a;
+ m[i + 1] = b;
+ m[i + 2] = c;
+ m[i + 3] = d;
+ m[i + 4] = e;
+ m[i + 5] = f;
+ m[i + 6] = g;
+ m[i + 7] = h;
+ }
+}
+#endif /* uword_bits == 32 */
+
+#if uword_bits == 64
+
+#define ind64(mm,x) (*(u64 *)((u8 *)(mm) + ((x) & ((ISAAC_SIZE-1)<<3))))
+#define rngstep64(mix,a,b,mm,m,m2,r,x,y) \
+{ \
+ x = *m; \
+ a = (mix) + *(m2++); \
+ *(m++) = y = ind64(mm,x) + a + b; \
+ *(r++) = b = ind64(mm,y>>ISAAC_LOG2_SIZE) + x; \
+}
+
+void
+isaac (isaac_t * ctx, uword * results)
+{
+ u64 a, b, c, x, y, *m, *mm, *m2, *r, *mend;
+
+ mm = ctx->memory;
+ r = results;
+ a = ctx->a;
+ b = ctx->b;
+ c = ctx->c;
+
+ b += ++c;
+ mend = m2 = mm + ARRAY_LEN (ctx->memory) / 2;
+ m = mm;
+ while (m < mend)
+ {
+ rngstep64 (~(a ^ (a << 21)), a, b, mm, m, m2, r, x, y);
+ rngstep64 (a ^ (a >> 5), a, b, mm, m, m2, r, x, y);
+ rngstep64 (a ^ (a << 12), a, b, mm, m, m2, r, x, y);
+ rngstep64 (a ^ (a >> 33), a, b, mm, m, m2, r, x, y);
+ }
+
+ m2 = mm;
+ while (m2 < mend)
+ {
+ rngstep64 (~(a ^ (a << 21)), a, b, mm, m, m2, r, x, y);
+ rngstep64 (a ^ (a >> 5), a, b, mm, m, m2, r, x, y);
+ rngstep64 (a ^ (a << 12), a, b, mm, m, m2, r, x, y);
+ rngstep64 (a ^ (a >> 33), a, b, mm, m, m2, r, x, y);
+ }
+
+ ctx->a = a;
+ ctx->b = b;
+ ctx->c = c;
+}
+
+/* Perform 2 isaac runs with different contexts simultaneously. */
+void
+isaac2 (isaac_t * ctx, uword * results)
+{
+#define _(n) \
+ u64 a##n, b##n, c##n, x##n, y##n, * m##n, * mm##n, * m2##n, * r##n, * mend##n
+
+ _(0);
+ _(1);
+
+#undef _
+
+#define _(n) \
+do { \
+ mm##n = ctx[(n)].memory; \
+ r##n = results + (n) * ISAAC_SIZE; \
+ a##n = ctx[(n)].a; \
+ b##n = ctx[(n)].b; \
+ c##n = ctx[(n)].c; \
+ b##n += ++c##n; \
+ mend##n = m2##n = mm##n + ARRAY_LEN (ctx[(n)].memory) / 2; \
+ m##n = mm##n; \
+} while (0)
+
+ _(0);
+ _(1);
+
+#undef _
+
+  (void) mend1; /* avoid "set but unused variable" compiler warning */
+
+ while (m0 < mend0)
+ {
+ rngstep64 (~(a0 ^ (a0 << 21)), a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep64 (~(a1 ^ (a1 << 21)), a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep64 (a0 ^ (a0 >> 5), a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep64 (a1 ^ (a1 >> 5), a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep64 (a0 ^ (a0 << 12), a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep64 (a1 ^ (a1 << 12), a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep64 (a0 ^ (a0 >> 33), a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep64 (a1 ^ (a1 >> 33), a1, b1, mm1, m1, m21, r1, x1, y1);
+ }
+
+ m20 = mm0;
+ m21 = mm1;
+ while (m20 < mend0)
+ {
+ rngstep64 (~(a0 ^ (a0 << 21)), a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep64 (~(a1 ^ (a1 << 21)), a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep64 (a0 ^ (a0 >> 5), a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep64 (a1 ^ (a1 >> 5), a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep64 (a0 ^ (a0 << 12), a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep64 (a1 ^ (a1 << 12), a1, b1, mm1, m1, m21, r1, x1, y1);
+ rngstep64 (a0 ^ (a0 >> 33), a0, b0, mm0, m0, m20, r0, x0, y0);
+ rngstep64 (a1 ^ (a1 >> 33), a1, b1, mm1, m1, m21, r1, x1, y1);
+ }
+
+ ctx[0].a = a0;
+ ctx[0].b = b0;
+ ctx[0].c = c0;
+ ctx[1].a = a1;
+ ctx[1].b = b1;
+ ctx[1].c = c1;
+}
+
+#define mix64(a,b,c,d,e,f,g,h) \
+{ \
+ a-=e; f^=h>>9; h+=a; \
+ b-=f; g^=a<<9; a+=b; \
+ c-=g; h^=b>>23; b+=c; \
+ d-=h; a^=c<<15; c+=d; \
+ e-=a; b^=d>>14; d+=e; \
+ f-=b; c^=e<<20; e+=f; \
+ g-=c; d^=f>>17; f+=g; \
+ h-=d; e^=g<<14; g+=h; \
+}
+
+void
+isaac_init (isaac_t * ctx, uword * seeds)
+{
+ word i;
+ u64 a, b, c, d, e, f, g, h, *m, *r;
+
+ ctx->a = ctx->b = ctx->c = 0;
+ m = ctx->memory;
+ r = seeds;
+
+ a = b = c = d = e = f = g = h = 0x9e3779b97f4a7c13LL; /* the golden ratio */
+
+ for (i = 0; i < 4; ++i) /* scramble it */
+ mix64 (a, b, c, d, e, f, g, h);
+
+ for (i = 0; i < ISAAC_SIZE; i += 8) /* fill in mm[] with messy stuff */
+ {
+ a += r[i];
+ b += r[i + 1];
+ c += r[i + 2];
+ d += r[i + 3];
+ e += r[i + 4];
+ f += r[i + 5];
+ g += r[i + 6];
+ h += r[i + 7];
+ mix64 (a, b, c, d, e, f, g, h);
+ m[i] = a;
+ m[i + 1] = b;
+ m[i + 2] = c;
+ m[i + 3] = d;
+ m[i + 4] = e;
+ m[i + 5] = f;
+ m[i + 6] = g;
+ m[i + 7] = h;
+ }
+
+ /* do a second pass to make all of the seed affect all of mm */
+ for (i = 0; i < ISAAC_SIZE; i += 8)
+ {
+ a += m[i];
+ b += m[i + 1];
+ c += m[i + 2];
+ d += m[i + 3];
+ e += m[i + 4];
+ f += m[i + 5];
+ g += m[i + 6];
+ h += m[i + 7];
+ mix64 (a, b, c, d, e, f, g, h);
+ m[i] = a;
+ m[i + 1] = b;
+ m[i + 2] = c;
+ m[i + 3] = d;
+ m[i + 4] = e;
+ m[i + 5] = f;
+ m[i + 6] = g;
+ m[i + 7] = h;
+ }
+}
+#endif /* uword_bits == 64 */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/random_isaac.h b/src/vppinfra/random_isaac.h
new file mode 100644
index 00000000..803fbd62
--- /dev/null
+++ b/src/vppinfra/random_isaac.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ ------------------------------------------------------------------------------
+ By Bob Jenkins, 1996, Public Domain
+ MODIFIED:
+ 960327: Creation (addition of randinit, really)
+ 970719: use context, not global variables, for internal state
+ 980324: renamed seed to flag
+ 980605: recommend ISAAC_LOG2_SIZE=4 for noncryptography.
+ 010626: note this is public domain
+ ------------------------------------------------------------------------------
+
+ Modified for CLIB by Eliot Dresselhaus.
+ Dear Bob, Thanks for all the great work. - Eliot
+
+ modifications copyright (c) 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_random_isaac_h
+#define included_random_isaac_h
+
+#include <vppinfra/clib.h> /* for u32/u64 */
+#include <vppinfra/format.h> /* for unformat_input_t */
+
+/* Bob recommends 8 for crypto, 4 for simulations */
+#define ISAAC_LOG2_SIZE (4)
+#define ISAAC_SIZE (1 << ISAAC_LOG2_SIZE)
+
+typedef struct
+{
+ uword memory[ISAAC_SIZE];
+ uword a, b, c;
+} isaac_t;
+
+void isaac (isaac_t * ctx, uword * results);
+void isaac2 (isaac_t * ctx, uword * results);
+void isaac_init (isaac_t * ctx, uword * seeds);
+
+int test_isaac_main (unformat_input_t * input);
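+
+/* Minimal usage sketch (illustrative; the seed contents below are
+   placeholders chosen by the caller): fill ISAAC_SIZE seed words, mix
+   them in with isaac_init (), then each isaac () call refills
+   ISAAC_SIZE result words.
+
+     uword seeds[ISAAC_SIZE] = { 0xdeadbeef };
+     uword results[ISAAC_SIZE];
+     isaac_t ctx;
+
+     isaac_init (&ctx, seeds);
+     isaac (&ctx, results);
+*/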
+
+#endif /* included_random_isaac_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/serialize.c b/src/vppinfra/serialize.c
new file mode 100644
index 00000000..5d401a08
--- /dev/null
+++ b/src/vppinfra/serialize.c
@@ -0,0 +1,1254 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/* Turn data structures into byte streams for saving or transport. */
+
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/serialize.h>
+
+void
+serialize_64 (serialize_main_t * m, va_list * va)
+{
+ u64 x = va_arg (*va, u64);
+ u32 lo, hi;
+ lo = x;
+ hi = x >> 32;
+ serialize_integer (m, lo, sizeof (lo));
+ serialize_integer (m, hi, sizeof (hi));
+}
+
+void
+serialize_32 (serialize_main_t * m, va_list * va)
+{
+ u32 x = va_arg (*va, u32);
+ serialize_integer (m, x, sizeof (x));
+}
+
+void
+serialize_16 (serialize_main_t * m, va_list * va)
+{
+ u32 x = va_arg (*va, u32);
+ serialize_integer (m, x, sizeof (u16));
+}
+
+void
+serialize_8 (serialize_main_t * m, va_list * va)
+{
+ u32 x = va_arg (*va, u32);
+ serialize_integer (m, x, sizeof (u8));
+}
+
+void
+unserialize_64 (serialize_main_t * m, va_list * va)
+{
+ u64 *x = va_arg (*va, u64 *);
+ u32 lo, hi;
+ unserialize_integer (m, &lo, sizeof (lo));
+ unserialize_integer (m, &hi, sizeof (hi));
+ *x = ((u64) hi << 32) | (u64) lo;
+}
+
+void
+unserialize_32 (serialize_main_t * m, va_list * va)
+{
+ u32 *x = va_arg (*va, u32 *);
+ unserialize_integer (m, x, sizeof (x[0]));
+}
+
+void
+unserialize_16 (serialize_main_t * m, va_list * va)
+{
+ u16 *x = va_arg (*va, u16 *);
+ u32 t;
+ unserialize_integer (m, &t, sizeof (x[0]));
+ x[0] = t;
+}
+
+void
+unserialize_8 (serialize_main_t * m, va_list * va)
+{
+ u8 *x = va_arg (*va, u8 *);
+ u32 t;
+ unserialize_integer (m, &t, sizeof (x[0]));
+ x[0] = t;
+}
+
+void
+serialize_f64 (serialize_main_t * m, va_list * va)
+{
+ f64 x = va_arg (*va, f64);
+ union
+ {
+ f64 f;
+ u64 i;
+ } y;
+ y.f = x;
+ serialize (m, serialize_64, y.i);
+}
+
+void
+serialize_f32 (serialize_main_t * m, va_list * va)
+{
+  f32 x = va_arg (*va, f64);	/* f32 promotes to f64 through varargs */
+ union
+ {
+ f32 f;
+ u32 i;
+ } y;
+ y.f = x;
+ serialize_integer (m, y.i, sizeof (y.i));
+}
+
+void
+unserialize_f64 (serialize_main_t * m, va_list * va)
+{
+ f64 *x = va_arg (*va, f64 *);
+ union
+ {
+ f64 f;
+ u64 i;
+ } y;
+ unserialize (m, unserialize_64, &y.i);
+ *x = y.f;
+}
+
+void
+unserialize_f32 (serialize_main_t * m, va_list * va)
+{
+ f32 *x = va_arg (*va, f32 *);
+ union
+ {
+ f32 f;
+ u32 i;
+ } y;
+ unserialize_integer (m, &y.i, sizeof (y.i));
+ *x = y.f;
+}
+
+void
+serialize_cstring (serialize_main_t * m, char *s)
+{
+ u32 len = s ? strlen (s) : 0;
+ void *p;
+
+ serialize_likely_small_unsigned_integer (m, len);
+ if (len > 0)
+ {
+ p = serialize_get (m, len);
+ clib_memcpy (p, s, len);
+ }
+}
+
+void
+unserialize_cstring (serialize_main_t * m, char **s)
+{
+ char *p, *r = 0;
+ u32 len;
+
+ len = unserialize_likely_small_unsigned_integer (m);
+
+ /*
+ * Given broken enough data, we could get len = 0xFFFFFFFF.
+ * Add one, it overflows, we call vec_new (char, 0), then
+ * memcpy until we bus error.
+ */
+ if (len > 0 && len != 0xFFFFFFFF)
+ {
+ r = vec_new (char, len + 1);
+ p = unserialize_get (m, len);
+ clib_memcpy (r, p, len);
+
+ /* Null terminate. */
+ r[len] = 0;
+ }
+ *s = r;
+}
+
+/* vec_serialize/vec_unserialize helper functions for basic vector types. */
+void
+serialize_vec_8 (serialize_main_t * m, va_list * va)
+{
+ u8 *s = va_arg (*va, u8 *);
+ u32 n = va_arg (*va, u32);
+ u8 *p = serialize_get (m, n * sizeof (u8));
+ clib_memcpy (p, s, n * sizeof (u8));
+}
+
+void
+unserialize_vec_8 (serialize_main_t * m, va_list * va)
+{
+ u8 *s = va_arg (*va, u8 *);
+ u32 n = va_arg (*va, u32);
+ u8 *p = unserialize_get (m, n);
+ clib_memcpy (s, p, n);
+}
+
+#define _(n_bits) \
+ void serialize_vec_##n_bits (serialize_main_t * m, va_list * va) \
+ { \
+ u##n_bits * s = va_arg (*va, u##n_bits *); \
+ u32 n = va_arg (*va, u32); \
+ u##n_bits * p = serialize_get (m, n * sizeof (s[0])); \
+ \
+ while (n >= 4) \
+ { \
+ p[0] = clib_host_to_net_u##n_bits (s[0]); \
+ p[1] = clib_host_to_net_u##n_bits (s[1]); \
+ p[2] = clib_host_to_net_u##n_bits (s[2]); \
+ p[3] = clib_host_to_net_u##n_bits (s[3]); \
+ s += 4; \
+ p += 4; \
+ n -= 4; \
+ } \
+ \
+ while (n >= 1) \
+ { \
+ p[0] = clib_host_to_net_u##n_bits (s[0]); \
+ s += 1; \
+ p += 1; \
+ n -= 1; \
+ } \
+ } \
+ \
+ void unserialize_vec_##n_bits (serialize_main_t * m, va_list * va) \
+ { \
+ u##n_bits * s = va_arg (*va, u##n_bits *); \
+ u32 n = va_arg (*va, u32); \
+ u##n_bits * p = unserialize_get (m, n * sizeof (s[0])); \
+ \
+ while (n >= 4) \
+ { \
+ s[0] = clib_net_to_host_mem_u##n_bits (&p[0]); \
+ s[1] = clib_net_to_host_mem_u##n_bits (&p[1]); \
+ s[2] = clib_net_to_host_mem_u##n_bits (&p[2]); \
+ s[3] = clib_net_to_host_mem_u##n_bits (&p[3]); \
+ s += 4; \
+ p += 4; \
+ n -= 4; \
+ } \
+ \
+ while (n >= 1) \
+ { \
+ s[0] = clib_net_to_host_mem_u##n_bits (&p[0]); \
+ s += 1; \
+ p += 1; \
+ n -= 1; \
+ } \
+ }
+
+_(16);
+_(32);
+_(64);
+
+#undef _
+
+#define SERIALIZE_VECTOR_CHUNK_SIZE 64
+
+void
+serialize_vector (serialize_main_t * m, va_list * va)
+{
+ void *vec = va_arg (*va, void *);
+ u32 elt_bytes = va_arg (*va, u32);
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+ u32 l = vec_len (vec);
+ void *p = vec;
+
+ serialize_integer (m, l, sizeof (l));
+
+ /* Serialize vector in chunks for cache locality. */
+ while (l != 0)
+ {
+ u32 n = clib_min (SERIALIZE_VECTOR_CHUNK_SIZE, l);
+ serialize (m, f, p, n);
+ l -= n;
+ p += SERIALIZE_VECTOR_CHUNK_SIZE * elt_bytes;
+ }
+}
+
+void *
+unserialize_vector_ha (serialize_main_t * m,
+ u32 elt_bytes,
+ u32 header_bytes,
+ u32 align, u32 max_length, serialize_function_t * f)
+{
+ void *v, *p;
+ u32 l;
+
+ unserialize_integer (m, &l, sizeof (l));
+ if (l > max_length)
+ serialize_error (&m->header,
+ clib_error_create ("bad vector length %d", l));
+ p = v = _vec_resize (0, l, (uword) l * elt_bytes, header_bytes,
+ /* align */ align);
+
+ while (l != 0)
+ {
+ u32 n = clib_min (SERIALIZE_VECTOR_CHUNK_SIZE, l);
+ unserialize (m, f, p, n);
+ l -= n;
+ p += SERIALIZE_VECTOR_CHUNK_SIZE * elt_bytes;
+ }
+ return v;
+}
+
+void
+unserialize_aligned_vector (serialize_main_t * m, va_list * va)
+{
+ void **vec = va_arg (*va, void **);
+ u32 elt_bytes = va_arg (*va, u32);
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+ u32 align = va_arg (*va, u32);
+
+ *vec = unserialize_vector_ha (m, elt_bytes,
+ /* header_bytes */ 0,
+ /* align */ align,
+ /* max_length */ ~0,
+ f);
+}
+
+void
+unserialize_vector (serialize_main_t * m, va_list * va)
+{
+ void **vec = va_arg (*va, void **);
+ u32 elt_bytes = va_arg (*va, u32);
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+
+ *vec = unserialize_vector_ha (m, elt_bytes,
+ /* header_bytes */ 0,
+ /* align */ 0,
+ /* max_length */ ~0,
+ f);
+}
+
+void
+serialize_bitmap (serialize_main_t * m, uword * b)
+{
+ u32 l, i, n_u32s;
+
+ l = vec_len (b);
+ n_u32s = l * sizeof (b[0]) / sizeof (u32);
+ serialize_integer (m, n_u32s, sizeof (n_u32s));
+
+ /* Send 32 bit words, low-order word first on 64 bit. */
+ for (i = 0; i < l; i++)
+ {
+ serialize_integer (m, b[i], sizeof (u32));
+ if (BITS (uword) == 64)
+ serialize_integer (m, (u64) b[i] >> (u64) 32, sizeof (u32));
+ }
+}
+
+uword *
+unserialize_bitmap (serialize_main_t * m)
+{
+ uword *b = 0;
+ u32 i, n_u32s;
+
+ unserialize_integer (m, &n_u32s, sizeof (n_u32s));
+ if (n_u32s == 0)
+ return b;
+
+ i = (n_u32s * sizeof (u32) + sizeof (b[0]) - 1) / sizeof (b[0]);
+ vec_resize (b, i);
+ for (i = 0; i < n_u32s; i++)
+ {
+ u32 data;
+ unserialize_integer (m, &data, sizeof (u32));
+
+ /* Low-word is first on 64 bit. */
+ if (BITS (uword) == 64)
+ {
+ if ((i % 2) == 0)
+ b[i / 2] |= (u64) data << (u64) 0;
+ else
+ b[i / 2] |= (u64) data << (u64) 32;
+ }
+ else
+ {
+ b[i] = data;
+ }
+ }
+
+ return b;
+}
+
+void
+serialize_pool (serialize_main_t * m, va_list * va)
+{
+ void *pool = va_arg (*va, void *);
+ u32 elt_bytes = va_arg (*va, u32);
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+ u32 l, lo, hi;
+ pool_header_t *p;
+
+ l = vec_len (pool);
+ serialize_integer (m, l, sizeof (u32));
+ if (l == 0)
+ return;
+ p = pool_header (pool);
+
+  /* No need to send the free bitmap, but the free-index vector must be
+     sent to guarantee that the unserialized pool is identical. */
+ vec_serialize (m, p->free_indices, serialize_vec_32);
+
+ pool_foreach_region (lo, hi, pool,
+ serialize (m, f, pool + lo * elt_bytes, hi - lo));
+}
+
+static void *
+unserialize_pool_helper (serialize_main_t * m,
+ u32 elt_bytes, u32 align, serialize_function_t * f)
+{
+ void *v;
+ u32 i, l, lo, hi;
+ pool_header_t *p;
+
+ unserialize_integer (m, &l, sizeof (l));
+ if (l == 0)
+ {
+ return 0;
+ }
+
+ v = _vec_resize (0, l, (uword) l * elt_bytes, sizeof (p[0]), align);
+ p = pool_header (v);
+
+ vec_unserialize (m, &p->free_indices, unserialize_vec_32);
+
+ /* Construct free bitmap. */
+ p->free_bitmap = 0;
+ for (i = 0; i < vec_len (p->free_indices); i++)
+ p->free_bitmap = clib_bitmap_ori (p->free_bitmap, p->free_indices[i]);
+
+ pool_foreach_region (lo, hi, v,
+ unserialize (m, f, v + lo * elt_bytes, hi - lo));
+
+ return v;
+}
+
+void
+unserialize_pool (serialize_main_t * m, va_list * va)
+{
+ void **result = va_arg (*va, void **);
+ u32 elt_bytes = va_arg (*va, u32);
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+ *result = unserialize_pool_helper (m, elt_bytes, /* align */ 0, f);
+}
+
+void
+unserialize_aligned_pool (serialize_main_t * m, va_list * va)
+{
+ void **result = va_arg (*va, void **);
+ u32 elt_bytes = va_arg (*va, u32);
+ u32 align = va_arg (*va, u32);
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+ *result = unserialize_pool_helper (m, elt_bytes, align, f);
+}
+
+static void
+serialize_vec_heap_elt (serialize_main_t * m, va_list * va)
+{
+ heap_elt_t *e = va_arg (*va, heap_elt_t *);
+ u32 i, n = va_arg (*va, u32);
+ for (i = 0; i < n; i++)
+ {
+ serialize_integer (m, e[i].offset, sizeof (e[i].offset));
+ serialize_integer (m, e[i].next, sizeof (e[i].next));
+ serialize_integer (m, e[i].prev, sizeof (e[i].prev));
+ }
+}
+
+static void
+unserialize_vec_heap_elt (serialize_main_t * m, va_list * va)
+{
+ heap_elt_t *e = va_arg (*va, heap_elt_t *);
+ u32 i, n = va_arg (*va, u32);
+ for (i = 0; i < n; i++)
+ {
+ unserialize_integer (m, &e[i].offset, sizeof (e[i].offset));
+ unserialize_integer (m, &e[i].next, sizeof (e[i].next));
+ unserialize_integer (m, &e[i].prev, sizeof (e[i].prev));
+ }
+}
+
+void
+serialize_heap (serialize_main_t * m, va_list * va)
+{
+ void *heap = va_arg (*va, void *);
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+ u32 i, l;
+ heap_header_t *h;
+
+ l = vec_len (heap);
+ serialize_integer (m, l, sizeof (u32));
+ if (l == 0)
+ return;
+
+ h = heap_header (heap);
+
+#define foreach_serialize_heap_header_integer \
+ _ (head) _ (tail) _ (used_count) _ (max_len) _ (flags) _ (elt_bytes)
+
+#define _(f) serialize_integer (m, h->f, sizeof (h->f));
+ foreach_serialize_heap_header_integer;
+#undef _
+
+ serialize_integer (m, vec_len (h->free_lists), sizeof (u32));
+ for (i = 0; i < vec_len (h->free_lists); i++)
+ vec_serialize (m, h->free_lists[i], serialize_vec_32);
+
+ vec_serialize (m, h->elts, serialize_vec_heap_elt);
+ vec_serialize (m, h->small_free_elt_free_index, serialize_vec_32);
+ vec_serialize (m, h->free_elts, serialize_vec_32);
+
+ /* Serialize data in heap. */
+ {
+ heap_elt_t *e, *end;
+ e = h->elts + h->head;
+ end = h->elts + h->tail;
+ while (1)
+ {
+ if (!heap_is_free (e))
+ {
+ void *v = heap + heap_offset (e) * h->elt_bytes;
+ u32 n = heap_elt_size (heap, e);
+ serialize (m, f, v, n);
+ }
+ if (e == end)
+ break;
+ e = heap_next (e);
+ }
+ }
+}
+
+void
+unserialize_heap (serialize_main_t * m, va_list * va)
+{
+ void **result = va_arg (*va, void **);
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+ u32 i, vl, fl;
+ heap_header_t h;
+ void *heap;
+
+ unserialize_integer (m, &vl, sizeof (u32));
+ if (vl == 0)
+ {
+ *result = 0;
+ return;
+ }
+
+ memset (&h, 0, sizeof (h));
+#define _(f) unserialize_integer (m, &h.f, sizeof (h.f));
+ foreach_serialize_heap_header_integer;
+#undef _
+
+ unserialize_integer (m, &fl, sizeof (u32));
+ vec_resize (h.free_lists, fl);
+
+ for (i = 0; i < vec_len (h.free_lists); i++)
+ vec_unserialize (m, &h.free_lists[i], unserialize_vec_32);
+
+ vec_unserialize (m, &h.elts, unserialize_vec_heap_elt);
+ vec_unserialize (m, &h.small_free_elt_free_index, unserialize_vec_32);
+ vec_unserialize (m, &h.free_elts, unserialize_vec_32);
+
+ /* Re-construct used elt bitmap. */
+ if (CLIB_DEBUG > 0)
+ {
+ heap_elt_t *e;
+ vec_foreach (e, h.elts)
+ {
+ if (!heap_is_free (e))
+ h.used_elt_bitmap = clib_bitmap_ori (h.used_elt_bitmap, e - h.elts);
+ }
+ }
+
+ heap = *result = _heap_new (vl, h.elt_bytes);
+ heap_header (heap)[0] = h;
+
+ /* Unserialize data in heap. */
+ {
+ heap_elt_t *e, *end;
+ e = h.elts + h.head;
+ end = h.elts + h.tail;
+ while (1)
+ {
+ if (!heap_is_free (e))
+ {
+ void *v = heap + heap_offset (e) * h.elt_bytes;
+ u32 n = heap_elt_size (heap, e);
+ unserialize (m, f, v, n);
+ }
+ if (e == end)
+ break;
+ e = heap_next (e);
+ }
+ }
+}
+
+void
+serialize_magic (serialize_main_t * m, void *magic, u32 magic_bytes)
+{
+ void *p;
+ serialize_integer (m, magic_bytes, sizeof (magic_bytes));
+ p = serialize_get (m, magic_bytes);
+ clib_memcpy (p, magic, magic_bytes);
+}
+
+void
+unserialize_check_magic (serialize_main_t * m, void *magic, u32 magic_bytes)
+{
+ u32 l;
+ void *d;
+
+ unserialize_integer (m, &l, sizeof (l));
+ if (l != magic_bytes)
+ {
+ bad:
+ serialize_error_return (m, "bad magic number");
+ }
+ d = serialize_get (m, magic_bytes);
+ if (memcmp (magic, d, magic_bytes))
+ goto bad;
+}
+
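+/* The top-level serialize/unserialize call establishes a setjmp unwind
+   point; serialize_error () longjmps back to it from any nesting depth,
+   so recursive serialize () calls share a single error path. */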
+clib_error_t *
+va_serialize (serialize_main_t * sm, va_list * va)
+{
+ serialize_main_header_t *m = &sm->header;
+ serialize_function_t *f = va_arg (*va, serialize_function_t *);
+ clib_error_t *error = 0;
+
+ m->recursion_level += 1;
+ if (m->recursion_level == 1)
+ {
+ uword r = clib_setjmp (&m->error_longjmp, 0);
+ error = uword_to_pointer (r, clib_error_t *);
+ }
+
+ if (!error)
+ f (sm, va);
+
+ m->recursion_level -= 1;
+ return error;
+}
+
+clib_error_t *
+serialize (serialize_main_t * m, ...)
+{
+ clib_error_t *error;
+ va_list va;
+
+ va_start (va, m);
+ error = va_serialize (m, &va);
+ va_end (va);
+ return error;
+}
+
+clib_error_t *
+unserialize (serialize_main_t * m, ...)
+{
+ clib_error_t *error;
+ va_list va;
+
+ va_start (va, m);
+ error = va_serialize (m, &va);
+ va_end (va);
+ return error;
+}
+
+static void *
+serialize_write_not_inline (serialize_main_header_t * m,
+ serialize_stream_t * s,
+ uword n_bytes_to_write, uword flags)
+{
+ uword cur_bi, n_left_b, n_left_o;
+
+ ASSERT (s->current_buffer_index <= s->n_buffer_bytes);
+ cur_bi = s->current_buffer_index;
+ n_left_b = s->n_buffer_bytes - cur_bi;
+ n_left_o = vec_len (s->overflow_buffer);
+
+  /* Flush any previously overflowed bytes into the buffer first. */
+ do
+ {
+ if (n_left_o > 0 && n_left_b > 0)
+ {
+ uword n = clib_min (n_left_b, n_left_o);
+ clib_memcpy (s->buffer + cur_bi, s->overflow_buffer, n);
+ cur_bi += n;
+ n_left_b -= n;
+ n_left_o -= n;
+ if (n_left_o == 0)
+ _vec_len (s->overflow_buffer) = 0;
+ else
+ vec_delete (s->overflow_buffer, n, 0);
+ }
+
+      /* Call the data function when the buffer is full. It should
+         dispatch the current buffer and hand us a fresh one to write
+         more data into. */
+ if (n_left_b == 0)
+ {
+ s->current_buffer_index = cur_bi;
+ m->data_function (m, s);
+ cur_bi = s->current_buffer_index;
+ n_left_b = s->n_buffer_bytes - cur_bi;
+ }
+ }
+ while (n_left_o > 0);
+
+ if (n_left_o > 0 || n_left_b < n_bytes_to_write)
+ {
+ u8 *r;
+ vec_add2 (s->overflow_buffer, r, n_bytes_to_write);
+ return r;
+ }
+ else
+ {
+ s->current_buffer_index = cur_bi + n_bytes_to_write;
+ return s->buffer + cur_bi;
+ }
+}
+
+static void *
+serialize_read_not_inline (serialize_main_header_t * m,
+ serialize_stream_t * s,
+ uword n_bytes_to_read, uword flags)
+{
+ uword cur_bi, cur_oi, n_left_b, n_left_o, n_left_to_read;
+
+ ASSERT (s->current_buffer_index <= s->n_buffer_bytes);
+
+ cur_bi = s->current_buffer_index;
+ cur_oi = s->current_overflow_index;
+
+ n_left_b = s->n_buffer_bytes - cur_bi;
+ n_left_o = vec_len (s->overflow_buffer) - cur_oi;
+
+ /* Read from overflow? */
+ if (n_left_o >= n_bytes_to_read)
+ {
+ s->current_overflow_index = cur_oi + n_bytes_to_read;
+ return vec_elt_at_index (s->overflow_buffer, cur_oi);
+ }
+
+ /* Reset overflow buffer. */
+ if (n_left_o == 0 && s->overflow_buffer)
+ {
+ s->current_overflow_index = 0;
+ _vec_len (s->overflow_buffer) = 0;
+ }
+
+ n_left_to_read = n_bytes_to_read;
+ while (n_left_to_read > 0)
+ {
+ uword n;
+
+      /* If the overflow and normal buffers together don't hold enough
+         data, call the read function. */
+ if (n_left_o + n_left_b < n_bytes_to_read)
+ {
+ /* Save any left over buffer in overflow vector. */
+ if (n_left_b > 0)
+ {
+ vec_add (s->overflow_buffer, s->buffer + cur_bi, n_left_b);
+ n_left_o += n_left_b;
+ n_left_to_read -= n_left_b;
+ /* Advance buffer to end --- even if
+ SERIALIZE_FLAG_NO_ADVANCE_CURRENT_BUFFER_INDEX is set. */
+ cur_bi = s->n_buffer_bytes;
+ n_left_b = 0;
+ }
+
+ if (m->data_function)
+ {
+ m->data_function (m, s);
+ cur_bi = s->current_buffer_index;
+ n_left_b = s->n_buffer_bytes - cur_bi;
+ }
+ }
+
+      /* On the first pass through the loop, return directly if the
+         normal buffer holds enough data and the overflow vector is
+         empty. */
+ if (n_left_o == 0
+ && n_left_to_read == n_bytes_to_read && n_left_b >= n_left_to_read)
+ {
+ s->current_buffer_index = cur_bi + n_bytes_to_read;
+ return s->buffer + cur_bi;
+ }
+
+ if (!m->data_function || serialize_stream_is_end_of_stream (s))
+ {
+ /* This can happen for a peek at end of file.
+ Pad overflow buffer with 0s. */
+ vec_resize (s->overflow_buffer, n_left_to_read);
+ n_left_o += n_left_to_read;
+ n_left_to_read = 0;
+ }
+ else
+ {
+ /* Copy from buffer to overflow vector. */
+ n = clib_min (n_left_to_read, n_left_b);
+ vec_add (s->overflow_buffer, s->buffer + cur_bi, n);
+ cur_bi += n;
+ n_left_b -= n;
+ n_left_o += n;
+ n_left_to_read -= n;
+ }
+ }
+
+ s->current_buffer_index = cur_bi;
+ s->current_overflow_index = cur_oi + n_bytes_to_read;
+ return vec_elt_at_index (s->overflow_buffer, cur_oi);
+}
+
+void *
+serialize_read_write_not_inline (serialize_main_header_t * m,
+ serialize_stream_t * s,
+ uword n_bytes, uword flags)
+{
+ return (((flags & SERIALIZE_FLAG_IS_READ) ? serialize_read_not_inline :
+ serialize_write_not_inline) (m, s, n_bytes, flags));
+}
+
+static void
+serialize_read_write_close (serialize_main_header_t * m,
+ serialize_stream_t * s, uword flags)
+{
+ if (serialize_stream_is_end_of_stream (s))
+ return;
+
+ if (flags & SERIALIZE_FLAG_IS_WRITE)
+ /* "Write" 0 bytes to flush overflow vector. */
+ serialize_write_not_inline (m, s, /* n bytes */ 0, flags);
+
+ serialize_stream_set_end_of_stream (s);
+
+ /* Call it one last time to flush buffer and close. */
+ m->data_function (m, s);
+
+ vec_free (s->overflow_buffer);
+}
+
+void
+serialize_close (serialize_main_t * m)
+{
+ serialize_read_write_close (&m->header, &m->stream,
+ SERIALIZE_FLAG_IS_WRITE);
+}
+
+void
+unserialize_close (serialize_main_t * m)
+{
+ serialize_read_write_close (&m->header, &m->stream, SERIALIZE_FLAG_IS_READ);
+}
+
+void
+serialize_open_data (serialize_main_t * m, u8 * data, uword n_data_bytes)
+{
+ memset (m, 0, sizeof (m[0]));
+ m->stream.buffer = data;
+ m->stream.n_buffer_bytes = n_data_bytes;
+}
+
+void
+unserialize_open_data (serialize_main_t * m, u8 * data, uword n_data_bytes)
+{
+ serialize_open_data (m, data, n_data_bytes);
+}
+
+static void
+serialize_vector_write (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ if (!serialize_stream_is_end_of_stream (s))
+ {
+      /* Double the buffer size (start with 64 bytes). */
+ uword l = vec_len (s->buffer);
+ vec_resize (s->buffer, l > 0 ? l : 64);
+ s->n_buffer_bytes = vec_len (s->buffer);
+ }
+}
+
+void
+serialize_open_vector (serialize_main_t * m, u8 * vector)
+{
+ memset (m, 0, sizeof (m[0]));
+ m->header.data_function = serialize_vector_write;
+ m->stream.buffer = vector;
+ m->stream.current_buffer_index = 0;
+ m->stream.n_buffer_bytes = vec_len (vector);
+}
+
+void *
+serialize_close_vector (serialize_main_t * m)
+{
+ serialize_stream_t *s = &m->stream;
+ void *result;
+
+ serialize_close (m); /* frees overflow buffer */
+
+ if (s->buffer)
+ _vec_len (s->buffer) = s->current_buffer_index;
+ result = s->buffer;
+ memset (m, 0, sizeof (m[0]));
+ return result;
+}
+
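+/* The serialize_multiple_* routines gather n_data elements spaced
+   data_stride bytes apart -- e.g. one field out of each element of a
+   struct array.  The 4-wide inner loops are a plain unroll; the 2- and
+   4-byte variants also convert to network byte order. */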
+void
+serialize_multiple_1 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data)
+{
+ u8 *d = data;
+ u8 *p;
+ uword n_left = n_data;
+
+ while (n_left >= 4)
+ {
+ p = serialize_get (m, 4 * sizeof (d[0]));
+ p[0] = d[0 * data_stride];
+ p[1] = d[1 * data_stride];
+ p[2] = d[2 * data_stride];
+ p[3] = d[3 * data_stride];
+ n_left -= 4;
+ d += 4 * data_stride;
+ }
+
+ if (n_left > 0)
+ {
+ p = serialize_get (m, n_left * sizeof (p[0]));
+ while (n_left > 0)
+ {
+ p[0] = d[0];
+ p += 1;
+ d += 1 * data_stride;
+ n_left -= 1;
+ }
+ }
+}
+
+void
+serialize_multiple_2 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data)
+{
+ void *d = data;
+ u16 *p;
+ uword n_left = n_data;
+
+ while (n_left >= 4)
+ {
+ p = serialize_get (m, 4 * sizeof (p[0]));
+ clib_mem_unaligned (p + 0, u16) =
+ clib_host_to_net_mem_u16 (d + 0 * data_stride);
+ clib_mem_unaligned (p + 1, u16) =
+ clib_host_to_net_mem_u16 (d + 1 * data_stride);
+ clib_mem_unaligned (p + 2, u16) =
+ clib_host_to_net_mem_u16 (d + 2 * data_stride);
+ clib_mem_unaligned (p + 3, u16) =
+ clib_host_to_net_mem_u16 (d + 3 * data_stride);
+ n_left -= 4;
+ d += 4 * data_stride;
+ }
+
+ if (n_left > 0)
+ {
+ p = serialize_get (m, n_left * sizeof (p[0]));
+ while (n_left > 0)
+ {
+ clib_mem_unaligned (p + 0, u16) =
+ clib_host_to_net_mem_u16 (d + 0 * data_stride);
+ p += 1;
+ d += 1 * data_stride;
+ n_left -= 1;
+ }
+ }
+}
+
+void
+serialize_multiple_4 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data)
+{
+ void *d = data;
+ u32 *p;
+ uword n_left = n_data;
+
+ while (n_left >= 4)
+ {
+ p = serialize_get (m, 4 * sizeof (p[0]));
+ clib_mem_unaligned (p + 0, u32) =
+ clib_host_to_net_mem_u32 (d + 0 * data_stride);
+ clib_mem_unaligned (p + 1, u32) =
+ clib_host_to_net_mem_u32 (d + 1 * data_stride);
+ clib_mem_unaligned (p + 2, u32) =
+ clib_host_to_net_mem_u32 (d + 2 * data_stride);
+ clib_mem_unaligned (p + 3, u32) =
+ clib_host_to_net_mem_u32 (d + 3 * data_stride);
+ n_left -= 4;
+ d += 4 * data_stride;
+ }
+
+ if (n_left > 0)
+ {
+ p = serialize_get (m, n_left * sizeof (p[0]));
+ while (n_left > 0)
+ {
+ clib_mem_unaligned (p + 0, u32) =
+ clib_host_to_net_mem_u32 (d + 0 * data_stride);
+ p += 1;
+ d += 1 * data_stride;
+ n_left -= 1;
+ }
+ }
+}
+
+void
+unserialize_multiple_1 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data)
+{
+ u8 *d = data;
+ u8 *p;
+ uword n_left = n_data;
+
+ while (n_left >= 4)
+ {
+ p = unserialize_get (m, 4 * sizeof (d[0]));
+ d[0 * data_stride] = p[0];
+ d[1 * data_stride] = p[1];
+ d[2 * data_stride] = p[2];
+ d[3 * data_stride] = p[3];
+ n_left -= 4;
+ d += 4 * data_stride;
+ }
+
+ if (n_left > 0)
+ {
+ p = unserialize_get (m, n_left * sizeof (p[0]));
+ while (n_left > 0)
+ {
+ d[0] = p[0];
+ p += 1;
+ d += 1 * data_stride;
+ n_left -= 1;
+ }
+ }
+}
+
+void
+unserialize_multiple_2 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data)
+{
+ void *d = data;
+ u16 *p;
+ uword n_left = n_data;
+
+ while (n_left >= 4)
+ {
+ p = unserialize_get (m, 4 * sizeof (p[0]));
+ clib_mem_unaligned (d + 0 * data_stride, u16) =
+ clib_net_to_host_mem_u16 (p + 0);
+ clib_mem_unaligned (d + 1 * data_stride, u16) =
+ clib_net_to_host_mem_u16 (p + 1);
+ clib_mem_unaligned (d + 2 * data_stride, u16) =
+ clib_net_to_host_mem_u16 (p + 2);
+ clib_mem_unaligned (d + 3 * data_stride, u16) =
+ clib_net_to_host_mem_u16 (p + 3);
+ n_left -= 4;
+ d += 4 * data_stride;
+ }
+
+ if (n_left > 0)
+ {
+ p = unserialize_get (m, n_left * sizeof (p[0]));
+ while (n_left > 0)
+ {
+ clib_mem_unaligned (d + 0 * data_stride, u16) =
+ clib_net_to_host_mem_u16 (p + 0);
+ p += 1;
+ d += 1 * data_stride;
+ n_left -= 1;
+ }
+ }
+}
+
+void
+unserialize_multiple_4 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data)
+{
+ void *d = data;
+ u32 *p;
+ uword n_left = n_data;
+
+ while (n_left >= 4)
+ {
+ p = unserialize_get (m, 4 * sizeof (p[0]));
+ clib_mem_unaligned (d + 0 * data_stride, u32) =
+ clib_net_to_host_mem_u32 (p + 0);
+ clib_mem_unaligned (d + 1 * data_stride, u32) =
+ clib_net_to_host_mem_u32 (p + 1);
+ clib_mem_unaligned (d + 2 * data_stride, u32) =
+ clib_net_to_host_mem_u32 (p + 2);
+ clib_mem_unaligned (d + 3 * data_stride, u32) =
+ clib_net_to_host_mem_u32 (p + 3);
+ n_left -= 4;
+ d += 4 * data_stride;
+ }
+
+ if (n_left > 0)
+ {
+ p = unserialize_get (m, n_left * sizeof (p[0]));
+ while (n_left > 0)
+ {
+ clib_mem_unaligned (d + 0 * data_stride, u32) =
+ clib_net_to_host_mem_u32 (p + 0);
+ p += 1;
+ d += 1 * data_stride;
+ n_left -= 1;
+ }
+ }
+}
+
+#ifdef CLIB_UNIX
+
+#include <unistd.h>
+#include <fcntl.h>
+
+static void
+unix_file_write (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ int fd, n;
+
+ fd = s->data_function_opaque;
+ n = write (fd, s->buffer, s->current_buffer_index);
+ if (n < 0)
+ {
+ if (!unix_error_is_fatal (errno))
+ n = 0;
+ else
+ serialize_error (m, clib_error_return_unix (0, "write"));
+ }
+ if (n == s->current_buffer_index)
+ _vec_len (s->buffer) = 0;
+ else
+ vec_delete (s->buffer, n, 0);
+ s->current_buffer_index = vec_len (s->buffer);
+}
+
+static void
+unix_file_read (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ int fd, n;
+
+ fd = s->data_function_opaque;
+ n = read (fd, s->buffer, vec_len (s->buffer));
+ if (n < 0)
+ {
+ if (!unix_error_is_fatal (errno))
+ n = 0;
+ else
+ serialize_error (m, clib_error_return_unix (0, "read"));
+ }
+ else if (n == 0)
+ serialize_stream_set_end_of_stream (s);
+ s->current_buffer_index = 0;
+ s->n_buffer_bytes = n;
+}
+
+static void
+serialize_open_unix_file_descriptor_helper (serialize_main_t * m, int fd,
+ uword is_read)
+{
+ memset (m, 0, sizeof (m[0]));
+ vec_resize (m->stream.buffer, 4096);
+
+ if (!is_read)
+ {
+ m->stream.n_buffer_bytes = vec_len (m->stream.buffer);
+ _vec_len (m->stream.buffer) = 0;
+ }
+
+ m->header.data_function = is_read ? unix_file_read : unix_file_write;
+ m->stream.data_function_opaque = fd;
+}
+
+void
+serialize_open_unix_file_descriptor (serialize_main_t * m, int fd)
+{
+ serialize_open_unix_file_descriptor_helper (m, fd, /* is_read */ 0);
+}
+
+void
+unserialize_open_unix_file_descriptor (serialize_main_t * m, int fd)
+{
+ serialize_open_unix_file_descriptor_helper (m, fd, /* is_read */ 1);
+}
+
+static clib_error_t *
+serialize_open_unix_file_helper (serialize_main_t * m, char *file,
+ uword is_read)
+{
+ int fd, mode;
+
+ mode = is_read ? O_RDONLY : O_RDWR | O_CREAT | O_TRUNC;
+ fd = open (file, mode, 0666);
+ if (fd < 0)
+ return clib_error_return_unix (0, "open `%s'", file);
+
+ serialize_open_unix_file_descriptor_helper (m, fd, is_read);
+ return 0;
+}
+
+clib_error_t *
+serialize_open_unix_file (serialize_main_t * m, char *file)
+{
+ return serialize_open_unix_file_helper (m, file, /* is_read */ 0);
+}
+
+clib_error_t *
+unserialize_open_unix_file (serialize_main_t * m, char *file)
+{
+ return serialize_open_unix_file_helper (m, file, /* is_read */ 1);
+}
+
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/serialize.h b/src/vppinfra/serialize.h
new file mode 100644
index 00000000..6cc2372e
--- /dev/null
+++ b/src/vppinfra/serialize.h
@@ -0,0 +1,443 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_serialize_h
+#define included_clib_serialize_h
+
+#include <stdarg.h>
+#include <vppinfra/byte_order.h>
+#include <vppinfra/types.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/longjmp.h>
+
+struct serialize_main_header_t;
+struct serialize_stream_t;
+
+typedef void (serialize_data_function_t) (struct serialize_main_header_t * h,
+ struct serialize_stream_t * s);
+
+typedef struct serialize_stream_t
+{
+ /* Current data buffer being serialized/unserialized. */
+ u8 *buffer;
+
+ /* Size of buffer in bytes. */
+ u32 n_buffer_bytes;
+
+ /* Current index into buffer. */
+ u32 current_buffer_index;
+
+ /* Overflow buffer for when there is not enough room at the end of
+ buffer to hold serialized/unserialized data. */
+ u8 *overflow_buffer;
+
+ /* Current index in overflow buffer for reads. */
+ u32 current_overflow_index;
+
+ u32 flags;
+#define SERIALIZE_END_OF_STREAM (1 << 0)
+
+ uword data_function_opaque;
+
+ u32 opaque[64 - 4 * sizeof (u32) - 1 * sizeof (uword) -
+ 2 * sizeof (void *)];
+} serialize_stream_t;
+
+always_inline void
+serialize_stream_set_end_of_stream (serialize_stream_t * s)
+{
+ s->flags |= SERIALIZE_END_OF_STREAM;
+}
+
+always_inline uword
+serialize_stream_is_end_of_stream (serialize_stream_t * s)
+{
+ return (s->flags & SERIALIZE_END_OF_STREAM) != 0;
+}
+
+typedef struct serialize_main_header_t
+{
+ u32 recursion_level;
+
+ /* Data callback function and opaque data. */
+ serialize_data_function_t *data_function;
+
+ /* Error if signaled by data function. */
+ clib_error_t *error;
+
+ /* Exit unwind point if error occurs. */
+ clib_longjmp_t error_longjmp;
+} serialize_main_header_t;
+
+always_inline void
+serialize_error (serialize_main_header_t * m, clib_error_t * error)
+{
+ clib_longjmp (&m->error_longjmp, pointer_to_uword (error));
+}
+
+#define serialize_error_return(m,args...) \
+ serialize_error (&(m)->header, clib_error_return (0, args))
+
+void *serialize_read_write_not_inline (serialize_main_header_t * m,
+ serialize_stream_t * s,
+ uword n_bytes, uword flags);
+
+#define SERIALIZE_FLAG_IS_READ (1 << 0)
+#define SERIALIZE_FLAG_IS_WRITE (1 << 1)
+
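+/* Fast path: hand out n_bytes directly from the current buffer when it
+   fits and no overflow bytes are pending; otherwise fall back to the
+   out-of-line routine above. */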
+always_inline void *
+serialize_stream_read_write (serialize_main_header_t * header,
+ serialize_stream_t * s,
+ uword n_bytes, uword flags)
+{
+ uword i, j, l;
+
+ l = vec_len (s->overflow_buffer);
+ i = s->current_buffer_index;
+ j = i + n_bytes;
+ s->current_buffer_index = j;
+ if (l == 0 && j <= s->n_buffer_bytes)
+ {
+ return s->buffer + i;
+ }
+ else
+ {
+ s->current_buffer_index = i;
+ return serialize_read_write_not_inline (header, s, n_bytes, flags);
+ }
+}
+
+typedef struct
+{
+ serialize_main_header_t header;
+ serialize_stream_t stream;
+} serialize_main_t;
+
+always_inline void
+serialize_set_end_of_stream (serialize_main_t * m)
+{
+ serialize_stream_set_end_of_stream (&m->stream);
+}
+
+always_inline uword
+serialize_is_end_of_stream (serialize_main_t * m)
+{
+ return serialize_stream_is_end_of_stream (&m->stream);
+}
+
+typedef struct
+{
+ serialize_main_header_t header;
+ serialize_stream_t *streams;
+} serialize_multiple_main_t;
+
+typedef void (serialize_function_t) (serialize_main_t * m, va_list * va);
+
+always_inline void *
+unserialize_get (serialize_main_t * m, uword n_bytes)
+{
+ return serialize_stream_read_write (&m->header, &m->stream, n_bytes,
+ SERIALIZE_FLAG_IS_READ);
+}
+
+always_inline void *
+serialize_get (serialize_main_t * m, uword n_bytes)
+{
+ return serialize_stream_read_write (&m->header, &m->stream, n_bytes,
+ SERIALIZE_FLAG_IS_WRITE);
+}
+
+always_inline void
+serialize_integer (serialize_main_t * m, u64 x, u32 n_bytes)
+{
+ u8 *p = serialize_get (m, n_bytes);
+ if (n_bytes == 1)
+ p[0] = x;
+ else if (n_bytes == 2)
+ clib_mem_unaligned (p, u16) = clib_host_to_net_u16 (x);
+ else if (n_bytes == 4)
+ clib_mem_unaligned (p, u32) = clib_host_to_net_u32 (x);
+ else if (n_bytes == 8)
+ clib_mem_unaligned (p, u64) = clib_host_to_net_u64 (x);
+ else
+ ASSERT (0);
+}
+
+always_inline void
+unserialize_integer (serialize_main_t * m, void *x, u32 n_bytes)
+{
+ u8 *p = unserialize_get (m, n_bytes);
+ if (n_bytes == 1)
+ *(u8 *) x = p[0];
+ else if (n_bytes == 2)
+ *(u16 *) x = clib_net_to_host_unaligned_mem_u16 ((u16 *) p);
+ else if (n_bytes == 4)
+ *(u32 *) x = clib_net_to_host_unaligned_mem_u32 ((u32 *) p);
+ else if (n_bytes == 8)
+ *(u64 *) x = clib_net_to_host_unaligned_mem_u64 ((u64 *) p);
+ else
+ ASSERT (0);
+}
+
+/* As above, but variable-length: the low-order tag bits of the first
+   byte select a 1, 2, 4 or 9 byte encoding. */
+always_inline void
+serialize_likely_small_unsigned_integer (serialize_main_t * m, u64 x)
+{
+ u64 r = x;
+ u8 *p;
+
+ /* Low bit set means it fits into 1 byte. */
+ if (r < (1 << 7))
+ {
+ p = serialize_get (m, 1);
+ p[0] = 1 + 2 * r;
+ return;
+ }
+
+ /* Low 2 bits 1 0 means it fits into 2 bytes. */
+ r -= (1 << 7);
+ if (r < (1 << 14))
+ {
+ p = serialize_get (m, 2);
+ clib_mem_unaligned (p, u16) = clib_host_to_little_u16 (4 * r + 2);
+ return;
+ }
+
+ r -= (1 << 14);
+ if (r < (1 << 29))
+ {
+ p = serialize_get (m, 4);
+ clib_mem_unaligned (p, u32) = clib_host_to_little_u32 (8 * r + 4);
+ return;
+ }
+
+ p = serialize_get (m, 9);
+ p[0] = 0; /* Only low 3 bits are used. */
+ clib_mem_unaligned (p + 1, u64) = clib_host_to_little_u64 (x);
+}
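+
+/* Worked examples of the encoding above (values computed by hand):
+     x = 5      -> 1 byte:  0x0b                  (2*5 + 1, low bit set)
+     x = 200    -> 2 bytes: 0x22 0x01             (little-endian 4*(200-128) + 2)
+     x = 100000 -> 4 bytes: 0x04 0x31 0x0a 0x00   (little-endian 8*(100000-16512) + 4)
+   Values of 2^29 + 16512 or more take the 9-byte form: a 0x00 tag byte
+   followed by the raw little-endian 64-bit value. */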
+
+always_inline u64
+unserialize_likely_small_unsigned_integer (serialize_main_t * m)
+{
+ u8 *p = unserialize_get (m, 1);
+ u64 r;
+ u32 y = p[0];
+
+ if (y & 1)
+ return y / 2;
+
+ r = 1 << 7;
+ if (y & 2)
+ {
+ p = unserialize_get (m, 1);
+ r += (y / 4) + (p[0] << 6);
+ return r;
+ }
+
+ r += 1 << 14;
+ if (y & 4)
+ {
+ p = unserialize_get (m, 3);
+ r += ((y / 8)
+ + (p[0] << (5 + 8 * 0))
+ + (p[1] << (5 + 8 * 1)) + (p[2] << (5 + 8 * 2)));
+ return r;
+ }
+
+ p = unserialize_get (m, 8);
+ r = clib_mem_unaligned (p, u64);
+ r = clib_little_to_host_u64 (r);
+
+ return r;
+}
+
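+/* Zig-zag map signed values onto unsigned ones so small magnitudes stay
+   small: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ... */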
+always_inline void
+serialize_likely_small_signed_integer (serialize_main_t * m, i64 s)
+{
+ u64 u = s < 0 ? -(2 * s + 1) : 2 * s;
+ serialize_likely_small_unsigned_integer (m, u);
+}
+
+always_inline i64
+unserialize_likely_small_signed_integer (serialize_main_t * m)
+{
+ u64 u = unserialize_likely_small_unsigned_integer (m);
+ i64 s = u / 2;
+ return (u & 1) ? -s : s;
+}
+
+void
+serialize_multiple_1 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data);
+void
+serialize_multiple_2 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data);
+void
+serialize_multiple_4 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data);
+
+void
+unserialize_multiple_1 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data);
+void
+unserialize_multiple_2 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data);
+void
+unserialize_multiple_4 (serialize_main_t * m,
+ void *data, uword data_stride, uword n_data);
+
+always_inline void
+serialize_multiple (serialize_main_t * m,
+ void *data,
+ uword n_data_bytes, uword data_stride, uword n_data)
+{
+ if (n_data_bytes == 1)
+ serialize_multiple_1 (m, data, data_stride, n_data);
+ else if (n_data_bytes == 2)
+ serialize_multiple_2 (m, data, data_stride, n_data);
+ else if (n_data_bytes == 4)
+ serialize_multiple_4 (m, data, data_stride, n_data);
+ else
+ ASSERT (0);
+}
+
+always_inline void
+unserialize_multiple (serialize_main_t * m,
+ void *data,
+ uword n_data_bytes, uword data_stride, uword n_data)
+{
+ if (n_data_bytes == 1)
+ unserialize_multiple_1 (m, data, data_stride, n_data);
+ else if (n_data_bytes == 2)
+ unserialize_multiple_2 (m, data, data_stride, n_data);
+ else if (n_data_bytes == 4)
+ unserialize_multiple_4 (m, data, data_stride, n_data);
+ else
+ ASSERT (0);
+}
+
+/* Basic types. */
+serialize_function_t serialize_64, unserialize_64;
+serialize_function_t serialize_32, unserialize_32;
+serialize_function_t serialize_16, unserialize_16;
+serialize_function_t serialize_8, unserialize_8;
+serialize_function_t serialize_f64, unserialize_f64;
+serialize_function_t serialize_f32, unserialize_f32;
+
+/* Basic vector types. */
+serialize_function_t serialize_vec_8, unserialize_vec_8;
+serialize_function_t serialize_vec_16, unserialize_vec_16;
+serialize_function_t serialize_vec_32, unserialize_vec_32;
+serialize_function_t serialize_vec_64, unserialize_vec_64;
+
+/* Serialize generic vectors. */
+serialize_function_t serialize_vector, unserialize_vector,
+ unserialize_aligned_vector;
+
+#define vec_serialize(m,v,f) \
+ serialize ((m), serialize_vector, (v), sizeof ((v)[0]), (f))
+
+#define vec_unserialize(m,v,f) \
+ unserialize ((m), unserialize_vector, (v), sizeof ((*(v))[0]), (f))
+
+#define vec_unserialize_aligned(m,v,f) \
+ unserialize ((m), unserialize_aligned_vector, (v), sizeof ((*(v))[0]), (f))
+
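+/* Usage sketch: for a u32 vector v, vec_serialize (m, v, serialize_vec_32)
+   writes the length followed by the elements; vec_unserialize (m, &v,
+   unserialize_vec_32) rebuilds the vector on the receiving side. */
+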
+/* Serialize pools. */
+serialize_function_t serialize_pool, unserialize_pool,
+ unserialize_aligned_pool;
+
+#define pool_serialize(m,v,f) \
+ serialize ((m), serialize_pool, (v), sizeof ((v)[0]), (f))
+
+#define pool_unserialize(m,v,f) \
+ unserialize ((m), unserialize_pool, (v), sizeof ((*(v))[0]), (f))
+
+#define pool_unserialize_aligned(m,v,a,f) \
+ unserialize ((m), unserialize_aligned_pool, (v), sizeof ((*(v))[0]), (a), (f))
+
+/* Serialize heaps. */
+serialize_function_t serialize_heap, unserialize_heap;
+
+void serialize_bitmap (serialize_main_t * m, uword * b);
+uword *unserialize_bitmap (serialize_main_t * m);
+
+void serialize_cstring (serialize_main_t * m, char *string);
+void unserialize_cstring (serialize_main_t * m, char **string);
+
+void serialize_close (serialize_main_t * m);
+void unserialize_close (serialize_main_t * m);
+
+void serialize_open_data (serialize_main_t * m, u8 * data,
+ uword n_data_bytes);
+void unserialize_open_data (serialize_main_t * m, u8 * data,
+ uword n_data_bytes);
+
+/* Starts serialization with expanding vector as buffer. */
+void serialize_open_vector (serialize_main_t * m, u8 * vector);
+
+/* Serialization is done: returns vector buffer to caller. */
+void *serialize_close_vector (serialize_main_t * m);
+
+void unserialize_open_vector (serialize_main_t * m, u8 * vector);
+
+#ifdef CLIB_UNIX
+clib_error_t *serialize_open_unix_file (serialize_main_t * m, char *file);
+clib_error_t *unserialize_open_unix_file (serialize_main_t * m, char *file);
+
+void serialize_open_unix_file_descriptor (serialize_main_t * m, int fd);
+void unserialize_open_unix_file_descriptor (serialize_main_t * m, int fd);
+#endif /* CLIB_UNIX */
+
+/* Main routines. */
+clib_error_t *serialize (serialize_main_t * m, ...);
+clib_error_t *unserialize (serialize_main_t * m, ...);
+clib_error_t *va_serialize (serialize_main_t * m, va_list * va);
+
+void serialize_magic (serialize_main_t * m, void *magic, u32 magic_bytes);
+void unserialize_check_magic (serialize_main_t * m, void *magic,
+ u32 magic_bytes);
+
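+/* Minimal round-trip sketch (illustrative only; error handling elided):
+
+     serialize_main_t m;
+     u32 x = 42, y;
+     u8 *buf;
+
+     serialize_open_vector (&m, 0);
+     serialize (&m, serialize_32, x);
+     buf = serialize_close_vector (&m);
+
+     unserialize_open_data (&m, buf, vec_len (buf));
+     unserialize (&m, unserialize_32, &y);    (y is now 42)
+*/
+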
+#endif /* included_clib_serialize_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/slist.c b/src/vppinfra/slist.c
new file mode 100644
index 00000000..892517bb
--- /dev/null
+++ b/src/vppinfra/slist.c
@@ -0,0 +1,336 @@
+/*
+ Copyright (c) 2012 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/slist.h>
+
+/*
+ * skip-list implementation
+ *
+ * Good news / bad news. As balanced binary tree schemes go,
+ * this one seems pretty fast and is reasonably simple. There's a very
+ * limited amount that can be done to mitigate SDRAM read latency.
+ *
+ * Each active clib_slist_elt_t is on between 1 and N lists. Each active
+ * element is always on the "level-0" list. Since most elements are *only*
+ * on level 0, we keep the level-0 (and level-1) next indices in the
+ * element itself. For those elements on more than two lists, we switch
+ * to a vector. Hence the "n" union in clib_slist_elt_t.
+ *
+ * The low-order bit of elt->n.next0[0] is 1 for inlined next indices,
+ * 0 for vector indices (since the allocator always aligns to at least
+ * a 4-byte boundary). We can only represent 2e9 items, but since the
+ * practical performance limit is O(1e7), it doesn't matter.
+ *
+ * We create a "head" element which (by construction) is always
+ * lexically lighter than any other element. This makes a large number
+ * of irritating special cases go away.
+ *
+ * User code is in charge of comparing a supplied key with
+ * the key component of a user pool element. The user tells this code
+ * to add or delete (opaque key, 32-bit integer) pairs to the skip-list.
+ *
+ * The algorithm adds new elements to one or more lists.
+ * For levels greater than zero, the probability of a new element landing on
+ * a list is branching_factor**N. Branching_factor = 0.2 seems to work
+ * OK, yielding about 50 compares per search at O(1e7) items.
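+ *
+ * A usage sketch (illustrative; my_compare and my_format are
+ * caller-supplied callbacks, not part of this file):
+ *
+ *   clib_slist_t sl;
+ *   clib_slist_init (&sl, 0.2, my_compare, my_format);
+ *   clib_slist_add (&sl, key, elt_pool_index);
+ *   index = clib_slist_search (&sl, key, 0);  (returns (u32)~0 on miss)
+ *   clib_slist_del (&sl, key);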
+ */
+
+clib_error_t *
+clib_slist_init (clib_slist_t * sp, f64 branching_factor,
+ clib_slist_key_compare_function_t compare,
+ format_function_t format_user_element)
+{
+ clib_slist_elt_t *head;
+ memset (sp, 0, sizeof (sp[0]));
+ sp->branching_factor = branching_factor;
+ sp->format_user_element = format_user_element;
+ sp->compare = compare;
+ sp->seed = 0xdeaddabe;
+ pool_get (sp->elts, head);
+ vec_add1 (head->n.nexts, (u32) ~ 0);
+ head->user_pool_index = (u32) ~ 0;
+ vec_validate (sp->path, 1);
+ vec_validate (sp->occupancy, 0);
+
+ return 0;
+}
+
+/*
+ * slist_search_internal
+ */
+static inline clib_slist_search_result_t
+slist_search_internal (clib_slist_t * sp, void *key, int need_full_path)
+{
+ int level, comp_result;
+ clib_slist_elt_t *search_elt, *head_elt;
+
+ sp->ncompares = 0;
+ /*
+ * index 0 is the magic listhead element which is
+ * lexically lighter than / to the left of every element
+ */
+ search_elt = head_elt = pool_elt_at_index (sp->elts, 0);
+
+ /*
+ * Initial negotiating position, only the head_elt is
+ * lighter than the supplied key
+ */
+ memset (sp->path, 0, vec_len (head_elt->n.nexts) * sizeof (u32));
+
+ /* Walk the fastest lane first */
+ level = vec_len (head_elt->n.nexts) - 1;
+ _vec_len (sp->path) = level + 1;
+
+ while (1)
+ {
+ u32 next_index_this_level;
+ clib_slist_elt_t *prefetch_elt;
+
+ /*
+ * Prefetching the next element at this level makes a measurable
+ * difference, but doesn't fix the dependent read stall problem
+ */
+ prefetch_elt = sp->elts +
+ clib_slist_get_next_at_level (search_elt, level);
+
+ CLIB_PREFETCH (prefetch_elt, CLIB_CACHE_LINE_BYTES, READ);
+
+ /* Compare the key with the current element */
+ comp_result = (search_elt == head_elt) ? 1 :
+ sp->compare (key, search_elt->user_pool_index);
+
+ sp->ncompares++;
+ /* key "lighter" than this element */
+ if (comp_result < 0)
+ {
+ /*
+ * Back up to previous item on this list
+ * and search the next finer-grained list
+ * starting there.
+ */
+ search_elt = pool_elt_at_index (sp->elts, sp->path[level]);
+ next_list:
+ if (level > 0)
+ {
+ level--;
+ continue;
+ }
+ else
+ {
+ return CLIB_SLIST_NO_MATCH;
+ }
+ }
+ /* Match */
+ if (comp_result == 0)
+ {
+ /*
+ * If we're trying to delete an element, we need to
+ * track down all of the elements which point at it.
+ * Otherwise, don't bother with it
+ */
+ if (need_full_path && level > 0)
+ {
+ search_elt = pool_elt_at_index (sp->elts, sp->path[level]);
+ level--;
+ continue;
+ }
+ level = vec_len (head_elt->n.nexts);
+ sp->path[level] = search_elt - sp->elts;
+ _vec_len (sp->path) = level + 1;
+ return CLIB_SLIST_MATCH;
+ }
+ /*
+ * comp_result positive, key is to the right of
+ * this element
+ */
+ sp->path[level] = search_elt - sp->elts;
+
+ /* Out of list at this level? */
+ next_index_this_level =
+ clib_slist_get_next_at_level (search_elt, level);
+ if (next_index_this_level == (u32) ~ 0)
+ goto next_list;
+
+ /* No, try the next element */
+ search_elt = pool_elt_at_index (sp->elts, next_index_this_level);
+ }
+ return 0; /* notreached */
+}
+
+u32
+clib_slist_search (clib_slist_t * sp, void *key, u32 * ncompares)
+{
+ clib_slist_search_result_t rv;
+
+ rv = slist_search_internal (sp, key, 0 /* don't need full path */ );
+ if (rv == CLIB_SLIST_MATCH)
+ {
+ clib_slist_elt_t *elt;
+ elt = pool_elt_at_index (sp->elts, sp->path[vec_len (sp->path) - 1]);
+ if (ncompares)
+ *ncompares = sp->ncompares;
+ return elt->user_pool_index;
+ }
+ return (u32) ~ 0;
+}
+
+void
+clib_slist_add (clib_slist_t * sp, void *key, u32 user_pool_index)
+{
+ clib_slist_elt_t *new_elt;
+ clib_slist_search_result_t search_result;
+ int level;
+
+ search_result = slist_search_internal (sp, key,
+ 0 /* don't need full path */ );
+
+ /* Special case: key exists, just replace user_pool_index */
+ if (PREDICT_FALSE (search_result == CLIB_SLIST_MATCH))
+ {
+ clib_slist_elt_t *elt;
+ elt = pool_elt_at_index (sp->elts, sp->path[0]);
+ elt->user_pool_index = user_pool_index;
+ return;
+ }
+
+ pool_get (sp->elts, new_elt);
+ new_elt->n.nexts = 0;
+ new_elt->user_pool_index = user_pool_index;
+
+ /* sp->path lists elements to the left of key, by level */
+ for (level = 0; level < vec_len (sp->path); level++)
+ {
+ clib_slist_elt_t *prev_elt_this_level;
+ u32 prev_elt_next_index_this_level;
+
+ /* Add to list at the current level */
+ prev_elt_this_level = pool_elt_at_index (sp->elts, sp->path[level]);
+ prev_elt_next_index_this_level = clib_slist_get_next_at_level
+ (prev_elt_this_level, level);
+
+ clib_slist_set_next_at_level (new_elt, prev_elt_next_index_this_level,
+ level);
+
+ clib_slist_set_next_at_level (prev_elt_this_level, new_elt - sp->elts,
+ level);
+ sp->occupancy[level]++;
+
+ /* Randomly add to the next-higher level */
+ if (random_f64 (&sp->seed) > sp->branching_factor)
+ break;
+ }
+ {
+ /* Time to add a new ply? */
+ clib_slist_elt_t *head_elt = pool_elt_at_index (sp->elts, 0);
+ int top_level = vec_len (head_elt->n.nexts) - 1;
+ if (((f64) sp->occupancy[top_level]) * sp->branching_factor > 1.0)
+ {
+ vec_add1 (sp->occupancy, 0);
+ vec_add1 (head_elt->n.nexts, (u32) ~ 0);
+ /* a full-path match records n+1 path entries */
+ vec_validate (sp->path, vec_len (head_elt->n.nexts));
+ }
+ }
+}
+
+clib_slist_search_result_t
+clib_slist_del (clib_slist_t * sp, void *key)
+{
+ clib_slist_search_result_t search_result;
+ clib_slist_elt_t *del_elt;
+ int level;
+
+ search_result = slist_search_internal (sp, key, 1 /* need full path */ );
+
+ if (PREDICT_FALSE (search_result == CLIB_SLIST_NO_MATCH))
+ return search_result;
+
+ del_elt = pool_elt_at_index (sp->elts, sp->path[vec_len (sp->path) - 1]);
+ ASSERT (vec_len (sp->path) > 1);
+
+ for (level = 0; level < vec_len (sp->path) - 1; level++)
+ {
+ clib_slist_elt_t *path_elt;
+ u32 path_elt_next_index;
+
+ path_elt = pool_elt_at_index (sp->elts, sp->path[level]);
+ path_elt_next_index = clib_slist_get_next_at_level (path_elt, level);
+
+ /* Splice the item out of the list if it's adjacent to the victim */
+ if (path_elt_next_index == del_elt - sp->elts)
+ {
+ sp->occupancy[level]--;
+ path_elt_next_index = clib_slist_get_next_at_level (del_elt, level);
+ clib_slist_set_next_at_level (path_elt, path_elt_next_index, level);
+ }
+ }
+
+ /* If this element is on more than two lists it has a vector of nexts */
+ if (!(del_elt->n.next0[0] & 1))
+ vec_free (del_elt->n.nexts);
+ pool_put (sp->elts, del_elt);
+ return CLIB_SLIST_MATCH;
+}
+
+u8 *
+format_slist (u8 * s, va_list * args)
+{
+ clib_slist_t *sl = va_arg (*args, clib_slist_t *);
+ int verbose = va_arg (*args, int);
+ int i;
+ clib_slist_elt_t *head_elt, *elt;
+
+ s = format (s, "slist 0x%x, %u items, branching_factor %.2f\n", sl,
+ sl->occupancy ? sl->occupancy[0] : 0, sl->branching_factor);
+
+ if (pool_elts (sl->elts) == 0)
+ return s;
+
+ head_elt = pool_elt_at_index (sl->elts, 0);
+
+ for (i = 0; i < vec_len (head_elt->n.nexts); i++)
+ {
+ s = format (s, "level %d: %d elts\n", i,
+ sl->occupancy ? sl->occupancy[i] : 0);
+
+ if (verbose && head_elt->n.nexts[i] != (u32) ~ 0)
+ {
+ elt = pool_elt_at_index (sl->elts, head_elt->n.nexts[i]);
+ while (elt)
+ {
+ u32 next_index;
+ s = format (s, "%U(%d) ", sl->format_user_element,
+ elt->user_pool_index, elt - sl->elts);
+ next_index = clib_slist_get_next_at_level (elt, i);
+ ASSERT (next_index != 0x7fffffff);
+ if (next_index == (u32) ~ 0)
+ break;
+ else
+ elt = pool_elt_at_index (sl->elts, next_index);
+ }
+ }
+ s = format (s, "\n");
+ }
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/slist.h b/src/vppinfra/slist.h
new file mode 100644
index 00000000..a7c77e27
--- /dev/null
+++ b/src/vppinfra/slist.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_slist_h
+#define included_slist_h
+
+#include <stdarg.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/cache.h>
+
+typedef word (clib_slist_key_compare_function_t)
+ (void *key, u32 elt_pool_index);
+
+typedef enum
+{
+ CLIB_SLIST_MATCH = 0,
+ CLIB_SLIST_NO_MATCH
+} clib_slist_search_result_t;
+
+typedef struct
+{
+ /* Vector of next elements. Every valid instance has at least one */
+ union
+ {
+ u32 next0[2];
+ u32 *nexts;
+ } n;
+
+ /* Index of item in user's pool */
+ u32 user_pool_index;
+ /* $$$ pad to even divisor of cache line */
+} clib_slist_elt_t;
+
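+/*
+ * Space optimization: an element on at most two lists stores its next
+ * indices inline in next0[2]. The level-0 index is shifted left one
+ * bit and tagged with a 1 in the low bit; ~0 (end of list) is kept
+ * as-is. An element on three or more lists uses the heap-allocated
+ * nexts vector instead.
+ */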
+static inline u32
+clib_slist_get_next_at_level (clib_slist_elt_t * elt, int level)
+{
+ if (elt->n.next0[0] & 1)
+ {
+ ASSERT (level < 2);
+ if (level == 1)
+ return elt->n.next0[1];
+ /* preserve ~0 (end of list) */
+ return (elt->n.next0[0] == (u32) ~ 0) ? elt->n.next0[0] :
+ (elt->n.next0[0] >> 1);
+ }
+ else
+ {
+ ASSERT (level < vec_len (elt->n.nexts));
+ return elt->n.nexts[level];
+ }
+}
+
+static inline void
+clib_slist_set_next_at_level (clib_slist_elt_t * elt, u32 index, int level)
+{
+ u32 old_level0_value[2];
+ /* level0 and not a vector */
+ if (level < 2 && (elt->n.next0[0] == 0 || elt->n.next0[0] & 1))
+ {
+ if (level == 0)
+ {
+ elt->n.next0[0] = (index << 1) | 1;
+ return;
+ }
+ elt->n.next0[1] = index;
+ return;
+ }
+ /* converting the compressed form to a vector: save the old level 0/1 values first */
+ if (elt->n.next0[0] & 1)
+ {
+ old_level0_value[0] = (elt->n.next0[0] == (u32) ~ 0) ?
+ elt->n.next0[0] : elt->n.next0[0] >> 1;
+ old_level0_value[1] = elt->n.next0[1];
+ elt->n.nexts = 0;
+ vec_add1 (elt->n.nexts, old_level0_value[0]);
+ vec_add1 (elt->n.nexts, old_level0_value[1]);
+ }
+ vec_validate (elt->n.nexts, level);
+ elt->n.nexts[level] = index;
+}
+
+
+typedef struct
+{
+ /* pool of skip-list elements */
+ clib_slist_elt_t *elts;
+
+ /* last search path */
+ u32 *path;
+
+ /* last search number of compares */
+ u32 ncompares;
+
+ /* occupancy stats */
+ u32 *occupancy;
+
+ /* Comparison function */
+ clib_slist_key_compare_function_t *compare;
+
+ /* Format function */
+ format_function_t *format_user_element;
+
+ /* items appear in successive plies with Pr (1 / branching_factor) */
+ f64 branching_factor;
+
+ /* random seed */
+ u32 seed;
+} clib_slist_t;
+
+clib_error_t *clib_slist_init (clib_slist_t * sp, f64 branching_factor,
+ clib_slist_key_compare_function_t compare,
+ format_function_t format_user_element);
+
+format_function_t format_slist;
+
+void clib_slist_add (clib_slist_t * sp, void *key, u32 user_pool_index);
+clib_slist_search_result_t clib_slist_del (clib_slist_t * sp, void *key);
+u32 clib_slist_search (clib_slist_t * sp, void *key, u32 * ncompares);
+
+#endif /* included_slist_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/smp.c b/src/vppinfra/smp.c
new file mode 100644
index 00000000..f603283e
--- /dev/null
+++ b/src/vppinfra/smp.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/longjmp.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/os.h>
+
+void
+clib_smp_free (clib_smp_main_t * m)
+{
+ clib_mem_vm_free (m->vm_base,
+ (uword) ((1 + m->n_cpus) << m->log2_n_per_cpu_vm_bytes));
+}
+
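+/*
+ * Virtual memory layout: n_cpus + 1 equal-sized slots. Slot c holds
+ * cpu c's private mheap, with its stack at the top of the slot; the
+ * extra slot holds the thread-safe global heap shared by all cpus.
+ */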
+static uword
+allocate_per_cpu_mheap (uword cpu)
+{
+ clib_smp_main_t *m = &clib_smp_main;
+ void *heap;
+ uword vm_size, stack_size, mheap_flags;
+
+ ASSERT (os_get_thread_index () == cpu);
+
+ vm_size = (uword) 1 << m->log2_n_per_cpu_vm_bytes;
+ stack_size = (uword) 1 << m->log2_n_per_cpu_stack_bytes;
+
+ mheap_flags = MHEAP_FLAG_SMALL_OBJECT_CACHE;
+
+ /* Heap extends up to start of stack. */
+ heap = mheap_alloc_with_flags (clib_smp_vm_base_for_cpu (m, cpu),
+ vm_size - stack_size, mheap_flags);
+ clib_mem_set_heap (heap);
+
+ if (cpu == 0)
+ {
+ /* Now that we have a heap, allocate main structure on cpu 0. */
+ vec_resize (m->per_cpu_mains, m->n_cpus);
+
+ /* Allocate shared global heap (thread safe). */
+ m->global_heap =
+ mheap_alloc_with_flags (clib_smp_vm_base_for_cpu (m, cpu + m->n_cpus),
+ vm_size,
+ mheap_flags | MHEAP_FLAG_THREAD_SAFE);
+ }
+
+ m->per_cpu_mains[cpu].heap = heap;
+ return 0;
+}
+
+void
+clib_smp_init (void)
+{
+ clib_smp_main_t *m = &clib_smp_main;
+ uword cpu;
+
+ m->vm_base =
+ clib_mem_vm_alloc ((uword) (m->n_cpus + 1) << m->log2_n_per_cpu_vm_bytes);
+ if (!m->vm_base)
+ clib_error ("error allocating virtual memory");
+
+ for (cpu = 0; cpu < m->n_cpus; cpu++)
+ clib_calljmp (allocate_per_cpu_mheap, cpu,
+ clib_smp_stack_top_for_cpu (m, cpu));
+}
+
+void
+clib_smp_lock_init (clib_smp_lock_t ** pl)
+{
+ clib_smp_lock_t *l;
+ uword i, n_bytes, n_fifo_elts;
+
+ /* No locking is necessary if n_cpus <= 1.
+ A null lock pointer means no locking is performed. */
+ if (clib_smp_main.n_cpus < 2)
+ {
+ *pl = 0;
+ return;
+ }
+
+ /* Need n_cpus - 1 elts in waiting fifo. One CPU holds lock
+ and others could potentially be waiting. */
+ n_fifo_elts = clib_smp_main.n_cpus - 1;
+
+ n_bytes = sizeof (l[0]) + n_fifo_elts * sizeof (l->waiting_fifo[0]);
+ ASSERT_AND_PANIC (n_bytes % CLIB_CACHE_LINE_BYTES == 0);
+
+ l = clib_mem_alloc_aligned (n_bytes, CLIB_CACHE_LINE_BYTES);
+
+ memset (l, 0, n_bytes);
+ l->n_waiting_fifo_elts = n_fifo_elts;
+
+ for (i = 0; i < l->n_waiting_fifo_elts; i++)
+ l->waiting_fifo[i].wait_type = CLIB_SMP_LOCK_WAIT_EMPTY;
+
+ *pl = l;
+}
+
+void
+clib_smp_lock_free (clib_smp_lock_t ** pl)
+{
+ if (*pl)
+ clib_mem_free (*pl);
+ *pl = 0;
+}
+
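+/*
+ * Slow path: atomically append this cpu to the lock's waiting FIFO by
+ * advancing the tail recorded in the lock header, then spin on the
+ * claimed FIFO slot until the current holder hands over the lock by
+ * marking the slot CLIB_SMP_LOCK_WAIT_DONE.
+ */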
+void
+clib_smp_lock_slow_path (clib_smp_lock_t * l,
+ uword my_cpu,
+ clib_smp_lock_header_t h0, clib_smp_lock_type_t type)
+{
+ clib_smp_lock_header_t h1, h2, h3;
+ uword is_reader = type == CLIB_SMP_LOCK_TYPE_READER;
+ uword n_fifo_elts = l->n_waiting_fifo_elts;
+ uword my_tail;
+
+ /* Atomically advance waiting FIFO tail pointer; my_tail will point
+ to entry where we can insert ourselves to wait for lock to be granted. */
+ while (1)
+ {
+ h1 = h0;
+ my_tail = h1.waiting_fifo.head_index + h1.waiting_fifo.n_elts;
+ my_tail = my_tail >= n_fifo_elts ? my_tail - n_fifo_elts : my_tail;
+ h1.waiting_fifo.n_elts += 1;
+ h1.request_cpu = my_cpu;
+
+ ASSERT_AND_PANIC (h1.waiting_fifo.n_elts <= n_fifo_elts);
+ ASSERT_AND_PANIC (my_tail >= 0 && my_tail < n_fifo_elts);
+
+ h2 = clib_smp_lock_set_header (l, h1, h0);
+
+ /* Tail successfully advanced? */
+ if (clib_smp_lock_header_is_equal (h0, h2))
+ break;
+
+ /* The compare-and-swap failed. For spin locks, the holder may
+ have released the lock in the meantime; if so, try to take it
+ directly instead of queueing. */
+ else if (type == CLIB_SMP_LOCK_TYPE_SPIN)
+ {
+ while (!h2.writer_has_lock)
+ {
+ ASSERT_AND_PANIC (h2.waiting_fifo.n_elts == 0);
+ h1 = h2;
+ h1.request_cpu = my_cpu;
+ h1.writer_has_lock = 1;
+
+ h3 = clib_smp_lock_set_header (l, h1, h2);
+
+ /* Got it? */
+ if (clib_smp_lock_header_is_equal (h2, h3))
+ return;
+
+ h2 = h3;
+ }
+ }
+
+ /* Try to advance tail again. */
+ h0 = h2;
+ }
+
+ {
+ clib_smp_lock_waiting_fifo_elt_t *w;
+
+ w = l->waiting_fifo + my_tail;
+
+ while (w->wait_type != CLIB_SMP_LOCK_WAIT_EMPTY)
+ clib_smp_pause ();
+
+ w->wait_type = (is_reader
+ ? CLIB_SMP_LOCK_WAIT_READER : CLIB_SMP_LOCK_WAIT_WRITER);
+
+ /* Wait until CPU holding the lock grants us the lock. */
+ while (w->wait_type != CLIB_SMP_LOCK_WAIT_DONE)
+ clib_smp_pause ();
+
+ w->wait_type = CLIB_SMP_LOCK_WAIT_EMPTY;
+ }
+}
+
+void
+clib_smp_unlock_slow_path (clib_smp_lock_t * l,
+ uword my_cpu,
+ clib_smp_lock_header_t h0,
+ clib_smp_lock_type_t type)
+{
+ clib_smp_lock_header_t h1, h2;
+ clib_smp_lock_waiting_fifo_elt_t *head;
+ clib_smp_lock_wait_type_t head_wait_type;
+ uword is_reader = type == CLIB_SMP_LOCK_TYPE_READER;
+ uword n_fifo_elts = l->n_waiting_fifo_elts;
+ uword head_index, must_wait_for_readers;
+
+ while (1)
+ {
+ /* Advance waiting fifo giving lock to first waiter. */
+ while (1)
+ {
+ ASSERT_AND_PANIC (h0.waiting_fifo.n_elts != 0);
+
+ h1 = h0;
+
+ head_index = h1.waiting_fifo.head_index;
+ head = l->waiting_fifo + head_index;
+ if (is_reader)
+ {
+ ASSERT_AND_PANIC (h1.n_readers_with_lock > 0);
+ h1.n_readers_with_lock -= 1;
+ }
+ else
+ {
+ /* Writer will already have lock. */
+ ASSERT_AND_PANIC (h1.writer_has_lock);
+ }
+
+ while ((head_wait_type =
+ head->wait_type) == CLIB_SMP_LOCK_WAIT_EMPTY)
+ clib_smp_pause ();
+
+ /* Don't advance FIFO to writer unless all readers have unlocked. */
+ must_wait_for_readers =
+ (type != CLIB_SMP_LOCK_TYPE_SPIN
+ && head_wait_type == CLIB_SMP_LOCK_WAIT_WRITER
+ && h1.n_readers_with_lock != 0);
+
+ if (!must_wait_for_readers)
+ {
+ head_index += 1;
+ h1.waiting_fifo.n_elts -= 1;
+ if (type != CLIB_SMP_LOCK_TYPE_SPIN)
+ {
+ if (head_wait_type == CLIB_SMP_LOCK_WAIT_WRITER)
+ h1.writer_has_lock = h1.n_readers_with_lock == 0;
+ else
+ {
+ h1.writer_has_lock = 0;
+ h1.n_readers_with_lock += 1;
+ }
+ }
+ }
+
+ h1.waiting_fifo.head_index =
+ head_index == n_fifo_elts ? 0 : head_index;
+ h1.request_cpu = my_cpu;
+
+ ASSERT_AND_PANIC (h1.waiting_fifo.head_index >= 0
+ && h1.waiting_fifo.head_index < n_fifo_elts);
+ ASSERT_AND_PANIC (h1.waiting_fifo.n_elts >= 0
+ && h1.waiting_fifo.n_elts <= n_fifo_elts);
+
+ h2 = clib_smp_lock_set_header (l, h1, h0);
+
+ if (clib_smp_lock_header_is_equal (h2, h0))
+ break;
+
+ h0 = h2;
+
+ if (h0.waiting_fifo.n_elts == 0)
+ return clib_smp_unlock_inline (l, type);
+ }
+
+ if (must_wait_for_readers)
+ return;
+
+ /* Wake up head of waiting fifo. */
+ {
+ uword done_waking;
+
+ /* Shift lock to first thread waiting in fifo. */
+ head->wait_type = CLIB_SMP_LOCK_WAIT_DONE;
+
+ /* For read locks we may be able to wake multiple readers. */
+ done_waking = 1;
+ if (head_wait_type == CLIB_SMP_LOCK_WAIT_READER)
+ {
+ uword hi = h0.waiting_fifo.head_index;
+ if (h0.waiting_fifo.n_elts != 0
+ && l->waiting_fifo[hi].wait_type == CLIB_SMP_LOCK_WAIT_READER)
+ done_waking = 0;
+ }
+
+ if (done_waking)
+ break;
+ }
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/smp.h b/src/vppinfra/smp.h
new file mode 100644
index 00000000..7e703b3d
--- /dev/null
+++ b/src/vppinfra/smp.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001-2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_smp_h
+#define included_clib_smp_h
+
+#include <vppinfra/cache.h>
+#include <vppinfra/os.h> /* for os_panic */
+
+#define clib_smp_compare_and_swap(addr,new,old) __sync_val_compare_and_swap(addr,old,new)
+#define clib_smp_swap(addr,new) __sync_lock_test_and_set(addr,new)
+#define clib_smp_atomic_add(addr,increment) __sync_fetch_and_add(addr,increment)
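+
+/* These wrap the GCC __sync builtins; e.g.
+   u32 old = clib_smp_atomic_add (&counter, 1);
+   returns the value held at counter before the increment
+   (fetch-and-add semantics). */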
+
+#if defined (i386) || defined (__x86_64__)
+#define clib_smp_pause() do { asm volatile ("pause"); } while (0)
+#endif
+
+#ifndef clib_smp_pause
+#define clib_smp_pause() do { } while (0)
+#endif
+
+#ifdef CLIB_UNIX
+#include <sched.h>
+
+always_inline void
+os_sched_yield (void)
+{
+ sched_yield ();
+}
+#else
+always_inline void
+os_sched_yield (void)
+{
+ clib_smp_pause ();
+}
+#endif
+
+
+#endif /* included_clib_smp_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/smp_fifo.c b/src/vppinfra/smp_fifo.c
new file mode 100644
index 00000000..bb74064d
--- /dev/null
+++ b/src/vppinfra/smp_fifo.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2012 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/smp_fifo.h>
+#include <vppinfra/mem.h>
+
+clib_smp_fifo_t *
+clib_smp_fifo_init (uword max_n_elts, uword n_bytes_per_elt)
+{
+ clib_smp_fifo_t *f;
+ uword n_bytes_per_elt_cache_aligned;
+
+ f = clib_mem_alloc_aligned (sizeof (f[0]), CLIB_CACHE_LINE_BYTES);
+
+ memset (f, 0, sizeof (f[0]));
+
+ max_n_elts = max_n_elts ? max_n_elts : 32;
+ f->log2_max_n_elts = max_log2 (max_n_elts);
+ f->max_n_elts_less_one = (1 << f->log2_max_n_elts) - 1;
+
+ n_bytes_per_elt_cache_aligned =
+ clib_smp_fifo_round_elt_bytes (n_bytes_per_elt);
+ clib_exec_on_global_heap (
+ {
+ f->data = clib_mem_alloc_aligned
+ (n_bytes_per_elt_cache_aligned << f->log2_max_n_elts,
+ CLIB_CACHE_LINE_BYTES);
+ });
+
+ /* Zero all data and mark all elements free. */
+ {
+ uword i;
+ for (i = 0; i <= f->max_n_elts_less_one; i++)
+ {
+ void *d = clib_smp_fifo_elt_at_index (f, n_bytes_per_elt, i);
+ clib_smp_fifo_data_footer_t *t;
+
+ memset (d, 0, n_bytes_per_elt_cache_aligned);
+
+ t = clib_smp_fifo_get_data_footer (d, n_bytes_per_elt);
+ clib_smp_fifo_data_footer_set_state (t,
+ CLIB_SMP_FIFO_DATA_STATE_free);
+ }
+ }
+
+ return f;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/smp_fifo.h b/src/vppinfra/smp_fifo.h
new file mode 100644
index 00000000..c74a77c8
--- /dev/null
+++ b/src/vppinfra/smp_fifo.h
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2012 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_smp_vec_h
+#define included_clib_smp_vec_h
+
+#include <vppinfra/smp.h>
+
+#define foreach_clib_smp_fifo_data_state \
+ _ (free) \
+ _ (write_alloc) \
+ _ (write_done) \
+ _ (read_fetch)
+
+typedef enum
+{
+#define _(f) CLIB_SMP_FIFO_DATA_STATE_##f,
+ foreach_clib_smp_fifo_data_state
+#undef _
+ CLIB_SMP_FIFO_N_DATA_STATE,
+} clib_smp_fifo_data_state_t;
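+
+/* Each element cycles through free -> write_alloc -> write_done ->
+   read_fetch -> free; producers and consumers advance the state via
+   the footer stored at the end of the element. */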
+
+/* Footer at end of each data element. */
+typedef struct
+{
+ /* Magic number marking valid footer plus state encoded in low bits. */
+ u32 magic_state;
+} clib_smp_fifo_data_footer_t;
+
+#define CLIB_SMP_DATA_FOOTER_MAGIC 0xfafbfcf0
+
+always_inline clib_smp_fifo_data_state_t
+clib_smp_fifo_data_footer_get_state (clib_smp_fifo_data_footer_t * f)
+{
+ u32 s = f->magic_state - CLIB_SMP_DATA_FOOTER_MAGIC;
+
+ /* Check that magic number plus state is still valid. */
+ if (s >= CLIB_SMP_FIFO_N_DATA_STATE)
+ os_panic ();
+
+ return s;
+}
+
+always_inline void
+clib_smp_fifo_data_footer_set_state (clib_smp_fifo_data_footer_t * f,
+ clib_smp_fifo_data_state_t s)
+{
+ f->magic_state = CLIB_SMP_DATA_FOOTER_MAGIC + s;
+}
+
+typedef struct
+{
+ /* Read/write indices each on their own cache line.
+ Atomic incremented for each read/write. */
+ u32 read_index, write_index;
+
+ /* Power of 2 number of elements in fifo less one. */
+ u32 max_n_elts_less_one;
+
+ /* Log2 of above. */
+ u32 log2_max_n_elts;
+
+ /* Cache aligned data. */
+ void *data;
+} clib_smp_fifo_t;
+
+/* External functions. */
+clib_smp_fifo_t *clib_smp_fifo_init (uword max_n_elts, uword n_bytes_per_elt);
+
+/* Element sizes are rounded up to a whole number of cache lines to avoid SMP cache-line thrashing. */
+always_inline uword
+clib_smp_fifo_round_elt_bytes (uword n_bytes_per_elt)
+{
+ return round_pow2 (n_bytes_per_elt, CLIB_CACHE_LINE_BYTES);
+}
+
+always_inline uword
+clib_smp_fifo_n_elts (clib_smp_fifo_t * f)
+{
+ uword n = f->write_index - f->read_index;
+ ASSERT (n <= f->max_n_elts_less_one + 1);
+ return n;
+}
+
+always_inline clib_smp_fifo_data_footer_t *
+clib_smp_fifo_get_data_footer (void *d, uword n_bytes_per_elt)
+{
+ clib_smp_fifo_data_footer_t *f;
+ f = d + clib_smp_fifo_round_elt_bytes (n_bytes_per_elt) - sizeof (f[0]);
+ return f;
+}
+
+always_inline void *
+clib_smp_fifo_elt_at_index (clib_smp_fifo_t * f, uword n_bytes_per_elt,
+ uword i)
+{
+ uword n_bytes_per_elt_cache_aligned;
+
+ ASSERT (i <= f->max_n_elts_less_one);
+
+ n_bytes_per_elt_cache_aligned =
+ clib_smp_fifo_round_elt_bytes (n_bytes_per_elt);
+
+ return f->data + i * n_bytes_per_elt_cache_aligned;
+}
+
+always_inline void *
+clib_smp_fifo_write_alloc (clib_smp_fifo_t * f, uword n_bytes_per_elt)
+{
+ void *d;
+ clib_smp_fifo_data_footer_t *t;
+ clib_smp_fifo_data_state_t s;
+ u32 wi0, wi1;
+
+ wi0 = f->write_index;
+
+ /* Fifo full? */
+ if (wi0 - f->read_index > f->max_n_elts_less_one)
+ return 0;
+
+ while (1)
+ {
+ wi1 = wi0 + 1;
+
+ d =
+ clib_smp_fifo_elt_at_index (f, n_bytes_per_elt,
+ wi0 & f->max_n_elts_less_one);
+ t = clib_smp_fifo_get_data_footer (d, n_bytes_per_elt);
+
+ s = clib_smp_fifo_data_footer_get_state (t);
+ if (s != CLIB_SMP_FIFO_DATA_STATE_free)
+ {
+ d = 0;
+ break;
+ }
+
+ wi1 = clib_smp_compare_and_swap (&f->write_index, wi1, wi0);
+
+ if (wi1 == wi0)
+ {
+ clib_smp_fifo_data_footer_set_state (t,
+ CLIB_SMP_FIFO_DATA_STATE_write_alloc);
+ break;
+ }
+
+ /* Other cpu wrote write index first: try again. */
+ wi0 = wi1;
+ }
+
+ return d;
+}
+
+always_inline void
+clib_smp_fifo_write_done (clib_smp_fifo_t * f, void *d, uword n_bytes_per_elt)
+{
+ clib_smp_fifo_data_footer_t *t;
+
+ /* Flush out pending writes before we change state to write_done.
+ This will hold off readers until data is flushed. */
+ CLIB_MEMORY_BARRIER ();
+
+ t = clib_smp_fifo_get_data_footer (d, n_bytes_per_elt);
+
+ ASSERT (clib_smp_fifo_data_footer_get_state (t) ==
+ CLIB_SMP_FIFO_DATA_STATE_write_alloc);
+ clib_smp_fifo_data_footer_set_state (t,
+ CLIB_SMP_FIFO_DATA_STATE_write_done);
+}
+
+always_inline void *
+clib_smp_fifo_read_fetch (clib_smp_fifo_t * f, uword n_bytes_per_elt)
+{
+ void *d;
+ clib_smp_fifo_data_footer_t *t;
+ clib_smp_fifo_data_state_t s;
+ u32 ri0, ri1;
+
+ ri0 = f->read_index;
+
+ /* Fifo empty? */
+ if (f->write_index - ri0 == 0)
+ return 0;
+
+ while (1)
+ {
+ ri1 = ri0 + 1;
+
+ d =
+ clib_smp_fifo_elt_at_index (f, n_bytes_per_elt,
+ ri0 & f->max_n_elts_less_one);
+ t = clib_smp_fifo_get_data_footer (d, n_bytes_per_elt);
+
+ s = clib_smp_fifo_data_footer_get_state (t);
+ if (s != CLIB_SMP_FIFO_DATA_STATE_write_done)
+ {
+ d = 0;
+ break;
+ }
+
+ ri1 = clib_smp_compare_and_swap (&f->read_index, ri1, ri0);
+ if (ri1 == ri0)
+ {
+ clib_smp_fifo_data_footer_set_state (t,
+ CLIB_SMP_FIFO_DATA_STATE_read_fetch);
+ break;
+ }
+
+ ri0 = ri1;
+ }
+
+ return d;
+}
+
+always_inline void
+clib_smp_fifo_read_done (clib_smp_fifo_t * f, void *d, uword n_bytes_per_elt)
+{
+ clib_smp_fifo_data_footer_t *t;
+
+ t = clib_smp_fifo_get_data_footer (d, n_bytes_per_elt);
+
+ ASSERT (clib_smp_fifo_data_footer_get_state (t) ==
+ CLIB_SMP_FIFO_DATA_STATE_read_fetch);
+ clib_smp_fifo_data_footer_set_state (t, CLIB_SMP_FIFO_DATA_STATE_free);
+}
+
+always_inline void
+clib_smp_fifo_memcpy (uword * dst, uword * src, uword n_bytes)
+{
+ word n_bytes_left = n_bytes;
+
+ while (n_bytes_left >= 4 * sizeof (uword))
+ {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+ dst[3] = src[3];
+ dst += 4;
+ src += 4;
+ n_bytes_left -= 4 * sizeof (dst[0]);
+ }
+
+ while (n_bytes_left > 0)
+ {
+ dst[0] = src[0];
+ dst += 1;
+ src += 1;
+ n_bytes_left -= 1 * sizeof (dst[0]);
+ }
+}
+
+always_inline void
+clib_smp_fifo_write_inline (clib_smp_fifo_t * f, void *elt_to_write,
+ uword n_bytes_per_elt)
+{
+ uword *dst;
+ dst = clib_smp_fifo_write_alloc (f, n_bytes_per_elt);
+ clib_smp_fifo_memcpy (dst, elt_to_write, n_bytes_per_elt);
+ clib_smp_fifo_write_done (f, dst, n_bytes_per_elt);
+}
+
+always_inline void
+clib_smp_fifo_read_inline (clib_smp_fifo_t * f, void *elt_to_read,
+ uword n_bytes_per_elt)
+{
+ uword *src;
+ src = clib_smp_fifo_read_fetch (f, n_bytes_per_elt);
+ clib_smp_fifo_memcpy (elt_to_read, src, n_bytes_per_elt);
+ clib_smp_fifo_read_done (f, src, n_bytes_per_elt);
+}
+
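+/*
+ * A guarded usage sketch (illustrative only; msg_t and the element
+ * count are hypothetical). clib_smp_fifo_write_alloc returns 0 when
+ * the fifo is full (and read_fetch returns 0 when it is empty), so
+ * the unguarded *_inline helpers above assume the caller has already
+ * ruled those cases out:
+ *
+ *   typedef struct { uword seq; } msg_t;
+ *   clib_smp_fifo_t *f = clib_smp_fifo_init (64, sizeof (msg_t));
+ *   msg_t m = { .seq = 1 };
+ *   void *slot = clib_smp_fifo_write_alloc (f, sizeof (msg_t));
+ *   if (slot)
+ *     {
+ *       clib_smp_fifo_memcpy ((uword *) slot, (uword *) &m, sizeof (msg_t));
+ *       clib_smp_fifo_write_done (f, slot, sizeof (msg_t));
+ *     }
+ */
+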
+#endif /* included_clib_smp_vec_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/socket.c b/src/vppinfra/socket.c
new file mode 100644
index 00000000..87a9333f
--- /dev/null
+++ b/src/vppinfra/socket.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <stdio.h>
+#include <string.h> /* strchr */
+#define __USE_GNU
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <vppinfra/mem.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/socket.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+
+void
+clib_socket_tx_add_formatted (clib_socket_t * s, char *fmt, ...)
+{
+ va_list va;
+ va_start (va, fmt);
+ clib_socket_tx_add_va_formatted (s, fmt, &va);
+ va_end (va);
+}
+
+/* Bind the socket to an unused port and return it (or -1 if none is free). */
+static word
+find_free_port (word sock)
+{
+ word port;
+
+ for (port = IPPORT_USERRESERVED; port < 1 << 16; port++)
+ {
+ struct sockaddr_in a;
+
+ memset (&a, 0, sizeof (a)); /* Warnings be gone */
+
+ a.sin_family = PF_INET;
+ a.sin_addr.s_addr = INADDR_ANY;
+ a.sin_port = htons (port);
+
+ if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
+ break;
+ }
+
+ return port < 1 << 16 ? port : -1;
+}
+
+/* Convert a config string to a struct sockaddr and length for use
+ with bind or connect. A config beginning with '/' names a PF_LOCAL
+ (unix-domain) socket; anything else is parsed as hostname or
+ hostname:port. */
+static clib_error_t *
+socket_config (char *config,
+ void *addr, socklen_t * addr_len, u32 ip4_default_address)
+{
+ clib_error_t *error = 0;
+
+ if (!config)
+ config = "";
+
+ /* Anything that begins with a / is a local PF_LOCAL socket. */
+ if (config[0] == '/')
+ {
+ struct sockaddr_un *su = addr;
+ su->sun_family = PF_LOCAL;
+ clib_memcpy (&su->sun_path, config,
+ clib_min (sizeof (su->sun_path), 1 + strlen (config)));
+ *addr_len = sizeof (su[0]);
+ }
+
+ /* Hostname or hostname:port or port. */
+ else
+ {
+ char *host_name;
+ int port = -1;
+ struct sockaddr_in *sa = addr;
+
+ host_name = 0;
+ port = -1;
+ if (config[0] != 0)
+ {
+ unformat_input_t i;
+
+ unformat_init_string (&i, config, strlen (config));
+ if (unformat (&i, "%s:%d", &host_name, &port)
+ || unformat (&i, "%s:0x%x", &host_name, &port))
+ ;
+ else if (unformat (&i, "%s", &host_name))
+ ;
+ else
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, &i);
+ unformat_free (&i);
+
+ if (error)
+ goto done;
+ }
+
+ sa->sin_family = PF_INET;
+ *addr_len = sizeof (sa[0]);
+ if (port != -1)
+ sa->sin_port = htons (port);
+ else
+ sa->sin_port = 0;
+
+ if (host_name)
+ {
+ struct in_addr host_addr;
+
+ /* Recognize localhost to avoid host lookup in the most common case. */
+ if (!strcmp (host_name, "localhost"))
+ sa->sin_addr.s_addr = htonl (INADDR_LOOPBACK);
+
+ else if (inet_aton (host_name, &host_addr))
+ sa->sin_addr = host_addr;
+
+ else if (host_name && strlen (host_name) > 0)
+ {
+ struct hostent *host = gethostbyname (host_name);
+ if (!host)
+ error = clib_error_return (0, "unknown host `%s'", config);
+ else
+ clib_memcpy (&sa->sin_addr.s_addr, host->h_addr_list[0],
+ host->h_length);
+ }
+
+ else
+ sa->sin_addr.s_addr = htonl (ip4_default_address);
+
+ vec_free (host_name);
+ if (error)
+ goto done;
+ }
+ }
+
+done:
+ return error;
+}
+
+static clib_error_t *
+default_socket_write (clib_socket_t * s)
+{
+ clib_error_t *err = 0;
+ word written = 0;
+ word fd = 0;
+ word tx_len;
+
+ fd = s->fd;
+
+ /* Map standard input to standard output.
+ Typically, fd is a socket for which read/write both work. */
+ if (fd == 0)
+ fd = 1;
+
+ tx_len = vec_len (s->tx_buffer);
+ written = write (fd, s->tx_buffer, tx_len);
+
+ /* Ignore certain errors. */
+ if (written < 0 && !unix_error_is_fatal (errno))
+ written = 0;
+
+ /* A "real" error occurred. */
+ if (written < 0)
+ {
+ err = clib_error_return_unix (0, "write %wd bytes (fd %d, '%s')",
+ tx_len, s->fd, s->config);
+ vec_free (s->tx_buffer);
+ goto done;
+ }
+
+ /* Reclaim the transmitted part of the tx buffer on successful writes. */
+ else if (written > 0)
+ {
+ if (written == tx_len)
+ _vec_len (s->tx_buffer) = 0;
+ else
+ vec_delete (s->tx_buffer, written, 0);
+ }
+
+ /* If a non-fatal error occurred AND the buffer has grown large
+ (beyond 64K), free it rather than letting it grow without bound. */
+ else if (written == 0 && tx_len > 64 * 1024)
+ {
+ vec_free (s->tx_buffer);
+ }
+
+done:
+ return err;
+}
+
+static clib_error_t *
+default_socket_read (clib_socket_t * sock, int n_bytes)
+{
+ word fd, n_read;
+ u8 *buf;
+
+ /* RX side of socket is down once end of file is reached. */
+ if (sock->flags & CLIB_SOCKET_F_RX_END_OF_FILE)
+ return 0;
+
+ fd = sock->fd;
+
+ n_bytes = clib_max (n_bytes, 4096);
+ vec_add2 (sock->rx_buffer, buf, n_bytes);
+
+ if ((n_read = read (fd, buf, n_bytes)) < 0)
+ {
+ n_read = 0;
+
+ /* Ignore certain errors. */
+ if (!unix_error_is_fatal (errno))
+ goto non_fatal;
+
+ return clib_error_return_unix (0, "read %d bytes (fd %d, '%s')",
+ n_bytes, sock->fd, sock->config);
+ }
+
+ /* Other side closed the socket. */
+ if (n_read == 0)
+ sock->flags |= CLIB_SOCKET_F_RX_END_OF_FILE;
+
+non_fatal:
+ _vec_len (sock->rx_buffer) += n_read - n_bytes;
+
+ return 0;
+}
+
+static clib_error_t *
+default_socket_close (clib_socket_t * s)
+{
+ if (close (s->fd) < 0)
+ return clib_error_return_unix (0, "close (fd %d, %s)", s->fd, s->config);
+ return 0;
+}
+
+static clib_error_t *
+default_socket_sendmsg (clib_socket_t * s, void *msg, int msglen,
+ int fds[], int num_fds)
+{
+ struct msghdr mh = { 0 };
+ struct iovec iov[1];
+ char ctl[CMSG_SPACE (sizeof (int)) * num_fds];
+ int rv;
+
+ iov[0].iov_base = msg;
+ iov[0].iov_len = msglen;
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+
+ if (num_fds > 0)
+ {
+ struct cmsghdr *cmsg;
+ memset (&ctl, 0, sizeof (ctl));
+ mh.msg_control = ctl;
+ mh.msg_controllen = sizeof (ctl);
+ cmsg = CMSG_FIRSTHDR (&mh);
+ cmsg->cmsg_len = CMSG_LEN (sizeof (int) * num_fds);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy (CMSG_DATA (cmsg), fds, sizeof (int) * num_fds);
+ }
+ rv = sendmsg (s->fd, &mh, 0);
+ if (rv < 0)
+ return clib_error_return_unix (0, "sendmsg");
+ return 0;
+}
+
+
+static clib_error_t *
+default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
+ int fds[], int num_fds)
+{
+ char ctl[CMSG_SPACE (sizeof (int) * num_fds) +
+ CMSG_SPACE (sizeof (struct ucred))];
+ struct msghdr mh = { 0 };
+ struct iovec iov[1];
+ ssize_t size;
+ struct ucred *cr = 0;
+ struct cmsghdr *cmsg;
+
+ iov[0].iov_base = msg;
+ iov[0].iov_len = msglen;
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+ mh.msg_control = ctl;
+ mh.msg_controllen = sizeof (ctl);
+
+ memset (ctl, 0, sizeof (ctl));
+
+ /* receive the incoming message */
+ size = recvmsg (s->fd, &mh, 0);
+ if (size != msglen)
+ {
+ return (size == 0) ? clib_error_return (0, "disconnected") :
+ clib_error_return_unix (0, "recvmsg: malformed message (fd %d, '%s')",
+ s->fd, s->config);
+ }
+
+ cmsg = CMSG_FIRSTHDR (&mh);
+ while (cmsg)
+ {
+ if (cmsg->cmsg_level == SOL_SOCKET)
+ {
+ if (cmsg->cmsg_type == SCM_CREDENTIALS)
+ {
+ cr = (struct ucred *) CMSG_DATA (cmsg);
+ s->uid = cr->uid;
+ s->gid = cr->gid;
+ s->pid = cr->pid;
+ }
+ else if (cmsg->cmsg_type == SCM_RIGHTS)
+ {
+ clib_memcpy (fds, CMSG_DATA (cmsg), num_fds * sizeof (int));
+ }
+ }
+ cmsg = CMSG_NXTHDR (&mh, cmsg);
+ }
+ return 0;
+}
+
+static void
+socket_init_funcs (clib_socket_t * s)
+{
+ if (!s->write_func)
+ s->write_func = default_socket_write;
+ if (!s->read_func)
+ s->read_func = default_socket_read;
+ if (!s->close_func)
+ s->close_func = default_socket_close;
+ if (!s->sendmsg_func)
+ s->sendmsg_func = default_socket_sendmsg;
+ if (!s->recvmsg_func)
+ s->recvmsg_func = default_socket_recvmsg;
+}
+
+clib_error_t *
+clib_socket_init (clib_socket_t * s)
+{
+ union
+ {
+ struct sockaddr sa;
+ struct sockaddr_un su;
+ } addr;
+ socklen_t addr_len = 0;
+ int socket_type;
+ clib_error_t *error = 0;
+ word port;
+
+ error = socket_config (s->config, &addr.sa, &addr_len,
+ (s->flags & CLIB_SOCKET_F_IS_SERVER
+ ? INADDR_LOOPBACK : INADDR_ANY));
+ if (error)
+ goto done;
+
+ socket_init_funcs (s);
+
+ socket_type = s->flags & CLIB_SOCKET_F_SEQPACKET ?
+ SOCK_SEQPACKET : SOCK_STREAM;
+
+ s->fd = socket (addr.sa.sa_family, socket_type, 0);
+ if (s->fd < 0)
+ {
+ error = clib_error_return_unix (0, "socket (fd %d, '%s')",
+ s->fd, s->config);
+ goto done;
+ }
+
+ port = 0;
+ if (addr.sa.sa_family == PF_INET)
+ port = ((struct sockaddr_in *) &addr)->sin_port;
+
+ if (s->flags & CLIB_SOCKET_F_IS_SERVER)
+ {
+ uword need_bind = 1;
+
+ if (addr.sa.sa_family == PF_INET)
+ {
+ if (port == 0)
+ {
+ port = find_free_port (s->fd);
+ if (port < 0)
+ {
+ error = clib_error_return (0, "no free port (fd %d, '%s')",
+ s->fd, s->config);
+ goto done;
+ }
+ need_bind = 0;
+ }
+ }
+ if (addr.sa.sa_family == PF_LOCAL)
+ unlink (((struct sockaddr_un *) &addr)->sun_path);
+
+ /* Make address available for multiple users. */
+ {
+ int v = 1;
+ if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof (v)) < 0)
+ clib_unix_warning ("setsockopt SO_REUSEADDR fails");
+ }
+
+ if (addr.sa.sa_family == PF_LOCAL && s->flags & CLIB_SOCKET_F_PASSCRED)
+ {
+ int x = 1;
+ if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &x, sizeof (x)) < 0)
+ {
+ error = clib_error_return_unix (0, "setsockopt (SO_PASSCRED, "
+ "fd %d, '%s')", s->fd,
+ s->config);
+ goto done;
+ }
+ }
+
+ if (need_bind && bind (s->fd, &addr.sa, addr_len) < 0)
+ {
+ error = clib_error_return_unix (0, "bind (fd %d, '%s')",
+ s->fd, s->config);
+ goto done;
+ }
+
+ if (listen (s->fd, 5) < 0)
+ {
+ error = clib_error_return_unix (0, "listen (fd %d, '%s')",
+ s->fd, s->config);
+ goto done;
+ }
+ if (addr.sa.sa_family == PF_LOCAL
+ && s->flags & CLIB_SOCKET_F_ALLOW_GROUP_WRITE)
+ {
+ struct stat st = { 0 };
+ if (stat (((struct sockaddr_un *) &addr)->sun_path, &st) < 0)
+ {
+ error = clib_error_return_unix (0, "stat (fd %d, '%s')",
+ s->fd, s->config);
+ goto done;
+ }
+ st.st_mode |= S_IWGRP;
+ if (chmod (((struct sockaddr_un *) &addr)->sun_path, st.st_mode) <
+ 0)
+ {
+ error =
+ clib_error_return_unix (0, "chmod (fd %d, '%s', mode %o)",
+ s->fd, s->config, st.st_mode);
+ goto done;
+ }
+ }
+ }
+ else
+ {
+ if ((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT)
+ && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
+ {
+ error = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
+ s->fd, s->config);
+ goto done;
+ }
+
+ if (connect (s->fd, &addr.sa, addr_len) < 0
+ && !((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) &&
+ errno == EINPROGRESS))
+ {
+ error = clib_error_return_unix (0, "connect (fd %d, '%s')",
+ s->fd, s->config);
+ goto done;
+ }
+ }
+
+ return error;
+
+done:
+ if (s->fd > 0)
+ close (s->fd);
+ return error;
+}
+
+clib_error_t *
+clib_socket_accept (clib_socket_t * server, clib_socket_t * client)
+{
+ clib_error_t *err = 0;
+ socklen_t len = 0;
+
+ memset (client, 0, sizeof (client[0]));
+
+ /* Accept the new socket connection. */
+ client->fd = accept (server->fd, 0, 0);
+ if (client->fd < 0)
+ return clib_error_return_unix (0, "accept (fd %d, '%s')",
+ server->fd, server->config);
+
+ /* Set the new socket to be non-blocking. */
+ if (fcntl (client->fd, F_SETFL, O_NONBLOCK) < 0)
+ {
+ err = clib_error_return_unix (0, "fcntl O_NONBLOCK (fd %d)",
+ client->fd);
+ goto close_client;
+ }
+
+ /* Get peer info. */
+ len = sizeof (client->peer);
+ if (getpeername (client->fd, (struct sockaddr *) &client->peer, &len) < 0)
+ {
+ err = clib_error_return_unix (0, "getpeername (fd %d)", client->fd);
+ goto close_client;
+ }
+
+ client->flags = CLIB_SOCKET_F_IS_CLIENT;
+
+ socket_init_funcs (client);
+ return 0;
+
+close_client:
+ close (client->fd);
+ return err;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/socket.h b/src/vppinfra/socket.h
new file mode 100644
index 00000000..4f9e9509
--- /dev/null
+++ b/src/vppinfra/socket.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef _clib_included_socket_h
+#define _clib_included_socket_h
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+
+typedef struct _socket_t
+{
+ /* File descriptor. */
+ i32 fd;
+
+ /* Config string for socket HOST:PORT or just HOST. */
+ char *config;
+
+ u32 flags;
+#define CLIB_SOCKET_F_IS_SERVER (1 << 0)
+#define CLIB_SOCKET_F_IS_CLIENT (0 << 0)
+#define CLIB_SOCKET_F_RX_END_OF_FILE (1 << 2)
+#define CLIB_SOCKET_F_NON_BLOCKING_CONNECT (1 << 3)
+#define CLIB_SOCKET_F_ALLOW_GROUP_WRITE (1 << 4)
+#define CLIB_SOCKET_F_SEQPACKET (1 << 5)
+#define CLIB_SOCKET_F_PASSCRED (1 << 6)
+
+
+ /* Transmit buffer. Holds data waiting to be written. */
+ u8 *tx_buffer;
+
+ /* Receive buffer. Holds data read from socket. */
+ u8 *rx_buffer;
+
+ /* Peer socket we are connected to. */
+ struct sockaddr_in peer;
+
+ /* Credentials, populated if CLIB_SOCKET_F_PASSCRED is set */
+ pid_t pid;
+ uid_t uid;
+ gid_t gid;
+
+ clib_error_t *(*write_func) (struct _socket_t * sock);
+ clib_error_t *(*read_func) (struct _socket_t * sock, int min_bytes);
+ clib_error_t *(*close_func) (struct _socket_t * sock);
+ clib_error_t *(*recvmsg_func) (struct _socket_t * s, void *msg, int msglen,
+ int fds[], int num_fds);
+ clib_error_t *(*sendmsg_func) (struct _socket_t * s, void *msg, int msglen,
+ int fds[], int num_fds);
+ uword private_data;
+} clib_socket_t;
+
+/* socket config format is host:port.
+ Unspecified port causes a free one to be chosen starting
+ from IPPORT_USERRESERVED (5000). */
+clib_error_t *clib_socket_init (clib_socket_t * socket);
+
+clib_error_t *clib_socket_accept (clib_socket_t * server,
+ clib_socket_t * client);
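+
+/* A minimal server-side usage sketch (illustrative only; the address
+   and port are arbitrary):
+
+     clib_socket_t server = { 0 }, client;
+     server.config = "localhost:5002";
+     server.flags = CLIB_SOCKET_F_IS_SERVER;
+     clib_error_t *error = clib_socket_init (&server);
+     if (!error)
+       error = clib_socket_accept (&server, &client);
+*/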
+
+always_inline uword
+clib_socket_is_server (clib_socket_t * sock)
+{
+ return (sock->flags & CLIB_SOCKET_F_IS_SERVER) != 0;
+}
+
+always_inline uword
+clib_socket_is_client (clib_socket_t * s)
+{
+ return !clib_socket_is_server (s);
+}
+
+always_inline uword
+clib_socket_is_connected (clib_socket_t * sock)
+{
+ return sock->fd > 0;
+}
+
+
+always_inline int
+clib_socket_rx_end_of_file (clib_socket_t * s)
+{
+ return s->flags & CLIB_SOCKET_F_RX_END_OF_FILE;
+}
+
+always_inline void *
+clib_socket_tx_add (clib_socket_t * s, int n_bytes)
+{
+ u8 *result;
+ vec_add2 (s->tx_buffer, result, n_bytes);
+ return result;
+}
+
+always_inline void
+clib_socket_tx_add_va_formatted (clib_socket_t * s, char *fmt, va_list * va)
+{
+ s->tx_buffer = va_format (s->tx_buffer, fmt, va);
+}
+
+always_inline clib_error_t *
+clib_socket_tx (clib_socket_t * s)
+{
+ return s->write_func (s);
+}
+
+always_inline clib_error_t *
+clib_socket_rx (clib_socket_t * s, int n_bytes)
+{
+ return s->read_func (s, n_bytes);
+}
+
+always_inline clib_error_t *
+clib_socket_sendmsg (clib_socket_t * s, void *msg, int msglen,
+ int fds[], int num_fds)
+{
+ return s->sendmsg_func (s, msg, msglen, fds, num_fds);
+}
+
+always_inline clib_error_t *
+clib_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
+ int fds[], int num_fds)
+{
+ return s->recvmsg_func (s, msg, msglen, fds, num_fds);
+}
+
+always_inline void
+clib_socket_free (clib_socket_t * s)
+{
+ vec_free (s->tx_buffer);
+ vec_free (s->rx_buffer);
+ if (clib_mem_is_heap_object (s->config))
+ vec_free (s->config);
+ memset (s, 0, sizeof (s[0]));
+}
+
+always_inline clib_error_t *
+clib_socket_close (clib_socket_t * sock)
+{
+ clib_error_t *err;
+ err = (*sock->close_func) (sock);
+ return err;
+}
+
+void clib_socket_tx_add_formatted (clib_socket_t * s, char *fmt, ...);
+
+#endif /* _clib_included_socket_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/sparse_vec.h b/src/vppinfra/sparse_vec.h
new file mode 100644
index 00000000..ec8f0a1c
--- /dev/null
+++ b/src/vppinfra/sparse_vec.h
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_sparse_vec_h
+#define included_sparse_vec_h
+
+#include <vppinfra/vec.h>
+#include <vppinfra/bitops.h>
+
+/* Sparsely indexed vectors. Basic idea taken from Hacker's Delight.
+ Eliot added ranges. */
+typedef struct
+{
+ /* Bitmap one for each sparse index. */
+ uword *is_member_bitmap;
+
+ /* member_counts[i] = total number of members in bitmap words j < i. */
+ u16 *member_counts;
+
+#define SPARSE_VEC_IS_RANGE (1 << 0)
+#define SPARSE_VEC_IS_VALID_RANGE (1 << 1)
+ u8 *range_flags;
+} sparse_vec_header_t;
+
+always_inline sparse_vec_header_t *
+sparse_vec_header (void *v)
+{
+ return vec_header (v, sizeof (sparse_vec_header_t));
+}
+
+/* Index 0 is always used to mark sparse indices that are not present.
+ For example, if you look up V[0x1234] and 0x1234 is not a member,
+ you get 0 back as the dense index. */
+#define SPARSE_VEC_INVALID_INDEX (0)
+
+always_inline void *
+sparse_vec_new (uword elt_bytes, uword sparse_index_bits)
+{
+ void *v;
+ sparse_vec_header_t *h;
+ word n;
+
+ ASSERT (sparse_index_bits <= 16);
+
+ v = _vec_resize (0,
+ /* length increment */ 8,
+ /* data bytes */ 8 * elt_bytes,
+ /* header bytes */ sizeof (h[0]),
+ /* data align */ 0);
+
+ /* Make space for invalid entry (entry 0). */
+ _vec_len (v) = 1;
+
+ h = sparse_vec_header (v);
+
+ n = sparse_index_bits - min_log2 (BITS (uword));
+ if (n < 0)
+ n = 0;
+ n = 1ULL << n;
+ vec_resize (h->is_member_bitmap, n);
+ vec_resize (h->member_counts, n);
+
+ return v;
+}
+
+always_inline uword
+sparse_vec_index_internal (void *v,
+ uword sparse_index,
+ uword maybe_range, u32 * insert)
+{
+ sparse_vec_header_t *h;
+ uword i, b, d, w;
+ u8 is_member;
+
+ h = sparse_vec_header (v);
+ i = sparse_index / BITS (h->is_member_bitmap[0]);
+ b = (uword) 1 << (uword) (sparse_index % BITS (h->is_member_bitmap[0]));
+
+ ASSERT (i < vec_len (h->is_member_bitmap));
+ ASSERT (i < vec_len (h->member_counts));
+
+ w = h->is_member_bitmap[i];
+ d = h->member_counts[i] + count_set_bits (w & (b - 1));
+
+ is_member = (w & b) != 0;
+ if (maybe_range)
+ {
+ u8 r = h->range_flags[d];
+ u8 is_range, is_valid_range;
+
+ is_range = maybe_range & (r & SPARSE_VEC_IS_RANGE);
+ is_valid_range = (r & SPARSE_VEC_IS_VALID_RANGE) != 0;
+
+ is_member = is_range ? is_valid_range : is_member;
+ }
+
+ if (insert)
+ {
+ *insert = !is_member;
+ if (!is_member)
+ {
+ uword j;
+ w |= b;
+ h->is_member_bitmap[i] = w;
+ for (j = i + 1; j < vec_len (h->member_counts); j++)
+ h->member_counts[j] += 1;
+ }
+
+ return 1 + d;
+ }
+
+ d = is_member ? d : 0;
+
+ return is_member + d;
+}
+
+always_inline uword
+sparse_vec_index (void *v, uword sparse_index)
+{
+ return sparse_vec_index_internal (v, sparse_index,
+ /* maybe range */ 0,
+ /* insert? */ 0);
+}
+
+always_inline void
+sparse_vec_index2 (void *v,
+ u32 si0, u32 si1, u32 * i0_return, u32 * i1_return)
+{
+ sparse_vec_header_t *h;
+ uword b0, b1, w0, w1, v0, v1;
+ u32 i0, i1, d0, d1;
+ u8 is_member0, is_member1;
+
+ h = sparse_vec_header (v);
+
+ i0 = si0 / BITS (h->is_member_bitmap[0]);
+ i1 = si1 / BITS (h->is_member_bitmap[0]);
+
+ b0 = (uword) 1 << (uword) (si0 % BITS (h->is_member_bitmap[0]));
+ b1 = (uword) 1 << (uword) (si1 % BITS (h->is_member_bitmap[0]));
+
+ ASSERT (i0 < vec_len (h->is_member_bitmap));
+ ASSERT (i1 < vec_len (h->is_member_bitmap));
+
+ ASSERT (i0 < vec_len (h->member_counts));
+ ASSERT (i1 < vec_len (h->member_counts));
+
+ w0 = h->is_member_bitmap[i0];
+ w1 = h->is_member_bitmap[i1];
+
+ v0 = w0 & (b0 - 1);
+ v1 = w1 & (b1 - 1);
+
+ /* Speculate that masks will have zero or one bits set. */
+ d0 = h->member_counts[i0] + (v0 != 0);
+ d1 = h->member_counts[i1] + (v1 != 0);
+
+ /* Validate speculation. */
+ if (PREDICT_FALSE (!is_pow2 (v0) || !is_pow2 (v1)))
+ {
+ d0 += count_set_bits (v0) - (v0 != 0);
+ d1 += count_set_bits (v1) - (v1 != 0);
+ }
+
+ is_member0 = (w0 & b0) != 0;
+ is_member1 = (w1 & b1) != 0;
+
+ d0 = is_member0 ? d0 : 0;
+ d1 = is_member1 ? d1 : 0;
+
+ *i0_return = is_member0 + d0;
+ *i1_return = is_member1 + d1;
+}
+
+#define sparse_vec_free(v) vec_free(v)
+
+#define sparse_vec_elt_at_index(v,i) \
+ vec_elt_at_index ((v), sparse_vec_index ((v), (i)))
+
+#define sparse_vec_validate(v,i) \
+({ \
+ uword _i; \
+ u32 _insert; \
+ \
+ if (! (v)) \
+ (v) = sparse_vec_new (sizeof ((v)[0]), BITS (u16)); \
+ \
+ _i = sparse_vec_index_internal ((v), (i), \
+ /* maybe range */ 0, \
+ /* insert? */ &_insert); \
+ if (_insert) \
+ vec_insert_ha ((v), 1, _i, \
+ /* header size */ sizeof (sparse_vec_header_t), \
+ /* align */ 0); \
+ \
+ /* Invalid index is 0. */ \
+ ASSERT (_i > 0); \
+ \
+ (v) + _i; \
+})
+
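+/* A minimal usage sketch (illustrative only): map sparse 16-bit
+   indices to densely stored elements.
+
+     u32 *sv = 0;
+     u32 *elt = sparse_vec_validate (sv, 0x1234);
+     elt[0] = 42;
+     elt = sparse_vec_elt_at_index (sv, 0x1234);
+*/
+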
+#endif /* included_sparse_vec_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/std-formats.c b/src/vppinfra/std-formats.c
new file mode 100644
index 00000000..ac98f999
--- /dev/null
+++ b/src/vppinfra/std-formats.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <ctype.h>
+
+/* Format vectors. */
+u8 *
+format_vec32 (u8 * s, va_list * va)
+{
+ u32 *v = va_arg (*va, u32 *);
+ char *fmt = va_arg (*va, char *);
+ uword i;
+ for (i = 0; i < vec_len (v); i++)
+ {
+ if (i > 0)
+ s = format (s, ", ");
+ s = format (s, fmt, v[i]);
+ }
+ return s;
+}
+
+u8 *
+format_vec_uword (u8 * s, va_list * va)
+{
+ uword *v = va_arg (*va, uword *);
+ char *fmt = va_arg (*va, char *);
+ uword i;
+ for (i = 0; i < vec_len (v); i++)
+ {
+ if (i > 0)
+ s = format (s, ", ");
+ s = format (s, fmt, v[i]);
+ }
+ return s;
+}
+
+/* Ascii buffer and length. */
+u8 *
+format_ascii_bytes (u8 * s, va_list * va)
+{
+ u8 *v = va_arg (*va, u8 *);
+ uword n_bytes = va_arg (*va, uword);
+ vec_add (s, v, n_bytes);
+ return s;
+}
+
+/* Format hex dump. */
+u8 *
+format_hex_bytes (u8 * s, va_list * va)
+{
+ u8 *bytes = va_arg (*va, u8 *);
+ int n_bytes = va_arg (*va, int);
+ uword i;
+
+ /* Print short or long form depending on byte count. */
+ uword short_form = n_bytes <= 32;
+ uword indent = format_get_indent (s);
+
+ if (n_bytes == 0)
+ return s;
+
+ for (i = 0; i < n_bytes; i++)
+ {
+ if (!short_form && (i % 32) == 0)
+ s = format (s, "%08x: ", i);
+
+ s = format (s, "%02x", bytes[i]);
+
+ if (!short_form && ((i + 1) % 32) == 0 && (i + 1) < n_bytes)
+ s = format (s, "\n%U", format_white_space, indent);
+ }
+
+ return s;
+}
+
+/* Add variable number of spaces. */
+u8 *
+format_white_space (u8 * s, va_list * va)
+{
+ uword n = va_arg (*va, uword);
+ while (n-- > 0)
+ vec_add1 (s, ' ');
+ return s;
+}
+
+u8 *
+format_time_interval (u8 * s, va_list * args)
+{
+ u8 *fmt = va_arg (*args, u8 *);
+ f64 t = va_arg (*args, f64);
+ u8 *f;
+
+ const f64 seconds_per_minute = 60;
+ const f64 seconds_per_hour = 60 * seconds_per_minute;
+ const f64 seconds_per_day = 24 * seconds_per_hour;
+ uword days, hours, minutes, secs, msecs, usecs;
+
+ days = t / seconds_per_day;
+ t -= days * seconds_per_day;
+
+ hours = t / seconds_per_hour;
+ t -= hours * seconds_per_hour;
+
+ minutes = t / seconds_per_minute;
+ t -= minutes * seconds_per_minute;
+
+ secs = t;
+ t -= secs;
+
+ msecs = 1e3 * t;
+ usecs = 1e6 * t;
+
+ for (f = fmt; *f; f++)
+ {
+ uword what, c;
+ char *what_fmt = "%d";
+
+ switch (c = *f)
+ {
+ default:
+ vec_add1 (s, c);
+ continue;
+
+ case 'd':
+ what = days;
+ what_fmt = "%d";
+ break;
+ case 'h':
+ what = hours;
+ what_fmt = "%02d";
+ break;
+ case 'm':
+ what = minutes;
+ what_fmt = "%02d";
+ break;
+ case 's':
+ what = secs;
+ what_fmt = "%02d";
+ break;
+ case 'f':
+ what = msecs;
+ what_fmt = "%03d";
+ break;
+ case 'u':
+ what = usecs;
+ what_fmt = "%06d";
+ break;
+ }
+
+ s = format (s, what_fmt, what);
+ }
+
+ return s;
+}
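+
+/* Example (illustrative): format (0, "%U", format_time_interval,
+   "h:m:s", 3661.0) yields "01:01:01"; characters other than the
+   d/h/m/s/f/u selectors are copied through verbatim. */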
+
+/* Unparse memory size e.g. 100, 100k, 100m, 100g. */
+u8 *
+format_memory_size (u8 * s, va_list * va)
+{
+ uword size = va_arg (*va, uword);
+ uword l, u, log_u;
+
+ l = size > 0 ? min_log2 (size) : 0;
+ if (l < 10)
+ log_u = 0;
+ else if (l < 20)
+ log_u = 10;
+ else if (l < 30)
+ log_u = 20;
+ else
+ log_u = 30;
+
+ u = (uword) 1 << log_u;
+ if (size & (u - 1))
+ s = format (s, "%.2f", (f64) size / (f64) u);
+ else
+ s = format (s, "%d", size >> log_u);
+
+ if (log_u != 0)
+ s = format (s, "%c", " kmg"[log_u / 10]);
+
+ return s;
+}
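+
+/* Examples: 512 -> "512", 2048 -> "2k", 1536 -> "1.50k". */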
+
+/* Parse memory size e.g. 100, 100k, 100m, 100g. */
+uword
+unformat_memory_size (unformat_input_t * input, va_list * va)
+{
+ uword amount, shift, c;
+ uword *result = va_arg (*va, uword *);
+
+ if (!unformat (input, "%wd%_", &amount))
+ return 0;
+
+ c = unformat_get_input (input);
+ switch (c)
+ {
+ case 'k':
+ case 'K':
+ shift = 10;
+ break;
+ case 'm':
+ case 'M':
+ shift = 20;
+ break;
+ case 'g':
+ case 'G':
+ shift = 30;
+ break;
+ default:
+ shift = 0;
+ unformat_put_input (input);
+ break;
+ }
+
+ *result = amount << shift;
+ return 1;
+}
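+
+/* Example: "64m" parses to 64 << 20 = 67108864; a bare "100" leaves
+   the shift at 0 and yields 100. */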
+
+/* Format C identifier: e.g. a_name -> "a name".
+   Works for both vector names and null-terminated C strings. */
+u8 *
+format_c_identifier (u8 * s, va_list * va)
+{
+ u8 *id = va_arg (*va, u8 *);
+ uword i, l;
+
+ l = ~0;
+ if (clib_mem_is_vec (id))
+ l = vec_len (id);
+
+ if (id)
+    for (i = 0; i < l && id[i] != 0; i++)
+ {
+ u8 c = id[i];
+
+ if (c == '_')
+ c = ' ';
+ vec_add1 (s, c);
+ }
+
+ return s;
+}
+
+u8 *
+format_hexdump (u8 * s, va_list * args)
+{
+ u8 *data = va_arg (*args, u8 *);
+ uword len = va_arg (*args, uword);
+ int i, index = 0;
+ const int line_len = 16;
+ u8 *line_hex = 0;
+ u8 *line_str = 0;
+ uword indent = format_get_indent (s);
+
+ if (!len)
+ return s;
+
+ for (i = 0; i < len; i++)
+ {
+ line_hex = format (line_hex, "%02x ", data[i]);
+ line_str = format (line_str, "%c", isprint (data[i]) ? data[i] : '.');
+ if (!((i + 1) % line_len))
+ {
+ s = format (s, "%U%05x: %v[%v]",
+ format_white_space, index ? indent : 0,
+ index, line_hex, line_str);
+ if (i < len - 1)
+ s = format (s, "\n");
+ index = i + 1;
+ vec_reset_length (line_hex);
+ vec_reset_length (line_str);
+ }
+ }
+
+  /* Pad the final partial line so the ASCII column lines up
+     ("%02x " is three characters per byte). */
+  while (i++ % line_len)
+    line_hex = format (line_hex, "   ");
+
+ if (vec_len (line_hex))
+ s = format (s, "%U%05x: %v[%v]",
+ format_white_space, indent, index, line_hex, line_str);
+
+ vec_free (line_hex);
+ vec_free (line_str);
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/string.c b/src/vppinfra/string.c
new file mode 100644
index 00000000..ba21e7b3
--- /dev/null
+++ b/src/vppinfra/string.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2006 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/string.h>
+#include <vppinfra/error.h>
+
+/* Exchanges source and destination. */
+void
+clib_memswap (void *_a, void *_b, uword bytes)
+{
+ uword pa = pointer_to_uword (_a);
+ uword pb = pointer_to_uword (_b);
+
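+  /* Swap using the widest type whose alignment both pointers satisfy;
+     each instantiation below swaps two elements per iteration and
+     leaves any remainder to the narrower types that follow. */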
+#define _(TYPE) \
+ if (0 == ((pa | pb) & (sizeof (TYPE) - 1))) \
+ { \
+ TYPE * a = uword_to_pointer (pa, TYPE *); \
+ TYPE * b = uword_to_pointer (pb, TYPE *); \
+ \
+ while (bytes >= 2*sizeof (TYPE)) \
+ { \
+ TYPE a0, a1, b0, b1; \
+ bytes -= 2*sizeof (TYPE); \
+ a += 2; \
+ b += 2; \
+ a0 = a[-2]; a1 = a[-1]; \
+ b0 = b[-2]; b1 = b[-1]; \
+ a[-2] = b0; a[-1] = b1; \
+ b[-2] = a0; b[-1] = a1; \
+ } \
+ pa = pointer_to_uword (a); \
+ pb = pointer_to_uword (b); \
+ }
+
+ if (BITS (uword) == BITS (u64))
+ _(u64);
+ _(u32);
+ _(u16);
+ _(u8);
+
+#undef _
+
+ ASSERT (bytes < 2);
+ if (bytes)
+ {
+ u8 *a = uword_to_pointer (pa, u8 *);
+ u8 *b = uword_to_pointer (pb, u8 *);
+ u8 a0 = a[0], b0 = b[0];
+ a[0] = b0;
+ b[0] = a0;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h
new file mode 100644
index 00000000..69a99a3f
--- /dev/null
+++ b/src/vppinfra/string.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_string_h
+#define included_clib_string_h
+
+#include <vppinfra/clib.h> /* for CLIB_LINUX_KERNEL */
+#include <vppinfra/vector.h>
+
+#ifdef CLIB_LINUX_KERNEL
+#include <linux/string.h>
+#endif
+
+#ifdef CLIB_UNIX
+#include <string.h>
+#endif
+
+#ifdef CLIB_STANDALONE
+#include <vppinfra/standalone_string.h>
+#endif
+
+/* Exchanges source and destination. */
+void clib_memswap (void *_a, void *_b, uword bytes);
+
+/*
+ * the vector unit memcpy variants confuse coverity
+ * so don't let it anywhere near them.
+ */
+#ifndef __COVERITY__
+#if __AVX__
+#include <vppinfra/memcpy_avx.h>
+#elif __SSSE3__
+#include <vppinfra/memcpy_sse3.h>
+#else
+#define clib_memcpy(a,b,c) memcpy(a,b,c)
+#endif
+#else /* __COVERITY__ */
+#define clib_memcpy(a,b,c) memcpy(a,b,c)
+#endif
+
+#endif /* included_clib_string_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_bihash_template.c b/src/vppinfra/test_bihash_template.c
new file mode 100644
index 00000000..589c815d
--- /dev/null
+++ b/src/vppinfra/test_bihash_template.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/time.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+
+#include <vppinfra/bihash_8_8.h>
+#include <vppinfra/bihash_template.h>
+
+#include <vppinfra/bihash_template.c>
+
+typedef struct
+{
+ u64 seed;
+ u32 nbuckets;
+ u32 nitems;
+ u32 search_iter;
+ int careful_delete_tests;
+ int verbose;
+ int non_random_keys;
+ uword *key_hash;
+ u64 *keys;
+ BVT (clib_bihash) hash;
+ clib_time_t clib_time;
+
+ unformat_input_t *input;
+
+} test_main_t;
+
+test_main_t test_main;
+
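+/* GDB-callable helper: return the vector length of v. */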
+uword
+vl (void *v)
+{
+ return vec_len (v);
+}
+
+static clib_error_t *
+test_bihash_vec64 (test_main_t * tm)
+{
+ u32 user_buckets = 1228800;
+ u32 user_memory_size = 209715200;
+ BVT (clib_bihash_kv) kv;
+ int i, j;
+ f64 before;
+ f64 *cum_times = 0;
+ BVT (clib_bihash) * h;
+
+ h = &tm->hash;
+
+ BV (clib_bihash_init) (h, "test", user_buckets, user_memory_size);
+
+ before = clib_time_now (&tm->clib_time);
+
+ for (j = 0; j < 10; j++)
+ {
+ for (i = 1; i <= j * 1000 + 1; i++)
+ {
+ kv.key = i;
+ kv.value = 1;
+
+ BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */ );
+ }
+
+ vec_add1 (cum_times, clib_time_now (&tm->clib_time) - before);
+ }
+
+ for (j = 0; j < vec_len (cum_times); j++)
+ fformat (stdout, "Cum time for %d: %.4f (us)\n", (j + 1) * 1000,
+ cum_times[j] * 1e6);
+
+ return 0;
+}
+
+static clib_error_t *
+test_bihash (test_main_t * tm)
+{
+ int i, j;
+ uword *p;
+ uword total_searches;
+ f64 before, delta;
+ BVT (clib_bihash) * h;
+ BVT (clib_bihash_kv) kv;
+
+ h = &tm->hash;
+
+ BV (clib_bihash_init) (h, "test", tm->nbuckets, 3ULL << 30);
+
+ fformat (stdout, "Pick %lld unique %s keys...\n",
+ tm->nitems, tm->non_random_keys ? "non-random" : "random");
+
+ for (i = 0; i < tm->nitems; i++)
+ {
+ u64 rndkey;
+
+ if (tm->non_random_keys == 0)
+ {
+
+ again:
+ rndkey = random_u64 (&tm->seed);
+
+ p = hash_get (tm->key_hash, rndkey);
+ if (p)
+ goto again;
+ }
+ else
+ rndkey = (u64) (i + 1) << 16;
+
+ hash_set (tm->key_hash, rndkey, i + 1);
+ vec_add1 (tm->keys, rndkey);
+ }
+
+ fformat (stdout, "Add items...\n");
+ for (i = 0; i < tm->nitems; i++)
+ {
+ kv.key = tm->keys[i];
+ kv.value = i + 1;
+
+ BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */ );
+
+ if (tm->verbose > 1)
+ {
+ fformat (stdout, "--------------------\n");
+ fformat (stdout, "After adding key %llu value %lld...\n",
+ tm->keys[i], (u64) (i + 1));
+ fformat (stdout, "%U", BV (format_bihash), h,
+ 2 /* very verbose */ );
+ }
+ }
+
+ fformat (stdout, "%U", BV (format_bihash), h, 0 /* very verbose */ );
+
+ fformat (stdout, "Search for items %d times...\n", tm->search_iter);
+
+ before = clib_time_now (&tm->clib_time);
+
+ for (j = 0; j < tm->search_iter; j++)
+ {
+ for (i = 0; i < tm->nitems; i++)
+ {
+ kv.key = tm->keys[i];
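+	  /* A failed search is retried once before the warning fires. */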
+ if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
+ if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
+ clib_warning ("[%d] search for key %lld failed unexpectedly\n",
+ i, tm->keys[i]);
+ if (kv.value != (u64) (i + 1))
+ clib_warning
+ ("[%d] search for key %lld returned %lld, not %lld\n", i,
+	       tm->keys[i], kv.value, (u64) (i + 1));
+ }
+ }
+
+ delta = clib_time_now (&tm->clib_time) - before;
+ total_searches = (uword) tm->search_iter * (uword) tm->nitems;
+
+ if (delta > 0)
+ fformat (stdout, "%.f searches per second\n",
+ ((f64) total_searches) / delta);
+
+ fformat (stdout, "%lld searches in %.6f seconds\n", total_searches, delta);
+
+ fformat (stdout, "Standard E-hash search for items %d times...\n",
+ tm->search_iter);
+
+ before = clib_time_now (&tm->clib_time);
+
+ for (j = 0; j < tm->search_iter; j++)
+ {
+ for (i = 0; i < tm->nitems; i++)
+ {
+ p = hash_get (tm->key_hash, tm->keys[i]);
+ if (p == 0 || p[0] != (uword) (i + 1))
+ clib_warning ("ugh, couldn't find %lld\n", tm->keys[i]);
+ }
+ }
+
+ delta = clib_time_now (&tm->clib_time) - before;
+ total_searches = (uword) tm->search_iter * (uword) tm->nitems;
+
+ fformat (stdout, "%lld searches in %.6f seconds\n", total_searches, delta);
+
+ if (delta > 0)
+ fformat (stdout, "%.f searches per second\n",
+ ((f64) total_searches) / delta);
+
+ fformat (stdout, "Delete items...\n");
+
+ for (i = 0; i < tm->nitems; i++)
+ {
+ int j;
+ int rv;
+
+ kv.key = tm->keys[i];
+ kv.value = (u64) (i + 1);
+ rv = BV (clib_bihash_add_del) (h, &kv, 0 /* is_add */ );
+
+ if (rv < 0)
+ clib_warning ("delete key %lld not ok but should be", tm->keys[i]);
+
+ if (tm->careful_delete_tests)
+ {
+ for (j = 0; j < tm->nitems; j++)
+ {
+ kv.key = tm->keys[j];
+ rv = BV (clib_bihash_search) (h, &kv, &kv);
+ if (j <= i && rv >= 0)
+ {
+ clib_warning
+ ("i %d j %d search ok but should not be, value %lld",
+ i, j, kv.value);
+ }
+ if (j > i && rv < 0)
+ {
+ clib_warning ("i %d j %d search not ok but should be",
+ i, j);
+ }
+ }
+ }
+ }
+
+ fformat (stdout, "After deletions, should be empty...\n");
+
+ fformat (stdout, "%U", BV (format_bihash), h, 0 /* very verbose */ );
+ return 0;
+}
+
+clib_error_t *
+test_bihash_cache (test_main_t * tm)
+{
+ u32 lru;
+ BVT (clib_bihash_bucket) _b, *b = &_b;
+
+ BV (clib_bihash_reset_cache) (b);
+
+ fformat (stdout, "Initial LRU config: %U\n", BV (format_bihash_lru), b);
+
+ BV (clib_bihash_update_lru_not_inline) (b, 3);
+
+ fformat (stdout, "use slot 3, LRU config: %U\n", BV (format_bihash_lru), b);
+
+ BV (clib_bihash_update_lru) (b, 1);
+
+ fformat (stdout, "use slot 1 LRU config: %U\n", BV (format_bihash_lru), b);
+
+ lru = BV (clib_bihash_get_lru) (b);
+
+ fformat (stdout, "least-recently-used is %d\n", lru);
+
+ BV (clib_bihash_update_lru) (b, 4);
+
+ fformat (stdout, "use slot 4 LRU config: %U\n", BV (format_bihash_lru), b);
+
+ lru = BV (clib_bihash_get_lru) (b);
+
+ fformat (stdout, "least-recently-used is %d\n", lru);
+
+ return 0;
+}
+
+clib_error_t *
+test_bihash_main (test_main_t * tm)
+{
+ unformat_input_t *i = tm->input;
+ clib_error_t *error;
+ int which = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "seed %u", &tm->seed))
+ ;
+
+ else if (unformat (i, "nbuckets %d", &tm->nbuckets))
+ ;
+ else if (unformat (i, "non-random-keys"))
+ tm->non_random_keys = 1;
+ else if (unformat (i, "nitems %d", &tm->nitems))
+ ;
+ else if (unformat (i, "careful %d", &tm->careful_delete_tests))
+ ;
+ else if (unformat (i, "verbose %d", &tm->verbose))
+ ;
+ else if (unformat (i, "search %d", &tm->search_iter))
+ ;
+ else if (unformat (i, "vec64"))
+ which = 1;
+ else if (unformat (i, "cache"))
+ which = 2;
+
+ else if (unformat (i, "verbose"))
+ tm->verbose = 1;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, i);
+ }
+
+ switch (which)
+ {
+ case 0:
+ error = test_bihash (tm);
+ break;
+
+ case 1:
+ error = test_bihash_vec64 (tm);
+ break;
+
+ case 2:
+ error = test_bihash_cache (tm);
+ break;
+
+ default:
+ return clib_error_return (0, "no such test?");
+ }
+
+ return error;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ clib_error_t *error;
+ test_main_t *tm = &test_main;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ tm->input = &i;
+ tm->seed = 0xdeaddabe;
+
+ tm->nbuckets = 2;
+ tm->nitems = 5;
+ tm->verbose = 1;
+ tm->search_iter = 1;
+ tm->careful_delete_tests = 0;
+ tm->key_hash = hash_create (0, sizeof (uword));
+ clib_time_init (&tm->clib_time);
+
+ unformat_init_command_line (&i, argv);
+ error = test_bihash_main (tm);
+ unformat_free (&i);
+
+ if (error)
+ {
+ clib_error_report (error);
+ return 1;
+ }
+ return 0;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_dlist.c b/src/vppinfra/test_dlist.c
new file mode 100644
index 00000000..c5535c85
--- /dev/null
+++ b/src/vppinfra/test_dlist.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/dlist.h>
+
+typedef struct
+{
+ dlist_elt_t *test_pool;
+ u32 head_index;
+} test_main_t;
+
+test_main_t test_main;
+
+int
+test_dlist_main (unformat_input_t * input)
+{
+ test_main_t *tm = &test_main;
+ dlist_elt_t *head, *elt;
+ u32 elt_index, head_index;
+ u32 value;
+ int i;
+
+ pool_get (tm->test_pool, head);
+ head_index = head - tm->test_pool;
+ clib_dlist_init (tm->test_pool, head - tm->test_pool);
+
+ for (i = 1; i <= 3; i++)
+ {
+ pool_get (tm->test_pool, elt);
+ elt_index = elt - tm->test_pool;
+
+ clib_dlist_init (tm->test_pool, elt_index);
+ elt->value = i;
+ clib_dlist_addtail (tm->test_pool, head_index, elt_index);
+ }
+
+ head = pool_elt_at_index (tm->test_pool, head_index);
+
+ fformat (stdout, "Dump forward links\n");
+ elt_index = head->next;
+ i = 1;
+ value = 0;
+ while (value != ~0)
+ {
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+ fformat (stdout, "elt %d value %d\n", i++, elt->value);
+ elt_index = elt->next;
+ value = elt->value;
+ }
+
+ fformat (stdout, "Dump reverse links\n");
+ elt_index = head->prev;
+ i = 1;
+ value = 0;
+ while (value != ~0)
+ {
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+ fformat (stdout, "elt %d value %d\n", i++, elt->value);
+ elt_index = elt->prev;
+ value = elt->value;
+ }
+
+ fformat (stdout, "remove first element\n");
+
+ elt_index = clib_dlist_remove_head (tm->test_pool, head_index);
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+
+ fformat (stdout, "removed index %d value %d\n", elt_index, elt->value);
+
+ head = pool_elt_at_index (tm->test_pool, head_index);
+
+ fformat (stdout, "Dump forward links\n");
+ elt_index = head->next;
+ i = 1;
+ value = 0;
+ while (value != ~0)
+ {
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+ fformat (stdout, "elt %d value %d\n", i++, elt->value);
+ elt_index = elt->next;
+ value = elt->value;
+ }
+
+ fformat (stdout, "Dump reverse links\n");
+ elt_index = head->prev;
+ i = 1;
+ value = 0;
+ while (value != ~0)
+ {
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+ fformat (stdout, "elt %d value %d\n", i++, elt->value);
+ elt_index = elt->prev;
+ value = elt->value;
+ }
+
+ fformat (stdout, "re-insert index %d value %d at head\n", 1, 1);
+
+ clib_dlist_addhead (tm->test_pool, head_index, 1);
+
+ fformat (stdout, "Dump forward links\n");
+ elt_index = head->next;
+ i = 1;
+ value = 0;
+ while (value != ~0)
+ {
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+ fformat (stdout, "elt %d value %d\n", i++, elt->value);
+ elt_index = elt->next;
+ value = elt->value;
+ }
+
+ fformat (stdout, "Dump reverse links\n");
+ elt_index = head->prev;
+ i = 1;
+ value = 0;
+ while (value != ~0)
+ {
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+ fformat (stdout, "elt %d value %d\n", i++, elt->value);
+ elt_index = elt->prev;
+ value = elt->value;
+ }
+
+ fformat (stdout, "Remove middle element\n");
+
+ clib_dlist_remove (tm->test_pool, 2);
+ elt = pool_elt_at_index (tm->test_pool, 2);
+
+ fformat (stdout, "removed index %d value %d\n", elt_index, elt->value);
+
+ fformat (stdout, "Dump forward links\n");
+ elt_index = head->next;
+ i = 1;
+ value = 0;
+ while (value != ~0)
+ {
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+ fformat (stdout, "elt %d value %d\n", i++, elt->value);
+ elt_index = elt->next;
+ value = elt->value;
+ }
+
+ fformat (stdout, "Dump reverse links\n");
+ elt_index = head->prev;
+ i = 1;
+ value = 0;
+ while (value != ~0)
+ {
+ elt = pool_elt_at_index (tm->test_pool, elt_index);
+ fformat (stdout, "elt %d value %d\n", i++, elt->value);
+ elt_index = elt->prev;
+ value = elt->value;
+ }
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ unformat_init_command_line (&i, argv);
+ ret = test_dlist_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_elf.c b/src/vppinfra/test_elf.c
new file mode 100644
index 00000000..84fe0776
--- /dev/null
+++ b/src/vppinfra/test_elf.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2008 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/elf.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#ifndef CLIB_UNIX
+#error "unix only"
+#endif
+
+static clib_error_t *
+elf_set_interpreter (elf_main_t * em, char *interp)
+{
+ elf_segment_t *g;
+ elf_section_t *s;
+ clib_error_t *error;
+
+ vec_foreach (g, em->segments)
+ {
+ if (g->header.type == ELF_SEGMENT_INTERP)
+ break;
+ }
+
+ if (g >= vec_end (em->segments))
+ return clib_error_return (0, "interpreter not found");
+
+ if (g->header.memory_size < 1 + strlen (interp))
+ return clib_error_return (0,
+ "given interpreter does not fit; must be less than %d bytes (`%s' given)",
+ g->header.memory_size, interp);
+
+ error =
+ elf_get_section_by_start_address (em, g->header.virtual_address, &s);
+ if (error)
+ return error;
+
+ /* Put in new null terminated string. */
+ memset (s->contents, 0, vec_len (s->contents));
+ clib_memcpy (s->contents, interp, strlen (interp));
+
+ return 0;
+}
+
+static void
+delete_dynamic_rpath_entries_from_section (elf_main_t * em, elf_section_t * s)
+{
+ elf64_dynamic_entry_t *e;
+ elf64_dynamic_entry_t *new_es = 0;
+
+ vec_foreach (e, em->dynamic_entries)
+ {
+ switch (e->type)
+ {
+ case ELF_DYNAMIC_ENTRY_RPATH:
+ case ELF_DYNAMIC_ENTRY_RUN_PATH:
+ break;
+
+ default:
+ vec_add1 (new_es, e[0]);
+ break;
+ }
+ }
+
+ /* Pad so as to keep section size constant. */
+ {
+ elf64_dynamic_entry_t e_end;
+ e_end.type = ELF_DYNAMIC_ENTRY_END;
+ e_end.data = 0;
+ while (vec_len (new_es) < vec_len (em->dynamic_entries))
+ vec_add1 (new_es, e_end);
+ }
+
+  vec_free (em->dynamic_entries);
+  em->dynamic_entries = new_es;
+
+  elf_set_dynamic_entries (em);
+}
+
+static void
+elf_delete_dynamic_rpath_entries (elf_main_t * em)
+{
+ elf_section_t *s;
+
+ vec_foreach (s, em->sections)
+ {
+ switch (s->header.type)
+ {
+ case ELF_SECTION_DYNAMIC:
+ delete_dynamic_rpath_entries_from_section (em, s);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+typedef struct
+{
+ elf_main_t elf_main;
+ char *input_file;
+ char *output_file;
+ char *set_interpreter;
+ int verbose;
+} elf_test_main_t;
+
+int
+main (int argc, char *argv[])
+{
+ elf_test_main_t _tm, *tm = &_tm;
+ elf_main_t *em = &tm->elf_main;
+ unformat_input_t i;
+ clib_error_t *error = 0;
+
+ memset (tm, 0, sizeof (tm[0]));
+
+ unformat_init_command_line (&i, argv);
+ while (unformat_check_input (&i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&i, "in %s", &tm->input_file))
+ ;
+ else if (unformat (&i, "out %s", &tm->output_file))
+ ;
+ else if (unformat (&i, "set-interpreter %s", &tm->set_interpreter))
+ ;
+ else if (unformat (&i, "verbose"))
+ tm->verbose = ~0;
+ else if (unformat (&i, "verbose-symbols"))
+ tm->verbose |= FORMAT_ELF_MAIN_SYMBOLS;
+ else if (unformat (&i, "verbose-relocations"))
+ tm->verbose |= FORMAT_ELF_MAIN_RELOCATIONS;
+ else if (unformat (&i, "verbose-dynamic"))
+ tm->verbose |= FORMAT_ELF_MAIN_DYNAMIC;
+ else
+ {
+ error = unformat_parse_error (&i);
+ goto done;
+ }
+ }
+
+ if (!tm->input_file)
+ {
+ clib_warning ("No input file! Using test_bihash_template");
+ tm->input_file = "test_bihash_template";
+ }
+
+ error = elf_read_file (em, tm->input_file);
+ if (error)
+ goto done;
+
+ if (tm->set_interpreter)
+ {
+      error = elf_set_interpreter (em, tm->set_interpreter);
+ if (error)
+ goto done;
+ elf_delete_dynamic_rpath_entries (em);
+ }
+
+ if (tm->verbose)
+ fformat (stdout, "%U", format_elf_main, em, tm->verbose);
+
+ if (tm->output_file)
+ error = elf_write_file (em, tm->output_file);
+
+ elf_main_free (em);
+
+done:
+ if (error)
+ {
+ clib_error_report (error);
+ return 1;
+ }
+ else
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_elog.c b/src/vppinfra/test_elog.c
new file mode 100644
index 00000000..1cf5ba1f
--- /dev/null
+++ b/src/vppinfra/test_elog.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/elog.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+#include <vppinfra/serialize.h>
+#include <vppinfra/unix.h>
+
+int
+test_elog_main (unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ u32 i, n_iter, seed, max_events;
+ elog_main_t _em, *em = &_em;
+ u32 verbose;
+ f64 min_sample_time;
+ char *dump_file, *load_file, *merge_file, **merge_files;
+ u8 *tag, **tags;
+ f64 align_tweak;
+ f64 *align_tweaks;
+
+ n_iter = 100;
+ max_events = 100000;
+ seed = 1;
+ verbose = 0;
+ dump_file = 0;
+ load_file = 0;
+ merge_files = 0;
+ tags = 0;
+ align_tweaks = 0;
+ min_sample_time = 2;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "iter %d", &n_iter))
+ ;
+ else if (unformat (input, "seed %d", &seed))
+ ;
+ else if (unformat (input, "dump %s", &dump_file))
+ ;
+ else if (unformat (input, "load %s", &load_file))
+ ;
+ else if (unformat (input, "tag %s", &tag))
+ vec_add1 (tags, tag);
+ else if (unformat (input, "merge %s", &merge_file))
+ vec_add1 (merge_files, merge_file);
+
+ else if (unformat (input, "verbose %=", &verbose, 1))
+ ;
+ else if (unformat (input, "max-events %d", &max_events))
+ ;
+ else if (unformat (input, "sample-time %f", &min_sample_time))
+ ;
+ else if (unformat (input, "align-tweak %f", &align_tweak))
+ vec_add1 (align_tweaks, align_tweak);
+ else
+ {
+ error = clib_error_create ("unknown input `%U'\n",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+#ifdef CLIB_UNIX
+ if (load_file)
+ {
+ if ((error = elog_read_file (em, load_file)))
+ goto done;
+ }
+
+ else if (merge_files)
+ {
+ uword i;
+ elog_main_t *ems;
+ vec_clone (ems, merge_files);
+
+ /* Supply default tags as needed */
+ if (vec_len (tags) < vec_len (ems))
+ {
+ for (i = vec_len (tags); i < vec_len (ems); i++)
+ vec_add1 (tags, format (0, "F%d%c", i, 0));
+ }
+
+ elog_init (em, max_events);
+ for (i = 0; i < vec_len (ems); i++)
+ {
+ if ((error =
+ elog_read_file (i == 0 ? em : &ems[i], merge_files[i])))
+ goto done;
+ if (i > 0)
+ {
+ align_tweak = 0.0;
+ if (i <= vec_len (align_tweaks))
+ align_tweak = align_tweaks[i - 1];
+ elog_merge (em, tags[0], &ems[i], tags[i], align_tweak);
+ tags[0] = 0;
+ }
+ }
+ }
+
+ else
+#endif /* CLIB_UNIX */
+ {
+ f64 t[2];
+
+ elog_init (em, max_events);
+ elog_enable_disable (em, 1);
+ t[0] = unix_time_now ();
+
+ for (i = 0; i < n_iter; i++)
+ {
+ u32 j, n, sum;
+
+ n = 1 + (random_u32 (&seed) % 128);
+ sum = 0;
+ for (j = 0; j < n; j++)
+ sum += random_u32 (&seed);
+
+ {
+ ELOG_TYPE_XF (e);
+ ELOG (em, e, sum);
+ }
+
+ {
+ ELOG_TYPE_XF (e);
+ ELOG (em, e, sum + 1);
+ }
+
+ {
+ struct
+ {
+ u32 string_index;
+ f32 f;
+ } *d;
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "fumble %s %.9f",.format_args =
+ "t4f4",.n_enum_strings = 4,.enum_strings =
+ {
+ "string0", "string1", "string2", "string3",},};
+
+ d = ELOG_DATA (em, e);
+
+ d->string_index = sum & 3;
+ d->f = (sum & 0xff) / 128.;
+ }
+
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "bar %d.%d.%d.%d",.format_args = "i1i1i1i1",};
+ ELOG_TRACK (my_track);
+ u8 *d = ELOG_TRACK_DATA (em, e, my_track);
+ d[0] = i + 0;
+ d[1] = i + 1;
+ d[2] = i + 2;
+ d[3] = i + 3;
+ }
+
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "bar `%s'",.format_args = "s20",};
+ struct
+ {
+ char s[20];
+ } *d;
+ u8 *v;
+
+ d = ELOG_DATA (em, e);
+ v = format (0, "foo %d%c", i, 0);
+	    clib_memcpy (d->s, v, clib_min (vec_len (v), sizeof (d->s)));
+	    vec_free (v);
+ }
+
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "bar `%s'",.format_args = "T4",};
+ struct
+ {
+ u32 offset;
+ } *d;
+
+ d = ELOG_DATA (em, e);
+ d->offset = elog_string (em, "string table %d", i);
+ }
+ }
+
+ do
+ {
+ t[1] = unix_time_now ();
+ }
+ while (t[1] - t[0] < min_sample_time);
+ }
+
+#ifdef CLIB_UNIX
+ if (dump_file)
+ {
+ if ((error =
+ elog_write_file (em, dump_file, 0 /* do not flush ring */ )))
+ goto done;
+ }
+#endif
+
+ if (verbose)
+ {
+ elog_event_t *e, *es;
+ es = elog_get_events (em);
+ vec_foreach (e, es)
+ {
+ clib_warning ("%18.9f: %12U %U\n", e->time,
+ format_elog_track, em, e, format_elog_event, em, e);
+ }
+ }
+
+done:
+ if (error)
+ clib_error_report (error);
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int r;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ unformat_init_command_line (&i, argv);
+ r = test_elog_main (&i);
+ unformat_free (&i);
+ return r;
+}
+#endif
+
+/**
+ * @brief GDB callable function: vl - Return vector length of vector
+ *
+ * @param *p - void - address of vector
+ *
+ * @return length - u32
+ *
+ */
+u32
+vl (void *p)
+{
+ return vec_len (p);
+}
+
+/**
+ * @brief GDB callable function: pe - call pool_elts - number of elements in a pool
+ *
+ * @param *v - void - address of pool
+ *
+ * @return number - uword
+ *
+ */
+#include <vppinfra/pool.h>
+uword
+pe (void *v)
+{
+ return (pool_elts (v));
+}
+
+#include <vppinfra/hash.h>
+uword
+he (void *v)
+{
+ return (hash_elts (v));
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_fifo.c b/src/vppinfra/test_fifo.c
new file mode 100644
index 00000000..45392bc3
--- /dev/null
+++ b/src/vppinfra/test_fifo.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/fifo.h>
+#include <vppinfra/random.h>
+
+typedef struct
+{
+ int a, b, c;
+} A;
+
+always_inline void
+A_set (A * a, int k)
+{
+ a->a = 1 * k;
+ a->b = 2 * k;
+ a->c = 3 * k;
+}
+
+always_inline int
+A_is_valid (A * a, int k)
+{
+ return a->a == 1 * k && a->b == 2 * k && a->c == 3 * k;
+}
+
+int
+test_fifo_main (unformat_input_t * input)
+{
+ u32 n_added, n_removed, i, j, n_iter, seed, verbose;
+ A *as = 0, *a;
+
+ n_iter = 1000;
+ seed = random_default_seed ();
+ verbose = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "iter %d", &n_iter))
+ ;
+ else if (unformat (input, "seed %d", &seed))
+ ;
+ else if (unformat (input, "verbose %=", &verbose, 1))
+ ;
+ else
+ {
+ clib_warning ("unknown input `%U'\n", format_unformat_error, input);
+ return 1;
+ }
+ }
+
+ if (verbose)
+ clib_warning ("iter %d seed %d\n", n_iter, seed);
+
+ n_added = n_removed = 0;
+ for (i = 0; i < n_iter; i++)
+ {
+ if (clib_fifo_elts (as) > 0 && (random_u32 (&seed) & 1))
+ {
+ A tmp;
+ clib_fifo_sub1 (as, tmp);
+ ASSERT (A_is_valid (&tmp, n_removed));
+ n_removed++;
+ }
+ else
+ {
+ clib_fifo_add2 (as, a);
+ A_set (a, n_added);
+ n_added++;
+ }
+
+ ASSERT (clib_fifo_elts (as) == n_added - n_removed);
+
+ j = 0;
+ /* *INDENT-OFF* */
+ clib_fifo_foreach (a, as, {
+ ASSERT (A_is_valid (a, n_removed + j));
+ j++;
+ });
+ /* *INDENT-ON* */
+
+ ASSERT (j == clib_fifo_elts (as));
+ }
+
+ clib_fifo_free (as);
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int r;
+
+  clib_mem_init (0, 3ULL << 30);
+
+ unformat_init_command_line (&i, argv);
+ r = test_fifo_main (&i);
+ unformat_free (&i);
+
+ return r;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_format.c b/src/vppinfra/test_format.c
new file mode 100644
index 00000000..cc95a00e
--- /dev/null
+++ b/src/vppinfra/test_format.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+
+static int verbose;
+static u8 *test_vec;
+
+static u8 *
+format_test1 (u8 * s, va_list * va)
+{
+ uword x = va_arg (*va, uword);
+ f64 y = va_arg (*va, f64);
+ return format (s, "%12d %12f%12.4e", x, y, y);
+}
+
+static int
+expectation (const char *exp, char *fmt, ...)
+{
+ int ret = 0;
+
+ va_list va;
+ va_start (va, fmt);
+ test_vec = va_format (test_vec, fmt, &va);
+ va_end (va);
+
+ vec_add1 (test_vec, 0);
+ if (strcmp (exp, (char *) test_vec))
+ {
+ fformat (stdout, "FAIL: %s (expected vs. result)\n\"%s\"\n\"%v\"\n",
+ fmt, exp, test_vec);
+ ret = 1;
+ }
+ else if (verbose)
+ fformat (stdout, "PASS: %s\n", fmt);
+ vec_delete (test_vec, vec_len (test_vec), 0);
+ return ret;
+}
+
+int
+test_format_main (unformat_input_t * input)
+{
+ int ret = 0;
+ u8 *food = format (0, "food");
+
+ ret |= expectation ("foo", "foo");
+ ret |= expectation ("foo", "%s", "foo");
+ ret |= expectation ("9876", "%d", 9876);
+ ret |= expectation ("-9876", "%wd", (word) - 9876);
+ ret |= expectation ("98765432", "%u", 98765432);
+ ret |= expectation ("1200ffee", "%x", 0x1200ffee);
+ ret |= expectation ("BABEBABE", "%X", 0xbabebabe);
+ ret |= expectation ("10%a", "%d%%%c", 10, 'a');
+ ret |= expectation ("123456789abcdef0", "%016Lx", 0x123456789abcdef0LL);
+ ret |= expectation ("00000123", "%08x", 0x123);
+ ret |= expectation (" 23 23 2.3037e1",
+ "%40U", format_test1, 23, 23.0367);
+ ret |= expectation ("left ", "%-10s", "left");
+ ret |= expectation (" center ", "%=10s", "center");
+ ret |= expectation (" right", "%+10s", "right");
+ ret |= expectation ("123456", "%.0f", 123456.);
+ ret |= expectation ("1234567.0", "%.1f", 1234567.);
+ ret |= expectation ("foo", "%.*s", 3, "food");
+ ret |= expectation ("food ", "%.*s", 10, "food ");
+ ret |= expectation ("(nil)", "%.*s", 3, (void *) 0);
+ ret |= expectation ("foo", "%.*v", 3, food);
+ ret |= expectation ("foobar", "%.*v%s", 3, food, "bar");
+ ret |= expectation ("foo bar", "%S", "foo_bar");
+ vec_free (food);
+ vec_free (test_vec);
+ return ret;
+}
+
+typedef struct
+{
+ int a, b;
+} foo_t;
+
+static u8 *
+format_foo (u8 * s, va_list * va)
+{
+ foo_t *foo = va_arg (*va, foo_t *);
+ return format (s, "{a %d, b %d}", foo->a, foo->b);
+}
+
+static uword
+unformat_foo (unformat_input_t * i, va_list * va)
+{
+ foo_t *foo = va_arg (*va, foo_t *);
+ return unformat (i, "{%D,%D}",
+ sizeof (foo->a), &foo->a, sizeof (foo->b), &foo->b);
+}
+
+int
+test_unformat_main (unformat_input_t * input)
+{
+ u32 v[8];
+ long l;
+ long long ll;
+ f64 f;
+ u8 *s;
+ foo_t foo = {.a = ~0,.b = ~0 };
+
+ v[0] = v[1] = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "01 %d %d", &v[0], &v[1]))
+ fformat (stdout, "got 01 %d %d\n", v[0], v[1]);
+ else if (unformat (input, "d %d", &v[0]))
+ fformat (stdout, "got it d %d\n", v[0]);
+ else if (unformat (input, "ld %ld", &l))
+ fformat (stdout, "got it ld %ld\n", l);
+ else if (unformat (input, "lld %lld", &ll))
+ fformat (stdout, "got it lld %lld\n", ll);
+ else if (unformat (input, "string %s", &s))
+ fformat (stdout, "got string `%s'\n", s);
+ else if (unformat (input, "float %f", &f))
+ fformat (stdout, "got float `%.4f'\n", f);
+ else if (unformat (input, "foo %U", unformat_foo, &foo))
+ fformat (stdout, "got a foo `%U'\n", format_foo, &foo);
+ else if (unformat (input, "ignore-me1"))
+ fformat (stdout, "got an `ignore-me1'\n");
+ else if (unformat (input, "ignore-me2"))
+ fformat (stdout, "got an `ignore-me2'\n");
+ else if (unformat (input, "gi%d_%d@-", &v[0], &v[1]))
+ fformat (stdout, "got `gi%d_%d@-'\n", v[0], v[1]);
+ else if (unformat (input, "%_%d.%d.%d.%d%_->%_%d.%d.%d.%d%_",
+ &v[0], &v[1], &v[2], &v[3],
+ &v[4], &v[5], &v[6], &v[7]))
+ fformat (stdout, "got %d.%d.%d.%d -> %d.%d.%d.%d",
+ v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
+ else
+ {
+ clib_warning ("unknown input `%U'\n", format_unformat_error, input);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+
+  clib_mem_init (0, 3ULL << 30);
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+
+ if (unformat (&i, "unformat"))
+ return test_unformat_main (&i);
+ else
+ return test_format_main (&i);
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_fpool.c b/src/vppinfra/test_fpool.c
new file mode 100644
index 00000000..e2d67f16
--- /dev/null
+++ b/src/vppinfra/test_fpool.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/pool.h>
+
+/* can be a very large size */
+#define NELTS 1024
+
+int
+main (int argc, char *argv[])
+{
+ u32 *junk = 0;
+ int i;
+ u32 *tp = 0;
+ u32 *indices = 0;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ vec_validate (indices, NELTS - 1);
+ _vec_len (indices) = 0;
+
+ pool_init_fixed (tp, NELTS);
+
+ for (i = 0; i < NELTS; i++)
+ {
+ pool_get (tp, junk);
+ vec_add1 (indices, junk - tp);
+ *junk = i;
+ }
+
+ for (i = 0; i < NELTS; i++)
+ {
+ junk = pool_elt_at_index (tp, indices[i]);
+ ASSERT (*junk == i);
+ }
+
+ fformat (stdout, "%d pool elts before deletes\n", pool_elts (tp));
+
+ pool_put_index (tp, indices[12]);
+ pool_put_index (tp, indices[43]);
+
+ fformat (stdout, "%d pool elts after deletes\n", pool_elts (tp));
+
+ pool_validate (tp);
+
+ pool_free (tp);
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_hash.c b/src/vppinfra/test_hash.c
new file mode 100644
index 00000000..94110ab6
--- /dev/null
+++ b/src/vppinfra/test_hash.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifdef CLIB_LINUX_KERNEL
+#include <linux/unistd.h>
+#endif
+
+#ifdef CLIB_UNIX
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <vppinfra/time.h>
+#endif
+
+#include <vppinfra/random.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/bitmap.h>
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+typedef struct
+{
+ int n_iterations;
+
+ int n_iterations_per_print;
+
+ /* Number of pairs to insert into hash. */
+ int n_pairs;
+
+ /* True to validate correctness of hash functions. */
+ int n_iterations_per_validate;
+
+ /* Non-zero if hash table size is to be fixed. */
+ int fixed_hash_size;
+
+ /* Verbosity level for hash formats. */
+ int verbose;
+
+ /* Random number seed. */
+ u32 seed;
+} hash_test_t;
+
+static clib_error_t *
+hash_next_test (word * h)
+{
+ hash_next_t hn = { 0 };
+ hash_pair_t *p0, *p1;
+ clib_error_t *error = 0;
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p0, h, {
+ p1 = hash_next (h, &hn);
+ error = CLIB_ERROR_ASSERT (p0 == p1);
+ if (error)
+ break;
+ });
+ /* *INDENT-ON* */
+
+ if (!error)
+ error = CLIB_ERROR_ASSERT (!hash_next (h, &hn));
+
+ return error;
+}
+
+static u8 *
+test1_format (u8 * s, va_list * args)
+{
+ void *CLIB_UNUSED (user_arg) = va_arg (*args, void *);
+ void *v = va_arg (*args, void *);
+ hash_pair_t *p = va_arg (*args, hash_pair_t *);
+ hash_t *h = hash_header (v);
+
+ return format (s, "0x%8U -> 0x%8U",
+ format_hex_bytes, &p->key, sizeof (p->key),
+ format_hex_bytes, &p->value[0], hash_value_bytes (h));
+}
+
+static clib_error_t *
+test_word_key (hash_test_t * ht)
+{
+ word *h = 0;
+ word i, j;
+
+ word *keys = 0, *vals = 0;
+ uword *is_inserted = 0;
+
+ clib_error_t *error = 0;
+
+ vec_resize (keys, ht->n_pairs);
+ vec_resize (vals, vec_len (keys));
+
+ h = hash_create (ht->fixed_hash_size, sizeof (vals[0]));
+
+ hash_set_pair_format (h, test1_format, 0);
+ if (ht->fixed_hash_size)
+ hash_set_flags (h, HASH_FLAG_NO_AUTO_GROW | HASH_FLAG_NO_AUTO_SHRINK);
+
+ {
+ uword *unique = 0;
+ u32 k;
+
+ for (i = 0; i < vec_len (keys); i++)
+ {
+ do
+ {
+ k = random_u32 (&ht->seed) & 0xfffff;
+ }
+ while (clib_bitmap_get (unique, k));
+ unique = clib_bitmap_ori (unique, k);
+ keys[i] = k;
+ vals[i] = i;
+ }
+
+ clib_bitmap_free (unique);
+ }
+
+ for (i = 0; i < ht->n_iterations; i++)
+ {
+ u32 vi = random_u32 (&ht->seed) % vec_len (keys);
+
+ if (clib_bitmap_get (is_inserted, vi))
+ hash_unset (h, keys[vi]);
+ else
+ hash_set (h, keys[vi], vals[vi]);
+
+ is_inserted = clib_bitmap_xori (is_inserted, vi);
+
+ if (ht->n_iterations_per_print > 0
+ && ((i + 1) % ht->n_iterations_per_print) == 0)
+ if_verbose ("iteration %d\n %U", i + 1, format_hash, h, ht->verbose);
+
+ if (ht->n_iterations_per_validate == 0
+ || (i + 1) % ht->n_iterations_per_validate)
+ continue;
+
+ {
+ hash_pair_t *p;
+ uword ki;
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (p, h, {
+ ki = p->value[0];
+ ASSERT (keys[ki] == p->key);
+ });
+ /* *INDENT-ON* */
+ }
+
+ clib_mem_validate ();
+
+ if ((error = hash_validate (h)))
+ goto done;
+
+ for (j = 0; j < vec_len (keys); j++)
+ {
+ uword *v;
+ v = hash_get (h, keys[j]);
+ if ((error =
+ CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+ (v != 0))))
+ goto done;
+ if (v)
+ {
+ if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+ goto done;
+ }
+ }
+ }
+
+ if ((error = hash_next_test (h)))
+ goto done;
+
+ if_verbose ("%U", format_hash, h, ht->verbose);
+
+ for (i = 0; i < vec_len (keys); i++)
+ {
+ if (!clib_bitmap_get (is_inserted, i))
+ continue;
+
+ hash_unset (h, keys[i]);
+ is_inserted = clib_bitmap_xori (is_inserted, i);
+
+ if (ht->n_iterations_per_validate == 0
+ || (i + 1) % ht->n_iterations_per_validate)
+ continue;
+
+ clib_mem_validate ();
+
+ if ((error = hash_validate (h)))
+ goto done;
+
+ for (j = 0; j < vec_len (keys); j++)
+ {
+ uword *v;
+ v = hash_get (h, keys[j]);
+ if ((error =
+ CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+ (v != 0))))
+ goto done;
+ if (v)
+ {
+ if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+ goto done;
+ }
+ }
+ }
+
+done:
+ hash_free (h);
+ vec_free (keys);
+ vec_free (vals);
+ clib_bitmap_free (is_inserted);
+
+ if (verbose)
+ fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
+
+ return error;
+}
+
+static u8 *
+test2_format (u8 * s, va_list * args)
+{
+ void *CLIB_UNUSED (user_arg) = va_arg (*args, void *);
+ void *v = va_arg (*args, void *);
+ hash_pair_t *p = va_arg (*args, hash_pair_t *);
+ hash_t *h = hash_header (v);
+
+ return format (s, "0x%8U <- %v",
+ format_hex_bytes, &p->value[0], hash_value_bytes (h),
+ p->key);
+}
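+
+/* A minimal sketch of the vector-keyed variant driven by
+   test_string_key () below: keys are u8 vectors, hashed and compared
+   by contents rather than by pointer; not called by the test driver. */
+static void __attribute__ ((unused))
+string_hash_sketch (void)
+{
+  uword *h = hash_create_vec (0 /* initial elts */ , sizeof (u8),
+			      sizeof (uword));
+  u8 *key = format (0, "example-key");
+  uword *p;
+
+  hash_set_mem (h, key, 17);
+  p = hash_get_mem (h, key);	/* 0 if the key is absent */
+  ASSERT (p && p[0] == 17);
+  hash_unset_mem (h, key);
+  hash_free (h);
+  vec_free (key);
+}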
+
+static clib_error_t *
+test_string_key (hash_test_t * ht)
+{
+ word i, j;
+
+ u8 **keys = 0;
+ word *vals = 0;
+ uword *is_inserted = 0;
+
+ word *h = 0;
+
+ clib_error_t *error = 0;
+
+ vec_resize (keys, ht->n_pairs);
+ vec_resize (vals, vec_len (keys));
+
+ h =
+ hash_create_vec (ht->fixed_hash_size, sizeof (keys[0][0]),
+ sizeof (uword));
+ hash_set_pair_format (h, test2_format, 0);
+ if (ht->fixed_hash_size)
+ hash_set_flags (h, HASH_FLAG_NO_AUTO_SHRINK | HASH_FLAG_NO_AUTO_GROW);
+
+ for (i = 0; i < vec_len (keys); i++)
+ {
+ keys[i] = random_string (&ht->seed, 5 + (random_u32 (&ht->seed) & 0xf));
+ keys[i] = format (keys[i], "%x", i);
+ vals[i] = random_u32 (&ht->seed);
+ }
+
+ for (i = 0; i < ht->n_iterations; i++)
+ {
+ u32 vi = random_u32 (&ht->seed) % vec_len (keys);
+
+ if (clib_bitmap_get (is_inserted, vi))
+ hash_unset_mem (h, keys[vi]);
+ else
+ hash_set_mem (h, keys[vi], vals[vi]);
+
+ is_inserted = clib_bitmap_xori (is_inserted, vi);
+
+ if (ht->n_iterations_per_print > 0
+ && ((i + 1) % ht->n_iterations_per_print) == 0)
+ if_verbose ("iteration %d\n %U", i + 1, format_hash, h, ht->verbose);
+
+ if (ht->n_iterations_per_validate == 0
+ || (i + 1) % ht->n_iterations_per_validate)
+ continue;
+
+ clib_mem_validate ();
+
+ if ((error = hash_validate (h)))
+ goto done;
+
+ for (j = 0; j < vec_len (keys); j++)
+ {
+ uword *v;
+ v = hash_get_mem (h, keys[j]);
+ if ((error =
+ CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+ (v != 0))))
+ goto done;
+ if (v)
+ {
+ if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+ goto done;
+ }
+ }
+ }
+
+ if ((error = hash_next_test (h)))
+ goto done;
+
+ if_verbose ("%U", format_hash, h, ht->verbose);
+
+ for (i = 0; i < vec_len (keys); i++)
+ {
+ if (!clib_bitmap_get (is_inserted, i))
+ continue;
+
+ hash_unset_mem (h, keys[i]);
+ is_inserted = clib_bitmap_xori (is_inserted, i);
+
+ if (ht->n_iterations_per_validate == 0
+ || (i + 1) % ht->n_iterations_per_validate)
+ continue;
+
+ clib_mem_validate ();
+
+ if ((error = hash_validate (h)))
+ goto done;
+
+ for (j = 0; j < vec_len (keys); j++)
+ {
+ uword *v;
+ v = hash_get_mem (h, keys[j]);
+ if ((error =
+ CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+ (v != 0))))
+ goto done;
+ if (v)
+ {
+ if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+ goto done;
+ }
+ }
+ }
+
+done:
+ hash_free (h);
+ vec_free (vals);
+ clib_bitmap_free (is_inserted);
+
+ for (i = 0; i < vec_len (keys); i++)
+ vec_free (keys[i]);
+ vec_free (keys);
+
+ if (verbose)
+ fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
+
+ return error;
+}
+
+int
+test_hash_main (unformat_input_t * input)
+{
+ hash_test_t _ht = { 0 }, *ht = &_ht;
+ clib_error_t *error;
+
+ ht->n_iterations = 100;
+ ht->n_pairs = 10;
+ ht->fixed_hash_size = 0; /* zero means non-fixed size */
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0 == unformat (input, "iter %d", &ht->n_iterations)
+ && 0 == unformat (input, "print %d", &ht->n_iterations_per_print)
+ && 0 == unformat (input, "elts %d", &ht->n_pairs)
+ && 0 == unformat (input, "size %d", &ht->fixed_hash_size)
+ && 0 == unformat (input, "seed %d", &ht->seed)
+ && 0 == unformat (input, "verbose %=", &ht->verbose, 1)
+ && 0 == unformat (input, "valid %d",
+ &ht->n_iterations_per_validate))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, input);
+ return 1;
+ }
+ }
+
+ if (!ht->seed)
+ ht->seed = random_default_seed ();
+
+ if_verbose ("testing %d iterations, seed %d", ht->n_iterations, ht->seed);
+
+ error = test_word_key (ht);
+ if (error)
+ clib_error_report (error);
+
+ error = test_string_key (ht);
+ if (error)
+ clib_error_report (error);
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ ret = test_hash_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_heap.c b/src/vppinfra/test_heap.c
new file mode 100644
index 00000000..3d5171bf
--- /dev/null
+++ b/src/vppinfra/test_heap.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <vppinfra/mem.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+int
+main (int argc, char *argv[])
+{
+ word i, j, k, n, check_mask;
+ u32 seed;
+ u32 *h = 0;
+ uword *objects = 0;
+ uword *handles = 0;
+ uword objects_used;
+ uword align, fixed_size;
+
+ n = 10;
+ seed = (u32) getpid ();
+ check_mask = 0;
+  fixed_size = 0;		/* not settable from argv; set by hand to exercise heap_create_from_memory */
+
+ if (argc > 1)
+ {
+ n = atoi (argv[1]);
+ verbose = 1;
+ }
+ if (argc > 2)
+ {
+      word v = atoi (argv[2]);
+      if (v)
+	seed = v;
+ }
+ if (argc > 3)
+ check_mask = atoi (argv[3]);
+
+ align = 0;
+ if (argc > 4)
+ align = 1 << atoi (argv[4]);
+
+ if_verbose ("testing %wd iterations seed %wd\n", n, seed);
+
+ if (verbose)
+ fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
+
+ vec_resize (objects, 1000);
+ if (vec_bytes (objects)) /* stupid warning be gone */
+ memset (objects, ~0, vec_bytes (objects));
+ vec_resize (handles, vec_len (objects));
+
+ objects_used = 0;
+
+ if (fixed_size)
+ {
+ uword max_len = 1024 * 1024;
+ void *memory = clib_mem_alloc (max_len * sizeof (h[0]));
+ h = heap_create_from_memory (memory, max_len, sizeof (h[0]));
+ }
+
+ for (i = 0; i < n; i++)
+ {
+ while (1)
+ {
+ j = random_u32 (&seed) % vec_len (objects);
+ if (objects[j] != ~0 || i + objects_used < n)
+ break;
+ }
+
+ if (objects[j] != ~0)
+ {
+ heap_dealloc (h, handles[j]);
+ objects_used--;
+ objects[j] = ~0;
+ }
+ else
+ {
+ u32 *data;
+ uword size;
+
+ size = 1 + (random_u32 (&seed) % 100);
+ objects[j] = heap_alloc_aligned (h, size, align, handles[j]);
+ objects_used++;
+
+ if (align)
+ ASSERT (0 == (objects[j] & (align - 1)));
+ ASSERT (objects[j] < vec_len (h));
+ ASSERT (size <= heap_len (h, handles[j]));
+
+ /* Set newly allocated object with test data. */
+ if (check_mask & 2)
+ {
+ data = h + objects[j];
+
+ for (k = 0; k < size; k++)
+ data[k] = objects[j] + k;
+ }
+ }
+
+ if (check_mask & 1)
+ heap_validate (h);
+
+ if (check_mask & 4)
+ {
+ /* Duplicate heap at each iteration. */
+ u32 *h1 = heap_dup (h);
+ heap_free (h);
+ h = h1;
+ }
+
+ /* Verify that all used objects have correct test data. */
+ if (check_mask & 2)
+ {
+ for (j = 0; j < vec_len (objects); j++)
+ if (objects[j] != ~0)
+ {
+ u32 *data = h + objects[j];
+ for (k = 0; k < heap_len (h, handles[j]); k++)
+ ASSERT (data[k] == objects[j] + k);
+ }
+ }
+ }
+
+ if (verbose)
+ fformat (stderr, "%U\n", format_heap, h, 1);
+
+ {
+ u32 *h1 = heap_dup (h);
+ if (verbose)
+ fformat (stderr, "%U\n", format_heap, h1, 1);
+ heap_free (h1);
+ }
+
+ heap_free (h);
+ if (verbose)
+ fformat (stderr, "%U\n", format_heap, h, 1);
+ ASSERT (objects_used == 0);
+
+ vec_free (objects);
+ vec_free (handles);
+
+ if (fixed_size)
+ vec_free_h (h, sizeof (heap_header_t));
+
+ if (verbose)
+ fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_longjmp.c b/src/vppinfra/test_longjmp.c
new file mode 100644
index 00000000..2415c4f0
--- /dev/null
+++ b/src/vppinfra/test_longjmp.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/clib.h>
+#include <vppinfra/longjmp.h>
+#include <vppinfra/format.h>
+
+static void test_calljmp (unformat_input_t * input);
+
+static int i;
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+static never_inline void
+f2 (clib_longjmp_t * env)
+{
+ i++;
+ clib_longjmp (env, 1);
+}
+
+static never_inline void
+f1 (clib_longjmp_t * env)
+{
+ i++;
+ f2 (env);
+}
+
+int
+test_longjmp_main (unformat_input_t * input)
+{
+ clib_longjmp_t env;
+
+ i = 0;
+ if (clib_setjmp (&env, 0) == 0)
+ {
+ if_verbose ("calling long jumper %d", i);
+ f1 (&env);
+ }
+ if_verbose ("back from long jump %d", i);
+
+ test_calljmp (input);
+
+ return 0;
+}
+
+static uword
+f3 (uword arg)
+{
+ uword i, j, array[10];
+
+ for (i = 0; i < ARRAY_LEN (array); i++)
+ array[i] = arg + i;
+
+ j = 0;
+ for (i = 0; i < ARRAY_LEN (array); i++)
+ j ^= array[i];
+
+ return j;
+}
+
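+/* clib_calljmp (f, arg, stack) switches to the supplied stack, calls
+   f (arg) there, then restores the original stack and returns f's
+   return value.  The ASSERT below checks that f3 computes the same
+   result on the private stack as it does on the normal one. */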
+static void
+test_calljmp (unformat_input_t * input)
+{
+ static u8 stack[32 * 1024] __attribute__ ((aligned (16)));
+ uword v;
+
+ v = clib_calljmp (f3, 0, stack + sizeof (stack));
+ ASSERT (v == f3 (0));
+ if_verbose ("calljump ok");
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int res;
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ res = test_longjmp_main (&i);
+ unformat_free (&i);
+ return res;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_macros.c b/src/vppinfra/test_macros.c
new file mode 100644
index 00000000..de8f2c49
--- /dev/null
+++ b/src/vppinfra/test_macros.c
@@ -0,0 +1,64 @@
+/*
+ Copyright (c) 2014 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/macros.h>
+
+macro_main_t macro_main;
+
+int
+test_macros_main (unformat_input_t * input)
+{
+ macro_main_t *mm = &macro_main;
+
+ clib_macro_init (mm);
+
+ fformat (stdout, "hostname: %s\n",
+ clib_macro_eval_dollar (mm, "hostname", 1 /* complain */ ));
+
+ clib_macro_set_value (mm, "foo", "this is foo which contains $(bar)");
+ clib_macro_set_value (mm, "bar", "bar");
+
+ fformat (stdout, "evaluate: %s\n",
+ clib_macro_eval (mm, "returns '$(foo)'", 1 /* complain */ ));
+
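+  /* With the two definitions above, $(foo) should expand recursively,
+     so the expected output is:
+     evaluate: returns 'this is foo which contains bar' */
+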
+ clib_macro_free (mm);
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ unformat_init_command_line (&i, argv);
+ ret = test_macros_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_md5.c b/src/vppinfra/test_md5.c
new file mode 100644
index 00000000..4be6f964
--- /dev/null
+++ b/src/vppinfra/test_md5.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2004 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/vec.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/md5.h>
+
+#include <fcntl.h>
+#include <unistd.h>
+
+static clib_error_t *md5_test_suite (void);
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+ if (argc == 1)
+ {
+ clib_error_t *e;
+ e = md5_test_suite ();
+ if (e)
+ {
+ clib_error_report (e);
+ exit (1);
+ }
+ }
+
+ for (i = 1; i < argc; i++)
+ {
+ md5_context_t m;
+ u8 digest[16];
+ u8 buffer[64 * 1024];
+ int fd, n;
+
+ fd = open (argv[i], 0);
+ if (fd < 0)
+ clib_unix_error ("can't open %s", argv[i]);
+
+ md5_init (&m);
+ while ((n = read (fd, buffer, sizeof (buffer))) > 0)
+ md5_add (&m, buffer, n);
+ close (fd);
+ md5_finish (&m, digest);
+ fformat (stdout, "%U %s\n",
+ format_hex_bytes, digest, sizeof (digest), argv[i]);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+md5_test_suite (void)
+{
+ typedef struct
+ {
+ char *input;
+ char *output;
+ } md5_test_t;
+
+ static md5_test_t tests[] = {
+ {.input = "",
+ .output = "d41d8cd98f00b204e9800998ecf8427e",},
+ {.input = "a",
+ .output = "0cc175b9c0f1b6a831c399e269772661",},
+ {.input = "abc",
+ .output = "900150983cd24fb0d6963f7d28e17f72",},
+ {.input = "message digest",
+ .output = "f96b697d7cb7938d525a2f31aaf161d0",},
+ {.input = "abcdefghijklmnopqrstuvwxyz",
+ .output = "c3fcd3d76192e4007dfb496cca67e13b",},
+ {.input =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
+ .output = "d174ab98d277d9f5a5611c2c9f419d9f",},
+ {.input =
+ "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
+ .output = "57edf4a22be3c955ac49da2e2107b67a",},
+ };
+
+ int i;
+ u8 *s;
+ md5_context_t m;
+ u8 digest[16];
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ md5_init (&m);
+ md5_add (&m, tests[i].input, strlen (tests[i].input));
+ md5_finish (&m, digest);
+ s = format (0, "%U", format_hex_bytes, digest, sizeof (digest));
+ if (memcmp (s, tests[i].output, 2 * sizeof (digest)))
+ return clib_error_return
+ (0, "%s -> %v expected %s", tests[i].input, s, tests[i].output);
+ vec_free (s);
+ }
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_mheap.c b/src/vppinfra/test_mheap.c
new file mode 100644
index 00000000..6bc36b89
--- /dev/null
+++ b/src/vppinfra/test_mheap.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifdef CLIB_LINUX_KERNEL
+#include <linux/unistd.h>
+#endif
+
+#ifdef CLIB_UNIX
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h> /* scanf */
+#endif
+
+#include <vppinfra/mheap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+
+static int verbose = 0;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+int
+test_mheap_main (unformat_input_t * input)
+{
+ int i, j, k, n_iterations;
+ void *h, *h_mem;
+ uword *objects = 0;
+ u32 objects_used, really_verbose, n_objects, max_object_size;
+ u32 check_mask, seed, trace, use_vm;
+ u32 print_every = 0;
+ u32 *data;
+ mheap_t *mh;
+
+ /* Validation flags. */
+ check_mask = 0;
+#define CHECK_VALIDITY 1
+#define CHECK_DATA 2
+#define CHECK_ALIGN 4
+
+ n_iterations = 10;
+ seed = 0;
+ max_object_size = 100;
+ n_objects = 1000;
+ trace = 0;
+ really_verbose = 0;
+ use_vm = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0 == unformat (input, "iter %d", &n_iterations)
+ && 0 == unformat (input, "count %d", &n_objects)
+ && 0 == unformat (input, "size %d", &max_object_size)
+ && 0 == unformat (input, "seed %d", &seed)
+ && 0 == unformat (input, "print %d", &print_every)
+ && 0 == unformat (input, "validdata %|",
+ &check_mask, CHECK_DATA | CHECK_VALIDITY)
+ && 0 == unformat (input, "valid %|",
+ &check_mask, CHECK_VALIDITY)
+ && 0 == unformat (input, "verbose %=", &really_verbose, 1)
+ && 0 == unformat (input, "trace %=", &trace, 1)
+ && 0 == unformat (input, "vm %=", &use_vm, 1)
+ && 0 == unformat (input, "align %|", &check_mask, CHECK_ALIGN))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, input);
+ return 1;
+ }
+ }
+
+ /* Zero seed means use default. */
+ if (!seed)
+ seed = random_default_seed ();
+
+ if_verbose
+ ("testing %d iterations, %d %saligned objects, max. size %d, seed %d",
+ n_iterations, n_objects, (check_mask & CHECK_ALIGN) ? "randomly " : "un",
+ max_object_size, seed);
+
+ vec_resize (objects, n_objects);
+ if (vec_bytes (objects)) /* stupid warning be gone */
+ memset (objects, ~0, vec_bytes (objects));
+ objects_used = 0;
+
+ /* Allocate initial heap. */
+ {
+ uword size =
+ max_pow2 (2 * n_objects * max_object_size * sizeof (data[0]));
+
+ h_mem = clib_mem_alloc (size);
+ if (!h_mem)
+ return 0;
+
+ h = mheap_alloc (h_mem, size);
+ }
+
+ if (trace)
+ mheap_trace (h, trace);
+
+ mh = mheap_header (h);
+
+ if (use_vm)
+ mh->flags &= ~MHEAP_FLAG_DISABLE_VM;
+ else
+ mh->flags |= MHEAP_FLAG_DISABLE_VM;
+
+ if (check_mask & CHECK_VALIDITY)
+ mh->flags |= MHEAP_FLAG_VALIDATE;
+
+ for (i = 0; i < n_iterations; i++)
+ {
+ while (1)
+ {
+ j = random_u32 (&seed) % vec_len (objects);
+ if (objects[j] != ~0 || i + objects_used < n_iterations)
+ break;
+ }
+
+ if (objects[j] != ~0)
+ {
+ mheap_put (h, objects[j]);
+ objects_used--;
+ objects[j] = ~0;
+ }
+ else
+ {
+ uword size, align, align_offset;
+
+ size = (random_u32 (&seed) % max_object_size) * sizeof (data[0]);
+ align = align_offset = 0;
+ if (check_mask & CHECK_ALIGN)
+ {
+ align = 1 << (random_u32 (&seed) % 10);
+ align_offset = round_pow2 (random_u32 (&seed) & (align - 1),
+ sizeof (u32));
+ }
+
+ h = mheap_get_aligned (h, size, align, align_offset, &objects[j]);
+
+ if (align > 0)
+ ASSERT (0 == ((objects[j] + align_offset) & (align - 1)));
+
+ ASSERT (objects[j] != ~0);
+ objects_used++;
+
+ /* Set newly allocated object with test data. */
+ if (check_mask & CHECK_DATA)
+ {
+ uword len;
+
+ data = (void *) h + objects[j];
+ len = mheap_len (h, data);
+
+ ASSERT (size <= mheap_data_bytes (h, objects[j]));
+
+ data[0] = len;
+ for (k = 1; k < len; k++)
+ data[k] = objects[j] + k;
+ }
+ }
+
+ /* Verify that all used objects have correct test data. */
+      if (check_mask & CHECK_DATA)
+ {
+ for (j = 0; j < vec_len (objects); j++)
+ if (objects[j] != ~0)
+ {
+ u32 *data = h + objects[j];
+ uword len = data[0];
+ for (k = 1; k < len; k++)
+ ASSERT (data[k] == objects[j] + k);
+ }
+ }
+ if (print_every != 0 && i > 0 && (i % print_every) == 0)
+ fformat (stderr, "iteration %d: %U\n", i, format_mheap, h,
+ really_verbose);
+ }
+
+ if (verbose)
+ fformat (stderr, "%U\n", format_mheap, h, really_verbose);
+ mheap_free (h);
+ clib_mem_free (h_mem);
+ vec_free (objects);
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ ret = test_mheap_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_pfhash.c b/src/vppinfra/test_pfhash.c
new file mode 100644
index 00000000..ddbdbb34
--- /dev/null
+++ b/src/vppinfra/test_pfhash.c
@@ -0,0 +1,322 @@
+/*
+ Copyright (c) 2013 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/pfhash.h>
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+
+#if defined(CLIB_HAVE_VEC128) && ! defined (__ALTIVEC__)
+
+int verbose = 0;
+
+always_inline u8 *
+random_aligned_string (u32 * seed, uword len)
+{
+ u8 *alphabet = (u8 *) "abcdefghijklmnopqrstuvwxyz";
+ u8 *s = 0;
+ word i;
+
+ vec_resize_aligned (s, len, 16);
+ for (i = 0; i < len; i++)
+ s[i] = alphabet[random_u32 (seed) % 26];
+
+ return s;
+}
+
+int
+test_pfhash_main (unformat_input_t * input)
+{
+ u32 seed = 0xdeaddabe;
+ int i, iter;
+ u32 nkeys = 4;
+ u32 niter = 1;
+ u32 nbuckets = 1;
+ u32 bucket;
+ u32 sizes[3] = { 16, 8, 4 }, this_size, size;
+ u8 **keys = 0;
+ pfhash_t _rec, *p = &_rec;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "seed %d", &seed))
+ ;
+ else if (unformat (input, "niter %d", &niter))
+ ;
+ else if (unformat (input, "nkeys %d", &nkeys))
+ ;
+ else if (unformat (input, "nbuckets %d", &nbuckets))
+ ;
+ else if (unformat (input, "verbose %d", &verbose))
+ ;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ clib_error ("unknown input `%U'", format_unformat_error, input);
+ }
+
+ vec_validate (keys, nkeys - 1);
+
+ for (i = 0; i < nkeys; i++)
+ {
+ int j, k;
+
+ again:
+ keys[i] = random_aligned_string (&seed, 16);
+      for (j = 0; j < i; j++)	/* compare against every earlier key */
+ {
+ /* Make sure we don't have a dup key in the min key size */
+ for (k = 0; k < 4; k++)
+ {
+ if (keys[i][k] != keys[j][k])
+ goto check_next_key;
+ }
+ vec_free (keys[i]);
+ goto again;
+ check_next_key:
+ ;
+ }
+ }
+
+ /* test 8 byte key, 8 byte value case separately */
+
+ for (size = 8; size < 9; size++)
+ {
+ this_size = 8;
+
+ fformat (stdout, "%d-byte key 8 byte value test\n", this_size);
+
+ pfhash_init (p, "test", 8 /* key size */ , 8 /* value size */ ,
+ nbuckets + 1);
+
+ for (iter = 0; iter < niter; iter++)
+ {
+ bucket = 0;
+ for (i = 0; i < nkeys; i++)
+ {
+ bucket = (i % nbuckets) + 1;
+ pfhash_set (p, bucket, keys[i],
+ (void *) (u64) 0x100000000ULL + i + 1);
+ }
+
+ for (i = 0; i < nkeys; i++)
+ {
+ bucket = (i % nbuckets) + 1;
+ if (pfhash_get (p, bucket, keys[i])
+ != (u64) 0x100000000ULL + i + 1)
+ {
+ clib_warning ("key %d bucket %d lookup FAIL\n", i, bucket);
+ (void) pfhash_get (p, bucket, keys[i]);
+ }
+ }
+
+ /* test inline functions */
+ for (i = 0; i < nkeys; i++)
+ {
+ u32 bucket_contents;
+ u64 value = 0xdeadbeef;
+ bucket = (i % nbuckets) + 1;
+
+ pfhash_prefetch_bucket (p, bucket);
+ bucket_contents = pfhash_read_bucket_prefetch_kv (p, bucket);
+
+ value = pfhash_search_kv_8v8 (p, bucket_contents,
+ (u64 *) keys[i]);
+ if (value != (u64) 0x100000000ULL + i + 1)
+ clib_warning ("key %d bucket %d lookup FAIL\n", i, bucket);
+ }
+
+ if (verbose)
+ fformat (stdout, "%U\n", format_pfhash, p, verbose > 1);
+
+ for (i = 0; i < nkeys; i++)
+ {
+ bucket = (i % nbuckets) + 1;
+ pfhash_unset (p, bucket, keys[i]);
+ }
+
+ for (i = 0; i < nkeys; i++)
+ {
+ bucket = (i % nbuckets) + 1;
+ if (pfhash_get (p, bucket, keys[i]) != (u64) ~ 0)
+ {
+ clib_warning ("key %d bucket %d lookup FAIL\n", i, bucket);
+ (void) pfhash_get (p, bucket, keys[i]);
+ }
+ }
+ /* test inline functions */
+ for (i = 0; i < nkeys; i++)
+ {
+ u32 bucket_contents;
+ u64 value = 0xdeadbeef;
+ bucket = (i % nbuckets) + 1;
+
+ pfhash_prefetch_bucket (p, bucket);
+ bucket_contents = pfhash_read_bucket_prefetch_kv (p, bucket);
+
+ value = pfhash_search_kv_8v8 (p, bucket_contents,
+ (u64 *) keys[i]);
+
+ if (value != (u64) ~ 0)
+ clib_warning ("key %d bucket %d lookup FAIL\n", i, bucket);
+ }
+ }
+ pfhash_free (p);
+ }
+
+ /* test other cases */
+
+ for (size = 0; size < ARRAY_LEN (sizes); size++)
+ {
+ this_size = sizes[size];
+
+ fformat (stdout, "%d-byte key test\n", this_size);
+
+ pfhash_init (p, "test", this_size, 4 /* value size */ , nbuckets + 1);
+
+ for (iter = 0; iter < niter; iter++)
+ {
+ bucket = 0;
+ for (i = 0; i < nkeys; i++)
+ {
+ bucket = (i % nbuckets) + 1;
+ pfhash_set (p, bucket, keys[i], (void *) (u64) i + 1);
+ }
+
+ for (i = 0; i < nkeys; i++)
+ {
+ bucket = (i % nbuckets) + 1;
+ if (pfhash_get (p, bucket, keys[i]) != i + 1)
+ {
+ clib_warning ("key %d bucket %d lookup FAIL\n", i, bucket);
+ (void) pfhash_get (p, bucket, keys[i]);
+ }
+ }
+
+ /* test inline functions */
+ for (i = 0; i < nkeys; i++)
+ {
+ u32 bucket_contents;
+ u32 value = 0xdeadbeef;
+ bucket = (i % nbuckets) + 1;
+
+ pfhash_prefetch_bucket (p, bucket);
+ bucket_contents = pfhash_read_bucket_prefetch_kv (p, bucket);
+ switch (p->key_size)
+ {
+ case 16:
+ value =
+ pfhash_search_kv_16 (p, bucket_contents,
+ (u32x4 *) keys[i]);
+ break;
+ case 8:
+ value =
+ pfhash_search_kv_8 (p, bucket_contents, (u64 *) keys[i]);
+ break;
+ case 4:
+ value =
+ pfhash_search_kv_4 (p, bucket_contents, (u32 *) keys[i]);
+ break;
+ }
+
+ if (value != (i + 1))
+ clib_warning ("key %d bucket %d lookup FAIL\n", i, bucket);
+ }
+
+ if (verbose)
+ fformat (stdout, "%U\n", format_pfhash, p, verbose > 1);
+
+ for (i = 0; i < nkeys; i++)
+ {
+ bucket = (i % nbuckets) + 1;
+ pfhash_unset (p, bucket, keys[i]);
+ }
+
+ for (i = 0; i < nkeys; i++)
+ {
+ bucket = (i % nbuckets) + 1;
+ if (pfhash_get (p, bucket, keys[i]) != (u64) ~ 0)
+ {
+ clib_warning ("key %d bucket %d lookup FAIL\n", i, bucket);
+ (void) pfhash_get (p, bucket, keys[i]);
+ }
+ }
+ /* test inline functions */
+ for (i = 0; i < nkeys; i++)
+ {
+ u32 bucket_contents;
+ u32 value = 0xdeadbeef;
+ bucket = (i % nbuckets) + 1;
+
+ pfhash_prefetch_bucket (p, bucket);
+ bucket_contents = pfhash_read_bucket_prefetch_kv (p, bucket);
+ switch (p->key_size)
+ {
+ case 16:
+ value =
+ pfhash_search_kv_16 (p, bucket_contents,
+ (u32x4 *) keys[i]);
+ break;
+ case 8:
+ value =
+ pfhash_search_kv_8 (p, bucket_contents, (u64 *) keys[i]);
+ break;
+ case 4:
+ value =
+ pfhash_search_kv_4 (p, bucket_contents, (u32 *) keys[i]);
+ break;
+ }
+ if (value != (u32) ~ 0)
+ clib_warning ("key %d bucket %d lookup FAIL\n", i, bucket);
+ }
+ }
+ pfhash_free (p);
+ }
+
+  return 0;
+}
+#else
+int
+test_pfhash_main (unformat_input_t * input)
+{
+ clib_warning ("MMX unit not available");
+ return 0;
+}
+#endif
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ unformat_init_command_line (&i, argv);
+ ret = test_pfhash_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_phash.c b/src/vppinfra/test_phash.c
new file mode 100644
index 00000000..9ed2ac7b
--- /dev/null
+++ b/src/vppinfra/test_phash.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/phash.h>
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+int
+test_phash_main (unformat_input_t * input)
+{
+ phash_main_t _pm = { 0 }, *pm = &_pm;
+ int n_keys, random_keys;
+ u32 seed;
+ clib_error_t *error;
+
+ random_keys = 1;
+ n_keys = 1000;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0 == unformat (input, "keys %d", &n_keys)
+ && 0 == unformat (input, "verbose %=", &verbose, 1)
+ && 0 == unformat (input, "random-keys %=", &random_keys, 1)
+ && 0 == unformat (input, "sequential-keys %=", &random_keys, 0)
+ && 0 == unformat (input, "seed %d", &pm->random_seed)
+ && 0 == unformat (input, "64-bit %|", &pm->flags, PHASH_FLAG_MIX64)
+ && 0 == unformat (input, "32-bit %|", &pm->flags, PHASH_FLAG_MIX32)
+ && 0 == unformat (input, "fast %|", &pm->flags,
+ PHASH_FLAG_FAST_MODE)
+ && 0 == unformat (input, "slow %|", &pm->flags,
+ PHASH_FLAG_SLOW_MODE)
+ && 0 == unformat (input, "minimal %|", &pm->flags,
+ PHASH_FLAG_MINIMAL)
+ && 0 == unformat (input, "non-minimal %|", &pm->flags,
+ PHASH_FLAG_NON_MINIMAL))
+ clib_error ("unknown input `%U'", format_unformat_error, input);
+ }
+
+ if (!pm->random_seed)
+ pm->random_seed = random_default_seed ();
+
+ if_verbose
+ ("%d %d-bit keys, random seed %d, %s mode, looking for %sminimal hash",
+ n_keys, (pm->flags & PHASH_FLAG_MIX64) ? 64 : 32, pm->random_seed,
+ (pm->flags & PHASH_FLAG_FAST_MODE) ? "fast" : "slow",
+ (pm->flags & PHASH_FLAG_MINIMAL) ? "" : "non-");
+
+ seed = pm->random_seed;
+
+ /* Initialize random keys. */
+ {
+ phash_key_t *k;
+
+ vec_resize (pm->keys, n_keys);
+ vec_foreach (k, pm->keys)
+ {
+ k->key = k - pm->keys;
+ if (random_keys)
+ {
+ if (pm->flags & PHASH_FLAG_MIX64)
+ k->key = random_u64 (&seed);
+ else
+ k->key = random_u32 (&seed);
+ }
+ }
+ }
+
+ error = phash_find_perfect_hash (pm);
+ if (error)
+ {
+ clib_error_report (error);
+ return 1;
+ }
+ else
+ {
+ if_verbose ("(%d,%d) (a,b) bits, %d seeds tried, %d tree walks",
+ pm->a_bits, pm->b_bits,
+ pm->n_seed_trials, pm->n_perfect_calls);
+
+ error = phash_validate (pm);
+ if (error)
+ {
+ clib_error_report (error);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int res;
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ res = test_phash_main (&i);
+ unformat_free (&i);
+ return res;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_pool.c b/src/vppinfra/test_pool.c
new file mode 100644
index 00000000..67a5e50a
--- /dev/null
+++ b/src/vppinfra/test_pool.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/mem.h>
+#include <vppinfra/pool.h>
+
+#ifdef __KERNEL__
+#include <linux/unistd.h>
+#else
+#include <unistd.h>
+#endif
+
+int
+main (int argc, char *argv[])
+{
+ int i, n, seed;
+
+ int *p = 0, *e, j, *o = 0;
+
+  n = (argc > 1) ? atoi (argv[1]) : 1000;	/* default iteration count */
+  seed = getpid ();
+  srandom (seed);
+
+ for (i = 0; i < n; i++)
+ {
+ if (vec_len (o) < 10 || (random () & 1))
+ {
+	  /* Grow: allocate an element, store its own index in it,
+	     and remember that index in o. */
+	  pool_get (p, e);
+	  j = e - p;
+	  *e = j;
+	  vec_add1 (o, j);
+ }
+ else
+ {
+	  /* Shrink: free a randomly chosen live element. */
+	  j = random () % vec_len (o);
+	  e = p + o[j];
+	  pool_put (p, e);
+ vec_delete (o, 1, j);
+ }
+ }
+ p = pool_free (p);
+ vec_free (o);
+ return 0;
+}
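+
+/* A minimal sketch of visiting whatever currently survives in a pool;
+   the loop above only allocates and frees, but pool_foreach is the
+   usual way to walk the live elements.  Not called by the test. */
+static int __attribute__ ((unused))
+pool_sum_sketch (int *pool)
+{
+  int *e, sum = 0;
+
+  /* *INDENT-OFF* */
+  pool_foreach (e, pool, ({
+    sum += e[0];
+  }));
+  /* *INDENT-ON* */
+
+  return sum;
+}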
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_pool_iterate.c b/src/vppinfra/test_pool_iterate.c
new file mode 100644
index 00000000..27ce4bb3
--- /dev/null
+++ b/src/vppinfra/test_pool_iterate.c
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2011 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/mem.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+
+#ifdef __KERNEL__
+#include <linux/unistd.h>
+#else
+#include <unistd.h>
+#endif
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+ uword next;
+ u32 *tp = 0;
+ u32 *junk;
+
+ for (i = 0; i < 70; i++)
+ pool_get (tp, junk);
+
+ (void) junk; /* compiler warning */
+
+ pool_put_index (tp, 1);
+ pool_put_index (tp, 65);
+
+ next = ~0;
+ do
+ {
+ next = pool_next_index (tp, next);
+ fformat (stdout, "next index %d\n", next);
+ }
+ while (next != ~0);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_ptclosure.c b/src/vppinfra/test_ptclosure.c
new file mode 100644
index 00000000..be7d51df
--- /dev/null
+++ b/src/vppinfra/test_ptclosure.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/ptclosure.h>
+#include <vppinfra/hash.h>
+
+typedef struct
+{
+ uword *index_by_name;
+ u8 *items;
+} test_main_t;
+
+test_main_t test_main;
+
+static char *items[] = {
+ "d",
+ "a",
+ "b",
+ "c",
+};
+
+char *constraints[] = {
+ "a,b",
+ "b,c",
+ "d,b",
+ // "c,a", /* no partial order possible */
+};
+
+u32
+vl (void *p)
+{
+ return vec_len (p);
+}
+
+static void
+dump_closure (test_main_t * tm, char *s, u8 ** orig)
+{
+ int i, j;
+
+ fformat (stdout, "--------- %s --------------\n", s);
+ for (i = 0; i < vec_len (orig); i++)
+ {
+ for (j = 0; j < vec_len (orig); j++)
+ if (orig[i][j])
+ {
+ fformat (stdout, "%s <before> %s\n", items[i], items[j]);
+ }
+ }
+}
+
+int
+comma_split (u8 * s, u8 ** a, u8 ** b)
+{
+ *a = s;
+
+ while (*s && *s != ',')
+ s++;
+
+ if (*s == ',')
+ *s = 0;
+ else
+ return 1;
+
+ *b = (u8 *) (s + 1);
+ return 0;
+}
+
+int
+test_ptclosure_main (unformat_input_t * input)
+{
+ test_main_t *tm = &test_main;
+ u8 *item_name;
+ int i, j;
+ u8 **orig;
+ u8 **closure;
+ u8 *a_name, *b_name;
+ int a_index, b_index;
+ uword *p;
+ u8 *this_constraint;
+ int n;
+ u32 *result = 0;
+
+ tm->index_by_name = hash_create_string (0, sizeof (uword));
+
+ n = ARRAY_LEN (items);
+
+ for (i = 0; i < n; i++)
+ {
+ item_name = (u8 *) items[i];
+ hash_set_mem (tm->index_by_name, item_name, i);
+ }
+
+ orig = clib_ptclosure_alloc (n);
+
+ for (i = 0; i < ARRAY_LEN (constraints); i++)
+ {
+ this_constraint = format (0, "%s%c", constraints[i], 0);
+
+ if (comma_split (this_constraint, &a_name, &b_name))
+ {
+ clib_warning ("couldn't split '%s'", constraints[i]);
+ return 1;
+ }
+
+ p = hash_get_mem (tm->index_by_name, a_name);
+ if (p == 0)
+ {
+ clib_warning ("couldn't find '%s'", a_name);
+ return 1;
+ }
+ a_index = p[0];
+
+ p = hash_get_mem (tm->index_by_name, b_name);
+ if (p == 0)
+ {
+ clib_warning ("couldn't find '%s'", b_name);
+ return 1;
+ }
+ b_index = p[0];
+
+ orig[a_index][b_index] = 1;
+ vec_free (this_constraint);
+ }
+
+ dump_closure (tm, "original relation", orig);
+
+ closure = clib_ptclosure (orig);
+
+ dump_closure (tm, "closure", closure);
+
+  /*
+   * Output a partial order: repeatedly emit any item whose closure row
+   * is all zero (it precedes nothing still pending), then clear its
+   * column so it no longer constrains the remaining items.  Sinks come
+   * out first, so the result is printed in reverse order below.
+   */
+
+again:
+ for (i = 0; i < n; i++)
+ {
+ for (j = 0; j < n; j++)
+ {
+ if (closure[i][j])
+ goto item_constrained;
+ }
+ /* Item i can be output */
+ vec_add1 (result, i);
+ {
+ int k;
+ for (k = 0; k < n; k++)
+ closure[k][i] = 0;
+ /* "Magic" a before a, to keep from ever outputting it again */
+ closure[i][i] = 1;
+ goto again;
+ }
+ item_constrained:
+ ;
+ }
+
+ if (vec_len (result) != n)
+ {
+ clib_warning ("no partial order exists");
+ exit (1);
+ }
+
+ fformat (stdout, "Partial order:\n");
+
+ for (i = vec_len (result) - 1; i >= 0; i--)
+ {
+ fformat (stdout, "%s\n", items[result[i]]);
+ }
+
+ vec_free (result);
+ clib_ptclosure_free (orig);
+ clib_ptclosure_free (closure);
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ unformat_init_command_line (&i, argv);
+ ret = test_ptclosure_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_qhash.c b/src/vppinfra/test_qhash.c
new file mode 100644
index 00000000..fdbf0bbe
--- /dev/null
+++ b/src/vppinfra/test_qhash.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/bitmap.h>
+#include <vppinfra/os.h>
+#include <vppinfra/qhash.h>
+#include <vppinfra/random.h>
+#include <vppinfra/time.h>
+
+typedef struct
+{
+ u32 n_iter, seed, n_keys, n_hash_keys, verbose;
+
+ u32 max_vector;
+
+ uword *hash;
+
+ uword *keys_in_hash_bitmap;
+
+ u32 *qhash;
+
+ uword *keys;
+
+ uword *lookup_keys;
+ uword *lookup_key_indices;
+ u32 *lookup_results;
+
+ u32 *get_multiple_results;
+
+ clib_time_t time;
+
+ f64 overflow_fraction, ave_elts;
+ f64 get_time, hash_get_time;
+ f64 set_time, set_count;
+ f64 unset_time, unset_count;
+ f64 hash_set_time, hash_unset_time;
+} test_qhash_main_t;
+
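+/* The benchmark mirrors every qhash operation in a plain clib hash
+   (plus a bitmap of which keys are present) so contents can be
+   cross-checked each iteration, and accumulates set/get/unset times
+   for both tables; the final report prints per-operation clocks and
+   the clib-hash/qhash ratios. */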
+clib_error_t *
+test_qhash_main (unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ test_qhash_main_t _tm, *tm = &_tm;
+ uword i, iter;
+
+ memset (tm, 0, sizeof (tm[0]));
+ tm->n_iter = 10;
+ tm->seed = 1;
+ tm->n_keys = 10;
+ tm->max_vector = 1;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "iter %d", &tm->n_iter))
+ ;
+ else if (unformat (input, "seed %d", &tm->seed))
+ ;
+ else if (unformat (input, "keys %d", &tm->n_keys))
+ ;
+ else if (unformat (input, "size %d", &tm->n_hash_keys))
+ ;
+ else if (unformat (input, "vector %d", &tm->max_vector))
+ ;
+ else if (unformat (input, "verbose"))
+ tm->verbose = 1;
+ else
+ {
+ error = clib_error_create ("unknown input `%U'\n",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (!tm->seed)
+ tm->seed = random_default_seed ();
+
+ clib_time_init (&tm->time);
+
+ clib_warning ("iter %d, seed %u, keys %d, max vector %d, ",
+ tm->n_iter, tm->seed, tm->n_keys, tm->max_vector);
+
+ vec_resize (tm->keys, tm->n_keys);
+ vec_resize (tm->get_multiple_results, tm->n_keys);
+ for (i = 0; i < vec_len (tm->keys); i++)
+ tm->keys[i] = random_uword (&tm->seed);
+
+ if (!tm->n_hash_keys)
+ tm->n_hash_keys = 2 * max_pow2 (tm->n_keys);
+ tm->n_hash_keys = clib_max (tm->n_keys, tm->n_hash_keys);
+ qhash_resize (tm->qhash, tm->n_hash_keys);
+
+ {
+ qhash_t *h = qhash_header (tm->qhash);
+ int i;
+ for (i = 0; i < ARRAY_LEN (h->hash_seeds); i++)
+ h->hash_seeds[i] = random_uword (&tm->seed);
+ }
+
+ vec_resize (tm->lookup_keys, tm->max_vector);
+ vec_resize (tm->lookup_key_indices, tm->max_vector);
+ vec_resize (tm->lookup_results, tm->max_vector);
+
+ for (iter = 0; iter < tm->n_iter; iter++)
+ {
+ uword *p, j, n, is_set;
+
+ n = tm->max_vector;
+
+ is_set = random_u32 (&tm->seed) & 1;
+ is_set |= hash_elts (tm->hash) < (tm->n_keys / 4);
+ if (hash_elts (tm->hash) > (3 * tm->n_keys) / 4)
+ is_set = 0;
+
+ _vec_len (tm->lookup_keys) = n;
+ _vec_len (tm->lookup_key_indices) = n;
+ j = 0;
+ while (j < n)
+ {
+ i = random_u32 (&tm->seed) % vec_len (tm->keys);
+ if (clib_bitmap_get (tm->keys_in_hash_bitmap, i) != is_set)
+ {
+ f64 t[2];
+ tm->lookup_key_indices[j] = i;
+ tm->lookup_keys[j] = tm->keys[i];
+ t[0] = clib_time_now (&tm->time);
+ if (is_set)
+ hash_set (tm->hash, tm->keys[i], i);
+ else
+ hash_unset (tm->hash, tm->keys[i]);
+ t[1] = clib_time_now (&tm->time);
+ if (is_set)
+ tm->hash_set_time += t[1] - t[0];
+ else
+ tm->hash_unset_time += t[1] - t[0];
+ tm->keys_in_hash_bitmap
+ = clib_bitmap_set (tm->keys_in_hash_bitmap, i, is_set);
+ j++;
+ }
+ }
+
+ {
+ f64 t[2];
+
+ if (is_set)
+ {
+ t[0] = clib_time_now (&tm->time);
+ qhash_set_multiple (tm->qhash,
+ tm->lookup_keys,
+ vec_len (tm->lookup_keys),
+ tm->lookup_results);
+ t[1] = clib_time_now (&tm->time);
+ tm->set_time += t[1] - t[0];
+ tm->set_count += vec_len (tm->lookup_keys);
+ for (i = 0; i < vec_len (tm->lookup_keys); i++)
+ {
+ uword r = tm->lookup_results[i];
+ *vec_elt_at_index (tm->qhash, r) = tm->lookup_key_indices[i];
+ }
+ }
+ else
+ {
+ t[0] = clib_time_now (&tm->time);
+ qhash_unset_multiple (tm->qhash,
+ tm->lookup_keys,
+ vec_len (tm->lookup_keys),
+ tm->lookup_results);
+ t[1] = clib_time_now (&tm->time);
+ tm->unset_time += t[1] - t[0];
+ tm->unset_count += vec_len (tm->lookup_keys);
+
+ for (i = 0; i < vec_len (tm->lookup_keys); i++)
+ {
+ uword r = tm->lookup_results[i];
+ *vec_elt_at_index (tm->qhash, r) = ~0;
+ }
+ }
+ }
+
+ if (qhash_elts (tm->qhash) != hash_elts (tm->hash))
+ os_panic ();
+
+ {
+ qhash_t *h;
+ uword i, k, l, count;
+
+ h = qhash_header (tm->qhash);
+
+ for (i = k = 0; k < vec_len (h->hash_key_valid_bitmap); k++)
+ i += count_set_bits (h->hash_key_valid_bitmap[k]);
+ k = hash_elts (h->overflow_hash);
+ l = qhash_elts (tm->qhash);
+ if (i + k != l)
+ os_panic ();
+
+ count = hash_elts (h->overflow_hash);
+ for (i = 0; i < (1 << h->log2_hash_size); i++)
+ count += tm->qhash[i] != ~0;
+ if (count != qhash_elts (tm->qhash))
+ os_panic ();
+
+ {
+ u32 *tmp = 0;
+
+ /* *INDENT-OFF* */
+ hash_foreach (k, l, h->overflow_hash, ({
+ j = qhash_hash_mix (h, k) / QHASH_KEYS_PER_BUCKET;
+ vec_validate (tmp, j);
+ tmp[j] += 1;
+ }));
+ /* *INDENT-ON* */
+
+ for (k = 0; k < vec_len (tmp); k++)
+ {
+ if (k >= vec_len (h->overflow_counts))
+ os_panic ();
+ if (h->overflow_counts[k] != tmp[k])
+ os_panic ();
+ }
+ for (; k < vec_len (h->overflow_counts); k++)
+ if (h->overflow_counts[k] != 0)
+ os_panic ();
+
+ vec_free (tmp);
+ }
+ }
+
+ {
+ f64 t[2];
+
+ t[0] = clib_time_now (&tm->time);
+ qhash_get_multiple (tm->qhash, tm->keys, vec_len (tm->keys),
+ tm->get_multiple_results);
+ t[1] = clib_time_now (&tm->time);
+ tm->get_time += t[1] - t[0];
+
+ for (i = 0; i < vec_len (tm->keys); i++)
+ {
+ u32 r;
+
+ t[0] = clib_time_now (&tm->time);
+ p = hash_get (tm->hash, tm->keys[i]);
+ t[1] = clib_time_now (&tm->time);
+ tm->hash_get_time += t[1] - t[0];
+
+ r = qhash_get (tm->qhash, tm->keys[i]);
+ if (p)
+ {
+ if (p[0] != i)
+ os_panic ();
+ if (*vec_elt_at_index (tm->qhash, r) != i)
+ os_panic ();
+ }
+ else
+ {
+ if (r != ~0)
+ os_panic ();
+ }
+ if (r != tm->get_multiple_results[i])
+ os_panic ();
+ }
+ }
+
+ tm->overflow_fraction +=
+ ((f64) qhash_n_overflow (tm->qhash) / qhash_elts (tm->qhash));
+ tm->ave_elts += qhash_elts (tm->qhash);
+ }
+
+ fformat (stderr, "%d iter %.6e overflow, %.4f ave. elts\n",
+ tm->n_iter,
+ tm->overflow_fraction / tm->n_iter, tm->ave_elts / tm->n_iter);
+
+ tm->get_time /= tm->n_iter * vec_len (tm->keys);
+ tm->hash_get_time /= tm->n_iter * vec_len (tm->keys);
+
+ tm->set_time /= tm->set_count;
+ tm->unset_time /= tm->unset_count;
+ tm->hash_set_time /= tm->set_count;
+ tm->hash_unset_time /= tm->unset_count;
+
+ fformat (stderr,
+ "get/set/unset clocks %.2e %.2e %.2e clib %.2e %.2e %.2e ratio %.2f %.2f %.2f\n",
+ tm->get_time * tm->time.clocks_per_second,
+ tm->set_time * tm->time.clocks_per_second,
+ tm->unset_time * tm->time.clocks_per_second,
+ tm->hash_get_time * tm->time.clocks_per_second,
+ tm->hash_set_time * tm->time.clocks_per_second,
+ tm->hash_unset_time * tm->time.clocks_per_second,
+ tm->hash_get_time / tm->get_time, tm->hash_set_time / tm->set_time,
+ tm->hash_unset_time / tm->unset_time);
+
+
+done:
+ return error;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ clib_error_t *error;
+
+ unformat_init_command_line (&i, argv);
+ error = test_qhash_main (&i);
+ unformat_free (&i);
+ if (error)
+ {
+ clib_error_report (error);
+ return 1;
+ }
+ else
+ return 0;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_random.c b/src/vppinfra/test_random.c
new file mode 100644
index 00000000..49759eac
--- /dev/null
+++ b/src/vppinfra/test_random.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+#include <vppinfra/bitmap.h>
+
+static u32 known_random_sequence[] = {
+ 0x00000000, 0x3c6ef35f, 0x47502932, 0xd1ccf6e9,
+ 0xaaf95334, 0x6252e503, 0x9f2ec686, 0x57fe6c2d,
+ 0xa3d95fa8, 0x81fdbee7, 0x94f0af1a, 0xcbf633b1,
+};
+
+
+int
+test_random_main (unformat_input_t * input)
+{
+ uword n_iterations;
+ uword i, repeat_count;
+ uword *bitmap = 0;
+ uword print;
+ u32 seed;
+ u32 *seedp = &seed;
+
+ /* first, check known sequence from Numerical Recipes in C, 2nd ed.
+ page 284 */
+ seed = known_random_sequence[0];
+  for (i = 0; i < ARRAY_LEN (known_random_sequence) - 1; i++)
+    {
+      u32 rv;
+      rv = random_u32 (seedp);
+      if (rv != known_random_sequence[i + 1])
+	{
+	  fformat (stderr, "known sequence check FAILS at index %d", i + 1);
+	  break;
+	}
+    }
+
+  if (i == ARRAY_LEN (known_random_sequence) - 1)
+    clib_warning ("known sequence check passes");
+
+ n_iterations = 1000;
+ seed = 0;
+ print = 1 << 24;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0 == unformat (input, "iter %d", &n_iterations)
+ && 0 == unformat (input, "print %d", &print)
+ && 0 == unformat (input, "seed %d", &seed))
+ clib_error ("unknown input `%U'", format_unformat_error, input);
+ }
+
+ if (!seed)
+ seed = random_default_seed ();
+
+ if (n_iterations == 0)
+ n_iterations = random_u32_max ();
+
+ clib_warning ("%d iterations, seed %d\n", n_iterations, seed);
+
+ repeat_count = 0;
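+  /* Remember every value produced in one giant bitmap; a second
+     sighting counts as a repeat.  random_u32 () is expected to have a
+     full 2^32 period, so repeats should only appear once the sequence
+     wraps. */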
+ for (i = 0; i < n_iterations; i++)
+ {
+ uword r = random_u32 (&seed);
+ uword b, ri, rj;
+
+ ri = r / BITS (bitmap[0]);
+ rj = (uword) 1 << (r % BITS (bitmap[0]));
+
+ vec_validate (bitmap, ri);
+ b = bitmap[ri];
+
+ if (b & rj)
+ goto repeat;
+ b |= rj;
+ bitmap[ri] = b;
+
+ if (0 == (i & (print - 1)))
+ fformat (stderr, "0x%08x iterations %d repeats\n", i, repeat_count);
+ continue;
+
+ repeat:
+ fformat (stderr, "repeat found at iteration %d/%d\n", i, n_iterations);
+ repeat_count++;
+ continue;
+ }
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ unformat_init_command_line (&i, argv);
+ ret = test_random_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_random_isaac.c b/src/vppinfra/test_random_isaac.c
new file mode 100644
index 00000000..337d30dd
--- /dev/null
+++ b/src/vppinfra/test_random_isaac.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/random.h>
+#include <vppinfra/random_isaac.h>
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+int
+test_isaac_main (unformat_input_t * input)
+{
+ uword n_iterations, seed;
+ uword i, repeat_count;
+ uword *hash = 0;
+ uword print;
+ isaac_t ctx;
+ uword results[ISAAC_SIZE] = { 0 };
+ uword n_results;
+
+ n_iterations = 1000;
+ seed = 0;
+ print = 1 << 24;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+      if (0 == unformat (input, "iter %wd", &n_iterations)
+	  && 0 == unformat (input, "print %wd", &print)
+	  && 0 == unformat (input, "seed %wd", &seed))
+ clib_error ("unknown input `%U'", format_unformat_error, input);
+ }
+
+ if (!seed)
+ seed = random_default_seed ();
+
+ results[0] = seed;
+
+ if (n_iterations == 0)
+ n_iterations = ~0;
+
+ if_verbose ("%d iterations, seed %d\n", n_iterations, seed);
+
+ repeat_count = 0;
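+  /* Remember previously seen outputs in a hash; it is freed whenever it
+     exceeds 0x100000 entries, so repeats are only detected within that
+     sliding window. */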
+ isaac_init (&ctx, results);
+ isaac (&ctx, results);
+ n_results = 0;
+ for (i = 0; i < n_iterations; i++)
+ {
+ uword r = results[n_results++];
+
+ if (!hash)
+ hash = hash_create (0, /* value bytes */ 0);
+
+ if (hash_get (hash, r))
+ goto repeat;
+
+ hash_set1 (hash, r);
+
+ if (n_results >= ARRAY_LEN (results))
+ {
+ isaac (&ctx, results);
+ n_results = 0;
+ }
+
+ if (verbose && 0 == (i & (print - 1)))
+ fformat (stderr, "0x%08x iterations %d repeats\n", i, repeat_count);
+
+ if (hash_elts (hash) > 0x100000)
+ hash_free (hash);
+
+ continue;
+
+ repeat:
+ fformat (stderr, "repeat found at iteration %d/%d\n", i, n_iterations);
+ repeat_count++;
+ continue;
+ }
+
+ return repeat_count > 0 ? 1 : 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ ret = test_isaac_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_serialize.c b/src/vppinfra/test_serialize.c
new file mode 100644
index 00000000..e00eec32
--- /dev/null
+++ b/src/vppinfra/test_serialize.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+#include <vppinfra/serialize.h>
+#include <vppinfra/os.h>
+
+#define foreach_my_vector_type \
+ _ (u8, a8) \
+ _ (u16, a16) \
+ _ (u32, a32)
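+
+/* X-macro: users define _(type, field) and expand foreach_my_vector_type
+   to generate the struct members and per-field (un)serialize calls below. */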
+
+typedef struct
+{
+#define _(t,f) t f;
+ foreach_my_vector_type
+#undef _
+} my_vector_type_t;
+
+static void
+serialize_my_vector_type_single (serialize_main_t * m, va_list * va)
+{
+ my_vector_type_t *v = va_arg (*va, my_vector_type_t *);
+ u32 n = va_arg (*va, u32);
+ u32 i;
+
+ for (i = 0; i < n; i++)
+ {
+#define _(t,f) serialize_integer (m, v[i].f, sizeof (v[i].f));
+ foreach_my_vector_type;
+ }
+#undef _
+}
+
+static void
+unserialize_my_vector_type_single (serialize_main_t * m, va_list * va)
+{
+ my_vector_type_t *v = va_arg (*va, my_vector_type_t *);
+ u32 n = va_arg (*va, u32);
+ u32 i;
+
+ for (i = 0; i < n; i++)
+ {
+#define _(t,f) { u32 tmp; unserialize_integer (m, &tmp, sizeof (v[i].f)); v[i].f = tmp; }
+ foreach_my_vector_type;
+#undef _
+ }
+}
+
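+/* The 'multiple' variants serialize one strided run per field across all
+   n elements (via STRUCT_STRIDE_OF), while the 'single' variants above go
+   element by element. */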
+static void
+serialize_my_vector_type_multiple (serialize_main_t * m, va_list * va)
+{
+ my_vector_type_t *v = va_arg (*va, my_vector_type_t *);
+ u32 n = va_arg (*va, u32);
+
+#define _(t,f) \
+ serialize_multiple \
+ (m, \
+ &v[0].f, \
+ STRUCT_SIZE_OF (my_vector_type_t, f), \
+ STRUCT_STRIDE_OF (my_vector_type_t, f), \
+ n);
+
+ foreach_my_vector_type;
+
+#undef _
+}
+
+static void
+unserialize_my_vector_type_multiple (serialize_main_t * m, va_list * va)
+{
+ my_vector_type_t *v = va_arg (*va, my_vector_type_t *);
+ u32 n = va_arg (*va, u32);
+
+#define _(t,f) \
+ unserialize_multiple \
+ (m, \
+ &v[0].f, \
+ STRUCT_SIZE_OF (my_vector_type_t, f), \
+ STRUCT_STRIDE_OF (my_vector_type_t, f), \
+ n);
+
+ foreach_my_vector_type;
+
+#undef _
+}
+
+typedef struct
+{
+ u32 n_iter;
+ u32 seed;
+ u32 verbose;
+ u32 multiple;
+ u32 max_len;
+
+ my_vector_type_t **test_vectors;
+
+ char *dump_file;
+
+ serialize_main_t serialize_main;
+ serialize_main_t unserialize_main;
+} test_serialize_main_t;
+
+int
+test_serialize_main (unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ test_serialize_main_t _tm, *tm = &_tm;
+ serialize_main_t *sm = &tm->serialize_main;
+ serialize_main_t *um = &tm->unserialize_main;
+ uword i;
+
+ memset (tm, 0, sizeof (tm[0]));
+ tm->n_iter = 100;
+ tm->seed = 1;
+ tm->max_len = 128;
+ tm->verbose = 0;
+ tm->multiple = 1;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "iter %d", &tm->n_iter))
+ ;
+ else if (unformat (input, "seed %d", &tm->seed))
+ ;
+ else if (unformat (input, "file %s", &tm->dump_file))
+ ;
+ else if (unformat (input, "max-len %d", &tm->max_len))
+ ;
+ else if (unformat (input, "multiple %=", &tm->multiple, 1))
+ ;
+ else if (unformat (input, "single %=", &tm->multiple, 0))
+ ;
+ else if (unformat (input, "verbose %=", &tm->verbose, 1))
+ ;
+ else
+ {
+ error = clib_error_create ("unknown input `%U'\n",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (tm->seed == 0)
+ tm->seed = random_default_seed ();
+
+ clib_warning ("iter %d seed %d max-len %d", tm->n_iter, tm->seed,
+ tm->max_len);
+
+#ifdef CLIB_UNIX
+ if (tm->dump_file)
+ serialize_open_unix_file (sm, tm->dump_file);
+ else
+#endif
+ serialize_open_vector (sm, 0);
+
+ vec_resize (tm->test_vectors, tm->n_iter);
+ for (i = 0; i < tm->n_iter; i++)
+ {
+ uword l = 1 + (random_u32 (&tm->seed) % tm->max_len);
+ my_vector_type_t *mv;
+
+ vec_resize (tm->test_vectors[i], l);
+ vec_foreach (mv, tm->test_vectors[i])
+ {
+#define _(t,f) mv->f = random_u32 (&tm->seed) & pow2_mask (31);
+ foreach_my_vector_type;
+#undef _
+ }
+
+ vec_serialize (sm, tm->test_vectors[i],
+ tm->multiple ? serialize_my_vector_type_multiple :
+ serialize_my_vector_type_single);
+ }
+
+ if (tm->verbose)
+ clib_warning ("overflow vector max bytes %d",
+ vec_max_len (sm->stream.overflow_buffer));
+
+ serialize_close (sm);
+
+#ifdef CLIB_UNIX
+ if (tm->dump_file)
+ {
+ if ((error = unserialize_open_unix_file (um, tm->dump_file)))
+ goto done;
+ }
+ else
+#endif
+ {
+ u8 *v = serialize_close_vector (sm);
+ unserialize_open_data (um, v, vec_len (v));
+ }
+
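+  /* Unserialize every vector and require an exact match with the
+     original, element count and contents alike. */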
+ for (i = 0; i < tm->n_iter; i++)
+ {
+ my_vector_type_t *mv0;
+ my_vector_type_t *mv1;
+
+ vec_unserialize (um, &mv0,
+ tm->multiple ? unserialize_my_vector_type_multiple :
+ unserialize_my_vector_type_single);
+ mv1 = tm->test_vectors[i];
+
+ if (vec_len (mv0) != vec_len (mv1))
+ os_panic ();
+ if (memcmp (mv0, mv1, vec_len (mv0) * sizeof (mv0[0])))
+ os_panic ();
+
+ vec_free (mv0);
+ }
+
+done:
+  if (error)
+    clib_error_report (error);
+  return error ? 1 : 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int r;
+
+ unformat_init_command_line (&i, argv);
+ r = test_serialize_main (&i);
+ unformat_free (&i);
+ return r;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_slist.c b/src/vppinfra/test_slist.c
new file mode 100644
index 00000000..3c3cbf73
--- /dev/null
+++ b/src/vppinfra/test_slist.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef CLIB_UNIX
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#endif
+
+#include <vppinfra/slist.h>
+
+typedef struct
+{
+ u32 *random_pool;
+ u32 seed;
+ u32 iter;
+ u32 verbose;
+ f64 branching_factor;
+ clib_slist_t slist;
+} test_main_t;
+
+test_main_t test_main;
+
+#define foreach_simple_test \
+_(2) \
+_(4) \
+_(3) \
+_(1)
+
+
+void
+run_test (test_main_t * tm)
+{
+ int i;
+ u32 *tv;
+ u32 ncompares;
+ u64 total_compares = 0;
+
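+  /* Flip to 0 to run the small fixed insert sequence at the bottom
+     of this function instead of the random test. */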
+ if (1)
+ {
+ /*
+ * Add a bunch of random numbers to the skip-list,
+ * sorting them.
+ */
+ for (i = 0; i < tm->iter; i++)
+ {
+ pool_get (tm->random_pool, tv);
+ *tv = random_u32 (&tm->seed);
+ clib_slist_add (&tm->slist, tv, tv - tm->random_pool);
+ }
+ /* make sure we can find each one */
+ for (i = 0; i < tm->iter; i++)
+ {
+ u32 search_result;
+ tv = pool_elt_at_index (tm->random_pool, i);
+
+ search_result = clib_slist_search (&tm->slist, tv, &ncompares);
+ ASSERT (search_result == i);
+
+ total_compares += ncompares;
+ }
+
+ fformat (stdout, "%.2f avg compares/search\n",
+ (f64) total_compares / (f64) i);
+
+ fformat (stdout, "%U\n", format_slist, &tm->slist,
+ tm->iter < 1000 /* verbose */ );
+
+ /* delete half of them */
+ for (i = tm->iter / 2; i < tm->iter; i++)
+ {
+ tv = pool_elt_at_index (tm->random_pool, i);
+ (void) clib_slist_del (&tm->slist, tv);
+ }
+
+ /* make sure we can find the set we should find, and no others */
+ for (i = 0; i < tm->iter; i++)
+ {
+ u32 search_result;
+ tv = pool_elt_at_index (tm->random_pool, i);
+
+ search_result = clib_slist_search (&tm->slist, tv, &ncompares);
+ if (i >= tm->iter / 2)
+ ASSERT (search_result == (u32) ~ 0);
+ else
+ ASSERT (search_result == i);
+
+ }
+
+ fformat (stdout, "%U\n", format_slist, &tm->slist,
+ tm->iter < 1000 /* verbose */ );
+
+ /* delete the rest */
+ for (i = 0; i < tm->iter; i++)
+ {
+ tv = pool_elt_at_index (tm->random_pool, i);
+
+ (void) clib_slist_del (&tm->slist, tv);
+ }
+
+ fformat (stdout, "%U\n", format_slist, &tm->slist,
+ tm->iter < 1000 /* verbose */ );
+ }
+ else
+ {
+
+#define _(n) \
+ do { \
+ pool_get (tm->random_pool, tv); \
+ *tv = n; \
+ clib_slist_add (&tm->slist, tv, tv - tm->random_pool); \
+ fformat(stdout, "%U\n", format_slist, &tm->slist, 1 /* verbose */); \
+ } while (0);
+ foreach_simple_test;
+#undef _
+ }
+
+ return;
+}
+
+word
+test_compare (void *key, u32 elt_index)
+{
+ u32 *k = (u32 *) key;
+ u32 elt = test_main.random_pool[elt_index];
+
+ if (*k < elt)
+ return -1;
+ if (*k > elt)
+ return 1;
+ return 0;
+}
+
+u8 *
+test_format (u8 * s, va_list * args)
+{
+ u32 elt_index = va_arg (*args, u32);
+ u32 elt = test_main.random_pool[elt_index];
+
+ return format (s, "%u", elt);
+}
+
+void
+initialize_slist (test_main_t * tm)
+{
+ clib_slist_init (&tm->slist, tm->branching_factor,
+ test_compare, test_format);
+}
+
+int
+test_slist_main (unformat_input_t * input)
+{
+ test_main_t *tm = &test_main;
+ u32 tmp;
+
+ tm->seed = 0xbabeb00b;
+ tm->iter = 100000;
+ tm->verbose = 1;
+ tm->branching_factor = 1.0 / 5.0;
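+  /* Branching factor is the per-element promotion probability: at 1/5,
+     roughly one element in five is replicated on the next level up. */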
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "seed %d", &tm->seed))
+ continue;
+ else if (unformat (input, "iter %d", &tm->iter))
+ continue;
+ else if (unformat (input, "verbose"))
+ tm->verbose = 1;
+ else if (unformat (input, "branch %d", &tmp))
+ {
+ if (tmp > 0)
+ tm->branching_factor = 1.0 / (f64) tmp;
+ else
+ fformat (stderr, "warning: branch = 0, ignored\n");
+ }
+ else
+ {
+ clib_error ("unknown input `%U'", format_unformat_error, input);
+ goto usage;
+ }
+ }
+ initialize_slist (tm);
+ run_test (tm);
+
+ return 0;
+
+usage:
+  fformat (stderr,
+	   "usage: test_slist seed <seed> iter <iter> branch <n> [verbose]\n");
+ return 1;
+
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ clib_mem_init (0, (u64) 4 << 30);
+
+ unformat_init_command_line (&i, argv);
+ ret = test_slist_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_socket.c b/src/vppinfra/test_socket.c
new file mode 100644
index 00000000..2f25eccd
--- /dev/null
+++ b/src/vppinfra/test_socket.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <vppinfra/socket.h>
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+int
+test_socket_main (unformat_input_t * input)
+{
+ clib_socket_t _s = { 0 }, *s = &_s;
+ char *config;
+ clib_error_t *error;
+
+ s->config = "localhost:22";
+ s->flags = CLIB_SOCKET_F_IS_CLIENT;
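+  /* Default: connect as a client to the local ssh port. */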
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+      if (unformat (input, "server %s %=", &config,
+		    &s->flags, CLIB_SOCKET_F_IS_SERVER))
+	s->config = config;
+      else if (unformat (input, "client %s %=", &config,
+		    &s->flags, CLIB_SOCKET_F_IS_CLIENT))
+	s->config = config;
+ else
+ {
+ error = clib_error_create ("unknown input `%U'\n",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ error = clib_socket_init (s);
+ if (error)
+ goto done;
+
+ if (0)
+ {
+ struct
+ {
+ int a, b;
+ } *msg;
+ msg = clib_socket_tx_add (s, sizeof (msg[0]));
+ msg->a = 99;
+ msg->b = 100;
+ }
+ else
+ clib_socket_tx_add_formatted (s, "hello there mr server %d\n", 99);
+
+ error = clib_socket_tx (s);
+ if (error)
+ goto done;
+
+ while (1)
+ {
+ error = clib_socket_rx (s, 100);
+ if (error)
+ break;
+
+ if (clib_socket_rx_end_of_file (s))
+ break;
+
+ if_verbose ("%v", s->rx_buffer);
+ _vec_len (s->rx_buffer) = 0;
+ }
+
+ error = clib_socket_close (s);
+
+done:
+  if (error)
+    clib_error_report (error);
+  return error ? 1 : 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int r;
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ r = test_socket_main (&i);
+ unformat_free (&i);
+ return r;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_time.c b/src/vppinfra/test_time.c
new file mode 100644
index 00000000..63cfeac5
--- /dev/null
+++ b/src/vppinfra/test_time.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <vppinfra/time.h>
+#include <vppinfra/math.h> /* for sqrt */
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+static int
+test_time_main (unformat_input_t * input)
+{
+ f64 wait, error;
+ f64 t, tu[3], ave, rms;
+ clib_time_t c;
+ int i, n, j;
+
+ clib_time_init (&c);
+ wait = 1e-3;
+ n = 1000;
+ unformat (input, "%f %d", &wait, &n);
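+  /* Busy-wait 'wait' seconds n times, counting polling-loop iterations;
+     report mean and RMS counts plus the measured per-wait error. */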
+ ave = rms = 0;
+ tu[0] = unix_time_now ();
+ tu[1] = unix_time_now ();
+ for (i = 0; i < n; i++)
+ {
+ j = 0;
+ t = clib_time_now (&c);
+ while (clib_time_now (&c) < t + wait)
+ j++;
+ t = j;
+ ave += t;
+ rms += t * t;
+ }
+ tu[2] = unix_time_now ();
+ ave /= n;
+ rms = sqrt (rms / n - ave * ave);
+
+ error = ((tu[2] - tu[1]) - 2 * (tu[1] - tu[0]) - n * wait) / n;
+ if_verbose ("tested %d x %.6e sec waits, error %.6e loops %.6e +- %.6e\n",
+ n, wait, error, ave, rms);
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ ret = test_time_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_timing_wheel.c b/src/vppinfra/test_timing_wheel.c
new file mode 100644
index 00000000..0ce15ad8
--- /dev/null
+++ b/src/vppinfra/test_timing_wheel.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/random.h>
+#include <vppinfra/time.h>
+#include <vppinfra/timing_wheel.h>
+#include <vppinfra/zvec.h>
+
+#include <vppinfra/math.h>
+
+#if __GNUC__ < 4
+#define SQRT(a) a
+#else
+#define SQRT(a) sqrt(a)
+#endif
+
+typedef struct
+{
+ uword n_iter;
+
+ u32 n_events;
+ u32 seed;
+ u32 verbose;
+
+ /* Time is "synthetic" e.g. not taken from CPU timer. */
+ u32 synthetic_time;
+
+ clib_time_t time;
+ timing_wheel_t timing_wheel;
+
+ u64 *events;
+
+ f64 max_time;
+ f64 wait_time;
+
+ f64 total_iterate_time;
+ f64 time_iterate_start;
+
+ f64 time_per_status_update;
+ f64 time_next_status_update;
+} test_timing_wheel_main_t;
+
+typedef struct
+{
+ f64 dt;
+ f64 fraction;
+ u64 count;
+} test_timing_wheel_tmp_t;
+
+static void
+set_event (test_timing_wheel_main_t * tm, uword i)
+{
+ timing_wheel_t *w = &tm->timing_wheel;
+ u64 cpu_time;
+
+ cpu_time = w->current_time_index << w->log2_clocks_per_bin;
+ if (tm->synthetic_time)
+ cpu_time += random_u32 (&tm->seed) % tm->n_iter;
+ else
+ cpu_time +=
+ random_f64 (&tm->seed) * tm->max_time * tm->time.clocks_per_second;
+
+ timing_wheel_insert (w, cpu_time, i);
+ timing_wheel_validate (w);
+ tm->events[i] = cpu_time;
+}
+
+static int
+test_timing_wheel_tmp_cmp (void *a1, void *a2)
+{
+ test_timing_wheel_tmp_t *f1 = a1;
+ test_timing_wheel_tmp_t *f2 = a2;
+
+ return f1->dt < f2->dt ? -1 : (f1->dt > f2->dt ? +1 : 0);
+}
+
+clib_error_t *
+test_timing_wheel_main (unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ test_timing_wheel_main_t _tm, *tm = &_tm;
+ timing_wheel_t *w = &tm->timing_wheel;
+ uword iter, i;
+
+ memset (tm, 0, sizeof (tm[0]));
+ tm->n_iter = 10;
+ tm->time_per_status_update = 0;
+ tm->n_events = 100;
+ tm->seed = 1;
+ tm->synthetic_time = 1;
+ tm->max_time = 1;
+ tm->wait_time = 1e-3;
+
+ w->validate = 0;
+ w->n_wheel_elt_time_bits = 32;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "iter %wd", &tm->n_iter))
+ ;
+ else if (unformat (input, "events %d", &tm->n_events))
+ ;
+ else
+ if (unformat (input, "elt-time-bits %d", &w->n_wheel_elt_time_bits))
+ ;
+ else if (unformat (input, "seed %d", &tm->seed))
+ ;
+ else if (unformat (input, "verbose"))
+ tm->verbose = 1;
+ else if (unformat (input, "validate"))
+ w->validate = 1;
+
+ else if (unformat (input, "real-time"))
+ tm->synthetic_time = 0;
+ else if (unformat (input, "synthetic-time"))
+ tm->synthetic_time = 1;
+ else if (unformat (input, "max-time %f", &tm->max_time))
+ ;
+ else if (unformat (input, "wait-time %f", &tm->wait_time))
+ ;
+ else if (unformat (input, "iter-time %f", &tm->total_iterate_time))
+ ;
+ else if (unformat (input, "print %f", &tm->time_per_status_update))
+ ;
+
+ else
+ {
+ error = clib_error_create ("unknown input `%U'\n",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (!tm->seed)
+ tm->seed = random_default_seed ();
+
+ clib_time_init (&tm->time);
+
+ if (tm->synthetic_time)
+ {
+ w->min_sched_time = tm->time.seconds_per_clock;
+ w->max_sched_time = w->min_sched_time * 256;
+ timing_wheel_init (w, 0, tm->time.clocks_per_second);
+ }
+ else
+ {
+ timing_wheel_init (w, clib_cpu_time_now (), tm->time.clocks_per_second);
+ }
+
+ clib_warning ("iter %wd, events %d, seed %u, %U",
+ tm->n_iter, tm->n_events, tm->seed,
+ format_timing_wheel, &tm->timing_wheel, /* verbose */ 0);
+
+ /* Make some events. */
+ vec_resize (tm->events, tm->n_events);
+ for (i = 0; i < vec_len (tm->events); i++)
+ set_event (tm, i);
+
+ {
+ u32 *expired = 0;
+ f64 ave_error = 0;
+ f64 rms_error = 0;
+ f64 max_error = 0, min_error = 1e30;
+ u32 *error_hist = 0;
+ uword n_expired = 0;
+ uword *expired_bitmap[2] = { 0 };
+ uword n_events_in_wheel = vec_len (tm->events);
+
+ vec_resize (expired, 32);
+ vec_resize (error_hist, 1024);
+
+ tm->time_iterate_start = clib_time_now (&tm->time);
+ tm->time_next_status_update =
+ tm->time_iterate_start + tm->time_per_status_update;
+
+ if (tm->total_iterate_time != 0)
+ tm->n_iter = ~0;
+
+ for (iter = 0; iter < tm->n_iter || n_events_in_wheel > 0; iter++)
+ {
+ u64 cpu_time, min_next_time[2];
+
+ if (tm->synthetic_time)
+ cpu_time = iter << w->log2_clocks_per_bin;
+ else
+ cpu_time = clib_cpu_time_now ();
+
+ _vec_len (expired) = 0;
+ expired =
+ timing_wheel_advance (w, cpu_time, expired, &min_next_time[0]);
+ timing_wheel_validate (w);
+
+ /* Update bitmap of expired events. */
+ if (w->validate)
+ {
+ for (i = 0; i < vec_len (tm->events); i++)
+ {
+ uword is_expired;
+
+ is_expired =
+ (cpu_time >> w->log2_clocks_per_bin) >=
+ (tm->events[i] >> w->log2_clocks_per_bin);
+ expired_bitmap[0] =
+ clib_bitmap_set (expired_bitmap[0], i, is_expired);
+
+ /* Validate min next time. */
+ if (is_expired)
+ ASSERT (min_next_time[0] > tm->events[i]);
+ else
+ ASSERT (min_next_time[0] <= tm->events[i]);
+ }
+ }
+
+ n_expired += vec_len (expired);
+ for (i = 0; i < vec_len (expired); i++)
+ {
+ word j, idt;
+ i64 dt_cpu;
+ f64 fdt_cpu;
+
+ j = expired[i];
+ expired_bitmap[1] = clib_bitmap_ori (expired_bitmap[1], j);
+
+ dt_cpu = cpu_time - tm->events[j];
+
+ /* Event must be scheduled in correct bin. */
+ if (tm->synthetic_time)
+ ASSERT (dt_cpu >= 0 && dt_cpu <= (1 << w->log2_clocks_per_bin));
+
+ fdt_cpu = dt_cpu * tm->time.seconds_per_clock;
+
+ ave_error += fdt_cpu;
+ rms_error += fdt_cpu * fdt_cpu;
+
+ if (fdt_cpu > max_error)
+ max_error = fdt_cpu;
+ if (fdt_cpu < min_error)
+ min_error = fdt_cpu;
+
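+	      /* Fold the signed bin offset into a non-negative index so a
+	         single histogram vector covers early and late expirations. */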
+ idt =
+ (cpu_time >> w->log2_clocks_per_bin) -
+ (tm->events[j] >> w->log2_clocks_per_bin);
+ idt = zvec_signed_to_unsigned (idt);
+ vec_validate (error_hist, idt);
+ error_hist[idt] += 1;
+ }
+
+ if (w->validate)
+ for (i = 0; i < vec_len (tm->events); i++)
+ {
+ int is_expired = clib_bitmap_get (expired_bitmap[0], i);
+ int is_expired_w = clib_bitmap_get (expired_bitmap[1], i);
+ ASSERT (is_expired == is_expired_w);
+ }
+
+ min_next_time[1] = ~0;
+ for (i = 0; i < vec_len (tm->events); i++)
+ {
+ if (!clib_bitmap_get (expired_bitmap[1], i))
+ min_next_time[1] = clib_min (min_next_time[1], tm->events[i]);
+ }
+ if (min_next_time[0] != min_next_time[1])
+ clib_error ("min next time wrong 0x%Lx != 0x%Lx", min_next_time[0],
+ min_next_time[1]);
+
+ if (tm->time_per_status_update != 0
+ && clib_time_now (&tm->time) >= tm->time_next_status_update)
+ {
+ f64 ave = 0, rms = 0;
+
+ tm->time_next_status_update += tm->time_per_status_update;
+ if (n_expired > 0)
+ {
+ ave = ave_error / n_expired;
+ rms = SQRT (rms_error / n_expired - ave * ave);
+ }
+
+ clib_warning
+ ("%12wd iter done %10wd expired; ave. error %.4e +- %.4e, range %.4e %.4e",
+ iter, n_expired, ave, rms, min_error, max_error);
+ }
+
+ if (tm->total_iterate_time != 0
+ && (clib_time_now (&tm->time) - tm->time_iterate_start
+ >= tm->total_iterate_time))
+ tm->n_iter = iter;
+
+ /* Add new events to wheel to replace expired ones. */
+ n_events_in_wheel -= vec_len (expired);
+ if (iter < tm->n_iter)
+ {
+ for (i = 0; i < vec_len (expired); i++)
+ {
+ uword j = expired[i];
+ set_event (tm, j);
+ expired_bitmap[1] =
+ clib_bitmap_andnoti (expired_bitmap[1], j);
+ }
+ n_events_in_wheel += vec_len (expired);
+ }
+ }
+
+  if (n_expired > 0)
+    {
+      ave_error /= n_expired;
+      rms_error = SQRT (rms_error / n_expired - ave_error * ave_error);
+    }
+
+ clib_warning
+ ("%wd iter done %wd expired; ave. error %.4e +- %.4e, range %.4e %.4e",
+ 1 + iter, n_expired, ave_error, rms_error, min_error, max_error);
+
+ {
+ test_timing_wheel_tmp_t *fs, *f;
+ f64 total_fraction;
+
+ fs = 0;
+ for (i = 0; i < vec_len (error_hist); i++)
+ {
+ if (error_hist[i] == 0)
+ continue;
+ vec_add2 (fs, f, 1);
+ f->dt =
+ (((i64) zvec_unsigned_to_signed (i) << w->log2_clocks_per_bin) *
+ tm->time.seconds_per_clock);
+ f->fraction = (f64) error_hist[i] / (f64) n_expired;
+ f->count = error_hist[i];
+ }
+
+ vec_sort_with_function (fs, test_timing_wheel_tmp_cmp);
+
+ total_fraction = 0;
+ vec_foreach (f, fs)
+ {
+ total_fraction += f->fraction;
+ if (f == fs)
+ fformat (stdout, "%=12s %=16s %=16s %s\n", "Error max", "Fraction",
+ "Total", "Count");
+ fformat (stdout, "%12.4e %16.4f%% %16.4f%% %Ld\n", f->dt,
+ f->fraction * 100, total_fraction * 100, f->count);
+ }
+ }
+
+ clib_warning ("%U", format_timing_wheel, w, /* verbose */ 1);
+ }
+
+done:
+ return error;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ clib_error_t *error;
+
+ unformat_init_command_line (&i, argv);
+ error = test_timing_wheel_main (&i);
+ unformat_free (&i);
+ if (error)
+ {
+ clib_error_report (error);
+ return 1;
+ }
+ else
+ return 0;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_tw_timer.c b/src/vppinfra/test_tw_timer.c
new file mode 100644
index 00000000..ec0baa07
--- /dev/null
+++ b/src/vppinfra/test_tw_timer.c
@@ -0,0 +1,1275 @@
+#include <vppinfra/time.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vppinfra/tw_timer_2t_1w_2048sl.h>
+#include <vppinfra/tw_timer_16t_2w_512sl.h>
+#include <vppinfra/tw_timer_4t_3w_256sl.h>
+#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
+
+typedef struct
+{
+ /** Handle returned from tw_start_timer */
+ u32 stop_timer_handle;
+
+ /** Test item should expire at this clock tick */
+ u64 expected_to_expire;
+} tw_timer_test_elt_t;
+
+typedef struct
+{
+ /** Pool of test objects */
+ tw_timer_test_elt_t *test_elts;
+
+ /** The single-wheel */
+ tw_timer_wheel_2t_1w_2048sl_t single_wheel;
+
+ /** The double-wheel */
+ tw_timer_wheel_16t_2w_512sl_t double_wheel;
+
+ /* The triple wheel */
+ tw_timer_wheel_4t_3w_256sl_t triple_wheel;
+
+ /* The triple wheel with overflow vector */
+ tw_timer_wheel_1t_3w_1024sl_ov_t triple_ov_wheel;
+
+ /** random number seed */
+ u64 seed;
+
+ /** number of timers */
+ u32 ntimers;
+
+ /** number of "churn" iterations */
+ u32 niter;
+
+ /** number of clock ticks per churn iteration */
+ u32 ticks_per_iter;
+
+ /** cpu timer */
+ clib_time_t clib_time;
+} tw_timer_test_main_t;
+
+tw_timer_test_main_t tw_timer_test_main;
+
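+/* Advance a wheel n_ticks times. 1.01 is slightly more than the 1.0
+   timer interval these tests use, so every call is guaranteed to cross
+   at least one tick boundary. */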
+static void
+run_single_wheel (tw_timer_wheel_2t_1w_2048sl_t * tw, u32 n_ticks)
+{
+ u32 i;
+ f64 now = tw->last_run_time + 1.01;
+
+ for (i = 0; i < n_ticks; i++)
+ {
+ tw_timer_expire_timers_2t_1w_2048sl (tw, now);
+ now += 1.01;
+ }
+}
+
+static void
+run_double_wheel (tw_timer_wheel_16t_2w_512sl_t * tw, u32 n_ticks)
+{
+ u32 i;
+ f64 now = tw->last_run_time + 1.01;
+
+ for (i = 0; i < n_ticks; i++)
+ {
+ tw_timer_expire_timers_16t_2w_512sl (tw, now);
+ now += 1.01;
+ }
+}
+
+static void
+run_triple_wheel (tw_timer_wheel_4t_3w_256sl_t * tw, u32 n_ticks)
+{
+ u32 i;
+ f64 now = tw->last_run_time + 1.01;
+
+ for (i = 0; i < n_ticks; i++)
+ {
+ tw_timer_expire_timers_4t_3w_256sl (tw, now);
+ now += 1.01;
+ }
+}
+
+static void
+run_triple_ov_wheel (tw_timer_wheel_1t_3w_1024sl_ov_t * tw, u32 n_ticks)
+{
+ u32 i;
+ f64 now = tw->last_run_time + 1.01;
+
+ for (i = 0; i < n_ticks; i++)
+ {
+ tw_timer_expire_timers_1t_3w_1024sl_ov (tw, now);
+ now += 1.01;
+ }
+}
+
+static void
+expired_timer_single_callback (u32 * expired_timers)
+{
+ int i;
+ u32 pool_index, timer_id;
+ tw_timer_test_elt_t *e;
+ tw_timer_test_main_t *tm = &tw_timer_test_main;
+
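+  /* 2-timer wheel: the handle packs the timer id into the top bit and
+     the pool index into the low 31 bits. */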
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ pool_index = expired_timers[i] & 0x7FFFFFFF;
+ timer_id = expired_timers[i] >> 31;
+
+ ASSERT (timer_id == 1);
+
+ e = pool_elt_at_index (tm->test_elts, pool_index);
+
+ if (e->expected_to_expire != tm->single_wheel.current_tick)
+ {
+ fformat (stdout, "[%d] expired at %lld not %lld\n",
+ e - tm->test_elts, tm->single_wheel.current_tick,
+ e->expected_to_expire);
+ }
+ pool_put (tm->test_elts, e);
+ }
+}
+
+static void
+expired_timer_double_callback (u32 * expired_timers)
+{
+ int i;
+ u32 pool_index, timer_id;
+ tw_timer_test_elt_t *e;
+ tw_timer_test_main_t *tm = &tw_timer_test_main;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ pool_index = expired_timers[i] & 0x0FFFFFFF;
+ timer_id = expired_timers[i] >> 28;
+
+ ASSERT (timer_id == 14);
+
+ e = pool_elt_at_index (tm->test_elts, pool_index);
+
+ if (e->expected_to_expire != tm->double_wheel.current_tick)
+ {
+ fformat (stdout, "[%d] expired at %lld not %lld\n",
+ e - tm->test_elts, tm->double_wheel.current_tick,
+ e->expected_to_expire);
+ }
+ pool_put (tm->test_elts, e);
+ }
+}
+
+static void
+expired_timer_triple_callback (u32 * expired_timers)
+{
+ int i;
+ u32 pool_index, timer_id;
+ tw_timer_test_elt_t *e;
+ tw_timer_test_main_t *tm = &tw_timer_test_main;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ pool_index = expired_timers[i] & 0x3FFFFFFF;
+ timer_id = expired_timers[i] >> 30;
+
+ ASSERT (timer_id == 3);
+
+ e = pool_elt_at_index (tm->test_elts, pool_index);
+
+ if (e->expected_to_expire != tm->triple_wheel.current_tick)
+ {
+ fformat (stdout, "[%d] expired at %lld not %lld\n",
+ e - tm->test_elts, tm->triple_wheel.current_tick,
+ e->expected_to_expire);
+ }
+ pool_put (tm->test_elts, e);
+ }
+}
+
+static void
+expired_timer_triple_ov_callback (u32 * expired_timers)
+{
+ int i;
+ u32 pool_index;
+ tw_timer_test_elt_t *e;
+ tw_timer_test_main_t *tm = &tw_timer_test_main;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ pool_index = expired_timers[i];
+
+ e = pool_elt_at_index (tm->test_elts, pool_index);
+
+ if (e->expected_to_expire != tm->triple_ov_wheel.current_tick)
+ {
+ fformat (stdout, "[%d] expired at %lld not %lld\n",
+ e - tm->test_elts, tm->triple_ov_wheel.current_tick,
+ e->expected_to_expire);
+ }
+ pool_put (tm->test_elts, e);
+ }
+}
+
+static clib_error_t *
+test2_single (tw_timer_test_main_t * tm)
+{
+ u32 i, j;
+ tw_timer_test_elt_t *e;
+ u32 initial_wheel_offset;
+ u64 expiration_time;
+ u32 max_expiration_time = 0;
+ u32 *deleted_indices = 0;
+ u32 adds = 0, deletes = 0;
+ f64 before, after;
+
+ clib_time_init (&tm->clib_time);
+
+ tw_timer_wheel_init_2t_1w_2048sl (&tm->single_wheel,
+ expired_timer_single_callback,
+ 1.0 /* timer interval */ , ~0);
+
+ /* Prime offset */
+ initial_wheel_offset = 757;
+
+ run_single_wheel (&tm->single_wheel, initial_wheel_offset);
+
+ fformat (stdout, "initial wheel time %d, fast index %d\n",
+ tm->single_wheel.current_tick,
+ tm->single_wheel.current_index[TW_TIMER_RING_FAST]);
+
+ initial_wheel_offset = tm->single_wheel.current_tick;
+
+ fformat (stdout,
+ "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n",
+ tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed);
+
+ before = clib_time_now (&tm->clib_time);
+
+ /* Prime the pump */
+ for (i = 0; i < tm->ntimers; i++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ do
+ {
+ expiration_time = random_u64 (&tm->seed) & (2047);
+ }
+ while (expiration_time == 0);
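+      /* Single 2048-slot wheel: offsets stay within (0, 2047]. */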
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time + initial_wheel_offset;
+ e->stop_timer_handle =
+ tw_timer_start_2t_1w_2048sl (&tm->single_wheel, e - tm->test_elts,
+ 1 /* timer id */ ,
+ expiration_time);
+ }
+
+ adds += i;
+
+ for (i = 0; i < tm->niter; i++)
+ {
+ run_single_wheel (&tm->single_wheel, tm->ticks_per_iter);
+
+ j = 0;
+ vec_reset_length (deleted_indices);
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ tw_timer_stop_2t_1w_2048sl (&tm->single_wheel, e->stop_timer_handle);
+ vec_add1 (deleted_indices, e - tm->test_elts);
+ if (++j >= tm->ntimers / 4)
+ goto del_and_re_add;
+ }));
+ /* *INDENT-ON* */
+
+ del_and_re_add:
+ for (j = 0; j < vec_len (deleted_indices); j++)
+ {
+ pool_put_index (tm->test_elts, deleted_indices[j]);
+ }
+
+ deletes += j;
+
+ for (j = 0; j < tm->ntimers / 4; j++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ do
+ {
+ expiration_time = random_u64 (&tm->seed) & (2047);
+ }
+ while (expiration_time == 0);
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire =
+ expiration_time + tm->single_wheel.current_tick;
+ e->stop_timer_handle = tw_timer_start_2t_1w_2048sl
+ (&tm->single_wheel, e - tm->test_elts, 1 /* timer id */ ,
+ expiration_time);
+ }
+ adds += j;
+ }
+
+ vec_free (deleted_indices);
+
+ run_single_wheel (&tm->single_wheel, max_expiration_time + 1);
+
+ after = clib_time_now (&tm->clib_time);
+
+ fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes,
+ tm->single_wheel.current_tick);
+ fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n",
+ (after - before),
+ ((f64) adds + (f64) deletes +
+ (f64) tm->single_wheel.current_tick) / (after - before));
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ fformat (stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_2t_1w_2048sl (&tm->single_wheel);
+ return 0;
+}
+
+static clib_error_t *
+test2_double (tw_timer_test_main_t * tm)
+{
+ u32 i, j;
+ tw_timer_test_elt_t *e;
+ u32 initial_wheel_offset;
+ u32 expiration_time;
+ u32 max_expiration_time = 0;
+ u32 *deleted_indices = 0;
+ u32 adds = 0, deletes = 0;
+ f64 before, after;
+
+ clib_time_init (&tm->clib_time);
+
+ tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel,
+ expired_timer_double_callback,
+ 1.0 /* timer interval */ , ~0);
+
+ /* Prime offset */
+ initial_wheel_offset = 7577;
+
+ run_double_wheel (&tm->double_wheel, initial_wheel_offset);
+
+ fformat (stdout, "initial wheel time %d, fast index %d slow index %d\n",
+ tm->double_wheel.current_tick,
+ tm->double_wheel.current_index[TW_TIMER_RING_FAST],
+ tm->double_wheel.current_index[TW_TIMER_RING_SLOW]);
+
+ initial_wheel_offset = tm->double_wheel.current_tick;
+
+ fformat (stdout,
+ "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n",
+ tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed);
+
+ before = clib_time_now (&tm->clib_time);
+
+ /* Prime the pump */
+ for (i = 0; i < tm->ntimers; i++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ do
+ {
+ expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1);
+ }
+ while (expiration_time == 0);
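+      /* 512x512 double wheel spans 2^18 ticks; the mask keeps offsets
+         comfortably inside that range. */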
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time + initial_wheel_offset;
+
+ e->stop_timer_handle =
+ tw_timer_start_16t_2w_512sl (&tm->double_wheel, e - tm->test_elts,
+ 14 /* timer id */ ,
+ expiration_time);
+ }
+
+ adds += i;
+
+ for (i = 0; i < tm->niter; i++)
+ {
+ run_double_wheel (&tm->double_wheel, tm->ticks_per_iter);
+
+ j = 0;
+ vec_reset_length (deleted_indices);
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ tw_timer_stop_16t_2w_512sl (&tm->double_wheel, e->stop_timer_handle);
+ vec_add1 (deleted_indices, e - tm->test_elts);
+ if (++j >= tm->ntimers / 4)
+ goto del_and_re_add;
+ }));
+ /* *INDENT-ON* */
+
+ del_and_re_add:
+ for (j = 0; j < vec_len (deleted_indices); j++)
+ pool_put_index (tm->test_elts, deleted_indices[j]);
+
+ deletes += j;
+
+ for (j = 0; j < tm->ntimers / 4; j++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ do
+ {
+ expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1);
+ }
+ while (expiration_time == 0);
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time +
+ tm->double_wheel.current_tick;
+
+ e->stop_timer_handle = tw_timer_start_16t_2w_512sl
+ (&tm->double_wheel, e - tm->test_elts, 14 /* timer id */ ,
+ expiration_time);
+ }
+ adds += j;
+ }
+
+ vec_free (deleted_indices);
+
+ run_double_wheel (&tm->double_wheel, max_expiration_time + 1);
+
+ after = clib_time_now (&tm->clib_time);
+
+ fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes,
+ tm->double_wheel.current_tick);
+ fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n",
+ (after - before),
+ ((f64) adds + (f64) deletes +
+ (f64) tm->double_wheel.current_tick) / (after - before));
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ fformat (stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
+ return 0;
+}
+
+static clib_error_t *
+test2_triple (tw_timer_test_main_t * tm)
+{
+ u32 i, j;
+ tw_timer_test_elt_t *e;
+ u32 initial_wheel_offset = 0;
+ u32 expiration_time;
+ u32 max_expiration_time = 0;
+ u32 *deleted_indices = 0;
+ u32 adds = 0, deletes = 0;
+ f64 before, after;
+
+ clib_time_init (&tm->clib_time);
+
+ tw_timer_wheel_init_4t_3w_256sl (&tm->triple_wheel,
+ expired_timer_triple_callback,
+ 1.0 /* timer interval */ , ~0);
+
+
+ /* Prime offset */
+ initial_wheel_offset = 75700;
+ run_triple_wheel (&tm->triple_wheel, initial_wheel_offset);
+
+ fformat (stdout,
+ "initial wheel time %d, fi %d si %d gi %d\n",
+ tm->triple_wheel.current_tick,
+ tm->triple_wheel.current_index[TW_TIMER_RING_FAST],
+ tm->triple_wheel.current_index[TW_TIMER_RING_SLOW],
+ tm->triple_wheel.current_index[TW_TIMER_RING_GLACIER]);
+
+ initial_wheel_offset = tm->triple_wheel.current_tick;
+
+ fformat (stdout,
+ "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n",
+ tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed);
+
+ before = clib_time_now (&tm->clib_time);
+
+ /* Prime the pump */
+ for (i = 0; i < tm->ntimers; i++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ do
+ {
+ expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1);
+ }
+ while (expiration_time == 0);
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time + initial_wheel_offset;
+
+ e->stop_timer_handle =
+ tw_timer_start_4t_3w_256sl (&tm->triple_wheel, e - tm->test_elts,
+ 3 /* timer id */ ,
+ expiration_time);
+ }
+
+ adds += i;
+
+ for (i = 0; i < tm->niter; i++)
+ {
+ run_triple_wheel (&tm->triple_wheel, tm->ticks_per_iter);
+
+ j = 0;
+ vec_reset_length (deleted_indices);
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ tw_timer_stop_4t_3w_256sl (&tm->triple_wheel, e->stop_timer_handle);
+ vec_add1 (deleted_indices, e - tm->test_elts);
+ if (++j >= tm->ntimers / 4)
+ goto del_and_re_add;
+ }));
+ /* *INDENT-ON* */
+
+ del_and_re_add:
+ for (j = 0; j < vec_len (deleted_indices); j++)
+ pool_put_index (tm->test_elts, deleted_indices[j]);
+
+ deletes += j;
+
+ for (j = 0; j < tm->ntimers / 4; j++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ do
+ {
+ expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1);
+ }
+ while (expiration_time == 0);
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time +
+ tm->triple_wheel.current_tick;
+
+ e->stop_timer_handle = tw_timer_start_4t_3w_256sl
+ (&tm->triple_wheel, e - tm->test_elts, 3 /* timer id */ ,
+ expiration_time);
+ }
+ adds += j;
+ }
+
+ vec_free (deleted_indices);
+
+ run_triple_wheel (&tm->triple_wheel, max_expiration_time + 1);
+
+ after = clib_time_now (&tm->clib_time);
+
+ fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes,
+ tm->triple_wheel.current_tick);
+ fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n",
+ (after - before),
+ ((f64) adds + (f64) deletes +
+ (f64) tm->triple_wheel.current_tick) / (after - before));
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ fformat (stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_4t_3w_256sl (&tm->triple_wheel);
+ return 0;
+}
+
+static clib_error_t *
+test2_triple_ov (tw_timer_test_main_t * tm)
+{
+ u32 i, j;
+ tw_timer_test_elt_t *e;
+ u32 initial_wheel_offset = 0;
+ u32 expiration_time;
+ u32 max_expiration_time = 0;
+ u32 *deleted_indices = 0;
+ u32 adds = 0, deletes = 0;
+ f64 before, after;
+
+ clib_time_init (&tm->clib_time);
+
+ tw_timer_wheel_init_1t_3w_1024sl_ov (&tm->triple_ov_wheel,
+ expired_timer_triple_ov_callback,
+ 1.0 /* timer interval */ , ~0);
+
+
+ /* Prime offset */
+ initial_wheel_offset = 75700;
+ run_triple_ov_wheel (&tm->triple_ov_wheel, initial_wheel_offset);
+
+ fformat (stdout,
+ "initial wheel time %d, fi %d si %d gi %d\n",
+ tm->triple_ov_wheel.current_tick,
+ tm->triple_ov_wheel.current_index[TW_TIMER_RING_FAST],
+ tm->triple_ov_wheel.current_index[TW_TIMER_RING_SLOW],
+ tm->triple_ov_wheel.current_index[TW_TIMER_RING_GLACIER]);
+
+ initial_wheel_offset = tm->triple_ov_wheel.current_tick;
+
+ fformat (stdout,
+ "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n",
+ tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed);
+
+ before = clib_time_now (&tm->clib_time);
+
+ /* Prime the pump */
+ for (i = 0; i < tm->ntimers; i++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ do
+ {
+ expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1);
+ }
+ while (expiration_time == 0);
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time + initial_wheel_offset;
+
+ e->stop_timer_handle =
+ tw_timer_start_1t_3w_1024sl_ov (&tm->triple_ov_wheel,
+ e - tm->test_elts, 0 /* timer id */ ,
+ expiration_time);
+ }
+
+ adds += i;
+
+ for (i = 0; i < tm->niter; i++)
+ {
+ run_triple_ov_wheel (&tm->triple_ov_wheel, tm->ticks_per_iter);
+
+ j = 0;
+ vec_reset_length (deleted_indices);
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ tw_timer_stop_1t_3w_1024sl_ov (&tm->triple_ov_wheel,
+ e->stop_timer_handle);
+ vec_add1 (deleted_indices, e - tm->test_elts);
+ if (++j >= tm->ntimers / 4)
+ goto del_and_re_add;
+ }));
+ /* *INDENT-ON* */
+
+ del_and_re_add:
+ for (j = 0; j < vec_len (deleted_indices); j++)
+ pool_put_index (tm->test_elts, deleted_indices[j]);
+
+ deletes += j;
+
+ for (j = 0; j < tm->ntimers / 4; j++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ do
+ {
+ expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1);
+ }
+ while (expiration_time == 0);
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time +
+ tm->triple_ov_wheel.current_tick;
+
+ e->stop_timer_handle = tw_timer_start_1t_3w_1024sl_ov
+ (&tm->triple_ov_wheel, e - tm->test_elts, 0 /* timer id */ ,
+ expiration_time);
+ }
+ adds += j;
+ }
+
+ vec_free (deleted_indices);
+
+ run_triple_ov_wheel (&tm->triple_ov_wheel, max_expiration_time + 1);
+
+ after = clib_time_now (&tm->clib_time);
+
+ fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes,
+ tm->triple_ov_wheel.current_tick);
+ fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n",
+ (after - before),
+ ((f64) adds + (f64) deletes +
+ (f64) tm->triple_ov_wheel.current_tick) / (after - before));
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ TWT (tw_timer) * t;
+
+ fformat (stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ t = pool_elt_at_index (tm->triple_ov_wheel.timers, e->stop_timer_handle);
+ fformat (stdout, " expiration_time %lld\n", t->expiration_time);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_1t_3w_1024sl_ov (&tm->triple_ov_wheel);
+ return 0;
+}
+
+static clib_error_t *
+test1_single (tw_timer_test_main_t * tm)
+{
+ u32 i;
+ tw_timer_test_elt_t *e;
+ u32 offset;
+
+ tw_timer_wheel_init_2t_1w_2048sl (&tm->single_wheel,
+ expired_timer_single_callback,
+ 1.0 /* timer interval */ , ~0);
+
+ /*
+ * Prime offset, to make sure that the wheel starts in a
+ * non-trivial position
+ */
+ offset = 123;
+
+ run_single_wheel (&tm->single_wheel, offset);
+
+ fformat (stdout, "initial wheel time %d, fast index %d\n",
+ tm->single_wheel.current_tick,
+ tm->single_wheel.current_index[TW_TIMER_RING_FAST]);
+
+ offset = tm->single_wheel.current_tick;
+
+ for (i = 0; i < tm->ntimers; i++)
+ {
+ u32 expected_to_expire;
+ u32 timer_arg;
+
+ timer_arg = 1 + i;
+ timer_arg &= 2047;
+ if (timer_arg == 0)
+ timer_arg = 1;
+
+ expected_to_expire = timer_arg + offset;
+
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+ e->expected_to_expire = expected_to_expire;
+ e->stop_timer_handle = tw_timer_start_2t_1w_2048sl
+ (&tm->single_wheel, e - tm->test_elts, 1 /* timer id */ ,
+ timer_arg);
+ }
+ run_single_wheel (&tm->single_wheel, tm->ntimers + 3);
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ fformat(stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ }));
+ /* *INDENT-ON* */
+
+ fformat (stdout,
+ "final wheel time %d, fast index %d\n",
+ tm->single_wheel.current_tick,
+ tm->single_wheel.current_index[TW_TIMER_RING_FAST]);
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_2t_1w_2048sl (&tm->single_wheel);
+ return 0;
+}
+
+static clib_error_t *
+test1_double (tw_timer_test_main_t * tm)
+{
+ u32 i;
+ tw_timer_test_elt_t *e;
+ u32 offset;
+
+ tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel,
+ expired_timer_double_callback,
+ 1.0 /* timer interval */ , ~0);
+
+ /*
+ * Prime offset, to make sure that the wheel starts in a
+ * non-trivial position
+ */
+ offset = 227989;
+
+ run_double_wheel (&tm->double_wheel, offset);
+
+ fformat (stdout, "initial wheel time %d, fast index %d\n",
+ tm->double_wheel.current_tick,
+ tm->double_wheel.current_index[TW_TIMER_RING_FAST]);
+
+ for (i = 0; i < tm->ntimers; i++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ e->expected_to_expire = i + offset + 1;
+ e->stop_timer_handle = tw_timer_start_16t_2w_512sl
+ (&tm->double_wheel, e - tm->test_elts, 14 /* timer id */ ,
+ i + 1);
+ }
+ run_double_wheel (&tm->double_wheel, tm->ntimers + 3);
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ fformat(stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ }));
+ /* *INDENT-ON* */
+
+ fformat (stdout,
+ "final wheel time %d, fast index %d\n",
+ tm->double_wheel.current_tick,
+ tm->double_wheel.current_index[TW_TIMER_RING_FAST]);
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
+ return 0;
+}
+
+static clib_error_t *
+test3_triple_double (tw_timer_test_main_t * tm)
+{
+ tw_timer_test_elt_t *e;
+ u32 initial_wheel_offset = 0;
+ u32 expiration_time;
+ u32 max_expiration_time = 0;
+ u32 adds = 0, deletes = 0;
+ f64 before, after;
+
+ clib_time_init (&tm->clib_time);
+
+ tw_timer_wheel_init_4t_3w_256sl (&tm->triple_wheel,
+ expired_timer_triple_callback,
+ 1.0 /* timer interval */ , ~0);
+
+ initial_wheel_offset = 0;
+ run_triple_wheel (&tm->triple_wheel, initial_wheel_offset);
+
+ fformat (stdout,
+ "initial wheel time %d, fi %d si %d gi %d\n",
+ tm->triple_wheel.current_tick,
+ tm->triple_wheel.current_index[TW_TIMER_RING_FAST],
+ tm->triple_wheel.current_index[TW_TIMER_RING_SLOW],
+ tm->triple_wheel.current_index[TW_TIMER_RING_GLACIER]);
+
+ initial_wheel_offset = tm->triple_wheel.current_tick;
+
+ fformat (stdout, "Create a timer which expires at wheel-time (1, 0, 0)\n");
+
+ before = clib_time_now (&tm->clib_time);
+
+ /* Prime the pump */
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ /* 1 glacier ring tick from now */
+ expiration_time = TW_SLOTS_PER_RING * TW_SLOTS_PER_RING;
+ e->expected_to_expire = expiration_time + initial_wheel_offset;
+ max_expiration_time = expiration_time;
+
+ e->stop_timer_handle =
+ tw_timer_start_4t_3w_256sl (&tm->triple_wheel, e - tm->test_elts,
+ 3 /* timer id */ ,
+				expiration_time);
+  adds = 1;
+
+ run_triple_wheel (&tm->triple_wheel, max_expiration_time + 1);
+
+ after = clib_time_now (&tm->clib_time);
+
+ fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes,
+ tm->triple_wheel.current_tick);
+ fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n",
+ (after - before),
+ ((f64) adds + (f64) deletes +
+ (f64) tm->triple_wheel.current_tick) / (after - before));
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ fformat (stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_4t_3w_256sl (&tm->triple_wheel);
+ return 0;
+}
+
+static clib_error_t *
+test4_double_double (tw_timer_test_main_t * tm)
+{
+ u32 i;
+ tw_timer_test_elt_t *e;
+ u32 initial_wheel_offset;
+ u32 expiration_time;
+ u32 max_expiration_time = 0;
+ u32 *deleted_indices = 0;
+ u32 adds = 0, deletes = 0;
+ f64 before, after;
+
+ clib_time_init (&tm->clib_time);
+
+ tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel,
+ expired_timer_double_callback,
+ 1.0 /* timer interval */ , ~0);
+ /* Prime offset */
+ initial_wheel_offset = 0;
+
+ run_double_wheel (&tm->double_wheel, initial_wheel_offset);
+
+ fformat (stdout, "initial wheel time %d, fast index %d slow index %d\n",
+ tm->double_wheel.current_tick,
+ tm->double_wheel.current_index[TW_TIMER_RING_FAST],
+ tm->double_wheel.current_index[TW_TIMER_RING_SLOW]);
+
+ initial_wheel_offset = tm->double_wheel.current_tick;
+
+  fformat (stdout, "test %d timers, all expiring at 512 ticks\n",
+	   tm->ntimers);
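+  /* With 512 fast-ring slots, 512 ticks is exactly one full fast-ring
+     revolution, so every timer initially lands in the slow ring. */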
+
+ before = clib_time_now (&tm->clib_time);
+
+ /* Prime the pump */
+ for (i = 0; i < tm->ntimers; i++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ expiration_time = 512;
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time + initial_wheel_offset;
+ e->stop_timer_handle =
+ tw_timer_start_16t_2w_512sl (&tm->double_wheel, e - tm->test_elts,
+ 14 /* timer id */ ,
+ expiration_time);
+ }
+
+  adds = tm->ntimers;
+
+ vec_free (deleted_indices);
+
+ run_double_wheel (&tm->double_wheel, max_expiration_time + 1);
+
+ after = clib_time_now (&tm->clib_time);
+
+ fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes,
+ tm->double_wheel.current_tick);
+ fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n",
+ (after - before),
+ ((f64) adds + (f64) deletes +
+ (f64) tm->double_wheel.current_tick) / (after - before));
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ fformat (stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
+ return 0;
+}
+
+static clib_error_t *
+test5_double (tw_timer_test_main_t * tm)
+{
+ u32 i;
+ tw_timer_test_elt_t *e;
+ u32 initial_wheel_offset;
+ u32 expiration_time;
+ u32 max_expiration_time = 0;
+ u32 adds = 0, deletes = 0;
+ f64 before, after;
+
+ clib_time_init (&tm->clib_time);
+
+ tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel,
+ expired_timer_double_callback,
+ 1.0 /* timer interval */ , ~0);
+
+ /* Prime offset */
+ initial_wheel_offset = 7567;
+
+ run_double_wheel (&tm->double_wheel, initial_wheel_offset);
+
+ fformat (stdout, "initial wheel time %d, fast index %d slow index %d\n",
+ tm->double_wheel.current_tick,
+ tm->double_wheel.current_index[TW_TIMER_RING_FAST],
+ tm->double_wheel.current_index[TW_TIMER_RING_SLOW]);
+
+ initial_wheel_offset = tm->double_wheel.current_tick;
+
+ fformat (stdout,
+ "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n",
+ tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed);
+
+ before = clib_time_now (&tm->clib_time);
+
+ /* Prime the pump */
+ for (i = 0; i < tm->ntimers; i++)
+ {
+ pool_get (tm->test_elts, e);
+ memset (e, 0, sizeof (*e));
+
+ expiration_time = i + 1;
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ e->expected_to_expire = expiration_time + initial_wheel_offset;
+ e->stop_timer_handle =
+ tw_timer_start_16t_2w_512sl (&tm->double_wheel, e - tm->test_elts,
+ 14 /* timer id */ ,
+ expiration_time);
+ }
+
+ adds += i;
+
+ run_double_wheel (&tm->double_wheel, max_expiration_time + 1);
+
+ after = clib_time_now (&tm->clib_time);
+
+ fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes,
+ tm->double_wheel.current_tick);
+ fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n",
+ (after - before),
+ ((f64) adds + (f64) deletes +
+ (f64) tm->double_wheel.current_tick) / (after - before));
+
+ if (pool_elts (tm->test_elts))
+ fformat (stdout, "Note: %d elements remain in pool\n",
+ pool_elts (tm->test_elts));
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, tm->test_elts,
+ ({
+ fformat (stdout, "[%d] expected to expire %d\n",
+ e - tm->test_elts,
+ e->expected_to_expire);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (tm->test_elts);
+ tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
+ return 0;
+}
+
+static clib_error_t *
+timer_test_command_fn (tw_timer_test_main_t * tm, unformat_input_t * input)
+{
+
+ int is_test1 = 0;
+ int num_wheels = 1;
+ int is_test2 = 0;
+ int is_test3 = 0;
+ int is_test4 = 0;
+ int is_test5 = 0;
+ int overflow = 0;
+
+ memset (tm, 0, sizeof (*tm));
+ /* Default values */
+ tm->ntimers = 100000;
+ tm->seed = 0xDEADDABEB00BFACE;
+ tm->niter = 1000;
+ tm->ticks_per_iter = 727;
+
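+  /*
+   * Example invocations; the tokens match the unformat calls below:
+   *   test1 wheels 2
+   *   test2 wheels 3 overflow ntimers 100000
+   *   linear ntimers 100000 niter 1000 ticks_per_iter 727
+   */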
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "seed %lld", &tm->seed))
+ ;
+ else if (unformat (input, "test1"))
+ is_test1 = 1;
+ else if (unformat (input, "test2"))
+ is_test2 = 1;
+ else if (unformat (input, "overflow"))
+ overflow = 1;
+ else if (unformat (input, "lebron"))
+ is_test3 = 1;
+ else if (unformat (input, "wilt"))
+ is_test4 = 1;
+ else if (unformat (input, "linear"))
+ is_test5 = 1;
+ else if (unformat (input, "wheels %d", &num_wheels))
+ ;
+ else if (unformat (input, "ntimers %d", &tm->ntimers))
+ ;
+ else if (unformat (input, "niter %d", &tm->niter))
+ ;
+ else if (unformat (input, "ticks_per_iter %d", &tm->ticks_per_iter))
+ ;
+ else
+ break;
+ }
+
+ if (is_test1 + is_test2 + is_test3 + is_test4 + is_test5 == 0)
+ return clib_error_return (0, "No test specified [test1..n]");
+
+  if (num_wheels < 1 || num_wheels > 3)
+    return clib_error_return (0, "unsupported... 1, 2 or 3 wheels only");
+
+ if (is_test1)
+ {
+ if (num_wheels == 1)
+ return test1_single (tm);
+ else
+ return test1_double (tm);
+ }
+ if (is_test2)
+ {
+ if (num_wheels == 1)
+ return test2_single (tm);
+ else if (num_wheels == 2)
+ return test2_double (tm);
+ else if (num_wheels == 3)
+ {
+ if (overflow == 0)
+ return test2_triple (tm);
+ else
+ return test2_triple_ov (tm);
+ }
+ }
+ if (is_test3)
+ return test3_triple_double (tm);
+
+ if (is_test4)
+ return test4_double_double (tm);
+
+ if (is_test5)
+ return test5_double (tm);
+
+ /* NOTREACHED */
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ clib_error_t *error;
+ tw_timer_test_main_t *tm = &tw_timer_test_main;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ unformat_init_command_line (&i, argv);
+ error = timer_test_command_fn (tm, &i);
+ unformat_free (&i);
+
+ if (error)
+ {
+ clib_error_report (error);
+ return 1;
+ }
+ return 0;
+}
+#endif /* CLIB_UNIX */
+
+/* For debugging... */
+int
+pifi (void *p, u32 index)
+{
+ return pool_is_free_index (p, index);
+}
+
+u32
+vl (void *p)
+{
+ return vec_len (p);
+}
+
+uword
+pe (void *v)
+{
+ return (pool_elts (v));
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_vec.c b/src/vppinfra/test_vec.c
new file mode 100644
index 00000000..f0497ac6
--- /dev/null
+++ b/src/vppinfra/test_vec.c
@@ -0,0 +1,1159 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+ Written by Fred Delley <fdelley@cisco.com> .
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifdef CLIB_LINUX_KERNEL
+#include <linux/unistd.h>
+#endif
+
+#ifdef CLIB_UNIX
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#endif
+
+#include <vppinfra/clib.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/random.h>
+#include <vppinfra/time.h>
+
+#include "test_vec.h"
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+#define MAX_CHANGE 100
+
+
+typedef enum
+{
+ /* Values have to be sequential and start with 0. */
+ OP_IS_VEC_RESIZE = 0,
+ OP_IS_VEC_ADD1,
+ OP_IS_VEC_ADD2,
+ OP_IS_VEC_ADD,
+ OP_IS_VEC_INSERT,
+ OP_IS_VEC_INSERT_ELTS,
+ OP_IS_VEC_DELETE,
+ OP_IS_VEC_DUP,
+ OP_IS_VEC_IS_EQUAL,
+ OP_IS_VEC_ZERO,
+ OP_IS_VEC_SET,
+ OP_IS_VEC_VALIDATE,
+ OP_IS_VEC_FREE,
+ OP_IS_VEC_INIT,
+ OP_IS_VEC_CLONE,
+ OP_IS_VEC_APPEND,
+ OP_IS_VEC_PREPEND,
+ /* Operations on vectors with custom headers. */
+ OP_IS_VEC_INIT_H,
+ OP_IS_VEC_RESIZE_H,
+ OP_IS_VEC_FREE_H,
+ OP_MAX,
+} op_t;
+
+#define FIRST_VEC_OP OP_IS_VEC_RESIZE
+#define LAST_VEC_OP OP_IS_VEC_PREPEND
+#define FIRST_VEC_HDR_OP OP_IS_VEC_INIT_H
+#define LAST_VEC_HDR_OP OP_IS_VEC_FREE_H
+
+uword g_prob_ratio[] = {
+ [OP_IS_VEC_RESIZE] = 5,
+ [OP_IS_VEC_ADD1] = 5,
+ [OP_IS_VEC_ADD2] = 5,
+ [OP_IS_VEC_ADD] = 5,
+ [OP_IS_VEC_INSERT] = 5,
+ [OP_IS_VEC_INSERT_ELTS] = 5,
+ [OP_IS_VEC_DELETE] = 30,
+ [OP_IS_VEC_DUP] = 5,
+ [OP_IS_VEC_IS_EQUAL] = 5,
+ [OP_IS_VEC_ZERO] = 2,
+ [OP_IS_VEC_SET] = 3,
+ [OP_IS_VEC_VALIDATE] = 5,
+ [OP_IS_VEC_FREE] = 5,
+ [OP_IS_VEC_INIT] = 5,
+ [OP_IS_VEC_CLONE] = 5,
+ [OP_IS_VEC_APPEND] = 5,
+ [OP_IS_VEC_PREPEND] = 5,
+ /* Operations on vectors with custom headers. */
+ [OP_IS_VEC_INIT_H] = 5,
+ [OP_IS_VEC_RESIZE_H] = 5,
+ [OP_IS_VEC_FREE_H] = 5,
+};
+
+op_t *g_prob;
+op_t *g_prob_wh;
+
+uword g_call_stats[OP_MAX];
+
+
+/* Using a structure for both vector headers and vector elements may help
+   uncover potential alignment issues. */
+
+typedef struct
+{
+ u8 field1[4];
+ CLIB_PACKED (u32 field2);
+} hdr_t;
+
+typedef struct
+{
+ u8 field1[3];
+ CLIB_PACKED (u32 field2);
+} elt_t;
+
+#ifdef CLIB_UNIX
+u32 g_seed = 0xdeadbabe;
+uword g_verbose = 1;
+#endif
+
+op_t *g_op_prob;
+uword g_set_verbose_at = ~0;
+uword g_dump_period = ~0;
+
+
+static u8 *
+format_vec_op_type (u8 * s, va_list * args)
+{
+ op_t op = va_arg (*args, int);
+
+ switch (op)
+ {
+#define _(n) \
+ case OP_IS_##n: \
+ s = format (s, "OP_IS_" #n); \
+ break;
+
+ _(VEC_RESIZE);
+ _(VEC_ADD1);
+ _(VEC_ADD2);
+ _(VEC_ADD);
+ _(VEC_INSERT);
+ _(VEC_INSERT_ELTS);
+ _(VEC_DELETE);
+ _(VEC_DUP);
+ _(VEC_IS_EQUAL);
+ _(VEC_ZERO);
+ _(VEC_SET);
+ _(VEC_VALIDATE);
+ _(VEC_FREE);
+ _(VEC_INIT);
+ _(VEC_CLONE);
+ _(VEC_APPEND);
+ _(VEC_PREPEND);
+ _(VEC_INIT_H);
+ _(VEC_RESIZE_H);
+ _(VEC_FREE_H);
+
+ default:
+ s = format (s, "Unknown vec op (%d)", op);
+ break;
+ }
+
+#undef _
+
+ return s;
+}
+
+static void
+dump_call_stats (uword * stats)
+{
+ uword i;
+
+ fformat (stdout, "Call Stats\n----------\n");
+
+ for (i = 0; i < OP_MAX; i++)
+ fformat (stdout, "%-8d %U\n", stats[i], format_vec_op_type, i);
+}
+
+
+/* XXX - Purposely low value for debugging the validator. Will be set to a
+   more sensible value later. */
+#define MAX_VEC_LEN 10
+
+#define create_random_vec_wh(elt_type, len, hdr_bytes, seed) \
+({ \
+ elt_type * _v(v) = NULL; \
+ uword _v(l) = (len); \
+ uword _v(h) = (hdr_bytes); \
+ u8 * _v(hdr); \
+ \
+ if (_v(l) == 0) \
+ goto __done__; \
+ \
+ /* ~0 means select random length between 0 and MAX_VEC_LEN. */ \
+ if (_v(l) == ~0) \
+ _v(l) = bounded_random_u32 (&(seed), 0, MAX_VEC_LEN); \
+ \
+ _v(v) = _vec_resize (NULL, _v(l), _v(l) * sizeof (elt_type), _v(h), 0); \
+ fill_with_random_data (_v(v), vec_bytes (_v(v)), (seed)); \
+ \
+ /* Fill header with random data as well. */ \
+ if (_v(h) > 0) \
+ { \
+ _v(hdr) = vec_header (_v(v), _v(h)); \
+ fill_with_random_data (_v(hdr), _v(h), (seed)); \
+ } \
+ \
+__done__: \
+ _v(v); \
+})
+
+#define create_random_vec(elt_type, len, seed) \
+create_random_vec_wh (elt_type, len, 0, seed)
+
+#define compute_vec_hash(hash, vec) \
+({ \
+ u8 * _v(v) = (u8 *) (vec); \
+ uword _v(n) = vec_len (vec) * sizeof ((vec)[0]); \
+ u8 _v(hh) = (u8) (hash); \
+ \
+ compute_mem_hash (_v(hh), _v(v), _v(n)); \
+})
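+
+/* All of the validators below use the same XOR-hash trick: hash the old
+   contents seeded with 0, perform the vector operation, then re-hash
+   seeded with the first result; 0 means the byte-XOR was preserved (a
+   cheap check, not collision-proof).  A minimal sketch:
+
+     u8 hash = compute_vec_hash (0, vec);
+     vec_resize (vec, num_elts);
+     ASSERT (compute_vec_hash (hash, vec) == 0);
+*/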
+
+static elt_t *
+validate_vec_free (elt_t * vec)
+{
+ vec_free (vec);
+ ASSERT (vec == NULL);
+ return vec;
+}
+
+static elt_t *
+validate_vec_free_h (elt_t * vec, uword hdr_bytes)
+{
+ vec_free_h (vec, hdr_bytes);
+ ASSERT (vec == NULL);
+ return vec;
+}
+
+static void
+validate_vec_hdr (elt_t * vec, uword hdr_bytes)
+{
+ u8 *hdr;
+ u8 *hdr_end;
+ vec_header_t *vh;
+
+ if (!vec)
+ return;
+
+ vh = _vec_find (vec);
+ hdr = vec_header (vec, hdr_bytes);
+ hdr_end = vec_header_end (hdr, hdr_bytes);
+
+ ASSERT (hdr_end == (u8 *) vec);
+ ASSERT ((u8 *) vh - (u8 *) hdr >= hdr_bytes);
+}
+
+static void
+validate_vec_len (elt_t * vec)
+{
+ u8 *ptr;
+ u8 *end;
+ uword len;
+ uword bytes;
+ uword i;
+ elt_t *elt;
+
+ if (!vec)
+ return;
+
+ ptr = (u8 *) vec;
+ end = (u8 *) vec_end (vec);
+ len = vec_len (vec);
+ bytes = sizeof (vec[0]) * len;
+
+ ASSERT (bytes == vec_bytes (vec));
+ ASSERT ((ptr + bytes) == end);
+
+ i = 0;
+
+  /* XXX - TODO: confirm that auto-incrementing the pointer inside the
+     vec_is_member() macro call would not have the expected result. */
+ while (vec_is_member (vec, (__typeof__ (vec[0]) *) ptr))
+ {
+ ptr++;
+ i++;
+ }
+
+ ASSERT (ptr == end);
+ ASSERT (i == bytes);
+
+ i = 0;
+
+ vec_foreach (elt, vec) i++;
+
+ ASSERT (i == len);
+}
+
+static void
+validate_vec (elt_t * vec, uword hdr_bytes)
+{
+ validate_vec_hdr (vec, hdr_bytes);
+ validate_vec_len (vec);
+
+ if (!vec || vec_len (vec) == 0)
+ {
+ VERBOSE3 ("Vector at %p has zero elements.\n\n", vec);
+ }
+ else
+ {
+      if (hdr_bytes > 0)
+	VERBOSE3 ("Header: %U\n",
+		  format_hex_bytes, vec_header (vec, hdr_bytes),
+		  hdr_bytes);
+
+ VERBOSE3 ("%U\n\n",
+ format_hex_bytes, vec, vec_len (vec) * sizeof (vec[0]));
+ }
+}
+
+static elt_t *
+validate_vec_resize (elt_t * vec, uword num_elts)
+{
+ uword len1 = vec_len (vec);
+ uword len2;
+ u8 hash = compute_vec_hash (0, vec);
+
+ vec_resize (vec, num_elts);
+ len2 = vec_len (vec);
+
+ ASSERT (len2 == len1 + num_elts);
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static elt_t *
+validate_vec_resize_h (elt_t * vec, uword num_elts, uword hdr_bytes)
+{
+ uword len1, len2;
+ u8 *end1, *end2;
+ u8 *hdr = NULL;
+ u8 hash, hdr_hash;
+
+ len1 = vec_len (vec);
+
+ if (vec)
+ hdr = vec_header (vec, hdr_bytes);
+
+ hash = compute_vec_hash (0, vec);
+ hdr_hash = compute_mem_hash (0, hdr, hdr_bytes);
+
+ vec_resize_ha (vec, num_elts, hdr_bytes, 0);
+ len2 = vec_len (vec);
+
+ ASSERT (len2 == len1 + num_elts);
+
+ end1 = (u8 *) (vec + len1);
+ end2 = (u8 *) vec_end (vec);
+
+ while (end1 != end2)
+ {
+ ASSERT (*end1 == 0);
+ end1++;
+ }
+
+ if (vec)
+ hdr = vec_header (vec, hdr_bytes);
+
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+ ASSERT (compute_mem_hash (hdr_hash, hdr, hdr_bytes) == 0);
+ validate_vec (vec, 1);
+ return vec;
+}
+
+static elt_t *
+generic_validate_vec_add (elt_t * vec, uword num_elts, uword is_add2)
+{
+ uword len1 = vec_len (vec);
+ uword len2;
+ u8 hash = compute_vec_hash (0, vec);
+ elt_t *new;
+
+ if (is_add2)
+ {
+ vec_add2 (vec, new, num_elts);
+ }
+ else
+ {
+ new = create_random_vec (elt_t, num_elts, g_seed);
+
+ VERBOSE3 ("%U\n", format_hex_bytes, new,
+ vec_len (new) * sizeof (new[0]));
+
+ /* Add the hash value of the new elements to that of the old vector. */
+ hash = compute_vec_hash (hash, new);
+
+ if (num_elts == 1)
+ vec_add1 (vec, new[0]);
+ else if (num_elts > 1)
+ vec_add (vec, new, num_elts);
+
+ vec_free (new);
+ }
+
+ len2 = vec_len (vec);
+ ASSERT (len2 == len1 + num_elts);
+
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static elt_t *
+validate_vec_add1 (elt_t * vec)
+{
+ return generic_validate_vec_add (vec, 1, 0);
+}
+
+static elt_t *
+validate_vec_add2 (elt_t * vec, uword num_elts)
+{
+ return generic_validate_vec_add (vec, num_elts, 1);
+}
+
+static elt_t *
+validate_vec_add (elt_t * vec, uword num_elts)
+{
+ return generic_validate_vec_add (vec, num_elts, 0);
+}
+
+static elt_t *
+validate_vec_insert (elt_t * vec, uword num_elts, uword start_elt)
+{
+ uword len1 = vec_len (vec);
+ uword len2;
+ u8 hash;
+
+ /* vec_insert() would not handle it properly. */
+ if (start_elt > len1 || num_elts == 0)
+ return vec;
+
+ hash = compute_vec_hash (0, vec);
+ vec_insert (vec, num_elts, start_elt);
+ len2 = vec_len (vec);
+
+ ASSERT (len2 == len1 + num_elts);
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static elt_t *
+validate_vec_insert_elts (elt_t * vec, uword num_elts, uword start_elt)
+{
+ uword len1 = vec_len (vec);
+ uword len2;
+ elt_t *new;
+ u8 hash;
+
+ /* vec_insert_elts() would not handle it properly. */
+ if (start_elt > len1 || num_elts == 0)
+ return vec;
+
+ new = create_random_vec (elt_t, num_elts, g_seed);
+
+ VERBOSE3 ("%U\n", format_hex_bytes, new, vec_len (new) * sizeof (new[0]));
+
+ /* Add the hash value of the new elements to that of the old vector. */
+ hash = compute_vec_hash (0, vec);
+ hash = compute_vec_hash (hash, new);
+
+ vec_insert_elts (vec, new, num_elts, start_elt);
+ len2 = vec_len (vec);
+
+ vec_free (new);
+
+ ASSERT (len2 == len1 + num_elts);
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static elt_t *
+validate_vec_delete (elt_t * vec, uword num_elts, uword start_elt)
+{
+ uword len1 = vec_len (vec);
+ uword len2;
+ u8 *start;
+ u8 hash;
+ u8 hash_del;
+
+ /* vec_delete() would not handle it properly. */
+ if (start_elt + num_elts > len1)
+ return vec;
+
+ start = (u8 *) vec + (start_elt * sizeof (vec[0]));
+
+ hash = compute_vec_hash (0, vec);
+ hash_del = compute_mem_hash (0, start, num_elts * sizeof (vec[0]));
+ hash ^= hash_del;
+
+ vec_delete (vec, num_elts, start_elt);
+ len2 = vec_len (vec);
+
+ ASSERT (len2 == len1 - num_elts);
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static elt_t *
+validate_vec_dup (elt_t * vec)
+{
+ elt_t *new;
+ u8 hash;
+
+ hash = compute_vec_hash (0, vec);
+ new = vec_dup (vec);
+
+ ASSERT (compute_vec_hash (hash, new) == 0);
+
+ validate_vec (new, 0);
+ return new;
+}
+
+static elt_t *
+validate_vec_zero (elt_t * vec)
+{
+ u8 *ptr;
+ u8 *end;
+
+ vec_zero (vec);
+
+ ptr = (u8 *) vec;
+ end = (u8 *) (vec + vec_len (vec));
+
+ while (ptr != end)
+ {
+ ASSERT (ptr < (u8 *) vec_end (vec));
+ ASSERT (ptr[0] == 0);
+ ptr++;
+ }
+
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static void
+validate_vec_is_equal (elt_t * vec)
+{
+ elt_t *new = NULL;
+
+ if (vec_len (vec) <= 0)
+ return;
+
+ new = vec_dup (vec);
+ ASSERT (vec_is_equal (new, vec));
+ vec_free (new);
+}
+
+static elt_t *
+validate_vec_set (elt_t * vec)
+{
+ uword i;
+ uword len = vec_len (vec);
+ elt_t *new;
+
+ if (!vec)
+ return NULL;
+
+ new = create_random_vec (elt_t, 1, g_seed);
+
+ VERBOSE3 ("%U\n", format_hex_bytes, new, vec_len (new) * sizeof (new[0]));
+
+ vec_set (vec, new[0]);
+
+ for (i = 0; i < len; i++)
+ ASSERT (memcmp (&vec[i], &new[0], sizeof (vec[0])) == 0);
+
+ vec_free (new);
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static elt_t *
+validate_vec_validate (elt_t * vec, uword index)
+{
+ uword len = vec_len (vec);
+ word num_new = index - len + 1;
+ u8 *ptr;
+ u8 *end;
+ u8 hash = compute_vec_hash (0, vec);
+
+ if (num_new < 0)
+ num_new = 0;
+
+ vec_validate (vec, index);
+
+ /* Old len but new vec pointer! */
+ ptr = (u8 *) (vec + len);
+ end = (u8 *) (vec + len + num_new);
+
+ ASSERT (len + num_new == vec_len (vec));
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+
+ while (ptr != end)
+ {
+ ASSERT (ptr < (u8 *) vec_end (vec));
+ ASSERT (ptr[0] == 0);
+ ptr++;
+ }
+
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static elt_t *
+validate_vec_init (uword num_elts)
+{
+ u8 *ptr;
+ u8 *end;
+ uword len;
+ elt_t *new;
+
+ new = vec_new (elt_t, num_elts);
+ len = vec_len (new);
+
+ ASSERT (len == num_elts);
+
+ ptr = (u8 *) new;
+ end = (u8 *) (new + len);
+
+ while (ptr != end)
+ {
+ ASSERT (ptr < (u8 *) vec_end (new));
+ ASSERT (ptr[0] == 0);
+ ptr++;
+ }
+
+ validate_vec (new, 0);
+ return new;
+}
+
+static elt_t *
+validate_vec_init_h (uword num_elts, uword hdr_bytes)
+{
+ uword i = 0;
+ u8 *ptr;
+ u8 *end;
+ uword len;
+ elt_t *new;
+
+ new = vec_new_ha (elt_t, num_elts, hdr_bytes, 0);
+ len = vec_len (new);
+
+ ASSERT (len == num_elts);
+
+ /* We have 2 zero-regions to check: header & vec data (skip _VEC struct). */
+ for (i = 0; i < 2; i++)
+ {
+ if (i == 0)
+ {
+ ptr = (u8 *) vec_header (new, hdr_bytes);
+ end = ptr + hdr_bytes;
+ }
+ else
+ {
+ ptr = (u8 *) new;
+ end = (u8 *) (new + len);
+ }
+
+ while (ptr != end)
+ {
+ ASSERT (ptr < (u8 *) vec_end (new));
+ ASSERT (ptr[0] == 0);
+ ptr++;
+ }
+ }
+
+ validate_vec (new, 1);
+ return new;
+}
+
+/* XXX - I don't understand the purpose of the vec_clone() call. */
+static elt_t *
+validate_vec_clone (elt_t * vec)
+{
+ elt_t *new;
+
+ vec_clone (new, vec);
+
+ ASSERT (vec_len (new) == vec_len (vec));
+ ASSERT (compute_vec_hash (0, new) == 0);
+ validate_vec (new, 0);
+ return new;
+}
+
+static elt_t *
+validate_vec_append (elt_t * vec)
+{
+ elt_t *new;
+ uword num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
+ uword len;
+ u8 hash = 0;
+
+ new = create_random_vec (elt_t, num_elts, g_seed);
+
+ len = vec_len (vec) + vec_len (new);
+ hash = compute_vec_hash (0, vec);
+ hash = compute_vec_hash (hash, new);
+
+ vec_append (vec, new);
+ vec_free (new);
+
+ ASSERT (vec_len (vec) == len);
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static elt_t *
+validate_vec_prepend (elt_t * vec)
+{
+ elt_t *new;
+ uword num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
+ uword len;
+ u8 hash = 0;
+
+ new = create_random_vec (elt_t, num_elts, g_seed);
+
+ len = vec_len (vec) + vec_len (new);
+ hash = compute_vec_hash (0, vec);
+ hash = compute_vec_hash (hash, new);
+
+ vec_prepend (vec, new);
+ vec_free (new);
+
+ ASSERT (vec_len (vec) == len);
+ ASSERT (compute_vec_hash (hash, vec) == 0);
+ validate_vec (vec, 0);
+ return vec;
+}
+
+static void
+run_validator_wh (uword iter)
+{
+ elt_t *vec;
+ uword i;
+ uword op;
+ uword num_elts;
+ uword len;
+ uword dump_time;
+ f64 time[3]; /* [0]: start, [1]: last, [2]: current */
+
+ vec = create_random_vec_wh (elt_t, ~0, sizeof (hdr_t), g_seed);
+ validate_vec (vec, 0);
+ VERBOSE2 ("Start with len %d\n", vec_len (vec));
+
+ time[0] = unix_time_now ();
+ time[1] = time[0];
+ dump_time = g_dump_period;
+
+ for (i = 1; i <= iter; i++)
+ {
+ if (i >= g_set_verbose_at)
+ g_verbose = 2;
+
+ op = bounded_random_u32 (&g_seed, 0, vec_len (g_prob_wh) - 1);
+ op = g_prob_wh[op];
+
+ switch (op)
+ {
+ case OP_IS_VEC_INIT_H:
+ num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
+ vec_free_h (vec, sizeof (hdr_t));
+ VERBOSE2 ("vec_init_h(), new elts %d\n", num_elts);
+ vec = validate_vec_init_h (num_elts, sizeof (hdr_t));
+ break;
+
+ case OP_IS_VEC_RESIZE_H:
+ len = vec_len (vec);
+ num_elts = bounded_random_u32 (&g_seed, len, len + MAX_CHANGE);
+ VERBOSE2 ("vec_resize_h(), %d new elts.\n", num_elts);
+ vec = validate_vec_resize_h (vec, num_elts, sizeof (hdr_t));
+ break;
+
+ case OP_IS_VEC_FREE_H:
+ VERBOSE2 ("vec_free_h()\n");
+ vec = validate_vec_free_h (vec, sizeof (hdr_t));
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+
+ g_call_stats[op]++;
+
+ if (i == dump_time)
+ {
+ time[2] = unix_time_now ();
+ VERBOSE1 ("%d vec ops in %f secs. (last %d in %f secs.).\n",
+ i, time[2] - time[0], g_dump_period, time[2] - time[1]);
+ time[1] = time[2];
+ dump_time += g_dump_period;
+
+ VERBOSE1 ("vec len %d\n", vec_len (vec));
+ VERBOSE2 ("%U\n\n",
+ format_hex_bytes, vec, vec_len (vec) * sizeof (vec[0]));
+ }
+
+ VERBOSE2 ("len %d\n", vec_len (vec));
+ }
+
+ validate_vec (vec, sizeof (hdr_t));
+ vec_free_h (vec, sizeof (hdr_t));
+}
+
+static void
+run_validator (uword iter)
+{
+ elt_t *vec;
+ elt_t *new;
+ uword i;
+ uword op;
+ uword num_elts;
+ uword index;
+ uword len;
+ uword dump_time;
+ f64 time[3]; /* [0]: start, [1]: last, [2]: current */
+
+ vec = create_random_vec (elt_t, ~0, g_seed);
+ validate_vec (vec, 0);
+ VERBOSE2 ("Start with len %d\n", vec_len (vec));
+
+ time[0] = unix_time_now ();
+ time[1] = time[0];
+ dump_time = g_dump_period;
+
+ for (i = 1; i <= iter; i++)
+ {
+ if (i >= g_set_verbose_at)
+ g_verbose = 2;
+
+ op = bounded_random_u32 (&g_seed, 0, vec_len (g_prob) - 1);
+ op = g_prob[op];
+
+ switch (op)
+ {
+ case OP_IS_VEC_RESIZE:
+ len = vec_len (vec);
+ num_elts = bounded_random_u32 (&g_seed, len, len + MAX_CHANGE);
+ VERBOSE2 ("vec_resize(), %d new elts.\n", num_elts);
+ vec = validate_vec_resize (vec, num_elts);
+ break;
+
+ case OP_IS_VEC_ADD1:
+ VERBOSE2 ("vec_add1()\n");
+ vec = validate_vec_add1 (vec);
+ break;
+
+ case OP_IS_VEC_ADD2:
+ num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
+ VERBOSE2 ("vec_add2(), %d new elts.\n", num_elts);
+ vec = validate_vec_add2 (vec, num_elts);
+ break;
+
+ case OP_IS_VEC_ADD:
+ num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
+ VERBOSE2 ("vec_add(), %d new elts.\n", num_elts);
+ vec = validate_vec_add (vec, num_elts);
+ break;
+
+ case OP_IS_VEC_INSERT:
+ len = vec_len (vec);
+ num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
+ index = bounded_random_u32 (&g_seed, 0,
+ (len > 0) ? (len - 1) : (0));
+ VERBOSE2 ("vec_insert(), %d new elts, index %d.\n", num_elts,
+ index);
+ vec = validate_vec_insert (vec, num_elts, index);
+ break;
+
+ case OP_IS_VEC_INSERT_ELTS:
+ len = vec_len (vec);
+ num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
+ index = bounded_random_u32 (&g_seed, 0,
+ (len > 0) ? (len - 1) : (0));
+ VERBOSE2 ("vec_insert_elts(), %d new elts, index %d.\n",
+ num_elts, index);
+ vec = validate_vec_insert_elts (vec, num_elts, index);
+ break;
+
+ case OP_IS_VEC_DELETE:
+ len = vec_len (vec);
+ index = bounded_random_u32 (&g_seed, 0, len - 1);
+ num_elts = bounded_random_u32 (&g_seed, 0,
+ (len > index) ? (len - index) : (0));
+ VERBOSE2 ("vec_delete(), %d elts, index %d.\n", num_elts, index);
+ vec = validate_vec_delete (vec, num_elts, index);
+ break;
+
+ case OP_IS_VEC_DUP:
+ VERBOSE2 ("vec_dup()\n");
+ new = validate_vec_dup (vec);
+ vec_free (new);
+ break;
+
+ case OP_IS_VEC_IS_EQUAL:
+ VERBOSE2 ("vec_is_equal()\n");
+ validate_vec_is_equal (vec);
+ break;
+
+ case OP_IS_VEC_ZERO:
+ VERBOSE2 ("vec_zero()\n");
+ vec = validate_vec_zero (vec);
+ break;
+
+ case OP_IS_VEC_SET:
+ VERBOSE2 ("vec_set()\n");
+ vec = validate_vec_set (vec);
+ break;
+
+ case OP_IS_VEC_VALIDATE:
+ len = vec_len (vec);
+ index = bounded_random_u32 (&g_seed, 0, len - 1 + MAX_CHANGE);
+ VERBOSE2 ("vec_validate(), index %d\n", index);
+ vec = validate_vec_validate (vec, index);
+ break;
+
+ case OP_IS_VEC_FREE:
+ VERBOSE2 ("vec_free()\n");
+ vec = validate_vec_free (vec);
+ break;
+
+ case OP_IS_VEC_INIT:
+ num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
+ vec_free (vec);
+ VERBOSE2 ("vec_init(), new elts %d\n", num_elts);
+ vec = validate_vec_init (num_elts);
+ break;
+
+ case OP_IS_VEC_CLONE:
+ VERBOSE2 ("vec_clone()\n");
+ new = validate_vec_clone (vec);
+ vec_free (new);
+ break;
+
+ case OP_IS_VEC_APPEND:
+ VERBOSE2 ("vec_append()\n");
+ vec = validate_vec_append (vec);
+ break;
+
+ case OP_IS_VEC_PREPEND:
+ VERBOSE2 ("vec_prepend()\n");
+ vec = validate_vec_prepend (vec);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+
+ g_call_stats[op]++;
+
+ if (i == dump_time)
+ {
+ time[2] = unix_time_now ();
+ VERBOSE1 ("%d vec ops in %f secs. (last %d in %f secs.).\n",
+ i, time[2] - time[0], g_dump_period, time[2] - time[1]);
+ time[1] = time[2];
+ dump_time += g_dump_period;
+
+ VERBOSE1 ("vec len %d\n", vec_len (vec));
+ VERBOSE2 ("%U\n\n",
+ format_hex_bytes, vec, vec_len (vec) * sizeof (vec[0]));
+ }
+
+ VERBOSE2 ("len %d\n", vec_len (vec));
+ }
+
+ validate_vec (vec, 0);
+ vec_free (vec);
+}
+
+static void
+prob_init (void)
+{
+ uword i, j, ratio, len, index;
+
+ /* Create the vector to implement the statistical profile:
+ vec [ op1 op1 op1 op2 op3 op3 op3 op4 op4 .... ] */
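+  /* For example, ratios { OP_X = 2, OP_Y = 3 } would produce
+     [ X X Y Y Y ], so a uniformly random index selects OP_X 40% and
+     OP_Y 60% of the time (illustrative op names). */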
+ for (i = FIRST_VEC_OP; i <= LAST_VEC_OP; i++)
+ {
+ ratio = g_prob_ratio[i];
+ if (ratio <= 0)
+ continue;
+
+ len = vec_len (g_prob);
+ index = len - 1 + ratio;
+ ASSERT (index >= 0);
+
+ /* Pre-allocate new elements. */
+ vec_validate (g_prob, index);
+
+ for (j = len; j <= index; j++)
+ g_prob[j] = i;
+ }
+
+ /* Operations on vectors with headers. */
+ for (i = FIRST_VEC_HDR_OP; i <= LAST_VEC_HDR_OP; i++)
+ {
+ ratio = g_prob_ratio[i];
+ if (ratio <= 0)
+ continue;
+
+ len = vec_len (g_prob_wh);
+ index = len - 1 + ratio;
+ ASSERT (index >= 0);
+
+ /* Pre-allocate new elements. */
+ vec_validate (g_prob_wh, index);
+
+ for (j = len; j <= index; j++)
+ g_prob_wh[j] = i;
+ }
+
+ VERBOSE3 ("prob_vec, len %d\n%U\n", vec_len (g_prob),
+ format_hex_bytes, g_prob, vec_len (g_prob) * sizeof (g_prob[0]));
+ VERBOSE3 ("prob_vec_wh, len %d\n%U\n", vec_len (g_prob_wh),
+ format_hex_bytes, g_prob_wh,
+ vec_len (g_prob_wh) * sizeof (g_prob_wh[0]));
+}
+
+static void
+prob_free (void)
+{
+ vec_free (g_prob);
+ vec_free (g_prob_wh);
+}
+
+int
+test_vec_main (unformat_input_t * input)
+{
+ uword iter = 1000;
+ uword help = 0;
+ uword big = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0 == unformat (input, "iter %d", &iter)
+ && 0 == unformat (input, "seed %d", &g_seed)
+ && 0 == unformat (input, "verbose %d", &g_verbose)
+ && 0 == unformat (input, "set %d", &g_set_verbose_at)
+ && 0 == unformat (input, "dump %d", &g_dump_period)
+ && 0 == unformat (input, "help %=", &help, 1)
+ && 0 == unformat (input, "big %=", &big, 1))
+ {
+ clib_error ("unknown input `%U'", format_unformat_error, input);
+ goto usage;
+ }
+ }
+
+ if (big)
+ {
+ u8 *bigboy = 0;
+ u64 one_gig = (1 << 30);
+ u64 size;
+      i64 index;		/* signed: the loop below counts down through 0 */
+
+ fformat (stdout, "giant vector test...");
+ size = 5ULL * one_gig;
+
+ vec_validate (bigboy, size);
+
+ for (index = size; index >= 0; index--)
+ bigboy[index] = index & 0xff;
+ return 0;
+ }
+
+
+ if (help)
+ goto usage;
+
+ prob_init ();
+ run_validator (iter);
+ run_validator_wh (iter);
+ if (verbose)
+ dump_call_stats (g_call_stats);
+ prob_free ();
+
+ if (verbose)
+ {
+ memory_snap ();
+ }
+ return 0;
+
+usage:
+  fformat (stdout, "Usage: test_vec iter <N> seed <N> verbose <N> "
+	   "set <N> dump <N> [help] [big]\n");
+ if (help)
+ return 0;
+
+ return -1;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ mheap_alloc (0, (uword) 10ULL << 30);
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ ret = test_vec_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_vec.h b/src/vppinfra/test_vec.h
new file mode 100644
index 00000000..28e8e2a0
--- /dev/null
+++ b/src/vppinfra/test_vec.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_test_vec_h
+#define included_test_vec_h
+
+
+#include <vppinfra/clib.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+
+
+extern uword g_verbose;
+extern u32 g_seed;
+
+always_inline u8 *
+format_u32_binary (u8 * s, va_list * va)
+{
+ u32 val = va_arg (*va, u32);
+ word i = 0;
+
+ for (i = BITS (val) - 1; i >= 0; i--)
+ {
+ if (val & (1 << i))
+ s = format (s, "1");
+ else
+ s = format (s, "0");
+ }
+
+ return s;
+}
+
+#define VERBOSE1(fmt, args...) \
+do { \
+ if (g_verbose >= 1) \
+ fformat (stdout, fmt, ## args); \
+} while (0)
+
+#define VERBOSE2(fmt, args...) \
+do { \
+ if (g_verbose >= 2) \
+ fformat (stdout, fmt, ## args); \
+} while (0)
+
+#define VERBOSE3(fmt, args...) \
+do { \
+ if (g_verbose >= 3) \
+ fformat (stdout, fmt, ## args); \
+} while (0)
+
+#define clib_mem_free_safe(p) \
+do { \
+ if (p) \
+ { \
+ clib_mem_free (p); \
+ (p) = NULL; \
+ } \
+} while (0)
+
+/* XXX - Calling random_u32() from <vppinfra/random.h> gives an undefined
+   symbol here, so use a local generator instead. */
+/* Simple random number generator with period 2^31 - 1. */
+static u32
+my_random_u32 (u32 * seed_return)
+{
+ /* Unlikely mask value to XOR into seed.
+ Otherwise small seed values would give
+ non-random seeming smallish numbers. */
+ const u32 mask = 0x12345678;
+ u32 seed, a, b, result;
+
+ seed = *seed_return;
+ seed ^= mask;
+
+ a = seed / 127773;
+ b = seed % 127773;
+ seed = 16807 * b - 2836 * a;
+
+ if ((i32) seed < 0)
+ seed += ((u32) 1 << 31) - 1;
+
+ result = seed;
+
+ *seed_return = seed ^ mask;
+
+ return result;
+}
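+
+/* my_random_u32() is the Park-Miller "minimal standard" generator,
+   x' = 16807 * x mod (2^31 - 1), computed with Schrage's method to
+   avoid overflow: 127773 = (2^31 - 1) / 16807 and
+   2836 = (2^31 - 1) % 16807. */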
+
+static u32
+bounded_random_u32 (u32 * seed, uword lo, uword hi)
+{
+ if (lo == hi)
+ return lo;
+
+ ASSERT (lo < hi);
+
+ return ((my_random_u32 (seed) % (hi - lo + ((hi != ~0) ? (1) : (0)))) + lo);
+}
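+
+/* bounded_random_u32 (&seed, lo, hi) returns a value in [lo, hi]; the
+   hi == ~0 special case drops the usual +1 so the modulus cannot wrap
+   to zero (at the cost of never returning ~0 itself). */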
+
+#define fill_with_random_data(ptr, bytes, seed) \
+do { \
+ u8 * _v(p) = (u8 *) (ptr); \
+ uword _v(b) = (bytes); \
+ uword _v(i); \
+ \
+ for (_v(i) = 0; _v(i) < _v(b); _v(i)++) \
+ _v(p)[_v(i)] = (u8) bounded_random_u32 (&(seed), 0, 255); \
+ \
+} while (0)
+
+#define compute_mem_hash(hash, ptr, bytes) \
+({ \
+ u8 * _v(p) = (u8 *) (ptr); \
+ uword _v(b) = (uword) (bytes); \
+ uword _v(i); \
+ uword _v(h) = (u8) (hash); \
+ \
+ if (_v(p) && _v(b) > 0) \
+ { \
+ for (_v(i) = 0; _v(i) < _v(b); _v(i)++) \
+ _v(h) ^= _v(p)[_v(i)]; \
+ } \
+ \
+ _v(h); \
+})
+
+#define log2_align_down(value, align) \
+({ \
+ uword _v = (uword) (value); \
+ uword _a = (uword) (align); \
+ uword _m = (1 << _a) - 1; \
+ \
+ _v = _v & ~_m; \
+})
+
+#define log2_align_up(value, align) \
+({ \
+ uword _v = (uword) (value); \
+ uword _a = (uword) (align); \
+ uword _m = (1 << _a) - 1; \
+ \
+ _v = (_v + _m) & ~_m; \
+})
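+
+/* Worked example: log2_align_up (13, 3) rounds 13 up to the next
+   multiple of 2^3 = 8, i.e. (13 + 7) & ~7 == 16, while
+   log2_align_down (13, 3) yields 13 & ~7 == 8. */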
+
+#define log2_align_ptr_down(ptr, align) \
+uword_to_pointer (log2_align_down (pointer_to_uword (ptr), align), void *)
+
+#define log2_align_ptr_up(ptr, align) \
+uword_to_pointer (log2_align_up (pointer_to_uword (ptr), align), void *)
+
+#define MAX_LOG2_ALIGN 6
+#define MAX_UNALIGN_OFFSET ((1 << MAX_LOG2_ALIGN) - 1)
+
+/* Allocates memory and returns a pointer aligned to 2^log2_align bytes;
+   *ptr_to_free receives the raw allocation for later freeing. */
+always_inline void *
+alloc_aligned (uword size, uword log2_align, void **ptr_to_free)
+{
+ void *p;
+
+ if (size <= 0)
+ return NULL;
+
+ p = (void *) clib_mem_alloc (size + (1 << log2_align) - 1);
+
+ if (ptr_to_free)
+ *ptr_to_free = p;
+
+ return (p) ? log2_align_ptr_up (p, log2_align) : (NULL);
+}
+
+/* Allocates memory and returns a pointer whose address is a
+   MAX_LOG2_ALIGN-aligned address + <offset> */
+always_inline void *
+alloc_unaligned (uword size, uword offset, void **ptr_to_free)
+{
+ void *p;
+
+ if (size <= 0)
+ return NULL;
+
+ ASSERT (offset <= MAX_UNALIGN_OFFSET);
+
+ p =
+ alloc_aligned (size + (1 << MAX_LOG2_ALIGN), MAX_LOG2_ALIGN, ptr_to_free);
+
+ if (!p)
+ return NULL;
+
+ return (void *) ((u8 *) p + (offset % MAX_UNALIGN_OFFSET));
+}
+
+#define memory_snap() \
+do { \
+ clib_mem_usage_t _usage = { 0 }; \
+ clib_mem_usage (&_usage); \
+ fformat (stdout, "%U\n", format_clib_mem_usage, _usage, 0); \
+} while (0)
+
+
+#endif /* included_test_vec_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_vhash.c b/src/vppinfra/test_vhash.c
new file mode 100644
index 00000000..7293fdde
--- /dev/null
+++ b/src/vppinfra/test_vhash.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2010 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#if 0
+#ifdef __OPTIMIZE__
+#undef CLIB_DEBUG
+#endif
+#endif
+
+#include <vppinfra/bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/os.h>
+#include <vppinfra/random.h>
+#include <vppinfra/time.h>
+#include <vppinfra/vhash.h>
+
+#ifdef CLIB_HAVE_VEC128
+
+typedef struct
+{
+ u32 n_iter;
+ u32 seed;
+ u32 verbose;
+ u32 n_keys;
+ u32 log2_size;
+ u32 n_key_u32;
+
+ u32 n_vectors_div_4;
+ u32 n_vectors_mod_4;
+
+ u32 *keys;
+ u32 *results;
+
+ u32 *vhash_get_key_indices;
+ u32 *vhash_get_results;
+
+ u32 *vhash_key_indices;
+ u32 *vhash_results;
+
+ vhash_t vhash;
+
+ uword **key_hash;
+
+ struct
+ {
+ u64 n_clocks;
+ u64 n_vectors;
+ u64 n_calls;
+ } get_stats, set_stats, unset_stats;
+} test_vhash_main_t;
+
+always_inline u32
+test_vhash_key_gather (void *_tm, u32 vi, u32 wi, u32 n_key_u32s)
+{
+ test_vhash_main_t *tm = _tm;
+ ASSERT (n_key_u32s == tm->n_key_u32);
+ ASSERT (wi < n_key_u32s);
+ vi = vec_elt (tm->vhash_key_indices, vi);
+ return vec_elt (tm->keys, vi * n_key_u32s + wi);
+}
+
+always_inline u32x4
+test_vhash_4key_gather (void *_tm, u32 vi, u32 wi, u32 n_key_u32s)
+{
+ test_vhash_main_t *tm = _tm;
+ u32 *p;
+ u32x4_union_t x;
+
+ ASSERT (n_key_u32s == tm->n_key_u32);
+ ASSERT (wi < n_key_u32s);
+
+ p = vec_elt_at_index (tm->vhash_key_indices, vi + 0);
+ x.as_u32[0] = tm->keys[p[0] * n_key_u32s + wi];
+ x.as_u32[1] = tm->keys[p[1] * n_key_u32s + wi];
+ x.as_u32[2] = tm->keys[p[2] * n_key_u32s + wi];
+ x.as_u32[3] = tm->keys[p[3] * n_key_u32s + wi];
+ return x.as_u32x4;
+}
+
+always_inline u32
+test_vhash_get_result (void *_tm,
+ u32 vector_index, u32 result_index, u32 n_key_u32s)
+{
+ test_vhash_main_t *tm = _tm;
+ u32 *p = vec_elt_at_index (tm->vhash_results, vector_index);
+ p[0] = result_index;
+ return result_index;
+}
+
+always_inline u32x4
+test_vhash_get_4result (void *_tm,
+ u32 vector_index, u32x4 results, u32 n_key_u32s)
+{
+ test_vhash_main_t *tm = _tm;
+ u32 *p = vec_elt_at_index (tm->vhash_results, vector_index);
+ *(u32x4 *) p = results;
+ return results;
+}
+
+always_inline u32
+test_vhash_set_result (void *_tm,
+ u32 vector_index, u32 old_result, u32 n_key_u32s)
+{
+ test_vhash_main_t *tm = _tm;
+ u32 *p = vec_elt_at_index (tm->vhash_results, vector_index);
+ u32 new_result = p[0];
+ p[0] = old_result;
+ return new_result;
+}
+
+always_inline u32
+test_vhash_unset_result (void *_tm, u32 i, u32 old_result, u32 n_key_u32s)
+{
+ test_vhash_main_t *tm = _tm;
+ u32 *p = vec_elt_at_index (tm->vhash_results, i);
+ p[0] = old_result;
+ return 0;
+}
+
+#define _(N_KEY_U32) \
+ always_inline u32 \
+ test_vhash_key_gather_##N_KEY_U32 (void * _tm, u32 vi, u32 i) \
+ { return test_vhash_key_gather (_tm, vi, i, N_KEY_U32); } \
+ \
+ always_inline u32x4 \
+ test_vhash_key_gather_4_##N_KEY_U32 (void * _tm, u32 vi, u32 i) \
+ { return test_vhash_4key_gather (_tm, vi, i, N_KEY_U32); } \
+ \
+ clib_pipeline_stage \
+ (test_vhash_gather_keys_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_gather_4key_stage \
+ (&tm->vhash, \
+ /* vector_index */ i, \
+ test_vhash_key_gather_4_##N_KEY_U32, \
+ tm, \
+ N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (test_vhash_gather_keys_mod_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_gather_key_stage \
+ (&tm->vhash, \
+ /* vector_index */ tm->n_vectors_div_4, \
+ /* n_vectors */ tm->n_vectors_mod_4, \
+ test_vhash_key_gather_##N_KEY_U32, \
+ tm, \
+ N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage \
+ (test_vhash_hash_finalize_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_finalize_stage (&tm->vhash, i, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_finalize_stage (&tm->vhash, tm->n_vectors_div_4, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage \
+ (test_vhash_get_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_get_4_stage (&tm->vhash, \
+ /* vector_index */ i, \
+ test_vhash_get_4result, \
+ tm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (test_vhash_get_mod_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_get_stage (&tm->vhash, \
+ /* vector_index */ tm->n_vectors_div_4, \
+ /* n_vectors */ tm->n_vectors_mod_4, \
+ test_vhash_get_result, \
+ tm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage \
+ (test_vhash_set_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_set_stage (&tm->vhash, \
+ /* vector_index */ i, \
+ /* n_vectors */ VECTOR_WORD_TYPE_LEN (u32), \
+ test_vhash_set_result, \
+ tm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (test_vhash_set_mod_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_set_stage (&tm->vhash, \
+ /* vector_index */ tm->n_vectors_div_4, \
+ /* n_vectors */ tm->n_vectors_mod_4, \
+ test_vhash_set_result, \
+ tm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage \
+ (test_vhash_unset_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_unset_stage (&tm->vhash, \
+ /* vector_index */ i, \
+ /* n_vectors */ VECTOR_WORD_TYPE_LEN (u32), \
+ test_vhash_unset_result, \
+ tm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (test_vhash_unset_mod_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_unset_stage (&tm->vhash, \
+ /* vector_index */ tm->n_vectors_div_4, \
+ /* n_vectors */ tm->n_vectors_mod_4, \
+ test_vhash_unset_result, \
+ tm, N_KEY_U32); \
+ })
+
+_(1);
+_(2);
+_(3);
+_(4);
+_(5);
+_(6);
+
+#undef _
+
+#define _(N_KEY_U32) \
+ clib_pipeline_stage \
+ (test_vhash_hash_mix_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_mix_stage (&tm->vhash, i, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (test_vhash_hash_mix_mod_stage_##N_KEY_U32, \
+ test_vhash_main_t *, tm, i, \
+ { \
+ vhash_mix_stage (&tm->vhash, tm->n_vectors_div_4, N_KEY_U32); \
+ })
+
+_(4);
+_(5);
+_(6);
+
+#undef _
+
+typedef enum
+{
+ GET, SET, UNSET,
+} test_vhash_op_t;
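+
+/* test_vhash_op() dispatches on key size: keys of 1..3 u32 words run a
+   3-stage software pipeline (gather keys, finalize hash, get/set/unset);
+   keys of 4..6 words add a hash-mix stage for a 4-stage pipeline.  Full
+   4-wide vectors and the mod-4 remainder use separate stage instances. */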
+
+static void
+test_vhash_op (test_vhash_main_t * tm,
+ u32 * key_indices,
+ u32 * results, uword n_keys, test_vhash_op_t op)
+{
+ vhash_validate_sizes (&tm->vhash, tm->n_key_u32, n_keys);
+
+ tm->vhash_results = results;
+ tm->vhash_key_indices = key_indices;
+ tm->n_vectors_div_4 = n_keys / 4;
+ tm->n_vectors_mod_4 = n_keys % 4;
+
+ if (tm->n_vectors_div_4 > 0)
+ {
+ switch (tm->n_key_u32)
+ {
+ default:
+ ASSERT (0);
+ break;
+
+#define _(N_KEY_U32) \
+ case N_KEY_U32: \
+ if (op == GET) \
+ clib_pipeline_run_3_stage \
+ (tm->n_vectors_div_4, \
+ tm, \
+ test_vhash_gather_keys_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_stage_##N_KEY_U32, \
+ test_vhash_get_stage_##N_KEY_U32); \
+ else if (op == SET) \
+ clib_pipeline_run_3_stage \
+ (tm->n_vectors_div_4, \
+ tm, \
+ test_vhash_gather_keys_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_stage_##N_KEY_U32, \
+ test_vhash_set_stage_##N_KEY_U32); \
+ else \
+ clib_pipeline_run_3_stage \
+ (tm->n_vectors_div_4, \
+ tm, \
+ test_vhash_gather_keys_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_stage_##N_KEY_U32, \
+ test_vhash_unset_stage_##N_KEY_U32); \
+ break;
+
+ _(1);
+ _(2);
+ _(3);
+
+#undef _
+
+#define _(N_KEY_U32) \
+ case N_KEY_U32: \
+ if (op == GET) \
+ clib_pipeline_run_4_stage \
+ (tm->n_vectors_div_4, \
+ tm, \
+ test_vhash_gather_keys_stage_##N_KEY_U32, \
+ test_vhash_hash_mix_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_stage_##N_KEY_U32, \
+ test_vhash_get_stage_##N_KEY_U32); \
+ else if (op == SET) \
+ clib_pipeline_run_4_stage \
+ (tm->n_vectors_div_4, \
+ tm, \
+ test_vhash_gather_keys_stage_##N_KEY_U32, \
+ test_vhash_hash_mix_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_stage_##N_KEY_U32, \
+ test_vhash_set_stage_##N_KEY_U32); \
+ else \
+ clib_pipeline_run_4_stage \
+ (tm->n_vectors_div_4, \
+ tm, \
+ test_vhash_gather_keys_stage_##N_KEY_U32, \
+ test_vhash_hash_mix_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_stage_##N_KEY_U32, \
+ test_vhash_unset_stage_##N_KEY_U32); \
+ break;
+
+ _(4);
+ _(5);
+ _(6);
+
+#undef _
+ }
+ }
+
+
+ if (tm->n_vectors_mod_4 > 0)
+ {
+ switch (tm->n_key_u32)
+ {
+ default:
+ ASSERT (0);
+ break;
+
+#define _(N_KEY_U32) \
+ case N_KEY_U32: \
+ if (op == GET) \
+ clib_pipeline_run_3_stage \
+ (1, \
+ tm, \
+ test_vhash_gather_keys_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \
+ test_vhash_get_mod_stage_##N_KEY_U32); \
+ else if (op == SET) \
+ clib_pipeline_run_3_stage \
+ (1, \
+ tm, \
+ test_vhash_gather_keys_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \
+ test_vhash_set_mod_stage_##N_KEY_U32); \
+ else \
+ clib_pipeline_run_3_stage \
+ (1, \
+ tm, \
+ test_vhash_gather_keys_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \
+ test_vhash_unset_mod_stage_##N_KEY_U32); \
+ break;
+
+ _(1);
+ _(2);
+ _(3);
+
+#undef _
+
+#define _(N_KEY_U32) \
+ case N_KEY_U32: \
+ if (op == GET) \
+ clib_pipeline_run_4_stage \
+ (1, \
+ tm, \
+ test_vhash_gather_keys_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_mix_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \
+ test_vhash_get_mod_stage_##N_KEY_U32); \
+ else if (op == SET) \
+ clib_pipeline_run_4_stage \
+ (1, \
+ tm, \
+ test_vhash_gather_keys_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_mix_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \
+ test_vhash_set_mod_stage_##N_KEY_U32); \
+ else \
+ clib_pipeline_run_4_stage \
+ (1, \
+ tm, \
+ test_vhash_gather_keys_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_mix_mod_stage_##N_KEY_U32, \
+ test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \
+ test_vhash_unset_mod_stage_##N_KEY_U32); \
+ break;
+
+ _(4);
+ _(5);
+ _(6);
+
+#undef _
+ }
+ }
+}
+
+int
+test_vhash_main (unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ test_vhash_main_t _tm, *tm = &_tm;
+ vhash_t *vh = &tm->vhash;
+ uword i, j;
+
+ memset (tm, 0, sizeof (tm[0]));
+ tm->n_iter = 100;
+ tm->seed = 1;
+ tm->n_keys = 1;
+ tm->n_key_u32 = 1;
+ tm->log2_size = 8;
+ tm->verbose = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "iter %d", &tm->n_iter))
+ ;
+ else if (unformat (input, "seed %d", &tm->seed))
+ ;
+ else if (unformat (input, "n-keys %d", &tm->n_keys))
+ ;
+ else if (unformat (input, "log2-size %d", &tm->log2_size))
+ ;
+ else if (unformat (input, "key-words %d", &tm->n_key_u32))
+ ;
+ else if (unformat (input, "verbose %=", &tm->verbose, 1))
+ ;
+ else
+ {
+ error = clib_error_create ("unknown input `%U'\n",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (tm->seed == 0)
+ tm->seed = random_default_seed ();
+
+ clib_warning ("iter %d seed %d n-keys %d log2-size %d key-words %d",
+ tm->n_iter, tm->seed, tm->n_keys, tm->log2_size,
+ tm->n_key_u32);
+
+ {
+ u32 seeds[3];
+ seeds[0] = seeds[1] = seeds[2] = 0xdeadbeef;
+ vhash_init (vh, tm->log2_size, tm->n_key_u32, seeds);
+ }
+
+ /* Choose unique keys. */
+ vec_resize (tm->keys, tm->n_keys * tm->n_key_u32);
+ vec_resize (tm->key_hash, tm->n_key_u32);
+ for (i = j = 0; i < vec_len (tm->keys); i++, j++)
+ {
+ j = j == tm->n_key_u32 ? 0 : j;
+ do
+ {
+ tm->keys[i] = random_u32 (&tm->seed);
+ }
+ while (hash_get (tm->key_hash[j], tm->keys[i]));
+ hash_set (tm->key_hash[j], tm->keys[i], 0);
+ }
+
+ vec_resize (tm->results, tm->n_keys);
+ for (i = 0; i < vec_len (tm->results); i++)
+ {
+ do
+ {
+ tm->results[i] = random_u32 (&tm->seed);
+ }
+ while (tm->results[i] == ~0);
+ }
+
+ vec_resize_aligned (tm->vhash_get_results, tm->n_keys,
+ CLIB_CACHE_LINE_BYTES);
+ vec_clone (tm->vhash_get_key_indices, tm->results);
+ for (i = 0; i < vec_len (tm->vhash_get_key_indices); i++)
+ tm->vhash_get_key_indices[i] = i;
+
+ {
+ uword *is_set_bitmap = 0;
+ uword *to_set_bitmap = 0;
+ uword *to_unset_bitmap = 0;
+ u32 *to_set = 0, *to_unset = 0;
+ u32 *to_set_results = 0, *to_unset_results = 0;
+ u64 t[2];
+
+ for (i = 0; i < tm->n_iter; i++)
+ {
+ vec_reset_length (to_set);
+ vec_reset_length (to_unset);
+ vec_reset_length (to_set_results);
+ vec_reset_length (to_unset_results);
+
+ do
+ {
+ to_set_bitmap = clib_bitmap_random (to_set_bitmap,
+ tm->n_keys, &tm->seed);
+ }
+ while (clib_bitmap_is_zero (to_set_bitmap));
+ to_unset_bitmap = clib_bitmap_dup_and (to_set_bitmap, is_set_bitmap);
+ to_set_bitmap = clib_bitmap_andnot (to_set_bitmap, to_unset_bitmap);
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (j, to_set_bitmap, ({
+ vec_add1 (to_set, j);
+ vec_add1 (to_set_results, tm->results[j]);
+ }));
+ /* *INDENT-ON* */
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (j, to_unset_bitmap, ({
+ vec_add1 (to_unset, j);
+ vec_add1 (to_unset_results, 0xdeadbeef);
+ }));
+ /* *INDENT-ON* */
+
+ if (vec_len (to_set) > 0)
+ {
+ t[0] = clib_cpu_time_now ();
+ test_vhash_op (tm, to_set, to_set_results, vec_len (to_set), SET);
+ t[1] = clib_cpu_time_now ();
+ tm->set_stats.n_clocks += t[1] - t[0];
+ tm->set_stats.n_vectors += vec_len (to_set);
+ tm->set_stats.n_calls += 1;
+ is_set_bitmap = clib_bitmap_or (is_set_bitmap, to_set_bitmap);
+ }
+
+ t[0] = clib_cpu_time_now ();
+ test_vhash_op (tm, tm->vhash_get_key_indices,
+ tm->vhash_get_results,
+ vec_len (tm->vhash_get_key_indices), GET);
+ t[1] = clib_cpu_time_now ();
+ tm->get_stats.n_clocks += t[1] - t[0];
+ tm->get_stats.n_vectors += vec_len (tm->vhash_get_key_indices);
+ tm->get_stats.n_calls += 1;
+
+ for (j = 0; j < vec_len (tm->vhash_get_results); j++)
+ {
+ u32 r0 = tm->vhash_get_results[j];
+ u32 r1 = tm->results[j];
+ if (clib_bitmap_get (is_set_bitmap, j))
+ {
+ if (r0 != r1)
+ os_panic ();
+ }
+ else
+ {
+ if (r0 != ~0)
+ os_panic ();
+ }
+ }
+
+ if (vh->n_elts != clib_bitmap_count_set_bits (is_set_bitmap))
+ os_panic ();
+
+ if (vec_len (to_unset) > 0)
+ {
+ t[0] = clib_cpu_time_now ();
+ test_vhash_op (tm, to_unset, to_unset_results,
+ vec_len (to_unset), UNSET);
+ t[1] = clib_cpu_time_now ();
+ tm->unset_stats.n_clocks += t[1] - t[0];
+ tm->unset_stats.n_vectors += vec_len (to_unset);
+ tm->unset_stats.n_calls += 1;
+ is_set_bitmap =
+ clib_bitmap_andnot (is_set_bitmap, to_unset_bitmap);
+ }
+
+ t[0] = clib_cpu_time_now ();
+ test_vhash_op (tm, tm->vhash_get_key_indices,
+ tm->vhash_get_results,
+ vec_len (tm->vhash_get_key_indices), GET);
+ t[1] = clib_cpu_time_now ();
+ tm->get_stats.n_clocks += t[1] - t[0];
+ tm->get_stats.n_vectors += vec_len (tm->vhash_get_key_indices);
+ tm->get_stats.n_calls += 1;
+
+ for (j = 0; j < vec_len (tm->vhash_get_results); j++)
+ {
+ u32 r0 = tm->vhash_get_results[j];
+ u32 r1 = tm->results[j];
+ if (clib_bitmap_get (is_set_bitmap, j))
+ {
+ if (r0 != r1)
+ os_panic ();
+ }
+ else
+ {
+ if (r0 != ~0)
+ os_panic ();
+ }
+ }
+
+ if (vh->n_elts != clib_bitmap_count_set_bits (is_set_bitmap))
+ os_panic ();
+ }
+
+ vhash_resize (vh, tm->log2_size + 1);
+
+ test_vhash_op (tm, tm->vhash_get_key_indices,
+ tm->vhash_get_results,
+ vec_len (tm->vhash_get_key_indices), GET);
+
+ for (j = 0; j < vec_len (tm->vhash_get_results); j++)
+ {
+ u32 r0 = tm->vhash_get_results[j];
+ u32 r1 = tm->results[j];
+ if (clib_bitmap_get (is_set_bitmap, j))
+ {
+ if (r0 != r1)
+ os_panic ();
+ }
+ else
+ {
+ if (r0 != ~0)
+ os_panic ();
+ }
+ }
+
+ if (vh->n_elts != clib_bitmap_count_set_bits (is_set_bitmap))
+ os_panic ();
+ }
+
+ {
+ clib_time_t ct;
+
+ clib_time_init (&ct);
+
+ clib_warning ("%.4e clocks/get %.4e gets/call %.4e gets/sec",
+ (f64) tm->get_stats.n_clocks /
+ (f64) tm->get_stats.n_vectors,
+ (f64) tm->get_stats.n_vectors / (f64) tm->get_stats.n_calls,
+ (f64) tm->get_stats.n_vectors /
+ (f64) (tm->get_stats.n_clocks * ct.seconds_per_clock));
+ if (tm->set_stats.n_calls > 0)
+ clib_warning ("%.4e clocks/set %.4e sets/call %.4e sets/sec",
+ (f64) tm->set_stats.n_clocks /
+ (f64) tm->set_stats.n_vectors,
+ (f64) tm->set_stats.n_vectors /
+ (f64) tm->set_stats.n_calls,
+ (f64) tm->set_stats.n_vectors /
+ (f64) (tm->set_stats.n_clocks * ct.seconds_per_clock));
+ if (tm->unset_stats.n_calls > 0)
+ clib_warning ("%.4e clocks/unset %.4e unsets/call %.4e unsets/sec",
+ (f64) tm->unset_stats.n_clocks /
+ (f64) tm->unset_stats.n_vectors,
+ (f64) tm->unset_stats.n_vectors /
+ (f64) tm->unset_stats.n_calls,
+ (f64) tm->unset_stats.n_vectors /
+ (f64) (tm->unset_stats.n_clocks * ct.seconds_per_clock));
+ }
+
+done:
+ if (error)
+ clib_error_report (error);
+ return 0;
+}
+
+#endif /* CLIB_HAVE_VEC128 */
+
+#ifndef CLIB_HAVE_VEC128
+int
+test_vhash_main (unformat_input_t * input)
+{
+ clib_error ("compiled without vector support");
+ return 0;
+}
+#endif
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int r;
+
+ unformat_init_command_line (&i, argv);
+ r = test_vhash_main (&i);
+ unformat_free (&i);
+ return r;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/test_zvec.c b/src/vppinfra/test_zvec.c
new file mode 100644
index 00000000..874fdefa
--- /dev/null
+++ b/src/vppinfra/test_zvec.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/zvec.h>
+#include <vppinfra/format.h>
+#include <vppinfra/random.h>
+
+static int verbose;
+#define if_verbose(format,args...) \
+ if (verbose) { clib_warning(format, ## args); }
+
+int
+test_zvec_main (unformat_input_t * input)
+{
+ uword n_iterations;
+ uword i;
+ u32 seed;
+
+ n_iterations = 1024;
+ seed = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0 == unformat (input, "iter %d", &n_iterations)
+ && 0 == unformat (input, "seed %d", &seed))
+ clib_error ("unknown input `%U'", format_unformat_error, input);
+ }
+
+ if_verbose ("%d iterations, seed %d\n", n_iterations, seed);
+
+ for (i = 0; i < n_iterations; i++)
+ {
+ uword coding, data, d[2], limit, n_zdata_bits[2];
+
+ if (seed)
+ coding = random_u32 (&seed);
+ else
+ coding = i;
+
+ limit = coding - 1;
+ if (limit > (1 << 16))
+ limit = 1 << 16;
+ for (data = 0; data <= limit; data++)
+ {
+ d[0] = zvec_encode (coding, data, &n_zdata_bits[0]);
+
+ if (coding != 0)
+ ASSERT ((d[0] >> n_zdata_bits[0]) == 0);
+
+ d[1] = zvec_decode (coding, d[0], &n_zdata_bits[1]);
+ ASSERT (data == d[1]);
+
+ ASSERT (n_zdata_bits[0] == n_zdata_bits[1]);
+ }
+ }
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ ret = test_zvec_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c
new file mode 100644
index 00000000..168d7375
--- /dev/null
+++ b/src/vppinfra/time.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/os.h>
+#include <vppinfra/time.h>
+#include <vppinfra/format.h>
+#include <vppinfra/cpu.h>
+
+#ifdef CLIB_UNIX
+
+#include <math.h>
+#include <sys/time.h>
+#include <fcntl.h>
+
+/* Not a very accurate way of determining the CPU clock frequency
+   on Unix.  Better to use /proc/cpuinfo on Linux. */
+static f64
+estimate_clock_frequency (f64 sample_time)
+{
+  /* Round to nearest 10MHz (100e5 Hz); the raw sample is too
+     noisy for finer resolution. */
+  const f64 round_to_units = 100e5;
+
+ f64 time_now, time_start, time_limit, freq;
+ u64 ifreq, t[2];
+
+ time_start = time_now = unix_time_now ();
+ time_limit = time_now + sample_time;
+ t[0] = clib_cpu_time_now ();
+ while (time_now < time_limit)
+ time_now = unix_time_now ();
+ t[1] = clib_cpu_time_now ();
+
+ freq = (t[1] - t[0]) / (time_now - time_start);
+ ifreq = flt_round_nearest (freq / round_to_units);
+ freq = ifreq * round_to_units;
+
+ return freq;
+}
+
+/* Fetch the CPU frequency by parsing /proc/cpuinfo.
+   Only works on Linux. */
+static f64
+clock_frequency_from_proc_filesystem (void)
+{
+  f64 cpu_freq = 1e9;		/* overwritten below */
+  f64 ppc_timebase = 0;	/* silence 'maybe uninitialized' warnings */
+ int fd;
+ unformat_input_t input;
+
+/* $$$$ aarch64 kernel doesn't report "cpu MHz" */
+#if defined(__aarch64__)
+ return 0.0;
+#endif
+
+ cpu_freq = 0;
+ fd = open ("/proc/cpuinfo", 0);
+ if (fd < 0)
+ return cpu_freq;
+
+ unformat_init_unix_file (&input, fd);
+
+ ppc_timebase = 0;
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&input, "cpu MHz : %f", &cpu_freq))
+ cpu_freq *= 1e6;
+ else if (unformat (&input, "timebase : %f", &ppc_timebase))
+ ;
+ else
+ unformat_skip_line (&input);
+ }
+
+ unformat_free (&input);
+
+ close (fd);
+
+ /* Override CPU frequency with time base for PPC. */
+ if (ppc_timebase != 0)
+ cpu_freq = ppc_timebase;
+
+ return cpu_freq;
+}
+
+/* Fetch the CPU frequency by reading
+   /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq.
+   Only works on Linux. */
+static f64
+clock_frequency_from_sys_filesystem (void)
+{
+ f64 cpu_freq;
+ int fd;
+ unformat_input_t input;
+
+ /* Time stamp always runs at max frequency. */
+ cpu_freq = 0;
+ fd = open ("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 0);
+ if (fd < 0)
+ goto done;
+
+ unformat_init_unix_file (&input, fd);
+ unformat (&input, "%f", &cpu_freq);
+ cpu_freq *= 1e3; /* measured in kHz */
+ unformat_free (&input);
+ close (fd);
+done:
+ return cpu_freq;
+}
+
+f64
+os_cpu_clock_frequency (void)
+{
+ f64 cpu_freq;
+
+ if (clib_cpu_supports_invariant_tsc ())
+ return estimate_clock_frequency (1e-3);
+
+#if defined (__aarch64__)
+ u64 tsc;
+ asm volatile ("mrs %0, CNTFRQ_EL0":"=r" (tsc));
+ return (f64) tsc;
+#endif
+
+ /* First try /sys version. */
+ cpu_freq = clock_frequency_from_sys_filesystem ();
+ if (cpu_freq != 0)
+ return cpu_freq;
+
+ /* Next try /proc version. */
+ cpu_freq = clock_frequency_from_proc_filesystem ();
+ if (cpu_freq != 0)
+ return cpu_freq;
+
+  /* If /proc/cpuinfo fails (e.g. not running on Linux) fall back to
+     an estimate sampled against the kernel clock. */
+ return estimate_clock_frequency (1e-3);
+}
+
+#endif /* CLIB_UNIX */
+
+/* Initialize time. */
+void
+clib_time_init (clib_time_t * c)
+{
+ memset (c, 0, sizeof (c[0]));
+ c->clocks_per_second = os_cpu_clock_frequency ();
+ c->seconds_per_clock = 1 / c->clocks_per_second;
+ c->log2_clocks_per_second = min_log2_u64 ((u64) c->clocks_per_second);
+
+ /* Initially verify frequency every sec */
+ c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second;
+
+ c->last_verify_reference_time = unix_time_now ();
+ c->last_cpu_time = clib_cpu_time_now ();
+ c->init_cpu_time = c->last_verify_cpu_time = c->last_cpu_time;
+}
+
+void
+clib_time_verify_frequency (clib_time_t * c)
+{
+ f64 now_reference = unix_time_now ();
+ f64 dtr = now_reference - c->last_verify_reference_time;
+ f64 dtr_max;
+ u64 dtc = c->last_cpu_time - c->last_verify_cpu_time;
+ f64 round_units = 100e5;
+
+ c->last_verify_cpu_time = c->last_cpu_time;
+ c->last_verify_reference_time = now_reference;
+
+  /*
+   * If the reported reference interval is non-positive, or off by more
+   * than a factor of two (or 8 seconds, whichever is larger), someone
+   * reset the clock behind our back; skip this verification round.
+   */
+ dtr_max = (f64) (2ULL << c->log2_clocks_per_frequency_verify) /
+ (f64) (1ULL << c->log2_clocks_per_second);
+ dtr_max = dtr_max > 8.0 ? dtr_max : 8.0;
+
+ if (dtr <= 0.0 || dtr > dtr_max)
+ {
+ c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second;
+ return;
+ }
+
+ c->clocks_per_second =
+ flt_round_nearest ((f64) dtc / (dtr * round_units)) * round_units;
+ c->seconds_per_clock = 1 / c->clocks_per_second;
+
+ /* Double time between verifies; max at 64 secs ~ 1 minute. */
+ if (c->log2_clocks_per_frequency_verify < c->log2_clocks_per_second + 6)
+ c->log2_clocks_per_frequency_verify += 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/time.h b/src/vppinfra/time.h
new file mode 100644
index 00000000..3fdc7d43
--- /dev/null
+++ b/src/vppinfra/time.h
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_time_h
+#define included_time_h
+
+#include <vppinfra/clib.h>
+
+typedef struct
+{
+ /* Total run time in clock cycles
+ since clib_time_init call. */
+ u64 total_cpu_time;
+
+ /* Last recorded time stamp. */
+ u64 last_cpu_time;
+
+ /* CPU clock frequency. */
+ f64 clocks_per_second;
+
+ /* 1 / cpu clock frequency: conversion factor
+ from clock cycles into seconds. */
+ f64 seconds_per_clock;
+
+ /* Time stamp of call to clib_time_init call. */
+ u64 init_cpu_time;
+
+ u64 last_verify_cpu_time;
+
+ /* Same but for reference time (if present). */
+ f64 last_verify_reference_time;
+
+ u32 log2_clocks_per_second, log2_clocks_per_frequency_verify;
+} clib_time_t;
+
+/* Return CPU time stamp as 64bit number. */
+#if defined(__x86_64__) || defined(i386)
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u32 a, d;
+ asm volatile ("rdtsc":"=a" (a), "=d" (d));
+ return (u64) a + ((u64) d << (u64) 32);
+}
+
+#elif defined (__powerpc64__)
+
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u64 t;
+ asm volatile ("mftb %0":"=r" (t));
+ return t;
+}
+
+#elif defined (__SPU__)
+
+always_inline u64
+clib_cpu_time_now (void)
+{
+#ifdef _XLC
+ return spu_rdch (0x8);
+#else
+ return 0 /* __builtin_si_rdch (0x8) FIXME */ ;
+#endif
+}
+
+#elif defined (__powerpc__)
+
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u32 hi1, hi2, lo;
+ asm volatile ("1:\n"
+ "mftbu %[hi1]\n"
+ "mftb %[lo]\n"
+ "mftbu %[hi2]\n"
+ "cmpw %[hi1],%[hi2]\n"
+ "bne 1b\n":[hi1] "=r" (hi1),[hi2] "=r" (hi2),[lo] "=r" (lo));
+ return (u64) lo + ((u64) hi2 << (u64) 32);
+}
+
+#elif defined (__arm__)
+#if defined(__ARM_ARCH_8A__)
+always_inline u64
+clib_cpu_time_now (void) /* We may run arm64 in aarch32 mode, to leverage 64bit counter */
+{
+ u64 tsc;
+ asm volatile ("mrrc p15, 0, %Q0, %R0, c9":"=r" (tsc));
+ return tsc;
+}
+#elif defined(__ARM_ARCH_7A__)
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u32 tsc;
+ asm volatile ("mrc p15, 0, %0, c9, c13, 0":"=r" (tsc));
+ return (u64) tsc;
+}
+#else
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u32 lo;
+ asm volatile ("mrc p15, 0, %[lo], c15, c12, 1":[lo] "=r" (lo));
+ return (u64) lo;
+}
+#endif
+
+#elif defined (__xtensa__)
+
+/* Stub for now. */
+always_inline u64
+clib_cpu_time_now (void)
+{
+ return 0;
+}
+
+#elif defined (__TMS320C6X__)
+
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u32 l, h;
+
+ asm volatile (" dint\n"
+ " mvc .s2 TSCL,%0\n"
+ " mvc .s2 TSCH,%1\n" " rint\n":"=b" (l), "=b" (h));
+
+ return ((u64) h << 32) | l;
+}
+
+#elif defined (__aarch64__)
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u64 tsc;
+
+ /* Works on Cavium ThunderX. Other platforms: YMMV */
+ asm volatile ("mrs %0, cntvct_el0":"=r" (tsc));
+
+ return tsc;
+}
+
+#else
+#error "don't know how to read CPU time stamp"
+
+#endif
+
+void clib_time_verify_frequency (clib_time_t * c);
+
+always_inline f64
+clib_time_now_internal (clib_time_t * c, u64 n)
+{
+ u64 l = c->last_cpu_time;
+ u64 t = c->total_cpu_time;
+ t += n - l;
+ c->total_cpu_time = t;
+ c->last_cpu_time = n;
+ if (PREDICT_FALSE
+ ((c->last_cpu_time -
+ c->last_verify_cpu_time) >> c->log2_clocks_per_frequency_verify))
+ clib_time_verify_frequency (c);
+ return t * c->seconds_per_clock;
+}
+
+always_inline f64
+clib_time_now (clib_time_t * c)
+{
+ return clib_time_now_internal (c, clib_cpu_time_now ());
+}
+
+always_inline void
+clib_cpu_time_wait (u64 dt)
+{
+ u64 t_end = clib_cpu_time_now () + dt;
+ while (clib_cpu_time_now () < t_end)
+ ;
+}
+
+void clib_time_init (clib_time_t * c);
+
+#ifdef CLIB_UNIX
+
+#include <time.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+/* Use 64bit floating point to represent time offset from epoch. */
+always_inline f64
+unix_time_now (void)
+{
+  /* Calling clock_gettime through the GLIBC/vDSO wrappers is what we
+     want to avoid; issue the bare syscall instead. */
+ struct timespec ts;
+ syscall (SYS_clock_gettime, CLOCK_REALTIME, &ts);
+ return ts.tv_sec + 1e-9 * ts.tv_nsec;
+}
+
+/* As above but integer number of nano-seconds. */
+always_inline u64
+unix_time_now_nsec (void)
+{
+ struct timespec ts;
+ syscall (SYS_clock_gettime, CLOCK_REALTIME, &ts);
+ return 1e9 * ts.tv_sec + ts.tv_nsec;
+}
+
+always_inline void
+unix_time_now_nsec_fraction (u32 * sec, u32 * nsec)
+{
+ struct timespec ts;
+ syscall (SYS_clock_gettime, CLOCK_REALTIME, &ts);
+ *sec = ts.tv_sec;
+ *nsec = ts.tv_nsec;
+}
+
+always_inline f64
+unix_usage_now (void)
+{
+ struct rusage u;
+ getrusage (RUSAGE_SELF, &u);
+ return u.ru_utime.tv_sec + 1e-6 * u.ru_utime.tv_usec
+ + u.ru_stime.tv_sec + 1e-6 * u.ru_stime.tv_usec;
+}
+
+always_inline void
+unix_sleep (f64 dt)
+{
+ struct timespec t;
+  /* Split into whole seconds plus the fractional remainder;
+     tv_nsec must be < 1e9 or nanosleep() fails with EINVAL. */
+  t.tv_sec = dt;
+  t.tv_nsec = 1e9 * (dt - (f64) t.tv_sec);
+ nanosleep (&t, 0);
+}
+
+#else /* ! CLIB_UNIX */
+
+always_inline f64
+unix_time_now (void)
+{
+ return 0;
+}
+
+always_inline u64
+unix_time_now_nsec (void)
+{
+ return 0;
+}
+
+always_inline void
+unix_time_now_nsec_fraction (u32 * sec, u32 * nsec)
+{
+}
+
+always_inline f64
+unix_usage_now (void)
+{
+ return 0;
+}
+
+always_inline void
+unix_sleep (f64 dt)
+{
+}
+
+#endif
+
+#endif /* included_time_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
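
The header above pairs a raw cycle counter (clib_cpu_time_now) with a
calibrated clib_time_t that scales ticks to seconds. A minimal sketch of
the intended call sequence, using only functions defined in time.h:

    #include <vppinfra/time.h>
    #include <vppinfra/format.h>

    int
    main (int argc, char *argv[])
    {
      clib_time_t ct;
      f64 t0, t1;

      clib_time_init (&ct);	/* calibrates clocks_per_second once */

      t0 = clib_time_now (&ct);	/* cycle count scaled to seconds */
      unix_sleep (0.25);
      t1 = clib_time_now (&ct);

      /* clib_time_now() re-verifies the clock frequency periodically
         (initially every second, backing off to ~64 s), so long-running
         measurements stay calibrated even if the initial estimate drifts. */
      fformat (stdout, "slept %.6f seconds\n", t1 - t0);
      return 0;
    }
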
diff --git a/src/vppinfra/timer.c b/src/vppinfra/timer.c
new file mode 100644
index 00000000..0221cb74
--- /dev/null
+++ b/src/vppinfra/timer.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include <vppinfra/vec.h>
+#include <vppinfra/time.h>
+#include <vppinfra/timer.h>
+#include <vppinfra/error.h>
+
+typedef struct
+{
+ f64 time;
+ timer_func_t *func;
+ any arg;
+} timer_callback_t;
+
+/* Vector of currently unexpired timers. */
+static timer_callback_t *timers;
+
+/* Convert time from 64bit floating format to struct timeval. */
+always_inline void
+f64_to_tv (f64 t, struct timeval *tv)
+{
+ tv->tv_sec = t;
+ tv->tv_usec = 1e6 * (t - tv->tv_sec);
+ while (tv->tv_usec >= 1000000)
+ {
+ tv->tv_usec -= 1000000;
+ tv->tv_sec += 1;
+ }
+}
+
+/* Sort timers so that timer soonest to expire is at end. */
+static int
+timer_compare (const void *_a, const void *_b)
+{
+ const timer_callback_t *a = _a;
+ const timer_callback_t *b = _b;
+ f64 dt = b->time - a->time;
+ return dt < 0 ? -1 : (dt > 0 ? +1 : 0);
+}
+
+static inline void
+sort_timers (timer_callback_t * timers)
+{
+ qsort (timers, vec_len (timers), sizeof (timers[0]), timer_compare);
+}
+
+#define TIMER_SIGNAL SIGALRM
+
+/* Don't bother setting the timer if the time difference is less than this value. */
+/* We would like to initialize this to 0.75 / (f64) HZ,
+ * but HZ may not be a compile-time constant on some systems,
+ * so instead we do the initialization before first use.
+ */
+static f64 time_resolution;
+
+/* Interrupt handler. Call functions for all expired timers.
+ Set time for next timer interrupt. */
+static void
+timer_interrupt (int signum)
+{
+ f64 now = unix_time_now ();
+ f64 dt;
+ timer_callback_t *t;
+
+ while (1)
+ {
+ if (vec_len (timers) <= 0)
+ return;
+
+ /* Consider last (earliest) timer in reverse sorted
+ vector of pending timers. */
+ t = vec_end (timers) - 1;
+
+ ASSERT (now >= 0 && finite (now));
+
+ /* Time difference between when timer goes off and now. */
+ dt = t->time - now;
+
+ /* If timer is within threshold of going off
+ call user's callback. */
+ if (dt <= time_resolution && finite (dt))
+ {
+ _vec_len (timers) -= 1;
+ (*t->func) (t->arg, -dt);
+ }
+ else
+ {
+	  /* Set the timer to go off in the future. */
+ struct itimerval itv;
+ memset (&itv, 0, sizeof (itv));
+ f64_to_tv (dt, &itv.it_value);
+ if (setitimer (ITIMER_REAL, &itv, 0) < 0)
+	    clib_unix_error ("setitimer");
+ return;
+ }
+ }
+}
+
+void
+timer_block (sigset_t * save)
+{
+ sigset_t block_timer;
+
+ memset (&block_timer, 0, sizeof (block_timer));
+ sigaddset (&block_timer, TIMER_SIGNAL);
+ sigprocmask (SIG_BLOCK, &block_timer, save);
+}
+
+void
+timer_unblock (sigset_t * save)
+{
+ sigprocmask (SIG_SETMASK, save, 0);
+}
+
+/* Arrange for function to be called some time,
+ roughly equal to dt seconds, in the future. */
+void
+timer_call (timer_func_t * func, any arg, f64 dt)
+{
+ timer_callback_t *t;
+ sigset_t save;
+
+ /* Install signal handler on first call. */
+ static word signal_installed = 0;
+
+ if (!signal_installed)
+ {
+ struct sigaction sa;
+
+ /* Initialize time_resolution before first call to timer_interrupt */
+ time_resolution = 0.75 / (f64) HZ;
+
+ memset (&sa, 0, sizeof (sa));
+ sa.sa_handler = timer_interrupt;
+
+ if (sigaction (TIMER_SIGNAL, &sa, 0) < 0)
+ clib_panic ("sigaction");
+
+ signal_installed = 1;
+ }
+
+ timer_block (&save);
+
+ /* Add new timer. */
+ vec_add2 (timers, t, 1);
+
+ t->time = unix_time_now () + dt;
+ t->func = func;
+ t->arg = arg;
+
+ {
+ word reset_timer = vec_len (timers) == 1;
+
+ if (_vec_len (timers) > 1)
+ {
+ reset_timer += t->time < (t - 1)->time;
+ sort_timers (timers);
+ }
+
+ if (reset_timer)
+ timer_interrupt (TIMER_SIGNAL);
+ }
+
+ timer_unblock (&save);
+}
+
+#ifdef TEST
+
+#include <vppinfra/random.h>
+
+/* Compute average delay of function calls to foo.
+ If this is a small number over a lot of iterations we know
+ the code is working. */
+
+static f64 ave_delay = 0;
+static word ave_delay_count = 0;
+
+always_inline void
+update (f64 delay)
+{
+ ave_delay += delay;
+ ave_delay_count += 1;
+}
+
+typedef struct
+{
+ f64 time_requested, time_called;
+} foo_t;
+
+static f64 foo_base_time = 0;
+static foo_t *foos = 0;
+
+void
+foo (any arg, f64 delay)
+{
+ foos[arg].time_called = unix_time_now () - foo_base_time;
+ update (delay);
+}
+
+typedef struct
+{
+ word count;
+ word limit;
+} bar_t;
+
+void
+bar (any arg, f64 delay)
+{
+ bar_t *b = (bar_t *) arg;
+
+ fformat (stdout, "bar %d delay %g\n", b->count++, delay);
+
+ update (delay);
+ if (b->count < b->limit)
+ timer_call (bar, arg, random_f64 ());
+}
+
+int
+main (int argc, char *argv[])
+{
+ word i, n = atoi (argv[1]);
+ word run_foo = argc > 2;
+  bar_t b = { .limit = 10 };
+
+ if (run_foo)
+ {
+ f64 time_limit;
+
+ time_limit = atof (argv[2]);
+
+ vec_resize (foos, n);
+ for (i = 0; i < n; i++)
+ {
+ foos[i].time_requested = time_limit * random_f64 ();
+ foos[i].time_called = 1e100;
+ }
+
+ foo_base_time = unix_time_now ();
+ for (i = 0; i < n; i++)
+ timer_call (foo, i, foos[i].time_requested);
+ }
+ else
+ timer_call (bar, (any) & b, random_f64 ());
+
+ while (vec_len (timers) > 0)
+ sched_yield ();
+
+ if (vec_len (foos) > 0)
+ {
+ f64 min = 1e100, max = -min;
+ f64 ave = 0, rms = 0;
+
+ for (i = 0; i < n; i++)
+ {
+ f64 dt = foos[i].time_requested - foos[i].time_called;
+ if (dt < min)
+ min = dt;
+ if (dt > max)
+ max = dt;
+ ave += dt;
+ rms += dt * dt;
+ }
+ ave /= n;
+ rms = sqrt (rms / n - ave * ave);
+ fformat (stdout, "error min %g max %g ave %g +- %g\n", min, max, ave,
+ rms);
+ }
+
+ fformat (stdout, "%d function calls, ave. timer delay %g secs\n",
+ ave_delay_count, ave_delay / ave_delay_count);
+
+ return 0;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/timer.h b/src/vppinfra/timer.h
new file mode 100644
index 00000000..764103f7
--- /dev/null
+++ b/src/vppinfra/timer.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_timer_h
+#define included_timer_h
+
+#include <signal.h>
+
+#include <vppinfra/clib.h>	/* for the 'any' and 'f64' types */
+
+typedef void (timer_func_t) (any arg, f64 delay);
+
+/* Arrange for function to be called after time interval (in seconds) has elapsed. */
+extern void timer_call (timer_func_t * func, any arg, f64 time_interval);
+
+/* Block/unblock timer interrupts. */
+extern void timer_block (sigset_t * save);
+extern void timer_unblock (sigset_t * save);
+
+#endif /* included_timer_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
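
The TEST block in timer.c is the fullest demonstration of this API; the
smaller sketch below shows the basic one-shot pattern. The callback name
and interval are illustrative only:

    #include <vppinfra/timer.h>
    #include <vppinfra/format.h>

    /* Illustrative callback: 'arg' is the value given to timer_call(),
       'delay' is how late (in seconds) the timer actually fired. */
    static void
    my_timeout (any arg, f64 delay)
    {
      fformat (stdout, "timer %wd fired, %g secs late\n", arg, delay);
    }

    static void
    schedule_example (void)
    {
      /* One-shot: my_timeout(42, ...) runs roughly 1.5 seconds from
         now, driven by SIGALRM.  Re-arm from inside the callback to
         get periodic behavior, as bar() does in the TEST code. */
      timer_call (my_timeout, 42, 1.5);
    }
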
diff --git a/src/vppinfra/timing_wheel.c b/src/vppinfra/timing_wheel.c
new file mode 100644
index 00000000..064171ab
--- /dev/null
+++ b/src/vppinfra/timing_wheel.c
@@ -0,0 +1,759 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/bitmap.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/timing_wheel.h>
+
+void
+timing_wheel_init (timing_wheel_t * w, u64 current_cpu_time,
+ f64 cpu_clocks_per_second)
+{
+ if (w->max_sched_time <= w->min_sched_time)
+ {
+ w->min_sched_time = 1e-6;
+ w->max_sched_time = 1e-3;
+ }
+
+ w->cpu_clocks_per_second = cpu_clocks_per_second;
+ w->log2_clocks_per_bin =
+ max_log2 (w->cpu_clocks_per_second * w->min_sched_time);
+ w->log2_bins_per_wheel =
+ max_log2 (w->cpu_clocks_per_second * w->max_sched_time);
+ w->log2_bins_per_wheel -= w->log2_clocks_per_bin;
+ w->log2_clocks_per_wheel = w->log2_bins_per_wheel + w->log2_clocks_per_bin;
+ w->bins_per_wheel = 1 << w->log2_bins_per_wheel;
+ w->bins_per_wheel_mask = w->bins_per_wheel - 1;
+
+ w->current_time_index = current_cpu_time >> w->log2_clocks_per_bin;
+
+ if (w->n_wheel_elt_time_bits <= 0 ||
+ w->n_wheel_elt_time_bits >= STRUCT_BITS_OF (timing_wheel_elt_t,
+ cpu_time_relative_to_base))
+ w->n_wheel_elt_time_bits =
+ STRUCT_BITS_OF (timing_wheel_elt_t, cpu_time_relative_to_base) - 1;
+
+ w->cpu_time_base = current_cpu_time;
+  w->time_index_next_cpu_time_base_update = w->current_time_index
+    + ((u64) 1 << (w->n_wheel_elt_time_bits - w->log2_clocks_per_bin));
+}
+
+always_inline uword
+get_level_and_relative_time (timing_wheel_t * w, u64 cpu_time,
+ uword * rtime_result)
+{
+ u64 dt, rtime;
+ uword level_index;
+
+ dt = (cpu_time >> w->log2_clocks_per_bin);
+
+ /* Time should always move forward. */
+ ASSERT (dt >= w->current_time_index);
+
+ dt -= w->current_time_index;
+
+ /* Find level and offset within level. Level i has bins of size 2^((i+1)*M) */
+ rtime = dt;
+ for (level_index = 0; (rtime >> w->log2_bins_per_wheel) != 0; level_index++)
+ rtime = (rtime >> w->log2_bins_per_wheel) - 1;
+
+ /* Return offset within level and level index. */
+ ASSERT (rtime < w->bins_per_wheel);
+ *rtime_result = rtime;
+ return level_index;
+}
+
+always_inline uword
+time_index_to_wheel_index (timing_wheel_t * w, uword level_index, u64 ti)
+{
+ return (ti >> (level_index * w->log2_bins_per_wheel)) &
+ w->bins_per_wheel_mask;
+}
+
+/* Find current time on this level. */
+always_inline uword
+current_time_wheel_index (timing_wheel_t * w, uword level_index)
+{
+ return time_index_to_wheel_index (w, level_index, w->current_time_index);
+}
+
+/* Circular wheel indexing. */
+always_inline uword
+wheel_add (timing_wheel_t * w, word x)
+{
+ return x & w->bins_per_wheel_mask;
+}
+
+always_inline uword
+rtime_to_wheel_index (timing_wheel_t * w, uword level_index, uword rtime)
+{
+ uword t = current_time_wheel_index (w, level_index);
+ return wheel_add (w, t + rtime);
+}
+
+static clib_error_t *
+validate_level (timing_wheel_t * w, uword level_index, uword * n_elts)
+{
+ timing_wheel_level_t *level;
+ timing_wheel_elt_t *e;
+ uword wi;
+ clib_error_t *error = 0;
+
+#define _(x) \
+ do { \
+ error = CLIB_ERROR_ASSERT (x); \
+ ASSERT (! error); \
+ if (error) return error; \
+ } while (0)
+
+ level = vec_elt_at_index (w->levels, level_index);
+ for (wi = 0; wi < vec_len (level->elts); wi++)
+ {
+ /* Validate occupancy bitmap. */
+ _(clib_bitmap_get_no_check (level->occupancy_bitmap, wi) ==
+ (vec_len (level->elts[wi]) > 0));
+
+ *n_elts += vec_len (level->elts[wi]);
+
+ vec_foreach (e, level->elts[wi])
+ {
+ /* Validate time bin and level. */
+ u64 e_time;
+ uword e_ti, e_li, e_wi;
+
+ e_time = e->cpu_time_relative_to_base + w->cpu_time_base;
+ e_li = get_level_and_relative_time (w, e_time, &e_ti);
+ e_wi = rtime_to_wheel_index (w, level_index, e_ti);
+
+ if (e_li == level_index - 1)
+ /* If this element was scheduled on the previous level
+ it must be wrapped. */
+ _(e_ti + current_time_wheel_index (w, level_index - 1)
+ >= w->bins_per_wheel);
+ else
+ {
+ _(e_li == level_index);
+ if (e_li == 0)
+ _(e_wi == wi);
+ else
+ _(e_wi == wi || e_wi + 1 == wi || e_wi - 1 == wi);
+ }
+ }
+ }
+
+#undef _
+
+ return error;
+}
+
+void
+timing_wheel_validate (timing_wheel_t * w)
+{
+ uword l;
+ clib_error_t *error = 0;
+ uword n_elts;
+
+ if (!w->validate)
+ return;
+
+ n_elts = pool_elts (w->overflow_pool);
+ for (l = 0; l < vec_len (w->levels); l++)
+ {
+ error = validate_level (w, l, &n_elts);
+ if (error)
+ clib_error_report (error);
+ }
+}
+
+always_inline void
+free_elt_vector (timing_wheel_t * w, timing_wheel_elt_t * ev)
+{
+ /* Poison free elements so we never use them by mistake. */
+ if (CLIB_DEBUG > 0)
+ memset (ev, ~0, vec_len (ev) * sizeof (ev[0]));
+ _vec_len (ev) = 0;
+ vec_add1 (w->free_elt_vectors, ev);
+}
+
+static timing_wheel_elt_t *
+insert_helper (timing_wheel_t * w, uword level_index, uword rtime)
+{
+ timing_wheel_level_t *level;
+ timing_wheel_elt_t *e;
+ uword wheel_index;
+
+ /* Circular buffer. */
+ vec_validate (w->levels, level_index);
+ level = vec_elt_at_index (w->levels, level_index);
+
+ if (PREDICT_FALSE (!level->elts))
+ {
+ uword max = w->bins_per_wheel - 1;
+ clib_bitmap_validate (level->occupancy_bitmap, max);
+ vec_validate (level->elts, max);
+ }
+
+ wheel_index = rtime_to_wheel_index (w, level_index, rtime);
+
+ level->occupancy_bitmap =
+ clib_bitmap_ori (level->occupancy_bitmap, wheel_index);
+
+ /* Allocate an elt vector from free list if there is one. */
+ if (!level->elts[wheel_index] && vec_len (w->free_elt_vectors))
+ level->elts[wheel_index] = vec_pop (w->free_elt_vectors);
+
+ /* Add element to vector for this time bin. */
+ vec_add2 (level->elts[wheel_index], e, 1);
+
+ return e;
+}
+
+/* Insert user data on wheel at given CPU time stamp. */
+static void
+timing_wheel_insert_helper (timing_wheel_t * w, u64 insert_cpu_time,
+ u32 user_data)
+{
+ timing_wheel_elt_t *e;
+ u64 dt;
+ uword rtime, level_index;
+
+ level_index = get_level_and_relative_time (w, insert_cpu_time, &rtime);
+
+ dt = insert_cpu_time - w->cpu_time_base;
+ if (PREDICT_TRUE (0 == (dt >> BITS (e->cpu_time_relative_to_base))))
+ {
+ e = insert_helper (w, level_index, rtime);
+ e->user_data = user_data;
+ e->cpu_time_relative_to_base = dt;
+ if (insert_cpu_time < w->cached_min_cpu_time_on_wheel)
+ w->cached_min_cpu_time_on_wheel = insert_cpu_time;
+ }
+ else
+ {
+ /* Time too far in the future: add to overflow vector. */
+ timing_wheel_overflow_elt_t *oe;
+ pool_get (w->overflow_pool, oe);
+ oe->user_data = user_data;
+ oe->cpu_time = insert_cpu_time;
+ }
+}
+
+always_inline uword
+elt_is_deleted (timing_wheel_t * w, u32 user_data)
+{
+ return (hash_elts (w->deleted_user_data_hash) > 0
+ && hash_get (w->deleted_user_data_hash, user_data));
+}
+
+static timing_wheel_elt_t *
+delete_user_data (timing_wheel_elt_t * elts, u32 user_data)
+{
+ uword found_match;
+ timing_wheel_elt_t *e, *new_elts;
+
+ /* Quickly scan to see if there are any elements to delete
+ in this bucket. */
+ found_match = 0;
+ vec_foreach (e, elts)
+ {
+ found_match = e->user_data == user_data;
+ if (found_match)
+ break;
+ }
+ if (!found_match)
+ return elts;
+
+ /* Re-scan to build vector of new elts with matching user_data deleted. */
+ new_elts = 0;
+ vec_foreach (e, elts)
+ {
+ if (e->user_data != user_data)
+ vec_add1 (new_elts, e[0]);
+ }
+
+ vec_free (elts);
+ return new_elts;
+}
+
+/* Insert user data on wheel at given CPU time stamp. */
+void
+timing_wheel_insert (timing_wheel_t * w, u64 insert_cpu_time, u32 user_data)
+{
+ /* Remove previously deleted elements. */
+ if (elt_is_deleted (w, user_data))
+ {
+ timing_wheel_level_t *l;
+ uword wi;
+
+ /* Delete elts with given user data so that stale events don't expire. */
+ vec_foreach (l, w->levels)
+ {
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (wi, l->occupancy_bitmap, ({
+ l->elts[wi] = delete_user_data (l->elts[wi], user_data);
+ if (vec_len (l->elts[wi]) == 0)
+ l->occupancy_bitmap = clib_bitmap_andnoti (l->occupancy_bitmap, wi);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ {
+ timing_wheel_overflow_elt_t *oe;
+ /* *INDENT-OFF* */
+ pool_foreach (oe, w->overflow_pool, ({
+ if (oe->user_data == user_data)
+ pool_put (w->overflow_pool, oe);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ hash_unset (w->deleted_user_data_hash, user_data);
+ }
+
+ timing_wheel_insert_helper (w, insert_cpu_time, user_data);
+}
+
+void
+timing_wheel_delete (timing_wheel_t * w, u32 user_data)
+{
+ if (!w->deleted_user_data_hash)
+ w->deleted_user_data_hash =
+ hash_create ( /* capacity */ 0, /* value bytes */ 0);
+
+ hash_set1 (w->deleted_user_data_hash, user_data);
+}
+
+/* Returns time of next expiring element. */
+u64
+timing_wheel_next_expiring_elt_time (timing_wheel_t * w)
+{
+ timing_wheel_level_t *l;
+ timing_wheel_elt_t *e;
+ uword li, wi, wi0;
+ u32 min_dt;
+ u64 min_t;
+ uword wrapped = 0;
+
+ min_dt = ~0;
+ min_t = ~0ULL;
+ vec_foreach (l, w->levels)
+ {
+ if (!l->occupancy_bitmap)
+ continue;
+
+ li = l - w->levels;
+ wi0 = wi = current_time_wheel_index (w, li);
+ wrapped = 0;
+ while (1)
+ {
+ if (clib_bitmap_get_no_check (l->occupancy_bitmap, wi))
+ {
+ vec_foreach (e, l->elts[wi])
+ min_dt = clib_min (min_dt, e->cpu_time_relative_to_base);
+
+ if (wrapped && li + 1 < vec_len (w->levels))
+ {
+ uword wi1 = current_time_wheel_index (w, li + 1);
+ if (l[1].occupancy_bitmap
+ && clib_bitmap_get_no_check (l[1].occupancy_bitmap, wi1))
+ {
+ vec_foreach (e, l[1].elts[wi1])
+ {
+ min_dt =
+ clib_min (min_dt, e->cpu_time_relative_to_base);
+ }
+ }
+ }
+
+ min_t = w->cpu_time_base + min_dt;
+ goto done;
+ }
+
+ wi = wheel_add (w, wi + 1);
+ if (wi == wi0)
+ break;
+
+	  /* Wrapped past the end of this wheel iff the circular
+	     increment carried us back below the starting index. */
+	  wrapped = wi < wi0;
+ }
+ }
+
+ {
+ timing_wheel_overflow_elt_t *oe;
+
+ if (min_dt != ~0)
+ min_t = w->cpu_time_base + min_dt;
+
+ /* *INDENT-OFF* */
+ pool_foreach (oe, w->overflow_pool,
+ ({ min_t = clib_min (min_t, oe->cpu_time); }));
+ /* *INDENT-ON* */
+
+ done:
+ return min_t;
+ }
+}
+
+static inline void
+insert_elt (timing_wheel_t * w, timing_wheel_elt_t * e)
+{
+ u64 t = w->cpu_time_base + e->cpu_time_relative_to_base;
+ timing_wheel_insert_helper (w, t, e->user_data);
+}
+
+always_inline u64
+elt_cpu_time (timing_wheel_t * w, timing_wheel_elt_t * e)
+{
+ return w->cpu_time_base + e->cpu_time_relative_to_base;
+}
+
+always_inline void
+validate_expired_elt (timing_wheel_t * w, timing_wheel_elt_t * e,
+ u64 current_cpu_time)
+{
+ if (CLIB_DEBUG > 0)
+ {
+ u64 e_time = elt_cpu_time (w, e);
+
+ /* Verify that element is actually expired. */
+ ASSERT ((e_time >> w->log2_clocks_per_bin)
+ <= (current_cpu_time >> w->log2_clocks_per_bin));
+ }
+}
+
+static u32 *
+expire_bin (timing_wheel_t * w,
+ uword level_index,
+ uword wheel_index, u64 advance_cpu_time, u32 * expired_user_data)
+{
+ timing_wheel_level_t *level = vec_elt_at_index (w->levels, level_index);
+ timing_wheel_elt_t *e;
+ u32 *x;
+ uword i, j, e_len;
+
+ e = vec_elt (level->elts, wheel_index);
+ e_len = vec_len (e);
+
+ vec_add2 (expired_user_data, x, e_len);
+ for (i = j = 0; i < e_len; i++)
+ {
+ validate_expired_elt (w, &e[i], advance_cpu_time);
+ x[j] = e[i].user_data;
+
+ /* Only advance if elt is not to be deleted. */
+ j += !elt_is_deleted (w, e[i].user_data);
+ }
+
+ /* Adjust for deleted elts. */
+ if (j < e_len)
+ _vec_len (expired_user_data) -= e_len - j;
+
+ free_elt_vector (w, e);
+
+ level->elts[wheel_index] = 0;
+ clib_bitmap_set_no_check (level->occupancy_bitmap, wheel_index, 0);
+
+ return expired_user_data;
+}
+
+/* Called rarely. 32 bit times should only overflow every 4 seconds or so on a fast machine. */
+static u32 *
+advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
+{
+ timing_wheel_level_t *l;
+ timing_wheel_elt_t *e;
+ u64 delta;
+
+ w->stats.cpu_time_base_advances++;
+ delta = ((u64) 1 << w->n_wheel_elt_time_bits);
+ w->cpu_time_base += delta;
+ w->time_index_next_cpu_time_base_update += delta >> w->log2_clocks_per_bin;
+
+ vec_foreach (l, w->levels)
+ {
+ uword wi;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (wi, l->occupancy_bitmap, ({
+ vec_foreach (e, l->elts[wi])
+ {
+	/* This should always be true since otherwise we would have already
+	   expired this element.  Note that in the second half of this
+	   function we take care to hand already-expired overflow elements
+	   straight to the caller rather than re-inserting them. */
+ ASSERT (e->cpu_time_relative_to_base >= delta);
+ e->cpu_time_relative_to_base -= delta;
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ /* See which overflow elements fit now. */
+ {
+ timing_wheel_overflow_elt_t *oe;
+ /* *INDENT-OFF* */
+ pool_foreach (oe, w->overflow_pool, ({
+ /* It fits now into 32 bits. */
+ if (0 == ((oe->cpu_time - w->cpu_time_base) >> BITS (e->cpu_time_relative_to_base)))
+ {
+ u64 ti = oe->cpu_time >> w->log2_clocks_per_bin;
+ if (ti <= w->current_time_index)
+ {
+ /* This can happen when timing wheel is not advanced for a long time
+ (for example when at a gdb breakpoint for a while). */
+ /* Note: the ti == w->current_time_index means it is also an expired timer */
+ if (! elt_is_deleted (w, oe->user_data))
+ vec_add1 (expired_user_data, oe->user_data);
+ }
+ else
+ timing_wheel_insert_helper (w, oe->cpu_time, oe->user_data);
+ pool_put (w->overflow_pool, oe);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ return expired_user_data;
+}
+
+static u32 *
+refill_level (timing_wheel_t * w,
+ uword level_index,
+ u64 advance_cpu_time,
+ uword from_wheel_index,
+ uword to_wheel_index, u32 * expired_user_data)
+{
+ timing_wheel_level_t *level;
+ timing_wheel_elt_t *to_insert = w->unexpired_elts_pending_insert;
+ u64 advance_time_index = advance_cpu_time >> w->log2_clocks_per_bin;
+
+ vec_validate (w->stats.refills, level_index);
+ w->stats.refills[level_index] += 1;
+
+ if (level_index + 1 >= vec_len (w->levels))
+ goto done;
+
+ level = vec_elt_at_index (w->levels, level_index + 1);
+ if (!level->occupancy_bitmap)
+ goto done;
+
+ while (1)
+ {
+ timing_wheel_elt_t *e, *es;
+
+ if (clib_bitmap_get_no_check
+ (level->occupancy_bitmap, from_wheel_index))
+ {
+ es = level->elts[from_wheel_index];
+ level->elts[from_wheel_index] = 0;
+ clib_bitmap_set_no_check (level->occupancy_bitmap, from_wheel_index,
+ 0);
+
+ vec_foreach (e, es)
+ {
+ u64 e_time = elt_cpu_time (w, e);
+ u64 ti = e_time >> w->log2_clocks_per_bin;
+ if (ti <= advance_time_index)
+ {
+ validate_expired_elt (w, e, advance_cpu_time);
+ if (!elt_is_deleted (w, e->user_data))
+ vec_add1 (expired_user_data, e->user_data);
+ }
+ else
+ vec_add1 (to_insert, e[0]);
+ }
+ free_elt_vector (w, es);
+ }
+
+ if (from_wheel_index == to_wheel_index)
+ break;
+
+ from_wheel_index = wheel_add (w, from_wheel_index + 1);
+ }
+
+ timing_wheel_validate (w);
+done:
+ w->unexpired_elts_pending_insert = to_insert;
+ return expired_user_data;
+}
+
+/* Advance wheel and return any expired user data in vector. */
+u32 *
+timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
+ u32 * expired_user_data,
+ u64 * next_expiring_element_cpu_time)
+{
+ timing_wheel_level_t *level;
+ uword level_index, advance_rtime, advance_level_index, advance_wheel_index;
+ uword n_expired_user_data_before;
+ u64 current_time_index, advance_time_index;
+
+ n_expired_user_data_before = vec_len (expired_user_data);
+
+ /* Re-fill lower levels when time wraps. */
+ current_time_index = w->current_time_index;
+ advance_time_index = advance_cpu_time >> w->log2_clocks_per_bin;
+
+ {
+ u64 current_ti, advance_ti;
+
+ current_ti = current_time_index >> w->log2_bins_per_wheel;
+ advance_ti = advance_time_index >> w->log2_bins_per_wheel;
+
+ if (PREDICT_FALSE (current_ti != advance_ti))
+ {
+ if (w->unexpired_elts_pending_insert)
+ _vec_len (w->unexpired_elts_pending_insert) = 0;
+
+ level_index = 0;
+ while (current_ti != advance_ti)
+ {
+ uword c, a;
+ c = current_ti & (w->bins_per_wheel - 1);
+ a = advance_ti & (w->bins_per_wheel - 1);
+ if (c != a)
+ expired_user_data = refill_level (w,
+ level_index,
+ advance_cpu_time,
+ c, a, expired_user_data);
+ current_ti >>= w->log2_bins_per_wheel;
+ advance_ti >>= w->log2_bins_per_wheel;
+ level_index++;
+ }
+ }
+ }
+
+ advance_level_index =
+ get_level_and_relative_time (w, advance_cpu_time, &advance_rtime);
+ advance_wheel_index =
+ rtime_to_wheel_index (w, advance_level_index, advance_rtime);
+
+ /* Empty all occupied bins for entire levels that we advance past. */
+ for (level_index = 0; level_index < advance_level_index; level_index++)
+ {
+ uword wi;
+
+ if (level_index >= vec_len (w->levels))
+ break;
+
+ level = vec_elt_at_index (w->levels, level_index);
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (wi, level->occupancy_bitmap, ({
+ expired_user_data = expire_bin (w, level_index, wi, advance_cpu_time,
+ expired_user_data);
+ }));
+ /* *INDENT-ON* */
+ }
+
+ if (PREDICT_TRUE (level_index < vec_len (w->levels)))
+ {
+ uword wi;
+ level = vec_elt_at_index (w->levels, level_index);
+ wi = current_time_wheel_index (w, level_index);
+ if (level->occupancy_bitmap)
+ while (1)
+ {
+ if (clib_bitmap_get_no_check (level->occupancy_bitmap, wi))
+ expired_user_data =
+ expire_bin (w, advance_level_index, wi, advance_cpu_time,
+ expired_user_data);
+
+ /* When we jump out, we have already just expired the bin,
+ corresponding to advance_wheel_index */
+ if (wi == advance_wheel_index)
+ break;
+
+ wi = wheel_add (w, wi + 1);
+ }
+ }
+
+ /* Advance current time index. */
+ w->current_time_index = advance_time_index;
+
+ if (vec_len (w->unexpired_elts_pending_insert) > 0)
+ {
+ timing_wheel_elt_t *e;
+ vec_foreach (e, w->unexpired_elts_pending_insert) insert_elt (w, e);
+ _vec_len (w->unexpired_elts_pending_insert) = 0;
+ }
+
+ /* Don't advance until necessary. */
+ /* However, if the timing_wheel_advance() hasn't been called for some time,
+ the while() loop will ensure multiple calls to advance_cpu_time_base()
+ in a row until the w->cpu_time_base is fresh enough. */
+ while (PREDICT_FALSE
+ (advance_time_index >= w->time_index_next_cpu_time_base_update))
+ expired_user_data = advance_cpu_time_base (w, expired_user_data);
+
+ if (next_expiring_element_cpu_time)
+ {
+ u64 min_t;
+
+ /* Anything expired? If so we need to recompute next expiring elt time. */
+ if (vec_len (expired_user_data) == n_expired_user_data_before
+ && w->cached_min_cpu_time_on_wheel != 0ULL)
+ min_t = w->cached_min_cpu_time_on_wheel;
+ else
+ {
+ min_t = timing_wheel_next_expiring_elt_time (w);
+ w->cached_min_cpu_time_on_wheel = min_t;
+ }
+
+ *next_expiring_element_cpu_time = min_t;
+ }
+
+ return expired_user_data;
+}
+
+u8 *
+format_timing_wheel (u8 * s, va_list * va)
+{
+ timing_wheel_t *w = va_arg (*va, timing_wheel_t *);
+ int verbose = va_arg (*va, int);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "level 0: %.4e - %.4e secs, 2^%d - 2^%d clocks",
+ (f64) (1 << w->log2_clocks_per_bin) / w->cpu_clocks_per_second,
+ (f64) (1 << w->log2_clocks_per_wheel) /
+ w->cpu_clocks_per_second, w->log2_clocks_per_bin,
+ w->log2_clocks_per_wheel);
+
+ if (verbose)
+ {
+ int l;
+
+ s = format (s, "\n%Utime base advances %Ld, every %.4e secs",
+ format_white_space, indent + 2,
+ w->stats.cpu_time_base_advances,
+ (f64) ((u64) 1 << w->n_wheel_elt_time_bits) /
+ w->cpu_clocks_per_second);
+
+ for (l = 0; l < vec_len (w->levels); l++)
+ s = format (s, "\n%Ulevel %d: refills %Ld",
+ format_white_space, indent + 2,
+ l,
+		  l < vec_len (w->stats.refills)
+		  ? w->stats.refills[l] : (u64) 0);
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/timing_wheel.h b/src/vppinfra/timing_wheel.h
new file mode 100644
index 00000000..7daea994
--- /dev/null
+++ b/src/vppinfra/timing_wheel.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_clib_timing_wheel_h
+#define included_clib_timing_wheel_h
+
+#include <vppinfra/format.h>
+
+typedef struct
+{
+  /* Time of this element in units of cpu clock ticks relative to the
+     time base.  32 bits should be large enough for several kilo-seconds
+     to elapse before we have to re-set the time base. */
+ u32 cpu_time_relative_to_base;
+
+ /* User data to store in this bin. */
+ u32 user_data;
+} timing_wheel_elt_t;
+
+/* Overflow wheel elements where time does not fit into 32 bits. */
+typedef struct
+{
+ /* Absolute time of this element. */
+ u64 cpu_time;
+
+ /* User data to store in this bin. */
+ u32 user_data;
+
+ u32 pad;
+} timing_wheel_overflow_elt_t;
+
+typedef struct
+{
+ /* 2^M bits: 1 means vector is non-zero else zero. */
+ uword *occupancy_bitmap;
+
+ /* 2^M element table of element vectors, one for each time bin. */
+ timing_wheel_elt_t **elts;
+} timing_wheel_level_t;
+
+typedef struct
+{
+ /* Vector of refill counts per level. */
+ u64 *refills;
+
+ /* Number of times cpu time base was rescaled. */
+ u64 cpu_time_base_advances;
+} timing_wheel_stats_t;
+
+typedef struct
+{
+ /* Each bin is a power of two clock ticks (N)
+ chosen so that 2^N >= min_sched_time. */
+ u8 log2_clocks_per_bin;
+
+ /* Wheels are 2^M bins where 2^(N+M) >= max_sched_time. */
+ u8 log2_bins_per_wheel;
+
+ /* N + M. */
+ u8 log2_clocks_per_wheel;
+
+ /* Number of bits to use in cpu_time_relative_to_base field
+ of timing_wheel_elt_t. */
+ u8 n_wheel_elt_time_bits;
+
+ /* 2^M. */
+ u32 bins_per_wheel;
+
+ /* 2^M - 1. */
+ u32 bins_per_wheel_mask;
+
+ timing_wheel_level_t *levels;
+
+ timing_wheel_overflow_elt_t *overflow_pool;
+
+ /* Free list of element vector so we can recycle old allocated vectors. */
+ timing_wheel_elt_t **free_elt_vectors;
+
+ timing_wheel_elt_t *unexpired_elts_pending_insert;
+
+ /* Hash table of user data values which have been deleted but not yet re-inserted. */
+ uword *deleted_user_data_hash;
+
+ /* Enable validation for debugging. */
+ u32 validate;
+
+ /* Time index. Measures time in units of 2^N clock ticks from
+ when wheel starts. */
+ u64 current_time_index;
+
+ /* All times are 32 bit numbers relative to cpu_time_base.
+ So, roughly every 2^(32 + N) clocks we'll need to subtract from
+ all timing_wheel_elt_t times to make sure they never overflow. */
+ u64 cpu_time_base;
+
+ /* When current_time_index is >= this we update cpu_time_base
+ to avoid overflowing 32 bit cpu_time_relative_to_base
+ in timing_wheel_elt_t. */
+ u64 time_index_next_cpu_time_base_update;
+
+ /* Cached earliest element on wheel; 0 if not valid. */
+ u64 cached_min_cpu_time_on_wheel;
+
+ f64 min_sched_time, max_sched_time, cpu_clocks_per_second;
+
+ timing_wheel_stats_t stats;
+} timing_wheel_t;
+
+/* Initialization function. */
+void timing_wheel_init (timing_wheel_t * w,
+ u64 current_cpu_time, f64 cpu_clocks_per_second);
+
+/* Insert user data on wheel at given CPU time stamp. */
+void timing_wheel_insert (timing_wheel_t * w, u64 insert_cpu_time,
+ u32 user_data);
+
+/* Delete user data from wheel (until it is again inserted). */
+void timing_wheel_delete (timing_wheel_t * w, u32 user_data);
+
+/* Advance wheel and return any expired user data in vector. If non-zero
+ min_next_expiring_element_cpu_time will return a cpu time stamp
+ before which there are guaranteed to be no elements in the current wheel. */
+u32 *timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
+ u32 * expired_user_data,
+ u64 * min_next_expiring_element_cpu_time);
+
+/* Returns absolute time in clock cycles of next expiring element. */
+u64 timing_wheel_next_expiring_elt_time (timing_wheel_t * w);
+
+/* Format a timing wheel. */
+format_function_t format_timing_wheel;
+
+/* Testing function to validate wheel. */
+void timing_wheel_validate (timing_wheel_t * w);
+
+#endif /* included_clib_timing_wheel_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
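
A sketch of the intended insert/advance cycle follows. It assumes
user_data values are small caller-owned integers, and relies on
timing_wheel_init() substituting its 1us..1ms defaults when
min_sched_time and max_sched_time are left at zero:

    #include <string.h>
    #include <vppinfra/time.h>
    #include <vppinfra/format.h>
    #include <vppinfra/timing_wheel.h>

    static void
    wheel_example (void)
    {
      timing_wheel_t w;
      clib_time_t ct;
      u32 *expired = 0;
      u64 next_expiry;
      uword i;

      memset (&w, 0, sizeof (w));
      clib_time_init (&ct);
      timing_wheel_init (&w, clib_cpu_time_now (), ct.clocks_per_second);

      /* Schedule event 17 roughly 100 microseconds from now. */
      timing_wheel_insert (&w, clib_cpu_time_now ()
			   + (u64) (100e-6 * ct.clocks_per_second), 17);

      /* Advance appends expired user_data to a caller-owned vector and
         reports the next expiry time; poll until our event pops out. */
      while (vec_len (expired) == 0)
	expired = timing_wheel_advance (&w, clib_cpu_time_now (),
					expired, &next_expiry);

      for (i = 0; i < vec_len (expired); i++)
	fformat (stdout, "expired user_data %u\n", expired[i]);
    }
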
diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.c b/src/vppinfra/tw_timer_16t_1w_2048sl.c
new file mode 100644
index 00000000..3f342045
--- /dev/null
+++ b/src/vppinfra/tw_timer_16t_1w_2048sl.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/error.h>
+#include "tw_timer_16t_1w_2048sl.h"
+#include "tw_timer_template.c"
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.h b/src/vppinfra/tw_timer_16t_1w_2048sl.h
new file mode 100644
index 00000000..761646b3
--- /dev/null
+++ b/src/vppinfra/tw_timer_16t_1w_2048sl.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_tw_timer_16t_1w_2048sl_h__
+#define __included_tw_timer_16t_1w_2048sl_h__
+
+/* ... So that a client app can create multiple wheel geometries */
+#undef TW_TIMER_WHEELS
+#undef TW_SLOTS_PER_RING
+#undef TW_RING_SHIFT
+#undef TW_RING_MASK
+#undef TW_TIMERS_PER_OBJECT
+#undef LOG2_TW_TIMERS_PER_OBJECT
+#undef TW_SUFFIX
+#undef TW_OVERFLOW_VECTOR
+#undef TW_FAST_WHEEL_BITMAP
+#undef TW_TIMER_ALLOW_DUPLICATE_STOP
+#undef TW_START_STOP_TRACE_SIZE
+
+#define TW_TIMER_WHEELS 1
+#define TW_SLOTS_PER_RING 2048
+#define TW_RING_SHIFT 11
+#define TW_RING_MASK (TW_SLOTS_PER_RING -1)
+#define TW_TIMERS_PER_OBJECT 16
+#define LOG2_TW_TIMERS_PER_OBJECT 4
+#define TW_SUFFIX _16t_1w_2048sl
+#define TW_FAST_WHEEL_BITMAP 0
+#define TW_TIMER_ALLOW_DUPLICATE_STOP 0
+
+#include <vppinfra/tw_timer_template.h>
+
+#endif /* __included_tw_timer_16t_1w_2048sl_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
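
Each geometry header re-parameterizes tw_timer_template.h via TW_SUFFIX,
so including the template stamps out a complete wheel API with suffixed
names. The template itself is not part of this diff, so the function
names below are the conventional TW_SUFFIX expansions and should be read
as assumptions; a sketch:

    #include <vppinfra/tw_timer_16t_1w_2048sl.h>

    static tw_timer_wheel_16t_1w_2048sl_t wheel;

    /* Invoked with a vector of expired timer handles. */
    static void
    expired_cb (u32 * expired_handles)
    {
      /* Look up objects by handle and service their timeouts... */
    }

    static void
    tw_example (f64 now)
    {
      u32 handle;

      /* One wheel, 2048 slots, 16 timers per object; 100ms ticks and
         no per-call cap on expirations (~0). */
      tw_timer_wheel_init_16t_1w_2048sl (&wheel, expired_cb, 100e-3, ~0);

      /* Start timer 3 of pool object 0, expiring 10 ticks (1 s) out. */
      handle = tw_timer_start_16t_1w_2048sl (&wheel, 0, 3, 10);

      tw_timer_expire_timers_16t_1w_2048sl (&wheel, now);  /* per tick */
      tw_timer_stop_16t_1w_2048sl (&wheel, handle);
    }
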
diff --git a/src/vppinfra/tw_timer_16t_2w_512sl.c b/src/vppinfra/tw_timer_16t_2w_512sl.c
new file mode 100644
index 00000000..ad1b9a4a
--- /dev/null
+++ b/src/vppinfra/tw_timer_16t_2w_512sl.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/error.h>
+#include "tw_timer_16t_2w_512sl.h"
+#include "tw_timer_template.c"
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_16t_2w_512sl.h b/src/vppinfra/tw_timer_16t_2w_512sl.h
new file mode 100644
index 00000000..029f529d
--- /dev/null
+++ b/src/vppinfra/tw_timer_16t_2w_512sl.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_tw_timer_16t_2w_512sl_h__
+#define __included_tw_timer_16t_2w_512sl_h__
+
+/* ... So that a client app can create multiple wheel geometries */
+#undef TW_TIMER_WHEELS
+#undef TW_SLOTS_PER_RING
+#undef TW_RING_SHIFT
+#undef TW_RING_MASK
+#undef TW_TIMERS_PER_OBJECT
+#undef LOG2_TW_TIMERS_PER_OBJECT
+#undef TW_SUFFIX
+#undef TW_OVERFLOW_VECTOR
+#undef TW_FAST_WHEEL_BITMAP
+#undef TW_TIMER_ALLOW_DUPLICATE_STOP
+#undef TW_START_STOP_TRACE_SIZE
+
+#define TW_TIMER_WHEELS 2
+#define TW_SLOTS_PER_RING 512
+#define TW_RING_SHIFT 9
+#define TW_RING_MASK (TW_SLOTS_PER_RING -1)
+#define TW_TIMERS_PER_OBJECT 16
+#define LOG2_TW_TIMERS_PER_OBJECT 4
+#define TW_SUFFIX _16t_2w_512sl
+#define TW_FAST_WHEEL_BITMAP 0
+#define TW_TIMER_ALLOW_DUPLICATE_STOP 1
+
+#include <vppinfra/tw_timer_template.h>
+
+#endif /* __included_tw_timer_16t_2w_512sl_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_1t_3w_1024sl_ov.c b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.c
new file mode 100644
index 00000000..8a65752c
--- /dev/null
+++ b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/error.h>
+#include "tw_timer_1t_3w_1024sl_ov.h"
+#include "tw_timer_template.c"
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h
new file mode 100644
index 00000000..0b455e02
--- /dev/null
+++ b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_tw_timer_1t_3w_1024sl_ov_h__
+#define __included_tw_timer_1t_3w_1024sl_ov_h__
+
+/* ... So that a client app can create multiple wheel geometries */
+#undef TW_TIMER_WHEELS
+#undef TW_SLOTS_PER_RING
+#undef TW_RING_SHIFT
+#undef TW_RING_MASK
+#undef TW_TIMERS_PER_OBJECT
+#undef LOG2_TW_TIMERS_PER_OBJECT
+#undef TW_SUFFIX
+#undef TW_OVERFLOW_VECTOR
+#undef TW_FAST_WHEEL_BITMAP
+#undef TW_TIMER_ALLOW_DUPLICATE_STOP
+#undef TW_START_STOP_TRACE_SIZE
+
+#define TW_TIMER_WHEELS 3
+#define TW_SLOTS_PER_RING 1024
+#define TW_RING_SHIFT 10
+#define TW_RING_MASK (TW_SLOTS_PER_RING -1)
+#define TW_TIMERS_PER_OBJECT 1
+#define LOG2_TW_TIMERS_PER_OBJECT 0
+#define TW_SUFFIX _1t_3w_1024sl_ov
+#define TW_OVERFLOW_VECTOR 1
+#define TW_FAST_WHEEL_BITMAP 1
+#define TW_TIMER_ALLOW_DUPLICATE_STOP 1
+
+#include <vppinfra/tw_timer_template.h>
+
+#endif /* __included_tw_timer_1t_3w_1024sl_ov_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_2t_1w_2048sl.c b/src/vppinfra/tw_timer_2t_1w_2048sl.c
new file mode 100644
index 00000000..79d293e1
--- /dev/null
+++ b/src/vppinfra/tw_timer_2t_1w_2048sl.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/error.h>
+#include "tw_timer_2t_1w_2048sl.h"
+#include "tw_timer_template.c"
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_2t_1w_2048sl.h b/src/vppinfra/tw_timer_2t_1w_2048sl.h
new file mode 100644
index 00000000..6ae86688
--- /dev/null
+++ b/src/vppinfra/tw_timer_2t_1w_2048sl.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_tw_timer_2t_1w_2048sl_h__
+#define __included_tw_timer_2t_1w_2048sl_h__
+
+/* ... So that a client app can create multiple wheel geometries */
+#undef TW_TIMER_WHEELS
+#undef TW_SLOTS_PER_RING
+#undef TW_RING_SHIFT
+#undef TW_RING_MASK
+#undef TW_TIMERS_PER_OBJECT
+#undef LOG2_TW_TIMERS_PER_OBJECT
+#undef TW_SUFFIX
+#undef TW_OVERFLOW_VECTOR
+#undef TW_FAST_WHEEL_BITMAP
+#undef TW_TIMER_ALLOW_DUPLICATE_STOP
+#undef TW_START_STOP_TRACE_SIZE
+
+#define TW_TIMER_WHEELS 1
+#define TW_SLOTS_PER_RING 2048
+#define TW_RING_SHIFT 11
+#define TW_RING_MASK (TW_SLOTS_PER_RING -1)
+#define TW_TIMERS_PER_OBJECT 2
+#define LOG2_TW_TIMERS_PER_OBJECT 1
+#define TW_SUFFIX _2t_1w_2048sl
+#define TW_FAST_WHEEL_BITMAP 0
+#define TW_TIMER_ALLOW_DUPLICATE_STOP 0
+
+#include <vppinfra/tw_timer_template.h>
+
+#endif /* __included_tw_timer_2t_1w_2048sl_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_4t_3w_256sl.c b/src/vppinfra/tw_timer_4t_3w_256sl.c
new file mode 100644
index 00000000..73bb34b2
--- /dev/null
+++ b/src/vppinfra/tw_timer_4t_3w_256sl.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/error.h>
+#include "tw_timer_4t_3w_256sl.h"
+#include "tw_timer_template.c"
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_4t_3w_256sl.h b/src/vppinfra/tw_timer_4t_3w_256sl.h
new file mode 100644
index 00000000..16c41bcd
--- /dev/null
+++ b/src/vppinfra/tw_timer_4t_3w_256sl.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_tw_timer_4t_3w_256sl_h__
+#define __included_tw_timer_4t_3w_256sl_h__
+
+/* ... So that a client app can create multiple wheel geometries */
+#undef TW_TIMER_WHEELS
+#undef TW_SLOTS_PER_RING
+#undef TW_RING_SHIFT
+#undef TW_RING_MASK
+#undef TW_TIMERS_PER_OBJECT
+#undef LOG2_TW_TIMERS_PER_OBJECT
+#undef TW_SUFFIX
+#undef TW_OVERFLOW_VECTOR
+#undef TW_FAST_WHEEL_BITMAP
+#undef TW_TIMER_ALLOW_DUPLICATE_STOP
+#undef TW_START_STOP_TRACE_SIZE
+
+#define TW_TIMER_WHEELS 3
+#define TW_SLOTS_PER_RING 256
+#define TW_RING_SHIFT 8
+#define TW_RING_MASK (TW_SLOTS_PER_RING -1)
+#define TW_TIMERS_PER_OBJECT 4
+#define LOG2_TW_TIMERS_PER_OBJECT 2
+#define TW_SUFFIX _4t_3w_256sl
+#define TW_FAST_WHEEL_BITMAP 0
+#define TW_TIMER_ALLOW_DUPLICATE_STOP 0
+
+#include <vppinfra/tw_timer_template.h>
+
+#endif /* __included_tw_timer_4t_3w_256sl_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_4t_3w_4sl_ov.c b/src/vppinfra/tw_timer_4t_3w_4sl_ov.c
new file mode 100644
index 00000000..e2af7b5d
--- /dev/null
+++ b/src/vppinfra/tw_timer_4t_3w_4sl_ov.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This wheel geometry is not prima facie useful, except for testing
+ */
+
+#if TW_TIMER_TEST_GEOMETRY > 0
+#include <vppinfra/error.h>
+#include "tw_timer_4t_3w_4sl_ov.h"
+#include "tw_timer_template.c"
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_4t_3w_4sl_ov.h b/src/vppinfra/tw_timer_4t_3w_4sl_ov.h
new file mode 100644
index 00000000..845ffeac
--- /dev/null
+++ b/src/vppinfra/tw_timer_4t_3w_4sl_ov.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_tw_timer_4t_3w_4sl_ov_h__
+#define __included_tw_timer_4t_3w_4sl_ov_h__
+
+/* ... So that a client app can create multiple wheel geometries */
+#undef TW_TIMER_WHEELS
+#undef TW_SLOTS_PER_RING
+#undef TW_RING_SHIFT
+#undef TW_RING_MASK
+#undef TW_TIMERS_PER_OBJECT
+#undef LOG2_TW_TIMERS_PER_OBJECT
+#undef TW_SUFFIX
+#undef TW_OVERFLOW_VECTOR
+#undef TW_FAST_WHEEL_BITMAP
+#undef TW_TIMER_ALLOW_DUPLICATE_STOP
+#undef TW_START_STOP_TRACE_SIZE
+
+#define TW_TIMER_WHEELS 3
+#define TW_SLOTS_PER_RING 4
+#define TW_RING_SHIFT 2
+#define TW_RING_MASK (TW_SLOTS_PER_RING -1)
+#define TW_TIMERS_PER_OBJECT 4
+#define LOG2_TW_TIMERS_PER_OBJECT 2
+#define TW_SUFFIX _4t_3w_4sl_ov
+#define TW_OVERFLOW_VECTOR 1
+#define TW_FAST_WHEEL_BITMAP 0
+#define TW_TIMER_ALLOW_DUPLICATE_STOP 0
+
+#include <vppinfra/tw_timer_template.h>
+
+#endif /* __included_tw_timer_4t_3w_4sl_ov_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c
new file mode 100644
index 00000000..abad3718
--- /dev/null
+++ b/src/vppinfra/tw_timer_template.c
@@ -0,0 +1,832 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file
+ * @brief TW timer implementation. TEMPLATE ONLY, do not compile directly.
+ */
+#if TW_START_STOP_TRACE_SIZE > 0
+
+void TW (tw_timer_trace) (TWT (tw_timer_wheel) * tw, u32 timer_id,
+ u32 pool_index, u32 handle)
+{
+ TWT (trace) * t = &tw->traces[tw->trace_index];
+
+ t->timer_id = timer_id;
+ t->pool_index = pool_index;
+ t->handle = handle;
+
+ tw->trace_index++;
+ if (tw->trace_index == TW_START_STOP_TRACE_SIZE)
+ {
+ tw->trace_index = 0;
+ tw->trace_wrapped++;
+ }
+}
+
+void TW (tw_search_trace) (TWT (tw_timer_wheel) * tw, u32 handle)
+{
+ u32 i, start_pos;
+ TWT (trace) * t;
+ char *s = "bogus!";
+
+ /* reverse search for the supplied handle */
+
+ start_pos = tw->trace_index;
+ if (start_pos == 0)
+ start_pos = TW_START_STOP_TRACE_SIZE - 1;
+ else
+ start_pos--;
+
+ for (i = start_pos; i > 0; i--)
+ {
+ t = &tw->traces[i];
+ if (t->handle == handle)
+ {
+ switch (t->timer_id)
+ {
+ case 0xFF:
+ s = "stopped";
+ break;
+ case 0xFE:
+ s = "expired";
+ break;
+ default:
+ s = "started";
+ break;
+ }
+ fformat (stderr, "handle 0x%x (%d) %s at trace %d\n",
+ handle, handle, s, i);
+ }
+ }
+ if (tw->trace_wrapped > 0)
+ {
+      for (i = TW_START_STOP_TRACE_SIZE - 1;
+	   i >= tw->trace_index && i < TW_START_STOP_TRACE_SIZE; i--)
+ {
+ t = &tw->traces[i];
+ if (t->handle == handle)
+ {
+ switch (t->timer_id)
+ {
+ case 0xFF:
+ s = "stopped";
+ break;
+ case 0xFE:
+ s = "expired";
+ break;
+ default:
+ s = "started";
+ break;
+ }
+ fformat (stderr, "handle 0x%x (%d) %s at trace %d\n",
+ handle, handle, s, i);
+ }
+ }
+ }
+}
+#endif /* TW_START_STOP_TRACE_SIZE > 0 */
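+
+/*
+ * Illustrative note: the trace machinery above is compiled only when a
+ * geometry header defines TW_START_STOP_TRACE_SIZE to a nonzero ring
+ * size, e.g.
+ *
+ *   #define TW_START_STOP_TRACE_SIZE 65536
+ *
+ * Every start (timer_id), stop (0xFF) and expiration (0xFE) event is
+ * then logged in a circular buffer of that many entries, which
+ * TW (tw_search_trace) walks backwards to reconstruct the history of
+ * a single handle.
+ */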
+
+static inline u32
+TW (make_internal_timer_handle) (u32 pool_index, u32 timer_id)
+{
+ u32 handle;
+
+ ASSERT (timer_id < TW_TIMERS_PER_OBJECT);
+#if LOG2_TW_TIMERS_PER_OBJECT > 0
+ ASSERT (pool_index < (1 << (32 - LOG2_TW_TIMERS_PER_OBJECT)));
+
+ handle = (timer_id << (32 - LOG2_TW_TIMERS_PER_OBJECT)) | (pool_index);
+#else
+ handle = pool_index;
+#endif
+ return handle;
+}
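+
+/*
+ * Worked example (illustrative): with 16 timers per object,
+ * LOG2_TW_TIMERS_PER_OBJECT is 4, so the timer_id lands in the top 4
+ * bits and the pool index in the low 28 bits:
+ *
+ *   handle     = (timer_id << 28) | pool_index;
+ *   timer_id   = handle >> 28;
+ *   pool_index = handle & ((1 << 28) - 1);
+ *
+ * hence the 2**28 object limit noted in tw_timer_template.h.
+ */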
+
+static inline void
+timer_addhead (TWT (tw_timer) * pool, u32 head_index, u32 new_index)
+{
+ TWT (tw_timer) * head = pool_elt_at_index (pool, head_index);
+ TWT (tw_timer) * old_first;
+ u32 old_first_index;
+ TWT (tw_timer) * new;
+
+ new = pool_elt_at_index (pool, new_index);
+
+ if (PREDICT_FALSE (head->next == head_index))
+ {
+ head->next = head->prev = new_index;
+ new->next = new->prev = head_index;
+ return;
+ }
+
+ old_first_index = head->next;
+ old_first = pool_elt_at_index (pool, old_first_index);
+
+ new->next = old_first_index;
+ new->prev = old_first->prev;
+ old_first->prev = new_index;
+ head->next = new_index;
+}
+
+static inline void
+timer_remove (TWT (tw_timer) * pool, u32 index)
+{
+ TWT (tw_timer) * elt = pool_elt_at_index (pool, index);
+ TWT (tw_timer) * next_elt, *prev_elt;
+
+ ASSERT (elt->user_handle != ~0);
+
+ next_elt = pool_elt_at_index (pool, elt->next);
+ prev_elt = pool_elt_at_index (pool, elt->prev);
+
+ next_elt->prev = elt->prev;
+ prev_elt->next = elt->next;
+
+ elt->prev = elt->next = ~0;
+}
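+
+/*
+ * Note (illustrative): slot lists are doubly linked through pool
+ * indices rather than raw pointers, so they stay valid when pool_get
+ * reallocates the timer pool.  Each slot's head is itself a pool
+ * element whose next == prev == its own index when the slot is empty,
+ * which is exactly the case timer_addhead tests for.
+ */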
+
+/**
+ * @brief Start a Tw Timer
+ * @param tw_timer_wheel_t * tw timer wheel object pointer
+ * @param u32 pool_index user pool index, presumably for a tw session
+ * @param u32 timer_id app-specific timer ID, at most LOG2_TW_TIMERS_PER_OBJECT bits
+ * @param u64 interval timer interval in ticks
+ * @returns handle needed to cancel the timer
+ */
+u32
+TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id,
+ u64 interval)
+{
+#if TW_TIMER_WHEELS > 1
+ u16 slow_ring_offset;
+ u32 carry;
+#endif
+#if TW_TIMER_WHEELS > 2
+ u16 glacier_ring_offset;
+#endif
+#if TW_OVERFLOW_VECTOR > 0
+ u64 interval_plus_time_to_wrap, triple_wrap_mask;
+#endif
+ u16 fast_ring_offset;
+ tw_timer_wheel_slot_t *ts;
+ TWT (tw_timer) * t;
+
+ ASSERT (interval);
+
+ pool_get (tw->timers, t);
+ memset (t, 0xff, sizeof (*t));
+
+ t->user_handle = TW (make_internal_timer_handle) (pool_index, timer_id);
+
+ /* Factor interval into 1..3 wheel offsets */
+#if TW_TIMER_WHEELS > 2
+#if TW_OVERFLOW_VECTOR > 0
+ /*
+ * This is tricky. Put a timer onto the overflow
+ * vector if the interval PLUS the time
+ * until the next triple-wrap exceeds one full revolution
+ * of all three wheels.
+ */
+ triple_wrap_mask = (1 << (3 * TW_RING_SHIFT)) - 1;
+ interval_plus_time_to_wrap =
+ interval + (tw->current_tick & triple_wrap_mask);
+ if ((interval_plus_time_to_wrap >= 1 << (3 * TW_RING_SHIFT)))
+ {
+ t->expiration_time = tw->current_tick + interval;
+ ts = &tw->overflow;
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, timer_id, pool_index, t - tw->timers);
+#endif
+ return t - tw->timers;
+ }
+#endif
+
+ glacier_ring_offset = interval >> (2 * TW_RING_SHIFT);
+ ASSERT ((u64) glacier_ring_offset < TW_SLOTS_PER_RING);
+ interval -= (((u64) glacier_ring_offset) << (2 * TW_RING_SHIFT));
+#endif
+#if TW_TIMER_WHEELS > 1
+ slow_ring_offset = interval >> TW_RING_SHIFT;
+ ASSERT ((u64) slow_ring_offset < TW_SLOTS_PER_RING);
+ interval -= (((u64) slow_ring_offset) << TW_RING_SHIFT);
+#endif
+ fast_ring_offset = interval & TW_RING_MASK;
+
+ /*
+ * Account for the current wheel positions(s)
+   * Account for the current wheel position(s).
+ * index vector will contain (TW_SLOTS_PER_RING, ...) when
+ * the actual position is (0, ...)
+ */
+
+ fast_ring_offset += tw->current_index[TW_TIMER_RING_FAST] & TW_RING_MASK;
+
+#if TW_TIMER_WHEELS > 1
+ carry = fast_ring_offset >= TW_SLOTS_PER_RING ? 1 : 0;
+ fast_ring_offset %= TW_SLOTS_PER_RING;
+ slow_ring_offset += (tw->current_index[TW_TIMER_RING_SLOW] & TW_RING_MASK)
+ + carry;
+ carry = slow_ring_offset >= TW_SLOTS_PER_RING ? 1 : 0;
+ slow_ring_offset %= TW_SLOTS_PER_RING;
+#endif
+
+#if TW_TIMER_WHEELS > 2
+ glacier_ring_offset +=
+ (tw->current_index[TW_TIMER_RING_GLACIER] & TW_RING_MASK) + carry;
+ glacier_ring_offset %= TW_SLOTS_PER_RING;
+#endif
+
+#if TW_TIMER_WHEELS > 2
+ if (glacier_ring_offset !=
+ (tw->current_index[TW_TIMER_RING_GLACIER] & TW_RING_MASK))
+ {
+ /* We'll need slow and fast ring offsets later */
+ t->slow_ring_offset = slow_ring_offset;
+ t->fast_ring_offset = fast_ring_offset;
+
+ ts = &tw->w[TW_TIMER_RING_GLACIER][glacier_ring_offset];
+
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, timer_id, pool_index, t - tw->timers);
+#endif
+ return t - tw->timers;
+ }
+#endif
+
+#if TW_TIMER_WHEELS > 1
+  /* Timer expires more than one fast-ring revolution from now? */
+ if (slow_ring_offset !=
+ (tw->current_index[TW_TIMER_RING_SLOW] & TW_RING_MASK))
+ {
+ /* We'll need the fast ring offset later... */
+ t->fast_ring_offset = fast_ring_offset;
+
+ ts = &tw->w[TW_TIMER_RING_SLOW][slow_ring_offset];
+
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, timer_id, pool_index, t - tw->timers);
+#endif
+ return t - tw->timers;
+ }
+#else
+ fast_ring_offset %= TW_SLOTS_PER_RING;
+#endif
+
+ /* Timer expires less than one fast-ring revolution from now */
+ ts = &tw->w[TW_TIMER_RING_FAST][fast_ring_offset];
+
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+
+#if TW_FAST_WHEEL_BITMAP
+ tw->fast_slot_bitmap = clib_bitmap_set (tw->fast_slot_bitmap,
+ fast_ring_offset, 1);
+#endif
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, timer_id, pool_index, t - tw->timers);
+#endif
+ return t - tw->timers;
+}
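+
+/*
+ * Worked example (illustrative): in a 3-wheel, 256-slot geometry
+ * (TW_RING_SHIFT = 8), an interval of 0x54321 ticks factors into
+ *
+ *   glacier_ring_offset = 0x54321 >> 16           = 0x05
+ *   slow_ring_offset    = (0x54321 & 0xffff) >> 8 = 0x43
+ *   fast_ring_offset    = 0x54321 & 0xff          = 0x21
+ *
+ * before the current wheel positions are added with carry.
+ */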
+
+#if TW_TIMER_SCAN_FOR_HANDLE > 0
+int TW (scan_for_handle) (TWT (tw_timer_wheel) * tw, u32 handle)
+{
+ int i, j;
+ tw_timer_wheel_slot_t *ts;
+ TWT (tw_timer) * t, *head;
+ u32 next_index;
+ int rv = 0;
+
+ for (i = 0; i < TW_TIMER_WHEELS; i++)
+ {
+ for (j = 0; j < TW_SLOTS_PER_RING; j++)
+ {
+ ts = &tw->w[i][j];
+ head = pool_elt_at_index (tw->timers, ts->head_index);
+ next_index = head->next;
+
+ while (next_index != ts->head_index)
+ {
+ t = pool_elt_at_index (tw->timers, next_index);
+ if (next_index == handle)
+ {
+ clib_warning ("handle %d found in ring %d slot %d",
+ handle, i, j);
+ clib_warning ("user handle 0x%x", t->user_handle);
+ rv = 1;
+ }
+ next_index = t->next;
+ }
+ }
+ }
+ return rv;
+}
+#endif /* TW_TIMER_SCAN_FOR_HANDLE */
+
+/**
+ * @brief Stop a tw timer
+ * @param tw_timer_wheel_t * tw timer wheel object pointer
+ * @param u32 handle timer cancellation handle returned by tw_timer_start
+ */
+void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle)
+{
+ TWT (tw_timer) * t;
+
+#if TW_TIMER_ALLOW_DUPLICATE_STOP
+ /*
+ * A vlib process may have its timer expire, and receive
+ * an event before the expiration is processed.
+ * That results in a duplicate tw_timer_stop.
+ */
+ if (pool_is_free_index (tw->timers, handle))
+ return;
+#endif
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, ~0, ~0, handle);
+#endif
+
+ t = pool_elt_at_index (tw->timers, handle);
+
+ /* in case of idiotic handle (e.g. passing a listhead index) */
+ ASSERT (t->user_handle != ~0);
+
+ timer_remove (tw->timers, handle);
+
+ pool_put_index (tw->timers, handle);
+}
+
+/**
+ * @brief Initialize a tw timer wheel template instance
+ * @param tw_timer_wheel_t * tw timer wheel object pointer
+ * @param void * expired_timer_callback. Passed a u32 * vector of
+ * expired timer handles. The callback is optional.
+ * @param f64 timer_interval_in_seconds
+ */
+void
+TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw,
+ void *expired_timer_callback,
+ f64 timer_interval_in_seconds, u32 max_expirations)
+{
+ int ring, slot;
+ tw_timer_wheel_slot_t *ts;
+ TWT (tw_timer) * t;
+ memset (tw, 0, sizeof (*tw));
+ tw->expired_timer_callback = expired_timer_callback;
+ tw->max_expirations = max_expirations;
+ if (timer_interval_in_seconds == 0.0)
+ {
+ clib_warning ("timer interval is zero");
+ abort ();
+ }
+ tw->timer_interval = timer_interval_in_seconds;
+ tw->ticks_per_second = 1.0 / timer_interval_in_seconds;
+ tw->first_expires_tick = ~0ULL;
+
+ vec_validate (tw->expired_timer_handles, 0);
+ _vec_len (tw->expired_timer_handles) = 0;
+
+ for (ring = 0; ring < TW_TIMER_WHEELS; ring++)
+ {
+ for (slot = 0; slot < TW_SLOTS_PER_RING; slot++)
+ {
+ ts = &tw->w[ring][slot];
+ pool_get (tw->timers, t);
+ memset (t, 0xff, sizeof (*t));
+ t->next = t->prev = t - tw->timers;
+ ts->head_index = t - tw->timers;
+ }
+ }
+
+#if TW_OVERFLOW_VECTOR > 0
+ ts = &tw->overflow;
+ pool_get (tw->timers, t);
+ memset (t, 0xff, sizeof (*t));
+ t->next = t->prev = t - tw->timers;
+ ts->head_index = t - tw->timers;
+#endif
+}
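+
+/*
+ * Sizing note (illustrative): initialization allocates one pool element
+ * per wheel slot to act as a list head, so an empty instance consumes
+ * TW_TIMER_WHEELS * TW_SLOTS_PER_RING elements (plus one for the
+ * overflow vector, when present) before any user timer is started; for
+ * the 4t_3w_256sl geometry that is 3 * 256 = 768 list heads.
+ */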
+
+/**
+ * @brief Free a tw timer wheel template instance
+ * @param tw_timer_wheel_t * tw timer wheel object pointer
+ */
+void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw)
+{
+ int i, j;
+ tw_timer_wheel_slot_t *ts;
+ TWT (tw_timer) * head, *t;
+ u32 next_index;
+
+ for (i = 0; i < TW_TIMER_WHEELS; i++)
+ {
+ for (j = 0; j < TW_SLOTS_PER_RING; j++)
+ {
+ ts = &tw->w[i][j];
+ head = pool_elt_at_index (tw->timers, ts->head_index);
+ next_index = head->next;
+
+ while (next_index != ts->head_index)
+ {
+ t = pool_elt_at_index (tw->timers, next_index);
+ next_index = t->next;
+ pool_put (tw->timers, t);
+ }
+ pool_put (tw->timers, head);
+ }
+ }
+
+#if TW_OVERFLOW_VECTOR > 0
+ ts = &tw->overflow;
+ head = pool_elt_at_index (tw->timers, ts->head_index);
+ next_index = head->next;
+
+ while (next_index != ts->head_index)
+ {
+ t = pool_elt_at_index (tw->timers, next_index);
+ next_index = t->next;
+ pool_put (tw->timers, t);
+ }
+ pool_put (tw->timers, head);
+#endif
+
+ memset (tw, 0, sizeof (*tw));
+}
+
+/**
+ * @brief Advance a tw timer wheel. Calls the expired timer callback
+ * as needed. This routine should be called once every timer_interval seconds
+ * @param tw_timer_wheel_t * tw timer wheel template instance pointer
+ * @param f64 now the current time, e.g. from vlib_time_now(vm)
+ * @returns u32 * vector of expired user handles
+ */
+static inline
+ u32 * TW (tw_timer_expire_timers_internal) (TWT (tw_timer_wheel) * tw,
+ f64 now,
+ u32 * callback_vector_arg)
+{
+ u32 nticks, i;
+ tw_timer_wheel_slot_t *ts;
+ TWT (tw_timer) * t, *head;
+ u32 *callback_vector;
+ u32 fast_wheel_index;
+ u32 next_index;
+ u32 slow_wheel_index __attribute__ ((unused));
+ u32 glacier_wheel_index __attribute__ ((unused));
+
+ /* Shouldn't happen */
+ if (PREDICT_FALSE (now < tw->next_run_time))
+ return callback_vector_arg;
+
+ /* Number of ticks which have occurred */
+ nticks = tw->ticks_per_second * (now - tw->last_run_time);
+ if (nticks == 0)
+ return callback_vector_arg;
+
+ /* Remember when we ran, compute next runtime */
+ tw->next_run_time = (now + tw->timer_interval);
+
+ if (callback_vector_arg == 0)
+ {
+ _vec_len (tw->expired_timer_handles) = 0;
+ callback_vector = tw->expired_timer_handles;
+ }
+ else
+ callback_vector = callback_vector_arg;
+
+ for (i = 0; i < nticks; i++)
+ {
+ fast_wheel_index = tw->current_index[TW_TIMER_RING_FAST];
+ if (TW_TIMER_WHEELS > 1)
+ slow_wheel_index = tw->current_index[TW_TIMER_RING_SLOW];
+ if (TW_TIMER_WHEELS > 2)
+ glacier_wheel_index = tw->current_index[TW_TIMER_RING_GLACIER];
+
+#if TW_OVERFLOW_VECTOR > 0
+ /* Triple odometer-click? Process the overflow vector... */
+ if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING
+ && slow_wheel_index == TW_SLOTS_PER_RING
+ && glacier_wheel_index == TW_SLOTS_PER_RING))
+ {
+ u64 interval;
+ u32 new_glacier_ring_offset, new_slow_ring_offset;
+ u32 new_fast_ring_offset;
+
+ ts = &tw->overflow;
+ head = pool_elt_at_index (tw->timers, ts->head_index);
+ next_index = head->next;
+
+ /* Make slot empty */
+ head->next = head->prev = ts->head_index;
+
+ /* traverse slot, place timers wherever they go */
+ while (next_index != head - tw->timers)
+ {
+ t = pool_elt_at_index (tw->timers, next_index);
+ next_index = t->next;
+
+ /* Remove from the overflow vector (hammer) */
+ t->next = t->prev = ~0;
+
+ ASSERT (t->expiration_time >= tw->current_tick);
+
+ interval = t->expiration_time - tw->current_tick;
+
+ /* Right back onto the overflow vector? */
+ if (interval >= (1 << (3 * TW_RING_SHIFT)))
+ {
+ ts = &tw->overflow;
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+ continue;
+ }
+ /* Compute ring offsets */
+ new_glacier_ring_offset = interval >> (2 * TW_RING_SHIFT);
+
+ interval -= (new_glacier_ring_offset << (2 * TW_RING_SHIFT));
+
+ /* Note: the wheels are at (0,0,0), no add-with-carry needed */
+ new_slow_ring_offset = interval >> TW_RING_SHIFT;
+ interval -= (new_slow_ring_offset << TW_RING_SHIFT);
+ new_fast_ring_offset = interval & TW_RING_MASK;
+ t->slow_ring_offset = new_slow_ring_offset;
+ t->fast_ring_offset = new_fast_ring_offset;
+
+ /* Timer expires Right Now */
+ if (PREDICT_FALSE (t->slow_ring_offset == 0 &&
+ t->fast_ring_offset == 0 &&
+ new_glacier_ring_offset == 0))
+ {
+ vec_add1 (callback_vector, t->user_handle);
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, 0xfe, t->user_handle,
+ t - tw->timers);
+#endif
+ pool_put (tw->timers, t);
+ }
+ /* Timer moves to the glacier ring */
+ else if (new_glacier_ring_offset)
+ {
+ ts = &tw->w[TW_TIMER_RING_GLACIER][new_glacier_ring_offset];
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+ }
+ /* Timer moves to the slow ring */
+ else if (t->slow_ring_offset)
+ {
+ /* Add to slow ring */
+ ts = &tw->w[TW_TIMER_RING_SLOW][t->slow_ring_offset];
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+ }
+	  /* Timer moves to the fast ring */
+ else
+ {
+ ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset];
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+#if TW_FAST_WHEEL_BITMAP
+ tw->fast_slot_bitmap =
+ clib_bitmap_set (tw->fast_slot_bitmap,
+ t->fast_ring_offset, 1);
+#endif
+ }
+ }
+ }
+#endif
+
+#if TW_TIMER_WHEELS > 2
+ /*
+ * Double odometer-click? Process one slot in the glacier ring...
+ */
+ if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING
+ && slow_wheel_index == TW_SLOTS_PER_RING))
+ {
+ glacier_wheel_index %= TW_SLOTS_PER_RING;
+ ts = &tw->w[TW_TIMER_RING_GLACIER][glacier_wheel_index];
+
+ head = pool_elt_at_index (tw->timers, ts->head_index);
+ next_index = head->next;
+
+ /* Make slot empty */
+ head->next = head->prev = ts->head_index;
+
+ /* traverse slot, deal timers into slow ring */
+ while (next_index != head - tw->timers)
+ {
+ t = pool_elt_at_index (tw->timers, next_index);
+ next_index = t->next;
+
+ /* Remove from glacier ring slot (hammer) */
+ t->next = t->prev = ~0;
+
+ /* Timer expires Right Now */
+ if (PREDICT_FALSE (t->slow_ring_offset == 0 &&
+ t->fast_ring_offset == 0))
+ {
+ vec_add1 (callback_vector, t->user_handle);
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, 0xfe, t->user_handle,
+ t - tw->timers);
+#endif
+ pool_put (tw->timers, t);
+ }
+ /* Timer expires during slow-wheel tick 0 */
+ else if (PREDICT_FALSE (t->slow_ring_offset == 0))
+ {
+ ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset];
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+#if TW_FAST_WHEEL_BITMAP
+ tw->fast_slot_bitmap =
+ clib_bitmap_set (tw->fast_slot_bitmap,
+ t->fast_ring_offset, 1);
+#endif
+ }
+ else /* typical case */
+ {
+ /* Add to slow ring */
+ ts = &tw->w[TW_TIMER_RING_SLOW][t->slow_ring_offset];
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+ }
+ }
+ }
+#endif
+
+#if TW_TIMER_WHEELS > 1
+ /*
+ * Single odometer-click? Process a slot in the slow ring,
+ */
+ if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING))
+ {
+ slow_wheel_index %= TW_SLOTS_PER_RING;
+ ts = &tw->w[TW_TIMER_RING_SLOW][slow_wheel_index];
+
+ head = pool_elt_at_index (tw->timers, ts->head_index);
+ next_index = head->next;
+
+ /* Make slot empty */
+ head->next = head->prev = ts->head_index;
+
+ /* traverse slot, deal timers into fast ring */
+ while (next_index != head - tw->timers)
+ {
+ t = pool_elt_at_index (tw->timers, next_index);
+ next_index = t->next;
+
+	      /* Remove from slow ring slot (hammer) */
+ t->next = t->prev = ~0;
+
+ /* Timer expires Right Now */
+ if (PREDICT_FALSE (t->fast_ring_offset == 0))
+ {
+ vec_add1 (callback_vector, t->user_handle);
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, 0xfe, t->user_handle,
+ t - tw->timers);
+#endif
+ pool_put (tw->timers, t);
+ }
+ else /* typical case */
+ {
+ /* Add to fast ring */
+ ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset];
+ timer_addhead (tw->timers, ts->head_index, t - tw->timers);
+#if TW_FAST_WHEEL_BITMAP
+ tw->fast_slot_bitmap =
+ clib_bitmap_set (tw->fast_slot_bitmap,
+ t->fast_ring_offset, 1);
+#endif
+ }
+ }
+ }
+#endif
+
+ /* Handle the fast ring */
+ fast_wheel_index %= TW_SLOTS_PER_RING;
+ ts = &tw->w[TW_TIMER_RING_FAST][fast_wheel_index];
+
+ head = pool_elt_at_index (tw->timers, ts->head_index);
+ next_index = head->next;
+
+ /* Make slot empty */
+ head->next = head->prev = ts->head_index;
+
+ /* Construct vector of expired timer handles to give the user */
+ while (next_index != ts->head_index)
+ {
+ t = pool_elt_at_index (tw->timers, next_index);
+ next_index = t->next;
+ vec_add1 (callback_vector, t->user_handle);
+#if TW_START_STOP_TRACE_SIZE > 0
+ TW (tw_timer_trace) (tw, 0xfe, t->user_handle, t - tw->timers);
+#endif
+ pool_put (tw->timers, t);
+ }
+
+ /* If any timers expired, tell the user */
+ if (callback_vector_arg == 0 && vec_len (callback_vector))
+ {
+ /* The callback is optional. We return the u32 * handle vector */
+ if (tw->expired_timer_callback)
+ tw->expired_timer_callback (callback_vector);
+ tw->expired_timer_handles = callback_vector;
+ }
+
+#if TW_FAST_WHEEL_BITMAP
+ tw->fast_slot_bitmap = clib_bitmap_set (tw->fast_slot_bitmap,
+ fast_wheel_index, 0);
+#endif
+
+ tw->current_tick++;
+ fast_wheel_index++;
+ tw->current_index[TW_TIMER_RING_FAST] = fast_wheel_index;
+
+#if TW_TIMER_WHEELS > 1
+ if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING))
+ slow_wheel_index++;
+ tw->current_index[TW_TIMER_RING_SLOW] = slow_wheel_index;
+#endif
+
+#if TW_TIMER_WHEELS > 2
+ if (PREDICT_FALSE (slow_wheel_index == TW_SLOTS_PER_RING))
+ glacier_wheel_index++;
+ tw->current_index[TW_TIMER_RING_GLACIER] = glacier_wheel_index;
+#endif
+
+ if (vec_len (callback_vector) >= tw->max_expirations)
+ break;
+ }
+
+ if (callback_vector_arg == 0)
+ tw->expired_timer_handles = callback_vector;
+
+ tw->last_run_time += i * tw->timer_interval;
+ return callback_vector;
+}
+
+u32 *TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now)
+{
+ return TW (tw_timer_expire_timers_internal) (tw, now, 0 /* no vector */ );
+}
+
+u32 *TW (tw_timer_expire_timers_vec) (TWT (tw_timer_wheel) * tw, f64 now,
+ u32 * vec)
+{
+ return TW (tw_timer_expire_timers_internal) (tw, now, vec);
+}
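+
+/*
+ * Usage sketch (illustrative; wheel, now and handle_expiration are
+ * placeholders): drive the wheel from a loop that runs roughly once per
+ * timer_interval, e.g. for the _2t_1w_2048sl geometry:
+ *
+ *   u32 i, *expired;
+ *   expired = tw_timer_expire_timers_2t_1w_2048sl (&wheel, now);
+ *   for (i = 0; i < vec_len (expired); i++)
+ *     handle_expiration (expired[i]);
+ *
+ * The _vec variant appends to a caller-supplied vector instead, which
+ * lets one loop drain several wheels into a single vector.
+ */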
+
+#if TW_FAST_WHEEL_BITMAP
+/** Returns an approximation to the first timer expiration in
+ * timer-ticks from "now". To avoid wasting an unjustifiable
+ * amount of time on the problem, we maintain an approximate fast-wheel slot
+ * occupancy bitmap. We don't worry about clearing fast wheel bits
+ * when timers are removed from fast wheel slots.
+ */
+
+u32 TW (tw_timer_first_expires_in_ticks) (TWT (tw_timer_wheel) * tw)
+{
+ u32 first_expiring_index, fast_ring_index;
+ i32 delta;
+
+ if (clib_bitmap_is_zero (tw->fast_slot_bitmap))
+ return TW_SLOTS_PER_RING;
+
+ fast_ring_index = tw->current_index[TW_TIMER_RING_FAST];
+ if (fast_ring_index == TW_SLOTS_PER_RING)
+ fast_ring_index = 0;
+
+ first_expiring_index = clib_bitmap_next_set (tw->fast_slot_bitmap,
+ fast_ring_index);
+ if (first_expiring_index == ~0 && fast_ring_index != 0)
+ first_expiring_index = clib_bitmap_first_set (tw->fast_slot_bitmap);
+
+ ASSERT (first_expiring_index != ~0);
+
+ delta = (i32) first_expiring_index - (i32) fast_ring_index;
+ if (delta < 0)
+ delta += TW_SLOTS_PER_RING;
+
+ ASSERT (delta >= 0);
+
+ return (u32) delta;
+}
+
+#endif
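+
+/*
+ * Illustrative use of the approximation (names are placeholders): a
+ * sleepy main loop can ask how long it may safely suspend,
+ *
+ *   u32 n = tw_timer_first_expires_in_ticks_1t_3w_1024sl_ov (&wheel);
+ *   suspend_for (n * wheel.timer_interval);
+ *
+ * bearing in mind the result is approximate in both directions: stale
+ * bitmap bits can make it early, and timers still parked on the slower
+ * rings are not reflected in the bitmap at all.
+ */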
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/tw_timer_template.h b/src/vppinfra/tw_timer_template.h
new file mode 100644
index 00000000..0217644d
--- /dev/null
+++ b/src/vppinfra/tw_timer_template.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TW_SUFFIX
+#error do not include tw_timer_template.h directly
+#endif
+
+#include <vppinfra/clib.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/bitmap.h>
+
+#ifndef _twt
+#define _twt(a,b) a##b##_t
+#define __twt(a,b) _twt(a,b)
+#define TWT(a) __twt(a,TW_SUFFIX)
+
+#define _tw(a,b) a##b
+#define __tw(a,b) _tw(a,b)
+#define TW(a) __tw(a,TW_SUFFIX)
+#endif
+
+/** @file
+ @brief TW timer template header file, do not compile directly
+
+Instantiation of tw_timer_template.h generates named structures to
+implement specific timer wheel geometries. Choices include: number of
+timer wheels (currently, 1 or 2), number of slots per ring (a power of
+two), and the number of timers per "object handle".
+
+Internally, user object/timer handles are 32-bit integers, so if one
+selects 16 timers/object (4 bits), the handle space is limited to
+2**28 objects.
+
+Here are the specific settings required to generate a single 2048 slot
+wheel which supports 2 timers per object:
+
+ #define TW_TIMER_WHEELS 1
+ #define TW_SLOTS_PER_RING 2048
+ #define TW_RING_SHIFT 11
+ #define TW_RING_MASK (TW_SLOTS_PER_RING -1)
+ #define TW_TIMERS_PER_OBJECT 2
+ #define LOG2_TW_TIMERS_PER_OBJECT 1
+ #define TW_SUFFIX _2t_1w_2048sl
+
+See tw_timer_2t_1w_2048sl.h for a complete example.
+
+tw_timer_template.h is not intended to be #included directly. Client
+code can include multiple timer geometry header files, although
+extreme caution is required when using the TW and TWT macros in such
+a case.
+
+API usage example:
+
+Initialize a two-timer, single 2048-slot wheel w/ a 1-second
+timer granularity:
+
+ tw_timer_wheel_init_2t_1w_2048sl (&tm->single_wheel,
+ expired_timer_single_callback,
+				   1.0 / * timer interval * /,
+				   ~0 / * max expirations per call * /);
+
+Start a timer:
+
+ handle = tw_timer_start_2t_1w_2048sl (&tm->single_wheel, elt_index,
+ [0 | 1] / * timer id * / ,
+ expiration_time_in_u32_ticks);
+
+Stop a timer:
+
+ tw_timer_stop_2t_1w_2048sl (&tm->single_wheel, handle);
+
+Expired timer callback:
+
+ static void
+ expired_timer_single_callback (u32 * expired_timers)
+ {
+ int i;
+ u32 pool_index, timer_id;
+ tw_timer_test_elt_t *e;
+ tw_timer_test_main_t *tm = &tw_timer_test_main;
+
+      for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ pool_index = expired_timers[i] & 0x7FFFFFFF;
+ timer_id = expired_timers[i] >> 31;
+
+ ASSERT (timer_id == 1);
+
+ e = pool_elt_at_index (tm->test_elts, pool_index);
+
+ if (e->expected_to_expire != tm->single_wheel.current_tick)
+ {
+ fformat (stdout, "[%d] expired at %d not %d\n",
+ e - tm->test_elts, tm->single_wheel.current_tick,
+ e->expected_to_expire);
+ }
+ pool_put (tm->test_elts, e);
+ }
+ }
+ */
+
+#if (TW_TIMER_WHEELS != 1 && TW_TIMER_WHEELS != 2 && TW_TIMER_WHEELS != 3)
+#error TW_TIMER_WHEELS must be 1, 2 or 3
+#endif
+
+typedef struct
+{
+ /** next, previous pool indices */
+ u32 next;
+ u32 prev;
+
+ union
+ {
+ struct
+ {
+#if (TW_TIMER_WHEELS == 3)
+ /** fast ring offset, only valid in the slow ring */
+ u16 fast_ring_offset;
+ /** slow ring offset, only valid in the glacier ring */
+ u16 slow_ring_offset;
+#endif
+#if (TW_TIMER_WHEELS == 2)
+ /** fast ring offset, only valid in the slow ring */
+ u16 fast_ring_offset;
+      /** padding to keep the union layout consistent */
+      u16 pad;
+#endif
+ };
+
+#if (TW_OVERFLOW_VECTOR > 0)
+ u64 expiration_time;
+#endif
+ };
+
+ /** user timer handle */
+ u32 user_handle;
+} TWT (tw_timer);
+
+/*
+ * These structures are shared by all geometries,
+ * so they get their own one-time include guard...
+ */
+#ifndef __defined_tw_timer_wheel_slot__
+#define __defined_tw_timer_wheel_slot__
+typedef struct
+{
+ /** Listhead of timers which expire in this interval */
+ u32 head_index;
+} tw_timer_wheel_slot_t;
+typedef enum
+{
+ /** Fast timer ring ID */
+ TW_TIMER_RING_FAST,
+ /** Slow timer ring ID */
+ TW_TIMER_RING_SLOW,
+ /** Glacier ring ID */
+ TW_TIMER_RING_GLACIER,
+} tw_ring_index_t;
+#endif /* __defined_tw_timer_wheel_slot__ */
+
+typedef CLIB_PACKED (struct
+ {
+ u8 timer_id;
+ u32 pool_index;
+ u32 handle;
+ }) TWT (trace);
+
+typedef struct
+{
+ /** Timer pool */
+ TWT (tw_timer) * timers;
+
+ /** Next time the wheel should run */
+ f64 next_run_time;
+
+ /** Last time the wheel ran */
+ f64 last_run_time;
+
+ /** Timer ticks per second */
+ f64 ticks_per_second;
+
+ /** Timer interval, also needed to avoid fp divide in speed path */
+ f64 timer_interval;
+
+ /** current tick */
+ u64 current_tick;
+
+ /** first expiration time */
+ u64 first_expires_tick;
+
+ /** current wheel indices */
+ u32 current_index[TW_TIMER_WHEELS];
+
+ /** wheel arrays */
+ tw_timer_wheel_slot_t w[TW_TIMER_WHEELS][TW_SLOTS_PER_RING];
+
+#if TW_OVERFLOW_VECTOR > 0
+ tw_timer_wheel_slot_t overflow;
+#endif
+
+#if TW_FAST_WHEEL_BITMAP > 0
+ /** Fast wheel slot occupancy bitmap */
+ uword *fast_slot_bitmap;
+#endif
+
+ /** expired timer callback, receives a vector of handles */
+ void (*expired_timer_callback) (u32 * expired_timer_handles);
+
+ /** vectors of expired timers */
+ u32 *expired_timer_handles;
+
+ /** maximum expirations */
+ u32 max_expirations;
+
+#if TW_START_STOP_TRACE_SIZE > 0
+  /** start/stop/expire tracing: current index, wrap count, trace ring */
+ u32 trace_index;
+ u32 trace_wrapped;
+ TWT (trace) traces[TW_START_STOP_TRACE_SIZE];
+#endif
+
+} TWT (tw_timer_wheel);
+
+u32 TW (tw_timer_start) (TWT (tw_timer_wheel) * tw,
+ u32 pool_index, u32 timer_id, u64 interval);
+
+void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle);
+
+void TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw,
+ void *expired_timer_callback,
+ f64 timer_interval, u32 max_expirations);
+
+void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw);
+
+u32 *TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now);
+u32 *TW (tw_timer_expire_timers_vec) (TWT (tw_timer_wheel) * tw, f64 now,
+ u32 * vec);
+#if TW_FAST_WHEEL_BITMAP
+u32 TW (tw_timer_first_expires_in_ticks) (TWT (tw_timer_wheel) * tw);
+#endif
+
+#if TW_START_STOP_TRACE_SIZE > 0
+void TW (tw_search_trace) (TWT (tw_timer_wheel) * tw, u32 handle);
+void TW (tw_timer_trace) (TWT (tw_timer_wheel) * tw, u32 timer_id,
+ u32 pool_index, u32 handle);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/types.h b/src/vppinfra/types.h
new file mode 100644
index 00000000..f87bb48c
--- /dev/null
+++ b/src/vppinfra/types.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001-2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_types_h
+#define included_clib_types_h
+
+/* Standard CLIB types. */
+
+/* Define signed and unsigned 8, 16, 32, and 64 bit types
+ and machine signed/unsigned word for all architectures. */
+typedef char i8;
+typedef short i16;
+
+/* Avoid conflicts with Linux asm/types.h when __KERNEL__ */
+#if defined(CLIB_LINUX_KERNEL)
+/* Linux also defines u8/u16/u32/u64 types. */
+#include <asm/types.h>
+#define CLIB_AVOID_CLASH_WITH_LINUX_TYPES
+
+#else /* ! CLIB_LINUX_KERNEL */
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+#endif /* ! CLIB_LINUX_KERNEL */
+
+#if defined (__x86_64__)
+#ifndef __COVERITY__
+typedef int i128 __attribute__ ((mode (TI)));
+typedef unsigned int u128 __attribute__ ((mode (TI)));
+#endif
+#endif
+
+#if (defined(i386) || defined(_mips) || defined(powerpc) || defined (__SPU__) || defined(__sparc__) || defined(__arm__) || defined (__xtensa__) || defined(__TMS320C6X__))
+typedef int i32;
+typedef long long i64;
+
+#ifndef CLIB_AVOID_CLASH_WITH_LINUX_TYPES
+typedef unsigned int u32;
+typedef unsigned long long u64;
+#endif /* CLIB_AVOID_CLASH_WITH_LINUX_TYPES */
+
+#elif defined(_mips) && __mips == 64
+#define log2_uword_bits 6
+#define clib_address_bits _MIPS_SZPTR
+
+#elif defined(alpha) || defined(__x86_64__) || defined (__powerpc64__) || defined (__aarch64__)
+typedef int i32;
+typedef long i64;
+
+#define log2_uword_bits 6
+#define clib_address_bits 64
+
+#ifndef CLIB_AVOID_CLASH_WITH_LINUX_TYPES
+typedef unsigned int u32;
+typedef unsigned long u64;
+#endif /* CLIB_AVOID_CLASH_WITH_LINUX_TYPES */
+
+#else
+#error "can't define types"
+#endif
+
+/* Default to 32 bit machines with 32 bit addresses. */
+#ifndef log2_uword_bits
+#define log2_uword_bits 5
+#endif
+
+/* #ifdef's above define log2_uword_bits. */
+#define uword_bits (1 << log2_uword_bits)
+
+#ifndef clib_address_bits
+#define clib_address_bits 32
+#endif
+
+/* Word types. */
+#if uword_bits == 64
+/* 64 bit word machines. */
+typedef i64 word;
+typedef u64 uword;
+#else
+/* 32 bit word machines. */
+typedef i32 word;
+typedef u32 uword;
+#endif
+
+/* integral type of a pointer (used to cast pointers). */
+#if clib_address_bits == 64
+typedef u64 clib_address_t;
+#else
+typedef u32 clib_address_t;
+#endif
+
+/* These are needed to convert between pointers and machine words.
+ MIPS is currently the only machine that can have different sized
+ pointers and machine words (but only when compiling with 64 bit
+ registers and 32 bit pointers). */
+static inline __attribute__ ((always_inline)) uword
+pointer_to_uword (const void *p)
+{
+ return (uword) (clib_address_t) p;
+}
+
+#define uword_to_pointer(u,type) ((type) (clib_address_t) (u))
+
+/* Any type: can be either word or pointer. */
+typedef word any;
+
+/* Floating point types. */
+typedef double f64;
+typedef float f32;
+
+typedef __complex__ float cf32;
+typedef __complex__ double cf64;
+
+/* Floating point word size. */
+typedef f64 fword;
+
+/* Can be used as either {r,l}value, e.g. these both work
+ clib_mem_unaligned (p, u64) = 99
+ clib_mem_unaligned (p, u64) += 99 */
+
+#define clib_mem_unaligned(pointer,type) \
+ (((struct { CLIB_PACKED (type _data); } *) (pointer))->_data)
+
+/* Access memory with specified alignment depending on align argument.
+ As with clib_mem_unaligned, may be used as {r,l}value. */
+#define clib_mem_aligned(addr,type,align) \
+ (((struct { \
+ type _data \
+ __attribute__ ((aligned (align), packed)); \
+ } *) (addr))->_data)
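+
+/* Example (illustrative; buffer and offset are placeholders): read and
+   update a potentially unaligned 64-bit counter in a byte buffer,
+   usable as both rvalue and lvalue per the comment above:
+
+     u8 *p = buffer + offset;
+     u64 v = clib_mem_unaligned (p, u64);
+     clib_mem_unaligned (p, u64) = v + 1;
+*/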
+
+#endif /* included_clib_types_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/unformat.c b/src/vppinfra/unformat.c
new file mode 100644
index 00000000..5b17562f
--- /dev/null
+++ b/src/vppinfra/unformat.c
@@ -0,0 +1,1083 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+
+/* Call user's function to fill input buffer. */
+uword
+_unformat_fill_input (unformat_input_t * i)
+{
+ uword l, first_mark;
+
+ if (i->index == UNFORMAT_END_OF_INPUT)
+ return i->index;
+
+ first_mark = l = vec_len (i->buffer);
+ if (vec_len (i->buffer_marks) > 0)
+ first_mark = i->buffer_marks[0];
+
+ /* Re-use buffer when no marks. */
+ if (first_mark > 0)
+ vec_delete (i->buffer, first_mark, 0);
+
+ i->index = vec_len (i->buffer);
+ for (l = 0; l < vec_len (i->buffer_marks); l++)
+ i->buffer_marks[l] -= first_mark;
+
+ /* Call user's function to fill the buffer. */
+ if (i->fill_buffer)
+ i->index = i->fill_buffer (i);
+
+ /* If input pointer is still beyond end of buffer even after
+ fill then we've run out of input. */
+ if (i->index >= vec_len (i->buffer))
+ i->index = UNFORMAT_END_OF_INPUT;
+
+ return i->index;
+}
+
+always_inline uword
+is_white_space (uword c)
+{
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Format function for dumping input stream. */
+u8 *
+format_unformat_error (u8 * s, va_list * va)
+{
+ unformat_input_t *i = va_arg (*va, unformat_input_t *);
+ uword l = vec_len (i->buffer);
+
+ /* Only show so much of the input buffer (it could be really large). */
+ uword n_max = 30;
+
+ if (i->index < l)
+ {
+ uword n = l - i->index;
+ u8 *p, *p_end;
+
+ p = i->buffer + i->index;
+ p_end = p + (n > n_max ? n_max : n);
+
+ /* Skip white space at end. */
+ if (n <= n_max)
+ {
+ while (p_end > p && is_white_space (p_end[-1]))
+ p_end--;
+ }
+
+ while (p < p_end)
+ {
+ switch (*p)
+ {
+ case '\r':
+ vec_add (s, "\\r", 2);
+ break;
+ case '\n':
+ vec_add (s, "\\n", 2);
+ break;
+ case '\t':
+ vec_add (s, "\\t", 2);
+ break;
+ default:
+ vec_add1 (s, *p);
+ break;
+ }
+ p++;
+ }
+
+ if (n > n_max)
+ vec_add (s, "...", 3);
+ }
+
+ return s;
+}
+
+/* Print everything: not just error context. */
+u8 *
+format_unformat_input (u8 * s, va_list * va)
+{
+ unformat_input_t *i = va_arg (*va, unformat_input_t *);
+ uword l, n;
+
+ if (i->index == UNFORMAT_END_OF_INPUT)
+ s = format (s, "{END_OF_INPUT}");
+ else
+ {
+ l = vec_len (i->buffer);
+ n = l - i->index;
+ if (n > 0)
+ vec_add (s, i->buffer + i->index, n);
+ }
+
+ return s;
+}
+
+#if CLIB_DEBUG > 0
+void
+di (unformat_input_t * i)
+{
+ fformat (stderr, "%U\n", format_unformat_input, i);
+}
+#endif
+
+/* Parse delimited vector string. If the string starts with { then it
+   is delimited by balanced braces; otherwise it is delimited by white
+   space. {} were chosen since they are special to the shell. */
+static uword
+unformat_string (unformat_input_t * input,
+ uword delimiter_character,
+ uword format_character, va_list * va)
+{
+ u8 **string_return = va_arg (*va, u8 **);
+ u8 *s = 0;
+ word paren = 0;
+ word is_paren_delimited = 0;
+ word backslash = 0;
+ uword c;
+
+ switch (delimiter_character)
+ {
+ case '%':
+ case ' ':
+ case '\t':
+ delimiter_character = 0;
+ break;
+ }
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ word add_to_vector;
+
+ /* Null return string means to skip over delimited input. */
+ add_to_vector = string_return != 0;
+
+ if (backslash)
+ backslash = 0;
+ else
+ switch (c)
+ {
+ case '\\':
+ backslash = 1;
+ add_to_vector = 0;
+ break;
+
+ case '{':
+ if (paren == 0 && vec_len (s) == 0)
+ {
+ is_paren_delimited = 1;
+ add_to_vector = 0;
+ }
+ paren++;
+ break;
+
+ case '}':
+ paren--;
+ if (is_paren_delimited && paren == 0)
+ goto done;
+ break;
+
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ if (!is_paren_delimited)
+ {
+ unformat_put_input (input);
+ goto done;
+ }
+ break;
+
+ default:
+ if (!is_paren_delimited && c == delimiter_character)
+ {
+ unformat_put_input (input);
+ goto done;
+ }
+ }
+
+ if (add_to_vector)
+ vec_add1 (s, c);
+ }
+
+done:
+ if (string_return)
+ {
+      /* Match an open brace followed by end-of-input as a literal '{'. */
+ if (c == UNFORMAT_END_OF_INPUT && vec_len (s) == 0 && paren == 1)
+ vec_add1 (s, '{');
+
+ /* Don't match null string. */
+ if (c == UNFORMAT_END_OF_INPUT && vec_len (s) == 0)
+ return 0;
+
+ /* Null terminate C string. */
+ if (format_character == 's')
+ vec_add1 (s, 0);
+
+ *string_return = s;
+ }
+ else
+ vec_free (s); /* just to make sure */
+
+ return 1;
+}
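+
+/* Example (illustrative): with the input "set name {foo bar} ...",
+
+     u8 *name;
+     if (unformat (input, "set name %v", &name))
+       ...
+
+   yields the 7-byte vector "foo bar": the braces let the string contain
+   white space and are stripped from the result. */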
+
+uword
+unformat_hex_string (unformat_input_t * input, va_list * va)
+{
+ u8 **hexstring_return = va_arg (*va, u8 **);
+ u8 *s;
+ uword n, d, c;
+
+ n = 0;
+ d = 0;
+ s = 0;
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ if (c >= '0' && c <= '9')
+ d = 16 * d + c - '0';
+ else if (c >= 'a' && c <= 'f')
+ d = 16 * d + 10 + c - 'a';
+ else if (c >= 'A' && c <= 'F')
+ d = 16 * d + 10 + c - 'A';
+ else
+ {
+ unformat_put_input (input);
+ break;
+ }
+ n++;
+
+ if (n == 2)
+ {
+ vec_add1 (s, d);
+ n = d = 0;
+ }
+ }
+
+ /* Hex string must have even number of digits. */
+ if (n % 2)
+ {
+ vec_free (s);
+ return 0;
+ }
+ /* Make sure something was processed. */
+ else if (s == 0)
+ {
+ return 0;
+ }
+
+ *hexstring_return = s;
+ return 1;
+}
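+
+/* Example (illustrative): parse "deadbeef" into a 4-byte vector,
+
+     u8 *data;
+     if (unformat (input, "%U", unformat_hex_string, &data))
+       ...   / * vec_len (data) == 4, data[0] == 0xde * /
+
+   An odd number of hex digits makes the parse fail. */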
+
+/* unformat (input "foo%U", unformat_eof) matches terminal foo only */
+uword
+unformat_eof (unformat_input_t * input, va_list * va)
+{
+ return unformat_check_input (input) == UNFORMAT_END_OF_INPUT;
+}
+
+/* Parse a token containing given set of characters. */
+uword
+unformat_token (unformat_input_t * input, va_list * va)
+{
+ u8 *token_chars = va_arg (*va, u8 *);
+ u8 **string_return = va_arg (*va, u8 **);
+ u8 *s, map[256];
+ uword i, c;
+
+ if (!token_chars)
+ token_chars = (u8 *) "a-zA-Z0-9_";
+
+ memset (map, 0, sizeof (map));
+ for (s = token_chars; *s;)
+ {
+ /* Parse range. */
+      if (s[1] == '-' && s[0] < s[2])
+ {
+ for (i = s[0]; i <= s[2]; i++)
+ map[i] = 1;
+ s = s + 3;
+ }
+ else
+ {
+ map[s[0]] = 1;
+ s = s + 1;
+ }
+ }
+
+ s = 0;
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!map[c])
+ {
+ unformat_put_input (input);
+ break;
+ }
+
+ vec_add1 (s, c);
+ }
+
+ if (vec_len (s) == 0)
+ return 0;
+
+ *string_return = s;
+ return 1;
+}
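+
+/* Example (illustrative): collect one identifier-like token,
+
+     u8 *token;
+     if (unformat (input, "%U", unformat_token, "a-zA-Z0-9_", &token))
+       ...
+
+   Parsing stops at the first character outside the given set, which is
+   expressed as literal characters and '-' ranges. */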
+
+/* Unformat (parse) function which reads a %v string and converts it
+   to an unformat_input_t. */
+uword
+unformat_input (unformat_input_t * i, va_list * args)
+{
+ unformat_input_t *sub_input = va_arg (*args, unformat_input_t *);
+ u8 *s;
+
+ if (unformat (i, "%v", &s))
+ {
+ unformat_init_vector (sub_input, s);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Parse a line ending with \n and return it. */
+uword
+unformat_line (unformat_input_t * i, va_list * va)
+{
+ u8 *line = 0, **result = va_arg (*va, u8 **);
+ uword c;
+
+ while ((c = unformat_get_input (i)) != '\n' && c != UNFORMAT_END_OF_INPUT)
+ {
+ vec_add1 (line, c);
+ }
+
+ *result = line;
+ return vec_len (line);
+}
+
+/* Parse a line ending with \n and return it as an unformat_input_t. */
+uword
+unformat_line_input (unformat_input_t * i, va_list * va)
+{
+ unformat_input_t *result = va_arg (*va, unformat_input_t *);
+ u8 *line;
+ if (!unformat_user (i, unformat_line, &line))
+ return 0;
+ unformat_init_vector (result, line);
+ return 1;
+}
+
+/* Values for is_signed. */
+#define UNFORMAT_INTEGER_SIGNED 1
+#define UNFORMAT_INTEGER_UNSIGNED 0
+
+static uword
+unformat_integer (unformat_input_t * input,
+ va_list * va, uword base, uword is_signed, uword data_bytes)
+{
+ uword c, digit;
+ uword value = 0;
+ uword n_digits = 0;
+ uword n_input = 0;
+ uword sign = 0;
+
+  /* We only support bases 2 through 64. */
+ if (base < 2 || base > 64)
+ goto error;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ switch (c)
+ {
+ case '-':
+ if (n_input == 0)
+ {
+ if (is_signed)
+ {
+ sign = 1;
+ goto next_digit;
+ }
+ else
+ /* Leading sign for unsigned number. */
+ goto error;
+ }
+ /* Sign after input (e.g. 100-200). */
+ goto put_input_done;
+
+ case '+':
+ if (n_input > 0)
+ goto put_input_done;
+ sign = 0;
+ goto next_digit;
+
+ case '0' ... '9':
+ digit = c - '0';
+ break;
+
+ case 'a' ... 'z':
+ digit = 10 + (c - 'a');
+ break;
+
+ case 'A' ... 'Z':
+ digit = 10 + (base >= 36 ? 26 : 0) + (c - 'A');
+ break;
+
+ case '/':
+ digit = 62;
+ break;
+
+ case '?':
+ digit = 63;
+ break;
+
+ default:
+ goto put_input_done;
+ }
+
+ if (digit >= base)
+ {
+ put_input_done:
+ unformat_put_input (input);
+ goto done;
+ }
+
+ {
+ uword new_value = base * value + digit;
+
+ /* Check for overflow. */
+ if (new_value < value)
+ goto error;
+ value = new_value;
+ }
+ n_digits += 1;
+
+ next_digit:
+ n_input++;
+ }
+
+done:
+ if (sign)
+ value = -value;
+
+ if (n_digits > 0)
+ {
+ void *v = va_arg (*va, void *);
+
+ if (data_bytes == ~0)
+ data_bytes = sizeof (int);
+
+ switch (data_bytes)
+ {
+ case 1:
+ *(u8 *) v = value;
+ break;
+ case 2:
+ *(u16 *) v = value;
+ break;
+ case 4:
+ *(u32 *) v = value;
+ break;
+ case 8:
+ *(u64 *) v = value;
+ break;
+ default:
+ goto error;
+ }
+
+ return 1;
+ }
+
+error:
+ return 0;
+}
+
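+/* Editor's sketch of the integer conversions driven by this helper
+   (dispatched from do_percent() below); values are hypothetical:
+
+     u32 a = 0;
+     u64 b = 0;
+
+     unformat_init_string (&in, "10 ff", 5);
+     if (unformat (&in, "%d %llx", &a, &b))
+       ;                          <- a == 10, b == 0xff
+
+   Note that %x does not skip a "0x" prefix; digits are consumed
+   directly in the requested base. */
+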
+/* Return x * 10^n */
+static f64
+times_power_of_ten (f64 x, int n)
+{
+ if (n >= 0)
+ {
+ static f64 t[8] = { 1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, };
+ while (n >= 8)
+ {
+ x *= 1e+8;
+ n -= 8;
+ }
+ return x * t[n];
+ }
+ else
+ {
+ static f64 t[8] = { 1e-0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, };
+ while (n <= -8)
+ {
+ x *= 1e-8;
+ n += 8;
+ }
+ return x * t[-n];
+ }
+}
+
+static uword
+unformat_float (unformat_input_t * input, va_list * va)
+{
+ uword c;
+ u64 values[3];
+ uword n_digits[3], value_index = 0;
+ uword signs[2], sign_index = 0;
+ uword n_input = 0;
+
+ memset (values, 0, sizeof (values));
+ memset (n_digits, 0, sizeof (n_digits));
+ memset (signs, 0, sizeof (signs));
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ switch (c)
+ {
+ case '-':
+ if (value_index == 2 && n_digits[2] == 0)
+ /* sign of exponent: it's ok. */ ;
+
+ else if (value_index < 2 && n_digits[0] > 0)
+ {
+ /* 123- */
+ unformat_put_input (input);
+ goto done;
+ }
+
+ else if (n_input > 0)
+ goto error;
+
+ signs[sign_index++] = 1;
+ goto next_digit;
+
+ case '+':
+ if (value_index == 2 && n_digits[2] == 0)
+ /* sign of exponent: it's ok. */ ;
+
+ else if (value_index < 2 && n_digits[0] > 0)
+ {
+ /* 123+ */
+ unformat_put_input (input);
+ goto done;
+ }
+
+ else if (n_input > 0)
+ goto error;
+ signs[sign_index++] = 0;
+ goto next_digit;
+
+ case 'e':
+ case 'E':
+ if (n_input == 0)
+ goto error;
+ value_index = 2;
+ sign_index = 1;
+ break;
+
+ case '.':
+ if (value_index > 0)
+ goto error;
+ value_index = 1;
+ break;
+
+ case '0' ... '9':
+ {
+ u64 tmp;
+
+ tmp = values[value_index] * 10 + c - '0';
+
+ /* Check for overflow. */
+ if (tmp < values[value_index])
+ goto error;
+ values[value_index] = tmp;
+ n_digits[value_index] += 1;
+ }
+ break;
+
+ default:
+ unformat_put_input (input);
+ goto done;
+ }
+
+ next_digit:
+ n_input++;
+ }
+
+done:
+ {
+ f64 f_values[2], *value_return;
+ word expon;
+
+ /* Must have either whole or fraction digits. */
+ if (n_digits[0] + n_digits[1] <= 0)
+ goto error;
+
+    f_values[0] = values[0];
+
+    f_values[1] = values[1];
+    f_values[1] = times_power_of_ten (f_values[1], -n_digits[1]);
+
+    /* Combine whole and fraction parts before applying the mantissa
+       sign, so that "-1.25" parses as -1.25 rather than -1 + 0.25. */
+    f_values[0] += f_values[1];
+    if (signs[0])
+      f_values[0] = -f_values[0];
+
+ expon = values[2];
+ if (signs[1])
+ expon = -expon;
+
+ f_values[0] = times_power_of_ten (f_values[0], expon);
+
+ value_return = va_arg (*va, f64 *);
+ *value_return = f_values[0];
+ return 1;
+ }
+
+error:
+ return 0;
+}
+
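+/* Editor's sketch (hypothetical value) exercising the float parser
+   via the %f conversion handled in do_percent() below:
+
+     f64 x;
+
+     unformat_init_string (&in, "-1.25e3", 7);
+     if (unformat (&in, "%f", &x))
+       ;                          <- x == -1250.0
+*/
+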
+static const char *
+match_input_with_format (unformat_input_t * input, const char *f)
+{
+ uword cf, ci;
+
+ ASSERT (*f != 0);
+
+ while (1)
+ {
+ cf = *f;
+ if (cf == 0 || cf == '%' || cf == ' ')
+ break;
+ f++;
+
+ ci = unformat_get_input (input);
+
+ if (cf != ci)
+ return 0;
+ }
+ return f;
+}
+
+static const char *
+do_percent (unformat_input_t * input, va_list * va, const char *f)
+{
+ uword cf, n, data_bytes = ~0;
+
+ cf = *f++;
+
+ switch (cf)
+ {
+ default:
+ break;
+
+ case 'w':
+ /* Word types. */
+ cf = *f++;
+ data_bytes = sizeof (uword);
+ break;
+
+ case 'l':
+ cf = *f++;
+ if (cf == 'l')
+ {
+ cf = *f++;
+ data_bytes = sizeof (long long);
+ }
+ else
+ {
+ data_bytes = sizeof (long);
+ }
+ break;
+
+ case 'L':
+ cf = *f++;
+ data_bytes = sizeof (long long);
+ break;
+ }
+
+ n = 0;
+ switch (cf)
+ {
+ case 'D':
+      data_bytes = va_arg (*va, int);
+      /* fall through */
+ case 'd':
+ n = unformat_integer (input, va, 10,
+ UNFORMAT_INTEGER_SIGNED, data_bytes);
+ break;
+
+ case 'u':
+ n = unformat_integer (input, va, 10,
+ UNFORMAT_INTEGER_UNSIGNED, data_bytes);
+ break;
+
+ case 'b':
+ n = unformat_integer (input, va, 2,
+ UNFORMAT_INTEGER_UNSIGNED, data_bytes);
+ break;
+
+ case 'o':
+ n = unformat_integer (input, va, 8,
+ UNFORMAT_INTEGER_UNSIGNED, data_bytes);
+ break;
+
+ case 'X':
+      data_bytes = va_arg (*va, int);
+      /* fall through */
+ case 'x':
+ n = unformat_integer (input, va, 16,
+ UNFORMAT_INTEGER_UNSIGNED, data_bytes);
+ break;
+
+ case 'f':
+ n = unformat_float (input, va);
+ break;
+
+ case 's':
+ case 'v':
+ n = unformat_string (input, f[0], cf, va);
+ break;
+
+ case 'U':
+ {
+ unformat_function_t *f = va_arg (*va, unformat_function_t *);
+ n = f (input, va);
+ }
+ break;
+
+ case '=':
+ case '|':
+ {
+ int *var = va_arg (*va, int *);
+ uword val = va_arg (*va, int);
+
+ if (cf == '|')
+ val |= *var;
+ *var = val;
+ n = 1;
+ }
+ break;
+ }
+
+ return n ? f : 0;
+}
+
+uword
+unformat_skip_white_space (unformat_input_t * input)
+{
+ uword n = 0;
+ uword c;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!is_white_space (c))
+ {
+ unformat_put_input (input);
+ break;
+ }
+ n++;
+ }
+ return n;
+}
+
+uword
+va_unformat (unformat_input_t * input, const char *fmt, va_list * va)
+{
+ const char *f;
+ uword input_matches_format;
+ uword default_skip_input_white_space;
+ uword n_input_white_space_skipped;
+ uword last_non_white_space_match_percent;
+ uword last_non_white_space_match_format;
+
+ vec_add1_aligned (input->buffer_marks, input->index,
+ sizeof (input->buffer_marks[0]));
+
+ f = fmt;
+ default_skip_input_white_space = 1;
+ input_matches_format = 0;
+ last_non_white_space_match_percent = 0;
+ last_non_white_space_match_format = 0;
+
+ while (1)
+ {
+ char cf;
+ uword is_percent, skip_input_white_space;
+
+ cf = *f;
+ is_percent = 0;
+
+ /* Always skip input white space at start of format string.
+ Otherwise use default skip value which can be changed by %_
+ (see below). */
+ skip_input_white_space = f == fmt || default_skip_input_white_space;
+
+ /* Spaces in format request skipping input white space. */
+ if (is_white_space (cf))
+ {
+ skip_input_white_space = 1;
+
+ /* Multiple format spaces are equivalent to a single white
+ space. */
+ while (is_white_space (*++f))
+ ;
+ }
+ else if (cf == '%')
+ {
+ /* %_ toggles whether or not to skip input white space. */
+ switch (*++f)
+ {
+ case '_':
+ default_skip_input_white_space =
+ !default_skip_input_white_space;
+ f++;
+ /* For transition from skip to no-skip in middle of format
+ string, skip input white space. For example, the following matches:
+ fmt = "%_%d.%d%_->%_%d.%d%_"
+ input "1.2 -> 3.4"
+ Without this the space after -> does not get skipped. */
+ if (!default_skip_input_white_space
+ && !(f == fmt + 2 || *f == 0))
+ unformat_skip_white_space (input);
+ continue;
+
+ /* %% means match % */
+ case '%':
+ break;
+
+ /* % at end of format string. */
+ case 0:
+ goto parse_fail;
+
+ default:
+ is_percent = 1;
+ break;
+ }
+ }
+
+ n_input_white_space_skipped = 0;
+ if (skip_input_white_space)
+ n_input_white_space_skipped = unformat_skip_white_space (input);
+
+ /* End of format string. */
+ if (cf == 0)
+ {
+	  /* Force a parse error when the format string ends while the
+	     input is neither at white space nor at its end; e.g. this
+	     prevents format "foo" from matching input "food".
+	     last_non_white_space_match_percent makes "foo %d" match
+	     input "foo 10,bletch", with %d matching 10. */
+ if (skip_input_white_space
+ && !last_non_white_space_match_percent
+ && !last_non_white_space_match_format
+ && n_input_white_space_skipped == 0
+ && input->index != UNFORMAT_END_OF_INPUT)
+ goto parse_fail;
+ break;
+ }
+
+ last_non_white_space_match_percent = is_percent;
+ last_non_white_space_match_format = 0;
+
+ /* Explicit spaces in format must match input white space. */
+ if (cf == ' ' && !default_skip_input_white_space)
+ {
+ if (n_input_white_space_skipped == 0)
+ goto parse_fail;
+ }
+
+ else if (is_percent)
+ {
+ if (!(f = do_percent (input, va, f)))
+ goto parse_fail;
+ }
+
+ else
+ {
+ const char *g = match_input_with_format (input, f);
+ if (!g)
+ goto parse_fail;
+ last_non_white_space_match_format = g > f;
+ f = g;
+ }
+ }
+
+ input_matches_format = 1;
+parse_fail:
+
+ /* Rewind buffer marks. */
+ {
+ uword l = vec_len (input->buffer_marks);
+
+    /* If we did not match, back the buffer up to the last mark. */
+ if (!input_matches_format)
+ input->index = input->buffer_marks[l - 1];
+
+ _vec_len (input->buffer_marks) = l - 1;
+ }
+
+ return input_matches_format;
+}
+
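+/* Editor's note with a usage sketch: on a failed match va_unformat()
+   rewinds the input to the saved mark, so callers can try alternative
+   formats in sequence (the handler names below are hypothetical):
+
+     if (unformat (&in, "add %d", &n))
+       do_add (n);
+     else if (unformat (&in, "del %d", &n))
+       do_del (n);
+*/
+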
+uword
+unformat (unformat_input_t * input, const char *fmt, ...)
+{
+ va_list va;
+ uword result;
+ va_start (va, fmt);
+ result = va_unformat (input, fmt, &va);
+ va_end (va);
+ return result;
+}
+
+uword
+unformat_user (unformat_input_t * input, unformat_function_t * func, ...)
+{
+ va_list va;
+ uword result, l;
+
+ /* Save place in input buffer in case parse fails. */
+ l = vec_len (input->buffer_marks);
+ vec_add1_aligned (input->buffer_marks, input->index,
+ sizeof (input->buffer_marks[0]));
+
+ va_start (va, func);
+ result = func (input, &va);
+ va_end (va);
+
+ if (!result && input->index != UNFORMAT_END_OF_INPUT)
+ input->index = input->buffer_marks[l];
+
+ _vec_len (input->buffer_marks) = l;
+
+ return result;
+}
+
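+/* Editor's sketch: unformat_user() applies a single unformat_function_t
+   without a format string, e.g. with unformat_line defined above:
+
+     u8 *line;
+     if (unformat_user (&in, unformat_line, &line))
+       vec_free (line);           <- line held one input line, sans '\n'
+*/
+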
+/* Setup for unformat of Unix style command line. */
+void
+unformat_init_command_line (unformat_input_t * input, char *argv[])
+{
+ uword i;
+
+ unformat_init (input, 0, 0);
+
+ /* Concatenate argument strings with space in between. */
+ for (i = 1; argv[i]; i++)
+ {
+ vec_add (input->buffer, argv[i], strlen (argv[i]));
+ if (argv[i + 1])
+ vec_add1 (input->buffer, ' ');
+ }
+}
+
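+/* Editor's sketch of the conventional argv-parsing loop built on the
+   function above, given main()'s argv (the option name is made up):
+
+     unformat_input_t in;
+     u32 count = 1;
+
+     unformat_init_command_line (&in, argv);
+     while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT)
+       {
+         if (unformat (&in, "count %d", &count))
+           ;
+         else
+           break;                 <- unknown argument
+       }
+     unformat_free (&in);
+*/
+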
+void
+unformat_init_string (unformat_input_t * input, char *string, int string_len)
+{
+ unformat_init (input, 0, 0);
+ if (string_len > 0)
+ vec_add (input->buffer, string, string_len);
+}
+
+void
+unformat_init_vector (unformat_input_t * input, u8 * vector_string)
+{
+ unformat_init (input, 0, 0);
+ input->buffer = vector_string;
+}
+
+#ifdef CLIB_UNIX
+
+static uword
+unix_file_fill_buffer (unformat_input_t * input)
+{
+ int fd = pointer_to_uword (input->fill_buffer_arg);
+ uword l, n;
+
+ l = vec_len (input->buffer);
+ vec_resize (input->buffer, 4096);
+ n = read (fd, input->buffer + l, 4096);
+ if (n > 0)
+ _vec_len (input->buffer) = l + n;
+
+ if (n <= 0)
+ return UNFORMAT_END_OF_INPUT;
+ else
+ return input->index;
+}
+
+void
+unformat_init_unix_file (unformat_input_t * input, int file_descriptor)
+{
+ unformat_init (input, unix_file_fill_buffer,
+ uword_to_pointer (file_descriptor, void *));
+}
+
+/* Take input from Unix environment variable. */
+uword
+unformat_init_unix_env (unformat_input_t * input, char *var)
+{
+ char *val = getenv (var);
+ if (val)
+ unformat_init_string (input, val, strlen (val));
+ return val != 0;
+}
+
+#endif /* CLIB_UNIX */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/unix-formats.c b/src/vppinfra/unix-formats.c
new file mode 100644
index 00000000..b09433c9
--- /dev/null
+++ b/src/vppinfra/unix-formats.c
@@ -0,0 +1,956 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifdef __KERNEL__
+
+# include <linux/unistd.h>
+# include <linux/signal.h>
+
+#else /* ! __KERNEL__ */
+
+#define _GNU_SOURCE /* to get REG_* in ucontext.h */
+#include <ucontext.h>
+#undef _GNU_SOURCE
+#undef __USE_GNU
+
+#include <unistd.h>
+#include <signal.h>
+#include <grp.h>
+
+#include <time.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <math.h>
+
+#include <vppinfra/time.h>
+
+#ifdef AF_NETLINK
+#include <linux/types.h>
+#include <linux/netlink.h>
+#endif
+
+#endif /* ! __KERNEL__ */
+
+
+#ifdef __KERNEL__
+# include <linux/socket.h>
+# include <linux/in.h>
+# include <linux/ip.h>
+# include <linux/tcp.h>
+# include <linux/udp.h>
+# include <linux/icmp.h>
+# include <linux/if_ether.h>
+# include <linux/if_arp.h>
+#else
+# include <net/if.h> /* struct ifnet may live here */
+# include <netinet/in.h>
+# include <netinet/ip.h>
+# include <netinet/tcp.h>
+# include <netinet/udp.h>
+# include <netinet/ip_icmp.h>
+# include <netinet/if_ether.h>
+#endif /* __KERNEL__ */
+
+#include <vppinfra/bitops.h> /* foreach_set_bit */
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+
+/* Format unix network address family (e.g. AF_INET). */
+u8 * format_address_family (u8 * s, va_list * va)
+{
+ uword family = va_arg (*va, uword);
+ u8 * t = (u8 *) "UNKNOWN";
+ switch (family)
+ {
+#define _(x) case PF_##x: t = (u8 *) #x; break
+ _ (UNSPEC);
+ _ (UNIX); /* Unix domain sockets */
+ _ (INET); /* Internet IP Protocol */
+#ifdef PF_AX25
+ _ (AX25); /* Amateur Radio AX.25 */
+#endif
+#ifdef PF_IPX
+ _ (IPX); /* Novell IPX */
+#endif
+#ifdef PF_APPLETALK
+ _ (APPLETALK); /* AppleTalk DDP */
+#endif
+#ifdef PF_NETROM
+ _ (NETROM); /* Amateur Radio NET/ROM */
+#endif
+#ifdef PF_BRIDGE
+ _ (BRIDGE); /* Multiprotocol bridge */
+#endif
+#ifdef PF_ATMPVC
+ _ (ATMPVC); /* ATM PVCs */
+#endif
+#ifdef PF_X25
+ _ (X25); /* Reserved for X.25 project */
+#endif
+#ifdef PF_INET6
+ _ (INET6); /* IP version 6 */
+#endif
+#ifdef PF_ROSE
+ _ (ROSE); /* Amateur Radio X.25 PLP */
+#endif
+#ifdef PF_DECnet
+ _ (DECnet); /* Reserved for DECnet project */
+#endif
+#ifdef PF_NETBEUI
+  _ (NETBEUI);    /* Reserved for 802.2LLC project */
+#endif
+#ifdef PF_SECURITY
+ _ (SECURITY); /* Security callback pseudo AF */
+#endif
+#ifdef PF_KEY
+ _ (KEY); /* PF_KEY key management API */
+#endif
+#ifdef PF_NETLINK
+ _ (NETLINK);
+#endif
+#ifdef PF_PACKET
+ _ (PACKET); /* Packet family */
+#endif
+#ifdef PF_ASH
+ _ (ASH); /* Ash */
+#endif
+#ifdef PF_ECONET
+ _ (ECONET); /* Acorn Econet */
+#endif
+#ifdef PF_ATMSVC
+ _ (ATMSVC); /* ATM SVCs */
+#endif
+#ifdef PF_SNA
+ _ (SNA); /* Linux SNA Project */
+#endif
+#ifdef PF_IRDA
+ _ (IRDA); /* IRDA sockets */
+#endif
+#undef _
+ }
+ vec_add (s, t, strlen ((char *) t));
+ return s;
+}
+
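+/* Editor's sketch: these formatters compose with vppinfra format()
+   via %U, e.g.:
+
+     u8 *s = format (0, "family %U", format_address_family, AF_INET);
+                                   <- s is the vector "family INET"
+     vec_free (s);
+*/
+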
+u8 * format_network_protocol (u8 * s, va_list * args)
+{
+ uword family = va_arg (*args, uword);
+ uword protocol = va_arg (*args, uword);
+
+#ifndef __KERNEL__
+ struct protoent * p = getprotobynumber (protocol);
+
+ ASSERT (family == AF_INET);
+ if (p)
+ return format (s, "%s", p->p_name);
+ else
+ return format (s, "%d", protocol);
+#else
+ return format (s, "%d/%d", family, protocol);
+#endif
+}
+
+u8 * format_network_port (u8 * s, va_list * args)
+{
+ uword proto = va_arg (*args, uword);
+ uword port = va_arg (*args, uword);
+
+#ifndef __KERNEL__
+ struct servent * p = getservbyport (port, proto == IPPROTO_UDP ? "udp" : "tcp");
+
+ if (p)
+ return format (s, "%s", p->s_name);
+ else
+ return format (s, "%d", port);
+#else
+ return format (s, "%s/%d", proto == IPPROTO_UDP ? "udp" : "tcp", port);
+#endif
+}
+
+/* Format generic network address: takes two arguments family and address.
+ Assumes network byte order. */
+u8 * format_network_address (u8 * s, va_list * args)
+{
+ uword family = va_arg (*args, uword);
+ u8 * addr = va_arg (*args, u8 *);
+
+ switch (family)
+ {
+ case AF_INET:
+ s = format (s, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
+ break;
+
+ case AF_UNSPEC:
+ /* We use AF_UNSPEC for ethernet addresses. */
+ s = format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+ break;
+
+ default:
+ clib_error ("unsupported address family %d", family);
+ }
+
+ return s;
+}
+
+u8 * format_sockaddr (u8 * s, va_list * args)
+{
+ void * v = va_arg (*args, void *);
+ struct sockaddr * sa = v;
+ static u32 local_counter;
+
+ switch (sa->sa_family)
+ {
+ case AF_INET:
+ {
+ struct sockaddr_in * i = v;
+ s = format (s, "%U:%U",
+ format_network_address, AF_INET, &i->sin_addr.s_addr,
+ format_network_port, IPPROTO_TCP, ntohs (i->sin_port));
+ }
+ break;
+
+ case AF_LOCAL:
+ {
+ /*
+ * There isn't anything useful to print.
+ * The unix cli world uses the output to make a node name,
+ * so we need to return a unique name.
+ */
+ s = format (s, "local:%u", local_counter++);
+ }
+ break;
+
+#ifndef __KERNEL__
+#ifdef AF_NETLINK
+ case AF_NETLINK:
+ {
+ struct sockaddr_nl * n = v;
+ s = format (s, "KERNEL-NETLINK");
+ if (n->nl_groups)
+ s = format (s, " (groups 0x%x)", n->nl_groups);
+ break;
+ }
+#endif
+#endif
+
+ default:
+ s = format (s, "sockaddr family %d", sa->sa_family);
+ break;
+ }
+
+ return s;
+}
+
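+/* Editor's sketch (hypothetical values): formatting a struct sockaddr
+   with the function above:
+
+     struct sockaddr_in sin = { 0 };
+     sin.sin_family = AF_INET;
+     sin.sin_port = htons (80);
+     u8 *s = format (0, "%U", format_sockaddr, &sin);
+                                   <- roughly "0.0.0.0:http", or the
+                                      numeric port with no services entry
+     vec_free (s);
+*/
+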
+u8 * format_tcp4_packet (u8 * s, va_list * args)
+{
+ u8 * p = va_arg (*args, u8 *);
+ struct iphdr * ip = (void *) p;
+ struct tcphdr * tcp = (void *) (ip + 1);
+
+ s = format (s, "tcp %U:%U -> %U:%U",
+ format_network_address, AF_INET, &ip->saddr,
+ format_network_port, IPPROTO_TCP, ntohs (tcp->source),
+ format_network_address, AF_INET, &ip->daddr,
+ format_network_port, IPPROTO_TCP, ntohs (tcp->dest));
+
+ s = format (s, ", seq 0x%08x -> 0x%08x", tcp->seq, tcp->ack_seq);
+#define _(f) if (tcp->f) s = format (s, ", " #f);
+ _ (syn); _ (ack); _ (fin); _ (rst); _ (psh); _ (urg);
+#undef _
+
+ if (tcp->window)
+ s = format (s, ", window 0x%04x", tcp->window);
+ if (tcp->urg)
+ s = format (s, ", urg 0x%04x", tcp->urg_ptr);
+
+ return s;
+}
+
+u8 * format_udp4_packet (u8 * s, va_list * args)
+{
+ u8 * p = va_arg (*args, u8 *);
+ struct iphdr * ip = (void *) p;
+ struct udphdr * udp = (void *) (ip + 1);
+
+ s = format (s, "udp %U:%U -> %U:%U",
+ format_network_address, AF_INET, &ip->saddr,
+ format_network_port, IPPROTO_UDP, ntohs (udp->source),
+ format_network_address, AF_INET, &ip->daddr,
+ format_network_port, IPPROTO_UDP, ntohs (udp->dest));
+
+ return s;
+}
+
+u8 * format_icmp4_type_and_code (u8 * s, va_list * args)
+{
+ uword icmp_type = va_arg (*args, uword);
+ uword icmp_code = va_arg (*args, uword);
+
+ switch (icmp_type)
+ {
+#define _(f,str) case ICMP_##f: s = format (s, str); break;
+ _ (ECHOREPLY, "echo reply");
+ _ (DEST_UNREACH, "unreachable");
+ _ (SOURCE_QUENCH, "source quench");
+ _ (REDIRECT, "redirect");
+ _ (ECHO, "echo request");
+ _ (TIME_EXCEEDED, "time exceeded");
+ _ (PARAMETERPROB, "parameter problem");
+ _ (TIMESTAMP, "timestamp request");
+ _ (TIMESTAMPREPLY, "timestamp reply");
+ _ (INFO_REQUEST, "information request");
+ _ (INFO_REPLY, "information reply");
+ _ (ADDRESS, "address mask request");
+ _ (ADDRESSREPLY, "address mask reply");
+#undef _
+ default:
+ s = format (s, "unknown type 0x%x", icmp_type);
+ }
+
+ if (icmp_type == ICMP_DEST_UNREACH)
+ {
+ switch (icmp_code)
+ {
+#define _(f,str) case ICMP_##f: s = format (s, " " # str); break;
+ _ (NET_UNREACH, "network");
+ _ (HOST_UNREACH, "host");
+ _ (PROT_UNREACH, "protocol");
+ _ (PORT_UNREACH, "port");
+ _ (FRAG_NEEDED, ": fragmentation needed/DF set");
+ _ (SR_FAILED, "source route failed");
+ _ (NET_UNKNOWN, "network unknown");
+ _ (HOST_UNKNOWN, "host unknown");
+ _ (HOST_ISOLATED, "host isolated");
+ _ (NET_ANO, "network: admin. prohibited");
+ _ (HOST_ANO, "host: admin. prohibited");
+ _ (NET_UNR_TOS, "network for type-of-service");
+ _ (HOST_UNR_TOS, "host for type-of-service");
+ _ (PKT_FILTERED, ": packet filtered");
+ _ (PREC_VIOLATION, "precedence violation");
+ _ (PREC_CUTOFF, "precedence cut off");
+#undef _
+ default:
+ s = format (s, "unknown code 0x%x", icmp_code);
+ }
+ }
+ else if (icmp_type == ICMP_REDIRECT)
+ {
+ switch (icmp_code)
+ {
+#define _(f,str) case ICMP_##f: s = format (s, " " # str); break;
+ _ (REDIR_NET, "network");
+ _ (REDIR_HOST, "host");
+ _ (REDIR_NETTOS, "network for type-of-service");
+ _ (REDIR_HOSTTOS, "host for type-of-service");
+#undef _
+ default:
+ s = format (s, "unknown code 0x%x", icmp_code);
+ }
+ }
+ else if (icmp_type == ICMP_TIME_EXCEEDED)
+ {
+ switch (icmp_code)
+ {
+#define _(f,str) case ICMP_##f: s = format (s, " " # str); break;
+ _ (EXC_TTL, "time-to-live zero in transit");
+ _ (EXC_FRAGTIME, "time-to-live zero during reassembly");
+#undef _
+ default:
+ s = format (s, "unknown code 0x%x", icmp_code);
+ }
+ }
+
+ return s;
+}
+
+typedef struct {
+ u8 type;
+ u8 code;
+ u16 checksum;
+} icmp4_t;
+
+u8 * format_icmp4_packet (u8 * s, va_list * args)
+{
+ u8 * p = va_arg (*args, u8 *);
+ struct iphdr * ip = (void *) p;
+ icmp4_t * icmp = (void *) (ip + 1);
+ s = format (s, "icmp %U %U -> %U",
+ format_icmp4_type_and_code, icmp->type, icmp->code,
+ format_network_address, AF_INET, &ip->saddr,
+ format_network_address, AF_INET, &ip->daddr);
+
+ return s;
+}
+
+u8 * format_ip4_tos_byte (u8 * s, va_list * args)
+{
+ uword tos = va_arg (*args, uword);
+
+ if (tos & IPTOS_LOWDELAY)
+ s = format (s, "minimize-delay, ");
+ if (tos & IPTOS_MINCOST)
+ s = format (s, "minimize-cost, ");
+ if (tos & IPTOS_THROUGHPUT)
+ s = format (s, "maximize-throughput, ");
+ if (tos & IPTOS_RELIABILITY)
+ s = format (s, "maximize-reliability, ");
+
+ switch (IPTOS_PREC (tos))
+ {
+#define _(x,y) case IPTOS_PREC_##x: s = format (s, y); break
+ _ (NETCONTROL, "network");
+ _ (INTERNETCONTROL, "internet");
+ _ (CRITIC_ECP, "critical");
+ _ (FLASH, "flash");
+ _ (FLASHOVERRIDE, "flash-override");
+ _ (IMMEDIATE, "immediate");
+ _ (PRIORITY, "priority");
+ _ (ROUTINE, "routine");
+#undef _
+ }
+
+ return s;
+}
+
+u8 * format_ip4_packet (u8 * s, va_list * args)
+{
+ u8 * p = va_arg (*args, u8 *);
+ struct iphdr * ip = (void *) p;
+
+ static format_function_t * f[256];
+
+ if (! f[IPPROTO_TCP])
+ {
+ f[IPPROTO_TCP] = format_tcp4_packet;
+ f[IPPROTO_UDP] = format_udp4_packet;
+ f[IPPROTO_ICMP] = format_icmp4_packet;
+ }
+
+ if (f[ip->protocol])
+ return format (s, "%U", f[ip->protocol], p);
+
+ s = format (s, "%U: %U -> %U",
+ format_network_protocol, AF_INET, ip->protocol,
+ format_network_address, AF_INET, &ip->saddr,
+ format_network_address, AF_INET, &ip->daddr);
+
+ return s;
+}
+
+#define foreach_unix_arphrd_type \
+ _ (NETROM, 0) \
+ _ (ETHER, 1) \
+ _ (EETHER, 2) \
+ _ (AX25, 3) \
+ _ (PRONET, 4) \
+ _ (CHAOS, 5) \
+ _ (IEEE802, 6) \
+ _ (ARCNET, 7) \
+ _ (APPLETLK, 8) \
+ _ (DLCI, 15) \
+ _ (ATM, 19) \
+ _ (METRICOM, 23) \
+ _ (IEEE1394, 24) \
+ _ (EUI64, 27) \
+ _ (INFINIBAND, 32) \
+ _ (SLIP, 256) \
+ _ (CSLIP, 257) \
+ _ (SLIP6, 258) \
+ _ (CSLIP6, 259) \
+ _ (RSRVD, 260) \
+ _ (ADAPT, 264) \
+ _ (ROSE, 270) \
+ _ (X25, 271) \
+ _ (HWX25, 272) \
+ _ (PPP, 512) \
+ _ (HDLC, 513) \
+ _ (LAPB, 516) \
+ _ (DDCMP, 517) \
+ _ (RAWHDLC, 518) \
+ _ (TUNNEL, 768) \
+ _ (TUNNEL6, 769) \
+ _ (FRAD, 770) \
+ _ (SKIP, 771) \
+ _ (LOOPBACK, 772) \
+ _ (LOCALTLK, 773) \
+ _ (FDDI, 774) \
+ _ (BIF, 775) \
+ _ (SIT, 776) \
+ _ (IPDDP, 777) \
+ _ (IPGRE, 778) \
+ _ (PIMREG, 779) \
+ _ (HIPPI, 780) \
+ _ (ASH, 781) \
+ _ (ECONET, 782) \
+ _ (IRDA, 783) \
+ _ (FCPP, 784) \
+ _ (FCAL, 785) \
+ _ (FCPL, 786) \
+ _ (FCFABRIC, 787) \
+ _ (IEEE802_TR, 800) \
+ _ (IEEE80211, 801) \
+ _ (IEEE80211_PRISM, 802) \
+ _ (IEEE80211_RADIOTAP, 803) \
+ _ (VOID, 0xFFFF) \
+ _ (NONE, 0xFFFE)
+
+u8 * format_unix_arphrd (u8 * s, va_list * args)
+{
+#ifndef __COVERITY__ /* doesn't understand this at all... */
+ u32 x = va_arg (*args, u32);
+ char * t;
+ switch (x)
+ {
+#define _(f,n) case ARPHRD_##f: t = #f; break;
+ foreach_unix_arphrd_type
+#undef _
+ default:
+ t = 0;
+ break;
+ }
+
+ if (t)
+ s = format (s, "%s", t);
+ else
+ s = format (s, "unknown 0x%x", x);
+#endif
+ return s;
+}
+
+#define foreach_unix_interface_flag \
+ _ (up) \
+ _ (broadcast) \
+ _ (debug) \
+ _ (loopback) \
+ _ (pointopoint) \
+ _ (notrailers) \
+ _ (running) \
+ _ (noarp) \
+ _ (promisc) \
+ _ (allmulti) \
+ _ (master) \
+ _ (slave) \
+ _ (multicast) \
+ _ (portsel) \
+ _ (automedia) \
+ _ (dynamic) \
+ _ (lower_up) \
+ _ (dormant) \
+ _ (echo)
+
+static char * unix_interface_flag_names[] = {
+#define _(f) #f,
+ foreach_unix_interface_flag
+#undef _
+};
+
+u8 * format_unix_interface_flags (u8 * s, va_list * args)
+{
+ u32 x = va_arg (*args, u32);
+ u32 i;
+
+ if (x == 0)
+ s = format (s, "none");
+ else foreach_set_bit (i, x, ({
+ if (i < ARRAY_LEN (unix_interface_flag_names))
+ s = format (s, "%s", unix_interface_flag_names[i]);
+ else
+ s = format (s, "unknown %d", i);
+ if (x >> (i + 1))
+ s = format (s, ", ");
+ }));
+ return s;
+}
+
+typedef struct {
+ u16 ar_hrd; /* format of hardware address */
+ u16 ar_pro; /* format of protocol address */
+ u8 ar_hln; /* length of hardware address */
+ u8 ar_pln; /* length of protocol address */
+ u16 ar_op; /* ARP opcode (command) */
+ u8 ar_sha[6]; /* sender hardware address */
+ u8 ar_spa[4]; /* sender IP address */
+ u8 ar_tha[6]; /* target hardware address */
+ u8 ar_tpa[4]; /* target IP address */
+} arp_ether_ip4_t;
+
+u8 * format_arp_packet (u8 * s, va_list * args)
+{
+ arp_ether_ip4_t * a = va_arg (*args, arp_ether_ip4_t *);
+ char * op = "unknown";
+
+ if (a->ar_pro != ETH_P_IP ||
+ a->ar_hrd != ARPHRD_ETHER)
+ return s;
+
+ switch (a->ar_op)
+ {
+#define _(f) case ARPOP_##f: op = #f; break;
+ _ (REQUEST);
+ _ (REPLY);
+ _ (RREQUEST);
+ _ (RREPLY);
+#undef _
+ }
+
+ s = format (s, "%s %U %U -> %U %U",
+ op,
+ format_network_address, AF_INET, a->ar_spa,
+ format_network_address, AF_UNSPEC, a->ar_sha,
+ format_network_address, AF_INET, a->ar_tpa,
+ format_network_address, AF_UNSPEC, a->ar_tha);
+ return s;
+}
+
+u8 * format_ethernet_proto (u8 * s, va_list * args)
+{
+ uword type = va_arg (*args, uword);
+ char * t = 0;
+
+ switch (type)
+ {
+ case 0: t = "BPDU"; break;
+#define _(f) case ETH_P_##f: t = #f; break;
+ _ (LOOP);
+ _ (PUP);
+#ifdef ETH_P_PUPAT
+ _ (PUPAT);
+#endif
+ _ (IP);
+ _ (X25);
+ _ (ARP);
+ _ (BPQ);
+#ifdef ETH_P_PUPAT
+ _ (IEEEPUP);
+ _ (IEEEPUPAT);
+#endif
+ _ (DEC);
+ _ (DNA_DL);
+ _ (DNA_RC);
+ _ (DNA_RT);
+ _ (LAT);
+ _ (DIAG);
+ _ (CUST);
+ _ (SCA);
+ _ (RARP);
+ _ (ATALK);
+ _ (AARP);
+ _ (IPX);
+ _ (IPV6);
+#ifdef ETH_P_PPP_DISC
+ _ (PPP_DISC);
+ _ (PPP_SES);
+#endif
+#ifdef ETH_P_ATMMPOA
+ _ (ATMMPOA);
+ _ (ATMFATE);
+#endif
+ _ (802_3);
+ _ (AX25);
+ _ (ALL);
+ _ (802_2);
+ _ (SNAP);
+ _ (DDCMP);
+ _ (WAN_PPP);
+ _ (PPP_MP);
+ _ (LOCALTALK);
+ _ (PPPTALK);
+ _ (TR_802_2);
+ _ (MOBITEX);
+ _ (CONTROL);
+ _ (IRDA);
+#ifdef ETH_P_ECONET
+ _ (ECONET);
+#endif
+#undef _
+ }
+
+ if (t)
+ vec_add (s, t, strlen (t));
+ else
+ s = format (s, "ether-type 0x%x", type);
+ return s;
+}
+
+u8 * format_ethernet_packet (u8 * s, va_list * args)
+{
+ struct ethhdr * h = va_arg (*args, struct ethhdr *);
+ uword proto = h->h_proto;
+ u8 * payload = (void *) (h + 1);
+ uword indent;
+
+ /* Check for 802.2/802.3 encapsulation. */
+ if (proto < ETH_DATA_LEN)
+ {
+ typedef struct {
+ u8 dsap, ssap, control;
+ u8 orig_code[3];
+ u16 proto;
+ } ethhdr_802_t;
+ ethhdr_802_t * h1 = (void *) (h + 1);
+ proto = h1->proto;
+ payload = (void *) (h1 + 1);
+ }
+
+ indent = format_get_indent (s);
+
+ s = format (s, "%U: %U -> %U",
+ format_ethernet_proto, proto,
+ format_network_address, AF_UNSPEC, h->h_source,
+ format_network_address, AF_UNSPEC, h->h_dest);
+
+ switch (proto)
+ {
+ case ETH_P_ARP:
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_arp_packet, payload);
+ break;
+ }
+
+ return s;
+}
+
+#ifndef __KERNEL__
+u8 * format_hostname (u8 * s, va_list * args)
+{
+ char buffer[1024];
+ char * b = buffer;
+ if (gethostname (b, sizeof (buffer)) < 0)
+ b = "noname";
+ return format (s, "%s", b);
+}
+#endif
+
+#ifndef __KERNEL__
+u8 * format_timeval (u8 * s, va_list * args)
+{
+ char * fmt = va_arg (*args, char *);
+ struct timeval * tv = va_arg (*args, struct timeval *);
+ struct tm * tm;
+ word msec;
+ char * f, c;
+
+ if (! fmt)
+ fmt = "y/m/d H:M:S:F";
+
+ if (! tv)
+ {
+ static struct timeval now;
+ gettimeofday (&now, 0);
+ tv = &now;
+ }
+
+ msec = flt_round_nearest (1e-3 * tv->tv_usec);
+ if (msec >= 1000)
+ { msec = 0; tv->tv_sec++; }
+
+ {
+ time_t t = tv->tv_sec;
+ tm = localtime (&t);
+ }
+
+ for (f = fmt; *f; f++)
+ {
+ uword what;
+ char * what_fmt = "%d";
+
+ switch (c = *f)
+ {
+ default:
+ vec_add1 (s, c);
+ continue;
+
+ case 'y':
+ what = 1900 + tm->tm_year;
+ what_fmt = "%4d";
+ break;
+ case 'm':
+ what = tm->tm_mon + 1;
+ what_fmt = "%2d";
+ break;
+ case 'd':
+ what = tm->tm_mday;
+ what_fmt = "%2d";
+ break;
+ case 'H':
+ what = tm->tm_hour;
+ what_fmt = "%02d";
+ break;
+ case 'M':
+ what = tm->tm_min;
+ what_fmt = "%02d";
+ break;
+ case 'S':
+ what = tm->tm_sec;
+ what_fmt = "%02d";
+ break;
+ case 'F':
+ what = msec;
+ what_fmt = "%03d";
+ break;
+ }
+
+ s = format (s, what_fmt, what);
+ }
+
+ return s;
+}
+
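+/* Editor's sketch: a null format selects "y/m/d H:M:S:F" and a null
+   timeval selects the current time, so a timestamp can be emitted as:
+
+     u8 *s = format (0, "%U", format_timeval,
+                     (char *) 0, (struct timeval *) 0);
+     vec_free (s);
+*/
+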
+u8 * format_time_float (u8 * s, va_list * args)
+{
+ u8 * fmt = va_arg (*args, u8 *);
+ f64 t = va_arg (*args, f64);
+ struct timeval tv;
+ if (t <= 0)
+ t = unix_time_now ();
+ tv.tv_sec = t;
+ tv.tv_usec = 1e6*(t - tv.tv_sec);
+ return format (s, "%U", format_timeval, fmt, &tv);
+}
+
+u8 * format_signal (u8 * s, va_list * args)
+{
+ uword signum = va_arg (*args, uword);
+ char * t = 0;
+ switch (signum)
+ {
+#define _(x) case x: t = #x; break;
+ _ (SIGHUP);
+ _ (SIGINT);
+ _ (SIGQUIT);
+ _ (SIGILL);
+ _ (SIGTRAP);
+ _ (SIGABRT);
+ _ (SIGBUS);
+ _ (SIGFPE);
+ _ (SIGKILL);
+ _ (SIGUSR1);
+ _ (SIGSEGV);
+ _ (SIGUSR2);
+ _ (SIGPIPE);
+ _ (SIGALRM);
+ _ (SIGTERM);
+#ifdef SIGSTKFLT
+ _ (SIGSTKFLT);
+#endif
+ _ (SIGCHLD);
+ _ (SIGCONT);
+ _ (SIGSTOP);
+ _ (SIGTSTP);
+ _ (SIGTTIN);
+ _ (SIGTTOU);
+ _ (SIGURG);
+ _ (SIGXCPU);
+ _ (SIGXFSZ);
+ _ (SIGVTALRM);
+ _ (SIGPROF);
+ _ (SIGWINCH);
+ _ (SIGIO);
+ _ (SIGPWR);
+#ifdef SIGSYS
+ _ (SIGSYS);
+#endif
+#undef _
+ default:
+ return format (s, "unknown %d", signum);
+ }
+
+ vec_add (s, t, strlen (t));
+ return s;
+}
+
+u8 * format_ucontext_pc (u8 * s, va_list * args)
+{
+ ucontext_t * uc __attribute__((unused));
+ unsigned long * regs = 0;
+ uword reg_no = 0;
+
+ uc = va_arg (*args, ucontext_t *);
+
+#if defined (powerpc)
+ regs = &uc->uc_mcontext.uc_regs->gregs[0];
+#elif defined (powerpc64)
+ regs = &uc->uc_mcontext.uc_regs->gp_regs[0];
+#elif defined (i386) || defined (__x86_64__)
+ regs = (void *) &uc->uc_mcontext.gregs[0];
+#endif
+
+#if defined (powerpc) || defined (powerpc64)
+ reg_no = PT_NIP;
+#elif defined (i386)
+ reg_no = REG_EIP;
+#elif defined (__x86_64__)
+ reg_no = REG_RIP;
+#else
+ reg_no = 0;
+ regs = 0;
+#endif
+
+ if (! regs)
+ return format (s, "unsupported");
+ else
+ return format (s, "%p", regs[reg_no]);
+}
+
+uword
+unformat_unix_gid (unformat_input_t * input, va_list * args)
+{
+ gid_t *gid = va_arg (*args, gid_t *);
+ struct group *grp = 0;
+ int r;
+ u8 *s;
+
+ if (unformat (input, "%d", &r))
+ {
+ grp = getgrgid (r);
+ }
+ else if (unformat (input, "%s", &s))
+ {
+ grp = getgrnam ((char *) s);
+ vec_free (s);
+ }
+ if (grp)
+ {
+ *gid = grp->gr_gid;
+ return 1;
+ }
+ return 0;
+}
+
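+/* Editor's sketch (the group name is hypothetical): the function above
+   accepts either a numeric gid or a group name:
+
+     gid_t gid;
+     unformat_init_string (&in, "wheel", 5);
+     if (unformat_user (&in, unformat_unix_gid, &gid))
+       ;                          <- gid is the id of group "wheel"
+*/
+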
+#endif /* __KERNEL__ */
diff --git a/src/vppinfra/unix-kelog.c b/src/vppinfra/unix-kelog.c
new file mode 100644
index 00000000..88428ee8
--- /dev/null
+++ b/src/vppinfra/unix-kelog.c
@@ -0,0 +1,415 @@
+/*
+ Copyright (c) 2010 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/error.h>
+#include <vppinfra/unix.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/format.h>
+#include <vppinfra/os.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+
+typedef enum
+{
+ RUNNING = 0,
+ WAKEUP,
+} sched_event_type_t;
+
+typedef struct
+{
+ u32 cpu;
+ u8 *task;
+ u32 pid;
+ f64 timestamp;
+ sched_event_type_t type;
+} sched_event_t;
+
+void
+kelog_init (elog_main_t * em, char *kernel_tracer, u32 n_events)
+{
+ int enable_fd, current_tracer_fd, data_fd;
+ int len;
+ struct timespec ts, ts2;
+ char *trace_enable = "/debug/tracing/tracing_enabled";
+ char *current_tracer = "/debug/tracing/current_tracer";
+ char *trace_data = "/debug/tracing/trace";
+ f64 realtime, monotonic;
+ f64 freq, secs_per_clock;
+
+ ASSERT (kernel_tracer);
+
+ /*$$$$ fixme */
+ n_events = 1 << 18;
+
+ /* init first so we won't hurt ourselves if we bail */
+ elog_init (em, n_events);
+
+ enable_fd = open (trace_enable, O_RDWR);
+ if (enable_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_enable);
+ return;
+ }
+ /* disable kernel tracing */
+ if (write (enable_fd, "0\n", 2) != 2)
+ {
+ clib_unix_warning ("disable tracing");
+ close (enable_fd);
+ return;
+ }
+
+ /*
+ * open + clear the data buffer.
+ * see .../linux/kernel/trace/trace.c:tracing_open()
+ */
+ data_fd = open (trace_data, O_RDWR | O_TRUNC);
+ if (data_fd < 0)
+ {
+ clib_warning ("Couldn't open+clear %s", trace_data);
+ return;
+ }
+ close (data_fd);
+
+ /* configure tracing */
+ current_tracer_fd = open (current_tracer, O_RDWR);
+
+ if (current_tracer_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", current_tracer);
+ close (enable_fd);
+ return;
+ }
+
+ len = strlen (kernel_tracer);
+
+ if (write (current_tracer_fd, kernel_tracer, len) != len)
+ {
+ clib_unix_warning ("configure trace");
+ close (current_tracer_fd);
+ close (enable_fd);
+ return;
+ }
+
+ close (current_tracer_fd);
+
+ /*
+ * The kernel event log uses CLOCK_MONOTONIC timestamps,
+ * not CLOCK_REALTIME timestamps. These differ by a constant
+ * but the constant is not available in user mode.
+ * This estimate will be off by one syscall round-trip.
+ */
+ clib_time_init (&em->cpu_timer);
+ em->init_time.cpu = em->cpu_timer.init_cpu_time;
+ syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts);
+
+ /* enable kernel tracing */
+ if (write (enable_fd, "1\n", 2) != 2)
+ {
+ clib_unix_warning ("enable tracing");
+ close (enable_fd);
+ return;
+ }
+
+ close (enable_fd);
+}
+
+
+u8 *
+format_sched_event (u8 * s, va_list * va)
+{
+ sched_event_t *e = va_arg (*va, sched_event_t *);
+
+ s = format (s, "cpu %d task %10s type %s timestamp %12.6f\n",
+ e->cpu, e->task, e->type ? "WAKEUP " : "RUNNING", e->timestamp);
+
+ return s;
+}
+
+sched_event_t *
+parse_sched_switch_trace (u8 * tdata, u32 * index)
+{
+ u8 *cp = tdata + *index;
+ u8 *limit = tdata + vec_len (tdata);
+ int colons;
+ static sched_event_t event;
+ sched_event_t *e = &event;
+ static u8 *task_name;
+ u32 secs, usecs;
+ int i;
+
+again:
+ /* eat leading w/s */
+  while (cp < limit && (*cp == ' ' || *cp == '\t'))
+ cp++;
+ if (cp == limit)
+ return 0;
+
+ /* header line */
+ if (*cp == '#')
+ {
+ while (cp < limit && (*cp != '\n'))
+ cp++;
+ if (*cp == '\n')
+ {
+ cp++;
+ goto again;
+ }
+ clib_warning ("bugger 0");
+ return 0;
+ }
+
+ while (cp < limit && *cp != ']')
+ cp++;
+
+ if (*cp == 0)
+ return 0;
+
+ if (*cp != ']')
+ {
+ clib_warning ("bugger 0.1");
+ return 0;
+ }
+
+ cp++;
+  while (cp < limit && (*cp == ' ' || *cp == '\t'))
+ cp++;
+ if (cp == limit)
+ {
+ clib_warning ("bugger 0.2");
+ return 0;
+ }
+
+ secs = atoi (cp);
+
+ while (cp < limit && (*cp != '.'))
+ cp++;
+
+ if (cp == limit)
+ {
+ clib_warning ("bugger 0.3");
+ return 0;
+ }
+
+ cp++;
+
+ usecs = atoi (cp);
+
+ e->timestamp = ((f64) secs) + ((f64) usecs) * 1e-6;
+
+ /* eat up to third colon */
+ for (i = 0; i < 3; i++)
+ {
+ while (cp < limit && *cp != ':')
+ cp++;
+ cp++;
+ }
+ --cp;
+ if (*cp != ':')
+ {
+ clib_warning ("bugger 1");
+ return 0;
+ }
+ /* aim at '>' (switch-to) / '+' (wakeup) */
+ cp += 5;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 2");
+ return 0;
+ }
+ if (*cp == '>')
+ e->type = RUNNING;
+ else if (*cp == '+')
+ e->type = WAKEUP;
+ else
+ {
+ clib_warning ("bugger 3");
+ return 0;
+ }
+
+ cp += 3;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 4");
+ return 0;
+ }
+
+ e->cpu = atoi (cp);
+ cp += 4;
+
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 4");
+ return 0;
+ }
+ while (cp < limit && (*cp == ' ' || *cp == '\t'))
+ cp++;
+
+ e->pid = atoi (cp);
+
+ for (i = 0; i < 2; i++)
+ {
+ while (cp < limit && *cp != ':')
+ cp++;
+ cp++;
+ }
+ --cp;
+ if (*cp != ':')
+ {
+ clib_warning ("bugger 5");
+ return 0;
+ }
+
+ cp += 3;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 6");
+ return 0;
+ }
+ while (cp < limit && (*cp != ' ' && *cp != '\n'))
+ {
+ vec_add1 (task_name, *cp);
+ cp++;
+ }
+ vec_add1 (task_name, 0);
+ /* _vec_len() = 0 in caller */
+ e->task = task_name;
+
+ if (cp < limit)
+ cp++;
+
+ *index = cp - tdata;
+ return e;
+}
+
+static u32
+elog_id_for_pid (elog_main_t * em, u8 * name, u32 pid)
+{
+ uword *p, r;
+ mhash_t *h = &em->string_table_hash;
+
+ if (!em->string_table_hash.hash)
+ mhash_init (h, sizeof (uword), sizeof (pid));
+
+ p = mhash_get (h, &pid);
+ if (p)
+ return p[0];
+ r = elog_string (em, "%s(%d)", name, pid);
+ mhash_set (h, &pid, r, /* old_value */ 0);
+ return r;
+}
+
+void
+kelog_collect_sched_switch_trace (elog_main_t * em)
+{
+ int enable_fd, data_fd;
+ char *trace_enable = "/debug/tracing/tracing_enabled";
+ char *trace_data = "/debug/tracing/trace";
+ u8 *data = 0;
+ u8 *dp;
+ int bytes, total_bytes;
+ u32 pos;
+ sched_event_t *evt;
+ u64 nsec_to_add;
+ u32 index;
+ f64 clocks_per_sec;
+
+ enable_fd = open (trace_enable, O_RDWR);
+ if (enable_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_enable);
+ return;
+ }
+ /* disable kernel tracing */
+ if (write (enable_fd, "0\n", 2) != 2)
+ {
+ clib_unix_warning ("disable tracing");
+ close (enable_fd);
+ return;
+ }
+ close (enable_fd);
+
+ /* Read the trace data */
+ data_fd = open (trace_data, O_RDWR);
+ if (data_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_data);
+ return;
+ }
+
+ /*
+ * Extract trace into a vector. Note that seq_printf() [kernel]
+ * is not guaranteed to produce 4096 bytes at a time.
+ */
+ vec_validate (data, 4095);
+ total_bytes = 0;
+ pos = 0;
+ while (1)
+ {
+ bytes = read (data_fd, data + pos, 4096);
+ if (bytes <= 0)
+ break;
+
+ total_bytes += bytes;
+ _vec_len (data) = total_bytes;
+
+ pos = vec_len (data);
+ vec_validate (data, vec_len (data) + 4095);
+ }
+ vec_add1 (data, 0);
+
+ /* Synthesize events */
+ em->is_enabled = 1;
+
+ index = 0;
+ while ((evt = parse_sched_switch_trace (data, &index)))
+ {
+ u64 fake_cpu_clock;
+
+ fake_cpu_clock = evt->timestamp * em->cpu_timer.clocks_per_second;
+ {
+	  ELOG_TYPE_DECLARE (e) = {
+	    .format = "%d: %s %s",
+	    .format_args = "i4T4t4",
+	    .n_enum_strings = 2,
+	    .enum_strings = { "running", "wakeup" },
+	  };
+ struct
+ {
+ u32 cpu, string_table_offset, which;
+ } *ed;
+
+ ed = elog_event_data_not_inline (em, &__ELOG_TYPE_VAR (e),
+ &em->default_track, fake_cpu_clock);
+ ed->cpu = evt->cpu;
+ ed->string_table_offset = elog_id_for_pid (em, evt->task, evt->pid);
+ ed->which = evt->type;
+ }
+ _vec_len (evt->task) = 0;
+ }
+ em->is_enabled = 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c
new file mode 100644
index 00000000..361015b4
--- /dev/null
+++ b/src/vppinfra/unix-misc.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/error.h>
+#include <vppinfra/os.h>
+#include <vppinfra/unix.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* writev */
+#include <fcntl.h>
+#include <stdio.h> /* for sprintf */
+
+__thread uword __os_thread_index = 0;
+
+clib_error_t *
+unix_file_n_bytes (char *file, uword * result)
+{
+ struct stat s;
+
+ if (stat (file, &s) < 0)
+ return clib_error_return_unix (0, "stat `%s'", file);
+
+ if (S_ISREG (s.st_mode))
+ *result = s.st_size;
+ else
+ *result = 0;
+
+ return /* no error */ 0;
+}
+
+clib_error_t *
+unix_file_read_contents (char *file, u8 * result, uword n_bytes)
+{
+ int fd = -1;
+ uword n_done, n_left;
+ clib_error_t *error = 0;
+ u8 *v = result;
+
+ if ((fd = open (file, 0)) < 0)
+ return clib_error_return_unix (0, "open `%s'", file);
+
+ n_left = n_bytes;
+ n_done = 0;
+ while (n_left > 0)
+ {
+ int n_read;
+ if ((n_read = read (fd, v + n_done, n_left)) < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", file);
+ goto done;
+ }
+
+ /* End of file. */
+ if (n_read == 0)
+ break;
+
+ n_left -= n_read;
+ n_done += n_read;
+ }
+
+ if (n_left > 0)
+ {
+ error =
+ clib_error_return (0,
+ " `%s' expected to read %wd bytes; read only %wd",
+ file, n_bytes, n_bytes - n_left);
+ goto done;
+ }
+
+done:
+ close (fd);
+ return error;
+}
+
+clib_error_t *
+unix_file_contents (char *file, u8 ** result)
+{
+ uword n_bytes;
+ clib_error_t *error = 0;
+ u8 *v;
+
+ if ((error = unix_file_n_bytes (file, &n_bytes)))
+ return error;
+
+ v = 0;
+ vec_resize (v, n_bytes);
+
+ error = unix_file_read_contents (file, v, n_bytes);
+
+ if (error)
+ vec_free (v);
+ else
+ *result = v;
+
+ return error;
+}
+
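+/* Editor's sketch (the path is hypothetical): reading a whole file into
+   a vector with the function above:
+
+     u8 *v = 0;
+     clib_error_t *err = unix_file_contents ("/etc/hostname", &v);
+     if (err == 0)
+       vec_free (v);              <- v held the file contents
+     else
+       clib_error_report (err);
+*/
+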
+clib_error_t *
+unix_proc_file_contents (char *file, u8 ** result)
+{
+ u8 *rv = 0;
+ uword pos;
+ int bytes, fd;
+
+ /* Unfortunately, stat(/proc/XXX) returns zero... */
+ fd = open (file, O_RDONLY);
+
+ if (fd < 0)
+ return clib_error_return_unix (0, "open `%s'", file);
+
+ vec_validate (rv, 4095);
+ pos = 0;
+ while (1)
+ {
+ bytes = read (fd, rv + pos, 4096);
+ if (bytes < 0)
+ {
+ close (fd);
+ vec_free (rv);
+ return clib_error_return_unix (0, "read '%s'", file);
+ }
+
+ if (bytes == 0)
+ {
+ _vec_len (rv) = pos;
+ break;
+ }
+ pos += bytes;
+ vec_validate (rv, pos + 4095);
+ }
+ *result = rv;
+ close (fd);
+ return 0;
+}
+
+void os_panic (void) __attribute__ ((weak));
+
+void
+os_panic (void)
+{
+ abort ();
+}
+
+void os_exit (int) __attribute__ ((weak));
+
+void
+os_exit (int code)
+{
+ exit (code);
+}
+
+void os_puts (u8 * string, uword string_length, uword is_error)
+ __attribute__ ((weak));
+
+void
+os_puts (u8 * string, uword string_length, uword is_error)
+{
+ int cpu = os_get_thread_index ();
+ int nthreads = os_get_nthreads ();
+ char buf[64];
+ int fd = is_error ? 2 : 1;
+ struct iovec iovs[2];
+ int n_iovs = 0;
+
+ if (nthreads > 1)
+ {
+ snprintf (buf, sizeof (buf), "%d: ", cpu);
+
+ iovs[n_iovs].iov_base = buf;
+ iovs[n_iovs].iov_len = strlen (buf);
+ n_iovs++;
+ }
+
+ iovs[n_iovs].iov_base = string;
+ iovs[n_iovs].iov_len = string_length;
+ n_iovs++;
+
+ if (writev (fd, iovs, n_iovs) < 0)
+    ;				/* intentionally ignore write errors */
+}
+
+void os_out_of_memory (void) __attribute__ ((weak));
+void
+os_out_of_memory (void)
+{
+ os_panic ();
+}
+
+uword os_get_nthreads (void) __attribute__ ((weak));
+uword
+os_get_nthreads (void)
+{
+ return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/unix.h b/src/vppinfra/unix.h
new file mode 100644
index 00000000..29114cfe
--- /dev/null
+++ b/src/vppinfra/unix.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_unix_h
+#define included_clib_unix_h
+
+#include <vppinfra/error.h>
+
+/* Number of bytes in a Unix file. */
+clib_error_t *unix_file_n_bytes (char *file, uword * result);
+
+/* Read file contents into given buffer. */
+clib_error_t *unix_file_read_contents (char *file, u8 * result,
+ uword n_bytes);
+
+/* Read and return contents of Unix file. */
+clib_error_t *unix_file_contents (char *file, u8 ** result);
+
+/* As above but for /proc file system on Linux. */
+clib_error_t *unix_proc_file_contents (char *file, u8 ** result);
+
+#endif /* included_clib_unix_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/unix_error.def b/src/vppinfra/unix_error.def
new file mode 100644
index 00000000..76633dbb
--- /dev/null
+++ b/src/vppinfra/unix_error.def
@@ -0,0 +1,145 @@
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+_ (EPERM, "Operation not permitted")
+_ (ENOENT, "No such file or directory")
+_ (ESRCH, "No such process")
+_ (EINTR, "Interrupted system call")
+_ (EIO, "I/O error")
+_ (ENXIO, "No such device or address")
+_ (E2BIG, "Arg list too long")
+_ (ENOEXEC, "Exec format error")
+_ (EBADF, "Bad file number")
+_ (ECHILD, "No child processes")
+_ (ENOMEM, "Out of memory")
+_ (EACCES, "Permission denied")
+_ (EFAULT, "Bad address")
+_ (ENOTBLK, "Block device required")
+_ (EBUSY, "Device or resource busy")
+_ (EEXIST, "File exists")
+_ (EXDEV, "Cross-device link")
+_ (ENODEV, "No such device")
+_ (ENOTDIR, "Not a directory")
+_ (EISDIR, "Is a directory")
+_ (EINVAL, "Invalid argument")
+_ (ENFILE, "File table overflow")
+_ (EMFILE, "Too many open files")
+_ (ENOTTY, "Not a typewriter")
+_ (ETXTBSY, "Text file busy")
+_ (EFBIG, "File too large")
+_ (ENOSPC, "No space left on device")
+_ (ESPIPE, "Illegal seek")
+_ (EROFS, "Read-only file system")
+_ (EMLINK, "Too many links")
+_ (EPIPE, "Broken pipe")
+_ (EDOM, "Math argument out of domain of func")
+_ (ERANGE, "Math result not representable")
+_ (EDEADLK, "Resource deadlock would occur")
+_ (ENAMETOOLONG, "File name too long")
+_ (ENOLCK, "No record locks available")
+_ (ENOSYS, "Function not implemented")
+_ (ENOTEMPTY, "Directory not empty")
+_ (ELOOP, "Too many symbolic links encountered")
+_ (EWOULDBLOCK, "Operation would block")
+_ (ENOMSG, "No message of desired type")
+_ (EIDRM, "Identifier removed")
+_ (ECHRNG, "Channel number out of range")
+_ (EL2NSYNC, "Level 2 not synchronized")
+_ (EL3HLT, "Level 3 halted")
+_ (EL3RST, "Level 3 reset")
+_ (ELNRNG, "Link number out of range")
+_ (EUNATCH, "Protocol driver not attached")
+_ (ENOCSI, "No CSI structure available")
+_ (EL2HLT, "Level 2 halted")
+_ (EBADE, "Invalid exchange")
+_ (EBADR, "Invalid request descriptor")
+_ (EXFULL, "Exchange full")
+_ (ENOANO, "No anode")
+_ (EBADRQC, "Invalid request code")
+_ (EBADSLT, "Invalid slot")
+_ (EBFONT, "Bad font file format")
+_ (ENOSTR, "Device not a stream")
+_ (ENODATA, "No data available")
+_ (ETIME, "Timer expired")
+_ (ENOSR, "Out of streams resources")
+_ (ENONET, "Machine is not on the network")
+_ (ENOPKG, "Package not installed")
+_ (EREMOTE, "Object is remote")
+_ (ENOLINK, "Link has been severed")
+_ (EADV, "Advertise error")
+_ (ESRMNT, "Srmount error")
+_ (ECOMM, "Communication error on send")
+_ (EPROTO, "Protocol error")
+_ (EMULTIHOP, "Multihop attempted")
+_ (EDOTDOT, "RFS specific error")
+_ (EBADMSG, "Not a data message")
+_ (EOVERFLOW, "Value too large for defined data type")
+_ (ENOTUNIQ, "Name not unique on network")
+_ (EBADFD, "File descriptor in bad state")
+_ (EREMCHG, "Remote address changed")
+_ (ELIBACC, "Can not access a needed shared library")
+_ (ELIBBAD, "Accessing a corrupted shared library")
+_ (ELIBSCN, "lib section in a.out corrupted")
+_ (ELIBMAX, "Attempting to link in too many shared libraries")
+_ (ELIBEXEC, "Cannot exec a shared library directly")
+_ (EILSEQ, "Illegal byte sequence")
+_ (ERESTART, "Interrupted system call should be restarted")
+_ (ESTRPIPE, "Streams pipe error")
+_ (EUSERS, "Too many users")
+_ (ENOTSOCK, "Socket operation on non-socket")
+_ (EDESTADDRREQ, "Destination address required")
+_ (EMSGSIZE, "Message too long")
+_ (EPROTOTYPE, "Protocol wrong type for socket")
+_ (ENOPROTOOPT, "Protocol not available")
+_ (EPROTONOSUPPORT, "Protocol not supported")
+_ (ESOCKTNOSUPPORT, "Socket type not supported")
+_ (EOPNOTSUPP, "Operation not supported on transport endpoint")
+_ (EPFNOSUPPORT, "Protocol family not supported")
+_ (EAFNOSUPPORT, "Address family not supported by protocol")
+_ (EADDRINUSE, "Address already in use")
+_ (EADDRNOTAVAIL, "Cannot assign requested address")
+_ (ENETDOWN, "Network is down")
+_ (ENETUNREACH, "Network is unreachable")
+_ (ENETRESET, "Network dropped connection because of reset")
+_ (ECONNABORTED, "Software caused connection abort")
+_ (ECONNRESET, "Connection reset by peer")
+_ (ENOBUFS, "No buffer space available")
+_ (EISCONN, "Transport endpoint is already connected")
+_ (ENOTCONN, "Transport endpoint is not connected")
+_ (ESHUTDOWN, "Cannot send after transport endpoint shutdown")
+_ (ETOOMANYREFS, "Too many references: cannot splice")
+_ (ETIMEDOUT, "Connection timed out")
+_ (ECONNREFUSED, "Connection refused")
+_ (EHOSTDOWN, "Host is down")
+_ (EHOSTUNREACH, "No route to host")
+_ (EALREADY, "Operation already in progress")
+_ (EINPROGRESS, "Operation now in progress")
+_ (ESTALE, "Stale NFS file handle")
+_ (EUCLEAN, "Structure needs cleaning")
+_ (ENOTNAM, "Not a XENIX named type file")
+_ (ENAVAIL, "No XENIX semaphores available")
+_ (EISNAM, "Is a named type file")
+_ (EREMOTEIO, "Remote I/O error")
+_ (EDQUOT, "Quota exceeded")
+_ (ENOMEDIUM, "No medium found")
+_ (EMEDIUMTYPE, "Wrong medium type")
diff --git a/src/vppinfra/valgrind.h b/src/vppinfra/valgrind.h
new file mode 100644
index 00000000..e74d7e82
--- /dev/null
+++ b/src/vppinfra/valgrind.h
@@ -0,0 +1,4030 @@
+/* -*- c -*-
+ ----------------------------------------------------------------
+
+ Notice that the following BSD-style license applies to this one
+ file (valgrind.h) only. The rest of Valgrind is licensed under the
+ terms of the GNU General Public License, version 2, unless
+ otherwise indicated. See the COPYING file in the source
+ distribution for details.
+
+ ----------------------------------------------------------------
+
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2000-2009 Julian Seward. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. The origin of this software must not be misrepresented; you must
+ not claim that you wrote the original software. If you use this
+ software in a product, an acknowledgment in the product
+ documentation would be appreciated but is not required.
+
+ 3. Altered source versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+
+ 4. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ----------------------------------------------------------------
+
+ Notice that the above BSD-style license applies to this one file
+ (valgrind.h) only. The entire rest of Valgrind is licensed under
+ the terms of the GNU General Public License, version 2. See the
+ COPYING file in the source distribution for details.
+
+ ----------------------------------------------------------------
+*/
+
+
+/* This file is for inclusion into client (your!) code.
+
+ You can use these macros to manipulate and query Valgrind's
+ execution inside your own programs.
+
+   The resulting executables will still run without Valgrind, just a
+   little more slowly than they otherwise would, but are otherwise
+   unchanged.  When not running on Valgrind, each client request
+   consumes very few (e.g. 7) instructions, so the resulting
+   performance loss is negligible unless you plan to execute client
+   requests millions of times per second.  Nevertheless, if that is
+   still a problem, you can compile with the NVALGRIND symbol defined
+   (gcc -DNVALGRIND) so that client requests are not even compiled
+   in. */
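+
+/* For example (an illustrative sketch only; foo.c is a placeholder
+   name, not a file shipped with Valgrind):
+
+       gcc -O2 -c foo.c                 (client requests compiled in)
+       gcc -O2 -DNVALGRIND -c foo.c     (client requests compiled out)
+
+   With NVALGRIND defined, VALGRIND_DO_CLIENT_REQUEST reduces to a
+   plain assignment of the default value, as defined below. */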
+
+#ifndef __VALGRIND_H
+#define __VALGRIND_H
+
+#include <stdarg.h>
+
+/* Nb: this file might be included in a file compiled with -ansi. So
+ we can't use C++ style "//" comments nor the "asm" keyword (instead
+ use "__asm__"). */
+
+/* Derive some tags indicating what the target platform is. Note
+ that in this file we're using the compiler's CPP symbols for
+ identifying architectures, which are different to the ones we use
+ within the rest of Valgrind. Note, __powerpc__ is active for both
+ 32 and 64-bit PPC, whereas __powerpc64__ is only active for the
+ latter (on Linux, that is). */
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64_linux
+#undef PLAT_ppc32_aix5
+#undef PLAT_ppc64_aix5
+#undef PLAT_x86_darwin
+#undef PLAT_amd64_darwin
+
+
+#if defined(_AIX) && defined(__64BIT__)
+#define PLAT_ppc64_aix5 1
+#elif defined(_AIX) && !defined(__64BIT__)
+#define PLAT_ppc32_aix5 1
+#elif defined(__APPLE__) && defined(__i386__)
+#define PLAT_x86_darwin 1
+#elif defined(__APPLE__) && defined(__x86_64__)
+#define PLAT_amd64_darwin 1
+#elif defined(__i386__)
+#define PLAT_x86_linux 1
+#elif defined(__x86_64__)
+#define PLAT_amd64_linux 1
+#elif defined(__powerpc__) && !defined(__powerpc64__)
+#define PLAT_ppc32_linux 1
+#elif defined(__powerpc__) && defined(__powerpc64__)
+#define PLAT_ppc64_linux 1
+#else
+/* If we're not compiling for our target platform, don't generate
+ any inline asms. */
+#if !defined(NVALGRIND)
+#define NVALGRIND 1
+#endif
+#endif
+
+
+/* ------------------------------------------------------------------ */
+/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */
+/* in here of use to end-users -- skip to the next section. */
+/* ------------------------------------------------------------------ */
+
+#if defined(NVALGRIND)
+
+/* Define NVALGRIND to completely remove the Valgrind magic sequence
+ from the compiled code (analogous to NDEBUG's effects on
+ assert()) */
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ { \
+ (_zzq_rlval) = (_zzq_default); \
+ }
+
+#else /* ! NVALGRIND */
+
+/* The following defines the magic code sequences which the JITter
+ spots and handles magically. Don't look too closely at them as
+ they will rot your brain.
+
+   The assembly code sequences for all architectures are in this one
+   file.  This is because this file must be stand-alone, and we don't
+   want to have multiple files.
+
+ For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default
+ value gets put in the return slot, so that everything works when
+ this is executed not under Valgrind. Args are passed in a memory
+ block, and so there's no intrinsic limit to the number that could
+ be passed, but it's currently five.
+
+ The macro args are:
+ _zzq_rlval result lvalue
+ _zzq_default default value (result returned when running on real CPU)
+ _zzq_request request code
+ _zzq_arg1..5 request params
+
+   The other two macros are used to support function wrapping, and are
+   a lot simpler.  VALGRIND_GET_NR_CONTEXT returns the value of the
+   guest's NRADDR pseudo-register and whatever other information is
+   needed to safely call the original function from the wrapper: on
+   ppc64-linux, the R2 value at the divert point is also needed.  This
+   information is abstracted into a user-visible type, OrigFn.
+
+   VALGRIND_CALL_NOREDIR_* behaves the same as the following on the
+   guest, but guarantees that the branch instruction will not be
+   redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64:
+   branch-and-link-to-r11.  VALGRIND_CALL_NOREDIR_* is just text, not a
+   complete inline asm, since it needs to be combined with more magic
+   inline asm stuff to be useful.
+*/
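+
+/* A minimal usage sketch (illustrative only; 0x1001 is a made-up
+   request code, not one any real Valgrind tool understands).  On a
+   real CPU, rc simply receives the default value (here 0); under
+   Valgrind, it receives whatever the tool's handler for the request
+   returns:
+
+       unsigned long rc;
+       VALGRIND_DO_CLIENT_REQUEST(rc, 0, 0x1001, 10, 20, 0, 0, 0);
+*/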
+
+/* ------------------------- x86-{linux,darwin} ---------------- */
+
+#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin)
+
+typedef struct
+{
+ unsigned int nraddr; /* where's the code? */
+}
+OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "roll $3, %%edi ; roll $13, %%edi\n\t" \
+ "roll $29, %%edi ; roll $19, %%edi\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ { volatile unsigned int _zzq_args[6]; \
+ volatile unsigned int _zzq_result; \
+ _zzq_args[0] = (unsigned int)(_zzq_request); \
+ _zzq_args[1] = (unsigned int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned int)(_zzq_arg5); \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %EDX = client_request ( %EAX ) */ \
+ "xchgl %%ebx,%%ebx" \
+ : "=d" (_zzq_result) \
+ : "a" (&_zzq_args[0]), "0" (_zzq_default) \
+ : "cc", "memory" \
+ ); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ volatile unsigned int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %EAX = guest_NRADDR */ \
+ "xchgl %%ecx,%%ecx" \
+ : "=a" (__addr) \
+ : \
+ : "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ }
+
+#define VALGRIND_CALL_NOREDIR_EAX \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* call-noredir *%EAX */ \
+ "xchgl %%edx,%%edx\n\t"
+#endif /* PLAT_x86_linux || PLAT_x86_darwin */
+
+/* ------------------------ amd64-{linux,darwin} --------------- */
+
+#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin)
+
+typedef struct
+{
+ unsigned long long int nraddr; /* where's the code? */
+}
+OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \
+ "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ { volatile unsigned long long int _zzq_args[6]; \
+ volatile unsigned long long int _zzq_result; \
+ _zzq_args[0] = (unsigned long long int)(_zzq_request); \
+ _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %RDX = client_request ( %RAX ) */ \
+ "xchgq %%rbx,%%rbx" \
+ : "=d" (_zzq_result) \
+ : "a" (&_zzq_args[0]), "0" (_zzq_default) \
+ : "cc", "memory" \
+ ); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ volatile unsigned long long int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %RAX = guest_NRADDR */ \
+ "xchgq %%rcx,%%rcx" \
+ : "=a" (__addr) \
+ : \
+ : "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ }
+
+#define VALGRIND_CALL_NOREDIR_RAX \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* call-noredir *%RAX */ \
+ "xchgq %%rdx,%%rdx\n\t"
+#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
+
+/* ------------------------ ppc32-linux ------------------------ */
+
+#if defined(PLAT_ppc32_linux)
+
+typedef struct
+{
+ unsigned int nraddr; /* where's the code? */
+}
+OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \
+ "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned int _zzq_args[6]; \
+ unsigned int _zzq_result; \
+ unsigned int* _zzq_ptr; \
+ _zzq_args[0] = (unsigned int)(_zzq_request); \
+ _zzq_args[1] = (unsigned int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned int)(_zzq_arg5); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile("mr 3,%1\n\t" /*default*/ \
+ "mr 4,%2\n\t" /*ptr*/ \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1\n\t" \
+ "mr %0,3" /*result*/ \
+ : "=b" (_zzq_result) \
+ : "b" (_zzq_default), "b" (_zzq_ptr) \
+ : "cc", "memory", "r3", "r4"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ unsigned int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "cc", "memory", "r3" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+#endif /* PLAT_ppc32_linux */
+
+/* ------------------------ ppc64-linux ------------------------ */
+
+#if defined(PLAT_ppc64_linux)
+
+typedef struct
+{
+ unsigned long long int nraddr; /* where's the code? */
+ unsigned long long int r2; /* what tocptr do we need? */
+}
+OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \
+ "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned long long int _zzq_args[6]; \
+ register unsigned long long int _zzq_result __asm__("r3"); \
+ register unsigned long long int* _zzq_ptr __asm__("r4"); \
+ _zzq_args[0] = (unsigned long long int)(_zzq_request); \
+ _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1" \
+ : "=r" (_zzq_result) \
+ : "0" (_zzq_default), "r" (_zzq_ptr) \
+ : "cc", "memory"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ register unsigned long long int __addr __asm__("r3"); \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2" \
+ : "=r" (__addr) \
+ : \
+ : "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR_GPR2 */ \
+ "or 4,4,4" \
+ : "=r" (__addr) \
+ : \
+ : "cc", "memory" \
+ ); \
+ _zzq_orig->r2 = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc64_linux */
+
+/* ------------------------ ppc32-aix5 ------------------------- */
+
+#if defined(PLAT_ppc32_aix5)
+
+typedef struct
+{
+ unsigned int nraddr; /* where's the code? */
+ unsigned int r2; /* what tocptr do we need? */
+}
+OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \
+ "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned int _zzq_args[7]; \
+ register unsigned int _zzq_result; \
+ register unsigned int* _zzq_ptr; \
+ _zzq_args[0] = (unsigned int)(_zzq_request); \
+ _zzq_args[1] = (unsigned int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned int)(_zzq_arg5); \
+ _zzq_args[6] = (unsigned int)(_zzq_default); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile("mr 4,%1\n\t" \
+ "lwz 3, 24(4)\n\t" \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1\n\t" \
+ "mr %0,3" \
+ : "=b" (_zzq_result) \
+ : "b" (_zzq_ptr) \
+ : "r3", "r4", "cc", "memory"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ register unsigned int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR_GPR2 */ \
+ "or 4,4,4\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->r2 = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc32_aix5 */
+
+/* ------------------------ ppc64-aix5 ------------------------- */
+
+#if defined(PLAT_ppc64_aix5)
+
+typedef struct
+{
+ unsigned long long int nraddr; /* where's the code? */
+ unsigned long long int r2; /* what tocptr do we need? */
+}
+OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \
+ "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned long long int _zzq_args[7]; \
+ register unsigned long long int _zzq_result; \
+ register unsigned long long int* _zzq_ptr; \
+      _zzq_args[0] = (unsigned long long int)(_zzq_request);    \
+      _zzq_args[1] = (unsigned long long int)(_zzq_arg1);       \
+      _zzq_args[2] = (unsigned long long int)(_zzq_arg2);       \
+      _zzq_args[3] = (unsigned long long int)(_zzq_arg3);       \
+      _zzq_args[4] = (unsigned long long int)(_zzq_arg4);       \
+      _zzq_args[5] = (unsigned long long int)(_zzq_arg5);       \
+      _zzq_args[6] = (unsigned long long int)(_zzq_default);    \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile("mr 4,%1\n\t" \
+ "ld 3, 48(4)\n\t" \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1\n\t" \
+ "mr %0,3" \
+ : "=b" (_zzq_result) \
+ : "b" (_zzq_ptr) \
+ : "r3", "r4", "cc", "memory"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ register unsigned long long int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR_GPR2 */ \
+ "or 4,4,4\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->r2 = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc64_aix5 */
+
+/* Insert assembly code for other platforms here... */
+
+#endif /* NVALGRIND */
+
+
+/* ------------------------------------------------------------------ */
+/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */
+/* ugly. It's the least-worst tradeoff I can think of. */
+/* ------------------------------------------------------------------ */
+
+/* This section defines magic (a.k.a. appalling-hack) macros for doing
+   guaranteed-no-redirection calls, so as to get from function
+   wrappers to the functions they are wrapping.  The whole point is to
+ construct standard call sequences, but to do the call itself with a
+ special no-redirect call pseudo-instruction that the JIT
+ understands and handles specially. This section is long and
+ repetitious, and I can't see a way to make it shorter.
+
+ The naming scheme is as follows:
+
+ CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc}
+
+ 'W' stands for "word" and 'v' for "void". Hence there are
+ different macros for calling arity 0, 1, 2, 3, 4, etc, functions,
+ and for each, the possibility of returning a word-typed result, or
+ no result.
+*/
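+
+/* So, for example, CALL_FN_W_WW calls an arity-2 function and collects
+   a word-sized result, while CALL_FN_v_W calls an arity-1 function and
+   discards the result. */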
+
+/* Use these to write the name of your wrapper. NOTE: duplicates
+ VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
+
+/* Use an extra level of macroisation so as to ensure the soname/fnname
+ args are fully macro-expanded before pasting them together. */
+#define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd
+
+#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \
+ VG_CONCAT4(_vgwZU_,soname,_,fnname)
+
+#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \
+ VG_CONCAT4(_vgwZZ_,soname,_,fnname)
+
+/* Use this macro from within a wrapper function to collect the
+ context (address and possibly other info) of the original function.
+ Once you have that you can then use it in one of the CALL_FN_
+ macros. The type of the argument _lval is OrigFn. */
+#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval)
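+
+/* A complete wrapper sketch in the style of the Valgrind manual's
+   examples ("foo" and the soname "NONE" are placeholders; "NONE"
+   matches objects that have no soname):
+
+       int I_WRAP_SONAME_FNNAME_ZU(NONE, foo) (int x, int y)
+       {
+          int    result;
+          OrigFn fn;
+          VALGRIND_GET_ORIG_FN(fn);
+          CALL_FN_W_WW(result, fn, x, y);
+          return result;
+       }
+*/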
+
+/* Derivatives of the main macros below, for calling functions
+ returning void. */
+
+#define CALL_FN_v_v(fnptr) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_v(_junk,fnptr); } while (0)
+
+#define CALL_FN_v_W(fnptr, arg1) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_W(_junk,fnptr,arg1); } while (0)
+
+#define CALL_FN_v_WW(fnptr, arg1,arg2) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0)
+
+#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0)
+
+#define CALL_FN_v_WWWW(fnptr, arg1,arg2,arg3,arg4) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_WWWW(_junk,fnptr,arg1,arg2,arg3,arg4); } while (0)
+
+#define CALL_FN_v_5W(fnptr, arg1,arg2,arg3,arg4,arg5) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_5W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5); } while (0)
+
+#define CALL_FN_v_6W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_6W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6); } while (0)
+
+#define CALL_FN_v_7W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6,arg7) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_7W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6,arg7); } while (0)
+
+/* ------------------------- x86-{linux,darwin} ---------------- */
+
+#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin)
+
+/* These regs are trashed by the hidden call.  No need to mention eax,
+   as gcc can already see that, and mentioning it makes gcc bomb. */
+#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx"
+
+/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned
+ long) == 4. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[1]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[2]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ __asm__ volatile( \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $4, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ __asm__ volatile( \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $8, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[4]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ __asm__ volatile( \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $12, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[5]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ __asm__ volatile( \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $16, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[6]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ __asm__ volatile( \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $20, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[7]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ __asm__ volatile( \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $24, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[8]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ __asm__ volatile( \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $28, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[9]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ __asm__ volatile( \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $32, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[10]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ __asm__ volatile( \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $36, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[11]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ __asm__ volatile( \
+ "pushl 40(%%eax)\n\t" \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $40, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
+ arg6,arg7,arg8,arg9,arg10, \
+ arg11) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[12]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ __asm__ volatile( \
+ "pushl 44(%%eax)\n\t" \
+ "pushl 40(%%eax)\n\t" \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $44, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
+ arg6,arg7,arg8,arg9,arg10, \
+ arg11,arg12) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[13]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ _argvec[12] = (unsigned long)(arg12); \
+ __asm__ volatile( \
+ "pushl 48(%%eax)\n\t" \
+ "pushl 44(%%eax)\n\t" \
+ "pushl 40(%%eax)\n\t" \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $48, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#endif /* PLAT_x86_linux || PLAT_x86_darwin */
+
+/* ------------------------ amd64-{linux,darwin} --------------- */
+
+#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin)
+
+/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \
+ "rdi", "r8", "r9", "r10", "r11"
+
+/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
+ long) == 8. */
+
+/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_
+ macros. In order not to trash the stack redzone, we need to drop
+ %rsp by 128 before the hidden call, and restore afterwards. The
+   nastiness is that it is only by luck that the stack still appears
+ to be unwindable during the hidden call - since then the behaviour
+ of any routine using this macro does not match what the CFI data
+ says. Sigh.
+
+ Why is this important? Imagine that a wrapper has a stack
+ allocated local, and passes to the hidden call, a pointer to it.
+ Because gcc does not know about the hidden call, it may allocate
+ that local in the redzone. Unfortunately the hidden call may then
+ trash it before it comes to use it. So we must step clear of the
+ redzone, for the duration of the hidden call, to make it safe.
+
+   Probably the same problem afflicts the other redzone-style ABIs too
+   (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is
+   self-describing (none of this CFI nonsense) so at least messing
+   with the stack pointer doesn't risk making the stack
+   non-unwindable. */
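+
+/* An illustrative sketch of the hazard (fn, res and buf are
+   placeholders): gcc, not knowing about the hidden call, may place
+   buf in the caller's redzone, and without the 128-byte step the
+   hidden call could trash it before the callee reads it:
+
+       OrigFn fn;
+       unsigned long res;
+       char buf[32];
+       VALGRIND_GET_ORIG_FN(fn);
+       CALL_FN_W_W(res, fn, buf);
+*/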
+
+#define CALL_FN_W_v(lval, orig) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[1]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[2]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[4]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[5]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[6]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[7]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+                    "movq (%%rax), %%rax\n\t"  /* target->%rax */      \
+                    VALGRIND_CALL_NOREDIR_RAX                          \
+                    "addq $128,%%rsp\n\t"                              \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[8]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $8, %%rsp\n" \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[9]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $16, %%rsp\n" \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[10]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "pushq 72(%%rax)\n\t" \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $24, %%rsp\n" \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[11]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "pushq 80(%%rax)\n\t" \
+ "pushq 72(%%rax)\n\t" \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $32, %%rsp\n" \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[12]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "pushq 88(%%rax)\n\t" \
+ "pushq 80(%%rax)\n\t" \
+ "pushq 72(%%rax)\n\t" \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $40, %%rsp\n" \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[13]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ _argvec[12] = (unsigned long)(arg12); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "pushq 96(%%rax)\n\t" \
+ "pushq 88(%%rax)\n\t" \
+ "pushq 80(%%rax)\n\t" \
+ "pushq 72(%%rax)\n\t" \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $48, %%rsp\n" \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
+
+/* ------------------------ ppc32-linux ------------------------ */
+
+#if defined(PLAT_ppc32_linux)
+
+/* This is useful for finding out about the on-stack stuff:
+
+ extern int f9 ( int,int,int,int,int,int,int,int,int );
+ extern int f10 ( int,int,int,int,int,int,int,int,int,int );
+ extern int f11 ( int,int,int,int,int,int,int,int,int,int,int );
+ extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int );
+
+ int g9 ( void ) {
+ return f9(11,22,33,44,55,66,77,88,99);
+ }
+ int g10 ( void ) {
+ return f10(11,22,33,44,55,66,77,88,99,110);
+ }
+ int g11 ( void ) {
+ return f11(11,22,33,44,55,66,77,88,99,110,121);
+ }
+ int g12 ( void ) {
+ return f12(11,22,33,44,55,66,77,88,99,110,121,132);
+ }
+*/
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS \
+ "lr", "ctr", "xer", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13"
+
+/* These CALL_FN_ macros assume that on ppc32-linux,
+ sizeof(unsigned long) == 4. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[1]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[2]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[4]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[5]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[6]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[7]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[8]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 9,28(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[9]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 9,28(11)\n\t" \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[10]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ _argvec[9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "addi 1,1,-16\n\t" \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,8(1)\n\t" \
+ /* args1-8 */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 9,28(11)\n\t" \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "addi 1,1,16\n\t" \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[11]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ _argvec[9] = (unsigned long)arg9; \
+ _argvec[10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "addi 1,1,-16\n\t" \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,12(1)\n\t" \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,8(1)\n\t" \
+ /* args1-8 */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 9,28(11)\n\t" \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "addi 1,1,16\n\t" \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[12]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ _argvec[9] = (unsigned long)arg9; \
+ _argvec[10] = (unsigned long)arg10; \
+ _argvec[11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "addi 1,1,-32\n\t" \
+ /* arg11 */ \
+ "lwz 3,44(11)\n\t" \
+ "stw 3,16(1)\n\t" \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,12(1)\n\t" \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,8(1)\n\t" \
+ /* args1-8 */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 9,28(11)\n\t" \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "addi 1,1,32\n\t" \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[13]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ _argvec[9] = (unsigned long)arg9; \
+ _argvec[10] = (unsigned long)arg10; \
+ _argvec[11] = (unsigned long)arg11; \
+ _argvec[12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "addi 1,1,-32\n\t" \
+ /* arg12 */ \
+ "lwz 3,48(11)\n\t" \
+ "stw 3,20(1)\n\t" \
+ /* arg11 */ \
+ "lwz 3,44(11)\n\t" \
+ "stw 3,16(1)\n\t" \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,12(1)\n\t" \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,8(1)\n\t" \
+ /* args1-8 */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 9,28(11)\n\t" \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "addi 1,1,32\n\t" \
+ "mr %0,3" \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
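+
+/* For the >8-argument cases above: the SysV PPC32 frame header is
+   two words (back chain at 0(1), LR save word at 4(1)), so once the
+   frame is extended with "addi 1,1,-16" or "addi 1,1,-32", stacked
+   arguments arg9, arg10, ... are written at 8(1), 12(1), ... where
+   the callee expects to find them. */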
+
+#endif /* PLAT_ppc32_linux */
+
+/* ------------------------ ppc64-linux ------------------------ */
+
+#if defined(PLAT_ppc64_linux)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS \
+ "lr", "ctr", "xer", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13"
+
+/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
+ long) == 8. */
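+
+/* Illustrative usage sketch: the CALL_FN_* macros below are not
+   normally invoked directly; they are reached via the function
+   wrapping macros defined earlier in this file.  For a hypothetical
+      int foo ( int x );
+   in the main executable, a wrapper would look roughly like
+
+      int I_WRAP_SONAME_FNNAME_ZU(NONE,foo) ( int x )
+      {
+         int    result;
+         OrigFn fn;
+         VALGRIND_GET_ORIG_FN(fn);
+         CALL_FN_W_W(result, fn, x);
+         return result;
+      }
+
+   CALL_FN_W_W then performs the hidden, non-redirected call to the
+   real foo using the sequence below.
+
+   A note on the r2 (TOC) handling: %1 (r11) is loaded with
+   &_argvec[2], so "std 2,-16(11)" parks the caller's TOC pointer in
+   _argvec[0], "ld 2,-8(11)" installs the callee's TOC pointer from
+   _argvec[1] before the call, and "ld 2,-16(11)" restores the
+   caller's afterwards. */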
+
+#define CALL_FN_W_v(lval, orig) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+0]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+1]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+2]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+3]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+4]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+5]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+6]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+7]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+8]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+9]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "addi 1,1,-128\n\t" /* expand stack frame */ \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ "addi 1,1,128" /* restore frame */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+10]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "addi 1,1,-128\n\t" /* expand stack frame */ \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ "addi 1,1,128" /* restore frame */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+11]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "addi 1,1,-144\n\t" /* expand stack frame */ \
+ /* arg11 */ \
+ "ld 3,88(11)\n\t" \
+ "std 3,128(1)\n\t" \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ "addi 1,1,144" /* restore frame */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+12]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ _argvec[2+12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "addi 1,1,-144\n\t" /* expand stack frame */ \
+ /* arg12 */ \
+ "ld 3,96(11)\n\t" \
+ "std 3,136(1)\n\t" \
+ /* arg11 */ \
+ "ld 3,88(11)\n\t" \
+ "std 3,128(1)\n\t" \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ "addi 1,1,144" /* restore frame */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
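+
+/* For the >8-argument cases above: in the 64-bit ELF ABI the
+   parameter save area starts 48 bytes above the stack pointer and
+   its first eight doublewords shadow r3-r10, so the first stacked
+   argument (arg9) belongs at 48 + 8*8 == 112(1), arg10 at 120(1),
+   and so on, matching the stores above. */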
+
+#endif /* PLAT_ppc64_linux */
+
+/* ------------------------ ppc32-aix5 ------------------------- */
+
+#if defined(PLAT_ppc32_aix5)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS \
+ "lr", "ctr", "xer", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13"
+
+/* Expand the stack frame, copying enough info that unwinding
+ still works. Trashes r3. */
+
+#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \
+ "addi 1,1,-" #_n_fr "\n\t" \
+ "lwz 3," #_n_fr "(1)\n\t" \
+ "stw 3,0(1)\n\t"
+
+#define VG_CONTRACT_FRAME_BY(_n_fr) \
+ "addi 1,1," #_n_fr "\n\t"
+
+/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned
+ long) == 4. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+0]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+1]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+2]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+3]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+4]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+5]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+6]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+7]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+8]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+9]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(64) \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,56(1)\n\t" \
+ /* args1-8 */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(64) \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
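+
+/* For the >8-argument cases: the 32-bit AIX link area is 24 bytes
+   (back chain, saved CR, saved LR, two reserved words, saved TOC),
+   and the first eight parameter words shadow r3-r10, so the first
+   stacked argument (arg9) goes at 24 + 8*4 == 56(1), as in the
+   stores above. */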
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+10]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(64) \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,60(1)\n\t" \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,56(1)\n\t" \
+ /* args1-8 */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(64) \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+11]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(72) \
+ /* arg11 */ \
+ "lwz 3,44(11)\n\t" \
+ "stw 3,64(1)\n\t" \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,60(1)\n\t" \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,56(1)\n\t" \
+ /* args1-8 */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(72) \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+12]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ _argvec[2+12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(72) \
+ /* arg12 */ \
+ "lwz 3,48(11)\n\t" \
+ "stw 3,68(1)\n\t" \
+ /* arg11 */ \
+ "lwz 3,44(11)\n\t" \
+ "stw 3,64(1)\n\t" \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,60(1)\n\t" \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,56(1)\n\t" \
+ /* args1-8 */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(72) \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#endif /* PLAT_ppc32_aix5 */
+
+/* ------------------------ ppc64-aix5 ------------------------- */
+
+#if defined(PLAT_ppc64_aix5)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS \
+ "lr", "ctr", "xer", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13"
+
+/* Expand the stack frame, copying enough info that unwinding
+ still works. Trashes r3. */
+
+#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \
+ "addi 1,1,-" #_n_fr "\n\t" \
+ "ld 3," #_n_fr "(1)\n\t" \
+ "std 3,0(1)\n\t"
+
+#define VG_CONTRACT_FRAME_BY(_n_fr) \
+ "addi 1,1," #_n_fr "\n\t"
+
+/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned
+ long) == 8. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+0]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+1]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+2]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+3]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+4]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+5]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+6]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+7]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+8]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+9]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(128) \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(128) \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+10]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(128) \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(128) \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+11]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(144) \
+ /* arg11 */ \
+ "ld 3,88(11)\n\t" \
+ "std 3,128(1)\n\t" \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(144) \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+12]; \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ _argvec[2+12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(144) \
+ /* arg12 */ \
+ "ld 3,96(11)\n\t" \
+ "std 3,136(1)\n\t" \
+ /* arg11 */ \
+ "ld 3,88(11)\n\t" \
+ "std 3,128(1)\n\t" \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" \
+ "mr %0,3\n\t" \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(144) \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#endif /* PLAT_ppc64_aix5 */
+
+
+/* ------------------------------------------------------------------ */
+/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */
+/* */
+/* ------------------------------------------------------------------ */
+
+/* Some request codes. There are many more of these, but most are not
+ exposed to end-user view. These are the public ones, all of the
+ form 0x1000 + small_number.
+
+ Core ones are in the range 0x00000000--0x0000ffff. The non-public
+ ones start at 0x2000.
+*/
+
+/* These macros are used by tools -- they must be public, but don't
+ embed them into other programs. */
+#define VG_USERREQ_TOOL_BASE(a,b) \
+ ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16))
+#define VG_IS_TOOL_USERREQ(a, b, v) \
+ (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000))
+
+/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !!
+ This enum comprises an ABI exported by Valgrind to programs
+ which use client requests. DO NOT CHANGE THE ORDER OF THESE
+ ENTRIES, NOR DELETE ANY -- add new ones at the end. */
+typedef enum
+{ VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001,
+ VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002,
+
+ /* These allow any function to be called from the simulated
+ CPU but run on the real CPU. Nb: the first arg passed to
+ the function is always the ThreadId of the running
+ thread! So CLIENT_CALL0 actually requires a 1 arg
+ function, etc. */
+ VG_USERREQ__CLIENT_CALL0 = 0x1101,
+ VG_USERREQ__CLIENT_CALL1 = 0x1102,
+ VG_USERREQ__CLIENT_CALL2 = 0x1103,
+ VG_USERREQ__CLIENT_CALL3 = 0x1104,
+
+ /* Can be useful in regression testing suites -- eg. can
+ send Valgrind's output to /dev/null and still count
+ errors. */
+ VG_USERREQ__COUNT_ERRORS = 0x1201,
+
+ /* These are useful and can be interpreted by any tool that
+ tracks malloc() et al, by using vg_replace_malloc.c. */
+ VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301,
+ VG_USERREQ__FREELIKE_BLOCK = 0x1302,
+ /* Memory pool support. */
+ VG_USERREQ__CREATE_MEMPOOL = 0x1303,
+ VG_USERREQ__DESTROY_MEMPOOL = 0x1304,
+ VG_USERREQ__MEMPOOL_ALLOC = 0x1305,
+ VG_USERREQ__MEMPOOL_FREE = 0x1306,
+ VG_USERREQ__MEMPOOL_TRIM = 0x1307,
+ VG_USERREQ__MOVE_MEMPOOL = 0x1308,
+ VG_USERREQ__MEMPOOL_CHANGE = 0x1309,
+ VG_USERREQ__MEMPOOL_EXISTS = 0x130a,
+
+ /* Allow printfs to valgrind log. */
+ VG_USERREQ__PRINTF = 0x1401,
+ VG_USERREQ__PRINTF_BACKTRACE = 0x1402,
+
+ /* Stack support. */
+ VG_USERREQ__STACK_REGISTER = 0x1501,
+ VG_USERREQ__STACK_DEREGISTER = 0x1502,
+ VG_USERREQ__STACK_CHANGE = 0x1503,
+
+ /* Wine support */
+ VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601
+} Vg_ClientRequest;
+
+#if !defined(__GNUC__)
+#define __extension__ /* */
+#endif
+
+/* Returns the number of Valgrinds this code is running under. That
+ is, 0 if running natively, 1 if running under Valgrind, 2 if
+ running under Valgrind which is running under another Valgrind,
+ etc. */
+#define RUNNING_ON_VALGRIND __extension__ \
+ ({unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, \
+ VG_USERREQ__RUNNING_ON_VALGRIND, \
+ 0, 0, 0, 0, 0); \
+ _qzz_res; \
+ })
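+
+/* Usage sketch (illustrative; not part of the original header): the
+   request nests, so the value reported is the Valgrind stacking depth.
+
+     if (RUNNING_ON_VALGRIND)
+       fprintf (stderr, "under %u level(s) of Valgrind\n",
+                (unsigned) RUNNING_ON_VALGRIND);
+*/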
+
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+ _qzz_len - 1]. Useful if you are debugging a JITter or some such,
+ since it provides a way to make sure valgrind will retranslate the
+ invalidated area. Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__DISCARD_TRANSLATIONS, \
+ _qzz_addr, _qzz_len, 0, 0, 0); \
+ }
+
+
+/* These requests are for getting Valgrind itself to print something.
+ Possibly with a backtrace. This is a really ugly hack. The return value
+ is the number of characters printed, excluding the "**<pid>** " part at the
+ start and the backtrace (if present). */
+
+#if defined(NVALGRIND)
+
+#define VALGRIND_PRINTF(...)
+#define VALGRIND_PRINTF_BACKTRACE(...)
+
+#else /* NVALGRIND */
+
+/* Modern GCC will optimize the static routine out if unused,
+   and the unused attribute suppresses warnings about it. */
+static int VALGRIND_PRINTF (const char *format, ...)
+ __attribute__ ((format (__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF (const char *format, ...)
+{
+ unsigned long _qzz_res;
+ va_list vargs;
+ va_start (vargs, format);
+ VALGRIND_DO_CLIENT_REQUEST (_qzz_res, 0, VG_USERREQ__PRINTF,
+ (unsigned long) format, (unsigned long) vargs,
+ 0, 0, 0);
+ va_end (vargs);
+ return (int) _qzz_res;
+}
+
+static int VALGRIND_PRINTF_BACKTRACE (const char *format, ...)
+ __attribute__ ((format (__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF_BACKTRACE (const char *format, ...)
+{
+ unsigned long _qzz_res;
+ va_list vargs;
+ va_start (vargs, format);
+ VALGRIND_DO_CLIENT_REQUEST (_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE,
+ (unsigned long) format, (unsigned long) vargs,
+ 0, 0, 0);
+ va_end (vargs);
+ return (int) _qzz_res;
+}
+
+#endif /* NVALGRIND */
+
+
+/* These requests allow control to move from the simulated CPU to the
+   real CPU, calling an arbitrary function.
+
+ Note that the current ThreadId is inserted as the first argument.
+ So this call:
+
+ VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
+
+ requires f to have this signature:
+
+ Word f(Word tid, Word arg1, Word arg2)
+
+ where "Word" is a word-sized type.
+
+ Note that these client requests are not entirely reliable. For example,
+ if you call a function with them that subsequently calls printf(),
+ there's a high chance Valgrind will crash. Generally, your prospects of
+ these working are made higher if the called function does not refer to
+ any global variables, and does not refer to any libc or other functions
+ (printf et al). Any kind of entanglement with libc or dynamic linking is
+ likely to have a bad outcome, for tricky reasons which we've grappled
+ with a lot in the past.
+*/
+#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \
+ __extension__ \
+ ({unsigned long _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__CLIENT_CALL0, \
+ _qyy_fn, \
+ 0, 0, 0, 0); \
+ _qyy_res; \
+ })
+
+#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \
+ __extension__ \
+ ({unsigned long _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__CLIENT_CALL1, \
+ _qyy_fn, \
+ _qyy_arg1, 0, 0, 0); \
+ _qyy_res; \
+ })
+
+#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \
+ __extension__ \
+ ({unsigned long _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__CLIENT_CALL2, \
+ _qyy_fn, \
+ _qyy_arg1, _qyy_arg2, 0, 0); \
+ _qyy_res; \
+ })
+
+#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
+ __extension__ \
+ ({unsigned long _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__CLIENT_CALL3, \
+ _qyy_fn, \
+ _qyy_arg1, _qyy_arg2, \
+ _qyy_arg3, 0); \
+ _qyy_res; \
+ })
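+
+/* Sketch of the calling convention described above (hypothetical
+   function; illustrative only). The tid slot is supplied by Valgrind,
+   so a CALL2 target takes three word-sized arguments:
+
+     static long add_fn (long tid, long a, long b) { return a + b; }
+     ...
+     long r = VALGRIND_NON_SIMD_CALL2 (add_fn, 7, 8);   // r == 15
+*/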
+
+
+/* Counts the number of errors that have been recorded by a tool. Nb:
+ the tool must record the errors with VG_(maybe_record_error)() or
+ VG_(unique_error)() for them to be counted. */
+#define VALGRIND_COUNT_ERRORS \
+ __extension__ \
+ ({unsigned int _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__COUNT_ERRORS, \
+ 0, 0, 0, 0, 0); \
+ _qyy_res; \
+ })
+
+/* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing
+ when heap blocks are allocated in order to give accurate results. This
+ happens automatically for the standard allocator functions such as
+ malloc(), calloc(), realloc(), memalign(), new, new[], free(), delete,
+ delete[], etc.
+
+ But if your program uses a custom allocator, this doesn't automatically
+ happen, and Valgrind will not do as well. For example, if you allocate
+ superblocks with mmap() and then allocates chunks of the superblocks, all
+ Valgrind's observations will be at the mmap() level and it won't know that
+ the chunks should be considered separate entities. In Memcheck's case,
+ that means you probably won't get heap block overrun detection (because
+ there won't be redzones marked as unaddressable) and you definitely won't
+ get any leak detection.
+
+ The following client requests allow a custom allocator to be annotated so
+ that it can be handled accurately by Valgrind.
+
+ VALGRIND_MALLOCLIKE_BLOCK marks a region of memory as having been allocated
+ by a malloc()-like function. For Memcheck (an illustrative case), this
+ does two things:
+
+ - It records that the block has been allocated. This means any addresses
+ within the block mentioned in error messages will be
+ identified as belonging to the block. It also means that if the block
+ isn't freed it will be detected by the leak checker.
+
+ - It marks the block as being addressable and undefined (if 'is_zeroed' is
+ not set), or addressable and defined (if 'is_zeroed' is set). This
+ controls how accesses to the block by the program are handled.
+
+ 'addr' is the start of the usable block (ie. after any
+ redzone), 'sizeB' is its size. 'rzB' is the redzone size if the allocator
+ can apply redzones -- these are blocks of padding at the start and end of
+ each block. Adding redzones is recommended as it makes it much more likely
+ Valgrind will spot block overruns. `is_zeroed' indicates if the memory is
+ zeroed (or filled with another predictable value), as is the case for
+ calloc().
+
+ VALGRIND_MALLOCLIKE_BLOCK should be put immediately after the point where a
+ heap block -- that will be used by the client program -- is allocated.
+ It's best to put it at the outermost level of the allocator if possible;
+ for example, if you have a function my_alloc() which calls
+ internal_alloc(), and the client request is put inside internal_alloc(),
+ stack traces relating to the heap block will contain entries for both
+ my_alloc() and internal_alloc(), which is probably not what you want.
+
+ For Memcheck users: if you use VALGRIND_MALLOCLIKE_BLOCK to carve out
+ custom blocks from within a heap block, B, that has been allocated with
+ malloc/calloc/new/etc, then block B will be *ignored* during leak-checking
+ -- the custom blocks will take precedence.
+
+ VALGRIND_FREELIKE_BLOCK is the partner to VALGRIND_MALLOCLIKE_BLOCK. For
+ Memcheck, it does two things:
+
+ - It records that the block has been deallocated. This assumes that the
+ block was annotated as having been allocated via
+ VALGRIND_MALLOCLIKE_BLOCK. Otherwise, an error will be issued.
+
+ - It marks the block as being unaddressable.
+
+ VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a
+ heap block is deallocated.
+
+ In many cases, these two client requests will not be enough to get your
+ allocator working well with Memcheck. More specifically, if your allocator
+ writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call
+ will be necessary to mark the memory as addressable just before the zeroing
+ occurs, otherwise you'll get a lot of invalid write errors. For example,
+ you'll need to do this if your allocator recycles freed blocks, but it
+ zeroes them before handing them back out (via VALGRIND_MALLOCLIKE_BLOCK).
+ Alternatively, if your allocator reuses freed blocks for allocator-internal
+ data structures, VALGRIND_MAKE_MEM_UNDEFINED calls will also be necessary.
+
+ Really, what's happening is a blurring of the lines between the client
+ program and the allocator... after VALGRIND_FREELIKE_BLOCK is called, the
+ memory should be considered unaddressable to the client program, but the
+ allocator knows more than the rest of the client program and so may be able
+ to safely access it. Extra client requests are necessary for Valgrind to
+ understand the distinction between the allocator and the rest of the
+ program.
+
+ Note: there is currently no VALGRIND_REALLOCLIKE_BLOCK client request; it
+ has to be emulated with MALLOCLIKE/FREELIKE and memory copying.
+
+ Ignored if addr == 0.
+*/
+#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MALLOCLIKE_BLOCK, \
+ addr, sizeB, rzB, is_zeroed, 0); \
+ (void) _qzz_res; /* compiler warning */ \
+ }
+
+/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details.
+ Ignored if addr == 0.
+*/
+#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__FREELIKE_BLOCK, \
+ addr, rzB, 0, 0, 0); \
+ (void) _qzz_res; /* compiler warning */ \
+ }
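+
+/* Minimal sketch of annotating a custom allocator as described above.
+   carve_from_superblock()/return_to_superblock() are hypothetical;
+   zero-byte redzones, memory not pre-zeroed:
+
+     void *my_alloc (size_t n)
+     {
+       void *p = carve_from_superblock (n);
+       VALGRIND_MALLOCLIKE_BLOCK (p, n, 0, 0);   // rzB = 0, is_zeroed = 0
+       return p;
+     }
+
+     void my_free (void *p)
+     {
+       VALGRIND_FREELIKE_BLOCK (p, 0);           // rzB = 0
+       return_to_superblock (p);
+     }
+*/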
+
+/* Create a memory pool. */
+#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__CREATE_MEMPOOL, \
+ pool, rzB, is_zeroed, 0, 0); \
+ }
+
+/* Destroy a memory pool. */
+#define VALGRIND_DESTROY_MEMPOOL(pool) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__DESTROY_MEMPOOL, \
+ pool, 0, 0, 0, 0); \
+ }
+
+/* Associate a piece of memory with a memory pool. */
+#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_ALLOC, \
+ pool, addr, size, 0, 0); \
+ }
+
+/* Disassociate a piece of memory from a memory pool. */
+#define VALGRIND_MEMPOOL_FREE(pool, addr) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_FREE, \
+ pool, addr, 0, 0, 0); \
+ }
+
+/* Disassociate any pieces outside a particular range. */
+#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_TRIM, \
+ pool, addr, size, 0, 0); \
+ }
+
+/* Resize and/or move a piece associated with a memory pool. */
+#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MOVE_MEMPOOL, \
+ poolA, poolB, 0, 0, 0); \
+ }
+
+/* Resize and/or move a piece associated with a memory pool. */
+#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_CHANGE, \
+ pool, addrA, addrB, size, 0); \
+ }
+
+/* Return 1 if a mempool exists, else 0. */
+#define VALGRIND_MEMPOOL_EXISTS(pool) \
+ __extension__ \
+ ({unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_EXISTS, \
+ pool, 0, 0, 0, 0); \
+ _qzz_res; \
+ })
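+
+/* Mempool lifecycle sketch (hypothetical pool anchor; illustrative
+   only). The pool handle is simply an address Valgrind uses as a key:
+
+     static char pool[65536];
+     VALGRIND_CREATE_MEMPOOL (pool, 0, 0);           // no redzones, not zeroed
+     VALGRIND_MEMPOOL_ALLOC (pool, pool + 64, 256);  // hand out a chunk
+     VALGRIND_MEMPOOL_FREE (pool, pool + 64);        // take it back
+     VALGRIND_DESTROY_MEMPOOL (pool);
+*/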
+
+/* Mark a piece of memory as being a stack. Returns a stack id. */
+#define VALGRIND_STACK_REGISTER(start, end) \
+ __extension__ \
+ ({unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__STACK_REGISTER, \
+ start, end, 0, 0, 0); \
+ _qzz_res; \
+ })
+
+/* Unmark the piece of memory associated with a stack id as being a
+ stack. */
+#define VALGRIND_STACK_DEREGISTER(id) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__STACK_DEREGISTER, \
+ id, 0, 0, 0, 0); \
+ }
+
+/* Change the start and end address of the stack id. */
+#define VALGRIND_STACK_CHANGE(id, start, end) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__STACK_CHANGE, \
+ id, start, end, 0, 0); \
+ }
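+
+/* Stack registration sketch (e.g. for a hand-rolled coroutine stack;
+   illustrative only):
+
+     static char stk[64 * 1024];
+     unsigned id = VALGRIND_STACK_REGISTER (stk, stk + sizeof (stk));
+     ...
+     VALGRIND_STACK_DEREGISTER (id);
+*/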
+
+/* Load PDB debug info for Wine PE image_map. */
+#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__LOAD_PDB_DEBUGINFO, \
+ fd, ptr, total_size, delta, 0); \
+ }
+
+
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64_linux
+#undef PLAT_ppc32_aix5
+#undef PLAT_ppc64_aix5
+
+#endif /* __VALGRIND_H */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vec.c b/src/vppinfra/vec.c
new file mode 100644
index 00000000..2d7ae1d4
--- /dev/null
+++ b/src/vppinfra/vec.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/vec.h>
+#include <vppinfra/mem.h>
+
+/* Vector resize operator. Called as needed by various macros such as
+ vec_add1() when we need to allocate memory. */
+void *
+vec_resize_allocate_memory (void *v,
+ word length_increment,
+ uword data_bytes,
+ uword header_bytes, uword data_align)
+{
+ vec_header_t *vh = _vec_find (v);
+ uword old_alloc_bytes, new_alloc_bytes;
+ void *old, *new;
+
+ header_bytes = vec_header_bytes (header_bytes);
+
+ data_bytes += header_bytes;
+
+ if (!v)
+ {
+      new = clib_mem_alloc_aligned_at_offset (data_bytes, data_align,
+                                              header_bytes,
+                                              1 /* yes, call os_out_of_memory */ );
+ data_bytes = clib_mem_size (new);
+ memset (new, 0, data_bytes);
+ v = new + header_bytes;
+ _vec_len (v) = length_increment;
+ return v;
+ }
+
+ vh->len += length_increment;
+ old = v - header_bytes;
+
+ /* Vector header must start heap object. */
+ ASSERT (clib_mem_is_heap_object (old));
+
+ old_alloc_bytes = clib_mem_size (old);
+
+ /* Need to resize? */
+ if (data_bytes <= old_alloc_bytes)
+ return v;
+
+ new_alloc_bytes = (old_alloc_bytes * 3) / 2;
+ if (new_alloc_bytes < data_bytes)
+ new_alloc_bytes = data_bytes;
+
+ new =
+ clib_mem_alloc_aligned_at_offset (new_alloc_bytes, data_align,
+ header_bytes,
+ 1 /* yes, call os_out_of_memory */ );
+
+ /* FIXME fail gracefully. */
+ if (!new)
+ clib_panic
+ ("vec_resize fails, length increment %d, data bytes %d, alignment %d",
+ length_increment, data_bytes, data_align);
+
+ clib_memcpy (new, old, old_alloc_bytes);
+ clib_mem_free (old);
+ v = new;
+
+ /* Allocator may give a bit of extra room. */
+ new_alloc_bytes = clib_mem_size (v);
+
+ /* Zero new memory. */
+ memset (v + old_alloc_bytes, 0, new_alloc_bytes - old_alloc_bytes);
+
+ return v + header_bytes;
+}
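+
+/* Worked example of the growth policy above (illustrative numbers):
+   an existing 64-byte allocation asked to hold 80 bytes grows to
+   max (64 * 3 / 2, 80) = 96 bytes; asked to hold 200 bytes it grows
+   straight to 200. clib_mem_size() may then round either figure up. */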
+
+uword
+clib_mem_is_vec_h (void *v, uword header_bytes)
+{
+ return clib_mem_is_heap_object (vec_header (v, header_bytes));
+}
+
+/** \cond */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+int
+main (int argc, char *argv[])
+{
+ word n = atoi (argv[1]);
+ word i, *x = 0;
+
+ typedef struct
+ {
+ word x, y, z;
+ } FOO;
+
+  FOO *foos = vec_new (FOO, 10), *f;
+
+ vec_validate (foos, 100);
+ foos[100].x = 99;
+
+ _vec_len (foos) = 0;
+ for (i = 0; i < n; i++)
+ {
+ vec_add1 (x, i);
+ vec_add2 (foos, f, 1);
+ f->x = 2 * i;
+ f->y = 3 * i;
+ f->z = 4 * i;
+ }
+
+ {
+ word n = 2;
+ word m = 42;
+ vec_delete (foos, n, m);
+ }
+
+ {
+ word n = 2;
+ word m = 42;
+ vec_insert (foos, n, m);
+ }
+
+ vec_free (x);
+ vec_free (foos);
+ exit (0);
+}
+#endif
+/** \endcond */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vec.h b/src/vppinfra/vec.h
new file mode 100644
index 00000000..e2cb24c5
--- /dev/null
+++ b/src/vppinfra/vec.h
@@ -0,0 +1,1009 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_vec_h
+#define included_vec_h
+
+#include <vppinfra/clib.h> /* word, etc */
+#include <vppinfra/mem.h> /* clib_mem_free */
+#include <vppinfra/string.h> /* memcpy, memmove */
+#include <vppinfra/vec_bootstrap.h>
+
+/** \file
+
+   CLIB vectors are ubiquitous dynamically resized arrays with
+   user-defined "headers". Many CLIB data structures (e.g. hash,
+   heap, pool) are vectors with various different headers.
+
+ The memory layout looks like this:
+
+~~~~~~~~
+ user header (aligned to uword boundary)
+ vector length: number of elements
+ user's pointer-> vector element #0
+ vector element #1
+ ...
+~~~~~~~~
+
+   The user pointer contains the address of vector element #0. NULL
+   pointers are valid vectors and denote a zero-length vector.
+
+   You can reset the length of an allocated vector to zero via the
+   vec_reset_length(v) macro, or by setting the vector length field to
+   zero (e.g. _vec_len (v) = 0). vec_reset_length(v) is preferred,
+   since it tolerates NULL pointers.
+
+ Typically, the header is not present. Headers allow for other
+ data structures to be built atop CLIB vectors.
+
+ Users may specify the alignment for data elements via the
+ vec_*_aligned macros.
+
+   Vector elements can be any C type (e.g. int, double, struct bar).
+ This is also true for data types built atop vectors (e.g. heap,
+ pool, etc.).
+
+ Many macros have _a variants supporting alignment of vector data
+   and _h variants supporting non-zero-length vector headers.
+ The _ha variants support both.
+
+   Standard programming error: memorize a pointer to the ith element
+   of a vector, then expand the vector. Vectors expand by 3/2, so such
+   code may appear to work for a period of time. Instead, memorize
+   vector indices, which are invariant across resizes.
+ */
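+
+/* A minimal usage sketch of the conventions above (illustrative; the
+   macros themselves are defined below):
+
+     u32 *v = 0;          // NULL is a valid zero-length vector
+     vec_add1 (v, 1);     // v may move on expansion; macros reassign it
+     vec_validate (v, 9); // indices 0..9 now valid, new slots zeroed
+     vec_free (v);        // frees storage and sets v = 0
+*/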
+
+/** \brief Low-level resize allocation function, usually not called directly
+
+ @param v pointer to a vector
+ @param length_increment length increment in elements
+ @param data_bytes requested size in bytes
+ @param header_bytes header size in bytes (may be zero)
+ @param data_align alignment (may be zero)
+ @return v_prime pointer to resized vector, may or may not equal v
+*/
+void *vec_resize_allocate_memory (void *v,
+ word length_increment,
+ uword data_bytes,
+ uword header_bytes, uword data_align);
+
+/** \brief Low-level vector resize function, usually not called directly
+
+ @param v pointer to a vector
+ @param length_increment length increment in elements
+ @param data_bytes requested size in bytes
+ @param header_bytes header size in bytes (may be zero)
+ @param data_align alignment (may be zero)
+ @return v_prime pointer to resized vector, may or may not equal v
+*/
+
+always_inline void *
+_vec_resize (void *v,
+ word length_increment,
+ uword data_bytes, uword header_bytes, uword data_align)
+{
+ vec_header_t *vh = _vec_find (v);
+ uword new_data_bytes, aligned_header_bytes;
+
+ aligned_header_bytes = vec_header_bytes (header_bytes);
+
+ new_data_bytes = data_bytes + aligned_header_bytes;
+
+ if (PREDICT_TRUE (v != 0))
+ {
+ void *p = v - aligned_header_bytes;
+
+ /* Vector header must start heap object. */
+ ASSERT (clib_mem_is_heap_object (p));
+
+ /* Typically we'll not need to resize. */
+ if (new_data_bytes <= clib_mem_size (p))
+ {
+ vh->len += length_increment;
+ return v;
+ }
+ }
+
+ /* Slow path: call helper function. */
+ return vec_resize_allocate_memory (v, length_increment, data_bytes,
+ header_bytes,
+ clib_max (sizeof (vec_header_t),
+ data_align));
+}
+
+/** \brief Determine if vector will resize with next allocation
+
+ @param v pointer to a vector
+ @param length_increment length increment in elements
+ @param data_bytes requested size in bytes
+ @param header_bytes header size in bytes (may be zero)
+ @param data_align alignment (may be zero)
+    @return 1 if the vector will be reallocated, 0 otherwise
+*/
+
+always_inline int
+_vec_resize_will_expand (void *v,
+ word length_increment,
+ uword data_bytes, uword header_bytes,
+ uword data_align)
+{
+ uword new_data_bytes, aligned_header_bytes;
+
+ aligned_header_bytes = vec_header_bytes (header_bytes);
+
+ new_data_bytes = data_bytes + aligned_header_bytes;
+
+ if (PREDICT_TRUE (v != 0))
+ {
+ void *p = v - aligned_header_bytes;
+
+ /* Vector header must start heap object. */
+ ASSERT (clib_mem_is_heap_object (p));
+
+ /* Typically we'll not need to resize. */
+ if (new_data_bytes <= clib_mem_size (p))
+ return 0;
+ }
+ return 1;
+}
+
+/** \brief Predicate function, says whether the supplied vector is a clib heap
+ object (general version).
+
+ @param v pointer to a vector
+ @param header_bytes vector header size in bytes (may be zero)
+ @return 0 or 1
+*/
+uword clib_mem_is_vec_h (void *v, uword header_bytes);
+
+
+/** \brief Predicate function, says whether the supplied vector is a clib heap
+ object
+
+ @param v pointer to a vector
+ @return 0 or 1
+*/
+always_inline uword
+clib_mem_is_vec (void *v)
+{
+ return clib_mem_is_vec_h (v, 0);
+}
+
+/* Local variable naming macro (prevents collisions with other macro naming). */
+#define _v(var) _vec_##var
+
+/** \brief Resize a vector (general version).
+ Add N elements to end of given vector V, return pointer to start of vector.
+ Vector will have room for H header bytes and will have user's data aligned
+ at alignment A (rounded to next power of 2).
+
+ @param V pointer to a vector
+ @param N number of elements to add
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_resize_ha(V,N,H,A) \
+do { \
+ word _v(n) = (N); \
+ word _v(l) = vec_len (V); \
+ V = _vec_resize ((V), _v(n), (_v(l) + _v(n)) * sizeof ((V)[0]), (H), (A)); \
+} while (0)
+
+/** \brief Resize a vector (no header, unspecified alignment)
+ Add N elements to end of given vector V, return pointer to start of vector.
+    The resized vector has no header and unspecified data alignment.
+
+ @param V pointer to a vector
+ @param N number of elements to add
+ @return V (value-result macro parameter)
+*/
+#define vec_resize(V,N) vec_resize_ha(V,N,0,0)
+
+/** \brief Resize a vector (no header, alignment specified).
+ Add N elements to end of given vector V, return pointer to start of vector.
+    The resized vector has no header; user data is aligned at alignment A
+    (rounded to the next power of 2).
+
+ @param V pointer to a vector
+ @param N number of elements to add
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_resize_aligned(V,N,A) vec_resize_ha(V,N,0,A)
+
+/** \brief Allocate space for N more elements
+
+ @param V pointer to a vector
+ @param N number of elements to add
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_alloc_ha(V,N,H,A) \
+do { \
+ uword _v(l) = vec_len (V); \
+ vec_resize_ha (V, N, H, A); \
+ _vec_len (V) = _v(l); \
+} while (0)
+
+/** \brief Allocate space for N more elements
+ (no header, unspecified alignment)
+
+ @param V pointer to a vector
+ @param N number of elements to add
+ @return V (value-result macro parameter)
+*/
+#define vec_alloc(V,N) vec_alloc_ha(V,N,0,0)
+
+/** \brief Allocate space for N more elements (no header, given alignment)
+ @param V pointer to a vector
+ @param N number of elements to add
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_alloc_aligned(V,N,A) vec_alloc_ha(V,N,0,A)
+
+/** \brief Create new vector of given type and length (general version).
+ @param T type of elements in new vector
+ @param N number of elements to add
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V new vector
+*/
+#define vec_new_ha(T,N,H,A) \
+({ \
+ word _v(n) = (N); \
+ _vec_resize ((T *) 0, _v(n), _v(n) * sizeof (T), (H), (A)); \
+})
+
+/** \brief Create new vector of given type and length
+ (unspecified alignment, no header).
+
+ @param T type of elements in new vector
+ @param N number of elements to add
+ @return V new vector
+*/
+#define vec_new(T,N) vec_new_ha(T,N,0,0)
+/** \brief Create new vector of given type and length
+ (alignment specified, no header).
+
+ @param T type of elements in new vector
+ @param N number of elements to add
+ @param A alignment (may be zero)
+ @return V new vector
+*/
+#define vec_new_aligned(T,N,A) vec_new_ha(T,N,0,A)
+
+/** \brief Free vector's memory (general version)
+
+ @param V pointer to a vector
+ @param H size of header in bytes
+ @return V (value-result parameter, V=0)
+*/
+#define vec_free_h(V,H) \
+do { \
+ if (V) \
+ { \
+ clib_mem_free (vec_header ((V), (H))); \
+ V = 0; \
+ } \
+} while (0)
+
+/** \brief Free vector's memory (no header).
+ @param V pointer to a vector
+ @return V (value-result parameter, V=0)
+*/
+#define vec_free(V) vec_free_h(V,0)
+
+/** \brief Free vector user header (syntactic sugar)
+    @param h vector header
+    @return void
+*/
+#define vec_free_header(h) clib_mem_free (h)
+
+/** \brief Return copy of vector (general version).
+
+ @param V pointer to a vector
+ @param H size of header in bytes
+ @param A alignment (may be zero)
+
+ @return Vdup copy of vector
+*/
+
+#define vec_dup_ha(V,H,A) \
+({ \
+ __typeof__ ((V)[0]) * _v(v) = 0; \
+ uword _v(l) = vec_len (V); \
+ if (_v(l) > 0) \
+ { \
+ vec_resize_ha (_v(v), _v(l), (H), (A)); \
+ clib_memcpy (_v(v), (V), _v(l) * sizeof ((V)[0]));\
+ } \
+ _v(v); \
+})
+
+/** \brief Return copy of vector (no header, no alignment)
+
+ @param V pointer to a vector
+ @return Vdup copy of vector
+*/
+#define vec_dup(V) vec_dup_ha(V,0,0)
+
+/** \brief Return copy of vector (no header, alignment specified).
+
+ @param V pointer to a vector
+ @param A alignment (may be zero)
+
+ @return Vdup copy of vector
+*/
+#define vec_dup_aligned(V,A) vec_dup_ha(V,0,A)
+
+/** \brief Copy a vector, memcpy wrapper. Assumes sizeof(SRC[0]) ==
+ sizeof(DST[0])
+
+ @param DST destination
+ @param SRC source
+*/
+#define vec_copy(DST,SRC) clib_memcpy (DST, SRC, vec_len (DST) * \
+ sizeof ((DST)[0]))
+
+/** \brief Clone a vector. Make a new vector with the
+ same size as a given vector but possibly with a different type.
+
+ @param NEW_V pointer to new vector
+ @param OLD_V pointer to old vector
+*/
+#define vec_clone(NEW_V,OLD_V) \
+do { \
+ (NEW_V) = 0; \
+ (NEW_V) = _vec_resize ((NEW_V), vec_len (OLD_V), \
+ vec_len (OLD_V) * sizeof ((NEW_V)[0]), (0), (0)); \
+} while (0)
+
+/** \brief Make sure vector is long enough for given index (general version).
+
+ @param V (possibly NULL) pointer to a vector.
+ @param I vector index which will be valid upon return
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_validate_ha(V,I,H,A) \
+do { \
+ word _v(i) = (I); \
+ word _v(l) = vec_len (V); \
+ if (_v(i) >= _v(l)) \
+ { \
+ vec_resize_ha ((V), 1 + (_v(i) - _v(l)), (H), (A)); \
+ /* Must zero new space since user may have previously \
+ used e.g. _vec_len (v) -= 10 */ \
+ memset ((V) + _v(l), 0, (1 + (_v(i) - _v(l))) * sizeof ((V)[0])); \
+ } \
+} while (0)
+
+/** \brief Make sure vector is long enough for given index
+ (no header, unspecified alignment)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param I vector index which will be valid upon return
+ @return V (value-result macro parameter)
+*/
+#define vec_validate(V,I) vec_validate_ha(V,I,0,0)
+
+/** \brief Make sure vector is long enough for given index
+ (no header, specified alignment)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param I vector index which will be valid upon return
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_validate_aligned(V,I,A) vec_validate_ha(V,I,0,A)
+
+/** \brief Make sure vector is long enough for given index
+ and initialize empty space (general version)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param I vector index which will be valid upon return
+ @param INIT initial value (can be a complex expression!)
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_validate_init_empty_ha(V,I,INIT,H,A) \
+do { \
+ word _v(i) = (I); \
+ word _v(l) = vec_len (V); \
+ if (_v(i) >= _v(l)) \
+ { \
+ vec_resize_ha ((V), 1 + (_v(i) - _v(l)), (H), (A)); \
+ while (_v(l) <= _v(i)) \
+ { \
+ (V)[_v(l)] = (INIT); \
+ _v(l)++; \
+ } \
+ } \
+} while (0)
+
+/** \brief Make sure vector is long enough for given index
+ and initialize empty space (no header, unspecified alignment)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param I vector index which will be valid upon return
+ @param INIT initial value (can be a complex expression!)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_validate_init_empty(V,I,INIT) \
+ vec_validate_init_empty_ha(V,I,INIT,0,0)
+
+/** \brief Make sure vector is long enough for given index
+    and initialize empty space (no header, alignment specified)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param I vector index which will be valid upon return
+ @param INIT initial value (can be a complex expression!)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_validate_init_empty_aligned(V,I,INIT,A) \
+ vec_validate_init_empty_ha(V,I,INIT,0,A)
+
+/** \brief Add 1 element to end of vector (general version).
+
+ @param V pointer to a vector
+ @param E element to add
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_add1_ha(V,E,H,A) \
+do { \
+ word _v(l) = vec_len (V); \
+ V = _vec_resize ((V), 1, (_v(l) + 1) * sizeof ((V)[0]), (H), (A)); \
+ (V)[_v(l)] = (E); \
+} while (0)
+
+/** \brief Add 1 element to end of vector (unspecified alignment).
+
+ @param V pointer to a vector
+ @param E element to add
+ @return V (value-result macro parameter)
+*/
+#define vec_add1(V,E) vec_add1_ha(V,E,0,0)
+
+/** \brief Add 1 element to end of vector (alignment specified).
+
+ @param V pointer to a vector
+ @param E element to add
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_add1_aligned(V,E,A) vec_add1_ha(V,E,0,A)
+
+/** \brief Add N elements to end of vector V,
+ return pointer to new elements in P. (general version)
+
+ @param V pointer to a vector
+ @param P pointer to new vector element(s)
+ @param N number of elements to add
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V and P (value-result macro parameters)
+*/
+#define vec_add2_ha(V,P,N,H,A) \
+do { \
+ word _v(n) = (N); \
+ word _v(l) = vec_len (V); \
+ V = _vec_resize ((V), _v(n), (_v(l) + _v(n)) * sizeof ((V)[0]), (H), (A)); \
+ P = (V) + _v(l); \
+} while (0)
+
+/** \brief Add N elements to end of vector V,
+ return pointer to new elements in P. (no header, unspecified alignment)
+
+ @param V pointer to a vector
+ @param P pointer to new vector element(s)
+ @param N number of elements to add
+ @return V and P (value-result macro parameters)
+*/
+
+#define vec_add2(V,P,N) vec_add2_ha(V,P,N,0,0)
+
+/** \brief Add N elements to end of vector V,
+ return pointer to new elements in P. (no header, alignment specified)
+
+ @param V pointer to a vector
+ @param P pointer to new vector element(s)
+ @param N number of elements to add
+ @param A alignment (may be zero)
+ @return V and P (value-result macro parameters)
+*/
+
+#define vec_add2_aligned(V,P,N,A) vec_add2_ha(V,P,N,0,A)
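+
+/* vec_add2 usage sketch (illustrative): reserve N elements and fill
+   them in place through the returned pointer P:
+
+     u8 *v = 0, *p;
+     vec_add2 (v, p, 16);   // v grows by 16; p points at the new slots
+     memset (p, 0xab, 16);
+*/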
+
+/** \brief Add N elements to end of vector V (general version)
+
+ @param V pointer to a vector
+ @param E pointer to element(s) to add
+ @param N number of elements to add
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_add_ha(V,E,N,H,A) \
+do { \
+ word _v(n) = (N); \
+ word _v(l) = vec_len (V); \
+ V = _vec_resize ((V), _v(n), (_v(l) + _v(n)) * sizeof ((V)[0]), (H), (A)); \
+ clib_memcpy ((V) + _v(l), (E), _v(n) * sizeof ((V)[0])); \
+} while (0)
+
+/** \brief Add N elements to end of vector V (no header, unspecified alignment)
+
+ @param V pointer to a vector
+ @param E pointer to element(s) to add
+ @param N number of elements to add
+ @return V (value-result macro parameter)
+*/
+#define vec_add(V,E,N) vec_add_ha(V,E,N,0,0)
+
+/** \brief Add N elements to end of vector V (no header, specified alignment)
+
+ @param V pointer to a vector
+ @param E pointer to element(s) to add
+ @param N number of elements to add
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_add_aligned(V,E,N,A) vec_add_ha(V,E,N,0,A)
+
+/** \brief Returns last element of a vector and decrements its length
+
+ @param V pointer to a vector
+ @return E element removed from the end of the vector
+*/
+#define vec_pop(V) \
+({ \
+ uword _v(l) = vec_len (V); \
+ ASSERT (_v(l) > 0); \
+ _v(l) -= 1; \
+ _vec_len (V) = _v (l); \
+ (V)[_v(l)]; \
+})
+
+/** \brief Set E to the last element of a vector, decrement vector length
+ @param V pointer to a vector
+ @param E pointer to the last vector element
+    @return 1 if an element was popped and E set, 0 if the vector was
+    empty (value-result macro parameter)
+*/
+
+#define vec_pop2(V,E) \
+({ \
+ uword _v(l) = vec_len (V); \
+ if (_v(l) > 0) (E) = vec_pop (V); \
+ _v(l) > 0; \
+})
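+
+/* vec_pop2 usage sketch (illustrative): drain a vector without a
+   separate emptiness check; consume() is hypothetical:
+
+     u32 e;
+     while (vec_pop2 (v, e))
+       consume (e);
+*/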
+
+/** \brief Insert N vector elements starting at element M,
+ initialize new elements (general version).
+
+ @param V (possibly NULL) pointer to a vector.
+ @param N number of elements to insert
+ @param M insertion point
+    @param INIT initial byte value (passed to memset(); not an arbitrary expression)
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_insert_init_empty_ha(V,N,M,INIT,H,A) \
+do { \
+ word _v(l) = vec_len (V); \
+ word _v(n) = (N); \
+ word _v(m) = (M); \
+ V = _vec_resize ((V), \
+ _v(n), \
+ (_v(l) + _v(n))*sizeof((V)[0]), \
+ (H), (A)); \
+ ASSERT (_v(m) <= _v(l)); \
+ memmove ((V) + _v(m) + _v(n), \
+ (V) + _v(m), \
+ (_v(l) - _v(m)) * sizeof ((V)[0])); \
+ memset ((V) + _v(m), INIT, _v(n) * sizeof ((V)[0])); \
+} while (0)
+
+/** \brief Insert N vector elements starting at element M,
+ initialize new elements to zero (general version)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param N number of elements to insert
+ @param M insertion point
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_insert_ha(V,N,M,H,A) vec_insert_init_empty_ha(V,N,M,0,H,A)
+
+/** \brief Insert N vector elements starting at element M,
+ initialize new elements to zero (no header, unspecified alignment)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param N number of elements to insert
+ @param M insertion point
+ @return V (value-result macro parameter)
+*/
+#define vec_insert(V,N,M) vec_insert_ha(V,N,M,0,0)
+
+/** \brief Insert N vector elements starting at element M,
+ initialize new elements to zero (no header, alignment specified)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param N number of elements to insert
+ @param M insertion point
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_insert_aligned(V,N,M,A) vec_insert_ha(V,N,M,0,A)
+
+/** \brief Insert N vector elements starting at element M,
+ initialize new elements (no header, unspecified alignment)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param N number of elements to insert
+ @param M insertion point
+    @param INIT initial byte value (passed to memset(); not an arbitrary expression)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_insert_init_empty(V,N,M,INIT) \
+ vec_insert_init_empty_ha(V,N,M,INIT,0,0)
+
+/** \brief Insert N vector elements starting at element M,
+ initialize new elements (no header, specified alignment)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param N number of elements to insert
+ @param M insertion point
+    @param INIT initial byte value (passed to memset(); not an arbitrary expression)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_insert_init_empty_aligned(V,N,M,INIT,A) \
+ vec_insert_init_empty_ha(V,N,M,INIT,0,A)
+
+/** \brief Insert N vector elements starting at element M,
+ insert given elements (general version)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param E element(s) to insert
+ @param N number of elements to insert
+ @param M insertion point
+ @param H header size in bytes (may be zero)
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_insert_elts_ha(V,E,N,M,H,A) \
+do { \
+ word _v(l) = vec_len (V); \
+ word _v(n) = (N); \
+ word _v(m) = (M); \
+ V = _vec_resize ((V), \
+ _v(n), \
+ (_v(l) + _v(n))*sizeof((V)[0]), \
+ (H), (A)); \
+ ASSERT (_v(m) <= _v(l)); \
+ memmove ((V) + _v(m) + _v(n), \
+ (V) + _v(m), \
+ (_v(l) - _v(m)) * sizeof ((V)[0])); \
+ clib_memcpy ((V) + _v(m), (E), \
+ _v(n) * sizeof ((V)[0])); \
+} while (0)
+
+/** \brief Insert N vector elements starting at element M,
+ insert given elements (no header, unspecified alignment)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param E element(s) to insert
+ @param N number of elements to insert
+ @param M insertion point
+ @return V (value-result macro parameter)
+*/
+#define vec_insert_elts(V,E,N,M) vec_insert_elts_ha(V,E,N,M,0,0)
+
+/** \brief Insert N vector elements starting at element M,
+ insert given elements (no header, specified alignment)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param E element(s) to insert
+ @param N number of elements to insert
+ @param M insertion point
+ @param A alignment (may be zero)
+ @return V (value-result macro parameter)
+*/
+#define vec_insert_elts_aligned(V,E,N,M,A) vec_insert_elts_ha(V,E,N,M,0,A)
+
+/** \brief Delete N elements starting at element M
+
+ @param V pointer to a vector
+ @param N number of elements to delete
+ @param M first element to delete
+ @return V (value-result macro parameter)
+*/
+#define vec_delete(V,N,M) \
+do { \
+ word _v(l) = vec_len (V); \
+ word _v(n) = (N); \
+ word _v(m) = (M); \
+ /* Copy over deleted elements. */ \
+ if (_v(l) - _v(n) - _v(m) > 0) \
+ memmove ((V) + _v(m), (V) + _v(m) + _v(n), \
+ (_v(l) - _v(n) - _v(m)) * sizeof ((V)[0])); \
+ /* Zero empty space at end (for future re-allocation). */ \
+ if (_v(n) > 0) \
+ memset ((V) + _v(l) - _v(n), 0, _v(n) * sizeof ((V)[0])); \
+ _vec_len (V) -= _v(n); \
+} while (0)
+
+/** \brief Delete the element at index I by swapping in the last
+    element (O(1); does not preserve element order)
+
+    @param V pointer to a vector
+    @param I index to delete
+*/
+#define vec_del1(v,i) \
+do { \
+ uword _vec_del_l = _vec_len (v) - 1; \
+ uword _vec_del_i = (i); \
+ if (_vec_del_i < _vec_del_l) \
+ (v)[_vec_del_i] = (v)[_vec_del_l]; \
+ _vec_len (v) = _vec_del_l; \
+} while (0)
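+
+/* vec_del1 sketch (illustrative): the last element is swapped into
+   slot I, so ordering is not preserved:
+
+     u32 *v = 0;
+     vec_add1 (v, 10); vec_add1 (v, 20); vec_add1 (v, 30);
+     vec_del1 (v, 0);   // v is now {30, 20}
+*/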
+
+/** \brief Append v2 after v1. Result in v1.
+ @param V1 target vector
+ @param V2 vector to append
+*/
+
+#define vec_append(v1,v2) \
+do { \
+ uword _v(l1) = vec_len (v1); \
+ uword _v(l2) = vec_len (v2); \
+ \
+ v1 = _vec_resize ((v1), _v(l2), \
+ (_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, 0); \
+ clib_memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \
+} while (0)
+
+/** \brief Append v2 after v1. Result in v1. Specified alignment.
+ @param V1 target vector
+ @param V2 vector to append
+ @param align required alignment
+*/
+
+#define vec_append_aligned(v1,v2,align) \
+do { \
+ uword _v(l1) = vec_len (v1); \
+ uword _v(l2) = vec_len (v2); \
+ \
+ v1 = _vec_resize ((v1), _v(l2), \
+ (_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, align); \
+ clib_memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \
+} while (0)
+
+/** \brief Prepend v2 before v1. Result in v1.
+ @param V1 target vector
+ @param V2 vector to prepend
+*/
+
+#define vec_prepend(v1,v2) \
+do { \
+ uword _v(l1) = vec_len (v1); \
+ uword _v(l2) = vec_len (v2); \
+ \
+ v1 = _vec_resize ((v1), _v(l2), \
+ (_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, 0); \
+ memmove ((v1) + _v(l2), (v1), _v(l1) * sizeof ((v1)[0])); \
+ clib_memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \
+} while (0)
+
+/** \brief Prepend v2 before v1. Result in v1. Specified alignment
+ @param V1 target vector
+ @param V2 vector to prepend
+ @param align required alignment
+*/
+
+#define vec_prepend_aligned(v1,v2,align) \
+do { \
+ uword _v(l1) = vec_len (v1); \
+ uword _v(l2) = vec_len (v2); \
+ \
+ v1 = _vec_resize ((v1), _v(l2), \
+ (_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, align); \
+ memmove ((v1) + _v(l2), (v1), _v(l1) * sizeof ((v1)[0])); \
+ clib_memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \
+} while (0)
+
+
+/** \brief Zero all vector elements. Null-pointer tolerant.
+ @param var Vector to zero
+*/
+#define vec_zero(var) \
+do { \
+ if (var) \
+ memset ((var), 0, vec_len (var) * sizeof ((var)[0])); \
+} while (0)
+
+/** \brief Set all vector elements to given value. Null-pointer tolerant.
+ @param v vector to set
+ @param val value for each vector element
+*/
+#define vec_set(v,val) \
+do { \
+ word _v(i); \
+ __typeof__ ((v)[0]) _val = (val); \
+ for (_v(i) = 0; _v(i) < vec_len (v); _v(i)++) \
+ (v)[_v(i)] = _val; \
+} while (0)
+
+#ifdef CLIB_UNIX
+#include <stdlib.h> /* for qsort */
+#endif
+
+/** \brief Compare two vectors, not NULL-pointer tolerant
+
+ @param v1 Pointer to a vector
+ @param v2 Pointer to a vector
+ @return 1 if equal, 0 if unequal
+*/
+#define vec_is_equal(v1,v2) \
+ (vec_len (v1) == vec_len (v2) && ! memcmp ((v1), (v2), vec_len (v1) * sizeof ((v1)[0])))
+
+/** \brief Compare two vectors (only applicable to vectors of signed numbers).
+ Used in qsort compare functions.
+
+ @param v1 Pointer to a vector
+ @param v2 Pointer to a vector
+ @return -1, 0, +1
+*/
+#define vec_cmp(v1,v2) \
+({ \
+ word _v(i), _v(cmp), _v(l); \
+ _v(l) = clib_min (vec_len (v1), vec_len (v2)); \
+ _v(cmp) = 0; \
+ for (_v(i) = 0; _v(i) < _v(l); _v(i)++) { \
+ _v(cmp) = (v1)[_v(i)] - (v2)[_v(i)]; \
+ if (_v(cmp)) \
+ break; \
+ } \
+ if (_v(cmp) == 0 && _v(l) > 0) \
+ _v(cmp) = vec_len(v1) - vec_len(v2); \
+ (_v(cmp) < 0 ? -1 : (_v(cmp) > 0 ? +1 : 0)); \
+})
+
+/** \brief Search a vector for the index of the entry that matches.
+
+    @param v Pointer to a vector
+    @param E Entry to match
+ @return index of match or ~0
+*/
+#define vec_search(v,E) \
+({ \
+ word _v(i) = 0; \
+ while (_v(i) < vec_len(v)) \
+ { \
+ if ((v)[_v(i)] == E) \
+ break; \
+ _v(i)++; \
+ } \
+ if (_v(i) == vec_len(v)) \
+ _v(i) = ~0; \
+ _v(i); \
+})
+
+/** \brief Sort a vector using the supplied element comparison function
+
+ @param vec vector to sort
+ @param f comparison function
+*/
+#define vec_sort_with_function(vec,f) \
+do { \
+ qsort (vec, vec_len (vec), sizeof (vec[0]), (void *) (f)); \
+} while (0)
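+
+/* Sort sketch (illustrative): the comparison function follows qsort()
+   conventions, taking const void * arguments:
+
+     static int u32_compare (const void *a, const void *b)
+     {
+       u32 x = *(const u32 *) a, y = *(const u32 *) b;
+       return x < y ? -1 : x > y;
+     }
+     ...
+     vec_sort_with_function (v, u32_compare);
+*/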
+
+/** \brief Make a vector containing a NULL terminated c-string.
+
+ @param V (possibly NULL) pointer to a vector.
+ @param S pointer to string buffer.
+ @param L string length (NOT including the terminating NULL; a la strlen())
+*/
+#define vec_validate_init_c_string(V, S, L) \
+ do { \
+ vec_reset_length (V); \
+ vec_validate ((V), (L)); \
+ if ((S) && (L)) \
+ clib_memcpy ((V), (S), (L)); \
+ (V)[(L)] = 0; \
+ } while (0)
+
+
+/** \brief Test whether a vector is a NULL terminated c-string.
+
+ @param V (possibly NULL) pointer to a vector.
+ @return BOOLEAN indicating if the vector c-string is null terminated.
+*/
+#define vec_c_string_is_terminated(V) \
+ (((V) != 0) && (vec_len (V) != 0) && ((V)[vec_len ((V)) - 1] == 0))
+
+/** \brief (If necessary) NULL terminate a vector containing a c-string.
+
+ @param V (possibly NULL) pointer to a vector.
+ @return V (value-result macro parameter)
+*/
+#define vec_terminate_c_string(V) \
+ do { \
+ u32 vl = vec_len ((V)); \
+ if (!vec_c_string_is_terminated(V)) \
+ { \
+ vec_validate ((V), vl); \
+ (V)[vl] = 0; \
+ } \
+ } while (0)
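+
+/* C-string vector sketch (illustrative): build a vector holding a
+   terminated copy of "abc" and check it:
+
+     u8 *s = 0;
+     vec_validate_init_c_string (s, "abc", 3);
+     ASSERT (vec_c_string_is_terminated (s));
+     vec_free (s);
+*/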
+
+#endif /* included_vec_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h
new file mode 100644
index 00000000..3b8c7707
--- /dev/null
+++ b/src/vppinfra/vec_bootstrap.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_vec_bootstrap_h
+#define included_clib_vec_bootstrap_h
+
+/** \file
+   Vector bootstrap header file
+*/
+
+/* Bootstrap include so that #include <vppinfra/mem.h> can include e.g.
+ <vppinfra/mheap.h> which depends on <vppinfra/vec.h>. */
+
+/** \brief vector header structure
+
+   Bookkeeping header preceding vector elements in memory.
+   User header information may precede the standard vec header.
+ If you change u32 len -> u64 len, single vectors can
+ exceed 2**32 elements. Clib heaps are vectors. */
+
+typedef struct
+{
+#if CLIB_VEC64 > 0
+ u64 len;
+#else
+ u32 len; /**< Number of elements in vector (NOT its allocated length). */
+#endif
+  u8 vector_data[0];  /**< Vector data. */
+} vec_header_t;
+
+/** \brief Find the vector header
+
+ Given the user's pointer to a vector, find the corresponding
+ vector header
+
+ @param v pointer to a vector
+   @return pointer to the vector's vec_header_t
+*/
+#define _vec_find(v) ((vec_header_t *) (v) - 1)
+
+#define _vec_round_size(s) \
+ (((s) + sizeof (uword) - 1) &~ (sizeof (uword) - 1))
+
+always_inline uword
+vec_header_bytes (uword header_bytes)
+{
+ return round_pow2 (header_bytes + sizeof (vec_header_t),
+ sizeof (vec_header_t));
+}
+
+/** \brief Find a user vector header
+
+ Finds the user header of a vector with unspecified alignment given
+ the user pointer to the vector.
+*/
+
+always_inline void *
+vec_header (void *v, uword header_bytes)
+{
+ return v - vec_header_bytes (header_bytes);
+}
+
+/** \brief Find the end of user vector header
+
+ Finds the end of the user header of a vector with unspecified
+ alignment given the user pointer to the vector.
+*/
+
+always_inline void *
+vec_header_end (void *v, uword header_bytes)
+{
+ return v + vec_header_bytes (header_bytes);
+}
+
+always_inline uword
+vec_aligned_header_bytes (uword header_bytes, uword align)
+{
+ return round_pow2 (header_bytes + sizeof (vec_header_t), align);
+}
+
+always_inline void *
+vec_aligned_header (void *v, uword header_bytes, uword align)
+{
+ return v - vec_aligned_header_bytes (header_bytes, align);
+}
+
+always_inline void *
+vec_aligned_header_end (void *v, uword header_bytes, uword align)
+{
+ return v + vec_aligned_header_bytes (header_bytes, align);
+}
+
+
+/** \brief Number of elements in vector (lvalue-capable)
+
+ _vec_len (v) does not check for NULL, but can be used as an lvalue
+ (e.g. _vec_len (v) = 99).
+*/
+
+#define _vec_len(v) (_vec_find(v)->len)
+
+/** \brief Number of elements in vector (rvalue-only, NULL tolerant)
+
+ vec_len (v) checks for NULL, but cannot be used as an lvalue.
+ If in doubt, use vec_len...
+*/
+
+#define vec_len(v) ((v) ? _vec_len(v) : 0)
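+
+/* Illustrative sketch (not part of the API): how the user pointer,
+   header and length relate for a vector of u32:
+
+     u32 *v = ...;                     // user pointer to element 0
+     vec_header_t *h = _vec_find (v);  // header sits just below v
+     ASSERT ((u32 *) h->vector_data == v);
+     ASSERT (vec_len (v) == h->len);
+*/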
+
+/** \brief Reset vector length to zero.
+
+ NULL-pointer tolerant.
+*/
+
+#define vec_reset_length(v) do { if (v) _vec_len (v) = 0; } while (0)
+
+/** \brief Number of data bytes in vector. */
+
+#define vec_bytes(v) (vec_len (v) * sizeof (v[0]))
+
+/** \brief Total number of bytes that can fit in vector with current allocation; b is the user header size in bytes. */
+
+#define vec_capacity(v,b) \
+({ \
+ void * _vec_capacity_v = (void *) (v); \
+ uword _vec_capacity_b = (b); \
+ _vec_capacity_b = sizeof (vec_header_t) + _vec_round_size (_vec_capacity_b); \
+ _vec_capacity_v ? clib_mem_size (_vec_capacity_v - _vec_capacity_b) : 0; \
+})
+
+/** \brief Total number of elements that can fit into vector. */
+#define vec_max_len(v) (vec_capacity(v,0) / sizeof (v[0]))
+
+/** \brief End of vector (one past the last element). */
+#define vec_end(v) ((v) + vec_len (v))
+
+/** \brief True if given pointer is within given vector. */
+#define vec_is_member(v,e) ((e) >= (v) && (e) < vec_end (v))
+
+/** \brief Get pointer to vector element at index i, checking that i is in bounds. */
+#define vec_elt_at_index(v,i) \
+({ \
+ ASSERT ((i) < vec_len (v)); \
+ (v) + (i); \
+})
+
+/** \brief Get vector value at index i */
+#define vec_elt(v,i) (vec_elt_at_index(v,i))[0]
+
+/** \brief Vector iterator */
+#define vec_foreach(var,vec) for (var = (vec); var < vec_end (vec); var++)
+
+/** \brief Vector iterator (reverse) */
+#define vec_foreach_backwards(var,vec) \
+for (var = vec_end (vec) - 1; var >= (vec); var--)
+
+/** \brief Iterate over vector indices. */
+#define vec_foreach_index(var,v) for ((var) = 0; (var) < vec_len (v); (var)++)
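+
+/* Usage sketch (assumes a u32 vector v built elsewhere, e.g. with
+   vec_add1 from vec.h):
+
+     u32 *v = ..., *p, i, sum = 0;
+     vec_foreach (p, v)        // walk elements via pointer
+       sum += p[0];
+     vec_foreach_index (i, v)  // same walk, by index
+       sum += v[i];
+*/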
+
+#endif /* included_clib_vec_bootstrap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vector.c b/src/vppinfra/vector.c
new file mode 100644
index 00000000..68b4fdc2
--- /dev/null
+++ b/src/vppinfra/vector.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/types.h>
+
+#if defined (__SSE2__)
+u8 u32x4_compare_word_mask_table[256] = {
+ [0xf0] = (1 << 1),
+ [0x0f] = (1 << 0),
+ [0xff] = (1 << 0) | (1 << 1),
+};
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vector.h b/src/vppinfra/vector.h
new file mode 100644
index 00000000..491e7cfe
--- /dev/null
+++ b/src/vppinfra/vector.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_vector_h
+#define included_clib_vector_h
+
+#include <vppinfra/clib.h>
+
+/* Vector types. */
+
+#if defined (__MMX__) || defined (__IWMMXT__) || defined (__aarch64__)
+#define CLIB_HAVE_VEC64
+#endif
+
+#if defined (__SSE2__) && __GNUC__ >= 4
+#define CLIB_HAVE_VEC128
+#endif
+
+#if defined (__ALTIVEC__)
+#define CLIB_HAVE_VEC128
+#endif
+
+/* 128 implies 64 */
+#ifdef CLIB_HAVE_VEC128
+#define CLIB_HAVE_VEC64
+#endif
+
+#define _vector_size(n) __attribute__ ((vector_size (n)))
+
+#if defined (__aarch64__) || defined (__arm__)
+typedef u8 u8x16 _vector_size (16);
+typedef u16 u16x8 _vector_size (16);
+typedef u32 u32x4 _vector_size (16);
+typedef u64 u64x2 _vector_size (16);
+#endif
+
+#ifdef CLIB_HAVE_VEC64
+/* Signed 64 bit. */
+typedef char i8x8 _vector_size (8);
+typedef short i16x4 _vector_size (8);
+typedef int i32x2 _vector_size (8);
+
+/* Unsigned 64 bit. */
+typedef unsigned char u8x8 _vector_size (8);
+typedef unsigned short u16x4 _vector_size (8);
+typedef unsigned int u32x2 _vector_size (8);
+
+/* Floating point 64 bit. */
+typedef float f32x2 _vector_size (8);
+#endif /* CLIB_HAVE_VEC64 */
+
+#ifdef CLIB_HAVE_VEC128
+/* Signed 128 bit. */
+typedef i8 i8x16 _vector_size (16);
+typedef i16 i16x8 _vector_size (16);
+typedef i32 i32x4 _vector_size (16);
+typedef long long i64x2 _vector_size (16);
+
+/* Unsigned 128 bit. */
+typedef u8 u8x16 _vector_size (16);
+typedef u16 u16x8 _vector_size (16);
+typedef u32 u32x4 _vector_size (16);
+typedef u64 u64x2 _vector_size (16);
+
+typedef f32 f32x4 _vector_size (16);
+typedef f64 f64x2 _vector_size (16);
+
+/* Signed 256 bit. */
+typedef i8 i8x32 _vector_size (32);
+typedef i16 i16x16 _vector_size (32);
+typedef i32 i32x8 _vector_size (32);
+typedef long long i64x4 _vector_size (32);
+
+/* Unsigned 256 bit. */
+typedef u8 u8x32 _vector_size (32);
+typedef u16 u16x16 _vector_size (32);
+typedef u32 u32x8 _vector_size (32);
+typedef u64 u64x4 _vector_size (32);
+
+typedef f32 f32x8 _vector_size (32);
+typedef f64 f64x4 _vector_size (32);
+#endif /* CLIB_HAVE_VEC128 */
+
+/* Vector word sized types. */
+#ifndef CLIB_VECTOR_WORD_BITS
+#ifdef CLIB_HAVE_VEC128
+#define CLIB_VECTOR_WORD_BITS 128
+#else
+#define CLIB_VECTOR_WORD_BITS 64
+#endif
+#endif /* CLIB_VECTOR_WORD_BITS */
+
+/* Vector word sized types. */
+#if CLIB_VECTOR_WORD_BITS == 128
+typedef i8 i8x _vector_size (16);
+typedef i16 i16x _vector_size (16);
+typedef i32 i32x _vector_size (16);
+typedef i64 i64x _vector_size (16);
+typedef u8 u8x _vector_size (16);
+typedef u16 u16x _vector_size (16);
+typedef u32 u32x _vector_size (16);
+typedef u64 u64x _vector_size (16);
+#endif
+#if CLIB_VECTOR_WORD_BITS == 64
+typedef i8 i8x _vector_size (8);
+typedef i16 i16x _vector_size (8);
+typedef i32 i32x _vector_size (8);
+typedef i64 i64x _vector_size (8);
+typedef u8 u8x _vector_size (8);
+typedef u16 u16x _vector_size (8);
+typedef u32 u32x _vector_size (8);
+typedef u64 u64x _vector_size (8);
+#endif
+
+#undef _vector_size
+
+#define VECTOR_WORD_TYPE(t) t##x
+#define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t))
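+
+/* Sketch: with CLIB_VECTOR_WORD_BITS == 128, VECTOR_WORD_TYPE (u32) is
+   u32x (16 bytes holding 4 u32s), so VECTOR_WORD_TYPE_LEN (u32) == 4;
+   at 64 bits it would be 2. */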
+
+/* Union types. */
+#if (defined(CLIB_HAVE_VEC128) || defined(CLIB_HAVE_VEC64))
+
+#define _(t) \
+ typedef union { \
+ t##x as_##t##x; \
+ t as_##t[VECTOR_WORD_TYPE_LEN (t)]; \
+ } t##x##_union_t;
+
+_(u8);
+_(u16);
+_(u32);
+_(u64);
+_(i8);
+_(i16);
+_(i32);
+_(i64);
+
+#undef _
+
+#endif
+
+#ifdef CLIB_HAVE_VEC64
+
+#define _(t,n) \
+ typedef union { \
+ t##x##n as_##t##x##n; \
+ t as_##t[n]; \
+ } t##x##n##_union_t; \
+
+_(u8, 8);
+_(u16, 4);
+_(u32, 2);
+_(i8, 8);
+_(i16, 4);
+_(i32, 2);
+
+#undef _
+
+#endif
+
+#ifdef CLIB_HAVE_VEC128
+
+#define _(t,n) \
+ typedef union { \
+ t##x##n as_##t##x##n; \
+ t as_##t[n]; \
+ } t##x##n##_union_t; \
+
+_(u8, 16);
+_(u16, 8);
+_(u32, 4);
+_(u64, 2);
+_(i8, 16);
+_(i16, 8);
+_(i32, 4);
+_(i64, 2);
+_(f32, 4);
+_(f64, 2);
+
+#undef _
+
+#endif
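+
+/* Sketch: the union types give scalar lane access to vector values:
+
+     u32x4_union_t u;
+     u.as_u32x4 = x;              // some u32x4 value
+     u32 lane2 = u.as_u32[2];     // read one 32 bit lane
+*/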
+
+/* When we don't have vector types, still define e.g. u32x4_union_t but as an array. */
+#if !defined(CLIB_HAVE_VEC128) && !defined(CLIB_HAVE_VEC64)
+
+#define _(t,n) \
+ typedef union { \
+ t as_##t[n]; \
+ } t##x##n##_union_t; \
+
+_(u8, 16);
+_(u16, 8);
+_(u32, 4);
+_(u64, 2);
+_(i8, 16);
+_(i16, 8);
+_(i32, 4);
+_(i64, 2);
+
+#undef _
+
+#endif
+
+#if defined (__SSE2__) && __GNUC__ >= 4
+#include <vppinfra/vector_sse2.h>
+#endif
+
+#if defined (__ALTIVEC__)
+#include <vppinfra/vector_altivec.h>
+#endif
+
+#if defined (__IWMMXT__)
+#include <vppinfra/vector_iwmmxt.h>
+#endif
+
+#if defined (__aarch64__)
+#include <vppinfra/vector_neon.h>
+#endif
+
+#if (defined(CLIB_HAVE_VEC128) || defined(CLIB_HAVE_VEC64))
+#include <vppinfra/vector_funcs.h>
+#endif
+
+#endif /* included_clib_vector_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vector_altivec.h b/src/vppinfra/vector_altivec.h
new file mode 100644
index 00000000..0e9de820
--- /dev/null
+++ b/src/vppinfra/vector_altivec.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2009 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_vector_altivec_h
+#define included_vector_altivec_h
+
+/* Splats. */
+#define _(t,n,ti,fi,tr,fr) \
+ always_inline t##x##n t##x##n##_splat (t v) \
+ { return (t##x##n) __builtin_altivec_##fi ((ti) v); } \
+ \
+ always_inline t##x##n t##x##n##_splat_word (t##x##n x, int word_index) \
+ { return (t##x##n) __builtin_altivec_##fr ((tr) x, word_index); }
+
+#define u16x8_splat(i) ((u16x8) __builtin_altivec_vspltish (i))
+#define i16x8_splat(i) ((i16x8) __builtin_altivec_vspltish (i))
+#define u32x4_splat(i) ((u32x4) __builtin_altivec_vspltisw (i))
+#define i32x4_splat(i) ((i32x4) __builtin_altivec_vspltisw (i))
+
+#define u16x8_splat_word(x,i) ((u16x8) __builtin_altivec_vsplth ((i16x8) (x), (i)))
+#define i16x8_splat_word(x,i) ((i16x8) __builtin_altivec_vsplth ((i16x8) (x), (i)))
+#define u32x4_splat_word(x,i) ((u32x4) __builtin_altivec_vspltw ((i32x4) (x), (i)))
+#define i32x4_splat_word(x,i) ((i32x4) __builtin_altivec_vspltw ((i32x4) (x), (i)))
+
+#undef _
+
+/* 128 bit shifts. */
+#define _(t,ti,lr,f) \
+ always_inline t t##_##lr (t x, t y) \
+ { return (t) __builtin_altivec_##f ((ti) x, (ti) y); } \
+ \
+ always_inline t t##_i##lr (t x, int i) \
+ { \
+ t j = {i,i,i,i}; \
+ return t##_##lr (x, j); \
+ }
+
+_(u16x8, i16x8, shift_left, vslh);
+_(u32x4, i32x4, shift_left, vslw);
+_(u16x8, i16x8, shift_right, vsrh);
+_(u32x4, i32x4, shift_right, vsrw);
+_(i16x8, i16x8, shift_right, vsrah);
+_(i32x4, i32x4, shift_right, vsraw);
+_(u16x8, i16x8, rotate_left, vrlh);
+_(i16x8, i16x8, rotate_left, vrlh);
+_(u32x4, i32x4, rotate_left, vrlw);
+_(i32x4, i32x4, rotate_left, vrlw);
+
+#undef _
+
+#define _(t,it,lr,f) \
+ always_inline t t##_word_shift_##lr (t x, int n_words) \
+ { \
+ i32x4 n_bits = {0,0,0,n_words * BITS (it)}; \
+ return (t) __builtin_altivec_##f ((i32x4) x, n_bits); \
+ }
+
+_(u32x4, u32, left, vslo)
+_(i32x4, i32, left, vslo)
+_(u32x4, u32, right, vsro)
+_(i32x4, i32, right, vsro)
+_(u16x8, u16, left, vslo)
+_(i16x8, i16, left, vslo)
+_(u16x8, u16, right, vsro)
+_(i16x8, i16, right, vsro)
+
+#undef _
+
+always_inline u32
+u32x4_get0 (u32x4 x)
+{
+ u32x4_union_t y;
+ y.as_u32x4 = x;
+ return y.as_u32[3];
+}
+
+/* Interleave. */
+#define _(t,it,lh,f) \
+ always_inline t t##_interleave_##lh (t x, t y) \
+ { return (t) __builtin_altivec_##f ((it) x, (it) y); }
+
+_(u32x4, i32x4, lo, vmrglw)
+_(i32x4, i32x4, lo, vmrglw)
+_(u16x8, i16x8, lo, vmrglh)
+_(i16x8, i16x8, lo, vmrglh)
+_(u32x4, i32x4, hi, vmrghw)
+_(i32x4, i32x4, hi, vmrghw)
+_(u16x8, i16x8, hi, vmrghh)
+_(i16x8, i16x8, hi, vmrghh)
+
+#undef _
+
+/* Unaligned loads/stores. */
+#ifndef __cplusplus
+#define _(t) \
+ always_inline void t##_store_unaligned (t x, t * a) \
+ { clib_mem_unaligned (a, t) = x; } \
+ always_inline t t##_load_unaligned (t * a) \
+ { return clib_mem_unaligned (a, t); }
+_(u8x16) _(u16x8) _(u32x4) _(u64x2) _(i8x16) _(i16x8) _(i32x4) _(i64x2)
+#undef _
+#endif
+
+#define _signed_binop(n,m,f,g) \
+ /* Unsigned */ \
+ always_inline u##n##x##m \
+ u##n##x##m##_##f (u##n##x##m x, u##n##x##m y) \
+ { return (u##n##x##m) __builtin_altivec_##g ((i##n##x##m) x, (i##n##x##m) y); } \
+ \
+ /* Signed */ \
+ always_inline i##n##x##m \
+ i##n##x##m##_##f (i##n##x##m x, i##n##x##m y) \
+ { return (i##n##x##m) __builtin_altivec_##g ((i##n##x##m) x, (i##n##x##m) y); }
+
+/* Compare operations. */
+_signed_binop (16, 8, is_equal, vcmpequh)
+_signed_binop (32, 4, is_equal, vcmpequw)
+
+#undef _signed_binop
+
+always_inline u16x8
+u16x8_is_zero (u16x8 x)
+{
+ u16x8 zero = { 0 };
+ return u16x8_is_equal (x, zero);
+}
+
+always_inline u32x4
+u32x4_is_zero (u32x4 x)
+{
+ u32x4 zero = { 0 };
+ return u32x4_is_equal (x, zero);
+}
+
+always_inline u32
+u32x4_zero_byte_mask (u32x4 x)
+{
+ u32x4 cmp = u32x4_is_zero (x);
+ u32x4 tmp = { 0x000f, 0x00f0, 0x0f00, 0xf000, };
+ cmp &= tmp;
+ cmp |= u32x4_word_shift_right (cmp, 2);
+ cmp |= u32x4_word_shift_right (cmp, 1);
+ return u32x4_get0 (cmp);
+}
+
+#endif /* included_vector_altivec_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vector_funcs.h b/src/vppinfra/vector_funcs.h
new file mode 100644
index 00000000..db09de0f
--- /dev/null
+++ b/src/vppinfra/vector_funcs.h
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2008 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_vector_funcs_h
+#define included_vector_funcs_h
+
+#include <vppinfra/byte_order.h>
+
+/* Addition/subtraction. */
+#if CLIB_VECTOR_WORD_BITS == 128
+#define u8x_add u8x16_add
+#define u16x_add u16x8_add
+#define u32x_add u32x4_add
+#define u64x_add u64x2_add
+#define i8x_add i8x16_add
+#define i16x_add i16x8_add
+#define i32x_add i32x4_add
+#define i64x_add i64x2_add
+#define u8x_sub u8x16_sub
+#define u16x_sub u16x8_sub
+#define u32x_sub u32x4_sub
+#define u64x_sub u64x2_sub
+#define i8x_sub i8x16_sub
+#define i16x_sub i16x8_sub
+#define i32x_sub i32x4_sub
+#define i64x_sub i64x2_sub
+#endif
+
+#if CLIB_VECTOR_WORD_BITS == 64
+#define u8x_add u8x8_add
+#define u16x_add u16x4_add
+#define u32x_add u32x2_add
+#define i8x_add i8x8_add
+#define i16x_add i16x4_add
+#define i32x_add i32x2_add
+#define u8x_sub u8x8_sub
+#define u16x_sub u16x4_sub
+#define u32x_sub u32x2_sub
+#define i8x_sub i8x8_sub
+#define i16x_sub i16x4_sub
+#define i32x_sub i32x2_sub
+#endif
+
+/* Saturating addition/subtraction. */
+#if CLIB_VECTOR_WORD_BITS == 128
+#define u8x_add_saturate u8x16_add_saturate
+#define u16x_add_saturate u16x8_add_saturate
+#define i8x_add_saturate i8x16_add_saturate
+#define i16x_add_saturate i16x8_add_saturate
+#define u8x_sub_saturate u8x16_sub_saturate
+#define u16x_sub_saturate u16x8_sub_saturate
+#define i8x_sub_saturate i8x16_sub_saturate
+#define i16x_sub_saturate i16x8_sub_saturate
+#endif
+
+#if CLIB_VECTOR_WORD_BITS == 64
+#define u8x_add_saturate u8x8_add_saturate
+#define u16x_add_saturate u16x4_add_saturate
+#define i8x_add_saturate i8x8_add_saturate
+#define i16x_add_saturate i16x4_add_saturate
+#define u8x_sub_saturate u8x8_sub_saturate
+#define u16x_sub_saturate u16x4_sub_saturate
+#define i8x_sub_saturate i8x8_sub_saturate
+#define i16x_sub_saturate i16x4_sub_saturate
+#endif
+
+#define _vector_interleave(a,b,t) \
+do { \
+ t _tmp_lo = t##_interleave_lo (a, b); \
+ t _tmp_hi = t##_interleave_hi (a, b); \
+ if (CLIB_ARCH_IS_LITTLE_ENDIAN) \
+ (a) = _tmp_lo, (b) = _tmp_hi; \
+ else \
+ (a) = _tmp_hi, (b) = _tmp_lo; \
+} while (0)
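+
+/* Sketch (little-endian lane order): with u32x4 a = {0,1,2,3} and
+   b = {4,5,6,7}, u32x4_interleave (a, b) leaves a = {0,4,1,5} and
+   b = {2,6,3,7}. */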
+
+/* 128 bit interleaves. */
+#define u8x16_interleave(a,b) _vector_interleave(a,b,u8x16)
+#define i8x16_interleave(a,b) _vector_interleave(a,b,i8x16)
+#define u16x8_interleave(a,b) _vector_interleave(a,b,u16x8)
+#define i16x8_interleave(a,b) _vector_interleave(a,b,i16x8)
+#define u32x4_interleave(a,b) _vector_interleave(a,b,u32x4)
+#define i32x4_interleave(a,b) _vector_interleave(a,b,i32x4)
+#define u64x2_interleave(a,b) _vector_interleave(a,b,u64x2)
+#define i64x2_interleave(a,b) _vector_interleave(a,b,i64x2)
+
+/* 64 bit interleaves. */
+#define u8x8_interleave(a,b) _vector_interleave(a,b,u8x8)
+#define i8x8_interleave(a,b) _vector_interleave(a,b,i8x8)
+#define u16x4_interleave(a,b) _vector_interleave(a,b,u16x4)
+#define i16x4_interleave(a,b) _vector_interleave(a,b,i16x4)
+#define u32x2_interleave(a,b) _vector_interleave(a,b,u32x2)
+#define i32x2_interleave(a,b) _vector_interleave(a,b,i32x2)
+
+/* Word sized interleaves. */
+#if CLIB_VECTOR_WORD_BITS == 128
+#define u8x_interleave u8x16_interleave
+#define u16x_interleave u16x8_interleave
+#define u32x_interleave u32x4_interleave
+#define u64x_interleave u64x2_interleave
+#endif
+
+#if CLIB_VECTOR_WORD_BITS == 64
+#define u8x_interleave u8x8_interleave
+#define u16x_interleave u16x4_interleave
+#define u32x_interleave u32x2_interleave
+#define u64x_interleave(a,b) /* do nothing */
+#endif
+
+/* Vector word sized shifts. */
+#if CLIB_VECTOR_WORD_BITS == 128
+#define u8x_shift_left u8x16_shift_left
+#define i8x_shift_left i8x16_shift_left
+#define u16x_shift_left u16x8_shift_left
+#define i16x_shift_left i16x8_shift_left
+#define u32x_shift_left u32x4_shift_left
+#define i32x_shift_left i32x4_shift_left
+#define u64x_shift_left u64x2_shift_left
+#define i64x_shift_left i64x2_shift_left
+#define u8x_shift_right u8x16_shift_right
+#define i8x_shift_right i8x16_shift_right
+#define u16x_shift_right u16x8_shift_right
+#define i16x_shift_right i16x8_shift_right
+#define u32x_shift_right u32x4_shift_right
+#define i32x_shift_right i32x4_shift_right
+#define u64x_shift_right u64x2_shift_right
+#define i64x_shift_right i64x2_shift_right
+#define u8x_rotate_left u8x16_rotate_left
+#define i8x_rotate_left i8x16_rotate_left
+#define u16x_rotate_left u16x8_rotate_left
+#define i16x_rotate_left i16x8_rotate_left
+#define u32x_rotate_left u32x4_rotate_left
+#define i32x_rotate_left i32x4_rotate_left
+#define u64x_rotate_left u64x2_rotate_left
+#define i64x_rotate_left i64x2_rotate_left
+#define u8x_rotate_right u8x16_rotate_right
+#define i8x_rotate_right i8x16_rotate_right
+#define u16x_rotate_right u16x8_rotate_right
+#define i16x_rotate_right i16x8_rotate_right
+#define u32x_rotate_right u32x4_rotate_right
+#define i32x_rotate_right i32x4_rotate_right
+#define u64x_rotate_right u64x2_rotate_right
+#define i64x_rotate_right i64x2_rotate_right
+#define u8x_ishift_left u8x16_ishift_left
+#define i8x_ishift_left i8x16_ishift_left
+#define u16x_ishift_left u16x8_ishift_left
+#define i16x_ishift_left i16x8_ishift_left
+#define u32x_ishift_left u32x4_ishift_left
+#define i32x_ishift_left i32x4_ishift_left
+#define u64x_ishift_left u64x2_ishift_left
+#define i64x_ishift_left i64x2_ishift_left
+#define u8x_ishift_right u8x16_ishift_right
+#define i8x_ishift_right i8x16_ishift_right
+#define u16x_ishift_right u16x8_ishift_right
+#define i16x_ishift_right i16x8_ishift_right
+#define u32x_ishift_right u32x4_ishift_right
+#define i32x_ishift_right i32x4_ishift_right
+#define u64x_ishift_right u64x2_ishift_right
+#define i64x_ishift_right i64x2_ishift_right
+#define u8x_irotate_left u8x16_irotate_left
+#define i8x_irotate_left i8x16_irotate_left
+#define u16x_irotate_left u16x8_irotate_left
+#define i16x_irotate_left i16x8_irotate_left
+#define u32x_irotate_left u32x4_irotate_left
+#define i32x_irotate_left i32x4_irotate_left
+#define u64x_irotate_left u64x2_irotate_left
+#define i64x_irotate_left i64x2_irotate_left
+#define u8x_irotate_right u8x16_irotate_right
+#define i8x_irotate_right i8x16_irotate_right
+#define u16x_irotate_right u16x8_irotate_right
+#define i16x_irotate_right i16x8_irotate_right
+#define u32x_irotate_right u32x4_irotate_right
+#define i32x_irotate_right i32x4_irotate_right
+#define u64x_irotate_right u64x2_irotate_right
+#define i64x_irotate_right i64x2_irotate_right
+#endif
+
+#if CLIB_VECTOR_WORD_BITS == 64
+#define u8x_shift_left u8x8_shift_left
+#define i8x_shift_left i8x8_shift_left
+#define u16x_shift_left u16x4_shift_left
+#define i16x_shift_left i16x4_shift_left
+#define u32x_shift_left u32x2_shift_left
+#define i32x_shift_left i32x2_shift_left
+#define u8x_shift_right u8x8_shift_right
+#define i8x_shift_right i8x8_shift_right
+#define u16x_shift_right u16x4_shift_right
+#define i16x_shift_right i16x4_shift_right
+#define u32x_shift_right u32x2_shift_right
+#define i32x_shift_right i32x2_shift_right
+#define u8x_rotate_left u8x8_rotate_left
+#define i8x_rotate_left i8x8_rotate_left
+#define u16x_rotate_left u16x4_rotate_left
+#define i16x_rotate_left i16x4_rotate_left
+#define u32x_rotate_left u32x2_rotate_left
+#define i32x_rotate_left i32x2_rotate_left
+#define u8x_rotate_right u8x8_rotate_right
+#define i8x_rotate_right i8x8_rotate_right
+#define u16x_rotate_right u16x4_rotate_right
+#define i16x_rotate_right i16x4_rotate_right
+#define u32x_rotate_right u32x2_rotate_right
+#define i32x_rotate_right i32x2_rotate_right
+#define u8x_ishift_left u8x8_ishift_left
+#define i8x_ishift_left i8x8_ishift_left
+#define u16x_ishift_left u16x4_ishift_left
+#define i16x_ishift_left i16x4_ishift_left
+#define u32x_ishift_left u32x2_ishift_left
+#define i32x_ishift_left i32x2_ishift_left
+#define u8x_ishift_right u8x8_ishift_right
+#define i8x_ishift_right i8x8_ishift_right
+#define u16x_ishift_right u16x4_ishift_right
+#define i16x_ishift_right i16x4_ishift_right
+#define u32x_ishift_right u32x2_ishift_right
+#define i32x_ishift_right i32x2_ishift_right
+#define u8x_irotate_left u8x8_irotate_left
+#define i8x_irotate_left i8x8_irotate_left
+#define u16x_irotate_left u16x4_irotate_left
+#define i16x_irotate_left i16x4_irotate_left
+#define u32x_irotate_left u32x2_irotate_left
+#define i32x_irotate_left i32x2_irotate_left
+#define u8x_irotate_right u8x8_irotate_right
+#define i8x_irotate_right i8x8_irotate_right
+#define u16x_irotate_right u16x4_irotate_right
+#define i16x_irotate_right i16x4_irotate_right
+#define u32x_irotate_right u32x2_irotate_right
+#define i32x_irotate_right i32x2_irotate_right
+#endif
+
+#if CLIB_VECTOR_WORD_BITS == 128
+#define u8x_splat u8x16_splat
+#define i8x_splat i8x16_splat
+#define u16x_splat u16x8_splat
+#define i16x_splat i16x8_splat
+#define u32x_splat u32x4_splat
+#define i32x_splat i32x4_splat
+#define u64x_splat u64x2_splat
+#define i64x_splat i64x2_splat
+#endif
+
+#if CLIB_VECTOR_WORD_BITS == 64
+#define u8x_splat u8x8_splat
+#define i8x_splat i8x8_splat
+#define u16x_splat u16x4_splat
+#define i16x_splat i16x4_splat
+#define u32x_splat u32x2_splat
+#define i32x_splat i32x2_splat
+#endif
+
+#define u32x4_transpose_step(x,y) \
+do { \
+ u32x4 _x = (x); \
+ u32x4 _y = (y); \
+ (x) = u32x4_interleave_lo (_x, _y); \
+ (y) = u32x4_interleave_hi (_x, _y); \
+} while (0)
+
+/* 4x4 transpose: x_ij -> x_ji */
+#define u32x4_transpose(x0,x1,x2,x3) \
+do { \
+ u32x4 _x0 = (u32x4) (x0); \
+ u32x4 _x1 = (u32x4) (x1); \
+ u32x4 _x2 = (u32x4) (x2); \
+ u32x4 _x3 = (u32x4) (x3); \
+ u32x4_transpose_step (_x0, _x2); \
+ u32x4_transpose_step (_x1, _x3); \
+ u32x4_transpose_step (_x0, _x1); \
+ u32x4_transpose_step (_x2, _x3); \
+ (x0) = (u32x4) _x0; \
+ (x1) = (u32x4) _x1; \
+ (x2) = (u32x4) _x2; \
+ (x3) = (u32x4) _x3; \
+} while (0)
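+
+/* Sketch (little-endian, e.g. SSE2), treating x0..x3 as matrix rows:
+
+     u32x4 r0 = {  0,  1,  2,  3 };
+     u32x4 r1 = {  4,  5,  6,  7 };
+     u32x4 r2 = {  8,  9, 10, 11 };
+     u32x4 r3 = { 12, 13, 14, 15 };
+     u32x4_transpose (r0, r1, r2, r3);
+     // r0 = {0,4,8,12}, r1 = {1,5,9,13},
+     // r2 = {2,6,10,14}, r3 = {3,7,11,15}
+*/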
+
+#define i32x4_transpose(x0,x1,x2,x3) \
+do { \
+ u32x4 _x0 = (u32x4) (x0); \
+ u32x4 _x1 = (u32x4) (x1); \
+ u32x4 _x2 = (u32x4) (x2); \
+ u32x4 _x3 = (u32x4) (x3); \
+ u32x4_transpose_step (_x0, _x2); \
+ u32x4_transpose_step (_x1, _x3); \
+ u32x4_transpose_step (_x0, _x1); \
+ u32x4_transpose_step (_x2, _x3); \
+ (x0) = (i32x4) _x0; \
+ (x1) = (i32x4) _x1; \
+ (x2) = (i32x4) _x2; \
+ (x3) = (i32x4) _x3; \
+} while (0)
+
+#endif /* included_vector_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vector_iwmmxt.h b/src/vppinfra/vector_iwmmxt.h
new file mode 100644
index 00000000..8e662045
--- /dev/null
+++ b/src/vppinfra/vector_iwmmxt.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2008 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_vector_iwmmxt_h
+#define included_vector_iwmmxt_h
+
+#include <vppinfra/error.h> /* for ASSERT */
+
+/* 64 bit interleaves. */
+always_inline u8x8
+u8x8_interleave_hi (u8x8 a, u8x8 b)
+{
+ return __builtin_arm_wunpckihb (a, b);
+}
+
+always_inline u8x8
+u8x8_interleave_lo (u8x8 a, u8x8 b)
+{
+ return __builtin_arm_wunpckilb (a, b);
+}
+
+always_inline u16x4
+u16x4_interleave_hi (u16x4 a, u16x4 b)
+{
+ return __builtin_arm_wunpckihh (a, b);
+}
+
+always_inline u16x4
+u16x4_interleave_lo (u16x4 a, u16x4 b)
+{
+ return __builtin_arm_wunpckilh (a, b);
+}
+
+always_inline u32x2
+u32x2_interleave_hi (u32x2 a, u32x2 b)
+{
+ return __builtin_arm_wunpckihw (a, b);
+}
+
+always_inline u32x2
+u32x2_interleave_lo (u32x2 a, u32x2 b)
+{
+ return __builtin_arm_wunpckilw (a, b);
+}
+
+always_inline u32x2
+u32x2_splat (u32 a)
+{
+ u32x2 x = { a };
+ x = u32x2_interleave_lo (x, x);
+ return x;
+}
+
+always_inline u16x4
+u16x4_splat (u16 a)
+{
+ u32 t = (u32) a | ((u32) a << 16);
+ return u32x2_splat (t);
+}
+
+always_inline u8x8
+u8x8_splat (u8 a)
+{
+ u32 t = (u32) a | ((u32) a << 8);
+ t |= t << 16;
+ return u32x2_splat (t);
+}
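+
+/* Sketch: u8x8_splat (0xab) first forms the scalar 0xabababab
+   (a | a<<8, then t | t<<16) and splats it as two 32 bit words. */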
+
+#define i32x2_splat u32x2_splat
+#define i16x4_splat u16x4_splat
+#define i8x8_splat u8x8_splat
+
+/* 64 bit shifts. */
+
+/* As of July 2008 the __builtin_arm shifts cause gcc-4.3.1 to crash
+ so we use asm versions. */
+#define _(t,u,lr,f) \
+ always_inline t \
+ t##_##lr (t x, int i) \
+ { \
+ i16x4 y; \
+ asm (#f " %[y], %[x], %[shift]" \
+ : [y] "=y" (y) \
+ : [x] "y" (x), [shift] "i" (i * u)); \
+ return y; \
+ }
+
+_(u16x4, 1, shift_left, wsllhi)
+_(u32x2, 1, shift_left, wsllwi)
+_(u16x4, 1, shift_right, wsrlhi)
+_(u32x2, 1, shift_right, wsrlwi)
+_(i16x4, 1, shift_left, wsllhi)
+_(i32x2, 1, shift_left, wsllwi)
+_(i16x4, 1, shift_right, wsrahi)
+_(i32x2, 1, shift_right, wsrawi)
+
+/* Word shifts. */
+_(u8x8, 8, word_shift_left, wslldi)
+_(u16x4, 16, word_shift_left, wslldi)
+_(u32x2, 32, word_shift_left, wslldi)
+_(u8x8, 8, word_shift_right, wsrldi)
+_(u16x4, 16, word_shift_right, wsrldi)
+_(u32x2, 32, word_shift_right, wsrldi)
+_(i8x8, 8, word_shift_left, wslldi)
+_(i16x4, 16, word_shift_left, wslldi)
+_(i32x2, 32, word_shift_left, wslldi)
+_(i8x8, 8, word_shift_right, wsrldi)
+_(i16x4, 16, word_shift_right, wsrldi)
+_(i32x2, 32, word_shift_right, wsrldi)
+
+#undef _
+
+#endif /* included_vector_iwmmxt_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
new file mode 100644
index 00000000..cea52759
--- /dev/null
+++ b/src/vppinfra/vector_neon.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vector_neon_h
+#define included_vector_neon_h
+#include <arm_neon.h>
+
+/* Splats. */
+
+#define u8x16_splat(i) vdupq_n_u8(i)
+#define u16x8_splat(i) vdupq_n_u16(i)
+#define i16x8_splat(i) vdupq_n_s16(i)
+#define u32x4_splat(i) vdupq_n_u32(i)
+#define i32x4_splat(i) vdupq_n_s32(i)
+
+/* Arithmetic */
+#define u16x8_add(a,b) vaddq_u16(a,b)
+#define i16x8_add(a,b) vaddq_s16(a,b)
+#define u16x8_sub_saturate(a,b) vqsubq_u16(a,b)
+#define i16x8_sub_saturate(a,b) vqsubq_s16(a,b)
+
+#define u16x8_is_equal(a,b) vceqq_u16(a,b)
+#define i16x8_is_equal(a,b) vceqq_s16(a,b)
+
+always_inline u32
+u16x8_zero_byte_mask (u16x8 input)
+{
+  u8x16 vall_zero = vdupq_n_u8 (0x0);
+ u8x16 res_values = { 0x01, 0x02, 0x04, 0x08,
+ 0x10, 0x20, 0x40, 0x80,
+ 0x01, 0x02, 0x04, 0x08,
+ 0x10, 0x20, 0x40, 0x80
+ };
+
+ /* input --> [0x80, 0x40, 0x01, 0xf0, ... ] */
+  u8x16 test_result =
+    vreinterpretq_u8_u16 (vceqq_u16 (input, vreinterpretq_u16_u8 (vall_zero)));
+ u8x16 before_merge = vminq_u8 (test_result, res_values);
+ /*before_merge--> [0x80, 0x00, 0x00, 0x10, ... ] */
+ /* u8x16 --> [a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p] */
+ /* pair add until we have 2 uint64_t */
+ u16x8 merge1 = vpaddlq_u8 (before_merge);
+ /* u16x8--> [a+b,c+d, e+f,g+h, i+j,k+l, m+n,o+p] */
+ u32x4 merge2 = vpaddlq_u16 (merge1);
+ /* u32x4--> [a+b+c+d, e+f+g+h, i+j+k+l, m+n+o+p] */
+ u64x2 merge3 = vpaddlq_u32 (merge2);
+ /* u64x2--> [a+b+c+d+e+f+g+h, i+j+k+l+m+n+o+p] */
+ return (u32) (vgetq_lane_u64 (merge3, 1) << 8) + vgetq_lane_u64 (merge3, 0);
+}
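+
+/* Sketch: the result is the same two-bits-per-zero-u16-lane byte mask
+   as the SSE2 u16x8_zero_byte_mask, e.g. for
+   input = { 0, 5, 0, 0, 9, 9, 9, 9 } the mask is
+   0x03 | 0x30 | 0xc0 = 0xf3. */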
+
+#endif /* included_vector_neon_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vector_sse2.h b/src/vppinfra/vector_sse2.h
new file mode 100644
index 00000000..6830d5c6
--- /dev/null
+++ b/src/vppinfra/vector_sse2.h
@@ -0,0 +1,705 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_vector_sse2_h
+#define included_vector_sse2_h
+
+#include <vppinfra/error_bootstrap.h> /* for ASSERT */
+#include <x86intrin.h>
+
+/* 128 bit interleaves. */
+always_inline u8x16
+u8x16_interleave_hi (u8x16 a, u8x16 b)
+{
+ return (u8x16) _mm_unpackhi_epi8 ((__m128i) a, (__m128i) b);
+}
+
+always_inline u8x16
+u8x16_interleave_lo (u8x16 a, u8x16 b)
+{
+ return (u8x16) _mm_unpacklo_epi8 ((__m128i) a, (__m128i) b);
+}
+
+always_inline u16x8
+u16x8_interleave_hi (u16x8 a, u16x8 b)
+{
+ return (u16x8) _mm_unpackhi_epi16 ((__m128i) a, (__m128i) b);
+}
+
+always_inline u16x8
+u16x8_interleave_lo (u16x8 a, u16x8 b)
+{
+ return (u16x8) _mm_unpacklo_epi16 ((__m128i) a, (__m128i) b);
+}
+
+always_inline u32x4
+u32x4_interleave_hi (u32x4 a, u32x4 b)
+{
+ return (u32x4) _mm_unpackhi_epi32 ((__m128i) a, (__m128i) b);
+}
+
+always_inline u32x4
+u32x4_interleave_lo (u32x4 a, u32x4 b)
+{
+ return (u32x4) _mm_unpacklo_epi32 ((__m128i) a, (__m128i) b);
+}
+
+always_inline u64x2
+u64x2_interleave_hi (u64x2 a, u64x2 b)
+{
+ return (u64x2) _mm_unpackhi_epi64 ((__m128i) a, (__m128i) b);
+}
+
+always_inline u64x2
+u64x2_interleave_lo (u64x2 a, u64x2 b)
+{
+ return (u64x2) _mm_unpacklo_epi64 ((__m128i) a, (__m128i) b);
+}
+
+/* 64 bit interleaves. */
+always_inline u8x8
+u8x8_interleave_hi (u8x8 a, u8x8 b)
+{
+ return (u8x8) _m_punpckhbw ((__m64) a, (__m64) b);
+}
+
+always_inline u8x8
+u8x8_interleave_lo (u8x8 a, u8x8 b)
+{
+ return (u8x8) _m_punpcklbw ((__m64) a, (__m64) b);
+}
+
+always_inline u16x4
+u16x4_interleave_hi (u16x4 a, u16x4 b)
+{
+ return (u16x4) _m_punpckhwd ((__m64) a, (__m64) b);
+}
+
+always_inline u16x4
+u16x4_interleave_lo (u16x4 a, u16x4 b)
+{
+ return (u16x4) _m_punpcklwd ((__m64) a, (__m64) b);
+}
+
+always_inline u32x2
+u32x2_interleave_hi (u32x2 a, u32x2 b)
+{
+ return (u32x2) _m_punpckhdq ((__m64) a, (__m64) b);
+}
+
+always_inline u32x2
+u32x2_interleave_lo (u32x2 a, u32x2 b)
+{
+ return (u32x2) _m_punpckldq ((__m64) a, (__m64) b);
+}
+
+/* 128 bit packs. */
+always_inline u8x16
+u16x8_pack (u16x8 lo, u16x8 hi)
+{
+ return (u8x16) _mm_packus_epi16 ((__m128i) lo, (__m128i) hi);
+}
+
+always_inline i8x16
+i16x8_pack (i16x8 lo, i16x8 hi)
+{
+ return (i8x16) _mm_packs_epi16 ((__m128i) lo, (__m128i) hi);
+}
+
+always_inline u16x8
+u32x4_pack (u32x4 lo, u32x4 hi)
+{
+ return (u16x8) _mm_packs_epi32 ((__m128i) lo, (__m128i) hi);
+}
+
+/* 64 bit packs. */
+always_inline u8x8
+u16x4_pack (u16x4 lo, u16x4 hi)
+{
+ return (u8x8) _m_packuswb ((__m64) lo, (__m64) hi);
+}
+
+always_inline i8x8
+i16x4_pack (i16x4 lo, i16x4 hi)
+{
+ return (i8x8) _m_packsswb ((__m64) lo, (__m64) hi);
+}
+
+always_inline u16x4
+u32x2_pack (u32x2 lo, u32x2 hi)
+{
+ return (u16x4) _m_packssdw ((__m64) lo, (__m64) hi);
+}
+
+always_inline i16x4
+i32x2_pack (i32x2 lo, i32x2 hi)
+{
+ return (i16x4) _m_packssdw ((__m64) lo, (__m64) hi);
+}
+
+/* Splats: replicate scalar value into vector. */
+always_inline u64x2
+u64x2_splat (u64 a)
+{
+ u64x2 x = { a, a };
+ return x;
+}
+
+always_inline u32x4
+u32x4_splat (u32 a)
+{
+ u32x4 x = { a, a, a, a };
+ return x;
+}
+
+always_inline u16x8
+u16x8_splat (u16 a)
+{
+ u16x8 x = { a, a, a, a, a, a, a, a };
+ return x;
+}
+
+always_inline u8x16
+u8x16_splat (u8 a)
+{
+ u8x16 x = { a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a };
+ return x;
+}
+
+always_inline u32x2
+u32x2_splat (u32 a)
+{
+ u32x2 x = { a, a };
+ return x;
+}
+
+always_inline u16x4
+u16x4_splat (u16 a)
+{
+ u16x4 x = { a, a, a, a };
+ return x;
+}
+
+always_inline u8x8
+u8x8_splat (u8 a)
+{
+ u8x8 x = { a, a, a, a, a, a, a, a };
+ return x;
+}
+
+#define i64x2_splat u64x2_splat
+#define i32x4_splat u32x4_splat
+#define i16x8_splat u16x8_splat
+#define i8x16_splat u8x16_splat
+#define i32x2_splat u32x2_splat
+#define i16x4_splat u16x4_splat
+#define i8x8_splat u8x8_splat
+
+#ifndef __ICC
+always_inline u64x2
+u64x2_read_lo (u64x2 x, u64 * a)
+{
+ return (u64x2) _mm_loadl_pi ((__m128) x, (__m64 *) a);
+}
+
+always_inline u64x2
+u64x2_read_hi (u64x2 x, u64 * a)
+{
+ return (u64x2) _mm_loadh_pi ((__m128) x, (__m64 *) a);
+}
+
+always_inline void
+u64x2_write_lo (u64x2 x, u64 * a)
+{
+ _mm_storel_pi ((__m64 *) a, (__m128) x);
+}
+
+always_inline void
+u64x2_write_hi (u64x2 x, u64 * a)
+{
+ _mm_storeh_pi ((__m64 *) a, (__m128) x);
+}
+#endif
+
+/* Unaligned loads/stores. */
+
+#define _(t) \
+ always_inline void t##_store_unaligned (t x, t * a) \
+ { _mm_storeu_si128 ((__m128i *) a, (__m128i) x); } \
+ always_inline t t##_load_unaligned (t * a) \
+ { return (t) _mm_loadu_si128 ((__m128i *) a); }
+
+_(u8x16) _(u16x8) _(u32x4) _(u64x2) _(i8x16) _(i16x8) _(i32x4) _(i64x2)
+#undef _
+
+#define _signed_binop(n,m,f,g) \
+ /* Unsigned */ \
+ always_inline u##n##x##m \
+ u##n##x##m##_##f (u##n##x##m x, u##n##x##m y) \
+ { return (u##n##x##m) _mm_##g##n ((__m128i) x, (__m128i) y); } \
+ \
+ /* Signed */ \
+ always_inline i##n##x##m \
+ i##n##x##m##_##f (i##n##x##m x, i##n##x##m y) \
+ { return (i##n##x##m) _mm_##g##n ((__m128i) x, (__m128i) y); }
+/* Addition/subtraction. */
+_signed_binop (8, 16, add, add_epi)
+_signed_binop (16, 8, add, add_epi)
+_signed_binop (32, 4, add, add_epi)
+_signed_binop (64, 2, add, add_epi)
+_signed_binop (8, 16, sub, sub_epi)
+_signed_binop (16, 8, sub, sub_epi)
+_signed_binop (32, 4, sub, sub_epi)
+_signed_binop (64, 2, sub, sub_epi)
+
+/* Addition/subtraction with saturation.  Note: the macro generates the
+   signed and unsigned forms from a single intrinsic, so the i8x16/i16x8
+   variants below are also built on the unsigned saturating intrinsics. */
+_signed_binop (8, 16, add_saturate, adds_epu)
+_signed_binop (16, 8, add_saturate, adds_epu)
+_signed_binop (8, 16, sub_saturate, subs_epu)
+_signed_binop (16, 8, sub_saturate, subs_epu)
+
+/* Multiplication. */
+always_inline i16x8
+i16x8_mul_lo (i16x8 x, i16x8 y)
+{
+ return (i16x8) _mm_mullo_epi16 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u16x8
+u16x8_mul_lo (u16x8 x, u16x8 y)
+{
+ return (u16x8) _mm_mullo_epi16 ((__m128i) x, (__m128i) y);
+}
+
+always_inline i16x8
+i16x8_mul_hi (i16x8 x, i16x8 y)
+{
+  return (i16x8) _mm_mulhi_epi16 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u16x8
+u16x8_mul_hi (u16x8 x, u16x8 y)
+{
+ return (u16x8) _mm_mulhi_epu16 ((__m128i) x, (__m128i) y);
+}
+
+/* 128 bit shifts. */
+
+#define _(p,a,b,c,f) \
+ always_inline p##a##x##b p##a##x##b##_ishift_##c (p##a##x##b x, int i) \
+ { return (p##a##x##b) _mm_##f##i_epi##a ((__m128i) x, i); } \
+ \
+ always_inline p##a##x##b p##a##x##b##_shift_##c (p##a##x##b x, p##a##x##b y) \
+ { return (p##a##x##b) _mm_##f##_epi##a ((__m128i) x, (__m128i) y); }
+
+_(u, 16, 8, left, sll)
+_(u, 32, 4, left, sll)
+_(u, 64, 2, left, sll)
+_(u, 16, 8, right, srl)
+_(u, 32, 4, right, srl)
+_(u, 64, 2, right, srl)
+_(i, 16, 8, left, sll)
+_(i, 32, 4, left, sll)
+_(i, 64, 2, left, sll)
+_(i, 16, 8, right, sra)
+_(i, 32, 4, right, sra)
+
+#undef _
+
+/* 64 bit shifts. */
+always_inline u16x4
+u16x4_shift_left (u16x4 x, u16x4 i)
+{
+ return (u16x4) _m_psllw ((__m64) x, (__m64) i);
+}
+
+always_inline u32x2
+u32x2_shift_left (u32x2 x, u32x2 i)
+{
+ return (u32x2) _m_pslld ((__m64) x, (__m64) i);
+}
+
+always_inline u16x4
+u16x4_shift_right (u16x4 x, u16x4 i)
+{
+ return (u16x4) _m_psrlw ((__m64) x, (__m64) i);
+}
+
+always_inline u32x2
+u32x2_shift_right (u32x2 x, u32x2 i)
+{
+ return (u32x2) _m_psrld ((__m64) x, (__m64) i);
+}
+
+always_inline i16x4
+i16x4_shift_left (i16x4 x, i16x4 i)
+{
+ return (i16x4) _m_psllw ((__m64) x, (__m64) i);
+}
+
+always_inline i32x2
+i32x2_shift_left (i32x2 x, i32x2 i)
+{
+ return (i32x2) _m_pslld ((__m64) x, (__m64) i);
+}
+
+always_inline i16x4
+i16x4_shift_right (i16x4 x, i16x4 i)
+{
+ return (i16x4) _m_psraw ((__m64) x, (__m64) i);
+}
+
+always_inline i32x2
+i32x2_shift_right (i32x2 x, i32x2 i)
+{
+ return (i32x2) _m_psrad ((__m64) x, (__m64) i);
+}
+
+#define u8x16_word_shift_left(a,n) ((u8x16) _mm_slli_si128 ((__m128i) (a), (n)))
+#define u8x16_word_shift_right(a,n) ((u8x16) _mm_srli_si128 ((__m128i) (a), (n)))
+
+#define i8x16_word_shift_left(a,n) \
+ ((i8x16) u8x16_word_shift_left((u8x16) (a), (n)))
+#define i8x16_word_shift_right(a,n) \
+ ((i8x16) u8x16_word_shift_right((u8x16) (a), (n)))
+
+#define u16x8_word_shift_left(a,n) \
+ ((u16x8) u8x16_word_shift_left((u8x16) (a), (n) * sizeof (u16)))
+#define i16x8_word_shift_left(a,n) \
+  ((i16x8) u8x16_word_shift_left ((u8x16) (a), (n) * sizeof (u16)))
+#define u16x8_word_shift_right(a,n) \
+ ((u16x8) u8x16_word_shift_right((u8x16) (a), (n) * sizeof (u16)))
+#define i16x8_word_shift_right(a,n) \
+ ((i16x8) u8x16_word_shift_right((u8x16) (a), (n) * sizeof (u16)))
+
+#define u32x4_word_shift_left(a,n) \
+ ((u32x4) u8x16_word_shift_left((u8x16) (a), (n) * sizeof (u32)))
+#define i32x4_word_shift_left(a,n) \
+  ((i32x4) u8x16_word_shift_left ((u8x16) (a), (n) * sizeof (u32)))
+#define u32x4_word_shift_right(a,n) \
+ ((u32x4) u8x16_word_shift_right((u8x16) (a), (n) * sizeof (u32)))
+#define i32x4_word_shift_right(a,n) \
+ ((i32x4) u8x16_word_shift_right((u8x16) (a), (n) * sizeof (u32)))
+
+#define u64x2_word_shift_left(a,n) \
+ ((u64x2) u8x16_word_shift_left((u8x16) (a), (n) * sizeof (u64)))
+#define i64x2_word_shift_left(a,n) \
+  ((i64x2) u8x16_word_shift_left ((u8x16) (a), (n) * sizeof (u64)))
+#define u64x2_word_shift_right(a,n) \
+ ((u64x2) u8x16_word_shift_right((u8x16) (a), (n) * sizeof (u64)))
+#define i64x2_word_shift_right(a,n) \
+ ((i64x2) u8x16_word_shift_right((u8x16) (a), (n) * sizeof (u64)))
+
+/* SSE2 has no rotate instructions: use shifts to simulate them. */
+#define _(t,n,lr1,lr2) \
+ always_inline t##x##n \
+ t##x##n##_irotate_##lr1 (t##x##n w, int i) \
+ { \
+ ASSERT (i >= 0 && i <= BITS (t)); \
+ return (t##x##n##_ishift_##lr1 (w, i) \
+ | t##x##n##_ishift_##lr2 (w, BITS (t) - i)); \
+ } \
+ \
+ always_inline t##x##n \
+ t##x##n##_rotate_##lr1 (t##x##n w, t##x##n i) \
+ { \
+ t##x##n j = t##x##n##_splat (BITS (t)); \
+ return (t##x##n##_shift_##lr1 (w, i) \
+ | t##x##n##_shift_##lr2 (w, j - i)); \
+ }
+
+_(u16, 8, left, right);
+_(u16, 8, right, left);
+_(u32, 4, left, right);
+_(u32, 4, right, left);
+_(u64, 2, left, right);
+_(u64, 2, right, left);
+
+#undef _
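+
+/* Sketch: u16x8_irotate_left (x, 3) computes (x << 3) | (x >> 13),
+   i.e. a rotate built from the two complementary shifts above. */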
+
+#ifndef __clang__
+#define _(t,n,lr1,lr2) \
+ always_inline t##x##n \
+ t##x##n##_word_rotate2_##lr1 (t##x##n w0, t##x##n w1, int i) \
+ { \
+ int m = sizeof (t##x##n) / sizeof (t); \
+ ASSERT (i >= 0 && i < m); \
+ return (t##x##n##_word_shift_##lr1 (w0, i) \
+ | t##x##n##_word_shift_##lr2 (w1, m - i)); \
+ } \
+ \
+ always_inline t##x##n \
+ t##x##n##_word_rotate_##lr1 (t##x##n w0, int i) \
+ { return t##x##n##_word_rotate2_##lr1 (w0, w0, i); }
+
+_(u8, 16, left, right);
+_(u8, 16, right, left);
+_(u16, 8, left, right);
+_(u16, 8, right, left);
+_(u32, 4, left, right);
+_(u32, 4, right, left);
+_(u64, 2, left, right);
+_(u64, 2, right, left);
+
+#undef _
+#endif
+
+/* Compare operations. */
+always_inline u8x16
+u8x16_is_equal (u8x16 x, u8x16 y)
+{
+ return (u8x16) _mm_cmpeq_epi8 ((__m128i) x, (__m128i) y);
+}
+
+always_inline i8x16
+i8x16_is_equal (i8x16 x, i8x16 y)
+{
+ return (i8x16) _mm_cmpeq_epi8 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u16x8
+u16x8_is_equal (u16x8 x, u16x8 y)
+{
+ return (u16x8) _mm_cmpeq_epi16 ((__m128i) x, (__m128i) y);
+}
+
+always_inline i16x8
+i16x8_is_equal (i16x8 x, i16x8 y)
+{
+ return (i16x8) _mm_cmpeq_epi16 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u32x4
+u32x4_is_equal (u32x4 x, u32x4 y)
+{
+ return (u32x4) _mm_cmpeq_epi32 ((__m128i) x, (__m128i) y);
+}
+
+always_inline i32x4
+i32x4_is_equal (i32x4 x, i32x4 y)
+{
+ return (i32x4) _mm_cmpeq_epi32 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u8x16
+i8x16_is_greater (i8x16 x, i8x16 y)
+{
+ return (u8x16) _mm_cmpgt_epi8 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u16x8
+i16x8_is_greater (i16x8 x, i16x8 y)
+{
+ return (u16x8) _mm_cmpgt_epi16 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u32x4
+i32x4_is_greater (i32x4 x, i32x4 y)
+{
+ return (u32x4) _mm_cmpgt_epi32 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u8x16
+u8x16_is_zero (u8x16 x)
+{
+ u8x16 zero = { 0 };
+ return u8x16_is_equal (x, zero);
+}
+
+always_inline u16x8
+u16x8_is_zero (u16x8 x)
+{
+ u16x8 zero = { 0 };
+ return u16x8_is_equal (x, zero);
+}
+
+always_inline u32x4
+u32x4_is_zero (u32x4 x)
+{
+ u32x4 zero = { 0 };
+ return u32x4_is_equal (x, zero);
+}
+
+#define u32x4_select(A,MASK) \
+({ \
+ u32x4 _x, _y; \
+ _x = (A); \
+ asm volatile ("pshufd %[mask], %[x], %[y]" \
+ : /* outputs */ [y] "=x" (_y) \
+ : /* inputs */ [x] "x" (_x), [mask] "i" (MASK)); \
+ _y; \
+})
+
+#define u32x4_splat_word(x,i) \
+ u32x4_select ((x), (((i) << (2*0)) \
+ | ((i) << (2*1)) \
+ | ((i) << (2*2)) \
+ | ((i) << (2*3))))
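+
+/* Sketch: u32x4_splat_word (x, 2) selects with mask 0xaa (the value 2
+   in each 2 bit field), i.e. a pshufd replicating lane 2 into all
+   four lanes. */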
+
+/* Extract low order 32 bit word. */
+always_inline u32
+u32x4_get0 (u32x4 x)
+{
+ u32 result;
+ asm volatile ("movd %[x], %[result]": /* outputs */ [result] "=r" (result)
+ : /* inputs */ [x] "x" (x));
+ return result;
+}
+
+always_inline u32x4
+u32x4_set0 (u32 x)
+{
+ u32x4 result;
+ asm volatile ("movd %[x], %[result]": /* outputs */ [result] "=x" (result)
+ : /* inputs */ [x] "r" (x));
+ return result;
+}
+
+always_inline i32x4
+i32x4_set0 (i32 x)
+{
+ return (i32x4) u32x4_set0 ((u32) x);
+}
+
+always_inline i32
+i32x4_get0 (i32x4 x)
+{
+ return (i32) u32x4_get0 ((u32x4) x);
+}
+
+/* Converts all ones/zeros compare mask to bitmap. */
+always_inline u32
+u8x16_compare_byte_mask (u8x16 x)
+{
+ return _mm_movemask_epi8 ((__m128i) x);
+}
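+
+/* Sketch: if only byte lanes 0 and 5 of x are all ones (e.g. from
+   u8x16_is_equal), u8x16_compare_byte_mask (x) == (1 << 0) | (1 << 5). */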
+
+extern u8 u32x4_compare_word_mask_table[256];
+
+always_inline u32
+u32x4_compare_word_mask (u32x4 x)
+{
+ u32 m = u8x16_compare_byte_mask ((u8x16) x);
+ return (u32x4_compare_word_mask_table[(m >> 0) & 0xff]
+ | (u32x4_compare_word_mask_table[(m >> 8) & 0xff] << 2));
+}
+
+always_inline u32
+u8x16_zero_byte_mask (u8x16 x)
+{
+ u8x16 zero = { 0 };
+ return u8x16_compare_byte_mask (u8x16_is_equal (x, zero));
+}
+
+always_inline u32
+u16x8_zero_byte_mask (u16x8 x)
+{
+ u16x8 zero = { 0 };
+ return u8x16_compare_byte_mask ((u8x16) u16x8_is_equal (x, zero));
+}
+
+always_inline u32
+u32x4_zero_byte_mask (u32x4 x)
+{
+ u32x4 zero = { 0 };
+ return u8x16_compare_byte_mask ((u8x16) u32x4_is_equal (x, zero));
+}
+
+always_inline u8x16
+u8x16_max (u8x16 x, u8x16 y)
+{
+ return (u8x16) _mm_max_epu8 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u32
+u8x16_max_scalar (u8x16 x)
+{
+ x = u8x16_max (x, u8x16_word_shift_right (x, 8));
+ x = u8x16_max (x, u8x16_word_shift_right (x, 4));
+ x = u8x16_max (x, u8x16_word_shift_right (x, 2));
+ x = u8x16_max (x, u8x16_word_shift_right (x, 1));
+ return _mm_extract_epi16 ((__m128i) x, 0) & 0xff;
+}
+
+always_inline u8x16
+u8x16_min (u8x16 x, u8x16 y)
+{
+ return (u8x16) _mm_min_epu8 ((__m128i) x, (__m128i) y);
+}
+
+always_inline u8
+u8x16_min_scalar (u8x16 x)
+{
+ x = u8x16_min (x, u8x16_word_shift_right (x, 8));
+ x = u8x16_min (x, u8x16_word_shift_right (x, 4));
+ x = u8x16_min (x, u8x16_word_shift_right (x, 2));
+ x = u8x16_min (x, u8x16_word_shift_right (x, 1));
+ return _mm_extract_epi16 ((__m128i) x, 0) & 0xff;
+}
+
+always_inline i16x8
+i16x8_max (i16x8 x, i16x8 y)
+{
+ return (i16x8) _mm_max_epi16 ((__m128i) x, (__m128i) y);
+}
+
+always_inline i16
+i16x8_max_scalar (i16x8 x)
+{
+ x = i16x8_max (x, i16x8_word_shift_right (x, 4));
+ x = i16x8_max (x, i16x8_word_shift_right (x, 2));
+ x = i16x8_max (x, i16x8_word_shift_right (x, 1));
+ return _mm_extract_epi16 ((__m128i) x, 0);
+}
+
+always_inline i16x8
+i16x8_min (i16x8 x, i16x8 y)
+{
+ return (i16x8) _mm_min_epi16 ((__m128i) x, (__m128i) y);
+}
+
+always_inline i16
+i16x8_min_scalar (i16x8 x)
+{
+ x = i16x8_min (x, i16x8_word_shift_right (x, 4));
+ x = i16x8_min (x, i16x8_word_shift_right (x, 2));
+ x = i16x8_min (x, i16x8_word_shift_right (x, 1));
+ return _mm_extract_epi16 ((__m128i) x, 0);
+}
+
+#undef _signed_binop
+
+#endif /* included_vector_sse2_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vhash.c b/src/vppinfra/vhash.c
new file mode 100644
index 00000000..f9dac0d9
--- /dev/null
+++ b/src/vppinfra/vhash.c
@@ -0,0 +1,772 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2010 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/vhash.h>
+
+#ifdef CLIB_HAVE_VEC128
+
+/* Overflow search buckets have an extra u32x4 for saving key_hash data.
+   This makes it easier to refill the main search buckets from the
+   overflow vector. */
+typedef struct
+{
+ /* 4 results for this bucket. */
+ u32x4_union_t result;
+
+ /* 4 hash codes for this bucket. These are used to refill main
+ search buckets from overflow buckets when space becomes available. */
+ u32x4_union_t key_hash;
+
+ /* n_key_u32s u32x4s of key data follow. */
+ u32x4_union_t key[0];
+} vhash_overflow_search_bucket_t;
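An overflow search bucket is therefore variable-size: two fixed u32x4s (result and key_hash) followed by n_key_u32s u32x4s of key words. A minimal sketch of the stride this implies, matching the sizeof-based expression used when resizing ob->search_buckets below (the variable name is illustrative):

    /* Stride of one overflow bucket in u32x4 units:
       1 (result) + 1 (key_hash) + n_key_u32s (key words). */
    u32 bucket_stride_u32x4s =
      sizeof (vhash_overflow_search_bucket_t) / sizeof (u32x4) + n_key_u32s;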
+
+always_inline void
+set_overflow_result (vhash_overflow_search_bucket_t * b,
+ u32 i, u32 result, u32 key_hash)
+{
+ b->result.as_u32[i] = result;
+ b->key_hash.as_u32[i] = key_hash;
+}
+
+always_inline void
+free_overflow_bucket (vhash_overflow_buckets_t * ob,
+ vhash_overflow_search_bucket_t * b, u32 i)
+{
+ u32 o = (u32x4_union_t *) b - ob->search_buckets;
+ ASSERT (o < vec_len (ob->search_buckets));
+ vec_add1 (ob->free_indices, 4 * o + i);
+}
+
+always_inline vhash_overflow_search_bucket_t *
+get_overflow_search_bucket (vhash_overflow_buckets_t * obs, u32 i,
+ u32 n_key_u32s)
+{
+ return ((vhash_overflow_search_bucket_t *)
+ vec_elt_at_index (obs->search_buckets, i));
+}
+
+always_inline vhash_overflow_search_bucket_t *
+next_overflow_bucket (vhash_overflow_search_bucket_t * b, u32 n_key_u32s)
+{
+ return (vhash_overflow_search_bucket_t *) & b->key[n_key_u32s];
+}
+
+#define foreach_vhash_overflow_bucket(b,ob,n_key_u32s) \
+ for ((b) = (vhash_overflow_search_bucket_t *) ob->search_buckets; \
+ (u32x4_union_t *) (b) < vec_end (ob->search_buckets); \
+ b = next_overflow_bucket (b, n_key_u32s))
+
+u32
+vhash_get_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s)
+{
+ vhash_overflow_buckets_t *ob = vhash_get_overflow_buckets (h, key_hash);
+ vhash_overflow_search_bucket_t *b;
+ u32 i, result = 0;
+
+ foreach_vhash_overflow_bucket (b, ob, n_key_u32s)
+ {
+ u32x4 r = b->result.as_u32x4;
+
+ for (i = 0; i < n_key_u32s; i++)
+ r &= vhash_bucket_compare (h, &b->key[0], i, vi);
+
+ result = vhash_merge_results (r);
+ if (result)
+ break;
+ }
+
+ return result;
+}
+
+u32
+vhash_set_overflow (vhash_t * h,
+ u32 key_hash, u32 vi, u32 new_result, u32 n_key_u32s)
+{
+ vhash_overflow_buckets_t *ob = vhash_get_overflow_buckets (h, key_hash);
+ vhash_overflow_search_bucket_t *b;
+ u32 i_set, i, old_result;
+
+ foreach_vhash_overflow_bucket (b, ob, n_key_u32s)
+ {
+ u32x4 r;
+
+ r = b->result.as_u32x4;
+ for (i = 0; i < n_key_u32s; i++)
+ r &= vhash_bucket_compare (h, &b->key[0], i, vi);
+
+ old_result = vhash_merge_results (r);
+ if (old_result)
+ {
+ i_set = vhash_non_empty_result_index (r);
+ set_overflow_result (b, i_set, new_result, key_hash);
+ return old_result;
+ }
+ }
+
+ /* Check free list. */
+ if (vec_len (ob->free_indices) == 0)
+ {
+ /* Out of free overflow buckets. Resize. */
+ u32 j, *p;
+ i = vec_len (ob->search_buckets);
+ vec_resize_aligned (ob->search_buckets,
+ sizeof (b[0]) / sizeof (u32x4) + n_key_u32s,
+ CLIB_CACHE_LINE_BYTES);
+ vec_add2 (ob->free_indices, p, 4);
+ for (j = 0; j < 4; j++)
+ p[j] = 4 * i + j;
+ }
+
+ i = vec_pop (ob->free_indices);
+
+ i_set = i & 3;
+ b = ((vhash_overflow_search_bucket_t *)
+ vec_elt_at_index (ob->search_buckets, i / 4));
+
+ /* Insert result. */
+ set_overflow_result (b, i_set, new_result, key_hash);
+
+ /* Insert key. */
+ for (i = 0; i < n_key_u32s; i++)
+ b->key[i].as_u32[i_set] = vhash_get_key_word (h, i, vi);
+
+ ob->n_overflow++;
+ h->n_elts++;
+
+ return /* old result was invalid */ 0;
+}
+
+u32
+vhash_unset_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s)
+{
+ vhash_overflow_buckets_t *ob = vhash_get_overflow_buckets (h, key_hash);
+ vhash_overflow_search_bucket_t *b;
+ u32 i_set, i, old_result;
+
+ foreach_vhash_overflow_bucket (b, ob, n_key_u32s)
+ {
+ u32x4 r;
+
+ r = b->result.as_u32x4;
+ for (i = 0; i < n_key_u32s; i++)
+ r &= vhash_bucket_compare (h, &b->key[0], i, vi);
+
+ old_result = vhash_merge_results (r);
+ if (old_result)
+ {
+ i_set = vhash_non_empty_result_index (r);
+
+ /* Invalidate result and invert key hash so that this will
+ never match since all keys in this overflow bucket have
+     matching key hashes. */
+ set_overflow_result (b, i_set, 0, ~key_hash);
+
+ free_overflow_bucket (ob, b, i_set);
+
+ ASSERT (ob->n_overflow > 0);
+ ob->n_overflow--;
+ h->n_elts--;
+ return old_result;
+ }
+ }
+
+ /* Could not find key. */
+ return 0;
+}
+
+void
+vhash_unset_refill_from_overflow (vhash_t * h,
+ vhash_search_bucket_t * sb,
+ u32 key_hash, u32 n_key_u32s)
+{
+ vhash_overflow_buckets_t *obs = vhash_get_overflow_buckets (h, key_hash);
+ vhash_overflow_search_bucket_t *ob;
+ u32 i, j, i_refill, bucket_mask = h->bucket_mask.as_u32[0];
+
+ /* Find overflow element with matching key hash. */
+ foreach_vhash_overflow_bucket (ob, obs, n_key_u32s)
+ {
+ for (i = 0; i < 4; i++)
+ {
+ if (!ob->result.as_u32[i])
+ continue;
+ if ((ob->key_hash.as_u32[i] & bucket_mask)
+ != (key_hash & bucket_mask))
+ continue;
+
+ i_refill = vhash_empty_result_index (sb->result.as_u32x4);
+ sb->result.as_u32[i_refill] = ob->result.as_u32[i];
+ for (j = 0; j < n_key_u32s; j++)
+ sb->key[j].as_u32[i_refill] = ob->key[j].as_u32[i];
+ set_overflow_result (ob, i, 0, ~key_hash);
+ free_overflow_bucket (obs, ob, i);
+ return;
+ }
+ }
+}
+
+void
+vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32, u32 * hash_seeds)
+{
+ uword i, j, m;
+ vhash_search_bucket_t *b;
+
+ memset (h, 0, sizeof (h[0]));
+
+  /* Must have at least 4 keys (i.e. one search bucket). */
+ log2_n_keys = clib_max (log2_n_keys, 2);
+
+ h->log2_n_keys = log2_n_keys;
+ h->n_key_u32 = n_key_u32;
+ m = pow2_mask (h->log2_n_keys) & ~3;
+ for (i = 0; i < VECTOR_WORD_TYPE_LEN (u32); i++)
+ h->bucket_mask.as_u32[i] = m;
+
+ /* Allocate and zero search buckets. */
+ i = (sizeof (b[0]) / sizeof (u32x4) + n_key_u32) << (log2_n_keys - 2);
+ vec_validate_aligned (h->search_buckets, i - 1, CLIB_CACHE_LINE_BYTES);
+
+ for (i = 0; i < ARRAY_LEN (h->find_first_zero_table); i++)
+ h->find_first_zero_table[i] = min_log2 (first_set (~i));
+
+ for (i = 0; i < ARRAY_LEN (h->hash_seeds); i++)
+ for (j = 0; j < VECTOR_WORD_TYPE_LEN (u32); j++)
+ h->hash_seeds[i].as_u32[j] = hash_seeds[i];
+}
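The find_first_zero_table filled in above maps a 4-bit occupancy mask to the index of the first free slot in a bucket. A worked sketch of two entries, assuming first_set returns the lowest set bit:

    /* i = 0x7: slots 0..2 occupied; ~i has bit 3 as its lowest set bit,
       so table[0x7] = 3, the first free slot.
       i = 0xf: bucket full; table[0xf] = 4, one past the last slot. */
    ASSERT (min_log2 (first_set (~(uword) 0x7)) == 3);
    ASSERT (min_log2 (first_set (~(uword) 0xf)) == 4);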
+
+static_always_inline u32
+vhash_main_key_gather (void *_vm, u32 vi, u32 wi, u32 n_key_u32)
+{
+ vhash_main_t *vm = _vm;
+ return vec_elt (vm->keys, vi * n_key_u32 + wi);
+}
+
+static_always_inline u32x4
+vhash_main_4key_gather (void *_vm, u32 vi, u32 wi, u32 n_key_u32s)
+{
+ vhash_main_t *vm = _vm;
+ u32x4_union_t x;
+
+ ASSERT (n_key_u32s == vm->n_key_u32);
+ ASSERT (wi < n_key_u32s);
+
+ x.as_u32[0] = vec_elt (vm->keys, (vi + 0) * n_key_u32s + wi);
+ x.as_u32[1] = vec_elt (vm->keys, (vi + 1) * n_key_u32s + wi);
+ x.as_u32[2] = vec_elt (vm->keys, (vi + 2) * n_key_u32s + wi);
+ x.as_u32[3] = vec_elt (vm->keys, (vi + 3) * n_key_u32s + wi);
+ return x.as_u32x4;
+}
+
+static_always_inline u32
+vhash_main_set_result (void *_vm, u32 vi, u32 old_result, u32 n_key_u32)
+{
+ vhash_main_t *vm = _vm;
+ u32 *p = vec_elt_at_index (vm->results, vi);
+ u32 new_result = p[0];
+ p[0] = old_result;
+ return new_result;
+}
+
+static_always_inline u32
+vhash_main_get_result (void *_vm, u32 vi, u32 old_result, u32 n_key_u32)
+{
+ vhash_main_t *vm = _vm;
+ vec_elt (vm->results, vi) = old_result;
+ return old_result;
+}
+
+static_always_inline u32x4
+vhash_main_get_4result (void *_vm, u32 vi, u32x4 old_result, u32 n_key_u32)
+{
+ vhash_main_t *vm = _vm;
+ u32x4 *p = (u32x4 *) vec_elt_at_index (vm->results, vi);
+ p[0] = old_result;
+ return old_result;
+}
+
+#define _(N_KEY_U32) \
+ static_always_inline u32 \
+ vhash_main_key_gather_##N_KEY_U32 (void * _vm, u32 vi, u32 i) \
+ { return vhash_main_key_gather (_vm, vi, i, N_KEY_U32); } \
+ \
+ static_always_inline u32x4 \
+ vhash_main_4key_gather_##N_KEY_U32 (void * _vm, u32 vi, u32 i) \
+ { return vhash_main_4key_gather (_vm, vi, i, N_KEY_U32); } \
+ \
+ clib_pipeline_stage_static \
+ (vhash_main_gather_keys_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_gather_4key_stage \
+ (vm->vhash, \
+ /* vector_index */ i, \
+ vhash_main_4key_gather_##N_KEY_U32, \
+ vm, \
+ N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (vhash_main_gather_keys_mod_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_gather_key_stage \
+ (vm->vhash, \
+ /* vector_index */ vm->n_vectors_div_4, \
+ /* n_vectors */ vm->n_vectors_mod_4, \
+ vhash_main_key_gather_##N_KEY_U32, \
+ vm, \
+ N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage \
+ (vhash_main_hash_finalize_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_finalize_stage (vm->vhash, i, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_finalize_stage (vm->vhash, vm->n_vectors_div_4, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_static \
+ (vhash_main_get_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_get_4_stage (vm->vhash, \
+ /* vector_index */ i, \
+ vhash_main_get_4result, \
+ vm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (vhash_main_get_mod_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_get_stage (vm->vhash, \
+ /* vector_index */ vm->n_vectors_div_4, \
+ /* n_vectors */ vm->n_vectors_mod_4, \
+ vhash_main_get_result, \
+ vm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_static \
+ (vhash_main_set_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_set_stage (vm->vhash, \
+ /* vector_index */ i, \
+ /* n_vectors */ VECTOR_WORD_TYPE_LEN (u32), \
+ vhash_main_set_result, \
+ vm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (vhash_main_set_mod_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_set_stage (vm->vhash, \
+ /* vector_index */ vm->n_vectors_div_4, \
+ /* n_vectors */ vm->n_vectors_mod_4, \
+ vhash_main_set_result, \
+ vm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_static \
+ (vhash_main_unset_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_unset_stage (vm->vhash, \
+ /* vector_index */ i, \
+ /* n_vectors */ VECTOR_WORD_TYPE_LEN (u32), \
+ vhash_main_get_result, \
+ vm, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (vhash_main_unset_mod_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_unset_stage (vm->vhash, \
+ /* vector_index */ vm->n_vectors_div_4, \
+ /* n_vectors */ vm->n_vectors_mod_4, \
+ vhash_main_get_result, \
+ vm, N_KEY_U32); \
+ })
+
+_(1);
+_(2);
+_(3);
+_(4);
+_(5);
+_(6);
+
+#undef _
+
+#define _(N_KEY_U32) \
+ clib_pipeline_stage \
+ (vhash_main_hash_mix_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_mix_stage (vm->vhash, i, N_KEY_U32); \
+ }) \
+ \
+ clib_pipeline_stage_no_inline \
+ (vhash_main_hash_mix_mod_stage_##N_KEY_U32, \
+ vhash_main_t *, vm, i, \
+ { \
+ vhash_mix_stage (vm->vhash, vm->n_vectors_div_4, N_KEY_U32); \
+ })
+
+_(4);
+_(5);
+_(6);
+
+#undef _
+
+typedef enum
+{
+ GET, SET, UNSET,
+} vhash_main_op_t;
+
+static void
+vhash_main_op (vhash_main_t * vm, vhash_main_op_t op)
+{
+ u32 n_keys = vec_len (vm->results);
+
+ vm->n_key_u32 = vm->vhash->n_key_u32;
+
+ vhash_validate_sizes (vm->vhash, vm->n_key_u32, n_keys);
+
+ vm->n_vectors_div_4 = n_keys / 4;
+ vm->n_vectors_mod_4 = n_keys % 4;
+
+ if (vm->n_vectors_div_4 > 0)
+ {
+ switch (vm->n_key_u32)
+ {
+ default:
+ ASSERT (0);
+ break;
+
+#define _(N_KEY_U32) \
+ case N_KEY_U32: \
+ if (op == GET) \
+ clib_pipeline_run_3_stage \
+ (vm->n_vectors_div_4, \
+ vm, \
+ vhash_main_gather_keys_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_stage_##N_KEY_U32, \
+ vhash_main_get_stage_##N_KEY_U32); \
+ else if (op == SET) \
+ clib_pipeline_run_3_stage \
+ (vm->n_vectors_div_4, \
+ vm, \
+ vhash_main_gather_keys_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_stage_##N_KEY_U32, \
+ vhash_main_set_stage_##N_KEY_U32); \
+ else \
+ clib_pipeline_run_3_stage \
+ (vm->n_vectors_div_4, \
+ vm, \
+ vhash_main_gather_keys_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_stage_##N_KEY_U32, \
+ vhash_main_unset_stage_##N_KEY_U32); \
+ break;
+
+ _(1);
+ _(2);
+ _(3);
+
+#undef _
+
+#define _(N_KEY_U32) \
+ case N_KEY_U32: \
+ if (op == GET) \
+ clib_pipeline_run_4_stage \
+ (vm->n_vectors_div_4, \
+ vm, \
+ vhash_main_gather_keys_stage_##N_KEY_U32, \
+ vhash_main_hash_mix_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_stage_##N_KEY_U32, \
+ vhash_main_get_stage_##N_KEY_U32); \
+ else if (op == SET) \
+ clib_pipeline_run_4_stage \
+ (vm->n_vectors_div_4, \
+ vm, \
+ vhash_main_gather_keys_stage_##N_KEY_U32, \
+ vhash_main_hash_mix_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_stage_##N_KEY_U32, \
+ vhash_main_set_stage_##N_KEY_U32); \
+ else \
+ clib_pipeline_run_4_stage \
+ (vm->n_vectors_div_4, \
+ vm, \
+ vhash_main_gather_keys_stage_##N_KEY_U32, \
+ vhash_main_hash_mix_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_stage_##N_KEY_U32, \
+ vhash_main_unset_stage_##N_KEY_U32); \
+ break;
+
+ _(4);
+ _(5);
+ _(6);
+
+#undef _
+ }
+ }
+
+
+ if (vm->n_vectors_mod_4 > 0)
+ {
+ switch (vm->n_key_u32)
+ {
+ default:
+ ASSERT (0);
+ break;
+
+#define _(N_KEY_U32) \
+ case N_KEY_U32: \
+ if (op == GET) \
+ clib_pipeline_run_3_stage \
+ (1, \
+ vm, \
+ vhash_main_gather_keys_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \
+ vhash_main_get_mod_stage_##N_KEY_U32); \
+ else if (op == SET) \
+ clib_pipeline_run_3_stage \
+ (1, \
+ vm, \
+ vhash_main_gather_keys_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \
+ vhash_main_set_mod_stage_##N_KEY_U32); \
+ else \
+ clib_pipeline_run_3_stage \
+ (1, \
+ vm, \
+ vhash_main_gather_keys_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \
+ vhash_main_unset_mod_stage_##N_KEY_U32); \
+ break;
+
+ _(1);
+ _(2);
+ _(3);
+
+#undef _
+
+#define _(N_KEY_U32) \
+ case N_KEY_U32: \
+ if (op == GET) \
+ clib_pipeline_run_4_stage \
+ (1, \
+ vm, \
+ vhash_main_gather_keys_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_mix_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \
+ vhash_main_get_mod_stage_##N_KEY_U32); \
+ else if (op == SET) \
+ clib_pipeline_run_4_stage \
+ (1, \
+ vm, \
+ vhash_main_gather_keys_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_mix_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \
+ vhash_main_set_mod_stage_##N_KEY_U32); \
+ else \
+ clib_pipeline_run_4_stage \
+ (1, \
+ vm, \
+ vhash_main_gather_keys_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_mix_mod_stage_##N_KEY_U32, \
+ vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \
+ vhash_main_unset_mod_stage_##N_KEY_U32); \
+ break;
+
+ _(4);
+ _(5);
+ _(6);
+
+#undef _
+ }
+ }
+}
+
+void
+vhash_main_get (vhash_main_t * vm)
+{
+ vhash_main_op (vm, GET);
+}
+
+void
+vhash_main_set (vhash_main_t * vm)
+{
+ vhash_main_op (vm, SET);
+}
+
+void
+vhash_main_unset (vhash_main_t * vm)
+{
+ vhash_main_op (vm, UNSET);
+}
+
+u32
+vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index,
+ u32 n_keys_this_call)
+{
+ vhash_t *old = vr->old;
+ vhash_main_t *vm = &vr->new;
+ vhash_t *new = vm->vhash;
+ uword i, j, n_key_u32;
+
+ n_key_u32 = old->n_key_u32;
+
+ if (vector_index == 0)
+ {
+ u32 hash_seeds[3];
+ hash_seeds[0] = old->hash_seeds[0].as_u32[0];
+ hash_seeds[1] = old->hash_seeds[1].as_u32[0];
+ hash_seeds[2] = old->hash_seeds[2].as_u32[0];
+ vhash_init (new, old->log2_n_keys + 1, n_key_u32, hash_seeds);
+ }
+
+ vec_reset_length (vm->keys);
+ vec_reset_length (vm->results);
+
+ if (0 == (vector_index >> old->log2_n_keys))
+ {
+ for (i = vector_index; 0 == (i >> (old->log2_n_keys - 2)); i++)
+ {
+ vhash_search_bucket_t *b =
+ vhash_get_search_bucket_with_index (old, 4 * i, n_key_u32);
+ u32 r, *k;
+
+#define _(I) \
+ if ((r = b->result.as_u32[I]) != 0) \
+ { \
+ vec_add1 (vm->results, r - 1); \
+ vec_add2 (vm->keys, k, n_key_u32); \
+ for (j = 0; j < n_key_u32; j++) \
+ k[j] = b->key[j].as_u32[I]; \
+ }
+
+ _(0);
+ _(1);
+ _(2);
+ _(3);
+
+#undef _
+
+ if (vec_len (vm->results) >= n_keys_this_call)
+ {
+ vhash_main_op (vm, SET);
+ return i;
+ }
+ }
+ }
+
+ /* Add overflow buckets. */
+ {
+ vhash_overflow_buckets_t *ob;
+ vhash_overflow_search_bucket_t *b;
+
+ for (ob = old->overflow_buckets;
+ ob < old->overflow_buckets + ARRAY_LEN (old->overflow_buckets); ob++)
+ {
+ foreach_vhash_overflow_bucket (b, ob, old->n_key_u32)
+ {
+ u32 r, *k;
+
+#define _(I) \
+ if ((r = b->result.as_u32[I]) != 0) \
+ { \
+ vec_add1 (vm->results, r - 1); \
+ vec_add2 (vm->keys, k, n_key_u32); \
+ for (j = 0; j < n_key_u32; j++) \
+ k[j] = b->key[j].as_u32[I]; \
+ }
+
+ _(0);
+ _(1);
+ _(2);
+ _(3);
+
+#undef _
+ }
+ }
+ }
+
+ vhash_main_op (vm, SET);
+
+ /* Let caller know we are done. */
+ return ~0;
+}
+
+void
+vhash_resize (vhash_t * old, u32 log2_n_keys)
+{
+ static vhash_resize_t vr;
+ vhash_t new;
+ u32 i = 0;
+
+ vr.old = old;
+ vr.new.vhash = &new;
+
+ while (1)
+ {
+ i = vhash_resize_incremental (&vr, i, 1024);
+ if (i == ~0)
+ break;
+ }
+
+ vhash_free (old);
+ *old = new;
+}
+
+#endif /* CLIB_HAVE_VEC128 */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/vhash.h b/src/vppinfra/vhash.h
new file mode 100644
index 00000000..5ab42292
--- /dev/null
+++ b/src/vppinfra/vhash.h
@@ -0,0 +1,850 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2010 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_vhash_h
+#define included_clib_vhash_h
+
+#include <vppinfra/vector.h>
+
+#ifdef CLIB_HAVE_VEC128
+
+#include <vppinfra/cache.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/pipeline.h>
+
+/* Gathers 32 bits worth of key with given index. */
+typedef u32 (vhash_key_function_t) (void *state, u32 vector_index,
+ u32 key_word_index);
+typedef u32x4 (vhash_4key_function_t) (void *state, u32 vector_index,
+ u32 key_word_index);
+/* Sets/gets result of hash lookup. */
+typedef u32 (vhash_result_function_t) (void *state, u32 vector_index,
+ u32 result, u32 n_key_u32);
+typedef u32x4 (vhash_4result_function_t) (void *state, u32 vector_index,
+ u32x4 results, u32 n_key_u32);
+
+typedef struct
+{
+ u32x4_union_t hashed_key[3];
+} vhash_hashed_key_t;
+
+/* Search buckets are really this structure. */
+typedef struct
+{
+ /* 4 results for this bucket.
+     Zero is used to mark empty results, so internal results are stored
+     biased by 1 (internal result = user result + 1); the user therefore
+     cannot use ~0 as a result value. */
+ u32x4_union_t result;
+
+ /* n_key_u32s u32x4s of key data follow. */
+ u32x4_union_t key[0];
+} vhash_search_bucket_t;
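A tiny sketch of the bias convention described above (user_result is a placeholder):

    /* Results are stored biased by 1 so that 0 can mark an empty slot. */
    u32 stored = user_result + 1;   /* on set; user_result must not be ~0  */
    u32 user = stored - 1;          /* undone before results are handed back */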
+
+typedef struct
+{
+ u32x4_union_t *search_buckets;
+
+ /* Vector of bucket free indices. */
+ u32 *free_indices;
+
+ /* Number of entries in this overflow bucket. */
+ u32 n_overflow;
+} vhash_overflow_buckets_t;
+
+typedef struct
+{
+ /* 2^log2_n_keys keys grouped in groups of 4.
+ Each bucket contains 4 results plus 4 keys for a
+ total of (1 + n_key_u32) u32x4s. */
+ u32x4_union_t *search_buckets;
+
+  /* When a bucket of 4 results/keys is full we search
+     the overflow; key_hash selects which overflow bucket. */
+ vhash_overflow_buckets_t overflow_buckets[16];
+
+ /* Total count of occupied elements in hash table. */
+ u32 n_elts;
+
+ u32 log2_n_keys;
+
+ /* Number of 32 bit words in a hash key. */
+ u32 n_key_u32;
+
+ u32x4_union_t bucket_mask;
+
+ /* table[i] = min_log2 (first_set (~i)). */
+ u8 find_first_zero_table[16];
+
+ /* Hash seeds for Jenkins hash. */
+ u32x4_union_t hash_seeds[3];
+
+ /* Key work space is a vector of length
+ n_key_u32s << log2_n_key_word_len_u32x. */
+ u32 log2_n_key_word_len_u32x;
+
+ /* Work space to store keys between pipeline stages. */
+ u32x4_union_t *key_work_space;
+
+ /* Hash work space to store Jenkins hash values between
+ pipeline stages. */
+ vhash_hashed_key_t *hash_work_space;
+} vhash_t;
+
+always_inline vhash_overflow_buckets_t *
+vhash_get_overflow_buckets (vhash_t * h, u32 key)
+{
+ u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
+ ASSERT (i < ARRAY_LEN (h->overflow_buckets));
+ return h->overflow_buckets + i;
+}
+
+always_inline uword
+vhash_is_non_empty_overflow_bucket (vhash_t * h, u32 key)
+{
+ u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
+ ASSERT (i < ARRAY_LEN (h->overflow_buckets));
+ return h->overflow_buckets[i].n_overflow > 0;
+}
+
+always_inline void
+vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs)
+{
+ vec_free (obs->search_buckets);
+ vec_free (obs->free_indices);
+}
+
+always_inline void
+vhash_free (vhash_t * h)
+{
+ uword i;
+ for (i = 0; i < ARRAY_LEN (h->overflow_buckets); i++)
+ vhash_free_overflow_buckets (&h->overflow_buckets[i]);
+ vec_free (h->search_buckets);
+ vec_free (h->key_work_space);
+ vec_free (h->hash_work_space);
+}
+
+always_inline void
+vhash_set_key_word (vhash_t * h, u32 wi, u32 vi, u32 value)
+{
+ u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
+ u32 i1 = vi % 4;
+ vec_elt (h->key_work_space, i0).as_u32[i1] = value;
+}
+
+always_inline void
+vhash_set_key_word_u32x (vhash_t * h, u32 wi, u32 vi, u32x value)
+{
+ u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
+ vec_elt (h->key_work_space, i0).as_u32x4 = value;
+}
+
+always_inline u32
+vhash_get_key_word (vhash_t * h, u32 wi, u32 vi)
+{
+ u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
+ u32 i1 = vi % 4;
+ return vec_elt (h->key_work_space, i0).as_u32[i1];
+}
+
+always_inline u32x
+vhash_get_key_word_u32x (vhash_t * h, u32 wi, u32 vi)
+{
+ u32 i0 = (wi << h->log2_n_key_word_len_u32x) + vi;
+ return vec_elt (h->key_work_space, i0).as_u32x4;
+}
+
+always_inline void
+vhash_validate_sizes (vhash_t * h, u32 n_key_u32, u32 n_vectors)
+{
+ u32 n, l;
+
+ n = max_pow2 (n_vectors) / 4;
+ n = clib_max (n, 8);
+
+ h->log2_n_key_word_len_u32x = l = min_log2 (n);
+ vec_validate_aligned (h->key_work_space, (n_key_u32 << l) - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (h->hash_work_space, n - 1, CLIB_CACHE_LINE_BYTES);
+}
+
+always_inline void
+vhash_gather_key_stage (vhash_t * h,
+ u32 vector_index,
+ u32 n_vectors,
+ vhash_key_function_t key_function,
+ void *state, u32 n_key_u32s)
+{
+ u32 i, j, vi;
+
+  /* Gather keys for 4 packets (for 128 bit vector length, i.e. u32x4). */
+ for (i = 0; i < n_vectors; i++)
+ {
+ vi = vector_index * 4 + i;
+ for (j = 0; j < n_key_u32s; j++)
+ vhash_set_key_word (h, j, vi, key_function (state, vi, j));
+ }
+}
+
+always_inline void
+vhash_gather_4key_stage (vhash_t * h,
+ u32 vector_index,
+ vhash_4key_function_t key_function,
+ void *state, u32 n_key_u32s)
+{
+ u32 j, vi;
+ vi = vector_index * 4;
+ for (j = 0; j < n_key_u32s; j++)
+ vhash_set_key_word_u32x (h, j, vi, key_function (state, vi, j));
+}
+
+always_inline void
+vhash_mix_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
+{
+ i32 i, n_left;
+ u32x a, b, c;
+
+ /* Only need to do this for keys longer than 12 bytes. */
+ ASSERT (n_key_u32s > 3);
+
+ a = h->hash_seeds[0].as_u32x4;
+ b = h->hash_seeds[1].as_u32x4;
+ c = h->hash_seeds[2].as_u32x4;
+ for (i = 0, n_left = n_key_u32s - 3; n_left > 0; n_left -= 3, i += 3)
+ {
+ a +=
+ vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 0), vector_index);
+ if (n_left > 1)
+ b +=
+ vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 1), vector_index);
+ if (n_left > 2)
+ c +=
+ vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 2), vector_index);
+
+ hash_v3_mix_u32x (a, b, c);
+ }
+
+ /* Save away a, b, c for later finalize. */
+ {
+ vhash_hashed_key_t *hk =
+ vec_elt_at_index (h->hash_work_space, vector_index);
+ hk->hashed_key[0].as_u32x4 = a;
+ hk->hashed_key[1].as_u32x4 = b;
+ hk->hashed_key[2].as_u32x4 = c;
+ }
+}
+
+always_inline vhash_search_bucket_t *
+vhash_get_search_bucket_with_index (vhash_t * h, u32 i, u32 n_key_u32s)
+{
+ return ((vhash_search_bucket_t *)
+ vec_elt_at_index (h->search_buckets,
+ (i / 4) *
+ ((sizeof (vhash_search_bucket_t) /
+ sizeof (u32x4)) + n_key_u32s)));
+}
+
+always_inline vhash_search_bucket_t *
+vhash_get_search_bucket (vhash_t * h, u32 key_hash, u32 n_key_u32s)
+{
+ u32 i = key_hash & h->bucket_mask.as_u32[0];
+ return vhash_get_search_bucket_with_index (h, i, n_key_u32s);
+}
+
+always_inline u32x4
+vhash_get_4_search_bucket_byte_offsets (vhash_t * h, u32x4 key_hash,
+ u32 n_key_u32s)
+{
+ vhash_search_bucket_t *b;
+ u32 n_bytes_per_bucket = sizeof (b[0]) + n_key_u32s * sizeof (b->key[0]);
+ u32x4 r = key_hash & h->bucket_mask.as_u32x4;
+
+ /* Multiply with shifts and adds to get bucket byte offset. */
+#define _(x) u32x4_ishift_left (r, (x) - 2)
+ if (n_bytes_per_bucket == (1 << 5))
+ r = _(5);
+ else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4)))
+ r = _(5) + _(4);
+ else if (n_bytes_per_bucket == (1 << 6))
+ r = _(6);
+ else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4)))
+ r = _(6) + _(4);
+ else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5)))
+ r = _(6) + _(5);
+ else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4)))
+ r = _(6) + _(5) + _(4);
+ else
+ ASSERT (0);
+#undef _
+ return r;
+}
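The shift/add ladder above strength-reduces the multiply (i / 4) * n_bytes_per_bucket; i already has its low 2 bits cleared by bucket_mask. A worked case, assuming 2-word keys:

    /* n_key_u32s = 2: n_bytes_per_bucket = 16 + 2 * 16 = 48 = (1 << 5) + (1 << 4).
       Byte offset = (i / 4) * 48 = i * 12 = (i << 3) + (i << 2),
       i.e. r = _(5) + _(4), where _(x) shifts left by (x) - 2. */
    u32 i = 8;                     /* start of the third 4-entry bucket */
    ASSERT (((i << 3) + (i << 2)) == (i / 4) * 48);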
+
+always_inline void
+vhash_finalize_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
+{
+ i32 n_left;
+ u32x a, b, c;
+ vhash_hashed_key_t *hk =
+ vec_elt_at_index (h->hash_work_space, vector_index);
+
+ if (n_key_u32s <= 3)
+ {
+ a = h->hash_seeds[0].as_u32x4;
+ b = h->hash_seeds[1].as_u32x4;
+ c = h->hash_seeds[2].as_u32x4;
+ n_left = n_key_u32s;
+ }
+ else
+ {
+ a = hk->hashed_key[0].as_u32x4;
+ b = hk->hashed_key[1].as_u32x4;
+ c = hk->hashed_key[2].as_u32x4;
+ n_left = 3;
+ }
+
+ if (n_left > 0)
+ a += vhash_get_key_word_u32x (h, 0, vector_index);
+ if (n_left > 1)
+ b += vhash_get_key_word_u32x (h, 1, vector_index);
+ if (n_left > 2)
+ c += vhash_get_key_word_u32x (h, 2, vector_index);
+
+ hash_v3_finalize_u32x (a, b, c);
+
+ /* Only save away last 32 bits of hash code. */
+ hk->hashed_key[2].as_u32x4 = c;
+
+ /* Prefetch buckets. This costs a bit for small tables but saves
+ big for large ones. */
+ {
+ vhash_search_bucket_t *b0, *b1, *b2, *b3;
+ u32x4_union_t kh;
+
+ kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (h, c, n_key_u32s);
+ hk->hashed_key[1].as_u32x4 = kh.as_u32x4;
+
+ b0 = (void *) h->search_buckets + kh.as_u32[0];
+ b1 = (void *) h->search_buckets + kh.as_u32[1];
+ b2 = (void *) h->search_buckets + kh.as_u32[2];
+ b3 = (void *) h->search_buckets + kh.as_u32[3];
+
+ CLIB_PREFETCH (b0, sizeof (b0[0]) + n_key_u32s * sizeof (b0->key[0]),
+ READ);
+ CLIB_PREFETCH (b1, sizeof (b1[0]) + n_key_u32s * sizeof (b1->key[0]),
+ READ);
+ CLIB_PREFETCH (b2, sizeof (b2[0]) + n_key_u32s * sizeof (b2->key[0]),
+ READ);
+ CLIB_PREFETCH (b3, sizeof (b3[0]) + n_key_u32s * sizeof (b3->key[0]),
+ READ);
+ }
+}
+
+always_inline u32
+vhash_merge_results (u32x4 r)
+{
+ r = r | u32x4_word_shift_right (r, 2);
+ r = r | u32x4_word_shift_right (r, 1);
+ return u32x4_get0 (r);
+}
+
+/* Bucket is full if none of its 4 results are 0. */
+always_inline u32
+vhash_search_bucket_is_full (u32x4 r)
+{
+ return u32x4_zero_byte_mask (r) == 0;
+}
+
+always_inline u32
+vhash_non_empty_result_index (u32x4 x)
+{
+ u32 empty_mask = u32x4_zero_byte_mask (x);
+ ASSERT (empty_mask != 0xffff);
+ return min_log2 (0xffff & ~empty_mask) / 4;
+}
+
+always_inline u32
+vhash_empty_result_index (u32x4 x)
+{
+ u32 empty_mask = u32x4_zero_byte_mask (x);
+ ASSERT (empty_mask != 0);
+ return min_log2 (0xffff & empty_mask) / 4;
+}
+
+always_inline u32x4
+vhash_bucket_compare (vhash_t * h,
+ u32x4_union_t * bucket, u32 key_word_index, u32 vi)
+{
+ u32 k = vhash_get_key_word (h, key_word_index, vi);
+ u32x4 x = { k, k, k, k };
+ return u32x4_is_equal (bucket[key_word_index].as_u32x4, x);
+}
+
+#define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \
+do { \
+ u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi)); \
+ u32x4 _k0 = u32x4_splat_word (_k4, 0); \
+ u32x4 _k1 = u32x4_splat_word (_k4, 1); \
+ u32x4 _k2 = u32x4_splat_word (_k4, 2); \
+ u32x4 _k3 = u32x4_splat_word (_k4, 3); \
+ \
+ cmp0 = u32x4_is_equal (b0->key[wi].as_u32x4, _k0); \
+ cmp1 = u32x4_is_equal (b1->key[wi].as_u32x4, _k1); \
+ cmp2 = u32x4_is_equal (b2->key[wi].as_u32x4, _k2); \
+ cmp3 = u32x4_is_equal (b3->key[wi].as_u32x4, _k3); \
+} while (0)
+
+u32 vhash_get_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);
+
+always_inline void
+vhash_get_stage (vhash_t * h,
+ u32 vector_index,
+ u32 n_vectors,
+ vhash_result_function_t result_function,
+ void *state, u32 n_key_u32s)
+{
+ u32 i, j;
+ vhash_hashed_key_t *hk =
+ vec_elt_at_index (h->hash_work_space, vector_index);
+ vhash_search_bucket_t *b;
+
+ for (i = 0; i < n_vectors; i++)
+ {
+ u32 vi = vector_index * 4 + i;
+ u32 key_hash = hk->hashed_key[2].as_u32[i];
+ u32 result;
+ u32x4 r, r0;
+
+ b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
+
+ r = r0 = b->result.as_u32x4;
+ for (j = 0; j < n_key_u32s; j++)
+ r &= vhash_bucket_compare (h, &b->key[0], j, vi);
+
+ /* At this point only one of 4 results should be non-zero.
+         So we can OR all 4 together and get the valid result (if there is one). */
+ result = vhash_merge_results (r);
+
+ if (!result && vhash_search_bucket_is_full (r0))
+ result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);
+
+ result_function (state, vi, result - 1, n_key_u32s);
+ }
+}
+
+always_inline void
+vhash_get_4_stage (vhash_t * h,
+ u32 vector_index,
+ vhash_4result_function_t result_function,
+ void *state, u32 n_key_u32s)
+{
+ u32 i, vi;
+ vhash_hashed_key_t *hk =
+ vec_elt_at_index (h->hash_work_space, vector_index);
+ vhash_search_bucket_t *b0, *b1, *b2, *b3;
+ u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before;
+ u32x4_union_t kh;
+
+ kh.as_u32x4 = hk->hashed_key[1].as_u32x4;
+
+ b0 = (void *) h->search_buckets + kh.as_u32[0];
+ b1 = (void *) h->search_buckets + kh.as_u32[1];
+ b2 = (void *) h->search_buckets + kh.as_u32[2];
+ b3 = (void *) h->search_buckets + kh.as_u32[3];
+
+ r0 = r0_before = b0->result.as_u32x4;
+ r1 = r1_before = b1->result.as_u32x4;
+ r2 = r2_before = b2->result.as_u32x4;
+ r3 = r3_before = b3->result.as_u32x4;
+
+ vi = vector_index * 4;
+
+ for (i = 0; i < n_key_u32s; i++)
+ {
+ u32x4 c0, c1, c2, c3;
+ vhash_bucket_compare_4 (h, i, vector_index,
+ b0, b1, b2, b3, c0, c1, c2, c3);
+ r0 &= c0;
+ r1 &= c1;
+ r2 &= c2;
+ r3 &= c3;
+ }
+
+ u32x4_transpose (r0, r1, r2, r3);
+
+ /* Gather together 4 results. */
+ {
+ u32x4_union_t r;
+ u32x4 ones = { 1, 1, 1, 1 };
+ u32 not_found_mask;
+
+ r.as_u32x4 = r0 | r1 | r2 | r3;
+ not_found_mask = u32x4_zero_byte_mask (r.as_u32x4);
+ not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4 * 0))
+ | (vhash_search_bucket_is_full (r1_before) << (4 * 1))
+ | (vhash_search_bucket_is_full (r2_before) << (4 * 2))
+ | (vhash_search_bucket_is_full (r3_before) <<
+ (4 * 3)));
+ if (not_found_mask)
+ {
+ u32x4_union_t key_hash;
+
+ key_hash.as_u32x4 =
+ hk->hashed_key[2].as_u32x4 & h->bucket_mask.as_u32x4;
+
+ /* Slow path: one of the buckets may have been full and we need to search overflow. */
+ if (not_found_mask & (1 << (4 * 0)))
+ r.as_u32[0] = vhash_get_overflow (h, key_hash.as_u32[0],
+ vi + 0, n_key_u32s);
+ if (not_found_mask & (1 << (4 * 1)))
+ r.as_u32[1] = vhash_get_overflow (h, key_hash.as_u32[1],
+ vi + 1, n_key_u32s);
+ if (not_found_mask & (1 << (4 * 2)))
+ r.as_u32[2] = vhash_get_overflow (h, key_hash.as_u32[2],
+ vi + 2, n_key_u32s);
+ if (not_found_mask & (1 << (4 * 3)))
+ r.as_u32[3] = vhash_get_overflow (h, key_hash.as_u32[3],
+ vi + 3, n_key_u32s);
+ }
+
+ result_function (state, vi, r.as_u32x4 - ones, n_key_u32s);
+ }
+}
+
+u32
+vhash_set_overflow (vhash_t * h,
+ u32 key_hash, u32 vi, u32 new_result, u32 n_key_u32s);
+
+always_inline void
+vhash_set_stage (vhash_t * h,
+ u32 vector_index,
+ u32 n_vectors,
+ vhash_result_function_t result_function,
+ void *state, u32 n_key_u32s)
+{
+ u32 i, j, n_new_elts = 0;
+ vhash_hashed_key_t *hk =
+ vec_elt_at_index (h->hash_work_space, vector_index);
+ vhash_search_bucket_t *b;
+
+ for (i = 0; i < n_vectors; i++)
+ {
+ u32 vi = vector_index * 4 + i;
+ u32 key_hash = hk->hashed_key[2].as_u32[i];
+ u32 old_result, new_result;
+ u32 i_set;
+ u32x4 r, r0, cmp;
+
+ b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
+
+ cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
+ for (j = 1; j < n_key_u32s; j++)
+ cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);
+
+ r0 = b->result.as_u32x4;
+ r = r0 & cmp;
+
+ /* At this point only one of 4 results should be non-zero.
+         So we can OR all 4 together and get the valid result (if there is one). */
+ old_result = vhash_merge_results (r);
+
+ if (!old_result && vhash_search_bucket_is_full (r0))
+ old_result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);
+
+ /* Get new result; possibly do something with old result. */
+ new_result = result_function (state, vi, old_result - 1, n_key_u32s);
+
+ /* User cannot use ~0 as a hash result since a result of 0 is
+ used to mark unused bucket entries. */
+ ASSERT (new_result + 1 != 0);
+ new_result += 1;
+
+ /* Set over-writes existing result. */
+ if (old_result)
+ {
+ i_set = vhash_non_empty_result_index (r);
+ b->result.as_u32[i_set] = new_result;
+ }
+ else
+ {
+ /* Set allocates new result. */
+ u32 valid_mask;
+
+ valid_mask = (((b->result.as_u32[0] != 0) << 0)
+ | ((b->result.as_u32[1] != 0) << 1)
+ | ((b->result.as_u32[2] != 0) << 2)
+ | ((b->result.as_u32[3] != 0) << 3));
+
+ /* Rotate 4 bit valid mask so that key_hash corresponds to bit 0. */
+ i_set = key_hash & 3;
+ valid_mask =
+ ((valid_mask >> i_set) | (valid_mask << (4 - i_set))) & 0xf;
+
+ /* Insert into first empty position in bucket after key_hash. */
+ i_set = (i_set + h->find_first_zero_table[valid_mask]) & 3;
+
+ if (valid_mask != 0xf)
+ {
+ n_new_elts += 1;
+
+ b->result.as_u32[i_set] = new_result;
+
+ /* Insert new key into search bucket. */
+ for (j = 0; j < n_key_u32s; j++)
+ b->key[j].as_u32[i_set] = vhash_get_key_word (h, j, vi);
+ }
+ else
+ vhash_set_overflow (h, key_hash, vi, new_result, n_key_u32s);
+ }
+ }
+
+ h->n_elts += n_new_elts;
+}
+
+u32 vhash_unset_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);
+
+void
+vhash_unset_refill_from_overflow (vhash_t * h,
+ vhash_search_bucket_t * b,
+ u32 key_hash, u32 n_key_u32s);
+
+/* Note: Eliot tried doing 4 unsets at once, could not get a speedup,
+   and abandoned vhash_unset_4_stage. */
+always_inline void
+vhash_unset_stage (vhash_t * h,
+ u32 vector_index,
+ u32 n_vectors,
+ vhash_result_function_t result_function,
+ void *state, u32 n_key_u32s)
+{
+ u32 i, j, n_elts_unset = 0;
+ vhash_hashed_key_t *hk =
+ vec_elt_at_index (h->hash_work_space, vector_index);
+ vhash_search_bucket_t *b;
+
+ for (i = 0; i < n_vectors; i++)
+ {
+ u32 vi = vector_index * 4 + i;
+ u32 key_hash = hk->hashed_key[2].as_u32[i];
+ u32 old_result;
+ u32x4 cmp, r0;
+
+ b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
+
+ cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
+ for (j = 1; j < n_key_u32s; j++)
+ cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);
+
+ r0 = b->result.as_u32x4;
+
+ /* At this point cmp is all ones where key matches and zero otherwise.
+ So, this will invalidate results for matching key and do nothing otherwise. */
+ b->result.as_u32x4 = r0 & ~cmp;
+
+ old_result = vhash_merge_results (r0 & cmp);
+
+ n_elts_unset += old_result != 0;
+
+ if (vhash_search_bucket_is_full (r0))
+ {
+ if (old_result)
+ vhash_unset_refill_from_overflow (h, b, key_hash, n_key_u32s);
+ else
+ old_result = vhash_unset_overflow (h, key_hash, vi, n_key_u32s);
+ }
+
+ result_function (state, vi, old_result - 1, n_key_u32s);
+ }
+ ASSERT (h->n_elts >= n_elts_unset);
+ h->n_elts -= n_elts_unset;
+}
+
+void vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32,
+ u32 * hash_seeds);
+
+void vhash_resize (vhash_t * old, u32 log2_n_keys);
+
+typedef struct
+{
+ vhash_t *vhash;
+
+ union
+ {
+ struct
+ {
+ u32 *keys;
+ u32 *results;
+ };
+
+ /* Vector layout for get keys. */
+ struct
+ {
+ u32x4_union_t *get_keys;
+ u32x4_union_t *get_results;
+ };
+ };
+
+ u32 n_vectors_div_4;
+ u32 n_vectors_mod_4;
+
+ u32 n_key_u32;
+
+ u32 n_keys;
+} vhash_main_t;
+
+always_inline u32
+vhash_get_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
+{
+ u32 i, n;
+
+ i = vm->n_keys;
+ vm->n_keys = i + n_keys;
+
+ n = (round_pow2 (vm->n_keys, 4) / 4) * n_key_u32;
+
+ vec_validate_aligned (vm->get_keys, n - 1, sizeof (vm->get_keys[0]));
+ vec_validate_aligned (vm->get_results, n - 1, sizeof (vm->get_results[0]));
+
+ return i;
+}
+
+always_inline void
+vhash_get_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
+ u32 value)
+{
+ u32x4_union_t *k = vec_elt_at_index (vm->get_keys, (vi / 4) * n_key_u32);
+ ASSERT (wi < n_key_u32);
+ k[wi].as_u32[vi % 4] = value;
+}
+
+always_inline u32
+vhash_get_fetch_result (vhash_main_t * vm, u32 vi)
+{
+ u32x4_union_t *r = vec_elt_at_index (vm->get_results, vi / 4);
+ return r->as_u32[vi % 4];
+}
+
+void vhash_main_get (vhash_main_t * vm);
+
+always_inline u32
+vhash_set_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
+{
+ u32 i;
+
+ i = vm->n_keys;
+ vm->n_keys = i + n_keys;
+
+ vec_resize (vm->keys, n_keys * n_key_u32);
+ vec_resize (vm->results, n_keys);
+
+ return i;
+}
+
+always_inline void
+vhash_set_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
+ u32 value)
+{
+ u32 *k = vec_elt_at_index (vm->keys, vi * n_key_u32);
+ ASSERT (wi < n_key_u32);
+ k[wi] = value;
+}
+
+always_inline void
+vhash_set_set_result (vhash_main_t * vm, u32 vi, u32 result)
+{
+ u32 *r = vec_elt_at_index (vm->results, vi);
+ r[0] = result;
+}
+
+always_inline u32
+vhash_set_fetch_old_result (vhash_main_t * vm, u32 vi)
+{
+ u32 *r = vec_elt_at_index (vm->results, vi);
+ return r[0];
+}
+
+void vhash_main_set (vhash_main_t * vm);
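A minimal usage sketch of the batched set flow above, for single-u32 keys; insert_one is a hypothetical helper, not part of this patch:

    static void
    insert_one (vhash_main_t * vm, u32 key, u32 result)
    {
      u32 vi = vhash_set_alloc_keys (vm, /* n_keys */ 1, /* n_key_u32 */ 1);
      vhash_set_set_key_word (vm, vi, /* wi */ 0, /* n_key_u32 */ 1, key);
      vhash_set_set_result (vm, vi, result);   /* result must not be ~0 */
      vhash_main_set (vm);
      /* vhash_set_fetch_old_result (vm, vi) now returns the previous
         binding for key, or ~0 if the key was absent. */
    }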
+
+always_inline u32
+vhash_unset_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
+{
+ return vhash_set_alloc_keys (vm, n_keys, n_key_u32);
+}
+
+always_inline void
+vhash_unset_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
+ u32 value)
+{
+ vhash_set_set_key_word (vm, vi, wi, n_key_u32, value);
+}
+
+always_inline void
+vhash_unset_set_result (vhash_main_t * vm, u32 vi, u32 result)
+{
+ vhash_set_set_result (vm, vi, result);
+}
+
+always_inline u32
+vhash_unset_fetch_old_result (vhash_main_t * vm, u32 vi)
+{
+ return vhash_set_fetch_old_result (vm, vi);
+}
+
+void vhash_main_unset (vhash_main_t * vm);
+
+typedef struct
+{
+ vhash_main_t new;
+
+ vhash_t *old;
+} vhash_resize_t;
+
+u32 vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index,
+ u32 n_vectors);
+
+#endif /* CLIB_HAVE_VEC128 */
+
+#endif /* included_clib_vhash_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/xxhash.h b/src/vppinfra/xxhash.h
new file mode 100644
index 00000000..ea1e21bf
--- /dev/null
+++ b/src/vppinfra/xxhash.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Original license for the code used to construct
+ clib_xxhash(...).
+
+ xxHash - Fast Hash algorithm
+ Copyright (C) 2012-2014, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __included_xxhash_h__
+#define __included_xxhash_h__
+
+#define PRIME64_1 11400714785074694791ULL
+#define PRIME64_2 14029467366897019727ULL
+#define PRIME64_3 1609587929392839161ULL
+#define PRIME64_4 9650029242287828579ULL
+#define PRIME64_5 2870177450012600261ULL
+#define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+
+static inline u64
+clib_xxhash (u64 key)
+{
+ u64 k1, h64;
+
+ k1 = key;
+ h64 = 0x9e3779b97f4a7c13LL + PRIME64_5 + 8;
+ k1 *= PRIME64_2;
+ k1 = XXH_rotl64 (k1, 31);
+ k1 *= PRIME64_1;
+ h64 ^= k1;
+ h64 = XXH_rotl64 (h64, 27) * PRIME64_1 + PRIME64_4;
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+ return h64;
+}
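A small usage sketch; bucket_of is a hypothetical helper, not part of this header:

    /* Map a 64-bit key to one of n_buckets slots (n_buckets a power of 2). */
    static inline u64
    bucket_of (u64 key, u64 n_buckets)
    {
      return clib_xxhash (key) & (n_buckets - 1);
    }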
+
+#endif /* __included_xxhash_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/xy.h b/src/vppinfra/xy.h
new file mode 100644
index 00000000..fb562161
--- /dev/null
+++ b/src/vppinfra/xy.h
@@ -0,0 +1,56 @@
+/* (X,Y) coordinates. */
+
+/*
+ Copyright (c) 2008 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_clib_xy_h
+#define included_clib_xy_h
+
+#include <vppinfra/types.h>
+
+/* Basic definitions: coordinates and points. */
+typedef double xy_float_t;
+typedef __complex__ double xy_t;
+typedef __complex__ int ixy_t;
+
+typedef __complex__ char i8xy_t;
+typedef __complex__ short i16xy_t;
+typedef __complex__ int i32xy_t;
+
+/* X/Y components of a point: can be used as either rvalue/lvalue. */
+#define xy_x(x) __real__ (x)
+#define xy_y(x) __imag__ (x)
+
+/* Unit vectors in x/y directions. */
+#define xy_x_unit_vector (1)
+#define xy_y_unit_vector (1I)
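A brief sketch of intended usage (variable names are illustrative); note that both components are lvalues, per the comment above:

    xy_t p = 3 + 4i;                     /* the point (3, 4) */
    xy_x (p) += 1;                       /* p is now (4, 4) */
    xy_t q = p + 2 * xy_y_unit_vector;   /* translate by (0, 2): (4, 6) */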
+
+#endif /* included_clib_xy_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/zvec.c b/src/vppinfra/zvec.c
new file mode 100644
index 00000000..d062e5f7
--- /dev/null
+++ b/src/vppinfra/zvec.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/bitmap.h>
+#include <vppinfra/bitops.h> /* for next_with_same_number_of_set_bits */
+#include <vppinfra/error.h> /* for ASSERT */
+#include <vppinfra/mem.h>
+#include <vppinfra/os.h> /* for os_panic */
+#include <vppinfra/vec.h>
+#include <vppinfra/zvec.h>
+
+/* Consider coding as bitmap, coding = 2^c_0 + 2^c_1 + ... + 2^c_n
+ With c_0 < c_1 < ... < c_n. coding == 0 represents c_n = BITS (uword).
+
+ Unsigned integers i = 0 ... are represented as follows:
+
+ 0 <= i < 2^c_0 (i << 1) | (1 << 0) binary: i 1
+ 2^c_0 <= i < 2^c_0 + 2^c_1 (i << 2) | (1 << 1) binary: i 1 0
+ ... binary: i 0 ... 0
+
+   Smaller numbers use fewer bits. Coding is chosen so that encoding
+   a given histogram of typical values gives the smallest number of bits.
+ The number and position of coding bits c_i are used to best fit the
+ histogram of typical values.
+*/
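A worked example of the scheme, using the encode/decode routines below. Take coding = 2^1 + 2^4 = 18, so values 0..1 cost 2 bits and values 2..17 cost 5 bits (values chosen only for illustration):

    uword n_bits, z;

    z = zvec_encode (18, 1, &n_bits);  /* 1 < 2^1: (1 << 1) | 1 = 3, 2 bits */
    ASSERT (z == 3 && n_bits == 2);

    z = zvec_encode (18, 5, &n_bits);  /* 5 - 2 = 3, 2nd range: 3 << 1 = 6, 5 bits */
    ASSERT (z == 6 && n_bits == 5);

    ASSERT (zvec_decode (18, z, &n_bits) == 5 && n_bits == 5);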
+
+/* Decode given compressed data. Return number of compressed data
+ bits used. */
+uword
+zvec_decode (uword coding, uword zdata, uword * n_zdata_bits)
+{
+ uword c, d, result, n_bits;
+ uword explicit_end, implicit_end;
+
+ result = 0;
+ n_bits = 0;
+ while (1)
+ {
+ c = first_set (coding);
+ implicit_end = c == coding;
+ explicit_end = (zdata & 1) & ~implicit_end;
+ d = (zdata >> explicit_end) & (c - 1);
+ if (explicit_end | implicit_end)
+ {
+ result += d;
+ n_bits += min_log2 (c) + explicit_end;
+ break;
+ }
+ n_bits += 1;
+ result += c;
+ coding ^= c;
+ zdata >>= 1;
+ }
+
+ if (coding == 0)
+ n_bits = BITS (uword);
+
+ *n_zdata_bits = n_bits;
+ return result;
+}
+
+uword
+zvec_encode (uword coding, uword data, uword * n_result_bits)
+{
+ uword c, shift, result;
+ uword explicit_end, implicit_end;
+
+  /* Data must be in range. The check below is written as
+     data <= coding - 1 (rather than data < coding) so that the
+     special coding == 0, where coding - 1 wraps to ~0, accepts
+     all data. */
+ ASSERT (data <= coding - 1);
+
+ shift = 0;
+ while (1)
+ {
+ c = first_set (coding);
+ implicit_end = c == coding;
+ explicit_end = ((data & (c - 1)) == data);
+ if (explicit_end | implicit_end)
+ {
+ uword t = explicit_end & ~implicit_end;
+ result = ((data << t) | t) << shift;
+ *n_result_bits =
+ /* data bits */ (c == 0 ? BITS (uword) : min_log2 (c))
+ /* shift bits */ + shift + t;
+ return result;
+ }
+ data -= c;
+ coding ^= c;
+ shift++;
+ }
+
+ /* Never reached. */
+ ASSERT (0);
+ return ~0;
+}
+
+always_inline uword
+get_data (void *data, uword data_bytes, uword is_signed)
+{
+ if (data_bytes == 1)
+ return is_signed ? zvec_signed_to_unsigned (*(i8 *) data) : *(u8 *) data;
+ else if (data_bytes == 2)
+ return is_signed ? zvec_signed_to_unsigned (*(i16 *) data) : *(u16 *)
+ data;
+ else if (data_bytes == 4)
+ return is_signed ? zvec_signed_to_unsigned (*(i32 *) data) : *(u32 *)
+ data;
+ else if (data_bytes == 8)
+ return is_signed ? zvec_signed_to_unsigned (*(i64 *) data) : *(u64 *)
+ data;
+ else
+ {
+ os_panic ();
+ return ~0;
+ }
+}
+
+always_inline void
+put_data (void *data, uword data_bytes, uword is_signed, uword x)
+{
+ if (data_bytes == 1)
+ {
+ if (is_signed)
+ *(i8 *) data = zvec_unsigned_to_signed (x);
+ else
+ *(u8 *) data = x;
+ }
+ else if (data_bytes == 2)
+ {
+ if (is_signed)
+ *(i16 *) data = zvec_unsigned_to_signed (x);
+ else
+ *(u16 *) data = x;
+ }
+ else if (data_bytes == 4)
+ {
+ if (is_signed)
+ *(i32 *) data = zvec_unsigned_to_signed (x);
+ else
+ *(u32 *) data = x;
+ }
+ else if (data_bytes == 8)
+ {
+ if (is_signed)
+ *(i64 *) data = zvec_unsigned_to_signed (x);
+ else
+ *(u64 *) data = x;
+ }
+ else
+ {
+ os_panic ();
+ }
+}
+
+always_inline uword *
+zvec_encode_inline (uword * zvec,
+ uword * zvec_n_bits,
+ uword coding,
+ void *data,
+ uword data_stride,
+ uword n_data, uword data_bytes, uword is_signed)
+{
+ uword i;
+
+ i = *zvec_n_bits;
+ while (n_data >= 1)
+ {
+ uword d0, z0, l0;
+
+ d0 = get_data (data + 0 * data_stride, data_bytes, is_signed);
+ data += 1 * data_stride;
+ n_data -= 1;
+
+ z0 = zvec_encode (coding, d0, &l0);
+ zvec = clib_bitmap_set_multiple (zvec, i, z0, l0);
+ i += l0;
+ }
+
+ *zvec_n_bits = i;
+ return zvec;
+}
+
+#define _(TYPE,IS_SIGNED) \
+ uword * zvec_encode_##TYPE (uword * zvec, \
+ uword * zvec_n_bits, \
+ uword coding, \
+ void * data, \
+ uword data_stride, \
+ uword n_data) \
+ { \
+ return zvec_encode_inline (zvec, zvec_n_bits, \
+ coding, \
+ data, data_stride, n_data, \
+ /* data_bytes */ sizeof (TYPE), \
+ /* is_signed */ IS_SIGNED); \
+ }
+
+_(u8, /* is_signed */ 0);
+_(u16, /* is_signed */ 0);
+_(u32, /* is_signed */ 0);
+_(u64, /* is_signed */ 0);
+_(i8, /* is_signed */ 1);
+_(i16, /* is_signed */ 1);
+_(i32, /* is_signed */ 1);
+_(i64, /* is_signed */ 1);
+
+#undef _
+
+always_inline uword
+coding_max_n_bits (uword coding)
+{
+ uword n_bits;
+ (void) zvec_decode (coding, 0, &n_bits);
+ return n_bits;
+}
+
+always_inline void
+zvec_decode_inline (uword * zvec,
+ uword * zvec_n_bits,
+ uword coding,
+ void *data,
+ uword data_stride,
+ uword n_data, uword data_bytes, uword is_signed)
+{
+ uword i, n_max;
+
+ i = *zvec_n_bits;
+ n_max = coding_max_n_bits (coding);
+ while (n_data >= 1)
+ {
+ uword d0, z0, l0;
+
+ z0 = clib_bitmap_get_multiple (zvec, i, n_max);
+ d0 = zvec_decode (coding, z0, &l0);
+ i += l0;
+ put_data (data + 0 * data_stride, data_bytes, is_signed, d0);
+ data += 1 * data_stride;
+ n_data -= 1;
+ }
+ *zvec_n_bits = i;
+}
+
+#define _(TYPE,IS_SIGNED) \
+ void zvec_decode_##TYPE (uword * zvec, \
+ uword * zvec_n_bits, \
+ uword coding, \
+ void * data, \
+ uword data_stride, \
+ uword n_data) \
+ { \
+ return zvec_decode_inline (zvec, zvec_n_bits, \
+ coding, \
+ data, data_stride, n_data, \
+ /* data_bytes */ sizeof (TYPE), \
+ /* is_signed */ IS_SIGNED); \
+ }
+
+_(u8, /* is_signed */ 0);
+_(u16, /* is_signed */ 0);
+_(u32, /* is_signed */ 0);
+_(u64, /* is_signed */ 0);
+_(i8, /* is_signed */ 1);
+_(i16, /* is_signed */ 1);
+_(i32, /* is_signed */ 1);
+_(i64, /* is_signed */ 1);
+
+#undef _
+
+/* Compute number of bits needed to encode given histogram. */
+static uword
+zvec_coding_bits (uword coding, uword * histogram_counts, uword min_bits)
+{
+ uword n_type_bits, n_bits;
+ uword this_count, last_count, max_count_index;
+ uword i, b, l;
+
+ n_bits = 0;
+ n_type_bits = 1;
+ last_count = 0;
+ max_count_index = vec_len (histogram_counts) - 1;
+
+ /* Coding is not large enough to encode given data. */
+ if (coding <= max_count_index)
+ return ~0;
+
+ i = 0;
+ while (coding != 0)
+ {
+ b = first_set (coding);
+ l = min_log2 (b);
+ i += b;
+
+ this_count =
+ histogram_counts[i > max_count_index ? max_count_index : i - 1];
+
+ /* No more data to encode? */
+ if (this_count == last_count)
+ break;
+
+ /* Last coding is i 0 ... 0 so we don't need an extra type bit. */
+ if (coding == b)
+ n_type_bits--;
+
+ n_bits += (this_count - last_count) * (n_type_bits + l);
+
+ /* This coding cannot be minimal: so return. */
+ if (n_bits >= min_bits)
+ return ~0;
+
+ last_count = this_count;
+ coding ^= b;
+ n_type_bits++;
+ }
+
+ return n_bits;
+}
+
+uword
+_zvec_coding_from_histogram (void *histogram,
+ uword histogram_len,
+ uword histogram_elt_count_offset,
+ uword histogram_elt_bytes,
+ uword max_value_to_encode,
+ zvec_coding_info_t * coding_return)
+{
+ uword coding, min_coding;
+ uword min_coding_bits, coding_bits;
+ uword i, n_bits_set, total_count;
+ uword *counts;
+ zvec_histogram_count_t *h_count = histogram + histogram_elt_count_offset;
+
+ if (histogram_len < 1)
+ {
+ coding_return->coding = 0;
+ coding_return->min_coding_bits = 0;
+ coding_return->n_data = 0;
+ coding_return->n_codes = 0;
+ coding_return->ave_coding_bits = 0;
+ return 0;
+ }
+
+ total_count = 0;
+ counts = vec_new (uword, histogram_len);
+ for (i = 0; i < histogram_len; i++)
+ {
+ zvec_histogram_count_t this_count = h_count[0];
+ total_count += this_count;
+ counts[i] = total_count;
+ h_count =
+ (zvec_histogram_count_t *) ((void *) h_count + histogram_elt_bytes);
+ }
+
+ min_coding = 0;
+ min_coding_bits = ~0;
+
+ {
+ uword base_coding =
+ max_value_to_encode !=
+ ~0 ? (1 + max_value_to_encode) : vec_len (counts);
+ uword max_coding = max_pow2 (2 * base_coding);
+
+ for (n_bits_set = 1; n_bits_set <= 8; n_bits_set++)
+ {
+ for (coding = pow2_mask (n_bits_set);
+ coding < max_coding;
+ coding = next_with_same_number_of_set_bits (coding))
+ {
+ coding_bits = zvec_coding_bits (coding, counts, min_coding_bits);
+ if (coding_bits >= min_coding_bits)
+ continue;
+ min_coding_bits = coding_bits;
+ min_coding = coding;
+ }
+ }
+ }
+
+ if (coding_return)
+ {
+ coding_return->coding = min_coding;
+ coding_return->min_coding_bits = min_coding_bits;
+ coding_return->n_data = total_count;
+ coding_return->n_codes = vec_len (counts);
+ coding_return->ave_coding_bits =
+ (f64) min_coding_bits / (f64) total_count;
+ }
+
+ vec_free (counts);
+
+ return min_coding;
+}
+
+u8 *
+format_zvec_coding (u8 * s, va_list * args)
+{
+ zvec_coding_info_t *c = va_arg (*args, zvec_coding_info_t *);
+ return format (s,
+ "zvec coding 0x%x, %d elts, %d codes, %d bits total, %.4f ave bits/code",
+ c->coding, c->n_data, c->n_codes, c->min_coding_bits,
+ c->ave_coding_bits);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/zvec.h b/src/vppinfra/zvec.h
new file mode 100644
index 00000000..7d35a3fe
--- /dev/null
+++ b/src/vppinfra/zvec.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef included_zvec_h
+#define included_zvec_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error.h> /* for ASSERT */
+#include <vppinfra/format.h>
+
+/* zvec: compressed vectors.
+
+ Data is entropy coded with 32 bit "codings".
+
+ Consider the coding as a bitmap: coding = 2^c_0 + 2^c_1 + ... + 2^c_n
+ with c_0 < c_1 < ... < c_n. coding == 0 represents c_n = BITS (uword).
+
+ Unsigned integers i = 0, 1, ... are represented as follows:
+
+ 0 <= i < 2^c_0 (i << 1) | (1 << 0) binary: i 1
+ 2^c_0 <= i < 2^c_0 + 2^c_1 ((i - 2^c_0) << 2) | (1 << 1) binary: i 1 0
+ ... binary: i 0 ... 0
+
+ Smaller numbers use fewer bits. The coding is chosen so that encoding
+ a given histogram of typical values takes the smallest number of bits:
+ the number and position of the coding bits c_i are fit to that
+ histogram.
+*/
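+
+/* Worked example (added for illustration, not part of the original
+ comment): take coding = 0x5 = 2^0 + 2^2, i.e. c_0 = 0, c_1 = 2.
+
+ Bucket 0 holds the single value i = 0, encoded as the 1-bit code "1".
+ Bucket 1 holds 1 <= i <= 4; since it is the last bucket no explicit
+ end marker is needed, so its code is the offset (i - 1) shifted past
+ a single 0 type bit, 3 bits in all:
+
+ i = 0 -> (0 << 1) | 1 = 0x1, 1 bit
+ i = 1 -> ((1 - 1) << 1) | 0 = 0x0, 3 bits
+ i = 4 -> ((4 - 1) << 1) | 0 = 0x6, 3 bits
+
+ This matches the cost model in zvec_coding_bits: counts[0] elements
+ are charged 1 bit each and the remaining elements 3 bits each, so a
+ histogram whose mass is concentrated at 0 compresses well. */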
+
+typedef struct
+{
+ /* Smallest coding for given histogram of typical data. */
+ u32 coding;
+
+ /* Number of data elements in the histogram. */
+ u32 n_data;
+
+ /* Number of codes (unique values) in histogram. */
+ u32 n_codes;
+
+ /* Number of bits in smallest coding of data. */
+ u32 min_coding_bits;
+
+ /* Average number of bits per encoded data element
+ (min_coding_bits / n_data). */
+ f64 ave_coding_bits;
+} zvec_coding_info_t;
+
+/* Encode/decode data. */
+uword zvec_encode (uword coding, uword data, uword * n_result_bits);
+uword zvec_decode (uword coding, uword zdata, uword * n_zdata_bits);
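+
+/* Illustrative round trip, assuming the bit layout sketched in the
+ worked example above (coding 0x5, value 4 -> 3-bit code 0x6):
+
+ uword n_bits;
+ uword code = zvec_encode (0x5, 4, &n_bits);
+ uword value = zvec_decode (0x5, code, &n_bits);
+ ASSERT (value == 4 && n_bits == 3);
+*/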
+
+format_function_t format_zvec_coding;
+
+typedef u32 zvec_histogram_count_t;
+
+#define zvec_coding_from_histogram(h,count_field,len,max_value_to_encode,zc) \
+ _zvec_coding_from_histogram ((h), (len), \
+ STRUCT_OFFSET_OF_VAR (h, count_field), \
+ sizeof (h[0]), \
+ max_value_to_encode, \
+ (zc))
+
+uword
+_zvec_coding_from_histogram (void *_histogram,
+ uword histogram_len,
+ uword histogram_elt_count_offset,
+ uword histogram_elt_bytes,
+ uword max_value_to_encode,
+ zvec_coding_info_t * coding_info_return);
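+
+/* Usage sketch (the struct and names here are hypothetical):
+
+ typedef struct { u32 value; zvec_histogram_count_t count; } my_hist_t;
+
+ my_hist_t h[64];
+ zvec_coding_info_t zc;
+ ... fill h[i].count with the frequency of value i ...
+ uword coding = zvec_coding_from_histogram (h, count, 64, ~0, &zc);
+
+ The macro derives the byte offset and stride of the count field from
+ the struct layout, so any element type with an embedded
+ zvec_histogram_count_t works. */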
+
+#define _(TYPE,IS_SIGNED) \
+ uword * zvec_encode_##TYPE (uword * zvec, uword * zvec_n_bits, uword coding, \
+ void * data, uword data_stride, uword n_data);
+
+_(u8, /* is_signed */ 0);
+_(u16, /* is_signed */ 0);
+_(u32, /* is_signed */ 0);
+_(u64, /* is_signed */ 0);
+_(i8, /* is_signed */ 1);
+_(i16, /* is_signed */ 1);
+_(i32, /* is_signed */ 1);
+_(i64, /* is_signed */ 1);
+
+#undef _
+
+#define _(TYPE,IS_SIGNED) \
+ void zvec_decode_##TYPE (uword * zvec, \
+ uword * zvec_n_bits, \
+ uword coding, \
+ void * data, \
+ uword data_stride, \
+ uword n_data)
+
+_(u8, /* is_signed */ 0);
+_(u16, /* is_signed */ 0);
+_(u32, /* is_signed */ 0);
+_(u64, /* is_signed */ 0);
+_(i8, /* is_signed */ 1);
+_(i16, /* is_signed */ 1);
+_(i32, /* is_signed */ 1);
+_(i64, /* is_signed */ 1);
+
+#undef _
+
+/* Signed <=> unsigned conversion.
+ -1, -2, -3, ... => 1, 3, 5, ... odds
+ 0, +1, +2, +3, ... => 0, 2, 4, 6, ... evens */
+always_inline uword
+zvec_signed_to_unsigned (word s)
+{
+ uword a = s < 0;
+ s = 2 * s + a;
+ return a ? -s : s;
+}
+
+always_inline word
+zvec_unsigned_to_signed (uword u)
+{
+ uword a = u & 1;
+ u >>= 1;
+ /* Odd codes map back to -(u + 1); even codes to u. */
+ return a ? -(u + 1) : u;
+}
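+
+/* Example round trips (illustrative): zvec_signed_to_unsigned (-3) == 5
+ and zvec_unsigned_to_signed (5) == -3; zvec_signed_to_unsigned (+2) == 4
+ and zvec_unsigned_to_signed (4) == +2. */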
+
+#endif /* included_zvec_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */